diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..b826e68b --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +# build directories +build/ +build_lnx/ +build_win/ \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 00000000..a1f40261 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,211 @@ +set( CONFORMANCE_SUFFIX "" ) +set(CLConform_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) + +project(CLConform${CONFORMANCE_SUFFIX}) + +if(CMAKE_BUILD_TYPE STREQUAL "release") + set (BUILD_FLAVOR "release") +else(CMAKE_BUILD_TYPE STREQUAL "release") + set (BUILD_FLAVOR "debug") +endif(CMAKE_BUILD_TYPE STREQUAL "release") + +set(CLConform_VERSION_MAJOR "2") +set(CLConform_VERSION_MINOR "2") +set(CLConform_VERSION_MICRO "0") +set(CLConform_VERSION_EXTRA "") +set(CLConform_VERSION "${CLConform_VERSION_MAJOR}.${CLConform_VERSION_MINOR}") +set(CLConform_VERSION_FULL + "${CLConform_VERSION}.${CLConform_VERSION_MICRO}${CLConform_VERSION_EXTRA}") + +cmake_minimum_required(VERSION 2.8) + +add_definitions(-DCL_USE_DEPRECATED_OPENCL_2_1_APIS=1) +add_definitions(-DCL_USE_DEPRECATED_OPENCL_2_0_APIS=1) +add_definitions(-DCL_USE_DEPRECATED_OPENCL_1_2_APIS=1) +add_definitions(-DCL_USE_DEPRECATED_OPENCL_1_1_APIS=1) +add_definitions(-DCL_USE_DEPRECATED_OPENCL_1_0_APIS=1) + +# where to look first for cmake modules, before ${CMAKE_ROOT}/Modules/ is checked +set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules) + + +# Support both VS2008 and VS2012. 
+set(BUILD_DIR "$ENV{ADRENO_DRIVER}/build") +if(MSVC90) + set(VS_BUILD_DIR "${BUILD_DIR}/vs2008") +else(MSVC110) + set(VS_BUILD_DIR "${BUILD_DIR}/vs2012") +endif(MSVC90) + +#----------------------------------------------------------- +# Default Configurable Test Set +#----------------------------------------------------------- +set(D3D10_IS_SUPPORTED) +set(D3D11_IS_SUPPORTED) +set(GL_IS_SUPPORTED) + + +#----------------------------------------------------------- +# Tests prefix and suffix +#----------------------------------------------------------- +# Set the prefix and suffix for the generated executables +# For example, if you want the api executable to be test_conformance_api_12 +# Set previx to "test_conformance_" and suffix to "_12" +set(CONFORMANCE_PREFIX "test_" ) +set(CONFORMNACE_SUFFIX "" ) + +#----------------------------------------------------------- +# Vendor Customization +#----------------------------------------------------------- +#Vendor Customization File can be included here to provide a way to automatically +#build driver as a depencency of the conformance tests, or other such CMake customization +option(USE_VENDOR_CUSTOM_FILE "Use Vendor Customization File" OFF) +if(USE_VENDOR_CUSTOM_FILE) + include(CMakeVendor.txt OPTIONAL) +endif(USE_VENDOR_CUSTOM_FILE) + +#----------------------------------------------------------- +# Development options for OpenCL C++ tests +#----------------------------------------------------------- +# Use OpenCL C kernels instead of OpenCL C++ kernels +option(CLPP_DEVELOPMENT_USE_OPENCLC_KERNELS "Use OpenCL C kernels in OpenCL C++ tests" OFF) +if(CLPP_DEVELOPMENT_USE_OPENCLC_KERNELS) + set(CLPP_DEVELOPMENT_OPTIONS ${CLPP_DEVELOPMENT_OPTIONS} -DCLPP_DEVELOPMENT_USE_OPENCLC_KERNELS) +endif(CLPP_DEVELOPMENT_USE_OPENCLC_KERNELS) +# Only check if OpenCL C++ kernels compile to SPIR-V +option(CLPP_DEVELOPMENT_ONLY_SPIRV_COMPILATION "Only check if OpenCL C++ kernels compile to SPIR-V" OFF) 
+if(CLPP_DEVELOPMENT_ONLY_SPIRV_COMPILATION) + if(CLPP_DEVELOPMENT_USE_OPENCLC_KERNELS) + message(FATAL_ERROR "Can't use OpenCL C kernels and compile to SPIR-V.") + endif(CLPP_DEVELOPMENT_USE_OPENCLC_KERNELS) + set(CLPP_DEVELOPMENT_OPTIONS ${CLPP_DEVELOPMENT_OPTIONS} -DCLPP_DEVELOPMENT_ONLY_SPIRV_COMPILATION) +endif(CLPP_DEVELOPMENT_ONLY_SPIRV_COMPILATION) +# +if(CLPP_DEVELOPMENT_OPTIONS) + add_definitions(-DCLPP_DEVELOPMENT_OPTIONS) + add_definitions(${CLPP_DEVELOPMENT_OPTIONS}) +endif(CLPP_DEVELOPMENT_OPTIONS) + +# Offline OpenCL C/C++ compiler provided by Khronos is the only supported +# offline compiler. +# +# Path to offline OpenCL C/C++ compiler provided by Khronos. +# See https://github.com/KhronosGroup/SPIR/ (spirv-1.1 branch or newer SPIR-V-ready +# branch should be used). +if(CL_OFFLINE_COMPILER) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DCL_OFFLINE_COMPILER=${CL_OFFLINE_COMPILER}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DCL_OFFLINE_COMPILER=${CL_OFFLINE_COMPILER}") + # Additional OpenCL C/C++ compiler option. 
+ if(CL_OFFLINE_COMPILER_OPTIONS) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DCL_OFFLINE_COMPILER_OPTIONS=${CL_OFFLINE_COMPILER_OPTIONS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DCL_OFFLINE_COMPILER_OPTIONS=${CL_OFFLINE_COMPILER_OPTIONS}") + endif(CL_OFFLINE_COMPILER_OPTIONS) +else(CL_OFFLINE_COMPILER) + message(STATUS "OpenCL C/C++ compiler hasn't been found!") + message(FATAL_ERROR "Pass path to OpenCL C/C++ compiler in CL_OFFLINE_COMPILER") +endif(CL_OFFLINE_COMPILER) + +# CL_LIBCLCXX_DIR - path to dir with OpenCL C++ STL (libclcxx) +# CL_INCLUDE_DIR - path to dir with OpenCL headers +# CL_LIBCLCXX_DIR - path to dir with OpenCL library +if(CL_INCLUDE_DIR AND CL_LIB_DIR AND CL_LIBCLCXX_DIR) + set(OPENCL_INCLUDE_DIR ${CL_INCLUDE_DIR}) + link_directories(${CL_LIB_DIR}) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DCL_LIBCLCXX_DIR=${CL_LIBCLCXX_DIR}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DCL_LIBCLCXX_DIR=${CL_LIBCLCXX_DIR}") +else(CL_INCLUDE_DIR AND CL_LIB_DIR AND CL_LIBCLCXX_DIR) + message(STATUS "OpenCL hasn't been found!") + message(FATAL_ERROR "Either install OpenCL or pass -DCL_INCLUDE_DIR, -DCL_LIB_DIR and -DCL_LIBCLCXX_DIR") +endif(CL_INCLUDE_DIR AND CL_LIB_DIR AND CL_LIBCLCXX_DIR) + +include(CheckFunctionExists) +include(CheckIncludeFiles) + +if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") + # -msse -mfpmath=sse to force gcc to use sse for float math, + # avoiding excess precision problems that cause tests like int2float + # to falsely fail. -ffloat-store also works, but WG suggested + # that sse would be better. 
+ if(CMAKE_ARM_COMPILER OR ANDROID) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g -std=gnu99 -Wno-narrowing") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -std=gnu++11 -Wno-narrowing") + else(CMAKE_ARM_COMPILER OR ANDROID) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g -std=gnu99 -msse -mfpmath=sse -Wno-narrowing") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -msse -mfpmath=sse -std=gnu++11 -Wno-narrowing") + endif(CMAKE_ARM_COMPILER OR ANDROID) +else() + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /D__SSE__") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /D__SSE__") +endif() + +list(APPEND CLConform_LIBRARIES ${OPENCL_LIBRARIES}) +if(ANDROID) + list(APPEND CLConform_LIBRARIES m) +elseif(NOT WIN32) + list(APPEND CLConform_LIBRARIES pthread) +endif(ANDROID) + +if(APPLE) + find_library(corefoundation CoreFoundation) + find_library(iokit IOKit) + list(APPEND CLConform_LIBRARIES ${corefoundation}) + list(APPEND CLConform_LIBRARIES ${iokit}) +endif(APPLE) + +include_directories(SYSTEM ${OPENCL_INCLUDE_DIR}) +include_directories(${CLConform_SOURCE_DIR}/test_common/harness + ${CLConform_SOURCE_DIR}/test_common/gles + ${CLConform_SOURCE_DIR}/test_common/gl + ${CMAKE_CURRENT_SOURCE_DIR}/test_common/harness) + +if(CMAKE_BUILD_TYPE STREQUAL "release") + set (BUILD_FLAVOR "release") +elseif (CMAKE_BUILD_TYPE STREQUAL "debug") + set (BUILD_FLAVOR "debug") +endif(CMAKE_BUILD_TYPE STREQUAL "release") + + +add_subdirectory(test_conformance) + +set (PY_PATH "${CLConform_SOURCE_DIR}/test_conformance/*.py") +set (CSV_PATH "${CLConform_SOURCE_DIR}/test_conformance/*.csv") +# Support both VS2008 and VS2012. +set (DLL_FILES "${VS_BUILD_DIR}/Debug/*.dll") +set (DST_DIR "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/Debug/") + +if (WIN32) + set (COPY "echo") + add_custom_target(COPY_DLL${CONFORMANCE_SUFFIX} ALL + COMMAND ${COPY} "${DLL_FILES}" "${DST_DIR}" + COMMENT "Copying dll files.. 
") +else (WIN32) + set (COPY cp) + add_custom_target(COPY_DLL${CONFORMANCE_SUFFIX}) +endif(WIN32) + +set_property(TARGET COPY_DLL${CONFORMANCE_SUFFIX} PROPERTY FOLDER "CONFORMANCE${CONFORMANCE_SUFFIX}") + +if(WIN32) + add_custom_target( COPY_FILES${CONFORMANCE_SUFFIX} ALL + COMMAND ${COPY} ${PY_PATH} ${DST_DIR} + COMMAND ${COPY} ${CSV_PATH} ${DST_DIR} + COMMAND ${COPY} ${DLL_FILES} ${DST_DIR} + COMMENT "Copying other files to output folder..." ) +else(WIN32) + add_custom_target( COPY_FILES${CONFORMANCE_SUFFIX} ) +endif(WIN32) + +# Copy required CL include directories into the build directory +# as required for the compiler testing. + +# ... For running the compiler test on the command line. +file(COPY "${CLConform_SOURCE_DIR}/test_conformance/compiler/includeTestDirectory" DESTINATION ${DST_DIR}) +file(COPY "${CLConform_SOURCE_DIR}/test_conformance/compiler/secondIncludeTestDirectory" DESTINATION ${DST_DIR}) + +# ... For running the compiler test with VisualStudio. +if(MSVC) + file(COPY "${CLConform_SOURCE_DIR}/test_conformance/compiler/includeTestDirectory" DESTINATION "${CLConform_SOURCE_DIR}/build/test_conformance/compiler") + file(COPY "${CLConform_SOURCE_DIR}/test_conformance/compiler/secondIncludeTestDirectory" DESTINATION "${CLConform_SOURCE_DIR}/build/test_conformance/compiler") +endif(MSVC) + +set_property(TARGET COPY_FILES${CONFORMANCE_SUFFIX} PROPERTY FOLDER "CONFORMANCE${CONFORMANCE_SUFFIX}") diff --git a/CMakeVendor.txt b/CMakeVendor.txt new file mode 100644 index 00000000..0f59481e --- /dev/null +++ b/CMakeVendor.txt @@ -0,0 +1,15 @@ +# Include cmake files to build driver +# to build driver as a dependency of tests +# in this example environment variable $OPENCL_DRIVER points to driver base +# Ex include($ENV{OPENCL_DRIVER}/driver.cmake) + +# We intentionally hardcode "_win32" to ensure backwards compatibility (to avoid breaking HAAVE) +if(ANDROID) + if(ARM64_V8A) + set(ARCH "64") + else(ARM64_V8A) + set(ARCH "32") + endif(ARM64_V8A) +endif (ANDROID) 
+ +set (CL_INCLUDE_DIR "$ENV{OPENCL_DRIVER}/include/public/") diff --git a/build_android.py b/build_android.py new file mode 100644 index 00000000..2bd7f5c8 --- /dev/null +++ b/build_android.py @@ -0,0 +1,159 @@ +#!/usr/bin/python + + +#-------------------------------------------------------------------------------# +# android-cmake and android-ndk based build script for conformance +#-------------------------------------------------------------------------------# +""" +Dependencies: + +1) android-ndk version android-ndk-r10d or higher is required. Further, the environment + variable ANDROID_NDK should be defined to point to it. + +2) android-cmake should be installed (else the script can install it for you). If installed, + the environment variable ANDROID_CMAKE should point to install location, unless it is in the current + working directory in which case it is picked up by default. + +3) CL_INCLUDE_DIR should be defined to point to CL headers. Alternately, this can be provided + as an input (-I) + +4) Path to opencl library to link against (libOpenCL.so) can be provided using -L. If this isn't + available the script will try to use CL_LIB_DIR_64 or CL_LIB_DIR_32 environment variables - + if available - to pick up the right library for the architecture being built. + + +""" + +import os +import sys +import subprocess +import argparse +import time +import shlex + +start = time.time() +script = os.path.basename( sys.argv[ 0 ] ) + +def die (msg): + print msg + exit(-1) + +def execute (cmdline): + retcode = subprocess.call(cmdline) + if retcode != 0: + raise Exception("Failed to execute '%s', got %d" % (commandLine, retcode)) + +def build(args): + if not (args.testDir): + print("building...") + execute("make") + else: + if os.path.exists( os.path.join(args.bld_dir, "test_conformance", args.testDir) ): + os.chdir( os.path.join("test_conformance",args.testDir) ) + print("Building test: %s..." 
%args.testDir) + execute("make") + os.chdir(args.bld_dir) + else: + print ("Error: %s test doesn't exist" %args.testDir) + + +def configure (args): + print("configuring...") + cmdline = [] + cmdline.extend(['cmake', "-DCMAKE_TOOLCHAIN_FILE=" + os.path.join(args.android_cmake,"android.toolchain.cmake")]) + for var in args.cmake_defs : + cmdline.extend([ '-D', var ]) + cmdline.extend(['-DCL_INCLUDE_DIR=' + args.inc_dir]) + cmdline.extend(['-DCL_LIB_DIR=' + args.lib_dir]) + cmdline.extend(['-DANDROID_NATIVE_API_LEVEL=' + "android-21"]) + if args.arch == "64": + cmdline.extend(['-DANDROID_ABI=arm64-v8a']) + cmdline.extend(['-DANDROID_SO_UNDEFINED=ON']) + cmdline.extend([args.src_dir]) + execute(cmdline) + +def check_var (parser, args, name): + if not(args.__dict__[name]): + parser.error("%s needs to be defined" % name) + +def print_config(args): + print("----------CONFIGURATION--------------\n") + print("android_cmake: %s" % args.android_cmake) + print("android_ndk: %s" % args.android_ndk) + print("lib_dir: %s" % args.lib_dir) + print("inc_dir: %s" % args.inc_dir) + if len(args.cmake_defs): + print("cmake options:" + "\n:".join( [ " `%s'" % dir for dir in args.cmake_defs ] )) + print("architecture: %s" % args.arch) + print("-------------------------------------\n") + +def get_input(): + yes = set(['yes','y', 'ye', '']) + no = set(['no','n']) + + choice = raw_input().lower() + if choice in yes: + return True + elif choice in no: + return False + else: + sys.stdout.write("Please respond with 'yes' or 'no'") + exit() + +def install_android_cmake(): + parser.print_help() + print "\nandroid-cmake doesn't seem to be installed - It should be provided as a) cmdline input b) environment variable $ANDROID_CMAKE or c) present in the current directory\n" + print "if you would like to download and install it in the current directory please enter yes\n" + print "if you would like to provide an environment variable($ANDROID_CMAKE) or command-line input(--android_cmake) rerun the 
script enter no\n" + print "input: " + if get_input(): + print("installing android-cmake") + subprocess.call(['git', 'clone', 'https://github.com/taka-no-me/android-cmake']) + args.android_cmake = os.path.join(args.src_dir,"android-cmake") + else: + exit() + +try: + parser = argparse.ArgumentParser() + parser.add_argument('--android_cmake', dest='android_cmake', default=os.environ.get('ANDROID_CMAKE'), help="Path to android-cmake (can also be set using environment variable $ANDROID_CMAKE).") + parser.add_argument('--android_ndk', dest='android_ndk', default=os.environ.get('ANDROID_NDK'), help="Path to android-ndk (can also be set using environment variable $ANDROID_NDK).") + parser.add_argument('-L','--lib_dir', dest='lib_dir', default="", help="Path to libOpenCL to link against (can also be set using environment variable $CL_LIB_DIR_32 and $CL_LIB_DIR_64).") + parser.add_argument('-I','--include_dir', dest='inc_dir', default=os.environ.get('CL_INCLUDE_DIR'), help="Path to headers (can also be set using environment variable $CL_INCLUDE_DIR).") + parser.add_argument('-D', dest='cmake_defs', action='append', default=[], help="Define CMAKE variable") + parser.add_argument('-a','--arch', default="32", help="Architecture to build for (32 or 64)") + parser.add_argument('-t','--test', dest='testDir', default="", help="Builds the given test") + + args = parser.parse_args() + + args.src_dir = os.path.realpath(os.path.dirname( sys.argv[ 0 ])) + + if not (args.android_cmake): + if os.path.exists(os.path.join(args.src_dir,"android-cmake")): + args.android_cmake = os.path.join(args.src_dir,"android-cmake") + else: + install_android_cmake() + + if not (args.lib_dir): + lib_var_name = "CL_LIB_DIR_" + ("32" if (args.arch == "32") else "64") + args.lib_dir = os.environ.get(lib_var_name) + + check_var(parser, args, "android_cmake") + check_var(parser, args, "lib_dir") + check_var(parser, args, "inc_dir") + check_var(parser, args, "android_ndk") + + print_config(args) + + 
args.bld_dir = os.path.join(args.src_dir, 'bld_android_%s' % args.arch) + if not os.path.exists(args.bld_dir): + os.makedirs(args.bld_dir) + os.chdir(args.bld_dir) + + configure(args) + build(args) + + sys.exit( 0 ) + +finally: + finish = time.time() + print("Elapsed time: %.0f s." % ( finish - start ) ) diff --git a/build_lnx.sh b/build_lnx.sh new file mode 100644 index 00000000..c9bde6e1 --- /dev/null +++ b/build_lnx.sh @@ -0,0 +1,6 @@ +#!/bin/sh + +mkdir -p build_lnx +cd build_lnx +cmake -g "Unix Makefiles" ../ -DCL_OFFLINE_COMPILER= -DCL_LIBCLCXX_DIR= -DCL_INCLUDE_DIR= -DCL_LIB_DIR= -DCMAKE_RUNTIME_OUTPUT_DIRECTORY=. -DOPENCL_LIBRARIES=OpenCL +make --jobs 8 diff --git a/build_win.bat b/build_win.bat new file mode 100644 index 00000000..0e715656 --- /dev/null +++ b/build_win.bat @@ -0,0 +1,32 @@ +@ECHO off +setlocal ENABLEDELAYEDEXPANSION + +IF DEFINED ProgramFiles(x86) SET ProgFilesDir=%ProgramFiles(x86)% +IF NOT DEFINED ProgFilesDir SET ProgFilesDir=%ProgramFiles% + +rem -------------------------------- Update these to match what's on your PC ------------------------------------------------ + +SET VCPATH="%ProgFilesDir%\Microsoft Visual Studio 14.0\Common7\IDE\devenv.com" + +SET PATH=%CMAKEPATH%;%PATH% + +rem ------------------------------------------------------------------------------------------------------------------------- + +setlocal ENABLEDELAYEDEXPANSION + +call "%VS140COMNTOOLS%\vsvars32.bat" + +mkdir build_win +pushd build_win +IF NOT EXIST CLConform.sln ( + echo "Solution file not found, running Cmake" + cmake -G "Visual Studio 14 2015 Win64" ..\. -DCL_OFFLINE_COMPILER= -DCL_LIBCLCXX_DIR= -DCL_INCLUDE_DIR= -DCL_LIB_DIR= -DCMAKE_RUNTIME_OUTPUT_DIRECTORY=. -DOPENCL_LIBRARIES=OpenCL +) else ( + echo "Solution file found CLConform.sln " +) + +echo Building CLConform.sln... 
+%VCPATH% CLConform.sln /build + + +GOTO:EOF diff --git a/clean_tests.py b/clean_tests.py new file mode 100644 index 00000000..4ee0294f --- /dev/null +++ b/clean_tests.py @@ -0,0 +1,104 @@ +#!/usr/bin/python + +import sys, os, re +from subprocess import Popen, PIPE +from optparse import OptionParser + +# trail_spaces: This method removes the trailing whitespaces and trailing tabs +def trail_spaces(line): + newline=line + carreturn = 0 + if re.search("\r\n",line): + carreturn = 1 + status = re.search("\s+$",line) + if status: + if carreturn: + newline = re.sub("\s+$","\r\n",line) + else: + newline = re.sub("\s+$","\n",line) + + status = re.search("\t+$",newline) + if status: + newline = re.sub("\t+$","",newline) + return newline + +#convert_tabs: This methos converts tabs to 4 spaces +def convert_tabs(line): + newline=line + status = re.search("\t",line) + if status: + newline = re.sub("\t"," ",line) + return newline + +#convert_lineends: This method converts lineendings from DOS to Unix +def convert_lineends(line): + newline=line + status = re.search("\r\n",line) + if status: + newline = re.sub("\r\n","\n",line) + return newline + +#processfile: This method processes each file passed to it depending +# on the flags passed + +def processfile(file,tabs, lineends,trails,verbose): + processed_data = [] + if verbose: + print "processing file: "+file + + with open(file,'r') as fr: + data = fr.readlines() + for line in data: + if tabs: + line = convert_tabs(line) + if lineends: + line = convert_lineends(line) + if trails: + line = trail_spaces(line) + processed_data.append(line) + + with open(file,'w') as fw: + fw.writelines(processed_data) + +#findfiles: This method finds all the code files present in current +# directory and subdirectories. 
+ +def findfiles(tabs,lineends,trails,verbose): + testfiles = [] + for root, dirs, files in os.walk("./"): + for file in files: + for extn in ('.c','.cpp','.h','.hpp'): + if file.endswith(extn): + testfiles.append(os.path.join(root, file)) + for file in testfiles: + processfile(file,tabs,lineends,trails,verbose) + +# Main function + +def main(): + + parser = OptionParser() + parser.add_option("--notabs", dest="tabs", action="store_false", default=True, help="Disable converting tabs to 4 spaces.") + parser.add_option("--notrails", dest="trails", action="store_false", default=True, help="Disable removing trailing whitespaces and trailing tabs.") + parser.add_option("--nolineends", dest="lineends", action="store_false", default=True, help=" Disable converting line endings to Unix from DOS.") + parser.add_option("--verbose", dest="verbose", action="store_true", default=False, help="Prints out the files being processed.") + parser.add_option("--git", dest="SHA1", default="", help="Processes only the files present in the particular commit.") + parser.add_option('-o', action="store", default=True, help="Default: All the code files (.c,.cpp,.h,.hpp) in the current directory and subdirectories will be processed") + + (options, args) = parser.parse_args() + + if options.SHA1: + pl = Popen(["git","show", "--pretty=format:", "--name-only",options.SHA1], stdout=PIPE) + cmdout = pl.communicate()[0] + gitout=cmdout.split("\n") + for file in gitout: + print file + if file: + processfile(file,options.tabs,options.lineends,options.trails,options.verbose) + + + if not options.SHA1: + findfiles(options.tabs,options.lineends,options.trails,options.verbose) + +# start the process by calling main +main() diff --git a/license.txt b/license.txt new file mode 100644 index 00000000..2784a09d --- /dev/null +++ b/license.txt @@ -0,0 +1,4 @@ +The code inside this directory and its subdirectories is +"Open GL (including Open CL) Automated Test System - Common Code" +and is subject to the license 
agreement between Apple and the licensee. + diff --git a/readme-spir-v-binaries.txt b/readme-spir-v-binaries.txt new file mode 100644 index 00000000..1b6df9bc --- /dev/null +++ b/readme-spir-v-binaries.txt @@ -0,0 +1,42 @@ +To run the 2.2 conformance tests test suite for the C++ features you need need +SPIR-V binaries. + +If you are using a conformance package then the binaries are included in the +package. If you are using conformance tests from gitlab repositories then the +binaries need to be picked up from Khronos SVN URL mentioned below: + +https://cvs.khronos.org/svn/repos/OpenCL/trunk/Khronos/spirv/spirv10_2015.11.25.zip + + +Alternatively you can check out and build all of the below repositories. + +1. SPIRV-LLVM +LLVM with support for SPIR-V (required by clang compiler) +Repository: https://gitlab.khronos.org/opencl/SPIRV-LLVM +Branch: spec_constants +Notes: spirv-3.6.1 is a main branch with support for OpenCL C++ kernel language, + spec_constants is based on it, but it adds support for specialization constants. + +2. Clang +Clang with support for OpenCL C++ kernel language +Repository: https://gitlab.khronos.org/opencl/clang +Branch: spec_constants +Notes: spirv-1.1 is a main branch with support for OpenCL C++ kernel language, + spec_constants is based on it, but it adds support for specialization constants. + +3. libclcxx +OpenCL C++ Standard Library +Repository: https://gitlab.khronos.org/opencl/libclcxx +Branch: lit_tests_cl22 +Notes: lit_tests_cl22 branch includes both LIT tests and changes introduced in + spec_constants branch, that is, implementation of Specialization Constants Library. + +4. OpenCL 2.2 headers +OpenCL 2.2 headers +Repository: https://gitlab.khronos.org/opencl/headers +Branch: opencl22 + +5. 
OpenCL ICD (with 2.2 support) +OpenCL ICD +Repository: https://gitlab.khronos.org/opencl/icd +Branch: dev_cl22 \ No newline at end of file diff --git a/test_common/Makefile b/test_common/Makefile new file mode 100644 index 00000000..6b930323 --- /dev/null +++ b/test_common/Makefile @@ -0,0 +1,32 @@ + +PRODUCTS = harness/\ + +# utils/ + +TOP=$(shell pwd) + +all: $(PRODUCTS) + +clean: + @for testdir in $(dir $(PRODUCTS)) ; \ + do ( \ + echo "==================================================================================" ; \ + echo "Cleaning $$testdir" ; \ + echo "==================================================================================" ; \ + if test -d $$testdir; \ + then cd $$testdir && make clean; \ + else echo "Warning: Directory '$$testdir' Does Not Exist"; \ + fi; \ + ); \ + done \ + +$(PRODUCTS): + @echo "==================================================================================" ; + @echo "(`date "+%H:%M:%S"`) Make $@" ; + @echo "==================================================================================" ; + @if test -d $@; \ + then cd $(dir $@) && make; \ + else echo "Warning: Directory '$@' Does Not Exist"; \ + fi; \ + +.PHONY: clean $(PRODUCTS) all diff --git a/test_common/autotest/autotest.hpp b/test_common/autotest/autotest.hpp new file mode 100644 index 00000000..cdaca6c3 --- /dev/null +++ b/test_common/autotest/autotest.hpp @@ -0,0 +1,58 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef TEST_COMMON_AUTOTEST_AUTOTEST_HPP +#define TEST_COMMON_AUTOTEST_AUTOTEST_HPP + +#include "test_suite.hpp" +#include "test_case.hpp" + +namespace autotest { + inline std::vector get_strings_ptrs(const std::vector& list) + { + std::vector v; + for(auto& s : list) + { + v.push_back(s.c_str()); + } + return v; + } +} + +#define STR_JOIN( X, Y ) STR_DO_JOIN( X, Y ) +#define STR_DO_JOIN( X, Y ) STR_DO_JOIN_2(X,Y) +#define STR_DO_JOIN_2( X, Y ) X##Y + + +// How to use AUTO_TEST_CASE macro: +// +// AUTO_TEST_CASE()(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +// { +// (test case code...) +// } +// +// It automatically registers created test case to global test_suite object. Test functions +// names and pointers to those functions can be later retrieved this way: +// - std::vector test_functions_list = autotest::test_suite::get_test_functions(); +// - std::vector test_functions_names = autotest::test_suite::get_test_names(); +// +// Helper function which constructs vector of const char pointers to test functions names: +// - std::vector test_functions_names_c_str = autotest::get_strings_ptrs(test_functions_names); +#define AUTO_TEST_CASE(name) \ + struct name { static int run_test(cl_device_id, cl_context, cl_command_queue, int); }; \ + static autotest::detail::test_case_registration STR_JOIN(name, STR_JOIN(_registration, __LINE__)) (#name, name::run_test); \ + int name::run_test + +#endif //TEST_COMMON_AUTOTEST_AUTOTEST_HPP \ No newline at end of file diff --git a/test_common/autotest/test_case.hpp b/test_common/autotest/test_case.hpp new file mode 100644 index 00000000..17e68aac --- /dev/null +++ b/test_common/autotest/test_case.hpp @@ -0,0 +1,41 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef TEST_COMMON_AUTOTEST_TEST_CASE_HPP +#define TEST_COMMON_AUTOTEST_TEST_CASE_HPP + +#include + +#include "../../test_common/harness/threadTesting.h" + +namespace autotest +{ + +struct test_case { + // Test case name + const std::string name; + // Pointer to test function. + const basefn function_pointer; + + test_case(const std::string& name, const basefn function_ptr) + : name(name), function_pointer(function_ptr) + { + + } +}; + +} // end namespace autotest + +#endif // TEST_COMMON_AUTOTEST_TEST_CASE_HPP diff --git a/test_common/autotest/test_suite.hpp b/test_common/autotest/test_suite.hpp new file mode 100644 index 00000000..70b3e4eb --- /dev/null +++ b/test_common/autotest/test_suite.hpp @@ -0,0 +1,83 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef TEST_COMMON_AUTOTEST_TEST_SUITE_HPP +#define TEST_COMMON_AUTOTEST_TEST_SUITE_HPP + +#include +#include + +#include "test_case.hpp" + +namespace autotest { + +struct test_suite { + test_suite(const std::string& name) + : name(name) + { + + } + + void add(const test_case& tc) + { + test_cases.push_back(tc); + } + + static std::vector get_test_functions() + { + std::vector v; + for(auto& tc: global_test_suite().test_cases) + { + v.push_back(tc.function_pointer); + } + return v; + } + + static std::vector get_test_names() + { + std::vector v; + for(auto& tc : global_test_suite().test_cases) + { + v.push_back(tc.name); + } + return v; + } + + // List of test cases + std::vector test_cases; + // Test suite name + const std::string name; + + static test_suite& global_test_suite() + { + static test_suite global_test_suite("global"); + return global_test_suite; + } +}; + +namespace detail { + +struct test_case_registration +{ + test_case_registration(const std::string& name, const basefn ptr) + { + ::autotest::test_suite::global_test_suite().add(test_case(name, ptr)); + } +}; + +} // end detail namespace +} // end autotest namespace + +#endif // TEST_COMMON_AUTOTEST_TEST_SUITE_HPP diff --git a/test_common/config.hpp b/test_common/config.hpp new file mode 100644 index 00000000..a037d4e4 --- /dev/null +++ b/test_common/config.hpp @@ -0,0 +1,41 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef TEST_COMMON_CONFIG_HPP +#define TEST_COMMON_CONFIG_HPP + +// Enable development options for OpenCL C++ tests (test_conformance/clpp) +// #define DEVELOPMENT +#if defined(CLPP_DEVELOPMENT_OPTIONS) && !defined(DEVELOPMENT) + #define DEVELOPMENT +#endif + +#ifdef DEVELOPMENT + // If defined OpenCL C++ tests only checks if OpenCL C++ kernels compiles correctly + // #define ONLY_SPIRV_COMPILATION + #if defined(CLPP_DEVELOPMENT_ONLY_SPIRV_COMPILATION) && !defined(ONLY_SPIRV_COMPILATION) + #define ONLY_SPIRV_COMPILATION + #endif + + #ifndef ONLY_SPIRV_COMPILATION + // If defined OpenCL C++ tests are run using OpenCL C kernels + // #define USE_OPENCLC_KERNELS + #if defined(CLPP_DEVELOPMENT_USE_OPENCLC_KERNELS) && !defined(USE_OPENCLC_KERNELS) + #define USE_OPENCLC_KERNELS + #endif + #endif +#endif + +#endif // TEST_COMMON_CONFIG_HPP \ No newline at end of file diff --git a/test_common/gl/gl_headers.h b/test_common/gl/gl_headers.h new file mode 100644 index 00000000..61fbe63c --- /dev/null +++ b/test_common/gl/gl_headers.h @@ -0,0 +1,60 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef _gl_headers_h +#define _gl_headers_h + +#if defined( __APPLE__ ) + #include +#if defined(CGL_VERSION_1_3) + #include + #include +#else + #include + #include +#endif + #include +#else +#ifdef _WIN32 + #include +#endif +#if defined( __ANDROID__ ) + #ifndef GL_GLEXT_PROTOTYPES + #define GL_GLEXT_PROTOTYPES + #endif + #include + #include +#else + #include + #include + #include +#endif +#ifdef _WIN32 + #include +#elif !defined(__ANDROID__) + #include +#endif + +#endif + +#ifdef _WIN32 + GLboolean gluCheckExtension(const GLubyte *extName, const GLubyte *extString); + // No glutGetProcAddress in the standard glut v3.7. + #define glutGetProcAddress(procName) wglGetProcAddress(procName) +#endif + + +#endif // __gl_headers_h + diff --git a/test_common/gl/helpers.cpp b/test_common/gl/helpers.cpp new file mode 100644 index 00000000..c02a4b12 --- /dev/null +++ b/test_common/gl/helpers.cpp @@ -0,0 +1,2496 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "helpers.h" +#include "../harness/imageHelpers.h" + +// convert_float_to_half and convert_half_to_float may be found in test_conformance/images/image_helpers.cpp +cl_ushort convert_float_to_half( cl_float f ); +cl_float convert_half_to_float( cl_ushort h ); + +#if defined( __APPLE__ ) + #include +#else + #include +#endif + +#if defined(__linux__) +// On linux we dont link to GLU library to avoid comaptibility issues with +// libstdc++ +// FIXME: Implement this +const GLubyte* gluErrorString (GLenum error) +{ + const char* gl_Error = "OpenGL Error"; + return (const GLubyte*)gl_Error; +} +#endif + +void * CreateGLTexture1DArray(size_t width, size_t length, + GLenum target, GLenum glFormat, GLenum internalFormat, GLenum glType, + ExplicitType type, GLuint *outTextureID, int *outError, + bool allocateMem, MTdata d) +{ + *outError = 0; + GLenum err = 0; + + char * buffer; + unsigned int size = 0; + + // width_in_pixels * pixel_width * number_of_images: + if ( (glType == GL_UNSIGNED_INT_2_10_10_10_REV) || (glType == GL_UNSIGNED_INT_10_10_10_2) ) + { + size = width * length; + } + else + { + size = width * length * 4; + } + + buffer = (char *)CreateRandomData(type, size, d); + + glGenTextures( 1, outTextureID ); + glBindTexture( get_base_gl_target( target ), *outTextureID ); + glTexEnvi( GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE ); + glTexParameteri( get_base_gl_target( target ), GL_TEXTURE_MIN_FILTER, GL_NEAREST ); + glTexParameteri( get_base_gl_target( target ), GL_TEXTURE_MAG_FILTER, GL_NEAREST ); + err = glGetError(); + if( err != GL_NO_ERROR ) { + log_error( "ERROR: Failed to create GL texture object: %s!\n", gluErrorString( err )); + *outError = -1; + free( buffer ); + return NULL; + } + + // use TexImage2D to pump the 1D array fill of bits: + glTexImage2D( get_base_gl_target(target), 0, internalFormat, (GLsizei)width, + (GLsizei)length, 0, glFormat, glType, buffer ); + + err = glGetError(); + if( err != GL_NO_ERROR ) { + if (err != 
GL_OUT_OF_MEMORY) { + log_error( "ERROR: Unable to load data using glTexImage2D for " + "TEXTURE_1D_ARRAY : %s : %s : %d : %d : %s : %s : Error %s\n", + GetGLTargetName(target), + GetGLFormatName(internalFormat), + (int)(width), (int)(length), + GetGLFormatName(glFormat), + GetGLTypeName(glType), + gluErrorString( err )); + + *outError = -1; + } else { + log_info( "WARNING: Unable to load data using glTexImage2D for " + "TEXTURE_1D_ARRAY : %s : %s : %d : %d : %s : %s : Error %s\n", + GetGLTargetName(target), + GetGLFormatName(internalFormat), + (int)(width), (int)(length), + GetGLFormatName(glFormat), + GetGLTypeName(glType), + gluErrorString( err )); + + *outError = -2; + } + free( buffer ); + return NULL; + } + + if( !allocateMem ) { + free( buffer ); + return NULL; + } + + if( glType == GL_UNSIGNED_INT_8_8_8_8_REV && glFormat == GL_BGRA && allocateMem ) + { + // Reverse and reorder to validate since in the + // kernel the read_imagef() call always returns RGBA + cl_uchar *p = (cl_uchar *)buffer; + for( size_t i = 0; i < width * length; i++ ) { + cl_uchar uc0 = p[i * 4 + 0]; + cl_uchar uc1 = p[i * 4 + 1]; + cl_uchar uc2 = p[i * 4 + 2]; + cl_uchar uc3 = p[i * 4 + 3]; + + p[ i * 4 + 0 ] = uc2; + p[ i * 4 + 1 ] = uc1; + p[ i * 4 + 2 ] = uc0; + p[ i * 4 + 3 ] = uc3; + } + } + else if( glType == GL_UNSIGNED_INT_8_8_8_8 && glFormat == GL_BGRA && allocateMem ) + { + // Reverse and reorder to validate since in the + // kernel the read_imagef() call always returns RGBA + cl_uchar *p = (cl_uchar *)buffer; + for( size_t i = 0; i < width * length; i++ ) + { + cl_uchar uc0 = p[i * 4 + 0]; + cl_uchar uc1 = p[i * 4 + 1]; + cl_uchar uc2 = p[i * 4 + 2]; + cl_uchar uc3 = p[i * 4 + 3]; + + p[ i * 4 + 0 ] = uc1; + p[ i * 4 + 1 ] = uc2; + p[ i * 4 + 2 ] = uc3; + p[ i * 4 + 3 ] = uc0; + } + } + + return buffer; +} + +void * CreateGLTexture2DArray(size_t width, size_t height, size_t length, + GLenum target, GLenum glFormat, GLenum internalFormat, GLenum glType, + ExplicitType type, 
GLuint *outTextureID, int *outError, + bool allocateMem, MTdata d) +{ + *outError = 0; + + char * buffer; + unsigned int size = 0; + + if ( (glType == GL_UNSIGNED_INT_2_10_10_10_REV) || (glType == GL_UNSIGNED_INT_10_10_10_2) ) + { + size = width * height * length; + } + else + { + size = width * height * length * 4; + } + + buffer = (char *)CreateRandomData(type, size, d); + + if( type == kFloat && allocateMem ) + { + // Re-fill the created buffer to just have [0-1] floats, since that's what it'd expect + cl_float *p = (cl_float *)buffer; + for( size_t i = 0; i < size; i++ ) + { + p[ i ] = (float) genrand_real1( d ); + } + } + else if( !allocateMem ) + memset( buffer, 0, size * get_explicit_type_size( type ) ); + + glGenTextures( 1, outTextureID ); + + glBindTexture( target, *outTextureID ); + glTexEnvi( GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE ); + glTexParameteri( target, GL_TEXTURE_MIN_FILTER, GL_NEAREST ); + glTexParameteri( target, GL_TEXTURE_MAG_FILTER, GL_NEAREST ); + + glGetError(); + //the default alignment in OpenGL is 4 bytes and need to be changed for GL_DEPTH_COMPONENT16 which is aligned to 2 bytes + if (internalFormat == GL_DEPTH_COMPONENT16) + glPixelStorei(GL_UNPACK_ALIGNMENT, get_explicit_type_size( type )); + + glTexImage3D( target, 0, internalFormat, (GLsizei)width, (GLsizei)height, + (GLsizei)length, 0, glFormat, glType, buffer ); + + if (internalFormat == GL_DEPTH_COMPONENT16) + glPixelStorei(GL_UNPACK_ALIGNMENT, 4); + + GLenum err = glGetError(); + if( err != GL_NO_ERROR ) + { + if (err != GL_OUT_OF_MEMORY) { + log_error( "ERROR: Unable to load data into GL texture (%s) format %s " + "type %s internal format %s\n", gluErrorString( err ), + GetGLFormatName( glFormat ), get_explicit_type_name( type ), + GetGLFormatName( internalFormat ) ); + *outError = -1; + } else { + log_info( "WARNING: Unable to load data into GL texture (%s) format %s " + "type %s internal format %s\n", gluErrorString( err ), + GetGLFormatName( glFormat ), 
get_explicit_type_name( type ), + GetGLFormatName( internalFormat ) ); + *outError = -2; + } + delete [] buffer; + return NULL; + } + + if( !allocateMem ) + { + delete [] buffer; + return NULL; + } + + if( glType == GL_UNSIGNED_INT_8_8_8_8_REV && glFormat == GL_BGRA && allocateMem ) + { + // Reverse and reorder to validate since in the + // kernel the read_imagef() call always returns RGBA + cl_uchar *p = (cl_uchar *)buffer; + for( size_t i = 0; i < width * height * length; i++ ) + { + cl_uchar uc0 = p[i * 4 + 0]; + cl_uchar uc1 = p[i * 4 + 1]; + cl_uchar uc2 = p[i * 4 + 2]; + cl_uchar uc3 = p[i * 4 + 3]; + + p[ i * 4 + 0 ] = uc2; + p[ i * 4 + 1 ] = uc1; + p[ i * 4 + 2 ] = uc0; + p[ i * 4 + 3 ] = uc3; + } + } + else if( glType == GL_UNSIGNED_INT_8_8_8_8 && glFormat == GL_BGRA && allocateMem ) + { + // Reverse and reorder to validate since in the + // kernel the read_imagef() call always returns RGBA + cl_uchar *p = (cl_uchar *)buffer; + for( size_t i = 0; i < width * length; i++ ) + { + cl_uchar uc0 = p[i * 4 + 0]; + cl_uchar uc1 = p[i * 4 + 1]; + cl_uchar uc2 = p[i * 4 + 2]; + cl_uchar uc3 = p[i * 4 + 3]; + + p[ i * 4 + 0 ] = uc1; + p[ i * 4 + 1 ] = uc2; + p[ i * 4 + 2 ] = uc3; + p[ i * 4 + 3 ] = uc0; + } + } + + return buffer; +} + +void * CreateGLTextureBuffer(size_t width, GLenum target, + GLenum glFormat, GLenum internalFormat, GLenum glType, ExplicitType type, + GLuint *outTex, GLuint *outBuf, int *outError, bool allocateMem, MTdata d) +{ + // First, generate a regular GL Buffer from random data. 
+ *outError = 0; + GLenum err = 0; + + char * buffer; + unsigned int size = 0; + + // The buffer should be the array width * number of elements * element pitch + if ( (glType == GL_UNSIGNED_INT_2_10_10_10_REV) || (glType == GL_UNSIGNED_INT_10_10_10_2) ) + { + size = width; + } + else + { + size = width * 4; + } + + buffer = (char*)CreateRandomData(type, size, d); + + err = glGetError(); + + glGenBuffers(1, outBuf); + glBindBuffer(GL_TEXTURE_BUFFER, *outBuf); + + // Need to multiply by the type size: + size *= ( GetGLTypeSize( GetGLTypeForExplicitType(type) ) ); + + glBufferData(GL_TEXTURE_BUFFER, size, buffer, GL_DYNAMIC_DRAW); + + // Now make a Texture out of this Buffer: + + glGenTextures(1, outTex); + glBindTexture(GL_TEXTURE_BUFFER, *outTex); + glTexBuffer(GL_TEXTURE_BUFFER, internalFormat, *outBuf); + + if ((err = glGetError())) { + log_error( "ERROR: Unable to load data into glTexBuffer : %s : %s : %d : %s : %s : Error %s\n", + GetGLTargetName(target), + GetGLFormatName(internalFormat), + (int)(size), + GetGLFormatName(glFormat), + GetGLTypeName(glType), + gluErrorString( err )); + *outError = -1; + delete [] buffer; + return NULL; + } + + if( !allocateMem ) { + free( buffer ); + return NULL; + } + + if( glType == GL_UNSIGNED_INT_8_8_8_8_REV && glFormat == GL_BGRA && allocateMem ) + { + // Reverse and reorder to validate since in the + // kernel the read_imagef() call always returns RGBA + cl_uchar *p = (cl_uchar *)buffer; + for( size_t i = 0; i < width; i++ ) { + cl_uchar uc0 = p[i * 4 + 0]; + cl_uchar uc1 = p[i * 4 + 1]; + cl_uchar uc2 = p[i * 4 + 2]; + cl_uchar uc3 = p[i * 4 + 3]; + + p[ i * 4 + 0 ] = uc2; + p[ i * 4 + 1 ] = uc1; + p[ i * 4 + 2 ] = uc0; + p[ i * 4 + 3 ] = uc3; + } + } + else if( glType == GL_UNSIGNED_INT_8_8_8_8 && glFormat == GL_BGRA && allocateMem ) + { + // Reverse and reorder to validate since in the + // kernel the read_imagef() call always returns RGBA + cl_uchar *p = (cl_uchar *)buffer; + for( size_t i = 0; i < width; i++ ) + { + 
cl_uchar uc0 = p[i * 4 + 0]; + cl_uchar uc1 = p[i * 4 + 1]; + cl_uchar uc2 = p[i * 4 + 2]; + cl_uchar uc3 = p[i * 4 + 3]; + + p[ i * 4 + 0 ] = uc1; + p[ i * 4 + 1 ] = uc2; + p[ i * 4 + 2 ] = uc3; + p[ i * 4 + 3 ] = uc0; + } + } + + return buffer; +} + +void* CreateGLTexture1D( size_t width, GLenum target, GLenum glFormat, + GLenum internalFormat, GLenum glType, ExplicitType type, + GLuint *outTextureID, int *outError, bool allocateMem, MTdata d ) +{ + *outError = 0; + GLenum err = 0; + + char * buffer; + unsigned int size = 0; + + // The buffer should be the array width * number of elements * element pitch + if ( (glType == GL_UNSIGNED_INT_2_10_10_10_REV) || (glType == GL_UNSIGNED_INT_10_10_10_2) ) + { + size = width; + } + else + { + size = width * 4; + } + + buffer = (char*)CreateRandomData(type, size, d); + + glGenTextures( 1, outTextureID ); + glBindTexture( get_base_gl_target( target ), *outTextureID ); + glTexEnvi( GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE ); + glTexParameteri( get_base_gl_target( target ), GL_TEXTURE_MIN_FILTER, GL_NEAREST ); + glTexParameteri( get_base_gl_target( target ), GL_TEXTURE_MAG_FILTER, GL_NEAREST ); + err = glGetError(); + if( err != GL_NO_ERROR ) + { + log_error( "ERROR: Failed to create GL texture object: %s!\n", gluErrorString( err )); + *outError = -1; + free( buffer ); + return NULL; + } + + glTexImage1D( get_base_gl_target(target), 0, internalFormat, (GLsizei)width, + 0, glFormat, glType, buffer ); + + err = glGetError(); + if( err != GL_NO_ERROR ) + { + if (err != GL_OUT_OF_MEMORY) { + log_error( "ERROR: Unable to load data into glTexImage1D : %s : %s : %d : %s : %s : Error %s\n", + GetGLTargetName(target), + GetGLFormatName(internalFormat), + (int)(width), + GetGLFormatName(glFormat), + GetGLTypeName(glType), + gluErrorString( err )); + *outError = -1; + } else { + log_info( "WARNING: Unable to load data into glTexImage1D : %s : %s : %d : %s : %s : Error %s\n", + GetGLTargetName(target), + 
GetGLFormatName(internalFormat), + (int)(width), + GetGLFormatName(glFormat), + GetGLTypeName(glType), + gluErrorString( err )); + *outError = -2; + } + free( buffer ); + return NULL; + } + + if( !allocateMem ) { + free( buffer ); + return NULL; + } + + if( glType == GL_UNSIGNED_INT_8_8_8_8_REV && glFormat == GL_BGRA && allocateMem ) + { + // Reverse and reorder to validate since in the + // kernel the read_imagef() call always returns RGBA + cl_uchar *p = (cl_uchar *)buffer; + for( size_t i = 0; i < width; i++ ) { + cl_uchar uc0 = p[i * 4 + 0]; + cl_uchar uc1 = p[i * 4 + 1]; + cl_uchar uc2 = p[i * 4 + 2]; + cl_uchar uc3 = p[i * 4 + 3]; + + p[ i * 4 + 0 ] = uc2; + p[ i * 4 + 1 ] = uc1; + p[ i * 4 + 2 ] = uc0; + p[ i * 4 + 3 ] = uc3; + } + } + else if( glType == GL_UNSIGNED_INT_8_8_8_8 && glFormat == GL_BGRA && allocateMem ) + { + // Reverse and reorder to validate since in the + // kernel the read_imagef() call always returns RGBA + cl_uchar *p = (cl_uchar *)buffer; + for( size_t i = 0; i < width; i++ ) + { + cl_uchar uc0 = p[i * 4 + 0]; + cl_uchar uc1 = p[i * 4 + 1]; + cl_uchar uc2 = p[i * 4 + 2]; + cl_uchar uc3 = p[i * 4 + 3]; + + p[ i * 4 + 0 ] = uc1; + p[ i * 4 + 1 ] = uc2; + p[ i * 4 + 2 ] = uc3; + p[ i * 4 + 3 ] = uc0; + } + } + + return buffer; +} + +void * CreateGLTexture2D( size_t width, size_t height, + GLenum target, GLenum glFormat, + GLenum internalFormat, GLenum glType, + ExplicitType type, GLuint *outTextureID, + int *outError, bool allocateMem, MTdata d ) +{ + *outError = 0; + GLenum err = 0; + + char * buffer; + unsigned int size = 0; + + if ( (glType == GL_UNSIGNED_INT_2_10_10_10_REV) || (glType == GL_UNSIGNED_INT_10_10_10_2) ) + { + size = width * height; + } + else + { + size = width * height * 4; + } + + buffer = (char *)CreateRandomData(type, size, d); + + glGenTextures( 1, outTextureID ); + glBindTexture( get_base_gl_target( target ), *outTextureID ); + glTexEnvi( GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE ); + glTexParameteri( 
get_base_gl_target( target ), GL_TEXTURE_MIN_FILTER, GL_NEAREST ); + glTexParameteri( get_base_gl_target( target ), GL_TEXTURE_MAG_FILTER, GL_NEAREST ); + err = glGetError(); + if( err != GL_NO_ERROR ) + { + log_error( "ERROR: Failed to create GL texture object: %s!\n", gluErrorString( err )); + *outError = -1; + free( buffer ); + return NULL; + } + + if( get_base_gl_target( target ) == GL_TEXTURE_CUBE_MAP ) + { + char * temp = (char *)malloc(size * get_explicit_type_size( type ) * sizeof(cl_char)); + if(allocateMem) + memcpy( temp, buffer, size * get_explicit_type_size( type ) ); + else + memset( temp, 0, size * get_explicit_type_size( type ) ); + + glTexImage2D( GL_TEXTURE_CUBE_MAP_POSITIVE_X, 0, internalFormat, (GLsizei)width, (GLsizei)height, 0, glFormat, glType, temp ); + glTexImage2D( GL_TEXTURE_CUBE_MAP_POSITIVE_Y, 0, internalFormat, (GLsizei)width, (GLsizei)height, 0, glFormat, glType, temp ); + glTexImage2D( GL_TEXTURE_CUBE_MAP_POSITIVE_Z, 0, internalFormat, (GLsizei)width, (GLsizei)height, 0, glFormat, glType, temp ); + glTexImage2D( GL_TEXTURE_CUBE_MAP_NEGATIVE_X, 0, internalFormat, (GLsizei)width, (GLsizei)height, 0, glFormat, glType, temp ); + glTexImage2D( GL_TEXTURE_CUBE_MAP_NEGATIVE_Y, 0, internalFormat, (GLsizei)width, (GLsizei)height, 0, glFormat, glType, temp ); + glTexImage2D( GL_TEXTURE_CUBE_MAP_NEGATIVE_Z, 0, internalFormat, (GLsizei)width, (GLsizei)height, 0, glFormat, glType, temp ); + free(temp); + } + else + { +#ifdef DEBUG + log_info("- glTexImage2D : %s : %s : %d : %d : %s : %s\n", + GetGLTargetName(target), + GetGLFormatName(internalFormat), + width, height, + GetGLFormatName(glFormat), + GetGLTypeName(glType)); + + DumpGLBuffer(glType, width, height, buffer); +#endif + + //the default alignment in OpenGL is 4 bytes and need to be changed for GL_DEPTH_COMPONENT16 which is aligned to 2 bytes + if (internalFormat == GL_DEPTH_COMPONENT16) + glPixelStorei(GL_UNPACK_ALIGNMENT, get_explicit_type_size( type )); + + glTexImage2D( 
get_base_gl_target(target), 0, internalFormat, (GLsizei)width, (GLsizei)height, 0, glFormat, glType, buffer ); + + if (internalFormat == GL_DEPTH_COMPONENT16) + glPixelStorei(GL_UNPACK_ALIGNMENT, 4); + } + + err = glGetError(); + if( err != GL_NO_ERROR ) + { + if (err != GL_OUT_OF_MEMORY) { + log_error( "ERROR: Unable to load data into glTexImage2D : %s : %s : %d : %d : %s : %s : Error %s\n", + GetGLTargetName(target), + GetGLFormatName(internalFormat), + (int)(width), (int)(height), + GetGLFormatName(glFormat), + GetGLTypeName(glType), + gluErrorString( err )); + *outError = -1; + } else { + log_info( "WARNING: Unable to load data into glTexImage2D : %s : %s : %d : %d : %s : %s : Error %s\n", + GetGLTargetName(target), + GetGLFormatName(internalFormat), + (int)(width), (int)(height), + GetGLFormatName(glFormat), + GetGLTypeName(glType), + gluErrorString( err )); + *outError = -2; + } + free( buffer ); + return NULL; + } + +#ifdef DEBUG + char * test = (char *)malloc(width * height * 4 * get_explicit_type_size( type )); + memset(test, 0, width * height * 4 * get_explicit_type_size( type )); + + if ( (glType == GL_UNSIGNED_INT_2_10_10_10_REV) || (glType == GL_UNSIGNED_INT_10_10_10_2) ) + { + glFormat = GL_RGBA; + glType = GL_FLOAT; + } + + log_info("- glGetTexImage : %s : %s : %s\n", + GetGLTargetName(target), + GetGLFormatName(glFormat), + GetGLTypeName(glType)); + + glGetTexImage(target, 0, glFormat, glType, test); + + DumpGLBuffer(glType, width, height, test); + + free(test); + + err = glGetError(); + if( err != GL_NO_ERROR ) + { + log_error( "ERROR: Unable to read data from glGetTexImage : %s : %s : %s : Error %s\n", + GetGLTargetName(target), + GetGLFormatName(glFormat), + GetGLTypeName(glType), + gluErrorString( err )); + return NULL; + } +#endif + + if( !allocateMem ) + { + free( buffer ); + return NULL; + } + + if( glType == GL_UNSIGNED_INT_8_8_8_8_REV && glFormat == GL_BGRA && allocateMem ) + { + // Reverse and reorder to validate since in the + // kernel 
the read_imagef() call always returns RGBA + cl_uchar *p = (cl_uchar *)buffer; + for( size_t i = 0; i < width * height; i++ ) + { + cl_uchar uc0 = p[i * 4 + 0]; + cl_uchar uc1 = p[i * 4 + 1]; + cl_uchar uc2 = p[i * 4 + 2]; + cl_uchar uc3 = p[i * 4 + 3]; + + p[ i * 4 + 0 ] = uc2; + p[ i * 4 + 1 ] = uc1; + p[ i * 4 + 2 ] = uc0; + p[ i * 4 + 3 ] = uc3; + } + } + else if( glType == GL_UNSIGNED_INT_8_8_8_8 && glFormat == GL_BGRA && allocateMem ) + { + // Reverse and reorder to validate since in the + // kernel the read_imagef() call always returns RGBA + cl_uchar *p = (cl_uchar *)buffer; + for( size_t i = 0; i < width * height; i++ ) + { + cl_uchar uc0 = p[i * 4 + 0]; + cl_uchar uc1 = p[i * 4 + 1]; + cl_uchar uc2 = p[i * 4 + 2]; + cl_uchar uc3 = p[i * 4 + 3]; + + p[ i * 4 + 0 ] = uc1; + p[ i * 4 + 1 ] = uc2; + p[ i * 4 + 2 ] = uc3; + p[ i * 4 + 3 ] = uc0; + } + } + + return buffer; +} + +void * CreateGLTexture3D( size_t width, size_t height, size_t depth, + GLenum target, GLenum glFormat, + GLenum internalFormat, GLenum glType, + ExplicitType type, GLuint *outTextureID, + int *outError, MTdata d, bool allocateMem) +{ + *outError = 0; + + char * buffer; + unsigned int size = 0; + + if ( (glType == GL_UNSIGNED_INT_2_10_10_10_REV) || (glType == GL_UNSIGNED_INT_10_10_10_2) ) + { + size = width * height * depth; + } + else + { + size = width * height * depth * 4; + } + + buffer = (char *)create_random_data( type, d, size ); + + if( type == kFloat && allocateMem ) + { + // Re-fill the created buffer to just have [0-1] floats, since that's what it'd expect + cl_float *p = (cl_float *)buffer; + for( size_t i = 0; i < size; i++ ) + { + p[ i ] = (float) genrand_real1( d ); + } + } + else if( !allocateMem ) + memset( buffer, 0, size * get_explicit_type_size( type ) ); + + glGenTextures( 1, outTextureID ); + + glBindTexture( target, *outTextureID ); + glTexEnvi( GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE ); + glTexParameteri( target, GL_TEXTURE_MIN_FILTER, GL_NEAREST ); + 
glTexParameteri( target, GL_TEXTURE_MAG_FILTER, GL_NEAREST ); + + glGetError(); + glTexImage3D( target, 0, internalFormat, (GLsizei)width, (GLsizei)height, (GLsizei)depth, 0, glFormat, glType, buffer ); + GLenum err = glGetError(); + if( err != GL_NO_ERROR ) + { + if (err != GL_OUT_OF_MEMORY) { + log_error( "ERROR: Unable to load data into GL texture (%s) format %s type %s internal format %s\n", gluErrorString( err ), GetGLFormatName( glFormat ), get_explicit_type_name( type ), GetGLFormatName( internalFormat ) ); + *outError = -1; + } else { + log_info( "WARNING: Unable to load data into GL texture (%s) format %s type %s internal format %s\n", gluErrorString( err ), GetGLFormatName( glFormat ), get_explicit_type_name( type ), GetGLFormatName( internalFormat ) ); + *outError = -2; + } + delete [] buffer; + return NULL; + } + + if( !allocateMem ) + { + delete [] buffer; + return NULL; + } + + if( glType == GL_UNSIGNED_INT_8_8_8_8_REV && glFormat == GL_BGRA && allocateMem ) + { + // Reverse and reorder to validate since in the + // kernel the read_imagef() call always returns RGBA + cl_uchar *p = (cl_uchar *)buffer; + for( size_t i = 0; i < width * height * depth; i++ ) + { + cl_uchar uc0 = p[i * 4 + 0]; + cl_uchar uc1 = p[i * 4 + 1]; + cl_uchar uc2 = p[i * 4 + 2]; + cl_uchar uc3 = p[i * 4 + 3]; + + p[ i * 4 + 0 ] = uc2; + p[ i * 4 + 1 ] = uc1; + p[ i * 4 + 2 ] = uc0; + p[ i * 4 + 3 ] = uc3; + } + } + else if( glType == GL_UNSIGNED_INT_8_8_8_8 && glFormat == GL_BGRA && allocateMem ) + { + // Reverse and reorder to validate since in the + // kernel the read_imagef() call always returns RGBA + cl_uchar *p = (cl_uchar *)buffer; + for( size_t i = 0; i < width * height * depth; i++ ) + { + cl_uchar uc0 = p[i * 4 + 0]; + cl_uchar uc1 = p[i * 4 + 1]; + cl_uchar uc2 = p[i * 4 + 2]; + cl_uchar uc3 = p[i * 4 + 3]; + + p[ i * 4 + 0 ] = uc1; + p[ i * 4 + 1 ] = uc2; + p[ i * 4 + 2 ] = uc3; + p[ i * 4 + 3 ] = uc0; + } + } + + return buffer; +} + +void * ReadGLTexture( GLenum 
glTarget, GLuint glTexture, GLuint glBuf, GLint width, + GLenum glFormat, GLenum glInternalFormat, + GLenum glType, ExplicitType typeToReadAs, + size_t outWidth, size_t outHeight ) +{ + // Read results from the GL texture + glBindTexture(get_base_gl_target(glTarget), glTexture); + + GLint realWidth, realHeight, realDepth; + glGetTexLevelParameteriv( glTarget, 0, GL_TEXTURE_WIDTH, &realWidth ); + glGetTexLevelParameteriv( glTarget, 0, GL_TEXTURE_HEIGHT, &realHeight ); + glGetTexLevelParameteriv( glTarget, 0, GL_TEXTURE_DEPTH, &realDepth ); + + realDepth = (realDepth) ? realDepth : 1; + + GLint realInternalFormat; + glGetTexLevelParameteriv( glTarget, 0, GL_TEXTURE_INTERNAL_FORMAT, &realInternalFormat ); + +#ifdef DEBUG + log_info( "- Reading back from GL: %d x %d : %s : %s : %s : %s (stored as %s)\n", + realWidth, realHeight, + GetGLTargetName( glTarget), + GetGLFormatName( glInternalFormat ), + GetGLFormatName( glFormat ), + GetGLTypeName( glType ), + GetGLFormatName( realInternalFormat )); +#endif + + GLenum readBackFormat; + switch(glFormat) + { + case GL_RGBA_INTEGER_EXT: + readBackFormat = GL_RGBA_INTEGER_EXT; + break; + case GL_DEPTH_COMPONENT: + readBackFormat = GL_DEPTH_COMPONENT; + break; + case GL_DEPTH_STENCIL: + readBackFormat = GL_DEPTH_STENCIL; + break; + default: + readBackFormat = GL_RGBA; + break; + } + + GLenum readBackType; + switch (glType) { +#ifdef __APPLE__ + case GL_UNSIGNED_INT_8_8_8_8: + case GL_UNSIGNED_INT_8_8_8_8_REV: + readBackType = GL_UNSIGNED_BYTE; + break; +#endif + case GL_HALF_FLOAT: + case GL_UNSIGNED_BYTE: + case GL_UNSIGNED_SHORT: + case GL_UNSIGNED_INT: + case GL_BYTE: + case GL_SHORT: + case GL_INT: + case GL_FLOAT: + default: + readBackType = glType; + } + + size_t outBytes; + if (get_base_gl_target(glTarget) != GL_TEXTURE_BUFFER) { + outBytes = realWidth * realHeight * realDepth * 4 + * GetGLTypeSize(readBackType); + } + else { + outBytes = width * 4; + + outBytes *= ( GetGLTypeSize( GetGLTypeForExplicitType(typeToReadAs) ) 
); + } + + cl_char *outBuffer = (cl_char *)malloc( outBytes ); + memset(outBuffer, 0, outBytes); + + if (get_base_gl_target(glTarget) != GL_TEXTURE_BUFFER) { + //the default alignment in OpenGL is 4 bytes and need to be changed for GL_DEPTH_COMPONENT16 which is aligned to 2 bytes + if (realInternalFormat == GL_DEPTH_COMPONENT16) + glPixelStorei(GL_PACK_ALIGNMENT, 2); + + glGetTexImage( glTarget, 0, readBackFormat, readBackType, outBuffer ); + + if (realInternalFormat == GL_DEPTH_COMPONENT16) + glPixelStorei(GL_PACK_ALIGNMENT, 4); + } + else { + glBindBuffer(GL_ARRAY_BUFFER, glBuf); + glGetBufferSubData(GL_ARRAY_BUFFER, 0, outBytes, outBuffer); + } + +#ifdef DEBUG + + log_info( "- glGetTexImage: %s : %s : %s \n", + GetGLTargetName( glTarget), + GetGLFormatName(readBackFormat), + GetGLTypeName(readBackType)); + + DumpGLBuffer(readBackType, realWidth, realHeight, (void*)outBuffer); + +#endif + + return (void *)outBuffer; +} + +int CreateGLRenderbufferRaw( GLsizei width, GLsizei height, + GLenum attachment, GLenum glFormat, + GLenum internalFormat, GLenum glType, + GLuint *outFramebuffer, + GLuint *outRenderbuffer ) +{ + GLenum err = 0; + + // Generate a renderbuffer and bind + glGenRenderbuffersEXT( 1, outRenderbuffer ); + glBindRenderbufferEXT( GL_RENDERBUFFER_EXT, *outRenderbuffer ); + + // Allocate storage to the renderbuffer + glGetError(); + glRenderbufferStorageEXT( GL_RENDERBUFFER_EXT, internalFormat, (GLsizei)width, (GLsizei)height ); + err = glGetError(); + if( err != GL_NO_ERROR ) + { + log_error("Failed to allocate render buffer storage!\n"); + return 1701; + } + + GLint realInternalFormat; + glGetRenderbufferParameterivEXT( GL_RENDERBUFFER_EXT, GL_RENDERBUFFER_INTERNAL_FORMAT_EXT, &realInternalFormat ); + internalFormat = realInternalFormat; + +#ifdef DEBUG + GLint rsize, gsize, bsize, asize; + glGetRenderbufferParameterivEXT(GL_RENDERBUFFER_EXT, GL_RENDERBUFFER_RED_SIZE_EXT,&rsize); + glGetRenderbufferParameterivEXT(GL_RENDERBUFFER_EXT, 
GL_RENDERBUFFER_GREEN_SIZE_EXT,&gsize); + glGetRenderbufferParameterivEXT(GL_RENDERBUFFER_EXT, GL_RENDERBUFFER_BLUE_SIZE_EXT,&bsize); + glGetRenderbufferParameterivEXT(GL_RENDERBUFFER_EXT, GL_RENDERBUFFER_ALPHA_SIZE_EXT,&asize); + + log_info("Renderbuffer internal format requested: %s actual: %s sizes: r=%d g=%d b=%d a=%d\n", + GetGLFormatName( internalFormat ), GetGLFormatName( realInternalFormat ), + rsize, gsize, bsize, asize ); +#endif + + // Create and bind a framebuffer to render with + glGenFramebuffersEXT( 1, outFramebuffer ); + glBindFramebufferEXT( GL_FRAMEBUFFER_EXT, *outFramebuffer ); + if( err != GL_NO_ERROR ) + { + log_error( "ERROR: Unable to bind framebuffer : Error %s\n", + gluErrorString( err )); + + return -1; + } + + // Attach to the framebuffer + glFramebufferRenderbufferEXT( GL_FRAMEBUFFER_EXT, attachment, GL_RENDERBUFFER_EXT, *outRenderbuffer ); + err = glGetError(); + GLint status = glCheckFramebufferStatusEXT( GL_FRAMEBUFFER_EXT ); + if( status != GL_FRAMEBUFFER_COMPLETE_EXT ) + { + log_error( "ERROR: Unable to attach renderbuffer to framebuffer (%s, status %x)\n", gluErrorString( err ), (int)status ); + return -1; + } + + return 0; +} + + +void reorder_verification_buffer(GLenum glFormat, GLenum glType, char* buffer, size_t num_pixels) +{ + if( glType == GL_UNSIGNED_INT_8_8_8_8_REV && glFormat == GL_BGRA) + { + // Reverse and reorder to validate since in the + // kernel the read_imagef() call always returns RGBA + cl_uchar *p = (cl_uchar *)buffer; + for( size_t i = 0; i < num_pixels; i++ ) + { + cl_uchar uc0 = p[i * 4 + 0]; + cl_uchar uc1 = p[i * 4 + 1]; + cl_uchar uc2 = p[i * 4 + 2]; + cl_uchar uc3 = p[i * 4 + 3]; + + p[ i * 4 + 0 ] = uc2; + p[ i * 4 + 1 ] = uc1; + p[ i * 4 + 2 ] = uc0; + p[ i * 4 + 3 ] = uc3; + } + } + else if( glType == GL_UNSIGNED_INT_8_8_8_8 && glFormat == GL_BGRA) + { + // Reverse and reorder to validate since in the + // kernel the read_imagef() call always returns RGBA + cl_uchar *p = (cl_uchar *)buffer; + for( 
size_t i = 0; i < num_pixels; i++ ) + { + cl_uchar uc0 = p[i * 4 + 0]; + cl_uchar uc1 = p[i * 4 + 1]; + cl_uchar uc2 = p[i * 4 + 2]; + cl_uchar uc3 = p[i * 4 + 3]; + + p[ i * 4 + 0 ] = uc1; + p[ i * 4 + 1 ] = uc2; + p[ i * 4 + 2 ] = uc3; + p[ i * 4 + 3 ] = uc0; + } + } +} + + +#ifdef GL_VERSION_3_2 + +#define check_gl_error() \ +{ \ + GLenum errnom = GL_NO_ERROR;\ + if ((errnom = glGetError()) != GL_NO_ERROR)\ + log_error("GL Error: 0x%04X at %s:%d\n", errnom, __FILE__, __LINE__);\ +} + +const char *get_gl_vector_type( GLenum internalformat ) +{ + switch (internalformat) { + case GL_RGBA8: + case GL_RGBA16: + case GL_RGBA32F_ARB: + case GL_RGBA16F_ARB: + case GL_DEPTH_COMPONENT16: + case GL_DEPTH_COMPONENT32F: + case GL_DEPTH24_STENCIL8: + case GL_DEPTH32F_STENCIL8: + return "vec4"; + break; + case GL_RGBA8I_EXT: + case GL_RGBA16I_EXT: + case GL_RGBA32I_EXT: + return "ivec4"; + break; + case GL_RGBA8UI_EXT: + case GL_RGBA16UI_EXT: + case GL_RGBA32UI_EXT: + return "uvec4"; + break; + default: + log_error("Test error: unsupported data type\n"); + return ""; + break; + } +} + +const char *get_gl_data_type( GLenum internalformat ) +{ + switch (internalformat) { + case GL_RGBA8: + case GL_RGBA16: + case GL_RGBA32F_ARB: + case GL_RGBA16F_ARB: + case GL_DEPTH_COMPONENT16: + case GL_DEPTH_COMPONENT32F: + case GL_DEPTH24_STENCIL8: + case GL_DEPTH32F_STENCIL8: + return "float"; + break; + case GL_RGBA8I_EXT: + case GL_RGBA16I_EXT: + case GL_RGBA32I_EXT: + return "int"; + break; + case GL_RGBA8UI_EXT: + case GL_RGBA16UI_EXT: + case GL_RGBA32UI_EXT: + return "uint"; + break; + default: + log_error("Test error: unsupported data type\n"); + return ""; + break; + } +} + + +void * CreateGLTexture2DMultisample( size_t width, size_t height, size_t samples, + GLenum target, GLenum glFormat, + GLenum internalFormat, GLenum glType, + ExplicitType type, GLuint *outTextureID, + int *outError, bool allocateMem, MTdata d , bool fixedSampleLocations) +{ + // This function creates a 
multisample texture and renders into each sample + // using a GLSL shader + + // Check if the renderer supports enough samples + GLint max_samples = get_gl_max_samples(target, internalFormat); + check_gl_error() + + if (max_samples < (GLint)samples) + log_error("GL error: requested samples (%d) exceeds renderer max samples (%d)\n", samples, max_samples); + + // Setup the GLSL program + const GLchar *vertex_source = + "#version 140\n" + "in vec4 att0;\n" + "void main (void) {\n" + " gl_Position = vec4(att0.xy,0.0,1.0);\n" + "}\n"; + + const GLchar *fragmentSource = + "#version 140\n" + "out %s out0;\n" + "uniform %s colorVal;\n" + "uniform float depthVal;\n" + "void main (void) {\n" + " out0 = %s(colorVal);\n" + " gl_FragDepth = depthVal;\n" + "}\n"; + + GLchar fragmentShader[256]; + sprintf(fragmentShader, fragmentSource, get_gl_vector_type(internalFormat), get_gl_data_type(internalFormat), get_gl_vector_type(internalFormat)); + const GLchar *fragment_source = &fragmentShader[0]; + + glShaderWrapper vertex_shader = glCreateShader(GL_VERTEX_SHADER); + glShaderSource(vertex_shader, 1, &vertex_source, NULL); + glCompileShader(vertex_shader); + check_gl_error() + + glShaderWrapper fragment_shader = glCreateShader(GL_FRAGMENT_SHADER); + glShaderSource(fragment_shader, 1, &fragment_source, NULL); + glCompileShader(fragment_shader); + check_gl_error() + + GLuint prog = glCreateProgram(); + glAttachShader(prog, vertex_shader); + glAttachShader(prog, fragment_shader); + check_gl_error() + + glBindAttribLocation(prog, 0, "att0"); + glLinkProgram(prog); + check_gl_error() + + // Setup the FBO and texture + glFramebufferWrapper fbo; + glGenFramebuffers(1, &fbo); + glBindFramebuffer(GL_FRAMEBUFFER, fbo); + check_gl_error() + + glViewport(0, 0, width, height); + check_gl_error() + + GLuint tex = 0; + glGenTextures(1, &tex); + glBindTexture(GL_TEXTURE_2D_MULTISAMPLE, tex); + glTexImage2DMultisample(GL_TEXTURE_2D_MULTISAMPLE, samples, internalFormat, width, height, 
fixedSampleLocations); + check_gl_error() + + GLint attachment; + switch (internalFormat) { + case GL_DEPTH_COMPONENT16: + case GL_DEPTH_COMPONENT32F: + attachment = GL_DEPTH_ATTACHMENT; + break; + case GL_DEPTH24_STENCIL8: + case GL_DEPTH32F_STENCIL8: + attachment = GL_DEPTH_STENCIL_ATTACHMENT; + break; + default: + attachment = GL_COLOR_ATTACHMENT0; + break; + } + + glFramebufferTexture(GL_FRAMEBUFFER, attachment, tex, 0); + check_gl_error() + + GLint status = glCheckFramebufferStatus(GL_FRAMEBUFFER); + if (status == GL_FRAMEBUFFER_UNSUPPORTED) { + log_info("GL status: GL_FRAMEBUFFER_UNSUPPORTED format %s multisample is not supported\n", GetGLFormatName(internalFormat)); + *outTextureID = 0; + *outError = GL_FRAMEBUFFER_UNSUPPORTED; + return NULL; + } + + if (status != GL_FRAMEBUFFER_COMPLETE) { + log_error("GL error: framebuffer incomplete status 0x%04X\n",status); + *outTextureID = 0; + *outError = status; + return NULL; + } + + // Check if the framebuffer supports enough samples + GLint fbo_samples = 0; + glGetIntegerv(GL_SAMPLES, &fbo_samples); + check_gl_error(); + + if (fbo_samples < (GLint)samples) + log_error("GL Error: requested samples (%d) exceeds FBO capability (%d)\n", samples, fbo_samples); + + glUseProgram(prog); + check_gl_error() + + if (attachment != GL_DEPTH_ATTACHMENT && attachment != GL_DEPTH_STENCIL_ATTACHMENT) { + glDisable(GL_DEPTH_TEST); + check_gl_error() + } + else { + glEnable(GL_DEPTH_TEST); + glDepthFunc(GL_ALWAYS); + check_gl_error() + } + + // Setup the VBO for rendering a quad + GLfloat quad[] = { + -1.0f, -1.0f, + 1.0f, -1.0f, + 1.0f, 1.0f, + -1.0f, 1.0f + }; + + glBufferWrapper vbo; + glGenBuffers(1, &vbo); + glBindBuffer(GL_ARRAY_BUFFER, vbo); + glBufferData(GL_ARRAY_BUFFER, sizeof(quad), quad, GL_STREAM_DRAW); + check_gl_error() + + glVertexArraysWrapper vao; + glGenVertexArrays(1, &vao); + glBindVertexArray(vao); + glEnableVertexAttribArray(0); + glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, sizeof(GLfloat)*2, 0); + 
check_gl_error() + + //clearing color and depth buffer + glClearColor(0, 0, 0, 0); + glClear(GL_COLOR_BUFFER_BIT); + glClearDepth(1.0); + glClear(GL_DEPTH_BUFFER_BIT); + + //calculating colors + double color_delta = 1.0 / samples; + double color = color_delta; + + glEnable(GL_SAMPLE_MASK); + for (size_t i=0;i!=samples;++i) { + glSampleMaski(0, 1< + +GLboolean gluCheckExtension(const GLubyte *extName, const GLubyte *extString) +{ + const size_t len = strlen((const char*)extName); + const char* str = (const char*)extString; + + while (str != NULL) { + str = strstr(str, (const char*)extName); + if (str == NULL) { + break; + } + if ((str > (const char*)extString || str[-1] == ' ') + && (str[len] == ' ' || str[len] == '\0')) { + return GL_TRUE; + } + str = strchr(str + len, ' '); + } + + return GL_FALSE; +} + +#endif + +// Function pointers for the GL/CL calls +clCreateFromGLBuffer_fn clCreateFromGLBuffer_ptr; +clCreateFromGLTexture_fn clCreateFromGLTexture_ptr; +clCreateFromGLTexture2D_fn clCreateFromGLTexture2D_ptr; +clCreateFromGLTexture3D_fn clCreateFromGLTexture3D_ptr; +clCreateFromGLRenderbuffer_fn clCreateFromGLRenderbuffer_ptr; +clGetGLObjectInfo_fn clGetGLObjectInfo_ptr; +clGetGLTextureInfo_fn clGetGLTextureInfo_ptr; +clEnqueueAcquireGLObjects_fn clEnqueueAcquireGLObjects_ptr; +clEnqueueReleaseGLObjects_fn clEnqueueReleaseGLObjects_ptr; + +int init_clgl_ext() { + + // As OpenCL for the platforms. Warn if more than one platform found, + // since this might not be the platform we want. By default, we simply + // use the first returned platform. + + cl_uint nplatforms; + cl_platform_id platform; + clGetPlatformIDs(0, NULL, &nplatforms); + clGetPlatformIDs(1, &platform, NULL); + + if (nplatforms > 1) { + log_info("clGetPlatformIDs returned multiple values. This is not " + "an error, but might result in obtaining incorrect function " + "pointers if you do not want the first returned platform.\n"); + + // Show them the platform name, in case it is a problem. 
+ + size_t size; + char *name; + + clGetPlatformInfo(platform, CL_PLATFORM_NAME, 0, NULL, &size); + name = (char*)malloc(size); + clGetPlatformInfo(platform, CL_PLATFORM_NAME, size, name, NULL); + + log_info("Using platform with name: %s \n", name); + free(name); + } + + // Create the function pointer table + clCreateFromGLBuffer_ptr = (clCreateFromGLBuffer_fn)clGetExtensionFunctionAddressForPlatform(platform,"clCreateFromGLBuffer"); + if (clCreateFromGLBuffer_ptr == NULL) { + log_error("clGetExtensionFunctionAddressForPlatform(platform,clCreateFromGLBuffer) returned NULL.\n"); + return -1; + } + clCreateFromGLTexture2D_ptr = (clCreateFromGLTexture2D_fn)clGetExtensionFunctionAddressForPlatform(platform,"clCreateFromGLTexture2D"); + if (clCreateFromGLTexture2D_ptr == NULL) { + log_error("clGetExtensionFunctionAddressForPlatform(platform,clCreateFromGLTexture2D) returned NULL.\n"); + return -1; + } + clCreateFromGLTexture3D_ptr = (clCreateFromGLTexture3D_fn)clGetExtensionFunctionAddressForPlatform(platform,"clCreateFromGLTexture3D"); + if (clCreateFromGLTexture3D_ptr == NULL) { + log_error("clGetExtensionFunctionAddressForPlatform(platform,clCreateFromGLTexture3D\") returned NULL.\n"); + return -1; + } + clCreateFromGLTexture_ptr = (clCreateFromGLTexture_fn)clGetExtensionFunctionAddressForPlatform(platform,"clCreateFromGLTexture"); + if (clCreateFromGLTexture_ptr == NULL) { + log_error("clGetExtensionFunctionAddressForPlatform(platform,\"clCreateFromGLTexture\") returned NULL.\n"); + return -1; + } + clCreateFromGLRenderbuffer_ptr = (clCreateFromGLRenderbuffer_fn)clGetExtensionFunctionAddressForPlatform(platform,"clCreateFromGLRenderbuffer"); + if (clCreateFromGLRenderbuffer_ptr == NULL) { + log_error("clGetExtensionFunctionAddressForPlatform(platform,clCreateFromGLRenderbuffer) returned NULL.\n"); + return -1; + } + clGetGLObjectInfo_ptr = (clGetGLObjectInfo_fn)clGetExtensionFunctionAddressForPlatform(platform,"clGetGLObjectInfo"); + if (clGetGLObjectInfo_ptr == 
NULL) { + log_error("clGetExtensionFunctionAddressForPlatform(platform,clGetGLObjectInfo) returned NULL.\n"); + return -1; + } + clGetGLTextureInfo_ptr = (clGetGLTextureInfo_fn)clGetExtensionFunctionAddressForPlatform(platform,"clGetGLTextureInfo"); + if (clGetGLTextureInfo_ptr == NULL) { + log_error("clGetExtensionFunctionAddressForPlatform(platform,clGetGLTextureInfo) returned NULL.\n"); + return -1; + } + clEnqueueAcquireGLObjects_ptr = (clEnqueueAcquireGLObjects_fn)clGetExtensionFunctionAddressForPlatform(platform,"clEnqueueAcquireGLObjects"); + if (clEnqueueAcquireGLObjects_ptr == NULL) { + log_error("clGetExtensionFunctionAddressForPlatform(platform,clEnqueueAcquireGLObjects) returned NULL.\n"); + return -1; + } + clEnqueueReleaseGLObjects_ptr = (clEnqueueReleaseGLObjects_fn)clGetExtensionFunctionAddressForPlatform(platform,"clEnqueueReleaseGLObjects"); + if (clEnqueueReleaseGLObjects_ptr == NULL) { + log_error("clGetExtensionFunctionAddressForPlatform(platform,clEnqueueReleaseGLObjects) returned NULL.\n"); + return -1; + } + + return 0; +} + +GLint get_gl_max_samples( GLenum target, GLenum internalformat ) +{ + GLint max_samples = 0; +#ifdef GL_VERSION_4_2 + glGetInternalformativ(target, internalformat, GL_SAMPLES, 1, &max_samples); +#else + switch (internalformat) + { + case GL_RGBA8I: + case GL_RGBA16I: + case GL_RGBA32I: + case GL_RGBA8UI: + case GL_RGBA16UI: + case GL_RGBA32UI: + glGetIntegerv(GL_MAX_INTEGER_SAMPLES, &max_samples); + break; + case GL_DEPTH_COMPONENT16: + case GL_DEPTH_COMPONENT32F: + case GL_DEPTH24_STENCIL8: + case GL_DEPTH32F_STENCIL8: + glGetIntegerv(GL_MAX_DEPTH_TEXTURE_SAMPLES, &max_samples); + break; + default: + glGetIntegerv(GL_MAX_COLOR_TEXTURE_SAMPLES, &max_samples); + break; + } +#endif + return max_samples; +} diff --git a/test_common/gl/helpers.h b/test_common/gl/helpers.h new file mode 100644 index 00000000..ace1d16a --- /dev/null +++ b/test_common/gl/helpers.h @@ -0,0 +1,362 @@ +// +// Copyright (c) 2017 The Khronos Group 
Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef _helpers_h +#define _helpers_h + +#include "../harness/compat.h" +#include +#include +#include + +#include +#include + +#if !defined (__APPLE__) +#include +#include "gl_headers.h" +#include +#else +#include "gl_headers.h" +#endif + +#include "../../test_common/harness/errorHelpers.h" +#include "../../test_common/harness/kernelHelpers.h" +#include "../../test_common/harness/threadTesting.h" +#include "../../test_common/harness/typeWrappers.h" +#include "../../test_common/harness/conversions.h" +#include "../../test_common/harness/mt19937.h" + +typedef cl_mem +(CL_API_CALL *clCreateFromGLBuffer_fn)(cl_context context, + cl_mem_flags flags, + GLuint bufobj, + int * errcode_ret); + +typedef cl_mem +(CL_API_CALL *clCreateFromGLTexture_fn)(cl_context context , + cl_mem_flags flags , + GLenum target , + GLint miplevel , + GLuint texture , + cl_int * errcode_ret) ; + +typedef cl_mem +(CL_API_CALL *clCreateFromGLTexture2D_fn)(cl_context context , + cl_mem_flags flags , + GLenum target , + GLint miplevel , + GLuint texture , + cl_int * errcode_ret) ; + +typedef cl_mem +(CL_API_CALL *clCreateFromGLTexture3D_fn)(cl_context context , + cl_mem_flags flags , + GLenum target , + GLint miplevel , + GLuint texture , + cl_int * errcode_ret) ; + +typedef cl_mem +(CL_API_CALL *clCreateFromGLRenderbuffer_fn)(cl_context context , + cl_mem_flags flags , + GLuint renderbuffer , + cl_int * errcode_ret) ; + +typedef 
cl_int +(CL_API_CALL *clGetGLObjectInfo_fn)(cl_mem memobj , + cl_gl_object_type * gl_object_type , + GLuint * gl_object_name) ; + +typedef cl_int +(CL_API_CALL *clGetGLTextureInfo_fn)(cl_mem memobj , + cl_gl_texture_info param_name , + size_t param_value_size , + void * param_value , + size_t * param_value_size_ret) ; + +typedef cl_int +(CL_API_CALL *clEnqueueAcquireGLObjects_fn)(cl_command_queue command_queue , + cl_uint num_objects , + const cl_mem * mem_objects , + cl_uint num_events_in_wait_list , + const cl_event * event_wait_list , + cl_event * event) ; + +typedef cl_int +(CL_API_CALL *clEnqueueReleaseGLObjects_fn)(cl_command_queue command_queue , + cl_uint num_objects , + const cl_mem * mem_objects , + cl_uint num_events_in_wait_list , + const cl_event * event_wait_list , + cl_event * event) ; + + +extern clCreateFromGLBuffer_fn clCreateFromGLBuffer_ptr; +extern clCreateFromGLTexture_fn clCreateFromGLTexture_ptr; +extern clCreateFromGLTexture2D_fn clCreateFromGLTexture2D_ptr; +extern clCreateFromGLTexture3D_fn clCreateFromGLTexture3D_ptr; +extern clCreateFromGLRenderbuffer_fn clCreateFromGLRenderbuffer_ptr; +extern clGetGLObjectInfo_fn clGetGLObjectInfo_ptr; +extern clGetGLTextureInfo_fn clGetGLTextureInfo_ptr; +extern clEnqueueAcquireGLObjects_fn clEnqueueAcquireGLObjects_ptr; +extern clEnqueueReleaseGLObjects_fn clEnqueueReleaseGLObjects_ptr; + + +class glBufferWrapper +{ + public: + glBufferWrapper() { mBuffer = 0; } + glBufferWrapper( GLuint b ) { mBuffer = b; } + ~glBufferWrapper() { if( mBuffer != 0 ) glDeleteBuffers( 1, &mBuffer ); } + + glBufferWrapper & operator=( const GLuint &rhs ) { mBuffer = rhs; return *this; } + operator GLuint() { return mBuffer; } + operator GLuint *() { return &mBuffer; } + + GLuint * operator&() { return &mBuffer; } + + bool operator==( GLuint rhs ) { return mBuffer == rhs; } + + protected: + + GLuint mBuffer; +}; + +class glTextureWrapper +{ + public: + glTextureWrapper() { mHandle = 0; } + glTextureWrapper( GLuint b ) { 
mHandle = b; } + ~glTextureWrapper() { + if( mHandle != 0 ) glDeleteTextures( 1, &mHandle ); + } + + glTextureWrapper & operator=( const GLuint &rhs ) { mHandle = rhs; return *this; } + operator GLuint() { return mHandle; } + operator GLuint *() { return &mHandle; } + + GLuint * operator&() { return &mHandle; } + + bool operator==( GLuint rhs ) { return mHandle == rhs; } + + protected: + + // The texture handle. + GLuint mHandle; +}; + +class glRenderbufferWrapper +{ + public: + glRenderbufferWrapper() { mBuffer = 0; } + glRenderbufferWrapper( GLuint b ) { mBuffer = b; } + ~glRenderbufferWrapper() { if( mBuffer != 0 ) glDeleteRenderbuffersEXT( 1, &mBuffer ); } + + glRenderbufferWrapper & operator=( const GLuint &rhs ) { mBuffer = rhs; return *this; } + operator GLuint() { return mBuffer; } + operator GLuint *() { return &mBuffer; } + + GLuint * operator&() { return &mBuffer; } + + bool operator==( GLuint rhs ) { return mBuffer == rhs; } + + protected: + + GLuint mBuffer; +}; + +class glFramebufferWrapper +{ + public: + glFramebufferWrapper() { mBuffer = 0; } + glFramebufferWrapper( GLuint b ) { mBuffer = b; } + ~glFramebufferWrapper() { if( mBuffer != 0 ) glDeleteFramebuffersEXT( 1, &mBuffer ); } + + glFramebufferWrapper & operator=( const GLuint &rhs ) { mBuffer = rhs; return *this; } + operator GLuint() { return mBuffer; } + operator GLuint *() { return &mBuffer; } + + GLuint * operator&() { return &mBuffer; } + + bool operator==( GLuint rhs ) { return mBuffer == rhs; } + + protected: + + GLuint mBuffer; +}; + +class glVertexArraysWrapper +{ +public: + glVertexArraysWrapper() { mBuffer = 0; } + glVertexArraysWrapper( GLuint b ) { mBuffer = b; } + ~glVertexArraysWrapper() { if( mBuffer != 0 ) glDeleteVertexArrays( 1, &mBuffer ); } + + glVertexArraysWrapper & operator=( const GLuint &rhs ) { mBuffer = rhs; return *this; } + operator GLuint() { return mBuffer; } + operator GLuint *() { return &mBuffer; } + + GLuint * operator&() { return &mBuffer; } + + bool 
operator==( GLuint rhs ) { return mBuffer == rhs; } + +protected: + + GLuint mBuffer; +}; + +class glProgramWrapper +{ +public: + glProgramWrapper() { mProgram = 0; } + glProgramWrapper( GLuint b ) { mProgram = b; } + ~glProgramWrapper() { if( mProgram != 0 ) glDeleteProgram( mProgram ); } + + glProgramWrapper & operator=( const GLuint &rhs ) { mProgram = rhs; return *this; } + operator GLuint() { return mProgram; } + operator GLuint *() { return &mProgram; } + + GLuint * operator&() { return &mProgram; } + + bool operator==( GLuint rhs ) { return mProgram == rhs; } + +protected: + + GLuint mProgram; +}; + +class glShaderWrapper +{ +public: + glShaderWrapper() { mShader = 0; } + glShaderWrapper( GLuint b ) { mShader = b; } + ~glShaderWrapper() { if( mShader != 0 ) glDeleteShader( mShader ); } + + glShaderWrapper & operator=( const GLuint &rhs ) { mShader = rhs; return *this; } + operator GLuint() { return mShader; } + operator GLuint *() { return &mShader; } + + GLuint * operator&() { return &mShader; } + + bool operator==( GLuint rhs ) { return mShader == rhs; } + +protected: + + GLuint mShader; +}; + +// Helper functions (defined in helpers.cpp) + +extern void * CreateGLTexture1DArray( size_t width, size_t length, + GLenum target, GLenum glFormat, GLenum internalFormat, GLenum glType, + ExplicitType type, GLuint *outTextureID, int *outError, + bool allocateMem, MTdata d); + +extern void * CreateGLTexture2DArray( size_t width, size_t height, size_t length, + GLenum target, GLenum glFormat, GLenum internalFormat, GLenum glType, + ExplicitType type, GLuint *outTextureID, int *outError, + bool allocateMem, MTdata d); + +extern void * CreateGLTextureBuffer( size_t width, + GLenum target, GLenum glFormat, GLenum internalFormat, GLenum glType, + ExplicitType type, GLuint *outTex, GLuint *outBuf, int *outError, + bool allocateMem, MTdata d); + +extern void * CreateGLTexture1D(size_t width, + GLenum target, GLenum glFormat, + GLenum internalFormat, GLenum glType, + 
ExplicitType type, GLuint *outTextureID, + int *outError, bool allocateMem, MTdata d ); + +extern void * CreateGLTexture2D( size_t width, size_t height, + GLenum target, GLenum glFormat, + GLenum internalFormat, GLenum glType, + ExplicitType type, GLuint *outTextureID, + int *outError, bool allocateMem, MTdata d ); + + +extern void * CreateGLTexture3D( size_t width, size_t height, size_t depth, + GLenum target, GLenum glFormat, + GLenum internalFormat, GLenum glType, + ExplicitType type, GLuint *outTextureID, + int *outError, MTdata d, bool allocateMem = true ); + +#ifdef GL_VERSION_3_2 +extern void * CreateGLTexture2DMultisample( size_t width, size_t height, size_t samples, + GLenum target, GLenum glFormat, + GLenum internalFormat, GLenum glType, + ExplicitType type, GLuint *outTextureID, + int *outError, bool allocateMem, MTdata d, + bool fixedSampleLocations ); + +extern void * CreateGLTexture2DArrayMultisample( size_t width, size_t height, + size_t length, size_t samples, + GLenum target, GLenum glFormat, + GLenum internalFormat, GLenum glType, + ExplicitType type, GLuint *outTextureID, + int *outError, bool allocateMem, MTdata d, + bool fixedSampleLocations ); +#endif + +extern void * ReadGLTexture( GLenum glTarget, GLuint glTexture, GLuint glBuf, GLint width, + GLenum glFormat, GLenum glInternalFormat, + GLenum glType, ExplicitType typeToReadAs, + size_t outWidth, size_t outHeight ); + +extern int CreateGLRenderbufferRaw( GLsizei width, GLsizei height, + GLenum target, GLenum glFormat, + GLenum internalFormat, GLenum glType, + GLuint *outFramebuffer, + GLuint *outRenderbuffer ); + +extern void * CreateGLRenderbuffer( GLsizei width, GLsizei height, + GLenum target, GLenum glFormat, + GLenum internalFormat, GLenum glType, + ExplicitType type, + GLuint *outFramebuffer, + GLuint *outRenderbuffer, + int *outError, MTdata d, bool allocateMem ); + +extern void * ReadGLRenderbuffer( GLuint glFramebuffer, GLuint glRenderbuffer, + GLenum attachment, GLenum glFormat, + 
GLenum glInternalFormat, GLenum glType, + ExplicitType typeToReadAs, + size_t outWidth, size_t outHeight ); + +extern void DumpGLBuffer(GLenum type, size_t width, size_t height, void* buffer); +extern const char *GetGLTypeName( GLenum type ); +extern const char *GetGLAttachmentName( GLenum att ); +extern const char *GetGLTargetName( GLenum tgt ); +extern const char *GetGLBaseFormatName( GLenum baseformat ); +extern const char *GetGLFormatName( GLenum format ); + +extern void* CreateRandomData( ExplicitType type, size_t count, MTdata d ); + +extern GLenum GetGLFormat(GLenum internalFormat); +extern GLenum GetGLTypeForExplicitType(ExplicitType type); +extern size_t GetGLTypeSize(GLenum type); +extern ExplicitType GetExplicitTypeForGLType(GLenum type); + +extern GLenum get_base_gl_target( GLenum target ); + +extern int init_clgl_ext( void ); + +extern GLint get_gl_max_samples( GLenum target, GLenum internalformat ); + +#endif // _helpers_h + + + diff --git a/test_common/gl/setup.h b/test_common/gl/setup.h new file mode 100644 index 00000000..6ee810bb --- /dev/null +++ b/test_common/gl/setup.h @@ -0,0 +1,48 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef _setup_h +#define _setup_h + +#include +#include +#include +#include "gl_headers.h" +#ifdef __APPLE__ +#include +#else +#include +#endif + + +// NOTE(review): the #include targets above appear to have been stripped in this copy (angle-bracket contents missing); confirm against the original header. +// Note: the idea here is to have every platform define their own setup.cpp file that implements a GLEnvironment +// subclass internally, then return it as a definition for GLEnvironment::Create +// (NOTE(review): the factory referenced above is actually declared below as the static GLEnvironment::Instance(); each setup_*.cpp in this patch defines it) +// Abstract per-platform GL environment used by the CL/GL interop conformance tests. +class GLEnvironment +{ + public: + GLEnvironment() {} + virtual ~GLEnvironment() {} + + // Set up a GL context/window (GLUT on most platforms); use_opengl_32 != 0 requests a GL 3.2 core profile where supported. Returns 0 on success, -1 on failure. + virtual int Init( int *argc, char **argv, int use_opengl_32 ) = 0; + // Create an OpenCL context that shares with the currently bound GL context; returns NULL/0 on failure. + virtual cl_context CreateCLContext( void ) = 0; + // Returns nonzero if a device of device_type supports CL/GL sharing, 0 if none found, -1 on query error. + virtual int SupportsCLGLInterop( cl_device_type device_type) = 0; + + // Singleton accessor; defined in each platform's setup_*.cpp (setup_osx/setup_win32/setup_x11). + static GLEnvironment * Instance( void ); + + +}; + +#endif // _setup_h diff --git a/test_common/gl/setup_osx.cpp b/test_common/gl/setup_osx.cpp new file mode 100644 index 00000000..01641be0 --- /dev/null +++ b/test_common/gl/setup_osx.cpp @@ -0,0 +1,156 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "setup.h" +#include "../../test_common/harness/errorHelpers.h" +#include + +class OSXGLEnvironment : public GLEnvironment +{ + public: + OSXGLEnvironment() + { + mCGLContext = NULL; + } + + virtual int Init( int *argc, char **argv, int use_opengl_32 ) + { + if (!use_opengl_32) { + + // Create a GLUT window to render into + glutInit( argc, argv ); + glutInitWindowSize( 512, 512 ); + glutInitDisplayMode( GLUT_RGB | GLUT_DOUBLE ); + glutCreateWindow( "OpenCL <-> OpenGL Test" ); + } + + else { + + CGLPixelFormatAttribute attribs[] = { + kCGLPFAOpenGLProfile, (CGLPixelFormatAttribute)kCGLOGLPVersion_3_2_Core, + kCGLPFAAllowOfflineRenderers, + kCGLPFANoRecovery, + kCGLPFAAccelerated, + kCGLPFADoubleBuffer, + (CGLPixelFormatAttribute)0 + }; + + CGLError err; + CGLPixelFormatObj pix; + GLint npix; + err = CGLChoosePixelFormat (attribs, &pix, &npix); + if(err != kCGLNoError) + { + log_error("Failed to choose pixel format\n"); + return -1; + } + err = CGLCreateContext(pix, NULL, &mCGLContext); + if(err != kCGLNoError) + { + log_error("Failed to create GL context\n"); + return -1; + } + CGLSetCurrentContext(mCGLContext); + } + + return 0; + } + + virtual cl_context CreateCLContext( void ) + { + int error; + + if( mCGLContext == NULL ) + mCGLContext = CGLGetCurrentContext(); + + CGLShareGroupObj share_group = CGLGetShareGroup(mCGLContext); + cl_context_properties properties[] = { CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE, (cl_context_properties)share_group, 0 }; + cl_context context = clCreateContext(properties, 0, 0, 0, 0, &error); + if (error) { + print_error(error, "clCreateContext failed"); + return NULL; + } + + // Verify that all devices in the context support the required extension + cl_device_id devices[64]; + size_t size_out; + error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices), devices, &size_out); + if (error) { + print_error(error, "clGetContextInfo failed"); + return NULL; + } + + char extensions[8192]; + for (int i=0; 
i<(int)(size_out/sizeof(cl_device_id)); i++) { + error = clGetDeviceInfo(devices[i], CL_DEVICE_EXTENSIONS, sizeof(extensions), extensions, NULL); + if (error) { + print_error(error, "clGetDeviceInfo failed"); + return NULL; + } + + if (strstr(extensions, "cl_APPLE_gl_sharing") == NULL) { + log_error("Device %d does not supporte required extension cl_APPLE_gl_sharing.\n", i); + return NULL; + } + } + return context; + } + + virtual int SupportsCLGLInterop( cl_device_type device_type ) + { + int found_valid_device = 0; + cl_device_id devices[64]; + cl_uint num_of_devices; + int error; + error = clGetDeviceIDs(NULL, device_type, 64, devices, &num_of_devices); + if (error) { + print_error(error, "clGetDeviceIDs failed"); + return -1; + } + + char extensions[8192]; + for (int i=0; i<(int)num_of_devices; i++) { + error = clGetDeviceInfo(devices[i], CL_DEVICE_EXTENSIONS, sizeof(extensions), extensions, NULL); + if (error) { + print_error(error, "clGetDeviceInfo failed"); + return -1; + } + + if (strstr(extensions, "cl_APPLE_gl_sharing") == NULL) { + log_info("Device %d of %d does not support required extension cl_APPLE_gl_sharing.\n", i, num_of_devices); + } else { + log_info("Device %d of %d does support required extension cl_APPLE_gl_sharing.\n", i, num_of_devices); + found_valid_device = 1; + } + } + return found_valid_device; + } + + virtual ~OSXGLEnvironment() + { + CGLDestroyContext( mCGLContext ); + } + + CGLContextObj mCGLContext; + +}; + +GLEnvironment * GLEnvironment::Instance( void ) +{ + static OSXGLEnvironment * env = NULL; + if( env == NULL ) + env = new OSXGLEnvironment(); + return env; +} diff --git a/test_common/gl/setup_win32.cpp b/test_common/gl/setup_win32.cpp new file mode 100644 index 00000000..1c7b4b2f --- /dev/null +++ b/test_common/gl/setup_win32.cpp @@ -0,0 +1,209 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#define GL_GLEXT_PROTOTYPES + +#include "setup.h" +#include "testBase.h" +#include "../../test_common/harness/errorHelpers.h" + +#include +#include +#include +#include +#include + +typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)( + const cl_context_properties *properties, + cl_gl_context_info param_name, + size_t param_value_size, + void *param_value, + size_t *param_value_size_ret); + +// Rename references to this dynamically linked function to avoid +// collision with static link version +#define clGetGLContextInfoKHR clGetGLContextInfoKHR_proc +static clGetGLContextInfoKHR_fn clGetGLContextInfoKHR; + +#define MAX_DEVICES 32 + +class WGLEnvironment : public GLEnvironment +{ +private: + cl_device_id m_devices[MAX_DEVICES]; + int m_device_count; + cl_platform_id m_platform; + bool m_is_glut_init; + +public: + WGLEnvironment() + { + m_device_count = 0; + m_platform = 0; + m_is_glut_init = false; + } + virtual int Init( int *argc, char **argv, int use_opengl_32 ) + { + if (!m_is_glut_init) + { + // Create a GLUT window to render into + glutInit( argc, argv ); + glutInitWindowSize( 512, 512 ); + glutInitDisplayMode( GLUT_RGB | GLUT_DOUBLE ); + glutCreateWindow( "OpenCL <-> OpenGL Test" ); + glewInit(); + m_is_glut_init = true; + } + return 0; + } + + virtual cl_context CreateCLContext( void ) + { + HGLRC hGLRC = wglGetCurrentContext(); + HDC hDC = wglGetCurrentDC(); + 
cl_context_properties properties[] = { + CL_CONTEXT_PLATFORM, (cl_context_properties) m_platform, + CL_GL_CONTEXT_KHR, (cl_context_properties) hGLRC, + CL_WGL_HDC_KHR, (cl_context_properties) hDC, + 0 + }; + cl_device_id devices[MAX_DEVICES]; + size_t dev_size; + cl_int status; + + if (!hGLRC || !hDC) { + print_error(CL_INVALID_CONTEXT, "No GL context bound"); + return 0; + } + + if (!clGetGLContextInfoKHR) { + // As OpenCL for the platforms. Warn if more than one platform found, + // since this might not be the platform we want. By default, we simply + // use the first returned platform. + + cl_uint nplatforms; + cl_platform_id platform; + clGetPlatformIDs(0, NULL, &nplatforms); + clGetPlatformIDs(1, &platform, NULL); + + if (nplatforms > 1) { + log_info("clGetPlatformIDs returned multiple values. This is not " + "an error, but might result in obtaining incorrect function " + "pointers if you do not want the first returned platform.\n"); + + // Show them the platform name, in case it is a problem. 
+ + size_t size; + char *name; + + clGetPlatformInfo(platform, CL_PLATFORM_NAME, 0, NULL, &size); + name = (char*)malloc(size); + clGetPlatformInfo(platform, CL_PLATFORM_NAME, size, name, NULL); + + log_info("Using platform with name: %s \n", name); + free(name); + } + + clGetGLContextInfoKHR = (clGetGLContextInfoKHR_fn) clGetExtensionFunctionAddressForPlatform(platform, "clGetGLContextInfoKHR"); + if (!clGetGLContextInfoKHR) { + print_error(CL_INVALID_PLATFORM, "Failed to query proc address for clGetGLContextInfoKHR"); + } + } + + status = clGetGLContextInfoKHR(properties, + CL_DEVICES_FOR_GL_CONTEXT_KHR, + sizeof(devices), + devices, + &dev_size); + if (status != CL_SUCCESS) { + print_error(status, "clGetGLContextInfoKHR failed"); + return 0; + } + dev_size /= sizeof(cl_device_id); + log_info("GL context supports %d compute devices\n", dev_size); + + status = clGetGLContextInfoKHR(properties, + CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR, + sizeof(devices), + devices, + &dev_size); + if (status != CL_SUCCESS) { + print_error(status, "clGetGLContextInfoKHR failed"); + return 0; + } + + cl_device_id ctxDevice = m_devices[0]; + if (dev_size > 0) { + log_info("GL context current device: 0x%x\n", devices[0]); + for (int i = 0; i < m_device_count; i++) { + if (m_devices[i] == devices[0]) { + ctxDevice = devices[0]; + break; + } + } + } else { + log_info("GL context current device is not a CL device, using device %d.\n", ctxDevice); + } + + return clCreateContext(properties, 1, &ctxDevice, NULL, NULL, &status); + } + + virtual int SupportsCLGLInterop( cl_device_type device_type ) + { + cl_device_id devices[MAX_DEVICES]; + cl_uint num_of_devices; + int error; + error = clGetPlatformIDs(1, &m_platform, NULL); + if (error) { + print_error(error, "clGetPlatformIDs failed"); + return -1; + } + error = clGetDeviceIDs(m_platform, device_type, MAX_DEVICES, devices, &num_of_devices); + if (error) { + print_error(error, "clGetDeviceIDs failed"); + return -1; + } + + // Check all 
devices, search for one that supports cl_khr_gl_sharing + char extensions[8192]; + for (int i=0; i<(int)num_of_devices; i++) { + error = clGetDeviceInfo(devices[i], CL_DEVICE_EXTENSIONS, sizeof(extensions), extensions, NULL); + if (error) { + print_error(error, "clGetDeviceInfo failed"); + return -1; + } + + if (strstr(extensions, "cl_khr_gl_sharing") == NULL) { + log_info("Device %d of %d does not support required extension cl_khr_gl_sharing.\n", i+1, num_of_devices); + } else { + log_info("Device %d of %d supports required extension cl_khr_gl_sharing.\n", i+1, num_of_devices); + m_devices[m_device_count++] = devices[i]; + } + } + return m_device_count > 0; + } + + virtual ~WGLEnvironment() + { + } +}; + +GLEnvironment * GLEnvironment::Instance( void ) +{ + static WGLEnvironment * env = NULL; + if( env == NULL ) + env = new WGLEnvironment(); + return env; +} diff --git a/test_common/gl/setup_x11.cpp b/test_common/gl/setup_x11.cpp new file mode 100644 index 00000000..22bc7eed --- /dev/null +++ b/test_common/gl/setup_x11.cpp @@ -0,0 +1,122 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#define GL_GLEXT_PROTOTYPES + +#include "setup.h" +#include "testBase.h" +#include "../../test_common/harness/errorHelpers.h" + +#include +#include +#include +#include +#include +#include + +class X11GLEnvironment : public GLEnvironment +{ +private: + cl_device_id m_devices[64]; + cl_uint m_device_count; + +public: + X11GLEnvironment() + { + m_device_count = 0; + } + virtual int Init( int *argc, char **argv, int use_opencl_32 ) + { + // Create a GLUT window to render into + glutInit( argc, argv ); + glutInitWindowSize( 512, 512 ); + glutInitDisplayMode( GLUT_RGB | GLUT_DOUBLE ); + glutCreateWindow( "OpenCL <-> OpenGL Test" ); + glewInit(); + return 0; + } + + virtual cl_context CreateCLContext( void ) + { + GLXContext context = glXGetCurrentContext(); + Display *dpy = glXGetCurrentDisplay(); + + cl_context_properties properties[] = { + CL_GL_CONTEXT_KHR, (cl_context_properties) context, + CL_GLX_DISPLAY_KHR, (cl_context_properties) dpy, + 0 + }; + cl_int status; + + if (!context || !dpy) { + print_error(CL_INVALID_CONTEXT, "No GL context bound"); + return 0; + } + + return clCreateContext(properties, 1, m_devices, NULL, NULL, &status); + } + + virtual int SupportsCLGLInterop( cl_device_type device_type ) + { + int found_valid_device = 0; + cl_platform_id platform; + cl_device_id devices[64]; + cl_uint num_of_devices; + int error; + error = clGetPlatformIDs(1, &platform, NULL); + if (error) { + print_error(error, "clGetPlatformIDs failed"); + return -1; + } + error = clGetDeviceIDs(platform, device_type, 64, devices, &num_of_devices); + // If this platform doesn't have any of the requested device_type (namely GPUs) then return 0 + if (error == CL_DEVICE_NOT_FOUND) + return 0; + if (error) { + print_error(error, "clGetDeviceIDs failed"); + return -1; + } + + char extensions[8192]; + for (int i=0; i<(int)num_of_devices; i++) { + error = clGetDeviceInfo(devices[i], CL_DEVICE_EXTENSIONS, sizeof(extensions), extensions, NULL); + if (error) { + print_error(error, 
"clGetDeviceInfo failed"); + return -1; + } + + if (strstr(extensions, "cl_khr_gl_sharing ") == NULL) { + log_info("Device %d of %d does not support required extension cl_khr_gl_sharing.\n", i+1, num_of_devices); + } else { + log_info("Device %d of %d supports required extension cl_khr_gl_sharing.\n", i+1, num_of_devices); + found_valid_device = 1; + m_devices[m_device_count++] = devices[i]; + } + } + return found_valid_device; + } + + virtual ~X11GLEnvironment() + { + } +}; + +GLEnvironment * GLEnvironment::Instance( void ) +{ + static X11GLEnvironment * env = NULL; + if( env == NULL ) + env = new X11GLEnvironment(); + return env; +} diff --git a/test_common/gles/gl_headers.h b/test_common/gles/gl_headers.h new file mode 100644 index 00000000..849da719 --- /dev/null +++ b/test_common/gles/gl_headers.h @@ -0,0 +1,64 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef _gl_headers_h +#define _gl_headers_h + +#define GL_GLEXT_PROTOTYPES 1 + +#include + +#ifdef GLES3 +#include +#else +#include +#endif + +#include +#include + +// Some macros to minimize the changes in the tests from GL to GLES2 +#define glGenRenderbuffersEXT glGenRenderbuffers +#define glDeleteRenderbuffersEXT glDeleteRenderbuffers +#define glBindRenderbufferEXT glBindRenderbuffer +#define glRenderbufferStorageEXT glRenderbufferStorage +#define glGetRenderbufferParameterivEXT glGetRenderbufferParameteriv +#define glCheckFramebufferStatusEXT glCheckFramebufferStatus +#define glGenFramebuffersEXT glGenFramebuffers +#define glDeleteFramebuffersEXT glDeleteFramebuffers +#define glBindFramebufferEXT glBindFramebuffer +#define glFramebufferRenderbufferEXT glFramebufferRenderbuffer +#define glTexImage3D glTexImage3DOES +#define glutGetProcAddress eglGetProcAddress + +#define GL_FRAMEBUFFER_EXT GL_FRAMEBUFFER +#define GL_FRAMEBUFFER_COMPLETE_EXT GL_FRAMEBUFFER_COMPLETE +#define GL_RENDERBUFFER_INTERNAL_FORMAT_EXT GL_RENDERBUFFER_INTERNAL_FORMAT +#define GL_RENDERBUFFER_EXT GL_RENDERBUFFER +#define GL_COLOR_ATTACHMENT0_EXT GL_COLOR_ATTACHMENT0 +#define GL_DEPTH_ATTACHMENT_EXT GL_DEPTH_ATTACHMENT +#define GL_TEXTURE_3D GL_TEXTURE_3D_OES +#define GL_READ_ONLY GL_BUFFER_ACCESS_OES + +#define GL_HALF_FLOAT_ARB GL_HALF_FLOAT_OES +#define GL_BGRA GL_BGRA_EXT +#define GL_RGBA32F_ARB GL_RGBA + +typedef unsigned short GLhalf; + +GLboolean gluCheckExtension(const GLubyte *extName, const GLubyte *extString); + +#endif // __gl_headers_h + diff --git a/test_common/gles/helpers.cpp b/test_common/gles/helpers.cpp new file mode 100644 index 00000000..188f9039 --- /dev/null +++ b/test_common/gles/helpers.cpp @@ -0,0 +1,1282 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "helpers.h" + +#include "gl_headers.h" + +#define CHECK_ERROR()\ + {GLint __error = glGetError(); if(__error) {log_error( "GL ERROR: %s!\n", gluErrorString( __error ));}} + +#if defined(__linux__) || defined(GL_ES_VERSION_2_0) +// On linux we don't link to GLU library to avoid compatibility issues with +// libstdc++ +// FIXME: Implement this +const GLubyte* gluErrorString (GLenum error) +{ + const char* gl_Error = "OpenGL Error"; + return (const GLubyte*)gl_Error; +} +#endif + +static void DrawQuad(void); + +void * CreateGLTexture2D( size_t width, size_t height, + GLenum target, GLenum glFormat, + GLenum internalFormat, GLenum glType, + ExplicitType type, GLuint *outTextureID, + int *outError, bool allocateMem, MTdata d ) +{ + *outError = 0; + GLenum err = 0; + + char * buffer = (char *)CreateRandomData(type, width * height * 4, d); + + glGenTextures( 1, outTextureID ); + glBindTexture( get_base_gl_target( target ), *outTextureID ); + err = glGetError(); + if( err != GL_NO_ERROR ) + { + log_error( "ERROR: Failed to create GL texture object: %s!\n", gluErrorString( err )); + *outError = -1; + free( buffer ); + return NULL; + } + +#ifndef GL_ES_VERSION_2_0 + glTexEnvi( GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE ); +#endif + glTexParameteri( get_base_gl_target( target ), GL_TEXTURE_MIN_FILTER, GL_NEAREST ); + glTexParameteri( get_base_gl_target( target ), GL_TEXTURE_MAG_FILTER, GL_NEAREST ); + + if( get_base_gl_target( target ) == GL_TEXTURE_CUBE_MAP ) + { + char * temp = (char *)malloc(width * height * 4 * get_explicit_type_size( type ) 
* sizeof(cl_char)); + if(allocateMem) + memcpy( temp, buffer, width * height * 4 * get_explicit_type_size( type ) ); + else + memset( temp, 0, width * height * 4 * get_explicit_type_size( type ) ); + + glTexImage2D( GL_TEXTURE_CUBE_MAP_POSITIVE_X, 0, internalFormat, (GLsizei)width, (GLsizei)height, 0, glFormat, glType, temp ); + glTexImage2D( GL_TEXTURE_CUBE_MAP_POSITIVE_Y, 0, internalFormat, (GLsizei)width, (GLsizei)height, 0, glFormat, glType, temp ); + glTexImage2D( GL_TEXTURE_CUBE_MAP_POSITIVE_Z, 0, internalFormat, (GLsizei)width, (GLsizei)height, 0, glFormat, glType, temp ); + glTexImage2D( GL_TEXTURE_CUBE_MAP_NEGATIVE_X, 0, internalFormat, (GLsizei)width, (GLsizei)height, 0, glFormat, glType, temp ); + glTexImage2D( GL_TEXTURE_CUBE_MAP_NEGATIVE_Y, 0, internalFormat, (GLsizei)width, (GLsizei)height, 0, glFormat, glType, temp ); + glTexImage2D( GL_TEXTURE_CUBE_MAP_NEGATIVE_Z, 0, internalFormat, (GLsizei)width, (GLsizei)height, 0, glFormat, glType, temp ); + free(temp); + } + else + { +#ifdef GLES_DEBUG + log_info("- glTexImage2D : %s : %s : %d : %d : %s : %s\n", + GetGLTargetName(target), + GetGLFormatName(internalFormat), + width, height, + GetGLFormatName(glFormat), + GetGLTypeName(glType)); + + DumpGLBuffer(glType, width, height, buffer); + +#endif + glTexImage2D( get_base_gl_target(target), 0, internalFormat, (GLsizei)width, (GLsizei)height, 0, glFormat, glType, buffer ); + } + + err = glGetError(); + if( err != GL_NO_ERROR ) + { + /** In section 9.8.3.1. of the CL 1.1. spec it says that: + * + * If a GL texture object with an internal format from table 9.4 is successfully created by + * OpenGL, then there is guaranteed to be a mapping to one of the corresponding CL image + * format(s) in that table. + * + * Notice that some of the formats in table 9.4 are not supported in OpenGL ES 2.0. 
+ */ + log_info( "Warning: Skipping %s : %s : %d : %d : %s : %s : because glTexImage2D returned %s\n", + GetGLTargetName(target), + GetGLFormatName(internalFormat), + (int)(width), (int)(height), + GetGLFormatName(glFormat), + GetGLTypeName(glType), + gluErrorString( err )); + + glDeleteTextures( 1, outTextureID ); + *outTextureID = 0; + *outError = 0; + free( buffer ); + err = glGetError(); + return NULL; + } + +#ifdef GLES_DEBUG + memset(buffer, 0, width * height * 4 * get_explicit_type_size( type )); + + log_info("- glGetTexImage : %s : %s : %s\n", + GetGLTargetName(target), + GetGLFormatName(glFormat), + GetGLTypeName(glType)); + + glGetTexImage(target, 0, glFormat, glType, buffer); + + DumpGLBuffer(type, width, height, buffer); + + err = glGetError(); + if( err != GL_NO_ERROR ) + { + log_error( "ERROR: Unable to read data from glGetTexImage : %s : %s : %s : Error %s\n", + GetGLTargetName(target), + GetGLFormatName(glFormat), + GetGLTypeName(glType), + gluErrorString( err )); + return NULL; + } +#endif + + if( !allocateMem ) + { + free( buffer ); + return NULL; + } + +#ifndef GL_ES_VERSION_2_0 + if( glType == GL_UNSIGNED_INT_8_8_8_8_REV && glFormat == GL_BGRA && allocateMem ) + { + // Reverse and reorder to validate since in the + // kernel the read_imagef() call always returns RGBA + cl_uchar *p = (cl_uchar *)buffer; + for( size_t i = 0; i < width * height; i++ ) + { + cl_uchar uc0 = p[i * 4 + 0]; + cl_uchar uc1 = p[i * 4 + 1]; + cl_uchar uc2 = p[i * 4 + 2]; + cl_uchar uc3 = p[i * 4 + 3]; + + p[ i * 4 + 0 ] = uc2; + p[ i * 4 + 1 ] = uc1; + p[ i * 4 + 2 ] = uc0; + p[ i * 4 + 3 ] = uc3; + } + } +#endif + + return buffer; +} + +void * CreateGLTexture3D( size_t width, size_t height, size_t depth, + GLenum target, GLenum glFormat, + GLenum internalFormat, GLenum glType, + ExplicitType type, GLuint *outTextureID, + int *outError, MTdata d, bool allocateMem) +{ + *outError = 0; + + char * buffer = (char *)create_random_data( type, d, width * height * depth * 4 ); + + 
if( type == kFloat && allocateMem ) + { + // Re-fill the created buffer to just have [0-1] floats, since that's what it'd expect + cl_float *p = (cl_float *)buffer; + for( size_t i = 0; i < width * height * depth * 4; i++ ) + { + p[ i ] = (float) genrand_real1( d ); + } + } + else if( !allocateMem ) + memset( buffer, 0, width * height * depth * 4 * get_explicit_type_size( type ) ); + + glGenTextures( 1, outTextureID ); + + glBindTexture( target, *outTextureID ); +#ifndef GL_ES_VERSION_2_0 + glTexEnvi( GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE ); +#endif + glTexParameteri( target, GL_TEXTURE_MIN_FILTER, GL_NEAREST ); + glTexParameteri( target, GL_TEXTURE_MAG_FILTER, GL_NEAREST ); + + glGetError(); + glTexImage3D( target, 0, internalFormat, (GLsizei)width, (GLsizei)height, (GLsizei)depth, 0, glFormat, glType, buffer ); + GLenum err = glGetError(); + if( err != GL_NO_ERROR ) + { + /** In section 9.8.3.1. of the CL 1.1. spec it says that: + * + * If a GL texture object with an internal format from table 9.4 is successfully created by + * OpenGL, then there is guaranteed to be a mapping to one of the corresponding CL image + * format(s) in that table. + * + * Notice that some of the formats in table 9.4 are not supported in OpenGL ES 2.0. 
+ */ + log_info( "Warning: Skipping %s : %s : %d : %d : %s : %s : because glTexImage3D returned %s\n", + GetGLTargetName(target), + GetGLFormatName(internalFormat), + (int)(width), (int)(height), + GetGLFormatName(glFormat), + GetGLTypeName(glType), + gluErrorString( err )); + + *outError = 0; + delete[] buffer; + return NULL; + } + + if( !allocateMem ) + { + delete [] buffer; + return NULL; + } + +#ifndef GL_ES_VERSION_2_0 + if( glType == GL_UNSIGNED_INT_8_8_8_8_REV && glFormat == GL_BGRA && allocateMem ) + { + // Reverse and reorder to validate since in the + // kernel the read_imagef() call always returns RGBA + + cl_uchar *p = (cl_uchar *)buffer; + for( size_t i = 0; i < width * height * depth; i++ ) + { + cl_uchar uc0 = p[i * 4 + 0]; + cl_uchar uc1 = p[i * 4 + 1]; + cl_uchar uc2 = p[i * 4 + 2]; + cl_uchar uc3 = p[i * 4 + 3]; + + p[ i * 4 + 0 ] = uc2; + p[ i * 4 + 1 ] = uc1; + p[ i * 4 + 2 ] = uc0; + p[ i * 4 + 3 ] = uc3; + } + } +#endif + + return buffer; +} + +void * ReadGLTexture( GLenum glTarget, GLuint glTexture, + GLenum glFormat, GLenum glInternalFormat, + GLenum glType, ExplicitType typeToReadAs, + size_t outWidth, size_t outHeight ) +{ + // Read results from the GL texture + glBindTexture(get_base_gl_target(glTarget), glTexture); + + GLint realWidth, realHeight; + GLint realInternalFormat; + GLenum readBackFormat = GL_RGBA; + GLenum readBackType = glType; + glFramebufferWrapper glFramebuffer; + glRenderbufferWrapper glRenderbuffer; + size_t outBytes = outWidth * outHeight * 4 * GetGLTypeSize(readBackType); + cl_char *outBuffer = (cl_char *)malloc( outBytes ); + GLenum err = 0; + + memset(outBuffer, 0, outBytes); + glGenFramebuffersEXT( 1, &glFramebuffer ); + glBindFramebufferEXT( GL_FRAMEBUFFER_EXT, glFramebuffer ); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, glTarget, glTexture, 0); + err = glGetError(); + if (err != GL_NO_ERROR) + { + log_error("Failed to attach texture to FBO!\n"); + return NULL; + } + + glReadPixels( 0, 0, 
(GLsizei)outWidth, (GLsizei)outHeight, readBackFormat, readBackType, outBuffer ); + +#ifdef GLES_DEBUG + + log_info( "- glGetTexImage: %s : %s : %s \n", + GetGLTargetName( glTarget), + GetGLFormatName(readBackFormat), + GetGLTypeName(readBackType)); + + DumpGLBuffer(readBackType, realWidth, realHeight, (void*)outBuffer); + +#endif + + return (void *)outBuffer; +} + +int CreateGLRenderbufferRaw( GLsizei width, GLsizei height, + GLenum attachment, + GLenum rbFormat, GLenum rbType, + GLuint *outFramebuffer, + GLuint *outRenderbuffer ) +{ + GLenum err = 0; + + // Generate a renderbuffer and bind + glGenRenderbuffersEXT( 1, outRenderbuffer ); + glBindRenderbufferEXT( GL_RENDERBUFFER_EXT, *outRenderbuffer ); + + // Allocate storage to the renderbuffer + glGetError(); + glRenderbufferStorageEXT( GL_RENDERBUFFER_EXT, rbFormat, (GLsizei)width, (GLsizei)height ); + err = glGetError(); + if( err != GL_NO_ERROR ) + { + log_error("Failed to allocate render buffer storage!\n"); + return 1701; + } + + GLint realInternalFormat; + glGetRenderbufferParameterivEXT( GL_RENDERBUFFER_EXT, GL_RENDERBUFFER_INTERNAL_FORMAT_EXT, &realInternalFormat ); + rbFormat = realInternalFormat; + +#ifdef GLES_DEBUG + GLint rsize, gsize, bsize, asize; + glGetRenderbufferParameterivEXT(GL_RENDERBUFFER_EXT, GL_RENDERBUFFER_RED_SIZE_EXT,&rsize); + glGetRenderbufferParameterivEXT(GL_RENDERBUFFER_EXT, GL_RENDERBUFFER_GREEN_SIZE_EXT,&gsize); + glGetRenderbufferParameterivEXT(GL_RENDERBUFFER_EXT, GL_RENDERBUFFER_BLUE_SIZE_EXT,&bsize); + glGetRenderbufferParameterivEXT(GL_RENDERBUFFER_EXT, GL_RENDERBUFFER_ALPHA_SIZE_EXT,&asize); + + log_info("Renderbuffer internal format requested: %s actual: %s sizes: r=%d g=%d b=%d a=%d\n", + GetGLFormatName( internalFormat ), GetGLFormatName( realInternalFormat ), + rsize, gsize, bsize, asize ); +#endif + + // Create and bind a framebuffer to render with + glGenFramebuffersEXT( 1, outFramebuffer ); + glBindFramebufferEXT( GL_FRAMEBUFFER_EXT, *outFramebuffer ); + if( err != 
GL_NO_ERROR ) + { + log_error( "ERROR: Unable to bind framebuffer : Error %s\n", + gluErrorString( err )); + + return -1; + } + + // Attach to the framebuffer + glFramebufferRenderbufferEXT( GL_FRAMEBUFFER_EXT, attachment, GL_RENDERBUFFER_EXT, *outRenderbuffer ); + err = glGetError(); + GLint status = glCheckFramebufferStatusEXT( GL_FRAMEBUFFER_EXT ); + if( status != GL_FRAMEBUFFER_COMPLETE_EXT ) + { + log_error( "ERROR: Unable to attach renderbuffer to framebuffer (%s, status %x)\n", gluErrorString( err ), (int)status ); + return -1; + } + + return 0; +} + +static void DrawQuad(void) +{ + const char *vssrc = + "varying mediump vec2 texCoord;\n" + "attribute vec2 inPosition;\n" + "void main() {\n" + " texCoord = vec2((inPosition.x+1.0)/2.0, (inPosition.y+1.0)/2.0);\n" + " gl_Position = vec4(inPosition.x, inPosition.y, 0.0, 1.0);\n" + "}\n"; + const char *fssrc = + "uniform sampler2D tex;\n" + "varying mediump vec2 texCoord;\n" + "void main() {\n" + " gl_FragColor = texture2D(tex, texCoord);\n" + "}\n"; + GLuint vs, fs, program; + GLuint positionIdx = 0; + GLfloat x1 = -1.0f, x2 = 1.0f, y1 = -1.0f, y2 = 1.0f; + GLfloat vertices[4][2]; + vertices[0][0] = x1; vertices[0][1] = y1; + vertices[1][0] = x2; vertices[1][1] = y1; + vertices[2][0] = x1; vertices[2][1] = y2; + vertices[3][0] = x2; vertices[3][1] = y2; + + vs = glCreateShader(GL_VERTEX_SHADER); + fs = glCreateShader(GL_FRAGMENT_SHADER); + + glShaderSource(vs, 1, &vssrc, NULL); + glShaderSource(fs, 1, &fssrc, NULL); + + glCompileShader(vs); + glCompileShader(fs); + + program = glCreateProgram(); + glAttachShader(program, vs); + glAttachShader(program, fs); + glLinkProgram(program); + glUseProgram(program); + + positionIdx = glGetAttribLocation(program, "inPosition"); + glEnableVertexAttribArray(positionIdx); + glVertexAttribPointer(positionIdx, 2, GL_FLOAT, GL_FALSE, 0, vertices); + glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); + + glUseProgram(0); + glDeleteProgram(program); + glDeleteShader(vs); + glDeleteShader(fs); 
+} + +void * CreateGLRenderbuffer( GLsizei width, GLsizei height, + GLenum attachment, + GLenum rbFormat, GLenum rbType, + GLenum texFormat, GLenum texType, + ExplicitType type, + GLuint *outFramebuffer, + GLuint *outRenderbuffer, + int *outError, MTdata d, bool allocateMem ) +{ + *outError = CreateGLRenderbufferRaw( width, height, attachment, rbFormat, rbType, outFramebuffer, outRenderbuffer ); + + if( *outError != 0 ) + return NULL; + + GLenum err = 0; + + // Generate a renderbuffer and bind + glGenRenderbuffersEXT( 1, outRenderbuffer ); + glBindRenderbufferEXT( GL_RENDERBUFFER_EXT, *outRenderbuffer ); + + // Allocate storage to the renderbuffer + glGetError(); + glRenderbufferStorageEXT( GL_RENDERBUFFER_EXT, rbFormat, (GLsizei)width, (GLsizei)height ); + err = glGetError(); + if( err != GL_NO_ERROR ) + { + *outError = 1701; + log_error("Failed to allocate render buffer storage!\n"); + return NULL; + } + + GLint realInternalFormat; + glGetRenderbufferParameterivEXT( GL_RENDERBUFFER_EXT, GL_RENDERBUFFER_INTERNAL_FORMAT_EXT, &realInternalFormat ); + rbFormat = realInternalFormat; + +#ifdef GLES_DEBUG + GLint rsize, gsize, bsize, asize; + glGetRenderbufferParameterivEXT(GL_RENDERBUFFER_EXT, GL_RENDERBUFFER_RED_SIZE_EXT,&rsize); + glGetRenderbufferParameterivEXT(GL_RENDERBUFFER_EXT, GL_RENDERBUFFER_GREEN_SIZE_EXT,&gsize); + glGetRenderbufferParameterivEXT(GL_RENDERBUFFER_EXT, GL_RENDERBUFFER_BLUE_SIZE_EXT,&bsize); + glGetRenderbufferParameterivEXT(GL_RENDERBUFFER_EXT, GL_RENDERBUFFER_ALPHA_SIZE_EXT,&asize); + + log_info("Renderbuffer internal format requested: %s actual: %s sizes: r=%d g=%d b=%d a=%d\n", + GetGLFormatName( internalFormat ), GetGLFormatName( realInternalFormat ), + rsize, gsize, bsize, asize ); +#endif + + // Create and bind a framebuffer to render with + glGenFramebuffersEXT( 1, outFramebuffer ); + glBindFramebufferEXT( GL_FRAMEBUFFER_EXT, *outFramebuffer ); + err = glGetError(); + if( err != GL_NO_ERROR ) + { + log_error( "ERROR: Unable to bind 
framebuffer : Error %s\n", + gluErrorString( err )); + + *outError = -1; + return NULL; + } + + // Attach to the framebuffer + glFramebufferRenderbufferEXT( GL_FRAMEBUFFER_EXT, attachment, GL_RENDERBUFFER_EXT, *outRenderbuffer ); + CHECK_ERROR(); + GLint status = glCheckFramebufferStatusEXT( GL_FRAMEBUFFER_EXT ); + if( status != GL_FRAMEBUFFER_COMPLETE_EXT ) + { + *outError = -1; + log_error( "ERROR: Unable to attach renderbuffer to framebuffer (%s, status %x)\n", gluErrorString( err ), (int)status ); + return NULL; + } + + void* buffer = CreateRandomData(type, width * height * 4, d); + +#ifdef GLES_DEBUG + log_info( "- Fillling renderbuffer: %d : %d : %s : %s \n", + (int)width, (int)height, + GetGLFormatName(glFormat), + GetGLTypeName(glType)); + + DumpGLBuffer(glType, (int)width, (int)height, (void*)buffer); +#endif + + CHECK_ERROR(); + + // Fill a texture with our input data + glTextureWrapper texture; + glGenTextures( 1, &texture ); + glBindTexture( GL_TEXTURE_2D, texture ); + CHECK_ERROR(); + glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST ); + glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST ); + glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE ); + glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE ); + CHECK_ERROR(); + glTexImage2D( GL_TEXTURE_2D, 0, texFormat, width, height, 0, texFormat, texType, buffer ); + CHECK_ERROR(); + + // Render fullscreen textured quad + glViewport(0, 0, width, height); + DrawQuad(); + CHECK_ERROR(); + + // Read back the data in the renderbuffer + memset(buffer, 0, width * height * 4 * get_explicit_type_size( type )); + glReadPixels( 0, 0, (GLsizei)width, (GLsizei)height, texFormat, texType, buffer ); + + err = glGetError(); + if( err != GL_NO_ERROR ) + { + log_error( "ERROR: Unable to read data via glReadPixels : %d : %d : %s : %s : Error %s\n", + (int)width, (int)height, + GetGLFormatName(texFormat), + GetGLTypeName(texType), + gluErrorString( err )); + 
*outError = -1; + } + +#ifdef GLES_DEBUG + log_info( "- glReadPixels: %d : %d : %s : %s \n", + (int)width, (int)height, + GetGLFormatName(glFormat), + GetGLTypeName(glType)); + + DumpGLBuffer(glType, (int)width, (int)height, (void*)buffer); +#endif + + if( !allocateMem ) + { + free( buffer ); + return NULL; + } + +#ifndef GL_ES_VERSION_2_0 + if( glType == GL_UNSIGNED_INT_8_8_8_8_REV && glFormat == GL_BGRA && allocateMem ) + { + // Reverse and reorder to validate since in the + // kernel the read_imagef() call always returns RGBA + cl_uchar *p = (cl_uchar *)buffer; + for( size_t i = 0; i < (size_t)width * height; i++ ) + { + cl_uchar uc0 = p[i * 4 + 0]; + cl_uchar uc1 = p[i * 4 + 1]; + cl_uchar uc2 = p[i * 4 + 2]; + cl_uchar uc3 = p[i * 4 + 3]; + + p[ i * 4 + 0 ] = uc2; + p[ i * 4 + 1 ] = uc1; + p[ i * 4 + 2 ] = uc0; + p[ i * 4 + 3 ] = uc3; + } + } +#endif + + return buffer; +} + +void * ReadGLRenderbuffer( GLuint glFramebuffer, GLuint glRenderbuffer, + GLenum attachment, + GLenum rbFormat, GLenum rbType, + GLenum texFormat, GLenum texType, + ExplicitType typeToReadAs, + size_t outWidth, size_t outHeight ) +{ + glBindFramebufferEXT( GL_FRAMEBUFFER_EXT, glFramebuffer ); + glFramebufferRenderbufferEXT( GL_FRAMEBUFFER_EXT, attachment, GL_RENDERBUFFER_EXT, glRenderbuffer ); + + // Attach to the framebuffer + GLint err = glGetError(); + if( glCheckFramebufferStatusEXT( GL_FRAMEBUFFER_EXT ) != GL_FRAMEBUFFER_COMPLETE_EXT ) + { + log_error( "ERROR: Unable to attach renderbuffer to framebuffer (%s)\n", gluErrorString( err ) ); + return NULL; + } + + // Read results from the GL renderbuffer +#ifdef GLES_DEBUG + log_info( "- Reading back from GL: %d x %d : %s : %s : %s\n", + (int)outWidth, (int)outHeight, + GetGLFormatName( glInternalFormat ), + GetGLFormatName( glFormat ), + GetGLTypeName( glType )); +#endif + + GLenum readBackFormat = GL_RGBA; + GLenum readBackType = texType; + + size_t outBytes = outWidth * outHeight * 4 * GetGLTypeSize(readBackType); + void *outBuffer = 
malloc( outBytes ); + memset(outBuffer, 0, outBytes); + + glReadPixels( 0, 0, (GLsizei)outWidth, (GLsizei)outHeight, readBackFormat, readBackType, outBuffer ); + +#ifdef GLES_DEBUG + log_info( "- glReadPixels: %d : %d : %s : %s \n", + (int)outWidth, (int)outHeight, + GetGLFormatName(readBackFormat), + GetGLTypeName(readBackType)); + + DumpGLBuffer(readBackType, outWidth, outHeight, outBuffer); +#endif + + return (void *)outBuffer; +} + +GLenum +GetGLFormat(GLenum internalFormat) +{ + GLenum glFormat; + switch (internalFormat) + { + case GL_BGRA: +#ifndef GL_ES_VERSION_2_0 + case GL_RGBA8: + case GL_RGBA16: + case GL_RGBA32F_ARB: +#endif + glFormat = GL_RGBA; + break; +#ifndef GL_ES_VERSION_2_0 + case GL_RGBA8I_EXT: + case GL_RGBA16I_EXT: + case GL_RGBA32I_EXT: + case GL_RGBA8UI_EXT: + case GL_RGBA16UI_EXT: + case GL_RGBA32UI_EXT: + glFormat = GL_RGBA_INTEGER_EXT; + break; +#endif + default: + glFormat = GL_RGBA; + break; + } + + return glFormat; +} + +GLenum GetGLTypeForExplicitType(ExplicitType type) +{ + switch( type ) + { + case kFloat: + return GL_FLOAT; + case kInt: + return GL_INT; + case kUInt: + return GL_UNSIGNED_INT; + case kShort: + return GL_SHORT; + case kUShort: + return GL_UNSIGNED_SHORT; + case kChar: + return GL_BYTE; + case kUChar: + return GL_UNSIGNED_BYTE; + case kHalf: +#if defined( __APPLE__ ) + return GL_HALF_FLOAT; +#else + return GL_HALF_FLOAT_ARB; +#endif + default: + return GL_INT; + }; +} + +size_t GetGLTypeSize(GLenum type) +{ + switch( type ) + { + case GL_FLOAT: + return sizeof(GLfloat); + case GL_INT: + return sizeof(GLint); + case GL_UNSIGNED_INT: + return sizeof(GLuint); + case GL_SHORT: + return sizeof(GLshort); + case GL_UNSIGNED_SHORT: + return sizeof(GLushort); + case GL_BYTE: + return sizeof(GLbyte); + case GL_UNSIGNED_BYTE: + return sizeof(GLubyte); +#if defined( __APPLE__ ) + case GL_HALF_FLOAT: +#else + case GL_HALF_FLOAT_ARB: +#endif + return sizeof(GLhalf); + default: + return kFloat; + }; +} + +ExplicitType 
GetExplicitTypeForGLType(GLenum type) +{ + switch( type ) + { + case GL_FLOAT: + return kFloat; + case GL_INT: + return kInt; + case GL_UNSIGNED_INT: + return kUInt; + case GL_SHORT: + return kShort; + case GL_UNSIGNED_SHORT: + return kUShort; + case GL_BYTE: + return kChar; + case GL_UNSIGNED_BYTE: + return kUChar; +#if defined( __APPLE__ ) + case GL_HALF_FLOAT: +#else + case GL_HALF_FLOAT_ARB: +#endif + return kHalf; + default: + return kFloat; + }; +} + +GLenum get_base_gl_target( GLenum target ) +{ + switch( target ) + { + case GL_TEXTURE_CUBE_MAP_POSITIVE_X: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: + return GL_TEXTURE_CUBE_MAP; + default: + return target; + } +} + +const char *GetGLTypeName( GLenum type ) +{ + switch( type ) + { + case GL_BYTE: return "GL_BYTE"; + case GL_UNSIGNED_BYTE: return "GL_UNSIGNED_BYTE"; + case GL_INT: return "GL_INT"; + case GL_UNSIGNED_INT: return "GL_UNSIGNED_INT"; + case GL_SHORT: return "GL_SHORT"; + case GL_UNSIGNED_SHORT: return "GL_UNSIGNED_SHORT"; +#if defined( __APPLE__ ) + case GL_HALF_FLOAT: return "GL_HALF_FLOAT"; +#else + case GL_HALF_FLOAT_ARB: return "GL_HALF_FLOAT_ARB"; +#endif + case GL_FLOAT: return "GL_FLOAT"; +#ifndef GL_ES_VERSION_2_0 + case GL_UNSIGNED_INT_8_8_8_8: return "GL_UNSIGNED_INT_8_8_8_8"; + case GL_UNSIGNED_INT_8_8_8_8_REV: return "GL_UNSIGNED_INT_8_8_8_8_REV"; +#endif + default: + { + static char foo[ 128 ]; + sprintf( foo, "(Unknown:0x%08x)", (int)type ); + return foo; + } + } +} + +const char *GetGLTargetName( GLenum tgt ) +{ + if( tgt == GL_TEXTURE_2D ) return "GL_TEXTURE_2D"; + if( tgt == GL_TEXTURE_3D ) return "GL_TEXTURE_3D"; +#ifndef GL_ES_VERSION_2_0 + if( tgt == GL_TEXTURE_RECTANGLE_EXT ) return "GL_TEXTURE_RECTANGLE_EXT"; +#endif + if( tgt == GL_TEXTURE_CUBE_MAP_POSITIVE_X ) return "GL_TEXTURE_CUBE_MAP_POSITIVE_X"; + if( tgt == 
GL_TEXTURE_CUBE_MAP_POSITIVE_Y ) return "GL_TEXTURE_CUBE_MAP_POSITIVE_Y"; + if( tgt == GL_TEXTURE_CUBE_MAP_POSITIVE_Z ) return "GL_TEXTURE_CUBE_MAP_POSITIVE_Z"; + if( tgt == GL_TEXTURE_CUBE_MAP_NEGATIVE_X ) return "GL_TEXTURE_CUBE_MAP_NEGATIVE_X"; + if( tgt == GL_TEXTURE_CUBE_MAP_NEGATIVE_Y ) return "GL_TEXTURE_CUBE_MAP_NEGATIVE_Y"; + if( tgt == GL_TEXTURE_CUBE_MAP_NEGATIVE_Z ) return "GL_TEXTURE_CUBE_MAP_NEGATIVE_Z"; + return ""; +} + +const char *GetGLAttachmentName( GLenum att ) +{ + if( att == GL_COLOR_ATTACHMENT0_EXT ) return "GL_COLOR_ATTACHMENT0_EXT"; +#ifndef GL_ES_VERSION_2_0 + if( att == GL_COLOR_ATTACHMENT1_EXT ) return "GL_COLOR_ATTACHMENT1_EXT"; + if( att == GL_COLOR_ATTACHMENT2_EXT ) return "GL_COLOR_ATTACHMENT2_EXT"; + if( att == GL_COLOR_ATTACHMENT3_EXT ) return "GL_COLOR_ATTACHMENT3_EXT"; + if( att == GL_COLOR_ATTACHMENT4_EXT ) return "GL_COLOR_ATTACHMENT4_EXT"; + if( att == GL_COLOR_ATTACHMENT5_EXT ) return "GL_COLOR_ATTACHMENT5_EXT"; + if( att == GL_COLOR_ATTACHMENT6_EXT ) return "GL_COLOR_ATTACHMENT6_EXT"; + if( att == GL_COLOR_ATTACHMENT7_EXT ) return "GL_COLOR_ATTACHMENT7_EXT"; + if( att == GL_COLOR_ATTACHMENT8_EXT ) return "GL_COLOR_ATTACHMENT8_EXT"; +#endif + if( att == GL_DEPTH_ATTACHMENT_EXT ) return "GL_DEPTH_ATTACHMENT_EXT"; + return ""; +} +const char *GetGLBaseFormatName( GLenum baseformat ) +{ + switch( baseformat ) + { + case GL_RGBA: return "GL_RGBA"; +#ifdef GL_ES_VERSION_2_0 + case GL_BGRA_EXT: return "GL_BGRA_EXT"; +#else + case GL_RGBA8: return "GL_RGBA"; + case GL_RGBA16: return "GL_RGBA"; + case GL_BGRA: return "GL_BGRA"; + case GL_RGBA8I_EXT: return "GL_RGBA_INTEGER_EXT"; + case GL_RGBA16I_EXT: return "GL_RGBA_INTEGER_EXT"; + case GL_RGBA32I_EXT: return "GL_RGBA_INTEGER_EXT"; + case GL_RGBA8UI_EXT: return "GL_RGBA_INTEGER_EXT"; + case GL_RGBA16UI_EXT: return "GL_RGBA_INTEGER_EXT"; + case GL_RGBA32UI_EXT: return "GL_RGBA_INTEGER_EXT"; + case GL_RGBA32F_ARB: return "GL_RGBA"; + + case GL_RGBA_INTEGER_EXT: return 
"GL_RGBA_INTEGER_EXT"; + + case GL_ALPHA4: return "GL_ALPHA"; + case GL_ALPHA8: return "GL_ALPHA"; + case GL_ALPHA12: return "GL_ALPHA"; + case GL_ALPHA16: return "GL_ALPHA"; + case GL_LUMINANCE4: return "GL_LUMINANCE"; + case GL_LUMINANCE8: return "GL_LUMINANCE"; + case GL_LUMINANCE12: return "GL_LUMINANCE"; + case GL_LUMINANCE16: return "GL_LUMINANCE"; + case GL_LUMINANCE4_ALPHA4: return "GL_LUMINANCE_ALPHA"; + case GL_LUMINANCE6_ALPHA2: return "GL_LUMINANCE_ALPHA"; + case GL_LUMINANCE8_ALPHA8: return "GL_LUMINANCE_ALPHA"; + case GL_LUMINANCE12_ALPHA4: return "GL_LUMINANCE_ALPHA"; + case GL_LUMINANCE12_ALPHA12: return "GL_LUMINANCE_ALPHA"; + case GL_LUMINANCE16_ALPHA16: return "GL_LUMINANCE_ALPHA"; + case GL_INTENSITY: return "GL_INTENSITY"; + case GL_INTENSITY4: return "GL_INTENSITY"; + case GL_INTENSITY8: return "GL_INTENSITY"; + case GL_INTENSITY12: return "GL_INTENSITY"; + case GL_INTENSITY16: return "GL_INTENSITY"; + case GL_R3_G3_B2: return "GL_RGB"; + case GL_RGB4: return "GL_RGB"; + case GL_RGB5: return "GL_RGB"; + case GL_RGB8: return "GL_RGB"; + case GL_RGB10: return "GL_RGB"; + case GL_RGB12: return "GL_RGB"; + case GL_RGB16: return "GL_RGB"; + case GL_RGBA2: return "GL_RGBA"; + case GL_RGBA4: return "GL_RGBA"; + case GL_RGB5_A1: return "GL_RGBA"; + case GL_RGB10_A2: return "GL_RGBA"; + case GL_RGBA12: return "GL_RGBA"; +#endif + + default: + { + static char foo[ 128 ]; + sprintf( foo, "(Unknown:0x%08x)", (int)baseformat ); + return foo; + } + } +} + +const char *GetGLFormatName( GLenum format ) +{ + switch( format ) + { + case GL_RGBA: return "GL_RGBA"; +#ifdef GL_ES_VERSION_2_0 + case GL_BGRA_EXT: return "GL_BGRA_EXT"; +#else + case GL_RGBA8: return "GL_RGBA8"; + case GL_RGBA16: return "GL_RGBA16"; + case GL_BGRA: return "GL_BGRA"; + case GL_RGBA8I_EXT: return "GL_RGBA8I_EXT"; + case GL_RGBA16I_EXT: return "GL_RGBA16I_EXT"; + case GL_RGBA32I_EXT: return "GL_RGBA32I_EXT"; + case GL_RGBA8UI_EXT: return "GL_RGBA8UI_EXT"; + case GL_RGBA16UI_EXT: return 
"GL_RGBA16UI_EXT"; + case GL_RGBA32UI_EXT: return "GL_RGBA32UI_EXT"; + case GL_RGBA32F_ARB: return "GL_RGBA32F_ARB"; + + case GL_RGBA_INTEGER_EXT: return "GL_RGBA_INTEGER_EXT"; + + case GL_ALPHA4: return "GL_ALPHA4"; + case GL_ALPHA8: return "GL_ALPHA8"; + case GL_ALPHA12: return "GL_ALPHA12"; + case GL_ALPHA16: return "GL_ALPHA16"; + case GL_LUMINANCE4: return "GL_LUMINANCE4"; + case GL_LUMINANCE8: return "GL_LUMINANCE8"; + case GL_LUMINANCE12: return "GL_LUMINANCE12"; + case GL_LUMINANCE16: return "GL_LUMINANCE16"; + case GL_LUMINANCE4_ALPHA4: return "GL_LUMINANCE4_ALPHA4"; + case GL_LUMINANCE6_ALPHA2: return "GL_LUMINANCE6_ALPHA2"; + case GL_LUMINANCE8_ALPHA8: return "GL_LUMINANCE8_ALPHA8"; + case GL_LUMINANCE12_ALPHA4: return "GL_LUMINANCE12_ALPHA4"; + case GL_LUMINANCE12_ALPHA12: return "GL_LUMINANCE12_ALPHA12"; + case GL_LUMINANCE16_ALPHA16: return "GL_LUMINANCE16_ALPHA16"; + case GL_INTENSITY: return "GL_INTENSITY"; + case GL_INTENSITY4: return "GL_INTENSITY4"; + case GL_INTENSITY8: return "GL_INTENSITY8"; + case GL_INTENSITY12: return "GL_INTENSITY12"; + case GL_INTENSITY16: return "GL_INTENSITY16"; + case GL_R3_G3_B2: return "GL_R3_G3_B2"; + case GL_RGB4: return "GL_RGB4"; + case GL_RGB5: return "GL_RGB5"; + case GL_RGB8: return "GL_RGB8"; + case GL_RGB10: return "GL_RGB10"; + case GL_RGB12: return "GL_RGB12"; + case GL_RGB16: return "GL_RGB16"; + case GL_RGBA2: return "GL_RGBA2"; + case GL_RGBA4: return "GL_RGBA4"; + case GL_RGB5_A1: return "GL_RGB5_A1"; + case GL_RGB10_A2: return "GL_RGB10_A2"; + case GL_RGBA12: return "GL_RGBA12"; +#endif + case GL_INT: return "GL_INT"; + case GL_UNSIGNED_INT: return "GL_UNSIGNED_INT"; + case GL_SHORT: return "GL_SHORT"; + case GL_UNSIGNED_SHORT: return "GL_UNSIGNED_SHORT"; + case GL_BYTE: return "GL_BYTE"; + case GL_UNSIGNED_BYTE: return "GL_UNSIGNED_BYTE"; + case GL_FLOAT: return "GL_FLOAT"; +#ifdef GL_ES_VERSION_2_0 + case GL_HALF_FLOAT_OES: return "GL_HALF_FLOAT_OES"; +#else +#if defined( __APPLE__ ) + case 
GL_HALF_FLOAT: return "GL_HALF_FLOAT"; +#else + case GL_HALF_FLOAT_ARB: return "GL_HALF_FLOAT_ARB"; +#endif +#endif + + default: + { + static char foo[ 128 ]; + sprintf( foo, "(Unknown:0x%08x)", (int)format ); + return foo; + } + } +} + +cl_ushort float2half_rte( float f ) +{ + union{ float f; cl_uint u; } u = {f}; + cl_uint sign = (u.u >> 16) & 0x8000; + float x = fabsf(f); + + //Nan + if( x != x ) + { + u.u >>= (24-11); + u.u &= 0x7fff; + u.u |= 0x0200; //silence the NaN + return u.u | sign; + } + + // overflow + if( x >= MAKE_HEX_FLOAT(0x1.ffep15f, 0x1ffeL, 3) ) + return 0x7c00 | sign; + + // underflow + if( x <= MAKE_HEX_FLOAT(0x1.0p-25f, 0x1L, -25) ) + return sign; // The halfway case can return 0x0001 or 0. 0 is even. + + // very small + if( x < MAKE_HEX_FLOAT(0x1.8p-24f, 0x18L, -28) ) + return sign | 1; + + // half denormal + if( x < MAKE_HEX_FLOAT(0x1.0p-14f, 0x1L, -14) ) + { + u.f = x * MAKE_HEX_FLOAT(0x1.0p-125f, 0x1L, -125); + return sign | u.u; + } + + u.f *= MAKE_HEX_FLOAT(0x1.0p13f, 0x1L, 13); + u.u &= 0x7f800000; + x += u.f; + u.f = x - u.f; + u.f *= MAKE_HEX_FLOAT(0x1.0p-112f, 0x1L, -112); + + return (u.u >> (24-11)) | sign; +} + +void* CreateRandomData( ExplicitType type, size_t count, MTdata d ) +{ + switch(type) + { + case (kChar): + { + cl_char *p = (cl_char *)malloc(count * sizeof(cl_char)); + if(!p) return 0; + + for( size_t i = 0; i < count; i++ ) + { + p[ i ] = (cl_char)genrand_int32(d); + } + return (void*)p; + } + case (kUChar): + { + cl_uchar *p = (cl_uchar *)malloc(count * sizeof(cl_uchar)); + if(!p) return 0; + + for( size_t i = 0; i < count; i++ ) + { + p[ i ] = (cl_uchar)genrand_int32(d); + } + + return (void*)p; + } + case (kShort): + { + cl_short *p = (cl_short *)malloc(count * sizeof(cl_short)); + if(!p) return 0; + + for( size_t i = 0; i < count; i++ ) + { + p[ i ] = (cl_short)genrand_int32(d); + } + + return (void*)p; + } + case (kUShort): + { + cl_ushort *p = (cl_ushort *)malloc(count * sizeof(cl_ushort)); + if(!p) return 0; + + 
for( size_t i = 0; i < count; i++ ) + { + p[ i ] = (cl_ushort)genrand_int32(d); + } + + return (void*)p; + } + case (kInt): + { + cl_int *p = (cl_int *)malloc(count * sizeof(cl_int)); + if(!p) return 0; + + for( size_t i = 0; i < count; i++ ) + { + p[ i ] = (cl_int)genrand_int32(d); + } + + return (void*)p; + } + case (kUInt): + { + cl_uint *p = (cl_uint *)malloc(count * sizeof(cl_uint)); + if(!p) return 0; + + for( size_t i = 0; i < count; i++ ) + { + p[ i ] = (cl_uint)genrand_int32(d); + } + + return (void*)p; + } + + case (kFloat): + { + cl_float *p = (cl_float *)malloc(count * sizeof(cl_float)); + if(!p) return 0; + + for( size_t i = 0; i < count; i++ ) + { + p[ i ] = get_random_float( 0.f, 1.f, d ); + } + + return (void*)p; + } + /* added support for half floats */ + case (kHalf): + { + cl_half *p = (cl_half *)malloc(count * sizeof(cl_half)); + if(!p) return 0; + + for( size_t i = 0; i < count; i++ ) + { + p[ i ] = float2half_rte(get_random_float( 0.f, 1.f, d )); + } + + return (void*)p; + } + default: + { + log_error("Invalid explicit type specified for create random data!\n"); + return 0; + } + } + return 0; +} + +void DumpGLBuffer(GLenum type, size_t width, size_t height, void* buffer) +{ + size_t i; + size_t count = width * height; + if(type == GL_BYTE) + { + cl_char* p = (cl_char*)buffer; + for(i = 0; i < count; i++) + log_info("[%4d] %3d %3d %3d %3d\n", (unsigned int)(i), + p[i* 4 + 0], + p[i* 4 + 1], + p[i* 4 + 2], + p[i* 4 + 3]); + } + else if(type == GL_UNSIGNED_BYTE) + { + cl_uchar* p = (cl_uchar*)buffer; + for(i = 0; i < count; i++) + log_info("[%4d] %3d %3d %3d %3d\n", (unsigned int)(i), + p[i* 4 + 0], + p[i* 4 + 1], + p[i* 4 + 2], + p[i* 4 + 3]); + } + else if(type == GL_INT) + { + cl_int* p = (cl_int*)buffer; + for(i = 0; i < count; i++) + log_info("[%4d] %3d %3d %3d %3d\n", (unsigned int)(i), + p[i* 4 + 0], + p[i* 4 + 1], + p[i* 4 + 2], + p[i* 4 + 3]); + } + else if(type == GL_UNSIGNED_INT) + { + cl_uint* p = (cl_uint*)buffer; + for(i = 0; i < 
count; i++) + log_info("[%4d] %3d %3d %3d %3d\n", (unsigned int)(i), + p[i* 4 + 0], + p[i* 4 + 1], + p[i* 4 + 2], + p[i* 4 + 3]); + } + else if(type == GL_SHORT) + { + cl_short* p = (cl_short*)buffer; + for(i = 0; i < count; i++) + log_info("[%4d] %3d %3d %3d %3d\n", (unsigned int)(i), + p[i* 4 + 0], + p[i* 4 + 1], + p[i* 4 + 2], + p[i* 4 + 3]); + } + else if(type == GL_UNSIGNED_SHORT) + { + cl_ushort* p = (cl_ushort*)buffer; + for(i = 0; i < count; i++) + log_info("[%4d] %3d %3d %3d %3d\n", (unsigned int)(i), + p[i* 4 + 0], + p[i* 4 + 1], + p[i* 4 + 2], + p[i* 4 + 3]); + } + else if(type == GL_FLOAT) + { + cl_float* p = (cl_float*)buffer; + for(i = 0; i < count; i++) + log_info("[%4d] %#f %#f %#f %#f\n", (unsigned int)(i), + p[i* 4 + 0], + p[i* 4 + 1], + p[i* 4 + 2], + p[i* 4 + 3]); + } +} + +#if defined(_WIN32) +#include <string.h> + +GLboolean gluCheckExtension(const GLubyte *extName, const GLubyte *extString) +{ + const size_t len = strlen((const char*)extName); + const char* str = (const char*)extString; + + while (str != NULL) { + str = strstr(str, (const char*)extName); + if (str == NULL) { + break; + } + if ((str > (const char*)extString || str[-1] == ' ') + && (str[len] == ' ' || str[len] == '\0')) { + return GL_TRUE; + } + str = strchr(str + len, ' '); + } + + return GL_FALSE; +} + +#endif + +// Function pointers for the GL/CL calls +clCreateFromGLBuffer_fn clCreateFromGLBuffer_ptr; +clCreateFromGLTexture_fn clCreateFromGLTexture_ptr; +clCreateFromGLRenderbuffer_fn clCreateFromGLRenderbuffer_ptr; +clGetGLObjectInfo_fn clGetGLObjectInfo_ptr; +clGetGLTextureInfo_fn clGetGLTextureInfo_ptr; +clEnqueueAcquireGLObjects_fn clEnqueueAcquireGLObjects_ptr; +clEnqueueReleaseGLObjects_fn clEnqueueReleaseGLObjects_ptr; + +int init_clgl_ext(cl_platform_id platform_id) +{ + // Create the function pointer table + clCreateFromGLBuffer_ptr = (clCreateFromGLBuffer_fn)clGetExtensionFunctionAddressForPlatform(platform_id, "clCreateFromGLBuffer"); + if (clCreateFromGLBuffer_ptr == NULL) 
+ { + log_error("clGetExtensionFunctionAddressForPlatform(clCreateFromGLBuffer) returned NULL.\n"); + return -1; + } + + clCreateFromGLTexture_ptr = (clCreateFromGLTexture_fn)clGetExtensionFunctionAddressForPlatform(platform_id, "clCreateFromGLTexture"); + if (clCreateFromGLTexture_ptr == NULL) + { + log_error("clGetExtensionFunctionAddressForPlatform(clCreateFromGLTexture) returned NULL.\n"); + return -1; + } + + clCreateFromGLRenderbuffer_ptr = (clCreateFromGLRenderbuffer_fn)clGetExtensionFunctionAddressForPlatform(platform_id, "clCreateFromGLRenderbuffer"); + if (clCreateFromGLRenderbuffer_ptr == NULL) + { + log_error("clGetExtensionFunctionAddressForPlatform(clCreateFromGLRenderbuffer) returned NULL.\n"); + return -1; + } + + clGetGLObjectInfo_ptr = (clGetGLObjectInfo_fn)clGetExtensionFunctionAddressForPlatform(platform_id, "clGetGLObjectInfo"); + if (clGetGLObjectInfo_ptr == NULL) + { + log_error("clGetExtensionFunctionAddressForPlatform(clGetGLObjectInfo) returned NULL.\n"); + return -1; + } + + clGetGLTextureInfo_ptr = (clGetGLTextureInfo_fn)clGetExtensionFunctionAddressForPlatform(platform_id, "clGetGLTextureInfo"); + if (clGetGLTextureInfo_ptr == NULL) + { + log_error("clGetExtensionFunctionAddressForPlatform(clGetGLTextureInfo) returned NULL.\n"); + return -1; + } + + clEnqueueAcquireGLObjects_ptr = (clEnqueueAcquireGLObjects_fn)clGetExtensionFunctionAddressForPlatform(platform_id, "clEnqueueAcquireGLObjects"); + if (clEnqueueAcquireGLObjects_ptr == NULL) + { + log_error("clGetExtensionFunctionAddressForPlatform(clEnqueueAcquireGLObjects) returned NULL.\n"); + return -1; + } + + clEnqueueReleaseGLObjects_ptr = (clEnqueueReleaseGLObjects_fn)clGetExtensionFunctionAddressForPlatform(platform_id, "clEnqueueReleaseGLObjects"); + if (clEnqueueReleaseGLObjects_ptr == NULL) + { + log_error("clGetExtensionFunctionAddressForPlatform(clEnqueueReleaseGLObjects) returned NULL.\n"); + return -1; + } + + return 0; +} + + diff --git a/test_common/gles/helpers.h 
b/test_common/gles/helpers.h new file mode 100644 index 00000000..95faae12 --- /dev/null +++ b/test_common/gles/helpers.h @@ -0,0 +1,247 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef _helpers_h +#define _helpers_h + +#include +#include +#include +#include + +#if !defined(_WIN32) +#include +#endif + +#include +#include + +#if !defined (__APPLE__) +#include +#include "gl_headers.h" +#include +#else +#include "gl_headers.h" +#endif + +#include "../../test_common/harness/errorHelpers.h" +#include "../../test_common/harness/kernelHelpers.h" +#include "../../test_common/harness/threadTesting.h" +#include "../../test_common/harness/typeWrappers.h" +#include "../../test_common/harness/conversions.h" +#include "../../test_common/harness/mt19937.h" + +typedef cl_mem +(CL_API_CALL *clCreateFromGLBuffer_fn)(cl_context context, + cl_mem_flags flags, + GLuint bufobj, + int * errcode_ret); + +typedef cl_mem +(CL_API_CALL *clCreateFromGLTexture_fn)(cl_context context , + cl_mem_flags flags , + GLenum target , + GLint miplevel , + GLuint texture , + cl_int * errcode_ret) ; + +typedef cl_mem +(CL_API_CALL *clCreateFromGLRenderbuffer_fn)(cl_context context , + cl_mem_flags flags , + GLuint renderbuffer , + cl_int * errcode_ret) ; + +typedef cl_int +(CL_API_CALL *clGetGLObjectInfo_fn)(cl_mem memobj , + cl_gl_object_type * gl_object_type , + GLuint * gl_object_name) ; + +typedef cl_int +(CL_API_CALL 
*clGetGLTextureInfo_fn)(cl_mem memobj , + cl_gl_texture_info param_name , + size_t param_value_size , + void * param_value , + size_t * param_value_size_ret) ; + +typedef cl_int +(CL_API_CALL *clEnqueueAcquireGLObjects_fn)(cl_command_queue command_queue , + cl_uint num_objects , + const cl_mem * mem_objects , + cl_uint num_events_in_wait_list , + const cl_event * event_wait_list , + cl_event * event) ; + +typedef cl_int +(CL_API_CALL *clEnqueueReleaseGLObjects_fn)(cl_command_queue command_queue , + cl_uint num_objects , + const cl_mem * mem_objects , + cl_uint num_events_in_wait_list , + const cl_event * event_wait_list , + cl_event * event) ; + + +extern clCreateFromGLBuffer_fn clCreateFromGLBuffer_ptr; +extern clCreateFromGLTexture_fn clCreateFromGLTexture_ptr; +extern clCreateFromGLRenderbuffer_fn clCreateFromGLRenderbuffer_ptr; +extern clGetGLObjectInfo_fn clGetGLObjectInfo_ptr; +extern clGetGLTextureInfo_fn clGetGLTextureInfo_ptr; +extern clEnqueueAcquireGLObjects_fn clEnqueueAcquireGLObjects_ptr; +extern clEnqueueReleaseGLObjects_fn clEnqueueReleaseGLObjects_ptr; + + +class glBufferWrapper +{ + public: + glBufferWrapper() { mBuffer = 0; } + glBufferWrapper( GLuint b ) { mBuffer = b; } + ~glBufferWrapper() { if( mBuffer != 0 ) glDeleteBuffers( 1, &mBuffer ); } + + glBufferWrapper & operator=( const GLuint &rhs ) { mBuffer = rhs; return *this; } + operator GLuint() { return mBuffer; } + operator GLuint *() { return &mBuffer; } + + GLuint * operator&() { return &mBuffer; } + + bool operator==( GLuint rhs ) { return mBuffer == rhs; } + + protected: + + GLuint mBuffer; +}; + +class glTextureWrapper +{ + public: + glTextureWrapper() { mBuffer = 0; } + glTextureWrapper( GLuint b ) { mBuffer = b; } + ~glTextureWrapper() { if( mBuffer != 0 ) glDeleteTextures( 1, &mBuffer ); } + + glTextureWrapper & operator=( const GLuint &rhs ) { mBuffer = rhs; return *this; } + operator GLuint() { return mBuffer; } + operator GLuint *() { return &mBuffer; } + + GLuint * operator&() 
{ return &mBuffer; } + + bool operator==( GLuint rhs ) { return mBuffer == rhs; } + + protected: + + GLuint mBuffer; +}; + +class glRenderbufferWrapper +{ + public: + glRenderbufferWrapper() { mBuffer = 0; } + glRenderbufferWrapper( GLuint b ) { mBuffer = b; } + ~glRenderbufferWrapper() { if( mBuffer != 0 ) glDeleteRenderbuffersEXT( 1, &mBuffer ); } + + glRenderbufferWrapper & operator=( const GLuint &rhs ) { mBuffer = rhs; return *this; } + operator GLuint() { return mBuffer; } + operator GLuint *() { return &mBuffer; } + + GLuint * operator&() { return &mBuffer; } + + bool operator==( GLuint rhs ) { return mBuffer == rhs; } + + protected: + + GLuint mBuffer; +}; + +class glFramebufferWrapper +{ + public: + glFramebufferWrapper() { mBuffer = 0; } + glFramebufferWrapper( GLuint b ) { mBuffer = b; } + ~glFramebufferWrapper() { if( mBuffer != 0 ) glDeleteFramebuffersEXT( 1, &mBuffer ); } + + glFramebufferWrapper & operator=( const GLuint &rhs ) { mBuffer = rhs; return *this; } + operator GLuint() { return mBuffer; } + operator GLuint *() { return &mBuffer; } + + GLuint * operator&() { return &mBuffer; } + + bool operator==( GLuint rhs ) { return mBuffer == rhs; } + + protected: + + GLuint mBuffer; +}; + + +// Helper functions (defined in helpers.cpp) +extern void * CreateGLTexture2D( size_t width, size_t height, + GLenum target, GLenum glFormat, + GLenum internalFormat, GLenum glType, + ExplicitType type, GLuint *outTextureID, + int *outError, bool allocateMem, MTdata d ); + + +extern void * CreateGLTexture3D( size_t width, size_t height, size_t depth, + GLenum target, GLenum glFormat, + GLenum internalFormat, GLenum glType, + ExplicitType type, GLuint *outTextureID, + int *outError, MTdata d, bool allocateMem = true ); + +extern void * ReadGLTexture( GLenum glTarget, GLuint glTexture, + GLenum glFormat, GLenum glInternalFormat, + GLenum glType, ExplicitType typeToReadAs, + size_t outWidth, size_t outHeight ); + +void * CreateGLRenderbuffer( GLsizei width, GLsizei 
height, + GLenum attachment, + GLenum rbFormat, GLenum rbType, + GLenum texFormat, GLenum texType, + ExplicitType type, + GLuint *outFramebuffer, + GLuint *outRenderbuffer, + int *outError, MTdata d, bool allocateMem ); + +int CreateGLRenderbufferRaw( GLsizei width, GLsizei height, + GLenum attachment, + GLenum rbFormat, GLenum rbType, + GLuint *outFramebuffer, + GLuint *outRenderbuffer ); + +void * ReadGLRenderbuffer( GLuint glFramebuffer, GLuint glRenderbuffer, + GLenum attachment, + GLenum rbFormat, GLenum rbType, + GLenum texFormat, GLenum texType, + ExplicitType typeToReadAs, + size_t outWidth, size_t outHeight ); + +extern void DumpGLBuffer(GLenum type, size_t width, size_t height, void* buffer); +extern const char *GetGLTypeName( GLenum type ); +extern const char *GetGLAttachmentName( GLenum att ); +extern const char *GetGLTargetName( GLenum tgt ); +extern const char *GetGLBaseFormatName( GLenum baseformat ); +extern const char *GetGLFormatName( GLenum format ); + +extern void* CreateRandomData( ExplicitType type, size_t count, MTdata d ); + +extern GLenum GetGLFormat(GLenum internalFormat); +extern GLenum GetGLTypeForExplicitType(ExplicitType type); +extern size_t GetGLTypeSize(GLenum type); +extern ExplicitType GetExplicitTypeForGLType(GLenum type); + +extern GLenum get_base_gl_target( GLenum target ); + +extern int init_clgl_ext( cl_platform_id platform_id ); + +#endif // _helpers_h + + + diff --git a/test_common/gles/setup.h b/test_common/gles/setup.h new file mode 100644 index 00000000..0b2a9fdf --- /dev/null +++ b/test_common/gles/setup.h @@ -0,0 +1,46 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef _setup_h +#define _setup_h + +#include +#include +#include +#include "gl_headers.h" +#include + + +// Note: the idea here is to have every platform define their own setup.cpp file that implements a GLEnvironment +// subclass internally, then return it as a definition for GLEnvironment::Create + +class GLEnvironment +{ + public: + GLEnvironment() {} + virtual ~GLEnvironment() {} + + virtual int Init( int *argc, char **argv, int use_opengl_32 ) = 0; + virtual cl_context CreateCLContext( void ) = 0; + virtual int SupportsCLGLInterop( cl_device_type device_type) = 0; + + // cleanup EGL environment properly when the test exit. + // This change does not affect any functionality of the test + virtual void terminate_egl_display() = 0; + + static GLEnvironment * Instance( void ); +}; + +#endif // _setup_h diff --git a/test_common/harness/Jamfile b/test_common/harness/Jamfile new file mode 100644 index 00000000..21ebcc24 --- /dev/null +++ b/test_common/harness/Jamfile @@ -0,0 +1,18 @@ +project + : requirements . + gcc:"-xc++" + msvc:"/TP" + off + : usage-requirements . 
+ ; + +local harness.objs ; +for source in [ glob *.c *.cpp ] +{ + harness.objs += [ obj $(source:B).obj : $(source) ] ; +} + +alias harness : $(harness.objs) + : /Runtime//OpenCL.lib : + : /Runtime//OpenCL.lib + ; diff --git a/test_common/harness/Makefile b/test_common/harness/Makefile new file mode 100644 index 00000000..2ac60643 --- /dev/null +++ b/test_common/harness/Makefile @@ -0,0 +1,41 @@ +ifdef BUILD_WITH_ATF +ATF = -framework ATF +USE_ATF = -DUSE_ATF +endif + +SRCS = conversions.c \ + errorHelpers.c \ + genericThread.cpp \ + imageHelpers.cpp \ + kernelHelpers.c \ + mt19937.c \ + rounding_mode.c \ + testHarness.c \ + testHarness.cpp \ + ThreadPool.c \ + threadTesting.c \ + typeWrappers.cpp + +DEFINES = DONT_TEST_GARBAGE_POINTERS + +SOURCES = $(abspath $(SRCS)) +LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries +LIBPATH += -L. +HEADERS = +INCLUDE = +COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32 +CC = c++ +CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE) +CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE) +LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF} + +OBJECTS := ${SOURCES:.c=.o} +OBJECTS := ${OBJECTS:.cpp=.o} + +all: $(OBJECTS) + +clean: + rm -f $(OBJECTS) + +.DEFAULT: + @echo The target \"$@\" does not exist in Makefile. diff --git a/test_common/harness/ThreadPool.c b/test_common/harness/ThreadPool.c new file mode 100644 index 00000000..16f31e74 --- /dev/null +++ b/test_common/harness/ThreadPool.c @@ -0,0 +1,931 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "ThreadPool.h" +#include "errorHelpers.h" +#include "fpcontrol.h" +#include +#include + +#if defined( __APPLE__ ) || defined( __linux__ ) || defined( _WIN32 ) // or any other POSIX system + +#if defined( _WIN32 ) +#include +#if defined(_MSC_VER) +#include +#endif +#include "mingw_compat.h" +#include +#else // !_WIN32 +#include +#include +#include +#ifdef __linux__ +#include +#endif +#endif // !_WIN32 + +// declarations +#ifdef _WIN32 +void ThreadPool_WorkerFunc( void *p ); +#else +void *ThreadPool_WorkerFunc( void *p ); +#endif +void ThreadPool_Init(void); +void ThreadPool_Exit(void); + +#if defined (__MINGW32__) + // Mutex for implementing super heavy atomic operations if you don't have GCC or MSVC + CRITICAL_SECTION gAtomicLock; +#elif defined( __GNUC__ ) || defined( _MSC_VER) +#else + pthread_mutex_t gAtomicLock; +#endif + +// Atomic add operator with mem barrier. Mem barrier needed to protect state modified by the worker functions. +cl_int ThreadPool_AtomicAdd( volatile cl_int *a, cl_int b ) +{ +#if defined (__MINGW32__) + // No atomics on Mingw32 + EnterCriticalSection(&gAtomicLock); + cl_int old = *a; + *a = old + b; + LeaveCriticalSection(&gAtomicLock); + return old; +#elif defined( __GNUC__ ) + // GCC extension: http://gcc.gnu.org/onlinedocs/gcc/Atomic-Builtins.html#Atomic-Builtins + return __sync_fetch_and_add( a, b ); + // do we need __sync_synchronize() here, too? 
GCC docs are unclear whether __sync_fetch_and_add does a synchronize +#elif defined( _MSC_VER ) + return (cl_int) _InterlockedExchangeAdd( (volatile LONG*) a, (LONG) b ); +#else + #warning Please add a atomic add implementation here, with memory barrier. Fallback code is slow. + if( pthread_mutex_lock(&gAtomicLock) ) + log_error( "Atomic operation failed. pthread_mutex_lock(&gAtomicLock) returned an error\n"); + cl_int old = *a; + *a = old + b; + if( pthread_mutex_unlock(&gAtomicLock) ) + log_error( "Failed to release gAtomicLock. Further atomic operations may deadlock!\n"); + return old; +#endif +} + +#if defined( _WIN32 ) +// Uncomment the following line if Windows XP support is not required. +// #define HAS_INIT_ONCE_EXECUTE_ONCE 1 + +#if defined(HAS_INIT_ONCE_EXECUTE_ONCE) +#define _INIT_ONCE INIT_ONCE +#define _PINIT_ONCE PINIT_ONCE +#define _InitOnceExecuteOnce InitOnceExecuteOnce +#else // !HAS_INIT_ONCE_EXECUTE_ONCE + +typedef volatile LONG _INIT_ONCE; +typedef _INIT_ONCE *_PINIT_ONCE; +typedef BOOL (CALLBACK *_PINIT_ONCE_FN)(_PINIT_ONCE, PVOID, PVOID *); + +#define _INIT_ONCE_UNINITIALIZED 0 +#define _INIT_ONCE_IN_PROGRESS 1 +#define _INIT_ONCE_DONE 2 + +static BOOL _InitOnceExecuteOnce( + _PINIT_ONCE InitOnce, + _PINIT_ONCE_FN InitFn, + PVOID Parameter, + LPVOID *Context +) +{ + while ( *InitOnce != _INIT_ONCE_DONE ) + { + if (*InitOnce != _INIT_ONCE_IN_PROGRESS && _InterlockedCompareExchange( InitOnce, _INIT_ONCE_IN_PROGRESS, _INIT_ONCE_UNINITIALIZED ) == _INIT_ONCE_UNINITIALIZED ) + { + InitFn( InitOnce, Parameter, Context ); + *InitOnce = _INIT_ONCE_DONE; + return TRUE; + } + Sleep( 1 ); + } + return TRUE; +} +#endif // !HAS_INIT_ONCE_EXECUTE_ONCE + +// Uncomment the following line if Windows XP support is not required. 
+// #define HAS_CONDITION_VARIABLE 1 + +#if defined(HAS_CONDITION_VARIABLE) +#define _CONDITION_VARIABLE CONDITION_VARIABLE +#define _InitializeConditionVariable InitializeConditionVariable +#define _SleepConditionVariableCS SleepConditionVariableCS +#define _WakeAllConditionVariable WakeAllConditionVariable +#else // !HAS_CONDITION_VARIABLE +typedef struct +{ + HANDLE mEvent; // Used to park the thread. + CRITICAL_SECTION mLock[1]; // Used to protect mWaiters, mGeneration and mReleaseCount. + volatile cl_int mWaiters; // Number of threads waiting on this cond var. + volatile cl_int mGeneration; // Wait generation count. + volatile cl_int mReleaseCount; // Number of releases to execute before reseting the event. +} _CONDITION_VARIABLE; + +typedef _CONDITION_VARIABLE *_PCONDITION_VARIABLE; + +static void _InitializeConditionVariable( _PCONDITION_VARIABLE cond_var ) +{ + cond_var->mEvent = CreateEvent( NULL, TRUE, FALSE, NULL ); + InitializeCriticalSection( cond_var->mLock ); + cond_var->mWaiters = 0; + cond_var->mGeneration = 0; +#if !defined ( NDEBUG ) + cond_var->mReleaseCount = 0; +#endif // !NDEBUG +} + +static void _SleepConditionVariableCS( _PCONDITION_VARIABLE cond_var, PCRITICAL_SECTION cond_lock, DWORD ignored) +{ + EnterCriticalSection( cond_var->mLock ); + cl_int generation = cond_var->mGeneration; + ++cond_var->mWaiters; + LeaveCriticalSection( cond_var->mLock ); + LeaveCriticalSection( cond_lock ); + + while ( TRUE ) + { + WaitForSingleObject( cond_var->mEvent, INFINITE ); + EnterCriticalSection( cond_var->mLock ); + BOOL done = cond_var->mReleaseCount > 0 && cond_var->mGeneration != generation; + LeaveCriticalSection( cond_var->mLock ); + if ( done ) + { + break; + } + } + + EnterCriticalSection( cond_lock ); + EnterCriticalSection( cond_var->mLock ); + if ( --cond_var->mReleaseCount == 0 ) + { + ResetEvent( cond_var->mEvent ); + } + --cond_var->mWaiters; + LeaveCriticalSection( cond_var->mLock ); +} + +static void _WakeAllConditionVariable( 
_PCONDITION_VARIABLE cond_var ) +{ + EnterCriticalSection( cond_var->mLock ); + if (cond_var->mWaiters > 0 ) + { + ++cond_var->mGeneration; + cond_var->mReleaseCount = cond_var->mWaiters; + SetEvent( cond_var->mEvent ); + } + LeaveCriticalSection( cond_var->mLock ); +} +#endif // !HAS_CONDITION_VARIABLE +#endif // _WIN32 + +#define MAX_COUNT (1<<29) + +// Global state to coordinate whether the threads have been launched successfully or not +#if defined( _MSC_VER ) && (_WIN32_WINNT >= 0x600) +static _INIT_ONCE threadpool_init_control; +#elif defined (_WIN32) // MingW of XP +static int threadpool_init_control; +#else // Posix platforms +pthread_once_t threadpool_init_control = PTHREAD_ONCE_INIT; +#endif +cl_int threadPoolInitErr = -1; // set to CL_SUCCESS on successful thread launch + +// critical region lock around ThreadPool_Do. We can only run one ThreadPool_Do at a time, +// because we are too lazy to set up a queue here, and don't expect to need one. +#if defined( _WIN32 ) +CRITICAL_SECTION gThreadPoolLock[1]; +#else // !_WIN32 +pthread_mutex_t gThreadPoolLock; +#endif // !_WIN32 + +// Condition variable to park ThreadPool threads when not working +#if defined( _WIN32 ) +CRITICAL_SECTION cond_lock[1]; +_CONDITION_VARIABLE cond_var[1]; +#else // !_WIN32 +pthread_mutex_t cond_lock; +pthread_cond_t cond_var; +#endif // !_WIN32 +volatile cl_int gRunCount = 0; // Condition variable state. How many iterations on the function left to run. + // set to CL_INT_MAX to cause worker threads to exit. Note: this value might go negative. + +// State that only changes when the threadpool is not working. 
+volatile TPFuncPtr gFunc_ptr = NULL; +volatile void *gUserInfo = NULL; +volatile cl_int gJobCount = 0; + +// State that may change while the thread pool is working +volatile cl_int jobError = CL_SUCCESS; // err code return for the job as a whole + +// Condition variable to park caller while waiting +#if defined( _WIN32 ) +HANDLE caller_event; +#else // !_WIN32 +pthread_mutex_t caller_cond_lock; +pthread_cond_t caller_cond_var; +#endif // !_WIN32 +volatile cl_int gRunning = 0; // # of threads intended to be running. Running threads will decrement this as they discover they've run out of work to do. + +// The total number of threads launched. +volatile cl_int gThreadCount = 0; +#ifdef _WIN32 +void ThreadPool_WorkerFunc( void *p ) +#else +void *ThreadPool_WorkerFunc( void *p ) +#endif +{ + cl_uint threadID = ThreadPool_AtomicAdd( (volatile cl_int *) p, 1 ); + cl_int item = ThreadPool_AtomicAdd( &gRunCount, -1 ); +// log_info( "ThreadPool_WorkerFunc start: gRunning = %d\n", gRunning ); + + while( MAX_COUNT > item ) + { + cl_int err; + + // check for more work to do + if( 0 >= item ) + { +// log_info( "Thread %d has run out of work.\n", threadID ); + + // No work to do. Attempt to block waiting for work +#if defined( _WIN32 ) + EnterCriticalSection( cond_lock ); +#else // !_WIN32 + if((err = pthread_mutex_lock( &cond_lock) )) + { + log_error("Error %d from pthread_mutex_lock. Worker %d unable to block waiting for work. ThreadPool_WorkerFunc failed.\n", err, threadID ); + goto exit; + } +#endif // !_WIN32 + + cl_int remaining = ThreadPool_AtomicAdd( &gRunning, -1 ); +// log_info( "ThreadPool_WorkerFunc: gRunning = %d\n", remaining - 1 ); + if( 1 == remaining ) + { // last thread out signal the main thread to wake up +#if defined( _WIN32 ) + SetEvent( caller_event ); +#else // !_WIN32 + if((err = pthread_mutex_lock( &caller_cond_lock) )) + { + log_error("Error %d from pthread_mutex_lock. 
Unable to wake caller.\n", err ); + goto exit; + } + if( (err = pthread_cond_broadcast( &caller_cond_var ))) + { + log_error("Error %d from pthread_cond_broadcast. Unable to wake up main thread. ThreadPool_WorkerFunc failed.\n", err ); + goto exit; + } + if((err = pthread_mutex_unlock( &caller_cond_lock) )) + { + log_error("Error %d from pthread_mutex_lock. Unable to wake caller.\n", err ); + goto exit; + } +#endif // !_WIN32 + } + + // loop in case we are woken only to discover that some other thread already did all the work + while( 0 >= item ) + { +#if defined( _WIN32 ) + _SleepConditionVariableCS( cond_var, cond_lock, INFINITE ); +#else // !_WIN32 + if((err = pthread_cond_wait( &cond_var, &cond_lock) )) + { + log_error("Error %d from pthread_cond_wait. Unable to block for waiting for work. ThreadPool_WorkerFunc failed.\n", err ); + pthread_mutex_unlock( &cond_lock); + goto exit; + } +#endif // !_WIN32 + + // try again to get a valid item id + item = ThreadPool_AtomicAdd( &gRunCount, -1 ); + if( MAX_COUNT <= item ) // exit if we are done + { +#if defined( _WIN32 ) + LeaveCriticalSection( cond_lock ); +#else // !_WIN32 + pthread_mutex_unlock( &cond_lock); +#endif // !_WIN32 + goto exit; + } + } + + ThreadPool_AtomicAdd( &gRunning, 1 ); +// log_info( "Thread %d has found work.\n", threadID); + +#if defined( _WIN32 ) + LeaveCriticalSection( cond_lock ); +#else // !_WIN32 + if((err = pthread_mutex_unlock( &cond_lock) )) + { + log_error("Error %d from pthread_mutex_unlock. Unable to block for waiting for work. ThreadPool_WorkerFunc failed.\n", err ); + goto exit; + } +#endif // !_WIN32 + + } + + // we have a valid item, so do the work + if( CL_SUCCESS == jobError ) // but only if we haven't already encountered an error + { +// log_info( "Thread %d doing job %d\n", threadID, item - 1); + +#if defined(__APPLE__) && defined(__arm__) + // On most platforms which support denorm, default is FTZ off. 
However, + // on some hardware where the reference is computed, default might be flush denorms to zero e.g. arm. + // This creates issues in result verification. Since spec allows the implementation to either flush or + // not flush denorms to zero, an implementation may choose not be flush i.e. return denorm result whereas + // reference result may be zero (flushed denorm). Hence we need to disable denorm flushing on host side + // where reference is being computed to make sure we get non-flushed reference result. If implementation + // returns flushed result, we correctly take care of that in verification code. + FPU_mode_type oldMode; + DisableFTZ( &oldMode ); +#endif + + // Call the user's function with this item ID + err = gFunc_ptr( item - 1, threadID, (void*) gUserInfo ); +#if defined(__APPLE__) && defined(__arm__) + // Restore FP state + RestoreFPState( &oldMode ); +#endif + + if( err ) + { +#if (__MINGW32__) + EnterCriticalSection(&gAtomicLock); + if( jobError == CL_SUCCESS ) + jobError = err; + gRunCount = 0; + LeaveCriticalSection(&gAtomicLock); +#elif defined( __GNUC__ ) + // GCC extension: http://gcc.gnu.org/onlinedocs/gcc/Atomic-Builtins.html#Atomic-Builtins + // set the new error if we are the first one there. + __sync_val_compare_and_swap( &jobError, CL_SUCCESS, err ); + + // drop run count to 0 + gRunCount = 0; + __sync_synchronize(); +#elif defined( _MSC_VER ) + // set the new error if we are the first one there. + _InterlockedCompareExchange( (volatile LONG*) &jobError, err, CL_SUCCESS ); + + // drop run count to 0 + gRunCount = 0; + _mm_mfence(); +#else + if( pthread_mutex_lock(&gAtomicLock) ) + log_error( "Atomic operation failed. pthread_mutex_lock(&gAtomicLock) returned an error\n"); + if( jobError == CL_SUCCESS ) + jobError = err; + gRunCount = 0; + if( pthread_mutex_unlock(&gAtomicLock) ) + log_error( "Failed to release gAtomicLock. 
Further atomic operations may deadlock\n"); +#endif + } + } + + // get the next item + item = ThreadPool_AtomicAdd( &gRunCount, -1 ); + } + +exit: + log_info( "ThreadPool: thread %d exiting.\n", threadID ); + ThreadPool_AtomicAdd( &gThreadCount, -1 ); +#if !defined(_WIN32) + return NULL; +#endif +} + +// SetThreadCount() may be used to artifically set the number of worker threads +// If the value is 0 (the default) the number of threads will be determined based on +// the number of CPU cores. If it is a unicore machine, then 2 will be used, so +// that we still get some testing for thread safety. +// +// If count < 2 or the CL_TEST_SINGLE_THREADED environment variable is set then the +// code will run single threaded, but will report an error to indicate that the test +// is invalid. This option is intended for debugging purposes only. It is suggested +// as a convention that test apps set the thread count to 1 in response to the -m flag. +// +// SetThreadCount() must be called before the first call to GetThreadCount() or ThreadPool_Do(), +// otherwise the behavior is indefined. +void SetThreadCount( int count ) +{ + if( threadPoolInitErr == CL_SUCCESS ) + { + log_error( "Error: It is illegal to set the thread count after the first call to ThreadPool_Do or GetThreadCount\n" ); + abort(); + } + + gThreadCount = count; +} + +void ThreadPool_Init(void) +{ + cl_int i; + int err; + volatile cl_uint threadID = 0; + + // Check for manual override of multithreading code. We add this for better debuggability. 
+ if( getenv( "CL_TEST_SINGLE_THREADED" ) ) + { + gThreadCount = 1; + return; + } + + // Figure out how many threads to run -- check first for non-zero to give the implementation the chance + if( 0 == gThreadCount ) + { +#if defined(_MSC_VER) || defined (__MINGW64__) + PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer = NULL; + DWORD length = 0; + + GetLogicalProcessorInformation( NULL, &length ); + buffer = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION) malloc( length ); + if( buffer != NULL ) + { + if ( GetLogicalProcessorInformation( buffer, &length ) == TRUE ) + { + PSYSTEM_LOGICAL_PROCESSOR_INFORMATION ptr = buffer; + while( ptr < &buffer[ length / sizeof( SYSTEM_LOGICAL_PROCESSOR_INFORMATION ) ] ) + { + if( ptr->Relationship == RelationProcessorCore ) + { + // Count the number of bits in ProcessorMask (number of logical cores) + ULONG mask = ptr->ProcessorMask; + while( mask ) + { + ++gThreadCount; + mask &= mask - 1; // Remove 1 bit at a time + } + } + ++ptr; + } + } + free(buffer); + } +#elif defined (__MINGW32__) + { + #warning How about this, instead of hard coding it to 2? 
+ SYSTEM_INFO sysinfo; + GetSystemInfo( &sysinfo ); + gThreadCount = sysinfo.dwNumberOfProcessors; + } +#elif defined (__linux__) && !defined(__ANDROID__) + cpu_set_t affinity; + if ( 0 == sched_getaffinity(0, sizeof(cpu_set_t), &affinity) ) + { +#if !(defined(CPU_COUNT)) + gThreadCount = 1; +#else + gThreadCount = CPU_COUNT(&affinity); +#endif + } + else + { + gThreadCount = (cl_int) sysconf(_SC_NPROCESSORS_CONF); // Hopefully your system returns logical cpus here, as does MacOS X + } +#else // !_WIN32 + gThreadCount = (cl_int) sysconf(_SC_NPROCESSORS_CONF); // Hopefully your system returns logical cpus here, as does MacOS X +#endif // !_WIN32 + + // Multithreaded tests are required to run multithreaded even on unicore systems so as to test thread safety + if( 1 == gThreadCount ) + gThreadCount = 2; + } + + // When working in 32 bit limit the thread number to 12 + // This fix was made due to memory issues in integer_ops test + // When running integer_ops, the test opens as many threads as the + // machine has and each thread allocates a fixed amount of memory + // When running this test on dual socket machine in 32-bit, the + // process memory is not sufficient and the test fails + #if defined(_WIN32) && !defined(_M_X64) + if (gThreadCount > 12) { + gThreadCount = 12; + } + #endif + + //Allow the app to set thread count to <0 for debugging purposes. This will cause the test to run single threaded. + if( gThreadCount < 2 ) + { + log_error( "ERROR: Running single threaded because thread count < 2. \n*** TEST IS INVALID! ***\n"); + gThreadCount = 1; + return; + } + +#if defined( _WIN32 ) + InitializeCriticalSection( gThreadPoolLock ); + InitializeCriticalSection( cond_lock ); + _InitializeConditionVariable( cond_var ); + caller_event = CreateEvent( NULL, FALSE, FALSE, NULL ); +#elif defined (__GNUC__) + // Dont rely on PTHREAD_MUTEX_INITIALIZER for intialization of a mutex since it might cause problem + // with some flavors of gcc compilers. 
+ pthread_cond_init(&cond_var, NULL); + pthread_mutex_init(&cond_lock ,NULL); + pthread_cond_init(&caller_cond_var, NULL); + pthread_mutex_init(&caller_cond_lock, NULL); + pthread_mutex_init(&gThreadPoolLock, NULL); +#endif + +#if !(defined(__GNUC__) || defined(_MSC_VER) || defined(__MINGW32__)) + pthread_mutex_initialize(gAtomicLock); +#elif defined (__MINGW32__) + InitializeCriticalSection(&gAtomicLock); +#endif + // Make sure the last thread done in the work pool doesn't signal us to wake before we get to the point where we are supposed to wait + // That would cause a deadlock. +#if !defined( _WIN32 ) + if((err = pthread_mutex_lock( &caller_cond_lock) )) + { + log_error("Error %d from pthread_mutex_lock. Unable to block for work to finish. ThreadPool_Init failed.\n", err ); + gThreadCount = 1; + return; + } +#endif // !_WIN32 + + gRunning = gThreadCount; + // init threads + for( i = 0; i < gThreadCount; i++ ) + { +#if defined( _WIN32 ) + uintptr_t handle = _beginthread(ThreadPool_WorkerFunc, 0, (void*) &threadID); + err = ( handle == 0 ); +#else // !_WIN32 + pthread_t tid = 0; + err = pthread_create( &tid, NULL, ThreadPool_WorkerFunc, (void*) &threadID ); +#endif // !_WIN32 + if( err ) + { + log_error( "Error %d launching thread %d\n", err, i ); + threadPoolInitErr = err; + gThreadCount = i; + break; + } + } + + atexit( ThreadPool_Exit ); + +// block until they are done launching. + do + { +#if defined( _WIN32 ) + WaitForSingleObject( caller_event, INFINITE ); +#else // !_WIN32 + if((err = pthread_cond_wait( &caller_cond_var, &caller_cond_lock) )) + { + log_error("Error %d from pthread_cond_wait. Unable to block for work to finish. ThreadPool_Init failed.\n", err ); + pthread_mutex_unlock( &caller_cond_lock); + return; + } +#endif // !_WIN32 + } + while( gRunCount != -gThreadCount ); +#if !defined( _WIN32 ) + if((err = pthread_mutex_unlock( &caller_cond_lock) )) + { + log_error("Error %d from pthread_mutex_unlock. Unable to block for work to finish. 
ThreadPool_Init failed.\n", err ); + return; + } +#endif // !_WIN32 + + threadPoolInitErr = CL_SUCCESS; +} + +#if defined(_MSC_VER) +static BOOL CALLBACK _ThreadPool_Init(_PINIT_ONCE InitOnce, PVOID Parameter, PVOID *lpContex) +{ + ThreadPool_Init(); + return TRUE; +} +#endif + +void ThreadPool_Exit(void) +{ + int err, count; + gRunCount = CL_INT_MAX; + +#if defined( __GNUC__ ) + // GCC extension: http://gcc.gnu.org/onlinedocs/gcc/Atomic-Builtins.html#Atomic-Builtins + __sync_synchronize(); +#elif defined( _MSC_VER ) + _mm_mfence(); +#else + #warning If this is a weakly ordered memory system, please add a memory barrier here to force this and everything else to memory before we proceed +#endif + + // spin waiting for threads to die + for (count = 0; 0 != gThreadCount && count < 1000; count++) + { +#if defined( _WIN32 ) + _WakeAllConditionVariable( cond_var ); + Sleep(1); +#else // !_WIN32 + if( (err = pthread_cond_broadcast( &cond_var ))) + { + log_error("Error %d from pthread_cond_broadcast. Unable to wake up work threads. ThreadPool_Exit failed.\n", err ); + break; + } + usleep(1000); +#endif // !_WIN32 + } + + if( gThreadCount ) + log_error( "Error: Thread pool timed out after 1 second with %d threads still active.\n", gThreadCount ); + else + log_info( "Thread pool exited in a orderly fashion.\n" ); +} + + +// Blocking API that farms out count jobs to a thread pool. +// It may return with some work undone if func_ptr() returns a non-zero +// result. +// +// This function obviously has its shortcommings. Only one call to ThreadPool_Do +// can be running at a time. It is not intended for general purpose use. +// If clEnqueueNativeKernelFn, out of order queues and a CL_DEVICE_TYPE_CPU were +// all available then it would make more sense to use those features. 
+cl_int ThreadPool_Do( TPFuncPtr func_ptr, + cl_uint count, + void *userInfo ) +{ + cl_int newErr; + cl_int err = 0; + // Lazily set up our threads +#if defined(_MSC_VER) && (_WIN32_WINNT >= 0x600) + err = !_InitOnceExecuteOnce( &threadpool_init_control, _ThreadPool_Init, NULL, NULL ); +#elif defined (_WIN32) + if (threadpool_init_control == 0) { + #warning This is buggy and race prone. Find a better way. + ThreadPool_Init(); + threadpool_init_control = 1; + } +#else //posix platform + err = pthread_once( &threadpool_init_control, ThreadPool_Init ); + if( err ) + { + log_error("Error %d from pthread_once. Unable to init threads. ThreadPool_Do failed.\n", err ); + return err; + } +#endif + // Single threaded code to handle case where threadpool wasn't allocated or was disabled by environment variable + if( threadPoolInitErr ) + { + cl_uint currentJob = 0; + cl_int result = CL_SUCCESS; + +#if defined(__APPLE__) && defined(__arm__) + // On most platforms which support denorm, default is FTZ off. However, + // on some hardware where the reference is computed, default might be flush denorms to zero e.g. arm. + // This creates issues in result verification. Since spec allows the implementation to either flush or + // not flush denorms to zero, an implementation may choose not be flush i.e. return denorm result whereas + // reference result may be zero (flushed denorm). Hence we need to disable denorm flushing on host side + // where reference is being computed to make sure we get non-flushed reference result. If implementation + // returns flushed result, we correctly take care of that in verification code. 
+ FPU_mode_type oldMode; + DisableFTZ( &oldMode ); +#endif + for( currentJob = 0; currentJob < count; currentJob++ ) + if((result = func_ptr( currentJob, 0, userInfo ))) + { +#if defined(__APPLE__) && defined(__arm__) + // Restore FP state before leaving + RestoreFPState( &oldMode ); +#endif + return result; + } + +#if defined(__APPLE__) && defined(__arm__) + // Restore FP state before leaving + RestoreFPState( &oldMode ); +#endif + + return CL_SUCCESS; + } + + if( count >= MAX_COUNT ) + { + log_error("Error: ThreadPool_Do count %d >= max threadpool count of %d\n", count, MAX_COUNT ); + return -1; + } + + // Enter critical region +#if defined( _WIN32 ) + EnterCriticalSection( gThreadPoolLock ); +#else // !_WIN32 + if( (err = pthread_mutex_lock( &gThreadPoolLock ))) + { + switch (err) + { + case EDEADLK: + log_error("Error EDEADLK returned in ThreadPool_Do(). ThreadPool_Do is not designed to work recursively!\n" ); + break; + case EINVAL: + log_error("Error EINVAL returned in ThreadPool_Do(). How did we end up with an invalid gThreadPoolLock?\n" ); + break; + default: + break; + } + return err; + } +#endif // !_WIN32 + + // Start modifying the job state observable by worker threads +#if defined( _WIN32 ) + EnterCriticalSection( cond_lock ); +#else // !_WIN32 + if((err = pthread_mutex_lock( &cond_lock) )) + { + log_error("Error %d from pthread_mutex_lock. Unable to wake up work threads. ThreadPool_Do failed.\n", err ); + goto exit; + } +#endif // !_WIN32 + + // Make sure the last thread done in the work pool doesn't signal us to wake before we get to the point where we are supposed to wait + // That would cause a deadlock. +#if !defined( _WIN32 ) + if((err = pthread_mutex_lock( &caller_cond_lock) )) + { + log_error("Error %d from pthread_mutex_lock. Unable to block for work to finish. 
ThreadPool_Do failed.\n", err ); + goto exit; + } +#endif // !_WIN32 + + // Prime the worker threads to get going + jobError = CL_SUCCESS; + gRunCount = gJobCount = count; + gFunc_ptr = func_ptr; + gUserInfo = userInfo; + +#if defined( _WIN32 ) + ResetEvent(caller_event); + _WakeAllConditionVariable( cond_var ); + LeaveCriticalSection( cond_lock ); +#else // !_WIN32 + if( (err = pthread_cond_broadcast( &cond_var ))) + { + log_error("Error %d from pthread_cond_broadcast. Unable to wake up work threads. ThreadPool_Do failed.\n", err ); + goto exit; + } + if((err = pthread_mutex_unlock( &cond_lock) )) + { + log_error("Error %d from pthread_mutex_unlock. Unable to wake up work threads. ThreadPool_Do failed.\n", err ); + goto exit; + } +#endif // !_WIN32 + +// block until they are done. It would be slightly more efficient to do some of the work here though. + do + { +#if defined( _WIN32 ) + WaitForSingleObject( caller_event, INFINITE ); +#else // !_WIN32 + if((err = pthread_cond_wait( &caller_cond_var, &caller_cond_lock) )) + { + log_error("Error %d from pthread_cond_wait. Unable to block for work to finish. ThreadPool_Do failed.\n", err ); + pthread_mutex_unlock( &caller_cond_lock); + goto exit; + } +#endif // !_WIN32 + } + while( gRunning ); +#if !defined(_WIN32) + if((err = pthread_mutex_unlock( &caller_cond_lock) )) + { + log_error("Error %d from pthread_mutex_unlock. Unable to block for work to finish. ThreadPool_Do failed.\n", err ); + goto exit; + } +#endif // !_WIN32 + + err = jobError; + +exit: + // exit critical region +#if defined( _WIN32 ) + LeaveCriticalSection( gThreadPoolLock ); +#else // !_WIN32 + newErr = pthread_mutex_unlock( &gThreadPoolLock ); + if( newErr) + { + log_error("Error %d from pthread_mutex_unlock. Unable to exit critical region. 
ThreadPool_Do failed.\n", newErr ); + return err; + } +#endif // !_WIN32 + + return err; +} + +cl_uint GetThreadCount( void ) +{ + // Lazily set up our threads +#if defined(_MSC_VER) && (_WIN32_WINNT >= 0x600) + cl_int err = !_InitOnceExecuteOnce( &threadpool_init_control, _ThreadPool_Init, NULL, NULL ); +#elif defined (_WIN32) + if (threadpool_init_control == 0) { + #warning This is buggy and race prone. Find a better way. + ThreadPool_Init(); + threadpool_init_control = 1; + } +#else + cl_int err = pthread_once( &threadpool_init_control, ThreadPool_Init ); + if( err ) + { + log_error("Error %d from pthread_once. Unable to init threads. ThreadPool_Do failed.\n", err ); + return err; + } +#endif // !_WIN32 + + if( gThreadCount < 1 ) + return 1; + + return gThreadCount; +} + +#else + +#ifndef MY_OS_REALLY_REALLY_DOESNT_SUPPORT_THREADS + #error ThreadPool implementation has not been multithreaded for this operating system. You must multithread this section. +#endif +// +// We require multithreading in parts of the test as a means of simultaneously testing reentrancy requirements +// of OpenCL API, while also checking +// +// A sample single threaded implementation follows, for documentation / bootstrapping purposes. +// It is not okay to use this for conformance testing!!! +// +// Exception: If your operating system does not support multithreaded execution of any kind, then you may use this code. +// + +cl_int ThreadPool_AtomicAdd( volatile cl_int *a, cl_int b ) +{ + cl_uint r = *a; + + // since this fallback code path is not multithreaded, we just do a regular add here + // If your operating system supports memory-barrier-atomics, use those here + *a = r + b; + + return r; +} + +// Blocking API that farms out count jobs to a thread pool. +// It may return with some work undone if func_ptr() returns a non-zero +// result. 
+cl_int ThreadPool_Do( TPFuncPtr func_ptr, + cl_uint count, + void *userInfo ) +{ + cl_uint currentJob = 0; + cl_int result = CL_SUCCESS; + +#ifndef MY_OS_REALLY_REALLY_DOESNT_SUPPORT_THREADS + // THIS FUNCTION IS NOT INTENDED FOR USE!! + log_error( "ERROR: Test must be multithreaded!\n" ); + exit(-1); +#else + static int spewCount = 0; + + if( 0 == spewCount ) + { + log_info( "\nWARNING: The operating system is claimed not to support threads of any sort. Running single threaded.\n" ); + spewCount = 1; + } +#endif + +// The multithreaded code should mimic this behavior: + for( currentJob = 0; currentJob < count; currentJob++ ) + if((result = func_ptr( currentJob, 0, userInfo ))) + return result; + + return CL_SUCCESS; +} + +cl_uint GetThreadCount( void ) +{ + return 1; +} + +void SetThreadCount( int count ) +{ + if( count > 1 ) + log_info( "WARNING: SetThreadCount(%d) ignored\n", count ); +} + +#endif diff --git a/test_common/harness/ThreadPool.h b/test_common/harness/ThreadPool.h new file mode 100644 index 00000000..7c392306 --- /dev/null +++ b/test_common/harness/ThreadPool.h @@ -0,0 +1,76 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef THREAD_POOL_H +#define THREAD_POOL_H + +#if defined( __APPLE__ ) + #include +#else + #include +#endif + +#if defined(__cplusplus) + extern "C" { +#endif + +// +// An atomic add operator +cl_int ThreadPool_AtomicAdd( volatile cl_int *a, cl_int b ); // returns old value + +// Your function prototype +// +// A function pointer to the function you want to execute in a multithreaded context. No +// synchronization primitives are provided, other than the atomic add above. You may not +// call ThreadPool_Do from your function. ThreadPool_AtomicAdd() and GetThreadCount() should +// work, however. +// +// job ids and thread ids are 0 based. If number of jobs or threads was 8, they will numbered be 0 through 7. +// Note that while every job will be run, it is not guaranteed that every thread will wake up before +// the work is done. +typedef cl_int (*TPFuncPtr)( cl_uint /*job_id*/, cl_uint /* thread_id */, void *userInfo ); + +// returns first non-zero result from func_ptr, or CL_SUCCESS if all are zero. +// Some workitems may not run if a non-zero result is returned from func_ptr(). +// This function may not be called from a TPFuncPtr. +cl_int ThreadPool_Do( TPFuncPtr func_ptr, + cl_uint count, + void *userInfo ); + +// Returns the number of worker threads that underlie the threadpool. The value passed +// as the TPFuncPtrs thread_id will be between 0 and this value less one, inclusive. +// This is safe to call from a TPFuncPtr. +cl_uint GetThreadCount( void ); + +// SetThreadCount() may be used to artifically set the number of worker threads +// If the value is 0 (the default) the number of threads will be determined based on +// the number of CPU cores. If it is a unicore machine, then 2 will be used, so +// that we still get some testing for thread safety. +// +// If count < 2 or the CL_TEST_SINGLE_THREADED environment variable is set then the +// code will run single threaded, but will report an error to indicate that the test +// is invalid. 
This option is intended for debugging purposes only. It is suggested +// as a convention that test apps set the thread count to 1 in response to the -m flag. +// +// SetThreadCount() must be called before the first call to GetThreadCount() or ThreadPool_Do(), +// otherwise the behavior is indefined. It may not be called from a TPFuncPtr. +void SetThreadCount( int count ); + +#ifdef __cplusplus + } /* extern "C" */ +#endif + + +#endif /* THREAD_POOL_H */ diff --git a/test_common/harness/clImageHelper.h b/test_common/harness/clImageHelper.h new file mode 100644 index 00000000..b09b7bd4 --- /dev/null +++ b/test_common/harness/clImageHelper.h @@ -0,0 +1,290 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef test_conformance_clImageHelper_h +#define test_conformance_clImageHelper_h + +#ifdef __APPLE__ +#include +#else +#include +#endif + +#include +#include "errorHelpers.h" + +#ifdef __cplusplus +extern "C" { +#endif + + + // helper function to replace clCreateImage2D , to make the existing code use + // the functions of version 1.2 and veriosn 1.1 respectively + + static inline cl_mem create_image_2d (cl_context context, + cl_mem_flags flags, + const cl_image_format *image_format, + size_t image_width, + size_t image_height, + size_t image_row_pitch, + void *host_ptr, + cl_int *errcode_ret) + { + cl_mem mImage = NULL; + +#ifdef CL_VERSION_1_2 + cl_image_desc image_desc_dest; + image_desc_dest.image_type = CL_MEM_OBJECT_IMAGE2D;; + image_desc_dest.image_width = image_width; + image_desc_dest.image_height = image_height; + image_desc_dest.image_depth= 0;// not usedfor 2d + image_desc_dest.image_array_size = 0;// not used for 2d + image_desc_dest.image_row_pitch = image_row_pitch; + image_desc_dest.image_slice_pitch = 0; + image_desc_dest.num_mip_levels = 0; + image_desc_dest.num_samples = 0; + image_desc_dest.mem_object = NULL;// no image type of CL_MEM_OBJECT_IMAGE1D_BUFFER in CL_VERSION_1_1, so always is NULL + mImage = clCreateImage( context, flags, image_format, &image_desc_dest, host_ptr, errcode_ret ); + if (errcode_ret && (*errcode_ret)) { + // Log an info message and rely on the calling function to produce an error + // if necessary. + log_info("clCreateImage failed (%d)\n", *errcode_ret); + } + +#else + mImage = clCreateImage2D( context, flags, image_format, image_width, image_height, image_row_pitch, host_ptr, errcode_ret ); + if (errcode_ret && (*errcode_ret)) { + // Log an info message and rely on the calling function to produce an error + // if necessary. 
+ log_info("clCreateImage2D failed (%d)\n", *errcode_ret); + } +#endif + + return mImage; + } + + // helper function to replace clCreateImage2D , to make the existing code use + // the functions of version 1.2 and veriosn 1.1 respectively + + static inline cl_mem create_image_2d_buffer (cl_context context, + cl_mem_flags flags, + const cl_image_format *image_format, + size_t image_width, + size_t image_height, + size_t image_row_pitch, + cl_mem buffer, + cl_int *errcode_ret) + { + cl_mem mImage = NULL; + + cl_image_desc image_desc_dest; + image_desc_dest.image_type = CL_MEM_OBJECT_IMAGE2D;; + image_desc_dest.image_width = image_width; + image_desc_dest.image_height = image_height; + image_desc_dest.image_depth= 0;// not usedfor 2d + image_desc_dest.image_array_size = 0;// not used for 2d + image_desc_dest.image_row_pitch = image_row_pitch; + image_desc_dest.image_slice_pitch = 0; + image_desc_dest.num_mip_levels = 0; + image_desc_dest.num_samples = 0; + image_desc_dest.mem_object = buffer; + mImage = clCreateImage( context, flags, image_format, &image_desc_dest, NULL, errcode_ret ); + if (errcode_ret && (*errcode_ret)) { + // Log an info message and rely on the calling function to produce an error + // if necessary. 
+ log_info("clCreateImage failed (%d)\n", *errcode_ret); + } + + return mImage; + } + + + + static inline cl_mem create_image_3d (cl_context context, + cl_mem_flags flags, + const cl_image_format *image_format, + size_t image_width, + size_t image_height, + size_t image_depth, + size_t image_row_pitch, + size_t image_slice_pitch, + void *host_ptr, + cl_int *errcode_ret) + { + cl_mem mImage; + +#ifdef CL_VERSION_1_2 + cl_image_desc image_desc; + image_desc.image_type = CL_MEM_OBJECT_IMAGE3D; + image_desc.image_width = image_width; + image_desc.image_height = image_height; + image_desc.image_depth = image_depth; + image_desc.image_array_size = 0;// not used for one image + image_desc.image_row_pitch = image_row_pitch; + image_desc.image_slice_pitch = image_slice_pitch; + image_desc.num_mip_levels = 0; + image_desc.num_samples = 0; + image_desc.mem_object = NULL; // no image type of CL_MEM_OBJECT_IMAGE1D_BUFFER in CL_VERSION_1_1, so always is NULL + mImage = clCreateImage( context, + flags, + image_format, + &image_desc, + host_ptr, + errcode_ret ); + if (errcode_ret && (*errcode_ret)) { + // Log an info message and rely on the calling function to produce an error + // if necessary. + log_info("clCreateImage failed (%d)\n", *errcode_ret); + } + +#else + mImage = clCreateImage3D( context, + flags, image_format, + image_width, + image_height, + image_depth, + image_row_pitch, + image_slice_pitch, + host_ptr, + errcode_ret ); + if (errcode_ret && (*errcode_ret)) { + // Log an info message and rely on the calling function to produce an error + // if necessary. 
+ log_info("clCreateImage3D failed (%d)\n", *errcode_ret); + } +#endif + + return mImage; + } + + static inline cl_mem create_image_2d_array (cl_context context, + cl_mem_flags flags, + const cl_image_format *image_format, + size_t image_width, + size_t image_height, + size_t image_array_size, + size_t image_row_pitch, + size_t image_slice_pitch, + void *host_ptr, + cl_int *errcode_ret) + { + cl_mem mImage; + + cl_image_desc image_desc; + image_desc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY; + image_desc.image_width = image_width; + image_desc.image_height = image_height; + image_desc.image_depth = 1; + image_desc.image_array_size = image_array_size; + image_desc.image_row_pitch = image_row_pitch; + image_desc.image_slice_pitch = image_slice_pitch; + image_desc.num_mip_levels = 0; + image_desc.num_samples = 0; + image_desc.mem_object = NULL; + mImage = clCreateImage( context, + flags, + image_format, + &image_desc, + host_ptr, + errcode_ret ); + if (errcode_ret && (*errcode_ret)) { + // Log an info message and rely on the calling function to produce an error + // if necessary. 
+ log_info("clCreateImage failed (%d)\n", *errcode_ret); + } + + return mImage; + } + + static inline cl_mem create_image_1d_array (cl_context context, + cl_mem_flags flags, + const cl_image_format *image_format, + size_t image_width, + size_t image_array_size, + size_t image_row_pitch, + size_t image_slice_pitch, + void *host_ptr, + cl_int *errcode_ret) + { + cl_mem mImage; + + cl_image_desc image_desc; + image_desc.image_type = CL_MEM_OBJECT_IMAGE1D_ARRAY; + image_desc.image_width = image_width; + image_desc.image_height = 1; + image_desc.image_depth = 1; + image_desc.image_array_size = image_array_size; + image_desc.image_row_pitch = image_row_pitch; + image_desc.image_slice_pitch = image_slice_pitch; + image_desc.num_mip_levels = 0; + image_desc.num_samples = 0; + image_desc.mem_object = NULL; + mImage = clCreateImage( context, + flags, + image_format, + &image_desc, + host_ptr, + errcode_ret ); + if (errcode_ret && (*errcode_ret)) { + // Log an info message and rely on the calling function to produce an error + // if necessary. + log_info("clCreateImage failed (%d)\n", *errcode_ret); + } + + return mImage; + } + + static inline cl_mem create_image_1d (cl_context context, + cl_mem_flags flags, + const cl_image_format *image_format, + size_t image_width, + size_t image_row_pitch, + void *host_ptr, + cl_mem buffer, + cl_int *errcode_ret) + { + cl_mem mImage; + + cl_image_desc image_desc; + image_desc.image_type = buffer ? 
CL_MEM_OBJECT_IMAGE1D_BUFFER: CL_MEM_OBJECT_IMAGE1D; + image_desc.image_width = image_width; + image_desc.image_height = 1; + image_desc.image_depth = 1; + image_desc.image_row_pitch = image_row_pitch; + image_desc.image_slice_pitch = 0; + image_desc.num_mip_levels = 0; + image_desc.num_samples = 0; + image_desc.mem_object = buffer; + mImage = clCreateImage( context, + flags, + image_format, + &image_desc, + host_ptr, + errcode_ret ); + if (errcode_ret && (*errcode_ret)) { + // Log an info message and rely on the calling function to produce an error + // if necessary. + log_info("clCreateImage failed (%d)\n", *errcode_ret); + } + + return mImage; + } + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/test_common/harness/compat.h b/test_common/harness/compat.h new file mode 100644 index 00000000..23445c8e --- /dev/null +++ b/test_common/harness/compat.h @@ -0,0 +1,388 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef _COMPAT_H_ +#define _COMPAT_H_ + +#if defined(_WIN32) && defined (_MSC_VER) +#include +#endif + +#ifdef __cplusplus + #define EXTERN_C extern "C" +#else + #define EXTERN_C +#endif + + +// +// stdlib.h +// + +#include // On Windows, _MAX_PATH defined there. + +// llabs appeared in MS C v16 (VS 10/2010). +#if defined( _MSC_VER ) && _MSC_VER <= 1500 + EXTERN_C inline long long llabs(long long __x) { return __x >= 0 ? 
__x : -__x; } +#endif + + +// +// stdbool.h +// + +// stdbool.h appeared in MS C v18 (VS 12/2013). +#if defined( _MSC_VER ) && MSC_VER <= 1700 +#if !defined(__cplusplus) +typedef char bool; + #define true 1 + #define false 0 + #endif +#else + #include +#endif + + + +// +// stdint.h +// + +// stdint.h appeared in MS C v16 (VS 10/2010) and Intel C v12. +#if defined( _MSC_VER ) && ( ! defined( __INTEL_COMPILER ) && _MSC_VER <= 1500 || defined( __INTEL_COMPILER ) && __INTEL_COMPILER < 1200 ) +typedef unsigned char uint8_t; +typedef char int8_t; +typedef unsigned short uint16_t; +typedef short int16_t; +typedef unsigned int uint32_t; +typedef int int32_t; +typedef unsigned long long uint64_t; +typedef long long int64_t; +#else +#ifndef __STDC_LIMIT_MACROS +#define __STDC_LIMIT_MACROS +#endif + #include +#endif + + + +// +// float.h +// + +#include + + + +// +// fenv.h +// + +// fenv.h appeared in MS C v18 (VS 12/2013). +#if defined( _MSC_VER ) && _MSC_VER <= 1700 && ! defined( __INTEL_COMPILER ) + // reimplement fenv.h because windows doesn't have it + #define FE_INEXACT 0x0020 + #define FE_UNDERFLOW 0x0010 + #define FE_OVERFLOW 0x0008 + #define FE_DIVBYZERO 0x0004 + #define FE_INVALID 0x0001 + #define FE_ALL_EXCEPT 0x003D + int fetestexcept(int excepts); + int feclearexcept(int excepts); +#else + #include +#endif + + +// +// math.h +// + +#if defined( __INTEL_COMPILER ) + #include +#else + #include +#endif + +#if defined( _MSC_VER ) + + #ifdef __cplusplus + extern "C" { + #endif + +#ifndef M_PI + #define M_PI 3.14159265358979323846264338327950288 +#endif + + #if ! 
defined( __INTEL_COMPILER ) + + #ifndef NAN + #define NAN (INFINITY - INFINITY) + #endif + #ifndef HUGE_VALF + #define HUGE_VALF (float)HUGE_VAL + #endif + #ifndef INFINITY + #define INFINITY (FLT_MAX + FLT_MAX) + #endif + #ifndef isfinite + #define isfinite(x) _finite(x) + #endif + #ifndef isnan +#define isnan( x ) ((x) != (x)) + #endif + #ifndef isinf +#define isinf( _x) ((_x) == INFINITY || (_x) == -INFINITY) + #endif + +double rint( double x); +float rintf( float x); +long double rintl( long double x); + +float cbrtf( float ); +double cbrt( double ); + +int ilogb( double x); +int ilogbf (float x); +int ilogbl(long double x); + +double fmax(double x, double y); +double fmin(double x, double y); +float fmaxf( float x, float y ); +float fminf(float x, float y); + +double log2(double x); +long double log2l(long double x); + +double exp2(double x); +long double exp2l(long double x); + +double fdim(double x, double y); +float fdimf(float x, float y); +long double fdiml(long double x, long double y); + +double remquo( double x, double y, int *quo); +float remquof( float x, float y, int *quo); +long double remquol( long double x, long double y, int *quo); + +long double scalblnl(long double x, long n); + +float hypotf(float x, float y); +long double hypotl(long double x, long double y) ; +double lgamma(double x); +float lgammaf(float x); + +double trunc(double x); +float truncf(float x); + +double log1p(double x); +float log1pf(float x); +long double log1pl(long double x); + +double copysign(double x, double y); +float copysignf(float x, float y); +long double copysignl(long double x, long double y); + +long lround(double x); +long lroundf(float x); +//long lroundl(long double x) + +double round(double x); +float roundf(float x); +long double roundl(long double x); + + int cf_signbit(double x); + int cf_signbitf(float x); + +// Added in _MSC_VER == 1800 (Visual Studio 2013) +#if _MSC_VER < 1800 + static int signbit(double x) { return cf_signbit(x); } +#endif + static 
int signbitf(float x) { return cf_signbitf(x); } + +long int lrint (double flt); +long int lrintf (float flt); + +float int2float (int32_t ix); +int32_t float2int (float fx); + + #endif + + #if ! defined( __INTEL_COMPILER ) || __INTEL_COMPILER < 1300 + // These functions appeared in Intel C v13. + float nanf( const char* str); + double nan( const char* str); + long double nanl( const char* str); + #endif + + #ifdef __cplusplus + } + #endif + +#endif + +#if defined( __ANDROID__ ) + #define log2(X) (log(X)/log(2)) +#endif + + + +// +// stdio.h +// + +#if defined(_MSC_VER) + // snprintf added in _MSC_VER == 1900 (Visual Studio 2015) + #if _MSC_VER < 1900 + #define snprintf sprintf_s + #endif +#endif + + + +// +// unistd.h +// + +#if defined( _MSC_VER ) + EXTERN_C unsigned int sleep( unsigned int sec ); + EXTERN_C int usleep( int usec ); +#endif + + + +// +// syscall.h +// + +#if defined( __ANDROID__ ) + // Android bionic's <sys/syscall.h> isn't providing SYS_sysctl wrappers. + #define SYS__sysctl __NR__sysctl +#endif + + + +// Some tests use _malloca which defined in malloc.h. +#if !defined (__APPLE__) +#include <malloc.h> +#endif + + +// +// ??? +// + +#if defined( _MSC_VER ) + + #define MAXPATHLEN _MAX_PATH + + EXTERN_C uint64_t ReadTime( void ); + EXTERN_C double SubtractTime( uint64_t endTime, uint64_t startTime ); + +/** Returns the number of leading 0-bits in x, + starting at the most significant bit position. + If x is 0, the result is undefined. +*/ + EXTERN_C int __builtin_clz(unsigned int pattern); + +#endif + +#ifndef MIN + #define MIN(x,y) (((x)<(y))?(x):(y)) +#endif +#ifndef MAX + #define MAX(x,y) (((x)>(y))?(x):(y)) +#endif + + +/* + ------------------------------------------------------------------------------------------------ + WARNING: DO NOT USE THESE MACROS: MAKE_HEX_FLOAT, MAKE_HEX_DOUBLE, MAKE_HEX_LONG. + + This is a typical usage of the macros: + + double yhi = MAKE_HEX_DOUBLE(0x1.5555555555555p-2,0x15555555555555LL,-2); + + (taken from math_brute_force/reference_math.c). 
There are two problems: + + 1. There is an error here. On Windows in will produce incorrect result + `0x1.5555555555555p+50'. To have a correct result it should be written as + `MAKE_HEX_DOUBLE(0x1.5555555555555p-2,0x15555555555555LL,-54)'. A proper value of the + third argument is not obvious -- sometimes it should be the same as exponent of the + first argument, but sometimes not. + + 2. Information is duplicated. It is easy to make a mistake. + + Use HEX_FLT, HEX_DBL, HEX_LDBL macros instead (see them in the bottom of the file). + ------------------------------------------------------------------------------------------------ +*/ +#if defined ( _MSC_VER ) && ! defined( __INTEL_COMPILER ) + + #define MAKE_HEX_FLOAT(x,y,z) ((float)ldexp( (float)(y), z)) + #define MAKE_HEX_DOUBLE(x,y,z) ldexp( (double)(y), z) + #define MAKE_HEX_LONG(x,y,z) ((long double) ldexp( (long double)(y), z)) + +#else + +// Do not use these macros in new code, use HEX_FLT, HEX_DBL, HEX_LDBL instead. +#define MAKE_HEX_FLOAT(x,y,z) x +#define MAKE_HEX_DOUBLE(x,y,z) x +#define MAKE_HEX_LONG(x,y,z) x + +#endif + + +/* + ------------------------------------------------------------------------------------------------ + HEX_FLT, HEXT_DBL, HEX_LDBL -- Create hex floating point literal of type float, double, long + double respectively. Arguments: + + sm -- sign of number, + int -- integer part of mantissa (without `0x' prefix), + fract -- fractional part of mantissa (without decimal point and `L' or `LL' suffixes), + se -- sign of exponent, + exp -- absolute value of (binary) exponent. + + Example: + + double yhi = HEX_DBL( +, 1, 5555555555555, -, 2 ); // == 0x1.5555555555555p-2 + + Note: + + We have to pass signs as separate arguments because gcc pass negative integer values + (e. g. `-2') into a macro as two separate tokens, so `HEX_FLT( 1, 0, -2 )' produces result + `0x1.0p- 2' (note a space between minus and two) which is not a correct floating point + literal. 
+ ------------------------------------------------------------------------------------------------ +*/ +#if defined ( _MSC_VER ) && ! defined( __INTEL_COMPILER ) + // If compiler does not support hex floating point literals: + #define HEX_FLT( sm, int, fract, se, exp ) sm ldexpf( (float)( 0x ## int ## fract ## UL ), se exp + ilogbf( (float) 0x ## int ) - ilogbf( ( float )( 0x ## int ## fract ## UL ) ) ) + #define HEX_DBL( sm, int, fract, se, exp ) sm ldexp( (double)( 0x ## int ## fract ## ULL ), se exp + ilogb( (double) 0x ## int ) - ilogb( ( double )( 0x ## int ## fract ## ULL ) ) ) + #define HEX_LDBL( sm, int, fract, se, exp ) sm ldexpl( (long double)( 0x ## int ## fract ## ULL ), se exp + ilogbl( (long double) 0x ## int ) - ilogbl( ( long double )( 0x ## int ## fract ## ULL ) ) ) +#else + // If compiler supports hex floating point literals: just concatenate all the parts into a literal. + #define HEX_FLT( sm, int, fract, se, exp ) sm 0x ## int ## . ## fract ## p ## se ## exp ## F + #define HEX_DBL( sm, int, fract, se, exp ) sm 0x ## int ## . ## fract ## p ## se ## exp + #define HEX_LDBL( sm, int, fract, se, exp ) sm 0x ## int ## . ## fract ## p ## se ## exp ## L +#endif + +#if defined(__MINGW32__) + #include <windows.h> + #define sleep(sec) Sleep((sec) * 1000) +#endif + +#endif // _COMPAT_H_ diff --git a/test_common/harness/conversions.c b/test_common/harness/conversions.c new file mode 100644 index 00000000..72fd8cb3 --- /dev/null +++ b/test_common/harness/conversions.c @@ -0,0 +1,1198 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "conversions.h" +#include <stdio.h> +#include <string.h> +#include <assert.h> +#include "mt19937.h" +#include "compat.h" + +#if defined( __SSE__ ) || defined (_MSC_VER) + #include <xmmintrin.h> +#endif +#if defined( __SSE2__ ) || defined (_MSC_VER) + #include <emmintrin.h> +#endif + +void print_type_to_string(ExplicitType type, void *data, char* string) { + switch (type) { + case kBool: + if (*(char*)data) + sprintf(string, "true"); + else + sprintf(string, "false"); + return; + case kChar: + sprintf(string, "%d", (int)*((cl_char*)data)); + return; + case kUChar: + case kUnsignedChar: + sprintf(string, "%u", (int)*((cl_uchar*)data)); + return; + case kShort: + sprintf(string, "%d", (int)*((cl_short*)data)); + return; + case kUShort: + case kUnsignedShort: + sprintf(string, "%u", (int)*((cl_ushort*)data)); + return; + case kInt: + sprintf(string, "%d", *((cl_int*)data)); + return; + case kUInt: + case kUnsignedInt: + sprintf(string, "%u", *((cl_uint*)data)); + return; + case kLong: + sprintf(string, "%lld", *((cl_long*)data)); + return; + case kULong: + case kUnsignedLong: + sprintf(string, "%llu", *((cl_ulong*)data)); + return; + case kFloat: + sprintf(string, "%f", *((cl_float*)data)); + return; + case kHalf: + sprintf(string, "half"); + return; + case kDouble: + sprintf(string, "%g", *((cl_double*)data)); + return; + default: + sprintf(string, "INVALID"); + return; + } + +} + +size_t get_explicit_type_size( ExplicitType type ) +{ + /* Quick method to avoid branching: make sure the following array matches the Enum order */ + static size_t sExplicitTypeSizes[] = { + sizeof( cl_bool ), + 
sizeof( cl_char ), + sizeof( cl_uchar ), + sizeof( cl_uchar ), + sizeof( cl_short ), + sizeof( cl_ushort ), + sizeof( cl_ushort ), + sizeof( cl_int ), + sizeof( cl_uint ), + sizeof( cl_uint ), + sizeof( cl_long ), + sizeof( cl_ulong ), + sizeof( cl_ulong ), + sizeof( cl_float ), + sizeof( cl_half ), + sizeof( cl_double ) + }; + + return sExplicitTypeSizes[ type ]; +} + +const char * get_explicit_type_name( ExplicitType type ) +{ + /* Quick method to avoid branching: make sure the following array matches the Enum order */ + static const char *sExplicitTypeNames[] = { "bool", "char", "uchar", "unsigned char", "short", "ushort", "unsigned short", "int", + "uint", "unsigned int", "long", "ulong", "unsigned long", "float", "half", "double" }; + + return sExplicitTypeNames[ type ]; +} + +static long lrintf_clamped( float f ); +static long lrintf_clamped( float f ) +{ + static const float magic[2] = { MAKE_HEX_FLOAT( 0x1.0p23f, 0x1, 23), - MAKE_HEX_FLOAT( 0x1.0p23f, 0x1, 23) }; + + if( f >= -(float) LONG_MIN ) + return LONG_MAX; + + if( f <= (float) LONG_MIN ) + return LONG_MIN; + + // Round fractional values to integer in round towards nearest mode + if( fabsf(f) < MAKE_HEX_FLOAT( 0x1.0p23f, 0x1, 23 ) ) + { + volatile float x = f; + float magicVal = magic[ f < 0 ]; + +#if defined( __SSE__ ) || defined (_WIN32) + // Defeat x87 based arithmetic, which cant do FTZ, and will round this incorrectly + __m128 v = _mm_set_ss( x ); + __m128 m = _mm_set_ss( magicVal ); + v = _mm_add_ss( v, m ); + v = _mm_sub_ss( v, m ); + _mm_store_ss( (float*) &x, v ); +#else + x += magicVal; + x -= magicVal; +#endif + f = x; + } + + return (long) f; +} + +static long lrint_clamped( double f ); +static long lrint_clamped( double f ) +{ + static const double magic[2] = { MAKE_HEX_DOUBLE(0x1.0p52, 0x1LL, 52), MAKE_HEX_DOUBLE(-0x1.0p52, -0x1LL, 52) }; + + if( sizeof( long ) > 4 ) + { + if( f >= -(double) LONG_MIN ) + return LONG_MAX; + } + else + { + if( f >= LONG_MAX ) + return LONG_MAX; + } + + 
if( f <= (double) LONG_MIN ) + return LONG_MIN; + + // Round fractional values to integer in round towards nearest mode + if( fabs(f) < MAKE_HEX_DOUBLE(0x1.0p52, 0x1LL, 52) ) + { + volatile double x = f; + double magicVal = magic[ f < 0 ]; +#if defined( __SSE2__ ) || (defined (_MSC_VER)) + // Defeat x87 based arithmetic, which cant do FTZ, and will round this incorrectly + __m128d v = _mm_set_sd( x ); + __m128d m = _mm_set_sd( magicVal ); + v = _mm_add_sd( v, m ); + v = _mm_sub_sd( v, m ); + _mm_store_sd( (double*) &x, v ); +#else + x += magicVal; + x -= magicVal; +#endif + f = x; + } + + return (long) f; +} + + +typedef cl_long Long; +typedef cl_ulong ULong; + +static ULong sUpperLimits[ kNumExplicitTypes ] = + { + 0, + 127, 255, 255, + 32767, 65535, 65535, + 0x7fffffffLL, 0xffffffffLL, 0xffffffffLL, + 0x7fffffffffffffffLL, 0xffffffffffffffffLL, 0xffffffffffffffffLL, + 0, 0 }; // Last two values aren't stored here + +static Long sLowerLimits[ kNumExplicitTypes ] = + { + -1, + -128, 0, 0, + -32768, 0, 0, + 0xffffffff80000000LL, 0, 0, + 0x8000000000000000LL, 0, 0, + 0, 0 }; // Last two values aren't stored here + +#define BOOL_CASE(inType) \ + case kBool: \ + boolPtr = (bool *)outRaw; \ + *boolPtr = ( *inType##Ptr ) != 0 ? 
true : false; \ + break; + +#define SIMPLE_CAST_CASE(inType,outEnum,outType) \ + case outEnum: \ + outType##Ptr = (outType *)outRaw; \ + *outType##Ptr = (outType)(*inType##Ptr); \ + break; + +// Sadly, the ULong downcasting cases need a separate #define to get rid of signed/unsigned comparison warnings +#define DOWN_CAST_CASE(inType,outEnum,outType,sat) \ + case outEnum: \ + outType##Ptr = (outType *)outRaw; \ + if( sat ) \ + { \ + if( ( sLowerLimits[outEnum] < 0 && *inType##Ptr > (Long)sUpperLimits[outEnum] ) || ( sLowerLimits[outEnum] == 0 && (ULong)*inType##Ptr > sUpperLimits[outEnum] ) )\ + *outType##Ptr = (outType)sUpperLimits[outEnum];\ + else if( *inType##Ptr < sLowerLimits[outEnum] )\ + *outType##Ptr = (outType)sLowerLimits[outEnum]; \ + else \ + *outType##Ptr = (outType)*inType##Ptr; \ + } else { \ + *outType##Ptr = (outType)( *inType##Ptr & ( 0xffffffffffffffffLL >> ( 64 - ( sizeof( outType ) * 8 ) ) ) ); \ + } \ + break; + +#define U_DOWN_CAST_CASE(inType,outEnum,outType,sat) \ + case outEnum: \ + outType##Ptr = (outType *)outRaw; \ + if( sat ) \ + { \ + if( (ULong)*inType##Ptr > sUpperLimits[outEnum] )\ + *outType##Ptr = (outType)sUpperLimits[outEnum];\ + else \ + *outType##Ptr = (outType)*inType##Ptr; \ + } else { \ + *outType##Ptr = (outType)( *inType##Ptr & ( 0xffffffffffffffffLL >> ( 64 - ( sizeof( outType ) * 8 ) ) ) ); \ + } \ + break; + +#define TO_FLOAT_CASE(inType) \ + case kFloat: \ + floatPtr = (float *)outRaw; \ + *floatPtr = (float)(*inType##Ptr); \ + break; +#define TO_DOUBLE_CASE(inType) \ + case kDouble: \ + doublePtr = (double *)outRaw; \ + *doublePtr = (double)(*inType##Ptr); \ + break; + + +/* Note: we use lrintf here to force the rounding instead of whatever the processor's current rounding mode is */ +#define FLOAT_ROUND_TO_NEAREST_CASE(outEnum,outType) \ + case outEnum: \ + outType##Ptr = (outType *)outRaw; \ + *outType##Ptr = (outType)lrintf_clamped( *floatPtr ); \ + break; + +#define FLOAT_ROUND_CASE(outEnum,outType,rounding,sat) 
\ + case outEnum: \ + { \ + outType##Ptr = (outType *)outRaw; \ + /* Get the tens digit */ \ + Long wholeValue = (Long)*floatPtr;\ + float largeRemainder = ( *floatPtr - (float)wholeValue ) * 10.f; \ + /* What do we do based on that? */ \ + if( rounding == kRoundToEven ) \ + { \ + if( wholeValue & 1LL ) /*between 1 and 1.99 */ \ + wholeValue += 1LL; /* round up to even */ \ + } \ + else if( rounding == kRoundToZero ) \ + { \ + /* Nothing to do, round-to-zero is what C casting does */ \ + } \ + else if( rounding == kRoundToPosInf ) \ + { \ + /* Only positive numbers are wrong */ \ + if( largeRemainder != 0.f && wholeValue >= 0 ) \ + wholeValue++; \ + } \ + else if( rounding == kRoundToNegInf ) \ + { \ + /* Only negative numbers are off */ \ + if( largeRemainder != 0.f && wholeValue < 0 ) \ + wholeValue--; \ + } \ + else \ + { /* Default is round-to-nearest */ \ + wholeValue = (Long)lrintf_clamped( *floatPtr ); \ + } \ + /* Now apply saturation rules */ \ + if( sat ) \ + { \ + if( ( sLowerLimits[outEnum] < 0 && wholeValue > (Long)sUpperLimits[outEnum] ) || ( sLowerLimits[outEnum] == 0 && (ULong)wholeValue > sUpperLimits[outEnum] ) )\ + *outType##Ptr = (outType)sUpperLimits[outEnum];\ + else if( wholeValue < sLowerLimits[outEnum] )\ + *outType##Ptr = (outType)sLowerLimits[outEnum]; \ + else \ + *outType##Ptr = (outType)wholeValue; \ + } else { \ + *outType##Ptr = (outType)( wholeValue & ( 0xffffffffffffffffLL >> ( 64 - ( sizeof( outType ) * 8 ) ) ) ); \ + } \ + } \ + break; + +#define DOUBLE_ROUND_CASE(outEnum,outType,rounding,sat) \ + case outEnum: \ + { \ + outType##Ptr = (outType *)outRaw; \ + /* Get the tens digit */ \ + Long wholeValue = (Long)*doublePtr;\ + double largeRemainder = ( *doublePtr - (double)wholeValue ) * 10.0; \ + /* What do we do based on that? 
*/ \ + if( rounding == kRoundToEven ) \ + { \ + if( wholeValue & 1LL ) /*between 1 and 1.99 */ \ + wholeValue += 1LL; /* round up to even */ \ + } \ + else if( rounding == kRoundToZero ) \ + { \ + /* Nothing to do, round-to-zero is what C casting does */ \ + } \ + else if( rounding == kRoundToPosInf ) \ + { \ + /* Only positive numbers are wrong */ \ + if( largeRemainder != 0.0 && wholeValue >= 0 ) \ + wholeValue++; \ + } \ + else if( rounding == kRoundToNegInf ) \ + { \ + /* Only negative numbers are off */ \ + if( largeRemainder != 0.0 && wholeValue < 0 ) \ + wholeValue--; \ + } \ + else \ + { /* Default is round-to-nearest */ \ + wholeValue = (Long)lrint_clamped( *doublePtr ); \ + } \ + /* Now apply saturation rules */ \ + if( sat ) \ + { \ + if( ( sLowerLimits[outEnum] < 0 && wholeValue > (Long)sUpperLimits[outEnum] ) || ( sLowerLimits[outEnum] == 0 && (ULong)wholeValue > sUpperLimits[outEnum] ) )\ + *outType##Ptr = (outType)sUpperLimits[outEnum];\ + else if( wholeValue < sLowerLimits[outEnum] )\ + *outType##Ptr = (outType)sLowerLimits[outEnum]; \ + else \ + *outType##Ptr = (outType)wholeValue; \ + } else { \ + *outType##Ptr = (outType)( wholeValue & ( 0xffffffffffffffffLL >> ( 64 - ( sizeof( outType ) * 8 ) ) ) ); \ + } \ + } \ + break; + +typedef unsigned char uchar; +typedef unsigned short ushort; +typedef unsigned int uint; +typedef unsigned long ulong; + +void convert_explicit_value( void *inRaw, void *outRaw, ExplicitType inType, bool saturate, RoundingType roundType, ExplicitType outType ) +{ + bool *boolPtr; + char *charPtr; + uchar *ucharPtr; + short *shortPtr; + ushort *ushortPtr; + int *intPtr; + uint *uintPtr; + Long *LongPtr; + ULong *ULongPtr; + float *floatPtr; + double *doublePtr; + + + switch( inType ) + { + case kBool: + boolPtr = (bool *)inRaw; + switch( outType ) + { + case kBool: + memcpy( outRaw, inRaw, get_explicit_type_size( inType ) ); + break; + + case kChar: + case kUChar: + case kUnsignedChar: + case kShort: + case kUShort: + case 
kUnsignedShort: + case kInt: + case kUInt: + case kUnsignedInt: + case kLong: + case kULong: + case kUnsignedLong: + memset( outRaw, *boolPtr ? 0xff : 0, get_explicit_type_size( outType ) ); + break; + + case kFloat: + floatPtr = (float *)outRaw; + *floatPtr = ( *boolPtr ) ? -1.f : 0.f; + break; + case kDouble: + doublePtr = (double *)outRaw; + *doublePtr = ( *boolPtr ) ? -1.0 : 0.0; + break; + default: + log_error( "ERROR: Invalid type given to convert_explicit_value!!\n" ); + break; + } + break; + + case kChar: + charPtr = (char *)inRaw; + switch( outType ) + { + BOOL_CASE(char) + + case kChar: + memcpy( outRaw, inRaw, get_explicit_type_size( inType ) ); + break; + + DOWN_CAST_CASE(char,kUChar,uchar,saturate) + SIMPLE_CAST_CASE(char,kUnsignedChar,uchar) + SIMPLE_CAST_CASE(char,kShort,short) + SIMPLE_CAST_CASE(char,kUShort,ushort) + SIMPLE_CAST_CASE(char,kUnsignedShort,ushort) + SIMPLE_CAST_CASE(char,kInt,int) + SIMPLE_CAST_CASE(char,kUInt,uint) + SIMPLE_CAST_CASE(char,kUnsignedInt,uint) + SIMPLE_CAST_CASE(char,kLong,Long) + SIMPLE_CAST_CASE(char,kULong,ULong) + SIMPLE_CAST_CASE(char,kUnsignedLong,ULong) + + TO_FLOAT_CASE(char) + TO_DOUBLE_CASE(char) + + default: + log_error( "ERROR: Invalid type given to convert_explicit_value!!\n" ); + break; + } + break; + + case kUChar: + ucharPtr = (uchar *)inRaw; + switch( outType ) + { + BOOL_CASE(uchar) + + case kUChar: + case kUnsignedChar: + memcpy( outRaw, inRaw, get_explicit_type_size( inType ) ); + break; + + DOWN_CAST_CASE(uchar,kChar,char,saturate) + SIMPLE_CAST_CASE(uchar,kShort,short) + SIMPLE_CAST_CASE(uchar,kUShort,ushort) + SIMPLE_CAST_CASE(uchar,kUnsignedShort,ushort) + SIMPLE_CAST_CASE(uchar,kInt,int) + SIMPLE_CAST_CASE(uchar,kUInt,uint) + SIMPLE_CAST_CASE(uchar,kUnsignedInt,uint) + SIMPLE_CAST_CASE(uchar,kLong,Long) + SIMPLE_CAST_CASE(uchar,kULong,ULong) + SIMPLE_CAST_CASE(uchar,kUnsignedLong,ULong) + + TO_FLOAT_CASE(uchar) + TO_DOUBLE_CASE(uchar) + + default: + log_error( "ERROR: Invalid type given to 
convert_explicit_value!!\n" ); + break; + } + break; + + case kUnsignedChar: + ucharPtr = (uchar *)inRaw; + switch( outType ) + { + BOOL_CASE(uchar) + + case kUChar: + case kUnsignedChar: + memcpy( outRaw, inRaw, get_explicit_type_size( inType ) ); + break; + + DOWN_CAST_CASE(uchar,kChar,char,saturate) + SIMPLE_CAST_CASE(uchar,kShort,short) + SIMPLE_CAST_CASE(uchar,kUShort,ushort) + SIMPLE_CAST_CASE(uchar,kUnsignedShort,ushort) + SIMPLE_CAST_CASE(uchar,kInt,int) + SIMPLE_CAST_CASE(uchar,kUInt,uint) + SIMPLE_CAST_CASE(uchar,kUnsignedInt,uint) + SIMPLE_CAST_CASE(uchar,kLong,Long) + SIMPLE_CAST_CASE(uchar,kULong,ULong) + SIMPLE_CAST_CASE(uchar,kUnsignedLong,ULong) + + TO_FLOAT_CASE(uchar) + TO_DOUBLE_CASE(uchar) + + default: + log_error( "ERROR: Invalid type given to convert_explicit_value!!\n" ); + break; + } + break; + + case kShort: + shortPtr = (short *)inRaw; + switch( outType ) + { + BOOL_CASE(short) + + case kShort: + memcpy( outRaw, inRaw, get_explicit_type_size( inType ) ); + break; + + DOWN_CAST_CASE(short,kChar,char,saturate) + DOWN_CAST_CASE(short,kUChar,uchar,saturate) + DOWN_CAST_CASE(short,kUnsignedChar,uchar,saturate) + DOWN_CAST_CASE(short,kUShort,ushort,saturate) + DOWN_CAST_CASE(short,kUnsignedShort,ushort,saturate) + SIMPLE_CAST_CASE(short,kInt,int) + SIMPLE_CAST_CASE(short,kUInt,uint) + SIMPLE_CAST_CASE(short,kUnsignedInt,uint) + SIMPLE_CAST_CASE(short,kLong,Long) + SIMPLE_CAST_CASE(short,kULong,ULong) + SIMPLE_CAST_CASE(short,kUnsignedLong,ULong) + + TO_FLOAT_CASE(short) + TO_DOUBLE_CASE(short) + + default: + log_error( "ERROR: Invalid type given to convert_explicit_value!!\n" ); + break; + } + break; + + case kUShort: + ushortPtr = (ushort *)inRaw; + switch( outType ) + { + BOOL_CASE(ushort) + + case kUShort: + case kUnsignedShort: + memcpy( outRaw, inRaw, get_explicit_type_size( inType ) ); + break; + + DOWN_CAST_CASE(ushort,kChar,char,saturate) + DOWN_CAST_CASE(ushort,kUChar,uchar,saturate) + DOWN_CAST_CASE(ushort,kUnsignedChar,uchar,saturate) 
+ DOWN_CAST_CASE(ushort,kShort,short,saturate) + SIMPLE_CAST_CASE(ushort,kInt,int) + SIMPLE_CAST_CASE(ushort,kUInt,uint) + SIMPLE_CAST_CASE(ushort,kUnsignedInt,uint) + SIMPLE_CAST_CASE(ushort,kLong,Long) + SIMPLE_CAST_CASE(ushort,kULong,ULong) + SIMPLE_CAST_CASE(ushort,kUnsignedLong,ULong) + + TO_FLOAT_CASE(ushort) + TO_DOUBLE_CASE(ushort) + + default: + log_error( "ERROR: Invalid type given to convert_explicit_value!!\n" ); + break; + } + break; + + case kUnsignedShort: + ushortPtr = (ushort *)inRaw; + switch( outType ) + { + BOOL_CASE(ushort) + + case kUShort: + case kUnsignedShort: + memcpy( outRaw, inRaw, get_explicit_type_size( inType ) ); + break; + + DOWN_CAST_CASE(ushort,kChar,char,saturate) + DOWN_CAST_CASE(ushort,kUChar,uchar,saturate) + DOWN_CAST_CASE(ushort,kUnsignedChar,uchar,saturate) + DOWN_CAST_CASE(ushort,kShort,short,saturate) + SIMPLE_CAST_CASE(ushort,kInt,int) + SIMPLE_CAST_CASE(ushort,kUInt,uint) + SIMPLE_CAST_CASE(ushort,kUnsignedInt,uint) + SIMPLE_CAST_CASE(ushort,kLong,Long) + SIMPLE_CAST_CASE(ushort,kULong,ULong) + SIMPLE_CAST_CASE(ushort,kUnsignedLong,ULong) + + TO_FLOAT_CASE(ushort) + TO_DOUBLE_CASE(ushort) + + default: + log_error( "ERROR: Invalid type given to convert_explicit_value!!\n" ); + break; + } + break; + + case kInt: + intPtr = (int *)inRaw; + switch( outType ) + { + BOOL_CASE(int) + + case kInt: + memcpy( outRaw, inRaw, get_explicit_type_size( inType ) ); + break; + + DOWN_CAST_CASE(int,kChar,char,saturate) + DOWN_CAST_CASE(int,kUChar,uchar,saturate) + DOWN_CAST_CASE(int,kUnsignedChar,uchar,saturate) + DOWN_CAST_CASE(int,kShort,short,saturate) + DOWN_CAST_CASE(int,kUShort,ushort,saturate) + DOWN_CAST_CASE(int,kUnsignedShort,ushort,saturate) + DOWN_CAST_CASE(int,kUInt,uint,saturate) + DOWN_CAST_CASE(int,kUnsignedInt,uint,saturate) + SIMPLE_CAST_CASE(int,kLong,Long) + SIMPLE_CAST_CASE(int,kULong,ULong) + SIMPLE_CAST_CASE(int,kUnsignedLong,ULong) + + TO_FLOAT_CASE(int) + TO_DOUBLE_CASE(int) + + default: + log_error( "ERROR: 
Invalid type given to convert_explicit_value!!\n" ); + break; + } + break; + + case kUInt: + uintPtr = (uint *)inRaw; + switch( outType ) + { + BOOL_CASE(uint) + + case kUInt: + case kUnsignedInt: + memcpy( outRaw, inRaw, get_explicit_type_size( inType ) ); + break; + + DOWN_CAST_CASE(uint,kChar,char,saturate) + DOWN_CAST_CASE(uint,kUChar,uchar,saturate) + DOWN_CAST_CASE(uint,kUnsignedChar,uchar,saturate) + DOWN_CAST_CASE(uint,kShort,short,saturate) + DOWN_CAST_CASE(uint,kUShort,ushort,saturate) + DOWN_CAST_CASE(uint,kUnsignedShort,ushort,saturate) + DOWN_CAST_CASE(uint,kInt,int,saturate) + SIMPLE_CAST_CASE(uint,kLong,Long) + SIMPLE_CAST_CASE(uint,kULong,ULong) + SIMPLE_CAST_CASE(uint,kUnsignedLong,ULong) + + TO_FLOAT_CASE(uint) + TO_DOUBLE_CASE(uint) + + default: + log_error( "ERROR: Invalid type given to convert_explicit_value!!\n" ); + break; + } + break; + + case kUnsignedInt: + uintPtr = (uint *)inRaw; + switch( outType ) + { + BOOL_CASE(uint) + + case kUInt: + case kUnsignedInt: + memcpy( outRaw, inRaw, get_explicit_type_size( inType ) ); + break; + + DOWN_CAST_CASE(uint,kChar,char,saturate) + DOWN_CAST_CASE(uint,kUChar,uchar,saturate) + DOWN_CAST_CASE(uint,kUnsignedChar,uchar,saturate) + DOWN_CAST_CASE(uint,kShort,short,saturate) + DOWN_CAST_CASE(uint,kUShort,ushort,saturate) + DOWN_CAST_CASE(uint,kUnsignedShort,ushort,saturate) + DOWN_CAST_CASE(uint,kInt,int,saturate) + SIMPLE_CAST_CASE(uint,kLong,Long) + SIMPLE_CAST_CASE(uint,kULong,ULong) + SIMPLE_CAST_CASE(uint,kUnsignedLong,ULong) + + TO_FLOAT_CASE(uint) + TO_DOUBLE_CASE(uint) + + default: + log_error( "ERROR: Invalid type given to convert_explicit_value!!\n" ); + break; + } + break; + + case kLong: + LongPtr = (Long *)inRaw; + switch( outType ) + { + BOOL_CASE(Long) + + case kLong: + memcpy( outRaw, inRaw, get_explicit_type_size( inType ) ); + break; + + DOWN_CAST_CASE(Long,kChar,char,saturate) + DOWN_CAST_CASE(Long,kUChar,uchar,saturate) + DOWN_CAST_CASE(Long,kUnsignedChar,uchar,saturate) + 
DOWN_CAST_CASE(Long,kShort,short,saturate) + DOWN_CAST_CASE(Long,kUShort,ushort,saturate) + DOWN_CAST_CASE(Long,kUnsignedShort,ushort,saturate) + DOWN_CAST_CASE(Long,kInt,int,saturate) + DOWN_CAST_CASE(Long,kUInt,uint,saturate) + DOWN_CAST_CASE(Long,kUnsignedInt,uint,saturate) + DOWN_CAST_CASE(Long,kULong,ULong,saturate) + DOWN_CAST_CASE(Long,kUnsignedLong,ULong,saturate) + + TO_FLOAT_CASE(Long) + TO_DOUBLE_CASE(Long) + + default: + log_error( "ERROR: Invalid type given to convert_explicit_value!!\n" ); + break; + } + break; + + case kULong: + ULongPtr = (ULong *)inRaw; + switch( outType ) + { + BOOL_CASE(ULong) + + case kUnsignedLong: + case kULong: + memcpy( outRaw, inRaw, get_explicit_type_size( inType ) ); + break; + + U_DOWN_CAST_CASE(ULong,kChar,char,saturate) + U_DOWN_CAST_CASE(ULong,kUChar,uchar,saturate) + U_DOWN_CAST_CASE(ULong,kUnsignedChar,uchar,saturate) + U_DOWN_CAST_CASE(ULong,kShort,short,saturate) + U_DOWN_CAST_CASE(ULong,kUShort,ushort,saturate) + U_DOWN_CAST_CASE(ULong,kUnsignedShort,ushort,saturate) + U_DOWN_CAST_CASE(ULong,kInt,int,saturate) + U_DOWN_CAST_CASE(ULong,kUInt,uint,saturate) + U_DOWN_CAST_CASE(ULong,kUnsignedInt,uint,saturate) + U_DOWN_CAST_CASE(ULong,kLong,Long,saturate) + + TO_FLOAT_CASE(ULong) + TO_DOUBLE_CASE(ULong) + + default: + log_error( "ERROR: Invalid type given to convert_explicit_value!!\n" ); + break; + } + break; + + case kUnsignedLong: + ULongPtr = (ULong *)inRaw; + switch( outType ) + { + BOOL_CASE(ULong) + + case kULong: + case kUnsignedLong: + memcpy( outRaw, inRaw, get_explicit_type_size( inType ) ); + break; + + U_DOWN_CAST_CASE(ULong,kChar,char,saturate) + U_DOWN_CAST_CASE(ULong,kUChar,uchar,saturate) + U_DOWN_CAST_CASE(ULong,kUnsignedChar,uchar,saturate) + U_DOWN_CAST_CASE(ULong,kShort,short,saturate) + U_DOWN_CAST_CASE(ULong,kUShort,ushort,saturate) + U_DOWN_CAST_CASE(ULong,kUnsignedShort,ushort,saturate) + U_DOWN_CAST_CASE(ULong,kInt,int,saturate) + U_DOWN_CAST_CASE(ULong,kUInt,uint,saturate) + 
U_DOWN_CAST_CASE(ULong,kUnsignedInt,uint,saturate) + U_DOWN_CAST_CASE(ULong,kLong,Long,saturate) + + TO_FLOAT_CASE(ULong) + TO_DOUBLE_CASE(ULong) + + default: + log_error( "ERROR: Invalid type given to convert_explicit_value!!\n" ); + break; + } + break; + + case kFloat: + floatPtr = (float *)inRaw; + switch( outType ) + { + BOOL_CASE(float) + + FLOAT_ROUND_CASE(kChar,char,roundType,saturate) + FLOAT_ROUND_CASE(kUChar,uchar,roundType,saturate) + FLOAT_ROUND_CASE(kUnsignedChar,uchar,roundType,saturate) + FLOAT_ROUND_CASE(kShort,short,roundType,saturate) + FLOAT_ROUND_CASE(kUShort,ushort,roundType,saturate) + FLOAT_ROUND_CASE(kUnsignedShort,ushort,roundType,saturate) + FLOAT_ROUND_CASE(kInt,int,roundType,saturate) + FLOAT_ROUND_CASE(kUInt,uint,roundType,saturate) + FLOAT_ROUND_CASE(kUnsignedInt,uint,roundType,saturate) + FLOAT_ROUND_CASE(kLong,Long,roundType,saturate) + FLOAT_ROUND_CASE(kULong,ULong,roundType,saturate) + FLOAT_ROUND_CASE(kUnsignedLong,ULong,roundType,saturate) + + case kFloat: + memcpy( outRaw, inRaw, get_explicit_type_size( inType ) ); + break; + + TO_DOUBLE_CASE(float); + + default: + log_error( "ERROR: Invalid type given to convert_explicit_value!!\n" ); + break; + } + break; + + case kDouble: + doublePtr = (double *)inRaw; + switch( outType ) + { + BOOL_CASE(double) + + DOUBLE_ROUND_CASE(kChar,char,roundType,saturate) + DOUBLE_ROUND_CASE(kUChar,uchar,roundType,saturate) + DOUBLE_ROUND_CASE(kUnsignedChar,uchar,roundType,saturate) + DOUBLE_ROUND_CASE(kShort,short,roundType,saturate) + DOUBLE_ROUND_CASE(kUShort,ushort,roundType,saturate) + DOUBLE_ROUND_CASE(kUnsignedShort,ushort,roundType,saturate) + DOUBLE_ROUND_CASE(kInt,int,roundType,saturate) + DOUBLE_ROUND_CASE(kUInt,uint,roundType,saturate) + DOUBLE_ROUND_CASE(kUnsignedInt,uint,roundType,saturate) + DOUBLE_ROUND_CASE(kLong,Long,roundType,saturate) + DOUBLE_ROUND_CASE(kULong,ULong,roundType,saturate) + DOUBLE_ROUND_CASE(kUnsignedLong,ULong,roundType,saturate) + + TO_FLOAT_CASE(double); + + case 
kDouble: + memcpy( outRaw, inRaw, get_explicit_type_size( inType ) ); + break; + + default: + log_error( "ERROR: Invalid type given to convert_explicit_value!!\n" ); + break; + } + break; + + default: + log_error( "ERROR: Invalid type given to convert_explicit_value!!\n" ); + break; + } +} + +void generate_random_data( ExplicitType type, size_t count, MTdata d, void *outData ) +{ + bool *boolPtr; + cl_char *charPtr; + cl_uchar *ucharPtr; + cl_short *shortPtr; + cl_ushort *ushortPtr; + cl_int *intPtr; + cl_uint *uintPtr; + cl_long *longPtr; + cl_ulong *ulongPtr; + cl_float *floatPtr; + cl_double *doublePtr; + cl_ushort *halfPtr; + size_t i; + cl_uint bits = genrand_int32(d); + cl_uint bitsLeft = 32; + + switch( type ) + { + case kBool: + boolPtr = (bool *)outData; + for( i = 0; i < count; i++ ) + { + if( 0 == bitsLeft) + { + bits = genrand_int32(d); + bitsLeft = 32; + } + boolPtr[i] = ( bits & 1 ) ? true : false; + bits >>= 1; bitsLeft -= 1; + } + break; + + case kChar: + charPtr = (cl_char *)outData; + for( i = 0; i < count; i++ ) + { + if( 0 == bitsLeft) + { + bits = genrand_int32(d); + bitsLeft = 32; + } + charPtr[i] = (cl_char)( (cl_int)(bits & 255 ) - 127 ); + bits >>= 8; bitsLeft -= 8; + } + break; + + case kUChar: + case kUnsignedChar: + ucharPtr = (cl_uchar *)outData; + for( i = 0; i < count; i++ ) + { + if( 0 == bitsLeft) + { + bits = genrand_int32(d); + bitsLeft = 32; + } + ucharPtr[i] = (cl_uchar)( bits & 255 ); + bits >>= 8; bitsLeft -= 8; + } + break; + + case kShort: + shortPtr = (cl_short *)outData; + for( i = 0; i < count; i++ ) + { + if( 0 == bitsLeft) + { + bits = genrand_int32(d); + bitsLeft = 32; + } + shortPtr[i] = (cl_short)( (cl_int)( bits & 65535 ) - 32767 ); + bits >>= 16; bitsLeft -= 16; + } + break; + + case kUShort: + case kUnsignedShort: + ushortPtr = (cl_ushort *)outData; + for( i = 0; i < count; i++ ) + { + if( 0 == bitsLeft) + { + bits = genrand_int32(d); + bitsLeft = 32; + } + ushortPtr[i] = (cl_ushort)( (cl_int)( bits & 65535 ) ); + 
bits >>= 16; bitsLeft -= 16; + } + break; + + case kInt: + intPtr = (cl_int *)outData; + for( i = 0; i < count; i++ ) + { + intPtr[i] = (cl_int)genrand_int32(d); + } + break; + + case kUInt: + case kUnsignedInt: + uintPtr = (cl_uint *)outData; + for( i = 0; i < count; i++ ) + { + uintPtr[i] = (unsigned int)genrand_int32(d); + } + break; + + case kLong: + longPtr = (cl_long *)outData; + for( i = 0; i < count; i++ ) + { + longPtr[i] = (cl_long)genrand_int32(d) | ( (cl_long)genrand_int32(d) << 32 ); + } + break; + + case kULong: + case kUnsignedLong: + ulongPtr = (cl_ulong *)outData; + for( i = 0; i < count; i++ ) + { + ulongPtr[i] = (cl_ulong)genrand_int32(d) | ( (cl_ulong)genrand_int32(d) << 32 ); + } + break; + + case kFloat: + floatPtr = (cl_float *)outData; + for( i = 0; i < count; i++ ) + { + // [ -(double) 0x7fffffff, (double) 0x7fffffff ] + double t = genrand_real1(d); + floatPtr[i] = (float) ((1.0 - t) * -(double) 0x7fffffff + t * (double) 0x7fffffff); + } + break; + + case kDouble: + doublePtr = (cl_double *)outData; + for( i = 0; i < count; i++ ) + { + cl_long u = (cl_long)genrand_int32(d) | ( (cl_long)genrand_int32(d) << 32 ); + double t = (double) u; + t *= MAKE_HEX_DOUBLE( 0x1.0p-32, 0x1, -32 ); // scale [-2**63, 2**63] to [-2**31, 2**31] + doublePtr[i] = t; + } + break; + + case kHalf: + halfPtr = (ushort *)outData; + for( i = 0; i < count; i++ ) + { + if( 0 == bitsLeft) + { + bits = genrand_int32(d); + bitsLeft = 32; + } + halfPtr[i] = bits & 65535; /* Kindly generates random bits for us */ + bits >>= 16; bitsLeft -= 16; + } + break; + + default: + log_error( "ERROR: Invalid type passed in to generate_random_data!\n" ); + break; + } +} + +void * create_random_data( ExplicitType type, MTdata d, size_t count ) +{ + void *data = malloc( get_explicit_type_size( type ) * count ); + generate_random_data( type, count, d, data ); + return data; +} + +cl_long read_upscale_signed( void *inRaw, ExplicitType inType ) +{ + switch( inType ) + { + case kChar: + 
return (cl_long)( *( (cl_char *)inRaw ) );
+        case kUChar:
+        case kUnsignedChar:
+            return (cl_long)( *( (cl_uchar *)inRaw ) );
+        case kShort:
+            return (cl_long)( *( (cl_short *)inRaw ) );
+        case kUShort:
+        case kUnsignedShort:
+            return (cl_long)( *( (cl_ushort *)inRaw ) );
+        case kInt:
+            return (cl_long)( *( (cl_int *)inRaw ) );
+        case kUInt:
+        case kUnsignedInt:
+            return (cl_long)( *( (cl_uint *)inRaw ) );
+        case kLong:
+            return (cl_long)( *( (cl_long *)inRaw ) );
+        case kULong:
+        case kUnsignedLong:
+            return (cl_long)( *( (cl_ulong *)inRaw ) );
+        default:
+            return 0;
+    }
+}
+
+cl_ulong read_upscale_unsigned( void *inRaw, ExplicitType inType )
+{
+    switch( inType )
+    {
+        case kChar:
+            return (cl_ulong)( *( (cl_char *)inRaw ) );
+        case kUChar:
+        case kUnsignedChar:
+            return (cl_ulong)( *( (cl_uchar *)inRaw ) );
+        case kShort:
+            return (cl_ulong)( *( (cl_short *)inRaw ) );
+        case kUShort:
+        case kUnsignedShort:
+            return (cl_ulong)( *( (cl_ushort *)inRaw ) );
+        case kInt:
+            return (cl_ulong)( *( (cl_int *)inRaw ) );
+        case kUInt:
+        case kUnsignedInt:
+            return (cl_ulong)( *( (cl_uint *)inRaw ) );
+        case kLong:
+            return (cl_ulong)( *( (cl_long *)inRaw ) );
+        case kULong:
+        case kUnsignedLong:
+            return (cl_ulong)( *( (cl_ulong *)inRaw ) );
+        default:
+            return 0;
+    }
+}
+
+float read_as_float( void *inRaw, ExplicitType inType )
+{
+    switch( inType )
+    {
+        case kChar:
+            return (float)( *( (cl_char *)inRaw ) );
+        case kUChar:
+        case kUnsignedChar:
+            // BUG FIX: previously read through (cl_char *), which sign-extended
+            // unsigned byte values > 127 to negative floats. Read as cl_uchar,
+            // matching read_upscale_signed/read_upscale_unsigned above.
+            return (float)( *( (cl_uchar *)inRaw ) );
+        case kShort:
+            return (float)( *( (cl_short *)inRaw ) );
+        case kUShort:
+        case kUnsignedShort:
+            return (float)( *( (cl_ushort *)inRaw ) );
+        case kInt:
+            return (float)( *( (cl_int *)inRaw ) );
+        case kUInt:
+        case kUnsignedInt:
+            return (float)( *( (cl_uint *)inRaw ) );
+        case kLong:
+            return (float)( *( (cl_long *)inRaw ) );
+        case kULong:
+        case kUnsignedLong:
+            return (float)( *( (cl_ulong *)inRaw ) );
+        case kFloat:
+            return *( (float *)inRaw );
+        case kDouble:
+            return (float)
*( (double*)inRaw ); + default: + return 0; + } +} + +float get_random_float(float low, float high, MTdata d) +{ + float t = (float)((double)genrand_int32(d) / (double)0xFFFFFFFF); + return (1.0f - t) * low + t * high; +} + +double get_random_double(double low, double high, MTdata d) +{ + cl_ulong u = (cl_ulong) genrand_int32(d) | ((cl_ulong) genrand_int32(d) << 32 ); + double t = (double) u * MAKE_HEX_DOUBLE( 0x1.0p-64, 0x1, -64); + return (1.0f - t) * low + t * high; +} + +float any_float( MTdata d ) +{ + union + { + float f; + cl_uint u; + }u; + + u.u = genrand_int32(d); + return u.f; +} + + +double any_double( MTdata d ) +{ + union + { + double f; + cl_ulong u; + }u; + + u.u = (cl_ulong) genrand_int32(d) | ((cl_ulong) genrand_int32(d) << 32); + return u.f; +} + +int random_in_range( int minV, int maxV, MTdata d ) +{ + cl_ulong r = ((cl_ulong) genrand_int32(d) ) * (maxV - minV + 1); + return (cl_uint)(r >> 32) + minV; +} + +size_t get_random_size_t(size_t low, size_t high, MTdata d) +{ + enum { N = sizeof(size_t)/sizeof(int) }; + + union { + int word[N]; + size_t size; + } u; + + for (unsigned i=0; i != N; ++i) { + u.word[i] = genrand_int32(d); + } + + assert(low <= high && "Invalid random number range specified"); + size_t range = high - low; + + return (range) ? low + ((u.size - low) % range) : low; +} + + diff --git a/test_common/harness/conversions.h b/test_common/harness/conversions.h new file mode 100644 index 00000000..aa3cb6b4 --- /dev/null +++ b/test_common/harness/conversions.h @@ -0,0 +1,126 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef _conversions_h
+#define _conversions_h
+
+#include "compat.h"
+
+#include "errorHelpers.h"
+#include "mt19937.h"
+// NOTE(review): the four include targets below were lost in extraction
+// (bare "#include" lines); reconstructed from what this header uses
+// (fabs/fabsf -> math.h, FLT_RADIX/FLT_MIN/DBL_MIN -> float.h) — confirm
+// against the upstream file.
+#include <stdio.h>
+#include <string.h>
+#include <float.h>
+#include <math.h>
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/* Note: the next three all have to match in size and order!! */
+
+enum ExplicitTypes
+{
+    kBool = 0,
+    kChar,
+    kUChar,
+    kUnsignedChar,
+    kShort,
+    kUShort,
+    kUnsignedShort,
+    kInt,
+    kUInt,
+    kUnsignedInt,
+    kLong,
+    kULong,
+    kUnsignedLong,
+    kFloat,
+    kHalf,
+    kDouble,
+    kNumExplicitTypes
+};
+
+typedef enum ExplicitTypes ExplicitType;
+
+enum RoundingTypes
+{
+    kRoundToEven = 0,
+    kRoundToZero,
+    kRoundToPosInf,
+    kRoundToNegInf,
+    kRoundToNearest,
+
+    kNumRoundingTypes,
+
+    kDefaultRoundingType = kRoundToNearest
+};
+
+typedef enum RoundingTypes RoundingType;
+
+extern void print_type_to_string(ExplicitType type, void *data, char* string);
+extern size_t get_explicit_type_size( ExplicitType type );
+extern const char * get_explicit_type_name( ExplicitType type );
+extern void convert_explicit_value( void *inRaw, void *outRaw, ExplicitType inType, bool saturate, RoundingType roundType, ExplicitType outType );
+
+extern void generate_random_data( ExplicitType type, size_t count, MTdata d, void *outData );
+extern void * create_random_data( ExplicitType type, MTdata d, size_t count );
+
+extern cl_long read_upscale_signed( void *inRaw, ExplicitType inType );
+extern cl_ulong read_upscale_unsigned( void *inRaw, ExplicitType inType );
+extern float read_as_float( void *inRaw, ExplicitType inType );
+
+extern float
get_random_float(float low, float high, MTdata d); +extern double get_random_double(double low, double high, MTdata d); +extern float any_float( MTdata d ); +extern double any_double( MTdata d ); + +extern int random_in_range( int minV, int maxV, MTdata d ); + +size_t get_random_size_t(size_t low, size_t high, MTdata d); + +// Note: though this takes a double, this is for use with single precision tests +static inline int IsFloatSubnormal( float x ) +{ +#if 2 == FLT_RADIX + // Do this in integer to avoid problems with FTZ behavior + union{ float d; uint32_t u;}u; + u.d = fabsf(x); + return (u.u-1) < 0x007fffffU; +#else + // rely on floating point hardware for non-radix2 non-IEEE-754 hardware -- will fail if you flush subnormals to zero + return fabs(x) < (double) FLT_MIN && x != 0.0; +#endif +} + +static inline int IsDoubleSubnormal( double x ) +{ +#if 2 == FLT_RADIX + // Do this in integer to avoid problems with FTZ behavior + union{ double d; uint64_t u;}u; + u.d = fabs( x); + return (u.u-1) < 0x000fffffffffffffULL; +#else + // rely on floating point hardware for non-radix2 non-IEEE-754 hardware -- will fail if you flush subnormals to zero + return fabs(x) < (double) DBL_MIN && x != 0.0; +#endif +} + +#if defined(__cplusplus) +} +#endif + +#endif // _conversions_h + + diff --git a/test_common/harness/errorHelpers.c b/test_common/harness/errorHelpers.c new file mode 100644 index 00000000..db26e456 --- /dev/null +++ b/test_common/harness/errorHelpers.c @@ -0,0 +1,813 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "compat.h" +#include +#include +#include + +#include "errorHelpers.h" + +extern bool gOfflineCompiler; + +const char *IGetErrorString( int clErrorCode ) +{ + switch( clErrorCode ) + { + case CL_SUCCESS: return "CL_SUCCESS"; + case CL_DEVICE_NOT_FOUND: return "CL_DEVICE_NOT_FOUND"; + case CL_DEVICE_NOT_AVAILABLE: return "CL_DEVICE_NOT_AVAILABLE"; + case CL_COMPILER_NOT_AVAILABLE: return "CL_COMPILER_NOT_AVAILABLE"; + case CL_MEM_OBJECT_ALLOCATION_FAILURE: return "CL_MEM_OBJECT_ALLOCATION_FAILURE"; + case CL_OUT_OF_RESOURCES: return "CL_OUT_OF_RESOURCES"; + case CL_OUT_OF_HOST_MEMORY: return "CL_OUT_OF_HOST_MEMORY"; + case CL_PROFILING_INFO_NOT_AVAILABLE: return "CL_PROFILING_INFO_NOT_AVAILABLE"; + case CL_MEM_COPY_OVERLAP: return "CL_MEM_COPY_OVERLAP"; + case CL_IMAGE_FORMAT_MISMATCH: return "CL_IMAGE_FORMAT_MISMATCH"; + case CL_IMAGE_FORMAT_NOT_SUPPORTED: return "CL_IMAGE_FORMAT_NOT_SUPPORTED"; + case CL_BUILD_PROGRAM_FAILURE: return "CL_BUILD_PROGRAM_FAILURE"; + case CL_MAP_FAILURE: return "CL_MAP_FAILURE"; + case CL_MISALIGNED_SUB_BUFFER_OFFSET: return "CL_MISALIGNED_SUB_BUFFER_OFFSET"; + case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST: return "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST"; + case CL_COMPILE_PROGRAM_FAILURE: return "CL_COMPILE_PROGRAM_FAILURE"; + case CL_LINKER_NOT_AVAILABLE: return "CL_LINKER_NOT_AVAILABLE"; + case CL_LINK_PROGRAM_FAILURE: return "CL_LINK_PROGRAM_FAILURE"; + case CL_DEVICE_PARTITION_FAILED: return "CL_DEVICE_PARTITION_FAILED"; + case CL_KERNEL_ARG_INFO_NOT_AVAILABLE: return 
"CL_KERNEL_ARG_INFO_NOT_AVAILABLE"; + case CL_INVALID_VALUE: return "CL_INVALID_VALUE"; + case CL_INVALID_DEVICE_TYPE: return "CL_INVALID_DEVICE_TYPE"; + case CL_INVALID_DEVICE: return "CL_INVALID_DEVICE"; + case CL_INVALID_CONTEXT: return "CL_INVALID_CONTEXT"; + case CL_INVALID_QUEUE_PROPERTIES: return "CL_INVALID_QUEUE_PROPERTIES"; + case CL_INVALID_COMMAND_QUEUE: return "CL_INVALID_COMMAND_QUEUE"; + case CL_INVALID_HOST_PTR: return "CL_INVALID_HOST_PTR"; + case CL_INVALID_MEM_OBJECT: return "CL_INVALID_MEM_OBJECT"; + case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR"; + case CL_INVALID_IMAGE_SIZE: return "CL_INVALID_IMAGE_SIZE"; + case CL_INVALID_SAMPLER: return "CL_INVALID_SAMPLER"; + case CL_INVALID_BINARY: return "CL_INVALID_BINARY"; + case CL_INVALID_BUILD_OPTIONS: return "CL_INVALID_BUILD_OPTIONS"; + case CL_INVALID_PROGRAM: return "CL_INVALID_PROGRAM"; + case CL_INVALID_PROGRAM_EXECUTABLE: return "CL_INVALID_PROGRAM_EXECUTABLE"; + case CL_INVALID_KERNEL_NAME: return "CL_INVALID_KERNEL_NAME"; + case CL_INVALID_KERNEL_DEFINITION: return "CL_INVALID_KERNEL_DEFINITION"; + case CL_INVALID_KERNEL: return "CL_INVALID_KERNEL"; + case CL_INVALID_ARG_INDEX: return "CL_INVALID_ARG_INDEX"; + case CL_INVALID_ARG_VALUE: return "CL_INVALID_ARG_VALUE"; + case CL_INVALID_ARG_SIZE: return "CL_INVALID_ARG_SIZE"; + case CL_INVALID_KERNEL_ARGS: return "CL_INVALID_KERNEL_ARGS"; + case CL_INVALID_WORK_DIMENSION: return "CL_INVALID_WORK_DIMENSION"; + case CL_INVALID_WORK_GROUP_SIZE: return "CL_INVALID_WORK_GROUP_SIZE"; + case CL_INVALID_WORK_ITEM_SIZE: return "CL_INVALID_WORK_ITEM_SIZE"; + case CL_INVALID_GLOBAL_OFFSET: return "CL_INVALID_GLOBAL_OFFSET"; + case CL_INVALID_EVENT_WAIT_LIST: return "CL_INVALID_EVENT_WAIT_LIST"; + case CL_INVALID_EVENT: return "CL_INVALID_EVENT"; + case CL_INVALID_OPERATION: return "CL_INVALID_OPERATION"; + case CL_INVALID_GL_OBJECT: return "CL_INVALID_GL_OBJECT"; + case CL_INVALID_BUFFER_SIZE: return 
"CL_INVALID_BUFFER_SIZE"; + case CL_INVALID_MIP_LEVEL: return "CL_INVALID_MIP_LEVEL"; + case CL_INVALID_GLOBAL_WORK_SIZE: return "CL_INVALID_GLOBAL_WORK_SIZE"; + case CL_INVALID_PROPERTY: return "CL_INVALID_PROPERTY"; + case CL_INVALID_IMAGE_DESCRIPTOR: return "CL_INVALID_IMAGE_DESCRIPTOR"; + case CL_INVALID_COMPILER_OPTIONS: return "CL_INVALID_COMPILER_OPTIONS"; + case CL_INVALID_LINKER_OPTIONS: return "CL_INVALID_LINKER_OPTIONS"; + case CL_INVALID_DEVICE_PARTITION_COUNT: return "CL_INVALID_DEVICE_PARTITION_COUNT"; + default: return "(unknown)"; + } +} + +const char *GetChannelOrderName( cl_channel_order order ) +{ + switch( order ) + { + case CL_R: return "CL_R"; + case CL_A: return "CL_A"; + case CL_Rx: return "CL_Rx"; + case CL_RG: return "CL_RG"; + case CL_RA: return "CL_RA"; + case CL_RGx: return "CL_RGx"; + case CL_RGB: return "CL_RGB"; + case CL_RGBx: return "CL_RGBx"; + case CL_RGBA: return "CL_RGBA"; + case CL_ARGB: return "CL_ARGB"; + case CL_BGRA: return "CL_BGRA"; + case CL_INTENSITY: return "CL_INTENSITY"; + case CL_LUMINANCE: return "CL_LUMINANCE"; +#if defined CL_1RGB_APPLE + case CL_1RGB_APPLE: return "CL_1RGB_APPLE"; +#endif +#if defined CL_BGR1_APPLE + case CL_BGR1_APPLE: return "CL_BGR1_APPLE"; +#endif +#if defined CL_ABGR_APPLE + case CL_ABGR_APPLE: return "CL_ABGR_APPLE"; +#endif + case CL_DEPTH: return "CL_DEPTH"; + case CL_DEPTH_STENCIL: return "CL_DEPTH_STENCIL"; + case CL_sRGB: return "CL_sRGB"; + case CL_sRGBA: return "CL_sRGBA"; + case CL_sRGBx: return "CL_sRGBx"; + case CL_sBGRA: return "CL_sBGRA"; + case CL_ABGR: return "CL_ABGR"; + default: return NULL; + } +} + +int IsChannelOrderSupported( cl_channel_order order ) +{ + switch( order ) + { + case CL_R: + case CL_A: + case CL_Rx: + case CL_RG: + case CL_RA: + case CL_RGx: + case CL_RGB: + case CL_RGBx: + case CL_RGBA: + case CL_ARGB: + case CL_BGRA: + case CL_INTENSITY: + case CL_LUMINANCE: + case CL_ABGR: + case CL_sRGB: + case CL_sRGBx: + case CL_sBGRA: + case CL_sRGBA: + case 
CL_DEPTH: + return 1; +#if defined CL_1RGB_APPLE + case CL_1RGB_APPLE: + return 1; +#endif +#if defined CL_BGR1_APPLE + case CL_BGR1_APPLE: + return 1; +#endif + default: + return 0; + } +} + +const char *GetChannelTypeName( cl_channel_type type ) +{ + switch( type ) + { + case CL_SNORM_INT8: return "CL_SNORM_INT8"; + case CL_SNORM_INT16: return "CL_SNORM_INT16"; + case CL_UNORM_INT8: return "CL_UNORM_INT8"; + case CL_UNORM_INT16: return "CL_UNORM_INT16"; + case CL_UNORM_SHORT_565: return "CL_UNORM_SHORT_565"; + case CL_UNORM_SHORT_555: return "CL_UNORM_SHORT_555"; + case CL_UNORM_INT_101010: return "CL_UNORM_INT_101010"; + case CL_SIGNED_INT8: return "CL_SIGNED_INT8"; + case CL_SIGNED_INT16: return "CL_SIGNED_INT16"; + case CL_SIGNED_INT32: return "CL_SIGNED_INT32"; + case CL_UNSIGNED_INT8: return "CL_UNSIGNED_INT8"; + case CL_UNSIGNED_INT16: return "CL_UNSIGNED_INT16"; + case CL_UNSIGNED_INT32: return "CL_UNSIGNED_INT32"; + case CL_HALF_FLOAT: return "CL_HALF_FLOAT"; + case CL_FLOAT: return "CL_FLOAT"; +#ifdef CL_SFIXED14_APPLE + case CL_SFIXED14_APPLE: return "CL_SFIXED14_APPLE"; +#endif + case CL_UNORM_INT24: return "CL_UNORM_INT24"; + default: return NULL; + } +} + +int IsChannelTypeSupported( cl_channel_type type ) +{ + switch( type ) + { + case CL_SNORM_INT8: + case CL_SNORM_INT16: + case CL_UNORM_INT8: + case CL_UNORM_INT16: + case CL_UNORM_INT24: + case CL_UNORM_SHORT_565: + case CL_UNORM_SHORT_555: + case CL_UNORM_INT_101010: + case CL_SIGNED_INT8: + case CL_SIGNED_INT16: + case CL_SIGNED_INT32: + case CL_UNSIGNED_INT8: + case CL_UNSIGNED_INT16: + case CL_UNSIGNED_INT32: + case CL_HALF_FLOAT: + case CL_FLOAT: + return 1; +#ifdef CL_SFIXED14_APPLE + case CL_SFIXED14_APPLE: + return 1; +#endif + default: + return 0; + } +} + +const char *GetAddressModeName( cl_addressing_mode mode ) +{ + switch( mode ) + { + case CL_ADDRESS_NONE: return "CL_ADDRESS_NONE"; + case CL_ADDRESS_CLAMP_TO_EDGE: return "CL_ADDRESS_CLAMP_TO_EDGE"; + case CL_ADDRESS_CLAMP: return 
"CL_ADDRESS_CLAMP"; + case CL_ADDRESS_REPEAT: return "CL_ADDRESS_REPEAT"; + case CL_ADDRESS_MIRRORED_REPEAT: return "CL_ADDRESS_MIRRORED_REPEAT"; + default: return NULL; + } +} + +const char *GetDeviceTypeName( cl_device_type type ) +{ + switch( type ) + { + case CL_DEVICE_TYPE_GPU: return "CL_DEVICE_TYPE_GPU"; + case CL_DEVICE_TYPE_CPU: return "CL_DEVICE_TYPE_CPU"; + case CL_DEVICE_TYPE_ACCELERATOR: return "CL_DEVICE_TYPE_ACCELERATOR"; + case CL_DEVICE_TYPE_ALL: return "CL_DEVICE_TYPE_ALL"; + default: return NULL; + } +} + +const char *GetDataVectorString( void *dataBuffer, size_t typeSize, size_t vecSize, char *buffer ) +{ + static char scratch[ 1024 ]; + size_t i, j; + + if( buffer == NULL ) + buffer = scratch; + + unsigned char *p = (unsigned char *)dataBuffer; + char *bPtr; + + buffer[ 0 ] = 0; + bPtr = buffer; + for( i = 0; i < vecSize; i++ ) + { + if( i > 0 ) + { + bPtr[ 0 ] = ' '; + bPtr++; + } + for( j = 0; j < typeSize; j++ ) + { + sprintf( bPtr, "%02x", (unsigned int)p[ typeSize - j - 1 ] ); + bPtr += 2; + } + p += typeSize; + } + bPtr[ 0 ] = 0; + + return buffer; +} + +#ifndef MAX +#define MAX( _a, _b ) ((_a) > (_b) ? (_a) : (_b)) +#endif + +#if defined( _MSC_VER ) +#define scalbnf(_a, _i ) ldexpf( _a, _i ) +#define scalbn(_a, _i ) ldexp( _a, _i ) +#define scalbnl(_a, _i ) ldexpl( _a, _i ) +#endif + +static float Ulp_Error_Half_Float( float test, double reference ); +static inline float half2float( cl_ushort half ); + +// taken from math tests +#define HALF_MIN_EXP -13 +#define HALF_MANT_DIG 11 +static float Ulp_Error_Half_Float( float test, double reference ) +{ + union{ double d; uint64_t u; }u; u.d = reference; + + // Note: This function presumes that someone has already tested whether the result is correctly, + // rounded before calling this function. That test: + // + // if( (float) reference == test ) + // return 0.0f; + // + // would ensure that cases like fabs(reference) > FLT_MAX are weeded out before we get here. 
+ // Otherwise, we'll return inf ulp error here, for what are otherwise correctly rounded + // results. + + double testVal = test; + if( u.u & 0x000fffffffffffffULL ) + { // Non-power of two and NaN + if( isnan( reference ) && isnan( test ) ) + return 0.0f; // if we are expecting a NaN, any NaN is fine + + // The unbiased exponent of the ulp unit place + int ulp_exp = HALF_MANT_DIG - 1 - MAX( ilogb( reference), HALF_MIN_EXP-1 ); + + // Scale the exponent of the error + return (float) scalbn( testVal - reference, ulp_exp ); + } + + if( isinf( reference ) ) + { + if( (double) test == reference ) + return 0.0f; + + return (float) (testVal - reference ); + } + + // reference is a normal power of two or a zero + int ulp_exp = HALF_MANT_DIG - 1 - MAX( ilogb( reference) - 1, HALF_MIN_EXP-1 ); + + // Scale the exponent of the error + return (float) scalbn( testVal - reference, ulp_exp ); +} + +// Taken from vLoadHalf test +static inline float half2float( cl_ushort us ) +{ + uint32_t u = us; + uint32_t sign = (u << 16) & 0x80000000; + int32_t exponent = (u & 0x7c00) >> 10; + uint32_t mantissa = (u & 0x03ff) << 13; + union{ unsigned int u; float f;}uu; + + if( exponent == 0 ) + { + if( mantissa == 0 ) + return sign ? 
-0.0f : 0.0f; + + int shift = __builtin_clz( mantissa ) - 8; + exponent -= shift-1; + mantissa <<= shift; + mantissa &= 0x007fffff; + } + else + if( exponent == 31) + { + uu.u = mantissa | sign; + if( mantissa ) + uu.u |= 0x7fc00000; + else + uu.u |= 0x7f800000; + + return uu.f; + } + + exponent += 127 - 15; + exponent <<= 23; + + exponent |= mantissa; + uu.u = exponent | sign; + + return uu.f; +} + +float Ulp_Error_Half( cl_ushort test, float reference ) +{ + return Ulp_Error_Half_Float( half2float(test), reference ); +} + + +float Ulp_Error( float test, double reference ) +{ + union{ double d; uint64_t u; }u; u.d = reference; + double testVal = test; + + // Note: This function presumes that someone has already tested whether the result is correctly, + // rounded before calling this function. That test: + // + // if( (float) reference == test ) + // return 0.0f; + // + // would ensure that cases like fabs(reference) > FLT_MAX are weeded out before we get here. + // Otherwise, we'll return inf ulp error here, for what are otherwise correctly rounded + // results. + + + if( isinf( reference ) ) + { + if( testVal == reference ) + return 0.0f; + + return (float) (testVal - reference ); + } + + if( isinf( testVal) ) + { // infinite test value, but finite (but possibly overflowing in float) reference. + // + // The function probably overflowed prematurely here. Formally, the spec says this is + // an infinite ulp error and should not be tolerated. Unfortunately, this would mean + // that the internal precision of some half_pow implementations would have to be 29+ bits + // at half_powr( 0x1.fffffep+31, 4) to correctly determine that 4*log2( 0x1.fffffep+31 ) + // is not exactly 128.0. You might represent this for example as 4*(32 - ~2**-24), which + // after rounding to single is 4*32 = 128, which will ultimately result in premature + // overflow, even though a good faith representation would be correct to within 2**-29 + // interally. 
+ + // In the interest of not requiring the implementation go to extraordinary lengths to + // deliver a half precision function, we allow premature overflow within the limit + // of the allowed ulp error. Towards, that end, we "pretend" the test value is actually + // 2**128, the next value that would appear in the number line if float had sufficient range. + testVal = copysign( MAKE_HEX_DOUBLE(0x1.0p128, 0x1LL, 128), testVal ); + + // Note that the same hack may not work in long double, which is not guaranteed to have + // more range than double. It is not clear that premature overflow should be tolerated for + // double. + } + + if( u.u & 0x000fffffffffffffULL ) + { // Non-power of two and NaN + if( isnan( reference ) && isnan( test ) ) + return 0.0f; // if we are expecting a NaN, any NaN is fine + + // The unbiased exponent of the ulp unit place + int ulp_exp = FLT_MANT_DIG - 1 - MAX( ilogb( reference), FLT_MIN_EXP-1 ); + + // Scale the exponent of the error + return (float) scalbn( testVal - reference, ulp_exp ); + } + + // reference is a normal power of two or a zero + // The unbiased exponent of the ulp unit place + int ulp_exp = FLT_MANT_DIG - 1 - MAX( ilogb( reference) - 1, FLT_MIN_EXP-1 ); + + // Scale the exponent of the error + return (float) scalbn( testVal - reference, ulp_exp ); +} + +float Ulp_Error_Double( double test, long double reference ) +{ + // Deal with long double = double + // On most systems long double is a higher precision type than double. They provide either + // a 80-bit or greater floating point type, or they provide a head-tail double double format. + // That is sufficient to represent the accuracy of a floating point result to many more bits + // than double and we can calculate sub-ulp errors. This is the standard system for which this + // test suite is designed. + // + // On some systems double and long double are the same thing. 
Then we run into a problem, + // because our representation of the infinitely precise result (passed in as reference above) + // can be off by as much as a half double precision ulp itself. In this case, we inflate the + // reported error by half an ulp to take this into account. A more correct and permanent fix + // would be to undertake refactoring the reference code to return results in this format: + // + // typedef struct DoubleReference + // { // true value = correctlyRoundedResult + ulps * ulp(correctlyRoundedResult) (infinitely precise) + // double correctlyRoundedResult; // as best we can + // double ulps; // plus a fractional amount to account for the difference + // }DoubleReference; // between infinitely precise result and correctlyRoundedResult, in units of ulps. + // + // This would provide a useful higher-than-double precision format for everyone that we can use, + // and would solve a few problems with representing absolute errors below DBL_MIN and over DBL_MAX for systems + // that use a head to tail double double for long double. + + // Note: This function presumes that someone has already tested whether the result is correctly, + // rounded before calling this function. That test: + // + // if( (float) reference == test ) + // return 0.0f; + // + // would ensure that cases like fabs(reference) > FLT_MAX are weeded out before we get here. + // Otherwise, we'll return inf ulp error here, for what are otherwise correctly rounded + // results. 
+ + + int x; + long double testVal = test; + if( 0.5L != frexpl( reference, &x) ) + { // Non-power of two and NaN + if( isinf( reference ) ) + { + if( testVal == reference ) + return 0.0f; + + return (float) ( testVal - reference ); + } + + if( isnan( reference ) && isnan( test ) ) + return 0.0f; // if we are expecting a NaN, any NaN is fine + + // The unbiased exponent of the ulp unit place + int ulp_exp = DBL_MANT_DIG - 1 - MAX( ilogbl( reference), DBL_MIN_EXP-1 ); + + // Scale the exponent of the error + float result = (float) scalbnl( testVal - reference, ulp_exp ); + + // account for rounding error in reference result on systems that do not have a higher precision floating point type (see above) + if( sizeof(long double) == sizeof( double ) ) + result += copysignf( 0.5f, result); + + return result; + + } + + // reference is a normal power of two or a zero + // The unbiased exponent of the ulp unit place + int ulp_exp = DBL_MANT_DIG - 1 - MAX( ilogbl( reference) - 1, DBL_MIN_EXP-1 ); + + // Scale the exponent of the error + float result = (float) scalbnl( testVal - reference, ulp_exp ); + + // account for rounding error in reference result on systems that do not have a higher precision floating point type (see above) + if( sizeof(long double) == sizeof( double ) ) + result += copysignf( 0.5f, result); + + return result; +} + +cl_int OutputBuildLogs(cl_program program, cl_uint num_devices, cl_device_id *device_list) +{ + int error; + size_t size_ret; + + // Does the program object exist? 
+ if (program != NULL) { + + // Was the number of devices given + if (num_devices == 0) { + + // If zero devices were specified then allocate and query the device list from the context + cl_context context; + error = clGetProgramInfo(program, CL_PROGRAM_CONTEXT, sizeof(context), &context, NULL); + test_error( error, "Unable to query program's context" ); + error = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &size_ret); + test_error( error, "Unable to query context's device size" ); + num_devices = size_ret / sizeof(cl_device_id); + device_list = (cl_device_id *) malloc(size_ret); + if (device_list == NULL) { + print_error( error, "malloc failed" ); + return CL_OUT_OF_HOST_MEMORY; + } + error = clGetContextInfo(context, CL_CONTEXT_DEVICES, size_ret, device_list, NULL); + test_error( error, "Unable to query context's devices" ); + + } + + // For each device in the device_list + unsigned int i; + for (i = 0; i < num_devices; i++) { + + // Get the build status + cl_build_status build_status; + error = clGetProgramBuildInfo(program, + device_list[i], + CL_PROGRAM_BUILD_STATUS, + sizeof(build_status), + &build_status, + &size_ret); + test_error( error, "Unable to query build status" ); + + // If the build failed then log the status, and allocate the build log, log it and free it + if (build_status != CL_BUILD_SUCCESS) { + + log_error("ERROR: CL_PROGRAM_BUILD_STATUS=%d\n", (int) build_status); + error = clGetProgramBuildInfo(program, device_list[i], CL_PROGRAM_BUILD_LOG, 0, NULL, &size_ret); + test_error( error, "Unable to query build log size" ); + char *build_log = (char *) malloc(size_ret); + error = clGetProgramBuildInfo(program, device_list[i], CL_PROGRAM_BUILD_LOG, size_ret, build_log, &size_ret); + test_error( error, "Unable to query build log" ); + log_error("ERROR: CL_PROGRAM_BUILD_LOG:\n%s\n", build_log); + free(build_log); + + } + + } + + // Was the number of devices given + if (num_devices == 0) { + + // If zero devices were specified then free the 
device list + free(device_list); + + } + + } + + return CL_SUCCESS; +} + +const char * subtests_requiring_opencl_1_2[] = { + "device_partition_equally", + "device_partition_by_counts", + "device_partition_by_affinity_domain_numa", + "device_partition_by_affinity_domain_l4_cache", + "device_partition_by_affinity_domain_l3_cache", + "device_partition_by_affinity_domain_l2_cache", + "device_partition_by_affinity_domain_l1_cache", + "device_partition_by_affinity_domain_next_partitionable", + "device_partition_all", + "buffer_fill_int", + "buffer_fill_uint", + "buffer_fill_short", + "buffer_fill_ushort", + "buffer_fill_char", + "buffer_fill_uchar", + "buffer_fill_long", + "buffer_fill_ulong", + "buffer_fill_float", + "buffer_fill_struct", + "test_mem_host_write_only_buffer", + "test_mem_host_write_only_subbuffer", + "test_mem_host_no_access_buffer", + "test_mem_host_no_access_subbuffer", + "test_mem_host_read_only_image", + "test_mem_host_write_only_image", + "test_mem_host_no_access_image", + // CL_MEM_HOST_{READ|WRITE}_ONLY api/ + "get_buffer_info", + "get_image1d_info", + "get_image1d_array_info", + "get_image2d_array_info", + // gl/ + "images_read_1D", + "images_write_1D", + "images_1D_getinfo", + "images_read_1Darray", + "images_write_1Darray", + "images_1Darray_getinfo", + "images_read_2Darray", + "images_write_2Darray", + "images_2Darray_getinfo", + "buffer_migrate", + "image_migrate", + // compiler/ + "load_program_source", + "load_multistring_source", + "load_two_kernel_source", + "load_null_terminated_source", + "load_null_terminated_multi_line_source", + "load_null_terminated_partial_multi_line_source", + "load_discreet_length_source", + "get_program_source", + "get_program_build_info", + "get_program_info", + "large_compile", + "async_build", + "options_build_optimizations", + "options_build_macro", + "options_build_macro_existence", + "options_include_directory", + "options_denorm_cache", + "preprocessor_define_udef", + "preprocessor_include", + 
"preprocessor_line_error", + "preprocessor_pragma", + "compiler_defines_for_extensions", + "image_macro", + "simple_compile_only", + "simple_static_compile_only", + "simple_extern_compile_only", + "simple_compile_with_callback", + "simple_embedded_header_compile", + "simple_link_only", + "two_file_regular_variable_access", + "two_file_regular_struct_access", + "two_file_regular_function_access", + "simple_link_with_callback", + "simple_embedded_header_link", + "execute_after_simple_compile_and_link", + "execute_after_simple_compile_and_link_no_device_info", + "execute_after_simple_compile_and_link_with_defines", + "execute_after_simple_compile_and_link_with_callbacks", + "execute_after_simple_library_with_link", + "execute_after_two_file_link", + "execute_after_two_file_link", + "execute_after_embedded_header_link", + "execute_after_included_header_link", + "execute_after_serialize_reload_object", + "execute_after_serialize_reload_library", + "simple_library_only", + "simple_library_with_callback", + "simple_library_with_link", + "two_file_link", + "multi_file_libraries", + "multiple_files", + "multiple_libraries", + "multiple_files_multiple_libraries", + "multiple_embedded_headers", + "program_binary_type", + "compile_and_link_status_options_log", + // CL_PROGRAM_NUM_KERNELS, in api/ + "get_kernel_arg_info", + "create_kernels_in_program", + // clEnqueue..WithWaitList, in events/ + "event_enqueue_marker_with_event_list", + "event_enqueue_barrier_with_event_list", + "popcount" +}; + +const char * subtests_to_skip_with_offline_compiler[] = { + "get_kernel_arg_info", + "binary_create", + "load_program_source", + "load_multistring_source", + "load_two_kernel_source", + "load_null_terminated_source", + "load_null_terminated_multi_line_source", + "load_null_terminated_partial_multi_line_source", + "load_discreet_length_source", + "get_program_source", + "get_program_build_info", + "options_build_optimizations", + "options_build_macro", + "options_build_macro_existence", 
+ "options_include_directory", + "options_denorm_cache", + "preprocessor_define_udef", + "preprocessor_include", + "preprocessor_line_error", + "preprocessor_pragma", + "compiler_defines_for_extensions", + "image_macro", + "simple_extern_compile_only", + "simple_embedded_header_compile", + "two_file_regular_variable_access", + "two_file_regular_struct_access", + "two_file_regular_function_access", + "simple_embedded_header_link", + "execute_after_simple_compile_and_link_with_defines", + "execute_after_simple_compile_and_link_with_callbacks", + "execute_after_embedded_header_link", + "execute_after_included_header_link", + "multi_file_libraries", + "multiple_files", + "multiple_libraries", + "multiple_files_multiple_libraries", + "multiple_embedded_headers", + "program_binary_type", + "compile_and_link_status_options_log", +}; + +int check_opencl_version_with_testname(const char *subtestname, cl_device_id device) +{ + int nRequiring12 = sizeof(subtests_requiring_opencl_1_2)/sizeof(char *); + size_t i; + for(i=0; i < nRequiring12; ++i) { + if(!strcmp(subtestname, subtests_requiring_opencl_1_2[i])) { + return check_opencl_version(device, 1, 2); + } + } + return 0; +} + +int check_opencl_version(cl_device_id device, cl_uint requestedMajorVersion, cl_uint requestedMinorVersion) { + int error; + char device_version[1024]; + cl_uint majorVersion = 0, minorVersion = 0; + const char * required_version_ocl_12="OpenCL 1.2 "; + + memset( device_version, 0, sizeof( device_version ) ); + error = clGetDeviceInfo( device, CL_DEVICE_VERSION, sizeof(device_version), device_version, NULL ); + test_error(error, "unable to get CL_DEVICE_VERSION"); + + if ( strncmp( device_version, "OpenCL 1.2", 10 ) == 0 && ( device_version[ 10 ] == 0 || device_version[ 10 ] == ' ' ) ) { + majorVersion = 1; + minorVersion = 2; + } else if ( strncmp( device_version, "OpenCL 1.1", 10 ) == 0 && ( device_version[ 10 ] == 0 || device_version[ 10 ] == ' ' ) ) { + majorVersion = 1; + minorVersion = 1; + } 
else if ( strncmp( device_version, "OpenCL 2.0", 10 ) == 0 && ( device_version[ 10 ] == 0 || device_version[ 10 ] == ' ' ) ) { + majorVersion = 2; + minorVersion = 0; + } else if ( strncmp( device_version, "OpenCL 2.1", 10 ) == 0 && ( device_version[ 10 ] == 0 || device_version[ 10 ] == ' ' ) ) { + majorVersion = 2; + minorVersion = 1; + } else { + log_error( "ERROR: Unexpected version string: `%s'.\n", device_version ); + return 1; + }; + + if (majorVersion >= requestedMajorVersion) + return 0; + + if (minorVersion >= requestedMinorVersion) + return 0; + + return 1; +} + +int check_functions_for_offline_compiler(const char *subtestname, cl_device_id device) +{ + if(gOfflineCompiler) + { + int nNotRequiredWithOfflineCompiler = sizeof(subtests_to_skip_with_offline_compiler)/sizeof(char *); + size_t i; + for(i=0; i < nNotRequiredWithOfflineCompiler; ++i) { + if(!strcmp(subtestname, subtests_to_skip_with_offline_compiler[i])) { + return 1; + } + } + } + return 0; +} diff --git a/test_common/harness/errorHelpers.h b/test_common/harness/errorHelpers.h new file mode 100644 index 00000000..3612c8c7 --- /dev/null +++ b/test_common/harness/errorHelpers.h @@ -0,0 +1,164 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef _errorHelpers_h +#define _errorHelpers_h + +#ifdef __APPLE__ +#include +#else +#include +#endif +#include +#ifdef __cplusplus +extern "C" { +#endif + +#define LOWER_IS_BETTER 0 +#define HIGHER_IS_BETTER 1 + +// If USE_ATF is defined, all log_error and log_info calls can be routed to test library +// functions as described below. This is helpful for integration into an automated testing +// system. +#if USE_ATF +// export BUILD_WITH_ATF=1 + #include + #define test_start() ATFTestStart() + #define log_info ATFLogInfo + #define log_error ATFLogError + #define log_missing_feature ATFLogMissingFeature + #define log_perf(_number, _higherBetter, _numType, _format, ...) ATFLogPerformanceNumber(_number, _higherBetter, _numType, _format, ##__VA_ARGS__) + #define test_finish() ATFTestFinish() + #define vlog_perf(_number, _higherBetter, _numType, _format, ...) ATFLogPerformanceNumber(_number, _higherBetter, _numType, _format,##__VA_ARGS__) + #define vlog ATFLogInfo + #define vlog_error ATFLogError +#else + #include + #define test_start() + #define log_info printf + #define log_error printf + #define log_missing_feature printf + #define log_perf(_number, _higherBetter, _numType, _format, ...) printf("Performance Number " _format " (in %s, %s): %g\n",##__VA_ARGS__, _numType, \ + _higherBetter?"higher is better":"lower is better", _number ) + #define test_finish() + #define vlog_perf(_number, _higherBetter, _numType, _format, ...) 
printf("Performance Number " _format " (in %s, %s): %g\n",##__VA_ARGS__, _numType, \ + _higherBetter?"higher is better":"lower is better" , _number) + #ifdef _WIN32 + #ifdef __MINGW32__ + // Use __mingw_printf since it supports "%a" format specifier + #define vlog __mingw_printf + #define vlog_error __mingw_printf + #else + // Use home-baked function that treats "%a" as "%f" + static int vlog_win32(const char *format, ...); + #define vlog vlog_win32 + #define vlog_error vlog_win32 + #endif + #else + #define vlog_error printf + #define vlog printf + #endif +#endif + +#define ct_assert(b) ct_assert_i(b, __LINE__) +#define ct_assert_i(b, line) ct_assert_ii(b, line) +#define ct_assert_ii(b, line) int _compile_time_assertion_on_line_##line[b ? 1 : -1]; + +#define test_error(errCode,msg) test_error_ret(errCode,msg,errCode) +#define test_error_ret(errCode,msg,retValue) { if( errCode != CL_SUCCESS ) { print_error( errCode, msg ); return retValue ; } } +#define print_error(errCode,msg) log_error( "ERROR: %s! (%s from %s:%d)\n", msg, IGetErrorString( errCode ), __FILE__, __LINE__ ); + +#define test_missing_feature(errCode, msg) test_missing_feature_ret(errCode, msg, errCode) +// this macro should always return CL_SUCCESS, but print the missing feature message +#define test_missing_feature_ret(errCode,msg,retValue) { if( errCode != CL_SUCCESS ) { print_missing_feature( errCode, msg ); return CL_SUCCESS ; } } +#define print_missing_feature(errCode, msg) log_missing_feature("ERROR: Subtest %s tests a feature not supported by the device version! 
(from %s:%d)\n", msg, __FILE__, __LINE__ ); + +#define test_missing_support_offline_cmpiler(errCode, msg) test_missing_support_offline_cmpiler_ret(errCode, msg, errCode) +// this macro should always return CL_SUCCESS, but print the skip message on test not supported with offline compiler +#define test_missing_support_offline_cmpiler_ret(errCode,msg,retValue) { if( errCode != CL_SUCCESS ) { log_info( "INFO: Subtest %s tests is not supported in offline compiler execution path! (from %s:%d)\n", msg, __FILE__, __LINE__ ); return CL_SUCCESS ; } } + +// expected error code vs. what we got +#define test_failure_error(errCode, expectedErrCode, msg) test_failure_error_ret(errCode, expectedErrCode, msg, errCode != expectedErrCode) +#define test_failure_error_ret(errCode, expectedErrCode, msg, retValue) { if( errCode != expectedErrCode ) { print_failure_error( errCode, expectedErrCode, msg ); return retValue ; } } +#define print_failure_error(errCode, expectedErrCode, msg) log_error( "ERROR: %s! (Got %s, expected %s from %s:%d)\n", msg, IGetErrorString( errCode ), IGetErrorString( expectedErrCode ), __FILE__, __LINE__ ); +#define test_failure_warning(errCode, expectedErrCode, msg) test_failure_warning_ret(errCode, expectedErrCode, msg, errCode != expectedErrCode) +#define test_failure_warning_ret(errCode, expectedErrCode, msg, retValue) { if( errCode != expectedErrCode ) { print_failure_warning( errCode, expectedErrCode, msg ); warnings++ ; } } +#define print_failure_warning(errCode, expectedErrCode, msg) log_error( "WARNING: %s! 
(Got %s, expected %s from %s:%d)\n", msg, IGetErrorString( errCode ), IGetErrorString( expectedErrCode ), __FILE__, __LINE__ ); + +extern const char *IGetErrorString( int clErrorCode ); + +extern float Ulp_Error_Half( cl_ushort test, float reference ); +extern float Ulp_Error( float test, double reference ); +extern float Ulp_Error_Double( double test, long double reference ); + +extern const char *GetChannelTypeName( cl_channel_type type ); +extern int IsChannelTypeSupported( cl_channel_type type ); +extern const char *GetChannelOrderName( cl_channel_order order ); +extern int IsChannelOrderSupported( cl_channel_order order ); +extern const char *GetAddressModeName( cl_addressing_mode mode ); + +extern const char *GetDeviceTypeName( cl_device_type type ); +int check_opencl_version_with_testname(const char *subtestname, cl_device_id device); +int check_opencl_version(cl_device_id device, cl_uint requestedMajorVersion, cl_uint requestedMinorVersion); +int check_functions_for_offline_compiler(const char *subtestname, cl_device_id device); + +// NON-REENTRANT UNLESS YOU PROVIDE A BUFFER PTR (pass null to use static storage, but it's not reentrant then!) +extern const char *GetDataVectorString( void *dataBuffer, size_t typeSize, size_t vecSize, char *buffer ); + +#if defined (_WIN32) && !defined(__MINGW32__) +#include +#include +#include +static int vlog_win32(const char *format, ...) 
+{ + const char *new_format = format; + + if (strstr(format, "%a")) { + char *temp; + if ((temp = strdup(format)) == NULL) { + printf("vlog_win32: Failed to allocate memory for strdup\n"); + return -1; + } + new_format = temp; + while (*temp) { + // replace %a with %f + if ((*temp == '%') && (*(temp+1) == 'a')) { + *(temp+1) = 'f'; + } + temp++; + } + } + + va_list args; + va_start(args, format); + vprintf(new_format, args); + va_end(args); + + if (new_format != format) { + free((void*)new_format); + } + + return 0; +} +#endif + + +#ifdef __cplusplus +} +#endif + +#endif // _errorHelpers_h + + diff --git a/test_common/harness/fpcontrol.h b/test_common/harness/fpcontrol.h new file mode 100644 index 00000000..4835db45 --- /dev/null +++ b/test_common/harness/fpcontrol.h @@ -0,0 +1,104 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef _fpcontrol_h +#define _fpcontrol_h + +// In order to get tests for correctly rounded operations (e.g. multiply) to work properly we need to be able to set the reference hardware +// to FTZ mode if the device hardware is running in that mode. We have explored all other options short of writing correctly rounded operations +// in integer code, and have found this is the only way to correctly verify operation. +// +// Non-Apple implementations will need to provide their own implentation for these features. 
If the reference hardware and device are both +// running in the same state (either FTZ or IEEE compliant modes) then these functions may be empty. If the device is running in non-default +// rounding mode (e.g. round toward zero), then these functions should also set the reference device into that rounding mode. +#if defined( __APPLE__ ) || defined( _MSC_VER ) || defined( __linux__ ) || defined (__MINGW32__) + typedef int FPU_mode_type; +#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined( __MINGW32__ ) + #include +#elif defined( __PPC__ ) + #include + extern __thread fpu_control_t fpu_control; +#endif + // Set the reference hardware floating point unit to FTZ mode + static inline void ForceFTZ( FPU_mode_type *mode ) + { +#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__) + *mode = _mm_getcsr(); + _mm_setcsr( *mode | 0x8040); +#elif defined( __PPC__ ) + *mode = fpu_control; + fpu_control |= _FPU_MASK_NI; +#elif defined ( __arm__ ) + unsigned fpscr; + __asm__ volatile ("fmrx %0, fpscr" : "=r"(fpscr)); + *mode = fpscr; + __asm__ volatile ("fmxr fpscr, %0" :: "r"(fpscr | (1U << 24))); + // Add 64 bit support +#elif defined (__aarch64__) + unsigned fpscr; + __asm__ volatile ("mrs %0, fpcr" : "=r"(fpscr)); + *mode = fpscr; + __asm__ volatile ("msr fpcr, %0" :: "r"(fpscr | (1U << 24))); +#else + #error ForceFTZ needs an implentation +#endif + } + + // Disable the denorm flush to zero + static inline void DisableFTZ( FPU_mode_type *mode ) + { +#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__) + *mode = _mm_getcsr(); + _mm_setcsr( *mode & ~0x8040); +#elif defined( __PPC__ ) + *mode = fpu_control; + fpu_control &= ~_FPU_MASK_NI; +#elif defined ( __arm__ ) + unsigned fpscr; + __asm__ volatile ("fmrx %0, fpscr" : "=r"(fpscr)); + *mode = fpscr; + __asm__ volatile ("fmxr fpscr, %0" :: "r"(fpscr & ~(1U << 24))); + // Add 64 bit support +#elif defined 
(__aarch64__) + unsigned fpscr; + __asm__ volatile ("mrs %0, fpcr" : "=r"(fpscr)); + *mode = fpscr; + __asm__ volatile ("msr fpcr, %0" :: "r"(fpscr & ~(1U << 24))); +#else + #error DisableFTZ needs an implentation +#endif + } + + // Restore the reference hardware to floating point state indicated by *mode + static inline void RestoreFPState( FPU_mode_type *mode ) + { +#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__) + _mm_setcsr( *mode ); +#elif defined( __PPC__) + fpu_control = *mode; +#elif defined (__arm__) + __asm__ volatile ("fmxr fpscr, %0" :: "r"(*mode)); + // Add 64 bit support +#elif defined (__aarch64__) + __asm__ volatile ("msr fpcr, %0" :: "r"(*mode)); +#else + #error RestoreFPState needs an implementation +#endif + } +#else + #error ForceFTZ and RestoreFPState need implentations +#endif + +#endif diff --git a/test_common/harness/genericThread.cpp b/test_common/harness/genericThread.cpp new file mode 100644 index 00000000..2b742fa3 --- /dev/null +++ b/test_common/harness/genericThread.cpp @@ -0,0 +1,53 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "genericThread.h" + +#if defined(_WIN32) +#include +#else // !_WIN32 +#include +#endif + +void * genericThread::IStaticReflector( void * data ) +{ + genericThread *t = (genericThread *)data; + return t->IRun(); +} + +bool genericThread::Start( void ) +{ +#if defined(_WIN32) + mHandle = CreateThread( NULL, 0, (LPTHREAD_START_ROUTINE) IStaticReflector, this, 0, NULL ); + return ( mHandle != NULL ); +#else // !_WIN32 + int error = pthread_create( (pthread_t*)&mHandle, NULL, IStaticReflector, (void *)this ); + return ( error == 0 ); +#endif // !_WIN32 +} + +void * genericThread::Join( void ) +{ +#if defined(_WIN32) + WaitForSingleObject( (HANDLE)mHandle, INFINITE ); + return NULL; +#else // !_WIN32 + void * retVal; + int error = pthread_join( (pthread_t)mHandle, &retVal ); + if( error != 0 ) + retVal = NULL; + return retVal; +#endif // !_WIN32 +} diff --git a/test_common/harness/genericThread.h b/test_common/harness/genericThread.h new file mode 100644 index 00000000..168b7407 --- /dev/null +++ b/test_common/harness/genericThread.h @@ -0,0 +1,42 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef _genericThread_h +#define _genericThread_h + +#include + +class genericThread +{ + public: + + virtual ~genericThread() {} + + bool Start( void ); + void * Join( void ); + + protected: + + virtual void * IRun( void ) = 0; + + private: + + void* mHandle; + + static void * IStaticReflector( void * data ); +}; + +#endif // _genericThread_h + diff --git a/test_common/harness/imageHelpers.cpp b/test_common/harness/imageHelpers.cpp new file mode 100644 index 00000000..3b47e271 --- /dev/null +++ b/test_common/harness/imageHelpers.cpp @@ -0,0 +1,3857 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
// Encode a linear [0,1] color component as 8-bit sRGB (result in [0,255]),
// using the standard IEC 61966-2-1 transfer function.  NaN is treated as
// zero and out-of-range inputs are clamped to [0,1] before encoding.
double
sRGBmap(float fc)
{
    double c = (double)fc;

    // MSVC has no isnan(); it spells it _isnan().
#if !defined (_WIN32)
    if (isnan(c))
        c = 0.0;
#else
    if (_isnan(c))
        c = 0.0;
#endif

    // Clamp into [0, 1] before applying the transfer curve.
    if (c < 0.0)
        c = 0.0;
    if (c > 1.0)
        c = 1.0;

    // Linear toe near black, gamma segment above the knee
    // (5/12 == 1/2.4, the standard sRGB exponent).
    c = (c < 0.0031308) ? 12.92 * c
                        : (1055.0/1000.0) * pow(c, 5.0/12.0) - (55.0/1000.0);

    // Scale up to the 8-bit range.
    return c * 255.0;
}

// Inverse sRGB transfer: map an encoded component (nominally in [0,1])
// back to linear light.  No clamping or NaN handling is performed here.
double
sRGBunmap(float fc)
{
    const double c = (double)fc;

    // Inverse of the linear toe below the 0.04045 knee, inverse gamma above.
    return (c <= 0.04045) ? c / 12.92
                          : pow((c + 0.055) / 1.055, 2.4);
}
get_format_channel_count( const cl_image_format *format ) +{ + return get_channel_order_channel_count( format->image_channel_order ); +} + +size_t get_channel_order_channel_count( cl_channel_order order ) +{ + switch( order ) + { + case CL_R: + case CL_A: + case CL_Rx: + case CL_INTENSITY: + case CL_LUMINANCE: + case CL_DEPTH: + case CL_DEPTH_STENCIL: + return 1; + + case CL_RG: + case CL_RA: + case CL_RGx: + return 2; + + case CL_RGB: + case CL_RGBx: + case CL_sRGB: + case CL_sRGBx: + return 3; + + case CL_RGBA: + case CL_ARGB: + case CL_BGRA: + case CL_sRGBA: + case CL_sBGRA: + case CL_ABGR: +#ifdef CL_1RGB_APPLE + case CL_1RGB_APPLE: +#endif +#ifdef CL_BGR1_APPLE + case CL_BGR1_APPLE: +#endif +#ifdef CL_ABGR_APPLE + case CL_ABGR_APPLE: +#endif + return 4; + + default: + log_error("%s does not support 0x%x\n",__FUNCTION__,order); + return 0; + } +} + +cl_channel_type get_channel_type_from_name( const char *name ) +{ + struct { + cl_channel_type type; + const char *name; + } typeNames[] = { + { CL_SNORM_INT8, "CL_SNORM_INT8" }, + { CL_SNORM_INT16, "CL_SNORM_INT16" }, + { CL_UNORM_INT8, "CL_UNORM_INT8" }, + { CL_UNORM_INT16, "CL_UNORM_INT16" }, + { CL_UNORM_INT24, "CL_UNORM_INT24" }, + { CL_UNORM_SHORT_565, "CL_UNORM_SHORT_565" }, + { CL_UNORM_SHORT_555, "CL_UNORM_SHORT_555" }, + { CL_UNORM_INT_101010, "CL_UNORM_INT_101010" }, + { CL_SIGNED_INT8, "CL_SIGNED_INT8" }, + { CL_SIGNED_INT16, "CL_SIGNED_INT16" }, + { CL_SIGNED_INT32, "CL_SIGNED_INT32" }, + { CL_UNSIGNED_INT8, "CL_UNSIGNED_INT8" }, + { CL_UNSIGNED_INT16, "CL_UNSIGNED_INT16" }, + { CL_UNSIGNED_INT32, "CL_UNSIGNED_INT32" }, + { CL_HALF_FLOAT, "CL_HALF_FLOAT" }, + { CL_FLOAT, "CL_FLOAT" }, +#ifdef CL_SFIXED14_APPLE + { CL_SFIXED14_APPLE, "CL_SFIXED14_APPLE" } +#endif + }; + for( size_t i = 0; i < sizeof( typeNames ) / sizeof( typeNames[ 0 ] ); i++ ) + { + if( strcmp( typeNames[ i ].name, name ) == 0 || strcmp( typeNames[ i ].name + 3, name ) == 0 ) + return typeNames[ i ].type; + } + return 
(cl_channel_type)-1; +} + +cl_channel_order get_channel_order_from_name( const char *name ) +{ + const struct + { + cl_channel_order order; + const char *name; + }orderNames[] = + { + { CL_R, "CL_R" }, + { CL_A, "CL_A" }, + { CL_Rx, "CL_Rx" }, + { CL_RG, "CL_RG" }, + { CL_RA, "CL_RA" }, + { CL_RGx, "CL_RGx" }, + { CL_RGB, "CL_RGB" }, + { CL_RGBx, "CL_RGBx" }, + { CL_RGBA, "CL_RGBA" }, + { CL_BGRA, "CL_BGRA" }, + { CL_ARGB, "CL_ARGB" }, + { CL_INTENSITY, "CL_INTENSITY"}, + { CL_LUMINANCE, "CL_LUMINANCE"}, + { CL_DEPTH, "CL_DEPTH" }, + { CL_DEPTH_STENCIL, "CL_DEPTH_STENCIL" }, + { CL_sRGB, "CL_sRGB" }, + { CL_sRGBx, "CL_sRGBx" }, + { CL_sRGBA, "CL_sRGBA" }, + { CL_sBGRA, "CL_sBGRA" }, + { CL_ABGR, "CL_ABGR" }, +#ifdef CL_1RGB_APPLE + { CL_1RGB_APPLE, "CL_1RGB_APPLE" }, +#endif +#ifdef CL_BGR1_APPLE + { CL_BGR1_APPLE, "CL_BGR1_APPLE" }, +#endif + }; + + for( size_t i = 0; i < sizeof( orderNames ) / sizeof( orderNames[ 0 ] ); i++ ) + { + if( strcmp( orderNames[ i ].name, name ) == 0 || strcmp( orderNames[ i ].name + 3, name ) == 0 ) + return orderNames[ i ].order; + } + return (cl_channel_order)-1; +} + + +int is_format_signed( const cl_image_format *format ) +{ + switch( format->image_channel_data_type ) + { + case CL_SNORM_INT8: + case CL_SIGNED_INT8: + case CL_SNORM_INT16: + case CL_SIGNED_INT16: + case CL_SIGNED_INT32: + case CL_HALF_FLOAT: + case CL_FLOAT: +#ifdef CL_SFIXED14_APPLE + case CL_SFIXED14_APPLE: +#endif + return 1; + + default: + return 0; + } +} + +size_t get_pixel_size( cl_image_format *format ) +{ + switch( format->image_channel_data_type ) + { + case CL_SNORM_INT8: + case CL_UNORM_INT8: + case CL_SIGNED_INT8: + case CL_UNSIGNED_INT8: + return get_format_channel_count( format ); + + case CL_SNORM_INT16: + case CL_UNORM_INT16: + case CL_SIGNED_INT16: + case CL_UNSIGNED_INT16: + case CL_HALF_FLOAT: +#ifdef CL_SFIXED14_APPLE + case CL_SFIXED14_APPLE: +#endif + return get_format_channel_count( format ) * sizeof( cl_ushort ); + + case CL_SIGNED_INT32: + 
case CL_UNSIGNED_INT32: + return get_format_channel_count( format ) * sizeof( cl_int ); + + case CL_UNORM_SHORT_565: + case CL_UNORM_SHORT_555: +#ifdef OBSOLETE_FORAMT + case CL_UNORM_SHORT_565_REV: + case CL_UNORM_SHORT_555_REV: +#endif + return 2; + +#ifdef OBSOLETE_FORAMT + case CL_UNORM_INT_8888: + case CL_UNORM_INT_8888_REV: + return 4; +#endif + + case CL_UNORM_INT_101010: +#ifdef OBSOLETE_FORAMT + case CL_UNORM_INT_101010_REV: +#endif + return 4; + + case CL_FLOAT: + return get_format_channel_count( format ) * sizeof( cl_float ); + + default: + return 0; + } +} + +int get_8_bit_image_format( cl_context context, cl_mem_object_type objType, cl_mem_flags flags, size_t channelCount, cl_image_format *outFormat ) +{ + cl_image_format formatList[ 128 ]; + unsigned int outFormatCount, i; + int error; + + + /* Make sure each image format is supported */ + if ((error = clGetSupportedImageFormats( context, flags, objType, 128, formatList, &outFormatCount ))) + return error; + + + /* Look for one that is an 8-bit format */ + for( i = 0; i < outFormatCount; i++ ) + { + if( formatList[ i ].image_channel_data_type == CL_SNORM_INT8 || + formatList[ i ].image_channel_data_type == CL_UNORM_INT8 || + formatList[ i ].image_channel_data_type == CL_SIGNED_INT8 || + formatList[ i ].image_channel_data_type == CL_UNSIGNED_INT8 ) + { + if ( !channelCount || ( channelCount && ( get_format_channel_count( &formatList[ i ] ) == channelCount ) ) ) + { + *outFormat = formatList[ i ]; + return 0; + } + } + } + + return -1; +} + +int get_32_bit_image_format( cl_context context, cl_mem_object_type objType, cl_mem_flags flags, size_t channelCount, cl_image_format *outFormat ) +{ + cl_image_format formatList[ 128 ]; + unsigned int outFormatCount, i; + int error; + + + /* Make sure each image format is supported */ + if ((error = clGetSupportedImageFormats( context, flags, objType, 128, formatList, &outFormatCount ))) + return error; + + /* Look for one that is an 8-bit format */ + for( i = 0; i 
// Integer addressing helpers: given a raw (possibly out-of-range) texel
// coordinate and the image extent, produce the coordinate actually sampled.
typedef int (*AddressFn)( int value, size_t maxValue );

// CL_ADDRESS_NONE: out-of-range access is undefined; pass through unchanged.
int NoAddressFn( int value, size_t maxValue ) { return value; }

// CL_ADDRESS_REPEAT: wrap by one period.  Assumes the caller hands us a
// coordinate at most one period out of range.
int RepeatAddressFn( int value, size_t maxValue )
{
    int extent = (int)maxValue;
    if( value < 0 )
        return value + extent;
    if( value >= extent )
        return value - extent;
    return value;
}

// CL_ADDRESS_MIRRORED_REPEAT: only clamps — presumably because the mirroring
// itself already happened in normalized space
// (MirroredRepeatNormalizedAddressFn below), leaving just edge clamping here.
int MirroredRepeatAddressFn( int value, size_t maxValue )
{
    if( value < 0 )
        return 0;
    if( (size_t)value >= maxValue )
        return (int)( maxValue - 1 );
    return value;
}

// CL_ADDRESS_CLAMP: clamp into [-1, maxValue]; -1 and maxValue address the
// border color.
int ClampAddressFn( int value, size_t maxValue )
{
    if( value < -1 )
        return -1;
    if( value >= 0 && (size_t)value > maxValue )
        return (int)maxValue;
    return value;
}

// CL_ADDRESS_CLAMP_TO_EDGE: clamp into [0, maxValue - 1].
int ClampToEdgeNearestFn( int value, size_t maxValue )
{
    if( value < 0 )
        return 0;
    if( (size_t)value > maxValue - 1 )
        return (int)maxValue - 1;
    return value;
}
AddressFn ClampToEdgeLinearFn = ClampToEdgeNearestFn;

// Note: normalized coords get repeated in normalized space, not
// unnormalized space — hence the dedicated normalized variants below.
volatile float gFloatHome;

// CL_ADDRESS_REPEAT in normalized coordinates: keep only the fractional
// part of the coordinate, then scale by the image extent.
float RepeatNormalizedAddressFn( float fValue, size_t maxValue )
{
#ifndef _MSC_VER // Use original if not the VS compiler.
    // Reduce to [0, 1) and un-normalize.
    return ( fValue - floorf( fValue ) ) * (float)maxValue;
#else // Otherwise, use this instead:
    // Route the intermediate through a volatile float to break up the
    // x87 instruction sequence the VS compiler emits, forcing the
    // subtraction down to float precision before the multiply.
    gFloatHome = fValue - floorf( fValue );
    return gFloatHome * (float)maxValue;
#endif
}

// CL_ADDRESS_MIRRORED_REPEAT in normalized coordinates: reflect the
// coordinate about the nearest even integer, then un-normalize.
float MirroredRepeatNormalizedAddressFn( float fValue, size_t maxValue )
{
    // Nearest multiple of two.  Halfway cases flip-flop under
    // round-to-nearest-even, but both choices mirror to the same result.
    float nearestEven = 2.0f * rintf( fValue * 0.5f );

    // Distance to that even integer is the mirrored value in [0, 1].
    float mirrored = fabsf( fValue - nearestEven );

    // Un-normalize.
    return mirrored * (float)maxValue;
}
CL_ADDRESS_MIRRORED_REPEAT - CL_ADDRESS_NONE ][ CL_FILTER_LINEAR - CL_FILTER_NEAREST ] = MirroredRepeatAddressFn; + } + + AddressFn operator[]( image_sampler_data *sampler ) + { + return mTable[ (int)sampler->addressing_mode - CL_ADDRESS_NONE ][ (int)sampler->filter_mode - CL_FILTER_NEAREST ]; + } + + AddressFn mTable[ 6 ][ 2 ]; +}; + +static AddressingTable sAddressingTable; + +bool alpha_is_x(cl_image_format *format){ + switch (format->image_channel_order) { + case CL_RGBx: + case CL_sRGBx: + return true; + default: + return false; + } +} + +bool is_sRGBA_order(cl_channel_order image_channel_order){ + switch (image_channel_order) { + case CL_sRGB: + case CL_sRGBx: + case CL_sRGBA: + case CL_sBGRA: + return true; + default: + return false; + } +} + +// Format helpers + +int has_alpha(cl_image_format *format) { + switch (format->image_channel_order) { + case CL_R: + return 0; + case CL_A: + case CL_Rx: + return 1; + case CL_RG: + return 0; + case CL_RA: + case CL_RGx: + return 1; + case CL_RGB: + case CL_sRGB: + return 0; + case CL_RGBx: + case CL_sRGBx: + return 1; + case CL_RGBA: + return 1; + case CL_BGRA: + return 1; + case CL_ARGB: + return 1; + case CL_INTENSITY: + return 1; + case CL_LUMINANCE: + return 0; +#ifdef CL_BGR1_APPLE + case CL_BGR1_APPLE: return 1; +#endif +#ifdef CL_1RGB_APPLE + case CL_1RGB_APPLE: return 1; +#endif + case CL_sRGBA: + case CL_sBGRA: + return 1; + case CL_DEPTH: + return 0; + default: + log_error("Invalid image channel order: %d\n", format->image_channel_order); + return 0; + } + +} + +#define PRINT_MAX_SIZE_LOGIC 0 + +#define SWAP( _a, _b ) do{ _a ^= _b; _b ^= _a; _a ^= _b; }while(0) +#ifndef MAX + #define MAX( _a, _b ) ((_a) > (_b) ? 
(_a) : (_b)) +#endif + +void get_max_sizes(size_t *numberOfSizes, const int maxNumberOfSizes, + size_t sizes[][3], size_t maxWidth, size_t maxHeight, size_t maxDepth, size_t maxArraySize, + const cl_ulong maxIndividualAllocSize, // CL_DEVICE_MAX_MEM_ALLOC_SIZE + const cl_ulong maxTotalAllocSize, // CL_DEVICE_GLOBAL_MEM_SIZE + cl_mem_object_type image_type, cl_image_format *format, int usingMaxPixelSizeBuffer) { + + bool is3D = (image_type == CL_MEM_OBJECT_IMAGE3D); + bool isArray = (image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY || image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY); + + // Validate we have a reasonable max depth for 3D + if (is3D && maxDepth < 2) { + log_error("ERROR: Requesting max image sizes for 3D images when max depth is < 2.\n"); + *numberOfSizes = 0; + return; + } + // Validate we have a reasonable max array size for 1D & 2D image arrays + if (isArray && maxArraySize < 2) { + log_error("ERROR: Requesting max image sizes for an image array when max array size is < 1.\n"); + *numberOfSizes = 0; + return; + } + + // Reduce the maximum because we are trying to test the max image dimensions, not the memory allocation + cl_ulong adjustedMaxTotalAllocSize = maxTotalAllocSize / 4; + cl_ulong adjustedMaxIndividualAllocSize = maxIndividualAllocSize / 4; + log_info("Note: max individual allocation adjusted down from %gMB to %gMB and max total allocation adjusted down from %gMB to %gMB.\n", + maxIndividualAllocSize/(1024.0*1024.0), adjustedMaxIndividualAllocSize/(1024.0*1024.0), + maxTotalAllocSize/(1024.0*1024.0), adjustedMaxTotalAllocSize/(1024.0*1024.0)); + + // Cap our max allocation to 1.0GB. + // FIXME -- why? In the interest of not taking a long time? We should still test this stuff... 
+ if (adjustedMaxTotalAllocSize > (cl_ulong)1024*1024*1024) { + adjustedMaxTotalAllocSize = (cl_ulong)1024*1024*1024; + log_info("Limiting max total allocation size to %gMB (down from %gMB) for test.\n", + adjustedMaxTotalAllocSize/(1024.0*1024.0), maxTotalAllocSize/(1024.0*1024.0)); + } + + cl_ulong maxAllocSize = adjustedMaxIndividualAllocSize; + if (adjustedMaxTotalAllocSize < adjustedMaxIndividualAllocSize*2) + maxAllocSize = adjustedMaxTotalAllocSize/2; + + size_t raw_pixel_size = get_pixel_size(format); + // If the test will be creating input (src) buffer of type int4 or float4, number of pixels will be + // governed by sizeof(int4 or float4) and not sizeof(dest fomat) + // Also if pixel size is 12 bytes i.e. RGB or RGBx, we adjust it to 16 bytes as GPUs has no concept + // of 3 channel images. GPUs expand these to four channel RGBA. + if(usingMaxPixelSizeBuffer || raw_pixel_size == 12) + raw_pixel_size = 16; + size_t max_pixels = (size_t)maxAllocSize / raw_pixel_size; + + log_info("Maximums: [%ld x %ld x %ld], raw pixel size %lu bytes, per-allocation limit %gMB.\n", + maxWidth, maxHeight, isArray ? 
maxArraySize : maxDepth, raw_pixel_size, (maxAllocSize/(1024.0*1024.0))); + + // Keep track of the maximum sizes for each dimension + size_t maximum_sizes[] = { maxWidth, maxHeight, maxDepth }; + + switch (image_type) { + case CL_MEM_OBJECT_IMAGE1D_ARRAY: + maximum_sizes[1] = maxArraySize; + maximum_sizes[2] = 1; + break; + case CL_MEM_OBJECT_IMAGE2D_ARRAY: + maximum_sizes[2] = maxArraySize; + break; + } + + + // Given one fixed sized dimension, this code finds one or two other dimensions, + // both with very small size, such that the size does not exceed the maximum + // passed to this function + +#if defined(__x86_64) || defined (__arm64__) || defined (__ppc64__) + size_t other_sizes[] = { 2, 3, 5, 6, 7, 9, 10, 11, 13, 15}; +#else + size_t other_sizes[] = { 2, 3, 5, 6, 7, 9, 11, 13}; +#endif + + static size_t other_size = 0; + enum { num_other_sizes = sizeof(other_sizes)/sizeof(size_t) }; + + (*numberOfSizes) = 0; + + if (image_type == CL_MEM_OBJECT_IMAGE1D) { + + double M = maximum_sizes[0]; + + // Store the size + sizes[(*numberOfSizes)][0] = (size_t)M; + sizes[(*numberOfSizes)][1] = 1; + sizes[(*numberOfSizes)][2] = 1; + ++(*numberOfSizes); + } + + else if (image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY || image_type == CL_MEM_OBJECT_IMAGE2D) { + + for (int fixed_dim=0;fixed_dim<2;++fixed_dim) { + + // Determine the size of the fixed dimension + double M = maximum_sizes[fixed_dim]; + double A = max_pixels; + + int x0_dim = !fixed_dim; + double x0 = fmin(fmin(other_sizes[(other_size++)%num_other_sizes],A/M), maximum_sizes[x0_dim]); + + // Store the size + sizes[(*numberOfSizes)][fixed_dim] = (size_t)M; + sizes[(*numberOfSizes)][x0_dim] = (size_t)x0; + sizes[(*numberOfSizes)][2] = 1; + ++(*numberOfSizes); + } + } + + else if (image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY || image_type == CL_MEM_OBJECT_IMAGE3D) { + + // Iterate over dimensions, finding sizes for the non-fixed dimension + for (int fixed_dim=0;fixed_dim<3;++fixed_dim) { + + // Determine the size of the 
fixed dimension + double M = maximum_sizes[fixed_dim]; + double A = max_pixels; + + // Find two other dimensions, x0 and x1 + int x0_dim = (fixed_dim == 0) ? 1 : 0; + int x1_dim = (fixed_dim == 2) ? 1 : 2; + + // Choose two other sizes for these dimensions + double x0 = fmin(fmin(A/M,maximum_sizes[x0_dim]),other_sizes[(other_size++)%num_other_sizes]); + // GPUs have certain restrictions on minimum width (row alignment) of images which has given us issues + // testing small widths in this test (say we set width to 3 for testing, and compute size based on this width and decide + // it fits within vram ... but GPU driver decides that, due to row alignment requirements, it has to use + // width of 16 which doesnt fit in vram). For this purpose we are not testing width < 16 for this test. + if(x0_dim == 0 && x0 < 16) + x0 = 16; + double x1 = fmin(fmin(A/M/x0,maximum_sizes[x1_dim]),other_sizes[(other_size++)%num_other_sizes]); + // Store the size + sizes[(*numberOfSizes)][fixed_dim] = (size_t)M; + sizes[(*numberOfSizes)][x0_dim] = (size_t)x0; + sizes[(*numberOfSizes)][x1_dim] = (size_t)x1; + ++(*numberOfSizes); + } + } + + // Log the results + for (int j=0; j<(int)(*numberOfSizes); j++) { + switch (image_type) { + case CL_MEM_OBJECT_IMAGE1D: + log_info(" size[%d] = [%ld] (%g MB image)\n", + j, sizes[j][0], raw_pixel_size*sizes[j][0]*sizes[j][1]*sizes[j][2]/(1024.0*1024.0)); + break; + case CL_MEM_OBJECT_IMAGE1D_ARRAY: + case CL_MEM_OBJECT_IMAGE2D: + log_info(" size[%d] = [%ld %ld] (%g MB image)\n", + j, sizes[j][0], sizes[j][1], raw_pixel_size*sizes[j][0]*sizes[j][1]*sizes[j][2]/(1024.0*1024.0)); + break; + case CL_MEM_OBJECT_IMAGE2D_ARRAY: + case CL_MEM_OBJECT_IMAGE3D: + log_info(" size[%d] = [%ld %ld %ld] (%g MB image)\n", + j, sizes[j][0], sizes[j][1], sizes[j][2], raw_pixel_size*sizes[j][0]*sizes[j][1]*sizes[j][2]/(1024.0*1024.0)); + break; + } + } +} + +int issubnormal(float a) +{ + union { cl_int i; cl_float f; } u; + u.f = a; + return (u.i & 0x7f800000U) == 0; +} 
// Absolute error tolerance for a filtered read of the given format.
// Nearest filtering is expected to be exact (0.0f).
float get_max_absolute_error( cl_image_format *format, image_sampler_data *sampler) {
    if (sampler->filter_mode == CL_FILTER_NEAREST)
        return 0.0f;

    switch (format->image_channel_data_type) {
        case CL_SNORM_INT8:
            return 1.0f/127.0f;
        case CL_UNORM_INT8:
            return 1.0f/255.0f;
        case CL_UNORM_INT16:
            return 1.0f/65535.0f;
        case CL_SNORM_INT16:
            return 1.0f/32767.0f;
        case CL_FLOAT:
            return CL_FLT_MIN;
#ifdef CL_SFIXED14_APPLE
        case CL_SFIXED14_APPLE:
            return 0x1.0p-14f;
#endif
        default:
            return 0.0f;
    }
}

// Relative error tolerance for sampling, per the OpenCL spec's precision
// sections. `sampleCount` reflects how many texels a linear filter touches
// (8 for 3D, 4 for 2D).
float get_max_relative_error( cl_image_format *format, image_sampler_data *sampler, int is3D, int isLinearFilter )
{
    float maxError = 0.0f;
    float sampleCount = 1.0f;
    if( isLinearFilter )
        sampleCount = is3D ? 8.0f : 4.0f;

    // Note that the ULP is defined here as the unit in the last place of the
    // maximum magnitude sample used for filtering.

    // Section 8.3
    switch( format->image_channel_data_type )
    {
        // The spec allows 2 ulps of error for normalized formats
        case CL_SNORM_INT8:
        case CL_UNORM_INT8:
        case CL_SNORM_INT16:
        case CL_UNORM_INT16:
        case CL_UNORM_SHORT_565:
        case CL_UNORM_SHORT_555:
        case CL_UNORM_INT_101010:
            // Maximum sampling error for round to zero normalization based on
            // multiplication by reciprocal (using reciprocal generated in round
            // to +inf mode, so that 1.0 matches spec)
            maxError = 2*FLT_EPSILON*sampleCount;
            break;

        // If the implementation supports these formats then it will have to
        // allow rounding error here too, because not all 32-bit ints are
        // exactly representable in float
        case CL_SIGNED_INT32:
        case CL_UNSIGNED_INT32:
            maxError = 1*FLT_EPSILON;
            break;
    }


    // Section 8.2
    if( sampler->addressing_mode == CL_ADDRESS_REPEAT || sampler->addressing_mode == CL_ADDRESS_MIRRORED_REPEAT || sampler->filter_mode != CL_FILTER_NEAREST || sampler->normalized_coords )
#if defined( __APPLE__ )
    {
        if( sampler->filter_mode != CL_FILTER_NEAREST )
        {
            extern cl_device_type gDeviceType;
            // The maximum
            if( gDeviceType == CL_DEVICE_TYPE_GPU )
                maxError += MAKE_HEX_FLOAT(0x1.0p-4f, 0x1L, -4); // Some GPUs ain't so accurate
            else
                // The standard method of 2d linear filtering delivers 4.0 ulps
                // of error in round to nearest (8 in rtz).
                maxError += 4.0f * FLT_EPSILON;
        }
        else
            // normalized coordinates will introduce some error into the
            // fractional part of the address, affecting results
            maxError += 4.0f * FLT_EPSILON;
    }
#else
    {
#if !defined(_WIN32)
#warning Implementations will likely wish to pick a max allowable sampling error policy here that is better than the spec
#endif
        // The spec allows linear filters to return any result most of the time.
        // That's fine for implementations but a problem for testing. After all
        // users aren't going to like garbage images. We have "picked a number"
        // here that we are going to attempt to conform to. Implementations are
        // free to pick another number, like infinity, if they like.
        // We picked a number for you, to provide /some/ sanity
        maxError = MAKE_HEX_FLOAT(0x1.0p-7f, 0x1L, -7);
        // ...but this is what the spec allows:
        // maxError = INFINITY;
        // Please feel free to pick any positive number. (NaN wont work.)
    }
#endif

    // The error calculation itself can introduce error
    maxError += FLT_EPSILON * 2;

    return maxError;
}

// Largest representable code value for the format's channel data type
// (e.g. 255 for 8-bit unsigned/unorm). Returns 0 for unknown types.
size_t get_format_max_int( cl_image_format *format )
{
    switch( format->image_channel_data_type )
    {
        case CL_SNORM_INT8:
        case CL_SIGNED_INT8:
            return 127;
        case CL_UNORM_INT8:
        case CL_UNSIGNED_INT8:
            return 255;

        case CL_SNORM_INT16:
        case CL_SIGNED_INT16:
            return 32767;

        case CL_UNORM_INT16:
        case CL_UNSIGNED_INT16:
            return 65535;

        case CL_SIGNED_INT32:
            return 2147483647L;

        case CL_UNSIGNED_INT32:
            return 4294967295LL;

        case CL_UNORM_SHORT_565:
        case CL_UNORM_SHORT_555:
            return 31;

        case CL_UNORM_INT_101010:
            return 1023;

        case CL_HALF_FLOAT:
            return 1<<10;

#ifdef CL_SFIXED14_APPLE
        case CL_SFIXED14_APPLE:
            return 16384;
#endif
        default:
            return 0;
    }
}

// Smallest representable code value for the format's channel data type
// (0 for unsigned/unorm types). Returns 0 for unknown types.
int get_format_min_int( cl_image_format *format )
{
    switch( format->image_channel_data_type )
    {
        case CL_SNORM_INT8:
        case CL_SIGNED_INT8:
            return -128;
        case CL_UNORM_INT8:
        case CL_UNSIGNED_INT8:
            return 0;

        case CL_SNORM_INT16:
        case CL_SIGNED_INT16:
            return -32768;

        case CL_UNORM_INT16:
        case CL_UNSIGNED_INT16:
            return 0;

        case CL_SIGNED_INT32:
            return -2147483648LL;

        case CL_UNSIGNED_INT32:
            return 0;

        case CL_UNORM_SHORT_565:
        case CL_UNORM_SHORT_555:
        case CL_UNORM_INT_101010:
            return 0;

        case CL_HALF_FLOAT:
            return -1<<10;

#ifdef CL_SFIXED14_APPLE
        case CL_SFIXED14_APPLE:
            return -16384;
#endif

        default:
            return 0;
    }
}

// Expand an IEEE 754 binary16 (half) bit pattern to a binary32 float.
float convert_half_to_float( unsigned short halfValue )
{
    // We have to take care of a few special cases, but in general, we just extract
    // the same components from the half that exist in the float and re-stuff them
    // For a description of the actual half format, see http://en.wikipedia.org/wiki/Half_precision
    // Note: we store these in 32-bit ints to make the bit manipulations easier later
    int sign = ( halfValue >> 15 ) & 0x0001;
    int exponent = ( halfValue >> 10 ) & 0x001f;
    int mantissa = ( halfValue ) & 0x03ff;

    // Note: we use a union here to be able to access the bits of a float directly
    union
    {
        unsigned int bits;
        float floatValue;
    } outFloat;

    // Special cases first
    if( exponent == 0 )
    {
        if( mantissa == 0 )
        {
            // If both exponent and mantissa are 0, the number is +/- 0
            outFloat.bits = sign << 31;
            return outFloat.floatValue; // Already done!
        }

        // If exponent is 0, it's a denormalized number, so we renormalize it
        // Note: this is not terribly efficient, but oh well
        while( ( mantissa & 0x00000400 ) == 0 )
        {
            mantissa <<= 1;
            exponent--;
        }

        // The first bit is implicit, so we take it off and inc the exponent accordingly
        exponent++;
        mantissa &= ~(0x00000400);
    }
    else if( exponent == 31 ) // Special-case "numbers"
    {
        // If the exponent is 31, it's a special case number (+/- infinity or NAN).
        // If the mantissa is 0, it's infinity, else it's NAN, but in either case, the packing
        // method is the same
        outFloat.bits = ( sign << 31 ) | 0x7f800000 | ( mantissa << 13 );
        return outFloat.floatValue;
    }

    // Plain ol' normalized number, so adjust to the ranges a 32-bit float expects and repack
    exponent += ( 127 - 15 );
    mantissa <<= 13;

    outFloat.bits = ( sign << 31 ) | ( exponent << 23 ) | mantissa;
    return outFloat.floatValue;
}


// Convert a float to half using the globally-selected rounding mode
// (gFloatToHalfRoundingMode). Aborts the test on an unknown mode.
cl_ushort convert_float_to_half( float f )
{
    switch( gFloatToHalfRoundingMode )
    {
        case kRoundToNearestEven:
            return float2half_rte( f );
        case kRoundTowardZero:
            return float2half_rtz( f );
        default:
            log_error( "ERROR: Test internal error -- unhandled or unknown float->half rounding mode.\n" );
            exit(-1);
            return 0xffff;
    }

}

// float -> half, round-to-nearest-even, via bit manipulation.
cl_ushort float2half_rte( float f )
{
    union{ float f; cl_uint u; } u = {f};
    cl_uint sign = (u.u >> 16) & 0x8000;
    float x = fabsf(f);

    //Nan
    if( x != x )
    {
        u.u >>= (24-11);
        u.u &= 0x7fff;
        u.u |= 0x0200; //silence the NaN
        return u.u | sign;
    }

    // overflow
    if( x >= MAKE_HEX_FLOAT(0x1.ffep15f, 0x1ffeL, 3) )
        return 0x7c00 | sign;

    // underflow
    if( x <= MAKE_HEX_FLOAT(0x1.0p-25f, 0x1L, -25) )
        return sign; // The halfway case can return 0x0001 or 0. 0 is even.

    // very small
    if( x < MAKE_HEX_FLOAT(0x1.8p-24f, 0x18L, -28) )
        return sign | 1;

    // half denormal
    if( x < MAKE_HEX_FLOAT(0x1.0p-14f, 0x1L, -14) )
    {
        u.f = x * MAKE_HEX_FLOAT(0x1.0p-125f, 0x1L, -125);
        return sign | u.u;
    }

    // Normal case: round the mantissa to 11 bits by adding/subtracting a
    // scaled power of two, then rescale and extract the half bit pattern.
    u.f *= MAKE_HEX_FLOAT(0x1.0p13f, 0x1L, 13);
    u.u &= 0x7f800000;
    x += u.f;
    u.f = x - u.f;
    u.f *= MAKE_HEX_FLOAT(0x1.0p-112f, 0x1L, -112);

    return (u.u >> (24-11)) | sign;
}

// float -> half, round-toward-zero (truncation), via bit manipulation.
cl_ushort float2half_rtz( float f )
{
    union{ float f; cl_uint u; } u = {f};
    cl_uint sign = (u.u >> 16) & 0x8000;
    float x = fabsf(f);

    //Nan
    if( x != x )
    {
        u.u >>= (24-11);
        u.u &= 0x7fff;
        u.u |= 0x0200; //silence the NaN
        return u.u | sign;
    }

    // overflow
    if( x >= MAKE_HEX_FLOAT(0x1.0p16f, 0x1L, 16) )
    {
        if( x == INFINITY )
            return 0x7c00 | sign;

        // RTZ saturates finite overflow to the largest finite half.
        return 0x7bff | sign;
    }

    // underflow
    if( x < MAKE_HEX_FLOAT(0x1.0p-24f, 0x1L, -24) )
        return sign; // The halfway case can return 0x0001 or 0. 0 is even.

    // half denormal
    if( x < MAKE_HEX_FLOAT(0x1.0p-14f, 0x1L, -14) )
    {
        x *= MAKE_HEX_FLOAT(0x1.0p24f, 0x1L, 24);
        return (cl_ushort)((int) x | sign);
    }

    // Normal case: truncate mantissa to 11 bits and rebias the exponent.
    u.u &= 0xFFFFE000U;
    u.u -= 0x38000000U;

    return (u.u >> (24-11)) | sign;
}

// Helper whose constructor can run a (currently disabled) exhaustive
// self-test of the half-conversion routines at static-init time.
class TEST
{
public:
    TEST();
};

// Static instance: constructor runs before main().
static TEST t;

// Reference float->half round-to-nearest-even conversion, writing the half
// bit pattern to p[index]; used as the control in TEST::TEST().
void __vstore_half_rte(float f, size_t index, uint16_t *p)
{
    union{ unsigned int u; float f;} u;

    u.f = f;
    unsigned short r = (u.u >> 16) & 0x8000;
    u.u &= 0x7fffffff;
    if( u.u >= 0x33000000U )
    {
        if( u.u >= 0x47800000 )
        {
            if( u.u <= 0x7f800000 )
                r |= 0x7c00; // overflow / infinity
            else
            {
                r |= 0x7e00 | ( (u.u >> 13) & 0x3ff ); // NaN
            }
        }
        else
        {
            float x = u.f;
            if( u.u < 0x38800000 )
                u.u = 0x3f000000;
            else
                u.u += 0x06800000;
            u.u &= 0x7f800000U;
            x += u.f;
            x -= u.f;
            u.f = x * MAKE_HEX_FLOAT(0x1.0p-112f, 0x1L, -112);
            u.u >>= 13;
            r |= (unsigned short) u.u;
        }
    }

    ((unsigned short*)p)[index] = r;
}

// Exhaustive self-test of convert_float_to_half against __vstore_half_rte
// over all 2^32 float bit patterns. Disabled by the early `return;` — remove
// it to run the check at program start.
TEST::TEST()
{
    return;
    union
    {
        float f;
        uint32_t i;
    } test;
    uint16_t control, myval;

    log_info(" &&&&&&&&&&&&&&&&&&&&&&&&&&&& TESTING HALFS &&&&&&&&&&&&&&&&&&&&\n" );
    test.i = 0;
    do
    {
        // Progress indicator every 2^24 values
        if( ( test.i & 0xffffff ) == 0 )
        {
            if( ( test.i & 0xfffffff ) == 0 )
                log_info( "*" );
            else
                log_info( "." );
            fflush(stdout);
        }
        __vstore_half_rte( test.f, 0, &control );
        myval = convert_float_to_half( test.f );
        if( myval != control )
        {
            log_info( "\n******** ERROR: MyVal %04x control %04x source %12.24f\n", myval, control, test.f );
            log_info( " source bits: %08x %a\n", test.i, test.f );
            float t, c;
            c = convert_half_to_float( control );
            t = convert_half_to_float( myval );
            log_info( " converted control: %12.24f myval: %12.24f\n", c, t );
        }
        test.i++;
    } while( test.i != 0 );
    log_info("\n &&&&&&&&&&&&&&&&&&&&&&&&&&&& TESTING HALFS &&&&&&&&&&&&&&&&&&&&\n" );

}

// Total byte size of the described image, honoring row/slice pitch, or the
// mipmapped size when more than one mip level is present.
cl_ulong get_image_size( image_descriptor const *imageInfo )
{
    cl_ulong imageSize;

    // Assumes rowPitch and slicePitch are always correctly defined
    if ( /*gTestMipmaps*/ imageInfo->num_mip_levels > 1 )
    {
        imageSize = (size_t) compute_mipmapped_image_size(*imageInfo);
    }
    else
    {
        switch (imageInfo->type)
        {
            case CL_MEM_OBJECT_IMAGE1D:
                imageSize = imageInfo->rowPitch;
                break;
            case CL_MEM_OBJECT_IMAGE2D:
                imageSize = imageInfo->height * imageInfo->rowPitch;
                break;
            case CL_MEM_OBJECT_IMAGE3D:
                imageSize = imageInfo->depth * imageInfo->slicePitch;
                break;
            case CL_MEM_OBJECT_IMAGE1D_ARRAY:
                imageSize = imageInfo->arraySize * imageInfo->slicePitch;
                break;
            case CL_MEM_OBJECT_IMAGE2D_ARRAY:
                imageSize = imageInfo->arraySize * imageInfo->slicePitch;
                break;
            default:
                log_error("ERROR: Cannot identify image type %x\n", imageInfo->type);
                abort();
        }
    }
    return imageSize;
}

// Calculate image size in megabytes (strictly, mebibytes). Result is rounded up.
// Image size in MiB, rounded up to the next whole MiB.
cl_ulong get_image_size_mb( image_descriptor const *imageInfo )
{
    cl_ulong imageSize = get_image_size( imageInfo );
    cl_ulong mb = imageSize / ( 1024 * 1024 );
    if ( imageSize % ( 1024 * 1024 ) > 0 )
    {
        mb += 1;
    }
    return mb;
}


extern bool gTestRounding;
// Starting value for the rounding-test ramp fill in generate_random_image_data.
uint64_t gRoundingStartValue = 0;


// Rewrite any bit patterns in `data` that would read as inf/NaN when
// interpreted as float or half, by flipping a high exponent bit.
void escape_inf_nan_values( char* data, size_t allocSize ) {
    // filter values with 8 not-quite-highest bits
    unsigned int *intPtr = (unsigned int *)data;
    for( size_t i = 0; i < allocSize >> 2; i++ )
    {
        if( ( intPtr[ i ] & 0x7F800000 ) == 0x7F800000 )
            intPtr[ i ] ^= 0x40000000;
    }

    // Ditto with half floats (16-bit numbers with the 5 not-quite-highest bits = 0x7C00 are special)
    unsigned short *shortPtr = (unsigned short *)data;
    for( size_t i = 0; i < allocSize >> 1; i++ )
    {
        if( ( shortPtr[ i ] & 0x7C00 ) == 0x7C00 )
            shortPtr[ i ] ^= 0x4000;
    }
}

// Allocate and fill a host-side image buffer: random bits by default, a
// monotonically-increasing ramp when gTestRounding is set. inf/NaN-like bit
// patterns are escaped, and row/slice padding is poisoned with 0xff so reads
// from padding are detectable. Ownership is handed to `P`.
// NOTE(review): on Apple/CPU the buffer is placed against mprotect()ed guard
// pages so out-of-bounds accesses fault immediately.
char * generate_random_image_data( image_descriptor *imageInfo, BufferOwningPtr &P, MTdata d )
{
    size_t allocSize = get_image_size( imageInfo );
    size_t pixelRowBytes = imageInfo->width * get_pixel_size( imageInfo->format );
    size_t i;

    if (imageInfo->num_mip_levels > 1)
        allocSize = compute_mipmapped_image_size(*imageInfo);

#if defined (__APPLE__ )
    char *data = NULL;
    if (gDeviceType == CL_DEVICE_TYPE_CPU) {
        // Round up to page size and add two guard pages
        size_t mapSize = ((allocSize + 4095L) & -4096L) + 8192;

        void *map = mmap(0, mapSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, 0, 0);
        intptr_t data_end = (intptr_t)map + mapSize - 4096;
        data = (char *)(data_end - (intptr_t)allocSize);

        // Make the first and last page inaccessible
        mprotect(map, 4096, PROT_NONE);
        mprotect((void *)((char *)map + mapSize - 4096), 4096, PROT_NONE);
        P.reset(data, map, mapSize,allocSize);
    } else {
        data = (char *)malloc(allocSize);
        P.reset(data,NULL,0,allocSize);
    }
#else
    P.reset( NULL ); // Free already allocated memory first, then try to allocate new block.
#if defined (_WIN32) && defined(_MSC_VER)
    char *data = (char *)_aligned_malloc(allocSize, get_pixel_size(imageInfo->format));
#elif defined(__MINGW32__)
    char *data = (char *)__mingw_aligned_malloc(allocSize, get_pixel_size(imageInfo->format));
#else
    char *data = (char *)memalign(get_pixel_size(imageInfo->format), allocSize);
#endif
    P.reset(data,NULL,0,allocSize, true);
#endif

    if (data == NULL) {
        log_error( "ERROR: Unable to malloc %lu bytes for generate_random_image_data\n", allocSize );
        return 0;
    }

    if( gTestRounding )
    {
        // Special case: fill with a ramp from 0 to the size of the type
        size_t typeSize = get_format_type_size( imageInfo->format );
        switch( typeSize )
        {
            case 1:
            {
                char *ptr = data;
                for( i = 0; i < allocSize; i++ )
                    ptr[i] = (cl_char) (i + gRoundingStartValue);
            }
            break;
            case 2:
            {
                cl_short *ptr = (cl_short*) data;
                for( i = 0; i < allocSize / 2; i++ )
                    ptr[i] = (cl_short) (i + gRoundingStartValue);
            }
            break;
            case 4:
            {
                cl_int *ptr = (cl_int*) data;
                for( i = 0; i < allocSize / 4; i++ )
                    ptr[i] = (cl_int) (i + gRoundingStartValue);
            }
            break;
        }

        // Note: inf or nan float values would cause problems, although we don't know this will
        // actually be a float, so we just know what to look for
        escape_inf_nan_values( data, allocSize );
        return data;
    }

    // Otherwise, we should be able to just fill with random bits no matter what
    cl_uint *p = (cl_uint*) data;
    for( i = 0; i + 4 <= allocSize; i += 4 )
        p[ i / 4 ] = genrand_int32(d);

    // Tail bytes that don't make a full 32-bit word
    for( ; i < allocSize; i++ )
        data[i] = genrand_int32(d);

    // Note: inf or nan float values would cause problems, although we don't know this will
    // actually be a float, so we just know what to look for
    escape_inf_nan_values( data, allocSize );

    if ( /*!gTestMipmaps*/ imageInfo->num_mip_levels < 2 )
    {
        // Fill unused edges with -1, NaN for float
        if (imageInfo->rowPitch > pixelRowBytes)
        {
            size_t height = 0;

            switch (imageInfo->type)
            {
                case CL_MEM_OBJECT_IMAGE2D:
                case CL_MEM_OBJECT_IMAGE3D:
                case CL_MEM_OBJECT_IMAGE2D_ARRAY:
                    height = imageInfo->height;
                    break;
                case CL_MEM_OBJECT_IMAGE1D_ARRAY:
                    height = imageInfo->arraySize;
                    break;
            }

            // Fill in the row padding regions
            for( i = 0; i < height; i++ )
            {
                size_t offset = i * imageInfo->rowPitch + pixelRowBytes;
                size_t length = imageInfo->rowPitch - pixelRowBytes;
                memset( data + offset, 0xff, length );
            }
        }

        // Fill in the slice padding regions, if necessary:

        size_t slice_dimension = imageInfo->height;
        if (imageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
            slice_dimension = imageInfo->arraySize;
        }

        if (imageInfo->slicePitch > slice_dimension*imageInfo->rowPitch)
        {
            size_t depth = 0;
            switch (imageInfo->type)
            {
                case CL_MEM_OBJECT_IMAGE2D:
                case CL_MEM_OBJECT_IMAGE3D:
                    depth = imageInfo->depth;
                    break;
                case CL_MEM_OBJECT_IMAGE1D_ARRAY:
                case CL_MEM_OBJECT_IMAGE2D_ARRAY:
                    depth = imageInfo->arraySize;
                    break;
            }

            for( i = 0; i < depth; i++ )
            {
                size_t offset = i * imageInfo->slicePitch + slice_dimension*imageInfo->rowPitch;
                size_t length = imageInfo->slicePitch - slice_dimension*imageInfo->rowPitch;
                memset( data + offset, 0xff, length );
            }
        }
    }

    return data;
}

// Clamp to [-1, 1] (used when converting snorm integer samples to float).
#define CLAMP_FLOAT( v ) ( fmaxf( fminf( v, 1.f ), -1.f ) )


// Decode the texel at (x, y, z) of mip level `lod` into a float RGBA quad in
// `outData`, following the format's channel data type and channel order.
// Out-of-range coordinates return the border color.
void read_image_pixel_float( void *imageData, image_descriptor *imageInfo,
                            int x, int y, int z, float *outData, int lod )
{
    size_t width_lod = imageInfo->width, height_lod = imageInfo->height, depth_lod = imageInfo->depth;
    size_t slice_pitch_lod = 0, row_pitch_lod = 0;

    if ( imageInfo->num_mip_levels > 1 )
    {
        // Scale each applicable dimension down for the requested mip level
        // (cases intentionally fall through; minimum extent is 1).
        switch(imageInfo->type)
        {
            case CL_MEM_OBJECT_IMAGE3D :
                depth_lod = ( imageInfo->depth >> lod ) ? ( imageInfo->depth >> lod ) : 1;
            case CL_MEM_OBJECT_IMAGE2D :
            case CL_MEM_OBJECT_IMAGE2D_ARRAY :
                height_lod = ( imageInfo->height >> lod ) ? ( imageInfo->height >> lod ) : 1;
            default :
                width_lod = ( imageInfo->width >> lod ) ? ( imageInfo->width >> lod ) : 1;
        }
        row_pitch_lod = width_lod * get_pixel_size(imageInfo->format);
        if ( imageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY )
            slice_pitch_lod = row_pitch_lod;
        else if ( imageInfo->type == CL_MEM_OBJECT_IMAGE3D || imageInfo->type == CL_MEM_OBJECT_IMAGE2D_ARRAY)
            slice_pitch_lod = row_pitch_lod * height_lod;
    }
    else
    {
        row_pitch_lod = imageInfo->rowPitch;
        slice_pitch_lod = imageInfo->slicePitch;
    }
    if ( x < 0 || y < 0 || z < 0 || x >= (int)width_lod
        || ( height_lod != 0 && y >= (int)height_lod )
        || ( depth_lod != 0 && z >= (int)depth_lod )
        || ( imageInfo->arraySize != 0 && z >= (int)imageInfo->arraySize ) )
    {
        // Border color
        if (imageInfo->format->image_channel_order == CL_DEPTH)
        {
            outData[ 0 ] = 0;
        }
        else {
            outData[ 0 ] = outData[ 1 ] = outData[ 2 ] = outData[ 3 ] = 0;
            if (!has_alpha(imageInfo->format))
                outData[3] = alpha_is_x(imageInfo->format) ? 0 : 1;
        }
        return;
    }

    cl_image_format *format = imageInfo->format;

    unsigned int i;
    float tempData[ 4 ];

    // Advance to the right spot
    char *ptr = (char *)imageData;
    size_t pixelSize = get_pixel_size( format );

    ptr += z * slice_pitch_lod + y * row_pitch_lod + x * pixelSize;

    // OpenCL only supports reading floats from certain formats
    size_t channelCount = get_format_channel_count( format );
    switch( format->image_channel_data_type )
    {
        case CL_SNORM_INT8:
        {
            char *dPtr = (char *)ptr;
            for( i = 0; i < channelCount; i++ )
                tempData[ i ] = CLAMP_FLOAT( (float)dPtr[ i ] / 127.0f );
            break;
        }

        case CL_UNORM_INT8:
        {
            unsigned char *dPtr = (unsigned char *)ptr;
            for( i = 0; i < channelCount; i++ ) {
                if((is_sRGBA_order(imageInfo->format->image_channel_order)) && i<3) // only RGB need to be converted for sRGBA
                    tempData[ i ] = (float)sRGBunmap((float)dPtr[ i ] / 255.0f) ;
                else
                    tempData[ i ] = (float)dPtr[ i ] / 255.0f;
            }
            break;
        }

        case CL_SIGNED_INT8:
        {
            cl_char *dPtr = (cl_char *)ptr;
            for( i = 0; i < channelCount; i++ )
                tempData[ i ] = (float)dPtr[ i ];
            break;
        }

        case CL_UNSIGNED_INT8:
        {
            cl_uchar *dPtr = (cl_uchar *)ptr;
            for( i = 0; i < channelCount; i++ )
                tempData[ i ] = (float) dPtr[ i ];
            break;
        }

        case CL_SNORM_INT16:
        {
            cl_short *dPtr = (cl_short *)ptr;
            for( i = 0; i < channelCount; i++ )
                tempData[ i ] = CLAMP_FLOAT( (float)dPtr[ i ] / 32767.0f );
            break;
        }

        case CL_UNORM_INT16:
        {
            cl_ushort *dPtr = (cl_ushort *)ptr;
            for( i = 0; i < channelCount; i++ )
                tempData[ i ] = (float)dPtr[ i ] / 65535.0f;
            break;
        }

        case CL_SIGNED_INT16:
        {
            cl_short *dPtr = (cl_short *)ptr;
            for( i = 0; i < channelCount; i++ )
                tempData[ i ] = (float)dPtr[ i ];
            break;
        }

        case CL_UNSIGNED_INT16:
        {
            cl_ushort *dPtr = (cl_ushort *)ptr;
            for( i = 0; i < channelCount; i++ )
                tempData[ i ] = (float) dPtr[ i ];
            break;
        }

        case CL_HALF_FLOAT:
        {
            cl_ushort *dPtr = (cl_ushort *)ptr;
            for( i = 0; i < channelCount; i++ )
                tempData[ i ] = convert_half_to_float( dPtr[ i ] );
            break;
        }

        case CL_SIGNED_INT32:
        {
            cl_int *dPtr = (cl_int *)ptr;
            for( i = 0; i < channelCount; i++ )
                tempData[ i ] = (float)dPtr[ i ];
            break;
        }

        case CL_UNSIGNED_INT32:
        {
            cl_uint *dPtr = (cl_uint *)ptr;
            for( i = 0; i < channelCount; i++ )
                tempData[ i ] = (float)dPtr[ i ];
            break;
        }

        case CL_UNORM_SHORT_565:
        {
            // Packed 5:6:5 — unpack each field and normalize by its max code
            cl_ushort *dPtr = (cl_ushort *)ptr;
            tempData[ 0 ] = (float)( dPtr[ 0 ] >> 11 ) / (float)31;
            tempData[ 1 ] = (float)( ( dPtr[ 0 ] >> 5 ) & 63 ) / (float)63;
            tempData[ 2 ] = (float)( dPtr[ 0 ] & 31 ) / (float)31;
            break;
        }

        case CL_UNORM_SHORT_555:
        {
            cl_ushort *dPtr = (cl_ushort *)ptr;
            tempData[ 0 ] = (float)( ( dPtr[ 0 ] >> 10 ) & 31 ) / (float)31;
            tempData[ 1 ] = (float)( ( dPtr[ 0 ] >> 5 ) & 31 ) / (float)31;
            tempData[ 2 ] = (float)( dPtr[ 0 ] & 31 ) / (float)31;
            break;
        }

        case CL_UNORM_INT_101010:
        {
            cl_uint *dPtr = (cl_uint *)ptr;
            tempData[ 0 ] = (float)( ( dPtr[ 0 ] >> 20 ) & 0x3ff ) / (float)1023;
            tempData[ 1 ] = (float)( ( dPtr[ 0 ] >> 10 ) & 0x3ff ) / (float)1023;
            tempData[ 2 ] = (float)( dPtr[ 0 ] & 0x3ff ) / (float)1023;
            break;
        }

        case CL_FLOAT:
        {
            float *dPtr = (float *)ptr;
            for( i = 0; i < channelCount; i++ )
                tempData[ i ] = (float)dPtr[ i ];
            break;
        }
#ifdef CL_SFIXED14_APPLE
        case CL_SFIXED14_APPLE:
        {
            cl_ushort *dPtr = (cl_ushort*) ptr;
            for( i = 0; i < channelCount; i++ )
                tempData[i] = ((int) dPtr[i] - 16384) * 0x1.0p-14f;
            break;
        }
#endif
    }


    // Map decoded channels to RGBA according to the channel order.
    // Defaults: RGB = 0, A = 1.
    outData[ 0 ] = outData[ 1 ] = outData[ 2 ] = 0;
    outData[ 3 ] = 1;

    switch( format->image_channel_order )
    {
        case CL_A:
            outData[ 3 ] = tempData[ 0 ];
            break;
        case CL_R:
        case CL_Rx:
            outData[ 0 ] = tempData[ 0 ];
            break;
        case CL_RA:
            outData[ 0 ] = tempData[ 0 ];
            outData[ 3 ] = tempData[ 1 ];
            break;
        case CL_RG:
        case CL_RGx:
            outData[ 0 ] = tempData[ 0 ];
            outData[ 1 ] = tempData[ 1 ];
            break;
        case CL_RGB:
        case CL_RGBx:
        case CL_sRGB:
        case CL_sRGBx:
            outData[ 0 ] = tempData[ 0 ];
            outData[ 1 ] = tempData[ 1 ];
            outData[ 2 ] = tempData[ 2 ];
            break;
        case CL_RGBA:
            outData[ 0 ] = tempData[ 0 ];
            outData[ 1 ] = tempData[ 1 ];
            outData[ 2 ] = tempData[ 2 ];
            outData[ 3 ] = tempData[ 3 ];
            break;
        case CL_ARGB:
            outData[ 0 ] = tempData[ 1 ];
            outData[ 1 ] = tempData[ 2 ];
            outData[ 2 ] = tempData[ 3 ];
            outData[ 3 ] = tempData[ 0 ];
            break;
        case CL_BGRA:
        case CL_sBGRA:
            outData[ 0 ] = tempData[ 2 ];
            outData[ 1 ] = tempData[ 1 ];
            outData[ 2 ] = tempData[ 0 ];
            outData[ 3 ] = tempData[ 3 ];
            break;
        case CL_INTENSITY:
            outData[ 0 ] = tempData[ 0 ];
            outData[ 1 ] = tempData[ 0 ];
            outData[ 2 ] = tempData[ 0 ];
            outData[ 3 ] = tempData[ 0 ];
            break;
        case CL_LUMINANCE:
            outData[ 0 ] = tempData[ 0 ];
            outData[ 1 ] = tempData[ 0 ];
            outData[ 2 ] = tempData[ 0 ];
            break;
#ifdef CL_1RGB_APPLE
        case CL_1RGB_APPLE:
            outData[ 0 ] = tempData[ 1 ];
            outData[ 1 ] = tempData[ 2 ];
            outData[ 2 ] = tempData[ 3 ];
            outData[ 3 ] = 1.0f;
            break;
#endif
#ifdef CL_BGR1_APPLE
        case CL_BGR1_APPLE:
            outData[ 0 ] = tempData[ 2 ];
            outData[ 1 ] = tempData[ 1 ];
            outData[ 2 ] = tempData[ 0 ];
            outData[ 3 ] = 1.0f;
            break;
#endif
        case CL_sRGBA:
            outData[ 0 ] = tempData[ 0 ];
            outData[ 1 ] = tempData[ 1 ];
            outData[ 2 ] = tempData[ 2 ];
            outData[ 3 ] = tempData[ 3 ];
            break;
        case CL_DEPTH:
            outData[ 0 ] = tempData[ 0 ];
            break;
        default:
            log_error("Invalid format:");
            print_header(format, true);
            break;
    }
}

// Convenience overload: read from mip level 0.
void read_image_pixel_float( void *imageData, image_descriptor *imageInfo,
                            int x, int y, int z, float *outData )
{
    read_image_pixel_float( imageData, imageInfo, x, y, z, outData, 0 );
}

// Convenience wrapper around get_integer_coords_offset with zero offsets.
bool get_integer_coords( float x, float y, float z, size_t width, size_t height, size_t depth, image_sampler_data *imageSampler, image_descriptor *imageInfo, int &outX, int &outY, int &outZ ) {
    return get_integer_coords_offset(x, y, z, 0.0f, 0.0f, 0.0f, width, height, depth, imageSampler, imageInfo, outX, outY, outZ);
}

// Resolve floating-point sample coordinates (optionally normalized, with
// per-axis address offsets) into integer texel coordinates using the
// sampler's addressing mode. Returns true when any output coordinate differs
// from the plain floor of the input (i.e. addressing changed the result).
bool get_integer_coords_offset( float x, float y, float z, float xAddressOffset, float yAddressOffset, float zAddressOffset,
                               size_t width, size_t height, size_t depth, image_sampler_data *imageSampler, image_descriptor *imageInfo, int &outX, int &outY, int &outZ )
{
    AddressFn adFn = sAddressingTable[ imageSampler ];

    float refX = floorf( x ), refY = floorf( y ), refZ = floorf( z );

    // Handle sampler-directed coordinate normalization + clamping. Note that
    // the array coordinate for image array types is expected to be
    // unnormalized, and is clamped to 0..arraySize-1.
    if( imageSampler->normalized_coords )
    {
        switch (imageSampler->addressing_mode)
        {
            case CL_ADDRESS_REPEAT:
                x = RepeatNormalizedAddressFn( x, width );
                if (height != 0) {
                    if (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY)
                        y = RepeatNormalizedAddressFn( y, height );
                }
                if (depth != 0) {
                    if (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY)
                        z = RepeatNormalizedAddressFn( z, depth );
                }

                if (xAddressOffset != 0.0) {
                    // Add in the offset
                    x += xAddressOffset;
                    // Handle wrapping
                    if (x > width)
                        x -= (float)width;
                    if (x < 0)
                        x += (float)width;
                }
                if ( (yAddressOffset != 0.0) && (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY) ) {
                    // Add in the offset
                    y += yAddressOffset;
                    // Handle wrapping
                    if (y > height)
                        y -= (float)height;
                    if (y < 0)
                        y += (float)height;
                }
                if ( (zAddressOffset != 0.0) && (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY) ) {
                    // Add in the offset
                    z += zAddressOffset;
                    // Handle wrapping
                    if (z > depth)
                        z -= (float)depth;
                    if (z < 0)
                        z += (float)depth;
                }
                break;

            case CL_ADDRESS_MIRRORED_REPEAT:
                x = MirroredRepeatNormalizedAddressFn( x, width );
                if (height != 0) {
                    if (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY)
                        y = MirroredRepeatNormalizedAddressFn( y, height );
                }
                if (depth != 0) {
                    if (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY)
                        z = MirroredRepeatNormalizedAddressFn( z, depth );
                }

                if (xAddressOffset != 0.0)
                {
                    // Reflect the offset coordinate back into range
                    float temp = x + xAddressOffset;
                    if( temp > (float) width )
                        temp = (float) width - (temp - (float) width );
                    x = fabsf( temp );
                }
                if ( (yAddressOffset != 0.0) && (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY) ) {
                    float temp = y + yAddressOffset;
                    if( temp > (float) height )
                        temp = (float) height - (temp - (float) height );
                    y = fabsf( temp );
                }
                if ( (zAddressOffset != 0.0) && (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY) ) {
                    float temp = z + zAddressOffset;
                    if( temp > (float) depth )
                        temp = (float) depth - (temp - (float) depth );
                    z = fabsf( temp );
                }
                break;

            default:
                // Also, remultiply to the original coords. This simulates any
                // truncation in the pass to OpenCL
                x *= (float)width;
                x += xAddressOffset;

                if (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY)
                {
                    y *= (float)height;
                    y += yAddressOffset;
                }

                if (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY)
                {
                    z *= (float)depth;
                    z += zAddressOffset;
                }
                break;
        }
    }

    // At this point, we're dealing with non-normalized coordinates.

    outX = adFn( floorf( x ), width );

    // 1D and 2D arrays require special care for the index coordinate:

    switch (imageInfo->type) {
        case CL_MEM_OBJECT_IMAGE1D_ARRAY:
            outY = calculate_array_index(y, (float)imageInfo->arraySize - 1.0f);
            outZ = 0.0f; /* don't care! */
            break;
        case CL_MEM_OBJECT_IMAGE2D_ARRAY:
            outY = adFn( floorf( y ), height );
            outZ = calculate_array_index(z, (float)imageInfo->arraySize - 1.0f);
            break;
        default:
            // legacy path:
            if (height != 0)
                outY = adFn( floorf( y ), height );
            if( depth != 0 )
                outZ = adFn( floorf( z ), depth );
    }

    return !( (int)refX == outX && (int)refY == outY && (int)refZ == outZ );
}

// Fractional part of a (a - floor(a)).
static float frac(float a) {
    return a - floorf(a);
}

// Component-wise max of |a| and |b| (via errMax) into results[0..3].
static inline void pixelMax( const float a[4], const float b[4], float *results );
static inline void pixelMax( const float a[4], const float b[4], float *results )
{
    for( int i = 0; i < 4; i++ )
        results[i] = errMax( fabsf(a[i]), fabsf(b[i]) );
}

// If containsDenorms is NULL, flush denorms to zero
// if containsDenorms is not NULL, record whether there are any denorms
static inline void check_for_denorms(float a[4], int *containsDenorms );
static inline void check_for_denorms(float a[4], int *containsDenorms )
{
    if( NULL == containsDenorms )
    {
        for( int i = 0; i < 4; i++ )
        {
            if( fabsf(a[i]) < FLT_MIN )
                a[i] = copysignf( 0.0f, a[i] );
        }
    }
    else
    {
        for( int i = 0; i < 4; i++ )
        {
            if( fabs(a[i]) < FLT_MIN )
            {
                *containsDenorms = 1;
                break;
            }
        }
    }
}

inline float calculate_array_index( float coord, float extent ) {
    // from Section 8.4 of the 1.2 Spec 'Selecting an Image from an Image Array'
    //
    // given coordinate 'w' that represents an index:
    // layer_index = clamp( rint(w), 0, image_array_size - 1)

    float ret = rintf( coord );
    ret = ret > extent ? extent : ret;
    ret = ret < 0.0f ? 0.0f : ret;

    return ret;
}

/*
 * Utility function to unnormalized a coordinate given a particular sampler.
 *
 * name - the name of the coordinate, used for verbose debugging only
 * coord - the coordinate requiring unnormalization
 * offset - an addressing offset to be added to the coordinate
 * extent - the max value for this coordinate (e.g.
width for x) + */ +static float unnormalize_coordinate( const char* name, float coord, + float offset, float extent, cl_addressing_mode addressing_mode, int verbose ) +{ + float ret = 0.0f; + + switch (addressing_mode) { + case CL_ADDRESS_REPEAT: + ret = RepeatNormalizedAddressFn( coord, extent ); + + if ( verbose ) { + log_info( "\tRepeat filter denormalizes %s (%f) to %f\n", + name, coord, ret ); + } + + if (offset != 0.0) { + // Add in the offset, and handle wrapping. + ret += offset; + if (ret > extent) ret -= extent; + if (ret < 0.0) ret += extent; + } + + if (verbose && offset != 0.0f) { + log_info( "\tAddress offset of %f added to get %f\n", offset, ret ); + } + break; + + case CL_ADDRESS_MIRRORED_REPEAT: + ret = MirroredRepeatNormalizedAddressFn( coord, extent ); + + if ( verbose ) { + log_info( "\tMirrored repeat filter denormalizes %s (%f) to %f\n", + name, coord, ret ); + } + + if (offset != 0.0) { + float temp = ret + offset; + if( temp > extent ) + temp = extent - (temp - extent ); + ret = fabsf( temp ); + } + + if (verbose && offset != 0.0f) { + log_info( "\tAddress offset of %f added to get %f\n", offset, ret ); + } + break; + + default: + + ret = coord * extent; + + if ( verbose ) { + log_info( "\tFilter denormalizes %s to %f (%f * %f)\n", + name, ret, coord, extent); + } + + ret += offset; + + if (verbose && offset != 0.0f) { + log_info( "\tAddress offset of %f added to get %f\n", offset, ret ); + } + } + + return ret; +} + +FloatPixel sample_image_pixel_float( void *imageData, image_descriptor *imageInfo, + float x, float y, float z, + image_sampler_data *imageSampler, float *outData, int verbose, int *containsDenorms ) { + return sample_image_pixel_float_offset(imageData, imageInfo, x, y, z, 0.0f, 0.0f, 0.0f, imageSampler, outData, verbose, containsDenorms); +} + +// returns max pixel value of the pixels touched +FloatPixel sample_image_pixel_float( void *imageData, image_descriptor *imageInfo, + float x, float y, float z, + image_sampler_data 
*imageSampler, float *outData, int verbose, int *containsDenorms , int lod) { + return sample_image_pixel_float_offset(imageData, imageInfo, x, y, z, 0.0f, 0.0f, 0.0f, imageSampler, outData, verbose, containsDenorms, lod); +} +FloatPixel sample_image_pixel_float_offset( void *imageData, image_descriptor *imageInfo, + float x, float y, float z, float xAddressOffset, float yAddressOffset, float zAddressOffset, + image_sampler_data *imageSampler, float *outData, int verbose, int *containsDenorms , int lod) +{ + AddressFn adFn = sAddressingTable[ imageSampler ]; + FloatPixel returnVal; + size_t width_lod = imageInfo->width, height_lod = imageInfo->height, depth_lod = imageInfo->depth; + size_t slice_pitch_lod = 0, row_pitch_lod = 0; + + if ( imageInfo->num_mip_levels > 1 ) + { + switch(imageInfo->type) + { + case CL_MEM_OBJECT_IMAGE3D : + depth_lod = ( imageInfo->depth >> lod ) ? ( imageInfo->depth >> lod ) : 1; + case CL_MEM_OBJECT_IMAGE2D : + case CL_MEM_OBJECT_IMAGE2D_ARRAY : + height_lod = ( imageInfo->height >> lod ) ? ( imageInfo->height >> lod ) : 1; + default : + width_lod = ( imageInfo->width >> lod ) ? ( imageInfo->width >> lod ) : 1; + } + row_pitch_lod = width_lod * get_pixel_size(imageInfo->format); + if ( imageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY ) + slice_pitch_lod = row_pitch_lod; + else if ( imageInfo->type == CL_MEM_OBJECT_IMAGE3D || imageInfo->type == CL_MEM_OBJECT_IMAGE2D_ARRAY) + slice_pitch_lod = row_pitch_lod * height_lod; + } + else + { + slice_pitch_lod = imageInfo->slicePitch; + row_pitch_lod = imageInfo->rowPitch; + } + + if( containsDenorms ) + *containsDenorms = 0; + + if( imageSampler->normalized_coords ) { + + // We need to unnormalize our coordinates differently depending on + // the image type, but 'x' is always processed the same way. 
+ + x = unnormalize_coordinate("x", x, xAddressOffset, (float)width_lod, + imageSampler->addressing_mode, verbose); + + switch (imageInfo->type) { + + // The image array types require special care: + + case CL_MEM_OBJECT_IMAGE1D_ARRAY: + z = 0; // don't care -- unused for 1D arrays + break; + + case CL_MEM_OBJECT_IMAGE2D_ARRAY: + y = unnormalize_coordinate("y", y, yAddressOffset, (float)height_lod, + imageSampler->addressing_mode, verbose); + break; + + // Everybody else: + + default: + y = unnormalize_coordinate("y", y, yAddressOffset, (float)height_lod, + imageSampler->addressing_mode, verbose); + z = unnormalize_coordinate("z", z, zAddressOffset, (float)depth_lod, + imageSampler->addressing_mode, verbose); + } + + } else if ( verbose ) { + + switch (imageInfo->type) { + case CL_MEM_OBJECT_IMAGE1D_ARRAY: + log_info("Starting coordinate: %f, array index %f\n", x, y); + break; + case CL_MEM_OBJECT_IMAGE2D_ARRAY: + log_info("Starting coordinate: %f, %f, array index %f\n", x, y, z); + break; + case CL_MEM_OBJECT_IMAGE1D: + case CL_MEM_OBJECT_IMAGE1D_BUFFER: + log_info("Starting coordinate: %f\b", x); + break; + case CL_MEM_OBJECT_IMAGE2D: + log_info("Starting coordinate: %f, %f\n", x, y); + break; + case CL_MEM_OBJECT_IMAGE3D: + default: + log_info("Starting coordinate: %f, %f, %f\n", x, y, z); + } + } + + // At this point, we have unnormalized coordinates. + + if( imageSampler->filter_mode == CL_FILTER_NEAREST ) + { + int ix, iy, iz; + + // We apply the addressing function to the now-unnormalized + // coordinates. Note that the array cases again require special + // care, per section 8.4 in the OpenCL 1.2 Specification. 
+ + ix = adFn( floorf( x ), width_lod ); + + switch (imageInfo->type) { + case CL_MEM_OBJECT_IMAGE1D_ARRAY: + iy = calculate_array_index( y, (float)(imageInfo->arraySize - 1) ); + iz = 0; + if( verbose ) { + log_info("\tArray index %f evaluates to %d\n",y, iy ); + } + break; + case CL_MEM_OBJECT_IMAGE2D_ARRAY: + iy = adFn( floorf( y ), height_lod ); + iz = calculate_array_index( z, (float)(imageInfo->arraySize - 1) ); + if( verbose ) { + log_info("\tArray index %f evaluates to %d\n",z, iz ); + } + break; + default: + iy = adFn( floorf( y ), height_lod ); + if( depth_lod != 0 ) + iz = adFn( floorf( z ), depth_lod ); + else + iz = 0; + } + + if( verbose ) { + if( iz ) + log_info( "\tReference integer coords calculated: { %d, %d, %d }\n", ix, iy, iz ); + else + log_info( "\tReference integer coords calculated: { %d, %d }\n", ix, iy ); + } + + read_image_pixel_float( imageData, imageInfo, ix, iy, iz, outData, lod ); + check_for_denorms( outData, containsDenorms ); + for( int i = 0; i < 4; i++ ) + returnVal.p[i] = fabsf( outData[i] ); + return returnVal; + } + else + { + // Linear filtering cases. + + size_t width = width_lod, height = height_lod, depth = depth_lod; + + // Image arrays can use 2D filtering, but require us to walk into the + // image a certain number of slices before reading. + + if( depth == 0 || imageInfo->type == CL_MEM_OBJECT_IMAGE2D_ARRAY || + imageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY) + { + float array_index = 0; + + size_t layer_offset = 0; + + if (imageInfo->type == CL_MEM_OBJECT_IMAGE2D_ARRAY) { + array_index = calculate_array_index(z, (float)(imageInfo->arraySize - 1)); + layer_offset = slice_pitch_lod * (size_t)array_index; + } + else if (imageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY) { + array_index = calculate_array_index(y, (float)(imageInfo->arraySize - 1)); + layer_offset = slice_pitch_lod * (size_t)array_index; + + // Set up y and height so that the filtering below is correct + // 1D filtering on a single slice. 
+ height = 1; + } + + int x1 = adFn( floorf( x - 0.5f ), width ); + int y1 = 0; + int x2 = adFn( floorf( x - 0.5f ) + 1, width ); + int y2 = 0; + if ((imageInfo->type != CL_MEM_OBJECT_IMAGE1D) && + (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY) && + (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_BUFFER)) { + y1 = adFn( floorf( y - 0.5f ), height ); + y2 = adFn( floorf( y - 0.5f ) + 1, height ); + } else { + y = 0.5f; + } + + if( verbose ) { + log_info( "\tActual integer coords used (i = floor(x-.5)): i0:{ %d, %d } and i1:{ %d, %d }\n", x1, y1, x2, y2 ); + log_info( "\tArray coordinate is %f\n", array_index); + } + + // Walk to beginning of the 'correct' slice, if needed. + char* imgPtr = ((char*)imageData) + layer_offset; + + float upLeft[ 4 ], upRight[ 4 ], lowLeft[ 4 ], lowRight[ 4 ]; + float maxUp[4], maxLow[4]; + read_image_pixel_float( imgPtr, imageInfo, x1, y1, 0, upLeft, lod ); + read_image_pixel_float( imgPtr, imageInfo, x2, y1, 0, upRight, lod ); + check_for_denorms( upLeft, containsDenorms ); + check_for_denorms( upRight, containsDenorms ); + pixelMax( upLeft, upRight, maxUp ); + read_image_pixel_float( imgPtr, imageInfo, x1, y2, 0, lowLeft, lod ); + read_image_pixel_float( imgPtr, imageInfo, x2, y2, 0, lowRight, lod ); + check_for_denorms( lowLeft, containsDenorms ); + check_for_denorms( lowRight, containsDenorms ); + pixelMax( lowLeft, lowRight, maxLow ); + pixelMax( maxUp, maxLow, returnVal.p ); + + if( verbose ) + { + if( NULL == containsDenorms ) + log_info( "\tSampled pixels (rgba order, denorms flushed to zero):\n" ); + else + log_info( "\tSampled pixels (rgba order):\n" ); + log_info( "\t\tp00: %f, %f, %f, %f\n", upLeft[0], upLeft[1], upLeft[2], upLeft[3] ); + log_info( "\t\tp01: %f, %f, %f, %f\n", upRight[0], upRight[1], upRight[2], upRight[3] ); + log_info( "\t\tp10: %f, %f, %f, %f\n", lowLeft[0], lowLeft[1], lowLeft[2], lowLeft[3] ); + log_info( "\t\tp11: %f, %f, %f, %f\n", lowRight[0], lowRight[1], lowRight[2], lowRight[3] ); + } + + bool printMe 
= false; + if( x1 <= 0 || x2 <= 0 || x1 >= (int)width-1 || x2 >= (int)width-1 ) + printMe = true; + if( y1 <= 0 || y2 <= 0 || y1 >= (int)height-1 || y2 >= (int)height-1 ) + printMe = true; + + double weights[ 2 ][ 2 ]; + + weights[ 0 ][ 0 ] = weights[ 0 ][ 1 ] = 1.0 - frac( x - 0.5f ); + weights[ 1 ][ 0 ] = weights[ 1 ][ 1 ] = frac( x - 0.5f ); + weights[ 0 ][ 0 ] *= 1.0 - frac( y - 0.5f ); + weights[ 1 ][ 0 ] *= 1.0 - frac( y - 0.5f ); + weights[ 0 ][ 1 ] *= frac( y - 0.5f ); + weights[ 1 ][ 1 ] *= frac( y - 0.5f ); + + if( verbose ) + log_info( "\tfrac( x - 0.5f ) = %f, frac( y - 0.5f ) = %f\n", frac( x - 0.5f ), frac( y - 0.5f ) ); + + for( int i = 0; i < 3; i++ ) + { + outData[ i ] = (float)( ( upLeft[ i ] * weights[ 0 ][ 0 ] ) + + ( upRight[ i ] * weights[ 1 ][ 0 ] ) + + ( lowLeft[ i ] * weights[ 0 ][ 1 ] ) + + ( lowRight[ i ] * weights[ 1 ][ 1 ] )); + // flush subnormal results to zero if necessary + if( NULL == containsDenorms && fabs(outData[i]) < FLT_MIN ) + outData[i] = copysignf( 0.0f, outData[i] ); + } + outData[ 3 ] = (float)( ( upLeft[ 3 ] * weights[ 0 ][ 0 ] ) + + ( upRight[ 3 ] * weights[ 1 ][ 0 ] ) + + ( lowLeft[ 3 ] * weights[ 0 ][ 1 ] ) + + ( lowRight[ 3 ] * weights[ 1 ][ 1 ] )); + // flush subnormal results to zero if necessary + if( NULL == containsDenorms && fabs(outData[3]) < FLT_MIN ) + outData[3] = copysignf( 0.0f, outData[3] ); + } + else + { + // 3D linear filtering + int x1 = adFn( floorf( x - 0.5f ), width_lod ); + int y1 = adFn( floorf( y - 0.5f ), height_lod ); + int z1 = adFn( floorf( z - 0.5f ), depth_lod ); + int x2 = adFn( floorf( x - 0.5f ) + 1, width_lod ); + int y2 = adFn( floorf( y - 0.5f ) + 1, height_lod ); + int z2 = adFn( floorf( z - 0.5f ) + 1, depth_lod ); + + if( verbose ) + log_info( "\tActual integer coords used (i = floor(x-.5)): i0:{%d, %d, %d} and i1:{%d, %d, %d}\n", x1, y1, z1, x2, y2, z2 ); + + float upLeftA[ 4 ], upRightA[ 4 ], lowLeftA[ 4 ], lowRightA[ 4 ]; + float upLeftB[ 4 ], upRightB[ 4 ], lowLeftB[ 4 ], 
lowRightB[ 4 ]; + float pixelMaxA[4], pixelMaxB[4]; + read_image_pixel_float( imageData, imageInfo, x1, y1, z1, upLeftA, lod ); + read_image_pixel_float( imageData, imageInfo, x2, y1, z1, upRightA, lod ); + check_for_denorms( upLeftA, containsDenorms ); + check_for_denorms( upRightA, containsDenorms ); + pixelMax( upLeftA, upRightA, pixelMaxA ); + read_image_pixel_float( imageData, imageInfo, x1, y2, z1, lowLeftA, lod ); + read_image_pixel_float( imageData, imageInfo, x2, y2, z1, lowRightA, lod ); + check_for_denorms( lowLeftA, containsDenorms ); + check_for_denorms( lowRightA, containsDenorms ); + pixelMax( lowLeftA, lowRightA, pixelMaxB ); + pixelMax( pixelMaxA, pixelMaxB, returnVal.p); + read_image_pixel_float( imageData, imageInfo, x1, y1, z2, upLeftB, lod ); + read_image_pixel_float( imageData, imageInfo, x2, y1, z2, upRightB, lod ); + check_for_denorms( upLeftB, containsDenorms ); + check_for_denorms( upRightB, containsDenorms ); + pixelMax( upLeftB, upRightB, pixelMaxA ); + read_image_pixel_float( imageData, imageInfo, x1, y2, z2, lowLeftB, lod ); + read_image_pixel_float( imageData, imageInfo, x2, y2, z2, lowRightB, lod ); + check_for_denorms( lowLeftB, containsDenorms ); + check_for_denorms( lowRightB, containsDenorms ); + pixelMax( lowLeftB, lowRightB, pixelMaxB ); + pixelMax( pixelMaxA, pixelMaxB, pixelMaxA); + pixelMax( pixelMaxA, returnVal.p, returnVal.p ); + + if( verbose ) + { + if( NULL == containsDenorms ) + log_info( "\tSampled pixels (rgba order, denorms flushed to zero):\n" ); + else + log_info( "\tSampled pixels (rgba order):\n" ); + log_info( "\t\tp000: %f, %f, %f, %f\n", upLeftA[0], upLeftA[1], upLeftA[2], upLeftA[3] ); + log_info( "\t\tp001: %f, %f, %f, %f\n", upRightA[0], upRightA[1], upRightA[2], upRightA[3] ); + log_info( "\t\tp010: %f, %f, %f, %f\n", lowLeftA[0], lowLeftA[1], lowLeftA[2], lowLeftA[3] ); + log_info( "\t\tp011: %f, %f, %f, %f\n\n", lowRightA[0], lowRightA[1], lowRightA[2], lowRightA[3] ); + log_info( "\t\tp100: %f, %f, %f, 
%f\n", upLeftB[0], upLeftB[1], upLeftB[2], upLeftB[3] ); + log_info( "\t\tp101: %f, %f, %f, %f\n", upRightB[0], upRightB[1], upRightB[2], upRightB[3] ); + log_info( "\t\tp110: %f, %f, %f, %f\n", lowLeftB[0], lowLeftB[1], lowLeftB[2], lowLeftB[3] ); + log_info( "\t\tp111: %f, %f, %f, %f\n", lowRightB[0], lowRightB[1], lowRightB[2], lowRightB[3] ); + } + + double weights[ 2 ][ 2 ][ 2 ]; + + float a = frac( x - 0.5f ), b = frac( y - 0.5f ), c = frac( z - 0.5f ); + weights[ 0 ][ 0 ][ 0 ] = weights[ 0 ][ 1 ][ 0 ] = weights[ 0 ][ 0 ][ 1 ] = weights[ 0 ][ 1 ][ 1 ] = 1.f - a; + weights[ 1 ][ 0 ][ 0 ] = weights[ 1 ][ 1 ][ 0 ] = weights[ 1 ][ 0 ][ 1 ] = weights[ 1 ][ 1 ][ 1 ] = a; + weights[ 0 ][ 0 ][ 0 ] *= 1.f - b; + weights[ 1 ][ 0 ][ 0 ] *= 1.f - b; + weights[ 0 ][ 0 ][ 1 ] *= 1.f - b; + weights[ 1 ][ 0 ][ 1 ] *= 1.f - b; + weights[ 0 ][ 1 ][ 0 ] *= b; + weights[ 1 ][ 1 ][ 0 ] *= b; + weights[ 0 ][ 1 ][ 1 ] *= b; + weights[ 1 ][ 1 ][ 1 ] *= b; + weights[ 0 ][ 0 ][ 0 ] *= 1.f - c; + weights[ 0 ][ 1 ][ 0 ] *= 1.f - c; + weights[ 1 ][ 0 ][ 0 ] *= 1.f - c; + weights[ 1 ][ 1 ][ 0 ] *= 1.f - c; + weights[ 0 ][ 0 ][ 1 ] *= c; + weights[ 0 ][ 1 ][ 1 ] *= c; + weights[ 1 ][ 0 ][ 1 ] *= c; + weights[ 1 ][ 1 ][ 1 ] *= c; + + if( verbose ) + log_info( "\tfrac( x - 0.5f ) = %f, frac( y - 0.5f ) = %f, frac( z - 0.5f ) = %f\n", + frac( x - 0.5f ), frac( y - 0.5f ), frac( z - 0.5f ) ); + + for( int i = 0; i < 3; i++ ) + { + outData[ i ] = (float)( ( upLeftA[ i ] * weights[ 0 ][ 0 ][ 0 ] ) + + ( upRightA[ i ] * weights[ 1 ][ 0 ][ 0 ] ) + + ( lowLeftA[ i ] * weights[ 0 ][ 1 ][ 0 ] ) + + ( lowRightA[ i ] * weights[ 1 ][ 1 ][ 0 ] ) + + ( upLeftB[ i ] * weights[ 0 ][ 0 ][ 1 ] ) + + ( upRightB[ i ] * weights[ 1 ][ 0 ][ 1 ] ) + + ( lowLeftB[ i ] * weights[ 0 ][ 1 ][ 1 ] ) + + ( lowRightB[ i ] * weights[ 1 ][ 1 ][ 1 ] )); + // flush subnormal results to zero if necessary + if( NULL == containsDenorms && fabs(outData[i]) < FLT_MIN ) + outData[i] = copysignf( 0.0f, outData[i] ); + } + outData[ 3 
] = (float)( ( upLeftA[ 3 ] * weights[ 0 ][ 0 ][ 0 ] ) + + ( upRightA[ 3 ] * weights[ 1 ][ 0 ][ 0 ] ) + + ( lowLeftA[ 3 ] * weights[ 0 ][ 1 ][ 0 ] ) + + ( lowRightA[ 3 ] * weights[ 1 ][ 1 ][ 0 ] ) + + ( upLeftB[ 3 ] * weights[ 0 ][ 0 ][ 1 ] ) + + ( upRightB[ 3 ] * weights[ 1 ][ 0 ][ 1 ] ) + + ( lowLeftB[ 3 ] * weights[ 0 ][ 1 ][ 1 ] ) + + ( lowRightB[ 3 ] * weights[ 1 ][ 1 ][ 1 ] )); + // flush subnormal results to zero if necessary + if( NULL == containsDenorms && fabs(outData[3]) < FLT_MIN ) + outData[3] = copysignf( 0.0f, outData[3] ); + } + + return returnVal; + } +} + +FloatPixel sample_image_pixel_float_offset( void *imageData, image_descriptor *imageInfo, + float x, float y, float z, float xAddressOffset, float yAddressOffset, float zAddressOffset, + image_sampler_data *imageSampler, float *outData, int verbose, int *containsDenorms ) +{ + return sample_image_pixel_float_offset( imageData, imageInfo, x, y, z, xAddressOffset, yAddressOffset, zAddressOffset, + imageSampler, outData, verbose, containsDenorms, 0); +} + + +int debug_find_vector_in_image( void *imagePtr, image_descriptor *imageInfo, + void *vectorToFind, size_t vectorSize, int *outX, int *outY, int *outZ, size_t lod ) +{ + int foundCount = 0; + char *iPtr = (char *)imagePtr; + size_t width; + size_t depth; + size_t height; + size_t row_pitch; + size_t slice_pitch; + + switch (imageInfo->type) + { + case CL_MEM_OBJECT_IMAGE1D: + width = (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1; + height = 1; + depth = 1; + break; + case CL_MEM_OBJECT_IMAGE1D_ARRAY: + width = (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1; + height = 1; + depth = imageInfo->arraySize; + break; + case CL_MEM_OBJECT_IMAGE2D: + width = (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1; + height = (imageInfo->height >> lod) ? (imageInfo->height >> lod) : 1; + depth = 1; + break; + case CL_MEM_OBJECT_IMAGE2D_ARRAY: + width = (imageInfo->width >> lod) ? 
(imageInfo->width >> lod) : 1; + height = (imageInfo->height >> lod) ? (imageInfo->height >> lod) : 1; + depth = imageInfo->arraySize; + break; + case CL_MEM_OBJECT_IMAGE3D: + width = (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1; + height = (imageInfo->height >> lod) ? (imageInfo->height >> lod) : 1; + depth = (imageInfo->depth >> lod) ? (imageInfo->depth >> lod) : 1; + break; + } + + row_pitch = width * get_pixel_size( imageInfo->format ); + slice_pitch = row_pitch * height; + + for( size_t z = 0; z < depth; z++ ) + { + for( size_t y = 0; y < height; y++ ) + { + for( size_t x = 0; x < width; x++) + { + if( memcmp( iPtr, vectorToFind, vectorSize ) == 0 ) + { + if( foundCount == 0 ) + { + *outX = (int)x; + if (outY != NULL) + *outY = (int)y; + if( outZ != NULL ) + *outZ = (int)z; + } + foundCount++; + } + iPtr += vectorSize; + } + iPtr += row_pitch - ( width * vectorSize ); + } + iPtr += slice_pitch - ( height * row_pitch ); + } + return foundCount; +} + +int debug_find_pixel_in_image( void *imagePtr, image_descriptor *imageInfo, + unsigned int *valuesToFind, int *outX, int *outY, int *outZ, int lod ) +{ + char vectorToFind[ 4 * 4 ]; + size_t vectorSize = get_format_channel_count( imageInfo->format ); + + + if( imageInfo->format->image_channel_data_type == CL_UNSIGNED_INT8 ) + { + unsigned char *p = (unsigned char *)vectorToFind; + for( unsigned int i = 0; i < vectorSize; i++ ) + p[i] = (unsigned char)valuesToFind[i]; + } + else if( imageInfo->format->image_channel_data_type == CL_UNSIGNED_INT16 ) + { + unsigned short *p = (unsigned short *)vectorToFind; + for( unsigned int i = 0; i < vectorSize; i++ ) + p[i] = (unsigned short)valuesToFind[i]; + vectorSize *= 2; + } + else if( imageInfo->format->image_channel_data_type == CL_UNSIGNED_INT32 ) + { + unsigned int *p = (unsigned int *)vectorToFind; + for( unsigned int i = 0; i < vectorSize; i++ ) + p[i] = (unsigned int)valuesToFind[i]; + vectorSize *= 4; + } + else + { + log_info( "WARNING: Unable to search 
for debug pixel: invalid image format\n" ); + return false; + } + return debug_find_vector_in_image( imagePtr, imageInfo, vectorToFind, vectorSize, outX, outY, outZ, lod ); +} + +int debug_find_pixel_in_image( void *imagePtr, image_descriptor *imageInfo, + int *valuesToFind, int *outX, int *outY, int *outZ, int lod ) +{ + char vectorToFind[ 4 * 4 ]; + size_t vectorSize = get_format_channel_count( imageInfo->format ); + + if( imageInfo->format->image_channel_data_type == CL_SIGNED_INT8 ) + { + char *p = (char *)vectorToFind; + for( unsigned int i = 0; i < vectorSize; i++ ) + p[i] = (char)valuesToFind[i]; + } + else if( imageInfo->format->image_channel_data_type == CL_SIGNED_INT16 ) + { + short *p = (short *)vectorToFind; + for( unsigned int i = 0; i < vectorSize; i++ ) + p[i] = (short)valuesToFind[i]; + vectorSize *= 2; + } + else if( imageInfo->format->image_channel_data_type == CL_SIGNED_INT32 ) + { + int *p = (int *)vectorToFind; + for( unsigned int i = 0; i < vectorSize; i++ ) + p[i] = (int)valuesToFind[i]; + vectorSize *= 4; + } + else + { + log_info( "WARNING: Unable to search for debug pixel: invalid image format\n" ); + return false; + } + return debug_find_vector_in_image( imagePtr, imageInfo, vectorToFind, vectorSize, outX, outY, outZ, lod ); +} + +int debug_find_pixel_in_image( void *imagePtr, image_descriptor *imageInfo, + float *valuesToFind, int *outX, int *outY, int *outZ, int lod ) +{ + char vectorToFind[ 4 * 4 ]; + float swizzled[4]; + memcpy( swizzled, valuesToFind, sizeof( swizzled ) ); + size_t vectorSize = get_pixel_size( imageInfo->format ); + pack_image_pixel( swizzled, imageInfo->format, vectorToFind ); + return debug_find_vector_in_image( imagePtr, imageInfo, vectorToFind, vectorSize, outX, outY, outZ, lod ); +} + +template void swizzle_vector_for_image( T *srcVector, const cl_image_format *imageFormat ) +{ + T temp; + switch( imageFormat->image_channel_order ) + { + case CL_A: + srcVector[ 0 ] = srcVector[ 3 ]; + break; + case CL_R: + case 
CL_Rx: + case CL_RG: + case CL_RGx: + case CL_RGB: + case CL_RGBx: + case CL_RGBA: + case CL_sRGB: + case CL_sRGBx: + case CL_sRGBA: + break; + case CL_RA: + srcVector[ 1 ] = srcVector[ 3 ]; + break; + case CL_ARGB: + temp = srcVector[ 3 ]; + srcVector[ 3 ] = srcVector[ 2 ]; + srcVector[ 2 ] = srcVector[ 1 ]; + srcVector[ 1 ] = srcVector[ 0 ]; + srcVector[ 0 ] = temp; + break; + case CL_BGRA: + case CL_sBGRA: + temp = srcVector[ 0 ]; + srcVector[ 0 ] = srcVector[ 2 ]; + srcVector[ 2 ] = temp; + break; + case CL_INTENSITY: + srcVector[ 3 ] = srcVector[ 0 ]; + srcVector[ 2 ] = srcVector[ 0 ]; + srcVector[ 1 ] = srcVector[ 0 ]; + break; + case CL_LUMINANCE: + srcVector[ 2 ] = srcVector[ 0 ]; + srcVector[ 1 ] = srcVector[ 0 ]; + break; +#ifdef CL_1RGB_APPLE + case CL_1RGB_APPLE: + temp = srcVector[ 3 ]; + srcVector[ 3 ] = srcVector[ 2 ]; + srcVector[ 2 ] = srcVector[ 1 ]; + srcVector[ 1 ] = srcVector[ 0 ]; + srcVector[ 0 ] = temp; + break; +#endif +#ifdef CL_BGR1_APPLE + case CL_BGR1_APPLE: + temp = srcVector[ 0 ]; + srcVector[ 0 ] = srcVector[ 2 ]; + srcVector[ 2 ] = temp; + break; +#endif + } +} + +#define SATURATE( v, min, max ) ( v < min ? min : ( v > max ? 
max : v ) ) + +void pack_image_pixel( unsigned int *srcVector, const cl_image_format *imageFormat, void *outData ) +{ + swizzle_vector_for_image( srcVector, imageFormat ); + size_t channelCount = get_format_channel_count( imageFormat ); + + switch( imageFormat->image_channel_data_type ) + { + case CL_UNSIGNED_INT8: + { + unsigned char *ptr = (unsigned char *)outData; + for( unsigned int i = 0; i < channelCount; i++ ) + ptr[ i ] = (unsigned char)SATURATE( srcVector[ i ], 0, 255 ); + break; + } + case CL_UNSIGNED_INT16: + { + unsigned short *ptr = (unsigned short *)outData; + for( unsigned int i = 0; i < channelCount; i++ ) + ptr[ i ] = (unsigned short)SATURATE( srcVector[ i ], 0, 65535 ); + break; + } + case CL_UNSIGNED_INT32: + { + unsigned int *ptr = (unsigned int *)outData; + for( unsigned int i = 0; i < channelCount; i++ ) + ptr[ i ] = (unsigned int)srcVector[ i ]; + break; + } + default: + break; + } +} + +void pack_image_pixel( int *srcVector, const cl_image_format *imageFormat, void *outData ) +{ + swizzle_vector_for_image( srcVector, imageFormat ); + size_t chanelCount = get_format_channel_count( imageFormat ); + + switch( imageFormat->image_channel_data_type ) + { + case CL_SIGNED_INT8: + { + char *ptr = (char *)outData; + for( unsigned int i = 0; i < chanelCount; i++ ) + ptr[ i ] = (char)SATURATE( srcVector[ i ], -128, 127 ); + break; + } + case CL_SIGNED_INT16: + { + short *ptr = (short *)outData; + for( unsigned int i = 0; i < chanelCount; i++ ) + ptr[ i ] = (short)SATURATE( srcVector[ i ], -32768, 32767 ); + break; + } + case CL_SIGNED_INT32: + { + int *ptr = (int *)outData; + for( unsigned int i = 0; i < chanelCount; i++ ) + ptr[ i ] = (int)srcVector[ i ]; + break; + } + default: + break; + } +} + +int round_to_even( float v ) +{ + // clamp overflow + if( v >= - (float) INT_MIN ) + return INT_MAX; + if( v <= (float) INT_MIN ) + return INT_MIN; + + // round fractional values to integer value + if( fabsf(v) < MAKE_HEX_FLOAT(0x1.0p23f, 0x1L, 23) ) + { + 
static const float magic[2] = { MAKE_HEX_FLOAT(0x1.0p23f, 0x1L, 23), MAKE_HEX_FLOAT(-0x1.0p23f, -0x1L, 23) }; + float magicVal = magic[ v < 0.0f ]; + v += magicVal; + v -= magicVal; + } + + return (int) v; +} + +void pack_image_pixel( float *srcVector, const cl_image_format *imageFormat, void *outData ) +{ + swizzle_vector_for_image( srcVector, imageFormat ); + size_t channelCount = get_format_channel_count( imageFormat ); + switch( imageFormat->image_channel_data_type ) + { + case CL_HALF_FLOAT: + { + cl_ushort *ptr = (cl_ushort *)outData; + + switch( gFloatToHalfRoundingMode ) + { + case kRoundToNearestEven: + for( unsigned int i = 0; i < channelCount; i++ ) + ptr[ i ] = float2half_rte( srcVector[ i ] ); + break; + case kRoundTowardZero: + for( unsigned int i = 0; i < channelCount; i++ ) + ptr[ i ] = float2half_rtz( srcVector[ i ] ); + break; + default: + log_error( "ERROR: Test internal error -- unhandled or unknown float->half rounding mode.\n" ); + exit(-1); + break; + } + break; + } + + case CL_FLOAT: + { + cl_float *ptr = (cl_float *)outData; + for( unsigned int i = 0; i < channelCount; i++ ) + ptr[ i ] = srcVector[ i ]; + break; + } + + case CL_SNORM_INT8: + { + cl_char *ptr = (cl_char *)outData; + for( unsigned int i = 0; i < channelCount; i++ ) + ptr[ i ] = (char)NORMALIZE_SIGNED( srcVector[ i ], -127.0f, 127.f ); + break; + } + case CL_SNORM_INT16: + { + cl_short *ptr = (cl_short *)outData; + for( unsigned int i = 0; i < channelCount; i++ ) + ptr[ i ] = (short)NORMALIZE_SIGNED( srcVector[ i ], -32767.f, 32767.f ); + break; + } + case CL_UNORM_INT8: + { + cl_uchar *ptr = (cl_uchar *)outData; + if ( is_sRGBA_order(imageFormat->image_channel_order) ) + { + ptr[ 0 ] = (unsigned char)( sRGBmap( srcVector[ 0 ] ) + 0.5 ); + ptr[ 1 ] = (unsigned char)( sRGBmap( srcVector[ 1 ] ) + 0.5 ); + ptr[ 2 ] = (unsigned char)( sRGBmap( srcVector[ 2 ] ) + 0.5 ); + if (channelCount == 4) + ptr[ 3 ] = (unsigned char)NORMALIZE( srcVector[ 3 ], 255.f ); + } + else + { + for( 
unsigned int i = 0; i < channelCount; i++ ) + ptr[ i ] = (unsigned char)NORMALIZE( srcVector[ i ], 255.f ); + } +#ifdef CL_1RGB_APPLE + if( imageFormat->image_channel_order == CL_1RGB_APPLE ) + ptr[0] = 255.0f; +#endif +#ifdef CL_BGR1_APPLE + if( imageFormat->image_channel_order == CL_BGR1_APPLE ) + ptr[3] = 255.0f; +#endif + break; + } + case CL_UNORM_INT16: + { + cl_ushort *ptr = (cl_ushort *)outData; + for( unsigned int i = 0; i < channelCount; i++ ) + ptr[ i ] = (unsigned short)NORMALIZE( srcVector[ i ], 65535.f ); + break; + } + case CL_UNORM_SHORT_555: + { + cl_ushort *ptr = (cl_ushort *)outData; + ptr[ 0 ] = ( ( (unsigned short)NORMALIZE( srcVector[ 0 ], 31.f ) & 31 ) << 10 ) | + ( ( (unsigned short)NORMALIZE( srcVector[ 1 ], 31.f ) & 31 ) << 5 ) | + ( ( (unsigned short)NORMALIZE( srcVector[ 2 ], 31.f ) & 31 ) << 0 ); + break; + } + case CL_UNORM_SHORT_565: + { + cl_ushort *ptr = (cl_ushort *)outData; + ptr[ 0 ] = ( ( (unsigned short)NORMALIZE( srcVector[ 0 ], 31.f ) & 31 ) << 11 ) | + ( ( (unsigned short)NORMALIZE( srcVector[ 1 ], 63.f ) & 63 ) << 5 ) | + ( ( (unsigned short)NORMALIZE( srcVector[ 2 ], 31.f ) & 31 ) << 0 ); + break; + } + case CL_UNORM_INT_101010: + { + cl_uint *ptr = (cl_uint *)outData; + ptr[ 0 ] = ( ( (unsigned int)NORMALIZE( srcVector[ 0 ], 1023.f ) & 1023 ) << 20 ) | + ( ( (unsigned int)NORMALIZE( srcVector[ 1 ], 1023.f ) & 1023 ) << 10 ) | + ( ( (unsigned int)NORMALIZE( srcVector[ 2 ], 1023.f ) & 1023 ) << 0 ); + break; + } + case CL_SIGNED_INT8: + { + cl_char *ptr = (cl_char *)outData; + for( unsigned int i = 0; i < channelCount; i++ ) + ptr[ i ] = (char)CONVERT_INT( srcVector[ i ], -127.0f, 127.f, 127 ); + break; + } + case CL_SIGNED_INT16: + { + cl_short *ptr = (cl_short *)outData; + for( unsigned int i = 0; i < channelCount; i++ ) + ptr[ i ] = (short)CONVERT_INT( srcVector[ i ], -32767.f, 32767.f, 32767 ); + break; + } + case CL_SIGNED_INT32: + { + cl_int *ptr = (cl_int *)outData; + for( unsigned int i = 0; i < channelCount; i++ ) 
+ ptr[ i ] = (int)CONVERT_INT( srcVector[ i ], MAKE_HEX_FLOAT( -0x1.0p31f, -1, 31), MAKE_HEX_FLOAT( 0x1.fffffep30f, 0x1fffffe, 30-23), CL_INT_MAX ); + break; + } + case CL_UNSIGNED_INT8: + { + cl_uchar *ptr = (cl_uchar *)outData; + for( unsigned int i = 0; i < channelCount; i++ ) + ptr[ i ] = (cl_uchar)CONVERT_UINT( srcVector[ i ], 255.f, CL_UCHAR_MAX ); + break; + } + case CL_UNSIGNED_INT16: + { + cl_ushort *ptr = (cl_ushort *)outData; + for( unsigned int i = 0; i < channelCount; i++ ) + ptr[ i ] = (cl_ushort)CONVERT_UINT( srcVector[ i ], 32767.f, CL_USHRT_MAX ); + break; + } + case CL_UNSIGNED_INT32: + { + cl_uint *ptr = (cl_uint *)outData; + for( unsigned int i = 0; i < channelCount; i++ ) + ptr[ i ] = (cl_uint)CONVERT_UINT( srcVector[ i ], MAKE_HEX_FLOAT( 0x1.fffffep31f, 0x1fffffe, 31-23), CL_UINT_MAX ); + break; + } +#ifdef CL_SFIXED14_APPLE + case CL_SFIXED14_APPLE: + { + cl_ushort *ptr = (cl_ushort*)outData; + for( unsigned int i = 0; i < channelCount; i++ ) + { + cl_float f = fmaxf( srcVector[i], -1.0f ); + f = fminf( f, 3.0f ); + cl_int d = rintf(f * 0x1.0p14f); + d += 16384; + if( d > CL_USHRT_MAX ) + d = CL_USHRT_MAX; + ptr[i] = d; + } + break; + } +#endif + default: + log_error( "INTERNAL ERROR: unknown format (%d)\n", imageFormat->image_channel_data_type); + exit(-1); + break; + } +} + +void pack_image_pixel_error( const float *srcVector, const cl_image_format *imageFormat, const void *results, float *errors ) +{ + size_t channelCount = get_format_channel_count( imageFormat ); + switch( imageFormat->image_channel_data_type ) + { + case CL_HALF_FLOAT: + { + const cl_ushort *ptr = (const cl_ushort *)results; + + for( unsigned int i = 0; i < channelCount; i++ ) + errors[i] = Ulp_Error_Half( ptr[i], srcVector[i] ); + + break; + } + + case CL_FLOAT: + { + const cl_ushort *ptr = (const cl_ushort *)results; + + for( unsigned int i = 0; i < channelCount; i++ ) + errors[i] = Ulp_Error( ptr[i], srcVector[i] ); + + break; + } + + case CL_SNORM_INT8: + { + const 
cl_char *ptr = (const cl_char *)results; + + for( unsigned int i = 0; i < channelCount; i++ ) + errors[i] = ptr[i] - NORMALIZE_SIGNED_UNROUNDED( srcVector[ i ], -127.0f, 127.f ); + + break; + } + case CL_SNORM_INT16: + { + const cl_short *ptr = (const cl_short *)results; + + for( unsigned int i = 0; i < channelCount; i++ ) + errors[i] = ptr[i] - NORMALIZE_SIGNED_UNROUNDED( srcVector[ i ], -32767.f, 32767.f ); + + break; + } + case CL_UNORM_INT8: + { + const cl_uchar *ptr = (const cl_uchar *)results; + + for( unsigned int i = 0; i < channelCount; i++ ) + errors[i] = ptr[i] - NORMALIZE_UNROUNDED( srcVector[ i ], 255.f ); + + break; + } + case CL_UNORM_INT16: + { + const cl_ushort *ptr = (const cl_ushort *)results; + + for( unsigned int i = 0; i < channelCount; i++ ) + errors[i] = ptr[i] - NORMALIZE_UNROUNDED( srcVector[ i ], 65535.f ); + + break; + } + case CL_UNORM_SHORT_555: + { + const cl_ushort *ptr = (const cl_ushort *)results; + + errors[0] = ((ptr[0] >> 10) & 31) - NORMALIZE_UNROUNDED( srcVector[ 0 ], 31.f ); + errors[1] = ((ptr[0] >> 5) & 31) - NORMALIZE_UNROUNDED( srcVector[ 1 ], 31.f ); + errors[2] = ((ptr[0] >> 0) & 31) - NORMALIZE_UNROUNDED( srcVector[ 2 ], 31.f ); + + break; + } + case CL_UNORM_SHORT_565: + { + const cl_ushort *ptr = (const cl_ushort *)results; + + errors[0] = ((ptr[0] >> 11) & 31) - NORMALIZE_UNROUNDED( srcVector[ 0 ], 31.f ); + errors[1] = ((ptr[0] >> 5) & 63) - NORMALIZE_UNROUNDED( srcVector[ 1 ], 63.f ); + errors[2] = ((ptr[0] >> 0) & 31) - NORMALIZE_UNROUNDED( srcVector[ 2 ], 31.f ); + + break; + } + case CL_UNORM_INT_101010: + { + const cl_uint *ptr = (const cl_uint *)results; + + errors[0] = ((ptr[0] >> 20) & 1023) - NORMALIZE_UNROUNDED( srcVector[ 0 ], 1023.f ); + errors[1] = ((ptr[0] >> 10) & 1023) - NORMALIZE_UNROUNDED( srcVector[ 1 ], 1023.f ); + errors[2] = ((ptr[0] >> 0) & 1023) - NORMALIZE_UNROUNDED( srcVector[ 2 ], 1023.f ); + + break; + } + case CL_SIGNED_INT8: + { + const cl_char *ptr = (const cl_char *)results; + + for( 
unsigned int i = 0; i < channelCount; i++ ) + errors[ i ] = ptr[i] - CONVERT_INT( srcVector[ i ], -127.0f, 127.f, 127 ); + + break; + } + case CL_SIGNED_INT16: + { + const cl_short *ptr = (const cl_short *)results; + for( unsigned int i = 0; i < channelCount; i++ ) + errors[i] = ptr[ i ] - CONVERT_INT( srcVector[ i ], -32767.f, 32767.f, 32767 ); + break; + } + case CL_SIGNED_INT32: + { + const cl_int *ptr = (const cl_int *)results; + for( unsigned int i = 0; i < channelCount; i++ ) + errors[i] = (cl_float)((cl_long) ptr[ i ] - (cl_long) CONVERT_INT( srcVector[ i ], MAKE_HEX_FLOAT( -0x1.0p31f, -1, 31), MAKE_HEX_FLOAT( 0x1.fffffep30f, 0x1fffffe, 30-23), CL_INT_MAX )); + break; + } + case CL_UNSIGNED_INT8: + { + const cl_uchar *ptr = (const cl_uchar *)results; + for( unsigned int i = 0; i < channelCount; i++ ) + errors[i] = (cl_int) ptr[ i ] - (cl_int) CONVERT_UINT( srcVector[ i ], 255.f, CL_UCHAR_MAX ); + break; + } + case CL_UNSIGNED_INT16: + { + const cl_ushort *ptr = (const cl_ushort *)results; + for( unsigned int i = 0; i < channelCount; i++ ) + errors[i] = (cl_int) ptr[ i ] - (cl_int) CONVERT_UINT( srcVector[ i ], 32767.f, CL_USHRT_MAX ); + break; + } + case CL_UNSIGNED_INT32: + { + const cl_uint *ptr = (const cl_uint *)results; + for( unsigned int i = 0; i < channelCount; i++ ) + errors[i] = (cl_float)((cl_long) ptr[ i ] - (cl_long)CONVERT_UINT( srcVector[ i ], MAKE_HEX_FLOAT( 0x1.fffffep31f, 0x1fffffe, 31-23), CL_UINT_MAX )); + break; + } +#ifdef CL_SFIXED14_APPLE + case CL_SFIXED14_APPLE: + { + const cl_ushort *ptr = (const cl_ushort *)results; + + for( unsigned int i = 0; i < channelCount; i++ ) + errors[i] = ptr[i] - NORMALIZE_SIGNED_UNROUNDED( ((int) srcVector[ i ] - 16384), -16384.f, 49151.f ); + + break; + } +#endif + default: + log_error( "INTERNAL ERROR: unknown format (%d)\n", imageFormat->image_channel_data_type); + exit(-1); + break; + } +} + + +// +// Autodetect which rounding mode is used for image writes to CL_HALF_FLOAT +// This should be called 
lazily before attempting to verify image writes, otherwise an error will occur. +// +int DetectFloatToHalfRoundingMode( cl_command_queue q ) // Returns CL_SUCCESS on success +{ + cl_int err = CL_SUCCESS; + + if( gFloatToHalfRoundingMode == kDefaultRoundingMode ) + { + // Some numbers near 0.5f, that we look at to see how the values are rounded. + static const cl_uint inData[4*4] = { 0x3f000fffU, 0x3f001000U, 0x3f001001U, 0U, 0x3f001fffU, 0x3f002000U, 0x3f002001U, 0U, + 0x3f002fffU, 0x3f003000U, 0x3f003001U, 0U, 0x3f003fffU, 0x3f004000U, 0x3f004001U, 0U }; + static const size_t count = sizeof( inData ) / (4*sizeof( inData[0] )); + const float *inp = (const float*) inData; + cl_context context = NULL; + + // Create an input buffer + err = clGetCommandQueueInfo( q, CL_QUEUE_CONTEXT, sizeof(context), &context, NULL ); + if( err ) + { + log_error( "Error: could not get context from command queue in DetectFloatToHalfRoundingMode (%d)", err ); + return err; + } + + cl_mem inBuf = clCreateBuffer( context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR | CL_MEM_ALLOC_HOST_PTR, sizeof( inData ), (void*) inData, &err ); + if( NULL == inBuf || err ) + { + log_error( "Error: could not create input buffer in DetectFloatToHalfRoundingMode (err: %d)", err ); + return err; + } + + // Create a small output image + cl_image_format fmt = { CL_RGBA, CL_HALF_FLOAT }; + cl_mem outImage = create_image_2d( context, CL_MEM_WRITE_ONLY, &fmt, count, 1, 0, NULL, &err ); + if( NULL == outImage || err ) + { + log_error( "Error: could not create half float out image in DetectFloatToHalfRoundingMode (err: %d)", err ); + clReleaseMemObject( inBuf ); + return err; + } + + // Create our program, and a kernel + const char *kernel[1] = { + "kernel void detect_round( global float4 *in, write_only image2d_t out )\n" + "{\n" + " write_imagef( out, (int2)(get_global_id(0),0), in[get_global_id(0)] );\n" + "}\n" }; + + clProgramWrapper program; + err = create_single_kernel_helper_create_program(context, &program, 
1, kernel); + + if( NULL == program || err ) + { + log_error( "Error: could not create program in DetectFloatToHalfRoundingMode (err: %d)", err ); + clReleaseMemObject( inBuf ); + clReleaseMemObject( outImage ); + return err; + } + + cl_device_id device = NULL; + err = clGetCommandQueueInfo( q, CL_QUEUE_DEVICE, sizeof(device), &device, NULL ); + if( err ) + { + log_error( "Error: could not get device from command queue in DetectFloatToHalfRoundingMode (%d)", err ); + clReleaseMemObject( inBuf ); + clReleaseMemObject( outImage ); + return err; + } + + err = clBuildProgram( program, 1, &device, "", NULL, NULL ); + if( err ) + { + log_error( "Error: could not build program in DetectFloatToHalfRoundingMode (%d)", err ); + clReleaseMemObject( inBuf ); + clReleaseMemObject( outImage ); + return err; + } + + cl_kernel k = clCreateKernel( program, "detect_round", &err ); + if( NULL == k || err ) + { + log_error( "Error: could not create kernel in DetectFloatToHalfRoundingMode (%d)", err ); + clReleaseMemObject( inBuf ); + clReleaseMemObject( outImage ); + return err; + } + + err = clSetKernelArg( k, 0, sizeof( cl_mem ), &inBuf ); + if( err ) + { + log_error( "Error: could not set argument 0 of kernel in DetectFloatToHalfRoundingMode (%d)", err ); + clReleaseMemObject( inBuf ); + clReleaseMemObject( outImage ); + clReleaseKernel( k ); + return err; + } + + err = clSetKernelArg( k, 1, sizeof( cl_mem ), &outImage ); + if( err ) + { + log_error( "Error: could not set argument 1 of kernel in DetectFloatToHalfRoundingMode (%d)", err ); + clReleaseMemObject( inBuf ); + clReleaseMemObject( outImage ); + clReleaseKernel( k ); + return err; + } + + // Run the kernel + size_t global_work_size = count; + err = clEnqueueNDRangeKernel( q, k, 1, NULL, &global_work_size, NULL, 0, NULL, NULL ); + if( err ) + { + log_error( "Error: could not enqueue kernel in DetectFloatToHalfRoundingMode (%d)", err ); + clReleaseMemObject( inBuf ); + clReleaseMemObject( outImage ); + clReleaseKernel( k ); 
+ return err; + } + + // read the results + cl_ushort outBuf[count*4]; + memset( outBuf, -1, sizeof( outBuf ) ); + size_t origin[3] = {0,0,0}; + size_t region[3] = {count,1,1}; + err = clEnqueueReadImage( q, outImage, CL_TRUE, origin, region, 0, 0, outBuf, 0, NULL, NULL ); + if( err ) + { + log_error( "Error: could not read output image in DetectFloatToHalfRoundingMode (%d)", err ); + clReleaseMemObject( inBuf ); + clReleaseMemObject( outImage ); + clReleaseKernel( k ); + return err; + } + + // Generate our list of reference results + cl_ushort rte_ref[count*4]; + cl_ushort rtz_ref[count*4]; + for( size_t i = 0; i < 4 * count; i++ ) + { + rte_ref[i] = float2half_rte( inp[i] ); + rtz_ref[i] = float2half_rtz( inp[i] ); + } + + // Verify that we got something in either rtz or rte mode + if( 0 == memcmp( rte_ref, outBuf, sizeof( rte_ref )) ) + { + log_info( "Autodetected float->half rounding mode to be rte\n" ); + gFloatToHalfRoundingMode = kRoundToNearestEven; + } + else if ( 0 == memcmp( rtz_ref, outBuf, sizeof( rtz_ref )) ) + { + log_info( "Autodetected float->half rounding mode to be rtz\n" ); + gFloatToHalfRoundingMode = kRoundTowardZero; + } + else + { + log_error( "ERROR: float to half conversions proceed with invalid rounding mode!\n" ); + log_info( "\nfor:" ); + for( size_t i = 0; i < count; i++ ) + log_info( " {%a, %a, %a, %a},", inp[4*i], inp[4*i+1], inp[4*i+2], inp[4*i+3] ); + log_info( "\ngot:" ); + for( size_t i = 0; i < count; i++ ) + log_info( " {0x%4.4x, 0x%4.4x, 0x%4.4x, 0x%4.4x},", outBuf[4*i], outBuf[4*i+1], outBuf[4*i+2], outBuf[4*i+3] ); + log_info( "\nrte:" ); + for( size_t i = 0; i < count; i++ ) + log_info( " {0x%4.4x, 0x%4.4x, 0x%4.4x, 0x%4.4x},", rte_ref[4*i], rte_ref[4*i+1], rte_ref[4*i+2], rte_ref[4*i+3] ); + log_info( "\nrtz:" ); + for( size_t i = 0; i < count; i++ ) + log_info( " {0x%4.4x, 0x%4.4x, 0x%4.4x, 0x%4.4x},", rtz_ref[4*i], rtz_ref[4*i+1], rtz_ref[4*i+2], rtz_ref[4*i+3] ); + log_info( "\n" ); + err = -1; + 
gFloatToHalfRoundingMode = kRoundingModeCount; // illegal value + } + + // clean up + clReleaseMemObject( inBuf ); + clReleaseMemObject( outImage ); + clReleaseKernel( k ); + return err; + } + + // Make sure that the rounding mode was successfully detected, if we checked earlier + if( gFloatToHalfRoundingMode != kRoundToNearestEven && gFloatToHalfRoundingMode != kRoundTowardZero) + return -2; + + return err; +} + +char *create_random_image_data( ExplicitType dataType, image_descriptor *imageInfo, BufferOwningPtr &P, MTdata d, bool image2DFromBuffer ) +{ + size_t allocSize, numPixels; + if ( /*gTestMipmaps*/ imageInfo->num_mip_levels > 1 ) + { + allocSize = (size_t) (compute_mipmapped_image_size(*imageInfo) * 4 * get_explicit_type_size( dataType ))/get_pixel_size(imageInfo->format); + numPixels = allocSize / (get_explicit_type_size( dataType ) * 4); + } + else + { + numPixels = (image2DFromBuffer? imageInfo->rowPitch: imageInfo->width) * imageInfo->height + * (imageInfo->depth ? imageInfo->depth : 1) + * (imageInfo->arraySize ? imageInfo->arraySize : 1); + allocSize = numPixels * 4 * get_explicit_type_size( dataType ); + } + +#if 0 // DEBUG + { + fprintf(stderr,"--- create_random_image_data:\n"); + fprintf(stderr,"allocSize = %zu\n",allocSize); + fprintf(stderr,"numPixels = %zu\n",numPixels); + fprintf(stderr,"width = %zu\n",imageInfo->width); + fprintf(stderr,"height = %zu\n",imageInfo->height); + fprintf(stderr,"depth = %zu\n",imageInfo->depth); + fprintf(stderr,"rowPitch = %zu\n",imageInfo->rowPitch); + fprintf(stderr,"slicePitch = %zu\n",imageInfo->slicePitch); + fprintf(stderr,"arraySize = %zu\n",imageInfo->arraySize); + fprintf(stderr,"explicit_type_size = %zu\n",get_explicit_type_size(dataType)); + } +#endif + +#if defined( __APPLE__ ) + char *data = NULL; + if (gDeviceType == CL_DEVICE_TYPE_CPU) { + size_t mapSize = ((allocSize + 4095L) & -4096L) + 8192; // alloc two extra pages. 
+ + void *map = mmap(0, mapSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, 0, 0); + if (map == MAP_FAILED) + { + perror("create_random_image_data: mmap"); + log_error("%s:%d: mmap failed, mapSize = %zu\n",__FILE__,__LINE__,mapSize); + } + intptr_t data_end = (intptr_t)map + mapSize - 4096; + data = (char *)(data_end - (intptr_t)allocSize); + + mprotect(map, 4096, PROT_NONE); + mprotect((void *)((char *)map + mapSize - 4096), 4096, PROT_NONE); + P.reset(data, map, mapSize); + } else { + data = (char *)malloc(allocSize); + P.reset(data); + } +#else +#if defined (_WIN32) && defined(_MSC_VER) + char *data = (char *)_aligned_malloc(allocSize, get_pixel_size(imageInfo->format)); +#elif defined(__MINGW32__) + char *data = (char *)__mingw_aligned_malloc(allocSize, get_pixel_size(imageInfo->format)); +#else + char *data = (char *)memalign(get_pixel_size(imageInfo->format), allocSize); +#endif + + P.reset(data,NULL,0,allocSize,true); +#endif + + if (data == NULL) { + log_error( "ERROR: Unable to malloc %lu bytes for create_random_image_data\n", allocSize ); + return NULL; + } + + switch( dataType ) + { + case kFloat: + { + float *inputValues = (float *)data; + switch (imageInfo->format->image_channel_data_type) + { + case CL_HALF_FLOAT: + { + // Generate data that is (mostly) inside the range of a half float + // const float HALF_MIN = 5.96046448e-08f; + const float HALF_MAX = 65504.0f; + + size_t i = 0; + inputValues[ i++ ] = 0.f; + inputValues[ i++ ] = 1.f; + inputValues[ i++ ] = -1.f; + inputValues[ i++ ] = 2.f; + for( ; i < numPixels * 4; i++ ) + inputValues[ i ] = get_random_float( -HALF_MAX - 2.f, HALF_MAX + 2.f, d ); + } + break; +#ifdef CL_SFIXED14_APPLE + case CL_SFIXED14_APPLE: + { + size_t i = 0; + if( numPixels * 4 >= 8 ) + { + inputValues[ i++ ] = INFINITY; + inputValues[ i++ ] = 0x1.0p14f; + inputValues[ i++ ] = 0x1.0p31f; + inputValues[ i++ ] = 0x1.0p32f; + inputValues[ i++ ] = -INFINITY; + inputValues[ i++ ] = -0x1.0p14f; + inputValues[ i++ ] = 
-0x1.0p31f; + inputValues[ i++ ] = -0x1.1p31f; + } + for( ; i < numPixels * 4; i++ ) + inputValues[ i ] = get_random_float( -1.1f, 3.1f, d ); + } + break; +#endif + case CL_FLOAT: + { + size_t i = 0; + inputValues[ i++ ] = INFINITY; + inputValues[ i++ ] = -INFINITY; + inputValues[ i++ ] = 0.0f; + inputValues[ i++ ] = 0.0f; + cl_uint *p = (cl_uint *)data; + for( ; i < numPixels * 4; i++ ) + p[ i ] = genrand_int32(d); + } + break; + + default: + size_t i = 0; + if( numPixels * 4 >= 36 ) + { + inputValues[ i++ ] = 0.0f; + inputValues[ i++ ] = 0.5f; + inputValues[ i++ ] = 31.5f; + inputValues[ i++ ] = 32.0f; + inputValues[ i++ ] = 127.5f; + inputValues[ i++ ] = 128.0f; + inputValues[ i++ ] = 255.5f; + inputValues[ i++ ] = 256.0f; + inputValues[ i++ ] = 1023.5f; + inputValues[ i++ ] = 1024.0f; + inputValues[ i++ ] = 32767.5f; + inputValues[ i++ ] = 32768.0f; + inputValues[ i++ ] = 65535.5f; + inputValues[ i++ ] = 65536.0f; + inputValues[ i++ ] = 2147483648.0f; + inputValues[ i++ ] = 4294967296.0f; + inputValues[ i++ ] = MAKE_HEX_FLOAT( 0x1.0p63f, 1, 63 ); + inputValues[ i++ ] = MAKE_HEX_FLOAT( 0x1.0p64f, 1, 64 ); + inputValues[ i++ ] = -0.0f; + inputValues[ i++ ] = -0.5f; + inputValues[ i++ ] = -31.5f; + inputValues[ i++ ] = -32.0f; + inputValues[ i++ ] = -127.5f; + inputValues[ i++ ] = -128.0f; + inputValues[ i++ ] = -255.5f; + inputValues[ i++ ] = -256.0f; + inputValues[ i++ ] = -1023.5f; + inputValues[ i++ ] = -1024.0f; + inputValues[ i++ ] = -32767.5f; + inputValues[ i++ ] = -32768.0f; + inputValues[ i++ ] = -65535.5f; + inputValues[ i++ ] = -65536.0f; + inputValues[ i++ ] = -2147483648.0f; + inputValues[ i++ ] = -4294967296.0f; + inputValues[ i++ ] = -MAKE_HEX_FLOAT( 0x1.0p63f, 1, 63 ); + inputValues[ i++ ] = -MAKE_HEX_FLOAT( 0x1.0p64f, 1, 64 ); + } + if( is_format_signed(imageInfo->format) ) + { + for( ; i < numPixels * 4; i++ ) + inputValues[ i ] = get_random_float( -1.1f, 1.1f, d ); + } + else + { + for( ; i < numPixels * 4; i++ ) + inputValues[ i ] = 
get_random_float( -0.1f, 1.1f, d ); + } + break; + } + } + + case kInt: + { + int *imageData = (int *)data; + + // We want to generate ints (mostly) in range of the target format + int formatMin = get_format_min_int( imageInfo->format ); + size_t formatMax = get_format_max_int( imageInfo->format ); + if( formatMin == 0 ) + { + // Unsigned values, but we are only an int, so cap the actual max at the max of signed ints + if( formatMax > 2147483647L ) + formatMax = 2147483647L; + } + // If the final format is small enough, give us a bit of room for out-of-range values to test + if( formatMax < 2147483647L ) + formatMax += 2; + if( formatMin > -2147483648LL ) + formatMin -= 2; + + // Now gen + for( size_t i = 0; i < numPixels * 4; i++ ) + { + imageData[ i ] = random_in_range( formatMin, (int)formatMax, d ); + } + break; + } + + case kUInt: + case kUnsignedInt: + { + unsigned int *imageData = (unsigned int *)data; + + // We want to generate ints (mostly) in range of the target format + int formatMin = get_format_min_int( imageInfo->format ); + size_t formatMax = get_format_max_int( imageInfo->format ); + if( formatMin < 0 ) + formatMin = 0; + // If the final format is small enough, give us a bit of room for out-of-range values to test + if( formatMax < 4294967295LL ) + formatMax += 2; + + // Now gen + for( size_t i = 0; i < numPixels * 4; i++ ) + { + imageData[ i ] = random_in_range( formatMin, (int)formatMax, d ); + } + break; + } + default: + // Unsupported source format + delete [] data; + return NULL; + } + + return data; +} + +/* + deprecated +bool clamp_image_coord( image_sampler_data *imageSampler, float value, size_t max, int &outValue ) +{ + int v = (int)value; + + switch(imageSampler->addressing_mode) + { + case CL_ADDRESS_REPEAT: + outValue = v; + while( v < 0 ) + v += (int)max; + while( v >= (int)max ) + v -= (int)max; + if( v != outValue ) + { + outValue = v; + return true; + } + return false; + + case CL_ADDRESS_MIRRORED_REPEAT: + log_info( "ERROR: 
unimplemented for CL_ADDRESS_MIRRORED_REPEAT. Do we ever use this? + exit(-1); + + default: + if( v < 0 ) + { + outValue = 0; + return true; + } + if( v >= (int)max ) + { + outValue = (int)max - 1; + return true; + } + outValue = v; + return false; + } + +} +*/ + +void get_sampler_kernel_code( image_sampler_data *imageSampler, char *outLine ) +{ + const char *normalized; + const char *addressMode; + const char *filterMode; + + if( imageSampler->addressing_mode == CL_ADDRESS_CLAMP ) + addressMode = "CLK_ADDRESS_CLAMP"; + else if( imageSampler->addressing_mode == CL_ADDRESS_CLAMP_TO_EDGE ) + addressMode = "CLK_ADDRESS_CLAMP_TO_EDGE"; + else if( imageSampler->addressing_mode == CL_ADDRESS_REPEAT ) + addressMode = "CLK_ADDRESS_REPEAT"; + else if( imageSampler->addressing_mode == CL_ADDRESS_MIRRORED_REPEAT ) + addressMode = "CLK_ADDRESS_MIRRORED_REPEAT"; + else if( imageSampler->addressing_mode == CL_ADDRESS_NONE ) + addressMode = "CLK_ADDRESS_NONE"; + else + { + log_error( "**Error: Unknown addressing mode! 
Aborting...\n" ); + abort(); + } + + if( imageSampler->normalized_coords ) + normalized = "CLK_NORMALIZED_COORDS_TRUE"; + else + normalized = "CLK_NORMALIZED_COORDS_FALSE"; + + if( imageSampler->filter_mode == CL_FILTER_LINEAR ) + filterMode = "CLK_FILTER_LINEAR"; + else + filterMode = "CLK_FILTER_NEAREST"; + + sprintf( outLine, " const sampler_t imageSampler = %s | %s | %s;\n", addressMode, filterMode, normalized ); +} + +void copy_image_data( image_descriptor *srcImageInfo, image_descriptor *dstImageInfo, void *imageValues, void *destImageValues, + const size_t sourcePos[], const size_t destPos[], const size_t regionSize[] ) +{ + // assert( srcImageInfo->format == dstImageInfo->format ); + + size_t src_mip_level_offset = 0, dst_mip_level_offset = 0; + size_t sourcePos_lod[3], destPos_lod[3], src_lod, dst_lod; + size_t src_row_pitch_lod, src_slice_pitch_lod; + size_t dst_row_pitch_lod, dst_slice_pitch_lod; + + size_t pixelSize = get_pixel_size( srcImageInfo->format ); + + sourcePos_lod[0] = sourcePos[0]; + sourcePos_lod[1] = sourcePos[1]; + sourcePos_lod[2] = sourcePos[2]; + destPos_lod[0] = destPos[0]; + destPos_lod[1] = destPos[1]; + destPos_lod[2] = destPos[2]; + src_row_pitch_lod = srcImageInfo->rowPitch; + dst_row_pitch_lod = dstImageInfo->rowPitch; + src_slice_pitch_lod = srcImageInfo->slicePitch; + dst_slice_pitch_lod = dstImageInfo->slicePitch; + + if( srcImageInfo->num_mip_levels > 1) + { + size_t src_width_lod = 1/*srcImageInfo->width*/; + size_t src_height_lod = 1/*srcImageInfo->height*/; + size_t src_depth_lod = 1/*srcImageInfo->depth*/; + + switch( srcImageInfo->type ) + { + case CL_MEM_OBJECT_IMAGE1D: + src_lod = sourcePos[1]; + sourcePos_lod[1] = sourcePos_lod[2] = 0; + src_width_lod = (srcImageInfo->width >> src_lod ) ? 
( srcImageInfo->width >> src_lod ): 1; + break; + case CL_MEM_OBJECT_IMAGE1D_ARRAY: + case CL_MEM_OBJECT_IMAGE2D: + src_lod = sourcePos[2]; + sourcePos_lod[1] = sourcePos[1]; + sourcePos_lod[2] = 0; + src_width_lod = (srcImageInfo->width >> src_lod ) ? ( srcImageInfo->width >> src_lod ): 1; + if( srcImageInfo->type == CL_MEM_OBJECT_IMAGE2D ) + src_height_lod = (srcImageInfo->height >> src_lod ) ? ( srcImageInfo->height >> src_lod ): 1; + break; + case CL_MEM_OBJECT_IMAGE2D_ARRAY: + case CL_MEM_OBJECT_IMAGE3D: + src_lod = sourcePos[3]; + sourcePos_lod[1] = sourcePos[1]; + sourcePos_lod[2] = sourcePos[2]; + src_width_lod = (srcImageInfo->width >> src_lod ) ? ( srcImageInfo->width >> src_lod ): 1; + src_height_lod = (srcImageInfo->height >> src_lod ) ? ( srcImageInfo->height >> src_lod ): 1; + if( srcImageInfo->type == CL_MEM_OBJECT_IMAGE3D ) + src_depth_lod = (srcImageInfo->depth >> src_lod ) ? ( srcImageInfo->depth >> src_lod ): 1; + break; + + } + src_mip_level_offset = compute_mip_level_offset( srcImageInfo, src_lod ); + src_row_pitch_lod = src_width_lod * get_pixel_size( srcImageInfo->format ); + src_slice_pitch_lod = src_row_pitch_lod * src_height_lod; + } + + if( dstImageInfo->num_mip_levels > 1) + { + size_t dst_width_lod = 1/*dstImageInfo->width*/; + size_t dst_height_lod = 1/*dstImageInfo->height*/; + size_t dst_depth_lod = 1 /*dstImageInfo->depth*/; + switch( dstImageInfo->type ) + { + case CL_MEM_OBJECT_IMAGE1D: + dst_lod = destPos[1]; + destPos_lod[1] = destPos_lod[2] = 0; + dst_width_lod = (dstImageInfo->width >> dst_lod ) ? ( dstImageInfo->width >> dst_lod ): 1; + break; + case CL_MEM_OBJECT_IMAGE1D_ARRAY: + case CL_MEM_OBJECT_IMAGE2D: + dst_lod = destPos[2]; + destPos_lod[1] = destPos[1]; + destPos_lod[2] = 0; + dst_width_lod = (dstImageInfo->width >> dst_lod ) ? ( dstImageInfo->width >> dst_lod ): 1; + if( dstImageInfo->type == CL_MEM_OBJECT_IMAGE2D ) + dst_height_lod = (dstImageInfo->height >> dst_lod ) ? 
( dstImageInfo->height >> dst_lod ): 1; + break; + case CL_MEM_OBJECT_IMAGE2D_ARRAY: + case CL_MEM_OBJECT_IMAGE3D: + dst_lod = destPos[3]; + destPos_lod[1] = destPos[1]; + destPos_lod[2] = destPos[2]; + dst_width_lod = (dstImageInfo->width >> dst_lod ) ? ( dstImageInfo->width >> dst_lod ): 1; + dst_height_lod = (dstImageInfo->height >> dst_lod ) ? ( dstImageInfo->height >> dst_lod ): 1; + if( dstImageInfo->type == CL_MEM_OBJECT_IMAGE3D ) + dst_depth_lod = (dstImageInfo->depth >> dst_lod ) ? ( dstImageInfo->depth >> dst_lod ): 1; + break; + + } + dst_mip_level_offset = compute_mip_level_offset( dstImageInfo, dst_lod ); + dst_row_pitch_lod = dst_width_lod * get_pixel_size( dstImageInfo->format); + dst_slice_pitch_lod = dst_row_pitch_lod * dst_height_lod; + } + + // Get initial pointers + char *sourcePtr = (char *)imageValues + sourcePos_lod[ 2 ] * src_slice_pitch_lod + sourcePos_lod[ 1 ] * src_row_pitch_lod + pixelSize * sourcePos_lod[ 0 ] + src_mip_level_offset; + char *destPtr = (char *)destImageValues + destPos_lod[ 2 ] * dst_slice_pitch_lod + destPos_lod[ 1 ] * dst_row_pitch_lod + pixelSize * destPos_lod[ 0 ] + dst_mip_level_offset; + + for( size_t z = 0; z < ( regionSize[ 2 ] > 0 ? 
regionSize[ 2 ] : 1 ); z++ ) + { + char *rowSourcePtr = sourcePtr; + char *rowDestPtr = destPtr; + for( size_t y = 0; y < regionSize[ 1 ]; y++ ) + { + memcpy( rowDestPtr, rowSourcePtr, pixelSize * regionSize[ 0 ] ); + rowSourcePtr += src_row_pitch_lod; + rowDestPtr += dst_row_pitch_lod; + } + + sourcePtr += src_slice_pitch_lod; + destPtr += dst_slice_pitch_lod; + } +} + +float random_float(float low, float high, MTdata d) +{ + float t = (float) genrand_real1(d); + return (1.0f - t) * low + t * high; +} + +CoordWalker::CoordWalker( void * coords, bool useFloats, size_t vecSize ) +{ + if( useFloats ) + { + mFloatCoords = (cl_float *)coords; + mIntCoords = NULL; + } + else + { + mFloatCoords = NULL; + mIntCoords = (cl_int *)coords; + } + mVecSize = vecSize; +} + +CoordWalker::~CoordWalker() +{ +} + +cl_float CoordWalker::Get( size_t idx, size_t el ) +{ + if( mIntCoords != NULL ) + return (cl_float)mIntCoords[ idx * mVecSize + el ]; + else + return mFloatCoords[ idx * mVecSize + el ]; +} + + +void print_read_header( cl_image_format *format, image_sampler_data *sampler, bool err, int t ) +{ + const char *addressMode = NULL; + const char *normalizedNames[2] = { "UNNORMALIZED", "NORMALIZED" }; + + if( sampler->addressing_mode == CL_ADDRESS_CLAMP ) + addressMode = "CL_ADDRESS_CLAMP"; + else if( sampler->addressing_mode == CL_ADDRESS_CLAMP_TO_EDGE ) + addressMode = "CL_ADDRESS_CLAMP_TO_EDGE"; + else if( sampler->addressing_mode == CL_ADDRESS_REPEAT ) + addressMode = "CL_ADDRESS_REPEAT"; + else if( sampler->addressing_mode == CL_ADDRESS_MIRRORED_REPEAT ) + addressMode = "CL_ADDRESS_MIRRORED_REPEAT"; + else + addressMode = "CL_ADDRESS_NONE"; + + if( t ) + { + if( err ) + log_error( "[%-7s %-24s %d] - %s - %s - %s - %s\n", GetChannelOrderName( format->image_channel_order ), + GetChannelTypeName( format->image_channel_data_type ), + (int)get_format_channel_count( format ), + sampler->filter_mode == CL_FILTER_NEAREST ? 
"CL_FILTER_NEAREST" : "CL_FILTER_LINEAR", + addressMode, + normalizedNames[sampler->normalized_coords ? 1 : 0], + t == 1 ? "TRANSPOSED" : "NON-TRANSPOSED" ); + else + log_info( "[%-7s %-24s %d] - %s - %s - %s - %s\n", GetChannelOrderName( format->image_channel_order ), + GetChannelTypeName( format->image_channel_data_type ), + (int)get_format_channel_count( format ), + sampler->filter_mode == CL_FILTER_NEAREST ? "CL_FILTER_NEAREST" : "CL_FILTER_LINEAR", + addressMode, + normalizedNames[sampler->normalized_coords ? 1 : 0], + t == 1 ? "TRANSPOSED" : "NON-TRANSPOSED" ); + } + else + { + if( err ) + log_error( "[%-7s %-24s %d] - %s - %s - %s\n", GetChannelOrderName( format->image_channel_order ), + GetChannelTypeName( format->image_channel_data_type ), + (int)get_format_channel_count( format ), + sampler->filter_mode == CL_FILTER_NEAREST ? "CL_FILTER_NEAREST" : "CL_FILTER_LINEAR", + addressMode, + normalizedNames[sampler->normalized_coords ? 1 : 0] ); + else + log_info( "[%-7s %-24s %d] - %s - %s - %s\n", GetChannelOrderName( format->image_channel_order ), + GetChannelTypeName( format->image_channel_data_type ), + (int)get_format_channel_count( format ), + sampler->filter_mode == CL_FILTER_NEAREST ? "CL_FILTER_NEAREST" : "CL_FILTER_LINEAR", + addressMode, + normalizedNames[sampler->normalized_coords ? 
1 : 0] ); + } + +} + +void print_write_header( cl_image_format *format, bool err = false) +{ + if( err ) + log_error( "[%-7s %-24s %d]\n", GetChannelOrderName( format->image_channel_order ), + GetChannelTypeName( format->image_channel_data_type ), + (int)get_format_channel_count( format ) ); + else + log_info( "[%-7s %-24s %d]\n", GetChannelOrderName( format->image_channel_order ), + GetChannelTypeName( format->image_channel_data_type ), + (int)get_format_channel_count( format ) ); +} + + +void print_header( cl_image_format *format, bool err = false ) +{ + if (err) { + log_error( "[%-7s %-24s %d]\n", GetChannelOrderName( format->image_channel_order ), + GetChannelTypeName( format->image_channel_data_type ), + (int)get_format_channel_count( format ) ); + } else { + log_info( "[%-7s %-24s %d]\n", GetChannelOrderName( format->image_channel_order ), + GetChannelTypeName( format->image_channel_data_type ), + (int)get_format_channel_count( format ) ); + } +} + +bool find_format( cl_image_format *formatList, unsigned int numFormats, cl_image_format *formatToFind ) +{ + for( unsigned int i = 0; i < numFormats; i++ ) + { + if( formatList[ i ].image_channel_order == formatToFind->image_channel_order && + formatList[ i ].image_channel_data_type == formatToFind->image_channel_data_type ) + return true; + } + return false; +} + +bool check_minimum_supported( cl_image_format *formatList, unsigned int numFormats, cl_mem_flags flags ) +{ + cl_image_format readFormatsToSupport[] = { { CL_RGBA, CL_UNORM_INT8 }, + { CL_RGBA, CL_UNORM_INT16 }, + { CL_RGBA, CL_SIGNED_INT8 }, + { CL_RGBA, CL_SIGNED_INT16 }, + { CL_RGBA, CL_SIGNED_INT32 }, + { CL_RGBA, CL_UNSIGNED_INT8 }, + { CL_RGBA, CL_UNSIGNED_INT16 }, + { CL_RGBA, CL_UNSIGNED_INT32 }, + { CL_RGBA, CL_HALF_FLOAT }, + { CL_RGBA, CL_FLOAT }, + { CL_BGRA, CL_UNORM_INT8} }; + + cl_image_format writeFormatsToSupport[] = { { CL_RGBA, CL_UNORM_INT8 }, + { CL_RGBA, CL_UNORM_INT16 }, + { CL_RGBA, CL_SIGNED_INT8 }, + { CL_RGBA, CL_SIGNED_INT16 
}, + { CL_RGBA, CL_SIGNED_INT32 }, + { CL_RGBA, CL_UNSIGNED_INT8 }, + { CL_RGBA, CL_UNSIGNED_INT16 }, + { CL_RGBA, CL_UNSIGNED_INT32 }, + { CL_RGBA, CL_HALF_FLOAT }, + { CL_RGBA, CL_FLOAT }, + { CL_BGRA, CL_UNORM_INT8} }; + + cl_image_format *formatsToTest; + unsigned int testCount; + bool passed = true; + + if( flags == CL_MEM_READ_ONLY ) + { + formatsToTest = readFormatsToSupport; + testCount = sizeof( readFormatsToSupport ) / sizeof( readFormatsToSupport[ 0 ] ); + } + else + { + formatsToTest = writeFormatsToSupport; + testCount = sizeof( writeFormatsToSupport ) / sizeof( writeFormatsToSupport[ 0 ] ); + } + + for( unsigned int i = 0; i < testCount; i++ ) + { + if( !find_format( formatList, numFormats, &formatsToTest[ i ] ) ) + { + log_error( "ERROR: Format required by OpenCL 1.0 is not supported: " ); + print_header( &formatsToTest[ i ], true ); + gTestCount++; + gTestFailure++; + passed = false; + } + } + return passed; +} + +cl_uint compute_max_mip_levels( size_t width, size_t height, size_t depth) +{ + cl_uint retMaxMipLevels=0, max_dim = 0; + + max_dim = width; + max_dim = height > max_dim ? height : max_dim; + max_dim = depth > max_dim ? 
depth : max_dim; + + while(max_dim) { + retMaxMipLevels++; + max_dim >>= 1; + } + return retMaxMipLevels; +} + +cl_ulong compute_mipmapped_image_size( image_descriptor imageInfo) +{ + cl_ulong retSize = 0; + size_t curr_width, curr_height, curr_depth, curr_array_size; + curr_width = imageInfo.width; + curr_height = imageInfo.height; + curr_depth = imageInfo.depth; + curr_array_size = imageInfo.arraySize; + + for (int i=0; i < (int) imageInfo.num_mip_levels; i++) + { + switch ( imageInfo.type ) + { + case CL_MEM_OBJECT_IMAGE3D : + retSize += (cl_ulong)curr_width * curr_height * curr_depth * get_pixel_size(imageInfo.format); + break; + case CL_MEM_OBJECT_IMAGE2D : + retSize += (cl_ulong)curr_width * curr_height * get_pixel_size(imageInfo.format); + break; + case CL_MEM_OBJECT_IMAGE1D : + retSize += (cl_ulong)curr_width * get_pixel_size(imageInfo.format); + break; + case CL_MEM_OBJECT_IMAGE1D_ARRAY : + retSize += (cl_ulong)curr_width * curr_array_size * get_pixel_size(imageInfo.format); + break; + case CL_MEM_OBJECT_IMAGE2D_ARRAY : + retSize += (cl_ulong)curr_width * curr_height * curr_array_size * get_pixel_size(imageInfo.format); + break; + } + + switch ( imageInfo.type ) + { + case CL_MEM_OBJECT_IMAGE3D : + curr_depth = curr_depth >> 1 ? curr_depth >> 1: 1; + case CL_MEM_OBJECT_IMAGE2D : + case CL_MEM_OBJECT_IMAGE2D_ARRAY : + curr_height = curr_height >> 1? curr_height >> 1 : 1; + case CL_MEM_OBJECT_IMAGE1D : + case CL_MEM_OBJECT_IMAGE1D_ARRAY : + curr_width = curr_width >> 1? 
curr_width >> 1 : 1; + } + } + + return retSize; +} + +size_t compute_mip_level_offset( image_descriptor * imageInfo , size_t lod) +{ + size_t retOffset = 0; + size_t width, height, depth; + width = imageInfo->width; + height = imageInfo->height; + depth = imageInfo->depth; + + for(size_t i=0; i < lod; i++) + { + switch(imageInfo->type) + { + case CL_MEM_OBJECT_IMAGE2D_ARRAY: + retOffset += (size_t) width * height * imageInfo->arraySize * get_pixel_size( imageInfo->format ); + break; + case CL_MEM_OBJECT_IMAGE3D: + retOffset += (size_t) width * height * depth * get_pixel_size( imageInfo->format ); + break; + case CL_MEM_OBJECT_IMAGE1D_ARRAY: + retOffset += (size_t) width * imageInfo->arraySize * get_pixel_size( imageInfo->format ); + break; + case CL_MEM_OBJECT_IMAGE2D: + retOffset += (size_t) width * height * get_pixel_size( imageInfo->format ); + break; + case CL_MEM_OBJECT_IMAGE1D: + retOffset += (size_t) width * get_pixel_size( imageInfo->format ); + break; + } + + // Compute next lod dimensions + switch(imageInfo->type) + { + case CL_MEM_OBJECT_IMAGE3D: + depth = ( depth >> 1 ) ? ( depth >> 1 ) : 1; + case CL_MEM_OBJECT_IMAGE2D: + case CL_MEM_OBJECT_IMAGE2D_ARRAY: + height = ( height >> 1 ) ? ( height >> 1 ) : 1; + case CL_MEM_OBJECT_IMAGE1D_ARRAY: + case CL_MEM_OBJECT_IMAGE1D: + width = ( width >> 1 ) ? ( width >> 1 ) : 1; + } + + } + return retOffset; +} diff --git a/test_common/harness/imageHelpers.h b/test_common/harness/imageHelpers.h new file mode 100644 index 00000000..83ff1377 --- /dev/null +++ b/test_common/harness/imageHelpers.h @@ -0,0 +1,646 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef _imageHelpers_h
+#define _imageHelpers_h
+
+#include "compat.h"
+
+// NOTE(review): the system-header targets of the following includes appear to
+// have been lost when this patch was extracted (bare "#include" lines) —
+// restore them from the upstream file before building.
+#include
+#include
+#include
+#include
+#include
+
+#if !defined(_WIN32)
+#include
+#endif
+
+#include
+
+#include "errorHelpers.h"
+
+#include "conversions.h"
+#include "typeWrappers.h"
+#include "kernelHelpers.h"
+#include "errorHelpers.h"
+#include "mt19937.h"
+#include "rounding_mode.h"
+#include "clImageHelper.h"
+
+// Harness-global test counters / device type, defined elsewhere.
+extern int gTestCount;
+extern int gTestFailure;
+extern cl_device_type gDeviceType;
+
+// Number of iterations per image format to test if not testing max images, rounding, or small images
+#define NUM_IMAGE_ITERATIONS 3
+
+
+// Maximum tolerated error for sRGB <-> linear-RGB conversions, in ULPs of the
+// 8-bit result.
+#define MAX_sRGB_TO_lRGB_CONVERSION_ERROR 0.5
+#define MAX_lRGB_TO_sRGB_CONVERSION_ERROR 0.6
+
+// Definition for our own sampler type, to mirror the cl_sampler internals
+typedef struct {
+    cl_addressing_mode addressing_mode;
+    cl_filter_mode filter_mode;
+    bool normalized_coords;
+} image_sampler_data;
+
+int round_to_even( float v );
+
+// Map a normalized value v in [0,1] (or [-1,1] for the SIGNED variants) to an
+// integer channel range, clamping out-of-range inputs.  The *_UNROUNDED forms
+// skip the final round-to-nearest-even.
+// NOTE(review): macro parameters are deliberately used unparenthesized here,
+// matching the upstream harness; callers must pass simple expressions.
+#define NORMALIZE( v, max ) ( v < 0 ? 0 : ( v > 1.f ? max : round_to_even( v * max ) ) )
+#define NORMALIZE_UNROUNDED( v, max ) ( v < 0 ? 0 : ( v > 1.f ? max : v * max ) )
+#define NORMALIZE_SIGNED( v, min, max ) ( v < -1.0f ? min : ( v > 1.f ? max : round_to_even( v * max ) ) )
+#define NORMALIZE_SIGNED_UNROUNDED( v, min, max ) ( v < -1.0f ? min : ( v > 1.f ? max : v * max ) )
+#define CONVERT_INT( v, min, max, max_val) ( v < min ? min : ( v > max ? max_val : round_to_even( v ) ) )
+#define CONVERT_UINT( v, max, max_val) ( v < 0 ? 0 : ( v > max ? max_val : round_to_even( v ) ) )
+
+// Logging helpers for test headers / failure reports.
+extern void print_read_header( cl_image_format *format, image_sampler_data *sampler, bool err = false, int t = 0 );
+extern void print_write_header( cl_image_format *format, bool err);
+extern void print_header( cl_image_format *format, bool err );
+extern bool find_format( cl_image_format *formatList, unsigned int numFormats, cl_image_format *formatToFind );
+extern bool check_minimum_supported( cl_image_format *formatList, unsigned int numFormats, cl_mem_flags flags );
+
+// Format introspection: per-channel size, channel count, signedness, pixel size.
+extern size_t get_format_type_size( const cl_image_format *format );
+extern size_t get_channel_data_type_size( cl_channel_type channelType );
+extern size_t get_format_channel_count( const cl_image_format *format );
+extern size_t get_channel_order_channel_count( cl_channel_order order );
+cl_channel_type get_channel_type_from_name( const char *name );
+cl_channel_order get_channel_order_from_name( const char *name );
+extern int is_format_signed( const cl_image_format *format );
+extern size_t get_pixel_size( cl_image_format *format );
+
+/* Helper to get any ol image format as long as it is 8-bits-per-channel */
+extern int get_8_bit_image_format( cl_context context, cl_mem_object_type objType, cl_mem_flags flags, size_t channelCount, cl_image_format *outFormat );
+
+/* Helper to get any ol image format as long as it is 32-bits-per-channel */
+extern int get_32_bit_image_format( cl_context context, cl_mem_object_type objType, cl_mem_flags flags, size_t channelCount, cl_image_format *outFormat );
+
+int random_in_range( int minV, int maxV, MTdata d );
+int random_log_in_range( int minV, int maxV, MTdata d );
+
+// Host-side description of an image under test; mirrors the cl_image_desc
+// fields the harness needs plus the format and an optional backing buffer.
+typedef struct
+{
+    size_t width;
+    size_t height;
+    size_t depth;
+    size_t rowPitch;
+    size_t slicePitch;
+    size_t arraySize;
+    cl_image_format *format;
+    cl_mem buffer;
+    cl_mem_object_type type;
+    cl_uint num_mip_levels;
+} image_descriptor;
+
+// One RGBA pixel as four floats.
+typedef struct
+{
+    float p[4];
+}FloatPixel;
+
+void get_max_sizes(size_t *numberOfSizes, const int maxNumberOfSizes,
+                   size_t sizes[][3], size_t maxWidth, size_t maxHeight, size_t maxDepth, size_t maxArraySize,
+                   const cl_ulong maxIndividualAllocSize, const cl_ulong maxTotalAllocSize, cl_mem_object_type image_type, cl_image_format *format, int usingMaxPixelSize=0);
+extern size_t get_format_max_int( cl_image_format *format );
+
+extern cl_ulong get_image_size( image_descriptor const *imageInfo );
+extern cl_ulong get_image_size_mb( image_descriptor const *imageInfo );
+
+// NOTE(review): the BufferOwningPtr template argument was lost in extraction
+// here and below — presumably BufferOwningPtr<char>; confirm against upstream.
+extern char * generate_random_image_data( image_descriptor *imageInfo, BufferOwningPtr &Owner, MTdata d );
+
+// Debug helpers: locate a given vector/pixel value inside an image buffer and
+// report its coordinates.
+extern int debug_find_vector_in_image( void *imagePtr, image_descriptor *imageInfo,
+                                      void *vectorToFind, size_t vectorSize, int *outX, int *outY, int *outZ, size_t lod = 0 );
+
+extern int debug_find_pixel_in_image( void *imagePtr, image_descriptor *imageInfo,
+                                     unsigned int *valuesToFind, int *outX, int *outY, int *outZ, int lod = 0 );
+extern int debug_find_pixel_in_image( void *imagePtr, image_descriptor *imageInfo,
+                                     int *valuesToFind, int *outX, int *outY, int *outZ, int lod = 0 );
+extern int debug_find_pixel_in_image( void *imagePtr, image_descriptor *imageInfo,
+                                     float *valuesToFind, int *outX, int *outY, int *outZ, int lod = 0 );
+
+extern void copy_image_data( image_descriptor *srcImageInfo, image_descriptor *dstImageInfo, void *imageValues, void *destImageValues,
+                            const size_t sourcePos[], const size_t destPos[], const size_t regionSize[] );
+
+int has_alpha(cl_image_format *format);
+
+extern bool alpha_is_x(cl_image_format *format);
+
+extern bool is_sRGBA_order(cl_channel_order image_channel_order);
+
+inline float calculate_array_index( float coord, float extent );
+
+// Mipmap geometry helpers (implemented in imageHelpers.cpp above).
+cl_uint compute_max_mip_levels( size_t width, size_t height, size_t depth);
+cl_ulong compute_mipmapped_image_size( image_descriptor imageInfo);
+size_t compute_mip_level_offset( image_descriptor *
imageInfo , size_t lod);
+
+// Reads the pixel at integer coordinate (x, y, z) of mip level `lod` from a
+// host copy of the image, unpacks its channels to type T, and swizzles them
+// into RGBA order in outData[0..3].  Out-of-bounds coordinates return the
+// border color.  For mip-mapped images the row/slice pitch of the level is
+// recomputed as tightly packed; otherwise the descriptor's pitches are used.
+// NOTE(review): the template parameter list ("<class T>") was lost in this
+// patch's extraction — restore it before building.
+template void read_image_pixel( void *imageData, image_descriptor *imageInfo,
+                               int x, int y, int z, T *outData, int lod )
+{
+    float convert_half_to_float( unsigned short halfValue );
+    size_t width_lod = imageInfo->width, height_lod = imageInfo->height, depth_lod = imageInfo->depth, slice_pitch_lod = 0/*imageInfo->slicePitch*/ , row_pitch_lod = 0/*imageInfo->rowPitch*/;
+    // Dimensions halve per mip level, clamped at 1; array dimensions and the
+    // height of 1D images are never reduced.
+    width_lod = ( imageInfo->width >> lod) ?( imageInfo->width >> lod):1;
+
+    if ( imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY && imageInfo->type != CL_MEM_OBJECT_IMAGE1D)
+        height_lod = ( imageInfo->height >> lod) ?( imageInfo->height >> lod):1;
+
+    if(imageInfo->type == CL_MEM_OBJECT_IMAGE3D)
+        depth_lod = ( imageInfo->depth >> lod) ? ( imageInfo->depth >> lod) : 1;
+    // Mip levels are tightly packed; non-mipmapped images honor the
+    // descriptor's explicit pitches.
+    row_pitch_lod = (imageInfo->num_mip_levels > 0)? (width_lod * get_pixel_size( imageInfo->format )): imageInfo->rowPitch;
+    slice_pitch_lod = (imageInfo->num_mip_levels > 0)? (row_pitch_lod * height_lod): imageInfo->slicePitch;
+
+    // correct depth_lod and height_lod for array image types in order to avoid
+    // return
+    if (imageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY && height_lod == 1 && depth_lod == 1) {
+        depth_lod = 0;
+        height_lod = 0;
+
+    }
+
+    if (imageInfo->type == CL_MEM_OBJECT_IMAGE2D_ARRAY && depth_lod == 1) {
+        depth_lod = 0;
+    }
+
+    // Out-of-range coordinate: return the border color (all zero, with alpha
+    // forced to 1 for formats that have no alpha channel; depth images
+    // border to 1).
+    if ( x < 0 || x >= (int)width_lod
+        || ( height_lod != 0 && ( y < 0 || y >= (int)height_lod ) )
+        || ( depth_lod != 0 && ( z < 0 || z >= (int)depth_lod ) )
+        || ( imageInfo->arraySize != 0 && ( z < 0 || z >= (int)imageInfo->arraySize ) ) )
+    {
+        // Border color
+        if (imageInfo->format->image_channel_order == CL_DEPTH)
+        {
+            outData[ 0 ] = 1;
+        }
+        else {
+            outData[ 0 ] = outData[ 1 ] = outData[ 2 ] = outData[ 3 ] = 0;
+            if (!has_alpha(imageInfo->format))
+                outData[3] = 1;
+        }
+        return;
+    }
+
+    cl_image_format *format = imageInfo->format;
+
+    unsigned int i;
+    T tempData[ 4 ];
+
+    // Advance to the right spot
+    char *ptr = (char *)imageData;
+    size_t pixelSize = get_pixel_size( format );
+
+    ptr += z * slice_pitch_lod + y * row_pitch_lod + x * pixelSize;
+
+    // Unpack the raw channel values into tempData[] in storage order.
+    // OpenCL only supports reading floats from certain formats
+    switch( format->image_channel_data_type )
+    {
+        case CL_SNORM_INT8:
+        {
+            cl_char *dPtr = (cl_char *)ptr;
+            for( i = 0; i < get_format_channel_count( format ); i++ )
+                tempData[ i ] = (T)dPtr[ i ];
+            break;
+        }
+
+        case CL_UNORM_INT8:
+        {
+            cl_uchar *dPtr = (cl_uchar *)ptr;
+            for( i = 0; i < get_format_channel_count( format ); i++ )
+                tempData[ i ] = (T)dPtr[ i ];
+            break;
+        }
+
+        case CL_SIGNED_INT8:
+        {
+            cl_char *dPtr = (cl_char *)ptr;
+            for( i = 0; i < get_format_channel_count( format ); i++ )
+                tempData[ i ] = (T)dPtr[ i ];
+            break;
+        }
+
+        case CL_UNSIGNED_INT8:
+        {
+            cl_uchar *dPtr = (cl_uchar*)ptr;
+            for( i = 0; i < get_format_channel_count( format ); i++ )
+                tempData[ i ] = (T)dPtr[ i ];
+            break;
+        }
+
+        case CL_SNORM_INT16:
+        {
+            cl_short *dPtr = (cl_short *)ptr;
+            for( i = 0; i < get_format_channel_count( format ); i++ )
+                tempData[ i ] = (T)dPtr[ i ];
+            break;
+        }
+
+        case CL_UNORM_INT16:
+        {
+            cl_ushort *dPtr = (cl_ushort *)ptr;
+            for( i = 0; i < get_format_channel_count( format ); i++ )
+                tempData[ i ] = (T)dPtr[ i ];
+            break;
+        }
+
+        case CL_SIGNED_INT16:
+        {
+            cl_short *dPtr = (cl_short *)ptr;
+            for( i = 0; i < get_format_channel_count( format ); i++ )
+                tempData[ i ] = (T)dPtr[ i ];
+            break;
+        }
+
+        case CL_UNSIGNED_INT16:
+        {
+            cl_ushort *dPtr = (cl_ushort *)ptr;
+            for( i = 0; i < get_format_channel_count( format ); i++ )
+                tempData[ i ] = (T)dPtr[ i ];
+            break;
+        }
+
+        case CL_HALF_FLOAT:
+        {
+            cl_ushort *dPtr = (cl_ushort *)ptr;
+            for( i = 0; i < get_format_channel_count( format ); i++ )
+                tempData[ i ] = (T)convert_half_to_float( dPtr[ i ] );
+            break;
+        }
+
+        case CL_SIGNED_INT32:
+        {
+            cl_int *dPtr = (cl_int *)ptr;
+            for( i = 0; i < get_format_channel_count( format ); i++ )
+                tempData[ i ] = (T)dPtr[ i ];
+            break;
+        }
+
+        case CL_UNSIGNED_INT32:
+        {
+            cl_uint *dPtr = (cl_uint *)ptr;
+            for( i = 0; i < get_format_channel_count( format ); i++ )
+                tempData[ i ] = (T)dPtr[ i ];
+            break;
+        }
+
+        // Packed formats: fields are extracted by shift/mask, most
+        // significant field first.
+        case CL_UNORM_SHORT_565:
+        {
+            cl_ushort *dPtr = (cl_ushort*)ptr;
+            tempData[ 0 ] = (T)( dPtr[ 0 ] >> 11 );
+            tempData[ 1 ] = (T)( ( dPtr[ 0 ] >> 5 ) & 63 );
+            tempData[ 2 ] = (T)( dPtr[ 0 ] & 31 );
+            break;
+        }
+
+#ifdef OBSOLETE_FORMAT
+        case CL_UNORM_SHORT_565_REV:
+        {
+            unsigned short *dPtr = (unsigned short *)ptr;
+            tempData[ 2 ] = (T)( dPtr[ 0 ] >> 11 );
+            tempData[ 1 ] = (T)( ( dPtr[ 0 ] >> 5 ) & 63 );
+            tempData[ 0 ] = (T)( dPtr[ 0 ] & 31 );
+            break;
+        }
+
+        case CL_UNORM_SHORT_555_REV:
+        {
+            unsigned short *dPtr = (unsigned short *)ptr;
+            tempData[ 2 ] = (T)( ( dPtr[ 0 ] >> 10 ) & 31 );
+            tempData[ 1 ] = (T)( ( dPtr[ 0 ] >> 5 ) & 31 );
+            tempData[ 0 ] = (T)( dPtr[ 0 ] & 31 );
+            break;
+        }
+
+        case CL_UNORM_INT_8888:
+        {
+            unsigned int *dPtr = (unsigned int *)ptr;
+            tempData[ 3 ] = (T)( dPtr[ 0 ] >> 24 );
+            tempData[ 2 ] = (T)( ( dPtr[ 0 ] >> 16 ) & 0xff );
+            tempData[ 1 ] = (T)( ( dPtr[ 0 ] >> 8 ) & 0xff );
+            tempData[ 0 ] = (T)( dPtr[ 0 ] & 0xff );
+            break;
+        }
+        case CL_UNORM_INT_8888_REV:
+        {
+            unsigned int *dPtr = (unsigned int *)ptr;
+            tempData[ 0 ] = (T)( dPtr[ 0 ] >> 24 );
+            tempData[ 1 ] = (T)( ( dPtr[ 0 ] >> 16 ) & 0xff );
+            tempData[ 2 ] = (T)( ( dPtr[ 0 ] >> 8 ) & 0xff );
+            tempData[ 3 ] = (T)( dPtr[ 0 ] & 0xff );
+            break;
+        }
+
+        case CL_UNORM_INT_101010_REV:
+        {
+            unsigned int *dPtr = (unsigned int *)ptr;
+            tempData[ 2 ] = (T)( ( dPtr[ 0 ] >> 20 ) & 0x3ff );
+            tempData[ 1 ] = (T)( ( dPtr[ 0 ] >> 10 ) & 0x3ff );
+            tempData[ 0 ] = (T)( dPtr[ 0 ] & 0x3ff );
+            break;
+        }
+#endif
+        case CL_UNORM_SHORT_555:
+        {
+            cl_ushort *dPtr = (cl_ushort *)ptr;
+            tempData[ 0 ] = (T)( ( dPtr[ 0 ] >> 10 ) & 31 );
+            tempData[ 1 ] = (T)( ( dPtr[ 0 ] >> 5 ) & 31 );
+            tempData[ 2 ] = (T)( dPtr[ 0 ] & 31 );
+            break;
+        }
+
+        case CL_UNORM_INT_101010:
+        {
+            cl_uint *dPtr = (cl_uint *)ptr;
+            tempData[ 0 ] = (T)( ( dPtr[ 0 ] >> 20 ) & 0x3ff );
+            tempData[ 1 ] = (T)( ( dPtr[ 0 ] >> 10 ) & 0x3ff );
+            tempData[ 2 ] = (T)( dPtr[ 0 ] & 0x3ff );
+            break;
+        }
+
+        case CL_FLOAT:
+        {
+            cl_float *dPtr = (cl_float *)ptr;
+            for( i = 0; i < get_format_channel_count( format ); i++ )
+                tempData[ i ] = (T)dPtr[ i ];
+            break;
+        }
+#ifdef CL_SFIXED14_APPLE
+        case CL_SFIXED14_APPLE:
+        {
+            cl_float *dPtr = (cl_float *)ptr;
+            for( i = 0; i < get_format_channel_count( format ); i++ )
+                tempData[ i ] = (T)dPtr[ i ] + 0x4000;
+            break;
+        }
+#endif
+    }
+
+
+    // Swizzle the storage-order channels into canonical RGBA; missing
+    // channels default to 0 (alpha defaults to 1).
+    outData[ 0 ] = outData[ 1 ] = outData[ 2 ] = 0;
+    outData[ 3 ] = 1;
+
+    if( format->image_channel_order == CL_A )
+    {
+        outData[ 3 ] = tempData[ 0 ];
+    }
+    else if( format->image_channel_order == CL_R )
+    {
+        outData[ 0 ] = tempData[ 0 ];
+    }
+    else if( format->image_channel_order == CL_Rx )
+    {
+        outData[ 0 ] = tempData[ 0 ];
+    }
+    else if( format->image_channel_order == CL_RA )
+    {
+        outData[ 0 ] = tempData[ 0 ];
+        outData[ 3 ] = tempData[ 1 ];
+    }
+    else if( format->image_channel_order == CL_RG )
+    {
+        outData[ 0 ] = tempData[ 0 ];
+        outData[ 1 ] = tempData[ 1 ];
+    }
+    else if( format->image_channel_order == CL_RGx )
+    {
+        outData[ 0 ] = tempData[ 0 ];
+        outData[ 1 ] = tempData[ 1 ];
+    }
+    else if(( format->image_channel_order == CL_RGB ) || ( format->image_channel_order == CL_sRGB ))
+    {
+        outData[ 0 ] = tempData[ 0 ];
+        outData[ 1 ] = tempData[ 1 ];
+        outData[ 2 ] = tempData[ 2 ];
+    }
+    else if(( format->image_channel_order == CL_RGBx ) || ( format->image_channel_order == CL_sRGBx ))
+    {
+        outData[ 0 ] = tempData[ 0 ];
+        outData[ 1 ] = tempData[ 1 ];
+        outData[ 2 ] = tempData[ 2 ];
+        outData[ 3 ] = 0;
+    }
+    else if(( format->image_channel_order == CL_RGBA ) || ( format->image_channel_order == CL_sRGBA ))
+    {
+        outData[ 0 ] = tempData[ 0 ];
+        outData[ 1 ] = tempData[ 1 ];
+        outData[ 2 ] = tempData[ 2 ];
+        outData[ 3 ] = tempData[ 3 ];
+    }
+    else if( format->image_channel_order == CL_ARGB )
+    {
+        outData[ 0 ] = tempData[ 1 ];
+        outData[ 1 ] = tempData[ 2 ];
+        outData[ 2 ] = tempData[ 3 ];
+        outData[ 3 ] = tempData[ 0 ];
+    }
+    else if(( format->image_channel_order == CL_BGRA ) || ( format->image_channel_order == CL_sBGRA ))
+    {
+        outData[ 0 ] = tempData[ 2 ];
+        outData[ 1 ] = tempData[ 1 ];
+        outData[ 2 ] = tempData[ 0 ];
+        outData[ 3 ] = tempData[ 3 ];
+    }
+    else if( format->image_channel_order == CL_INTENSITY )
+    {
+        outData[ 1 ] = tempData[ 0 ];
+        outData[ 2 ] = tempData[ 0 ];
+        outData[ 3 ] = tempData[ 0 ];
+    }
+    else if( format->image_channel_order == CL_LUMINANCE )
+    {
+        outData[ 1 ] = tempData[ 0 ];
+        outData[ 2 ] = tempData[ 0 ];
+    }
+    else if( format->image_channel_order == CL_DEPTH )
+    {
+        outData[ 0 ] = tempData[ 0 ];
+    }
+#ifdef CL_1RGB_APPLE
+    else if( format->image_channel_order == CL_1RGB_APPLE )
+    {
+        outData[ 0 ] = tempData[ 1 ];
+        outData[ 1 ] = tempData[ 2 ];
+        outData[ 2 ] = tempData[ 3 ];
+        outData[ 3 ] = 0xff;
+    }
+#endif
+#ifdef CL_BGR1_APPLE
+    else if( format->image_channel_order == CL_BGR1_APPLE )
+    {
+        outData[ 0 ] = tempData[ 2 ];
+        outData[ 1 ] = tempData[ 1 ];
+        outData[ 2 ] = tempData[ 0 ];
+        outData[ 3 ] = 0xff;
+    }
+#endif
+    else
+    {
+        log_error("Invalid format:");
+        print_header(format, true);
+    }
+}
+
+// Convenience overload: read from the base level (lod 0).
+template void read_image_pixel( void *imageData, image_descriptor *imageInfo,
+                               int x, int y, int z, T *outData )
+{
+    read_image_pixel( imageData, imageInfo, x, y, z, outData, 0);
+}
+
+// Stupid template rules
+bool get_integer_coords( float x, float y, float z,
+                        size_t width, size_t height, size_t depth,
+                        image_sampler_data *imageSampler, image_descriptor *imageInfo,
+                        int &outX, int &outY, int &outZ );
+bool get_integer_coords_offset( float x, float y, float z,
+                               float xAddressOffset, float yAddressOffset, float zAddressOffset,
+                               size_t width, size_t height, size_t depth,
+                               image_sampler_data *imageSampler, image_descriptor *imageInfo,
+                               int &outX, int &outY, int &outZ );
+
+
+// Samples the image at floating-point coordinate (x, y, z) (plus per-axis
+// address offsets) by resolving to an integer texel via the sampler's
+// addressing mode, then reading that texel from level `lod`.
+template void sample_image_pixel_offset( void *imageData, image_descriptor *imageInfo,
+                                        float x, float y, float z, float xAddressOffset,
float yAddressOffset, float zAddressOffset,
+                                        image_sampler_data *imageSampler, T *outData, int lod )
+{
+    int iX = 0, iY = 0, iZ = 0;
+
+    // Determine the addressable extent per axis for this image type; for
+    // array images the array dimension takes the place of height/depth.
+    float max_w = imageInfo->width;
+    float max_h;
+    float max_d;
+
+    switch (imageInfo->type) {
+        case CL_MEM_OBJECT_IMAGE1D_ARRAY:
+            max_h = imageInfo->arraySize;
+            max_d = 0;
+            break;
+        case CL_MEM_OBJECT_IMAGE2D_ARRAY:
+            max_h = imageInfo->height;
+            max_d = imageInfo->arraySize;
+            break;
+        default:
+            max_h = imageInfo->height;
+            max_d = imageInfo->depth;
+            break;
+    }
+
+    // For mip-mapped images, shrink the extents to the requested level
+    // (halving per level, clamped at 1).  The 3D case intentionally falls
+    // through to also reduce height.
+    if( /*gTestMipmaps*/ imageInfo->num_mip_levels > 1 )
+    {
+        switch (imageInfo->type) {
+            case CL_MEM_OBJECT_IMAGE3D:
+                max_d = (float)((imageInfo->depth >> lod) ? (imageInfo->depth >> lod) : 1);
+            case CL_MEM_OBJECT_IMAGE2D:
+            case CL_MEM_OBJECT_IMAGE2D_ARRAY:
+                max_h = (float)((imageInfo->height >> lod) ? (imageInfo->height >> lod) : 1);
+                break;
+            default:
+                ;
+
+        }
+        max_w = (float)((imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1);
+    }
+    get_integer_coords_offset( x, y, z, xAddressOffset, yAddressOffset, zAddressOffset, max_w, max_h, max_d, imageSampler, imageInfo, iX, iY, iZ );
+
+    read_image_pixel( imageData, imageInfo, iX, iY, iZ, outData, lod );
+}
+
+// Convenience overload: sample from the base level (lod 0).
+template void sample_image_pixel_offset( void *imageData, image_descriptor *imageInfo,
+                                        float x, float y, float z, float xAddressOffset, float yAddressOffset, float zAddressOffset,
+                                        image_sampler_data *imageSampler, T *outData)
+{
+    sample_image_pixel_offset( imageData, imageInfo, x, y, z, xAddressOffset, yAddressOffset, zAddressOffset,
+                              imageSampler, outData, 0);
+}
+
+// Convenience overload: no address offsets.
+template void sample_image_pixel( void *imageData, image_descriptor *imageInfo,
+                                 float x, float y, float z, image_sampler_data *imageSampler, T *outData )
+{
+    return sample_image_pixel_offset(imageData, imageInfo, x, y, z, 0.0f, 0.0f, 0.0f, imageSampler, outData);
+}
+
+// Float sampling with filtering; implemented in imageHelpers.cpp.
+FloatPixel sample_image_pixel_float( void *imageData, image_descriptor *imageInfo,
+                                    float x, float y, float z, image_sampler_data *imageSampler, float *outData,
+                                    int verbose, int *containsDenorms );
+
+FloatPixel sample_image_pixel_float( void *imageData, image_descriptor *imageInfo,
+                                    float x, float y, float z, image_sampler_data *imageSampler, float *outData, int verbose, int *containsDenorms, int lod );
+
+FloatPixel sample_image_pixel_float_offset( void *imageData, image_descriptor *imageInfo,
+                                           float x, float y, float z, float xAddressOffset, float yAddressOffset, float zAddressOffset,
+                                           image_sampler_data *imageSampler, float *outData, int verbose, int *containsDenorms );
+FloatPixel sample_image_pixel_float_offset( void *imageData, image_descriptor *imageInfo,
+                                           float x, float y, float z, float xAddressOffset, float yAddressOffset, float zAddressOffset,
+                                           image_sampler_data *imageSampler, float *outData, int verbose, int *containsDenorms, int lod );
+
+
+// Packs an RGBA vector into a pixel of the given format (inverse of the
+// unpack in read_image_pixel); the _error variant reports per-channel error.
+extern void pack_image_pixel( unsigned int *srcVector, const cl_image_format *imageFormat, void *outData );
+extern void pack_image_pixel( int *srcVector, const cl_image_format *imageFormat, void *outData );
+extern void pack_image_pixel( float *srcVector, const cl_image_format *imageFormat, void *outData );
+extern void pack_image_pixel_error( const float *srcVector, const cl_image_format *imageFormat, const void *results, float *errors );
+
+extern char *create_random_image_data( ExplicitType dataType, image_descriptor *imageInfo, BufferOwningPtr &P, MTdata d, bool image2DFromBuffer = false );
+
+// deprecated
+//extern bool clamp_image_coord( image_sampler_data *imageSampler, float value, size_t max, int &outValue );
+
+extern void get_sampler_kernel_code( image_sampler_data *imageSampler, char *outLine );
+extern float get_max_absolute_error( cl_image_format *format, image_sampler_data *sampler);
+extern float get_max_relative_error( cl_image_format *format, image_sampler_data *sampler, int is3D, int isLinearFilter );
+extern int issubnormal(float);
+
+
+// NaN-propagating max: if _x is NaN, return it; otherwise the larger value.
+#define errMax( _x , _y ) ( (_x) != (_x) ? (_x) : (_x) > (_y) ? (_x) : (_y) )
+
+static inline cl_uint abs_diff_uint( cl_uint x, cl_uint y )
+{
+    return y > x ? y - x : x - y;
+}
+
+static inline cl_uint abs_diff_int( cl_int x, cl_int y )
+{
+    return (cl_uint) (y > x ? y - x : x - y);
+}
+
+static inline cl_float relative_error( float test, float expected )
+{
+    // 0-0/0 is 0 in this case, not NaN
+    if( test == 0.0f && expected == 0.0f )
+        return 0.0f;
+
+    return (test - expected) / expected;
+}
+
+extern float random_float(float low, float high);
+
+// Iterates over a coordinate buffer that may hold either floats or ints,
+// presenting every element as cl_float.
+class CoordWalker
+{
+public:
+    CoordWalker( void * coords, bool useFloats, size_t vecSize );
+    ~CoordWalker();
+
+    cl_float Get( size_t idx, size_t el );
+
+protected:
+    cl_float * mFloatCoords;
+    cl_int * mIntCoords;
+    size_t mVecSize;
+};
+
+extern int DetectFloatToHalfRoundingMode( cl_command_queue );  // Returns CL_SUCCESS on success
+
+int inline is_half_nan( cl_ushort half ){ return (half & 0x7fff) > 0x7c00; }
+
+cl_ushort convert_float_to_half( cl_float f );
+cl_float convert_half_to_float( cl_ushort h );
+
+extern double sRGBmap(float fc);
+
+#endif // _imageHelpers_h
diff --git a/test_common/harness/kernelHelpers.c b/test_common/harness/kernelHelpers.c
new file mode 100644
index 00000000..591ab038
--- /dev/null
+++ b/test_common/harness/kernelHelpers.c
@@ -0,0 +1,1290 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// +#include "kernelHelpers.h" +#include "errorHelpers.h" +#include "imageHelpers.h" +#include "typeWrappers.h" +#include "testHarness.h" +#include "parseParameters.h" + +#include +#include +#include +#include +#include + +#if defined(__MINGW32__) +#include "mingw_compat.h" +#endif + +#if defined(_WIN32) +std::string slash = "\\"; +#else +std::string slash = "/"; +#endif + +std::string get_file_name(const std::string &baseName, int index, const std::string &extension) +{ + std::ostringstream fileName; + fileName << baseName << "." << index << extension; + return fileName.str(); +} + +long get_file_size(const std::string &fileName) +{ + std::ifstream ifs(fileName.c_str(), std::ios::binary); + if (!ifs.good()) + return 0; + // get length of file: + ifs.seekg(0, std::ios::end); + std::ios::pos_type length = ifs.tellg(); + return static_cast(length); +} + +std::vector get_file_content(const std::string &fileName) +{ + std::ifstream ifs(fileName.c_str(), std::ios::binary); + if (!ifs.good()) + return std::vector(0); + // get length of file: + ifs.seekg(0, std::ios::end); + std::ios::pos_type length = ifs.tellg(); + ifs.seekg(0, std::ios::beg); + + // allocate memory: + std::vector content(static_cast(length)); + + // read data as a block: + ifs.read(&content[0], length); + return content; +} + +std::string get_kernel_name(const std::string &source) +{ + cl_uint crc = 0; + + // Count CRC + for (cl_uint i = 0; i < source.size() - source.size() % sizeof(cl_uint); i += sizeof(cl_uint)) + { + cl_uint *ptr = (cl_uint *)&source[i]; + crc += *ptr; + } + cl_uint remainder = 0; + memcpy(&remainder, &source[0] + source.size() - source.size() % sizeof(cl_uint), source.size() % sizeof(cl_uint)); + crc += remainder; + + // Create list of kernel names + std::string kernelsList; + size_t kPos = source.find("kernel"); + while (kPos != std::string::npos) + { + // check for '__kernel' + size_t pos = kPos; + if (pos >= 2 && source[pos - 1] == '_' && source[pos - 2] == '_') + pos -= 2; + + 
//check character before 'kernel' (white space expected) + size_t wsPos = source.find_last_of(" \t\r\n", pos); + if (wsPos == std::string::npos || wsPos + 1 == pos) + { + //check character after 'kernel' (white space expected) + size_t akPos = kPos + sizeof("kernel") - 1; + wsPos = source.find_first_of(" \t\r\n", akPos); + if (!(wsPos == akPos)) + { + kPos = source.find("kernel", kPos + 1); + continue; + } + + bool attributeFound; + do + { + attributeFound = false; + // find '(' after kernel name name + size_t pPos = source.find("(", akPos); + if (!(pPos != std::string::npos)) + continue; + + // check for not empty kernel name before '(' + pos = source.find_last_not_of(" \t\r\n", pPos - 1); + if (!(pos != std::string::npos && pos > akPos)) + continue; + + //find character before kernel name + wsPos = source.find_last_of(" \t\r\n", pos); + if (!(wsPos != std::string::npos && wsPos >= akPos)) + continue; + + std::string name = source.substr(wsPos + 1, pos + 1 - (wsPos + 1)); + //check for kernel attribute + if (name == "__attribute__") + { + attributeFound = true; + int pCount = 1; + akPos = pPos + 1; + while (pCount > 0 && akPos != std::string::npos) + { + akPos = source.find_first_of("()", akPos + 1); + if (akPos != std::string::npos) + { + if (source[akPos] == '(') + pCount++; + else + pCount--; + } + } + } + else + { + kernelsList += name + "."; + } + } while (attributeFound); + } + kPos = source.find("kernel", kPos + 1); + } + std::ostringstream oss; + if (MAX_LEN_FOR_KERNEL_LIST > 0) + { + if (kernelsList.size() > MAX_LEN_FOR_KERNEL_LIST + 1) + { + kernelsList = kernelsList.substr(0, MAX_LEN_FOR_KERNEL_LIST + 1); + kernelsList[kernelsList.size() - 1] = '.'; + kernelsList[kernelsList.size() - 1] = '.'; + } + oss << kernelsList; + } + oss << std::hex << std::setfill('0') << std::setw(8) << crc; + return oss.str(); +} + +std::string add_build_options(const std::string &baseName, const char *options) +{ + if (options == 0 || options[0] == 0) + return 
get_file_name(baseName, 0, ""); + + bool equal = false; + int i = 0; + + do + { + i++; + std::string fileName = gSpirVPath + slash + get_file_name(baseName, i, ".options"); + long fileSize = get_file_size(fileName); + if (fileSize == 0) + break; + //if(fileSize == strlen(options)) + { + std::vector options2 = get_file_content(fileName); + options2.push_back(0); //terminate string + equal = strcmp(options, &options2[0]) == 0; + } + } while (!equal); + if (equal) + return get_file_name(baseName, i, ""); + + std::string fileName = gSpirVPath + slash + get_file_name(baseName, i, ".options"); + std::ofstream ofs(fileName.c_str(), std::ios::binary); + if (!ofs.good()) + { + log_info("OfflineCompiler: can't create options: %s\n", fileName.c_str()); + return ""; + } + // write data as a block: + ofs.write(options, strlen(options)); + log_info("OfflineCompiler: options added: %s\n", fileName.c_str()); + return get_file_name(baseName, i, ""); +} + +int create_single_kernel_helper_create_program(cl_context context, + cl_program *outProgram, + unsigned int numKernelLines, + const char **kernelProgram, + const char *buildOptions, + const bool openclCXX) +{ + int error = CL_SUCCESS; + std::string modifiedKernelStr; + const char* modifiedKernelCode; + + if (gOfflineCompiler) + { + #ifndef CL_OFFLINE_COMPILER + log_error("Offline compilation is not possible: CL_OFFLINE_COMPILER was not defined.\n"); + return -1; + #endif // !CL_OFFLINE_COMPILER + + std::string kernel; + for (size_t i = 0; i < numKernelLines; ++i) + { + std::string chunk(kernelProgram[i], 0, std::string::npos); + kernel += chunk; + } + + std::string kernelName = get_kernel_name(kernel); + + // set build options + std::string bOptions; + bOptions += buildOptions ? 
std::string(buildOptions) : ""; + + kernelName = add_build_options(kernelName, buildOptions); + + std::string gOfflineCompilerInput = gSpirVPath + slash + kernelName + ".cl"; + std::string gOfflineCompilerOutput = gSpirVPath + slash + kernelName; + + // Get device CL_DEVICE_ADDRESS_BITS + cl_uint device_address_space_size = 0; + if (gOfflineCompilerOutputType == kSpir_v) + { + cl_device_id device; + cl_uint numDevices = 0; + cl_int error = clGetContextInfo(context, CL_CONTEXT_NUM_DEVICES, sizeof(cl_uint), &numDevices, 0); + if (error != CL_SUCCESS) + { + print_error(error, "clGetContextInfo failed"); + return error; + } + + std::vector devices(numDevices, 0); + error = clGetContextInfo(context, CL_CONTEXT_DEVICES, numDevices*sizeof(cl_device_id), &devices[0], 0); + if (error != CL_SUCCESS) + { + print_error(error, "clGetContextInfo failed"); + return error; + } + + error = clGetContextInfo(context, CL_CONTEXT_DEVICES, numDevices*sizeof(cl_device_id), &devices[0], NULL); + if (error != CL_SUCCESS) + { + print_error(error, "clGetContextInfo failed"); + return error; + } + + if ((0 == device_address_space_size) && ((error = clGetDeviceInfo(devices[0], CL_DEVICE_ADDRESS_BITS, sizeof(cl_uint), &device_address_space_size, NULL)))) + { + print_error(error, "Unable to obtain device address bits"); + return -1; + } + + if (device_address_space_size == 32) + { + gOfflineCompilerOutput += ".spv32"; + } + else if (device_address_space_size == 64) + { + gOfflineCompilerOutput += ".spv64"; + } + } + + // try to read cached output file when test is run with gForceSpirVGenerate = false + std::ifstream ifs(gOfflineCompilerOutput.c_str(), std::ios::binary); + if (!ifs.good() || gForceSpirVGenerate) + { + if (gForceSpirVCache) + { + log_info("OfflineCompiler: can't open cached SpirV file: %s\n", gOfflineCompilerOutput.c_str()); + return -1; + } + + ifs.close(); + + if (!gForceSpirVGenerate) + log_info("OfflineCompiler: can't find cached SpirV file: %s\n", 
gOfflineCompilerOutput.c_str()); + + std::ofstream ofs(gOfflineCompilerInput.c_str(), std::ios::binary); + if (!ofs.good()) + { + log_info("OfflineCompiler: can't create source file: %s\n", gOfflineCompilerInput.c_str()); + return -1; + } + + // write source to input file + ofs.write(kernel.c_str(), kernel.size()); + ofs.close(); + + // Set compiler options + // Emit SPIR-V + std::string compilerOptions = " -cc1 -emit-spirv"; + // : for 32 bit SPIR-V use spir-unknown-unknown, for 64 bit SPIR-V use spir64-unknown-unknown. + if(device_address_space_size == 32) + { + compilerOptions += " -triple=spir-unknown-unknown"; + } + else + { + compilerOptions += " -triple=spir64-unknown-unknown"; + } + // Set OpenCL C++ flag required by SPIR-V-ready clang (compiler provided by Khronos) + if(openclCXX) + { + compilerOptions = compilerOptions + " -cl-std=c++"; + } + // Set correct includes + if(openclCXX) + { + compilerOptions += " -I "; + compilerOptions += STRINGIFY_VALUE(CL_LIBCLCXX_DIR); + } + else + { + compilerOptions += " -include opencl.h"; + } + + #ifdef CL_OFFLINE_COMPILER_OPTIONS + compilerOptions += STRINGIFY_VALUE(CL_OFFLINE_COMPILER_OPTIONS); + #endif + + // Add build options passed to this function + compilerOptions += " " + bOptions; + compilerOptions += + " " + gOfflineCompilerInput + + " -o " + gOfflineCompilerOutput; + std::string runString = STRINGIFY_VALUE(CL_OFFLINE_COMPILER) + compilerOptions; + + // execute script + log_info("Executing command: %s\n", runString.c_str()); + fflush(stdout); + int returnCode = system(runString.c_str()); + if (returnCode != 0) + { + log_error("ERROR: Command finished with error: 0x%x\n", returnCode); + return CL_COMPILE_PROGRAM_FAILURE; + } + // read output file + ifs.open(gOfflineCompilerOutput.c_str(), std::ios::binary); + if (!ifs.good()) + { + log_info("OfflineCompiler: can't read output file: %s\n", gOfflineCompilerOutput.c_str()); + return -1; + } + } + + // 
----------------------------------------------------------------------------------- + // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ + // ----------------------------------------------------------------------------------- + // Only OpenCL C++ to SPIR-V compilation + #if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) + if(openclCXX) + { + return CL_SUCCESS; + } + #endif + + ifs.seekg(0, ifs.end); + int length = ifs.tellg(); + ifs.seekg(0, ifs.beg); + + //treat modifiedProgram as input for clCreateProgramWithSource + if (gOfflineCompilerOutputType == kSource) + { + // read source from file: + std::vector modifiedKernelBuf(length); + + ifs.read(&modifiedKernelBuf[0], length); + ifs.close(); + + for (int i = 0; i < length; i++) + modifiedKernelStr.push_back(modifiedKernelBuf[i]); + + modifiedKernelCode = &modifiedKernelStr[0]; + + /* Create the program object from source - to be removed in the future as we will use offline compiler here */ + *outProgram = clCreateProgramWithSource(context, numKernelLines, &modifiedKernelCode, NULL, &error); + if (*outProgram == NULL || error != CL_SUCCESS) + { + print_error(error, "clCreateProgramWithSource failed"); + return error; + } + } + //treat modifiedProgram as input for clCreateProgramWithBinary + else if (gOfflineCompilerOutputType == kBinary) + { + // read binary from file: + std::vector modifiedKernelBuf(length); + + ifs.read((char *)&modifiedKernelBuf[0], length); + ifs.close(); + + cl_uint numDevices = 0; + cl_int error = clGetContextInfo(context, CL_CONTEXT_NUM_DEVICES, sizeof(cl_uint), &numDevices, 0); + test_error(error, "clGetContextInfo failed"); + + std::vector devices(numDevices, 0); + error = clGetContextInfo(context, CL_CONTEXT_DEVICES, numDevices*sizeof(cl_device_id), &devices[0], 0); + test_error(error, "clGetContextInfo failed"); + + size_t lengths = modifiedKernelBuf.size(); + const unsigned char *binaries = { &modifiedKernelBuf[0] }; + 
log_info("offlineCompiler: clCreateProgramWithSource replaced with clCreateProgramWithBinary\n"); + *outProgram = clCreateProgramWithBinary(context, 1, &devices[0], &lengths, &binaries, NULL, &error); + if (*outProgram == NULL || error != CL_SUCCESS) + { + print_error(error, "clCreateProgramWithBinary failed"); + return error; + } + } + //treat modifiedProgram as input for clCreateProgramWithIL + else if (gOfflineCompilerOutputType == kSpir_v) + { + // read spir-v from file: + std::vector modifiedKernelBuf(length); + + ifs.read((char *)&modifiedKernelBuf[0], length); + ifs.close(); + + size_t length = modifiedKernelBuf.size(); + log_info("offlineCompiler: clCreateProgramWithSource replaced with clCreateProgramWithIL\n"); + + *outProgram = clCreateProgramWithIL(context, &modifiedKernelBuf[0], length, &error); + if (*outProgram == NULL || error != CL_SUCCESS) + { + print_error(error, "clCreateProgramWithIL failed"); + return error; + } + } + } + else // gOfflineCompiler == false + { + /* Create the program object from source */ + *outProgram = clCreateProgramWithSource(context, numKernelLines, kernelProgram, NULL, &error); + if (*outProgram == NULL || error != CL_SUCCESS) + { + print_error(error, "clCreateProgramWithSource failed"); + return error; + } + } + return 0; +} + +int create_single_kernel_helper_with_build_options(cl_context context, + cl_program *outProgram, + cl_kernel *outKernel, + unsigned int numKernelLines, + const char **kernelProgram, + const char *kernelName, + const char *buildOptions, + const bool openclCXX) +{ + return create_single_kernel_helper(context, outProgram, outKernel, numKernelLines, kernelProgram, kernelName, buildOptions, openclCXX); +} + +// Creates and builds OpenCL C/C++ program, and creates a kernel +int create_single_kernel_helper(cl_context context, + cl_program *outProgram, + cl_kernel *outKernel, + unsigned int numKernelLines, + const char **kernelProgram, + const char *kernelName, + const char *buildOptions, + const bool 
openclCXX) +{ + int error; + // Create OpenCL C++ program + if(openclCXX) + { + // ----------------------------------------------------------------------------------- + // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ + // ----------------------------------------------------------------------------------- + // Only OpenCL C++ to SPIR-V compilation + #if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) + // Save global variable + bool tempgForceSpirVGenerate = gForceSpirVGenerate; + // Force OpenCL C++ -> SPIR-V compilation on every run + gForceSpirVGenerate = true; + #endif + error = create_openclcpp_program( + context, outProgram, numKernelLines, kernelProgram, buildOptions + ); + if (error != CL_SUCCESS) + { + log_error("Create program failed: %d, line: %d\n", error, __LINE__); + return error; + } + // ----------------------------------------------------------------------------------- + // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ + // ----------------------------------------------------------------------------------- + #if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) + // Restore global variables + gForceSpirVGenerate = tempgForceSpirVGenerate; + log_info("WARNING: KERNEL %s WAS ONLY COMPILED TO SPIR-V\n", kernelName); + return error; + #endif + } + // Create OpenCL C program + else + { + error = create_single_kernel_helper_create_program( + context, outProgram, numKernelLines, kernelProgram, buildOptions + ); + if (error != CL_SUCCESS) + { + log_error("Create program failed: %d, line: %d\n", error, __LINE__); + return error; + } + } + // Remove offline-compiler-only build options + std::string newBuildOptions; + if (buildOptions != NULL) + { + newBuildOptions = buildOptions; + std::string offlineCompierOptions[] = { + "-cl-fp16-enable", + "-cl-fp64-enable", + "-cl-zero-init-local-mem-vars" + }; + for(auto& s : offlineCompierOptions) + { + std::string::size_type i = 
newBuildOptions.find(s); + if (i != std::string::npos) + newBuildOptions.erase(i, s.length()); + } + } + // Build program and create kernel + return build_program_create_kernel_helper( + context, outProgram, outKernel, numKernelLines, kernelProgram, kernelName, newBuildOptions.c_str() + ); +} + +// Creates OpenCL C++ program +int create_openclcpp_program(cl_context context, + cl_program *outProgram, + unsigned int numKernelLines, + const char **kernelProgram, + const char *buildOptions) +{ + // Save global variables + bool tempgOfflineCompiler = gOfflineCompiler; + OfflineCompilerOutputType tempgOfflineCompilerOutputType = gOfflineCompilerOutputType; + // Force offline compilation to SPIR-V + gOfflineCompiler = true; + gOfflineCompilerOutputType = OfflineCompilerOutputType::kSpir_v; + // Create program + int error = create_single_kernel_helper_create_program( + context, outProgram, numKernelLines, kernelProgram, buildOptions, true + ); + // Restore global variable + gOfflineCompiler = tempgOfflineCompiler; + gOfflineCompilerOutputType = tempgOfflineCompilerOutputType; + // Return result + return error; +} + +// Builds OpenCL C/C++ program and creates +int build_program_create_kernel_helper(cl_context context, + cl_program *outProgram, + cl_kernel *outKernel, + unsigned int numKernelLines, + const char **kernelProgram, + const char *kernelName, + const char *buildOptions) +{ + int error; + /* Compile the program */ + int buildProgramFailed = 0; + int printedSource = 0; + error = clBuildProgram(*outProgram, 0, NULL, buildOptions, NULL, NULL); + if (error != CL_SUCCESS) + { + unsigned int i; + print_error(error, "clBuildProgram failed"); + buildProgramFailed = 1; + printedSource = 1; + log_error("Build options: %s\n", buildOptions); + log_error("Original source is: ------------\n"); + for (i = 0; i < numKernelLines; i++) + log_error("%s", kernelProgram[i]); + } + + // Verify the build status on all devices + cl_uint deviceCount = 0; + error = 
clGetProgramInfo(*outProgram, CL_PROGRAM_NUM_DEVICES, sizeof(deviceCount), &deviceCount, NULL); + if (error != CL_SUCCESS) + { + print_error(error, "clGetProgramInfo CL_PROGRAM_NUM_DEVICES failed"); + return error; + } + + if (deviceCount == 0) + { + log_error("No devices found for program.\n"); + return -1; + } + + cl_device_id *devices = (cl_device_id *)malloc(deviceCount * sizeof(cl_device_id)); + if (NULL == devices) + return -1; + BufferOwningPtr devicesBuf(devices); + + memset(devices, 0, deviceCount * sizeof(cl_device_id)); + error = clGetProgramInfo(*outProgram, CL_PROGRAM_DEVICES, sizeof(cl_device_id) * deviceCount, devices, NULL); + if (error != CL_SUCCESS) + { + print_error(error, "clGetProgramInfo CL_PROGRAM_DEVICES failed"); + return error; + } + + cl_uint z; + bool buildFailed = false; + for (z = 0; z < deviceCount; z++) + { + char deviceName[4096] = ""; + error = clGetDeviceInfo(devices[z], CL_DEVICE_NAME, sizeof(deviceName), deviceName, NULL); + if (error != CL_SUCCESS || deviceName[0] == '\0') + { + log_error("Device \"%d\" failed to return a name\n", z); + print_error(error, "clGetDeviceInfo CL_DEVICE_NAME failed"); + } + + cl_build_status buildStatus; + error = clGetProgramBuildInfo(*outProgram, devices[z], CL_PROGRAM_BUILD_STATUS, sizeof(buildStatus), &buildStatus, NULL); + if (error != CL_SUCCESS) + { + print_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_STATUS failed"); + return error; + } + + if (buildStatus == CL_BUILD_SUCCESS && buildProgramFailed && deviceCount == 1) + { + buildFailed = true; + log_error("clBuildProgram returned an error, but buildStatus is marked as CL_BUILD_SUCCESS.\n"); + } + + if (buildStatus != CL_BUILD_SUCCESS) + { + + char statusString[64] = ""; + if (buildStatus == (cl_build_status)CL_BUILD_SUCCESS) + sprintf(statusString, "CL_BUILD_SUCCESS"); + else if (buildStatus == (cl_build_status)CL_BUILD_NONE) + sprintf(statusString, "CL_BUILD_NONE"); + else if (buildStatus == (cl_build_status)CL_BUILD_ERROR) + 
sprintf(statusString, "CL_BUILD_ERROR"); + else if (buildStatus == (cl_build_status)CL_BUILD_IN_PROGRESS) + sprintf(statusString, "CL_BUILD_IN_PROGRESS"); + else + sprintf(statusString, "UNKNOWN (%d)", buildStatus); + + if (buildStatus != CL_BUILD_SUCCESS) + log_error("Build not successful for device \"%s\", status: %s\n", deviceName, statusString); + size_t paramSize = 0; + error = clGetProgramBuildInfo(*outProgram, devices[z], CL_PROGRAM_BUILD_LOG, 0, NULL, ¶mSize); + if (error != CL_SUCCESS) + { + + print_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed"); + return error; + } + + std::string log; + log.resize(paramSize / sizeof(char)); + error = clGetProgramBuildInfo(*outProgram, devices[z], CL_PROGRAM_BUILD_LOG, paramSize, &log[0], NULL); + if (error != CL_SUCCESS || log[0] == '\0') + { + log_error("Device %d (%s) failed to return a build log\n", z, deviceName); + if (error) + { + print_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed"); + return error; + } + else + { + log_error("clGetProgramBuildInfo returned an empty log.\n"); + return -1; + } + } + // In this case we've already printed out the code above. 
+ if (!printedSource) + { + unsigned int i; + log_error("Original source is: ------------\n"); + for (i = 0; i < numKernelLines; i++) + log_error("%s", kernelProgram[i]); + printedSource = 1; + } + log_error("Build log for device \"%s\" is: ------------\n", deviceName); + log_error("%s\n", log.c_str()); + log_error("\n----------\n"); + return -1; + } + } + + if (buildFailed) + { + return -1; + } + + /* And create a kernel from it */ + if (kernelName != NULL) + { + *outKernel = clCreateKernel(*outProgram, kernelName, &error); + if (*outKernel == NULL || error != CL_SUCCESS) + { + print_error(error, "Unable to create kernel"); + return error; + } + } + + return 0; +} + +int get_device_version( cl_device_id id, size_t* major, size_t* minor) +{ + cl_char buffer[ 4098 ]; + size_t length; + + // Device version should fit the regex "OpenCL [0-9]+\.[0-9]+ *.*" + cl_int error = clGetDeviceInfo( id, CL_DEVICE_VERSION, sizeof( buffer ), buffer, &length ); + test_error( error, "Unable to get device version string" ); + + char *p1 = (char *)buffer + strlen( "OpenCL " ); + char *p2; + while( *p1 == ' ' ) + p1++; + *major = strtol( p1, &p2, 10 ); + error = *p2 != '.'; + test_error(error, "ERROR: Version number must contain a decimal point!"); + *minor = strtol( ++p2, NULL, 10 ); + return error; +} + +int get_max_allowed_work_group_size( cl_context context, cl_kernel kernel, size_t *outMaxSize, size_t *outLimits ) +{ + cl_device_id *devices; + size_t size, maxCommonSize = 0; + int numDevices, i, j, error; + cl_uint numDims; + size_t outSize; + size_t sizeLimit[]={1,1,1}; + + + /* Assume fewer than 16 devices will be returned */ + error = clGetContextInfo( context, CL_CONTEXT_DEVICES, 0, NULL, &outSize ); + test_error( error, "Unable to obtain list of devices size for context" ); + devices = (cl_device_id *)malloc(outSize); + BufferOwningPtr devicesBuf(devices); + + error = clGetContextInfo( context, CL_CONTEXT_DEVICES, outSize, devices, NULL ); + test_error( error, "Unable to 
obtain list of devices for context" ); + + numDevices = (int)( outSize / sizeof( cl_device_id ) ); + + for( i = 0; i < numDevices; i++ ) + { + error = clGetDeviceInfo( devices[i], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof( size ), &size, NULL ); + test_error( error, "Unable to obtain max work group size for device" ); + if( size < maxCommonSize || maxCommonSize == 0) + maxCommonSize = size; + + error = clGetKernelWorkGroupInfo( kernel, devices[i], CL_KERNEL_WORK_GROUP_SIZE, sizeof( size ), &size, NULL ); + test_error( error, "Unable to obtain max work group size for device and kernel combo" ); + if( size < maxCommonSize || maxCommonSize == 0) + maxCommonSize = size; + + error= clGetDeviceInfo( devices[i], CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof( numDims ), &numDims, NULL); + test_error( error, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS"); + sizeLimit[0] = 1; + error= clGetDeviceInfo( devices[i], CL_DEVICE_MAX_WORK_ITEM_SIZES, numDims*sizeof(size_t), sizeLimit, NULL); + test_error( error, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES"); + + if (outLimits != NULL) + { + if (i == 0) { + for (j=0; j<3; j++) + outLimits[j] = sizeLimit[j]; + } else { + for (j=0; j<(int)numDims; j++) { + if (sizeLimit[j] < outLimits[j]) + outLimits[j] = sizeLimit[j]; + } + } + } + } + + *outMaxSize = (unsigned int)maxCommonSize; + return 0; +} + + +int get_max_common_work_group_size( cl_context context, cl_kernel kernel, + size_t globalThreadSize, size_t *outMaxSize ) +{ + size_t sizeLimit[3]; + int error = get_max_allowed_work_group_size( context, kernel, outMaxSize, sizeLimit ); + if( error != 0 ) + return error; + + /* Now find the largest factor of globalThreadSize that is <= maxCommonSize */ + /* Note for speed, we don't need to check the range of maxCommonSize, b/c once it gets to 1, + the modulo test will succeed and break the loop anyway */ + for( ; ( globalThreadSize % *outMaxSize ) != 0 || (*outMaxSize > sizeLimit[0]); (*outMaxSize)-- ) + ; + 
return 0; +} + +int get_max_common_2D_work_group_size( cl_context context, cl_kernel kernel, + size_t *globalThreadSizes, size_t *outMaxSizes ) +{ + size_t sizeLimit[3]; + size_t maxSize; + int error = get_max_allowed_work_group_size( context, kernel, &maxSize, sizeLimit ); + if( error != 0 ) + return error; + + /* Now find a set of factors, multiplied together less than maxSize, but each a factor of the global + sizes */ + + /* Simple case */ + if( globalThreadSizes[ 0 ] * globalThreadSizes[ 1 ] <= maxSize ) + { + if (globalThreadSizes[ 0 ] <= sizeLimit[0] && globalThreadSizes[ 1 ] <= sizeLimit[1]) { + outMaxSizes[ 0 ] = globalThreadSizes[ 0 ]; + outMaxSizes[ 1 ] = globalThreadSizes[ 1 ]; + return 0; + } + } + + size_t remainingSize, sizeForThisOne; + remainingSize = maxSize; + int i, j; + for (i=0 ; i<2; i++) { + if (globalThreadSizes[i] > remainingSize) + sizeForThisOne = remainingSize; + else + sizeForThisOne = globalThreadSizes[i]; + for (; (globalThreadSizes[i] % sizeForThisOne) != 0 || (sizeForThisOne > sizeLimit[i]); sizeForThisOne--) ; + outMaxSizes[i] = sizeForThisOne; + remainingSize = maxSize; + for (j=0; j<=i; j++) + remainingSize /=outMaxSizes[j]; + } + + return 0; +} + +int get_max_common_3D_work_group_size( cl_context context, cl_kernel kernel, + size_t *globalThreadSizes, size_t *outMaxSizes ) +{ + size_t sizeLimit[3]; + size_t maxSize; + int error = get_max_allowed_work_group_size( context, kernel, &maxSize, sizeLimit ); + if( error != 0 ) + return error; + /* Now find a set of factors, multiplied together less than maxSize, but each a factor of the global + sizes */ + + /* Simple case */ + if( globalThreadSizes[ 0 ] * globalThreadSizes[ 1 ] * globalThreadSizes[ 2 ] <= maxSize ) + { + if (globalThreadSizes[ 0 ] <= sizeLimit[0] && globalThreadSizes[ 1 ] <= sizeLimit[1] && globalThreadSizes[ 2 ] <= sizeLimit[2]) { + outMaxSizes[ 0 ] = globalThreadSizes[ 0 ]; + outMaxSizes[ 1 ] = globalThreadSizes[ 1 ]; + outMaxSizes[ 2 ] = globalThreadSizes[ 2 ]; + 
return 0; + } + } + + size_t remainingSize, sizeForThisOne; + remainingSize = maxSize; + int i, j; + for (i=0 ; i<3; i++) { + if (globalThreadSizes[i] > remainingSize) + sizeForThisOne = remainingSize; + else + sizeForThisOne = globalThreadSizes[i]; + for (; (globalThreadSizes[i] % sizeForThisOne) != 0 || (sizeForThisOne > sizeLimit[i]); sizeForThisOne--) ; + outMaxSizes[i] = sizeForThisOne; + remainingSize = maxSize; + for (j=0; j<=i; j++) + remainingSize /=outMaxSizes[j]; + } + + return 0; +} + +/* Helper to determine if an extension is supported by a device */ +int is_extension_available( cl_device_id device, const char *extensionName ) +{ + char *extString; + size_t size = 0; + int err; + int result = 0; + + if(( err = clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, 0, NULL, &size) )) + { + log_error( "Error: failed to determine size of device extensions string at %s:%d (err = %d)\n", __FILE__, __LINE__, err ); + return 0; + } + + if( 0 == size ) + return 0; + + extString = (char*) malloc( size ); + + if( NULL == extString ) + { + log_error( "Error: unable to allocate %ld byte buffer for extension string at %s:%d (err = %d)\n", size, __FILE__, __LINE__, err ); + return 0; + } + BufferOwningPtr extStringBuf(extString); + + if(( err = clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, size, extString, NULL) )) + { + log_error( "Error: failed to obtain device extensions string at %s:%d (err = %d)\n", __FILE__, __LINE__, err ); + return 0; + } + + if( strstr( extString, extensionName ) ) + result = 1; + + return result; +} + +/* Helper to determine if a device supports an image format */ +int is_image_format_supported( cl_context context, cl_mem_flags flags, cl_mem_object_type image_type, const cl_image_format *fmt ) +{ + cl_image_format *list; + cl_uint count = 0; + cl_int err = clGetSupportedImageFormats( context, flags, image_type, 128, NULL, &count ); + if( count == 0 ) + return 0; + + list = (cl_image_format*) malloc( count * sizeof( cl_image_format ) ); + if( NULL 
== list ) + { + log_error( "Error: unable to allocate %ld byte buffer for image format list at %s:%d (err = %d)\n", count * sizeof( cl_image_format ), __FILE__, __LINE__, err ); + return 0; + } + BufferOwningPtr listBuf(list); + + + cl_int error = clGetSupportedImageFormats( context, flags, image_type, count, list, NULL ); + if( error ) + { + log_error( "Error: failed to obtain supported image type list at %s:%d (err = %d)\n", __FILE__, __LINE__, err ); + return 0; + } + + // iterate looking for a match. + cl_uint i; + for( i = 0; i < count; i++ ) + { + if( fmt->image_channel_data_type == list[ i ].image_channel_data_type && + fmt->image_channel_order == list[ i ].image_channel_order ) + break; + } + + return ( i < count ) ? 1 : 0; +} + +size_t get_pixel_bytes( const cl_image_format *fmt ); +size_t get_pixel_bytes( const cl_image_format *fmt ) +{ + size_t chanCount; + switch( fmt->image_channel_order ) + { + case CL_R: + case CL_A: + case CL_Rx: + case CL_INTENSITY: + case CL_LUMINANCE: + case CL_DEPTH: + chanCount = 1; + break; + case CL_RG: + case CL_RA: + case CL_RGx: + chanCount = 2; + break; + case CL_RGB: + case CL_RGBx: + case CL_sRGB: + case CL_sRGBx: + chanCount = 3; + break; + case CL_RGBA: + case CL_ARGB: + case CL_BGRA: + case CL_sBGRA: + case CL_sRGBA: +#ifdef CL_1RGB_APPLE + case CL_1RGB_APPLE: +#endif +#ifdef CL_BGR1_APPLE + case CL_BGR1_APPLE: +#endif + chanCount = 4; + break; + default: + log_error("Unknown channel order at %s:%d!\n", __FILE__, __LINE__ ); + abort(); + break; + } + + switch( fmt->image_channel_data_type ) + { + case CL_UNORM_SHORT_565: + case CL_UNORM_SHORT_555: + return 2; + + case CL_UNORM_INT_101010: + return 4; + + case CL_SNORM_INT8: + case CL_UNORM_INT8: + case CL_SIGNED_INT8: + case CL_UNSIGNED_INT8: + return chanCount; + + case CL_SNORM_INT16: + case CL_UNORM_INT16: + case CL_HALF_FLOAT: + case CL_SIGNED_INT16: + case CL_UNSIGNED_INT16: +#ifdef CL_SFIXED14_APPLE + case CL_SFIXED14_APPLE: +#endif + return chanCount * 2; + + 
case CL_SIGNED_INT32: + case CL_UNSIGNED_INT32: + case CL_FLOAT: + return chanCount * 4; + + default: + log_error("Unknown channel data type at %s:%d!\n", __FILE__, __LINE__ ); + abort(); + } + + return 0; +} + +int verifyImageSupport( cl_device_id device ) +{ + if( checkForImageSupport( device ) ) + { + log_error( "ERROR: Device does not supported images as required by this test!\n" ); + return CL_IMAGE_FORMAT_NOT_SUPPORTED; + } + return 0; +} + +int checkForImageSupport( cl_device_id device ) +{ + cl_uint i; + int error; + + + /* Check the device props to see if images are supported at all first */ + error = clGetDeviceInfo( device, CL_DEVICE_IMAGE_SUPPORT, sizeof( i ), &i, NULL ); + test_error( error, "Unable to query device for image support" ); + if( i == 0 ) + { + return CL_IMAGE_FORMAT_NOT_SUPPORTED; + } + + /* So our support is good */ + return 0; +} + +int checkFor3DImageSupport( cl_device_id device ) +{ + cl_uint i; + int error; + + /* Check the device props to see if images are supported at all first */ + error = clGetDeviceInfo( device, CL_DEVICE_IMAGE_SUPPORT, sizeof( i ), &i, NULL ); + test_error( error, "Unable to query device for image support" ); + if( i == 0 ) + { + return CL_IMAGE_FORMAT_NOT_SUPPORTED; + } + + char profile[128]; + error = clGetDeviceInfo( device, CL_DEVICE_PROFILE, sizeof(profile ), profile, NULL ); + test_error( error, "Unable to query device for CL_DEVICE_PROFILE" ); + if( 0 == strcmp( profile, "EMBEDDED_PROFILE" ) ) + { + size_t width = -1L; + size_t height = -1L; + size_t depth = -1L; + error = clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof(width), &width, NULL ); + test_error( error, "Unable to get CL_DEVICE_IMAGE3D_MAX_WIDTH" ); + error = clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof(height), &height, NULL ); + test_error( error, "Unable to get CL_DEVICE_IMAGE3D_MAX_HEIGHT" ); + error = clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof(depth), &depth, NULL ); + test_error( error, 
"Unable to get CL_DEVICE_IMAGE3D_MAX_DEPTH" ); + + if( 0 == (height | width | depth )) + return CL_IMAGE_FORMAT_NOT_SUPPORTED; + } + + /* So our support is good */ + return 0; +} + +void * align_malloc(size_t size, size_t alignment) +{ +#if defined(_WIN32) && defined(_MSC_VER) + return _aligned_malloc(size, alignment); +#elif defined(__linux__) || defined (linux) || defined(__APPLE__) + void * ptr = NULL; + // alignemnt must be a power of two and multiple of sizeof(void *). + if ( alignment < sizeof( void * ) ) + { + alignment = sizeof( void * ); + } +#if defined(__ANDROID__) + ptr = memalign(alignment, size); + if ( ptr ) + return ptr; +#else + if (0 == posix_memalign(&ptr, alignment, size)) + return ptr; +#endif + return NULL; +#elif defined(__MINGW32__) + return __mingw_aligned_malloc(size, alignment); +#else + #error "Please add support OS for aligned malloc" +#endif +} + +void align_free(void * ptr) +{ +#if defined(_WIN32) && defined(_MSC_VER) + _aligned_free(ptr); +#elif defined(__linux__) || defined (linux) || defined(__APPLE__) + return free(ptr); +#elif defined(__MINGW32__) + return __mingw_aligned_free(ptr); +#else + #error "Please add support OS for aligned free" +#endif +} + +size_t get_min_alignment(cl_context context) +{ + static cl_uint align_size = 0; + + if( 0 == align_size ) + { + cl_device_id * devices; + size_t devices_size = 0; + cl_uint result = 0; + cl_int error; + int i; + + error = clGetContextInfo (context, + CL_CONTEXT_DEVICES, + 0, + NULL, + &devices_size); + test_error_ret(error, "clGetContextInfo failed", 0); + + devices = (cl_device_id*)malloc(devices_size); + if (devices == NULL) { + print_error( error, "malloc failed" ); + return 0; + } + + error = clGetContextInfo (context, + CL_CONTEXT_DEVICES, + devices_size, + (void*)devices, + NULL); + test_error_ret(error, "clGetContextInfo failed", 0); + + for (i = 0; i < (int)(devices_size/sizeof(cl_device_id)); i++) + { + cl_uint alignment = 0; + + error = clGetDeviceInfo (devices[i], + 
CL_DEVICE_MEM_BASE_ADDR_ALIGN, + sizeof(cl_uint), + (void*)&alignment, + NULL); + + if (error == CL_SUCCESS) + { + alignment >>= 3; // convert bits to bytes + result = (alignment > result) ? alignment : result; + } + else + print_error( error, "clGetDeviceInfo failed" ); + } + + align_size = result; + free(devices); + } + + return align_size; +} + +cl_device_fp_config get_default_rounding_mode( cl_device_id device ) +{ + char profileStr[128] = ""; + cl_device_fp_config single = 0; + int error = clGetDeviceInfo( device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof( single ), &single, NULL ); + if( error ) + test_error_ret( error, "Unable to get device CL_DEVICE_SINGLE_FP_CONFIG", 0 ); + + if( single & CL_FP_ROUND_TO_NEAREST ) + return CL_FP_ROUND_TO_NEAREST; + + if( 0 == (single & CL_FP_ROUND_TO_ZERO) ) + test_error_ret( -1, "FAILURE: device must support either CL_DEVICE_SINGLE_FP_CONFIG or CL_FP_ROUND_TO_NEAREST", 0 ); + + // Make sure we are an embedded device before allowing a pass + if( (error = clGetDeviceInfo( device, CL_DEVICE_PROFILE, sizeof( profileStr ), &profileStr, NULL ) )) + test_error_ret( error, "FAILURE: Unable to get CL_DEVICE_PROFILE", 0 ); + + if( strcmp( profileStr, "EMBEDDED_PROFILE" ) ) + test_error_ret( error, "FAILURE: non-EMBEDDED_PROFILE devices must support CL_FP_ROUND_TO_NEAREST", 0 ); + + return CL_FP_ROUND_TO_ZERO; +} + +int checkDeviceForQueueSupport( cl_device_id device, cl_command_queue_properties prop ) +{ + cl_command_queue_properties realProps; + cl_int error = clGetDeviceInfo( device, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES, sizeof( realProps ), &realProps, NULL ); + test_error_ret( error, "FAILURE: Unable to get device queue properties", 0 ); + + return ( realProps & prop ) ? 
1 : 0; +} + +int printDeviceHeader( cl_device_id device ) +{ + char deviceName[ 512 ], deviceVendor[ 512 ], deviceVersion[ 512 ], cLangVersion[ 512 ]; + int error; + + error = clGetDeviceInfo( device, CL_DEVICE_NAME, sizeof( deviceName ), deviceName, NULL ); + test_error( error, "Unable to get CL_DEVICE_NAME for device" ); + + error = clGetDeviceInfo( device, CL_DEVICE_VENDOR, sizeof( deviceVendor ), deviceVendor, NULL ); + test_error( error, "Unable to get CL_DEVICE_VENDOR for device" ); + + error = clGetDeviceInfo( device, CL_DEVICE_VERSION, sizeof( deviceVersion ), deviceVersion, NULL ); + test_error( error, "Unable to get CL_DEVICE_VERSION for device" ); + + error = clGetDeviceInfo( device, CL_DEVICE_OPENCL_C_VERSION, sizeof( cLangVersion ), cLangVersion, NULL ); + test_error( error, "Unable to get CL_DEVICE_OPENCL_C_VERSION for device" ); + + log_info("Compute Device Name = %s, Compute Device Vendor = %s, Compute Device Version = %s%s%s\n", + deviceName, deviceVendor, deviceVersion, ( error == CL_SUCCESS ) ? ", CL C Version = " : "", + ( error == CL_SUCCESS ) ? cLangVersion : "" ); + + return CL_SUCCESS; +} diff --git a/test_common/harness/kernelHelpers.h b/test_common/harness/kernelHelpers.h new file mode 100644 index 00000000..b39113ec --- /dev/null +++ b/test_common/harness/kernelHelpers.h @@ -0,0 +1,172 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef _kernelHelpers_h +#define _kernelHelpers_h + +// Configuration +#include "../config.hpp" + +#include "compat.h" + +#include +#include + +#if defined (__MINGW32__) +#include +#endif + +#include + +#ifdef __APPLE__ + #include +#else + #include +#endif + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +/* + * The below code is intended to be used at the top of kernels that appear inline in files to set line and file info for the kernel: + * + * const char *source = { + * INIT_OPENCL_DEBUG_INFO + * "__kernel void foo( int x )\n" + * "{\n" + * " ...\n" + * "}\n" + * }; + */ +#define INIT_OPENCL_DEBUG_INFO SET_OPENCL_LINE_INFO( __LINE__, __FILE__ ) +#define SET_OPENCL_LINE_INFO(_line, _file) "#line " STRINGIFY(_line) " " STRINGIFY(_file) "\n" +#ifndef STRINGIFY_VALUE + #define STRINGIFY_VALUE(_x) STRINGIFY(_x) +#endif +#ifndef STRINGIFY + #define STRINGIFY(_x) #_x +#endif + +const int MAX_LEN_FOR_KERNEL_LIST = 20; + +/* Helper that creates a single program and kernel from a single-kernel program source */ +extern int create_single_kernel_helper(cl_context context, + cl_program *outProgram, + cl_kernel *outKernel, + unsigned int numKernelLines, + const char **kernelProgram, + const char *kernelName, + const char *buildOptions = NULL, + const bool openclCXX = false); + +extern int create_single_kernel_helper_with_build_options(cl_context context, + cl_program *outProgram, + cl_kernel *outKernel, + unsigned int numKernelLines, + const char **kernelProgram, + const char *kernelName, + const char *buildOptions, + const bool openclCXX = false); + +extern int create_single_kernel_helper_create_program(cl_context context, + cl_program *outProgram, + unsigned int numKernelLines, + const char **kernelProgram, + const char *buildOptions = NULL, + const bool openclCXX = false); + +/* Creates OpenCL C++ program. This one must be used for creating OpenCL C++ program. 
*/ +extern int create_openclcpp_program(cl_context context, + cl_program *outProgram, + unsigned int numKernelLines, + const char **kernelProgram, + const char *buildOptions = NULL); + +/* Builds program (outProgram) and creates one kernel */ +int build_program_create_kernel_helper(cl_context context, + cl_program *outProgram, + cl_kernel *outKernel, + unsigned int numKernelLines, + const char **kernelProgram, + const char *kernelName, + const char *buildOptions = NULL); + +/* Helper to obtain the biggest fit work group size for all the devices in a given group and for the given global thread size */ +extern int get_max_common_work_group_size( cl_context context, cl_kernel kernel, size_t globalThreadSize, size_t *outSize ); + +/* Helper to obtain the biggest fit work group size for all the devices in a given group and for the given global thread size */ +extern int get_max_common_2D_work_group_size( cl_context context, cl_kernel kernel, size_t *globalThreadSize, size_t *outSizes ); + +/* Helper to obtain the biggest fit work group size for all the devices in a given group and for the given global thread size */ +extern int get_max_common_3D_work_group_size( cl_context context, cl_kernel kernel, size_t *globalThreadSize, size_t *outSizes ); + +/* Helper to get major/minor number for a device */ +extern int get_device_version( cl_device_id id, size_t* major, size_t* minor); + +/* Helper to obtain the biggest allowed work group size for all the devices in a given group */ +extern int get_max_allowed_work_group_size( cl_context context, cl_kernel kernel, size_t *outSize, size_t *outLimits ); + +/* Helper to determine if an extension is supported by a device */ +extern int is_extension_available( cl_device_id device, const char *extensionName ); + +/* Helper to determine if a device supports an image format */ +extern int is_image_format_supported( cl_context context, cl_mem_flags flags, cl_mem_object_type image_type, const cl_image_format *fmt ); + +/* Helper to get 
pixel size for a pixel format */ +size_t get_pixel_bytes( const cl_image_format *fmt ); + +/* Verify the given device supports images. 0 means you're good to go, otherwise an error */ +extern int verifyImageSupport( cl_device_id device ); + +/* Checks that the given device supports images. Same as verify, but doesn't print an error */ +extern int checkForImageSupport( cl_device_id device ); +extern int checkFor3DImageSupport( cl_device_id device ); + +/* Checks that a given queue property is supported on the specified device. Returns 1 if supported, 0 if not or an error. */ +extern int checkDeviceForQueueSupport( cl_device_id device, cl_command_queue_properties prop ); + +/* Helper for aligned memory allocation */ +void * align_malloc(size_t size, size_t alignment); +void align_free(void *); + +/* Helper to obtain the min alignment for a given context, i.e the max of all min alignments for devices attached to the context*/ +size_t get_min_alignment(cl_context context); + +/* Helper to obtain the default rounding mode for single precision computation. (Double is always CL_FP_ROUND_TO_NEAREST.) Returns 0 on error. */ +cl_device_fp_config get_default_rounding_mode( cl_device_id device ); + +#define PASSIVE_REQUIRE_IMAGE_SUPPORT( device ) \ + if( checkForImageSupport( device ) ) \ + { \ + log_info( "\n\tNote: device does not support images. Skipping test...\n" ); \ + return 0; \ + } + +#define PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( device ) \ + if( checkFor3DImageSupport( device ) ) \ + { \ + log_info( "\n\tNote: device does not support 3D images. 
Skipping test...\n" ); \ + return 0; \ + } + +/* Prints out the standard device header for all tests given the device to print for */ +extern int printDeviceHeader( cl_device_id device ); + +#ifdef __cplusplus +} +#endif // __cplusplus + +#endif // _kernelHelpers_h diff --git a/test_common/harness/mingw_compat.c b/test_common/harness/mingw_compat.c new file mode 100644 index 00000000..54c44635 --- /dev/null +++ b/test_common/harness/mingw_compat.c @@ -0,0 +1,59 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#if defined(__MINGW32__) + +#include "mingw_compat.h" +#include +#include + +//This function is unavailable on various mingw compilers, +//especially 64 bit so implementing it here +const char *basename_dot="."; +char* +basename(char *path) +{ + char *p = path, *b = NULL; + int len = strlen(path); + + if (path == NULL) { + return (char*)basename_dot; + } + + // Not absolute path on windows + if (path[1] != ':') { + return path; + } + + // Trim trailing path seperators + if (path[len - 1] == '\\' || + path[len - 1] == '/' ) { + len--; + path[len] = '\0'; + } + + while (len) { + while((*p != '\\' || *p != '/') && len) { + p++; + len--; + } + p++; + b = p; + } + + return b; +} + +#endif \ No newline at end of file diff --git a/test_common/harness/mingw_compat.h b/test_common/harness/mingw_compat.h new file mode 100644 index 00000000..ab28f398 --- /dev/null +++ b/test_common/harness/mingw_compat.h @@ -0,0 +1,31 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef MINGW_COMPAT_H +#define MINGW_COMPAT_H + +#if defined(__MINGW32__) +char *basename(char *path); +#include + +#if defined(__MINGW64__) +//mingw-w64 doesnot have __mingw_aligned_malloc, instead it has _aligned_malloc +#define __mingw_aligned_malloc _aligned_malloc +#define __mingw_aligned_free _aligned_free +#include +#endif //(__MINGW64__) + +#endif //(__MINGW32__) +#endif // MINGW_COMPAT_H diff --git a/test_common/harness/msvc9.c b/test_common/harness/msvc9.c new file mode 100644 index 00000000..3b168453 --- /dev/null +++ b/test_common/harness/msvc9.c @@ -0,0 +1,774 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "compat.h" + +#if defined ( _MSC_VER ) + +#include +#include + +#include + +#include + +#if ! 
defined( __INTEL_COMPILER ) + +/////////////////////////////////////////////////////////////////// +// +// rint, rintf +// +/////////////////////////////////////////////////////////////////// + +float copysignf( float x, float y ) +{ + union{ cl_uint u; float f; }ux, uy; + + ux.f = x; + uy.f = y; + + ux.u = (ux.u & 0x7fffffffU) | (uy.u & 0x80000000U); + + return ux.f; +} + +double copysign( double x, double y ) +{ + union{ cl_ulong u; double f; }ux, uy; + + ux.f = x; + uy.f = y; + + ux.u = (ux.u & 0x7fffffffffffffffULL) | (uy.u & 0x8000000000000000ULL); + + return ux.f; +} + +long double copysignl( long double x, long double y ) +{ + union + { + long double f; + struct{ cl_ulong m; cl_ushort sexp; }u; + }ux, uy; + + ux.f = x; + uy.f = y; + + ux.u.sexp = (ux.u.sexp & 0x7fff) | (uy.u.sexp & 0x8000); + + return ux.f; +} + +float rintf(float x) +{ + float absx = fabsf(x); + + if( absx < 8388608.0f /* 0x1.0p23f */ ) + { + float magic = copysignf( 8388608.0f /* 0x1.0p23f */, x ); + float rounded = x + magic; + rounded -= magic; + x = copysignf( rounded, x ); + } + + return x; +} + +double rint(double x) +{ + double absx = fabs(x); + + if( absx < 4503599627370496.0 /* 0x1.0p52f */ ) + { + double magic = copysign( 4503599627370496.0 /* 0x1.0p52 */, x ); + double rounded = x + magic; + rounded -= magic; + x = copysign( rounded, x ); + } + + return x; +} + +long double rintl(long double x) +{ + double absx = fabs(x); + + if( absx < 9223372036854775808.0L /* 0x1.0p64f */ ) + { + long double magic = copysignl( 9223372036854775808.0L /* 0x1.0p63L */, x ); + long double rounded = x + magic; + rounded -= magic; + x = copysignl( rounded, x ); + } + + return x; +} + + +/////////////////////////////////////////////////////////////////// +// +// ilogb, ilogbf, ilogbl +// +/////////////////////////////////////////////////////////////////// +#ifndef FP_ILOGB0 + #define FP_ILOGB0 INT_MIN +#endif + +#ifndef FP_ILOGBNAN + #define FP_ILOGBNAN INT_MIN +#endif + +int ilogb (double x) +{ + 
union{ double f; cl_ulong u;} u; + u.f = x; + + cl_ulong absx = u.u & CL_LONG_MAX; + if( absx - 0x0001000000000000ULL >= 0x7ff0000000000000ULL - 0x0001000000000000ULL) + { + switch( absx ) + { + case 0: + return FP_ILOGB0; + case 0x7ff0000000000000ULL: + return INT_MAX; + default: + if( absx > 0x7ff0000000000000ULL ) + return FP_ILOGBNAN; + + // subnormal + u.u = absx | 0x3ff0000000000000ULL; + u.f -= 1.0; + return (u.u >> 52) - (1023 + 1022); + } + } + + return (absx >> 52) - 1023; +} + + +int ilogbf (float x) +{ + union{ float f; cl_uint u;} u; + u.f = x; + + cl_uint absx = u.u & 0x7fffffff; + if( absx - 0x00800000U >= 0x7f800000U - 0x00800000U) + { + switch( absx ) + { + case 0: + return FP_ILOGB0; + case 0x7f800000U: + return INT_MAX; + default: + if( absx > 0x7f800000 ) + return FP_ILOGBNAN; + + // subnormal + u.u = absx | 0x3f800000U; + u.f -= 1.0f; + return (u.u >> 23) - (127 + 126); + } + } + + return (absx >> 23) - 127; +} + +int ilogbl (long double x) +{ + union + { + long double f; + struct{ cl_ulong m; cl_ushort sexp; }u; + } u; + u.f = x; + + int exp = u.u.sexp & 0x7fff; + if( 0 == exp ) + { + if( 0 == u.u.m ) + return FP_ILOGB0; + + //subnormal + u.u.sexp = 0x3fff; + u.f -= 1.0f; + exp = u.u.sexp & 0x7fff; + + return exp - (0x3fff + 0x3ffe); + } + else if( 0x7fff == exp ) + { + if( u.u.m & CL_LONG_MAX ) + return FP_ILOGBNAN; + + return INT_MAX; + } + + return exp - 0x3fff; +} + + + +/////////////////////////////////////////////////////////////////// +// +// fmax, fmin, fmaxf, fminf +// +/////////////////////////////////////////////////////////////////// + +static void GET_BITS_SP32(float fx, unsigned int* ux) +{ + volatile union {float f; unsigned int u;} _bitsy; + _bitsy.f = (fx); + *ux = _bitsy.u; +} +/* static void GET_BITS_SP32(float fx, unsigned int* ux) */ +/* { */ +/* volatile union {float f; unsigned int i;} _bitsy; */ +/* _bitsy.f = (fx); */ +/* *ux = _bitsy.i; */ +/* } */ +static void PUT_BITS_SP32(unsigned int ux, float* fx) +{ + volatile 
union {float f; unsigned int u;} _bitsy; + _bitsy.u = (ux); + *fx = _bitsy.f; +} +/* static void PUT_BITS_SP32(unsigned int ux, float* fx) */ +/* { */ +/* volatile union {float f; unsigned int i;} _bitsy; */ +/* _bitsy.i = (ux); */ +/* *fx = _bitsy.f; */ +/* } */ +static void GET_BITS_DP64(double dx, unsigned __int64* lx) +{ + volatile union {double d; unsigned __int64 l;} _bitsy; + _bitsy.d = (dx); + *lx = _bitsy.l; +} +static void PUT_BITS_DP64(unsigned __int64 lx, double* dx) +{ + volatile union {double d; unsigned __int64 l;} _bitsy; + _bitsy.l = (lx); + *dx = _bitsy.d; +} + +#if 0 +int SIGNBIT_DP64(double x ) +{ + int hx; + _GET_HIGH_WORD(hx,x); + return((hx>>31)); +} +#endif + +/* fmax(x, y) returns the larger (more positive) of x and y. + NaNs are treated as missing values: if one argument is NaN, + the other argument is returned. If both arguments are NaN, + the first argument is returned. */ + +/* This works so long as the compiler knows that (x != x) means + that x is NaN; gcc does. */ +double fmax(double x, double y) +{ + if( isnan(y) ) + return x; + + return x >= y ? x : y; +} + + +/* fmin(x, y) returns the smaller (more negative) of x and y. + NaNs are treated as missing values: if one argument is NaN, + the other argument is returned. If both arguments are NaN, + the first argument is returned. */ + +double fmin(double x, double y) +{ + if( isnan(y) ) + return x; + + return x <= y ? x : y; +} + + +float fmaxf( float x, float y ) +{ + if( isnan(y) ) + return x; + + return x >= y ? x : y; +} + +/* fminf(x, y) returns the smaller (more negative) of x and y. + NaNs are treated as missing values: if one argument is NaN, + the other argument is returned. If both arguments are NaN, + the first argument is returned. */ + +float fminf(float x, float y) +{ + if( isnan(y) ) + return x; + + return x <= y ? 
x : y; +} + +long double scalblnl(long double x, long n) +{ + union + { + long double d; + struct{ cl_ulong m; cl_ushort sexp;}u; + }u; + u.u.m = CL_LONG_MIN; + + if( x == 0.0L || n < -2200) + return copysignl( 0.0L, x ); + + if( n > 2200 ) + return INFINITY; + + if( n < 0 ) + { + u.u.sexp = 0x3fff - 1022; + while( n <= -1022 ) + { + x *= u.d; + n += 1022; + } + u.u.sexp = 0x3fff + n; + x *= u.d; + return x; + } + + if( n > 0 ) + { + u.u.sexp = 0x3fff + 1023; + while( n >= 1023 ) + { + x *= u.d; + n -= 1023; + } + u.u.sexp = 0x3fff + n; + x *= u.d; + return x; + } + + return x; +} + +/////////////////////////////////////////////////////////////////// +// +// log2 +// +/////////////////////////////////////////////////////////////////// +const static cl_double log_e_base2 = 1.4426950408889634074; +const static cl_double log_10_base2 = 3.3219280948873623478; + +//double log10(double x); + +double log2(double x) +{ + return 1.44269504088896340735992468100189214 * log(x); +} + +long double log2l(long double x) +{ + return 1.44269504088896340735992468100189214L * log(x); +} + +double trunc(double x) +{ + double absx = fabs(x); + + if( absx < 4503599627370496.0 /* 0x1.0p52f */ ) + { + cl_long rounded = x; + x = copysign( (double) rounded, x ); + } + + return x; +} + +float truncf(float x) +{ + float absx = fabsf(x); + + if( absx < 8388608.0f /* 0x1.0p23f */ ) + { + cl_int rounded = x; + x = copysignf( (float) rounded, x ); + } + + return x; +} + +long lround(double x) +{ + double absx = fabs(x); + + if( absx < 0.5 ) + return 0; + + if( absx < 4503599627370496.0 /* 0x1.0p52 */) + { + absx += 0.5; + cl_long rounded = absx; + absx = rounded; + x = copysign( absx, x ); + } + + if( x >= (double) LONG_MAX ) + return LONG_MAX; + + return (long) x; +} + +long lroundf(float x) +{ + float absx = fabsf(x); + + if( absx < 0.5f ) + return 0; + + if( absx < 8388608.0f ) + { + absx += 0.5f; + cl_int rounded = absx; + absx = rounded; + x = copysignf( absx, x ); + } + + if( x >= (float) 
LONG_MAX ) + return LONG_MAX; + + return (long) x; +} + +double round(double x) +{ + double absx = fabs(x); + + if( absx < 0.5 ) + return copysign( 0.0, x); + + if( absx < 4503599627370496.0 /* 0x1.0p52 */) + { + absx += 0.5; + cl_long rounded = absx; + absx = rounded; + x = copysign( absx, x ); + } + + return x; +} + +float roundf(float x) +{ + float absx = fabsf(x); + + if( absx < 0.5f ) + return copysignf( 0.0f, x); + + if( absx < 8388608.0f ) + { + absx += 0.5f; + cl_int rounded = absx; + absx = rounded; + x = copysignf( absx, x ); + } + + return x; +} + +long double roundl(long double x) +{ + long double absx = fabsl(x); + + if( absx < 0.5L ) + return copysignl( 0.0L, x); + + if( absx < 9223372036854775808.0L /*0x1.0p63L*/ ) + { + absx += 0.5L; + cl_ulong rounded = absx; + absx = rounded; + x = copysignl( absx, x ); + } + + return x; +} + +float cbrtf( float x ) +{ + float z = pow( fabs((double) x), 1.0 / 3.0 ); + return copysignf( z, x ); +} + +double cbrt( double x ) +{ + return copysign( pow( fabs( x ), 1.0 / 3.0 ), x ); +} + +long int lrint (double x) +{ + double absx = fabs(x); + + if( x >= (double) LONG_MAX ) + return LONG_MAX; + + if( absx < 4503599627370496.0 /* 0x1.0p52 */ ) + { + double magic = copysign( 4503599627370496.0 /* 0x1.0p52 */, x ); + double rounded = x + magic; + rounded -= magic; + return (long int) rounded; + } + + return (long int) x; +} + +long int lrintf (float x) +{ + float absx = fabsf(x); + + if( x >= (float) LONG_MAX ) + return LONG_MAX; + + if( absx < 8388608.0f /* 0x1.0p23f */ ) + { + float magic = copysignf( 8388608.0f /* 0x1.0p23f */, x ); + float rounded = x + magic; + rounded -= magic; + return (long int) rounded; + } + + return (long int) x; +} + + +/////////////////////////////////////////////////////////////////// +// +// fenv functions +// +/////////////////////////////////////////////////////////////////// + +#if _MSC_VER < 1900 +int fetestexcept(int excepts) +{ + unsigned int status = _statusfp(); + return excepts & ( 
+ ((status & _SW_INEXACT) ? FE_INEXACT : 0) | + ((status & _SW_UNDERFLOW) ? FE_UNDERFLOW : 0) | + ((status & _SW_OVERFLOW) ? FE_OVERFLOW : 0) | + ((status & _SW_ZERODIVIDE) ? FE_DIVBYZERO : 0) | + ((status & _SW_INVALID) ? FE_INVALID : 0) + ); +} + +int feclearexcept(int excepts) +{ + _clearfp(); + return 0; +} +#endif + +#endif // __INTEL_COMPILER + +#if ! defined( __INTEL_COMPILER ) || __INTEL_COMPILER < 1300 + +float make_nan() +{ +/* This is the IEEE 754 single-precision format: + unsigned int mantissa: 22; + unsigned int quiet_nan: 1; + unsigned int exponent: 8; + unsigned int negative: 1; +*/ + //const static unsigned + static const int32_t _nan = 0x7fc00000; + return *(const float*)(&_nan); +} + +float nanf( const char* str) +{ + cl_uint u = atoi( str ); + u |= 0x7fc00000U; + return *( float*)(&u); +} + + +double nan( const char* str) +{ + cl_ulong u = atoi( str ); + u |= 0x7ff8000000000000ULL; + return *( double*)(&u); +} + +// double check this implementatation +long double nanl( const char* str) +{ + union + { + long double f; + struct { cl_ulong m; cl_ushort sexp; }u; + }u; + u.u.sexp = 0x7fff; + u.u.m = 0x8000000000000000ULL | atoi( str ); + + return u.f; +} + +#endif + +/////////////////////////////////////////////////////////////////// +// +// misc functions +// +/////////////////////////////////////////////////////////////////// + +/* +// This function is commented out because the Windows implementation should never call munmap. +// If it is calling it, we have a bug. Please file a bugzilla. +int munmap(void *addr, size_t len) +{ +// FIXME: this is not correct. 
munmap is like free() http://www.opengroup.org/onlinepubs/7990989775/xsh/munmap.html + + return (int)VirtualAlloc( (LPVOID)addr, len, + MEM_COMMIT|MEM_RESERVE, PAGE_NOACCESS ); +} +*/ + +uint64_t ReadTime( void ) +{ + LARGE_INTEGER current; + QueryPerformanceCounter(¤t); + return (uint64_t)current.QuadPart; +} + +double SubtractTime( uint64_t endTime, uint64_t startTime ) +{ + static double PerformanceFrequency = 0.0; + + if (PerformanceFrequency == 0.0) { + LARGE_INTEGER frequency; + QueryPerformanceFrequency(&frequency); + PerformanceFrequency = (double) frequency.QuadPart; + } + + return (double)(endTime - startTime) / PerformanceFrequency * 1e9; +} + +int cf_signbit(double x) +{ + union + { + double f; + cl_ulong u; + }u; + u.f = x; + return u.u >> 63; +} + +int cf_signbitf(float x) +{ + union + { + float f; + cl_uint u; + }u; + u.f = x; + return u.u >> 31; +} + +float int2float (int32_t ix) +{ + union { + float f; + int32_t i; + } u; + u.i = ix; + return u.f; +} + +int32_t float2int (float fx) +{ + union { + float f; + int32_t i; + } u; + u.f = fx; + return u.i; +} + +#if !defined(_WIN64) +/** Returns the number of leading 0-bits in x, + starting at the most significant bit position. + If x is 0, the result is undefined. 
+*/ +int __builtin_clz(unsigned int pattern) +{ +#if 0 + int res; + __asm { + mov eax, pattern + bsr eax, eax + mov res, eax + } + return 31 - res; +#endif + unsigned long index; + unsigned char res = _BitScanReverse( &index, pattern); + if (res) { + return 8*sizeof(int) - 1 - index; + } else { + return 8*sizeof(int); + } +} +#else +int __builtin_clz(unsigned int pattern) +{ + int count; + if (pattern == 0u) { + return 32; + } + count = 31; + if (pattern >= 1u<<16) { pattern >>= 16; count -= 16; } + if (pattern >= 1u<<8) { pattern >>= 8; count -= 8; } + if (pattern >= 1u<<4) { pattern >>= 4; count -= 4; } + if (pattern >= 1u<<2) { pattern >>= 2; count -= 2; } + if (pattern >= 1u<<1) { count -= 1; } + return count; +} + +#endif // !defined(_WIN64) + +#include +#include + +int usleep(int usec) +{ + Sleep((usec + 999) / 1000); + return 0; +} + +unsigned int sleep( unsigned int sec ) +{ + Sleep( sec * 1000 ); + return 0; +} + +#endif // defined( _MSC_VER ) diff --git a/test_common/harness/mt19937.c b/test_common/harness/mt19937.c new file mode 100644 index 00000000..baa19fb8 --- /dev/null +++ b/test_common/harness/mt19937.c @@ -0,0 +1,280 @@ +/* + A C-program for MT19937, with initialization improved 2002/1/26. + Coded by Takuji Nishimura and Makoto Matsumoto. + + Before using, initialize the state by using init_genrand(seed) + or init_by_array(init_key, key_length). + + Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura, + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. 
The names of its contributors may not be used to endorse or promote + products derived from this software without specific prior written + permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + + Any feedback is very welcome. + http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html + email: m-mat @ math.sci.hiroshima-u.ac.jp (remove space) + + Modifications for use in OpenCL by Ian Ollmann, Apple Inc. 
+ +*/ + +#include +#include +#include "mt19937.h" +#include "mingw_compat.h" + +#ifdef __SSE2__ + #include +#endif + +static void * align_malloc(size_t size, size_t alignment) +{ +#if defined(_WIN32) && defined(_MSC_VER) + return _aligned_malloc(size, alignment); +#elif defined(__linux__) || defined (linux) || defined(__APPLE__) + void * ptr = NULL; +#if defined(__ANDROID__) + ptr = memalign(alignment, size); + if ( ptr ) + return ptr; +#else + if (0 == posix_memalign(&ptr, alignment, size)) + return ptr; +#endif + return NULL; +#elif defined(__MINGW32__) + return __mingw_aligned_malloc(size, alignment); +#else + #error "Please add support OS for aligned malloc" +#endif +} + +static void align_free(void * ptr) +{ +#if defined(_WIN32) && defined(_MSC_VER) + _aligned_free(ptr); +#elif defined(__linux__) || defined (linux) || defined(__APPLE__) + return free(ptr); +#elif defined(__MINGW32__) + return __mingw_aligned_free(ptr); +#else + #error "Please add support OS for aligned free" +#endif +} + + +/* Period parameters */ +#define N 624 /* vector code requires multiple of 4 here */ +#define M 397 +#define MATRIX_A (cl_uint) 0x9908b0dfUL /* constant vector a */ +#define UPPER_MASK (cl_uint) 0x80000000UL /* most significant w-r bits */ +#define LOWER_MASK (cl_uint) 0x7fffffffUL /* least significant r bits */ + +typedef struct _MTdata +{ + cl_uint mt[N]; +#ifdef __SSE2__ + cl_uint cache[N]; +#endif + cl_int mti; +}_MTdata; + +/* initializes mt[N] with a seed */ +MTdata init_genrand(cl_uint s) +{ + MTdata r = (MTdata) align_malloc( sizeof( _MTdata ), 16 ); + if( NULL != r ) + { + cl_uint *mt = r->mt; + int mti = 0; + mt[0]= s; // & 0xffffffffUL; + for (mti=1; mti> 30)) + mti); + /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */ + /* In the previous versions, MSBs of the seed affect */ + /* only MSBs of the array mt[]. 
*/ + /* 2002/01/09 modified by Makoto Matsumoto */ + // mt[mti] &= 0xffffffffUL; + /* for >32 bit machines */ + } + r->mti = mti; + } + + return r; +} + +void free_mtdata( MTdata d ) +{ + if(d) + align_free(d); +} + +/* generates a random number on [0,0xffffffff]-interval */ +cl_uint genrand_int32( MTdata d) +{ + /* mag01[x] = x * MATRIX_A for x=0,1 */ + static const cl_uint mag01[2]={0x0UL, MATRIX_A}; +#ifdef __SSE2__ + static volatile int init = 0; + static union{ __m128i v; cl_uint s[4]; } upper_mask, lower_mask, one, matrix_a, c0, c1; +#endif + + + cl_uint *mt = d->mt; + cl_uint y; + + if (d->mti == N) + { /* generate N words at one time */ + int kk; + +#ifdef __SSE2__ + if( 0 == init ) + { + upper_mask.s[0] = upper_mask.s[1] = upper_mask.s[2] = upper_mask.s[3] = UPPER_MASK; + lower_mask.s[0] = lower_mask.s[1] = lower_mask.s[2] = lower_mask.s[3] = LOWER_MASK; + one.s[0] = one.s[1] = one.s[2] = one.s[3] = 1; + matrix_a.s[0] = matrix_a.s[1] = matrix_a.s[2] = matrix_a.s[3] = MATRIX_A; + c0.s[0] = c0.s[1] = c0.s[2] = c0.s[3] = (cl_uint) 0x9d2c5680UL; + c1.s[0] = c1.s[1] = c1.s[2] = c1.s[3] = (cl_uint) 0xefc60000UL; + init = 1; + } +#endif + + kk = 0; +#ifdef __SSE2__ + // vector loop + for( ; kk + 4 <= N-M; kk += 4 ) + { + __m128i vy = _mm_or_si128( _mm_and_si128( _mm_load_si128( (__m128i*)(mt + kk) ), upper_mask.v ), + _mm_and_si128( _mm_loadu_si128( (__m128i*)(mt + kk + 1) ), lower_mask.v )); // ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK)) + + __m128i mask = _mm_cmpeq_epi32( _mm_and_si128( vy, one.v), one.v ); // y & 1 ? -1 : 0 + __m128i vmag01 = _mm_and_si128( mask, matrix_a.v ); // y & 1 ? 
MATRIX_A, 0 = mag01[y & (cl_uint) 0x1UL] + __m128i vr = _mm_xor_si128( _mm_loadu_si128( (__m128i*)(mt + kk + M)), (__m128i) _mm_srli_epi32( vy, 1 ) ); // mt[kk+M] ^ (y >> 1) + vr = _mm_xor_si128( vr, vmag01 ); // mt[kk+M] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL] + _mm_store_si128( (__m128i*) (mt + kk ), vr ); + } +#endif + for ( ;kk> 1) ^ mag01[y & (cl_uint) 0x1UL]; + } + +#ifdef __SSE2__ + // advance to next aligned location + for (;kk> 1) ^ mag01[y & (cl_uint) 0x1UL]; + } + + // vector loop + for( ; kk + 4 <= N-1; kk += 4 ) + { + __m128i vy = _mm_or_si128( _mm_and_si128( _mm_load_si128( (__m128i*)(mt + kk) ), upper_mask.v ), + _mm_and_si128( _mm_loadu_si128( (__m128i*)(mt + kk + 1) ), lower_mask.v )); // ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK)) + + __m128i mask = _mm_cmpeq_epi32( _mm_and_si128( vy, one.v), one.v ); // y & 1 ? -1 : 0 + __m128i vmag01 = _mm_and_si128( mask, matrix_a.v ); // y & 1 ? MATRIX_A, 0 = mag01[y & (cl_uint) 0x1UL] + __m128i vr = _mm_xor_si128( _mm_loadu_si128( (__m128i*)(mt + kk + M - N)), _mm_srli_epi32( vy, 1 ) ); // mt[kk+M-N] ^ (y >> 1) + vr = _mm_xor_si128( vr, vmag01 ); // mt[kk+M] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL] + _mm_store_si128( (__m128i*) (mt + kk ), vr ); + } +#endif + + for (;kk> 1) ^ mag01[y & (cl_uint) 0x1UL]; + } + y = (cl_uint)((mt[N-1]&UPPER_MASK)|(mt[0]&LOWER_MASK)); + mt[N-1] = mt[M-1] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL]; + +#ifdef __SSE2__ + // Do the tempering ahead of time in vector code + for( kk = 0; kk + 4 <= N; kk += 4 ) + { + __m128i vy = _mm_load_si128( (__m128i*)(mt + kk ) ); // y = mt[k]; + vy = _mm_xor_si128( vy, _mm_srli_epi32( vy, 11 ) ); // y ^= (y >> 11); + vy = _mm_xor_si128( vy, _mm_and_si128( _mm_slli_epi32( vy, 7 ), c0.v) ); // y ^= (y << 7) & (cl_uint) 0x9d2c5680UL; + vy = _mm_xor_si128( vy, _mm_and_si128( _mm_slli_epi32( vy, 15 ), c1.v) ); // y ^= (y << 15) & (cl_uint) 0xefc60000UL; + vy = _mm_xor_si128( vy, _mm_srli_epi32( vy, 18 ) ); // y ^= (y >> 18); + _mm_store_si128( 
(__m128i*)(d->cache+kk), vy ); + } +#endif + + d->mti = 0; + } +#ifdef __SSE2__ + y = d->cache[d->mti++]; +#else + y = mt[d->mti++]; + + /* Tempering */ + y ^= (y >> 11); + y ^= (y << 7) & (cl_uint) 0x9d2c5680UL; + y ^= (y << 15) & (cl_uint) 0xefc60000UL; + y ^= (y >> 18); +#endif + + + return y; +} + +cl_ulong genrand_int64( MTdata d) +{ + return ((cl_ulong) genrand_int32(d) << 32) | (cl_uint) genrand_int32(d); +} + +/* generates a random number on [0,1]-real-interval */ +double genrand_real1(MTdata d) +{ + return genrand_int32(d)*(1.0/4294967295.0); + /* divided by 2^32-1 */ +} + +/* generates a random number on [0,1)-real-interval */ +double genrand_real2(MTdata d) +{ + return genrand_int32(d)*(1.0/4294967296.0); + /* divided by 2^32 */ +} + +/* generates a random number on (0,1)-real-interval */ +double genrand_real3(MTdata d) +{ + return (((double)genrand_int32(d)) + 0.5)*(1.0/4294967296.0); + /* divided by 2^32 */ +} + +/* generates a random number on [0,1) with 53-bit resolution*/ +double genrand_res53(MTdata d) +{ + unsigned long a=genrand_int32(d)>>5, b=genrand_int32(d)>>6; + return(a*67108864.0+b)*(1.0/9007199254740992.0); +} diff --git a/test_common/harness/mt19937.h b/test_common/harness/mt19937.h new file mode 100644 index 00000000..d05beed1 --- /dev/null +++ b/test_common/harness/mt19937.h @@ -0,0 +1,99 @@ + +/* + * mt19937.h + * + * Mersenne Twister. + * + A C-program for MT19937, with initialization improved 2002/1/26. + Coded by Takuji Nishimura and Makoto Matsumoto. + + Before using, initialize the state by using init_genrand(seed) + or init_by_array(init_key, key_length). + + Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura, + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. The names of its contributors may not be used to endorse or promote + products derived from this software without specific prior written + permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + + Any feedback is very welcome. + http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html + email: m-mat @ math.sci.hiroshima-u.ac.jp (remove space) + */ + +#ifndef MT19937_H +#define MT19937_H 1 + +#if defined( __APPLE__ ) + #include +#else + #include +#endif + +#ifdef __cplusplus + extern "C" { +#endif + +/* + * Interfaces here have been modified from original sources so that they + * are safe to call reentrantly, so long as a different MTdata is used + * on each thread. 
+ */ + +typedef struct _MTdata *MTdata; + +/* Create the random number generator with seed */ +MTdata init_genrand( cl_uint /*seed*/ ); + +/* release memory used by a MTdata private data */ +void free_mtdata( MTdata /*data*/ ); + +/* generates a random number on [0,0xffffffff]-interval */ +cl_uint genrand_int32( MTdata /*data*/); + +/* generates a random number on [0,0xffffffffffffffffULL]-interval */ +cl_ulong genrand_int64( MTdata /*data*/); + +/* generates a random number on [0,1]-real-interval */ +double genrand_real1( MTdata /*data*/); + +/* generates a random number on [0,1)-real-interval */ +double genrand_real2( MTdata /*data*/); + +/* generates a random number on (0,1)-real-interval */ +double genrand_real3( MTdata /*data*/); + +/* generates a random number on [0,1) with 53-bit resolution*/ +double genrand_res53( MTdata /*data*/ ); + + +#ifdef __cplusplus + } +#endif + +#endif /* MT19937_H */ diff --git a/test_common/harness/os_helpers.cpp b/test_common/harness/os_helpers.cpp new file mode 100644 index 00000000..ecc55a22 --- /dev/null +++ b/test_common/harness/os_helpers.cpp @@ -0,0 +1,564 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "os_helpers.h" +#include "errorHelpers.h" + +// ================================================================================================= +// C++ interface. 
+// ================================================================================================= + +#include // errno, error constants +#include // PATH_MAX +#include // abort, _splitpath, _makepath +#include // strdup, strerror_r +#include + +#include + +#define CHECK_PTR( ptr ) \ + if ( (ptr) == NULL ) { \ + abort(); \ + } + +typedef std::vector< char > buffer_t; + +#if ! defined( PATH_MAX ) + #define PATH_MAX 1000 +#endif + +int const _size = PATH_MAX + 1; // Initial buffer size for path. +int const _count = 8; // How many times we will try to double buffer size. + +// ------------------------------------------------------------------------------------------------- +// MacOS X +// ------------------------------------------------------------------------------------------------- + +#if defined( __APPLE__ ) + + + #include // _NSGetExecutablePath + #include // dirname + + + static + std::string + _err_msg( + int err, // Error number (e. g. errno). + int level // Nesting level, for avoiding infinite recursion. + ) { + + /* + There are 3 incompatible versions of strerror_r: + + char * strerror_r( int, char *, size_t ); // GNU version + int strerror_r( int, char *, size_t ); // BSD version + int strerror_r( int, char *, size_t ); // XSI version + + BSD version returns error code, while XSI version returns 0 or -1 and sets errno. + + */ + + // BSD version of strerror_r. + buffer_t buffer( 100 ); + int count = _count; + for ( ; ; ) { + int rc = strerror_r( err, & buffer.front(), buffer.size() ); + if ( rc == EINVAL ) { + // Error code is not recognized, but anyway we got the message. + return & buffer.front(); + } else if ( rc == ERANGE ) { + // Buffer is not enough. + if ( count > 0 ) { + // Enlarge the buffer. 
+ -- count; + buffer.resize( buffer.size() * 2 ); + } else { + std::stringstream ostr; + ostr + << "Error " << err << " " + << "(Getting error message failed: " + << "Buffer of " << buffer.size() << " bytes is still too small" + << ")"; + return ostr.str(); + }; // if + } else if ( rc == 0 ) { + // We got the message. + return & buffer.front(); + } else { + std::stringstream ostr; + ostr + << "Error " << err << " " + << "(Getting error message failed: " + << ( level < 2 ? _err_msg( rc, level + 1 ) : "Oops" ) + << ")"; + return ostr.str(); + }; // if + }; // forever + + } // _err_msg + + + std::string + dir_sep( + ) { + return "/"; + } // dir_sep + + + std::string + exe_path( + ) { + buffer_t path( _size ); + int count = _count; + for ( ; ; ) { + uint32_t size = path.size(); + int rc = _NSGetExecutablePath( & path.front(), & size ); + if ( rc == 0 ) { + break; + }; // if + if ( count > 0 ) { + -- count; + path.resize( size ); + } else { + log_error( + "ERROR: Getting executable path failed: " + "_NSGetExecutablePath failed: Buffer of %lu bytes is still too small\n", + (unsigned long) path.size() + ); + exit( 2 ); + }; // if + }; // forever + return & path.front(); + } // exe_path + + + std::string + exe_dir( + ) { + std::string path = exe_path(); + // We cannot pass path.c_str() to `dirname' bacause `dirname' modifies its argument. + buffer_t buffer( path.c_str(), path.c_str() + path.size() + 1 ); // Copy with trailing zero. 
+ return dirname( & buffer.front() ); + } // exe_dir + + +#endif // __APPLE__ + +// ------------------------------------------------------------------------------------------------- +// Linux +// ------------------------------------------------------------------------------------------------- + +#if defined( __linux__ ) + + + #include // errno + #include // dirname + #include // readlink + + + static + std::string + _err_msg( + int err, + int level + ) { + + /* + There are 3 incompatible versions of strerror_r: + + char * strerror_r( int, char *, size_t ); // GNU version + int strerror_r( int, char *, size_t ); // BSD version + int strerror_r( int, char *, size_t ); // XSI version + + BSD version returns error code, while XSI version returns 0 or -1 and sets errno. + + */ + + #if defined(__ANDROID__) || ( ( _POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600 ) && ! _GNU_SOURCE ) + + // XSI version of strerror_r. + #warning Not tested! + buffer_t buffer( 200 ); + int count = _count; + for ( ; ; ) { + int rc = strerror_r( err, & buffer.front(), buffer.size() ); + if ( rc == -1 ) { + int _err = errno; + if ( _err == ERANGE ) { + if ( count > 0 ) { + // Enlarge the buffer. + -- count; + buffer.resize( buffer.size() * 2 ); + } else { + std::stringstream ostr; + ostr + << "Error " << err << " " + << "(Getting error message failed: " + << "Buffer of " << buffer.size() << " bytes is still too small" + << ")"; + return ostr.str(); + }; // if + } else { + std::stringstream ostr; + ostr + << "Error " << err << " " + << "(Getting error message failed: " + << ( level < 2 ? _err_msg( _err, level + 1 ) : "Oops" ) + << ")"; + return ostr.str(); + }; // if + } else { + // We got the message. + return & buffer.front(); + }; // if + }; // forever + + #else + + // GNU version of strerror_r. 
+ char buffer[ 2000 ]; + return strerror_r( err, buffer, sizeof( buffer ) ); + + #endif + + } // _err_msg + + + std::string + dir_sep( + ) { + return "/"; + } // dir_sep + + + std::string + exe_path( + ) { + + static std::string const exe = "/proc/self/exe"; + + buffer_t path( _size ); + int count = _count; // Max number of iterations. + + for ( ; ; ) { + + ssize_t len = readlink( exe.c_str(), & path.front(), path.size() ); + + if ( len < 0 ) { + // Oops. + int err = errno; + log_error( + "ERROR: Getting executable path failed: " + "Reading symlink `%s' failed: %s\n", + exe.c_str(), err_msg( err ).c_str() + ); + exit( 2 ); + }; // if + + if ( len < path.size() ) { + // We got the path. + path.resize( len ); + break; + }; // if + + // Oops, buffer is too small. + if ( count > 0 ) { + -- count; + // Enlarge the buffer. + path.resize( path.size() * 2 ); + } else { + log_error( + "ERROR: Getting executable path failed: " + "Reading symlink `%s' failed: Buffer of %lu bytes is still too small\n", + exe.c_str(), + (unsigned long) path.size() + ); + exit( 2 ); + }; // if + + }; // forever + + return std::string( & path.front(), path.size() ); + + } // exe_path + + + std::string + exe_dir( + ) { + std::string path = exe_path(); + // We cannot pass path.c_str() to `dirname' bacause `dirname' modifies its argument. + buffer_t buffer( path.c_str(), path.c_str() + path.size() + 1 ); // Copy with trailing zero. 
+ return dirname( & buffer.front() ); + } // exe_dir + +#endif // __linux__ + +// ------------------------------------------------------------------------------------------------- +// MS Windows +// ------------------------------------------------------------------------------------------------- + +#if defined( _WIN32 ) + + + #include + #if defined( max ) + #undef max + #endif + + #include + #include + + + static + std::string + _err_msg( + int err, + int level + ) { + + std::string msg; + + LPSTR buffer = NULL; + DWORD flags = + FORMAT_MESSAGE_ALLOCATE_BUFFER | + FORMAT_MESSAGE_FROM_SYSTEM | + FORMAT_MESSAGE_IGNORE_INSERTS; + + DWORD len = + FormatMessageA( + flags, + NULL, + err, + LANG_USER_DEFAULT, + reinterpret_cast< LPSTR >( & buffer ), + 0, + NULL + ); + + if ( buffer == NULL || len == 0 ) { + + int _err = GetLastError(); + char str[1024] = { 0 }; + snprintf(str, sizeof(str), "Error 0x%08x (Getting error message failed: %s )", err, ( level < 2 ? _err_msg( _err, level + 1 ).c_str() : "Oops" )); + msg = std::string(str); + + } else { + + // Trim trailing whitespace (including `\r' and `\n'). + while ( len > 0 && isspace( buffer[ len - 1 ] ) ) { + -- len; + }; // while + + // Drop trailing full stop. + if ( len > 0 && buffer[ len - 1 ] == '.' ) { + -- len; + }; // if + + msg.assign( buffer, len ); + + }; //if + + if ( buffer != NULL ) { + LocalFree( buffer ); + }; // if + + return msg; + + } // _get_err_msg + + + std::string + dir_sep( + ) { + return "\\"; + } // dir_sep + + + std::string + exe_path( + ) { + + buffer_t path( _size ); + int count = _count; + + for ( ; ; ) { + + DWORD len = GetModuleFileNameA( NULL, & path.front(), path.size() ); + + if ( len == 0 ) { + int err = GetLastError(); + log_error( "ERROR: Getting executable path failed: %s\n", err_msg( err ).c_str() ); + exit( 2 ); + }; // if + + if ( len < path.size() ) { + path.resize( len ); + break; + }; // if + + // Buffer too small. 
+ if ( count > 0 ) { + -- count; + path.resize( path.size() * 2 ); + } else { + log_error( + "ERROR: Getting executable path failed: " + "Buffer of %lu bytes is still too small\n", + (unsigned long) path.size() + ); + exit( 2 ); + }; // if + + }; // forever + + return std::string( & path.front(), path.size() ); + + } // exe_path + + + std::string + exe_dir( + ) { + + std::string exe = exe_path(); + int count = 0; + + // Splitting path into components. + buffer_t drv( _MAX_DRIVE ); + buffer_t dir( _MAX_DIR ); + count = _count; +#if defined(_MSC_VER) + for ( ; ; ) { + int rc = + _splitpath_s( + exe.c_str(), + & drv.front(), drv.size(), + & dir.front(), dir.size(), + NULL, 0, // We need neither name + NULL, 0 // nor extension + ); + if ( rc == 0 ) { + break; + } else if ( rc == ERANGE ) { + if ( count > 0 ) { + -- count; + // Buffer is too small, but it is not clear which one. + // So we have to enlarge all. + drv.resize( drv.size() * 2 ); + dir.resize( dir.size() * 2 ); + } else { + log_error( + "ERROR: Getting executable path failed: " + "Splitting path `%s' to components failed: " + "Buffers of %lu and %lu bytes are still too small\n", + exe.c_str(), + (unsigned long) drv.size(), + (unsigned long) dir.size() + ); + exit( 2 ); + }; // if + } else { + log_error( + "ERROR: Getting executable path failed: " + "Splitting path `%s' to components failed: %s\n", + exe.c_str(), + err_msg( rc ).c_str() + ); + exit( 2 ); + }; // if + }; // forever + +#else // __MINGW32__ + + // MinGW does not have the "secure" _splitpath_s, use the insecure version instead. + _splitpath( + exe.c_str(), + & drv.front(), + & dir.front(), + NULL, // We need neither name + NULL // nor extension + ); +#endif // __MINGW32__ + + // Combining components back to path. + // I failed with "secure" `_makepath_s'. If buffer is too small, instead of returning + // ERANGE, `_makepath_s' pops up dialog box and offers to debug the program. D'oh! 
+ // So let us try to guess the size of result and go with insecure `_makepath'. + buffer_t path( std::max( drv.size() + dir.size(), size_t( _MAX_PATH ) ) + 10 ); + _makepath( & path.front(), & drv.front(), & dir.front(), NULL, NULL ); + + return & path.front(); + + } // exe_dir + + +#endif // _WIN32 + + +std::string +err_msg( + int err +) { + + return _err_msg( err, 0 ); + +} // err_msg + + +// ================================================================================================= +// C interface. +// ================================================================================================= + + +char * +get_err_msg( + int err +) { + char * msg = strdup( err_msg( err ).c_str() ); + CHECK_PTR( msg ); + return msg; +} // get_err_msg + + +char * +get_dir_sep( +) { + char * sep = strdup( dir_sep().c_str() ); + CHECK_PTR( sep ); + return sep; +} // get_dir_sep + + +char * +get_exe_path( +) { + char * path = strdup( exe_path().c_str() ); + CHECK_PTR( path ); + return path; +} // get_exe_path + + +char * +get_exe_dir( +) { + char * dir = strdup( exe_dir().c_str() ); + CHECK_PTR( dir ); + return dir; +} // get_exe_dir + + +// end of file // diff --git a/test_common/harness/os_helpers.h b/test_common/harness/os_helpers.h new file mode 100644 index 00000000..21625841 --- /dev/null +++ b/test_common/harness/os_helpers.h @@ -0,0 +1,53 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef __os_helpers_h__ +#define __os_helpers_h__ + +#include "compat.h" + +// ------------------------------------------------------------------------------------------------- +// C++ interface. +// ------------------------------------------------------------------------------------------------- + +#ifdef __cplusplus + + #include + + std::string err_msg( int err ); + std::string dir_sep(); + std::string exe_path(); + std::string exe_dir(); + +#endif // __cplusplus + +// ------------------------------------------------------------------------------------------------- +// C interface. +// ------------------------------------------------------------------------------------------------- + +#ifdef __cplusplus + extern "C" { +#endif // __cplusplus + +char * get_err_msg( int err ); // Returns system error message. Subject to free. +char * get_dir_sep(); // Returns dir separator. Subject to free. +char * get_exe_path(); // Returns path of current executable. Subject to free. +char * get_exe_dir(); // Returns dir of current executable. Subject to free. + +#ifdef __cplusplus + } // extern "C" +#endif // __cplusplus + +#endif // __os_helpers_h__ diff --git a/test_common/harness/parseParameters.cpp b/test_common/harness/parseParameters.cpp new file mode 100644 index 00000000..9c227a14 --- /dev/null +++ b/test_common/harness/parseParameters.cpp @@ -0,0 +1,129 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "parseParameters.h" + +#include "errorHelpers.h" +#include "testHarness.h" +#include "ThreadPool.h" + +#include +#include +#include +#include +#include + +using namespace std; + +bool gOfflineCompiler = false; +bool gForceSpirVCache = false; +bool gForceSpirVGenerate = false; +std::string gSpirVPath = "."; +OfflineCompilerOutputType gOfflineCompilerOutputType; + +void helpInfo () +{ + log_info(" '-offlineCompiler ': use offline compiler\n"); + log_info(" ' output_type binary - \"../build_script_binary.py\" is invoked\n"); + log_info(" ' output_type source - \"../build_script_source.py\" is invoked\n"); + log_info(" ' output_type spir_v - \"../cl_build_script_spir_v.py\" is invoked, optional modes: generate, cache\n"); + log_info(" ' mode generate - force binary generation\n"); + log_info(" ' mode cache - force reading binary files from cache\n"); + log_info("\n"); +} + +int parseCustomParam (int argc, const char *argv[], const char *ignore) +{ + int delArg = 0; + + for (int i=1; i \n"); + return -1; + } + } + + //cleaning parameters from argv tab + for (int j=i; j + +extern bool gOfflineCompiler; +extern bool gForceSpirVCache; +extern bool gForceSpirVGenerate; +extern std::string gSpirVPath; + +enum OfflineCompilerOutputType +{ + kBinary = 0, + kSource, + kSpir_v +}; + +extern OfflineCompilerOutputType gOfflineCompilerOutputType; + +extern int parseCustomParam (int argc, const char *argv[], const char *ignore = 0 ); + +#endif // _parseParameters_h diff --git a/test_common/harness/ref_counting.h b/test_common/harness/ref_counting.h new file mode 100644 index 00000000..1a2aceee --- /dev/null +++ b/test_common/harness/ref_counting.h @@ -0,0 +1,49 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef _ref_counting_h +#define _ref_counting_h + +#define MARK_REF_COUNT_BASE( c, type, bigType ) \ + cl_uint c##_refCount; \ + error = clGet##type##Info( c, CL_##bigType##_REFERENCE_COUNT, sizeof( c##_refCount ), &c##_refCount, NULL ); \ + test_error( error, "Unable to check reference count for " #type ); + +#define TEST_REF_COUNT_BASE( c, type, bigType ) \ + cl_uint c##_refCount_new; \ + error = clGet##type##Info( c, CL_##bigType##_REFERENCE_COUNT, sizeof( c##_refCount_new ), &c##_refCount_new, NULL ); \ + test_error( error, "Unable to check reference count for " #type ); \ + if( c##_refCount != c##_refCount_new ) \ + { \ + log_error( "ERROR: Reference count for " #type " changed! 
(was %d, now %d)\n", c##_refCount, c##_refCount_new ); \ + return -1; \ + } + +#define MARK_REF_COUNT_CONTEXT( c ) MARK_REF_COUNT_BASE( c, Context, CONTEXT ) +#define TEST_REF_COUNT_CONTEXT( c ) TEST_REF_COUNT_BASE( c, Context, CONTEXT ) + +#define MARK_REF_COUNT_DEVICE( c ) MARK_REF_COUNT_BASE( c, Device, DEVICE ) +#define TEST_REF_COUNT_DEVICE( c ) TEST_REF_COUNT_BASE( c, Device, DEVICE ) + +#define MARK_REF_COUNT_QUEUE( c ) MARK_REF_COUNT_BASE( c, CommandQueue, QUEUE ) +#define TEST_REF_COUNT_QUEUE( c ) TEST_REF_COUNT_BASE( c, CommandQueue, QUEUE ) + +#define MARK_REF_COUNT_PROGRAM( c ) MARK_REF_COUNT_BASE( c, Program, PROGRAM ) +#define TEST_REF_COUNT_PROGRAM( c ) TEST_REF_COUNT_BASE( c, Program, PROGRAM ) + +#define MARK_REF_COUNT_MEM( c ) MARK_REF_COUNT_BASE( c, MemObject, MEM ) +#define TEST_REF_COUNT_MEM( c ) TEST_REF_COUNT_BASE( c, MemObject, MEM ) + +#endif // _ref_counting_h diff --git a/test_common/harness/rounding_mode.c b/test_common/harness/rounding_mode.c new file mode 100644 index 00000000..ff38a7e4 --- /dev/null +++ b/test_common/harness/rounding_mode.c @@ -0,0 +1,241 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "rounding_mode.h" + +#if (defined( __arm__ ) || defined(__aarch64__)) + #define FPSCR_FZ (1 << 24) // Flush-To-Zero mode + #define FPSCR_ROUND_MASK (3 << 22) // Rounding mode: + + #define _ARM_FE_FTZ 0x1000000 + #define _ARM_FE_NFTZ 0x0 + #if defined(__aarch64__) + #define _FPU_GETCW(cw) __asm__ ("MRS %0,FPCR" : "=r" (cw)) + #define _FPU_SETCW(cw) __asm__ ("MSR FPCR,%0" : :"ri" (cw)) + #else + #define _FPU_GETCW(cw) __asm__ ("VMRS %0,FPSCR" : "=r" (cw)) + #define _FPU_SETCW(cw) __asm__ ("VMSR FPSCR,%0" : :"ri" (cw)) + #endif +#endif + +#if (defined( __arm__ ) || defined(__aarch64__)) && defined( __GNUC__ ) +#define _ARM_FE_TONEAREST 0x0 +#define _ARM_FE_UPWARD 0x400000 +#define _ARM_FE_DOWNWARD 0x800000 +#define _ARM_FE_TOWARDZERO 0xc00000 +RoundingMode set_round( RoundingMode r, Type outType ) +{ + static const int flt_rounds[ kRoundingModeCount ] = { _ARM_FE_TONEAREST, + _ARM_FE_TONEAREST, _ARM_FE_UPWARD, _ARM_FE_DOWNWARD, _ARM_FE_TOWARDZERO }; + static const int int_rounds[ kRoundingModeCount ] = { _ARM_FE_TOWARDZERO, + _ARM_FE_TONEAREST, _ARM_FE_UPWARD, _ARM_FE_DOWNWARD, _ARM_FE_TOWARDZERO }; + const int *p = int_rounds; + if( outType == kfloat || outType == kdouble ) + p = flt_rounds; + + int fpscr = 0; + RoundingMode oldRound = get_round(); + + _FPU_GETCW(fpscr); + _FPU_SETCW( p[r] | (fpscr & ~FPSCR_ROUND_MASK)); + + return oldRound; +} + +RoundingMode get_round( void ) +{ + int fpscr; + int oldRound; + + _FPU_GETCW(fpscr); + oldRound = (fpscr & FPSCR_ROUND_MASK); + + switch( oldRound ) + { + case _ARM_FE_TONEAREST: + return kRoundToNearestEven; + case _ARM_FE_UPWARD: + return kRoundUp; + case _ARM_FE_DOWNWARD: + return kRoundDown; + case _ARM_FE_TOWARDZERO: + return kRoundTowardZero; + } + + return kDefaultRoundingMode; +} + +#elif !(defined(_WIN32) && defined(_MSC_VER)) +RoundingMode set_round( RoundingMode r, Type outType ) +{ + static const int flt_rounds[ kRoundingModeCount ] = { FE_TONEAREST, FE_TONEAREST, FE_UPWARD, FE_DOWNWARD, 
FE_TOWARDZERO }; + static const int int_rounds[ kRoundingModeCount ] = { FE_TOWARDZERO, FE_TONEAREST, FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO }; + const int *p = int_rounds; + if( outType == kfloat || outType == kdouble ) + p = flt_rounds; + int oldRound = fegetround(); + fesetround( p[r] ); + + switch( oldRound ) + { + case FE_TONEAREST: + return kRoundToNearestEven; + case FE_UPWARD: + return kRoundUp; + case FE_DOWNWARD: + return kRoundDown; + case FE_TOWARDZERO: + return kRoundTowardZero; + default: + abort(); // ??! + } + return kDefaultRoundingMode; //never happens +} + +RoundingMode get_round( void ) +{ + int oldRound = fegetround(); + + switch( oldRound ) + { + case FE_TONEAREST: + return kRoundToNearestEven; + case FE_UPWARD: + return kRoundUp; + case FE_DOWNWARD: + return kRoundDown; + case FE_TOWARDZERO: + return kRoundTowardZero; + } + + return kDefaultRoundingMode; +} + +#else +RoundingMode set_round( RoundingMode r, Type outType ) +{ + static const int flt_rounds[ kRoundingModeCount ] = { _RC_NEAR, _RC_NEAR, _RC_UP, _RC_DOWN, _RC_CHOP }; + static const int int_rounds[ kRoundingModeCount ] = { _RC_CHOP, _RC_NEAR, _RC_UP, _RC_DOWN, _RC_CHOP }; + const int *p = ( outType == kfloat || outType == kdouble )? flt_rounds : int_rounds; + unsigned int oldRound; + + int err = _controlfp_s(&oldRound, 0, 0); //get rounding mode into oldRound + if (err) { + vlog_error("\t\tERROR: -- cannot get rounding mode in %s:%d\n", __FILE__, __LINE__); + return kDefaultRoundingMode; //what else never happens + } + + oldRound &= _MCW_RC; + + RoundingMode old = + (oldRound == _RC_NEAR)? kRoundToNearestEven : + (oldRound == _RC_UP)? kRoundUp : + (oldRound == _RC_DOWN)? kRoundDown : + (oldRound == _RC_CHOP)? 
kRoundTowardZero: + kDefaultRoundingMode; + + _controlfp_s(&oldRound, p[r], _MCW_RC); //setting new rounding mode + return old; //returning old rounding mode +} + +RoundingMode get_round( void ) +{ + unsigned int oldRound; + + int err = _controlfp_s(&oldRound, 0, 0); //get rounding mode into oldRound + oldRound &= _MCW_RC; + return + (oldRound == _RC_NEAR)? kRoundToNearestEven : + (oldRound == _RC_UP)? kRoundUp : + (oldRound == _RC_DOWN)? kRoundDown : + (oldRound == _RC_CHOP)? kRoundTowardZero: + kDefaultRoundingMode; +} + +#endif + +// +// FlushToZero() sets the host processor into ftz mode. It is intended to have a remote effect on the behavior of the code in +// basic_test_conversions.c. Some host processors may not support this mode, which case you'll need to do some clamping in +// software by testing against FLT_MIN or DBL_MIN in that file. +// +// Note: IEEE-754 says conversions are basic operations. As such they do *NOT* have the behavior in section 7.5.3 of +// the OpenCL spec. They *ALWAYS* flush to zero for subnormal inputs or outputs when FTZ mode is on like other basic +// operators do (e.g. add, subtract, multiply, divide, etc.) +// +// Configuring hardware to FTZ mode varies by platform. +// CAUTION: Some C implementations may also fail to behave properly in this mode. +// +// On PowerPC, it is done by setting the FPSCR into non-IEEE mode. +// On Intel, you can do this by turning on the FZ and DAZ bits in the MXCSR -- provided that SSE/SSE2 +// is used for floating point computation! If your OS uses x87, you'll need to figure out how +// to turn that off for the conversions code in basic_test_conversions.c so that they flush to +// zero properly. Otherwise, you'll need to add appropriate software clamping to basic_test_conversions.c +// in which case, these function are at liberty to do nothing. 
+// +#if defined( __i386__ ) || defined( __x86_64__ ) || defined (_WIN32) + #include +#elif defined( __PPC__ ) + #include +#endif +void *FlushToZero( void ) +{ +#if defined( __APPLE__ ) || defined(__linux__) || defined (_WIN32) + #if defined( __i386__ ) || defined( __x86_64__ ) || defined(_MSC_VER) + union{ int i; void *p; }u = { _mm_getcsr() }; + _mm_setcsr( u.i | 0x8040 ); + return u.p; + #elif defined( __arm__ ) || defined(__aarch64__) + int fpscr; + _FPU_GETCW(fpscr); + _FPU_SETCW(fpscr | FPSCR_FZ); + return NULL; + #elif defined( __PPC__ ) + fpu_control_t flags = 0; + _FPU_GETCW(flags); + flags |= _FPU_MASK_NI; + _FPU_SETCW(flags); + return NULL; + #else + #error Unknown arch + #endif +#else + #error Please configure FlushToZero and UnFlushToZero to behave properly on this operating system. +#endif +} + +// Undo the effects of FlushToZero above, restoring the host to default behavior, using the information passed in p. +void UnFlushToZero( void *p) +{ +#if defined( __APPLE__ ) || defined(__linux__) || defined (_WIN32) + #if defined( __i386__ ) || defined( __x86_64__ ) || defined(_MSC_VER) + union{ void *p; int i; }u = { p }; + _mm_setcsr( u.i ); + #elif defined( __arm__ ) || defined(__aarch64__) + int fpscr; + _FPU_GETCW(fpscr); + _FPU_SETCW(fpscr & ~FPSCR_FZ); + #elif defined( __PPC__) + fpu_control_t flags = 0; + _FPU_GETCW(flags); + flags &= ~_FPU_MASK_NI; + _FPU_SETCW(flags); + #else + #error Unknown arch + #endif +#else + #error Please configure FlushToZero and UnFlushToZero to behave properly on this operating system. +#endif +} diff --git a/test_common/harness/rounding_mode.h b/test_common/harness/rounding_mode.h new file mode 100644 index 00000000..b7eb9758 --- /dev/null +++ b/test_common/harness/rounding_mode.h @@ -0,0 +1,69 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef __ROUNDING_MODE_H__ +#define __ROUNDING_MODE_H__ + +#include "compat.h" + +#if (defined(_WIN32) && defined (_MSC_VER)) +#include "errorHelpers.h" +#include "testHarness.h" +#endif + +typedef enum +{ + kDefaultRoundingMode = 0, + kRoundToNearestEven, + kRoundUp, + kRoundDown, + kRoundTowardZero, + + kRoundingModeCount +}RoundingMode; + +typedef enum +{ + kuchar = 0, + kchar = 1, + kushort = 2, + kshort = 3, + kuint = 4, + kint = 5, + kfloat = 6, + kdouble = 7, + kulong = 8, + klong = 9, + + //This goes last + kTypeCount +}Type; + +#ifdef __cplusplus +extern "C" { +#endif + +extern RoundingMode set_round( RoundingMode r, Type outType ); +extern RoundingMode get_round( void ); +extern void *FlushToZero( void ); +extern void UnFlushToZero( void *p); + +#ifdef __cplusplus +} +#endif + + + +#endif /* __ROUNDING_MODE_H__ */ diff --git a/test_common/harness/testHarness.c b/test_common/harness/testHarness.c new file mode 100644 index 00000000..f513f787 --- /dev/null +++ b/test_common/harness/testHarness.c @@ -0,0 +1,849 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "testHarness.h" +#include "compat.h" +#include +#include +#include "threadTesting.h" +#include "errorHelpers.h" +#include "kernelHelpers.h" +#include "fpcontrol.h" +#include "typeWrappers.h" +#include "parseParameters.h" + +#if !defined(_WIN32) +#include +#endif + +#include + +#if !defined (__APPLE__) +#include +#endif + +int gTestsPassed = 0; +int gTestsFailed = 0; +cl_uint gRandomSeed = 0; +cl_uint gReSeed = 0; + +int gFlushDenormsToZero = 0; +int gInfNanSupport = 1; +int gIsEmbedded = 0; +int gIsOpenCL_C_1_0_Device = 0; +int gIsOpenCL_1_0_Device = 0; +int gHasLong = 1; + +#define DEFAULT_NUM_ELEMENTS 0x4000 + +int runTestHarness( int argc, const char *argv[], unsigned int num_fns, + basefn fnList[], const char *fnNames[], + int imageSupportRequired, int forceNoContextCreation, cl_command_queue_properties queueProps ) +{ + return runTestHarnessWithCheck( argc, argv, num_fns, fnList, fnNames, imageSupportRequired, forceNoContextCreation, queueProps, + ( imageSupportRequired ) ? 
verifyImageSupport : NULL ); +} + +int runTestHarnessWithCheck( int argc, const char *argv[], unsigned int num_fns, + basefn fnList[], const char *fnNames[], + int imageSupportRequired, int forceNoContextCreation, cl_command_queue_properties queueProps, + DeviceCheckFn deviceCheckFn ) +{ + test_start(); + + cl_device_type device_type = CL_DEVICE_TYPE_DEFAULT; + cl_uint num_platforms = 0; + cl_platform_id *platforms; + cl_device_id device; + int num_elements = DEFAULT_NUM_ELEMENTS; + cl_uint num_devices = 0; + cl_device_id *devices = NULL; + cl_uint choosen_device_index = 0; + cl_uint choosen_platform_index = 0; + + int err, ret; + char *endPtr; + unsigned int i; + int based_on_env_var = 0; + + + /* Check for environment variable to set device type */ + char *env_mode = getenv( "CL_DEVICE_TYPE" ); + if( env_mode != NULL ) + { + based_on_env_var = 1; + if( strcmp( env_mode, "gpu" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_GPU" ) == 0 ) + device_type = CL_DEVICE_TYPE_GPU; + else if( strcmp( env_mode, "cpu" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_CPU" ) == 0 ) + device_type = CL_DEVICE_TYPE_CPU; + else if( strcmp( env_mode, "accelerator" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_ACCELERATOR" ) == 0 ) + device_type = CL_DEVICE_TYPE_ACCELERATOR; + else if( strcmp( env_mode, "default" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_DEFAULT" ) == 0 ) + device_type = CL_DEVICE_TYPE_DEFAULT; + else + { + log_error( "Unknown CL_DEVICE_TYPE env variable setting: %s.\nAborting...\n", env_mode ); + abort(); + } + } + +#if defined( __APPLE__ ) + { + // report on any unusual library search path indirection + char *libSearchPath = getenv( "DYLD_LIBRARY_PATH"); + if( libSearchPath ) + log_info( "*** DYLD_LIBRARY_PATH = \"%s\"\n", libSearchPath ); + + // report on any unusual framework search path indirection + char *frameworkSearchPath = getenv( "DYLD_FRAMEWORK_PATH"); + if( libSearchPath ) + log_info( "*** DYLD_FRAMEWORK_PATH = \"%s\"\n", frameworkSearchPath ); + } +#endif + + 
env_mode = getenv( "CL_DEVICE_INDEX" ); + if( env_mode != NULL ) + { + choosen_device_index = atoi(env_mode); + } + + env_mode = getenv( "CL_PLATFORM_INDEX" ); + if( env_mode != NULL ) + { + choosen_platform_index = atoi(env_mode); + } + + /* Process the command line arguments */ + + argc = parseCustomParam(argc, argv); + if (argc == -1) + { + test_finish(); + return 0; + } + + /* Special case: just list the tests */ + if( ( argc > 1 ) && (!strcmp( argv[ 1 ], "-list" ) || !strcmp( argv[ 1 ], "-h" ) || !strcmp( argv[ 1 ], "--help" ))) + { + log_info( "Usage: %s [*] [pid] [id] []\n", argv[0] ); + log_info( "\t\tOne or more of: (wildcard character '*') (default *)\n"); + log_info( "\tpid\t\tIndicates platform at index should be used (default 0).\n" ); + log_info( "\tid\t\tIndicates device at index should be used (default 0).\n" ); + log_info( "\t\tcpu|gpu|accelerator| (default CL_DEVICE_TYPE_DEFAULT)\n" ); + + for( i = 0; i < num_fns - 1; i++ ) + { + log_info( "\t\t%s\n", fnNames[ i ] ); + } + test_finish(); + return 0; + } + + /* How are we supposed to seed the random # generators? */ + if( argc > 1 && strcmp( argv[ argc - 1 ], "randomize" ) == 0 ) + { + gRandomSeed = (cl_uint) time( NULL ); + log_info( "Random seed: %u.\n", gRandomSeed ); + gReSeed = 1; + argc--; + } + else + { + log_info(" Initializing random seed to 0.\n"); + } + + /* Do we have an integer to specify the number of elements to pass to tests? */ + if( argc > 1 ) + { + ret = (int)strtol( argv[ argc - 1 ], &endPtr, 10 ); + if( endPtr != argv[ argc - 1 ] && *endPtr == 0 ) + { + /* By spec, this means the entire string was a valid integer, so we treat it as a num_elements spec */ + /* (hence why we stored the result in ret first) */ + num_elements = ret; + log_info( "Testing with num_elements of %d\n", num_elements ); + argc--; + } + } + + /* Do we have a CPU/GPU specification? 
*/ + if( argc > 1 ) + { + if( strcmp( argv[ argc - 1 ], "gpu" ) == 0 || strcmp( argv[ argc - 1 ], "CL_DEVICE_TYPE_GPU" ) == 0 ) + { + device_type = CL_DEVICE_TYPE_GPU; + argc--; + } + else if( strcmp( argv[ argc - 1 ], "cpu" ) == 0 || strcmp( argv[ argc - 1 ], "CL_DEVICE_TYPE_CPU" ) == 0 ) + { + device_type = CL_DEVICE_TYPE_CPU; + argc--; + } + else if( strcmp( argv[ argc - 1 ], "accelerator" ) == 0 || strcmp( argv[ argc - 1 ], "CL_DEVICE_TYPE_ACCELERATOR" ) == 0 ) + { + device_type = CL_DEVICE_TYPE_ACCELERATOR; + argc--; + } + else if( strcmp( argv[ argc - 1 ], "CL_DEVICE_TYPE_DEFAULT" ) == 0 ) + { + device_type = CL_DEVICE_TYPE_DEFAULT; + argc--; + } + } + + /* Did we choose a specific device index? */ + if( argc > 1 ) + { + if( strlen( argv[ argc - 1 ] ) >= 3 && argv[ argc - 1 ][0] == 'i' && argv[ argc - 1 ][1] == 'd' ) + { + choosen_device_index = atoi( &(argv[ argc - 1 ][2]) ); + argc--; + } + } + + /* Did we choose a specific platform index? */ + if( argc > 1 ) + { + if( strlen( argv[ argc - 1 ] ) >= 3 && argv[ argc - 1 ][0] == 'p' && argv[ argc - 1 ][1] == 'i' && argv[ argc - 1 ][2] == 'd') + { + choosen_platform_index = atoi( &(argv[ argc - 1 ][3]) ); + argc--; + } + } + + switch( device_type ) + { + case CL_DEVICE_TYPE_GPU: log_info( "Requesting GPU device " ); break; + case CL_DEVICE_TYPE_CPU: log_info( "Requesting CPU device " ); break; + case CL_DEVICE_TYPE_ACCELERATOR: log_info( "Requesting Accelerator device " ); break; + case CL_DEVICE_TYPE_DEFAULT: log_info( "Requesting Default device " ); break; + default: log_error( "Requesting unknown device "); return -1; + } + log_info( based_on_env_var ? 
"based on environment variable " : "based on command line " ); + log_info( "for platform index %d and device index %d\n", choosen_platform_index, choosen_device_index); + +#if defined( __APPLE__ ) +#if defined( __i386__ ) || defined( __x86_64__ ) +#define kHasSSE3 0x00000008 +#define kHasSupplementalSSE3 0x00000100 +#define kHasSSE4_1 0x00000400 +#define kHasSSE4_2 0x00000800 + /* check our environment for a hint to disable SSE variants */ + { + const char *env = getenv( "CL_MAX_SSE" ); + if( env ) + { + extern int _cpu_capabilities; + int mask = 0; + if( 0 == strcasecmp( env, "SSE4.1" ) ) + mask = kHasSSE4_2; + else if( 0 == strcasecmp( env, "SSSE3" ) ) + mask = kHasSSE4_2 | kHasSSE4_1; + else if( 0 == strcasecmp( env, "SSE3" ) ) + mask = kHasSSE4_2 | kHasSSE4_1 | kHasSupplementalSSE3; + else if( 0 == strcasecmp( env, "SSE2" ) ) + mask = kHasSSE4_2 | kHasSSE4_1 | kHasSupplementalSSE3 | kHasSSE3; + else + { + log_error( "Error: Unknown CL_MAX_SSE setting: %s\n", env ); + return -2; + } + + log_info( "*** Environment: CL_MAX_SSE = %s ***\n", env ); + _cpu_capabilities &= ~mask; + } + } +#endif +#endif + + /* Get the platform */ + err = clGetPlatformIDs(0, NULL, &num_platforms); + if (err) { + print_error(err, "clGetPlatformIDs failed"); + test_finish(); + return -1; + } + + platforms = (cl_platform_id *) malloc( num_platforms * sizeof( cl_platform_id ) ); + if (!platforms || choosen_platform_index >= num_platforms) { + log_error( "platform index out of range -- choosen_platform_index (%d) >= num_platforms (%d)\n", choosen_platform_index, num_platforms ); + test_finish(); + return -1; + } + BufferOwningPtr platformsBuf(platforms); + + err = clGetPlatformIDs(num_platforms, platforms, NULL); + if (err) { + print_error(err, "clGetPlatformIDs failed"); + test_finish(); + return -1; + } + + /* Get the number of requested devices */ + err = clGetDeviceIDs(platforms[choosen_platform_index], device_type, 0, NULL, &num_devices ); + if (err) { + print_error(err, 
"clGetDeviceIDs failed"); + test_finish(); + return -1; + } + + devices = (cl_device_id *) malloc( num_devices * sizeof( cl_device_id ) ); + if (!devices || choosen_device_index >= num_devices) { + log_error( "device index out of range -- choosen_device_index (%d) >= num_devices (%d)\n", choosen_device_index, num_devices ); + test_finish(); + return -1; + } + BufferOwningPtr devicesBuf(devices); + + + /* Get the requested device */ + err = clGetDeviceIDs(platforms[choosen_platform_index], device_type, num_devices, devices, NULL ); + if (err) { + print_error(err, "clGetDeviceIDs failed"); + test_finish(); + return -1; + } + + device = devices[choosen_device_index]; + + if( printDeviceHeader( device ) != CL_SUCCESS ) + { + test_finish(); + return -1; + } + + cl_device_fp_config fpconfig = 0; + err = clGetDeviceInfo( device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof( fpconfig ), &fpconfig, NULL ); + if (err) { + print_error(err, "clGetDeviceInfo for CL_DEVICE_SINGLE_FP_CONFIG failed"); + test_finish(); + return -1; + } + + gFlushDenormsToZero = ( 0 == (fpconfig & CL_FP_DENORM)); + log_info( "Supports single precision denormals: %s\n", gFlushDenormsToZero ? 
"NO" : "YES" ); + log_info( "sizeof( void*) = %d (host)\n", (int) sizeof( void* ) ); + + //detect whether profile of the device is embedded + char profile[1024] = ""; + err = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL); + if (err) + { + print_error(err, "clGetDeviceInfo for CL_DEVICE_PROFILE failed\n" ); + test_finish(); + return -1; + } + gIsEmbedded = NULL != strstr(profile, "EMBEDDED_PROFILE"); + + //detect the floating point capabilities + cl_device_fp_config floatCapabilities = 0; + err = clGetDeviceInfo(device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof(floatCapabilities), &floatCapabilities, NULL); + if (err) + { + print_error(err, "clGetDeviceInfo for CL_DEVICE_SINGLE_FP_CONFIG failed\n"); + test_finish(); + return -1; + } + + // Check for problems that only embedded will have + if( gIsEmbedded ) + { + //If the device is embedded, we need to detect if the device supports Infinity and NaN + if ((floatCapabilities & CL_FP_INF_NAN) == 0) + gInfNanSupport = 0; + + // check the extensions list to see if ulong and long are supported + size_t extensionsStringSize = 0; + if( (err = clGetDeviceInfo( device, CL_DEVICE_EXTENSIONS, 0, NULL, &extensionsStringSize ) )) + { + print_error( err, "Unable to get extensions string size for embedded device" ); + test_finish(); + return -1; + } + char *extensions_string = (char*) malloc(extensionsStringSize); + if( NULL == extensions_string ) + { + print_error( CL_OUT_OF_HOST_MEMORY, "Unable to allocate storage for extensions string for embedded device" ); + test_finish(); + return -1; + } + BufferOwningPtr extensions_stringBuf(extensions_string); + + if( (err = clGetDeviceInfo( device, CL_DEVICE_EXTENSIONS, extensionsStringSize, extensions_string, NULL ) )) + { + print_error( err, "Unable to get extensions string for embedded device" ); + test_finish(); + return -1; + } + + if( extensions_string[extensionsStringSize-1] != '\0' ) + { + log_error( "FAILURE: extensions string for embedded device is not NUL 
terminated" ); + test_finish(); + return -1; + } + + if( NULL == strstr( extensions_string, "cles_khr_int64" )) + gHasLong = 0; + } + + if( getenv( "OPENCL_1_0_DEVICE" ) ) + { + char c_version[1024]; + gIsOpenCL_1_0_Device = 1; + memset( c_version, 0, sizeof( c_version ) ); + + if( (err = clGetDeviceInfo( device, CL_DEVICE_OPENCL_C_VERSION, sizeof(c_version), c_version, NULL )) ) + { + log_error( "FAILURE: unable to get CL_DEVICE_OPENCL_C_VERSION on 1.0 device. (%d)\n", err ); + test_finish(); + return -1; + } + + if( 0 == strncmp( c_version, "OpenCL C 1.0 ", strlen( "OpenCL C 1.0 " ) ) ) + { + gIsOpenCL_C_1_0_Device = 1; + log_info( "Device is a OpenCL C 1.0 device\n" ); + } + else + log_info( "Device is a OpenCL 1.0 device, but supports OpenCL C 1.1\n" ); + } + + cl_uint device_address_bits = 0; + if( (err = clGetDeviceInfo( device, CL_DEVICE_ADDRESS_BITS, sizeof( device_address_bits ), &device_address_bits, NULL ) )) + { + print_error( err, "Unable to obtain device address bits" ); + test_finish(); + return -1; + } + if( device_address_bits ) + log_info( "sizeof( void*) = %d (device)\n", device_address_bits/8 ); + else + { + log_error("Invalid device address bit size returned by device.\n"); + test_finish(); + return -1; + } + + + /* If we have a device checking function, run it */ + if( ( deviceCheckFn != NULL ) && deviceCheckFn( device ) != CL_SUCCESS ) + { + test_finish(); + return -1; + } + + if (num_elements <= 0) + num_elements = DEFAULT_NUM_ELEMENTS; + + // On most platforms which support denorm, default is FTZ off. However, + // on some hardware where the reference is computed, default might be flush denorms to zero e.g. arm. + // This creates issues in result verification. Since spec allows the implementation to either flush or + // not flush denorms to zero, an implementation may choose not be flush i.e. return denorm result whereas + // reference result may be zero (flushed denorm). 
Hence we need to disable denorm flushing on host side + // where reference is being computed to make sure we get non-flushed reference result. If implementation + // returns flushed result, we correctly take care of that in verification code. +#if defined(__APPLE__) && defined(__arm__) + FPU_mode_type oldMode; + DisableFTZ( &oldMode ); +#endif + + int error = parseAndCallCommandLineTests( argc, argv, device, num_fns, fnList, fnNames, forceNoContextCreation, queueProps, num_elements ); + + #if defined(__APPLE__) && defined(__arm__) + // Restore the old FP mode before leaving. + RestoreFPState( &oldMode ); +#endif + + return error; +} + +static int find_wildcard_matching_functions( const char *fnNames[], unsigned char fnsToCall[], unsigned int num_fns, + const char *wildcard ) +{ + int found_tests = 0; + size_t wildcard_length = strlen( wildcard ) - 1; /* -1 for the asterisk */ + + for( unsigned int fnIndex = 0; fnIndex < num_fns; fnIndex++ ) + { + if( strncmp( fnNames[ fnIndex ], wildcard, wildcard_length ) == 0 ) + { + if( fnsToCall[ fnIndex ] ) + { + log_error( "ERROR: Test '%s' has already been selected.\n", fnNames[ fnIndex ] ); + return EXIT_FAILURE; + } + + fnsToCall[ fnIndex ] = 1; + found_tests = 1; + } + } + + if( !found_tests ) + { + log_error( "ERROR: The wildcard '%s' did not match any test names.\n", wildcard ); + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; +} + +static int find_argument_matching_function( const char *fnNames[], unsigned char *fnsToCall, unsigned int num_fns, + const char *argument ) +{ + unsigned int fnIndex; + + for( fnIndex = 0; fnIndex < num_fns; fnIndex++ ) + { + if( strcmp( argument, fnNames[ fnIndex ] ) == 0 ) + { + if( fnsToCall[ fnIndex ] ) + { + log_error( "ERROR: Test '%s' has already been selected.\n", fnNames[ fnIndex ] ); + return EXIT_FAILURE; + } + else + { + fnsToCall[ fnIndex ] = 1; + break; + } + } + } + + if( fnIndex == num_fns ) + { + log_error( "ERROR: The argument '%s' did not match any test names.\n", 
argument ); + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; +} + +int parseAndCallCommandLineTests( int argc, const char *argv[], cl_device_id device, unsigned int num_fns, + basefn fnList[], const char *fnNames[], int forceNoContextCreation, + cl_command_queue_properties queueProps, int num_elements ) +{ + int ret = EXIT_SUCCESS; + + unsigned char *fnsToCall = ( unsigned char* ) calloc( num_fns, 1 ); + + if( argc == 1 ) + { + /* No actual arguments, all tests will be run. */ + memset( fnsToCall, 1, num_fns ); + } + else + { + for( int argIndex = 1; argIndex < argc; argIndex++ ) + { + if( strchr( argv[ argIndex ], '*' ) != NULL ) + { + ret = find_wildcard_matching_functions( fnNames, fnsToCall, num_fns, argv[ argIndex ] ); + } + else + { + if( strcmp( argv[ argIndex ], "all" ) == 0 ) + { + memset( fnsToCall, 1, num_fns ); + break; + } + else + { + ret = find_argument_matching_function( fnNames, fnsToCall, num_fns, argv[ argIndex ] ); + } + } + + if( ret == EXIT_FAILURE ) + { + break; + } + } + } + + if( ret == EXIT_SUCCESS ) + { + ret = callTestFunctions( fnList, fnNames, fnsToCall, num_fns, device, forceNoContextCreation, num_elements, queueProps ); + + if( gTestsFailed == 0 ) + { + if( gTestsPassed > 1 ) + { + log_info("PASSED %d of %d tests.\n", gTestsPassed, gTestsPassed); + } + else if( gTestsPassed > 0 ) + { + log_info("PASSED test.\n"); + } + } + else if( gTestsFailed > 0 ) + { + if( gTestsFailed+gTestsPassed > 1 ) + { + log_error("FAILED %d of %d tests.\n", gTestsFailed, gTestsFailed+gTestsPassed); + } + else + { + log_error("FAILED test.\n"); + } + } + } + + test_finish(); + + free( fnsToCall ); + + return ret; +} + +int callTestFunctions( basefn functionList[], const char *functionNames[], unsigned char functionsToCall[], + int numFunctions, cl_device_id deviceToUse, int forceNoContextCreation, + int numElementsToUse, cl_command_queue_properties queueProps ) +{ + int numErrors = 0; + + for( int i = 0; i < numFunctions; ++i ) + { + if( functionsToCall[ 
i ] ) + { + /* Skip any unimplemented tests. */ + if( functionList[ i ] != NULL ) + { + numErrors += callSingleTestFunction( functionList[ i ], functionNames[ i ], deviceToUse, + forceNoContextCreation, numElementsToUse, queueProps ); + } + else + { + log_info( "%s test currently not implemented\n", functionNames[ i ] ); + } + } + } + + return numErrors; +} + +void CL_CALLBACK notify_callback(const char *errinfo, const void *private_info, size_t cb, void *user_data) +{ + log_info( "%s\n", errinfo ); +} + +// Actual function execution +int callSingleTestFunction( basefn functionToCall, const char *functionName, + cl_device_id deviceToUse, int forceNoContextCreation, + int numElementsToUse, const cl_queue_properties queueProps ) +{ + int numErrors = 0, ret; + cl_int error; + cl_context context = NULL; + cl_command_queue queue = NULL; + const cl_command_queue_properties cmd_queueProps = (queueProps)?CL_QUEUE_PROPERTIES:0; + cl_command_queue_properties queueCreateProps[] = {cmd_queueProps, queueProps, 0}; + + /* Create a context to work with, unless we're told not to */ + if( !forceNoContextCreation ) + { + context = clCreateContext(NULL, 1, &deviceToUse, notify_callback, NULL, &error ); + if (!context) + { + print_error( error, "Unable to create testing context" ); + return 1; + } + + queue = clCreateCommandQueueWithProperties( context, deviceToUse, &queueCreateProps[0], &error ); + if( queue == NULL ) + { + print_error( error, "Unable to create testing command queue" ); + return 1; + } + } + + /* Run the test and print the result */ + log_info( "%s...\n", functionName ); + fflush( stdout ); + + error = check_opencl_version_with_testname(functionName, deviceToUse); + test_missing_feature(error, functionName); + + error = check_functions_for_offline_compiler(functionName, deviceToUse); + test_missing_support_offline_cmpiler(error, functionName); + + ret = functionToCall( deviceToUse, context, queue, numElementsToUse); //test_threaded_function( ptr_basefn_list[i], 
group, context, num_elements); + if( ret == TEST_NOT_IMPLEMENTED ) + { + /* Tests can also let us know they're not implemented yet */ + log_info("%s test currently not implemented\n\n", functionName); + } + else + { + /* Print result */ + if( ret == 0 ) { + log_info( "%s PASSED\n", functionName ); + gTestsPassed++; + } + else + { + numErrors++; + log_error( "%s FAILED\n", functionName ); + gTestsFailed++; + } + } + + /* Release the context */ + if( !forceNoContextCreation ) + { + int error = clFinish(queue); + if (error) { + log_error("clFinish failed: %d", error); + numErrors++; + } + clReleaseCommandQueue( queue ); + clReleaseContext( context ); + } + + return numErrors; +} + +void checkDeviceTypeOverride( cl_device_type *inOutType ) +{ + /* Check if we are forced to CPU mode */ + char *force_cpu = getenv( "CL_DEVICE_TYPE" ); + if( force_cpu != NULL ) + { + if( strcmp( force_cpu, "gpu" ) == 0 || strcmp( force_cpu, "CL_DEVICE_TYPE_GPU" ) == 0 ) + *inOutType = CL_DEVICE_TYPE_GPU; + else if( strcmp( force_cpu, "cpu" ) == 0 || strcmp( force_cpu, "CL_DEVICE_TYPE_CPU" ) == 0 ) + *inOutType = CL_DEVICE_TYPE_CPU; + else if( strcmp( force_cpu, "accelerator" ) == 0 || strcmp( force_cpu, "CL_DEVICE_TYPE_ACCELERATOR" ) == 0 ) + *inOutType = CL_DEVICE_TYPE_ACCELERATOR; + else if( strcmp( force_cpu, "CL_DEVICE_TYPE_DEFAULT" ) == 0 ) + *inOutType = CL_DEVICE_TYPE_DEFAULT; + } + + switch( *inOutType ) + { + case CL_DEVICE_TYPE_GPU: log_info( "Requesting GPU device " ); break; + case CL_DEVICE_TYPE_CPU: log_info( "Requesting CPU device " ); break; + case CL_DEVICE_TYPE_ACCELERATOR: log_info( "Requesting Accelerator device " ); break; + case CL_DEVICE_TYPE_DEFAULT: log_info( "Requesting Default device " ); break; + default: break; + } + log_info( force_cpu != NULL ? 
"based on environment variable\n" : "based on command line\n" ); + +#if defined( __APPLE__ ) + { + // report on any unusual library search path indirection + char *libSearchPath = getenv( "DYLD_LIBRARY_PATH"); + if( libSearchPath ) + log_info( "*** DYLD_LIBRARY_PATH = \"%s\"\n", libSearchPath ); + + // report on any unusual framework search path indirection + char *frameworkSearchPath = getenv( "DYLD_FRAMEWORK_PATH"); + if( libSearchPath ) + log_info( "*** DYLD_FRAMEWORK_PATH = \"%s\"\n", frameworkSearchPath ); + } +#endif + +} + +#if ! defined( __APPLE__ ) +void memset_pattern4(void *dest, const void *src_pattern, size_t bytes ) +{ + uint32_t pat = ((uint32_t*) src_pattern)[0]; + size_t count = bytes / 4; + size_t i; + uint32_t *d = (uint32_t*)dest; + + for( i = 0; i < count; i++ ) + d[i] = pat; + + d += i; + + bytes &= 3; + if( bytes ) + memcpy( d, src_pattern, bytes ); +} +#endif + +extern cl_device_type GetDeviceType( cl_device_id d ) +{ + cl_device_type result = -1; + cl_int err = clGetDeviceInfo( d, CL_DEVICE_TYPE, sizeof( result ), &result, NULL ); + if( CL_SUCCESS != err ) + log_error( "ERROR: Unable to get device type for device %p\n", d ); + return result; +} + + +cl_device_id GetOpposingDevice( cl_device_id device ) +{ + cl_int error; + cl_device_id *otherDevices; + cl_uint actualCount; + cl_platform_id plat; + + // Get the platform of the device to use for getting a list of devices + error = clGetDeviceInfo( device, CL_DEVICE_PLATFORM, sizeof( plat ), &plat, NULL ); + if( error != CL_SUCCESS ) + { + print_error( error, "Unable to get device's platform" ); + return NULL; + } + + // Get a list of all devices + error = clGetDeviceIDs( plat, CL_DEVICE_TYPE_ALL, 0, NULL, &actualCount ); + if( error != CL_SUCCESS ) + { + print_error( error, "Unable to get list of devices size" ); + return NULL; + } + otherDevices = (cl_device_id *)malloc(actualCount*sizeof(cl_device_id)); + if (NULL == otherDevices) { + print_error( error, "Unable to allocate list of other 
devices." ); + return NULL; + } + BufferOwningPtr otherDevicesBuf(otherDevices); + + error = clGetDeviceIDs( plat, CL_DEVICE_TYPE_ALL, actualCount, otherDevices, NULL ); + if( error != CL_SUCCESS ) + { + print_error( error, "Unable to get list of devices" ); + return NULL; + } + + if( actualCount == 1 ) + { + return device; // NULL means error, returning self means we couldn't find another one + } + + // Loop and just find one that isn't the one we were given + cl_uint i; + for( i = 0; i < actualCount; i++ ) + { + if( otherDevices[ i ] != device ) + { + cl_device_type newType; + error = clGetDeviceInfo( otherDevices[ i ], CL_DEVICE_TYPE, sizeof( newType ), &newType, NULL ); + if( error != CL_SUCCESS ) + { + print_error( error, "Unable to get device type for other device" ); + return NULL; + } + cl_device_id result = otherDevices[ i ]; + return result; + } + } + + // Should never get here + return NULL; +} diff --git a/test_common/harness/testHarness.h b/test_common/harness/testHarness.h new file mode 100644 index 00000000..2816b2a0 --- /dev/null +++ b/test_common/harness/testHarness.h @@ -0,0 +1,102 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef _testHarness_h +#define _testHarness_h + +#include "threadTesting.h" +#include "clImageHelper.h" + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +extern cl_uint gReSeed; +extern cl_uint gRandomSeed; + +// Supply a list of functions to test here. This will allocate a CL device, create a context, all that +// setup work, and then call each function in turn as dictatated by the passed arguments. +extern int runTestHarness( int argc, const char *argv[], unsigned int num_fns, + basefn fnList[], const char *fnNames[], + int imageSupportRequired, int forceNoContextCreation, cl_command_queue_properties queueProps ); + +// Device checking function. See runTestHarnessWithCheck. If this function returns anything other than CL_SUCCESS (0), the harness exits. +typedef int (*DeviceCheckFn)( cl_device_id device ); + +// Same as runTestHarness, but also supplies a function that checks the created device for required functionality. +extern int runTestHarnessWithCheck( int argc, const char *argv[], unsigned int num_fns, + basefn fnList[], const char *fnNames[], + int imageSupportRequired, int forceNoContextCreation, cl_command_queue_properties queueProps, DeviceCheckFn deviceCheckFn ); + +// The command line parser used by runTestHarness to break up parameters into calls to callTestFunctions +extern int parseAndCallCommandLineTests( int argc, const char *argv[], cl_device_id device, unsigned int num_fns, + basefn *fnList, const char *fnNames[], + int forceNoContextCreation, cl_command_queue_properties queueProps, int num_elements ); + +// Call this function if you need to do all the setup work yourself, and just need the function list called/ +// managed. 
+// functionList is the actual array of functions +// functionNames is an array of strings representing the name of each function +// functionsToCall is an array of integers (treated as bools) which tell which function is to be called, +// each element at index i, corresponds to the element in functionList at index i +// numFunctions is the number of elements in the arrays +// contextProps are used to create a testing context for each test +// deviceToUse and numElementsToUse are all just passed to each test function +extern int callTestFunctions( basefn functionList[], const char *functionNames[], unsigned char functionsToCall[], + int numFunctions, cl_device_id deviceToUse, int forceNoContextCreation, + int numElementsToUse, cl_command_queue_properties queueProps ); + +// This function is called by callTestFunctions, once per function, to do setup, call, logging and cleanup +extern int callSingleTestFunction( basefn functionToCall, const char *functionName, + cl_device_id deviceToUse, int forceNoContextCreation, + int numElementsToUse, cl_command_queue_properties queueProps ); + +///// Miscellaneous steps + +// Given a pre-existing device type choice, check the environment for an override, then print what +// choice was made and how (and return the overridden choice, if there is one) +extern void checkDeviceTypeOverride( cl_device_type *inOutType ); + +// standard callback function for context pfn_notify +extern void CL_CALLBACK notify_callback(const char *errinfo, const void *private_info, size_t cb, void *user_data); + +extern cl_device_type GetDeviceType( cl_device_id ); + +// Given a device (most likely passed in by the harness, but not required), will attempt to find +// a DIFFERENT device and return it. Useful for finding another device to run multi-device tests against. +// Note that returning NULL means an error was hit, but if no error was hit and the device passed in +// is the only device available, the SAME device is returned, so check! 
+extern cl_device_id GetOpposingDevice( cl_device_id device ); + + +extern int gFlushDenormsToZero; // This is set to 1 if the device does not support denorms (CL_FP_DENORM) +extern int gInfNanSupport; // This is set to 1 if the device supports infinities and NaNs +extern int gIsEmbedded; // This is set to 1 if the device is an embedded device +extern int gHasLong; // This is set to 1 if the device suppots long and ulong types in OpenCL C. +extern int gIsOpenCL_C_1_0_Device; // This is set to 1 if the device supports only OpenCL C 1.0. + +#if ! defined( __APPLE__ ) + void memset_pattern4(void *, const void *, size_t); +#endif + +#ifdef __cplusplus +} +#endif + +#endif // _testHarness_h + + diff --git a/test_common/harness/test_mt19937.c b/test_common/harness/test_mt19937.c new file mode 100644 index 00000000..c0498ea9 --- /dev/null +++ b/test_common/harness/test_mt19937.c @@ -0,0 +1,51 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "mt19937.h" +#include + +int main( void ) +{ + MTdata d = init_genrand(42); + int i; + const cl_uint reference[16] = { 0x5fe1dc66, 0x8b255210, 0x0380b0c8, 0xc87d2ce4, + 0x55c31f24, 0x8bcd21ab, 0x14d5fef5, 0x9416d2b6, + 0xdf875de9, 0x00517d76, 0xd861c944, 0xa7676404, + 0x5491aff4, 0x67616209, 0xc368b3fb, 0x929dfc92 }; + int errcount = 0; + + for( i = 0; i < 65536; i++ ) + { + cl_uint u = genrand_int32( d ); + if( 0 == (i & 4095) ) + { + if( u != reference[i>>12] ) + { + printf("ERROR: expected *0x%8.8x at %d. Got 0x%8.8x\n", reference[i>>12], i, u ); + errcount++; + } + } + } + + free_mtdata(d); + + if( errcount ) + printf("mt19937 test failed.\n"); + else + printf("mt19937 test passed.\n"); + + + return 0; +} \ No newline at end of file diff --git a/test_common/harness/threadTesting.c b/test_common/harness/threadTesting.c new file mode 100644 index 00000000..1a07f974 --- /dev/null +++ b/test_common/harness/threadTesting.c @@ -0,0 +1,100 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "compat.h" +#include "threadTesting.h" +#include "errorHelpers.h" +#include +#include + +#if !defined(_WIN32) +#include +#endif + +#if 0 // Disabed for now + +typedef struct +{ + basefn mFunction; + cl_device_id mDevice; + cl_context mContext; + int mNumElements; +} TestFnArgs; + +//////////////////////////////////////////////////////////////////////////////// +// Thread-based testing. 
Spawns a new thread to run the given test function, +// then waits for it to complete. The entire idea is that, if the thread crashes, +// we can catch it and report it as a failure instead of crashing the entire suite +//////////////////////////////////////////////////////////////////////////////// + +void *test_thread_wrapper( void *data ) +{ + TestFnArgs *args; + int retVal; + cl_context context; + + args = (TestFnArgs *)data; + + /* Create a new context to use (contexts can't cross threads) */ + context = clCreateContext(NULL, args->mDeviceGroup); + if( context == NULL ) + { + log_error("clCreateContext failed for new thread\n"); + return (void *)(-1); + } + + /* Call function */ + retVal = args->mFunction( args->mDeviceGroup, args->mDevice, context, args->mNumElements ); + + clReleaseContext( context ); + + return (void *)retVal; +} + +int test_threaded_function( basefn fnToTest, cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) +{ + int error; + pthread_t threadHdl; + void *retVal; + TestFnArgs args; + + + args.mFunction = fnToTest; + args.mDeviceGroup = deviceGroup; + args.mDevice = device; + args.mContext = context; + args.mNumElements = numElements; + + + error = pthread_create( &threadHdl, NULL, test_thread_wrapper, (void *)&args ); + if( error != 0 ) + { + log_error( "ERROR: Unable to create thread for testing!\n" ); + return -1; + } + + /* Thread has been started, now just wait for it to complete (or crash) */ + error = pthread_join( threadHdl, &retVal ); + if( error != 0 ) + { + log_error( "ERROR: Unable to join testing thread!\n" ); + return -1; + } + + return (int)((intptr_t)retVal); +} +#endif + + diff --git a/test_common/harness/threadTesting.h b/test_common/harness/threadTesting.h new file mode 100644 index 00000000..81a5757b --- /dev/null +++ b/test_common/harness/threadTesting.h @@ -0,0 +1,32 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef _threadTesting_h +#define _threadTesting_h + +#ifdef __APPLE__ + #include +#else + #include +#endif + +#define TEST_NOT_IMPLEMENTED -99 + +typedef int (*basefn)(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_threaded_function( basefn fnToTest, cl_device_id device, cl_context context, cl_command_queue queue, int numElements ); + +#endif // _threadTesting_h + + diff --git a/test_common/harness/typeWrappers.cpp b/test_common/harness/typeWrappers.cpp new file mode 100644 index 00000000..d4e08fb9 --- /dev/null +++ b/test_common/harness/typeWrappers.cpp @@ -0,0 +1,481 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "typeWrappers.h" +#include "kernelHelpers.h" +#include "errorHelpers.h" +#include +#include "clImageHelper.h" + +#define ROUND_SIZE_UP( _size, _align ) (((size_t)(_size) + (size_t)(_align) - 1) & -((size_t)(_align))) + +#if defined( __APPLE__ ) + #define kPageSize 4096 + #include + #include +#elif defined(__linux__) + #include + #define kPageSize (getpagesize()) +#endif + +clProtectedImage::clProtectedImage( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, cl_int *errcode_ret ) +{ + cl_int err = Create( context, mem_flags, fmt, width ); + if( errcode_ret != NULL ) + *errcode_ret = err; +} + +cl_int clProtectedImage::Create( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width ) +{ + cl_int error; +#if defined( __APPLE__ ) + int protect_pages = 1; + cl_device_id devices[16]; + size_t number_of_devices; + error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices), devices, &number_of_devices); + test_error(error, "clGetContextInfo for CL_CONTEXT_DEVICES failed"); + + number_of_devices /= sizeof(cl_device_id); + for (int i=0; i<(int)number_of_devices; i++) { + cl_device_type type; + error = clGetDeviceInfo(devices[i], CL_DEVICE_TYPE, sizeof(type), &type, NULL); + test_error(error, "clGetDeviceInfo for CL_DEVICE_TYPE failed"); + if (type == CL_DEVICE_TYPE_GPU) { + protect_pages = 0; + break; + } + } + + if (protect_pages) { + size_t pixelBytes = get_pixel_bytes(fmt); + size_t rowBytes = ROUND_SIZE_UP( width * pixelBytes, kPageSize ); + size_t rowStride = rowBytes + kPageSize; + + // create backing store + backingStoreSize = rowStride + 8 * rowStride; + backingStore = mmap(0, backingStoreSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, 0, 0); + + // add guard pages + size_t row; + char *p = (char*) backingStore; + char *imagePtr = (char*) backingStore + 4 * rowStride; + for( row = 0; row < 4; row++ ) + { + mprotect( p, rowStride, PROT_NONE ); p += rowStride; + } + p += 
rowBytes; + mprotect( p, kPageSize, PROT_NONE ); p += rowStride; + p -= rowBytes; + for( row = 0; row < 4; row++ ) + { + mprotect( p, rowStride, PROT_NONE ); p += rowStride; + } + + if( getenv( "CL_ALIGN_RIGHT" ) ) + { + static int spewEnv = 1; + if(spewEnv) + { + log_info( "***CL_ALIGN_RIGHT is set. Aligning images at right edge of page\n" ); + spewEnv = 0; + } + imagePtr += rowBytes - pixelBytes * width; + } + + image = create_image_1d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, rowStride, imagePtr, NULL, &error ); + } else { + backingStore = NULL; + image = create_image_1d( context, mem_flags, fmt, width, 0, NULL, NULL, &error ); + + } +#else + + backingStore = NULL; + image = create_image_1d( context, mem_flags, fmt, width, 0, NULL, NULL, &error ); + +#endif + return error; +} + + +clProtectedImage::clProtectedImage( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, cl_int *errcode_ret ) +{ + cl_int err = Create( context, mem_flags, fmt, width, height ); + if( errcode_ret != NULL ) + *errcode_ret = err; +} + +cl_int clProtectedImage::Create( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height ) +{ + cl_int error; +#if defined( __APPLE__ ) + int protect_pages = 1; + cl_device_id devices[16]; + size_t number_of_devices; + error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices), devices, &number_of_devices); + test_error(error, "clGetContextInfo for CL_CONTEXT_DEVICES failed"); + + number_of_devices /= sizeof(cl_device_id); + for (int i=0; i<(int)number_of_devices; i++) { + cl_device_type type; + error = clGetDeviceInfo(devices[i], CL_DEVICE_TYPE, sizeof(type), &type, NULL); + test_error(error, "clGetDeviceInfo for CL_DEVICE_TYPE failed"); + if (type == CL_DEVICE_TYPE_GPU) { + protect_pages = 0; + break; + } + } + + if (protect_pages) { + size_t pixelBytes = get_pixel_bytes(fmt); + size_t rowBytes = ROUND_SIZE_UP( width * pixelBytes, 
kPageSize ); + size_t rowStride = rowBytes + kPageSize; + + // create backing store + backingStoreSize = height * rowStride + 8 * rowStride; + backingStore = mmap(0, backingStoreSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, 0, 0); + + // add guard pages + size_t row; + char *p = (char*) backingStore; + char *imagePtr = (char*) backingStore + 4 * rowStride; + for( row = 0; row < 4; row++ ) + { + mprotect( p, rowStride, PROT_NONE ); p += rowStride; + } + p += rowBytes; + for( row = 0; row < height; row++ ) + { + mprotect( p, kPageSize, PROT_NONE ); p += rowStride; + } + p -= rowBytes; + for( row = 0; row < 4; row++ ) + { + mprotect( p, rowStride, PROT_NONE ); p += rowStride; + } + + if( getenv( "CL_ALIGN_RIGHT" ) ) + { + static int spewEnv = 1; + if(spewEnv) + { + log_info( "***CL_ALIGN_RIGHT is set. Aligning images at right edge of page\n" ); + spewEnv = 0; + } + imagePtr += rowBytes - pixelBytes * width; + } + + image = create_image_2d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, height, rowStride, imagePtr, &error ); + } else { + backingStore = NULL; + image = create_image_2d( context, mem_flags, fmt, width, height, 0, NULL, &error ); + + } +#else + + backingStore = NULL; + image = create_image_2d( context, mem_flags, fmt, width, height, 0, NULL, &error ); + +#endif + return error; +} + +clProtectedImage::clProtectedImage( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, cl_int *errcode_ret ) +{ + cl_int err = Create( context, mem_flags, fmt, width, height, depth ); + if( errcode_ret != NULL ) + *errcode_ret = err; +} + +cl_int clProtectedImage::Create( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth ) +{ + cl_int error; + +#if defined( __APPLE__ ) + int protect_pages = 1; + cl_device_id devices[16]; + size_t number_of_devices; + error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices), devices, 
&number_of_devices); + test_error(error, "clGetContextInfo for CL_CONTEXT_DEVICES failed"); + + number_of_devices /= sizeof(cl_device_id); + for (int i=0; i<(int)number_of_devices; i++) { + cl_device_type type; + error = clGetDeviceInfo(devices[i], CL_DEVICE_TYPE, sizeof(type), &type, NULL); + test_error(error, "clGetDeviceInfo for CL_DEVICE_TYPE failed"); + if (type == CL_DEVICE_TYPE_GPU) { + protect_pages = 0; + break; + } + } + + if (protect_pages) { + size_t pixelBytes = get_pixel_bytes(fmt); + size_t rowBytes = ROUND_SIZE_UP( width * pixelBytes, kPageSize ); + size_t rowStride = rowBytes + kPageSize; + + // create backing store + backingStoreSize = height * depth * rowStride + 8 * rowStride; + backingStore = mmap(0, backingStoreSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, 0, 0); + + // add guard pages + size_t row; + char *p = (char*) backingStore; + char *imagePtr = (char*) backingStore + 4 * rowStride; + for( row = 0; row < 4; row++ ) + { + mprotect( p, rowStride, PROT_NONE ); p += rowStride; + } + p += rowBytes; + for( row = 0; row < height*depth; row++ ) + { + mprotect( p, kPageSize, PROT_NONE ); p += rowStride; + } + p -= rowBytes; + for( row = 0; row < 4; row++ ) + { + mprotect( p, rowStride, PROT_NONE ); p += rowStride; + } + + if( getenv( "CL_ALIGN_RIGHT" ) ) + { + static int spewEnv = 1; + if(spewEnv) + { + log_info( "***CL_ALIGN_RIGHT is set. 
Aligning images at right edge of page\n" ); + spewEnv = 0; + } + imagePtr += rowBytes - pixelBytes * width; + } + + image = create_image_3d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, height, depth, rowStride, height*rowStride, imagePtr, &error ); + } else { + backingStore = NULL; + image = create_image_3d( context, mem_flags, fmt, width, height, depth, 0, 0, NULL, &error ); + } +#else + + backingStore = NULL; + image = create_image_3d( context, mem_flags, fmt, width, height, depth, 0, 0, NULL, &error ); + +#endif + + return error; +} + + +clProtectedImage::clProtectedImage( cl_context context, cl_mem_object_type imageType, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, size_t arraySize, cl_int *errcode_ret ) +{ + cl_int err = Create( context, imageType, mem_flags, fmt, width, height, depth, arraySize ); + if( errcode_ret != NULL ) + *errcode_ret = err; +} + +cl_int clProtectedImage::Create( cl_context context, cl_mem_object_type imageType, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, size_t arraySize ) +{ + cl_int error; +#if defined( __APPLE__ ) + int protect_pages = 1; + cl_device_id devices[16]; + size_t number_of_devices; + error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices), devices, &number_of_devices); + test_error(error, "clGetContextInfo for CL_CONTEXT_DEVICES failed"); + + number_of_devices /= sizeof(cl_device_id); + for (int i=0; i<(int)number_of_devices; i++) { + cl_device_type type; + error = clGetDeviceInfo(devices[i], CL_DEVICE_TYPE, sizeof(type), &type, NULL); + test_error(error, "clGetDeviceInfo for CL_DEVICE_TYPE failed"); + if (type == CL_DEVICE_TYPE_GPU) { + protect_pages = 0; + break; + } + } + + if (protect_pages) { + size_t pixelBytes = get_pixel_bytes(fmt); + size_t rowBytes = ROUND_SIZE_UP( width * pixelBytes, kPageSize ); + size_t rowStride = rowBytes + kPageSize; + + // create backing store + switch (imageType) + 
{ + case CL_MEM_OBJECT_IMAGE1D: + backingStoreSize = rowStride + 8 * rowStride; + break; + case CL_MEM_OBJECT_IMAGE2D: + backingStoreSize = height * rowStride + 8 * rowStride; + break; + case CL_MEM_OBJECT_IMAGE3D: + backingStoreSize = height * depth * rowStride + 8 * rowStride; + break; + case CL_MEM_OBJECT_IMAGE1D_ARRAY: + backingStoreSize = arraySize * rowStride + 8 * rowStride; + break; + case CL_MEM_OBJECT_IMAGE2D_ARRAY: + backingStoreSize = height * arraySize * rowStride + 8 * rowStride; + break; + } + backingStore = mmap(0, backingStoreSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, 0, 0); + + // add guard pages + size_t row; + char *p = (char*) backingStore; + char *imagePtr = (char*) backingStore + 4 * rowStride; + for( row = 0; row < 4; row++ ) + { + mprotect( p, rowStride, PROT_NONE ); p += rowStride; + } + p += rowBytes; + size_t sz = (height > 0 ? height : 1) * (depth > 0 ? depth : 1) * (arraySize > 0 ? arraySize : 1); + for( row = 0; row < sz; row++ ) + { + mprotect( p, kPageSize, PROT_NONE ); p += rowStride; + } + p -= rowBytes; + for( row = 0; row < 4; row++ ) + { + mprotect( p, rowStride, PROT_NONE ); p += rowStride; + } + + if( getenv( "CL_ALIGN_RIGHT" ) ) + { + static int spewEnv = 1; + if(spewEnv) + { + log_info( "***CL_ALIGN_RIGHT is set. 
Aligning images at right edge of page\n" ); + spewEnv = 0; + } + imagePtr += rowBytes - pixelBytes * width; + } + + switch (imageType) + { + case CL_MEM_OBJECT_IMAGE1D: + image = create_image_1d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, rowStride, imagePtr, NULL, &error ); + break; + case CL_MEM_OBJECT_IMAGE2D: + image = create_image_2d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, height, rowStride, imagePtr, &error ); + break; + case CL_MEM_OBJECT_IMAGE3D: + image = create_image_3d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, height, depth, rowStride, height*rowStride, imagePtr, &error ); + break; + case CL_MEM_OBJECT_IMAGE1D_ARRAY: + image = create_image_1d_array( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, arraySize, rowStride, rowStride, imagePtr, &error ); + break; + case CL_MEM_OBJECT_IMAGE2D_ARRAY: + image = create_image_2d_array( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, height, arraySize, rowStride, height*rowStride, imagePtr, &error ); + break; + } + } else { + backingStore = NULL; + switch (imageType) + { + case CL_MEM_OBJECT_IMAGE1D: + image = create_image_1d( context, mem_flags, fmt, width, 0, NULL, NULL, &error ); + break; + case CL_MEM_OBJECT_IMAGE2D: + image = create_image_2d( context, mem_flags, fmt, width, height, 0, NULL, &error ); + break; + case CL_MEM_OBJECT_IMAGE3D: + image = create_image_3d( context, mem_flags, fmt, width, height, depth, 0, 0, NULL, &error );; + break; + case CL_MEM_OBJECT_IMAGE1D_ARRAY: + image = create_image_1d_array( context, mem_flags, fmt, width, arraySize, 0, 0, NULL, &error ); + break; + case CL_MEM_OBJECT_IMAGE2D_ARRAY: + image = create_image_2d_array( context, mem_flags, fmt, width, height, arraySize, 0, 0, NULL, &error ); + break; + } + + } +#else + + backingStore = NULL; + switch (imageType) + { + case CL_MEM_OBJECT_IMAGE1D: + image = create_image_1d( context, mem_flags, fmt, width, 0, NULL, NULL, &error ); + break; + case CL_MEM_OBJECT_IMAGE2D: + image = 
create_image_2d( context, mem_flags, fmt, width, height, 0, NULL, &error ); + break; + case CL_MEM_OBJECT_IMAGE3D: + image = create_image_3d( context, mem_flags, fmt, width, height, depth, 0, 0, NULL, &error );; + break; + case CL_MEM_OBJECT_IMAGE1D_ARRAY: + image = create_image_1d_array( context, mem_flags, fmt, width, arraySize, 0, 0, NULL, &error ); + break; + case CL_MEM_OBJECT_IMAGE2D_ARRAY: + image = create_image_2d_array( context, mem_flags, fmt, width, height, arraySize, 0, 0, NULL, &error ); + break; + } +#endif + return error; +} + + + +/******* + * clProtectedArray implementation + *******/ +clProtectedArray::clProtectedArray() +{ + mBuffer = mValidBuffer = NULL; +} + +clProtectedArray::clProtectedArray( size_t sizeInBytes ) +{ + mBuffer = mValidBuffer = NULL; + Allocate( sizeInBytes ); +} + +clProtectedArray::~clProtectedArray() +{ + if( mBuffer != NULL ) { +#if defined( __APPLE__ ) + int error = munmap( mBuffer, mRealSize ); + if (error) log_error("WARNING: munmap failed in clProtectedArray.\n"); +#else + free( mBuffer ); +#endif + } +} + +void clProtectedArray::Allocate( size_t sizeInBytes ) +{ + +#if defined( __APPLE__ ) + + // Allocate enough space to: round up our actual allocation to an even number of pages + // and allocate two pages on either side + mRoundedSize = ROUND_SIZE_UP( sizeInBytes, kPageSize ); + mRealSize = mRoundedSize + kPageSize * 2; + + // Use mmap here to ensure we start on a page boundary, so the mprotect calls will work OK + mBuffer = (char *)mmap(0, mRealSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, 0, 0); + + mValidBuffer = mBuffer + kPageSize; + + // Protect guard area from access + mprotect( mValidBuffer - kPageSize, kPageSize, PROT_NONE ); + mprotect( mValidBuffer + mRoundedSize, kPageSize, PROT_NONE ); +#else + mRoundedSize = mRealSize = sizeInBytes; + mBuffer = mValidBuffer = (char *)calloc(1, mRealSize); +#endif +} + + diff --git a/test_common/harness/typeWrappers.h b/test_common/harness/typeWrappers.h new file 
mode 100644 index 00000000..d767a99d --- /dev/null +++ b/test_common/harness/typeWrappers.h @@ -0,0 +1,332 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef _typeWrappers_h +#define _typeWrappers_h + +#include +#include + +#if !defined(_WIN32) +#include +#endif + +#include "compat.h" +#include +#include "mt19937.h" +#include "errorHelpers.h" +#include "kernelHelpers.h" + +extern "C" cl_uint gReSeed; +extern "C" cl_uint gRandomSeed; + +/* cl_context wrapper */ + +class clContextWrapper +{ + public: + clContextWrapper() { mContext = NULL; } + clContextWrapper( cl_context program ) { mContext = program; } + ~clContextWrapper() { if( mContext != NULL ) clReleaseContext( mContext ); } + + clContextWrapper & operator=( const cl_context &rhs ) { mContext = rhs; return *this; } + operator cl_context() const { return mContext; } + + cl_context * operator&() { return &mContext; } + + bool operator==( const cl_context &rhs ) { return mContext == rhs; } + + protected: + + cl_context mContext; +}; + +/* cl_program wrapper */ + +class clProgramWrapper +{ + public: + clProgramWrapper() { mProgram = NULL; } + clProgramWrapper( cl_program program ) { mProgram = program; } + ~clProgramWrapper() { if( mProgram != NULL ) clReleaseProgram( mProgram ); } + + clProgramWrapper & operator=( const cl_program &rhs ) { mProgram = rhs; return *this; } + operator cl_program() const { return mProgram; } + + cl_program * 
operator&() { return &mProgram; } + + bool operator==( const cl_program &rhs ) { return mProgram == rhs; } + + protected: + + cl_program mProgram; +}; + +/* cl_kernel wrapper */ + +class clKernelWrapper +{ + public: + clKernelWrapper() { mKernel = NULL; } + clKernelWrapper( cl_kernel kernel ) { mKernel = kernel; } + ~clKernelWrapper() { if( mKernel != NULL ) clReleaseKernel( mKernel ); } + + clKernelWrapper & operator=( const cl_kernel &rhs ) { mKernel = rhs; return *this; } + operator cl_kernel() const { return mKernel; } + + cl_kernel * operator&() { return &mKernel; } + + bool operator==( const cl_kernel &rhs ) { return mKernel == rhs; } + + protected: + + cl_kernel mKernel; +}; + +/* cl_mem (stream) wrapper */ + +class clMemWrapper +{ + public: + clMemWrapper() { mMem = NULL; } + clMemWrapper( cl_mem mem ) { mMem = mem; } + ~clMemWrapper() { if( mMem != NULL ) clReleaseMemObject( mMem ); } + + clMemWrapper & operator=( const cl_mem &rhs ) { mMem = rhs; return *this; } + operator cl_mem() const { return mMem; } + + cl_mem * operator&() { return &mMem; } + + bool operator==( const cl_mem &rhs ) { return mMem == rhs; } + + protected: + + cl_mem mMem; +}; + +class clProtectedImage +{ + public: + clProtectedImage() { image = NULL; backingStore = NULL; } + clProtectedImage( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, cl_int *errcode_ret ); + clProtectedImage( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, cl_int *errcode_ret ); + clProtectedImage( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, cl_int *errcode_ret ); + clProtectedImage( cl_context context, cl_mem_object_type imageType, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, size_t arraySize, cl_int *errcode_ret ); + ~clProtectedImage() + { + if( image != NULL ) + clReleaseMemObject( image ); + +#if defined( __APPLE__ 
) + if(backingStore) + munmap(backingStore, backingStoreSize); +#endif + } + + cl_int Create( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width ); + cl_int Create( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height ); + cl_int Create( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth ); + cl_int Create( cl_context context, cl_mem_object_type imageType, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, size_t arraySize ); + + clProtectedImage & operator=( const cl_mem &rhs ) { image = rhs; backingStore = NULL; return *this; } + operator cl_mem() { return image; } + + cl_mem * operator&() { return ℑ } + + bool operator==( const cl_mem &rhs ) { return image == rhs; } + + protected: + void *backingStore; + size_t backingStoreSize; + cl_mem image; +}; + +/* cl_command_queue wrapper */ +class clCommandQueueWrapper +{ + public: + clCommandQueueWrapper() { mMem = NULL; } + clCommandQueueWrapper( cl_command_queue mem ) { mMem = mem; } + ~clCommandQueueWrapper() { if( mMem != NULL ) { clReleaseCommandQueue( mMem ); } } + + clCommandQueueWrapper & operator=( const cl_command_queue &rhs ) { mMem = rhs; return *this; } + operator cl_command_queue() const { return mMem; } + + cl_command_queue * operator&() { return &mMem; } + + bool operator==( const cl_command_queue &rhs ) { return mMem == rhs; } + + protected: + + cl_command_queue mMem; +}; + +/* cl_sampler wrapper */ +class clSamplerWrapper +{ + public: + clSamplerWrapper() { mMem = NULL; } + clSamplerWrapper( cl_sampler mem ) { mMem = mem; } + ~clSamplerWrapper() { if( mMem != NULL ) clReleaseSampler( mMem ); } + + clSamplerWrapper & operator=( const cl_sampler &rhs ) { mMem = rhs; return *this; } + operator cl_sampler() const { return mMem; } + + cl_sampler * operator&() { return &mMem; } + + bool operator==( const cl_sampler &rhs ) { return mMem == 
rhs; } + + protected: + + cl_sampler mMem; +}; + +/* cl_event wrapper */ +class clEventWrapper +{ + public: + clEventWrapper() { mMem = NULL; } + clEventWrapper( cl_event mem ) { mMem = mem; } + ~clEventWrapper() { if( mMem != NULL ) clReleaseEvent( mMem ); } + + clEventWrapper & operator=( const cl_event &rhs ) { mMem = rhs; return *this; } + operator cl_event() const { return mMem; } + + cl_event * operator&() { return &mMem; } + + bool operator==( const cl_event &rhs ) { return mMem == rhs; } + + protected: + + cl_event mMem; +}; + +/* Generic protected memory buffer, for verifying access within bounds */ +class clProtectedArray +{ + public: + clProtectedArray(); + clProtectedArray( size_t sizeInBytes ); + virtual ~clProtectedArray(); + + void Allocate( size_t sizeInBytes ); + + operator void *() { return (void *)mValidBuffer; } + operator const void *() const { return (const void *)mValidBuffer; } + + protected: + + char * mBuffer; + char * mValidBuffer; + size_t mRealSize, mRoundedSize; +}; + +class RandomSeed +{ + public: + RandomSeed( cl_uint seed ){ if(seed) log_info( "(seed = %10.10u) ", seed ); mtData = init_genrand(seed); } + ~RandomSeed() + { + if( gReSeed ) + gRandomSeed = genrand_int32( mtData ); + free_mtdata(mtData); + } + + operator MTdata () {return mtData;} + + protected: + MTdata mtData; +}; + + +template class BufferOwningPtr +{ + BufferOwningPtr(BufferOwningPtr const &); // do not implement + void operator=(BufferOwningPtr const &); // do not implement + + void *ptr; + void *map; + size_t mapsize; // Bytes allocated total, pointed to by map. + size_t allocsize; // Bytes allocated in unprotected pages, pointed to by ptr. + bool aligned; + public: + explicit BufferOwningPtr(void *p = 0) : ptr(p), map(0), mapsize(0), allocsize(0), aligned(false) {} + explicit BufferOwningPtr(void *p, void *m, size_t s) + : ptr(p), map(m), mapsize(s), allocsize(0), aligned(false) + { +#if ! defined( __APPLE__ ) + if(m) + { + log_error( "ERROR: unhandled code path. 
BufferOwningPtr allocated with mapped buffer!" ); + abort(); + } +#endif + } + ~BufferOwningPtr() { + if (map) { +#if defined( __APPLE__ ) + int error = munmap(map, mapsize); + if (error) log_error("WARNING: munmap failed in BufferOwningPtr.\n"); +#endif + } else { + if ( aligned ) + { + align_free(ptr); + } + else + { + free(ptr); + } + } + } + void reset(void *p, void *m = 0, size_t mapsize_ = 0, size_t allocsize_ = 0, bool aligned_ = false) { + if (map){ +#if defined( __APPLE__ ) + int error = munmap(map, mapsize); + if (error) log_error("WARNING: munmap failed in BufferOwningPtr.\n"); +#else + log_error( "ERROR: unhandled code path. BufferOwningPtr reset with mapped buffer!" ); + abort(); +#endif + } else { + if ( aligned ) + { + align_free(ptr); + } + else + { + free(ptr); + } + } + ptr = p; + map = m; + mapsize = mapsize_; + allocsize = (ptr != NULL) ? allocsize_ : 0; // Force allocsize to zero if ptr is NULL. + aligned = aligned_; +#if ! defined( __APPLE__ ) + if(m) + { + log_error( "ERROR: unhandled code path. BufferOwningPtr allocated with mapped buffer!" ); + abort(); + } +#endif + } + operator T*() { return (T*)ptr; } + + size_t getSize() const { return allocsize; }; +}; + +#endif // _typeWrappers_h + diff --git a/test_common/miniz/CMakeLists.txt b/test_common/miniz/CMakeLists.txt new file mode 100644 index 00000000..600db01d --- /dev/null +++ b/test_common/miniz/CMakeLists.txt @@ -0,0 +1,8 @@ +set(TARGET_NAME miniz) + +add_library( + ${TARGET_NAME} + STATIC + miniz.c + miniz.h +) diff --git a/test_common/miniz/miniz.c b/test_common/miniz/miniz.c new file mode 100644 index 00000000..f893bd86 --- /dev/null +++ b/test_common/miniz/miniz.c @@ -0,0 +1,4153 @@ +/* miniz.c v1.15 - public domain deflate/inflate, zlib-subset, ZIP reading/writing/appending, PNG writing + See "unlicense" statement at the end of this file. + Rich Geldreich , last updated Oct. 
13, 2013 + Implements RFC 1950: http://www.ietf.org/rfc/rfc1950.txt and RFC 1951: http://www.ietf.org/rfc/rfc1951.txt + + Most API's defined in miniz.c are optional. For example, to disable the archive related functions just define + MINIZ_NO_ARCHIVE_APIS, or to get rid of all stdio usage define MINIZ_NO_STDIO (see the list below for more macros). + + * Change History + 10/13/13 v1.15 r4 - Interim bugfix release while I work on the next major release with Zip64 support (almost there!): + - Critical fix for the MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY bug (thanks kahmyong.moon@hp.com) which could cause locate files to not find files. This bug + would only have occured in earlier versions if you explicitly used this flag, OR if you used mz_zip_extract_archive_file_to_heap() or mz_zip_add_mem_to_archive_file_in_place() + (which used this flag). If you can't switch to v1.15 but want to fix this bug, just remove the uses of this flag from both helper funcs (and of course don't use the flag). + - Bugfix in mz_zip_reader_extract_to_mem_no_alloc() from kymoon when pUser_read_buf is not NULL and compressed size is > uncompressed size + - Fixing mz_zip_reader_extract_*() funcs so they don't try to extract compressed data from directory entries, to account for weird zipfiles which contain zero-size compressed data on dir entries. + Hopefully this fix won't cause any issues on weird zip archives, because it assumes the low 16-bits of zip external attributes are DOS attributes (which I believe they always are in practice). + - Fixing mz_zip_reader_is_file_a_directory() so it doesn't check the internal attributes, just the filename and external attributes + - mz_zip_reader_init_file() - missing MZ_FCLOSE() call if the seek failed + - Added cmake support for Linux builds which builds all the examples, tested with clang v3.3 and gcc v4.6. 
+ - Clang fix for tdefl_write_image_to_png_file_in_memory() from toffaletti + - Merged MZ_FORCEINLINE fix from hdeanclark + - Fix include before config #ifdef, thanks emil.brink + - Added tdefl_write_image_to_png_file_in_memory_ex(): supports Y flipping (super useful for OpenGL apps), and explicit control over the compression level (so you can + set it to 1 for real-time compression). + - Merged in some compiler fixes from paulharris's github repro. + - Retested this build under Windows (VS 2010, including static analysis), tcc 0.9.26, gcc v4.6 and clang v3.3. + - Added example6.c, which dumps an image of the mandelbrot set to a PNG file. + - Modified example2 to help test the MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY flag more. + - In r3: Bugfix to mz_zip_writer_add_file() found during merge: Fix possible src file fclose() leak if alignment bytes+local header file write faiiled + - In r4: Minor bugfix to mz_zip_writer_add_from_zip_reader(): Was pushing the wrong central dir header offset, appears harmless in this release, but it became a problem in the zip64 branch + 5/20/12 v1.14 - MinGW32/64 GCC 4.6.1 compiler fixes: added MZ_FORCEINLINE, #include (thanks fermtect). + 5/19/12 v1.13 - From jason@cornsyrup.org and kelwert@mtu.edu - Fix mz_crc32() so it doesn't compute the wrong CRC-32's when mz_ulong is 64-bit. + - Temporarily/locally slammed in "typedef unsigned long mz_ulong" and re-ran a randomized regression test on ~500k files. + - Eliminated a bunch of warnings when compiling with GCC 32-bit/64. + - Ran all examples, miniz.c, and tinfl.c through MSVC 2008's /analyze (static analysis) option and fixed all warnings (except for the silly + "Use of the comma-operator in a tested expression.." analysis warning, which I purposely use to work around a MSVC compiler warning). + - Created 32-bit and 64-bit Codeblocks projects/workspace. Built and tested Linux executables. The codeblocks workspace is compatible with Linux+Win32/x64. 
+ - Added miniz_tester solution/project, which is a useful little app derived from LZHAM's tester app that I use as part of the regression test. + - Ran miniz.c and tinfl.c through another series of regression testing on ~500,000 files and archives. + - Modified example5.c so it purposely disables a bunch of high-level functionality (MINIZ_NO_STDIO, etc.). (Thanks to corysama for the MINIZ_NO_STDIO bug report.) + - Fix ftell() usage in examples so they exit with an error on files which are too large (a limitation of the examples, not miniz itself). + 4/12/12 v1.12 - More comments, added low-level example5.c, fixed a couple minor level_and_flags issues in the archive API's. + level_and_flags can now be set to MZ_DEFAULT_COMPRESSION. Thanks to Bruce Dawson for the feedback/bug report. + 5/28/11 v1.11 - Added statement from unlicense.org + 5/27/11 v1.10 - Substantial compressor optimizations: + - Level 1 is now ~4x faster than before. The L1 compressor's throughput now varies between 70-110MB/sec. on a + - Core i7 (actual throughput varies depending on the type of data, and x64 vs. x86). + - Improved baseline L2-L9 compression perf. Also, greatly improved compression perf. issues on some file types. + - Refactored the compression code for better readability and maintainability. + - Added level 10 compression level (L10 has slightly better ratio than level 9, but could have a potentially large + drop in throughput on some files). + 5/15/11 v1.09 - Initial stable release. + + * Low-level Deflate/Inflate implementation notes: + + Compression: Use the "tdefl" API's. The compressor supports raw, static, and dynamic blocks, lazy or + greedy parsing, match length filtering, RLE-only, and Huffman-only streams. It performs and compresses + approximately as well as zlib. + + Decompression: Use the "tinfl" API's. The entire decompressor is implemented as a single function + coroutine: see tinfl_decompress(). 
It supports decompression into a 32KB (or larger power of 2) wrapping buffer, or into a memory + block large enough to hold the entire file. + + The low-level tdefl/tinfl API's do not make any use of dynamic memory allocation. + + * zlib-style API notes: + + miniz.c implements a fairly large subset of zlib. There's enough functionality present for it to be a drop-in + zlib replacement in many apps: + The z_stream struct, optional memory allocation callbacks + deflateInit/deflateInit2/deflate/deflateReset/deflateEnd/deflateBound + inflateInit/inflateInit2/inflate/inflateEnd + compress, compress2, compressBound, uncompress + CRC-32, Adler-32 - Using modern, minimal code size, CPU cache friendly routines. + Supports raw deflate streams or standard zlib streams with adler-32 checking. + + Limitations: + The callback API's are not implemented yet. No support for gzip headers or zlib static dictionaries. + I've tried to closely emulate zlib's various flavors of stream flushing and return status codes, but + there are no guarantees that miniz.c pulls this off perfectly. + + * PNG writing: See the tdefl_write_image_to_png_file_in_memory() function, originally written by + Alex Evans. Supports 1-4 bytes/pixel images. + + * ZIP archive API notes: + + The ZIP archive API's where designed with simplicity and efficiency in mind, with just enough abstraction to + get the job done with minimal fuss. There are simple API's to retrieve file information, read files from + existing archives, create new archives, append new files to existing archives, or clone archive data from + one archive to another. It supports archives located in memory or the heap, on disk (using stdio.h), + or you can specify custom file read/write callbacks. 
+ + - Archive reading: Just call this function to read a single file from a disk archive: + + void *mz_zip_extract_archive_file_to_heap(const char *pZip_filename, const char *pArchive_name, + size_t *pSize, mz_uint zip_flags); + + For more complex cases, use the "mz_zip_reader" functions. Upon opening an archive, the entire central + directory is located and read as-is into memory, and subsequent file access only occurs when reading individual files. + + - Archives file scanning: The simple way is to use this function to scan a loaded archive for a specific file: + + int mz_zip_reader_locate_file(mz_zip_archive *pZip, const char *pName, const char *pComment, mz_uint flags); + + The locate operation can optionally check file comments too, which (as one example) can be used to identify + multiple versions of the same file in an archive. This function uses a simple linear search through the central + directory, so it's not very fast. + + Alternately, you can iterate through all the files in an archive (using mz_zip_reader_get_num_files()) and + retrieve detailed info on each file by calling mz_zip_reader_file_stat(). + + - Archive creation: Use the "mz_zip_writer" functions. The ZIP writer immediately writes compressed file data + to disk and builds an exact image of the central directory in memory. The central directory image is written + all at once at the end of the archive file when the archive is finalized. + + The archive writer can optionally align each file's local header and file data to any power of 2 alignment, + which can be useful when the archive will be read from optical media. Also, the writer supports placing + arbitrary data blobs at the very beginning of ZIP archives. Archives written using either feature are still + readable by any ZIP tool. 
+ + - Archive appending: The simple way to add a single file to an archive is to call this function: + + mz_bool mz_zip_add_mem_to_archive_file_in_place(const char *pZip_filename, const char *pArchive_name, + const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags); + + The archive will be created if it doesn't already exist, otherwise it'll be appended to. + Note the appending is done in-place and is not an atomic operation, so if something goes wrong + during the operation it's possible the archive could be left without a central directory (although the local + file headers and file data will be fine, so the archive will be recoverable). + + For more complex archive modification scenarios: + 1. The safest way is to use a mz_zip_reader to read the existing archive, cloning only those bits you want to + preserve into a new archive using using the mz_zip_writer_add_from_zip_reader() function (which compiles the + compressed file data as-is). When you're done, delete the old archive and rename the newly written archive, and + you're done. This is safe but requires a bunch of temporary disk space or heap memory. + + 2. Or, you can convert an mz_zip_reader in-place to an mz_zip_writer using mz_zip_writer_init_from_reader(), + append new files as needed, then finalize the archive which will write an updated central directory to the + original archive. (This is basically what mz_zip_add_mem_to_archive_file_in_place() does.) There's a + possibility that the archive's central directory could be lost with this method if anything goes wrong, though. + + - ZIP archive support limitations: + No zip64 or spanning support. Extraction functions can only handle unencrypted, stored or deflated files. + Requires streams capable of seeking. + + * This is a header file library, like stb_image.c. 
To get only a header file, either cut and paste the + below header, or create miniz.h, #define MINIZ_HEADER_FILE_ONLY, and then include miniz.c from it. + + * Important: For best perf. be sure to customize the below macros for your target platform: + #define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 1 + #define MINIZ_LITTLE_ENDIAN 1 + #define MINIZ_HAS_64BIT_REGISTERS 1 + + * On platforms using glibc, Be sure to "#define _LARGEFILE64_SOURCE 1" before including miniz.c to ensure miniz + uses the 64-bit variants: fopen64(), stat64(), etc. Otherwise you won't be able to process large files + (i.e. 32-bit stat() fails for me on files > 0x7FFFFFFF bytes). +*/ + +#include "miniz.h" + +typedef unsigned char mz_validate_uint16[sizeof(mz_uint16)==2 ? 1 : -1]; +typedef unsigned char mz_validate_uint32[sizeof(mz_uint32)==4 ? 1 : -1]; +typedef unsigned char mz_validate_uint64[sizeof(mz_uint64)==8 ? 1 : -1]; + +#include +#include + +#define MZ_ASSERT(x) assert(x) + +#ifdef MINIZ_NO_MALLOC + #define MZ_MALLOC(x) NULL + #define MZ_FREE(x) (void)x, ((void)0) + #define MZ_REALLOC(p, x) NULL +#else + #define MZ_MALLOC(x) malloc(x) + #define MZ_FREE(x) free(x) + #define MZ_REALLOC(p, x) realloc(p, x) +#endif + +#define MZ_MAX(a,b) (((a)>(b))?(a):(b)) +#define MZ_MIN(a,b) (((a)<(b))?(a):(b)) +#define MZ_CLEAR_OBJ(obj) memset(&(obj), 0, sizeof(obj)) + +#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN + #define MZ_READ_LE16(p) *((const mz_uint16 *)(p)) + #define MZ_READ_LE32(p) *((const mz_uint32 *)(p)) +#else + #define MZ_READ_LE16(p) ((mz_uint32)(((const mz_uint8 *)(p))[0]) | ((mz_uint32)(((const mz_uint8 *)(p))[1]) << 8U)) + #define MZ_READ_LE32(p) ((mz_uint32)(((const mz_uint8 *)(p))[0]) | ((mz_uint32)(((const mz_uint8 *)(p))[1]) << 8U) | ((mz_uint32)(((const mz_uint8 *)(p))[2]) << 16U) | ((mz_uint32)(((const mz_uint8 *)(p))[3]) << 24U)) +#endif + +#ifdef _MSC_VER + #define MZ_FORCEINLINE __forceinline +#elif defined(__GNUC__) + #define MZ_FORCEINLINE inline 
__attribute__((__always_inline__)) +#else + #define MZ_FORCEINLINE inline +#endif + +#ifdef __cplusplus + extern "C" { +#endif + +// ------------------- zlib-style API's + +mz_ulong mz_adler32(mz_ulong adler, const unsigned char *ptr, size_t buf_len) +{ + mz_uint32 i, s1 = (mz_uint32)(adler & 0xffff), s2 = (mz_uint32)(adler >> 16); size_t block_len = buf_len % 5552; + if (!ptr) return MZ_ADLER32_INIT; + while (buf_len) { + for (i = 0; i + 7 < block_len; i += 8, ptr += 8) { + s1 += ptr[0], s2 += s1; s1 += ptr[1], s2 += s1; s1 += ptr[2], s2 += s1; s1 += ptr[3], s2 += s1; + s1 += ptr[4], s2 += s1; s1 += ptr[5], s2 += s1; s1 += ptr[6], s2 += s1; s1 += ptr[7], s2 += s1; + } + for ( ; i < block_len; ++i) s1 += *ptr++, s2 += s1; + s1 %= 65521U, s2 %= 65521U; buf_len -= block_len; block_len = 5552; + } + return (s2 << 16) + s1; +} + +// Karl Malbrain's compact CRC-32. See "A compact CCITT crc16 and crc32 C implementation that balances processor cache usage against speed": http://www.geocities.com/malbrain/ +mz_ulong mz_crc32(mz_ulong crc, const mz_uint8 *ptr, size_t buf_len) +{ + static const mz_uint32 s_crc32[16] = { 0, 0x1db71064, 0x3b6e20c8, 0x26d930ac, 0x76dc4190, 0x6b6b51f4, 0x4db26158, 0x5005713c, + 0xedb88320, 0xf00f9344, 0xd6d6a3e8, 0xcb61b38c, 0x9b64c2b0, 0x86d3d2d4, 0xa00ae278, 0xbdbdf21c }; + mz_uint32 crcu32 = (mz_uint32)crc; + if (!ptr) return MZ_CRC32_INIT; + crcu32 = ~crcu32; while (buf_len--) { mz_uint8 b = *ptr++; crcu32 = (crcu32 >> 4) ^ s_crc32[(crcu32 & 0xF) ^ (b & 0xF)]; crcu32 = (crcu32 >> 4) ^ s_crc32[(crcu32 & 0xF) ^ (b >> 4)]; } + return ~crcu32; +} + +void mz_free(void *p) +{ + MZ_FREE(p); +} + +#ifndef MINIZ_NO_ZLIB_APIS + +static void *def_alloc_func(void *opaque, size_t items, size_t size) { (void)opaque, (void)items, (void)size; return MZ_MALLOC(items * size); } +static void def_free_func(void *opaque, void *address) { (void)opaque, (void)address; MZ_FREE(address); } +static void *def_realloc_func(void *opaque, void *address, size_t items, 
size_t size) { (void)opaque, (void)address, (void)items, (void)size; return MZ_REALLOC(address, items * size); } + +const char *mz_version(void) +{ + return MZ_VERSION; +} + +int mz_deflateInit(mz_streamp pStream, int level) +{ + return mz_deflateInit2(pStream, level, MZ_DEFLATED, MZ_DEFAULT_WINDOW_BITS, 9, MZ_DEFAULT_STRATEGY); +} + +int mz_deflateInit2(mz_streamp pStream, int level, int method, int window_bits, int mem_level, int strategy) +{ + tdefl_compressor *pComp; + mz_uint comp_flags = TDEFL_COMPUTE_ADLER32 | tdefl_create_comp_flags_from_zip_params(level, window_bits, strategy); + + if (!pStream) return MZ_STREAM_ERROR; + if ((method != MZ_DEFLATED) || ((mem_level < 1) || (mem_level > 9)) || ((window_bits != MZ_DEFAULT_WINDOW_BITS) && (-window_bits != MZ_DEFAULT_WINDOW_BITS))) return MZ_PARAM_ERROR; + + pStream->data_type = 0; + pStream->adler = MZ_ADLER32_INIT; + pStream->msg = NULL; + pStream->reserved = 0; + pStream->total_in = 0; + pStream->total_out = 0; + if (!pStream->zalloc) pStream->zalloc = def_alloc_func; + if (!pStream->zfree) pStream->zfree = def_free_func; + + pComp = (tdefl_compressor *)pStream->zalloc(pStream->opaque, 1, sizeof(tdefl_compressor)); + if (!pComp) + return MZ_MEM_ERROR; + + pStream->state = (struct mz_internal_state *)pComp; + + if (tdefl_init(pComp, NULL, NULL, comp_flags) != TDEFL_STATUS_OKAY) + { + mz_deflateEnd(pStream); + return MZ_PARAM_ERROR; + } + + return MZ_OK; +} + +int mz_deflateReset(mz_streamp pStream) +{ + if ((!pStream) || (!pStream->state) || (!pStream->zalloc) || (!pStream->zfree)) return MZ_STREAM_ERROR; + pStream->total_in = pStream->total_out = 0; + tdefl_init((tdefl_compressor*)pStream->state, NULL, NULL, ((tdefl_compressor*)pStream->state)->m_flags); + return MZ_OK; +} + +int mz_deflate(mz_streamp pStream, int flush) +{ + size_t in_bytes, out_bytes; + mz_ulong orig_total_in, orig_total_out; + int mz_status = MZ_OK; + + if ((!pStream) || (!pStream->state) || (flush < 0) || (flush > MZ_FINISH) || 
(!pStream->next_out)) return MZ_STREAM_ERROR; + if (!pStream->avail_out) return MZ_BUF_ERROR; + + if (flush == MZ_PARTIAL_FLUSH) flush = MZ_SYNC_FLUSH; + + if (((tdefl_compressor*)pStream->state)->m_prev_return_status == TDEFL_STATUS_DONE) + return (flush == MZ_FINISH) ? MZ_STREAM_END : MZ_BUF_ERROR; + + orig_total_in = pStream->total_in; orig_total_out = pStream->total_out; + for ( ; ; ) + { + tdefl_status defl_status; + in_bytes = pStream->avail_in; out_bytes = pStream->avail_out; + + defl_status = tdefl_compress((tdefl_compressor*)pStream->state, pStream->next_in, &in_bytes, pStream->next_out, &out_bytes, (tdefl_flush)flush); + pStream->next_in += (mz_uint)in_bytes; pStream->avail_in -= (mz_uint)in_bytes; + pStream->total_in += (mz_uint)in_bytes; pStream->adler = tdefl_get_adler32((tdefl_compressor*)pStream->state); + + pStream->next_out += (mz_uint)out_bytes; pStream->avail_out -= (mz_uint)out_bytes; + pStream->total_out += (mz_uint)out_bytes; + + if (defl_status < 0) + { + mz_status = MZ_STREAM_ERROR; + break; + } + else if (defl_status == TDEFL_STATUS_DONE) + { + mz_status = MZ_STREAM_END; + break; + } + else if (!pStream->avail_out) + break; + else if ((!pStream->avail_in) && (flush != MZ_FINISH)) + { + if ((flush) || (pStream->total_in != orig_total_in) || (pStream->total_out != orig_total_out)) + break; + return MZ_BUF_ERROR; // Can't make forward progress without some input. + } + } + return mz_status; +} + +int mz_deflateEnd(mz_streamp pStream) +{ + if (!pStream) return MZ_STREAM_ERROR; + if (pStream->state) + { + pStream->zfree(pStream->opaque, pStream->state); + pStream->state = NULL; + } + return MZ_OK; +} + +mz_ulong mz_deflateBound(mz_streamp pStream, mz_ulong source_len) +{ + (void)pStream; + // This is really over conservative. (And lame, but it's actually pretty tricky to compute a true upper bound given the way tdefl's blocking works.) 
+ return MZ_MAX(128 + (source_len * 110) / 100, 128 + source_len + ((source_len / (31 * 1024)) + 1) * 5); +} + +int mz_compress2(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len, int level) +{ + int status; + mz_stream stream; + memset(&stream, 0, sizeof(stream)); + + // In case mz_ulong is 64-bits (argh I hate longs). + if ((source_len | *pDest_len) > 0xFFFFFFFFU) return MZ_PARAM_ERROR; + + stream.next_in = pSource; + stream.avail_in = (mz_uint32)source_len; + stream.next_out = pDest; + stream.avail_out = (mz_uint32)*pDest_len; + + status = mz_deflateInit(&stream, level); + if (status != MZ_OK) return status; + + status = mz_deflate(&stream, MZ_FINISH); + if (status != MZ_STREAM_END) + { + mz_deflateEnd(&stream); + return (status == MZ_OK) ? MZ_BUF_ERROR : status; + } + + *pDest_len = stream.total_out; + return mz_deflateEnd(&stream); +} + +int mz_compress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len) +{ + return mz_compress2(pDest, pDest_len, pSource, source_len, MZ_DEFAULT_COMPRESSION); +} + +mz_ulong mz_compressBound(mz_ulong source_len) +{ + return mz_deflateBound(NULL, source_len); +} + +typedef struct +{ + tinfl_decompressor m_decomp; + mz_uint m_dict_ofs, m_dict_avail, m_first_call, m_has_flushed; int m_window_bits; + mz_uint8 m_dict[TINFL_LZ_DICT_SIZE]; + tinfl_status m_last_status; +} inflate_state; + +int mz_inflateInit2(mz_streamp pStream, int window_bits) +{ + inflate_state *pDecomp; + if (!pStream) return MZ_STREAM_ERROR; + if ((window_bits != MZ_DEFAULT_WINDOW_BITS) && (-window_bits != MZ_DEFAULT_WINDOW_BITS)) return MZ_PARAM_ERROR; + + pStream->data_type = 0; + pStream->adler = 0; + pStream->msg = NULL; + pStream->total_in = 0; + pStream->total_out = 0; + pStream->reserved = 0; + if (!pStream->zalloc) pStream->zalloc = def_alloc_func; + if (!pStream->zfree) pStream->zfree = def_free_func; + + pDecomp = (inflate_state*)pStream->zalloc(pStream->opaque, 1, 
sizeof(inflate_state)); + if (!pDecomp) return MZ_MEM_ERROR; + + pStream->state = (struct mz_internal_state *)pDecomp; + + tinfl_init(&pDecomp->m_decomp); + pDecomp->m_dict_ofs = 0; + pDecomp->m_dict_avail = 0; + pDecomp->m_last_status = TINFL_STATUS_NEEDS_MORE_INPUT; + pDecomp->m_first_call = 1; + pDecomp->m_has_flushed = 0; + pDecomp->m_window_bits = window_bits; + + return MZ_OK; +} + +int mz_inflateInit(mz_streamp pStream) +{ + return mz_inflateInit2(pStream, MZ_DEFAULT_WINDOW_BITS); +} + +int mz_inflate(mz_streamp pStream, int flush) +{ + inflate_state* pState; + mz_uint n, first_call, decomp_flags = TINFL_FLAG_COMPUTE_ADLER32; + size_t in_bytes, out_bytes, orig_avail_in; + tinfl_status status; + + if ((!pStream) || (!pStream->state)) return MZ_STREAM_ERROR; + if (flush == MZ_PARTIAL_FLUSH) flush = MZ_SYNC_FLUSH; + if ((flush) && (flush != MZ_SYNC_FLUSH) && (flush != MZ_FINISH)) return MZ_STREAM_ERROR; + + pState = (inflate_state*)pStream->state; + if (pState->m_window_bits > 0) decomp_flags |= TINFL_FLAG_PARSE_ZLIB_HEADER; + orig_avail_in = pStream->avail_in; + + first_call = pState->m_first_call; pState->m_first_call = 0; + if (pState->m_last_status < 0) return MZ_DATA_ERROR; + + if (pState->m_has_flushed && (flush != MZ_FINISH)) return MZ_STREAM_ERROR; + pState->m_has_flushed |= (flush == MZ_FINISH); + + if ((flush == MZ_FINISH) && (first_call)) + { + // MZ_FINISH on the first call implies that the input and output buffers are large enough to hold the entire compressed/decompressed file. 
+ decomp_flags |= TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF; + in_bytes = pStream->avail_in; out_bytes = pStream->avail_out; + status = tinfl_decompress(&pState->m_decomp, pStream->next_in, &in_bytes, pStream->next_out, pStream->next_out, &out_bytes, decomp_flags); + pState->m_last_status = status; + pStream->next_in += (mz_uint)in_bytes; pStream->avail_in -= (mz_uint)in_bytes; pStream->total_in += (mz_uint)in_bytes; + pStream->adler = tinfl_get_adler32(&pState->m_decomp); + pStream->next_out += (mz_uint)out_bytes; pStream->avail_out -= (mz_uint)out_bytes; pStream->total_out += (mz_uint)out_bytes; + + if (status < 0) + return MZ_DATA_ERROR; + else if (status != TINFL_STATUS_DONE) + { + pState->m_last_status = TINFL_STATUS_FAILED; + return MZ_BUF_ERROR; + } + return MZ_STREAM_END; + } + // flush != MZ_FINISH then we must assume there's more input. + if (flush != MZ_FINISH) decomp_flags |= TINFL_FLAG_HAS_MORE_INPUT; + + if (pState->m_dict_avail) + { + n = MZ_MIN(pState->m_dict_avail, pStream->avail_out); + memcpy(pStream->next_out, pState->m_dict + pState->m_dict_ofs, n); + pStream->next_out += n; pStream->avail_out -= n; pStream->total_out += n; + pState->m_dict_avail -= n; pState->m_dict_ofs = (pState->m_dict_ofs + n) & (TINFL_LZ_DICT_SIZE - 1); + return ((pState->m_last_status == TINFL_STATUS_DONE) && (!pState->m_dict_avail)) ? 
MZ_STREAM_END : MZ_OK; + } + + for ( ; ; ) + { + in_bytes = pStream->avail_in; + out_bytes = TINFL_LZ_DICT_SIZE - pState->m_dict_ofs; + + status = tinfl_decompress(&pState->m_decomp, pStream->next_in, &in_bytes, pState->m_dict, pState->m_dict + pState->m_dict_ofs, &out_bytes, decomp_flags); + pState->m_last_status = status; + + pStream->next_in += (mz_uint)in_bytes; pStream->avail_in -= (mz_uint)in_bytes; + pStream->total_in += (mz_uint)in_bytes; pStream->adler = tinfl_get_adler32(&pState->m_decomp); + + pState->m_dict_avail = (mz_uint)out_bytes; + + n = MZ_MIN(pState->m_dict_avail, pStream->avail_out); + memcpy(pStream->next_out, pState->m_dict + pState->m_dict_ofs, n); + pStream->next_out += n; pStream->avail_out -= n; pStream->total_out += n; + pState->m_dict_avail -= n; pState->m_dict_ofs = (pState->m_dict_ofs + n) & (TINFL_LZ_DICT_SIZE - 1); + + if (status < 0) + return MZ_DATA_ERROR; // Stream is corrupted (there could be some uncompressed data left in the output dictionary - oh well). + else if ((status == TINFL_STATUS_NEEDS_MORE_INPUT) && (!orig_avail_in)) + return MZ_BUF_ERROR; // Signal caller that we can't make forward progress without supplying more input or by setting flush to MZ_FINISH. + else if (flush == MZ_FINISH) + { + // The output buffer MUST be large to hold the remaining uncompressed data when flush==MZ_FINISH. + if (status == TINFL_STATUS_DONE) + return pState->m_dict_avail ? MZ_BUF_ERROR : MZ_STREAM_END; + // status here must be TINFL_STATUS_HAS_MORE_OUTPUT, which means there's at least 1 more byte on the way. If there's no more room left in the output buffer then something is wrong. + else if (!pStream->avail_out) + return MZ_BUF_ERROR; + } + else if ((status == TINFL_STATUS_DONE) || (!pStream->avail_in) || (!pStream->avail_out) || (pState->m_dict_avail)) + break; + } + + return ((status == TINFL_STATUS_DONE) && (!pState->m_dict_avail)) ? 
MZ_STREAM_END : MZ_OK; +} + +int mz_inflateEnd(mz_streamp pStream) +{ + if (!pStream) + return MZ_STREAM_ERROR; + if (pStream->state) + { + pStream->zfree(pStream->opaque, pStream->state); + pStream->state = NULL; + } + return MZ_OK; +} + +int mz_uncompress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len) +{ + mz_stream stream; + int status; + memset(&stream, 0, sizeof(stream)); + + // In case mz_ulong is 64-bits (argh I hate longs). + if ((source_len | *pDest_len) > 0xFFFFFFFFU) return MZ_PARAM_ERROR; + + stream.next_in = pSource; + stream.avail_in = (mz_uint32)source_len; + stream.next_out = pDest; + stream.avail_out = (mz_uint32)*pDest_len; + + status = mz_inflateInit(&stream); + if (status != MZ_OK) + return status; + + status = mz_inflate(&stream, MZ_FINISH); + if (status != MZ_STREAM_END) + { + mz_inflateEnd(&stream); + return ((status == MZ_BUF_ERROR) && (!stream.avail_in)) ? MZ_DATA_ERROR : status; + } + *pDest_len = stream.total_out; + + return mz_inflateEnd(&stream); +} + +const char *mz_error(int err) +{ + static struct { int m_err; const char *m_pDesc; } s_error_descs[] = + { + { MZ_OK, "" }, + { MZ_STREAM_END, "stream end" }, + { MZ_NEED_DICT, "need dictionary" }, + { MZ_ERRNO, "file error" }, + { MZ_STREAM_ERROR, "stream error" }, + { MZ_DATA_ERROR, "data error" }, + { MZ_MEM_ERROR, "out of memory" }, + { MZ_BUF_ERROR, "buf error" }, + { MZ_VERSION_ERROR, "version error" }, + { MZ_PARAM_ERROR, "parameter error" } + }; + mz_uint i; + for (i = 0; i < sizeof(s_error_descs) / sizeof(s_error_descs[0]); ++i) + if (s_error_descs[i].m_err == err) return s_error_descs[i].m_pDesc; + return NULL; +} + +#endif //MINIZ_NO_ZLIB_APIS + +// ------------------- Low-level Decompression (completely independent from all compression API's) + +#define TINFL_MEMCPY(d, s, l) memcpy(d, s, l) +#define TINFL_MEMSET(p, c, l) memset(p, c, l) + +#define TINFL_CR_BEGIN switch(r->m_state) { case 0: +#define TINFL_CR_RETURN(state_index, 
result) do { status = result; r->m_state = state_index; goto common_exit; case state_index:; } MZ_MACRO_END +#define TINFL_CR_RETURN_FOREVER(state_index, result) do { for ( ; ; ) { TINFL_CR_RETURN(state_index, result); } } MZ_MACRO_END +#define TINFL_CR_FINISH } + +// TODO: If the caller has indicated that there's no more input, and we attempt to read beyond the input buf, then something is wrong with the input because the inflator never +// reads ahead more than it needs to. Currently TINFL_GET_BYTE() pads the end of the stream with 0's in this scenario. +#define TINFL_GET_BYTE(state_index, c) do { \ + if (pIn_buf_cur >= pIn_buf_end) { \ + for ( ; ; ) { \ + if (decomp_flags & TINFL_FLAG_HAS_MORE_INPUT) { \ + TINFL_CR_RETURN(state_index, TINFL_STATUS_NEEDS_MORE_INPUT); \ + if (pIn_buf_cur < pIn_buf_end) { \ + c = *pIn_buf_cur++; \ + break; \ + } \ + } else { \ + c = 0; \ + break; \ + } \ + } \ + } else c = *pIn_buf_cur++; } MZ_MACRO_END + +#define TINFL_NEED_BITS(state_index, n) do { mz_uint c; TINFL_GET_BYTE(state_index, c); bit_buf |= (((tinfl_bit_buf_t)c) << num_bits); num_bits += 8; } while (num_bits < (mz_uint)(n)) +#define TINFL_SKIP_BITS(state_index, n) do { if (num_bits < (mz_uint)(n)) { TINFL_NEED_BITS(state_index, n); } bit_buf >>= (n); num_bits -= (n); } MZ_MACRO_END +#define TINFL_GET_BITS(state_index, b, n) do { if (num_bits < (mz_uint)(n)) { TINFL_NEED_BITS(state_index, n); } b = bit_buf & ((1 << (n)) - 1); bit_buf >>= (n); num_bits -= (n); } MZ_MACRO_END + +// TINFL_HUFF_BITBUF_FILL() is only used rarely, when the number of bytes remaining in the input buffer falls below 2. +// It reads just enough bytes from the input stream that are needed to decode the next Huffman code (and absolutely no more). It works by trying to fully decode a +// Huffman code by using whatever bits are currently present in the bit buffer. If this fails, it reads another byte, and tries again until it succeeds or until the +// bit buffer contains >=15 bits (deflate's max. 
Huffman code size). +#define TINFL_HUFF_BITBUF_FILL(state_index, pHuff) \ + do { \ + temp = (pHuff)->m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]; \ + if (temp >= 0) { \ + code_len = temp >> 9; \ + if ((code_len) && (num_bits >= code_len)) \ + break; \ + } else if (num_bits > TINFL_FAST_LOOKUP_BITS) { \ + code_len = TINFL_FAST_LOOKUP_BITS; \ + do { \ + temp = (pHuff)->m_tree[~temp + ((bit_buf >> code_len++) & 1)]; \ + } while ((temp < 0) && (num_bits >= (code_len + 1))); if (temp >= 0) break; \ + } TINFL_GET_BYTE(state_index, c); bit_buf |= (((tinfl_bit_buf_t)c) << num_bits); num_bits += 8; \ + } while (num_bits < 15); + +// TINFL_HUFF_DECODE() decodes the next Huffman coded symbol. It's more complex than you would initially expect because the zlib API expects the decompressor to never read +// beyond the final byte of the deflate stream. (In other words, when this macro wants to read another byte from the input, it REALLY needs another byte in order to fully +// decode the next Huffman code.) Handling this properly is particularly important on raw deflate (non-zlib) streams, which aren't followed by a byte aligned adler-32. +// The slow path is only executed at the very end of the input buffer. 
+#define TINFL_HUFF_DECODE(state_index, sym, pHuff) do { \ + int temp; mz_uint code_len, c; \ + if (num_bits < 15) { \ + if ((pIn_buf_end - pIn_buf_cur) < 2) { \ + TINFL_HUFF_BITBUF_FILL(state_index, pHuff); \ + } else { \ + bit_buf |= (((tinfl_bit_buf_t)pIn_buf_cur[0]) << num_bits) | (((tinfl_bit_buf_t)pIn_buf_cur[1]) << (num_bits + 8)); pIn_buf_cur += 2; num_bits += 16; \ + } \ + } \ + if ((temp = (pHuff)->m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >= 0) \ + code_len = temp >> 9, temp &= 511; \ + else { \ + code_len = TINFL_FAST_LOOKUP_BITS; do { temp = (pHuff)->m_tree[~temp + ((bit_buf >> code_len++) & 1)]; } while (temp < 0); \ + } sym = temp; bit_buf >>= code_len; num_bits -= code_len; } MZ_MACRO_END + +tinfl_status tinfl_decompress(tinfl_decompressor *r, const mz_uint8 *pIn_buf_next, size_t *pIn_buf_size, mz_uint8 *pOut_buf_start, mz_uint8 *pOut_buf_next, size_t *pOut_buf_size, const mz_uint32 decomp_flags) +{ + static const int s_length_base[31] = { 3,4,5,6,7,8,9,10,11,13, 15,17,19,23,27,31,35,43,51,59, 67,83,99,115,131,163,195,227,258,0,0 }; + static const int s_length_extra[31]= { 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0 }; + static const int s_dist_base[32] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193, 257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0}; + static const int s_dist_extra[32] = { 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13}; + static const mz_uint8 s_length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 }; + static const int s_min_table_sizes[3] = { 257, 1, 4 }; + + tinfl_status status = TINFL_STATUS_FAILED; mz_uint32 num_bits, dist, counter, num_extra; tinfl_bit_buf_t bit_buf; + const mz_uint8 *pIn_buf_cur = pIn_buf_next, *const pIn_buf_end = pIn_buf_next + *pIn_buf_size; + mz_uint8 *pOut_buf_cur = pOut_buf_next, *const pOut_buf_end = pOut_buf_next + *pOut_buf_size; + size_t out_buf_size_mask = (decomp_flags & 
TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF) ? (size_t)-1 : ((pOut_buf_next - pOut_buf_start) + *pOut_buf_size) - 1, dist_from_out_buf_start; + + // Ensure the output buffer's size is a power of 2, unless the output buffer is large enough to hold the entire output file (in which case it doesn't matter). + if (((out_buf_size_mask + 1) & out_buf_size_mask) || (pOut_buf_next < pOut_buf_start)) { *pIn_buf_size = *pOut_buf_size = 0; return TINFL_STATUS_BAD_PARAM; } + + num_bits = r->m_num_bits; bit_buf = r->m_bit_buf; dist = r->m_dist; counter = r->m_counter; num_extra = r->m_num_extra; dist_from_out_buf_start = r->m_dist_from_out_buf_start; + TINFL_CR_BEGIN + + bit_buf = num_bits = dist = counter = num_extra = r->m_zhdr0 = r->m_zhdr1 = 0; r->m_z_adler32 = r->m_check_adler32 = 1; + if (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER) + { + TINFL_GET_BYTE(1, r->m_zhdr0); TINFL_GET_BYTE(2, r->m_zhdr1); + counter = (((r->m_zhdr0 * 256 + r->m_zhdr1) % 31 != 0) || (r->m_zhdr1 & 32) || ((r->m_zhdr0 & 15) != 8)); + if (!(decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF)) counter |= (((1U << (8U + (r->m_zhdr0 >> 4))) > 32768U) || ((out_buf_size_mask + 1) < (size_t)(1U << (8U + (r->m_zhdr0 >> 4))))); + if (counter) { TINFL_CR_RETURN_FOREVER(36, TINFL_STATUS_FAILED); } + } + + do + { + TINFL_GET_BITS(3, r->m_final, 3); r->m_type = r->m_final >> 1; + if (r->m_type == 0) + { + TINFL_SKIP_BITS(5, num_bits & 7); + for (counter = 0; counter < 4; ++counter) { if (num_bits) TINFL_GET_BITS(6, r->m_raw_header[counter], 8); else TINFL_GET_BYTE(7, r->m_raw_header[counter]); } + if ((counter = (r->m_raw_header[0] | (r->m_raw_header[1] << 8))) != (mz_uint)(0xFFFF ^ (r->m_raw_header[2] | (r->m_raw_header[3] << 8)))) { TINFL_CR_RETURN_FOREVER(39, TINFL_STATUS_FAILED); } + while ((counter) && (num_bits)) + { + TINFL_GET_BITS(51, dist, 8); + while (pOut_buf_cur >= pOut_buf_end) { TINFL_CR_RETURN(52, TINFL_STATUS_HAS_MORE_OUTPUT); } + *pOut_buf_cur++ = (mz_uint8)dist; + counter--; + } + while 
(counter) + { + size_t n; while (pOut_buf_cur >= pOut_buf_end) { TINFL_CR_RETURN(9, TINFL_STATUS_HAS_MORE_OUTPUT); } + while (pIn_buf_cur >= pIn_buf_end) + { + if (decomp_flags & TINFL_FLAG_HAS_MORE_INPUT) + { + TINFL_CR_RETURN(38, TINFL_STATUS_NEEDS_MORE_INPUT); + } + else + { + TINFL_CR_RETURN_FOREVER(40, TINFL_STATUS_FAILED); + } + } + n = MZ_MIN(MZ_MIN((size_t)(pOut_buf_end - pOut_buf_cur), (size_t)(pIn_buf_end - pIn_buf_cur)), counter); + TINFL_MEMCPY(pOut_buf_cur, pIn_buf_cur, n); pIn_buf_cur += n; pOut_buf_cur += n; counter -= (mz_uint)n; + } + } + else if (r->m_type == 3) + { + TINFL_CR_RETURN_FOREVER(10, TINFL_STATUS_FAILED); + } + else + { + if (r->m_type == 1) + { + mz_uint8 *p = r->m_tables[0].m_code_size; mz_uint i; + r->m_table_sizes[0] = 288; r->m_table_sizes[1] = 32; TINFL_MEMSET(r->m_tables[1].m_code_size, 5, 32); + for ( i = 0; i <= 143; ++i) *p++ = 8; for ( ; i <= 255; ++i) *p++ = 9; for ( ; i <= 279; ++i) *p++ = 7; for ( ; i <= 287; ++i) *p++ = 8; + } + else + { + for (counter = 0; counter < 3; counter++) { TINFL_GET_BITS(11, r->m_table_sizes[counter], "\05\05\04"[counter]); r->m_table_sizes[counter] += s_min_table_sizes[counter]; } + MZ_CLEAR_OBJ(r->m_tables[2].m_code_size); for (counter = 0; counter < r->m_table_sizes[2]; counter++) { mz_uint s; TINFL_GET_BITS(14, s, 3); r->m_tables[2].m_code_size[s_length_dezigzag[counter]] = (mz_uint8)s; } + r->m_table_sizes[2] = 19; + } + for ( ; (int)r->m_type >= 0; r->m_type--) + { + int tree_next, tree_cur; tinfl_huff_table *pTable; + mz_uint i, j, used_syms, total, sym_index, next_code[17], total_syms[16]; pTable = &r->m_tables[r->m_type]; MZ_CLEAR_OBJ(total_syms); MZ_CLEAR_OBJ(pTable->m_look_up); MZ_CLEAR_OBJ(pTable->m_tree); + for (i = 0; i < r->m_table_sizes[r->m_type]; ++i) total_syms[pTable->m_code_size[i]]++; + used_syms = 0, total = 0; next_code[0] = next_code[1] = 0; + for (i = 1; i <= 15; ++i) { used_syms += total_syms[i]; next_code[i + 1] = (total = ((total + total_syms[i]) << 1)); } + if 
((65536 != total) && (used_syms > 1)) + { + TINFL_CR_RETURN_FOREVER(35, TINFL_STATUS_FAILED); + } + for (tree_next = -1, sym_index = 0; sym_index < r->m_table_sizes[r->m_type]; ++sym_index) + { + mz_uint rev_code = 0, l, cur_code, code_size = pTable->m_code_size[sym_index]; if (!code_size) continue; + cur_code = next_code[code_size]++; for (l = code_size; l > 0; l--, cur_code >>= 1) rev_code = (rev_code << 1) | (cur_code & 1); + if (code_size <= TINFL_FAST_LOOKUP_BITS) { mz_int16 k = (mz_int16)((code_size << 9) | sym_index); while (rev_code < TINFL_FAST_LOOKUP_SIZE) { pTable->m_look_up[rev_code] = k; rev_code += (1 << code_size); } continue; } + if (0 == (tree_cur = pTable->m_look_up[rev_code & (TINFL_FAST_LOOKUP_SIZE - 1)])) { pTable->m_look_up[rev_code & (TINFL_FAST_LOOKUP_SIZE - 1)] = (mz_int16)tree_next; tree_cur = tree_next; tree_next -= 2; } + rev_code >>= (TINFL_FAST_LOOKUP_BITS - 1); + for (j = code_size; j > (TINFL_FAST_LOOKUP_BITS + 1); j--) + { + tree_cur -= ((rev_code >>= 1) & 1); + if (!pTable->m_tree[-tree_cur - 1]) { pTable->m_tree[-tree_cur - 1] = (mz_int16)tree_next; tree_cur = tree_next; tree_next -= 2; } else tree_cur = pTable->m_tree[-tree_cur - 1]; + } + tree_cur -= ((rev_code >>= 1) & 1); pTable->m_tree[-tree_cur - 1] = (mz_int16)sym_index; + } + if (r->m_type == 2) + { + for (counter = 0; counter < (r->m_table_sizes[0] + r->m_table_sizes[1]); ) + { + mz_uint s; TINFL_HUFF_DECODE(16, dist, &r->m_tables[2]); if (dist < 16) { r->m_len_codes[counter++] = (mz_uint8)dist; continue; } + if ((dist == 16) && (!counter)) + { + TINFL_CR_RETURN_FOREVER(17, TINFL_STATUS_FAILED); + } + num_extra = "\02\03\07"[dist - 16]; TINFL_GET_BITS(18, s, num_extra); s += "\03\03\013"[dist - 16]; + TINFL_MEMSET(r->m_len_codes + counter, (dist == 16) ? 
r->m_len_codes[counter - 1] : 0, s); counter += s; + } + if ((r->m_table_sizes[0] + r->m_table_sizes[1]) != counter) + { + TINFL_CR_RETURN_FOREVER(21, TINFL_STATUS_FAILED); + } + TINFL_MEMCPY(r->m_tables[0].m_code_size, r->m_len_codes, r->m_table_sizes[0]); TINFL_MEMCPY(r->m_tables[1].m_code_size, r->m_len_codes + r->m_table_sizes[0], r->m_table_sizes[1]); + } + } + for ( ; ; ) + { + mz_uint8 *pSrc; + for ( ; ; ) + { + if (((pIn_buf_end - pIn_buf_cur) < 4) || ((pOut_buf_end - pOut_buf_cur) < 2)) + { + TINFL_HUFF_DECODE(23, counter, &r->m_tables[0]); + if (counter >= 256) + break; + while (pOut_buf_cur >= pOut_buf_end) { TINFL_CR_RETURN(24, TINFL_STATUS_HAS_MORE_OUTPUT); } + *pOut_buf_cur++ = (mz_uint8)counter; + } + else + { + int sym2; mz_uint code_len; +#if TINFL_USE_64BIT_BITBUF + if (num_bits < 30) { bit_buf |= (((tinfl_bit_buf_t)MZ_READ_LE32(pIn_buf_cur)) << num_bits); pIn_buf_cur += 4; num_bits += 32; } +#else + if (num_bits < 15) { bit_buf |= (((tinfl_bit_buf_t)MZ_READ_LE16(pIn_buf_cur)) << num_bits); pIn_buf_cur += 2; num_bits += 16; } +#endif + if ((sym2 = r->m_tables[0].m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >= 0) + code_len = sym2 >> 9; + else + { + code_len = TINFL_FAST_LOOKUP_BITS; do { sym2 = r->m_tables[0].m_tree[~sym2 + ((bit_buf >> code_len++) & 1)]; } while (sym2 < 0); + } + counter = sym2; bit_buf >>= code_len; num_bits -= code_len; + if (counter & 256) + break; + +#if !TINFL_USE_64BIT_BITBUF + if (num_bits < 15) { bit_buf |= (((tinfl_bit_buf_t)MZ_READ_LE16(pIn_buf_cur)) << num_bits); pIn_buf_cur += 2; num_bits += 16; } +#endif + if ((sym2 = r->m_tables[0].m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >= 0) + code_len = sym2 >> 9; + else + { + code_len = TINFL_FAST_LOOKUP_BITS; do { sym2 = r->m_tables[0].m_tree[~sym2 + ((bit_buf >> code_len++) & 1)]; } while (sym2 < 0); + } + bit_buf >>= code_len; num_bits -= code_len; + + pOut_buf_cur[0] = (mz_uint8)counter; + if (sym2 & 256) + { + pOut_buf_cur++; + counter = sym2; + break; + } 
+ pOut_buf_cur[1] = (mz_uint8)sym2; + pOut_buf_cur += 2; + } + } + if ((counter &= 511) == 256) break; + + num_extra = s_length_extra[counter - 257]; counter = s_length_base[counter - 257]; + if (num_extra) { mz_uint extra_bits; TINFL_GET_BITS(25, extra_bits, num_extra); counter += extra_bits; } + + TINFL_HUFF_DECODE(26, dist, &r->m_tables[1]); + num_extra = s_dist_extra[dist]; dist = s_dist_base[dist]; + if (num_extra) { mz_uint extra_bits; TINFL_GET_BITS(27, extra_bits, num_extra); dist += extra_bits; } + + dist_from_out_buf_start = pOut_buf_cur - pOut_buf_start; + if ((dist > dist_from_out_buf_start) && (decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF)) + { + TINFL_CR_RETURN_FOREVER(37, TINFL_STATUS_FAILED); + } + + pSrc = pOut_buf_start + ((dist_from_out_buf_start - dist) & out_buf_size_mask); + + if ((MZ_MAX(pOut_buf_cur, pSrc) + counter) > pOut_buf_end) + { + while (counter--) + { + while (pOut_buf_cur >= pOut_buf_end) { TINFL_CR_RETURN(53, TINFL_STATUS_HAS_MORE_OUTPUT); } + *pOut_buf_cur++ = pOut_buf_start[(dist_from_out_buf_start++ - dist) & out_buf_size_mask]; + } + continue; + } +#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES + else if ((counter >= 9) && (counter <= dist)) + { + const mz_uint8 *pSrc_end = pSrc + (counter & ~7); + do + { + ((mz_uint32 *)pOut_buf_cur)[0] = ((const mz_uint32 *)pSrc)[0]; + ((mz_uint32 *)pOut_buf_cur)[1] = ((const mz_uint32 *)pSrc)[1]; + pOut_buf_cur += 8; + } while ((pSrc += 8) < pSrc_end); + if ((counter &= 7) < 3) + { + if (counter) + { + pOut_buf_cur[0] = pSrc[0]; + if (counter > 1) + pOut_buf_cur[1] = pSrc[1]; + pOut_buf_cur += counter; + } + continue; + } + } +#endif + do + { + pOut_buf_cur[0] = pSrc[0]; + pOut_buf_cur[1] = pSrc[1]; + pOut_buf_cur[2] = pSrc[2]; + pOut_buf_cur += 3; pSrc += 3; + } while ((int)(counter -= 3) > 2); + if ((int)counter > 0) + { + pOut_buf_cur[0] = pSrc[0]; + if ((int)counter > 1) + pOut_buf_cur[1] = pSrc[1]; + pOut_buf_cur += counter; + } + } + } + } while (!(r->m_final & 1)); + if 
(decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER) + { + TINFL_SKIP_BITS(32, num_bits & 7); for (counter = 0; counter < 4; ++counter) { mz_uint s; if (num_bits) TINFL_GET_BITS(41, s, 8); else TINFL_GET_BYTE(42, s); r->m_z_adler32 = (r->m_z_adler32 << 8) | s; } + } + TINFL_CR_RETURN_FOREVER(34, TINFL_STATUS_DONE); + TINFL_CR_FINISH + +common_exit: + r->m_num_bits = num_bits; r->m_bit_buf = bit_buf; r->m_dist = dist; r->m_counter = counter; r->m_num_extra = num_extra; r->m_dist_from_out_buf_start = dist_from_out_buf_start; + *pIn_buf_size = pIn_buf_cur - pIn_buf_next; *pOut_buf_size = pOut_buf_cur - pOut_buf_next; + if ((decomp_flags & (TINFL_FLAG_PARSE_ZLIB_HEADER | TINFL_FLAG_COMPUTE_ADLER32)) && (status >= 0)) + { + const mz_uint8 *ptr = pOut_buf_next; size_t buf_len = *pOut_buf_size; + mz_uint32 i, s1 = r->m_check_adler32 & 0xffff, s2 = r->m_check_adler32 >> 16; size_t block_len = buf_len % 5552; + while (buf_len) + { + for (i = 0; i + 7 < block_len; i += 8, ptr += 8) + { + s1 += ptr[0], s2 += s1; s1 += ptr[1], s2 += s1; s1 += ptr[2], s2 += s1; s1 += ptr[3], s2 += s1; + s1 += ptr[4], s2 += s1; s1 += ptr[5], s2 += s1; s1 += ptr[6], s2 += s1; s1 += ptr[7], s2 += s1; + } + for ( ; i < block_len; ++i) s1 += *ptr++, s2 += s1; + s1 %= 65521U, s2 %= 65521U; buf_len -= block_len; block_len = 5552; + } + r->m_check_adler32 = (s2 << 16) + s1; if ((status == TINFL_STATUS_DONE) && (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER) && (r->m_check_adler32 != r->m_z_adler32)) status = TINFL_STATUS_ADLER32_MISMATCH; + } + return status; +} + +// Higher level helper functions. 
+void *tinfl_decompress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags) +{ + tinfl_decompressor decomp; void *pBuf = NULL, *pNew_buf; size_t src_buf_ofs = 0, out_buf_capacity = 0; + *pOut_len = 0; + tinfl_init(&decomp); + for ( ; ; ) + { + size_t src_buf_size = src_buf_len - src_buf_ofs, dst_buf_size = out_buf_capacity - *pOut_len, new_out_buf_capacity; + tinfl_status status = tinfl_decompress(&decomp, (const mz_uint8*)pSrc_buf + src_buf_ofs, &src_buf_size, (mz_uint8*)pBuf, pBuf ? (mz_uint8*)pBuf + *pOut_len : NULL, &dst_buf_size, + (flags & ~TINFL_FLAG_HAS_MORE_INPUT) | TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF); + if ((status < 0) || (status == TINFL_STATUS_NEEDS_MORE_INPUT)) + { + MZ_FREE(pBuf); *pOut_len = 0; return NULL; + } + src_buf_ofs += src_buf_size; + *pOut_len += dst_buf_size; + if (status == TINFL_STATUS_DONE) break; + new_out_buf_capacity = out_buf_capacity * 2; if (new_out_buf_capacity < 128) new_out_buf_capacity = 128; + pNew_buf = MZ_REALLOC(pBuf, new_out_buf_capacity); + if (!pNew_buf) + { + MZ_FREE(pBuf); *pOut_len = 0; return NULL; + } + pBuf = pNew_buf; out_buf_capacity = new_out_buf_capacity; + } + return pBuf; +} + +size_t tinfl_decompress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags) +{ + tinfl_decompressor decomp; tinfl_status status; tinfl_init(&decomp); + status = tinfl_decompress(&decomp, (const mz_uint8*)pSrc_buf, &src_buf_len, (mz_uint8*)pOut_buf, (mz_uint8*)pOut_buf, &out_buf_len, (flags & ~TINFL_FLAG_HAS_MORE_INPUT) | TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF); + return (status != TINFL_STATUS_DONE) ? 
TINFL_DECOMPRESS_MEM_TO_MEM_FAILED : out_buf_len; +} + +int tinfl_decompress_mem_to_callback(const void *pIn_buf, size_t *pIn_buf_size, tinfl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags) +{ + int result = 0; + tinfl_decompressor decomp; + mz_uint8 *pDict = (mz_uint8*)MZ_MALLOC(TINFL_LZ_DICT_SIZE); size_t in_buf_ofs = 0, dict_ofs = 0; + if (!pDict) + return TINFL_STATUS_FAILED; + tinfl_init(&decomp); + for ( ; ; ) + { + size_t in_buf_size = *pIn_buf_size - in_buf_ofs, dst_buf_size = TINFL_LZ_DICT_SIZE - dict_ofs; + tinfl_status status = tinfl_decompress(&decomp, (const mz_uint8*)pIn_buf + in_buf_ofs, &in_buf_size, pDict, pDict + dict_ofs, &dst_buf_size, + (flags & ~(TINFL_FLAG_HAS_MORE_INPUT | TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF))); + in_buf_ofs += in_buf_size; + if ((dst_buf_size) && (!(*pPut_buf_func)(pDict + dict_ofs, (int)dst_buf_size, pPut_buf_user))) + break; + if (status != TINFL_STATUS_HAS_MORE_OUTPUT) + { + result = (status == TINFL_STATUS_DONE); + break; + } + dict_ofs = (dict_ofs + dst_buf_size) & (TINFL_LZ_DICT_SIZE - 1); + } + MZ_FREE(pDict); + *pIn_buf_size = in_buf_ofs; + return result; +} + +// ------------------- Low-level Compression (independent from all decompression API's) + +// Purposely making these tables static for faster init and thread safety. 
// ------------------- DEFLATE coding tables (see RFC 1951, section 3.2.5) -------------------
// Each table is indexed by a value the LZ coder already holds so that symbol and
// extra-bit lookups are single array accesses in the inner emit loops.

// Length symbol (257..285) for a match length stored as (len - TDEFL_MIN_MATCH_LEN), 0..255.
static const mz_uint16 s_tdefl_len_sym[256] = {
  257,258,259,260,261,262,263,264,265,265,266,266,267,267,268,268,269,269,269,269,270,270,270,270,271,271,271,271,272,272,272,272,
  273,273,273,273,273,273,273,273,274,274,274,274,274,274,274,274,275,275,275,275,275,275,275,275,276,276,276,276,276,276,276,276,
  277,277,277,277,277,277,277,277,277,277,277,277,277,277,277,277,278,278,278,278,278,278,278,278,278,278,278,278,278,278,278,278,
  279,279,279,279,279,279,279,279,279,279,279,279,279,279,279,279,280,280,280,280,280,280,280,280,280,280,280,280,280,280,280,280,
  281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,281,
  282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,282,
  283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,283,
  284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,284,285 };

// Count of extra bits emitted after each length symbol, indexed the same way.
static const mz_uint8 s_tdefl_len_extra[256] = {
  0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
  4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
  5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
  5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,0 };

// Distance symbol for small match distances: indexed by (dist & 511), used when dist < 512.
static const mz_uint8 s_tdefl_small_dist_sym[512] = {
  0,1,2,3,4,4,5,5,6,6,6,6,7,7,7,7,8,8,8,8,8,8,8,8,9,9,9,9,9,9,9,9,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,11,11,11,11,11,11,
  11,11,11,11,11,11,11,11,11,11,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,13,
  13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,14,14,14,14,14,14,14,14,14,14,14,14,
  14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,
  14,14,14,14,14,14,14,14,14,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
  15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,16,16,16,16,16,16,16,16,16,16,16,16,16,
  16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,
  16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,
  16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,17,17,17,17,17,17,17,17,17,17,17,17,17,17,
  17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,
  17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,
  17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17 };

// Extra-bit counts matching s_tdefl_small_dist_sym, same (dist & 511) index.
static const mz_uint8 s_tdefl_small_dist_extra[512] = {
  0,0,0,0,1,1,1,1,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,
  5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
  6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
  6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
  7,7,7,7,7,7,7,7 };

// Distance symbol for large distances: indexed by (dist >> 8), used when dist >= 512.
static const mz_uint8 s_tdefl_large_dist_sym[128] = {
  0,0,18,19,20,20,21,21,22,22,22,22,23,23,23,23,24,24,24,24,24,24,24,24,25,25,25,25,25,25,25,25,26,26,26,26,26,26,26,26,26,26,26,26,
  26,26,26,26,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,27,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,
  28,28,28,28,28,28,28,28,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29,29 };

// Extra-bit counts matching s_tdefl_large_dist_sym, same (dist >> 8) index.
static const mz_uint8 s_tdefl_large_dist_extra[128] = {
  0,0,8,8,9,9,9,9,10,10,10,10,10,10,10,10,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,
  12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,
  13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13 };

// Radix sorts tdefl_sym_freq[] array by 16-bit key m_key. Returns ptr to sorted values.
+typedef struct { mz_uint16 m_key, m_sym_index; } tdefl_sym_freq; +static tdefl_sym_freq* tdefl_radix_sort_syms(mz_uint num_syms, tdefl_sym_freq* pSyms0, tdefl_sym_freq* pSyms1) +{ + mz_uint32 total_passes = 2, pass_shift, pass, i, hist[256 * 2]; tdefl_sym_freq* pCur_syms = pSyms0, *pNew_syms = pSyms1; MZ_CLEAR_OBJ(hist); + for (i = 0; i < num_syms; i++) { mz_uint freq = pSyms0[i].m_key; hist[freq & 0xFF]++; hist[256 + ((freq >> 8) & 0xFF)]++; } + while ((total_passes > 1) && (num_syms == hist[(total_passes - 1) * 256])) total_passes--; + for (pass_shift = 0, pass = 0; pass < total_passes; pass++, pass_shift += 8) + { + const mz_uint32* pHist = &hist[pass << 8]; + mz_uint offsets[256], cur_ofs = 0; + for (i = 0; i < 256; i++) { offsets[i] = cur_ofs; cur_ofs += pHist[i]; } + for (i = 0; i < num_syms; i++) pNew_syms[offsets[(pCur_syms[i].m_key >> pass_shift) & 0xFF]++] = pCur_syms[i]; + { tdefl_sym_freq* t = pCur_syms; pCur_syms = pNew_syms; pNew_syms = t; } + } + return pCur_syms; +} + +// tdefl_calculate_minimum_redundancy() originally written by: Alistair Moffat, alistair@cs.mu.oz.au, Jyrki Katajainen, jyrki@diku.dk, November 1996. +static void tdefl_calculate_minimum_redundancy(tdefl_sym_freq *A, int n) +{ + int root, leaf, next, avbl, used, dpth; + if (n==0) return; else if (n==1) { A[0].m_key = 1; return; } + A[0].m_key += A[1].m_key; root = 0; leaf = 2; + for (next=1; next < n-1; next++) + { + if (leaf>=n || A[root].m_key=n || (root=0; next--) A[next].m_key = A[A[next].m_key].m_key+1; + avbl = 1; used = dpth = 0; root = n-2; next = n-1; + while (avbl>0) + { + while (root>=0 && (int)A[root].m_key==dpth) { used++; root--; } + while (avbl>used) { A[next--].m_key = (mz_uint16)(dpth); avbl--; } + avbl = 2*used; dpth++; used = 0; + } +} + +// Limits canonical Huffman code table's max code size. 
+enum { TDEFL_MAX_SUPPORTED_HUFF_CODESIZE = 32 }; +static void tdefl_huffman_enforce_max_code_size(int *pNum_codes, int code_list_len, int max_code_size) +{ + int i; mz_uint32 total = 0; if (code_list_len <= 1) return; + for (i = max_code_size + 1; i <= TDEFL_MAX_SUPPORTED_HUFF_CODESIZE; i++) pNum_codes[max_code_size] += pNum_codes[i]; + for (i = max_code_size; i > 0; i--) total += (((mz_uint32)pNum_codes[i]) << (max_code_size - i)); + while (total != (1UL << max_code_size)) + { + pNum_codes[max_code_size]--; + for (i = max_code_size - 1; i > 0; i--) if (pNum_codes[i]) { pNum_codes[i]--; pNum_codes[i + 1] += 2; break; } + total--; + } +} + +static void tdefl_optimize_huffman_table(tdefl_compressor *d, int table_num, int table_len, int code_size_limit, int static_table) +{ + int i, j, l, num_codes[1 + TDEFL_MAX_SUPPORTED_HUFF_CODESIZE]; mz_uint next_code[TDEFL_MAX_SUPPORTED_HUFF_CODESIZE + 1]; MZ_CLEAR_OBJ(num_codes); + if (static_table) + { + for (i = 0; i < table_len; i++) num_codes[d->m_huff_code_sizes[table_num][i]]++; + } + else + { + tdefl_sym_freq syms0[TDEFL_MAX_HUFF_SYMBOLS], syms1[TDEFL_MAX_HUFF_SYMBOLS], *pSyms; + int num_used_syms = 0; + const mz_uint16 *pSym_count = &d->m_huff_count[table_num][0]; + for (i = 0; i < table_len; i++) if (pSym_count[i]) { syms0[num_used_syms].m_key = (mz_uint16)pSym_count[i]; syms0[num_used_syms++].m_sym_index = (mz_uint16)i; } + + pSyms = tdefl_radix_sort_syms(num_used_syms, syms0, syms1); tdefl_calculate_minimum_redundancy(pSyms, num_used_syms); + + for (i = 0; i < num_used_syms; i++) num_codes[pSyms[i].m_key]++; + + tdefl_huffman_enforce_max_code_size(num_codes, num_used_syms, code_size_limit); + + MZ_CLEAR_OBJ(d->m_huff_code_sizes[table_num]); MZ_CLEAR_OBJ(d->m_huff_codes[table_num]); + for (i = 1, j = num_used_syms; i <= code_size_limit; i++) + for (l = num_codes[i]; l > 0; l--) d->m_huff_code_sizes[table_num][pSyms[--j].m_sym_index] = (mz_uint8)(i); + } + + next_code[1] = 0; for (j = 0, i = 2; i <= code_size_limit; 
i++) next_code[i] = j = ((j + num_codes[i - 1]) << 1); + + for (i = 0; i < table_len; i++) + { + mz_uint rev_code = 0, code, code_size; if ((code_size = d->m_huff_code_sizes[table_num][i]) == 0) continue; + code = next_code[code_size]++; for (l = code_size; l > 0; l--, code >>= 1) rev_code = (rev_code << 1) | (code & 1); + d->m_huff_codes[table_num][i] = (mz_uint16)rev_code; + } +} + +#define TDEFL_PUT_BITS(b, l) do { \ + mz_uint bits = b; mz_uint len = l; MZ_ASSERT(bits <= ((1U << len) - 1U)); \ + d->m_bit_buffer |= (bits << d->m_bits_in); d->m_bits_in += len; \ + while (d->m_bits_in >= 8) { \ + if (d->m_pOutput_buf < d->m_pOutput_buf_end) \ + *d->m_pOutput_buf++ = (mz_uint8)(d->m_bit_buffer); \ + d->m_bit_buffer >>= 8; \ + d->m_bits_in -= 8; \ + } \ +} MZ_MACRO_END + +#define TDEFL_RLE_PREV_CODE_SIZE() { if (rle_repeat_count) { \ + if (rle_repeat_count < 3) { \ + d->m_huff_count[2][prev_code_size] = (mz_uint16)(d->m_huff_count[2][prev_code_size] + rle_repeat_count); \ + while (rle_repeat_count--) packed_code_sizes[num_packed_code_sizes++] = prev_code_size; \ + } else { \ + d->m_huff_count[2][16] = (mz_uint16)(d->m_huff_count[2][16] + 1); packed_code_sizes[num_packed_code_sizes++] = 16; packed_code_sizes[num_packed_code_sizes++] = (mz_uint8)(rle_repeat_count - 3); \ +} rle_repeat_count = 0; } } + +#define TDEFL_RLE_ZERO_CODE_SIZE() { if (rle_z_count) { \ + if (rle_z_count < 3) { \ + d->m_huff_count[2][0] = (mz_uint16)(d->m_huff_count[2][0] + rle_z_count); while (rle_z_count--) packed_code_sizes[num_packed_code_sizes++] = 0; \ + } else if (rle_z_count <= 10) { \ + d->m_huff_count[2][17] = (mz_uint16)(d->m_huff_count[2][17] + 1); packed_code_sizes[num_packed_code_sizes++] = 17; packed_code_sizes[num_packed_code_sizes++] = (mz_uint8)(rle_z_count - 3); \ + } else { \ + d->m_huff_count[2][18] = (mz_uint16)(d->m_huff_count[2][18] + 1); packed_code_sizes[num_packed_code_sizes++] = 18; packed_code_sizes[num_packed_code_sizes++] = (mz_uint8)(rle_z_count - 11); \ +} 
rle_z_count = 0; } } + +static mz_uint8 s_tdefl_packed_code_size_syms_swizzle[] = { 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 }; + +static void tdefl_start_dynamic_block(tdefl_compressor *d) +{ + int num_lit_codes, num_dist_codes, num_bit_lengths; mz_uint i, total_code_sizes_to_pack, num_packed_code_sizes, rle_z_count, rle_repeat_count, packed_code_sizes_index; + mz_uint8 code_sizes_to_pack[TDEFL_MAX_HUFF_SYMBOLS_0 + TDEFL_MAX_HUFF_SYMBOLS_1], packed_code_sizes[TDEFL_MAX_HUFF_SYMBOLS_0 + TDEFL_MAX_HUFF_SYMBOLS_1], prev_code_size = 0xFF; + + d->m_huff_count[0][256] = 1; + + tdefl_optimize_huffman_table(d, 0, TDEFL_MAX_HUFF_SYMBOLS_0, 15, MZ_FALSE); + tdefl_optimize_huffman_table(d, 1, TDEFL_MAX_HUFF_SYMBOLS_1, 15, MZ_FALSE); + + for (num_lit_codes = 286; num_lit_codes > 257; num_lit_codes--) if (d->m_huff_code_sizes[0][num_lit_codes - 1]) break; + for (num_dist_codes = 30; num_dist_codes > 1; num_dist_codes--) if (d->m_huff_code_sizes[1][num_dist_codes - 1]) break; + + memcpy(code_sizes_to_pack, &d->m_huff_code_sizes[0][0], num_lit_codes); + memcpy(code_sizes_to_pack + num_lit_codes, &d->m_huff_code_sizes[1][0], num_dist_codes); + total_code_sizes_to_pack = num_lit_codes + num_dist_codes; num_packed_code_sizes = 0; rle_z_count = 0; rle_repeat_count = 0; + + memset(&d->m_huff_count[2][0], 0, sizeof(d->m_huff_count[2][0]) * TDEFL_MAX_HUFF_SYMBOLS_2); + for (i = 0; i < total_code_sizes_to_pack; i++) + { + mz_uint8 code_size = code_sizes_to_pack[i]; + if (!code_size) + { + TDEFL_RLE_PREV_CODE_SIZE(); + if (++rle_z_count == 138) { TDEFL_RLE_ZERO_CODE_SIZE(); } + } + else + { + TDEFL_RLE_ZERO_CODE_SIZE(); + if (code_size != prev_code_size) + { + TDEFL_RLE_PREV_CODE_SIZE(); + d->m_huff_count[2][code_size] = (mz_uint16)(d->m_huff_count[2][code_size] + 1); packed_code_sizes[num_packed_code_sizes++] = code_size; + } + else if (++rle_repeat_count == 6) + { + TDEFL_RLE_PREV_CODE_SIZE(); + } + } + prev_code_size = code_size; + } + if (rle_repeat_count) { 
TDEFL_RLE_PREV_CODE_SIZE(); } else { TDEFL_RLE_ZERO_CODE_SIZE(); } + + tdefl_optimize_huffman_table(d, 2, TDEFL_MAX_HUFF_SYMBOLS_2, 7, MZ_FALSE); + + TDEFL_PUT_BITS(2, 2); + + TDEFL_PUT_BITS(num_lit_codes - 257, 5); + TDEFL_PUT_BITS(num_dist_codes - 1, 5); + + for (num_bit_lengths = 18; num_bit_lengths >= 0; num_bit_lengths--) if (d->m_huff_code_sizes[2][s_tdefl_packed_code_size_syms_swizzle[num_bit_lengths]]) break; + num_bit_lengths = MZ_MAX(4, (num_bit_lengths + 1)); TDEFL_PUT_BITS(num_bit_lengths - 4, 4); + for (i = 0; (int)i < num_bit_lengths; i++) TDEFL_PUT_BITS(d->m_huff_code_sizes[2][s_tdefl_packed_code_size_syms_swizzle[i]], 3); + + for (packed_code_sizes_index = 0; packed_code_sizes_index < num_packed_code_sizes; ) + { + mz_uint code = packed_code_sizes[packed_code_sizes_index++]; MZ_ASSERT(code < TDEFL_MAX_HUFF_SYMBOLS_2); + TDEFL_PUT_BITS(d->m_huff_codes[2][code], d->m_huff_code_sizes[2][code]); + if (code >= 16) TDEFL_PUT_BITS(packed_code_sizes[packed_code_sizes_index++], "\02\03\07"[code - 16]); + } +} + +static void tdefl_start_static_block(tdefl_compressor *d) +{ + mz_uint i; + mz_uint8 *p = &d->m_huff_code_sizes[0][0]; + + for (i = 0; i <= 143; ++i) *p++ = 8; + for ( ; i <= 255; ++i) *p++ = 9; + for ( ; i <= 279; ++i) *p++ = 7; + for ( ; i <= 287; ++i) *p++ = 8; + + memset(d->m_huff_code_sizes[1], 5, 32); + + tdefl_optimize_huffman_table(d, 0, 288, 15, MZ_TRUE); + tdefl_optimize_huffman_table(d, 1, 32, 15, MZ_TRUE); + + TDEFL_PUT_BITS(1, 2); +} + +static const mz_uint mz_bitmasks[17] = { 0x0000, 0x0001, 0x0003, 0x0007, 0x000F, 0x001F, 0x003F, 0x007F, 0x00FF, 0x01FF, 0x03FF, 0x07FF, 0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF }; + +#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN && MINIZ_HAS_64BIT_REGISTERS +static mz_bool tdefl_compress_lz_codes(tdefl_compressor *d) +{ + mz_uint flags; + mz_uint8 *pLZ_codes; + mz_uint8 *pOutput_buf = d->m_pOutput_buf; + mz_uint8 *pLZ_code_buf_end = d->m_pLZ_code_buf; + mz_uint64 bit_buffer = 
d->m_bit_buffer; + mz_uint bits_in = d->m_bits_in; + +#define TDEFL_PUT_BITS_FAST(b, l) { bit_buffer |= (((mz_uint64)(b)) << bits_in); bits_in += (l); } + + flags = 1; + for (pLZ_codes = d->m_lz_code_buf; pLZ_codes < pLZ_code_buf_end; flags >>= 1) + { + if (flags == 1) + flags = *pLZ_codes++ | 0x100; + + if (flags & 1) + { + mz_uint s0, s1, n0, n1, sym, num_extra_bits; + mz_uint match_len = pLZ_codes[0], match_dist = *(const mz_uint16 *)(pLZ_codes + 1); pLZ_codes += 3; + + MZ_ASSERT(d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]); + TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][s_tdefl_len_sym[match_len]], d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]); + TDEFL_PUT_BITS_FAST(match_len & mz_bitmasks[s_tdefl_len_extra[match_len]], s_tdefl_len_extra[match_len]); + + // This sequence coaxes MSVC into using cmov's vs. jmp's. + s0 = s_tdefl_small_dist_sym[match_dist & 511]; + n0 = s_tdefl_small_dist_extra[match_dist & 511]; + s1 = s_tdefl_large_dist_sym[match_dist >> 8]; + n1 = s_tdefl_large_dist_extra[match_dist >> 8]; + sym = (match_dist < 512) ? s0 : s1; + num_extra_bits = (match_dist < 512) ? 
n0 : n1; + + MZ_ASSERT(d->m_huff_code_sizes[1][sym]); + TDEFL_PUT_BITS_FAST(d->m_huff_codes[1][sym], d->m_huff_code_sizes[1][sym]); + TDEFL_PUT_BITS_FAST(match_dist & mz_bitmasks[num_extra_bits], num_extra_bits); + } + else + { + mz_uint lit = *pLZ_codes++; + MZ_ASSERT(d->m_huff_code_sizes[0][lit]); + TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]); + + if (((flags & 2) == 0) && (pLZ_codes < pLZ_code_buf_end)) + { + flags >>= 1; + lit = *pLZ_codes++; + MZ_ASSERT(d->m_huff_code_sizes[0][lit]); + TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]); + + if (((flags & 2) == 0) && (pLZ_codes < pLZ_code_buf_end)) + { + flags >>= 1; + lit = *pLZ_codes++; + MZ_ASSERT(d->m_huff_code_sizes[0][lit]); + TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]); + } + } + } + + if (pOutput_buf >= d->m_pOutput_buf_end) + return MZ_FALSE; + + *(mz_uint64*)pOutput_buf = bit_buffer; + pOutput_buf += (bits_in >> 3); + bit_buffer >>= (bits_in & ~7); + bits_in &= 7; + } + +#undef TDEFL_PUT_BITS_FAST + + d->m_pOutput_buf = pOutput_buf; + d->m_bits_in = 0; + d->m_bit_buffer = 0; + + while (bits_in) + { + mz_uint32 n = MZ_MIN(bits_in, 16); + TDEFL_PUT_BITS((mz_uint)bit_buffer & mz_bitmasks[n], n); + bit_buffer >>= n; + bits_in -= n; + } + + TDEFL_PUT_BITS(d->m_huff_codes[0][256], d->m_huff_code_sizes[0][256]); + + return (d->m_pOutput_buf < d->m_pOutput_buf_end); +} +#else +static mz_bool tdefl_compress_lz_codes(tdefl_compressor *d) +{ + mz_uint flags; + mz_uint8 *pLZ_codes; + + flags = 1; + for (pLZ_codes = d->m_lz_code_buf; pLZ_codes < d->m_pLZ_code_buf; flags >>= 1) + { + if (flags == 1) + flags = *pLZ_codes++ | 0x100; + if (flags & 1) + { + mz_uint sym, num_extra_bits; + mz_uint match_len = pLZ_codes[0], match_dist = (pLZ_codes[1] | (pLZ_codes[2] << 8)); pLZ_codes += 3; + + MZ_ASSERT(d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]); + TDEFL_PUT_BITS(d->m_huff_codes[0][s_tdefl_len_sym[match_len]], 
d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]); + TDEFL_PUT_BITS(match_len & mz_bitmasks[s_tdefl_len_extra[match_len]], s_tdefl_len_extra[match_len]); + + if (match_dist < 512) + { + sym = s_tdefl_small_dist_sym[match_dist]; num_extra_bits = s_tdefl_small_dist_extra[match_dist]; + } + else + { + sym = s_tdefl_large_dist_sym[match_dist >> 8]; num_extra_bits = s_tdefl_large_dist_extra[match_dist >> 8]; + } + MZ_ASSERT(d->m_huff_code_sizes[1][sym]); + TDEFL_PUT_BITS(d->m_huff_codes[1][sym], d->m_huff_code_sizes[1][sym]); + TDEFL_PUT_BITS(match_dist & mz_bitmasks[num_extra_bits], num_extra_bits); + } + else + { + mz_uint lit = *pLZ_codes++; + MZ_ASSERT(d->m_huff_code_sizes[0][lit]); + TDEFL_PUT_BITS(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]); + } + } + + TDEFL_PUT_BITS(d->m_huff_codes[0][256], d->m_huff_code_sizes[0][256]); + + return (d->m_pOutput_buf < d->m_pOutput_buf_end); +} +#endif // MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN && MINIZ_HAS_64BIT_REGISTERS + +static mz_bool tdefl_compress_block(tdefl_compressor *d, mz_bool static_block) +{ + if (static_block) + tdefl_start_static_block(d); + else + tdefl_start_dynamic_block(d); + return tdefl_compress_lz_codes(d); +} + +static int tdefl_flush_block(tdefl_compressor *d, int flush) +{ + mz_uint saved_bit_buf, saved_bits_in; + mz_uint8 *pSaved_output_buf; + mz_bool comp_block_succeeded = MZ_FALSE; + int n, use_raw_block = ((d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS) != 0) && (d->m_lookahead_pos - d->m_lz_code_buf_dict_pos) <= d->m_dict_size; + mz_uint8 *pOutput_buf_start = ((d->m_pPut_buf_func == NULL) && ((*d->m_pOut_buf_size - d->m_out_buf_ofs) >= TDEFL_OUT_BUF_SIZE)) ? 
((mz_uint8 *)d->m_pOut_buf + d->m_out_buf_ofs) : d->m_output_buf; + + d->m_pOutput_buf = pOutput_buf_start; + d->m_pOutput_buf_end = d->m_pOutput_buf + TDEFL_OUT_BUF_SIZE - 16; + + MZ_ASSERT(!d->m_output_flush_remaining); + d->m_output_flush_ofs = 0; + d->m_output_flush_remaining = 0; + + *d->m_pLZ_flags = (mz_uint8)(*d->m_pLZ_flags >> d->m_num_flags_left); + d->m_pLZ_code_buf -= (d->m_num_flags_left == 8); + + if ((d->m_flags & TDEFL_WRITE_ZLIB_HEADER) && (!d->m_block_index)) + { + TDEFL_PUT_BITS(0x78, 8); TDEFL_PUT_BITS(0x01, 8); + } + + TDEFL_PUT_BITS(flush == TDEFL_FINISH, 1); + + pSaved_output_buf = d->m_pOutput_buf; saved_bit_buf = d->m_bit_buffer; saved_bits_in = d->m_bits_in; + + if (!use_raw_block) + comp_block_succeeded = tdefl_compress_block(d, (d->m_flags & TDEFL_FORCE_ALL_STATIC_BLOCKS) || (d->m_total_lz_bytes < 48)); + + // If the block gets expanded, forget the current contents of the output buffer and send a raw block instead. + if ( ((use_raw_block) || ((d->m_total_lz_bytes) && ((d->m_pOutput_buf - pSaved_output_buf + 1U) >= d->m_total_lz_bytes))) && + ((d->m_lookahead_pos - d->m_lz_code_buf_dict_pos) <= d->m_dict_size) ) + { + mz_uint i; d->m_pOutput_buf = pSaved_output_buf; d->m_bit_buffer = saved_bit_buf, d->m_bits_in = saved_bits_in; + TDEFL_PUT_BITS(0, 2); + if (d->m_bits_in) { TDEFL_PUT_BITS(0, 8 - d->m_bits_in); } + for (i = 2; i; --i, d->m_total_lz_bytes ^= 0xFFFF) + { + TDEFL_PUT_BITS(d->m_total_lz_bytes & 0xFFFF, 16); + } + for (i = 0; i < d->m_total_lz_bytes; ++i) + { + TDEFL_PUT_BITS(d->m_dict[(d->m_lz_code_buf_dict_pos + i) & TDEFL_LZ_DICT_SIZE_MASK], 8); + } + } + // Check for the extremely unlikely (if not impossible) case of the compressed block not fitting into the output buffer when using dynamic codes. 
+ else if (!comp_block_succeeded) + { + d->m_pOutput_buf = pSaved_output_buf; d->m_bit_buffer = saved_bit_buf, d->m_bits_in = saved_bits_in; + tdefl_compress_block(d, MZ_TRUE); + } + + if (flush) + { + if (flush == TDEFL_FINISH) + { + if (d->m_bits_in) { TDEFL_PUT_BITS(0, 8 - d->m_bits_in); } + if (d->m_flags & TDEFL_WRITE_ZLIB_HEADER) { mz_uint i, a = d->m_adler32; for (i = 0; i < 4; i++) { TDEFL_PUT_BITS((a >> 24) & 0xFF, 8); a <<= 8; } } + } + else + { + mz_uint i, z = 0; TDEFL_PUT_BITS(0, 3); if (d->m_bits_in) { TDEFL_PUT_BITS(0, 8 - d->m_bits_in); } for (i = 2; i; --i, z ^= 0xFFFF) { TDEFL_PUT_BITS(z & 0xFFFF, 16); } + } + } + + MZ_ASSERT(d->m_pOutput_buf < d->m_pOutput_buf_end); + + memset(&d->m_huff_count[0][0], 0, sizeof(d->m_huff_count[0][0]) * TDEFL_MAX_HUFF_SYMBOLS_0); + memset(&d->m_huff_count[1][0], 0, sizeof(d->m_huff_count[1][0]) * TDEFL_MAX_HUFF_SYMBOLS_1); + + d->m_pLZ_code_buf = d->m_lz_code_buf + 1; d->m_pLZ_flags = d->m_lz_code_buf; d->m_num_flags_left = 8; d->m_lz_code_buf_dict_pos += d->m_total_lz_bytes; d->m_total_lz_bytes = 0; d->m_block_index++; + + if ((n = (int)(d->m_pOutput_buf - pOutput_buf_start)) != 0) + { + if (d->m_pPut_buf_func) + { + *d->m_pIn_buf_size = d->m_pSrc - (const mz_uint8 *)d->m_pIn_buf; + if (!(*d->m_pPut_buf_func)(d->m_output_buf, n, d->m_pPut_buf_user)) + return (d->m_prev_return_status = TDEFL_STATUS_PUT_BUF_FAILED); + } + else if (pOutput_buf_start == d->m_output_buf) + { + int bytes_to_copy = (int)MZ_MIN((size_t)n, (size_t)(*d->m_pOut_buf_size - d->m_out_buf_ofs)); + memcpy((mz_uint8 *)d->m_pOut_buf + d->m_out_buf_ofs, d->m_output_buf, bytes_to_copy); + d->m_out_buf_ofs += bytes_to_copy; + if ((n -= bytes_to_copy) != 0) + { + d->m_output_flush_ofs = bytes_to_copy; + d->m_output_flush_remaining = n; + } + } + else + { + d->m_out_buf_ofs += n; + } + } + + return d->m_output_flush_remaining; +} + +#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES +#define TDEFL_READ_UNALIGNED_WORD(p) *(const mz_uint16*)(p) +static 
MZ_FORCEINLINE void tdefl_find_match(tdefl_compressor *d, mz_uint lookahead_pos, mz_uint max_dist, mz_uint max_match_len, mz_uint *pMatch_dist, mz_uint *pMatch_len) +{ + mz_uint dist, pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK, match_len = *pMatch_len, probe_pos = pos, next_probe_pos, probe_len; + mz_uint num_probes_left = d->m_max_probes[match_len >= 32]; + const mz_uint16 *s = (const mz_uint16*)(d->m_dict + pos), *p, *q; + mz_uint16 c01 = TDEFL_READ_UNALIGNED_WORD(&d->m_dict[pos + match_len - 1]), s01 = TDEFL_READ_UNALIGNED_WORD(s); + MZ_ASSERT(max_match_len <= TDEFL_MAX_MATCH_LEN); if (max_match_len <= match_len) return; + for ( ; ; ) + { + for ( ; ; ) + { + if (--num_probes_left == 0) return; + #define TDEFL_PROBE \ + next_probe_pos = d->m_next[probe_pos]; \ + if ((!next_probe_pos) || ((dist = (mz_uint16)(lookahead_pos - next_probe_pos)) > max_dist)) return; \ + probe_pos = next_probe_pos & TDEFL_LZ_DICT_SIZE_MASK; \ + if (TDEFL_READ_UNALIGNED_WORD(&d->m_dict[probe_pos + match_len - 1]) == c01) break; + TDEFL_PROBE; TDEFL_PROBE; TDEFL_PROBE; + } + if (!dist) break; q = (const mz_uint16*)(d->m_dict + probe_pos); if (TDEFL_READ_UNALIGNED_WORD(q) != s01) continue; p = s; probe_len = 32; + do { } while ( (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && + (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (--probe_len > 0) ); + if (!probe_len) + { + *pMatch_dist = dist; *pMatch_len = MZ_MIN(max_match_len, TDEFL_MAX_MATCH_LEN); break; + } + else if ((probe_len = ((mz_uint)(p - s) * 2) + (mz_uint)(*(const mz_uint8*)p == *(const mz_uint8*)q)) > match_len) + { + *pMatch_dist = dist; if ((*pMatch_len = match_len = MZ_MIN(max_match_len, probe_len)) == max_match_len) break; + c01 = TDEFL_READ_UNALIGNED_WORD(&d->m_dict[pos + match_len - 1]); + } + } +} +#else +static MZ_FORCEINLINE void 
tdefl_find_match(tdefl_compressor *d, mz_uint lookahead_pos, mz_uint max_dist, mz_uint max_match_len, mz_uint *pMatch_dist, mz_uint *pMatch_len) +{ + mz_uint dist, pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK, match_len = *pMatch_len, probe_pos = pos, next_probe_pos, probe_len; + mz_uint num_probes_left = d->m_max_probes[match_len >= 32]; + const mz_uint8 *s = d->m_dict + pos, *p, *q; + mz_uint8 c0 = d->m_dict[pos + match_len], c1 = d->m_dict[pos + match_len - 1]; + MZ_ASSERT(max_match_len <= TDEFL_MAX_MATCH_LEN); if (max_match_len <= match_len) return; + for ( ; ; ) + { + for ( ; ; ) + { + if (--num_probes_left == 0) return; + #define TDEFL_PROBE \ + next_probe_pos = d->m_next[probe_pos]; \ + if ((!next_probe_pos) || ((dist = (mz_uint16)(lookahead_pos - next_probe_pos)) > max_dist)) return; \ + probe_pos = next_probe_pos & TDEFL_LZ_DICT_SIZE_MASK; \ + if ((d->m_dict[probe_pos + match_len] == c0) && (d->m_dict[probe_pos + match_len - 1] == c1)) break; + TDEFL_PROBE; TDEFL_PROBE; TDEFL_PROBE; + } + if (!dist) break; p = s; q = d->m_dict + probe_pos; for (probe_len = 0; probe_len < max_match_len; probe_len++) if (*p++ != *q++) break; + if (probe_len > match_len) + { + *pMatch_dist = dist; if ((*pMatch_len = match_len = probe_len) == max_match_len) return; + c0 = d->m_dict[pos + match_len]; c1 = d->m_dict[pos + match_len - 1]; + } + } +} +#endif // #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES + +#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN +static mz_bool tdefl_compress_fast(tdefl_compressor *d) +{ + // Faster, minimally featured LZRW1-style match+parse loop with better register utilization. Intended for applications where raw throughput is valued more highly than ratio. 
+ mz_uint lookahead_pos = d->m_lookahead_pos, lookahead_size = d->m_lookahead_size, dict_size = d->m_dict_size, total_lz_bytes = d->m_total_lz_bytes, num_flags_left = d->m_num_flags_left; + mz_uint8 *pLZ_code_buf = d->m_pLZ_code_buf, *pLZ_flags = d->m_pLZ_flags; + mz_uint cur_pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK; + + while ((d->m_src_buf_left) || ((d->m_flush) && (lookahead_size))) + { + const mz_uint TDEFL_COMP_FAST_LOOKAHEAD_SIZE = 4096; + mz_uint dst_pos = (lookahead_pos + lookahead_size) & TDEFL_LZ_DICT_SIZE_MASK; + mz_uint num_bytes_to_process = (mz_uint)MZ_MIN(d->m_src_buf_left, TDEFL_COMP_FAST_LOOKAHEAD_SIZE - lookahead_size); + d->m_src_buf_left -= num_bytes_to_process; + lookahead_size += num_bytes_to_process; + + while (num_bytes_to_process) + { + mz_uint32 n = MZ_MIN(TDEFL_LZ_DICT_SIZE - dst_pos, num_bytes_to_process); + memcpy(d->m_dict + dst_pos, d->m_pSrc, n); + if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1)) + memcpy(d->m_dict + TDEFL_LZ_DICT_SIZE + dst_pos, d->m_pSrc, MZ_MIN(n, (TDEFL_MAX_MATCH_LEN - 1) - dst_pos)); + d->m_pSrc += n; + dst_pos = (dst_pos + n) & TDEFL_LZ_DICT_SIZE_MASK; + num_bytes_to_process -= n; + } + + dict_size = MZ_MIN(TDEFL_LZ_DICT_SIZE - lookahead_size, dict_size); + if ((!d->m_flush) && (lookahead_size < TDEFL_COMP_FAST_LOOKAHEAD_SIZE)) break; + + while (lookahead_size >= 4) + { + mz_uint cur_match_dist, cur_match_len = 1; + mz_uint8 *pCur_dict = d->m_dict + cur_pos; + mz_uint first_trigram = (*(const mz_uint32 *)pCur_dict) & 0xFFFFFF; + mz_uint hash = (first_trigram ^ (first_trigram >> (24 - (TDEFL_LZ_HASH_BITS - 8)))) & TDEFL_LEVEL1_HASH_SIZE_MASK; + mz_uint probe_pos = d->m_hash[hash]; + d->m_hash[hash] = (mz_uint16)lookahead_pos; + + if (((cur_match_dist = (mz_uint16)(lookahead_pos - probe_pos)) <= dict_size) && ((*(const mz_uint32 *)(d->m_dict + (probe_pos &= TDEFL_LZ_DICT_SIZE_MASK)) & 0xFFFFFF) == first_trigram)) + { + const mz_uint16 *p = (const mz_uint16 *)pCur_dict; + const mz_uint16 *q = (const mz_uint16 
*)(d->m_dict + probe_pos); + mz_uint32 probe_len = 32; + do { } while ( (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && + (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (--probe_len > 0) ); + cur_match_len = ((mz_uint)(p - (const mz_uint16 *)pCur_dict) * 2) + (mz_uint)(*(const mz_uint8 *)p == *(const mz_uint8 *)q); + if (!probe_len) + cur_match_len = cur_match_dist ? TDEFL_MAX_MATCH_LEN : 0; + + if ((cur_match_len < TDEFL_MIN_MATCH_LEN) || ((cur_match_len == TDEFL_MIN_MATCH_LEN) && (cur_match_dist >= 8U*1024U))) + { + cur_match_len = 1; + *pLZ_code_buf++ = (mz_uint8)first_trigram; + *pLZ_flags = (mz_uint8)(*pLZ_flags >> 1); + d->m_huff_count[0][(mz_uint8)first_trigram]++; + } + else + { + mz_uint32 s0, s1; + cur_match_len = MZ_MIN(cur_match_len, lookahead_size); + + MZ_ASSERT((cur_match_len >= TDEFL_MIN_MATCH_LEN) && (cur_match_dist >= 1) && (cur_match_dist <= TDEFL_LZ_DICT_SIZE)); + + cur_match_dist--; + + pLZ_code_buf[0] = (mz_uint8)(cur_match_len - TDEFL_MIN_MATCH_LEN); + *(mz_uint16 *)(&pLZ_code_buf[1]) = (mz_uint16)cur_match_dist; + pLZ_code_buf += 3; + *pLZ_flags = (mz_uint8)((*pLZ_flags >> 1) | 0x80); + + s0 = s_tdefl_small_dist_sym[cur_match_dist & 511]; + s1 = s_tdefl_large_dist_sym[cur_match_dist >> 8]; + d->m_huff_count[1][(cur_match_dist < 512) ? 
s0 : s1]++; + + d->m_huff_count[0][s_tdefl_len_sym[cur_match_len - TDEFL_MIN_MATCH_LEN]]++; + } + } + else + { + *pLZ_code_buf++ = (mz_uint8)first_trigram; + *pLZ_flags = (mz_uint8)(*pLZ_flags >> 1); + d->m_huff_count[0][(mz_uint8)first_trigram]++; + } + + if (--num_flags_left == 0) { num_flags_left = 8; pLZ_flags = pLZ_code_buf++; } + + total_lz_bytes += cur_match_len; + lookahead_pos += cur_match_len; + dict_size = MZ_MIN(dict_size + cur_match_len, TDEFL_LZ_DICT_SIZE); + cur_pos = (cur_pos + cur_match_len) & TDEFL_LZ_DICT_SIZE_MASK; + MZ_ASSERT(lookahead_size >= cur_match_len); + lookahead_size -= cur_match_len; + + if (pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) + { + int n; + d->m_lookahead_pos = lookahead_pos; d->m_lookahead_size = lookahead_size; d->m_dict_size = dict_size; + d->m_total_lz_bytes = total_lz_bytes; d->m_pLZ_code_buf = pLZ_code_buf; d->m_pLZ_flags = pLZ_flags; d->m_num_flags_left = num_flags_left; + if ((n = tdefl_flush_block(d, 0)) != 0) + return (n < 0) ? MZ_FALSE : MZ_TRUE; + total_lz_bytes = d->m_total_lz_bytes; pLZ_code_buf = d->m_pLZ_code_buf; pLZ_flags = d->m_pLZ_flags; num_flags_left = d->m_num_flags_left; + } + } + + while (lookahead_size) + { + mz_uint8 lit = d->m_dict[cur_pos]; + + total_lz_bytes++; + *pLZ_code_buf++ = lit; + *pLZ_flags = (mz_uint8)(*pLZ_flags >> 1); + if (--num_flags_left == 0) { num_flags_left = 8; pLZ_flags = pLZ_code_buf++; } + + d->m_huff_count[0][lit]++; + + lookahead_pos++; + dict_size = MZ_MIN(dict_size + 1, TDEFL_LZ_DICT_SIZE); + cur_pos = (cur_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK; + lookahead_size--; + + if (pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) + { + int n; + d->m_lookahead_pos = lookahead_pos; d->m_lookahead_size = lookahead_size; d->m_dict_size = dict_size; + d->m_total_lz_bytes = total_lz_bytes; d->m_pLZ_code_buf = pLZ_code_buf; d->m_pLZ_flags = pLZ_flags; d->m_num_flags_left = num_flags_left; + if ((n = tdefl_flush_block(d, 0)) != 0) + return (n < 0) ? 
MZ_FALSE : MZ_TRUE;
        total_lz_bytes = d->m_total_lz_bytes; pLZ_code_buf = d->m_pLZ_code_buf; pLZ_flags = d->m_pLZ_flags; num_flags_left = d->m_num_flags_left;
      }
    }
  }

  // Save the rolling locals back into the compressor state before returning.
  d->m_lookahead_pos = lookahead_pos; d->m_lookahead_size = lookahead_size; d->m_dict_size = dict_size;
  d->m_total_lz_bytes = total_lz_bytes; d->m_pLZ_code_buf = pLZ_code_buf; d->m_pLZ_flags = pLZ_flags; d->m_num_flags_left = num_flags_left;
  return MZ_TRUE;
}
#endif // MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN

// Appends one literal byte to the LZ code buffer: stores the byte, shifts a 0
// bit into the current flag byte (new flag byte every 8 codes), and bumps the
// literal's Huffman frequency count.
static MZ_FORCEINLINE void tdefl_record_literal(tdefl_compressor *d, mz_uint8 lit)
{
  d->m_total_lz_bytes++;
  *d->m_pLZ_code_buf++ = lit;
  *d->m_pLZ_flags = (mz_uint8)(*d->m_pLZ_flags >> 1); if (--d->m_num_flags_left == 0) { d->m_num_flags_left = 8; d->m_pLZ_flags = d->m_pLZ_code_buf++; }
  d->m_huff_count[0][lit]++;
}

// Appends one match to the LZ code buffer as 3 bytes: (match_len -
// TDEFL_MIN_MATCH_LEN), then (match_dist - 1) as little-endian 16 bits.
// Shifts a 1 bit into the flag byte and updates the length/distance
// Huffman frequency counts (small vs. large distance symbol tables).
static MZ_FORCEINLINE void tdefl_record_match(tdefl_compressor *d, mz_uint match_len, mz_uint match_dist)
{
  mz_uint32 s0, s1;

  MZ_ASSERT((match_len >= TDEFL_MIN_MATCH_LEN) && (match_dist >= 1) && (match_dist <= TDEFL_LZ_DICT_SIZE));

  d->m_total_lz_bytes += match_len;

  d->m_pLZ_code_buf[0] = (mz_uint8)(match_len - TDEFL_MIN_MATCH_LEN);

  match_dist -= 1;
  d->m_pLZ_code_buf[1] = (mz_uint8)(match_dist & 0xFF);
  d->m_pLZ_code_buf[2] = (mz_uint8)(match_dist >> 8); d->m_pLZ_code_buf += 3;

  *d->m_pLZ_flags = (mz_uint8)((*d->m_pLZ_flags >> 1) | 0x80); if (--d->m_num_flags_left == 0) { d->m_num_flags_left = 8; d->m_pLZ_flags = d->m_pLZ_code_buf++; }

  s0 = s_tdefl_small_dist_sym[match_dist & 511]; s1 = s_tdefl_large_dist_sym[(match_dist >> 8) & 127];
  d->m_huff_count[1][(match_dist < 512) ? s0 : s1]++;

  // NOTE(review): the guard is always true here given the MZ_ASSERT above.
  if (match_len >= TDEFL_MIN_MATCH_LEN) d->m_huff_count[0][s_tdefl_len_sym[match_len - TDEFL_MIN_MATCH_LEN]]++;
}

// General-purpose compression path: maintains the sliding dictionary + hash
// chains and runs a greedy or one-byte-deferred "lazy" parse, emitting
// literals/matches via tdefl_record_*. Returns MZ_FALSE only when
// tdefl_flush_block() reports an error (n < 0).
static mz_bool tdefl_compress_normal(tdefl_compressor *d)
{
  const mz_uint8 *pSrc = d->m_pSrc; size_t src_buf_left = d->m_src_buf_left;
  tdefl_flush flush = d->m_flush;

  while ((src_buf_left) || ((flush) && (d->m_lookahead_size)))
  {
    mz_uint len_to_move, cur_match_dist, cur_match_len, cur_pos;
    // Update dictionary and hash chains. Keeps the lookahead size equal to TDEFL_MAX_MATCH_LEN.
    if ((d->m_lookahead_size + d->m_dict_size) >= (TDEFL_MIN_MATCH_LEN - 1))
    {
      mz_uint dst_pos = (d->m_lookahead_pos + d->m_lookahead_size) & TDEFL_LZ_DICT_SIZE_MASK, ins_pos = d->m_lookahead_pos + d->m_lookahead_size - 2;
      mz_uint hash = (d->m_dict[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] << TDEFL_LZ_HASH_SHIFT) ^ d->m_dict[(ins_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK];
      mz_uint num_bytes_to_process = (mz_uint)MZ_MIN(src_buf_left, TDEFL_MAX_MATCH_LEN - d->m_lookahead_size);
      const mz_uint8 *pSrc_end = pSrc + num_bytes_to_process;
      src_buf_left -= num_bytes_to_process;
      d->m_lookahead_size += num_bytes_to_process;
      while (pSrc != pSrc_end)
      {
        // Bytes near the start of the dictionary are mirrored past
        // TDEFL_LZ_DICT_SIZE so match scans can run off the end safely.
        mz_uint8 c = *pSrc++; d->m_dict[dst_pos] = c; if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1)) d->m_dict[TDEFL_LZ_DICT_SIZE + dst_pos] = c;
        hash = ((hash << TDEFL_LZ_HASH_SHIFT) ^ c) & (TDEFL_LZ_HASH_SIZE - 1);
        d->m_next[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] = d->m_hash[hash]; d->m_hash[hash] = (mz_uint16)(ins_pos);
        dst_pos = (dst_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK; ins_pos++;
      }
    }
    else
    {
      // Startup path: not enough bytes yet to seed the rolling hash.
      while ((src_buf_left) && (d->m_lookahead_size < TDEFL_MAX_MATCH_LEN))
      {
        mz_uint8 c = *pSrc++;
        mz_uint dst_pos = (d->m_lookahead_pos + d->m_lookahead_size) & TDEFL_LZ_DICT_SIZE_MASK;
        src_buf_left--;
        d->m_dict[dst_pos] = c;
        if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1))
          d->m_dict[TDEFL_LZ_DICT_SIZE + dst_pos] = c;
        if ((++d->m_lookahead_size + d->m_dict_size) >= TDEFL_MIN_MATCH_LEN)
        {
          mz_uint ins_pos = d->m_lookahead_pos + (d->m_lookahead_size - 1) - 2;
          mz_uint hash = ((d->m_dict[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] << (TDEFL_LZ_HASH_SHIFT * 2)) ^ (d->m_dict[(ins_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK] << TDEFL_LZ_HASH_SHIFT) ^ c) & (TDEFL_LZ_HASH_SIZE - 1);
          d->m_next[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] = d->m_hash[hash]; d->m_hash[hash] = (mz_uint16)(ins_pos);
        }
      }
    }
    d->m_dict_size = MZ_MIN(TDEFL_LZ_DICT_SIZE - d->m_lookahead_size, d->m_dict_size);
    if ((!flush) && (d->m_lookahead_size < TDEFL_MAX_MATCH_LEN))
      break;

    // Simple lazy/greedy parsing state machine.
    len_to_move = 1; cur_match_dist = 0; cur_match_len = d->m_saved_match_len ? d->m_saved_match_len : (TDEFL_MIN_MATCH_LEN - 1); cur_pos = d->m_lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK;
    if (d->m_flags & (TDEFL_RLE_MATCHES | TDEFL_FORCE_ALL_RAW_BLOCKS))
    {
      // RLE mode: only look for a run of the previous byte (distance 1).
      if ((d->m_dict_size) && (!(d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS)))
      {
        mz_uint8 c = d->m_dict[(cur_pos - 1) & TDEFL_LZ_DICT_SIZE_MASK];
        cur_match_len = 0; while (cur_match_len < d->m_lookahead_size) { if (d->m_dict[cur_pos + cur_match_len] != c) break; cur_match_len++; }
        if (cur_match_len < TDEFL_MIN_MATCH_LEN) cur_match_len = 0; else cur_match_dist = 1;
      }
    }
    else
    {
      tdefl_find_match(d, d->m_lookahead_pos, d->m_dict_size, d->m_lookahead_size, &cur_match_dist, &cur_match_len);
    }
    // Reject matches that would cost more than they save (tiny match, huge
    // distance), self-referencing matches, and short matches in filter mode.
    if (((cur_match_len == TDEFL_MIN_MATCH_LEN) && (cur_match_dist >= 8U*1024U)) || (cur_pos == cur_match_dist) || ((d->m_flags & TDEFL_FILTER_MATCHES) && (cur_match_len <= 5)))
    {
      cur_match_dist = cur_match_len = 0;
    }
    if (d->m_saved_match_len)
    {
      // Lazy parse: compare this position's match to the one deferred from
      // the previous position and keep the better of the two.
      if (cur_match_len > d->m_saved_match_len)
      {
        tdefl_record_literal(d, (mz_uint8)d->m_saved_lit);
        if (cur_match_len >= 128)
        {
          tdefl_record_match(d, cur_match_len, cur_match_dist);
          d->m_saved_match_len = 0; len_to_move = cur_match_len;
        }
        else
        {
          d->m_saved_lit = d->m_dict[cur_pos]; d->m_saved_match_dist = cur_match_dist; d->m_saved_match_len = cur_match_len;
        }
      }
      else
      {
        tdefl_record_match(d, d->m_saved_match_len, d->m_saved_match_dist);
        len_to_move = d->m_saved_match_len - 1; d->m_saved_match_len = 0;
      }
    }
    else if (!cur_match_dist)
      tdefl_record_literal(d, d->m_dict[MZ_MIN(cur_pos, sizeof(d->m_dict) - 1)]);
    else if ((d->m_greedy_parsing) || (d->m_flags & TDEFL_RLE_MATCHES) || (cur_match_len >= 128))
    {
      tdefl_record_match(d, cur_match_len, cur_match_dist);
      len_to_move = cur_match_len;
    }
    else
    {
      // Defer: remember this match and see if the next position beats it.
      d->m_saved_lit = d->m_dict[MZ_MIN(cur_pos, sizeof(d->m_dict) - 1)]; d->m_saved_match_dist = cur_match_dist; d->m_saved_match_len = cur_match_len;
    }
    // Move the lookahead forward by len_to_move bytes.
    d->m_lookahead_pos += len_to_move;
    MZ_ASSERT(d->m_lookahead_size >= len_to_move);
    d->m_lookahead_size -= len_to_move;
    d->m_dict_size = MZ_MIN(d->m_dict_size + len_to_move, TDEFL_LZ_DICT_SIZE);
    // Check if it's time to flush the current LZ codes to the internal output buffer.
    if ( (d->m_pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) ||
         ( (d->m_total_lz_bytes > 31*1024) && (((((mz_uint)(d->m_pLZ_code_buf - d->m_lz_code_buf) * 115) >> 7) >= d->m_total_lz_bytes) || (d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS))) )
    {
      int n;
      d->m_pSrc = pSrc; d->m_src_buf_left = src_buf_left;
      if ((n = tdefl_flush_block(d, 0)) != 0)
        return (n < 0) ? MZ_FALSE : MZ_TRUE;
    }
  }

  d->m_pSrc = pSrc; d->m_src_buf_left = src_buf_left;
  return MZ_TRUE;
}

// Copies pending compressed output from the internal flush buffer into the
// caller's output buffer (when one was supplied) and reports how much of the
// input was consumed. Returns TDEFL_STATUS_DONE only when the stream is
// finished and no flush data remains.
static tdefl_status tdefl_flush_output_buffer(tdefl_compressor *d)
{
  if (d->m_pIn_buf_size)
  {
    *d->m_pIn_buf_size = d->m_pSrc - (const mz_uint8 *)d->m_pIn_buf;
  }

  if (d->m_pOut_buf_size)
  {
    size_t n = MZ_MIN(*d->m_pOut_buf_size - d->m_out_buf_ofs, d->m_output_flush_remaining);
    memcpy((mz_uint8 *)d->m_pOut_buf + d->m_out_buf_ofs, d->m_output_buf + d->m_output_flush_ofs, n);
    d->m_output_flush_ofs += (mz_uint)n;
    d->m_output_flush_remaining -= (mz_uint)n;
    d->m_out_buf_ofs += n;

    *d->m_pOut_buf_size = d->m_out_buf_ofs;
  }

  return (d->m_finished && !d->m_output_flush_remaining) ? TDEFL_STATUS_DONE : TDEFL_STATUS_OKAY;
}

// Main streaming entry point. On entry *pIn_buf_size/*pOut_buf_size hold the
// buffer sizes; on return they hold the number of bytes actually consumed/
// produced. A compressor must use either the put-buf callback or caller
// buffers, never both (validated below). `flush` controls block termination
// (TDEFL_NO_FLUSH / TDEFL_SYNC_FLUSH / TDEFL_FULL_FLUSH / TDEFL_FINISH).
tdefl_status tdefl_compress(tdefl_compressor *d, const void *pIn_buf, size_t *pIn_buf_size, void *pOut_buf, size_t *pOut_buf_size, tdefl_flush flush)
{
  if (!d)
  {
    if (pIn_buf_size) *pIn_buf_size = 0;
    if (pOut_buf_size) *pOut_buf_size = 0;
    return TDEFL_STATUS_BAD_PARAM;
  }

  d->m_pIn_buf = pIn_buf; d->m_pIn_buf_size = pIn_buf_size;
  d->m_pOut_buf = pOut_buf; d->m_pOut_buf_size = pOut_buf_size;
  d->m_pSrc = (const mz_uint8 *)(pIn_buf); d->m_src_buf_left = pIn_buf_size ? *pIn_buf_size : 0;
  d->m_out_buf_ofs = 0;
  d->m_flush = flush;

  // Parameter sanity: exactly one output mechanism, no prior error, no new
  // data after TDEFL_FINISH was requested, and non-NULL buffers when sizes
  // are non-zero.
  if ( ((d->m_pPut_buf_func != NULL) == ((pOut_buf != NULL) || (pOut_buf_size != NULL))) || (d->m_prev_return_status != TDEFL_STATUS_OKAY) ||
       (d->m_wants_to_finish && (flush != TDEFL_FINISH)) || (pIn_buf_size && *pIn_buf_size && !pIn_buf) || (pOut_buf_size && *pOut_buf_size && !pOut_buf) )
  {
    if (pIn_buf_size) *pIn_buf_size = 0;
    if (pOut_buf_size) *pOut_buf_size = 0;
    return (d->m_prev_return_status = TDEFL_STATUS_BAD_PARAM);
  }
  d->m_wants_to_finish |= (flush == TDEFL_FINISH);

  if ((d->m_output_flush_remaining) || (d->m_finished))
    return (d->m_prev_return_status = tdefl_flush_output_buffer(d));

#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN
  // Fast path only when: single probe, greedy parsing, and no filtering /
  // raw-block / RLE modes requested.
  if (((d->m_flags & TDEFL_MAX_PROBES_MASK) == 1) &&
      ((d->m_flags & TDEFL_GREEDY_PARSING_FLAG) != 0) &&
      ((d->m_flags & (TDEFL_FILTER_MATCHES | TDEFL_FORCE_ALL_RAW_BLOCKS | TDEFL_RLE_MATCHES)) == 0))
  {
    if (!tdefl_compress_fast(d))
      return d->m_prev_return_status;
  }
  else
#endif // #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN
  {
    if (!tdefl_compress_normal(d))
      return d->m_prev_return_status;
  }

  if ((d->m_flags & (TDEFL_WRITE_ZLIB_HEADER | TDEFL_COMPUTE_ADLER32)) && (pIn_buf))
    d->m_adler32 = (mz_uint32)mz_adler32(d->m_adler32, (const mz_uint8 *)pIn_buf, d->m_pSrc - (const mz_uint8 *)pIn_buf);

  if ((flush) && (!d->m_lookahead_size) && (!d->m_src_buf_left) && (!d->m_output_flush_remaining))
  {
    if (tdefl_flush_block(d, flush) < 0)
      return d->m_prev_return_status;
    d->m_finished = (flush == TDEFL_FINISH);
    // A full flush resets the dictionary so the next block is independent.
    if (flush == TDEFL_FULL_FLUSH) { MZ_CLEAR_OBJ(d->m_hash); MZ_CLEAR_OBJ(d->m_next); d->m_dict_size = 0; }
  }

  return (d->m_prev_return_status = tdefl_flush_output_buffer(d));
}

// Convenience wrapper for compressors using the put-buf callback (no caller
// output buffer).
tdefl_status tdefl_compress_buffer(tdefl_compressor *d, const void *pIn_buf, size_t in_buf_size, tdefl_flush flush)
{
  MZ_ASSERT(d->m_pPut_buf_func); return tdefl_compress(d, pIn_buf, &in_buf_size,
NULL, NULL, flush);
}

// Initializes (or re-initializes) a caller-provided compressor struct.
// Performs no allocation. `flags` packs the max probe count (low 12 bits)
// plus TDEFL_* option bits. Pass a non-NULL pPut_buf_func to use callback-
// driven output. Note m_hash is only cleared when deterministic parsing is
// requested (TDEFL_NONDETERMINISTIC_PARSING_FLAG not set).
tdefl_status tdefl_init(tdefl_compressor *d, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags)
{
  d->m_pPut_buf_func = pPut_buf_func; d->m_pPut_buf_user = pPut_buf_user;
  d->m_flags = (mz_uint)(flags); d->m_max_probes[0] = 1 + ((flags & 0xFFF) + 2) / 3; d->m_greedy_parsing = (flags & TDEFL_GREEDY_PARSING_FLAG) != 0;
  d->m_max_probes[1] = 1 + (((flags & 0xFFF) >> 2) + 2) / 3;
  if (!(flags & TDEFL_NONDETERMINISTIC_PARSING_FLAG)) MZ_CLEAR_OBJ(d->m_hash);
  d->m_lookahead_pos = d->m_lookahead_size = d->m_dict_size = d->m_total_lz_bytes = d->m_lz_code_buf_dict_pos = d->m_bits_in = 0;
  d->m_output_flush_ofs = d->m_output_flush_remaining = d->m_finished = d->m_block_index = d->m_bit_buffer = d->m_wants_to_finish = 0;
  // Byte 0 of the LZ code buffer is reserved for the first flags byte.
  d->m_pLZ_code_buf = d->m_lz_code_buf + 1; d->m_pLZ_flags = d->m_lz_code_buf; d->m_num_flags_left = 8;
  d->m_pOutput_buf = d->m_output_buf; d->m_pOutput_buf_end = d->m_output_buf; d->m_prev_return_status = TDEFL_STATUS_OKAY;
  d->m_saved_match_dist = d->m_saved_match_len = d->m_saved_lit = 0; d->m_adler32 = 1;
  d->m_pIn_buf = NULL; d->m_pOut_buf = NULL;
  d->m_pIn_buf_size = NULL; d->m_pOut_buf_size = NULL;
  d->m_flush = TDEFL_NO_FLUSH; d->m_pSrc = NULL; d->m_src_buf_left = 0; d->m_out_buf_ofs = 0;
  memset(&d->m_huff_count[0][0], 0, sizeof(d->m_huff_count[0][0]) * TDEFL_MAX_HUFF_SYMBOLS_0);
  memset(&d->m_huff_count[1][0], 0, sizeof(d->m_huff_count[1][0]) * TDEFL_MAX_HUFF_SYMBOLS_1);
  return TDEFL_STATUS_OKAY;
}

// Returns the status of the most recent tdefl_compress() call.
tdefl_status tdefl_get_prev_return_status(tdefl_compressor *d)
{
  return d->m_prev_return_status;
}

// Returns the running Adler-32 of the source data consumed so far.
mz_uint32 tdefl_get_adler32(tdefl_compressor *d)
{
  return d->m_adler32;
}

// One-shot: compresses pBuf/buf_len through the supplied output callback.
// Heap-allocates a temporary compressor (it is large), frees it on exit.
mz_bool tdefl_compress_mem_to_output(const void *pBuf, size_t buf_len, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags)
{
  tdefl_compressor *pComp; mz_bool succeeded; if (((buf_len) && (!pBuf)) || (!pPut_buf_func)) return MZ_FALSE;
  pComp = (tdefl_compressor*)MZ_MALLOC(sizeof(tdefl_compressor)); if (!pComp) return MZ_FALSE;
  succeeded = (tdefl_init(pComp, pPut_buf_func, pPut_buf_user, flags) == TDEFL_STATUS_OKAY);
  succeeded = succeeded && (tdefl_compress_buffer(pComp, pBuf, buf_len, TDEFL_FINISH) == TDEFL_STATUS_DONE);
  MZ_FREE(pComp); return succeeded;
}

// Growable (or fixed) byte sink used as the put-buf callback target by the
// mem-to-heap / mem-to-mem helpers below.
typedef struct
{
  size_t m_size, m_capacity;
  mz_uint8 *m_pBuf;
  mz_bool m_expandable;   // when false, writes past m_capacity fail
} tdefl_output_buffer;

// tdefl_put_buf_func_ptr-compatible callback that appends to a
// tdefl_output_buffer, doubling capacity (min 128) as needed.
static mz_bool tdefl_output_buffer_putter(const void *pBuf, int len, void *pUser)
{
  tdefl_output_buffer *p = (tdefl_output_buffer *)pUser;
  size_t new_size = p->m_size + len;
  if (new_size > p->m_capacity)
  {
    size_t new_capacity = p->m_capacity; mz_uint8 *pNew_buf; if (!p->m_expandable) return MZ_FALSE;
    do { new_capacity = MZ_MAX(128U, new_capacity << 1U); } while (new_size > new_capacity);
    pNew_buf = (mz_uint8*)MZ_REALLOC(p->m_pBuf, new_capacity); if (!pNew_buf) return MZ_FALSE;
    p->m_pBuf = pNew_buf; p->m_capacity = new_capacity;
  }
  memcpy((mz_uint8*)p->m_pBuf + p->m_size, pBuf, len); p->m_size = new_size;
  return MZ_TRUE;
}

// Compresses a memory block into a newly heap-allocated buffer; caller owns
// (and must MZ_FREE) the returned pointer. Returns NULL on failure.
void *tdefl_compress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags)
{
  tdefl_output_buffer out_buf; MZ_CLEAR_OBJ(out_buf);
  if (!pOut_len) return MZ_FALSE; else *pOut_len = 0;
  out_buf.m_expandable = MZ_TRUE;
  if (!tdefl_compress_mem_to_output(pSrc_buf, src_buf_len, tdefl_output_buffer_putter, &out_buf, flags)) return NULL;
  *pOut_len = out_buf.m_size; return out_buf.m_pBuf;
}

// Compresses a memory block into a caller-provided, fixed-size buffer.
// Returns the compressed size, or 0 on failure (including output overflow).
size_t tdefl_compress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags)
{
  tdefl_output_buffer out_buf; MZ_CLEAR_OBJ(out_buf);
  if (!pOut_buf) return 0;
  out_buf.m_pBuf = (mz_uint8*)pOut_buf; out_buf.m_capacity = out_buf_len;
  if (!tdefl_compress_mem_to_output(pSrc_buf, src_buf_len, tdefl_output_buffer_putter, &out_buf, flags)) return 0;
  return out_buf.m_size;
}

#ifndef MINIZ_NO_ZLIB_APIS
// Probe counts indexed by compression level 0..10.
static const
mz_uint s_tdefl_num_probes[11] = { 0, 1, 6, 32, 16, 32, 128, 256, 512, 768, 1500 };

// Translates zlib-style (level, window_bits, strategy) parameters into a
// TDEFL flag word: probe count from s_tdefl_num_probes, greedy parsing for
// levels <= 3, zlib header when window_bits > 0, and strategy-specific bits.
// level may actually range from [0,10] (10 is a "hidden" max level, where we want a bit more compression and it's fine if throughput to fall off a cliff on some files).
mz_uint tdefl_create_comp_flags_from_zip_params(int level, int window_bits, int strategy)
{
  mz_uint comp_flags = s_tdefl_num_probes[(level >= 0) ? MZ_MIN(10, level) : MZ_DEFAULT_LEVEL] | ((level <= 3) ? TDEFL_GREEDY_PARSING_FLAG : 0);
  if (window_bits > 0) comp_flags |= TDEFL_WRITE_ZLIB_HEADER;

  if (!level) comp_flags |= TDEFL_FORCE_ALL_RAW_BLOCKS;
  else if (strategy == MZ_FILTERED) comp_flags |= TDEFL_FILTER_MATCHES;
  else if (strategy == MZ_HUFFMAN_ONLY) comp_flags &= ~TDEFL_MAX_PROBES_MASK;
  else if (strategy == MZ_FIXED) comp_flags |= TDEFL_FORCE_ALL_STATIC_BLOCKS;
  else if (strategy == MZ_RLE) comp_flags |= TDEFL_RLE_MATCHES;

  return comp_flags;
}
#endif //MINIZ_NO_ZLIB_APIS

#ifdef _MSC_VER
#pragma warning (push)
#pragma warning (disable:4204) // nonstandard extension used : non-constant aggregate initializer (also supported by GNU C and C99, so no big deal)
#endif

// Simple PNG writer function by Alex Evans, 2011. Released into the public domain: https://gist.github.com/908299, more context at
// http://altdevblogaday.org/2011/04/06/a-smaller-jpg-encoder/.
// This is actually a modification of Alex's original code so PNG files generated by this function pass pngcheck.
//
// Layout produced: 41 bytes of PNG signature + IHDR + IDAT header (written
// as a dummy first, patched afterwards), then the zlib-compressed image
// (each row prefixed with a 0 filter byte), then a 16-byte footer holding
// the IDAT CRC-32 and the IEND chunk. Returns a heap buffer (caller frees)
// with *pLen_out set to its size, or NULL on failure.
void *tdefl_write_image_to_png_file_in_memory_ex(const void *pImage, int w, int h, int num_chans, size_t *pLen_out, mz_uint level, mz_bool flip)
{
  // Using a local copy of this array here in case MINIZ_NO_ZLIB_APIS was defined.
  static const mz_uint s_tdefl_png_num_probes[11] = { 0, 1, 6, 32, 16, 32, 128, 256, 512, 768, 1500 };
  tdefl_compressor *pComp = (tdefl_compressor *)MZ_MALLOC(sizeof(tdefl_compressor)); tdefl_output_buffer out_buf; int i, bpl = w * num_chans, y, z; mz_uint32 c; *pLen_out = 0;
  if (!pComp) return NULL;
  MZ_CLEAR_OBJ(out_buf); out_buf.m_expandable = MZ_TRUE; out_buf.m_capacity = 57+MZ_MAX(64, (1+bpl)*h); if (NULL == (out_buf.m_pBuf = (mz_uint8*)MZ_MALLOC(out_buf.m_capacity))) { MZ_FREE(pComp); return NULL; }
  // write dummy header
  for (z = 41; z; --z) tdefl_output_buffer_putter(&z, 1, &out_buf);
  // compress image data
  tdefl_init(pComp, tdefl_output_buffer_putter, &out_buf, s_tdefl_png_num_probes[MZ_MIN(10, level)] | TDEFL_WRITE_ZLIB_HEADER);
  // Per-row: one zero filter byte (reuses z, which is 0 after the loop
  // above), then the row's pixels; `flip` selects bottom-up row order.
  for (y = 0; y < h; ++y) { tdefl_compress_buffer(pComp, &z, 1, TDEFL_NO_FLUSH); tdefl_compress_buffer(pComp, (mz_uint8*)pImage + (flip ? (h - 1 - y) : y) * bpl, bpl, TDEFL_NO_FLUSH); }
  if (tdefl_compress_buffer(pComp, NULL, 0, TDEFL_FINISH) != TDEFL_STATUS_DONE) { MZ_FREE(pComp); MZ_FREE(out_buf.m_pBuf); return NULL; }
  // write real header
  *pLen_out = out_buf.m_size-41;
  {
    // chans[] maps num_chans (1..4) to the PNG color type byte.
    static const mz_uint8 chans[] = {0x00, 0x00, 0x04, 0x02, 0x06};
    mz_uint8 pnghdr[41]={0x89,0x50,0x4e,0x47,0x0d,0x0a,0x1a,0x0a,0x00,0x00,0x00,0x0d,0x49,0x48,0x44,0x52,
      0,0,(mz_uint8)(w>>8),(mz_uint8)w,0,0,(mz_uint8)(h>>8),(mz_uint8)h,8,chans[num_chans],0,0,0,0,0,0,0,
      (mz_uint8)(*pLen_out>>24),(mz_uint8)(*pLen_out>>16),(mz_uint8)(*pLen_out>>8),(mz_uint8)*pLen_out,0x49,0x44,0x41,0x54};
    c=(mz_uint32)mz_crc32(MZ_CRC32_INIT,pnghdr+12,17); for (i=0; i<4; ++i, c<<=8) ((mz_uint8*)(pnghdr+29))[i]=(mz_uint8)(c>>24);
    memcpy(out_buf.m_pBuf, pnghdr, 41);
  }
  // write footer (IDAT CRC-32, followed by IEND chunk)
  if (!tdefl_output_buffer_putter("\0\0\0\0\0\0\0\0\x49\x45\x4e\x44\xae\x42\x60\x82", 16, &out_buf)) { *pLen_out = 0; MZ_FREE(pComp); MZ_FREE(out_buf.m_pBuf); return NULL; }
  c = (mz_uint32)mz_crc32(MZ_CRC32_INIT,out_buf.m_pBuf+41-4,
*pLen_out+4); for (i=0; i<4; ++i, c<<=8) (out_buf.m_pBuf+out_buf.m_size-16)[i] = (mz_uint8)(c >> 24);
// compute final size of file, grab compressed data buffer and return
*pLen_out += 57; MZ_FREE(pComp); return out_buf.m_pBuf;
}
void *tdefl_write_image_to_png_file_in_memory(const void *pImage, int w, int h, int num_chans, size_t *pLen_out)
{
  // Level 6 corresponds to TDEFL_DEFAULT_MAX_PROBES or MZ_DEFAULT_LEVEL (but we can't depend on MZ_DEFAULT_LEVEL being available in case the zlib API's where #defined out)
  return tdefl_write_image_to_png_file_in_memory_ex(pImage, w, h, num_chans, pLen_out, 6, MZ_FALSE);
}

#ifdef _MSC_VER
#pragma warning (pop)
#endif

// ------------------- .ZIP archive reading

#ifndef MINIZ_NO_ARCHIVE_APIS

#ifdef MINIZ_NO_STDIO
  #define MZ_FILE void *
#else
  #include <stdio.h>
  #include <sys/stat.h>

  #if defined(_MSC_VER) //|| defined(__MINGW64__)
    static FILE *mz_fopen(const char *pFilename, const char *pMode)
    {
      FILE* pFile = NULL;
      fopen_s(&pFile, pFilename, pMode);
      return pFile;
    }
    static FILE *mz_freopen(const char *pPath, const char *pMode, FILE *pStream)
    {
      FILE* pFile = NULL;
      if (freopen_s(&pFile, pPath, pMode, pStream))
        return NULL;
      return pFile;
    }
    #ifndef MINIZ_NO_TIME
      #include <sys/utime.h>
    #endif
    #define MZ_FILE FILE
    #define MZ_FOPEN mz_fopen
    #define MZ_FCLOSE fclose
    #define MZ_FREAD fread
    #define MZ_FWRITE fwrite
    #define MZ_FTELL64 _ftelli64
    #define MZ_FSEEK64 _fseeki64
    #define MZ_FILE_STAT_STRUCT _stat
    #define MZ_FILE_STAT _stat
    #define MZ_FFLUSH fflush
    #define MZ_FREOPEN mz_freopen
    #define MZ_DELETE_FILE remove
  #elif defined(__MINGW32__) || defined(__MINGW64__)
    #ifndef MINIZ_NO_TIME
      #include <sys/utime.h>
    #endif
    #define MZ_FILE FILE
    #define MZ_FOPEN(f, m) fopen(f, m)
    #define MZ_FCLOSE fclose
    #define MZ_FREAD fread
    #define MZ_FWRITE fwrite
    #define MZ_FTELL64 ftello64
    #define MZ_FSEEK64 fseeko64
    #define MZ_FILE_STAT_STRUCT _stat
    #define MZ_FILE_STAT _stat
    #define MZ_FFLUSH
fflush
    #define MZ_FREOPEN(f, m, s) freopen(f, m, s)
    #define MZ_DELETE_FILE remove
  #elif defined(__TINYC__)
    #ifndef MINIZ_NO_TIME
      #include <sys/utime.h>
    #endif
    #define MZ_FILE FILE
    #define MZ_FOPEN(f, m) fopen(f, m)
    #define MZ_FCLOSE fclose
    #define MZ_FREAD fread
    #define MZ_FWRITE fwrite
    #define MZ_FTELL64 ftell
    #define MZ_FSEEK64 fseek
    #define MZ_FILE_STAT_STRUCT stat
    #define MZ_FILE_STAT stat
    #define MZ_FFLUSH fflush
    #define MZ_FREOPEN(f, m, s) freopen(f, m, s)
    #define MZ_DELETE_FILE remove
  #elif defined(__GNUC__) && _LARGEFILE64_SOURCE
    #ifndef MINIZ_NO_TIME
      #include <utime.h>
    #endif
    #define MZ_FILE FILE
    #define MZ_FOPEN(f, m) fopen64(f, m)
    #define MZ_FCLOSE fclose
    #define MZ_FREAD fread
    #define MZ_FWRITE fwrite
    #define MZ_FTELL64 ftello64
    #define MZ_FSEEK64 fseeko64
    #define MZ_FILE_STAT_STRUCT stat64
    #define MZ_FILE_STAT stat64
    #define MZ_FFLUSH fflush
    #define MZ_FREOPEN(p, m, s) freopen64(p, m, s)
    #define MZ_DELETE_FILE remove
  #else
    #ifndef MINIZ_NO_TIME
      #include <utime.h>
    #endif
    #define MZ_FILE FILE
    #define MZ_FOPEN(f, m) fopen(f, m)
    #define MZ_FCLOSE fclose
    #define MZ_FREAD fread
    #define MZ_FWRITE fwrite
    #define MZ_FTELL64 ftello
    #define MZ_FSEEK64 fseeko
    #define MZ_FILE_STAT_STRUCT stat
    #define MZ_FILE_STAT stat
    #define MZ_FFLUSH fflush
    #define MZ_FREOPEN(f, m, s) freopen(f, m, s)
    #define MZ_DELETE_FILE remove
  #endif // #ifdef _MSC_VER
#endif // #ifdef MINIZ_NO_STDIO

#define MZ_TOLOWER(c) ((((c) >= 'A') && ((c) <= 'Z')) ? ((c) - 'A' + 'a') : (c))

// Various ZIP archive enums. To completely avoid cross platform compiler alignment and platform endian issues, miniz.c doesn't use structs for any of this stuff.
enum
{
  // ZIP archive identifiers and record sizes
  MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG = 0x06054b50, MZ_ZIP_CENTRAL_DIR_HEADER_SIG = 0x02014b50, MZ_ZIP_LOCAL_DIR_HEADER_SIG = 0x04034b50,
  MZ_ZIP_LOCAL_DIR_HEADER_SIZE = 30, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE = 46, MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE = 22,
  // Central directory header record offsets
  MZ_ZIP_CDH_SIG_OFS = 0, MZ_ZIP_CDH_VERSION_MADE_BY_OFS = 4, MZ_ZIP_CDH_VERSION_NEEDED_OFS = 6, MZ_ZIP_CDH_BIT_FLAG_OFS = 8,
  MZ_ZIP_CDH_METHOD_OFS = 10, MZ_ZIP_CDH_FILE_TIME_OFS = 12, MZ_ZIP_CDH_FILE_DATE_OFS = 14, MZ_ZIP_CDH_CRC32_OFS = 16,
  MZ_ZIP_CDH_COMPRESSED_SIZE_OFS = 20, MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS = 24, MZ_ZIP_CDH_FILENAME_LEN_OFS = 28, MZ_ZIP_CDH_EXTRA_LEN_OFS = 30,
  MZ_ZIP_CDH_COMMENT_LEN_OFS = 32, MZ_ZIP_CDH_DISK_START_OFS = 34, MZ_ZIP_CDH_INTERNAL_ATTR_OFS = 36, MZ_ZIP_CDH_EXTERNAL_ATTR_OFS = 38, MZ_ZIP_CDH_LOCAL_HEADER_OFS = 42,
  // Local directory header offsets
  MZ_ZIP_LDH_SIG_OFS = 0, MZ_ZIP_LDH_VERSION_NEEDED_OFS = 4, MZ_ZIP_LDH_BIT_FLAG_OFS = 6, MZ_ZIP_LDH_METHOD_OFS = 8, MZ_ZIP_LDH_FILE_TIME_OFS = 10,
  MZ_ZIP_LDH_FILE_DATE_OFS = 12, MZ_ZIP_LDH_CRC32_OFS = 14, MZ_ZIP_LDH_COMPRESSED_SIZE_OFS = 18, MZ_ZIP_LDH_DECOMPRESSED_SIZE_OFS = 22,
  MZ_ZIP_LDH_FILENAME_LEN_OFS = 26, MZ_ZIP_LDH_EXTRA_LEN_OFS = 28,
  // End of central directory offsets
  MZ_ZIP_ECDH_SIG_OFS = 0, MZ_ZIP_ECDH_NUM_THIS_DISK_OFS = 4, MZ_ZIP_ECDH_NUM_DISK_CDIR_OFS = 6, MZ_ZIP_ECDH_CDIR_NUM_ENTRIES_ON_DISK_OFS = 8,
  MZ_ZIP_ECDH_CDIR_TOTAL_ENTRIES_OFS = 10, MZ_ZIP_ECDH_CDIR_SIZE_OFS = 12, MZ_ZIP_ECDH_CDIR_OFS_OFS = 16, MZ_ZIP_ECDH_COMMENT_SIZE_OFS = 20,
};

// Minimal growable array; elements are raw bytes of m_element_size each.
// Memory comes from the archive's pluggable allocator callbacks.
typedef struct
{
  void *m_p;
  size_t m_size, m_capacity;   // counts are in elements, not bytes
  mz_uint m_element_size;
} mz_zip_array;

// Per-archive bookkeeping: the raw central directory bytes, one offset per
// file record, an optional filename-sorted index, and the backing store
// (stdio file or memory block).
struct mz_zip_internal_state_tag
{
  mz_zip_array m_central_dir;
  mz_zip_array m_central_dir_offsets;
  mz_zip_array m_sorted_central_dir_offsets;
  MZ_FILE *m_pFile;
  void *m_pMem;
  size_t m_mem_size;
  size_t m_mem_capacity;
};

#define MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(array_ptr, element_size) (array_ptr)->m_element_size = element_size
#define MZ_ZIP_ARRAY_ELEMENT(array_ptr, element_type, index) ((element_type *)((array_ptr)->m_p))[index]

// Frees the array's storage via the archive's free callback and zeroes it.
static MZ_FORCEINLINE void mz_zip_array_clear(mz_zip_archive *pZip, mz_zip_array *pArray)
{
  pZip->m_pFree(pZip->m_pAlloc_opaque, pArray->m_p);
  memset(pArray, 0, sizeof(mz_zip_array));
}

// Grows capacity to at least min_new_capacity; with `growing` set, doubles
// from the current capacity to amortize repeated appends.
static mz_bool mz_zip_array_ensure_capacity(mz_zip_archive *pZip, mz_zip_array *pArray, size_t min_new_capacity, mz_uint growing)
{
  void *pNew_p; size_t new_capacity = min_new_capacity; MZ_ASSERT(pArray->m_element_size); if (pArray->m_capacity >= min_new_capacity) return MZ_TRUE;
  if (growing) { new_capacity = MZ_MAX(1, pArray->m_capacity); while (new_capacity < min_new_capacity) new_capacity *= 2; }
  if (NULL == (pNew_p = pZip->m_pRealloc(pZip->m_pAlloc_opaque, pArray->m_p, pArray->m_element_size, new_capacity))) return MZ_FALSE;
  pArray->m_p = pNew_p; pArray->m_capacity = new_capacity;
  return MZ_TRUE;
}

// Ensures capacity without changing the logical size.
static MZ_FORCEINLINE mz_bool mz_zip_array_reserve(mz_zip_archive *pZip, mz_zip_array *pArray, size_t new_capacity, mz_uint growing)
{
  if (new_capacity > pArray->m_capacity) { if (!mz_zip_array_ensure_capacity(pZip, pArray, new_capacity, growing)) return MZ_FALSE; }
  return MZ_TRUE;
}

// Sets the logical size, growing storage if required. New elements are NOT
// zero-initialized.
static MZ_FORCEINLINE mz_bool mz_zip_array_resize(mz_zip_archive *pZip, mz_zip_array *pArray, size_t new_size, mz_uint growing)
{
  if (new_size > pArray->m_capacity) { if (!mz_zip_array_ensure_capacity(pZip, pArray, new_size, growing)) return MZ_FALSE; }
  pArray->m_size = new_size;
  return MZ_TRUE;
}

// Reserves room for n more elements beyond the current size.
static MZ_FORCEINLINE mz_bool mz_zip_array_ensure_room(mz_zip_archive *pZip, mz_zip_array *pArray, size_t n)
{
  return mz_zip_array_reserve(pZip, pArray, pArray->m_size + n, MZ_TRUE);
}

// Appends n elements copied from pElements.
static MZ_FORCEINLINE mz_bool mz_zip_array_push_back(mz_zip_archive *pZip, mz_zip_array *pArray, const void *pElements, size_t n)
{
  size_t orig_size = pArray->m_size; if (!mz_zip_array_resize(pZip, pArray, orig_size + n, MZ_TRUE)) return MZ_FALSE;
  memcpy((mz_uint8*)pArray->m_p + orig_size * pArray->m_element_size, pElements, n * pArray->m_element_size);
  return MZ_TRUE;
}

#ifndef MINIZ_NO_TIME
// Converts packed MS-DOS time/date fields (as stored in ZIP headers) to a
// time_t via mktime(); interpreted in local time (tm_isdst = -1).
static time_t mz_zip_dos_to_time_t(int dos_time, int dos_date)
{
  struct tm tm;
  memset(&tm, 0, sizeof(tm)); tm.tm_isdst = -1;
  tm.tm_year = ((dos_date >> 9) & 127) + 1980 - 1900; tm.tm_mon = ((dos_date >> 5) & 15) - 1; tm.tm_mday = dos_date & 31;
  tm.tm_hour = (dos_time >> 11) & 31; tm.tm_min = (dos_time >> 5) & 63; tm.tm_sec = (dos_time << 1) & 62;
  return mktime(&tm);
}

// Inverse conversion: time_t -> packed DOS time/date (2-second resolution).
// On MSVC uses localtime_s and outputs zeros on failure.
static void mz_zip_time_to_dos_time(time_t time, mz_uint16 *pDOS_time, mz_uint16 *pDOS_date)
{
#ifdef _MSC_VER
  struct tm tm_struct;
  struct tm *tm = &tm_struct;
  errno_t err = localtime_s(tm, &time);
  if (err)
  {
    *pDOS_date = 0; *pDOS_time = 0;
    return;
  }
#else
  struct tm *tm = localtime(&time);
#endif
  *pDOS_time = (mz_uint16)(((tm->tm_hour) << 11) + ((tm->tm_min) << 5) + ((tm->tm_sec) >> 1));
  *pDOS_date = (mz_uint16)(((tm->tm_year + 1900 - 1980) << 9) + ((tm->tm_mon + 1) << 5) + tm->tm_mday);
}
#endif

#ifndef MINIZ_NO_STDIO
// Stats pFilename and returns its modification time as DOS time/date fields
// (zeros when MINIZ_NO_TIME is defined).
static mz_bool mz_zip_get_file_modified_time(const char *pFilename, mz_uint16 *pDOS_time, mz_uint16 *pDOS_date)
{
#ifdef MINIZ_NO_TIME
  (void)pFilename; *pDOS_date = *pDOS_time = 0;
#else
  struct MZ_FILE_STAT_STRUCT file_stat;
  // On Linux with x86 glibc, this call will fail on large files (>= 0x80000000 bytes) unless you compiled with _LARGEFILE64_SOURCE. Argh.
  if (MZ_FILE_STAT(pFilename, &file_stat) != 0)
    return MZ_FALSE;
  mz_zip_time_to_dos_time(file_stat.st_mtime, pDOS_time, pDOS_date);
#endif // #ifdef MINIZ_NO_TIME
  return MZ_TRUE;
}

#ifndef MINIZ_NO_TIME
// Sets a file's access/modification times via utime(); returns MZ_TRUE on
// success.
static mz_bool mz_zip_set_file_times(const char *pFilename, time_t access_time, time_t modified_time)
{
  struct utimbuf t; t.actime = access_time; t.modtime = modified_time;
  return !utime(pFilename, &t);
}
#endif // #ifndef MINIZ_NO_TIME
#endif // #ifndef MINIZ_NO_STDIO

// Shared reader setup: installs default allocator callbacks when the caller
// left them NULL, switches the archive into reading mode, and allocates the
// zeroed internal state (central dir byte array + offset arrays).
static mz_bool mz_zip_reader_init_internal(mz_zip_archive *pZip, mz_uint32 flags)
{
  (void)flags;
  if ((!pZip) || (pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_INVALID))
    return MZ_FALSE;

  if (!pZip->m_pAlloc) pZip->m_pAlloc = def_alloc_func;
  if (!pZip->m_pFree) pZip->m_pFree = def_free_func;
  if (!pZip->m_pRealloc) pZip->m_pRealloc = def_realloc_func;

  pZip->m_zip_mode = MZ_ZIP_MODE_READING;
  pZip->m_archive_size = 0;
  pZip->m_central_directory_file_ofs = 0;
  pZip->m_total_files = 0;

  if (NULL == (pZip->m_pState = (mz_zip_internal_state *)pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, sizeof(mz_zip_internal_state))))
    return MZ_FALSE;
  memset(pZip->m_pState, 0, sizeof(mz_zip_internal_state));
  MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_central_dir, sizeof(mz_uint8));
  MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_central_dir_offsets, sizeof(mz_uint32));
  MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_sorted_central_dir_offsets, sizeof(mz_uint32));
  return MZ_TRUE;
}

// Case-insensitive "less-than" on the filenames of two central directory
// records (ASCII tolower only); shorter name wins ties. Used by the heap
// sort below.
static MZ_FORCEINLINE mz_bool mz_zip_reader_filename_less(const mz_zip_array *pCentral_dir_array, const mz_zip_array *pCentral_dir_offsets, mz_uint l_index, mz_uint r_index)
{
  const mz_uint8 *pL = &MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_array, mz_uint8, MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_offsets, mz_uint32, l_index)), *pE;
  const mz_uint8 *pR = &MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_array, mz_uint8, MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_offsets, mz_uint32, r_index));
  mz_uint l_len = MZ_READ_LE16(pL + MZ_ZIP_CDH_FILENAME_LEN_OFS), r_len = MZ_READ_LE16(pR + MZ_ZIP_CDH_FILENAME_LEN_OFS);
  mz_uint8 l = 0, r = 0;
  // Filename bytes immediately follow the fixed-size central dir header.
  pL += MZ_ZIP_CENTRAL_DIR_HEADER_SIZE; pR += MZ_ZIP_CENTRAL_DIR_HEADER_SIZE;
  pE = pL + MZ_MIN(l_len, r_len);
  while (pL < pE)
  {
    if ((l = MZ_TOLOWER(*pL)) != (r = MZ_TOLOWER(*pR)))
      break;
    pL++; pR++;
  }
  return (pL == pE) ? (l_len < r_len) : (l < r);
}

#define MZ_SWAP_UINT32(a, b) do { mz_uint32 t = a; a = b; b = t; } MZ_MACRO_END

// Heap sort of lowercased filenames, used to help accelerate plain central directory searches by mz_zip_reader_locate_file(). (Could also use qsort(), but it could allocate memory.)
static void mz_zip_reader_sort_central_dir_offsets_by_filename(mz_zip_archive *pZip)
{
  mz_zip_internal_state *pState = pZip->m_pState;
  const mz_zip_array *pCentral_dir_offsets = &pState->m_central_dir_offsets;
  const mz_zip_array *pCentral_dir = &pState->m_central_dir;
  mz_uint32 *pIndices = &MZ_ZIP_ARRAY_ELEMENT(&pState->m_sorted_central_dir_offsets, mz_uint32, 0);
  const int size = pZip->m_total_files;
  int start = (size - 2) >> 1, end;
  // Phase 1: build the max-heap by sifting down from the last parent.
  while (start >= 0)
  {
    int child, root = start;
    for ( ; ; )
    {
      if ((child = (root << 1) + 1) >= size)
        break;
      child += (((child + 1) < size) && (mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets, pIndices[child], pIndices[child + 1])));
      if (!mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets, pIndices[root], pIndices[child]))
        break;
      MZ_SWAP_UINT32(pIndices[root], pIndices[child]); root = child;
    }
    start--;
  }

  // Phase 2: repeatedly move the max to the end and re-sift the new root.
  end = size - 1;
  while (end > 0)
  {
    int child, root = 0;
    MZ_SWAP_UINT32(pIndices[end], pIndices[0]);
    for ( ; ; )
    {
      if ((child = (root << 1) + 1) >= end)
        break;
      child += (((child + 1) < end) && mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets, pIndices[child], pIndices[child + 1]));
      if (!mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets, pIndices[root], pIndices[child]))
+ break; + MZ_SWAP_UINT32(pIndices[root], pIndices[child]); root = child; + } + end--; + } +} + +static mz_bool mz_zip_reader_read_central_dir(mz_zip_archive *pZip, mz_uint32 flags) +{ + mz_uint cdir_size, num_this_disk, cdir_disk_index; + mz_uint64 cdir_ofs; + mz_int64 cur_file_ofs; + const mz_uint8 *p; + mz_uint32 buf_u32[4096 / sizeof(mz_uint32)]; mz_uint8 *pBuf = (mz_uint8 *)buf_u32; + mz_bool sort_central_dir = ((flags & MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY) == 0); + // Basic sanity checks - reject files which are too small, and check the first 4 bytes of the file to make sure a local header is there. + if (pZip->m_archive_size < MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) + return MZ_FALSE; + // Find the end of central directory record by scanning the file from the end towards the beginning. + cur_file_ofs = MZ_MAX((mz_int64)pZip->m_archive_size - (mz_int64)sizeof(buf_u32), 0); + for ( ; ; ) + { + int i, n = (int)MZ_MIN(sizeof(buf_u32), pZip->m_archive_size - cur_file_ofs); + if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pBuf, n) != (mz_uint)n) + return MZ_FALSE; + for (i = n - 4; i >= 0; --i) + if (MZ_READ_LE32(pBuf + i) == MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG) + break; + if (i >= 0) + { + cur_file_ofs += i; + break; + } + if ((!cur_file_ofs) || ((pZip->m_archive_size - cur_file_ofs) >= (0xFFFF + MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE))) + return MZ_FALSE; + cur_file_ofs = MZ_MAX(cur_file_ofs - (sizeof(buf_u32) - 3), 0); + } + // Read and verify the end of central directory record. 
+ if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pBuf, MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) != MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE)
+ return MZ_FALSE;
// Reject if the signature doesn't match, or the total entry count disagrees with the on-this-disk count
// (multi-disk archives are not supported beyond the trivial disk==1 case below).
+ if ((MZ_READ_LE32(pBuf + MZ_ZIP_ECDH_SIG_OFS) != MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG) ||
+ ((pZip->m_total_files = MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_CDIR_TOTAL_ENTRIES_OFS)) != MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_CDIR_NUM_ENTRIES_ON_DISK_OFS)))
+ return MZ_FALSE;
+
+ num_this_disk = MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_NUM_THIS_DISK_OFS);
+ cdir_disk_index = MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_NUM_DISK_CDIR_OFS);
+ if (((num_this_disk | cdir_disk_index) != 0) && ((num_this_disk != 1) || (cdir_disk_index != 1)))
+ return MZ_FALSE;
+
// The declared central dir size must be at least large enough for the fixed-size header of every entry.
+ if ((cdir_size = MZ_READ_LE32(pBuf + MZ_ZIP_ECDH_CDIR_SIZE_OFS)) < pZip->m_total_files * MZ_ZIP_CENTRAL_DIR_HEADER_SIZE)
+ return MZ_FALSE;
+
+ cdir_ofs = MZ_READ_LE32(pBuf + MZ_ZIP_ECDH_CDIR_OFS_OFS);
+ if ((cdir_ofs + (mz_uint64)cdir_size) > pZip->m_archive_size)
+ return MZ_FALSE;
+
+ pZip->m_central_directory_file_ofs = cdir_ofs;
+
+ if (pZip->m_total_files)
+ {
+ mz_uint i, n;
+
+ // Read the entire central directory into a heap block, and allocate another heap block to hold the unsorted central dir file record offsets, and another to hold the sorted indices.
+ if ((!mz_zip_array_resize(pZip, &pZip->m_pState->m_central_dir, cdir_size, MZ_FALSE)) ||
+ (!mz_zip_array_resize(pZip, &pZip->m_pState->m_central_dir_offsets, pZip->m_total_files, MZ_FALSE)))
+ return MZ_FALSE;
+
+ if (sort_central_dir)
+ {
+ if (!mz_zip_array_resize(pZip, &pZip->m_pState->m_sorted_central_dir_offsets, pZip->m_total_files, MZ_FALSE))
+ return MZ_FALSE;
+ }
+
+ if (pZip->m_pRead(pZip->m_pIO_opaque, cdir_ofs, pZip->m_pState->m_central_dir.m_p, cdir_size) != cdir_size)
+ return MZ_FALSE;
+
+ // Now create an index into the central directory file records, do some basic sanity checking on each record, and check for zip64 entries (which are not yet supported).
+ p = (const mz_uint8 *)pZip->m_pState->m_central_dir.m_p;
+ for (n = cdir_size, i = 0; i < pZip->m_total_files; ++i)
+ {
+ mz_uint total_header_size, comp_size, decomp_size, disk_index;
+ if ((n < MZ_ZIP_CENTRAL_DIR_HEADER_SIZE) || (MZ_READ_LE32(p) != MZ_ZIP_CENTRAL_DIR_HEADER_SIG))
+ return MZ_FALSE;
+ MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir_offsets, mz_uint32, i) = (mz_uint32)(p - (const mz_uint8 *)pZip->m_pState->m_central_dir.m_p);
+ if (sort_central_dir)
+ MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_sorted_central_dir_offsets, mz_uint32, i) = i;
+ comp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS);
+ decomp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS);
// Reject stored entries whose sizes disagree, impossible size combinations, and 0xFFFFFFFF sentinels (zip64, unsupported).
+ if (((!MZ_READ_LE32(p + MZ_ZIP_CDH_METHOD_OFS)) && (decomp_size != comp_size)) || (decomp_size && !comp_size) || (decomp_size == 0xFFFFFFFF) || (comp_size == 0xFFFFFFFF))
+ return MZ_FALSE;
+ disk_index = MZ_READ_LE16(p + MZ_ZIP_CDH_DISK_START_OFS);
+ if ((disk_index != num_this_disk) && (disk_index != 1))
+ return MZ_FALSE;
+ if (((mz_uint64)MZ_READ_LE32(p + MZ_ZIP_CDH_LOCAL_HEADER_OFS) + MZ_ZIP_LOCAL_DIR_HEADER_SIZE + comp_size) > pZip->m_archive_size)
+ return MZ_FALSE;
+ if ((total_header_size = MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS) + MZ_READ_LE16(p + MZ_ZIP_CDH_EXTRA_LEN_OFS) + MZ_READ_LE16(p + MZ_ZIP_CDH_COMMENT_LEN_OFS)) > n)
+ return MZ_FALSE;
+ n -= total_header_size; p += total_header_size;
+ }
+ }
+
+ if (sort_central_dir)
+ mz_zip_reader_sort_central_dir_offsets_by_filename(pZip);
+
+ return MZ_TRUE;
+}
+
// Initializes a reader over an archive accessed through the user-supplied pZip->m_pRead callback.
+mz_bool mz_zip_reader_init(mz_zip_archive *pZip, mz_uint64 size, mz_uint32 flags)
+{
+ if ((!pZip) || (!pZip->m_pRead))
+ return MZ_FALSE;
+ if (!mz_zip_reader_init_internal(pZip, flags))
+ return MZ_FALSE;
+ pZip->m_archive_size = size;
+ if (!mz_zip_reader_read_central_dir(pZip, flags))
+ {
+ mz_zip_reader_end(pZip);
+ return MZ_FALSE;
+ }
+ return MZ_TRUE;
+}
+
+static size_t mz_zip_mem_read_func(void *pOpaque, mz_uint64
file_ofs, void *pBuf, size_t n) +{ + mz_zip_archive *pZip = (mz_zip_archive *)pOpaque; + size_t s = (file_ofs >= pZip->m_archive_size) ? 0 : (size_t)MZ_MIN(pZip->m_archive_size - file_ofs, n); + memcpy(pBuf, (const mz_uint8 *)pZip->m_pState->m_pMem + file_ofs, s); + return s; +} + +mz_bool mz_zip_reader_init_mem(mz_zip_archive *pZip, const void *pMem, size_t size, mz_uint32 flags) +{ + if (!mz_zip_reader_init_internal(pZip, flags)) + return MZ_FALSE; + pZip->m_archive_size = size; + pZip->m_pRead = mz_zip_mem_read_func; + pZip->m_pIO_opaque = pZip; +#ifdef __cplusplus + pZip->m_pState->m_pMem = const_cast(pMem); +#else + pZip->m_pState->m_pMem = (void *)pMem; +#endif + pZip->m_pState->m_mem_size = size; + if (!mz_zip_reader_read_central_dir(pZip, flags)) + { + mz_zip_reader_end(pZip); + return MZ_FALSE; + } + return MZ_TRUE; +} + +#ifndef MINIZ_NO_STDIO +static size_t mz_zip_file_read_func(void *pOpaque, mz_uint64 file_ofs, void *pBuf, size_t n) +{ + mz_zip_archive *pZip = (mz_zip_archive *)pOpaque; + mz_int64 cur_ofs = MZ_FTELL64(pZip->m_pState->m_pFile); + if (((mz_int64)file_ofs < 0) || (((cur_ofs != (mz_int64)file_ofs)) && (MZ_FSEEK64(pZip->m_pState->m_pFile, (mz_int64)file_ofs, SEEK_SET)))) + return 0; + return MZ_FREAD(pBuf, 1, n, pZip->m_pState->m_pFile); +} + +mz_bool mz_zip_reader_init_file(mz_zip_archive *pZip, const char *pFilename, mz_uint32 flags) +{ + mz_uint64 file_size; + MZ_FILE *pFile = MZ_FOPEN(pFilename, "rb"); + if (!pFile) + return MZ_FALSE; + if (MZ_FSEEK64(pFile, 0, SEEK_END)) + { + MZ_FCLOSE(pFile); + return MZ_FALSE; + } + file_size = MZ_FTELL64(pFile); + if (!mz_zip_reader_init_internal(pZip, flags)) + { + MZ_FCLOSE(pFile); + return MZ_FALSE; + } + pZip->m_pRead = mz_zip_file_read_func; + pZip->m_pIO_opaque = pZip; + pZip->m_pState->m_pFile = pFile; + pZip->m_archive_size = file_size; + if (!mz_zip_reader_read_central_dir(pZip, flags)) + { + mz_zip_reader_end(pZip); + return MZ_FALSE; + } + return MZ_TRUE; +} +#endif // #ifndef 
MINIZ_NO_STDIO
+
// Returns the number of files in the archive, or 0 when pZip is NULL.
+mz_uint mz_zip_reader_get_num_files(mz_zip_archive *pZip)
+{
+ return pZip ? pZip->m_total_files : 0;
+}
+
// Returns a pointer to the central directory header of the given entry, or NULL if
// the reader is invalid, not in reading mode, or the index is out of range.
+static MZ_FORCEINLINE const mz_uint8 *mz_zip_reader_get_cdh(mz_zip_archive *pZip, mz_uint file_index)
+{
+ if ((!pZip) || (!pZip->m_pState) || (file_index >= pZip->m_total_files) || (pZip->m_zip_mode != MZ_ZIP_MODE_READING))
+ return NULL;
+ return &MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir, mz_uint8, MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir_offsets, mz_uint32, file_index));
+}
+
// True when bit 0 of the entry's general-purpose bit flag (encryption) is set.
+mz_bool mz_zip_reader_is_file_encrypted(mz_zip_archive *pZip, mz_uint file_index)
+{
+ mz_uint m_bit_flag;
+ const mz_uint8 *p = mz_zip_reader_get_cdh(pZip, file_index);
+ if (!p)
+ return MZ_FALSE;
+ m_bit_flag = MZ_READ_LE16(p + MZ_ZIP_CDH_BIT_FLAG_OFS);
+ return (m_bit_flag & 1);
+}
+
// Heuristically decides whether an entry is a directory: trailing '/' in the name,
// or the DOS directory bit (0x10) in the external attributes.
+mz_bool mz_zip_reader_is_file_a_directory(mz_zip_archive *pZip, mz_uint file_index)
+{
+ mz_uint filename_len, external_attr;
+ const mz_uint8 *p = mz_zip_reader_get_cdh(pZip, file_index);
+ if (!p)
+ return MZ_FALSE;
+
+ // First see if the filename ends with a '/' character.
+ filename_len = MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS);
+ if (filename_len)
+ {
+ if (*(p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + filename_len - 1) == '/')
+ return MZ_TRUE;
+ }
+
+ // Bugfix: This code was also checking if the internal attribute was non-zero, which wasn't correct.
+ // Most/all zip writers (hopefully) set DOS file/directory attributes in the low 16-bits, so check for the DOS directory flag and ignore the source OS ID in the created by field.
+ // FIXME: Remove this check? Is it necessary - we already check the filename.
+ external_attr = MZ_READ_LE32(p + MZ_ZIP_CDH_EXTERNAL_ATTR_OFS);
+ if ((external_attr & 0x10) != 0)
+ return MZ_TRUE;
+
+ return MZ_FALSE;
+}
+
// Fills pStat with every field of the entry's central directory record (sizes, CRC,
// method, attributes, local header offset), plus truncated copies of name and comment.
+mz_bool mz_zip_reader_file_stat(mz_zip_archive *pZip, mz_uint file_index, mz_zip_archive_file_stat *pStat)
+{
+ mz_uint n;
+ const mz_uint8 *p = mz_zip_reader_get_cdh(pZip, file_index);
+ if ((!p) || (!pStat))
+ return MZ_FALSE;
+
+ // Unpack the central directory record.
+ pStat->m_file_index = file_index;
+ pStat->m_central_dir_ofs = MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir_offsets, mz_uint32, file_index);
+ pStat->m_version_made_by = MZ_READ_LE16(p + MZ_ZIP_CDH_VERSION_MADE_BY_OFS);
+ pStat->m_version_needed = MZ_READ_LE16(p + MZ_ZIP_CDH_VERSION_NEEDED_OFS);
+ pStat->m_bit_flag = MZ_READ_LE16(p + MZ_ZIP_CDH_BIT_FLAG_OFS);
+ pStat->m_method = MZ_READ_LE16(p + MZ_ZIP_CDH_METHOD_OFS);
+#ifndef MINIZ_NO_TIME
+ pStat->m_time = mz_zip_dos_to_time_t(MZ_READ_LE16(p + MZ_ZIP_CDH_FILE_TIME_OFS), MZ_READ_LE16(p + MZ_ZIP_CDH_FILE_DATE_OFS));
+#endif
+ pStat->m_crc32 = MZ_READ_LE32(p + MZ_ZIP_CDH_CRC32_OFS);
+ pStat->m_comp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS);
+ pStat->m_uncomp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS);
+ pStat->m_internal_attr = MZ_READ_LE16(p + MZ_ZIP_CDH_INTERNAL_ATTR_OFS);
+ pStat->m_external_attr = MZ_READ_LE32(p + MZ_ZIP_CDH_EXTERNAL_ATTR_OFS);
+ pStat->m_local_header_ofs = MZ_READ_LE32(p + MZ_ZIP_CDH_LOCAL_HEADER_OFS);
+
+ // Copy as much of the filename and comment as possible.
+ n = MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS); n = MZ_MIN(n, MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE - 1);
+ memcpy(pStat->m_filename, p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE, n); pStat->m_filename[n] = '\0';
+
+ n = MZ_READ_LE16(p + MZ_ZIP_CDH_COMMENT_LEN_OFS); n = MZ_MIN(n, MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE - 1);
+ pStat->m_comment_size = n;
// The comment sits after the filename and the extra field within the central dir record.
+ memcpy(pStat->m_comment, p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS) + MZ_READ_LE16(p + MZ_ZIP_CDH_EXTRA_LEN_OFS), n); pStat->m_comment[n] = '\0';
+
+ return MZ_TRUE;
+}
+
// Copies the entry's filename (truncated and NUL-terminated) into pFilename.
// Returns the full name length + 1 (i.e. the buffer size needed to hold it untruncated).
+mz_uint mz_zip_reader_get_filename(mz_zip_archive *pZip, mz_uint file_index, char *pFilename, mz_uint filename_buf_size)
+{
+ mz_uint n;
+ const mz_uint8 *p = mz_zip_reader_get_cdh(pZip, file_index);
+ if (!p) { if (filename_buf_size) pFilename[0] = '\0'; return 0; }
+ n = MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS);
+ if (filename_buf_size)
+ {
+ n = MZ_MIN(n, filename_buf_size - 1);
+ memcpy(pFilename, p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE, n);
+ pFilename[n] = '\0';
+ }
+ return n + 1;
+}
+
// Length-bounded string equality; case-sensitive only when MZ_ZIP_FLAG_CASE_SENSITIVE is set.
+static MZ_FORCEINLINE mz_bool mz_zip_reader_string_equal(const char *pA, const char *pB, mz_uint len, mz_uint flags)
+{
+ mz_uint i;
+ if (flags & MZ_ZIP_FLAG_CASE_SENSITIVE)
+ return 0 == memcmp(pA, pB, len);
+ for (i = 0; i < len; ++i)
+ if (MZ_TOLOWER(pA[i]) != MZ_TOLOWER(pB[i]))
+ return MZ_FALSE;
+ return MZ_TRUE;
+}
+
// Case-insensitive three-way compare between an archive entry's filename (by index)
// and an external string; used by the binary search over the sorted index.
+static MZ_FORCEINLINE int mz_zip_reader_filename_compare(const mz_zip_array *pCentral_dir_array, const mz_zip_array *pCentral_dir_offsets, mz_uint l_index, const char *pR, mz_uint r_len)
+{
+ const mz_uint8 *pL = &MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_array, mz_uint8, MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_offsets, mz_uint32, l_index)), *pE;
+ mz_uint l_len = MZ_READ_LE16(pL + MZ_ZIP_CDH_FILENAME_LEN_OFS);
+ mz_uint8 l = 0, r = 0;
+ pL += MZ_ZIP_CENTRAL_DIR_HEADER_SIZE;
+ pE = pL + MZ_MIN(l_len, r_len);
+ while (pL < pE)
+ {
+ if ((l = MZ_TOLOWER(*pL)) != (r = MZ_TOLOWER(*pR)))
+ break;
+ pL++; pR++;
+ }
+ return (pL == pE) ? (int)(l_len - r_len) : (l - r);
+}
+
// Binary search over the filename-sorted index built by
// mz_zip_reader_sort_central_dir_offsets_by_filename(). Returns the file index or -1.
+static int mz_zip_reader_locate_file_binary_search(mz_zip_archive *pZip, const char *pFilename)
+{
+ mz_zip_internal_state *pState = pZip->m_pState;
+ const mz_zip_array *pCentral_dir_offsets = &pState->m_central_dir_offsets;
+ const mz_zip_array *pCentral_dir = &pState->m_central_dir;
+ mz_uint32 *pIndices = &MZ_ZIP_ARRAY_ELEMENT(&pState->m_sorted_central_dir_offsets, mz_uint32, 0);
+ const int size = pZip->m_total_files;
+ const mz_uint filename_len = (mz_uint)strlen(pFilename);
+ int l = 0, h = size - 1;
+ while (l <= h)
+ {
+ int m = (l + h) >> 1, file_index = pIndices[m], comp = mz_zip_reader_filename_compare(pCentral_dir, pCentral_dir_offsets, file_index, pFilename, filename_len);
+ if (!comp)
+ return file_index;
+ else if (comp < 0)
+ l = m + 1;
+ else
+ h = m - 1;
+ }
+ return -1;
+}
+
// Finds an entry by name (and optionally comment). Uses the sorted index fast path
// only for the default case-insensitive, path-sensitive, no-comment lookup;
// otherwise falls back to a linear scan. Returns the file index or -1.
+int mz_zip_reader_locate_file(mz_zip_archive *pZip, const char *pName, const char *pComment, mz_uint flags)
+{
+ mz_uint file_index; size_t name_len, comment_len;
+ if ((!pZip) || (!pZip->m_pState) || (!pName) || (pZip->m_zip_mode != MZ_ZIP_MODE_READING))
+ return -1;
+ if (((flags & (MZ_ZIP_FLAG_IGNORE_PATH | MZ_ZIP_FLAG_CASE_SENSITIVE)) == 0) && (!pComment) && (pZip->m_pState->m_sorted_central_dir_offsets.m_size))
+ return mz_zip_reader_locate_file_binary_search(pZip, pName);
+ name_len = strlen(pName); if (name_len > 0xFFFF) return -1;
+ comment_len = pComment ? strlen(pComment) : 0; if (comment_len > 0xFFFF) return -1;
+ for (file_index = 0; file_index < pZip->m_total_files; file_index++)
+ {
+ const mz_uint8 *pHeader = &MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir, mz_uint8, MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir_offsets, mz_uint32, file_index));
+ mz_uint filename_len = MZ_READ_LE16(pHeader + MZ_ZIP_CDH_FILENAME_LEN_OFS);
+ const char *pFilename = (const char *)pHeader + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE;
+ if (filename_len < name_len)
+ continue;
+ if (comment_len)
+ {
+ mz_uint file_extra_len = MZ_READ_LE16(pHeader + MZ_ZIP_CDH_EXTRA_LEN_OFS), file_comment_len = MZ_READ_LE16(pHeader + MZ_ZIP_CDH_COMMENT_LEN_OFS);
+ const char *pFile_comment = pFilename + filename_len + file_extra_len;
+ if ((file_comment_len != comment_len) || (!mz_zip_reader_string_equal(pComment, pFile_comment, file_comment_len, flags)))
+ continue;
+ }
// With IGNORE_PATH, strip everything up to the last '/', '\\' or ':' before comparing.
+ if ((flags & MZ_ZIP_FLAG_IGNORE_PATH) && (filename_len))
+ {
+ int ofs = filename_len - 1;
+ do
+ {
+ if ((pFilename[ofs] == '/') || (pFilename[ofs] == '\\') || (pFilename[ofs] == ':'))
+ break;
+ } while (--ofs >= 0);
+ ofs++;
+ pFilename += ofs; filename_len -= ofs;
+ }
+ if ((filename_len == name_len) && (mz_zip_reader_string_equal(pName, pFilename, filename_len, flags)))
+ return file_index;
+ }
+ return -1;
+}
+
// Extracts one entry into a caller-supplied buffer, optionally using a caller-supplied
// compressed-data read buffer. Verifies the CRC32 of the decompressed data.
+mz_bool mz_zip_reader_extract_to_mem_no_alloc(mz_zip_archive *pZip, mz_uint file_index, void *pBuf, size_t buf_size, mz_uint flags, void *pUser_read_buf, size_t user_read_buf_size)
+{
+ int status = TINFL_STATUS_DONE;
+ mz_uint64 needed_size, cur_file_ofs, comp_remaining, out_buf_ofs = 0, read_buf_size, read_buf_ofs = 0, read_buf_avail;
+ mz_zip_archive_file_stat file_stat;
+ void *pRead_buf;
+ mz_uint32 local_header_u32[(MZ_ZIP_LOCAL_DIR_HEADER_SIZE + sizeof(mz_uint32) - 1) / sizeof(mz_uint32)]; mz_uint8 *pLocal_header = (mz_uint8 *)local_header_u32;
+ tinfl_decompressor inflator;
+
+ if ((buf_size) && (!pBuf))
+ return MZ_FALSE;
+
+ if
(!mz_zip_reader_file_stat(pZip, file_index, &file_stat))
+ return MZ_FALSE;
+
+ // Empty file, or a directory (but not always a directory - I've seen odd zips with directories that have compressed data which inflates to 0 bytes)
+ if (!file_stat.m_comp_size)
+ return MZ_TRUE;
+
+ // Entry is a subdirectory (I've seen old zips with dir entries which have compressed deflate data which inflates to 0 bytes, but these entries claim to uncompress to 512 bytes in the headers).
+ // I'm torn how to handle this case - should it fail instead?
+ if (mz_zip_reader_is_file_a_directory(pZip, file_index))
+ return MZ_TRUE;
+
+ // Encryption and patch files are not supported.
+ if (file_stat.m_bit_flag & (1 | 32))
+ return MZ_FALSE;
+
+ // This function only supports stored and deflate.
+ if ((!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) && (file_stat.m_method != 0) && (file_stat.m_method != MZ_DEFLATED))
+ return MZ_FALSE;
+
+ // Ensure supplied output buffer is large enough.
+ needed_size = (flags & MZ_ZIP_FLAG_COMPRESSED_DATA) ? file_stat.m_comp_size : file_stat.m_uncomp_size;
+ if (buf_size < needed_size)
+ return MZ_FALSE;
+
+ // Read and parse the local directory entry.
+ cur_file_ofs = file_stat.m_local_header_ofs;
+ if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pLocal_header, MZ_ZIP_LOCAL_DIR_HEADER_SIZE) != MZ_ZIP_LOCAL_DIR_HEADER_SIZE)
+ return MZ_FALSE;
+ if (MZ_READ_LE32(pLocal_header) != MZ_ZIP_LOCAL_DIR_HEADER_SIG)
+ return MZ_FALSE;
+
// The local header's own name/extra lengths (which may differ from the central dir's) position the data.
+ cur_file_ofs += MZ_ZIP_LOCAL_DIR_HEADER_SIZE + MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_FILENAME_LEN_OFS) + MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_EXTRA_LEN_OFS);
+ if ((cur_file_ofs + file_stat.m_comp_size) > pZip->m_archive_size)
+ return MZ_FALSE;
+
+ if ((flags & MZ_ZIP_FLAG_COMPRESSED_DATA) || (!file_stat.m_method))
+ {
+ // The file is stored or the caller has requested the compressed data.
+ if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pBuf, (size_t)needed_size) != needed_size)
+ return MZ_FALSE;
+ return ((flags & MZ_ZIP_FLAG_COMPRESSED_DATA) != 0) || (mz_crc32(MZ_CRC32_INIT, (const mz_uint8 *)pBuf, (size_t)file_stat.m_uncomp_size) == file_stat.m_crc32);
+ }
+
+ // Decompress the file either directly from memory or from a file input buffer.
+ tinfl_init(&inflator);
+
+ if (pZip->m_pState->m_pMem)
+ {
+ // Read directly from the archive in memory.
+ pRead_buf = (mz_uint8 *)pZip->m_pState->m_pMem + cur_file_ofs;
+ read_buf_size = read_buf_avail = file_stat.m_comp_size;
+ comp_remaining = 0;
+ }
+ else if (pUser_read_buf)
+ {
+ // Use a user provided read buffer.
+ if (!user_read_buf_size)
+ return MZ_FALSE;
+ pRead_buf = (mz_uint8 *)pUser_read_buf;
+ read_buf_size = user_read_buf_size;
+ read_buf_avail = 0;
+ comp_remaining = file_stat.m_comp_size;
+ }
+ else
+ {
+ // Temporarily allocate a read buffer.
+ read_buf_size = MZ_MIN(file_stat.m_comp_size, MZ_ZIP_MAX_IO_BUF_SIZE);
+#ifdef _MSC_VER
+ if (((0, sizeof(size_t) == sizeof(mz_uint32))) && (read_buf_size > 0x7FFFFFFF))
+#else
+ if (((sizeof(size_t) == sizeof(mz_uint32))) && (read_buf_size > 0x7FFFFFFF))
+#endif
+ return MZ_FALSE;
+ if (NULL == (pRead_buf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, (size_t)read_buf_size)))
+ return MZ_FALSE;
+ read_buf_avail = 0;
+ comp_remaining = file_stat.m_comp_size;
+ }
+
// Streaming inflate loop: refill the input buffer as needed, writing directly into
// pBuf as a non-wrapping output buffer.
+ do
+ {
+ size_t in_buf_size, out_buf_size = (size_t)(file_stat.m_uncomp_size - out_buf_ofs);
+ if ((!read_buf_avail) && (!pZip->m_pState->m_pMem))
+ {
+ read_buf_avail = MZ_MIN(read_buf_size, comp_remaining);
+ if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pRead_buf, (size_t)read_buf_avail) != read_buf_avail)
+ {
+ status = TINFL_STATUS_FAILED;
+ break;
+ }
+ cur_file_ofs += read_buf_avail;
+ comp_remaining -= read_buf_avail;
+ read_buf_ofs = 0;
+ }
+ in_buf_size = (size_t)read_buf_avail;
+ status = tinfl_decompress(&inflator, (mz_uint8 *)pRead_buf + read_buf_ofs, &in_buf_size, (mz_uint8 *)pBuf, (mz_uint8 *)pBuf + out_buf_ofs, &out_buf_size, TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF | (comp_remaining ? TINFL_FLAG_HAS_MORE_INPUT : 0));
+ read_buf_avail -= in_buf_size;
+ read_buf_ofs += in_buf_size;
+ out_buf_ofs += out_buf_size;
+ } while (status == TINFL_STATUS_NEEDS_MORE_INPUT);
+
+ if (status == TINFL_STATUS_DONE)
+ {
+ // Make sure the entire file was decompressed, and check its CRC.
+ if ((out_buf_ofs != file_stat.m_uncomp_size) || (mz_crc32(MZ_CRC32_INIT, (const mz_uint8 *)pBuf, (size_t)file_stat.m_uncomp_size) != file_stat.m_crc32))
+ status = TINFL_STATUS_FAILED;
+ }
+
// Free the temp read buffer only if this function allocated it.
+ if ((!pZip->m_pState->m_pMem) && (!pUser_read_buf))
+ pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf);
+
+ return status == TINFL_STATUS_DONE;
+}
+
// Name-based convenience wrapper around mz_zip_reader_extract_to_mem_no_alloc().
+mz_bool mz_zip_reader_extract_file_to_mem_no_alloc(mz_zip_archive *pZip, const char *pFilename, void *pBuf, size_t buf_size, mz_uint flags, void *pUser_read_buf, size_t user_read_buf_size)
+{
+ int file_index = mz_zip_reader_locate_file(pZip, pFilename, NULL, flags);
+ if (file_index < 0)
+ return MZ_FALSE;
+ return mz_zip_reader_extract_to_mem_no_alloc(pZip, file_index, pBuf, buf_size, flags, pUser_read_buf, user_read_buf_size);
+}
+
+mz_bool mz_zip_reader_extract_to_mem(mz_zip_archive *pZip, mz_uint file_index, void *pBuf, size_t buf_size, mz_uint flags)
+{
+ return mz_zip_reader_extract_to_mem_no_alloc(pZip, file_index, pBuf, buf_size, flags, NULL, 0);
+}
+
+mz_bool mz_zip_reader_extract_file_to_mem(mz_zip_archive *pZip, const char *pFilename, void *pBuf, size_t buf_size, mz_uint flags)
+{
+ return mz_zip_reader_extract_file_to_mem_no_alloc(pZip, pFilename, pBuf, buf_size, flags, NULL, 0);
+}
+
// Extracts one entry into a freshly allocated heap buffer; returns the buffer
// (caller frees with pZip->m_pFree) and its size via *pSize, or NULL on failure.
+void *mz_zip_reader_extract_to_heap(mz_zip_archive *pZip, mz_uint file_index, size_t *pSize, mz_uint flags)
+{
+ mz_uint64 comp_size, uncomp_size, alloc_size;
+ const mz_uint8 *p = mz_zip_reader_get_cdh(pZip, file_index);
+ void *pBuf;
+
+ if (pSize)
+ *pSize = 0;
+ if (!p)
+ return NULL;
+
+ comp_size = MZ_READ_LE32(p +
MZ_ZIP_CDH_COMPRESSED_SIZE_OFS);
+ uncomp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS);
+
+ alloc_size = (flags & MZ_ZIP_FLAG_COMPRESSED_DATA) ? comp_size : uncomp_size;
+#ifdef _MSC_VER
+ if (((0, sizeof(size_t) == sizeof(mz_uint32))) && (alloc_size > 0x7FFFFFFF))
+#else
+ if (((sizeof(size_t) == sizeof(mz_uint32))) && (alloc_size > 0x7FFFFFFF))
+#endif
+ return NULL;
+ if (NULL == (pBuf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, (size_t)alloc_size)))
+ return NULL;
+
+ if (!mz_zip_reader_extract_to_mem(pZip, file_index, pBuf, (size_t)alloc_size, flags))
+ {
+ pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf);
+ return NULL;
+ }
+
+ if (pSize) *pSize = (size_t)alloc_size;
+ return pBuf;
+}
+
// Name-based wrapper around mz_zip_reader_extract_to_heap().
+void *mz_zip_reader_extract_file_to_heap(mz_zip_archive *pZip, const char *pFilename, size_t *pSize, mz_uint flags)
+{
+ int file_index = mz_zip_reader_locate_file(pZip, pFilename, NULL, flags);
+ if (file_index < 0)
+ {
+ if (pSize) *pSize = 0;
// NOTE(review): returning MZ_FALSE from a void* function relies on MZ_FALSE being 0
// (a null pointer constant); returning NULL explicitly would be clearer — confirm upstream.
+ return MZ_FALSE;
+ }
+ return mz_zip_reader_extract_to_heap(pZip, file_index, pSize, flags);
+}
+
// Streams one entry's (decompressed, unless MZ_ZIP_FLAG_COMPRESSED_DATA) data to a
// user callback in chunks, verifying the CRC32 along the way.
+mz_bool mz_zip_reader_extract_to_callback(mz_zip_archive *pZip, mz_uint file_index, mz_file_write_func pCallback, void *pOpaque, mz_uint flags)
+{
+ int status = TINFL_STATUS_DONE; mz_uint file_crc32 = MZ_CRC32_INIT;
+ mz_uint64 read_buf_size, read_buf_ofs = 0, read_buf_avail, comp_remaining, out_buf_ofs = 0, cur_file_ofs;
+ mz_zip_archive_file_stat file_stat;
+ void *pRead_buf = NULL; void *pWrite_buf = NULL;
+ mz_uint32 local_header_u32[(MZ_ZIP_LOCAL_DIR_HEADER_SIZE + sizeof(mz_uint32) - 1) / sizeof(mz_uint32)]; mz_uint8 *pLocal_header = (mz_uint8 *)local_header_u32;
+
+ if (!mz_zip_reader_file_stat(pZip, file_index, &file_stat))
+ return MZ_FALSE;
+
+ // Empty file, or a directory (but not always a directory - I've seen odd zips with directories that have compressed data which inflates to 0 bytes)
+ if (!file_stat.m_comp_size)
+ return MZ_TRUE;
+
+ // Entry is a subdirectory (I've seen old zips with dir entries which have compressed deflate data which inflates to 0 bytes, but these entries claim to uncompress to 512 bytes in the headers).
+ // I'm torn how to handle this case - should it fail instead?
+ if (mz_zip_reader_is_file_a_directory(pZip, file_index))
+ return MZ_TRUE;
+
+ // Encryption and patch files are not supported.
+ if (file_stat.m_bit_flag & (1 | 32))
+ return MZ_FALSE;
+
+ // This function only supports stored and deflate.
+ if ((!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) && (file_stat.m_method != 0) && (file_stat.m_method != MZ_DEFLATED))
+ return MZ_FALSE;
+
+ // Read and parse the local directory entry.
+ cur_file_ofs = file_stat.m_local_header_ofs;
+ if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pLocal_header, MZ_ZIP_LOCAL_DIR_HEADER_SIZE) != MZ_ZIP_LOCAL_DIR_HEADER_SIZE)
+ return MZ_FALSE;
+ if (MZ_READ_LE32(pLocal_header) != MZ_ZIP_LOCAL_DIR_HEADER_SIG)
+ return MZ_FALSE;
+
+ cur_file_ofs += MZ_ZIP_LOCAL_DIR_HEADER_SIZE + MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_FILENAME_LEN_OFS) + MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_EXTRA_LEN_OFS);
+ if ((cur_file_ofs + file_stat.m_comp_size) > pZip->m_archive_size)
+ return MZ_FALSE;
+
+ // Decompress the file either directly from memory or from a file input buffer.
+ if (pZip->m_pState->m_pMem)
+ {
+ pRead_buf = (mz_uint8 *)pZip->m_pState->m_pMem + cur_file_ofs;
+ read_buf_size = read_buf_avail = file_stat.m_comp_size;
+ comp_remaining = 0;
+ }
+ else
+ {
+ read_buf_size = MZ_MIN(file_stat.m_comp_size, MZ_ZIP_MAX_IO_BUF_SIZE);
+ if (NULL == (pRead_buf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, (size_t)read_buf_size)))
+ return MZ_FALSE;
+ read_buf_avail = 0;
+ comp_remaining = file_stat.m_comp_size;
+ }
+
+ if ((flags & MZ_ZIP_FLAG_COMPRESSED_DATA) || (!file_stat.m_method))
+ {
+ // The file is stored or the caller has requested the compressed data.
+ if (pZip->m_pState->m_pMem)
+ {
+#ifdef _MSC_VER
+ if (((0, sizeof(size_t) == sizeof(mz_uint32))) && (file_stat.m_comp_size > 0xFFFFFFFF))
+#else
+ if (((sizeof(size_t) == sizeof(mz_uint32))) && (file_stat.m_comp_size > 0xFFFFFFFF))
+#endif
+ return MZ_FALSE;
+ if (pCallback(pOpaque, out_buf_ofs, pRead_buf, (size_t)file_stat.m_comp_size) != file_stat.m_comp_size)
+ status = TINFL_STATUS_FAILED;
+ else if (!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA))
+ file_crc32 = (mz_uint32)mz_crc32(file_crc32, (const mz_uint8 *)pRead_buf, (size_t)file_stat.m_comp_size);
+ cur_file_ofs += file_stat.m_comp_size;
+ out_buf_ofs += file_stat.m_comp_size;
+ comp_remaining = 0;
+ }
+ else
+ {
+ while (comp_remaining)
+ {
+ read_buf_avail = MZ_MIN(read_buf_size, comp_remaining);
+ if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pRead_buf, (size_t)read_buf_avail) != read_buf_avail)
+ {
+ status = TINFL_STATUS_FAILED;
+ break;
+ }
+
+ if (!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA))
+ file_crc32 = (mz_uint32)mz_crc32(file_crc32, (const mz_uint8 *)pRead_buf, (size_t)read_buf_avail);
+
+ if (pCallback(pOpaque, out_buf_ofs, pRead_buf, (size_t)read_buf_avail) != read_buf_avail)
+ {
+ status = TINFL_STATUS_FAILED;
+ break;
+ }
+ cur_file_ofs += read_buf_avail;
+ out_buf_ofs += read_buf_avail;
+ comp_remaining -= read_buf_avail;
+ }
+ }
+ }
+ else
+ {
+ tinfl_decompressor inflator;
+ tinfl_init(&inflator);
+
// Deflate path: decompress through a TINFL_LZ_DICT_SIZE circular window, flushing
// each produced span to the callback.
+ if (NULL == (pWrite_buf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, TINFL_LZ_DICT_SIZE)))
+ status = TINFL_STATUS_FAILED;
+ else
+ {
+ do
+ {
+ mz_uint8 *pWrite_buf_cur = (mz_uint8 *)pWrite_buf + (out_buf_ofs & (TINFL_LZ_DICT_SIZE - 1));
+ size_t in_buf_size, out_buf_size = TINFL_LZ_DICT_SIZE - (out_buf_ofs & (TINFL_LZ_DICT_SIZE - 1));
+ if ((!read_buf_avail) && (!pZip->m_pState->m_pMem))
+ {
+ read_buf_avail = MZ_MIN(read_buf_size, comp_remaining);
+ if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pRead_buf, (size_t)read_buf_avail) != read_buf_avail)
+ {
+ status = TINFL_STATUS_FAILED;
+ break;
+ }
+ cur_file_ofs += read_buf_avail;
+ comp_remaining -= read_buf_avail;
+ read_buf_ofs = 0;
+ }
+
+ in_buf_size = (size_t)read_buf_avail;
+ status = tinfl_decompress(&inflator, (const mz_uint8 *)pRead_buf + read_buf_ofs, &in_buf_size, (mz_uint8 *)pWrite_buf, pWrite_buf_cur, &out_buf_size, comp_remaining ? TINFL_FLAG_HAS_MORE_INPUT : 0);
+ read_buf_avail -= in_buf_size;
+ read_buf_ofs += in_buf_size;
+
+ if (out_buf_size)
+ {
+ if (pCallback(pOpaque, out_buf_ofs, pWrite_buf_cur, out_buf_size) != out_buf_size)
+ {
+ status = TINFL_STATUS_FAILED;
+ break;
+ }
+ file_crc32 = (mz_uint32)mz_crc32(file_crc32, pWrite_buf_cur, out_buf_size);
// Guard against decompressed output exceeding the size declared in the headers.
+ if ((out_buf_ofs += out_buf_size) > file_stat.m_uncomp_size)
+ {
+ status = TINFL_STATUS_FAILED;
+ break;
+ }
+ }
+ } while ((status == TINFL_STATUS_NEEDS_MORE_INPUT) || (status == TINFL_STATUS_HAS_MORE_OUTPUT));
+ }
+ }
+
+ if ((status == TINFL_STATUS_DONE) && (!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA)))
+ {
+ // Make sure the entire file was decompressed, and check its CRC.
+ if ((out_buf_ofs != file_stat.m_uncomp_size) || (file_crc32 != file_stat.m_crc32))
+ status = TINFL_STATUS_FAILED;
+ }
+
+ if (!pZip->m_pState->m_pMem)
+ pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf);
+ if (pWrite_buf)
+ pZip->m_pFree(pZip->m_pAlloc_opaque, pWrite_buf);
+
+ return status == TINFL_STATUS_DONE;
+}
+
// Name-based wrapper around mz_zip_reader_extract_to_callback().
+mz_bool mz_zip_reader_extract_file_to_callback(mz_zip_archive *pZip, const char *pFilename, mz_file_write_func pCallback, void *pOpaque, mz_uint flags)
+{
+ int file_index = mz_zip_reader_locate_file(pZip, pFilename, NULL, flags);
+ if (file_index < 0)
+ return MZ_FALSE;
+ return mz_zip_reader_extract_to_callback(pZip, file_index, pCallback, pOpaque, flags);
+}
+
+#ifndef MINIZ_NO_STDIO
// Write callback that appends to a stdio FILE*; the offset is ignored (sequential writes).
+static size_t mz_zip_file_write_callback(void *pOpaque, mz_uint64 ofs, const void *pBuf, size_t n)
+{
+ (void)ofs; return MZ_FWRITE(pBuf, 1, n, (MZ_FILE*)pOpaque);
+}
+
// Extracts one entry to a disk file, restoring the entry's timestamp on success.
+mz_bool mz_zip_reader_extract_to_file(mz_zip_archive *pZip, mz_uint file_index, const char *pDst_filename, mz_uint flags)
+{
+ mz_bool status;
+ mz_zip_archive_file_stat file_stat;
+ MZ_FILE *pFile;
+ if (!mz_zip_reader_file_stat(pZip, file_index, &file_stat))
+ return MZ_FALSE;
+ pFile = MZ_FOPEN(pDst_filename, "wb");
+ if (!pFile)
+ return MZ_FALSE;
+ status = mz_zip_reader_extract_to_callback(pZip, file_index, mz_zip_file_write_callback, pFile, flags);
+ if (MZ_FCLOSE(pFile) == EOF)
+ return MZ_FALSE;
+#ifndef MINIZ_NO_TIME
+ if (status)
+ mz_zip_set_file_times(pDst_filename, file_stat.m_time, file_stat.m_time);
+#endif
+ return status;
+}
+#endif // #ifndef MINIZ_NO_STDIO
+
// Frees all reader state (central dir arrays, stdio handle) and invalidates the archive.
+mz_bool mz_zip_reader_end(mz_zip_archive *pZip)
+{
+ if ((!pZip) || (!pZip->m_pState) || (!pZip->m_pAlloc) || (!pZip->m_pFree) || (pZip->m_zip_mode != MZ_ZIP_MODE_READING))
+ return MZ_FALSE;
+
+ if (pZip->m_pState)
+ {
+ mz_zip_internal_state *pState = pZip->m_pState; pZip->m_pState = NULL;
+ mz_zip_array_clear(pZip, &pState->m_central_dir);
+ mz_zip_array_clear(pZip, &pState->m_central_dir_offsets);
+
mz_zip_array_clear(pZip, &pState->m_sorted_central_dir_offsets);
+
+#ifndef MINIZ_NO_STDIO
+ if (pState->m_pFile)
+ {
+ MZ_FCLOSE(pState->m_pFile);
+ pState->m_pFile = NULL;
+ }
+#endif // #ifndef MINIZ_NO_STDIO
+
+ pZip->m_pFree(pZip->m_pAlloc_opaque, pState);
+ }
+ pZip->m_zip_mode = MZ_ZIP_MODE_INVALID;
+
+ return MZ_TRUE;
+}
+
+#ifndef MINIZ_NO_STDIO
// Convenience: locate an entry by name and extract it straight to a disk file.
+mz_bool mz_zip_reader_extract_file_to_file(mz_zip_archive *pZip, const char *pArchive_filename, const char *pDst_filename, mz_uint flags)
+{
+ int file_index = mz_zip_reader_locate_file(pZip, pArchive_filename, NULL, flags);
+ if (file_index < 0)
+ return MZ_FALSE;
+ return mz_zip_reader_extract_to_file(pZip, file_index, pDst_filename, flags);
+}
+#endif
+
+// ------------------- .ZIP archive writing
+
+#ifndef MINIZ_NO_ARCHIVE_WRITING_APIS
+
// Little-endian store helpers for building zip records byte-by-byte (endian-neutral).
+static void mz_write_le16(mz_uint8 *p, mz_uint16 v) { p[0] = (mz_uint8)v; p[1] = (mz_uint8)(v >> 8); }
+static void mz_write_le32(mz_uint8 *p, mz_uint32 v) { p[0] = (mz_uint8)v; p[1] = (mz_uint8)(v >> 8); p[2] = (mz_uint8)(v >> 16); p[3] = (mz_uint8)(v >> 24); }
+#define MZ_WRITE_LE16(p, v) mz_write_le16((mz_uint8 *)(p), (mz_uint16)(v))
+#define MZ_WRITE_LE32(p, v) mz_write_le32((mz_uint8 *)(p), (mz_uint32)(v))
+
// Initializes a zip writer over the user-supplied pZip->m_pWrite callback.
// existing_size reserves that many bytes before the archive data begins.
+mz_bool mz_zip_writer_init(mz_zip_archive *pZip, mz_uint64 existing_size)
+{
+ if ((!pZip) || (pZip->m_pState) || (!pZip->m_pWrite) || (pZip->m_zip_mode != MZ_ZIP_MODE_INVALID))
+ return MZ_FALSE;
+
+ if (pZip->m_file_offset_alignment)
+ {
+ // Ensure user specified file offset alignment is a power of 2.
+ if (pZip->m_file_offset_alignment & (pZip->m_file_offset_alignment - 1))
+ return MZ_FALSE;
+ }
+
+ if (!pZip->m_pAlloc) pZip->m_pAlloc = def_alloc_func;
+ if (!pZip->m_pFree) pZip->m_pFree = def_free_func;
+ if (!pZip->m_pRealloc) pZip->m_pRealloc = def_realloc_func;
+
+ pZip->m_zip_mode = MZ_ZIP_MODE_WRITING;
+ pZip->m_archive_size = existing_size;
+ pZip->m_central_directory_file_ofs = 0;
+ pZip->m_total_files = 0;
+
+ if (NULL == (pZip->m_pState = (mz_zip_internal_state *)pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, sizeof(mz_zip_internal_state))))
+ return MZ_FALSE;
+ memset(pZip->m_pState, 0, sizeof(mz_zip_internal_state));
+ MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_central_dir, sizeof(mz_uint8));
+ MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_central_dir_offsets, sizeof(mz_uint32));
+ MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_sorted_central_dir_offsets, sizeof(mz_uint32));
+ return MZ_TRUE;
+}
+
// Write callback for heap-backed archives: grows the block geometrically as needed.
+static size_t mz_zip_heap_write_func(void *pOpaque, mz_uint64 file_ofs, const void *pBuf, size_t n)
+{
+ mz_zip_archive *pZip = (mz_zip_archive *)pOpaque;
+ mz_zip_internal_state *pState = pZip->m_pState;
+ mz_uint64 new_size = MZ_MAX(file_ofs + n, pState->m_mem_size);
+#ifdef _MSC_VER
+ if ((!n) || ((0, sizeof(size_t) == sizeof(mz_uint32)) && (new_size > 0x7FFFFFFF)))
+#else
+ if ((!n) || ((sizeof(size_t) == sizeof(mz_uint32)) && (new_size > 0x7FFFFFFF)))
+#endif
+ return 0;
+ if (new_size > pState->m_mem_capacity)
+ {
+ void *pNew_block;
+ size_t new_capacity = MZ_MAX(64, pState->m_mem_capacity); while (new_capacity < new_size) new_capacity *= 2;
+ if (NULL == (pNew_block = pZip->m_pRealloc(pZip->m_pAlloc_opaque, pState->m_pMem, 1, new_capacity)))
+ return 0;
+ pState->m_pMem = pNew_block; pState->m_mem_capacity = new_capacity;
+ }
+ memcpy((mz_uint8 *)pState->m_pMem + file_ofs, pBuf, n);
+ pState->m_mem_size = (size_t)new_size;
+ return n;
+}
+
// Initializes a writer that builds the archive in a growable heap block.
+mz_bool mz_zip_writer_init_heap(mz_zip_archive *pZip, size_t size_to_reserve_at_beginning, size_t initial_allocation_size)
+{
+ pZip->m_pWrite = mz_zip_heap_write_func;
+ pZip->m_pIO_opaque = pZip;
+ if (!mz_zip_writer_init(pZip, size_to_reserve_at_beginning))
+ return MZ_FALSE;
+ if (0 != (initial_allocation_size = MZ_MAX(initial_allocation_size, size_to_reserve_at_beginning)))
+ {
+ if (NULL == (pZip->m_pState->m_pMem = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, initial_allocation_size)))
+ {
+ mz_zip_writer_end(pZip);
+ return MZ_FALSE;
+ }
+ pZip->m_pState->m_mem_capacity = initial_allocation_size;
+ }
+ return MZ_TRUE;
+}
+
+#ifndef MINIZ_NO_STDIO
// Write callback for stdio-backed archives; seeks only when the position differs.
+static size_t mz_zip_file_write_func(void *pOpaque, mz_uint64 file_ofs, const void *pBuf, size_t n)
+{
+ mz_zip_archive *pZip = (mz_zip_archive *)pOpaque;
+ mz_int64 cur_ofs = MZ_FTELL64(pZip->m_pState->m_pFile);
+ if (((mz_int64)file_ofs < 0) || (((cur_ofs != (mz_int64)file_ofs)) && (MZ_FSEEK64(pZip->m_pState->m_pFile, (mz_int64)file_ofs, SEEK_SET))))
+ return 0;
+ return MZ_FWRITE(pBuf, 1, n, pZip->m_pState->m_pFile);
+}
+
// Creates pFilename for writing and initializes a writer over it, zero-filling any
// reserved region at the start of the file in 4KB chunks.
+mz_bool mz_zip_writer_init_file(mz_zip_archive *pZip, const char *pFilename, mz_uint64 size_to_reserve_at_beginning)
+{
+ MZ_FILE *pFile;
+ pZip->m_pWrite = mz_zip_file_write_func;
+ pZip->m_pIO_opaque = pZip;
+ if (!mz_zip_writer_init(pZip, size_to_reserve_at_beginning))
+ return MZ_FALSE;
+ if (NULL == (pFile = MZ_FOPEN(pFilename, "wb")))
+ {
+ mz_zip_writer_end(pZip);
+ return MZ_FALSE;
+ }
+ pZip->m_pState->m_pFile = pFile;
+ if (size_to_reserve_at_beginning)
+ {
+ mz_uint64 cur_ofs = 0; char buf[4096]; MZ_CLEAR_OBJ(buf);
+ do
+ {
+ size_t n = (size_t)MZ_MIN(sizeof(buf), size_to_reserve_at_beginning);
+ if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_ofs, buf, n) != n)
+ {
+ mz_zip_writer_end(pZip);
+ return MZ_FALSE;
+ }
+ cur_ofs += n; size_to_reserve_at_beginning -= n;
+ } while (size_to_reserve_at_beginning);
+ }
+ return MZ_TRUE;
+}
+#endif // #ifndef MINIZ_NO_STDIO
+
+mz_bool mz_zip_writer_init_from_reader(mz_zip_archive *pZip, const char *pFilename)
+{
+ mz_zip_internal_state *pState;
+
if ((!pZip) || (!pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_READING)) + return MZ_FALSE; + // No sense in trying to write to an archive that's already at the support max size + if ((pZip->m_total_files == 0xFFFF) || ((pZip->m_archive_size + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + MZ_ZIP_LOCAL_DIR_HEADER_SIZE) > 0xFFFFFFFF)) + return MZ_FALSE; + + pState = pZip->m_pState; + + if (pState->m_pFile) + { +#ifdef MINIZ_NO_STDIO + pFilename; return MZ_FALSE; +#else + // Archive is being read from stdio - try to reopen as writable. + if (pZip->m_pIO_opaque != pZip) + return MZ_FALSE; + if (!pFilename) + return MZ_FALSE; + pZip->m_pWrite = mz_zip_file_write_func; + if (NULL == (pState->m_pFile = MZ_FREOPEN(pFilename, "r+b", pState->m_pFile))) + { + // The mz_zip_archive is now in a bogus state because pState->m_pFile is NULL, so just close it. + mz_zip_reader_end(pZip); + return MZ_FALSE; + } +#endif // #ifdef MINIZ_NO_STDIO + } + else if (pState->m_pMem) + { + // Archive lives in a memory block. Assume it's from the heap that we can resize using the realloc callback. + if (pZip->m_pIO_opaque != pZip) + return MZ_FALSE; + pState->m_mem_capacity = pState->m_mem_size; + pZip->m_pWrite = mz_zip_heap_write_func; + } + // Archive is being read via a user provided read function - make sure the user has specified a write function too. + else if (!pZip->m_pWrite) + return MZ_FALSE; + + // Start writing new files at the archive's current central directory location. 
+ pZip->m_archive_size = pZip->m_central_directory_file_ofs; + pZip->m_zip_mode = MZ_ZIP_MODE_WRITING; + pZip->m_central_directory_file_ofs = 0; + + return MZ_TRUE; +} + +mz_bool mz_zip_writer_add_mem(mz_zip_archive *pZip, const char *pArchive_name, const void *pBuf, size_t buf_size, mz_uint level_and_flags) +{ + return mz_zip_writer_add_mem_ex(pZip, pArchive_name, pBuf, buf_size, NULL, 0, level_and_flags, 0, 0); +} + +typedef struct +{ + mz_zip_archive *m_pZip; + mz_uint64 m_cur_archive_file_ofs; + mz_uint64 m_comp_size; +} mz_zip_writer_add_state; + +static mz_bool mz_zip_writer_add_put_buf_callback(const void* pBuf, int len, void *pUser) +{ + mz_zip_writer_add_state *pState = (mz_zip_writer_add_state *)pUser; + if ((int)pState->m_pZip->m_pWrite(pState->m_pZip->m_pIO_opaque, pState->m_cur_archive_file_ofs, pBuf, len) != len) + return MZ_FALSE; + pState->m_cur_archive_file_ofs += len; + pState->m_comp_size += len; + return MZ_TRUE; +} + +static mz_bool mz_zip_writer_create_local_dir_header(mz_zip_archive *pZip, mz_uint8 *pDst, mz_uint16 filename_size, mz_uint16 extra_size, mz_uint64 uncomp_size, mz_uint64 comp_size, mz_uint32 uncomp_crc32, mz_uint16 method, mz_uint16 bit_flags, mz_uint16 dos_time, mz_uint16 dos_date) +{ + (void)pZip; + memset(pDst, 0, MZ_ZIP_LOCAL_DIR_HEADER_SIZE); + MZ_WRITE_LE32(pDst + MZ_ZIP_LDH_SIG_OFS, MZ_ZIP_LOCAL_DIR_HEADER_SIG); + MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_VERSION_NEEDED_OFS, method ? 
20 : 0); + MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_BIT_FLAG_OFS, bit_flags); + MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_METHOD_OFS, method); + MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_FILE_TIME_OFS, dos_time); + MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_FILE_DATE_OFS, dos_date); + MZ_WRITE_LE32(pDst + MZ_ZIP_LDH_CRC32_OFS, uncomp_crc32); + MZ_WRITE_LE32(pDst + MZ_ZIP_LDH_COMPRESSED_SIZE_OFS, comp_size); + MZ_WRITE_LE32(pDst + MZ_ZIP_LDH_DECOMPRESSED_SIZE_OFS, uncomp_size); + MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_FILENAME_LEN_OFS, filename_size); + MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_EXTRA_LEN_OFS, extra_size); + return MZ_TRUE; +} + +static mz_bool mz_zip_writer_create_central_dir_header(mz_zip_archive *pZip, mz_uint8 *pDst, mz_uint16 filename_size, mz_uint16 extra_size, mz_uint16 comment_size, mz_uint64 uncomp_size, mz_uint64 comp_size, mz_uint32 uncomp_crc32, mz_uint16 method, mz_uint16 bit_flags, mz_uint16 dos_time, mz_uint16 dos_date, mz_uint64 local_header_ofs, mz_uint32 ext_attributes) +{ + (void)pZip; + memset(pDst, 0, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE); + MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_SIG_OFS, MZ_ZIP_CENTRAL_DIR_HEADER_SIG); + MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_VERSION_NEEDED_OFS, method ? 
20 : 0); + MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_BIT_FLAG_OFS, bit_flags); + MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_METHOD_OFS, method); + MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_FILE_TIME_OFS, dos_time); + MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_FILE_DATE_OFS, dos_date); + MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_CRC32_OFS, uncomp_crc32); + MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS, comp_size); + MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS, uncomp_size); + MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_FILENAME_LEN_OFS, filename_size); + MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_EXTRA_LEN_OFS, extra_size); + MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_COMMENT_LEN_OFS, comment_size); + MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_EXTERNAL_ATTR_OFS, ext_attributes); + MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_LOCAL_HEADER_OFS, local_header_ofs); + return MZ_TRUE; +} + +static mz_bool mz_zip_writer_add_to_central_dir(mz_zip_archive *pZip, const char *pFilename, mz_uint16 filename_size, const void *pExtra, mz_uint16 extra_size, const void *pComment, mz_uint16 comment_size, mz_uint64 uncomp_size, mz_uint64 comp_size, mz_uint32 uncomp_crc32, mz_uint16 method, mz_uint16 bit_flags, mz_uint16 dos_time, mz_uint16 dos_date, mz_uint64 local_header_ofs, mz_uint32 ext_attributes) +{ + mz_zip_internal_state *pState = pZip->m_pState; + mz_uint32 central_dir_ofs = (mz_uint32)pState->m_central_dir.m_size; + size_t orig_central_dir_size = pState->m_central_dir.m_size; + mz_uint8 central_dir_header[MZ_ZIP_CENTRAL_DIR_HEADER_SIZE]; + + // No zip64 support yet + if ((local_header_ofs > 0xFFFFFFFF) || (((mz_uint64)pState->m_central_dir.m_size + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + filename_size + extra_size + comment_size) > 0xFFFFFFFF)) + return MZ_FALSE; + + if (!mz_zip_writer_create_central_dir_header(pZip, central_dir_header, filename_size, extra_size, comment_size, uncomp_size, comp_size, uncomp_crc32, method, bit_flags, dos_time, dos_date, local_header_ofs, ext_attributes)) + return MZ_FALSE; + + if ((!mz_zip_array_push_back(pZip, &pState->m_central_dir, 
central_dir_header, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE)) || + (!mz_zip_array_push_back(pZip, &pState->m_central_dir, pFilename, filename_size)) || + (!mz_zip_array_push_back(pZip, &pState->m_central_dir, pExtra, extra_size)) || + (!mz_zip_array_push_back(pZip, &pState->m_central_dir, pComment, comment_size)) || + (!mz_zip_array_push_back(pZip, &pState->m_central_dir_offsets, &central_dir_ofs, 1))) + { + // Try to push the central directory array back into its original state. + mz_zip_array_resize(pZip, &pState->m_central_dir, orig_central_dir_size, MZ_FALSE); + return MZ_FALSE; + } + + return MZ_TRUE; +} + +static mz_bool mz_zip_writer_validate_archive_name(const char *pArchive_name) +{ + // Basic ZIP archive filename validity checks: Valid filenames cannot start with a forward slash, cannot contain a drive letter, and cannot use DOS-style backward slashes. + if (*pArchive_name == '/') + return MZ_FALSE; + while (*pArchive_name) + { + if ((*pArchive_name == '\\') || (*pArchive_name == ':')) + return MZ_FALSE; + pArchive_name++; + } + return MZ_TRUE; +} + +static mz_uint mz_zip_writer_compute_padding_needed_for_file_alignment(mz_zip_archive *pZip) +{ + mz_uint32 n; + if (!pZip->m_file_offset_alignment) + return 0; + n = (mz_uint32)(pZip->m_archive_size & (pZip->m_file_offset_alignment - 1)); + return (pZip->m_file_offset_alignment - n) & (pZip->m_file_offset_alignment - 1); +} + +static mz_bool mz_zip_writer_write_zeros(mz_zip_archive *pZip, mz_uint64 cur_file_ofs, mz_uint32 n) +{ + char buf[4096]; + memset(buf, 0, MZ_MIN(sizeof(buf), n)); + while (n) + { + mz_uint32 s = MZ_MIN(sizeof(buf), n); + if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_file_ofs, buf, s) != s) + return MZ_FALSE; + cur_file_ofs += s; n -= s; + } + return MZ_TRUE; +} + +mz_bool mz_zip_writer_add_mem_ex(mz_zip_archive *pZip, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags, mz_uint64 uncomp_size, mz_uint32 uncomp_crc32) +{ + 
mz_uint16 method = 0, dos_time = 0, dos_date = 0; + mz_uint level, ext_attributes = 0, num_alignment_padding_bytes; + mz_uint64 local_dir_header_ofs = pZip->m_archive_size, cur_archive_file_ofs = pZip->m_archive_size, comp_size = 0; + size_t archive_name_size; + mz_uint8 local_dir_header[MZ_ZIP_LOCAL_DIR_HEADER_SIZE]; + tdefl_compressor *pComp = NULL; + mz_bool store_data_uncompressed; + mz_zip_internal_state *pState; + + if ((int)level_and_flags < 0) + level_and_flags = MZ_DEFAULT_LEVEL; + level = level_and_flags & 0xF; + store_data_uncompressed = ((!level) || (level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA)); + + if ((!pZip) || (!pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING) || ((buf_size) && (!pBuf)) || (!pArchive_name) || ((comment_size) && (!pComment)) || (pZip->m_total_files == 0xFFFF) || (level > MZ_UBER_COMPRESSION)) + return MZ_FALSE; + + pState = pZip->m_pState; + + if ((!(level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) && (uncomp_size)) + return MZ_FALSE; + // No zip64 support yet + if ((buf_size > 0xFFFFFFFF) || (uncomp_size > 0xFFFFFFFF)) + return MZ_FALSE; + if (!mz_zip_writer_validate_archive_name(pArchive_name)) + return MZ_FALSE; + +#ifndef MINIZ_NO_TIME + { + time_t cur_time; time(&cur_time); + mz_zip_time_to_dos_time(cur_time, &dos_time, &dos_date); + } +#endif // #ifndef MINIZ_NO_TIME + + archive_name_size = strlen(pArchive_name); + if (archive_name_size > 0xFFFF) + return MZ_FALSE; + + num_alignment_padding_bytes = mz_zip_writer_compute_padding_needed_for_file_alignment(pZip); + + // no zip64 support yet + if ((pZip->m_total_files == 0xFFFF) || ((pZip->m_archive_size + num_alignment_padding_bytes + MZ_ZIP_LOCAL_DIR_HEADER_SIZE + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + comment_size + archive_name_size) > 0xFFFFFFFF)) + return MZ_FALSE; + + if ((archive_name_size) && (pArchive_name[archive_name_size - 1] == '/')) + { + // Set DOS Subdirectory attribute bit. + ext_attributes |= 0x10; + // Subdirectories cannot contain data. 
+ if ((buf_size) || (uncomp_size)) + return MZ_FALSE; + } + + // Try to do any allocations before writing to the archive, so if an allocation fails the file remains unmodified. (A good idea if we're doing an in-place modification.) + if ((!mz_zip_array_ensure_room(pZip, &pState->m_central_dir, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + archive_name_size + comment_size)) || (!mz_zip_array_ensure_room(pZip, &pState->m_central_dir_offsets, 1))) + return MZ_FALSE; + + if ((!store_data_uncompressed) && (buf_size)) + { + if (NULL == (pComp = (tdefl_compressor *)pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, sizeof(tdefl_compressor)))) + return MZ_FALSE; + } + + if (!mz_zip_writer_write_zeros(pZip, cur_archive_file_ofs, num_alignment_padding_bytes + sizeof(local_dir_header))) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); + return MZ_FALSE; + } + local_dir_header_ofs += num_alignment_padding_bytes; + if (pZip->m_file_offset_alignment) { MZ_ASSERT((local_dir_header_ofs & (pZip->m_file_offset_alignment - 1)) == 0); } + cur_archive_file_ofs += num_alignment_padding_bytes + sizeof(local_dir_header); + + MZ_CLEAR_OBJ(local_dir_header); + if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pArchive_name, archive_name_size) != archive_name_size) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); + return MZ_FALSE; + } + cur_archive_file_ofs += archive_name_size; + + if (!(level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) + { + uncomp_crc32 = (mz_uint32)mz_crc32(MZ_CRC32_INIT, (const mz_uint8*)pBuf, buf_size); + uncomp_size = buf_size; + if (uncomp_size <= 3) + { + level = 0; + store_data_uncompressed = MZ_TRUE; + } + } + + if (store_data_uncompressed) + { + if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pBuf, buf_size) != buf_size) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); + return MZ_FALSE; + } + + cur_archive_file_ofs += buf_size; + comp_size = buf_size; + + if (level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA) + method = MZ_DEFLATED; + } + else if (buf_size) + 
{ + mz_zip_writer_add_state state; + + state.m_pZip = pZip; + state.m_cur_archive_file_ofs = cur_archive_file_ofs; + state.m_comp_size = 0; + + if ((tdefl_init(pComp, mz_zip_writer_add_put_buf_callback, &state, tdefl_create_comp_flags_from_zip_params(level, -15, MZ_DEFAULT_STRATEGY)) != TDEFL_STATUS_OKAY) || + (tdefl_compress_buffer(pComp, pBuf, buf_size, TDEFL_FINISH) != TDEFL_STATUS_DONE)) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); + return MZ_FALSE; + } + + comp_size = state.m_comp_size; + cur_archive_file_ofs = state.m_cur_archive_file_ofs; + + method = MZ_DEFLATED; + } + + pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); + pComp = NULL; + + // no zip64 support yet + if ((comp_size > 0xFFFFFFFF) || (cur_archive_file_ofs > 0xFFFFFFFF)) + return MZ_FALSE; + + if (!mz_zip_writer_create_local_dir_header(pZip, local_dir_header, (mz_uint16)archive_name_size, 0, uncomp_size, comp_size, uncomp_crc32, method, 0, dos_time, dos_date)) + return MZ_FALSE; + + if (pZip->m_pWrite(pZip->m_pIO_opaque, local_dir_header_ofs, local_dir_header, sizeof(local_dir_header)) != sizeof(local_dir_header)) + return MZ_FALSE; + + if (!mz_zip_writer_add_to_central_dir(pZip, pArchive_name, (mz_uint16)archive_name_size, NULL, 0, pComment, comment_size, uncomp_size, comp_size, uncomp_crc32, method, 0, dos_time, dos_date, local_dir_header_ofs, ext_attributes)) + return MZ_FALSE; + + pZip->m_total_files++; + pZip->m_archive_size = cur_archive_file_ofs; + + return MZ_TRUE; +} + +#ifndef MINIZ_NO_STDIO +mz_bool mz_zip_writer_add_file(mz_zip_archive *pZip, const char *pArchive_name, const char *pSrc_filename, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags) +{ + mz_uint uncomp_crc32 = MZ_CRC32_INIT, level, num_alignment_padding_bytes; + mz_uint16 method = 0, dos_time = 0, dos_date = 0, ext_attributes = 0; + mz_uint64 local_dir_header_ofs = pZip->m_archive_size, cur_archive_file_ofs = pZip->m_archive_size, uncomp_size = 0, comp_size = 0; + size_t archive_name_size; + 
mz_uint8 local_dir_header[MZ_ZIP_LOCAL_DIR_HEADER_SIZE]; + MZ_FILE *pSrc_file = NULL; + + if ((int)level_and_flags < 0) + level_and_flags = MZ_DEFAULT_LEVEL; + level = level_and_flags & 0xF; + + if ((!pZip) || (!pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING) || (!pArchive_name) || ((comment_size) && (!pComment)) || (level > MZ_UBER_COMPRESSION)) + return MZ_FALSE; + if (level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA) + return MZ_FALSE; + if (!mz_zip_writer_validate_archive_name(pArchive_name)) + return MZ_FALSE; + + archive_name_size = strlen(pArchive_name); + if (archive_name_size > 0xFFFF) + return MZ_FALSE; + + num_alignment_padding_bytes = mz_zip_writer_compute_padding_needed_for_file_alignment(pZip); + + // no zip64 support yet + if ((pZip->m_total_files == 0xFFFF) || ((pZip->m_archive_size + num_alignment_padding_bytes + MZ_ZIP_LOCAL_DIR_HEADER_SIZE + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + comment_size + archive_name_size) > 0xFFFFFFFF)) + return MZ_FALSE; + + if (!mz_zip_get_file_modified_time(pSrc_filename, &dos_time, &dos_date)) + return MZ_FALSE; + + pSrc_file = MZ_FOPEN(pSrc_filename, "rb"); + if (!pSrc_file) + return MZ_FALSE; + MZ_FSEEK64(pSrc_file, 0, SEEK_END); + uncomp_size = MZ_FTELL64(pSrc_file); + MZ_FSEEK64(pSrc_file, 0, SEEK_SET); + + if (uncomp_size > 0xFFFFFFFF) + { + // No zip64 support yet + MZ_FCLOSE(pSrc_file); + return MZ_FALSE; + } + if (uncomp_size <= 3) + level = 0; + + if (!mz_zip_writer_write_zeros(pZip, cur_archive_file_ofs, num_alignment_padding_bytes + sizeof(local_dir_header))) + { + MZ_FCLOSE(pSrc_file); + return MZ_FALSE; + } + local_dir_header_ofs += num_alignment_padding_bytes; + if (pZip->m_file_offset_alignment) { MZ_ASSERT((local_dir_header_ofs & (pZip->m_file_offset_alignment - 1)) == 0); } + cur_archive_file_ofs += num_alignment_padding_bytes + sizeof(local_dir_header); + + MZ_CLEAR_OBJ(local_dir_header); + if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pArchive_name, archive_name_size) != 
archive_name_size) + { + MZ_FCLOSE(pSrc_file); + return MZ_FALSE; + } + cur_archive_file_ofs += archive_name_size; + + if (uncomp_size) + { + mz_uint64 uncomp_remaining = uncomp_size; + void *pRead_buf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, MZ_ZIP_MAX_IO_BUF_SIZE); + if (!pRead_buf) + { + MZ_FCLOSE(pSrc_file); + return MZ_FALSE; + } + + if (!level) + { + while (uncomp_remaining) + { + mz_uint n = (mz_uint)MZ_MIN(MZ_ZIP_MAX_IO_BUF_SIZE, uncomp_remaining); + if ((MZ_FREAD(pRead_buf, 1, n, pSrc_file) != n) || (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pRead_buf, n) != n)) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); + MZ_FCLOSE(pSrc_file); + return MZ_FALSE; + } + uncomp_crc32 = (mz_uint32)mz_crc32(uncomp_crc32, (const mz_uint8 *)pRead_buf, n); + uncomp_remaining -= n; + cur_archive_file_ofs += n; + } + comp_size = uncomp_size; + } + else + { + mz_bool result = MZ_FALSE; + mz_zip_writer_add_state state; + tdefl_compressor *pComp = (tdefl_compressor *)pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, sizeof(tdefl_compressor)); + if (!pComp) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); + MZ_FCLOSE(pSrc_file); + return MZ_FALSE; + } + + state.m_pZip = pZip; + state.m_cur_archive_file_ofs = cur_archive_file_ofs; + state.m_comp_size = 0; + + if (tdefl_init(pComp, mz_zip_writer_add_put_buf_callback, &state, tdefl_create_comp_flags_from_zip_params(level, -15, MZ_DEFAULT_STRATEGY)) != TDEFL_STATUS_OKAY) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); + pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); + MZ_FCLOSE(pSrc_file); + return MZ_FALSE; + } + + for ( ; ; ) + { + size_t in_buf_size = (mz_uint32)MZ_MIN(uncomp_remaining, MZ_ZIP_MAX_IO_BUF_SIZE); + tdefl_status status; + + if (MZ_FREAD(pRead_buf, 1, in_buf_size, pSrc_file) != in_buf_size) + break; + + uncomp_crc32 = (mz_uint32)mz_crc32(uncomp_crc32, (const mz_uint8 *)pRead_buf, in_buf_size); + uncomp_remaining -= in_buf_size; + + status = tdefl_compress_buffer(pComp, pRead_buf, in_buf_size, 
uncomp_remaining ? TDEFL_NO_FLUSH : TDEFL_FINISH); + if (status == TDEFL_STATUS_DONE) + { + result = MZ_TRUE; + break; + } + else if (status != TDEFL_STATUS_OKAY) + break; + } + + pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); + + if (!result) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); + MZ_FCLOSE(pSrc_file); + return MZ_FALSE; + } + + comp_size = state.m_comp_size; + cur_archive_file_ofs = state.m_cur_archive_file_ofs; + + method = MZ_DEFLATED; + } + + pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); + } + + MZ_FCLOSE(pSrc_file); pSrc_file = NULL; + + // no zip64 support yet + if ((comp_size > 0xFFFFFFFF) || (cur_archive_file_ofs > 0xFFFFFFFF)) + return MZ_FALSE; + + if (!mz_zip_writer_create_local_dir_header(pZip, local_dir_header, (mz_uint16)archive_name_size, 0, uncomp_size, comp_size, uncomp_crc32, method, 0, dos_time, dos_date)) + return MZ_FALSE; + + if (pZip->m_pWrite(pZip->m_pIO_opaque, local_dir_header_ofs, local_dir_header, sizeof(local_dir_header)) != sizeof(local_dir_header)) + return MZ_FALSE; + + if (!mz_zip_writer_add_to_central_dir(pZip, pArchive_name, (mz_uint16)archive_name_size, NULL, 0, pComment, comment_size, uncomp_size, comp_size, uncomp_crc32, method, 0, dos_time, dos_date, local_dir_header_ofs, ext_attributes)) + return MZ_FALSE; + + pZip->m_total_files++; + pZip->m_archive_size = cur_archive_file_ofs; + + return MZ_TRUE; +} +#endif // #ifndef MINIZ_NO_STDIO + +mz_bool mz_zip_writer_add_from_zip_reader(mz_zip_archive *pZip, mz_zip_archive *pSource_zip, mz_uint file_index) +{ + mz_uint n, bit_flags, num_alignment_padding_bytes; + mz_uint64 comp_bytes_remaining, local_dir_header_ofs; + mz_uint64 cur_src_file_ofs, cur_dst_file_ofs; + mz_uint32 local_header_u32[(MZ_ZIP_LOCAL_DIR_HEADER_SIZE + sizeof(mz_uint32) - 1) / sizeof(mz_uint32)]; mz_uint8 *pLocal_header = (mz_uint8 *)local_header_u32; + mz_uint8 central_header[MZ_ZIP_CENTRAL_DIR_HEADER_SIZE]; + size_t orig_central_dir_size; + mz_zip_internal_state *pState; + void *pBuf; const 
mz_uint8 *pSrc_central_header; + + if ((!pZip) || (!pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING)) + return MZ_FALSE; + if (NULL == (pSrc_central_header = mz_zip_reader_get_cdh(pSource_zip, file_index))) + return MZ_FALSE; + pState = pZip->m_pState; + + num_alignment_padding_bytes = mz_zip_writer_compute_padding_needed_for_file_alignment(pZip); + + // no zip64 support yet + if ((pZip->m_total_files == 0xFFFF) || ((pZip->m_archive_size + num_alignment_padding_bytes + MZ_ZIP_LOCAL_DIR_HEADER_SIZE + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE) > 0xFFFFFFFF)) + return MZ_FALSE; + + cur_src_file_ofs = MZ_READ_LE32(pSrc_central_header + MZ_ZIP_CDH_LOCAL_HEADER_OFS); + cur_dst_file_ofs = pZip->m_archive_size; + + if (pSource_zip->m_pRead(pSource_zip->m_pIO_opaque, cur_src_file_ofs, pLocal_header, MZ_ZIP_LOCAL_DIR_HEADER_SIZE) != MZ_ZIP_LOCAL_DIR_HEADER_SIZE) + return MZ_FALSE; + if (MZ_READ_LE32(pLocal_header) != MZ_ZIP_LOCAL_DIR_HEADER_SIG) + return MZ_FALSE; + cur_src_file_ofs += MZ_ZIP_LOCAL_DIR_HEADER_SIZE; + + if (!mz_zip_writer_write_zeros(pZip, cur_dst_file_ofs, num_alignment_padding_bytes)) + return MZ_FALSE; + cur_dst_file_ofs += num_alignment_padding_bytes; + local_dir_header_ofs = cur_dst_file_ofs; + if (pZip->m_file_offset_alignment) { MZ_ASSERT((local_dir_header_ofs & (pZip->m_file_offset_alignment - 1)) == 0); } + + if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_dst_file_ofs, pLocal_header, MZ_ZIP_LOCAL_DIR_HEADER_SIZE) != MZ_ZIP_LOCAL_DIR_HEADER_SIZE) + return MZ_FALSE; + cur_dst_file_ofs += MZ_ZIP_LOCAL_DIR_HEADER_SIZE; + + n = MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_FILENAME_LEN_OFS) + MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_EXTRA_LEN_OFS); + comp_bytes_remaining = n + MZ_READ_LE32(pSrc_central_header + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS); + + if (NULL == (pBuf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, (size_t)MZ_MAX(sizeof(mz_uint32) * 4, MZ_MIN(MZ_ZIP_MAX_IO_BUF_SIZE, comp_bytes_remaining))))) + return MZ_FALSE; + + while (comp_bytes_remaining) + { + n = 
(mz_uint)MZ_MIN(MZ_ZIP_MAX_IO_BUF_SIZE, comp_bytes_remaining); + if (pSource_zip->m_pRead(pSource_zip->m_pIO_opaque, cur_src_file_ofs, pBuf, n) != n) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); + return MZ_FALSE; + } + cur_src_file_ofs += n; + + if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_dst_file_ofs, pBuf, n) != n) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); + return MZ_FALSE; + } + cur_dst_file_ofs += n; + + comp_bytes_remaining -= n; + } + + bit_flags = MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_BIT_FLAG_OFS); + if (bit_flags & 8) + { + // Copy data descriptor + if (pSource_zip->m_pRead(pSource_zip->m_pIO_opaque, cur_src_file_ofs, pBuf, sizeof(mz_uint32) * 4) != sizeof(mz_uint32) * 4) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); + return MZ_FALSE; + } + + n = sizeof(mz_uint32) * ((MZ_READ_LE32(pBuf) == 0x08074b50) ? 4 : 3); + if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_dst_file_ofs, pBuf, n) != n) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); + return MZ_FALSE; + } + + cur_src_file_ofs += n; + cur_dst_file_ofs += n; + } + pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); + + // no zip64 support yet + if (cur_dst_file_ofs > 0xFFFFFFFF) + return MZ_FALSE; + + orig_central_dir_size = pState->m_central_dir.m_size; + + memcpy(central_header, pSrc_central_header, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE); + MZ_WRITE_LE32(central_header + MZ_ZIP_CDH_LOCAL_HEADER_OFS, local_dir_header_ofs); + if (!mz_zip_array_push_back(pZip, &pState->m_central_dir, central_header, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE)) + return MZ_FALSE; + + n = MZ_READ_LE16(pSrc_central_header + MZ_ZIP_CDH_FILENAME_LEN_OFS) + MZ_READ_LE16(pSrc_central_header + MZ_ZIP_CDH_EXTRA_LEN_OFS) + MZ_READ_LE16(pSrc_central_header + MZ_ZIP_CDH_COMMENT_LEN_OFS); + if (!mz_zip_array_push_back(pZip, &pState->m_central_dir, pSrc_central_header + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE, n)) + { + mz_zip_array_resize(pZip, &pState->m_central_dir, orig_central_dir_size, MZ_FALSE); + return MZ_FALSE; + } + + if 
(pState->m_central_dir.m_size > 0xFFFFFFFF) + return MZ_FALSE; + n = (mz_uint32)orig_central_dir_size; + if (!mz_zip_array_push_back(pZip, &pState->m_central_dir_offsets, &n, 1)) + { + mz_zip_array_resize(pZip, &pState->m_central_dir, orig_central_dir_size, MZ_FALSE); + return MZ_FALSE; + } + + pZip->m_total_files++; + pZip->m_archive_size = cur_dst_file_ofs; + + return MZ_TRUE; +} + +mz_bool mz_zip_writer_finalize_archive(mz_zip_archive *pZip) +{ + mz_zip_internal_state *pState; + mz_uint64 central_dir_ofs, central_dir_size; + mz_uint8 hdr[MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE]; + + if ((!pZip) || (!pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING)) + return MZ_FALSE; + + pState = pZip->m_pState; + + // no zip64 support yet + if ((pZip->m_total_files > 0xFFFF) || ((pZip->m_archive_size + pState->m_central_dir.m_size + MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) > 0xFFFFFFFF)) + return MZ_FALSE; + + central_dir_ofs = 0; + central_dir_size = 0; + if (pZip->m_total_files) + { + // Write central directory + central_dir_ofs = pZip->m_archive_size; + central_dir_size = pState->m_central_dir.m_size; + pZip->m_central_directory_file_ofs = central_dir_ofs; + if (pZip->m_pWrite(pZip->m_pIO_opaque, central_dir_ofs, pState->m_central_dir.m_p, (size_t)central_dir_size) != central_dir_size) + return MZ_FALSE; + pZip->m_archive_size += central_dir_size; + } + + // Write end of central directory record + MZ_CLEAR_OBJ(hdr); + MZ_WRITE_LE32(hdr + MZ_ZIP_ECDH_SIG_OFS, MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG); + MZ_WRITE_LE16(hdr + MZ_ZIP_ECDH_CDIR_NUM_ENTRIES_ON_DISK_OFS, pZip->m_total_files); + MZ_WRITE_LE16(hdr + MZ_ZIP_ECDH_CDIR_TOTAL_ENTRIES_OFS, pZip->m_total_files); + MZ_WRITE_LE32(hdr + MZ_ZIP_ECDH_CDIR_SIZE_OFS, central_dir_size); + MZ_WRITE_LE32(hdr + MZ_ZIP_ECDH_CDIR_OFS_OFS, central_dir_ofs); + + if (pZip->m_pWrite(pZip->m_pIO_opaque, pZip->m_archive_size, hdr, sizeof(hdr)) != sizeof(hdr)) + return MZ_FALSE; +#ifndef MINIZ_NO_STDIO + if ((pState->m_pFile) && 
(MZ_FFLUSH(pState->m_pFile) == EOF)) + return MZ_FALSE; +#endif // #ifndef MINIZ_NO_STDIO + + pZip->m_archive_size += sizeof(hdr); + + pZip->m_zip_mode = MZ_ZIP_MODE_WRITING_HAS_BEEN_FINALIZED; + return MZ_TRUE; +} + +mz_bool mz_zip_writer_finalize_heap_archive(mz_zip_archive *pZip, void **pBuf, size_t *pSize) +{ + if ((!pZip) || (!pZip->m_pState) || (!pBuf) || (!pSize)) + return MZ_FALSE; + if (pZip->m_pWrite != mz_zip_heap_write_func) + return MZ_FALSE; + if (!mz_zip_writer_finalize_archive(pZip)) + return MZ_FALSE; + + *pBuf = pZip->m_pState->m_pMem; + *pSize = pZip->m_pState->m_mem_size; + pZip->m_pState->m_pMem = NULL; + pZip->m_pState->m_mem_size = pZip->m_pState->m_mem_capacity = 0; + return MZ_TRUE; +} + +mz_bool mz_zip_writer_end(mz_zip_archive *pZip) +{ + mz_zip_internal_state *pState; + mz_bool status = MZ_TRUE; + if ((!pZip) || (!pZip->m_pState) || (!pZip->m_pAlloc) || (!pZip->m_pFree) || ((pZip->m_zip_mode != MZ_ZIP_MODE_WRITING) && (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING_HAS_BEEN_FINALIZED))) + return MZ_FALSE; + + pState = pZip->m_pState; + pZip->m_pState = NULL; + mz_zip_array_clear(pZip, &pState->m_central_dir); + mz_zip_array_clear(pZip, &pState->m_central_dir_offsets); + mz_zip_array_clear(pZip, &pState->m_sorted_central_dir_offsets); + +#ifndef MINIZ_NO_STDIO + if (pState->m_pFile) + { + MZ_FCLOSE(pState->m_pFile); + pState->m_pFile = NULL; + } +#endif // #ifndef MINIZ_NO_STDIO + + if ((pZip->m_pWrite == mz_zip_heap_write_func) && (pState->m_pMem)) + { + pZip->m_pFree(pZip->m_pAlloc_opaque, pState->m_pMem); + pState->m_pMem = NULL; + } + + pZip->m_pFree(pZip->m_pAlloc_opaque, pState); + pZip->m_zip_mode = MZ_ZIP_MODE_INVALID; + return status; +} + +#ifndef MINIZ_NO_STDIO +mz_bool mz_zip_add_mem_to_archive_file_in_place(const char *pZip_filename, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags) +{ + mz_bool status, created_new_archive = MZ_FALSE; + mz_zip_archive 
zip_archive; + struct MZ_FILE_STAT_STRUCT file_stat; + MZ_CLEAR_OBJ(zip_archive); + if ((int)level_and_flags < 0) + level_and_flags = MZ_DEFAULT_LEVEL; + if ((!pZip_filename) || (!pArchive_name) || ((buf_size) && (!pBuf)) || ((comment_size) && (!pComment)) || ((level_and_flags & 0xF) > MZ_UBER_COMPRESSION)) + return MZ_FALSE; + if (!mz_zip_writer_validate_archive_name(pArchive_name)) + return MZ_FALSE; + if (MZ_FILE_STAT(pZip_filename, &file_stat) != 0) + { + // Create a new archive. + if (!mz_zip_writer_init_file(&zip_archive, pZip_filename, 0)) + return MZ_FALSE; + created_new_archive = MZ_TRUE; + } + else + { + // Append to an existing archive. + if (!mz_zip_reader_init_file(&zip_archive, pZip_filename, level_and_flags | MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY)) + return MZ_FALSE; + if (!mz_zip_writer_init_from_reader(&zip_archive, pZip_filename)) + { + mz_zip_reader_end(&zip_archive); + return MZ_FALSE; + } + } + status = mz_zip_writer_add_mem_ex(&zip_archive, pArchive_name, pBuf, buf_size, pComment, comment_size, level_and_flags, 0, 0); + // Always finalize, even if adding failed for some reason, so we have a valid central directory. (This may not always succeed, but we can try.) + if (!mz_zip_writer_finalize_archive(&zip_archive)) + status = MZ_FALSE; + if (!mz_zip_writer_end(&zip_archive)) + status = MZ_FALSE; + if ((!status) && (created_new_archive)) + { + // It's a new archive and something went wrong, so just delete it. 
+ int ignoredStatus = MZ_DELETE_FILE(pZip_filename); + (void)ignoredStatus; + } + return status; +} + +void *mz_zip_extract_archive_file_to_heap(const char *pZip_filename, const char *pArchive_name, size_t *pSize, mz_uint flags) +{ + int file_index; + mz_zip_archive zip_archive; + void *p = NULL; + + if (pSize) + *pSize = 0; + + if ((!pZip_filename) || (!pArchive_name)) + return NULL; + + MZ_CLEAR_OBJ(zip_archive); + if (!mz_zip_reader_init_file(&zip_archive, pZip_filename, flags | MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY)) + return NULL; + + if ((file_index = mz_zip_reader_locate_file(&zip_archive, pArchive_name, NULL, flags)) >= 0) + p = mz_zip_reader_extract_to_heap(&zip_archive, file_index, pSize, flags); + + mz_zip_reader_end(&zip_archive); + return p; +} + +#endif // #ifndef MINIZ_NO_STDIO + +#endif // #ifndef MINIZ_NO_ARCHIVE_WRITING_APIS + +#endif // #ifndef MINIZ_NO_ARCHIVE_APIS + +#ifdef __cplusplus +} +#endif + +/* + This is free and unencumbered software released into the public domain. + + Anyone is free to copy, modify, publish, use, compile, sell, or + distribute this software, either in source code form or as a compiled + binary, for any purpose, commercial or non-commercial, and by any + means. + + In jurisdictions that recognize copyright laws, the author or authors + of this software dedicate any and all copyright interest in the + software to the public domain. We make this dedication for the benefit + of the public at large and to the detriment of our heirs and + successors. We intend this dedication to be an overt act of + relinquishment in perpetuity of all present and future rights to this + software under copyright law. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + OTHER DEALINGS IN THE SOFTWARE. + + For more information, please refer to <http://unlicense.org/> +*/ diff --git a/test_common/miniz/miniz.h b/test_common/miniz/miniz.h new file mode 100644 index 00000000..18681fe7 --- /dev/null +++ b/test_common/miniz/miniz.h @@ -0,0 +1,749 @@ +#ifndef MINIZ_HEADER_INCLUDED +#define MINIZ_HEADER_INCLUDED + +#include <stdlib.h> + +#if defined(__TINYC__) && (defined(__linux) || defined(__linux__)) + // TODO: Work around "error: include file 'sys\utime.h' when compiling with tcc on Linux + #define MINIZ_NO_TIME +#endif + +#if !defined(MINIZ_NO_TIME) && !defined(MINIZ_NO_ARCHIVE_APIS) + #include <time.h> +#endif + +#if defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || defined(__i386) || defined(__i486__) || defined(__i486) || defined(i386) || defined(__ia64__) || defined(__x86_64__) +// MINIZ_X86_OR_X64_CPU is only used to help set the below macros. +#define MINIZ_X86_OR_X64_CPU 1 +#endif + +#if (__BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__) || MINIZ_X86_OR_X64_CPU +// Set MINIZ_LITTLE_ENDIAN to 1 if the processor is little endian. +#define MINIZ_LITTLE_ENDIAN 1 +#endif + +#if MINIZ_X86_OR_X64_CPU +// Set MINIZ_USE_UNALIGNED_LOADS_AND_STORES to 1 on CPU's that permit efficient integer loads and stores from unaligned addresses. +#define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 1 +#endif + +#if defined(_M_X64) || defined(_WIN64) || defined(__MINGW64__) || defined(_LP64) || defined(__LP64__) || defined(__ia64__) || defined(__x86_64__) +// Set MINIZ_HAS_64BIT_REGISTERS to 1 if operations on 64-bit integers are reasonably fast (and don't involve compiler generated calls to helper functions). +#define MINIZ_HAS_64BIT_REGISTERS 1 +#endif + +// Return status codes. MZ_PARAM_ERROR is non-standard. 
+enum { + MZ_OK = 0, + MZ_STREAM_END = 1, + MZ_NEED_DICT = 2, + MZ_ERRNO = -1, + MZ_STREAM_ERROR = -2, + MZ_DATA_ERROR = -3, + MZ_MEM_ERROR = -4, + MZ_BUF_ERROR = -5, + MZ_VERSION_ERROR = -6, + MZ_PARAM_ERROR = -10000 +}; + +typedef unsigned long mz_ulong; + +#ifdef __cplusplus +extern "C" { +#endif + +// ------------------- zlib-style API Definitions. + +// mz_free() internally uses the MZ_FREE() macro (which by default calls free() unless you've modified the MZ_MALLOC macro) to release a block allocated from the heap. +void mz_free(void *p); + +#define MZ_ADLER32_INIT (1) +// mz_adler32() returns the initial adler-32 value to use when called with ptr==NULL. +mz_ulong mz_adler32(mz_ulong adler, const unsigned char *ptr, size_t buf_len); + +#define MZ_CRC32_INIT (0) +// mz_crc32() returns the initial CRC-32 value to use when called with ptr==NULL. +mz_ulong mz_crc32(mz_ulong crc, const unsigned char *ptr, size_t buf_len); + +// Compression strategies. +enum { MZ_DEFAULT_STRATEGY = 0, MZ_FILTERED = 1, MZ_HUFFMAN_ONLY = 2, MZ_RLE = 3, MZ_FIXED = 4 }; + +// Method +#define MZ_DEFLATED 8 + +#ifndef MINIZ_NO_ZLIB_APIS + +// Heap allocation callbacks. +// Note that mz_alloc_func parameter types purpsosely differ from zlib's: items/size is size_t, not unsigned long. +typedef void *(*mz_alloc_func)(void *opaque, size_t items, size_t size); +typedef void (*mz_free_func)(void *opaque, void *address); +typedef void *(*mz_realloc_func)(void *opaque, void *address, size_t items, size_t size); + +#define MZ_VERSION "9.1.15" +#define MZ_VERNUM 0x91F0 +#define MZ_VER_MAJOR 9 +#define MZ_VER_MINOR 1 +#define MZ_VER_REVISION 15 +#define MZ_VER_SUBREVISION 0 + +// Flush values. For typical usage you only need MZ_NO_FLUSH and MZ_FINISH. The other values are for advanced use (refer to the zlib docs). 
+enum { MZ_NO_FLUSH = 0, MZ_PARTIAL_FLUSH = 1, MZ_SYNC_FLUSH = 2, MZ_FULL_FLUSH = 3, MZ_FINISH = 4, MZ_BLOCK = 5 }; + +// Compression levels: 0-9 are the standard zlib-style levels, 10 is best possible compression (not zlib compatible, and may be very slow), MZ_DEFAULT_COMPRESSION=MZ_DEFAULT_LEVEL. +enum { MZ_NO_COMPRESSION = 0, MZ_BEST_SPEED = 1, MZ_BEST_COMPRESSION = 9, MZ_UBER_COMPRESSION = 10, MZ_DEFAULT_LEVEL = 6, MZ_DEFAULT_COMPRESSION = -1 }; + +// Window bits +#define MZ_DEFAULT_WINDOW_BITS 15 + +struct mz_internal_state; + +// Compression/decompression stream struct. +typedef struct mz_stream_s +{ + const unsigned char *next_in; // pointer to next byte to read + unsigned int avail_in; // number of bytes available at next_in + mz_ulong total_in; // total number of bytes consumed so far + + unsigned char *next_out; // pointer to next byte to write + unsigned int avail_out; // number of bytes that can be written to next_out + mz_ulong total_out; // total number of bytes produced so far + + char *msg; // error msg (unused) + struct mz_internal_state *state; // internal state, allocated by zalloc/zfree + + mz_alloc_func zalloc; // optional heap allocation function (defaults to malloc) + mz_free_func zfree; // optional heap free function (defaults to free) + void *opaque; // heap alloc function user pointer + + int data_type; // data_type (unused) + mz_ulong adler; // adler32 of the source or uncompressed data + mz_ulong reserved; // not used +} mz_stream; + +typedef mz_stream *mz_streamp; + +// Returns the version string of miniz.c. +const char *mz_version(void); + +// mz_deflateInit() initializes a compressor with default options: +// Parameters: +// pStream must point to an initialized mz_stream struct. +// level must be between [MZ_NO_COMPRESSION, MZ_BEST_COMPRESSION]. +// level 1 enables a specially optimized compression function that's been optimized purely for performance, not ratio. +// (This special func. 
is currently only enabled when MINIZ_USE_UNALIGNED_LOADS_AND_STORES and MINIZ_LITTLE_ENDIAN are defined.) +// Return values: +// MZ_OK on success. +// MZ_STREAM_ERROR if the stream is bogus. +// MZ_PARAM_ERROR if the input parameters are bogus. +// MZ_MEM_ERROR on out of memory. +int mz_deflateInit(mz_streamp pStream, int level); + +// mz_deflateInit2() is like mz_deflate(), except with more control: +// Additional parameters: +// method must be MZ_DEFLATED +// window_bits must be MZ_DEFAULT_WINDOW_BITS (to wrap the deflate stream with zlib header/adler-32 footer) or -MZ_DEFAULT_WINDOW_BITS (raw deflate/no header or footer) +// mem_level must be between [1, 9] (it's checked but ignored by miniz.c) +int mz_deflateInit2(mz_streamp pStream, int level, int method, int window_bits, int mem_level, int strategy); + +// Quickly resets a compressor without having to reallocate anything. Same as calling mz_deflateEnd() followed by mz_deflateInit()/mz_deflateInit2(). +int mz_deflateReset(mz_streamp pStream); + +// mz_deflate() compresses the input to output, consuming as much of the input and producing as much output as possible. +// Parameters: +// pStream is the stream to read from and write to. You must initialize/update the next_in, avail_in, next_out, and avail_out members. +// flush may be MZ_NO_FLUSH, MZ_PARTIAL_FLUSH/MZ_SYNC_FLUSH, MZ_FULL_FLUSH, or MZ_FINISH. +// Return values: +// MZ_OK on success (when flushing, or if more input is needed but not available, and/or there's more output to be written but the output buffer is full). +// MZ_STREAM_END if all input has been consumed and all output bytes have been written. Don't call mz_deflate() on the stream anymore. +// MZ_STREAM_ERROR if the stream is bogus. +// MZ_PARAM_ERROR if one of the parameters is invalid. +// MZ_BUF_ERROR if no forward progress is possible because the input and/or output buffers are empty. (Fill up the input buffer or free up some output space and try again.) 
+int mz_deflate(mz_streamp pStream, int flush); + +// mz_deflateEnd() deinitializes a compressor: +// Return values: +// MZ_OK on success. +// MZ_STREAM_ERROR if the stream is bogus. +int mz_deflateEnd(mz_streamp pStream); + +// mz_deflateBound() returns a (very) conservative upper bound on the amount of data that could be generated by deflate(), assuming flush is set to only MZ_NO_FLUSH or MZ_FINISH. +mz_ulong mz_deflateBound(mz_streamp pStream, mz_ulong source_len); + +// Single-call compression functions mz_compress() and mz_compress2(): +// Returns MZ_OK on success, or one of the error codes from mz_deflate() on failure. +int mz_compress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len); +int mz_compress2(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len, int level); + +// mz_compressBound() returns a (very) conservative upper bound on the amount of data that could be generated by calling mz_compress(). +mz_ulong mz_compressBound(mz_ulong source_len); + +// Initializes a decompressor. +int mz_inflateInit(mz_streamp pStream); + +// mz_inflateInit2() is like mz_inflateInit() with an additional option that controls the window size and whether or not the stream has been wrapped with a zlib header/footer: +// window_bits must be MZ_DEFAULT_WINDOW_BITS (to parse zlib header/footer) or -MZ_DEFAULT_WINDOW_BITS (raw deflate). +int mz_inflateInit2(mz_streamp pStream, int window_bits); + +// Decompresses the input stream to the output, consuming only as much of the input as needed, and writing as much to the output as possible. +// Parameters: +// pStream is the stream to read from and write to. You must initialize/update the next_in, avail_in, next_out, and avail_out members. +// flush may be MZ_NO_FLUSH, MZ_SYNC_FLUSH, or MZ_FINISH. 
+// On the first call, if flush is MZ_FINISH it's assumed the input and output buffers are both sized large enough to decompress the entire stream in a single call (this is slightly faster). +// MZ_FINISH implies that there are no more source bytes available beside what's already in the input buffer, and that the output buffer is large enough to hold the rest of the decompressed data. +// Return values: +// MZ_OK on success. Either more input is needed but not available, and/or there's more output to be written but the output buffer is full. +// MZ_STREAM_END if all needed input has been consumed and all output bytes have been written. For zlib streams, the adler-32 of the decompressed data has also been verified. +// MZ_STREAM_ERROR if the stream is bogus. +// MZ_DATA_ERROR if the deflate stream is invalid. +// MZ_PARAM_ERROR if one of the parameters is invalid. +// MZ_BUF_ERROR if no forward progress is possible because the input buffer is empty but the inflater needs more input to continue, or if the output buffer is not large enough. Call mz_inflate() again +// with more input data, or with more room in the output buffer (except when using single call decompression, described above). +int mz_inflate(mz_streamp pStream, int flush); + +// Deinitializes a decompressor. +int mz_inflateEnd(mz_streamp pStream); + +// Single-call decompression. +// Returns MZ_OK on success, or one of the error codes from mz_inflate() on failure. +int mz_uncompress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len); + +// Returns a string description of the specified error code, or NULL if the error code is invalid. +const char *mz_error(int err); + +// Redefine zlib-compatible names to miniz equivalents, so miniz.c can be used as a drop-in replacement for the subset of zlib that miniz.c supports. +// Define MINIZ_NO_ZLIB_COMPATIBLE_NAMES to disable zlib-compatibility if you use zlib in the same project. 
+#ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES + typedef unsigned char Byte; + typedef unsigned int uInt; + typedef mz_ulong uLong; + typedef Byte Bytef; + typedef uInt uIntf; + typedef char charf; + typedef int intf; + typedef void *voidpf; + typedef uLong uLongf; + typedef void *voidp; + typedef void *const voidpc; + #define Z_NULL 0 + #define Z_NO_FLUSH MZ_NO_FLUSH + #define Z_PARTIAL_FLUSH MZ_PARTIAL_FLUSH + #define Z_SYNC_FLUSH MZ_SYNC_FLUSH + #define Z_FULL_FLUSH MZ_FULL_FLUSH + #define Z_FINISH MZ_FINISH + #define Z_BLOCK MZ_BLOCK + #define Z_OK MZ_OK + #define Z_STREAM_END MZ_STREAM_END + #define Z_NEED_DICT MZ_NEED_DICT + #define Z_ERRNO MZ_ERRNO + #define Z_STREAM_ERROR MZ_STREAM_ERROR + #define Z_DATA_ERROR MZ_DATA_ERROR + #define Z_MEM_ERROR MZ_MEM_ERROR + #define Z_BUF_ERROR MZ_BUF_ERROR + #define Z_VERSION_ERROR MZ_VERSION_ERROR + #define Z_PARAM_ERROR MZ_PARAM_ERROR + #define Z_NO_COMPRESSION MZ_NO_COMPRESSION + #define Z_BEST_SPEED MZ_BEST_SPEED + #define Z_BEST_COMPRESSION MZ_BEST_COMPRESSION + #define Z_DEFAULT_COMPRESSION MZ_DEFAULT_COMPRESSION + #define Z_DEFAULT_STRATEGY MZ_DEFAULT_STRATEGY + #define Z_FILTERED MZ_FILTERED + #define Z_HUFFMAN_ONLY MZ_HUFFMAN_ONLY + #define Z_RLE MZ_RLE + #define Z_FIXED MZ_FIXED + #define Z_DEFLATED MZ_DEFLATED + #define Z_DEFAULT_WINDOW_BITS MZ_DEFAULT_WINDOW_BITS + #define alloc_func mz_alloc_func + #define free_func mz_free_func + #define internal_state mz_internal_state + #define z_stream mz_stream + #define deflateInit mz_deflateInit + #define deflateInit2 mz_deflateInit2 + #define deflateReset mz_deflateReset + #define deflate mz_deflate + #define deflateEnd mz_deflateEnd + #define deflateBound mz_deflateBound + #define compress mz_compress + #define compress2 mz_compress2 + #define compressBound mz_compressBound + #define inflateInit mz_inflateInit + #define inflateInit2 mz_inflateInit2 + #define inflate mz_inflate + #define inflateEnd mz_inflateEnd + #define uncompress mz_uncompress + #define crc32 mz_crc32 + 
#define adler32 mz_adler32 + #define MAX_WBITS 15 + #define MAX_MEM_LEVEL 9 + #define zError mz_error + #define ZLIB_VERSION MZ_VERSION + #define ZLIB_VERNUM MZ_VERNUM + #define ZLIB_VER_MAJOR MZ_VER_MAJOR + #define ZLIB_VER_MINOR MZ_VER_MINOR + #define ZLIB_VER_REVISION MZ_VER_REVISION + #define ZLIB_VER_SUBREVISION MZ_VER_SUBREVISION + #define zlibVersion mz_version + #define zlib_version mz_version() +#endif // #ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES + +#endif // MINIZ_NO_ZLIB_APIS + +// ------------------- Types and macros + +typedef unsigned char mz_uint8; +typedef signed short mz_int16; +typedef unsigned short mz_uint16; +typedef unsigned int mz_uint32; +typedef unsigned int mz_uint; +typedef long long mz_int64; +typedef unsigned long long mz_uint64; +typedef int mz_bool; + +#define MZ_FALSE (0) +#define MZ_TRUE (1) + +// An attempt to work around MSVC's spammy "warning C4127: conditional expression is constant" message. +#ifdef _MSC_VER + #define MZ_MACRO_END while (0, 0) +#else + #define MZ_MACRO_END while (0) +#endif + +// ------------------- ZIP archive reading/writing + +#ifndef MINIZ_NO_ARCHIVE_APIS + +enum +{ + MZ_ZIP_MAX_IO_BUF_SIZE = 64*1024, + MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE = 260, + MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE = 256 +}; + +typedef struct +{ + mz_uint32 m_file_index; + mz_uint32 m_central_dir_ofs; + mz_uint16 m_version_made_by; + mz_uint16 m_version_needed; + mz_uint16 m_bit_flag; + mz_uint16 m_method; +#ifndef MINIZ_NO_TIME + time_t m_time; +#endif + mz_uint32 m_crc32; + mz_uint64 m_comp_size; + mz_uint64 m_uncomp_size; + mz_uint16 m_internal_attr; + mz_uint32 m_external_attr; + mz_uint64 m_local_header_ofs; + mz_uint32 m_comment_size; + char m_filename[MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE]; + char m_comment[MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE]; +} mz_zip_archive_file_stat; + +typedef size_t (*mz_file_read_func)(void *pOpaque, mz_uint64 file_ofs, void *pBuf, size_t n); +typedef size_t (*mz_file_write_func)(void *pOpaque, mz_uint64 file_ofs, 
const void *pBuf, size_t n); + +struct mz_zip_internal_state_tag; +typedef struct mz_zip_internal_state_tag mz_zip_internal_state; + +typedef enum +{ + MZ_ZIP_MODE_INVALID = 0, + MZ_ZIP_MODE_READING = 1, + MZ_ZIP_MODE_WRITING = 2, + MZ_ZIP_MODE_WRITING_HAS_BEEN_FINALIZED = 3 +} mz_zip_mode; + +typedef struct mz_zip_archive_tag +{ + mz_uint64 m_archive_size; + mz_uint64 m_central_directory_file_ofs; + mz_uint m_total_files; + mz_zip_mode m_zip_mode; + + mz_uint m_file_offset_alignment; + + mz_alloc_func m_pAlloc; + mz_free_func m_pFree; + mz_realloc_func m_pRealloc; + void *m_pAlloc_opaque; + + mz_file_read_func m_pRead; + mz_file_write_func m_pWrite; + void *m_pIO_opaque; + + mz_zip_internal_state *m_pState; + +} mz_zip_archive; + +typedef enum +{ + MZ_ZIP_FLAG_CASE_SENSITIVE = 0x0100, + MZ_ZIP_FLAG_IGNORE_PATH = 0x0200, + MZ_ZIP_FLAG_COMPRESSED_DATA = 0x0400, + MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY = 0x0800 +} mz_zip_flags; + +// ZIP archive reading + +// Inits a ZIP archive reader. +// These functions read and validate the archive's central directory. +mz_bool mz_zip_reader_init(mz_zip_archive *pZip, mz_uint64 size, mz_uint32 flags); +mz_bool mz_zip_reader_init_mem(mz_zip_archive *pZip, const void *pMem, size_t size, mz_uint32 flags); + +#ifndef MINIZ_NO_STDIO +mz_bool mz_zip_reader_init_file(mz_zip_archive *pZip, const char *pFilename, mz_uint32 flags); +#endif + +// Returns the total number of files in the archive. +mz_uint mz_zip_reader_get_num_files(mz_zip_archive *pZip); + +// Returns detailed information about an archive file entry. +mz_bool mz_zip_reader_file_stat(mz_zip_archive *pZip, mz_uint file_index, mz_zip_archive_file_stat *pStat); + +// Determines if an archive file entry is a directory entry. +mz_bool mz_zip_reader_is_file_a_directory(mz_zip_archive *pZip, mz_uint file_index); +mz_bool mz_zip_reader_is_file_encrypted(mz_zip_archive *pZip, mz_uint file_index); + +// Retrieves the filename of an archive file entry. 
+// Returns the number of bytes written to pFilename, or if filename_buf_size is 0 this function returns the number of bytes needed to fully store the filename. +mz_uint mz_zip_reader_get_filename(mz_zip_archive *pZip, mz_uint file_index, char *pFilename, mz_uint filename_buf_size); + +// Attempts to locates a file in the archive's central directory. +// Valid flags: MZ_ZIP_FLAG_CASE_SENSITIVE, MZ_ZIP_FLAG_IGNORE_PATH +// Returns -1 if the file cannot be found. +int mz_zip_reader_locate_file(mz_zip_archive *pZip, const char *pName, const char *pComment, mz_uint flags); + +// Extracts a archive file to a memory buffer using no memory allocation. +mz_bool mz_zip_reader_extract_to_mem_no_alloc(mz_zip_archive *pZip, mz_uint file_index, void *pBuf, size_t buf_size, mz_uint flags, void *pUser_read_buf, size_t user_read_buf_size); +mz_bool mz_zip_reader_extract_file_to_mem_no_alloc(mz_zip_archive *pZip, const char *pFilename, void *pBuf, size_t buf_size, mz_uint flags, void *pUser_read_buf, size_t user_read_buf_size); + +// Extracts a archive file to a memory buffer. +mz_bool mz_zip_reader_extract_to_mem(mz_zip_archive *pZip, mz_uint file_index, void *pBuf, size_t buf_size, mz_uint flags); +mz_bool mz_zip_reader_extract_file_to_mem(mz_zip_archive *pZip, const char *pFilename, void *pBuf, size_t buf_size, mz_uint flags); + +// Extracts a archive file to a dynamically allocated heap buffer. +void *mz_zip_reader_extract_to_heap(mz_zip_archive *pZip, mz_uint file_index, size_t *pSize, mz_uint flags); +void *mz_zip_reader_extract_file_to_heap(mz_zip_archive *pZip, const char *pFilename, size_t *pSize, mz_uint flags); + +// Extracts a archive file using a callback function to output the file's data. 
+mz_bool mz_zip_reader_extract_to_callback(mz_zip_archive *pZip, mz_uint file_index, mz_file_write_func pCallback, void *pOpaque, mz_uint flags); +mz_bool mz_zip_reader_extract_file_to_callback(mz_zip_archive *pZip, const char *pFilename, mz_file_write_func pCallback, void *pOpaque, mz_uint flags); + +#ifndef MINIZ_NO_STDIO +// Extracts a archive file to a disk file and sets its last accessed and modified times. +// This function only extracts files, not archive directory records. +mz_bool mz_zip_reader_extract_to_file(mz_zip_archive *pZip, mz_uint file_index, const char *pDst_filename, mz_uint flags); +mz_bool mz_zip_reader_extract_file_to_file(mz_zip_archive *pZip, const char *pArchive_filename, const char *pDst_filename, mz_uint flags); +#endif + +// Ends archive reading, freeing all allocations, and closing the input archive file if mz_zip_reader_init_file() was used. +mz_bool mz_zip_reader_end(mz_zip_archive *pZip); + +// ZIP archive writing + +#ifndef MINIZ_NO_ARCHIVE_WRITING_APIS + +// Inits a ZIP archive writer. +mz_bool mz_zip_writer_init(mz_zip_archive *pZip, mz_uint64 existing_size); +mz_bool mz_zip_writer_init_heap(mz_zip_archive *pZip, size_t size_to_reserve_at_beginning, size_t initial_allocation_size); + +#ifndef MINIZ_NO_STDIO +mz_bool mz_zip_writer_init_file(mz_zip_archive *pZip, const char *pFilename, mz_uint64 size_to_reserve_at_beginning); +#endif + +// Converts a ZIP archive reader object into a writer object, to allow efficient in-place file appends to occur on an existing archive. +// For archives opened using mz_zip_reader_init_file, pFilename must be the archive's filename so it can be reopened for writing. If the file can't be reopened, mz_zip_reader_end() will be called. +// For archives opened using mz_zip_reader_init_mem, the memory block must be growable using the realloc callback (which defaults to realloc unless you've overridden it). 
+// Finally, for archives opened using mz_zip_reader_init, the mz_zip_archive's user provided m_pWrite function cannot be NULL. +// Note: In-place archive modification is not recommended unless you know what you're doing, because if execution stops or something goes wrong before +// the archive is finalized the file's central directory will be hosed. +mz_bool mz_zip_writer_init_from_reader(mz_zip_archive *pZip, const char *pFilename); + +// Adds the contents of a memory buffer to an archive. These functions record the current local time into the archive. +// To add a directory entry, call this method with an archive name ending in a forwardslash with empty buffer. +// level_and_flags - compression level (0-10, see MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or just set to MZ_DEFAULT_COMPRESSION. +mz_bool mz_zip_writer_add_mem(mz_zip_archive *pZip, const char *pArchive_name, const void *pBuf, size_t buf_size, mz_uint level_and_flags); +mz_bool mz_zip_writer_add_mem_ex(mz_zip_archive *pZip, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags, mz_uint64 uncomp_size, mz_uint32 uncomp_crc32); + +#ifndef MINIZ_NO_STDIO +// Adds the contents of a disk file to an archive. This function also records the disk file's modified time into the archive. +// level_and_flags - compression level (0-10, see MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or just set to MZ_DEFAULT_COMPRESSION. +mz_bool mz_zip_writer_add_file(mz_zip_archive *pZip, const char *pArchive_name, const char *pSrc_filename, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags); +#endif + +// Adds a file to an archive by fully cloning the data from another archive. +// This function fully clones the source file's compressed data (no recompression), along with its full filename, extra data, and comment fields. 
+mz_bool mz_zip_writer_add_from_zip_reader(mz_zip_archive *pZip, mz_zip_archive *pSource_zip, mz_uint file_index); + +// Finalizes the archive by writing the central directory records followed by the end of central directory record. +// After an archive is finalized, the only valid call on the mz_zip_archive struct is mz_zip_writer_end(). +// An archive must be manually finalized by calling this function for it to be valid. +mz_bool mz_zip_writer_finalize_archive(mz_zip_archive *pZip); +mz_bool mz_zip_writer_finalize_heap_archive(mz_zip_archive *pZip, void **pBuf, size_t *pSize); + +// Ends archive writing, freeing all allocations, and closing the output file if mz_zip_writer_init_file() was used. +// Note for the archive to be valid, it must have been finalized before ending. +mz_bool mz_zip_writer_end(mz_zip_archive *pZip); + +// Misc. high-level helper functions: + +// mz_zip_add_mem_to_archive_file_in_place() efficiently (but not atomically) appends a memory blob to a ZIP archive. +// level_and_flags - compression level (0-10, see MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or just set to MZ_DEFAULT_COMPRESSION. +mz_bool mz_zip_add_mem_to_archive_file_in_place(const char *pZip_filename, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags); + +// Reads a single file from an archive into a heap block. +// Returns NULL on failure. +void *mz_zip_extract_archive_file_to_heap(const char *pZip_filename, const char *pArchive_name, size_t *pSize, mz_uint zip_flags); + +#endif // #ifndef MINIZ_NO_ARCHIVE_WRITING_APIS + +#endif // #ifndef MINIZ_NO_ARCHIVE_APIS + +// ------------------- Low-level Decompression API Definitions + +// Decompression flags used by tinfl_decompress(). +// TINFL_FLAG_PARSE_ZLIB_HEADER: If set, the input has a valid zlib header and ends with an adler32 checksum (it's a valid zlib stream). 
Otherwise, the input is a raw deflate stream. +// TINFL_FLAG_HAS_MORE_INPUT: If set, there are more input bytes available beyond the end of the supplied input buffer. If clear, the input buffer contains all remaining input. +// TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF: If set, the output buffer is large enough to hold the entire decompressed stream. If clear, the output buffer is at least the size of the dictionary (typically 32KB). +// TINFL_FLAG_COMPUTE_ADLER32: Force adler-32 checksum computation of the decompressed bytes. +enum +{ + TINFL_FLAG_PARSE_ZLIB_HEADER = 1, + TINFL_FLAG_HAS_MORE_INPUT = 2, + TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF = 4, + TINFL_FLAG_COMPUTE_ADLER32 = 8 +}; + +// High level decompression functions: +// tinfl_decompress_mem_to_heap() decompresses a block in memory to a heap block allocated via malloc(). +// On entry: +// pSrc_buf, src_buf_len: Pointer and size of the Deflate or zlib source data to decompress. +// On return: +// Function returns a pointer to the decompressed data, or NULL on failure. +// *pOut_len will be set to the decompressed data's size, which could be larger than src_buf_len on uncompressible data. +// The caller must call mz_free() on the returned block when it's no longer needed. +void *tinfl_decompress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags); + +// tinfl_decompress_mem_to_mem() decompresses a block in memory to another block in memory. +// Returns TINFL_DECOMPRESS_MEM_TO_MEM_FAILED on failure, or the number of bytes written on success. +#define TINFL_DECOMPRESS_MEM_TO_MEM_FAILED ((size_t)(-1)) +size_t tinfl_decompress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags); + +// tinfl_decompress_mem_to_callback() decompresses a block in memory to an internal 32KB buffer, and a user provided callback function will be called to flush the buffer. +// Returns 1 on success or 0 on failure. 
+typedef int (*tinfl_put_buf_func_ptr)(const void* pBuf, int len, void *pUser); +int tinfl_decompress_mem_to_callback(const void *pIn_buf, size_t *pIn_buf_size, tinfl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags); + +struct tinfl_decompressor_tag; typedef struct tinfl_decompressor_tag tinfl_decompressor; + +// Max size of LZ dictionary. +#define TINFL_LZ_DICT_SIZE 32768 + +// Return status. +typedef enum +{ + TINFL_STATUS_BAD_PARAM = -3, + TINFL_STATUS_ADLER32_MISMATCH = -2, + TINFL_STATUS_FAILED = -1, + TINFL_STATUS_DONE = 0, + TINFL_STATUS_NEEDS_MORE_INPUT = 1, + TINFL_STATUS_HAS_MORE_OUTPUT = 2 +} tinfl_status; + +// Initializes the decompressor to its initial state. +#define tinfl_init(r) do { (r)->m_state = 0; } MZ_MACRO_END +#define tinfl_get_adler32(r) (r)->m_check_adler32 + +// Main low-level decompressor coroutine function. This is the only function actually needed for decompression. All the other functions are just high-level helpers for improved usability. +// This is a universal API, i.e. it can be used as a building block to build any desired higher level decompression API. In the limit case, it can be called once per every byte input or output. +tinfl_status tinfl_decompress(tinfl_decompressor *r, const mz_uint8 *pIn_buf_next, size_t *pIn_buf_size, mz_uint8 *pOut_buf_start, mz_uint8 *pOut_buf_next, size_t *pOut_buf_size, const mz_uint32 decomp_flags); + +// Internal/private bits follow. 
+enum +{ + TINFL_MAX_HUFF_TABLES = 3, TINFL_MAX_HUFF_SYMBOLS_0 = 288, TINFL_MAX_HUFF_SYMBOLS_1 = 32, TINFL_MAX_HUFF_SYMBOLS_2 = 19, + TINFL_FAST_LOOKUP_BITS = 10, TINFL_FAST_LOOKUP_SIZE = 1 << TINFL_FAST_LOOKUP_BITS +}; + +typedef struct +{ + mz_uint8 m_code_size[TINFL_MAX_HUFF_SYMBOLS_0]; + mz_int16 m_look_up[TINFL_FAST_LOOKUP_SIZE], m_tree[TINFL_MAX_HUFF_SYMBOLS_0 * 2]; +} tinfl_huff_table; + +#if MINIZ_HAS_64BIT_REGISTERS + #define TINFL_USE_64BIT_BITBUF 1 +#endif + +#if TINFL_USE_64BIT_BITBUF + typedef mz_uint64 tinfl_bit_buf_t; + #define TINFL_BITBUF_SIZE (64) +#else + typedef mz_uint32 tinfl_bit_buf_t; + #define TINFL_BITBUF_SIZE (32) +#endif + +struct tinfl_decompressor_tag +{ + mz_uint32 m_state, m_num_bits, m_zhdr0, m_zhdr1, m_z_adler32, m_final, m_type, m_check_adler32, m_dist, m_counter, m_num_extra, m_table_sizes[TINFL_MAX_HUFF_TABLES]; + tinfl_bit_buf_t m_bit_buf; + size_t m_dist_from_out_buf_start; + tinfl_huff_table m_tables[TINFL_MAX_HUFF_TABLES]; + mz_uint8 m_raw_header[4], m_len_codes[TINFL_MAX_HUFF_SYMBOLS_0 + TINFL_MAX_HUFF_SYMBOLS_1 + 137]; +}; + +// ------------------- Low-level Compression API Definitions + +// Set TDEFL_LESS_MEMORY to 1 to use less memory (compression will be slightly slower, and raw/dynamic blocks will be output more frequently). +#define TDEFL_LESS_MEMORY 0 + +// tdefl_init() compression flags logically OR'd together (low 12 bits contain the max. number of probes per dictionary search): +// TDEFL_DEFAULT_MAX_PROBES: The compressor defaults to 128 dictionary probes per dictionary search. 0=Huffman only, 1=Huffman+LZ (fastest/crap compression), 4095=Huffman+LZ (slowest/best compression). +enum +{ + TDEFL_HUFFMAN_ONLY = 0, TDEFL_DEFAULT_MAX_PROBES = 128, TDEFL_MAX_PROBES_MASK = 0xFFF +}; + +// TDEFL_WRITE_ZLIB_HEADER: If set, the compressor outputs a zlib header before the deflate data, and the Adler-32 of the source data at the end. Otherwise, you'll get raw deflate data. 
+// TDEFL_COMPUTE_ADLER32: Always compute the adler-32 of the input data (even when not writing zlib headers). +// TDEFL_GREEDY_PARSING_FLAG: Set to use faster greedy parsing, instead of more efficient lazy parsing. +// TDEFL_NONDETERMINISTIC_PARSING_FLAG: Enable to decrease the compressor's initialization time to the minimum, but the output may vary from run to run given the same input (depending on the contents of memory). +// TDEFL_RLE_MATCHES: Only look for RLE matches (matches with a distance of 1) +// TDEFL_FILTER_MATCHES: Discards matches <= 5 chars if enabled. +// TDEFL_FORCE_ALL_STATIC_BLOCKS: Disable usage of optimized Huffman tables. +// TDEFL_FORCE_ALL_RAW_BLOCKS: Only use raw (uncompressed) deflate blocks. +// The low 12 bits are reserved to control the max # of hash probes per dictionary lookup (see TDEFL_MAX_PROBES_MASK). +enum +{ + TDEFL_WRITE_ZLIB_HEADER = 0x01000, + TDEFL_COMPUTE_ADLER32 = 0x02000, + TDEFL_GREEDY_PARSING_FLAG = 0x04000, + TDEFL_NONDETERMINISTIC_PARSING_FLAG = 0x08000, + TDEFL_RLE_MATCHES = 0x10000, + TDEFL_FILTER_MATCHES = 0x20000, + TDEFL_FORCE_ALL_STATIC_BLOCKS = 0x40000, + TDEFL_FORCE_ALL_RAW_BLOCKS = 0x80000 +}; + +// High level compression functions: +// tdefl_compress_mem_to_heap() compresses a block in memory to a heap block allocated via malloc(). +// On entry: +// pSrc_buf, src_buf_len: Pointer and size of source block to compress. +// flags: The max match finder probes (default is 128) logically OR'd against the above flags. Higher probes are slower but improve compression. +// On return: +// Function returns a pointer to the compressed data, or NULL on failure. +// *pOut_len will be set to the compressed data's size, which could be larger than src_buf_len on uncompressible data. +// The caller must free() the returned block when it's no longer needed. 
+void *tdefl_compress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags); + +// tdefl_compress_mem_to_mem() compresses a block in memory to another block in memory. +// Returns 0 on failure. +size_t tdefl_compress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags); + +// Compresses an image to a compressed PNG file in memory. +// On entry: +// pImage, w, h, and num_chans describe the image to compress. num_chans may be 1, 2, 3, or 4. +// The image pitch in bytes per scanline will be w*num_chans. The leftmost pixel on the top scanline is stored first in memory. +// level may range from [0,10], use MZ_NO_COMPRESSION, MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc. or a decent default is MZ_DEFAULT_LEVEL +// If flip is true, the image will be flipped on the Y axis (useful for OpenGL apps). +// On return: +// Function returns a pointer to the compressed data, or NULL on failure. +// *pLen_out will be set to the size of the PNG image file. +// The caller must mz_free() the returned heap block (which will typically be larger than *pLen_out) when it's no longer needed. +void *tdefl_write_image_to_png_file_in_memory_ex(const void *pImage, int w, int h, int num_chans, size_t *pLen_out, mz_uint level, mz_bool flip); +void *tdefl_write_image_to_png_file_in_memory(const void *pImage, int w, int h, int num_chans, size_t *pLen_out); + +// Output stream interface. The compressor uses this interface to write compressed data. It'll typically be called TDEFL_OUT_BUF_SIZE at a time. +typedef mz_bool (*tdefl_put_buf_func_ptr)(const void* pBuf, int len, void *pUser); + +// tdefl_compress_mem_to_output() compresses a block to an output stream. The above helpers use this function internally. 
+mz_bool tdefl_compress_mem_to_output(const void *pBuf, size_t buf_len, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags); + +enum { TDEFL_MAX_HUFF_TABLES = 3, TDEFL_MAX_HUFF_SYMBOLS_0 = 288, TDEFL_MAX_HUFF_SYMBOLS_1 = 32, TDEFL_MAX_HUFF_SYMBOLS_2 = 19, TDEFL_LZ_DICT_SIZE = 32768, TDEFL_LZ_DICT_SIZE_MASK = TDEFL_LZ_DICT_SIZE - 1, TDEFL_MIN_MATCH_LEN = 3, TDEFL_MAX_MATCH_LEN = 258 }; + +// TDEFL_OUT_BUF_SIZE MUST be large enough to hold a single entire compressed output block (using static/fixed Huffman codes). +#if TDEFL_LESS_MEMORY +enum { TDEFL_LZ_CODE_BUF_SIZE = 24 * 1024, TDEFL_OUT_BUF_SIZE = (TDEFL_LZ_CODE_BUF_SIZE * 13 ) / 10, TDEFL_MAX_HUFF_SYMBOLS = 288, TDEFL_LZ_HASH_BITS = 12, TDEFL_LEVEL1_HASH_SIZE_MASK = 4095, TDEFL_LZ_HASH_SHIFT = (TDEFL_LZ_HASH_BITS + 2) / 3, TDEFL_LZ_HASH_SIZE = 1 << TDEFL_LZ_HASH_BITS }; +#else +enum { TDEFL_LZ_CODE_BUF_SIZE = 64 * 1024, TDEFL_OUT_BUF_SIZE = (TDEFL_LZ_CODE_BUF_SIZE * 13 ) / 10, TDEFL_MAX_HUFF_SYMBOLS = 288, TDEFL_LZ_HASH_BITS = 15, TDEFL_LEVEL1_HASH_SIZE_MASK = 4095, TDEFL_LZ_HASH_SHIFT = (TDEFL_LZ_HASH_BITS + 2) / 3, TDEFL_LZ_HASH_SIZE = 1 << TDEFL_LZ_HASH_BITS }; +#endif + +// The low-level tdefl functions below may be used directly if the above helper functions aren't flexible enough. The low-level functions don't make any heap allocations, unlike the above helper functions. +typedef enum +{ + TDEFL_STATUS_BAD_PARAM = -2, + TDEFL_STATUS_PUT_BUF_FAILED = -1, + TDEFL_STATUS_OKAY = 0, + TDEFL_STATUS_DONE = 1, +} tdefl_status; + +// Must map to MZ_NO_FLUSH, MZ_SYNC_FLUSH, etc. enums +typedef enum +{ + TDEFL_NO_FLUSH = 0, + TDEFL_SYNC_FLUSH = 2, + TDEFL_FULL_FLUSH = 3, + TDEFL_FINISH = 4 +} tdefl_flush; + +// tdefl's compression state structure. 
+typedef struct +{ + tdefl_put_buf_func_ptr m_pPut_buf_func; + void *m_pPut_buf_user; + mz_uint m_flags, m_max_probes[2]; + int m_greedy_parsing; + mz_uint m_adler32, m_lookahead_pos, m_lookahead_size, m_dict_size; + mz_uint8 *m_pLZ_code_buf, *m_pLZ_flags, *m_pOutput_buf, *m_pOutput_buf_end; + mz_uint m_num_flags_left, m_total_lz_bytes, m_lz_code_buf_dict_pos, m_bits_in, m_bit_buffer; + mz_uint m_saved_match_dist, m_saved_match_len, m_saved_lit, m_output_flush_ofs, m_output_flush_remaining, m_finished, m_block_index, m_wants_to_finish; + tdefl_status m_prev_return_status; + const void *m_pIn_buf; + void *m_pOut_buf; + size_t *m_pIn_buf_size, *m_pOut_buf_size; + tdefl_flush m_flush; + const mz_uint8 *m_pSrc; + size_t m_src_buf_left, m_out_buf_ofs; + mz_uint8 m_dict[TDEFL_LZ_DICT_SIZE + TDEFL_MAX_MATCH_LEN - 1]; + mz_uint16 m_huff_count[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS]; + mz_uint16 m_huff_codes[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS]; + mz_uint8 m_huff_code_sizes[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS]; + mz_uint8 m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE]; + mz_uint16 m_next[TDEFL_LZ_DICT_SIZE]; + mz_uint16 m_hash[TDEFL_LZ_HASH_SIZE]; + mz_uint8 m_output_buf[TDEFL_OUT_BUF_SIZE]; +} tdefl_compressor; + +// Initializes the compressor. +// There is no corresponding deinit() function because the tdefl API's do not dynamically allocate memory. +// pBut_buf_func: If NULL, output data will be supplied to the specified callback. In this case, the user should call the tdefl_compress_buffer() API for compression. +// If pBut_buf_func is NULL the user should always call the tdefl_compress() API. +// flags: See the above enums (TDEFL_HUFFMAN_ONLY, TDEFL_WRITE_ZLIB_HEADER, etc.) 
+tdefl_status tdefl_init(tdefl_compressor *d, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags); + +// Compresses a block of data, consuming as much of the specified input buffer as possible, and writing as much compressed data to the specified output buffer as possible. +tdefl_status tdefl_compress(tdefl_compressor *d, const void *pIn_buf, size_t *pIn_buf_size, void *pOut_buf, size_t *pOut_buf_size, tdefl_flush flush); + +// tdefl_compress_buffer() is only usable when the tdefl_init() is called with a non-NULL tdefl_put_buf_func_ptr. +// tdefl_compress_buffer() always consumes the entire input buffer. +tdefl_status tdefl_compress_buffer(tdefl_compressor *d, const void *pIn_buf, size_t in_buf_size, tdefl_flush flush); + +tdefl_status tdefl_get_prev_return_status(tdefl_compressor *d); +mz_uint32 tdefl_get_adler32(tdefl_compressor *d); + +// Can't use tdefl_create_comp_flags_from_zip_params if MINIZ_NO_ZLIB_APIS isn't defined, because it uses some of its macros. +#ifndef MINIZ_NO_ZLIB_APIS +// Create tdefl_compress() flags given zlib-style compression parameters. 
+// level may range from [0,10] (where 10 is absolute max compression, but may be much slower on some files) +// window_bits may be -15 (raw deflate) or 15 (zlib) +// strategy may be either MZ_DEFAULT_STRATEGY, MZ_FILTERED, MZ_HUFFMAN_ONLY, MZ_RLE, or MZ_FIXED +mz_uint tdefl_create_comp_flags_from_zip_params(int level, int window_bits, int strategy); +#endif // #ifndef MINIZ_NO_ZLIB_APIS + +#ifdef __cplusplus +} +#endif + +#endif // MINIZ_HEADER_INCLUDED diff --git a/test_conformance/CMakeCommon.txt b/test_conformance/CMakeCommon.txt new file mode 100644 index 00000000..62c339d7 --- /dev/null +++ b/test_conformance/CMakeCommon.txt @@ -0,0 +1,13 @@ +set_source_files_properties(COMPILE_FLAGS -msse2) + +string(TOLOWER ${MODULE_NAME} MODULE_NAME_LOWER) + +set(${MODULE_NAME}_OUT ${CONFORMANCE_PREFIX}${MODULE_NAME_LOWER}${CONFORMANCE_SUFFIX}) + +add_executable(${${MODULE_NAME}_OUT} ${${MODULE_NAME}_SOURCES}) + +set_source_files_properties(${${MODULE_NAME}_SOURCES} PROPERTIES LANGUAGE CXX) + +set_property(TARGET ${${MODULE_NAME}_OUT} PROPERTY FOLDER "CONFORMANCE${CONFORMANCE_SUFFIX}") + +TARGET_LINK_LIBRARIES(${${MODULE_NAME}_OUT} ${CLConform_LIBRARIES}) diff --git a/test_conformance/CMakeLists.txt b/test_conformance/CMakeLists.txt new file mode 100644 index 00000000..deb9d2ff --- /dev/null +++ b/test_conformance/CMakeLists.txt @@ -0,0 +1,92 @@ +# Remember current source directory (`test_conformance'). 
+set( CLConf_Install_Base_Dir "${CMAKE_CURRENT_SOURCE_DIR}" ) + +add_subdirectory( allocations ) +add_subdirectory( api ) +add_subdirectory( atomics ) +add_subdirectory( basic ) +add_subdirectory( buffers ) +add_subdirectory( commonfns ) +add_subdirectory( compatibility ) +add_subdirectory( compiler ) +add_subdirectory( computeinfo ) +add_subdirectory( contractions ) +add_subdirectory( conversions ) +if(D3D10_IS_SUPPORTED) + add_subdirectory( d3d10 ) +endif(D3D10_IS_SUPPORTED) +if(D3D11_IS_SUPPORTED) + add_subdirectory( d3d11 ) +endif(D3D11_IS_SUPPORTED) +add_subdirectory( device_partition ) +add_subdirectory( events ) +add_subdirectory( geometrics ) +if(GL_IS_SUPPORTED) + add_subdirectory( gl ) +endif(GL_IS_SUPPORTED) +if(GLES_IS_SUPPORTED) + add_subdirectory(gles) +endif(GLES_IS_SUPPORTED) +add_subdirectory( half ) +add_subdirectory( headers ) +add_subdirectory( images ) +add_subdirectory( integer_ops ) +add_subdirectory( math_brute_force ) +add_subdirectory( mem_host_flags ) +add_subdirectory( multiple_device_context ) +add_subdirectory( printf ) +add_subdirectory( profiling ) +add_subdirectory( relationals ) +add_subdirectory( select ) +add_subdirectory( thread_dimensions ) +add_subdirectory( vec_align ) +add_subdirectory( vec_step ) +add_subdirectory( c11_atomics ) +add_subdirectory( device_execution ) +add_subdirectory( non_uniform_work_group ) +add_subdirectory( SVM ) +add_subdirectory( generic_address_space ) +add_subdirectory( subgroups ) +add_subdirectory( workgroups ) +add_subdirectory( pipes ) +add_subdirectory( device_timer ) +add_subdirectory( clcpp ) + +set(CSV_FILES + opencl_conformance_tests_21_full_spirv.csv + opencl_conformance_tests_21_legacy_wimpy.csv + opencl_conformance_tests_22.csv + opencl_conformance_tests_generate_spirv.csv + opencl_conformance_tests_conversions.csv + opencl_conformance_tests_d3d.csv + opencl_conformance_tests_full.csv + opencl_conformance_tests_full_no_math_or_conversions.csv + opencl_conformance_tests_math.csv + 
opencl_conformance_tests_quick.csv +) +set(PY_FILES + run_conformance.py +) + +# Copy .csv files +foreach(FILE ${CSV_FILES}) + configure_file(${FILE} ${FILE} COPYONLY) +endforeach() + +# Copy test run script +foreach(FILE ${PY_FILES}) + if(WIN32) + configure_file(${FILE} ${FILE} COPYONLY) + else(WIN32) + # Copy to CMakeFiles + configure_file(${FILE} ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/${FILE} COPYONLY) + # Copy to final destination and set permissions + file(COPY ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/${FILE} + DESTINATION ${CMAKE_BINARY_DIR}/test_conformance + FILE_PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ + GROUP_EXECUTE WORLD_READ WORLD_EXECUTE) + endif(WIN32) +endforeach() +foreach(FILE test_conformance/${PY_FILES}) + +endforeach() \ No newline at end of file diff --git a/test_conformance/Jamfile b/test_conformance/Jamfile new file mode 100644 index 00000000..5065fe45 --- /dev/null +++ b/test_conformance/Jamfile @@ -0,0 +1,24 @@ +project + : requirements + /harness//harness + off + ; + +use-project /harness : ../test_common/harness ; + +proj_lst = allocations api atomics basic buffers commonfns compiler + computeinfo contractions conversions events geometrics gl + half images integer_ops math_brute_force multiple_device_context + profiling relationals select thread_dimensions ; + +for proj in $(proj_lst) +{ + build-project $(proj) ; +} + +install data + : [ glob *.csv ] [ glob *.py ] + : debug:$(DIST)/debug/tests/test_conformance + release:$(DIST)/release/tests/test_conformance + ; + diff --git a/test_conformance/Makefile b/test_conformance/Makefile new file mode 100644 index 00000000..cfcb271b --- /dev/null +++ b/test_conformance/Makefile @@ -0,0 +1,61 @@ + +PRODUCTS = \ + allocations/ \ + api/ \ + atomics/ \ + basic/ \ + buffers/ \ + commonfns/ \ + compatibility/test_conformance/ \ + compiler/ \ + computeinfo/ \ + contractions/ \ + conversions/ \ + device_partition/ \ + events/ \ + geometrics/ \ + gl/ \ + half/ \ + headers/ \ + 
images/ \ + integer_ops/ \ + math_brute_force/ \ + mem_host_flags/ \ + multiple_device_context/ \ + printf/ \ + profiling/ \ + relationals/ \ + select/ \ + thread_dimensions/ \ + vec_align/ \ + vec_step/ \ + workgroups/ + + +TOP=$(shell pwd) + +all: $(PRODUCTS) + +clean: + @for testdir in $(dir $(PRODUCTS)) ; \ + do ( \ + echo "==================================================================================" ; \ + echo "Cleaning $$testdir" ; \ + echo "==================================================================================" ; \ + if test -d $$testdir; \ + then cd $$testdir && make clean; \ + else echo "Warning: Directory '$$testdir' Does Not Exist"; \ + fi; \ + ); \ + done \ + +$(PRODUCTS): + @echo "==================================================================================" ; + @echo "(`date "+%H:%M:%S"`) Make $@" ; + @echo "==================================================================================" ; + @if test -d $@; \ + then cd $(dir $@) && make -i; \ + else echo "Warning: Directory '$@' Does Not Exist"; \ + fi; \ + +.PHONY: clean $(PRODUCTS) all diff --git a/test_conformance/SVM/CMakeLists.txt b/test_conformance/SVM/CMakeLists.txt new file mode 100644 index 00000000..c7d0219e --- /dev/null +++ b/test_conformance/SVM/CMakeLists.txt @@ -0,0 +1,26 @@ +set(MODULE_NAME SVM) + +set(${MODULE_NAME}_SOURCES + main.cpp + test_allocate_shared_buffer.cpp + test_byte_granularity.cpp + test_cross_buffer_pointers.cpp + test_enqueue_api.cpp + test_fine_grain_memory_consistency.cpp + test_fine_grain_sync_buffers.cpp + test_pointer_passing.cpp + test_set_kernel_exec_info_svm_ptrs.cpp + test_shared_address_space_coarse_grain.cpp + test_shared_address_space_fine_grain.cpp + test_shared_address_space_fine_grain_buffers.cpp + test_shared_sub_buffers.cpp + test_migrate.cpp + ../../test_common/harness/testHarness.c + ../../test_common/harness/errorHelpers.c + ../../test_common/harness/kernelHelpers.c + ../../test_common/harness/mt19937.c + 
../../test_common/harness/msvc9.c + ../../test_common/harness/parseParameters.cpp +) + +include(../CMakeCommon.txt) diff --git a/test_conformance/SVM/Makefile b/test_conformance/SVM/Makefile new file mode 100644 index 00000000..35487d35 --- /dev/null +++ b/test_conformance/SVM/Makefile @@ -0,0 +1,54 @@ +ifdef BUILD_WITH_ATF +ATF = -framework ATF +USE_ATF = -DUSE_ATF +endif + +SRCS = main.c \ + test_allocate_shared_buffer.cpp \ + test_byte_granularity.cpp \ + test_cross_buffer_pointers.cpp \ + test_enqueue_api.cpp \ + test_fine_grain_memory_consistency.cpp \ + test_fine_grain_sync_buffers.cpp \ + test_pointer_passing.cpp \ + test_set_kernel_exec_info_svm_ptrs.cpp \ + test_shared_address_space_coarse_grain.cpp \ + test_shared_address_space_fine_grain_buffers.cpp \ + test_shared_address_space_fine_grain.cpp \ + test_shared_sub_buffers.cpp \ + test_migrate.cpp \ + ../../test_common/harness/errorHelpers.c \ + ../../test_common/harness/threadTesting.c \ + ../../test_common/harness/testHarness.c \ + ../../test_common/harness/kernelHelpers.c \ + ../../test_common/harness/typeWrappers.cpp \ + ../../test_common/harness/mt19937.c \ + +DEFINES = DONT_TEST_GARBAGE_POINTERS + +SOURCES = $(abspath $(SRCS)) +LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries +LIBPATH += -L. +HEADERS = +TARGET = test_SVM +INCLUDE = +COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32 +CC = c++ +CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE) +CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE) +LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF} + +OBJECTS := ${SOURCES:.c=.o} +OBJECTS := ${OBJECTS:.cpp=.o} + +TARGETOBJECT = +all: $(TARGET) + +$(TARGET): $(OBJECTS) + $(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES) + +clean: + rm -f $(TARGET) $(OBJECTS) + +.DEFAULT: + @echo The target \"$@\" does not exist in Makefile. 
diff --git a/test_conformance/SVM/common.h b/test_conformance/SVM/common.h new file mode 100644 index 00000000..a1d57a5f --- /dev/null +++ b/test_conformance/SVM/common.h @@ -0,0 +1,101 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef __COMMON_H__ +#define __COMMON_H__ + +#include "../../test_common/harness/compat.h" +#include "../../test_common/harness/testHarness.h" +#include "../../test_common/harness/errorHelpers.h" +#include "../../test_common/harness/kernelHelpers.h" +#include "../../test_common/harness/typeWrappers.h" + +#if (defined(_WIN32) || defined(_WIN64)) && defined(_MSC_VER) + #include +#endif + +typedef enum { + memory_order_relaxed, + memory_order_acquire, + memory_order_release, + memory_order_acq_rel, + memory_order_seq_cst +} cl_memory_order; + +cl_int AtomicLoadExplicit(volatile cl_int * pValue, cl_memory_order order); +cl_int AtomicFetchAddExplicit(volatile cl_int *object, cl_int operand, cl_memory_order o); + +template +bool AtomicCompareExchangeStrongExplicit(volatile T *a, T *expected, T desired, + cl_memory_order order_success, + cl_memory_order order_failure) +{ + T tmp; +#if defined( _MSC_VER ) || (defined( __INTEL_COMPILER ) && defined(WIN32)) + tmp = (T)InterlockedCompareExchange((volatile LONG *)a, (LONG)desired, *(LONG *)expected); +#elif defined(__GNUC__) + tmp = (T)__sync_val_compare_and_swap((volatile intptr_t*)a, (intptr_t)(*expected), (intptr_t)desired); 
+#else + log_info("Host function not implemented: atomic_compare_exchange\n"); + tmp = 0; +#endif + if(tmp == *expected) + return true; + *expected = tmp; + return false; +} + +// this checks for a NULL ptr and/or an error code +#define test_error2(error_code, ptr, msg) { if(error != 0) { test_error(error_code, msg); } else { if(NULL == ptr) {print_null_error(msg); return -1;} } } +#define print_null_error(msg) log_error("ERROR: %s! (NULL pointer detected %s:%d)\n", msg, __FILE__, __LINE__ ); + +// max possible number of queues needed, 1 for each device in platform. +#define MAXQ 32 + +typedef struct Node{ + cl_int global_id; + cl_int position_in_list; + struct Node* pNext; +} Node; + +extern void create_linked_lists(Node* pNodes, size_t num_lists, int list_length); +extern cl_int verify_linked_lists(Node* pNodes, size_t num_lists, int list_length); + +extern cl_int create_linked_lists_on_device(int qi, cl_command_queue q, cl_mem allocator, cl_kernel k, size_t numLists ); +extern cl_int verify_linked_lists_on_device(int qi, cl_command_queue q, cl_mem num_correct, cl_kernel k, cl_int ListLength, size_t numLists ); +extern cl_int create_linked_lists_on_device_no_map(int qi, cl_command_queue q, size_t *pAllocator, cl_kernel k, size_t numLists ); +extern cl_int verify_linked_lists_on_device_no_map(int qi, cl_command_queue q, cl_int *pNum_correct, cl_kernel k, cl_int ListLength, size_t numLists ); + +extern int test_byte_granularity(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_set_kernel_exec_info_svm_ptrs(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_fine_grain_memory_consistency(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_fine_grain_sync_buffers(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int 
test_shared_address_space_coarse_grain_old_api(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_shared_address_space_coarse_grain_new_api(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_shared_address_space_fine_grain_buffers(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_shared_address_space_fine_grain(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_cross_buffer_pointers_coarse_grain(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_svm_pointer_passing(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_allocate_shared_buffer(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_shared_sub_buffers(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_enqueue_api(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_migrate(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern cl_int create_cl_objects(cl_device_id device_from_harness, const char** ppCodeString, cl_context* context, cl_program *program, cl_command_queue *queues, cl_uint *num_devices, cl_device_svm_capabilities required_svm_caps); + +extern const char *linked_list_create_and_verify_kernels[]; + +#endif // #ifndef __COMMON_H__ + diff --git a/test_conformance/SVM/main.cpp b/test_conformance/SVM/main.cpp new file mode 100644 index 00000000..87c786da --- /dev/null +++ b/test_conformance/SVM/main.cpp @@ -0,0 +1,317 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include "../../test_common/harness/testHarness.h" +#include "../../test_common/harness/kernelHelpers.h" + +#include "common.h" + +// SVM Atomic wrappers. +// Platforms that support SVM atomics (atomics that work across the host and devices) need to implement these host side functions correctly. +// Platforms that do not support SVM atomics can simpy implement these functions as empty stubs since the functions will not be called. +// For now only Windows x86 is implemented, add support for other platforms as needed. +cl_int AtomicLoadExplicit(volatile cl_int * pValue, cl_memory_order order) +{ +#if (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))) || (defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))) + return *pValue; // provided the value is aligned x86 doesn't need anything more than this for seq_cst. +#elif defined(__GNUC__) + return __sync_add_and_fetch(pValue, 0); +#else + log_error("ERROR: AtomicLoadExplicit function not implemented\n"); + return -1; +#endif +} +// all the x86 atomics are seq_cst, so don't need to do anything with the memory order parameter. 
+cl_int AtomicFetchAddExplicit(volatile cl_int *object, cl_int operand, cl_memory_order o) +{ +#if (defined(_WIN32) || defined(_WIN64)) && defined(_MSC_VER) + return InterlockedExchangeAdd( (volatile LONG*) object, operand); +#elif defined(__GNUC__) + return __sync_fetch_and_add(object, operand); +#else + log_error("ERROR: AtomicFetchAddExplicit function not implemented\n"); + return -1; +#endif +} + +cl_int AtomicExchangeExplicit(volatile cl_int *object, cl_int desired, cl_memory_order mo) +{ +#if (defined(_WIN32) || defined(_WIN64)) && defined(_MSC_VER) + return InterlockedExchange( (volatile LONG*) object, desired); +#elif defined(__GNUC__) + return __sync_lock_test_and_set(object, desired); +#else + log_error("ERROR: AtomicExchangeExplicit function not implemented\n"); + return -1; +#endif +} + + +const char *linked_list_create_and_verify_kernels[] = { + "typedef struct Node {\n" + " int global_id;\n" + " int position_in_list;\n" + " __global struct Node* pNext;\n" + "} Node;\n" + "\n" + // The allocation_index parameter must be initialized on the host to N work-items + // The first N nodes in pNodes will be the heads of the lists. 
+ "__kernel void create_linked_lists(__global Node* pNodes, volatile __attribute__((nosvm)) __global int* allocation_index, int list_length)\n" + "{\n" + " size_t i = get_global_id(0);\n" + " __global Node *pNode = &pNodes[i];\n" + "\n" + " pNode->global_id = i;\n" + " pNode->position_in_list = 0;\n" + "\n" + " __global Node *pNew;\n" + " for(int j=1; j < list_length; j++)\n" + " {\n" + " pNew = &pNodes[ atomic_inc(allocation_index) ];// allocate a new node\n" + " pNew->global_id = i;\n" + " pNew->position_in_list = j;\n" + " pNode->pNext = pNew; // link new node onto end of list\n" + " pNode = pNew; // move to end of list\n" + " }\n" + "}\n" + + "__kernel void verify_linked_lists(__global Node* pNodes, volatile __global uint* num_correct, int list_length)\n" + "{\n" + " size_t i = get_global_id(0);\n" + " __global Node *pNode = &pNodes[i];\n" + "\n" + " for(int j=0; j < list_length; j++)\n" + " {\n" + " if( pNode->global_id == i && pNode->position_in_list == j)\n" + " {\n" + " atomic_inc(num_correct);\n" + " } \n" + " else {\n" + " break;\n" + " }\n" + " pNode = pNode->pNext;\n" + " }\n" + "}\n" +}; + + +// The first N nodes in pNodes will be the heads of the lists. +void create_linked_lists(Node* pNodes, size_t num_lists, int list_length) +{ + size_t allocation_index = num_lists; // heads of lists are in first num_lists nodes. 
+ + for(cl_uint i = 0; i < num_lists; i++) + { + Node *pNode = &pNodes[i]; + pNode->global_id = i; + pNode->position_in_list = 0; + Node *pNew; + for(int j=1; j < list_length; j++) + { + pNew = &pNodes[ allocation_index++ ];// allocate a new node + pNew->global_id = i; + pNew->position_in_list = j; + pNode->pNext = pNew; // link new node onto end of list + pNode = pNew; // move to end of list + } + } +} + +cl_int verify_linked_lists(Node* pNodes, size_t num_lists, int list_length) +{ + cl_int error = CL_SUCCESS; + int numCorrect = 0; + + log_info(" and verifying on host "); + for(cl_uint i=0; i < num_lists; i++) + { + Node *pNode = &pNodes[i]; + for(int j=0; j < list_length; j++) + { + if( pNode->global_id == i && pNode->position_in_list == j) + { + numCorrect++; + } + else { + break; + } + pNode = pNode->pNext; + } + } + if(numCorrect != list_length * (cl_uint)num_lists) + { + error = -1; + log_info("Failed\n"); + } + else + log_info("Passed\n"); + + return error; +} + +// Note that we don't use the context provided by the test harness since it doesn't support multiple devices, +// so we create are own context here that has all devices, we use the same platform that the harness used. +cl_int create_cl_objects(cl_device_id device_from_harness, const char** ppCodeString, cl_context* context, cl_program *program, cl_command_queue *queues, cl_uint *num_devices, cl_device_svm_capabilities required_svm_caps) +{ + cl_int error; + + cl_platform_id platform_id; + // find out what platform the harness is using. 
+ error = clGetDeviceInfo(device_from_harness, CL_DEVICE_PLATFORM,sizeof(cl_platform_id),&platform_id,NULL); + test_error(error,"clGetDeviceInfo failed"); + + error = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_ALL, 0, NULL, num_devices ); + test_error(error, "clGetDeviceIDs failed"); + + std::vector devicesTmp(*num_devices), devices, capable_devices; + + error = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_ALL, *num_devices, &devicesTmp[0], NULL ); + test_error(error, "clGetDeviceIDs failed"); + + devices.push_back(device_from_harness); + for (size_t i = 0; i < devicesTmp.size(); ++i) + { + if (device_from_harness != devicesTmp[i]) + devices.push_back(devicesTmp[i]); + } + + // Select only the devices that support the SVM level needed for the test. + // Note that if requested SVM capabilities are not supported by any device then the test still passes (even though it does not execute). + cl_device_svm_capabilities caps; + cl_uint num_capable_devices = 0; + for(cl_uint i = 0; i < *num_devices; i++) + { + size_t ret_len = 0; + error = clGetDeviceInfo(devices[i], CL_DEVICE_VERSION, 0, 0, &ret_len); + if (error != CL_SUCCESS) + { + log_error("clGetDeviceInfo failed %s\n", IGetErrorString(error)); + return -1; + } + + std::vector oclVersion(ret_len + 1); + error = clGetDeviceInfo(devices[i], CL_DEVICE_VERSION, sizeof(char) * oclVersion.size(), &oclVersion[0], 0); + if (error != CL_SUCCESS) + { + log_error("clGetDeviceInfo failed %s\n", IGetErrorString(error)); + return -1; + } + + std::string versionStr(&oclVersion[7]); + std::stringstream stream; + stream << versionStr; + + double version = 0.0; + stream >> version; + + if(device_from_harness != devices[i] && version < 2.0) + { + continue; + } + + error = clGetDeviceInfo(devices[i], CL_DEVICE_SVM_CAPABILITIES, sizeof(cl_device_svm_capabilities), &caps, NULL); + test_error(error,"clGetDeviceInfo failed for CL_DEVICE_MEM_SHARING"); + if(caps & (~(CL_DEVICE_SVM_COARSE_GRAIN_BUFFER | CL_DEVICE_SVM_FINE_GRAIN_BUFFER | 
CL_DEVICE_SVM_FINE_GRAIN_SYSTEM | CL_DEVICE_SVM_ATOMICS))) + { + log_error("clGetDeviceInfo returned an invalid cl_device_svm_capabilities value"); + return -1; + } + if((caps & required_svm_caps) == required_svm_caps) + { + capable_devices.push_back(devices[i]); + ++num_capable_devices; + } + } + devices = capable_devices; // the only devices we care about from here on are the ones capable of supporting the requested SVM level. + *num_devices = num_capable_devices; + if(num_capable_devices == 0) + // if(svm_level > CL_DEVICE_COARSE_SVM && 0 == num_capable_devices) + { + log_info("Requested SVM level not supported by any device on this platform, test not executed.\n"); + return 1; // 1 indicates do not execute, but counts as passing. + } + + cl_context_properties context_properties[3] = {CL_CONTEXT_PLATFORM, (cl_context_properties)platform_id, NULL }; + *context = clCreateContext(context_properties, *num_devices, &devices[0], NULL, NULL, &error); + test_error(error, "Unable to create context" ); + + // *queues = (cl_command_queue *) malloc( *num_devices * sizeof( cl_command_queue ) ); + + for(cl_uint i = 0; i < *num_devices; i++) + { + queues[i] = clCreateCommandQueueWithProperties(*context, devices[i], 0, &error); + test_error(error, "clCreateCommandQueue failed"); + } + + if(ppCodeString) + { + error = create_single_kernel_helper(*context, program, 0, 1, ppCodeString, 0, "-cl-std=CL2.0"); + test_error( error, "failed to create program" ); + } + + return 0; +} + +basefn basefn_list[] = { + test_byte_granularity, + test_set_kernel_exec_info_svm_ptrs, + test_fine_grain_memory_consistency, + test_fine_grain_sync_buffers, + test_shared_address_space_fine_grain, + test_shared_sub_buffers, + test_shared_address_space_fine_grain_buffers, + test_allocate_shared_buffer, + test_shared_address_space_coarse_grain_old_api, + test_shared_address_space_coarse_grain_new_api, + test_cross_buffer_pointers_coarse_grain, + test_svm_pointer_passing, + test_enqueue_api, + test_migrate, 
+}; + +const char *basefn_names[] = { + "svm_byte_granularity", + "svm_set_kernel_exec_info_svm_ptrs", + "svm_fine_grain_memory_consistency", + "svm_fine_grain_sync_buffers", + "svm_shared_address_space_fine_grain", + "svm_shared_sub_buffers", + "svm_shared_address_space_fine_grain_buffers", + "svm_allocate_shared_buffer", + "svm_shared_address_space_coarse_grain_old_api", + "svm_shared_address_space_coarse_grain_new_api", + "svm_cross_buffer_pointers_coarse_grain", + "svm_pointer_passing", + "svm_enqueue_api", + "svm_migrate_mem", +}; + +ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_list) / sizeof(basefn_list[0]))); + +int num_fns = sizeof(basefn_names) / sizeof(char *); + + +int main(int argc, const char *argv[]) +{ + return runTestHarness( argc, argv, num_fns, basefn_list, basefn_names, false, true, 0 ); +} + + + diff --git a/test_conformance/SVM/test_allocate_shared_buffer.cpp b/test_conformance/SVM/test_allocate_shared_buffer.cpp new file mode 100644 index 00000000..7d555c8d --- /dev/null +++ b/test_conformance/SVM/test_allocate_shared_buffer.cpp @@ -0,0 +1,107 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "common.h" + +const cl_mem_flags flag_set[] = { + CL_MEM_READ_WRITE, + CL_MEM_WRITE_ONLY, + CL_MEM_READ_ONLY, + CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, + CL_MEM_WRITE_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER, + CL_MEM_READ_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER, + CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS, + CL_MEM_WRITE_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS, + CL_MEM_READ_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS, + 0 +}; +const char* flag_set_names[] = { + "CL_MEM_READ_WRITE", + "CL_MEM_WRITE_ONLY", + "CL_MEM_READ_ONLY", + "CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER", + "CL_MEM_WRITE_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER", + "CL_MEM_READ_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER", + "CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS", + "CL_MEM_WRITE_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS", + "CL_MEM_READ_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS", + "0" +}; + + +int test_allocate_shared_buffer(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements) +{ + clContextWrapper context = NULL; + clProgramWrapper program = NULL; + cl_uint num_devices = 0; + cl_int err = CL_SUCCESS; + clCommandQueueWrapper queues[MAXQ]; + + cl_device_svm_capabilities caps; + err = clGetDeviceInfo(deviceID, CL_DEVICE_SVM_CAPABILITIES, sizeof(cl_device_svm_capabilities), &caps, NULL); + test_error(err,"clGetDeviceInfo failed for CL_DEVICE_SVM_CAPABILITIES"); + + // under construction... 
+ err = create_cl_objects(deviceID, NULL, &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER); + if(err) return -1; + + size_t size = 1024; + + // iteration over flag combos + int num_flags = sizeof(flag_set)/sizeof(cl_mem_flags); + for(int i = 0; i < num_flags; i++) + { + if (((flag_set[i] & CL_MEM_SVM_FINE_GRAIN_BUFFER) != 0 && (caps & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) == 0) + || ((flag_set[i] & CL_MEM_SVM_ATOMICS) != 0 && (caps & CL_DEVICE_SVM_ATOMICS) == 0)) + { + log_info("Skipping clSVMalloc with flags: %s\n", flag_set_names[i]); + continue; + } + + log_info("Testing clSVMalloc with flags: %s\n", flag_set_names[i]); + cl_char *pBufData1 = (cl_char*) clSVMAlloc(context, flag_set[i], size, 0); + if(pBufData1 == NULL) + { + log_error("SVMalloc returned NULL"); + return -1; + } + + { + clMemWrapper buf = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, size, pBufData1, &err); + test_error(err,"clCreateBuffer failed"); + + cl_char *pBufData2 = NULL; + cl_uint flags = CL_MAP_READ | CL_MAP_READ; + if(flag_set[i] & CL_MEM_HOST_READ_ONLY) flags ^= CL_MAP_WRITE; + if(flag_set[i] & CL_MEM_HOST_WRITE_ONLY) flags ^= CL_MAP_READ; + + if(!(flag_set[i] & CL_MEM_HOST_NO_ACCESS)) + { + pBufData2 = (cl_char*) clEnqueueMapBuffer(queues[0], buf, CL_TRUE, flags, 0, size, 0, NULL,NULL, &err); + test_error(err, "clEnqueueMapBuffer failed"); + + if(pBufData2 != pBufData1 || NULL == pBufData1) + { + log_error("SVM pointer returned by clEnqueueMapBuffer doesn't match pointer returned by clSVMalloc"); + return -1; + } + } + } + + clSVMFree(context, pBufData1); + } + + return 0; +} diff --git a/test_conformance/SVM/test_byte_granularity.cpp b/test_conformance/SVM/test_byte_granularity.cpp new file mode 100644 index 00000000..f1e58f70 --- /dev/null +++ b/test_conformance/SVM/test_byte_granularity.cpp @@ -0,0 +1,148 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "common.h" + +const char *byte_manipulation_kernels[] = { + // Each device will write it's id into the bytes that it "owns", ownership is based on round robin (global_id % num_id) + // num_id is equal to number of SVM devices in the system plus one (for the host code). + // id is the index (id) of the device that this kernel is executing on. + // For example, if there are 2 SVM devices and the host; the buffer should look like this after each device and the host write their id's: + // 0, 1, 2, 0, 1, 2, 0, 1, 2... + "__kernel void write_owned_locations(__global char* a, uint num_id, uint id)\n" + "{\n" + " size_t i = get_global_id(0);\n" + " int owner = i % num_id;\n" + " if(id == owner) \n" + " a[i] = id;\n" // modify location if it belongs to this device, write id + "}\n" + + // Verify that a device can see the byte sized updates from the other devices, sum up the device id's and see if they match expected value. + // Note: this must be called with a reduced NDRange so that neighbor acesses don't go past end of buffer. + // For example if there are two SVM devices and the host (3 total devices) the buffer should look like this: + // 0,1,2,0,1,2... + // and the expected sum at each point is 0+1+2 = 3. 
+ "__kernel void sum_neighbor_locations(__global char* a, uint num_devices, volatile __global uint* error_count)\n" + "{\n" + " size_t i = get_global_id(0);\n" + " uint expected_sum = (num_devices * (num_devices - 1))/2;\n" + " uint sum = 0;\n" + " for(uint j=0; j 0) + failed = true; + } + cl_uint expected = (num_devices_plus_host * (num_devices_plus_host - 1))/2; + // check that host can see the byte writes made by the devices. + for(cl_uint i = 0; i < num_elements - num_devices_plus_host; i++) + { + int sum = 0; + for(cl_uint j=0; j < num_devices_plus_host; j++) sum += pA[i+j]; + if(sum != expected) + failed = true; + } + + clSVMFree(context, pA); + for(cl_uint i=0; i < num_devices; i++) clSVMFree(context, error_counts[i]); + + if(failed) + return -1; + return 0; +} diff --git a/test_conformance/SVM/test_cross_buffer_pointers.cpp b/test_conformance/SVM/test_cross_buffer_pointers.cpp new file mode 100644 index 00000000..2ee76d92 --- /dev/null +++ b/test_conformance/SVM/test_cross_buffer_pointers.cpp @@ -0,0 +1,219 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "common.h" + +// create linked lists that use nodes from two different buffers. 
+const char *SVMCrossBufferPointers_test_kernel[] = { + "\n" + "typedef struct Node {\n" + " int global_id;\n" + " int position_in_list;\n" + " __global struct Node* pNext;\n" + "} Node;\n" + "\n" + "__global Node* allocate_node(__global Node* pNodes1, __global Node* pNodes2, volatile __global int* allocation_index, size_t i)\n" + "{\n" + // mix things up, adjacent work items will allocate from different buffers + " if(i & 0x1)\n" + " return &pNodes1[atomic_inc(allocation_index)];\n" + " else\n" + " return &pNodes2[atomic_inc(allocation_index)];\n" + "}\n" + "\n" + // The allocation_index parameter must be initialized on the host to N work-items + // The first N nodes in pNodes will be the heads of the lists. + "__kernel void create_linked_lists(__global Node* pNodes, __global Node* pNodes2, volatile __global int* allocation_index, int list_length)\n" + "{\n" + " size_t i = get_global_id(0);\n" + " __global Node *pNode = &pNodes[i];\n" + "\n" + " pNode->global_id = i;\n" + " pNode->position_in_list = 0;\n" + "\n" + " __global Node *pNew;\n" + " for(int j=1; j < list_length; j++)\n" + " {\n" + " pNew = allocate_node(pNodes, pNodes2, allocation_index, i);\n" + " pNew->global_id = i;\n" + " pNew->position_in_list = j;\n" + " pNode->pNext = pNew; // link new node onto end of list\n" + " pNode = pNew; // move to end of list\n" + " }\n" + "}\n" + "\n" + "__kernel void verify_linked_lists(__global Node* pNodes, __global Node* pNodes2, volatile __global uint* num_correct, int list_length)\n" + "{\n" + " size_t i = get_global_id(0);\n" + " __global Node *pNode = &pNodes[i];\n" + "\n" + " for(int j=0; j < list_length; j++)\n" + " {\n" + " if( pNode->global_id == i && pNode->position_in_list == j)\n" + " {\n" + " atomic_inc(num_correct);\n" + " }\n" + " else {\n" + " break;\n" + " }\n" + " pNode = pNode->pNext;\n" + " }\n" + "}\n" +}; + + +// Creates linked list using host code. 
+// Builds the linked lists on the host instead of on a device.
+// Maps both node buffers (creation allocates nodes from either buffer, so the
+// host needs a coherent view of both), delegates list construction to the
+// shared create_linked_lists() helper, then unmaps and drains the queue so
+// the writes are visible to subsequent device work.
+// Returns CL_SUCCESS on success; test_error macros return early on failure.
+cl_int create_linked_lists_on_host(cl_command_queue cmdq, cl_mem nodes, cl_mem nodes2, cl_int ListLength, size_t numLists )
+{
+  cl_int error = CL_SUCCESS;
+
+  log_info("SVM: creating linked list on host ");
+
+  // Blocking maps: host code touches the node memory directly below.
+  Node *pNodes = (Node*) clEnqueueMapBuffer(cmdq, nodes, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(Node)*ListLength*numLists, 0, NULL,NULL, &error);
+  test_error2(error, pNodes, "clEnqueueMapBuffer failed");
+
+  Node *pNodes2 = (Node*) clEnqueueMapBuffer(cmdq, nodes2, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(Node)*ListLength*numLists, 0, NULL,NULL, &error);
+  test_error2(error, pNodes2, "clEnqueueMapBuffer failed");
+
+  create_linked_lists(pNodes, numLists, ListLength);
+
+  // Unmap both buffers and wait: coarse-grain SVM requires the unmap to
+  // complete before devices may observe the host's writes.
+  error = clEnqueueUnmapMemObject(cmdq, nodes, pNodes, 0,NULL,NULL);
+  test_error(error, "clEnqueueUnmapMemObject failed");
+  error = clEnqueueUnmapMemObject(cmdq, nodes2, pNodes2, 0,NULL,NULL);
+  test_error(error, "clEnqueueUnmapMemObject failed");
+  error = clFinish(cmdq);
+  test_error(error, "clFinish failed");
+  return error;
+}
+
+// Verify correctness of the linked list using host code.
+// Verifies on the host the linked lists previously built on a device or host.
+// Maps both node buffers (lists may span either buffer), walks every list via
+// the shared verify_linked_lists() helper, then unmaps and drains the queue.
+// Returns CL_SUCCESS on pass, -1 on verification failure.
+cl_int verify_linked_lists_on_host(int ci, cl_command_queue cmdq, cl_mem nodes, cl_mem nodes2, cl_int ListLength, size_t numLists )
+{
+  cl_int error = CL_SUCCESS;
+
+  //log_info(" and verifying on host ");
+
+  Node *pNodes = (Node*) clEnqueueMapBuffer(cmdq, nodes, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(Node)*ListLength * numLists, 0, NULL,NULL, &error);
+  test_error2(error, pNodes, "clEnqueueMapBuffer failed");
+  Node *pNodes2 = (Node*) clEnqueueMapBuffer(cmdq, nodes2, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(Node)*ListLength * numLists, 0, NULL,NULL, &error);
+  // BUGFIX: this previously checked pNodes again, so a failed/NULL map of the
+  // second buffer went undetected and could be dereferenced during the walk.
+  test_error2(error, pNodes2, "clEnqueueMapBuffer failed");
+
+  error = verify_linked_lists(pNodes, numLists, ListLength);
+  if(error) return -1;
+
+  error = clEnqueueUnmapMemObject(cmdq, nodes, pNodes, 0,NULL,NULL);
+  test_error(error, "clEnqueueUnmapMemObject failed");
+  error = clEnqueueUnmapMemObject(cmdq, nodes2, pNodes2, 0,NULL,NULL);
+  test_error(error, "clEnqueueUnmapMemObject failed");
+  error = clFinish(cmdq);
+  test_error(error, "clFinish failed");
+  return error;
+}
+
+// This tests that shared buffers are able to contain pointers that point to other shared buffers.
+// This tests that all devices and the host share a common address space; using only the coarse-grain features.
+// This is done by creating a linked list on a device and then verifying the correctness of the list
+// on another device or the host.
+// The linked list nodes are allocated from two different buffers this is done to ensure that cross buffer pointers work correctly.
+// This basic test is performed for all combinations of devices and the host.
+int test_cross_buffer_pointers_coarse_grain(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements) +{ + clContextWrapper context = NULL; + clProgramWrapper program = NULL; + cl_uint num_devices = 0; + cl_int error = CL_SUCCESS; + clCommandQueueWrapper queues[MAXQ]; + + error = create_cl_objects(deviceID, &SVMCrossBufferPointers_test_kernel[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER); + if(error) return -1; + + size_t numLists = num_elements; + cl_int ListLength = 32; + + clKernelWrapper kernel_create_lists = clCreateKernel(program, "create_linked_lists", &error); + test_error(error, "clCreateKernel failed"); + + clKernelWrapper kernel_verify_lists = clCreateKernel(program, "verify_linked_lists", &error); + test_error(error, "clCreateKernel failed"); + + // this buffer holds some of the linked list nodes. + Node* pNodes = (Node*) clSVMAlloc(context, CL_MEM_READ_WRITE, sizeof(Node)*ListLength*numLists, 0); + + // this buffer holds some of the linked list nodes. + Node* pNodes2 = (Node*) clSVMAlloc(context, CL_MEM_READ_WRITE, sizeof(Node)*ListLength*numLists, 0); + + { + clMemWrapper nodes = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, sizeof(Node)*ListLength*numLists, pNodes, &error); + test_error(error, "clCreateBuffer failed."); + + clMemWrapper nodes2 = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, sizeof(Node)*ListLength*numLists, pNodes2, &error); + test_error(error, "clCreateBuffer failed."); + + // this buffer holds the index into the nodes buffer that is used for node allocation + clMemWrapper allocator = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &error); + test_error(error, "clCreateBuffer failed."); + + // this buffer holds the count of correct nodes which is computed by the verify kernel. 
+ clMemWrapper num_correct = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &error); + test_error(error, "clCreateBuffer failed."); + + error |= clSetKernelArg(kernel_create_lists, 0, sizeof(void*), (void *) &nodes); + //error |= clSetKernelArgSVMPointer(kernel_create_lists, 0, (void *) pNodes); + error |= clSetKernelArg(kernel_create_lists, 1, sizeof(void*), (void *) &nodes2); + error |= clSetKernelArg(kernel_create_lists, 2, sizeof(void*), (void *) &allocator); + error |= clSetKernelArg(kernel_create_lists, 3, sizeof(cl_int), (void *) &ListLength); + + error |= clSetKernelArg(kernel_verify_lists, 0, sizeof(void*), (void *) &nodes); + error |= clSetKernelArg(kernel_verify_lists, 1, sizeof(void*), (void *) &nodes2); + error |= clSetKernelArg(kernel_verify_lists, 2, sizeof(void*), (void *) &num_correct); + error |= clSetKernelArg(kernel_verify_lists, 3, sizeof(cl_int), (void *) &ListLength); + test_error(error, "clSetKernelArg failed"); + + // Create linked list on one device and verify on another device (or the host). + // Do this for all possible combinations of devices and host within the platform. + for (int ci=0; ci<(int)num_devices+1; ci++) // ci is CreationIndex, index of device/q to create linked list on + { + for (int vi=0; vi<(int)num_devices+1; vi++) // vi is VerificationIndex, index of device/q to verify linked list on + { + if(ci == num_devices) // last device index represents the host, note the num_device+1 above. 
+ { + error = create_linked_lists_on_host(queues[0], nodes, nodes2, ListLength, numLists); + if(error) return -1; + } + else + { + error = create_linked_lists_on_device(ci, queues[ci], allocator, kernel_create_lists, numLists); + if(error) return -1; + } + + if(vi == num_devices) + { + error = verify_linked_lists_on_host(vi, queues[0], nodes, nodes2, ListLength, numLists); + if(error) return -1; + } + else + { + error = verify_linked_lists_on_device(vi, queues[vi], num_correct, kernel_verify_lists, ListLength, numLists); + if(error) return -1; + } + } // inner loop, vi + } // outer loop, ci + } + + clSVMFree(context, pNodes2); + clSVMFree(context, pNodes); + + return 0; +} diff --git a/test_conformance/SVM/test_enqueue_api.cpp b/test_conformance/SVM/test_enqueue_api.cpp new file mode 100644 index 00000000..6a04e955 --- /dev/null +++ b/test_conformance/SVM/test_enqueue_api.cpp @@ -0,0 +1,254 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "common.h" +#include "../../test_common/harness/mt19937.h" + +#include + +typedef struct +{ + cl_uint status; + cl_uint num_svm_pointers; + std::vector svm_pointers; +} CallbackData; + +void generate_data(std::vector &data, size_t size, MTdata seed) +{ + cl_uint randomData = genrand_int32(seed); + cl_uint bitsLeft = 32; + + for( size_t i = 0; i < size; i++ ) + { + if( 0 == bitsLeft) + { + randomData = genrand_int32(seed); + bitsLeft = 32; + } + data[i] = (cl_uchar)( randomData & 255 ); + randomData >>= 8; randomData -= 8; + } +} + +//callback which will be passed to clEnqueueSVMFree command +void CL_CALLBACK callback_svm_free(cl_command_queue queue, cl_uint num_svm_pointers, void * svm_pointers[], void * user_data) +{ + CallbackData *data = (CallbackData *)user_data; + data->num_svm_pointers = num_svm_pointers; + data->svm_pointers.resize(num_svm_pointers, 0); + + cl_context context; + if(clGetCommandQueueInfo(queue, CL_QUEUE_CONTEXT, sizeof(cl_context), &context, 0) != CL_SUCCESS) + { + log_error("clGetCommandQueueInfo failed in the callback\n"); + return; + } + + for (size_t i = 0; i < num_svm_pointers; ++i) + { + data->svm_pointers[i] = svm_pointers[i]; + clSVMFree(context, svm_pointers[i]); + } + + data->status = 1; +} + +int test_enqueue_api(cl_device_id deviceID, cl_context c, cl_command_queue queue, int num_elements) +{ + clContextWrapper context = NULL; + clCommandQueueWrapper queues[MAXQ]; + cl_uint num_devices = 0; + const size_t elementNum = 1024; + const size_t numSVMBuffers = 32; + cl_int error = CL_SUCCESS; + RandomSeed seed(0); + + error = create_cl_objects(deviceID, NULL, &context, NULL, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER); + if(error) return -1; + + queue = queues[0]; + + //all possible sizes of vectors and scalars + size_t typeSizes[] = { + sizeof(cl_uchar), + sizeof(cl_uchar2), + sizeof(cl_uchar3), + sizeof(cl_uchar4), + sizeof(cl_uchar8), + sizeof(cl_uchar16), + sizeof(cl_ushort), + sizeof(cl_ushort2), + 
sizeof(cl_ushort3), + sizeof(cl_ushort4), + sizeof(cl_ushort8), + sizeof(cl_ushort16), + sizeof(cl_uint), + sizeof(cl_uint2), + sizeof(cl_uint3), + sizeof(cl_uint4), + sizeof(cl_uint8), + sizeof(cl_uint16), + sizeof(cl_ulong), + sizeof(cl_ulong2), + sizeof(cl_ulong3), + sizeof(cl_ulong4), + sizeof(cl_ulong8), + sizeof(cl_ulong16), + }; + + for (size_t i = 0; i < ( sizeof(typeSizes) / sizeof(typeSizes[0]) ); ++i) + { + //generate initial data + std::vector fillData0(typeSizes[i]), fillData1(typeSizes[i], 0), fillData2(typeSizes[i]); + generate_data(fillData0, typeSizes[i], seed); + generate_data(fillData2, typeSizes[i], seed); + + cl_uchar *srcBuffer = (cl_uchar *)clSVMAlloc(context, CL_MEM_READ_WRITE, elementNum * typeSizes[i], 0); + cl_uchar *dstBuffer = (cl_uchar *)clSVMAlloc(context, CL_MEM_READ_WRITE, elementNum * typeSizes[i], 0); + + clEventWrapper userEvent = clCreateUserEvent(context, &error); + test_error(error, "clCreateUserEvent failed"); + + clEventWrapper eventMemFill; + error = clEnqueueSVMMemFill(queue, srcBuffer, &fillData0[0], typeSizes[i], elementNum * typeSizes[i], 1, &userEvent, &eventMemFill); + test_error(error, "clEnqueueSVMMemFill failed"); + + clEventWrapper eventMemcpy; + error = clEnqueueSVMMemcpy(queue, CL_FALSE, dstBuffer, srcBuffer, elementNum * typeSizes[i], 1, &eventMemFill, &eventMemcpy); + test_error(error, "clEnqueueSVMMemcpy failed"); + + error = clSetUserEventStatus(userEvent, CL_COMPLETE); + test_error(error, "clSetUserEventStatus failed"); + + clEventWrapper eventMap; + error = clEnqueueSVMMap(queue, CL_FALSE, CL_MAP_READ | CL_MAP_WRITE, dstBuffer, elementNum * typeSizes[i], 1, &eventMemcpy, &eventMap); + test_error(error, "clEnqueueSVMMap failed"); + + error = clWaitForEvents(1, &eventMap); + test_error(error, "clWaitForEvents failed"); + + //data verification + for (size_t j = 0; j < elementNum * typeSizes[i]; ++j) + { + if (dstBuffer[j] != fillData0[j % typeSizes[i]]) + { + log_error("Invalid data at index %ld, expected %d, 
got %d\n", j, fillData0[j % typeSizes[i]], dstBuffer[j]); + return -1; + } + } + + clEventWrapper eventUnmap; + error = clEnqueueSVMUnmap(queue, dstBuffer, 0, 0, &eventUnmap); + test_error(error, "clEnqueueSVMUnmap failed"); + + error = clEnqueueSVMMemFill(queue, srcBuffer, &fillData2[0], typeSizes[i], elementNum * typeSizes[i] / 2, 0, 0, 0); + test_error(error, "clEnqueueSVMMemFill failed"); + + error = clEnqueueSVMMemFill(queue, dstBuffer + elementNum * typeSizes[i] / 2, &fillData2[0], typeSizes[i], elementNum * typeSizes[i] / 2, 0, 0, 0); + test_error(error, "clEnqueueSVMMemFill failed"); + + error = clEnqueueSVMMemcpy(queue, CL_FALSE, dstBuffer, srcBuffer, elementNum * typeSizes[i] / 2, 0, 0, 0); + test_error(error, "clEnqueueSVMMemcpy failed"); + + error = clEnqueueSVMMemcpy(queue, CL_TRUE, dstBuffer + elementNum * typeSizes[i] / 2, srcBuffer + elementNum * typeSizes[i] / 2, elementNum * typeSizes[i] / 2, 0, 0, 0); + test_error(error, "clEnqueueSVMMemcpy failed"); + + void *ptrs[] = {(void *)srcBuffer, (void *)dstBuffer}; + + clEventWrapper eventFree; + error = clEnqueueSVMFree(queue, 2, ptrs, 0, 0, 0, 0, &eventFree); + test_error(error, "clEnqueueSVMFree failed"); + + error = clWaitForEvents(1, &eventFree); + test_error(error, "clWaitForEvents failed"); + + //event info verification for new SVM commands + cl_command_type commandType; + error = clGetEventInfo(eventMemFill, CL_EVENT_COMMAND_TYPE, sizeof(cl_command_type), &commandType, NULL); + test_error(error, "clGetEventInfo failed"); + if (commandType != CL_COMMAND_SVM_MEMFILL) + { + log_error("Invalid command type returned for clEnqueueSVMMemFill\n"); + return -1; + } + + error = clGetEventInfo(eventMemcpy, CL_EVENT_COMMAND_TYPE, sizeof(cl_command_type), &commandType, NULL); + test_error(error, "clGetEventInfo failed"); + if (commandType != CL_COMMAND_SVM_MEMCPY) + { + log_error("Invalid command type returned for clEnqueueSVMMemcpy\n"); + return -1; + } + + error = clGetEventInfo(eventMap, 
CL_EVENT_COMMAND_TYPE, sizeof(cl_command_type), &commandType, NULL); + test_error(error, "clGetEventInfo failed"); + if (commandType != CL_COMMAND_SVM_MAP) + { + log_error("Invalid command type returned for clEnqueueSVMMap\n"); + return -1; + } + + error = clGetEventInfo(eventUnmap, CL_EVENT_COMMAND_TYPE, sizeof(cl_command_type), &commandType, NULL); + test_error(error, "clGetEventInfo failed"); + if (commandType != CL_COMMAND_SVM_UNMAP) + { + log_error("Invalid command type returned for clEnqueueSVMUnmap\n"); + return -1; + } + + error = clGetEventInfo(eventFree, CL_EVENT_COMMAND_TYPE, sizeof(cl_command_type), &commandType, NULL); + test_error(error, "clGetEventInfo failed"); + if (commandType != CL_COMMAND_SVM_FREE) + { + log_error("Invalid command type returned for clEnqueueSVMFree\n"); + return -1; + } + } + + std::vector buffers(numSVMBuffers, 0); + for(size_t i = 0; i < numSVMBuffers; ++i) buffers[i] = clSVMAlloc(context, CL_MEM_READ_WRITE, elementNum, 0); + + //verify if callback is triggered correctly + CallbackData data; + data.status = 0; + + error = clEnqueueSVMFree(queue, buffers.size(), &buffers[0], callback_svm_free, &data, 0, 0, 0); + test_error(error, "clEnqueueSVMFree failed"); + + error = clFinish(queue); + test_error(error, "clFinish failed"); + + //wait for the callback + while(data.status == 0) { } + + //check if number of SVM pointers returned in the callback matches with expected + if (data.num_svm_pointers != buffers.size()) + { + log_error("Invalid number of SVM pointers returned in the callback, expected: %ld, got: %d\n", buffers.size(), data.num_svm_pointers); + return -1; + } + + //check if pointers returned in callback are correct + for (size_t i = 0; i < buffers.size(); ++i) + { + if (data.svm_pointers[i] != buffers[i]) + { + log_error("Invalid SVM pointer returned in the callback, idx: %ld\n", i); + return -1; + } + } + + return 0; +} \ No newline at end of file diff --git a/test_conformance/SVM/test_fine_grain_memory_consistency.cpp 
b/test_conformance/SVM/test_fine_grain_memory_consistency.cpp new file mode 100644 index 00000000..9d9ddbf2 --- /dev/null +++ b/test_conformance/SVM/test_fine_grain_memory_consistency.cpp @@ -0,0 +1,168 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "common.h" + +const char *hash_table_kernel[] = { + "typedef struct BinNode {\n" + " int value;\n" + " atomic_uintptr_t pNext;\n" + "} BinNode;\n" + + "__kernel void build_hash_table(__global uint* input, __global BinNode* pNodes, volatile __global atomic_uint* pNumNodes, uint numBins)\n" + "{\n" + " __global BinNode *pNew = &pNodes[ atomic_fetch_add_explicit(pNumNodes, 1, memory_order_relaxed, memory_scope_all_svm_devices) ];\n" + " uint i = get_global_id(0);\n" + " uint b = input[i] % numBins;\n" + " pNew->value = input[i];\n" + " uintptr_t next = atomic_load_explicit(&(pNodes[b].pNext), memory_order_seq_cst, memory_scope_all_svm_devices);\n" + " do\n" + " {\n" + " atomic_store_explicit(&(pNew->pNext), next, memory_order_seq_cst, memory_scope_all_svm_devices);\n" // always inserting at head of list + " } while(!atomic_compare_exchange_strong_explicit(&(pNodes[b].pNext), &next, (uintptr_t)pNew, memory_order_seq_cst, memory_order_relaxed, memory_scope_all_svm_devices));\n" + "}\n" +}; + +typedef struct BinNode{ + cl_uint value; + struct BinNode* pNext; +} BinNode; + +void build_hash_table_on_host(cl_context c, cl_uint* input, size_t inputSize, 
BinNode* pNodes, cl_int volatile *pNumNodes, cl_uint numBins) +{ + for(cl_uint i = 0; i < inputSize; i++) + { + BinNode *pNew = &pNodes[ AtomicFetchAddExplicit(pNumNodes, 1, memory_order_relaxed) ]; + cl_uint b = input[i] % numBins; + pNew->value = input[i]; + + BinNode *next = pNodes[b].pNext; + do { + pNew->pNext = next; // always inserting at head of list + } while(!AtomicCompareExchangeStrongExplicit(&(pNodes[b].pNext), &next, pNew, memory_order_relaxed, memory_order_seq_cst)); + } +} + + +int launch_kernels_and_verify(clContextWrapper &context, clCommandQueueWrapper* queues, clKernelWrapper &kernel, cl_uint num_devices, cl_uint numBins, size_t num_pixels) +{ + int err = CL_SUCCESS; + cl_uint *pInputImage = (cl_uint*) clSVMAlloc(context, CL_MEM_READ_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeof(cl_uint) * num_pixels, 0); + BinNode *pNodes = (BinNode*) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS, sizeof(BinNode) * (num_pixels * (num_devices + 1) + numBins), 0); + cl_int *pNumNodes = (cl_int*) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS, sizeof(cl_int), 0); + + *pNumNodes = numBins; // using the first numBins nodes to hold the list heads. + for(cl_uint i=0;ivalue % numBins) != i) + { + log_error("Something went wrong, item is in wrong hash bucket\n"); + break; + } + num_items++; + pNode = pNode->pNext; + } + } + + clSVMFree(context, pInputImage); + clSVMFree(context, pNodes); + clSVMFree(context, pNumNodes); + // each device and the host inserted all of the pixels, check that none are missing. + if(num_items != num_pixels * (num_devices + 1) ) + { + log_error("The hash table is not correct, num items %d, expected num items: %d\n", num_items, num_pixels * (num_devices + 1)); + return -1; // test did not pass + } + return 0; +} + +// This tests for memory consistency across devices and the host. 
+// Each device and the host simultaneously insert values into a single hash table. +// Each bin in the hash table is a linked list. Each bin is protected against simultaneous +// update using a lock free technique. The correctness of the list is verfied on the host. +// This test requires the new OpenCL 2.0 atomic operations that implement the new seq_cst memory ordering. +int test_fine_grain_memory_consistency(cl_device_id deviceID, cl_context c, cl_command_queue queue, int num_elements) +{ + clContextWrapper context; + clProgramWrapper program; + clKernelWrapper kernel; + clCommandQueueWrapper queues[MAXQ]; + + cl_uint num_devices = 0; + cl_int err = CL_SUCCESS; + + if (sizeof(void *) == 8 && (!is_extension_available(deviceID, "cl_khr_int64_base_atomics") || !is_extension_available(deviceID, "cl_khr_int64_extended_atomics"))) + { + log_info("WARNING: test skipped. 'cl_khr_int64_base_atomics' and 'cl_khr_int64_extended_atomics' extensions are not supported\n"); + return 0; + } + + err = create_cl_objects(deviceID, &hash_table_kernel[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_FINE_GRAIN_BUFFER | CL_DEVICE_SVM_ATOMICS); + if(err == 1) return 0; // no devices capable of requested SVM level, so don't execute but count test as passing. + if(err < 0) return -1; // fail test. + + kernel = clCreateKernel(program, "build_hash_table", &err); + test_error(err, "clCreateKernel failed"); + size_t num_pixels = num_elements; + + int result; + cl_uint numBins = 1; // all work groups in all devices and the host code will hammer on this one lock. + result = launch_kernels_and_verify(context, queues, kernel, num_devices, numBins, num_pixels); + if(result == -1) return result; + + numBins = 2; // 2 locks within in same cache line will get hit from different devices and host. + result = launch_kernels_and_verify(context, queues, kernel, num_devices, numBins, num_pixels); + if(result == -1) return result; + + numBins = 29; // locks span a few cache lines. 
+ result = launch_kernels_and_verify(context, queues, kernel, num_devices, numBins, num_pixels); + if(result == -1) return result; + + return result; +} diff --git a/test_conformance/SVM/test_fine_grain_sync_buffers.cpp b/test_conformance/SVM/test_fine_grain_sync_buffers.cpp new file mode 100644 index 00000000..064a3147 --- /dev/null +++ b/test_conformance/SVM/test_fine_grain_sync_buffers.cpp @@ -0,0 +1,105 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "common.h" + +const char *find_targets_kernel[] = { + + "__kernel void find_targets(__global uint* image, uint target, volatile __global atomic_uint *numTargetsFound, volatile __global atomic_uint *targetLocations)\n" + "{\n" + " size_t i = get_global_id(0);\n" + " uint index;\n" + " if(image[i] == target) {\n" + " index = atomic_fetch_add_explicit(numTargetsFound, 1, memory_order_relaxed, memory_scope_device); \n" + " atomic_exchange_explicit(&targetLocations[index], i, memory_order_relaxed, memory_scope_all_svm_devices); \n" + " }\n" + "}\n" +}; + + +void spawnAnalysisTask(int location) +{ + // printf("found target at location %d\n", location); +} + +#define MAX_TARGETS 1024 + +// Goals: demonstrate use of SVM's atomics to do fine grain synchronization between the device and host. +// Concept: a device kernel is used to search an input image for regions that match a target pattern. 
+// The device immediately notifies the host when it finds a target (via an atomic operation that works across host and devices). +// The host is then able to spawn a task that further analyzes the target while the device continues searching for more targets. +int test_fine_grain_sync_buffers(cl_device_id deviceID, cl_context c, cl_command_queue queue, int num_elements) +{ + clContextWrapper context = NULL; + clProgramWrapper program = NULL; + cl_uint num_devices = 0; + cl_int err = CL_SUCCESS; + clCommandQueueWrapper queues[MAXQ]; + + err = create_cl_objects(deviceID, &find_targets_kernel[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_FINE_GRAIN_BUFFER | CL_DEVICE_SVM_ATOMICS); + if(err == 1) return 0; // no devices capable of requested SVM level, so don't execute but count test as passing. + if(err < 0) return -1; // fail test. + + clKernelWrapper kernel = clCreateKernel(program, "find_targets", &err); + test_error(err, "clCreateKernel failed"); + + size_t num_pixels = num_elements; + //cl_uint num_pixels = 1024*1024*32; + + cl_uint *pInputImage = (cl_uint*) clSVMAlloc(context, CL_MEM_READ_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeof(cl_uint) * num_pixels, 0); + cl_uint *pNumTargetsFound = (cl_uint*) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS, sizeof(cl_uint), 0); + cl_int *pTargetLocations = (cl_int* ) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS, sizeof(cl_int) * MAX_TARGETS, 0); + + cl_uint targetDescriptor = 777; + *pNumTargetsFound = 0; + cl_uint i; + for(i=0; i < MAX_TARGETS; i++) pTargetLocations[i] = -1; + for(i=0; i < num_pixels; i++) pInputImage[i] = 0; + pInputImage[0] = targetDescriptor; + pInputImage[3] = targetDescriptor; + pInputImage[num_pixels - 1] = targetDescriptor; + + err |= clSetKernelArgSVMPointer(kernel, 0, pInputImage); + err |= clSetKernelArg(kernel, 1, sizeof(cl_uint), (void*) &targetDescriptor); + err |= 
clSetKernelArgSVMPointer(kernel, 2, pNumTargetsFound); + err |= clSetKernelArgSVMPointer(kernel, 3, pTargetLocations); + test_error(err, "clSetKernelArg failed"); + + cl_event done; + err = clEnqueueNDRangeKernel(queues[0], kernel, 1, NULL, &num_pixels, NULL, 0, NULL, &done); + test_error(err,"clEnqueueNDRangeKernel failed"); + clFlush(queues[0]); + + + i=0; + cl_int status; + // check for new targets, if found spawn a task to analyze target. + do { + err = clGetEventInfo(done,CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &status, NULL); + test_error(err,"clGetEventInfo failed"); + if( AtomicLoadExplicit(&pTargetLocations[i], memory_order_relaxed) != -1) // -1 indicates slot not used yet. + { + spawnAnalysisTask(pTargetLocations[i]); + i++; + } + } while (status != CL_COMPLETE || AtomicLoadExplicit(&pTargetLocations[i], memory_order_relaxed) != -1); + + clSVMFree(context, pInputImage); + clSVMFree(context, pNumTargetsFound); + clSVMFree(context, pTargetLocations); + + if(i != 3) return -1; + return 0; +} diff --git a/test_conformance/SVM/test_migrate.cpp b/test_conformance/SVM/test_migrate.cpp new file mode 100644 index 00000000..704f8060 --- /dev/null +++ b/test_conformance/SVM/test_migrate.cpp @@ -0,0 +1,330 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "common.h" +#include "../../test_common/harness/mt19937.h" + +#define GLOBAL_SIZE 65536 + +static const char *sources[] = { +"__kernel void migrate_kernel(__global uint * restrict a, __global uint * restrict b, __global uint * restrict c)\n" +"{\n" +" size_t i = get_global_id(0);\n" +" a[i] ^= 0x13579bdf;\n" +" b[i] ^= 0x2468ace0;\n" +" c[i] ^= 0x731fec8f;\n" +"}\n" +}; + +static void +fill_buffer(cl_uint* p, size_t n, MTdata seed) +{ + for (size_t i=0; i 1) { + log_info(" Running on two devices.\n"); + queue1 = queues[1]; + } else { + // Ensure we have two distinct queues + cl_device_id did; + error = clGetCommandQueueInfo(queue0, CL_QUEUE_DEVICE, sizeof(did), (void *)&did, NULL); + test_error(error, "clGetCommandQueueInfo failed"); + + cl_command_queue_properties cqp; + error = clGetCommandQueueInfo(queue0, CL_QUEUE_PROPERTIES, sizeof(cqp), &cqp, NULL); + test_error(error, "clGetCommandQueueInfo failed"); + + cl_queue_properties qp[3] = { CL_QUEUE_PROPERTIES, cqp, 0 }; + queue1 = clCreateCommandQueueWithProperties(context, did, qp, &error); + test_error(error, "clCteateCommandQueueWithProperties failed"); + } + + clKernelWrapper kernel = clCreateKernel(program, "migrate_kernel", &error); + test_error(error, "clCreateKernel failed"); + + char* asvm = (char*)clSVMAlloc(context, CL_MEM_READ_WRITE, global_size*sizeof(cl_uint), 16); + if (asvm == NULL) { + log_error("ERROR: clSVMAlloc returned NULL at %s:%d\n", __FILE__, __LINE__); + return -1; + } + + char* bsvm = (char *)clSVMAlloc(context, CL_MEM_READ_WRITE, global_size*sizeof(cl_uint), 16); + if (bsvm == NULL) { + log_error("ERROR: clSVMAlloc returned NULL at %s:%d\n", __FILE__, __LINE__); + clSVMFree(context, asvm); + return -1; + } + + char* csvm = (char *)clSVMAlloc(context, CL_MEM_READ_WRITE, global_size*sizeof(cl_uint), 16); + if (csvm == NULL) { + log_error("ERROR: clSVMAlloc returned NULL at %s:%d\n", __FILE__, __LINE__); + clSVMFree(context, bsvm); + clSVMFree(context, asvm); + return -1; + } 
+ + error = clSetKernelArgSVMPointer(kernel, 0, (void*)asvm); + test_error(error, "clSetKernelArgSVMPointer failed"); + + error = clSetKernelArgSVMPointer(kernel, 1, (void*)bsvm); + test_error(error, "clSetKernelArgSVMPointer failed"); + + error = clSetKernelArgSVMPointer(kernel, 2, (void*)csvm); + test_error(error, "clSetKernelArgSVMPointer failed"); + + // Initialize host copy of data (and result) + fill_buffer(amem, global_size, seed); + fill_buffer(bmem, global_size, seed); + fill_buffer(cmem, global_size, seed); + + // Now we're ready to start + { + // First, fill in the data on device0 + cl_uint patt[] = { 0, 0, 0, 0}; + error = clEnqueueSVMMemFill(queue0, (void *)asvm, patt, sizeof(patt), global_size*sizeof(cl_uint), 0, NULL, &evs[0]); + test_error(error, "clEnqueueSVMMemFill failed"); + + error = clEnqueueSVMMemFill(queue0, (void *)bsvm, patt, sizeof(patt), global_size*sizeof(cl_uint), 0, NULL, &evs[1]); + test_error(error, "clEnqueueSVMMemFill failed"); + + error = clEnqueueSVMMemFill(queue0, (void *)csvm, patt, sizeof(patt), global_size*sizeof(cl_uint), 0, NULL, &evs[2]); + test_error(error, "clEnqueueSVMMemFill failed"); + } + + { + // Now migrate fully to device 1 and discard the data + char* ptrs[] = { asvm, bsvm, csvm }; + error = clEnqueueSVMMigrateMem(queue1, 3, (const void**)ptrs, NULL, CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED, 1, &evs[2], &evs[3]); + test_error(error, "clEnqueueSVMMigrateMem failed"); + } + + { + // Test host flag + char *ptrs[] = { asvm+1, bsvm+3, csvm+5 }; + const size_t szs[] = { 1, 1, 0 }; + error = clEnqueueSVMMigrateMem(queue0, 3, (const void**)ptrs, szs, CL_MIGRATE_MEM_OBJECT_HOST, 1, &evs[3], &evs[4]); + test_error(error, "clEnqueueSVMMigrateMem failed"); + } + + { + // Next fill with known data + error = clEnqueueSVMMap(queue1, CL_FALSE, CL_MAP_WRITE, (void*)asvm, global_size*sizeof(cl_uint), 1, &evs[4], &evs[5]); + test_error(error, "clEnqueueSVMMap failed"); + + error = clEnqueueSVMMap(queue1, CL_FALSE, CL_MAP_WRITE, 
(void*)bsvm, global_size*sizeof(cl_uint), 0, NULL, &evs[6]); + test_error(error, "clEnqueueSVMMap failed"); + + error = clEnqueueSVMMap(queue1, CL_FALSE, CL_MAP_WRITE, (void*)csvm, global_size*sizeof(cl_uint), 0, NULL, &evs[7]); + test_error(error, "clEnqueueSVMMap failed"); + } + + error = clFlush(queue0); + test_error(error, "clFlush failed"); + + error = clFlush(queue1); + test_error(error, "clFlush failed"); + + error = wait_and_release("first batch", evs, 8); + if (error) + return -1; + + memcpy((void *)asvm, (void *)amem, global_size*sizeof(cl_uint)); + memcpy((void *)bsvm, (void *)bmem, global_size*sizeof(cl_uint)); + memcpy((void *)csvm, (void *)cmem, global_size*sizeof(cl_uint)); + + { + error = clEnqueueSVMUnmap(queue1, (void *)asvm, 0, NULL, &evs[0]); + test_error(error, "clEnqueueSVMUnmap failed"); + + error = clEnqueueSVMUnmap(queue1, (void *)bsvm, 0, NULL, &evs[1]); + test_error(error, "clEnqueueSVMUnmap failed"); + + error = clEnqueueSVMUnmap(queue1, (void *)csvm, 0, NULL, &evs[2]); + test_error(error, "clEnqueueSVMUnmap failed"); + } + + + { + // Now try some overlapping regions, and operate on the result + char *ptrs[] = { asvm+100, bsvm+17, csvm+1000, asvm+101, bsvm+19, csvm+1017 }; + const size_t szs[] = { 13, 23, 43, 3, 7, 11 }; + + error = clEnqueueSVMMigrateMem(queue0, 3, (const void**)ptrs, szs, 0, 1, &evs[2], &evs[3]); + test_error(error, "clEnqueueSVMMigrateMem failed"); + + error = clEnqueueNDRangeKernel(queue0, kernel, 1, NULL, &global_size, NULL, 0, NULL, &evs[4]); + test_error(error, "clEnqueueNDRangeKernel failed"); + } + + { + // Now another pair + char *ptrs[] = { asvm+8, bsvm+17, csvm+31, csvm+83 }; + const size_t szs[] = { 0, 1, 3, 7 }; + + error = clEnqueueSVMMigrateMem(queue1, 4, (const void**)ptrs, szs, 0, 1, &evs[4], &evs[5]); + test_error(error, "clEnqueueSVMMigrateMem failed"); + + error = clEnqueueNDRangeKernel(queue1, kernel, 1, NULL, &global_size, NULL, 0, NULL, &evs[6]); + test_error(error, "clEnqueueNDRangeKernel 
failed"); + } + + { + // Another pair + char *ptrs[] = { asvm+64, asvm+128, bsvm+64, bsvm+128, csvm, csvm+64 }; + const size_t szs[] = { 64, 64, 64, 64, 64, 64 }; + + error = clEnqueueSVMMigrateMem(queue0, 6, (const void**)ptrs, szs, 0, 1, &evs[6], &evs[7]); + test_error(error, "clEnqueueSVMMigrateMem failed"); + + error = clEnqueueNDRangeKernel(queue0, kernel, 1, NULL, &global_size, NULL, 0, NULL, &evs[8]); + test_error(error, "clEnqueueNDRangeKernel failed"); + } + + { + // Final pair + char *ptrs[] = { asvm, asvm, bsvm, csvm, csvm }; + const size_t szs[] = { 0, 1, 0, 1, 0 }; + + error = clEnqueueSVMMigrateMem(queue1, 5, (const void**)ptrs, szs, 0, 1, &evs[8], &evs[9]); + test_error(error, "clEnqueueSVMMigrateMem failed"); + + error = clEnqueueNDRangeKernel(queue1, kernel, 1, NULL, &global_size, NULL, 0, NULL, &evs[10]); + test_error(error, "clEnqueueNDRangeKernel failed"); + } + + { + error = clEnqueueSVMMap(queue1, CL_FALSE, CL_MAP_READ, (void*)asvm, global_size*sizeof(cl_uint), 0, NULL, &evs[11]); + test_error(error, "clEnqueueSVMMap failed"); + + error = clEnqueueSVMMap(queue1, CL_FALSE, CL_MAP_READ, (void*)bsvm, global_size*sizeof(cl_uint), 0, NULL, &evs[12]); + test_error(error, "clEnqueueSVMMap failed"); + + error = clEnqueueSVMMap(queue1, CL_FALSE, CL_MAP_READ, (void*)csvm, global_size*sizeof(cl_uint), 0, NULL, &evs[13]); + test_error(error, "clEnqueueSVMMap failed"); + } + + error = clFlush(queue0); + test_error(error, "clFlush failed"); + + error = clFlush(queue1); + test_error(error, "clFlush failed"); + + error = wait_and_release("batch 2", evs, 14); + if (error) + return -1; + + // Check kernel results + bool ok = check("memory a", (cl_uint *)asvm, amem, global_size); + ok &= check("memory b", (cl_uint *)bsvm, bmem, global_size); + ok &= check("memory c", (cl_uint *)csvm, cmem, global_size); + + { + void *ptrs[] = { asvm, bsvm, csvm }; + + error = clEnqueueSVMUnmap(queue1, (void *)asvm, 0, NULL, &evs[0]); + test_error(error, "clEnqueueSVMUnmap 
failed"); + + error = clEnqueueSVMUnmap(queue1, (void *)bsvm, 0, NULL, &evs[1]); + test_error(error, "clEnqueueSVMUnmap failed"); + + error = clEnqueueSVMUnmap(queue1, (void *)csvm, 0, NULL, &evs[2]); + test_error(error, "clEnqueueSVMUnmap failed"); + + error = clEnqueueSVMFree(queue1, 3, ptrs, NULL, NULL, 0, NULL, &evs[3]); + } + + error = clFlush(queue1); + test_error(error, "clFlush failed"); + + error = wait_and_release("batch 3", evs, 4); + if (error) + return -1; + + clSVMFree(context, asvm); + clSVMFree(context, bsvm); + clSVMFree(context, csvm); + + // The wrappers will clean up the rest + return ok ? 0 : -1; +} + diff --git a/test_conformance/SVM/test_pointer_passing.cpp b/test_conformance/SVM/test_pointer_passing.cpp new file mode 100644 index 00000000..6ef074d3 --- /dev/null +++ b/test_conformance/SVM/test_pointer_passing.cpp @@ -0,0 +1,115 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "common.h" + +const char *SVMPointerPassing_test_kernel[] = { + "__kernel void verify_char(__global uchar* pChar, volatile __global uint* num_correct, uchar expected)\n" + "{\n" + " if(0 == get_global_id(0))\n" + " {\n" + " *num_correct = 0;\n" + " if(*pChar == expected)\n" + " {\n" + " *num_correct=1;\n" + " }\n" + " }\n" + "}\n" +}; + + +// Test that arbitrarily aligned char pointers into shared buffers can be passed directly to a kernel. 
+// This iterates through a buffer passing a pointer to each location to the kernel. +// The buffer is initialized to known values at each location. +// The kernel checks that it finds the expected value at each location. +// TODO: possibly make this work across all base types (including typeN?), also check ptr arithmetic ++,--. +int test_svm_pointer_passing(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements) +{ + clContextWrapper context = NULL; + clProgramWrapper program = NULL; + cl_uint num_devices = 0; + cl_int error = CL_SUCCESS; + clCommandQueueWrapper queues[MAXQ]; + + error = create_cl_objects(deviceID, &SVMPointerPassing_test_kernel[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER); + if(error) return -1; + + clKernelWrapper kernel_verify_char = clCreateKernel(program, "verify_char", &error); + test_error(error,"clCreateKernel failed"); + + size_t bufSize = 256; + char *pbuf = (char*) clSVMAlloc(context, CL_MEM_READ_WRITE, sizeof(cl_uchar)*bufSize, 0); + + cl_int *pNumCorrect = NULL; + pNumCorrect = (cl_int*) clSVMAlloc(context, CL_MEM_READ_WRITE, sizeof(cl_int), 0); + + { + clMemWrapper buf = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, sizeof(cl_uchar)*bufSize, pbuf, &error); + test_error(error, "clCreateBuffer failed."); + + clMemWrapper num_correct = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, sizeof(cl_int), pNumCorrect, &error); + test_error(error, "clCreateBuffer failed."); + + error = clSetKernelArg(kernel_verify_char, 1, sizeof(void*), (void *) &num_correct); + test_error(error, "clSetKernelArg failed"); + + // put values into buf so that we can expect to see these values in the kernel when we pass a pointer to them. 
+ cl_command_queue cmdq = queues[0]; + cl_uchar* pBuf = (cl_uchar*) clEnqueueMapBuffer(cmdq, buf, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(cl_uchar)*bufSize, 0, NULL,NULL, &error); + test_error2(error, pBuf, "clEnqueueMapBuffer failed"); + for(int i = 0; i<(int)bufSize; i++) + { + pBuf[i]= (cl_uchar)i; + } + error = clEnqueueUnmapMemObject(cmdq, buf, pBuf, 0,NULL,NULL); + test_error(error, "clEnqueueUnmapMemObject failed."); + + for (cl_uint ii = 0; iipA[i]++;\n" + " pBufs->pB[i]++;\n" + " pBufs->pC[i]++;\n" + "}\n" +}; + +// Test that clSetKernelExecInfo works correctly with CL_KERNEL_EXEC_INFO_SVM_PTRS flag. +// +int test_set_kernel_exec_info_svm_ptrs(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements) +{ + clContextWrapper c = NULL; + clProgramWrapper program = NULL; + cl_uint num_devices = 0; + cl_int error = CL_SUCCESS; + clCommandQueueWrapper q; + + //error = create_cl_objects(deviceID, &set_kernel_exec_info_svm_ptrs_kernel[0], &context, &program, &q, &num_devices, CL_DEVICE_SVM_FINE_GRAIN); + error = create_cl_objects(deviceID, &set_kernel_exec_info_svm_ptrs_kernel[0], &c, &program, &q, &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER); + if(error == 1) return 0; // no devices capable of requested SVM level, so don't execute but count test as passing. + if(error < 0) return -1; // fail test. 
+ + + clKernelWrapper k = clCreateKernel(program, "set_kernel_exec_info_test", &error); + test_error(error, "clCreateKernel failed"); + + size_t size = num_elements*sizeof(int); + //int* pA = (int*) clSVMalloc(c, CL_MEM_READ_WRITE | CL_DEVICE_SVM_FINE_GRAIN_SYSTEM, sizeof(int)*num_elements, 0); + //int* pB = (int*) clSVMalloc(c, CL_MEM_READ_WRITE | CL_DEVICE_SVM_FINE_GRAIN_SYSTEM, sizeof(int)*num_elements, 0); + //int* pC = (int*) clSVMalloc(c, CL_MEM_READ_WRITE | CL_DEVICE_SVM_FINE_GRAIN_SYSTEM, sizeof(int)*num_elements, 0); + int* pA = (int*) clSVMAlloc(c, CL_MEM_READ_WRITE, size, 0); + int* pB = (int*) clSVMAlloc(c, CL_MEM_READ_WRITE, size, 0); + int* pC = (int*) clSVMAlloc(c, CL_MEM_READ_WRITE, size, 0); + BufPtrs* pBuf = (BufPtrs*) clSVMAlloc(c, CL_MEM_READ_WRITE, sizeof(BufPtrs), 0); + + bool failed = false; + { + clMemWrapper ba,bb,bc,bBuf; + ba = clCreateBuffer(c, CL_MEM_USE_HOST_PTR, size, pA, &error); + test_error(error, "clCreateBuffer failed"); + bb = clCreateBuffer(c, CL_MEM_USE_HOST_PTR, size, pB, &error); + test_error(error, "clCreateBuffer failed"); + bc = clCreateBuffer(c, CL_MEM_USE_HOST_PTR, size, pC, &error); + test_error(error, "clCreateBuffer failed"); + bBuf = clCreateBuffer(c, CL_MEM_USE_HOST_PTR, sizeof(BufPtrs), pBuf, &error); + test_error(error, "clCreateBuffer failed"); + + clEnqueueMapBuffer(q, ba, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, size, 0, NULL, NULL, &error); + test_error(error, "clEnqueueMapBuffer failed"); + clEnqueueMapBuffer(q, bb, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, size, 0, NULL, NULL, &error); + test_error(error, "clEnqueueMapBuffer failed"); + clEnqueueMapBuffer(q, bc, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, size, 0, NULL, NULL, &error); + test_error(error, "clEnqueueMapBuffer failed"); + clEnqueueMapBuffer(q, bBuf, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(BufPtrs), 0, NULL, NULL, &error); + test_error(error, "clEnqueueMapBuffer failed"); + + for(int i = 0; i < num_elements; i++) pA[i] = pB[i] = pC[i] = 0; + + 
pBuf->pA = pA; + pBuf->pB = pB; + pBuf->pC = pC; + + error = clEnqueueUnmapMemObject(q, ba, pA, 0,NULL,NULL); + test_error(error, " clEnqueueUnmapMemObject failed."); + error = clEnqueueUnmapMemObject(q, bb, pB, 0,NULL,NULL); + test_error(error, " clEnqueueUnmapMemObject failed."); + error = clEnqueueUnmapMemObject(q, bc, pC, 0,NULL,NULL); + test_error(error, " clEnqueueUnmapMemObject failed."); + error = clEnqueueUnmapMemObject(q, bBuf, pBuf, 0,NULL,NULL); + test_error(error, " clEnqueueUnmapMemObject failed."); + + + error = clSetKernelArgSVMPointer(k, 0, pBuf); + test_error(error, "clSetKernelArg failed"); + + error = clSetKernelExecInfo(k, CL_KERNEL_EXEC_INFO_SVM_PTRS, sizeof(BufPtrs), pBuf); + test_error(error, "clSetKernelExecInfo failed"); + + size_t range = num_elements; + error = clEnqueueNDRangeKernel(q, k, 1, NULL, &range, NULL, 0, NULL, NULL); + test_error(error,"clEnqueueNDRangeKernel failed"); + + error = clFinish(q); + test_error(error, "clFinish failed."); + + clEnqueueMapBuffer(q, ba, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, size, 0, NULL, NULL, &error); + test_error(error, "clEnqueueMapBuffer failed"); + clEnqueueMapBuffer(q, bb, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, size, 0, NULL, NULL, &error); + test_error(error, "clEnqueueMapBuffer failed"); + clEnqueueMapBuffer(q, bc, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, size, 0, NULL, NULL, &error); + test_error(error, "clEnqueueMapBuffer failed"); + + for(int i = 0; i < num_elements; i++) + { + if(pA[i] + pB[i] + pC[i] != 3) + failed = true; + } + + error = clEnqueueUnmapMemObject(q, ba, pA, 0,NULL,NULL); + test_error(error, " clEnqueueUnmapMemObject failed."); + error = clEnqueueUnmapMemObject(q, bb, pB, 0,NULL,NULL); + test_error(error, " clEnqueueUnmapMemObject failed."); + error = clEnqueueUnmapMemObject(q, bc, pC, 0,NULL,NULL); + test_error(error, " clEnqueueUnmapMemObject failed."); + } + + error = clFinish(q); + test_error(error, " clFinish failed."); + + clSVMFree(c, pA); + clSVMFree(c, pB); + 
clSVMFree(c, pC); + clSVMFree(c, pBuf); + + if(failed) return -1; + + return 0; +} diff --git a/test_conformance/SVM/test_shared_address_space_coarse_grain.cpp b/test_conformance/SVM/test_shared_address_space_coarse_grain.cpp new file mode 100644 index 00000000..a2a9387a --- /dev/null +++ b/test_conformance/SVM/test_shared_address_space_coarse_grain.cpp @@ -0,0 +1,282 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "common.h" + +// Creates linked list using host code +cl_int create_linked_lists_on_host(cl_command_queue cmdq, cl_mem nodes, Node *pNodes2, cl_int ListLength, size_t numLists, cl_bool useNewAPI ) +{ + cl_int error = CL_SUCCESS; + + log_info("SVM: creating linked list on host "); + + Node *pNodes; + if (useNewAPI == CL_FALSE) + { + pNodes = (Node*) clEnqueueMapBuffer(cmdq, nodes, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(Node)*ListLength*numLists, 0, NULL,NULL, &error); + test_error2(error, pNodes, "clEnqMapBuffer failed"); + } + else + { + pNodes = pNodes2; + error = clEnqueueSVMMap(cmdq, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, pNodes2, sizeof(Node)*ListLength*numLists, 0, NULL,NULL); + test_error2(error, pNodes, "clEnqueueSVMMap failed"); + } + + create_linked_lists(pNodes, numLists, ListLength); + + if (useNewAPI == CL_FALSE) + { + error = clEnqueueUnmapMemObject(cmdq, nodes, pNodes, 0,NULL,NULL); + test_error(error, "clEnqueueUnmapMemObject failed."); + } + else + { + error 
= clEnqueueSVMUnmap(cmdq, pNodes2, 0, NULL, NULL); + test_error(error, "clEnqueueSVMUnmap failed."); + } + + error = clFinish(cmdq); + test_error(error, "clFinish failed."); + return error; +} + +// Purpose: uses host code to verify correctness of the linked list +cl_int verify_linked_lists_on_host(int ci, cl_command_queue cmdq, cl_mem nodes, Node *pNodes2, cl_int ListLength, size_t numLists, cl_bool useNewAPI ) +{ + cl_int error = CL_SUCCESS; + cl_int correct_count; + + Node *pNodes; + if (useNewAPI == CL_FALSE) + { + pNodes = (Node*) clEnqueueMapBuffer(cmdq, nodes, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(Node)*ListLength * numLists, 0, NULL,NULL, &error); + test_error2(error, pNodes, "clEnqueueMapBuffer failed"); + } + else + { + pNodes = pNodes2; + error = clEnqueueSVMMap(cmdq, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, pNodes2, sizeof(Node)*ListLength * numLists, 0, NULL,NULL); + test_error2(error, pNodes, "clEnqueueSVMMap failed"); + } + + correct_count = 0; + + error = verify_linked_lists(pNodes, numLists, ListLength); + if(error) return -1; + + if (useNewAPI == CL_FALSE) + { + error = clEnqueueUnmapMemObject(cmdq, nodes, pNodes, 0,NULL,NULL); + test_error(error, "clEnqueueUnmapMemObject failed."); + } + else + { + error = clEnqueueSVMUnmap(cmdq, pNodes2, 0,NULL,NULL); + test_error(error, "clEnqueueSVMUnmap failed."); + } + + error = clFinish(cmdq); + test_error(error, "clFinish failed."); + return error; +} + +cl_int create_linked_lists_on_device(int ci, cl_command_queue cmdq, cl_mem allocator, cl_kernel kernel_create_lists, size_t numLists ) +{ + cl_int error = CL_SUCCESS; + log_info("SVM: creating linked list on device: %d ", ci); + + size_t *pAllocator = (size_t*) clEnqueueMapBuffer(cmdq, allocator, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(cl_int), 0, NULL,NULL, &error); + test_error2(error, pAllocator, "clEnqueueMapBuffer failed"); + // reset allocator index + *pAllocator = numLists; // the first numLists elements of the nodes array are already 
allocated (they hold the head of each list). + error = clEnqueueUnmapMemObject(cmdq, allocator, pAllocator, 0,NULL,NULL); + test_error(error, " clEnqueueUnmapMemObject failed."); + + error = clEnqueueNDRangeKernel(cmdq, kernel_create_lists, 1, NULL, &numLists, NULL, 0, NULL, NULL); + test_error(error, "clEnqueueNDRange failed."); + error = clFinish(cmdq); + test_error(error, "clFinish failed."); + + return error; +} + +cl_int verify_linked_lists_on_device(int vi, cl_command_queue cmdq,cl_mem num_correct, cl_kernel kernel_verify_lists, cl_int ListLength, size_t numLists ) +{ + cl_int error = CL_SUCCESS; + + log_info(" and verifying on device: %d ", vi); + + cl_int *pNumCorrect = (cl_int*) clEnqueueMapBuffer(cmdq, num_correct, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(cl_int), 0, NULL,NULL, &error); + test_error2(error, pNumCorrect, "clEnqueueMapBuffer failed"); + + *pNumCorrect = 0; // reset numCorrect to zero + + error = clEnqueueUnmapMemObject(cmdq, num_correct, pNumCorrect, 0,NULL,NULL); + test_error(error, "clEnqueueUnmapMemObject failed."); + + error = clEnqueueNDRangeKernel(cmdq, kernel_verify_lists, 1, NULL, &numLists, NULL, 0, NULL, NULL); + test_error(error,"clEnqueueNDRangeKernel failed"); + + pNumCorrect = (cl_int*) clEnqueueMapBuffer(cmdq, num_correct, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(cl_int), 0, NULL,NULL, &error); + test_error2(error, pNumCorrect, "clEnqueueMapBuffer failed"); + cl_int correct_count = *pNumCorrect; + error = clEnqueueUnmapMemObject(cmdq, num_correct, pNumCorrect, 0,NULL,NULL); + test_error(error, "clEnqueueUnmapMemObject failed"); + clFinish(cmdq); + test_error(error,"clFinish failed"); + + if(correct_count != ListLength * (cl_uint)numLists) + { + error = -1; + log_info("Failed\n"); + } + else + log_info("Passed\n"); + + return error; +} + +// This tests that all devices and the host share a common address space; using only the coarse-grain features. 
+// This is done by creating a linked list on a device and then verifying the correctness of the list +// on another device or the host. This basic test is performed for all combinations of devices and the host that exist within +// the platform. The test passes only if every combination passes. +int shared_address_space_coarse_grain(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements, cl_bool useNewAPI) +{ + clContextWrapper context = NULL; + clProgramWrapper program = NULL; + cl_uint num_devices = 0; + cl_int error = CL_SUCCESS; + clCommandQueueWrapper queues[MAXQ]; + + error = create_cl_objects(deviceID, &linked_list_create_and_verify_kernels[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER); + if(error) return -1; + + size_t numLists = num_elements; + cl_int ListLength = 32; + + clKernelWrapper kernel_create_lists = clCreateKernel(program, "create_linked_lists", &error); + test_error(error, "clCreateKernel failed"); + + clKernelWrapper kernel_verify_lists = clCreateKernel(program, "verify_linked_lists", &error); + test_error(error, "clCreateKernel failed"); + + // this buffer holds the linked list nodes. 
+ Node* pNodes = (Node*) clSVMAlloc(context, CL_MEM_READ_WRITE, sizeof(Node)*ListLength*numLists, 0); + + { + cl_bool usesSVMpointer = CL_FALSE; + clMemWrapper nodes; + if (useNewAPI == CL_FALSE) + { + nodes = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, sizeof(Node)*ListLength*numLists, pNodes, &error); + test_error(error, "clCreateBuffer failed."); + + // verify if buffer uses SVM pointer + size_t paramSize = 0; + error = clGetMemObjectInfo(nodes, CL_MEM_USES_SVM_POINTER, 0, 0, ¶mSize); + test_error(error, "clGetMemObjectInfo failed."); + + if (paramSize != sizeof(cl_bool)) + { + log_error("clGetMemObjectInfo(CL_MEM_USES_SVM_POINTER) returned wrong size."); + return -1; + } + + error = clGetMemObjectInfo(nodes, CL_MEM_USES_SVM_POINTER, sizeof(cl_bool), &usesSVMpointer, 0); + test_error(error, "clGetMemObjectInfo failed."); + + if (usesSVMpointer != CL_TRUE) + { + log_error("clGetMemObjectInfo(CL_MEM_USES_SVM_POINTER) returned CL_FALSE for buffer created from SVM pointer."); + return -1; + } + } + + // this buffer holds an index into the nodes buffer, it is used for node allocation + clMemWrapper allocator = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &error); + test_error(error, "clCreateBuffer failed."); + + error = clGetMemObjectInfo(allocator, CL_MEM_USES_SVM_POINTER, sizeof(cl_bool), &usesSVMpointer, 0); + test_error(error, "clGetMemObjectInfo failed."); + + if (usesSVMpointer != CL_FALSE) + { + log_error("clGetMemObjectInfo(CL_MEM_USES_SVM_POINTER) returned CL_TRUE for non-SVM buffer."); + return -1; + } + + // this buffer holds the count of correct nodes, which is computed by the verify kernel. 
+ clMemWrapper num_correct = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &error); + test_error(error, "clCreateBuffer failed."); + + if (useNewAPI == CL_TRUE) + error |= clSetKernelArgSVMPointer(kernel_create_lists, 0, pNodes); + else + error |= clSetKernelArg(kernel_create_lists, 0, sizeof(void*), (void *) &nodes); + + error |= clSetKernelArg(kernel_create_lists, 1, sizeof(void*), (void *) &allocator); + error |= clSetKernelArg(kernel_create_lists, 2, sizeof(cl_int), (void *) &ListLength); + + error |= clSetKernelArgSVMPointer(kernel_verify_lists, 0, pNodes); + error |= clSetKernelArg(kernel_verify_lists, 1, sizeof(void*), (void *) &num_correct); + error |= clSetKernelArg(kernel_verify_lists, 2, sizeof(cl_int), (void *) &ListLength); + test_error(error, "clSetKernelArg failed"); + + // Create linked list on one device and verify on another device (or the host). + // Do this for all possible combinations of devices and host within the platform. + for (int ci=0; ci<(int)num_devices+1; ci++) // ci is CreationIndex, index of device/q to create linked list on + { + for (int vi=0; vi<(int)num_devices+1; vi++) // vi is VerificationIndex, index of device/q to verify linked list on + { + if(ci == num_devices) // last device index represents the host, note the num_device+1 above. 
+ { + error = create_linked_lists_on_host(queues[0], nodes, pNodes, ListLength, numLists, useNewAPI); + if(error) return -1; + } + else + { + error = create_linked_lists_on_device(ci, queues[ci], allocator, kernel_create_lists, numLists); + if(error) return -1; + } + + if(vi == num_devices) + { + error = verify_linked_lists_on_host(vi, queues[0], nodes, pNodes, ListLength, numLists, useNewAPI); + if(error) return -1; + } + else + { + error = verify_linked_lists_on_device(vi, queues[vi], num_correct, kernel_verify_lists, ListLength, numLists); + if(error) return -1; + } + } + } + } + + clSVMFree(context, pNodes); + + return 0; +} + +int test_shared_address_space_coarse_grain_old_api(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements) +{ + return shared_address_space_coarse_grain(deviceID, context2, queue, num_elements, CL_FALSE); +} + +int test_shared_address_space_coarse_grain_new_api(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements) +{ + return shared_address_space_coarse_grain(deviceID, context2, queue, num_elements, CL_TRUE); +} diff --git a/test_conformance/SVM/test_shared_address_space_fine_grain.cpp b/test_conformance/SVM/test_shared_address_space_fine_grain.cpp new file mode 100644 index 00000000..ece3320d --- /dev/null +++ b/test_conformance/SVM/test_shared_address_space_fine_grain.cpp @@ -0,0 +1,101 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "common.h" + + +// This tests that all devices and the host share a common address space using fine-grain mode with no buffers. +// This is done by creating a linked list on a device and then verifying the correctness of the list +// on another device or the host. This basic test is performed for all combinations of devices and the host that exist within +// the platform. The test passes only if every combination passes. +int test_shared_address_space_fine_grain(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements) +{ + clContextWrapper context = NULL; + clProgramWrapper program = NULL; + cl_uint num_devices = 0; + cl_int error = CL_SUCCESS; + clCommandQueueWrapper queues[MAXQ]; + + error = create_cl_objects(deviceID, &linked_list_create_and_verify_kernels[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_FINE_GRAIN_SYSTEM); + if(error == 1) return 0; // no devices capable of requested SVM level, so don't execute but count test as passing. + if(error < 0) return -1; // fail test. + + size_t numLists = num_elements; + cl_int ListLength = 32; + + clKernelWrapper kernel_create_lists = clCreateKernel(program, "create_linked_lists", &error); + test_error(error, "clCreateKernel failed"); + + clKernelWrapper kernel_verify_lists = clCreateKernel(program, "verify_linked_lists", &error); + test_error(error, "clCreateKernel failed"); + + // this allocation holds the linked list nodes. 
+ // FIXME: remove the alignment once prototype can handle it + Node* pNodes = (Node*) align_malloc(numLists*ListLength*sizeof(Node),128); + test_error2(error, pNodes, "malloc failed"); + + // this allocation holds an index into the nodes buffer, it is used for node allocation + size_t* pAllocator = (size_t*) align_malloc(sizeof(cl_int), 128); + test_error2(error, pAllocator, "malloc failed"); + + // this allocation holds the count of correct nodes, which is computed by the verify kernel. + cl_int* pNum_correct = (cl_int*) align_malloc(sizeof(cl_int), 128); + test_error2(error, pNum_correct, "malloc failed"); + + + error |= clSetKernelArgSVMPointer(kernel_create_lists, 0, pNodes); + error |= clSetKernelArgSVMPointer(kernel_create_lists, 1, pAllocator); + error |= clSetKernelArg(kernel_create_lists, 2, sizeof(cl_int),(void *) &ListLength); + + error |= clSetKernelArgSVMPointer(kernel_verify_lists, 0, pNodes); + error |= clSetKernelArgSVMPointer(kernel_verify_lists, 1, pNum_correct); + error |= clSetKernelArg(kernel_verify_lists, 2, sizeof(cl_int), (void *) &ListLength); + test_error(error, "clSetKernelArg failed"); + + // Create linked list on one device and verify on another device (or the host). + // Do this for all possible combinations of devices and host within the platform. + for (int ci=0; ci<(int)num_devices+1; ci++) // ci is CreationIndex, index of device/q to create linked list on + { + for (int vi=0; vi<(int)num_devices+1; vi++) // vi is VerificationIndex, index of device/q to verify linked list on + { + if(ci == num_devices) // last device index represents the host, note the num_device+1 above. 
+ { + log_info("creating linked list on host "); + create_linked_lists(pNodes, numLists, ListLength); + } + else + { + error = create_linked_lists_on_device_no_map(ci, queues[ci], pAllocator, kernel_create_lists, numLists); + if(error) return -1; + } + + if(vi == num_devices) + { + error = verify_linked_lists(pNodes, numLists, ListLength); + if(error) return -1; + } + else + { + error = verify_linked_lists_on_device_no_map(vi, queues[vi], pNum_correct, kernel_verify_lists, ListLength, numLists); + if(error) return -1; + } + } + } + + align_free(pNodes); + align_free(pAllocator); + align_free(pNum_correct); + return 0; +} diff --git a/test_conformance/SVM/test_shared_address_space_fine_grain_buffers.cpp b/test_conformance/SVM/test_shared_address_space_fine_grain_buffers.cpp new file mode 100644 index 00000000..e1210f92 --- /dev/null +++ b/test_conformance/SVM/test_shared_address_space_fine_grain_buffers.cpp @@ -0,0 +1,138 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "common.h" + + + + +cl_int create_linked_lists_on_device_no_map(int ci, cl_command_queue cmdq, size_t* pAllocator, cl_kernel kernel_create_lists, size_t numLists ) +{ + cl_int error = CL_SUCCESS; + log_info("SVM: creating linked list on device: %d ", ci); + + // reset allocator index + *pAllocator = numLists; // the first numLists elements of the nodes array are already allocated (they hold the head of each list). 
+ error = clEnqueueNDRangeKernel(cmdq, kernel_create_lists, 1, NULL, &numLists, NULL, 0, NULL, NULL); + test_error(error, "clEnqueueNDRange failed."); + error = clFinish(cmdq); + test_error(error, "clFinish failed."); + return error; +} + +cl_int verify_linked_lists_on_device_no_map(int vi, cl_command_queue cmdq,cl_int* pNumCorrect, cl_kernel kernel_verify_lists, cl_int ListLength, size_t numLists ) +{ + cl_int error = CL_SUCCESS; + + log_info(" and verifying on device: %d ", vi); + + *pNumCorrect = 0; // reset numCorrect to zero + + error = clEnqueueNDRangeKernel(cmdq, kernel_verify_lists, 1, NULL, &numLists, NULL, 0, NULL, NULL); + test_error(error,"clEnqueueNDRangeKernel failed"); + clFinish(cmdq); + test_error(error,"clFinish failed"); + + cl_int correct_count = *pNumCorrect; + if(correct_count != ListLength * (cl_uint)numLists) + { + error = -1; + log_info("Failed\n"); + } + else + log_info("Passed\n"); + + return error; +} + +// This tests that all devices and the host share a common address space; using only the fine-grain with buffers mode. +// This is done by creating a linked list on a device and then verifying the correctness of the list +// on another device or the host. This basic test is performed for all combinations of devices and the host that exist within +// the platform. The test passes only if every combination passes. +int test_shared_address_space_fine_grain_buffers(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements) +{ + clContextWrapper context = NULL; + clProgramWrapper program = NULL; + cl_uint num_devices = 0; + cl_int error = CL_SUCCESS; + clCommandQueueWrapper queues[MAXQ]; + + error = create_cl_objects(deviceID, &linked_list_create_and_verify_kernels[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_FINE_GRAIN_BUFFER); + if(error == 1) return 0; // no devices capable of requested SVM level, so don't execute but count test as passing. + if(error < 0) return -1; // fail test. 
+ + size_t numLists = num_elements; + cl_int ListLength = 32; + + clKernelWrapper kernel_create_lists = clCreateKernel(program, "create_linked_lists", &error); + test_error(error, "clCreateKernel failed"); + + clKernelWrapper kernel_verify_lists = clCreateKernel(program, "verify_linked_lists", &error); + test_error(error, "clCreateKernel failed"); + + // this buffer holds the linked list nodes. + Node* pNodes = (Node*) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeof(Node)*ListLength*numLists, 0); + + // this buffer holds an index into the nodes buffer, it is used for node allocation + size_t *pAllocator = (size_t*) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeof(size_t), 0); + + // this buffer holds the count of correct nodes, which is computed by the verify kernel. + cl_int *pNumCorrect = (cl_int*) clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, sizeof(cl_int), 0); + + error |= clSetKernelArgSVMPointer(kernel_create_lists, 0, pNodes); + error |= clSetKernelArgSVMPointer(kernel_create_lists, 1, pAllocator); + error |= clSetKernelArg(kernel_create_lists, 2, sizeof(cl_int), (void *) &ListLength); + + error |= clSetKernelArgSVMPointer(kernel_verify_lists, 0, pNodes); + error |= clSetKernelArgSVMPointer(kernel_verify_lists, 1, pNumCorrect); + error |= clSetKernelArg(kernel_verify_lists, 2, sizeof(cl_int), (void *) &ListLength); + test_error(error, "clSetKernelArg failed"); + + // Create linked list on one device and verify on another device (or the host). + // Do this for all possible combinations of devices and host within the platform. + for (int ci=0; ci<(int)num_devices+1; ci++) // ci is CreationIndex, index of device/q to create linked list on + { + for (int vi=0; vi<(int)num_devices+1; vi++) // vi is VerificationIndex, index of device/q to verify linked list on + { + if(ci == num_devices) // last device index represents the host, note the num_device+1 above. 
+ { + log_info("SVM: creating linked list on host "); + create_linked_lists(pNodes, numLists, ListLength); + } + else + { + error = create_linked_lists_on_device_no_map(ci, queues[ci], pAllocator, kernel_create_lists, numLists); + if(error) return -1; + } + + if(vi == num_devices) + { + error = verify_linked_lists(pNodes, numLists, ListLength); + if(error) return -1; + } + else + { + error = verify_linked_lists_on_device_no_map(vi, queues[vi], pNumCorrect, kernel_verify_lists, ListLength, numLists); + if(error) return -1; + } + } + } + + clSVMFree(context, pNodes); + clSVMFree(context, pAllocator); + clSVMFree(context, pNumCorrect); + + return 0; +} diff --git a/test_conformance/SVM/test_shared_sub_buffers.cpp b/test_conformance/SVM/test_shared_sub_buffers.cpp new file mode 100644 index 00000000..7cf26ad0 --- /dev/null +++ b/test_conformance/SVM/test_shared_sub_buffers.cpp @@ -0,0 +1,241 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "common.h" + +const char *shared_sub_buffers_test_kernel[] = { + "typedef struct Node {\n" + " int global_id;\n" + " int position_in_list;\n" + " __global struct Node* pNext;\n" + "} Node;\n" + + // create linked lists that use nodes from 2 different buffers + "__global Node* allocate_node(__global Node* pNodes1, __global Node* pNodes2, volatile __global int* allocation_index, size_t i)\n" + "{\n" + // mix things up, adjacent work items will allocate from different buffers + " if(i & 0x1)\n" + " return &pNodes1[atomic_inc(allocation_index)];\n" + " else\n" + " return &pNodes2[atomic_inc(allocation_index)];\n" + "}\n" + + // The allocation_index parameter must be initialized on the host to N work-items + // The first N nodes in pNodes will be the heads of the lists. + // This tests passing 4 different sub-buffers that come from two parent buffers. + // Note that we have arguments that appear to be unused, but they are required so that system knows to get all the sub-buffers on to the device + "__kernel void create_linked_lists(__global Node* pNodes_sub1, __global Node* pNodes2_sub1, __global Node* pNodes_sub2, __global Node* pNodes2_sub2, volatile __global int* allocation_index, int list_length) \n" + "{\n" + " size_t i = get_global_id(0);\n" + " __global Node *pNode = &pNodes_sub1[i];\n" + " pNode->global_id = i;\n" + " pNode->position_in_list = 0;\n" + " __global Node *pNew;\n" + " for(int j=1; j < list_length; j++) {\n" + " pNew = allocate_node(pNodes_sub1, pNodes2_sub1, allocation_index, i);\n" + " pNew->global_id = i;\n" + " pNew->position_in_list = j;\n" + " pNode->pNext = pNew; // link new node onto end of list\n" + " pNode = pNew; // move to end of list\n" + " }\n" + "}\n" + // Note that we have arguments that appear to be unused, but they are required so that system knows to get all the sub-buffers on to the device + "__kernel void verify_linked_lists(__global Node* pNodes_sub1, __global Node* pNodes2_sub1, __global Node* pNodes_sub2, __global 
Node* pNodes2_sub2, volatile __global uint* num_correct, int list_length)\n" + "{\n" + " size_t i = get_global_id(0);\n" + " __global Node *pNode = &pNodes_sub1[i];\n" + " for(int j=0; j < list_length; j++) {\n" + " if( pNode->global_id == i && pNode->position_in_list == j)\n" + " atomic_inc(num_correct);\n" + " else \n" + " break;\n" + " pNode = pNode->pNext;\n" + " }\n" + "}\n" +}; + + +// Creates linked list using host code. +cl_int create_linked_lists_on_host_sb(cl_command_queue cmdq, cl_mem nodes, cl_mem nodes2, cl_int ListLength, size_t numLists ) +{ + cl_int error = CL_SUCCESS; + + log_info("SVM: creating linked list on host "); + + Node *pNodes = (Node*) clEnqueueMapBuffer(cmdq, nodes, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(Node)*ListLength*numLists, 0, NULL,NULL, &error); + test_error2(error, pNodes, "clEnqueueMapBuffer failed"); + + Node *pNodes2 = (Node*) clEnqueueMapBuffer(cmdq, nodes2, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(Node)*ListLength*numLists, 0, NULL,NULL, &error); + test_error2(error, pNodes2, "clEnqueueMapBuffer failed"); + + create_linked_lists(pNodes, numLists, ListLength); + + error = clEnqueueUnmapMemObject(cmdq, nodes, pNodes, 0,NULL,NULL); + test_error(error, "clEnqueueUnmapMemObject failed"); + error = clEnqueueUnmapMemObject(cmdq, nodes2, pNodes2, 0,NULL,NULL); + test_error(error, "clEnqueueUnmapMemObject failed"); + error = clFinish(cmdq); + test_error(error, "clFinish failed"); + return error; +} + +// Verify correctness of the linked list using host code. 
+cl_int verify_linked_lists_on_host_sb(int ci, cl_command_queue cmdq, cl_mem nodes, cl_mem nodes2, cl_int ListLength, size_t numLists ) +{ + cl_int error = CL_SUCCESS; + + //log_info(" and verifying on host "); + + Node *pNodes = (Node*) clEnqueueMapBuffer(cmdq, nodes, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(Node)*ListLength * numLists, 0, NULL,NULL, &error); + test_error2(error, pNodes, "clEnqueueMapBuffer failed"); + Node *pNodes2 = (Node*) clEnqueueMapBuffer(cmdq, nodes2, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(Node)*ListLength * numLists, 0, NULL,NULL, &error); + test_error2(error, pNodes, "clEnqueueMapBuffer failed"); + + error = verify_linked_lists(pNodes, numLists, ListLength); + if(error) return -1; + + error = clEnqueueUnmapMemObject(cmdq, nodes, pNodes, 0,NULL,NULL); + test_error(error, "clEnqueueUnmapMemObject failed"); + error = clEnqueueUnmapMemObject(cmdq, nodes2, pNodes2, 0,NULL,NULL); + test_error(error, "clEnqueueUnmapMemObject failed"); + error = clFinish(cmdq); + test_error(error, "clFinish failed"); + return error; +} + + +// This tests that shared sub-buffers can be created and that they inherit the flags from the parent buffer when no flags are specified. +// This tests that passing only the sub-buffers to a kernel works. +// The test is derived from the cross-buffer pointers test which +// tests that shared buffers are able to contain pointers that point to other shared buffers. +// This tests that all devices and the host share a common address space; using only the coarse-grain features. +// This is done by creating a linked list on a device and then verifying the correctness of the list +// on another device or the host. +// The linked list nodes are allocated from two different buffers this is done to ensure that cross buffer pointers work correctly. +// This basic test is performed for all combinations of devices and the host. 
+int test_shared_sub_buffers(cl_device_id deviceID, cl_context context2, cl_command_queue queue, int num_elements) +{ + clContextWrapper context = NULL; + clProgramWrapper program = NULL; + cl_uint num_devices = 0; + cl_int error = CL_SUCCESS; + clCommandQueueWrapper queues[MAXQ]; + + error = create_cl_objects(deviceID, &shared_sub_buffers_test_kernel[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER); + if(error) return -1; + + size_t numLists = num_elements; + if(numLists & 0x1) numLists++; // force even size, so we can easily create two sub-buffers of same size. + + cl_int ListLength = 32; + + clKernelWrapper kernel_create_lists = clCreateKernel(program, "create_linked_lists", &error); + test_error(error, "clCreateKernel failed"); + + clKernelWrapper kernel_verify_lists = clCreateKernel(program, "verify_linked_lists", &error); + test_error(error, "clCreateKernel failed"); + + size_t nodes_bufsize = sizeof(Node)*ListLength*numLists; + Node* pNodes = (Node*) clSVMAlloc(context, CL_MEM_READ_WRITE, nodes_bufsize, 0); + Node* pNodes2 = (Node*) clSVMAlloc(context, CL_MEM_READ_WRITE, nodes_bufsize, 0); + + { + // this buffer holds some of the linked list nodes. + clMemWrapper nodes = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, nodes_bufsize, pNodes, &error); + test_error(error, "clCreateBuffer failed."); + + cl_buffer_region r; + r.origin = 0; + r.size = nodes_bufsize / 2; + // this should inherit the flag settings from nodes buffer + clMemWrapper nodes_sb1 = clCreateSubBuffer(nodes, 0, CL_BUFFER_CREATE_TYPE_REGION, (void*)&r, &error); + test_error(error, "clCreateSubBuffer"); + r.origin = nodes_bufsize / 2; + clMemWrapper nodes_sb2 = clCreateSubBuffer(nodes, 0, CL_BUFFER_CREATE_TYPE_REGION, (void*)&r, &error); + test_error(error, "clCreateSubBuffer"); + + + // this buffer holds some of the linked list nodes. 
+ clMemWrapper nodes2 = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, sizeof(Node)*ListLength*numLists, pNodes2, &error); + test_error(error, "clCreateBuffer failed."); + r.origin = 0; + r.size = nodes_bufsize / 2; + // this should inherit the flag settings from nodes buffer + clMemWrapper nodes2_sb1 = clCreateSubBuffer(nodes2, 0, CL_BUFFER_CREATE_TYPE_REGION, (void*)&r, &error); + test_error(error, "clCreateSubBuffer"); + r.origin = nodes_bufsize / 2; + clMemWrapper nodes2_sb2 = clCreateSubBuffer(nodes2, 0, CL_BUFFER_CREATE_TYPE_REGION,(void*)&r, &error); + test_error(error, "clCreateSubBuffer"); + + + + // this buffer holds the index into the nodes buffer that is used for node allocation + clMemWrapper allocator = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &error); + test_error(error, "clCreateBuffer failed."); + + // this buffer holds the count of correct nodes which is computed by the verify kernel. + clMemWrapper num_correct = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &error); + test_error(error, "clCreateBuffer failed."); + + error |= clSetKernelArg(kernel_create_lists, 0, sizeof(void*), (void *) &nodes_sb1); + error |= clSetKernelArg(kernel_create_lists, 1, sizeof(void*), (void *) &nodes2_sb1); + error |= clSetKernelArg(kernel_create_lists, 2, sizeof(void*), (void *) &nodes_sb2); + error |= clSetKernelArg(kernel_create_lists, 3, sizeof(void*), (void *) &nodes2_sb2); + error |= clSetKernelArg(kernel_create_lists, 4, sizeof(void*), (void *) &allocator); + error |= clSetKernelArg(kernel_create_lists, 5, sizeof(cl_int),(void *) &ListLength); + + error |= clSetKernelArg(kernel_verify_lists, 0, sizeof(void*), (void *) &nodes_sb1); + error |= clSetKernelArg(kernel_verify_lists, 1, sizeof(void*), (void *) &nodes2_sb1); + error |= clSetKernelArg(kernel_verify_lists, 2, sizeof(void*), (void *) &nodes_sb2); + error |= clSetKernelArg(kernel_verify_lists, 3, sizeof(void*), (void *) &nodes2_sb2); + error |= 
clSetKernelArg(kernel_verify_lists, 4, sizeof(void*), (void *) &num_correct); + error |= clSetKernelArg(kernel_verify_lists, 5, sizeof(cl_int),(void *) &ListLength); + test_error(error, "clSetKernelArg failed"); + + // Create linked list on one device and verify on another device (or the host). + // Do this for all possible combinations of devices and host within the platform. + for (int ci=0; ci<(int)num_devices+1; ci++) // ci is CreationIndex, index of device/q to create linked list on + { + for (int vi=0; vi<(int)num_devices+1; vi++) // vi is VerificationIndex, index of device/q to verify linked list on + { + if(ci == num_devices) // last device index represents the host, note the num_device+1 above. + { + error = create_linked_lists_on_host_sb(queues[0], nodes, nodes2, ListLength, numLists); + if(error) return -1; + } + else + { + error = create_linked_lists_on_device(ci, queues[ci], allocator, kernel_create_lists, numLists); + if(error) return -1; + } + + if(vi == num_devices) + { + error = verify_linked_lists_on_host_sb(vi, queues[0], nodes, nodes2, ListLength, numLists); + if(error) return -1; + } + else + { + error = verify_linked_lists_on_device(vi, queues[vi], num_correct, kernel_verify_lists, ListLength, numLists); + if(error) return -1; + } + } // inner loop, vi + } // outer loop, ci + } + clSVMFree(context, pNodes2); + clSVMFree(context, pNodes); + + return 0; +} diff --git a/test_conformance/allocations/CMakeLists.txt b/test_conformance/allocations/CMakeLists.txt new file mode 100644 index 00000000..a42288b4 --- /dev/null +++ b/test_conformance/allocations/CMakeLists.txt @@ -0,0 +1,19 @@ +set(MODULE_NAME ALLOCATIONS) + +set(${MODULE_NAME}_SOURCES + main.cpp + allocation_execute.cpp + allocation_fill.cpp + allocation_functions.cpp + allocation_utils.cpp + ../../test_common/harness/errorHelpers.c + ../../test_common/harness/threadTesting.c + ../../test_common/harness/kernelHelpers.c + ../../test_common/harness/testHarness.c + 
../../test_common/harness/typeWrappers.cpp + ../../test_common/harness/mt19937.c + ../../test_common/harness/msvc9.c + ../../test_common/harness/parseParameters.cpp +) + +include(../CMakeCommon.txt) diff --git a/test_conformance/allocations/Jamfile b/test_conformance/allocations/Jamfile new file mode 100644 index 00000000..aeb88d71 --- /dev/null +++ b/test_conformance/allocations/Jamfile @@ -0,0 +1,19 @@ +project + : requirements +# gcc:-xc++ +# msvc:"/TP" + ; + +exe test_allocations + : allocation_execute.cpp + allocation_fill.cpp + allocation_functions.cpp + allocation_utils.cpp + main.cpp + ; + +install dist + : test_allocations + : debug:$(DIST)/debug/tests/test_conformance/allocations + release:$(DIST)/release/tests/test_conformance/allocations + ; diff --git a/test_conformance/allocations/Makefile b/test_conformance/allocations/Makefile new file mode 100644 index 00000000..096e91cd --- /dev/null +++ b/test_conformance/allocations/Makefile @@ -0,0 +1,46 @@ +ifdef BUILD_WITH_ATF +ATF = -framework ATF +USE_ATF = -DUSE_ATF +endif + +SRCS = main.cpp \ + allocation_functions.cpp \ + allocation_fill.cpp \ + allocation_utils.cpp \ + allocation_execute.cpp \ + ../../test_common/harness/errorHelpers.c \ + ../../test_common/harness/threadTesting.c \ + ../../test_common/harness/kernelHelpers.c \ + ../../test_common/harness/testHarness.c \ + ../../test_common/harness/mt19937.c \ + ../../test_common/harness/typeWrappers.cpp + +DEFINES = DONT_TEST_GARBAGE_POINTERS + +SOURCES = $(abspath $(SRCS)) +LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries +LIBPATH += -L. 
+FRAMEWORK = $(SOURCES) +HEADERS = +TARGET = test_allocations +INCLUDE = +COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32 -Os +CC = c++ +CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE) +CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE) +LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF} + +OBJECTS := ${SOURCES:.c=.o} +OBJECTS := ${OBJECTS:.cpp=.o} + +TARGETOBJECT = +all: $(TARGET) + +$(TARGET): $(OBJECTS) + $(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES) + +clean: + rm -f $(TARGET) $(OBJECTS) + +.DEFAULT: + @echo The target \"$@\" does not exist in Makefile. diff --git a/test_conformance/allocations/allocation_execute.cpp b/test_conformance/allocations/allocation_execute.cpp new file mode 100644 index 00000000..5168cfa4 --- /dev/null +++ b/test_conformance/allocations/allocation_execute.cpp @@ -0,0 +1,333 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "allocation_execute.h" +#include "allocation_functions.h" + + +const char *buffer_kernel_pattern = { + "__kernel void sample_test(%s __global uint *result, __global uint *array_sizes, uint per_item)\n" + "{\n" + "\tint tid = get_global_id(0);\n" + "\tuint r = 0;\n" + "\tuint i;\n" + "\tfor(i=tid*per_item; i<(1+tid)*per_item; i++) {\n" + "%s" + "\t}\n" + "\tresult[tid] = r;\n" + "}\n" }; + +const char *image_kernel_pattern = { + "__kernel void sample_test(%s __global uint *result)\n" + "{\n" + "\tuint4 color;\n" + "\tcolor = (uint4)(0);\n" + "%s" + "\tint x, y;\n" + "%s" + "\tresult[get_global_id(0)] += color.x + color.y + color.z + color.w;\n" + "}\n" }; + +const char *read_pattern = { + "\tfor(y=0; y max_size) + max_size = (cl_uint)(size/sizeof(cl_uint)); + } + buffer_sizes = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeof(cl_uint)*number_of_mems_used, sizes, &error); + test_error_abort(error, "clCreateBuffer failed"); + error = clSetKernelArg(kernel, number_of_mems_used+1, sizeof(cl_mem), &buffer_sizes); + test_error(error, "clSetKernelArg failed"); + per_item = (cl_uint)ceil((double)max_size/global_dims[0]); + if (per_item > CL_UINT_MAX) + log_error("Size is too large for a uint parameter to the kernel. 
Expect invalid results.\n"); + per_item_uint = (cl_uint)per_item; + error = clSetKernelArg(kernel, number_of_mems_used+2, sizeof(per_item_uint), &per_item_uint); + test_error(error, "clSetKernelArg failed"); + free(sizes); + } + + size_t local_dims[3] = {1,1,1}; + error = get_max_common_work_group_size(context, kernel, global_dims[0], &local_dims[0]); + test_error(error, "get_max_common_work_group_size failed"); + + // Execute the kernel + error = clEnqueueNDRangeKernel(*queue, kernel, 1, NULL, global_dims, local_dims, 0, NULL, &event); + result = check_allocation_error(context, device_id, error, queue); + if (result != SUCCEEDED) { + if (result == FAILED_TOO_BIG) + log_info("\t\tExecute kernel failed: %s (global dim: %ld, local dim: %ld)\n", IGetErrorString(error), global_dims[0], local_dims[0]); + else + print_error(error, "clEnqueueNDRangeKernel failed"); + return result; + } + + // Finish the test + error = clFinish(*queue); + + result = check_allocation_error(context, device_id, error, queue); + + if (result != SUCCEEDED) { + if (result == FAILED_TOO_BIG) + log_info("\t\tclFinish failed: %s.\n", IGetErrorString(error)); + else + print_error(error, "clFinish failed"); + return result; + } + + // Verify that the event from the execution did not have an error + error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(event_status), &event_status, NULL); + test_error_abort(error, "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed"); + if (event_status < 0) { + result = check_allocation_error(context, device_id, event_status, queue); + if (result != SUCCEEDED) { + if (result == FAILED_TOO_BIG) + log_info("\t\tEvent returned from kernel execution indicates failure: %s.\n", IGetErrorString(event_status)); + else + print_error(event_status, "clEnqueueNDRangeKernel failed"); + return result; + } + } + + // If we are not verifying the checksum return here + if (!verify_checksum) { + log_info("Note: Allocations were not initialized so kernel 
execution can not verify correct results.\n"); + return SUCCEEDED; + } + + // Verify the checksum. + // Read back the result + error = clEnqueueReadBuffer(*queue, result_mem, CL_TRUE, 0, sizeof(cl_uint)*NUM_OF_WORK_ITEMS, &returned_results, 0, NULL, NULL); + test_error_abort(error, "clEnqueueReadBuffer failed"); + final_result = 0; + if (test == BUFFER || test == IMAGE_READ || test == BUFFER_NON_BLOCKING || test == IMAGE_READ_NON_BLOCKING) { + // For buffers or read images we are just looking at the sum of what each thread summed up + for (i=0; i size) + size_to_use = size; + + data = (cl_uint*)malloc(size_to_use); + if (data == NULL) { + log_error("Failed to malloc host buffer for writing into buffer.\n"); + return FAILED_ABORT; + } + for (i=0; i height) + image_lines_to_use = height; + + data = (cl_uint*)malloc(width*4*sizeof(cl_uint)*image_lines_to_use); + if (data == NULL) { + log_error("Failed to malloc host buffer for writing into image.\n"); + return FAILED_ABORT; + } + origin[0] = 0; + origin[1] = 0; + origin[2] = 0; + region[0] = width; + region[1] = image_lines_to_use; + region[2] = 1; + for (origin[1] = 0; origin[1] < height - image_lines_to_use; origin[1] += image_lines_to_use) { + // Put values in the data, and keep a checksum as we go along. + for (j=0; j (max_width*max_height)) { + if(NULL != max_size) { + *max_size = max_width * max_height * sizeof(cl_uint) * 4; + } + return FAILED_TOO_BIG; + } + + // We want a close-to-square aspect ratio. 
+ // Note that this implicitly assumes that max width >= max height + found_width = (int)sqrt( (double) num_pixels ); + if( found_width > max_width ) { + found_width = max_width; + } + if (found_width == 0) + found_width = 1; + + found_height = (size_t)num_pixels/found_width; + if (found_height > max_height) { + found_height = max_height; + } + if (found_height == 0) + found_height = 1; + + *width = found_width; + *height = found_height; + + if(NULL != max_size) { + *max_size = found_width * found_height * sizeof(cl_uint) * 4; + } + + return SUCCEEDED; +} + + +int allocate_image2d_read(cl_context context, cl_command_queue *queue, cl_device_id device_id, cl_mem *mem, size_t size_to_allocate, cl_bool blocking_write) { + size_t width, height; + int error; + + error = find_good_image_size(device_id, size_to_allocate, &width, &height, NULL); + if (error != SUCCEEDED) + return error; + + log_info("\t\tAttempting to allocate a %gMB read-only image (%d x %d) and fill with %s writes.\n", + (size_to_allocate/(1024.0*1024.0)), (int)width, (int)height, (blocking_write ? "blocking" : "non-blocking")); + *mem = create_image_2d(context, CL_MEM_READ_ONLY, &image_format, width, height, 0, NULL, &error); + + return check_allocation_error(context, device_id, error, queue); +} + + +int allocate_image2d_write(cl_context context, cl_command_queue *queue, cl_device_id device_id, cl_mem *mem, size_t size_to_allocate, cl_bool blocking_write) { + size_t width, height; + int error; + + error = find_good_image_size(device_id, size_to_allocate, &width, &height, NULL); + if (error != SUCCEEDED) + return error; + + //log_info("\t\tAttempting to allocate a %gMB write-only image (%d x %d) and fill with %s writes.\n", + //(size_to_allocate/(1024.0*1024.0)), (int)width, (int)height, (blocking_write ? 
"blocking" : "non-blocking")); + *mem = create_image_2d(context, CL_MEM_WRITE_ONLY, &image_format, width, height, 0, NULL, &error); + + return check_allocation_error(context, device_id, error, queue); +} + +int do_allocation(cl_context context, cl_command_queue *queue, cl_device_id device_id, size_t size_to_allocate, int type, cl_mem *mem) { + if (type == BUFFER) return allocate_buffer(context, queue, device_id, mem, size_to_allocate, true); + if (type == IMAGE_READ) return allocate_image2d_read(context, queue, device_id, mem, size_to_allocate, true); + if (type == IMAGE_WRITE) return allocate_image2d_write(context, queue, device_id, mem, size_to_allocate, true); + if (type == BUFFER_NON_BLOCKING) return allocate_buffer(context, queue, device_id, mem, size_to_allocate, false); + if (type == IMAGE_READ_NON_BLOCKING) return allocate_image2d_read(context, queue, device_id, mem, size_to_allocate, false); + if (type == IMAGE_WRITE_NON_BLOCKING) return allocate_image2d_write(context, queue, device_id, mem, size_to_allocate, false); + log_error("Invalid allocation type: %d\n", type); + return FAILED_ABORT; +} + + +int allocate_size(cl_context context, cl_command_queue *queue, cl_device_id device_id, int multiple_allocations, size_t size_to_allocate, + int type, cl_mem mems[], int *number_of_mems, size_t *final_size, int force_fill, MTdata d) { + + cl_ulong max_individual_allocation_size, global_mem_size; + int error, result; + size_t amount_allocated; + size_t reduction_amount; + int current_allocation; + size_t allocation_this_time, actual_allocation; + + // Set the number of mems used to 0 so if we fail to create even a single one we don't end up returning a garbage value + *number_of_mems = 0; + + error = clGetDeviceInfo(device_id, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(max_individual_allocation_size), &max_individual_allocation_size, NULL); + test_error_abort( error, "clGetDeviceInfo failed for CL_DEVICE_MAX_MEM_ALLOC_SIZE"); + error = clGetDeviceInfo(device_id, 
CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(global_mem_size), &global_mem_size, NULL); + test_error_abort( error, "clGetDeviceInfo failed for CL_DEVICE_GLOBAL_MEM_SIZE"); + + if (global_mem_size > (cl_ulong)SIZE_MAX) { + global_mem_size = (cl_ulong)SIZE_MAX; + } + +// log_info("Device reports CL_DEVICE_MAX_MEM_ALLOC_SIZE=%llu bytes (%gMB), CL_DEVICE_GLOBAL_MEM_SIZE=%llu bytes (%gMB).\n", +// max_individual_allocation_size, toMB(max_individual_allocation_size), +// global_mem_size, toMB(global_mem_size)); + + if (size_to_allocate > global_mem_size) { + log_error("Can not allocate more than the global memory size.\n"); + return FAILED_ABORT; + } + + amount_allocated = 0; + current_allocation = 0; + + // If allocating for images, reduce the maximum allocation size to the maximum image size. + // If we don't do this, then the value of CL_DEVICE_MAX_MEM_ALLOC_SIZE / 4 can be higher + // than the maximum image size on systems with 16GB of RAM or more. In this case, we + // succeed in allocating an image but its size is less than CL_DEVICE_MAX_MEM_ALLOC_SIZE / 4 + // (min_allocation_allowed) and thus we fail the allocation below. 
+ if(type == IMAGE_READ || type == IMAGE_READ_NON_BLOCKING || type == IMAGE_WRITE || type == IMAGE_WRITE_NON_BLOCKING) { + size_t width; + size_t height; + size_t max_size; + error = find_good_image_size(device_id, size_to_allocate, &width, &height, &max_size); + if (!(error == SUCCEEDED || error == FAILED_TOO_BIG)) + return error; + if(max_size < max_individual_allocation_size) + max_individual_allocation_size = max_size; + } + + reduction_amount = (size_t)max_individual_allocation_size/16; + + if (type == BUFFER || type == BUFFER_NON_BLOCKING) log_info("\tAttempting to allocate a buffer of size %gMB.\n", toMB(size_to_allocate)); + else if (type == IMAGE_READ || type == IMAGE_READ_NON_BLOCKING) log_info("\tAttempting to allocate a read-only image of size %gMB.\n", toMB(size_to_allocate)); + else if (type == IMAGE_WRITE || type == IMAGE_WRITE_NON_BLOCKING) log_info("\tAttempting to allocate a write-only image of size %gMB.\n", toMB(size_to_allocate)); + +// log_info("\t\t(Reduction size is %gMB per iteration, minimum allowable individual allocation size is %gMB.)\n", +// toMB(reduction_amount), toMB(min_allocation_allowed)); +// if (force_fill && type != IMAGE_WRITE && type != IMAGE_WRITE_NON_BLOCKING) log_info("\t\t(Allocations will be filled with random data for checksum calculation.)\n"); + + // If we are only doing a single allocation, only allow 1 + int max_to_allocate = multiple_allocations ? MAX_NUMBER_TO_ALLOCATE : 1; + + // Make sure that the maximum number of images allocated is constrained by the + // maximum that may be passed to a kernel + if (type != BUFFER && type != BUFFER_NON_BLOCKING) { + cl_device_info param_name = (type == IMAGE_READ || type == IMAGE_READ_NON_BLOCKING) ? 
+ CL_DEVICE_MAX_READ_IMAGE_ARGS : CL_DEVICE_MAX_WRITE_IMAGE_ARGS; + + cl_uint max_image_args; + error = clGetDeviceInfo(device_id, param_name, sizeof(max_image_args), &max_image_args, NULL); + test_error( error, "clGetDeviceInfo failed for CL_DEVICE_MAX IMAGE_ARGS"); + + if ((int)max_image_args < max_to_allocate) { + log_info("\t\tMaximum number of images per kernel limited to %d\n",(int)max_image_args); + max_to_allocate = max_image_args; + } + } + + + // Try to allocate the requested amount. + while (amount_allocated != size_to_allocate && current_allocation < max_to_allocate) { + + // Determine how much more is needed + allocation_this_time = size_to_allocate - amount_allocated; + + // Bound by the individual allocation size + if (allocation_this_time > max_individual_allocation_size) + allocation_this_time = (size_t)max_individual_allocation_size; + + // Allocate the largest object possible + result = FAILED_TOO_BIG; + //log_info("\t\tTrying sub-allocation %d at size %gMB.\n", current_allocation, toMB(allocation_this_time)); + while (result == FAILED_TOO_BIG && allocation_this_time != 0) { + + // Create the object + result = do_allocation(context, queue, device_id, allocation_this_time, type, &mems[current_allocation]); + if (result == SUCCEEDED) { + // Allocation succeeded, another memory object was added to the array + *number_of_mems = (current_allocation+1); + + // Verify the size is correct to within 1MB. + actual_allocation = get_actual_allocation_size(mems[current_allocation]); + if (fabs((double)allocation_this_time - (double)actual_allocation) > 1024.0*1024.0) { + log_error("Allocation not of expected size. 
Expected %gMB, got %gMB.\n", toMB(allocation_this_time), toMB( actual_allocation)); + return FAILED_ABORT; + } + + // If we are filling the allocation for verification do so + if (force_fill) { + //log_info("\t\t\tWriting random values to object and calculating checksum.\n"); + cl_bool blocking_write = true; + if (type == BUFFER_NON_BLOCKING || type == IMAGE_READ_NON_BLOCKING || type == IMAGE_WRITE_NON_BLOCKING) { + blocking_write = false; + } + result = fill_mem_with_data(context, device_id, queue, mems[current_allocation], d, blocking_write); + } + } + + // If creation failed, try to create a smaller object + if (result == FAILED_TOO_BIG) { + //log_info("\t\t\tAllocation %d failed at size %gMB. Trying smaller.\n", current_allocation, toMB(allocation_this_time)); + if (allocation_this_time > reduction_amount) + allocation_this_time -= reduction_amount; + else if (reduction_amount > 1) { + reduction_amount /= 2; + } + else { + allocation_this_time = 0; + } + + } + } + + if (result == FAILED_ABORT) { + log_error("\t\tAllocation failed.\n"); + return FAILED_ABORT; + } + + if (!allocation_this_time) { + log_info("\t\tFailed to allocate %gMB across several objects.\n", toMB(size_to_allocate)); + return FAILED_TOO_BIG; + } + + // Otherwise we succeeded + if (result != SUCCEEDED) { + log_error("Test logic error."); + test_finish(); + exit(-1); + } + amount_allocated += allocation_this_time; + + *final_size = amount_allocated; + + current_allocation++; + } + + log_info("\t\tSucceeded in allocating %gMB using %d memory objects.\n", toMB(amount_allocated), current_allocation); + return SUCCEEDED; +} diff --git a/test_conformance/allocations/allocation_functions.h b/test_conformance/allocations/allocation_functions.h new file mode 100644 index 00000000..939a993b --- /dev/null +++ b/test_conformance/allocations/allocation_functions.h @@ -0,0 +1,24 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "testBase.h" +#include "allocation_utils.h" + +int do_allocation(cl_context context, cl_command_queue *queue, cl_device_id device_id, size_t size_to_allocate, int type, cl_mem *mem); +int allocate_buffer(cl_context context, cl_command_queue *queue, cl_device_id device_id, cl_mem *mem, size_t size_to_allocate); +int allocate_image2d_read(cl_context context, cl_command_queue *queue, cl_device_id device_id, cl_mem *mem, size_t size_to_allocate); +int allocate_image2d_write(cl_context context, cl_command_queue *queue, cl_device_id device_id, cl_mem *mem, size_t size_to_allocate); +int allocate_size(cl_context context, cl_command_queue *queue, cl_device_id device_id, int multiple_allocations, size_t size_to_allocate, + int type, cl_mem mems[], int *number_of_mems, size_t *final_size, int force_fill, MTdata d); diff --git a/test_conformance/allocations/allocation_utils.cpp b/test_conformance/allocations/allocation_utils.cpp new file mode 100644 index 00000000..38282dfa --- /dev/null +++ b/test_conformance/allocations/allocation_utils.cpp @@ -0,0 +1,87 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "allocation_utils.h" + +cl_command_queue reset_queue(cl_context context, cl_device_id device_id, cl_command_queue *queue, int *error) +{ + log_info("Invalid command queue. Releasing and recreating the command queue.\n"); + clReleaseCommandQueue(*queue); + *queue = clCreateCommandQueueWithProperties(context, device_id, 0, error); + return *queue; +} + +int check_allocation_error(cl_context context, cl_device_id device_id, int error, cl_command_queue *queue) { + //log_info("check_allocation_error context=%p device_id=%p error=%d *queue=%p\n", context, device_id, error, *queue); + if ((error == CL_MEM_OBJECT_ALLOCATION_FAILURE ) || (error == CL_OUT_OF_RESOURCES ) || (error == CL_OUT_OF_HOST_MEMORY) || (error == CL_INVALID_IMAGE_SIZE)) { + return FAILED_TOO_BIG; + } else if (error == CL_INVALID_COMMAND_QUEUE) { + *queue = reset_queue(context, device_id, queue, &error); + if (CL_SUCCESS != error) + { + log_error("Failed to reset command queue after corrupted queue: %s\n", IGetErrorString(error)); + return FAILED_ABORT; + } + // Try again with smaller resources. 
+ return FAILED_TOO_BIG; + } else if (error != CL_SUCCESS) { + log_error("Allocation failed with %s.\n", IGetErrorString(error)); + return FAILED_ABORT; + } + return SUCCEEDED; +} + + +double toMB(cl_ulong size_in) { + return (double)size_in/(1024.0*1024.0); +} + +size_t get_actual_allocation_size(cl_mem mem) { + int error; + cl_mem_object_type type; + size_t size, width, height; + + error = clGetMemObjectInfo(mem, CL_MEM_TYPE, sizeof(type), &type, NULL); + if (error) { + print_error(error, "clGetMemObjectInfo failed for CL_MEM_TYPE."); + return 0; + } + + if (type == CL_MEM_OBJECT_BUFFER) { + error = clGetMemObjectInfo(mem, CL_MEM_SIZE, sizeof(size), &size, NULL); + if (error) { + print_error(error, "clGetMemObjectInfo failed for CL_MEM_SIZE."); + return 0; + } + return size; + } else if (type == CL_MEM_OBJECT_IMAGE2D) { + error = clGetImageInfo(mem, CL_IMAGE_WIDTH, sizeof(width), &width, NULL); + if (error) { + print_error(error, "clGetMemObjectInfo failed for CL_IMAGE_WIDTH."); + return 0; + } + error = clGetImageInfo(mem, CL_IMAGE_HEIGHT, sizeof(height), &height, NULL); + if (error) { + print_error(error, "clGetMemObjectInfo failed for CL_IMAGE_HEIGHT."); + return 0; + } + return width*height*4*sizeof(cl_uint); + } + + log_error("Invalid CL_MEM_TYPE: %d\n", type); + return 0; +} + + diff --git a/test_conformance/allocations/allocation_utils.h b/test_conformance/allocations/allocation_utils.h new file mode 100644 index 00000000..a91a1235 --- /dev/null +++ b/test_conformance/allocations/allocation_utils.h @@ -0,0 +1,24 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "testBase.h" + +extern cl_uint checksum; + +int check_allocation_error(cl_context context, cl_device_id device_id, int error, cl_command_queue *queue); +double toMB(cl_ulong size_in); +size_t get_actual_allocation_size(cl_mem mem); + + diff --git a/test_conformance/allocations/main.cpp b/test_conformance/allocations/main.cpp new file mode 100644 index 00000000..d41470be --- /dev/null +++ b/test_conformance/allocations/main.cpp @@ -0,0 +1,411 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "testBase.h" + +#include "allocation_functions.h" +#include "allocation_fill.h" +#include "allocation_execute.h" +#include "../../test_common/harness/testHarness.h" +#include "../../test_common/harness/parseParameters.h" +#include + +typedef long long unsigned llu; + +cl_device_id g_device_id; +cl_device_type g_device_type = CL_DEVICE_TYPE_DEFAULT; +clContextWrapper g_context; +clCommandQueueWrapper g_queue; +int g_repetition_count = 1; +int g_tests_to_run = 0; +int g_reduction_percentage = 100; +int g_write_allocations = 1; +int g_multiple_allocations = 0; +int g_execute_kernel = 1; + +cl_uint checksum; + +void printUsage( const char *execName ) +{ + const char *p = strrchr( execName, '/' ); + if( p != NULL ) + execName = p + 1; + + log_info( "Usage: %s [single|multiple] [numReps] [reduction%%] allocType\n", execName ); + log_info( "Where:\n" ); + log_info( "\tsingle - Tests using a single allocation as large as possible\n" ); + log_info( "\tmultiple - Tests using as many allocations as possible\n" ); + log_info( "\n" ); + log_info( "\tnumReps - Optional integer specifying the number of repetitions to run and average the result (defaults to 1)\n" ); + log_info( "\treduction%% - Optional integer, followed by a %% sign, that acts as a multiplier for the target amount of memory.\n" ); + log_info( "\t Example: target amount of 512MB and a reduction of 75%% will result in a target of 384MB.\n" ); + log_info( "\n" ); + log_info( "\tallocType - Allocation type to test with. 
Can be one of the following:\n" ); + log_info( "\t\tbuffer\n"); + log_info( "\t\timage2d_read\n"); + log_info( "\t\timage2d_write\n"); + log_info( "\t\tbuffer_non_blocking\n"); + log_info( "\t\timage2d_read_non_blocking\n"); + log_info( "\t\timage2d_write_non_blocking\n"); + log_info( "\t\tall (runs all of the above in sequence)\n" ); + log_info( "\tdo_not_force_fill - Disable explicitly write data to all memory objects after creating them.\n" ); + log_info( "\t Without this, the kernel execution can not verify its checksum.\n" ); + log_info( "\tdo_not_execute - Disable executing a kernel that accesses all of the memory objects.\n" ); +} + + +int init_cl() { + cl_platform_id platform; + int error; + + error = clGetPlatformIDs(1, &platform, NULL); + test_error(error, "clGetPlatformIDs failed"); + + error = clGetDeviceIDs(platform, g_device_type, 1, &g_device_id, NULL); + test_error(error, "clGetDeviceIDs failed"); + + /* Create a context */ + g_context = clCreateContext( NULL, 1, &g_device_id, notify_callback, NULL, &error ); + test_error(error, "clCreateContext failed"); + + /* Create command queue */ + g_queue = clCreateCommandQueueWithProperties( g_context, g_device_id, 0, &error ); + test_error(error, "clCreateCommandQueue failed"); + + return error; +} + + +int main(int argc, const char *argv[]) +{ + int error; + int count; + cl_mem mems[MAX_NUMBER_TO_ALLOCATE]; + cl_ulong max_individual_allocation_size, global_mem_size; + char str[ 128 ], *endPtr; + int r; + int number_of_mems_used; + int failure_counts = 0; + int test, test_to_run = 0; + int randomize = 0; + size_t final_size, max_size, current_test_size; + + test_start(); + + argc = parseCustomParam(argc, argv); + if (argc == -1) + { + test_finish(); + return -1; + } + + // Parse arguments + checkDeviceTypeOverride( &g_device_type ); + for( int i = 1; i < argc; i++ ) + { + strncpy( str, argv[ i ], sizeof( str ) - 1 ); + + if( strcmp( str, "cpu" ) == 0 || strcmp( str, "CL_DEVICE_TYPE_CPU" ) == 0 ) + 
g_device_type = CL_DEVICE_TYPE_CPU; + else if( strcmp( str, "gpu" ) == 0 || strcmp( str, "CL_DEVICE_TYPE_GPU" ) == 0 ) + g_device_type = CL_DEVICE_TYPE_GPU; + else if( strcmp( str, "accelerator" ) == 0 || strcmp( str, "CL_DEVICE_TYPE_ACCELERATOR" ) == 0 ) + g_device_type = CL_DEVICE_TYPE_ACCELERATOR; + else if( strcmp( str, "CL_DEVICE_TYPE_DEFAULT" ) == 0 ) + g_device_type = CL_DEVICE_TYPE_DEFAULT; + + else if( strcmp( str, "multiple" ) == 0 ) + g_multiple_allocations = 1; + else if( strcmp( str, "randomize" ) == 0 ) + randomize = 1; + else if( strcmp( str, "single" ) == 0 ) + g_multiple_allocations = 0; + + else if( ( r = (int)strtol( str, &endPtr, 10 ) ) && ( endPtr != str ) && ( *endPtr == 0 ) ) + { + // By spec, that means the entire string was an integer, so take it as a repetition count + g_repetition_count = r; + } + + else if( strcmp( str, "all" ) == 0 ) + { + g_tests_to_run = BUFFER | IMAGE_READ | IMAGE_WRITE | BUFFER_NON_BLOCKING | IMAGE_READ_NON_BLOCKING | IMAGE_WRITE_NON_BLOCKING; + } + + else if( strchr( str, '%' ) != NULL ) + { + // Reduction percentage (let strtol ignore the percentage) + g_reduction_percentage = (int)strtol( str, NULL, 10 ); + } + + else if( g_tests_to_run == 0 ) + { + if( strcmp( str, "buffer" ) == 0 ) + { + g_tests_to_run |= BUFFER; + } + else if( strcmp( str, "image2d_read" ) == 0 ) + { + g_tests_to_run |= IMAGE_READ; + } + else if( strcmp( str, "image2d_write" ) == 0 ) + { + g_tests_to_run |= IMAGE_WRITE; + } + else if( strcmp( str, "buffer_non_blocking" ) == 0 ) + { + g_tests_to_run |= BUFFER_NON_BLOCKING; + } + else if( strcmp( str, "image2d_read_non_blocking" ) == 0 ) + { + g_tests_to_run |= IMAGE_READ_NON_BLOCKING; + } + else if( strcmp( str, "image2d_write_non_blocking" ) == 0 ) + { + g_tests_to_run |= IMAGE_WRITE_NON_BLOCKING; + } + if( g_tests_to_run == 0 ) + break; // Argument is invalid; break to print usage + } + + else if( strcmp( str, "do_not_force_fill" ) == 0 ) + { + g_write_allocations = 0; + } + + else if( 
strcmp( str, "do_not_execute" ) == 0 ) + { + g_execute_kernel = 0; + } + + } + + if( randomize ) + { + gRandomSeed = (cl_uint) time( NULL ); + log_info( "Random seed: %u.\n", gRandomSeed ); + gReSeed = 1; + } + + if( g_tests_to_run == 0 ) + { + // Allocation type was never specified, or one of the arguments was invalid. Print usage and bail + printUsage( argv[ 0 ] ); + return -1; + } + + // All ready to go, so set up an environment + error = init_cl(); + if (error) { + test_finish(); + return -1; + } + + if( printDeviceHeader( g_device_id ) != CL_SUCCESS ) + { + test_finish(); + return -1; + } + + + error = clGetDeviceInfo(g_device_id, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(max_individual_allocation_size), &max_individual_allocation_size, NULL); + if ( error ) { + print_error( error, "clGetDeviceInfo failed for CL_DEVICE_MAX_MEM_ALLOC_SIZE"); + test_finish(); + return -1; + } + error = clGetDeviceInfo(g_device_id, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(global_mem_size), &global_mem_size, NULL); + if ( error ) { + print_error( error, "clGetDeviceInfo failed for CL_DEVICE_GLOBAL_MEM_SIZE"); + test_finish(); + return -1; + } + + log_info("Device reports CL_DEVICE_MAX_MEM_ALLOC_SIZE=%llu bytes (%gMB), CL_DEVICE_GLOBAL_MEM_SIZE=%llu bytes (%gMB).\n", + llu( max_individual_allocation_size ), toMB(max_individual_allocation_size), + llu( global_mem_size ), toMB(global_mem_size)); + + if (global_mem_size > (cl_ulong)SIZE_MAX) { + global_mem_size = (cl_ulong)SIZE_MAX; + } + + if( max_individual_allocation_size > global_mem_size ) + { + log_error( "FAILURE: CL_DEVICE_MAX_MEM_ALLOC_SIZE (%llu) is greater than the CL_DEVICE_GLOBAL_MEM_SIZE (%llu)\n", llu( max_individual_allocation_size ), llu( global_mem_size ) ); + test_finish(); + return -1; + } + + // We may need to back off the global_mem_size on unified memory devices to leave room for application and operating system code + // and associated data in the working set, so we dont start pathologically paging. 
+ // Check to see if we are a unified memory device + cl_bool hasUnifiedMemory = CL_FALSE; + if( ( error = clGetDeviceInfo( g_device_id, CL_DEVICE_HOST_UNIFIED_MEMORY, sizeof( hasUnifiedMemory ), &hasUnifiedMemory, NULL ))) + { + print_error( error, "clGetDeviceInfo failed for CL_DEVICE_HOST_UNIFIED_MEMORY"); + test_finish(); + return -1; + } + // we share unified memory so back off to 1/2 the global memory size. + if( CL_TRUE == hasUnifiedMemory ) + { + global_mem_size -= global_mem_size /2; + log_info( "Device shares memory with the host, so backing off the maximum combined allocation size to be %gMB to avoid rampant paging.\n", toMB( global_mem_size ) ); + } + else + { + // Let's just use 60% of total available memory as framework/driver may not allow using all of it + // e.g. vram on GPU is used by window server and even for this test, we need some space for context, + // queue, kernel code on GPU. + global_mem_size *= 0.60; + } + + // Pick the baseline size based on whether we are doing a single large or multiple allocations + if (!g_multiple_allocations) { + max_size = (size_t)max_individual_allocation_size; + } else { + max_size = (size_t)global_mem_size; + } + + + // Adjust based on the percentage + if (g_reduction_percentage != 100) { + log_info("NOTE: reducing max allocations to %d%%.\n", g_reduction_percentage); + max_size = (size_t)((double)max_size * (double)g_reduction_percentage/100.0); + } + + // Round to nearest MB. 
+ max_size &= (size_t)(0xFFFFFFFFFF00000ULL); + + log_info("** Target allocation size (rounded to nearest MB) is: %lu bytes (%gMB).\n", max_size, toMB(max_size)); + + // Run all the requested tests + RandomSeed seed( gRandomSeed ); + for (test=0; test<6; test++) { + if (test == 0) test_to_run = BUFFER; + if (test == 1) test_to_run = IMAGE_READ; + if (test == 2) test_to_run = IMAGE_WRITE; + if (test == 3) test_to_run = BUFFER_NON_BLOCKING; + if (test == 4) test_to_run = IMAGE_READ_NON_BLOCKING; + if (test == 5) test_to_run = IMAGE_WRITE_NON_BLOCKING; + if (!(g_tests_to_run & test_to_run)) + continue; + + // Skip image tests if we don't support images on the device + if (test > 0 && checkForImageSupport(g_device_id)) { + log_info("Can not test image allocation because device does not support images.\n"); + continue; + } + + // This section was added in order to fix a bug in the test + // If CL_DEVICE_MAX_MEM_ALLOC_SIZE is much greater than CL_DEVICE_IMAGE2D_MAX_WIDTH * CL_DEVICE_IMAGE2D_MAX_HEIGHT + // The test will fail in image allocations as the size requested for the allocation will be much greater than the maximum size allowed for image + if ( (test_to_run != BUFFER) && (test_to_run != BUFFER_NON_BLOCKING) ) { + size_t max_width, max_height; + cl_ulong max_image2d_size; + error = clGetDeviceInfo(g_device_id, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( max_width ), &max_width, NULL ); + test_error_abort( error, "clGetDeviceInfo failed for CL_DEVICE_IMAGE2D_MAX_WIDTH"); + error = clGetDeviceInfo(g_device_id, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof( max_height ), &max_height, NULL ); + test_error_abort( error, "clGetDeviceInfo failed for CL_DEVICE_IMAGE2D_MAX_HEIGHT"); + max_image2d_size = (cl_ulong)max_height*max_width*4*sizeof(cl_uint); + + if (max_individual_allocation_size > max_image2d_size) + { + max_individual_allocation_size = max_image2d_size; + } + } + + // Pick the baseline size based on whether we are doing a single large or multiple allocations + if 
(!g_multiple_allocations) { + max_size = (size_t)max_individual_allocation_size; + } else { + max_size = (size_t)global_mem_size; + } + + // Adjust based on the percentage + if (g_reduction_percentage != 100) { + log_info("NOTE: reducing max allocations to %d%%.\n", g_reduction_percentage); + max_size = (size_t)((double)max_size * (double)g_reduction_percentage/100.0); + } + + // Round to nearest MB. + max_size &= (size_t)(0xFFFFFFFFFF00000ULL); + + log_info("** Target allocation size (rounded to nearest MB) is: %llu bytes (%gMB).\n", llu( max_size ), toMB(max_size)); + + if (test_to_run == BUFFER || test_to_run == BUFFER_NON_BLOCKING) log_info("** Allocating buffer(s) to size %gMB.\n", toMB(max_size)); + else if (test_to_run == IMAGE_READ || test_to_run == IMAGE_READ_NON_BLOCKING) log_info("** Allocating read-only image(s) to size %gMB.\n", toMB(max_size)); + else if (test_to_run == IMAGE_WRITE || test_to_run == IMAGE_WRITE_NON_BLOCKING) log_info("** Allocating write-only image(s) to size %gMB.\n", toMB(max_size)); + else {log_error("Test logic error.\n"); return -1;} + + // Run the test the requested number of times + for (count = 0; count < g_repetition_count; count++) { + current_test_size = max_size; + error = FAILED_TOO_BIG; + log_info(" => Allocation %d\n", count+1); + + while (error == FAILED_TOO_BIG && current_test_size > max_size/8) { + // Reset our checksum for each allocation + checksum = 0; + + // Do the allocation + error = allocate_size(g_context, &g_queue, g_device_id, g_multiple_allocations, current_test_size, test_to_run, mems, &number_of_mems_used, &final_size, g_write_allocations, seed); + + // If we succeeded and we're supposed to execute a kernel, do so. 
+ if (error == SUCCEEDED && g_execute_kernel) { + log_info("\tExecuting kernel with memory objects.\n"); + error = execute_kernel(g_context, &g_queue, g_device_id, test_to_run, mems, number_of_mems_used, g_write_allocations); + } + + // If we failed to allocate more than 1/8th of the requested amount return a failure. + if (final_size < (size_t)max_size/8) { + // log_error("===> Allocation %d failed to allocate more than 1/8th of the requested size.\n", count+1); + failure_counts++; + } + // Clean up. + for (int i=0; i Allocation %d failed.\n", count+1); + failure_counts++; + } + + if (error == FAILED_TOO_BIG) { + current_test_size -= max_size/16; + // log_info("\tFailed at this size; trying a smaller size of %gMB.\n", toMB(current_test_size)); + } + } + if (error == SUCCEEDED && current_test_size == max_size) + log_info("\tPASS: Allocation succeeded.\n"); + else if (error == SUCCEEDED && current_test_size > max_size/8) + log_info("\tPASS: Allocation succeeded at reduced size.\n"); + else { + log_error("\tFAIL: Allocation failed.\n"); + failure_counts++; + } + } + } + + if (failure_counts) + log_error("FAILED allocations test.\n"); + else + log_info("PASSED allocations test.\n"); + + test_finish(); + return failure_counts; +} + + diff --git a/test_conformance/allocations/testBase.h b/test_conformance/allocations/testBase.h new file mode 100644 index 00000000..eadcd188 --- /dev/null +++ b/test_conformance/allocations/testBase.h @@ -0,0 +1,62 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef _testBase_h +#define _testBase_h + +#include "../../test_common/harness/compat.h" +#include +#include +#include + +#include +#include + +#if !defined(_WIN32) +#include +#endif + +#include "../../test_common/harness/errorHelpers.h" +#include "../../test_common/harness/kernelHelpers.h" +#include "../../test_common/harness/typeWrappers.h" +#include "../../test_common/harness/testHarness.h" + + +#define MAX_NUMBER_TO_ALLOCATE 100 + +#define FAILED_CORRUPTED_QUEUE -2 +#define FAILED_ABORT -1 +#define FAILED_TOO_BIG 1 +// On Windows macro `SUCCEEDED' is defined in `WinError.h'. It causes compiler warnings. Let us avoid them. +#if defined( _WIN32 ) && defined( SUCCEEDED ) + #undef SUCCEEDED +#endif +#define SUCCEEDED 0 + +#define BUFFER 1 +#define IMAGE_READ 2 +#define IMAGE_WRITE 4 +#define BUFFER_NON_BLOCKING 8 +#define IMAGE_READ_NON_BLOCKING 16 +#define IMAGE_WRITE_NON_BLOCKING 32 + +#define test_error_abort(errCode,msg) test_error_ret_abort(errCode,msg,errCode) +#define test_error_ret_abort(errCode,msg,retValue) { if( errCode != CL_SUCCESS ) { print_error( errCode, msg ); return FAILED_ABORT ; } } + + +#endif // _testBase_h + + + diff --git a/test_conformance/api/CMakeLists.txt b/test_conformance/api/CMakeLists.txt new file mode 100644 index 00000000..bf3a126b --- /dev/null +++ b/test_conformance/api/CMakeLists.txt @@ -0,0 +1,39 @@ +set(MODULE_NAME API) + +set(${MODULE_NAME}_SOURCES + main.c + test_bool.c + test_retain.cpp + test_retain_program.c + test_queries.cpp + test_create_kernels.c + test_kernels.c + test_api_min_max.c + test_kernel_arg_changes.cpp + test_kernel_arg_multi_setup.cpp + test_binary.cpp + test_native_kernel.cpp + test_mem_objects.cpp + test_create_context_from_type.cpp + test_device_min_data_type_align_size_alignment.cpp + test_platform.cpp + test_kernel_arg_info.c + test_null_buffer_arg.c + test_mem_object_info.cpp + 
test_queue_hint.cpp + test_sub_group_dispatch.cpp + test_clone_kernel.cpp + test_zero_sized_enqueue.cpp + ../../test_common/harness/errorHelpers.c + ../../test_common/harness/threadTesting.c + ../../test_common/harness/testHarness.c + ../../test_common/harness/kernelHelpers.c + ../../test_common/harness/typeWrappers.cpp + ../../test_common/harness/conversions.c + ../../test_common/harness/mt19937.c + ../../test_common/harness/msvc9.c + ../../test_common/harness/imageHelpers.cpp + ../../test_common/harness/parseParameters.cpp +) + +include(../CMakeCommon.txt) diff --git a/test_conformance/api/Jamfile b/test_conformance/api/Jamfile new file mode 100644 index 00000000..704b5ce1 --- /dev/null +++ b/test_conformance/api/Jamfile @@ -0,0 +1,27 @@ +project + : requirements + gcc:-xc++ + msvc:"/TP" + ; + + +exe test_api + : main.c + test_api_min_max.c + test_binary.cpp + test_create_kernels.c + test_create_context_from_type.cpp + test_kernel_arg_changes.cpp + test_kernel_arg_multi_setup.cpp + test_kernels.c + test_native_kernel.cpp + test_queries.cpp + test_retain_program.c + test_platform.cpp + ; + +install dist + : test_api #test.lst + : debug:$(DIST)/debug/tests/test_conformance/api + release:$(DIST)/release/tests/test_conformance/api + ; diff --git a/test_conformance/api/Makefile b/test_conformance/api/Makefile new file mode 100644 index 00000000..44cfaee3 --- /dev/null +++ b/test_conformance/api/Makefile @@ -0,0 +1,61 @@ +ifdef BUILD_WITH_ATF +ATF = -framework ATF +USE_ATF = -DUSE_ATF +endif + +SRCS = main.c \ + test_retain_program.c \ + test_queries.cpp \ + test_create_kernels.c \ + test_kernels.c \ + test_kernel_arg_info.c \ + test_api_min_max.c \ + test_kernel_arg_changes.cpp \ + test_kernel_arg_multi_setup.cpp \ + test_binary.cpp \ + test_native_kernel.cpp \ + test_create_context_from_type.cpp \ + test_platform.cpp \ + test_retain.cpp \ + test_device_min_data_type_align_size_alignment.cpp \ + test_mem_objects.cpp \ + test_bool.c \ + test_null_buffer_arg.c \ + 
test_mem_object_info.cpp \ + ../../test_common/harness/errorHelpers.c \ + ../../test_common/harness/threadTesting.c \ + ../../test_common/harness/testHarness.c \ + ../../test_common/harness/imageHelpers.cpp \ + ../../test_common/harness/kernelHelpers.c \ + ../../test_common/harness/typeWrappers.cpp \ + ../../test_common/harness/mt19937.c \ + ../../test_common/harness/conversions.c + +DEFINES = DONT_TEST_GARBAGE_POINTERS + +SOURCES = $(abspath $(SRCS)) +LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries +LIBPATH += -L. +HEADERS = +TARGET = test_api +INCLUDE = +COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32 +CC = c++ +CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE) +CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE) +LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF} + +OBJECTS := ${SOURCES:.c=.o} +OBJECTS := ${OBJECTS:.cpp=.o} + +TARGETOBJECT = +all: $(TARGET) + +$(TARGET): $(OBJECTS) + $(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES) + +clean: + rm -f $(TARGET) $(OBJECTS) + +.DEFAULT: + @echo The target \"$@\" does not exist in Makefile. diff --git a/test_conformance/api/main.c b/test_conformance/api/main.c new file mode 100644 index 00000000..7f014382 --- /dev/null +++ b/test_conformance/api/main.c @@ -0,0 +1,223 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/compat.h" +#include +#include + +#include +#include "procs.h" +#include "../../test_common/harness/testHarness.h" + +#if !defined(_WIN32) +#include +#endif + +// FIXME: To use certain functions in ../../test_common/harness/imageHelpers.h +// (for example, generate_random_image_data()), the tests are required to declare +// the following variables (): +cl_device_type gDeviceType = CL_DEVICE_TYPE_DEFAULT; +bool gTestRounding = false; + +basefn basefn_list[] = { + test_get_platform_info, + test_get_sampler_info, + test_get_command_queue_info, + test_get_context_info, + test_get_device_info, + test_enqueue_task, + test_binary_get, + test_program_binary_create, + test_kernel_required_group_size, + + test_release_kernel_order, + test_release_during_execute, + + test_load_single_kernel, + test_load_two_kernels, + test_load_two_kernels_in_one, + test_load_two_kernels_manually, + test_get_program_info_kernel_names, + test_get_kernel_arg_info, + test_create_kernels_in_program, + test_get_kernel_info, + test_execute_kernel_local_sizes, + test_set_kernel_arg_by_index, + test_set_kernel_arg_constant, + test_set_kernel_arg_struct_array, + test_kernel_global_constant, + + test_min_max_thread_dimensions, + test_min_max_work_items_sizes, + test_min_max_work_group_size, + test_min_max_read_image_args, + test_min_max_write_image_args, + test_min_max_mem_alloc_size, + test_min_max_image_2d_width, + test_min_max_image_2d_height, + test_min_max_image_3d_width, + test_min_max_image_3d_height, + test_min_max_image_3d_depth, + test_min_max_image_array_size, + test_min_max_image_buffer_size, + test_min_max_parameter_size, + test_min_max_samplers, + test_min_max_constant_buffer_size, + test_min_max_constant_args, + test_min_max_compute_units, + test_min_max_address_bits, + test_min_max_single_fp_config, + test_min_max_double_fp_config, + test_min_max_local_mem_size, + test_min_max_kernel_preferred_work_group_size_multiple, + 
test_min_max_execution_capabilities, + test_min_max_queue_properties, + test_min_max_device_version, + test_min_max_language_version, + + test_kernel_arg_changes, + test_kernel_arg_multi_setup_random, + + test_native_kernel, + + test_create_context_from_type, + + test_platform_extensions, + test_get_platform_ids, + test_for_bool_type, + + test_repeated_setup_cleanup, + + test_retain_queue_single, + test_retain_queue_multiple, + test_retain_mem_object_single, + test_retain_mem_object_multiple, + test_min_data_type_align_size_alignment, + + test_mem_object_destructor_callback, + test_null_buffer_arg, + test_get_buffer_info, + test_get_image2d_info, + test_get_image3d_info, + test_get_image1d_info, + test_get_image1d_array_info, + test_get_image2d_array_info, + test_queue_hint, + test_sub_group_dispatch, + test_clone_kernel, + test_zero_sized_enqueue +}; + + +const char *basefn_names[] = { + "get_platform_info", + "get_sampler_info", + "get_command_queue_info", + "get_context_info", + "get_device_info", + "enqueue_task", + "binary_get", + "binary_create", + "kernel_required_group_size", + + "release_kernel_order", + "release_during_execute", + + "load_single_kernel", + "load_two_kernels", + "load_two_kernels_in_one", + "load_two_kernels_manually", + "get_program_info_kernel_names", + "get_kernel_arg_info", + "create_kernels_in_program", + "get_kernel_info", + "execute_kernel_local_sizes", + "set_kernel_arg_by_index", + "set_kernel_arg_constant", + "set_kernel_arg_struct_array", + "kernel_global_constant", + + "min_max_thread_dimensions", + "min_max_work_items_sizes", + "min_max_work_group_size", + "min_max_read_image_args", + "min_max_write_image_args", + "min_max_mem_alloc_size", + "min_max_image_2d_width", + "min_max_image_2d_height", + "min_max_image_3d_width", + "min_max_image_3d_height", + "min_max_image_3d_depth", + "min_max_image_array_size", + "min_max_image_buffer_size", + "min_max_parameter_size", + "min_max_samplers", + "min_max_constant_buffer_size", + 
"min_max_constant_args", + "min_max_compute_units", + "min_max_address_bits", + "min_max_single_fp_config", + "min_max_double_fp_config", + "min_max_local_mem_size", + "min_max_kernel_preferred_work_group_size_multiple", + "min_max_execution_capabilities", + "min_max_queue_properties", + "min_max_device_version", + "min_max_language_version", + + "kernel_arg_changes", + "kernel_arg_multi_setup_random", + + "native_kernel", + + "create_context_from_type", + "platform_extensions", + + "get_platform_ids", + "bool_type", + + "repeated_setup_cleanup", + + "retain_queue_single", + "retain_queue_multiple", + "retain_mem_object_single", + "retain_mem_object_multiple", + + "min_data_type_align_size_alignment", + + "mem_object_destructor_callback", + "null_buffer_arg", + "get_buffer_info", + "get_image2d_info", + "get_image3d_info", + "get_image1d_info", + "get_image1d_array_info", + "get_image2d_array_info", + "queue_hint", + "sub_group_dispatch", + "clone_kernel", + "zero_sized_enqueue", +}; + +ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_list) / sizeof(basefn_list[0]))); + +int num_fns = sizeof(basefn_names) / sizeof(char *); + +int main(int argc, const char *argv[]) +{ + return runTestHarness( argc, argv, num_fns, basefn_list, basefn_names, false, false, 0 ); +} + + diff --git a/test_conformance/api/procs.h b/test_conformance/api/procs.h new file mode 100644 index 00000000..89713c5c --- /dev/null +++ b/test_conformance/api/procs.h @@ -0,0 +1,111 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/errorHelpers.h" +#include "../../test_common/harness/kernelHelpers.h" +#include "../../test_common/harness/typeWrappers.h" +#include "../../test_common/harness/clImageHelper.h" +#include "../../test_common/harness/imageHelpers.h" +extern float calculate_ulperror(float a, float b); + +extern int test_load_single_kernel(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_load_two_kernels(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_load_two_kernels_in_one(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_load_two_kernels_manually(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_get_program_info_kernel_names( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_create_kernels_in_program(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_enqueue_task(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_repeated_setup_cleanup(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_for_bool_type(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_platform_extensions(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); 
+extern int test_get_platform_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_get_sampler_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_get_command_queue_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_get_context_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_get_device_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_kernel_required_group_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_binary_get(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_program_binary_create(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_release_kernel_order(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_release_during_execute(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_get_kernel_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_execute_kernel_local_sizes(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_set_kernel_arg_by_index(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_set_kernel_arg_struct(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_set_kernel_arg_constant(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_set_kernel_arg_struct_array(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern 
int test_kernel_global_constant(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_min_max_thread_dimensions(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_min_max_work_items_sizes(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_min_max_work_group_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_min_max_read_image_args(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_min_max_write_image_args(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_min_max_mem_alloc_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_min_max_image_2d_width(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_min_max_image_2d_height(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_min_max_image_3d_width(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_min_max_image_3d_height(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_min_max_image_3d_depth(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_min_max_image_array_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_min_max_image_buffer_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_min_max_parameter_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_min_max_samplers(cl_device_id deviceID, cl_context context, cl_command_queue 
queue, int num_elements); +extern int test_min_max_constant_buffer_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_min_max_constant_args(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_min_max_compute_units(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_min_max_address_bits(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_min_max_single_fp_config(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_min_max_double_fp_config(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_min_max_local_mem_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_min_max_kernel_preferred_work_group_size_multiple(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_min_max_execution_capabilities(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_min_max_queue_properties(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_min_max_device_version(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_min_max_language_version(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_native_kernel(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ); + +extern int test_create_context_from_type(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_get_platform_ids(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_kernel_arg_changes(cl_device_id 
deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_kernel_arg_multi_setup_random(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_retain_queue_single(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_retain_queue_multiple(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_retain_mem_object_single(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_retain_mem_object_multiple(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_min_data_type_align_size_alignment(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ); + +extern int test_mem_object_destructor_callback(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_null_buffer_arg( cl_device_id device_id, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_get_buffer_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements ); +extern int test_get_image2d_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements ); +extern int test_get_image3d_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements ); +extern int test_get_image1d_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements ); +extern int test_get_image1d_array_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements ); +extern int test_get_image2d_array_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements ); +extern int test_get_kernel_arg_info( cl_device_id deviceID, cl_context context, cl_command_queue queue, int 
num_elements ); +extern int test_queue_hint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_sub_group_dispatch(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_clone_kernel(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_zero_sized_enqueue(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); diff --git a/test_conformance/api/testBase.h b/test_conformance/api/testBase.h new file mode 100644 index 00000000..8d58eeb6 --- /dev/null +++ b/test_conformance/api/testBase.h @@ -0,0 +1,31 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef _testBase_h +#define _testBase_h + +#include "../../test_common/harness/compat.h" +#include +#include +#include +#include +#include + +#include "procs.h" + +#endif // _testBase_h + + + diff --git a/test_conformance/api/test_api_min_max.c b/test_conformance/api/test_api_min_max.c new file mode 100644 index 00000000..57b0ab64 --- /dev/null +++ b/test_conformance/api/test_api_min_max.c @@ -0,0 +1,2115 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "testBase.h" +#include "../../test_common/harness/typeWrappers.h" +#include "../../test_common/harness/testHarness.h" +#include +#include + +extern cl_uint gRandomSeed; + +const char *sample_single_param_kernel[] = { + "__kernel void sample_test(__global int *src)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + "}\n" }; + +const char *sample_single_param_write_kernel[] = { + "__kernel void sample_test(__global int *src)\n" + "{\n" + " int tid = get_global_id(0);\n" + " src[tid] = tid;\n" + "\n" + "}\n" }; + +const char *sample_read_image_kernel_pattern[] = { + "__kernel void sample_test( __global float *result, ", " )\n" + "{\n" + " sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;\n" + " int tid = get_global_id(0);\n" + " result[0] = 0.0f;\n", + "\n" + "}\n" }; + +const char *sample_write_image_kernel_pattern[] = { + "__kernel void sample_test( ", " )\n" + "{\n" + " int tid = get_global_id(0);\n", + "\n" + "}\n" }; + + +const char *sample_large_parmam_kernel_pattern[] = { + "__kernel void sample_test(%s, __global long *result)\n" + "{\n" + "result[0] = 0;\n" + "%s" + "\n" + "}\n" }; + +const char *sample_large_int_parmam_kernel_pattern[] = { + "__kernel void sample_test(%s, __global int *result)\n" + "{\n" + "result[0] = 0;\n" + "%s" + "\n" + "}\n" }; + +const char *sample_sampler_kernel_pattern[] = { + "__kernel void sample_test( read_only image2d_t src, __global int4 *dst", ", sampler_t sampler%d", ")\n" + "{\n" + " int tid = get_global_id(0);\n", + " dst[ 0 ] = read_imagei( src, 
sampler%d, (int2)( 0, 0 ) );\n", + "\n" + "}\n" }; + +const char *sample_const_arg_kernel[] = { + "__kernel void sample_test(__constant int *src1, __global int *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src1[tid];\n" + "\n" + "}\n" }; + +const char *sample_local_arg_kernel[] = { + "__kernel void sample_test(__local int *src1, __global int *global_src, __global int *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " src1[tid] = global_src[tid];\n" + " barrier(CLK_GLOBAL_MEM_FENCE);\n" + " dst[tid] = src1[tid];\n" + "\n" + "}\n" }; + +const char *sample_const_max_arg_kernel_pattern = +"__kernel void sample_test(__constant int *src1 %s, __global int *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = src1[tid];\n" +"%s" +"\n" +"}\n"; + +int test_min_max_thread_dimensions(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error, retVal; + unsigned int maxThreadDim, threadDim, i; + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper streams[1]; + size_t *threads, *localThreads; + cl_event event; + cl_int event_status; + + + /* Get the max thread dimensions */ + error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof( maxThreadDim ), &maxThreadDim, NULL ); + test_error( error, "Unable to get max work item dimensions from device" ); + + if( maxThreadDim < 3 ) + { + log_error( "ERROR: Reported max work item dimensions is less than required! 
(%d)\n", maxThreadDim ); + return -1; + } + + log_info("Reported max thread dimensions of %d.\n", maxThreadDim); + + /* Create a kernel to test with */ + if( create_single_kernel_helper( context, &program, &kernel, 1, sample_single_param_kernel, "sample_test" ) != 0 ) + { + return -1; + } + + /* Create some I/O streams */ + streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 100, NULL, &error ); + if( streams[0] == NULL ) + { + log_error("ERROR: Creating test array failed!\n"); + return -1; + } + + /* Set the arguments */ + error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] ); + test_error( error, "Unable to set kernel arguments" ); + + retVal = 0; + + /* Now try running the kernel with up to that many threads */ + for (threadDim=1; threadDim <= maxThreadDim; threadDim++) + { + threads = (size_t *)malloc( sizeof( size_t ) * maxThreadDim ); + localThreads = (size_t *)malloc( sizeof( size_t ) * maxThreadDim ); + for( i = 0; i < maxThreadDim; i++ ) + { + threads[ i ] = 1; + localThreads[i] = 1; + } + + error = clEnqueueNDRangeKernel( queue, kernel, maxThreadDim, NULL, threads, localThreads, 0, NULL, &event ); + test_error( error, "Failed clEnqueueNDRangeKernel"); + + // Verify that the event does not return an error from the execution + error = clWaitForEvents(1, &event); + test_error( error, "clWaitForEvent failed"); + error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(event_status), &event_status, NULL); + test_error( error, "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed"); + clReleaseEvent(event); + if (event_status < 0) + test_error(error, "Kernel execution event returned error"); + + /* All done */ + free( threads ); + free( localThreads ); + } + + return retVal; +} + + +int test_min_max_work_items_sizes(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + size_t *deviceMaxWorkItemSize; + unsigned int maxWorkItemDim; + + /* 
Get the max work item dimensions */ + error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof( maxWorkItemDim ), &maxWorkItemDim, NULL ); + test_error( error, "Unable to get max work item dimensions from device" ); + + log_info("CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS returned %d\n", maxWorkItemDim); + deviceMaxWorkItemSize = (size_t*)malloc(sizeof(size_t)*maxWorkItemDim); + error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t)*maxWorkItemDim, deviceMaxWorkItemSize, NULL ); + test_error( error, "clDeviceInfo for CL_DEVICE_MAX_WORK_ITEM_SIZES failed" ); + + unsigned int i; + int errors = 0; + for(i=0; i= 128 && maxParameterSize == 1024) + { + error = clGetDeviceInfo( deviceID, CL_DEVICE_TYPE, sizeof( deviceType ), &deviceType, NULL ); + test_error( error, "Unable to get device type from device" ); + + if(deviceType != CL_DEVICE_TYPE_CUSTOM) + { + maxReadImages = 127; + } + } + // Subtract the size of the result + maxParameterSize -= deviceAddressSize; + + // Calculate the number we can use + if (maxParameterSize/deviceAddressSize < maxReadImages) { + log_info("WARNING: Max parameter size of %d bytes limits test to %d max image arguments.\n", (int)maxParameterSize, (int)(maxParameterSize/deviceAddressSize)); + maxReadImages = (unsigned int)(maxParameterSize/deviceAddressSize); + } + + /* Create a program with that many read args */ + programSrc = (char *)malloc( strlen( sample_read_image_kernel_pattern[ 0 ] ) + ( strlen( readArgPattern ) + 6 ) * ( maxReadImages ) + + strlen( sample_read_image_kernel_pattern[ 1 ] ) + 1 + 40240); + + strcpy( programSrc, sample_read_image_kernel_pattern[ 0 ] ); + strcat( programSrc, "read_only image2d_t srcimg0" ); + for( i = 0; i < maxReadImages-1; i++ ) + { + sprintf( readArgLine, readArgPattern, i+1 ); + strcat( programSrc, readArgLine ); + } + strcat( programSrc, sample_read_image_kernel_pattern[ 1 ] ); + for ( i = 0; i < maxReadImages; i++) { + sprintf( readArgLine, "\tresult[0] += 
read_imagef( srcimg%d, sampler, (int2)(0,0)).x;\n", i); + strcat( programSrc, readArgLine ); + } + strcat( programSrc, sample_read_image_kernel_pattern[ 2 ] ); + + error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&programSrc, "sample_test"); + test_error( error, "Failed to create the program and kernel."); + free( programSrc ); + + result = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float), NULL, &error); + test_error( error, "clCreateBufer failed"); + + /* Create some I/O streams */ + streams = new clMemWrapper[maxReadImages + 1]; + for( i = 0; i < maxReadImages; i++ ) + { + image_data[0]=i; + image_result+= image_data[0]; + streams[i] = create_image_2d( context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, &image_format_desc, 4, 4, 0, image_data, &error ); + test_error( error, "Unable to allocate test image" ); + } + + error = clSetKernelArg( kernel, 0, sizeof( result ), &result ); + test_error( error, "Unable to set kernel arguments" ); + + /* Set the arguments */ + for( i = 1; i < maxReadImages+1; i++ ) + { + error = clSetKernelArg( kernel, i, sizeof( streams[i-1] ), &streams[i-1] ); + test_error( error, "Unable to set kernel arguments" ); + } + + /* Now try running the kernel */ + threads[0] = threads[1] = 1; + error = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, &event ); + test_error( error, "clEnqueueNDRangeKernel failed"); + + // Verify that the event does not return an error from the execution + error = clWaitForEvents(1, &event); + test_error( error, "clWaitForEvent failed"); + error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(event_status), &event_status, NULL); + test_error( error, "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed"); + clReleaseEvent(event); + if (event_status < 0) + test_error(error, "Kernel execution event returned error"); + + error = clEnqueueReadBuffer(queue, result, CL_TRUE, 0, sizeof(cl_float), 
&actual_image_result, 0, NULL, NULL); + test_error(error, "clEnqueueReadBuffer failed"); + + delete[] streams; + + if (actual_image_result != image_result) { + log_error("Result failed to verify. Got %g, expected %g.\n", actual_image_result, image_result); + return 1; + } + + return 0; +} + +int test_min_max_write_image_args(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + unsigned int maxWriteImages, i; + clProgramWrapper program; + char writeArgLine[128], *programSrc; + const char *writeArgPattern = ", write_only image2d_t dstimg%d"; + clKernelWrapper kernel; + clMemWrapper *streams; + size_t threads[2]; + cl_image_format image_format_desc; + size_t maxParameterSize; + cl_event event; + cl_int event_status; + cl_uint minRequiredWriteImages = gIsEmbedded ? 1 : 8; + + + PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID ) + image_format_desc.image_channel_order = CL_RGBA; + image_format_desc.image_channel_data_type = CL_UNORM_INT8; + + /* Get the max read image arg count */ + error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_WRITE_IMAGE_ARGS, sizeof( maxWriteImages ), &maxWriteImages, NULL ); + test_error( error, "Unable to get max write image arg count from device" ); + + if( maxWriteImages == 0 ) + { + log_info( "WARNING: Device reports 0 for a max write image arg count (write image arguments unsupported). Skipping test (implicitly passes). This is only valid if the number of image formats is also 0.\n" ); + return 0; + } + + if( maxWriteImages < minRequiredWriteImages ) + { + log_error( "ERROR: Reported max write image arg count is less than required! 
(%d)\n", maxWriteImages ); + return -1; + } + + log_info("Reported %d max write image args.\n", maxWriteImages); + + error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof( maxParameterSize ), &maxParameterSize, NULL ); + test_error( error, "Unable to get max parameter size from device" ); + + // Calculate the number we can use + if (maxParameterSize/sizeof(cl_mem) < maxWriteImages) { + log_info("WARNING: Max parameter size of %d bytes limits test to %d max image arguments.\n", (int)maxParameterSize, (int)(maxParameterSize/sizeof(cl_mem))); + maxWriteImages = (unsigned int)(maxParameterSize/sizeof(cl_mem)); + } + + /* Create a program with that many write args + 1 */ + programSrc = (char *)malloc( strlen( sample_write_image_kernel_pattern[ 0 ] ) + ( strlen( writeArgPattern ) + 6 ) * ( maxWriteImages + 1 ) + + strlen( sample_write_image_kernel_pattern[ 1 ] ) + 1 + 40240 ); + + strcpy( programSrc, sample_write_image_kernel_pattern[ 0 ] ); + strcat( programSrc, "write_only image2d_t dstimg0" ); + for( i = 1; i < maxWriteImages; i++ ) + { + sprintf( writeArgLine, writeArgPattern, i ); + strcat( programSrc, writeArgLine ); + } + strcat( programSrc, sample_write_image_kernel_pattern[ 1 ] ); + for ( i = 0; i < maxWriteImages; i++) { + sprintf( writeArgLine, "\twrite_imagef( dstimg%d, (int2)(0,0), (float4)(0,0,0,0));\n", i); + strcat( programSrc, writeArgLine ); + } + strcat( programSrc, sample_write_image_kernel_pattern[ 2 ] ); + + error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&programSrc, "sample_test"); + test_error( error, "Failed to create the program and kernel."); + free( programSrc ); + + + /* Create some I/O streams */ + streams = new clMemWrapper[maxWriteImages + 1]; + for( i = 0; i < maxWriteImages; i++ ) + { + streams[i] = create_image_2d( context, CL_MEM_READ_WRITE, &image_format_desc, 16, 16, 0, NULL, &error ); + test_error( error, "Unable to allocate test image" ); + } + + /* Set the arguments */ + for( i 
= 0; i < maxWriteImages; i++ ) + { + error = clSetKernelArg( kernel, i, sizeof( streams[i] ), &streams[i] ); + test_error( error, "Unable to set kernel arguments" ); + } + + /* Now try running the kernel */ + threads[0] = threads[1] = 16; + error = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, &event ); + test_error( error, "clEnqueueNDRangeKernel failed."); + + // Verify that the event does not return an error from the execution + error = clWaitForEvents(1, &event); + test_error( error, "clWaitForEvent failed"); + error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(event_status), &event_status, NULL); + test_error( error, "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed"); + clReleaseEvent(event); + if (event_status < 0) + test_error(error, "Kernel execution event returned error"); + + /* All done */ + delete[] streams; + return 0; +} + +int test_min_max_mem_alloc_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + cl_ulong maxAllocSize, memSize, minSizeToTry; + clMemWrapper memHdl; + + cl_ulong requiredAllocSize; + + if (gIsEmbedded) + requiredAllocSize = 1 * 1024 * 1024; + else + requiredAllocSize = 128 * 1024 * 1024; + + /* Get the max mem alloc size */ + error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL ); + test_error( error, "Unable to get max mem alloc size from device" ); + + error = clGetDeviceInfo( deviceID, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL ); + test_error( error, "Unable to get global memory size from device" ); + + if (memSize > (cl_ulong)SIZE_MAX) { + memSize = (cl_ulong)SIZE_MAX; + } + + if( maxAllocSize < requiredAllocSize) + { + log_error( "ERROR: Reported max allocation size is less than required %lldMB! 
(%llu or %lluMB, from a total mem size of %lldMB)\n", (requiredAllocSize / 1024) / 1024, maxAllocSize, (maxAllocSize / 1024)/1024, (memSize / 1024)/1024 ); + return -1; + } + + requiredAllocSize = ((memSize / 4) > (1024 * 1024 * 1024)) ? 1024 * 1024 * 1024 : memSize / 4; + + if (gIsEmbedded) + requiredAllocSize = (requiredAllocSize < 1 * 1024 * 1024) ? 1 * 1024 * 1024 : requiredAllocSize; + else + requiredAllocSize = (requiredAllocSize < 128 * 1024 * 1024) ? 128 * 1024 * 1024 : requiredAllocSize; + + if( maxAllocSize < requiredAllocSize ) + { + log_error( "ERROR: Reported max allocation size is less than required of total memory! (%llu or %lluMB, from a total mem size of %lluMB)\n", maxAllocSize, (maxAllocSize / 1024)/1024, (requiredAllocSize / 1024)/1024 ); + return -1; + } + + log_info("Reported max allocation size of %lld bytes (%gMB) and global mem size of %lld bytes (%gMB).\n", + maxAllocSize, maxAllocSize/(1024.0*1024.0), requiredAllocSize, requiredAllocSize/(1024.0*1024.0)); + + if ( memSize < maxAllocSize ) { + log_info("Global memory size is less than max allocation size, using that.\n"); + maxAllocSize = memSize; + } + + minSizeToTry = maxAllocSize/16; + while (maxAllocSize > (maxAllocSize/4)) { + + log_info("Trying to create a buffer of size of %lld bytes (%gMB).\n", maxAllocSize, (double)maxAllocSize/(1024.0*1024.0)); + memHdl = clCreateBuffer( context, CL_MEM_READ_ONLY, (size_t)maxAllocSize, NULL, &error ); + if (error == CL_MEM_OBJECT_ALLOCATION_FAILURE || error == CL_OUT_OF_RESOURCES || error == CL_OUT_OF_HOST_MEMORY) { + log_info("\tAllocation failed at size of %lld bytes (%gMB).\n", maxAllocSize, (double)maxAllocSize/(1024.0*1024.0)); + maxAllocSize -= minSizeToTry; + continue; + } + test_error( error, "clCreateBuffer failed for maximum sized buffer."); + return 0; + } + log_error("Failed to allocate even %lld bytes (%gMB).\n", maxAllocSize, (double)maxAllocSize/(1024.0*1024.0)); + return -1; +} + +int test_min_max_image_2d_width(cl_device_id 
deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + size_t maxDimension; + clMemWrapper streams[1]; + cl_image_format image_format_desc; + cl_ulong maxAllocSize; + cl_uint minRequiredDimension; + cl_char buffer[ 4098 ]; + size_t length; + + + PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID ) + + // Device version should fit the regex "OpenCL [0-9]+\.[0-9]+ *.*" + error = clGetDeviceInfo( deviceID, CL_DEVICE_VERSION, sizeof( buffer ), buffer, &length ); + test_error( error, "Unable to get device version string" ); + if( memcmp( buffer, "OpenCL 2.0", strlen( "OpenCL 2.0" ) ) == 0 ) + minRequiredDimension = gIsEmbedded ? 2048 : 8192; + else if( memcmp( buffer, "OpenCL 2.1", strlen( "OpenCL 2.1" ) ) == 0 ) + minRequiredDimension = gIsEmbedded ? 2048 : 8192; + else if( memcmp( buffer, "OpenCL 1.2", strlen( "OpenCL 1.2" ) ) == 0 ) + minRequiredDimension = gIsEmbedded ? 2048 : 8192; + else if( memcmp( buffer, "OpenCL 1.1", strlen( "OpenCL 1.1" ) ) == 0 ) + minRequiredDimension = gIsEmbedded ? 2048 : 8192; + else if ( memcmp( buffer, "OpenCL 1.0", strlen( "OpenCL 1.0" ) ) == 0 ) + minRequiredDimension = gIsEmbedded ? 2048 : 4096; + else + { + log_error( "ERROR: device version string does not match required format! (returned: %s)\n", (char *)buffer ); + return -1; + } + + + /* Just get any ol format to test with */ + error = get_8_bit_image_format( context, CL_MEM_OBJECT_IMAGE2D, CL_MEM_READ_WRITE, 0, &image_format_desc ); + test_error( error, "Unable to obtain suitable image format to test with!" ); + + /* Get the max 2d image width */ + error = clGetDeviceInfo( deviceID, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxDimension ), &maxDimension, NULL ); + test_error( error, "Unable to get max image 2d width from device" ); + + if( maxDimension < minRequiredDimension ) + { + log_error( "ERROR: Reported max image 2d width is less than required! 
(%d)\n", (int)maxDimension ); + return -1; + } + log_info("Max reported width is %ld.\n", maxDimension); + + /* Verify we can use the format */ + image_format_desc.image_channel_data_type = CL_UNORM_INT8; + image_format_desc.image_channel_order = CL_RGBA; + if (!is_image_format_supported( context, CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE2D, &image_format_desc)) { + log_error("CL_UNORM_INT8 CL_RGBA not supported. Can not test."); + return -1; + } + + /* Verify that we can actually allocate an image that large */ + error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof ( maxAllocSize ), &maxAllocSize, NULL ); + test_error( error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE." ); + if ( (cl_ulong)maxDimension*1*4 > maxAllocSize ) { + log_error("Can not allocate a large enough image (min size: %lld bytes, max allowed: %lld bytes) to test.\n", + (cl_ulong)maxDimension*1*4, maxAllocSize); + return -1; + } + + log_info("Attempting to create an image of size %d x 1 = %gMB.\n", (int)maxDimension, ((float)maxDimension*4/1024.0/1024.0)); + + /* Try to allocate a very big image */ + streams[0] = create_image_2d( context, CL_MEM_READ_ONLY, &image_format_desc, maxDimension, 1, 0, NULL, &error ); + if( ( streams[0] == NULL ) || ( error != CL_SUCCESS )) + { + print_error( error, "Image 2D creation failed for maximum width" ); + return -1; + } + + return 0; +} + +int test_min_max_image_2d_height(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + size_t maxDimension; + clMemWrapper streams[1]; + cl_image_format image_format_desc; + cl_ulong maxAllocSize; + cl_uint minRequiredDimension; + cl_char buffer[ 4098 ]; + size_t length; + + PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID ) + + // Device version should fit the regex "OpenCL [0-9]+\.[0-9]+ *.*" + error = clGetDeviceInfo( deviceID, CL_DEVICE_VERSION, sizeof( buffer ), buffer, &length ); + test_error( error, "Unable to get device version string" ); + if( memcmp( buffer, 
"OpenCL 2.0", strlen( "OpenCL 2.0" ) ) == 0 ) + minRequiredDimension = gIsEmbedded ? 2048 : 8192; + else if( memcmp( buffer, "OpenCL 2.1", strlen( "OpenCL 2.1" ) ) == 0 ) + minRequiredDimension = gIsEmbedded ? 2048 : 8192; + else if( memcmp( buffer, "OpenCL 1.2", strlen( "OpenCL 1.2" ) ) == 0 ) + minRequiredDimension = gIsEmbedded ? 2048 : 8192; + else if( memcmp( buffer, "OpenCL 1.1", strlen( "OpenCL 1.1" ) ) == 0 ) + minRequiredDimension = gIsEmbedded ? 2048 : 8192; + else if ( memcmp( buffer, "OpenCL 1.0", strlen( "OpenCL 1.0" ) ) == 0 ) + minRequiredDimension = gIsEmbedded ? 2048 : 4096; + else + { + log_error( "ERROR: device version string does not match required format! (returned: %s)\n", (char *)buffer ); + return -1; + } + + /* Just get any ol format to test with */ + error = get_8_bit_image_format( context, CL_MEM_OBJECT_IMAGE2D, CL_MEM_READ_WRITE, 0, &image_format_desc ); + test_error( error, "Unable to obtain suitable image format to test with!" ); + + /* Get the max 2d image width */ + error = clGetDeviceInfo( deviceID, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof( maxDimension ), &maxDimension, NULL ); + test_error( error, "Unable to get max image 2d height from device" ); + + if( maxDimension < minRequiredDimension ) + { + log_error( "ERROR: Reported max image 2d height is less than required! (%d)\n", (int)maxDimension ); + return -1; + } + log_info("Max reported height is %ld.\n", maxDimension); + + /* Verify we can use the format */ + image_format_desc.image_channel_data_type = CL_UNORM_INT8; + image_format_desc.image_channel_order = CL_RGBA; + if (!is_image_format_supported( context, CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE2D, &image_format_desc)) { + log_error("CL_UNORM_INT8 CL_RGBA not supported. 
Can not test."); + return -1; + } + + /* Verify that we can actually allocate an image that large */ + error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof ( maxAllocSize ), &maxAllocSize, NULL ); + test_error( error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE." ); + if ( (cl_ulong)maxDimension*1*4 > maxAllocSize ) { + log_error("Can not allocate a large enough image (min size: %lld bytes, max allowed: %lld bytes) to test.\n", + (cl_ulong)maxDimension*1*4, maxAllocSize); + return -1; + } + + log_info("Attempting to create an image of size 1 x %d = %gMB.\n", (int)maxDimension, ((float)maxDimension*4/1024.0/1024.0)); + + /* Try to allocate a very big image */ + streams[0] = create_image_2d( context, CL_MEM_READ_ONLY, &image_format_desc, 1, maxDimension, 0, NULL, &error ); + if( ( streams[0] == NULL ) || ( error != CL_SUCCESS )) + { + print_error( error, "Image 2D creation failed for maximum height" ); + return -1; + } + + return 0; +} + +int test_min_max_image_3d_width(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + size_t maxDimension; + clMemWrapper streams[1]; + cl_image_format image_format_desc; + cl_ulong maxAllocSize; + + + PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( deviceID ) + + /* Just get any ol format to test with */ + error = get_8_bit_image_format( context, CL_MEM_OBJECT_IMAGE3D, CL_MEM_READ_WRITE, 0, &image_format_desc ); + test_error( error, "Unable to obtain suitable image format to test with!" ); + + /* Get the max 2d image width */ + error = clGetDeviceInfo( deviceID, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof( maxDimension ), &maxDimension, NULL ); + test_error( error, "Unable to get max image 3d width from device" ); + + if( maxDimension < 2048 ) + { + log_error( "ERROR: Reported max image 3d width is less than required! 
(%d)\n", (int)maxDimension ); + return -1; + } + log_info("Max reported width is %ld.\n", maxDimension); + + /* Verify we can use the format */ + image_format_desc.image_channel_data_type = CL_UNORM_INT8; + image_format_desc.image_channel_order = CL_RGBA; + if (!is_image_format_supported( context, CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE3D, &image_format_desc)) { + log_error("CL_UNORM_INT8 CL_RGBA not supported. Can not test."); + return -1; + } + + /* Verify that we can actually allocate an image that large */ + error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof ( maxAllocSize ), &maxAllocSize, NULL ); + test_error( error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE." ); + if ( (cl_ulong)maxDimension*2*4 > maxAllocSize ) { + log_error("Can not allocate a large enough image (min size: %lld bytes, max allowed: %lld bytes) to test.\n", + (cl_ulong)maxDimension*2*4, maxAllocSize); + return -1; + } + + log_info("Attempting to create an image of size %d x 1 x 2 = %gMB.\n", (int)maxDimension, (2*(float)maxDimension*4/1024.0/1024.0)); + + /* Try to allocate a very big image */ + streams[0] = create_image_3d( context, CL_MEM_READ_ONLY, &image_format_desc, maxDimension, 1, 2, 0, 0, NULL, &error ); + if( ( streams[0] == NULL ) || ( error != CL_SUCCESS )) + { + print_error( error, "Image 3D creation failed for maximum width" ); + return -1; + } + + return 0; +} + +int test_min_max_image_3d_height(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + size_t maxDimension; + clMemWrapper streams[1]; + cl_image_format image_format_desc; + cl_ulong maxAllocSize; + + + PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( deviceID ) + + /* Just get any ol format to test with */ + error = get_8_bit_image_format( context, CL_MEM_OBJECT_IMAGE3D, CL_MEM_READ_WRITE, 0, &image_format_desc ); + test_error( error, "Unable to obtain suitable image format to test with!" 
); + + /* Get the max 2d image width */ + error = clGetDeviceInfo( deviceID, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof( maxDimension ), &maxDimension, NULL ); + test_error( error, "Unable to get max image 3d height from device" ); + + if( maxDimension < 2048 ) + { + log_error( "ERROR: Reported max image 3d height is less than required! (%d)\n", (int)maxDimension ); + return -1; + } + log_info("Max reported height is %ld.\n", maxDimension); + + /* Verify we can use the format */ + image_format_desc.image_channel_data_type = CL_UNORM_INT8; + image_format_desc.image_channel_order = CL_RGBA; + if (!is_image_format_supported( context, CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE3D, &image_format_desc)) { + log_error("CL_UNORM_INT8 CL_RGBA not supported. Can not test."); + return -1; + } + + /* Verify that we can actually allocate an image that large */ + error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof ( maxAllocSize ), &maxAllocSize, NULL ); + test_error( error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE." 
); + if ( (cl_ulong)maxDimension*2*4 > maxAllocSize ) { + log_error("Can not allocate a large enough image (min size: %lld bytes, max allowed: %lld bytes) to test.\n", + (cl_ulong)maxDimension*2*4, maxAllocSize); + return -1; + } + + log_info("Attempting to create an image of size 1 x %d x 2 = %gMB.\n", (int)maxDimension, (2*(float)maxDimension*4/1024.0/1024.0)); + + /* Try to allocate a very big image */ + streams[0] = create_image_3d( context, CL_MEM_READ_ONLY, &image_format_desc, 1, maxDimension, 2, 0, 0, NULL, &error ); + if( ( streams[0] == NULL ) || ( error != CL_SUCCESS )) + { + print_error( error, "Image 3D creation failed for maximum height" ); + return -1; + } + + return 0; +} + + +int test_min_max_image_3d_depth(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + size_t maxDimension; + clMemWrapper streams[1]; + cl_image_format image_format_desc; + cl_ulong maxAllocSize; + + + PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( deviceID ) + + /* Just get any ol format to test with */ + error = get_8_bit_image_format( context, CL_MEM_OBJECT_IMAGE3D, CL_MEM_READ_WRITE, 0, &image_format_desc ); + test_error( error, "Unable to obtain suitable image format to test with!" ); + + /* Get the max 2d image width */ + error = clGetDeviceInfo( deviceID, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof( maxDimension ), &maxDimension, NULL ); + test_error( error, "Unable to get max image 3d depth from device" ); + + if( maxDimension < 2048 ) + { + log_error( "ERROR: Reported max image 3d depth is less than required! (%d)\n", (int)maxDimension ); + return -1; + } + log_info("Max reported depth is %ld.\n", maxDimension); + + /* Verify we can use the format */ + image_format_desc.image_channel_data_type = CL_UNORM_INT8; + image_format_desc.image_channel_order = CL_RGBA; + if (!is_image_format_supported( context, CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE3D, &image_format_desc)) { + log_error("CL_UNORM_INT8 CL_RGBA not supported. 
Can not test."); + return -1; + } + + /* Verify that we can actually allocate an image that large */ + error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof ( maxAllocSize ), &maxAllocSize, NULL ); + test_error( error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE." ); + if ( (cl_ulong)maxDimension*1*4 > maxAllocSize ) { + log_error("Can not allocate a large enough image (min size: %lld bytes, max allowed: %lld bytes) to test.\n", + (cl_ulong)maxDimension*1*4, maxAllocSize); + return -1; + } + + log_info("Attempting to create an image of size 1 x 1 x %d = %gMB.\n", (int)maxDimension, ((float)maxDimension*4/1024.0/1024.0)); + + /* Try to allocate a very big image */ + streams[0] = create_image_3d( context, CL_MEM_READ_ONLY, &image_format_desc, 1, 1, maxDimension, 0, 0, NULL, &error ); + if( ( streams[0] == NULL ) || ( error != CL_SUCCESS )) + { + print_error( error, "Image 3D creation failed for maximum depth" ); + return -1; + } + + return 0; +} + +int test_min_max_image_array_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + size_t maxDimension; + clMemWrapper streams[1]; + cl_image_format image_format_desc; + cl_ulong maxAllocSize; + size_t minRequiredDimension = gIsEmbedded ? 256 : 2048; + + PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID ); + + /* Just get any ol format to test with */ + error = get_8_bit_image_format( context, CL_MEM_OBJECT_IMAGE2D_ARRAY, CL_MEM_READ_WRITE, 0, &image_format_desc ); + test_error( error, "Unable to obtain suitable image format to test with!" ); + + /* Get the max image array width */ + error = clGetDeviceInfo( deviceID, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, sizeof( maxDimension ), &maxDimension, NULL ); + test_error( error, "Unable to get max image array size from device" ); + + if( maxDimension < minRequiredDimension ) + { + log_error( "ERROR: Reported max image array size is less than required! 
(%d)\n", (int)maxDimension ); + return -1; + } + log_info("Max reported image array size is %ld.\n", maxDimension); + + /* Verify we can use the format */ + image_format_desc.image_channel_data_type = CL_UNORM_INT8; + image_format_desc.image_channel_order = CL_RGBA; + if (!is_image_format_supported( context, CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE2D_ARRAY, &image_format_desc)) { + log_error("CL_UNORM_INT8 CL_RGBA not supported. Can not test."); + return -1; + } + + /* Verify that we can actually allocate an image that large */ + error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof ( maxAllocSize ), &maxAllocSize, NULL ); + test_error( error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE." ); + if ( (cl_ulong)maxDimension*1*4 > maxAllocSize ) { + log_error("Can not allocate a large enough image (min size: %lld bytes, max allowed: %lld bytes) to test.\n", + (cl_ulong)maxDimension*1*4, maxAllocSize); + return -1; + } + + log_info("Attempting to create an image of size 1 x 1 x %d = %gMB.\n", (int)maxDimension, ((float)maxDimension*4/1024.0/1024.0)); + + /* Try to allocate a very big image */ + streams[0] = create_image_2d_array( context, CL_MEM_READ_ONLY, &image_format_desc, 1, 1, maxDimension, 0, 0, NULL, &error ); + if( ( streams[0] == NULL ) || ( error != CL_SUCCESS )) + { + print_error( error, "2D Image Array creation failed for maximum array size" ); + return -1; + } + + return 0; +} + +int test_min_max_image_buffer_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + size_t maxDimensionPixels; + clMemWrapper streams[2]; + cl_image_format image_format_desc = {0}; + cl_ulong maxAllocSize; + size_t minRequiredDimension = gIsEmbedded ? 
2048 : 65536; + unsigned int i = 0; + size_t pixelBytes = 0; + + PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID ); + + /* Get the max memory allocation size */ + error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof ( maxAllocSize ), &maxAllocSize, NULL ); + test_error( error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE." ); + + /* Get the max image array width */ + error = clGetDeviceInfo( deviceID, CL_DEVICE_IMAGE_MAX_BUFFER_SIZE, sizeof( maxDimensionPixels ), &maxDimensionPixels, NULL ); + test_error( error, "Unable to get max image buffer size from device" ); + + if( maxDimensionPixels < minRequiredDimension ) + { + log_error( "ERROR: Reported max image buffer size is less than required! (%d)\n", (int)maxDimensionPixels ); + return -1; + } + log_info("Max reported image buffer size is %ld pixels.\n", maxDimensionPixels); + + pixelBytes = maxAllocSize / maxDimensionPixels; + if ( pixelBytes == 0 ) + { + log_error( "Value of CL_DEVICE_IMAGE_MAX_BUFFER_SIZE is greater than CL_MAX_MEM_ALLOC_SIZE so there is no way to allocate image of maximum size!\n" ); + return -1; + } + + error = -1; + for ( i = pixelBytes; i > 0; --i ) + { + error = get_8_bit_image_format( context, CL_MEM_OBJECT_IMAGE1D, CL_MEM_READ_ONLY, i, &image_format_desc ); + if ( error == CL_SUCCESS ) + { + pixelBytes = i; + break; + } + } + test_error( error, "Device does not support format to be used to allocate image of CL_DEVICE_IMAGE_MAX_BUFFER_SIZE\n" ); + + log_info("Attempting to create an 1D image with channel order %s from buffer of size %d = %gMB.\n", + GetChannelOrderName( image_format_desc.image_channel_order ), (int)maxDimensionPixels, ((float)maxDimensionPixels*pixelBytes/1024.0/1024.0)); + + /* Try to allocate a buffer */ + streams[0] = clCreateBuffer( context, CL_MEM_READ_ONLY, maxDimensionPixels*pixelBytes, NULL, &error ); + if( ( streams[0] == NULL ) || ( error != CL_SUCCESS )) + { + print_error( error, "Buffer creation failed for maximum image buffer size" ); + return -1; + 
} + + /* Try to allocate a 1D image array from buffer */ + streams[1] = create_image_1d( context, CL_MEM_READ_ONLY, &image_format_desc, maxDimensionPixels, 0, NULL, streams[0], &error ); + if( ( streams[0] == NULL ) || ( error != CL_SUCCESS )) + { + print_error( error, "1D Image from buffer creation failed for maximum image buffer size" ); + return -1; + } + + return 0; +} + + + +int test_min_max_parameter_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error, retVal, i; + size_t maxSize; + char *programSrc; + char *ptr; + size_t numberExpected; + long numberOfIntParametersToTry; + char *argumentLine, *codeLines; + void *data; + cl_long long_result, expectedResult; + cl_int int_result; + size_t decrement; + cl_event event; + cl_int event_status; + + + /* Get the max param size */ + error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof( maxSize ), &maxSize, NULL ); + test_error( error, "Unable to get max parameter size from device" ); + + + if( ((!gIsEmbedded) && (maxSize < 1024)) || ((gIsEmbedded) && (maxSize < 256)) ) + { + log_error( "ERROR: Reported max parameter size is less than required! (%d)\n", (int)maxSize ); + return -1; + } + + /* The embedded profile does not require longs, so use ints */ + if(gIsEmbedded) + numberOfIntParametersToTry = numberExpected = (maxSize-sizeof(cl_mem))/sizeof(cl_int); + else + numberOfIntParametersToTry = numberExpected = (maxSize-sizeof(cl_mem))/sizeof(cl_long); + + decrement = (size_t)(numberOfIntParametersToTry/8); + if (decrement < 1) + decrement = 1; + log_info("Reported max parameter size of %d bytes.\n", (int)maxSize); + + while (numberOfIntParametersToTry > 0) { + // These need to be inside to be deallocated automatically on each loop iteration. 
+ clProgramWrapper program; + clMemWrapper mem; + clKernelWrapper kernel; + + if(gIsEmbedded) + { + log_info("Trying a kernel with %ld int arguments (%ld bytes) and one cl_mem (%ld bytes) for %ld bytes total.\n", + numberOfIntParametersToTry, sizeof(cl_int)*numberOfIntParametersToTry, sizeof(cl_mem), + sizeof(cl_mem)+numberOfIntParametersToTry*sizeof(cl_int)); + } + else + { + log_info("Trying a kernel with %ld long arguments (%ld bytes) and one cl_mem (%ld bytes) for %ld bytes total.\n", + numberOfIntParametersToTry, sizeof(cl_long)*numberOfIntParametersToTry, sizeof(cl_mem), + sizeof(cl_mem)+numberOfIntParametersToTry*sizeof(cl_long)); + } + + // Allocate memory for the program storage + data = malloc(sizeof(cl_long)*numberOfIntParametersToTry); + + argumentLine = (char*)malloc(sizeof(char)*numberOfIntParametersToTry*32); + codeLines = (char*)malloc(sizeof(char)*numberOfIntParametersToTry*32); + programSrc = (char*)malloc(sizeof(char)*(numberOfIntParametersToTry*64+1024)); + argumentLine[0] = '\0'; + codeLines[0] = '\0'; + programSrc[0] = '\0'; + + // Generate our results + expectedResult = 0; + for (i=0; i<(int)numberOfIntParametersToTry; i++) + { + if( gHasLong ) + { + ((cl_long *)data)[i] = i; + expectedResult += i; + } + else + { + ((cl_int *)data)[i] = i; + expectedResult += i; + } + } + + // Build the program + if( gHasLong) + sprintf(argumentLine, "%s", "long arg0"); + else + sprintf(argumentLine, "%s", "int arg0"); + + sprintf(codeLines, "%s", "result[0] += arg0;"); + for (i=1; i<(int)numberOfIntParametersToTry; i++) + { + if( gHasLong) + sprintf(argumentLine + strlen( argumentLine), ", long arg%d", i); + else + sprintf(argumentLine + strlen( argumentLine), ", int arg%d", i); + + sprintf(codeLines + strlen( codeLines), "\nresult[0] += arg%d;", i); + } + + /* Create a kernel to test with */ + sprintf( programSrc, gHasLong ? 
sample_large_parmam_kernel_pattern[0]: + sample_large_int_parmam_kernel_pattern[0], argumentLine, codeLines); + + ptr = programSrc; + if( create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&ptr, "sample_test" ) != 0 ) + { + log_info("Create program failed, decrementing number of parameters to try.\n"); + numberOfIntParametersToTry -= decrement; + continue; + } + + /* Try to set a large argument to the kernel */ + retVal = 0; + + mem = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_long), NULL, &error); + test_error(error, "clCreateBuffer failed"); + + for (i=0; i<(int)numberOfIntParametersToTry; i++) { + if(gHasLong) + error = clSetKernelArg(kernel, i, sizeof(cl_long), &(((cl_long*)data)[i])); + else + error = clSetKernelArg(kernel, i, sizeof(cl_int), &(((cl_int*)data)[i])); + + if (error != CL_SUCCESS) { + log_info( "clSetKernelArg failed (%s), decrementing number of parameters to try.\n", IGetErrorString(error)); + numberOfIntParametersToTry -= decrement; + break; + } + } + if (error != CL_SUCCESS) + continue; + + + error = clSetKernelArg(kernel, i, sizeof(cl_mem), &mem); + if (error != CL_SUCCESS) { + log_info( "clSetKernelArg failed (%s), decrementing number of parameters to try.\n", IGetErrorString(error)); + numberOfIntParametersToTry -= decrement; + continue; + } + + size_t globalDim[3]={1,1,1}, localDim[3]={1,1,1}; + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, globalDim, localDim, 0, NULL, &event); + if (error != CL_SUCCESS) { + log_info( "clEnqueueNDRangeKernel failed (%s), decrementing number of parameters to try.\n", IGetErrorString(error)); + numberOfIntParametersToTry -= decrement; + continue; + } + + // Verify that the event does not return an error from the execution + error = clWaitForEvents(1, &event); + test_error( error, "clWaitForEvent failed"); + error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(event_status), &event_status, NULL); + test_error( error, 
"clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed"); + clReleaseEvent(event); + if (event_status < 0) + test_error(error, "Kernel execution event returned error"); + + if(gHasLong) + error = clEnqueueReadBuffer(queue, mem, CL_TRUE, 0, sizeof(cl_long), &long_result, 0, NULL, NULL); + else + error = clEnqueueReadBuffer(queue, mem, CL_TRUE, 0, sizeof(cl_int), &int_result, 0, NULL, NULL); + + test_error(error, "clEnqueueReadBuffer failed") + + free(data); + free(argumentLine); + free(codeLines); + free(programSrc); + + if(gHasLong) + { + if (long_result != expectedResult) { + log_error("Expected result (%lld) does not equal actual result (%lld).\n", expectedResult, long_result); + numberOfIntParametersToTry -= decrement; + continue; + } else { + log_info("Results verified at %ld bytes of arguments.\n", sizeof(cl_mem)+numberOfIntParametersToTry*sizeof(cl_long)); + break; + } + } + else + { + if (int_result != expectedResult) { + log_error("Expected result (%lld) does not equal actual result (%d).\n", expectedResult, int_result); + numberOfIntParametersToTry -= decrement; + continue; + } else { + log_info("Results verified at %ld bytes of arguments.\n", sizeof(cl_mem)+numberOfIntParametersToTry*sizeof(cl_int)); + break; + } + } + } + + if (numberOfIntParametersToTry == (long)numberExpected) + return 0; + return -1; +} + +int test_min_max_samplers(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + cl_uint maxSamplers, i; + clProgramWrapper program; + clKernelWrapper kernel; + char *programSrc, samplerLine[1024]; + size_t maxParameterSize; + cl_event event; + cl_int event_status; + cl_uint minRequiredSamplers = gIsEmbedded ? 
8 : 16; + + + PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID ) + + /* Get the max value */ + error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_SAMPLERS, sizeof( maxSamplers ), &maxSamplers, NULL ); + test_error( error, "Unable to get max sampler count from device" ); + + if( maxSamplers < minRequiredSamplers ) + { + log_error( "ERROR: Reported max sampler count is less than required! (%d)\n", (int)maxSamplers ); + return -1; + } + + log_info("Reported max %d samplers.\n", maxSamplers); + + error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof( maxParameterSize ), &maxParameterSize, NULL ); + test_error( error, "Unable to get max parameter size from device" ); + + // Subtract the size of the result + maxParameterSize -= 2*sizeof(cl_mem); + + // Calculate the number we can use + if (maxParameterSize/sizeof(cl_sampler) < maxSamplers) { + log_info("WARNING: Max parameter size of %d bytes limits test to %d max sampler arguments.\n", (int)maxParameterSize, (int)(maxParameterSize/sizeof(cl_sampler))); + maxSamplers = (unsigned int)(maxParameterSize/sizeof(cl_sampler)); + } + + /* Create a kernel to test with */ + programSrc = (char *)malloc( ( strlen( sample_sampler_kernel_pattern[ 1 ] ) + 8 ) * ( maxSamplers ) + + strlen( sample_sampler_kernel_pattern[ 0 ] ) + strlen( sample_sampler_kernel_pattern[ 2 ] ) + + ( strlen( sample_sampler_kernel_pattern[ 3 ] ) + 8 ) * maxSamplers + + strlen( sample_sampler_kernel_pattern[ 4 ] ) ); + strcpy( programSrc, sample_sampler_kernel_pattern[ 0 ] ); + for( i = 0; i < maxSamplers; i++ ) + { + sprintf( samplerLine, sample_sampler_kernel_pattern[ 1 ], i ); + strcat( programSrc, samplerLine ); + } + strcat( programSrc, sample_sampler_kernel_pattern[ 2 ] ); + for( i = 0; i < maxSamplers; i++ ) + { + sprintf( samplerLine, sample_sampler_kernel_pattern[ 3 ], i ); + strcat( programSrc, samplerLine ); + } + strcat( programSrc, sample_sampler_kernel_pattern[ 4 ] ); + + + error = create_single_kernel_helper(context, &program, &kernel, 1, 
(const char **)&programSrc, "sample_test"); + test_error( error, "Failed to create the program and kernel."); + + // We have to set up some fake parameters so it'll work + clSamplerWrapper *samplers = new clSamplerWrapper[maxSamplers]; + + cl_image_format format = { CL_RGBA, CL_SIGNED_INT8 }; + + clMemWrapper image = create_image_2d( context, CL_MEM_READ_WRITE, &format, 16, 16, 0, NULL, &error ); + test_error( error, "Unable to create a test image" ); + + clMemWrapper stream = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), 16, NULL, &error ); + test_error( error, "Unable to create test buffer" ); + + error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &image ); + error |= clSetKernelArg( kernel, 1, sizeof( cl_mem ), &stream ); + test_error( error, "Unable to set kernel arguments" ); + for( i = 0; i < maxSamplers; i++ ) + { + samplers[ i ] = clCreateSampler( context, CL_FALSE, CL_ADDRESS_NONE, CL_FILTER_NEAREST, &error ); + test_error( error, "Unable to create sampler" ); + + error = clSetKernelArg( kernel, 2 + i, sizeof( cl_sampler ), &samplers[ i ] ); + test_error( error, "Unable to set sampler argument" ); + } + + size_t globalDim[3]={1,1,1}, localDim[3]={1,1,1}; + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, globalDim, localDim, 0, NULL, &event); + test_error(error, "clEnqueueNDRangeKernel failed with maximum number of samplers."); + + // Verify that the event does not return an error from the execution + error = clWaitForEvents(1, &event); + test_error( error, "clWaitForEvent failed"); + error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(event_status), &event_status, NULL); + test_error( error, "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed"); + clReleaseEvent(event); + if (event_status < 0) + test_error(error, "Kernel execution event returned error"); + + free( programSrc ); + delete[] samplers; + return 0; +} + +#define PASSING_FRACTION 4 +int test_min_max_constant_buffer_size(cl_device_id deviceID, 
cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper streams[3]; + size_t threads[1], localThreads[1]; + cl_int *constantData, *resultData; + cl_ulong maxSize, stepSize, currentSize; + int i; + cl_event event; + cl_int event_status; + MTdata d; + + /* Verify our test buffer won't be bigger than allowed */ + error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof( maxSize ), &maxSize, 0 ); + test_error( error, "Unable to get max constant buffer size" ); + + if( ( 0 == gIsEmbedded && maxSize < 64L * 1024L ) || maxSize < 1L * 1024L ) + { + log_error( "ERROR: Reported max constant buffer size less than required by OpenCL 1.0 (reported %d KB)\n", (int)( maxSize / 1024L ) ); + return -1; + } + + log_info("Reported max constant buffer size of %lld bytes.\n", maxSize); + + /* Create a kernel to test with */ + if( create_single_kernel_helper( context, &program, &kernel, 1, sample_const_arg_kernel, "sample_test" ) != 0 ) + { + return -1; + } + + /* Try the returned max size and decrease it until we get one that works. 
*/ + stepSize = maxSize/16; + currentSize = maxSize; + int allocPassed = 0; + d = init_genrand( gRandomSeed ); + while (!allocPassed && currentSize >= maxSize/PASSING_FRACTION) { + log_info("Attempting to allocate constant buffer of size %lld bytes\n", maxSize); + + /* Create some I/O streams */ + size_t sizeToAllocate = ((size_t)currentSize/sizeof( cl_int ))*sizeof(cl_int); + size_t numberOfInts = sizeToAllocate/sizeof(cl_int); + constantData = (cl_int *)malloc( sizeToAllocate); + for(i=0; i<(int)(numberOfInts); i++) + constantData[i] = (int)genrand_int32(d); + + streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeToAllocate, constantData, &error); + test_error( error, "Creating test array failed" ); + streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeToAllocate, NULL, &error); + test_error( error, "Creating test array failed" ); + + + /* Set the arguments */ + error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0]); + test_error( error, "Unable to set indexed kernel arguments" ); + error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]); + test_error( error, "Unable to set indexed kernel arguments" ); + + + /* Test running the kernel and verifying it */ + threads[0] = numberOfInts; + localThreads[0] = 1; + log_info("Filling constant buffer with %d cl_ints (%d bytes).\n", (int)threads[0], (int)(threads[0]*sizeof(cl_int))); + + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, &event ); + /* If we failed due to a resource issue, reduce the size and try again. 
*/ + if ((error == CL_OUT_OF_RESOURCES) || (error == CL_MEM_OBJECT_ALLOCATION_FAILURE) || (error == CL_OUT_OF_HOST_MEMORY)) { + log_info("Kernel enqueue failed at size %lld, trying at a reduced size.\n", currentSize); + currentSize -= stepSize; + free(constantData); + continue; + } + test_error( error, "clEnqueueNDRangeKernel with maximum constant buffer size failed."); + + // Verify that the event does not return an error from the execution + error = clWaitForEvents(1, &event); + test_error( error, "clWaitForEvent failed"); + error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(event_status), &event_status, NULL); + test_error( error, "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed"); + clReleaseEvent(event); + if (event_status < 0) { + if ((event_status == CL_OUT_OF_RESOURCES) || (event_status == CL_MEM_OBJECT_ALLOCATION_FAILURE) || (event_status == CL_OUT_OF_HOST_MEMORY)) { + log_info("Kernel event indicates failure at size %lld, trying at a reduced size.\n", currentSize); + currentSize -= stepSize; + free(constantData); + continue; + } else { + test_error(error, "Kernel execution event returned error"); + } + } + + /* Otherwise we did not fail due to resource issues. 
*/ + allocPassed = 1; + + resultData = (cl_int *)malloc(sizeToAllocate); + error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, sizeToAllocate, resultData, 0, NULL, NULL); + test_error( error, "clEnqueueReadBuffer failed"); + + for(i=0; i<(int)(numberOfInts); i++) + if (constantData[i] != resultData[i]) { + log_error("Data failed to verify: constantData[%d]=%d != resultData[%d]=%d\n", + i, constantData[i], i, resultData[i]); + free( constantData ); + free(resultData); + free_mtdata(d); d = NULL; + return -1; + } + + free( constantData ); + free(resultData); + } + free_mtdata(d); d = NULL; + + if (allocPassed) { + if (currentSize < maxSize/PASSING_FRACTION) { + log_error("Failed to allocate at least 1/4 of the reported constant size.\n"); + return -1; + } else if (currentSize != maxSize) { + log_info("Passed at reduced size. (%lld of %lld bytes)\n", currentSize, maxSize); + return 0; + } + return 0; + } + return -1; +} + +int test_min_max_constant_args(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper *streams; + size_t threads[1], localThreads[1]; + cl_uint i, maxArgs; + cl_ulong maxSize; + cl_ulong maxParameterSize; + size_t individualBufferSize; + char *programSrc, *constArgs, *str2; + char str[512]; + const char *ptr; + cl_event event; + cl_int event_status; + + + /* Verify our test buffer won't be bigger than allowed */ + error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_CONSTANT_ARGS, sizeof( maxArgs ), &maxArgs, 0 ); + test_error( error, "Unable to get max constant arg count" ); + + error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof( maxParameterSize ), &maxParameterSize, NULL ); + test_error( error, "Unable to get max parameter size from device" ); + + // Subtract the size of the result + maxParameterSize -= sizeof(cl_mem); + + // Calculate the number we can use + if (maxParameterSize/sizeof(cl_mem) < maxArgs) { + 
log_info("WARNING: Max parameter size of %d bytes limits test to %d max image arguments.\n", (int)maxParameterSize, (int)(maxParameterSize/sizeof(cl_mem))); + maxArgs = (unsigned int)(maxParameterSize/sizeof(cl_mem)); + } + + + if( maxArgs < (gIsEmbedded ? 4 : 8) ) + { + log_error( "ERROR: Reported max constant arg count less than required by OpenCL 1.0 (reported %d)\n", (int)maxArgs ); + return -1; + } + + error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof( maxSize ), &maxSize, 0 ); + test_error( error, "Unable to get max constant buffer size" ); + individualBufferSize = ((int)maxSize/2)/maxArgs; + + log_info("Reported max constant arg count of %d and max constant buffer size of %d. Test will attempt to allocate half of that, or %d buffers of size %d.\n", + (int)maxArgs, (int)maxSize, (int)maxArgs, (int)individualBufferSize); + + str2 = (char*)malloc(sizeof(char)*32*(maxArgs+2)); + constArgs = (char*)malloc(sizeof(char)*32*(maxArgs+2)); + programSrc = (char*)malloc(sizeof(char)*32*2*(maxArgs+2)+1024); + + /* Create a test program */ + constArgs[0] = 0; + str2[0] = 0; + for( i = 0; i < maxArgs-1; i++ ) + { + sprintf( str, ", __constant int *src%d", (int)( i + 2 ) ); + strcat( constArgs, str ); + sprintf( str2 + strlen( str2), "\tdst[tid] += src%d[tid];\n", (int)(i+2)); + if (strlen(str2) > (sizeof(char)*32*(maxArgs+2)-32) || strlen(constArgs) > (sizeof(char)*32*(maxArgs+2)-32)) { + log_info("Limiting number of arguments tested to %d due to test program allocation size.\n", i); + break; + } + } + sprintf( programSrc, sample_const_max_arg_kernel_pattern, constArgs, str2 ); + + /* Create a kernel to test with */ + ptr = programSrc; + if( create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "sample_test" ) != 0 ) + { + return -1; + } + + /* Create some I/O streams */ + streams = new clMemWrapper[ maxArgs + 1 ]; + for( i = 0; i < maxArgs + 1; i++ ) + { + streams[i] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), 
individualBufferSize, NULL, &error); + test_error( error, "Creating test array failed" ); + } + + /* Set the arguments */ + for( i = 0; i < maxArgs + 1; i++ ) + { + error = clSetKernelArg(kernel, i, sizeof( streams[i] ), &streams[i]); + test_error( error, "Unable to set kernel argument" ); + } + + /* Test running the kernel and verifying it */ + threads[0] = (size_t)10; + while (threads[0]*sizeof(cl_int) > individualBufferSize) + threads[0]--; + + error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] ); + test_error( error, "Unable to get work group size to use" ); + + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, &event ); + test_error( error, "clEnqueueNDRangeKernel failed"); + + // Verify that the event does not return an error from the execution + error = clWaitForEvents(1, &event); + test_error( error, "clWaitForEvent failed"); + error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(event_status), &event_status, NULL); + test_error( error, "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed"); + clReleaseEvent(event); + if (event_status < 0) + test_error(error, "Kernel execution event returned error"); + + error = clFinish(queue); + test_error( error, "clFinish failed."); + + delete [] streams; + free(str2); + free(constArgs); + free(programSrc); + return 0; +} + +int test_min_max_compute_units(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + cl_uint value; + + + error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof( value ), &value, 0 ); + test_error( error, "Unable to get compute unit count" ); + + if( value < 1 ) + { + log_error( "ERROR: Reported compute unit count less than required by OpenCL 1.0 (reported %d)\n", (int)value ); + return -1; + } + + log_info("Reported %d max compute units.\n", value); + + return 0; +} + +int test_min_max_address_bits(cl_device_id deviceID, cl_context 
context, cl_command_queue queue, int num_elements) +{ + int error; + cl_uint value; + + + error = clGetDeviceInfo( deviceID, CL_DEVICE_ADDRESS_BITS, sizeof( value ), &value, 0 ); + test_error( error, "Unable to get address bit count" ); + + if( value != 32 && value != 64 ) + { + log_error( "ERROR: Reported address bit count not valid by OpenCL 1.0 (reported %d)\n", (int)value ); + return -1; + } + + log_info("Reported %d device address bits.\n", value); + + return 0; +} + +int test_min_max_single_fp_config(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + cl_device_fp_config value; + char profile[128] = ""; + + error = clGetDeviceInfo( deviceID, CL_DEVICE_SINGLE_FP_CONFIG, sizeof( value ), &value, 0 ); + test_error( error, "Unable to get device single fp config" ); + + //Check to see if we are an embedded profile device + if((error = clGetDeviceInfo( deviceID, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL ))) + { + log_error( "FAILURE: Unable to get CL_DEVICE_PROFILE: error %d\n", error ); + return error; + } + + if( 0 == strcmp( profile, "EMBEDDED_PROFILE" )) + { // embedded device + + if( 0 == (value & (CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO))) + { + log_error( "FAILURE: embedded device supports neither CL_FP_ROUND_TO_NEAREST or CL_FP_ROUND_TO_ZERO\n" ); + return -1; + } + } + else + { // Full profile + if( ( value & ( CL_FP_ROUND_TO_NEAREST | CL_FP_INF_NAN )) != ( CL_FP_ROUND_TO_NEAREST | CL_FP_INF_NAN ) ) + { + log_error( "ERROR: Reported single fp config doesn't meet minimum set by OpenCL 1.0 (reported 0x%08x)\n", (int)value ); + return -1; + } + } + return 0; +} + +int test_min_max_double_fp_config(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + cl_device_fp_config value; + + error = clGetDeviceInfo( deviceID, CL_DEVICE_DOUBLE_FP_CONFIG, sizeof( value ), &value, 0 ); + test_error( error, "Unable to get device double fp config" ); + + if (value 
== 0) + return 0; + + if( ( value & (CL_FP_FMA | CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO | CL_FP_ROUND_TO_INF | CL_FP_INF_NAN | CL_FP_DENORM)) != ( CL_FP_FMA | CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO | CL_FP_ROUND_TO_INF | CL_FP_INF_NAN | CL_FP_DENORM) ) + { + log_error( "ERROR: Reported double fp config doesn't meet minimum set by OpenCL 1.0 (reported 0x%08x)\n", (int)value ); + return -1; + } + return 0; +} + +int test_min_max_local_mem_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper streams[3]; + size_t threads[1], localThreads[1]; + cl_int *localData, *resultData; + cl_ulong maxSize, kernelLocalUsage, min_max_local_mem_size; + cl_char buffer[ 4098 ]; + size_t length; + int i; + int err = 0; + MTdata d; + + /* Verify our test buffer won't be bigger than allowed */ + error = clGetDeviceInfo( deviceID, CL_DEVICE_LOCAL_MEM_SIZE, sizeof( maxSize ), &maxSize, 0 ); + test_error( error, "Unable to get max local buffer size" ); + + // Device version should fit the regex "OpenCL [0-9]+\.[0-9]+ *.*" + error = clGetDeviceInfo( deviceID, CL_DEVICE_VERSION, sizeof( buffer ), buffer, &length ); + test_error( error, "Unable to get device version string" ); + if (!gIsEmbedded) + { + if( memcmp( buffer, "OpenCL 2.0", strlen( "OpenCL 2.0" ) ) == 0 ) + min_max_local_mem_size = 16L * 1024L; + else if( memcmp( buffer, "OpenCL 2.1", strlen( "OpenCL 2.1" ) ) != 0 ) + min_max_local_mem_size = 16L * 1024L; + else if( memcmp( buffer, "OpenCL 1.2", strlen( "OpenCL 1.2" ) ) != 0 ) + min_max_local_mem_size = 16L * 1024L; + else if( memcmp( buffer, "OpenCL 1.1", strlen( "OpenCL 1.1" ) ) != 0 ) + min_max_local_mem_size = 16L * 1024L; + else if ( memcmp( buffer, "OpenCL 1.0", strlen( "OpenCL 1.0" ) ) != 0 ) + min_max_local_mem_size = 32L * 1024L; + else + { + log_error( "ERROR: device version string does not match required format! 
(returned: %s)\n", (char *)buffer ); + return -1; + } + } + + if( maxSize < (gIsEmbedded ? 1L * 1024L : min_max_local_mem_size) ) + { + log_error( "ERROR: Reported local mem size less than required by OpenCL 1.1 (reported %dKb)\n", (int)( maxSize / 1024L ) ); + return -1; + } + + log_info("Reported max local buffer size for device: %lld bytes.\n", maxSize); + + /* Create a kernel to test with */ + if( create_single_kernel_helper( context, &program, &kernel, 1, sample_local_arg_kernel, "sample_test" ) != 0 ) + { + return -1; + } + + error = clGetKernelWorkGroupInfo(kernel, deviceID, CL_KERNEL_LOCAL_MEM_SIZE, sizeof(kernelLocalUsage), &kernelLocalUsage, NULL); + test_error(error, "clGetKernelWorkGroupInfo for CL_KERNEL_LOCAL_MEM_SIZE failed"); + + log_info("Reported local buffer usage for kernel (CL_KERNEL_LOCAL_MEM_SIZE): %lld bytes.\n", kernelLocalUsage); + + /* Create some I/O streams */ + size_t sizeToAllocate = ((size_t)(maxSize-kernelLocalUsage)/sizeof( cl_int ))*sizeof(cl_int); + size_t numberOfInts = sizeToAllocate/sizeof(cl_int); + + log_info("Attempting to use %lld bytes of local memory.\n", (cl_ulong)sizeToAllocate); + + localData = (cl_int *)malloc( sizeToAllocate ); + d = init_genrand( gRandomSeed ); + for(i=0; i<(int)(numberOfInts); i++) + localData[i] = (int)genrand_int32(d); + free_mtdata(d); d = NULL; + + streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeToAllocate, localData, &error); + test_error( error, "Creating test array failed" ); + streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeToAllocate, NULL, &error); + test_error( error, "Creating test array failed" ); + + + /* Set the arguments */ + error = clSetKernelArg(kernel, 0, sizeToAllocate, NULL); + test_error( error, "Unable to set indexed kernel arguments" ); + error = clSetKernelArg(kernel, 1, sizeof( streams[0] ), &streams[0]); + test_error( error, "Unable to set indexed kernel arguments" ); + error = clSetKernelArg(kernel, 2, sizeof( 
streams[1] ), &streams[1]); + test_error( error, "Unable to set indexed kernel arguments" ); + + + /* Test running the kernel and verifying it */ + threads[0] = numberOfInts; + localThreads[0] = 1; + log_info("Creating local buffer with %d cl_ints (%d bytes).\n", (int)numberOfInts, (int)sizeToAllocate); + + cl_event evt; + cl_int evt_err; + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, &evt ); + test_error(error, "clEnqueueNDRangeKernel failed"); + + error = clFinish(queue); + test_error( error, "clFinish failed"); + + error = clGetEventInfo(evt, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof evt_err, &evt_err, NULL); + test_error( error, "clGetEventInfo with maximum local buffer size failed."); + + if (evt_err != CL_COMPLETE) { + print_error(evt_err, "Kernel event returned error"); + clReleaseEvent(evt); + return -1; + } + + resultData = (cl_int *)malloc(sizeToAllocate); + + error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, sizeToAllocate, resultData, 0, NULL, NULL); + test_error( error, "clEnqueueReadBuffer failed"); + + for(i=0; i<(int)(numberOfInts); i++) + if (localData[i] != resultData[i]) { + clReleaseEvent(evt); + free( localData ); + free(resultData); + log_error("Results failed to verify.\n"); + return -1; + } + clReleaseEvent(evt); + free( localData ); + free(resultData); + + return err; +} + +int test_min_max_kernel_preferred_work_group_size_multiple(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int err; + clProgramWrapper program; + clKernelWrapper kernel; + + size_t max_local_workgroup_size[3]; + size_t max_workgroup_size = 0, preferred_workgroup_size = 0; + + err = create_single_kernel_helper(context, &program, &kernel, 1, sample_local_arg_kernel, "sample_test" ); + test_error(err, "Failed to build kernel/program."); + + err = clGetKernelWorkGroupInfo(kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE, + sizeof(max_workgroup_size), &max_workgroup_size, NULL); + 
test_error(err, "clGetKernelWorkgroupInfo failed."); + + err = clGetKernelWorkGroupInfo(kernel, deviceID, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, + sizeof(preferred_workgroup_size), &preferred_workgroup_size, NULL); + test_error(err, "clGetKernelWorkgroupInfo failed."); + + err = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_local_workgroup_size), max_local_workgroup_size, NULL); + test_error(err, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES"); + + // Since the preferred size is only a performance hint, we can only really check that we get a sane value + // back + log_info( "size: %ld preferred: %ld max: %ld\n", max_workgroup_size, preferred_workgroup_size, max_local_workgroup_size[0] ); + + if( preferred_workgroup_size > max_workgroup_size ) + { + log_error( "ERROR: Reported preferred workgroup multiple larger than max workgroup size (preferred %ld, max %ld)\n", preferred_workgroup_size, max_workgroup_size ); + return -1; + } + + return 0; +} + +int test_min_max_execution_capabilities(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + cl_device_exec_capabilities value; + + + error = clGetDeviceInfo( deviceID, CL_DEVICE_EXECUTION_CAPABILITIES, sizeof( value ), &value, 0 ); + test_error( error, "Unable to get execution capabilities" ); + + if( ( value & CL_EXEC_KERNEL ) != CL_EXEC_KERNEL ) + { + log_error( "ERROR: Reported execution capabilities less than required by OpenCL 1.0 (reported 0x%08x)\n", (int)value ); + return -1; + } + return 0; +} + +int test_min_max_queue_properties(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + cl_command_queue_properties value; + + + error = clGetDeviceInfo( deviceID, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES, sizeof( value ), &value, 0 ); + test_error( error, "Unable to get queue properties" ); + + if( ( value & CL_QUEUE_PROFILING_ENABLE ) != CL_QUEUE_PROFILING_ENABLE ) + { + log_error( 
"ERROR: Reported queue properties less than required by OpenCL 1.0 (reported 0x%08x)\n", (int)value ); + return -1; + } + return 0; +} + +int test_min_max_device_version(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + cl_int error, i; + cl_char buffer[ 4098 ]; + size_t length; + + + // Device version should fit the regex "OpenCL [0-9]+\.[0-9]+ *.*" + error = clGetDeviceInfo( deviceID, CL_DEVICE_VERSION, sizeof( buffer ), buffer, &length ); + test_error( error, "Unable to get device version string" ); + if( memcmp( buffer, "OpenCL ", strlen( "OpenCL " ) ) != 0 ) + { + log_error( "ERROR: Initial part of device version string does not match required format! (returned: %s)\n", (char *)buffer ); + return -1; + } + + log_info("Returned version %s.\n", buffer); + + char *p1 = (char *)buffer + strlen( "OpenCL " ); + while( *p1 == ' ' ) + p1++; + char *p2 = p1; + if( ! isdigit(*p2) ) + { + log_error( "ERROR: Major revision number must follow space behind OpenCL! (returned %s)\n", (char*) buffer ); + return -1; + } + while( isdigit( *p2 ) ) + p2++; + if( *p2 != '.' ) + { + log_error( "ERROR: Version number must contain a decimal point! (returned: %s)\n", (char *)buffer ); + return -1; + } + char *p3 = p2 + 1; + if( ! isdigit(*p3) ) + { + log_error( "ERROR: Minor revision number is missing or does not abut the decimal point! (returned %s)\n", (char*) buffer ); + return -1; + } + while( isdigit( *p3 ) ) + p3++; + if( *p3 != ' ' ) + { + log_error( "ERROR: A space must appear after the minor version! (returned: %s)\n", (char *)buffer ); + return -1; + } + *p2 = ' '; // Put in a space for atoi below. + p2++; + + int major = atoi( p1 ); + int minor = atoi( p2 ); + int minor_revision = 2; + if( getenv("OPENCL_1_0_DEVICE")) + { + minor_revision = 0; + log_info( "WARNING: This test was run with OPENCL_1_0_DEVICE defined! 
This is not a OpenCL 1.1 or OpenCL 1.2 compatible device!!!\n" ); + } + else if( getenv("OPENCL_1_1_DEVICE")) + { + minor_revision = 1; + log_info( "WARNING: This test was run with OPENCL_1_1_DEVICE defined! This is not a OpenCL 1.2 compatible device!!!\n" ); + } + if( major * 10 + minor < 10 + minor_revision ) + { + log_error( "ERROR: OpenCL device version returned is less than 1.%d! (Returned: %s)\n", minor_revision, (char *)buffer ); + return -1; + } + + // Sanity checks on the returned values + if( length != (strlen( (char *)buffer ) + 1 )) + { + log_error( "ERROR: Returned length of version string does not match actual length (actual: %d, returned: %d)\n", (int)strlen( (char *)buffer ), (int)length ); + return -1; + } + + // Make sure 2.0 devices support required extensions for 2.0 + const char *requiredExtensions20[] = + { + "cl_khr_byte_addressable_store", + "cl_khr_3d_image_writes", + "cl_khr_image2d_from_buffer", + "cl_khr_depth_images", + "cl_khr_fp64", + }; + + // Make sure 1.1 devices support required extensions for 1.1 + const char *requiredExtensions[] = + { + "cl_khr_global_int32_base_atomics", + "cl_khr_global_int32_extended_atomics", + "cl_khr_local_int32_base_atomics", + "cl_khr_local_int32_extended_atomics", + "cl_khr_byte_addressable_store", + NULL + }; + + if( major * 10 + minor >= 11 ) + { + char *extensions; + size_t extensions_size = 0; + + log_info( "Checking for required extensions for OpenCL 1.1 and later devices...\n" ); + + if( (error = clGetDeviceInfo(deviceID, CL_DEVICE_EXTENSIONS, 0, NULL, &extensions_size))) + { + log_error( "ERROR: could not get extensions size. Err # %d\n", error ); + return -1; + } + + if( extensions_size < 1 ) + { + log_error( "ERROR: invalid extensions size. 
Err # %d\n", error ); + return -1; + } + + extensions = (char*) malloc(extensions_size); + if( NULL == extensions ) + { + log_error( "ERROR: cannot allocate %ld bytes to hold extension string.\n", extensions_size ); + return -1; + } + memset( extensions, -1, extensions_size ); + + if( (error = clGetDeviceInfo(deviceID, CL_DEVICE_EXTENSIONS, extensions_size, extensions, NULL))) + { + log_error( "ERROR: could not get extensions. Err # %d\n", error ); + free( extensions ); + return -1; + } + + if( '\0' != extensions[ extensions_size - 1 ] ) + { + if( -1 == extensions[ extensions_size - 1 ] ) + log_error( "ERROR: extensions size reported incorrectly. Last byte is not NUL. Size too big. Reported: %ld. Should be: %ld\n", extensions_size, strlen(extensions) + 1 ); + else + log_error( "ERROR: extensions size reported incorrectly. Last byte is not NUL. Size too small. \n" ); + + free( extensions ); + return -1; + } + + for( i = 0; NULL != requiredExtensions[i]; i++ ) + { + if( NULL == strstr( extensions, requiredExtensions[i] ) ) + { + log_error( "ERROR: Required extension for 1.1 and greater devices is not in extension string: %s\n", requiredExtensions[i] ); + free( extensions ); + return -1; + } + else + log_info( "\t%s\n", requiredExtensions[i] ); + } + + if( major >= 2 ) + { + log_info( "Checking for required extensions for OpenCL 2.0 and later devices...\n" ); + + // Check if double precision is supported, if it is, then check the extension "cl_khr_fp64" + cl_device_fp_config value; + int numRequiredExtension20 = sizeof(requiredExtensions20)/sizeof(char *); + + error = clGetDeviceInfo( deviceID, CL_DEVICE_DOUBLE_FP_CONFIG, sizeof( value ), &value, 0 ); + test_error( error, "Unable to get device double fp config" ); + + // if double precision is not supported, then don't check extension "cl_khr_fp64" + numRequiredExtension20 -= (value == 0); + + for( i = 0; i -0.5f);\n" + " if(myBool)\n" + " {\n" + " dst[tid] = (int)src[tid];\n" + " }\n" + " else\n" + " {\n" + " 
dst[tid] = 0;\n" + " }\n" + "\n" + "}\n" +}; + +int test_for_bool_type(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + + cl_program program; + cl_kernel kernel; + + int err = create_single_kernel_helper(context, + &program, + &kernel, + 1, kernel_with_bool, + "kernel_with_bool" ); + return err; +} + diff --git a/test_conformance/api/test_clone_kernel.cpp b/test_conformance/api/test_clone_kernel.cpp new file mode 100644 index 00000000..5826d7dc --- /dev/null +++ b/test_conformance/api/test_clone_kernel.cpp @@ -0,0 +1,411 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "testBase.h" +#include "../../test_common/harness/typeWrappers.h" +#include "../../test_common/harness/conversions.h" +#include +#include +#include + +using namespace std; + +const char *clone_kernel_test_img[] = +{ + "__kernel void img_read_kernel(read_only image2d_t img, sampler_t sampler, __global int* outbuf)\n" + "{\n" + " uint4 color;\n" + "\n" + " color = read_imageui(img, sampler, (int2)(0,0));\n" + " \n" + " // 7, 8, 9, 10th DWORD\n" + " outbuf[7] = color.x;\n" + " outbuf[8] = color.y;\n" + " outbuf[9] = color.z;\n" + " outbuf[10] = color.w;\n" + "}\n" + "\n" + "__kernel void img_write_kernel(write_only image2d_t img, uint4 color)\n" + "{\n" + " write_imageui (img, (int2)(0, 0), color);\n" + "}\n" + +}; + +const char *clone_kernel_test_double[] = +{ + "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" + "__kernel void clone_kernel_test1(double d, __global double* outbuf)\n" + "{\n" + " // use the same outbuf as rest of the tests\n" + " outbuf[2] = d;\n" + "}\n" +}; + +const char *clone_kernel_test_kernel[] = { +"typedef struct\n" +"{\n" +" int i;\n" +" float f;\n" +"} structArg;\n" +"\n" +"// value type test\n" +"__kernel void clone_kernel_test0(int iarg, float farg, structArg sarg, __local int* localbuf, __global int* outbuf)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" outbuf[0] = iarg;\n" +" outbuf[1] = sarg.i;\n" +" \n" +" ((__global float*)outbuf)[2] = farg;\n" +" ((__global float*)outbuf)[3] = sarg.f;\n" +"}\n" +"\n" +"__kernel void buf_read_kernel(__global int* buf, __global int* outbuf)\n" +"{\n" +" // 6th DWORD\n" +" outbuf[6] = buf[0];\n" +"}\n" +"\n" +"__kernel void buf_write_kernel(__global int* buf, int write_val)\n" +"{\n" +" buf[0] = write_val;\n" +"}\n" + + }; + +const int BUF_SIZE = 128; + +struct structArg +{ + int i; + float f; +}; + +static unsigned char * +generate_8888_image(int w, int h, MTdata d) +{ + unsigned char *ptr = (unsigned char*)malloc(w * h * 4); + int i; + + for (i=0; i 0.0000001) + { + test_error( 
error, "clCloneKernel test failed." ); + return -1; + } + + return 0; +} + +int test_clone_kernel(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + clProgramWrapper program; + clKernelWrapper kernel; + clKernelWrapper kernel_pipe_read; + clKernelWrapper kernel_buf_read; + clKernelWrapper kernel_pipe_write; + clKernelWrapper kernel_buf_write; + + clKernelWrapper kernel_pipe_read_cloned; + clKernelWrapper kernel_buf_read_cloned; + size_t ndrange1 = 1; + + int write_val = 123; + + + cl_bool bimg = CL_FALSE; + cl_bool bdouble = CL_FALSE; + // test image support + error = clGetDeviceInfo(deviceID, CL_DEVICE_IMAGE_SUPPORT, sizeof(cl_bool), &bimg, NULL); + test_error( error, "clGetDeviceInfo failed." ); + + // test double support + size_t ext_str_size; + error = clGetDeviceInfo(deviceID, CL_DEVICE_EXTENSIONS, 0, NULL, &ext_str_size); + test_error( error, "clGetDeviceInfo failed." ); + char* ext_str = new char[ext_str_size+1]; + + error = clGetDeviceInfo(deviceID, CL_DEVICE_EXTENSIONS, ext_str_size, ext_str, NULL); + test_error( error, "clGetDeviceInfo failed." 
); + + ext_str[ext_str_size] = '\0'; + + stringstream ss; + ss << ext_str; + + while (!ss.eof()) + { + string s; + ss >> s; + if (s == "cl_khr_fp64") + { + bdouble = CL_TRUE; + break; + } + } + + /* Create kernels to test with */ + if( create_single_kernel_helper( context, &program, &kernel, 1, clone_kernel_test_kernel, "clone_kernel_test0" ) != 0 ) + { + return -1; + } + + if( create_single_kernel_helper( context, &program, &kernel_buf_read, 1, clone_kernel_test_kernel, "buf_read_kernel" ) != 0 ) + { + return -1; + } + + if( create_single_kernel_helper( context, &program, &kernel_buf_write, 1, clone_kernel_test_kernel, "buf_write_kernel" ) != 0 ) + { + return -1; + } + + // Kernel args + // Value type + int intarg = 0; + float farg = 1.0; + structArg sa = { 1, 1.0f }; + + // cl_mem + clMemWrapper buf, bufOut; + + char* pbuf = new char[BUF_SIZE]; + char* pbufRes = new char[BUF_SIZE]; + buf = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, BUF_SIZE, pbuf, &error); + test_error( error, "clCreateBuffer failed." ); + + bufOut = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, BUF_SIZE, NULL, &error); + test_error( error, "clCreateBuffer failed." ); + + clMemWrapper pipe = clCreatePipe(context, CL_MEM_HOST_NO_ACCESS, sizeof(int), 16, NULL, &error); + test_error( error, "clCreatePipe failed." ); + + error = clSetKernelArg(kernel, 0, sizeof(int), &intarg); + error += clSetKernelArg(kernel, 1, sizeof(float), &farg); + error += clSetKernelArg(kernel, 2, sizeof(structArg), &sa); + error += clSetKernelArg(kernel, 3, 128, NULL); // local mem + + test_error( error, "clSetKernelArg failed." ); + + // clone the kernel + clKernelWrapper clonek = clCloneKernel(kernel, &error); + test_error( error, "clCloneKernel failed." ); + + // set the last arg and enqueue + error = clSetKernelArg(clonek, 4, sizeof(cl_mem), &bufOut); + test_error( error, "clSetKernelArg failed." 
); + error = clEnqueueNDRangeKernel(queue, clonek, 1, NULL, &ndrange1, NULL, 0, NULL, NULL); + test_error( error, "clEnqueueNDRangeKernel failed." ); + + // shallow clone tests for buffer, svm and pipes + error = clSetKernelArg(kernel_buf_write, 0, sizeof(cl_mem), &buf); + error += clSetKernelArg(kernel_buf_write, 1, sizeof(int), &write_val); + test_error( error, "clSetKernelArg failed." ); + error = clEnqueueNDRangeKernel(queue, kernel_buf_write, 1, NULL, &ndrange1, NULL, 0, NULL, NULL); + test_error( error, "clEnqueueNDRangeKernel failed." ); + + error = clSetKernelArg(kernel_buf_read, 0, sizeof(cl_mem), &buf); + error += clSetKernelArg(kernel_buf_read, 1, sizeof(cl_mem), &bufOut); + test_error( error, "clSetKernelArg failed." ); + + // clone the kernel + kernel_buf_read_cloned = clCloneKernel(kernel_buf_read, &error); + test_error( error, "clCloneKernel API call failed." ); + error = clEnqueueNDRangeKernel(queue, kernel_buf_read_cloned, 1, NULL, &ndrange1, NULL, 0, NULL, NULL); + test_error( error, "clEnqueueNDRangeKernel failed." ); + + // read result back + error = clEnqueueReadBuffer(queue, bufOut, CL_TRUE, 0, BUF_SIZE, pbufRes, 0, NULL, NULL); + test_error( error, "clEnqueueReadBuffer failed." ); + + // Compare the results + if (((int*)pbufRes)[0] != intarg) + { + test_error( error, "clCloneKernel test failed. Failed to clone integer type argument." ); + return -1; + } + + if (((int*)pbufRes)[1] != sa.i) + { + test_error( error, "clCloneKernel test failed. Failed to clone structure type argument." ); + return -1; + } + + if (((float*)pbufRes)[2] != farg) + { + test_error( error, "clCloneKernel test failed. Failed to clone structure type argument." ); + return -1; + } + + if (((float*)pbufRes)[3] != sa.f) + { + test_error( error, "clCloneKernel test failed. Failed to clone float type argument." ); + return -1; + } + + if (((int*)pbufRes)[6] != write_val) + { + test_error( error, "clCloneKernel test failed. Failed to clone cl_mem argument." 
); + return -1; + } + + if (bimg) + { + error = test_image_arg_shallow_clone(deviceID, context, queue, num_elements, pbufRes, bufOut); + test_error( error, "image arg shallow clone test failed." ); + } + + if (bdouble) + { + error = test_double_arg_clone(deviceID, context, queue, num_elements, pbufRes, bufOut); + test_error( error, "double arg clone test failed." ); + } + + delete [] pbuf; + delete [] pbufRes; + delete [] ext_str; + + return 0; +} + diff --git a/test_conformance/api/test_create_context_from_type.cpp b/test_conformance/api/test_create_context_from_type.cpp new file mode 100644 index 00000000..4f560148 --- /dev/null +++ b/test_conformance/api/test_create_context_from_type.cpp @@ -0,0 +1,130 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "testBase.h" +#include "../../test_common/harness/testHarness.h" + +#ifndef _WIN32 +#include +#endif + +#include "../../test_common/harness/conversions.h" + +extern cl_uint gRandomSeed; + +int test_create_context_from_type(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper streams[2]; + clContextWrapper context_to_test; + clCommandQueueWrapper queue_to_test; + size_t threads[1], localThreads[1]; + cl_float inputData[10]; + cl_int outputData[10]; + int i; + RandomSeed seed( gRandomSeed ); + + const char *sample_single_test_kernel[] = { + "__kernel void sample_test(__global float *src, __global int *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = (int)src[tid];\n" + "\n" + "}\n" }; + + cl_device_type type; + error = clGetDeviceInfo(deviceID, CL_DEVICE_TYPE, sizeof(type), &type, NULL); + test_error(error, "clGetDeviceInfo for CL_DEVICE_TYPE failed\n"); + + cl_platform_id platform; + error = clGetDeviceInfo(deviceID, CL_DEVICE_PLATFORM, sizeof(platform), &platform, NULL); + test_error(error, "clGetDeviceInfo for CL_DEVICE_PLATFORM failed\n"); + + cl_context_properties properties[3] = { + (cl_context_properties)CL_CONTEXT_PLATFORM, + (cl_context_properties)platform, + NULL + }; + + context_to_test = clCreateContextFromType(properties, type, notify_callback, NULL, &error); + test_error(error, "clCreateContextFromType failed"); + if (context_to_test == NULL) { + log_error("clCreateContextFromType returned NULL, but error was CL_SUCCESS."); + return -1; + } + + queue_to_test = clCreateCommandQueueWithProperties(context_to_test, deviceID, NULL, &error); + test_error(error, "clCreateCommandQueue failed"); + if (queue_to_test == NULL) { + log_error("clCreateCommandQueue returned NULL, but error was CL_SUCCESS."); + return -1; + } + + /* Create a kernel to test with */ + if( create_single_kernel_helper( context_to_test, 
&program, &kernel, 1, sample_single_test_kernel, "sample_test" ) != 0 ) + { + return -1; + } + + /* Create some I/O streams */ + streams[0] = clCreateBuffer(context_to_test, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * 10, NULL, &error); + test_error( error, "Creating test array failed" ); + streams[1] = clCreateBuffer(context_to_test, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 10, NULL, &error); + test_error( error, "Creating test array failed" ); + + /* Write some test data */ + memset( outputData, 0, sizeof( outputData ) ); + + for (i=0; i<10; i++) + inputData[i] = get_random_float(-(float) 0x7fffffff, (float) 0x7fffffff, seed); + + error = clEnqueueWriteBuffer(queue_to_test, streams[0], CL_TRUE, 0, sizeof(cl_float)*10, (void *)inputData, 0, NULL, NULL); + test_error( error, "Unable to set testing kernel data" ); + + /* Test setting the arguments by index manually */ + error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]); + test_error( error, "Unable to set indexed kernel arguments" ); + error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0]); + test_error( error, "Unable to set indexed kernel arguments" ); + + + /* Test running the kernel and verifying it */ + threads[0] = (size_t)10; + + error = get_max_common_work_group_size( context_to_test, kernel, threads[0], &localThreads[0] ); + test_error( error, "Unable to get work group size to use" ); + + error = clEnqueueNDRangeKernel( queue_to_test, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); + test_error( error, "Kernel execution failed" ); + + error = clEnqueueReadBuffer( queue_to_test, streams[1], CL_TRUE, 0, sizeof(cl_int)*10, (void *)outputData, 0, NULL, NULL ); + test_error( error, "Unable to get result data" ); + + for (i=0; i<10; i++) + { + if (outputData[i] != (int)inputData[i]) + { + log_error( "ERROR: Data did not verify on first pass!\n" ); + return -1; + } + } + + return 0; +} + + diff --git a/test_conformance/api/test_create_kernels.c 
b/test_conformance/api/test_create_kernels.c new file mode 100644 index 00000000..ceb18732 --- /dev/null +++ b/test_conformance/api/test_create_kernels.c @@ -0,0 +1,595 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "testBase.h" +#include "../../test_common/harness/testHarness.h" + + +const char *sample_single_kernel[] = { + "__kernel void sample_test(__global float *src, __global int *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = (int)src[tid];\n" + "\n" + "}\n" }; + +size_t sample_single_kernel_lengths[1]; + +const char *sample_two_kernels[] = { + "__kernel void sample_test(__global float *src, __global int *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = (int)src[tid];\n" + "\n" + "}\n", + "__kernel void sample_test2(__global int *src, __global float *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = (float)src[tid];\n" + "\n" + "}\n" }; + +size_t sample_two_kernel_lengths[2]; + +const char *sample_two_kernels_in_1[] = { + "__kernel void sample_test(__global float *src, __global int *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = (int)src[tid];\n" + "\n" + "}\n" + "__kernel void sample_test2(__global int *src, __global float *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = (float)src[tid];\n" + "\n" + "}\n" }; + +size_t sample_two_kernels_in_1_lengths[1]; + + 
+const char *repeate_test_kernel = +"__kernel void test_kernel(__global int *src, __global int *dst)\n" +"{\n" +" dst[get_global_id(0)] = src[get_global_id(0)]+1;\n" +"}\n"; + + + +int test_load_single_kernel(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + clProgramWrapper program; + cl_program testProgram; + clKernelWrapper kernel; + cl_context testContext; + unsigned int numKernels; + cl_char testName[512]; + cl_uint testArgCount; + size_t realSize; + + + error = create_single_kernel_helper(context, &program, NULL, 1, sample_single_kernel, NULL); + test_error( error, "Unable to build test program" ); + + error = clCreateKernelsInProgram(program, 1, &kernel, &numKernels); + test_error( error, "Unable to create single kernel program" ); + + /* Check program and context pointers */ + error = clGetKernelInfo( kernel, CL_KERNEL_PROGRAM, sizeof( cl_program ), &testProgram, &realSize ); + test_error( error, "Unable to get kernel's program" ); + if( (cl_program)testProgram != (cl_program)program ) + { + log_error( "ERROR: Returned kernel's program does not match program used to create it! (Got %p, expected %p)\n", (cl_program)testProgram, (cl_program)program ); + return -1; + } + if( realSize != sizeof( cl_program ) ) + { + log_error( "ERROR: Returned size of kernel's program does not match expected size (expected %d, got %d)\n", (int)sizeof( cl_program ), (int)realSize ); + return -1; + } + + error = clGetKernelInfo( kernel, CL_KERNEL_CONTEXT, sizeof( cl_context ), &testContext, &realSize ); + test_error( error, "Unable to get kernel's context" ); + if( (cl_context)testContext != (cl_context)context ) + { + log_error( "ERROR: Returned kernel's context does not match program used to create it! 
(Got %p, expected %p)\n", (cl_context)testContext, (cl_context)context ); + return -1; + } + if( realSize != sizeof( cl_context ) ) + { + log_error( "ERROR: Returned size of kernel's context does not match expected size (expected %d, got %d)\n", (int)sizeof( cl_context ), (int)realSize ); + return -1; + } + + /* Test arg count */ + error = clGetKernelInfo( kernel, CL_KERNEL_NUM_ARGS, 0, NULL, &realSize ); + test_error( error, "Unable to get size of arg count info from kernel" ); + + if( realSize != sizeof( testArgCount ) ) + { + log_error( "ERROR: size of arg count not valid! %d\n", (int)realSize ); + return -1; + } + + error = clGetKernelInfo( kernel, CL_KERNEL_NUM_ARGS, sizeof( testArgCount ), &testArgCount, NULL ); + test_error( error, "Unable to get arg count from kernel" ); + + if( testArgCount != 2 ) + { + log_error( "ERROR: Kernel arg count does not match!\n" ); + return -1; + } + + + /* Test function name */ + error = clGetKernelInfo( kernel, CL_KERNEL_FUNCTION_NAME, sizeof( testName ), testName, &realSize ); + test_error( error, "Unable to get name from kernel" ); + + if( strcmp( (char *)testName, "sample_test" ) != 0 ) + { + log_error( "ERROR: Kernel names do not match!\n" ); + return -1; + } + if( realSize != strlen( (char *)testName ) + 1 ) + { + log_error( "ERROR: Length of kernel name returned does not validate (expected %d, got %d)\n", (int)strlen( (char *)testName ) + 1, (int)realSize ); + return -1; + } + + /* All done */ + + return 0; +} + +int test_load_two_kernels(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + clProgramWrapper program; + clKernelWrapper kernel[2]; + unsigned int numKernels; + cl_char testName[ 512 ]; + cl_uint testArgCount; + + + error = create_single_kernel_helper(context, &program, NULL, 2, sample_two_kernels, NULL); + test_error( error, "Unable to build test program" ); + + error = clCreateKernelsInProgram(program, 2, &kernel[0], &numKernels); + test_error( error, 
"Unable to create dual kernel program" ); + + if( numKernels != 2 ) + { + log_error( "ERROR: wrong # of kernels! (%d)\n", numKernels ); + return -1; + } + + /* Check first kernel */ + error = clGetKernelInfo( kernel[0], CL_KERNEL_FUNCTION_NAME, sizeof( testName ), testName, NULL ); + test_error( error, "Unable to get function name from kernel" ); + + int found_kernel1 = 0, found_kernel2 = 0; + + if( strcmp( (char *)testName, "sample_test" ) == 0 ) { + found_kernel1 = 1; + } else if( strcmp( (char *)testName, "sample_test2" ) == 0 ) { + found_kernel2 = 1; + } else { + log_error( "ERROR: Invalid kernel name returned: \"%s\" expected \"%s\" or \"%s\".\n", testName, "sample_test", "sample_test2"); + return -1; + } + + error = clGetKernelInfo( kernel[1], CL_KERNEL_FUNCTION_NAME, sizeof( testName ), testName, NULL ); + test_error( error, "Unable to get function name from second kernel" ); + + if( strcmp( (char *)testName, "sample_test" ) == 0 ) { + if (found_kernel1) { + log_error("Kernel \"%s\" returned twice.\n", (char *)testName); + return -1; + } + found_kernel1 = 1; + } else if( strcmp( (char *)testName, "sample_test2" ) == 0 ) { + if (found_kernel2) { + log_error("Kernel \"%s\" returned twice.\n", (char *)testName); + return -1; + } + found_kernel2 = 1; + } else { + log_error( "ERROR: Invalid kernel name returned: \"%s\" expected \"%s\" or \"%s\".\n", testName, "sample_test", "sample_test2"); + return -1; + } + + if( !found_kernel1 || !found_kernel2 ) + { + log_error( "ERROR: Kernel names do not match.\n" ); + if (!found_kernel1) + log_error("Kernel \"%s\" not returned.\n", "sample_test"); + if (!found_kernel2) + log_error("Kernel \"%s\" not returned.\n", "sample_test"); + return -1; + } + + error = clGetKernelInfo( kernel[0], CL_KERNEL_NUM_ARGS, sizeof( testArgCount ), &testArgCount, NULL ); + test_error( error, "Unable to get arg count from kernel" ); + + if( testArgCount != 2 ) + { + log_error( "ERROR: wrong # of args for kernel\n" ); + return -1; + } + + /* All 
done */ + return 0; +} + +int test_load_two_kernels_in_one(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + clProgramWrapper program; + clKernelWrapper kernel[2]; + unsigned int numKernels; + cl_char testName[512]; + cl_uint testArgCount; + + + error = create_single_kernel_helper(context, &program, NULL, 1, sample_two_kernels_in_1, NULL); + test_error( error, "Unable to build test program" ); + + error = clCreateKernelsInProgram(program, 2, &kernel[0], &numKernels); + test_error( error, "Unable to create dual kernel program" ); + + if( numKernels != 2 ) + { + log_error( "ERROR: wrong # of kernels! (%d)\n", numKernels ); + return -1; + } + + /* Check first kernel */ + error = clGetKernelInfo( kernel[0], CL_KERNEL_FUNCTION_NAME, sizeof( testName ), testName, NULL ); + test_error( error, "Unable to get function name from kernel" ); + + int found_kernel1 = 0, found_kernel2 = 0; + + if( strcmp( (char *)testName, "sample_test" ) == 0 ) { + found_kernel1 = 1; + } else if( strcmp( (char *)testName, "sample_test2" ) == 0 ) { + found_kernel2 = 1; + } else { + log_error( "ERROR: Invalid kernel name returned: \"%s\" expected \"%s\" or \"%s\".\n", testName, "sample_test", "sample_test2"); + return -1; + } + + error = clGetKernelInfo( kernel[0], CL_KERNEL_NUM_ARGS, sizeof( testArgCount ), &testArgCount, NULL ); + test_error( error, "Unable to get arg count from kernel" ); + + if( testArgCount != 2 ) + { + log_error( "ERROR: wrong # of args for kernel\n" ); + return -1; + } + + /* Check second kernel */ + error = clGetKernelInfo( kernel[1], CL_KERNEL_FUNCTION_NAME, sizeof( testName ), testName, NULL ); + test_error( error, "Unable to get function name from kernel" ); + + if( strcmp( (char *)testName, "sample_test" ) == 0 ) { + if (found_kernel1) { + log_error("Kernel \"%s\" returned twice.\n", (char *)testName); + return -1; + } + found_kernel1 = 1; + } else if( strcmp( (char *)testName, "sample_test2" ) == 0 ) { + if 
(found_kernel2) { + log_error("Kernel \"%s\" returned twice.\n", (char *)testName); + return -1; + } + found_kernel2 = 1; + } else { + log_error( "ERROR: Invalid kernel name returned: \"%s\" expected \"%s\" or \"%s\".\n", testName, "sample_test", "sample_test2"); + return -1; + } + + if( !found_kernel1 || !found_kernel2 ) + { + log_error( "ERROR: Kernel names do not match.\n" ); + if (!found_kernel1) + log_error("Kernel \"%s\" not returned.\n", "sample_test"); + if (!found_kernel2) + log_error("Kernel \"%s\" not returned.\n", "sample_test"); + return -1; + } + + /* All done */ + return 0; +} + +int test_load_two_kernels_manually( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + clProgramWrapper program; + clKernelWrapper kernel1, kernel2; + int error; + + + /* Now create a test program */ + error = create_single_kernel_helper(context, &program, NULL, 1, sample_two_kernels_in_1, NULL); + test_error( error, "Unable to build test program" ); + + /* Try manually creating kernels (backwards just in case) */ + kernel1 = clCreateKernel( program, "sample_test2", &error ); + + if( kernel1 == NULL || error != CL_SUCCESS ) + { + print_error( error, "Could not get kernel 1" ); + return -1; + } + + kernel2 = clCreateKernel( program, "sample_test", &error ); + + if( kernel2 == NULL ) + { + print_error( error, "Could not get kernel 2" ); + return -1; + } + + return 0; +} + +int test_get_program_info_kernel_names( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + clProgramWrapper program; + clKernelWrapper kernel1, kernel2; + int error; + size_t i; + + /* Now create a test program */ + error = create_single_kernel_helper(context, &program, NULL, 1, sample_two_kernels_in_1, NULL); + test_error( error, "Unable to build test program" ); + + /* Lookup the number of kernels in the program. 
*/ + size_t total_kernels = 0; + error = clGetProgramInfo(program, CL_PROGRAM_NUM_KERNELS, sizeof(size_t),&total_kernels,NULL); + test_error( error, "Unable to get program info num kernels"); + + if (total_kernels != 2) + { + print_error( error, "Program did not contain two kernels" ); + return -1; + } + + /* Lookup the kernel names. */ + const char* actual_names[] = { "sample_test;sample_test2", "sample_test2;sample_test"} ; + + size_t kernel_names_len = 0; + error = clGetProgramInfo(program,CL_PROGRAM_KERNEL_NAMES,0,NULL,&kernel_names_len); + test_error( error, "Unable to get length of kernel names list." ); + + if (kernel_names_len != (strlen(actual_names[0])+1)) + { + print_error( error, "Kernel names length did not match"); + return -1; + } + + const size_t len = (kernel_names_len+1)*sizeof(char); + char* kernel_names = (char*)malloc(len); + error = clGetProgramInfo(program,CL_PROGRAM_KERNEL_NAMES,len,kernel_names,&kernel_names_len); + test_error( error, "Unable to get kernel names list." ); + + /* Check to see if the kernel name array is null terminated. */ + if (kernel_names[kernel_names_len-1] != '\0') + { + free(kernel_names); + print_error( error, "Kernel name list was not null terminated"); + return -1; + } + + /* Check to see if the correct kernel name string was returned. 
*/ + for( i = 0; i < sizeof( actual_names ) / sizeof( actual_names[0] ); i++ ) + if( 0 == strcmp(actual_names[i],kernel_names) ) + break; + + if (i == sizeof( actual_names ) / sizeof( actual_names[0] ) ) + { + free(kernel_names); + log_error( "Kernel names \"%s\" did not match:\n", kernel_names ); + for( i = 0; i < sizeof( actual_names ) / sizeof( actual_names[0] ); i++ ) + log_error( "\t\t\"%s\"\n", actual_names[0] ); + return -1; + } + free(kernel_names); + + /* Try manually creating kernels (backwards just in case) */ + kernel1 = clCreateKernel( program, "sample_test", &error ); + if( kernel1 == NULL || error != CL_SUCCESS ) + { + print_error( error, "Could not get kernel 1" ); + return -1; + } + + kernel2 = clCreateKernel( program, "sample_test2", &error ); + if( kernel2 == NULL ) + { + print_error( error, "Could not get kernel 2" ); + return -1; + } + + return 0; +} + +static const char *single_task_kernel[] = { + "__kernel void sample_test(__global int *dst, int count)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " for( int i = 0; i < count; i++ )\n" + " dst[i] = tid + i;\n" + "\n" + "}\n" }; + +int test_enqueue_task(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper output; + cl_int count; + + + if( create_single_kernel_helper( context, &program, &kernel, 1, single_task_kernel, "sample_test" ) ) + return -1; + + // Create args + count = 100; + output = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof( cl_int ) * count, NULL, &error ); + test_error( error, "Unable to create output buffer" ); + + error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &output ); + test_error( error, "Unable to set kernel argument" ); + error = clSetKernelArg( kernel, 1, sizeof( cl_int ), &count ); + test_error( error, "Unable to set kernel argument" ); + + // Run task + error = clEnqueueTask( queue, kernel, 0, NULL, NULL ); + 
test_error( error, "Unable to run task" ); + + // Read results + cl_int *results = (cl_int*)malloc(sizeof(cl_int)*count); + error = clEnqueueReadBuffer( queue, output, CL_TRUE, 0, sizeof( cl_int ) * count, results, 0, NULL, NULL ); + test_error( error, "Unable to read results" ); + + // Validate + for( cl_int i = 0; i < count; i++ ) + { + if( results[ i ] != i ) + { + log_error( "ERROR: Task result value %d did not validate! Expected %d, got %d\n", (int)i, (int)i, (int)results[ i ] ); + free(results); + return -1; + } + } + + /* All done */ + free(results); + return 0; +} + + + +#define TEST_SIZE 1000 +int test_repeated_setup_cleanup(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + + cl_context local_context; + cl_command_queue local_queue; + cl_program local_program; + cl_kernel local_kernel; + cl_mem local_mem_in, local_mem_out; + cl_event local_event; + size_t global_dim[3]; + int i, j, error; + global_dim[0] = TEST_SIZE; + global_dim[1] = 1; global_dim[2] = 1; + cl_int *inData, *outData; + cl_int status; + + inData = (cl_int*)malloc(sizeof(cl_int)*TEST_SIZE); + outData = (cl_int*)malloc(sizeof(cl_int)*TEST_SIZE); + for (i=0; i +#endif + +int IsAPowerOfTwo( unsigned long x ) +{ + return 0 == (x & (x-1)); +} + + +int test_min_data_type_align_size_alignment(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ) +{ + cl_uint min_alignment; + + if (gHasLong) + min_alignment = sizeof(cl_long)*16; + else + min_alignment = sizeof(cl_int)*16; + + int error = 0; + cl_uint alignment; + + error = clGetDeviceInfo(device, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof(alignment), &alignment, NULL); + test_error(error, "clGetDeviceInfo for CL_DEVICE_MEM_BASE_ADDR_ALIGN failed"); + log_info("Device reported CL_DEVICE_MEM_BASE_ADDR_ALIGN = %lu bits.\n", (unsigned long)alignment); + + // Verify the size is large enough + if (alignment < min_alignment*8) { + log_error("ERROR: alignment too small. 
Minimum alignment for %s16 is %lu bits, device reported %lu bits.", + (gHasLong) ? "long" : "int", + (unsigned long)(min_alignment*8), (unsigned long)alignment); + return -1; + } + + // Verify the size is a power of two + if (!IsAPowerOfTwo((unsigned long)alignment)) { + log_error("ERROR: alignment is not a power of two.\n"); + return -1; + } + + return 0; + +} diff --git a/test_conformance/api/test_kernel_arg_changes.cpp b/test_conformance/api/test_kernel_arg_changes.cpp new file mode 100644 index 00000000..b7aba632 --- /dev/null +++ b/test_conformance/api/test_kernel_arg_changes.cpp @@ -0,0 +1,141 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "testBase.h" + +extern "C" { extern cl_uint gRandomSeed;} + +// This test is designed to stress changing kernel arguments between execute calls (that are asynchronous and thus +// potentially overlapping) to make sure each kernel gets the right arguments + +// Note: put a delay loop in the kernel to make sure we have time to queue the next kernel before this one finishes +const char *inspect_image_kernel_source[] = { +"__kernel void sample_test(read_only image2d_t src, __global int *outDimensions )\n" +"{\n" +" int tid = get_global_id(0), i;\n" +" for( i = 0; i < 100000; i++ ); \n" +" outDimensions[tid * 2] = get_image_width(src) * tid;\n" +" outDimensions[tid * 2 + 1] = get_image_height(src) * tid;\n" +"\n" +"}\n" }; + +#define NUM_TRIES 100 +#define NUM_THREADS 2048 + +int test_kernel_arg_changes(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + clProgramWrapper program; + clKernelWrapper kernel; + int error, i; + clMemWrapper images[ NUM_TRIES ]; + size_t sizes[ NUM_TRIES ][ 2 ]; + clMemWrapper results[ NUM_TRIES ]; + cl_image_format imageFormat; + size_t maxWidth, maxHeight; + size_t threads[1], localThreads[1]; + cl_int resultArray[ NUM_THREADS * 2 ]; + char errStr[ 128 ]; + RandomSeed seed( gRandomSeed ); + + + PASSIVE_REQUIRE_IMAGE_SUPPORT( device ) + + // Just get any ol format to test with + error = get_8_bit_image_format( context, CL_MEM_OBJECT_IMAGE2D, CL_MEM_READ_WRITE, 0, &imageFormat ); + test_error( error, "Unable to obtain suitable image format to test with!" 
); + + // Create our testing kernel + error = create_single_kernel_helper( context, &program, &kernel, 1, inspect_image_kernel_source, "sample_test" ); + test_error( error, "Unable to create testing kernel" ); + + // Get max dimensions for each of our images + error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL ); + test_error( error, "Unable to get max image dimensions for device" ); + + // Get the number of threads we'll be able to run + threads[0] = NUM_THREADS; + error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] ); + test_error( error, "Unable to get work group size for kernel" ); + + // Create a variety of images and output arrays + for( i = 0; i < NUM_TRIES; i++ ) + { + sizes[ i ][ 0 ] = genrand_int32(seed) % (maxWidth/32) + 1; + sizes[ i ][ 1 ] = genrand_int32(seed) % (maxHeight/32) + 1; + + images[ i ] = create_image_2d( context, (cl_mem_flags)(CL_MEM_READ_ONLY), + &imageFormat, sizes[ i ][ 0], sizes[ i ][ 1 ], 0, NULL, &error ); + if( images[i] == NULL ) + { + log_error("Failed to create image %d of size %d x %d (%s).\n", i, (int)sizes[i][0], (int)sizes[i][1], IGetErrorString( error )); + return -1; + } + results[ i ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof( cl_int ) * threads[0] * 2, NULL, &error ); + if( results[i] == NULL) + { + log_error("Failed to create array %d of size %d.\n", i, (int)threads[0]*2); + return -1; + } + } + + // Start setting arguments and executing kernels + for( i = 0; i < NUM_TRIES; i++ ) + { + // Set the arguments for this try + error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &images[ i ] ); + sprintf( errStr, "Unable to set argument 0 for kernel try %d", i ); + test_error( error, errStr ); + + error = clSetKernelArg( kernel, 1, sizeof( cl_mem ), &results[ i ] ); + sprintf( errStr, "Unable to set argument 1 for 
kernel try %d", i ); + test_error( error, errStr ); + + // Queue up execution + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); + sprintf( errStr, "Unable to execute kernel try %d", i ); + test_error( error, errStr ); + } + + // Read the results back out, one at a time, and verify + for( i = 0; i < NUM_TRIES; i++ ) + { + error = clEnqueueReadBuffer( queue, results[ i ], CL_TRUE, 0, sizeof( cl_int ) * threads[0] * 2, resultArray, 0, NULL, NULL ); + sprintf( errStr, "Unable to read results for kernel try %d", i ); + test_error( error, errStr ); + + // Verify. Each entry should be n * the (width/height) of image i + for( int j = 0; j < NUM_THREADS; j++ ) + { + if( resultArray[ j * 2 + 0 ] != (int)sizes[ i ][ 0 ] * j ) + { + log_error( "ERROR: Verficiation for kernel try %d, sample %d FAILED, expected a width of %d, got %d\n", + i, j, (int)sizes[ i ][ 0 ] * j, resultArray[ j * 2 + 0 ] ); + return -1; + } + if( resultArray[ j * 2 + 1 ] != (int)sizes[ i ][ 1 ] * j ) + { + log_error( "ERROR: Verficiation for kernel try %d, sample %d FAILED, expected a height of %d, got %d\n", + i, j, (int)sizes[ i ][ 1 ] * j, resultArray[ j * 2 + 1 ] ); + return -1; + } + } + } + + // If we got here, everything verified successfully + return 0; +} + + diff --git a/test_conformance/api/test_kernel_arg_info.c b/test_conformance/api/test_kernel_arg_info.c new file mode 100644 index 00000000..27d58987 --- /dev/null +++ b/test_conformance/api/test_kernel_arg_info.c @@ -0,0 +1,5976 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "testBase.h" +#include +#include +#ifndef _WIN32 +#include +#endif + +#define ARG_INFO_FIELD_COUNT 5 + +#define ARG_INFO_ADDR_OFFSET 1 +#define ARG_INFO_ACCESS_OFFSET 2 +#define ARG_INFO_TYPE_QUAL_OFFSET 3 +#define ARG_INFO_TYPE_NAME_OFFSET 4 +#define ARG_INFO_ARG_NAME_OFFSET 5 + + +typedef char const * kernel_args_t[]; + +kernel_args_t required_kernel_args = { + "typedef float4 typedef_type;\n" + "\n" + "typedef struct struct_type {\n" + " float4 float4d;\n" + " int intd;\n" + "} typedef_struct_type;\n" + "\n" + "typedef union union_type {\n" + " float4 float4d;\n" + " uint4 uint4d;\n" + "} typedef_union_type;\n" + "\n" + "typedef enum enum_type {\n" + " enum_type_zero,\n" + " enum_type_one,\n" + " enum_type_two\n" + "} typedef_enum_type;\n" + "\n" + "kernel void constant_scalar_p0(constant void*constantvoidp,\n" + " constant char *constantcharp,\n" + " constant uchar* constantucharp,\n" + " constant unsigned char * constantunsignedcharp)\n" + "{}\n", + "kernel void constant_scalar_p1(constant short*constantshortp,\n" + " constant ushort *constantushortp,\n" + " constant unsigned short* constantunsignedshortp,\n" + " constant int * constantintp)\n" + "{}\n", + "kernel void constant_scalar_p2(constant uint*constantuintp,\n" + " constant unsigned int *constantunsignedintp)\n" + "{}\n", + "kernel void constant_scalar_p3(constant float *constantfloatp)\n" + "{}\n", + "\n" + "kernel void constant_scalar_restrict_p0(constant void* restrict constantvoidrestrictp,\n" + " constant char * restrict constantcharrestrictp,\n" + " constant 
uchar*restrict constantucharrestrictp,\n" + " constant unsigned char *restrict constantunsignedcharrestrictp)\n" + "{}\n", + "kernel void constant_scalar_restrict_p1(constant short* restrict constantshortrestrictp,\n" + " constant ushort * restrict constantushortrestrictp,\n" + " constant unsigned short*restrict constantunsignedshortrestrictp,\n" + " constant int *restrict constantintrestrictp)\n" + "{}\n", + "kernel void constant_scalar_restrict_p2(constant uint* restrict constantuintrestrictp,\n" + " constant unsigned int * restrict constantunsignedintrestrictp)\n" + "{}\n", + "kernel void constant_scalar_restrict_p3(constant float * restrict constantfloatrestrictp)\n" + "{}\n", + "\n" + "kernel void global_scalar_p(global void*globalvoidp,\n" + " global char *globalcharp,\n" + " global uchar* globalucharp,\n" + " global unsigned char * globalunsignedcharp,\n" + " global short*globalshortp,\n" + " global ushort *globalushortp,\n" + " global unsigned short* globalunsignedshortp,\n" + " global int * globalintp,\n" + " global uint*globaluintp,\n" + " global unsigned int *globalunsignedintp,\n" + " global float *globalfloatp)\n" + "{}\n", + "\n" + "kernel void global_scalar_restrict_p(global void* restrict globalvoidrestrictp,\n" + " global char * restrict globalcharrestrictp,\n" + " global uchar*restrict globalucharrestrictp,\n" + " global unsigned char *restrict globalunsignedcharrestrictp,\n" + " global short* restrict globalshortrestrictp,\n" + " global ushort * restrict globalushortrestrictp,\n" + " global unsigned short*restrict globalunsignedshortrestrictp,\n" + " global int *restrict globalintrestrictp,\n" + " global uint* restrict globaluintrestrictp,\n" + " global unsigned int * restrict globalunsignedintrestrictp,\n" + " global float * restrict globalfloatrestrictp)\n" + "{}\n", + "\n" + "kernel void global_const_scalar_p(global const void*globalconstvoidp,\n" + " global const char *globalconstcharp,\n" + " global const uchar* globalconstucharp,\n" + " 
global const unsigned char * globalconstunsignedcharp,\n" + " global const short*globalconstshortp,\n" + " global const ushort *globalconstushortp,\n" + " global const unsigned short* globalconstunsignedshortp,\n" + " global const int * globalconstintp,\n" + " global const uint*globalconstuintp,\n" + " global const unsigned int *globalconstunsignedintp,\n" + " global const float *globalconstfloatp)\n" + "{}\n", + "\n" + "kernel void global_const_scalar_restrict_p(global const void* restrict globalconstvoidrestrictp,\n" + " global const char * restrict globalconstcharrestrictp,\n" + " global const uchar*restrict globalconstucharrestrictp,\n" + " global const unsigned char *restrict globalconstunsignedcharrestrictp,\n" + " global const short* restrict globalconstshortrestrictp,\n" + " global const ushort * restrict globalconstushortrestrictp,\n" + " global const unsigned short*restrict globalconstunsignedshortrestrictp,\n" + " global const int *restrict globalconstintrestrictp,\n" + " global const uint* restrict globalconstuintrestrictp,\n" + " global const unsigned int * restrict globalconstunsignedintrestrictp,\n" + " global const float * restrict globalconstfloatrestrictp)\n" + "{}\n", + "\n" + "kernel void global_volatile_scalar_p(global volatile void*globalvolatilevoidp,\n" + " global volatile char *globalvolatilecharp,\n" + " global volatile uchar* globalvolatileucharp,\n" + " global volatile unsigned char * globalvolatileunsignedcharp,\n" + " global volatile short*globalvolatileshortp,\n" + " global volatile ushort *globalvolatileushortp,\n" + " global volatile unsigned short* globalvolatileunsignedshortp,\n" + " global volatile int * globalvolatileintp,\n" + " global volatile uint*globalvolatileuintp,\n" + " global volatile unsigned int *globalvolatileunsignedintp,\n" + " global volatile float *globalvolatilefloatp)\n" + "{}\n", + "\n" + "kernel void global_volatile_scalar_restrict_p(global volatile void* restrict globalvolatilevoidrestrictp,\n" + " global 
volatile char * restrict globalvolatilecharrestrictp,\n" + " global volatile uchar*restrict globalvolatileucharrestrictp,\n" + " global volatile unsigned char *restrict globalvolatileunsignedcharrestrictp,\n" + " global volatile short* restrict globalvolatileshortrestrictp,\n" + " global volatile ushort * restrict globalvolatileushortrestrictp,\n" + " global volatile unsigned short*restrict globalvolatileunsignedshortrestrictp,\n" + " global volatile int *restrict globalvolatileintrestrictp,\n" + " global volatile uint* restrict globalvolatileuintrestrictp,\n" + " global volatile unsigned int * restrict globalvolatileunsignedintrestrictp,\n" + " global volatile float * restrict globalvolatilefloatrestrictp)\n" + "{}\n", + "\n" + "kernel void global_const_volatile_scalar_p(global const volatile void*globalconstvolatilevoidp,\n" + " global const volatile char *globalconstvolatilecharp,\n" + " global const volatile uchar* globalconstvolatileucharp,\n" + " global const volatile unsigned char * globalconstvolatileunsignedcharp,\n" + " global const volatile short*globalconstvolatileshortp,\n" + " global const volatile ushort *globalconstvolatileushortp,\n" + " global const volatile unsigned short* globalconstvolatileunsignedshortp,\n" + " global const volatile int * globalconstvolatileintp,\n" + " global const volatile uint*globalconstvolatileuintp,\n" + " global const volatile unsigned int *globalconstvolatileunsignedintp,\n" + " global const volatile float *globalconstvolatilefloatp)\n" + "{}\n", + "\n" + "kernel void global_const_volatile_scalar_restrict_p(global const volatile void* restrict globalconstvolatilevoidrestrictp,\n" + " global const volatile char * restrict globalconstvolatilecharrestrictp,\n" + " global const volatile uchar*restrict globalconstvolatileucharrestrictp,\n" + " global const volatile unsigned char *restrict globalconstvolatileunsignedcharrestrictp,\n" + " global const volatile short* restrict globalconstvolatileshortrestrictp,\n" + " global 
const volatile ushort * restrict globalconstvolatileushortrestrictp,\n" + " global const volatile unsigned short*restrict globalconstvolatileunsignedshortrestrictp,\n" + " global const volatile int *restrict globalconstvolatileintrestrictp,\n" + " global const volatile uint* restrict globalconstvolatileuintrestrictp,\n" + " global const volatile unsigned int * restrict globalconstvolatileunsignedintrestrictp,\n" + " global const volatile float * restrict globalconstvolatilefloatrestrictp)\n" + "{}\n", + "\n" + "kernel void local_scalar_p(local void*localvoidp,\n" + " local char *localcharp,\n" + " local uchar* localucharp,\n" + " local unsigned char * localunsignedcharp,\n" + " local short*localshortp,\n" + " local ushort *localushortp,\n" + " local unsigned short* localunsignedshortp,\n" + " local int * localintp,\n" + " local uint*localuintp,\n" + " local unsigned int *localunsignedintp,\n" + " local float *localfloatp)\n" + "{}\n", + "\n" + "kernel void local_scalar_restrict_p(local void* restrict localvoidrestrictp,\n" + " local char * restrict localcharrestrictp,\n" + " local uchar*restrict localucharrestrictp,\n" + " local unsigned char *restrict localunsignedcharrestrictp,\n" + " local short* restrict localshortrestrictp,\n" + " local ushort * restrict localushortrestrictp,\n" + " local unsigned short*restrict localunsignedshortrestrictp,\n" + " local int *restrict localintrestrictp,\n" + " local uint* restrict localuintrestrictp,\n" + " local unsigned int * restrict localunsignedintrestrictp,\n" + " local float * restrict localfloatrestrictp)\n" + "{}\n", + "\n" + "kernel void local_const_scalar_p(local const void*localconstvoidp,\n" + " local const char *localconstcharp,\n" + " local const uchar* localconstucharp,\n" + " local const unsigned char * localconstunsignedcharp,\n" + " local const short*localconstshortp,\n" + " local const ushort *localconstushortp,\n" + " local const unsigned short* localconstunsignedshortp,\n" + " local const int * 
localconstintp,\n" + " local const uint*localconstuintp,\n" + " local const unsigned int *localconstunsignedintp,\n" + " local const float *localconstfloatp)\n" + "{}\n", + "\n" + "kernel void local_const_scalar_restrict_p(local const void* restrict localconstvoidrestrictp,\n" + " local const char * restrict localconstcharrestrictp,\n" + " local const uchar*restrict localconstucharrestrictp,\n" + " local const unsigned char *restrict localconstunsignedcharrestrictp,\n" + " local const short* restrict localconstshortrestrictp,\n" + " local const ushort * restrict localconstushortrestrictp,\n" + " local const unsigned short*restrict localconstunsignedshortrestrictp,\n" + " local const int *restrict localconstintrestrictp,\n" + " local const uint* restrict localconstuintrestrictp,\n" + " local const unsigned int * restrict localconstunsignedintrestrictp,\n" + " local const float * restrict localconstfloatrestrictp)\n" + "{}\n", + "\n" + "kernel void local_volatile_scalar_p(local volatile void*localvolatilevoidp,\n" + " local volatile char *localvolatilecharp,\n" + " local volatile uchar* localvolatileucharp,\n" + " local volatile unsigned char * localvolatileunsignedcharp,\n" + " local volatile short*localvolatileshortp,\n" + " local volatile ushort *localvolatileushortp,\n" + " local volatile unsigned short* localvolatileunsignedshortp,\n" + " local volatile int * localvolatileintp,\n" + " local volatile uint*localvolatileuintp,\n" + " local volatile unsigned int *localvolatileunsignedintp,\n" + " local volatile float *localvolatilefloatp)\n" + "{}\n", + "\n" + "kernel void local_volatile_scalar_restrict_p(local volatile void* restrict localvolatilevoidrestrictp,\n" + " local volatile char * restrict localvolatilecharrestrictp,\n" + " local volatile uchar*restrict localvolatileucharrestrictp,\n" + " local volatile unsigned char *restrict localvolatileunsignedcharrestrictp,\n" + " local volatile short* restrict localvolatileshortrestrictp,\n" + " local volatile ushort 
* restrict localvolatileushortrestrictp,\n" + " local volatile unsigned short*restrict localvolatileunsignedshortrestrictp,\n" + " local volatile int *restrict localvolatileintrestrictp,\n" + " local volatile uint* restrict localvolatileuintrestrictp,\n" + " local volatile unsigned int * restrict localvolatileunsignedintrestrictp,\n" + " local volatile float * restrict localvolatilefloatrestrictp)\n" + "{}\n", + "\n" + "kernel void local_const_volatile_scalar_p(local const volatile void*localconstvolatilevoidp,\n" + " local const volatile char *localconstvolatilecharp,\n" + " local const volatile uchar* localconstvolatileucharp,\n" + " local const volatile unsigned char * localconstvolatileunsignedcharp,\n" + " local const volatile short*localconstvolatileshortp,\n" + " local const volatile ushort *localconstvolatileushortp,\n" + " local const volatile unsigned short* localconstvolatileunsignedshortp,\n" + " local const volatile int * localconstvolatileintp,\n" + " local const volatile uint*localconstvolatileuintp,\n" + " local const volatile unsigned int *localconstvolatileunsignedintp,\n" + " local const volatile float *localconstvolatilefloatp)\n" + "{}\n", + "\n" + "kernel void local_const_volatile_scalar_restrict_p(local const volatile void* restrict localconstvolatilevoidrestrictp,\n" + " local const volatile char * restrict localconstvolatilecharrestrictp,\n" + " local const volatile uchar*restrict localconstvolatileucharrestrictp,\n" + " local const volatile unsigned char *restrict localconstvolatileunsignedcharrestrictp,\n" + " local const volatile short* restrict localconstvolatileshortrestrictp,\n" + " local const volatile ushort * restrict localconstvolatileushortrestrictp,\n" + " local const volatile unsigned short*restrict localconstvolatileunsignedshortrestrictp,\n" + " local const volatile int *restrict localconstvolatileintrestrictp,\n" + " local const volatile uint* restrict localconstvolatileuintrestrictp,\n" + " local const volatile unsigned int 
* restrict localconstvolatileunsignedintrestrictp,\n" + " local const volatile float * restrict localconstvolatilefloatrestrictp)\n" + "{}\n", + "\n" + "kernel void scalar_d(char chard,\n" + " uchar uchard,\n" + " unsigned char unsignedchard,\n" + " short shortd,\n" + " ushort ushortd,\n" + " unsigned short unsignedshortd,\n" + " int intd,\n" + " uint uintd,\n" + " unsigned int unsignedintd,\n" + " float floatd)\n" + "{}\n", + "\n" + "kernel void const_scalar_d(const char constchard,\n" + " const uchar constuchard,\n" + " const unsigned char constunsignedchard,\n" + " const short constshortd,\n" + " const ushort constushortd,\n" + " const unsigned short constunsignedshortd,\n" + " const int constintd,\n" + " const uint constuintd,\n" + " const unsigned int constunsignedintd,\n" + " const float constfloatd)\n" + "{}\n", + "\n" + "kernel void private_scalar_d(private char privatechard,\n" + " private uchar privateuchard,\n" + " private unsigned char privateunsignedchard,\n" + " private short privateshortd,\n" + " private ushort privateushortd,\n" + " private unsigned short privateunsignedshortd,\n" + " private int privateintd,\n" + " private uint privateuintd,\n" + " private unsigned int privateunsignedintd,\n" + " private float privatefloatd)\n" + "{}\n", + "\n" + "kernel void private_const_scalar_d(private const char privateconstchard,\n" + " private const uchar privateconstuchard,\n" + " private const unsigned char privateconstunsignedchard,\n" + " private const short privateconstshortd,\n" + " private const ushort privateconstushortd,\n" + " private const unsigned short privateconstunsignedshortd,\n" + " private const int privateconstintd,\n" + " private const uint privateconstuintd,\n" + " private const unsigned int privateconstunsignedintd,\n" + " private const float privateconstfloatd)\n" + "{}\n", + "\n" + "kernel void constant_vector2_p0(constant char2*constantchar2p,\n" + " constant uchar2 *constantuchar2p,\n" + " constant short2* constantshort2p,\n" + " 
constant ushort2 * constantushort2p)\n" + "{}\n", + "\n" + "kernel void constant_vector2_p1(constant int2*constantint2p,\n" + " constant uint2 *constantuint2p)\n" + "{}\n", + "\n" + "kernel void constant_vector2_p2(constant float2*constantfloat2p)\n" + "{}\n", + "\n" + "kernel void constant_vector2_restrict_p0(constant char2 *restrict constantchar2restrictp,\n" + " constant uchar2* restrict constantuchar2restrictp,\n" + " constant short2 * restrict constantshort2restrictp,\n" + " constant ushort2*restrict constantushort2restrictp)\n" + "{}\n", + "\n" + "kernel void constant_vector2_restrict_p1(constant int2 *restrict constantint2restrictp,\n" + " constant uint2* restrict constantuint2restrictp)\n" + "{}\n", + "\n" + "kernel void constant_vector2_restrict_p2(constant float2 *restrict constantfloat2restrictp)\n" + "{}\n", + "\n" + "kernel void global_vector2_p(global char2*globalchar2p,\n" + " global uchar2 *globaluchar2p,\n" + " global short2* globalshort2p,\n" + " global ushort2 * globalushort2p,\n" + " global int2*globalint2p,\n" + " global uint2 *globaluint2p,\n" + " global float2*globalfloat2p)\n" + "{}\n", + "\n" + "kernel void global_vector2_restrict_p(global char2 *restrict globalchar2restrictp,\n" + " global uchar2* restrict globaluchar2restrictp,\n" + " global short2 * restrict globalshort2restrictp,\n" + " global ushort2*restrict globalushort2restrictp,\n" + " global int2 *restrict globalint2restrictp,\n" + " global uint2* restrict globaluint2restrictp,\n" + " global float2 *restrict globalfloat2restrictp)\n" + "{}\n", + "\n" + "kernel void global_const_vector2_p(global const char2* globalconstchar2p,\n" + " global const uchar2 * globalconstuchar2p,\n" + " global const short2*globalconstshort2p,\n" + " global const ushort2 *globalconstushort2p,\n" + " global const int2* globalconstint2p,\n" + " global const uint2 * globalconstuint2p,\n" + " global const float2* globalconstfloat2p)\n" + "{}\n", + "\n" + "kernel void global_const_vector2_restrict_p(global 
const char2 * restrict globalconstchar2restrictp,\n" + " global const uchar2*restrict globalconstuchar2restrictp,\n" + " global const short2 *restrict globalconstshort2restrictp,\n" + " global const ushort2* restrict globalconstushort2restrictp,\n" + " global const int2 * restrict globalconstint2restrictp,\n" + " global const uint2*restrict globalconstuint2restrictp,\n" + " global const float2 * restrict globalconstfloat2restrictp)\n" + "{}\n", + "\n" + "kernel void global_volatile_vector2_p(global volatile char2*globalvolatilechar2p,\n" + " global volatile uchar2 *globalvolatileuchar2p,\n" + " global volatile short2* globalvolatileshort2p,\n" + " global volatile ushort2 * globalvolatileushort2p,\n" + " global volatile int2*globalvolatileint2p,\n" + " global volatile uint2 *globalvolatileuint2p,\n" + " global volatile float2*globalvolatilefloat2p)\n" + "{}\n", + "\n" + "kernel void global_volatile_vector2_restrict_p(global volatile char2 *restrict globalvolatilechar2restrictp,\n" + " global volatile uchar2* restrict globalvolatileuchar2restrictp,\n" + " global volatile short2 * restrict globalvolatileshort2restrictp,\n" + " global volatile ushort2*restrict globalvolatileushort2restrictp,\n" + " global volatile int2 *restrict globalvolatileint2restrictp,\n" + " global volatile uint2* restrict globalvolatileuint2restrictp,\n" + " global volatile float2 *restrict globalvolatilefloat2restrictp)\n" + "{}\n", + "\n" + "kernel void global_const_volatile_vector2_p(global const volatile char2* globalconstvolatilechar2p,\n" + " global const volatile uchar2 * globalconstvolatileuchar2p,\n" + " global const volatile short2*globalconstvolatileshort2p,\n" + " global const volatile ushort2 *globalconstvolatileushort2p,\n" + " global const volatile int2* globalconstvolatileint2p,\n" + " global const volatile uint2 * globalconstvolatileuint2p,\n" + " global const volatile float2* globalconstvolatilefloat2p)\n" + "{}\n", + "\n" + "kernel void 
global_const_volatile_vector2_restrict_p(global const volatile char2 * restrict globalconstvolatilechar2restrictp,\n" + " global const volatile uchar2*restrict globalconstvolatileuchar2restrictp,\n" + " global const volatile short2 *restrict globalconstvolatileshort2restrictp,\n" + " global const volatile ushort2* restrict globalconstvolatileushort2restrictp,\n" + " global const volatile int2 * restrict globalconstvolatileint2restrictp,\n" + " global const volatile uint2*restrict globalconstvolatileuint2restrictp,\n" + " global const volatile float2 * restrict globalconstvolatilefloat2restrictp)\n" + "{}\n", + "\n" + "kernel void local_vector2_p(local char2*localchar2p,\n" + " local uchar2 *localuchar2p,\n" + " local short2* localshort2p,\n" + " local ushort2 * localushort2p,\n" + " local int2*localint2p,\n" + " local uint2 *localuint2p,\n" + " local float2*localfloat2p)\n" + "{}\n", + "\n" + "kernel void local_vector2_restrict_p(local char2 *restrict localchar2restrictp,\n" + " local uchar2* restrict localuchar2restrictp,\n" + " local short2 * restrict localshort2restrictp,\n" + " local ushort2*restrict localushort2restrictp,\n" + " local int2 *restrict localint2restrictp,\n" + " local uint2* restrict localuint2restrictp,\n" + " local float2 *restrict localfloat2restrictp)\n" + "{}\n", + "\n" + "kernel void local_const_vector2_p(local const char2* localconstchar2p,\n" + " local const uchar2 * localconstuchar2p,\n" + " local const short2*localconstshort2p,\n" + " local const ushort2 *localconstushort2p,\n" + " local const int2* localconstint2p,\n" + " local const uint2 * localconstuint2p,\n" + " local const float2* localconstfloat2p)\n" + "{}\n", + "\n" + "kernel void local_const_vector2_restrict_p(local const char2 * restrict localconstchar2restrictp,\n" + " local const uchar2*restrict localconstuchar2restrictp,\n" + " local const short2 *restrict localconstshort2restrictp,\n" + " local const ushort2* restrict localconstushort2restrictp,\n" + " local const int2 * 
restrict localconstint2restrictp,\n" + " local const uint2*restrict localconstuint2restrictp,\n" + " local const float2 * restrict localconstfloat2restrictp)\n" + "{}\n", + "\n" + "kernel void local_volatile_vector2_p(local volatile char2*localvolatilechar2p,\n" + " local volatile uchar2 *localvolatileuchar2p,\n" + " local volatile short2* localvolatileshort2p,\n" + " local volatile ushort2 * localvolatileushort2p,\n" + " local volatile int2*localvolatileint2p,\n" + " local volatile uint2 *localvolatileuint2p,\n" + " local volatile float2*localvolatilefloat2p)\n" + "{}\n", + "\n" + "kernel void local_volatile_vector2_restrict_p(local volatile char2 *restrict localvolatilechar2restrictp,\n" + " local volatile uchar2* restrict localvolatileuchar2restrictp,\n" + " local volatile short2 * restrict localvolatileshort2restrictp,\n" + " local volatile ushort2*restrict localvolatileushort2restrictp,\n" + " local volatile int2 *restrict localvolatileint2restrictp,\n" + " local volatile uint2* restrict localvolatileuint2restrictp,\n" + " local volatile float2 *restrict localvolatilefloat2restrictp)\n" + "{}\n", + "\n" + "kernel void local_const_volatile_vector2_p(local const volatile char2* localconstvolatilechar2p,\n" + " local const volatile uchar2 * localconstvolatileuchar2p,\n" + " local const volatile short2*localconstvolatileshort2p,\n" + " local const volatile ushort2 *localconstvolatileushort2p,\n" + " local const volatile int2* localconstvolatileint2p,\n" + " local const volatile uint2 * localconstvolatileuint2p,\n" + " local const volatile float2* localconstvolatilefloat2p)\n" + "{}\n", + "\n" + "kernel void local_const_volatile_vector2_restrict_p(local const volatile char2 * restrict localconstvolatilechar2restrictp,\n" + " local const volatile uchar2*restrict localconstvolatileuchar2restrictp,\n" + " local const volatile short2 *restrict localconstvolatileshort2restrictp,\n" + " local const volatile ushort2* restrict localconstvolatileushort2restrictp,\n" + " 
local const volatile int2 * restrict localconstvolatileint2restrictp,\n" + " local const volatile uint2*restrict localconstvolatileuint2restrictp,\n" + " local const volatile float2 * restrict localconstvolatilefloat2restrictp)\n" + "{}\n", + "\n" + "kernel void vector2_d(char2 char2d,\n" + " uchar2 uchar2d,\n" + " short2 short2d,\n" + " ushort2 ushort2d,\n" + " int2 int2d,\n" + " uint2 uint2d,\n" + " float2 float2d)\n" + "{}\n", + "\n" + "kernel void const_vector2_d(const char2 constchar2d,\n" + " const uchar2 constuchar2d,\n" + " const short2 constshort2d,\n" + " const ushort2 constushort2d,\n" + " const int2 constint2d,\n" + " const uint2 constuint2d,\n" + " const float2 constfloat2d)\n" + "{}\n", + "\n" + "kernel void private_vector2_d(private char2 privatechar2d,\n" + " private uchar2 privateuchar2d,\n" + " private short2 privateshort2d,\n" + " private ushort2 privateushort2d,\n" + " private int2 privateint2d,\n" + " private uint2 privateuint2d,\n" + " private float2 privatefloat2d)\n" + "{}\n", + "\n" + "kernel void private_const_vector2_d(private const char2 privateconstchar2d,\n" + " private const uchar2 privateconstuchar2d,\n" + " private const short2 privateconstshort2d,\n" + " private const ushort2 privateconstushort2d,\n" + " private const int2 privateconstint2d,\n" + " private const uint2 privateconstuint2d,\n" + " private const float2 privateconstfloat2d)\n" + "{}\n", + "\n" + "kernel void constant_vector3_p0(constant char3*constantchar3p,\n" + " constant uchar3 *constantuchar3p,\n" + " constant short3* constantshort3p,\n" + " constant ushort3 * constantushort3p)\n" + "{}\n", + "\n" + "kernel void constant_vector3_p1(constant int3*constantint3p,\n" + " constant uint3 *constantuint3p)\n" + "{}\n", + "\n" + "kernel void constant_vector3_p2(constant float3*constantfloat3p)\n" + "{}\n", + "\n" + "kernel void constant_vector3_restrict_p0(constant char3 *restrict constantchar3restrictp,\n" + " constant uchar3* restrict constantuchar3restrictp,\n" + " 
constant short3 * restrict constantshort3restrictp,\n" + " constant ushort3*restrict constantushort3restrictp)\n" + "{}\n", + "\n" + "kernel void constant_vector3_restrict_p1(constant int3 *restrict constantint3restrictp,\n" + " constant uint3* restrict constantuint3restrictp)\n" + "{}\n", + "\n" + "kernel void constant_vector3_restrict_p2(constant float3 *restrict constantfloat3restrictp)\n" + "{}\n", + "\n" + "kernel void global_vector3_p(global char3*globalchar3p,\n" + " global uchar3 *globaluchar3p,\n" + " global short3* globalshort3p,\n" + " global ushort3 * globalushort3p,\n" + " global int3*globalint3p,\n" + " global uint3 *globaluint3p,\n" + " global float3*globalfloat3p)\n" + "{}\n", + "\n" + "kernel void global_vector3_restrict_p(global char3 *restrict globalchar3restrictp,\n" + " global uchar3* restrict globaluchar3restrictp,\n" + " global short3 * restrict globalshort3restrictp,\n" + " global ushort3*restrict globalushort3restrictp,\n" + " global int3 *restrict globalint3restrictp,\n" + " global uint3* restrict globaluint3restrictp,\n" + " global float3 *restrict globalfloat3restrictp)\n" + "{}\n", + "\n" + "kernel void global_const_vector3_p(global const char3* globalconstchar3p,\n" + " global const uchar3 * globalconstuchar3p,\n" + " global const short3*globalconstshort3p,\n" + " global const ushort3 *globalconstushort3p,\n" + " global const int3* globalconstint3p,\n" + " global const uint3 * globalconstuint3p,\n" + " global const float3* globalconstfloat3p)\n" + "{}\n", + "\n" + "kernel void global_const_vector3_restrict_p(global const char3 * restrict globalconstchar3restrictp,\n" + " global const uchar3*restrict globalconstuchar3restrictp,\n" + " global const short3 *restrict globalconstshort3restrictp,\n" + " global const ushort3* restrict globalconstushort3restrictp,\n" + " global const int3 * restrict globalconstint3restrictp,\n" + " global const uint3*restrict globalconstuint3restrictp,\n" + " global const float3 * restrict 
globalconstfloat3restrictp)\n" + "{}\n", + "\n" + "kernel void global_volatile_vector3_p(global volatile char3*globalvolatilechar3p,\n" + " global volatile uchar3 *globalvolatileuchar3p,\n" + " global volatile short3* globalvolatileshort3p,\n" + " global volatile ushort3 * globalvolatileushort3p,\n" + " global volatile int3*globalvolatileint3p,\n" + " global volatile uint3 *globalvolatileuint3p,\n" + " global volatile float3*globalvolatilefloat3p)\n" + "{}\n", + "\n" + "kernel void global_volatile_vector3_restrict_p(global volatile char3 *restrict globalvolatilechar3restrictp,\n" + " global volatile uchar3* restrict globalvolatileuchar3restrictp,\n" + " global volatile short3 * restrict globalvolatileshort3restrictp,\n" + " global volatile ushort3*restrict globalvolatileushort3restrictp,\n" + " global volatile int3 *restrict globalvolatileint3restrictp,\n" + " global volatile uint3* restrict globalvolatileuint3restrictp,\n" + " global volatile float3 *restrict globalvolatilefloat3restrictp)\n" + "{}\n", + "\n" + "kernel void global_const_volatile_vector3_p(global const volatile char3* globalconstvolatilechar3p,\n" + " global const volatile uchar3 * globalconstvolatileuchar3p,\n" + " global const volatile short3*globalconstvolatileshort3p,\n" + " global const volatile ushort3 *globalconstvolatileushort3p,\n" + " global const volatile int3* globalconstvolatileint3p,\n" + " global const volatile uint3 * globalconstvolatileuint3p,\n" + " global const volatile float3* globalconstvolatilefloat3p)\n" + "{}\n", + "\n" + "kernel void global_const_volatile_vector3_restrict_p(global const volatile char3 * restrict globalconstvolatilechar3restrictp,\n" + " global const volatile uchar3*restrict globalconstvolatileuchar3restrictp,\n" + " global const volatile short3 *restrict globalconstvolatileshort3restrictp,\n" + " global const volatile ushort3* restrict globalconstvolatileushort3restrictp,\n" + " global const volatile int3 * restrict globalconstvolatileint3restrictp,\n" + " 
global const volatile uint3*restrict globalconstvolatileuint3restrictp,\n" + " global const volatile float3 * restrict globalconstvolatilefloat3restrictp)\n" + "{}\n", + "\n" + "kernel void local_vector3_p(local char3*localchar3p,\n" + " local uchar3 *localuchar3p,\n" + " local short3* localshort3p,\n" + " local ushort3 * localushort3p,\n" + " local int3*localint3p,\n" + " local uint3 *localuint3p,\n" + " local float3*localfloat3p)\n" + "{}\n", + "\n" + "kernel void local_vector3_restrict_p(local char3 *restrict localchar3restrictp,\n" + " local uchar3* restrict localuchar3restrictp,\n" + " local short3 * restrict localshort3restrictp,\n" + " local ushort3*restrict localushort3restrictp,\n" + " local int3 *restrict localint3restrictp,\n" + " local uint3* restrict localuint3restrictp,\n" + " local float3 *restrict localfloat3restrictp)\n" + "{}\n", + "\n" + "kernel void local_const_vector3_p(local const char3* localconstchar3p,\n" + " local const uchar3 * localconstuchar3p,\n" + " local const short3*localconstshort3p,\n" + " local const ushort3 *localconstushort3p,\n" + " local const int3* localconstint3p,\n" + " local const uint3 * localconstuint3p,\n" + " local const float3* localconstfloat3p)\n" + "{}\n", + "\n" + "kernel void local_const_vector3_restrict_p(local const char3 * restrict localconstchar3restrictp,\n" + " local const uchar3*restrict localconstuchar3restrictp,\n" + " local const short3 *restrict localconstshort3restrictp,\n" + " local const ushort3* restrict localconstushort3restrictp,\n" + " local const int3 * restrict localconstint3restrictp,\n" + " local const uint3*restrict localconstuint3restrictp,\n" + " local const float3 * restrict localconstfloat3restrictp)\n" + "{}\n", + "\n" + "kernel void local_volatile_vector3_p(local volatile char3*localvolatilechar3p,\n" + " local volatile uchar3 *localvolatileuchar3p,\n" + " local volatile short3* localvolatileshort3p,\n" + " local volatile ushort3 * localvolatileushort3p,\n" + " local volatile 
int3*localvolatileint3p,\n" + " local volatile uint3 *localvolatileuint3p,\n" + " local volatile float3*localvolatilefloat3p)\n" + "{}\n", + "\n" + "kernel void local_volatile_vector3_restrict_p(local volatile char3 *restrict localvolatilechar3restrictp,\n" + " local volatile uchar3* restrict localvolatileuchar3restrictp,\n" + " local volatile short3 * restrict localvolatileshort3restrictp,\n" + " local volatile ushort3*restrict localvolatileushort3restrictp,\n" + " local volatile int3 *restrict localvolatileint3restrictp,\n" + " local volatile uint3* restrict localvolatileuint3restrictp,\n" + " local volatile float3 *restrict localvolatilefloat3restrictp)\n" + "{}\n", + "\n" + "kernel void local_const_volatile_vector3_p(local const volatile char3* localconstvolatilechar3p,\n" + " local const volatile uchar3 * localconstvolatileuchar3p,\n" + " local const volatile short3*localconstvolatileshort3p,\n" + " local const volatile ushort3 *localconstvolatileushort3p,\n" + " local const volatile int3* localconstvolatileint3p,\n" + " local const volatile uint3 * localconstvolatileuint3p,\n" + " local const volatile float3* localconstvolatilefloat3p)\n" + "{}\n", + "\n" + "kernel void local_const_volatile_vector3_restrict_p(local const volatile char3 * restrict localconstvolatilechar3restrictp,\n" + " local const volatile uchar3*restrict localconstvolatileuchar3restrictp,\n" + " local const volatile short3 *restrict localconstvolatileshort3restrictp,\n" + " local const volatile ushort3* restrict localconstvolatileushort3restrictp,\n" + " local const volatile int3 * restrict localconstvolatileint3restrictp,\n" + " local const volatile uint3*restrict localconstvolatileuint3restrictp,\n" + " local const volatile float3 * restrict localconstvolatilefloat3restrictp)\n" + "{}\n", + "\n" + "kernel void vector3_d(char3 char3d,\n" + " uchar3 uchar3d,\n" + " short3 short3d,\n" + " ushort3 ushort3d,\n" + " int3 int3d,\n" + " uint3 uint3d,\n" + " float3 float3d)\n" + "{}\n", + "\n" + 
"kernel void const_vector3_d(const char3 constchar3d,\n" + " const uchar3 constuchar3d,\n" + " const short3 constshort3d,\n" + " const ushort3 constushort3d,\n" + " const int3 constint3d,\n" + " const uint3 constuint3d,\n" + " const float3 constfloat3d)\n" + "{}\n", + "\n" + "kernel void private_vector3_d(private char3 privatechar3d,\n" + " private uchar3 privateuchar3d,\n" + " private short3 privateshort3d,\n" + " private ushort3 privateushort3d,\n" + " private int3 privateint3d,\n" + " private uint3 privateuint3d,\n" + " private float3 privatefloat3d)\n" + "{}\n", + "\n" + "kernel void private_const_vector3_d(private const char3 privateconstchar3d,\n" + " private const uchar3 privateconstuchar3d,\n" + " private const short3 privateconstshort3d,\n" + " private const ushort3 privateconstushort3d,\n" + " private const int3 privateconstint3d,\n" + " private const uint3 privateconstuint3d,\n" + " private const float3 privateconstfloat3d)\n" + "{}\n", + "\n" + "kernel void constant_vector4_p0(constant char4*constantchar4p,\n" + " constant uchar4 *constantuchar4p,\n" + " constant short4* constantshort4p,\n" + " constant ushort4 * constantushort4p)\n" + "{}\n", + "\n" + "kernel void constant_vector4_p1(constant int4*constantint4p,\n" + " constant uint4 *constantuint4p)\n" + "{}\n", + "\n" + "kernel void constant_vector4_p2(constant float4*constantfloat4p)\n" + "{}\n", + "\n" + "kernel void constant_vector4_restrict_p0(constant char4 *restrict constantchar4restrictp,\n" + " constant uchar4* restrict constantuchar4restrictp,\n" + " constant short4 * restrict constantshort4restrictp,\n" + " constant ushort4*restrict constantushort4restrictp)\n" + "{}\n", + "\n" + "kernel void constant_vector4_restrict_p1(constant int4 *restrict constantint4restrictp,\n" + " constant uint4* restrict constantuint4restrictp)\n" + "{}\n", + "\n" + "kernel void constant_vector4_restrict_p2(constant float4 *restrict constantfloat4restrictp)\n" + "{}\n", + "\n" + "kernel void 
global_vector4_p(global char4*globalchar4p,\n" + " global uchar4 *globaluchar4p,\n" + " global short4* globalshort4p,\n" + " global ushort4 * globalushort4p,\n" + " global int4*globalint4p,\n" + " global uint4 *globaluint4p,\n" + " global float4*globalfloat4p)\n" + "{}\n", + "\n" + "kernel void global_vector4_restrict_p(global char4 *restrict globalchar4restrictp,\n" + " global uchar4* restrict globaluchar4restrictp,\n" + " global short4 * restrict globalshort4restrictp,\n" + " global ushort4*restrict globalushort4restrictp,\n" + " global int4 *restrict globalint4restrictp,\n" + " global uint4* restrict globaluint4restrictp,\n" + " global float4 *restrict globalfloat4restrictp)\n" + "{}\n", + "\n" + "kernel void global_const_vector4_p(global const char4* globalconstchar4p,\n" + " global const uchar4 * globalconstuchar4p,\n" + " global const short4*globalconstshort4p,\n" + " global const ushort4 *globalconstushort4p,\n" + " global const int4* globalconstint4p,\n" + " global const uint4 * globalconstuint4p,\n" + " global const float4* globalconstfloat4p)\n" + "{}\n", + "\n" + "kernel void global_const_vector4_restrict_p(global const char4 * restrict globalconstchar4restrictp,\n" + " global const uchar4*restrict globalconstuchar4restrictp,\n" + " global const short4 *restrict globalconstshort4restrictp,\n" + " global const ushort4* restrict globalconstushort4restrictp,\n" + " global const int4 * restrict globalconstint4restrictp,\n" + " global const uint4*restrict globalconstuint4restrictp,\n" + " global const float4 * restrict globalconstfloat4restrictp)\n" + "{}\n", + "\n" + "kernel void global_volatile_vector4_p(global volatile char4*globalvolatilechar4p,\n" + " global volatile uchar4 *globalvolatileuchar4p,\n" + " global volatile short4* globalvolatileshort4p,\n" + " global volatile ushort4 * globalvolatileushort4p,\n" + " global volatile int4*globalvolatileint4p,\n" + " global volatile uint4 *globalvolatileuint4p,\n" + " global volatile 
float4*globalvolatilefloat4p)\n" + "{}\n", + "\n" + "kernel void global_volatile_vector4_restrict_p(global volatile char4 *restrict globalvolatilechar4restrictp,\n" + " global volatile uchar4* restrict globalvolatileuchar4restrictp,\n" + " global volatile short4 * restrict globalvolatileshort4restrictp,\n" + " global volatile ushort4*restrict globalvolatileushort4restrictp,\n" + " global volatile int4 *restrict globalvolatileint4restrictp,\n" + " global volatile uint4* restrict globalvolatileuint4restrictp,\n" + " global volatile float4 *restrict globalvolatilefloat4restrictp)\n" + "{}\n", + "\n" + "kernel void global_const_volatile_vector4_p(global const volatile char4* globalconstvolatilechar4p,\n" + " global const volatile uchar4 * globalconstvolatileuchar4p,\n" + " global const volatile short4*globalconstvolatileshort4p,\n" + " global const volatile ushort4 *globalconstvolatileushort4p,\n" + " global const volatile int4* globalconstvolatileint4p,\n" + " global const volatile uint4 * globalconstvolatileuint4p,\n" + " global const volatile float4* globalconstvolatilefloat4p)\n" + "{}\n", + "\n" + "kernel void global_const_volatile_vector4_restrict_p(global const volatile char4 * restrict globalconstvolatilechar4restrictp,\n" + " global const volatile uchar4*restrict globalconstvolatileuchar4restrictp,\n" + " global const volatile short4 *restrict globalconstvolatileshort4restrictp,\n" + " global const volatile ushort4* restrict globalconstvolatileushort4restrictp,\n" + " global const volatile int4 * restrict globalconstvolatileint4restrictp,\n" + " global const volatile uint4*restrict globalconstvolatileuint4restrictp,\n" + " global const volatile float4 * restrict globalconstvolatilefloat4restrictp)\n" + "{}\n", + "\n" + "kernel void local_vector4_p(local char4*localchar4p,\n" + " local uchar4 *localuchar4p,\n" + " local short4* localshort4p,\n" + " local ushort4 * localushort4p,\n" + " local int4*localint4p,\n" + " local uint4 *localuint4p,\n" + " local 
float4*localfloat4p)\n" + "{}\n", + "\n" + "kernel void local_vector4_restrict_p(local char4 *restrict localchar4restrictp,\n" + " local uchar4* restrict localuchar4restrictp,\n" + " local short4 * restrict localshort4restrictp,\n" + " local ushort4*restrict localushort4restrictp,\n" + " local int4 *restrict localint4restrictp,\n" + " local uint4* restrict localuint4restrictp,\n" + " local float4 *restrict localfloat4restrictp)\n" + "{}\n", + "\n" + "kernel void local_const_vector4_p(local const char4* localconstchar4p,\n" + " local const uchar4 * localconstuchar4p,\n" + " local const short4*localconstshort4p,\n" + " local const ushort4 *localconstushort4p,\n" + " local const int4* localconstint4p,\n" + " local const uint4 * localconstuint4p,\n" + " local const float4* localconstfloat4p)\n" + "{}\n", + "\n" + "kernel void local_const_vector4_restrict_p(local const char4 * restrict localconstchar4restrictp,\n" + " local const uchar4*restrict localconstuchar4restrictp,\n" + " local const short4 *restrict localconstshort4restrictp,\n" + " local const ushort4* restrict localconstushort4restrictp,\n" + " local const int4 * restrict localconstint4restrictp,\n" + " local const uint4*restrict localconstuint4restrictp,\n" + " local const float4 * restrict localconstfloat4restrictp)\n" + "{}\n", + "\n" + "kernel void local_volatile_vector4_p(local volatile char4*localvolatilechar4p,\n" + " local volatile uchar4 *localvolatileuchar4p,\n" + " local volatile short4* localvolatileshort4p,\n" + " local volatile ushort4 * localvolatileushort4p,\n" + " local volatile int4*localvolatileint4p,\n" + " local volatile uint4 *localvolatileuint4p,\n" + " local volatile float4*localvolatilefloat4p)\n" + "{}\n", + "\n" + "kernel void local_volatile_vector4_restrict_p(local volatile char4 *restrict localvolatilechar4restrictp,\n" + " local volatile uchar4* restrict localvolatileuchar4restrictp,\n" + " local volatile short4 * restrict localvolatileshort4restrictp,\n" + " local volatile 
ushort4*restrict localvolatileushort4restrictp,\n" + " local volatile int4 *restrict localvolatileint4restrictp,\n" + " local volatile uint4* restrict localvolatileuint4restrictp,\n" + " local volatile float4 *restrict localvolatilefloat4restrictp)\n" + "{}\n", + "\n" + "kernel void local_const_volatile_vector4_p(local const volatile char4* localconstvolatilechar4p,\n" + " local const volatile uchar4 * localconstvolatileuchar4p,\n" + " local const volatile short4*localconstvolatileshort4p,\n" + " local const volatile ushort4 *localconstvolatileushort4p,\n" + " local const volatile int4* localconstvolatileint4p,\n" + " local const volatile uint4 * localconstvolatileuint4p,\n" + " local const volatile float4* localconstvolatilefloat4p)\n" + "{}\n", + "\n" + "kernel void local_const_volatile_vector4_restrict_p(local const volatile char4 * restrict localconstvolatilechar4restrictp,\n" + " local const volatile uchar4*restrict localconstvolatileuchar4restrictp,\n" + " local const volatile short4 *restrict localconstvolatileshort4restrictp,\n" + " local const volatile ushort4* restrict localconstvolatileushort4restrictp,\n" + " local const volatile int4 * restrict localconstvolatileint4restrictp,\n" + " local const volatile uint4*restrict localconstvolatileuint4restrictp,\n" + " local const volatile float4 * restrict localconstvolatilefloat4restrictp)\n" + "{}\n", + "\n" + "kernel void vector4_d(char4 char4d,\n" + " uchar4 uchar4d,\n" + " short4 short4d,\n" + " ushort4 ushort4d,\n" + " int4 int4d,\n" + " uint4 uint4d,\n" + " float4 float4d)\n" + "{}\n", + "\n" + "kernel void const_vector4_d(const char4 constchar4d,\n" + " const uchar4 constuchar4d,\n" + " const short4 constshort4d,\n" + " const ushort4 constushort4d,\n" + " const int4 constint4d,\n" + " const uint4 constuint4d,\n" + " const float4 constfloat4d)\n" + "{}\n", + "\n" + "kernel void private_vector4_d(private char4 privatechar4d,\n" + " private uchar4 privateuchar4d,\n" + " private short4 privateshort4d,\n" + 
" private ushort4 privateushort4d,\n" + " private int4 privateint4d,\n" + " private uint4 privateuint4d,\n" + " private float4 privatefloat4d)\n" + "{}\n", + "\n" + "kernel void private_const_vector4_d(private const char4 privateconstchar4d,\n" + " private const uchar4 privateconstuchar4d,\n" + " private const short4 privateconstshort4d,\n" + " private const ushort4 privateconstushort4d,\n" + " private const int4 privateconstint4d,\n" + " private const uint4 privateconstuint4d,\n" + " private const float4 privateconstfloat4d)\n" + "{}\n", + "\n" + "kernel void constant_vector8_p0(constant char8*constantchar8p,\n" + " constant uchar8 *constantuchar8p,\n" + " constant short8* constantshort8p,\n" + " constant ushort8 * constantushort8p)\n" + "{}\n", + "\n" + "kernel void constant_vector8_p1(constant int8*constantint8p,\n" + " constant uint8 *constantuint8p)\n" + "{}\n", + "\n" + "kernel void constant_vector8_p2(constant float8*constantfloat8p)\n" + "{}\n", + "\n" + "kernel void constant_vector8_restrict_p0(constant char8 *restrict constantchar8restrictp,\n" + " constant uchar8* restrict constantuchar8restrictp,\n" + " constant short8 * restrict constantshort8restrictp,\n" + " constant ushort8*restrict constantushort8restrictp)\n" + "{}\n", + "\n" + "kernel void constant_vector8_restrict_p1(constant int8 *restrict constantint8restrictp,\n" + " constant uint8* restrict constantuint8restrictp)\n" + "{}\n", + "\n" + "kernel void constant_vector8_restrict_p2(constant float8 *restrict constantfloat8restrictp)\n" + "{}\n", + "\n" + "kernel void global_vector8_p(global char8*globalchar8p,\n" + " global uchar8 *globaluchar8p,\n" + " global short8* globalshort8p,\n" + " global ushort8 * globalushort8p,\n" + " global int8*globalint8p,\n" + " global uint8 *globaluint8p,\n" + " global float8*globalfloat8p)\n" + "{}\n", + "\n" + "kernel void global_vector8_restrict_p(global char8 *restrict globalchar8restrictp,\n" + " global uchar8* restrict globaluchar8restrictp,\n" + " global 
short8 * restrict globalshort8restrictp,\n" + " global ushort8*restrict globalushort8restrictp,\n" + " global int8 *restrict globalint8restrictp,\n" + " global uint8* restrict globaluint8restrictp,\n" + " global float8 *restrict globalfloat8restrictp)\n" + "{}\n", + "\n" + "kernel void global_const_vector8_p(global const char8* globalconstchar8p,\n" + " global const uchar8 * globalconstuchar8p,\n" + " global const short8*globalconstshort8p,\n" + " global const ushort8 *globalconstushort8p,\n" + " global const int8* globalconstint8p,\n" + " global const uint8 * globalconstuint8p,\n" + " global const float8* globalconstfloat8p)\n" + "{}\n", + "\n" + "kernel void global_const_vector8_restrict_p(global const char8 * restrict globalconstchar8restrictp,\n" + " global const uchar8*restrict globalconstuchar8restrictp,\n" + " global const short8 *restrict globalconstshort8restrictp,\n" + " global const ushort8* restrict globalconstushort8restrictp,\n" + " global const int8 * restrict globalconstint8restrictp,\n" + " global const uint8*restrict globalconstuint8restrictp,\n" + " global const float8 * restrict globalconstfloat8restrictp)\n" + "{}\n", + "\n" + "kernel void global_volatile_vector8_p(global volatile char8*globalvolatilechar8p,\n" + " global volatile uchar8 *globalvolatileuchar8p,\n" + " global volatile short8* globalvolatileshort8p,\n" + " global volatile ushort8 * globalvolatileushort8p,\n" + " global volatile int8*globalvolatileint8p,\n" + " global volatile uint8 *globalvolatileuint8p,\n" + " global volatile float8*globalvolatilefloat8p)\n" + "{}\n", + "\n" + "kernel void global_volatile_vector8_restrict_p(global volatile char8 *restrict globalvolatilechar8restrictp,\n" + " global volatile uchar8* restrict globalvolatileuchar8restrictp,\n" + " global volatile short8 * restrict globalvolatileshort8restrictp,\n" + " global volatile ushort8*restrict globalvolatileushort8restrictp,\n" + " global volatile int8 *restrict globalvolatileint8restrictp,\n" + " global 
volatile uint8* restrict globalvolatileuint8restrictp,\n" + " global volatile float8 *restrict globalvolatilefloat8restrictp)\n" + "{}\n", + "\n" + "kernel void global_const_volatile_vector8_p(global const volatile char8* globalconstvolatilechar8p,\n" + " global const volatile uchar8 * globalconstvolatileuchar8p,\n" + " global const volatile short8*globalconstvolatileshort8p,\n" + " global const volatile ushort8 *globalconstvolatileushort8p,\n" + " global const volatile int8* globalconstvolatileint8p,\n" + " global const volatile uint8 * globalconstvolatileuint8p,\n" + " global const volatile float8* globalconstvolatilefloat8p)\n" + "{}\n", + "\n" + "kernel void global_const_volatile_vector8_restrict_p(global const volatile char8 * restrict globalconstvolatilechar8restrictp,\n" + " global const volatile uchar8*restrict globalconstvolatileuchar8restrictp,\n" + " global const volatile short8 *restrict globalconstvolatileshort8restrictp,\n" + " global const volatile ushort8* restrict globalconstvolatileushort8restrictp,\n" + " global const volatile int8 * restrict globalconstvolatileint8restrictp,\n" + " global const volatile uint8*restrict globalconstvolatileuint8restrictp,\n" + " global const volatile float8 * restrict globalconstvolatilefloat8restrictp)\n" + "{}\n", + "\n" + "kernel void local_vector8_p(local char8*localchar8p,\n" + " local uchar8 *localuchar8p,\n" + " local short8* localshort8p,\n" + " local ushort8 * localushort8p,\n" + " local int8*localint8p,\n" + " local uint8 *localuint8p,\n" + " local float8*localfloat8p)\n" + "{}\n", + "\n" + "kernel void local_vector8_restrict_p(local char8 *restrict localchar8restrictp,\n" + " local uchar8* restrict localuchar8restrictp,\n" + " local short8 * restrict localshort8restrictp,\n" + " local ushort8*restrict localushort8restrictp,\n" + " local int8 *restrict localint8restrictp,\n" + " local uint8* restrict localuint8restrictp,\n" + " local float8 *restrict localfloat8restrictp)\n" + "{}\n", + "\n" + "kernel 
void local_const_vector8_p(local const char8* localconstchar8p,\n" + " local const uchar8 * localconstuchar8p,\n" + " local const short8*localconstshort8p,\n" + " local const ushort8 *localconstushort8p,\n" + " local const int8* localconstint8p,\n" + " local const uint8 * localconstuint8p,\n" + " local const float8* localconstfloat8p)\n" + "{}\n", + "\n" + "kernel void local_const_vector8_restrict_p(local const char8 * restrict localconstchar8restrictp,\n" + " local const uchar8*restrict localconstuchar8restrictp,\n" + " local const short8 *restrict localconstshort8restrictp,\n" + " local const ushort8* restrict localconstushort8restrictp,\n" + " local const int8 * restrict localconstint8restrictp,\n" + " local const uint8*restrict localconstuint8restrictp,\n" + " local const float8 * restrict localconstfloat8restrictp)\n" + "{}\n", + "\n" + "kernel void local_volatile_vector8_p(local volatile char8*localvolatilechar8p,\n" + " local volatile uchar8 *localvolatileuchar8p,\n" + " local volatile short8* localvolatileshort8p,\n" + " local volatile ushort8 * localvolatileushort8p,\n" + " local volatile int8*localvolatileint8p,\n" + " local volatile uint8 *localvolatileuint8p,\n" + " local volatile float8*localvolatilefloat8p)\n" + "{}\n", + "\n" + "kernel void local_volatile_vector8_restrict_p(local volatile char8 *restrict localvolatilechar8restrictp,\n" + " local volatile uchar8* restrict localvolatileuchar8restrictp,\n" + " local volatile short8 * restrict localvolatileshort8restrictp,\n" + " local volatile ushort8*restrict localvolatileushort8restrictp,\n" + " local volatile int8 *restrict localvolatileint8restrictp,\n" + " local volatile uint8* restrict localvolatileuint8restrictp,\n" + " local volatile float8 *restrict localvolatilefloat8restrictp)\n" + "{}\n", + "\n" + "kernel void local_const_volatile_vector8_p(local const volatile char8* localconstvolatilechar8p,\n" + " local const volatile uchar8 * localconstvolatileuchar8p,\n" + " local const volatile 
short8*localconstvolatileshort8p,\n" + " local const volatile ushort8 *localconstvolatileushort8p,\n" + " local const volatile int8* localconstvolatileint8p,\n" + " local const volatile uint8 * localconstvolatileuint8p,\n" + " local const volatile float8* localconstvolatilefloat8p)\n" + "{}\n", + "\n" + "kernel void local_const_volatile_vector8_restrict_p(local const volatile char8 * restrict localconstvolatilechar8restrictp,\n" + " local const volatile uchar8*restrict localconstvolatileuchar8restrictp,\n" + " local const volatile short8 *restrict localconstvolatileshort8restrictp,\n" + " local const volatile ushort8* restrict localconstvolatileushort8restrictp,\n" + " local const volatile int8 * restrict localconstvolatileint8restrictp,\n" + " local const volatile uint8*restrict localconstvolatileuint8restrictp,\n" + " local const volatile float8 * restrict localconstvolatilefloat8restrictp)\n" + "{}\n", + "\n" + "kernel void vector8_d(char8 char8d,\n" + " uchar8 uchar8d,\n" + " short8 short8d,\n" + " ushort8 ushort8d,\n" + " int8 int8d,\n" + " uint8 uint8d,\n" + " float8 float8d)\n" + "{}\n", + "\n" + "kernel void const_vector8_d(const char8 constchar8d,\n" + " const uchar8 constuchar8d,\n" + " const short8 constshort8d,\n" + " const ushort8 constushort8d,\n" + " const int8 constint8d,\n" + " const uint8 constuint8d,\n" + " const float8 constfloat8d)\n" + "{}\n", + "\n" + "kernel void private_vector8_d(private char8 privatechar8d,\n" + " private uchar8 privateuchar8d,\n" + " private short8 privateshort8d,\n" + " private ushort8 privateushort8d,\n" + " private int8 privateint8d,\n" + " private uint8 privateuint8d,\n" + " private float8 privatefloat8d)\n" + "{}\n", + "\n" + "kernel void private_const_vector8_d(private const char8 privateconstchar8d,\n" + " private const uchar8 privateconstuchar8d,\n" + " private const short8 privateconstshort8d,\n" + " private const ushort8 privateconstushort8d,\n" + " private const int8 privateconstint8d,\n" + " private const 
uint8 privateconstuint8d,\n" + " private const float8 privateconstfloat8d)\n" + "{}\n", + "\n" + "kernel void constant_vector16_p0(constant char16*constantchar16p,\n" + " constant uchar16 *constantuchar16p,\n" + " constant short16* constantshort16p,\n" + " constant ushort16 * constantushort16p)\n" + "{}\n", + "\n" + "kernel void constant_vector16_p1(constant int16*constantint16p,\n" + " constant uint16 *constantuint16p)\n" + "{}\n", + "\n" + "kernel void constant_vector16_p2(constant float16*constantfloat16p)\n" + "{}\n", + "\n" + "kernel void constant_vector16_restrict_p0(constant char16 *restrict constantchar16restrictp,\n" + " constant uchar16* restrict constantuchar16restrictp,\n" + " constant short16 * restrict constantshort16restrictp,\n" + " constant ushort16*restrict constantushort16restrictp)\n" + "{}\n", + "\n" + "kernel void constant_vector16_restrict_p1(constant int16 *restrict constantint16restrictp,\n" + " constant uint16* restrict constantuint16restrictp)\n" + "{}\n", + "\n" + "kernel void constant_vector16_restrict_p2(constant float16 *restrict constantfloat16restrictp)\n" + "{}\n", + "\n" + "kernel void global_vector16_p(global char16*globalchar16p,\n" + " global uchar16 *globaluchar16p,\n" + " global short16* globalshort16p,\n" + " global ushort16 * globalushort16p,\n" + " global int16*globalint16p,\n" + " global uint16 *globaluint16p,\n" + " global float16*globalfloat16p)\n" + "{}\n", + "\n" + "kernel void global_vector16_restrict_p(global char16 *restrict globalchar16restrictp,\n" + " global uchar16* restrict globaluchar16restrictp,\n" + " global short16 * restrict globalshort16restrictp,\n" + " global ushort16*restrict globalushort16restrictp,\n" + " global int16 *restrict globalint16restrictp,\n" + " global uint16* restrict globaluint16restrictp,\n" + " global float16 *restrict globalfloat16restrictp)\n" + "{}\n", + "\n" + "kernel void global_const_vector16_p(global const char16* globalconstchar16p,\n" + " global const uchar16 * 
globalconstuchar16p,\n" + " global const short16*globalconstshort16p,\n" + " global const ushort16 *globalconstushort16p,\n" + " global const int16* globalconstint16p,\n" + " global const uint16 * globalconstuint16p,\n" + " global const float16* globalconstfloat16p)\n" + "{}\n", + "\n" + "kernel void global_const_vector16_restrict_p(global const char16 * restrict globalconstchar16restrictp,\n" + " global const uchar16*restrict globalconstuchar16restrictp,\n" + " global const short16 *restrict globalconstshort16restrictp,\n" + " global const ushort16* restrict globalconstushort16restrictp,\n" + " global const int16 * restrict globalconstint16restrictp,\n" + " global const uint16*restrict globalconstuint16restrictp,\n" + " global const float16 * restrict globalconstfloat16restrictp)\n" + "{}\n", + "\n" + "kernel void global_volatile_vector16_p(global volatile char16*globalvolatilechar16p,\n" + " global volatile uchar16 *globalvolatileuchar16p,\n" + " global volatile short16* globalvolatileshort16p,\n" + " global volatile ushort16 * globalvolatileushort16p,\n" + " global volatile int16*globalvolatileint16p,\n" + " global volatile uint16 *globalvolatileuint16p,\n" + " global volatile float16*globalvolatilefloat16p)\n" + "{}\n", + "\n" + "kernel void global_volatile_vector16_restrict_p(global volatile char16 *restrict globalvolatilechar16restrictp,\n" + " global volatile uchar16* restrict globalvolatileuchar16restrictp,\n" + " global volatile short16 * restrict globalvolatileshort16restrictp,\n" + " global volatile ushort16*restrict globalvolatileushort16restrictp,\n" + " global volatile int16 *restrict globalvolatileint16restrictp,\n" + " global volatile uint16* restrict globalvolatileuint16restrictp,\n" + " global volatile float16 *restrict globalvolatilefloat16restrictp)\n" + "{}\n", + "\n" + "kernel void global_const_volatile_vector16_p(global const volatile char16* globalconstvolatilechar16p,\n" + " global const volatile uchar16 * globalconstvolatileuchar16p,\n" + 
" global const volatile short16*globalconstvolatileshort16p,\n" + " global const volatile ushort16 *globalconstvolatileushort16p,\n" + " global const volatile int16* globalconstvolatileint16p,\n" + " global const volatile uint16 * globalconstvolatileuint16p,\n" + " global const volatile float16* globalconstvolatilefloat16p)\n" + "{}\n", + "\n" + "kernel void global_const_volatile_vector16_restrict_p(global const volatile char16 * restrict globalconstvolatilechar16restrictp,\n" + " global const volatile uchar16*restrict globalconstvolatileuchar16restrictp,\n" + " global const volatile short16 *restrict globalconstvolatileshort16restrictp,\n" + " global const volatile ushort16* restrict globalconstvolatileushort16restrictp,\n" + " global const volatile int16 * restrict globalconstvolatileint16restrictp,\n" + " global const volatile uint16*restrict globalconstvolatileuint16restrictp,\n" + " global const volatile float16 * restrict globalconstvolatilefloat16restrictp)\n" + "{}\n", + "\n" + "kernel void local_vector16_p(local char16*localchar16p,\n" + " local uchar16 *localuchar16p,\n" + " local short16* localshort16p,\n" + " local ushort16 * localushort16p,\n" + " local int16*localint16p,\n" + " local uint16 *localuint16p,\n" + " local float16*localfloat16p)\n" + "{}\n", + "\n" + "kernel void local_vector16_restrict_p(local char16 *restrict localchar16restrictp,\n" + " local uchar16* restrict localuchar16restrictp,\n" + " local short16 * restrict localshort16restrictp,\n" + " local ushort16*restrict localushort16restrictp,\n" + " local int16 *restrict localint16restrictp,\n" + " local uint16* restrict localuint16restrictp,\n" + " local float16 *restrict localfloat16restrictp)\n" + "{}\n", + "\n" + "kernel void local_const_vector16_p(local const char16* localconstchar16p,\n" + " local const uchar16 * localconstuchar16p,\n" + " local const short16*localconstshort16p,\n" + " local const ushort16 *localconstushort16p,\n" + " local const int16* localconstint16p,\n" + " 
local const uint16 * localconstuint16p,\n" + " local const float16* localconstfloat16p)\n" + "{}\n", + "\n" + "kernel void local_const_vector16_restrict_p(local const char16 * restrict localconstchar16restrictp,\n" + " local const uchar16*restrict localconstuchar16restrictp,\n" + " local const short16 *restrict localconstshort16restrictp,\n" + " local const ushort16* restrict localconstushort16restrictp,\n" + " local const int16 * restrict localconstint16restrictp,\n" + " local const uint16*restrict localconstuint16restrictp,\n" + " local const float16 * restrict localconstfloat16restrictp)\n" + "{}\n", + "\n" + "kernel void local_volatile_vector16_p(local volatile char16*localvolatilechar16p,\n" + " local volatile uchar16 *localvolatileuchar16p,\n" + " local volatile short16* localvolatileshort16p,\n" + " local volatile ushort16 * localvolatileushort16p,\n" + " local volatile int16*localvolatileint16p,\n" + " local volatile uint16 *localvolatileuint16p,\n" + " local volatile float16*localvolatilefloat16p)\n" + "{}\n", + "\n" + "kernel void local_volatile_vector16_restrict_p(local volatile char16 *restrict localvolatilechar16restrictp,\n" + " local volatile uchar16* restrict localvolatileuchar16restrictp,\n" + " local volatile short16 * restrict localvolatileshort16restrictp,\n" + " local volatile ushort16*restrict localvolatileushort16restrictp,\n" + " local volatile int16 *restrict localvolatileint16restrictp,\n" + " local volatile uint16* restrict localvolatileuint16restrictp,\n" + " local volatile float16 *restrict localvolatilefloat16restrictp)\n" + "{}\n", + "\n" + "kernel void local_const_volatile_vector16_p(local const volatile char16* localconstvolatilechar16p,\n" + " local const volatile uchar16 * localconstvolatileuchar16p,\n" + " local const volatile short16*localconstvolatileshort16p,\n" + " local const volatile ushort16 *localconstvolatileushort16p,\n" + " local const volatile int16* localconstvolatileint16p,\n" + " local const volatile uint16 * 
localconstvolatileuint16p,\n" + " local const volatile float16* localconstvolatilefloat16p)\n" + "{}\n", + "\n" + "kernel void local_const_volatile_vector16_restrict_p(local const volatile char16 * restrict localconstvolatilechar16restrictp,\n" + " local const volatile uchar16*restrict localconstvolatileuchar16restrictp,\n" + " local const volatile short16 *restrict localconstvolatileshort16restrictp,\n" + " local const volatile ushort16* restrict localconstvolatileushort16restrictp,\n" + " local const volatile int16 * restrict localconstvolatileint16restrictp,\n" + " local const volatile uint16*restrict localconstvolatileuint16restrictp,\n" + " local const volatile float16 * restrict localconstvolatilefloat16restrictp)\n" + "{}\n", + "\n" + "kernel void vector16_d(char16 char16d,\n" + " uchar16 uchar16d,\n" + " short16 short16d,\n" + " ushort16 ushort16d,\n" + " int16 int16d,\n" + " uint16 uint16d,\n" + " float16 float16d)\n" + "{}\n", + "\n" + "kernel void const_vector16_d(const char16 constchar16d,\n" + " const uchar16 constuchar16d,\n" + " const short16 constshort16d,\n" + " const ushort16 constushort16d,\n" + " const int16 constint16d,\n" + " const uint16 constuint16d,\n" + " const float16 constfloat16d)\n" + "{}\n", + "\n" + "kernel void private_vector16_d(private char16 privatechar16d,\n" + " private uchar16 privateuchar16d,\n" + " private short16 privateshort16d,\n" + " private ushort16 privateushort16d,\n" + " private int16 privateint16d,\n" + " private uint16 privateuint16d,\n" + " private float16 privatefloat16d)\n" + "{}\n", + "\n" + "kernel void private_const_vector16_d(private const char16 privateconstchar16d,\n" + " private const uchar16 privateconstuchar16d,\n" + " private const short16 privateconstshort16d,\n" + " private const ushort16 privateconstushort16d,\n" + " private const int16 privateconstint16d,\n" + " private const uint16 privateconstuint16d,\n" + " private const float16 privateconstfloat16d)\n" + "{}\n", + "\n" + "kernel void 
constant_derived_p0(constant typedef_type*constanttypedef_typep,\n" + " constant struct struct_type *constantstructstruct_typep,\n" + " constant typedef_struct_type* constanttypedef_struct_typep,\n" + " constant union union_type * constantunionunion_typep)\n" + "{}\n", + "\n" + "kernel void constant_derived_p1(constant typedef_union_type*constanttypedef_union_typep,\n" + " constant enum enum_type *constantenumenum_typep,\n" + " constant typedef_enum_type* constanttypedef_enum_typep)\n" + "{}\n", + "\n" + "kernel void constant_derived_restrict_p0(constant typedef_type * restrict constanttypedef_typerestrictp,\n" + " constant struct struct_type*restrict constantstructstruct_typerestrictp,\n" + " constant typedef_struct_type *restrict constanttypedef_struct_typerestrictp,\n" + " constant union union_type* restrict constantunionunion_typerestrictp)\n" + "{}\n", + "\n" + "kernel void constant_derived_restrict_p1(constant typedef_union_type * restrict constanttypedef_union_typerestrictp,\n" + " constant enum enum_type*restrict constantenumenum_typerestrictp,\n" + " constant typedef_enum_type *restrict constanttypedef_enum_typerestrictp)\n" + "{}\n", + "\n" + "kernel void global_derived_p(global typedef_type*globaltypedef_typep,\n" + " global struct struct_type *globalstructstruct_typep,\n" + " global typedef_struct_type* globaltypedef_struct_typep,\n" + " global union union_type * globalunionunion_typep,\n" + " global typedef_union_type*globaltypedef_union_typep,\n" + " global enum enum_type *globalenumenum_typep,\n" + " global typedef_enum_type* globaltypedef_enum_typep)\n" + "{}\n", + "\n" + "kernel void global_derived_restrict_p(global typedef_type * restrict globaltypedef_typerestrictp,\n" + " global struct struct_type*restrict globalstructstruct_typerestrictp,\n" + " global typedef_struct_type *restrict globaltypedef_struct_typerestrictp,\n" + " global union union_type* restrict globalunionunion_typerestrictp,\n" + " global typedef_union_type * restrict 
globaltypedef_union_typerestrictp,\n" + " global enum enum_type*restrict globalenumenum_typerestrictp,\n" + " global typedef_enum_type *restrict globaltypedef_enum_typerestrictp)\n" + "{}\n", + "\n" + "kernel void global_const_derived_p(global const typedef_type* globalconsttypedef_typep,\n" + " global const struct struct_type * globalconststructstruct_typep,\n" + " global const typedef_struct_type*globalconsttypedef_struct_typep,\n" + " global const union union_type *globalconstunionunion_typep,\n" + " global const typedef_union_type* globalconsttypedef_union_typep,\n" + " global const enum enum_type * globalconstenumenum_typep,\n" + " global const typedef_enum_type*globalconsttypedef_enum_typep)\n" + "{}\n", + "\n" + "kernel void global_const_derived_restrict_p(global const typedef_type *restrict globalconsttypedef_typerestrictp,\n" + " global const struct struct_type* restrict globalconststructstruct_typerestrictp,\n" + " global const typedef_struct_type * restrict globalconsttypedef_struct_typerestrictp,\n" + " global const union union_type*restrict globalconstunionunion_typerestrictp,\n" + " global const typedef_union_type *restrict globalconsttypedef_union_typerestrictp,\n" + " global const enum enum_type* restrict globalconstenumenum_typerestrictp,\n" + " global const typedef_enum_type * restrict globalconsttypedef_enum_typerestrictp)\n" + "{}\n", + "\n" + "kernel void global_volatile_derived_p(global volatile typedef_type*globalvolatiletypedef_typep,\n" + " global volatile struct struct_type *globalvolatilestructstruct_typep,\n" + " global volatile typedef_struct_type* globalvolatiletypedef_struct_typep,\n" + " global volatile union union_type * globalvolatileunionunion_typep,\n" + " global volatile typedef_union_type*globalvolatiletypedef_union_typep,\n" + " global volatile enum enum_type *globalvolatileenumenum_typep,\n" + " global volatile typedef_enum_type* globalvolatiletypedef_enum_typep)\n" + "{}\n", + "\n" + "kernel void 
global_volatile_derived_restrict_p(global volatile typedef_type * restrict globalvolatiletypedef_typerestrictp,\n" + " global volatile struct struct_type*restrict globalvolatilestructstruct_typerestrictp,\n" + " global volatile typedef_struct_type *restrict globalvolatiletypedef_struct_typerestrictp,\n" + " global volatile union union_type* restrict globalvolatileunionunion_typerestrictp,\n" + " global volatile typedef_union_type * restrict globalvolatiletypedef_union_typerestrictp,\n" + " global volatile enum enum_type*restrict globalvolatileenumenum_typerestrictp,\n" + " global volatile typedef_enum_type *restrict globalvolatiletypedef_enum_typerestrictp)\n" + "{}\n", + "\n" + "kernel void global_const_volatile_derived_p(global const volatile typedef_type* globalconstvolatiletypedef_typep,\n" + " global const volatile struct struct_type * globalconstvolatilestructstruct_typep,\n" + " global const volatile typedef_struct_type*globalconstvolatiletypedef_struct_typep,\n" + " global const volatile union union_type *globalconstvolatileunionunion_typep,\n" + " global const volatile typedef_union_type* globalconstvolatiletypedef_union_typep,\n" + " global const volatile enum enum_type * globalconstvolatileenumenum_typep,\n" + " global const volatile typedef_enum_type*globalconstvolatiletypedef_enum_typep)\n" + "{}\n", + "\n" + "kernel void global_const_volatile_derived_restrict_p(global const volatile typedef_type *restrict globalconstvolatiletypedef_typerestrictp,\n" + " global const volatile struct struct_type* restrict globalconstvolatilestructstruct_typerestrictp,\n" + " global const volatile typedef_struct_type * restrict globalconstvolatiletypedef_struct_typerestrictp,\n" + " global const volatile union union_type*restrict globalconstvolatileunionunion_typerestrictp,\n" + " global const volatile typedef_union_type *restrict globalconstvolatiletypedef_union_typerestrictp,\n" + " global const volatile enum enum_type* restrict 
globalconstvolatileenumenum_typerestrictp,\n" + " global const volatile typedef_enum_type * restrict globalconstvolatiletypedef_enum_typerestrictp)\n" + "{}\n", + "\n" + "kernel void local_derived_p(local typedef_type*localtypedef_typep,\n" + " local struct struct_type *localstructstruct_typep,\n" + " local typedef_struct_type* localtypedef_struct_typep,\n" + " local union union_type * localunionunion_typep,\n" + " local typedef_union_type*localtypedef_union_typep,\n" + " local enum enum_type *localenumenum_typep,\n" + " local typedef_enum_type* localtypedef_enum_typep)\n" + "{}\n", + "\n" + "kernel void local_derived_restrict_p(local typedef_type * restrict localtypedef_typerestrictp,\n" + " local struct struct_type*restrict localstructstruct_typerestrictp,\n" + " local typedef_struct_type *restrict localtypedef_struct_typerestrictp,\n" + " local union union_type* restrict localunionunion_typerestrictp,\n" + " local typedef_union_type * restrict localtypedef_union_typerestrictp,\n" + " local enum enum_type*restrict localenumenum_typerestrictp,\n" + " local typedef_enum_type *restrict localtypedef_enum_typerestrictp)\n" + "{}\n", + "\n" + "kernel void local_const_derived_p(local const typedef_type* localconsttypedef_typep,\n" + " local const struct struct_type * localconststructstruct_typep,\n" + " local const typedef_struct_type*localconsttypedef_struct_typep,\n" + " local const union union_type *localconstunionunion_typep,\n" + " local const typedef_union_type* localconsttypedef_union_typep,\n" + " local const enum enum_type * localconstenumenum_typep,\n" + " local const typedef_enum_type*localconsttypedef_enum_typep)\n" + "{}\n", + "\n" + "kernel void local_const_derived_restrict_p(local const typedef_type *restrict localconsttypedef_typerestrictp,\n" + " local const struct struct_type* restrict localconststructstruct_typerestrictp,\n" + " local const typedef_struct_type * restrict localconsttypedef_struct_typerestrictp,\n" + " local const union 
union_type*restrict localconstunionunion_typerestrictp,\n" + " local const typedef_union_type *restrict localconsttypedef_union_typerestrictp,\n" + " local const enum enum_type* restrict localconstenumenum_typerestrictp,\n" + " local const typedef_enum_type * restrict localconsttypedef_enum_typerestrictp)\n" + "{}\n", + "\n" + "kernel void local_volatile_derived_p(local volatile typedef_type*localvolatiletypedef_typep,\n" + " local volatile struct struct_type *localvolatilestructstruct_typep,\n" + " local volatile typedef_struct_type* localvolatiletypedef_struct_typep,\n" + " local volatile union union_type * localvolatileunionunion_typep,\n" + " local volatile typedef_union_type*localvolatiletypedef_union_typep,\n" + " local volatile enum enum_type *localvolatileenumenum_typep,\n" + " local volatile typedef_enum_type* localvolatiletypedef_enum_typep)\n" + "{}\n", + "\n" + "kernel void local_volatile_derived_restrict_p(local volatile typedef_type * restrict localvolatiletypedef_typerestrictp,\n" + " local volatile struct struct_type*restrict localvolatilestructstruct_typerestrictp,\n" + " local volatile typedef_struct_type *restrict localvolatiletypedef_struct_typerestrictp,\n" + " local volatile union union_type* restrict localvolatileunionunion_typerestrictp,\n" + " local volatile typedef_union_type * restrict localvolatiletypedef_union_typerestrictp,\n" + " local volatile enum enum_type*restrict localvolatileenumenum_typerestrictp,\n" + " local volatile typedef_enum_type *restrict localvolatiletypedef_enum_typerestrictp)\n" + "{}\n", + "\n" + "kernel void local_const_volatile_derived_p(local const volatile typedef_type* localconstvolatiletypedef_typep,\n" + " local const volatile struct struct_type * localconstvolatilestructstruct_typep,\n" + " local const volatile typedef_struct_type*localconstvolatiletypedef_struct_typep,\n" + " local const volatile union union_type *localconstvolatileunionunion_typep,\n" + " local const volatile typedef_union_type* 
localconstvolatiletypedef_union_typep,\n" + " local const volatile enum enum_type * localconstvolatileenumenum_typep,\n" + " local const volatile typedef_enum_type*localconstvolatiletypedef_enum_typep)\n" + "{}\n", + "\n" + "kernel void local_const_volatile_derived_restrict_p(local const volatile typedef_type *restrict localconstvolatiletypedef_typerestrictp,\n" + " local const volatile struct struct_type* restrict localconstvolatilestructstruct_typerestrictp,\n" + " local const volatile typedef_struct_type * restrict localconstvolatiletypedef_struct_typerestrictp,\n" + " local const volatile union union_type*restrict localconstvolatileunionunion_typerestrictp,\n" + " local const volatile typedef_union_type *restrict localconstvolatiletypedef_union_typerestrictp,\n" + " local const volatile enum enum_type* restrict localconstvolatileenumenum_typerestrictp,\n" + " local const volatile typedef_enum_type * restrict localconstvolatiletypedef_enum_typerestrictp)\n" + "{}\n", + "\n" + "kernel void derived_d(typedef_type typedef_typed,\n" + " struct struct_type structstruct_typed,\n" + " typedef_struct_type typedef_struct_typed,\n" + " union union_type unionunion_typed,\n" + " typedef_union_type typedef_union_typed,\n" + " enum enum_type enumenum_typed,\n" + " typedef_enum_type typedef_enum_typed)\n" + "{}\n", + "\n" + "kernel void const_derived_d(const typedef_type consttypedef_typed,\n" + " const struct struct_type conststructstruct_typed,\n" + " const typedef_struct_type consttypedef_struct_typed,\n" + " const union union_type constunionunion_typed,\n" + " const typedef_union_type consttypedef_union_typed,\n" + " const enum enum_type constenumenum_typed,\n" + " const typedef_enum_type consttypedef_enum_typed)\n" + "{}\n", + "\n" + "kernel void private_derived_d(private typedef_type privatetypedef_typed,\n" + " private struct struct_type privatestructstruct_typed,\n" + " private typedef_struct_type privatetypedef_struct_typed,\n" + " private union union_type 
privateunionunion_typed,\n" + " private typedef_union_type privatetypedef_union_typed,\n" + " private enum enum_type privateenumenum_typed,\n" + " private typedef_enum_type privatetypedef_enum_typed)\n" + "{}\n", + "\n" + "kernel void private_const_derived_d(private const typedef_type privateconsttypedef_typed,\n" + " private const struct struct_type privateconststructstruct_typed,\n" + " private const typedef_struct_type privateconsttypedef_struct_typed,\n" + " private const union union_type privateconstunionunion_typed,\n" + " private const typedef_union_type privateconsttypedef_union_typed,\n" + " private const enum enum_type privateconstenumenum_typed,\n" + " private const typedef_enum_type privateconsttypedef_enum_typed)\n" + "{}\n", + "\n" +}; + +const char * required_arg_info[][72] = { + // The minimum value of CL_DEVICE_MAX_CONSTANT_ARGS is 4 + { + "constant_scalar_p0", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "void*", "constantvoidp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char*", "constantcharp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar*", "constantucharp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar*", "constantunsignedcharp", + NULL + }, + { + "constant_scalar_p1", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short*", "constantshortp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort*", "constantushortp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST), "ushort*", "constantunsignedshortp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int*", "constantintp", + NULL + }, + { + "constant_scalar_p2", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint*", "constantuintp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint*", "constantunsignedintp", + NULL + }, + { + "constant_scalar_p3", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float*", "constantfloatp", + NULL + }, + { + "constant_scalar_restrict_p0", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "constantvoidrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "constantcharrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "constantucharrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "constantunsignedcharrestrictp", + NULL + }, + { + "constant_scalar_restrict_p1", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "constantshortrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "constantushortrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "constantunsignedshortrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "constantintrestrictp", + NULL + }, + { + "constant_scalar_restrict_p2", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "constantuintrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "constantunsignedintrestrictp", + NULL + }, + { + "constant_scalar_restrict_p3", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float*", "constantfloatrestrictp", + NULL + }, + { + "global_scalar_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "void*", "globalvoidp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char*", "globalcharp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar*", "globalucharp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar*", "globalunsignedcharp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short*", "globalshortp", + (const 
char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort*", "globalushortp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort*", "globalunsignedshortp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int*", "globalintp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint*", "globaluintp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint*", "globalunsignedintp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float*", "globalfloatp", + NULL + }, + { + "global_scalar_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "globalvoidrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "globalcharrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "globalucharrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "globalunsignedcharrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "globalshortrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "globalushortrestrictp", + (const char 
*)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "globalunsignedshortrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "globalintrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "globaluintrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "globalunsignedintrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float*", "globalfloatrestrictp", + NULL + }, + { + "global_const_scalar_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "void*", "globalconstvoidp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char*", "globalconstcharp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar*", "globalconstucharp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar*", "globalconstunsignedcharp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short*", "globalconstshortp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort*", "globalconstushortp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort*", 
"globalconstunsignedshortp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int*", "globalconstintp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint*", "globalconstuintp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint*", "globalconstunsignedintp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float*", "globalconstfloatp", + NULL + }, + { + "global_const_scalar_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "globalconstvoidrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "globalconstcharrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "globalconstucharrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "globalconstunsignedcharrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "globalconstshortrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "globalconstushortrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "globalconstunsignedshortrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "globalconstintrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "globalconstuintrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "globalconstunsignedintrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float*", "globalconstfloatrestrictp", + NULL + }, + { + "global_volatile_scalar_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "void*", "globalvolatilevoidp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char*", "globalvolatilecharp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar*", "globalvolatileucharp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar*", "globalvolatileunsignedcharp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short*", "globalvolatileshortp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort*", "globalvolatileushortp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort*", "globalvolatileunsignedshortp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int*", "globalvolatileintp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint*", "globalvolatileuintp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint*", "globalvolatileunsignedintp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float*", "globalvolatilefloatp", + NULL + }, + { + "global_volatile_scalar_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "globalvolatilevoidrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "globalvolatilecharrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "globalvolatileucharrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "globalvolatileunsignedcharrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "globalvolatileshortrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "globalvolatileushortrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "globalvolatileunsignedshortrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "globalvolatileintrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "globalvolatileuintrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "globalvolatileunsignedintrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float*", "globalvolatilefloatrestrictp", + NULL + }, + { + "global_const_volatile_scalar_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "void*", "globalconstvolatilevoidp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char*", "globalconstvolatilecharp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar*", "globalconstvolatileucharp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar*", "globalconstvolatileunsignedcharp", + (const char 
*)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short*", "globalconstvolatileshortp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort*", "globalconstvolatileushortp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort*", "globalconstvolatileunsignedshortp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int*", "globalconstvolatileintp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint*", "globalconstvolatileuintp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint*", "globalconstvolatileunsignedintp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float*", "globalconstvolatilefloatp", + NULL + }, + { + "global_const_volatile_scalar_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "globalconstvolatilevoidrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "globalconstvolatilecharrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "globalconstvolatileucharrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "globalconstvolatileunsignedcharrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "globalconstvolatileshortrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "globalconstvolatileushortrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "globalconstvolatileunsignedshortrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "globalconstvolatileintrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "globalconstvolatileuintrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "globalconstvolatileunsignedintrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float*", 
"globalconstvolatilefloatrestrictp", + NULL + }, + { + "local_scalar_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "void*", "localvoidp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char*", "localcharp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar*", "localucharp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar*", "localunsignedcharp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short*", "localshortp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort*", "localushortp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort*", "localunsignedshortp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int*", "localintp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint*", "localuintp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint*", "localunsignedintp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float*", "localfloatp", + NULL + }, + { + "local_scalar_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "localvoidrestrictp", + (const char 
*)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "localcharrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "localucharrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "localunsignedcharrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "localshortrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "localushortrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "localunsignedshortrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "localintrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "localuintrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "localunsignedintrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float*", "localfloatrestrictp", + NULL + }, + { + "local_const_scalar_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "void*", "localconstvoidp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char*", "localconstcharp", + 
(const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar*", "localconstucharp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar*", "localconstunsignedcharp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short*", "localconstshortp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort*", "localconstushortp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort*", "localconstunsignedshortp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int*", "localconstintp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint*", "localconstuintp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint*", "localconstunsignedintp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float*", "localconstfloatp", + NULL + }, + { + "local_const_scalar_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "localconstvoidrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "localconstcharrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "localconstucharrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "localconstunsignedcharrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "localconstshortrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "localconstushortrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "localconstunsignedshortrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "localconstintrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "localconstuintrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "localconstunsignedintrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float*", "localconstfloatrestrictp", + NULL + }, + { + "local_volatile_scalar_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "void*", "localvolatilevoidp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_VOLATILE), "char*", "localvolatilecharp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar*", "localvolatileucharp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar*", "localvolatileunsignedcharp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short*", "localvolatileshortp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort*", "localvolatileushortp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort*", "localvolatileunsignedshortp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int*", "localvolatileintp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint*", "localvolatileuintp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint*", "localvolatileunsignedintp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float*", "localvolatilefloatp", + NULL + }, + { + "local_volatile_scalar_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "localvolatilevoidrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char*", 
"localvolatilecharrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "localvolatileucharrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "localvolatileunsignedcharrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "localvolatileshortrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "localvolatileushortrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "localvolatileunsignedshortrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "localvolatileintrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "localvolatileuintrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "localvolatileunsignedintrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float*", "localvolatilefloatrestrictp", + NULL + }, + { + "local_const_volatile_scalar_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "void*", "localconstvolatilevoidp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char*", "localconstvolatilecharp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar*", "localconstvolatileucharp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar*", "localconstvolatileunsignedcharp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short*", "localconstvolatileshortp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort*", "localconstvolatileushortp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort*", "localconstvolatileunsignedshortp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int*", "localconstvolatileintp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint*", "localconstvolatileuintp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint*", "localconstvolatileunsignedintp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float*", "localconstvolatilefloatp", + NULL + }, + { + "local_const_volatile_scalar_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "localconstvolatilevoidrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "localconstvolatilecharrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "localconstvolatileucharrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "localconstvolatileunsignedcharrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "localconstvolatileshortrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "localconstvolatileushortrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "localconstvolatileunsignedshortrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int*", 
"localconstvolatileintrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "localconstvolatileuintrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "localconstvolatileunsignedintrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float*", "localconstvolatilefloatrestrictp", + NULL + }, + { + "scalar_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char", "chard", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar", "uchard", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar", "unsignedchard", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short", "shortd", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort", "ushortd", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort", "unsignedshortd", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int", "intd", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint", "uintd", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, 
(const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint", "unsignedintd", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float", "floatd", + NULL + }, + { + "const_scalar_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char", "constchard", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar", "constuchard", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar", "constunsignedchard", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short", "constshortd", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort", "constushortd", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort", "constunsignedshortd", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int", "constintd", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint", "constuintd", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint", "constunsignedintd", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float", "constfloatd", + NULL + }, + { + "private_scalar_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_NONE), "char", "privatechard", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar", "privateuchard", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar", "privateunsignedchard", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short", "privateshortd", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort", "privateushortd", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort", "privateunsignedshortd", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int", "privateintd", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint", "privateuintd", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint", "privateunsignedintd", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float", "privatefloatd", + NULL + }, + { + "private_const_scalar_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char", "privateconstchard", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar", "privateconstuchard", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar", "privateconstunsignedchard", + 
(const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short", "privateconstshortd", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort", "privateconstushortd", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort", "privateconstunsignedshortd", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int", "privateconstintd", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint", "privateconstuintd", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint", "privateconstunsignedintd", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float", "privateconstfloatd", + NULL + }, + { + "constant_vector2_p0", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char2*", "constantchar2p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar2*", "constantuchar2p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short2*", "constantshort2p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort2*", "constantushort2p", + NULL + }, + { + "constant_vector2_p1", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int2*", 
"constantint2p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint2*", "constantuint2p", + NULL + }, + { + "constant_vector2_p2", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float2*", "constantfloat2p", + NULL + }, + { + "constant_vector2_restrict_p0", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "constantchar2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "constantuchar2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "constantshort2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "constantushort2restrictp", + NULL + }, + { + "constant_vector2_restrict_p1", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "constantint2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "constantuint2restrictp", + NULL + }, + { + "constant_vector2_restrict_p2", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "constantfloat2restrictp", + NULL + }, + { + "global_vector2_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char2*", "globalchar2p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar2*", "globaluchar2p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short2*", "globalshort2p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort2*", "globalushort2p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int2*", "globalint2p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint2*", "globaluint2p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float2*", "globalfloat2p", + NULL + }, + { + "global_vector2_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "globalchar2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "globaluchar2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "globalshort2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "globalushort2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "globalint2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, 
(const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "globaluint2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "globalfloat2restrictp", + NULL + }, + { + "global_const_vector2_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char2*", "globalconstchar2p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar2*", "globalconstuchar2p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short2*", "globalconstshort2p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort2*", "globalconstushort2p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int2*", "globalconstint2p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint2*", "globalconstuint2p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float2*", "globalconstfloat2p", + NULL + }, + { + "global_const_vector2_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "globalconstchar2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "globalconstuchar2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "globalconstshort2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "globalconstushort2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "globalconstint2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "globalconstuint2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "globalconstfloat2restrictp", + NULL + }, + { + "global_volatile_vector2_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char2*", "globalvolatilechar2p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar2*", "globalvolatileuchar2p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short2*", "globalvolatileshort2p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort2*", "globalvolatileushort2p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int2*", "globalvolatileint2p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint2*", "globalvolatileuint2p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float2*", "globalvolatilefloat2p", + NULL + }, + { + "global_volatile_vector2_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "globalvolatilechar2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "globalvolatileuchar2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "globalvolatileshort2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "globalvolatileushort2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "globalvolatileint2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "globalvolatileuint2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "globalvolatilefloat2restrictp", + NULL + }, + { + "global_const_volatile_vector2_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char2*", "globalconstvolatilechar2p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), 
"uchar2*", "globalconstvolatileuchar2p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short2*", "globalconstvolatileshort2p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort2*", "globalconstvolatileushort2p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int2*", "globalconstvolatileint2p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint2*", "globalconstvolatileuint2p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float2*", "globalconstvolatilefloat2p", + NULL + }, + { + "global_const_volatile_vector2_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "globalconstvolatilechar2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "globalconstvolatileuchar2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "globalconstvolatileshort2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", 
"globalconstvolatileushort2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "globalconstvolatileint2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "globalconstvolatileuint2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "globalconstvolatilefloat2restrictp", + NULL + }, + { + "local_vector2_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char2*", "localchar2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar2*", "localuchar2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short2*", "localshort2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort2*", "localushort2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int2*", "localint2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint2*", "localuint2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float2*", "localfloat2p", + NULL + }, + { + "local_vector2_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "localchar2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "localuchar2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "localshort2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "localushort2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "localint2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "localuint2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "localfloat2restrictp", + NULL + }, + { + "local_const_vector2_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char2*", "localconstchar2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar2*", "localconstuchar2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short2*", "localconstshort2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort2*", "localconstushort2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int2*", "localconstint2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST), "uint2*", "localconstuint2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float2*", "localconstfloat2p", + NULL + }, + { + "local_const_vector2_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "localconstchar2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "localconstuchar2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "localconstshort2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "localconstushort2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "localconstint2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "localconstuint2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "localconstfloat2restrictp", + NULL + }, + { + "local_volatile_vector2_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char2*", "localvolatilechar2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar2*", 
"localvolatileuchar2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short2*", "localvolatileshort2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort2*", "localvolatileushort2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int2*", "localvolatileint2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint2*", "localvolatileuint2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float2*", "localvolatilefloat2p", + NULL + }, + { + "local_volatile_vector2_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "localvolatilechar2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "localvolatileuchar2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "localvolatileshort2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "localvolatileushort2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "localvolatileint2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, 
(const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "localvolatileuint2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "localvolatilefloat2restrictp", + NULL + }, + { + "local_const_volatile_vector2_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char2*", "localconstvolatilechar2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar2*", "localconstvolatileuchar2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short2*", "localconstvolatileshort2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort2*", "localconstvolatileushort2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int2*", "localconstvolatileint2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint2*", "localconstvolatileuint2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float2*", "localconstvolatilefloat2p", + NULL + }, + { + "local_const_volatile_vector2_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", 
"localconstvolatilechar2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "localconstvolatileuchar2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "localconstvolatileshort2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "localconstvolatileushort2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "localconstvolatileint2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "localconstvolatileuint2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "localconstvolatilefloat2restrictp", + NULL + }, + { + "vector2_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char2", "char2d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar2", "uchar2d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short2", "short2d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const 
char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort2", "ushort2d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int2", "int2d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint2", "uint2d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float2", "float2d", + NULL + }, + { + "const_vector2_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char2", "constchar2d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar2", "constuchar2d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short2", "constshort2d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort2", "constushort2d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int2", "constint2d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint2", "constuint2d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float2", "constfloat2d", + NULL + }, + { + "private_vector2_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char2", "privatechar2d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar2", "privateuchar2d", + (const char 
*)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short2", "privateshort2d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort2", "privateushort2d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int2", "privateint2d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint2", "privateuint2d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float2", "privatefloat2d", + NULL + }, + { + "private_const_vector2_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char2", "privateconstchar2d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar2", "privateconstuchar2d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short2", "privateconstshort2d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort2", "privateconstushort2d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int2", "privateconstint2d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint2", "privateconstuint2d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float2", "privateconstfloat2d", + NULL + }, + { + "constant_vector3_p0", + (const char 
*)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char3*", "constantchar3p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar3*", "constantuchar3p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short3*", "constantshort3p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort3*", "constantushort3p", + NULL + }, + { + "constant_vector3_p1", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int3*", "constantint3p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint3*", "constantuint3p", + NULL + }, + { + "constant_vector3_p2", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float3*", "constantfloat3p", + NULL + }, + { + "constant_vector3_restrict_p0", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "constantchar3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "constantuchar3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "constantshort3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", 
"constantushort3restrictp", + NULL + }, + { + "constant_vector3_restrict_p1", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "constantint3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "constantuint3restrictp", + NULL + }, + { + "constant_vector3_restrict_p2", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "constantfloat3restrictp", + NULL + }, + { + "global_vector3_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char3*", "globalchar3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar3*", "globaluchar3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short3*", "globalshort3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort3*", "globalushort3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int3*", "globalint3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint3*", "globaluint3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float3*", "globalfloat3p", + NULL + }, + { + "global_vector3_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "globalchar3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "globaluchar3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "globalshort3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "globalushort3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "globalint3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "globaluint3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "globalfloat3restrictp", + NULL + }, + { + "global_const_vector3_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char3*", "globalconstchar3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar3*", "globalconstuchar3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short3*", "globalconstshort3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort3*", "globalconstushort3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int3*", "globalconstint3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint3*", "globalconstuint3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float3*", "globalconstfloat3p", + NULL + }, + { + "global_const_vector3_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "globalconstchar3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "globalconstuchar3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "globalconstshort3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "globalconstushort3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "globalconstint3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "globalconstuint3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "globalconstfloat3restrictp", + NULL + }, + { + "global_volatile_vector3_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char3*", "globalvolatilechar3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, 
(const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar3*", "globalvolatileuchar3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short3*", "globalvolatileshort3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort3*", "globalvolatileushort3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int3*", "globalvolatileint3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint3*", "globalvolatileuint3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float3*", "globalvolatilefloat3p", + NULL + }, + { + "global_volatile_vector3_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "globalvolatilechar3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "globalvolatileuchar3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "globalvolatileshort3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "globalvolatileushort3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "globalvolatileint3restrictp", + (const 
char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "globalvolatileuint3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "globalvolatilefloat3restrictp", + NULL + }, + { + "global_const_volatile_vector3_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char3*", "globalconstvolatilechar3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar3*", "globalconstvolatileuchar3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short3*", "globalconstvolatileshort3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort3*", "globalconstvolatileushort3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int3*", "globalconstvolatileint3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint3*", "globalconstvolatileuint3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float3*", "globalconstvolatilefloat3p", + NULL + }, + { + "global_const_volatile_vector3_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "globalconstvolatilechar3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "globalconstvolatileuchar3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "globalconstvolatileshort3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "globalconstvolatileushort3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "globalconstvolatileint3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "globalconstvolatileuint3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "globalconstvolatilefloat3restrictp", + NULL + }, + { + "local_vector3_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char3*", "localchar3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar3*", "localuchar3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_NONE), "short3*", "localshort3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort3*", "localushort3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int3*", "localint3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint3*", "localuint3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float3*", "localfloat3p", + NULL + }, + { + "local_vector3_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "localchar3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "localuchar3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "localshort3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "localushort3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "localint3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "localuint3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "localfloat3restrictp", + NULL + }, + { + "local_const_vector3_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, 
(const char *)(CL_KERNEL_ARG_TYPE_CONST), "char3*", "localconstchar3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar3*", "localconstuchar3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short3*", "localconstshort3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort3*", "localconstushort3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int3*", "localconstint3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint3*", "localconstuint3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float3*", "localconstfloat3p", + NULL + }, + { + "local_const_vector3_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "localconstchar3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "localconstuchar3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "localconstshort3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "localconstushort3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "localconstint3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "localconstuint3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "localconstfloat3restrictp", + NULL + }, + { + "local_volatile_vector3_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char3*", "localvolatilechar3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar3*", "localvolatileuchar3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short3*", "localvolatileshort3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort3*", "localvolatileushort3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int3*", "localvolatileint3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint3*", "localvolatileuint3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float3*", "localvolatilefloat3p", + NULL + }, + { + "local_volatile_vector3_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "localvolatilechar3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "localvolatileuchar3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "localvolatileshort3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "localvolatileushort3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "localvolatileint3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "localvolatileuint3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "localvolatilefloat3restrictp", + NULL + }, + { + "local_const_volatile_vector3_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char3*", "localconstvolatilechar3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar3*", "localconstvolatileuchar3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short3*", "localconstvolatileshort3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort3*", "localconstvolatileushort3p", + (const 
char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int3*", "localconstvolatileint3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint3*", "localconstvolatileuint3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float3*", "localconstvolatilefloat3p", + NULL + }, + { + "local_const_volatile_vector3_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "localconstvolatilechar3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "localconstvolatileuchar3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "localconstvolatileshort3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "localconstvolatileushort3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "localconstvolatileint3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", 
"localconstvolatileuint3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "localconstvolatilefloat3restrictp", + NULL + }, + { + "vector3_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char3", "char3d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar3", "uchar3d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short3", "short3d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort3", "ushort3d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int3", "int3d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint3", "uint3d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float3", "float3d", + NULL + }, + { + "const_vector3_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char3", "constchar3d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar3", "constuchar3d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short3", "constshort3d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort3", "constushort3d", 
+ (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int3", "constint3d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint3", "constuint3d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float3", "constfloat3d", + NULL + }, + { + "private_vector3_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char3", "privatechar3d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar3", "privateuchar3d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short3", "privateshort3d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort3", "privateushort3d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int3", "privateint3d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint3", "privateuint3d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float3", "privatefloat3d", + NULL + }, + { + "private_const_vector3_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char3", "privateconstchar3d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar3", "privateconstuchar3d", + (const char 
*)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short3", "privateconstshort3d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort3", "privateconstushort3d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int3", "privateconstint3d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint3", "privateconstuint3d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float3", "privateconstfloat3d", + NULL + }, + { + "constant_vector4_p0", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char4*", "constantchar4p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar4*", "constantuchar4p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short4*", "constantshort4p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort4*", "constantushort4p", + NULL + }, + { + "constant_vector4_p1", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int4*", "constantint4p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint4*", "constantuint4p", + NULL + }, + { + "constant_vector4_p2", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST), "float4*", "constantfloat4p", + NULL + }, + { + "constant_vector4_restrict_p0", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "constantchar4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "constantuchar4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "constantshort4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "constantushort4restrictp", + NULL + }, + { + "constant_vector4_restrict_p1", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "constantint4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "constantuint4restrictp", + NULL + }, + { + "constant_vector4_restrict_p2", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "constantfloat4restrictp", + NULL + }, + { + "global_vector4_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char4*", "globalchar4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar4*", "globaluchar4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short4*", "globalshort4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort4*", "globalushort4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int4*", "globalint4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint4*", "globaluint4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float4*", "globalfloat4p", + NULL + }, + { + "global_vector4_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "globalchar4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "globaluchar4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "globalshort4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "globalushort4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "globalint4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "globaluint4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "globalfloat4restrictp", + NULL + }, + { + "global_const_vector4_p", + (const char 
*)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char4*", "globalconstchar4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar4*", "globalconstuchar4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short4*", "globalconstshort4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort4*", "globalconstushort4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int4*", "globalconstint4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint4*", "globalconstuint4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float4*", "globalconstfloat4p", + NULL + }, + { + "global_const_vector4_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "globalconstchar4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "globalconstuchar4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "globalconstshort4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "globalconstushort4restrictp", + (const char 
*)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "globalconstint4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "globalconstuint4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "globalconstfloat4restrictp", + NULL + }, + { + "global_volatile_vector4_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char4*", "globalvolatilechar4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar4*", "globalvolatileuchar4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short4*", "globalvolatileshort4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort4*", "globalvolatileushort4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int4*", "globalvolatileint4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint4*", "globalvolatileuint4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float4*", "globalvolatilefloat4p", + NULL + }, + { + "global_volatile_vector4_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "globalvolatilechar4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "globalvolatileuchar4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "globalvolatileshort4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "globalvolatileushort4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "globalvolatileint4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "globalvolatileuint4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "globalvolatilefloat4restrictp", + NULL + }, + { + "global_const_volatile_vector4_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char4*", "globalconstvolatilechar4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar4*", "globalconstvolatileuchar4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short4*", "globalconstvolatileshort4p", + (const char 
*)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort4*", "globalconstvolatileushort4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int4*", "globalconstvolatileint4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint4*", "globalconstvolatileuint4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float4*", "globalconstvolatilefloat4p", + NULL + }, + { + "global_const_volatile_vector4_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "globalconstvolatilechar4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "globalconstvolatileuchar4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "globalconstvolatileshort4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "globalconstvolatileushort4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", 
"globalconstvolatileint4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "globalconstvolatileuint4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "globalconstvolatilefloat4restrictp", + NULL + }, + { + "local_vector4_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char4*", "localchar4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar4*", "localuchar4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short4*", "localshort4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort4*", "localushort4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int4*", "localint4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint4*", "localuint4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float4*", "localfloat4p", + NULL + }, + { + "local_vector4_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "localchar4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "localuchar4restrictp", + (const 
char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "localshort4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "localushort4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "localint4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "localuint4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "localfloat4restrictp", + NULL + }, + { + "local_const_vector4_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char4*", "localconstchar4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar4*", "localconstuchar4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short4*", "localconstshort4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort4*", "localconstushort4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int4*", "localconstint4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint4*", "localconstuint4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float4*", "localconstfloat4p", + NULL + }, + { + 
"local_const_vector4_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "localconstchar4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "localconstuchar4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "localconstshort4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "localconstushort4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "localconstint4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "localconstuint4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "localconstfloat4restrictp", + NULL + }, + { + "local_volatile_vector4_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char4*", "localvolatilechar4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar4*", "localvolatileuchar4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short4*", "localvolatileshort4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort4*", "localvolatileushort4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int4*", "localvolatileint4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint4*", "localvolatileuint4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float4*", "localvolatilefloat4p", + NULL + }, + { + "local_volatile_vector4_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "localvolatilechar4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "localvolatileuchar4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "localvolatileshort4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "localvolatileushort4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "localvolatileint4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "localvolatileuint4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "localvolatilefloat4restrictp", + NULL + }, + { + "local_const_volatile_vector4_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char4*", "localconstvolatilechar4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar4*", "localconstvolatileuchar4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short4*", "localconstvolatileshort4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort4*", "localconstvolatileushort4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int4*", "localconstvolatileint4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint4*", "localconstvolatileuint4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float4*", "localconstvolatilefloat4p", + NULL + }, + { + "local_const_volatile_vector4_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "localconstvolatilechar4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), 
"uchar4*", "localconstvolatileuchar4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "localconstvolatileshort4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "localconstvolatileushort4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "localconstvolatileint4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "localconstvolatileuint4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "localconstvolatilefloat4restrictp", + NULL + }, + { + "vector4_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char4", "char4d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar4", "uchar4d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short4", "short4d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort4", "ushort4d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int4", "int4d", + (const char 
*)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint4", "uint4d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float4", "float4d", + NULL + }, + { + "const_vector4_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char4", "constchar4d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar4", "constuchar4d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short4", "constshort4d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort4", "constushort4d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int4", "constint4d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint4", "constuint4d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float4", "constfloat4d", + NULL + }, + { + "private_vector4_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char4", "privatechar4d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar4", "privateuchar4d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short4", "privateshort4d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, 
(const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort4", "privateushort4d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int4", "privateint4d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint4", "privateuint4d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float4", "privatefloat4d", + NULL + }, + { + "private_const_vector4_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char4", "privateconstchar4d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar4", "privateconstuchar4d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short4", "privateconstshort4d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort4", "privateconstushort4d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int4", "privateconstint4d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint4", "privateconstuint4d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float4", "privateconstfloat4d", + NULL + }, + { + "constant_vector8_p0", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char8*", "constantchar8p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, 
(const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar8*", "constantuchar8p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short8*", "constantshort8p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort8*", "constantushort8p", + NULL + }, + { + "constant_vector8_p1", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int8*", "constantint8p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint8*", "constantuint8p", + NULL + }, + { + "constant_vector8_p2", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float8*", "constantfloat8p", + NULL + }, + { + "constant_vector8_restrict_p0", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", "constantchar8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "constantuchar8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "constantshort8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "constantushort8restrictp", + NULL + }, + { + "constant_vector8_restrict_p1", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "constantint8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "constantuint8restrictp", + NULL + }, + { + "constant_vector8_restrict_p2", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "constantfloat8restrictp", + NULL + }, + { + "global_vector8_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char8*", "globalchar8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar8*", "globaluchar8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short8*", "globalshort8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort8*", "globalushort8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int8*", "globalint8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint8*", "globaluint8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float8*", "globalfloat8p", + NULL + }, + { + "global_vector8_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", "globalchar8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "globaluchar8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "globalshort8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "globalushort8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "globalint8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "globaluint8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "globalfloat8restrictp", + NULL + }, + { + "global_const_vector8_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char8*", "globalconstchar8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar8*", "globalconstuchar8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short8*", "globalconstshort8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort8*", "globalconstushort8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int8*", "globalconstint8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint8*", "globalconstuint8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float8*", "globalconstfloat8p", + NULL + }, + { + "global_const_vector8_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", "globalconstchar8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "globalconstuchar8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "globalconstshort8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "globalconstushort8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "globalconstint8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "globalconstuint8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "globalconstfloat8restrictp", + NULL + }, + { + "global_volatile_vector8_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char8*", "globalvolatilechar8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar8*", "globalvolatileuchar8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short8*", "globalvolatileshort8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort8*", "globalvolatileushort8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int8*", "globalvolatileint8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint8*", "globalvolatileuint8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float8*", "globalvolatilefloat8p", + NULL + }, + { + "global_volatile_vector8_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", "globalvolatilechar8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "globalvolatileuchar8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "globalvolatileshort8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "globalvolatileushort8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "globalvolatileint8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "globalvolatileuint8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "globalvolatilefloat8restrictp", + NULL + }, + { + "global_const_volatile_vector8_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char8*", "globalconstvolatilechar8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar8*", "globalconstvolatileuchar8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short8*", "globalconstvolatileshort8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort8*", "globalconstvolatileushort8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int8*", "globalconstvolatileint8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint8*", "globalconstvolatileuint8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float8*", "globalconstvolatilefloat8p", + NULL + }, + { + "global_const_volatile_vector8_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", 
"globalconstvolatilechar8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "globalconstvolatileuchar8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "globalconstvolatileshort8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "globalconstvolatileushort8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "globalconstvolatileint8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "globalconstvolatileuint8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "globalconstvolatilefloat8restrictp", + NULL + }, + { + "local_vector8_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char8*", "localchar8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar8*", "localuchar8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short8*", "localshort8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort8*", "localushort8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int8*", "localint8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint8*", "localuint8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float8*", "localfloat8p", + NULL + }, + { + "local_vector8_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", "localchar8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "localuchar8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "localshort8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "localushort8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "localint8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "localuint8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "localfloat8restrictp", + NULL + }, + { + "local_const_vector8_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char8*", "localconstchar8p", + (const char 
*)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar8*", "localconstuchar8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short8*", "localconstshort8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort8*", "localconstushort8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int8*", "localconstint8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint8*", "localconstuint8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float8*", "localconstfloat8p", + NULL + }, + { + "local_const_vector8_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", "localconstchar8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "localconstuchar8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "localconstshort8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "localconstushort8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "localconstint8restrictp", + (const char 
*)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "localconstuint8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "localconstfloat8restrictp", + NULL + }, + { + "local_volatile_vector8_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char8*", "localvolatilechar8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar8*", "localvolatileuchar8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short8*", "localvolatileshort8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort8*", "localvolatileushort8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int8*", "localvolatileint8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint8*", "localvolatileuint8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float8*", "localvolatilefloat8p", + NULL + }, + { + "local_volatile_vector8_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", "localvolatilechar8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), 
"uchar8*", "localvolatileuchar8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "localvolatileshort8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "localvolatileushort8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "localvolatileint8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "localvolatileuint8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "localvolatilefloat8restrictp", + NULL + }, + { + "local_const_volatile_vector8_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char8*", "localconstvolatilechar8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar8*", "localconstvolatileuchar8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short8*", "localconstvolatileshort8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort8*", "localconstvolatileushort8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int8*", "localconstvolatileint8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint8*", "localconstvolatileuint8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float8*", "localconstvolatilefloat8p", + NULL + }, + { + "local_const_volatile_vector8_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", "localconstvolatilechar8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "localconstvolatileuchar8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "localconstvolatileshort8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "localconstvolatileushort8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "localconstvolatileint8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "localconstvolatileuint8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "localconstvolatilefloat8restrictp", + NULL + }, + { + "vector8_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char8", "char8d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar8", "uchar8d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short8", "short8d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort8", "ushort8d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int8", "int8d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint8", "uint8d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float8", "float8d", + NULL + }, + { + "const_vector8_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char8", "constchar8d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar8", "constuchar8d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short8", "constshort8d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort8", "constushort8d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, 
(const char *)(CL_KERNEL_ARG_TYPE_NONE), "int8", "constint8d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint8", "constuint8d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float8", "constfloat8d", + NULL + }, + { + "private_vector8_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char8", "privatechar8d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar8", "privateuchar8d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short8", "privateshort8d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort8", "privateushort8d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int8", "privateint8d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint8", "privateuint8d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float8", "privatefloat8d", + NULL + }, + { + "private_const_vector8_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char8", "privateconstchar8d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar8", "privateconstuchar8d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), 
"short8", "privateconstshort8d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort8", "privateconstushort8d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int8", "privateconstint8d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint8", "privateconstuint8d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float8", "privateconstfloat8d", + NULL + }, + { + "constant_vector16_p0", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char16*", "constantchar16p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar16*", "constantuchar16p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short16*", "constantshort16p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort16*", "constantushort16p", + NULL + }, + { + "constant_vector16_p1", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int16*", "constantint16p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint16*", "constantuint16p", + NULL + }, + { + "constant_vector16_p2", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float16*", "constantfloat16p", + NULL + }, + { + "constant_vector16_restrict_p0", + (const 
char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "constantchar16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "constantuchar16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "constantshort16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "constantushort16restrictp", + NULL + }, + { + "constant_vector16_restrict_p1", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "constantint16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "constantuint16restrictp", + NULL + }, + { + "constant_vector16_restrict_p2", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "constantfloat16restrictp", + NULL + }, + { + "global_vector16_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char16*", "globalchar16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar16*", "globaluchar16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short16*", "globalshort16p", + (const 
char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort16*", "globalushort16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int16*", "globalint16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint16*", "globaluint16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float16*", "globalfloat16p", + NULL + }, + { + "global_vector16_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "globalchar16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "globaluchar16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "globalshort16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "globalushort16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "globalint16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "globaluint16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "globalfloat16restrictp", + NULL + }, + { + "global_const_vector16_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST), "char16*", "globalconstchar16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar16*", "globalconstuchar16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short16*", "globalconstshort16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort16*", "globalconstushort16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int16*", "globalconstint16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint16*", "globalconstuint16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float16*", "globalconstfloat16p", + NULL + }, + { + "global_const_vector16_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "globalconstchar16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "globalconstuchar16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "globalconstshort16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "globalconstushort16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "globalconstint16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "globalconstuint16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "globalconstfloat16restrictp", + NULL + }, + { + "global_volatile_vector16_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char16*", "globalvolatilechar16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar16*", "globalvolatileuchar16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short16*", "globalvolatileshort16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort16*", "globalvolatileushort16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int16*", "globalvolatileint16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint16*", "globalvolatileuint16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float16*", "globalvolatilefloat16p", + NULL + }, + { + "global_volatile_vector16_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "globalvolatilechar16restrictp", + (const 
char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "globalvolatileuchar16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "globalvolatileshort16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "globalvolatileushort16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "globalvolatileint16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "globalvolatileuint16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "globalvolatilefloat16restrictp", + NULL + }, + { + "global_const_volatile_vector16_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char16*", "globalconstvolatilechar16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar16*", "globalconstvolatileuchar16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short16*", "globalconstvolatileshort16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort16*", "globalconstvolatileushort16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int16*", "globalconstvolatileint16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint16*", "globalconstvolatileuint16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float16*", "globalconstvolatilefloat16p", + NULL + }, + { + "global_const_volatile_vector16_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "globalconstvolatilechar16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "globalconstvolatileuchar16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "globalconstvolatileshort16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "globalconstvolatileushort16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "globalconstvolatileint16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "globalconstvolatileuint16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "globalconstvolatilefloat16restrictp", + NULL + }, + { + "local_vector16_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char16*", "localchar16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar16*", "localuchar16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short16*", "localshort16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort16*", "localushort16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int16*", "localint16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint16*", "localuint16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float16*", "localfloat16p", + NULL + }, + { + "local_vector16_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "localchar16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "localuchar16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "localshort16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "localushort16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "localint16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "localuint16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "localfloat16restrictp", + NULL + }, + { + "local_const_vector16_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char16*", "localconstchar16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar16*", "localconstuchar16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short16*", "localconstshort16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort16*", "localconstushort16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int16*", "localconstint16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint16*", "localconstuint16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float16*", "localconstfloat16p", + NULL + }, + { + 
"local_const_vector16_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "localconstchar16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "localconstuchar16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "localconstshort16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "localconstushort16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "localconstint16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "localconstuint16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "localconstfloat16restrictp", + NULL + }, + { + "local_volatile_vector16_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char16*", "localvolatilechar16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar16*", "localvolatileuchar16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short16*", "localvolatileshort16p", + (const char 
*)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort16*", "localvolatileushort16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int16*", "localvolatileint16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint16*", "localvolatileuint16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float16*", "localvolatilefloat16p", + NULL + }, + { + "local_volatile_vector16_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "localvolatilechar16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "localvolatileuchar16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "localvolatileshort16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "localvolatileushort16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "localvolatileint16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "localvolatileuint16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "localvolatilefloat16restrictp", + NULL + }, + { + "local_const_volatile_vector16_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char16*", "localconstvolatilechar16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar16*", "localconstvolatileuchar16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short16*", "localconstvolatileshort16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort16*", "localconstvolatileushort16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int16*", "localconstvolatileint16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint16*", "localconstvolatileuint16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float16*", "localconstvolatilefloat16p", + NULL + }, + { + "local_const_volatile_vector16_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "localconstvolatilechar16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "localconstvolatileuchar16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "localconstvolatileshort16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "localconstvolatileushort16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "localconstvolatileint16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "localconstvolatileuint16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "localconstvolatilefloat16restrictp", + NULL + }, + { + "vector16_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char16", "char16d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar16", "uchar16d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short16", "short16d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort16", "ushort16d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const 
char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int16", "int16d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint16", "uint16d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float16", "float16d", + NULL + }, + { + "const_vector16_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char16", "constchar16d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar16", "constuchar16d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short16", "constshort16d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort16", "constushort16d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int16", "constint16d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint16", "constuint16d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float16", "constfloat16d", + NULL + }, + { + "private_vector16_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char16", "privatechar16d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar16", "privateuchar16d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_NONE), "short16", "privateshort16d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort16", "privateushort16d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int16", "privateint16d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint16", "privateuint16d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float16", "privatefloat16d", + NULL + }, + { + "private_const_vector16_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char16", "privateconstchar16d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar16", "privateconstuchar16d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short16", "privateconstshort16d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort16", "privateconstushort16d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int16", "privateconstint16d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint16", "privateconstuint16d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float16", "privateconstfloat16d", + NULL + }, + { + "constant_derived_p0", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_type*", "constanttypedef_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "struct struct_type*", "constantstructstruct_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_struct_type*", "constanttypedef_struct_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "union union_type*", "constantunionunion_typep", + NULL + }, + { + "constant_derived_p1", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_union_type*", "constanttypedef_union_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "enum enum_type*", "constantenumenum_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_enum_type*", "constanttypedef_enum_typep", + NULL + }, + { + "constant_derived_restrict_p0", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "constanttypedef_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "constantstructstruct_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "constanttypedef_struct_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, 
(const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "constantunionunion_typerestrictp", + NULL + }, + { + "constant_derived_restrict_p1", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "constanttypedef_union_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "constantenumenum_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "constanttypedef_enum_typerestrictp", + NULL + }, + { + "global_derived_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_type*", "globaltypedef_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "struct struct_type*", "globalstructstruct_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_struct_type*", "globaltypedef_struct_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "union union_type*", "globalunionunion_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_union_type*", "globaltypedef_union_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "enum enum_type*", "globalenumenum_typep", + (const char 
*)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_enum_type*", "globaltypedef_enum_typep", + NULL + }, + { + "global_derived_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "globaltypedef_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "globalstructstruct_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "globaltypedef_struct_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "globalunionunion_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "globaltypedef_union_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "globalenumenum_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "globaltypedef_enum_typerestrictp", + NULL + }, + { + "global_const_derived_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_type*", "globalconsttypedef_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "struct struct_type*", "globalconststructstruct_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_struct_type*", "globalconsttypedef_struct_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "union union_type*", "globalconstunionunion_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_union_type*", "globalconsttypedef_union_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "enum enum_type*", "globalconstenumenum_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_enum_type*", "globalconsttypedef_enum_typep", + NULL + }, + { + "global_const_derived_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "globalconsttypedef_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "globalconststructstruct_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "globalconsttypedef_struct_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "globalconstunionunion_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", 
"globalconsttypedef_union_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "globalconstenumenum_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "globalconsttypedef_enum_typerestrictp", + NULL + }, + { + "global_volatile_derived_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_type*", "globalvolatiletypedef_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "struct struct_type*", "globalvolatilestructstruct_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_struct_type*", "globalvolatiletypedef_struct_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "union union_type*", "globalvolatileunionunion_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_union_type*", "globalvolatiletypedef_union_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "enum enum_type*", "globalvolatileenumenum_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_enum_type*", "globalvolatiletypedef_enum_typep", + NULL + }, + { + "global_volatile_derived_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "globalvolatiletypedef_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "globalvolatilestructstruct_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "globalvolatiletypedef_struct_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "globalvolatileunionunion_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "globalvolatiletypedef_union_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "globalvolatileenumenum_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "globalvolatiletypedef_enum_typerestrictp", + NULL + }, + { + "global_const_volatile_derived_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_type*", "globalconstvolatiletypedef_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "struct struct_type*", "globalconstvolatilestructstruct_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, 
(const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_struct_type*", "globalconstvolatiletypedef_struct_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "union union_type*", "globalconstvolatileunionunion_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_union_type*", "globalconstvolatiletypedef_union_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "enum enum_type*", "globalconstvolatileenumenum_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_enum_type*", "globalconstvolatiletypedef_enum_typep", + NULL + }, + { + "global_const_volatile_derived_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "globalconstvolatiletypedef_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "globalconstvolatilestructstruct_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "globalconstvolatiletypedef_struct_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "globalconstvolatileunionunion_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "globalconstvolatiletypedef_union_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "globalconstvolatileenumenum_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "globalconstvolatiletypedef_enum_typerestrictp", + NULL + }, + { + "local_derived_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_type*", "localtypedef_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "struct struct_type*", "localstructstruct_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_struct_type*", "localtypedef_struct_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "union union_type*", "localunionunion_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_union_type*", "localtypedef_union_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "enum enum_type*", "localenumenum_typep", + 
(const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_enum_type*", "localtypedef_enum_typep", + NULL + }, + { + "local_derived_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "localtypedef_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "localstructstruct_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "localtypedef_struct_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "localunionunion_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "localtypedef_union_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "localenumenum_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "localtypedef_enum_typerestrictp", + NULL + }, + { + "local_const_derived_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_type*", "localconsttypedef_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "struct struct_type*", "localconststructstruct_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_struct_type*", "localconsttypedef_struct_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "union union_type*", "localconstunionunion_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_union_type*", "localconsttypedef_union_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "enum enum_type*", "localconstenumenum_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_enum_type*", "localconsttypedef_enum_typep", + NULL + }, + { + "local_const_derived_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "localconsttypedef_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "localconststructstruct_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "localconsttypedef_struct_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "localconstunionunion_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "localconsttypedef_union_typerestrictp", + (const char 
*)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "localconstenumenum_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "localconsttypedef_enum_typerestrictp", + NULL + }, + { + "local_volatile_derived_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_type*", "localvolatiletypedef_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "struct struct_type*", "localvolatilestructstruct_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_struct_type*", "localvolatiletypedef_struct_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "union union_type*", "localvolatileunionunion_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_union_type*", "localvolatiletypedef_union_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "enum enum_type*", "localvolatileenumenum_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_enum_type*", "localvolatiletypedef_enum_typep", + NULL + }, + { + "local_volatile_derived_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", 
"localvolatiletypedef_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "localvolatilestructstruct_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "localvolatiletypedef_struct_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "localvolatileunionunion_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "localvolatiletypedef_union_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "localvolatileenumenum_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "localvolatiletypedef_enum_typerestrictp", + NULL + }, + { + "local_const_volatile_derived_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_type*", "localconstvolatiletypedef_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "struct struct_type*", "localconstvolatilestructstruct_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_struct_type*", "localconstvolatiletypedef_struct_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "union union_type*", "localconstvolatileunionunion_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_union_type*", "localconstvolatiletypedef_union_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "enum enum_type*", "localconstvolatileenumenum_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_enum_type*", "localconstvolatiletypedef_enum_typep", + NULL + }, + { + "local_const_volatile_derived_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "localconstvolatiletypedef_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "localconstvolatilestructstruct_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "localconstvolatiletypedef_struct_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "union 
union_type*", "localconstvolatileunionunion_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "localconstvolatiletypedef_union_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "localconstvolatileenumenum_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "localconstvolatiletypedef_enum_typerestrictp", + NULL + }, + { + "derived_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_type", "typedef_typed", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "struct struct_type", "structstruct_typed", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_struct_type", "typedef_struct_typed", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "union union_type", "unionunion_typed", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_union_type", "typedef_union_typed", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "enum enum_type", "enumenum_typed", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), 
"typedef_enum_type", "typedef_enum_typed", + NULL + }, + { + "const_derived_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_type", "consttypedef_typed", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "struct struct_type", "conststructstruct_typed", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_struct_type", "consttypedef_struct_typed", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "union union_type", "constunionunion_typed", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_union_type", "consttypedef_union_typed", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "enum enum_type", "constenumenum_typed", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_enum_type", "consttypedef_enum_typed", + NULL + }, + { + "private_derived_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_type", "privatetypedef_typed", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "struct struct_type", "privatestructstruct_typed", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_struct_type", "privatetypedef_struct_typed", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_NONE), "union union_type", "privateunionunion_typed", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_union_type", "privatetypedef_union_typed", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "enum enum_type", "privateenumenum_typed", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_enum_type", "privatetypedef_enum_typed", + NULL + }, + { + "private_const_derived_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_type", "privateconsttypedef_typed", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "struct struct_type", "privateconststructstruct_typed", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_struct_type", "privateconsttypedef_struct_typed", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "union union_type", "privateconstunionunion_typed", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_union_type", "privateconsttypedef_union_typed", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "enum enum_type", "privateconstenumenum_typed", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_enum_type", "privateconsttypedef_enum_typed", + NULL + }, +}; + +// Support for optional image data type +const char 
* image_kernel_args[] = { + "#pragma OPENCL EXTENSION cl_khr_3d_image_writes: enable \n" + "kernel void image_d(read_only image2d_t image2d_td0,\n" + " write_only image2d_t image2d_td1,\n" + " read_only image3d_t image3d_td2,\n" + " write_only image3d_t image3d_td3,\n" + " read_only image2d_array_t image2d_array_td4,\n" + " write_only image2d_array_t image2d_array_td5,\n" + " read_only image1d_t image1d_td6,\n" + " write_only image1d_t image1d_td7,\n" + " read_only image1d_buffer_t image1d_buffer_td8,\n" + " write_only image1d_buffer_t image1d_buffer_td9,\n" + " read_only image1d_array_t image1d_array_td10,\n" + " write_only image1d_array_t image1d_array_td11,\n" + " sampler_t sampler_td12)\n" + "{}\n", + "\n" +}; + +const char * image_arg_info[][67] = { + { + "image_d", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_READ_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image2d_t", "image2d_td0", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_WRITE_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image2d_t", "image2d_td1", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_READ_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image3d_t", "image3d_td2", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_WRITE_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image3d_t", "image3d_td3", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_READ_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image2d_array_t", "image2d_array_td4", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_WRITE_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image2d_array_t", "image2d_array_td5", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_READ_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image1d_t", "image1d_td6", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char 
*)CL_KERNEL_ARG_ACCESS_WRITE_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image1d_t", "image1d_td7", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_READ_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image1d_buffer_t", "image1d_buffer_td8", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_WRITE_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image1d_buffer_t", "image1d_buffer_td9", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_READ_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image1d_array_t", "image1d_array_td10", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_WRITE_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image1d_array_t", "image1d_array_td11", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "sampler_t", "sampler_td12", + NULL + }, +}; + +// Support for optional double data type +const char * double_kernel_args[] = { + "kernel void double_scalar_p(constant double*constantdoublep,\n" + " constant double *restrict constantdoublerestrictp,\n" + " global double*globaldoublep,\n" + " global double *restrict globaldoublerestrictp,\n" + " global const double* globalconstdoublep,\n" + " global const double * restrict globalconstdoublerestrictp,\n" + " global volatile double*globalvolatiledoublep,\n" + " global volatile double *restrict globalvolatiledoublerestrictp,\n" + " global const volatile double* globalconstvolatiledoublep)\n" + "{}\n", + "\n" + "kernel void double_scalar_p2(global const volatile double * restrict globalconstvolatiledoublerestrictp,\n" + " local double*localdoublep,\n" + " local double *restrict localdoublerestrictp,\n" + " local const double* localconstdoublep,\n" + " local const double * restrict localconstdoublerestrictp,\n" + " local volatile double*localvolatiledoublep,\n" + " local volatile double *restrict 
localvolatiledoublerestrictp,\n" + " local const volatile double* localconstvolatiledoublep,\n" + " local const volatile double * restrict localconstvolatiledoublerestrictp)\n" + "{}\n", + "\n" + "kernel void double_scalar_d(double doubled,\n" + " const double constdoubled,\n" + " private double privatedoubled,\n" + " private const double privateconstdoubled)\n" + "{}\n", + "\n" + "kernel void double_vector2_p(constant double2*constantdouble2p,\n" + " constant double2 *restrict constantdouble2restrictp,\n" + " global double2*globaldouble2p,\n" + " global double2 *restrict globaldouble2restrictp,\n" + " global const double2* globalconstdouble2p,\n" + " global const double2 * restrict globalconstdouble2restrictp,\n" + " global volatile double2*globalvolatiledouble2p,\n" + " global volatile double2 *restrict globalvolatiledouble2restrictp,\n" + " global const volatile double2* globalconstvolatiledouble2p)\n" + "{}\n", + "\n" + "kernel void double_vector2_p2(global const volatile double2 * restrict globalconstvolatiledouble2restrictp,\n" + " local double2*localdouble2p,\n" + " local double2 *restrict localdouble2restrictp,\n" + " local const double2* localconstdouble2p,\n" + " local const double2 * restrict localconstdouble2restrictp,\n" + " local volatile double2*localvolatiledouble2p,\n" + " local volatile double2 *restrict localvolatiledouble2restrictp,\n" + " local const volatile double2* localconstvolatiledouble2p,\n" + " local const volatile double2 * restrict localconstvolatiledouble2restrictp)\n" + "{}\n", + "\n" + "kernel void double_vector2_d(double2 double2d,\n" + " const double2 constdouble2d,\n" + " private double2 privatedouble2d,\n" + " private const double2 privateconstdouble2d)\n" + "{}\n", + "\n" + "kernel void double_vector3_p(constant double3*constantdouble3p,\n" + " constant double3 *restrict constantdouble3restrictp,\n" + " global double3*globaldouble3p,\n" + " global double3 *restrict globaldouble3restrictp,\n" + " global const double3* 
globalconstdouble3p,\n" + " global const double3 * restrict globalconstdouble3restrictp,\n" + " global volatile double3*globalvolatiledouble3p,\n" + " global volatile double3 *restrict globalvolatiledouble3restrictp,\n" + " global const volatile double3* globalconstvolatiledouble3p)\n" + "{}\n", + "\n" + "kernel void double_vector3_p2(global const volatile double3 * restrict globalconstvolatiledouble3restrictp,\n" + " local double3*localdouble3p,\n" + " local double3 *restrict localdouble3restrictp,\n" + " local const double3* localconstdouble3p,\n" + " local const double3 * restrict localconstdouble3restrictp,\n" + " local volatile double3*localvolatiledouble3p,\n" + " local volatile double3 *restrict localvolatiledouble3restrictp,\n" + " local const volatile double3* localconstvolatiledouble3p,\n" + " local const volatile double3 * restrict localconstvolatiledouble3restrictp)\n" + "{}\n", + "\n" + "kernel void double_vector3_d(double3 double3d,\n" + " const double3 constdouble3d,\n" + " private double3 privatedouble3d,\n" + " private const double3 privateconstdouble3d)\n" + "{}\n", + "\n" + "kernel void double_vector4_p(constant double4*constantdouble4p,\n" + " constant double4 *restrict constantdouble4restrictp,\n" + " global double4*globaldouble4p,\n" + " global double4 *restrict globaldouble4restrictp,\n" + " global const double4* globalconstdouble4p,\n" + " global const double4 * restrict globalconstdouble4restrictp,\n" + " global volatile double4*globalvolatiledouble4p,\n" + " global volatile double4 *restrict globalvolatiledouble4restrictp,\n" + " global const volatile double4* globalconstvolatiledouble4p)\n" + "{}\n", + "\n" + "kernel void double_vector4_p2(global const volatile double4 * restrict globalconstvolatiledouble4restrictp,\n" + " local double4*localdouble4p,\n" + " local double4 *restrict localdouble4restrictp,\n" + " local const double4* localconstdouble4p,\n" + " local const double4 * restrict localconstdouble4restrictp,\n" + " local volatile 
double4*localvolatiledouble4p,\n" + " local volatile double4 *restrict localvolatiledouble4restrictp,\n" + " local const volatile double4* localconstvolatiledouble4p,\n" + " local const volatile double4 * restrict localconstvolatiledouble4restrictp)\n" + "{}\n", + "\n" + "kernel void double_vector4_d(double4 double4d,\n" + " const double4 constdouble4d,\n" + " private double4 privatedouble4d,\n" + " private const double4 privateconstdouble4d)\n" + "{}\n", + "\n" + "kernel void double_vector8_p(constant double8*constantdouble8p,\n" + " constant double8 *restrict constantdouble8restrictp,\n" + " global double8*globaldouble8p,\n" + " global double8 *restrict globaldouble8restrictp,\n" + " global const double8* globalconstdouble8p,\n" + " global const double8 * restrict globalconstdouble8restrictp,\n" + " global volatile double8*globalvolatiledouble8p,\n" + " global volatile double8 *restrict globalvolatiledouble8restrictp,\n" + " global const volatile double8* globalconstvolatiledouble8p)\n" + "{}\n", + "\n" + "kernel void double_vector8_p2(global const volatile double8 * restrict globalconstvolatiledouble8restrictp,\n" + " local double8*localdouble8p,\n" + " local double8 *restrict localdouble8restrictp,\n" + " local const double8* localconstdouble8p,\n" + " local const double8 * restrict localconstdouble8restrictp,\n" + " local volatile double8*localvolatiledouble8p,\n" + " local volatile double8 *restrict localvolatiledouble8restrictp,\n" + " local const volatile double8* localconstvolatiledouble8p,\n" + " local const volatile double8 * restrict localconstvolatiledouble8restrictp)\n" + "{}\n", + "\n" + "kernel void double_vector8_d(double8 double8d,\n" + " const double8 constdouble8d,\n" + " private double8 privatedouble8d,\n" + " private const double8 privateconstdouble8d)\n" + "{}\n", + "\n" + "kernel void double_vector16_p(constant double16*constantdouble16p,\n" + " constant double16 *restrict constantdouble16restrictp,\n" + " global double16*globaldouble16p,\n" 
+ " global double16 *restrict globaldouble16restrictp,\n" + " global const double16* globalconstdouble16p,\n" + " global const double16 * restrict globalconstdouble16restrictp,\n" + " global volatile double16*globalvolatiledouble16p,\n" + " global volatile double16 *restrict globalvolatiledouble16restrictp,\n" + " global const volatile double16* globalconstvolatiledouble16p)\n" + "{}\n", + "\n" + "kernel void double_vector16_p2(global const volatile double16 * restrict globalconstvolatiledouble16restrictp,\n" + " local double16*localdouble16p,\n" + " local double16 *restrict localdouble16restrictp,\n" + " local const double16* localconstdouble16p,\n" + " local const double16 * restrict localconstdouble16restrictp,\n" + " local volatile double16*localvolatiledouble16p,\n" + " local volatile double16 *restrict localvolatiledouble16restrictp,\n" + " local const volatile double16* localconstvolatiledouble16p,\n" + " local const volatile double16 * restrict localconstvolatiledouble16restrictp)\n" + "{}\n", + "\n" + "kernel void double_vector16_d(double16 double16d,\n" + " const double16 constdouble16d,\n" + " private double16 privatedouble16d,\n" + " private const double16 privateconstdouble16d)\n" + "{}\n", + "\n" +}; + +// Support for optional 3D image data type +const char * image_3D_kernel_args[] = { + "#pragma OPENCL EXTENSION cl_khr_3d_image_writes: enable \n" + "kernel void image_d(read_only image3d_t image3d_td2,\n" + " write_only image3d_t image3d_td3)\n" + "{}\n", + "\n" +}; + +const char * image_3D_arg_info[][67] = { + { + "image_d", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_READ_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image3d_t", "image3d_td2", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_WRITE_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image3d_t", "image3d_td3", + NULL + }, +}; + +const char * double_arg_info[][77] = { + { + "double_scalar_p", + (const char 
*)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double*", "constantdoublep", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "constantdoublerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double*", "globaldoublep", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "globaldoublerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double*", "globalconstdoublep", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "globalconstdoublerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double*", "globalvolatiledoublep", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "globalvolatiledoublerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double*", "globalconstvolatiledoublep", + NULL + }, + { + "double_scalar_p2", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "globalconstvolatiledoublerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), 
"double*", "localdoublep", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "localdoublerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double*", "localconstdoublep", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "localconstdoublerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double*", "localvolatiledoublep", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "localvolatiledoublerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double*", "localconstvolatiledoublep", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "localconstvolatiledoublerestrictp", + NULL + }, + { + "double_scalar_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double", "doubled", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double", "constdoubled", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double", "privatedoubled", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double", 
"privateconstdoubled", + NULL + }, + { + "double_vector2_p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double2*", "constantdouble2p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "constantdouble2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double2*", "globaldouble2p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "globaldouble2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double2*", "globalconstdouble2p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "globalconstdouble2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double2*", "globalvolatiledouble2p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "globalvolatiledouble2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double2*", "globalconstvolatiledouble2p", + NULL + }, + { + "double_vector2_p2", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "globalconstvolatiledouble2restrictp", + (const char 
*)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double2*", "localdouble2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "localdouble2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double2*", "localconstdouble2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "localconstdouble2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double2*", "localvolatiledouble2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "localvolatiledouble2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double2*", "localconstvolatiledouble2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "localconstvolatiledouble2restrictp", + NULL + }, + { + "double_vector2_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double2", "double2d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double2", "constdouble2d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double2", "privatedouble2d", + (const char 
*)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double2", "privateconstdouble2d", + NULL + }, + { + "double_vector3_p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double3*", "constantdouble3p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "constantdouble3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double3*", "globaldouble3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "globaldouble3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double3*", "globalconstdouble3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "globalconstdouble3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double3*", "globalvolatiledouble3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "globalvolatiledouble3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double3*", "globalconstvolatiledouble3p", + NULL + }, + { + "double_vector3_p2", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "globalconstvolatiledouble3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double3*", "localdouble3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "localdouble3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double3*", "localconstdouble3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "localconstdouble3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double3*", "localvolatiledouble3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "localvolatiledouble3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double3*", "localconstvolatiledouble3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "localconstvolatiledouble3restrictp", + NULL + }, + { + "double_vector3_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double3", "double3d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double3", "constdouble3d", + (const char 
*)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double3", "privatedouble3d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double3", "privateconstdouble3d", + NULL + }, + { + "double_vector4_p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double4*", "constantdouble4p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "constantdouble4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double4*", "globaldouble4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "globaldouble4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double4*", "globalconstdouble4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "globalconstdouble4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double4*", "globalvolatiledouble4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "globalvolatiledouble4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double4*", "globalconstvolatiledouble4p", + NULL + }, + { + 
"double_vector4_p2", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "globalconstvolatiledouble4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double4*", "localdouble4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "localdouble4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double4*", "localconstdouble4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "localconstdouble4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double4*", "localvolatiledouble4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "localvolatiledouble4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double4*", "localconstvolatiledouble4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "localconstvolatiledouble4restrictp", + NULL + }, + { + "double_vector4_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double4", "double4d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double4", "constdouble4d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double4", "privatedouble4d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double4", "privateconstdouble4d", + NULL + }, + { + "double_vector8_p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double8*", "constantdouble8p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "constantdouble8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double8*", "globaldouble8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "globaldouble8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double8*", "globalconstdouble8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "globalconstdouble8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double8*", "globalvolatiledouble8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "globalvolatiledouble8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double8*", "globalconstvolatiledouble8p", + NULL + }, + { + "double_vector8_p2", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "globalconstvolatiledouble8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double8*", "localdouble8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "localdouble8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double8*", "localconstdouble8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "localconstdouble8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double8*", "localvolatiledouble8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "localvolatiledouble8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double8*", "localconstvolatiledouble8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "localconstvolatiledouble8restrictp", + NULL + }, + { + "double_vector8_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const 
char *)(CL_KERNEL_ARG_TYPE_NONE), "double8", "double8d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double8", "constdouble8d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double8", "privatedouble8d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double8", "privateconstdouble8d", + NULL + }, + { + "double_vector16_p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double16*", "constantdouble16p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "constantdouble16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double16*", "globaldouble16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "globaldouble16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double16*", "globalconstdouble16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "globalconstdouble16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double16*", "globalvolatiledouble16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", 
"globalvolatiledouble16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double16*", "globalconstvolatiledouble16p", + NULL + }, + { + "double_vector16_p2", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "globalconstvolatiledouble16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double16*", "localdouble16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "localdouble16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double16*", "localconstdouble16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "localconstdouble16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double16*", "localvolatiledouble16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "localvolatiledouble16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double16*", "localconstvolatiledouble16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", 
"localconstvolatiledouble16restrictp", + NULL + }, + { + "double_vector16_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double16", "double16d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double16", "constdouble16d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double16", "privatedouble16d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double16", "privateconstdouble16d", + NULL + }, +}; + + +// Support for optional half data type +const char * half_kernel_args[] = { + "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n" + "\n" + "kernel void half_scalar_p(constant half*constanthalfp,\n" + " constant half *restrict constanthalfrestrictp,\n" + " global half*globalhalfp,\n" + " global half *restrict globalhalfrestrictp,\n" + " global const half* globalconsthalfp,\n" + " global const half * restrict globalconsthalfrestrictp,\n" + " global volatile half*globalvolatilehalfp,\n" + " global volatile half *restrict globalvolatilehalfrestrictp,\n" + " global const volatile half* globalconstvolatilehalfp)\n" + "{}\n", + "\n" + "kernel void half_scalar_p2(global const volatile half * restrict globalconstvolatilehalfrestrictp,\n" + " local half*localhalfp,\n" + " local half *restrict localhalfrestrictp,\n" + " local const half* localconsthalfp,\n" + " local const half * restrict localconsthalfrestrictp,\n" + " local volatile half*localvolatilehalfp,\n" + " local volatile half *restrict localvolatilehalfrestrictp,\n" + " local const volatile half* localconstvolatilehalfp,\n" + " local const volatile half * restrict localconstvolatilehalfrestrictp)\n" + "{}\n", + "\n" + "kernel void half_vector2_p(constant half2*constanthalf2p,\n" + " constant 
half2 *restrict constanthalf2restrictp,\n" + " global half2*globalhalf2p,\n" + " global half2 *restrict globalhalf2restrictp,\n" + " global const half2* globalconsthalf2p,\n" + " global const half2 * restrict globalconsthalf2restrictp,\n" + " global volatile half2*globalvolatilehalf2p,\n" + " global volatile half2 *restrict globalvolatilehalf2restrictp,\n" + " global const volatile half2* globalconstvolatilehalf2p)\n" + "{}\n", + "\n" + "kernel void half_vector2_p2(global const volatile half2 * restrict globalconstvolatilehalf2restrictp,\n" + " local half2*localhalf2p,\n" + " local half2 *restrict localhalf2restrictp,\n" + " local const half2* localconsthalf2p,\n" + " local const half2 * restrict localconsthalf2restrictp,\n" + " local volatile half2*localvolatilehalf2p,\n" + " local volatile half2 *restrict localvolatilehalf2restrictp,\n" + " local const volatile half2* localconstvolatilehalf2p,\n" + " local const volatile half2 * restrict localconstvolatilehalf2restrictp)\n" + "{}\n", + "\n" + "kernel void half_vector3_p(constant half3*constanthalf3p,\n" + " constant half3 *restrict constanthalf3restrictp,\n" + " global half3*globalhalf3p,\n" + " global half3 *restrict globalhalf3restrictp,\n" + " global const half3* globalconsthalf3p,\n" + " global const half3 * restrict globalconsthalf3restrictp,\n" + " global volatile half3*globalvolatilehalf3p,\n" + " global volatile half3 *restrict globalvolatilehalf3restrictp,\n" + " global const volatile half3* globalconstvolatilehalf3p)\n" + "{}\n", + "\n" + "kernel void half_vector3_p2(global const volatile half3 * restrict globalconstvolatilehalf3restrictp,\n" + " local half3*localhalf3p,\n" + " local half3 *restrict localhalf3restrictp,\n" + " local const half3* localconsthalf3p,\n" + " local const half3 * restrict localconsthalf3restrictp,\n" + " local volatile half3*localvolatilehalf3p,\n" + " local volatile half3 *restrict localvolatilehalf3restrictp,\n" + " local const volatile half3* localconstvolatilehalf3p,\n" + 
" local const volatile half3 * restrict localconstvolatilehalf3restrictp)\n" + "{}\n", + "\n" + "kernel void half_vector4_p(constant half4*constanthalf4p,\n" + " constant half4 *restrict constanthalf4restrictp,\n" + " global half4*globalhalf4p,\n" + " global half4 *restrict globalhalf4restrictp,\n" + " global const half4* globalconsthalf4p,\n" + " global const half4 * restrict globalconsthalf4restrictp,\n" + " global volatile half4*globalvolatilehalf4p,\n" + " global volatile half4 *restrict globalvolatilehalf4restrictp,\n" + " global const volatile half4* globalconstvolatilehalf4p)\n" + "{}\n", + "\n" + "kernel void half_vector4_p2(global const volatile half4 * restrict globalconstvolatilehalf4restrictp,\n" + " local half4*localhalf4p,\n" + " local half4 *restrict localhalf4restrictp,\n" + " local const half4* localconsthalf4p,\n" + " local const half4 * restrict localconsthalf4restrictp,\n" + " local volatile half4*localvolatilehalf4p,\n" + " local volatile half4 *restrict localvolatilehalf4restrictp,\n" + " local const volatile half4* localconstvolatilehalf4p,\n" + " local const volatile half4 * restrict localconstvolatilehalf4restrictp)\n" + "{}\n", + "\n" + "kernel void half_vector8_p(constant half8*constanthalf8p,\n" + " constant half8 *restrict constanthalf8restrictp,\n" + " global half8*globalhalf8p,\n" + " global half8 *restrict globalhalf8restrictp,\n" + " global const half8* globalconsthalf8p,\n" + " global const half8 * restrict globalconsthalf8restrictp,\n" + " global volatile half8*globalvolatilehalf8p,\n" + " global volatile half8 *restrict globalvolatilehalf8restrictp,\n" + " global const volatile half8* globalconstvolatilehalf8p)\n" + "{}\n", + "\n" + "kernel void half_vector8_p2(global const volatile half8 * restrict globalconstvolatilehalf8restrictp,\n" + " local half8*localhalf8p,\n" + " local half8 *restrict localhalf8restrictp,\n" + " local const half8* localconsthalf8p,\n" + " local const half8 * restrict localconsthalf8restrictp,\n" + " 
local volatile half8*localvolatilehalf8p,\n" + " local volatile half8 *restrict localvolatilehalf8restrictp,\n" + " local const volatile half8* localconstvolatilehalf8p,\n" + " local const volatile half8 * restrict localconstvolatilehalf8restrictp)\n" + "{}\n", + "\n" + "kernel void half_vector16_p(constant half16*constanthalf16p,\n" + " constant half16 *restrict constanthalf16restrictp,\n" + " global half16*globalhalf16p,\n" + " global half16 *restrict globalhalf16restrictp,\n" + " global const half16* globalconsthalf16p,\n" + " global const half16 * restrict globalconsthalf16restrictp,\n" + " global volatile half16*globalvolatilehalf16p,\n" + " global volatile half16 *restrict globalvolatilehalf16restrictp,\n" + " global const volatile half16* globalconstvolatilehalf16p)\n" + "{}\n", + "\n" + "kernel void half_vector16_p2(global const volatile half16 * restrict globalconstvolatilehalf16restrictp,\n" + " local half16*localhalf16p,\n" + " local half16 *restrict localhalf16restrictp,\n" + " local const half16* localconsthalf16p,\n" + " local const half16 * restrict localconsthalf16restrictp,\n" + " local volatile half16*localvolatilehalf16p,\n" + " local volatile half16 *restrict localvolatilehalf16restrictp,\n" + " local const volatile half16* localconstvolatilehalf16p,\n" + " local const volatile half16 * restrict localconstvolatilehalf16restrictp)\n" + "{}\n", + "\n" +}; + +const char * half_arg_info[][77] = { + { + "half_scalar_p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half*", "constanthalfp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "constanthalfrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half*", "globalhalfp", + (const char 
*)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "globalhalfrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half*", "globalconsthalfp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "globalconsthalfrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half*", "globalvolatilehalfp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "globalvolatilehalfrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half*", "globalconstvolatilehalfp", + NULL + }, + { + "half_scalar_p2", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "globalconstvolatilehalfrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half*", "localhalfp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "localhalfrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half*", "localconsthalfp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "localconsthalfrestrictp", + (const char 
*)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half*", "localvolatilehalfp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "localvolatilehalfrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half*", "localconstvolatilehalfp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "localconstvolatilehalfrestrictp", + NULL + }, + { + "half_vector2_p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half2*", "constanthalf2p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "constanthalf2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half2*", "globalhalf2p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "globalhalf2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half2*", "globalconsthalf2p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "globalconsthalf2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half2*", "globalvolatilehalf2p", + 
(const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "globalvolatilehalf2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half2*", "globalconstvolatilehalf2p", + NULL + }, + { + "half_vector2_p2", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "globalconstvolatilehalf2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half2*", "localhalf2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "localhalf2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half2*", "localconsthalf2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "localconsthalf2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half2*", "localvolatilehalf2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "localvolatilehalf2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half2*", "localconstvolatilehalf2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "localconstvolatilehalf2restrictp", + NULL + }, + { + "half_vector3_p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half3*", "constanthalf3p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "constanthalf3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half3*", "globalhalf3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "globalhalf3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half3*", "globalconsthalf3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "globalconsthalf3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half3*", "globalvolatilehalf3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "globalvolatilehalf3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half3*", "globalconstvolatilehalf3p", + NULL + }, + { + "half_vector3_p2", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", 
"globalconstvolatilehalf3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half3*", "localhalf3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "localhalf3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half3*", "localconsthalf3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "localconsthalf3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half3*", "localvolatilehalf3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "localvolatilehalf3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half3*", "localconstvolatilehalf3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "localconstvolatilehalf3restrictp", + NULL + }, + { + "half_vector4_p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half4*", "constanthalf4p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "constanthalf4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_NONE), "half4*", "globalhalf4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "globalhalf4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half4*", "globalconsthalf4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "globalconsthalf4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half4*", "globalvolatilehalf4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "globalvolatilehalf4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half4*", "globalconstvolatilehalf4p", + NULL + }, + { + "half_vector4_p2", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "globalconstvolatilehalf4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half4*", "localhalf4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "localhalf4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half4*", "localconsthalf4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "localconsthalf4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half4*", "localvolatilehalf4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "localvolatilehalf4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half4*", "localconstvolatilehalf4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "localconstvolatilehalf4restrictp", + NULL + }, + { + "half_vector8_p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half8*", "constanthalf8p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "constanthalf8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half8*", "globalhalf8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "globalhalf8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half8*", "globalconsthalf8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "globalconsthalf8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, 
(const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half8*", "globalvolatilehalf8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "globalvolatilehalf8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half8*", "globalconstvolatilehalf8p", + NULL + }, + { + "half_vector8_p2", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "globalconstvolatilehalf8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half8*", "localhalf8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "localhalf8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half8*", "localconsthalf8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "localconsthalf8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half8*", "localvolatilehalf8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "localvolatilehalf8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half8*", 
"localconstvolatilehalf8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "localconstvolatilehalf8restrictp", + NULL + }, + { + "half_vector16_p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half16*", "constanthalf16p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "constanthalf16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half16*", "globalhalf16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "globalhalf16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half16*", "globalconsthalf16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "globalconsthalf16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half16*", "globalvolatilehalf16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "globalvolatilehalf16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half16*", "globalconstvolatilehalf16p", + NULL + }, + { + "half_vector16_p2", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const 
char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "globalconstvolatilehalf16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half16*", "localhalf16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "localhalf16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half16*", "localconsthalf16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "localconsthalf16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half16*", "localvolatilehalf16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "localvolatilehalf16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half16*", "localconstvolatilehalf16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "localconstvolatilehalf16restrictp", + NULL + }, +}; + +const char * long_kernel_args[] = { + "kernel void constant_scalar_p2(constant long* constantlongp,\n" + " constant ulong * constantulongp)\n" + "{}\n", + "kernel void constant_scalar_p3(constant unsigned long*constantunsignedlongp)\n" + "{}\n", + "\n" + "kernel void constant_scalar_restrict_p2(constant long*restrict 
constantlongrestrictp,\n" + " constant ulong *restrict constantulongrestrictp)\n" + "{}\n", + "kernel void constant_scalar_restrict_p3(constant unsigned long* restrict constantunsignedlongrestrictp)\n" + "{}\n", + "\n" + "kernel void global_scalar_p(global long* globallongp,\n" + " global ulong * globalulongp,\n" + " global unsigned long*globalunsignedlongp)\n" + "{}\n", + "\n" + "kernel void global_scalar_restrict_p(global long*restrict globallongrestrictp,\n" + " global ulong *restrict globalulongrestrictp,\n" + " global unsigned long* restrict globalunsignedlongrestrictp)\n" + "{}\n", + "\n" + "kernel void global_const_scalar_p(global const long* globalconstlongp,\n" + " global const ulong * globalconstulongp,\n" + " global const unsigned long*globalconstunsignedlongp)\n" + "{}\n", + "\n" + "kernel void global_const_scalar_restrict_p(global const long*restrict globalconstlongrestrictp,\n" + " global const ulong *restrict globalconstulongrestrictp,\n" + " global const unsigned long* restrict globalconstunsignedlongrestrictp)\n" + "{}\n", + "\n" + "kernel void global_volatile_scalar_p(global volatile long* globalvolatilelongp,\n" + " global volatile ulong * globalvolatileulongp,\n" + " global volatile unsigned long*globalvolatileunsignedlongp)\n" + "{}\n", + "\n" + "kernel void global_volatile_scalar_restrict_p(global volatile long*restrict globalvolatilelongrestrictp,\n" + " global volatile ulong *restrict globalvolatileulongrestrictp,\n" + " global volatile unsigned long* restrict globalvolatileunsignedlongrestrictp)\n" + "{}\n", + "\n" + "kernel void global_const_volatile_scalar_p(global const volatile long* globalconstvolatilelongp,\n" + " global const volatile ulong * globalconstvolatileulongp,\n" + " global const volatile unsigned long*globalconstvolatileunsignedlongp)\n" + "{}\n", + "\n" + "kernel void global_const_volatile_scalar_restrict_p(global const volatile long*restrict globalconstvolatilelongrestrictp,\n" + " global const volatile ulong *restrict 
globalconstvolatileulongrestrictp,\n" + " global const volatile unsigned long* restrict globalconstvolatileunsignedlongrestrictp)\n" + "{}\n", + "\n" + "kernel void local_scalar_p(local long* locallongp,\n" + " local ulong * localulongp,\n" + " local unsigned long*localunsignedlongp)\n" + "{}\n", + "\n" + "kernel void local_scalar_restrict_p(local long*restrict locallongrestrictp,\n" + " local ulong *restrict localulongrestrictp,\n" + " local unsigned long* restrict localunsignedlongrestrictp)\n" + "{}\n", + "\n" + "kernel void local_const_scalar_p(local const long* localconstlongp,\n" + " local const ulong * localconstulongp,\n" + " local const unsigned long*localconstunsignedlongp)\n" + "{}\n", + "\n" + "kernel void local_const_scalar_restrict_p(local const long*restrict localconstlongrestrictp,\n" + " local const ulong *restrict localconstulongrestrictp,\n" + " local const unsigned long* restrict localconstunsignedlongrestrictp)\n" + "{}\n", + "\n" + "kernel void local_volatile_scalar_p(local volatile long* localvolatilelongp,\n" + " local volatile ulong * localvolatileulongp,\n" + " local volatile unsigned long*localvolatileunsignedlongp)\n" + "{}\n", + "\n" + "kernel void local_volatile_scalar_restrict_p(local volatile long*restrict localvolatilelongrestrictp,\n" + " local volatile ulong *restrict localvolatileulongrestrictp,\n" + " local volatile unsigned long* restrict localvolatileunsignedlongrestrictp)\n" + "{}\n", + "\n" + "kernel void local_const_volatile_scalar_p(local const volatile long* localconstvolatilelongp,\n" + " local const volatile ulong * localconstvolatileulongp,\n" + " local const volatile unsigned long*localconstvolatileunsignedlongp)\n" + "{}\n", + "\n" + "kernel void local_const_volatile_scalar_restrict_p(local const volatile long*restrict localconstvolatilelongrestrictp,\n" + " local const volatile ulong *restrict localconstvolatileulongrestrictp,\n" + " local const volatile unsigned long* restrict 
localconstvolatileunsignedlongrestrictp)\n" + "{}\n", + "\n" + "kernel void scalar_d(long longd,\n" + " ulong ulongd,\n" + " unsigned long unsignedlongd)\n" + "{}\n", + "\n" + "kernel void const_scalar_d(const long constlongd,\n" + " const ulong constulongd,\n" + " const unsigned long constunsignedlongd)\n" + "{}\n", + "\n" + "kernel void private_scalar_d(private long privatelongd,\n" + " private ulong privateulongd,\n" + " private unsigned long privateunsignedlongd)\n" + "{}\n", + "\n" + "kernel void private_const_scalar_d(private const long privateconstlongd,\n" + " private const ulong privateconstulongd,\n" + " private const unsigned long privateconstunsignedlongd)\n" + "{}\n", + "\n" + "kernel void constant_vector2_p1(constant long2* constantlong2p,\n" + " constant ulong2 * constantulong2p)\n" + "{}\n", + "\n" + "kernel void constant_vector2_restrict_p1(constant long2 * restrict constantlong2restrictp,\n" + " constant ulong2*restrict constantulong2restrictp)\n" + "{}\n", + "\n" + "kernel void global_vector2_p(global long2* globallong2p,\n" + " global ulong2 * globalulong2p)\n" + "{}\n", + "\n" + "kernel void global_vector2_restrict_p(global long2 * restrict globallong2restrictp,\n" + " global ulong2*restrict globalulong2restrictp)\n" + "{}\n", + "\n" + "kernel void global_const_vector2_p(global const long2*globalconstlong2p,\n" + " global const ulong2 *globalconstulong2p)\n" + "{}\n", + "\n" + "kernel void global_const_vector2_restrict_p(global const long2 *restrict globalconstlong2restrictp,\n" + " global const ulong2* restrict globalconstulong2restrictp)\n" + "{}\n", + "\n" + "kernel void global_volatile_vector2_p(global volatile long2* globalvolatilelong2p,\n" + " global volatile ulong2 * globalvolatileulong2p)\n" + "{}\n", + "\n" + "kernel void global_volatile_vector2_restrict_p(global volatile long2 * restrict globalvolatilelong2restrictp,\n" + " global volatile ulong2*restrict globalvolatileulong2restrictp)\n" + "{}\n", + "\n" + "kernel void 
global_const_volatile_vector2_p(global const volatile long2*globalconstvolatilelong2p,\n" + " global const volatile ulong2 *globalconstvolatileulong2p)\n" + "{}\n", + "\n" + "kernel void global_const_volatile_vector2_restrict_p(global const volatile long2 *restrict globalconstvolatilelong2restrictp,\n" + " global const volatile ulong2* restrict globalconstvolatileulong2restrictp)\n" + "{}\n", + "\n" + "kernel void local_vector2_p(local long2* locallong2p,\n" + " local ulong2 * localulong2p)\n" + "{}\n", + "\n" + "kernel void local_vector2_restrict_p(local long2 * restrict locallong2restrictp,\n" + " local ulong2*restrict localulong2restrictp)\n" + "{}\n", + "\n" + "kernel void local_const_vector2_p(local const long2*localconstlong2p,\n" + " local const ulong2 *localconstulong2p)\n" + "{}\n", + "\n" + "kernel void local_const_vector2_restrict_p(local const long2 *restrict localconstlong2restrictp,\n" + " local const ulong2* restrict localconstulong2restrictp)\n" + "{}\n", + "\n" + "kernel void local_volatile_vector2_p(local volatile long2* localvolatilelong2p,\n" + " local volatile ulong2 * localvolatileulong2p)\n" + "{}\n", + "\n" + "kernel void local_volatile_vector2_restrict_p(local volatile long2 * restrict localvolatilelong2restrictp,\n" + " local volatile ulong2*restrict localvolatileulong2restrictp)\n" + "{}\n", + "\n" + "kernel void local_const_volatile_vector2_p(local const volatile long2*localconstvolatilelong2p,\n" + " local const volatile ulong2 *localconstvolatileulong2p)\n" + "{}\n", + "\n" + "kernel void local_const_volatile_vector2_restrict_p(local const volatile long2 *restrict localconstvolatilelong2restrictp,\n" + " local const volatile ulong2* restrict localconstvolatileulong2restrictp)\n" + "{}\n", + "\n" + "kernel void vector2_d(long2 long2d,\n" + " ulong2 ulong2d)\n" + "{}\n", + "\n" + "kernel void const_vector2_d(const long2 constlong2d,\n" + " const ulong2 constulong2d)\n" + "{}\n", + "\n" + "kernel void private_vector2_d(private long2 
privatelong2d,\n" + " private ulong2 privateulong2d)\n" + "{}\n", + "\n" + "kernel void private_const_vector2_d(private const long2 privateconstlong2d,\n" + " private const ulong2 privateconstulong2d)\n" + "{}\n", + "\n" + "kernel void constant_vector3_p1(constant long3* constantlong3p,\n" + " constant ulong3 * constantulong3p)\n" + "{}\n", + "\n" + "kernel void constant_vector3_restrict_p1(constant long3 * restrict constantlong3restrictp,\n" + " constant ulong3*restrict constantulong3restrictp)\n" + "{}\n", + "\n" + "kernel void global_vector3_p(global long3* globallong3p,\n" + " global ulong3 * globalulong3p)\n" + "{}\n", + "\n" + "kernel void global_vector3_restrict_p(global long3 * restrict globallong3restrictp,\n" + " global ulong3*restrict globalulong3restrictp)\n" + "{}\n", + "\n" + "kernel void global_const_vector3_p(global const long3*globalconstlong3p,\n" + " global const ulong3 *globalconstulong3p)\n" + "{}\n", + "\n" + "kernel void global_const_vector3_restrict_p(global const long3 *restrict globalconstlong3restrictp,\n" + " global const ulong3* restrict globalconstulong3restrictp)\n" + "{}\n", + "\n" + "kernel void global_volatile_vector3_p(global volatile long3* globalvolatilelong3p,\n" + " global volatile ulong3 * globalvolatileulong3p)\n" + "{}\n", + "\n" + "kernel void global_volatile_vector3_restrict_p(global volatile long3 * restrict globalvolatilelong3restrictp,\n" + " global volatile ulong3*restrict globalvolatileulong3restrictp)\n" + "{}\n", + "\n" + "kernel void global_const_volatile_vector3_p(global const volatile long3*globalconstvolatilelong3p,\n" + " global const volatile ulong3 *globalconstvolatileulong3p)\n" + "{}\n", + "\n" + "kernel void global_const_volatile_vector3_restrict_p(global const volatile long3 *restrict globalconstvolatilelong3restrictp,\n" + " global const volatile ulong3* restrict globalconstvolatileulong3restrictp)\n" + "{}\n", + "\n" + "kernel void local_vector3_p(local long3* locallong3p,\n" + " local ulong3 * 
localulong3p)\n" + "{}\n", + "\n" + "kernel void local_vector3_restrict_p(local long3 * restrict locallong3restrictp,\n" + " local ulong3*restrict localulong3restrictp)\n" + "{}\n", + "\n" + "kernel void local_const_vector3_p(local const long3*localconstlong3p,\n" + " local const ulong3 *localconstulong3p)\n" + "{}\n", + "\n" + "kernel void local_const_vector3_restrict_p(local const long3 *restrict localconstlong3restrictp,\n" + " local const ulong3* restrict localconstulong3restrictp)\n" + "{}\n", + "\n" + "kernel void local_volatile_vector3_p(local volatile long3* localvolatilelong3p,\n" + " local volatile ulong3 * localvolatileulong3p)\n" + "{}\n", + "\n" + "kernel void local_volatile_vector3_restrict_p(local volatile long3 * restrict localvolatilelong3restrictp,\n" + " local volatile ulong3*restrict localvolatileulong3restrictp)\n" + "{}\n", + "\n" + "kernel void local_const_volatile_vector3_p(local const volatile long3*localconstvolatilelong3p,\n" + " local const volatile ulong3 *localconstvolatileulong3p)\n" + "{}\n", + "\n" + "kernel void local_const_volatile_vector3_restrict_p(local const volatile long3 *restrict localconstvolatilelong3restrictp,\n" + " local const volatile ulong3* restrict localconstvolatileulong3restrictp)\n" + "{}\n", + "\n" + "kernel void vector3_d(long3 long3d,\n" + " ulong3 ulong3d)\n" + "{}\n", + "\n" + "kernel void const_vector3_d(const long3 constlong3d,\n" + " const ulong3 constulong3d)\n" + "{}\n", + "\n" + "kernel void private_vector3_d(private long3 privatelong3d,\n" + " private ulong3 privateulong3d)\n" + "{}\n", + "\n" + "kernel void private_const_vector3_d(private const long3 privateconstlong3d,\n" + " private const ulong3 privateconstulong3d)\n" + "{}\n", + "\n" + "kernel void constant_vector4_p1(constant long4* constantlong4p,\n" + " constant ulong4 * constantulong4p)\n" + "{}\n", + "\n" + "kernel void constant_vector4_restrict_p1(constant long4 * restrict constantlong4restrictp,\n" + " constant ulong4*restrict 
constantulong4restrictp)\n" + "{}\n", + "\n" + "kernel void global_vector4_p(global long4* globallong4p,\n" + " global ulong4 * globalulong4p)\n" + "{}\n", + "\n" + "kernel void global_vector4_restrict_p(global long4 * restrict globallong4restrictp,\n" + " global ulong4*restrict globalulong4restrictp)\n" + "{}\n", + "\n" + "kernel void global_const_vector4_p(global const long4*globalconstlong4p,\n" + " global const ulong4 *globalconstulong4p)\n" + "{}\n", + "\n" + "kernel void global_const_vector4_restrict_p(global const long4 *restrict globalconstlong4restrictp,\n" + " global const ulong4* restrict globalconstulong4restrictp)\n" + "{}\n", + "\n" + "kernel void global_volatile_vector4_p(global volatile long4* globalvolatilelong4p,\n" + " global volatile ulong4 * globalvolatileulong4p)\n" + "{}\n", + "\n" + "kernel void global_volatile_vector4_restrict_p(global volatile long4 * restrict globalvolatilelong4restrictp,\n" + " global volatile ulong4*restrict globalvolatileulong4restrictp)\n" + "{}\n", + "\n" + "kernel void global_const_volatile_vector4_p(global const volatile long4*globalconstvolatilelong4p,\n" + " global const volatile ulong4 *globalconstvolatileulong4p)\n" + "{}\n", + "\n" + "kernel void global_const_volatile_vector4_restrict_p(global const volatile long4 *restrict globalconstvolatilelong4restrictp,\n" + " global const volatile ulong4* restrict globalconstvolatileulong4restrictp)\n" + "{}\n", + "\n" + "kernel void local_vector4_p(local long4* locallong4p,\n" + " local ulong4 * localulong4p)\n" + "{}\n", + "\n" + "kernel void local_vector4_restrict_p(local long4 * restrict locallong4restrictp,\n" + " local ulong4*restrict localulong4restrictp)\n" + "{}\n", + "\n" + "kernel void local_const_vector4_p(local const long4*localconstlong4p,\n" + " local const ulong4 *localconstulong4p)\n" + "{}\n", + "\n" + "kernel void local_const_vector4_restrict_p(local const long4 *restrict localconstlong4restrictp,\n" + " local const ulong4* restrict 
localconstulong4restrictp)\n" + "{}\n", + "\n" + "kernel void local_volatile_vector4_p(local volatile long4* localvolatilelong4p,\n" + " local volatile ulong4 * localvolatileulong4p)\n" + "{}\n", + "\n" + "kernel void local_volatile_vector4_restrict_p(local volatile long4 * restrict localvolatilelong4restrictp,\n" + " local volatile ulong4*restrict localvolatileulong4restrictp)\n" + "{}\n", + "\n" + "kernel void local_const_volatile_vector4_p(local const volatile long4*localconstvolatilelong4p,\n" + " local const volatile ulong4 *localconstvolatileulong4p)\n" + "{}\n", + "\n" + "kernel void local_const_volatile_vector4_restrict_p(local const volatile long4 *restrict localconstvolatilelong4restrictp,\n" + " local const volatile ulong4* restrict localconstvolatileulong4restrictp)\n" + "{}\n", + "\n" + "kernel void vector4_d(long4 long4d,\n" + " ulong4 ulong4d)\n" + "{}\n", + "\n" + "kernel void const_vector4_d(const long4 constlong4d,\n" + " const ulong4 constulong4d)\n" + "{}\n", + "\n" + "kernel void private_vector4_d(private long4 privatelong4d,\n" + " private ulong4 privateulong4d)\n" + "{}\n", + "\n" + "kernel void private_const_vector4_d(private const long4 privateconstlong4d,\n" + " private const ulong4 privateconstulong4d)\n" + "{}\n", + "\n" + "kernel void constant_vector8_p1(constant long8* constantlong8p,\n" + " constant ulong8 * constantulong8p)\n" + "{}\n", + "\n" + "kernel void constant_vector8_restrict_p1(constant long8 * restrict constantlong8restrictp,\n" + " constant ulong8*restrict constantulong8restrictp)\n" + "{}\n", + "\n" + "kernel void global_vector8_p(global long8* globallong8p,\n" + " global ulong8 * globalulong8p)\n" + "{}\n", + "\n" + "kernel void global_vector8_restrict_p(global long8 * restrict globallong8restrictp,\n" + " global ulong8*restrict globalulong8restrictp)\n" + "{}\n", + "\n" + "kernel void global_const_vector8_p(global const long8*globalconstlong8p,\n" + " global const ulong8 *globalconstulong8p)\n" + "{}\n", + "\n" + 
"kernel void global_const_vector8_restrict_p(global const long8 *restrict globalconstlong8restrictp,\n" + " global const ulong8* restrict globalconstulong8restrictp)\n" + "{}\n", + "\n" + "kernel void global_volatile_vector8_p(global volatile long8* globalvolatilelong8p,\n" + " global volatile ulong8 * globalvolatileulong8p)\n" + "{}\n", + "\n" + "kernel void global_volatile_vector8_restrict_p(global volatile long8 * restrict globalvolatilelong8restrictp,\n" + " global volatile ulong8*restrict globalvolatileulong8restrictp)\n" + "{}\n", + "\n" + "kernel void global_const_volatile_vector8_p(global const volatile long8*globalconstvolatilelong8p,\n" + " global const volatile ulong8 *globalconstvolatileulong8p)\n" + "{}\n", + "\n" + "kernel void global_const_volatile_vector8_restrict_p(global const volatile long8 *restrict globalconstvolatilelong8restrictp,\n" + " global const volatile ulong8* restrict globalconstvolatileulong8restrictp)\n" + "{}\n", + "\n" + "kernel void local_vector8_p(local long8* locallong8p,\n" + " local ulong8 * localulong8p)\n" + "{}\n", + "\n" + "kernel void local_vector8_restrict_p(local long8 * restrict locallong8restrictp,\n" + " local ulong8*restrict localulong8restrictp)\n" + "{}\n", + "\n" + "kernel void local_const_vector8_p(local const long8*localconstlong8p,\n" + " local const ulong8 *localconstulong8p)\n" + "{}\n", + "\n" + "kernel void local_const_vector8_restrict_p(local const long8 *restrict localconstlong8restrictp,\n" + " local const ulong8* restrict localconstulong8restrictp)\n" + "{}\n", + "\n" + "kernel void local_volatile_vector8_p(local volatile long8* localvolatilelong8p,\n" + " local volatile ulong8 * localvolatileulong8p)\n" + "{}\n", + "\n" + "kernel void local_volatile_vector8_restrict_p(local volatile long8 * restrict localvolatilelong8restrictp,\n" + " local volatile ulong8*restrict localvolatileulong8restrictp)\n" + "{}\n", + "\n" + "kernel void local_const_volatile_vector8_p(local const volatile 
long8*localconstvolatilelong8p,\n" + " local const volatile ulong8 *localconstvolatileulong8p)\n" + "{}\n", + "\n" + "kernel void local_const_volatile_vector8_restrict_p(local const volatile long8 *restrict localconstvolatilelong8restrictp,\n" + " local const volatile ulong8* restrict localconstvolatileulong8restrictp)\n" + "{}\n", + "\n" + "kernel void vector8_d(long8 long8d,\n" + " ulong8 ulong8d)\n" + "{}\n", + "\n" + "kernel void const_vector8_d(const long8 constlong8d,\n" + " const ulong8 constulong8d)\n" + "{}\n", + "\n" + "kernel void private_vector8_d(private long8 privatelong8d,\n" + " private ulong8 privateulong8d)\n" + "{}\n", + "\n" + "kernel void private_const_vector8_d(private const long8 privateconstlong8d,\n" + " private const ulong8 privateconstulong8d)\n" + "{}\n", + "\n" + "kernel void constant_vector16_p1(constant long16* constantlong16p,\n" + " constant ulong16 * constantulong16p)\n" + "{}\n", + "\n" + "kernel void constant_vector16_restrict_p1(constant long16 * restrict constantlong16restrictp,\n" + " constant ulong16*restrict constantulong16restrictp)\n" + "{}\n", + "\n" + "kernel void global_vector16_p(global long16* globallong16p,\n" + " global ulong16 * globalulong16p)\n" + "{}\n", + "\n" + "kernel void global_vector16_restrict_p(global long16 * restrict globallong16restrictp,\n" + " global ulong16*restrict globalulong16restrictp)\n" + "{}\n", + "\n" + "kernel void global_const_vector16_p(global const long16*globalconstlong16p,\n" + " global const ulong16 *globalconstulong16p)\n" + "{}\n", + "\n" + "kernel void global_const_vector16_restrict_p(global const long16 *restrict globalconstlong16restrictp,\n" + " global const ulong16* restrict globalconstulong16restrictp)\n" + "{}\n", + "\n" + "kernel void global_volatile_vector16_p(global volatile long16* globalvolatilelong16p,\n" + " global volatile ulong16 * globalvolatileulong16p)\n" + "{}\n", + "\n" + "kernel void global_volatile_vector16_restrict_p(global volatile long16 * restrict 
globalvolatilelong16restrictp,\n" + " global volatile ulong16*restrict globalvolatileulong16restrictp)\n" + "{}\n", + "\n" + "kernel void global_const_volatile_vector16_p(global const volatile long16*globalconstvolatilelong16p,\n" + " global const volatile ulong16 *globalconstvolatileulong16p)\n" + "{}\n", + "\n" + "kernel void global_const_volatile_vector16_restrict_p(global const volatile long16 *restrict globalconstvolatilelong16restrictp,\n" + " global const volatile ulong16* restrict globalconstvolatileulong16restrictp)\n" + "{}\n", + "\n" + "kernel void local_vector16_p(local long16* locallong16p,\n" + " local ulong16 * localulong16p)\n" + "{}\n", + "\n" + "kernel void local_vector16_restrict_p(local long16 * restrict locallong16restrictp,\n" + " local ulong16*restrict localulong16restrictp)\n" + "{}\n", + "\n" + "kernel void local_const_vector16_p(local const long16*localconstlong16p,\n" + " local const ulong16 *localconstulong16p)\n" + "{}\n", + "\n" + "kernel void local_const_vector16_restrict_p(local const long16 *restrict localconstlong16restrictp,\n" + " local const ulong16* restrict localconstulong16restrictp)\n" + "{}\n", + "\n" + "kernel void local_volatile_vector16_p(local volatile long16* localvolatilelong16p,\n" + " local volatile ulong16 * localvolatileulong16p)\n" + "{}\n", + "\n" + "kernel void local_volatile_vector16_restrict_p(local volatile long16 * restrict localvolatilelong16restrictp,\n" + " local volatile ulong16*restrict localvolatileulong16restrictp)\n" + "{}\n", + "\n" + "kernel void local_const_volatile_vector16_p(local const volatile long16*localconstvolatilelong16p,\n" + " local const volatile ulong16 *localconstvolatileulong16p)\n" + "{}\n", + "\n" + "kernel void local_const_volatile_vector16_restrict_p(local const volatile long16 *restrict localconstvolatilelong16restrictp,\n" + " local const volatile ulong16* restrict localconstvolatileulong16restrictp)\n" + "{}\n", + "\n" + "kernel void vector16_d(long16 long16d,\n" + " ulong16 
ulong16d)\n" + "{}\n", + "\n" + "kernel void const_vector16_d(const long16 constlong16d,\n" + " const ulong16 constulong16d)\n" + "{}\n", + "\n" + "kernel void private_vector16_d(private long16 privatelong16d,\n" + " private ulong16 privateulong16d)\n" + "{}\n", + "\n" + "kernel void private_const_vector16_d(private const long16 privateconstlong16d,\n" + " private const ulong16 privateconstulong16d)\n" + "{}\n", + "\n" +}; + +const char * long_arg_info[][72] = { + // The minimum value of CL_DEVICE_MAX_CONSTANT_ARGS is 4 + { + "constant_scalar_p2", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long*", "constantlongp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong*", "constantulongp", + NULL + }, + { + "constant_scalar_p3", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong*", "constantunsignedlongp", + NULL + }, + { + "constant_scalar_restrict_p2", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "constantlongrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "constantulongrestrictp", + NULL + }, + { + "constant_scalar_restrict_p3", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "constantunsignedlongrestrictp", + NULL + }, + { + "global_scalar_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long*", "globallongp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, 
(const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong*", "globalulongp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong*", "globalunsignedlongp", + NULL + }, + { + "global_scalar_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "globallongrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "globalulongrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "globalunsignedlongrestrictp", + NULL + }, + { + "global_const_scalar_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long*", "globalconstlongp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong*", "globalconstulongp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong*", "globalconstunsignedlongp", + NULL + }, + { + "global_const_scalar_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "globalconstlongrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "globalconstulongrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "globalconstunsignedlongrestrictp", + NULL + 
}, + { + "global_volatile_scalar_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long*", "globalvolatilelongp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong*", "globalvolatileulongp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong*", "globalvolatileunsignedlongp", + NULL + }, + { + "global_volatile_scalar_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "globalvolatilelongrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "globalvolatileulongrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "globalvolatileunsignedlongrestrictp", + NULL + }, + { + "global_const_volatile_scalar_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long*", "globalconstvolatilelongp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong*", "globalconstvolatileulongp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong*", "globalconstvolatileunsignedlongp", + NULL + }, + { + "global_const_volatile_scalar_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, 
(const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "globalconstvolatilelongrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "globalconstvolatileulongrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "globalconstvolatileunsignedlongrestrictp", + NULL + }, + { + "local_scalar_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long*", "locallongp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong*", "localulongp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong*", "localunsignedlongp", + NULL + }, + { + "local_scalar_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "locallongrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "localulongrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "localunsignedlongrestrictp", + NULL + }, + { + "local_const_scalar_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long*", "localconstlongp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong*", 
"localconstulongp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong*", "localconstunsignedlongp", + NULL + }, + { + "local_const_scalar_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "localconstlongrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "localconstulongrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "localconstunsignedlongrestrictp", + NULL + }, + { + "local_volatile_scalar_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long*", "localvolatilelongp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong*", "localvolatileulongp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong*", "localvolatileunsignedlongp", + NULL + }, + { + "local_volatile_scalar_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "localvolatilelongrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "localvolatileulongrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", 
"localvolatileunsignedlongrestrictp", + NULL + }, + { + "local_const_volatile_scalar_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long*", "localconstvolatilelongp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong*", "localconstvolatileulongp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong*", "localconstvolatileunsignedlongp", + NULL + }, + { + "local_const_volatile_scalar_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "localconstvolatilelongrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "localconstvolatileulongrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "localconstvolatileunsignedlongrestrictp", + NULL + }, + { + "scalar_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long", "longd", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong", "ulongd", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong", "unsignedlongd", + NULL + }, + { + "const_scalar_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const 
char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long", "constlongd", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong", "constulongd", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong", "constunsignedlongd", + NULL + }, + { + "private_scalar_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long", "privatelongd", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong", "privateulongd", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong", "privateunsignedlongd", + NULL + }, + { + "private_const_scalar_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long", "privateconstlongd", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong", "privateconstulongd", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong", "privateconstunsignedlongd", + NULL + }, + { + "constant_vector2_p1", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long2*", "constantlong2p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong2*", "constantulong2p", + NULL + }, + { + "constant_vector2_restrict_p1", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "constantlong2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "constantulong2restrictp", + NULL + }, + { + "global_vector2_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long2*", "globallong2p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong2*", "globalulong2p", + NULL + }, + { + "global_vector2_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "globallong2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "globalulong2restrictp", + NULL + }, + { + "global_const_vector2_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long2*", "globalconstlong2p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong2*", "globalconstulong2p", + NULL + }, + { + "global_const_vector2_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "globalconstlong2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "globalconstulong2restrictp", + NULL + }, + { + "global_volatile_vector2_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_VOLATILE), "long2*", "globalvolatilelong2p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong2*", "globalvolatileulong2p", + NULL + }, + { + "global_volatile_vector2_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "globalvolatilelong2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "globalvolatileulong2restrictp", + NULL + }, + { + "global_const_volatile_vector2_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long2*", "globalconstvolatilelong2p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong2*", "globalconstvolatileulong2p", + NULL + }, + { + "global_const_volatile_vector2_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "globalconstvolatilelong2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "globalconstvolatileulong2restrictp", + NULL + }, + { + "local_vector2_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long2*", "locallong2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong2*", 
"localulong2p", + NULL + }, + { + "local_vector2_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "locallong2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "localulong2restrictp", + NULL + }, + { + "local_const_vector2_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long2*", "localconstlong2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong2*", "localconstulong2p", + NULL + }, + { + "local_const_vector2_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "localconstlong2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "localconstulong2restrictp", + NULL + }, + { + "local_volatile_vector2_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long2*", "localvolatilelong2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong2*", "localvolatileulong2p", + NULL + }, + { + "local_volatile_vector2_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "localvolatilelong2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", 
"localvolatileulong2restrictp", + NULL + }, + { + "local_const_volatile_vector2_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long2*", "localconstvolatilelong2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong2*", "localconstvolatileulong2p", + NULL + }, + { + "local_const_volatile_vector2_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "localconstvolatilelong2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "localconstvolatileulong2restrictp", + NULL + }, + { + "vector2_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long2", "long2d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong2", "ulong2d", + NULL + }, + { + "const_vector2_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long2", "constlong2d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong2", "constulong2d", + NULL + }, + { + "private_vector2_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long2", "privatelong2d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), 
"ulong2", "privateulong2d", + NULL + }, + { + "private_const_vector2_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long2", "privateconstlong2d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong2", "privateconstulong2d", + NULL + }, + { + "constant_vector3_p1", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long3*", "constantlong3p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong3*", "constantulong3p", + NULL + }, + { + "constant_vector3_restrict_p1", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "constantlong3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "constantulong3restrictp", + NULL + }, + { + "global_vector3_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long3*", "globallong3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong3*", "globalulong3p", + NULL + }, + { + "global_vector3_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "globallong3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "globalulong3restrictp", + NULL + }, + { + "global_const_vector3_p", + (const char 
*)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long3*", "globalconstlong3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong3*", "globalconstulong3p", + NULL + }, + { + "global_const_vector3_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "globalconstlong3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "globalconstulong3restrictp", + NULL + }, + { + "global_volatile_vector3_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long3*", "globalvolatilelong3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong3*", "globalvolatileulong3p", + NULL + }, + { + "global_volatile_vector3_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "globalvolatilelong3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "globalvolatileulong3restrictp", + NULL + }, + { + "global_const_volatile_vector3_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long3*", "globalconstvolatilelong3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong3*", "globalconstvolatileulong3p", + NULL + }, + { + "global_const_volatile_vector3_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "globalconstvolatilelong3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "globalconstvolatileulong3restrictp", + NULL + }, + { + "local_vector3_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long3*", "locallong3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong3*", "localulong3p", + NULL + }, + { + "local_vector3_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "locallong3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "localulong3restrictp", + NULL + }, + { + "local_const_vector3_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long3*", "localconstlong3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong3*", "localconstulong3p", + NULL + }, + { + "local_const_vector3_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "localconstlong3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, 
(const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "localconstulong3restrictp", + NULL + }, + { + "local_volatile_vector3_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long3*", "localvolatilelong3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong3*", "localvolatileulong3p", + NULL + }, + { + "local_volatile_vector3_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "localvolatilelong3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "localvolatileulong3restrictp", + NULL + }, + { + "local_const_volatile_vector3_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long3*", "localconstvolatilelong3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong3*", "localconstvolatileulong3p", + NULL + }, + { + "local_const_volatile_vector3_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "localconstvolatilelong3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "localconstvolatileulong3restrictp", + NULL + }, + { + "vector3_d", + (const char 
*)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long3", "long3d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong3", "ulong3d", + NULL + }, + { + "const_vector3_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long3", "constlong3d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong3", "constulong3d", + NULL + }, + { + "private_vector3_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long3", "privatelong3d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong3", "privateulong3d", + NULL + }, + { + "private_const_vector3_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long3", "privateconstlong3d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong3", "privateconstulong3d", + NULL + }, + { + "constant_vector4_p1", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long4*", "constantlong4p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong4*", "constantulong4p", + NULL + }, + { + "constant_vector4_restrict_p1", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "constantlong4restrictp", + (const char 
*)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "constantulong4restrictp", + NULL + }, + { + "global_vector4_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long4*", "globallong4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong4*", "globalulong4p", + NULL + }, + { + "global_vector4_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "globallong4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "globalulong4restrictp", + NULL + }, + { + "global_const_vector4_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long4*", "globalconstlong4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong4*", "globalconstulong4p", + NULL + }, + { + "global_const_vector4_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "globalconstlong4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "globalconstulong4restrictp", + NULL + }, + { + "global_volatile_vector4_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long4*", "globalvolatilelong4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong4*", "globalvolatileulong4p", + NULL + }, + { + "global_volatile_vector4_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "globalvolatilelong4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "globalvolatileulong4restrictp", + NULL + }, + { + "global_const_volatile_vector4_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long4*", "globalconstvolatilelong4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong4*", "globalconstvolatileulong4p", + NULL + }, + { + "global_const_volatile_vector4_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "globalconstvolatilelong4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "globalconstvolatileulong4restrictp", + NULL + }, + { + "local_vector4_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long4*", "locallong4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong4*", "localulong4p", + NULL + }, + { + "local_vector4_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "locallong4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "localulong4restrictp", + NULL + }, + { + "local_const_vector4_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long4*", "localconstlong4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong4*", "localconstulong4p", + NULL + }, + { + "local_const_vector4_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "localconstlong4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "localconstulong4restrictp", + NULL + }, + { + "local_volatile_vector4_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long4*", "localvolatilelong4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong4*", "localvolatileulong4p", + NULL + }, + { + "local_volatile_vector4_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "localvolatilelong4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "localvolatileulong4restrictp", + NULL + }, + { + "local_const_volatile_vector4_p", + (const char 
*)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long4*", "localconstvolatilelong4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong4*", "localconstvolatileulong4p", + NULL + }, + { + "local_const_volatile_vector4_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "localconstvolatilelong4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "localconstvolatileulong4restrictp", + NULL + }, + { + "vector4_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long4", "long4d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong4", "ulong4d", + NULL + }, + { + "const_vector4_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long4", "constlong4d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong4", "constulong4d", + NULL + }, + { + "private_vector4_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long4", "privatelong4d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong4", "privateulong4d", + NULL + }, + { + "private_const_vector4_d", + (const char 
*)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long4", "privateconstlong4d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong4", "privateconstulong4d", + NULL + }, + { + "constant_vector8_p1", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long8*", "constantlong8p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong8*", "constantulong8p", + NULL + }, + { + "constant_vector8_restrict_p1", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "constantlong8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "constantulong8restrictp", + NULL + }, + { + "global_vector8_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long8*", "globallong8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong8*", "globalulong8p", + NULL + }, + { + "global_vector8_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "globallong8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "globalulong8restrictp", + NULL + }, + { + "global_const_vector8_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST), "long8*", "globalconstlong8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong8*", "globalconstulong8p", + NULL + }, + { + "global_const_vector8_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "globalconstlong8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "globalconstulong8restrictp", + NULL + }, + { + "global_volatile_vector8_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long8*", "globalvolatilelong8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong8*", "globalvolatileulong8p", + NULL + }, + { + "global_volatile_vector8_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "globalvolatilelong8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "globalvolatileulong8restrictp", + NULL + }, + { + "global_const_volatile_vector8_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long8*", "globalconstvolatilelong8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong8*", "globalconstvolatileulong8p", + NULL + }, + { + 
"global_const_volatile_vector8_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "globalconstvolatilelong8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "globalconstvolatileulong8restrictp", + NULL + }, + { + "local_vector8_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long8*", "locallong8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong8*", "localulong8p", + NULL + }, + { + "local_vector8_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "locallong8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "localulong8restrictp", + NULL + }, + { + "local_const_vector8_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long8*", "localconstlong8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong8*", "localconstulong8p", + NULL + }, + { + "local_const_vector8_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "localconstlong8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), 
"ulong8*", "localconstulong8restrictp", + NULL + }, + { + "local_volatile_vector8_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long8*", "localvolatilelong8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong8*", "localvolatileulong8p", + NULL + }, + { + "local_volatile_vector8_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "localvolatilelong8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "localvolatileulong8restrictp", + NULL + }, + { + "local_const_volatile_vector8_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long8*", "localconstvolatilelong8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong8*", "localconstvolatileulong8p", + NULL + }, + { + "local_const_volatile_vector8_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "localconstvolatilelong8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "localconstvolatileulong8restrictp", + NULL + }, + { + "vector8_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), 
"long8", "long8d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong8", "ulong8d", + NULL + }, + { + "const_vector8_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long8", "constlong8d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong8", "constulong8d", + NULL + }, + { + "private_vector8_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long8", "privatelong8d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong8", "privateulong8d", + NULL + }, + { + "private_const_vector8_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long8", "privateconstlong8d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong8", "privateconstulong8d", + NULL + }, + { + "constant_vector16_p1", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long16*", "constantlong16p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong16*", "constantulong16p", + NULL + }, + { + "constant_vector16_restrict_p1", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "constantlong16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "constantulong16restrictp", + NULL + }, + { + "global_vector16_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long16*", "globallong16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong16*", "globalulong16p", + NULL + }, + { + "global_vector16_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "globallong16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "globalulong16restrictp", + NULL + }, + { + "global_const_vector16_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long16*", "globalconstlong16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong16*", "globalconstulong16p", + NULL + }, + { + "global_const_vector16_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "globalconstlong16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "globalconstulong16restrictp", + NULL + }, + { + "global_volatile_vector16_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long16*", "globalvolatilelong16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong16*", "globalvolatileulong16p", + NULL + }, + { + "global_volatile_vector16_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "globalvolatilelong16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "globalvolatileulong16restrictp", + NULL + }, + { + "global_const_volatile_vector16_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long16*", "globalconstvolatilelong16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong16*", "globalconstvolatileulong16p", + NULL + }, + { + "global_const_volatile_vector16_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "globalconstvolatilelong16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "globalconstvolatileulong16restrictp", + NULL + }, + { + "local_vector16_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long16*", "locallong16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong16*", "localulong16p", + NULL + }, + { + "local_vector16_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "locallong16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "localulong16restrictp", + NULL + }, + { + "local_const_vector16_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long16*", "localconstlong16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong16*", "localconstulong16p", + NULL + }, + { + "local_const_vector16_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "localconstlong16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "localconstulong16restrictp", + NULL + }, + { + "local_volatile_vector16_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long16*", "localvolatilelong16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong16*", "localvolatileulong16p", + NULL + }, + { + "local_volatile_vector16_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "localvolatilelong16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "localvolatileulong16restrictp", + NULL + }, + { + "local_const_volatile_vector16_p", + (const char 
*)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long16*", "localconstvolatilelong16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong16*", "localconstvolatileulong16p", + NULL + }, + { + "local_const_volatile_vector16_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "localconstvolatilelong16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "localconstvolatileulong16restrictp", + NULL + }, + { + "vector16_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long16", "long16d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong16", "ulong16d", + NULL + }, + { + "const_vector16_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long16", "constlong16d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong16", "constulong16d", + NULL + }, + { + "private_vector16_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long16", "privatelong16d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong16", "privateulong16d", + NULL + }, + { + "private_const_vector16_d", + 
(const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long16", "privateconstlong16d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong16", "privateconstulong16d", + NULL + }, +}; + +template +int test(cl_device_id deviceID, cl_context context, kernel_args_t kernel_args, cl_uint lines_count, arg_info_t arg_info, size_t total_kernels_in_program) { + + cl_program program; + cl_kernel kernel; + const size_t max_name_len = 512; + cl_char name[ max_name_len ]; + cl_uint arg_count, numArgs; + size_t i, j, size; + int error; + + program = clCreateProgramWithSource( context, lines_count, kernel_args, NULL, &error ); + if ( program == NULL || error != CL_SUCCESS ) + { + print_error( error, "Unable to create required arguments kernel program" ); + return -1; + } + + // Compile the program + log_info( "Building kernels...\n" ); + clBuildProgram( program, 1, &deviceID, "-cl-kernel-arg-info", NULL, NULL ); + + // check for build errors and exit if things didn't work + size_t size_ret; + cl_build_status build_status; + error = clGetProgramBuildInfo(program, deviceID, CL_PROGRAM_BUILD_STATUS, sizeof(build_status), &build_status, &size_ret); + test_error( error, "Unable to query build status" ); + if (build_status == CL_BUILD_ERROR) { + printf("CL_PROGRAM_BUILD_STATUS=%d\n", (int) build_status); + error = clGetProgramBuildInfo(program, deviceID, CL_PROGRAM_BUILD_LOG, 0, NULL, &size_ret); + test_error( error, "Unable to get build log size" ); + char *build_log = (char *)malloc(size_ret); + error = clGetProgramBuildInfo(program,deviceID, CL_PROGRAM_BUILD_LOG, size_ret, build_log, &size_ret); + test_error( error, "Unable to get build log" ); + printf("CL_PROGRAM_BUILD_LOG:\n%s\n", build_log); + printf("CL_BUILD_ERROR. exiting\n"); + free(build_log); + return -1; + } + + // Lookup the number of kernels in the program. 
+ log_info( "Testing kernels...\n" ); + size_t total_kernels = 0; + error = clGetProgramInfo( program, CL_PROGRAM_NUM_KERNELS, sizeof( size_t ), &total_kernels, NULL ); + test_error( error, "Unable to get program info num kernels" ); + + if ( total_kernels != total_kernels_in_program ) + { + print_error( error, "Program did not build all kernels" ); + return -1; + } + + // Lookup the kernel names. + size_t kernel_names_len = 0; + error = clGetProgramInfo( program, CL_PROGRAM_KERNEL_NAMES, 0, NULL, &kernel_names_len ); + test_error( error, "Unable to get length of kernel names list." ); + + size_t expected_kernel_names_len = 0; + for ( i = 0; i < total_kernels; ++i ) + { + expected_kernel_names_len += 1 + strlen( arg_info[ i ][ 0 ] ); + } + if ( kernel_names_len != expected_kernel_names_len ) + { + log_error( "Kernel names string is not the right length, expected %d, got %d\n", (int) expected_kernel_names_len, (int) kernel_names_len ); + return -1; + } + + const size_t len = ( kernel_names_len + 1 ) * sizeof( char ); + char* kernel_names = (char*) malloc( len ); + error = clGetProgramInfo( program, CL_PROGRAM_KERNEL_NAMES, len, kernel_names, &kernel_names_len ); + test_error( error, "Unable to get kernel names list." ); + + // Check to see if the kernel name array is null terminated. + if ( kernel_names[ kernel_names_len - 1 ] != '\0' ) + { + free( kernel_names ); + print_error( error, "Kernel name list was not null terminated" ); + return -1; + } + + // Check to see if the correct kernel name string was returned. + // Does the string contain each expected kernel name? + for ( i = 0; i < total_kernels; ++i ) + if ( !strstr( kernel_names, arg_info[ i ][ 0 ] ) ) + break; + if ( i != total_kernels ) + { + log_error( "Kernel names string is missing \"%s\"\n", arg_info[ i ][ 0 ] ); + free( kernel_names ); + return -1; + } + + // Are the kernel names delimited by ';'? 
+ if ( !strtok( kernel_names, ";" ) ) + { + error = -1; + } + else + { + for ( i = 1; i < total_kernels; ++i ) + { + if ( !strtok( NULL, ";" ) ) + { + error = -1; + } + } + } + if ( error ) + { + log_error( "Kernel names string was not properly delimited by ';'\n" ); + free( kernel_names ); + return -1; + } + free( kernel_names ); + + // Create kernel objects and query them. + int rc = 0; + for ( i = 0; i < total_kernels; ++i ) + { + int kernel_rc = 0; + const char* kernel_name = arg_info[ i ][ 0 ]; + kernel = clCreateKernel( program, kernel_name, &error ); + if( kernel == NULL || error != CL_SUCCESS ) + { + log_error( "ERROR: Could not get kernel: %s\n", kernel_name ); + kernel_rc = -1; + } + + if(kernel_rc == 0) + { + // Determine the expected number of arguments. + arg_count = 0; + while (arg_info[ i ][ (ARG_INFO_FIELD_COUNT * arg_count) + 1 ] != NULL) + ++arg_count; + + // Try to get the number of arguments. + error = clGetKernelInfo( kernel, CL_KERNEL_NUM_ARGS, 0, NULL, &size ); + test_error( error, "Unable to get kernel arg count param size" ); + if( size != sizeof( numArgs ) ) + { + log_error( "ERROR: Kernel arg count param returns invalid size (expected %d, got %d) for kernel: %s\n", (int)sizeof( numArgs ), (int)size, kernel_name ); + kernel_rc = -1; + } + } + + + if(kernel_rc == 0) + { + error = clGetKernelInfo( kernel, CL_KERNEL_NUM_ARGS, sizeof( numArgs ), &numArgs, NULL ); + test_error( error, "Unable to get kernel arg count" ); + if( numArgs != arg_count ) + { + log_error( "ERROR: Kernel arg count returned invalid value (expected %d, got %d) for kernel: %s\n", arg_count, numArgs, kernel_name ); + kernel_rc = -1; + } + } + + if(kernel_rc == 0) + { + for ( j = 0; j < numArgs; ++j ) + { + + int arg_rc = 0; + cl_kernel_arg_address_qualifier expected_address_qualifier = (cl_kernel_arg_address_qualifier)(uintptr_t)arg_info[ i ][ (ARG_INFO_FIELD_COUNT * j) + ARG_INFO_ADDR_OFFSET ]; + cl_kernel_arg_access_qualifier expected_access_qualifier = 
(cl_kernel_arg_access_qualifier)(uintptr_t)arg_info[ i ][ (ARG_INFO_FIELD_COUNT * j) + ARG_INFO_ACCESS_OFFSET ]; + cl_kernel_arg_type_qualifier expected_type_qualifier = (cl_kernel_arg_type_qualifier)(uintptr_t)arg_info[ i ][ (ARG_INFO_FIELD_COUNT * j) + ARG_INFO_TYPE_QUAL_OFFSET ]; + const char* expected_type_name = arg_info[ i ][ (ARG_INFO_FIELD_COUNT * j) + ARG_INFO_TYPE_NAME_OFFSET ]; + const char* expected_arg_name = arg_info[ i ][ (ARG_INFO_FIELD_COUNT * j) + ARG_INFO_ARG_NAME_OFFSET ]; + + // Try to get the address qualifier of each argument. + cl_kernel_arg_address_qualifier address_qualifier = 0; + error = clGetKernelArgInfo( kernel, (cl_uint)j, CL_KERNEL_ARG_ADDRESS_QUALIFIER, sizeof address_qualifier, &address_qualifier, &size ); + test_error( error, "Unable to get argument address qualifier" ); + error = (address_qualifier != expected_address_qualifier); + if ( error ) + { + log_error( "ERROR: Bad address qualifier, kernel: \"%s\", argument number: %d, expected \"0x%X\", got \"0x%X\"\n", kernel_name, (unsigned int)j, (unsigned int)expected_address_qualifier, (unsigned int)address_qualifier ); + arg_rc = -1; + } + + // Try to get the access qualifier of each argument. + cl_kernel_arg_access_qualifier access_qualifier = 0; + error = clGetKernelArgInfo( kernel, (cl_uint)j, CL_KERNEL_ARG_ACCESS_QUALIFIER, sizeof access_qualifier, &access_qualifier, &size ); + test_error( error, "Unable to get argument access qualifier" ); + error = (access_qualifier != expected_access_qualifier); + if ( error ) + { + log_error( "ERROR: Bad access qualifier, kernel: \"%s\", argument number: %d, expected \"0x%X\", got \"0x%X\"\n", kernel_name, (unsigned int)j, (unsigned int)expected_access_qualifier, (unsigned int)access_qualifier ); + arg_rc = -1; + } + + // Try to get the type qualifier of each argument. 
+ cl_kernel_arg_type_qualifier arg_type_qualifier = 0; + error = clGetKernelArgInfo( kernel, (cl_uint)j, CL_KERNEL_ARG_TYPE_QUALIFIER, sizeof arg_type_qualifier, &arg_type_qualifier, &size ); + test_error( error, "Unable to get argument type qualifier" ); + error = (arg_type_qualifier != expected_type_qualifier); + if ( error ) + { + log_error( "ERROR: Bad type qualifier, kernel: \"%s\", argument number: %d, expected \"0x%X\", got \"0x%X\"\n", kernel_name, (unsigned int)j, (unsigned int)expected_type_qualifier, (unsigned int)arg_type_qualifier ); + arg_rc = -1; + } + + // Try to get the type of each argument. + memset( name, 0, max_name_len ); + error = clGetKernelArgInfo(kernel, (cl_uint)j, CL_KERNEL_ARG_TYPE_NAME, max_name_len, name, &size ); + test_error( error, "Unable to get argument type name" ); + error = strcmp( (const char*) name, expected_type_name ); + if ( error ) + { + log_error( "ERROR: Bad argument type name, kernel: \"%s\", argument number: %d, expected \"%s\", got \"%s\"\n", kernel_name, (unsigned int)j, expected_type_name, name ); + arg_rc = -1; + } + + // Try to get the name of each argument. + memset( name, 0, max_name_len ); + error = clGetKernelArgInfo( kernel, (cl_uint)j, CL_KERNEL_ARG_NAME, max_name_len, name, &size ); + test_error( error, "Unable to get argument name" ); + error = strcmp( (const char*) name, expected_arg_name ); + if ( error ) + { + log_error( "ERROR: Bad argument name, kernel: \"%s\", argument number: %d, expected \"%s\", got \"%s\"\n", kernel_name, (unsigned int)j, expected_arg_name, name ); + arg_rc = -1; + } + + if(arg_rc != 0) { + kernel_rc = -1; + } + } + } + + //log_info( "%s ... %s\n",arg_info[i][0],kernel_rc == 0 ? 
"passed" : "failed" ); + if(kernel_rc != 0) { + rc = -1; + } + } + return rc; +} + + +int test_get_kernel_arg_info( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + clProgramWrapper program; + clKernelWrapper kernel; + size_t size; + int error; + + cl_bool supports_double = 0; // assume not + cl_bool supports_half = 0; // assume not + cl_bool supports_images = 0; // assume not + cl_bool supports_long = 0; // assume not + cl_bool supports_3D_images = 0; // assume not + + // Check if this device supports images + error = clGetDeviceInfo(deviceID, CL_DEVICE_IMAGE_SUPPORT, sizeof supports_images, &supports_images, NULL); + test_error(error, "clGetDeviceInfo for CL_DEVICE_IMAGE_SUPPORT failed"); + + if (supports_images) { + log_info(" o Device supports images\n"); + log_info(" o Expecting SUCCESS when testing image kernel arguments.\n"); + } + else { + log_info(" o Device lacks image support\n"); + log_info(" o Not testing image kernel arguments.\n"); + } + + // Get the extensions string for the device + error = clGetDeviceInfo(deviceID, CL_DEVICE_EXTENSIONS, 0, NULL, &size); + test_error(error, "clGetDeviceInfo for CL_DEVICE_EXTENSIONS size failed"); + + char *extensions = (char*)malloc(sizeof(char)*(size + 1)); + if (extensions == 0) { + log_error("Failed to allocate memory for extensions string.\n"); + return -1; + } + memset( extensions, CHAR_MIN, sizeof(char)*(size+1) ); + + error = clGetDeviceInfo(deviceID, CL_DEVICE_EXTENSIONS, sizeof(char)*size, extensions, NULL); + test_error(error, "clGetDeviceInfo for CL_DEVICE_EXTENSIONS failed"); + + // Check to make sure the extension string is NUL terminated. + if( extensions[size] != CHAR_MIN ) + { + test_error( -1, "clGetDeviceInfo for CL_DEVICE_EXTENSIONS wrote past the end of the array!" 
); + return -1; + } + extensions[size] = '\0'; // set last char to NUL to avoid problems with string functions later + + // test for termination with '\0' + size_t stringSize = strlen( extensions ); + if( stringSize == size ) + { + test_error( -1, "clGetDeviceInfo for CL_DEVICE_EXTENSIONS is not NUL terminated!" ); + return -1; + } + + if (strstr(extensions, "cl_khr_fp64")) { + log_info(" o Device claims extension 'cl_khr_fp64'\n"); + log_info(" o Expecting SUCCESS when testing double kernel arguments.\n"); + supports_double = 1; + } else { + cl_device_fp_config double_fp_config; + error = clGetDeviceInfo(deviceID, CL_DEVICE_DOUBLE_FP_CONFIG, sizeof(double_fp_config), &double_fp_config, NULL); + test_error(error, "clGetDeviceInfo for CL_DEVICE_DOUBLE_FP_CONFIG failed"); + if (double_fp_config != 0) + supports_double = 1; + else { + log_info(" o Device lacks extension 'cl_khr_fp64'\n"); + log_info(" o Not testing double kernel arguments.\n"); + supports_double = 0; + } + } + + if (strstr(extensions, "cl_khr_fp16")) { + log_info(" o Device claims extension 'cl_khr_fp16'\n"); + log_info(" o Expecting SUCCESS when testing halfn* kernel arguments.\n"); + supports_half = 1; + } else { + log_info(" o Device lacks extension 'cl_khr_fp16'\n"); + log_info(" o Not testing halfn* kernel arguments.\n"); + supports_half = 0; + } + + if (strstr(extensions, "cl_khr_int64")) + { + log_info(" o Device claims extension 'cl_khr_int64'\n"); + log_info(" o Expecting SUCCESS when testing long kernel arguments.\n"); + supports_long = 1; + } else + { + log_info(" o Device lacks extension 'cl_khr_int64'\n"); + log_info(" o Not testing long kernel arguments.\n"); + supports_long = 0; + } + + error = checkFor3DImageSupport(deviceID); + if (error != CL_IMAGE_FORMAT_NOT_SUPPORTED) + { + log_info(" o Device supports 3D images\n"); + log_info(" o Expecting SUCCESS when testing 3D image kernel arguments.\n"); + supports_3D_images = 1; + } else + { + log_info(" o Device lacks 3D image support\n"); 
+ log_info(" o Not testing 3D image kernel arguments.\n"); + supports_3D_images = 0; + } + + int test_failed = 0; + + // Now create a test program using required arguments + log_info("Testing required kernel arguments...\n"); + error = test(deviceID, context, required_kernel_args, sizeof(required_kernel_args)/sizeof(required_kernel_args[0]), required_arg_info, sizeof(required_arg_info)/sizeof(required_arg_info[0])); + test_failed = (error) ? -1 : test_failed; + + if ( supports_images ) + { + log_info("Testing optional image arguments...\n"); + error = test(deviceID, context, image_kernel_args, sizeof(image_kernel_args)/sizeof(image_kernel_args[0]), image_arg_info, sizeof(image_arg_info)/sizeof(image_arg_info[0])); + test_failed = (error) ? -1 : test_failed; + } + + if ( supports_double ) + { + log_info("Testing optional double arguments...\n"); + error = test(deviceID, context, double_kernel_args, sizeof(double_kernel_args)/sizeof(double_kernel_args[0]), double_arg_info, sizeof(double_arg_info)/sizeof(double_arg_info[0])); + test_failed = (error) ? -1 : test_failed; + } + + if ( supports_half ) + { + log_info("Testing optional half arguments...\n"); + error = test(deviceID, context, half_kernel_args, sizeof(half_kernel_args)/sizeof(half_kernel_args[0]), half_arg_info, sizeof(half_arg_info)/sizeof(half_arg_info[0])); + test_failed = (error) ? -1 : test_failed; + } + + if ( supports_long ) + { + log_info("Testing optional long arguments...\n"); + error = test(deviceID, context, long_kernel_args, sizeof(long_kernel_args)/sizeof(long_kernel_args[0]), long_arg_info, sizeof(long_arg_info)/sizeof(long_arg_info[0])); + test_failed = (error) ? -1 : test_failed; + } + + if ( supports_3D_images ) + { + log_info("Testing optional 3D image arguments...\n"); + error = test(deviceID, context, image_3D_kernel_args, sizeof(image_3D_kernel_args)/sizeof(image_3D_kernel_args[0]), image_3D_arg_info, sizeof(image_3D_arg_info)/sizeof(image_3D_arg_info[0])); + test_failed = (error) ? 
-1 : test_failed; + } + + return test_failed; +} diff --git a/test_conformance/api/test_kernel_arg_multi_setup.cpp b/test_conformance/api/test_kernel_arg_multi_setup.cpp new file mode 100644 index 00000000..89c5eeb1 --- /dev/null +++ b/test_conformance/api/test_kernel_arg_multi_setup.cpp @@ -0,0 +1,277 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "testBase.h" +#include "../../test_common/harness/conversions.h" + +// This test is designed to stress passing multiple vector parameters to kernels and verifying access between them all + +const char *multi_arg_kernel_source_pattern = +"__kernel void sample_test(__global %s *src1, __global %s *src2, __global %s *src3, __global %s *dst1, __global %s *dst2, __global %s *dst3 )\n" +"{\n" +" int tid = get_global_id(0);\n" +" dst1[tid] = src1[tid];\n" +" dst2[tid] = src2[tid];\n" +" dst3[tid] = src3[tid];\n" +"}\n"; + +extern cl_uint gRandomSeed; + +#define MAX_ERROR_TOLERANCE 0.0005f + +int test_multi_arg_set(cl_device_id device, cl_context context, cl_command_queue queue, + ExplicitType vec1Type, int vec1Size, + ExplicitType vec2Type, int vec2Size, + ExplicitType vec3Type, int vec3Size, MTdata d) +{ + clProgramWrapper program; + clKernelWrapper kernel; + int error, i, j; + clMemWrapper streams[ 6 ]; + size_t threads[1], localThreads[1]; + char programSrc[ 10248 ], vec1Name[ 64 ], vec2Name[ 64 ], vec3Name[ 64 ]; + char sizeNames[][ 4 ] = { "", "2", 
"3", "4", "", "", "", "8" }; + const char *ptr; + void *initData[3], *resultData[3]; + + + // Create the program source + sprintf( vec1Name, "%s%s", get_explicit_type_name( vec1Type ), sizeNames[ vec1Size - 1 ] ); + sprintf( vec2Name, "%s%s", get_explicit_type_name( vec2Type ), sizeNames[ vec2Size - 1 ] ); + sprintf( vec3Name, "%s%s", get_explicit_type_name( vec3Type ), sizeNames[ vec3Size - 1 ] ); + + sprintf( programSrc, multi_arg_kernel_source_pattern, + vec1Name, vec2Name, vec3Name, vec1Name, vec2Name, vec3Name, + vec1Size, vec1Size, vec2Size, vec2Size, vec3Size, vec3Size ); + ptr = programSrc; + + // Create our testing kernel + error = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "sample_test" ); + test_error( error, "Unable to create testing kernel" ); + + // Get thread dimensions + threads[0] = 1024; + error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] ); + test_error( error, "Unable to get work group size for kernel" ); + + // Create input streams + initData[ 0 ] = create_random_data( vec1Type, d, (unsigned int)threads[ 0 ] * vec1Size ); + streams[ 0 ] = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), get_explicit_type_size( vec1Type ) * threads[0] * vec1Size, initData[ 0 ], &error ); + test_error( error, "Unable to create testing stream" ); + + initData[ 1 ] = create_random_data( vec2Type, d, (unsigned int)threads[ 0 ] * vec2Size ); + streams[ 1 ] = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), get_explicit_type_size( vec2Type ) * threads[0] * vec2Size, initData[ 1 ], &error ); + test_error( error, "Unable to create testing stream" ); + + initData[ 2 ] = create_random_data( vec3Type, d, (unsigned int)threads[ 0 ] * vec3Size ); + streams[ 2 ] = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), get_explicit_type_size( vec3Type ) * threads[0] * vec3Size, initData[ 2 ], &error ); + test_error( error, "Unable to create testing stream" ); + + streams[ 3 ] = 
clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), get_explicit_type_size( vec1Type ) * threads[0] * vec1Size, NULL, &error ); + test_error( error, "Unable to create testing stream" ); + + streams[ 4 ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), get_explicit_type_size( vec2Type ) * threads[0] * vec2Size, NULL, &error ); + test_error( error, "Unable to create testing stream" ); + + streams[ 5 ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), get_explicit_type_size( vec3Type ) * threads[0] * vec3Size, NULL, &error ); + test_error( error, "Unable to create testing stream" ); + + // Set the arguments + error = 0; + for( i = 0; i < 6; i++ ) + error |= clSetKernelArg( kernel, i, sizeof( cl_mem ), &streams[ i ] ); + test_error( error, "Unable to set arguments for kernel" ); + + // Execute! + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); + test_error( error, "Unable to execute kernel" ); + + // Read results + resultData[0] = malloc( get_explicit_type_size( vec1Type ) * vec1Size * threads[0] ); + resultData[1] = malloc( get_explicit_type_size( vec2Type ) * vec2Size * threads[0] ); + resultData[2] = malloc( get_explicit_type_size( vec3Type ) * vec3Size * threads[0] ); + error = clEnqueueReadBuffer( queue, streams[ 3 ], CL_TRUE, 0, get_explicit_type_size( vec1Type ) * vec1Size * threads[ 0 ], resultData[0], 0, NULL, NULL ); + error |= clEnqueueReadBuffer( queue, streams[ 4 ], CL_TRUE, 0, get_explicit_type_size( vec2Type ) * vec2Size * threads[ 0 ], resultData[1], 0, NULL, NULL ); + error |= clEnqueueReadBuffer( queue, streams[ 5 ], CL_TRUE, 0, get_explicit_type_size( vec3Type ) * vec3Size * threads[ 0 ], resultData[2], 0, NULL, NULL ); + test_error( error, "Unable to read result stream" ); + + // Verify + char *ptr1 = (char *)initData[ 0 ], *ptr2 = (char *)resultData[ 0 ]; + size_t span = get_explicit_type_size( vec1Type ); + for( i = 0; i < (int)threads[0]; i++ ) + { + for( j = 0; j < 
vec1Size; j++ ) + { + if( memcmp( ptr1 + span * j , ptr2 + span * j, span ) != 0 ) + { + log_error( "ERROR: Value did not validate for component %d of item %d of stream 0!\n", j, i ); + free( initData[ 0 ] ); + free( initData[ 1 ] ); + free( initData[ 2 ] ); + free( resultData[ 0 ] ); + free( resultData[ 1 ] ); + free( resultData[ 2 ] ); + return -1; + } + } + ptr1 += span * vec1Size; + ptr2 += span * vec1Size; + } + + ptr1 = (char *)initData[ 1 ]; + ptr2 = (char *)resultData[ 1 ]; + span = get_explicit_type_size( vec2Type ); + for( i = 0; i < (int)threads[0]; i++ ) + { + for( j = 0; j < vec2Size; j++ ) + { + if( memcmp( ptr1 + span * j , ptr2 + span * j, span ) != 0 ) + { + log_error( "ERROR: Value did not validate for component %d of item %d of stream 1!\n", j, i ); + free( initData[ 0 ] ); + free( initData[ 1 ] ); + free( initData[ 2 ] ); + free( resultData[ 0 ] ); + free( resultData[ 1 ] ); + free( resultData[ 2 ] ); + return -1; + } + } + ptr1 += span * vec2Size; + ptr2 += span * vec2Size; + } + + ptr1 = (char *)initData[ 2 ]; + ptr2 = (char *)resultData[ 2 ]; + span = get_explicit_type_size( vec3Type ); + for( i = 0; i < (int)threads[0]; i++ ) + { + for( j = 0; j < vec3Size; j++ ) + { + if( memcmp( ptr1 + span * j , ptr2 + span * j, span ) != 0 ) + { + log_error( "ERROR: Value did not validate for component %d of item %d of stream 2!\n", j, i ); + free( initData[ 0 ] ); + free( initData[ 1 ] ); + free( initData[ 2 ] ); + free( resultData[ 0 ] ); + free( resultData[ 1 ] ); + free( resultData[ 2 ] ); + return -1; + } + } + ptr1 += span * vec3Size; + ptr2 += span * vec3Size; + } + + // If we got here, everything verified successfully + free( initData[ 0 ] ); + free( initData[ 1 ] ); + free( initData[ 2 ] ); + free( resultData[ 0 ] ); + free( resultData[ 1 ] ); + free( resultData[ 2 ] ); + + return 0; +} + +int test_kernel_arg_multi_setup_exhaustive(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + // Loop through every 
combination of input and output types + ExplicitType types[] = { kChar, kShort, kInt, kFloat, kNumExplicitTypes }; + int type1, type2, type3; + int size1, size2, size3; + RandomSeed seed( gRandomSeed ); + + log_info( "\n" ); // for formatting + + for( type1 = 0; types[ type1 ] != kNumExplicitTypes; type1++ ) + { + for( type2 = 0; types[ type2 ] != kNumExplicitTypes; type2++ ) + { + for( type3 = 0; types[ type3 ] != kNumExplicitTypes; type3++ ) + { + log_info( "\n\ttesting %s, %s, %s...", get_explicit_type_name( types[ type1 ] ), get_explicit_type_name( types[ type2 ] ), get_explicit_type_name( types[ type3 ] ) ); + + // Loop through every combination of vector size + for( size1 = 2; size1 <= 8; size1 <<= 1 ) + { + for( size2 = 2; size2 <= 8; size2 <<= 1 ) + { + for( size3 = 2; size3 <= 8; size3 <<= 1 ) + { + log_info("."); + fflush( stdout); + if( test_multi_arg_set( device, context, queue, + types[ type1 ], size1, + types[ type2 ], size2, + types[ type3 ], size3, seed ) ) + return -1; + } + } + } + } + } + } + log_info( "\n" ); + return 0; +} + +int test_kernel_arg_multi_setup_random(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + // Loop through a selection of combinations + ExplicitType types[] = { kChar, kShort, kInt, kFloat, kNumExplicitTypes }; + int type1, type2, type3; + int size1, size2, size3; + RandomSeed seed( gRandomSeed ); + + num_elements = 3*3*3*4; + log_info( "Testing %d random configurations\n", num_elements ); + + // Loop through every combination of vector size + for( size1 = 2; size1 <= 8; size1 <<= 1 ) + { + for( size2 = 2; size2 <= 8; size2 <<= 1 ) + { + for( size3 = 2; size3 <= 8; size3 <<= 1 ) + { + // Loop through 4 type combinations for each size combination + int n; + for (n=0; n<4; n++) { + type1 = (int)get_random_float(0,4, seed); + type2 = (int)get_random_float(0,4, seed); + type3 = (int)get_random_float(0,4, seed); + + + log_info( "\ttesting %s%d, %s%d, %s%d...\n", + get_explicit_type_name( 
types[ type1 ] ), size1, + get_explicit_type_name( types[ type2 ] ), size2, + get_explicit_type_name( types[ type3 ] ), size3 ); + + if( test_multi_arg_set( device, context, queue, + types[ type1 ], size1, + types[ type2 ], size2, + types[ type3 ], size3, seed ) ) + return -1; + } + } + } + } + return 0; +} + + + + diff --git a/test_conformance/api/test_kernels.c b/test_conformance/api/test_kernels.c new file mode 100644 index 00000000..6daaabee --- /dev/null +++ b/test_conformance/api/test_kernels.c @@ -0,0 +1,695 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "testBase.h" +#include "../../test_common/harness/typeWrappers.h" +#include "../../test_common/harness/conversions.h" + +extern cl_uint gRandomSeed; + +const char *sample_single_test_kernel[] = { +"__kernel void sample_test(__global float *src, __global int *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = (int)src[tid];\n" +"\n" +"}\n" }; + +const char *sample_struct_test_kernel[] = { +"typedef struct {\n" +"__global int *A;\n" +"__global int *B;\n" +"} input_pair_t;\n" +"\n" +"__kernel void sample_test(__global input_pair_t *src, __global int *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = src->A[tid] + src->B[tid];\n" +"\n" +"}\n" }; + +const char *sample_struct_array_test_kernel[] = { +"typedef struct {\n" +"int A;\n" +"int B;\n" +"} input_pair_t;\n" +"\n" +"__kernel void sample_test(__global input_pair_t *src, __global int *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = src[tid].A + src[tid].B;\n" +"\n" +"}\n" }; + +const char *sample_const_test_kernel[] = { +"__kernel void sample_test(__constant int *src1, __constant int *src2, __global int *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = src1[tid] + src2[tid];\n" +"\n" +"}\n" }; + +const char *sample_const_global_test_kernel[] = { +"__constant int addFactor = 1024;\n" +"__kernel void sample_test(__global int *src1, __global int *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = src1[tid] + addFactor;\n" +"\n" +"}\n" }; + +const char *sample_two_kernel_program[] = { +"__kernel void sample_test(__global float *src, __global int *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = (int)src[tid];\n" +"\n" +"}\n", +"__kernel void sample_test2(__global int *src, __global float *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = (float)src[tid];\n" +"\n" +"}\n" }; + + + + +int test_get_kernel_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, 
int num_elements) +{ + int error; + cl_program program, testProgram; + cl_context testContext; + cl_kernel kernel; + cl_char name[ 512 ]; + cl_uint numArgs, numInstances; + size_t paramSize; + + + /* Create reference */ + if( create_single_kernel_helper( context, &program, &kernel, 1, sample_single_test_kernel, "sample_test" ) != 0 ) + { + return -1; + } + + error = clGetKernelInfo( kernel, CL_KERNEL_FUNCTION_NAME, NULL, 0, ¶mSize ); + test_error( error, "Unable to get kernel function name param size" ); + if( paramSize != strlen( "sample_test" ) + 1 ) + { + log_error( "ERROR: Kernel function name param returns invalid size (expected %d, got %d)\n", (int)strlen( "sample_test" ) + 1, (int)paramSize ); + return -1; + } + + error = clGetKernelInfo( kernel, CL_KERNEL_FUNCTION_NAME, sizeof( name ), name, NULL ); + test_error( error, "Unable to get kernel function name" ); + if( strcmp( (char *)name, "sample_test" ) != 0 ) + { + log_error( "ERROR: Kernel function name returned invalid value (expected sample_test, got %s)\n", (char *)name ); + return -1; + } + + + error = clGetKernelInfo( kernel, CL_KERNEL_NUM_ARGS, 0, NULL, ¶mSize ); + test_error( error, "Unable to get kernel arg count param size" ); + if( paramSize != sizeof( numArgs ) ) + { + log_error( "ERROR: Kernel arg count param returns invalid size (expected %d, got %d)\n", (int)sizeof( numArgs ), (int)paramSize ); + return -1; + } + + error = clGetKernelInfo( kernel, CL_KERNEL_NUM_ARGS, sizeof( numArgs ), &numArgs, NULL ); + test_error( error, "Unable to get kernel arg count" ); + if( numArgs != 2 ) + { + log_error( "ERROR: Kernel arg count returned invalid value (expected %d, got %d)\n", 2, numArgs ); + return -1; + } + + + error = clGetKernelInfo( kernel, CL_KERNEL_REFERENCE_COUNT, 0, NULL, ¶mSize ); + test_error( error, "Unable to get kernel reference count param size" ); + if( paramSize != sizeof( numInstances ) ) + { + log_error( "ERROR: Kernel reference count param returns invalid size (expected %d, got 
%d)\n", (int)sizeof( numInstances ), (int)paramSize ); + return -1; + } + + error = clGetKernelInfo( kernel, CL_KERNEL_REFERENCE_COUNT, sizeof( numInstances ), &numInstances, NULL ); + test_error( error, "Unable to get kernel reference count" ); + + + error = clGetKernelInfo( kernel, CL_KERNEL_PROGRAM, NULL, 0, ¶mSize ); + test_error( error, "Unable to get kernel program param size" ); + if( paramSize != sizeof( testProgram ) ) + { + log_error( "ERROR: Kernel program param returns invalid size (expected %d, got %d)\n", (int)sizeof( testProgram ), (int)paramSize ); + return -1; + } + + error = clGetKernelInfo( kernel, CL_KERNEL_PROGRAM, sizeof( testProgram ), &testProgram, NULL ); + test_error( error, "Unable to get kernel program" ); + if( testProgram != program ) + { + log_error( "ERROR: Kernel program returned invalid value (expected %p, got %p)\n", program, testProgram ); + return -1; + } + + error = clGetKernelInfo( kernel, CL_KERNEL_CONTEXT, sizeof( testContext ), &testContext, NULL ); + test_error( error, "Unable to get kernel context" ); + if( testContext != context ) + { + log_error( "ERROR: Kernel context returned invalid value (expected %p, got %p)\n", context, testContext ); + return -1; + } + + /* Release memory */ + clReleaseKernel( kernel ); + clReleaseProgram( program ); + return 0; +} + +int test_execute_kernel_local_sizes(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper streams[2]; + size_t threads[1], localThreads[1]; + cl_float inputData[100]; + cl_int outputData[100]; + RandomSeed seed( gRandomSeed ); + int i; + + /* Create a kernel to test with */ + if( create_single_kernel_helper( context, &program, &kernel, 1, sample_single_test_kernel, "sample_test" ) != 0 ) + { + return -1; + } + + /* Create some I/O streams */ + streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * 100, NULL, &error); 
+ test_error( error, "Creating test array failed" ); + streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 100, NULL, &error); + test_error( error, "Creating test array failed" ); + + /* Write some test data */ + memset( outputData, 0, sizeof( outputData ) ); + + for (i=0; i<100; i++) + inputData[i] = get_random_float(-(float) 0x7fffffff, (float) 0x7fffffff, seed); + + error = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, sizeof(cl_float)*100, (void *)inputData, 0, NULL, NULL); + test_error( error, "Unable to set testing kernel data" ); + + /* Set the arguments */ + error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] ); + test_error( error, "Unable to set kernel arguments" ); + error = clSetKernelArg( kernel, 1, sizeof( streams[1] ), &streams[1] ); + test_error( error, "Unable to set kernel arguments" ); + + /* Test running the kernel and verifying it */ + threads[0] = (size_t)100; + error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] ); + test_error( error, "Unable to get work group size to use" ); + + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); + test_error( error, "Kernel execution failed" ); + + error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*100, (void *)outputData, 0, NULL, NULL ); + test_error( error, "Unable to get result data" ); + + for (i=0; i<100; i++) + { + if (outputData[i] != (int)inputData[i]) + { + log_error( "ERROR: Data did not verify on first pass!\n" ); + return -1; + } + } + + /* Try again */ + if( localThreads[0] > 1 ) + localThreads[0] /= 2; + while( localThreads[0] > 1 && 0 != threads[0] % localThreads[0] ) + localThreads[0]--; + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); + test_error( error, "Kernel execution failed" ); + + error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*100, (void 
*)outputData, 0, NULL, NULL ); + test_error( error, "Unable to get result data" ); + + for (i=0; i<100; i++) + { + if (outputData[i] != (int)inputData[i]) + { + log_error( "ERROR: Data did not verify on first pass!\n" ); + return -1; + } + } + + /* And again */ + if( localThreads[0] > 1 ) + localThreads[0] /= 2; + while( localThreads[0] > 1 && 0 != threads[0] % localThreads[0] ) + localThreads[0]--; + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); + test_error( error, "Kernel execution failed" ); + + error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*100, (void *)outputData, 0, NULL, NULL ); + test_error( error, "Unable to get result data" ); + + for (i=0; i<100; i++) + { + if (outputData[i] != (int)inputData[i]) + { + log_error( "ERROR: Data did not verify on first pass!\n" ); + return -1; + } + } + + /* One more time */ + localThreads[0] = (unsigned int)1; + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); + test_error( error, "Kernel execution failed" ); + + error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*100, (void *)outputData, 0, NULL, NULL ); + test_error( error, "Unable to get result data" ); + + for (i=0; i<100; i++) + { + if (outputData[i] != (int)inputData[i]) + { + log_error( "ERROR: Data did not verify on first pass!\n" ); + return -1; + } + } + + return 0; +} + +int test_set_kernel_arg_by_index(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper streams[2]; + size_t threads[1], localThreads[1]; + cl_float inputData[10]; + cl_int outputData[10]; + RandomSeed seed( gRandomSeed ); + int i; + + /* Create a kernel to test with */ + if( create_single_kernel_helper( context, &program, &kernel, 1, sample_single_test_kernel, "sample_test" ) != 0 ) + { + return -1; + } + + /* Create some I/O streams 
*/ + streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * 10, NULL, &error); + test_error( error, "Creating test array failed" ); + streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 10, NULL, &error); + test_error( error, "Creating test array failed" ); + + /* Write some test data */ + memset( outputData, 0, sizeof( outputData ) ); + + for (i=0; i<10; i++) + inputData[i] = get_random_float(-(float) 0x7fffffff, (float) 0x7fffffff, seed); + + error = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, sizeof(cl_float)*10, (void *)inputData, 0, NULL, NULL); + test_error( error, "Unable to set testing kernel data" ); + + /* Test setting the arguments by index manually */ + error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]); + test_error( error, "Unable to set indexed kernel arguments" ); + error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0]); + test_error( error, "Unable to set indexed kernel arguments" ); + + + /* Test running the kernel and verifying it */ + threads[0] = (size_t)10; + + error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] ); + test_error( error, "Unable to get work group size to use" ); + + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); + test_error( error, "Kernel execution failed" ); + + error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*10, (void *)outputData, 0, NULL, NULL ); + test_error( error, "Unable to get result data" ); + + for (i=0; i<10; i++) + { + if (outputData[i] != (int)inputData[i]) + { + log_error( "ERROR: Data did not verify on first pass!\n" ); + return -1; + } + } + + return 0; +} + +int test_set_kernel_arg_struct(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + cl_program program; + cl_kernel kernel; + void *args[2]; + cl_mem outStream; + size_t threads[1], 
localThreads[1]; + cl_int outputData[10]; + int i; + cl_int randomTestDataA[10], randomTestDataB[10]; + MTdata d; + + struct img_pair_t + { + cl_mem streamA; + cl_mem streamB; + } image_pair; + + + /* Create a kernel to test with */ + if( create_single_kernel_helper( context, &program, &kernel, 1, sample_struct_test_kernel, "sample_test" ) != 0 ) + { + return -1; + } + + /* Create some I/O streams */ + d = init_genrand( gRandomSeed ); + for( i = 0; i < 10; i++ ) + { + randomTestDataA[i] = (cl_int)genrand_int32(d); + randomTestDataB[i] = (cl_int)genrand_int32(d); + } + free_mtdata(d); d = NULL; + + image_pair.streamA = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_int) * 10, randomTestDataA, &error); + test_error( error, "Creating test array failed" ); + image_pair.streamB = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_int) * 10, randomTestDataB, &error); + test_error( error, "Creating test array failed" ); + outStream = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 10, NULL, &error); + test_error( error, "Creating test array failed" ); + + /* Set the arguments */ + args[0] = &image_pair; + args[1] = outStream; + + error = clSetKernelArg(kernel, 0, sizeof( image_pair ), &image_pair); + test_error( error, "Unable to set indexed kernel arguments" ); + error = clSetKernelArg(kernel, 1, sizeof( cl_mem ), &args[1]); + test_error( error, "Unable to set indexed kernel arguments" ); + + /* Test running the kernel and verifying it */ + threads[0] = (size_t)10; + + error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] ); + test_error( error, "Unable to get work group size to use" ); + + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); + test_error( error, "Kernel execution failed" ); + + error = clEnqueueReadBuffer( queue, outStream, CL_TRUE, 0, sizeof(cl_int)*10, (void *)outputData, 0, NULL, NULL ); + test_error( 
error, "Unable to get result data" ); + + for (i=0; i<10; i++) + { + if (outputData[i] != randomTestDataA[i] + randomTestDataB[i]) + { + log_error( "ERROR: Data did not verify!\n" ); + return -1; + } + } + + + clReleaseMemObject( image_pair.streamA ); + clReleaseMemObject( image_pair.streamB ); + clReleaseMemObject( outStream ); + clReleaseKernel( kernel ); + clReleaseProgram( program ); + + return 0; +} + +int test_set_kernel_arg_constant(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper streams[3]; + size_t threads[1], localThreads[1]; + cl_int outputData[10]; + int i; + cl_int randomTestDataA[10], randomTestDataB[10]; + cl_ulong maxSize; + MTdata d; + + /* Verify our test buffer won't be bigger than allowed */ + error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof( maxSize ), &maxSize, 0 ); + test_error( error, "Unable to get max constant buffer size" ); + if( maxSize < sizeof( cl_int ) * 10 ) + { + log_error( "ERROR: Unable to test constant argument to kernel: max size of constant buffer is reported as %d!\n", (int)maxSize ); + return -1; + } + + /* Create a kernel to test with */ + if( create_single_kernel_helper( context, &program, &kernel, 1, sample_const_test_kernel, "sample_test" ) != 0 ) + { + return -1; + } + + /* Create some I/O streams */ + d = init_genrand( gRandomSeed ); + for( i = 0; i < 10; i++ ) + { + randomTestDataA[i] = (cl_int)genrand_int32(d) & 0xffffff; /* Make sure values are positive, just so we don't have to */ + randomTestDataB[i] = (cl_int)genrand_int32(d) & 0xffffff; /* deal with overflow on the verification */ + } + free_mtdata(d); d = NULL; + + streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_int) * 10, randomTestDataA, &error); + test_error( error, "Creating test array failed" ); + streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), 
sizeof(cl_int) * 10, randomTestDataB, &error); + test_error( error, "Creating test array failed" ); + streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 10, NULL, &error); + test_error( error, "Creating test array failed" ); + + /* Set the arguments */ + error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0]); + test_error( error, "Unable to set indexed kernel arguments" ); + error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]); + test_error( error, "Unable to set indexed kernel arguments" ); + error = clSetKernelArg(kernel, 2, sizeof( streams[2] ), &streams[2]); + test_error( error, "Unable to set indexed kernel arguments" ); + + + /* Test running the kernel and verifying it */ + threads[0] = (size_t)10; + + error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] ); + test_error( error, "Unable to get work group size to use" ); + + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); + test_error( error, "Kernel execution failed" ); + + error = clEnqueueReadBuffer( queue, streams[2], CL_TRUE, 0, sizeof(cl_int)*10, (void *)outputData, 0, NULL, NULL ); + test_error( error, "Unable to get result data" ); + + for (i=0; i<10; i++) + { + if (outputData[i] != randomTestDataA[i] + randomTestDataB[i]) + { + log_error( "ERROR: Data sample %d did not verify! 
%d does not match %d + %d (%d)\n", i, outputData[i], randomTestDataA[i], randomTestDataB[i], ( randomTestDataA[i] + randomTestDataB[i] ) ); + return -1; + } + } + + return 0; +} + +int test_set_kernel_arg_struct_array(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper streams[2]; + size_t threads[1], localThreads[1]; + cl_int outputData[10]; + int i; + MTdata d; + + typedef struct img_pair_type + { + int A; + int B; + } image_pair_t; + + image_pair_t image_pair[ 10 ]; + + + /* Create a kernel to test with */ + if( create_single_kernel_helper( context, &program, &kernel, 1, sample_struct_array_test_kernel, "sample_test" ) != 0 ) + { + return -1; + } + + /* Create some I/O streams */ + d = init_genrand( gRandomSeed ); + for( i = 0; i < 10; i++ ) + { + image_pair[i].A = (cl_int)genrand_int32(d); + image_pair[i].A = (cl_int)genrand_int32(d); + } + free_mtdata(d); d = NULL; + + streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(image_pair_t) * 10, (void *)image_pair, &error); + test_error( error, "Creating test array failed" ); + streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 10, NULL, &error); + test_error( error, "Creating test array failed" ); + + /* Set the arguments */ + error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0]); + test_error( error, "Unable to set indexed kernel arguments" ); + error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]); + test_error( error, "Unable to set indexed kernel arguments" ); + + /* Test running the kernel and verifying it */ + threads[0] = (size_t)10; + + error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] ); + test_error( error, "Unable to get work group size to use" ); + + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); + 
test_error( error, "Kernel execution failed" ); + + error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*10, (void *)outputData, 0, NULL, NULL ); + test_error( error, "Unable to get result data" ); + + for (i=0; i<10; i++) + { + if (outputData[i] != image_pair[i].A + image_pair[i].B) + { + log_error( "ERROR: Data did not verify!\n" ); + return -1; + } + } + + return 0; +} + +int test_create_kernels_in_program(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + cl_program program; + cl_kernel kernel[3]; + unsigned int kernelCount; + + error = create_single_kernel_helper(context, &program, NULL, 2, sample_two_kernel_program, NULL); + test_error(error, "Unable to build test program"); + + /* Try getting the kernel count */ + error = clCreateKernelsInProgram( program, 0, NULL, &kernelCount ); + test_error( error, "Unable to get kernel count for built program" ); + if( kernelCount != 2 ) + { + log_error( "ERROR: Returned kernel count from clCreateKernelsInProgram is incorrect! 
(got %d, expected 2)\n", kernelCount ); + return -1; + } + + /* Try actually getting the kernels */ + error = clCreateKernelsInProgram( program, 2, kernel, NULL ); + test_error( error, "Unable to get kernels for built program" ); + clReleaseKernel( kernel[0] ); + clReleaseKernel( kernel[1] ); + + clReleaseProgram( program ); + return 0; +} + +int test_kernel_global_constant(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper streams[2]; + size_t threads[1], localThreads[1]; + cl_int outputData[10]; + int i; + cl_int randomTestDataA[10]; + MTdata d; + + + /* Create a kernel to test with */ + if( create_single_kernel_helper( context, &program, &kernel, 1, sample_const_global_test_kernel, "sample_test" ) != 0 ) + { + return -1; + } + + /* Create some I/O streams */ + d = init_genrand( gRandomSeed ); + for( i = 0; i < 10; i++ ) + { + randomTestDataA[i] = (cl_int)genrand_int32(d) & 0xffff; /* Make sure values are positive and small, just so we don't have to */ + } + free_mtdata(d); d = NULL; + + streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_int) * 10, randomTestDataA, &error); + test_error( error, "Creating test array failed" ); + streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 10, NULL, &error); + test_error( error, "Creating test array failed" ); + + /* Set the arguments */ + error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0]); + test_error( error, "Unable to set indexed kernel arguments" ); + error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]); + test_error( error, "Unable to set indexed kernel arguments" ); + + + /* Test running the kernel and verifying it */ + threads[0] = (size_t)10; + + error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] ); + test_error( error, "Unable to get work group size to use" ); 
+ + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); + test_error( error, "Kernel execution failed" ); + + error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*10, (void *)outputData, 0, NULL, NULL ); + test_error( error, "Unable to get result data" ); + + for (i=0; i<10; i++) + { + if (outputData[i] != randomTestDataA[i] + 1024) + { + log_error( "ERROR: Data sample %d did not verify! %d does not match %d + 1024 (%d)\n", i, outputData[i], randomTestDataA[i], ( randomTestDataA[i] + 1024 ) ); + return -1; + } + } + + return 0; +} + + + diff --git a/test_conformance/api/test_mem_object_info.cpp b/test_conformance/api/test_mem_object_info.cpp new file mode 100644 index 00000000..85069a54 --- /dev/null +++ b/test_conformance/api/test_mem_object_info.cpp @@ -0,0 +1,756 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "testBase.h" +#include "../../test_common/harness/typeWrappers.h" +#include "../../test_common/harness/testHarness.h" + +extern cl_uint gRandomSeed; + + +#define TEST_MEM_OBJECT_PARAM( mem, paramName, val, expected, name, type, cast ) \ +error = clGetMemObjectInfo( mem, paramName, sizeof( val ), &val, &size ); \ +test_error( error, "Unable to get mem object " name ); \ +if( val != expected ) \ +{ \ +log_error( "ERROR: Mem object " name " did not validate! 
(expected " type ", got " type " from %s:%d)\n", \ +expected, (cast)val, __FILE__, __LINE__ ); \ +return -1; \ +} \ +if( size != sizeof( val ) ) \ +{ \ +log_error( "ERROR: Returned size of mem object " name " does not validate! (expected %d, got %d from %s:%d)\n", \ +(int)sizeof( val ), (int)size , __FILE__, __LINE__ ); \ +return -1; \ +} + +static void CL_CALLBACK mem_obj_destructor_callback( cl_mem, void * data ) +{ + free( data ); +} + +static unsigned int +get_image_dim(MTdata *d, unsigned int mod) +{ + unsigned int val = 0; + + do + { + val = (unsigned int)genrand_int32(*d) % mod; + } while (val == 0); + + return val; +} + + +int test_get_buffer_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements ) +{ + int error; + size_t size; + void * buffer = NULL; + + clMemWrapper bufferObject; + clMemWrapper subBufferObject; + + cl_mem_flags bufferFlags[] = { + CL_MEM_READ_WRITE, + CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, + CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, + CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, + CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, + CL_MEM_READ_ONLY, + CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, + CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, + CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, + CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, + CL_MEM_WRITE_ONLY, + CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, + CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, + CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, + CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, + CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE, + CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, + CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, + CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY, + CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY | 
CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, + CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, + CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY, + CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, + CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, + CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE, + CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, + CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, + CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY, + CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, + CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, + CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY, + CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, + CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, + CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE, + CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, + CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, + CL_MEM_HOST_NO_ACCESS | 
CL_MEM_READ_ONLY, + CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, + CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, + CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY, + CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, + CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, + }; + + cl_mem_flags subBufferFlags[] = { + CL_MEM_READ_WRITE, + CL_MEM_READ_ONLY, + CL_MEM_WRITE_ONLY, + 0, + CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE, + CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY, + CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY, + CL_MEM_HOST_READ_ONLY | 0, + CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE, + CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY, + CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY, + CL_MEM_HOST_WRITE_ONLY | 0, + CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE, + CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY, + CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY, + CL_MEM_HOST_NO_ACCESS | 0, + }; + + + // Get the address alignment, so we can make sure the sub-buffer test later works properly. + cl_uint addressAlignBits; + error = clGetDeviceInfo( deviceID, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof(addressAlignBits), &addressAlignBits, NULL ); + + size_t addressAlign = addressAlignBits/8; + if ( addressAlign < 128 ) + { + addressAlign = 128; + } + + for ( unsigned int i = 0; i < sizeof(bufferFlags) / sizeof(cl_mem_flags); ++i ) + { + //printf("@@@ bufferFlags[%u]=0x%x\n", i, bufferFlags[ i ]); + if ( bufferFlags[ i ] & CL_MEM_USE_HOST_PTR ) + { + // Create a buffer object to test against. 
+ buffer = malloc( addressAlign * 4 ); + bufferObject = clCreateBuffer( context, bufferFlags[ i ], addressAlign * 4, buffer, &error ); + if ( error ) + { + free( buffer ); + test_error( error, "Unable to create buffer (CL_MEM_USE_HOST_PTR) to test with" ); + } + + // Make sure buffer is cleaned up appropriately if we encounter an error in the rest of the calls. + error = clSetMemObjectDestructorCallback( bufferObject, mem_obj_destructor_callback, buffer ); + test_error( error, "Unable to set mem object destructor callback" ); + + void * ptr; + TEST_MEM_OBJECT_PARAM( bufferObject, CL_MEM_HOST_PTR, ptr, buffer, "host pointer", "%p", void * ) + } + else if ( (bufferFlags[ i ] & CL_MEM_ALLOC_HOST_PTR) && (bufferFlags[ i ] & CL_MEM_COPY_HOST_PTR) ) + { + // Create a buffer object to test against. + buffer = malloc( addressAlign * 4 ); + bufferObject = clCreateBuffer( context, bufferFlags[ i ], addressAlign * 4, buffer, &error ); + if ( error ) + { + free( buffer ); + test_error( error, "Unable to create buffer (CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR) to test with" ); + } + + // Make sure buffer is cleaned up appropriately if we encounter an error in the rest of the calls. + error = clSetMemObjectDestructorCallback( bufferObject, mem_obj_destructor_callback, buffer ); + test_error( error, "Unable to set mem object destructor callback" ); + } + else if ( bufferFlags[ i ] & CL_MEM_ALLOC_HOST_PTR ) + { + // Create a buffer object to test against. + bufferObject = clCreateBuffer( context, bufferFlags[ i ], addressAlign * 4, NULL, &error ); + test_error( error, "Unable to create buffer (CL_MEM_ALLOC_HOST_PTR) to test with" ); + } + else if ( bufferFlags[ i ] & CL_MEM_COPY_HOST_PTR ) + { + // Create a buffer object to test against. 
+ buffer = malloc( addressAlign * 4 ); + bufferObject = clCreateBuffer( context, bufferFlags[ i ], addressAlign * 4, buffer, &error ); + if ( error ) + { + free( buffer ); + test_error( error, "Unable to create buffer (CL_MEM_COPY_HOST_PTR) to test with" ); + } + + // Make sure buffer is cleaned up appropriately if we encounter an error in the rest of the calls. + error = clSetMemObjectDestructorCallback( bufferObject, mem_obj_destructor_callback, buffer ); + test_error( error, "Unable to set mem object destructor callback" ); + } + else + { + // Create a buffer object to test against. + bufferObject = clCreateBuffer( context, bufferFlags[ i ], addressAlign * 4, NULL, &error ); + test_error( error, "Unable to create buffer to test with" ); + } + + // Perform buffer object queries. + cl_mem_object_type type; + TEST_MEM_OBJECT_PARAM( bufferObject, CL_MEM_TYPE, type, CL_MEM_OBJECT_BUFFER, "type", "%d", int ) + + cl_mem_flags flags; + TEST_MEM_OBJECT_PARAM( bufferObject, CL_MEM_FLAGS, flags, (unsigned int)bufferFlags[ i ], "flags", "%d", unsigned int ) + + size_t sz; + TEST_MEM_OBJECT_PARAM( bufferObject, CL_MEM_SIZE, sz, (size_t)( addressAlign * 4 ), "size", "%ld", size_t ) + + cl_uint mapCount; + error = clGetMemObjectInfo( bufferObject, CL_MEM_MAP_COUNT, sizeof( mapCount ), &mapCount, &size ); + test_error( error, "Unable to get mem object map count" ); + if( size != sizeof( mapCount ) ) + { + log_error( "ERROR: Returned size of mem object map count does not validate! (expected %d, got %d from %s:%d)\n", + (int)sizeof( mapCount ), (int)size, __FILE__, __LINE__ ); + return -1; + } + + cl_uint refCount; + error = clGetMemObjectInfo( bufferObject, CL_MEM_REFERENCE_COUNT, sizeof( refCount ), &refCount, &size ); + test_error( error, "Unable to get mem object reference count" ); + if( size != sizeof( refCount ) ) + { + log_error( "ERROR: Returned size of mem object reference count does not validate! 
(expected %d, got %d from %s:%d)\n", + (int)sizeof( refCount ), (int)size, __FILE__, __LINE__ ); + return -1; + } + + cl_context otherCtx; + TEST_MEM_OBJECT_PARAM( bufferObject, CL_MEM_CONTEXT, otherCtx, context, "context", "%p", cl_context ) + + cl_mem origObj; + TEST_MEM_OBJECT_PARAM( bufferObject, CL_MEM_ASSOCIATED_MEMOBJECT, origObj, (void *)NULL, "associated mem object", "%p", void * ) + + size_t offset; + TEST_MEM_OBJECT_PARAM( bufferObject, CL_MEM_OFFSET, offset, 0L, "offset", "%ld", size_t ) + + cl_buffer_region region; + region.origin = addressAlign; + region.size = addressAlign; + + // Loop over possible sub-buffer objects to create. + for ( unsigned int j = 0; j < sizeof(subBufferFlags) / sizeof(cl_mem_flags); ++j ) + { + if ( subBufferFlags[ j ] & CL_MEM_READ_WRITE ) + { + if ( !(bufferFlags[ i ] & CL_MEM_READ_WRITE) ) + continue; // Buffer must be read_write for sub-buffer to be read_write. + } + if ( subBufferFlags[ j ] & CL_MEM_READ_ONLY ) + { + if ( !(bufferFlags[ i ] & CL_MEM_READ_WRITE) && !(bufferFlags[ i ] & CL_MEM_READ_ONLY) ) + continue; // Buffer must be read_write or read_only for sub-buffer to be read_only + } + if ( subBufferFlags[ j ] & CL_MEM_WRITE_ONLY ) + { + if ( !(bufferFlags[ i ] & CL_MEM_READ_WRITE) && !(bufferFlags[ i ] & CL_MEM_WRITE_ONLY) ) + continue; // Buffer must be read_write or write_only for sub-buffer to be write_only + } + if ( subBufferFlags[ j ] & CL_MEM_HOST_READ_ONLY ) + { + if ( (bufferFlags[ i ] & CL_MEM_HOST_NO_ACCESS) || (bufferFlags[ i ] & CL_MEM_HOST_WRITE_ONLY) ) + continue; // Buffer must be host all access or host read_only for sub-buffer to be host read_only + } + if ( subBufferFlags[ j ] & CL_MEM_HOST_WRITE_ONLY ) + { + if ( (bufferFlags[ i ] & CL_MEM_HOST_NO_ACCESS) || (bufferFlags[ i ] & CL_MEM_HOST_READ_ONLY) ) + continue; // Buffer must be host all access or host write_only for sub-buffer to be host write_only + } + //printf("@@@ bufferFlags[%u]=0x%x subBufferFlags[%u]=0x%x\n", i, bufferFlags[ i ], j, 
subBufferFlags[ j ]); + + subBufferObject = clCreateSubBuffer( bufferObject, subBufferFlags[ j ], CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error ); + test_error( error, "Unable to create sub-buffer to test against" ); + + // Perform sub-buffer object queries. + cl_mem_object_type type; + TEST_MEM_OBJECT_PARAM( subBufferObject, CL_MEM_TYPE, type, CL_MEM_OBJECT_BUFFER, "type", "%d", int ) + + cl_mem_flags flags; + cl_mem_flags inheritedFlags = subBufferFlags[ j ]; + if ( (subBufferFlags[ j ] & (CL_MEM_READ_WRITE | CL_MEM_READ_ONLY | CL_MEM_WRITE_ONLY)) == 0 ) + { + inheritedFlags |= bufferFlags[ i ] & (CL_MEM_READ_WRITE | CL_MEM_READ_ONLY | CL_MEM_WRITE_ONLY); + } + inheritedFlags |= bufferFlags[ i ] & (CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR); + if ( (subBufferFlags[ j ] & (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS)) == 0) + { + inheritedFlags |= bufferFlags[ i ] & (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS); + } + TEST_MEM_OBJECT_PARAM( subBufferObject, CL_MEM_FLAGS, flags, (unsigned int)inheritedFlags, "flags", "%d", unsigned int ) + + TEST_MEM_OBJECT_PARAM( subBufferObject, CL_MEM_SIZE, sz, (size_t)( addressAlign ), "size", "%ld", size_t ) + + if ( bufferFlags[ i ] & CL_MEM_USE_HOST_PTR ) + { + void * ptr; + void * offsetInBuffer = (char *)buffer + addressAlign; + + TEST_MEM_OBJECT_PARAM( subBufferObject, CL_MEM_HOST_PTR, ptr, offsetInBuffer, "host pointer", "%p", void * ) + } + + cl_uint mapCount; + error = clGetMemObjectInfo( subBufferObject, CL_MEM_MAP_COUNT, sizeof( mapCount ), &mapCount, &size ); + test_error( error, "Unable to get mem object map count" ); + if( size != sizeof( mapCount ) ) + { + log_error( "ERROR: Returned size of mem object map count does not validate! 
(expected %d, got %d from %s:%d)\n", + (int)sizeof( mapCount ), (int)size, __FILE__, __LINE__ ); + return -1; + } + + cl_uint refCount; + error = clGetMemObjectInfo( subBufferObject, CL_MEM_REFERENCE_COUNT, sizeof( refCount ), &refCount, &size ); + test_error( error, "Unable to get mem object reference count" ); + if( size != sizeof( refCount ) ) + { + log_error( "ERROR: Returned size of mem object reference count does not validate! (expected %d, got %d from %s:%d)\n", + (int)sizeof( refCount ), (int)size, __FILE__, __LINE__ ); + return -1; + } + + cl_context otherCtx; + TEST_MEM_OBJECT_PARAM( subBufferObject, CL_MEM_CONTEXT, otherCtx, context, "context", "%p", cl_context ) + + TEST_MEM_OBJECT_PARAM( subBufferObject, CL_MEM_ASSOCIATED_MEMOBJECT, origObj, (cl_mem)bufferObject, "associated mem object", "%p", void * ) + + TEST_MEM_OBJECT_PARAM( subBufferObject, CL_MEM_OFFSET, offset, (size_t)( addressAlign ), "offset", "%ld", size_t ) + + clReleaseMemObject( subBufferObject ); + subBufferObject = NULL; + + } + + clReleaseMemObject( bufferObject ); + bufferObject = NULL; + } + + return CL_SUCCESS; +} + + +int test_get_imageObject_info( cl_mem * image, cl_mem_flags objectFlags, cl_image_desc *imageInfo, cl_image_format *imageFormat, size_t pixelSize, cl_context context ) +{ + int error; + size_t size; + cl_mem_object_type type; + cl_mem_flags flags; + cl_uint mapCount; + cl_uint refCount; + size_t rowPitchMultiplier; + size_t slicePitchMultiplier; + cl_context otherCtx; + size_t offset; + size_t sz; + + TEST_MEM_OBJECT_PARAM( *image, CL_MEM_TYPE, type, imageInfo->image_type, "type", "%d", int ) + + TEST_MEM_OBJECT_PARAM( *image, CL_MEM_FLAGS, flags, (unsigned int)objectFlags, "flags", "%d", unsigned int ) + + error = clGetMemObjectInfo( *image, CL_MEM_SIZE, sizeof( sz ), &sz, NULL ); + test_error( error, "Unable to get mem size" ); + + // The size returned is not constrained by the spec. 
+ + error = clGetMemObjectInfo( *image, CL_MEM_MAP_COUNT, sizeof( mapCount ), &mapCount, &size ); + test_error( error, "Unable to get mem object map count" ); + if( size != sizeof( mapCount ) ) + { + log_error( "ERROR: Returned size of mem object map count does not validate! (expected %d, got %d from %s:%d)\n", + (int)sizeof( mapCount ), (int)size, __FILE__, __LINE__ ); + return -1; + } + + error = clGetMemObjectInfo( *image, CL_MEM_REFERENCE_COUNT, sizeof( refCount ), &refCount, &size ); + test_error( error, "Unable to get mem object reference count" ); + if( size != sizeof( refCount ) ) + { + log_error( "ERROR: Returned size of mem object reference count does not validate! (expected %d, got %d from %s:%d)\n", + (int)sizeof( refCount ), (int)size, __FILE__, __LINE__ ); + return -1; + } + + TEST_MEM_OBJECT_PARAM( *image, CL_MEM_CONTEXT, otherCtx, context, "context", "%p", cl_context ) + + TEST_MEM_OBJECT_PARAM( *image, CL_MEM_OFFSET, offset, 0L, "offset", "%ld", size_t ) + + return CL_SUCCESS; +} + + +int test_get_image_info( cl_device_id deviceID, cl_context context, cl_mem_object_type type ) +{ + int error; + size_t size; + void * image = NULL; + + cl_mem imageObject; + cl_image_desc imageInfo; + + cl_mem_flags imageFlags[] = { + CL_MEM_READ_WRITE, + CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, + CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, + CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, + CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, + CL_MEM_READ_ONLY, + CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, + CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, + CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, + CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, + CL_MEM_WRITE_ONLY, + CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, + CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, + CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, + CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, + CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE, + CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE | 
CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, + CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, + CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY, + CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, + CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, + CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY, + CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, + CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, + CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE, + CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, + CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, + CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY, + CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, + CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, + CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY, + CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, + CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, + CL_MEM_HOST_NO_ACCESS | 
CL_MEM_READ_WRITE, + CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, + CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, + CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY, + CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, + CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, + CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY, + CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, + CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, + }; + MTdata d; + + PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID ) + + cl_image_format imageFormat; + size_t pixelSize = 4; + + imageFormat.image_channel_order = CL_RGBA; + imageFormat.image_channel_data_type = CL_UNORM_INT8; + + imageInfo.image_width = imageInfo.image_height = imageInfo.image_depth = 1; + imageInfo.image_array_size = 0; + imageInfo.num_mip_levels = imageInfo.num_samples = 0; + imageInfo.mem_object = NULL; + + d = init_genrand( gRandomSeed ); + + for ( unsigned int i = 0; i < sizeof(imageFlags) / sizeof(cl_mem_flags); ++i ) + { + imageInfo.image_row_pitch = 0; + imageInfo.image_slice_pitch = 0; + + switch (type) + { + case CL_MEM_OBJECT_IMAGE1D: + imageInfo.image_width = get_image_dim(&d, 1023); + imageInfo.image_type = CL_MEM_OBJECT_IMAGE1D; + break; + + case CL_MEM_OBJECT_IMAGE2D: + imageInfo.image_width = get_image_dim(&d, 1023); + imageInfo.image_height = get_image_dim(&d, 1023); + imageInfo.image_type = CL_MEM_OBJECT_IMAGE2D; + break; + + case CL_MEM_OBJECT_IMAGE3D: + error 
= checkFor3DImageSupport(deviceID); + if (error == CL_IMAGE_FORMAT_NOT_SUPPORTED) + { + log_info("Device doesn't support 3D images. Skipping test.\n"); + return CL_SUCCESS; + } + imageInfo.image_width = get_image_dim(&d, 127); + imageInfo.image_height = get_image_dim(&d, 127); + imageInfo.image_depth = get_image_dim(&d, 127); + imageInfo.image_type = CL_MEM_OBJECT_IMAGE3D; + break; + + case CL_MEM_OBJECT_IMAGE1D_ARRAY: + imageInfo.image_width = get_image_dim(&d, 1023); + imageInfo.image_array_size = get_image_dim(&d, 1023); + imageInfo.image_type = CL_MEM_OBJECT_IMAGE1D_ARRAY; + break; + + case CL_MEM_OBJECT_IMAGE2D_ARRAY: + imageInfo.image_width = get_image_dim(&d, 255); + imageInfo.image_height = get_image_dim(&d, 255); + imageInfo.image_array_size = get_image_dim(&d, 255); + imageInfo.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY; + break; + } + + if ( imageFlags[i] & CL_MEM_USE_HOST_PTR ) + { + // Create an image object to test against. + image = malloc( imageInfo.image_width * imageInfo.image_height * imageInfo.image_depth * pixelSize * + ((imageInfo.image_array_size == 0) ? 1 : imageInfo.image_array_size) ); + imageObject = clCreateImage( context, imageFlags[i], &imageFormat, &imageInfo, image, &error ); + if ( error ) + { + free( image ); + test_error( error, "Unable to create image with (CL_MEM_USE_HOST_PTR) to test with" ); + } + + // Make sure image is cleaned up appropriately if we encounter an error in the rest of the calls. + error = clSetMemObjectDestructorCallback( imageObject, mem_obj_destructor_callback, image ); + test_error( error, "Unable to set mem object destructor callback" ); + + void * ptr; + TEST_MEM_OBJECT_PARAM( imageObject, CL_MEM_HOST_PTR, ptr, image, "host pointer", "%p", void * ) + int ret = test_get_imageObject_info( &imageObject, imageFlags[i], &imageInfo, &imageFormat, pixelSize, context ); + if (ret) + return ret; + + // release image object + clReleaseMemObject(imageObject); + + // Try again with non-zero rowPitch. 
+ imageInfo.image_row_pitch = imageInfo.image_width * pixelSize; + switch (type) + { + case CL_MEM_OBJECT_IMAGE1D_ARRAY: + case CL_MEM_OBJECT_IMAGE2D_ARRAY: + case CL_MEM_OBJECT_IMAGE3D: + imageInfo.image_slice_pitch = imageInfo.image_row_pitch * imageInfo.image_height; + break; + } + + image = malloc( imageInfo.image_width * imageInfo.image_height * imageInfo.image_depth * pixelSize * + ((imageInfo.image_array_size == 0) ? 1 : imageInfo.image_array_size) ); + imageObject = clCreateImage( context, imageFlags[i], &imageFormat, &imageInfo, image, &error ); + if ( error ) + { + free( image ); + test_error( error, "Unable to create image2d (CL_MEM_USE_HOST_PTR) to test with" ); + } + + // Make sure image2d is cleaned up appropriately if we encounter an error in the rest of the calls. + error = clSetMemObjectDestructorCallback( imageObject, mem_obj_destructor_callback, image ); + test_error( error, "Unable to set mem object destructor callback" ); + + TEST_MEM_OBJECT_PARAM( imageObject, CL_MEM_HOST_PTR, ptr, image, "host pointer", "%p", void * ) + ret = test_get_imageObject_info( &imageObject, imageFlags[i], &imageInfo, &imageFormat, pixelSize, context ); + if (ret) + return ret; + + } + else if ( (imageFlags[i] & CL_MEM_ALLOC_HOST_PTR) && (imageFlags[i] & CL_MEM_COPY_HOST_PTR) ) + { + // Create an image object to test against. + image = malloc( imageInfo.image_width * imageInfo.image_height * imageInfo.image_depth * pixelSize * + ((imageInfo.image_array_size == 0) ? 1 : imageInfo.image_array_size) ); + imageObject = clCreateImage( context, imageFlags[i], &imageFormat, &imageInfo, image, &error ); + if ( error ) + { + free( image ); + test_error( error, "Unable to create image with (CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR) to test with" ); + } + + // Make sure image is cleaned up appropriately if we encounter an error in the rest of the calls. 
+ error = clSetMemObjectDestructorCallback( imageObject, mem_obj_destructor_callback, image ); + test_error( error, "Unable to set mem object destructor callback" ); + int ret = test_get_imageObject_info( &imageObject, imageFlags[ i ], &imageInfo, &imageFormat, pixelSize, context ); + if (ret) + return ret; + + // release image object + clReleaseMemObject(imageObject); + + // Try again with non-zero rowPitch. + imageInfo.image_row_pitch = imageInfo.image_width * pixelSize; + switch (type) + { + case CL_MEM_OBJECT_IMAGE1D_ARRAY: + case CL_MEM_OBJECT_IMAGE2D_ARRAY: + case CL_MEM_OBJECT_IMAGE3D: + imageInfo.image_slice_pitch = imageInfo.image_row_pitch * imageInfo.image_height; + break; + } + + image = malloc( imageInfo.image_width * imageInfo.image_height * imageInfo.image_depth * pixelSize * + ((imageInfo.image_array_size == 0) ? 1 : imageInfo.image_array_size) ); + imageObject = clCreateImage( context, imageFlags[i], &imageFormat, &imageInfo, image, &error ); + if ( error ) + { + free( image ); + test_error( error, "Unable to create image with (CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR) to test with" ); + } + + // Make sure image is cleaned up appropriately if we encounter an error in the rest of the calls. + error = clSetMemObjectDestructorCallback( imageObject, mem_obj_destructor_callback, image ); + test_error( error, "Unable to set mem object destructor callback" ); + ret = test_get_imageObject_info( &imageObject, imageFlags[i], &imageInfo, &imageFormat, pixelSize, context ); + if (ret) + return ret; + + } + else if ( imageFlags[i] & CL_MEM_ALLOC_HOST_PTR ) + { + // Create an image object to test against. 
+ imageObject = clCreateImage( context, imageFlags[i], &imageFormat, &imageInfo, NULL, &error ); + test_error( error, "Unable to create image with (CL_MEM_ALLOC_HOST_PTR) to test with" ); + int ret = test_get_imageObject_info( &imageObject, imageFlags[i], &imageInfo, &imageFormat, pixelSize, context ); + if (ret) + return ret; + + } + else if ( imageFlags[i] & CL_MEM_COPY_HOST_PTR ) + { + // Create an image object to test against. + image = malloc( imageInfo.image_width * imageInfo.image_height * imageInfo.image_depth * pixelSize * + ((imageInfo.image_array_size == 0) ? 1 : imageInfo.image_array_size) ); + imageObject = clCreateImage( context, imageFlags[i], &imageFormat, &imageInfo, image, &error ); + if ( error ) + { + free( image ); + test_error( error, "Unable to create image with (CL_MEM_COPY_HOST_PTR) to test with" ); + } + + // Make sure image is cleaned up appropriately if we encounter an error in the rest of the calls. + error = clSetMemObjectDestructorCallback( imageObject, mem_obj_destructor_callback, image ); + test_error( error, "Unable to set mem object destructor callback" ); + int ret = test_get_imageObject_info( &imageObject, imageFlags[i], &imageInfo, &imageFormat, pixelSize, context ); + if (ret) + return ret; + + clReleaseMemObject(imageObject); + + // Try again with non-zero rowPitch. + imageInfo.image_row_pitch = imageInfo.image_width * pixelSize; + switch (type) + { + case CL_MEM_OBJECT_IMAGE1D_ARRAY: + case CL_MEM_OBJECT_IMAGE2D_ARRAY: + case CL_MEM_OBJECT_IMAGE3D: + imageInfo.image_slice_pitch = imageInfo.image_row_pitch * imageInfo.image_height; + break; + } + + image = malloc( imageInfo.image_width * imageInfo.image_height * imageInfo.image_depth * pixelSize * + ((imageInfo.image_array_size == 0) ? 
1 : imageInfo.image_array_size) ); + imageObject = clCreateImage( context, imageFlags[i], &imageFormat, &imageInfo, image, &error ); + if ( error ) + { + free( image ); + test_error( error, "Unable to create image with (CL_MEM_COPY_HOST_PTR) to test with" ); + } + + // Make sure image is cleaned up appropriately if we encounter an error in the rest of the calls. + error = clSetMemObjectDestructorCallback( imageObject, mem_obj_destructor_callback, image ); + test_error( error, "Unable to set mem object destructor callback" ); + ret = test_get_imageObject_info( &imageObject, imageFlags[i], &imageInfo, &imageFormat, pixelSize, context ); + if (ret) + return ret; + + } + else + { + // Create an image object to test against. + imageObject = clCreateImage( context, imageFlags[i], &imageFormat, &imageInfo, NULL, &error ); + test_error( error, "Unable to create image to test with" ); + int ret = test_get_imageObject_info( &imageObject, imageFlags[i], &imageInfo, &imageFormat, pixelSize, context ); + if (ret) + return ret; + + } + + clReleaseMemObject( imageObject ); + } + + return CL_SUCCESS; +} + + +int test_get_image2d_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements ) +{ + return test_get_image_info(deviceID, context, CL_MEM_OBJECT_IMAGE2D); +} + +int test_get_image3d_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements ) +{ + return test_get_image_info(deviceID, context, CL_MEM_OBJECT_IMAGE3D); +} + +int test_get_image1d_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements ) +{ + return test_get_image_info(deviceID, context, CL_MEM_OBJECT_IMAGE1D); +} + +int test_get_image1d_array_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements ) +{ + return test_get_image_info(deviceID, context, CL_MEM_OBJECT_IMAGE1D_ARRAY); +} + +int test_get_image2d_array_info( cl_device_id deviceID, cl_context context, 
cl_command_queue ignoreQueue, int num_elements ) +{ + return test_get_image_info(deviceID, context, CL_MEM_OBJECT_IMAGE2D_ARRAY); +} + + diff --git a/test_conformance/api/test_mem_objects.cpp b/test_conformance/api/test_mem_objects.cpp new file mode 100644 index 00000000..b0dc99d4 --- /dev/null +++ b/test_conformance/api/test_mem_objects.cpp @@ -0,0 +1,108 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "testBase.h" + +static volatile cl_int sDestructorIndex; + +void CL_CALLBACK mem_destructor_callback( cl_mem memObject, void * userData ) +{ + int * userPtr = (int *)userData; + + // ordering of callbacks is guaranteed, meaning we don't need to do atomic operation here + *userPtr = ++sDestructorIndex; +} + +#ifndef ABS +#define ABS( x ) ( ( x < 0 ) ? 
-x : x ) +#endif + +int test_mem_object_destructor_callback_single( clMemWrapper &memObject ) +{ + cl_int error; + int i; + + // Set up some variables to catch the order in which callbacks are called + volatile int callbackOrders[ 3 ] = { 0, 0, 0 }; + sDestructorIndex = 0; + + // Set up the callbacks + error = clSetMemObjectDestructorCallback( memObject, mem_destructor_callback, (void*) &callbackOrders[ 0 ] ); + test_error( error, "Unable to set destructor callback" ); + + error = clSetMemObjectDestructorCallback( memObject, mem_destructor_callback, (void*) &callbackOrders[ 1 ] ); + test_error( error, "Unable to set destructor callback" ); + + error = clSetMemObjectDestructorCallback( memObject, mem_destructor_callback, (void*) &callbackOrders[ 2 ] ); + test_error( error, "Unable to set destructor callback" ); + + // Now release the buffer, which SHOULD call the callbacks + error = clReleaseMemObject( memObject ); + test_error( error, "Unable to release test buffer" ); + + // Note: since we manually released the mem wrapper, we need to set it to NULL to prevent a double-release + memObject = NULL; + + // At this point, all three callbacks should have already been called + int numErrors = 0; + for( i = 0; i < 3; i++ ) + { + // Spin waiting for the release to finish. If you don't call the mem_destructor_callback, you will not + // pass the test. bugzilla 6316 + while( 0 == callbackOrders[i] ) + {} + + if( ABS( callbackOrders[ i ] ) != 3-i ) + { + log_error( "\tERROR: Callback %d was called in the wrong order! (Was called order %d, should have been order %d)\n", + i+1, ABS( callbackOrders[ i ] ), i ); + numErrors++; + } + } + + return ( numErrors > 0 ) ? 
-1 : 0; +} + +int test_mem_object_destructor_callback(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + clMemWrapper testBuffer, testImage; + cl_int error; + + + // Create a buffer and an image to test callbacks against + testBuffer = clCreateBuffer( context, CL_MEM_READ_WRITE, 1024, NULL, &error ); + test_error( error, "Unable to create testing buffer" ); + + if( test_mem_object_destructor_callback_single( testBuffer ) != 0 ) + { + log_error( "ERROR: Destructor callbacks for buffer object FAILED\n" ); + return -1; + } + + if( checkForImageSupport( deviceID ) == 0 ) + { + cl_image_format imageFormat = { CL_RGBA, CL_SIGNED_INT8 }; + testImage = create_image_2d( context, CL_MEM_READ_ONLY, &imageFormat, 16, 16, 0, NULL, &error ); + test_error( error, "Unable to create testing image" ); + + if( test_mem_object_destructor_callback_single( testImage ) != 0 ) + { + log_error( "ERROR: Destructor callbacks for image object FAILED\n" ); + return -1; + } + } + + return 0; +} diff --git a/test_conformance/api/test_native_kernel.cpp b/test_conformance/api/test_native_kernel.cpp new file mode 100644 index 00000000..2d52134b --- /dev/null +++ b/test_conformance/api/test_native_kernel.cpp @@ -0,0 +1,121 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "testBase.h" + +#ifndef _WIN32 +#include +#endif + +#include "../../test_common/harness/conversions.h" + +extern cl_uint gRandomSeed; + +static void CL_CALLBACK test_native_kernel_fn( void *userData ) +{ + struct arg_struct { + cl_int * source; + cl_int * dest; + cl_int count; + } *args = (arg_struct *)userData; + + for( cl_int i = 0; i < args->count; i++ ) + args->dest[ i ] = args->source[ i ]; +} + +int test_native_kernel(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ) +{ + int error; + RandomSeed seed( gRandomSeed ); + // Check if we support native kernels + cl_device_exec_capabilities capabilities; + error = clGetDeviceInfo(device, CL_DEVICE_EXECUTION_CAPABILITIES, sizeof(capabilities), &capabilities, NULL); + if (!(capabilities & CL_EXEC_NATIVE_KERNEL)) { + log_info("Device does not support CL_EXEC_NATIVE_KERNEL.\n"); + return 0; + } + + clMemWrapper streams[ 2 ]; +#if !(defined (_WIN32) && defined (_MSC_VER)) + cl_int inBuffer[ n_elems ], outBuffer[ n_elems ]; +#else + cl_int* inBuffer = (cl_int *)_malloca( n_elems * sizeof(cl_int) ); + cl_int* outBuffer = (cl_int *)_malloca( n_elems * sizeof(cl_int) ); +#endif + clEventWrapper finishEvent; + + struct arg_struct + { + cl_mem inputStream; + cl_mem outputStream; + cl_int count; + } args; + + + // Create some input values + generate_random_data( kInt, n_elems, seed, inBuffer ); + + + // Create I/O streams + streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, n_elems * sizeof(cl_int), inBuffer, &error ); + test_error( error, "Unable to create I/O stream" ); + streams[ 1 ] = clCreateBuffer( context, 0, n_elems * sizeof(cl_int), NULL, &error ); + test_error( error, "Unable to create I/O stream" ); + + + // Set up the arrays to call with + args.inputStream = streams[ 0 ]; + args.outputStream = streams[ 1 ]; + args.count = n_elems; + + void * memLocs[ 2 ] = { &args.inputStream, &args.outputStream }; + + + // Run the kernel + error = clEnqueueNativeKernel( queue, 
test_native_kernel_fn, + &args, sizeof( args ), + 2, &streams[ 0 ], + (const void **)memLocs, + 0, NULL, &finishEvent ); + test_error( error, "Unable to queue native kernel" ); + + // Finish and wait for the kernel to complete + error = clFinish( queue ); + test_error(error, "clFinish failed"); + + error = clWaitForEvents( 1, &finishEvent ); + test_error(error, "clWaitForEvents failed"); + + // Now read the results and verify + error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, n_elems * sizeof(cl_int), outBuffer, 0, NULL, NULL ); + test_error( error, "Unable to read results" ); + + for( int i = 0; i < n_elems; i++ ) + { + if( inBuffer[ i ] != outBuffer[ i ] ) + { + log_error( "ERROR: Data sample %d for native kernel did not validate (expected %d, got %d)\n", + i, (int)inBuffer[ i ], (int)outBuffer[ i ] ); + return 1; + } + } + + return 0; +} + + + + + diff --git a/test_conformance/api/test_null_buffer_arg.c b/test_conformance/api/test_null_buffer_arg.c new file mode 100644 index 00000000..b845fc0b --- /dev/null +++ b/test_conformance/api/test_null_buffer_arg.c @@ -0,0 +1,206 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include +#if defined(__APPLE__) +#include +#include +#else +#include +#include +#endif +#include "testBase.h" +#include "../../test_common/harness/typeWrappers.h" +#include "../../test_common/harness/testHarness.h" +#include "procs.h" + + +enum { SUCCESS, FAILURE }; +typedef enum { NON_NULL_PATH, ADDROF_NULL_PATH, NULL_PATH } test_type; + +#define NITEMS 4096 + +/* places the comparison result of value of the src ptr against 0 into each element of the output + * array, to allow testing that the kernel actually _gets_ the NULL value */ +const char *kernel_string_long = +"kernel void test_kernel(global float *src, global long *dst)\n" +"{\n" +" uint tid = get_global_id(0);\n" +" dst[tid] = (long)(src != 0);\n" +"}\n"; + +// For gIsEmbedded +const char *kernel_string = +"kernel void test_kernel(global float *src, global int *dst)\n" +"{\n" +" uint tid = get_global_id(0);\n" +" dst[tid] = (int)(src != 0);\n" +"}\n"; + + +/* + * The guts of the test: + * call setKernelArgs with a regular buffer, &NULL, or NULL depending on + * the value of 'test_type' + */ +static int test_setargs_and_execution(cl_command_queue queue, cl_kernel kernel, + cl_mem test_buf, cl_mem result_buf, test_type type) +{ + unsigned int test_success = 0; + + unsigned int i; + cl_int status; + char *typestr; + + if (type == NON_NULL_PATH) { + status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &test_buf); + typestr = "non-NULL"; + } else if (type == ADDROF_NULL_PATH) { + test_buf = NULL; + status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &test_buf); + typestr = "&NULL"; + } else if (type == NULL_PATH) { + status = clSetKernelArg(kernel, 0, sizeof(cl_mem), NULL); + typestr = "NULL"; + } + + log_info("Testing setKernelArgs with %s buffer.\n", typestr); + + if (status != CL_SUCCESS) { + log_error("clSetKernelArg failed with status: %d\n", status); + return FAILURE; // no point in continuing *this* test + } + + size_t global = NITEMS; + status = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, 
&global, + NULL, 0, NULL, NULL); + test_error(status, "NDRangeKernel failed."); + + if (gIsEmbedded) + { + cl_int* host_result = (cl_int*)malloc(NITEMS*sizeof(cl_int)); + status = clEnqueueReadBuffer(queue, result_buf, CL_TRUE, 0, + sizeof(cl_int)*NITEMS, host_result, 0, NULL, NULL); + test_error(status, "ReadBuffer failed."); + // in the non-null case, we expect NONZERO values: + if (type == NON_NULL_PATH) { + for (i=0; i + +#define EXTENSION_NAME_BUF_SIZE 4096 + +#define PRINT_EXTENSION_INFO 0 + +int test_platform_extensions(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + const char * extensions[] = { + "cl_khr_byte_addressable_store", +// "cl_APPLE_SetMemObjectDestructor", + "cl_khr_global_int32_base_atomics", + "cl_khr_global_int32_extended_atomics", + "cl_khr_local_int32_base_atomics", + "cl_khr_local_int32_extended_atomics", + "cl_khr_int64_base_atomics", + "cl_khr_int64_extended_atomics", +// need to put in entires for various atomics + "cl_khr_3d_image_writes", + "cl_khr_fp16", + "cl_khr_fp64", + NULL + }; + + bool extensionsSupported[] = { + false, //"cl_khr_byte_addressable_store", + false, // need to put in entires for various atomics + false, // "cl_khr_global_int32_base_atomics", + false, // "cl_khr_global_int32_extended_atomics", + false, // "cl_khr_local_int32_base_atomics", + false, // "cl_khr_local_int32_extended_atomics", + false, // "cl_khr_int64_base_atomics", + false, // "cl_khr_int64_extended_atomics", + false, //"cl_khr_3d_image_writes", + false, //"cl_khr_fp16", + false, //"cl_khr_fp64", + false //NULL + }; + + int extensionIndex; + + cl_platform_id platformID; + cl_int err; + + char platform_extensions[EXTENSION_NAME_BUF_SIZE]; + char device_extensions[EXTENSION_NAME_BUF_SIZE]; + + // Okay, so what we're going to do is just check the device indicated by + // deviceID against the platform that includes this device + + + // pass CL_DEVICE_PLATFORM to clGetDeviceInfo + // to get a result of type 
cl_platform_id + + err = clGetDeviceInfo(deviceID, + CL_DEVICE_PLATFORM, + sizeof(cl_platform_id), + (void *)(&platformID), + NULL); + + if(err != CL_SUCCESS) + { + vlog_error("test_platform_extensions : could not get platformID from device\n"); + return -1; + } + + + // now we grab the set of extensions specified by the platform + err = clGetPlatformInfo(platformID, + CL_PLATFORM_EXTENSIONS, + sizeof(platform_extensions), + (void *)(&platform_extensions[0]), + NULL); + if(err != CL_SUCCESS) + { + vlog_error("test_platform_extensions : could not get extension string from platform\n"); + return -1; + } + +#if PRINT_EXTENSION_INFO + log_info("Platform extensions include \"%s\"\n\n", platform_extensions); +#endif + + // here we parse the platform extensions, to look for the "important" ones + for(extensionIndex=0; extensions[extensionIndex] != NULL; ++extensionIndex) + { + if(strstr(platform_extensions, extensions[extensionIndex]) != NULL) + { + // we found it +#if PRINT_EXTENSION_INFO + log_info("Found \"%s\" in platform extensions\n", + extensions[extensionIndex]); +#endif + extensionsSupported[extensionIndex] = true; + } + } + + // and then we grab the set of extensions specified by the device + // (this can be turned into a "loop over all devices in this platform") + err = clGetDeviceInfo(deviceID, + CL_DEVICE_EXTENSIONS, + sizeof(device_extensions), + (void *)(&device_extensions[0]), + NULL); + if(err != CL_SUCCESS) + { + vlog_error("test_platform_extensions : could not get extension string from device\n"); + return -1; + } + + +#if PRINT_EXTENSION_INFO + log_info("Device extensions include \"%s\"\n\n", device_extensions); +#endif + + for(extensionIndex=0; extensions[extensionIndex] != NULL; ++extensionIndex) + { + if(extensionsSupported[extensionIndex] == false) + { + continue; // skip this one + } + + if(strstr(device_extensions, extensions[extensionIndex]) == NULL) + { + // device does not support it + vlog_error("Platform supports extension \"%s\" but device 
does not\n", + extensions[extensionIndex]); + return -1; + } + } + return 0; +} + +int test_get_platform_ids(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { + cl_platform_id platforms[16]; + cl_uint num_platforms; + char *string_returned; + + string_returned = (char*)malloc(8192); + + int total_errors = 0; + int err = CL_SUCCESS; + + + err = clGetPlatformIDs(16, platforms, &num_platforms); + test_error(err, "clGetPlatformIDs failed"); + + if (num_platforms <= 16) { + // Try with NULL + err = clGetPlatformIDs(num_platforms, platforms, NULL); + test_error(err, "clGetPlatformIDs failed with NULL for return size"); + } + + if (num_platforms < 1) { + log_error("Found 0 platforms.\n"); + return -1; + } + log_info("Found %d platforms.\n", num_platforms); + + + for (int p=0; p<(int)num_platforms; p++) { + cl_device_id *devices; + cl_uint num_devices; + size_t size; + + + log_info("Platform %d (%p):\n", p, platforms[p]); + + memset(string_returned, 0, 8192); + err = clGetPlatformInfo(platforms[p], CL_PLATFORM_PROFILE, 8192, string_returned, &size); + test_error(err, "clGetPlatformInfo for CL_PLATFORM_PROFILE failed"); + log_info("\tCL_PLATFORM_PROFILE: %s\n", string_returned); + if (strlen(string_returned)+1 != size) { + log_error("Returned string length %ld does not equal reported one %ld.\n", strlen(string_returned)+1, size); + total_errors++; + } + + memset(string_returned, 0, 8192); + err = clGetPlatformInfo(platforms[p], CL_PLATFORM_VERSION, 8192, string_returned, &size); + test_error(err, "clGetPlatformInfo for CL_PLATFORM_VERSION failed"); + log_info("\tCL_PLATFORM_VERSION: %s\n", string_returned); + if (strlen(string_returned)+1 != size) { + log_error("Returned string length %ld does not equal reported one %ld.\n", strlen(string_returned)+1, size); + total_errors++; + } + + memset(string_returned, 0, 8192); + err = clGetPlatformInfo(platforms[p], CL_PLATFORM_NAME, 8192, string_returned, &size); + test_error(err, 
"clGetPlatformInfo for CL_PLATFORM_NAME failed"); + log_info("\tCL_PLATFORM_NAME: %s\n", string_returned); + if (strlen(string_returned)+1 != size) { + log_error("Returned string length %ld does not equal reported one %ld.\n", strlen(string_returned)+1, size); + total_errors++; + } + + memset(string_returned, 0, 8192); + err = clGetPlatformInfo(platforms[p], CL_PLATFORM_VENDOR, 8192, string_returned, &size); + test_error(err, "clGetPlatformInfo for CL_PLATFORM_VENDOR failed"); + log_info("\tCL_PLATFORM_VENDOR: %s\n", string_returned); + if (strlen(string_returned)+1 != size) { + log_error("Returned string length %ld does not equal reported one %ld.\n", strlen(string_returned)+1, size); + total_errors++; + } + + memset(string_returned, 0, 8192); + err = clGetPlatformInfo(platforms[p], CL_PLATFORM_EXTENSIONS, 8192, string_returned, &size); + test_error(err, "clGetPlatformInfo for CL_PLATFORM_EXTENSIONS failed"); + log_info("\tCL_PLATFORM_EXTENSIONS: %s\n", string_returned); + if (strlen(string_returned)+1 != size) { + log_error("Returned string length %ld does not equal reported one %ld.\n", strlen(string_returned)+1, size); + total_errors++; + } + + err = clGetDeviceIDs(platforms[p], CL_DEVICE_TYPE_ALL, 0, NULL, &num_devices); + test_error(err, "clGetDeviceIDs size failed.\n"); + devices = (cl_device_id *)malloc(num_devices*sizeof(cl_device_id)); + memset(devices, 0, sizeof(cl_device_id)*num_devices); + err = clGetDeviceIDs(platforms[p], CL_DEVICE_TYPE_ALL, num_devices, devices, NULL); + test_error(err, "clGetDeviceIDs failed.\n"); + + log_info("\tPlatform has %d devices.\n", (int)num_devices); + for (int d=0; d<(int)num_devices; d++) { + size_t returned_size; + cl_platform_id returned_platform; + cl_context context; + cl_context_properties properties[] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platforms[p], 0 }; + + err = clGetDeviceInfo(devices[d], CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &returned_platform, &returned_size); + test_error(err, 
"clGetDeviceInfo failed for CL_DEVICE_PLATFORM\n"); + if (returned_size != sizeof(cl_platform_id)) { + log_error("Reported return size (%ld) does not match expected size (%ld).\n", returned_size, sizeof(cl_platform_id)); + total_errors++; + } + + memset(string_returned, 0, 8192); + err = clGetDeviceInfo(devices[d], CL_DEVICE_NAME, 8192, string_returned, NULL); + test_error(err, "clGetDeviceInfo failed for CL_DEVICE_NAME\n"); + + log_info("\t\tPlatform for device %d (%s) is %p.\n", d, string_returned, returned_platform); + + log_info("\t\t\tTesting clCreateContext for the platform/device...\n"); + // Try creating a context for the platform + context = clCreateContext(properties, 1, &devices[d], NULL, NULL, &err); + test_error(err, "\t\tclCreateContext failed for device with platform properties\n"); + + memset(properties, 0, sizeof(cl_context_properties)*3); + + err = clGetContextInfo(context, CL_CONTEXT_PROPERTIES, sizeof(cl_context_properties)*3, properties, &returned_size); + test_error(err, "clGetContextInfo for CL_CONTEXT_PROPERTIES failed"); + if (returned_size != sizeof(cl_context_properties)*3) { + log_error("Invalid size returned from clGetContextInfo for CL_CONTEXT_PROPERTIES. Got %ld, expected %ld.\n", + returned_size, sizeof(cl_context_properties)*3); + total_errors++; + } + + if (properties[0] != (cl_context_properties)CL_CONTEXT_PLATFORM || properties[1] != (cl_context_properties)platforms[p]) { + log_error("Wrong properties returned. 
Expected: [%p %p], got [%p %p]\n", + (void*)CL_CONTEXT_PLATFORM, platforms[p], (void*)properties[0], (void*)properties[1]); + total_errors++; + } + + err = clReleaseContext(context); + test_error(err, "clReleaseContext failed"); + } + free(devices); + } + + free(string_returned); + + return total_errors; +} diff --git a/test_conformance/api/test_queries.cpp b/test_conformance/api/test_queries.cpp new file mode 100644 index 00000000..0d412a0d --- /dev/null +++ b/test_conformance/api/test_queries.cpp @@ -0,0 +1,643 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "testBase.h" +#include "../../test_common/harness/imageHelpers.h" +#include +#include + +int test_get_platform_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + cl_platform_id platform; + cl_int error; + char buffer[ 16384 ]; + size_t length; + + // Get the platform to use + error = clGetPlatformIDs(1, &platform, NULL); + test_error( error, "Unable to get platform" ); + + // Platform profile should either be FULL_PROFILE or EMBEDDED_PROFILE + error = clGetPlatformInfo(platform, CL_PLATFORM_PROFILE, sizeof( buffer ), buffer, &length ); + test_error( error, "Unable to get platform profile string" ); + + log_info("Returned CL_PLATFORM_PROFILE %s.\n", buffer); + + if( strcmp( buffer, "FULL_PROFILE" ) != 0 && strcmp( buffer, "EMBEDDED_PROFILE" ) != 0 ) + { + log_error( "ERROR: Returned platform profile string is not a valid string by OpenCL 1.2! (Returned: %s)\n", buffer ); + return -1; + } + if( strlen( buffer )+1 != length ) + { + log_error( "ERROR: Returned length of profile string is incorrect (actual length: %d, returned length: %d)\n", + (int)strlen( buffer )+1, (int)length ); + return -1; + } + + // Check just length return + error = clGetPlatformInfo(platform, CL_PLATFORM_PROFILE, 0, NULL, &length ); + test_error( error, "Unable to get platform profile length" ); + if( strlen( (char *)buffer )+1 != length ) + { + log_error( "ERROR: Returned length of profile string is incorrect (actual length: %d, returned length: %d)\n", + (int)strlen( (char *)buffer )+1, (int)length ); + return -1; + } + + + // Platform version should fit the regex "OpenCL *[0-9]+\.[0-9]+" + error = clGetPlatformInfo(platform, CL_PLATFORM_VERSION, sizeof( buffer ), buffer, &length ); + test_error( error, "Unable to get platform version string" ); + + log_info("Returned CL_PLATFORM_VERSION %s.\n", buffer); + + if( memcmp( buffer, "OpenCL ", strlen( "OpenCL " ) ) != 0 ) + { + log_error( "ERROR: Initial part of platform version string 
does not match required format! (returned: %s)\n", (char *)buffer ); + return -1; + } + char *p1 = (char *)buffer + strlen( "OpenCL " ); + while( *p1 == ' ' ) + p1++; + char *p2 = p1; + while( isdigit( *p2 ) ) + p2++; + if( *p2 != '.' ) + { + log_error( "ERROR: Numeric part of platform version string does not match required format! (returned: %s)\n", (char *)buffer ); + return -1; + } + char *p3 = p2 + 1; + while( isdigit( *p3 ) ) + p3++; + if( *p3 != ' ' ) + { + log_error( "ERROR: space expected after minor version number! (returned: %s)\n", (char *)buffer ); + return -1; + } + *p2 = ' '; // Put in a space for atoi below. + p2++; + + // make sure it is null terminated + for( ; p3 != buffer + length; p3++ ) + if( *p3 == '\0' ) + break; + if( p3 == buffer + length ) + { + log_error( "ERROR: platform version string is not NUL terminated!\n" ); + return -1; + } + + int major = atoi( p1 ); + int minor = atoi( p2 ); + int minor_revision = 2; + if( major * 10 + minor < 10 + minor_revision ) + { + log_error( "ERROR: OpenCL profile version returned is less than 1.%d!\n", minor_revision ); + return -1; + } + + // Sanity checks on the returned values + if( length != strlen( (char *)buffer ) + 1) + { + log_error( "ERROR: Returned length of version string does not match actual length (actual: %d, returned: %d)\n", (int)strlen( (char *)buffer )+1, (int)length ); + return -1; + } + + // Check just length + error = clGetPlatformInfo(platform, CL_PLATFORM_VERSION, 0, NULL, &length ); + test_error( error, "Unable to get platform version length" ); + if( length != strlen( (char *)buffer )+1 ) + { + log_error( "ERROR: Returned length of version string does not match actual length (actual: %d, returned: %d)\n", (int)strlen( buffer )+1, (int)length ); + return -1; + } + + return 0; +} + +int test_get_sampler_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + size_t size; + + PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID ) + + 
cl_sampler_properties properties[] = { + CL_SAMPLER_NORMALIZED_COORDS, CL_TRUE, + CL_SAMPLER_ADDRESSING_MODE, CL_ADDRESS_CLAMP, + CL_SAMPLER_FILTER_MODE, CL_FILTER_LINEAR, + 0 }; + clSamplerWrapper sampler = clCreateSamplerWithProperties(context, properties, &error); + test_error( error, "Unable to create sampler to test with" ); + + cl_uint refCount; + error = clGetSamplerInfo( sampler, CL_SAMPLER_REFERENCE_COUNT, sizeof( refCount ), &refCount, &size ); + test_error( error, "Unable to get sampler ref count" ); + if( size != sizeof( refCount ) ) + { + log_error( "ERROR: Returned size of sampler refcount does not validate! (expected %d, got %d)\n", (int)sizeof( refCount ), (int)size ); + return -1; + } + + cl_context otherCtx; + error = clGetSamplerInfo( sampler, CL_SAMPLER_CONTEXT, sizeof( otherCtx ), &otherCtx, &size ); + test_error( error, "Unable to get sampler context" ); + if( otherCtx != context ) + { + log_error( "ERROR: Sampler context does not validate! (expected %p, got %p)\n", context, otherCtx ); + return -1; + } + if( size != sizeof( otherCtx ) ) + { + log_error( "ERROR: Returned size of sampler context does not validate! (expected %d, got %d)\n", (int)sizeof( otherCtx ), (int)size ); + return -1; + } + + cl_addressing_mode mode; + error = clGetSamplerInfo( sampler, CL_SAMPLER_ADDRESSING_MODE, sizeof( mode ), &mode, &size ); + test_error( error, "Unable to get sampler addressing mode" ); + if( mode != CL_ADDRESS_CLAMP ) + { + log_error( "ERROR: Sampler addressing mode does not validate! (expected %d, got %d)\n", (int)CL_ADDRESS_CLAMP, (int)mode ); + return -1; + } + if( size != sizeof( mode ) ) + { + log_error( "ERROR: Returned size of sampler addressing mode does not validate! 
(expected %d, got %d)\n", (int)sizeof( mode ), (int)size ); + return -1; + } + + cl_filter_mode fmode; + error = clGetSamplerInfo( sampler, CL_SAMPLER_FILTER_MODE, sizeof( fmode ), &fmode, &size ); + test_error( error, "Unable to get sampler filter mode" ); + if( fmode != CL_FILTER_LINEAR ) + { + log_error( "ERROR: Sampler filter mode does not validate! (expected %d, got %d)\n", (int)CL_FILTER_LINEAR, (int)fmode ); + return -1; + } + if( size != sizeof( fmode ) ) + { + log_error( "ERROR: Returned size of sampler filter mode does not validate! (expected %d, got %d)\n", (int)sizeof( fmode ), (int)size ); + return -1; + } + + cl_int norm; + error = clGetSamplerInfo( sampler, CL_SAMPLER_NORMALIZED_COORDS, sizeof( norm ), &norm, &size ); + test_error( error, "Unable to get sampler normalized flag" ); + if( norm != CL_TRUE ) + { + log_error( "ERROR: Sampler normalized flag does not validate! (expected %d, got %d)\n", (int)CL_TRUE, (int)norm ); + return -1; + } + if( size != sizeof( norm ) ) + { + log_error( "ERROR: Returned size of sampler normalized flag does not validate! (expected %d, got %d)\n", (int)sizeof( norm ), (int)size ); + return -1; + } + + return 0; +} + +#define TEST_COMMAND_QUEUE_PARAM( queue, paramName, val, expected, name, type, cast ) \ +error = clGetCommandQueueInfo( queue, paramName, sizeof( val ), &val, &size ); \ +test_error( error, "Unable to get command queue " name ); \ +if( val != expected ) \ +{ \ +log_error( "ERROR: Command queue " name " did not validate! (expected " type ", got " type ")\n", (cast)expected, (cast)val ); \ +return -1; \ +} \ +if( size != sizeof( val ) ) \ +{ \ +log_error( "ERROR: Returned size of command queue " name " does not validate! 
(expected %d, got %d)\n", (int)sizeof( val ), (int)size ); \ +return -1; \ +} + +int test_get_command_queue_info(cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements) +{ + int error; + size_t size; + + cl_queue_properties device_props; + cl_queue_properties queue_props[] = {CL_QUEUE_PROPERTIES,0,0}; + + clGetDeviceInfo(deviceID, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES, sizeof(device_props), &device_props, NULL); + log_info("CL_DEVICE_QUEUE_ON_HOST_PROPERTIES is %d\n", (int)device_props); + + queue_props[1] = device_props; + clCommandQueueWrapper queue = clCreateCommandQueueWithProperties( context, deviceID, &queue_props[0], &error ); + test_error( error, "Unable to create command queue to test with" ); + + cl_uint refCount; + error = clGetCommandQueueInfo( queue, CL_QUEUE_REFERENCE_COUNT, sizeof( refCount ), &refCount, &size ); + test_error( error, "Unable to get command queue reference count" ); + if( size != sizeof( refCount ) ) + { + log_error( "ERROR: Returned size of command queue reference count does not validate! (expected %d, got %d)\n", (int)sizeof( refCount ), (int)size ); + return -1; + } + + cl_context otherCtx; + TEST_COMMAND_QUEUE_PARAM( queue, CL_QUEUE_CONTEXT, otherCtx, context, "context", "%p", cl_context ) + + cl_device_id otherDevice; + error = clGetCommandQueueInfo( queue, CL_QUEUE_DEVICE, sizeof(otherDevice), &otherDevice, &size); + test_error(error, "clGetCommandQueue failed."); + + if (size != sizeof(cl_device_id)) { + log_error( " ERROR: Returned size of command queue CL_QUEUE_DEVICE does not validate! (expected %d, got %d)\n", (int)sizeof( otherDevice ), (int)size ); + return -1; + } + + /* Since the device IDs are opaque types we check the CL_DEVICE_VENDOR_ID which is unique for identical hardware. 
*/ + cl_uint otherDevice_vid, deviceID_vid; + error = clGetDeviceInfo(otherDevice, CL_DEVICE_VENDOR_ID, sizeof(otherDevice_vid), &otherDevice_vid, NULL ); + test_error( error, "Unable to get device CL_DEVICE_VENDOR_ID" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_VENDOR_ID, sizeof(deviceID_vid), &deviceID_vid, NULL ); + test_error( error, "Unable to get device CL_DEVICE_VENDOR_ID" ); + + if( otherDevice_vid != deviceID_vid ) + { + log_error( "ERROR: Incorrect device returned for queue! (Expected vendor ID 0x%x, got 0x%x)\n", deviceID_vid, otherDevice_vid ); + return -1; + } + + cl_command_queue_properties props; + TEST_COMMAND_QUEUE_PARAM( queue, CL_QUEUE_PROPERTIES, props, (unsigned int)( device_props ), "properties", "%d", unsigned int ) + + return 0; +} + +int test_get_context_info(cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements) +{ + int error; + size_t size; + cl_context_properties props; + + error = clGetContextInfo( context, CL_CONTEXT_PROPERTIES, sizeof( props ), &props, &size ); + test_error( error, "Unable to get context props" ); + + if (size == 0) { + // Valid size + return 0; + } else if (size == sizeof(cl_context_properties)) { + // Data must be NULL + if (props != 0) { + log_error("ERROR: Returned properties is no NULL.\n"); + return -1; + } + // Valid data and size + return 0; + } + // Size was not 0 or 1 + log_error( "ERROR: Returned size of context props is not valid! (expected 0 or %d, got %d)\n", + (int)sizeof(cl_context_properties), (int)size ); + return -1; +} + +#define TEST_MEM_OBJECT_PARAM( mem, paramName, val, expected, name, type, cast ) \ +error = clGetMemObjectInfo( mem, paramName, sizeof( val ), &val, &size ); \ +test_error( error, "Unable to get mem object " name ); \ +if( val != expected ) \ +{ \ +log_error( "ERROR: Mem object " name " did not validate! 
 (expected " type ", got " type ")\n", (cast)(expected), (cast)val ); \
+return -1; \
+} \
+if( size != sizeof( val ) ) \
+{ \
+log_error( "ERROR: Returned size of mem object " name " does not validate! (expected %d, got %d)\n", (int)sizeof( val ), (int)size ); \
+return -1; \
+}
+
+// Destructor callback registered on mem objects: frees the user data pointer
+// that was attached when the callback was installed.
+void CL_CALLBACK mem_obj_destructor_callback( cl_mem, void *data )
+{
+ free( data );
+}
+
+// All possible combinations of valid cl_mem_flags.
+static cl_mem_flags all_flags[16] = {
+ 0,
+ CL_MEM_READ_WRITE,
+ CL_MEM_READ_ONLY,
+ CL_MEM_WRITE_ONLY,
+ CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
+ CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
+ CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
+ CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
+ CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
+ CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR,
+ CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
+ CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
+ CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
+ CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
+ CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
+ CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
+};
+
+// Queries a device parameter, checks the returned size, and logs the value.
+// Expects `error` (int) and `size` (size_t) to be declared in the caller;
+// returns -1 from the *enclosing function* on failure.
+#define TEST_DEVICE_PARAM( device, paramName, val, name, type, cast ) \
+error = clGetDeviceInfo( device, paramName, sizeof( val ), &val, &size ); \
+test_error( error, "Unable to get device " name ); \
+if( size != sizeof( val ) ) \
+{ \
+log_error( "ERROR: Returned size of device " name " does not validate! (expected %d, got %d)\n", (int)sizeof( val ), (int)size ); \
+return -1; \
+} \
+log_info( "\tReported device " name " : " type "\n", (cast)val );
+
+// Same as TEST_DEVICE_PARAM but logs the value scaled down by `div`
+// (e.g. bytes -> KB/MB) for memory-size queries.
+#define TEST_DEVICE_PARAM_MEM( device, paramName, val, name, type, div ) \
+error = clGetDeviceInfo( device, paramName, sizeof( val ), &val, &size ); \
+test_error( error, "Unable to get device " name ); \
+if( size != sizeof( val ) ) \
+{ \
+log_error( "ERROR: Returned size of device " name " does not validate! 
(expected %d, got %d)\n", (int)sizeof( val ), (int)size ); \ +return -1; \ +} \ +log_info( "\tReported device " name " : " type "\n", (int)( val / div ) ); + +int test_get_device_info(cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements) +{ + int error; + size_t size; + + cl_uint vendorID; + TEST_DEVICE_PARAM( deviceID, CL_DEVICE_VENDOR_ID, vendorID, "vendor ID", "0x%08x", int ) + + char extensions[ 10240 ]; + error = clGetDeviceInfo( deviceID, CL_DEVICE_EXTENSIONS, sizeof( extensions ), &extensions, &size ); + test_error( error, "Unable to get device extensions" ); + if( size != strlen( extensions ) + 1 ) + { + log_error( "ERROR: Returned size of device extensions does not validate! (expected %d, got %d)\n", (int)( strlen( extensions ) + 1 ), (int)size ); + return -1; + } + log_info( "\tReported device extensions: %s \n", extensions ); + + cl_uint preferred; + TEST_DEVICE_PARAM( deviceID, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, preferred, "preferred vector char width", "%d", int ) + TEST_DEVICE_PARAM( deviceID, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, preferred, "preferred vector short width", "%d", int ) + TEST_DEVICE_PARAM( deviceID, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, preferred, "preferred vector int width", "%d", int ) + TEST_DEVICE_PARAM( deviceID, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, preferred, "preferred vector long width", "%d", int ) + TEST_DEVICE_PARAM( deviceID, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, preferred, "preferred vector float width", "%d", int ) + TEST_DEVICE_PARAM( deviceID, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, preferred, "preferred vector double width", "%d", int ) + + // Note that even if cl_khr_fp64, the preferred width for double can be non-zero. For example, vendors + // extensions can support double but may not support cl_khr_fp64, which implies math library support. 
+ + cl_uint baseAddrAlign; + TEST_DEVICE_PARAM( deviceID, CL_DEVICE_MEM_BASE_ADDR_ALIGN, baseAddrAlign, "base address alignment", "%d bytes", int ) + + cl_uint maxDataAlign; + TEST_DEVICE_PARAM( deviceID, CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE, maxDataAlign, "min data type alignment", "%d bytes", int ) + + cl_device_mem_cache_type cacheType; + error = clGetDeviceInfo( deviceID, CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, sizeof( cacheType ), &cacheType, &size ); + test_error( error, "Unable to get device global mem cache type" ); + if( size != sizeof( cacheType ) ) + { + log_error( "ERROR: Returned size of device global mem cache type does not validate! (expected %d, got %d)\n", (int)sizeof( cacheType ), (int)size ); + return -1; + } + const char *cacheTypeName = ( cacheType == CL_NONE ) ? "CL_NONE" : ( cacheType == CL_READ_ONLY_CACHE ) ? "CL_READ_ONLY_CACHE" : ( cacheType == CL_READ_WRITE_CACHE ) ? "CL_READ_WRITE_CACHE" : ""; + log_info( "\tReported device global mem cache type: %s \n", cacheTypeName ); + + cl_uint cachelineSize; + TEST_DEVICE_PARAM( deviceID, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, cachelineSize, "global mem cacheline size", "%d bytes", int ) + + cl_ulong cacheSize; + TEST_DEVICE_PARAM_MEM( deviceID, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, cacheSize, "global mem cache size", "%d KB", 1024 ) + + cl_ulong memSize; + TEST_DEVICE_PARAM_MEM( deviceID, CL_DEVICE_GLOBAL_MEM_SIZE, memSize, "global mem size", "%d MB", ( 1024 * 1024 ) ) + + cl_device_local_mem_type localMemType; + error = clGetDeviceInfo( deviceID, CL_DEVICE_LOCAL_MEM_TYPE, sizeof( localMemType ), &localMemType, &size ); + test_error( error, "Unable to get device local mem type" ); + if( size != sizeof( cacheType ) ) + { + log_error( "ERROR: Returned size of device local mem type does not validate! (expected %d, got %d)\n", (int)sizeof( localMemType ), (int)size ); + return -1; + } + const char *localMemTypeName = ( localMemType == CL_LOCAL ) ? "CL_LOCAL" : ( cacheType == CL_GLOBAL ) ? 
"CL_GLOBAL" : ""; + log_info( "\tReported device local mem type: %s \n", localMemTypeName ); + + + cl_bool errSupport; + TEST_DEVICE_PARAM( deviceID, CL_DEVICE_ERROR_CORRECTION_SUPPORT, errSupport, "error correction support", "%d", int ) + + size_t timerResolution; + TEST_DEVICE_PARAM( deviceID, CL_DEVICE_PROFILING_TIMER_RESOLUTION, timerResolution, "profiling timer resolution", "%ld nanoseconds", long ) + + cl_bool endian; + TEST_DEVICE_PARAM( deviceID, CL_DEVICE_ENDIAN_LITTLE, endian, "little endian flag", "%d", int ) + + cl_bool avail; + TEST_DEVICE_PARAM( deviceID, CL_DEVICE_AVAILABLE, avail, "available flag", "%d", int ) + + cl_bool compilerAvail; + TEST_DEVICE_PARAM( deviceID, CL_DEVICE_COMPILER_AVAILABLE, compilerAvail, "compiler available flag", "%d", int ) + + char profile[ 1024 ]; + error = clGetDeviceInfo( deviceID, CL_DEVICE_PROFILE, sizeof( profile ), &profile, &size ); + test_error( error, "Unable to get device profile" ); + if( size != strlen( profile ) + 1 ) + { + log_error( "ERROR: Returned size of device profile does not validate! (expected %d, got %d)\n", (int)( strlen( profile ) + 1 ), (int)size ); + return -1; + } + if( strcmp( profile, "FULL_PROFILE" ) != 0 && strcmp( profile, "EMBEDDED_PROFILE" ) != 0 ) + { + log_error( "ERROR: Returned profile of device not FULL or EMBEDDED as required by OpenCL 1.2! 
(Returned %s)\n", profile ); + return -1; + } + log_info( "\tReported device profile: %s \n", profile ); + + + return 0; +} + + + + +static const char *sample_compile_size[2] = { + "__kernel void sample_test(__global int *src, __global int *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + " dst[tid] = src[tid];\n" + "\n" + "}\n", + "__kernel __attribute__((reqd_work_group_size(%d,%d,%d))) void sample_test(__global int *src, __global int *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + " dst[tid] = src[tid];\n" + "\n" + "}\n" }; + +int test_kernel_required_group_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + size_t realSize; + size_t kernel_max_workgroup_size; + size_t global[] = {64,14,10}; + size_t local[] = {0,0,0}; + + cl_uint max_dimensions; + + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(max_dimensions), &max_dimensions, NULL); + test_error(error, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS"); + log_info("Device reported CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS = %d.\n", (int)max_dimensions); + + { + clProgramWrapper program; + clKernelWrapper kernel; + + error = create_single_kernel_helper( context, &program, &kernel, 1, &sample_compile_size[ 0 ], "sample_test" ); + if( error != 0 ) + return error; + + error = clGetKernelWorkGroupInfo(kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE, sizeof(kernel_max_workgroup_size), &kernel_max_workgroup_size, NULL); + test_error( error, "clGetKernelWorkGroupInfo failed for CL_KERNEL_WORK_GROUP_SIZE"); + log_info("The CL_KERNEL_WORK_GROUP_SIZE for the kernel is %d.\n", (int)kernel_max_workgroup_size); + + size_t size[ 3 ]; + error = clGetKernelWorkGroupInfo( kernel, deviceID, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, sizeof( size ), size, &realSize ); + test_error( error, "Unable to get work group info" ); + + if( size[ 0 ] != 0 || size[ 1 ] != 0 || size[ 2 ] != 0 ) + { + log_error( "ERROR: Nonzero compile work group size 
returned for nonspecified size! (returned %d,%d,%d)\n", (int)size[0], (int)size[1], (int)size[2] ); + return -1; + } + + if( realSize != sizeof( size ) ) + { + log_error( "ERROR: Returned size of compile work group size not valid! (Expected %d, got %d)\n", (int)sizeof( size ), (int)realSize ); + return -1; + } + + // Determine some local dimensions to use for the test. + if (max_dimensions == 1) { + error = get_max_common_work_group_size(context, kernel, global[0], &local[0]); + test_error( error, "get_max_common_work_group_size failed"); + log_info("For global dimension %d, kernel will require local dimension %d.\n", (int)global[0], (int)local[0]); + } else if (max_dimensions == 2) { + error = get_max_common_2D_work_group_size(context, kernel, global, local); + test_error( error, "get_max_common_2D_work_group_size failed"); + log_info("For global dimension %d x %d, kernel will require local dimension %d x %d.\n", (int)global[0], (int)global[1], (int)local[0], (int)local[1]); + } else { + error = get_max_common_3D_work_group_size(context, kernel, global, local); + test_error( error, "get_max_common_3D_work_group_size failed"); + log_info("For global dimension %d x %d x %d, kernel will require local dimension %d x %d x %d.\n", + (int)global[0], (int)global[1], (int)global[2], (int)local[0], (int)local[1], (int)local[2]); + } + } + + + { + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper in, out; + //char source[1024]; + char *source = (char*)malloc(1024); + source[0] = '\0'; + + sprintf(source, sample_compile_size[1], local[0], local[1], local[2]); + + error = create_single_kernel_helper( context, &program, &kernel, 1, (const char**)&source, "sample_test" ); + if( error != 0 ) + return error; + + size_t size[ 3 ]; + error = clGetKernelWorkGroupInfo( kernel, deviceID, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, sizeof( size ), size, &realSize ); + test_error( error, "Unable to get work group info" ); + + if( size[ 0 ] != local[0] || size[ 1 ] != local[1] || 
size[ 2 ] != local[2] ) + { + log_error( "ERROR: Incorrect compile work group size returned for specified size! (returned %d,%d,%d, expected %d,%d,%d)\n", + (int)size[0], (int)size[1], (int)size[2], (int)local[0], (int)local[1], (int)local[2]); + return -1; + } + + // Verify that the kernel will only execute with that size. + in = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(cl_int)*global[0], NULL, &error); + test_error(error, "clCreateBuffer failed"); + out = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(cl_int)*global[0], NULL, &error); + test_error(error, "clCreateBuffer failed"); + + error = clSetKernelArg(kernel, 0, sizeof(in), &in); + test_error(error, "clSetKernelArg failed"); + error = clSetKernelArg(kernel, 1, sizeof(out), &out); + test_error(error, "clSetKernelArg failed"); + + error = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global, local, 0, NULL, NULL); + test_error(error, "clEnqueueNDRangeKernel failed"); + + error = clFinish(queue); + test_error(error, "clFinish failed"); + + log_info("kernel_required_group_size may report spurious ERRORS in the conformance log.\n"); + + local[0]++; + error = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global, local, 0, NULL, NULL); + if (error != CL_INVALID_WORK_GROUP_SIZE) { + log_error("Incorrect error returned for executing a kernel with the wrong required local work group size. (used %d,%d,%d, required %d,%d,%d)\n", + (int)local[0], (int)local[1], (int)local[2], (int)local[0]-1, (int)local[1], (int)local[2] ); + print_error(error, "Expected: CL_INVALID_WORK_GROUP_SIZE."); + return -1; + } + + error = clFinish(queue); + test_error(error, "clFinish failed"); + + if (max_dimensions == 1) { + free(source); + return 0; + } + + local[0]--; local[1]++; + error = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global, local, 0, NULL, NULL); + if (error != CL_INVALID_WORK_GROUP_SIZE) { + log_error("Incorrect error returned for executing a kernel with the wrong required local work group size. 
(used %d,%d,%d, required %d,%d,%d)\n", + (int)local[0], (int)local[1], (int)local[2], (int)local[0]-1, (int)local[1], (int)local[2]); + print_error(error, "Expected: CL_INVALID_WORK_GROUP_SIZE."); + return -1; + } + + error = clFinish(queue); + test_error(error, "clFinish failed"); + + if (max_dimensions == 2) { + return 0; + free(source); + } + + local[1]--; local[2]++; + error = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global, local, 0, NULL, NULL); + if (error != CL_INVALID_WORK_GROUP_SIZE) { + log_error("Incorrect error returned for executing a kernel with the wrong required local work group size. (used %d,%d,%d, required %d,%d,%d)\n", + (int)local[0], (int)local[1], (int)local[2], (int)local[0]-1, (int)local[1], (int)local[2]); + print_error(error, "Expected: CL_INVALID_WORK_GROUP_SIZE."); + return -1; + } + + error = clFinish(queue); + test_error(error, "clFinish failed"); + free(source); + } + + return 0; +} + + diff --git a/test_conformance/api/test_queue_hint.cpp b/test_conformance/api/test_queue_hint.cpp new file mode 100644 index 00000000..2634a23a --- /dev/null +++ b/test_conformance/api/test_queue_hint.cpp @@ -0,0 +1,191 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "testBase.h" +#include "../../test_common/harness/typeWrappers.h" +#include "../../test_common/harness/conversions.h" +#include +#include + +using namespace std; +/* + +*/ + +const char *queue_hint_test_kernel[] = { +"__kernel void vec_cpy(__global int *src, __global int *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = src[tid];\n" +"\n" +"}\n" }; + +int test_enqueue(cl_context context, clCommandQueueWrapper& queue, clKernelWrapper& kernel, size_t num_elements) +{ + clMemWrapper streams[2]; + int error; + + int* buf = new int[num_elements]; + + for (int i = 0; i < static_cast(num_elements); ++i) + { + buf[i] = i; + } + + + streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, num_elements * sizeof(int), buf, &error); + test_error( error, "clCreateBuffer failed." ); + streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, num_elements * sizeof(int), NULL, &error); + test_error( error, "clCreateBuffer failed." ); + + error = clSetKernelArg(kernel, 0, sizeof(streams[0]), &streams[0]); + test_error( error, "clSetKernelArg failed." ); + + error = clSetKernelArg(kernel, 1, sizeof(streams[1]), &streams[1]); + test_error( error, "clSetKernelArg failed." ); + + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &num_elements, NULL, 0, NULL, NULL); + test_error( error, "clEnqueueNDRangeKernel failed." ); + + error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, num_elements * sizeof(int), buf, 0, NULL, NULL); + test_error( error, "clEnqueueReadBuffer failed." 
); + + for (int i = 0; i < static_cast(num_elements); ++i) + { + if (buf[i] != i) + { + log_error("ERROR: Incorrect vector copy result."); + return -1; + } + } + + delete [] buf; + + return 0; +} + + + + +int test_queue_hint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + if (num_elements <= 0) + { + num_elements = 128; + } + + int err = 0; + + // Query extension + cl_platform_id platform; + clProgramWrapper program; + clKernelWrapper kernel; + char *string_returned; + + string_returned = (char*)malloc(8192); + + err = clGetDeviceInfo(deviceID, CL_DEVICE_PLATFORM, sizeof(platform), &platform, NULL); + test_error(err, "clGetDeviceInfo for CL_DEVICE_PLATFORM failed"); + + err = create_single_kernel_helper_with_build_options(context, &program, &kernel, 1, queue_hint_test_kernel, "vec_cpy", NULL); + if (err != 0) + { + return err; + } + + memset(string_returned, 0, 8192); + err = clGetPlatformInfo(platform, CL_PLATFORM_EXTENSIONS, 8192, string_returned, NULL); + test_error(err, "clGetPlatformInfo for CL_PLATFORM_EXTENSIONS failed"); + log_info("\tCL_PLATFORM_EXTENSIONS: %s\n", string_returned); + string strExt = string_returned; + if (strExt.find("cl_khr_priority_hints") != string::npos) + { + log_info("Testing cl_khr_priority_hints...\n", string_returned); + + cl_queue_properties queue_prop[][3] = + { + { + CL_QUEUE_PRIORITY_KHR, CL_QUEUE_PRIORITY_HIGH_KHR, + 0 + }, + { + CL_QUEUE_PRIORITY_KHR, CL_QUEUE_PRIORITY_MED_KHR, + 0 + }, + { + CL_QUEUE_PRIORITY_KHR, CL_QUEUE_PRIORITY_LOW_KHR, + 0 + } + }; + + for (int i = 0; i < 3; ++i) + { + clCommandQueueWrapper q = clCreateCommandQueueWithProperties(context, deviceID, queue_prop[i], &err); + test_error(err, "clCreateCommandQueueWithProperties failed"); + + err = test_enqueue(context, q, kernel, (size_t)num_elements); + if (err != 0) + { + return err; + } + } + } + else + { + log_info("cl_khr_priority_hints is not supported."); + } + + if (strExt.find("cl_khr_throttle_hints") != 
string::npos) + { + cl_queue_properties queue_prop[][3] = + { + { + CL_QUEUE_THROTTLE_KHR, CL_QUEUE_THROTTLE_HIGH_KHR, + 0 + }, + { + CL_QUEUE_THROTTLE_KHR, CL_QUEUE_THROTTLE_MED_KHR, + 0 + }, + { + CL_QUEUE_THROTTLE_KHR, CL_QUEUE_THROTTLE_LOW_KHR, + 0 + } + }; + + for (int i = 0; i < 3; ++i) + { + clCommandQueueWrapper q = clCreateCommandQueueWithProperties(context, deviceID, queue_prop[i], &err); + test_error(err, "clCreateCommandQueueWithProperties failed"); + + err = test_enqueue(context, q, kernel, (size_t)num_elements); + if (err != 0) + { + return err; + } + } + + } + else + { + log_info("cl_khr_throttle_hints is not supported."); + } + + free(string_returned); + + return 0; +} + diff --git a/test_conformance/api/test_retain.cpp b/test_conformance/api/test_retain.cpp new file mode 100644 index 00000000..384d280e --- /dev/null +++ b/test_conformance/api/test_retain.cpp @@ -0,0 +1,234 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "testBase.h" +#if !defined(_WIN32) +#include +#endif // !_WIN32 + +// Note: According to spec, the various functions to get instance counts should return an error when passed in an object +// that has already been released. However, the spec is out of date. If it gets re-updated to allow such action, re-enable +// this define. 
//#define VERIFY_AFTER_RELEASE 1

// These macros read an object's CL_*_REFERENCE_COUNT into the local
// `numInstances` and leave the API result in the local `err`; on query
// failure numInstances is forced to 0. Both locals must exist in the caller.
#define GET_QUEUE_INSTANCE_COUNT(p) numInstances = ( (err = clGetCommandQueueInfo(p, CL_QUEUE_REFERENCE_COUNT, sizeof( numInstances ), &numInstances, NULL)) == CL_SUCCESS ? numInstances : 0 )
#define GET_MEM_INSTANCE_COUNT(p) numInstances = ( (err = clGetMemObjectInfo(p, CL_MEM_REFERENCE_COUNT, sizeof( numInstances ), &numInstances, NULL)) == CL_SUCCESS ? numInstances : 0 )

// Fails the enclosing test (returns -1) when the observed refcount does not
// match the expected value.
#define VERIFY_INSTANCE_COUNT(c,rightValue) if( c != rightValue ) { \
log_error( "ERROR: Instance count for test object is not valid! (should be %d, really is %d)\n", rightValue, c ); \
return -1; }

// Verifies that a freshly created command queue has a reference count of 1
// and can be released cleanly.
int test_retain_queue_single(cl_device_id deviceID, cl_context context, cl_command_queue queueNotUsed, int num_elements)
{
    cl_command_queue queue;
    cl_uint numInstances;
    int err;


    /* Create a test queue */
    queue = clCreateCommandQueueWithProperties( context, deviceID, 0, &err );
    test_error( err, "Unable to create command queue to test with" );

    /* Test the instance count */
    GET_QUEUE_INSTANCE_COUNT( queue );
    test_error( err, "Unable to get queue instance count" );
    VERIFY_INSTANCE_COUNT( numInstances, 1 );

    /* Now release the queue */
    clReleaseCommandQueue( queue );
#ifdef VERIFY_AFTER_RELEASE
    /* We're not allowed to get the instance count after the object has been completely released. But that's
       exactly how we can tell the release worked--by making sure getting the instance count fails! */
    GET_QUEUE_INSTANCE_COUNT( queue );
    if( err != CL_INVALID_COMMAND_QUEUE )
    {
        print_error( err, "Command queue was not properly released" );
        return -1;
    }
#endif

    return 0;
}

// Verifies command-queue refcounting through interleaved retain/release
// cycles: 1 -> 10 -> 5 -> 8 -> 1 -> released, checking the count at each step.
int test_retain_queue_multiple(cl_device_id deviceID, cl_context context, cl_command_queue queueNotUsed, int num_elements)
{
    cl_command_queue queue;
    unsigned int numInstances, i;
    int err;


    /* Create a test queue */
    queue = clCreateCommandQueueWithProperties( context, deviceID, 0, &err );
    test_error( err, "Unable to create command queue to test with" );

    /* Increment 9 times, which should bring the count to 10 */
    for( i = 0; i < 9; i++ )
    {
        clRetainCommandQueue( queue );
    }

    /* Test the instance count */
    GET_QUEUE_INSTANCE_COUNT( queue );
    test_error( err, "Unable to get queue instance count" );
    VERIFY_INSTANCE_COUNT( numInstances, 10 );

    /* Now release 5 times, which should take us to 5 */
    for( i = 0; i < 5; i++ )
    {
        clReleaseCommandQueue( queue );
    }

    GET_QUEUE_INSTANCE_COUNT( queue );
    test_error( err, "Unable to get queue instance count" );
    VERIFY_INSTANCE_COUNT( numInstances, 5 );

    /* Retain again three times, which should take us to 8 */
    for( i = 0; i < 3; i++ )
    {
        clRetainCommandQueue( queue );
    }

    GET_QUEUE_INSTANCE_COUNT( queue );
    test_error( err, "Unable to get queue instance count" );
    VERIFY_INSTANCE_COUNT( numInstances, 8 );

    /* Release 7 times, which should take it to 1 */
    for( i = 0; i < 7; i++ )
    {
        clReleaseCommandQueue( queue );
    }

    GET_QUEUE_INSTANCE_COUNT( queue );
    test_error( err, "Unable to get queue instance count" );
    VERIFY_INSTANCE_COUNT( numInstances, 1 );

    /* And one last one */
    clReleaseCommandQueue( queue );

#ifdef VERIFY_AFTER_RELEASE
    /* We're not allowed to get the instance count after the object has been completely released. But that's
       exactly how we can tell the release worked--by making sure getting the instance count fails! */
    GET_QUEUE_INSTANCE_COUNT( queue );
    if( err != CL_INVALID_COMMAND_QUEUE )
    {
        print_error( err, "Command queue was not properly released" );
        return -1;
    }
#endif

    return 0;
}

// Verifies that a freshly created buffer has a reference count of 1 and can
// be released cleanly.
int test_retain_mem_object_single(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_mem object;
    cl_uint numInstances;
    int err;


    /* Create a test object */
    object = clCreateBuffer( context, CL_MEM_READ_ONLY, 32, NULL, &err );
    test_error( err, "Unable to create buffer to test with" );

    /* Test the instance count */
    GET_MEM_INSTANCE_COUNT( object );
    test_error( err, "Unable to get mem object count" );
    VERIFY_INSTANCE_COUNT( numInstances, 1 );

    /* Now release the object */
    clReleaseMemObject( object );
#ifdef VERIFY_AFTER_RELEASE
    /* We're not allowed to get the instance count after the object has been completely released. But that's
       exactly how we can tell the release worked--by making sure getting the instance count fails! */
    GET_MEM_INSTANCE_COUNT( object );
    if( err != CL_INVALID_MEM_OBJECT )
    {
        print_error( err, "Mem object was not properly released" );
        return -1;
    }
#endif

    return 0;
}

// Verifies buffer refcounting through interleaved retain/release cycles:
// 1 -> 10 -> 5 -> 8 -> 1 -> released, checking the count at each step.
int test_retain_mem_object_multiple(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_mem object;
    unsigned int numInstances, i;
    int err;


    /* Create a test object */
    object = clCreateBuffer( context, CL_MEM_READ_ONLY, 32, NULL, &err );
    test_error( err, "Unable to create buffer to test with" );

    /* Increment 9 times, which should bring the count to 10 */
    for( i = 0; i < 9; i++ )
    {
        clRetainMemObject( object );
    }

    /* Test the instance count */
    GET_MEM_INSTANCE_COUNT( object );
    test_error( err, "Unable to get mem object count" );
    VERIFY_INSTANCE_COUNT( numInstances, 10 );

    /* Now release 5 times, which should take us to 5 */
    for( i = 0; i < 5; i++ )
    {
        clReleaseMemObject( object );
    }

    GET_MEM_INSTANCE_COUNT( object );
    test_error( err, "Unable to get mem object count" );
    VERIFY_INSTANCE_COUNT( numInstances, 5 );

    /* Retain again three times, which should take us to 8 */
    for( i = 0; i < 3; i++ )
    {
        clRetainMemObject( object );
    }

    GET_MEM_INSTANCE_COUNT( object );
    test_error( err, "Unable to get mem object count" );
    VERIFY_INSTANCE_COUNT( numInstances, 8 );

    /* Release 7 times, which should take it to 1 */
    for( i = 0; i < 7; i++ )
    {
        clReleaseMemObject( object );
    }

    GET_MEM_INSTANCE_COUNT( object );
    test_error( err, "Unable to get mem object count" );
    VERIFY_INSTANCE_COUNT( numInstances, 1 );

    /* And one last one */
    clReleaseMemObject( object );

#ifdef VERIFY_AFTER_RELEASE
    /* We're not allowed to get the instance count after the object has been completely released. But that's
       exactly how we can tell the release worked--by making sure getting the instance count fails! */
    GET_MEM_INSTANCE_COUNT( object );
    if( err != CL_INVALID_MEM_OBJECT )
    {
        print_error( err, "Mem object was not properly released" );
        return -1;
    }
#endif

    return 0;
}

diff --git a/test_conformance/api/test_retain_program.c b/test_conformance/api/test_retain_program.c new file mode 100644 index 00000000..4930c862 --- /dev/null +++ b/test_conformance/api/test_retain_program.c @@ -0,0 +1,105 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
+// +#include "testBase.h" + +#if !defined(_WIN32) +#include +#endif + +#include "../../test_common/harness/compat.h" + +int test_release_kernel_order(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + cl_program program; + cl_kernel kernel; + int error; + const char *testProgram[] = { "__kernel void sample_test(__global int *data){}" }; + + /* Create a test program */ + error = create_single_kernel_helper(context, &program, NULL, 1, testProgram, NULL); + test_error( error, "Unable to build sample program to test with" ); + + /* And create a kernel from it */ + kernel = clCreateKernel( program, "sample_test", &error ); + test_error( error, "Unable to create kernel" ); + + /* Now try freeing the program first, then the kernel. If refcounts are right, this should work just fine */ + clReleaseProgram( program ); + clReleaseKernel( kernel ); + + /* If we got here fine, we succeeded. If not, well, we won't be able to return an error :) */ + return 0; +} + +const char *sample_delay_kernel[] = { +"__kernel void sample_test(__global float *src, __global int *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +" for( int i = 0; i < 1000000; i++ ); \n" +" dst[tid] = (int)src[tid];\n" +"\n" +"}\n" }; + +int test_release_during_execute( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + cl_program program; + cl_kernel kernel; + cl_mem streams[2]; + size_t threads[1] = { 10 }, localThreadSize; + + + /* We now need an event to test. 
So we'll execute a kernel to get one */ + if( create_single_kernel_helper( context, &program, &kernel, 1, sample_delay_kernel, "sample_test" ) ) + { + return -1; + } + + streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * 10, NULL, &error); + test_error( error, "Creating test array failed" ); + streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 10, NULL, &error); + test_error( error, "Creating test array failed" ); + + /* Set the arguments */ + error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[ 0 ]); + test_error( error, "Unable to set indexed kernel arguments" ); + error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[ 1 ]); + test_error( error, "Unable to set indexed kernel arguments" ); + + error = get_max_common_work_group_size( context, kernel, threads[0], &localThreadSize ); + test_error( error, "Unable to calc local thread size" ); + + + /* Execute the kernel */ + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, &localThreadSize, 0, NULL, NULL ); + test_error( error, "Unable to execute test kernel" ); + + /* The kernel should still be executing, but we should still be able to release it. It's not terribly + useful, but we should be able to do it, if the internal refcounting is indeed correct. */ + + clReleaseMemObject( streams[ 1 ] ); + clReleaseMemObject( streams[ 0 ] ); + clReleaseKernel( kernel ); + clReleaseProgram( program ); + + /* Now make sure we're really finished before we go on. */ + error = clFinish(queue); + test_error( error, "Unable to finish context."); + + return 0; +} + + diff --git a/test_conformance/api/test_sub_group_dispatch.cpp b/test_conformance/api/test_sub_group_dispatch.cpp new file mode 100644 index 00000000..b17727c5 --- /dev/null +++ b/test_conformance/api/test_sub_group_dispatch.cpp @@ -0,0 +1,218 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "testBase.h" +#include "../../test_common/harness/typeWrappers.h" +#include "../../test_common/harness/conversions.h" + +const char *subgroup_dispatch_kernel[] = { +"__kernel void subgroup_dispatch_kernel(__global int *output)\n" +"{\n" +" size_t size = get_num_sub_groups ();\n" +"\n" +" output[0] = size;\n" +"\n" +"}\n" }; + +size_t flatten_ndrange(size_t* ndrange, size_t dim) +{ + switch(dim) + { + case 1: + return *ndrange; + case 2: + return ndrange[0] * ndrange[1]; + case 3: + return ndrange[0] * ndrange[1] * ndrange[2]; + default: + log_error("ERROR: bad ndrange value"); + return 0; + } +} + +cl_int get_sub_group_num(cl_command_queue queue, cl_kernel kernel, clMemWrapper& out, size_t& size, size_t local_size, size_t dim) +{ + size_t ndrange[3] = {local_size, 1, 1}; + cl_int error = CL_SUCCESS; + size = 0; + error = clSetKernelArg(kernel, 0, sizeof(out), &out); + error += clEnqueueNDRangeKernel(queue, kernel, dim, NULL, ndrange, ndrange, 0, NULL, NULL); + error += clEnqueueReadBuffer(queue, out, CL_TRUE, 0, 4, &size, 0, NULL, NULL); + return error; +} + +int test_sub_group_dispatch(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + static const size_t gsize0 = 80; + int i, error; + size_t realSize; + size_t kernel_max_subgroup_size, kernel_subgroup_count; + size_t global[] = {1,1,1}; + size_t max_local; + + cl_platform_id platform; + clProgramWrapper 
program; + clKernelWrapper kernel; + clMemWrapper out; + + size_t ret_ndrange1d; + size_t ret_ndrange2d[2]; + size_t ret_ndrange3d[3]; + + size_t ret_ndrange2d_flattened; + size_t ret_ndrange3d_flattened; + + error = create_single_kernel_helper_with_build_options(context, &program, &kernel, 1, subgroup_dispatch_kernel, "subgroup_dispatch_kernel", "-cl-std=CL2.0"); + if (error != 0) + return error; + + out = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, sizeof(size_t), NULL, &error); + test_error(error, "clCreateBuffer failed"); + + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), &max_local, NULL); + test_error(error, "clGetDeviceInfo failed"); + + + error = clGetDeviceInfo(deviceID, CL_DEVICE_PLATFORM, sizeof(platform), (void *)&platform, NULL); + test_error(error, "clDeviceInfo failed for CL_DEVICE_PLATFORM"); + + // Get the max subgroup size + error = clGetKernelSubGroupInfo(kernel, deviceID, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE, + sizeof(max_local), &max_local, sizeof(kernel_max_subgroup_size), (void *)&kernel_max_subgroup_size, &realSize); + test_error(error, "clGetKernelSubGroupInfo failed for CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE"); + log_info("The CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE for the kernel is %d.\n", (int)kernel_max_subgroup_size); + + if (realSize != sizeof(kernel_max_subgroup_size)) { + log_error( "ERROR: Returned size of max sub group size not valid! 
(Expected %d, got %d)\n", (int)sizeof(kernel_max_subgroup_size), (int)realSize ); + return -1; + } + + // Get the number of subgroup for max local size + error = clGetKernelSubGroupInfo(kernel, deviceID, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE, + sizeof(max_local), &max_local, sizeof(kernel_subgroup_count), (void *)&kernel_subgroup_count, &realSize); + test_error(error, "clGetKernelSubGroupInfo failed for CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE"); + log_info("The CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE for the kernel is %d.\n", (int)kernel_subgroup_count); + + if (realSize != sizeof(kernel_subgroup_count)) { + log_error( "ERROR: Returned size of sub group count not valid! (Expected %d, got %d)\n", (int)sizeof(kernel_subgroup_count), (int)realSize ); + return -1; + } + + // test CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT + for (size_t i = kernel_subgroup_count; i > 0; --i) + { + // test all 3 different dimention of requested local size + size_t expect_size = kernel_max_subgroup_size * i; + size_t kernel_ret_size = 0; + error = clGetKernelSubGroupInfo(kernel, deviceID, CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT, sizeof(i), &i, sizeof(ret_ndrange1d), &ret_ndrange1d, &realSize); + test_error(error, "clGetKernelSubGroupInfo failed for CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT"); + if (realSize != sizeof(ret_ndrange1d)) { + log_error( "ERROR: Returned size of sub group count not valid! (Expected %d, got %d)\n", (int)sizeof(kernel_subgroup_count), (int)realSize ); + return -1; + } + + if (ret_ndrange1d != expect_size) + { + log_error( "ERROR: Incorrect value returned for CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT! 
(Expected %d, got %d)\n", (int)expect_size, (int)ret_ndrange1d ); + return -1; + } + + error = get_sub_group_num(queue, kernel, out, kernel_ret_size, ret_ndrange1d, 1); + test_error(error, "Failed to query number of subgroups from kernel"); + if (i != kernel_ret_size) + { + log_error( "ERROR: Mismatch between requested number of subgroups and what get_num_sub_groups() in kernel returned! (Expected %d, got %d)\n", (int)i, (int)kernel_ret_size ); + return -1; + } + + error = clGetKernelSubGroupInfo(kernel, deviceID, CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT, sizeof(i), &i, sizeof(ret_ndrange2d), ret_ndrange2d, &realSize); + test_error(error, "clGetKernelSubGroupInfo failed for CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT"); + if (realSize != sizeof(ret_ndrange2d)) { + log_error( "ERROR: Returned size of sub group count not valid! (Expected %d, got %d)\n", (int)sizeof(kernel_subgroup_count), (int)realSize ); + return -1; + } + + ret_ndrange2d_flattened = flatten_ndrange(ret_ndrange2d, 2); + if (ret_ndrange2d_flattened != expect_size || + ret_ndrange2d[1] != 1) + { + log_error( "ERROR: Incorrect value returned for CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT! (Expected %d, got %d)\n", (int)expect_size, (int)ret_ndrange2d_flattened ); + return -1; + } + + error = get_sub_group_num(queue, kernel, out, kernel_ret_size, ret_ndrange2d_flattened, 2); + test_error(error, "Failed to query number of subgroups from kernel"); + if (i != kernel_ret_size) + { + log_error( "ERROR: Mismatch between requested number of subgroups and what get_num_sub_groups() in kernel returned! 
(Expected %d, got %d)\n", (int)i, (int)kernel_ret_size ); + return -1; + } + + error = clGetKernelSubGroupInfo(kernel, deviceID, CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT, sizeof(i), &i, sizeof(ret_ndrange3d), ret_ndrange3d, &realSize); + test_error(error, "clGetKernelSubGroupInfo failed for CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT"); + if (realSize != sizeof(ret_ndrange3d)) { + log_error( "ERROR: Returned size of sub group count not valid! (Expected %d, got %d)\n", (int)sizeof(kernel_subgroup_count), (int)realSize ); + return -1; + } + + ret_ndrange3d_flattened = flatten_ndrange(ret_ndrange3d, 3); + if (ret_ndrange3d_flattened != expect_size || + ret_ndrange3d[1] != 1 || + ret_ndrange3d[2] != 1) + { + log_error( "ERROR: Incorrect value returned for CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT! (Expected %d, got %d)\n", (int)expect_size, (int)ret_ndrange3d_flattened ); + return -1; + } + + error = get_sub_group_num(queue, kernel, out, kernel_ret_size, ret_ndrange3d_flattened, 3); + test_error(error, "Failed to query number of subgroups from kernel"); + if (i != kernel_ret_size) + { + log_error( "ERROR: Mismatch between requested number of subgroups and what get_num_sub_groups() in kernel returned! (Expected %d, got %d)\n", (int)i, (int)kernel_ret_size ); + return -1; + } + } + + // test when input subgroup count exceeds max wg size + size_t large_sg_size = kernel_subgroup_count + 1; + error = clGetKernelSubGroupInfo(kernel, deviceID, CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT, sizeof(size_t), &large_sg_size, sizeof(ret_ndrange1d), &ret_ndrange1d, &realSize); + test_error(error, "clGetKernelSubGroupInfo failed for CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT"); + if (ret_ndrange1d != 0) + { + log_error( "ERROR: Incorrect value returned for CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT! 
(Expected %d, got %d)\n", 0, (int)ret_ndrange1d ); + return -1; + } + + error = clGetKernelSubGroupInfo(kernel, deviceID, CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT, sizeof(size_t), &large_sg_size, sizeof(ret_ndrange2d), ret_ndrange2d, &realSize); + test_error(error, "clGetKernelSubGroupInfo failed for CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT"); + if (ret_ndrange2d[0] != 0 || + ret_ndrange2d[1] != 0) + { + log_error( "ERROR: Incorrect value returned for CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT!" ); + return -1; + } + + error = clGetKernelSubGroupInfo(kernel, deviceID, CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT, sizeof(size_t), &large_sg_size, sizeof(ret_ndrange3d), ret_ndrange3d, &realSize); + test_error(error, "clGetKernelSubGroupInfo failed for CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT"); + if (ret_ndrange3d[0] != 0 || + ret_ndrange3d[1] != 0 || + ret_ndrange3d[2] != 0) + { + log_error( "ERROR: Incorrect value returned for CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT!" ); + return -1; + } + + return 0; +} diff --git a/test_conformance/api/test_zero_sized_enqueue.cpp b/test_conformance/api/test_zero_sized_enqueue.cpp new file mode 100644 index 00000000..87aa0d21 --- /dev/null +++ b/test_conformance/api/test_zero_sized_enqueue.cpp @@ -0,0 +1,209 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "testBase.h" +#include "../../test_common/harness/typeWrappers.h" +#include "../../test_common/harness/conversions.h" + +const char *zero_sized_enqueue_test_kernel[] = { +"__kernel void foo_kernel(__global float *src, __global int *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = (int)src[tid];\n" +"\n" +"}\n" }; + +const int bufSize = 128; + +cl_int test_zero_sized_enqueue_and_test_output_buffer(cl_command_queue queue, clKernelWrapper& kernel, clMemWrapper& buf, size_t dim, size_t ndrange[]) +{ + cl_int error = clEnqueueNDRangeKernel(queue, kernel, dim, NULL, ndrange, NULL, 0, NULL, NULL); + if (error != CL_SUCCESS) + { + return error; + } + + clFinish(queue); + + // check output buffer has not changed. + int* output = reinterpret_cast(clEnqueueMapBuffer(queue, buf, CL_TRUE, CL_MAP_READ, 0, sizeof(int) * bufSize, 0, NULL, NULL, &error)); + if (error != CL_SUCCESS) + { + return error; + } + + for (int i = 0; i < bufSize; ++i) + { + if (output[i] != 0) + { + log_error( "ERROR: output buffer value has changed.\n" ); + return CL_INVALID_OPERATION; + } + } + + return clEnqueueUnmapMemObject(queue, buf, output, 0, NULL, NULL); +} + +int test_zero_sized_enqueue_helper(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper streams[2]; + size_t ndrange1 = 0; + size_t ndrange20[2] = {0, 0}; + size_t ndrange21[2] = {1, 0}; + size_t ndrange22[2] = {0, 1}; + + size_t ndrange30[3] = {0, 0, 0}; + size_t ndrange31[3] = {1, 0, 0}; + size_t ndrange32[3] = {0, 1, 0}; + size_t ndrange33[3] = {0, 0, 1}; + size_t ndrange34[3] = {0, 1, 1}; + size_t ndrange35[3] = {1, 0, 1}; + size_t ndrange36[3] = {1, 1, 0}; + + streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, bufSize * sizeof(int), NULL, &error); + streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, bufSize * sizeof(int), NULL, 
&error); + + int* buf = new int[bufSize]; + memset(buf, 0, sizeof(int) * bufSize); + + // update output buffer + error = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, sizeof(int) * bufSize, buf, 0, NULL, NULL); + + + /* Create a kernel to test with */ + if( create_single_kernel_helper( context, &program, &kernel, 1, zero_sized_enqueue_test_kernel, "foo_kernel" ) != 0 ) + { + return -1; + } + + error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &streams[0]); + test_error( error, "clSetKernelArg failed." ); + error = clSetKernelArg(kernel, 1, sizeof(cl_mem), &streams[1]); + test_error( error, "clSetKernelArg failed." ); + + // Simple API return code tests for 1D, 2D and 3D zero sized ND range. + error = test_zero_sized_enqueue_and_test_output_buffer(queue, kernel, streams[1], 1, &ndrange1); + test_error( error, "1D zero sized kernel enqueue failed." ); + + error = test_zero_sized_enqueue_and_test_output_buffer(queue, kernel, streams[1], 2, ndrange20); + test_error( error, "2D zero sized kernel enqueue failed." ); + + error = test_zero_sized_enqueue_and_test_output_buffer(queue, kernel, streams[1], 2, ndrange21); + test_error( error, "2D zero sized kernel enqueue failed." ); + + error = test_zero_sized_enqueue_and_test_output_buffer(queue, kernel, streams[1], 2, ndrange22); + test_error( error, "2D zero sized kernel enqueue failed." ); + + + error = test_zero_sized_enqueue_and_test_output_buffer(queue, kernel, streams[1], 3, ndrange30); + test_error( error, "3D zero sized kernel enqueue failed." ); + + error = test_zero_sized_enqueue_and_test_output_buffer(queue, kernel, streams[1], 3, ndrange31); + test_error( error, "3D zero sized kernel enqueue failed." ); + + error = test_zero_sized_enqueue_and_test_output_buffer(queue, kernel, streams[1], 3, ndrange32); + test_error( error, "3D zero sized kernel enqueue failed." 
); + + error = test_zero_sized_enqueue_and_test_output_buffer(queue, kernel, streams[1], 3, ndrange33); + test_error( error, "3D zero sized kernel enqueue failed." ); + + error = test_zero_sized_enqueue_and_test_output_buffer(queue, kernel, streams[1], 3, ndrange34); + test_error( error, "3D zero sized kernel enqueue failed." ); + + error = test_zero_sized_enqueue_and_test_output_buffer(queue, kernel, streams[1], 3, ndrange35); + test_error( error, "3D zero sized kernel enqueue failed." ); + + error = test_zero_sized_enqueue_and_test_output_buffer(queue, kernel, streams[1], 3, ndrange36); + test_error( error, "3D zero sized kernel enqueue failed." ); + + // Verify zero-sized ND range kernel still satisfy event wait list and correct event object + // is returned + cl_event ev = NULL; + clEventWrapper user_ev = clCreateUserEvent(context, &error); + test_error( error, "user event creation failed." ); + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, ndrange30, NULL, 1, &user_ev, &ev); + test_error( error, "3D zero sized kernel enqueue failed." 
); + if (ev == NULL) + { + log_error( "ERROR: failed to create an event object\n" ); + return -1; + } + + cl_int sta; + error = clGetEventInfo(ev, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &sta, NULL); + test_error( error, "Failed to get event status."); + + if (sta != CL_QUEUED) + { + log_error( "ERROR: incorrect zero sized kernel enqueue event status.\n" ); + return -1; + } + + // now unblock zero-sized enqueue + error = clSetUserEventStatus(user_ev, CL_COMPLETE); + test_error( error, "Failed to set user event status."); + + clFinish(queue); + + // now check zero sized enqueue event status + error = clGetEventInfo(ev, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &sta, NULL); + test_error( error, "Failed to get event status."); + + if (sta != CL_COMPLETE) + { + log_error( "ERROR: incorrect zero sized kernel enqueue event status.\n" ); + return -1; + } + + delete [] buf; + + return 0; +} + + +int test_zero_sized_enqueue(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int res = test_zero_sized_enqueue_helper(deviceID, context, queue, num_elements); + if (res != 0) + { + return res; + } + + // now test out of order queue + cl_command_queue_properties props; + cl_int error = clGetDeviceInfo(deviceID, CL_DEVICE_QUEUE_PROPERTIES, sizeof(cl_command_queue_properties), &props, NULL); + test_error( error, "clGetDeviceInfo failed."); + + if (props | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) + { + // test out of order queue + cl_queue_properties queue_prop_def[] = + { + CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, + 0 + }; + + clCommandQueueWrapper ooqueue = clCreateCommandQueueWithProperties(context, deviceID, queue_prop_def, &error); + test_error( error, "clCreateCommandQueueWithProperties failed."); + + res = test_zero_sized_enqueue_helper(deviceID, context, ooqueue, num_elements); + } + + return res; +} diff --git a/test_conformance/atomics/CMakeLists.txt b/test_conformance/atomics/CMakeLists.txt new 
file mode 100644 index 00000000..9dcd4bb1 --- /dev/null +++ b/test_conformance/atomics/CMakeLists.txt @@ -0,0 +1,18 @@ +set(MODULE_NAME ATOMICS) + +set(${MODULE_NAME}_SOURCES + main.c + test_atomics.cpp + test_indexed_cases.c + ../../test_common/harness/errorHelpers.c + ../../test_common/harness/threadTesting.c + ../../test_common/harness/testHarness.c + ../../test_common/harness/kernelHelpers.c + ../../test_common/harness/mt19937.c + ../../test_common/harness/conversions.c + ../../test_common/harness/msvc9.c + ../../test_common/harness/parseParameters.cpp +) + +include(../CMakeCommon.txt) + diff --git a/test_conformance/atomics/Jamfile b/test_conformance/atomics/Jamfile new file mode 100644 index 00000000..8c039e1a --- /dev/null +++ b/test_conformance/atomics/Jamfile @@ -0,0 +1,17 @@ +project + : requirements + gcc:-xc++ + msvc:"/TP" + ; + +exe test_atomics + : main.c + test_atomics.c + test_indexed_cases.c + ; + +install dist + : test_atomics + : debug:$(DIST)/debug/tests/test_conformance/atomics + release:$(DIST)/release/tests/test_conformance/atomics + ; diff --git a/test_conformance/atomics/Makefile b/test_conformance/atomics/Makefile new file mode 100644 index 00000000..aa1c0147 --- /dev/null +++ b/test_conformance/atomics/Makefile @@ -0,0 +1,44 @@ +ifdef BUILD_WITH_ATF +ATF = -framework ATF +USE_ATF = -DUSE_ATF +endif + +SRCS = main.c \ + test_atomics.cpp \ + test_indexed_cases.c \ + ../../test_common/harness/errorHelpers.c \ + ../../test_common/harness/threadTesting.c \ + ../../test_common/harness/testHarness.c \ + ../../test_common/harness/mt19937.c \ + ../../test_common/harness/conversions.c \ + ../../test_common/harness/kernelHelpers.c + +DEFINES = + +SOURCES = $(abspath $(SRCS)) +LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries +LIBPATH += -L. 
+FRAMEWORK = $(SOURCES) +HEADERS = +TARGET = test_atomics +INCLUDE = +COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32 +CC = c++ +CFLAGS = $(COMPILERFLAGS) $(RC_CFLAGS) ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE) +CXXFLAGS = $(COMPILERFLAGS) $(RC_CFLAGS) ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE) +LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF} + +OBJECTS := ${SOURCES:.c=.o} +OBJECTS := ${OBJECTS:.cpp=.o} + +TARGETOBJECT = +all: $(TARGET) + +$(TARGET): $(OBJECTS) + $(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES) + +clean: + rm -f $(TARGET) $(OBJECTS) + +.DEFAULT: + @echo The target \"$@\" does not exist in Makefile. diff --git a/test_conformance/atomics/main.c b/test_conformance/atomics/main.c new file mode 100644 index 00000000..76df7946 --- /dev/null +++ b/test_conformance/atomics/main.c @@ -0,0 +1,71 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/compat.h" +#include +#include +#include +#include "procs.h" +#include "../../test_common/harness/testHarness.h" + +#if !defined(_WIN32) +#include +#endif + + +basefn basefn_list[] = { + test_atomic_add, + test_atomic_sub, + test_atomic_xchg, + test_atomic_min, + test_atomic_max, + test_atomic_inc, + test_atomic_dec, + test_atomic_cmpxchg, + test_atomic_and, + test_atomic_or, + test_atomic_xor, + + test_atomic_add_index, + test_atomic_add_index_bin +}; + +const char *basefn_names[] = { + "atomic_add", + "atomic_sub", + "atomic_xchg", + "atomic_min", + "atomic_max", + "atomic_inc", + "atomic_dec", + "atomic_cmpxchg", + "atomic_and", + "atomic_or", + "atomic_xor", + + "atomic_add_index", + "atomic_add_index_bin", +}; + +ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_list) / sizeof(basefn_list[0]))); + +int num_fns = sizeof(basefn_names) / sizeof(char *); + +int main(int argc, const char *argv[]) +{ + return runTestHarness( argc, argv, num_fns, basefn_list, basefn_names, false, false, 0 ); +} + + diff --git a/test_conformance/atomics/procs.h b/test_conformance/atomics/procs.h new file mode 100644 index 00000000..017e6c55 --- /dev/null +++ b/test_conformance/atomics/procs.h @@ -0,0 +1,39 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/errorHelpers.h" +#include "../../test_common/harness/kernelHelpers.h" +#include "../../test_common/harness/threadTesting.h" +#include "../../test_common/harness/typeWrappers.h" + +extern int create_program_and_kernel(const char *source, const char *kernel_name, cl_program *program_ret, cl_kernel *kernel_ret); + +extern int test_atomic_add(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_atomic_sub(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_atomic_xchg(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_atomic_min(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_atomic_max(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_atomic_inc(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_atomic_dec(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_atomic_cmpxchg(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_atomic_and(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_atomic_or(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_atomic_xor(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_atomic_add_index(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_atomic_add_index_bin(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + + + diff --git a/test_conformance/atomics/testBase.h b/test_conformance/atomics/testBase.h new file mode 100644 index 00000000..8d58eeb6 
--- /dev/null +++ b/test_conformance/atomics/testBase.h @@ -0,0 +1,31 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef _testBase_h +#define _testBase_h + +#include "../../test_common/harness/compat.h" +#include +#include +#include +#include +#include + +#include "procs.h" + +#endif // _testBase_h + + + diff --git a/test_conformance/atomics/test_atomics.cpp b/test_conformance/atomics/test_atomics.cpp new file mode 100644 index 00000000..06941e58 --- /dev/null +++ b/test_conformance/atomics/test_atomics.cpp @@ -0,0 +1,1126 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "testBase.h" +#include "../../test_common/harness/conversions.h" +#ifndef _WIN32 +#include +#endif + +#define INT_TEST_VALUE 402258822 +#define LONG_TEST_VALUE 515154531254381446LL + + +extern cl_uint gRandomSeed; + +const char *atomic_global_pattern[] = { + "__kernel void test_atomic_fn(volatile __global %s *destMemory, __global %s *oldValues)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + , + "\n" + "}\n" }; + +const char *atomic_local_pattern[] = { + "__kernel void test_atomic_fn(__global %s *finalDest, __global %s *oldValues, volatile __local %s *destMemory, int numDestItems )\n" + "{\n" + " int tid = get_global_id(0);\n" + " int dstItemIdx;\n" + "\n" + " // Everybody does the following line(s), but it all has the same result. We still need to ensure we sync before the atomic op, though\n" + " for( dstItemIdx = 0; dstItemIdx < numDestItems; dstItemIdx++ )\n" + " destMemory[ dstItemIdx ] = finalDest[ dstItemIdx ];\n" + " barrier( CLK_LOCAL_MEM_FENCE );\n" + "\n" + , + " barrier( CLK_LOCAL_MEM_FENCE );\n" + " // Finally, write out the last value. 
Again, we're synced, so everyone will be writing the same value\n" + " for( dstItemIdx = 0; dstItemIdx < numDestItems; dstItemIdx++ )\n" + " finalDest[ dstItemIdx ] = destMemory[ dstItemIdx ];\n" + "}\n" }; + + +#define TEST_COUNT 128 * 1024 + + +struct TestFns +{ + cl_int mIntStartValue; + cl_long mLongStartValue; + + size_t (*NumResultsFn)( size_t threadSize, ExplicitType dataType ); + + // Integer versions + cl_int (*ExpectedValueIntFn)( size_t size, cl_int *startRefValues, size_t whichDestValue ); + void (*GenerateRefsIntFn)( size_t size, cl_int *startRefValues, MTdata d ); + bool (*VerifyRefsIntFn)( size_t size, cl_int *refValues, cl_int finalValue ); + + // Long versions + cl_long (*ExpectedValueLongFn)( size_t size, cl_long *startRefValues, size_t whichDestValue ); + void (*GenerateRefsLongFn)( size_t size, cl_long *startRefValues, MTdata d ); + bool (*VerifyRefsLongFn)( size_t size, cl_long *refValues, cl_long finalValue ); + + // Float versions + cl_float (*ExpectedValueFloatFn)( size_t size, cl_float *startRefValues, size_t whichDestValue ); + void (*GenerateRefsFloatFn)( size_t size, cl_float *startRefValues, MTdata d ); + bool (*VerifyRefsFloatFn)( size_t size, cl_float *refValues, cl_float finalValue ); +}; + +bool check_atomic_support( cl_device_id device, bool extended, bool isLocal, ExplicitType dataType ) +{ + const char *extensionNames[8] = { + "cl_khr_global_int32_base_atomics", "cl_khr_global_int32_extended_atomics", + "cl_khr_local_int32_base_atomics", "cl_khr_local_int32_extended_atomics", + "cl_khr_int64_base_atomics", "cl_khr_int64_extended_atomics", + "cl_khr_int64_base_atomics", "cl_khr_int64_extended_atomics" // this line intended to be the same as the last one + }; + + size_t index = 0; + if( extended ) + index += 1; + if( isLocal ) + index += 2; + + size_t major, minor; + + int error = get_device_version(device, &major, &minor); + test_error( error, "get_device_version" ); + + switch (dataType) + { + case kInt: + case kUInt: + if( major 
* 10 + minor >= 11 ) + return 1; + break; + case kLong: + case kULong: + index += 4; + break; + case kFloat: // this has to stay separate since the float atomics arent in the 1.0 extensions + return major * 10 + minor >= 11; + default: + log_error( "ERROR: Unsupported data type (%d) in check_atomic_support\n", dataType ); + return 0; + } + + return is_extension_available( device, extensionNames[index] ); +} + +int test_atomic_function(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, const char *programCore, + TestFns testFns, + bool extended, bool isLocal, ExplicitType dataType, bool matchGroupSize ) +{ + clProgramWrapper program; + clKernelWrapper kernel; + int error; + size_t threads[1]; + clMemWrapper streams[2]; + void *refValues, *startRefValues; + size_t threadSize, groupSize; + const char *programLines[4]; + char pragma[ 512 ]; + char programHeader[ 512 ]; + MTdata d; + size_t typeSize = get_explicit_type_size( dataType ); + + + // Verify we can run first + bool isUnsigned = ( dataType == kULong ) || ( dataType == kUInt ); + if( !check_atomic_support( deviceID, extended, isLocal, dataType ) ) + { + // Only print for the signed (unsigned comes right after, and if signed isn't supported, unsigned isn't either) + if( dataType == kFloat ) + log_info( "\t%s float not supported\n", isLocal ? "Local" : "Global" ); + else if( !isUnsigned ) + log_info( "\t%s %sint%d not supported\n", isLocal ? "Local" : "Global", isUnsigned ? "u" : "", (int)typeSize * 8 ); + // Since we don't support the operation, they implicitly pass + return 0; + } + else + { + if( dataType == kFloat ) + log_info( "\t%s float%s...", isLocal ? "local" : "global", isLocal ? " " : "" ); + else + log_info( "\t%s %sint%d%s%s...", isLocal ? "local" : "global", isUnsigned ? "u" : "", + (int)typeSize * 8, isUnsigned ? "" : " ", isLocal ? 
" " : "" ); + } + + //// Set up the kernel code + + // Create the pragma line for this kernel + bool isLong = ( dataType == kLong || dataType == kULong ); + sprintf( pragma, "#pragma OPENCL EXTENSION cl_khr%s_int%s_%s_atomics : enable\n", + isLong ? "" : (isLocal ? "_local" : "_global"), isLong ? "64" : "32", + extended ? "extended" : "base" ); + + // Now create the program header + const char *typeName = get_explicit_type_name( dataType ); + if( isLocal ) + sprintf( programHeader, atomic_local_pattern[ 0 ], typeName, typeName, typeName ); + else + sprintf( programHeader, atomic_global_pattern[ 0 ], typeName, typeName ); + + // Set up our entire program now + programLines[ 0 ] = pragma; + programLines[ 1 ] = programHeader; + programLines[ 2 ] = programCore; + programLines[ 3 ] = ( isLocal ) ? atomic_local_pattern[ 1 ] : atomic_global_pattern[ 1 ]; + + if( create_single_kernel_helper( context, &program, &kernel, 4, programLines, "test_atomic_fn" ) ) + { + return -1; + } + + //// Set up to actually run + threadSize = num_elements; + + error = get_max_common_work_group_size( context, kernel, threadSize, &groupSize ); + test_error( error, "Unable to get thread group max size" ); + + if( matchGroupSize ) + // HACK because xchg and cmpxchg apparently are limited by hardware + threadSize = groupSize; + + if( isLocal ) + { + size_t maxSizes[3] = {0, 0, 0}; + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_SIZES, 3*sizeof(size_t), maxSizes, 0); + test_error( error, "Unable to obtain max work item sizes for the device" ); + + size_t workSize; + error = clGetKernelWorkGroupInfo( kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE, sizeof( workSize ), &workSize, NULL ); + test_error( error, "Unable to obtain max work group size for device and kernel combo" ); + + threadSize = groupSize = workSize; + } + + + log_info( "\t(thread count %d, group size %d)\n", (int)threadSize, (int)groupSize ); + + refValues = (cl_int *)malloc( typeSize * threadSize ); + + if( 
testFns.GenerateRefsIntFn != NULL ) + { + // We have a ref generator provided + d = init_genrand( gRandomSeed ); + startRefValues = malloc( typeSize * threadSize ); + if( typeSize == 4 ) + testFns.GenerateRefsIntFn( threadSize, (cl_int *)startRefValues, d ); + else + testFns.GenerateRefsLongFn( threadSize, (cl_long *)startRefValues, d ); + free_mtdata(d); + d = NULL; + } + else + startRefValues = NULL; + + // If we're given a num_results function, we need to determine how many result objects we need. If + // we don't have it, we assume it's just 1 + size_t numDestItems = ( testFns.NumResultsFn != NULL ) ? testFns.NumResultsFn( threadSize, dataType ) : 1; + + char * destItems = new char[ typeSize * numDestItems ]; + if( destItems == NULL ) + { + log_error( "ERROR: Unable to allocate memory!\n" ); + return -1; + } + void * startValue = ( typeSize == 4 ) ? (void *)&testFns.mIntStartValue : (void *)&testFns.mLongStartValue; + for( size_t i = 0; i < numDestItems; i++ ) + memcpy( destItems + i * typeSize, startValue, typeSize ); + + streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), typeSize * numDestItems, destItems, NULL); + if (!streams[0]) + { + log_error("ERROR: Creating output array failed!\n"); + return -1; + } + streams[1] = clCreateBuffer(context, (cl_mem_flags)(( startRefValues != NULL ? 
CL_MEM_COPY_HOST_PTR : CL_MEM_READ_WRITE )), typeSize * threadSize, startRefValues, NULL); + if (!streams[1]) + { + log_error("ERROR: Creating reference array failed!\n"); + return -1; + } + + /* Set the arguments */ + error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] ); + test_error( error, "Unable to set indexed kernel arguments" ); + error = clSetKernelArg( kernel, 1, sizeof( streams[1] ), &streams[1] ); + test_error( error, "Unable to set indexed kernel arguments" ); + + if( isLocal ) + { + error = clSetKernelArg( kernel, 2, typeSize * numDestItems, NULL ); + test_error( error, "Unable to set indexed local kernel argument" ); + + cl_int numDestItemsInt = (cl_int)numDestItems; + error = clSetKernelArg( kernel, 3, sizeof( cl_int ), &numDestItemsInt ); + test_error( error, "Unable to set indexed kernel argument" ); + } + + /* Run the kernel */ + threads[0] = threadSize; + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, &groupSize, 0, NULL, NULL ); + test_error( error, "Unable to execute test kernel" ); + + error = clEnqueueReadBuffer( queue, streams[0], true, 0, typeSize * numDestItems, destItems, 0, NULL, NULL ); + test_error( error, "Unable to read result value!" ); + + error = clEnqueueReadBuffer( queue, streams[1], true, 0, typeSize * threadSize, refValues, 0, NULL, NULL ); + test_error( error, "Unable to read reference values!" ); + + // If we have an expectedFn, then we need to generate a final value to compare against. 
If we don't + // have one, it's because we're comparing ref values only + if( testFns.ExpectedValueIntFn != NULL ) + { + for( size_t i = 0; i < numDestItems; i++ ) + { + char expected[ 8 ]; + cl_int intVal; + cl_long longVal; + if( typeSize == 4 ) + { + // Int version + intVal = testFns.ExpectedValueIntFn( threadSize, (cl_int *)startRefValues, i ); + memcpy( expected, &intVal, sizeof( intVal ) ); + } + else + { + // Long version + longVal = testFns.ExpectedValueLongFn( threadSize, (cl_long *)startRefValues, i ); + memcpy( expected, &longVal, sizeof( longVal ) ); + } + + if( memcmp( expected, destItems + i * typeSize, typeSize ) != 0 ) + { + if( typeSize == 4 ) + { + cl_int *outValue = (cl_int *)( destItems + i * typeSize ); + log_error( "ERROR: Result %ld from kernel does not validate! (should be %d, was %d)\n", i, intVal, *outValue ); + cl_int *startRefs = (cl_int *)startRefValues; + cl_int *refs = (cl_int *)refValues; + for( i = 0; i < threadSize; i++ ) + { + if( startRefs != NULL ) + log_info( " --- %ld - %d --- %d\n", i, startRefs[i], refs[i] ); + else + log_info( " --- %ld --- %d\n", i, refs[i] ); + } + } + else + { + cl_long *outValue = (cl_long *)( destItems + i * typeSize ); + log_error( "ERROR: Result %ld from kernel does not validate! 
(should be %lld, was %lld)\n", i, longVal, *outValue ); + cl_long *startRefs = (cl_long *)startRefValues; + cl_long *refs = (cl_long *)refValues; + for( i = 0; i < threadSize; i++ ) + { + if( startRefs != NULL ) + log_info( " --- %ld - %lld --- %lld\n", i, startRefs[i], refs[i] ); + else + log_info( " --- %ld --- %lld\n", i, refs[i] ); + } + } + return -1; + } + } + } + + if( testFns.VerifyRefsIntFn != NULL ) + { + /* Use the verify function to also check the results */ + if( dataType == kFloat ) + { + cl_float *outValue = (cl_float *)destItems; + if( !testFns.VerifyRefsFloatFn( threadSize, (cl_float *)refValues, *outValue ) != 0 ) + { + log_error( "ERROR: Reference values did not validate!\n" ); + return -1; + } + } + else if( typeSize == 4 ) + { + cl_int *outValue = (cl_int *)destItems; + if( !testFns.VerifyRefsIntFn( threadSize, (cl_int *)refValues, *outValue ) != 0 ) + { + log_error( "ERROR: Reference values did not validate!\n" ); + return -1; + } + } + else + { + cl_long *outValue = (cl_long *)destItems; + if( !testFns.VerifyRefsLongFn( threadSize, (cl_long *)refValues, *outValue ) != 0 ) + { + log_error( "ERROR: Reference values did not validate!\n" ); + return -1; + } + } + } + else if( testFns.ExpectedValueIntFn == NULL ) + { + log_error( "ERROR: Test doesn't check total or refs; no values are verified!\n" ); + return -1; + } + + + /* Re-write the starting value */ + for( size_t i = 0; i < numDestItems; i++ ) + memcpy( destItems + i * typeSize, startValue, typeSize ); + error = clEnqueueWriteBuffer( queue, streams[0], true, 0, typeSize * numDestItems, destItems, 0, NULL, NULL ); + test_error( error, "Unable to write starting values!" 
); + + /* Run the kernel once for a single thread, so we can verify that the returned value is the original one */ + threads[0] = 1; + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, threads, 0, NULL, NULL ); + test_error( error, "Unable to execute test kernel" ); + + error = clEnqueueReadBuffer( queue, streams[1], true, 0, typeSize, refValues, 0, NULL, NULL ); + test_error( error, "Unable to read reference values!" ); + + if( memcmp( refValues, destItems, typeSize ) != 0 ) + { + if( typeSize == 4 ) + { + cl_int *s = (cl_int *)destItems; + cl_int *r = (cl_int *)refValues; + log_error( "ERROR: atomic function operated correctly but did NOT return correct 'old' value " + " (should have been %d, returned %d)!\n", *s, *r ); + } + else + { + cl_long *s = (cl_long *)destItems; + cl_long *r = (cl_long *)refValues; + log_error( "ERROR: atomic function operated correctly but did NOT return correct 'old' value " + " (should have been %lld, returned %lld)!\n", *s, *r ); + } + return -1; + } + + delete [] destItems; + free( refValues ); + if( startRefValues != NULL ) + free( startRefValues ); + + return 0; +} + +int test_atomic_function_set(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, const char *programCore, + TestFns testFns, + bool extended, bool matchGroupSize, bool usingAtomicPrefix ) +{ + log_info(" Testing %s functions...\n", usingAtomicPrefix ? 
"atomic_" : "atom_"); + + int errors = 0; + errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, false, kInt, matchGroupSize ); + errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, false, kUInt, matchGroupSize ); + errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, true, kInt, matchGroupSize ); + errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, true, kUInt, matchGroupSize ); + + // Only the 32 bit atomic functions use the "atomic" prefix in 1.1, the 64 bit functions still use the "atom" prefix. + // The argument usingAtomicPrefix is set to true if programCore was generated with the "atomic" prefix. + if (!usingAtomicPrefix) { + errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, false, kLong, matchGroupSize ); + errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, false, kULong, matchGroupSize ); + errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, true, kLong, matchGroupSize ); + errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, true, kULong, matchGroupSize ); + } + + return errors; +} + +#pragma mark ---- add + +const char atom_add_core[] = +" oldValues[tid] = atom_add( &destMemory[0], tid + 3 );\n" +" atom_add( &destMemory[0], tid + 3 );\n" +" atom_add( &destMemory[0], tid + 3 );\n" +" atom_add( &destMemory[0], tid + 3 );\n"; + +const char atomic_add_core[] = +" oldValues[tid] = atomic_add( &destMemory[0], tid + 3 );\n" +" atomic_add( &destMemory[0], tid + 3 );\n" +" atomic_add( &destMemory[0], tid + 3 );\n" +" atomic_add( &destMemory[0], tid + 3 );\n"; + +cl_int test_atomic_add_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue 
) +{ + cl_int total = 0; + for( size_t i = 0; i < size; i++ ) + total += ( (cl_int)i + 3 ) * 4; + return total; +} + +cl_long test_atomic_add_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue ) +{ + cl_long total = 0; + for( size_t i = 0; i < size; i++ ) + total += ( ( i + 3 ) * 4 ); + return total; +} + +int test_atomic_add(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + TestFns set = { 0, 0LL, NULL, test_atomic_add_result_int, NULL, NULL, test_atomic_add_result_long, NULL, NULL }; + + if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_add_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 ) + return -1; + if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_add_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 ) + return -1; + return 0; +} + +#pragma mark ---- sub + +const char atom_sub_core[] = " oldValues[tid] = atom_sub( &destMemory[0], tid + 3 );\n"; + +const char atomic_sub_core[] = " oldValues[tid] = atomic_sub( &destMemory[0], tid + 3 );\n"; + +cl_int test_atomic_sub_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue ) +{ + cl_int total = INT_TEST_VALUE; + for( size_t i = 0; i < size; i++ ) + total -= (cl_int)i + 3; + return total; +} + +cl_long test_atomic_sub_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue ) +{ + cl_long total = LONG_TEST_VALUE; + for( size_t i = 0; i < size; i++ ) + total -= i + 3; + return total; +} + +int test_atomic_sub(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + TestFns set = { INT_TEST_VALUE, LONG_TEST_VALUE, NULL, test_atomic_sub_result_int, NULL, NULL, test_atomic_sub_result_long, NULL, NULL }; + + if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_sub_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 ) + return -1; + if( 
test_atomic_function_set( deviceID, context, queue, num_elements, atomic_sub_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 ) + return -1; + return 0; +} + +#pragma mark ---- xchg + +const char atom_xchg_core[] = " oldValues[tid] = atom_xchg( &destMemory[0], tid );\n"; + +const char atomic_xchg_core[] = " oldValues[tid] = atomic_xchg( &destMemory[0], tid );\n"; +const char atomic_xchg_float_core[] = " oldValues[tid] = atomic_xchg( &destMemory[0], tid );\n"; + +bool test_atomic_xchg_verify_int( size_t size, cl_int *refValues, cl_int finalValue ) +{ + /* For xchg, each value from 0 to size - 1 should have an entry in the ref array, and ONLY one entry */ + char *valids; + size_t i; + char originalValidCount = 0; + + valids = (char *)malloc( sizeof( char ) * size ); + memset( valids, 0, sizeof( char ) * size ); + + for( i = 0; i < size; i++ ) + { + if( refValues[ i ] == INT_TEST_VALUE ) + { + // Special initial value + originalValidCount++; + continue; + } + if( refValues[ i ] < 0 || (size_t)refValues[ i ] >= size ) + { + log_error( "ERROR: Reference value %ld outside of valid range! (%d)\n", i, refValues[ i ] ); + return false; + } + valids[ refValues[ i ] ] ++; + } + + /* Note: ONE entry will have zero count. 
It'll be the last one that executed, because that value should be + the final value outputted */ + if( valids[ finalValue ] > 0 ) + { + log_error( "ERROR: Final value %d was also in ref list!\n", finalValue ); + return false; + } + else + valids[ finalValue ] = 1; // So the following loop will be okay + + /* Now check that every entry has one and only one count */ + if( originalValidCount != 1 ) + { + log_error( "ERROR: Starting reference value %d did not occur once-and-only-once (occurred %d)\n", 65191, originalValidCount ); + return false; + } + for( i = 0; i < size; i++ ) + { + if( valids[ i ] != 1 ) + { + log_error( "ERROR: Reference value %ld did not occur once-and-only-once (occurred %d)\n", i, valids[ i ] ); + for( size_t j = 0; j < size; j++ ) + log_info( "%d: %d\n", (int)j, (int)valids[ j ] ); + return false; + } + } + + free( valids ); + return true; +} + +bool test_atomic_xchg_verify_long( size_t size, cl_long *refValues, cl_long finalValue ) +{ + /* For xchg, each value from 0 to size - 1 should have an entry in the ref array, and ONLY one entry */ + char *valids; + size_t i; + char originalValidCount = 0; + + valids = (char *)malloc( sizeof( char ) * size ); + memset( valids, 0, sizeof( char ) * size ); + + for( i = 0; i < size; i++ ) + { + if( refValues[ i ] == LONG_TEST_VALUE ) + { + // Special initial value + originalValidCount++; + continue; + } + if( refValues[ i ] < 0 || (size_t)refValues[ i ] >= size ) + { + log_error( "ERROR: Reference value %ld outside of valid range! (%lld)\n", i, refValues[ i ] ); + return false; + } + valids[ refValues[ i ] ] ++; + } + + /* Note: ONE entry will have zero count. 
It'll be the last one that executed, because that value should be + the final value outputted */ + if( valids[ finalValue ] > 0 ) + { + log_error( "ERROR: Final value %lld was also in ref list!\n", finalValue ); + return false; + } + else + valids[ finalValue ] = 1; // So the following loop will be okay + + /* Now check that every entry has one and only one count */ + if( originalValidCount != 1 ) + { + log_error( "ERROR: Starting reference value %d did not occur once-and-only-once (occurred %d)\n", 65191, originalValidCount ); + return false; + } + for( i = 0; i < size; i++ ) + { + if( valids[ i ] != 1 ) + { + log_error( "ERROR: Reference value %ld did not occur once-and-only-once (occurred %d)\n", i, valids[ i ] ); + for( size_t j = 0; j < size; j++ ) + log_info( "%d: %d\n", (int)j, (int)valids[ j ] ); + return false; + } + } + + free( valids ); + return true; +} + +bool test_atomic_xchg_verify_float( size_t size, cl_float *refValues, cl_float finalValue ) +{ + /* For xchg, each value from 0 to size - 1 should have an entry in the ref array, and ONLY one entry */ + char *valids; + size_t i; + char originalValidCount = 0; + + valids = (char *)malloc( sizeof( char ) * size ); + memset( valids, 0, sizeof( char ) * size ); + + for( i = 0; i < size; i++ ) + { + cl_int *intRefValue = (cl_int *)( &refValues[ i ] ); + if( *intRefValue == INT_TEST_VALUE ) + { + // Special initial value + originalValidCount++; + continue; + } + if( refValues[ i ] < 0 || (size_t)refValues[ i ] >= size ) + { + log_error( "ERROR: Reference value %ld outside of valid range! (%a)\n", i, refValues[ i ] ); + return false; + } + valids[ (int)refValues[ i ] ] ++; + } + + /* Note: ONE entry will have zero count. 
It'll be the last one that executed, because that value should be + the final value outputted */ + if( valids[ (int)finalValue ] > 0 ) + { + log_error( "ERROR: Final value %a was also in ref list!\n", finalValue ); + return false; + } + else + valids[ (int)finalValue ] = 1; // So the following loop will be okay + + /* Now check that every entry has one and only one count */ + if( originalValidCount != 1 ) + { + log_error( "ERROR: Starting reference value %d did not occur once-and-only-once (occurred %d)\n", 65191, originalValidCount ); + return false; + } + for( i = 0; i < size; i++ ) + { + if( valids[ i ] != 1 ) + { + log_error( "ERROR: Reference value %ld did not occur once-and-only-once (occurred %d)\n", i, valids[ i ] ); + for( size_t j = 0; j < size; j++ ) + log_info( "%d: %d\n", (int)j, (int)valids[ j ] ); + return false; + } + } + + free( valids ); + return true; +} + +int test_atomic_xchg(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + TestFns set = { INT_TEST_VALUE, LONG_TEST_VALUE, NULL, NULL, NULL, test_atomic_xchg_verify_int, NULL, NULL, test_atomic_xchg_verify_long, NULL, NULL, test_atomic_xchg_verify_float }; + + int errors = test_atomic_function_set( deviceID, context, queue, num_elements, atom_xchg_core, set, false, true, /*usingAtomicPrefix*/ false ); + errors |= test_atomic_function_set( deviceID, context, queue, num_elements, atomic_xchg_core, set, false, true, /*usingAtomicPrefix*/ true ); + + errors |= test_atomic_function( deviceID, context, queue, num_elements, atomic_xchg_float_core, set, false, false, kFloat, true ); + errors |= test_atomic_function( deviceID, context, queue, num_elements, atomic_xchg_float_core, set, false, true, kFloat, true ); + + return errors; +} + + +#pragma mark ---- min + +const char atom_min_core[] = " oldValues[tid] = atom_min( &destMemory[0], oldValues[tid] );\n"; + +const char atomic_min_core[] = " oldValues[tid] = atomic_min( &destMemory[0], oldValues[tid] );\n"; + 
+cl_int test_atomic_min_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue ) +{ + cl_int total = 0x7fffffffL; + for( size_t i = 0; i < size; i++ ) + { + if( startRefValues[ i ] < total ) + total = startRefValues[ i ]; + } + return total; +} + +void test_atomic_min_gen_int( size_t size, cl_int *startRefValues, MTdata d ) +{ + for( size_t i = 0; i < size; i++ ) + startRefValues[i] = (cl_int)( genrand_int32(d) % 0x3fffffff ) + 0x3fffffff; +} + +cl_long test_atomic_min_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue ) +{ + cl_long total = 0x7fffffffffffffffLL; + for( size_t i = 0; i < size; i++ ) + { + if( startRefValues[ i ] < total ) + total = startRefValues[ i ]; + } + return total; +} + +void test_atomic_min_gen_long( size_t size, cl_long *startRefValues, MTdata d ) +{ + for( size_t i = 0; i < size; i++ ) + startRefValues[i] = (cl_long)( genrand_int32(d) | ( ( (cl_long)genrand_int32(d) & 0x7fffffffL ) << 16 ) ); +} + +int test_atomic_min(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + TestFns set = { 0x7fffffffL, 0x7fffffffffffffffLL, NULL, test_atomic_min_result_int, test_atomic_min_gen_int, NULL, test_atomic_min_result_long, test_atomic_min_gen_long, NULL }; + + if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_min_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 ) + return -1; + if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_min_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 ) + return -1; + return 0; +} + + +#pragma mark ---- max + +const char atom_max_core[] = " oldValues[tid] = atom_max( &destMemory[0], oldValues[tid] );\n"; + +const char atomic_max_core[] = " oldValues[tid] = atomic_max( &destMemory[0], oldValues[tid] );\n"; + +cl_int test_atomic_max_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue ) +{ + cl_int total = 0; + for( size_t i 
= 0; i < size; i++ ) + { + if( startRefValues[ i ] > total ) + total = startRefValues[ i ]; + } + return total; +} + +void test_atomic_max_gen_int( size_t size, cl_int *startRefValues, MTdata d ) +{ + for( size_t i = 0; i < size; i++ ) + startRefValues[i] = (cl_int)( genrand_int32(d) % 0x3fffffff ) + 0x3fffffff; +} + +cl_long test_atomic_max_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue ) +{ + cl_long total = 0; + for( size_t i = 0; i < size; i++ ) + { + if( startRefValues[ i ] > total ) + total = startRefValues[ i ]; + } + return total; +} + +void test_atomic_max_gen_long( size_t size, cl_long *startRefValues, MTdata d ) +{ + for( size_t i = 0; i < size; i++ ) + startRefValues[i] = (cl_long)( genrand_int32(d) | ( ( (cl_long)genrand_int32(d) & 0x7fffffffL ) << 16 ) ); +} + +int test_atomic_max(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + TestFns set = { 0, 0, NULL, test_atomic_max_result_int, test_atomic_max_gen_int, NULL, test_atomic_max_result_long, test_atomic_max_gen_long, NULL }; + + if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_max_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 ) + return -1; + if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_max_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 ) + return -1; + return 0; +} + + +#pragma mark ---- inc + +const char atom_inc_core[] = " oldValues[tid] = atom_inc( &destMemory[0] );\n"; + +const char atomic_inc_core[] = " oldValues[tid] = atomic_inc( &destMemory[0] );\n"; + +cl_int test_atomic_inc_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue ) +{ + return INT_TEST_VALUE + (cl_int)size; +} + +cl_long test_atomic_inc_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue ) +{ + return LONG_TEST_VALUE + size; +} + +int test_atomic_inc(cl_device_id deviceID, cl_context context, 
cl_command_queue queue, int num_elements) +{ + TestFns set = { INT_TEST_VALUE, LONG_TEST_VALUE, NULL, test_atomic_inc_result_int, NULL, NULL, test_atomic_inc_result_long, NULL, NULL }; + + if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_inc_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 ) + return -1; + if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_inc_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 ) + return -1; + return 0; +} + + +#pragma mark ---- dec + +const char atom_dec_core[] = " oldValues[tid] = atom_dec( &destMemory[0] );\n"; + +const char atomic_dec_core[] = " oldValues[tid] = atomic_dec( &destMemory[0] );\n"; + +cl_int test_atomic_dec_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue ) +{ + return INT_TEST_VALUE - (cl_int)size; +} + +cl_long test_atomic_dec_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue ) +{ + return LONG_TEST_VALUE - size; +} + +int test_atomic_dec(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + TestFns set = { INT_TEST_VALUE, LONG_TEST_VALUE, NULL, test_atomic_dec_result_int, NULL, NULL, test_atomic_dec_result_long, NULL, NULL }; + + if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_dec_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 ) + return -1; + if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_dec_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 ) + return -1; + return 0; +} + + +#pragma mark ---- cmpxchg + +/* We test cmpxchg by implementing (the long way) atom_add */ +const char atom_cmpxchg_core[] = +" int oldValue, origValue, newValue;\n" +" do { \n" +" origValue = destMemory[0];\n" +" newValue = origValue + tid + 2;\n" +" oldValue = atom_cmpxchg( &destMemory[0], origValue, newValue );\n" +" } while( oldValue 
!= origValue );\n" +" oldValues[tid] = oldValue;\n" +; + +const char atom_cmpxchg64_core[] = +" long oldValue, origValue, newValue;\n" +" do { \n" +" origValue = destMemory[0];\n" +" newValue = origValue + tid + 2;\n" +" oldValue = atom_cmpxchg( &destMemory[0], origValue, newValue );\n" +" } while( oldValue != origValue );\n" +" oldValues[tid] = oldValue;\n" +; + +const char atomic_cmpxchg_core[] = +" int oldValue, origValue, newValue;\n" +" do { \n" +" origValue = destMemory[0];\n" +" newValue = origValue + tid + 2;\n" +" oldValue = atomic_cmpxchg( &destMemory[0], origValue, newValue );\n" +" } while( oldValue != origValue );\n" +" oldValues[tid] = oldValue;\n" +; + +cl_int test_atomic_cmpxchg_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue ) +{ + cl_int total = INT_TEST_VALUE; + for( size_t i = 0; i < size; i++ ) + total += (cl_int)i + 2; + return total; +} + +cl_long test_atomic_cmpxchg_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue ) +{ + cl_long total = LONG_TEST_VALUE; + for( size_t i = 0; i < size; i++ ) + total += i + 2; + return total; +} + +int test_atomic_cmpxchg(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + TestFns set = { INT_TEST_VALUE, LONG_TEST_VALUE, NULL, test_atomic_cmpxchg_result_int, NULL, NULL, test_atomic_cmpxchg_result_long, NULL, NULL }; + + int errors = 0; + + log_info(" Testing atom_ functions...\n"); + errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg_core, set, false, false, kInt, true ); + errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg_core, set, false, false, kUInt, true ); + errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg_core, set, false, true, kInt, true ); + errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg_core, set, false, true, kUInt, true ); + + errors |= test_atomic_function( deviceID, 
context, queue, num_elements, atom_cmpxchg64_core, set, false, false, kLong, true ); + errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg64_core, set, false, false, kULong, true ); + errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg64_core, set, false, true, kLong, true ); + errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg64_core, set, false, true, kULong, true ); + + log_info(" Testing atomic_ functions...\n"); + errors |= test_atomic_function( deviceID, context, queue, num_elements, atomic_cmpxchg_core, set, false, false, kInt, true ); + errors |= test_atomic_function( deviceID, context, queue, num_elements, atomic_cmpxchg_core, set, false, false, kUInt, true ); + errors |= test_atomic_function( deviceID, context, queue, num_elements, atomic_cmpxchg_core, set, false, true, kInt, true ); + errors |= test_atomic_function( deviceID, context, queue, num_elements, atomic_cmpxchg_core, set, false, true, kUInt, true ); + + if( errors ) + return -1; + + return 0; +} + +#pragma mark -------- Bitwise functions + +size_t test_bitwise_num_results( size_t threadCount, ExplicitType dataType ) +{ + size_t numBits = get_explicit_type_size( dataType ) * 8; + + return ( threadCount + numBits - 1 ) / numBits; +} + +#pragma mark ---- and + +const char atom_and_core[] = +" size_t numBits = sizeof( destMemory[0] ) * 8;\n" +" int whichResult = tid / numBits;\n" +" int bitIndex = tid - ( whichResult * numBits );\n" +"\n" +" oldValues[tid] = atom_and( &destMemory[whichResult], ~( 1L << bitIndex ) );\n" +; + +const char atomic_and_core[] = +" size_t numBits = sizeof( destMemory[0] ) * 8;\n" +" int whichResult = tid / numBits;\n" +" int bitIndex = tid - ( whichResult * numBits );\n" +"\n" +" oldValues[tid] = atomic_and( &destMemory[whichResult], ~( 1L << bitIndex ) );\n" +; + + +cl_int test_atomic_and_result_int( size_t size, cl_int *startRefValues, size_t whichResult ) +{ + size_t 
numThreads = ( (size_t)size + 31 ) / 32; + if( whichResult < numThreads - 1 ) + return 0; + + // Last item doesn't get and'ed on every bit, so we have to mask away + size_t numBits = (size_t)size - whichResult * 32; + cl_int bits = (cl_int)0xffffffffL; + for( size_t i = 0; i < numBits; i++ ) + bits &= ~( 1 << i ); + + return bits; +} + +cl_long test_atomic_and_result_long( size_t size, cl_long *startRefValues, size_t whichResult ) +{ + size_t numThreads = ( (size_t)size + 63 ) / 64; + if( whichResult < numThreads - 1 ) + return 0; + + // Last item doesn't get and'ed on every bit, so we have to mask away + size_t numBits = (size_t)size - whichResult * 64; + cl_long bits = (cl_long)0xffffffffffffffffLL; + for( size_t i = 0; i < numBits; i++ ) + bits &= ~( 1 << i ); + + return bits; +} + +int test_atomic_and(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + TestFns set = { 0xffffffff, 0xffffffffffffffffLL, test_bitwise_num_results, + test_atomic_and_result_int, NULL, NULL, test_atomic_and_result_long, NULL, NULL }; + + if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_and_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 ) + return -1; + if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_and_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 ) + return -1; + return 0; +} + + +#pragma mark ---- or + +const char atom_or_core[] = +" size_t numBits = sizeof( destMemory[0] ) * 8;\n" +" int whichResult = tid / numBits;\n" +" int bitIndex = tid - ( whichResult * numBits );\n" +"\n" +" oldValues[tid] = atom_or( &destMemory[whichResult], ( 1L << bitIndex ) );\n" +; + +const char atomic_or_core[] = +" size_t numBits = sizeof( destMemory[0] ) * 8;\n" +" int whichResult = tid / numBits;\n" +" int bitIndex = tid - ( whichResult * numBits );\n" +"\n" +" oldValues[tid] = atomic_or( &destMemory[whichResult], ( 1L << bitIndex ) );\n" +; + 
+cl_int test_atomic_or_result_int( size_t size, cl_int *startRefValues, size_t whichResult ) +{ + size_t numThreads = ( (size_t)size + 31 ) / 32; + if( whichResult < numThreads - 1 ) + return 0xffffffff; + + // Last item doesn't get and'ed on every bit, so we have to mask away + size_t numBits = (size_t)size - whichResult * 32; + cl_int bits = 0; + for( size_t i = 0; i < numBits; i++ ) + bits |= ( 1 << i ); + + return bits; +} + +cl_long test_atomic_or_result_long( size_t size, cl_long *startRefValues, size_t whichResult ) +{ + size_t numThreads = ( (size_t)size + 63 ) / 64; + if( whichResult < numThreads - 1 ) + return 0x0ffffffffffffffffLL; + + // Last item doesn't get and'ed on every bit, so we have to mask away + size_t numBits = (size_t)size - whichResult * 64; + cl_long bits = 0; + for( size_t i = 0; i < numBits; i++ ) + bits |= ( 1LL << i ); + + return bits; +} + +int test_atomic_or(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + TestFns set = { 0, 0LL, test_bitwise_num_results, test_atomic_or_result_int, NULL, NULL, test_atomic_or_result_long, NULL, NULL }; + + if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_or_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 ) + return -1; + if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_or_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 ) + return -1; + return 0; +} + + +#pragma mark ---- xor + +const char atom_xor_core[] = +" size_t numBits = sizeof( destMemory[0] ) * 8;\n" +" int bitIndex = tid & ( numBits - 1 );\n" +"\n" +" oldValues[tid] = atom_xor( &destMemory[0], 1 << bitIndex );\n" +; + +const char atomic_xor_core[] = +" size_t numBits = sizeof( destMemory[0] ) * 8;\n" +" int bitIndex = tid & ( numBits - 1 );\n" +"\n" +" oldValues[tid] = atomic_xor( &destMemory[0], 1 << bitIndex );\n" +; + +cl_int test_atomic_xor_result_int( size_t size, cl_int *startRefValues, 
size_t whichResult ) +{ + cl_int total = 0x2f08ab41; + for( size_t i = 0; i < size; i++ ) + total ^= ( 1 << ( i & 31 ) ); + return total; +} + +cl_long test_atomic_xor_result_long( size_t size, cl_long *startRefValues, size_t whichResult ) +{ + cl_long total = 0x2f08ab418ba0541LL; + for( size_t i = 0; i < size; i++ ) + total ^= ( 1LL << ( i & 63 ) ); + return total; +} + +int test_atomic_xor(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + TestFns set = { 0x2f08ab41, 0x2f08ab418ba0541LL, NULL, test_atomic_xor_result_int, NULL, NULL, test_atomic_xor_result_long, NULL, NULL }; + + if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_xor_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 ) + return -1; + if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_xor_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 ) + return -1; + return 0; +} + + + + diff --git a/test_conformance/atomics/test_indexed_cases.c b/test_conformance/atomics/test_indexed_cases.c new file mode 100644 index 00000000..a620d6ef --- /dev/null +++ b/test_conformance/atomics/test_indexed_cases.c @@ -0,0 +1,380 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "testBase.h" +#include "../../test_common/harness/conversions.h" + +extern cl_uint gRandomSeed; + +const char * atomic_index_source = +"#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" +"// Counter keeps track of which index in counts we are using.\n" +"// We get that value, increment it, and then set that index in counts to our thread ID.\n" +"// At the end of this we should have all thread IDs in some random location in counts\n" +"// exactly once. If atom_add failed then we will write over various thread IDs and we\n" +"// will be missing some.\n" +"\n" +"__kernel void add_index_test(__global int *counter, __global int *counts) {\n" +" int tid = get_global_id(0);\n" +" \n" +" int counter_to_use = atom_add(counter, 1);\n" +" counts[counter_to_use] = tid;\n" +"}"; + +int test_atomic_add_index(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper counter, counters; + size_t numGlobalThreads, numLocalThreads; + int fail = 0, succeed = 0, err; + + /* Check if atomics are supported. */ + if (!is_extension_available(deviceID, "cl_khr_global_int32_base_atomics")) { + log_info("Base atomics not supported (cl_khr_global_int32_base_atomics). Skipping test.\n"); + return 0; + } + + //===== add_index test + // The index test replicates what particles does. + // It uses one memory location to keep track of the current index and then each thread + // does an atomic add to it to get its new location. The threads then write to their + // assigned location. At the end we check to make sure that each thread's ID shows up + // exactly once in the output. 
+ + numGlobalThreads = 2048; + + if( create_single_kernel_helper( context, &program, &kernel, 1, &atomic_index_source, "add_index_test" ) ) + return -1; + + if( get_max_common_work_group_size( context, kernel, numGlobalThreads, &numLocalThreads ) ) + return -1; + + log_info("Execute global_threads:%d local_threads:%d\n", + (int)numGlobalThreads, (int)numLocalThreads); + + // Create the counter that will keep track of where each thread writes. + counter = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), + sizeof(cl_int) * 1, NULL, NULL); + // Create the counters that will hold the results of each thread writing + // its ID into a (hopefully) unique location. + counters = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), + sizeof(cl_int) * numGlobalThreads, NULL, NULL); + + // Reset all those locations to -1 to indciate they have not been used. + cl_int *values = (cl_int*) malloc(sizeof(cl_int)*numGlobalThreads); + if (values == NULL) { + log_error("add_index_test FAILED to allocate memory for initial values.\n"); + fail = 1; succeed = -1; + } else { + memset(values, -1, numLocalThreads); + unsigned int i=0; + for (i=0; i= max_counts_per_bin) { + bin = random_in_range(0, number_of_bins-1, d); + } + if (bin >= number_of_bins) + log_error("add_index_bin_test internal error generating bin assignments: bin %d >= number_of_bins %d.\n", bin, number_of_bins); + if (l_bin_counts[bin]+1 > max_counts_per_bin) + log_error("add_index_bin_test internal error generating bin assignments: bin %d has more entries (%d) than max_counts_per_bin (%d).\n", bin, l_bin_counts[bin], max_counts_per_bin); + l_bin_counts[bin]++; + l_bin_assignments[i] = bin; + // log_info("item %d assigned to bin %d (%d items)\n", i, bin, l_bin_counts[bin]); + } + err = clEnqueueWriteBuffer(queue, bin_assignments, true, 0, sizeof(cl_int)*number_of_items, l_bin_assignments, 0, NULL, NULL); + if (err) { + log_error("add_index_bin_test FAILED to set initial values for bin_assignments: %d\n", 
err); + return -1; + } + // Setup the kernel + err = clSetKernelArg(kernel, 0, sizeof(bin_counters), &bin_counters); + err |= clSetKernelArg(kernel, 1, sizeof(bins), &bins); + err |= clSetKernelArg(kernel, 2, sizeof(bin_assignments), &bin_assignments); + err |= clSetKernelArg(kernel, 3, sizeof(max_counts_per_bin), &max_counts_per_bin); + if (err) { + log_error("add_index_bin_test FAILED to set kernel arguments: %d\n", err); + fail=1; succeed=-1; + return -1; + } + + err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, global_threads, local_threads, 0, NULL, NULL ); + if (err) { + log_error("add_index_bin_test FAILED to execute kernel: %d\n", err); + fail=1; succeed=-1; + } + + cl_int *final_bin_assignments = (cl_int*)malloc(sizeof(cl_int)*number_of_bins*max_counts_per_bin); + if (!final_bin_assignments) { + log_error("add_index_bin_test FAILED to allocate initial values for final_bin_assignments.\n"); + return -1; + } + err = clEnqueueReadBuffer( queue, bins, true, 0, sizeof(cl_int)*number_of_bins*max_counts_per_bin, final_bin_assignments, 0, NULL, NULL ); + if (err) { + log_error("add_index_bin_test FAILED to read back bins: %d\n", err); + fail = 1; succeed=-1; + } + + cl_int *final_bin_counts = (cl_int*)malloc(sizeof(cl_int)*number_of_bins); + if (!final_bin_counts) { + log_error("add_index_bin_test FAILED to allocate initial values for final_bin_counts.\n"); + return -1; + } + err = clEnqueueReadBuffer( queue, bin_counters, true, 0, sizeof(cl_int)*number_of_bins, final_bin_counts, 0, NULL, NULL ); + if (err) { + log_error("add_index_bin_test FAILED to read back bin_counters: %d\n", err); + fail = 1; succeed=-1; + } + + // Verification. + int errors=0; + int current_bin; + int search; + // Print out all the contents of the bins. 
+ // for (current_bin=0; current_bingcc:-xc++ + msvc:"/TP" + ; + +exe test_basic + : main.c + test_fpmath_float.c test_fpmath_float2.c test_fpmath_float4.c + test_intmath_int.c test_intmath_int2.c test_intmath_int4.c + test_intmath_long.c test_intmath_long2.c test_intmath_long4.c + test_hiloeo.c test_local.c test_pointercast.c + test_if.c test_sizeof.c test_loop.c + test_readimage.c test_readimage_int16.c test_readimage_fp32.c + test_readimage3d.c test_readimage3d_int16.c test_readimage3d_fp32.c + test_writeimage.c test_writeimage_int16.c test_writeimage_fp32.c + test_multireadimageonefmt.c test_multireadimagemultifmt.c + test_imagedim.c + test_vloadstore.c + test_int2float.c test_float2int.c + test_createkernelsinprogram.c + test_hostptr.c + test_explicit_s2v.cpp + test_constant.c + test_constant_source.cpp + test_image_multipass.c + test_imagereadwrite.c test_imagereadwrite3d.c + test_bufferreadwriterect.c + test_image_param.c + test_imagenpot.c + test_image_r8.c + test_barrier.c + test_arrayreadwrite.c + test_arraycopy.c + test_imagearraycopy.c + test_imagearraycopy3d.c + test_imagecopy.c + test_imagerandomcopy.c + test_arrayimagecopy.c + test_arrayimagecopy3d.c + test_imagecopy3d.c + test_enqueue_map.cpp + test_work_item_functions.cpp + test_astype.cpp + test_async_copy.cpp + test_async_strided_copy.cpp + test_numeric_constants.cpp + test_kernel_call_kernel_function.cpp + test_basic_parameter_types.c + test_vector_creation.cpp + test_vec_type_hint.c + test_preprocessors.cpp + test_kernel_memory_alignment.cpp + test_global_work_offsets.cpp + test_local_kernel_scope.cpp + test_get_linear_ids.cpp + ../../test_common/harness/errorHelpers.c + ../../test_common/harness/threadTesting.c + ../../test_common/harness/testHarness.c + ../../test_common/harness/rounding_mode.c + ../../test_common/harness/kernelHelpers.c + ../../test_common/harness/typeWrappers.cpp + ../../test_common/harness/imageHelpers.cpp + ../../test_common/harness/mt19937.c + 
../../test_common/harness/conversions.c + : windows:../../test_common/harness/msvc9.c + ; + +install dist + : test_basic + : debug:$(DIST)/debug/tests/test_conformance/basic + release:$(DIST)/release/tests/test_conformance/basic + ; + diff --git a/test_conformance/basic/Makefile b/test_conformance/basic/Makefile new file mode 100644 index 00000000..faac0a80 --- /dev/null +++ b/test_conformance/basic/Makefile @@ -0,0 +1,103 @@ +ifdef BUILD_WITH_ATF +ATF = -framework ATF +USE_ATF = -DUSE_ATF +endif + +SRCS = main.c \ + test_fpmath_float.c test_fpmath_float2.c test_fpmath_float4.c \ + test_intmath_int.c test_intmath_int2.c test_intmath_int4.c \ + test_intmath_long.c test_intmath_long2.c test_intmath_long4.c \ + test_hiloeo.c test_local.c test_local_kernel_scope.cpp test_pointercast.c \ + test_if.c test_sizeof.c test_loop.c \ + test_readimage.c test_readimage_int16.c test_readimage_fp32.c \ + test_readimage3d.c test_readimage3d_int16.c test_readimage3d_fp32.c \ + test_writeimage.c test_writeimage_int16.c test_writeimage_fp32.c \ + test_multireadimageonefmt.c test_multireadimagemultifmt.c \ + test_imagedim.c \ + test_vloadstore.c \ + test_int2float.c test_float2int.c \ + test_createkernelsinprogram.c \ + test_hostptr.c \ + test_explicit_s2v.cpp \ + test_constant.c \ + test_constant_source.cpp \ + test_image_multipass.c \ + test_imagereadwrite.c test_imagereadwrite3d.c \ + test_bufferreadwriterect.c \ + test_image_param.c \ + test_imagenpot.c \ + test_image_r8.c \ + test_barrier.c \ + test_wg_barrier.c \ + test_arrayreadwrite.c \ + test_arraycopy.c \ + test_imagearraycopy.c \ + test_imagearraycopy3d.c \ + test_imagecopy.c \ + test_imagerandomcopy.c \ + test_arrayimagecopy.c \ + test_arrayimagecopy3d.c\ + test_imagecopy3d.c \ + test_enqueue_map.cpp \ + test_work_item_functions.cpp \ + test_astype.cpp \ + test_async_copy.cpp \ + test_async_strided_copy.cpp \ + test_numeric_constants.cpp \ + test_kernel_call_kernel_function.cpp \ + test_basic_parameter_types.c \ + 
test_vector_creation.cpp \ + test_vec_type_hint.c \ + test_preprocessors.cpp \ + test_kernel_memory_alignment.cpp \ + test_global_work_offsets.cpp \ + test_simple_image_pitch.c \ + test_queue_priority.c \ + test_global_linear_id.c \ + test_local_linear_id.c \ + test_enqueued_local_size.c \ + test_get_linear_ids.c \ + test_progvar.cpp \ + test_rw_image_access_qualifier.c \ + ../../test_common/harness/errorHelpers.c \ + ../../test_common/harness/threadTesting.c \ + ../../test_common/harness/testHarness.c \ + ../../test_common/harness/rounding_mode.c \ + ../../test_common/harness/kernelHelpers.c \ + ../../test_common/harness/typeWrappers.cpp \ + ../../test_common/harness/imageHelpers.cpp \ + ../../test_common/harness/mt19937.c \ + ../../test_common/harness/conversions.c + +DEFINES = + +SOURCES = $(abspath $(SRCS)) +LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries +LIBPATH += -L. +FRAMEWORK = $(SOURCES) +HEADERS = +TARGET = test_basic +INCLUDE = +COMPILERFLAGS = -c -Wall -g -O0 -Wshorten-64-to-32 +CC = c++ +CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE) +CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE) +LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF} + +OBJECTS := ${SOURCES:.c=.o} +OBJECTS := ${OBJECTS:.cpp=.o} + +TARGETOBJECT = +all: $(TARGET) + +$(TARGET): $(OBJECTS) + $(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES) + +clean: + rm -f $(TARGET) $(OBJECTS) + +.DEFAULT: + @echo The target \"$@\" does not exist in Makefile. + + + diff --git a/test_conformance/basic/main.c b/test_conformance/basic/main.c new file mode 100644 index 00000000..893eaeb5 --- /dev/null +++ b/test_conformance/basic/main.c @@ -0,0 +1,303 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" + +#if !defined(_WIN32) +#include +#endif + +#include +#include +#include +#include "../../test_common/harness/testHarness.h" +#include "procs.h" + +// FIXME: To use certain functions in ../../test_common/harness/imageHelpers.h +// (for example, generate_random_image_data()), the tests are required to declare +// the following variables (): +cl_device_type gDeviceType = CL_DEVICE_TYPE_DEFAULT; +bool gTestRounding = false; + +basefn basefn_list[] = { + test_hostptr, + test_fpmath_float, + test_fpmath_float2, + test_fpmath_float4, + test_intmath_int, + test_intmath_int2, + test_intmath_int4, + test_intmath_long, + test_intmath_long2, + test_intmath_long4, + test_hiloeo, + test_if, + test_sizeof, + test_loop, + test_pointer_cast, + test_local_arg_def, + test_local_kernel_def, + test_local_kernel_scope, + test_constant, + test_constant_source, + test_readimage, + test_readimage_int16, + test_readimage_fp32, + test_writeimage, + test_writeimage_int16, + test_writeimage_fp32, + test_multireadimageonefmt, + + test_multireadimagemultifmt, + test_image_r8, + test_barrier, + test_wg_barrier, + test_int2float, + test_float2int, + test_imagereadwrite, + test_imagereadwrite3d, + test_readimage3d, + test_readimage3d_int16, + test_readimage3d_fp32, + test_bufferreadwriterect, + test_arrayreadwrite, + test_arraycopy, + test_imagearraycopy, + test_imagearraycopy3d, + test_imagecopy, + test_imagecopy3d, + test_imagerandomcopy, + test_arrayimagecopy, + test_arrayimagecopy3d, + test_imagenpot, + + test_vload_global, + 
test_vload_local, + test_vload_constant, + test_vload_private, + test_vstore_global, + test_vstore_local, + test_vstore_private, + + test_createkernelsinprogram, + test_imagedim_pow2, + test_imagedim_non_pow2, + test_image_param, + test_image_multipass_integer_coord, + test_image_multipass_float_coord, + test_explicit_s2v_bool, + test_explicit_s2v_char, + test_explicit_s2v_uchar, + test_explicit_s2v_short, + test_explicit_s2v_ushort, + test_explicit_s2v_int, + test_explicit_s2v_uint, + test_explicit_s2v_long, + test_explicit_s2v_ulong, + test_explicit_s2v_float, + test_explicit_s2v_double, + + test_enqueue_map_buffer, + test_enqueue_map_image, + + test_work_item_functions, + + test_astype, + + test_async_copy_global_to_local, + test_async_copy_local_to_global, + test_async_strided_copy_global_to_local, + test_async_strided_copy_local_to_global, + test_prefetch, + + test_kernel_call_kernel_function, + test_host_numeric_constants, + test_kernel_numeric_constants, + test_kernel_limit_constants, + test_kernel_preprocessor_macros, + + test_basic_parameter_types, + test_vector_creation, + test_vec_type_hint, + test_kernel_memory_alignment_local, + test_kernel_memory_alignment_global, + test_kernel_memory_alignment_constant, + test_kernel_memory_alignment_private, + + test_progvar_prog_scope_misc, + test_progvar_prog_scope_uninit, + test_progvar_prog_scope_init, + test_progvar_func_scope, + + test_global_work_offsets, + test_get_global_offset, + + test_global_linear_id, + test_local_linear_id, + test_enqueued_local_size, + + test_simple_read_image_pitch, + test_simple_write_image_pitch, + +#if defined( __APPLE__ ) + test_queue_priority, +#endif + + test_get_linear_ids, + test_rw_image_access_qualifier +}; + +const char *basefn_names[] = { + "hostptr", + "fpmath_float", + "fpmath_float2", + "fpmath_float4", + "intmath_int", + "intmath_int2", + "intmath_int4", + "intmath_long", + "intmath_long2", + "intmath_long4", + "hiloeo", + "if", + "sizeof", + "loop", + "pointer_cast", 
+ "local_arg_def", + "local_kernel_def", + "local_kernel_scope", + "constant", + "constant_source", + "readimage", + "readimage_int16", + "readimage_fp32", + "writeimage", + "writeimage_int16", + "writeimage_fp32", + "mri_one", + + "mri_multiple", + "image_r8", + "barrier", + "wg_barrier", + "int2float", + "float2int", + "imagereadwrite", + "imagereadwrite3d", + "readimage3d", + "readimage3d_int16", + "readimage3d_fp32", + "bufferreadwriterect", + "arrayreadwrite", + "arraycopy", + "imagearraycopy", + "imagearraycopy3d", + "imagecopy", + "imagecopy3d", + "imagerandomcopy", + "arrayimagecopy", + "arrayimagecopy3d", + "imagenpot", + + "vload_global", + "vload_local", + "vload_constant", + "vload_private", + "vstore_global", + "vstore_local", + "vstore_private", + + "createkernelsinprogram", + "imagedim_pow2", + "imagedim_non_pow2", + "image_param", + "image_multipass_integer_coord", + "image_multipass_float_coord", + "explicit_s2v_bool", + "explicit_s2v_char", + "explicit_s2v_uchar", + "explicit_s2v_short", + "explicit_s2v_ushort", + "explicit_s2v_int", + "explicit_s2v_uint", + "explicit_s2v_long", + "explicit_s2v_ulong", + "explicit_s2v_float", + "explicit_s2v_double", + + "enqueue_map_buffer", + "enqueue_map_image", + + "work_item_functions", + + "astype", + + "async_copy_global_to_local", + "async_copy_local_to_global", + "async_strided_copy_global_to_local", + "async_strided_copy_local_to_global", + "prefetch", + + "kernel_call_kernel_function", + "host_numeric_constants", + "kernel_numeric_constants", + "kernel_limit_constants", + "kernel_preprocessor_macros", + + "parameter_types", + + "vector_creation", + "vec_type_hint", + + "kernel_memory_alignment_local", + "kernel_memory_alignment_global", + "kernel_memory_alignment_constant", + "kernel_memory_alignment_private", + + "progvar_prog_scope_misc", + "progvar_prog_scope_uninit", + "progvar_prog_scope_init", + "progvar_func_scope", + + "global_work_offsets", + "get_global_offset", + + "global_linear_id", + 
"local_linear_id", + "enqueued_local_size", + + "simple_read_image_pitch", + "simple_write_image_pitch", + +#if defined( __APPLE__ ) + "queue_priority", +#endif + + "get_linear_ids", + "test_rw_image_access_qualifier", +}; + +ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_list) / sizeof(basefn_list[0]))); + +int num_fns = sizeof(basefn_names) / sizeof(char *); + + +int main(int argc, const char *argv[]) +{ + return runTestHarness( argc, argv, num_fns, basefn_list, basefn_names, false, false, 0 ); +} + + + diff --git a/test_conformance/basic/procs.h b/test_conformance/basic/procs.h new file mode 100644 index 00000000..44b2dec7 --- /dev/null +++ b/test_conformance/basic/procs.h @@ -0,0 +1,160 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/kernelHelpers.h" +#include "../../test_common/harness/testHarness.h" +#include "../../test_common/harness/errorHelpers.h" +#include "../../test_common/harness/typeWrappers.h" +#include "../../test_common/harness/conversions.h" +#include "../../test_common/harness/rounding_mode.h" + +extern void memset_pattern4(void *dest, const void *src_pattern, size_t bytes ); + +extern int test_hostptr(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_fpmath_float(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_fpmath_float2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_fpmath_float4(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_intmath_int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_intmath_int2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_intmath_int4(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_intmath_long(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_intmath_long2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_intmath_long4(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_hiloeo(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_if(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_sizeof(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_loop(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); 
+extern int test_pointer_cast(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_local_arg_def(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_local_kernel_def(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_local_kernel_scope(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_constant(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_constant_source(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_readimage(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_readimage_int16(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_readimage_fp32(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_writeimage(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_writeimage_int16(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_writeimage_fp32(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_multireadimageonefmt(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_multireadimagemultifmt(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_image_r8(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_simplebarrier(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_barrier(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); 
+extern int test_wg_barrier(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_int2float(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_float2int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_imagearraycopy(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_imagearraycopy3d(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_imagereadwrite(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_imagereadwrite3d(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_readimage3d(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_readimage3d_int16(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_readimage3d_fp32(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_bufferreadwriterect(cl_device_id device, cl_context context, cl_command_queue queue_, int num_elements); +extern int test_imagecopy(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_imagecopy3d(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_imagerandomcopy(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_arraycopy(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems); +extern int test_arrayimagecopy(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_arrayimagecopy3d(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int 
test_imagenpot(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_sampler_float(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_sampler_int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_createkernelsinprogram(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_single_large_allocation(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_multiple_max_allocation(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_arrayreadwrite(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_imagedim_pow2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_imagedim_non_pow2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_image_param(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_image_multipass_integer_coord(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_image_multipass_float_coord(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_vload_global(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_vload_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_vload_constant(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_vload_private(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_vstore_global(cl_device_id deviceID, cl_context context, 
cl_command_queue queue, int num_elements); +extern int test_vstore_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_vstore_private(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_explicit_s2v_bool(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_explicit_s2v_char(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_explicit_s2v_uchar(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_explicit_s2v_short(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_explicit_s2v_ushort(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_explicit_s2v_int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_explicit_s2v_uint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_explicit_s2v_long(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_explicit_s2v_ulong(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_explicit_s2v_float(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_explicit_s2v_double(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_enqueue_map_buffer(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_enqueue_map_image(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_work_item_functions(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern 
int test_astype(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_native_kernel(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_async_copy_global_to_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_async_copy_local_to_global(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_async_strided_copy_global_to_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_async_strided_copy_local_to_global(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_prefetch(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_host_numeric_constants(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_kernel_numeric_constants(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_kernel_limit_constants(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_kernel_preprocessor_macros(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_kernel_call_kernel_function(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_basic_parameter_types(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); +extern int test_vector_creation(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_vec_type_hint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + + +extern int test_kernel_memory_alignment_local(cl_device_id device, cl_context context, cl_command_queue queue, int 
n_elems ); +extern int test_kernel_memory_alignment_global(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ); +extern int test_kernel_memory_alignment_constant(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ); +extern int test_kernel_memory_alignment_private(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ); + +extern int test_progvar_prog_scope_misc(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ); +extern int test_progvar_prog_scope_uninit(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ); +extern int test_progvar_prog_scope_init(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ); +extern int test_progvar_func_scope(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ); + +extern int test_global_work_offsets(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ); +extern int test_get_global_offset(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ); + +extern int test_global_linear_id(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ); +extern int test_local_linear_id(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ); +extern int test_enqueued_local_size(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ); + +extern int test_simple_read_image_pitch(cl_device_id device, cl_context cl_context_, cl_command_queue q, int num_elements); +extern int test_simple_write_image_pitch(cl_device_id device, cl_context cl_context_, cl_command_queue q, int num_elements); + +#if defined( __APPLE__ ) +extern int test_queue_priority(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); +#endif + +extern int test_get_linear_ids(cl_device_id device, cl_context cl_context_, cl_command_queue q, int num_elements); +extern int 
test_rw_image_access_qualifier(cl_device_id device_id, cl_context context, cl_command_queue commands, int num_elements); + diff --git a/test_conformance/basic/run_array b/test_conformance/basic/run_array new file mode 100644 index 00000000..07d67892 --- /dev/null +++ b/test_conformance/basic/run_array @@ -0,0 +1,3 @@ +#!/bin/sh +cd `dirname $0` +./test_basic arrayreadwrite arraycopy bufferreadwriterect $@ diff --git a/test_conformance/basic/run_array_image_copy b/test_conformance/basic/run_array_image_copy new file mode 100644 index 00000000..f88ec2a0 --- /dev/null +++ b/test_conformance/basic/run_array_image_copy @@ -0,0 +1,3 @@ +#!/bin/sh +cd `dirname $0` +./test_basic arrayimagecopy arrayimagecopy3d imagearraycopy diff --git a/test_conformance/basic/run_image b/test_conformance/basic/run_image new file mode 100644 index 00000000..9bb5ee1b --- /dev/null +++ b/test_conformance/basic/run_image @@ -0,0 +1,17 @@ +#!/bin/sh +cd `dirname $0` +./test_basic \ +imagecopy imagerandomcopy \ +imagearraycopy imagearraycopy3d \ +image_r8 \ +readimage readimage_int16 readimage_fp32 \ +writeimage writeimage_int16 writeimage_fp32 \ +imagenpot \ +image_param \ +image_multipass_integer_coord \ +readimage3d \ +readimage3d_int16 \ +readimage3d_fp32 \ +imagereadwrite3d \ +imagereadwrite \ +$@ diff --git a/test_conformance/basic/run_multi_read_image b/test_conformance/basic/run_multi_read_image new file mode 100644 index 00000000..aa87b1cd --- /dev/null +++ b/test_conformance/basic/run_multi_read_image @@ -0,0 +1,4 @@ +#!/bin/sh +cd `dirname $0` +./test_basic mri_one mri_multiple + diff --git a/test_conformance/basic/test_arraycopy.c b/test_conformance/basic/test_arraycopy.c new file mode 100644 index 00000000..5104c49b --- /dev/null +++ b/test_conformance/basic/test_arraycopy.c @@ -0,0 +1,201 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include + + +#include "procs.h" + +const char *copy_kernel_code = +"__kernel void test_copy(__global unsigned int *src, __global unsigned int *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = src[tid];\n" +"}\n"; + +int +test_arraycopy(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + cl_uint *input_ptr, *output_ptr; + cl_mem streams[4], results; + cl_program program; + cl_kernel kernel; + unsigned num_elements = 128 * 1024; + cl_uint num_copies = 1; + size_t delta_offset; + unsigned i; + cl_int err; + MTdata d; + + int error_count = 0; + + input_ptr = (cl_uint*)malloc(sizeof(cl_uint) * num_elements); + output_ptr = (cl_uint*)malloc(sizeof(cl_uint) * num_elements); + + // results + results = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * num_elements, NULL, &err); + test_error(err, "clCreateBuffer failed"); + +/*****************************************************************************************************************************************/ +#pragma mark client backing + + log_info("Testing CL_MEM_USE_HOST_PTR buffer with clEnqueueCopyBuffer\n"); + // randomize data + d = init_genrand( gRandomSeed ); + for (i=0; i +#include +#include +#include +#include + +#include "procs.h" + +int test_arrayimagecopy_single_format(cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format) +{ + cl_uchar *bufptr, *imgptr; + clMemWrapper buffer, image; + int 
img_width = 512; + int img_height = 512; + size_t elem_size; + size_t buffer_size; + int i; + cl_int err; + MTdata d; + cl_event copyevent; + + log_info("Testing %s %s\n", GetChannelOrderName(format->image_channel_order), GetChannelTypeName(format->image_channel_data_type)); + + image = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), format, img_width, img_height, 0, NULL, &err); + test_error(err, "create_image_2d failed"); + + err = clGetImageInfo(image, CL_IMAGE_ELEMENT_SIZE, sizeof(size_t), &elem_size, NULL); + test_error(err, "clGetImageInfo failed"); + + buffer_size = sizeof(cl_uchar) * elem_size * img_width * img_height; + + buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), buffer_size, NULL, &err); + test_error(err, "clCreateBuffer failed"); + + d = init_genrand( gRandomSeed ); + bufptr = (cl_uchar*)malloc(buffer_size); + for (i=0; i<(int)buffer_size; i++) { + bufptr[i] = (cl_uchar)genrand_int32(d); + } + free_mtdata(d); d = NULL; + + size_t origin[3]={0,0,0}, region[3]={img_width,img_height,1}; + err = clEnqueueWriteBuffer( queue, buffer, CL_TRUE, 0, buffer_size, bufptr, 0, NULL, NULL); + test_error(err, "clEnqueueWriteBuffer failed"); + + err = clEnqueueCopyBufferToImage( queue, buffer, image, 0, origin, region, 0, NULL, ©event ); + test_error(err, "clEnqueueCopyImageToBuffer failed"); + + imgptr = (cl_uchar*)malloc(buffer_size); + + err = clEnqueueReadImage( queue, image, CL_TRUE, origin, region, 0, 0, imgptr, 1, ©event, NULL ); + test_error(err, "clEnqueueReadBuffer failed"); + + if (memcmp(bufptr, imgptr, buffer_size) != 0) { + log_error( "ERROR: Results did not validate!\n" ); + unsigned char * inchar = (unsigned char*)bufptr; + unsigned char * outchar = (unsigned char*)imgptr; + int failuresPrinted = 0; + int i; + for (i=0; i< (int)buffer_size; i+=(int)elem_size) { + int failed = 0; + int j; + for (j=0; j<(int)elem_size; j++) + if (inchar[i+j] != outchar[i+j]) + failed = 1; + char values[4096]; + values[0] = 0; + if 
(failed) { + sprintf(values + strlen(values), "%d(0x%x) -> actual [", i, i); + int j; + for (j=0; j<(int)elem_size; j++) + sprintf(values + strlen( values), "0x%02x ", inchar[i+j]); + sprintf(values + strlen(values), "] != expected ["); + for (j=0; j<(int)elem_size; j++) + sprintf(values + strlen( values), "0x%02x ", outchar[i+j]); + sprintf(values + strlen(values), "]"); + log_error("%s\n", values); + failuresPrinted++; + } + if (failuresPrinted > 5) { + log_error("Not printing further failures...\n"); + break; + } + } + err = -1; + } + + free(bufptr); + free(imgptr); + + if (err) + log_error("ARRAY to IMAGE copy test failed for image_channel_order=0x%lx and image_channel_data_type=0x%lx\n", + (unsigned long)format->image_channel_order, (unsigned long)format->image_channel_data_type); + + return err; +} + +int test_arrayimagecopy(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + cl_int err; + cl_image_format *formats; + cl_uint num_formats; + cl_uint i; + + PASSIVE_REQUIRE_IMAGE_SUPPORT( device ) + + err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 0, NULL, &num_formats); + test_error(err, "clGetSupportedImageFormats failed"); + + formats = (cl_image_format *)malloc(num_formats * sizeof(cl_image_format)); + + err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, num_formats, formats, NULL); + test_error(err, "clGetSupportedImageFormats failed"); + + for (i = 0; i < num_formats; i++) { + err |= test_arrayimagecopy_single_format(device, context, queue, &formats[i]); + } + + free(formats); + if (err) + log_error("ARRAY to IMAGE copy test failed\n"); + else + log_info("ARRAY to IMAGE copy test passed\n"); + + return err; +} diff --git a/test_conformance/basic/test_arrayimagecopy3d.c b/test_conformance/basic/test_arrayimagecopy3d.c new file mode 100644 index 00000000..d5333777 --- /dev/null +++ b/test_conformance/basic/test_arrayimagecopy3d.c @@ -0,0 +1,144 @@ +// +// 
Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include +#include + +#include "procs.h" + +int test_arrayimagecopy3d_single_format(cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format) +{ + cl_uchar *bufptr, *imgptr; + clMemWrapper buffer, image; + int img_width = 128; + int img_height = 128; + int img_depth = 32; + size_t elem_size; + size_t buffer_size; + int i; + cl_int err; + MTdata d; + cl_event copyevent; + + log_info("Testing %s %s\n", GetChannelOrderName(format->image_channel_order), GetChannelTypeName(format->image_channel_data_type)); + + image = create_image_3d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), format, img_width, img_height, img_depth, 0, 0, NULL, &err); + test_error(err, "create_image_3d failed"); + + err = clGetImageInfo(image, CL_IMAGE_ELEMENT_SIZE, sizeof(size_t), &elem_size, NULL); + test_error(err, "clGetImageInfo failed"); + + buffer_size = sizeof(cl_uchar) * elem_size * img_width * img_height * img_depth; + + buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), buffer_size, NULL, &err); + test_error(err, "clCreateBuffer failed"); + + d = init_genrand( gRandomSeed ); + bufptr = (cl_uchar*)malloc(buffer_size); + for (i=0; i<(int)buffer_size; i++) { + bufptr[i] = (cl_uchar)genrand_int32(d); + } + free_mtdata(d); d = NULL; + + size_t origin[3]={0,0,0}, 
region[3]={img_width,img_height,img_depth}; + err = clEnqueueWriteBuffer( queue, buffer, CL_TRUE, 0, buffer_size, bufptr, 0, NULL, NULL); + test_error(err, "clEnqueueWriteBuffer failed"); + + err = clEnqueueCopyBufferToImage( queue, buffer, image, 0, origin, region, 0, NULL, ©event ); + test_error(err, "clEnqueueCopyImageToBuffer failed"); + + imgptr = (cl_uchar*)malloc(buffer_size); + + err = clEnqueueReadImage( queue, image, CL_TRUE, origin, region, 0, 0, imgptr, 1, ©event, NULL ); + test_error(err, "clEnqueueReadBuffer failed"); + + if (memcmp(bufptr, imgptr, buffer_size) != 0) { + log_error( "ERROR: Results did not validate!\n" ); + unsigned char * inchar = (unsigned char*)bufptr; + unsigned char * outchar = (unsigned char*)imgptr; + int failuresPrinted = 0; + int i; + for (i=0; i< (int)buffer_size; i+=(int)elem_size) { + int failed = 0; + int j; + for (j=0; j<(int)elem_size; j++) + if (inchar[i+j] != outchar[i+j]) + failed = 1; + char values[4096]; + values[0] = 0; + if (failed) { + sprintf(values + strlen(values), "%d(0x%x) -> actual [", i, i); + int j; + for (j=0; j<(int)elem_size; j++) + sprintf(values + strlen( values), "0x%02x ", inchar[i+j]); + sprintf(values + strlen(values), "] != expected ["); + for (j=0; j<(int)elem_size; j++) + sprintf(values + strlen( values), "0x%02x ", outchar[i+j]); + sprintf(values + strlen(values), "]"); + log_error("%s\n", values); + failuresPrinted++; + } + if (failuresPrinted > 5) { + log_error("Not printing further failures...\n"); + break; + } + } + err = -1; + } + + free(bufptr); + free(imgptr); + + if (err) + log_error("ARRAY to IMAGE3D copy test failed for image_channel_order=0x%lx and image_channel_data_type=0x%lx\n", + (unsigned long)format->image_channel_order, (unsigned long)format->image_channel_data_type); + + return err; +} + +int test_arrayimagecopy3d(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + cl_int err; + cl_image_format *formats; + cl_uint num_formats; + cl_uint 
i; + + PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( device ) + + err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE3D, 0, NULL, &num_formats); + test_error(err, "clGetSupportedImageFormats failed"); + + formats = (cl_image_format *)malloc(num_formats * sizeof(cl_image_format)); + + err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE3D, num_formats, formats, NULL); + test_error(err, "clGetSupportedImageFormats failed"); + + for (i = 0; i < num_formats; i++) { + err |= test_arrayimagecopy3d_single_format(device, context, queue, &formats[i]); + } + + free(formats); + if (err) + log_error("ARRAY to IMAGE3D copy test failed\n"); + else + log_info("ARRAY to IMAGE3D copy test passed\n"); + + return err; +} diff --git a/test_conformance/basic/test_arrayreadwrite.c b/test_conformance/basic/test_arrayreadwrite.c new file mode 100644 index 00000000..96942ec1 --- /dev/null +++ b/test_conformance/basic/test_arrayreadwrite.c @@ -0,0 +1,95 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include +#include + + +#include "procs.h" + + +int +test_arrayreadwrite(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + cl_uint *inptr, *outptr; + cl_mem streams[1]; + int num_tries = 400; + num_elements = 1024 * 1024 * 4; + int i, j, err; + MTdata d; + + inptr = (cl_uint*)malloc(num_elements*sizeof(cl_uint)); + outptr = (cl_uint*)malloc(num_elements*sizeof(cl_uint)); + + // randomize data + d = init_genrand( gRandomSeed ); + for (i=0; i 0 && offset < num_elements) + break; + } while (1); + cb = (int)(genrand_int32(d) & 0x7FFFFFFF); + if (cb > (num_elements - offset)) + cb = num_elements - offset; + + err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, offset*sizeof(cl_uint), sizeof(cl_uint)*cb,&inptr[offset], 0, NULL, NULL); + test_error(err, "clEnqueueWriteBuffer failed"); + + err = clEnqueueReadBuffer( queue, streams[0], CL_TRUE, offset*sizeof(cl_uint), cb*sizeof(cl_uint), &outptr[offset], 0, NULL, NULL ); + test_error(err, "clEnqueueReadBuffer failed"); + + for (j=offset; j +#include +#include +#include +#include + + +#include "procs.h" +#include "../../test_common/harness/conversions.h" +#include "../../test_common/harness/typeWrappers.h" + + +static const char *astype_kernel_pattern = +"%s\n" +"__kernel void test_fn( __global %s%s *src, __global %s%s *dst )\n" +"{\n" +" int tid = get_global_id( 0 );\n" +" %s%s tmp = as_%s%s( src[ tid ] );\n" +" dst[ tid ] = tmp;\n" +"}\n"; + +static const char *astype_kernel_pattern_V3srcV3dst = +"%s\n" +"__kernel void test_fn( __global %s *src, __global %s *dst )\n" +"{\n" +" int tid = get_global_id( 0 );\n" +" %s%s tmp = as_%s%s( vload3(tid,src) );\n" +" vstore3(tmp,tid,dst);\n" +"}\n"; +// in the printf, remove the third and fifth argument, each of which +// should be a "3", when copying from the printf for astype_kernel_pattern + +static const char *astype_kernel_pattern_V3dst = +"%s\n" 
+"__kernel void test_fn( __global %s%s *src, __global %s *dst )\n" +"{\n" +" int tid = get_global_id( 0 );\n" +" %s3 tmp = as_%s3( src[ tid ] );\n" +" vstore3(tmp,tid,dst);\n" +"}\n"; +// in the printf, remove the fifth argument, which +// should be a "3", when copying from the printf for astype_kernel_pattern + + +static const char *astype_kernel_pattern_V3src = +"%s\n" +"__kernel void test_fn( __global %s *src, __global %s%s *dst )\n" +"{\n" +" int tid = get_global_id( 0 );\n" +" %s%s tmp = as_%s%s( vload3(tid,src) );\n" +" dst[ tid ] = tmp;\n" +"}\n"; +// in the printf, remove the third argument, which +// should be a "3", when copying from the printf for astype_kernel_pattern + + +int test_astype_set( cl_device_id device, cl_context context, cl_command_queue queue, ExplicitType inVecType, ExplicitType outVecType, + unsigned int vecSize, unsigned int outVecSize, + int numElements ) +{ + int error; + + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper streams[ 2 ]; + + char programSrc[ 10240 ]; + size_t threads[ 1 ], localThreads[ 1 ]; + size_t typeSize = get_explicit_type_size( inVecType ); + size_t outTypeSize = get_explicit_type_size(outVecType); + char sizeNames[][ 3 ] = { "", "", "2", "3", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" }; + MTdata d; + + + + // Create program + if(outVecSize == 3 && vecSize == 3) { + // astype_kernel_pattern_V3srcV3dst + sprintf( programSrc, astype_kernel_pattern_V3srcV3dst, + (outVecType == kDouble || inVecType == kDouble) ? 
"#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "", + get_explicit_type_name( inVecType ), // sizeNames[ vecSize ], + get_explicit_type_name( outVecType ), // sizeNames[ outVecSize ], + get_explicit_type_name( outVecType ), sizeNames[ outVecSize ], + get_explicit_type_name( outVecType ), sizeNames[ outVecSize ] ); + } else if(outVecSize == 3) { + // astype_kernel_pattern_V3dst + sprintf( programSrc, astype_kernel_pattern_V3dst, + (outVecType == kDouble || inVecType == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "", + get_explicit_type_name( inVecType ), sizeNames[ vecSize ], + get_explicit_type_name( outVecType ), + get_explicit_type_name( outVecType ), + get_explicit_type_name( outVecType )); + + } else if(vecSize == 3) { + // astype_kernel_pattern_V3src + sprintf( programSrc, astype_kernel_pattern_V3src, + (outVecType == kDouble || inVecType == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "", + get_explicit_type_name( inVecType ),// sizeNames[ vecSize ], + get_explicit_type_name( outVecType ), sizeNames[ outVecSize ], + get_explicit_type_name( outVecType ), sizeNames[ outVecSize ], + get_explicit_type_name( outVecType ), sizeNames[ outVecSize ]); + } else { + sprintf( programSrc, astype_kernel_pattern, + (outVecType == kDouble || inVecType == kDouble) ? 
"#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "", + get_explicit_type_name( inVecType ), sizeNames[ vecSize ], + get_explicit_type_name( outVecType ), sizeNames[ outVecSize ], + get_explicit_type_name( outVecType ), sizeNames[ outVecSize ], + get_explicit_type_name( outVecType ), sizeNames[ outVecSize ]); + } + + const char *ptr = programSrc; + error = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "test_fn" ); + test_error( error, "Unable to create testing kernel" ); + + + // Create some input values + size_t inBufferSize = sizeof(char)* numElements * get_explicit_type_size( inVecType ) * vecSize; + char *inBuffer = (char*)malloc( inBufferSize ); + size_t outBufferSize = sizeof(char)* numElements * get_explicit_type_size( outVecType ) *outVecSize; + char *outBuffer = (char*)malloc( outBufferSize ); + + d = init_genrand( gRandomSeed ); + generate_random_data( inVecType, numElements * vecSize, + d, inBuffer ); + free_mtdata(d); d = NULL; + + // Create I/O streams and set arguments + streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, inBufferSize, inBuffer, &error ); + test_error( error, "Unable to create I/O stream" ); + streams[ 1 ] = clCreateBuffer( context, CL_MEM_READ_WRITE, outBufferSize, NULL, &error ); + test_error( error, "Unable to create I/O stream" ); + + error = clSetKernelArg( kernel, 0, sizeof( streams[ 0 ] ), &streams[ 0 ] ); + test_error( error, "Unable to set kernel argument" ); + error = clSetKernelArg( kernel, 1, sizeof( streams[ 1 ] ), &streams[ 1 ] ); + test_error( error, "Unable to set kernel argument" ); + + + // Run the kernel + threads[ 0 ] = numElements; + error = get_max_common_work_group_size( context, kernel, threads[ 0 ], &localThreads[ 0 ] ); + test_error( error, "Unable to get group size to run with" ); + + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); + test_error( error, "Unable to run kernel" ); + + + // Get the results and compare + // The beauty 
is that astype is supposed to return the bit pattern as a different type, which means + // the output should have the exact same bit pattern as the input. No interpretation necessary! + error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, outBufferSize, outBuffer, 0, NULL, NULL ); + test_error( error, "Unable to read results" ); + + char *expected = inBuffer; + char *actual = outBuffer; + size_t compSize = typeSize*vecSize; + if(outTypeSize*outVecSize < compSize) { + compSize = outTypeSize*outVecSize; + } + + if(outVecSize == 4 && vecSize == 3) + { + // as_type4(vec3) should compile but produce undefined results?? + free(inBuffer); + free(outBuffer); + return 0; + } + + if(outVecSize != 3 && vecSize != 3 && outVecSize != vecSize) + { + // as_typen(vecm) should compile and run but produce + // implementation-defined results for m != n + // and n*sizeof(type) = sizeof(vecm) + free(inBuffer); + free(outBuffer); + return 0; + } + + for( int i = 0; i < numElements; i++ ) + { + if( memcmp( expected, actual, compSize ) != 0 ) + { + char expectedString[ 1024 ], actualString[ 1024 ]; + log_error( "ERROR: Data sample %d of %d for as_%s%d( %s%d ) did not validate (expected {%s}, got {%s})\n", + (int)i, (int)numElements, get_explicit_type_name( outVecType ), vecSize, get_explicit_type_name( inVecType ), vecSize, + GetDataVectorString( expected, typeSize, vecSize, expectedString ), + GetDataVectorString( actual, typeSize, vecSize, actualString ) ); + log_error("Src is :\n%s\n----\n%d threads %d localthreads\n", + programSrc, (int)threads[0],(int) localThreads[0]); + free(inBuffer); + free(outBuffer); + return 1; + } + expected += typeSize * vecSize; + actual += outTypeSize * outVecSize; + } + + free(inBuffer); + free(outBuffer); + return 0; +} + +int test_astype(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ) +{ + // Note: although casting to different vector element sizes that match the same size (i.e. 
short2 -> char4) is + // legal in OpenCL 1.0, the result is dependent on the device it runs on, which means there's no actual way + // for us to verify what is "valid". So the only thing we can test are types that match in size independent + // of the element count (char -> uchar, etc) + ExplicitType vecTypes[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble, kNumExplicitTypes }; + unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 }; + unsigned int inTypeIdx, outTypeIdx, sizeIdx, outSizeIdx; + size_t inTypeSize, outTypeSize; + int error = 0; + + for( inTypeIdx = 0; vecTypes[ inTypeIdx ] != kNumExplicitTypes; inTypeIdx++ ) + { + inTypeSize = get_explicit_type_size(vecTypes[inTypeIdx]); + + if( vecTypes[ inTypeIdx ] == kDouble && !is_extension_available( device, "cl_khr_fp64" ) ) + continue; + + if (( vecTypes[ inTypeIdx ] == kLong || vecTypes[ inTypeIdx ] == kULong ) && !gHasLong ) + continue; + + for( outTypeIdx = 0; vecTypes[ outTypeIdx ] != kNumExplicitTypes; outTypeIdx++ ) + { + outTypeSize = get_explicit_type_size(vecTypes[outTypeIdx]); + if( vecTypes[ outTypeIdx ] == kDouble && !is_extension_available( device, "cl_khr_fp64" ) ) { + continue; + } + + if (( vecTypes[ outTypeIdx ] == kLong || vecTypes[ outTypeIdx ] == kULong ) && !gHasLong ) + continue; + + // change this check + if( inTypeIdx == outTypeIdx ) { + continue; + } + + log_info( " (%s->%s)\n", get_explicit_type_name( vecTypes[ inTypeIdx ] ), get_explicit_type_name( vecTypes[ outTypeIdx ] ) ); + fflush( stdout ); + + for( sizeIdx = 0; vecSizes[ sizeIdx ] != 0; sizeIdx++ ) + { + + for(outSizeIdx = 0; vecSizes[outSizeIdx] != 0; outSizeIdx++) + { + if(vecSizes[sizeIdx]*inTypeSize != + vecSizes[outSizeIdx]*outTypeSize ) + { + continue; + } + error += test_astype_set( device, context, queue, vecTypes[ inTypeIdx ], vecTypes[ outTypeIdx ], vecSizes[ sizeIdx ], vecSizes[outSizeIdx], n_elems ); + + + } + + } + if(get_explicit_type_size(vecTypes[inTypeIdx]) == + 
get_explicit_type_size(vecTypes[outTypeIdx])) { + // as_type3(vec4) allowed, as_type4(vec3) not allowed + error += test_astype_set( device, context, queue, vecTypes[ inTypeIdx ], vecTypes[ outTypeIdx ], 3, 4, n_elems ); + error += test_astype_set( device, context, queue, vecTypes[ inTypeIdx ], vecTypes[ outTypeIdx ], 4, 3, n_elems ); + } + + } + } + return error; +} + + diff --git a/test_conformance/basic/test_async_copy.cpp b/test_conformance/basic/test_async_copy.cpp new file mode 100644 index 00000000..4d2bd9ca --- /dev/null +++ b/test_conformance/basic/test_async_copy.cpp @@ -0,0 +1,279 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include +#include + + + +#include "procs.h" +#include "../../test_common/harness/conversions.h" + +static const char *async_global_to_local_kernel = +"%s\n" // optional pragma string +"__kernel void test_fn( const __global %s *src, __global %s *dst, __local %s *localBuffer, int copiesPerWorkgroup, int copiesPerWorkItem )\n" +"{\n" +" int i;\n" +// Zero the local storage first +" for(i=0; i max_local_workgroup_size[0]) + max_workgroup_size = max_local_workgroup_size[0]; + + size_t numberOfCopiesPerWorkitem = 13; + elementSize = get_explicit_type_size(vecType)* ((vecSize == 3) ? 
4 : vecSize); + size_t localStorageSpacePerWorkitem = numberOfCopiesPerWorkitem*elementSize; + size_t maxLocalWorkgroupSize = (((int)max_local_mem_size/2)/localStorageSpacePerWorkitem); + + // Calculation can return 0 on embedded devices due to 1KB local mem limit + if(maxLocalWorkgroupSize == 0) + { + maxLocalWorkgroupSize = 1; + } + + size_t localWorkgroupSize = maxLocalWorkgroupSize; + if (maxLocalWorkgroupSize > max_workgroup_size) + localWorkgroupSize = max_workgroup_size; + + size_t localBufferSize = localWorkgroupSize*elementSize*numberOfCopiesPerWorkitem; + size_t numberOfLocalWorkgroups = 1111; + size_t globalBufferSize = numberOfLocalWorkgroups*localBufferSize; + size_t globalWorkgroupSize = numberOfLocalWorkgroups*localWorkgroupSize; + + inBuffer = (void*)malloc(globalBufferSize); + outBuffer = (void*)malloc(globalBufferSize); + memset(outBuffer, 0, globalBufferSize); + + cl_int copiesPerWorkItemInt, copiesPerWorkgroup; + copiesPerWorkItemInt = (int)numberOfCopiesPerWorkitem; + copiesPerWorkgroup = (int)(numberOfCopiesPerWorkitem*localWorkgroupSize); + + log_info("Global: %d, local %d, local buffer %db, global buffer %db, each work group will copy %d elements and each work item item will copy %d elements.\n", + (int) globalWorkgroupSize, (int)localWorkgroupSize, (int)localBufferSize, (int)globalBufferSize, copiesPerWorkgroup, copiesPerWorkItemInt); + + threads[0] = globalWorkgroupSize; + localThreads[0] = localWorkgroupSize; + + d = init_genrand( gRandomSeed ); + generate_random_data( vecType, globalBufferSize/get_explicit_type_size(vecType), d, inBuffer ); + free_mtdata(d); d = NULL; + + streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, globalBufferSize, inBuffer, &error ); + test_error( error, "Unable to create input buffer" ); + streams[ 1 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, globalBufferSize, outBuffer, &error ); + test_error( error, "Unable to create output buffer" ); + + error = clSetKernelArg( kernel, 0, sizeof( streams[ 
0 ] ), &streams[ 0 ] ); + test_error( error, "Unable to set kernel argument" ); + error = clSetKernelArg( kernel, 1, sizeof( streams[ 1 ] ), &streams[ 1 ] ); + test_error( error, "Unable to set kernel argument" ); + error = clSetKernelArg( kernel, 2, localBufferSize, NULL ); + test_error( error, "Unable to set kernel argument" ); + error = clSetKernelArg( kernel, 3, sizeof(copiesPerWorkgroup), &copiesPerWorkgroup ); + test_error( error, "Unable to set kernel argument" ); + error = clSetKernelArg( kernel, 4, sizeof(copiesPerWorkItemInt), &copiesPerWorkItemInt ); + test_error( error, "Unable to set kernel argument" ); + + // Enqueue + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); + test_error( error, "Unable to queue kernel" ); + + // Read + error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, globalBufferSize, outBuffer, 0, NULL, NULL ); + test_error( error, "Unable to read results" ); + + // Verify + int failuresPrinted = 0; + if( memcmp( inBuffer, outBuffer, globalBufferSize ) != 0 ) + { + size_t typeSize = get_explicit_type_size(vecType)* vecSize; + unsigned char * inchar = (unsigned char*)inBuffer; + unsigned char * outchar = (unsigned char*)outBuffer; + for (int i=0; i< (int)globalBufferSize; i+=(int)elementSize) { + if (memcmp( ((char *)inchar)+i, ((char *)outchar)+i, typeSize) != 0 ) + { + char values[4096]; + values[0] = 0; + if ( failuresPrinted == 0 ) { + // Print first failure message + log_error( "ERROR: Results of copy did not validate!\n" ); + } + sprintf(values + strlen( values), "%d -> [", i); + for (int j=0; j<(int)elementSize; j++) + sprintf(values + strlen( values), "%2x ", inchar[i+j]); + sprintf(values + strlen(values), "] != ["); + for (int j=0; j<(int)elementSize; j++) + sprintf(values + strlen( values), "%2x ", outchar[i+j]); + sprintf(values + strlen(values), "]"); + log_error("%s\n", values); + failuresPrinted++; + } + + if (failuresPrinted > 5) { + log_error("Not printing further 
failures...\n"); + break; + } + } + } + + free(inBuffer); + free(outBuffer); + + return failuresPrinted ? -1 : 0; +} + +int test_copy_all_types(cl_device_id deviceID, cl_context context, cl_command_queue queue, const char *kernelCode) { + ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble, kNumExplicitTypes }; + unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 }; + unsigned int size, typeIndex; + + int errors = 0; + + for( typeIndex = 0; vecType[ typeIndex ] != kNumExplicitTypes; typeIndex++ ) + { + if( vecType[ typeIndex ] == kDouble && !is_extension_available( deviceID, "cl_khr_fp64" ) ) + continue; + + if (( vecType[ typeIndex ] == kLong || vecType[ typeIndex ] == kULong ) && !gHasLong ) + continue; + + for( size = 0; vecSizes[ size ] != 0; size++ ) + { + if (test_copy( deviceID, context, queue, kernelCode, vecType[typeIndex],vecSizes[size] )) { + errors++; + } + } + } + if (errors) + return -1; + return 0; +} + + + + +int test_async_copy_global_to_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return test_copy_all_types( deviceID, context, queue, async_global_to_local_kernel ); +} + +int test_async_copy_local_to_global(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return test_copy_all_types( deviceID, context, queue, async_local_to_global_kernel ); +} + +int test_prefetch(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return test_copy_all_types( deviceID, context, queue, prefetch_kernel ); +} + diff --git a/test_conformance/basic/test_async_strided_copy.cpp b/test_conformance/basic/test_async_strided_copy.cpp new file mode 100644 index 00000000..ec65101d --- /dev/null +++ b/test_conformance/basic/test_async_strided_copy.cpp @@ -0,0 +1,274 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include +#include + + + +#include "procs.h" +#include "../../test_common/harness/conversions.h" + +static const char *async_strided_global_to_local_kernel = +"%s\n" // optional pragma string +"%s__kernel void test_fn( const __global %s *src, __global %s *dst, __local %s *localBuffer, int copiesPerWorkgroup, int copiesPerWorkItem, int stride )\n" +"{\n" +" int i;\n" +// Zero the local storage first +" for(i=0; i max_local_workgroup_size[0]) + max_workgroup_size = max_local_workgroup_size[0]; + + size_t elementSize = get_explicit_type_size(vecType)* ((vecSize == 3) ? 4 : vecSize); + + cl_ulong max_global_mem_size; + error = clGetDeviceInfo(deviceID, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(max_global_mem_size), &max_global_mem_size, NULL); + test_error (error, "clGetDeviceInfo failed for CL_DEVICE_GLOBAL_MEM_SIZE"); + + if (max_global_mem_size > (cl_ulong)SIZE_MAX) { + max_global_mem_size = (cl_ulong)SIZE_MAX; + } + + cl_bool unified_mem; + error = clGetDeviceInfo(deviceID, CL_DEVICE_HOST_UNIFIED_MEMORY, sizeof(unified_mem), &unified_mem, NULL); + test_error (error, "clGetDeviceInfo failed for CL_DEVICE_HOST_UNIFIED_MEMORY"); + + int number_of_global_mem_buffers = (unified_mem) ? 
4 : 2; + + size_t numberOfCopiesPerWorkitem = 3; + size_t localStorageSpacePerWorkitem = numberOfCopiesPerWorkitem*elementSize; + size_t maxLocalWorkgroupSize = (((int)max_local_mem_size/2)/localStorageSpacePerWorkitem); + + size_t localWorkgroupSize = maxLocalWorkgroupSize; + if (maxLocalWorkgroupSize > max_workgroup_size) + localWorkgroupSize = max_workgroup_size; + + size_t localBufferSize = localWorkgroupSize*elementSize*numberOfCopiesPerWorkitem; + size_t numberOfLocalWorkgroups = 579;//1111; + + // Reduce the numberOfLocalWorkgroups so that no more than 1/2 of CL_DEVICE_GLOBAL_MEM_SIZE is consumed + // by the allocated buffer. This is done to avoid resource errors resulting from address space fragmentation. + size_t numberOfLocalWorkgroupsLimit = max_global_mem_size / (2 * number_of_global_mem_buffers * localBufferSize * stride); + if (numberOfLocalWorkgroups > numberOfLocalWorkgroupsLimit) numberOfLocalWorkgroups = numberOfLocalWorkgroupsLimit; + + size_t globalBufferSize = numberOfLocalWorkgroups*localBufferSize*stride; + size_t globalWorkgroupSize = numberOfLocalWorkgroups*localWorkgroupSize; + + inBuffer = (void*)malloc(globalBufferSize); + outBuffer = (void*)malloc(globalBufferSize); + memset(outBuffer, 0, globalBufferSize); + + cl_int copiesPerWorkItemInt, copiesPerWorkgroup; + copiesPerWorkItemInt = (int)numberOfCopiesPerWorkitem; + copiesPerWorkgroup = (int)(numberOfCopiesPerWorkitem*localWorkgroupSize); + + log_info("Global: %d, local %d, local buffer %db, global buffer %db, copy stride %d, each work group will copy %d elements and each work item item will copy %d elements.\n", + (int) globalWorkgroupSize, (int)localWorkgroupSize, (int)localBufferSize, (int)globalBufferSize, (int)stride, copiesPerWorkgroup, copiesPerWorkItemInt); + + threads[0] = globalWorkgroupSize; + localThreads[0] = localWorkgroupSize; + + d = init_genrand( gRandomSeed ); + generate_random_data( vecType, globalBufferSize/get_explicit_type_size(vecType), d, inBuffer ); + 
free_mtdata(d); d = NULL; + + streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, globalBufferSize, inBuffer, &error ); + test_error( error, "Unable to create input buffer" ); + streams[ 1 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, globalBufferSize, outBuffer, &error ); + test_error( error, "Unable to create output buffer" ); + + error = clSetKernelArg( kernel, 0, sizeof( streams[ 0 ] ), &streams[ 0 ] ); + test_error( error, "Unable to set kernel argument" ); + error = clSetKernelArg( kernel, 1, sizeof( streams[ 1 ] ), &streams[ 1 ] ); + test_error( error, "Unable to set kernel argument" ); + error = clSetKernelArg( kernel, 2, localBufferSize, NULL ); + test_error( error, "Unable to set kernel argument" ); + error = clSetKernelArg( kernel, 3, sizeof(copiesPerWorkgroup), &copiesPerWorkgroup ); + test_error( error, "Unable to set kernel argument" ); + error = clSetKernelArg( kernel, 4, sizeof(copiesPerWorkItemInt), &copiesPerWorkItemInt ); + test_error( error, "Unable to set kernel argument" ); + error = clSetKernelArg( kernel, 5, sizeof(stride), &stride ); + test_error( error, "Unable to set kernel argument" ); + + // Enqueue + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); + test_error( error, "Unable to queue kernel" ); + + // Read + error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, globalBufferSize, outBuffer, 0, NULL, NULL ); + test_error( error, "Unable to read results" ); + + // Verify + size_t typeSize = get_explicit_type_size(vecType)* vecSize; + for (int i=0; i<(int)globalBufferSize; i+=(int)elementSize*(int)stride) + { + if (memcmp( ((char *)inBuffer)+i, ((char *)outBuffer)+i, typeSize) != 0 ) + { + unsigned char * inchar = (unsigned char*)inBuffer + i; + unsigned char * outchar = (unsigned char*)outBuffer + i; + char values[4096]; + values[0] = 0; + + log_error( "ERROR: Results of copy did not validate!\n" ); + sprintf(values + strlen( values), "%d -> [", i); + for (int j=0; 
j<(int)elementSize; j++) + sprintf(values + strlen( values), "%2x ", inchar[i*elementSize+j]); + sprintf(values + strlen(values), "] != ["); + for (int j=0; j<(int)elementSize; j++) + sprintf(values + strlen( values), "%2x ", outchar[i*elementSize+j]); + sprintf(values + strlen(values), "]"); + log_error("%s\n", values); + + return -1; + } + } + + free(inBuffer); + free(outBuffer); + + return 0; +} + +int test_strided_copy_all_types(cl_device_id deviceID, cl_context context, cl_command_queue queue, const char *kernelCode) +{ + ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble, kNumExplicitTypes }; + unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 }; + unsigned int strideSizes[] = { 1, 3, 4, 5, 0 }; + unsigned int size, typeIndex, stride; + + int errors = 0; + + for( typeIndex = 0; vecType[ typeIndex ] != kNumExplicitTypes; typeIndex++ ) + { + if( vecType[ typeIndex ] == kDouble && !is_extension_available( deviceID, "cl_khr_fp64" ) ) + continue; + + if (( vecType[ typeIndex ] == kLong || vecType[ typeIndex ] == kULong ) && !gHasLong ) + continue; + + for( size = 0; vecSizes[ size ] != 0; size++ ) + { + for( stride = 0; strideSizes[ stride ] != 0; stride++) + { + if (test_strided_copy( deviceID, context, queue, kernelCode, vecType[typeIndex], vecSizes[size], strideSizes[stride] )) + { + errors++; + } + } + } + } + if (errors) + return -1; + return 0; +} + + + + +int test_async_strided_copy_global_to_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return test_strided_copy_all_types( deviceID, context, queue, async_strided_global_to_local_kernel ); +} + +int test_async_strided_copy_local_to_global(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return test_strided_copy_all_types( deviceID, context, queue, async_strided_local_to_global_kernel ); +} + diff --git a/test_conformance/basic/test_barrier.c 
b/test_conformance/basic/test_barrier.c new file mode 100644 index 00000000..b24d771b --- /dev/null +++ b/test_conformance/basic/test_barrier.c @@ -0,0 +1,159 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include +#include + + +#include "procs.h" + +const char *barrier_kernel_code = +"__kernel void compute_sum(__global int *a, int n, __global int *tmp_sum, __global int *sum)\n" +"{\n" +" int tid = get_local_id(0);\n" +" int lsize = get_local_size(0);\n" +" int i;\n" +"\n" +" tmp_sum[tid] = 0;\n" +" for (i=tid; i1; i = hadd(i,1))\n" +" {\n" +" barrier(CLK_GLOBAL_MEM_FENCE);\n" +" if (tid + i < lsize)\n" +" tmp_sum[tid] += tmp_sum[tid + i];\n" +" lsize = i; \n" +" }\n" +"\n" +" //no barrier is required here because last person to write to tmp_sum[0] was tid 0 \n" +" if (tid == 0)\n" +" *sum = tmp_sum[0];\n" +"}\n"; + + +static int +verify_sum(int *inptr, int *outptr, int n) +{ + int r = 0; + int i; + + for (i=0; i max_local_workgroup_size[0]) + max_threadgroup_size = max_local_workgroup_size[0]; + + // work group size must divide evenly into the global size + while( num_elements % max_threadgroup_size ) + max_threadgroup_size--; + + input_ptr = (int*)malloc(sizeof(int) * num_elements); + output_ptr = (int*)malloc(sizeof(int)); + + streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 
num_elements, NULL, &err); + test_error(err, "clCreateBuffer failed."); + streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int), NULL, &err); + test_error(err, "clCreateBuffer failed."); + streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * max_threadgroup_size, NULL, &err); + test_error(err, "clCreateBuffer failed."); + + d = init_genrand( gRandomSeed ); + for (i=0; i +#include +#include +#include +#include + +#include "procs.h" + +const char *kernel_code = +"__kernel void test_kernel(\n" +"char%s c, uchar%s uc, short%s s, ushort%s us, int%s i, uint%s ui, float%s f,\n" +"__global float%s *result)\n" +"{\n" +" result[0] = %s(c);\n" +" result[1] = %s(uc);\n" +" result[2] = %s(s);\n" +" result[3] = %s(us);\n" +" result[4] = %s(i);\n" +" result[5] = %s(ui);\n" +" result[6] = f;\n" +"}\n"; + +const char *kernel_code_long = +"__kernel void test_kernel_long(\n" +"long%s l, ulong%s ul,\n" +"__global float%s *result)\n" +"{\n" +" result[0] = %s(l);\n" +" result[1] = %s(ul);\n" +"}\n"; + +int +test_basic_parameter_types_long(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + clMemWrapper results; + int error; + size_t global[3] = {1, 1, 1}; + float results_back[2*16]; + int count, index; + const char* types[] = { "long", "ulong" }; + char kernel_string[8192]; + int sizes[] = {1, 2, 4, 8, 16}; + const char* size_strings[] = {"", "2", "4", "8", "16"}; + float expected; + int total_errors = 0; + int size_to_test; + char *ptr; + char convert_string[1024]; + size_t max_parameter_size; + + // We don't really care about the contents since we're just testing that the types work. + cl_long l[16]={-21,-1,2,-3,4,-5,6,-7,8,-9,10,-11,12,-13,14,-15}; + cl_ulong ul[16]={22,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; + + // Calculate how large our paramter size is to the kernel + size_t parameter_size = sizeof(cl_long) + sizeof(cl_ulong); + + // Init our strings. 
+ kernel_string[0] = '\0'; + convert_string[0] = '\0'; + + // Get the maximum parameter size allowed + error = clGetDeviceInfo( device, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof( max_parameter_size ), &max_parameter_size, NULL ); + test_error( error, "Unable to get max parameter size from device" ); + + // Create the results buffer + results = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float)*2*16, NULL, &error); + test_error(error, "clCreateBuffer failed"); + + // Go over all the vector sizes + for (size_to_test = 0; size_to_test < 5; size_to_test++) { + clProgramWrapper program; + clKernelWrapper kernel; + + size_t total_parameter_size = parameter_size*sizes[size_to_test] + sizeof(cl_mem); + if (total_parameter_size > max_parameter_size) { + log_info("Can not test with vector size %d because it would exceed the maximum allowed parameter size to the kernel. (%d > %d)\n", + (int)sizes[size_to_test], (int)total_parameter_size, (int)max_parameter_size); + continue; + } + + log_info("Testing vector size %d\n", sizes[size_to_test]); + + // If size is > 1, then we need a explicit convert call. 
+ if (sizes[size_to_test] > 1) { + sprintf(convert_string, "convert_float%s", size_strings[size_to_test]); + } else { + sprintf(convert_string, " "); + } + + // Build the kernel + sprintf(kernel_string, kernel_code_long, + size_strings[size_to_test], size_strings[size_to_test], size_strings[size_to_test], + convert_string, convert_string + ); + + ptr = kernel_string; + error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&ptr, "test_kernel_long"); + test_error(error, "create single kernel failed"); + + // Set the arguments + for (count = 0; count < 2; count++) { + switch (count) { + case 0: error = clSetKernelArg(kernel, count, sizeof(cl_long)*sizes[size_to_test], &l); break; + case 1: error = clSetKernelArg(kernel, count, sizeof(cl_ulong)*sizes[size_to_test], &ul); break; + default: log_error("Test error"); break; + } + if (error) + log_error("Setting kernel arg %d %s%s: ", count, types[count], size_strings[size_to_test]); + test_error(error, "clSetKernelArgs failed"); + } + error = clSetKernelArg(kernel, 2, sizeof(cl_mem), &results); + test_error(error, "clSetKernelArgs failed"); + + // Execute + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global, NULL, 0, NULL, NULL); + test_error(error, "clEnqueueNDRangeKernel failed"); + + // Read back the results + error = clEnqueueReadBuffer(queue, results, CL_TRUE, 0, sizeof(cl_float)*2*16, results_back, 0, NULL, NULL); + test_error(error, "clEnqueueReadBuffer failed"); + + // Verify the results + for (count = 0; count < 2; count++) { + for (index=0; index < sizes[size_to_test]; index++) { + switch (count) { + case 0: expected = (float)l[index]; break; + case 1: expected = (float)ul[index]; break; + default: log_error("Test error"); break; + } + + if (results_back[count*sizes[size_to_test]+index] != expected) { + total_errors++; + log_error("Conversion from %s%s failed: index %d got %g, expected %g.\n", types[count], size_strings[size_to_test], + index, 
results_back[count*sizes[size_to_test]+index], expected); + } + } + } + } + + return total_errors; +} + +int +test_basic_parameter_types(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + clMemWrapper results; + int error; + size_t global[3] = {1, 1, 1}; + float results_back[7*16]; + int count, index; + const char* types[] = {"char", "uchar", "short", "ushort", "int", "uint", "float"}; + char kernel_string[8192]; + int sizes[] = {1, 2, 4, 8, 16}; + const char* size_strings[] = {"", "2", "4", "8", "16"}; + float expected; + int total_errors = 0; + int size_to_test; + char *ptr; + char convert_string[1024]; + size_t max_parameter_size; + + // We don't really care about the contents since we're just testing that the types work. + cl_char c[16]={0,-1,2,-3,4,-5,6,-7,8,-9,10,-11,12,-13,14,-15}; + cl_uchar uc[16]={16,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; + cl_short s[16]={-17,-1,2,-3,4,-5,6,-7,8,-9,10,-11,12,-13,14,-15}; + cl_ushort us[16]={18,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; + cl_int i[16]={-19,-1,2,-3,4,-5,6,-7,8,-9,10,-11,12,-13,14,-15}; + cl_uint ui[16]={20,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; + cl_float f[16]={-23,-1,2,-3,4,-5,6,-7,8,-9,10,-11,12,-13,14,-15}; + + // Calculate how large our paramter size is to the kernel + size_t parameter_size = sizeof(cl_char) + sizeof(cl_uchar) + + sizeof(cl_short) +sizeof(cl_ushort) + + sizeof(cl_int) +sizeof(cl_uint) + + sizeof(cl_float); + + // Init our strings. 
+ kernel_string[0] = '\0'; + convert_string[0] = '\0'; + + // Get the maximum parameter size allowed + error = clGetDeviceInfo( device, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof( max_parameter_size ), &max_parameter_size, NULL ); + test_error( error, "Unable to get max parameter size from device" ); + + // Create the results buffer + results = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float)*7*16, NULL, &error); + test_error(error, "clCreateBuffer failed"); + + // Go over all the vector sizes + for (size_to_test = 0; size_to_test < 5; size_to_test++) { + clProgramWrapper program; + clKernelWrapper kernel; + + size_t total_parameter_size = parameter_size*sizes[size_to_test] + sizeof(cl_mem); + if (total_parameter_size > max_parameter_size) { + log_info("Can not test with vector size %d because it would exceed the maximum allowed parameter size to the kernel. (%d > %d)\n", + (int)sizes[size_to_test], (int)total_parameter_size, (int)max_parameter_size); + continue; + } + + log_info("Testing vector size %d\n", sizes[size_to_test]); + + // If size is > 1, then we need a explicit convert call. 
+ if (sizes[size_to_test] > 1) { + sprintf(convert_string, "convert_float%s", size_strings[size_to_test]); + } else { + sprintf(convert_string, " "); + } + + // Build the kernel + sprintf(kernel_string, kernel_code, + size_strings[size_to_test], size_strings[size_to_test], size_strings[size_to_test], + size_strings[size_to_test], size_strings[size_to_test], size_strings[size_to_test], + size_strings[size_to_test], size_strings[size_to_test], + convert_string, convert_string, convert_string, + convert_string, convert_string, convert_string + ); + + ptr = kernel_string; + error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&ptr, "test_kernel"); + test_error(error, "create single kernel failed"); + + // Set the arguments + for (count = 0; count < 7; count++) { + switch (count) { + case 0: error = clSetKernelArg(kernel, count, sizeof(cl_char)*sizes[size_to_test], &c); break; + case 1: error = clSetKernelArg(kernel, count, sizeof(cl_uchar)*sizes[size_to_test], &uc); break; + case 2: error = clSetKernelArg(kernel, count, sizeof(cl_short)*sizes[size_to_test], &s); break; + case 3: error = clSetKernelArg(kernel, count, sizeof(cl_ushort)*sizes[size_to_test], &us); break; + case 4: error = clSetKernelArg(kernel, count, sizeof(cl_int)*sizes[size_to_test], &i); break; + case 5: error = clSetKernelArg(kernel, count, sizeof(cl_uint)*sizes[size_to_test], &ui); break; + case 6: error = clSetKernelArg(kernel, count, sizeof(cl_float)*sizes[size_to_test], &f); break; + default: log_error("Test error"); break; + } + if (error) + log_error("Setting kernel arg %d %s%s: ", count, types[count], size_strings[size_to_test]); + test_error(error, "clSetKernelArgs failed"); + } + error = clSetKernelArg(kernel, 7, sizeof(cl_mem), &results); + test_error(error, "clSetKernelArgs failed"); + + // Execute + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global, NULL, 0, NULL, NULL); + test_error(error, "clEnqueueNDRangeKernel failed"); + + // Read back the 
results + error = clEnqueueReadBuffer(queue, results, CL_TRUE, 0, sizeof(cl_float)*7*16, results_back, 0, NULL, NULL); + test_error(error, "clEnqueueReadBuffer failed"); + + // Verify the results + for (count = 0; count < 7; count++) { + for (index=0; index < sizes[size_to_test]; index++) { + switch (count) { + case 0: expected = (float)c[index]; break; + case 1: expected = (float)uc[index]; break; + case 2: expected = (float)s[index]; break; + case 3: expected = (float)us[index]; break; + case 4: expected = (float)i[index]; break; + case 5: expected = (float)ui[index]; break; + case 6: expected = (float)f[index]; break; + default: log_error("Test error"); break; + } + + if (results_back[count*sizes[size_to_test]+index] != expected) { + total_errors++; + log_error("Conversion from %s%s failed: index %d got %g, expected %g.\n", types[count], size_strings[size_to_test], + index, results_back[count*sizes[size_to_test]+index], expected); + } + } + } + } + + if (gHasLong) { + log_info("Testing long types...\n"); + total_errors += test_basic_parameter_types_long( device, context, queue, num_elements ); + } + else { + log_info("Longs unsupported, skipping."); + } + + return total_errors; +} + + + diff --git a/test_conformance/basic/test_bufferreadwriterect.c b/test_conformance/basic/test_bufferreadwriterect.c new file mode 100644 index 00000000..1631056b --- /dev/null +++ b/test_conformance/basic/test_bufferreadwriterect.c @@ -0,0 +1,564 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include +#include + +#include "procs.h" + +#define CL_EXIT_ERROR(cmd,format,...) \ +{ \ +if ((cmd) != CL_SUCCESS) { \ +log_error("CL ERROR: %s %u: ", __FILE__,__LINE__); \ +log_error(format,## __VA_ARGS__ ); \ +log_error("\n"); \ +/*abort();*/ \ +} \ +} + +typedef unsigned char BufferType; + +// Globals for test +cl_command_queue queue; + +// Width and height of each pair of images. +enum { TotalImages = 8 }; +size_t width [TotalImages]; +size_t height [TotalImages]; +size_t depth [TotalImages]; + +// cl buffer and host buffer. +cl_mem buffer [TotalImages]; +BufferType* verify[TotalImages]; +BufferType* backing[TotalImages]; + +// Temporary buffer used for read and write operations. +BufferType* tmp_buffer; +size_t tmp_buffer_size; + +size_t num_tries = 50; // Number of randomly selected operations to perform. +size_t alloc_scale = 2; // Scale term applied buffer allocation size. +MTdata mt; + +// Initialize a buffer in host memory containing random values of the specified size. +static void initialize_image(BufferType* ptr, size_t w, size_t h, size_t d, MTdata mt) +{ + enum { ElementSize = sizeof(BufferType)/sizeof(unsigned char) }; + + unsigned char* buf = (unsigned char*)ptr; + size_t size = w*h*d*ElementSize; + + for (size_t i = 0; i != size; i++) { + buf[i] = (unsigned char)(genrand_int32(mt) % 0xff); + } +} + +// This function prints the contents of a buffer to standard error. 
+void print_buffer(BufferType* buf, size_t w, size_t h, size_t d) { + log_error("Size = %lux%lux%lu (%lu total)\n",w,h,d,w*h*d); + for (unsigned k=0; k!=d;++k) { + log_error("Slice: %u\n",k); + for (unsigned j=0; j!=h;++j) { + for (unsigned i=0;i!=w;++i) { + log_error("%02x",buf[k*(w*h)+j*w+i]); + } + log_error("\n"); + } + log_error("\n"); + } +} + +// Returns true if the two specified regions overlap. +bool check_overlap_rect(size_t src_offset[3], + size_t dst_offset[3], + size_t region[3], + size_t row_pitch, + size_t slice_pitch) +{ + const size_t src_min[] = { src_offset[0], src_offset[1], src_offset[2] }; + const size_t src_max[] = { src_offset[0] + region[0], src_offset[1] + region[1], src_offset[2] + region[2] }; + + const size_t dst_min[] = { dst_offset[0], dst_offset[1], dst_offset[2] }; + const size_t dst_max[] = { dst_offset[0] + region[0], + dst_offset[1] + region[1], + dst_offset[2] + region[2]}; +// Check for overlap + bool overlap = true; + unsigned i; + for (i = 0; i != 3; ++i) + { + overlap = overlap && (src_min[i] < dst_max[i]) && (src_max[i] > dst_min[i]); + } + + size_t dst_start = dst_offset[2] * slice_pitch + dst_offset[1] * row_pitch + dst_offset[0]; + size_t dst_end = dst_start + (region[2] * slice_pitch + + region[1] * row_pitch + region[0]); + size_t src_start = src_offset[2] * slice_pitch + src_offset[1] * row_pitch + src_offset[0]; + size_t src_end = src_start + (region[2] * slice_pitch + + region[1] * row_pitch + region[0]); + if (!overlap) { + size_t delta_src_x = (src_offset[0] + region[0] > row_pitch) ? + src_offset[0] + region[0] - row_pitch : 0; size_t delta_dst_x = (dst_offset[0] + region[0] > row_pitch) ? 
+ dst_offset[0] + region[0] - row_pitch : 0; + if ((delta_src_x > 0 && delta_src_x > dst_offset[0]) || + (delta_dst_x > 0 && delta_dst_x > src_offset[0])) { + if ((src_start <= dst_start && dst_start < src_end) || (dst_start <= src_start && src_start < dst_end)) overlap = true; + } + if (region[2] > 1) { + size_t src_height = slice_pitch / row_pitch; size_t dst_height = slice_pitch / row_pitch; + size_t delta_src_y = (src_offset[1] + region[1] > src_height) ? src_offset[1] + region[1] - src_height : 0; + size_t delta_dst_y = (dst_offset[1] + region[1] > dst_height) ? dst_offset[1] + region[1] - dst_height : 0; + if ((delta_src_y > 0 && delta_src_y > dst_offset[1]) || + (delta_dst_y > 0 && delta_dst_y > src_offset[1])) { + if ((src_start <= dst_start && dst_start < src_end) || (dst_start <= src_start && src_start < dst_end)) + overlap = true; + } + } + } + return overlap; +} + + + +// This function invokes the CopyBufferRect CL command and then mirrors the operation on the host side verify buffers. +int copy_region(size_t src, size_t soffset[3], size_t sregion[3], size_t dst, size_t doffset[3], size_t dregion[3]) { + + // Copy between cl buffers. + size_t src_slice_pitch = (width[src]*height[src] != 1) ? width[src]*height[src] : 0; + size_t dst_slice_pitch = (width[dst]*height[dst] != 1) ? width[dst]*height[dst] : 0; + size_t src_row_pitch = width[src]; + + cl_int err; + if (check_overlap_rect(soffset,doffset,sregion,src_row_pitch, src_slice_pitch)) { + log_info( "Copy overlap reported, skipping copy buffer rect\n" ); + return CL_SUCCESS; + } else { + if ((err = clEnqueueCopyBufferRect(queue, + buffer[src],buffer[dst], + soffset, doffset, + sregion,/*dregion,*/ + width[src], src_slice_pitch, + width[dst], dst_slice_pitch, + 0, NULL, NULL)) != CL_SUCCESS) + { + CL_EXIT_ERROR(err, "clEnqueueCopyBufferRect failed between %u and %u",(unsigned)src,(unsigned)dst); + } + } + + // Copy between host buffers. 
+ size_t total = sregion[0] * sregion[1] * sregion[2]; + + size_t spitch = width[src]; + size_t sslice = width[src]*height[src]; + + size_t dpitch = width[dst]; + size_t dslice = width[dst]*height[dst]; + + for (size_t i = 0; i != total; ++i) { + + // Compute the coordinates of the element within the source and destination regions. + size_t rslice = sregion[0]*sregion[1]; + size_t sz = i / rslice; + size_t sy = (i % rslice) / sregion[0]; + size_t sx = (i % rslice) % sregion[0]; + + size_t dz = sz; + size_t dy = sy; + size_t dx = sx; + + // Compute the offset in bytes of the source and destination. + size_t s_idx = (soffset[2]+sz)*sslice + (soffset[1]+sy)*spitch + soffset[0]+sx; + size_t d_idx = (doffset[2]+dz)*dslice + (doffset[1]+dy)*dpitch + doffset[0]+dx; + + verify[dst][d_idx] = verify[src][s_idx]; + } + + return 0; +} + +// This function compares the destination region in the buffer pointed +// to by device, to the source region of the specified verify buffer. +int verify_region(BufferType* device, size_t src, size_t soffset[3], size_t sregion[3], size_t dst, size_t doffset[3]) { + + // Copy between host buffers. + size_t spitch = width[src]; + size_t sslice = width[src]*height[src]; + + size_t dpitch = width[dst]; + size_t dslice = width[dst]*height[dst]; + + size_t total = sregion[0] * sregion[1] * sregion[2]; + for (size_t i = 0; i != total; ++i) { + + // Compute the coordinates of the element within the source and destination regions. + size_t rslice = sregion[0]*sregion[1]; + size_t sz = i / rslice; + size_t sy = (i % rslice) / sregion[0]; + size_t sx = (i % rslice) % sregion[0]; + + // Compute the offset in bytes of the source and destination. 
+ size_t s_idx = (soffset[2]+sz)*sslice + (soffset[1]+sy)*spitch + soffset[0]+sx; + size_t d_idx = (doffset[2]+sz)*dslice + (doffset[1]+sy)*dpitch + doffset[0]+sx; + + if (device[d_idx] != verify[src][s_idx]) { + log_error("Verify failed on comparsion %lu: coordinate (%lu, %lu, %lu) of region\n",i,sx,sy,sz); + log_error("0x%02x != 0x%02x\n", device[d_idx], verify[src][s_idx]); +#if 0 + // Uncomment this section to print buffers. + log_error("Device (copy): [%lu]\n",dst); + print_buffer(device,width[dst],height[dst],depth[dst]); + log_error("\n"); + log_error("Verify: [%lu]\n",src); + print_buffer(verify[src],width[src],height[src],depth[src]); + log_error("\n"); + abort(); +#endif + return -1; + } + } + + return 0; +} + + +// This function invokes ReadBufferRect to read a region from the +// specified source buffer into a temporary destination buffer. The +// contents of the temporary buffer are then compared to the source +// region of the corresponding verify buffer. +int read_verify_region(size_t src, size_t soffset[3], size_t sregion[3], size_t dst, size_t doffset[3], size_t dregion[3]) { + + // Clear the temporary destination host buffer. + memset(tmp_buffer, 0xff, tmp_buffer_size); + + size_t src_slice_pitch = (width[src]*height[src] != 1) ? width[src]*height[src] : 0; + size_t dst_slice_pitch = (width[dst]*height[dst] != 1) ? width[dst]*height[dst] : 0; + + // Copy the source region of the cl buffer, to the destination region of the temporary buffer. 
+ CL_EXIT_ERROR(clEnqueueReadBufferRect(queue, + buffer[src], + CL_TRUE, + soffset,doffset, + sregion, + width[src], src_slice_pitch, + width[dst], dst_slice_pitch, + tmp_buffer, + 0, NULL, NULL), "clEnqueueCopyBufferRect failed between %u and %u",(unsigned)src,(unsigned)dst); + + return verify_region(tmp_buffer,src,soffset,sregion,dst,doffset); +} + +// This function performs the same verification check as +// read_verify_region, except a MapBuffer command is used to access the +// device buffer data instead of a ReadBufferRect, and the whole +// buffer is checked. +int map_verify_region(size_t src) { + + size_t size_bytes = width[src]*height[src]*depth[src]*sizeof(BufferType); + + // Copy the source region of the cl buffer, to the destination region of the temporary buffer. + cl_int err; + BufferType* mapped = (BufferType*)clEnqueueMapBuffer(queue,buffer[src],CL_TRUE,CL_MAP_READ,0,size_bytes,0,NULL,NULL,&err); + CL_EXIT_ERROR(err, "clEnqueueMapBuffer failed for buffer %u",(unsigned)src); + + size_t soffset[] = { 0, 0, 0 }; + size_t sregion[] = { width[src], height[src], depth[src] }; + + int ret = verify_region(mapped,src,soffset,sregion,src,soffset); + + CL_EXIT_ERROR(clEnqueueUnmapMemObject(queue,buffer[src],mapped,0,NULL,NULL), + "clEnqueueUnmapMemObject failed for buffer %u",(unsigned)src); + + return ret; +} + +// This function generates a new temporary buffer and then writes a +// region of it to a region in the specified destination buffer. +int write_region(size_t src, size_t soffset[3], size_t sregion[3], size_t dst, size_t doffset[3], size_t dregion[3]) { + + initialize_image(tmp_buffer, tmp_buffer_size, 1, 1, mt); + // memset(tmp_buffer, 0xf0, tmp_buffer_size); + + size_t src_slice_pitch = (width[src]*height[src] != 1) ? width[src]*height[src] : 0; + size_t dst_slice_pitch = (width[dst]*height[dst] != 1) ? width[dst]*height[dst] : 0; + + // Copy the source region of the cl buffer, to the destination region of the temporary buffer. 
+ CL_EXIT_ERROR(clEnqueueWriteBufferRect(queue, + buffer[dst], + CL_TRUE, + doffset,soffset, + /*sregion,*/dregion, + width[dst], dst_slice_pitch, + width[src], src_slice_pitch, + tmp_buffer, + 0, NULL, NULL), "clEnqueueWriteBufferRect failed between %u and %u",(unsigned)src,(unsigned)dst); + + // Copy from the temporary buffer to the host buffer. + size_t spitch = width[src]; + size_t sslice = width[src]*height[src]; + size_t dpitch = width[dst]; + size_t dslice = width[dst]*height[dst]; + + size_t total = sregion[0] * sregion[1] * sregion[2]; + for (size_t i = 0; i != total; ++i) { + + // Compute the coordinates of the element within the source and destination regions. + size_t rslice = sregion[0]*sregion[1]; + size_t sz = i / rslice; + size_t sy = (i % rslice) / sregion[0]; + size_t sx = (i % rslice) % sregion[0]; + + size_t dz = sz; + size_t dy = sy; + size_t dx = sx; + + // Compute the offset in bytes of the source and destination. + size_t s_idx = (soffset[2]+sz)*sslice + (soffset[1]+sy)*spitch + soffset[0]+sx; + size_t d_idx = (doffset[2]+dz)*dslice + (doffset[1]+dy)*dpitch + doffset[0]+dx; + + verify[dst][d_idx] = tmp_buffer[s_idx]; + } + return 0; +} + +void CL_CALLBACK mem_obj_destructor_callback( cl_mem, void *data ) +{ + free( data ); +} + +// This is the main test function for the conformance test. +int +test_bufferreadwriterect(cl_device_id device, cl_context context, cl_command_queue queue_, int num_elements) +{ + queue = queue_; + cl_int err; + + // Initialize the random number generator. + mt = init_genrand( gRandomSeed ); + + // Compute a maximum buffer size based on the number of test images and the device maximum. + cl_ulong max_mem_alloc_size = 0; + CL_EXIT_ERROR(clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), &max_mem_alloc_size, NULL),"Could not get device info"); + log_info("CL_DEVICE_MAX_MEM_ALLOC_SIZE = %llu bytes.\n", max_mem_alloc_size); + + // Confirm that the maximum allocation size is not zero. 
+ if (max_mem_alloc_size == 0) { + log_error("Error: CL_DEVICE_MAX_MEM_ALLOC_SIZE is zero bytes\n"); + return -1; + } + + // Guess at a reasonable maximum dimension. + size_t max_mem_alloc_dim = (size_t)cbrt((double)(max_mem_alloc_size/sizeof(BufferType)))/alloc_scale; + if (max_mem_alloc_dim == 0) { + max_mem_alloc_dim = max_mem_alloc_size; + } + + log_info("Using maximum dimension = %lu.\n", max_mem_alloc_dim); + + // Create pairs of cl buffers and host buffers on which operations will be mirrored. + log_info("Creating %u pairs of random sized host and cl buffers.\n", TotalImages); + + size_t max_size = 0; + size_t total_bytes = 0; + + for (unsigned i=0; i != TotalImages; ++i) { + + // Determine a width and height for this buffer. + size_t size_bytes; + size_t tries = 0; + size_t max_tries = 1048576; + do { + width[i] = get_random_size_t(1, max_mem_alloc_dim, mt); + height[i] = get_random_size_t(1, max_mem_alloc_dim, mt); + depth[i] = get_random_size_t(1, max_mem_alloc_dim, mt); + ++tries; + } while ((tries < max_tries) && (size_bytes = width[i]*height[i]*depth[i]*sizeof(BufferType)) > max_mem_alloc_size); + + // Check to see if adequately sized buffers were found. + if (tries >= max_tries) { + log_error("Error: Could not find random buffer sized less than %llu bytes in %lu tries.\n", + max_mem_alloc_size, max_tries); + return -1; + } + + // Keep track of the dimensions of the largest buffer. + max_size = (size_bytes > max_size) ? size_bytes : max_size; + total_bytes += size_bytes; + + log_info("Buffer[%u] is (%lu,%lu,%lu) = %lu MB (truncated)\n",i,width[i],height[i],depth[i],(size_bytes)/1048576); + } + + log_info( "Total size: %lu MB (truncated)\n", total_bytes/1048576 ); + + // Allocate a temporary buffer for read and write operations. 
+ tmp_buffer_size = max_size; + tmp_buffer = (BufferType*)malloc(tmp_buffer_size); + + // Initialize cl buffers + log_info( "Initializing buffers\n" ); + for (unsigned i=0; i != TotalImages; ++i) { + + size_t size_bytes = width[i]*height[i]*depth[i]*sizeof(BufferType); + + // Allocate a host copy of the buffer for verification. + verify[i] = (BufferType*)malloc(size_bytes); + CL_EXIT_ERROR(verify[i] ? CL_SUCCESS : -1, "malloc of host buffer failed for buffer %u", i); + + // Allocate the buffer in host memory. + backing[i] = (BufferType*)malloc(size_bytes); + CL_EXIT_ERROR(backing[i] ? CL_SUCCESS : -1, "malloc of backing buffer failed for buffer %u", i); + + // Generate a random buffer. + log_info( "Initializing buffer %u\n", i ); + initialize_image(verify[i], width[i], height[i], depth[i], mt); + + // Copy the image into a buffer which will passed to CL. + memcpy(backing[i], verify[i], size_bytes); + + // Create the CL buffer. + buffer[i] = clCreateBuffer (context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE, size_bytes, backing[i], &err); + CL_EXIT_ERROR(err,"clCreateBuffer failed for buffer %u", i); + + // Make sure buffer is cleaned up appropriately if we encounter an error in the rest of the calls. + err = clSetMemObjectDestructorCallback( buffer[i], mem_obj_destructor_callback, backing[i] ); + CL_EXIT_ERROR(err, "Unable to set mem object destructor callback" ); + } + + // Main test loop, run num_tries times. + log_info( "Executing %u test operations selected at random.\n", (unsigned)num_tries ); + for (size_t iter = 0; iter < num_tries; ++iter) { + + // Determine a source and a destination. + size_t src = get_random_size_t(0,TotalImages,mt); + size_t dst = get_random_size_t(0,TotalImages,mt); + + // Determine the minimum dimensions. + size_t min_width = width[src] < width[dst] ? width[src] : width[dst]; + size_t min_height = height[src] < height[dst] ? height[src] : height[dst]; + size_t min_depth = depth[src] < depth[dst] ? 
depth[src] : depth[dst]; + + // Generate a random source rectangle within the minimum dimensions. + size_t mx = get_random_size_t(0, min_width-1, mt); + size_t my = get_random_size_t(0, min_height-1, mt); + size_t mz = get_random_size_t(0, min_depth-1, mt); + + size_t sw = get_random_size_t(1, (min_width - mx), mt); + size_t sh = get_random_size_t(1, (min_height - my), mt); + size_t sd = get_random_size_t(1, (min_depth - mz), mt); + + size_t sx = get_random_size_t(0, width[src]-sw, mt); + size_t sy = get_random_size_t(0, height[src]-sh, mt); + size_t sz = get_random_size_t(0, depth[src]-sd, mt); + + size_t soffset[] = { sx, sy, sz }; + size_t sregion[] = { sw, sh, sd }; + + // Generate a destination rectangle of the same size. + size_t dw = sw; + size_t dh = sh; + size_t dd = sd; + + // Generate a random destination offset within the buffer. + size_t dx = get_random_size_t(0, (width[dst] - dw), mt); + size_t dy = get_random_size_t(0, (height[dst] - dh), mt); + size_t dz = get_random_size_t(0, (depth[dst] - dd), mt); + size_t doffset[] = { dx, dy, dz }; + size_t dregion[] = { dw, dh, dd }; + + // Execute one of three operations: + // - Copy: Copies between src and dst within each set of host, buffer, and images. + // - Read & verify: Reads src region from buffer and image, and compares to host. + // - Write: Generates new buffer with src dimensions, and writes to cl buffer and image. 
+ + enum { TotalOperations = 3 }; + size_t operation = get_random_size_t(0,TotalOperations,mt); + + switch (operation) { + case 0: + log_info("%lu Copy %lu offset (%lu,%lu,%lu) -> %lu offset (%lu,%lu,%lu) region (%lux%lux%lu = %lu)\n", + iter, + src, soffset[0], soffset[1], soffset[2], + dst, doffset[0], doffset[1], doffset[2], + sregion[0], sregion[1], sregion[2], + sregion[0]*sregion[1]*sregion[2]); + if ((err = copy_region(src, soffset, sregion, dst, doffset, dregion))) + return err; + break; + case 1: + log_info("%lu Read %lu offset (%lu,%lu,%lu) -> %lu offset (%lu,%lu,%lu) region (%lux%lux%lu = %lu)\n", + iter, + src, soffset[0], soffset[1], soffset[2], + dst, doffset[0], doffset[1], doffset[2], + sregion[0], sregion[1], sregion[2], + sregion[0]*sregion[1]*sregion[2]); + if ((err = read_verify_region(src, soffset, sregion, dst, doffset, dregion))) + return err; + break; + case 2: + log_info("%lu Write %lu offset (%lu,%lu,%lu) -> %lu offset (%lu,%lu,%lu) region (%lux%lux%lu = %lu)\n", + iter, + src, soffset[0], soffset[1], soffset[2], + dst, doffset[0], doffset[1], doffset[2], + sregion[0], sregion[1], sregion[2], + sregion[0]*sregion[1]*sregion[2]); + if ((err = write_region(src, soffset, sregion, dst, doffset, dregion))) + return err; + break; + } + +#if 0 + // Uncomment this section to verify each operation. + // If commented out, verification won't occur until the end of the + // test, and it will not be possible to determine which operation failed. + log_info("Verify src %lu offset (%u,%u,%u) region (%lux%lux%lu)\n", src, 0, 0, 0, width[src], height[src], depth[src]); + if (err = map_verify_region(src)) + return err; + + log_info("Verify dst %lu offset (%u,%u,%u) region (%lux%lux%lu)\n", dst, 0, 0, 0, width[dst], height[dst], depth[dst]); + if (err = map_verify_region(dst)) + return err; + + +#endif + + } // end main for loop. 
+ + for (unsigned i=0;i +#include +#include +#include +#include + + +#include "procs.h" + +const char *constant_kernel_code = +"__kernel void constant_kernel(__global float *out, __constant float *tmpF, __constant int *tmpI)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" float ftmp = tmpF[tid]; \n" +" float Itmp = tmpI[tid]; \n" +" out[tid] = ftmp * Itmp; \n" +"}\n"; + +const char *loop_constant_kernel_code = +"kernel void loop_constant_kernel(global float *out, constant float *i_pos, int num)\n" +"{\n" +" int tid = get_global_id(0);\n" +" float sum = 0;\n" +" for (int i = 0; i < num; i++) {\n" +" float pos = i_pos[i*3];\n" +" sum += pos;\n" +" }\n" +" out[tid] = sum;\n" +"}\n"; + + +static int +verify(cl_float *tmpF, cl_int *tmpI, cl_float *out, int n) +{ + int i; + + for (i=0; i < n; i++) + { + float f = tmpF[i] * tmpI[i]; + if( out[i] != f ) + { + log_error("CONSTANT test failed\n"); + return -1; + } + } + + log_info("CONSTANT test passed\n"); + return 0; +} + + +static int +verify_loop_constant(const cl_float *tmp, cl_float *out, cl_int l, int n) +{ + int i; + cl_int j; + for (i=0; i < n; i++) + { + float sum = 0; + for (j=0; j < l; ++j) + sum += tmp[j*3]; + + if( out[i] != sum ) + { + log_error("loop CONSTANT test failed\n"); + return -1; + } + } + + log_info("loop CONSTANT test passed\n"); + return 0; +} + +int +test_constant(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + cl_mem streams[3]; + cl_int *tmpI; + cl_float *tmpF, *out; + cl_program program; + cl_kernel kernel; + size_t global_threads[3]; + int err; + unsigned int i; + cl_ulong maxSize; + size_t num_floats, num_ints, constant_values; + MTdata d; + RoundingMode oldRoundMode; + int isRTZ = 0; + + /* Verify our test buffer won't be bigger than allowed */ + err = clGetDeviceInfo( device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof( maxSize ), &maxSize, 0 ); + test_error( err, "Unable to get max constant buffer size" ); + + log_info("Device reports 
CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE %llu bytes.\n", maxSize); + maxSize/=4; + num_ints = (size_t)maxSize/sizeof(cl_int); + num_floats = (size_t)maxSize/sizeof(cl_float); + if (num_ints >= num_floats) { + constant_values = num_floats; + } else { + constant_values = num_ints; + } + + log_info("Test will attempt to use %lu bytes with one %lu byte constant int buffer and one %lu byte constant float buffer.\n", + constant_values*sizeof(cl_int) + constant_values*sizeof(cl_float), constant_values*sizeof(cl_int), constant_values*sizeof(cl_float)); + + tmpI = (cl_int*)malloc(sizeof(cl_int) * constant_values); + tmpF = (cl_float*)malloc(sizeof(cl_float) * constant_values); + out = (cl_float*)malloc(sizeof(cl_float) * constant_values); + streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * constant_values, NULL, NULL); + if (!streams[0]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * constant_values, NULL, NULL); + if (!streams[1]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * constant_values, NULL, NULL); + if (!streams[2]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + + d = init_genrand( gRandomSeed ); + for (i=0; i +#include +#include +#include +#include + + +#include "procs.h" + +const char *constant_source_kernel_code[] = { +"__constant int outVal = 42;\n" +"__constant int outIndex = 7;\n" +"__constant int outValues[ 16 ] = { 17, 01, 11, 12, 1955, 11, 5, 1985, 113, 1, 24, 1984, 7, 23, 1979, 97 };\n" +"\n" +"__kernel void constant_kernel( __global int *out )\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" if( tid == 0 )\n" +" {\n" +" out[ 0 ] = outVal;\n" +" out[ 1 ] = outValues[ outIndex ];\n" +" }\n" +" else\n" +" {\n" +" out[ tid + 1 ] = outValues[ tid ];\n" +" }\n" +"}\n" }; + +int 
test_constant_source(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + clProgramWrapper program; + clKernelWrapper kernel; + + clMemWrapper outStream; + cl_int outValues[ 17 ]; + cl_int expectedValues[ 17 ] = { 42, 1985, 01, 11, 12, 1955, 11, 5, 1985, 113, 1, 24, 1984, 7, 23, 1979, 97 }; + + cl_int error; + + + // Create a kernel to test with + error = create_single_kernel_helper( context, &program, &kernel, 1, constant_source_kernel_code, "constant_kernel" ); + test_error( error, "Unable to create testing kernel" ); + + // Create our output buffer + outStream = clCreateBuffer( context, CL_MEM_WRITE_ONLY, sizeof( outValues ), NULL, &error ); + test_error( error, "Unable to create output buffer" ); + + // Set the argument + error = clSetKernelArg( kernel, 0, sizeof( outStream ), &outStream ); + test_error( error, "Unable to set kernel argument" ); + + // Run test kernel + size_t threads[ 1 ] = { 16 }; + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL ); + test_error( error, "Unable to enqueue kernel" ); + + // Read results + error = clEnqueueReadBuffer( queue, outStream, CL_TRUE, 0, sizeof( outValues ), outValues, 0, NULL, NULL ); + test_error( error, "Unable to read results" ); + + // Verify results + for( int i = 0; i < 17; i++ ) + { + if( expectedValues[ i ] != outValues[ i ] ) + { + if( i == 0 ) + log_error( "ERROR: Output value %d from constant source global did not validate! (Expected %d, got %d)\n", i, expectedValues[ i ], outValues[ i ] ); + else if( i == 1 ) + log_error( "ERROR: Output value %d from constant-indexed constant array did not validate! (Expected %d, got %d)\n", i, expectedValues[ i ], outValues[ i ] ); + else + log_error( "ERROR: Output value %d from variable-indexed constant array did not validate! 
(Expected %d, got %d)\n", i, expectedValues[ i ], outValues[ i ] ); + return -1; + } + } + + return 0; +} + + + + + diff --git a/test_conformance/basic/test_createkernelsinprogram.c b/test_conformance/basic/test_createkernelsinprogram.c new file mode 100644 index 00000000..2940f298 --- /dev/null +++ b/test_conformance/basic/test_createkernelsinprogram.c @@ -0,0 +1,105 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include +#include + + +#include "procs.h" + +const char *sample_single_kernel = { +"__kernel void sample_test(__global float *src, __global int *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = (int)src[tid];\n" +"\n" +"}\n"}; + +const char *sample_double_kernel = { +"__kernel void sample_test(__global float *src, __global int *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = (int)src[tid];\n" +"\n" +"}\n" +"__kernel void sample_test2(__global float *src, __global int *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = (int)src[tid];\n" +"\n" +"}\n"}; + + +int +test_createkernelsinprogram(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + cl_program program; + cl_kernel kernel[2]; + unsigned int num_kernels; + int err; + + err = create_single_kernel_helper(context, &program, NULL, 1, &sample_single_kernel, NULL); + if 
(err != CL_SUCCESS) + { + log_error("create_single_kernel_helper failed\n"); + return -1; + } + + err = clCreateKernelsInProgram(program, 1, kernel, &num_kernels); + if ( (err != CL_SUCCESS) || (num_kernels != 1) ) + { + log_error("clCreateKernelsInProgram test failed for a single kernel\n"); + return -1; + } + + clReleaseKernel(kernel[0]); + clReleaseProgram(program); + + err = create_single_kernel_helper(context, &program, NULL, 1, &sample_double_kernel, NULL); + if (err != CL_SUCCESS) + { + log_error("create_single_kernel_helper failed\n"); + return -1; + } + + err = clCreateKernelsInProgram(program, 2, kernel, &num_kernels); + if ( (err != CL_SUCCESS) || (num_kernels != 2) ) + { + log_error("clCreateKernelsInProgram test failed for two kernels\n"); + return -1; + } + + log_info("clCreateKernelsInProgram test passed\n"); + + clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + clReleaseProgram(program); + + + return err; +} + + + + + diff --git a/test_conformance/basic/test_enqueue_map.cpp b/test_conformance/basic/test_enqueue_map.cpp new file mode 100644 index 00000000..d78d20e5 --- /dev/null +++ b/test_conformance/basic/test_enqueue_map.cpp @@ -0,0 +1,254 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include +#include + + +#include "procs.h" +#include "../../test_common/harness/conversions.h" +#include "../../test_common/harness/typeWrappers.h" + +const cl_mem_flags flag_set[] = { + CL_MEM_ALLOC_HOST_PTR, + CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, + CL_MEM_USE_HOST_PTR, + CL_MEM_COPY_HOST_PTR, + 0 +}; +const char* flag_set_names[] = { + "CL_MEM_ALLOC_HOST_PTR", + "CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR", + "CL_MEM_USE_HOST_PTR", + "CL_MEM_COPY_HOST_PTR", + "0" +}; + +int test_enqueue_map_buffer(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + const size_t bufferSize = 256*256; + int src_flag_id; + MTdata d = init_genrand( gRandomSeed ); + cl_char *initialData = (cl_char*)malloc(bufferSize); + cl_char *finalData = (cl_char*)malloc(bufferSize); + + for (src_flag_id=0; src_flag_id < sizeof(flag_set)/sizeof(flag_set[0]); src_flag_id++) + { + clMemWrapper memObject; + log_info("Testing with cl_mem_flags src: %s\n", flag_set_names[src_flag_id]); + + generate_random_data( kChar, (unsigned int)bufferSize, d, initialData ); + + if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR)) + memObject = clCreateBuffer(context, flag_set[src_flag_id], bufferSize * sizeof( cl_char ), initialData, &error); + else + memObject = clCreateBuffer(context, flag_set[src_flag_id], bufferSize * sizeof( cl_char ), NULL, &error); + test_error( error, "Unable to create testing buffer" ); + + if (!(flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) && !(flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR)) + { + error = clEnqueueWriteBuffer(queue, memObject, CL_TRUE, 0, bufferSize * sizeof( cl_char ), initialData, 0, NULL, NULL); + test_error( error, "clEnqueueWriteBuffer failed"); + } + + for( int i = 0; i < 128; i++ ) + { + + size_t offset = (size_t)random_in_range( 0, (int)bufferSize - 1, d ); + size_t length = 
(size_t)random_in_range( 1, (int)( bufferSize - offset ), d ); + + cl_char *mappedRegion = (cl_char *)clEnqueueMapBuffer( queue, memObject, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, + offset, length, 0, NULL, NULL, &error ); + if( error != CL_SUCCESS ) + { + print_error( error, "clEnqueueMapBuffer call failed" ); + log_error( "\tOffset: %d Length: %d\n", (int)offset, (int)length ); + free( initialData ); + free( finalData ); + free_mtdata(d); + return -1; + } + + // Write into the region + for( size_t j = 0; j < length; j++ ) + { + cl_char spin = (cl_char)genrand_int32( d ); + + // Test read AND write in one swipe + cl_char value = mappedRegion[ j ]; + value = spin - value; + mappedRegion[ j ] = value; + + // Also update the initial data array + value = initialData[ offset + j ]; + value = spin - value; + initialData[ offset + j ] = value; + } + + // Unmap + error = clEnqueueUnmapMemObject( queue, memObject, mappedRegion, 0, NULL, NULL ); + test_error( error, "Unable to unmap buffer" ); + } + + // Final validation: read actual values of buffer and compare against our reference + error = clEnqueueReadBuffer( queue, memObject, CL_TRUE, 0, sizeof( cl_char ) * bufferSize, finalData, 0, NULL, NULL ); + test_error( error, "Unable to read results" ); + + for( size_t q = 0; q < bufferSize; q++ ) + { + if( initialData[ q ] != finalData[ q ] ) + { + log_error( "ERROR: Sample %d did not validate! 
Got %d, expected %d\n", (int)q, (int)finalData[ q ], (int)initialData[ q ] ); + free( initialData ); + free( finalData ); + free_mtdata(d); + return -1; + } + } + } // cl_mem flags + + free( initialData ); + free( finalData ); + free_mtdata(d); + + return 0; +} + +int test_enqueue_map_image(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + cl_image_format format = { CL_RGBA, CL_UNSIGNED_INT32 }; + const size_t imageSize = 256; + int src_flag_id; + cl_uint *initialData; + cl_uint *finalData; + MTdata d; + + PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID ) + + initialData = (cl_uint*)malloc(imageSize * imageSize * 4 *sizeof(cl_uint)); + finalData = (cl_uint*)malloc(imageSize * imageSize * 4 *sizeof(cl_uint)); + + if( !is_image_format_supported( context, CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE2D, &format ) ) + { + log_error( "ERROR: Test requires basic OpenCL 1.0 format CL_RGBA:CL_UNSIGNED_INT32, which is unsupported by this device!\n" ); + free(initialData); + free(finalData); + return -1; + } + + d = init_genrand( gRandomSeed ); + for (src_flag_id=0; src_flag_id < sizeof(flag_set)/sizeof(flag_set[0]); src_flag_id++) { + clMemWrapper memObject; + log_info("Testing with cl_mem_flags src: %s\n", flag_set_names[src_flag_id]); + + generate_random_data( kUInt, (unsigned int)( imageSize * imageSize ), d, initialData ); + + if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR)) + memObject = create_image_2d( context, CL_MEM_READ_WRITE | flag_set[src_flag_id], &format, + imageSize, imageSize, 0, initialData, &error ); + else + memObject = create_image_2d( context, CL_MEM_READ_WRITE | flag_set[src_flag_id], &format, + imageSize, imageSize, 0, NULL, &error ); + test_error( error, "Unable to create testing buffer" ); + + if (!(flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) && !(flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR)) { + size_t write_origin[3]={0,0,0}, write_region[3]={imageSize, 
imageSize, 1}; + error = clEnqueueWriteImage(queue, memObject, CL_TRUE, write_origin, write_region, NULL, NULL, initialData, 0, NULL, NULL); + test_error( error, "Unable to write to testing buffer" ); + } + + for( int i = 0; i < 128; i++ ) + { + + size_t offset[3], region[3]; + size_t rowPitch; + + offset[ 0 ] = (size_t)random_in_range( 0, (int)imageSize - 1, d ); + region[ 0 ] = (size_t)random_in_range( 1, (int)( imageSize - offset[ 0 ] - 1), d ); + offset[ 1 ] = (size_t)random_in_range( 0, (int)imageSize - 1, d ); + region[ 1 ] = (size_t)random_in_range( 1, (int)( imageSize - offset[ 1 ] - 1), d ); + offset[ 2 ] = 0; + region[ 2 ] = 1; + cl_uint *mappedRegion = (cl_uint *)clEnqueueMapImage( queue, memObject, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, + offset, region, &rowPitch, NULL, 0, NULL, NULL, &error ); + if( error != CL_SUCCESS ) + { + print_error( error, "clEnqueueMapImage call failed" ); + log_error( "\tOffset: %d,%d Region: %d,%d\n", (int)offset[0], (int)offset[1], (int)region[0], (int)region[1] ); + free(initialData); + free(finalData); + free_mtdata(d); + return -1; + } + + // Write into the region + cl_uint *mappedPtr = mappedRegion; + for( size_t y = 0; y < region[ 1 ]; y++ ) + { + for( size_t x = 0; x < region[ 0 ] * 4; x++ ) + { + cl_int spin = (cl_int)random_in_range( 16, 1024, d ); + + cl_int value; + // Test read AND write in one swipe + value = mappedPtr[ ( y * rowPitch/sizeof(cl_uint) ) + x ]; + value = spin - value; + mappedPtr[ ( y * rowPitch/sizeof(cl_uint) ) + x ] = value; + + // Also update the initial data array + value = initialData[ ( ( offset[ 1 ] + y ) * imageSize + offset[ 0 ] ) * 4 + x ]; + value = spin - value; + initialData[ ( ( offset[ 1 ] + y ) * imageSize + offset[ 0 ] ) * 4 + x ] = value; + } + } + + // Unmap + error = clEnqueueUnmapMemObject( queue, memObject, mappedRegion, 0, NULL, NULL ); + test_error( error, "Unable to unmap buffer" ); + } + + // Final validation: read actual values of buffer and compare against our reference 
+ size_t finalOrigin[3] = { 0, 0, 0 }, finalRegion[3] = { imageSize, imageSize, 1 }; + error = clEnqueueReadImage( queue, memObject, CL_TRUE, finalOrigin, finalRegion, 0, 0, finalData, 0, NULL, NULL ); + test_error( error, "Unable to read results" ); + + for( size_t q = 0; q < imageSize * imageSize * 4; q++ ) + { + if( initialData[ q ] != finalData[ q ] ) + { + log_error( "ERROR: Sample %d (coord %d,%d) did not validate! Got %d, expected %d\n", (int)q, (int)( ( q / 4 ) % imageSize ), (int)( ( q / 4 ) / imageSize ), + (int)finalData[ q ], (int)initialData[ q ] ); + free(initialData); + free(finalData); + free_mtdata(d); + return -1; + } + } + } // cl_mem_flags + + free(initialData); + free(finalData); + free_mtdata(d); + return 0; +} + + diff --git a/test_conformance/basic/test_enqueued_local_size.c b/test_conformance/basic/test_enqueued_local_size.c new file mode 100644 index 00000000..371f9ac6 --- /dev/null +++ b/test_conformance/basic/test_enqueued_local_size.c @@ -0,0 +1,123 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include +#include +#include "../../test_common/harness/rounding_mode.h" + +#include "procs.h" + +static const char *enqueued_local_size_2d_code = +"__kernel void test_enqueued_local_size_2d(global int *dst)\n" +"{\n" +" if ((get_global_id(0) == 0) && (get_global_id(1) == 0))\n" +" {\n" +" dst[0] = (int)get_enqueued_local_size(0)\n;" +" dst[1] = (int)get_enqueued_local_size(1)\n;" +" }\n" +"}\n"; + +static const char *enqueued_local_size_1d_code = +"__kernel void test_enqueued_local_size_1d(global int *dst)\n" +"{\n" +" int tid_x = get_global_id(0);\n" +" if (get_global_id(0) == 0)\n" +" {\n" +" dst[tid_x] = (int)get_enqueued_local_size(0)\n;" +" }\n" +"}\n"; + + +static int +verify_enqueued_local_size(int *result, size_t *expected, int n) +{ + int i; + for (i=0; i +#include +#include +#include +#include + + +#include "procs.h" +#include "../../test_common/harness/conversions.h" +#include "../../test_common/harness/typeWrappers.h" + +#define DECLARE_S2V_IDENT_KERNEL(srctype,dsttype,size) \ +"__kernel void test_conversion(__global " srctype " *sourceValues, __global " dsttype #size " *destValues )\n" \ +"{\n" \ +" int tid = get_global_id(0);\n" \ +" " srctype " src = sourceValues[tid];\n" \ +"\n" \ +" destValues[tid] = (" dsttype #size ")src;\n" \ +"\n" \ +"}\n" + +#define DECLARE_S2V_IDENT_KERNELS(srctype,dsttype) \ +{ \ +DECLARE_S2V_IDENT_KERNEL(srctype,#dsttype,2), \ +DECLARE_S2V_IDENT_KERNEL(srctype,#dsttype,4), \ +DECLARE_S2V_IDENT_KERNEL(srctype,#dsttype,8), \ +DECLARE_S2V_IDENT_KERNEL(srctype,#dsttype,16) \ +} + +#define DECLARE_EMPTY { NULL, NULL, NULL, NULL, NULL } + +/* Note: the next four arrays all must match in order and size to the ExplicitTypes enum in conversions.h!!! 
*/ + +#define DECLARE_S2V_IDENT_KERNELS_SET(srctype) \ +{ \ +DECLARE_S2V_IDENT_KERNELS(#srctype,bool), \ + DECLARE_S2V_IDENT_KERNELS(#srctype,char), \ + DECLARE_S2V_IDENT_KERNELS(#srctype,uchar), \ + DECLARE_S2V_IDENT_KERNELS(#srctype,unsigned char), \ +DECLARE_S2V_IDENT_KERNELS(#srctype,short), \ +DECLARE_S2V_IDENT_KERNELS(#srctype,ushort), \ +DECLARE_S2V_IDENT_KERNELS(#srctype,unsigned short), \ +DECLARE_S2V_IDENT_KERNELS(#srctype,int), \ +DECLARE_S2V_IDENT_KERNELS(#srctype,uint), \ +DECLARE_S2V_IDENT_KERNELS(#srctype,unsigned int), \ +DECLARE_S2V_IDENT_KERNELS(#srctype,long), \ +DECLARE_S2V_IDENT_KERNELS(#srctype,ulong), \ +DECLARE_S2V_IDENT_KERNELS(#srctype,unsigned long), \ +DECLARE_S2V_IDENT_KERNELS(#srctype,float), \ +DECLARE_EMPTY \ +} + +#define DECLARE_EMPTY_SET \ +{ \ +DECLARE_EMPTY, \ +DECLARE_EMPTY, \ +DECLARE_EMPTY, \ +DECLARE_EMPTY, \ +DECLARE_EMPTY, \ +DECLARE_EMPTY, \ +DECLARE_EMPTY, \ +DECLARE_EMPTY, \ +DECLARE_EMPTY, \ +DECLARE_EMPTY, \ +DECLARE_EMPTY, \ +DECLARE_EMPTY, \ +DECLARE_EMPTY, \ +DECLARE_EMPTY, \ +DECLARE_EMPTY \ +} + + +/* The overall array */ +const char * kernel_explicit_s2v_set[kNumExplicitTypes][kNumExplicitTypes][5] = { + DECLARE_S2V_IDENT_KERNELS_SET(bool), + DECLARE_S2V_IDENT_KERNELS_SET(char), + DECLARE_S2V_IDENT_KERNELS_SET(uchar), + DECLARE_S2V_IDENT_KERNELS_SET(unsigned char), + DECLARE_S2V_IDENT_KERNELS_SET(short), + DECLARE_S2V_IDENT_KERNELS_SET(ushort), + DECLARE_S2V_IDENT_KERNELS_SET(unsigned short), + DECLARE_S2V_IDENT_KERNELS_SET(int), + DECLARE_S2V_IDENT_KERNELS_SET(uint), + DECLARE_S2V_IDENT_KERNELS_SET(unsigned int), + DECLARE_S2V_IDENT_KERNELS_SET(long), + DECLARE_S2V_IDENT_KERNELS_SET(ulong), + DECLARE_S2V_IDENT_KERNELS_SET(unsigned long), + DECLARE_S2V_IDENT_KERNELS_SET(float), + DECLARE_EMPTY_SET +}; + +int test_explicit_s2v_function(cl_device_id deviceID, cl_context context, cl_command_queue queue, const char *programSrc, + ExplicitType srcType, unsigned int count, ExplicitType destType, unsigned int vecSize, 
void *inputData ) +{ + clProgramWrapper program; + clKernelWrapper kernel; + int error; + clMemWrapper streams[2]; + void *outData; + unsigned char convertedData[ 8 ]; /* Max type size is 8 bytes */ + size_t threadSize[3], groupSize[3]; + unsigned int i, s; + unsigned char *inPtr, *outPtr; + size_t paramSize, destTypeSize; + + const char* finalProgramSrc[2] = { + "", // optional pragma + programSrc + }; + + if (srcType == kDouble || destType == kDouble) { + finalProgramSrc[0] = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; + } + + + if( programSrc == NULL ) + return 0; + + paramSize = get_explicit_type_size( srcType ); + destTypeSize = get_explicit_type_size( destType ); + + size_t destStride = destTypeSize * vecSize; + + outData = malloc( destStride * count ); + + if( create_single_kernel_helper( context, &program, &kernel, 2, finalProgramSrc, "test_conversion" ) ) + { + log_info( "****** %s%s *******\n", finalProgramSrc[0], finalProgramSrc[1] ); + return -1; + } + + streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), paramSize * count, inputData, &error); + test_error( error, "clCreateBuffer failed"); + streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), destStride * count, NULL, &error); + test_error( error, "clCreateBuffer failed"); + + /* Set the arguments */ + error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0] ); + test_error( error, "Unable to set indexed kernel arguments" ); + error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1] ); + test_error( error, "Unable to set indexed kernel arguments" ); + + /* Run the kernel */ + threadSize[0] = count; + + error = get_max_common_work_group_size( context, kernel, threadSize[0], &groupSize[0] ); + test_error( error, "Unable to get work group size to use" ); + + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threadSize, groupSize, 0, NULL, NULL ); + test_error( error, "Unable to execute test kernel" ); + + /* Now verify the 
results. Each value should have been duplicated four times, and we should be able to just + do a memcpy instead of relying on the actual type of data */ + error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, destStride * count, outData, 0, NULL, NULL ); + test_error( error, "Unable to read output values!" ); + + inPtr = (unsigned char *)inputData; + outPtr = (unsigned char *)outData; + + for( i = 0; i < count; i++ ) + { + /* Convert the input data element to our output data type to compare against */ + convert_explicit_value( (void *)inPtr, (void *)convertedData, srcType, false, kDefaultRoundingType, destType ); + + /* Now compare every element of the vector */ + for( s = 0; s < vecSize; s++ ) + { + if( memcmp( convertedData, outPtr + destTypeSize * s, destTypeSize ) != 0 ) + { + unsigned int *p = (unsigned int *)outPtr; + log_error( "ERROR: Output value %d:%d does not validate for size %d:%d!\n", i, s, vecSize, (int)destTypeSize ); + log_error( " Input: 0x%0*x\n", (int)( paramSize * 2 ), *(unsigned int *)inPtr & ( 0xffffffff >> ( 32 - paramSize * 8 ) ) ); + log_error( " Actual: 0x%08x 0x%08x 0x%08x 0x%08x\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] ); + return -1; + } + } + inPtr += paramSize; + outPtr += destStride; + } + + free( outData ); + + return 0; +} + +int test_explicit_s2v_function_set(cl_device_id deviceID, cl_context context, cl_command_queue queue, ExplicitType srcType, + unsigned int count, void *inputData ) +{ + unsigned int sizes[] = { 2, 4, 8, 16, 0 }; + int i, dstType, failed = 0; + + + for( dstType = kBool; dstType < kNumExplicitTypes; dstType++ ) + { + if( dstType == kDouble && !is_extension_available( deviceID, "cl_khr_fp64" ) ) + continue; + + if (( dstType == kLong || dstType == kULong ) && !gHasLong ) + continue; + + for( i = 0; sizes[i] != 0; i++ ) + { + if( dstType != srcType ) + continue; + if( strchr( get_explicit_type_name( (ExplicitType)srcType ), ' ' ) != NULL || + strchr( get_explicit_type_name( (ExplicitType)dstType ), ' ' ) != NULL 
) + continue; + + if( test_explicit_s2v_function( deviceID, context, queue, kernel_explicit_s2v_set[ srcType ][ dstType ][ i ], + srcType, count, (ExplicitType)dstType, sizes[ i ], inputData ) != 0 ) + { + log_error( "ERROR: Explicit cast of scalar %s to vector %s%d FAILED; skipping other %s vector tests\n", + get_explicit_type_name(srcType), get_explicit_type_name((ExplicitType)dstType), sizes[i], get_explicit_type_name((ExplicitType)dstType) ); + failed = -1; + break; + } + } + } + + return failed; +} + +int test_explicit_s2v_bool(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + log_info( "NOTE: Boolean vectors not defined in OpenCL 1.0. Skipping test.\n" ); + return 0; +#if 0 + bool data[128]; + + generate_random_data( kBool, 128, data ); + + return test_explicit_s2v_function_set( deviceID, context, queue, kBool, 128, data ); +#endif +} + +int test_explicit_s2v_char(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + char data[128]; + RandomSeed seed(gRandomSeed); + + generate_random_data( kChar, 128, seed, data ); + + return test_explicit_s2v_function_set( deviceID, context, queue, kChar, 128, data ); +} + +int test_explicit_s2v_uchar(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + unsigned char data[128]; + RandomSeed seed(gRandomSeed); + + generate_random_data( kUChar, 128, seed, data ); + + if( test_explicit_s2v_function_set( deviceID, context, queue, kUChar, 128, data ) != 0 ) + return -1; + if( test_explicit_s2v_function_set( deviceID, context, queue, kUnsignedChar, 128, data ) != 0 ) + return -1; + return 0; +} + +int test_explicit_s2v_short(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + short data[128]; + RandomSeed seed(gRandomSeed); + + generate_random_data( kShort, 128, seed, data ); + + if( test_explicit_s2v_function_set( deviceID, context, queue, kShort, 128, data ) != 0 ) + return -1; + 
return 0; +} + +int test_explicit_s2v_ushort(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + unsigned short data[128]; + RandomSeed seed(gRandomSeed); + + generate_random_data( kUShort, 128, seed, data ); + + if( test_explicit_s2v_function_set( deviceID, context, queue, kUShort, 128, data ) != 0 ) + return -1; + if( test_explicit_s2v_function_set( deviceID, context, queue, kUnsignedShort, 128, data ) != 0 ) + return -1; + return 0; +} + +int test_explicit_s2v_int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int data[128]; + RandomSeed seed(gRandomSeed); + + generate_random_data( kInt, 128, seed, data ); + + if( test_explicit_s2v_function_set( deviceID, context, queue, kInt, 128, data ) != 0 ) + return -1; + return 0; +} + +int test_explicit_s2v_uint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + unsigned int data[128]; + RandomSeed seed(gRandomSeed); + + generate_random_data( kUInt, 128, seed, data ); + + if( test_explicit_s2v_function_set( deviceID, context, queue, kUInt, 128, data ) != 0 ) + return -1; + if( test_explicit_s2v_function_set( deviceID, context, queue, kUnsignedInt, 128, data ) != 0 ) + return -1; + return 0; +} + +int test_explicit_s2v_long(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + cl_long data[128]; + RandomSeed seed(gRandomSeed); + + generate_random_data( kLong, 128, seed, data ); + + if( test_explicit_s2v_function_set( deviceID, context, queue, kLong, 128, data ) != 0 ) + return -1; + return 0; +} + +int test_explicit_s2v_ulong(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + cl_ulong data[128]; + RandomSeed seed(gRandomSeed); + + generate_random_data( kULong, 128, seed, data ); + + if( test_explicit_s2v_function_set( deviceID, context, queue, kULong, 128, data ) != 0 ) + return -1; + if( test_explicit_s2v_function_set( deviceID, 
context, queue, kUnsignedLong, 128, data ) != 0 ) + return -1; + return 0; +} + +int test_explicit_s2v_float(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + float data[128]; + RandomSeed seed(gRandomSeed); + + generate_random_data( kFloat, 128, seed, data ); + + if( test_explicit_s2v_function_set( deviceID, context, queue, kFloat, 128, data ) != 0 ) + return -1; + return 0; +} + + +int test_explicit_s2v_double(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + double data[128]; + RandomSeed seed(gRandomSeed); + + if( !is_extension_available( deviceID, "cl_khr_fp64" ) ) { + log_info("Extension cl_khr_fp64 not supported. Skipping test.\n"); + return 0; + } + + generate_random_data( kDouble, 128, seed, data ); + + if( test_explicit_s2v_function_set( deviceID, context, queue, kDouble, 128, data ) != 0 ) + return -1; + return 0; +} + + diff --git a/test_conformance/basic/test_float2int.c b/test_conformance/basic/test_float2int.c new file mode 100644 index 00000000..55411728 --- /dev/null +++ b/test_conformance/basic/test_float2int.c @@ -0,0 +1,145 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include +#include + + +#include "procs.h" + +const char *float2int_kernel_code = +"__kernel void test_float2int(__global float *src, __global int *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = (int)src[tid];\n" +"\n" +"}\n"; + + +int +verify_float2int(cl_float *inptr, cl_int *outptr, int n) +{ + int i; + + for (i=0; i +#include +#include +#include +#include +#include "../../test_common/harness/rounding_mode.h" + +#include "procs.h" + +static const char *fpadd_kernel_code = +"__kernel void test_fpadd(__global float *srcA, __global float *srcB, __global float *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] + srcB[tid];\n" +"}\n"; + +static const char *fpsub_kernel_code = +"__kernel void test_fpsub(__global float *srcA, __global float *srcB, __global float *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] - srcB[tid];\n" +"}\n"; + +static const char *fpmul_kernel_code = +"__kernel void test_fpmul(__global float *srcA, __global float *srcB, __global float *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] * srcB[tid];\n" +"}\n"; + + +static const float MAX_ERR = 1e-5f; + +static int +verify_fpadd(float *inptrA, float *inptrB, float *outptr, int n) +{ + float r; + int i; + + for (i=0; i +#include +#include +#include +#include +#include "../../test_common/harness/rounding_mode.h" + + +#include "procs.h" + +const char *fpadd2_kernel_code = +"__kernel void test_fpadd2(__global float2 *srcA, __global float2 *srcB, __global float2 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] + srcB[tid];\n" +"}\n"; + +const char *fpsub2_kernel_code = +"__kernel void test_fpsub2(__global float2 *srcA, __global float2 *srcB, __global float2 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] - srcB[tid];\n" +"}\n"; + +const char 
*fpmul2_kernel_code = +"__kernel void test_fpmul2(__global float2 *srcA, __global float2 *srcB, __global float2 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] * srcB[tid];\n" +"}\n"; + + +int +verify_fpadd2(float *inptrA, float *inptrB, float *outptr, int n) +{ + float r; + int i; + + for (i=0; i +#include +#include +#include +#include + + +#include "procs.h" +#include "../../test_common/harness/rounding_mode.h" + +const char *fpadd4_kernel_code = +"__kernel void test_fpadd4(__global float4 *srcA, __global float4 *srcB, __global float4 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] + srcB[tid];\n" +"}\n"; + +const char *fpsub4_kernel_code = +"__kernel void test_fpsub4(__global float4 *srcA, __global float4 *srcB, __global float4 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] - srcB[tid];\n" +"}\n"; + +const char *fpmul4_kernel_code = +"__kernel void test_fpmul4(__global float4 *srcA, __global float4 *srcB, __global float4 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] * srcB[tid];\n" +"}\n"; + + +int +verify_fpadd4(float *inptrA, float *inptrB, float *outptr, int n) +{ + float r; + int i; + + for (i=0; i + +static const char *linear_ids_source[1] = { +"__kernel void test_linear_ids(__global int2 *out)\n" +"{\n" +" size_t lid, gid;\n" +" uint d = get_work_dim();\n" +" if (d == 1U) {\n" +" gid = get_global_id(0) - get_global_offset(0);\n" +" lid = get_local_id(0);\n" +" } else if (d == 2U) {\n" +" gid = (get_global_id(1) - get_global_offset(1)) * get_global_size(0) +\n" +" (get_global_id(0) - get_global_offset(0));\n" +" lid = get_local_id(1) * get_local_size(0) + get_local_id(0);\n" +" } else {\n" +" gid = ((get_global_id(2) - get_global_offset(2)) * get_global_size(1) +\n" +" (get_global_id(1) - get_global_offset(1))) * get_global_size(0) +\n" +" (get_global_id(0) - get_global_offset(0));\n" +" lid = (get_local_id(2) * 
get_local_size(1) +\n" +" get_local_id(1)) * get_local_size(0) + get_local_id(0);\n" +" }\n" +" out[gid].x = gid == get_global_linear_id();\n" +" out[gid].y = lid == get_local_linear_id();\n" +"}\n" +}; + +#define NUM_ITER 12 +#define MAX_1D 4096 +#define MAX_2D 64 +#define MAX_3D 16 +#define MAX_OFFSET 100000 + +int +test_get_linear_ids(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper outbuf; + int error, iter, i, j, k; + size_t lws[3], gws[3], gwo[3]; + cl_uint dims; + cl_int outmem[2*MAX_1D], *om; + + + // Create the kernel + error = create_single_kernel_helper_with_build_options(context, &program, &kernel, 1, linear_ids_source, "test_linear_ids", "-cl-std=CL2.0"); + if (error) + return error; + + // Create the out buffer + outbuf = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(outmem), NULL, &error); + test_error(error, "failed to create result buffer\n"); + + // This will leak if there is an error, but this is what is done everywhere else + MTdata seed = init_genrand(gRandomSeed); + + // Run some tests + for (iter=0; iter +#include +#include +#include +#include +#include "../../test_common/harness/rounding_mode.h" + +#include "procs.h" + +static const char *global_linear_id_2d_code = +"__kernel void test_global_linear_id_2d(global int *dst)\n" +"{\n" +" int tid_x = get_global_id(0);\n" +" int tid_y = get_global_id(1);\n" +"\n" +" int linear_id = tid_y * get_global_size(0) + tid_x;\n" +" int result = (linear_id == (int)get_global_linear_id()) ? 0x1 : 0x0;\n" +" dst[linear_id] = result;\n" +"}\n"; + +static const char *global_linear_id_1d_code = +"__kernel void test_global_linear_id_1d(global int *dst)\n" +"{\n" +" int tid_x = get_global_id(0);\n" +"\n" +" int result = (tid_x == (int)get_global_linear_id()) ? 
0x1 : 0x0;\n" +" dst[tid_x] = result;\n" +"}\n"; + + +static int +verify_global_linear_id(int *result, int n) +{ + int i; + for (i=0; i + + +const char *work_offset_test[] = { + "__kernel void test( __global int * outputID_A, \n" + " __global int * outputID_B, __global int * outputID_C )\n" + "{\n" + " size_t id0 = get_local_id( 0 ) + get_group_id( 0 ) * get_local_size( 0 );\n" + " size_t id1 = get_local_id( 1 ) + get_group_id( 1 ) * get_local_size( 1 );\n" + " size_t id2 = get_local_id( 2 ) + get_group_id( 2 ) * get_local_size( 2 );\n" + " size_t id = ( id2 * get_global_size( 0 ) * get_global_size( 1 ) ) + ( id1 * get_global_size( 0 ) ) + id0;\n" + "\n" + " outputID_A[ id ] = get_global_id( 0 );\n" + " outputID_B[ id ] = get_global_id( 1 );\n" + " outputID_C[ id ] = get_global_id( 2 );\n" + "}\n" + }; + +#define MAX_TEST_ITEMS 16 * 16 * 16 +#define NUM_TESTS 16 +#define MAX_OFFSET 256 + +#define CHECK_RANGE( v, m, c ) \ + if( ( v >= (cl_int)m ) || ( v < 0 ) ) \ + { \ + log_error( "ERROR: ouputID_%c[%lu]: %d is < 0 or >= %lu\n", c, i, v, m ); \ + return -1; \ + } + +int check_results( size_t threads[], size_t offsets[], cl_int outputA[], cl_int outputB[], cl_int outputC[] ) +{ + size_t offsettedSizes[ 3 ] = { threads[ 0 ] + offsets[ 0 ], threads[ 1 ] + offsets[ 1 ], threads[ 2 ] + offsets[ 2 ] }; + size_t limit = threads[ 0 ] * threads[ 1 ] * threads[ 2 ]; + + static char counts[ MAX_OFFSET + 32 ][ MAX_OFFSET + 16 ][ MAX_OFFSET + 16 ]; + memset( counts, 0, sizeof( counts ) ); + + for( size_t i = 0; i < limit; i++ ) + { + // Check ranges first + CHECK_RANGE( outputA[ i ], offsettedSizes[ 0 ], 'A' ) + CHECK_RANGE( outputB[ i ], offsettedSizes[ 1 ], 'B' ) + CHECK_RANGE( outputC[ i ], offsettedSizes[ 2 ], 'C' ) + + // Now set the value in the map + counts[ outputA[ i ] ][ outputB[ i ] ][ outputC[ i ] ]++; + } + + // Now check the map + int missed = 0, multiple = 0, errored = 0, corrected = 0; + for( size_t x = 0; x < offsettedSizes[ 0 ]; x++ ) + { + for( size_t y = 0; 
y < offsettedSizes[ 1 ]; y++ ) + { + for( size_t z = 0; z < offsettedSizes[ 2 ]; z++ ) + { + const char * limitMsg = " (further errors of this type suppressed)"; + if( ( x >= offsets[ 0 ] ) && ( y >= offsets[ 1 ] ) && ( z >= offsets[ 2 ] ) ) + { + if( counts[ x ][ y ][ z ] < 1 ) + { + if( missed < 3 ) + log_error( "ERROR: Map value (%ld,%ld,%ld) was missed%s\n", x, y, z, ( missed == 2 ) ? limitMsg : "" ); + missed++; + } + else if( counts[ x ][ y ][ z ] > 1 ) + { + if( multiple < 3 ) + log_error( "ERROR: Map value (%ld,%ld,%ld) was returned multiple times%s\n", x, y, z, ( multiple == 2 ) ? limitMsg : "" ); + multiple++; + } + } + else + { + if( counts[ x ][ y ][ z ] > 0 ) + { + if( errored < 3 ) + log_error( "ERROR: Map value (%ld,%ld,%ld) was erroneously returned%s\n", x, y, z, ( errored == 2 ) ? limitMsg : "" ); + errored++; + } + } + } + } + } + + if( missed || multiple || errored ) + { + size_t diffs[3] = { ( offsets[ 0 ] > threads[ 0 ] ? 0 : threads[ 0 ] - offsets[ 0 ] ), + ( offsets[ 1 ] > threads[ 1 ] ? 0 : threads[ 1 ] - offsets[ 1 ] ), + ( offsets[ 2 ] > threads[ 2 ] ? 
0 : threads[ 2 ] - offsets[ 2 ] ) }; + int diff = (int)( ( threads[ 0 ] - diffs[ 0 ] ) * ( threads[ 1 ] - diffs[ 1 ] ) * ( threads[ 2 ] - diffs[ 2 ] ) ); + + if( ( multiple == 0 ) && ( missed == diff ) && ( errored == diff ) ) + log_error( "ERROR: Global work offset values are not being respected by get_global_id()\n" ); + else + log_error( "ERROR: Global work offset values did not function as expected (%d missed, %d reported multiple times, %d erroneously hit)\n", + missed, multiple, errored ); + } + return ( missed | multiple | errored | corrected ); +} + +int test_global_work_offsets(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper streams[ 7 ]; + + int error; + size_t threads[] = {1,1,1}, localThreads[] = {1,1,1}, offsets[] = {0,0,0}; + cl_int outputA[ MAX_TEST_ITEMS ], outputB[ MAX_TEST_ITEMS ], outputC[ MAX_TEST_ITEMS ]; + + + // Create the kernel + if( create_single_kernel_helper( context, &program, &kernel, 1, work_offset_test, "test" ) != 0 ) + { + return -1; + } + + //// Create some output streams + + // Use just one output array to init them all (no need to init every single stack storage here) + memset( outputA, 0xff, sizeof( outputA ) ); + for( int i = 0; i < 3; i++ ) + { + streams[ i ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR), sizeof(outputA), outputA, &error ); + test_error( error, "Unable to create output array" ); + } + + // Run a few different times + MTdata seed = init_genrand( gRandomSeed ); + for( int test = 0; test < NUM_TESTS; test++ ) + { + // Choose a random combination of thread size, but in total less than MAX_TEST_ITEMS + threads[ 0 ] = random_in_range( 1, 32, seed ); + threads[ 1 ] = random_in_range( 1, 16, seed ); + threads[ 2 ] = random_in_range( 1, MAX_TEST_ITEMS / (int)( threads[ 0 ] * threads[ 1 ] ), seed ); + + // Make sure we get the local thread count right + error = 
get_max_common_3D_work_group_size( context, kernel, threads, localThreads ); + test_error( error, "Unable to determine local work group sizes" ); + + // Randomize some offsets + for( int j = 0; j < 3; j++ ) + offsets[ j ] = random_in_range( 0, MAX_OFFSET, seed ); + + log_info( "\tTesting %ld,%ld,%ld (%ld,%ld,%ld) with offsets (%ld,%ld,%ld)...\n", + threads[ 0 ], threads[ 1 ], threads[ 2 ], localThreads[ 0 ], localThreads[ 1 ], localThreads[ 2 ], + offsets[ 0 ], offsets[ 1 ], offsets[ 2 ] ); + + // Now set up and run + for( int i = 0; i < 3; i++ ) + { + error = clSetKernelArg( kernel, i, sizeof( streams[i] ), &streams[i] ); + test_error( error, "Unable to set indexed kernel arguments" ); + } + + error = clEnqueueNDRangeKernel( queue, kernel, 3, offsets, threads, localThreads, 0, NULL, NULL ); + test_error( error, "Kernel execution failed" ); + + // Read our results back now + cl_int * resultBuffers[] = { outputA, outputB, outputC }; + for( int i = 0; i < 3; i++ ) + { + error = clEnqueueReadBuffer( queue, streams[ i ], CL_TRUE, 0, sizeof( outputA ), resultBuffers[ i ], 0, NULL, NULL ); + test_error( error, "Unable to get result data" ); + } + + // Now we need to check the results. The outputs should have one entry for each possible ID, + // but they won't be in order, so we need to construct a count map to determine what we got + if( check_results( threads, offsets, outputA, outputB, outputC ) ) + { + log_error( "\t(Test failed for global dim %ld,%ld,%ld, local dim %ld,%ld,%ld, offsets %ld,%ld,%ld)\n", + threads[ 0 ], threads[ 1 ], threads[ 2 ], localThreads[ 0 ], localThreads[ 1 ], localThreads[ 2 ], + offsets[ 0 ], offsets[ 1 ], offsets[ 2 ] ); + return -1; + } + } + + free_mtdata(seed); + + // All done! 
+ return 0; +} + +const char *get_offset_test[] = { + "__kernel void test( __global int * outOffsets )\n" + "{\n" + " // We use local ID here so we don't have to worry about offsets\n" + " // Also note that these should be the same for ALL threads, so we won't worry about contention\n" + " outOffsets[ 0 ] = (int)get_global_offset( 0 );\n" + " outOffsets[ 1 ] = (int)get_global_offset( 1 );\n" + " outOffsets[ 2 ] = (int)get_global_offset( 2 );\n" + "}\n" +}; + +int test_get_global_offset(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper streams[ 1 ]; + + int error; + size_t threads[] = {1,1,1}, localThreads[] = {1,1,1}, offsets[] = {0,0,0}; + cl_int outOffsets[ 3 ]; + + + // Create the kernel + if( create_single_kernel_helper( context, &program, &kernel, 1, get_offset_test, "test" ) != 0 ) + { + return -1; + } + + // Create some output streams, and storage for a single control ID + memset( outOffsets, 0xff, sizeof( outOffsets ) ); + streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR), sizeof( outOffsets ), outOffsets, &error ); + test_error( error, "Unable to create control ID buffer" ); + + // Run a few different times + MTdata seed = init_genrand( gRandomSeed ); + for( int test = 0; test < NUM_TESTS; test++ ) + { + // Choose a random combination of thread size, but in total less than MAX_TEST_ITEMS + threads[ 0 ] = random_in_range( 1, 32, seed ); + threads[ 1 ] = random_in_range( 1, 16, seed ); + threads[ 2 ] = random_in_range( 1, MAX_TEST_ITEMS / (int)( threads[ 0 ] * threads[ 1 ] ), seed ); + + // Make sure we get the local thread count right + error = get_max_common_3D_work_group_size( context, kernel, threads, localThreads ); + test_error( error, "Unable to determine local work group sizes" ); + + // Randomize some offsets + for( int j = 0; j < 3; j++ ) + offsets[ j ] = random_in_range( 0, MAX_OFFSET, seed ); + + 
log_info( "\tTesting %ld,%ld,%ld (%ld,%ld,%ld) with offsets (%ld,%ld,%ld)...\n", + threads[ 0 ], threads[ 1 ], threads[ 2 ], localThreads[ 0 ], localThreads[ 1 ], localThreads[ 2 ], + offsets[ 0 ], offsets[ 1 ], offsets[ 2 ] ); + + // Now set up and run + error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] ); + test_error( error, "Unable to set indexed kernel arguments" ); + + error = clEnqueueNDRangeKernel( queue, kernel, 3, offsets, threads, localThreads, 0, NULL, NULL ); + test_error( error, "Kernel execution failed" ); + + // Read our results back now + error = clEnqueueReadBuffer( queue, streams[ 0 ], CL_TRUE, 0, sizeof( outOffsets ), outOffsets, 0, NULL, NULL ); + test_error( error, "Unable to get result data" ); + + // And check! + int errors = 0; + for( int j = 0; j < 3; j++ ) + { + if( outOffsets[ j ] != (cl_int)offsets[ j ] ) + { + log_error( "ERROR: get_global_offset( %d ) did not return expected value (expected %ld, got %d)\n", j, offsets[ j ], outOffsets[ j ] ); + errors++; + } + } + if( errors > 0 ) + return errors; + } + free_mtdata(seed); + + // All done! + return 0; +} + diff --git a/test_conformance/basic/test_hiloeo.c b/test_conformance/basic/test_hiloeo.c new file mode 100644 index 00000000..c9777ec8 --- /dev/null +++ b/test_conformance/basic/test_hiloeo.c @@ -0,0 +1,421 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include +#include + + +#include "procs.h" + +int hi_offset( int index, int vectorSize) { return index + vectorSize / 2; } +int lo_offset( int index, int vectorSize) { return index; } +int even_offset( int index, int vectorSize ) { return index * 2; } +int odd_offset( int index, int vectorSize ) { return index * 2 + 1; } + +typedef int (*OffsetFunc)( int index, int vectorSize ); +static const OffsetFunc offsetFuncs[4] = { hi_offset, lo_offset, even_offset, odd_offset }; +typedef int (*verifyFunc)( const void *, const void *, const void *, int n, const char *sizeName ); +static const char *operatorToUse_names[] = { "hi", "lo", "even", "odd" }; +static const char *test_str_names[] = { "char", "uchar", "short", "ushort", "int", "uint", "long", "ulong", "float", "double" }; + +static const unsigned int vector_sizes[] = { 1, 2, 3, 4, 8, 16}; +static const unsigned int vector_aligns[] = { 1, 2, 4, 4, 8, 16}; +static const unsigned int out_vector_idx[] = { 0, 0, 1, 1, 3, 4}; +// if input is size vector_sizes[i], output is size +// vector_sizes[out_vector_idx[i]] +// input type name is strcat(gentype, vector_size_names[i]); +// and output type name is +// strcat(gentype, vector_size_names[out_vector_idx[i]]); +static const int size_to_idx[] = {-1,0,1,2,3,-1,-1,-1,4, + -1,-1,-1,-1,-1,-1,-1,5}; +static const char *vector_size_names[] = { "", "2", "3", "4", "8", "16"}; + +static const size_t kSizes[] = { 1, 1, 2, 2, 4, 4, 8, 8, 4, 8 }; +static int CheckResults( void *in, void *out, size_t elementCount, int type, int vectorSize, int operatorToUse ); + +int test_hiloeo(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + cl_int *input_ptr, *output_ptr, *p; + int err; + cl_uint i; + int hasDouble = is_extension_available( device, "cl_khr_fp64" ); + cl_uint vectorSize, operatorToUse; + cl_uint type; + MTdata d; + + int expressionMode; + int numExpressionModes = 2; + + 
size_t length = sizeof(cl_int) * 4 * n_elems; + + input_ptr = (cl_int*)malloc(length); + output_ptr = (cl_int*)malloc(length); + + p = input_ptr; + d = init_genrand( gRandomSeed ); + for (i=0; i<4 * (cl_uint) n_elems; i++) + p[i] = genrand_int32(d); + free_mtdata(d); d = NULL; + + for( type = 0; type < sizeof( test_str_names ) / sizeof( test_str_names[0] ); type++ ) + { + // Note: restrict the element count here so we don't end up overrunning the output buffer if we're compensating for 32-bit writes + size_t elementCount = length / kSizes[type]; + cl_mem streams[2]; + + // skip double if unavailable + if( !hasDouble && ( 0 == strcmp( test_str_names[type], "double" ))) + continue; + + if( !gHasLong && + (( 0 == strcmp( test_str_names[type], "long" )) || + ( 0 == strcmp( test_str_names[type], "ulong" )))) + continue; + + log_info( "%s", test_str_names[type] ); + fflush( stdout ); + + // Set up data streams for the type + streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL); + if (!streams[0]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL); + if (!streams[1]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + + err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + log_error("clEnqueueWriteBuffer failed\n"); + return -1; + } + + for( operatorToUse = 0; operatorToUse < sizeof( operatorToUse_names ) / sizeof( operatorToUse_names[0] ); operatorToUse++ ) + { + log_info( " %s", operatorToUse_names[ operatorToUse ] ); + fflush( stdout ); + for( vectorSize = 1; vectorSize < sizeof( vector_size_names ) / sizeof( vector_size_names[0] ); vectorSize++ ) { + for(expressionMode = 0; expressionMode < numExpressionModes; ++expressionMode) { + + cl_program program = NULL; + cl_kernel kernel = NULL; + cl_uint outVectorSize = out_vector_idx[vectorSize]; + char expression[1024]; + + const char 
*source[] = { + "", // optional pragma string + "__kernel void test_", operatorToUse_names[ operatorToUse ], "_", test_str_names[type], vector_size_names[vectorSize], + "(__global ", test_str_names[type], vector_size_names[vectorSize], + " *srcA, __global ", test_str_names[type], vector_size_names[outVectorSize], + " *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " ", test_str_names[type], + vector_size_names[out_vector_idx[vectorSize]], + " tmp = ", expression, ".", operatorToUse_names[ operatorToUse ], ";\n" + " dst[tid] = tmp;\n" + "}\n" + }; + + if(expressionMode == 0) { + sprintf(expression, "srcA[tid]"); + } else if(expressionMode == 1) { + switch(vector_sizes[vectorSize]) { + case 16: + sprintf(expression, + "((%s16)(srcA[tid].s0, srcA[tid].s1, srcA[tid].s2, srcA[tid].s3, srcA[tid].s4, srcA[tid].s5, srcA[tid].s6, srcA[tid].s7, srcA[tid].s8, srcA[tid].s9, srcA[tid].sA, srcA[tid].sB, srcA[tid].sC, srcA[tid].sD, srcA[tid].sE, srcA[tid].sf))", + test_str_names[type] + ); + break; + case 8: + sprintf(expression, + "((%s8)(srcA[tid].s0, srcA[tid].s1, srcA[tid].s2, srcA[tid].s3, srcA[tid].s4, srcA[tid].s5, srcA[tid].s6, srcA[tid].s7))", + test_str_names[type] + ); + break; + case 4: + sprintf(expression, + "((%s4)(srcA[tid].s0, srcA[tid].s1, srcA[tid].s2, srcA[tid].s3))", + test_str_names[type] + ); + break; + case 3: + sprintf(expression, + "((%s3)(srcA[tid].s0, srcA[tid].s1, srcA[tid].s2))", + test_str_names[type] + ); + break; + case 2: + sprintf(expression, + "((%s2)(srcA[tid].s0, srcA[tid].s1))", + test_str_names[type] + ); + break; + default : + sprintf(expression, "srcA[tid]"); + log_info("Default\n"); + } + } else { + sprintf(expression, "srcA[tid]"); + } + + if (0 == strcmp( test_str_names[type], "double" )) + source[0] = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; + + char kernelName[128]; + snprintf( kernelName, sizeof( kernelName ), "test_%s_%s%s", operatorToUse_names[ operatorToUse ], test_str_names[type], 
vector_size_names[vectorSize] ); + err = create_single_kernel_helper(context, &program, &kernel, sizeof( source ) / sizeof( source[0] ), source, kernelName ); + if (err) + return -1; + + err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]); + err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]); + if (err != CL_SUCCESS) + { + log_error("clSetKernelArgs failed\n"); + return -1; + } + + //Wipe the output buffer clean + uint32_t pattern = 0xdeadbeef; + memset_pattern4( output_ptr, &pattern, length ); + err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + log_error("clEnqueueWriteBuffer failed\n"); + return -1; + } + + size_t size = elementCount / (vector_aligns[vectorSize]); + err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &size, NULL, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + log_error("clEnqueueNDRangeKernel failed\n"); + return -1; + } + + err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + log_error("clEnqueueReadBuffer failed\n"); + return -1; + } + + char *inP = (char *)input_ptr; + char *outP = (char *)output_ptr; + outP += kSizes[type] * ( ( vector_sizes[outVectorSize] ) - + ( vector_sizes[ out_vector_idx[vectorSize] ] ) ); + // was outP += kSizes[type] * ( ( 1 << outVectorSize ) - ( 1 << ( vectorSize - 1 ) ) ); + for( size_t e = 0; e < size; e++ ) + { + if( CheckResults( inP, outP, 1, type, vectorSize, operatorToUse ) ) { + + log_info("e is %d\n", (int)e); + fflush(stdout); + // break; + return -1; + } + inP += kSizes[type] * ( vector_aligns[vectorSize] ); + outP += kSizes[type] * ( vector_aligns[outVectorSize] ); + } + + clReleaseKernel( kernel ); + clReleaseProgram( program ); + log_info( "." 
); + fflush( stdout ); + } + } + } + + clReleaseMemObject( streams[0] ); + clReleaseMemObject( streams[1] ); + log_info( "done\n" ); + } + + log_info("HiLoEO test passed\n"); + + free(input_ptr); + free(output_ptr); + + return err; +} + +static int CheckResults( void *in, void *out, size_t elementCount, int type, int vectorSize, int operatorToUse ) +{ + cl_ulong array[8]; + void *p = array; + size_t halfVectorSize = vector_sizes[out_vector_idx[vectorSize]]; + size_t cmpVectorSize = vector_sizes[out_vector_idx[vectorSize]]; + // was 1 << (vectorSize-1); + OffsetFunc f = offsetFuncs[ operatorToUse ]; + size_t elementSize = kSizes[type]; + + if(vector_size_names[vectorSize][0] == '3') { + if(operatorToUse_names[operatorToUse][0] == 'h' || + operatorToUse_names[operatorToUse][0] == 'o') // hi or odd + { + cmpVectorSize = 1; // special case for vec3 ignored values + } + } + + switch( elementSize ) + { + case 1: + { + char *i = (char*)in; + char *o = (char*)out; + size_t j; + cl_uint k; + OffsetFunc f = offsetFuncs[ operatorToUse ]; + + for( k = 0; k < elementCount; k++ ) + { + char *o2 = (char*)p; + for( j = 0; j < halfVectorSize; j++ ) + o2[j] = i[ f((int)j, (int)halfVectorSize*2) ]; + + if( memcmp( o, o2, elementSize * cmpVectorSize ) ) + { + log_info( "\n%d) Failure for %s%s.%s { %d", k, test_str_names[type], vector_size_names[ vectorSize ], operatorToUse_names[ operatorToUse ], i[0] ); + for( j = 1; j < halfVectorSize * 2; j++ ) + log_info( ", %d", i[j] ); + log_info( " } --> { %d", o[0] ); + for( j = 1; j < halfVectorSize; j++ ) + log_info( ", %d", o[j] ); + log_info( " }\n" ); + return -1; + } + i += 2 * halfVectorSize; + o += halfVectorSize; + } + } + break; + + case 2: + { + short *i = (short*)in; + short *o = (short*)out; + size_t j; + cl_uint k; + + for( k = 0; k < elementCount; k++ ) + { + short *o2 = (short*)p; + for( j = 0; j < halfVectorSize; j++ ) + o2[j] = i[ f((int)j, (int)halfVectorSize*2) ]; + + if( memcmp( o, o2, elementSize * cmpVectorSize ) ) + { + 
log_info( "\n%d) Failure for %s%s.%s { %d", k, test_str_names[type], vector_size_names[ vectorSize ], operatorToUse_names[ operatorToUse ], i[0] ); + for( j = 1; j < halfVectorSize * 2; j++ ) + log_info( ", %d", i[j] ); + log_info( " } --> { %d", o[0] ); + for( j = 1; j < halfVectorSize; j++ ) + log_info( ", %d", o[j] ); + log_info( " }\n" ); + return -1; + } + i += 2 * halfVectorSize; + o += halfVectorSize; + } + } + break; + + case 4: + { + int *i = (int*)in; + int *o = (int*)out; + size_t j; + cl_uint k; + + for( k = 0; k < elementCount; k++ ) + { + int *o2 = (int *)p; + for( j = 0; j < halfVectorSize; j++ ) + o2[j] = i[ f((int)j, (int)halfVectorSize*2) ]; + + for( j = 0; j < cmpVectorSize; j++ ) + { + /* Allow float nans to be binary different */ + if( memcmp( &o[j], &o2[j], elementSize ) && !((strcmp(test_str_names[type], "float") == 0) && isnan(((float *)o)[j]) && isnan(((float *)o2)[j]))) + { + log_info( "\n%d) Failure for %s%s.%s { 0x%8.8x", k, test_str_names[type], vector_size_names[ vectorSize ], operatorToUse_names[ operatorToUse ], i[0] ); + for( j = 1; j < halfVectorSize * 2; j++ ) + log_info( ", 0x%8.8x", i[j] ); + log_info( " } --> { 0x%8.8x", o[0] ); + for( j = 1; j < halfVectorSize; j++ ) + log_info( ", 0x%8.8x", o[j] ); + log_info( " }\n" ); + return -1; + } + } + i += 2 * halfVectorSize; + o += halfVectorSize; + } + } + break; + + case 8: + { + cl_ulong *i = (cl_ulong*)in; + cl_ulong *o = (cl_ulong*)out; + size_t j; + cl_uint k; + + for( k = 0; k < elementCount; k++ ) + { + cl_ulong *o2 = (cl_ulong*)p; + for( j = 0; j < halfVectorSize; j++ ) + o2[j] = i[ f((int)j, (int)halfVectorSize*2) ]; + + if( memcmp( o, o2, elementSize * cmpVectorSize ) ) + { + log_info( "\n%d) Failure for %s%s.%s { 0x%16.16llx", k, test_str_names[type], vector_size_names[ vectorSize ], operatorToUse_names[ operatorToUse ], i[0] ); + for( j = 1; j < halfVectorSize * 2; j++ ) + log_info( ", 0x%16.16llx", i[j] ); + log_info( " } --> { 0x%16.16llx", o[0] ); + for( j = 1; j < 
halfVectorSize; j++ ) + log_info( ", 0x%16.16llx", o[j] ); + log_info( " }\n" ); + return -1; + } + i += 2 * halfVectorSize; + o += halfVectorSize; + } + } + break; + + default: + log_info( "Internal error. Unknown data type\n" ); + return -2; + } + + return 0; +} + + + diff --git a/test_conformance/basic/test_hostptr.c b/test_conformance/basic/test_hostptr.c new file mode 100644 index 00000000..4990dfd2 --- /dev/null +++ b/test_conformance/basic/test_hostptr.c @@ -0,0 +1,277 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include +#include + + +#include "procs.h" + +const char *hostptr_kernel_code = +"__kernel void test_hostptr(__global float *srcA, __global float *srcB, __global float *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] + srcB[tid];\n" +"}\n"; + +static const float MAX_ERR = 1e-5f; + +static int verify_hostptr(cl_float *inptrA, cl_float *inptrB, cl_float *outptr, int n) +{ + cl_float r; + int i; + + for (i=0; i +#include +#include +#include +#include + + +#include "procs.h" + +const char *conditional_kernel_code = +"__kernel void test_if(__global int *src, __global int *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" if (src[tid] == 0)\n" +" dst[tid] = 0x12345678;\n" +" else if (src[tid] == 1)\n" +" dst[tid] = 0x23456781;\n" +" else if (src[tid] == 2)\n" +" dst[tid] = 0x34567812;\n" +" else if (src[tid] == 3)\n" +" dst[tid] = 0x45678123;\n" +" else if (src[tid] == 4)\n" +" dst[tid] = 0x56781234;\n" +" else if (src[tid] == 5)\n" +" dst[tid] = 0x67812345;\n" +" else if (src[tid] == 6)\n" +" dst[tid] = 0x78123456;\n" +" else if (src[tid] == 7)\n" +" dst[tid] = 0x81234567;\n" +" else\n" +" dst[tid] = 0x7FFFFFFF;\n" +"\n" +"}\n"; + +const int results[] = { + 0x12345678, + 0x23456781, + 0x34567812, + 0x45678123, + 0x56781234, + 0x67812345, + 0x78123456, + 0x81234567, +}; + +int +verify_if(int *inptr, int *outptr, int n) +{ + int r, i; + + for (i=0; i +#include +#include +#include +#include + + +#include "procs.h" + +static const char *image_to_image_kernel_integer_coord_code = +"\n" +"__kernel void image_to_image_copy(read_only image2d_t srcimg, write_only image2d_t dstimg, sampler_t sampler)\n" +"{\n" +" int tid_x = get_global_id(0);\n" +" int tid_y = get_global_id(1);\n" +" float4 color;\n" +"\n" +" color = read_imagef(srcimg, sampler, (int2)(tid_x, tid_y));\n" +" write_imagef(dstimg, (int2)(tid_x, tid_y), color);\n" +"\n" +"}\n"; + +static const 
char *image_to_image_kernel_float_coord_code = +"\n" +"__kernel void image_to_image_copy(read_only image2d_t srcimg, write_only image2d_t dstimg, sampler_t sampler)\n" +"{\n" +" int tid_x = get_global_id(0);\n" +" int tid_y = get_global_id(1);\n" +" float4 color;\n" +"\n" +" color = read_imagef(srcimg, sampler, (float2)((float)tid_x, (float)tid_y));\n" +" write_imagef(dstimg, (int2)(tid_x, tid_y), color);\n" +"\n" +"}\n"; + + +static const char *image_sum_kernel_integer_coord_code = +"\n" +"__kernel void image_sum(read_only image2d_t srcimg0, read_only image2d_t srcimg1, write_only image2d_t dstimg, sampler_t sampler)\n" +"{\n" +" int tid_x = get_global_id(0);\n" +" int tid_y = get_global_id(1);\n" +" float4 color0;\n" +" float4 color1;\n" +"\n" +" color0 = read_imagef(srcimg0, sampler, (int2)(tid_x, tid_y));\n" +" color1 = read_imagef(srcimg1, sampler, (int2)(tid_x, tid_y));\n" +" write_imagef(dstimg, (int2)(tid_x, tid_y), color0 + color1);\n" +"\n" +"}\n"; + + +static const char *image_sum_kernel_float_coord_code = +"\n" +"__kernel void image_sum(read_only image2d_t srcimg0, read_only image2d_t srcimg1, write_only image2d_t dstimg, sampler_t sampler)\n" +"{\n" +" int tid_x = get_global_id(0);\n" +" int tid_y = get_global_id(1);\n" +" float4 color0;\n" +" float4 color1;\n" +"\n" +" color0 = read_imagef(srcimg0, sampler, (float2)((float)tid_x, (float)tid_y));\n" +" color1 = read_imagef(srcimg1, sampler, (float2)((float)tid_x, (float)tid_y));\n" +" write_imagef(dstimg,(int2)(tid_x, tid_y), color0 + color1);\n" +"\n" +"}\n"; + + +static unsigned char * +generate_initial_byte_image(int w, int h, int num_elements, unsigned char value) +{ + unsigned char *ptr = (unsigned char*)malloc(w * h * num_elements); + int i; + + for (i = 0; i < w*h*num_elements; i++) + ptr[i] = value; + + return ptr; +} + +static unsigned char * +generate_expected_byte_image(unsigned char **input_data, int num_inputs, int w, int h, int num_elements) +{ + unsigned char *ptr = (unsigned 
char*)malloc(w * h * num_elements); + int i; + + for (i = 0; i < w*h*num_elements; i++) + { + int j; + ptr[i] = 0; + for (j = 0; j < num_inputs; j++) + { + unsigned char *input = *(input_data + j); + ptr[i] += input[i]; + } + } + + return ptr; +} + + +static unsigned char * +generate_byte_image(int w, int h, int num_elements, MTdata d) +{ + unsigned char *ptr = (unsigned char*)malloc(w * h * num_elements); + int i; + + for (i = 0; i < w*h*num_elements; i++) + ptr[i] = (unsigned char)genrand_int32(d) & 31; + + return ptr; +} + +static int +verify_byte_image(unsigned char *image, unsigned char *outptr, int w, int h, int num_elements) +{ + int i; + + for (i = 0; i < w*h*num_elements; i++) + { + if (outptr[i] != image[i]) + { + return -1; + } + } + return 0; +} + +int +test_image_multipass_integer_coord(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + int img_width = 512; + int img_height = 512; + cl_image_format img_format; + + int num_input_streams = 8; + cl_mem *input_streams; + cl_mem accum_streams[2]; + unsigned char *expected_output; + unsigned char *output_ptr; + cl_kernel kernel[2]; + int err; + + PASSIVE_REQUIRE_IMAGE_SUPPORT( device ) + + img_format.image_channel_order = CL_RGBA; + img_format.image_channel_data_type = CL_UNORM_INT8; + + expected_output = (unsigned char*)malloc(sizeof(unsigned char) * 4 * img_width * img_height); + output_ptr = (unsigned char*)malloc(sizeof(unsigned char) * 4 * img_width * img_height); + + // Create the accum images with initial data. 
+ { + unsigned char *initial_data; + cl_mem_flags flags; + + initial_data = generate_initial_byte_image(img_width, img_height, 4, 0xF0); + flags = (cl_mem_flags)(CL_MEM_READ_WRITE); + + accum_streams[0] = create_image_2d(context, flags, &img_format, img_width, img_height, 0, NULL, NULL); + if (!accum_streams[0]) + { + log_error("create_image_2d failed\n"); + free(expected_output); + free(output_ptr); + return -1; + } + + size_t origin[3] = {0, 0, 0}, region[3] = {img_width, img_height, 1}; + err = clEnqueueWriteImage(queue, accum_streams[0], CL_TRUE, + origin, region, 0, 0, + initial_data, 0, NULL, NULL); + if (err) + { + log_error("clWriteImage failed: %d\n", err); + free(expected_output); + free(output_ptr); + return -1; + } + + accum_streams[1] = create_image_2d(context, flags, &img_format, img_width, img_height, 0, NULL, NULL); + if (!accum_streams[1]) + { + log_error("create_image_2d failed\n"); + free(expected_output); + free(output_ptr); + return -1; + } + err = clEnqueueWriteImage(queue, accum_streams[1], CL_TRUE, + origin, region, 0, 0, + initial_data, 0, NULL, NULL); + if (err) + { + log_error("clWriteImage failed: %d\n", err); + free(expected_output); + free(output_ptr); + return -1; + } + + free(initial_data); + } + + // Set up the input data. 
+ { + cl_mem_flags flags; + unsigned char **input_data = (unsigned char **)malloc(sizeof(unsigned char*) * num_input_streams); + MTdata d; + + input_streams = (cl_mem*)malloc(sizeof(cl_mem) * num_input_streams); + flags = (cl_mem_flags)(CL_MEM_READ_WRITE); + + int i; + d = init_genrand( gRandomSeed ); + for ( i = 0; i < num_input_streams; i++) + { + input_data[i] = generate_byte_image(img_width, img_height, 4, d); + input_streams[i] = create_image_2d(context, flags, &img_format, img_width, img_height, 0, NULL, NULL); + if (!input_streams[i]) + { + log_error("create_image_2d failed\n"); + free_mtdata(d); + free(expected_output); + free(output_ptr); + return -1; + } + + size_t origin[3] = {0, 0, 0}, region[3] = {img_width, img_height, 1}; + err = clEnqueueWriteImage(queue, input_streams[i], CL_TRUE, + origin, region, 0, 0, + input_data[i], 0, NULL, NULL); + if (err) + { + log_error("clWriteImage failed: %d\n", err); + free_mtdata(d); + free(expected_output); + free(output_ptr); + free(input_streams); + return -1; + } + + + } + free_mtdata(d); d = NULL; + expected_output = generate_expected_byte_image(input_data, num_input_streams, img_width, img_height, 4); + for ( i = 0; i < num_input_streams; i++) + { + free(input_data[i]); + } + free( input_data ); + } + + // Set up the kernels. 
+ { + cl_program program[4]; + + err = create_single_kernel_helper(context, &program[0], &kernel[0], 1, &image_to_image_kernel_integer_coord_code, "image_to_image_copy"); + if (err) + { + log_error("Failed to create kernel 0: %d\n", err); + return -1; + } + err = create_single_kernel_helper(context, &program[1], &kernel[1], 1, &image_sum_kernel_integer_coord_code, "image_sum"); + if (err) + { + log_error("Failed to create kernel 1: %d\n", err); + return -1; + } + clReleaseProgram(program[0]); + clReleaseProgram(program[1]); + } + + cl_sampler_properties properties[] = { + CL_SAMPLER_NORMALIZED_COORDS, CL_FALSE, + CL_SAMPLER_ADDRESSING_MODE, CL_ADDRESS_CLAMP_TO_EDGE, + CL_SAMPLER_FILTER_MODE, CL_FILTER_NEAREST, + 0 }; + cl_sampler sampler = clCreateSamplerWithProperties(context, properties, &err); + test_error(err, "clCreateSamplerWithProperties failed"); + + { + size_t threads[3] = {0, 0, 0}; + threads[0] = (size_t)img_width; + threads[1] = (size_t)img_height; + int i; + + { + cl_mem accum_input; + cl_mem accum_output; + + err = clSetKernelArg(kernel[0], 0, sizeof input_streams[0], &input_streams[0]); + err |= clSetKernelArg(kernel[0], 1, sizeof accum_streams[0], &accum_streams[0]); + err |= clSetKernelArg(kernel[0], 2, sizeof sampler, &sampler); + if (err != CL_SUCCESS) + { + log_error("clSetKernelArgs failed\n"); + return -1; + } + err = clEnqueueNDRangeKernel( queue, kernel[0], 2, NULL, threads, NULL, 0, NULL, NULL ); + if (err != CL_SUCCESS) + { + log_error("clEnqueueNDRangeKernel failed\n"); + return -1; + } + + for (i = 1; i < num_input_streams; i++) + { + accum_input = accum_streams[(i-1)%2]; + accum_output = accum_streams[i%2]; + + err = clSetKernelArg(kernel[1], 0, sizeof accum_input, &accum_input); + err |= clSetKernelArg(kernel[1], 1, sizeof input_streams[i], &input_streams[i]); + err |= clSetKernelArg(kernel[1], 2, sizeof accum_output, &accum_output); + err |= clSetKernelArg(kernel[1], 3, sizeof sampler, &sampler); + + if (err != CL_SUCCESS) + { + 
log_error("clSetKernelArgs failed\n"); + return -1; + } + err = clEnqueueNDRangeKernel( queue, kernel[1], 2, NULL, threads, NULL, 0, NULL, NULL ); + if (err != CL_SUCCESS) + { + log_error("clEnqueueNDRangeKernel failed\n"); + return -1; + } + } + + // Copy the last accum into the other one. + accum_input = accum_streams[(i-1)%2]; + accum_output = accum_streams[i%2]; + err = clSetKernelArg(kernel[0], 0, sizeof accum_input, &accum_input); + err |= clSetKernelArg(kernel[0], 1, sizeof accum_output, &accum_output); + if (err != CL_SUCCESS) + { + log_error("clSetKernelArgs failed\n"); + return -1; + } + err = clEnqueueNDRangeKernel( queue, kernel[0], 2, NULL, threads, NULL, 0, NULL, NULL ); + if (err != CL_SUCCESS) + { + log_error("clEnqueueNDRangeKernel failed\n"); + return -1; + } + + size_t origin[3] = {0, 0, 0}, region[3] = {img_width, img_height, 1}; + err = clEnqueueReadImage(queue, accum_output, CL_TRUE, + origin, region, 0, 0, + (void *)output_ptr, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + log_error("clReadImage failed\n"); + return -1; + } + err = verify_byte_image(expected_output, output_ptr, img_width, img_height, 4); + if (err) + { + log_error("IMAGE_MULTIPASS test failed.\n"); + } + else + { + log_info("IMAGE_MULTIPASS test passed\n"); + } + } + + clReleaseSampler(sampler); + } + + + // cleanup + clReleaseMemObject(accum_streams[0]); + clReleaseMemObject(accum_streams[1]); + { + int i; + for (i = 0; i < num_input_streams; i++) + { + clReleaseMemObject(input_streams[i]); + } + } + free(input_streams); + clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + free(expected_output); + free(output_ptr); + + return err; +} + +int +test_image_multipass_float_coord(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + int img_width = 512; + int img_height = 512; + cl_image_format img_format; + + int num_input_streams = 8; + cl_mem *input_streams; + cl_mem accum_streams[2]; + unsigned char *expected_output; + unsigned char 
*output_ptr; + cl_kernel kernel[2]; + int err; + + PASSIVE_REQUIRE_IMAGE_SUPPORT( device ) + + img_format.image_channel_order = CL_RGBA; + img_format.image_channel_data_type = CL_UNORM_INT8; + + output_ptr = (unsigned char*)malloc(sizeof(unsigned char) * 4 * img_width * img_height); + + // Create the accum images with initial data. + { + unsigned char *initial_data; + cl_mem_flags flags; + + initial_data = generate_initial_byte_image(img_width, img_height, 4, 0xF0); + flags = (cl_mem_flags)(CL_MEM_READ_WRITE); + + accum_streams[0] = create_image_2d(context, flags, &img_format, img_width, img_height, 0, NULL, NULL); + if (!accum_streams[0]) + { + log_error("create_image_2d failed\n"); + return -1; + } + + size_t origin[3] = {0, 0, 0}, region[3] = {img_width, img_height, 1}; + err = clEnqueueWriteImage(queue, accum_streams[0], CL_TRUE, + origin, region, 0, 0, + initial_data, 0, NULL, NULL); + if (err) + { + log_error("clWriteImage failed: %d\n", err); + return -1; + } + + accum_streams[1] = create_image_2d(context, flags, &img_format, img_width, img_height, 0, NULL, NULL); + if (!accum_streams[1]) + { + log_error("create_image_2d failed\n"); + return -1; + } + err = clEnqueueWriteImage(queue, accum_streams[1], CL_TRUE, + origin, region, 0, 0, + initial_data, 0, NULL, NULL); + if (err) + { + log_error("clWriteImage failed: %d\n", err); + return -1; + } + + free(initial_data); + } + + // Set up the input data. 
+ { + cl_mem_flags flags; + unsigned char **input_data = (unsigned char **)malloc(sizeof(unsigned char*) * num_input_streams); + MTdata d; + + input_streams = (cl_mem*)malloc(sizeof(cl_mem) * num_input_streams); + flags = (cl_mem_flags)(CL_MEM_READ_WRITE); + + int i; + d = init_genrand( gRandomSeed ); + for ( i = 0; i < num_input_streams; i++) + { + input_data[i] = generate_byte_image(img_width, img_height, 4, d); + input_streams[i] = create_image_2d(context, flags, &img_format, img_width, img_height, 0, NULL, NULL); + if (!input_streams[i]) + { + log_error("create_image_2d failed\n"); + free(input_data); + free(input_streams); + return -1; + } + + size_t origin[3] = {0, 0, 0}, region[3] = {img_width, img_height, 1}; + err = clEnqueueWriteImage(queue, input_streams[i], CL_TRUE, + origin, region, 0, 0, + input_data[i], 0, NULL, NULL); + if (err) + { + log_error("clWriteImage failed: %d\n", err); + free(input_data); + free(input_streams); + return -1; + } + } + free_mtdata(d); d = NULL; + expected_output = generate_expected_byte_image(input_data, num_input_streams, img_width, img_height, 4); + for ( i = 0; i < num_input_streams; i++) + { + free(input_data[i]); + } + free(input_data); + } + + // Set up the kernels. 
+ { + cl_program program[2]; + + err = create_single_kernel_helper(context, &program[0], &kernel[0], 1, &image_to_image_kernel_float_coord_code, "image_to_image_copy"); + if (err) + { + log_error("Failed to create kernel 2: %d\n", err); + return -1; + } + err = create_single_kernel_helper(context, &program[1], &kernel[1], 1, &image_sum_kernel_float_coord_code, "image_sum"); + if (err) + { + log_error("Failed to create kernel 3: %d\n", err); + return -1; + } + + clReleaseProgram(program[0]); + clReleaseProgram(program[1]); + } + + cl_sampler_properties properties[] = { + CL_SAMPLER_NORMALIZED_COORDS, CL_FALSE, + CL_SAMPLER_ADDRESSING_MODE, CL_ADDRESS_CLAMP_TO_EDGE, + CL_SAMPLER_FILTER_MODE, CL_FILTER_NEAREST, + 0 }; + cl_sampler sampler = clCreateSamplerWithProperties(context, properties, &err); + test_error(err, "clCreateSamplerWithProperties failed"); + + { + size_t threads[3] = {0, 0, 0}; + threads[0] = (size_t)img_width; + threads[1] = (size_t)img_height; + int i; + + { + cl_mem accum_input; + cl_mem accum_output; + + err = clSetKernelArg(kernel[0], 0, sizeof input_streams[0], &input_streams[0]); + err |= clSetKernelArg(kernel[0], 1, sizeof accum_streams[0], &accum_streams[0]); + err |= clSetKernelArg(kernel[0], 2, sizeof sampler, &sampler); + if (err != CL_SUCCESS) + { + log_error("clSetKernelArgs failed\n"); + return -1; + } + err = clEnqueueNDRangeKernel( queue, kernel[0], 2, NULL, threads, NULL, 0, NULL, NULL ); + if (err != CL_SUCCESS) + { + log_error("clEnqueueNDRangeKernel failed\n"); + return -1; + } + + for (i = 1; i < num_input_streams; i++) + { + accum_input = accum_streams[(i-1)%2]; + accum_output = accum_streams[i%2]; + + err = clSetKernelArg(kernel[1], 0, sizeof accum_input, &accum_input); + err |= clSetKernelArg(kernel[1], 1, sizeof input_streams[i], &input_streams[i]); + err |= clSetKernelArg(kernel[1], 2, sizeof accum_output, &accum_output); + err |= clSetKernelArg(kernel[1], 3, sizeof sampler, &sampler); + + if (err != CL_SUCCESS) + { + 
log_error("clSetKernelArgs failed\n"); + return -1; + } + err = clEnqueueNDRangeKernel( queue, kernel[1], 2, NULL, threads, NULL, 0, NULL, NULL ); + if (err != CL_SUCCESS) + { + log_error("clEnqueueNDRangeKernel failed\n"); + return -1; + } + } + + // Copy the last accum into the other one. + accum_input = accum_streams[(i-1)%2]; + accum_output = accum_streams[i%2]; + err = clSetKernelArg(kernel[0], 0, sizeof accum_input, &accum_input); + err |= clSetKernelArg(kernel[0], 1, sizeof accum_output, &accum_output); + if (err != CL_SUCCESS) + { + log_error("clSetKernelArgs failed\n"); + return -1; + } + err = clEnqueueNDRangeKernel( queue, kernel[0], 2, NULL, threads, NULL, 0, NULL, NULL ); + if (err != CL_SUCCESS) + { + log_error("clEnqueueNDRangeKernel failed\n"); + return -1; + } + + size_t origin[3] = {0, 0, 0}, region[3] = {img_width, img_height, 1}; + err = clEnqueueReadImage(queue, accum_output, CL_TRUE, + origin, region, 0, 0, + (void *)output_ptr, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + log_error("clReadImage failed\n"); + return -1; + } + err = verify_byte_image(expected_output, output_ptr, img_width, img_height, 4); + if (err) + { + log_error("IMAGE_MULTIPASS test failed.\n"); + } + else + { + log_info("IMAGE_MULTIPASS test passed\n"); + } + } + + } + + + // cleanup + clReleaseSampler(sampler); + clReleaseMemObject(accum_streams[0]); + clReleaseMemObject(accum_streams[1]); + { + int i; + for (i = 0; i < num_input_streams; i++) + { + clReleaseMemObject(input_streams[i]); + } + } + clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + free(expected_output); + free(output_ptr); + free(input_streams); + + return err; +} + + + + + diff --git a/test_conformance/basic/test_image_param.c b/test_conformance/basic/test_image_param.c new file mode 100644 index 00000000..e9e99fea --- /dev/null +++ b/test_conformance/basic/test_image_param.c @@ -0,0 +1,290 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include +#include + + +#include "procs.h" +#include "../../test_common/harness/typeWrappers.h" +#include "../../test_common/harness/imageHelpers.h" +#include "../../test_common/harness/conversions.h" + + +static const char *param_kernel[] = { +"__kernel void test_fn(read_only image2d_t srcimg, sampler_t sampler, __global float4 *results )\n" +"{\n" +" int tid_x = get_global_id(0);\n" +" int tid_y = get_global_id(1);\n" +" results[ tid_y * get_image_width( srcimg ) + tid_x ] = read_imagef(srcimg, sampler, (int2)(tid_x, tid_y));\n" +"\n" +"}\n" }; + +int validate_results( size_t width, size_t height, cl_image_format &format, char *inputData, cl_float *actualResults ) +{ + for( size_t i = 0; i < width * height; i++ ) + { + cl_float expected[ 4 ], tolerance; + + switch( format.image_channel_data_type ) + { + case CL_UNORM_INT8: + { + cl_uchar *p = (cl_uchar *)inputData; + expected[ 0 ] = p[ 0 ] / 255.f; + expected[ 1 ] = p[ 1 ] / 255.f; + expected[ 2 ] = p[ 2 ] / 255.f; + expected[ 3 ] = p[ 3 ] / 255.f; + tolerance = 1.f / 255.f; + break; + } + case CL_SNORM_INT8: + { + cl_char *p = (cl_char *)inputData; + expected[ 0 ] = fmaxf( p[ 0 ] / 127.f, -1.f ); + expected[ 1 ] = fmaxf( p[ 1 ] / 127.f, -1.f ); + expected[ 2 ] = fmaxf( p[ 2 ] / 127.f, -1.f ); + expected[ 3 ] = fmaxf( p[ 3 ] / 127.f, -1.f ); + tolerance = 1.f / 127.f; + 
break; + } + case CL_UNSIGNED_INT8: + { + cl_uchar *p = (cl_uchar *)inputData; + expected[ 0 ] = p[ 0 ]; + expected[ 1 ] = p[ 1 ]; + expected[ 2 ] = p[ 2 ]; + expected[ 3 ] = p[ 3 ]; + tolerance = 1.f / 127.f; + break; + } + case CL_SIGNED_INT8: + { + cl_short *p = (cl_short *)inputData; + expected[ 0 ] = p[ 0 ]; + expected[ 1 ] = p[ 1 ]; + expected[ 2 ] = p[ 2 ]; + expected[ 3 ] = p[ 3 ]; + tolerance = 1.f / 127.f; + break; + } + case CL_UNORM_INT16: + { + cl_ushort *p = (cl_ushort *)inputData; + expected[ 0 ] = p[ 0 ] / 65535.f; + expected[ 1 ] = p[ 1 ] / 65535.f; + expected[ 2 ] = p[ 2 ] / 65535.f; + expected[ 3 ] = p[ 3 ] / 65535.f; + tolerance = 1.f / 65535.f; + break; + } + case CL_UNSIGNED_INT32: + { + cl_uint *p = (cl_uint *)inputData; + expected[ 0 ] = p[ 0 ]; + expected[ 1 ] = p[ 1 ]; + expected[ 2 ] = p[ 2 ]; + expected[ 3 ] = p[ 3 ]; + tolerance = 0.0001f; + break; + } + case CL_FLOAT: + { + cl_float *p = (cl_float *)inputData; + expected[ 0 ] = p[ 0 ]; + expected[ 1 ] = p[ 1 ]; + expected[ 2 ] = p[ 2 ]; + expected[ 3 ] = p[ 3 ]; + tolerance = 0.0001f; + break; + } + default: + // Should never get here + break; + } + + if( format.image_channel_order == CL_BGRA ) + { + cl_float tmp = expected[ 0 ]; + expected[ 0 ] = expected[ 2 ]; + expected[ 2 ] = tmp; + } + + // Within an error tolerance, make sure the results match + cl_float error1 = fabsf( expected[ 0 ] - actualResults[ 0 ] ); + cl_float error2 = fabsf( expected[ 1 ] - actualResults[ 1 ] ); + cl_float error3 = fabsf( expected[ 2 ] - actualResults[ 2 ] ); + cl_float error4 = fabsf( expected[ 3 ] - actualResults[ 3 ] ); + + if( error1 > tolerance || error2 > tolerance || error3 > tolerance || error4 > tolerance ) + { + log_error( "ERROR: Sample %d did not validate against expected results for %d x %d %s:%s image\n", (int)i, (int)width, (int)height, + GetChannelOrderName( format.image_channel_order ), GetChannelTypeName( format.image_channel_data_type ) ); + log_error( " Expected: %f %f %f %f\n", 
(float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ] ); + log_error( " Actual: %f %f %f %f\n", (float)actualResults[ 0 ], (float)actualResults[ 1 ], (float)actualResults[ 2 ], (float)actualResults[ 3 ] ); + + // Check real quick a special case error here + cl_float error1 = fabsf( expected[ 3 ] - actualResults[ 0 ] ); + cl_float error2 = fabsf( expected[ 2 ] - actualResults[ 1 ] ); + cl_float error3 = fabsf( expected[ 1 ] - actualResults[ 2 ] ); + cl_float error4 = fabsf( expected[ 0 ] - actualResults[ 3 ] ); + if( error1 <= tolerance && error2 <= tolerance && error3 <= tolerance && error4 <= tolerance ) + { + log_error( "\t(Kernel did not respect change in channel order)\n" ); + } + return -1; + } + + // Increment and go + actualResults += 4; + inputData += get_format_type_size( &format ) * 4; + } + + return 0; +} + +int test_image_param(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + size_t sizes[] = { 64, 100, 128, 250, 512 }; + cl_image_format formats[] = { { CL_RGBA, CL_UNORM_INT8 }, { CL_RGBA, CL_UNORM_INT16 }, { CL_RGBA, CL_FLOAT }, { CL_BGRA, CL_UNORM_INT8 } }; + cl_image_format *supported_formats; + ExplicitType types[] = { kUChar, kUShort, kFloat, kUChar }; + int error; + size_t i, j, idx; + size_t threads[ 2 ]; + MTdata d; + int supportsBGRA = 0; + cl_uint numSupportedFormats = 0; + + const size_t numSizes = sizeof( sizes ) / sizeof( sizes[ 0 ] ); + const size_t numFormats = sizeof( formats ) / sizeof( formats[ 0 ] ); + const size_t numAttempts = numSizes * numFormats; + + + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper streams[ numAttempts ][ 2 ]; + BufferOwningPtr inputs[ numAttempts ]; + + PASSIVE_REQUIRE_IMAGE_SUPPORT( device ) + + if(gIsEmbedded) + { + /* Get the supported image formats to see if BGRA is supported */ + clGetSupportedImageFormats (context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 0, NULL, &numSupportedFormats); + supported_formats = 
(cl_image_format *) malloc(sizeof(cl_image_format) * numSupportedFormats); + clGetSupportedImageFormats (context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, numFormats, supported_formats, NULL); + + for(i = 0; i < numSupportedFormats; i++) + { + if(supported_formats[i].image_channel_order == CL_BGRA) + { + supportsBGRA = 1; + break; + } + } + } + else + { + supportsBGRA = 1; + } + + d = init_genrand( gRandomSeed ); + for( i = 0, idx = 0; i < numSizes; i++ ) + { + for( j = 0; j < numFormats; j++, idx++ ) + { + if(formats[j].image_channel_order == CL_BGRA && !supportsBGRA) + continue; + + // For each attempt, we create a pair: an input image, whose parameters keep changing, and an output buffer + // that we can read values from. The output buffer will remain consistent to ensure that any changes we + // witness are due to the image changes + inputs[ idx ].reset(create_random_data( types[ j ], d, sizes[ i ] * sizes[ i ] * 4 )); + + streams[ idx ][ 0 ] = create_image_2d( context, CL_MEM_COPY_HOST_PTR, &formats[ j ], sizes[ i ], sizes[ i ], 0, inputs[ idx ], &error ); + { + char err_str[256]; + sprintf(err_str, "Unable to create input image for format %s order %s" , + GetChannelOrderName( formats[j].image_channel_order ), + GetChannelTypeName( formats[j].image_channel_data_type )); + test_error( error, err_str); + } + + streams[ idx ][ 1 ] = clCreateBuffer( context, CL_MEM_READ_WRITE, sizes[ i ] * sizes[ i ] * 4 * sizeof( cl_float ), NULL, &error ); + test_error( error, "Unable to create output buffer" ); + } + } + free_mtdata(d); d = NULL; + + // Create a single kernel to use for all the tests + error = create_single_kernel_helper( context, &program, &kernel, 1, param_kernel, "test_fn" ); + test_error( error, "Unable to create testing kernel" ); + + // Also create a sampler to use for all the runs + cl_sampler_properties properties[] = { + CL_SAMPLER_NORMALIZED_COORDS, CL_FALSE, + CL_SAMPLER_ADDRESSING_MODE, CL_ADDRESS_CLAMP_TO_EDGE, + CL_SAMPLER_FILTER_MODE, 
CL_FILTER_NEAREST, + 0 }; + clSamplerWrapper sampler = clCreateSamplerWithProperties(context, properties, &error); + test_error(error, "clCreateSamplerWithProperties failed"); + + // Set up the arguments for each and queue + for( i = 0, idx = 0; i < numSizes; i++ ) + { + for( j = 0; j < numFormats; j++, idx++ ) + { + if(formats[j].image_channel_order == CL_BGRA && !supportsBGRA) + continue; + + error = clSetKernelArg( kernel, 0, sizeof( streams[ idx ][ 0 ] ), &streams[ idx ][ 0 ] ); + error |= clSetKernelArg( kernel, 1, sizeof( sampler ), &sampler ); + error |= clSetKernelArg( kernel, 2, sizeof( streams[ idx ][ 1 ] ), &streams[ idx ][ 1 ]); + test_error( error, "Unable to set kernel arguments" ); + + threads[ 0 ] = threads[ 1 ] = (size_t)sizes[ i ]; + + error = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, NULL ); + test_error( error, "clEnqueueNDRangeKernel failed" ); + } + } + + // Now go through each combo and validate the results + for( i = 0, idx = 0; i < numSizes; i++ ) + { + for( j = 0; j < numFormats; j++, idx++ ) + { + if(formats[j].image_channel_order == CL_BGRA && !supportsBGRA) + continue; + + BufferOwningPtr output(malloc(sizeof(cl_float) * sizes[ i ] * sizes[ i ] * 4 )); + + error = clEnqueueReadBuffer( queue, streams[ idx ][ 1 ], CL_TRUE, 0, sizes[ i ] * sizes[ i ] * 4 * sizeof( cl_float ), output, 0, NULL, NULL ); + test_error( error, "Unable to read results" ); + + error = validate_results( sizes[ i ], sizes[ i ], formats[ j ], inputs[ idx ], output ); + if( error ) + return -1; + } + } + + return 0; +} diff --git a/test_conformance/basic/test_image_r8.c b/test_conformance/basic/test_image_r8.c new file mode 100644 index 00000000..02bb8483 --- /dev/null +++ b/test_conformance/basic/test_image_r8.c @@ -0,0 +1,183 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include +#include + + +#include "procs.h" + +static const char *r_uint8_kernel_code = +"__kernel void test_r_uint8(read_only image2d_t srcimg, __global unsigned char *dst, sampler_t sampler)\n" +"{\n" +" int tid_x = get_global_id(0);\n" +" int tid_y = get_global_id(1);\n" +" int indx = tid_y * get_image_width(srcimg) + tid_x;\n" +" uint4 color;\n" +"\n" +" color = read_imageui(srcimg, sampler, (int2)(tid_x, tid_y));\n" +" dst[indx] = (unsigned char)(color.x);\n" +"\n" +"}\n"; + + +static unsigned char * +generate_8bit_image(int w, int h, MTdata d) +{ + unsigned char *ptr = (unsigned char*)malloc(w * h * sizeof(unsigned char)); + int i; + + for (i=0; i +#include +#include +#include +#include + +#include "procs.h" + +int test_imagearraycopy_single_format(cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format) +{ + cl_uchar *imgptr, *bufptr; + clMemWrapper image, buffer; + int img_width = 512; + int img_height = 512; + size_t elem_size; + size_t buffer_size; + int i; + cl_int err; + MTdata d; + cl_event copyevent; + + log_info("Testing %s %s\n", GetChannelOrderName(format->image_channel_order), GetChannelTypeName(format->image_channel_data_type)); + + image = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), format, img_width, img_height, 0, NULL, &err); + test_error(err, "create_image_2d failed"); + + err = clGetImageInfo(image, CL_IMAGE_ELEMENT_SIZE, sizeof(size_t), &elem_size, NULL); + test_error(err, "clGetImageInfo 
failed"); + + buffer_size = sizeof(cl_uchar) * elem_size * img_width * img_height; + + buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), buffer_size, NULL, &err); + test_error(err, "clCreateBuffer failed"); + + d = init_genrand( gRandomSeed ); + imgptr = (cl_uchar*)malloc(buffer_size); + for (i=0; i<(int)buffer_size; i++) { + imgptr[i] = (cl_uchar)genrand_int32(d); + } + free_mtdata(d); d = NULL; + + size_t origin[3]={0,0,0}, region[3]={img_width,img_height,1}; + err = clEnqueueWriteImage( queue, image, CL_TRUE, origin, region, 0, 0, imgptr, 0, NULL, NULL ); + test_error(err, "clEnqueueWriteBuffer failed"); + + err = clEnqueueCopyImageToBuffer( queue, image, buffer, origin, region, 0, 0, NULL, ©event ); + test_error(err, "clEnqueueCopyImageToBuffer failed"); + + bufptr = (cl_uchar*)malloc(buffer_size); + + err = clEnqueueReadBuffer( queue, buffer, CL_TRUE, 0, buffer_size, bufptr, 1, ©event, NULL); + test_error(err, "clEnqueueReadBuffer failed"); + + err = clReleaseEvent(copyevent); + test_error(err, "clReleaseEvent failed"); + + if (memcmp(imgptr, bufptr, buffer_size) != 0) { + log_error( "ERROR: Results did not validate!\n" ); + unsigned char * inchar = (unsigned char*)imgptr; + unsigned char * outchar = (unsigned char*)bufptr; + int failuresPrinted = 0; + int i; + for (i=0; i< (int)buffer_size; i+=(int)elem_size) { + int failed = 0; + int j; + for (j=0; j<(int)elem_size; j++) + if (inchar[i+j] != outchar[i+j]) + failed = 1; + char values[4096]; + values[0] = 0; + if (failed) { + sprintf(values + strlen(values), "%d(0x%x) -> actual [", i, i); + int j; + for (j=0; j<(int)elem_size; j++) + sprintf(values + strlen( values), "0x%02x ", inchar[i+j]); + sprintf(values + strlen(values), "] != expected ["); + for (j=0; j<(int)elem_size; j++) + sprintf(values + strlen( values), "0x%02x ", outchar[i+j]); + sprintf(values + strlen(values), "]"); + log_error("%s\n", values); + failuresPrinted++; + } + if (failuresPrinted > 5) { + log_error("Not printing 
further failures...\n"); + break; + } + } + err = -1; + } + + free(imgptr); + free(bufptr); + + if (err) + log_error("IMAGE to ARRAY copy test failed for image_channel_order=0x%lx and image_channel_data_type=0x%lx\n", + (unsigned long)format->image_channel_order, (unsigned long)format->image_channel_data_type); + + return err; +} + +int test_imagearraycopy(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + cl_int err; + cl_image_format *formats; + cl_uint num_formats; + cl_uint i; + + PASSIVE_REQUIRE_IMAGE_SUPPORT( device ) + + err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 0, NULL, &num_formats); + test_error(err, "clGetSupportedImageFormats failed"); + + formats = (cl_image_format *)malloc(num_formats * sizeof(cl_image_format)); + + err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, num_formats, formats, NULL); + test_error(err, "clGetSupportedImageFormats failed"); + + for (i = 0; i < num_formats; i++) { + err |= test_imagearraycopy_single_format(device, context, queue, &formats[i]); + } + + free(formats); + if (err) + log_error("IMAGE to ARRAY copy test failed\n"); + else + log_info("IMAGE to ARRAY copy test passed\n"); + + return err; +} diff --git a/test_conformance/basic/test_imagearraycopy3d.c b/test_conformance/basic/test_imagearraycopy3d.c new file mode 100644 index 00000000..9bb595a1 --- /dev/null +++ b/test_conformance/basic/test_imagearraycopy3d.c @@ -0,0 +1,143 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include +#include + +#include "procs.h" + +int test_imagearraycopy3d_single_format(cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format) +{ + cl_uchar *imgptr, *bufptr; + clMemWrapper image, buffer; + int img_width = 128; + int img_height = 128; + int img_depth = 32; + size_t elem_size; + size_t buffer_size; + int i; + cl_int err; + MTdata d; + + log_info("Testing %s %s\n", GetChannelOrderName(format->image_channel_order), GetChannelTypeName(format->image_channel_data_type)); + + image = create_image_3d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), format, img_width, img_height, img_depth, 0, 0, NULL, &err); + test_error(err, "create_image_3d failed"); + + err = clGetImageInfo(image, CL_IMAGE_ELEMENT_SIZE, sizeof(size_t), &elem_size, NULL); + test_error(err, "clGetImageInfo failed"); + + buffer_size = sizeof(cl_uchar) * elem_size * img_width * img_height * img_depth; + + buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), buffer_size, NULL, &err); + test_error(err, "clCreateBuffer failed"); + + d = init_genrand( gRandomSeed ); + imgptr = (cl_uchar*)malloc(buffer_size); + for (i=0; i<(int)buffer_size; i++) { + imgptr[i] = (cl_uchar)genrand_int32(d); + } + free_mtdata(d); d = NULL; + + size_t origin[3]={0,0,0}, region[3]={img_width,img_height,img_depth}; + err = clEnqueueWriteImage( queue, image, CL_TRUE, origin, region, 0, 0, imgptr, 0, NULL, NULL ); + test_error(err, "clEnqueueWriteBuffer failed"); + + err = 
clEnqueueCopyImageToBuffer( queue, image, buffer, origin, region, 0, 0, NULL, NULL ); + test_error(err, "clEnqueueCopyImageToBuffer failed"); + + bufptr = (cl_uchar*)malloc(buffer_size); + + err = clEnqueueReadBuffer( queue, buffer, CL_TRUE, 0, buffer_size, bufptr, 0, NULL, NULL); + test_error(err, "clEnqueueReadBuffer failed"); + + if (memcmp(imgptr, bufptr, buffer_size) != 0) { + log_error( "ERROR: Results did not validate!\n" ); + unsigned char * inchar = (unsigned char*)imgptr; + unsigned char * outchar = (unsigned char*)bufptr; + int failuresPrinted = 0; + int i; + for (i=0; i< (int)buffer_size; i+=(int)elem_size) { + int failed = 0; + int j; + for (j=0; j<(int)elem_size; j++) + if (inchar[i+j] != outchar[i+j]) + failed = 1; + char values[4096]; + values[0] = 0; + if (failed) { + sprintf(values + strlen(values), "%d(0x%x) -> actual [", i, i); + int j; + for (j=0; j<(int)elem_size; j++) + sprintf(values + strlen( values), "0x%02x ", inchar[i+j]); + sprintf(values + strlen(values), "] != expected ["); + for (j=0; j<(int)elem_size; j++) + sprintf(values + strlen( values), "0x%02x ", outchar[i+j]); + sprintf(values + strlen(values), "]"); + log_error("%s\n", values); + failuresPrinted++; + } + if (failuresPrinted > 5) { + log_error("Not printing further failures...\n"); + break; + } + } + err = -1; + } + + free(imgptr); + free(bufptr); + + if (err) + log_error("IMAGE3D to ARRAY copy test failed for image_channel_order=0x%lx and image_channel_data_type=0x%lx\n", + (unsigned long)format->image_channel_order, (unsigned long)format->image_channel_data_type); + + return err; +} + +int test_imagearraycopy3d(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + cl_int err; + cl_image_format *formats; + cl_uint num_formats; + cl_uint i; + + PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( device ) + + err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE3D, 0, NULL, &num_formats); + test_error(err, "clGetSupportedImageFormats 
failed"); + + formats = (cl_image_format *)malloc(num_formats * sizeof(cl_image_format)); + + err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE3D, num_formats, formats, NULL); + test_error(err, "clGetSupportedImageFormats failed"); + + for (i = 0; i < num_formats; i++) { + err |= test_imagearraycopy3d_single_format(device, context, queue, &formats[i]); + } + + free(formats); + if (err) + log_error("IMAGE3D to ARRAY copy test failed\n"); + else + log_info("IMAGE3D to ARRAY copy test passed\n"); + + return err; +} diff --git a/test_conformance/basic/test_imagecopy.c b/test_conformance/basic/test_imagecopy.c new file mode 100644 index 00000000..258fe033 --- /dev/null +++ b/test_conformance/basic/test_imagecopy.c @@ -0,0 +1,235 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include +#include + + +#include "procs.h" + +static unsigned char * +generate_rgba8_image(int w, int h, MTdata d) +{ + unsigned char *ptr = (unsigned char*)malloc(w * h * 4); + int i; + + for (i=0; i +#include +#include +#include +#include + + +#include "procs.h" + +static unsigned char * +generate_uint8_image(unsigned num_elements, MTdata d) +{ + unsigned char *ptr = (unsigned char*)malloc(num_elements); + unsigned i; + + for (i=0; i +#include +#include +#include +#include + + +#include "procs.h" + +static const char *image_dim_kernel_code = +"\n" +"__kernel void test_image_dim(read_only image2d_t srcimg, write_only image2d_t dstimg, sampler_t sampler)\n" +"{\n" +" int tid_x = get_global_id(0);\n" +" int tid_y = get_global_id(1);\n" +" float4 color;\n" +"\n" +" color = read_imagef(srcimg, sampler, (int2)(tid_x, tid_y));\n" +" write_imagef(dstimg, (int2)(tid_x, tid_y), color);\n" +"\n" +"}\n"; + + +static unsigned char * +generate_8888_image(int w, int h, MTdata d) +{ + unsigned char *ptr = (unsigned char*)malloc(w * h * 4); + int i; + + for (i=0; i (cl_ulong)SIZE_MAX) { + max_mem_size = (cl_ulong)SIZE_MAX; + } + + cl_sampler_properties properties[] = { + CL_SAMPLER_NORMALIZED_COORDS, CL_FALSE, + CL_SAMPLER_ADDRESSING_MODE, CL_ADDRESS_CLAMP_TO_EDGE, + CL_SAMPLER_FILTER_MODE, CL_FILTER_NEAREST, + 0 }; + cl_sampler sampler = clCreateSamplerWithProperties(context, properties, &err); + test_error(err, "clCreateSamplerWithProperties failed"); + + max_img_width = (int)max_image2d_width; + max_img_height = (int)max_image2d_height; + + // determine max image dim we can allocate - assume RGBA image, 4 bytes per pixel, + // and we want to consume 1/4 of global memory (this is the minimum required to be + // supported by the spec) + max_mem_size /= 4; // use 1/4 + max_mem_size /= 4; // 4 bytes per pixel + max_img_dim = (int)sqrt((double)max_mem_size); + // convert to a power of 2 + { + 
unsigned int n = (unsigned int)max_img_dim; + unsigned int m = 0x80000000; + + // round-down to the nearest power of 2 + while (m > n) + m >>= 1; + + max_img_dim = (int)m; + } + + if (max_img_width > max_img_dim) + max_img_width = max_img_dim; + if (max_img_height > max_img_dim) + max_img_height = max_img_dim; + + log_info("Adjusted maximum image size to test is %d x %d, which is a max mem size of %gMB.\n", + max_img_width, max_img_height, (max_img_width*max_img_height*4)/(1024.0*1024.0)); + + d = init_genrand( gRandomSeed ); + input_ptr = generate_8888_image(max_img_width, max_img_height, d); + output_ptr = (unsigned char*)malloc(sizeof(unsigned char) * 4 * max_img_width * max_img_height); + + // test power of 2 width, height starting at 1 to 4K + for (i=1,i2=0; i<=max_img_height; i<<=1,i2++) + { + img_height = (1 << i2); + for (j=1,j2=0; j<=max_img_width; j<<=1,j2++) + { + img_width = (1 << j2); + + img_format.image_channel_order = CL_RGBA; + img_format.image_channel_data_type = CL_UNORM_INT8; + streams[0] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, NULL); + if (!streams[0]) + { + log_error("create_image_2d failed. width = %d, height = %d\n", img_width, img_height); + free(input_ptr); + free(output_ptr); + free_mtdata(d); + return -1; + } + img_format.image_channel_order = CL_RGBA; + img_format.image_channel_data_type = CL_UNORM_INT8; + streams[1] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, NULL); + if (!streams[1]) + { + log_error("create_image_2d failed. 
width = %d, height = %d\n", img_width, img_height); + clReleaseMemObject(streams[0]); + free(input_ptr); + free(output_ptr); + free_mtdata(d); + return -1; + } + + size_t origin[3] = {0,0,0}; + size_t region[3] = {img_width, img_height, 1}; + err = clEnqueueWriteImage(queue, streams[0], CL_FALSE, origin, region, 0, 0, input_ptr, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + log_error("clWriteImage failed\n"); + clReleaseMemObject(streams[0]); + clReleaseMemObject(streams[1]); + free(input_ptr); + free(output_ptr); + free_mtdata(d); + return -1; + } + + err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]); + err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]); + err |= clSetKernelArg(kernel, 2, sizeof sampler, &sampler); + if (err != CL_SUCCESS) + { + log_error("clSetKernelArgs failed\n"); + clReleaseMemObject(streams[0]); + clReleaseMemObject(streams[1]); + free(input_ptr); + free(output_ptr); + free_mtdata(d); + return -1; + } + + threads[0] = (size_t)img_width; + threads[1] = (size_t)img_height; + log_info("Testing image dimensions %d x %d with local threads NULL.\n", img_width, img_height); + err = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, NULL ); + if (err != CL_SUCCESS) + { + log_error("clEnqueueNDRangeKernel failed\n"); + log_error("Image Dimension test failed. image width = %d, image height = %d, local NULL\n", + img_width, img_height); + clReleaseMemObject(streams[0]); + clReleaseMemObject(streams[1]); + free(input_ptr); + free(output_ptr); + free_mtdata(d); + return -1; + } + err = clEnqueueReadImage(queue, streams[1], CL_TRUE, origin, region, 0, 0, output_ptr, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + log_error("clReadImage failed\n"); + log_error("Image Dimension test failed. 
image width = %d, image height = %d, local NULL\n", + img_width, img_height); + clReleaseMemObject(streams[0]); + clReleaseMemObject(streams[1]); + free(input_ptr); + free(output_ptr); + free_mtdata(d); + return -1; + } + err = verify_8888_image(input_ptr, output_ptr, img_width, img_height); + if (err) + { + total_errors++; + log_error("Image Dimension test failed. image width = %d, image height = %d\n", img_width, img_height); + } + + clReleaseMemObject(streams[0]); + clReleaseMemObject(streams[1]); + } + } + + // cleanup + free(input_ptr); + free(output_ptr); + free_mtdata(d); + clReleaseSampler(sampler); + clReleaseKernel(kernel); + clReleaseProgram(program); + + return total_errors; +} + + + +int +test_imagedim_non_pow2(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + cl_mem streams[2]; + cl_image_format img_format; + unsigned char *input_ptr, *output_ptr; + cl_program program; + cl_kernel kernel; + size_t threads[2], local_threads[2]; + cl_ulong max_mem_size; + int img_width, max_img_width; + int img_height, max_img_height; + int max_img_dim; + int i, j, i2, j2, err=0; + size_t max_image2d_width, max_image2d_height; + int total_errors = 0; + size_t max_local_workgroup_size[3]; + MTdata d; + + PASSIVE_REQUIRE_IMAGE_SUPPORT( device ) + + err = create_single_kernel_helper( context, &program, &kernel, 1, &image_dim_kernel_code, "test_image_dim" ); + if (err) + { + log_error("create_program_and_kernel_with_sources failed\n"); + return -1; + } + + size_t work_group_size = 0; + err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(work_group_size), &work_group_size, NULL); + test_error(err, "clGetKerenlWorkgroupInfo failed for CL_KERNEL_WORK_GROUP_SIZE"); + + err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_local_workgroup_size), max_local_workgroup_size, NULL); + test_error(err, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES"); + + err = clGetDeviceInfo(device, 
CL_DEVICE_GLOBAL_MEM_SIZE,sizeof(max_mem_size), &max_mem_size, NULL); + if (err) + { + log_error("clGetDeviceInfo for CL_DEVICE_GLOBAL_MEM_SIZE failed (%d)\n", err); + return -1; + } + err = clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(max_image2d_width), &max_image2d_width, NULL); + if (err) + { + log_error("clGetDeviceInfo for CL_DEVICE_IMAGE2D_MAX_WIDTH failed (%d)\n", err); + return -1; + } + err = clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(max_image2d_width), &max_image2d_height, NULL); + if (err) + { + log_error("clGetDeviceInfo for CL_DEVICE_IMAGE2D_MAX_HEIGHT failed (%d)\n", err); + return -1; + } + log_info("Device reported max image sizes of %lu x %lu, and max mem size of %gMB.\n", + max_image2d_width, max_image2d_height, max_mem_size/(1024.0*1024.0)); + + cl_sampler_properties properties[] = { + CL_SAMPLER_NORMALIZED_COORDS, CL_FALSE, + CL_SAMPLER_ADDRESSING_MODE, CL_ADDRESS_CLAMP_TO_EDGE, + CL_SAMPLER_FILTER_MODE, CL_FILTER_NEAREST, + 0 }; + cl_sampler sampler = clCreateSamplerWithProperties(context, properties, &err); + test_error(err, "clCreateSamplerWithProperties failed"); + + max_img_width = (int)max_image2d_width; + max_img_height = (int)max_image2d_height; + + if (max_mem_size > (cl_ulong)SIZE_MAX) { + max_mem_size = (cl_ulong)SIZE_MAX; + } + + // determine max image dim we can allocate - assume RGBA image, 4 bytes per pixel, + // and we want to consume 1/4 of global memory (this is the minimum required to be + // supported by the spec) + max_mem_size /= 4; // use 1/4 + max_mem_size /= 4; // 4 bytes per pixel + max_img_dim = (int)sqrt((double)max_mem_size); + // convert to a power of 2 + { + unsigned int n = (unsigned int)max_img_dim; + unsigned int m = 0x80000000; + + // round-down to the nearest power of 2 + while (m > n) + m >>= 1; + + max_img_dim = (int)m; + } + + if (max_img_width > max_img_dim) + max_img_width = max_img_dim; + if (max_img_height > max_img_dim) + max_img_height = max_img_dim; + + 
log_info("Adjusted maximum image size to test is %d x %d, which is a max mem size of %gMB.\n", + max_img_width, max_img_height, (max_img_width*max_img_height*4)/(1024.0*1024.0)); + + d = init_genrand( gRandomSeed ); + input_ptr = generate_8888_image(max_img_width, max_img_height, d); + output_ptr = (unsigned char*)malloc(sizeof(unsigned char) * 4 * max_img_width * max_img_height); + + int plus_minus; + for (plus_minus=0; plus_minus < 3; plus_minus++) + { + + // test power of 2 width, height starting at 1 to 4K + for (i=2,i2=1; i<=max_img_height; i<<=1,i2++) + { + img_height = (1 << i2); + for (j=2,j2=1; j<=max_img_width; j<<=1,j2++) + { + img_width = (1 << j2); + + int effective_img_height = img_height; + int effective_img_width = img_width; + + local_threads[0] = 1; + local_threads[1] = 1; + + switch (plus_minus) { + case 0: + effective_img_height--; + local_threads[0] = work_group_size > max_local_workgroup_size[0] ? max_local_workgroup_size[0] : work_group_size; + while (img_width%local_threads[0] != 0) + local_threads[0]--; + break; + case 1: + effective_img_width--; + local_threads[1] = work_group_size > max_local_workgroup_size[1] ? max_local_workgroup_size[1] : work_group_size; + while (img_height%local_threads[1] != 0) + local_threads[1]--; + break; + case 2: + effective_img_width--; + effective_img_height--; + break; + default: + break; + } + + img_format.image_channel_order = CL_RGBA; + img_format.image_channel_data_type = CL_UNORM_INT8; + streams[0] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, effective_img_width, effective_img_height, 0, NULL, NULL); + if (!streams[0]) + { + log_error("create_image_2d failed. 
width = %d, height = %d\n", effective_img_width, effective_img_height); + free(input_ptr); + free(output_ptr); + free_mtdata(d); + return -1; + } + img_format.image_channel_order = CL_RGBA; + img_format.image_channel_data_type = CL_UNORM_INT8; + streams[1] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, effective_img_width, effective_img_height, 0, NULL, NULL); + if (!streams[1]) + { + log_error("create_image_2d failed. width = %d, height = %d\n", effective_img_width, effective_img_height); + clReleaseMemObject(streams[0]); + free(input_ptr); + free(output_ptr); + free_mtdata(d); + return -1; + } + + size_t origin[3] = {0,0,0}; + size_t region[3] = {effective_img_width, effective_img_height, 1}; + err = clEnqueueWriteImage(queue, streams[0], CL_FALSE, origin, region, 0, 0, input_ptr, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + log_error("clWriteImage failed\n"); + clReleaseMemObject(streams[0]); + clReleaseMemObject(streams[1]); + free(input_ptr); + free(output_ptr); + free_mtdata(d); + return -1; + } + + err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]); + err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]); + err |= clSetKernelArg(kernel, 2, sizeof sampler, &sampler); + if (err != CL_SUCCESS) + { + log_error("clSetKernelArgs failed\n"); + clReleaseMemObject(streams[0]); + clReleaseMemObject(streams[1]); + free(input_ptr); + free(output_ptr); + free_mtdata(d); + return -1; + } + + threads[0] = (size_t)effective_img_width; + threads[1] = (size_t)effective_img_height; + log_info("Testing image dimensions %d x %d with local threads %d x %d.\n", + effective_img_width, effective_img_height, (int)local_threads[0], (int)local_threads[1]); + err = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, local_threads, 0, NULL, NULL ); + if (err != CL_SUCCESS) + { + log_error("clEnqueueNDRangeKernel failed\n"); + log_error("Image Dimension test failed. 
image width = %d, image height = %d, local %d x %d\n", + effective_img_width, effective_img_height, (int)local_threads[0], (int)local_threads[1]); + clReleaseMemObject(streams[0]); + clReleaseMemObject(streams[1]); + free(input_ptr); + free(output_ptr); + free_mtdata(d); + return -1; + } + err = clEnqueueReadImage(queue, streams[1], CL_TRUE, origin, region, 0, 0, output_ptr, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + log_error("clReadImage failed\n"); + log_error("Image Dimension test failed. image width = %d, image height = %d, local %d x %d\n", + effective_img_width, effective_img_height, (int)local_threads[0], (int)local_threads[1]); + clReleaseMemObject(streams[0]); + clReleaseMemObject(streams[1]); + free(input_ptr); + free(output_ptr); + free_mtdata(d); + return -1; + } + err = verify_8888_image(input_ptr, output_ptr, effective_img_width, effective_img_height); + if (err) + { + total_errors++; + log_error("Image Dimension test failed. image width = %d, image height = %d\n", effective_img_width, effective_img_height); + } + + clReleaseMemObject(streams[0]); + clReleaseMemObject(streams[1]); + } + } + + } + + // cleanup + free(input_ptr); + free(output_ptr); + free_mtdata(d); + clReleaseSampler(sampler); + clReleaseKernel(kernel); + clReleaseProgram(program); + + return total_errors; +} + + + + diff --git a/test_conformance/basic/test_imagenpot.c b/test_conformance/basic/test_imagenpot.c new file mode 100644 index 00000000..2682f626 --- /dev/null +++ b/test_conformance/basic/test_imagenpot.c @@ -0,0 +1,226 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include +#include + + +#include "procs.h" + +static const char *rgba8888_kernel_code = +"\n" +"__kernel void test_rgba8888(read_only image2d_t srcimg, write_only image2d_t dstimg, sampler_t sampler)\n" +"{\n" +" int tid_x = get_global_id(0);\n" +" int tid_y = get_global_id(1);\n" +" float4 color;\n" +"\n" +" if ( (tid_x >= get_image_width(dstimg)) || (tid_y >= get_image_height(dstimg)) )\n" +" return;\n" +" color = read_imagef(srcimg, sampler, (int2)(tid_x, tid_y));\n" +" write_imagef(dstimg, (int2)(tid_x, tid_y), color);\n" +"\n" +"}\n"; + + +static unsigned char * +generate_8888_image(int w, int h, MTdata d) +{ + unsigned char *ptr = (unsigned char*)malloc(w * h * 4); + int i; + + for (i=0; i max_local_workgroup_size[0]) + local_workgroup_size = max_local_workgroup_size[0]; + + global_threads[0] = ((img_width + local_workgroup_size - 1) / local_workgroup_size) * local_workgroup_size; + global_threads[1] = img_height; + local_threads[0] = local_workgroup_size; + local_threads[1] = 1; + err = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, global_threads, local_threads, 0, NULL, NULL ); + + if (err != CL_SUCCESS) + { + log_error("%s clEnqueueNDRangeKernel failed\n", __FUNCTION__); + free_mtdata(d); + return -1; + } + err = clEnqueueReadImage(queue, streams[1], CL_TRUE, + origin, region, 0, 0, + (void *)output_ptr, + 0, NULL, NULL); + if (err != CL_SUCCESS) + { + log_error("clEnqueueReadBuffer failed\n"); + return -1; + } + + err = verify_rgba8888_image(input_ptr, 
output_ptr, img_width, img_height); + + // cleanup + clReleaseSampler(sampler); + clReleaseMemObject(streams[0]); + clReleaseMemObject(streams[1]); + clReleaseKernel(kernel); + clReleaseProgram(program); + free(input_ptr); + free(output_ptr); + + if (err) + break; + } + + free_mtdata(d); + + return err; +} + + + + + diff --git a/test_conformance/basic/test_imagerandomcopy.c b/test_conformance/basic/test_imagerandomcopy.c new file mode 100644 index 00000000..1de572f1 --- /dev/null +++ b/test_conformance/basic/test_imagerandomcopy.c @@ -0,0 +1,270 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include +#include + + +#include "procs.h" + +static unsigned char * +generate_rgba8_image(int w, int h, MTdata d) +{ + unsigned char *ptr = (unsigned char*)malloc(w * h * 4); + int i; + + for (i=0; i +#include +#include +#include +#include + + +#include "procs.h" + +static unsigned char * +generate_rgba8_image(int w, int h, MTdata d) +{ + unsigned char *ptr = (unsigned char*)malloc(w * h * 4); + int i; + + for (i=0; i +#include +#include +#include +#include + + +#include "procs.h" + +static unsigned char * +generate_rgba8_image(int w, int h, int d, MTdata mtData) +{ + unsigned char *ptr = (unsigned char*)malloc(w * h * d *4); + int i; + + for (i=0; i +#include +#include +#include +#include + + +#include "procs.h" + +const char *int2float_kernel_code = +"__kernel void test_int2float(__global int *src, __global float *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = (float)src[tid];\n" +"\n" +"}\n"; + + +int +verify_int2float(cl_int *inptr, cl_float *outptr, int n) +{ + int i; + + for (i=0; i +#include +#include +#include +#include + + +#include "procs.h" + +const char *int_add_kernel_code = +"__kernel void test_int_add(__global int *srcA, __global int *srcB, __global int *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] + srcB[tid];\n" +"}\n"; + +const char *int_sub_kernel_code = +"__kernel void test_int_sub(__global int *srcA, __global int *srcB, __global int *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] - srcB[tid];\n" +"}\n"; + +const char *int_mul_kernel_code = +"__kernel void test_int_mul(__global int *srcA, __global int *srcB, __global int *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] * srcB[tid];\n" +"}\n"; + +const char *int_mad_kernel_code = +"__kernel void test_int_mad(__global int *srcA, __global int *srcB, __global int *srcC, __global int *dst)\n" +"{\n" 
+" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] * srcB[tid] + srcC[tid];\n" +"}\n"; + +static const float MAX_ERR = 1e-5f; + +int +verify_int_add(int *inptrA, int *inptrB, int *outptr, int n) +{ + int r; + int i; + + for (i=0; i +#include +#include +#include +#include + + +#include "procs.h" + +const char *int_add2_kernel_code = +"__kernel void test_int_add2(__global int2 *srcA, __global int2 *srcB, __global int2 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] + srcB[tid];\n" +"}\n"; + +const char *int_sub2_kernel_code = +"__kernel void test_int_sub2(__global int2 *srcA, __global int2 *srcB, __global int2 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] - srcB[tid];\n" +"}\n"; + +const char *int_mul2_kernel_code = +"__kernel void test_int_mul2(__global int2 *srcA, __global int2 *srcB, __global int2 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] * srcB[tid];\n" +"}\n"; + +const char *int_mad2_kernel_code = +"__kernel void test_int_mad2(__global int2 *srcA, __global int2 *srcB, __global int2 *srcC, __global int2 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] * srcB[tid] + srcC[tid];\n" +"}\n"; + +int +verify_int_add2(int *inptrA, int *inptrB, int *outptr, int n) +{ + int r; + int i; + + for (i=0; i +#include +#include +#include +#include + + +#include "procs.h" + +const char *int_add4_kernel_code = +"__kernel void test_int_add4(__global int4 *srcA, __global int4 *srcB, __global int4 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] + srcB[tid];\n" +"}\n"; + +const char *int_sub4_kernel_code = +"__kernel void test_int_sub4(__global int4 *srcA, __global int4 *srcB, __global int4 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] - srcB[tid];\n" +"}\n"; + +const char *int_mul4_kernel_code = +"__kernel void test_int_mul4(__global int4 *srcA, __global int4 *srcB, __global 
int4 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] * srcB[tid];\n" +"}\n"; + +const char *int_mad4_kernel_code = +"__kernel void test_int_mad4(__global int4 *srcA, __global int4 *srcB, __global int4 *srcC, __global int4 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] * srcB[tid] + srcC[tid];\n" +"}\n"; + +int +verify_int_add4(int *inptrA, int *inptrB, int *outptr, int n) +{ + int r; + int i; + + for (i=0; i +#include +#include +#include +#include + + +#include "procs.h" + +const char *long_add_kernel_code = +"__kernel void test_long_add(__global long *srcA, __global long *srcB, __global long *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] + srcB[tid];\n" +"}\n"; + +const char *long_sub_kernel_code = +"__kernel void test_long_sub(__global long *srcA, __global long *srcB, __global long *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] - srcB[tid];\n" +"}\n"; + +const char *long_mul_kernel_code = +"__kernel void test_long_mul(__global long *srcA, __global long *srcB, __global long *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] * srcB[tid];\n" +"}\n"; + +const char *long_mad_kernel_code = +"__kernel void test_long_mad(__global long *srcA, __global long *srcB, __global long *srcC, __global long *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] * srcB[tid] + srcC[tid];\n" +"}\n"; + +static const float MAX_ERR = 1e-5f; + +int +verify_long_add(cl_long *inptrA, cl_long *inptrB, cl_long *outptr, int n) +{ + cl_long r; + int i; + + for (i=0; i +#include +#include +#include +#include + + +#include "procs.h" + +const char *long_add2_kernel_code = +"__kernel void test_long_add2(__global long2 *srcA, __global long2 *srcB, __global long2 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] + srcB[tid];\n" +"}\n"; + +const char *long_sub2_kernel_code = +"__kernel void 
test_long_sub2(__global long2 *srcA, __global long2 *srcB, __global long2 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] - srcB[tid];\n" +"}\n"; + +const char *long_mul2_kernel_code = +"__kernel void test_long_mul2(__global long2 *srcA, __global long2 *srcB, __global long2 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] * srcB[tid];\n" +"}\n"; + +const char *long_mad2_kernel_code = +"__kernel void test_long_mad2(__global long2 *srcA, __global long2 *srcB, __global long2 *srcC, __global long2 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] * srcB[tid] + srcC[tid];\n" +"}\n"; + +int +verify_long_add2(cl_long *inptrA, cl_long *inptrB, cl_long *outptr, int n) +{ + cl_long r; + int i; + + for (i=0; i +#include +#include +#include +#include + + +#include "procs.h" + +const char *long_add4_kernel_code = +"__kernel void test_long_add4(__global long4 *srcA, __global long4 *srcB, __global long4 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] + srcB[tid];\n" +"}\n"; + +const char *long_sub4_kernel_code = +"__kernel void test_long_sub4(__global long4 *srcA, __global long4 *srcB, __global long4 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] - srcB[tid];\n" +"}\n"; + +const char *long_mul4_kernel_code = +"__kernel void test_long_mul4(__global long4 *srcA, __global long4 *srcB, __global long4 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] * srcB[tid];\n" +"}\n"; + +const char *long_mad4_kernel_code = +"__kernel void test_long_mad4(__global long4 *srcA, __global long4 *srcB, __global long4 *srcC, __global long4 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] * srcB[tid] + srcC[tid];\n" +"}\n"; + +int +verify_long_add4(cl_long *inptrA, cl_long *inptrB, cl_long *outptr, int n) +{ + cl_long r; + int i; + + for (i=0; i 10) + continue; + if (errors == 10) { + 
log_error("Suppressing further results...\n"); + continue; + } + log_error("Results do not match: output[%d]=%d != expected[%d]=%d\n", i, output[i], i, expected[i]); + errors++; + pass = 0; + } + } + if (pass) log_info("Passed kernel calling kernel...\n"); + + + + // Test kernel calling a function + log_info("Testing kernel calling function...\n"); + // Reset the inputs + for (int i=0; i 10) + continue; + if (errors > 10) { + log_error("Suppressing further results...\n"); + continue; + } + log_error("Results do not match: output[%d]=%d != expected[%d]=%d\n", i, output[i], i, expected[i]); + errors++; + pass = 0; + } + } + if (pass) log_info("Passed kernel calling function...\n"); + + + // Test calling the kernel we called from another kernel + log_info("Testing calling the kernel we called from another kernel before...\n"); + // Reset the inputs + for (int i=0; i 10) + continue; + if (errors > 10) { + log_error("Suppressing further results...\n"); + continue; + } + log_error("Results do not match: output[%d]=%d != expected[%d]=%d\n", i, output[i], i, expected[i]); + errors++; + pass = 0; + } + } + if (pass) log_info("Passed calling the kernel we called from another kernel before...\n"); + + free( input ); + free( output ); + free( expected ); + + return errors; +} + + diff --git a/test_conformance/basic/test_kernel_memory_alignment.cpp b/test_conformance/basic/test_kernel_memory_alignment.cpp new file mode 100644 index 00000000..adcfdaa2 --- /dev/null +++ b/test_conformance/basic/test_kernel_memory_alignment.cpp @@ -0,0 +1,570 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef _WIN32 +#include +#endif + +#include "procs.h" +#include "../../test_common/harness/conversions.h" +#include "../../test_common/harness/typeWrappers.h" +#include "../../test_common/harness/errorHelpers.h" + +// For global, local, and constant +const char *parameter_kernel_long = +"%s\n" // optional pragma +"kernel void test(global ulong *results, %s %s *mem0, %s %s2 *mem2, %s %s3 *mem3, %s %s4 *mem4, %s %s8 *mem8, %s %s16 *mem16)\n" +"{\n" +" results[0] = (ulong)&mem0[0];\n" +" results[1] = (ulong)&mem2[0];\n" +" results[2] = (ulong)&mem3[0];\n" +" results[3] = (ulong)&mem4[0];\n" +" results[4] = (ulong)&mem8[0];\n" +" results[5] = (ulong)&mem16[0];\n" +"}\n"; + +// For private and local +const char *local_kernel_long = +"%s\n" // optional pragma +"kernel void test(global ulong *results)\n" +"{\n" +" %s %s mem0[3];\n" +" %s %s2 mem2[3];\n" +" %s %s3 mem3[3];\n" +" %s %s4 mem4[3];\n" +" %s %s8 mem8[3];\n" +" %s %s16 mem16[3];\n" +" results[0] = (ulong)&mem0[0];\n" +" results[1] = (ulong)&mem2[0];\n" +" results[2] = (ulong)&mem3[0];\n" +" results[3] = (ulong)&mem4[0];\n" +" results[4] = (ulong)&mem8[0];\n" +" results[5] = (ulong)&mem16[0];\n" +"}\n"; + +// For constant +const char *constant_kernel_long = +"%s\n" // optional pragma +" constant %s mem0[3] = {0};\n" +" constant %s2 mem2[3] = {(%s2)(0)};\n" +" constant %s3 mem3[3] = {(%s3)(0)};\n" +" constant %s4 mem4[3] = {(%s4)(0)};\n" +" constant %s8 mem8[3] = {(%s8)(0)};\n" +" constant %s16 mem16[3] = {(%s16)(0)};\n" +"\n" +"kernel void test(global ulong *results)\n" +"{\n" +" results[0] 
= (ulong)&mem0;\n" +" results[1] = (ulong)&mem2;\n" +" results[2] = (ulong)&mem3;\n" +" results[3] = (ulong)&mem4;\n" +" results[4] = (ulong)&mem8;\n" +" results[5] = (ulong)&mem16;\n" +"}\n"; + + +// For global, local, and constant +const char *parameter_kernel_no_long = +"%s\n" // optional pragma +"kernel void test(global uint *results, %s %s *mem0, %s %s2 *mem2, %s %s3 *mem3, %s %s4 *mem4, %s %s8 *mem8, %s %s16 *mem16)\n" +"{\n" +" results[0] = (uint)&mem0[0];\n" +" results[1] = (uint)&mem2[0];\n" +" results[2] = (uint)&mem3[0];\n" +" results[3] = (uint)&mem4[0];\n" +" results[4] = (uint)&mem8[0];\n" +" results[5] = (uint)&mem16[0];\n" +"}\n"; + +// For private and local +const char *local_kernel_no_long = +"%s\n" // optional pragma +"kernel void test(global uint *results)\n" +"{\n" +" %s %s mem0[3];\n" +" %s %s2 mem2[3];\n" +" %s %s3 mem3[3];\n" +" %s %s4 mem4[3];\n" +" %s %s8 mem8[3];\n" +" %s %s16 mem16[3];\n" +" results[0] = (uint)&mem0[0];\n" +" results[1] = (uint)&mem2[0];\n" +" results[2] = (uint)&mem3[0];\n" +" results[3] = (uint)&mem4[0];\n" +" results[4] = (uint)&mem8[0];\n" +" results[5] = (uint)&mem16[0];\n" +"}\n"; + +// For constant +const char *constant_kernel_no_long = +"%s\n" // optional pragma +" constant %s mem0[3] = {0};\n" +" constant %s2 mem2[3] = {(%s2)(0)};\n" +" constant %s3 mem3[3] = {(%s3)(0)};\n" +" constant %s4 mem4[3] = {(%s4)(0)};\n" +" constant %s8 mem8[3] = {(%s8)(0)};\n" +" constant %s16 mem16[3] = {(%s16)(0)};\n" +"\n" +"kernel void test(global uint *results)\n" +"{\n" +" results[0] = (uint)&mem0;\n" +" results[1] = (uint)&mem2;\n" +" results[2] = (uint)&mem3;\n" +" results[3] = (uint)&mem4;\n" +" results[4] = (uint)&mem8;\n" +" results[5] = (uint)&mem16;\n" +"}\n"; + +enum AddressSpaces +{ + kGlobal = 0, + kLocal, + kConstant, + kPrivate +}; + +typedef enum AddressSpaces AddressSpaces; + +#define DEBUG 0 + +const char * get_explicit_address_name( AddressSpaces address ) +{ + /* Quick method to avoid branching: make sure the 
following array matches the Enum order */ + static const char *sExplicitAddressNames[] = { "global", "local", "constant", "private"}; + + return sExplicitAddressNames[ address ]; +} + + +int test_kernel_memory_alignment(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems, AddressSpaces address ) +{ + const char *constant_kernel; + const char *parameter_kernel; + const char *local_kernel; + + if ( gHasLong ) + { + constant_kernel = constant_kernel_long; + parameter_kernel = parameter_kernel_long; + local_kernel = local_kernel_long; + } + else + { + constant_kernel = constant_kernel_no_long; + parameter_kernel = parameter_kernel_no_long; + local_kernel = local_kernel_no_long; + } + + ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble }; + char *kernel_code = (char*)malloc(4096); + cl_kernel kernel; + cl_program program; + int error; + int total_errors = 0; + cl_mem results; + cl_ulong *results_data; + cl_mem mem0, mem2, mem3, mem4, mem8, mem16; + + results_data = (cl_ulong*)malloc(sizeof(cl_ulong)*6); + results = clCreateBuffer(context, 0, sizeof(cl_ulong)*6, NULL, &error); + test_error(error, "clCreateBuffer failed"); + + mem0 = clCreateBuffer(context, 0, sizeof(cl_long), NULL, &error); + test_error(error, "clCreateBuffer failed"); + mem2 = clCreateBuffer(context, 0, sizeof(cl_long)*2, NULL, &error); + test_error(error, "clCreateBuffer failed"); + mem3 = clCreateBuffer(context, 0, sizeof(cl_long)*4, NULL, &error); + test_error(error, "clCreateBuffer failed"); + mem4 = clCreateBuffer(context, 0, sizeof(cl_long)*4, NULL, &error); + test_error(error, "clCreateBuffer failed"); + mem8 = clCreateBuffer(context, 0, sizeof(cl_long)*8, NULL, &error); + test_error(error, "clCreateBuffer failed"); + mem16 = clCreateBuffer(context, 0, sizeof(cl_long)*16, NULL, &error); + test_error(error, "clCreateBuffer failed"); + + + // For each type + + // Calculate alignment mask for each size + + // For global, 
local, constant, private + + // If global, local or constant -- do parameter_kernel + // If private or local -- do local_kernel + // If constant -- do constant kernel + + int numConstantArgs; + clGetDeviceInfo(device, CL_DEVICE_MAX_CONSTANT_ARGS, sizeof(numConstantArgs), &numConstantArgs, NULL); + + int typeIndex; + for (typeIndex = 0; typeIndex < 10; typeIndex++) { + // Skip double tests if we don't support doubles + if (vecType[typeIndex] == kDouble && !is_extension_available(device, "cl_khr_fp64")) { + log_info("Extension cl_khr_fp64 not supported; skipping double tests.\n"); + continue; + } + + if (( vecType[ typeIndex ] == kLong || vecType[ typeIndex ] == kULong ) && !gHasLong ) + continue; + + log_info("Testing %s...\n", get_explicit_type_name(vecType[typeIndex])); + + // Determine the expected alignment masks. + // E.g., if it is supposed to be 4 byte aligned, we should get 4-1=3 = ... 000011 + // We can then and the returned address with that and we should have 0. + cl_ulong alignments[6]; + alignments[0] = get_explicit_type_size(vecType[typeIndex])-1; + alignments[1] = (get_explicit_type_size(vecType[typeIndex])<<1)-1; + alignments[2] = (get_explicit_type_size(vecType[typeIndex])<<2)-1; + alignments[3] = (get_explicit_type_size(vecType[typeIndex])<<2)-1; + alignments[4] = (get_explicit_type_size(vecType[typeIndex])<<3)-1; + alignments[5] = (get_explicit_type_size(vecType[typeIndex])<<4)-1; + + // Parameter kernel + if (address == kGlobal || address == kLocal || address == kConstant) { + log_info("\tTesting parameter kernel...\n"); + + if ( (gIsEmbedded) && (address == kConstant) && (numConstantArgs < 6)) { + sprintf(kernel_code, parameter_kernel, + vecType[typeIndex] == kDouble ? 
"#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "", + get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]), + get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]), + get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]), + get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]), + get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]), + get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]) + ); + } + else { + sprintf(kernel_code, parameter_kernel, + vecType[typeIndex] == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "", + get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]), + get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]), + get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]), + get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]), + get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]), + get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]) + ); + } + //printf("Kernel is: \n%s\n", kernel_code); + + // Create the kernel + error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&kernel_code, "test"); + test_error(error, "create_single_kernel_helper failed"); + + // Initialize the results + memset(results_data, 0, sizeof(cl_long)*5); + error = clEnqueueWriteBuffer(queue, results, CL_TRUE, 0, sizeof(cl_long)*6, results_data, 0, NULL, NULL); + test_error(error, "clEnqueueWriteBuffer failed"); + + // Set the arguments + error = clSetKernelArg(kernel, 0, sizeof(results), &results); + test_error(error, "clSetKernelArg failed"); + if (address != kLocal) { + error = clSetKernelArg(kernel, 1, sizeof(mem0), &mem0); + test_error(error, "clSetKernelArg failed"); + error = clSetKernelArg(kernel, 2, sizeof(mem2), &mem2); + 
test_error(error, "clSetKernelArg failed"); + error = clSetKernelArg(kernel, 3, sizeof(mem3), &mem3); + test_error(error, "clSetKernelArg failed"); + error = clSetKernelArg(kernel, 4, sizeof(mem4), &mem4); + test_error(error, "clSetKernelArg failed"); + error = clSetKernelArg(kernel, 5, sizeof(mem8), &mem8); + test_error(error, "clSetKernelArg failed"); + error = clSetKernelArg(kernel, 6, sizeof(mem16), &mem16); + test_error(error, "clSetKernelArg failed"); + } else { + error = clSetKernelArg(kernel, 1, get_explicit_type_size(vecType[typeIndex]), NULL); + test_error(error, "clSetKernelArg failed"); + error = clSetKernelArg(kernel, 2, get_explicit_type_size(vecType[typeIndex])*2, NULL); + test_error(error, "clSetKernelArg failed"); + error = clSetKernelArg(kernel, 3, get_explicit_type_size(vecType[typeIndex])*4, NULL); + test_error(error, "clSetKernelArg failed"); + error = clSetKernelArg(kernel, 4, get_explicit_type_size(vecType[typeIndex])*4, NULL); + test_error(error, "clSetKernelArg failed"); + error = clSetKernelArg(kernel, 5, get_explicit_type_size(vecType[typeIndex])*8, NULL); + test_error(error, "clSetKernelArg failed"); + error = clSetKernelArg(kernel, 6, get_explicit_type_size(vecType[typeIndex])*16, NULL); + test_error(error, "clSetKernelArg failed"); + } + + // Enqueue the kernel + size_t global_size = 1; + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, NULL, 0, NULL, NULL); + test_error(error, "clEnqueueNDRangeKernel failed"); + + // Read back the results + error = clEnqueueReadBuffer(queue, results, CL_TRUE, 0, sizeof(cl_ulong)*6, results_data, 0, NULL, NULL); + test_error(error, "clEnqueueReadBuffer failed"); + + // Verify the results + if (gHasLong) { + for (int i = 0; i < 6; i++) { + if ((results_data[i] & alignments[i]) != 0) { + total_errors++; + log_error("\tVector size %d failed: 0x%llx is not properly aligned.\n", 1 << i, results_data[i]); + } else { + if (DEBUG) log_info("\tVector size %d passed: 0x%llx is properly 
aligned.\n", 1 << i, results_data[i]); + } + } + } + // Verify the results on devices that do not support longs + else { + cl_uint *results_data_no_long = (cl_uint *)results_data; + + for (int i = 0; i < 6; i++) { + if ((results_data_no_long[i] & alignments[i]) != 0) { + total_errors++; + log_error("\tVector size %d failed: 0x%llx is not properly aligned.\n", 1 << i, results_data_no_long[i]); + } else { + if (DEBUG) log_info("\tVector size %d passed: 0x%llx is properly aligned.\n", 1 << i, results_data_no_long[i]); + } + } + } + + clReleaseKernel(kernel); + clReleaseProgram(program); + } + + + + + // Local kernel + if (address == kLocal || address == kPrivate) { + log_info("\tTesting local kernel...\n"); + sprintf(kernel_code, local_kernel, + vecType[typeIndex] == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "", + get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]), + get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]), + get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]), + get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]), + get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]), + get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]) + ); + //printf("Kernel is: \n%s\n", kernel_code); + + // Create the kernel + error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&kernel_code, "test"); + test_error(error, "create_single_kernel_helper failed"); + + // Initialize the results + memset(results_data, 0, sizeof(cl_long)*5); + error = clEnqueueWriteBuffer(queue, results, CL_TRUE, 0, sizeof(cl_long)*5, results_data, 0, NULL, NULL); + test_error(error, "clEnqueueWriteBuffer failed"); + + // Set the arguments + error = clSetKernelArg(kernel, 0, sizeof(results), &results); + test_error(error, "clSetKernelArg failed"); + + // Enqueue the kernel + size_t global_size = 1; + 
error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, NULL, 0, NULL, NULL); + test_error(error, "clEnqueueNDRangeKernel failed"); + + // Read back the results + error = clEnqueueReadBuffer(queue, results, CL_TRUE, 0, sizeof(cl_ulong)*5, results_data, 0, NULL, NULL); + test_error(error, "clEnqueueReadBuffer failed"); + + // Verify the results + if (gHasLong) { + for (int i = 0; i < 5; i++) { + if ((results_data[i] & alignments[i]) != 0) { + total_errors++; + log_error("\tVector size %d failed: 0x%llx is not properly aligned.\n", 1 << i, results_data[i]); + } else { + if (DEBUG) log_info("\tVector size %d passed: 0x%llx is properly aligned.\n", 1 << i, results_data[i]); + } + } + } + // Verify the results on devices that do not support longs + else { + cl_uint *results_data_no_long = (cl_uint *)results_data; + + for (int i = 0; i < 5; i++) { + if ((results_data_no_long[i] & alignments[i]) != 0) { + total_errors++; + log_error("\tVector size %d failed: 0x%llx is not properly aligned.\n", 1 << i, results_data_no_long[i]); + } else { + if (DEBUG) log_info("\tVector size %d passed: 0x%llx is properly aligned.\n", 1 << i, results_data_no_long[i]); + } + } + } + clReleaseKernel(kernel); + clReleaseProgram(program); + } + + + + // Constant kernel + if (address == kConstant) { + log_info("\tTesting constant kernel...\n"); + sprintf(kernel_code, constant_kernel, + vecType[typeIndex] == kDouble ? 
"#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "", + get_explicit_type_name(vecType[typeIndex]), + get_explicit_type_name(vecType[typeIndex]), + get_explicit_type_name(vecType[typeIndex]), + get_explicit_type_name(vecType[typeIndex]), + get_explicit_type_name(vecType[typeIndex]), + get_explicit_type_name(vecType[typeIndex]), + get_explicit_type_name(vecType[typeIndex]), + get_explicit_type_name(vecType[typeIndex]), + get_explicit_type_name(vecType[typeIndex]), + get_explicit_type_name(vecType[typeIndex]), + get_explicit_type_name(vecType[typeIndex]), + get_explicit_type_name(vecType[typeIndex]) + ); + //printf("Kernel is: \n%s\n", kernel_code); + + // Create the kernel + error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&kernel_code, "test"); + test_error(error, "create_single_kernel_helper failed"); + + // Initialize the results + memset(results_data, 0, sizeof(cl_long)*5); + error = clEnqueueWriteBuffer(queue, results, CL_TRUE, 0, sizeof(cl_long)*5, results_data, 0, NULL, NULL); + test_error(error, "clEnqueueWriteBuffer failed"); + + // Set the arguments + error = clSetKernelArg(kernel, 0, sizeof(results), &results); + test_error(error, "clSetKernelArg failed"); + + // Enqueue the kernel + size_t global_size = 1; + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, NULL, 0, NULL, NULL); + test_error(error, "clEnqueueNDRangeKernel failed"); + + // Read back the results + error = clEnqueueReadBuffer(queue, results, CL_TRUE, 0, sizeof(cl_ulong)*5, results_data, 0, NULL, NULL); + test_error(error, "clEnqueueReadBuffer failed"); + + // Verify the results + if (gHasLong) { + for (int i = 0; i < 5; i++) { + if ((results_data[i] & alignments[i]) != 0) { + total_errors++; + log_error("\tVector size %d failed: 0x%llx is not properly aligned.\n", 1 << i, results_data[i]); + } else { + if (DEBUG) log_info("\tVector size %d passed: 0x%llx is properly aligned.\n", 1 << i, results_data[i]); + } + } + } + // Verify the results 
on devices that do not support longs + else { + cl_uint *results_data_no_long = (cl_uint *)results_data; + + for (int i = 0; i < 5; i++) { + if ((results_data_no_long[i] & alignments[i]) != 0) { + total_errors++; + log_error("\tVector size %d failed: 0x%llx is not properly aligned.\n", 1 << i, results_data_no_long[i]); + } else { + if (DEBUG) log_info("\tVector size %d passed: 0x%llx is properly aligned.\n", 1 << i, results_data_no_long[i]); + } + } + } + clReleaseKernel(kernel); + clReleaseProgram(program); + } + } + + clReleaseMemObject(results); + clReleaseMemObject(mem0); + clReleaseMemObject(mem2); + clReleaseMemObject(mem3); + clReleaseMemObject(mem4); + clReleaseMemObject(mem8); + clReleaseMemObject(mem16); + free( kernel_code ); + free( results_data ); + + if (total_errors != 0) + return -1; + return 0; + +} + + +int test_kernel_memory_alignment_local(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ) +{ + return test_kernel_memory_alignment( device, context, queue, n_elems, kLocal ); +} + +int test_kernel_memory_alignment_global(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ) +{ + return test_kernel_memory_alignment( device, context, queue, n_elems, kGlobal ); +} + +int test_kernel_memory_alignment_constant(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ) +{ + // There is a class of approved OpenCL 1.0 conformant devices out there that in some circumstances + // are unable to meaningfully take (or more precisely use) the address of constant data by virtue + // of limitations in their ISA design. This feature was not tested in 1.0, so they were declared + // conformant by Khronos. The failure is however caught here. + // + // Unfortunately, determining whether or not these devices are 1.0 conformant is not the jurisdiction + // of the 1.1 tests -- We can't fail them from 1.1 conformance here because they are not 1.1 + // devices. 
They are merely 1.0 conformant devices that interop with 1.1 devices in a 1.1 platform. + // To add new binding tests now to conformant 1.0 devices would violate the workingroup requirement + // of no new tests for 1.0 devices. So certain allowances have to be made in intractable cases + // such as this one. + // + // There is some precedent. Similar allowances are made for other 1.0 hardware features such as + // local memory size. The minimum required local memory size grew from 16 kB to 32 kB in OpenCL 1.1. + + // Detect 1.0 devices + // Get CL_DEVICE_VERSION size + size_t string_size = 0; + int err; + if( (err = clGetDeviceInfo( device, CL_DEVICE_VERSION, 0, NULL, &string_size ) ) ) + { + log_error( "FAILURE: Unable to get size of CL_DEVICE_VERSION string!" ); + return -1; + } + + //Allocate storage to hold the version string + char *version_string = (char*) malloc(string_size); + if( NULL == version_string ) + { + log_error( "FAILURE: Unable to allocate memory to hold CL_DEVICE_VERSION string!" ); + return -1; + } + + // Get CL_DEVICE_VERSION string + if( (err = clGetDeviceInfo( device, CL_DEVICE_VERSION, string_size, version_string, NULL ) ) ) + { + log_error( "FAILURE: Unable to read CL_DEVICE_VERSION string!" ); + return -1; + } + + // easy out for 1.0 devices + const char *string_1_0 = "OpenCL 1.0 "; + if( 0 == strncmp( version_string, string_1_0, strlen(string_1_0)) ) + { + log_info( "WARNING: Allowing device to escape testing of difficult constant memory alignment case.\n\tDevice is not a OpenCL 1.1 device. 
CL_DEVICE_VERSION: \"%s\"\n", version_string ); + free(version_string); + return 0; + } + log_info( "Device version string: \"%s\"\n", version_string ); + free(version_string); + + // Everyone else is to be ground mercilessly under the wheels of progress + return test_kernel_memory_alignment( device, context, queue, n_elems, kConstant ); +} + +int test_kernel_memory_alignment_private(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ) +{ + return test_kernel_memory_alignment( device, context, queue, n_elems, kPrivate ); +} + + diff --git a/test_conformance/basic/test_local.c b/test_conformance/basic/test_local.c new file mode 100644 index 00000000..54345ebd --- /dev/null +++ b/test_conformance/basic/test_local.c @@ -0,0 +1,368 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include +#include + + +#include "procs.h" + +const char *barrier_with_localmem_kernel_code[] = { +"__kernel void compute_sum_with_localmem(__global int *a, int n, __local int *tmp_sum, __global int *sum)\n" +"{\n" +" int tid = get_local_id(0);\n" +" int lsize = get_local_size(0);\n" +" int i;\n" +"\n" +" tmp_sum[tid] = 0;\n" +" for (i=tid; i max_local_workgroup_size[0]) + kwgsize = max_local_workgroup_size[0]; + + // err = clSetKernelArgs(context, kernel, 4, NULL, values, sizes); + err = clSetKernelArg(kernel, 0, sizeof(streams[0]), &streams[0]); + err |= clSetKernelArg(kernel, 1, sizeof num_elements, &num_elements); + err |= clSetKernelArg(kernel, 2, wgsize * sizeof(cl_int), NULL); + err |= clSetKernelArg(kernel, 3, sizeof streams[1], &streams[1]); + if (err != CL_SUCCESS) + { + log_error("clSetKernelArgs failed\n"); + return -1; + } + + global_threads[0] = wgsize; + local_threads[0] = wgsize; + + // Adjust the local thread size to fit and be a nice multiple. 
+ if (kwgsize < wgsize) { + log_info("Adjusting wgsize down from %lu to %lu.\n", wgsize, kwgsize); + local_threads[0] = kwgsize; + } + while (global_threads[0] % local_threads[0] != 0) + local_threads[0]--; + + err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_threads, local_threads, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + log_error("clEnqueueNDRangeKernel failed\n"); + return -1; + } + + err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, out_length, output_ptr, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + log_error("clEnqueueReadBuffer failed\n"); + return -1; + } + + err = verify_sum(input_ptr, output_ptr, num_elements); + + // cleanup + clReleaseMemObject(streams[0]); + clReleaseMemObject(streams[1]); + clReleaseKernel(kernel); + clReleaseProgram(program); + free(input_ptr); + free(output_ptr); + + return err; +} + +int test_local_kernel_def(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + cl_mem streams[2]; + cl_program program; + cl_kernel kernel; + + cl_int *input_ptr, *output_ptr; + size_t global_threads[1], local_threads[1]; + size_t wgsize, kwgsize; + int err, i; + char *program_source = (char*)malloc(sizeof(char)*2048); + MTdata d = init_genrand( gRandomSeed ); + size_t max_local_workgroup_size[3]; + memset(program_source, 0, 2048); + + err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof wgsize, &wgsize, NULL); + if (err) { + log_error("clGetDeviceInfo failed, %d\n\n", err); + return -1; + } + wgsize/=2; + if (wgsize < 1) + wgsize = 1; + + size_t in_length = sizeof(cl_int) * num_elements; + size_t out_length = sizeof(cl_int) * wgsize; + + input_ptr = (cl_int *)malloc(in_length); + output_ptr = (cl_int *)malloc(out_length); + + streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, in_length, NULL, NULL); + if (!streams[0]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, out_length, NULL, NULL); + if 
(!streams[1]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + + for (i=0; i (localMemSize / (sizeof(cl_int)*sizeof(cl_int))) ) + { + wgsize = localMemSize / (sizeof(cl_int)*sizeof(cl_int)); + } + + sprintf(program_source, barrier_with_localmem_kernel_code[1], (int)(wgsize * sizeof(cl_int))); + + err = create_single_kernel_helper(context, &program, &kernel, 1, (const char**)&program_source, "compute_sum_with_localmem" ); + free(program_source); + if (err) + return -1; + + err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof kwgsize, &kwgsize, NULL); + test_error(err, "clGetKernelWorkGroupInfo failed for CL_KERNEL_WORK_GROUP_SIZE"); + + err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_local_workgroup_size), max_local_workgroup_size, NULL); + test_error(err, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES"); + + // Pick the minimum of the device and the kernel + if (kwgsize > max_local_workgroup_size[0]) + kwgsize = max_local_workgroup_size[0]; + + // err = clSetKernelArgs(context, kernel, 4, NULL, values, sizes); + err = clSetKernelArg(kernel, 0, sizeof(streams[0]), &streams[0]); + err |= clSetKernelArg(kernel, 1, sizeof num_elements, &num_elements); + err |= clSetKernelArg(kernel, 2, sizeof streams[1], &streams[1]); + if (err != CL_SUCCESS) + { + log_error("clSetKernelArgs failed\n"); + return -1; + } + + global_threads[0] = wgsize; + local_threads[0] = wgsize; + + // Adjust the local thread size to fit and be a nice multiple. 
+ if (kwgsize < wgsize) { + log_info("Adjusting wgsize down from %lu to %lu.\n", wgsize, kwgsize); + local_threads[0] = kwgsize; + } + while (global_threads[0] % local_threads[0] != 0) + local_threads[0]--; + + err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_threads, local_threads, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + log_error("clEnqueueNDRangeKernel failed\n"); + return -1; + } + + err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, out_length, output_ptr, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + log_error("clEnqueueReadBuffer failed\n"); + return -1; + } + + err = verify_sum(input_ptr, output_ptr, num_elements); + + // cleanup + clReleaseMemObject(streams[0]); + clReleaseMemObject(streams[1]); + clReleaseKernel(kernel); + clReleaseProgram(program); + free(input_ptr); + free(output_ptr); + + return err; +} + + + diff --git a/test_conformance/basic/test_local_kernel_scope.cpp b/test_conformance/basic/test_local_kernel_scope.cpp new file mode 100644 index 00000000..d958dc64 --- /dev/null +++ b/test_conformance/basic/test_local_kernel_scope.cpp @@ -0,0 +1,139 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include +#include + + +#include "procs.h" + +#define MAX_LOCAL_STORAGE_SIZE 256 +#define MAX_LOCAL_STORAGE_SIZE_STRING "256" + +const char *kernelSource[] = { + "__kernel void test( __global unsigned int * input, __global unsigned int *outMaxes )\n" + "{\n" + " __local unsigned int localStorage[ " MAX_LOCAL_STORAGE_SIZE_STRING " ];\n" + " unsigned int theValue = input[ get_global_id( 0 ) ];\n" + "\n" + " // If we just write linearly, there's no verification that the items in a group share local data\n" + " // So we write reverse-linearly, which requires items to read the local data written by at least one\n" + " // different item\n" + " localStorage[ get_local_size( 0 ) - get_local_id( 0 ) - 1 ] = theValue;\n" + "\n" + " // The barrier ensures that all local items have written to the local storage\n" + " barrier( CLK_LOCAL_MEM_FENCE );\n" + "\n" + " // Now we loop back through the local storage and look for the max value. 
We only do this if\n" + " // we're the first item in a group\n" + " unsigned int max = 0;\n" + " if( get_local_id( 0 ) == 0 )\n" + " {\n" + " for( size_t i = 0; i < get_local_size( 0 ); i++ )\n" + " {\n" + " if( localStorage[ i ] > max )\n" + " max = localStorage[ i ];\n" + " }\n" + " outMaxes[ get_group_id( 0 ) ] = max;\n" + " }\n" + "}\n" +}; + +int test_local_kernel_scope(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + cl_int error; + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper streams[ 2 ]; + MTdata randSeed = init_genrand( gRandomSeed ); + + // Create a test kernel + error = create_single_kernel_helper( context, &program, &kernel, 1, kernelSource, "test" ); + test_error( error, "Unable to create test kernel" ); + + + // Determine an appropriate test size + size_t workGroupSize; + error = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof( workGroupSize ), &workGroupSize, NULL ); + test_error( error, "Unable to obtain kernel work group size" ); + + // Make sure the work group size doesn't overrun our local storage size in the kernel + while( workGroupSize > MAX_LOCAL_STORAGE_SIZE ) + workGroupSize >>= 1; + + size_t testSize = workGroupSize; + while( testSize < 1024 ) + testSize += workGroupSize; + size_t numGroups = testSize / workGroupSize; + log_info( "\tTesting with %ld groups, %ld elements per group...\n", numGroups, workGroupSize ); + + // Create two buffers for operation + cl_uint *inputData = (cl_uint*)malloc( testSize * sizeof(cl_uint) ); + generate_random_data( kUInt, testSize, randSeed, inputData ); + free_mtdata( randSeed ); + streams[ 0 ] = clCreateBuffer( context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, testSize * sizeof(cl_uint), inputData, &error ); + test_error( error, "Unable to create input buffer" ); + + cl_uint *outputData = (cl_uint*)malloc( numGroups *sizeof(cl_uint) ); + streams[ 1 ] = clCreateBuffer( context, CL_MEM_WRITE_ONLY, numGroups * 
sizeof(cl_uint), NULL, &error ); + test_error( error, "Unable to create output buffer" ); + + + // Set up the kernel args and run + error = clSetKernelArg( kernel, 0, sizeof( streams[ 0 ] ), &streams[ 0 ] ); + test_error( error, "Unable to set kernel arg" ); + error = clSetKernelArg( kernel, 1, sizeof( streams[ 1 ] ), &streams[ 1 ] ); + test_error( error, "Unable to set kernel arg" ); + + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, &testSize, &workGroupSize, 0, NULL, NULL ); + test_error( error, "Unable to enqueue kernel" ); + + + // Read results and verify + error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, numGroups * sizeof(cl_uint), outputData, 0, NULL, NULL ); + test_error( error, "Unable to read output data" ); + + // MingW compiler seems to have a bug that otimizes the code below incorrectly. + // adding the volatile keyword to size_t decleration to avoid aggressive optimization by the compiler. + for( volatile size_t i = 0; i < numGroups; i++ ) + { + // Determine the max in our case + cl_uint localMax = 0; + for( volatile size_t j = 0; j < workGroupSize; j++ ) + { + if( inputData[ i * workGroupSize + j ] > localMax ) + localMax = inputData[ i * workGroupSize + j ]; + } + + if( outputData[ i ] != localMax ) + { + log_error( "ERROR: Local max validation failed! (expected %u, got %u for i=%lu)\n", localMax, outputData[ i ] , i ); + free(inputData); + free(outputData); + return -1; + } + } + + free(inputData); + free(outputData); + return 0; +} + + diff --git a/test_conformance/basic/test_local_linear_id.c b/test_conformance/basic/test_local_linear_id.c new file mode 100644 index 00000000..52e4cb00 --- /dev/null +++ b/test_conformance/basic/test_local_linear_id.c @@ -0,0 +1,122 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include +#include +#include "../../test_common/harness/rounding_mode.h" + +#include "procs.h" + +static const char *local_linear_id_1d_code = +"__kernel void test_local_linear_id_1d(global int *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" int linear_id = get_local_id(0);\n" +" int result = (linear_id == (int)get_local_linear_id()) ? 0x1 : 0x0;\n" +" dst[tid] = result;\n" +"}\n"; + +static const char *local_linear_id_2d_code = +"__kernel void test_local_linear_id_2d(global int *dst)\n" +"{\n" +" int tid_x = get_global_id(0);\n" +" int tid_y = get_global_id(1);\n" +"\n" +" int linear_id = get_local_id(1) * get_local_size(0) + get_local_id(0);\n" +" int result = (linear_id == (int)get_local_linear_id()) ? 
0x1 : 0x0;\n" +" dst[tid_y * get_global_size(0) + tid_x] = result;\n" +"}\n"; + + +static int +verify_local_linear_id(int *result, int n) +{ + int i; + for (i=0; i +#include +#include +#include +#include + + +#include "procs.h" + +const char *loop_kernel_code = +"__kernel void test_loop(__global int *src, __global int *loopindx, __global int *loopcnt, __global int *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +" int n = get_global_size(0);\n" +" int i, j;\n" +"\n" +" dst[tid] = 0;\n" +" for (i=0,j=loopindx[tid]; i= n)\n" +" j = 0;\n" +" dst[tid] += src[j];\n" +" }\n" +"\n" +"}\n"; + + +int +verify_loop(int *inptr, int *loopindx, int *loopcnt, int *outptr, int n) +{ + int r, i, j, k; + + for (i=0; i= n) + k = 0; + r += inptr[k]; + } + + if (r != outptr[i]) + { + log_error("LOOP test failed: %d found, expected %d\n", outptr[i], r); + return -1; + } + } + + log_info("LOOP test passed\n"); + return 0; +} + +int test_loop(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + cl_mem streams[4]; + cl_int *input_ptr, *loop_indx, *loop_cnt, *output_ptr; + cl_program program; + cl_kernel kernel; + size_t threads[1]; + int err, i; + + size_t length = sizeof(cl_int) * num_elements; + input_ptr = (cl_int*)malloc(length); + loop_indx = (cl_int*)malloc(length); + loop_cnt = (cl_int*)malloc(length); + output_ptr = (cl_int*)malloc(length); + + streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL); + if (!streams[0]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL); + if (!streams[1]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL); + if (!streams[2]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + streams[3] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL); + if (!streams[3]) + { + log_error("clCreateBuffer 
failed\n"); + return -1; + } + + MTdata d = init_genrand( gRandomSeed ); + for (i=0; i +#include +#include +#include +#include + + +#include "procs.h" + +static const char *multireadimage_kernel_code = +"__kernel void test_multireadimage(read_only image2d_t img0, read_only image2d_t img1, \n" +" read_only image2d_t img2, __global float4 *dst, sampler_t sampler)\n" +"{\n" +" int tid_x = get_global_id(0);\n" +" int tid_y = get_global_id(1);\n" +" int2 tid = (int2)(tid_x, tid_y);\n" +" int indx = tid_y * get_image_width(img1) + tid_x;\n" +" float4 sum;\n" +"\n" +" sum = read_imagef(img0, sampler, tid);\n" +" sum += read_imagef(img1, sampler, tid);\n" +" sum += read_imagef(img2, sampler, tid);\n" +"\n" +" dst[indx] = sum;\n" +"}\n"; + +#define MAX_ERR 1e-7f + +static unsigned char * +generate_8888_image(int w, int h, MTdata d) +{ + unsigned char *ptr = (unsigned char*)malloc(w * h * 4); + int i; + + for (i=0; i max_ulp) + max_ulp = ulp; + } + + if (max_ulp > max_ulp_allowed) { + log_error("READ_MULTIREADIMAGE_MULTIFORMAT test failed. Max ulp error = %g\n", max_ulp); + return -1; + } + + log_info("READ_MULTIREADIMAGE_MULTIFORMAT test passed. 
Max ulp error = %g\n", max_ulp); + return 0; +} + + +int +test_multireadimagemultifmt(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + cl_mem streams[4]; + cl_image_format img_format; + void *input_ptr[3], *output_ptr; + cl_program program; + cl_kernel kernel; + size_t threads[2]; + int img_width = 512; + int img_height = 512; + int i, err; + MTdata d; + + PASSIVE_REQUIRE_IMAGE_SUPPORT( device ) + + d = init_genrand( gRandomSeed ); + input_ptr[0] = (void *)generate_8888_image(img_width, img_height, d); + input_ptr[1] = (void *)generate_16bit_image(img_width, img_height, d); + input_ptr[2] = (void *)generate_float_image(img_width, img_height, d); + free_mtdata(d); d = NULL; + + output_ptr = (void *)malloc(sizeof(float) * 4 * img_width * img_height); + + img_format.image_channel_order = CL_RGBA; + img_format.image_channel_data_type = CL_UNORM_INT8; + streams[0] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, NULL); + if (!streams[0]) + { + log_error("create_image_2d failed\n"); + return -1; + } + img_format.image_channel_order = CL_RGBA; + img_format.image_channel_data_type = CL_UNORM_INT16; + streams[1] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, NULL); + if (!streams[1]) + { + log_error("create_image_2d failed\n"); + return -1; + } + img_format.image_channel_order = CL_RGBA; + img_format.image_channel_data_type = CL_FLOAT; + streams[2] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, NULL); + if (!streams[2]) + { + log_error("create_image_2d failed\n"); + return -1; + } + + streams[3] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(float)*4 * img_width*img_height, NULL, NULL); + if (!streams[3]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + + for (i=0; i<3; i++) + { + size_t origin[3] = {0,0,0}, 
region[3]={img_width, img_height,1}; + err = clEnqueueWriteImage(queue, streams[i], CL_TRUE, origin, region, 0, 0, input_ptr[i], 0, NULL, NULL); + if (err != CL_SUCCESS) + { + log_error("clWriteImage failed\n"); + return -1; + } + } + + err = create_single_kernel_helper( context, &program, &kernel, 1, &multireadimage_kernel_code, "test_multireadimage"); + if (err) + return -1; + + cl_sampler_properties properties[] = { + CL_SAMPLER_NORMALIZED_COORDS, CL_FALSE, + CL_SAMPLER_ADDRESSING_MODE, CL_ADDRESS_CLAMP_TO_EDGE, + CL_SAMPLER_FILTER_MODE, CL_FILTER_NEAREST, + 0 }; + cl_sampler sampler = clCreateSamplerWithProperties(context, properties, &err); + test_error(err, "clCreateSamplerWithProperties failed"); + + for (i=0; i<4; i++) + err |= clSetKernelArg(kernel, i,sizeof streams[i], &streams[i]); + err |= clSetKernelArg(kernel, 4, sizeof sampler, &sampler); + + if (err != CL_SUCCESS) + { + log_error("clSetKernelArgs failed\n"); + return -1; + } + + threads[0] = (size_t)img_width; + threads[1] = (size_t)img_height; + + err = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, NULL ); + if (err != CL_SUCCESS) + { + log_error("clEnqueueNDRangeKernel failed\n"); + return -1; + } + err = clEnqueueReadBuffer( queue, streams[3], CL_TRUE, 0, sizeof(float)*4*img_width*img_height, (void *)output_ptr, 0, NULL, NULL ); + if (err != CL_SUCCESS) + { + log_error("clEnqueueReadBuffer failed\n"); + return -1; + } + + err = verify_multireadimage(input_ptr, (float*)output_ptr, img_width, img_height); + + // cleanup + clReleaseSampler(sampler); + for (i=0; i<4; i++) + clReleaseMemObject(streams[i]); + clReleaseKernel(kernel); + clReleaseProgram(program); + for (i=0; i<3; i++) + free(input_ptr[i]); + free(output_ptr); + + return err; +} + + + + + diff --git a/test_conformance/basic/test_multireadimageonefmt.c b/test_conformance/basic/test_multireadimageonefmt.c new file mode 100644 index 00000000..40fd6bf6 --- /dev/null +++ 
b/test_conformance/basic/test_multireadimageonefmt.c @@ -0,0 +1,204 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include +#include + + +#include "procs.h" + +static const char *multireadimage_kernel_code = +"__kernel void test_multireadimage(int n, int m, sampler_t sampler, \n" +" read_only image2d_t img0, read_only image2d_t img1, \n" +" read_only image2d_t img2, read_only image2d_t img3, \n" +" read_only image2d_t img4, read_only image2d_t img5, \n" +" read_only image2d_t img6, __global float4 *dst)\n" +"{\n" +" int tid_x = get_global_id(0);\n" +" int tid_y = get_global_id(1);\n" +" int2 tid = (int2)(tid_x, tid_y);\n" +" int indx = tid_y * get_image_width(img5) + tid_x;\n" +" float4 sum;\n" +"\n" +" sum = read_imagef(img0, sampler, tid);\n" +" sum += read_imagef(img1, sampler, tid);\n" +" sum += read_imagef(img2, sampler, tid);\n" +" sum += read_imagef(img3, sampler, tid);\n" +" sum += read_imagef(img4, sampler, tid);\n" +" sum += read_imagef(img5, sampler, tid);\n" +" sum += read_imagef(img6, sampler, tid);\n" +"\n" +" dst[indx] = sum;\n" +"}\n"; + + +static unsigned char * +generate_8888_image(int w, int h, MTdata d) +{ + unsigned char *ptr = (unsigned char*)malloc(w * h * 4); + int i; + + for (i=0; i max_ulp) + max_ulp = ulp; + } + + if (max_ulp > max_ulp_allowed) + { + log_error("READ_MULTIREADIMAGE_RGBA8888 test 
failed. Max ULP err = %g\n", max_ulp); + return -1; + } + log_info("READ_MULTIREADIMAGE_RGBA8888 test passed. Max ULP err = %g\n", max_ulp); + return 0; +} + + +int test_multireadimageonefmt(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + cl_mem streams[8]; + cl_image_format img_format; + void *input_ptr[7], *output_ptr; + cl_program program; + cl_kernel kernel; + size_t threads[2]; + int img_width = 512; + int img_height = 512; + int i, err; + size_t origin[3] = {0, 0, 0}; + size_t region[3] = {img_width, img_height, 1}; + size_t length = img_width * img_height * 4 * sizeof(float); + MTdata d; + + PASSIVE_REQUIRE_IMAGE_SUPPORT( device ) + + output_ptr = malloc(length); + + d = init_genrand( gRandomSeed ); + for (i=0; i<7; i++) { + input_ptr[i] = (void *)generate_8888_image(img_width, img_height, d); + + img_format.image_channel_order = CL_RGBA; + img_format.image_channel_data_type = CL_UNORM_INT8; + streams[i] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format, img_width, img_height, 0, NULL, NULL); + if (!streams[i]) + { + log_error("create_image_2d failed\n"); + return -1; + } + + err = clEnqueueWriteImage(queue, streams[i], CL_TRUE, origin, region, 0, 0, input_ptr[i], 0, NULL, NULL); + if (err != CL_SUCCESS) + { + log_error("clWriteImage failed\n"); + return -1; + } + } + free_mtdata(d); d = NULL; + + + streams[7] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL); + if (!streams[7]) + { + log_error("clCreateArray failed\n"); + return -1; + } + + err = create_single_kernel_helper(context, &program, &kernel, 1, &multireadimage_kernel_code, "test_multireadimage"); + if (err) + return -1; + + cl_sampler_properties properties[] = { + CL_SAMPLER_NORMALIZED_COORDS, CL_FALSE, + CL_SAMPLER_ADDRESSING_MODE, CL_ADDRESS_CLAMP_TO_EDGE, + CL_SAMPLER_FILTER_MODE, CL_FILTER_NEAREST, + 0 }; + cl_sampler sampler = clCreateSamplerWithProperties(context, properties, &err); + test_error(err, 
"clCreateSamplerWithProperties failed"); + + err = clSetKernelArg(kernel, 0, sizeof i, &i); + err |= clSetKernelArg(kernel, 1, sizeof err, &err); + err |= clSetKernelArg(kernel, 2, sizeof sampler, &sampler); + for (i=0; i<8; i++) + err |= clSetKernelArg(kernel, 3+i, sizeof streams[i], &streams[i]); + + if (err != CL_SUCCESS) + { + log_error("clSetKernelArgs failed\n"); + return -1; + } + + threads[0] = (unsigned int)img_width; + threads[1] = (unsigned int)img_height; + + err = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, threads, NULL, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + log_error("clExecuteKernel failed\n"); + return -1; + } + err = clEnqueueReadBuffer(queue, streams[7], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + log_error("clReadArray failed\n"); + return -1; + } + + err = verify_multireadimage(input_ptr, 7, (float *)output_ptr, img_width, img_height); + + // cleanup + clReleaseSampler(sampler); + for (i=0; i<8; i++) + clReleaseMemObject(streams[i]); + clReleaseKernel(kernel); + clReleaseProgram(program); + for (i=0; i<7; i++) + free(input_ptr[i]); + free(output_ptr); + + return err; +} + + + + + diff --git a/test_conformance/basic/test_numeric_constants.cpp b/test_conformance/basic/test_numeric_constants.cpp new file mode 100644 index 00000000..5aeca0ed --- /dev/null +++ b/test_conformance/basic/test_numeric_constants.cpp @@ -0,0 +1,710 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "procs.h" + +#define TEST_VALUE_POSITIVE( string_name, name, value ) \ +{ \ +if (name < value) { \ +log_error("FAILED: " string_name ": " #name " < " #value "\n"); \ +errors++;\ +} else { \ +log_info("\t" string_name ": " #name " >= " #value "\n"); \ +} \ +} + +#define TEST_VALUE_NEGATIVE( string_name, name, value ) \ +{ \ +if (name > value) { \ +log_error("FAILED: " string_name ": " #name " > " #value "\n"); \ +errors++;\ +} else { \ +log_info("\t" string_name ": " #name " <= " #value "\n"); \ +} \ +} + +#define TEST_VALUE_EQUAL_LITERAL( string_name, name, value ) \ +{ \ +if (name != value) { \ +log_error("FAILED: " string_name ": " #name " != " #value "\n"); \ +errors++;\ +} else { \ +log_info("\t" string_name ": " #name " = " #value "\n"); \ +} \ +} + +#define TEST_VALUE_EQUAL( string_name, name, value ) \ +{ \ +if (name != value) { \ +log_error("FAILED: " string_name ": " #name " != %a (%17.21g)\n", value, value); \ +errors++;\ +} else { \ +log_info("\t" string_name ": " #name " = %a (%17.21g)\n", value, value); \ +} \ +} + +int test_host_numeric_constants(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int errors = 0; + TEST_VALUE_EQUAL_LITERAL( "CL_CHAR_BIT", CL_CHAR_BIT, 8) + TEST_VALUE_EQUAL_LITERAL( "CL_SCHAR_MAX", CL_SCHAR_MAX, 127) + TEST_VALUE_EQUAL_LITERAL( "CL_SCHAR_MIN", CL_SCHAR_MIN, (-127-1)) + TEST_VALUE_EQUAL_LITERAL( "CL_CHAR_MAX", CL_CHAR_MAX, CL_SCHAR_MAX) + TEST_VALUE_EQUAL_LITERAL( "CL_CHAR_MIN", CL_CHAR_MIN, CL_SCHAR_MIN) + TEST_VALUE_EQUAL_LITERAL( "CL_UCHAR_MAX", CL_UCHAR_MAX, 255) + TEST_VALUE_EQUAL_LITERAL( "CL_SHRT_MAX", CL_SHRT_MAX, 32767) + TEST_VALUE_EQUAL_LITERAL( "CL_SHRT_MIN", CL_SHRT_MIN, (-32767-1)) + TEST_VALUE_EQUAL_LITERAL( "CL_USHRT_MAX", CL_USHRT_MAX, 65535) + TEST_VALUE_EQUAL_LITERAL( "CL_INT_MAX", CL_INT_MAX, 2147483647) + TEST_VALUE_EQUAL_LITERAL( 
"CL_INT_MIN", CL_INT_MIN, (-2147483647-1)) + TEST_VALUE_EQUAL_LITERAL( "CL_UINT_MAX", CL_UINT_MAX, 0xffffffffU) + TEST_VALUE_EQUAL_LITERAL( "CL_LONG_MAX", CL_LONG_MAX, ((cl_long) 0x7FFFFFFFFFFFFFFFLL)) + TEST_VALUE_EQUAL_LITERAL( "CL_LONG_MIN", CL_LONG_MIN, ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL)) + TEST_VALUE_EQUAL_LITERAL( "CL_ULONG_MAX", CL_ULONG_MAX, ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL)) + + TEST_VALUE_EQUAL_LITERAL( "CL_FLT_DIG", CL_FLT_DIG, 6) + TEST_VALUE_EQUAL_LITERAL( "CL_FLT_MANT_DIG", CL_FLT_MANT_DIG, 24) + TEST_VALUE_EQUAL_LITERAL( "CL_FLT_MAX_10_EXP", CL_FLT_MAX_10_EXP, +38) + TEST_VALUE_EQUAL_LITERAL( "CL_FLT_MAX_EXP", CL_FLT_MAX_EXP, +128) + TEST_VALUE_EQUAL_LITERAL( "CL_FLT_MIN_10_EXP", CL_FLT_MIN_10_EXP, -37) + TEST_VALUE_EQUAL_LITERAL( "CL_FLT_MIN_EXP", CL_FLT_MIN_EXP, -125) + TEST_VALUE_EQUAL_LITERAL( "CL_FLT_RADIX", CL_FLT_RADIX, 2) + TEST_VALUE_EQUAL_LITERAL( "CL_FLT_MAX", CL_FLT_MAX, MAKE_HEX_FLOAT( 0x1.fffffep127f, 0x1fffffeL, 103)) + TEST_VALUE_EQUAL_LITERAL( "CL_FLT_MIN", CL_FLT_MIN, MAKE_HEX_FLOAT(0x1.0p-126f, 0x1L, -126)) + TEST_VALUE_EQUAL_LITERAL( "CL_FLT_EPSILON", CL_FLT_EPSILON, MAKE_HEX_FLOAT(0x1.0p-23f, 0x1L, -23)) + + TEST_VALUE_EQUAL_LITERAL( "CL_DBL_DIG", CL_DBL_DIG, 15) + TEST_VALUE_EQUAL_LITERAL( "CL_DBL_MANT_DIG", CL_DBL_MANT_DIG, 53) + TEST_VALUE_EQUAL_LITERAL( "CL_DBL_MAX_10_EXP", CL_DBL_MAX_10_EXP, +308) + TEST_VALUE_EQUAL_LITERAL( "CL_DBL_MAX_EXP", CL_DBL_MAX_EXP, +1024) + TEST_VALUE_EQUAL_LITERAL( "CL_DBL_MIN_10_EXP", CL_DBL_MIN_10_EXP, -307) + TEST_VALUE_EQUAL_LITERAL( "CL_DBL_MIN_EXP", CL_DBL_MIN_EXP, -1021) + TEST_VALUE_EQUAL_LITERAL( "CL_DBL_RADIX", CL_DBL_RADIX, 2) + TEST_VALUE_EQUAL( "CL_DBL_MAX", CL_DBL_MAX, MAKE_HEX_DOUBLE(0x1.fffffffffffffp1023, 0x1fffffffffffffLL, 971)) + TEST_VALUE_EQUAL( "CL_DBL_MIN", CL_DBL_MIN, MAKE_HEX_DOUBLE(0x1.0p-1022, 0x1LL, -1022)) + TEST_VALUE_EQUAL( "CL_DBL_EPSILON", CL_DBL_EPSILON, MAKE_HEX_DOUBLE(0x1.0p-52, 0x1LL, -52)) + + TEST_VALUE_EQUAL( "CL_M_E", CL_M_E, 
MAKE_HEX_DOUBLE(0x1.5bf0a8b145769p+1, 0x15bf0a8b145769LL, -51) ); + TEST_VALUE_EQUAL( "CL_M_LOG2E", CL_M_LOG2E, MAKE_HEX_DOUBLE(0x1.71547652b82fep+0, 0x171547652b82feLL, -52) ); + TEST_VALUE_EQUAL( "CL_M_LOG10E", CL_M_LOG10E, MAKE_HEX_DOUBLE(0x1.bcb7b1526e50ep-2, 0x1bcb7b1526e50eLL, -54) ); + TEST_VALUE_EQUAL( "CL_M_LN2", CL_M_LN2, MAKE_HEX_DOUBLE(0x1.62e42fefa39efp-1, 0x162e42fefa39efLL, -53) ); + TEST_VALUE_EQUAL( "CL_M_LN10", CL_M_LN10, MAKE_HEX_DOUBLE(0x1.26bb1bbb55516p+1, 0x126bb1bbb55516LL, -51) ); + TEST_VALUE_EQUAL( "CL_M_PI", CL_M_PI, MAKE_HEX_DOUBLE(0x1.921fb54442d18p+1, 0x1921fb54442d18LL, -51) ); + TEST_VALUE_EQUAL( "CL_M_PI_2", CL_M_PI_2, MAKE_HEX_DOUBLE(0x1.921fb54442d18p+0, 0x1921fb54442d18LL, -52) ); + TEST_VALUE_EQUAL( "CL_M_PI_4", CL_M_PI_4, MAKE_HEX_DOUBLE(0x1.921fb54442d18p-1, 0x1921fb54442d18LL, -53) ); + TEST_VALUE_EQUAL( "CL_M_1_PI", CL_M_1_PI, MAKE_HEX_DOUBLE(0x1.45f306dc9c883p-2, 0x145f306dc9c883LL, -54) ); + TEST_VALUE_EQUAL( "CL_M_2_PI", CL_M_2_PI, MAKE_HEX_DOUBLE(0x1.45f306dc9c883p-1, 0x145f306dc9c883LL, -53) ); + TEST_VALUE_EQUAL( "CL_M_2_SQRTPI", CL_M_2_SQRTPI, MAKE_HEX_DOUBLE(0x1.20dd750429b6dp+0, 0x120dd750429b6dLL, -52) ); + TEST_VALUE_EQUAL( "CL_M_SQRT2", CL_M_SQRT2, MAKE_HEX_DOUBLE(0x1.6a09e667f3bcdp+0, 0x16a09e667f3bcdLL, -52) ); + TEST_VALUE_EQUAL( "CL_M_SQRT1_2", CL_M_SQRT1_2, MAKE_HEX_DOUBLE(0x1.6a09e667f3bcdp-1, 0x16a09e667f3bcdLL, -53) ); + + TEST_VALUE_EQUAL( "CL_M_E_F", CL_M_E_F, MAKE_HEX_FLOAT(0x1.5bf0a8p+1f, 0x15bf0a8L, -23)); + TEST_VALUE_EQUAL( "CL_M_LOG2E_F", CL_M_LOG2E_F, MAKE_HEX_FLOAT(0x1.715476p+0f, 0x1715476L, -24)); + TEST_VALUE_EQUAL( "CL_M_LOG10E_F", CL_M_LOG10E_F, MAKE_HEX_FLOAT(0x1.bcb7b2p-2f, 0x1bcb7b2L, -26)); + TEST_VALUE_EQUAL( "CL_M_LN2_F", CL_M_LN2_F, MAKE_HEX_FLOAT(0x1.62e43p-1f, 0x162e43L, -21) ); + TEST_VALUE_EQUAL( "CL_M_LN10_F", CL_M_LN10_F, MAKE_HEX_FLOAT(0x1.26bb1cp+1f, 0x126bb1cL, -23)); + TEST_VALUE_EQUAL( "CL_M_PI_F", CL_M_PI_F, MAKE_HEX_FLOAT(0x1.921fb6p+1f, 0x1921fb6L, -23)); + 
TEST_VALUE_EQUAL( "CL_M_PI_2_F", CL_M_PI_2_F, MAKE_HEX_FLOAT(0x1.921fb6p+0f, 0x1921fb6L, -24)); + TEST_VALUE_EQUAL( "CL_M_PI_4_F", CL_M_PI_4_F, MAKE_HEX_FLOAT(0x1.921fb6p-1f, 0x1921fb6L, -25)); + TEST_VALUE_EQUAL( "CL_M_1_PI_F", CL_M_1_PI_F, MAKE_HEX_FLOAT(0x1.45f306p-2f, 0x145f306L, -26)); + TEST_VALUE_EQUAL( "CL_M_2_PI_F", CL_M_2_PI_F, MAKE_HEX_FLOAT(0x1.45f306p-1f, 0x145f306L, -25)); + TEST_VALUE_EQUAL( "CL_M_2_SQRTPI_F", CL_M_2_SQRTPI_F,MAKE_HEX_FLOAT(0x1.20dd76p+0f, 0x120dd76L, -24)); + TEST_VALUE_EQUAL( "CL_M_SQRT2_F", CL_M_SQRT2_F, MAKE_HEX_FLOAT(0x1.6a09e6p+0f, 0x16a09e6L, -24)); + TEST_VALUE_EQUAL( "CL_M_SQRT1_2_F", CL_M_SQRT1_2_F, MAKE_HEX_FLOAT(0x1.6a09e6p-1f, 0x16a09e6L, -25)); + + return errors; +} + + +const char *kernel_int_float[] = { + "__kernel void test( __global float *float_out, __global int *int_out, __global uint *uint_out) \n" + "{\n" + " int_out[0] = CHAR_BIT;\n" + " int_out[1] = SCHAR_MAX;\n" + " int_out[2] = SCHAR_MIN;\n" + " int_out[3] = CHAR_MAX;\n" + " int_out[4] = CHAR_MIN;\n" + " int_out[5] = UCHAR_MAX;\n" + " int_out[6] = SHRT_MAX;\n" + " int_out[7] = SHRT_MIN;\n" + " int_out[8] = USHRT_MAX;\n" + " int_out[9] = INT_MAX;\n" + " int_out[10] = INT_MIN;\n" + " uint_out[0] = UINT_MAX;\n" + + " int_out[11] = FLT_DIG;\n" + " int_out[12] = FLT_MANT_DIG;\n" + " int_out[13] = FLT_MAX_10_EXP;\n" + " int_out[14] = FLT_MAX_EXP;\n" + " int_out[15] = FLT_MIN_10_EXP;\n" + " int_out[16] = FLT_MIN_EXP;\n" + " int_out[17] = FLT_RADIX;\n" + "#ifdef __IMAGE_SUPPORT__\n" + " int_out[18] = __IMAGE_SUPPORT__;\n" + "#else\n" + " int_out[18] = 0xf00baa;\n" + "#endif\n" + " float_out[0] = FLT_MAX;\n" + " float_out[1] = FLT_MIN;\n" + " float_out[2] = FLT_EPSILON;\n" + " float_out[3] = M_E_F;\n" + " float_out[4] = M_LOG2E_F;\n" + " float_out[5] = M_LOG10E_F;\n" + " float_out[6] = M_LN2_F;\n" + " float_out[7] = M_LN10_F;\n" + " float_out[8] = M_PI_F;\n" + " float_out[9] = M_PI_2_F;\n" + " float_out[10] = M_PI_4_F;\n" + " float_out[11] = M_1_PI_F;\n" + " 
float_out[12] = M_2_PI_F;\n" + " float_out[13] = M_2_SQRTPI_F;\n" + " float_out[14] = M_SQRT2_F;\n" + " float_out[15] = M_SQRT1_2_F;\n" + "}\n" +}; + +const char *kernel_long[] = { + "__kernel void test(__global long *long_out, __global ulong *ulong_out) \n" + "{\n" + " long_out[0] = LONG_MAX;\n" + " long_out[1] = LONG_MIN;\n" + " ulong_out[0] = ULONG_MAX;\n" + "}\n" +}; + +const char *kernel_double[] = { + "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" + "__kernel void test( __global double *double_out, __global long *long_out ) \n " + "{\n" + " long_out[0] = DBL_DIG;\n" + " long_out[1] = DBL_MANT_DIG;\n" + " long_out[2] = DBL_MAX_10_EXP;\n" + " long_out[3] = DBL_MAX_EXP;\n" + " long_out[4] = DBL_MIN_10_EXP;\n" + " long_out[5] = DBL_MIN_EXP;\n" + " long_out[6] = DBL_RADIX;\n" + " double_out[0] = DBL_MAX;\n" + " double_out[1] = DBL_MIN;\n" + " double_out[2] = DBL_EPSILON;\n" + " double_out[3] = M_E;\n" + " double_out[4] = M_LOG2E;\n" + " double_out[5] = M_LOG10E;\n" + " double_out[6] = M_LN2;\n" + " double_out[7] = M_LN10;\n" + " double_out[8] = M_PI;\n" + " double_out[9] = M_PI_2;\n" + " double_out[10] = M_PI_4;\n" + " double_out[11] = M_1_PI;\n" + " double_out[12] = M_2_PI;\n" + " double_out[13] = M_2_SQRTPI;\n" + " double_out[14] = M_SQRT2;\n" + " double_out[15] = M_SQRT1_2;\n" + "}\n" +}; + + +int test_kernel_numeric_constants(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error, errors = 0; + // clProgramWrapper program; + // clKernelWrapper kernel; + // clMemWrapper streams[3]; + cl_program program; + cl_kernel kernel; + cl_mem streams[3]; + + size_t threads[] = {1,1,1}; + cl_float float_out[16]; + cl_int int_out[19]; + cl_uint uint_out[1]; + cl_long long_out[7]; + cl_ulong ulong_out[1]; + cl_double double_out[16]; + + /** INTs and FLOATs **/ + + // Create the kernel + if( create_single_kernel_helper( context, &program, &kernel, 1, kernel_int_float, "test" ) != 0 ) + { + return -1; + } + + /* Create some I/O 
streams */ + streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(float_out), NULL, &error); + test_error( error, "Creating test array failed" ); + streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(int_out), NULL, &error); + test_error( error, "Creating test array failed" ); + streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(uint_out), NULL, &error); + test_error( error, "Creating test array failed" ); + + error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]); + test_error( error, "Unable to set indexed kernel arguments" ); + error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0]); + test_error( error, "Unable to set indexed kernel arguments" ); + error = clSetKernelArg(kernel, 2, sizeof( streams[2] ), &streams[2]); + test_error( error, "Unable to set indexed kernel arguments" ); + + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL ); + test_error( error, "Kernel execution failed" ); + + error = clEnqueueReadBuffer( queue, streams[0], CL_TRUE, 0, sizeof(float_out), (void*)float_out, 0, NULL, NULL ); + test_error( error, "Unable to get result data" ); + error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(int_out), (void*)int_out, 0, NULL, NULL ); + test_error( error, "Unable to get result data" ); + error = clEnqueueReadBuffer( queue, streams[2], CL_TRUE, 0, sizeof(uint_out), (void*)uint_out, 0, NULL, NULL ); + test_error( error, "Unable to get result data" ); + + TEST_VALUE_EQUAL_LITERAL( "CHAR_BIT", int_out[0], 8) + TEST_VALUE_EQUAL_LITERAL( "SCHAR_MAX", int_out[1], 127) + TEST_VALUE_EQUAL_LITERAL( "SCHAR_MIN", int_out[2], (-127-1)) + TEST_VALUE_EQUAL_LITERAL( "CHAR_MAX", int_out[3], CL_SCHAR_MAX) + TEST_VALUE_EQUAL_LITERAL( "CHAR_MIN", int_out[4], CL_SCHAR_MIN) + TEST_VALUE_EQUAL_LITERAL( "UCHAR_MAX", int_out[5], 255) + TEST_VALUE_EQUAL_LITERAL( "SHRT_MAX", int_out[6], 32767) + 
TEST_VALUE_EQUAL_LITERAL( "SHRT_MIN",int_out[7], (-32767-1)) + TEST_VALUE_EQUAL_LITERAL( "USHRT_MAX", int_out[8], 65535) + TEST_VALUE_EQUAL_LITERAL( "INT_MAX", int_out[9], 2147483647) + TEST_VALUE_EQUAL_LITERAL( "INT_MIN", int_out[10], (-2147483647-1)) + TEST_VALUE_EQUAL_LITERAL( "UINT_MAX", uint_out[0], 0xffffffffU) + + TEST_VALUE_EQUAL_LITERAL( "FLT_DIG", int_out[11], 6) + TEST_VALUE_EQUAL_LITERAL( "FLT_MANT_DIG", int_out[12], 24) + TEST_VALUE_EQUAL_LITERAL( "FLT_MAX_10_EXP", int_out[13], +38) + TEST_VALUE_EQUAL_LITERAL( "FLT_MAX_EXP", int_out[14], +128) + TEST_VALUE_EQUAL_LITERAL( "FLT_MIN_10_EXP", int_out[15], -37) + TEST_VALUE_EQUAL_LITERAL( "FLT_MIN_EXP", int_out[16], -125) + TEST_VALUE_EQUAL_LITERAL( "FLT_RADIX", int_out[17], 2) + TEST_VALUE_EQUAL( "FLT_MAX", float_out[0], MAKE_HEX_FLOAT(0x1.fffffep127f, 0x1fffffeL, 103)) + TEST_VALUE_EQUAL( "FLT_MIN", float_out[1], MAKE_HEX_FLOAT(0x1.0p-126f, 0x1L, -126)) + TEST_VALUE_EQUAL( "FLT_EPSILON", float_out[2], MAKE_HEX_FLOAT(0x1.0p-23f, 0x1L, -23)) + TEST_VALUE_EQUAL( "M_E_F", float_out[3], CL_M_E_F ) + TEST_VALUE_EQUAL( "M_LOG2E_F", float_out[4], CL_M_LOG2E_F ) + TEST_VALUE_EQUAL( "M_LOG10E_F", float_out[5], CL_M_LOG10E_F ) + TEST_VALUE_EQUAL( "M_LN2_F", float_out[6], CL_M_LN2_F ) + TEST_VALUE_EQUAL( "M_LN10_F", float_out[7], CL_M_LN10_F ) + TEST_VALUE_EQUAL( "M_PI_F", float_out[8], CL_M_PI_F ) + TEST_VALUE_EQUAL( "M_PI_2_F", float_out[9], CL_M_PI_2_F ) + TEST_VALUE_EQUAL( "M_PI_4_F", float_out[10], CL_M_PI_4_F ) + TEST_VALUE_EQUAL( "M_1_PI_F", float_out[11], CL_M_1_PI_F ) + TEST_VALUE_EQUAL( "M_2_PI_F", float_out[12], CL_M_2_PI_F ) + TEST_VALUE_EQUAL( "M_2_SQRTPI_F", float_out[13], CL_M_2_SQRTPI_F ) + TEST_VALUE_EQUAL( "M_SQRT2_F", float_out[14], CL_M_SQRT2_F ) + TEST_VALUE_EQUAL( "M_SQRT1_2_F", float_out[15], CL_M_SQRT1_2_F ) + + // We need to check these values against what we know is supported on the device + if( checkForImageSupport( deviceID ) == 0 ) + { // has images + // If images are supported, the 
constant should have been defined to the value 1 + if( int_out[18] == 0xf00baa ) + { + log_error( "FAILURE: __IMAGE_SUPPORT__ undefined even though images are supported\n" ); + return -1; + } + else if( int_out[18] != 1 ) + { + log_error( "FAILURE: __IMAGE_SUPPORT__ defined, but to the wrong value (defined as %d, spec states it should be 1)\n", int_out[18] ); + return -1; + } + } + else + { // no images + // If images aren't supported, the constant should be undefined + if( int_out[18] != 0xf00baa ) + { + log_error( "FAILURE: __IMAGE_SUPPORT__ defined to value %d even though images aren't supported", int_out[18] ); + return -1; + } + } + log_info( "\t__IMAGE_SUPPORT__: %d\n", int_out[18]); + + clReleaseMemObject(streams[0]); streams[0] = NULL; + clReleaseMemObject(streams[1]); streams[1] = NULL; + clReleaseMemObject(streams[2]); streams[2] = NULL; + clReleaseKernel(kernel); kernel = NULL; + clReleaseProgram(program); program = NULL; + + /** LONGs **/ + + if(!gHasLong) { + log_info("Longs not supported; skipping long tests.\n"); + } + else + { + // Create the kernel + if( create_single_kernel_helper( context, &program, &kernel, 1, kernel_long, "test" ) != 0 ) + { + return -1; + } + + streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(long_out), NULL, &error); + test_error( error, "Creating test array failed" ); + streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(ulong_out), NULL, &error); + test_error( error, "Creating test array failed" ); + + error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]); + test_error( error, "Unable to set indexed kernel arguments" ); + error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0]); + test_error( error, "Unable to set indexed kernel arguments" ); + + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL ); + test_error( error, "Kernel execution failed" ); + + error = clEnqueueReadBuffer( queue, streams[0], CL_TRUE, 
0, sizeof(long_out), &long_out, 0, NULL, NULL ); + test_error( error, "Unable to get result data" ); + error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(ulong_out), &ulong_out, 0, NULL, NULL ); + test_error( error, "Unable to get result data" ); + + TEST_VALUE_EQUAL_LITERAL( "LONG_MAX", long_out[0], ((cl_long) 0x7FFFFFFFFFFFFFFFLL)) + TEST_VALUE_EQUAL_LITERAL( "LONG_MIN", long_out[1], ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL)) + TEST_VALUE_EQUAL_LITERAL( "ULONG_MAX", ulong_out[0], ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL)) + + clReleaseMemObject(streams[0]); streams[0] = NULL; + clReleaseMemObject(streams[1]); streams[1] = NULL; + clReleaseKernel(kernel); kernel = NULL; + clReleaseProgram(program); program = NULL; + } + + /** DOUBLEs **/ + + if(!is_extension_available(deviceID, "cl_khr_fp64")) { + log_info("Extension cl_khr_fp64 not supported; skipping double tests.\n"); + } + else + { + // Create the kernel + if( create_single_kernel_helper( context, &program, &kernel, 1, kernel_double, "test" ) != 0 ) + { + return -1; + } + + streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(double_out), NULL, &error); + test_error( error, "Creating test array failed" ); + streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(long_out), NULL, &error); + test_error( error, "Creating test array failed" ); + + error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]); + test_error( error, "Unable to set indexed kernel arguments" ); + error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0]); + test_error( error, "Unable to set indexed kernel arguments" ); + + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL ); + test_error( error, "Kernel execution failed" ); + + error = clEnqueueReadBuffer( queue, streams[0], CL_TRUE, 0, sizeof(double_out), &double_out, 0, NULL, NULL ); + test_error( error, "Unable to get result data" ); + error = clEnqueueReadBuffer( queue, 
streams[1], CL_TRUE, 0, sizeof(long_out), &long_out, 0, NULL, NULL ); + test_error( error, "Unable to get result data" ); + + TEST_VALUE_EQUAL_LITERAL( "DBL_DIG", long_out[0], 15) + TEST_VALUE_EQUAL_LITERAL( "DBL_MANT_DIG", long_out[1], 53) + TEST_VALUE_EQUAL_LITERAL( "DBL_MAX_10_EXP", long_out[2], +308) + TEST_VALUE_EQUAL_LITERAL( "DBL_MAX_EXP", long_out[3], +1024) + TEST_VALUE_EQUAL_LITERAL( "DBL_MIN_10_EXP", long_out[4], -307) + TEST_VALUE_EQUAL_LITERAL( "DBL_MIN_EXP", long_out[5], -1021) + TEST_VALUE_EQUAL_LITERAL( "DBL_RADIX", long_out[6], 2) + TEST_VALUE_EQUAL( "DBL_MAX", double_out[0], MAKE_HEX_DOUBLE(0x1.fffffffffffffp1023, 0x1fffffffffffffLL, 971)) + TEST_VALUE_EQUAL( "DBL_MIN", double_out[1], MAKE_HEX_DOUBLE(0x1.0p-1022, 0x1LL, -1022)) + TEST_VALUE_EQUAL( "DBL_EPSILON", double_out[2], MAKE_HEX_DOUBLE(0x1.0p-52, 0x1LL, -52)) + //TEST_VALUE_EQUAL( "M_E", double_out[3], CL_M_E ) + TEST_VALUE_EQUAL( "M_LOG2E", double_out[4], CL_M_LOG2E ) + TEST_VALUE_EQUAL( "M_LOG10E", double_out[5], CL_M_LOG10E ) + TEST_VALUE_EQUAL( "M_LN2", double_out[6], CL_M_LN2 ) + TEST_VALUE_EQUAL( "M_LN10", double_out[7], CL_M_LN10 ) + TEST_VALUE_EQUAL( "M_PI", double_out[8], CL_M_PI ) + TEST_VALUE_EQUAL( "M_PI_2", double_out[9], CL_M_PI_2 ) + TEST_VALUE_EQUAL( "M_PI_4", double_out[10], CL_M_PI_4 ) + TEST_VALUE_EQUAL( "M_1_PI", double_out[11], CL_M_1_PI ) + TEST_VALUE_EQUAL( "M_2_PI", double_out[12], CL_M_2_PI ) + TEST_VALUE_EQUAL( "M_2_SQRTPI", double_out[13], CL_M_2_SQRTPI ) + TEST_VALUE_EQUAL( "M_SQRT2", double_out[14], CL_M_SQRT2 ) + TEST_VALUE_EQUAL( "M_SQRT1_2", double_out[15], CL_M_SQRT1_2 ) + + clReleaseMemObject(streams[0]); streams[0] = NULL; + clReleaseMemObject(streams[1]); streams[1] = NULL; + clReleaseKernel(kernel); kernel = NULL; + clReleaseProgram(program); program = NULL; + } + + error = clFinish(queue); + test_error(error, "clFinish failed"); + + return errors; +} + + +const char *kernel_constant_limits[] = { + "__kernel void test( __global int *intOut, __global 
float *floatOut ) \n" + "{\n" + " intOut[0] = isinf( MAXFLOAT ) ? 1 : 0;\n" + " intOut[1] = isnormal( MAXFLOAT ) ? 1 : 0;\n" + " intOut[2] = isnan( MAXFLOAT ) ? 1 : 0;\n" + " intOut[3] = sizeof( MAXFLOAT );\n" + " intOut[4] = ( MAXFLOAT == FLT_MAX ) ? 1 : 0;\n" + // " intOut[5] = ( MAXFLOAT == CL_FLT_MAX ) ? 1 : 0;\n" + " intOut[6] = ( MAXFLOAT == MAXFLOAT ) ? 1 : 0;\n" + " intOut[7] = ( MAXFLOAT == 0x1.fffffep127f ) ? 1 : 0;\n" + " floatOut[0] = MAXFLOAT;\n" + "}\n" +}; + +const char *kernel_constant_extended_limits[] = { + "__kernel void test( __global int *intOut, __global float *floatOut ) \n" + "{\n" + " intOut[0] = ( INFINITY == HUGE_VALF ) ? 1 : 0;\n" + " intOut[1] = sizeof( INFINITY );\n" + " intOut[2] = isinf( INFINITY ) ? 1 : 0;\n" + " intOut[3] = isnormal( INFINITY ) ? 1 : 0;\n" + " intOut[4] = isnan( INFINITY ) ? 1 : 0;\n" + " intOut[5] = ( INFINITY > MAXFLOAT ) ? 1 : 0;\n" + " intOut[6] = ( -INFINITY < -MAXFLOAT ) ? 1 : 0;\n" + " intOut[7] = ( ( MAXFLOAT + MAXFLOAT ) == INFINITY ) ? 1 : 0;\n" + " intOut[8] = ( nextafter( MAXFLOAT, INFINITY ) == INFINITY ) ? 1 : 0;\n" + " intOut[9] = ( nextafter( -MAXFLOAT, -INFINITY ) == -INFINITY ) ? 1 : 0;\n" + " intOut[10] = ( INFINITY == INFINITY ) ? 1 : 0;\n" + " intOut[11] = ( as_uint( INFINITY ) == 0x7f800000 ) ? 1 : 0;\n" + " floatOut[0] = INFINITY;\n" + "\n" + " intOut[12] = sizeof( HUGE_VALF );\n" + " intOut[13] = ( HUGE_VALF == INFINITY ) ? 1 : 0;\n" + " floatOut[1] = HUGE_VALF;\n" + "\n" + " intOut[14] = ( NAN == NAN ) ? 1 : 0;\n" + " intOut[15] = ( NAN != NAN ) ? 1 : 0;\n" + " intOut[16] = isnan( NAN ) ? 1 : 0;\n" + " intOut[17] = isinf( NAN ) ? 1 : 0;\n" + " intOut[18] = isnormal( NAN ) ? 1 : 0;\n" + " intOut[19] = ( ( as_uint( NAN ) & 0x7fffffff ) > 0x7f800000 ) ? 1 : 0;\n" + " intOut[20] = sizeof( NAN );\n" + " floatOut[2] = NAN;\n" + "\n" + " intOut[21] = isnan( INFINITY / INFINITY ) ? 1 : 0;\n" + " intOut[22] = isnan( INFINITY - INFINITY ) ? 1 : 0;\n" + " intOut[23] = isnan( 0.f / 0.f ) ? 
1 : 0;\n" + " intOut[24] = isnan( INFINITY * 0.f ) ? 1 : 0;\n" + " intOut[25] = ( INFINITY == NAN ); \n" + " intOut[26] = ( -INFINITY == NAN ); \n" + " intOut[27] = ( INFINITY > NAN ); \n" + " intOut[28] = ( -INFINITY < NAN ); \n" + " intOut[29] = ( INFINITY != NAN ); \n" + " intOut[30] = ( NAN > INFINITY ); \n" + " intOut[31] = ( NAN < -INFINITY ); \n" + + "}\n" +}; + +const char *kernel_constant_double_limits[] = { + "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" + "__kernel void test( __global int *intOut, __global double *doubleOut ) \n" + "{\n" + " intOut[0] = sizeof( HUGE_VAL );\n" + " intOut[1] = ( HUGE_VAL == INFINITY ) ? 1 : 0;\n" + " intOut[2] = isinf( HUGE_VAL ) ? 1 : 0;\n" + " intOut[3] = isnormal( HUGE_VAL ) ? 1 : 0;\n" + " intOut[4] = isnan( HUGE_VAL ) ? 1 : 0;\n" + " intOut[5] = ( HUGE_VAL == HUGE_VALF ) ? 1 : 0;\n" + " intOut[6] = ( as_ulong( HUGE_VAL ) == 0x7ff0000000000000UL ) ? 1 : 0;\n" + " doubleOut[0] = HUGE_VAL;\n" + "}\n" +}; + +#define TEST_FLOAT_ASSERTION( a, msg, f ) if( !( a ) ) { log_error( "ERROR: Float constant failed requirement: %s (bitwise value is 0x%8.8x)\n", msg, *( (uint32_t *)&f ) ); return -1; } +#define TEST_DOUBLE_ASSERTION( a, msg, f ) if( !( a ) ) { log_error( "ERROR: Double constant failed requirement: %s (bitwise value is 0x%16.16llx)\n", msg, *( (uint64_t *)&f ) ); return -1; } + +int test_kernel_limit_constants(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + size_t threads[] = {1,1,1}; + clMemWrapper intStream, floatStream, doubleStream; + cl_int intOut[ 32 ]; + cl_float floatOut[ 3 ]; + cl_double doubleOut[ 1 ]; + + + /* Create some I/O streams */ + intStream = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(intOut), NULL, &error ); + test_error( error, "Creating test array failed" ); + floatStream = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(floatOut), NULL, &error ); + test_error( error, "Creating test array failed" 
); + + // Stage 1: basic limits on MAXFLOAT + { + clProgramWrapper program; + clKernelWrapper kernel; + + if( create_single_kernel_helper( context, &program, &kernel, 1, kernel_constant_limits, "test" ) != 0 ) + { + return -1; + } + + error = clSetKernelArg( kernel, 0, sizeof( intStream ), &intStream ); + test_error( error, "Unable to set indexed kernel arguments" ); + error = clSetKernelArg( kernel, 1, sizeof( floatStream ), &floatStream ); + test_error( error, "Unable to set indexed kernel arguments" ); + + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL ); + test_error( error, "Kernel execution failed" ); + + error = clEnqueueReadBuffer( queue, intStream, CL_TRUE, 0, sizeof(intOut), intOut, 0, NULL, NULL ); + test_error( error, "Unable to get result data" ); + error = clEnqueueReadBuffer( queue, floatStream, CL_TRUE, 0, sizeof(floatOut), floatOut, 0, NULL, NULL ); + test_error( error, "Unable to get result data" ); + + // Test MAXFLOAT properties + TEST_FLOAT_ASSERTION( intOut[0] == 0, "isinf( MAXFLOAT ) = false", floatOut[0] ) + TEST_FLOAT_ASSERTION( intOut[1] == 1, "isnormal( MAXFLOAT ) = true", floatOut[0] ) + TEST_FLOAT_ASSERTION( intOut[2] == 0, "isnan( MAXFLOAT ) = false", floatOut[0] ) + TEST_FLOAT_ASSERTION( intOut[3] == 4, "sizeof( MAXFLOAT ) = 4", floatOut[0] ) + TEST_FLOAT_ASSERTION( intOut[4] == 1, "MAXFLOAT = FLT_MAX", floatOut[0] ) + TEST_FLOAT_ASSERTION( floatOut[0] == CL_FLT_MAX, "MAXFLOAT = CL_FLT_MAX", floatOut[0] ) + TEST_FLOAT_ASSERTION( intOut[6] == 1, "MAXFLOAT = MAXFLOAT", floatOut[0] ) + TEST_FLOAT_ASSERTION( floatOut[0] == MAKE_HEX_FLOAT( 0x1.fffffep127f, 0x1fffffeL, 103), "MAXFLOAT = 0x1.fffffep127f", floatOut[0] ) + } + + // Stage 2: INFINITY and NAN + char profileStr[128] = ""; + error = clGetDeviceInfo( deviceID, CL_DEVICE_PROFILE, sizeof( profileStr ), &profileStr, NULL ); + test_error( error, "Unable to run INFINITY/NAN tests (unable to get CL_DEVICE_PROFILE" ); + + bool testInfNan = true; + if( 
strcmp( profileStr, "EMBEDDED_PROFILE" ) == 0 ) + { + // We test if we're not an embedded profile, OR if the inf/nan flag in the config is set + cl_device_fp_config single = 0; + error = clGetDeviceInfo( deviceID, CL_DEVICE_SINGLE_FP_CONFIG, sizeof( single ), &single, NULL ); + test_error( error, "Unable to run INFINITY/NAN tests (unable to get FP_CONFIG bits)" ); + + if( ( single & CL_FP_INF_NAN ) == 0 ) + { + log_info( "Skipping INFINITY and NAN tests on embedded device (INF/NAN not supported on this device)" ); + testInfNan = false; + } + } + + if( testInfNan ) + { + clProgramWrapper program; + clKernelWrapper kernel; + + if( create_single_kernel_helper( context, &program, &kernel, 1, kernel_constant_extended_limits, "test" ) != 0 ) + { + return -1; + } + + error = clSetKernelArg( kernel, 0, sizeof( intStream ), &intStream ); + test_error( error, "Unable to set indexed kernel arguments" ); + error = clSetKernelArg( kernel, 1, sizeof( floatStream ), &floatStream ); + test_error( error, "Unable to set indexed kernel arguments" ); + + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL ); + test_error( error, "Kernel execution failed" ); + + error = clEnqueueReadBuffer( queue, intStream, CL_TRUE, 0, sizeof(intOut), intOut, 0, NULL, NULL ); + test_error( error, "Unable to get result data" ); + error = clEnqueueReadBuffer( queue, floatStream, CL_TRUE, 0, sizeof(floatOut), floatOut, 0, NULL, NULL ); + test_error( error, "Unable to get result data" ); + + TEST_FLOAT_ASSERTION( intOut[0] == 1, "INFINITY == HUGE_VALF", intOut[0] ) + TEST_FLOAT_ASSERTION( intOut[1] == 4, "sizeof( INFINITY ) == 4", intOut[1] ) + TEST_FLOAT_ASSERTION( intOut[2] == 1, "isinf( INFINITY ) == true", intOut[2] ) + TEST_FLOAT_ASSERTION( intOut[3] == 0, "isnormal( INFINITY ) == false", intOut[3] ) + TEST_FLOAT_ASSERTION( intOut[4] == 0, "isnan( INFINITY ) == false", intOut[4] ) + TEST_FLOAT_ASSERTION( intOut[5] == 1, "INFINITY > MAXFLOAT", intOut[5] ) + 
TEST_FLOAT_ASSERTION( intOut[6] == 1, "-INFINITY < -MAXFLOAT", intOut[6] ) + TEST_FLOAT_ASSERTION( intOut[7] == 1, "( MAXFLOAT + MAXFLOAT ) == INFINITY", intOut[7] ) + TEST_FLOAT_ASSERTION( intOut[8] == 1, "nextafter( MAXFLOAT, INFINITY ) == INFINITY", intOut[8] ) + TEST_FLOAT_ASSERTION( intOut[9] == 1, "nextafter( -MAXFLOAT, -INFINITY ) == -INFINITY", intOut[9] ) + TEST_FLOAT_ASSERTION( intOut[10] == 1, "INFINITY = INFINITY", intOut[10] ) + TEST_FLOAT_ASSERTION( intOut[11] == 1, "asuint( INFINITY ) == 0x7f800000", intOut[11] ) + TEST_FLOAT_ASSERTION( *( (uint32_t *)&floatOut[0] ) == 0x7f800000, "asuint( INFINITY ) == 0x7f800000", floatOut[0] ) + TEST_FLOAT_ASSERTION( floatOut[1] == INFINITY, "INFINITY == INFINITY", floatOut[1] ) + + TEST_FLOAT_ASSERTION( intOut[12] == 4, "sizeof( HUGE_VALF ) == 4", intOut[12] ) + TEST_FLOAT_ASSERTION( intOut[13] == 1, "HUGE_VALF == INFINITY", intOut[13] ) + TEST_FLOAT_ASSERTION( floatOut[1] == HUGE_VALF, "HUGE_VALF == HUGE_VALF", floatOut[1] ) + + TEST_FLOAT_ASSERTION( intOut[14] == 0, "(NAN == NAN) = false", intOut[14] ) + TEST_FLOAT_ASSERTION( intOut[15] == 1, "(NAN != NAN) = true", intOut[15] ) + TEST_FLOAT_ASSERTION( intOut[16] == 1, "isnan( NAN ) = true", intOut[16] ) + TEST_FLOAT_ASSERTION( intOut[17] == 0, "isinf( NAN ) = false", intOut[17] ) + TEST_FLOAT_ASSERTION( intOut[18] == 0, "isnormal( NAN ) = false", intOut[18] ) + TEST_FLOAT_ASSERTION( intOut[19] == 1, "( as_uint( NAN ) & 0x7fffffff ) > 0x7f800000", intOut[19] ) + TEST_FLOAT_ASSERTION( intOut[20] == 4, "sizeof( NAN ) = 4", intOut[20] ) + TEST_FLOAT_ASSERTION( ( *( (uint32_t *)&floatOut[2] ) & 0x7fffffff ) > 0x7f800000, "( as_uint( NAN ) & 0x7fffffff ) > 0x7f800000", floatOut[2] ) + + TEST_FLOAT_ASSERTION( intOut[ 21 ] == 1, "isnan( INFINITY / INFINITY ) = true", intOut[ 21 ] ) + TEST_FLOAT_ASSERTION( intOut[ 22 ] == 1, "isnan( INFINITY - INFINITY ) = true", intOut[ 22 ] ) + TEST_FLOAT_ASSERTION( intOut[ 23 ] == 1, "isnan( 0.f / 0.f ) = true", intOut[ 23 ] ) + 
TEST_FLOAT_ASSERTION( intOut[ 24 ] == 1, "isnan( INFINITY * 0.f ) = true", intOut[ 24 ] ) + TEST_FLOAT_ASSERTION( intOut[ 25 ] == 0, "( INFINITY == NAN ) = false", intOut[ 25 ] ) + TEST_FLOAT_ASSERTION( intOut[ 26 ] == 0, "(-INFINITY == NAN ) = false", intOut[ 26 ] ) + TEST_FLOAT_ASSERTION( intOut[ 27 ] == 0, "( INFINITY > NAN ) = false", intOut[ 27 ] ) + TEST_FLOAT_ASSERTION( intOut[ 28 ] == 0, "(-INFINITY < NAN ) = false", intOut[ 28 ] ) + TEST_FLOAT_ASSERTION( intOut[ 29 ] == 1, "( INFINITY != NAN ) = true", intOut[ 29 ] ) + TEST_FLOAT_ASSERTION( intOut[ 30 ] == 0, "( NAN < INFINITY ) = false", intOut[ 30 ] ) + TEST_FLOAT_ASSERTION( intOut[ 31 ] == 0, "( NAN > -INFINITY ) = false", intOut[ 31 ] ) + } + + // Stage 3: limits on HUGE_VAL (double) + if( !is_extension_available( deviceID, "cl_khr_fp64" ) ) + log_info( "Note: Skipping double HUGE_VAL tests (doubles unsupported on device)\n" ); + else + { + cl_device_fp_config config = 0; + error = clGetDeviceInfo( deviceID, CL_DEVICE_DOUBLE_FP_CONFIG, sizeof( config ), &config, NULL ); + test_error( error, "Unable to run INFINITY/NAN tests (unable to get double FP_CONFIG bits)" ); + + if( ( config & CL_FP_INF_NAN ) == 0 ) + log_info( "Skipping HUGE_VAL tests (INF/NAN not supported on this device)" ); + else + { + clProgramWrapper program; + clKernelWrapper kernel; + + if( create_single_kernel_helper( context, &program, &kernel, 1, kernel_constant_double_limits, "test" ) != 0 ) + { + return -1; + } + + doubleStream = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(doubleOut), NULL, &error ); + test_error( error, "Creating test array failed" ); + + error = clSetKernelArg( kernel, 0, sizeof( intStream ), &intStream ); + test_error( error, "Unable to set indexed kernel arguments" ); + error = clSetKernelArg( kernel, 1, sizeof( doubleStream ), &doubleStream ); + test_error( error, "Unable to set indexed kernel arguments" ); + + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, 
NULL, NULL ); + test_error( error, "Kernel execution failed" ); + + error = clEnqueueReadBuffer( queue, intStream, CL_TRUE, 0, sizeof(intOut), intOut, 0, NULL, NULL ); + test_error( error, "Unable to get result data" ); + error = clEnqueueReadBuffer( queue, doubleStream, CL_TRUE, 0, sizeof(doubleOut), doubleOut, 0, NULL, NULL ); + test_error( error, "Unable to get result data" ); + + TEST_DOUBLE_ASSERTION( intOut[0] == 8, "sizeof( HUGE_VAL ) = 8", intOut[0] ) + TEST_DOUBLE_ASSERTION( intOut[1] == 1, "HUGE_VAL = INFINITY", intOut[1] ) + TEST_DOUBLE_ASSERTION( intOut[2] == 1, "isinf( HUGE_VAL ) = true", intOut[2] ) + TEST_DOUBLE_ASSERTION( intOut[3] == 0, "isnormal( HUGE_VAL ) = false", intOut[3] ) + TEST_DOUBLE_ASSERTION( intOut[4] == 0, "isnan( HUGE_VAL ) = false", intOut[4] ) + TEST_DOUBLE_ASSERTION( intOut[5] == 1, "HUGE_VAL = HUGE_VAL", intOut[5] ) + TEST_DOUBLE_ASSERTION( intOut[6] == 1, "as_ulong( HUGE_VAL ) = 0x7ff0000000000000UL", intOut[6] ) + TEST_DOUBLE_ASSERTION( *( (uint64_t *)&doubleOut[0] ) == 0x7ff0000000000000ULL, "as_ulong( HUGE_VAL ) = 0x7ff0000000000000UL", doubleOut[0] ) + } + } + + return 0; +} + + diff --git a/test_conformance/basic/test_pointercast.c b/test_conformance/basic/test_pointercast.c new file mode 100644 index 00000000..0b343f95 --- /dev/null +++ b/test_conformance/basic/test_pointercast.c @@ -0,0 +1,141 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include +#include + + +#include "procs.h" + +static const char *pointer_cast_kernel_code = +"__kernel void test_pointer_cast(__global unsigned char *src, __global unsigned int *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +" __global unsigned int *p = (__global unsigned int *)src;\n" +"\n" +" dst[tid] = p[tid];\n" +"\n" +"}\n"; + + +int +verify_pointer_cast(unsigned char *inptr, unsigned int *outptr, int n) +{ + unsigned int *p = (unsigned int *)inptr; + int i; + cl_uint r; + + for (i=0; i + +// Test __FILE__, __LINE__, __OPENCL_VERSION__, __OPENCL_C_VERSION__, __ENDIAN_LITTLE__, __ROUNDING_MODE__, __IMAGE_SUPPORT__, __FAST_RELAXED_MATH__ +// __kernel_exec + +const char *preprocessor_test = { + "#line 2 \"%s\"\n" + "__kernel void test( __global int *results, __global char *outFileString, __global char *outRoundingString )\n" + "{\n" + + // Integer preprocessor macros + "#ifdef __IMAGE_SUPPORT__\n" + " results[0] = __IMAGE_SUPPORT__;\n" + "#else\n" + " results[0] = 0xf00baa;\n" + "#endif\n" + + "#ifdef __ENDIAN_LITTLE__\n" + " results[1] = __ENDIAN_LITTLE__;\n" + "#else\n" + " results[1] = 0xf00baa;\n" + "#endif\n" + + "#ifdef __OPENCL_VERSION__\n" + " results[2] = __OPENCL_VERSION__;\n" + "#else\n" + " results[2] = 0xf00baa;\n" + "#endif\n" + + "#ifdef __OPENCL_C_VERSION__\n" + " results[3] = __OPENCL_C_VERSION__;\n" + "#else\n" + " results[3] = 0xf00baa;\n" + "#endif\n" + + "#ifdef __LINE__\n" + " results[4] = __LINE__;\n" + "#else\n" + " results[4] = 0xf00baa;\n" + "#endif\n" + +#if 0 // Removed by Affie's request 2/24 + "#ifdef __FAST_RELAXED_MATH__\n" + " results[5] = __FAST_RELAXED_MATH__;\n" + "#else\n" + " results[5] = 0xf00baa;\n" + "#endif\n" +#endif + + "#ifdef __kernel_exec\n" + " results[6] = 1;\n" // By spec, we can only really evaluate that it is defined, not what it expands to + "#else\n" + " results[6] = 0xf00baa;\n" + "#endif\n" + + // String preprocessor 
macros. Technically, there are strings in OpenCL, but not really. + "#ifdef __FILE__\n" + " int i;\n" + " constant char *f = \"\" __FILE__;\n" + " for( i = 0; f[ i ] != 0 && i < 512; i++ )\n" + " outFileString[ i ] = f[ i ];\n" + " outFileString[ i ] = 0;\n" + "#else\n" + " outFileString[ 0 ] = 0;\n" + "#endif\n" + + "}\n" + }; + +int test_kernel_preprocessor_macros(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper streams[ 3 ]; + + int error; + size_t threads[] = {1,1,1}; + + cl_int results[ 7 ]; + cl_char fileString[ 512 ] = "", roundingString[ 128 ] = ""; + char programSource[4096]; + char curFileName[512]; + char *programPtr = programSource; + int i = 0; + snprintf(curFileName, 512, "%s", __FILE__); +#ifdef _WIN32 + // Replace "\" with "\\" + while(curFileName[i] != '\0') { + if (curFileName[i] == '\\') { + int j = i + 1; + char prev = '\\'; + while (curFileName[j - 1] != '\0') { + char tmp = curFileName[j]; + curFileName[j] = prev; + prev = tmp; + j++; + } + i++; + } + i++; + } +#endif + sprintf(programSource,preprocessor_test,curFileName); + + // Create the kernel + if( create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&programPtr, "test" ) != 0 ) + { + return -1; + } + + /* Create some I/O streams */ + streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(results), NULL, &error); + test_error( error, "Creating test array failed" ); + streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(fileString), NULL, &error); + test_error( error, "Creating test array failed" ); + streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(roundingString), NULL, &error); + test_error( error, "Creating test array failed" ); + + // Set up and run + for( int i = 0; i < 3; i++ ) + { + error = clSetKernelArg( kernel, i, sizeof( streams[i] ), &streams[i] ); + test_error( error, 
"Unable to set indexed kernel arguments" ); + } + + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL ); + test_error( error, "Kernel execution failed" ); + + error = clEnqueueReadBuffer( queue, streams[0], CL_TRUE, 0, sizeof(results), results, 0, NULL, NULL ); + test_error( error, "Unable to get result data" ); + error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(fileString), fileString, 0, NULL, NULL ); + test_error( error, "Unable to get result data" ); + error = clEnqueueReadBuffer( queue, streams[2], CL_TRUE, 0, sizeof(roundingString), roundingString, 0, NULL, NULL ); + test_error( error, "Unable to get result data" ); + + + /////// Check the integer results + + // We need to check these values against what we know is supported on the device + if( checkForImageSupport( deviceID ) == 0 ) + { + // If images are supported, the constant should have been defined to the value 1 + if( results[ 0 ] == 0xf00baa ) + { + log_error( "ERROR: __IMAGE_SUPPORT__ undefined even though images are supported\n" ); + return -1; + } + else if( results[ 0 ] != 1 ) + { + log_error( "ERROR: __IMAGE_SUPPORT__ defined, but to the wrong value (defined as %d, spec states it should be 1)\n", (int)results[ 0 ] ); + return -1; + } + } + else + { + // If images aren't supported, the constant should be undefined + if( results[ 0 ] != 0xf00baa ) + { + log_error( "ERROR: __IMAGE_SUPPORT__ defined to value %d even though images aren't supported", (int)results[ 0 ] ); + return -1; + } + } + + // __ENDIAN_LITTLE__ is similar to __IMAGE_SUPPORT__: 1 if it's true, undefined if it isn't + cl_bool deviceIsLittleEndian; + error = clGetDeviceInfo( deviceID, CL_DEVICE_ENDIAN_LITTLE, sizeof( deviceIsLittleEndian ), &deviceIsLittleEndian, NULL ); + test_error( error, "Unable to get endian property of device to validate against" ); + + if( deviceIsLittleEndian ) + { + if( results[ 1 ] == 0xf00baa ) + { + log_error( "ERROR: __ENDIAN_LITTLE__ undefined even 
though the device is little endian\n" ); + return -1; + } + else if( results[ 1 ] != 1 ) + { + log_error( "ERROR: __ENDIAN_LITTLE__ defined, but to the wrong value (defined as %d, spec states it should be 1)\n", (int)results[ 1 ] ); + return -1; + } + } + else + { + if( results[ 1 ] != 0xf00baa ) + { + log_error( "ERROR: __ENDIAN_LITTLE__ defined to value %d even though the device is not little endian (should be undefined per spec)", (int)results[ 1 ] ); + return -1; + } + } + + // __OPENCL_VERSION__ + if( results[ 2 ] == 0xf00baa ) + { + log_error( "ERROR: Kernel preprocessor __OPENCL_VERSION__ undefined!" ); + return -1; + } + + // The OpenCL version reported by the macro reports the feature level supported by the compiler. Since + // this doesn't directly match any property we can query, we just check to see if it's a sane value + char versionBuffer[ 128 ]; + error = clGetDeviceInfo( deviceID, CL_DEVICE_VERSION, sizeof( versionBuffer ), versionBuffer, NULL ); + test_error( error, "Unable to get device's version to validate against" ); + + // We need to parse to get the version number to compare against + char *p1, *p2, *p3; + for( p1 = versionBuffer; ( *p1 != 0 ) && !isdigit( *p1 ); p1++ ) + ; + for( p2 = p1; ( *p2 != 0 ) && ( *p2 != '.' ); p2++ ) + ; + for( p3 = p2; ( *p3 != 0 ) && ( *p3 != ' ' ); p3++ ) + ; + + if( p2 == p3 ) + { + log_error( "ERROR: Unable to verify OpenCL version string (platform string is incorrect format)\n" ); + return -1; + } + *p2 = 0; + *p3 = 0; + int major = atoi( p1 ); + int minor = atoi( p2 + 1 ); + int realVersion = ( major * 100 ) + ( minor * 10 ); + if( ( results[ 2 ] < 100 ) || ( results[ 2 ] > realVersion ) ) + { + log_error( "ERROR: Kernel preprocessor __OPENCL_VERSION__ does not make sense w.r.t. device's version string! 
" + "(preprocessor states %d, real version is %d (%d.%d))\n", results[ 2 ], realVersion, major, minor ); + return -1; + } + + // __OPENCL_C_VERSION__ + if( results[ 3 ] == 0xf00baa ) + { + log_error( "ERROR: Kernel preprocessor __OPENCL_C_VERSION__ undefined!\n" ); + return -1; + } + + // The OpenCL C version reported by the macro reports the OpenCL C supported by the compiler for this OpenCL device. + char cVersionBuffer[ 128 ]; + error = clGetDeviceInfo( deviceID, CL_DEVICE_OPENCL_C_VERSION, sizeof( cVersionBuffer ), cVersionBuffer, NULL ); + test_error( error, "Unable to get device's OpenCL C version to validate against" ); + + // We need to parse to get the version number to compare against + for( p1 = cVersionBuffer; ( *p1 != 0 ) && !isdigit( *p1 ); p1++ ) + ; + for( p2 = p1; ( *p2 != 0 ) && ( *p2 != '.' ); p2++ ) + ; + for( p3 = p2; ( *p3 != 0 ) && ( *p3 != ' ' ); p3++ ) + ; + + if( p2 == p3 ) + { + log_error( "ERROR: Unable to verify OpenCL C version string (platform string is incorrect format)\n" ); + return -1; + } + *p2 = 0; + *p3 = 0; + major = atoi( p1 ); + minor = atoi( p2 + 1 ); + realVersion = ( major * 100 ) + ( minor * 10 ); + if( ( results[ 3 ] < 100 ) || ( results[ 3 ] > realVersion ) ) + { + log_error( "ERROR: Kernel preprocessor __OPENCL_C_VERSION__ does not make sense w.r.t. device's version string! " + "(preprocessor states %d, real version is %d (%d.%d))\n", results[ 2 ], realVersion, major, minor ); + return -1; + } + + // __LINE__ + if( results[ 4 ] == 0xf00baa ) + { + log_error( "ERROR: Kernel preprocessor __LINE__ undefined!" ); + return -1; + } + + // This is fun--we get to search for where __LINE__ actually is so we know what line it should define to! + // Note: it shows up twice, once for the #ifdef, and the other for the actual result output + const char *linePtr = strstr( preprocessor_test, "__LINE__" ); + if( linePtr == NULL ) + { + log_error( "ERROR: Nonsensical NULL pointer encountered!" 
); + return -2; + } + linePtr = strstr( linePtr + strlen( "__LINE__" ), "__LINE__" ); + if( linePtr == NULL ) + { + log_error( "ERROR: Nonsensical NULL pointer encountered!" ); + return -2; + } + + // Now count how many carriage returns are before the string + const char *retPtr = strchr( preprocessor_test, '\n' ); + int retCount = 1; + for( ; ( retPtr < linePtr ) && ( retPtr != NULL ); retPtr = strchr( retPtr + 1, '\n' ) ) + retCount++; + + if( retCount != results[ 4 ] ) + { + log_error( "ERROR: Kernel preprocessor __LINE__ does not expand to the actual line number! (expanded to %d, but was on line %d)\n", + results[ 4 ], retCount ); + return -1; + } + +#if 0 // Removed by Affie's request 2/24 + // __FAST_RELAXED_MATH__ + // Since create_single_kernel_helper does NOT define -cl-fast-relaxed-math, this should be undefined + if( results[ 5 ] != 0xf00baa ) + { + log_error( "ERROR: Kernel preprocessor __FAST_RELAXED_MATH__ defined even though build option was not used (should be undefined)\n" ); + return -1; + } +#endif + + // __kernel_exec + // We can ONLY check to verify that it is defined + if( results[ 6 ] == 0xf00baa ) + { + log_error( "ERROR: Kernel preprocessor __kernel_exec must be defined\n" ); + return -1; + } + + //// String preprocessors + + // Since we provided the program directly, __FILE__ should compile to "". 
+ if( fileString[ 0 ] == 0 ) + { + log_error( "ERROR: Kernel preprocessor __FILE__ undefined!\n" ); + return -1; + } + else if( strncmp( (char *)fileString, __FILE__, 512 ) != 0 ) + { + log_info( "WARNING: __FILE__ defined, but to an unexpected value (%s)\n\tShould be: \"%s\"", fileString, __FILE__ ); + return -1; + } + + +#if 0 // Removed by Affie's request 2/24 + // One more try through: try with -cl-fast-relaxed-math to make sure the appropriate preprocessor gets defined + clProgramWrapper programB = clCreateProgramWithSource( context, 1, preprocessor_test, NULL, &error ); + test_error( error, "Unable to create test program" ); + + // Try compiling + error = clBuildProgram( programB, 1, &deviceID, "-cl-fast-relaxed-math", NULL, NULL ); + test_error( error, "Unable to build program" ); + + // Create a kernel again to run against + clKernelWrapper kernelB = clCreateKernel( programB, "test", &error ); + test_error( error, "Unable to create testing kernel" ); + + // Set up and run + for( int i = 0; i < 3; i++ ) + { + error = clSetKernelArg( kernelB, i, sizeof( streams[i] ), &streams[i] ); + test_error( error, "Unable to set indexed kernel arguments" ); + } + + error = clEnqueueNDRangeKernel( queue, kernelB, 1, NULL, threads, NULL, 0, NULL, NULL ); + test_error( error, "Kernel execution failed" ); + + // Only need the one read + error = clEnqueueReadBuffer( queue, streams[0], CL_TRUE, 0, sizeof(results), results, 0, NULL, NULL ); + test_error( error, "Unable to get result data" ); + + // We only need to check the one result this time + if( results[ 5 ] == 0xf00baa ) + { + log_error( "ERROR: Kernel preprocessor __FAST_RELAXED_MATH__ not defined!\n" ); + return -1; + } + else if( results[ 5 ] != 1 ) + { + log_error( "ERROR: Kernel preprocessor __FAST_RELAXED_MATH__ not defined to 1 (was %d)\n", results[ 5 ] ); + return -1; + } +#endif + + return 0; +} + diff --git a/test_conformance/basic/test_progvar.cpp b/test_conformance/basic/test_progvar.cpp new file mode 100644 
index 00000000..7445444e --- /dev/null +++ b/test_conformance/basic/test_progvar.cpp @@ -0,0 +1,1555 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" + +// Bug: Missing in spec: atomic_intptr_t is always supported if device is 32-bits. +// Bug: Missing in spec: CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE + +#define FLUSH fflush(stdout) + +#define MAX_STR 16*1024 + +#define ALIGNMENT 128 + +#define OPTIONS "-cl-std=CL2.0" + +// NUM_ROUNDS must be at least 1. +// It determines how many sets of random data we push through the global +// variables. +#define NUM_ROUNDS 1 + +// This is a shared property of the writer and reader kernels. +#define NUM_TESTED_VALUES 5 + +// TODO: pointer-to-half (and its vectors) +// TODO: union of... 
+ +#include +#include +#include +#include +#include +#include +#include +#include +#include "../../test_common/harness/typeWrappers.h" +#include "../../test_common/harness/errorHelpers.h" +#include "../../test_common/harness/mt19937.h" +#include "procs.h" + + +//////////////////// +// Device capabilities +static int l_has_double = 0; +static int l_has_half = 0; +static int l_64bit_device = 0; +static int l_has_int64_atomics = 0; +static int l_has_intptr_atomics = 0; +static int l_has_cles_int64 = 0; + +static int l_host_is_big_endian = 1; + +static size_t l_max_global_id0 = 0; +static cl_bool l_linker_available = false; + +#define check_error(errCode,msg,...) ((errCode != CL_SUCCESS) ? (log_error("ERROR: " msg "! (%s:%d)\n", ## __VA_ARGS__, __FILE__, __LINE__), 1) : 0) + +//////////////////// +// Info about types we can use for program scope variables. + + +class TypeInfo { + +public: + TypeInfo() : + name(""), + m_buf_elem_type(""), + m_is_vecbase(false), + m_is_atomic(false), + m_is_like_size_t(false), + m_is_bool(false), + m_elem_type(0), m_num_elem(0), + m_size(0), + m_value_size(0) + {} + TypeInfo(const char* name_arg) : + name(name_arg), + m_buf_elem_type(name_arg), + m_is_vecbase(false), + m_is_atomic(false), + m_is_like_size_t(false), + m_is_bool(false), + m_elem_type(0), m_num_elem(0), + m_size(0), + m_value_size(0) + { } + + // Vectors + TypeInfo( TypeInfo* elem_type, int num_elem ) : + m_is_vecbase(false), + m_is_atomic(false), + m_is_like_size_t(false), + m_is_bool(false), + m_elem_type(elem_type), + m_num_elem(num_elem) + { + char the_name[10]; // long enough for longest vector type name "double16" + snprintf(the_name,sizeof(the_name),"%s%d",elem_type->get_name_c_str(),m_num_elem); + this->name = std::string(the_name); + this->m_buf_elem_type = std::string(the_name); + this->m_value_size = num_elem * elem_type->get_size(); + if ( m_num_elem == 3 ) { + this->m_size = 4 * elem_type->get_size(); + } else { + this->m_size = num_elem * 
elem_type->get_size(); + } + } + const std::string& get_name(void) const { return name; } + const char* get_name_c_str(void) const { return name.c_str(); } + TypeInfo& set_vecbase(void) { this->m_is_vecbase = true; return *this; } + TypeInfo& set_atomic(void) { this->m_is_atomic = true; return *this; } + TypeInfo& set_like_size_t(void) { + this->m_is_like_size_t = true; + this->set_size( l_64bit_device ? 8 : 4 ); + this->m_buf_elem_type = l_64bit_device ? "ulong" : "uint"; + return *this; + } + TypeInfo& set_bool(void) { this->m_is_bool = true; return *this; } + TypeInfo& set_size(size_t n) { this->m_value_size = this->m_size = n; return *this; } + TypeInfo& set_buf_elem_type( const char* name ) { this->m_buf_elem_type = std::string(name); return *this; } + + const TypeInfo* elem_type(void) const { return m_elem_type; } + int num_elem(void) const { return m_num_elem; } + + bool is_vecbase(void) const {return m_is_vecbase;} + bool is_atomic(void) const {return m_is_atomic;} + bool is_like_size_t(void) const {return m_is_like_size_t;} + bool is_bool(void) const {return m_is_bool;} + size_t get_size(void) const {return m_size;} + size_t get_value_size(void) const {return m_value_size;} + + // When passing values of this type to a kernel, what buffer type + // should be used? + const char* get_buf_elem_type(void) const { return m_buf_elem_type.c_str(); } + + std::string as_string(const cl_uchar* value_ptr) const { + // This method would be shorter if I had a real handle to element + // vector type. + if ( this->is_bool() ) { + std::string result( name ); + result += "<"; + result += (*value_ptr ? 
"true" : "false"); + result += ", "; + char buf[10]; + sprintf(buf,"%02x",*value_ptr); + result += buf; + result += ">"; + return result; + } else if ( this->num_elem() ) { + std::string result( name ); + result += "<"; + for ( unsigned ielem = 0 ; ielem < this->num_elem() ; ielem++ ) { + char buf[MAX_STR]; + if ( ielem ) result += ", "; + for ( unsigned ibyte = 0; ibyte < this->m_elem_type->get_size() ; ibyte++ ) { + sprintf(buf + 2*ibyte,"%02x", value_ptr[ ielem * this->m_elem_type->get_size() + ibyte ] ); + } + result += buf; + } + result += ">"; + return result; + } else { + std::string result( name ); + result += "<"; + char buf[MAX_STR]; + for ( unsigned ibyte = 0; ibyte < this->get_size() ; ibyte++ ) { + sprintf(buf + 2*ibyte,"%02x", value_ptr[ ibyte ] ); + } + result += buf; + result += ">"; + return result; + } + } + + // Initialize the given buffer to a constant value initialized as if it + // were from the INIT_VAR macro below. + // Only needs to support values 0 and 1. + void init( cl_uchar* buf, cl_uchar val) const { + if ( this->num_elem() ) { + for ( unsigned ielem = 0 ; ielem < this->num_elem() ; ielem++ ) { + // Delegate! + this->init_elem( buf + ielem * this->get_value_size()/this->num_elem(), val ); + } + } else { + init_elem( buf, val ); + } + } + +private: + void init_elem( cl_uchar* buf, cl_uchar val ) const { + size_t elem_size = this->num_elem() ? this->get_value_size()/this->num_elem() : this->get_size(); + memset(buf,0,elem_size); + if ( val ) { + if ( strstr( name.c_str(), "float" ) ) { + *(float*)buf = (float)val; + return; + } + if ( strstr( name.c_str(), "double" ) ) { + *(double*)buf = (double)val; + return; + } + if ( this->is_bool() ) { *buf = (bool)val; return; } + + // Write a single character value to the correct spot, + // depending on host endianness. 
+ if ( l_host_is_big_endian ) *(buf + elem_size-1) = (cl_uchar)val; + else *buf = (cl_uchar)val; + } + } +public: + + void dump(FILE* fp) const { + fprintf(fp,"Type %s : <%d,%d,%s> ", name.c_str(), + (int)m_size, + (int)m_value_size, + m_buf_elem_type.c_str() ); + if ( this->m_elem_type ) fprintf(fp, " vec(%s,%d)", this->m_elem_type->get_name_c_str(), this->num_elem() ); + if ( this->m_is_vecbase ) fprintf(fp, " vecbase"); + if ( this->m_is_bool ) fprintf(fp, " bool"); + if ( this->m_is_like_size_t ) fprintf(fp, " like-size_t"); + if ( this->m_is_atomic ) fprintf(fp, " atomic"); + fprintf(fp,"\n"); + fflush(fp); + } + +private: + std::string name; + TypeInfo* m_elem_type; + int m_num_elem; + bool m_is_vecbase; + bool m_is_atomic; + bool m_is_like_size_t; + bool m_is_bool; + size_t m_size; // Number of bytes of storage occupied by this type. + size_t m_value_size; // Number of bytes of value significant for this type. Differs for vec3. + + // When passing values of this type to a kernel, what buffer type + // should be used? + // For most types, it's just itself. + // Use a std::string so I don't have to make a copy constructor. + std::string m_buf_elem_type; +}; + + +#define NUM_SCALAR_TYPES (8+2) // signed and unsigned integral types, float and double +#define NUM_VECTOR_SIZES (5) // 2,3,4,8,16 +#define NUM_PLAIN_TYPES \ + 5 /*boolean and size_t family */ \ + + NUM_SCALAR_TYPES \ + + NUM_SCALAR_TYPES*NUM_VECTOR_SIZES \ + + 10 /* atomic types */ + +// Need room for plain, array, pointer, struct +#define MAX_TYPES (4*NUM_PLAIN_TYPES) + +static TypeInfo type_info[MAX_TYPES]; +static int num_type_info = 0; // Number of valid entries in type_info[] + + + + +// A helper class to form kernel source arguments for clCreateProgramWithSource. 
+class StringTable { +public: + StringTable() : m_c_strs(NULL), m_lengths(NULL), m_frozen(false), m_strings() {} + ~StringTable() { release_frozen(); } + + void add(std::string s) { release_frozen(); m_strings.push_back(s); } + + const size_t num_str() { freeze(); return m_strings.size(); } + const char** strs() { freeze(); return m_c_strs; } + const size_t* lengths() { freeze(); return m_lengths; } + +private: + void freeze(void) { + if ( !m_frozen ) { + release_frozen(); + + m_c_strs = (const char**) malloc(sizeof(const char*) * m_strings.size()); + m_lengths = (size_t*) malloc(sizeof(size_t) * m_strings.size()); + assert( m_c_strs ); + assert( m_lengths ); + + for ( size_t i = 0; i < m_strings.size() ; i++ ) { + m_c_strs[i] = m_strings[i].c_str(); + m_lengths[i] = strlen(m_c_strs[i]); + } + + m_frozen = true; + } + } + void release_frozen(void) { + if ( m_c_strs ) { free(m_c_strs); m_c_strs = 0; } + if ( m_lengths ) { free(m_lengths); m_lengths = 0; } + m_frozen = false; + } + + typedef std::vector strlist_t; + strlist_t m_strings; + const char** m_c_strs; + size_t* m_lengths; + bool m_frozen; +}; + + +//////////////////// +// File scope function declarations + +static void l_load_abilities(cl_device_id device); +static const char* l_get_fp64_pragma(void); +static const char* l_get_cles_int64_pragma(void); +static int l_build_type_table(cl_device_id device); + +static int l_get_device_info(cl_device_id device, size_t* max_size_ret, size_t* pref_size_ret); + +static void l_set_randomly( cl_uchar* buf, size_t buf_size, RandomSeed& rand_state ); +static int l_compare( const cl_uchar* expected, const cl_uchar* received, unsigned num_values, const TypeInfo&ti ); +static int l_copy( cl_uchar* dest, unsigned dest_idx, const cl_uchar* src, unsigned src_idx, const TypeInfo&ti ); + +static std::string conversion_functions(const TypeInfo& ti); +static std::string global_decls(const TypeInfo& ti, bool with_init); +static std::string writer_function(const TypeInfo& ti); 
+static std::string reader_function(const TypeInfo& ti); + +static int l_write_read( cl_device_id device, cl_context context, cl_command_queue queue ); +static int l_write_read_for_type( cl_device_id device, cl_context context, cl_command_queue queue, const TypeInfo& ti, RandomSeed& rand_state ); + +static int l_init_write_read( cl_device_id device, cl_context context, cl_command_queue queue ); +static int l_init_write_read_for_type( cl_device_id device, cl_context context, cl_command_queue queue, const TypeInfo& ti, RandomSeed& rand_state ); + +static int l_capacity( cl_device_id device, cl_context context, cl_command_queue queue, size_t max_size ); +static int l_user_type( cl_device_id device, cl_context context, cl_command_queue queue, size_t max_size, bool separate_compilation ); + + + +//////////////////// +// File scope function definitions + +static cl_int print_build_log(cl_program program, cl_uint num_devices, cl_device_id *device_list, cl_uint count, const char **strings, const size_t *lengths, const char* options) +{ + cl_uint i; + cl_int error; + BufferOwningPtr devices; + + if(num_devices == 0 || device_list == NULL) + { + error = clGetProgramInfo(program, CL_PROGRAM_NUM_DEVICES, sizeof(num_devices), &num_devices, NULL); + test_error(error, "clGetProgramInfo CL_PROGRAM_NUM_DEVICES failed"); + + device_list = (cl_device_id*)malloc(sizeof(cl_device_id)*num_devices); + devices.reset(device_list); + + memset(device_list, 0, sizeof(cl_device_id) * num_devices); + + error = clGetProgramInfo(program, CL_PROGRAM_DEVICES, sizeof(cl_device_id) * num_devices, device_list, NULL); + test_error(error, "clGetProgramInfo CL_PROGRAM_DEVICES failed"); + } + + cl_uint z; + bool sourcePrinted = false; + + for(z = 0; z < num_devices; z++) + { + char deviceName[4096] = ""; + error = clGetDeviceInfo(device_list[z], CL_DEVICE_NAME, sizeof(deviceName), deviceName, NULL); + check_error(error, "Device \"%d\" failed to return a name. 
clGetDeviceInfo CL_DEVICE_NAME failed", z); + + cl_build_status buildStatus; + error = clGetProgramBuildInfo(program, device_list[z], CL_PROGRAM_BUILD_STATUS, sizeof(buildStatus), &buildStatus, NULL); + check_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_STATUS failed"); + + if(buildStatus != CL_BUILD_SUCCESS) + { + if(!sourcePrinted) + { + log_error("Build options: %s\n", options); + if(count && strings) + { + log_error("Original source is: ------------\n"); + for(i = 0; i < count; i++) log_error("%s", strings[i]); + } + sourcePrinted = true; + } + + char statusString[64] = ""; + if (buildStatus == (cl_build_status)CL_BUILD_SUCCESS) + sprintf(statusString, "CL_BUILD_SUCCESS"); + else if (buildStatus == (cl_build_status)CL_BUILD_NONE) + sprintf(statusString, "CL_BUILD_NONE"); + else if (buildStatus == (cl_build_status)CL_BUILD_ERROR) + sprintf(statusString, "CL_BUILD_ERROR"); + else if (buildStatus == (cl_build_status)CL_BUILD_IN_PROGRESS) + sprintf(statusString, "CL_BUILD_IN_PROGRESS"); + else + sprintf(statusString, "UNKNOWN (%d)", buildStatus); + + log_error("Build not successful for device \"%s\", status: %s\n", deviceName, statusString); + + size_t paramSize = 0; + error = clGetProgramBuildInfo(program, device_list[z], CL_PROGRAM_BUILD_LOG, 0, NULL, ¶mSize); + if(check_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed")) break; + + std::string log; + log.resize(paramSize/sizeof(char)); + + error = clGetProgramBuildInfo(program, device_list[z], CL_PROGRAM_BUILD_LOG, paramSize, &log[0], NULL); + if(check_error(error, "Device %d (%s) failed to return a build log", z, deviceName)) break; + if(log[0] == 0) log_error("clGetProgramBuildInfo returned an empty log.\n"); + else + { + log_error("Build log:\n", deviceName); + log_error("%s\n", log.c_str()); + } + } + } + return error; +} + +static void l_load_abilities(cl_device_id device) +{ + l_has_half = is_extension_available(device,"cl_khr_fp16"); + l_has_double = 
is_extension_available(device,"cl_khr_fp64"); + l_has_cles_int64 = is_extension_available(device,"cles_khr_int64"); + + l_has_int64_atomics + = is_extension_available(device,"cl_khr_int64_base_atomics") + && is_extension_available(device,"cl_khr_int64_extended_atomics"); + + { + int status = CL_SUCCESS; + cl_uint addr_bits = 32; + status = clGetDeviceInfo(device,CL_DEVICE_ADDRESS_BITS,sizeof(addr_bits),&addr_bits,0); + l_64bit_device = ( status == CL_SUCCESS && addr_bits == 64 ); + } + + // 32-bit devices always have intptr atomics. + l_has_intptr_atomics = !l_64bit_device || l_has_int64_atomics; + + union { char c[4]; int i; } probe; + probe.i = 1; + l_host_is_big_endian = !probe.c[0]; + + // Determine max global id. + { + int status = CL_SUCCESS; + cl_uint max_dim = 0; + status = clGetDeviceInfo(device,CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,sizeof(max_dim),&max_dim,0); + assert( status == CL_SUCCESS ); + assert( max_dim > 0 ); + size_t max_id[3]; + max_id[0] = 0; + status = clGetDeviceInfo(device,CL_DEVICE_MAX_WORK_ITEM_SIZES,max_dim*sizeof(size_t),&max_id[0],0); + assert( status == CL_SUCCESS ); + l_max_global_id0 = max_id[0]; + } + + { // Is separate compilation supported? + int status = CL_SUCCESS; + l_linker_available = false; + status = clGetDeviceInfo(device,CL_DEVICE_LINKER_AVAILABLE,sizeof(l_linker_available),&l_linker_available,0); + assert( status == CL_SUCCESS ); + } +} + + +static const char* l_get_fp64_pragma(void) +{ + return l_has_double ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" : ""; +} + +static const char* l_get_cles_int64_pragma(void) +{ + return l_has_cles_int64 ? 
"#pragma OPENCL EXTENSION cles_khr_int64 : enable\n" : ""; +} + + +static int l_build_type_table(cl_device_id device) +{ + int status = CL_SUCCESS; + size_t iscalar = 0; + size_t ivecsize = 0; + int vecsizes[] = { 2, 3, 4, 8, 16 }; + const char* vecbase[] = { + "uchar", "char", + "ushort", "short", + "uint", "int", + "ulong", "long", + "float", + "double" + }; + int vecbase_size[] = { + 1, 1, + 2, 2, + 4, 4, + 8, 8, + 4, + 8 + }; + const char* like_size_t[] = { + "intptr_t", + "uintptr_t", + "size_t", + "ptrdiff_t" + }; + const char* atomics[] = { + "atomic_int", "atomic_uint", + "atomic_long", "atomic_ulong", + "atomic_float", + "atomic_double", + }; + int atomics_size[] = { + 4, 4, + 8, 8, + 4, + 8 + }; + const char* intptr_atomics[] = { + "atomic_intptr_t", + "atomic_uintptr_t", + "atomic_size_t", + "atomic_ptrdiff_t" + }; + + l_load_abilities(device); + num_type_info = 0; + + // Boolean. + type_info[ num_type_info++ ] = TypeInfo( "bool" ).set_bool().set_size(1).set_buf_elem_type("uchar"); + + // Vector types, and the related scalar element types. + for ( iscalar=0; iscalar < sizeof(vecbase)/sizeof(vecbase[0]) ; ++iscalar ) { + if ( !gHasLong && strstr(vecbase[iscalar],"long") ) continue; + if ( !l_has_double && strstr(vecbase[iscalar],"double") ) continue; + + // Scalar + TypeInfo* elem_type = type_info + num_type_info++; + *elem_type = TypeInfo( vecbase[iscalar] ).set_vecbase().set_size( vecbase_size[iscalar] ); + + // Vector + for ( ivecsize=0; ivecsize < sizeof(vecsizes)/sizeof(vecsizes[0]) ; ivecsize++ ) { + type_info[ num_type_info++ ] = TypeInfo( elem_type, vecsizes[ivecsize] ); + } + } + + // Size_t-like types + for ( iscalar=0; iscalar < sizeof(like_size_t)/sizeof(like_size_t[0]) ; ++iscalar ) { + type_info[ num_type_info++ ] = TypeInfo( like_size_t[iscalar] ).set_like_size_t(); + } + + // Atomic types. 
+ for ( iscalar=0; iscalar < sizeof(atomics)/sizeof(atomics[0]) ; ++iscalar ) { + if ( !l_has_int64_atomics && strstr(atomics[iscalar],"long") ) continue; + if ( !(l_has_int64_atomics && l_has_double) && strstr(atomics[iscalar],"double") ) continue; + + // The +7 is used to skip over the "atomic_" prefix. + const char* buf_type = atomics[iscalar] + 7; + type_info[ num_type_info++ ] = TypeInfo( atomics[iscalar] ).set_atomic().set_size( atomics_size[iscalar] ).set_buf_elem_type( buf_type ); + } + if ( l_has_intptr_atomics ) { + for ( iscalar=0; iscalar < sizeof(intptr_atomics)/sizeof(intptr_atomics[0]) ; ++iscalar ) { + type_info[ num_type_info++ ] = TypeInfo( intptr_atomics[iscalar] ).set_atomic().set_like_size_t(); + } + } + + assert( num_type_info <= MAX_TYPES ); // or increase MAX_TYPES + +#if 0 + for ( size_t i = 0 ; i < num_type_info ; i++ ) { + type_info[ i ].dump(stdout); + } + exit(0); +#endif + + return status; +} + +static const TypeInfo& l_find_type( const char* name ) +{ + for ( size_t i = 0; i < num_type_info ; i++ ) { + if ( 0 == strcmp( name, type_info[i].get_name_c_str() ) ) return type_info[i]; + } + assert(0); +} + + + +// Populate return parameters for max program variable size, preferred program variable size. 
+ +static int l_get_device_info(cl_device_id device, size_t* max_size_ret, size_t* pref_size_ret) +{ + int err = CL_SUCCESS; + size_t return_size = 0; + + err = clGetDeviceInfo(device, CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE, sizeof(*max_size_ret), max_size_ret, &return_size); + if ( err != CL_SUCCESS ) { + log_error("Error: Failed to get device info for CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE\n"); + return err; + } + if ( return_size != sizeof(size_t) ) { + log_error("Error: Invalid size %d returned for CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE\n", (int)return_size ); + return 1; + } + if ( return_size != sizeof(size_t) ) { + log_error("Error: Invalid size %d returned for CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE\n", (int)return_size ); + return 1; + } + + return_size = 0; + err = clGetDeviceInfo(device, CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE, sizeof(*pref_size_ret), pref_size_ret, &return_size); + if ( err != CL_SUCCESS ) { + log_error("Error: Failed to get device info for CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE: %d\n",err); + return err; + } + if ( return_size != sizeof(size_t) ) { + log_error("Error: Invalid size %d returned for CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE\n", (int)return_size ); + return 1; + } + + return CL_SUCCESS; +} + + +static void l_set_randomly( cl_uchar* buf, size_t buf_size, RandomSeed& rand_state ) +{ + assert( 0 == (buf_size % sizeof(cl_uint) ) ); + for ( size_t i = 0; i < buf_size ; i += sizeof(cl_uint) ) { + *( (cl_uint*)(buf + i) ) = genrand_int32( rand_state ); + } +#if 0 + for ( size_t i = 0; i < buf_size ; i++ ) { + printf("%02x",buf[i]); + } + printf("\n"); +#endif +} + +// Return num_value values of the given type. +// Returns CL_SUCCESS if they compared as equal. +static int l_compare( const char* test_name, const cl_uchar* expected, const cl_uchar* received, size_t num_values, const TypeInfo&ti ) +{ + // Compare only the valid returned bytes. 
+ for ( unsigned value_idx = 0; value_idx < num_values; value_idx++ ) { + const cl_uchar* expv = expected + value_idx * ti.get_size(); + const cl_uchar* gotv = received + value_idx * ti.get_size(); + if ( memcmp( expv, gotv, ti.get_value_size() ) ) { + std::string exp_str = ti.as_string( expv ); + std::string got_str = ti.as_string( gotv ); + log_error("Error: %s test for type %s, at index %d: Expected %s got %s\n", + test_name, + ti.get_name_c_str(), value_idx, + exp_str.c_str(), + got_str.c_str() ); + return 1; + } + } + return CL_SUCCESS; +} + +// Copy a target value from src[idx] to dest[idx] +static int l_copy( cl_uchar* dest, unsigned dest_idx, const cl_uchar* src, unsigned src_idx, const TypeInfo&ti ) +{ + cl_uchar* raw_dest = dest + dest_idx * ti.get_size(); + const cl_uchar* raw_src = src + src_idx * ti.get_size(); + memcpy( raw_dest, raw_src, ti.get_value_size() ); + + return 0; +} + + +static std::string conversion_functions(const TypeInfo& ti) +{ + std::string result; + static char buf[MAX_STR]; + int num_printed = 0; + // The atomic types just use the base type. + if ( ti.is_atomic() || 0 == strcmp( ti.get_buf_elem_type(), ti.get_name_c_str() ) ) { + // The type is represented in a buffer by itself. + num_printed = snprintf(buf,MAX_STR, + "%s from_buf(%s a) { return a; }\n" + "%s to_buf(%s a) { return a; }\n", + ti.get_buf_elem_type(), ti.get_buf_elem_type(), + ti.get_buf_elem_type(), ti.get_buf_elem_type() ); + } else { + // Just use C-style cast. + num_printed = snprintf(buf,MAX_STR, + "%s from_buf(%s a) { return (%s)a; }\n" + "%s to_buf(%s a) { return (%s)a; }\n", + ti.get_name_c_str(), ti.get_buf_elem_type(), ti.get_name_c_str(), + ti.get_buf_elem_type(), ti.get_name_c_str(), ti.get_buf_elem_type() ); + } + // Add initializations. + if ( ti.is_atomic() ) { + num_printed += snprintf( buf + num_printed, MAX_STR-num_printed, + "#define INIT_VAR(a) ATOMIC_VAR_INIT(a)\n" ); + } else { + // This cast works even if the target type is a vector type. 
+ num_printed += snprintf( buf + num_printed, MAX_STR-num_printed, + "#define INIT_VAR(a) ((%s)(a))\n", ti.get_name_c_str()); + } + assert( num_printed < MAX_STR ); // or increase MAX_STR + result = buf; + return result; +} + +static std::string global_decls(const TypeInfo& ti, bool with_init ) +{ + const char* tn = ti.get_name_c_str(); + const char* vol = (ti.is_atomic() ? " volatile " : " "); + static char decls[MAX_STR]; + int num_printed = 0; + if ( with_init ) { + const char *decls_template_with_init = + "%s %s var = INIT_VAR(0);\n" + "global %s %s g_var = INIT_VAR(1);\n" + "%s %s a_var[2] = { INIT_VAR(1), INIT_VAR(1) };\n" + "volatile global %s %s* p_var = &a_var[1];\n\n"; + num_printed = snprintf(decls,sizeof(decls),decls_template_with_init, + vol,tn,vol,tn,vol,tn,vol,tn); + } else { + const char *decls_template_no_init = + "%s %s var;\n" + "global %s %s g_var;\n" + "%s %s a_var[2];\n" + "global %s %s* p_var;\n\n"; + num_printed = snprintf(decls,sizeof(decls),decls_template_no_init, + vol,tn,vol,tn,vol,tn,vol,tn); + } + assert( num_printed < sizeof(decls) ); + return std::string(decls); +} + + +// Return the source text for the writer function for the given type. +// For types that can't be passed as pointer-to-type as a kernel argument, +// use a substitute base type of the same size. 
+static std::string writer_function(const TypeInfo& ti) +{ + static char writer_src[MAX_STR]; + int num_printed = 0; + if ( !ti.is_atomic() ) { + const char* writer_template_normal = + "kernel void writer( global %s* src, uint idx ) {\n" + " var = from_buf(src[0]);\n" + " g_var = from_buf(src[1]);\n" + " a_var[0] = from_buf(src[2]);\n" + " a_var[1] = from_buf(src[3]);\n" + " p_var = a_var + idx;\n" + "}\n\n"; + num_printed = snprintf(writer_src,sizeof(writer_src),writer_template_normal,ti.get_buf_elem_type()); + } else { + const char* writer_template_atomic = + "kernel void writer( global %s* src, uint idx ) {\n" + " atomic_store( &var, from_buf(src[0]) );\n" + " atomic_store( &g_var, from_buf(src[1]) );\n" + " atomic_store( &a_var[0], from_buf(src[2]) );\n" + " atomic_store( &a_var[1], from_buf(src[3]) );\n" + " p_var = a_var + idx;\n" + "}\n\n"; + num_printed = snprintf(writer_src,sizeof(writer_src),writer_template_atomic,ti.get_buf_elem_type()); + } + assert( num_printed < sizeof(writer_src) ); + std::string result = writer_src; + return result; +} + + +// Return source text for teh reader function for the given type. +// For types that can't be passed as pointer-to-type as a kernel argument, +// use a substitute base type of the same size. 
+static std::string reader_function(const TypeInfo& ti) +{ + static char reader_src[MAX_STR]; + int num_printed = 0; + if ( !ti.is_atomic() ) { + const char* reader_template_normal = + "kernel void reader( global %s* dest, %s ptr_write_val ) {\n" + " *p_var = from_buf(ptr_write_val);\n" + " dest[0] = to_buf(var);\n" + " dest[1] = to_buf(g_var);\n" + " dest[2] = to_buf(a_var[0]);\n" + " dest[3] = to_buf(a_var[1]);\n" + "}\n\n"; + num_printed = snprintf(reader_src,sizeof(reader_src),reader_template_normal,ti.get_buf_elem_type(),ti.get_buf_elem_type()); + } else { + const char* reader_template_atomic = + "kernel void reader( global %s* dest, %s ptr_write_val ) {\n" + " atomic_store( p_var, from_buf(ptr_write_val) );\n" + " dest[0] = to_buf( atomic_load( &var ) );\n" + " dest[1] = to_buf( atomic_load( &g_var ) );\n" + " dest[2] = to_buf( atomic_load( &a_var[0] ) );\n" + " dest[3] = to_buf( atomic_load( &a_var[1] ) );\n" + "}\n\n"; + num_printed = snprintf(reader_src,sizeof(reader_src),reader_template_atomic,ti.get_buf_elem_type(),ti.get_buf_elem_type()); + } + assert( num_printed < sizeof(reader_src) ); + std::string result = reader_src; + return result; +} + + +// Check write-then-read. 
+static int l_write_read( cl_device_id device, cl_context context, cl_command_queue queue ) +{ + int status = CL_SUCCESS; + int itype; + + RandomSeed rand_state( gRandomSeed ); + + for ( itype = 0; itype < num_type_info ; itype++ ) { + status = status | l_write_read_for_type(device,context,queue,type_info[itype], rand_state ); + FLUSH; + } + + return status; +} +static int l_write_read_for_type( cl_device_id device, cl_context context, cl_command_queue queue, const TypeInfo& ti, RandomSeed& rand_state ) +{ + int err = CL_SUCCESS; + std::string type_name( ti.get_name() ); + const char* tn = type_name.c_str(); + log_info(" %s ",tn); + + StringTable ksrc; + ksrc.add( l_get_fp64_pragma() ); + ksrc.add( l_get_cles_int64_pragma() ); + ksrc.add( conversion_functions(ti) ); + ksrc.add( global_decls(ti,false) ); + ksrc.add( writer_function(ti) ); + ksrc.add( reader_function(ti) ); + + int status = CL_SUCCESS; + clProgramWrapper program; + clKernelWrapper writer; + + status = create_single_kernel_helper_with_build_options(context, &program, &writer, ksrc.num_str(), ksrc.strs(), "writer", OPTIONS); + test_error_ret(status,"Failed to create program for read-after-write test",status); + + clKernelWrapper reader( clCreateKernel( program, "reader", &status ) ); + test_error_ret(status,"Failed to create reader kernel for read-after-write test",status); + + // Check size query. + size_t used_bytes = 0; + status = clGetProgramBuildInfo( program, device, CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE, sizeof(used_bytes), &used_bytes, 0 ); + test_error_ret(status,"Failed to query global variable total size",status); + size_t expected_used_bytes = + (NUM_TESTED_VALUES-1)*ti.get_size() // Two regular variables and an array of 2 elements. + + ( l_64bit_device ? 
8 : 4 ); // The pointer + if ( used_bytes < expected_used_bytes ) { + log_error("Error program query for global variable total size query failed: Expected at least %llu but got %llu\n", (unsigned long long)expected_used_bytes, (unsigned long long)used_bytes ); + err |= 1; + } + + // We need to create 5 random values of the given type, + // and read 4 of them back. + cl_uchar CL_ALIGNED(ALIGNMENT) write_data[NUM_TESTED_VALUES * sizeof(cl_ulong16)]; + cl_uchar CL_ALIGNED(ALIGNMENT) read_data[ (NUM_TESTED_VALUES-1) * sizeof(cl_ulong16)]; + + clMemWrapper write_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, sizeof(write_data), write_data, &status ) ); + test_error_ret(status,"Failed to allocate write buffer",status); + clMemWrapper read_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, sizeof(read_data), read_data, &status ) ); + test_error_ret(status,"Failed to allocate read buffer",status); + + status = clSetKernelArg(writer,0,sizeof(cl_mem),&write_mem); test_error_ret(status,"set arg",status); + status = clSetKernelArg(reader,0,sizeof(cl_mem),&read_mem); test_error_ret(status,"set arg",status); + + // Boolean random data needs to be massaged a bit more. + const int num_rounds = ti.is_bool() ? (1 << NUM_TESTED_VALUES ) : NUM_ROUNDS; + unsigned bool_iter = 0; + + for ( int iround = 0; iround < num_rounds ; iround++ ) { + for ( cl_uint iptr_idx = 0; iptr_idx < 2 ; iptr_idx++ ) { // Index into array, to write via pointer + // Generate new random data to push through. + // Generate 5 * 128 bytes all the time, even though the test for many types use less than all that. + + cl_uchar *write_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, write_mem, CL_TRUE, CL_MAP_WRITE, 0, sizeof(write_data), 0, 0, 0, 0); + + if ( ti.is_bool() ) { + // For boolean, random data cast to bool isn't very random. + // So use the bottom bit of bool_value_iter to get true + // diversity. 
+ for ( unsigned value_idx = 0; value_idx < NUM_TESTED_VALUES ; value_idx++ ) { + write_data[value_idx] = (1< +#include +#include +#include +#include +#include "../../test_common/harness/rounding_mode.h" + +#include "procs.h" + +static const char *fpadd_kernel_code = +"__kernel void test_fpadd(__global float *srcA, __global float *srcB, __global float *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] + srcB[tid];\n" +"}\n"; + +static const char *fpsub_kernel_code = +"__kernel void test_fpsub(__global float *srcA, __global float *srcB, __global float *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] - srcB[tid];\n" +"}\n"; + +static const char *fpmul_kernel_code = +"__kernel void test_fpmul(__global float *srcA, __global float *srcB, __global float *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] * srcB[tid];\n" +"}\n"; + + +static const float MAX_ERR = 1e-5f; + +static int +verify_fpadd(float *inptrA, float *inptrB, float *outptr, int n, int fileNum) +{ + float r; + int i; + + float * reference_ptr = (float *)malloc(n * sizeof(float)); + + for (i=0; i +#include +#include +#include +#include + + +#include "procs.h" + +static const char *bgra8888_kernel_code = +"\n" +"__kernel void test_bgra8888(read_only image2d_t srcimg, __global uchar4 *dst, sampler_t sampler)\n" +"{\n" +" int tid_x = get_global_id(0);\n" +" int tid_y = get_global_id(1);\n" +" int indx = tid_y * get_image_width(srcimg) + tid_x;\n" +" float4 color;\n" +"\n" +" color = read_imagef(srcimg, sampler, (int2)(tid_x, tid_y)) * 255.0f;\n" +" dst[indx] = convert_uchar4_rte(color.zyxw);\n" +"\n" +"}\n"; + + +static const char *rgba8888_kernel_code = +"\n" +"__kernel void test_rgba8888(read_only image2d_t srcimg, __global uchar4 *dst, sampler_t sampler)\n" +"{\n" +" int tid_x = get_global_id(0);\n" +" int tid_y = get_global_id(1);\n" +" int indx = tid_y * get_image_width(srcimg) + tid_x;\n" +" float4 color;\n" +"\n" +" 
color = read_imagef(srcimg, sampler, (int2)(tid_x, tid_y)) * 255.0f;\n" +" dst[indx] = convert_uchar4_rte(color);\n" +"\n" +"}\n"; + + +static unsigned char * +generate_8888_image(int w, int h, MTdata d) +{ + unsigned char *ptr = (unsigned char*)malloc(w * h * 4); + int i; + + for (i=0; i +#include +#include +#include +#include +#include + + +#include "procs.h" + +static const char *bgra8888_kernel_code = +"\n" +"__kernel void test_bgra8888(read_only image3d_t srcimg, __global float4 *dst, sampler_t sampler)\n" +"{\n" +" int tid_x = get_global_id(0);\n" +" int tid_y = get_global_id(1);\n" +" int tid_z = get_global_id(2);\n" +" int indx = (tid_z * get_image_height(srcimg) + tid_y) * get_image_width(srcimg) + tid_x;\n" +" float4 color;\n" +"\n" +" color = read_imagef(srcimg, sampler, (int4)(tid_x, tid_y, tid_z, 0));\n" +" dst[indx].x = color.z;\n" +" dst[indx].y = color.y;\n" +" dst[indx].z = color.x;\n" +" dst[indx].w = color.w;\n" +"\n" +"}\n"; + + +static const char *rgba8888_kernel_code = +"\n" +"__kernel void test_rgba8888(read_only image3d_t srcimg, __global float4 *dst, sampler_t sampler)\n" +"{\n" +" int tid_x = get_global_id(0);\n" +" int tid_y = get_global_id(1);\n" +" int tid_z = get_global_id(2);\n" +" int indx = (tid_z * get_image_height(srcimg) + tid_y) * get_image_width(srcimg) + tid_x;\n" +" float4 color;\n" +"\n" +" color = read_imagef(srcimg, sampler, (int4)(tid_x, tid_y, tid_z, 0));\n" +" //indx *= 4;\n" +" dst[indx].x = color.x;\n" +" dst[indx].y = color.y;\n" +" dst[indx].z = color.z;\n" +" dst[indx].w = color.w;\n" +"\n" +"}\n"; + + +static unsigned char * +generate_3d_image8(int w, int h, int d, MTdata data) +{ + unsigned char *ptr = (unsigned char*)malloc(w * h * d * 4); + int i; + + for (i=0; i +#include +#include +#include +#include + + +#include "procs.h" + + +static const char *rgbaFFFF_kernel_code = +"__kernel void test_rgbaFFFF(read_only image3d_t srcimg, __global float *dst, sampler_t sampler)\n" +"{\n" +" int tid_x = 
get_global_id(0);\n" +" int tid_y = get_global_id(1);\n" +" int tid_z = get_global_id(2);\n" +" int indx = (tid_z * get_image_height(srcimg) + tid_y) * get_image_width(srcimg) + tid_x;\n" +" float4 color;\n" +"\n" +" color = read_imagef(srcimg, sampler, (int4)(tid_x, tid_y, tid_z, 0));\n" +" indx *= 4;\n" +" dst[indx+0] = color.x;\n" +" dst[indx+1] = color.y;\n" +" dst[indx+2] = color.z;\n" +" dst[indx+3] = color.w;\n" +"\n" +"}\n"; + + +static float * +generate_float_image(int w, int h, int d, MTdata data) +{ + float *ptr = (float*)malloc(w * h * d * 4 * sizeof(float)); + int i; + + for (i=0; i +#include +#include +#include +#include + + +#include "procs.h" + +static const char *rgba16_kernel_code = +"__kernel void test_rgba16(read_only image3d_t srcimg, __global ushort4 *dst, sampler_t sampler)\n" +"{\n" +" int tid_x = get_global_id(0);\n" +" int tid_y = get_global_id(1);\n" +" int tid_z = get_global_id(2);\n" +" int indx = (tid_z * get_image_height(srcimg) + tid_y) * get_image_width(srcimg) + tid_x;\n" +" float4 color;\n" +"\n" +" color = read_imagef(srcimg, sampler, (int4)(tid_x, tid_y, tid_z, 0));\n" +" ushort4 dst_write;\n" +" dst_write.x = convert_ushort_rte(color.x * 65535.0f);\n" +" dst_write.y = convert_ushort_rte(color.y * 65535.0f);\n" +" dst_write.z = convert_ushort_rte(color.z * 65535.0f);\n" +" dst_write.w = convert_ushort_rte(color.w * 65535.0f);\n" +" dst[indx] = dst_write;\n" +"\n" +"}\n"; + + +static unsigned short * +generate_16bit_image(int w, int h, int d, MTdata data) +{ + unsigned short *ptr = (cl_ushort*)malloc(w * h * d * 4 * sizeof(cl_ushort)); + int i; + + for (i=0; i +#include +#include +#include +#include + + +#include "procs.h" + + +static const char *rgbaFFFF_kernel_code = +"__kernel void test_rgbaFFFF(read_only image2d_t srcimg, __global float *dst, sampler_t smp)\n" +"{\n" +" int tid_x = get_global_id(0);\n" +" int tid_y = get_global_id(1);\n" +" int indx = tid_y * get_image_width(srcimg) + tid_x;\n" +" float4 color;\n" +"\n" +" 
color = read_imagef(srcimg, smp, (int2)(tid_x, tid_y));\n" +" indx *= 4;\n" +" dst[indx+0] = color.x;\n" +" dst[indx+1] = color.y;\n" +" dst[indx+2] = color.z;\n" +" dst[indx+3] = color.w;\n" +"\n" +"}\n"; + + +static float * +generate_float_image(int w, int h, MTdata d) +{ + float *ptr = (float*)malloc(w * h * 4 * sizeof(float)); + int i; + + for (i=0; i +#include +#include +#include +#include + + +#include "procs.h" + +static const char *rgba16_kernel_code = +"__kernel void test_rgba16(read_only image2d_t srcimg, __global ushort4 *dst, sampler_t smp)\n" +"{\n" +" int tid_x = get_global_id(0);\n" +" int tid_y = get_global_id(1);\n" +" int indx = tid_y * get_image_width(srcimg) + tid_x;\n" +" float4 color;\n" +"\n" +" color = read_imagef(srcimg, smp, (int2)(tid_x, tid_y));\n" +" ushort4 dst_write;\n" +" dst_write.x = convert_ushort_rte(color.x * 65535.0f);\n" +" dst_write.y = convert_ushort_rte(color.y * 65535.0f);\n" +" dst_write.z = convert_ushort_rte(color.z * 65535.0f);\n" +" dst_write.w = convert_ushort_rte(color.w * 65535.0f);\n" +" dst[indx] = dst_write;\n" +"\n" +"}\n"; + + +static unsigned short * +generate_16bit_image(int w, int h, MTdata d) +{ + cl_ushort *ptr = (cl_ushort*)malloc(w * h * 4 * sizeof(cl_ushort)); + int i; + + for (i=0; i +#include +#include +#include + +#include "procs.h" +#include "../../test_common/harness/clImageHelper.h" + +static const char* rw_kernel_code = +"kernel void test_rw_images(read_write image2d_t src_image) {\n" +" int tid_x = get_global_id(0);\n" +" int tid_y = get_global_id(1);\n" +"\n" +" int2 coords = (int2)(tid_x, tid_y);\n" +"\n" +" uint4 src_val = read_imageui(src_image, coords);\n" +" src_val += 3;\n" +"\n" +" // required to ensure that following read from image at\n" +" // location coord returns the latest color value.\n" +" atomic_work_item_fence(CLK_IMAGE_MEM_FENCE,\n" +" memory_order_acq_rel,\n" +" memory_scope_work_item);\n" +"\n" +" write_imageui(src_image, coords, src_val);\n" +"}\n"; + + +int 
test_rw_image_access_qualifier(cl_device_id device_id, cl_context context, cl_command_queue commands, int num_elements) +{ + + unsigned int i; + + unsigned int size_x; + unsigned int size_y; + unsigned int size; + + cl_int err; + + cl_program program; + cl_kernel kernel; + + cl_mem_flags flags; + cl_image_format format; + cl_mem src_image; + + unsigned int *input; + unsigned int *output; + + /* Create test input */ + size_x = 4; + size_y = 4; + size = size_x * size_y * 4; + + input = (unsigned int *)malloc(size*sizeof(unsigned int)); + output = (unsigned int *)malloc(size*sizeof(unsigned int)); + + if (!input && !output) { + log_error("Error: memory allocation failed\n"); + return -1; + } + + /* Fill input array with random values */ + for (i = 0; i < size; i++) { + input[i] = (unsigned int)(rand()/((double)RAND_MAX + 1)*255); + } + + /* Zero out output array */ + for (i = 0; i < size; i++) { + output[i] = 0.0f; + } + + /* Build the program executable */ + err = create_single_kernel_helper_with_build_options(context,&program,&kernel,1,&rw_kernel_code,"test_rw_images", "-cl-std=CL2.0"); + if (err != CL_SUCCESS || !program) { + log_error("Error: clCreateProgramWithSource failed\n"); + return err; + } + + /* Create arrays for input and output data */ + format.image_channel_order = CL_RGBA; + format.image_channel_data_type = CL_UNSIGNED_INT32; + + /* Create input image */ + flags = (cl_mem_flags) (CL_MEM_READ_WRITE + | CL_MEM_COPY_HOST_PTR); + src_image = create_image_2d(context, flags, &format, + size_x, size_y, 0, + (void *)input, &err); + if (err != CL_SUCCESS || !src_image) { + log_error("Error: clCreateImage2D failed\n"); + return err; + } + + /* Set kernel arguments */ + err = clSetKernelArg(kernel, 0, sizeof(src_image), &src_image); + if (err != CL_SUCCESS) { + log_error("Error: clSetKernelArg failed\n"); + return err; + } + + /* Set kernel execution parameters */ + int dim_count = 2; + size_t global_dim[2]; + size_t local_dim[2]; + + global_dim[0] = size_x; + 
global_dim[1] = size_y; + + local_dim[0] = 1; + local_dim[1] = 1; + + /* Execute kernel */ + err = CL_SUCCESS; + unsigned int num_iter = 1; + for(i = 0; i < num_iter; i++) { + err |= clEnqueueNDRangeKernel(commands, kernel, dim_count, + NULL, global_dim, local_dim, + 0, NULL, NULL); + } + + /* Read back the results from the device to verify the output */ + const size_t origin[3] = {0, 0, 0}; + const size_t region[3] = {size_x, size_y, 1}; + err |= clEnqueueReadImage(commands, src_image, CL_TRUE, origin, region, 0, 0, + output, 0, NULL, NULL); + if (err != CL_SUCCESS) { + log_error("Error: clEnqueueReadBuffer failed\n"); + return err; + } + + /* Verify the correctness of kernel result */ + err = 0; + for (i = 0; i < size; i++) { + if (output[i] != (input[i] + 3)) { + log_error("Error: mismatch at index %d\n", i); + err++; + break; + } + } + + /* Release programs, kernel, contect, and memory objects */ + clReleaseMemObject(src_image); + clReleaseProgram(program); + clReleaseKernel(kernel); + + /* Deallocate arrays */ + free(input); + free(output); + + return err; +} diff --git a/test_conformance/basic/test_simple_image_pitch.c b/test_conformance/basic/test_simple_image_pitch.c new file mode 100644 index 00000000..392fed58 --- /dev/null +++ b/test_conformance/basic/test_simple_image_pitch.c @@ -0,0 +1,153 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include +#include + +#include "procs.h" + +int test_simple_read_image_pitch(cl_device_id device, cl_context cl_context_, cl_command_queue q, int num_elements) +{ + cl_int err = CL_SUCCESS; + + size_t imageW = 143; + size_t imageH = 151; + size_t bufferW = 151*4; + size_t bufferH = 151; + + size_t pixel_bytes = 4; + size_t image_bytes = imageW * imageH * pixel_bytes; + + size_t buffer_bytes = bufferW * bufferH; + + PASSIVE_REQUIRE_IMAGE_SUPPORT( device ); + + char* host_image = (char*)malloc(image_bytes); + memset(host_image,0x1,image_bytes); + + cl_image_format fmt = { 0 }; + fmt.image_channel_order = CL_RGBA; + fmt.image_channel_data_type = CL_UNORM_INT8; + + cl_image_desc desc = { 0 }; + desc.image_type = CL_MEM_OBJECT_IMAGE2D; + desc.image_width = imageW; + desc.image_height = imageH; + + cl_mem image = clCreateImage(cl_context_, CL_MEM_COPY_HOST_PTR|CL_MEM_READ_WRITE, &fmt, &desc, host_image, &err); + test_error(err,"clCreateImage"); + + char* host_buffer = (char*)malloc(buffer_bytes); + memset(host_buffer,0xa,buffer_bytes); + + // Test reading from the image + size_t origin[] = { 0, 0, 0 }; + size_t region[] = { imageW, imageH, 1 }; + + err = clEnqueueReadImage(q, image, CL_TRUE, origin, region, bufferW, 0, host_buffer, 0, NULL, NULL); + test_error(err,"clEnqueueReadImage"); + + size_t errors = 0; + for (size_t j=0;j=imageW*pixel_bytes) && (val != 0xa)) { + log_error("Bad value %x outside image at (byte: %lu, row: %lu)\n",val,i,j); + ++errors; + } + } + } + + test_error(clReleaseMemObject(image),"clReleaseMemObject"); + free(host_image); + free(host_buffer); + + return CL_SUCCESS; +} + +int test_simple_write_image_pitch(cl_device_id device, cl_context cl_context_, cl_command_queue q, int num_elements) +{ + cl_int err = CL_SUCCESS; + + size_t imageW = 143; + size_t imageH = 151; + size_t bufferW = 151*4; + size_t bufferH = 151; + + size_t pixel_bytes = 4; + size_t image_bytes = 
imageW * imageH * pixel_bytes; + + size_t buffer_bytes = bufferW * bufferH; + + PASSIVE_REQUIRE_IMAGE_SUPPORT( device ); + + char* host_image = (char*)malloc(image_bytes); + memset(host_image,0x0,image_bytes); + + cl_image_format fmt = { 0 }; + fmt.image_channel_order = CL_RGBA; + fmt.image_channel_data_type = CL_UNORM_INT8; + + cl_image_desc desc = { 0 }; + desc.image_type = CL_MEM_OBJECT_IMAGE2D; + desc.image_width = imageW; + desc.image_height = imageH; + + cl_mem image = clCreateImage(cl_context_, CL_MEM_COPY_HOST_PTR|CL_MEM_READ_WRITE, &fmt, &desc, host_image, &err); + test_error(err,"clCreateImage"); + + char* host_buffer = (char*)malloc(buffer_bytes); + memset(host_buffer,0xa,buffer_bytes); + + // Test reading from the image + size_t origin[] = { 0, 0, 0 }; + size_t region[] = { imageW, imageH, 1 }; + + err = clEnqueueWriteImage(q, image, CL_TRUE, origin, region, bufferW, 0, host_buffer, 0, NULL, NULL); + test_error(err,"clEnqueueWriteImage"); + + size_t mapped_pitch = 0; + char* mapped_image = (char*)clEnqueueMapImage(q, image, CL_TRUE, CL_MAP_READ, origin, region, &mapped_pitch, NULL, 0, NULL, NULL, &err); + test_error(err,"clEnqueueMapImage"); + + size_t errors = 0; + for (size_t j=0;j +#include +#include +#include +#include +#include "procs.h" + + + +cl_int get_type_size( cl_context context, cl_command_queue queue, const char *type, cl_ulong *size ) +{ + const char *sizeof_kernel_code[4] = + { + "", /* optional pragma string */ + "__kernel __attribute__((reqd_work_group_size(1,1,1))) void test_sizeof(__global uint *dst) \n" + "{\n" + " dst[0] = (uint) sizeof( ", type, " );\n" + "}\n" + }; + + cl_program p; + cl_kernel k; + cl_mem m; + cl_uint temp; + + + if (!strncmp(type, "double", 6)) + { + sizeof_kernel_code[0] = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; + } + else if (!strncmp(type, "half", 4)) + { + sizeof_kernel_code[0] = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"; + } + + cl_int err = create_single_kernel_helper( context, &p, &k, 
4, sizeof_kernel_code, "test_sizeof" ); + if( err ) + return err; + + m = clCreateBuffer( context, CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, sizeof( cl_ulong ), size, &err ); + if( NULL == m ) + { + clReleaseProgram( p ); + clReleaseKernel( k ); + log_error("\nclCreateBuffer FAILED\n"); + return err; + } + + err = clSetKernelArg( k, 0, sizeof( cl_mem ), &m ); + if( err ) + { + clReleaseProgram( p ); + clReleaseKernel( k ); + clReleaseMemObject( m ); + log_error("\nclSetKernelArg FAILED\n"); + return err; + } + + err = clEnqueueTask( queue, k, 0, NULL, NULL ); + clReleaseProgram( p ); + clReleaseKernel( k ); + if( err ) + { + clReleaseMemObject( m ); + log_error( "\nclEnqueueTask FAILED\n" ); + return err; + } + + err = clEnqueueReadBuffer( queue, m, CL_TRUE, 0, sizeof( cl_uint ), &temp, 0, NULL, NULL ); + clReleaseMemObject( m ); + if( err ) + log_error( "\nclEnqueueReadBuffer FAILED\n" ); + + *size = (cl_ulong) temp; + + return err; +} + +typedef struct size_table +{ + const char *name; + cl_ulong size; + cl_ulong cl_size; +}size_table; + +const size_table scalar_table[] = +{ + // Fixed size entries from table 6.1 + { "char", 1, sizeof( cl_char ) }, + { "uchar", 1, sizeof( cl_uchar) }, + { "unsigned char", 1, sizeof( cl_uchar) }, + { "short", 2, sizeof( cl_short) }, + { "ushort", 2, sizeof( cl_ushort) }, + { "unsigned short", 2, sizeof( cl_ushort) }, + { "int", 4, sizeof( cl_int ) }, + { "uint", 4, sizeof( cl_uint) }, + { "unsigned int", 4, sizeof( cl_uint) }, + { "float", 4, sizeof( cl_float) }, + { "long", 8, sizeof( cl_long ) }, + { "ulong", 8, sizeof( cl_ulong) }, + { "unsigned long", 8, sizeof( cl_ulong) } +}; + +const size_table vector_table[] = +{ + // Fixed size entries from table 6.1 + { "char", 1, sizeof( cl_char ) }, + { "uchar", 1, sizeof( cl_uchar) }, + { "short", 2, sizeof( cl_short) }, + { "ushort", 2, sizeof( cl_ushort) }, + { "int", 4, sizeof( cl_int ) }, + { "uint", 4, sizeof( cl_uint) }, + { "float", 4, sizeof( cl_float) }, + { "long", 8, 
sizeof( cl_long ) }, + { "ulong", 8, sizeof( cl_ulong) } +}; + +const char *ptr_table[] = +{ + "void*", + "size_t", + "sizeof(int)", // check return type of sizeof + "ptrdiff_t" +}; + +const char *other_types[] = +{ + "event_t", + "image2d_t", + "image3d_t", + "sampler_t" +}; + +static int IsPowerOfTwo( cl_ulong x ){ return 0 == (x & (x-1)); } + +int test_sizeof(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + size_t i, j; + cl_ulong test; + cl_uint ptr_size = CL_UINT_MAX; + cl_int err = CL_SUCCESS; + + // Check address space size + err = clGetDeviceInfo(device, CL_DEVICE_ADDRESS_BITS, sizeof(ptr_size), &ptr_size, NULL); + if( err || ptr_size > 64) + { + log_error( "FAILED: Unable to get CL_DEVICE_ADDRESS_BITS for device %p\n", device ); + return -1; + } + log_info( "\tCL_DEVICE_ADDRESS_BITS = %u\n", ptr_size ); + ptr_size /= 8; + + // Test standard scalar sizes + for( i = 0; i < sizeof( scalar_table ) / sizeof( scalar_table[0] ); i++ ) + { + if( ! gHasLong && + (0 == strcmp(scalar_table[i].name, "long") || + 0 == strcmp(scalar_table[i].name, "ulong") || + 0 == strcmp(scalar_table[i].name, "unsigned long"))) + { + log_info("\nLongs are not supported by this device. 
Skipping test.\t"); + continue; + } + + test = CL_ULONG_MAX; + err = get_type_size( context, queue, scalar_table[i].name, &test ); + if( err ) + return err; + if( test != scalar_table[i].size ) + { + log_error( "\nFAILED: Type %s has size %lld, but expected size %lld!\n", scalar_table[i].name, test, scalar_table[i].size ); + return -1; + } + if( test != scalar_table[i].cl_size ) + { + log_error( "\nFAILED: Type %s has size %lld, but cl_ size is %lld!\n", scalar_table[i].name, test, scalar_table[i].cl_size ); + return -2; + } + log_info( "%16s", scalar_table[i].name ); + } + log_info( "\n" ); + + // Test standard vector sizes + for( j = 2; j <= 16; j *= 2 ) + { + // For each vector size, iterate through types + for( i = 0; i < sizeof( vector_table ) / sizeof( vector_table[0] ); i++ ) + { + if( !gHasLong && + (0 == strcmp(vector_table[i].name, "long") || + 0 == strcmp(vector_table[i].name, "ulong"))) + { + log_info("\nLongs are not supported by this device. Skipping test.\t"); + continue; + } + + char name[32]; + sprintf( name, "%s%ld", vector_table[i].name, j ); + + test = CL_ULONG_MAX; + err = get_type_size( context, queue, name, &test ); + if( err ) + return err; + if( test != j * vector_table[i].size ) + { + log_error( "\nFAILED: Type %s has size %lld, but expected size %lld!\n", name, test, j * vector_table[i].size ); + return -1; + } + if( test != j * vector_table[i].cl_size ) + { + log_error( "\nFAILED: Type %s has size %lld, but cl_ size is %lld!\n", name, test, j * vector_table[i].cl_size ); + return -2; + } + log_info( "%16s", name ); + } + log_info( "\n" ); + } + + //Check that pointer sizes are correct + for( i = 0; i < sizeof( ptr_table ) / sizeof( ptr_table[0] ); i++ ) + { + test = CL_ULONG_MAX; + err = get_type_size( context, queue, ptr_table[i], &test ); + if( err ) + return err; + if( test != ptr_size ) + { + log_error( "\nFAILED: Type %s has size %lld, but expected size %u!\n", ptr_table[i], test, ptr_size ); + return -1; + } + log_info( "%16s", 
ptr_table[i] ); + } + + // Check that intptr_t is large enough + test = CL_ULONG_MAX; + err = get_type_size( context, queue, "intptr_t", &test ); + if( err ) + return err; + if( test < ptr_size ) + { + log_error( "\nFAILED: intptr_t has size %lld, but must be at least %u!\n", test, ptr_size ); + return -1; + } + if( ! IsPowerOfTwo( test ) ) + { + log_error( "\nFAILED: sizeof(intptr_t) is %lld, but must be a power of two!\n", test ); + return -2; + } + log_info( "%16s", "intptr_t" ); + + // Check that uintptr_t is large enough + test = CL_ULONG_MAX; + err = get_type_size( context, queue, "uintptr_t", &test ); + if( err ) + return err; + if( test < ptr_size ) + { + log_error( "\nFAILED: uintptr_t has size %lld, but must be at least %u!\n", test, ptr_size ); + return -1; + } + if( ! IsPowerOfTwo( test ) ) + { + log_error( "\nFAILED: sizeof(uintptr_t) is %lld, but must be a power of two!\n", test ); + return -2; + } + log_info( "%16s\n", "uintptr_t" ); + + //Check that other types are powers of two + for( i = 0; i < sizeof( other_types ) / sizeof( other_types[0] ); i++ ) + { + if( 0 == strcmp(other_types[i], "image2d_t") && + checkForImageSupport( device ) == CL_IMAGE_FORMAT_NOT_SUPPORTED) + { + log_info("\nimages are not supported by this device. Skipping test.\t"); + continue; + } + + if( gIsEmbedded && + 0 == strcmp(other_types[i], "image3d_t") && + checkFor3DImageSupport( device ) == CL_IMAGE_FORMAT_NOT_SUPPORTED) + { + log_info("\n3D images are not supported by this device. Skipping test.\t"); + continue; + } + + if( 0 == strcmp(other_types[i], "sampler_t") && + checkForImageSupport( device ) == CL_IMAGE_FORMAT_NOT_SUPPORTED) + { + log_info("\nimages are not supported by this device. Skipping test.\t"); + continue; + } + + test = CL_ULONG_MAX; + err = get_type_size( context, queue, other_types[i], &test ); + if( err ) + return err; + if( ! 
IsPowerOfTwo( test ) ) + { + log_error( "\nFAILED: Type %s has size %lld, which is not a power of two (section 6.1.5)!\n", other_types[i], test ); + return -1; + } + log_info( "%16s", other_types[i] ); + } + log_info( "\n" ); + + + //Check double + if( is_extension_available( device, "cl_khr_fp64" ) ) + { + log_info( "\tcl_khr_fp64:" ); + test = CL_ULONG_MAX; + err = get_type_size( context, queue, "double", &test ); + if( err ) + return err; + if( test != 8 ) + { + log_error( "\nFAILED: double has size %lld, but must be 8!\n", test ); + return -1; + } + log_info( "%16s", "double" ); + + // Test standard vector sizes + for( j = 2; j <= 16; j *= 2 ) + { + char name[32]; + sprintf( name, "double%ld", j ); + + test = CL_ULONG_MAX; + err = get_type_size( context, queue, name, &test ); + if( err ) + return err; + if( test != 8*j ) + { + log_error( "\nFAILED: %s has size %lld, but must be %ld!\n", name, test, 8 * j); + return -1; + } + log_info( "%16s", name ); + } + log_info( "\n" ); + } + + //Check half + if( is_extension_available( device, "cl_khr_fp16" ) ) + { + log_info( "\tcl_khr_fp16:" ); + test = CL_ULONG_MAX; + err = get_type_size( context, queue, "half", &test ); + if( err ) + return err; + if( test != 2 ) + { + log_error( "\nFAILED: half has size %lld, but must be 2!\n", test ); + return -1; + } + log_info( "%16s", "half" ); + + // Test standard vector sizes + for( j = 2; j <= 16; j *= 2 ) + { + char name[32]; + sprintf( name, "half%ld", j ); + + test = CL_ULONG_MAX; + err = get_type_size( context, queue, name, &test ); + if( err ) + return err; + if( test != 2*j ) + { + log_error( "\nFAILED: %s has size %lld, but must be %ld!\n", name, test, 2 * j); + return -1; + } + log_info( "%16s", name ); + } + log_info( "\n" ); + } + + return err; +} + + diff --git a/test_conformance/basic/test_vec_type_hint.c b/test_conformance/basic/test_vec_type_hint.c new file mode 100644 index 00000000..a35d8403 --- /dev/null +++ b/test_conformance/basic/test_vec_type_hint.c @@ -0,0 
+1,107 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include +#include + + +#include "procs.h" +#include "../../test_common/harness/conversions.h" +#include "../../test_common/harness/typeWrappers.h" + + +static const char *sample_kernel = { + "%s\n" // optional pragma string + "__kernel __attribute__((vec_type_hint(%s%s))) void sample_test(__global int *src, __global int *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + " dst[tid] = src[tid];\n" + "\n" + "}\n" +}; + +int test_vec_type_hint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + int vec_type_index, vec_size_index; + + ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble }; + const char *size_names[] = {"", "2", "4", "8", "16"}; + char *program_source; + + program_source = (char*)malloc(sizeof(char)*4096); + + for (vec_type_index=0; vec_type_index<10; vec_type_index++) { + if (vecType[vec_type_index] == kDouble) { + if (!is_extension_available(deviceID, "cl_khr_fp64")) { + log_info("Extension cl_khr_fp64 not supported; skipping double tests.\n"); + continue; + } + log_info("Testing doubles.\n"); + } + + if (vecType[vec_type_index] == kLong || vecType[vec_type_index] == kULong) + { + if (!gHasLong) + { + log_info("Extension cl_khr_int64 not supported; 
skipping long tests.\n"); + continue; + } + } + + for (vec_size_index=0; vec_size_index<5; vec_size_index++) { + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper in, out; + size_t global[] = {1,1,1}; + + log_info("Testing __attribute__((vec_type_hint(%s%s))...\n", get_explicit_type_name(vecType[vec_type_index]), size_names[vec_size_index]); + + program_source[0] = '\0'; + sprintf(program_source, sample_kernel, + (vecType[vec_type_index] == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "", + get_explicit_type_name(vecType[vec_type_index]), size_names[vec_size_index]); + + error = create_single_kernel_helper( context, &program, &kernel, 1, (const char**)&program_source, "sample_test" ); + if( error != 0 ) + return error; + + in = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(cl_int)*10, NULL, &error); + test_error(error, "clCreateBuffer failed"); + out = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(cl_int)*10, NULL, &error); + test_error(error, "clCreateBuffer failed"); + + error = clSetKernelArg(kernel, 0, sizeof(in), &in); + test_error(error, "clSetKernelArg failed"); + error = clSetKernelArg(kernel, 1, sizeof(out), &out); + test_error(error, "clSetKernelArg failed"); + + error = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global, NULL, 0, NULL, NULL); + test_error(error, "clEnqueueNDRangeKernel failed"); + + error = clFinish(queue); + test_error(error, "clFinish failed"); + } + } + + free(program_source); + + return 0; +} diff --git a/test_conformance/basic/test_vector_creation.cpp b/test_conformance/basic/test_vector_creation.cpp new file mode 100644 index 00000000..9ab0103a --- /dev/null +++ b/test_conformance/basic/test_vector_creation.cpp @@ -0,0 +1,406 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "procs.h" +#include "../../test_common/harness/conversions.h" +#include "../../test_common/harness/typeWrappers.h" +#include "../../test_common/harness/errorHelpers.h" + + + + +#define DEBUG 0 +#define DEPTH 16 +// Limit the maximum code size for any given kernel. +#define MAX_CODE_SIZE (1024*32) + +const int sizes[] = {1, 2, 3, 4, 8, 16, -1, -1, -1, -1}; +const char *size_names[] = {"", "2", "3", "4", "8", "16" , "!!a", "!!b", "!!c", "!!d"}; + +// Creates a kernel by enumerating all possible ways of building the vector out of vloads +// skip_to_results will skip results up to a given number. If the amount of code generated +// is greater than MAX_CODE_SIZE, this function will return the number of results used, +// which can then be used as the skip_to_result value to continue where it left off. 
+int create_kernel(ExplicitType type, int output_size, char *program, int *number_of_results, int skip_to_result) { + + int number_of_sizes; + + switch (output_size) { + case 1: + number_of_sizes = 1; + break; + case 2: + number_of_sizes = 2; + break; + case 3: + number_of_sizes = 3; + break; + case 4: + number_of_sizes = 4; + break; + case 8: + number_of_sizes = 5; + break; + case 16: + number_of_sizes = 6; + break; + default: + log_error("Invalid size: %d\n", output_size); + return -1; + } + + int total_results = 0; + int current_result = 0; + int total_vloads = 0; + int total_program_length = 0; + int aborted_due_to_size = 0; + + if (skip_to_result < 0) + skip_to_result = 0; + + // The line of code for the vector creation + char line[1024]; + // Keep track of what size vector we are using in each position so we can iterate through all fo them + int pos[DEPTH]; + int max_size = output_size; + if (DEBUG > 1) log_info("max_size: %d\n", max_size); + + program[0] = '\0'; + sprintf(program, "%s\n__kernel void test_vector_creation(__global %s *src, __global %s%s *result) {\n", + type == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "", + get_explicit_type_name(type), get_explicit_type_name(type), ( number_of_sizes == 3 ) ? "" : size_names[number_of_sizes-1]); + total_program_length += (int)strlen(program); + + char storePrefix[ 128 ], storeSuffix[ 128 ]; + + // Start out trying sizes 1,1,1,1,1... + for (int i=0; i 1) { + log_info("pos size[] = ["); + for (int k=0; k 1) log_info("vloads: %d, size_so_far:%d\n", vloads, size_so_far); + + // If they did not fit the required size exactly it is too long, so there is no point in checking any other combinations + // of the sizes to the right. Prune them from the search. 
+ if (size_so_far != max_size) { + // Zero all the sizes to the right + for (int k=vloads+1; k=0; d--) { + pos[d]++; + if (pos[d] >= number_of_sizes) { + pos[d] = 0; + if (d == 0) { + // If we rolled over then we are done + done = 1; + break; + } + } else { + break; + } + } + // Go on to the next size since this one (and all others "under" it) didn't fit + continue; + } + + + // Generate the actual load line if we are building this part + line[0]= '\0'; + if (skip_to_result == 0 || total_results >= skip_to_result) { + if( number_of_sizes == 3 ) + { + sprintf( storePrefix, "vstore3( " ); + sprintf( storeSuffix, ", %d, result )", current_result ); + } + else + { + sprintf( storePrefix, "result[%d] = ", current_result ); + storeSuffix[ 0 ] = 0; + } + + sprintf(line, "\t%s(%s%d)(", storePrefix, get_explicit_type_name(type), output_size); + current_result++; + + int offset = 0; + for (int i=0; i MAX_CODE_SIZE) { + aborted_due_to_size = 1; + done = 1; + } + + + if (DEBUG) log_info("line is: %s", line); + + // If we did not use all of them, then we ignore any changes further to the right. + // We do this by causing those loops to skip on the next iteration. + if (vloads < DEPTH) { + if (DEBUG > 1) log_info("done with this depth\n"); + for (int k=vloads; k=0; d--) { + pos[d]++; + if (pos[d] >= number_of_sizes) { + pos[d] = 0; + if (d == 0) { + // If we rolled over at the far-left then we are done + done = 1; + break; + } + } else { + break; + } + } + if (done) + break; + + // Continue until we are done. 
+ } + strcat(program, "}\n\n"); //log_info("%s\n", program); + total_program_length += 3; + if (DEBUG) log_info("\t\t(Program for vector type %s%s contains %d vector creations, of total program length %gkB, with a total of %d vloads.)\n", + get_explicit_type_name(type), size_names[number_of_sizes-1], total_results, total_program_length/1024.0, total_vloads); + *number_of_results = current_result; + if (aborted_due_to_size) + return total_results; + return 0; +} + + + + +int test_vector_creation(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble }; + unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16}; + + char *program_source; + int error; + int total_errors = 0; + + cl_int input_data_int[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; + cl_double input_data_double[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; + void *input_data_converted; + void *output_data; + + int number_of_results;; + + input_data_converted = malloc(sizeof(cl_double)*16); + program_source = (char*)malloc(sizeof(char)*1024*1024*4); + + // Iterate over all the types + for (int type_index=0; type_index<10; type_index++) { + if(!gHasLong && ((vecType[type_index] == kLong) || (vecType[type_index] == kULong))) + { + log_info("Long/ULong data type not supported on this device\n"); + continue; + } + + clMemWrapper input; + + if (vecType[type_index] == kDouble) { + if (!is_extension_available(deviceID, "cl_khr_fp64")) { + log_info("Extension cl_khr_fp64 not supported; skipping double tests.\n"); + continue; + } + log_info("Testing doubles.\n"); + } + + // Convert the data to the right format for the test. 
+ memset(input_data_converted, 0xff, sizeof(cl_double)*16); + if (vecType[type_index] != kDouble) { + for (int j=0; j<16; j++) { + convert_explicit_value(&input_data_int[j], ((char*)input_data_converted)+get_explicit_type_size(vecType[type_index])*j, + kInt, 0, kRoundToEven, vecType[type_index]); + } + } else { + memcpy(input_data_converted, &input_data_double, sizeof(cl_double)*16); + } + + input = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, get_explicit_type_size(vecType[type_index])*16, + (vecType[type_index] != kDouble) ? input_data_converted : input_data_double, &error); + if (error) { + print_error(error, "clCreateBuffer failed"); + total_errors++; + continue; + } + + // Iterate over all the vector sizes. + for (int size_index=1; size_index< 5; size_index++) { + size_t global[] = {1,1,1}; + int number_generated = -1; + int previous_number_generated = 0; + + log_info("Testing %s%s...\n", get_explicit_type_name(vecType[type_index]), size_names[size_index]); + while (number_generated != 0) { + clMemWrapper output; + clKernelWrapper kernel; + clProgramWrapper program; + + number_generated = create_kernel(vecType[type_index], vecSizes[size_index], program_source, &number_of_results, number_generated); + if (number_generated != 0) { + if (previous_number_generated == 0) + log_info("Code size greater than %gkB; splitting test into multiple kernels.\n", MAX_CODE_SIZE/1024.0); + log_info("\tExecuting vector permutations %d to %d...\n", previous_number_generated, number_generated-1); + } + + error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&program_source, "test_vector_creation"); + if (error) { + log_error("create_single_kernel_helper failed.\n"); + total_errors++; + break; + } + + output = clCreateBuffer(context, CL_MEM_WRITE_ONLY, + number_of_results*get_explicit_type_size(vecType[type_index])*vecSizes[size_index], + NULL, &error); + if (error) { + print_error(error, "clCreateBuffer failed"); + total_errors++; + break; + } + + 
error = clSetKernelArg(kernel, 0, sizeof(input), &input); + error |= clSetKernelArg(kernel, 1, sizeof(output), &output); + if (error) { + print_error(error, "clSetKernelArg failed"); + total_errors++; + break; + } + + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global, NULL, 0, NULL, NULL); + if (error) { + print_error(error, "clEnqueueNDRangeKernel failed"); + total_errors++; + break; + } + + error = clFinish(queue); + if (error) { + print_error(error, "clFinish failed"); + total_errors++; + break; + } + + output_data = malloc(number_of_results*get_explicit_type_size(vecType[type_index])*vecSizes[size_index]); + if (output_data == NULL) { + log_error("Failed to allocate memory for output data.\n"); + total_errors++; + break; + } + memset(output_data, 0xff, number_of_results*get_explicit_type_size(vecType[type_index])*vecSizes[size_index]); + error = clEnqueueReadBuffer(queue, output, CL_TRUE, 0, + number_of_results*get_explicit_type_size(vecType[type_index])*vecSizes[size_index], + output_data, 0, NULL, NULL); + if (error) { + print_error(error, "clEnqueueReadBuffer failed"); + total_errors++; + free(output_data); + break; + } + + // Check the results + char *res = (char *)output_data; + char *exp = (char *)input_data_converted; + for (int i=0; i +#include +#include +#include +#include +#include + + +#include "procs.h" +#include "../../test_common/harness/conversions.h" +#include "../../test_common/harness/typeWrappers.h" +#include "../../test_common/harness/errorHelpers.h" + +// Outputs debug information for stores +#define DEBUG 0 +// Forces stores/loads to be done with offsets = tid +#define LINEAR_OFFSETS 0 +#define NUM_LOADS 512 + +static const char *doubleExtensionPragma = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; + +#pragma mark -------------------- vload harness -------------------------- + +typedef void (*create_vload_program_fn)( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize, size_t outVectorSize ); + 
+int test_vload( cl_device_id device, cl_context context, cl_command_queue queue, ExplicitType type, unsigned int vecSize, + create_vload_program_fn createFn, size_t bufferSize, MTdata d ) +{ + int error; + + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper streams[ 4 ]; + const size_t numLoads = (DEBUG) ? 16 : NUM_LOADS; + + if (DEBUG) bufferSize = (bufferSize < 128) ? bufferSize : 128; + + size_t threads[ 1 ], localThreads[ 1 ]; + clProtectedArray inBuffer( bufferSize ); + char programSrc[ 10240 ]; + cl_uint offsets[ numLoads ], alignmentOffsets[ numLoads ]; + size_t numElements, typeSize, i; + unsigned int outVectorSize; + + + typeSize = get_explicit_type_size( type ); + numElements = bufferSize / ( typeSize * vecSize ); + bufferSize = numElements * typeSize * vecSize; // To account for rounding + + if (DEBUG) log_info("Testing: numLoads: %d, typeSize: %d, vecSize: %d, numElements: %d, bufferSize: %d\n", (int)numLoads, (int)typeSize, vecSize, (int)numElements, (int)bufferSize); + + // Create some random input data and random offsets to load from + generate_random_data( type, numElements * vecSize, d, (void *)inBuffer ); + for( i = 0; i < numLoads; i++ ) + { + offsets[ i ] = (cl_uint)random_in_range( 0, (int)numElements - 1, d ); + if( offsets[ i ] < numElements - 2 ) + alignmentOffsets[ i ] = (cl_uint)random_in_range( 0, (int)vecSize - 1, d ); + else + alignmentOffsets[ i ] = 0; + if (LINEAR_OFFSETS) offsets[i] = (cl_uint)i; + } + if (LINEAR_OFFSETS) log_info("Offsets set to thread IDs to simplify output.\n"); + + // 32-bit fixup + outVectorSize = vecSize; + + // Declare output buffers now +#if !(defined(_WIN32) && defined(_MSC_VER)) + char outBuffer[ numLoads * typeSize * outVectorSize ]; + char referenceBuffer[ numLoads * typeSize * vecSize ]; +#else + char* outBuffer = (char*)_malloca(numLoads * typeSize * outVectorSize * sizeof(cl_char)); + char* referenceBuffer = (char*)_malloca(numLoads * typeSize * vecSize * sizeof(cl_char)); +#endif + + 
// Create the program + + + createFn( programSrc, numElements, type, vecSize, outVectorSize); + + // Create our kernel + const char *ptr = programSrc; + + error = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "test_fn" ); + test_error( error, "Unable to create testing kernel" ); + if (DEBUG) log_info("Kernel: \n%s\n", programSrc); + + // Get the number of args to differentiate the kernels with local storage. (They have 5) + cl_uint numArgs; + error = clGetKernelInfo(kernel, CL_KERNEL_NUM_ARGS, sizeof(numArgs), &numArgs, NULL); + test_error( error, "clGetKernelInfo failed"); + + // Set up parameters + streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, bufferSize, (void *)inBuffer, &error ); + test_error( error, "Unable to create kernel stream" ); + streams[ 1 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, numLoads*sizeof(offsets[0]), offsets, &error ); + test_error( error, "Unable to create kernel stream" ); + streams[ 2 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, numLoads*sizeof(alignmentOffsets[0]), alignmentOffsets, &error ); + test_error( error, "Unable to create kernel stream" ); + streams[ 3 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, numLoads*typeSize*outVectorSize, (void *)outBuffer, &error ); + test_error( error, "Unable to create kernel stream" ); + + // Set parameters and run + if (numArgs == 5) { + // We need to set the size of the local storage + error = clSetKernelArg(kernel, 0, bufferSize, NULL); + test_error( error, "clSetKernelArg for buffer failed"); + for( i = 0; i < 4; i++ ) + { + error = clSetKernelArg( kernel, (int)i+1, sizeof( streams[ i ] ), &streams[ i ] ); + test_error( error, "Unable to set kernel argument" ); + } + } else { + // No local storage + for( i = 0; i < 4; i++ ) + { + error = clSetKernelArg( kernel, (int)i, sizeof( streams[ i ] ), &streams[ i ] ); + test_error( error, "Unable to set kernel argument" ); + } + } + + threads[ 0 ] = numLoads; + error = get_max_common_work_group_size( 
context, kernel, threads[ 0 ], &localThreads[ 0 ] ); + test_error( error, "Unable to get local thread size" ); + + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); + test_error( error, "Unable to exec kernel" ); + + // Get the results + error = clEnqueueReadBuffer( queue, streams[ 3 ], CL_TRUE, 0, numLoads * typeSize * outVectorSize * sizeof(cl_char), (void *)outBuffer, 0, NULL, NULL ); + test_error( error, "Unable to read results" ); + + + // Create the reference results + memset( referenceBuffer, 0, numLoads * typeSize * vecSize * sizeof(cl_char)); + for( i = 0; i < numLoads; i++ ) + { + memcpy( referenceBuffer + i * typeSize * vecSize, ( (char *)(void *)inBuffer ) + ( ( offsets[ i ] * vecSize ) + alignmentOffsets[ i ] ) * typeSize, + typeSize * vecSize ); + } + + // Validate the results now + char *expected = referenceBuffer; + char *actual = outBuffer; + char *in = (char *)(void *)inBuffer; + + if (DEBUG) { + log_info("Memory contents:\n"); + for (i=0; i 10240 ) + localSize = 10240; + if (localSize > 4096) + localSize -= 2048; + else + localSize /= 2; + + return test_vloadset( device, context, queue, create_local_load_code, (size_t)localSize ); +} + + +void create_constant_load_code( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize, size_t outVectorSize ) +{ + const char *pattern = + "%s%s" + "__kernel void test_fn( __constant %s *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s%d *results )\n" + "{\n" + " int tid = get_global_id( 0 );\n" + " %s%d tmp = vload%d( offsets[ tid ], ( (__constant %s *) src ) + alignmentOffsets[ tid ] );\n" + " results[ tid ] = tmp;\n" + "}\n"; + + const char *patternV3 = + "%s%s" + "__kernel void test_fn( __constant %s *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s *results )\n" + "{\n" + " int tid = get_global_id( 0 );\n" + " %s3 tmp = vload3( offsets[ tid ], ( (__constant %s *) src ) + alignmentOffsets[ 
tid ] );\n" + " results[ 3*tid ] = tmp.s0;\n" + " results[ 3*tid+1 ] = tmp.s1;\n" + " results[ 3*tid+2 ] = tmp.s2;\n" + "}\n"; + + const char *typeName = get_explicit_type_name(type); + if(inVectorSize == 3) { + sprintf( destBuffer, patternV3, + type == kDouble ? doubleExtensionPragma : "", + "", + typeName, typeName, typeName, + typeName ); + } else { + sprintf( destBuffer, pattern, + type == kDouble ? doubleExtensionPragma : "", + "", + typeName, typeName, (int)outVectorSize, typeName, (int)inVectorSize, + (int)inVectorSize, typeName ); + } +} + +int test_vload_constant(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ) +{ + // Determine the max size of a local buffer that we can test against + cl_ulong maxSize; + int error = clGetDeviceInfo( device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof( maxSize ), &maxSize, NULL ); + test_error( error, "Unable to get max size of constant memory buffer" ); + if( maxSize > 10240 ) + maxSize = 10240; + if (maxSize > 4096) + maxSize -= 2048; + else + maxSize /= 2; + + return test_vloadset( device, context, queue, create_constant_load_code, (size_t)maxSize ); +} + + +void create_private_load_code( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize, size_t outVectorSize ) +{ + const char *pattern = + "%s%s" + // Private memory is unique per thread, unlike local storage which is unique per local work group. 
Which means + // for this test, we have to copy the entire test buffer into private storage ON EACH THREAD to be an effective test + "#define PRIV_TYPE %s%d\n" + "#define PRIV_SIZE %d\n" + "__kernel void test_fn( __global %s%d *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s%d *results )\n" + "{\n" + " __private PRIV_TYPE sPrivateStorage[ PRIV_SIZE ];\n" + " int tid = get_global_id( 0 );\n" + "\n" + " for( int i = 0; i < %d; i++ )\n" + " sPrivateStorage[ i ] = src[ i ];\n" + // Note: unlike the local test, each thread runs the above copy loop independently, so nobody needs to wait for + // anybody else to sync up + "\n" + " %s%d tmp = vload%d( offsets[ tid ], ( (__private %s *) sPrivateStorage ) + alignmentOffsets[ tid ] );\n" + " results[ tid ] = tmp;\n" + "}\n"; + + const char *patternV3 = + "%s%s" + // Private memory is unique per thread, unlike local storage which is unique per local work group. Which means + // for this test, we have to copy the entire test buffer into private storage ON EACH THREAD to be an effective test + "#define PRIV_TYPE %s\n" + "#define PRIV_SIZE %d\n" + "__kernel void test_fn( __global %s *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s *results )\n" + "{\n" + " __private PRIV_TYPE sPrivateStorage[ PRIV_SIZE ];\n" + " int tid = get_global_id( 0 );\n" + "\n" + " for( int i = 0; i < PRIV_SIZE; i++ )\n" + " {\n" + " sPrivateStorage[ i ] = src[ i ];\n" + " }\n" + // Note: unlike the local test, each thread runs the above copy loop independently, so nobody needs to wait for + // anybody else to sync up + "\n" + " %s3 tmp = vload3( offsets[ tid ], ( sPrivateStorage ) + alignmentOffsets[ tid ] );\n" + " results[ 3*tid ] = tmp.s0;\n" + " results[ 3*tid+1 ] = tmp.s1;\n" + " results[ 3*tid+2 ] = tmp.s2;\n" + "}\n"; + + const char *typeName = get_explicit_type_name(type); + if(inVectorSize ==3) { + sprintf( destBuffer, patternV3, + type == kDouble ? 
doubleExtensionPragma : "", + "", + typeName, 3*((int)inBufferSize), + typeName, typeName, + typeName ); + // log_info("Src is \"\n%s\n\"\n", destBuffer); + } else { + sprintf( destBuffer, pattern, + type == kDouble ? doubleExtensionPragma : "", + "", + typeName, (int)inVectorSize, (int)inBufferSize, + typeName, (int)inVectorSize, typeName, (int)outVectorSize, + (int)inBufferSize, + typeName, (int)inVectorSize, (int)inVectorSize, typeName ); + } +} + +int test_vload_private(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ) +{ + // We have no idea how much actual private storage is available, so just pick a reasonable value, + // which is that we can fit at least two 16-element long, which is 2*8 bytes * 16 = 256 bytes + return test_vloadset( device, context, queue, create_private_load_code, 256 ); +} + + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +#pragma mark -------------------- vstore harness -------------------------- + +typedef void (*create_vstore_program_fn)( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize ); + +int test_vstore( cl_device_id device, cl_context context, cl_command_queue queue, ExplicitType type, unsigned int vecSize, + create_vstore_program_fn createFn, size_t bufferSize, MTdata d ) +{ + int error; + + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper streams[ 3 ]; + + size_t threads[ 1 ], localThreads[ 1 ]; + + size_t numElements, typeSize, numStores = (DEBUG) ? 16 : NUM_LOADS; + + if (DEBUG) + bufferSize = (bufferSize < 128) ? 
bufferSize : 128; + + typeSize = get_explicit_type_size( type ); + numElements = bufferSize / ( typeSize * vecSize ); + bufferSize = numElements * typeSize * vecSize; // To account for rounding + if( numStores > numElements * 2 / 3 ) + { + // Note: unlike load, we have to restrict the # of stores here, since all offsets must be unique for our test + // (Plus, we leave some room for extra values to make sure didn't get written) + numStores = numElements * 2 / 3; + if( numStores < 1 ) + numStores = 1; + } + if (DEBUG) + log_info("Testing: numStores: %d, typeSize: %d, vecSize: %d, numElements: %d, bufferSize: %d\n", (int)numStores, (int)typeSize, vecSize, (int)numElements, (int)bufferSize); +#if !(defined(_WIN32) && defined(_MSC_VER)) + cl_uint offsets[ numStores ]; +#else + cl_uint* offsets = (cl_uint*)_malloca(numStores * sizeof(cl_uint)); +#endif + char programSrc[ 10240 ]; + size_t i; + +#if !(defined(_WIN32) && defined(_MSC_VER)) + char inBuffer[ numStores * typeSize * vecSize ]; +#else + char* inBuffer = (char*)_malloca( numStores * typeSize * vecSize * sizeof(cl_char)); +#endif + clProtectedArray outBuffer( numElements * typeSize * vecSize ); +#if !(defined(_WIN32) && defined(_MSC_VER)) + char referenceBuffer[ numElements * typeSize * vecSize ]; +#else + char* referenceBuffer = (char*)_malloca(numElements * typeSize * vecSize * sizeof(cl_char)); +#endif + + // Create some random input data and random offsets to load from + generate_random_data( type, numStores * vecSize, d, (void *)inBuffer ); + + // Note: make sure no two offsets are the same, otherwise the output would depend on + // the order that threads ran in, and that would be next to impossible to verify +#if !(defined(_WIN32) && defined(_MSC_VER)) + char flags[ numElements ]; +#else + char* flags = (char*)_malloca( numElements * sizeof(char)); +#endif + + memset( flags, 0, numElements * sizeof(char) ); + for( i = 0; i < numStores; i++ ) + { + do + { + offsets[ i ] = (cl_uint)random_in_range( 0, 
(int)numElements - 2, d ); // Note: keep it one vec below the end for offset testing + } while( flags[ offsets[ i ] ] != 0 ); + flags[ offsets[ i ] ] = -1; + if (LINEAR_OFFSETS) + offsets[i] = (int)i; + } + if (LINEAR_OFFSETS) + log_info("Offsets set to thread IDs to simplify output.\n"); + + createFn( programSrc, numElements, type, vecSize ); + + // Create our kernel + const char *ptr = programSrc; + error = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "test_fn" ); + test_error( error, "Unable to create testing kernel" ); + if (DEBUG) log_info("Kernel: \n%s\n", programSrc); + + // Get the number of args to differentiate the kernels with local storage. (They have 5) + cl_uint numArgs; + error = clGetKernelInfo(kernel, CL_KERNEL_NUM_ARGS, sizeof(numArgs), &numArgs, NULL); + test_error( error, "clGetKernelInfo failed"); + + // Set up parameters + streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, numStores * typeSize * vecSize * sizeof(cl_char), (void *)inBuffer, &error ); + test_error( error, "Unable to create kernel stream" ); + streams[ 1 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, numStores * sizeof(cl_uint), offsets, &error ); + test_error( error, "Unable to create kernel stream" ); + streams[ 2 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, numElements * typeSize * vecSize, (void *)outBuffer, &error ); + test_error( error, "Unable to create kernel stream" ); + + // Set parameters and run + if (numArgs == 5) + { + // We need to set the size of the local storage + error = clSetKernelArg(kernel, 0, bufferSize, NULL); + test_error( error, "clSetKernelArg for buffer failed"); + for( i = 0; i < 3; i++ ) + { + error = clSetKernelArg( kernel, (int)i+1, sizeof( streams[ i ] ), &streams[ i ] ); + test_error( error, "Unable to set kernel argument" ); + } + } + else + { + // No local storage + for( i = 0; i < 3; i++ ) + { + error = clSetKernelArg( kernel, (int)i, sizeof( streams[ i ] ), &streams[ i ] ); + if (error) + 
log_info("%s\n", programSrc); + test_error( error, "Unable to set kernel argument" ); + } + } + + threads[ 0 ] = numStores; + error = get_max_common_work_group_size( context, kernel, threads[ 0 ], &localThreads[ 0 ] ); + test_error( error, "Unable to get local thread size" ); + + // Run in a loop, changing the address offset from 0 to ( vecSize - 1 ) each time, since + // otherwise stores might overlap each other, and it'd be a nightmare to test! + for( cl_uint addressOffset = 0; addressOffset < vecSize; addressOffset++ ) + { + if (DEBUG) + log_info("\tstore addressOffset is %d, executing with threads %d\n", addressOffset, (int)threads[0]); + + // Clear the results first + memset( outBuffer, 0, numElements * typeSize * vecSize ); + error = clEnqueueWriteBuffer( queue, streams[ 2 ], CL_TRUE, 0, numElements * typeSize * vecSize, (void *)outBuffer, 0, NULL, NULL ); + test_error( error, "Unable to erase result stream" ); + + // Set up the new offset and run + if (numArgs == 5) + error = clSetKernelArg( kernel, 3+1, sizeof( cl_uint ), &addressOffset ); + else + error = clSetKernelArg( kernel, 3, sizeof( cl_uint ), &addressOffset ); + test_error( error, "Unable to set address offset argument" ); + + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); + test_error( error, "Unable to exec kernel" ); + + // Get the results + error = clEnqueueReadBuffer( queue, streams[ 2 ], CL_TRUE, 0, numElements * typeSize * vecSize, (void *)outBuffer, 0, NULL, NULL ); + test_error( error, "Unable to read results" ); + + + // Create the reference results + memset( referenceBuffer, 0, numElements * typeSize * vecSize * sizeof(cl_char) ); + for( i = 0; i < numStores; i++ ) + { + memcpy( referenceBuffer + ( ( offsets[ i ] * vecSize ) + addressOffset ) * typeSize, inBuffer + i * typeSize * vecSize, typeSize * vecSize ); + } + + // Validate the results now + char *expected = referenceBuffer; + char *actual = (char *)(void *)outBuffer; + + if (DEBUG) 
//-----------------------------------------------------------------------------
// vstore kernel-source generators and their test entry points.
// Each create_*_store_code() sprintf's an OpenCL C kernel into destBuffer.
// The order of %-conversions inside each pattern string must stay exactly in
// step with the argument list of the matching sprintf() call below it.
//-----------------------------------------------------------------------------

// Entry point: runs the vstore test set against __global memory with a
// 10240-byte scratch buffer (test_vstoreset / create_global_store_code are
// defined elsewhere in this file).
int test_vstore_global(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
{
    return test_vstoreset( device, context, queue, create_global_store_code, 10240 );
}


// Generates a kernel that vstores each thread's source vector into __local
// memory, then copies the thread's own slice of local memory back out to the
// __global destination buffer for host-side verification.
// destBuffer: receives the generated kernel source.
// inBufferSize: unused here (the local buffer is passed as a kernel arg).
// type/inVectorSize: element type and vector width under test (3 is special-cased).
void create_local_store_code( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize )
{
    // Generic pattern for vector widths 2/4/8/16.
    // 15 conversions follow the leading %s (optional fp64 pragma); they pair
    // 1:1 with the else-branch sprintf below.
    const char *pattern =
        "%s"
        "\n"
        "__kernel void test_fn(__local %s%d *sSharedStorage, __global %s%d *srcValues, __global uint *offsets, __global %s%d *destBuffer, uint alignmentOffset )\n"
        "{\n"
        "    int tid = get_global_id( 0 );\n"
        // We need to zero the shared storage since any locations we don't write to will have garbage otherwise.
        // (Two consecutive vectors are zeroed so the alignment-offset store below lands on cleared memory.)
        "   sSharedStorage[ offsets[tid] ] = (%s%d)(%s)0;\n"
        "   sSharedStorage[ offsets[tid] +1 ] = sSharedStorage[ offsets[tid] ];\n"
        "    barrier( CLK_LOCAL_MEM_FENCE );\n"
        "\n"
        "   vstore%d( srcValues[ tid ], offsets[ tid ], ( (__local %s *)sSharedStorage ) + alignmentOffset );\n"
        "\n"
        // Note: Once all threads are done vstore'ing into our shared storage, we then copy into the global output
        // buffer, but we have to make sure ALL threads are done vstore'ing before we do the copy
        "    barrier( CLK_LOCAL_MEM_FENCE );\n"
        "\n"
        // Note: we only copy the relevant portion of our local storage over to the dest buffer, because
        // otherwise, local threads would be overwriting results from other local threads
        "  int i;\n"
        "  __local %s *sp = (__local %s*) (sSharedStorage + offsets[tid]) + alignmentOffset;\n"
        "  __global %s *dp = (__global %s*) (destBuffer + offsets[tid]) + alignmentOffset;\n"
        "  for( i = 0; (size_t)i < sizeof( sSharedStorage[0]) / sizeof( *sp ); i++ ) \n"
        "    dp[i] = sp[i];\n"
        "}\n";

    // Special-case pattern for width-3 vectors: vload3/vstore3 address scalar
    // elements (3 per logical vector), so indexing is done as 3*offset.
    const char *patternV3 =
        "%s"
        "\n"
        "__kernel void test_fn(__local %s *sSharedStorage, __global %s *srcValues, __global uint *offsets, __global %s *destBuffer, uint alignmentOffset )\n"
        "{\n"
        "    int tid = get_global_id( 0 );\n"
        // We need to zero the shared storage since any locations we don't write to will have garbage otherwise.
        // (Six scalar slots are cleared: the 3 elements of the store plus alignment slack.)
        "   sSharedStorage[ 3*offsets[tid] ] = (%s)0;\n"
        "   sSharedStorage[ 3*offsets[tid] +1 ] = \n"
        "       sSharedStorage[ 3*offsets[tid] ];\n"
        "   sSharedStorage[ 3*offsets[tid] +2 ] = \n"
        "       sSharedStorage[ 3*offsets[tid]];\n"
        "   sSharedStorage[ 3*offsets[tid] +3 ] = \n"
        "       sSharedStorage[ 3*offsets[tid]];\n"
        "   sSharedStorage[ 3*offsets[tid] +4 ] = \n"
        "       sSharedStorage[ 3*offsets[tid] ];\n"
        "   sSharedStorage[ 3*offsets[tid] +5 ] = \n"
        "       sSharedStorage[ 3*offsets[tid]];\n"
        "    barrier( CLK_LOCAL_MEM_FENCE );\n"
        "\n"
        "   vstore3( vload3(tid,srcValues), offsets[ tid ], sSharedStorage + alignmentOffset );\n"
        "\n"
        // Note: Once all threads are done vstore'ing into our shared storage, we then copy into the global output
        // buffer, but we have to make sure ALL threads are done vstore'ing before we do the copy
        "    barrier( CLK_LOCAL_MEM_FENCE );\n"
        "\n"
        // Note: we only copy the relevant portion of our local storage over to the dest buffer, because
        // otherwise, local threads would be overwriting results from other local threads
        "  int i;\n"
        "  __local %s *sp = (sSharedStorage + 3*offsets[tid]) + alignmentOffset;\n"
        "  __global %s *dp = (destBuffer + 3*offsets[tid]) + alignmentOffset;\n"
        "  for( i = 0; i < 3; i++ ) \n"
        "    dp[i] = sp[i];\n"
        "}\n";

    const char *typeName = get_explicit_type_name(type);
    if(inVectorSize == 3) {
        sprintf( destBuffer, patternV3,
                type == kDouble ? doubleExtensionPragma : "",
                typeName,
                typeName,
                typeName, typeName,
                typeName, typeName, typeName );
    } else {
        // Argument order mirrors the conversion order in `pattern`:
        // 3x signature (%s%d), zero-init (%s%d)(%s), vstore%d + cast %s,
        // sp pointer pair, dp pointer pair.
        sprintf( destBuffer, pattern,
                type == kDouble ? doubleExtensionPragma : "",
                typeName, (int)inVectorSize,
                typeName, (int)inVectorSize, typeName, (int)inVectorSize,
                typeName, (int)inVectorSize, typeName,
                (int)inVectorSize, typeName, typeName,
                typeName, typeName, typeName );
    }
    // log_info(destBuffer);
}

// Entry point: vstore into __local memory.  The scratch size is capped by the
// device's CL_DEVICE_LOCAL_MEM_SIZE (and further reduced to leave headroom for
// the implementation's own local-memory usage).
int test_vstore_local(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
{
    // Determine the max size of a local buffer that we can test against
    cl_ulong localSize;
    int error = clGetDeviceInfo( device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof( localSize ), &localSize, NULL );
    test_error( error, "Unable to get max size of local memory buffer" );
    if( localSize > 10240 )
        localSize = 10240;
    if (localSize > 4096)
        localSize -= 2048;
    else
        localSize /= 2;
    return test_vstoreset( device, context, queue, create_local_store_code, (size_t)localSize );
}


// Generates a kernel that vstores into per-thread __private memory, then
// copies the thread's slice back to the __global destination buffer.
// inBufferSize sizes the private array (element count of %d below).
void create_private_store_code( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize )
{
    // 16 conversions follow the leading %s; paired with the else-branch sprintf.
    const char *pattern =
        "%s"
        // Private memory is unique per thread, unlike local storage which is unique per local work group. Which means
        // for this test, we have to copy the entire test buffer into private storage ON EACH THREAD to be an effective test
        "\n"
        "__kernel void test_fn( __global %s%d *srcValues, __global uint *offsets, __global %s%d *destBuffer, uint alignmentOffset )\n"
        "{\n"
        "    __private %s%d sPrivateStorage[ %d ];\n"
        "    int tid = get_global_id( 0 );\n"
        // We need to zero the shared storage since any locations we don't write to will have garbage otherwise.
        "   sPrivateStorage[tid] = (%s%d)(%s)0;\n"
        "\n"
        "   vstore%d( srcValues[ tid ], offsets[ tid ], ( (__private %s *)sPrivateStorage ) + alignmentOffset );\n"
        "\n"
        // Note: we only copy the relevant portion of our local storage over to the dest buffer, because
        // otherwise, local threads would be overwriting results from other local threads
        "  uint i;\n"
        "  __private %s *sp = (__private %s*) (sPrivateStorage + offsets[tid]) + alignmentOffset;\n"
        "  __global %s *dp = (__global %s*) (destBuffer + offsets[tid]) + alignmentOffset;\n"
        "  for( i = 0; i < sizeof( sPrivateStorage[0]) / sizeof( *sp ); i++ ) \n"
        "    dp[i] = sp[i];\n"
        "}\n";


    // Width-3 variant: scalar-element addressing (3*offset), same structure.
    const char *patternV3 =
        "%s"
        // Private memory is unique per thread, unlike local storage which is unique per local work group. Which means
        // for this test, we have to copy the entire test buffer into private storage ON EACH THREAD to be an effective test
        "\n"
        "__kernel void test_fn( __global %s *srcValues, __global uint *offsets, __global %s3 *destBuffer, uint alignmentOffset )\n"
        "{\n"
        "    __private %s3 sPrivateStorage[ %d ];\n" // keep this %d
        "    int tid = get_global_id( 0 );\n"
        // We need to zero the shared storage since any locations we don't write to will have garbage otherwise.
        "   sPrivateStorage[tid] = (%s3)(%s)0;\n"
        "\n"

        "   vstore3( vload3(tid,srcValues), offsets[ tid ], ( (__private %s *)sPrivateStorage ) + alignmentOffset );\n"
        "\n"
        // Note: we only copy the relevant portion of our local storage over to the dest buffer, because
        // otherwise, local threads would be overwriting results from other local threads
        "  uint i;\n"
        "  __private %s *sp = ((__private %s*) sPrivateStorage) + 3*offsets[tid] + alignmentOffset;\n"
        "  __global %s *dp = ((__global %s*) destBuffer) + 3*offsets[tid] + alignmentOffset;\n"
        "  for( i = 0; i < 3; i++ ) \n"
        "    dp[i] = sp[i];\n"
        "}\n";

    const char *typeName = get_explicit_type_name(type);
    if(inVectorSize == 3) {
        sprintf( destBuffer, patternV3,
                type == kDouble ? doubleExtensionPragma : "",
                typeName, typeName,
                typeName, (int)inBufferSize,
                typeName, typeName,
                typeName, typeName, typeName, typeName, typeName );
    } else {
        sprintf( destBuffer, pattern,
                type == kDouble ? doubleExtensionPragma : "",
                typeName, (int)inVectorSize, typeName, (int)inVectorSize,
                typeName, (int)inVectorSize, (int)inBufferSize,
                typeName, (int)inVectorSize, typeName,
                (int)inVectorSize, typeName, typeName, typeName, typeName, typeName );
    }
}

// Entry point: vstore into __private memory with a fixed 256-byte scratch size.
int test_vstore_private(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
{
    // We have no idea how much actual private storage is available, so just pick a reasonable value,
    // which is that we can fit at least two 16-element long, which is 2*8 bytes * 16 = 256 bytes
    return test_vstoreset( device, context, queue, create_private_store_code, 256 );
}
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include +#include + + +#include "procs.h" + +const char *wg_barrier_kernel_code = +"__kernel void compute_sum(__global int *a, int n, __global int *tmp_sum, __global int *sum)\n" +"{\n" +" int tid = get_local_id(0);\n" +" int lsize = get_local_size(0);\n" +" int i;\n" +"\n" +" tmp_sum[tid] = 0;\n" +" for (i=tid; i1; i = hadd(i,1))\n" +" {\n" +" work_group_barrier(CLK_GLOBAL_MEM_FENCE);\n" +" if (tid + i < lsize)\n" +" tmp_sum[tid] += tmp_sum[tid + i];\n" +" lsize = i; \n" +" }\n" +"\n" +" //no barrier is required here because last person to write to tmp_sum[0] was tid 0 \n" +" if (tid == 0)\n" +" *sum = tmp_sum[0];\n" +"}\n"; + + +static int +verify_sum(int *inptr, int *tmpptr, int *outptr, int n) +{ + int i; + int reference = 0; + + for (i=0; i max_local_workgroup_size[0]) + max_threadgroup_size = max_local_workgroup_size[0]; + + // work group size must divide evenly into the global size + while( num_elements % max_threadgroup_size ) + max_threadgroup_size--; + + input_ptr = (int*)malloc(sizeof(int) * num_elements); + output_ptr = (int*)malloc(sizeof(int)); + + streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * num_elements, NULL, &err); + test_error(err, "clCreateBuffer failed."); + streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int), NULL, &err); + 
test_error(err, "clCreateBuffer failed."); + streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * max_threadgroup_size, NULL, &err); + test_error(err, "clCreateBuffer failed."); + + d = init_genrand( gRandomSeed ); + for (i=0; i +#include +#include +#include +#include + + +#include "procs.h" +#include "../../test_common/harness/conversions.h" +#include "../../test_common/harness/typeWrappers.h" + +typedef struct work_item_data +{ + cl_uint workDim; + cl_uint globalSize[ 3 ]; + cl_uint globalID[ 3 ]; + cl_uint localSize[ 3 ]; + cl_uint localID[ 3 ]; + cl_uint numGroups[ 3 ]; + cl_uint groupID[ 3 ]; +}; + +static const char *workItemKernelCode = +"typedef struct {\n" +" uint workDim;\n" +" uint globalSize[ 3 ];\n" +" uint globalID[ 3 ];\n" +" uint localSize[ 3 ];\n" +" uint localID[ 3 ];\n" +" uint numGroups[ 3 ];\n" +" uint groupID[ 3 ];\n" +" } work_item_data;\n" +"\n" +"__kernel void sample_kernel( __global work_item_data *outData )\n" +"{\n" +" int id = get_global_id(0);\n" +" outData[ id ].workDim = (uint)get_work_dim();\n" +" for( uint i = 0; i < get_work_dim(); i++ )\n" +" {\n" +" outData[ id ].globalSize[ i ] = (uint)get_global_size( i );\n" +" outData[ id ].globalID[ i ] = (uint)get_global_id( i );\n" +" outData[ id ].localSize[ i ] = (uint)get_local_size( i );\n" +" outData[ id ].localID[ i ] = (uint)get_local_id( i );\n" +" outData[ id ].numGroups[ i ] = (uint)get_num_groups( i );\n" +" outData[ id ].groupID[ i ] = (uint)get_group_id( i );\n" +" }\n" +"}"; + +#define NUM_TESTS 1 + +int test_work_item_functions(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper outData; + work_item_data testData[ 10240 ]; + size_t threads[3], localThreads[3]; + MTdata d; + + + error = create_single_kernel_helper( context, &program, &kernel, 1, &workItemKernelCode, "sample_kernel" ); + test_error( error, "Unable to 
create testing kernel" ); + + outData = clCreateBuffer( context, CL_MEM_READ_WRITE, sizeof( testData ), NULL, &error ); + test_error( error, "Unable to create output buffer" ); + + error = clSetKernelArg( kernel, 0, sizeof( outData ), &outData ); + test_error( error, "Unable to set kernel arg" ); + + d = init_genrand( gRandomSeed ); + for( size_t dim = 1; dim <= 3; dim++ ) + { + for( int i = 0; i < NUM_TESTS; i++ ) + { + size_t numItems = 1; + for( size_t j = 0; j < dim; j++ ) + { + // All of our thread sizes should be within the max local sizes, since they're all <= 20 + threads[ j ] = (size_t)random_in_range( 1, 20, d ); + localThreads[ j ] = threads[ j ] / (size_t)random_in_range( 1, (int)threads[ j ], d ); + while( localThreads[ j ] > 1 && ( threads[ j ] % localThreads[ j ] != 0 ) ) + localThreads[ j ]--; + + numItems *= threads[ j ]; + + // Hack for now: localThreads > 1 are iffy + localThreads[ j ] = 1; + } + error = clEnqueueNDRangeKernel( queue, kernel, (cl_uint)dim, NULL, threads, localThreads, 0, NULL, NULL ); + test_error( error, "Unable to run kernel" ); + + error = clEnqueueReadBuffer( queue, outData, CL_TRUE, 0, sizeof( testData ), testData, 0, NULL, NULL ); + test_error( error, "Unable to read results" ); + + // Validate + for( size_t q = 0; q < threads[0]; q++ ) + { + // We can't really validate the actual value of each one, but we can validate that they're within a sane range + if( testData[ q ].workDim != (cl_uint)dim ) + { + log_error( "ERROR: get_work_dim() did not return proper value for %d dimensions (expected %d, got %d)\n", (int)dim, (int)dim, (int)testData[ q ].workDim ); + free_mtdata(d); + return -1; + } + for( size_t j = 0; j < dim; j++ ) + { + if( testData[ q ].globalSize[ j ] != (cl_uint)threads[ j ] ) + { + log_error( "ERROR: get_global_size(%d) did not return proper value for %d dimensions (expected %d, got %d)\n", + (int)j, (int)dim, (int)threads[ j ], (int)testData[ q ].globalSize[ j ] ); + free_mtdata(d); + return -1; + } + if( 
testData[ q ].globalID[ j ] < 0 || testData[ q ].globalID[ j ] >= (cl_uint)threads[ j ] ) + { + log_error( "ERROR: get_global_id(%d) did not return proper value for %d dimensions (max %d, got %d)\n", + (int)j, (int)dim, (int)threads[ j ], (int)testData[ q ].globalID[ j ] ); + free_mtdata(d); + return -1; + } + if( testData[ q ].localSize[ j ] != (cl_uint)localThreads[ j ] ) + { + log_error( "ERROR: get_local_size(%d) did not return proper value for %d dimensions (expected %d, got %d)\n", + (int)j, (int)dim, (int)localThreads[ j ], (int)testData[ q ].localSize[ j ] ); + free_mtdata(d); + return -1; + } + if( testData[ q ].localID[ j ] < 0 && testData[ q ].localID[ j ] >= (cl_uint)localThreads[ j ] ) + { + log_error( "ERROR: get_local_id(%d) did not return proper value for %d dimensions (max %d, got %d)\n", + (int)j, (int)dim, (int)localThreads[ j ], (int)testData[ q ].localID[ j ] ); + free_mtdata(d); + return -1; + } + size_t groupCount = ( threads[ j ] + localThreads[ j ] - 1 ) / localThreads[ j ]; + if( testData[ q ].numGroups[ j ] != (cl_uint)groupCount ) + { + log_error( "ERROR: get_num_groups(%d) did not return proper value for %d dimensions (expected %d with global dim %d and local dim %d, got %d)\n", + (int)j, (int)dim, (int)groupCount, (int)threads[ j ], (int)localThreads[ j ], (int)testData[ q ].numGroups[ j ] ); + free_mtdata(d); + return -1; + } + if( testData[ q ].groupID[ j ] < 0 || testData[ q ].groupID[ j ] >= (cl_uint)groupCount ) + { + log_error( "ERROR: get_group_id(%d) did not return proper value for %d dimensions (max %d, got %d)\n", + (int)j, (int)dim, (int)groupCount, (int)testData[ q ].groupID[ j ] ); + free_mtdata(d); + return -1; + } + } + } + } + } + + free_mtdata(d); + return 0; +} + + diff --git a/test_conformance/basic/test_writeimage.c b/test_conformance/basic/test_writeimage.c new file mode 100644 index 00000000..da4bd99d --- /dev/null +++ b/test_conformance/basic/test_writeimage.c @@ -0,0 +1,354 @@ +// +// Copyright (c) 2017 The 
Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include +#include + + +#include "procs.h" + +static const char *bgra8888_write_kernel_code = +"\n" +"__kernel void test_bgra8888_write(__global unsigned char *src, write_only image2d_t dstimg)\n" +"{\n" +" int tid_x = get_global_id(0);\n" +" int tid_y = get_global_id(1);\n" +" int indx = tid_y * get_image_width(dstimg) + tid_x;\n" +" float4 color;\n" +"\n" +" indx *= 4;\n" +" color = (float4)((float)src[indx+2], (float)src[indx+1], (float)src[indx+0], (float)src[indx+3]);\n" +" color /= (float4)(255.0f, 255.0f, 255.0f, 255.0f);\n" +" write_imagef(dstimg, (int2)(tid_x, tid_y), color);\n" +"\n" +"}\n"; + + +static const char *rgba8888_write_kernel_code = +"\n" +"__kernel void test_rgba8888_write(__global unsigned char *src, write_only image2d_t dstimg)\n" +"{\n" +" int tid_x = get_global_id(0);\n" +" int tid_y = get_global_id(1);\n" +" int indx = tid_y * get_image_width(dstimg) + tid_x;\n" +" float4 color;\n" +"\n" +" indx *= 4;\n" +" color = (float4)((float)src[indx+0], (float)src[indx+1], (float)src[indx+2], (float)src[indx+3]);\n" +" color /= (float4)(255.0f, 255.0f, 255.0f, 255.0f);\n" +" write_imagef(dstimg, (int2)(tid_x, tid_y), color);\n" +"\n" +"}\n"; + + +static unsigned char * +generate_8888_image(int w, int h, MTdata d) +{ + cl_uchar *ptr = (cl_uchar *)malloc(w * h * 4); + int i; + + for (i=0; i 
/*
 * test_writeimage_int16
 *
 * Exercises write_imagef into a CL_RGBA / CL_UNORM_INT16 image: random 16-bit
 * pixel data is uploaded into a plain buffer, a kernel normalizes and writes
 * it into a 2D image, and the image is read back and verified against the
 * input.  The same kernel source is run twice, once targeting an image
 * created with CL_MEM_READ_WRITE and once with CL_MEM_WRITE_ONLY.
 *
 * Returns 0 on pass, non-zero if any sub-case failed.
 * NOTE(review): the early-error paths return without freeing
 * input_ptr/output_ptr or releasing already-created CL objects — pre-existing
 * test style, deliberately left unchanged here.
 */
int test_writeimage_int16(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_mem streams[3];            // [0] RW image, [1] WO image, [2] source buffer
    cl_program program;
    cl_kernel kernel[2];          // one kernel instance per destination image
    cl_image_format img_format;
    cl_ushort *input_ptr, *output_ptr;
    size_t threads[2];
    int img_width = 512;
    int img_height = 512;
    int i, err, any_err = 0;
    size_t origin[3] = {0, 0, 0};
    size_t region[3] = {img_width, img_height, 1};
    size_t length = img_width * img_height * 4 * sizeof(cl_ushort);  // 4 channels (RGBA)

    // Skip (without failing) on devices without image support.
    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )

    MTdata d = init_genrand( gRandomSeed );
    input_ptr = generate_16bit_image(img_width, img_height, d);
    free_mtdata(d); d = NULL;

    output_ptr = (cl_ushort*)malloc(length);

    img_format.image_channel_order = CL_RGBA;
    img_format.image_channel_data_type = CL_UNORM_INT16;
    streams[0] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format, img_width, img_height, 0, NULL, NULL);
    if (!streams[0])
    {
        log_error("create_image_2d failed\n");
        return -1;
    }

    img_format.image_channel_order = CL_RGBA;
    img_format.image_channel_data_type = CL_UNORM_INT16;
    streams[1] = create_image_2d(context, CL_MEM_WRITE_ONLY, &img_format, img_width, img_height, 0, NULL, NULL);
    if (!streams[1])
    {
        log_error("create_image_2d failed\n");
        return -1;
    }
    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
    if (!streams[2])
    {
        log_error("clCreateArray failed\n");
        return -1;
    }

    // Upload the random source pixels into the buffer the kernel reads from.
    err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, input_ptr, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueWriteBuffer failed\n");
        return -1;
    }

    err = create_single_kernel_helper(context, &program, &kernel[0], 1, &rgba16_write_kernel_code, "test_rgba16_write" );
    if (err)
        return -1;
    // Second instance of the same kernel so each destination image keeps its
    // own argument bindings.
    kernel[1] = clCreateKernel(program, "test_rgba16_write", NULL);
    if (!kernel[1])
    {
        log_error("clCreateKernel failed\n");
        return -1;
    }

    err = clSetKernelArg(kernel[0], 0, sizeof streams[2], &streams[2]);
    err |= clSetKernelArg(kernel[0], 1, sizeof streams[0], &streams[0]);
    if (err != CL_SUCCESS)
    {
        log_error("clSetKernelArgs failed\n");
        return -1;
    }

    err = clSetKernelArg(kernel[1], 0, sizeof streams[2], &streams[2]);
    err |= clSetKernelArg(kernel[1], 1, sizeof streams[1], &streams[1]);
    if (err != CL_SUCCESS)
    {
        log_error("clSetKernelArgs failed\n");
        return -1;
    }

    // One work-item per pixel.
    threads[0] = (unsigned int)img_width;
    threads[1] = (unsigned int)img_height;

    for (i=0; i<2; i++)
    {
        err = clEnqueueNDRangeKernel(queue, kernel[i], 2, NULL, threads, NULL, 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clExecuteKernel failed\n");
            return -1;
        }

        // Blocking read of the full image back to the host for verification.
        err = clEnqueueReadImage(queue, streams[i], CL_TRUE, origin, region, 0, 0, output_ptr, 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clReadImage failed\n");
            return -1;
        }

        err = verify_16bit_image((i == 0) ? "WRITE_IMAGE_RGBA_UNORM_INT16 test with memflags = CL_MEM_READ_WRITE" :
                                 "WRITE_IMAGE_RGBA_UNORM_INT16 test with memflags = CL_MEM_WRITE_ONLY",
                                 input_ptr, output_ptr, img_width, img_height);
        any_err |= err;  // keep running both sub-cases; report if either failed
    }

    // cleanup
    clReleaseMemObject(streams[0]);
    clReleaseMemObject(streams[1]);
    clReleaseMemObject(streams[2]);
    clReleaseKernel(kernel[0]);
    clReleaseKernel(kernel[1]);
    clReleaseProgram(program);
    free(input_ptr);
    free(output_ptr);

    return any_err;
}
00000000..be1b449e --- /dev/null +++ b/test_conformance/buffers/Jamfile @@ -0,0 +1,24 @@ +project + : requirements + gcc:-xc++ + msvc:"/TP" + ; + +exe test_buffers + : array_info.c + main.c + test_buffer_copy.c + test_buffer_map.c + test_buffer_mem.c + test_buffer_read.c + test_buffer_write.c + test_buffer_fill.c + : ../..//glew + ; + +install dist + : test_buffers + : debug:$(DIST)/debug/tests/test_conformance/buffers + release:$(DIST)/release/tests/test_conformance/buffers + ; + diff --git a/test_conformance/buffers/Makefile b/test_conformance/buffers/Makefile new file mode 100644 index 00000000..50954563 --- /dev/null +++ b/test_conformance/buffers/Makefile @@ -0,0 +1,49 @@ +ifdef BUILD_WITH_ATF +ATF = -framework ATF +USE_ATF = -DUSE_ATF +endif + +SRCS = main.c test_buffer_copy.c test_buffer_read.c test_buffer_write.c \ + test_buffer_mem.c array_info.c test_buffer_map.c \ + test_sub_buffers.cpp test_buffer_fill.c \ + test_buffer_migrate.c test_image_migrate.c \ + ../../test_common/harness/errorHelpers.c \ + ../../test_common/harness/threadTesting.c \ + ../../test_common/harness/testHarness.c \ + ../../test_common/harness/kernelHelpers.c \ + ../../test_common/harness/conversions.c \ + ../../test_common/harness/mt19937.c \ + ../../test_common/harness/typeWrappers.cpp + + + +DEFINES = + +SOURCES = $(abspath $(SRCS)) +LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries +LIBPATH += -L. 
+FRAMEWORK = $(SOURCES) +HEADERS = +TARGET = test_buffers +INCLUDE = +COMPILERFLAGS = -c -Wall -g -O0 -Wshorten-64-to-32 +CC = c++ +CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) +CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) +LIBRARIES = -framework OpenCL -framework AppKit ${ATF} + +OBJECTS := ${SOURCES:.c=.o} +OBJECTS := ${OBJECTS:.cpp=.o} + +TARGETOBJECT = +all: $(TARGET) + +$(TARGET): $(OBJECTS) + $(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES) + +clean: + rm -f $(TARGET) $(OBJECTS) + +.DEFAULT: + @echo The target \"$@\" does not exist in Makefile. + diff --git a/test_conformance/buffers/array_info.c b/test_conformance/buffers/array_info.c new file mode 100644 index 00000000..0da92a02 --- /dev/null +++ b/test_conformance/buffers/array_info.c @@ -0,0 +1,63 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include +#include + +#include "procs.h" + + + +int testBufferSize( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + cl_mem memobj; + cl_int err; + size_t w = 32, h = 32, d = 32; + size_t retSize; + size_t elementSize = sizeof( cl_int ); + + memobj = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), elementSize * w*h*d, NULL, &err); + test_error(err, "clCreateBuffer failed."); + + err = clGetMemObjectInfo(memobj, CL_MEM_SIZE, sizeof( size_t ), (void *)&retSize, NULL); + if ( err ){ + log_error( "Error calling clGetMemObjectInfo(): %d\n", err ); + clReleaseMemObject(memobj); + return -1; + } + if ( (elementSize * w * h * d) != retSize ) { + log_error( "Error in clGetMemObjectInfo() check of size\n" ); + clReleaseMemObject(memobj); + return -1; + } + else{ + log_info( " CL_MEM_SIZE passed.\n" ); + } + + // cleanup + clReleaseMemObject(memobj); + + return err; + +} // end testArrayElementSize() + + +// FIXME: need to test other flags + diff --git a/test_conformance/buffers/main.c b/test_conformance/buffers/main.c new file mode 100644 index 00000000..df76a5d0 --- /dev/null +++ b/test_conformance/buffers/main.c @@ -0,0 +1,246 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include "procs.h" +#include "../../test_common/harness/testHarness.h" + +basefn bufferfn_list[] = { + test_buffer_read_async_int, + test_buffer_read_async_uint, + test_buffer_read_async_long, + test_buffer_read_async_ulong, + test_buffer_read_async_short, + test_buffer_read_async_ushort, + test_buffer_read_async_char, + test_buffer_read_async_uchar, + test_buffer_read_async_float, + test_buffer_read_array_barrier_int, + test_buffer_read_array_barrier_uint, + test_buffer_read_array_barrier_long, + test_buffer_read_array_barrier_ulong, + test_buffer_read_array_barrier_short, + test_buffer_read_array_barrier_ushort, + test_buffer_read_array_barrier_char, + test_buffer_read_array_barrier_uchar, + test_buffer_read_array_barrier_float, + test_buffer_read_int, + test_buffer_read_uint, + test_buffer_read_long, + test_buffer_read_ulong, + test_buffer_read_short, + test_buffer_read_ushort, + test_buffer_read_float, + 0, //test_buffer_read_half, + test_buffer_read_char, + test_buffer_read_uchar, + test_buffer_read_struct, + test_buffer_read_random_size, + test_buffer_map_read_int, + test_buffer_map_read_uint, + test_buffer_map_read_long, + test_buffer_map_read_ulong, + test_buffer_map_read_short, + test_buffer_map_read_ushort, + test_buffer_map_read_char, + test_buffer_map_read_uchar, + test_buffer_map_read_float, + test_buffer_map_read_struct, + + test_buffer_map_write_int, + test_buffer_map_write_uint, + test_buffer_map_write_long, + test_buffer_map_write_ulong, + test_buffer_map_write_short, + test_buffer_map_write_ushort, + test_buffer_map_write_char, + test_buffer_map_write_uchar, + test_buffer_map_write_float, + test_buffer_map_write_struct, + + test_buffer_write_int, + test_buffer_write_uint, + test_buffer_write_short, + test_buffer_write_ushort, + test_buffer_write_char, + test_buffer_write_uchar, + test_buffer_write_float, + 0, //test_buffer_write_half, + test_buffer_write_long, + 
test_buffer_write_ulong, + test_buffer_write_struct, + test_buffer_write_async_int, + test_buffer_write_async_uint, + test_buffer_write_async_short, + test_buffer_write_async_ushort, + test_buffer_write_async_char, + test_buffer_write_async_uchar, + test_buffer_write_async_float, + test_buffer_write_async_long, + test_buffer_write_async_ulong, + test_buffer_copy, + test_buffer_partial_copy, + test_mem_read_write_flags, + test_mem_write_flags, + test_mem_read_flags, + test_mem_copy_host_flags, + 0, //test_mem_alloc_ref_flags, + testBufferSize, + + test_sub_buffers_read_write, + test_sub_buffers_read_write_dual_devices, + test_sub_buffers_overlapping, + + test_buffer_fill_int, + test_buffer_fill_uint, + test_buffer_fill_short, + test_buffer_fill_ushort, + test_buffer_fill_char, + test_buffer_fill_uchar, + test_buffer_fill_long, + test_buffer_fill_ulong, + test_buffer_fill_float, + test_buffer_fill_struct, + + test_buffer_migrate, + test_image_migrate, +}; + +const char *bufferfn_names[] = { + "buffer_read_async_int", + "buffer_read_async_uint", + "buffer_read_async_long", + "buffer_read_async_ulong", + "buffer_read_async_short", + "buffer_read_async_ushort", + "buffer_read_async_char", + "buffer_read_async_uchar", + "buffer_read_async_float", + "buffer_read_array_barrier_int", + "buffer_read_array_barrier_uint", + "buffer_read_array_barrier_long", + "buffer_read_array_barrier_ulong", + "buffer_read_array_barrier_short", + "buffer_read_array_barrier_ushort", + "buffer_read_array_barrier_char", + "buffer_read_array_barrier_uchar", + "buffer_read_array_barrier_float", + "buffer_read_int", + "buffer_read_uint", + "buffer_read_long", + "buffer_read_ulong", + "buffer_read_short", + "buffer_read_ushort", + "buffer_read_float", + "buffer_read_half", + "buffer_read_char", + "buffer_read_uchar", + "buffer_read_struct", + "buffer_read_random_size", + "buffer_map_read_int", + "buffer_map_read_uint", + "buffer_map_read_long", + "buffer_map_read_ulong", + "buffer_map_read_short", 
+ "buffer_map_read_ushort", + "buffer_map_read_char", + "buffer_map_read_uchar", + "buffer_map_read_float", + "buffer_map_read_struct", + + "buffer_map_write_int", + "buffer_map_write_uint", + "buffer_map_write_long", + "buffer_map_write_ulong", + "buffer_map_write_short", + "buffer_map_write_ushort", + "buffer_map_write_char", + "buffer_map_write_uchar", + "buffer_map_write_float", + "buffer_map_write_struct", + + "buffer_write_int", + "buffer_write_uint", + "buffer_write_short", + "buffer_write_ushort", + "buffer_write_char", + "buffer_write_uchar", + "buffer_write_float", + "buffer_write_half", + "buffer_write_long", + "buffer_write_ulong", + "buffer_write_struct", + "buffer_write_async_int", + "buffer_write_async_uint", + "buffer_write_async_short", + "buffer_write_async_ushort", + "buffer_write_async_char", + "buffer_write_async_uchar", + "buffer_write_async_float", + "buffer_write_async_long", + "buffer_write_async_ulong", + "buffer_copy", + "buffer_partial_copy", + "mem_read_write_flags", + "mem_write_only_flags", + "mem_read_only_flags", + "mem_copy_host_flags", + "mem_alloc_ref_flags", + "array_info_size", + "sub_buffers_read_write", + "sub_buffers_read_write_dual_devices", + "sub_buffers_overlapping", + "buffer_fill_int", + "buffer_fill_uint", + "buffer_fill_short", + "buffer_fill_ushort", + "buffer_fill_char", + "buffer_fill_uchar", + "buffer_fill_long", + "buffer_fill_ulong", + "buffer_fill_float", + "buffer_fill_struct", + "buffer_migrate", + "image_migrate", +}; + +ct_assert((sizeof(bufferfn_names) / sizeof(bufferfn_names[0])) == (sizeof(bufferfn_list) / sizeof(bufferfn_list[0]))); + +int num_bufferfns = sizeof(bufferfn_names) / sizeof(char *); + +const cl_mem_flags flag_set[] = { + CL_MEM_ALLOC_HOST_PTR, + CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, + CL_MEM_USE_HOST_PTR, + CL_MEM_COPY_HOST_PTR, + 0 +}; +const char* flag_set_names[] = { + "CL_MEM_ALLOC_HOST_PTR", + "CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR", + "CL_MEM_USE_HOST_PTR", + 
"CL_MEM_COPY_HOST_PTR", + "0" +}; + +int main( int argc, const char *argv[] ) +{ + return runTestHarness( argc, argv, num_bufferfns, bufferfn_list, bufferfn_names, + false, false, 0 ); +} diff --git a/test_conformance/buffers/procs.h b/test_conformance/buffers/procs.h new file mode 100644 index 00000000..ff1edbfb --- /dev/null +++ b/test_conformance/buffers/procs.h @@ -0,0 +1,132 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef __PROCS_H__ +#define __PROCS_H__ + +#include "../../test_common/harness/kernelHelpers.h" +#include "../../test_common/harness/testHarness.h" +#include "../../test_common/harness/errorHelpers.h" +#include "../../test_common/harness/typeWrappers.h" +#include "../../test_common/harness/mt19937.h" +#include "../../test_common/harness/conversions.h" + +#ifndef __APPLE__ +#include +#endif + +extern const cl_mem_flags flag_set[]; +extern const char* flag_set_names[]; +#define NUM_FLAGS 5 + +extern int test_buffer_read_int( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_read_uint( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_read_long( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_read_ulong( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_read_short( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_read_ushort( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_read_float( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_read_half( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_read_char( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_read_uchar( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_read_struct( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_read_random_size( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int 
test_buffer_read_async_int( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_read_async_uint( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_read_async_long( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_read_async_ulong( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_read_async_short( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_read_async_ushort( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_read_async_char( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_read_async_uchar( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_read_async_float( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_read_array_barrier_int( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_read_array_barrier_uint( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_read_array_barrier_short( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_read_array_barrier_ushort( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_read_array_barrier_long( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_read_array_barrier_ulong( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int 
test_buffer_read_array_barrier_char( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_read_array_barrier_uchar( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_read_array_barrier_float( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_write_int( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_write_uint( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_write_short( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_write_ushort( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_write_char( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_write_uchar( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_write_float( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_write_half( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_write_long( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_write_ulong( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_write_struct( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_write_async_int( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_write_async_uint( cl_device_id deviceID, cl_context context, cl_command_queue queue, int 
num_elements ); +extern int test_buffer_write_async_short( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_write_async_ushort( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_write_async_char( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_write_async_uchar( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_write_async_float( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_write_async_long( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_write_async_ulong( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_copy( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_partial_copy( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int testBufferSize( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_mem_read_write_flags( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_mem_write_flags( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_mem_read_flags( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_mem_copy_host_flags( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_mem_alloc_ref_flags( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_map_read_int( cl_device_id deviceID, cl_context context, cl_command_queue 
queue, int num_elements ); +extern int test_buffer_map_read_uint( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_map_read_long( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_map_read_ulong( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_map_read_short( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_map_read_ushort( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_map_read_char( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_map_read_uchar( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_map_read_float( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_map_read_struct( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); + +extern int test_buffer_map_write_int( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_map_write_uint( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_map_write_long( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_map_write_ulong( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_map_write_short( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_map_write_ushort( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_map_write_char( cl_device_id deviceID, 
cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_map_write_uchar( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_map_write_float( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_map_write_struct( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); + +extern int test_sub_buffers_read_write( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_sub_buffers_read_write_dual_devices( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_sub_buffers_overlapping( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_migrate(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_image_migrate(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_buffer_fill_int( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_fill_uint( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_fill_short( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_fill_ushort( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_fill_char( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_fill_uchar( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_fill_long( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_fill_ulong( cl_device_id 
deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_fill_float( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffer_fill_struct( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); + +#endif // #ifndef __PROCS_H__ + diff --git a/test_conformance/buffers/test_buffer_copy.c b/test_conformance/buffers/test_buffer_copy.c new file mode 100644 index 00000000..fa2d579d --- /dev/null +++ b/test_conformance/buffers/test_buffer_copy.c @@ -0,0 +1,295 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include +#include + +#include "procs.h" +#include "../../test_common/harness/errorHelpers.h" + + +static int verify_copy_buffer(int *inptr, int *outptr, int n) +{ + int i; + + for (i=0; i +#include +#include +#include +#include + +#include "procs.h" +#include "../../test_common/harness/errorHelpers.h" + +#define USE_LOCAL_WORK_GROUP 1 + +#define TEST_PRIME_CHAR 0x77 +#define TEST_PRIME_INT ((1<<16)+1) +#define TEST_PRIME_UINT ((1U<<16)+1U) +#define TEST_PRIME_LONG ((1LL<<32)+1LL) +#define TEST_PRIME_ULONG ((1ULL<<32)+1ULL) +#define TEST_PRIME_SHORT (cl_short)((1<<8)+1) +#define TEST_PRIME_USHORT (cl_ushort)((1<<8)+1) +#define TEST_PRIME_FLOAT (cl_float)3.40282346638528860e+38 +#define TEST_PRIME_HALF 119.f + +#ifndef TestStruct +typedef struct{ + cl_int a; + cl_float b; +} TestStruct; +#endif + +const char *buffer_fill_int_kernel_code[] = { + "__kernel void test_buffer_fill_int(__global int *src, __global int *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_fill_int2(__global int2 *src, __global int2 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_fill_int4(__global int4 *src, __global int4 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_fill_int8(__global int8 *src, __global int8 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_fill_int16(__global int16 *src, __global int16 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n" }; + +static const char *int_kernel_name[] = { "test_buffer_fill_int", "test_buffer_fill_int2", "test_buffer_fill_int4", "test_buffer_fill_int8", "test_buffer_fill_int16" }; + + +const char 
*buffer_fill_uint_kernel_code[] = { + "__kernel void test_buffer_fill_uint(__global uint *src, __global uint *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_fill_uint2(__global uint2 *src, __global uint2 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_fill_uint4(__global uint4 *src, __global uint4 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_fill_uint8(__global uint8 *src, __global uint8 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_fill_uint16(__global uint16 *src, __global uint16 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n" }; + +static const char *uint_kernel_name[] = { "test_buffer_fill_uint", "test_buffer_fill_uint2", "test_buffer_fill_uint4", "test_buffer_fill_uint8", "test_buffer_fill_uint16" }; + + +const char *buffer_fill_short_kernel_code[] = { + "__kernel void test_buffer_fill_short(__global short *src, __global short *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_fill_short2(__global short2 *src, __global short2 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_fill_short4(__global short4 *src, __global short4 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_fill_short8(__global short8 *src, __global short8 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_fill_short16(__global short16 *src, __global short16 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = 
src[tid];\n" + "}\n" }; + +static const char *short_kernel_name[] = { "test_buffer_fill_short", "test_buffer_fill_short2", "test_buffer_fill_short4", "test_buffer_fill_short8", "test_buffer_fill_short16" }; + + +const char *buffer_fill_ushort_kernel_code[] = { + "__kernel void test_buffer_fill_ushort(__global ushort *src, __global ushort *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_fill_ushort2(__global ushort2 *src, __global ushort2 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_fill_ushort4(__global ushort4 *src, __global ushort4 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_fill_ushort8(__global ushort8 *src, __global ushort8 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_fill_ushort16(__global ushort16 *src, __global ushort16 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n" }; + +static const char *ushort_kernel_name[] = { "test_buffer_fill_ushort", "test_buffer_fill_ushort2", "test_buffer_fill_ushort4", "test_buffer_fill_ushort8", "test_buffer_fill_ushort16" }; + + +const char *buffer_fill_char_kernel_code[] = { + "__kernel void test_buffer_fill_char(__global char *src, __global char *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_fill_char2(__global char2 *src, __global char2 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_fill_char4(__global char4 *src, __global char4 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_fill_char8(__global char8 *src, __global char8 
*dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_fill_char16(__global char16 *src, __global char16 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n" }; + +static const char *char_kernel_name[] = { "test_buffer_fill_char", "test_buffer_fill_char2", "test_buffer_fill_char4", "test_buffer_fill_char8", "test_buffer_fill_char16" }; + + +const char *buffer_fill_uchar_kernel_code[] = { + "__kernel void test_buffer_fill_uchar(__global uchar *src, __global uchar *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_fill_uchar2(__global uchar2 *src, __global uchar2 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_fill_uchar4(__global uchar4 *src, __global uchar4 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_fill_uchar8(__global uchar8 *src, __global uchar8 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_fill_uchar16(__global uchar16 *src, __global uchar16 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n" }; + +static const char *uchar_kernel_name[] = { "test_buffer_fill_uchar", "test_buffer_fill_uchar2", "test_buffer_fill_uchar4", "test_buffer_fill_uchar8", "test_buffer_fill_uchar16" }; + + +const char *buffer_fill_long_kernel_code[] = { + "__kernel void test_buffer_fill_long(__global long *src, __global long *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_fill_long2(__global long2 *src, __global long2 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void 
test_buffer_fill_long4(__global long4 *src, __global long4 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_fill_long8(__global long8 *src, __global long8 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_fill_long16(__global long16 *src, __global long16 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n" }; + +static const char *long_kernel_name[] = { "test_buffer_fill_long", "test_buffer_fill_long2", "test_buffer_fill_long4", "test_buffer_fill_long8", "test_buffer_fill_long16" }; + + +const char *buffer_fill_ulong_kernel_code[] = { + "__kernel void test_buffer_fill_ulong(__global ulong *src, __global ulong *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_fill_ulong2(__global ulong2 *src, __global ulong2 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_fill_ulong4(__global ulong4 *src, __global ulong4 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_fill_ulong8(__global ulong8 *src, __global ulong8 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_fill_ulong16(__global ulong16 *src, __global ulong16 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n" }; + +static const char *ulong_kernel_name[] = { "test_buffer_fill_ulong", "test_buffer_fill_ulong2", "test_buffer_fill_ulong4", "test_buffer_fill_ulong8", "test_buffer_fill_ulong16" }; + + +const char *buffer_fill_float_kernel_code[] = { + "__kernel void test_buffer_fill_float(__global float *src, __global float *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " 
dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_fill_float2(__global float2 *src, __global float2 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_fill_float4(__global float4 *src, __global float4 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_fill_float8(__global float8 *src, __global float8 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_fill_float16(__global float16 *src, __global float16 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n" }; + +static const char *float_kernel_name[] = { "test_buffer_fill_float", "test_buffer_fill_float2", "test_buffer_fill_float4", "test_buffer_fill_float8", "test_buffer_fill_float16" }; + + +static const char *struct_kernel_code = +"typedef struct{\n" +"int a;\n" +"float b;\n" +"} TestStruct;\n" +"__kernel void read_fill_struct(__global TestStruct *src, __global TestStruct *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid].a = src[tid].a;\n" +" dst[tid].b = src[tid].b;\n" +"}\n"; + + + +static int verify_fill_int( void *ptr1, void *ptr2, int n ) +{ + int i; + cl_int *inptr = (cl_int *)ptr1; + cl_int *outptr = (cl_int *)ptr2; + + for (i=0; ia = (cl_int)genrand_int32(d); + pattern->b = (cl_float)get_random_float( -FLT_MAX, FLT_MAX, d ); + + inptr = (TestStruct *)align_malloc(ptrSize * num_elements, min_alignment); + for ( j = 0; j < offset_elements; j++ ) { + inptr[j].a = 0; + inptr[j].b =0; + } + for ( j = offset_elements; j < offset_elements + fill_elements; j++ ) { + inptr[j].a = pattern->a; + inptr[j].b = pattern->b; + } + for ( j = offset_elements + fill_elements; j < (size_t)num_elements; j++ ) { + inptr[j].a = 0; + inptr[j].b = 0; + } + + hostptr = (TestStruct *)align_malloc(ptrSize * num_elements, min_alignment); 
+ memset(hostptr, 0, ptrSize * num_elements); + + for (src_flag_id=0; src_flag_id < NUM_FLAGS; src_flag_id++) { + log_info("Testing with cl_mem_flags: %s\n", flag_set_names[src_flag_id]); + + if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR)) + buffers[0] = clCreateBuffer(context, flag_set[src_flag_id], ptrSize * num_elements, hostptr, &err); + else + buffers[0] = clCreateBuffer(context, flag_set[src_flag_id], ptrSize * num_elements, NULL, &err); + if ( err ){ + print_error(err, " clCreateBuffer failed\n" ); + clReleaseEvent( event[0] ); + clReleaseEvent( event[1] ); + free( (void *)pattern ); + align_free( (void *)inptr ); + align_free( (void *)hostptr ); + free_mtdata(d); + return -1; + } + if (!((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))) { + err = clEnqueueWriteBuffer(queue, buffers[0], CL_FALSE, 0, ptrSize * num_elements, hostptr, 0, NULL, NULL); + if ( err != CL_SUCCESS ){ + print_error(err, " clEnqueueWriteBuffer failed\n" ); + clReleaseEvent( event[0] ); + clReleaseEvent( event[1] ); + free( (void *)pattern ); + align_free( (void *)inptr ); + align_free( (void *)hostptr ); + free_mtdata(d); + return -1; + } + } + outptr = align_malloc( ptrSize * num_elements, min_alignment); + memset(outptr, 0, ptrSize * num_elements); + buffers[1] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, ptrSize * num_elements, outptr, &err); + if ( ! 
buffers[1] || err){ + print_error(err, " clCreateBuffer failed\n" ); + align_free( outptr ); + clReleaseMemObject(buffers[0]); + clReleaseEvent( event[0] ); + clReleaseEvent( event[1] ); + free( (void *)pattern ); + align_free( (void *)inptr ); + align_free( (void *)hostptr ); + free_mtdata(d); + return -1; + } + + err = clEnqueueFillBuffer(queue, buffers[0], pattern, ptrSize, + ptrSize * offset_elements, ptrSize * fill_elements, + 0, NULL, &(event[0])); + /* uncomment for test debugging + err = clEnqueueWriteBuffer(queue, buffers[0], CL_FALSE, 0, ptrSize * num_elements, inptr, 0, NULL, &(event[0])); + */ + if ( err != CL_SUCCESS ){ + print_error( err, " clEnqueueFillBuffer failed" ); + align_free( outptr ); + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseEvent( event[0] ); + clReleaseEvent( event[1] ); + free( (void *)pattern ); + align_free( (void *)inptr ); + align_free( (void *)hostptr ); + free_mtdata(d); + return -1; + } + + err = create_single_kernel_helper( context, &program, &kernel, 1, &struct_kernel_code, "read_fill_struct" ); + if ( err ){ + log_error( " Error creating program for struct\n" ); + align_free( outptr ); + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseEvent( event[0] ); + clReleaseEvent( event[1] ); + free( (void *)pattern ); + align_free( (void *)inptr ); + align_free( (void *)hostptr ); + free_mtdata(d); + return -1; + } + +#ifdef USE_LOCAL_WORK_GROUP + err = get_max_common_work_group_size( context, kernel, global_work_size[0], &local_work_size[0] ); + test_error( err, "Unable to get work group size to use" ); +#endif + + err = clSetKernelArg( kernel, 0, sizeof( cl_mem ), (void *)&buffers[0] ); + err |= clSetKernelArg( kernel, 1, sizeof( cl_mem ), (void *)&buffers[1] ); + if ( err != CL_SUCCESS ){ + print_error( err, " clSetKernelArg failed" ); + clReleaseKernel( kernel ); + clReleaseProgram( program ); + align_free( outptr ); + clReleaseMemObject(buffers[0]); + 
clReleaseMemObject(buffers[1]); + clReleaseEvent( event[0] ); + clReleaseEvent( event[1] ); + free( (void *)pattern ); + align_free( (void *)inptr ); + align_free( (void *)hostptr ); + free_mtdata(d); + return -1; + } + + err = clWaitForEvents( 1, &(event[0]) ); + if ( err != CL_SUCCESS ){ + print_error( err, "clWaitForEvents() failed" ); + clReleaseKernel( kernel ); + clReleaseProgram( program ); + align_free( outptr ); + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseEvent( event[0] ); + clReleaseEvent( event[1] ); + free( (void *)pattern ); + align_free( (void *)inptr ); + align_free( (void *)hostptr ); + free_mtdata(d); + return -1; + } + clReleaseEvent( event[0] ); + +#ifdef USE_LOCAL_WORK_GROUP + err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL ); +#else + err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL ); +#endif + if ( err != CL_SUCCESS ){ + print_error( err, " clEnqueueNDRangeKernel failed" ); + clReleaseKernel( kernel ); + clReleaseProgram( program ); + align_free( outptr ); + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseEvent( event[0] ); + clReleaseEvent( event[1] ); + free( (void *)pattern ); + align_free( (void *)inptr ); + align_free( (void *)hostptr ); + free_mtdata(d); + return -1; + } + + err = clEnqueueReadBuffer( queue, buffers[1], CL_FALSE, 0, ptrSize * num_elements, outptr, 0, NULL, &(event[1]) ); + if ( err != CL_SUCCESS ){ + print_error( err, " clEnqueueReadBuffer failed" ); + clReleaseKernel( kernel ); + clReleaseProgram( program ); + align_free( outptr ); + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseEvent( event[0] ); + clReleaseEvent( event[1] ); + free( (void *)pattern ); + align_free( (void *)inptr ); + align_free( (void *)hostptr ); + free_mtdata(d); + return -1; + } + + err = clWaitForEvents( 1, &(event[1]) ); + if ( err != CL_SUCCESS ){ + 
print_error( err, "clWaitForEvents() failed" ); + } + clReleaseEvent( event[1] ); + + if ( verify_fill_struct( inptr, outptr, num_elements) ) { + log_error( " buffer_FILL async struct test failed\n" ); + total_errors++; + } + else{ + log_info( " buffer_FILL async struct test passed\n" ); + } + // cleanup + clReleaseKernel( kernel ); + clReleaseProgram( program ); + align_free( outptr ); + clReleaseMemObject( buffers[0] ); + clReleaseMemObject( buffers[1] ); + } // src cl_mem_flag + free( (void *)pattern ); + align_free( (void *)inptr ); + align_free( (void *)hostptr ); + } + + free_mtdata(d); + + return total_errors; + +} // end test_buffer_fill_struct() + + +int test_buffer_fill_int( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + cl_int *inptr[5]; + cl_int *hostptr[5]; + cl_int *pattern[5]; + size_t ptrSizes[5]; + int n, i, err=0; + size_t j, offset_elements, fill_elements; + int (*foo)(void *,void *,int); + MTdata d = init_genrand( gRandomSeed ); + + size_t min_alignment = get_min_alignment(context); + + foo = verify_fill_int; + + ptrSizes[0] = sizeof(cl_int); + ptrSizes[1] = ptrSizes[0] << 1; + ptrSizes[2] = ptrSizes[1] << 1; + ptrSizes[3] = ptrSizes[2] << 1; + ptrSizes[4] = ptrSizes[3] << 1; + + // Test with random offsets and fill sizes + for ( n = 0; n < 8; n++ ){ + offset_elements = (size_t)get_random_float( 0.f, (float)(num_elements - 8), d ); + fill_elements = (size_t)get_random_float( 8.f, (float)(num_elements - offset_elements), d ); + log_info( "Testing random fill from offset %d for %d elements: \n", (int)offset_elements, (int)fill_elements ); + + for ( i = 0; i < 5; i++ ){ + pattern[i] = (cl_int *)malloc(ptrSizes[i]); + for ( j = 0; j < ptrSizes[i] / ptrSizes[0]; j++ ) + pattern[i][j] = TEST_PRIME_INT; + + inptr[i] = (cl_int *)align_malloc(ptrSizes[i] * num_elements, min_alignment); + for ( j = 0; j < ptrSizes[i] * offset_elements / ptrSizes[0]; j++ ) + inptr[i][j] = 0; + for ( j = ptrSizes[i] * 
offset_elements / ptrSizes[0]; j < ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j++ ) + inptr[i][j] = TEST_PRIME_INT; + for ( j = ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ ) + inptr[i][j] = 0; + + hostptr[i] = (cl_int *)align_malloc(ptrSizes[i] * num_elements, min_alignment); + memset(hostptr[i], 0, ptrSizes[i] * num_elements); + } + + if (test_buffer_fill( deviceID, context, queue, num_elements, sizeof( cl_int ), (char*)"int", + 5, (void**)inptr, (void**)hostptr, (void**)pattern, + offset_elements, fill_elements, + buffer_fill_int_kernel_code, int_kernel_name, foo )) + err++; + + for ( i = 0; i < 5; i++ ){ + free( (void *)pattern[i] ); + align_free( (void *)inptr[i] ); + align_free( (void *)hostptr[i] ); + } + + } + + free_mtdata(d); + + return err; + +} // end test_buffer_int_fill() + + +int test_buffer_fill_uint( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + cl_uint *inptr[5]; + cl_uint *hostptr[5]; + cl_uint *pattern[5]; + size_t ptrSizes[5]; + int n, i, err=0; + size_t j, offset_elements, fill_elements; + MTdata d = init_genrand( gRandomSeed ); + int (*foo)(void *,void *,int); + + size_t min_alignment = get_min_alignment(context); + + foo = verify_fill_uint; + + ptrSizes[0] = sizeof(cl_uint); + ptrSizes[1] = ptrSizes[0] << 1; + ptrSizes[2] = ptrSizes[1] << 1; + ptrSizes[3] = ptrSizes[2] << 1; + ptrSizes[4] = ptrSizes[3] << 1; + + // Test with random offsets and fill sizes + for ( n = 0; n < 8; n++ ){ + offset_elements = (size_t)get_random_float( 0.f, (float)(num_elements - 8), d ); + fill_elements = (size_t)get_random_float( 8.f, (float)(num_elements - offset_elements), d ); + log_info( "Testing random fill from offset %d for %d elements: \n", (int)offset_elements, (int)fill_elements ); + + for ( i = 0; i < 5; i++ ){ + pattern[i] = (cl_uint *)malloc(ptrSizes[i]); + for ( j = 0; j < ptrSizes[i] / ptrSizes[0]; j++ ) + 
pattern[i][j] = TEST_PRIME_UINT; + + inptr[i] = (cl_uint *)align_malloc(ptrSizes[i] * num_elements, min_alignment); + for ( j = 0; j < ptrSizes[i] * offset_elements / ptrSizes[0]; j++ ) + inptr[i][j] = 0; + for ( j = ptrSizes[i] * offset_elements / ptrSizes[0]; j < ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j++ ) + inptr[i][j] = TEST_PRIME_UINT; + for ( j = ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ ) + inptr[i][j] = 0; + + hostptr[i] = (cl_uint *)align_malloc(ptrSizes[i] * num_elements, min_alignment); + memset(hostptr[i], 0, ptrSizes[i] * num_elements); + } + + if (test_buffer_fill( deviceID, context, queue, num_elements, sizeof( cl_uint ), (char*)"uint", + 5, (void**)inptr, (void**)hostptr, (void**)pattern, + offset_elements, fill_elements, + buffer_fill_uint_kernel_code, uint_kernel_name, foo )) + err++; + + for ( i = 0; i < 5; i++ ){ + free( (void *)pattern[i] ); + align_free( (void *)inptr[i] ); + align_free( (void *)hostptr[i] ); + } + + } + + free_mtdata(d); + + return err; + +} // end test_buffer_uint_fill() + + +int test_buffer_fill_short( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + cl_short *inptr[5]; + cl_short *hostptr[5]; + cl_short *pattern[5]; + size_t ptrSizes[5]; + int n, i, err=0; + size_t j, offset_elements, fill_elements; + MTdata d = init_genrand( gRandomSeed ); + int (*foo)(void *,void *,int); + + size_t min_alignment = get_min_alignment(context); + + foo = verify_fill_short; + + ptrSizes[0] = sizeof(cl_short); + ptrSizes[1] = ptrSizes[0] << 1; + ptrSizes[2] = ptrSizes[1] << 1; + ptrSizes[3] = ptrSizes[2] << 1; + ptrSizes[4] = ptrSizes[3] << 1; + + // Test with random offsets and fill sizes + for ( n = 0; n < 8; n++ ){ + offset_elements = (size_t)get_random_float( 0.f, (float)(num_elements - 8), d ); + fill_elements = (size_t)get_random_float( 8.f, (float)(num_elements - offset_elements), d ); + log_info( 
"Testing random fill from offset %d for %d elements: \n", (int)offset_elements, (int)fill_elements ); + + for ( i = 0; i < 5; i++ ){ + pattern[i] = (cl_short *)malloc(ptrSizes[i]); + for ( j = 0; j < ptrSizes[i] / ptrSizes[0]; j++ ) + pattern[i][j] = TEST_PRIME_SHORT; + + inptr[i] = (cl_short *)align_malloc(ptrSizes[i] * num_elements, min_alignment); + for ( j = 0; j < ptrSizes[i] * offset_elements / ptrSizes[0]; j++ ) + inptr[i][j] = 0; + for ( j = ptrSizes[i] * offset_elements / ptrSizes[0]; j < ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j++ ) + inptr[i][j] = TEST_PRIME_SHORT; + for ( j = ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ ) + inptr[i][j] = 0; + + hostptr[i] = (cl_short *)align_malloc(ptrSizes[i] * num_elements, min_alignment); + memset(hostptr[i], 0, ptrSizes[i] * num_elements); + } + + if (test_buffer_fill( deviceID, context, queue, num_elements, sizeof( cl_short ), (char*)"short", + 5, (void**)inptr, (void**)hostptr, (void**)pattern, + offset_elements, fill_elements, + buffer_fill_short_kernel_code, short_kernel_name, foo )) + err++; + + for ( i = 0; i < 5; i++ ){ + free( (void *)pattern[i] ); + align_free( (void *)inptr[i] ); + align_free( (void *)hostptr[i] ); + } + + } + + free_mtdata(d); + + return err; + +} // end test_buffer_short_fill() + + +int test_buffer_fill_ushort( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + cl_ushort *inptr[5]; + cl_ushort *hostptr[5]; + cl_ushort *pattern[5]; + size_t ptrSizes[5]; + int n, i, err=0; + size_t j, offset_elements, fill_elements; + MTdata d = init_genrand( gRandomSeed ); + int (*foo)(void *,void *,int); + + size_t min_alignment = get_min_alignment(context); + + foo = verify_fill_ushort; + + ptrSizes[0] = sizeof(cl_ushort); + ptrSizes[1] = ptrSizes[0] << 1; + ptrSizes[2] = ptrSizes[1] << 1; + ptrSizes[3] = ptrSizes[2] << 1; + ptrSizes[4] = ptrSizes[3] << 1; + + // Test with 
random offsets and fill sizes + for ( n = 0; n < 8; n++ ){ + offset_elements = (size_t)get_random_float( 0.f, (float)(num_elements - 8), d ); + fill_elements = (size_t)get_random_float( 8.f, (float)(num_elements - offset_elements), d ); + log_info( "Testing random fill from offset %d for %d elements: \n", (int)offset_elements, (int)fill_elements ); + + for ( i = 0; i < 5; i++ ){ + pattern[i] = (cl_ushort *)malloc(ptrSizes[i]); + for ( j = 0; j < ptrSizes[i] / ptrSizes[0]; j++ ) + pattern[i][j] = TEST_PRIME_USHORT; + + inptr[i] = (cl_ushort *)align_malloc(ptrSizes[i] * num_elements, min_alignment); + for ( j = 0; j < ptrSizes[i] * offset_elements / ptrSizes[0]; j++ ) + inptr[i][j] = 0; + for ( j = ptrSizes[i] * offset_elements / ptrSizes[0]; j < ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j++ ) + inptr[i][j] = TEST_PRIME_USHORT; + for ( j = ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ ) + inptr[i][j] = 0; + + hostptr[i] = (cl_ushort *)align_malloc(ptrSizes[i] * num_elements, min_alignment); + memset(hostptr[i], 0, ptrSizes[i] * num_elements); + } + + if (test_buffer_fill( deviceID, context, queue, num_elements, sizeof( cl_ushort ), (char*)"ushort", + 5, (void**)inptr, (void**)hostptr, (void**)pattern, + offset_elements, fill_elements, + buffer_fill_ushort_kernel_code, ushort_kernel_name, foo )) + err++; + + for ( i = 0; i < 5; i++ ){ + free( (void *)pattern[i] ); + align_free( (void *)inptr[i] ); + align_free( (void *)hostptr[i] ); + } + + } + + free_mtdata(d); + + return err; + +} // end test_buffer_ushort_fill() + + +int test_buffer_fill_char( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + cl_char *inptr[5]; + cl_char *hostptr[5]; + cl_char *pattern[5]; + size_t ptrSizes[5]; + int n, i, err=0; + size_t j, offset_elements, fill_elements; + MTdata d = init_genrand( gRandomSeed ); + int (*foo)(void *,void *,int); + + size_t min_alignment 
= get_min_alignment(context); + + foo = verify_fill_char; + + ptrSizes[0] = sizeof(cl_char); + ptrSizes[1] = ptrSizes[0] << 1; + ptrSizes[2] = ptrSizes[1] << 1; + ptrSizes[3] = ptrSizes[2] << 1; + ptrSizes[4] = ptrSizes[3] << 1; + + // Test with random offsets and fill sizes + for ( n = 0; n < 8; n++ ){ + offset_elements = (size_t)get_random_float( 0.f, (float)(num_elements - 8), d ); + fill_elements = (size_t)get_random_float( 8.f, (float)(num_elements - offset_elements), d ); + log_info( "Testing random fill from offset %d for %d elements: \n", (int)offset_elements, (int)fill_elements ); + + for ( i = 0; i < 5; i++ ){ + pattern[i] = (cl_char *)malloc(ptrSizes[i]); + for ( j = 0; j < ptrSizes[i] / ptrSizes[0]; j++ ) + pattern[i][j] = TEST_PRIME_CHAR; + + inptr[i] = (cl_char *)align_malloc(ptrSizes[i] * num_elements, min_alignment); + for ( j = 0; j < ptrSizes[i] * offset_elements / ptrSizes[0]; j++ ) + inptr[i][j] = 0; + for ( j = ptrSizes[i] * offset_elements / ptrSizes[0]; j < ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j++ ) + inptr[i][j] = TEST_PRIME_CHAR; + for ( j = ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ ) + inptr[i][j] = 0; + + hostptr[i] = (cl_char *)align_malloc(ptrSizes[i] * num_elements, min_alignment); + memset(hostptr[i], 0, ptrSizes[i] * num_elements); + } + + if (test_buffer_fill( deviceID, context, queue, num_elements, sizeof( cl_char ), (char*)"char", + 5, (void**)inptr, (void**)hostptr, (void**)pattern, + offset_elements, fill_elements, + buffer_fill_char_kernel_code, char_kernel_name, foo )) + err++; + + for ( i = 0; i < 5; i++ ){ + free( (void *)pattern[i] ); + align_free( (void *)inptr[i] ); + align_free( (void *)hostptr[i] ); + } + + } + + free_mtdata(d); + + return err; + +} // end test_buffer_char_fill() + + +int test_buffer_fill_uchar( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + cl_uchar *inptr[5]; + 
cl_uchar *hostptr[5]; + cl_uchar *pattern[5]; + size_t ptrSizes[5]; + int n, i, err=0; + size_t j, offset_elements, fill_elements; + MTdata d = init_genrand( gRandomSeed ); + int (*foo)(void *,void *,int); + + size_t min_alignment = get_min_alignment(context); + + foo = verify_fill_uchar; + + ptrSizes[0] = sizeof(cl_uchar); + ptrSizes[1] = ptrSizes[0] << 1; + ptrSizes[2] = ptrSizes[1] << 1; + ptrSizes[3] = ptrSizes[2] << 1; + ptrSizes[4] = ptrSizes[3] << 1; + + // Test with random offsets and fill sizes + for ( n = 0; n < 8; n++ ){ + offset_elements = (size_t)get_random_float( 0.f, (float)(num_elements - 8), d ); + fill_elements = (size_t)get_random_float( 8.f, (float)(num_elements - offset_elements), d ); + log_info( "Testing random fill from offset %d for %d elements: \n", (int)offset_elements, (int)fill_elements ); + + for ( i = 0; i < 5; i++ ){ + pattern[i] = (cl_uchar *)malloc(ptrSizes[i]); + for ( j = 0; j < ptrSizes[i] / ptrSizes[0]; j++ ) + pattern[i][j] = TEST_PRIME_CHAR; + + inptr[i] = (cl_uchar *)align_malloc(ptrSizes[i] * num_elements, min_alignment); + for ( j = 0; j < ptrSizes[i] * offset_elements / ptrSizes[0]; j++ ) + inptr[i][j] = 0; + for ( j = ptrSizes[i] * offset_elements / ptrSizes[0]; j < ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j++ ) + inptr[i][j] = TEST_PRIME_CHAR; + for ( j = ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ ) + inptr[i][j] = 0; + + hostptr[i] = (cl_uchar *)align_malloc(ptrSizes[i] * num_elements, min_alignment); + memset(hostptr[i], 0, ptrSizes[i] * num_elements); + } + + if (test_buffer_fill( deviceID, context, queue, num_elements, sizeof( cl_uchar ), (char*)"uchar", + 5, (void**)inptr, (void**)hostptr, (void**)pattern, + offset_elements, fill_elements, + buffer_fill_uchar_kernel_code, uchar_kernel_name, foo )) + err++; + + for ( i = 0; i < 5; i++ ){ + free( (void *)pattern[i] ); + align_free( (void *)inptr[i] ); + align_free( (void 
*)hostptr[i] ); + } + + } + + free_mtdata(d); + + return err; + +} // end test_buffer_uchar_fill() + + +int test_buffer_fill_long( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + cl_long *inptr[5]; + cl_long *hostptr[5]; + cl_long *pattern[5]; + size_t ptrSizes[5]; + int n, i, err=0; + size_t j, offset_elements, fill_elements; + MTdata d = init_genrand( gRandomSeed ); + int (*foo)(void *,void *,int); + + size_t min_alignment = get_min_alignment(context); + + foo = verify_fill_long; + + ptrSizes[0] = sizeof(cl_long); + ptrSizes[1] = ptrSizes[0] << 1; + ptrSizes[2] = ptrSizes[1] << 1; + ptrSizes[3] = ptrSizes[2] << 1; + ptrSizes[4] = ptrSizes[3] << 1; + + //skip devices that don't support long + if (! gHasLong ) + { + log_info( "Device does not support 64-bit integers. Skipping test.\n" ); + return CL_SUCCESS; + } + + // Test with random offsets and fill sizes + for ( n = 0; n < 8; n++ ){ + offset_elements = (size_t)get_random_float( 0.f, (float)(num_elements - 8), d ); + fill_elements = (size_t)get_random_float( 8.f, (float)(num_elements - offset_elements), d ); + log_info( "Testing random fill from offset %d for %d elements: \n", (int)offset_elements, (int)fill_elements ); + + for ( i = 0; i < 5; i++ ){ + pattern[i] = (cl_long *)malloc(ptrSizes[i]); + for ( j = 0; j < ptrSizes[i] / ptrSizes[0]; j++ ) + pattern[i][j] = TEST_PRIME_LONG; + + inptr[i] = (cl_long *)align_malloc(ptrSizes[i] * num_elements, min_alignment); + for ( j = 0; j < ptrSizes[i] * offset_elements / ptrSizes[0]; j++ ) + inptr[i][j] = 0; + for ( j = ptrSizes[i] * offset_elements / ptrSizes[0]; j < ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j++ ) + inptr[i][j] = TEST_PRIME_LONG; + for ( j = ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ ) + inptr[i][j] = 0; + + hostptr[i] = (cl_long *)align_malloc(ptrSizes[i] * num_elements, min_alignment); + memset(hostptr[i], 0, 
ptrSizes[i] * num_elements); + } + + if (test_buffer_fill( deviceID, context, queue, num_elements, sizeof( cl_long ), (char*)"long", + 5, (void**)inptr, (void**)hostptr, (void**)pattern, + offset_elements, fill_elements, + buffer_fill_long_kernel_code, long_kernel_name, foo )) + err++; + + for ( i = 0; i < 5; i++ ){ + free( (void *)pattern[i] ); + align_free( (void *)inptr[i] ); + align_free( (void *)hostptr[i] ); + } + + } + + free_mtdata(d); + + return err; + +} // end test_buffer_long_fill() + + +int test_buffer_fill_ulong( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + cl_ulong *inptr[5]; + cl_ulong *hostptr[5]; + cl_ulong *pattern[5]; + size_t ptrSizes[5]; + int n, i, err=0; + size_t j, offset_elements, fill_elements; + MTdata d = init_genrand( gRandomSeed ); + int (*foo)(void *,void *,int); + + size_t min_alignment = get_min_alignment(context); + + foo = verify_fill_ulong; + + ptrSizes[0] = sizeof(cl_ulong); + ptrSizes[1] = ptrSizes[0] << 1; + ptrSizes[2] = ptrSizes[1] << 1; + ptrSizes[3] = ptrSizes[2] << 1; + ptrSizes[4] = ptrSizes[3] << 1; + + if (! gHasLong ) + { + log_info( "Device does not support 64-bit integers. 
Skipping test.\n" ); + return CL_SUCCESS; + } + + // Test with random offsets and fill sizes + for ( n = 0; n < 8; n++ ){ + offset_elements = (size_t)get_random_float( 0.f, (float)(num_elements - 8), d ); + fill_elements = (size_t)get_random_float( 8.f, (float)(num_elements - offset_elements), d ); + log_info( "Testing random fill from offset %d for %d elements: \n", (int)offset_elements, (int)fill_elements ); + + for ( i = 0; i < 5; i++ ){ + pattern[i] = (cl_ulong *)malloc(ptrSizes[i]); + for ( j = 0; j < ptrSizes[i] / ptrSizes[0]; j++ ) + pattern[i][j] = TEST_PRIME_ULONG; + + inptr[i] = (cl_ulong *)align_malloc(ptrSizes[i] * num_elements, min_alignment); + for ( j = 0; j < ptrSizes[i] * offset_elements / ptrSizes[0]; j++ ) + inptr[i][j] = 0; + for ( j = ptrSizes[i] * offset_elements / ptrSizes[0]; j < ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j++ ) + inptr[i][j] = TEST_PRIME_ULONG; + for ( j = ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ ) + inptr[i][j] = 0; + + hostptr[i] = (cl_ulong *)align_malloc(ptrSizes[i] * num_elements, min_alignment); + memset(hostptr[i], 0, ptrSizes[i] * num_elements); + } + + if (test_buffer_fill( deviceID, context, queue, num_elements, sizeof( cl_ulong ), (char*)"ulong", + 5, (void**)inptr, (void**)hostptr, (void**)pattern, + offset_elements, fill_elements, + buffer_fill_ulong_kernel_code, ulong_kernel_name, foo )) + err++; + + for ( i = 0; i < 5; i++ ){ + free( (void *)pattern[i] ); + align_free( (void *)inptr[i] ); + align_free( (void *)hostptr[i] ); + } + + } + + free_mtdata(d); + + return err; + +} // end test_buffer_ulong_fill() + + +int test_buffer_fill_float( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + cl_float *inptr[5]; + cl_float *hostptr[5]; + cl_float *pattern[5]; + size_t ptrSizes[5]; + int n, i, err=0; + size_t j, offset_elements, fill_elements; + MTdata d = init_genrand( gRandomSeed ); + 
int (*foo)(void *,void *,int); + + size_t min_alignment = get_min_alignment(context); + + foo = verify_fill_float; + + ptrSizes[0] = sizeof(cl_float); + ptrSizes[1] = ptrSizes[0] << 1; + ptrSizes[2] = ptrSizes[1] << 1; + ptrSizes[3] = ptrSizes[2] << 1; + ptrSizes[4] = ptrSizes[3] << 1; + + // Test with random offsets and fill sizes + for ( n = 0; n < 8; n++ ){ + offset_elements = (size_t)get_random_float( 0.f, (float)(num_elements - 8), d ); + fill_elements = (size_t)get_random_float( 8.f, (float)(num_elements - offset_elements), d ); + log_info( "Testing random fill from offset %d for %d elements: \n", (int)offset_elements, (int)fill_elements ); + + for ( i = 0; i < 5; i++ ){ + pattern[i] = (cl_float *)malloc(ptrSizes[i]); + for ( j = 0; j < ptrSizes[i] / ptrSizes[0]; j++ ) + pattern[i][j] = TEST_PRIME_FLOAT; + + inptr[i] = (cl_float *)align_malloc(ptrSizes[i] * num_elements, min_alignment); + for ( j = 0; j < ptrSizes[i] * offset_elements / ptrSizes[0]; j++ ) + inptr[i][j] = 0; + for ( j = ptrSizes[i] * offset_elements / ptrSizes[0]; j < ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j++ ) + inptr[i][j] = TEST_PRIME_FLOAT; + for ( j = ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ ) + inptr[i][j] = 0; + + hostptr[i] = (cl_float *)align_malloc(ptrSizes[i] * num_elements, min_alignment); + memset(hostptr[i], 0, ptrSizes[i] * num_elements); + } + + if (test_buffer_fill( deviceID, context, queue, num_elements, sizeof( cl_float ), (char*)"float", + 5, (void**)inptr, (void**)hostptr, (void**)pattern, + offset_elements, fill_elements, + buffer_fill_float_kernel_code, float_kernel_name, foo )) + err++; + + for ( i = 0; i < 5; i++ ){ + free( (void *)pattern[i] ); + align_free( (void *)inptr[i] ); + align_free( (void *)hostptr[i] ); + } + + } + + free_mtdata(d); + + return err; + +} // end test_buffer_float_fill() diff --git a/test_conformance/buffers/test_buffer_map.c 
b/test_conformance/buffers/test_buffer_map.c new file mode 100644 index 00000000..0f7089da --- /dev/null +++ b/test_conformance/buffers/test_buffer_map.c @@ -0,0 +1,703 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include +#include + +#include "procs.h" +#include "../../test_common/harness/errorHelpers.h" + + +#define TEST_PRIME_INT ((1<<16)+1) +#define TEST_PRIME_UINT ((1U<<16)+1U) +#define TEST_PRIME_LONG ((1LL<<32)+1LL) +#define TEST_PRIME_ULONG ((1ULL<<32)+1ULL) +#define TEST_PRIME_SHORT ((1S<<8)+1S) +#define TEST_PRIME_FLOAT (float)3.40282346638528860e+38 +#define TEST_PRIME_HALF 119.f +#define TEST_BOOL true +#define TEST_PRIME_CHAR 0x77 + + +#ifndef TestStruct +typedef struct{ + int a; + float b; +} TestStruct; +#endif + + +//--- the code for the kernel executables +static const char *buffer_read_int_kernel_code[] = { + "__kernel void test_buffer_read_int(__global int *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = ((1<<16)+1);\n" + "}\n", + + "__kernel void test_buffer_read_int2(__global int2 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = ((1<<16)+1);\n" + "}\n", + + "__kernel void test_buffer_read_int4(__global int4 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = ((1<<16)+1);\n" + "}\n", + + "__kernel void 
test_buffer_read_int8(__global int8 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = ((1<<16)+1);\n" + "}\n", + + "__kernel void test_buffer_read_int16(__global int16 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = ((1<<16)+1);\n" + "}\n" }; + +static const char *int_kernel_name[] = { "test_buffer_read_int", "test_buffer_read_int2", "test_buffer_read_int4", "test_buffer_read_int8", "test_buffer_read_int16" }; + +static const char *buffer_read_uint_kernel_code[] = { + "__kernel void test_buffer_read_uint(__global uint *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = ((1U<<16)+1U);\n" + "}\n", + + "__kernel void test_buffer_read_uint2(__global uint2 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = ((1U<<16)+1U);\n" + "}\n", + + "__kernel void test_buffer_read_uint4(__global uint4 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = ((1U<<16)+1U);\n" + "}\n", + + "__kernel void test_buffer_read_uint8(__global uint8 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = ((1U<<16)+1U);\n" + "}\n", + + "__kernel void test_buffer_read_uint16(__global uint16 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = ((1U<<16)+1U);\n" + "}\n" }; + +static const char *uint_kernel_name[] = { "test_buffer_read_uint", "test_buffer_read_uint2", "test_buffer_read_uint4", "test_buffer_read_uint8", "test_buffer_read_uint16" }; + +static const char *buffer_read_long_kernel_code[] = { + "__kernel void test_buffer_read_long(__global long *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = ((1L<<32)+1L);\n" + "}\n", + + "__kernel void test_buffer_read_long2(__global long2 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = ((1L<<32)+1L);\n" + "}\n", + + "__kernel void test_buffer_read_long4(__global long4 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = 
((1L<<32)+1L);\n" + "}\n", + + "__kernel void test_buffer_read_long8(__global long8 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = ((1L<<32)+1L);\n" + "}\n", + + "__kernel void test_buffer_read_long16(__global long16 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = ((1L<<32)+1L);\n" + "}\n" }; + +static const char *long_kernel_name[] = { "test_buffer_read_long", "test_buffer_read_long2", "test_buffer_read_long4", "test_buffer_read_long8", "test_buffer_read_long16" }; + +static const char *buffer_read_ulong_kernel_code[] = { + "__kernel void test_buffer_read_ulong(__global ulong *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = ((1UL<<32)+1UL);\n" + "}\n", + + "__kernel void test_buffer_read_ulong2(__global ulong2 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = ((1UL<<32)+1UL);\n" + "}\n", + + "__kernel void test_buffer_read_ulong4(__global ulong4 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = ((1UL<<32)+1UL);\n" + "}\n", + + "__kernel void test_buffer_read_ulong8(__global ulong8 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = ((1UL<<32)+1UL);\n" + "}\n", + + "__kernel void test_buffer_read_ulong16(__global ulong16 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = ((1UL<<32)+1UL);\n" + "}\n" }; + +static const char *ulong_kernel_name[] = { "test_buffer_read_ulong", "test_buffer_read_ulong2", "test_buffer_read_ulong4", "test_buffer_read_ulong8", "test_buffer_read_ulong16" }; + +static const char *buffer_read_short_kernel_code[] = { + "__kernel void test_buffer_read_short(__global short *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = (short)((1<<8)+1);\n" + "}\n", + + "__kernel void test_buffer_read_short2(__global short2 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = (short)((1<<8)+1);\n" + "}\n", + + "__kernel void 
test_buffer_read_short4(__global short4 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = (short)((1<<8)+1);\n" + "}\n", + + "__kernel void test_buffer_read_short8(__global short8 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = (short)((1<<8)+1);\n" + "}\n", + + "__kernel void test_buffer_read_short16(__global short16 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = (short)((1<<8)+1);\n" + "}\n" }; + +static const char *short_kernel_name[] = { "test_buffer_read_short", "test_buffer_read_short2", "test_buffer_read_short4", "test_buffer_read_short8", "test_buffer_read_short16" }; + + +static const char *buffer_read_ushort_kernel_code[] = { + "__kernel void test_buffer_read_ushort(__global ushort *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = (ushort)((1<<8)+1);\n" + "}\n", + + "__kernel void test_buffer_read_ushort2(__global ushort2 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = (ushort)((1<<8)+1);\n" + "}\n", + + "__kernel void test_buffer_read_ushort4(__global ushort4 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = (ushort)((1<<8)+1);\n" + "}\n", + + "__kernel void test_buffer_read_ushort8(__global ushort8 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = (ushort)((1<<8)+1);\n" + "}\n", + + "__kernel void test_buffer_read_ushort16(__global ushort16 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = (ushort)((1<<8)+1);\n" + "}\n" }; + +static const char *ushort_kernel_name[] = { "test_buffer_read_ushort", "test_buffer_read_ushort2", "test_buffer_read_ushort4", "test_buffer_read_ushort8", "test_buffer_read_ushort16" }; + + +static const char *buffer_read_float_kernel_code[] = { + "__kernel void test_buffer_read_float(__global float *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = (float)3.40282346638528860e+38;\n" + "}\n", + + "__kernel void 
test_buffer_read_float2(__global float2 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = (float)3.40282346638528860e+38;\n" + "}\n", + + "__kernel void test_buffer_read_float4(__global float4 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = (float)3.40282346638528860e+38;\n" + "}\n", + + "__kernel void test_buffer_read_float8(__global float8 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = (float)3.40282346638528860e+38;\n" + "}\n", + + "__kernel void test_buffer_read_float16(__global float16 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = (float)3.40282346638528860e+38;\n" + "}\n" }; + +static const char *float_kernel_name[] = { "test_buffer_read_float", "test_buffer_read_float2", "test_buffer_read_float4", "test_buffer_read_float8", "test_buffer_read_float16" }; + + +static const char *buffer_read_char_kernel_code[] = { + "__kernel void test_buffer_read_char(__global char *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = (char)'w';\n" + "}\n", + + "__kernel void test_buffer_read_char2(__global char2 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = (char)'w';\n" + "}\n", + + "__kernel void test_buffer_read_char4(__global char4 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = (char)'w';\n" + "}\n", + + "__kernel void test_buffer_read_char8(__global char8 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = (char)'w';\n" + "}\n", + + "__kernel void test_buffer_read_char16(__global char16 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = (char)'w';\n" + "}\n" }; + +static const char *char_kernel_name[] = { "test_buffer_read_char", "test_buffer_read_char2", "test_buffer_read_char4", "test_buffer_read_char8", "test_buffer_read_char16" }; + + +static const char *buffer_read_uchar_kernel_code[] = { + "__kernel void test_buffer_read_uchar(__global uchar 
*dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = 'w';\n" + "}\n", + + "__kernel void test_buffer_read_uchar2(__global uchar2 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = (uchar)'w';\n" + "}\n", + + "__kernel void test_buffer_read_uchar4(__global uchar4 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = (uchar)'w';\n" + "}\n", + + "__kernel void test_buffer_read_uchar8(__global uchar8 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = (uchar)'w';\n" + "}\n", + + "__kernel void test_buffer_read_uchar16(__global uchar16 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = (uchar)'w';\n" + "}\n" }; + +static const char *uchar_kernel_name[] = { "test_buffer_read_uchar", "test_buffer_read_uchar2", "test_buffer_read_uchar4", "test_buffer_read_uchar8", "test_buffer_read_uchar16" }; + + +static const char *buffer_read_struct_kernel_code[] = { + "typedef struct{\n" + "int a;\n" + "float b;\n" + "} TestStruct;\n" + "__kernel void test_buffer_read_struct(__global TestStruct *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid].a = ((1<<16)+1);\n" + " dst[tid].b = (float)3.40282346638528860e+38;\n" + "}\n" }; + +static const char *struct_kernel_name[] = { "test_buffer_read_struct" }; + + +//--- the verify functions +static int verify_read_int(void *ptr, int n) +{ + int i; + int *outptr = (int *)ptr; + + for (i=0; i +#include +#include +#include +#include + +#include "procs.h" + +#ifndef uchar +typedef unsigned char uchar; +#endif + +#define USE_LOCAL_WORK_GROUP 1 + + +const char *mem_read_write_kernel_code = +"__kernel void test_mem_read_write(__global int *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = dst[tid]+1;\n" +"}\n"; + +const char *mem_read_kernel_code = +"__kernel void test_mem_read(__global int *src, __global int *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = src[tid]+1;\n" +"}\n"; + 
+const char *mem_write_kernel_code = +"__kernel void test_mem_write(__global int *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = dst[tid]+1;\n" +"}\n"; + + +static int verify_mem( int *outptr, int n ) +{ + int i; + + for ( i = 0; i < n; i++ ){ + if ( outptr[i] != ( i + 1 ) ) + return -1; + } + + return 0; +} + + + +int test_mem_read_write_flags( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + cl_mem buffers[1]; + cl_int *inptr, *outptr; + cl_program program[1]; + cl_kernel kernel[1]; + size_t global_work_size[3]; +#ifdef USE_LOCAL_WORK_GROUP + size_t local_work_size[3]; +#endif + cl_int err; + int i; + + size_t min_alignment = get_min_alignment(context); + + global_work_size[0] = (cl_uint)num_elements; + + inptr = (cl_int*)align_malloc(sizeof(cl_int) * num_elements, min_alignment); + outptr = (cl_int*)align_malloc(sizeof(cl_int) * num_elements, min_alignment); + buffers[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int) * num_elements, NULL, &err); + if (err != CL_SUCCESS) { + print_error( err, "clCreateBuffer failed"); + align_free( (void *)outptr ); + align_free( (void *)inptr ); + return -1; + } + + for (i=0; i +#include + +#include "procs.h" +#include "../../test_common/harness/errorHelpers.h" +#include "../../test_common/harness/testHarness.h" + +#define MAX_SUB_DEVICES 16 // Limit the sub-devices to ensure no out of resource errors. 
+#define BUFFER_SIZE 1024 + +// Kernel source code +static const char *buffer_migrate_kernel_code = +"__kernel void test_buffer_migrate(__global uint *dst, __global uint *src1, __global uint *src2, uint x)\n" +"{\n" +" int tid = get_global_id(0);\n" +" dst[tid] = src1[tid] ^ src2[tid] ^ x;\n" +"}\n"; + +enum migrations { MIGRATE_PREFERRED, // migrate to the preferred sub-device + MIGRATE_NON_PREFERRED, // migrate to a randomly chosen non-preferred sub-device + MIGRATE_RANDOM, // migrate to a randomly chosen sub-device with randomly chosen flags + NUMBER_OF_MIGRATIONS }; + +static cl_mem init_buffer(cl_command_queue cmd_q, cl_mem buffer, cl_uint *data) +{ + cl_int err; + + if (buffer) { + if ((err = clEnqueueWriteBuffer(cmd_q, buffer, CL_TRUE, 0, sizeof(cl_uint)*BUFFER_SIZE, data, 0, NULL, NULL)) != CL_SUCCESS) { + print_error(err, "Failed on enqueue write of buffer data."); + } + } + return buffer; +} + +static cl_int migrateMemObject(enum migrations migrate, cl_command_queue *queues, cl_mem *mem_objects, cl_uint num_devices, cl_mem_migration_flags *flags, MTdata d) +{ + cl_uint i, j; + cl_int err = CL_SUCCESS; + + for (i=0; i 1)) j = (j+1) % num_devices; + break; + case MIGRATE_RANDOM: + // Choose a random set of flags + flags[i] = (cl_mem_migration_flags)(genrand_int32(d) & (CL_MIGRATE_MEM_OBJECT_HOST | CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED));; + break; + } + if ((err = clEnqueueMigrateMemObjects(queues[j], 1, (const cl_mem *)(&mem_objects[i]), flags[i], 0, NULL, NULL)) != CL_SUCCESS) { + print_error(err, "Failed migrating memory object."); + } + } + return err; +} + +static cl_int restoreBuffer(cl_command_queue *queues, cl_mem *buffers, cl_uint num_devices, cl_mem_migration_flags *flags, cl_uint *buffer) +{ + cl_uint i, j; + cl_int err; + + // If the buffer was previously migrated with undefined content, reload the content. + + for (i=0; i 1) { + // Create each of the sub-devices and a corresponding context. 
+ if ((err = clCreateSubDevices(deviceID, (const cl_device_partition_property *)property, num_devices, devices, &num_devices)) != CL_SUCCESS) { + print_error(err, "Failed creating sub devices."); + failed = 1; + goto cleanup; + } + + // Create a context containing all the sub-devices + ctx = clCreateContext(NULL, num_devices, devices, notify_callback, NULL, &err); + if (ctx == NULL) { + print_error(err, "Failed creating context containing the sub-devices."); + failed = 1; + goto cleanup; + } + + // Create a command queue for each sub-device + for (i=0; i 1) { + // Command queue cleanup + if (queues[i]) { + if ((err = clReleaseCommandQueue(queues[i])) != CL_SUCCESS) { + print_error(err, "Failed releasing command queue."); + failed = 1; + } + } + + // Sub-device cleanup + if (devices[i]) { + if ((err = clReleaseDevice(devices[i])) != CL_SUCCESS) { + print_error(err, "Failed releasing sub device."); + failed = 1; + } + } + devices[i] = 0; + } + } + + // Context, program, and kernel cleanup + if (program) { + if ((err = clReleaseProgram(program)) != CL_SUCCESS) { + print_error(err, "Failed releasing program."); + failed = 1; + } + program = NULL; + } + + if (kernel) { + if ((err = clReleaseKernel(kernel)) != CL_SUCCESS) { + print_error(err, "Failed releasing kernel."); + failed = 1; + } + kernel = NULL; + } + + if (ctx && (ctx != context)) { + if ((err = clReleaseContext(ctx)) != CL_SUCCESS) { + print_error(err, "Failed releasing context."); + failed = 1; + } + } + ctx = NULL; + + if (failed) goto cleanup_allocations; + } while (domains); + +cleanup_allocations: + if (devices) free(devices); + if (queues) free(queues); + if (flagsA) free(flagsA); + if (flagsB) free(flagsB); + if (flagsC) free(flagsC); + if (bufferA) free(bufferA); + if (bufferB) free(bufferB); + if (bufferC) free(bufferC); + + return ((failed) ? 
-1 : 0); +} diff --git a/test_conformance/buffers/test_buffer_read.c b/test_conformance/buffers/test_buffer_read.c new file mode 100644 index 00000000..2755afec --- /dev/null +++ b/test_conformance/buffers/test_buffer_read.c @@ -0,0 +1,1463 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include +#include +#include + +#include "procs.h" + +//#define HK_DO_NOT_RUN_SHORT_ASYNC 1 +//#define HK_DO_NOT_RUN_USHORT_ASYNC 1 +//#define HK_DO_NOT_RUN_CHAR_ASYNC 1 +//#define HK_DO_NOT_RUN_UCHAR_ASYNC 1 + +#define TEST_PRIME_INT ((1<<16)+1) +#define TEST_PRIME_UINT ((1U<<16)+1U) +#define TEST_PRIME_LONG ((1LL<<32)+1LL) +#define TEST_PRIME_ULONG ((1ULL<<32)+1ULL) +#define TEST_PRIME_SHORT ((1S<<8)+1S) +#define TEST_PRIME_FLOAT (float)3.40282346638528860e+38 +#define TEST_PRIME_HALF 119.f +#define TEST_BOOL true +#define TEST_PRIME_CHAR 0x77 + +#ifndef ulong +typedef unsigned long ulong; +#endif + +#ifndef uchar +typedef unsigned char uchar; +#endif + +#ifndef TestStruct +typedef struct{ + int a; + float b; +} TestStruct; +#endif + +//--- the code for the kernel executables +static const char *buffer_read_int_kernel_code[] = { + "__kernel void test_buffer_read_int(__global int *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = ((1<<16)+1);\n" + "}\n", + + "__kernel void test_buffer_read_int2(__global int2 
*dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = ((1<<16)+1);\n" + "}\n", + + "__kernel void test_buffer_read_int4(__global int4 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = ((1<<16)+1);\n" + "}\n", + + "__kernel void test_buffer_read_int8(__global int8 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = ((1<<16)+1);\n" + "}\n", + + "__kernel void test_buffer_read_int16(__global int16 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = ((1<<16)+1);\n" + "}\n" }; + +static const char *int_kernel_name[] = { "test_buffer_read_int", "test_buffer_read_int2", "test_buffer_read_int4", "test_buffer_read_int8", "test_buffer_read_int16" }; + +static const char *buffer_read_uint_kernel_code[] = { + "__kernel void test_buffer_read_uint(__global uint *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = ((1U<<16)+1U);\n" + "}\n", + + "__kernel void test_buffer_read_uint2(__global uint2 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = ((1U<<16)+1U);\n" + "}\n", + + "__kernel void test_buffer_read_uint4(__global uint4 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = ((1U<<16)+1U);\n" + "}\n", + + "__kernel void test_buffer_read_uint8(__global uint8 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = ((1U<<16)+1U);\n" + "}\n", + + "__kernel void test_buffer_read_uint16(__global uint16 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = ((1U<<16)+1U);\n" + "}\n" }; + +static const char *uint_kernel_name[] = { "test_buffer_read_uint", "test_buffer_read_uint2", "test_buffer_read_uint4", "test_buffer_read_uint8", "test_buffer_read_uint16" }; + +static const char *buffer_read_long_kernel_code[] = { + "__kernel void test_buffer_read_long(__global long *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = ((1L<<32)+1L);\n" + "}\n", + + "__kernel void 
test_buffer_read_long2(__global long2 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = ((1L<<32)+1L);\n" + "}\n", + + "__kernel void test_buffer_read_long4(__global long4 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = ((1L<<32)+1L);\n" + "}\n", + + "__kernel void test_buffer_read_long8(__global long8 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = ((1L<<32)+1L);\n" + "}\n", + + "__kernel void test_buffer_read_long16(__global long16 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = ((1L<<32)+1L);\n" + "}\n" }; + +static const char *long_kernel_name[] = { "test_buffer_read_long", "test_buffer_read_long2", "test_buffer_read_long4", "test_buffer_read_long8", "test_buffer_read_long16" }; + +static const char *buffer_read_ulong_kernel_code[] = { + "__kernel void test_buffer_read_ulong(__global ulong *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = ((1UL<<32)+1UL);\n" + "}\n", + + "__kernel void test_buffer_read_ulong2(__global ulong2 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = ((1UL<<32)+1UL);\n" + "}\n", + + "__kernel void test_buffer_read_ulong4(__global ulong4 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = ((1UL<<32)+1UL);\n" + "}\n", + + "__kernel void test_buffer_read_ulong8(__global ulong8 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = ((1UL<<32)+1UL);\n" + "}\n", + + "__kernel void test_buffer_read_ulong16(__global ulong16 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = ((1UL<<32)+1UL);\n" + "}\n" }; + +static const char *ulong_kernel_name[] = { "test_buffer_read_ulong", "test_buffer_read_ulong2", "test_buffer_read_ulong4", "test_buffer_read_ulong8", "test_buffer_read_ulong16" }; + +static const char *buffer_read_short_kernel_code[] = { + "__kernel void test_buffer_read_short(__global short *dst)\n" + "{\n" + " int tid = 
get_global_id(0);\n" + "\n" + " dst[tid] = (short)((1<<8)+1);\n" + "}\n", + + "__kernel void test_buffer_read_short2(__global short2 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = (short)((1<<8)+1);\n" + "}\n", + + "__kernel void test_buffer_read_short4(__global short4 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = (short)((1<<8)+1);\n" + "}\n", + + "__kernel void test_buffer_read_short8(__global short8 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = (short)((1<<8)+1);\n" + "}\n", + + "__kernel void test_buffer_read_short16(__global short16 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = (short)((1<<8)+1);\n" + "}\n" }; + +static const char *short_kernel_name[] = { "test_buffer_read_short", "test_buffer_read_short2", "test_buffer_read_short4", "test_buffer_read_short8", "test_buffer_read_short16" }; + + +static const char *buffer_read_ushort_kernel_code[] = { + "__kernel void test_buffer_read_ushort(__global ushort *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = (ushort)((1<<8)+1);\n" + "}\n", + + "__kernel void test_buffer_read_ushort2(__global ushort2 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = (ushort)((1<<8)+1);\n" + "}\n", + + "__kernel void test_buffer_read_ushort4(__global ushort4 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = (ushort)((1<<8)+1);\n" + "}\n", + + "__kernel void test_buffer_read_ushort8(__global ushort8 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = (ushort)((1<<8)+1);\n" + "}\n", + + "__kernel void test_buffer_read_ushort16(__global ushort16 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = (ushort)((1<<8)+1);\n" + "}\n" }; + +static const char *ushort_kernel_name[] = { "test_buffer_read_ushort", "test_buffer_read_ushort2", "test_buffer_read_ushort4", "test_buffer_read_ushort8", "test_buffer_read_ushort16" 
}; + + +static const char *buffer_read_float_kernel_code[] = { + "__kernel void test_buffer_read_float(__global float *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = (float)3.40282346638528860e+38;\n" + "}\n", + + "__kernel void test_buffer_read_float2(__global float2 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = (float)3.40282346638528860e+38;\n" + "}\n", + + "__kernel void test_buffer_read_float4(__global float4 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = (float)3.40282346638528860e+38;\n" + "}\n", + + "__kernel void test_buffer_read_float8(__global float8 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = (float)3.40282346638528860e+38;\n" + "}\n", + + "__kernel void test_buffer_read_float16(__global float16 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = (float)3.40282346638528860e+38;\n" + "}\n" }; + +static const char *float_kernel_name[] = { "test_buffer_read_float", "test_buffer_read_float2", "test_buffer_read_float4", "test_buffer_read_float8", "test_buffer_read_float16" }; + + +static const char *buffer_read_half_kernel_code[] = { + "__kernel void test_buffer_read_half(__global half *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = (half)119;\n" + "}\n", + + "__kernel void test_buffer_read_half2(__global half2 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = (half)119;\n" + "}\n", + + "__kernel void test_buffer_read_half4(__global half4 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = (half)119;\n" + "}\n", + + "__kernel void test_buffer_read_half8(__global half8 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = (half)119;\n" + "}\n", + + "__kernel void test_buffer_read_half16(__global half16 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = (half)119;\n" + "}\n" }; + +static const char 
*half_kernel_name[] = { "test_buffer_read_half", "test_buffer_read_half2", "test_buffer_read_half4", "test_buffer_read_half8", "test_buffer_read_half16" }; + + +static const char *buffer_read_char_kernel_code[] = { + "__kernel void test_buffer_read_char(__global char *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = (char)'w';\n" + "}\n", + + "__kernel void test_buffer_read_char2(__global char2 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = (char)'w';\n" + "}\n", + + "__kernel void test_buffer_read_char4(__global char4 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = (char)'w';\n" + "}\n", + + "__kernel void test_buffer_read_char8(__global char8 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = (char)'w';\n" + "}\n", + + "__kernel void test_buffer_read_char16(__global char16 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = (char)'w';\n" + "}\n" }; + +static const char *char_kernel_name[] = { "test_buffer_read_char", "test_buffer_read_char2", "test_buffer_read_char4", "test_buffer_read_char8", "test_buffer_read_char16" }; + + +static const char *buffer_read_uchar_kernel_code[] = { + "__kernel void test_buffer_read_uchar(__global uchar *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = 'w';\n" + "}\n", + + "__kernel void test_buffer_read_uchar2(__global uchar2 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = (uchar)'w';\n" + "}\n", + + "__kernel void test_buffer_read_uchar4(__global uchar4 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = (uchar)'w';\n" + "}\n", + + "__kernel void test_buffer_read_uchar8(__global uchar8 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = (uchar)'w';\n" + "}\n", + + "__kernel void test_buffer_read_uchar16(__global uchar16 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = (uchar)'w';\n" + "}\n" }; 
+ +static const char *uchar_kernel_name[] = { "test_buffer_read_uchar", "test_buffer_read_uchar2", "test_buffer_read_uchar4", "test_buffer_read_uchar8", "test_buffer_read_uchar16" }; + + +static const char *buffer_read_struct_kernel_code = +"typedef struct{\n" +"int a;\n" +"float b;\n" +"} TestStruct;\n" +"__kernel void test_buffer_read_struct(__global TestStruct *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid].a = ((1<<16)+1);\n" +" dst[tid].b = (float)3.40282346638528860e+38;\n" +"}\n"; + + +//--- the verify functions +static int verify_read_int(void *ptr, int n) +{ + int i; + cl_int *outptr = (cl_int *)ptr; + + for (i=0; i> 1 ) ){ + log_error( "buffer_READ half test failed\n" ); + err = -1; + } + else{ + log_info( "buffer_READ half test passed\n" ); + err = 0; + } + + // cleanup + clReleaseMemObject( buffers[0] ); + clReleaseKernel( kernel[0] ); + clReleaseProgram( program[0] ); + free( (void *)outptr ); + + return err; + + } // end test_buffer_half_read() + */ + +int test_buffer_read_struct(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + cl_mem buffers[1]; + TestStruct *output_ptr; + cl_program program[1]; + cl_kernel kernel[1]; + size_t global_work_size[3]; +#ifdef USE_LOCAL_WORK_GROUP + size_t local_work_size[3]; +#endif + cl_int err; + size_t objSize = sizeof(TestStruct); + + size_t min_alignment = get_min_alignment(context); + + global_work_size[0] = (cl_uint)num_elements; + + output_ptr = (TestStruct*)align_malloc(objSize * num_elements, min_alignment); + if ( ! 
output_ptr ){ + log_error( " unable to allocate %d bytes for output_ptr\n", (int)(objSize * num_elements) ); + return -1; + } + buffers[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), objSize * num_elements, NULL , &err); + if ( err != CL_SUCCESS ){ + print_error( err, " clCreateBuffer failed\n" ); + align_free( output_ptr ); + return -1; + } + + err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &buffer_read_struct_kernel_code, "test_buffer_read_struct" ); + if ( err ){ + clReleaseProgram( program[0] ); + align_free( output_ptr ); + return -1; + } + + err = clSetKernelArg( kernel[0], 0, sizeof( cl_mem ), (void *)&buffers[0] ); + if ( err != CL_SUCCESS){ + print_error( err, "clSetKernelArg failed" ); + clReleaseMemObject( buffers[0] ); + clReleaseKernel( kernel[0] ); + clReleaseProgram( program[0] ); + align_free( output_ptr ); + return -1; + } + +#ifdef USE_LOCAL_WORK_GROUP + err = get_max_common_work_group_size( context, kernel[0], global_work_size[0], &local_work_size[0] ); + test_error( err, "Unable to get work group size to use" ); + + err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, local_work_size, 0, NULL, NULL ); +#else + err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, NULL, 0, NULL, NULL ); +#endif + if ( err != CL_SUCCESS ){ + print_error( err, "clEnqueueNDRangeKernel failed" ); + clReleaseMemObject( buffers[0] ); + clReleaseKernel( kernel[0] ); + clReleaseProgram( program[0] ); + align_free( output_ptr ); + return -1; + } + + err = clEnqueueReadBuffer( queue, buffers[0], true, 0, objSize*num_elements, (void *)output_ptr, 0, NULL, NULL ); + if ( err != CL_SUCCESS){ + print_error( err, "clEnqueueReadBuffer failed" ); + clReleaseMemObject( buffers[0] ); + clReleaseKernel( kernel[0] ); + clReleaseProgram( program[0] ); + align_free( output_ptr ); + return -1; + } + + if (verify_read_struct(output_ptr, num_elements)){ + log_error(" struct test failed\n"); + err = -1; + 
} + else{ + log_info(" struct test passed\n"); + err = 0; + } + + // cleanup + clReleaseMemObject( buffers[0] ); + clReleaseKernel( kernel[0] ); + clReleaseProgram( program[0] ); + align_free( output_ptr ); + + return err; +} + + +static int testRandomReadSize( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, cl_uint startOfRead, size_t sizeOfRead ) +{ + cl_mem buffers[3]; + int *outptr[3]; + cl_program program[3]; + cl_kernel kernel[3]; + size_t global_work_size[3]; +#ifdef USE_LOCAL_WORK_GROUP + size_t local_work_size[3]; +#endif + cl_int err; + int i, j; + size_t ptrSizes[3]; // sizeof(int), sizeof(int2), sizeof(int4) + int total_errors = 0; + size_t min_alignment = get_min_alignment(context); + + global_work_size[0] = (cl_uint)num_elements; + + ptrSizes[0] = sizeof(cl_int); + ptrSizes[1] = ptrSizes[0] << 1; + ptrSizes[2] = ptrSizes[1] << 1; + for ( i = 0; i < 3; i++ ){ + outptr[i] = (int *)align_malloc( ptrSizes[i] * num_elements, min_alignment); + if ( ! 
outptr[i] ){ + log_error( " Unable to allocate %d bytes for outptr[%d]\n", (int)(ptrSizes[i] * num_elements), i ); + for ( j = 0; j < i; j++ ){ + clReleaseMemObject( buffers[j] ); + align_free( outptr[j] ); + } + return -1; + } + buffers[i] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), ptrSizes[i] * num_elements, NULL, &err); + if ( err != CL_SUCCESS ){ + print_error(err, " clCreateBuffer failed\n" ); + for ( j = 0; j < i; j++ ){ + clReleaseMemObject( buffers[j] ); + align_free( outptr[j] ); + } + align_free( outptr[i] ); + return -1; + } + } + + err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &buffer_read_int_kernel_code[0], "test_buffer_read_int" ); + if ( err ){ + log_error( " Error creating program for int\n" ); + for ( i = 0; i < 3; i++ ){ + clReleaseMemObject( buffers[i] ); + align_free( outptr[i] ); + } + return -1; + } + + err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &buffer_read_int_kernel_code[1], "test_buffer_read_int2" ); + if ( err ){ + log_error( " Error creating program for int2\n" ); + clReleaseKernel( kernel[0] ); + clReleaseProgram( program[0] ); + for ( i = 0; i < 3; i++ ){ + clReleaseMemObject( buffers[i] ); + align_free( outptr[i] ); + } + return -1; + } + + err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &buffer_read_int_kernel_code[2], "test_buffer_read_int4" ); + if ( err ){ + log_error( " Error creating program for int4\n" ); + clReleaseKernel( kernel[0] ); + clReleaseProgram( program[0] ); + clReleaseKernel( kernel[1] ); + clReleaseProgram( program[1] ); + for ( i = 0; i < 3; i++ ){ + clReleaseMemObject( buffers[i] ); + align_free( outptr[i] ); + } + return -1; + } + + for (i=0; i<3; i++){ + err = clSetKernelArg( kernel[i], 0, sizeof( cl_mem ), (void *)&buffers[i] ); + if ( err != CL_SUCCESS ){ + print_error( err, "clSetKernelArgs failed" ); + clReleaseMemObject( buffers[i] ); + clReleaseKernel( kernel[i] ); + clReleaseProgram( program[i] ); + 
align_free( outptr[i] ); + return -1; + } + +#ifdef USE_LOCAL_WORK_GROUP + err = get_max_common_work_group_size( context, kernel[i], global_work_size[0], &local_work_size[0] ); + test_error( err, "Unable to get work group size to use" ); + + err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, local_work_size, 0, NULL, NULL ); +#else + err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, NULL, 0, NULL, NULL ); +#endif + if ( err != CL_SUCCESS ){ + print_error( err, "clEnqueueNDRangeKernel failed" ); + clReleaseMemObject( buffers[i] ); + clReleaseKernel( kernel[i] ); + clReleaseProgram( program[i] ); + align_free( outptr[i] ); + return -1; + } + + err = clEnqueueReadBuffer( queue, buffers[i], true, startOfRead*ptrSizes[i], ptrSizes[i]*sizeOfRead, (void *)(outptr[i]), 0, NULL, NULL ); + if ( err != CL_SUCCESS ){ + print_error( err, "clEnqueueReadBuffer failed" ); + clReleaseMemObject( buffers[i] ); + clReleaseKernel( kernel[i] ); + clReleaseProgram( program[i] ); + align_free( outptr[i] ); + return -1; + } + + if ( verify_read_int( outptr[i], (int)sizeOfRead*(1< +#include +#include +#include +#include + +#include "procs.h" +#include "../../test_common/harness/errorHelpers.h" + +#define USE_LOCAL_WORK_GROUP 1 + +#ifndef uchar +typedef unsigned char uchar; +#endif + +#ifndef TestStruct +typedef struct{ + int a; + float b; +} TestStruct; +#endif + +// If this is set to 1 the writes are done via map/unmap +static int gTestMap = 0; + +const char *buffer_write_int_kernel_code[] = { + "__kernel void test_buffer_write_int(__global int *src, __global int *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_write_int2(__global int2 *src, __global int2 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_write_int4(__global int4 *src, __global int4 *dst)\n" + "{\n" + " int tid = 
get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_write_int8(__global int8 *src, __global int8 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_write_int16(__global int16 *src, __global int16 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n" }; + +static const char *int_kernel_name[] = { "test_buffer_write_int", "test_buffer_write_int2", "test_buffer_write_int4", "test_buffer_write_int8", "test_buffer_write_int16" }; + + +const char *buffer_write_uint_kernel_code[] = { + "__kernel void test_buffer_write_uint(__global uint *src, __global uint *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_write_uint2(__global uint2 *src, __global uint2 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_write_uint4(__global uint4 *src, __global uint4 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_write_uint8(__global uint8 *src, __global uint8 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_write_uint16(__global uint16 *src, __global uint16 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n" }; + +static const char *uint_kernel_name[] = { "test_buffer_write_uint", "test_buffer_write_uint2", "test_buffer_write_uint4", "test_buffer_write_uint8", "test_buffer_write_uint16" }; + + +const char *buffer_write_ushort_kernel_code[] = { + "__kernel void test_buffer_write_ushort(__global ushort *src, __global ushort *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_write_ushort2(__global ushort2 *src, 
__global ushort2 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_write_ushort4(__global ushort4 *src, __global ushort4 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_write_ushort8(__global ushort8 *src, __global ushort8 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_write_ushort16(__global ushort16 *src, __global ushort16 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n" }; + +static const char *ushort_kernel_name[] = { "test_buffer_write_ushort", "test_buffer_write_ushort2", "test_buffer_write_ushort4", "test_buffer_write_ushort8", "test_buffer_write_ushort16" }; + + + +const char *buffer_write_short_kernel_code[] = { + "__kernel void test_buffer_write_short(__global short *src, __global short *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_write_short2(__global short2 *src, __global short2 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_write_short4(__global short4 *src, __global short4 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_write_short8(__global short8 *src, __global short8 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_write_short16(__global short16 *src, __global short16 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n" }; + +static const char *short_kernel_name[] = { "test_buffer_write_short", "test_buffer_write_short2", "test_buffer_write_short4", "test_buffer_write_short8", "test_buffer_write_short16" }; + + +const 
char *buffer_write_char_kernel_code[] = { + "__kernel void test_buffer_write_char(__global char *src, __global char *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_write_char2(__global char2 *src, __global char2 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_write_char4(__global char4 *src, __global char4 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_write_char8(__global char8 *src, __global char8 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_write_char16(__global char16 *src, __global char16 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n" }; + +static const char *char_kernel_name[] = { "test_buffer_write_char", "test_buffer_write_char2", "test_buffer_write_char4", "test_buffer_write_char8", "test_buffer_write_char16" }; + + +const char *buffer_write_uchar_kernel_code[] = { + "__kernel void test_buffer_write_uchar(__global uchar *src, __global uchar *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_write_uchar2(__global uchar2 *src, __global uchar2 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_write_uchar4(__global uchar4 *src, __global uchar4 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_write_uchar8(__global uchar8 *src, __global uchar8 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_write_uchar16(__global uchar16 *src, __global uchar16 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + 
"\n" + " dst[tid] = src[tid];\n" + "}\n" }; + +static const char *uchar_kernel_name[] = { "test_buffer_write_uchar", "test_buffer_write_uchar2", "test_buffer_write_uchar4", "test_buffer_write_uchar8", "test_buffer_write_uchar16" }; + + +const char *buffer_write_float_kernel_code[] = { + "__kernel void test_buffer_write_float(__global float *src, __global float *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_write_float2(__global float2 *src, __global float2 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_write_float4(__global float4 *src, __global float4 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_write_float8(__global float8 *src, __global float8 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_write_float16(__global float16 *src, __global float16 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n" }; + +static const char *float_kernel_name[] = { "test_buffer_write_float", "test_buffer_write_float2", "test_buffer_write_float4", "test_buffer_write_float8", "test_buffer_write_float16" }; + + +const char *buffer_write_half_kernel_code[] = { + "__kernel void test_buffer_write_half(__global half *src, __global float *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = vload_half( tid * 2, src );\n" + "}\n", + + "__kernel void test_buffer_write_half2(__global half2 *src, __global float2 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = vload_half2( tid * 2, src );\n" + "}\n", + + "__kernel void test_buffer_write_half4(__global half4 *src, __global float4 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = vload_half4( tid * 2, src );\n" + "}\n", + + 
"__kernel void test_buffer_write_half8(__global half8 *src, __global float8 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = vload_half8( tid * 2, src );\n" + "}\n", + + "__kernel void test_buffer_write_half16(__global half16 *src, __global float16 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = vload_half16( tid * 2, src );\n" + "}\n" }; + +static const char *half_kernel_name[] = { "test_buffer_write_half", "test_buffer_write_half2", "test_buffer_write_half4", "test_buffer_write_half8", "test_buffer_write_half16" }; + + +const char *buffer_write_long_kernel_code[] = { + "__kernel void test_buffer_write_long(__global long *src, __global long *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_write_long2(__global long2 *src, __global long2 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_write_long4(__global long4 *src, __global long4 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_write_long8(__global long8 *src, __global long8 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_write_long16(__global long16 *src, __global long16 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n" }; + +static const char *long_kernel_name[] = { "test_buffer_write_long", "test_buffer_write_long2", "test_buffer_write_long4", "test_buffer_write_long8", "test_buffer_write_long16" }; + + +const char *buffer_write_ulong_kernel_code[] = { + "__kernel void test_buffer_write_ulong(__global ulong *src, __global ulong *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_write_ulong2(__global ulong2 *src, __global ulong2 
*dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_write_ulong4(__global ulong4 *src, __global ulong4 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_write_ulong8(__global ulong8 *src, __global ulong8 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_buffer_write_ulong16(__global ulong16 *src, __global ulong16 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n" }; + +static const char *ulong_kernel_name[] = { "test_buffer_write_ulong", "test_buffer_write_ulong2", "test_buffer_write_ulong4", "test_buffer_write_ulong8", "test_buffer_write_ulong16" }; + + +static const char *struct_kernel_code = +"typedef struct{\n" +"int a;\n" +"float b;\n" +"} TestStruct;\n" +"__kernel void read_write_struct(__global TestStruct *src, __global TestStruct *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid].a = src[tid].a;\n" +" dst[tid].b = src[tid].b;\n" +"}\n"; + + + +static int verify_write_int( void *ptr1, void *ptr2, int n ) +{ + int i; + int *inptr = (int *)ptr1; + int *outptr = (int *)ptr2; + + for (i=0; i +#include + +#include "procs.h" +#include "../../test_common/harness/errorHelpers.h" + +#define MAX_SUB_DEVICES 16 // Limit the sub-devices to ensure no out of resource errors. 
+#define MEM_OBJ_SIZE 1024 +#define IMAGE_DIM 16 + +// Kernel source code +static const char *image_migrate_kernel_code = +"__kernel void test_image_migrate(write_only image2d_t dst, read_only image2d_t src1,\n" +" read_only image2d_t src2, sampler_t sampler, uint x)\n" +"{\n" +" int tidX = get_global_id(0), tidY = get_global_id(1);\n" +" int2 coords = (int2) {tidX, tidY};\n" +" uint4 val = read_imageui(src1, sampler, coords) ^\n" +" read_imageui(src2, sampler, coords) ^\n" +" x;\n" +" write_imageui(dst, coords, val);\n" +"}\n"; + +enum migrations { MIGRATE_PREFERRED, // migrate to the preferred sub-device + MIGRATE_NON_PREFERRED, // migrate to a randomly chosen non-preferred sub-device + MIGRATE_RANDOM, // migrate to a randomly chosen sub-device with randomly chosen flags + NUMBER_OF_MIGRATIONS }; + +static cl_mem init_image(cl_command_queue cmd_q, cl_mem image, cl_uint *data) +{ + cl_int err; + + size_t origin[3] = {0, 0, 0}; + size_t region[3] = {IMAGE_DIM, IMAGE_DIM, 1}; + + if (image) { + if ((err = clEnqueueWriteImage(cmd_q, image, CL_TRUE, + origin, region, 0, 0, data, 0, NULL, NULL)) != CL_SUCCESS) { + print_error(err, "Failed on enqueue write of image data."); + } + } + + return image; +} + +static cl_int migrateMemObject(enum migrations migrate, cl_command_queue *queues, cl_mem *mem_objects, + cl_uint num_devices, cl_mem_migration_flags *flags, MTdata d) +{ + cl_uint i, j; + cl_int err = CL_SUCCESS; + + for (i=0; i 1)) j = (j+1) % num_devices; + break; + case MIGRATE_RANDOM: + // Choose a random set of flags + flags[i] = (cl_mem_migration_flags)(genrand_int32(d) & (CL_MIGRATE_MEM_OBJECT_HOST | CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED)); + break; + } + if ((err = clEnqueueMigrateMemObjects(queues[j], 1, (const cl_mem *)(&mem_objects[i]), + flags[i], 0, NULL, NULL)) != CL_SUCCESS) { + print_error(err, "Failed migrating memory object."); + } + } + return err; +} + +static cl_int restoreImage(cl_command_queue *queues, cl_mem *mem_objects, cl_uint num_devices, 
+ cl_mem_migration_flags *flags, cl_uint *buffer) +{ + cl_uint i; + cl_int err; + + const size_t origin[3] = {0, 0, 0}; + const size_t region[3] = {IMAGE_DIM, IMAGE_DIM, 1}; + + // If the image was previously migrated with undefined content, reload the content. + + for (i=0; i 1) { + // Create each of the sub-devices and a corresponding context. + if ((err = clCreateSubDevices(deviceID, (const cl_device_partition_property *)property, num_devices, devices, &num_devices)) != CL_SUCCESS) { + print_error(err, "Failed creating sub devices."); + failed = 1; + goto cleanup; + } + + // Create a context containing all the sub-devices + ctx = clCreateContext(NULL, num_devices, devices, notify_callback, NULL, &err); + if (ctx == NULL) { + print_error(err, "Failed creating context containing the sub-devices."); + failed = 1; + goto cleanup; + } + + // Create a command queue for each sub-device + for (i=0; i 1) { + // Command queue cleanup + if (queues[i]) { + if ((err = clReleaseCommandQueue(queues[i])) != CL_SUCCESS) { + print_error(err, "Failed releasing command queue."); + failed = 1; + } + } + + // Sub-device cleanup + if (devices[i]) { + if ((err = clReleaseDevice(devices[i])) != CL_SUCCESS) { + print_error(err, "Failed releasing sub device."); + failed = 1; + } + } + devices[i] = 0; + } + } + + // Sampler cleanup + if (sampler) { + if ((err = clReleaseSampler(sampler)) != CL_SUCCESS) { + print_error(err, "Failed releasing sampler."); + failed = 1; + } + sampler = NULL; + } + + // Context, program, and kernel cleanup + if (program) { + if ((err = clReleaseProgram(program)) != CL_SUCCESS) { + print_error(err, "Failed releasing program."); + failed = 1; + } + program = NULL; + } + + if (kernel) { + if ((err = clReleaseKernel(kernel)) != CL_SUCCESS) { + print_error(err, "Failed releasing kernel."); + failed = 1; + } + kernel = NULL; + } + + if (ctx && (ctx != context)) { + if ((err = clReleaseContext(ctx)) != CL_SUCCESS) { + print_error(err, "Failed releasing context."); + 
failed = 1; + } + } + ctx = NULL; + + if (failed) goto cleanup_allocations; + } while (domains); + +cleanup_allocations: + if (devices) free(devices); + if (queues) free(queues); + if (flagsA) free(flagsA); + if (flagsB) free(flagsB); + if (flagsC) free(flagsC); + if (imageA) free(imageA); + if (imageB) free(imageB); + if (imageC) free(imageC); + + return ((failed) ? -1 : 0); +} diff --git a/test_conformance/buffers/test_sub_buffers.cpp b/test_conformance/buffers/test_sub_buffers.cpp new file mode 100644 index 00000000..8a8d0269 --- /dev/null +++ b/test_conformance/buffers/test_sub_buffers.cpp @@ -0,0 +1,631 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "procs.h" + +// Design: +// To test sub buffers, we first create one main buffer. We then create several sub-buffers and +// queue Actions on each one. Each Action is encapsulated in a class so it can keep track of +// what results it expects, and so we can test scaling degrees of Actions on scaling numbers of +// sub-buffers. 
+ +class SubBufferWrapper : public clMemWrapper +{ +public: + cl_mem mParentBuffer; + size_t mOrigin; + size_t mSize; + + cl_int Allocate( cl_mem parent, cl_mem_flags flags, size_t origin, size_t size ) + { + mParentBuffer = parent; + mOrigin = origin; + mSize = size; + + cl_buffer_region region; + region.origin = mOrigin; + region.size = mSize; + + cl_int error; + mMem = clCreateSubBuffer( mParentBuffer, flags, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error ); + return error; + } +}; + +class Action +{ +public: + virtual ~Action() {} + virtual cl_int Execute( cl_context context, cl_command_queue queue, cl_char tag, SubBufferWrapper &buffer1, SubBufferWrapper &buffer2, cl_char *parentBufferState ) = 0; + virtual const char * GetName( void ) const = 0; + + static MTdata d; + static MTdata GetRandSeed( void ) + { + if ( d == 0 ) + d = init_genrand( gRandomSeed ); + return d; + } + static void FreeRandSeed() { + if ( d != 0 ) { + free_mtdata(d); + d = 0; + } + } +}; + +MTdata Action::d = 0; + +class ReadWriteAction : public Action +{ +public: + virtual ~ReadWriteAction() {} + virtual const char * GetName( void ) const { return "ReadWrite";} + + virtual cl_int Execute( cl_context context, cl_command_queue queue, cl_char tag, SubBufferWrapper &buffer1, SubBufferWrapper &buffer2, cl_char *parentBufferState ) + { + cl_char *tempBuffer = (cl_char*)malloc(buffer1.mSize); + if (!tempBuffer) { + log_error("Out of memory\n"); + return -1; + } + cl_int error = clEnqueueReadBuffer( queue, buffer1, CL_TRUE, 0, buffer1.mSize, tempBuffer, 0, NULL, NULL ); + test_error( error, "Unable to enqueue buffer read" ); + + size_t start = get_random_size_t( 0, buffer1.mSize / 2, GetRandSeed() ); + size_t end = get_random_size_t( start, buffer1.mSize, GetRandSeed() ); + + for ( size_t i = start; i < end; i++ ) + { + tempBuffer[ i ] |= tag; + parentBufferState[ i + buffer1.mOrigin ] |= tag; + } + + error = clEnqueueWriteBuffer( queue, buffer1, CL_TRUE, 0, buffer1.mSize, tempBuffer, 0, NULL, NULL 
); + test_error( error, "Unable to enqueue buffer write" ); + free(tempBuffer); + return CL_SUCCESS; + } +}; + +#ifndef MAX +#define MAX( _a, _b ) ( (_a) > (_b) ? (_a) : (_b) ) +#endif +#ifndef MIN +#define MIN( _a, _b ) ( (_a) < (_b) ? (_a) : (_b) ) +#endif + +class CopyAction : public Action +{ +public: + virtual ~CopyAction() {} + virtual const char * GetName( void ) const { return "Copy";} + + virtual cl_int Execute( cl_context context, cl_command_queue queue, cl_char tag, SubBufferWrapper &buffer1, SubBufferWrapper &buffer2, cl_char *parentBufferState ) + { + // Copy from sub-buffer 1 to sub-buffer 2 + size_t size = get_random_size_t( 0, MIN( buffer1.mSize, buffer2.mSize ), GetRandSeed() ); + + size_t startOffset = get_random_size_t( 0, buffer1.mSize - size, GetRandSeed() ); + size_t endOffset = get_random_size_t( 0, buffer2.mSize - size, GetRandSeed() ); + + cl_int error = clEnqueueCopyBuffer( queue, buffer1, buffer2, startOffset, endOffset, size, 0, NULL, NULL ); + test_error( error, "Unable to enqueue buffer copy" ); + + memcpy( parentBufferState + buffer2.mOrigin + endOffset, parentBufferState + buffer1.mOrigin + startOffset, size ); + + return CL_SUCCESS; + } +}; + +class MapAction : public Action +{ +public: + virtual ~MapAction() {} + virtual const char * GetName( void ) const { return "Map";} + + virtual cl_int Execute( cl_context context, cl_command_queue queue, cl_char tag, SubBufferWrapper &buffer1, SubBufferWrapper &buffer2, cl_char *parentBufferState ) + { + size_t size = get_random_size_t( 0, buffer1.mSize, GetRandSeed() ); + size_t start = get_random_size_t( 0, buffer1.mSize - size, GetRandSeed() ); + + cl_int error; + void * mappedPtr = clEnqueueMapBuffer( queue, buffer1, CL_TRUE, (cl_map_flags)( CL_MAP_READ | CL_MAP_WRITE ), + start, size, 0, NULL, NULL, &error ); + test_error( error, "Unable to map buffer" ); + + cl_char *cPtr = (cl_char *)mappedPtr; + for ( size_t i = 0; i < size; i++ ) + { + cPtr[ i ] |= tag; + parentBufferState[ i + start 
+ buffer1.mOrigin ] |= tag; + } + + error = clEnqueueUnmapMemObject( queue, buffer1, mappedPtr, 0, NULL, NULL ); + test_error( error, "Unable to unmap buffer" ); + + return CL_SUCCESS; + } +}; + +class KernelReadWriteAction : public Action +{ +public: + virtual ~KernelReadWriteAction() {} + virtual const char * GetName( void ) const { return "KernelReadWrite";} + + virtual cl_int Execute( cl_context context, cl_command_queue queue, cl_char tag, SubBufferWrapper &buffer1, SubBufferWrapper &buffer2, cl_char *parentBufferState ) + { + const char *kernelCode[] = { + "__kernel void readTest( __global char *inBuffer, char tag )\n" + "{\n" + " int tid = get_global_id(0);\n" + " inBuffer[ tid ] |= tag;\n" + "}\n" }; + + clProgramWrapper program; + clKernelWrapper kernel; + cl_int error; + + if ( create_single_kernel_helper( context, &program, &kernel, 1, kernelCode, "readTest" ) ) + { + return -1; + } + + size_t threads[1] = { buffer1.mSize }; + + error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &buffer1 ); + test_error( error, "Unable to set kernel argument" ); + error = clSetKernelArg( kernel, 1, sizeof( tag ), &tag ); + test_error( error, "Unable to set kernel argument" ); + + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL ); + test_error( error, "Unable to queue kernel" ); + + for ( size_t i = 0; i < buffer1.mSize; i++ ) + parentBufferState[ i + buffer1.mOrigin ] |= tag; + + return CL_SUCCESS; + } +}; + +cl_int get_reasonable_buffer_size( cl_device_id device, size_t &outSize ) +{ + cl_ulong maxAllocSize; + cl_int error; + + // Get the largest possible buffer we could allocate + error = clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL ); + test_error( error, "Unable to get max alloc size" ); + + // Don't create a buffer quite that big, just so we have some space left over for other work + outSize = (size_t)( maxAllocSize / 5 ); + + // Cap at 32M so tests complete in a reasonable 
amount of time. + if ( outSize > 32 << 20 ) + outSize = 32 << 20; + + return CL_SUCCESS; +} + +size_t find_subbuffer_by_index( SubBufferWrapper * subBuffers, size_t numSubBuffers, size_t index ) +{ + for ( size_t i = 0; i < numSubBuffers; i++ ) + { + if ( subBuffers[ i ].mOrigin > index ) + return numSubBuffers; + if ( ( subBuffers[ i ].mOrigin <= index ) && ( ( subBuffers[ i ].mOrigin + subBuffers[ i ].mSize ) > index ) ) + return i; + } + return numSubBuffers; +} + +// This tests the read/write capabilities of sub buffers (if we are read/write, the sub buffers +// can't overlap) +int test_sub_buffers_read_write_core( cl_context context, cl_command_queue queueA, cl_command_queue queueB, size_t mainSize, size_t addressAlign ) +{ + clMemWrapper mainBuffer; + SubBufferWrapper subBuffers[ 8 ]; + size_t numSubBuffers; + cl_int error; + size_t i; + MTdata m = init_genrand( 22 ); + + + cl_char * mainBufferContents = (cl_char*)calloc(1,mainSize); + cl_char * actualResults = (cl_char*)calloc(1,mainSize); + + for ( i = 0; i < mainSize / 4; i++ ) + ((cl_uint*) mainBufferContents)[i] = genrand_int32(m); + + free_mtdata( m ); + + // Create the main buffer to test against + mainBuffer = clCreateBuffer( context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, mainSize, mainBufferContents, &error ); + test_error( error, "Unable to create test main buffer" ); + + // Create some sub-buffers to use + size_t toStartFrom = 0; + for ( numSubBuffers = 0; numSubBuffers < 8; numSubBuffers++ ) + { + size_t endRange = toStartFrom + ( mainSize / 4 ); + if ( endRange > mainSize ) + endRange = mainSize; + + size_t offset = get_random_size_t( toStartFrom / addressAlign, endRange / addressAlign, Action::GetRandSeed() ) * addressAlign; + size_t size = get_random_size_t( 1, ( MIN( mainSize / 8, mainSize - offset ) ) / addressAlign, Action::GetRandSeed() ) * addressAlign; + error = subBuffers[ numSubBuffers ].Allocate( mainBuffer, CL_MEM_READ_WRITE, offset, size ); + test_error( error, "Unable to 
allocate sub buffer" ); + + toStartFrom = offset + size; + if ( toStartFrom > ( mainSize - ( addressAlign * 256 ) ) ) + break; + } + + ReadWriteAction rwAction; + MapAction mapAction; + CopyAction copyAction; + KernelReadWriteAction kernelAction; + + Action * actions[] = { &rwAction, &mapAction, &copyAction, &kernelAction }; + int numErrors = 0; + + // Do the following steps twice, to make sure the parent gets updated *and* we can + // still work on the sub-buffers + cl_command_queue prev_queue = queueA; + for ( int time = 0; time < 2; time++ ) + { + // Randomly apply actions to the set of sub buffers + size_t i; + for ( i = 0; i < 64; i++ ) + { + int which = random_in_range( 0, 3, Action::GetRandSeed() ); + int whichQueue = random_in_range( 0, 1, Action::GetRandSeed() ); + int whichBufferA = random_in_range( 0, (int)numSubBuffers - 1, Action::GetRandSeed() ); + int whichBufferB; + do + { + whichBufferB = random_in_range( 0, (int)numSubBuffers - 1, Action::GetRandSeed() ); + } while ( whichBufferB == whichBufferA ); + + cl_command_queue queue = ( whichQueue == 1 ) ? queueB : queueA; + if (queue != prev_queue) { + error = clFinish( prev_queue ); + test_error( error, "Error finishing other queue." ); + + prev_queue = queue; + } + + error = actions[ which ]->Execute( context, queue, (cl_int)i, subBuffers[ whichBufferA ], subBuffers[ whichBufferB ], mainBufferContents ); + test_error( error, "Unable to execute action against sub buffers" ); + } + + error = clFinish( queueA ); + test_error( error, "Error finishing queueA." ); + + error = clFinish( queueB ); + test_error( error, "Error finishing queueB." 
); + + // Validate by reading the final contents of the main buffer and + // validating against our ref copy we generated + error = clEnqueueReadBuffer( queueA, mainBuffer, CL_TRUE, 0, mainSize, actualResults, 0, NULL, NULL ); + test_error( error, "Unable to enqueue buffer read" ); + + for ( i = 0; i < mainSize; i += 65536 ) + { + size_t left = 65536; + if ( ( i + left ) > mainSize ) + left = mainSize - i; + + if ( memcmp( actualResults + i, mainBufferContents + i, left ) == 0 ) + continue; + + // The fast compare failed, so we need to determine where exactly the failure is + + for ( size_t j = 0; j < left; j++ ) + { + if ( actualResults[ i + j ] != mainBufferContents[ i + j ] ) + { + // Hit a failure; report the subbuffer at this address as having failed + size_t sbThatFailed = find_subbuffer_by_index( subBuffers, numSubBuffers, i + j ); + if ( sbThatFailed == numSubBuffers ) + { + log_error( "ERROR: Validation failure outside of a sub-buffer! (Shouldn't be possible, but it happened at index %ld out of %ld...)\n", i + j, mainSize ); + // Since this is a nonsensical, don't bother continuing to check + // (we will, however, print our map of sub-buffers for comparison) + for ( size_t k = 0; k < numSubBuffers; k++ ) + { + log_error( "\tBuffer %ld: %ld to %ld (length %ld)\n", k, subBuffers[ k ].mOrigin, subBuffers[ k ].mOrigin + subBuffers[ k ].mSize, subBuffers[ k ].mSize ); + } + return -1; + } + log_error( "ERROR: Validation failure on sub-buffer %ld (start: %ld, length: %ld)\n", sbThatFailed, subBuffers[ sbThatFailed ].mOrigin, subBuffers[ sbThatFailed ].mSize ); + size_t newPos = subBuffers[ sbThatFailed ].mOrigin + subBuffers[ sbThatFailed ].mSize - 1; + i = newPos & ~65535; + j = newPos - i; + numErrors++; + } + } + } + } + + free(mainBufferContents); + free(actualResults); + Action::FreeRandSeed(); + + return numErrors; +} + +int test_sub_buffers_read_write( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + cl_int error; 
+ size_t mainSize; + cl_uint addressAlignBits; + + // Get the size of the main buffer to use + error = get_reasonable_buffer_size( deviceID, mainSize ); + test_error( error, "Unable to get reasonable buffer size" ); + + // Determine the alignment of the device so we can make sure sub buffers are valid + error = clGetDeviceInfo( deviceID, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof( addressAlignBits ), &addressAlignBits, NULL ); + test_error( error, "Unable to get device's address alignment" ); + + size_t addressAlign = addressAlignBits/8; + + return test_sub_buffers_read_write_core( context, queue, queue, mainSize, addressAlign ); +} + +// This test performs the same basic operations as sub_buffers_read_write, but instead of a single +// device, it creates a context and buffer shared between two devices, then executes commands +// on queues for each device to ensure that everything still operates as expected. +int test_sub_buffers_read_write_dual_devices( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + cl_int error; + + + // First obtain the second device + cl_device_id otherDevice = GetOpposingDevice( deviceID ); + if ( otherDevice == NULL ) + { + log_error( "ERROR: Unable to obtain a second device for sub-buffer dual-device test.\n" ); + return -1; + } + if ( otherDevice == deviceID ) + { + log_info( "Note: Unable to run dual-device sub-buffer test (only one device available). Skipping test (implicitly passing).\n" ); + return 0; + } + + // Determine the device id. 
+ size_t param_size; + error = clGetDeviceInfo(otherDevice, CL_DEVICE_NAME, 0, NULL, &param_size ); + test_error( error, "Error obtaining device name" ); + +#if !(defined(_WIN32) && defined(_MSC_VER)) + char device_name[param_size]; +#else + char* device_name = (char*)_malloca(param_size); +#endif + error = clGetDeviceInfo(otherDevice, CL_DEVICE_NAME, param_size, &device_name[0], NULL ); + test_error( error, "Error obtaining device name" ); + + log_info( "\tOther device obtained for dual device test is type %s\n", device_name ); + + // Create a shared context for these two devices + cl_device_id devices[ 2 ] = { deviceID, otherDevice }; + clContextWrapper testingContext = clCreateContext( NULL, 2, devices, NULL, NULL, &error ); + test_error( error, "Unable to create shared context" ); + + // Create two queues (can't use the existing one, because it's on the wrong context) + clCommandQueueWrapper queue1 = clCreateCommandQueueWithProperties( testingContext, deviceID, 0, &error ); + test_error( error, "Unable to create command queue on main device" ); + + clCommandQueueWrapper queue2 = clCreateCommandQueueWithProperties( testingContext, otherDevice, 0, &error ); + test_error( error, "Unable to create command queue on secondary device" ); + + // Determine the reasonable buffer size and address alignment that applies to BOTH devices + size_t maxBuffer1, maxBuffer2; + error = get_reasonable_buffer_size( deviceID, maxBuffer1 ); + test_error( error, "Unable to get buffer size for main device" ); + + error = get_reasonable_buffer_size( otherDevice, maxBuffer2 ); + test_error( error, "Unable to get buffer size for secondary device" ); + maxBuffer1 = MIN( maxBuffer1, maxBuffer2 ); + + cl_uint addressAlign1Bits, addressAlign2Bits; + error = clGetDeviceInfo( deviceID, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof( addressAlign1Bits ), &addressAlign1Bits, NULL ); + test_error( error, "Unable to get main device's address alignment" ); + + error = clGetDeviceInfo( otherDevice, 
CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof( addressAlign2Bits ), &addressAlign2Bits, NULL ); + test_error( error, "Unable to get secondary device's address alignment" ); + + cl_uint addressAlign1 = MAX( addressAlign1Bits, addressAlign2Bits ) / 8; + + // Finally time to run! + return test_sub_buffers_read_write_core( testingContext, queue1, queue2, maxBuffer1, addressAlign1 ); +} + +cl_int read_buffer_via_kernel( cl_context context, cl_command_queue queue, cl_mem buffer, size_t length, cl_char *outResults ) +{ + const char *kernelCode[] = { + "__kernel void readTest( __global char *inBuffer, __global char *outBuffer )\n" + "{\n" + " int tid = get_global_id(0);\n" + " outBuffer[ tid ] = inBuffer[ tid ];\n" + "}\n" }; + + clProgramWrapper program; + clKernelWrapper kernel; + cl_int error; + + if ( create_single_kernel_helper( context, &program, &kernel, 1, kernelCode, "readTest" ) ) + { + return -1; + } + + size_t threads[1] = { length }; + + clMemWrapper outStream = clCreateBuffer( context, CL_MEM_READ_WRITE, length, NULL, &error ); + test_error( error, "Unable to create output stream" ); + + error = clSetKernelArg( kernel, 0, sizeof( buffer ), &buffer ); + test_error( error, "Unable to set kernel argument" ); + error = clSetKernelArg( kernel, 1, sizeof( outStream ), &outStream ); + test_error( error, "Unable to set kernel argument" ); + + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL ); + test_error( error, "Unable to queue kernel" ); + + error = clEnqueueReadBuffer( queue, outStream, CL_TRUE, 0, length, outResults, 0, NULL, NULL ); + test_error( error, "Unable to read results from kernel" ); + + return CL_SUCCESS; +} + + +int test_sub_buffers_overlapping( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + cl_int error; + size_t mainSize; + cl_uint addressAlign; + + clMemWrapper mainBuffer; + SubBufferWrapper subBuffers[ 16 ]; + + + // Create the main buffer to test against + error = 
get_reasonable_buffer_size( deviceID, mainSize ); + test_error( error, "Unable to get reasonable buffer size" ); + + mainBuffer = clCreateBuffer( context, CL_MEM_READ_WRITE, mainSize, NULL, &error ); + test_error( error, "Unable to create test main buffer" ); + + // Determine the alignment of the device so we can make sure sub buffers are valid + error = clGetDeviceInfo( deviceID, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof( addressAlign ), &addressAlign, NULL ); + test_error( error, "Unable to get device's address alignment" ); + + // Create some sub-buffers to use. Note: they don't have to not overlap (we actually *want* them to overlap) + for ( size_t i = 0; i < 16; i++ ) + { + size_t offset = get_random_size_t( 0, mainSize / addressAlign, Action::GetRandSeed() ) * addressAlign; + size_t size = get_random_size_t( 1, ( mainSize - offset ) / addressAlign, Action::GetRandSeed() ) * addressAlign; + + error = subBuffers[ i ].Allocate( mainBuffer, CL_MEM_READ_ONLY, offset, size ); + test_error( error, "Unable to allocate sub buffer" ); + } + + /// For logging, we determine the amount of overlap we just generated + // Build a fast in-out map to help with generating the stats + int sbMap[ 32 ], mapSize = 0; + for ( int i = 0; i < 16; i++ ) + { + int j; + for ( j = 0; j < mapSize; j++ ) + { + size_t pt = ( sbMap[ j ] < 0 ) ? ( subBuffers[ -sbMap[ j ] ].mOrigin + subBuffers[ -sbMap[ j ] ].mSize ) + : subBuffers[ sbMap[ j ] ].mOrigin; + if ( subBuffers[ i ].mOrigin < pt ) + { + // Origin is before this part of the map, so move map forward so we can insert + memmove( &sbMap[ j + 1 ], &sbMap[ j ], sizeof( int ) * ( mapSize - j ) ); + sbMap[ j ] = i; + mapSize++; + break; + } + } + if ( j == mapSize ) + { + sbMap[ j ] = i; + mapSize++; + } + + size_t endPt = subBuffers[ i ].mOrigin + subBuffers[ i ].mSize; + for ( j = 0; j < mapSize; j++ ) + { + size_t pt = ( sbMap[ j ] < 0 ) ? 
( subBuffers[ -sbMap[ j ] ].mOrigin + subBuffers[ -sbMap[ j ] ].mSize ) + : subBuffers[ sbMap[ j ] ].mOrigin; + if ( endPt < pt ) + { + // Origin is before this part of the map, so move map forward so we can insert + memmove( &sbMap[ j + 1 ], &sbMap[ j ], sizeof( int ) * ( mapSize - j ) ); + sbMap[ j ] = -( i + 1 ); + mapSize++; + break; + } + } + if ( j == mapSize ) + { + sbMap[ j ] = -( i + 1 ); + mapSize++; + } + } + long long delta = 0; + size_t maxOverlap = 1, overlap = 0; + for ( int i = 0; i < 32; i++ ) + { + if ( sbMap[ i ] >= 0 ) + { + overlap++; + if ( overlap > 1 ) + delta -= (long long)( subBuffers[ sbMap[ i ] ].mOrigin ); + if ( overlap > maxOverlap ) + maxOverlap = overlap; + } + else + { + if ( overlap > 1 ) + delta += (long long)( subBuffers[ -sbMap[ i ] - 1 ].mOrigin + subBuffers[ -sbMap[ i ] - 1 ].mSize ); + overlap--; + } + } + + log_info( "\tTesting %d sub-buffers with %lld overlapping Kbytes (%d%%; as many as %ld buffers overlapping at once)\n", + 16, ( delta / 1024LL ), (int)( delta * 100LL / (long long)mainSize ), maxOverlap ); + + // Write some random contents to the main buffer + cl_char * contents = new cl_char[ mainSize ]; + generate_random_data( kChar, mainSize, Action::GetRandSeed(), contents ); + + error = clEnqueueWriteBuffer( queue, mainBuffer, CL_TRUE, 0, mainSize, contents, 0, NULL, NULL ); + test_error( error, "Unable to write to main buffer" ); + + // Now read from each sub-buffer and check to make sure that they make sense w.r.t. 
the main contents + cl_char * tempBuffer = new cl_char[ mainSize ]; + + int numErrors = 0; + for ( size_t i = 0; i < 16; i++ ) + { + // Read from this buffer + int which = random_in_range( 0, 1, Action::GetRandSeed() ); + if ( which ) + error = clEnqueueReadBuffer( queue, subBuffers[ i ], CL_TRUE, 0, subBuffers[ i ].mSize, tempBuffer, 0, NULL, NULL ); + else + error = read_buffer_via_kernel( context, queue, subBuffers[ i ], subBuffers[ i ].mSize, tempBuffer ); + test_error( error, "Unable to read sub buffer contents" ); + + if ( memcmp( tempBuffer, contents + subBuffers[ i ].mOrigin, subBuffers[ i ].mSize ) != 0 ) + { + log_error( "ERROR: Validation for sub-buffer %ld failed!\n", i ); + numErrors++; + } + } + + delete [] contents; + delete [] tempBuffer; + Action::FreeRandSeed(); + + return numErrors; +} + diff --git a/test_conformance/c11_atomics/CMakeLists.txt b/test_conformance/c11_atomics/CMakeLists.txt new file mode 100644 index 00000000..96525406 --- /dev/null +++ b/test_conformance/c11_atomics/CMakeLists.txt @@ -0,0 +1,17 @@ +set(MODULE_NAME C11_ATOMICS) + +set(${MODULE_NAME}_SOURCES + common.cpp + host_atomics.cpp + main.cpp + test_atomics.cpp + ../../test_common/harness/ThreadPool.c + ../../test_common/harness/errorHelpers.c + ../../test_common/harness/testHarness.c + ../../test_common/harness/kernelHelpers.c + ../../test_common/harness/mt19937.c + ../../test_common/harness/msvc9.c + ../../test_common/harness/parseParameters.cpp +) + +include(../CMakeCommon.txt) diff --git a/test_conformance/c11_atomics/Makefile b/test_conformance/c11_atomics/Makefile new file mode 100644 index 00000000..7bd23b5d --- /dev/null +++ b/test_conformance/c11_atomics/Makefile @@ -0,0 +1,46 @@ +ifdef BUILD_WITH_ATF +ATF = -framework ATF +USE_ATF = -DUSE_ATF +endif + +SRCS = main.c \ + test_atomics.cpp \ + host_atomics.cpp \ + common.cpp \ + ../../test_common/harness/errorHelpers.c \ + ../../test_common/harness/threadTesting.c \ + ../../test_common/harness/testHarness.c \ + 
../../test_common/harness/kernelHelpers.c \ + ../../test_common/harness/typeWrappers.cpp \ + ../../test_common/harness/mt19937.c \ + ../../test_common/harness/conversions.c \ + ../../test_common/harness/ThreadPool.c + +DEFINES = DONT_TEST_GARBAGE_POINTERS + +SOURCES = $(abspath $(SRCS)) +LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries +LIBPATH += -L. +HEADERS = +TARGET = test_atomics_c11 +INCLUDE = +COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32 +CC = c++ +CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE) +CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE) +LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF} + +OBJECTS := ${SOURCES:.c=.o} +OBJECTS := ${OBJECTS:.cpp=.o} + +TARGETOBJECT = +all: $(TARGET) + +$(TARGET): $(OBJECTS) + $(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES) + +clean: + rm -f $(TARGET) $(OBJECTS) + +.DEFAULT: + @echo The target \"$@\" does not exist in Makefile. diff --git a/test_conformance/c11_atomics/common.cpp b/test_conformance/c11_atomics/common.cpp new file mode 100644 index 00000000..1fb6b9b2 --- /dev/null +++ b/test_conformance/c11_atomics/common.cpp @@ -0,0 +1,208 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/testHarness.h" +#include "../../test_common/harness/kernelHelpers.h" +#include "../../test_common/harness/typeWrappers.h" + +#include "common.h" + +const char *get_memory_order_type_name(TExplicitMemoryOrderType orderType) +{ + switch (orderType) + { + case MEMORY_ORDER_EMPTY: + return ""; + case MEMORY_ORDER_RELAXED: + return "memory_order_relaxed"; + case MEMORY_ORDER_ACQUIRE: + return "memory_order_acquire"; + case MEMORY_ORDER_RELEASE: + return "memory_order_release"; + case MEMORY_ORDER_ACQ_REL: + return "memory_order_acq_rel"; + case MEMORY_ORDER_SEQ_CST: + return "memory_order_seq_cst"; + default: + return 0; + } +} + +const char *get_memory_scope_type_name(TExplicitMemoryScopeType scopeType) +{ + switch (scopeType) + { + case MEMORY_SCOPE_EMPTY: + return ""; + case MEMORY_SCOPE_WORK_GROUP: + return "memory_scope_work_group"; + case MEMORY_SCOPE_DEVICE: + return "memory_scope_device"; + case MEMORY_SCOPE_ALL_SVM_DEVICES: + return "memory_scope_all_svm_devices"; + default: + return 0; + } +} + + +cl_uint AtomicTypeInfo::Size(cl_device_id device) +{ + switch(_type) + { + case TYPE_ATOMIC_INT: + case TYPE_ATOMIC_UINT: + case TYPE_ATOMIC_FLOAT: + case TYPE_ATOMIC_FLAG: + return sizeof(cl_int); + case TYPE_ATOMIC_LONG: + case TYPE_ATOMIC_ULONG: + case TYPE_ATOMIC_DOUBLE: + return sizeof(cl_long); + case TYPE_ATOMIC_INTPTR_T: + case TYPE_ATOMIC_UINTPTR_T: + case TYPE_ATOMIC_SIZE_T: + case TYPE_ATOMIC_PTRDIFF_T: + { + int error; + cl_uint addressBits = 0; + + error = clGetDeviceInfo(device, CL_DEVICE_ADDRESS_BITS, sizeof(addressBits), &addressBits, 0); + test_error_ret(error, "clGetDeviceInfo", 0); + + return addressBits/8; + } + default: + return 0; + } +} + +const char *AtomicTypeInfo::AtomicTypeName() +{ + switch(_type) + { + case TYPE_ATOMIC_INT: + return "atomic_int"; + case TYPE_ATOMIC_UINT: + return "atomic_uint"; + case TYPE_ATOMIC_FLOAT: + return "atomic_float"; + case TYPE_ATOMIC_FLAG: + return "atomic_flag"; + 
case TYPE_ATOMIC_LONG: + return "atomic_long"; + case TYPE_ATOMIC_ULONG: + return "atomic_ulong"; + case TYPE_ATOMIC_DOUBLE: + return "atomic_double"; + case TYPE_ATOMIC_INTPTR_T: + return "atomic_intptr_t"; + case TYPE_ATOMIC_UINTPTR_T: + return "atomic_uintptr_t"; + case TYPE_ATOMIC_SIZE_T: + return "atomic_size_t"; + case TYPE_ATOMIC_PTRDIFF_T: + return "atomic_ptrdiff_t"; + default: + return 0; + } +} + +const char *AtomicTypeInfo::RegularTypeName() +{ + switch(_type) + { + case TYPE_ATOMIC_INT: + return "int"; + case TYPE_ATOMIC_UINT: + return "uint"; + case TYPE_ATOMIC_FLOAT: + return "float"; + case TYPE_ATOMIC_FLAG: + return "int"; + case TYPE_ATOMIC_LONG: + return "long"; + case TYPE_ATOMIC_ULONG: + return "ulong"; + case TYPE_ATOMIC_DOUBLE: + return "double"; + case TYPE_ATOMIC_INTPTR_T: + return "intptr_t"; + case TYPE_ATOMIC_UINTPTR_T: + return "uintptr_t"; + case TYPE_ATOMIC_SIZE_T: + return "size_t"; + case TYPE_ATOMIC_PTRDIFF_T: + return "ptrdiff_t"; + default: + return 0; + } +} + +const char *AtomicTypeInfo::AddSubOperandTypeName() +{ + switch(_type) + { + case TYPE_ATOMIC_INTPTR_T: + case TYPE_ATOMIC_UINTPTR_T: + return AtomicTypeInfo(TYPE_ATOMIC_PTRDIFF_T).RegularTypeName(); + default: + return RegularTypeName(); + } +} + +int AtomicTypeInfo::IsSupported(cl_device_id device) +{ + switch(_type) + { + case TYPE_ATOMIC_INT: + case TYPE_ATOMIC_UINT: + case TYPE_ATOMIC_FLOAT: + case TYPE_ATOMIC_FLAG: + return 1; + case TYPE_ATOMIC_LONG: + case TYPE_ATOMIC_ULONG: + return is_extension_available(device, "cl_khr_int64_base_atomics") && + is_extension_available(device, "cl_khr_int64_extended_atomics"); + case TYPE_ATOMIC_DOUBLE: + return is_extension_available(device, "cl_khr_int64_base_atomics") && + is_extension_available(device, "cl_khr_int64_extended_atomics") && + is_extension_available(device, "cl_khr_fp64"); + case TYPE_ATOMIC_INTPTR_T: + case TYPE_ATOMIC_UINTPTR_T: + case TYPE_ATOMIC_SIZE_T: + case TYPE_ATOMIC_PTRDIFF_T: + if(Size(device) == 4) + 
return 1; + return is_extension_available(device, "cl_khr_int64_base_atomics") && + is_extension_available(device, "cl_khr_int64_extended_atomics"); + default: + return 0; + } +} + +template<> cl_int AtomicTypeExtendedInfo::MinValue() {return CL_INT_MIN;} +template<> cl_uint AtomicTypeExtendedInfo::MinValue() {return 0;} +template<> cl_long AtomicTypeExtendedInfo::MinValue() {return CL_LONG_MIN;} +template<> cl_ulong AtomicTypeExtendedInfo::MinValue() {return 0;} +template<> cl_float AtomicTypeExtendedInfo::MinValue() {return CL_FLT_MIN;} +template<> cl_double AtomicTypeExtendedInfo::MinValue() {return CL_DBL_MIN;} + +template<> cl_int AtomicTypeExtendedInfo::MaxValue() {return CL_INT_MAX;} +template<> cl_uint AtomicTypeExtendedInfo::MaxValue() {return CL_UINT_MAX;} +template<> cl_long AtomicTypeExtendedInfo::MaxValue() {return CL_LONG_MAX;} +template<> cl_ulong AtomicTypeExtendedInfo::MaxValue() {return CL_ULONG_MAX;} +template<> cl_float AtomicTypeExtendedInfo::MaxValue() {return CL_FLT_MAX;} +template<> cl_double AtomicTypeExtendedInfo::MaxValue() {return CL_DBL_MAX;} diff --git a/test_conformance/c11_atomics/common.h b/test_conformance/c11_atomics/common.h new file mode 100644 index 00000000..afb5106f --- /dev/null +++ b/test_conformance/c11_atomics/common.h @@ -0,0 +1,1179 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef _COMMON_H_ +#define _COMMON_H_ + +#include "../../test_common/harness/testHarness.h" +#include "../../test_common/harness/typeWrappers.h" +#include "../../test_common/harness/ThreadPool.h" + +#include "host_atomics.h" + +#include +#include + +#define MAX_DEVICE_THREADS (gHost ? 0U : gMaxDeviceThreads) +#define MAX_HOST_THREADS GetThreadCount() + +#define EXECUTE_TEST(error, test)\ + error |= test;\ + if(error && !gContinueOnError)\ + return error; + +enum TExplicitAtomicType +{ + TYPE_ATOMIC_INT, + TYPE_ATOMIC_UINT, + TYPE_ATOMIC_LONG, + TYPE_ATOMIC_ULONG, + TYPE_ATOMIC_FLOAT, + TYPE_ATOMIC_DOUBLE, + TYPE_ATOMIC_INTPTR_T, + TYPE_ATOMIC_UINTPTR_T, + TYPE_ATOMIC_SIZE_T, + TYPE_ATOMIC_PTRDIFF_T, + TYPE_ATOMIC_FLAG +}; + +enum TExplicitMemoryScopeType +{ + MEMORY_SCOPE_EMPTY, + MEMORY_SCOPE_WORK_GROUP, + MEMORY_SCOPE_DEVICE, + MEMORY_SCOPE_ALL_SVM_DEVICES +}; + +extern bool gHost; // temporary flag for testing native host threads (test verification) +extern bool gOldAPI; // temporary flag for testing with old API (OpenCL 1.2) +extern bool gContinueOnError; // execute all cases even when errors detected +extern bool gNoGlobalVariables; // disable cases with global atomics in program scope +extern bool gNoGenericAddressSpace; // disable cases with generic address space +extern bool gUseHostPtr; // use malloc/free instead of clSVMAlloc/clSVMFree +extern bool gDebug; // print OpenCL kernel code +extern int gInternalIterations; // internal test iterations for atomic operation, sufficient to verify atomicity +extern int gMaxDeviceThreads; // maximum number of threads executed on OCL device + +extern cl_uint gRandomSeed; + +extern const char *get_memory_order_type_name(TExplicitMemoryOrderType orderType); +extern const char *get_memory_scope_type_name(TExplicitMemoryScopeType scopeType); + +class AtomicTypeInfo +{ +public: + TExplicitAtomicType _type; + AtomicTypeInfo(TExplicitAtomicType type): _type(type) {} + cl_uint Size(cl_device_id device); + const char* 
AtomicTypeName(); + const char* RegularTypeName(); + const char* AddSubOperandTypeName(); + int IsSupported(cl_device_id device); +}; + +template +class AtomicTypeExtendedInfo : public AtomicTypeInfo +{ +public: + AtomicTypeExtendedInfo(TExplicitAtomicType type) : AtomicTypeInfo(type) {} + HostDataType MinValue(); + HostDataType MaxValue(); + HostDataType SpecialValue(cl_uchar x) + { + HostDataType tmp; + cl_uchar *ptr = (cl_uchar*)&tmp; + for(cl_uint i = 0; i < sizeof(HostDataType)/sizeof(cl_uchar); i++) + ptr[i] = x; + return tmp; + } + HostDataType SpecialValue(cl_ushort x) + { + HostDataType tmp; + cl_ushort *ptr = (cl_ushort*)&tmp; + for(cl_uint i = 0; i < sizeof(HostDataType)/sizeof(cl_ushort); i++) + ptr[i] = x; + return tmp; + } +}; + +class CTest { +public: + virtual int Execute(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) = 0; +}; + +template +class CBasicTest : CTest +{ +public: + typedef struct { + CBasicTest *test; + cl_uint tid; + cl_uint threadCount; + volatile HostAtomicType *destMemory; + HostDataType *oldValues; + } THostThreadContext; + static cl_int HostThreadFunction(cl_uint job_id, cl_uint thread_id, void *userInfo) + { + THostThreadContext *threadContext = ((THostThreadContext*)userInfo)+job_id; + threadContext->test->HostFunction(threadContext->tid, threadContext->threadCount, threadContext->destMemory, threadContext->oldValues); + return 0; + } + CBasicTest(TExplicitAtomicType dataType, bool useSVM) : CTest(), + _maxDeviceThreads(MAX_DEVICE_THREADS), + _dataType(dataType), _useSVM(useSVM), _startValue(255), + _localMemory(false), _declaredInProgram(false), + _usedInFunction(false), _genericAddrSpace(false), + _oldValueCheck(true), _localRefValues(false), + _maxGroupSize(0), _passCount(0), _iterations(gInternalIterations) + { + } + virtual ~CBasicTest() + { + if(_passCount) + log_info(" %u tests executed successfully for %s\n", _passCount, DataType().AtomicTypeName()); + } + virtual cl_uint 
NumResults(cl_uint threadCount, cl_device_id deviceID) + { + return 1; + } + virtual cl_uint NumNonAtomicVariablesPerThread() + { + return 1; + } + virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) + { + return false; + } + virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, MTdata d) + { + return false; + } + virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues) + { + return false; + } + virtual std::string PragmaHeader(cl_device_id deviceID); + virtual std::string ProgramHeader(cl_uint maxNumDestItems); + virtual std::string FunctionCode(); + virtual std::string KernelCode(cl_uint maxNumDestItems); + virtual std::string ProgramCore() = 0; + virtual std::string SingleTestName() + { + std::string testName = LocalMemory() ? "local" : "global"; + testName += " "; + testName += DataType().AtomicTypeName(); + if(DeclaredInProgram()) + { + testName += " declared in program"; + } + if(DeclaredInProgram() && UsedInFunction()) + testName += ","; + if(UsedInFunction()) + { + testName += " used in "; + if(GenericAddrSpace()) + testName += "generic "; + testName += "function"; + } + return testName; + } + virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue); + int ExecuteForEachPointerType(cl_device_id deviceID, cl_context context, cl_command_queue queue) + { + int error = 0; + UsedInFunction(false); + EXECUTE_TEST(error, ExecuteSingleTest(deviceID, context, queue)); + UsedInFunction(true); + GenericAddrSpace(false); + EXECUTE_TEST(error, ExecuteSingleTest(deviceID, context, queue)); + GenericAddrSpace(true); + EXECUTE_TEST(error, ExecuteSingleTest(deviceID, context, queue)); + GenericAddrSpace(false); + return error; + } + int ExecuteForEachDeclarationType(cl_device_id deviceID, cl_context context, cl_command_queue queue) + { + int error = 0; + DeclaredInProgram(false); + 
EXECUTE_TEST(error, ExecuteForEachPointerType(deviceID, context, queue)); + if(!UseSVM()) + { + DeclaredInProgram(true); + EXECUTE_TEST(error, ExecuteForEachPointerType(deviceID, context, queue)); + } + return error; + } + virtual int ExecuteForEachParameterSet(cl_device_id deviceID, cl_context context, cl_command_queue queue) + { + int error = 0; + if(_maxDeviceThreads > 0 && !UseSVM()) + { + LocalMemory(true); + EXECUTE_TEST(error, ExecuteForEachDeclarationType(deviceID, context, queue)); + } + if(_maxDeviceThreads+MaxHostThreads() > 0) + { + LocalMemory(false); + EXECUTE_TEST(error, ExecuteForEachDeclarationType(deviceID, context, queue)); + } + return error; + } + virtual int Execute(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) + { + if(sizeof(HostAtomicType) != DataType().Size(deviceID)) + { + log_info("Invalid test: Host atomic type size (%u) is different than OpenCL type size (%u)\n", (cl_uint)sizeof(HostAtomicType), DataType().Size(deviceID)); + return -1; + } + if(sizeof(HostAtomicType) != sizeof(HostDataType)) + { + log_info("Invalid test: Host atomic type size (%u) is different than corresponding type size (%u)\n", (cl_uint)sizeof(HostAtomicType), (cl_uint)sizeof(HostDataType)); + return -1; + } + // Verify we can run first + if(UseSVM() && !gUseHostPtr) + { + cl_device_svm_capabilities caps; + cl_int error = clGetDeviceInfo(deviceID, CL_DEVICE_SVM_CAPABILITIES, sizeof(caps), &caps, 0); + test_error(error, "clGetDeviceInfo failed"); + if((caps & CL_DEVICE_SVM_ATOMICS) == 0) + { + log_info("\t%s - SVM_ATOMICS not supported\n", DataType().AtomicTypeName()); + // implicit pass + return 0; + } + } + if(!DataType().IsSupported(deviceID)) + { + log_info("\t%s not supported\n", DataType().AtomicTypeName()); + // implicit pass or host test (debug feature) + if(UseSVM()) + return 0; + _maxDeviceThreads = 0; + } + if(_maxDeviceThreads+MaxHostThreads() == 0) + return 0; + return ExecuteForEachParameterSet(deviceID, context, 
queue); + } + virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) + { + log_info("Empty thread function %u\n", (cl_uint)tid); + } + AtomicTypeExtendedInfo DataType() const + { + return AtomicTypeExtendedInfo(_dataType); + } + cl_uint _maxDeviceThreads; + virtual cl_uint MaxHostThreads() + { + if(UseSVM() || gHost) + return MAX_HOST_THREADS; + else + return 0; + } + virtual bool SVMDataBufferAllSVMConsistent() {return false;} + bool UseSVM() {return _useSVM;} + void StartValue(HostDataType startValue) {_startValue = startValue;} + HostDataType StartValue() {return _startValue;} + void LocalMemory(bool local) {_localMemory = local;} + bool LocalMemory() {return _localMemory;} + void DeclaredInProgram(bool declaredInProgram) {_declaredInProgram = declaredInProgram;} + bool DeclaredInProgram() {return _declaredInProgram;} + void UsedInFunction(bool local) {_usedInFunction = local;} + bool UsedInFunction() {return _usedInFunction;} + void GenericAddrSpace(bool genericAddrSpace) {_genericAddrSpace = genericAddrSpace;} + bool GenericAddrSpace() {return _genericAddrSpace;} + void OldValueCheck(bool check) {_oldValueCheck = check;} + bool OldValueCheck() {return _oldValueCheck;} + void LocalRefValues(bool localRefValues) {_localRefValues = localRefValues;} + bool LocalRefValues() {return _localRefValues;} + void MaxGroupSize(cl_uint maxGroupSize) {_maxGroupSize = maxGroupSize;} + cl_uint MaxGroupSize() {return _maxGroupSize;} + void CurrentGroupSize(cl_uint currentGroupSize) + { + if(MaxGroupSize() && MaxGroupSize() < currentGroupSize) + _currentGroupSize = MaxGroupSize(); + else + _currentGroupSize = currentGroupSize; + } + cl_uint CurrentGroupSize() {return _currentGroupSize;} + virtual cl_uint CurrentGroupNum(cl_uint threadCount) + { + if(threadCount == 0) + return 0; + if(LocalMemory()) + return 1; + return threadCount/CurrentGroupSize(); + } + cl_int Iterations() {return _iterations;} + std::string 
IterationsStr() {std::stringstream ss; ss << _iterations; return ss.str();} +private: + const TExplicitAtomicType _dataType; + const bool _useSVM; + HostDataType _startValue; + bool _localMemory; + bool _declaredInProgram; + bool _usedInFunction; + bool _genericAddrSpace; + bool _oldValueCheck; + bool _localRefValues; + cl_uint _maxGroupSize; + cl_uint _currentGroupSize; + cl_uint _passCount; + const cl_int _iterations; +}; + +template +class CBasicTestMemOrderScope : public CBasicTest +{ +public: + using CBasicTest::LocalMemory; + using CBasicTest::MaxGroupSize; + CBasicTestMemOrderScope(TExplicitAtomicType dataType, bool useSVM = false) : CBasicTest(dataType, useSVM) + { + } + virtual std::string ProgramHeader(cl_uint maxNumDestItems) + { + std::string header; + if(gOldAPI) + { + std::string s = MemoryScope() == MEMORY_SCOPE_EMPTY ? "" : ",s"; + header += + "#define atomic_store_explicit(x,y,o"+s+") atomic_store(x,y)\n" + "#define atomic_load_explicit(x,o"+s+") atomic_load(x)\n" + "#define atomic_exchange_explicit(x,y,o"+s+") atomic_exchange(x,y)\n" + "#define atomic_compare_exchange_strong_explicit(x,y,z,os,of"+s+") atomic_compare_exchange_strong(x,y,z)\n" + "#define atomic_compare_exchange_weak_explicit(x,y,z,os,of"+s+") atomic_compare_exchange_weak(x,y,z)\n" + "#define atomic_fetch_add_explicit(x,y,o"+s+") atomic_fetch_add(x,y)\n" + "#define atomic_fetch_sub_explicit(x,y,o"+s+") atomic_fetch_sub(x,y)\n" + "#define atomic_fetch_or_explicit(x,y,o"+s+") atomic_fetch_or(x,y)\n" + "#define atomic_fetch_xor_explicit(x,y,o"+s+") atomic_fetch_xor(x,y)\n" + "#define atomic_fetch_and_explicit(x,y,o"+s+") atomic_fetch_and(x,y)\n" + "#define atomic_fetch_min_explicit(x,y,o"+s+") atomic_fetch_min(x,y)\n" + "#define atomic_fetch_max_explicit(x,y,o"+s+") atomic_fetch_max(x,y)\n" + "#define atomic_flag_test_and_set_explicit(x,o"+s+") atomic_flag_test_and_set(x)\n" + "#define atomic_flag_clear_explicit(x,o"+s+") atomic_flag_clear(x)\n"; + } + return 
header+CBasicTest::ProgramHeader(maxNumDestItems); + } + virtual std::string SingleTestName() + { + std::string testName = CBasicTest::SingleTestName(); + if(MemoryOrder() != MEMORY_ORDER_EMPTY) + { + testName += std::string(", ")+std::string(get_memory_order_type_name(MemoryOrder())).substr(sizeof("memory")); + } + if(MemoryScope() != MEMORY_SCOPE_EMPTY) + { + testName += std::string(", ")+std::string(get_memory_scope_type_name(MemoryScope())).substr(sizeof("memory")); + } + return testName; + } + virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue) + { + if(LocalMemory() && + MemoryScope() != MEMORY_SCOPE_EMPTY && + MemoryScope() != MEMORY_SCOPE_WORK_GROUP) //memory scope should only be used for global memory + return 0; + if(MemoryScope() == MEMORY_SCOPE_DEVICE) + MaxGroupSize(16); // increase number of groups by forcing smaller group size + else + MaxGroupSize(0); // group size limited by device capabilities + return CBasicTest::ExecuteSingleTest(deviceID, context, queue); + } + virtual int ExecuteForEachParameterSet(cl_device_id deviceID, cl_context context, cl_command_queue queue) + { + // repeat test for each reasonable memory order/scope combination + std::vector memoryOrder; + std::vector memoryScope; + int error = 0; + + memoryOrder.push_back(MEMORY_ORDER_EMPTY); + memoryOrder.push_back(MEMORY_ORDER_RELAXED); + memoryOrder.push_back(MEMORY_ORDER_ACQUIRE); + memoryOrder.push_back(MEMORY_ORDER_RELEASE); + memoryOrder.push_back(MEMORY_ORDER_ACQ_REL); + memoryOrder.push_back(MEMORY_ORDER_SEQ_CST); + memoryScope.push_back(MEMORY_SCOPE_EMPTY); + memoryScope.push_back(MEMORY_SCOPE_WORK_GROUP); + memoryScope.push_back(MEMORY_SCOPE_DEVICE); + memoryScope.push_back(MEMORY_SCOPE_ALL_SVM_DEVICES); + + for(unsigned oi = 0; oi < memoryOrder.size(); oi++) + { + for(unsigned si = 0; si < memoryScope.size(); si++) + { + if(memoryOrder[oi] == MEMORY_ORDER_EMPTY && memoryScope[si] != MEMORY_SCOPE_EMPTY) + continue; + 
MemoryOrder(memoryOrder[oi]); + MemoryScope(memoryScope[si]); + EXECUTE_TEST(error, (CBasicTest::ExecuteForEachParameterSet(deviceID, context, queue))); + } + } + return error; + } + void MemoryOrder(TExplicitMemoryOrderType memoryOrder) {_memoryOrder = memoryOrder;} + TExplicitMemoryOrderType MemoryOrder() {return _memoryOrder;} + std::string MemoryOrderStr() + { + if(MemoryOrder() != MEMORY_ORDER_EMPTY) + return std::string(", ")+get_memory_order_type_name(MemoryOrder()); + return ""; + } + void MemoryScope(TExplicitMemoryScopeType memoryScope) {_memoryScope = memoryScope;} + TExplicitMemoryScopeType MemoryScope() {return _memoryScope;} + std::string MemoryScopeStr() + { + if(MemoryScope() != MEMORY_SCOPE_EMPTY) + return std::string(", ")+get_memory_scope_type_name(MemoryScope()); + return ""; + } + std::string MemoryOrderScopeStr() + { + return MemoryOrderStr()+MemoryScopeStr(); + } + virtual cl_uint CurrentGroupNum(cl_uint threadCount) + { + if(MemoryScope() == MEMORY_SCOPE_WORK_GROUP) + return 1; + return CBasicTest::CurrentGroupNum(threadCount); + } + virtual cl_uint MaxHostThreads() + { + // block host threads execution for memory scope different than memory_scope_all_svm_devices + if(MemoryScope() == MEMORY_SCOPE_ALL_SVM_DEVICES || gHost) + return CBasicTest::MaxHostThreads(); + else + return 0; + } +private: + TExplicitMemoryOrderType _memoryOrder; + TExplicitMemoryScopeType _memoryScope; +}; + +template +class CBasicTestMemOrder2Scope : public CBasicTestMemOrderScope +{ +public: + using CBasicTestMemOrderScope::LocalMemory; + using CBasicTestMemOrderScope::MemoryOrder; + using CBasicTestMemOrderScope::MemoryScope; + using CBasicTestMemOrderScope::MemoryOrderStr; + using CBasicTestMemOrderScope::MemoryScopeStr; + CBasicTestMemOrder2Scope(TExplicitAtomicType dataType, bool useSVM = false) : CBasicTestMemOrderScope(dataType, useSVM) + { + } + virtual std::string SingleTestName() + { + std::string testName = CBasicTest::SingleTestName(); + if(MemoryOrder() != 
MEMORY_ORDER_EMPTY) + testName += std::string(", ")+std::string(get_memory_order_type_name(MemoryOrder())).substr(sizeof("memory")); + if(MemoryOrder2() != MEMORY_ORDER_EMPTY) + testName += std::string(", ")+std::string(get_memory_order_type_name(MemoryOrder2())).substr(sizeof("memory")); + if(MemoryScope() != MEMORY_SCOPE_EMPTY) + testName += std::string(", ")+std::string(get_memory_scope_type_name(MemoryScope())).substr(sizeof("memory")); + return testName; + } + virtual int ExecuteForEachParameterSet(cl_device_id deviceID, cl_context context, cl_command_queue queue) + { + // repeat test for each reasonable memory order/scope combination + std::vector memoryOrder; + std::vector memoryScope; + int error = 0; + + memoryOrder.push_back(MEMORY_ORDER_EMPTY); + memoryOrder.push_back(MEMORY_ORDER_RELAXED); + memoryOrder.push_back(MEMORY_ORDER_ACQUIRE); + memoryOrder.push_back(MEMORY_ORDER_RELEASE); + memoryOrder.push_back(MEMORY_ORDER_ACQ_REL); + memoryOrder.push_back(MEMORY_ORDER_SEQ_CST); + memoryScope.push_back(MEMORY_SCOPE_EMPTY); + memoryScope.push_back(MEMORY_SCOPE_WORK_GROUP); + memoryScope.push_back(MEMORY_SCOPE_DEVICE); + memoryScope.push_back(MEMORY_SCOPE_ALL_SVM_DEVICES); + + for(unsigned oi = 0; oi < memoryOrder.size(); oi++) + { + for(unsigned o2i = 0; o2i < memoryOrder.size(); o2i++) + { + for(unsigned si = 0; si < memoryScope.size(); si++) + { + if((memoryOrder[oi] == MEMORY_ORDER_EMPTY || memoryOrder[o2i] == MEMORY_ORDER_EMPTY) + && memoryOrder[oi] != memoryOrder[o2i]) + continue; // both memory order arguments must be set (or none) + if((memoryOrder[oi] == MEMORY_ORDER_EMPTY || memoryOrder[o2i] == MEMORY_ORDER_EMPTY) + && memoryScope[si] != MEMORY_SCOPE_EMPTY) + continue; // memory scope without memory order is not allowed + MemoryOrder(memoryOrder[oi]); + MemoryOrder2(memoryOrder[o2i]); + MemoryScope(memoryScope[si]); + EXECUTE_TEST(error, (CBasicTest::ExecuteForEachParameterSet(deviceID, context, queue))); + } + } + } + return error; + } + void 
MemoryOrder2(TExplicitMemoryOrderType memoryOrderFail) {_memoryOrder2 = memoryOrderFail;} + TExplicitMemoryOrderType MemoryOrder2() {return _memoryOrder2;} + std::string MemoryOrderFailStr() + { + if(MemoryOrder2() != MEMORY_ORDER_EMPTY) + return std::string(", ")+get_memory_order_type_name(MemoryOrder2()); + return ""; + } + std::string MemoryOrderScope() + { + return MemoryOrderStr()+MemoryOrderFailStr()+MemoryScopeStr(); + } +private: + TExplicitMemoryOrderType _memoryOrder2; +}; + +template +std::string CBasicTest::PragmaHeader(cl_device_id deviceID) +{ + std::string pragma; + + if(gOldAPI) + { + pragma += "#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n"; + pragma += "#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n"; + pragma += "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n"; + pragma += "#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n"; + } + // Create the pragma lines for this kernel + if(DataType().Size(deviceID) == 8) + { + pragma += "#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable\n"; + pragma += "#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable\n"; + } + if(_dataType == TYPE_ATOMIC_DOUBLE) + pragma += "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; + return pragma; +} + +template +std::string CBasicTest::ProgramHeader(cl_uint maxNumDestItems) +{ + // Create the program header + std::string header; + std::string aTypeName = DataType().AtomicTypeName(); + std::string cTypeName = DataType().RegularTypeName(); + std::string argListForKernel; + std::string argListForFunction; + std::string argListNoTypes; + std::string functionPrototype; + std::string addressSpace = LocalMemory() ? 
"__local " : "__global "; + + if(gOldAPI) + { + header += std::string("#define ")+aTypeName+" "+cTypeName+"\n" + "#define atomic_store(x,y) (*(x) = y)\n" + "#define atomic_load(x) (*(x))\n" + "#define ATOMIC_VAR_INIT(x) (x)\n" + "#define ATOMIC_FLAG_INIT 0\n" + "#define atomic_init(x,y) atomic_store(x,y)\n"; + if(aTypeName == "atomic_float") + header += "#define atomic_exchange(x,y) atomic_xchg(x,y)\n"; + else if(aTypeName == "atomic_double") + header += "double atomic_exchange(volatile "+addressSpace+"atomic_double *x, double y)\n" + "{\n" + " long tmp = *(long*)&y, res;\n" + " volatile "+addressSpace+"long *tmpA = (volatile "+addressSpace+"long)x;\n" + " res = atom_xchg(tmpA,tmp);\n" + " return *(double*)&res;\n" + "}\n"; + else + header += "#define atomic_exchange(x,y) atom_xchg(x,y)\n"; + if(aTypeName != "atomic_float" && aTypeName != "atomic_double") + header += + "bool atomic_compare_exchange_strong(volatile "+addressSpace+" "+aTypeName+" *a, "+cTypeName+" *expected, "+cTypeName+" desired)\n" + "{\n" + " "+cTypeName+" old = atom_cmpxchg(a, *expected, desired);\n" + " if(old == *expected)\n" + " return true;\n" + " *expected = old;\n" + " return false;\n" + "}\n" + "#define atomic_compare_exchange_weak atomic_compare_exchange_strong\n"; + header += + "#define atomic_fetch_add(x,y) atom_add(x,y)\n" + "#define atomic_fetch_sub(x,y) atom_sub(x,y)\n" + "#define atomic_fetch_or(x,y) atom_or(x,y)\n" + "#define atomic_fetch_xor(x,y) atom_xor(x,y)\n" + "#define atomic_fetch_and(x,y) atom_and(x,y)\n" + "#define atomic_fetch_min(x,y) atom_min(x,y)\n" + "#define atomic_fetch_max(x,y) atom_max(x,y)\n" + "#define atomic_flag_test_and_set(x) atomic_exchange(x,1)\n" + "#define atomic_flag_clear(x) atomic_store(x,0)\n" + "\n"; + } + if(!LocalMemory() && DeclaredInProgram()) + { + // additional atomic variable for results copying (last thread will do this) + header += "__global volatile atomic_uint finishedThreads = ATOMIC_VAR_INIT(0);\n"; + // atomic variables declared in 
program scope - test data + std::stringstream ss; + ss << maxNumDestItems; + header += std::string("__global volatile ")+aTypeName+" destMemory["+ss.str()+"] = {\n"; + ss.str(""); + ss << _startValue; + for(cl_uint i = 0; i < maxNumDestItems; i++) + { + if(aTypeName == "atomic_flag") + header += " ATOMIC_FLAG_INIT"; + else + header += " ATOMIC_VAR_INIT("+ss.str()+")"; + if(i+1 < maxNumDestItems) + header += ","; + header += "\n"; + } + header+= + "};\n" + "\n"; + } + return header; +} + +template +std::string CBasicTest::FunctionCode() +{ + if(!UsedInFunction()) + return ""; + std::string addressSpace = LocalMemory() ? "__local " : "__global "; + std::string code = "void test_atomic_function(uint tid, uint threadCount, uint numDestItems, volatile "; + if(!GenericAddrSpace()) + code += addressSpace; + code += std::string(DataType().AtomicTypeName())+" *destMemory, __global "+DataType().RegularTypeName()+ + " *oldValues"; + if(LocalRefValues()) + code += std::string(", __local ")+DataType().RegularTypeName()+" *localValues"; + code += ")\n" + "{\n"; + code += ProgramCore(); + code += "}\n" + "\n"; + return code; +} + +template +std::string CBasicTest::KernelCode(cl_uint maxNumDestItems) +{ + std::string aTypeName = DataType().AtomicTypeName(); + std::string cTypeName = DataType().RegularTypeName(); + std::string addressSpace = LocalMemory() ? "__local " : "__global "; + std::string code = "__kernel void test_atomic_kernel(uint threadCount, uint numDestItems, "; + + // prepare list of arguments for kernel + if(LocalMemory()) + { + code += std::string("__global ")+cTypeName+" *finalDest, __global "+cTypeName+" *oldValues," + " volatile "+addressSpace+aTypeName+" *"+(DeclaredInProgram() ? "notUsed" : "")+"destMemory"; + } + else + { + code += "volatile "+addressSpace+(DeclaredInProgram() ? 
(cTypeName+" *finalDest") : (aTypeName+" *destMemory"))+ + ", __global "+cTypeName+" *oldValues"; + } + if(LocalRefValues()) + code += std::string(", __local ")+cTypeName+" *localValues"; + code += ")\n" + "{\n"; + if(LocalMemory() && DeclaredInProgram()) + { + // local atomics declared in kernel scope + std::stringstream ss; + ss << maxNumDestItems; + code += std::string(" __local volatile ")+aTypeName+" destMemory["+ss.str()+"];\n"; + } + code += " uint tid = get_global_id(0);\n" + "\n"; + if(LocalMemory()) + { + code += + " // initialize atomics not reachable from host (first thread is doing this, other threads are waiting on barrier)\n" + " if(get_local_id(0) == 0)\n" + " for(uint dstItemIdx = 0; dstItemIdx < numDestItems; dstItemIdx++)\n" + " {\n"; + if(aTypeName == "atomic_flag") + { + code += + " if(finalDest[dstItemIdx])\n" + " atomic_flag_test_and_set(destMemory+dstItemIdx);\n" + " else\n" + " atomic_flag_clear(destMemory+dstItemIdx);\n"; + } + else + { + code += + " atomic_store(destMemory+dstItemIdx, finalDest[dstItemIdx]);\n"; + } + code += + " }\n" + " barrier(CLK_LOCAL_MEM_FENCE);\n" + "\n"; + } + if (LocalRefValues()) + { + code += + " // Copy input reference values into local memory\n"; + if (NumNonAtomicVariablesPerThread() == 1) + code += " localValues[get_local_id(0)] = oldValues[tid];\n"; + else + { + std::stringstream ss; + ss << NumNonAtomicVariablesPerThread(); + code += + " for(uint rfId = 0; rfId < " + ss.str() + "; rfId++)\n" + " localValues[get_local_id(0)*" + ss.str() + "+rfId] = oldValues[tid*" + ss.str() + "+rfId];\n"; + } + code += + " barrier(CLK_LOCAL_MEM_FENCE);\n" + "\n"; + } + if (UsedInFunction()) + code += std::string(" test_atomic_function(tid, threadCount, numDestItems, destMemory, oldValues")+ + (LocalRefValues() ? 
", localValues" : "")+");\n"; + else + code += ProgramCore(); + code += "\n"; + if (LocalRefValues()) + { + code += + " // Copy local reference values into output array\n" + " barrier(CLK_LOCAL_MEM_FENCE);\n"; + if (NumNonAtomicVariablesPerThread() == 1) + code += " oldValues[tid] = localValues[get_local_id(0)];\n"; + else + { + std::stringstream ss; + ss << NumNonAtomicVariablesPerThread(); + code += + " for(uint rfId = 0; rfId < " + ss.str() + "; rfId++)\n" + " oldValues[tid*" + ss.str() + "+rfId] = localValues[get_local_id(0)*" + ss.str() + "+rfId];\n"; + } + code += "\n"; + } + if(LocalMemory() || DeclaredInProgram()) + { + code += " // Copy final values to host reachable buffer\n"; + if(LocalMemory()) + code += + " barrier(CLK_LOCAL_MEM_FENCE);\n" + " if(get_local_id(0) == 0) // first thread in workgroup\n"; + else + // global atomics declared in program scope + code += + " if(atomic_fetch_add(&finishedThreads, 1) == get_global_size(0)-1)\n" + " // last finished thread\n"; + code += + " for(uint dstItemIdx = 0; dstItemIdx < numDestItems; dstItemIdx++)\n"; + if(aTypeName == "atomic_flag") + { + code += + " finalDest[dstItemIdx] = atomic_flag_test_and_set(destMemory+dstItemIdx);\n"; + } + else + { + code += + " finalDest[dstItemIdx] = atomic_load(destMemory+dstItemIdx);\n"; + } + } + code += "}\n" + "\n"; + return code; +} + +template +int CBasicTest::ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue) +{ + int error; + clProgramWrapper program; + clKernelWrapper kernel; + size_t threadNum[1]; + clMemWrapper streams[2]; + std::vector destItems; + HostAtomicType *svmAtomicBuffer = 0; + std::vector refValues, startRefValues; + HostDataType *svmDataBuffer = 0; + cl_uint deviceThreadCount, hostThreadCount, threadCount; + size_t groupSize = 0; + std::string programSource; + const char *programLine; + MTdata d; + size_t typeSize = DataType().Size(deviceID); + + deviceThreadCount = _maxDeviceThreads; + hostThreadCount = 
MaxHostThreads(); + threadCount = deviceThreadCount+hostThreadCount; + + //log_info("\t%s %s%s...\n", local ? "local" : "global", DataType().AtomicTypeName(), memoryOrderScope.c_str()); + log_info("\t%s...\n", SingleTestName().c_str()); + + if(!LocalMemory() && DeclaredInProgram() && gNoGlobalVariables) // no support for program scope global variables + { + log_info("\t\tTest disabled\n"); + return 0; + } + if(UsedInFunction() && GenericAddrSpace() && gNoGenericAddressSpace) + { + log_info("\t\tTest disabled\n"); + return 0; + } + + // set up work sizes based on device capabilities and test configuration + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(groupSize), &groupSize, NULL); + test_error(error, "Unable to obtain max work group size for device"); + CurrentGroupSize((cl_uint)groupSize); + if(CurrentGroupSize() > deviceThreadCount) + CurrentGroupSize(deviceThreadCount); + if(CurrentGroupNum(deviceThreadCount) == 1 || gOldAPI) + deviceThreadCount = CurrentGroupSize()*CurrentGroupNum(deviceThreadCount); + threadCount = deviceThreadCount+hostThreadCount; + + // If we're given a num_results function, we need to determine how many result objects we need. + // This is the first assessment for current maximum number of threads (exact thread count is not known here) + // - needed for program source code generation (arrays of atomics declared in program) + cl_uint numDestItems = NumResults(threadCount, deviceID); + + if(deviceThreadCount > 0) + { + cl_ulong usedLocalMemory; + cl_ulong totalLocalMemory; + cl_uint maxWorkGroupSize; + + // Set up the kernel code + programSource = PragmaHeader(deviceID)+ProgramHeader(numDestItems)+FunctionCode()+KernelCode(numDestItems); + programLine = programSource.c_str(); + if(create_single_kernel_helper_with_build_options(context, &program, &kernel, 1, &programLine, "test_atomic_kernel", + gOldAPI ? 
"" : "-cl-std=CL2.0")) + { + return -1; + } + if(gDebug) + { + log_info("Program source:\n"); + log_info("%s\n", programLine); + } + // tune up work sizes based on kernel info + error = clGetKernelWorkGroupInfo(kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE, sizeof(groupSize), &groupSize, NULL); + test_error(error, "Unable to obtain max work group size for device and kernel combo"); + + if(LocalMemory()) + { + error = clGetKernelWorkGroupInfo (kernel, deviceID, CL_KERNEL_LOCAL_MEM_SIZE, sizeof(usedLocalMemory), &usedLocalMemory, NULL); + test_error(error, "clGetKernelWorkGroupInfo failed"); + + error = clGetDeviceInfo(deviceID, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(totalLocalMemory), &totalLocalMemory, NULL); + test_error(error, "clGetDeviceInfo failed"); + + // We know that each work-group is going to use typeSize * deviceThreadCount bytes of local memory + // so pick the maximum value for deviceThreadCount that uses all the local memory. + maxWorkGroupSize = ((totalLocalMemory - usedLocalMemory) / typeSize); + + if(maxWorkGroupSize < groupSize) + groupSize = maxWorkGroupSize; + } + + CurrentGroupSize((cl_uint)groupSize); + if(CurrentGroupSize() > deviceThreadCount) + CurrentGroupSize(deviceThreadCount); + if(CurrentGroupNum(deviceThreadCount) == 1 || gOldAPI) + deviceThreadCount = CurrentGroupSize()*CurrentGroupNum(deviceThreadCount); + threadCount = deviceThreadCount+hostThreadCount; + } + if(deviceThreadCount > 0) + log_info("\t\t(thread count %u, group size %u)\n", deviceThreadCount, CurrentGroupSize()); + if(hostThreadCount > 0) + log_info("\t\t(host threads %u)\n", hostThreadCount); + + refValues.resize(threadCount*NumNonAtomicVariablesPerThread()); + + // Generate ref data if we have a ref generator provided + d = init_genrand(gRandomSeed); + startRefValues.resize(threadCount*NumNonAtomicVariablesPerThread()); + if(GenerateRefs(threadCount, &startRefValues[0], d)) + { + //copy ref values for host threads + memcpy(&refValues[0], &startRefValues[0], 
sizeof(HostDataType)*threadCount*NumNonAtomicVariablesPerThread()); + } + else + { + startRefValues.resize(0); + } + free_mtdata(d); + d = NULL; + + // If we're given a num_results function, we need to determine how many result objects we need. If + // we don't have it, we assume it's just 1 + // This is final value (exact thread count is known in this place) + numDestItems = NumResults(threadCount, deviceID); + + destItems.resize(numDestItems); + for(cl_uint i = 0; i < numDestItems; i++) + destItems[i] = _startValue; + + // Create main buffer with atomic variables (array size dependent on particular test) + if(UseSVM()) + { + if(gUseHostPtr) + svmAtomicBuffer = (HostAtomicType*)malloc(typeSize * numDestItems); + else + svmAtomicBuffer = (HostAtomicType*)clSVMAlloc(context, CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS, typeSize * numDestItems, 0); + if(!svmAtomicBuffer) + { + log_error("ERROR: clSVMAlloc failed!\n"); + return -1; + } + memcpy(svmAtomicBuffer, &destItems[0], typeSize * numDestItems); + streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_USE_HOST_PTR), typeSize * numDestItems, svmAtomicBuffer, NULL); + } + else + { + streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), typeSize * numDestItems, &destItems[0], NULL); + } + if (!streams[0]) + { + log_error("ERROR: Creating output array failed!\n"); + return -1; + } + // Create buffer for per-thread input/output data + if(UseSVM()) + { + if(gUseHostPtr) + svmDataBuffer = (HostDataType*)malloc(typeSize*threadCount*NumNonAtomicVariablesPerThread()); + else + svmDataBuffer = (HostDataType*)clSVMAlloc(context, CL_MEM_SVM_FINE_GRAIN_BUFFER | (SVMDataBufferAllSVMConsistent() ? 
CL_MEM_SVM_ATOMICS : 0), typeSize*threadCount*NumNonAtomicVariablesPerThread(), 0); + if(!svmDataBuffer) + { + log_error("ERROR: clSVMAlloc failed!\n"); + return -1; + } + if(startRefValues.size()) + memcpy(svmDataBuffer, &startRefValues[0], typeSize*threadCount*NumNonAtomicVariablesPerThread()); + streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_USE_HOST_PTR), typeSize*threadCount*NumNonAtomicVariablesPerThread(), svmDataBuffer, NULL); + } + else + { + streams[1] = clCreateBuffer(context, (cl_mem_flags)((startRefValues.size() ? CL_MEM_COPY_HOST_PTR : CL_MEM_READ_WRITE)), + typeSize * threadCount*NumNonAtomicVariablesPerThread(), startRefValues.size() ? &startRefValues[0] : 0, NULL); + } + if (!streams[1]) + { + log_error("ERROR: Creating reference array failed!\n"); + return -1; + } + if(deviceThreadCount > 0) + { + cl_uint argInd = 0; + /* Set the arguments */ + error = clSetKernelArg(kernel, argInd++, sizeof(threadCount), &threadCount); + test_error(error, "Unable to set kernel argument"); + error = clSetKernelArg(kernel, argInd++, sizeof(numDestItems), &numDestItems); + test_error(error, "Unable to set indexed kernel argument"); + error = clSetKernelArg(kernel, argInd++, sizeof(streams[0]), &streams[0]); + test_error(error, "Unable to set indexed kernel arguments"); + error = clSetKernelArg(kernel, argInd++, sizeof(streams[1]), &streams[1]); + test_error(error, "Unable to set indexed kernel arguments"); + if(LocalMemory()) + { + error = clSetKernelArg(kernel, argInd++, typeSize * numDestItems, NULL); + test_error(error, "Unable to set indexed local kernel argument"); + } + if(LocalRefValues()) + { + error = clSetKernelArg(kernel, argInd++, LocalRefValues() ? 
typeSize*CurrentGroupSize()*NumNonAtomicVariablesPerThread() : 1, NULL); + test_error(error, "Unable to set indexed kernel argument"); + } + } + /* Configure host threads */ + std::vector hostThreadContexts(hostThreadCount); + for(unsigned int t = 0; t < hostThreadCount; t++) + { + hostThreadContexts[t].test = this; + hostThreadContexts[t].tid = deviceThreadCount+t; + hostThreadContexts[t].threadCount = threadCount; + hostThreadContexts[t].destMemory = UseSVM() ? svmAtomicBuffer : &destItems[0]; + hostThreadContexts[t].oldValues = UseSVM() ? svmDataBuffer : &refValues[0]; + } + + if(deviceThreadCount > 0) + { + /* Run the kernel */ + threadNum[0] = deviceThreadCount; + groupSize = CurrentGroupSize(); + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threadNum, &groupSize, 0, NULL, NULL); + test_error(error, "Unable to execute test kernel"); + /* start device threads */ + error = clFlush(queue); + test_error(error, "clFlush failed"); + } + + /* Start host threads and wait for finish */ + if(hostThreadCount > 0) + ThreadPool_Do(HostThreadFunction, hostThreadCount, &hostThreadContexts[0]); + + if(UseSVM()) + { + error = clFinish(queue); + test_error(error, "clFinish failed"); + memcpy(&destItems[0], svmAtomicBuffer, typeSize*numDestItems); + memcpy(&refValues[0], svmDataBuffer, typeSize*threadCount*NumNonAtomicVariablesPerThread()); + } + else + { + if(deviceThreadCount > 0) + { + error = clEnqueueReadBuffer(queue, streams[0], CL_TRUE, 0, typeSize * numDestItems, &destItems[0], 0, NULL, NULL); + test_error(error, "Unable to read result value!"); + error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, typeSize * deviceThreadCount*NumNonAtomicVariablesPerThread(), &refValues[0], 0, NULL, NULL); + test_error(error, "Unable to read reference values!"); + } + } + bool dataVerified = false; + // If we have an expectedFn, then we need to generate a final value to compare against. 
If we don't + // have one, it's because we're comparing ref values only + for(cl_uint i = 0; i < numDestItems; i++) + { + HostDataType expected; + + if(!ExpectedValue(expected, threadCount, startRefValues.size() ? &startRefValues[0] : 0, i)) + break; // no expected value function provided + + if(expected != destItems[i]) + { + std::stringstream logLine; + logLine << "ERROR: Result " << i << " from kernel does not validate! (should be " << expected << ", was " << destItems[i] << ")\n"; + log_error("%s", logLine.str().c_str()); + for(i = 0; i < threadCount; i++) + { + logLine.str(""); + logLine << " --- " << i << " - "; + if(startRefValues.size()) + logLine << startRefValues[i] << " -> " << refValues[i]; + else + logLine << refValues[i]; + logLine << " --- "; + if(i < numDestItems) + logLine << destItems[i]; + logLine << "\n"; + log_info("%s", logLine.str().c_str()); + } + if(!gDebug) + { + log_info("Program source:\n"); + log_info("%s\n", programLine); + } + return -1; + } + dataVerified = true; + } + + bool dataCorrect = false; + /* Use the verify function (if provided) to also check the results */ + if(VerifyRefs(dataCorrect, threadCount, &refValues[0], &destItems[0])) + { + if(!dataCorrect) + { + log_error("ERROR: Reference values did not validate!\n"); + std::stringstream logLine; + for(cl_uint i = 0; i < threadCount; i++) + for (cl_uint j = 0; j < NumNonAtomicVariablesPerThread(); j++) + { + logLine.str(""); + logLine << " --- " << i << " - " << refValues[i*NumNonAtomicVariablesPerThread()+j] << " --- "; + if(j == 0 && i < numDestItems) + logLine << destItems[i]; + logLine << "\n"; + log_info("%s", logLine.str().c_str()); + } + if(!gDebug) + { + log_info("Program source:\n"); + log_info("%s\n", programLine); + } + return -1; + } + } + else if(!dataVerified) + { + log_error("ERROR: Test doesn't check total or refs; no values are verified!\n"); + return -1; + } + + if(OldValueCheck() && + !(DeclaredInProgram() && !LocalMemory())) // don't test for programs scope 
global atomics + // 'old' value has been overwritten by previous clEnqueueNDRangeKernel + { + /* Re-write the starting value */ + for(size_t i = 0; i < numDestItems; i++) + destItems[i] = _startValue; + refValues[0] = 0; + if(deviceThreadCount > 0) + { + error = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, typeSize * numDestItems, &destItems[0], 0, NULL, NULL); + test_error(error, "Unable to write starting values!"); + + /* Run the kernel once for a single thread, so we can verify that the returned value is the original one */ + threadNum[0] = 1; + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threadNum, threadNum, 0, NULL, NULL); + test_error(error, "Unable to execute test kernel"); + + error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, typeSize, &refValues[0], 0, NULL, NULL); + test_error(error, "Unable to read reference values!"); + } + else + { + /* Start host thread */ + HostFunction(0, 1, &destItems[0], &refValues[0]); + } + + if(refValues[0] != _startValue)//destItems[0]) + { + std::stringstream logLine; + logLine << "ERROR: atomic function operated correctly but did NOT return correct 'old' value " + " (should have been " << destItems[0] << ", returned " << refValues[0] << ")!\n"; + log_error("%s", logLine.str().c_str()); + if(!gDebug) + { + log_info("Program source:\n"); + log_info("%s\n", programLine); + } + return -1; + } + } + if(UseSVM()) + { + // the buffer object must first be released before the SVM buffer is freed + error = clReleaseMemObject(streams[0]); + streams[0] = 0; + test_error(error, "clReleaseMemObject failed"); + if(gUseHostPtr) + free(svmAtomicBuffer); + else + clSVMFree(context, svmAtomicBuffer); + error = clReleaseMemObject(streams[1]); + streams[1] = 0; + test_error(error, "clReleaseMemObject failed"); + if(gUseHostPtr) + free(svmDataBuffer); + else + clSVMFree(context, svmDataBuffer); + } + _passCount++; + return 0; +} + +#endif //_COMMON_H_ diff --git a/test_conformance/c11_atomics/host_atomics.cpp 
b/test_conformance/c11_atomics/host_atomics.cpp
new file mode 100644
index 00000000..008fdd25
--- /dev/null
+++ b/test_conformance/c11_atomics/host_atomics.cpp
@@ -0,0 +1,66 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "host_atomics.h"
+
+// Host-side memory fence. Any order other than relaxed is implemented as a
+// full barrier via the platform primitive (MemoryBarrier on MSVC/ICC-Windows,
+// __sync_synchronize on GCC-compatible compilers); relaxed is a no-op.
+// On other toolchains the fence is only logged as unimplemented.
+void host_atomic_thread_fence(TExplicitMemoryOrderType order)
+{
+ if(order != MEMORY_ORDER_RELAXED) {
+#if defined( _MSC_VER ) || (defined( __INTEL_COMPILER ) && defined(WIN32))
+ MemoryBarrier();
+#elif defined(__GNUC__)
+ __sync_synchronize();
+#else
+ log_info("Host function not implemented: host_atomic_thread_fence\n");
+#endif
+ }
+}
+
+// Floating-point exchange specializations: the value is bit-reinterpreted as a
+// same-sized integer and routed through the integer atomic exchange.
+// NOTE(review): the *(HOST_UINT*)&c punning relies on type-based aliasing being
+// tolerated by the compiler; a memcpy-based reinterpret would be strictly
+// conforming - consider if miscompiles are ever observed.
+template <>
+HOST_FLOAT host_atomic_exchange(volatile HOST_ATOMIC_FLOAT* a, HOST_FLOAT c, TExplicitMemoryOrderType order)
+{
+ HOST_UINT tmp = host_atomic_exchange((volatile HOST_ATOMIC_UINT*)a, *(HOST_UINT*)&c, order);
+ return *(float*)&tmp;
+}
+template <>
+HOST_DOUBLE host_atomic_exchange(volatile HOST_ATOMIC_DOUBLE* a, HOST_DOUBLE c, TExplicitMemoryOrderType order)
+{
+ HOST_ULONG tmp = host_atomic_exchange((volatile HOST_ATOMIC_ULONG*)a, *(HOST_ULONG*)&c, order);
+ return *(double*)&tmp;
+}
+
+// Floating-point load specializations, same bit-reinterpretation scheme as above.
+template <>
+HOST_FLOAT host_atomic_load(volatile HOST_ATOMIC_FLOAT* a, TExplicitMemoryOrderType order)
+{
+ HOST_UINT tmp = host_atomic_load((volatile HOST_ATOMIC_UINT*)a, order);
+ return *(float*)&tmp;
+}
+template <>
+HOST_DOUBLE host_atomic_load(volatile HOST_ATOMIC_DOUBLE* a, TExplicitMemoryOrderType order)
+{
+ 
HOST_ULONG tmp = host_atomic_load((volatile HOST_ATOMIC_ULONG*)a, order); + return *(double*)&tmp; +} + +bool host_atomic_flag_test_and_set(volatile HOST_ATOMIC_FLAG *a, TExplicitMemoryOrderType order) +{ + HOST_FLAG old = host_atomic_exchange(a, 1, order); + return old != 0; +} + +void host_atomic_flag_clear(volatile HOST_ATOMIC_FLAG *a, TExplicitMemoryOrderType order) +{ + host_atomic_store(a, 0, order); +} diff --git a/test_conformance/c11_atomics/host_atomics.h b/test_conformance/c11_atomics/host_atomics.h new file mode 100644 index 00000000..9a044ded --- /dev/null +++ b/test_conformance/c11_atomics/host_atomics.h @@ -0,0 +1,250 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef _HOST_ATOMICS_H_ +#define _HOST_ATOMICS_H_ + +#include "../../test_common/harness/testHarness.h" + +#ifdef WIN32 +#include "Windows.h" +#endif + +//flag for test verification (good test should discover non-atomic functions and fail) +//#define NON_ATOMIC_FUNCTIONS + +enum TExplicitMemoryOrderType +{ + MEMORY_ORDER_EMPTY, + MEMORY_ORDER_RELAXED, + MEMORY_ORDER_ACQUIRE, + MEMORY_ORDER_RELEASE, + MEMORY_ORDER_ACQ_REL, + MEMORY_ORDER_SEQ_CST +}; + +// host atomic types (applicable for atomic functions supported on host OS) +#ifdef WIN32 +#define HOST_ATOMIC_INT unsigned long +#define HOST_ATOMIC_UINT unsigned long +#define HOST_ATOMIC_LONG unsigned long long +#define HOST_ATOMIC_ULONG unsigned long long +#define HOST_ATOMIC_FLOAT float +#define HOST_ATOMIC_DOUBLE double +#else +#define HOST_ATOMIC_INT cl_int +#define HOST_ATOMIC_UINT cl_uint +#define HOST_ATOMIC_LONG cl_long +#define HOST_ATOMIC_ULONG cl_ulong +#define HOST_ATOMIC_FLOAT cl_float +#define HOST_ATOMIC_DOUBLE cl_double +#endif + +#define HOST_ATOMIC_INTPTR_T32 HOST_ATOMIC_INT +#define HOST_ATOMIC_UINTPTR_T32 HOST_ATOMIC_INT +#define HOST_ATOMIC_SIZE_T32 HOST_ATOMIC_UINT +#define HOST_ATOMIC_PTRDIFF_T32 HOST_ATOMIC_INT + +#define HOST_ATOMIC_INTPTR_T64 HOST_ATOMIC_LONG +#define HOST_ATOMIC_UINTPTR_T64 HOST_ATOMIC_LONG +#define HOST_ATOMIC_SIZE_T64 HOST_ATOMIC_ULONG +#define HOST_ATOMIC_PTRDIFF_T64 HOST_ATOMIC_LONG + +#define HOST_ATOMIC_FLAG HOST_ATOMIC_INT + +// host regular types corresponding to atomic types +#define HOST_INT cl_int +#define HOST_UINT cl_uint +#define HOST_LONG cl_long +#define HOST_ULONG cl_ulong +#define HOST_FLOAT cl_float +#define HOST_DOUBLE cl_double + +#define HOST_INTPTR_T32 cl_int +#define HOST_UINTPTR_T32 cl_uint +#define HOST_SIZE_T32 cl_uint +#define HOST_PTRDIFF_T32 cl_int + +#define HOST_INTPTR_T64 cl_long +#define HOST_UINTPTR_T64 cl_ulong +#define HOST_SIZE_T64 cl_ulong +#define HOST_PTRDIFF_T64 cl_long + +#define HOST_FLAG cl_uint + +// host atomic 
functions +void host_atomic_thread_fence(TExplicitMemoryOrderType order); + +template +CorrespondingType host_atomic_fetch_add(volatile AtomicType *a, CorrespondingType c, + TExplicitMemoryOrderType order) +{ +#if defined( _MSC_VER ) || (defined( __INTEL_COMPILER ) && defined(WIN32)) + return InterlockedExchangeAdd(a, c); +#elif defined(__GNUC__) + return __sync_fetch_and_add(a, c); +#else + log_info("Host function not implemented: atomic_fetch_add\n"); + return 0; +#endif +} + +template +CorrespondingType host_atomic_fetch_sub(volatile AtomicType *a, CorrespondingType c, + TExplicitMemoryOrderType order) +{ +#if defined( _MSC_VER ) || (defined( __INTEL_COMPILER ) && defined(WIN32)) + return InterlockedExchangeSubtract(a, c); +#elif defined(__GNUC__) + return __sync_fetch_and_sub(a, c); +#else + log_info("Host function not implemented: atomic_fetch_sub\n"); + return 0; +#endif +} + +template +CorrespondingType host_atomic_exchange(volatile AtomicType *a, CorrespondingType c, + TExplicitMemoryOrderType order) +{ +#if defined( _MSC_VER ) || (defined( __INTEL_COMPILER ) && defined(WIN32)) + return InterlockedExchange(a, c); +#elif defined(__GNUC__) + return __sync_lock_test_and_set(a, c); +#else + log_info("Host function not implemented: atomic_exchange\n"); + return 0; +#endif +} +template <> HOST_FLOAT host_atomic_exchange(volatile HOST_ATOMIC_FLOAT *a, HOST_FLOAT c, + TExplicitMemoryOrderType order); +template <> HOST_DOUBLE host_atomic_exchange(volatile HOST_ATOMIC_DOUBLE *a, HOST_DOUBLE c, + TExplicitMemoryOrderType order); + +template +bool host_atomic_compare_exchange(volatile AtomicType *a, CorrespondingType *expected, CorrespondingType desired, + TExplicitMemoryOrderType order_success, + TExplicitMemoryOrderType order_failure) +{ + CorrespondingType tmp; +#if defined( _MSC_VER ) || (defined( __INTEL_COMPILER ) && defined(WIN32)) + tmp = InterlockedCompareExchange(a, desired, *expected); +#elif defined(__GNUC__) + tmp = __sync_val_compare_and_swap(a, 
*expected, desired); +#else + log_info("Host function not implemented: atomic_compare_exchange\n"); + tmp = 0; +#endif + if(tmp == *expected) + return true; + *expected = tmp; + return false; +} + +template +CorrespondingType host_atomic_load(volatile AtomicType *a, + TExplicitMemoryOrderType order) +{ +#if defined( _MSC_VER ) || (defined( __INTEL_COMPILER ) && defined(WIN32)) + return InterlockedExchangeAdd(a, 0); +#elif defined(__GNUC__) + return __sync_add_and_fetch(a, 0); +#else + log_info("Host function not implemented: atomic_load\n"); + return 0; +#endif +} +template <> HOST_FLOAT host_atomic_load(volatile HOST_ATOMIC_FLOAT *a, + TExplicitMemoryOrderType order); +template <> HOST_DOUBLE host_atomic_load(volatile HOST_ATOMIC_DOUBLE *a, + TExplicitMemoryOrderType order); + +template +void host_atomic_store(volatile AtomicType* a, CorrespondingType c, + TExplicitMemoryOrderType order) +{ + host_atomic_exchange(a, c, order); +} + +template +void host_atomic_init(volatile AtomicType* a, CorrespondingType c) +{ + host_atomic_exchange(a, c, MEMORY_ORDER_RELAXED); +} + +template +CorrespondingType host_atomic_fetch_or(volatile AtomicType *a, CorrespondingType c, + TExplicitMemoryOrderType order) +{ + CorrespondingType expected = host_atomic_load(a, order); + CorrespondingType desired; + do + desired = expected | c; + while(!host_atomic_compare_exchange(a, &expected, desired, order, order)); + return expected; +} + +template +CorrespondingType host_atomic_fetch_and(volatile AtomicType *a, CorrespondingType c, + TExplicitMemoryOrderType order) +{ + CorrespondingType expected = host_atomic_load(a, order); + CorrespondingType desired; + do + desired = expected & c; + while(!host_atomic_compare_exchange(a, &expected, desired, order, order)); + return expected; +} + +template +CorrespondingType host_atomic_fetch_xor(volatile AtomicType *a, CorrespondingType c, + TExplicitMemoryOrderType order) +{ + CorrespondingType expected = host_atomic_load(a, order); + 
CorrespondingType desired; + do + desired = expected ^ c; + while(!host_atomic_compare_exchange(a, &expected, desired, order, order)); + return expected; +} + +template +CorrespondingType host_atomic_fetch_min(volatile AtomicType *a, CorrespondingType c, + TExplicitMemoryOrderType order) +{ + CorrespondingType expected = host_atomic_load(a, order); + CorrespondingType desired; + do + desired = expected < c ? expected : c; + while(!host_atomic_compare_exchange(a, &expected, desired, order, order)); + return expected; +} + +template +CorrespondingType host_atomic_fetch_max(volatile AtomicType *a, CorrespondingType c, + TExplicitMemoryOrderType order) +{ + CorrespondingType expected = host_atomic_load(a, order); + CorrespondingType desired; + do + desired = expected > c ? expected : c; + while(!host_atomic_compare_exchange(a, &expected, desired, order, order)); + return expected; +} + +bool host_atomic_flag_test_and_set(volatile HOST_ATOMIC_FLAG *a, TExplicitMemoryOrderType order); +void host_atomic_flag_clear(volatile HOST_ATOMIC_FLAG *a, TExplicitMemoryOrderType order); + +#endif //_HOST_ATOMICS_H_ diff --git a/test_conformance/c11_atomics/main.cpp b/test_conformance/c11_atomics/main.cpp new file mode 100644 index 00000000..f40c45e6 --- /dev/null +++ b/test_conformance/c11_atomics/main.cpp @@ -0,0 +1,230 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/testHarness.h" +#include +#include + +bool gHost = false; // flag for testing native host threads (test verification) +bool gOldAPI = false; // flag for testing with old API (OpenCL 1.2) - test verification +bool gContinueOnError = false; // execute all cases even when errors detected +bool gNoGlobalVariables = false; // disable cases with global atomics in program scope +bool gNoGenericAddressSpace = false; // disable cases with generic address space +bool gUseHostPtr = false; // use malloc/free with CL_MEM_USE_HOST_PTR instead of clSVMAlloc/clSVMFree +bool gDebug = false; // always print OpenCL kernel code +int gInternalIterations = 10000; // internal test iterations for atomic operation, sufficient to verify atomicity +int gMaxDeviceThreads = 1024; // maximum number of threads executed on OCL device + +extern int test_atomic_init(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_atomic_store(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_atomic_load(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_atomic_store_load(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_atomic_exchange(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_atomic_compare_exchange_weak(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_atomic_compare_exchange_strong(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_atomic_fetch_add(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_atomic_fetch_sub(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_atomic_fetch_and(cl_device_id 
deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_atomic_fetch_or(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_atomic_fetch_orand(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_atomic_fetch_xor(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_atomic_fetch_xor2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_atomic_fetch_min(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_atomic_fetch_max(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_atomic_flag(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_atomic_fence(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_atomic_init_svm(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_atomic_store_svm(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_atomic_load_svm(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_atomic_store_load_svm(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_atomic_exchange_svm(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_atomic_compare_exchange_weak_svm(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_atomic_compare_exchange_strong_svm(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_atomic_fetch_add_svm(cl_device_id deviceID, cl_context context, 
cl_command_queue queue, int num_elements); +extern int test_atomic_fetch_sub_svm(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_atomic_fetch_and_svm(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_atomic_fetch_or_svm(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_atomic_fetch_orand_svm(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_atomic_fetch_xor_svm(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_atomic_fetch_xor2_svm(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_atomic_fetch_min_svm(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_atomic_fetch_max_svm(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_atomic_flag_svm(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_atomic_fence_svm(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +basefn basefn_list[] = { + test_atomic_init, + test_atomic_store, + test_atomic_load, + test_atomic_exchange, + test_atomic_compare_exchange_weak, + test_atomic_compare_exchange_strong, + test_atomic_fetch_add, + test_atomic_fetch_sub, + test_atomic_fetch_and, + test_atomic_fetch_or, + test_atomic_fetch_orand, + test_atomic_fetch_xor, + test_atomic_fetch_xor2, + test_atomic_fetch_min, + test_atomic_fetch_max, + test_atomic_flag, + test_atomic_fence, + + test_atomic_init_svm, + test_atomic_store_svm, + test_atomic_load_svm, + test_atomic_exchange_svm, + test_atomic_compare_exchange_weak_svm, + test_atomic_compare_exchange_strong_svm, + test_atomic_fetch_add_svm, + test_atomic_fetch_sub_svm, + 
test_atomic_fetch_and_svm, + test_atomic_fetch_or_svm, + test_atomic_fetch_orand_svm, + test_atomic_fetch_xor_svm, + test_atomic_fetch_xor2_svm, + test_atomic_fetch_min_svm, + test_atomic_fetch_max_svm, + test_atomic_flag_svm, + test_atomic_fence_svm +}; + +const char *basefn_names[] = { + "atomic_init", + "atomic_store", + "atomic_load", + "atomic_exchange", + "atomic_compare_exchange_weak", + "atomic_compare_exchange_strong", + "atomic_fetch_add", + "atomic_fetch_sub", + "atomic_fetch_and", + "atomic_fetch_or", + "atomic_fetch_orand", + "atomic_fetch_xor", + "atomic_fetch_xor2", + "atomic_fetch_min", + "atomic_fetch_max", + "atomic_flag", + "atomic_fence", + + "svm_atomic_init", + "svm_atomic_store", + "svm_atomic_load", + "svm_atomic_exchange", + "svm_atomic_compare_exchange_weak", + "svm_atomic_compare_exchange_strong", + "svm_atomic_fetch_add", + "svm_atomic_fetch_sub", + "svm_atomic_fetch_and", + "svm_atomic_fetch_or", + "svm_atomic_fetch_orand", + "svm_atomic_fetch_xor", + "svm_atomic_fetch_xor2", + "svm_atomic_fetch_min", + "svm_atomic_fetch_max", + "svm_atomic_flag", + "svm_atomic_fence", +}; + +ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_list) / sizeof(basefn_list[0]))); + +int num_fns = sizeof(basefn_names) / sizeof(char *); + +int main(int argc, const char *argv[]) +{ + bool noCert = false; + while(true) + { + if(std::string(argv[argc-1]) == "-h") + { + log_info("Test options:\n"); + log_info(" '-host' flag for testing native host threads (test verification)\n"); + log_info(" '-oldAPI' flag for testing with old API (OpenCL 1.2) - test verification\n"); + log_info(" '-continueOnError' execute all cases even when errors detected\n"); + log_info(" '-noGlobalVariables' disable cases with global atomics in program scope\n"); + log_info(" '-noGenericAddressSpace' disable cases with generic address space\n"); + log_info(" '-useHostPtr' use malloc/free with CL_MEM_USE_HOST_PTR instead of clSVMAlloc/clSVMFree\n"); + log_info(" 
'-debug' always print OpenCL kernel code\n"); + log_info(" '-internalIterations ' internal test iterations for atomic operation, sufficient to verify atomicity\n"); + log_info(" '-maxDeviceThreads ' maximum number of threads executed on OCL device"); + + break; + } + if(std::string(argv[argc-1]) == "-host") // temporary option for testing native host threads + { + gHost = true; + noCert = true; + } + else if(std::string(argv[argc-1]) == "-oldAPI") // temporary flag for testing with old API (OpenCL 1.2) + { + gOldAPI = true; + gNoGlobalVariables = true; + gNoGenericAddressSpace = true; + gUseHostPtr = true; + noCert = true; + } + else if(std::string(argv[argc-1]) == "-continueOnError") // execute all cases even when errors detected + gContinueOnError = true; + else if(std::string(argv[argc-1]) == "-noGlobalVariables") // disable cases with global atomics in program scope + { + gNoGlobalVariables = true; + noCert = true; + } + else if(std::string(argv[argc-1]) == "-noGenericAddressSpace") // disable cases with generic address space + { + gNoGenericAddressSpace = true; + noCert = true; + } + else if(std::string(argv[argc-1]) == "-useHostPtr") // use malloc/free with CL_MEM_USE_HOST_PTR instead of clSVMAlloc/clSVMFree + { + gUseHostPtr = true; + noCert = true; + } + else if(std::string(argv[argc-1]) == "-debug") // print OpenCL kernel code + gDebug = true; + else if(argc > 2 && std::string(argv[argc-2]) == "-internalIterations") // internal test iterations for atomic operation, sufficient to verify atomicity + { + gInternalIterations = atoi(argv[argc-1]); + if(gInternalIterations < 1) + { + log_info("Invalid value: Number of internal iterations (%d) must be > 0\n", gInternalIterations); + return -1; + } + argc--; + noCert = true; + } + else if(argc > 2 && std::string(argv[argc-2]) == "-maxDeviceThreads") // maximum number of threads executed on OCL device + { + gMaxDeviceThreads = atoi(argv[argc-1]); + argc--; + noCert = true; + } + else + break; + argc--; + } + 
if(noCert) + { + log_info("\n" ); + log_info("*** ***\n"); + log_info("*** WARNING: Test execution in debug mode (forced by command-line option)! ***\n"); + log_info("*** Use of this mode is not sufficient to verify correctness. ***\n"); + log_info("*** ***\n"); + } + return runTestHarness(argc, argv, num_fns, basefn_list, basefn_names, false, false, 0); +} diff --git a/test_conformance/c11_atomics/test_atomics.cpp b/test_conformance/c11_atomics/test_atomics.cpp new file mode 100644 index 00000000..542e893f --- /dev/null +++ b/test_conformance/c11_atomics/test_atomics.cpp @@ -0,0 +1,2142 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/testHarness.h" +#include "../../test_common/harness/kernelHelpers.h" +#include "../../test_common/harness/typeWrappers.h" + +#include "common.h" +#include "host_atomics.h" + +#include +#include + +template +class CBasicTestStore : public CBasicTestMemOrderScope +{ +public: + using CBasicTestMemOrderScope::OldValueCheck; + using CBasicTestMemOrderScope::MemoryOrder; + using CBasicTestMemOrderScope::MemoryOrderScopeStr; + CBasicTestStore(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) + { + OldValueCheck(false); + } + virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) + { + return threadCount; + } + virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue) + { + if(MemoryOrder() == MEMORY_ORDER_ACQUIRE || + MemoryOrder() == MEMORY_ORDER_ACQ_REL) + return 0; //skip test - not applicable + return CBasicTestMemOrderScope::ExecuteSingleTest(deviceID, context, queue); + } + virtual std::string ProgramCore() + { + std::string memoryOrderScope = MemoryOrderScopeStr(); + std::string postfix(memoryOrderScope.empty() ? 
"" : "_explicit"); + return + " atomic_store"+postfix+"(&destMemory[tid], tid"+memoryOrderScope+");\n"; + } + virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) + { + host_atomic_store(&destMemory[tid], (HostDataType)tid, MemoryOrder()); + } + virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) + { + expected = (HostDataType)whichDestValue; + return true; + } +}; + +int test_atomic_store_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) +{ + int error = 0; + CBasicTestStore test_int(TYPE_ATOMIC_INT, useSVM); + EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); + CBasicTestStore test_uint(TYPE_ATOMIC_UINT, useSVM); + EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); + CBasicTestStore test_long(TYPE_ATOMIC_LONG, useSVM); + EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); + CBasicTestStore test_ulong(TYPE_ATOMIC_ULONG, useSVM); + EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); + CBasicTestStore test_float(TYPE_ATOMIC_FLOAT, useSVM); + EXECUTE_TEST(error, test_float.Execute(deviceID, context, queue, num_elements)); + CBasicTestStore test_double(TYPE_ATOMIC_DOUBLE, useSVM); + EXECUTE_TEST(error, test_double.Execute(deviceID, context, queue, num_elements)); + if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) + { + CBasicTestStore test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestStore test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestStore test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, 
num_elements)); + CBasicTestStore test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + else + { + CBasicTestStore test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestStore test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestStore test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestStore test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + return error; +} + +int test_atomic_store(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return test_atomic_store_generic(deviceID, context, queue, num_elements, false); +} + +int test_atomic_store_svm(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return test_atomic_store_generic(deviceID, context, queue, num_elements, true); +} + +template +class CBasicTestInit : public CBasicTest +{ +public: + using CBasicTest::OldValueCheck; + CBasicTestInit(TExplicitAtomicType dataType, bool useSVM) : CBasicTest(dataType, useSVM) + { + OldValueCheck(false); + } + virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) + { + return threadCount; + } + virtual std::string ProgramCore() + { + return + " atomic_init(&destMemory[tid], tid);\n"; + } + virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) + { + host_atomic_init(&destMemory[tid], (HostDataType)tid); + } + virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) + { + expected = (HostDataType)whichDestValue; + return 
true; + } +}; + +int test_atomic_init_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) +{ + int error = 0; + CBasicTestInit test_int(TYPE_ATOMIC_INT, useSVM); + EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); + CBasicTestInit test_uint(TYPE_ATOMIC_UINT, useSVM); + EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); + CBasicTestInit test_long(TYPE_ATOMIC_LONG, useSVM); + EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); + CBasicTestInit test_ulong(TYPE_ATOMIC_ULONG, useSVM); + EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); + CBasicTestInit test_float(TYPE_ATOMIC_FLOAT, useSVM); + EXECUTE_TEST(error, test_float.Execute(deviceID, context, queue, num_elements)); + CBasicTestInit test_double(TYPE_ATOMIC_DOUBLE, useSVM); + EXECUTE_TEST(error, test_double.Execute(deviceID, context, queue, num_elements)); + if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) + { + CBasicTestInit test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestInit test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestInit test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestInit test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + else + { + CBasicTestInit test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestInit test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestInit 
test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestInit test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + return error; +} + +int test_atomic_init(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return test_atomic_init_generic(deviceID, context, queue, num_elements, false); +} + +int test_atomic_init_svm(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return test_atomic_init_generic(deviceID, context, queue, num_elements, true); +} + +template +class CBasicTestLoad : public CBasicTestMemOrderScope +{ +public: + using CBasicTestMemOrderScope::OldValueCheck; + using CBasicTestMemOrderScope::MemoryOrder; + using CBasicTestMemOrderScope::MemoryOrderScopeStr; + CBasicTestLoad(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) + { + OldValueCheck(false); + } + virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) + { + return threadCount; + } + virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue) + { + if(MemoryOrder() == MEMORY_ORDER_RELEASE || + MemoryOrder() == MEMORY_ORDER_ACQ_REL) + return 0; //skip test - not applicable + return CBasicTestMemOrderScope::ExecuteSingleTest(deviceID, context, queue); + } + virtual std::string ProgramCore() + { + std::string memoryOrderScope = MemoryOrderScopeStr(); + std::string postfix(memoryOrderScope.empty() ? 
"" : "_explicit"); + return + " atomic_store(&destMemory[tid], tid);\n" + " oldValues[tid] = atomic_load"+postfix+"(&destMemory[tid]"+memoryOrderScope+");\n"; + } + virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) + { + host_atomic_store(&destMemory[tid], (HostDataType)tid, MEMORY_ORDER_SEQ_CST); + oldValues[tid] = host_atomic_load(&destMemory[tid], MemoryOrder()); + } + virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) + { + expected = (HostDataType)whichDestValue; + return true; + } + virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues) + { + correct = true; + for(cl_uint i = 0; i < threadCount; i++ ) + { + if(refValues[i] != (HostDataType)i) + { + log_error("Invalid value for thread %u\n", (cl_uint)i); + correct = false; + return true; + } + } + return true; + } +}; + +int test_atomic_load_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) +{ + int error = 0; + CBasicTestLoad test_int(TYPE_ATOMIC_INT, useSVM); + EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); + CBasicTestLoad test_uint(TYPE_ATOMIC_UINT, useSVM); + EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); + CBasicTestLoad test_long(TYPE_ATOMIC_LONG, useSVM); + EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); + CBasicTestLoad test_ulong(TYPE_ATOMIC_ULONG, useSVM); + EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); + CBasicTestLoad test_float(TYPE_ATOMIC_FLOAT, useSVM); + EXECUTE_TEST(error, test_float.Execute(deviceID, context, queue, num_elements)); + CBasicTestLoad test_double(TYPE_ATOMIC_DOUBLE, useSVM); + EXECUTE_TEST(error, test_double.Execute(deviceID, context, queue, num_elements)); + 
if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) + { + CBasicTestLoad test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestLoad test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestLoad test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestLoad test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + else + { + CBasicTestLoad test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestLoad test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestLoad test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestLoad test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + return error; +} + +int test_atomic_load(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return test_atomic_load_generic(deviceID, context, queue, num_elements, false); +} + +int test_atomic_load_svm(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return test_atomic_load_generic(deviceID, context, queue, num_elements, true); +} + +template +class CBasicTestExchange : public CBasicTestMemOrderScope +{ +public: + using CBasicTestMemOrderScope::OldValueCheck; + using CBasicTestMemOrderScope::StartValue; + using CBasicTestMemOrderScope::MemoryOrder; + using CBasicTestMemOrderScope::MemoryOrderScopeStr; + using 
CBasicTestMemOrderScope::Iterations; + using CBasicTestMemOrderScope::IterationsStr; + CBasicTestExchange(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) + { + StartValue(123456); + } + virtual std::string ProgramCore() + { + std::string memoryOrderScope = MemoryOrderScopeStr(); + std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); + return + " oldValues[tid] = atomic_exchange"+postfix+"(&destMemory[0], tid"+memoryOrderScope+");\n" + " for(int i = 0; i < "+IterationsStr()+"; i++)\n" + " oldValues[tid] = atomic_exchange"+postfix+"(&destMemory[0], oldValues[tid]"+memoryOrderScope+");\n"; + } + + virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) + { + oldValues[tid] = host_atomic_exchange(&destMemory[0], (HostDataType)tid, MemoryOrder()); + for(int i = 0; i < Iterations(); i++) + oldValues[tid] = host_atomic_exchange(&destMemory[0], oldValues[tid], MemoryOrder()); + } + virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues) + { + OldValueCheck(Iterations()%2 == 0); //check is valid for even number of iterations only + correct = true; + /* We are expecting values from 0 to size-1 and initial value from atomic variable */ + /* These values must be distributed across refValues array and atomic variable finalVaue[0] */ + /* Any repeated value is treated as an error */ + std::vector tidFound(threadCount); + bool startValueFound = false; + cl_uint i; + + for(i = 0; i <= threadCount; i++) + { + cl_uint value; + if(i == threadCount) + value = (cl_uint)finalValues[0]; //additional value from atomic variable (last written) + else + value = (cl_uint)refValues[i]; + if(value == (cl_uint)StartValue()) + { + // Special initial value + if(startValueFound) + { + log_error("ERROR: Starting reference value (%u) occurred more thane once\n", (cl_uint)StartValue()); + correct = false; + return true; + } + 
startValueFound = true; + continue; + } + if(value >= threadCount) + { + log_error("ERROR: Reference value %u outside of valid range! (%u)\n", i, value); + correct = false; + return true; + } + if(tidFound[value]) + { + log_error("ERROR: Value (%u) occurred more thane once\n", value); + correct = false; + return true; + } + tidFound[value] = true; + } + return true; + } +}; + +int test_atomic_exchange_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) +{ + int error = 0; + CBasicTestExchange test_int(TYPE_ATOMIC_INT, useSVM); + EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); + CBasicTestExchange test_uint(TYPE_ATOMIC_UINT, useSVM); + EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); + CBasicTestExchange test_long(TYPE_ATOMIC_LONG, useSVM); + EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); + CBasicTestExchange test_ulong(TYPE_ATOMIC_ULONG, useSVM); + EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); + CBasicTestExchange test_float(TYPE_ATOMIC_FLOAT, useSVM); + EXECUTE_TEST(error, test_float.Execute(deviceID, context, queue, num_elements)); + CBasicTestExchange test_double(TYPE_ATOMIC_DOUBLE, useSVM); + EXECUTE_TEST(error, test_double.Execute(deviceID, context, queue, num_elements)); + if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) + { + CBasicTestExchange test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestExchange test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestExchange test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestExchange test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST(error, 
test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + else + { + CBasicTestExchange test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestExchange test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestExchange test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestExchange test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + return error; +} + +int test_atomic_exchange(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return test_atomic_exchange_generic(deviceID, context, queue, num_elements, false); +} + +int test_atomic_exchange_svm(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return test_atomic_exchange_generic(deviceID, context, queue, num_elements, true); +} + +template +class CBasicTestCompareStrong : public CBasicTestMemOrder2Scope +{ +public: + using CBasicTestMemOrder2Scope::StartValue; + using CBasicTestMemOrder2Scope::OldValueCheck; + using CBasicTestMemOrder2Scope::MemoryOrder; + using CBasicTestMemOrder2Scope::MemoryOrder2; + using CBasicTestMemOrder2Scope::MemoryOrderScope; + using CBasicTestMemOrder2Scope::DataType; + using CBasicTestMemOrder2Scope::Iterations; + using CBasicTestMemOrder2Scope::IterationsStr; + CBasicTestCompareStrong(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrder2Scope(dataType, useSVM) + { + StartValue(123456); + OldValueCheck(false); + } + virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue) + { + if(MemoryOrder2() == MEMORY_ORDER_RELEASE || + MemoryOrder2() == MEMORY_ORDER_ACQ_REL) + return 0; // not allowed as 'failure' 
argument + if((MemoryOrder() == MEMORY_ORDER_RELAXED && MemoryOrder2() != MEMORY_ORDER_RELAXED) || + (MemoryOrder() != MEMORY_ORDER_SEQ_CST && MemoryOrder2() == MEMORY_ORDER_SEQ_CST)) + return 0; // failure argument shall be no stronger than the success + return CBasicTestMemOrder2Scope::ExecuteSingleTest(deviceID, context, queue); + } + virtual std::string ProgramCore() + { + std::string memoryOrderScope = MemoryOrderScope(); + std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); + return + std::string(" ")+DataType().RegularTypeName()+" expected, previous;\n" + " int successCount = 0;\n" + " oldValues[tid] = tid;\n" + " expected = tid; // force failure at the beginning\n" + " if(atomic_compare_exchange_strong"+postfix+"(&destMemory[0], &expected, oldValues[tid]"+memoryOrderScope+") || expected == tid)\n" + " oldValues[tid] = threadCount+1; //mark unexpected success with invalid value\n" + " else\n" + " {\n" + " for(int i = 0; i < "+IterationsStr()+" || successCount == 0; i++)\n" + " {\n" + " previous = expected;\n" + " if(atomic_compare_exchange_strong"+postfix+"(&destMemory[0], &expected, oldValues[tid]"+memoryOrderScope+"))\n" + " {\n" + " oldValues[tid] = expected;\n" + " successCount++;\n" + " }\n" + " else\n" + " {\n" + " if(previous == expected) // spurious failure - shouldn't occur for 'strong'\n" + " {\n" + " oldValues[tid] = threadCount; //mark fail with invalid value\n" + " break;\n" + " }\n" + " }\n" + " }\n" + " }\n"; + } + virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) + { + HostDataType expected = (HostDataType)StartValue(), previous; + oldValues[tid] = (HostDataType)tid; + for(int i = 0; i < Iterations(); i++) + { + previous = expected; + if(host_atomic_compare_exchange(&destMemory[0], &expected, oldValues[tid], MemoryOrder(), MemoryOrder2())) + oldValues[tid] = expected; + else + { + if(previous == expected) // shouldn't occur for 'strong' + { + oldValues[tid] 
= threadCount; //mark fail with invalid value + } + } + } + } + virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues) + { + correct = true; + /* We are expecting values from 0 to size-1 and initial value from atomic variable */ + /* These values must be distributed across refValues array and atomic variable finalVaue[0] */ + /* Any repeated value is treated as an error */ + std::vector tidFound(threadCount); + bool startValueFound = false; + cl_uint i; + + for(i = 0; i <= threadCount; i++) + { + cl_uint value; + if(i == threadCount) + value = (cl_uint)finalValues[0]; //additional value from atomic variable (last written) + else + value = (cl_uint)refValues[i]; + if(value == (cl_uint)StartValue()) + { + // Special initial value + if(startValueFound) + { + log_error("ERROR: Starting reference value (%u) occurred more thane once\n", (cl_uint)StartValue()); + correct = false; + return true; + } + startValueFound = true; + continue; + } + if(value >= threadCount) + { + if(value == threadCount) + log_error("ERROR: Spurious failure detected for atomic_compare_exchange_strong\n"); + log_error("ERROR: Reference value %u outside of valid range! 
(%u)\n", i, value); + correct = false; + return true; + } + if(tidFound[value]) + { + log_error("ERROR: Value (%u) occurred more thane once\n", value); + correct = false; + return true; + } + tidFound[value] = true; + } + return true; + } +}; + +int test_atomic_compare_exchange_strong_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) +{ + int error = 0; + CBasicTestCompareStrong test_int(TYPE_ATOMIC_INT, useSVM); + EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); + CBasicTestCompareStrong test_uint(TYPE_ATOMIC_UINT, useSVM); + EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); + CBasicTestCompareStrong test_long(TYPE_ATOMIC_LONG, useSVM); + EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); + CBasicTestCompareStrong test_ulong(TYPE_ATOMIC_ULONG, useSVM); + EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); + if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) + { + CBasicTestCompareStrong test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestCompareStrong test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestCompareStrong test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestCompareStrong test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + else + { + CBasicTestCompareStrong test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestCompareStrong test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, 
queue, num_elements)); + CBasicTestCompareStrong test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestCompareStrong test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + return error; +} + +int test_atomic_compare_exchange_strong(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return test_atomic_compare_exchange_strong_generic(deviceID, context, queue, num_elements, false); +} + +int test_atomic_compare_exchange_strong_svm(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return test_atomic_compare_exchange_strong_generic(deviceID, context, queue, num_elements, true); +} + +template +class CBasicTestCompareWeak : public CBasicTestCompareStrong +{ +public: + using CBasicTestCompareStrong::StartValue; + using CBasicTestCompareStrong::MemoryOrderScope; + using CBasicTestCompareStrong::DataType; + using CBasicTestCompareStrong::Iterations; + using CBasicTestCompareStrong::IterationsStr; + CBasicTestCompareWeak(TExplicitAtomicType dataType, bool useSVM) : CBasicTestCompareStrong(dataType, useSVM) + { + } + virtual std::string ProgramCore() + { + std::string memoryOrderScope = MemoryOrderScope(); + std::string postfix(memoryOrderScope.empty() ? 
"" : "_explicit"); + return + std::string(" ")+DataType().RegularTypeName()+" expected , previous;\n" + " int successCount = 0;\n" + " oldValues[tid] = tid;\n" + " expected = tid; // force failure at the beginning\n" + " if(atomic_compare_exchange_weak"+postfix+"(&destMemory[0], &expected, oldValues[tid]"+memoryOrderScope+") || expected == tid)\n" + " oldValues[tid] = threadCount+1; //mark unexpected success with invalid value\n" + " else\n" + " {\n" + " for(int i = 0; i < "+IterationsStr()+" || successCount == 0; i++)\n" + " {\n" + " previous = expected;\n" + " if(atomic_compare_exchange_weak"+postfix+"(&destMemory[0], &expected, oldValues[tid]"+memoryOrderScope+"))\n" + " {\n" + " oldValues[tid] = expected;\n" + " successCount++;\n" + " }\n" + " }\n" + " }\n"; + } +}; + +int test_atomic_compare_exchange_weak_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) +{ + int error = 0; + CBasicTestCompareWeak test_int(TYPE_ATOMIC_INT, useSVM); + EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); + CBasicTestCompareWeak test_uint(TYPE_ATOMIC_UINT, useSVM); + EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); + CBasicTestCompareWeak test_long(TYPE_ATOMIC_LONG, useSVM); + EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); + CBasicTestCompareWeak test_ulong(TYPE_ATOMIC_ULONG, useSVM); + EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); + if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) + { + CBasicTestCompareWeak test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestCompareWeak test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestCompareWeak test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST(error, 
test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestCompareWeak test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + else + { + CBasicTestCompareWeak test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestCompareWeak test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestCompareWeak test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestCompareWeak test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + return error; +} + +int test_atomic_compare_exchange_weak(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return test_atomic_compare_exchange_weak_generic(deviceID, context, queue, num_elements, false); +} + +int test_atomic_compare_exchange_weak_svm(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return test_atomic_compare_exchange_weak_generic(deviceID, context, queue, num_elements, true); +} + +template +class CBasicTestFetchAdd : public CBasicTestMemOrderScope +{ +public: + using CBasicTestMemOrderScope::MemoryOrder; + using CBasicTestMemOrderScope::MemoryOrderScopeStr; + using CBasicTestMemOrderScope::StartValue; + using CBasicTestMemOrderScope::DataType; + CBasicTestFetchAdd(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) + { + } + virtual std::string ProgramCore() + { + std::string memoryOrderScope = MemoryOrderScopeStr(); + std::string postfix(memoryOrderScope.empty() ? 
"" : "_explicit"); + return + " oldValues[tid] = atomic_fetch_add"+postfix+"(&destMemory[0], tid + 3"+memoryOrderScope+");\n"+ + " atomic_fetch_add"+postfix+"(&destMemory[0], tid + 3"+memoryOrderScope+");\n" + " atomic_fetch_add"+postfix+"(&destMemory[0], tid + 3"+memoryOrderScope+");\n" + " atomic_fetch_add"+postfix+"(&destMemory[0], (("+DataType().AddSubOperandTypeName()+")tid + 3) << (sizeof("+DataType().AddSubOperandTypeName()+")-1)*8"+memoryOrderScope+");\n"; + } + virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) + { + oldValues[tid] = host_atomic_fetch_add(&destMemory[0], (HostDataType)tid + 3, MemoryOrder()); + host_atomic_fetch_add(&destMemory[0], (HostDataType)tid + 3, MemoryOrder()); + host_atomic_fetch_add(&destMemory[0], (HostDataType)tid + 3, MemoryOrder()); + host_atomic_fetch_add(&destMemory[0], ((HostDataType)tid + 3) << (sizeof(HostDataType)-1)*8, MemoryOrder()); + } + virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) + { + expected = StartValue(); + for(cl_uint i = 0; i < threadCount; i++) + expected += ((HostDataType)i+3)*3+(((HostDataType)i + 3) << (sizeof(HostDataType)-1)*8); + return true; + } +}; + +int test_atomic_fetch_add_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) +{ + int error = 0; + CBasicTestFetchAdd test_int(TYPE_ATOMIC_INT, useSVM); + EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchAdd test_uint(TYPE_ATOMIC_UINT, useSVM); + EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchAdd test_long(TYPE_ATOMIC_LONG, useSVM); + EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchAdd test_ulong(TYPE_ATOMIC_ULONG, useSVM); + EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, 
num_elements)); + if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) + { + CBasicTestFetchAdd test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchAdd test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchAdd test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchAdd test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + else + { + CBasicTestFetchAdd test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchAdd test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchAdd test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchAdd test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + return error; +} + +int test_atomic_fetch_add(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return test_atomic_fetch_add_generic(deviceID, context, queue, num_elements, false); +} + +int test_atomic_fetch_add_svm(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return test_atomic_fetch_add_generic(deviceID, context, queue, num_elements, true); +} + +template +class CBasicTestFetchSub : public CBasicTestMemOrderScope +{ +public: + using CBasicTestMemOrderScope::MemoryOrder; + using CBasicTestMemOrderScope::MemoryOrderScopeStr; + using CBasicTestMemOrderScope::StartValue; + using 
CBasicTestMemOrderScope::DataType; + CBasicTestFetchSub(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) + { + } + virtual std::string ProgramCore() + { + std::string memoryOrderScope = MemoryOrderScopeStr(); + std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); + return + " oldValues[tid] = atomic_fetch_sub"+postfix+"(&destMemory[0], tid + 3 +((("+DataType().AddSubOperandTypeName()+")tid + 3) << (sizeof("+DataType().AddSubOperandTypeName()+")-1)*8)"+memoryOrderScope+");\n"; + } + virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) + { + oldValues[tid] = host_atomic_fetch_sub(&destMemory[0], (HostDataType)tid + 3+(((HostDataType)tid + 3) << (sizeof(HostDataType)-1)*8), MemoryOrder()); + } + virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) + { + expected = StartValue(); + for(cl_uint i = 0; i < threadCount; i++) + expected -= (HostDataType)i + 3 +(((HostDataType)i + 3) << (sizeof(HostDataType)-1)*8); + return true; + } +}; + +int test_atomic_fetch_sub_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) +{ + int error = 0; + CBasicTestFetchSub test_int(TYPE_ATOMIC_INT, useSVM); + EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchSub test_uint(TYPE_ATOMIC_UINT, useSVM); + EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchSub test_long(TYPE_ATOMIC_LONG, useSVM); + EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchSub test_ulong(TYPE_ATOMIC_ULONG, useSVM); + EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); + if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) + { + CBasicTestFetchSub test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + 
EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchSub test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchSub test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchSub test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + else + { + CBasicTestFetchSub test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchSub test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchSub test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchSub test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + return error; +} + +int test_atomic_fetch_sub(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return test_atomic_fetch_sub_generic(deviceID, context, queue, num_elements, false); +} + +int test_atomic_fetch_sub_svm(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return test_atomic_fetch_sub_generic(deviceID, context, queue, num_elements, true); +} + +template +class CBasicTestFetchOr : public CBasicTestMemOrderScope +{ +public: + using CBasicTestMemOrderScope::StartValue; + using CBasicTestMemOrderScope::DataType; + using CBasicTestMemOrderScope::MemoryOrder; + using CBasicTestMemOrderScope::MemoryOrderScopeStr; + CBasicTestFetchOr(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) + { + 
StartValue(0); + } + virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) + { + cl_uint numBits = DataType().Size(deviceID) * 8; + + return (threadCount + numBits - 1) / numBits; + } + virtual std::string ProgramCore() + { + std::string memoryOrderScope = MemoryOrderScopeStr(); + std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); + return + std::string(" size_t numBits = sizeof(")+DataType().RegularTypeName()+") * 8;\n" + " int whichResult = tid / numBits;\n" + " int bitIndex = tid - (whichResult * numBits);\n" + "\n" + " oldValues[tid] = atomic_fetch_or"+postfix+"(&destMemory[whichResult], (("+DataType().RegularTypeName()+")1 << bitIndex) "+memoryOrderScope+");\n"; + } + virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) + { + size_t numBits = sizeof(HostDataType) * 8; + size_t whichResult = tid / numBits; + size_t bitIndex = tid - (whichResult * numBits); + + oldValues[tid] = host_atomic_fetch_or(&destMemory[whichResult], ((HostDataType)1 << bitIndex), MemoryOrder()); + } + virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) + { + cl_uint numValues = (threadCount + (sizeof(HostDataType)*8-1)) / (sizeof(HostDataType)*8); + if(whichDestValue < numValues - 1) + { + expected = ~(HostDataType)0; + return true; + } + // Last item doesn't get or'ed on every bit, so we have to mask away + cl_uint numBits = threadCount - whichDestValue * (sizeof(HostDataType)*8); + expected = StartValue(); + for(cl_uint i = 0; i < numBits; i++) + expected |= ((HostDataType)1 << i); + return true; + } +}; + +int test_atomic_fetch_or_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) +{ + int error = 0; + CBasicTestFetchOr test_int(TYPE_ATOMIC_INT, useSVM); + EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchOr 
test_uint(TYPE_ATOMIC_UINT, useSVM); + EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchOr test_long(TYPE_ATOMIC_LONG, useSVM); + EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchOr test_ulong(TYPE_ATOMIC_ULONG, useSVM); + EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); + if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) + { + CBasicTestFetchOr test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchOr test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchOr test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchOr test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + else + { + CBasicTestFetchOr test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchOr test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchOr test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchOr test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + return error; +} + +int test_atomic_fetch_or(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return test_atomic_fetch_or_generic(deviceID, context, queue, num_elements, false); +} + +int test_atomic_fetch_or_svm(cl_device_id deviceID, cl_context context, 
/* NOTE(review): this region is diff residue; '+' markers and original line
 * breaks were lost, and the template parameter lists after each 'template'
 * keyword (plus the template arguments on CBasicTestMemOrderScope and on the
 * test instantiations below) appear to have been stripped during extraction
 * (presumably <typename HostAtomicType, typename HostDataType>) -- restore
 * from the original file before building. */

// Tail of test_atomic_fetch_or_svm (signature begins before this chunk):
// runs the atomic_fetch_or test battery with shared virtual memory enabled.
cl_command_queue queue, int num_elements)
{
    return test_atomic_fetch_or_generic(deviceID, context, queue, num_elements, true);
}

// Conformance test for atomic_fetch_xor[_explicit]: every thread XORs a single
// bit (derived from its thread id) into one shared atomic value; ExpectedValue
// folds the identical per-thread bit pattern into the start value, so duplicate
// bit indices cancel identically on device and host.
template
class CBasicTestFetchXor : public CBasicTestMemOrderScope
{
public:
    using CBasicTestMemOrderScope::StartValue;
    using CBasicTestMemOrderScope::MemoryOrder;
    using CBasicTestMemOrderScope::MemoryOrderScopeStr;
    using CBasicTestMemOrderScope::DataType;
    CBasicTestFetchXor(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM)
    {
        // Arbitrary non-trivial start pattern; XOR of distinct bits keeps the
        // expected final value exactly computable.
        StartValue((HostDataType)0x2f08ab418ba0541LL);
    }
    // OpenCL C kernel fragment executed by each work-item.
    virtual std::string ProgramCore()
    {
        std::string memoryOrderScope = MemoryOrderScopeStr();
        // When an explicit order/scope is requested the *_explicit builtin form is used.
        std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
        return
            std::string(" int numBits = sizeof(")+DataType().RegularTypeName()+") * 8;\n"
            " int bitIndex = (numBits-1)*(tid+1)/threadCount;\n"
            "\n"
            " oldValues[tid] = atomic_fetch_xor"+postfix+"(&destMemory[0], (("+DataType().RegularTypeName()+")1 << bitIndex) "+memoryOrderScope+");\n";
    }
    // Host-side reference implementation mirroring ProgramCore().
    virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
    {
        int numBits = sizeof(HostDataType) * 8;
        int bitIndex = (numBits-1)*(tid+1)/threadCount;

        oldValues[tid] = host_atomic_fetch_xor(&destMemory[0], ((HostDataType)1 << bitIndex), MemoryOrder());
    }
    // Final expected value: start value XORed by every thread's bit, using the
    // same tid -> bitIndex mapping as the kernel.
    virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue)
    {
        int numBits = sizeof(HostDataType)*8;
        expected = StartValue();
        for(cl_uint i = 0; i < threadCount; i++)
        {
            int bitIndex = (numBits-1)*(i+1)/threadCount;
            expected ^= ((HostDataType)1 << bitIndex);
        }
        return true;
    }
};

// Runs CBasicTestFetchXor over every required atomic type. The pointer-sized
// types are split on whether device size_t is 4 bytes.
// NOTE(review): the two branches below are textually identical here -- the
// width-specific template arguments (e.g. 32- vs 64-bit host types) were most
// likely lost with the stripped '<...>' lists; verify against the original.
int test_atomic_fetch_xor_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
{
    int error = 0;
    CBasicTestFetchXor test_int(TYPE_ATOMIC_INT, useSVM);
    EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchXor test_uint(TYPE_ATOMIC_UINT, useSVM);
    EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchXor test_long(TYPE_ATOMIC_LONG, useSVM);
    EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchXor test_ulong(TYPE_ATOMIC_ULONG, useSVM);
    EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
    if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
    {
        CBasicTestFetchXor test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchXor test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchXor test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchXor test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    else
    {
        CBasicTestFetchXor test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchXor test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchXor test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchXor test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    return error;
}

// Non-SVM entry point registered with the test harness.
int test_atomic_fetch_xor(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    return test_atomic_fetch_xor_generic(deviceID, context, queue, num_elements, false);
}

// SVM entry point.
int
test_atomic_fetch_xor_svm(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    return test_atomic_fetch_xor_generic(deviceID, context, queue, num_elements, true);
}

// Conformance test for atomic_fetch_and[_explicit]: threads clear one bit each
// across an array of atomics (one atomic per sizeof(type)*8 threads); every
// fully-covered destination must end up 0.
// NOTE(review): template parameter list stripped here too -- see note above.
template
class CBasicTestFetchAnd : public CBasicTestMemOrderScope
{
public:
    using CBasicTestMemOrderScope::StartValue;
    using CBasicTestMemOrderScope::DataType;
    using CBasicTestMemOrderScope::MemoryOrder;
    using CBasicTestMemOrderScope::MemoryOrderScopeStr;
    CBasicTestFetchAnd(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM)
    {
        // All bits set, so each thread's AND clears exactly its own bit.
        StartValue(~(HostDataType)0);
    }
    // One destination atomic per numBits threads (rounded up).
    virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
    {
        cl_uint numBits = DataType().Size(deviceID) * 8;

        return (threadCount + numBits - 1) / numBits;
    }
    // Kernel: each thread ANDs away one bit of its assigned destination word.
    virtual std::string ProgramCore()
    {
        std::string memoryOrderScope = MemoryOrderScopeStr();
        std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
        return
            std::string(" size_t numBits = sizeof(")+DataType().RegularTypeName()+") * 8;\n"
            " int whichResult = tid / numBits;\n"
            " int bitIndex = tid - (whichResult * numBits);\n"
            "\n"
            " oldValues[tid] = atomic_fetch_and"+postfix+"(&destMemory[whichResult], ~(("+DataType().RegularTypeName()+")1 << bitIndex) "+memoryOrderScope+");\n";
    }
    // Host-side mirror of the kernel.
    virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
    {
        size_t numBits = sizeof(HostDataType) * 8;
        size_t whichResult = tid / numBits;
        size_t bitIndex = tid - (whichResult * numBits);

        oldValues[tid] = host_atomic_fetch_and(&destMemory[whichResult], ~((HostDataType)1 << bitIndex), MemoryOrder());
    }
    virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue)
    {
        cl_uint numValues = (threadCount + (sizeof(HostDataType)*8-1)) / (sizeof(HostDataType)*8);
        if(whichDestValue < numValues - 1)
        {
            // Fully covered word: every bit was cleared by some thread.
            expected = 0;
            return true;
        }
        // Last item doesn't get and'ed on every bit, so we have to mask away
        // only the bits actually owned by a thread.
        size_t numBits = threadCount - whichDestValue * (sizeof(HostDataType)*8);
        expected = StartValue();
        for(size_t i = 0; i < numBits; i++)
            expected &= ~((HostDataType)1 << i);
        return true;
    }
};

// Runs CBasicTestFetchAnd over all required atomic types (first half; the
// pointer-width else-branch continues past this block).
// NOTE(review): same identical-branch concern as test_atomic_fetch_xor_generic.
int test_atomic_fetch_and_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
{
    int error = 0;
    CBasicTestFetchAnd test_int(TYPE_ATOMIC_INT, useSVM);
    EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchAnd test_uint(TYPE_ATOMIC_UINT, useSVM);
    EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchAnd test_long(TYPE_ATOMIC_LONG, useSVM);
    EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchAnd test_ulong(TYPE_ATOMIC_ULONG, useSVM);
    EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
    if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
    {
        CBasicTestFetchAnd test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchAnd test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchAnd test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchAnd test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    else
    {
        CBasicTestFetchAnd test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchAnd test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchAnd test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchAnd test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    return error;
}

// Non-SVM entry point for the fetch_and tests.
int test_atomic_fetch_and(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    return test_atomic_fetch_and_generic(deviceID, context, queue, num_elements, false);
}

// SVM entry point for the fetch_and tests.
int test_atomic_fetch_and_svm(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    return test_atomic_fetch_and_generic(deviceID, context, queue, num_elements, true);
}

// Combined OR/AND ping-pong test: each thread repeatedly sets and clears its
// own bit and checks the value returned by each fetch op, counting mismatches
// in oldValues[tid]; other threads' bits must never disturb this thread's bit.
// NOTE(review): template parameter list after 'template' stripped by
// extraction -- restore from the original file.
template
class CBasicTestFetchOrAnd : public CBasicTestMemOrderScope
{
public:
    using CBasicTestMemOrderScope::StartValue;
    using CBasicTestMemOrderScope::DataType;
    using CBasicTestMemOrderScope::MemoryOrder;
    using CBasicTestMemOrderScope::MemoryOrderScopeStr;
    using CBasicTestMemOrderScope::Iterations;
    using CBasicTestMemOrderScope::IterationsStr;
    CBasicTestFetchOrAnd(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM)
    {
        // All bits start cleared; first OR in each iteration must observe 0.
        StartValue(0);
    }
    // One destination atomic per sizeof(type)*8 threads (rounded up).
    virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
    {
        return 1+(threadCount-1)/(DataType().Size(deviceID)*8);
    }
    // each thread modifies (with OR and AND operations) and verifies
    // only one bit in atomic variable
    // other bits are modified by other threads but it must not affect current thread operation
    virtual std::string ProgramCore()
    {
        std::string memoryOrderScope = MemoryOrderScopeStr();
        std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
        return
            std::string(" int bits = sizeof(")+DataType().RegularTypeName()+")*8;\n"+
            " size_t valueInd = tid/bits;\n"
            " "+DataType().RegularTypeName()+" value, bitMask = ("+DataType().RegularTypeName()+")1 << tid%bits;\n"
            " oldValues[tid] = 0;\n"
            " for(int i = 0; i < "+IterationsStr()+"; i++)\n"
            " {\n"
            " value = atomic_fetch_or"+postfix+"(destMemory+valueInd, bitMask"+memoryOrderScope+");\n"
            " if(value & bitMask) // bit should be set to 0\n"
            " oldValues[tid]++;\n"
            " value = atomic_fetch_and"+postfix+"(destMemory+valueInd, ~bitMask"+memoryOrderScope+");\n"
            " if(!(value & bitMask)) // bit should be set to 1\n"
            " oldValues[tid]++;\n"
            " }\n";
    }
    // Host-side mirror; oldValues[tid] accumulates mismatch count (0 == pass).
    virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
    {
        int bits = sizeof(HostDataType)*8;
        size_t valueInd = tid/bits;
        HostDataType value, bitMask = (HostDataType)1 << tid%bits;
        oldValues[tid] = 0;
        for(int i = 0; i < Iterations(); i++)
        {
            value = host_atomic_fetch_or(destMemory+valueInd, bitMask, MemoryOrder());
            if(value & bitMask) // bit should be set to 0
                oldValues[tid]++;
            value = host_atomic_fetch_and(destMemory+valueInd, ~bitMask, MemoryOrder());
            if(!(value & bitMask)) // bit should be set to 1
                oldValues[tid]++;
        }
    }
    // Every bit ends cleared (last op per thread is an AND of its own bit).
    virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue)
    {
        expected = 0;
        return true;
    }
    // Per-thread mismatch counters must all be zero.
    virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues)
    {
        correct = true;
        for(cl_uint i = 0; i < threadCount; i++)
        {
            if(refValues[i] > 0)
            {
                log_error("Thread %d found %d mismatch(es)\n", i, (cl_uint)refValues[i]);
                correct = false;
            }
        }
        return true;
    }
};

// Runs CBasicTestFetchOrAnd over all required atomic types.
// NOTE(review): identical if/else branches -- width-specific template args
// presumably stripped; verify against the original.
int test_atomic_fetch_orand_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
{
    int error = 0;
    CBasicTestFetchOrAnd test_int(TYPE_ATOMIC_INT, useSVM);
    EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchOrAnd test_uint(TYPE_ATOMIC_UINT, useSVM);
    EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchOrAnd test_long(TYPE_ATOMIC_LONG, useSVM);
    EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchOrAnd test_ulong(TYPE_ATOMIC_ULONG, useSVM);
    EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
    if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
    {
        CBasicTestFetchOrAnd test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchOrAnd test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchOrAnd test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchOrAnd test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    else
    {
        CBasicTestFetchOrAnd test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchOrAnd test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchOrAnd test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchOrAnd test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    return error;
}

// Non-SVM entry point for the OR/AND combined tests.
int test_atomic_fetch_orand(cl_device_id deviceID, cl_context context, cl_command_queue queue,
int num_elements)
{
    return test_atomic_fetch_orand_generic(deviceID, context, queue, num_elements, false);
}

// SVM entry point for the OR/AND combined tests.
int test_atomic_fetch_orand_svm(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    return test_atomic_fetch_orand_generic(deviceID, context, queue, num_elements, true);
}

// XOR ping-pong test: like CBasicTestFetchOrAnd but both the set and the clear
// of the thread's bit are done with atomic_fetch_xor.
// NOTE(review): template parameter list stripped -- restore from original.
template
class CBasicTestFetchXor2 : public CBasicTestMemOrderScope
{
public:
    using CBasicTestMemOrderScope::StartValue;
    using CBasicTestMemOrderScope::DataType;
    using CBasicTestMemOrderScope::MemoryOrder;
    using CBasicTestMemOrderScope::MemoryOrderScopeStr;
    using CBasicTestMemOrderScope::Iterations;
    using CBasicTestMemOrderScope::IterationsStr;
    CBasicTestFetchXor2(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM)
    {
        StartValue(0);
    }
    virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
    {
        return 1+(threadCount-1)/(DataType().Size(deviceID)*8);
    }
    // each thread modifies (with XOR operation) and verifies
    // only one bit in atomic variable
    // other bits are modified by other threads but it must not affect current thread operation
    virtual std::string ProgramCore()
    {
        std::string memoryOrderScope = MemoryOrderScopeStr();
        std::string postfix(memoryOrderScope.empty() ?
"" : "_explicit"); + return + std::string(" int bits = sizeof(")+DataType().RegularTypeName()+")*8;\n"+ + " size_t valueInd = tid/bits;\n" + " "+DataType().RegularTypeName()+" value, bitMask = ("+DataType().RegularTypeName()+")1 << tid%bits;\n" + " oldValues[tid] = 0;\n" + " for(int i = 0; i < "+IterationsStr()+"; i++)\n" + " {\n" + " value = atomic_fetch_xor"+postfix+"(destMemory+valueInd, bitMask"+memoryOrderScope+");\n" + " if(value & bitMask) // bit should be set to 0\n" + " oldValues[tid]++;\n" + " value = atomic_fetch_xor"+postfix+"(destMemory+valueInd, bitMask"+memoryOrderScope+");\n" + " if(!(value & bitMask)) // bit should be set to 1\n" + " oldValues[tid]++;\n" + " }\n"; + } + virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) + { + int bits = sizeof(HostDataType)*8; + size_t valueInd = tid/bits; + HostDataType value, bitMask = (HostDataType)1 << tid%bits; + oldValues[tid] = 0; + for(int i = 0; i < Iterations(); i++) + { + value = host_atomic_fetch_xor(destMemory+valueInd, bitMask, MemoryOrder()); + if(value & bitMask) // bit should be set to 0 + oldValues[tid]++; + value = host_atomic_fetch_xor(destMemory+valueInd, bitMask, MemoryOrder()); + if(!(value & bitMask)) // bit should be set to 1 + oldValues[tid]++; + } + } + virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) + { + expected = 0; + return true; + } + virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues) + { + correct = true; + for(cl_uint i = 0; i < threadCount; i++) + { + if(refValues[i] > 0) + { + log_error("Thread %d found %d mismatches\n", i, (cl_uint)refValues[i]); + correct = false; + } + } + return true; + } +}; + +int test_atomic_fetch_xor2_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) +{ + int error = 0; + CBasicTestFetchXor2 
test_int(TYPE_ATOMIC_INT, useSVM); + EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchXor2 test_uint(TYPE_ATOMIC_UINT, useSVM); + EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchXor2 test_long(TYPE_ATOMIC_LONG, useSVM); + EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchXor2 test_ulong(TYPE_ATOMIC_ULONG, useSVM); + EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); + if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) + { + CBasicTestFetchXor2 test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchXor2 test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchXor2 test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchXor2 test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + else + { + CBasicTestFetchXor2 test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchXor2 test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchXor2 test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchXor2 test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + return error; +} + +int test_atomic_fetch_xor2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return 
test_atomic_fetch_xor2_generic(deviceID, context, queue, num_elements, false); +} + +int test_atomic_fetch_xor2_svm(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return test_atomic_fetch_xor2_generic(deviceID, context, queue, num_elements, true); +} + +template +class CBasicTestFetchMin : public CBasicTestMemOrderScope +{ +public: + using CBasicTestMemOrderScope::StartValue; + using CBasicTestMemOrderScope::DataType; + using CBasicTestMemOrderScope::MemoryOrder; + using CBasicTestMemOrderScope::MemoryOrderScopeStr; + CBasicTestFetchMin(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) + { + StartValue(DataType().MaxValue()); + } + virtual std::string ProgramCore() + { + std::string memoryOrderScope = MemoryOrderScopeStr(); + std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); + return + " oldValues[tid] = atomic_fetch_min"+postfix+"(&destMemory[0], oldValues[tid] "+memoryOrderScope+");\n"; + } + virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) + { + oldValues[tid] = host_atomic_fetch_min(&destMemory[0], oldValues[tid], MemoryOrder()); + } + virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, MTdata d) + { + for(cl_uint i = 0; i < threadCount; i++) + { + startRefValues[i] = genrand_int32(d); + if(sizeof(HostDataType) >= 8) + startRefValues[i] |= (HostDataType)genrand_int32(d) << 16; + } + return true; + } + virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) + { + expected = StartValue(); + for(cl_uint i = 0; i < threadCount; i++) + { + if(startRefValues[ i ] < expected) + expected = startRefValues[ i ]; + } + return true; + } +}; + +int test_atomic_fetch_min_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) +{ + int error = 0; + CBasicTestFetchMin 
test_int(TYPE_ATOMIC_INT, useSVM); + EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchMin test_uint(TYPE_ATOMIC_UINT, useSVM); + EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchMin test_long(TYPE_ATOMIC_LONG, useSVM); + EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchMin test_ulong(TYPE_ATOMIC_ULONG, useSVM); + EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); + if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) + { + CBasicTestFetchMin test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchMin test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchMin test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchMin test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + else + { + CBasicTestFetchMin test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchMin test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchMin test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchMin test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + return error; +} + +int test_atomic_fetch_min(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return 
test_atomic_fetch_min_generic(deviceID, context, queue, num_elements, false);
}

// SVM entry point for the fetch_min tests.
int test_atomic_fetch_min_svm(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    return test_atomic_fetch_min_generic(deviceID, context, queue, num_elements, true);
}

// Conformance test for atomic_fetch_max[_explicit]: mirror image of
// CBasicTestFetchMin (starts at MinValue, keeps the maximum).
// NOTE(review): template parameter list stripped -- restore from original.
template
class CBasicTestFetchMax : public CBasicTestMemOrderScope
{
public:
    using CBasicTestMemOrderScope::StartValue;
    using CBasicTestMemOrderScope::DataType;
    using CBasicTestMemOrderScope::MemoryOrder;
    using CBasicTestMemOrderScope::MemoryOrderScopeStr;
    CBasicTestFetchMax(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM)
    {
        // Start at the type's minimum so any offered value wins the first max.
        StartValue(DataType().MinValue());
    }
    virtual std::string ProgramCore()
    {
        std::string memoryOrderScope = MemoryOrderScopeStr();
        std::string postfix(memoryOrderScope.empty() ? "" : "_explicit");
        return
            " oldValues[tid] = atomic_fetch_max"+postfix+"(&destMemory[0], oldValues[tid] "+memoryOrderScope+");\n";
    }
    // Host-side mirror of the kernel.
    virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues)
    {
        oldValues[tid] = host_atomic_fetch_max(&destMemory[0], oldValues[tid], MemoryOrder());
    }
    // Random per-thread inputs; for 8-byte types widen past 32 bits.
    // NOTE(review): same '<< 16' overlap as CBasicTestFetchMin -- confirm.
    virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, MTdata d)
    {
        for(cl_uint i = 0; i < threadCount; i++)
        {
            startRefValues[i] = genrand_int32(d);
            if(sizeof(HostDataType) >= 8)
                startRefValues[i] |= (HostDataType)genrand_int32(d) << 16;
        }
        return true;
    }
    // Expected result: maximum of the start value and every thread's input.
    virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue)
    {
        expected = StartValue();
        for(cl_uint i = 0; i < threadCount; i++)
        {
            if(startRefValues[ i ] > expected)
                expected = startRefValues[ i ];
        }
        return true;
    }
};

// Runs CBasicTestFetchMax over all required atomic types.
// NOTE(review): identical if/else branches -- width-specific template args
// presumably stripped; verify against the original.
int test_atomic_fetch_max_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM)
{
    int error = 0;
    CBasicTestFetchMax
    test_int(TYPE_ATOMIC_INT, useSVM);
    EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchMax test_uint(TYPE_ATOMIC_UINT, useSVM);
    EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchMax test_long(TYPE_ATOMIC_LONG, useSVM);
    EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements));
    CBasicTestFetchMax test_ulong(TYPE_ATOMIC_ULONG, useSVM);
    EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements));
    if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4)
    {
        CBasicTestFetchMax test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchMax test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchMax test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchMax test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    else
    {
        CBasicTestFetchMax test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM);
        EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchMax test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM);
        EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchMax test_size_t(TYPE_ATOMIC_SIZE_T, useSVM);
        EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements));
        CBasicTestFetchMax test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM);
        EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements));
    }
    return error;
}

// Non-SVM entry point for the fetch_max tests.
int test_atomic_fetch_max(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    return
test_atomic_fetch_max_generic(deviceID, context, queue, num_elements, false);
}

// SVM entry point for the fetch_max tests.
int test_atomic_fetch_max_svm(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    return test_atomic_fetch_max_generic(deviceID, context, queue, num_elements, true);
}

// Conformance test for atomic_flag_test_and_set / atomic_flag_clear:
// threadCount critical sections, each guarded by one flag; every thread scans
// for a section it enters first and records its tid there. Each section must
// end up claimed by exactly one distinct thread.
// NOTE(review): template parameter list stripped -- restore from original.
template
class CBasicTestFlag : public CBasicTestMemOrderScope
{
    // Sentinel stored in each critical-section slot until a thread claims it.
    static const HostDataType CRITICAL_SECTION_NOT_VISITED = 1000000000;
public:
    using CBasicTestMemOrderScope::StartValue;
    using CBasicTestMemOrderScope::OldValueCheck;
    using CBasicTestMemOrderScope::MemoryOrder;
    using CBasicTestMemOrderScope::MemoryScopeStr;
    using CBasicTestMemOrderScope::MemoryOrderScopeStr;
    using CBasicTestMemOrderScope::LocalMemory;
    CBasicTestFlag(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM)
    {
        StartValue(0);
        // Return-value checking is done via VerifyRefs, not old-value compare.
        OldValueCheck(false);
    }
    // One result slot (critical section) per thread.
    virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID)
    {
        return threadCount;
    }
    TExplicitMemoryOrderType MemoryOrderForClear()
    {
        // Memory ordering for atomic_flag_clear function
        // ("shall not be memory_order_acquire nor memory_order_acq_rel")
        if(MemoryOrder() == MEMORY_ORDER_ACQUIRE)
            return MEMORY_ORDER_RELAXED;
        if (MemoryOrder() == MEMORY_ORDER_ACQ_REL)
            return MEMORY_ORDER_RELEASE;
        return MemoryOrder();
    }
    // Order/scope argument string for the clear call (clear-legal order).
    std::string MemoryOrderScopeStrForClear()
    {
        std::string orderStr;
        if (MemoryOrder() != MEMORY_ORDER_EMPTY)
            orderStr = std::string(", ") + get_memory_order_type_name(MemoryOrderForClear());
        return orderStr + MemoryScopeStr();
    }
    virtual std::string ProgramCore()
    {
        std::string memoryOrderScope = MemoryOrderScopeStr();
        std::string postfix(memoryOrderScope.empty() ?
"" : "_explicit"); + std::string program = + " uint cnt, stop = 0;\n" + " for(cnt = 0; !stop && cnt < threadCount; cnt++) // each thread must find critical section where it is the first visitor\n" + " {\n" + " bool set = atomic_flag_test_and_set" + postfix + "(&destMemory[cnt]" + memoryOrderScope + ");\n"; + if (MemoryOrder() == MEMORY_ORDER_RELAXED || MemoryOrder() == MEMORY_ORDER_RELEASE) + program += " atomic_work_item_fence(" + + std::string(LocalMemory() ? "CLK_LOCAL_MEM_FENCE, " : "CLK_GLOBAL_MEM_FENCE, ") + + "memory_order_acquire," + + std::string(LocalMemory() ? "memory_scope_work_group" : "memory_scope_device") + + ");\n"; + + program += + " if (!set)\n" + " {\n"; + + if (LocalMemory()) + program += " uint csIndex = get_enqueued_local_size(0)*get_group_id(0)+cnt;\n"; + else + program += " uint csIndex = cnt;\n"; + + std::ostringstream csNotVisited; + csNotVisited << CRITICAL_SECTION_NOT_VISITED; + program += + " // verify that thread is the first visitor\n" + " if(oldValues[csIndex] == "+csNotVisited.str()+")\n" + " {\n" + " oldValues[csIndex] = tid; // set the winner id for this critical section\n" + " stop = 1;\n" + " }\n"; + + if (MemoryOrder() == MEMORY_ORDER_ACQUIRE || MemoryOrder() == MEMORY_ORDER_RELAXED) + program += " atomic_work_item_fence(" + + std::string(LocalMemory() ? "CLK_LOCAL_MEM_FENCE, " : "CLK_GLOBAL_MEM_FENCE, ") + + "memory_order_release," + + std::string(LocalMemory() ? 
"memory_scope_work_group" : "memory_scope_device") + + ");\n"; + + program += + " atomic_flag_clear" + postfix + "(&destMemory[cnt]" + MemoryOrderScopeStrForClear() + ");\n" + " }\n" + " }\n"; + return program; + } + virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) + { + cl_uint cnt, stop = 0; + for (cnt = 0; !stop && cnt < threadCount; cnt++) // each thread must find critical section where it is the first visitor\n" + { + if (!host_atomic_flag_test_and_set(&destMemory[cnt], MemoryOrder())) + { + cl_uint csIndex = cnt; + // verify that thread is the first visitor\n" + if (oldValues[csIndex] == CRITICAL_SECTION_NOT_VISITED) + { + oldValues[csIndex] = tid; // set the winner id for this critical section\n" + stop = 1; + } + host_atomic_flag_clear(&destMemory[cnt], MemoryOrderForClear()); + } + } + } + virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) + { + expected = StartValue(); + return true; + } + virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, MTdata d) + { + for(cl_uint i = 0 ; i < threadCount; i++) + startRefValues[i] = CRITICAL_SECTION_NOT_VISITED; + return true; + } + virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues) + { + correct = true; + /* We are expecting unique values from 0 to threadCount-1 (each critical section must be visited) */ + /* These values must be distributed across refValues array */ + std::vector tidFound(threadCount); + cl_uint i; + + for (i = 0; i < threadCount; i++) + { + cl_uint value = (cl_uint)refValues[i]; + if (value == CRITICAL_SECTION_NOT_VISITED) + { + // Special initial value + log_error("ERROR: Critical section %u not visited\n", i); + correct = false; + return true; + } + if (value >= threadCount) + { + log_error("ERROR: Reference value %u outside of valid range! 
(%u)\n", i, value); + correct = false; + return true; + } + if (tidFound[value]) + { + log_error("ERROR: Value (%u) occurred more thane once\n", value); + correct = false; + return true; + } + tidFound[value] = true; + } + return true; + } +}; + +int test_atomic_flag_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) +{ + int error = 0; + CBasicTestFlag test_flag(TYPE_ATOMIC_FLAG, useSVM); + EXECUTE_TEST(error, test_flag.Execute(deviceID, context, queue, num_elements)); + return error; +} + +int test_atomic_flag(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return test_atomic_flag_generic(deviceID, context, queue, num_elements, false); +} + +int test_atomic_flag_svm(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return test_atomic_flag_generic(deviceID, context, queue, num_elements, true); +} + +template +class CBasicTestFence : public CBasicTestMemOrderScope +{ + struct TestDefinition { + bool op1IsFence; + TExplicitMemoryOrderType op1MemOrder; + bool op2IsFence; + TExplicitMemoryOrderType op2MemOrder; + }; +public: + using CBasicTestMemOrderScope::StartValue; + using CBasicTestMemOrderScope::OldValueCheck; + using CBasicTestMemOrderScope::MemoryOrder; + using CBasicTestMemOrderScope::MemoryScope; + using CBasicTestMemOrderScope::MemoryScopeStr; + using CBasicTestMemOrderScope::DeclaredInProgram; + using CBasicTestMemOrderScope::UsedInFunction; + using CBasicTestMemOrderScope::DataType; + using CBasicTestMemOrderScope::CurrentGroupSize; + using CBasicTestMemOrderScope::UseSVM; + using CBasicTestMemOrderScope::LocalMemory; + using CBasicTestMemOrderScope::LocalRefValues; + CBasicTestFence(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) + { + StartValue(0); + OldValueCheck(false); + } + virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) + { + return threadCount; + } + 
    // Number of non-atomic payload variables each thread writes/reads.
    virtual cl_uint NumNonAtomicVariablesPerThread()
    {
        if (MemoryOrder() == MEMORY_ORDER_SEQ_CST)
            return 1;
        if (LocalMemory())
            return 32 * 1024 / 8 / CurrentGroupSize() - 1; //32KB of local memory required by spec
        return 256;
    }
    // Human-readable sub-case name, e.g. "release fence synchronizes-with
    // acquire atomic". substr(sizeof("memory_order")) strips the
    // "memory_order_" prefix (sizeof includes the NUL, covering the '_').
    virtual std::string SingleTestName()
    {
        std::string testName;
        if (MemoryOrder() == MEMORY_ORDER_SEQ_CST)
            testName += "seq_cst fence, ";
        else
            testName += std::string(get_memory_order_type_name(_subCase.op1MemOrder)).substr(sizeof("memory_order")) +
                (_subCase.op1IsFence ? " fence" : " atomic") + " synchronizes-with " +
                std::string(get_memory_order_type_name(_subCase.op2MemOrder)).substr(sizeof("memory_order")) +
                (_subCase.op2IsFence ? " fence" : " atomic") + ", ";
        // NOTE(review): base-class template arguments stripped by extraction
        // on the CBasicTest:: qualifier below -- restore from original.
        testName += CBasicTest::SingleTestName();
        testName += std::string(", ") + std::string(get_memory_scope_type_name(MemoryScope())).substr(sizeof("memory"));
        return testName;
    }
    virtual bool SVMDataBufferAllSVMConsistent()
    {
        return MemoryScope() == MEMORY_SCOPE_ALL_SVM_DEVICES;
    }
    virtual int ExecuteForEachParameterSet(cl_device_id deviceID, cl_context context, cl_command_queue queue)
    {
        int error = 0;
        // execute 3 (maximum) sub cases for each memory order
        for (_subCaseId = 0; _subCaseId < 3; _subCaseId++)
        {
            EXECUTE_TEST(error, (CBasicTestMemOrderScope::ExecuteForEachParameterSet(deviceID, context, queue)));
        }
        return error;
    }
    // Selects the sub-case table for the current memory order, skips
    // non-applicable combinations, then delegates to the base implementation.
    virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue)
    {
        if(DeclaredInProgram() || UsedInFunction())
            return 0; //skip test - not applicable - no overloaded fence functions for different address spaces
        if(MemoryOrder() == MEMORY_ORDER_EMPTY ||
            MemoryScope() == MEMORY_SCOPE_EMPTY) // empty 'scope' not required since opencl20-openclc-rev15
            return 0; //skip test - not applicable
        if((UseSVM() || gHost)
            && LocalMemory())
            return 0; // skip test - not applicable for SVM and local memory
        struct TestDefinition acqTests[] = {
            // {op1IsFence, op1MemOrder, op2IsFence, op2MemOrder}
            { false, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQUIRE },
            { true, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQUIRE },
            { true, MEMORY_ORDER_ACQ_REL, true, MEMORY_ORDER_ACQUIRE }
        };
        struct TestDefinition relTests[] = {
            { true, MEMORY_ORDER_RELEASE, false, MEMORY_ORDER_ACQUIRE },
            { true, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQ_REL }
        };
        struct TestDefinition arTests[] = {
            { false, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQ_REL },
            { true, MEMORY_ORDER_ACQ_REL, false, MEMORY_ORDER_ACQUIRE },
            { true, MEMORY_ORDER_ACQ_REL, true, MEMORY_ORDER_ACQ_REL }
        };
        switch (MemoryOrder())
        {
        case MEMORY_ORDER_ACQUIRE:
            if (_subCaseId >= sizeof(acqTests) / sizeof(struct TestDefinition))
                return 0;
            _subCase = acqTests[_subCaseId];
            break;
        case MEMORY_ORDER_RELEASE:
            if (_subCaseId >= sizeof(relTests) / sizeof(struct TestDefinition))
                return 0;
            _subCase = relTests[_subCaseId];
            break;
        case MEMORY_ORDER_ACQ_REL:
            if (_subCaseId >= sizeof(arTests) / sizeof(struct TestDefinition))
                return 0;
            _subCase = arTests[_subCaseId];
            break;
        case MEMORY_ORDER_SEQ_CST:
            if (_subCaseId != 0) // one special case only
                return 0;
            break;
        default:
            return 0;
        }
        LocalRefValues(LocalMemory());
        return CBasicTestMemOrderScope::ExecuteSingleTest(deviceID, context, queue);
    }
    // For the legacy (pre-2.0) API, map atomic_work_item_fence onto mem_fence
    // (one- or two-arg form depending on whether a scope argument is present).
    virtual std::string ProgramHeader(cl_uint maxNumDestItems)
    {
        std::string header;
        if(gOldAPI)
        {
            if(MemoryScope() == MEMORY_SCOPE_EMPTY)
            {
                header += "#define atomic_work_item_fence(x,y) mem_fence(x)\n";
            }
            else
            {
                header += "#define atomic_work_item_fence(x,y,z) mem_fence(x)\n";
            }
        }
        return header+CBasicTestMemOrderScope::ProgramHeader(maxNumDestItems);
    }
    // Kernel generator (continues past this chunk; do not assume the tail).
    virtual std::string ProgramCore()
    {
        std::ostringstream naValues;
        naValues << NumNonAtomicVariablesPerThread();
        std::string program, fenceType, nonAtomic;
        if (LocalMemory())
        {
            // Pair threads within a work-group via local memory.
            program = " size_t myId = get_local_id(0), hisId = get_local_size(0)-1-myId;\n";
            fenceType = "CLK_LOCAL_MEM_FENCE";
            nonAtomic = "localValues";
        }
        else
        {
            // Pair threads globally.
            program = " size_t myId = tid, hisId = threadCount-1-tid;\n";
            fenceType = "CLK_GLOBAL_MEM_FENCE";
            nonAtomic = "oldValues";
        }
        if (MemoryOrder() == MEMORY_ORDER_SEQ_CST)
        {
            // All threads are divided into pairs.
            // Each thread has its own atomic variable and performs the following actions:
            // - increments its own variable
            // - performs fence operation to propagate its value and to see value from other thread
            // - reads value from other thread's variable
            // - repeats the above steps when both values are the same (and less than 1000000)
            // - stores the last value read from other thread (in additional variable)
            // At the end of execution at least one thread should know the last value from other thread
            program += std::string("") +
                " " + DataType().RegularTypeName() + " myValue = 0, hisValue; \n"
                " do {\n"
                " myValue++;\n"
                " atomic_store_explicit(&destMemory[myId], myValue, memory_order_relaxed" + MemoryScopeStr() + ");\n"
                " atomic_work_item_fence(" + fenceType + ", memory_order_seq_cst" + MemoryScopeStr() + "); \n"
                " hisValue = atomic_load_explicit(&destMemory[hisId], memory_order_relaxed" + MemoryScopeStr() + ");\n"
                " } while(myValue == hisValue && myValue < 1000000);\n"
                " " + nonAtomic + "[myId] = hisValue; \n";
        }
        else
        {
            // Each thread modifies one of its non-atomic variables, increments value of its atomic variable
            // and reads values from another thread in typical synchronizes-with scenario with:
            // - non-atomic variable (at index A) modification (value change from 0 to A)
            // - release operation (additional fence or within atomic) + atomic variable modification (value A)
            // - atomic variable read (value B) + acquire operation (additional fence or within atomic)
            // - non-atomic variable (at index B) read (value C)
            // Each thread verifies dependency between atomic and non-atomic value read from
another thread + // The following condition must be true: B == C + program += std::string("") + + " " + DataType().RegularTypeName() + " myValue = 0, hisAtomicValue, hisValue; \n" + " do {\n" + " myValue++;\n" + " " + nonAtomic + "[myId*" + naValues.str() +"+myValue] = myValue;\n"; + if (_subCase.op1IsFence) + program += std::string("") + + " atomic_work_item_fence(" + fenceType + ", " + get_memory_order_type_name(_subCase.op1MemOrder) + MemoryScopeStr() + "); \n" + " atomic_store_explicit(&destMemory[myId], myValue, memory_order_relaxed" + MemoryScopeStr() + ");\n"; + else + program += std::string("") + + " atomic_store_explicit(&destMemory[myId], myValue, " + get_memory_order_type_name(_subCase.op1MemOrder) + MemoryScopeStr() + ");\n"; + if (_subCase.op2IsFence) + program += std::string("") + + " hisAtomicValue = atomic_load_explicit(&destMemory[hisId], memory_order_relaxed" + MemoryScopeStr() + ");\n" + " atomic_work_item_fence(" + fenceType + ", " + get_memory_order_type_name(_subCase.op2MemOrder) + MemoryScopeStr() + "); \n"; + else + program += std::string("") + + " hisAtomicValue = atomic_load_explicit(&destMemory[hisId], " + get_memory_order_type_name(_subCase.op2MemOrder) + MemoryScopeStr() + ");\n"; + program += + " hisValue = " + nonAtomic + "[hisId*" + naValues.str() + "+hisAtomicValue]; \n"; + if (LocalMemory()) + program += " hisId = (hisId+1)%get_local_size(0);\n"; + else + program += " hisId = (hisId+1)%threadCount;\n"; + program += + " } while(hisAtomicValue == hisValue && myValue < "+naValues.str()+"-1);\n" + " if(hisAtomicValue != hisValue)\n" + " { // fail\n" + " atomic_store(&destMemory[myId], myValue-1);\n"; + if (LocalMemory()) + program += " hisId = (hisId+get_local_size(0)-1)%get_local_size(0);\n"; + else + program += " hisId = (hisId+threadCount-1)%threadCount;\n"; + program += + " if(myValue+1 < " + naValues.str() + ")\n" + " " + nonAtomic + "[myId*" + naValues.str() + "+myValue+1] = hisId;\n" + " if(myValue+2 < " + naValues.str() + ")\n" 
+ " " + nonAtomic + "[myId*" + naValues.str() + "+myValue+2] = hisAtomicValue;\n" + " if(myValue+3 < " + naValues.str() + ")\n" + " " + nonAtomic + "[myId*" + naValues.str() + "+myValue+3] = hisValue;\n"; + if (gDebug) + { + program += + " printf(\"WI %d: atomic value (%d) at index %d is different than non-atomic value (%d)\\n\", tid, hisAtomicValue, hisId, hisValue);\n"; + } + program += + " }\n"; + } + return program; + } + virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) + { + size_t myId = tid, hisId = threadCount - 1 - tid; + if (MemoryOrder() == MEMORY_ORDER_SEQ_CST) + { + HostDataType myValue = 0, hisValue; + // CPU thread typically starts faster - wait for GPU thread + myValue++; + host_atomic_store(&destMemory[myId], myValue, MEMORY_ORDER_SEQ_CST); + while (host_atomic_load(&destMemory[hisId], MEMORY_ORDER_SEQ_CST) == 0); + do { + myValue++; + host_atomic_store(&destMemory[myId], myValue, MEMORY_ORDER_RELAXED); + host_atomic_thread_fence(MemoryOrder()); + hisValue = host_atomic_load(&destMemory[hisId], MEMORY_ORDER_RELAXED); + } while (myValue == hisValue && hisValue < 1000000); + oldValues[tid] = hisValue; + } + else + { + HostDataType myValue = 0, hisAtomicValue, hisValue; + do { + myValue++; + oldValues[myId*NumNonAtomicVariablesPerThread()+myValue] = myValue; + if (_subCase.op1IsFence) + { + host_atomic_thread_fence(_subCase.op1MemOrder); + host_atomic_store(&destMemory[myId], myValue, MEMORY_ORDER_RELAXED); + } + else + host_atomic_store(&destMemory[myId], myValue, _subCase.op1MemOrder); + if (_subCase.op2IsFence) + { + hisAtomicValue = host_atomic_load(&destMemory[hisId], MEMORY_ORDER_RELAXED); + host_atomic_thread_fence(_subCase.op2MemOrder); + } + else + hisAtomicValue = host_atomic_load(&destMemory[hisId], _subCase.op2MemOrder); + hisValue = oldValues[hisId*NumNonAtomicVariablesPerThread() + hisAtomicValue]; + hisId = (hisId + 1) % threadCount; + } while(hisAtomicValue == 
hisValue && myValue < (HostDataType)NumNonAtomicVariablesPerThread()-1); + if(hisAtomicValue != hisValue) + { // fail + host_atomic_store(&destMemory[myId], myValue-1, MEMORY_ORDER_SEQ_CST); + if (gDebug) + { + hisId = (hisId + threadCount - 1) % threadCount; + printf("WI %d: atomic value (%d) at index %d is different than non-atomic value (%d)\n", tid, hisAtomicValue, hisId, hisValue); + } + } + } + } + virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, MTdata d) + { + for(cl_uint i = 0 ; i < threadCount*NumNonAtomicVariablesPerThread(); i++) + startRefValues[i] = 0; + return true; + } + virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues) + { + correct = true; + cl_uint workSize = LocalMemory() ? CurrentGroupSize() : threadCount; + for(cl_uint workOffset = 0; workOffset < threadCount; workOffset+= workSize) + { + if(workOffset+workSize > threadCount) + // last workgroup (host threads) + workSize = threadCount-workOffset; + for(cl_uint i = 0 ; i < workSize && workOffset+i < threadCount; i++) + { + HostAtomicType myValue = finalValues[workOffset + i]; + if (MemoryOrder() == MEMORY_ORDER_SEQ_CST) + { + HostDataType hisValue = refValues[workOffset + i]; + if (myValue == hisValue) + { + // a draw - both threads should reach final value 1000000 + if (myValue != 1000000) + { + log_error("ERROR: Invalid reference value #%u (%d instead of 1000000)\n", workOffset + i, myValue); + correct = false; + return true; + } + } + else + { + //slower thread (in total order of seq_cst operations) must know last value written by faster thread + HostAtomicType hisRealValue = finalValues[workOffset + workSize - 1 - i]; + HostDataType myValueReadByHim = refValues[workOffset + workSize - 1 - i]; + + // who is the winner? 
- thread with lower private counter value + if (myValue == hisRealValue) // forbidden result - fence doesn't work + { + log_error("ERROR: Atomic counter values #%u and #%u are the same (%u)\n", workOffset + i, workOffset + workSize - 1 - i, myValue); + log_error("ERROR: Both threads have outdated values read from another thread (%u and %u)\n", hisValue, myValueReadByHim); + correct = false; + return true; + } + if (myValue > hisRealValue) // I'm slower + { + if (hisRealValue != hisValue) + { + log_error("ERROR: Invalid reference value #%u (%d instead of %d)\n", workOffset + i, hisValue, hisRealValue); + log_error("ERROR: Slower thread #%u should know value written by faster thread #%u\n", workOffset + i, workOffset + workSize - 1 - i); + correct = false; + return true; + } + } + else // I'm faster + { + if (myValueReadByHim != myValue) + { + log_error("ERROR: Invalid reference value #%u (%d instead of %d)\n", workOffset + workSize - 1 - i, myValueReadByHim, myValue); + log_error("ERROR: Slower thread #%u should know value written by faster thread #%u\n", workOffset + workSize - 1 - i, workOffset + i); + correct = false; + return true; + } + } + } + } + else + { + if (myValue != NumNonAtomicVariablesPerThread()-1) + { + log_error("ERROR: Invalid atomic value #%u (%d instead of %d)\n", workOffset + i, myValue, NumNonAtomicVariablesPerThread()-1); + log_error("ERROR: Thread #%u observed invalid values in other thread's variables\n", workOffset + i, myValue); + correct = false; + return true; + } + } + } + } + return true; + } +private: + int _subCaseId; + struct TestDefinition _subCase; +}; + +int test_atomic_fence_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) +{ + int error = 0; + CBasicTestFence test_int(TYPE_ATOMIC_INT, useSVM); + EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); + CBasicTestFence test_uint(TYPE_ATOMIC_UINT, useSVM); + EXECUTE_TEST(error, 
test_uint.Execute(deviceID, context, queue, num_elements)); + CBasicTestFence test_long(TYPE_ATOMIC_LONG, useSVM); + EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); + CBasicTestFence test_ulong(TYPE_ATOMIC_ULONG, useSVM); + EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); + if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) + { + CBasicTestFence test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFence test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFence test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFence test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + else + { + CBasicTestFence test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFence test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFence test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFence test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + return error; +} + +int test_atomic_fence(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return test_atomic_fence_generic(deviceID, context, queue, num_elements, false); +} + +int test_atomic_fence_svm(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return test_atomic_fence_generic(deviceID, 
context, queue, num_elements, true); +} diff --git a/test_conformance/clcpp/CMakeLists.txt b/test_conformance/clcpp/CMakeLists.txt new file mode 100644 index 00000000..04484e7a --- /dev/null +++ b/test_conformance/clcpp/CMakeLists.txt @@ -0,0 +1,21 @@ +add_subdirectory(address_spaces) +add_subdirectory(api) +add_subdirectory(atomics) +add_subdirectory(attributes) +add_subdirectory(common_funcs) +add_subdirectory(convert) +add_subdirectory(device_queue) +add_subdirectory(geometric_funcs) +add_subdirectory(images) +add_subdirectory(integer_funcs) +add_subdirectory(math_funcs) +add_subdirectory(pipes) +add_subdirectory(program_scope_ctors_dtors) +add_subdirectory(reinterpret) +add_subdirectory(relational_funcs) +add_subdirectory(spec_constants) +add_subdirectory(subgroups) +add_subdirectory(synchronization) +add_subdirectory(vload_vstore) +add_subdirectory(workgroups) +add_subdirectory(workitems) diff --git a/test_conformance/clcpp/address_spaces/CMakeLists.txt b/test_conformance/clcpp/address_spaces/CMakeLists.txt new file mode 100644 index 00000000..be369fc5 --- /dev/null +++ b/test_conformance/clcpp/address_spaces/CMakeLists.txt @@ -0,0 +1,12 @@ +set(MODULE_NAME CPP_ADDRESS_SPACES) + +set(${MODULE_NAME}_SOURCES + main.cpp + ../../../test_common/harness/errorHelpers.c + ../../../test_common/harness/testHarness.c + ../../../test_common/harness/kernelHelpers.c + ../../../test_common/harness/msvc9.c + ../../../test_common/harness/parseParameters.cpp +) + +include(../../CMakeCommon.txt) diff --git a/test_conformance/clcpp/address_spaces/common.hpp b/test_conformance/clcpp/address_spaces/common.hpp new file mode 100644 index 00000000..47b78ea9 --- /dev/null +++ b/test_conformance/clcpp/address_spaces/common.hpp @@ -0,0 +1,203 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef TEST_CONFORMANCE_CLCPP_ADDRESS_SPACES_COMMON_HPP +#define TEST_CONFORMANCE_CLCPP_ADDRESS_SPACES_COMMON_HPP + +#include "../common.hpp" +#include "../funcs_test_utils.hpp" + +#define RUN_ADDRESS_SPACES_TEST_MACRO(TEST_CLASS) \ + last_error = run_address_spaces_test( \ + device, context, queue, n_elems, TEST_CLASS \ + ); \ + CHECK_ERROR(last_error) \ + error |= last_error; + +// This is a base class for address spaces tests. +template +struct address_spaces_test : public detail::base_func_type +{ + // output buffer type + typedef T type; + + virtual ~address_spaces_test() {}; + // Returns test name + virtual std::string str() = 0; + // Returns OpenCL program source + virtual std::string generate_program() = 0; + // Returns kernel names IN ORDER + virtual std::vector get_kernel_names() + { + // Typical case, that is, only one kernel + return { this->get_kernel_name() }; + } + + // Return value that is expected to be in output_buffer[i] + virtual T operator()(size_t i, size_t work_group_size) = 0; + + // If local size has to be set in clEnqueueNDRangeKernel() + // this should return true; otherwise - false; + virtual bool set_local_size() + { + return false; + } + + // Calculates maximal work-group size (one dim) + virtual size_t get_max_local_size(const std::vector& kernels, + cl_device_id device, + size_t work_group_size, // default work-group size + cl_int& error) + { + size_t wg_size = work_group_size; + for(auto&k : kernels) + { + size_t max_wg_size; + error = clGetKernelWorkGroupInfo(k, device, CL_KERNEL_WORK_GROUP_SIZE, 
sizeof(size_t), &max_wg_size, NULL); + RETURN_ON_CL_ERROR(error, "clGetKernelWorkGroupInfo") + wg_size = (std::min)(max_wg_size, wg_size); + } + return wg_size; + } + + // This covers typical case: each kernel is executed once, every kernel + // has only one argument which is output buffer + virtual cl_int execute(const std::vector& kernels, + cl_mem& output_buffer, + cl_command_queue& queue, + size_t work_size, + size_t work_group_size) + { + cl_int err; + for(auto& k : kernels) + { + err = clSetKernelArg(k, 0, sizeof(output_buffer), &output_buffer); + RETURN_ON_CL_ERROR(err, "clSetKernelArg"); + + err = clEnqueueNDRangeKernel( + queue, k, 1, + NULL, &work_size, this->set_local_size() ? &work_group_size : NULL, + 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel"); + } + return err; + } +}; + +template +int run_address_spaces_test(cl_device_id device, cl_context context, cl_command_queue queue, size_t count, address_spaces_test op) +{ + cl_mem buffers[1]; + cl_program program; + std::vector kernels; + size_t wg_size; + size_t work_size[1]; + cl_int err; + + typedef typename address_spaces_test::type TYPE; + + // Don't run test for unsupported types + if(!(type_supported(device))) + { + return CL_SUCCESS; + } + + std::string code_str = op.generate_program(); + std::vector kernel_names = op.get_kernel_names(); + if(kernel_names.empty()) + { + RETURN_ON_ERROR_MSG(-1, "No kernel to run"); + } + kernels.resize(kernel_names.size()); +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +// Only OpenCL C++ to SPIR-V compilation +#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) + err = create_opencl_kernel(context, &program, &(kernels[0]), code_str, kernel_names[0]); + return err; +// Use OpenCL C kernels instead of OpenCL C++ kernels 
(test C++ host code) +#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + err = create_opencl_kernel(context, &program, &(kernels[0]), code_str, kernel_names[0], "-cl-std=CL2.0", false); + RETURN_ON_ERROR(err) + for(size_t i = 1; i < kernels.size(); i++) + { + kernels[i] = clCreateKernel(program, kernel_names[i].c_str(), &err); + RETURN_ON_CL_ERROR(err, "clCreateKernel"); + } +#else + err = create_opencl_kernel(context, &program, &(kernels[0]), code_str, kernel_names[0]); + RETURN_ON_ERROR(err) + for(size_t i = 1; i < kernels.size(); i++) + { + kernels[i] = clCreateKernel(program, kernel_names[i].c_str(), &err); + RETURN_ON_CL_ERROR(err, "clCreateKernel"); + } +#endif + + // Find the max possible wg size for among all the kernels + wg_size = op.get_max_local_size(kernels, device, 1024, err); + RETURN_ON_ERROR(err); + + work_size[0] = count; + if(op.set_local_size()) + { + size_t wg_number = static_cast( + std::ceil(static_cast(count) / wg_size) + ); + work_size[0] = wg_number * wg_size; + } + + // output on host + std::vector output = generate_output(work_size[0], 9999); + + // output buffer + buffers[0] = clCreateBuffer + (context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(TYPE) * output.size(), NULL, &err + ); + RETURN_ON_CL_ERROR(err, "clCreateBuffer") + + // Execute test + err = op.execute(kernels, buffers[0], queue, work_size[0], wg_size); + RETURN_ON_ERROR(err) + + err = clEnqueueReadBuffer( + queue, buffers[0], CL_TRUE, 0, sizeof(TYPE) * output.size(), + static_cast(output.data()), 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer"); + + for(size_t i = 0; i < output.size(); i++) + { + TYPE v = op(i, wg_size); + if(!(are_equal(v, output[i], detail::make_value(0), op))) + { + RETURN_ON_ERROR_MSG(-1, + "test_%s(%s) failed. 
Expected: %s, got: %s", op.str().c_str(), type_name().c_str(), + format_value(v).c_str(), format_value(output[i]).c_str() + ); + } + } + log_info("test_%s(%s) passed\n", op.str().c_str(), type_name().c_str()); + + clReleaseMemObject(buffers[0]); + for(auto& k : kernels) + clReleaseKernel(k); + clReleaseProgram(program); + return err; +} + +#endif // TEST_CONFORMANCE_CLCPP_ADDRESS_SPACES_COMMON_HPP diff --git a/test_conformance/clcpp/address_spaces/main.cpp b/test_conformance/clcpp/address_spaces/main.cpp new file mode 100644 index 00000000..88ea3930 --- /dev/null +++ b/test_conformance/clcpp/address_spaces/main.cpp @@ -0,0 +1,30 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../common.hpp" + +#include "test_pointer_types.hpp" +#include "test_storage_types.hpp" + +int main(int argc, const char *argv[]) +{ + // Get list to all test functions + std::vector testfn_list = autotest::test_suite::get_test_functions(); + // Get names of all test functions + std::vector testfn_names = autotest::test_suite::get_test_names(); + // Create a vector of pointers to the names test functions + std::vector testfn_names_c_str = autotest::get_strings_ptrs(testfn_names); + return runTestHarness(argc, argv, testfn_list.size(), testfn_list.data(), testfn_names_c_str.data(), false, false, 0); +} diff --git a/test_conformance/clcpp/address_spaces/test_pointer_types.hpp b/test_conformance/clcpp/address_spaces/test_pointer_types.hpp new file mode 100644 index 00000000..edc50b6f --- /dev/null +++ b/test_conformance/clcpp/address_spaces/test_pointer_types.hpp @@ -0,0 +1,411 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef TEST_CONFORMANCE_CLCPP_ADDRESS_SPACES_TEST_POINTER_TYPES_HPP +#define TEST_CONFORMANCE_CLCPP_ADDRESS_SPACES_TEST_POINTER_TYPES_HPP + +#include + +#include "common.hpp" + +// ---------------------------- +// ---------- PRIVATE +// ---------------------------- + +template +struct private_pointer_test : public address_spaces_test +{ + std::string str() + { + return "private_pointer"; + } + + T operator()(size_t i, size_t work_group_size) + { + typedef typename scalar_type::type SCALAR; + (void) work_group_size; + return detail::make_value(static_cast(i)); + } + + // Each work-item writes its global id to output[work-item-global-id] + std::string generate_program() + { + // ----------------------------------------------------------------------------------- + // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ + // ----------------------------------------------------------------------------------- + #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + return + "__kernel void " + this->get_kernel_name() + "(global " + type_name() + " *output)\n" + "{\n" + " size_t gid = get_global_id(0);\n" + " output[gid] = (" + type_name() + ")(gid);\n" + "}\n"; + + #else + return + "#include \n" + "#include \n" + "#include \n" + "using namespace cl;\n" + "__kernel void " + this->get_kernel_name() + "(global_ptr<" + type_name() + "[]> output)\n" + "{\n" + " size_t gid = get_global_id(0);\n" + " typedef " + type_name() + " TYPE;\n" + " TYPE v = TYPE(gid);\n" + " private_ptr v_ptr1(dynamic_asptr_cast>(&v));\n" + " private_ptr v_ptr2(v_ptr1);\n" + " TYPE a[] = { TYPE(0), TYPE(1) };\n" + " private_ptr a_ptr = dynamic_asptr_cast>(a);\n" + " a_ptr++;\n" + " TYPE * a_ptr2 = a_ptr.get();\n" + " *a_ptr2 = *v_ptr2;\n" + " output[gid] = a[1];\n" + "}\n"; + #endif + } +}; + +AUTO_TEST_CASE(test_private_pointer) +(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + int error = CL_SUCCESS; + int last_error = 
CL_SUCCESS; + + // private pointer + RUN_ADDRESS_SPACES_TEST_MACRO(private_pointer_test()); + RUN_ADDRESS_SPACES_TEST_MACRO(private_pointer_test()); + RUN_ADDRESS_SPACES_TEST_MACRO(private_pointer_test()); + RUN_ADDRESS_SPACES_TEST_MACRO(private_pointer_test()); + RUN_ADDRESS_SPACES_TEST_MACRO(private_pointer_test()); + + if(error != CL_SUCCESS) + { + return -1; + } + return error; +} + +// ---------------------------- +// ---------- LOCAL +// ---------------------------- + +template +struct local_pointer_test : public address_spaces_test +{ + std::string str() + { + return "local_pointer"; + } + + T operator()(size_t i, size_t work_group_size) + { + typedef typename scalar_type::type SCALAR; + size_t r = i / work_group_size; + return detail::make_value(static_cast(r)); + } + + bool set_local_size() + { + return true; + } + + size_t get_max_local_size(const std::vector& kernels, + cl_device_id device, + size_t work_group_size, // default work-group size + cl_int& error) + { + // Set size of the local memory, we need to to this to correctly calculate + // max possible work-group size. + // Additionally this already set 2nd argument of the test kernel, so we don't + // have to modify execute() method. + error = clSetKernelArg(kernels[0], 1, sizeof(cl_uint), NULL); + RETURN_ON_CL_ERROR(error, "clSetKernelArg"); + + size_t wg_size; + error = clGetKernelWorkGroupInfo( + kernels[0], device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, NULL + ); + RETURN_ON_CL_ERROR(error, "clGetKernelWorkGroupInfo") + wg_size = wg_size <= work_group_size ? 
wg_size : work_group_size; + return wg_size; + } + + // Every work-item writes id of its work-group to output[work-item-global-id] + std::string generate_program() + { + // ----------------------------------------------------------------------------------- + // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ + // ----------------------------------------------------------------------------------- + #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + return + "__kernel void " + this->get_kernel_name() + "(global " + type_name() + " *output, " + "local uint * local_mem_ptr)\n" + "{\n" + " size_t gid = get_global_id(0);\n" + " output[gid] = (" + type_name() + ")(get_group_id(0));\n" + "}\n"; + + #else + return + "#include \n" + "#include \n" + "#include \n" + "#include \n" + "using namespace cl;\n" + "__kernel void " + this->get_kernel_name() + "(global_ptr<" + type_name() + "[]> output, " + "local_ptr local_mem_ptr)\n" + "{\n" + " size_t gid = get_global_id(0);\n" + " size_t lid = get_local_id(0);\n" + " typedef " + type_name() + " TYPE;\n" + // 1st work-item in work-group writes get_group_id() to var + " local var;\n" + " local_ptr var_ptr = var.ptr();\n" + " if(lid == 0) { *var_ptr = get_group_id(0); }\n" + " work_group_barrier(mem_fence::local);\n" + // last work-item in work-group writes var to 1st element of local_mem + " local_ptr local_mem_ptr2(local_mem_ptr);\n" + " auto local_mem_ptr3 = local_mem_ptr2.release();\n" + " if(lid == (get_local_size(0) - 1)) { *(local_mem_ptr3) = var; }\n" + " work_group_barrier(mem_fence::local);\n" + // each work-item in work-group writes local_mem_ptr[0] to output[work-item-global-id] + " output[gid] = local_mem_ptr[0];\n" + "}\n"; + #endif + } +}; + +AUTO_TEST_CASE(test_local_pointer) +(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + int error = CL_SUCCESS; + int last_error = CL_SUCCESS; + + // local pointer + 
RUN_ADDRESS_SPACES_TEST_MACRO(local_pointer_test()); + RUN_ADDRESS_SPACES_TEST_MACRO(local_pointer_test()); + RUN_ADDRESS_SPACES_TEST_MACRO(local_pointer_test()); + RUN_ADDRESS_SPACES_TEST_MACRO(local_pointer_test()); + RUN_ADDRESS_SPACES_TEST_MACRO(local_pointer_test()); + + if(error != CL_SUCCESS) + { + return -1; + } + return error; +} + +// ---------------------------- +// ---------- GLOBAL +// ---------------------------- + +template +struct global_pointer_test : public address_spaces_test +{ + std::string str() + { + return "global_pointer"; + } + + T operator()(size_t i, size_t work_group_size) + { + typedef typename scalar_type::type SCALAR; + (void) work_group_size; + return detail::make_value(static_cast(i)); + } + + // Each work-item writes its global id to output[work-item-global-id] + std::string generate_program() + { + // ----------------------------------------------------------------------------------- + // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ + // ----------------------------------------------------------------------------------- + #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + return + "__kernel void " + this->get_kernel_name() + "(global " + type_name() + " *output)\n" + "{\n" + " size_t gid = get_global_id(0);\n" + " output[gid] = (" + type_name() + ")(gid);\n" + "}\n"; + + #else + return + "#include \n" + "#include \n" + "#include \n" + "using namespace cl;\n" + "typedef " + type_name() + " TYPE;\n" + "void set_to_gid(global_ptr ptr)\n" + "{\n" + " *ptr = TYPE(get_global_id(0));" + "}\n" + "__kernel void " + this->get_kernel_name() + "(global_ptr output)\n" + "{\n" + " size_t gid = get_global_id(0);\n" + " auto ptr = output.get();\n" + " global_ptr ptr2(ptr);\n" + " ptr2 += ptrdiff_t(gid);\n" + " set_to_gid(ptr2);\n" + "}\n"; + #endif + } +}; + +AUTO_TEST_CASE(test_global_pointer) +(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + int error = 
CL_SUCCESS; + int last_error = CL_SUCCESS; + + // global pointer + RUN_ADDRESS_SPACES_TEST_MACRO(global_pointer_test()); + RUN_ADDRESS_SPACES_TEST_MACRO(global_pointer_test()); + RUN_ADDRESS_SPACES_TEST_MACRO(global_pointer_test()); + RUN_ADDRESS_SPACES_TEST_MACRO(global_pointer_test()); + RUN_ADDRESS_SPACES_TEST_MACRO(global_pointer_test()); + + if(error != CL_SUCCESS) + { + return -1; + } + return error; +} + +// ---------------------------- +// ---------- CONSTANT +// ---------------------------- + +template +struct constant_pointer_test : public address_spaces_test +{ + // m_test_value is just a random value we use in this test. + constant_pointer_test() : m_test_value(0xdeaddeadU) + { + + } + + std::string str() + { + return "constant_pointer"; + } + + T operator()(size_t i, size_t work_group_size) + { + typedef typename scalar_type::type SCALAR; + (void) work_group_size; + return detail::make_value(static_cast(m_test_value)); + } + + // Each work-item writes m_test_value to output[work-item-global-id] + std::string generate_program() + { + // ----------------------------------------------------------------------------------- + // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ + // ----------------------------------------------------------------------------------- + #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + return + "__kernel void " + this->get_kernel_name() + "(global " + type_name() + " *output, " + "constant uint * const_ptr)\n" + "{\n" + " size_t gid = get_global_id(0);\n" + " output[gid] = (" + type_name() + ")(const_ptr[0]);\n" + "}\n"; + + #else + return + "#include \n" + "#include \n" + "#include \n" + "using namespace cl;\n" + "typedef " + type_name() + " TYPE;\n" + "__kernel void " + this->get_kernel_name() + "(global_ptr output, " + "constant_ptr const_ptr)\n" + "{\n" + " size_t gid = get_global_id(0);\n" + " constant_ptr const_ptr2 = const_ptr;\n" + " auto const_ptr3 = const_ptr2.get();\n" + " 
output[gid] = *const_ptr3;\n" + "}\n"; + #endif + } + + // execute() method needs to be modified, to create additional buffer + // and set it in 2nd arg (constant_ptr const_ptr) + cl_int execute(const std::vector& kernels, + cl_mem& output_buffer, + cl_command_queue& queue, + size_t work_size, + size_t work_group_size) + { + cl_int err; + + // Get context from queue + cl_context context; + err = clGetCommandQueueInfo(queue, CL_QUEUE_CONTEXT, sizeof(cl_context), &context, NULL); + RETURN_ON_CL_ERROR(err, "clGetCommandQueueInfo"); + + // Create constant buffer + auto const_buff = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_ONLY), sizeof(cl_uint), NULL, &err); + RETURN_ON_CL_ERROR(err, "clCreateBuffer"); + + // Write m_test_value to const_buff + err = clEnqueueWriteBuffer( + queue, const_buff, CL_TRUE, 0, sizeof(cl_uint), + static_cast(&m_test_value), 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer"); + + err = clSetKernelArg(kernels[0], 0, sizeof(output_buffer), &output_buffer); + err |= clSetKernelArg(kernels[0], 1, sizeof(const_buff), &const_buff); + RETURN_ON_CL_ERROR(err, "clSetKernelArg"); + + err = clEnqueueNDRangeKernel( + queue, kernels[0], 1, NULL, &work_size, this->set_local_size() ? 
&work_group_size : NULL, 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel"); + + err = clFinish(queue); + RETURN_ON_CL_ERROR(err, "clFinish"); + + err = clReleaseMemObject(const_buff); + RETURN_ON_CL_ERROR(err, "clReleaseMemObject"); + return err; + } + +private: + cl_uint m_test_value; +}; + +AUTO_TEST_CASE(test_constant_pointer) +(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + int error = CL_SUCCESS; + int last_error = CL_SUCCESS; + + // constant pointer + RUN_ADDRESS_SPACES_TEST_MACRO(constant_pointer_test()); + RUN_ADDRESS_SPACES_TEST_MACRO(constant_pointer_test()); + RUN_ADDRESS_SPACES_TEST_MACRO(constant_pointer_test()); + RUN_ADDRESS_SPACES_TEST_MACRO(constant_pointer_test()); + RUN_ADDRESS_SPACES_TEST_MACRO(constant_pointer_test()); + + if(error != CL_SUCCESS) + { + return -1; + } + return error; +} + +#endif // TEST_CONFORMANCE_CLCPP_ADDRESS_SPACES_TEST_POINTER_TYPES_HPP diff --git a/test_conformance/clcpp/address_spaces/test_storage_types.hpp b/test_conformance/clcpp/address_spaces/test_storage_types.hpp new file mode 100644 index 00000000..e47f9523 --- /dev/null +++ b/test_conformance/clcpp/address_spaces/test_storage_types.hpp @@ -0,0 +1,418 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef TEST_CONFORMANCE_CLCPP_ADDRESS_SPACES_TEST_STORAGE_TYPES_HPP +#define TEST_CONFORMANCE_CLCPP_ADDRESS_SPACES_TEST_STORAGE_TYPES_HPP + +#include + +#include "common.hpp" + +// ---------------------------- +// ---------- PRIVATE +// ---------------------------- + +template +struct private_storage_test : public address_spaces_test +{ + std::string str() + { + return "private_storage"; + } + + T operator()(size_t i, size_t work_group_size) + { + typedef typename scalar_type::type SCALAR; + (void) work_group_size; + return detail::make_value(static_cast(i)); + } + + // Each work-item writes its global id to output[work-item-global-id] + std::string generate_program() + { + // ----------------------------------------------------------------------------------- + // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ + // ----------------------------------------------------------------------------------- + #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + return + "__kernel void " + this->get_kernel_name() + "(global " + type_name() + " *output)\n" + "{\n" + " size_t gid = get_global_id(0);\n" + " output[gid] = (" + type_name() + ")(gid);\n" + "}\n"; + + #else + return + "#include \n" + "#include \n" + "#include \n" + "using namespace cl;\n" + "__kernel void " + this->get_kernel_name() + "(global_ptr<" + type_name() + "[]> output)\n" + "{\n" + " size_t gid = get_global_id(0);\n" + " typedef " + type_name() + " TYPE;\n" + " priv v = { TYPE(gid) };\n" + " const TYPE *v_ptr1 = &v;\n" + " private_ptr v_ptr2 = v.ptr();\n" + " TYPE v2 = *v_ptr2;\n" + " priv> a;\n" + " *(a.begin()) = v2;\n" + " output[gid] = a[0];\n" + "}\n"; + #endif + } +}; + +AUTO_TEST_CASE(test_private_storage) +(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + int error = CL_SUCCESS; + int last_error = CL_SUCCESS; + + // private storage + RUN_ADDRESS_SPACES_TEST_MACRO(private_storage_test()); + 
RUN_ADDRESS_SPACES_TEST_MACRO(private_storage_test()); + RUN_ADDRESS_SPACES_TEST_MACRO(private_storage_test()); + RUN_ADDRESS_SPACES_TEST_MACRO(private_storage_test()); + RUN_ADDRESS_SPACES_TEST_MACRO(private_storage_test()); + + if(error != CL_SUCCESS) + { + return -1; + } + return error; +} + +// ---------------------------- +// ---------- LOCAL +// ---------------------------- + +template +struct local_storage_test : public address_spaces_test +{ + std::string str() + { + return "local_storage"; + } + + T operator()(size_t i, size_t work_group_size) + { + typedef typename scalar_type::type SCALAR; + size_t r = i / work_group_size; + return detail::make_value(static_cast(r)); + } + + bool set_local_size() + { + return true; + } + + // Every work-item writes id of its work-group to output[work-item-global-id] + std::string generate_program() + { + // ----------------------------------------------------------------------------------- + // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ + // ----------------------------------------------------------------------------------- + #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + return + "__kernel void " + this->get_kernel_name() + "(global " + type_name() + " *output)\n" + "{\n" + " size_t gid = get_global_id(0);\n" + " output[gid] = (" + type_name() + ")(get_group_id(0));\n" + "}\n"; + + #else + return + "#include \n" + "#include \n" + "#include \n" + "#include \n" + "using namespace cl;\n" + // Using program scope local variable + "local<" + type_name() + "> program_scope_var;" + "__kernel void " + this->get_kernel_name() + "(global_ptr<" + type_name() + "[]> output)\n" + "{\n" + " size_t gid = get_global_id(0);\n" + " size_t lid = get_local_id(0);\n" + " typedef " + type_name() + " TYPE;\n" + // 1st work-item in work-group writes get_group_id() to var + " local var;\n" + " if(lid == 0) { var = TYPE(get_group_id(0)); }\n" + " work_group_barrier(mem_fence::local);\n" + // 
last work-item in work-group writes var to 1st element of a + " local_ptr var_ptr = var.ptr();\n" + " TYPE var2 = *var_ptr;\n" + " local> a;\n" + " if(lid == (get_local_size(0) - 1)) { *(a.begin()) = var2; }\n" + " work_group_barrier(mem_fence::local);\n" + // 1st work-item in work-group writes a[0] to program_scope_var + " if(lid == 0) { program_scope_var = a[0]; }\n" + " work_group_barrier(mem_fence::local);\n" + " const TYPE *program_scope_var_ptr = &program_scope_var;\n" + " output[gid] = *program_scope_var_ptr;\n" + "}\n"; + #endif + } +}; + +AUTO_TEST_CASE(test_local_storage) +(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + int error = CL_SUCCESS; + int last_error = CL_SUCCESS; + + // local storage + RUN_ADDRESS_SPACES_TEST_MACRO(local_storage_test()); + RUN_ADDRESS_SPACES_TEST_MACRO(local_storage_test()); + RUN_ADDRESS_SPACES_TEST_MACRO(local_storage_test()); + RUN_ADDRESS_SPACES_TEST_MACRO(local_storage_test()); + RUN_ADDRESS_SPACES_TEST_MACRO(local_storage_test()); + + if(error != CL_SUCCESS) + { + return -1; + } + return error; +} + +// ---------------------------- +// ---------- GLOBAL +// ---------------------------- + +template +struct global_storage_test : public address_spaces_test +{ + // m_test_value is just a random value we use in this test. + // m_test_value should not be zero. 
+ global_storage_test() : m_test_value(0xdeaddeadU) + { + + } + + std::string str() + { + return "global_storage"; + } + + T operator()(size_t i, size_t work_group_size) + { + typedef typename scalar_type::type SCALAR; + return detail::make_value(static_cast(m_test_value)); + } + + std::vector get_kernel_names() + { + return + { + this->get_kernel_name() + "1", + this->get_kernel_name() + "2" + }; + } + + // Every work-item writes m_test_value to output[work-item-global-id] + std::string generate_program() + { + // ----------------------------------------------------------------------------------- + // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ + // ----------------------------------------------------------------------------------- + #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + return + "__kernel void " + this->get_kernel_names()[0] + "(global " + type_name() + " *output, " + "uint test_value)\n" + "{\n" + " size_t gid = get_global_id(0);\n" + " output[gid] = (" + type_name() + ")(test_value);\n" + "}\n" + "__kernel void " + this->get_kernel_names()[1] + "(global " + type_name() + " *output)\n" + "{\n" + " size_t gid = get_global_id(0);\n" + " output[gid] = output[gid];\n" + "}\n"; + #else + return + "#include \n" + "#include \n" + "#include \n" + "using namespace cl;\n" + "typedef " + type_name() + " TYPE;\n" + // Using program scope global variable + "global> program_scope_global_array;" + "__kernel void " + this->get_kernel_names()[0] + "(global_ptr<" + type_name() + "[]> output, " + "uint test_value)\n" + "{\n" + " size_t gid = get_global_id(0);\n" + // 1st work-item writes test_value to program_scope_global_array[0] + " if(gid == 0) { program_scope_global_array[0] = test_value; }\n" + "}\n" + "__kernel void " + this->get_kernel_names()[1] + "(global_ptr<" + type_name() + "[]> output)\n" + "{\n" + " size_t gid = get_global_id(0);\n" + " static global func_scope_global_var { 0 };\n" + // if 
(func_scope_global_var == 1) is true then + // each work-item saves program_scope_global_array[0] to output[work-item-global-id] + " if(func_scope_global_var == uint(1))\n" + " {\n" + " output[gid] = program_scope_global_array[0];\n" + " return;\n" + " }\n" + // 1st work-item writes 1 to func_scope_global_var + " if(gid == 0) { func_scope_global_var = uint(1); }\n" + "}\n"; + #endif + } + + // In this test execution is quite complicated. We have two kernels. + // 1st kernel tests program scope global variable, and 2nd kernel tests + // function scope global variable (that's why it is run twice). + cl_int execute(const std::vector& kernels, + cl_mem& output_buffer, + cl_command_queue& queue, + size_t work_size, + size_t wg_size) + { + cl_int err; + err = clSetKernelArg(kernels[0], 0, sizeof(output_buffer), &output_buffer); + err |= clSetKernelArg(kernels[0], 1, sizeof(cl_uint), &m_test_value); + RETURN_ON_CL_ERROR(err, "clSetKernelArg"); + + // Run first kernel, once. + // This kernel saves m_test_value to program scope global variable called program_scope_global_var + err = clEnqueueNDRangeKernel( + queue, kernels[0], 1, NULL, &work_size, this->set_local_size() ? &wg_size : NULL, 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel"); + err = clFinish(queue); + RETURN_ON_CL_ERROR(err, "clFinish") + + err = clSetKernelArg(kernels[1], 0, sizeof(output_buffer), &output_buffer); + // Run 2nd kernel, twice. + // 1st run: program_scope_global_var is saved to function scope global array called func_scope_global_array + // 2nd run: each work-item saves func_scope_global_array[0] to ouput[work-item-global-id] + for(size_t i = 0; i < 2; i++) + { + err = clEnqueueNDRangeKernel( + queue, kernels[1], 1, NULL, &work_size, this->set_local_size() ? 
&wg_size : NULL, 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel"); + err = clFinish(queue); + RETURN_ON_CL_ERROR(err, "clFinish") + } + return err; + } + +private: + cl_uint m_test_value; +}; + +AUTO_TEST_CASE(test_global_storage) +(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + int error = CL_SUCCESS; + int last_error = CL_SUCCESS; + + RUN_ADDRESS_SPACES_TEST_MACRO(global_storage_test()); + RUN_ADDRESS_SPACES_TEST_MACRO(global_storage_test()); + RUN_ADDRESS_SPACES_TEST_MACRO(global_storage_test()); + RUN_ADDRESS_SPACES_TEST_MACRO(global_storage_test()); + RUN_ADDRESS_SPACES_TEST_MACRO(global_storage_test()); + + if(error != CL_SUCCESS) + { + return -1; + } + return error; +} + +// ---------------------------- +// ---------- CONSTANT +// ---------------------------- + +template +struct constant_storage_test : public address_spaces_test +{ + // m_test_value is just a random value we use in this test. + constant_storage_test() : m_test_value(0xdeaddeadU) + { + + } + + std::string str() + { + return "constant_storage"; + } + + T operator()(size_t i, size_t work_group_size) + { + typedef typename scalar_type::type SCALAR; + return detail::make_value(static_cast(m_test_value)); + } + + // Every work-item writes m_test_value to output[work-item-global-id] + std::string generate_program() + { + // ----------------------------------------------------------------------------------- + // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ + // ----------------------------------------------------------------------------------- + #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + return + "__kernel void " + this->get_kernel_name() + "(global " + type_name() + " *output)\n" + "{\n" + " size_t gid = get_global_id(0);\n" + " output[gid] = (" + type_name() + ")(" + std::to_string(m_test_value) + ");\n" + "}\n"; + + #else + return + "#include \n" + "#include \n" + "#include \n" + 
"using namespace cl;\n" + // Program scope constant variable, program_scope_var == (m_test_value - 1) + "constant program_scope_const{ (" + std::to_string(m_test_value) + " - 1) };" + "__kernel void " + this->get_kernel_name() + "(global_ptr<" + type_name() + "[]> output)\n" + "{\n" + " size_t gid = get_global_id(0);\n" + " typedef " + type_name() + " TYPE;\n" + " static constant func_scope_const{ 1 };\n" + " constant_ptr ps_const_ptr = program_scope_const.ptr();\n" + // " constant_ptr> fs_const_ptr = &func_scope_const;\n" + " output[gid] = TYPE(*ps_const_ptr + func_scope_const);\n" + "}\n"; + #endif + } +private: + cl_uint m_test_value; +}; + +AUTO_TEST_CASE(test_constant_storage) +(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + int error = CL_SUCCESS; + int last_error = CL_SUCCESS; + + RUN_ADDRESS_SPACES_TEST_MACRO(constant_storage_test()); + RUN_ADDRESS_SPACES_TEST_MACRO(constant_storage_test()); + RUN_ADDRESS_SPACES_TEST_MACRO(constant_storage_test()); + RUN_ADDRESS_SPACES_TEST_MACRO(constant_storage_test()); + RUN_ADDRESS_SPACES_TEST_MACRO(constant_storage_test()); + + if(error != CL_SUCCESS) + { + return -1; + } + return error; +} + +#endif // TEST_CONFORMANCE_CLCPP_ADDRESS_SPACES_TEST_STORAGE_TYPES_HPP diff --git a/test_conformance/clcpp/api/CMakeLists.txt b/test_conformance/clcpp/api/CMakeLists.txt new file mode 100644 index 00000000..db9ce582 --- /dev/null +++ b/test_conformance/clcpp/api/CMakeLists.txt @@ -0,0 +1,12 @@ +set(MODULE_NAME CPP_API) + +set(${MODULE_NAME}_SOURCES + main.cpp + ../../../test_common/harness/errorHelpers.c + ../../../test_common/harness/testHarness.c + ../../../test_common/harness/kernelHelpers.c + ../../../test_common/harness/msvc9.c + ../../../test_common/harness/parseParameters.cpp +) + +include(../../CMakeCommon.txt) diff --git a/test_conformance/clcpp/api/main.cpp b/test_conformance/clcpp/api/main.cpp new file mode 100644 index 00000000..bfcbdd43 --- /dev/null +++ 
b/test_conformance/clcpp/api/main.cpp @@ -0,0 +1,32 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../common.hpp" + +#include "test_spec_consts.hpp" +#include "test_ctors_dtors.hpp" +#include "test_ctors.hpp" +#include "test_dtors.hpp" + +int main(int argc, const char *argv[]) +{ + // Get list to all test functions + std::vector testfn_list = autotest::test_suite::get_test_functions(); + // Get names of all test functions + std::vector testfn_names = autotest::test_suite::get_test_names(); + // Create a vector of pointers to the names test functions + std::vector testfn_names_c_str = autotest::get_strings_ptrs(testfn_names); + return runTestHarness(argc, argv, testfn_list.size(), testfn_list.data(), testfn_names_c_str.data(), false, false, 0); +} diff --git a/test_conformance/clcpp/api/test_ctors.hpp b/test_conformance/clcpp/api/test_ctors.hpp new file mode 100644 index 00000000..8cdfc6ea --- /dev/null +++ b/test_conformance/clcpp/api/test_ctors.hpp @@ -0,0 +1,481 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef TEST_CONFORMANCE_CLCPP_API_TEST_CTORS_HPP +#define TEST_CONFORMANCE_CLCPP_API_TEST_CTORS_HPP + +#include +#include +#include +#include + +#include "../common.hpp" + +// TEST 1 +// Verify that constructors are executed before any kernel is executed. +// Verify that when present, multiple constructors are executed. The order between +// constructors is undefined, but they should all execute. + +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) +const char * kernel_test_ctors_executed = + "__kernel void test_ctors_executed(global uint *output)\n" + "{\n" + " ulong gid = get_global_id(0);\n" + " output[gid] = 0;\n" + "}\n" +; +const char * kernel_test_ctors_executed_multiple_ctors = + "__kernel void test_ctors_executed_multiple_ctors(global uint *output)\n" + "{\n" + " ulong gid = get_global_id(0);\n" + " output[gid] = 0;\n" + "}\n" +; +#else +const char * kernel_test_ctors_executed = + "#include \n" + "#include \n" + "using namespace cl;\n" + "struct ctor_test_class {\n" + // non-trivial ctor + " ctor_test_class(int y) { x = y;};\n" + " int x;\n" + "};\n" + // global scope program variable + "ctor_test_class global_var(int(0xbeefbeef));\n" + "__kernel void test_ctors_executed(global_ptr output)\n" + "{\n" + " ulong gid = get_global_id(0);\n" + " int result = 0;\n" + " 
if(global_var.x != int(0xbeefbeef)) result = 1;\n" + " output[gid] = result;\n" + "}\n" +; +const char * kernel_test_ctors_executed_multiple_ctors = + "#include \n" + "#include \n" + "#include \n" + "using namespace cl;\n" + "template\n" + "struct ctor_test_class {\n" + // non-trivial ctor + " ctor_test_class(T y) { x = y;};\n" + " T x;\n" + "};\n" + // global scope program variables + "ctor_test_class global_var0(int(0xbeefbeef));\n" + "ctor_test_class global_var1(uint(0xbeefbeefU));\n" + "ctor_test_class global_var2(float(FLT_MAX));\n" + "__kernel void test_ctors_executed_multiple_ctors(global_ptr output)\n" + "{\n" + " ulong gid = get_global_id(0);\n" + " int result = 0;\n" + " if(global_var0.x != int(0xbeefbeef)) result = 1;\n" + " if(global_var1.x != uint(0xbeefbeefU)) result = 1;\n" + " if(global_var2.x != float(FLT_MAX)) result = 1;\n" + " output[gid] = result;\n" + "}\n" +; +#endif + +int test_ctors_execution(cl_device_id device, + cl_context context, + cl_command_queue queue, + int count, + std::string kernel_name, + const char * kernel_source) +{ + int error = CL_SUCCESS; + + cl_mem output_buffer; + cl_program program; + cl_kernel kernel; + + size_t dim = 1; + size_t work_size[1]; +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +// Only OpenCL C++ to SPIR-V compilation +#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) + error = create_opencl_kernel(context, &program, &kernel, kernel_source, kernel_name); + RETURN_ON_ERROR(error) + return error; +// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) +#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + error = create_opencl_kernel(context, &program, &kernel, kernel_source, kernel_name, "", false); + RETURN_ON_ERROR(error) +// Normal run +#else + error = 
create_opencl_kernel(context, &program, &kernel, kernel_source, kernel_name); + RETURN_ON_ERROR(error) +#endif + + // host vector, size == count, output[0...count-1] == 1 + std::vector output(count, cl_uint(1)); + output_buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * output.size(), NULL, &error); + RETURN_ON_CL_ERROR(error, "clCreateBuffer") + + error = clEnqueueWriteBuffer(queue, output_buffer, CL_TRUE, 0, sizeof(cl_uint) * output.size(), static_cast(output.data()), 0, NULL, NULL); + RETURN_ON_CL_ERROR(error, "clEnqueueWriteBuffer") + + error = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer); + RETURN_ON_CL_ERROR(error, "clSetKernelArg") + + work_size[0] = output.size(); + error = clEnqueueNDRangeKernel(queue, kernel, dim, NULL, work_size, NULL, 0, NULL, NULL); + RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel") + + error = clEnqueueReadBuffer(queue, output_buffer, CL_TRUE, 0, sizeof(cl_uint) * output.size(), static_cast(output.data()), 0, NULL, NULL); + RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer") + + size_t sum = std::accumulate(output.begin(), output.end(), size_t(0)); + if(sum != 0) + { + error = -1; + CHECK_ERROR_MSG(error, "Test %s failed.", kernel_name.c_str()); + } + + clReleaseMemObject(output_buffer); + clReleaseKernel(kernel); + clReleaseProgram(program); + return error; +} + +AUTO_TEST_CASE(test_global_scope_ctors_executed) +(cl_device_id device, cl_context context, cl_command_queue queue, int count) +{ + int error = CL_SUCCESS; + int local_error = CL_SUCCESS; + + local_error = test_ctors_execution( + device, context, queue, count, + "test_ctors_executed", kernel_test_ctors_executed + ); + CHECK_ERROR(local_error); + error |= local_error; + + local_error = test_ctors_execution( + device, context, queue, count, + "test_ctors_executed_multiple_ctors", kernel_test_ctors_executed_multiple_ctors + ); + CHECK_ERROR(local_error); + error |= local_error; + + if(error != CL_SUCCESS) + { + return -1; 
+ } + return error; +} + +// TEST 2 +// Verify that constructors are only executed once when multiple kernels from a program are executed. + +// How: The first kernel (test_ctors_executed_once_set) is run once. It changes values of program scope +// variables, then the second kernel is run multiple times, each time verifying that global variables +// have correct values (the second kernel should observe the values assigned by the first kernel, not +// by the constructors). + +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) +const char * program_test_ctors_executed_once = + "__kernel void test_ctors_executed_once_set()\n" + "{\n" + "}\n" + "__kernel void test_ctors_executed_once_read(global uint *output)\n" + "{\n" + " ulong gid = get_global_id(0);\n" + " output[gid] = 0;\n" + "}\n" +; +#else +const char * program_test_ctors_executed_once = + "#include \n" + "#include \n" + "using namespace cl;\n" + // struct template + "template\n" + "struct ctor_test_class {\n" + // non-trivial ctor + " ctor_test_class(T y) { x = y;};\n" + " T x;\n" + "};\n" + // global scope program variables + "ctor_test_class global_var0(int(0));\n" + "ctor_test_class global_var1(uint(0));\n" + + "__kernel void test_ctors_executed_once_set()\n" + "{\n" + " ulong gid = get_global_id(0);\n" + " if(gid == 0) {\n" + " global_var0.x = int(0xbeefbeef);\n" + " global_var1.x = uint(0xbeefbeefU);\n" + " }\n" + "}\n\n" + + "__kernel void test_ctors_executed_once_read(global_ptr output)\n" + "{\n" + " ulong gid = get_global_id(0);\n" + " int result = 0;\n" + " if(global_var0.x != int(0xbeefbeef)) result = 1;\n" + " if(global_var1.x != uint(0xbeefbeefU)) result = 1;\n" + " output[gid] = result;\n" + "}\n" +; +#endif + 
+AUTO_TEST_CASE(test_global_scope_ctors_executed_once) +(cl_device_id device, cl_context context, cl_command_queue queue, int count) +{ + int error = CL_SUCCESS; + + cl_mem output_buffer; + cl_program program; + cl_kernel kernel_set_global_vars; + cl_kernel kernel_read_global_vars; + + size_t dim = 1; + size_t work_size[1]; +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +// Only OpenCL C++ to SPIR-V compilation +#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) + error = create_opencl_kernel( + context, &program, &kernel_set_global_vars, + program_test_ctors_executed_once, "test_ctors_executed_once_set" + ); + RETURN_ON_ERROR(error) + return error; +// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) +#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + error = create_opencl_kernel( + context, &program, &kernel_set_global_vars, + program_test_ctors_executed_once, "test_ctors_executed_once_set", "", false + ); + RETURN_ON_ERROR(error) + // Get the second kernel + kernel_read_global_vars = clCreateKernel(program, "test_ctors_executed_once_read", &error); + RETURN_ON_CL_ERROR(error, "clCreateKernel"); +// Normal run +#else + error = create_opencl_kernel( + context, &program, &kernel_set_global_vars, + program_test_ctors_executed_once, "test_ctors_executed_once_set" + ); + RETURN_ON_ERROR(error) + // Get the second kernel + kernel_read_global_vars = clCreateKernel(program, "test_ctors_executed_once_read", &error); + RETURN_ON_CL_ERROR(error, "clCreateKernel"); +#endif + + // Execute kernel_set_global_vars + + work_size[0] = count; + error = clEnqueueNDRangeKernel(queue, kernel_set_global_vars, dim, NULL, work_size, NULL, 0, NULL, NULL); + RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel") + + // Execute 
kernel_read_global_vars 4 times, each time we check if + // global variables have correct values. + + // host vector, size == count, output[0...count-1] == 1 + std::vector output(count, cl_uint(1)); + output_buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * output.size(), NULL, &error); + RETURN_ON_CL_ERROR(error, "clCreateBuffer") + + for(size_t i = 0; i < 4; i++) + { + std::fill(output.begin(), output.end(), cl_uint(1)); + error = clEnqueueWriteBuffer( + queue, output_buffer, CL_TRUE, + 0, sizeof(cl_uint) * output.size(), + static_cast(output.data()), + 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(error, "clEnqueueWriteBuffer") + + error = clSetKernelArg(kernel_read_global_vars, 0, sizeof(output_buffer), &output_buffer); + RETURN_ON_CL_ERROR(error, "clSetKernelArg") + + work_size[0] = output.size(); + error = clEnqueueNDRangeKernel( + queue, kernel_read_global_vars, + dim, NULL, work_size, NULL, + 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel") + + error = clEnqueueReadBuffer( + queue, output_buffer, CL_TRUE, + 0, sizeof(cl_uint) * output.size(), + static_cast(output.data()), + 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer") + + size_t sum = std::accumulate(output.begin(), output.end(), size_t(0)); + if(sum != 0) + { + error = -1; + CHECK_ERROR_MSG(error, "Test test_ctors_executed_onces failed."); + } + } + + clReleaseMemObject(output_buffer); + clReleaseKernel(kernel_set_global_vars); + clReleaseKernel(kernel_read_global_vars); + clReleaseProgram(program); + return error; +} + +// TEST3 +// Verify that when constructor is executed, the ND-range used is (1,1,1). 
+ +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) +const char * program_test_ctors_ndrange = + "__kernel void test_ctors_ndrange(global int *output)\n" + "{\n" + " ulong gid = get_global_id(0);\n" + " output[gid] = 0;\n" + "}\n" +; +#else +const char * program_test_ctors_ndrange = + "#include \n" + "#include \n" + "using namespace cl;\n" + // struct + "struct ctor_test_class {\n" + // non-trivial ctor + " ctor_test_class() {\n" + " x = get_global_size(0);\n" + " y = get_global_size(1);\n" + " z = get_global_size(2);\n" + " };\n" + " ulong x;\n" + " ulong y;\n" + " ulong z;\n" + // return true if the ND-range used when ctor was exectured was + // (1, 1, 1); otherwise - false + " bool check() { return (x == 1) && (y == 1) && (z == 1);}" + "};\n" + // global scope program variables + "ctor_test_class global_var0;\n" + "ctor_test_class global_var1;\n" + + "__kernel void test_ctors_ndrange(global_ptr output)\n" + "{\n" + " ulong gid = get_global_id(0);\n" + " int result = 0;\n" + " if(!global_var0.check()) result = 1;\n" + " if(!global_var1.check()) result = 1;\n" + " output[gid] = result;\n" + "}\n" +; +#endif + +AUTO_TEST_CASE(test_global_scope_ctors_ndrange) +(cl_device_id device, cl_context context, cl_command_queue queue, int count) +{ + int error = CL_SUCCESS; + + cl_mem output_buffer; + cl_program program; + cl_kernel kernel; + + size_t dim = 1; + size_t work_size[1]; +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +// Only OpenCL C++ to SPIR-V compilation +#if defined(DEVELOPMENT) 
&& defined(ONLY_SPIRV_COMPILATION) + error = create_opencl_kernel( + context, &program, &kernel, + program_test_ctors_ndrange, "test_ctors_ndrange" + ); + RETURN_ON_ERROR(error) + return error; +// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) +#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + error = create_opencl_kernel( + context, &program, &kernel, + program_test_ctors_ndrange, "test_ctors_ndrange", "", false + ); + RETURN_ON_ERROR(error) +// Normal run +#else + error = create_opencl_kernel( + context, &program, &kernel, + program_test_ctors_ndrange, "test_ctors_ndrange" + ); + RETURN_ON_ERROR(error) +#endif + + // host vector, size == count, output[0...count-1] == 1 + std::vector output(count, cl_uint(1)); + output_buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * output.size(), NULL, &error); + RETURN_ON_CL_ERROR(error, "clCreateBuffer") + + error = clEnqueueWriteBuffer( + queue, output_buffer, CL_TRUE, + 0, sizeof(cl_uint) * output.size(), + static_cast(output.data()), + 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(error, "clEnqueueWriteBuffer") + + error = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer); + RETURN_ON_CL_ERROR(error, "clSetKernelArg") + + work_size[0] = output.size(); + error = clEnqueueNDRangeKernel( + queue, kernel, + dim, NULL, work_size, NULL, + 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel") + + error = clEnqueueReadBuffer( + queue, output_buffer, CL_TRUE, + 0, sizeof(cl_uint) * output.size(), + static_cast(output.data()), + 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer") + + size_t sum = std::accumulate(output.begin(), output.end(), size_t(0)); + if(sum != 0) + { + error = -1; + CHECK_ERROR_MSG(error, "Test test_ctors_executed_ndrange failed."); + } + + clReleaseMemObject(output_buffer); + clReleaseKernel(kernel); + clReleaseProgram(program); + return error; +} + +#endif // 
TEST_CONFORMANCE_CLCPP_API_TEST_CTORS_HPP diff --git a/test_conformance/clcpp/api/test_ctors_dtors.hpp b/test_conformance/clcpp/api/test_ctors_dtors.hpp new file mode 100644 index 00000000..02838fa7 --- /dev/null +++ b/test_conformance/clcpp/api/test_ctors_dtors.hpp @@ -0,0 +1,185 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef TEST_CONFORMANCE_CLCPP_API_TEST_CTORS_DTORS_HPP +#define TEST_CONFORMANCE_CLCPP_API_TEST_CTORS_DTORS_HPP + +#include +#include +#include + +#include "../common.hpp" + +// Verify queries clGetProgramInfo correctly return the presence of constructors and/or destructors +// in the program (using option CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT/CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT) +// (both are present, either one is present, none is present). 
+ +std::string generate_ctor_dtor_program(const bool ctor, const bool dtor) +{ + std::string program; + if(ctor) + { + program += + "struct ctor_test_class {\n" + // non-trivial ctor + " ctor_test_class(int y) { x = y;};\n" + " int x;\n" + "};\n" + "ctor_test_class ctor = ctor_test_class(1024);\n" + ; + } + if(dtor) + { + program += + "struct dtor_test_class {\n" + // non-trivial dtor + " ~dtor_test_class() { x = -1024; };\n" + " int x;\n" + "};\n" + "dtor_test_class dtor;\n" + ; + } + program += "__kernel void test_ctor_dtor()\n {\n }\n"; + return program; +} + +int test_get_program_info_global_ctors_dtors_present(cl_device_id device, + cl_context context, + cl_command_queue queue, + const bool ctor, + const bool dtor) +{ + int error = CL_SUCCESS; + cl_program program; + + // program source and options + std::string options = ""; + std::string source = generate_ctor_dtor_program(ctor, dtor); + const char * source_ptr = source.c_str(); + +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +// Only OpenCL C++ to SPIR-V compilation +#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) + // Create program + error = create_openclcpp_program(context, &program, 1, &source_ptr, options.c_str()); + RETURN_ON_ERROR(error) + return error; +// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) +#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + return CL_SUCCESS; +// Normal run +#else + // Create program + error = create_openclcpp_program(context, &program, 1, &source_ptr, options.c_str()); + RETURN_ON_ERROR(error) +#endif + + // CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT cl_bool + // This indicates that the program object contains non-trivial constructor(s) that will be + // executed by runtime before any kernel from the program is 
executed. + + // CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT cl_bool + // This indicates that the program object contains non-trivial destructor(s) that will be + // executed by runtime when program is destroyed. + + // CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT + cl_bool ctors_present; + size_t cl_bool_size; + error = clGetProgramInfo( + program, + CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT, + sizeof(cl_bool), + static_cast(&ctors_present), + &cl_bool_size + ); + RETURN_ON_CL_ERROR(error, "clGetProgramInfo") + if(cl_bool_size != sizeof(cl_bool)) + { + error = -1; + CHECK_ERROR_MSG(-1, "Test failed, param_value_size_ret != sizeof(cl_bool) (%lu != %lu).", cl_bool_size, sizeof(cl_bool)); + } + if(ctor && ctors_present != CL_TRUE) + { + error = -1; + CHECK_ERROR_MSG(-1, "Test failed, CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT: 0, should be: 1."); + } + else if(!ctor && ctors_present == CL_TRUE) + { + error = -1; + CHECK_ERROR_MSG(-1, "Test failed, CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT: 1, should be: 0."); + } + + // CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT + cl_bool dtors_present = 0; + error = clGetProgramInfo( + program, + CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT, + sizeof(cl_bool), + static_cast(&ctors_present), + &cl_bool_size + ); + RETURN_ON_CL_ERROR(error, "clGetProgramInfo") + if(cl_bool_size != sizeof(cl_bool)) + { + error = -1; + CHECK_ERROR_MSG(-1, "Test failed, param_value_size_ret != sizeof(cl_bool) (%lu != %lu).", cl_bool_size, sizeof(cl_bool)); + } + if(dtor && dtors_present != CL_TRUE) + { + error = -1; + CHECK_ERROR_MSG(-1, "Test failed, CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT: 0, should be: 1."); + } + else if(!dtor && dtors_present == CL_TRUE) + { + error = -1; + CHECK_ERROR_MSG(-1, "Test failed, CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT: 1, should be: 0."); + } + + clReleaseProgram(program); + return error; +} + +AUTO_TEST_CASE(test_global_scope_ctors_dtors_present) +(cl_device_id device, cl_context context, cl_command_queue queue, int count) +{ + int error = CL_SUCCESS; + int 
last_error = CL_SUCCESS; + // both present + last_error = test_get_program_info_global_ctors_dtors_present(device, context, queue, true, true); + CHECK_ERROR(last_error); + error |= last_error; + // dtor + last_error = test_get_program_info_global_ctors_dtors_present(device, context, queue, false, true); + CHECK_ERROR(last_error); + error |= last_error; + // ctor + last_error = test_get_program_info_global_ctors_dtors_present(device, context, queue, true, false); + CHECK_ERROR(last_error); + error |= last_error; + // none present + last_error = test_get_program_info_global_ctors_dtors_present(device, context, queue, false, false); + CHECK_ERROR(last_error); + error |= last_error; + + if(error != CL_SUCCESS) + { + return -1; + } + return error; +} + +#endif // TEST_CONFORMANCE_CLCPP_API_TEST_CTORS_DTORS_HPP diff --git a/test_conformance/clcpp/api/test_dtors.hpp b/test_conformance/clcpp/api/test_dtors.hpp new file mode 100644 index 00000000..2f4fd0c7 --- /dev/null +++ b/test_conformance/clcpp/api/test_dtors.hpp @@ -0,0 +1,553 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef TEST_CONFORMANCE_CLCPP_API_TEST_DTORS_HPP +#define TEST_CONFORMANCE_CLCPP_API_TEST_DTORS_HPP + +#include +#include +#include +#include + +#include "../common.hpp" + +// TEST 1 +// Verify that destructor is executed. + +// How: destructor of struct dtor_test_class has a side effect: zeroing buffer. 
If values +// in buffer are not zeros after releasing program, destructor was not executed. + +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) +const char * program_test_dtor_is_executed = + "__kernel void test_dtor_is_executed(global uint *output)\n" + "{\n" + " ulong gid = get_global_id(0);\n" + " output[gid] = 0;\n" + "}\n" +; +#else +const char * program_test_dtor_is_executed = + "#include \n" + "#include \n" + "using namespace cl;\n" + // struct + "struct dtor_test_class {\n" + // non-trivial dtor + // set all values in buffer to 0 + " ~dtor_test_class() {\n" + " for(ulong i = 0; i < size; i++)\n" + " buffer[i] = 0;\n" + " };\n" + " global_ptr buffer;\n" + " ulong size;\n" + "};\n" + // global scope program variable + "dtor_test_class global_var;\n" + + // values in output __MUST BE__ greater than 0 for the test to work + // correctly + "__kernel void test_dtor_is_executed(global_ptr output)\n" + "{\n" + " ulong gid = get_global_id(0);\n" + // set buffer and size in global var + " if(gid == 0){\n" + " global_var.buffer = output;\n" + " global_var.size = get_global_size(0);\n" + " }\n" + "}\n" +; +#endif + +AUTO_TEST_CASE(test_global_scope_dtor_is_executed) +(cl_device_id device, cl_context context, cl_command_queue queue, int count) +{ + int error = CL_SUCCESS; + + cl_mem output_buffer; + cl_program program; + cl_kernel kernel; + + size_t dim = 1; + size_t work_size[1]; +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +// Only OpenCL C++ to SPIR-V compilation +#if 
defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) + error = create_opencl_kernel( + context, &program, &kernel, + program_test_dtor_is_executed, "test_dtor_is_executed" + ); + RETURN_ON_ERROR(error) + return error; +// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) +#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + error = create_opencl_kernel( + context, &program, &kernel, + program_test_dtor_is_executed, "test_dtor_is_executed", "", false + ); + RETURN_ON_ERROR(error) +// Normal run +#else + error = create_opencl_kernel( + context, &program, &kernel, + program_test_dtor_is_executed, "test_dtor_is_executed" + ); + RETURN_ON_ERROR(error) +#endif + + // host vector, size == count, output[0...count-1] == 0xbeefbeef (3203383023) + // values in output __MUST BE__ greater than 0 for the test to work correctly + std::vector output(count, cl_uint(0xbeefbeef)); + output_buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * output.size(), NULL, &error); + RETURN_ON_CL_ERROR(error, "clCreateBuffer") + + error = clEnqueueWriteBuffer( + queue, output_buffer, CL_TRUE, + 0, sizeof(cl_uint) * output.size(), + static_cast(output.data()), + 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(error, "clEnqueueWriteBuffer") + + error = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer); + RETURN_ON_CL_ERROR(error, "clSetKernelArg") + + work_size[0] = output.size(); + error = clEnqueueNDRangeKernel( + queue, kernel, + dim, NULL, work_size, NULL, + 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel") + + // Release kernel and program + // Dtor should be called now + error = clReleaseKernel(kernel); + RETURN_ON_CL_ERROR(error, "clReleaseKernel") + error = clReleaseProgram(program); + RETURN_ON_CL_ERROR(error, "clReleaseProgram") + + // Finish + error = clFinish(queue); + RETURN_ON_CL_ERROR(error, "clFinish") + + // Read output buffer + error = clEnqueueReadBuffer( + queue, output_buffer, 
CL_TRUE, + 0, sizeof(cl_uint) * output.size(), + static_cast(output.data()), + 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer") + + size_t sum = std::accumulate(output.begin(), output.end(), size_t(0)); + if(sum != 0) + { + error = -1; + CHECK_ERROR_MSG(error, "Test test_dtor_is_executed failed."); + } + + clReleaseMemObject(output_buffer); + return error; +} + +// TEST 2 +// Verify that multiple destructors, if present, are executed. Order between multiple +// destructors is undefined. +// Verify that each destructor is executed only once. + +// How: +// 0) dtor_test_class struct has a global pointer to a buffer, it's set by +// test_dtors_executed_once kernel. +// 1) Destructors have a side effect: each dtor writes to its part of the buffer. If all +// dtors are executed, all values in that buffer should be changed. +// 2) The first time destructors are executed, they set their parts of the buffer to zero. +// Next time to 1, next time to 2 etc. Since dtors should be executed only once, all +// values in that buffer should be equal to zero. + +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) +const char * program_test_dtors_executed_once = + "__kernel void test_dtors_executed_once(global uint *output)\n" + "{\n" + " ulong gid = get_global_id(0);\n" + " output[gid] = 0;\n" + "}\n" +; +#else +const char * program_test_dtors_executed_once = + "#include \n" + "#include \n" + "using namespace cl;\n" + // struct + "struct dtor_test_class {\n" + // non-trivial dtor + // Set all values in range [start; end - 1] in buffer to counter. 
+ // If dtor is executed only once (correct), all values in range + // [start; end - 1] in buffer should be equal to zero after releasing + // the program + " ~dtor_test_class() {\n" + " for(ulong i = start; i < end; i++){\n" + " buffer[i] = counter;\n" + " };\n" + " counter++;\n" + " };\n" + " global_ptr buffer;\n" + " ulong start;\n" + " ulong end;\n" + " ulong counter;\n" + "};\n" + // global scope program variables + "dtor_test_class global_var0;\n" + "dtor_test_class global_var1;\n" + "dtor_test_class global_var2;\n" + "dtor_test_class global_var3;\n" + + // values in output __MUST BE__ greater than 0 for the test to work correctly + "__kernel void test_dtors_executed_once(global_ptr output)\n" + "{\n" + " ulong gid = get_global_id(0);\n" + // set buffer and size in global var + " if(gid == 0){\n" + " ulong end = get_global_size(0) / 4;" + // global_var0 + " global_var0.buffer = output;\n" + " global_var0.start = 0;\n" + " global_var0.end = end;\n" + " global_var0.counter = 0;\n" + // global_var1 + " global_var1.buffer = output;\n" + " global_var1.start = end;\n" + " end += get_global_size(0) / 4;\n" + " global_var1.end = end;\n" + " global_var1.counter = 0;\n" + // global_var2 + " global_var2.buffer = output;\n" + " global_var2.start = end;\n" + " end += get_global_size(0) / 4;\n" + " global_var2.end = end;\n" + " global_var2.counter = 0;\n" + // global_var3 + " global_var3.buffer = output;\n" + " global_var3.start = end;\n" + " global_var3.end = get_global_size(0);\n" + " global_var3.counter = 0;\n" + " }\n" + "}\n" +; +#endif + +AUTO_TEST_CASE(test_global_scope_dtors_executed_once) +(cl_device_id device, cl_context context, cl_command_queue queue, int count) +{ + int error = CL_SUCCESS; + + cl_mem output_buffer; + cl_program program; + cl_kernel kernel; + + size_t dim = 1; + size_t work_size[1]; +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT 
------------------ +// ----------------------------------------------------------------------------------- +// Only OpenCL C++ to SPIR-V compilation +#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) + error = create_opencl_kernel( + context, &program, &kernel, + program_test_dtors_executed_once, "test_dtors_executed_once" + ); + RETURN_ON_ERROR(error) + return error; +// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) +#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + error = create_opencl_kernel( + context, &program, &kernel, + program_test_dtors_executed_once, "test_dtors_executed_once", "", false + ); + RETURN_ON_ERROR(error) +// Normal run +#else + error = create_opencl_kernel( + context, &program, &kernel, + program_test_dtors_executed_once, "test_dtors_executed_once" + ); + RETURN_ON_ERROR(error) +#endif + + // host vector, size == count, output[0...count-1] == 0xbeefbeef (3203383023) + // values in output __MUST BE__ greater than 0 for the test to work correctly + cl_uint init_value = cl_uint(0xbeefbeef); + std::vector output(count, init_value); + output_buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * output.size(), NULL, &error); + RETURN_ON_CL_ERROR(error, "clCreateBuffer") + + error = clEnqueueWriteBuffer( + queue, output_buffer, CL_TRUE, + 0, sizeof(cl_uint) * output.size(), + static_cast(output.data()), + 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(error, "clEnqueueWriteBuffer") + + error = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer); + RETURN_ON_CL_ERROR(error, "clSetKernelArg") + + work_size[0] = output.size(); + error = clEnqueueNDRangeKernel( + queue, kernel, + dim, NULL, work_size, NULL, + 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel") + + + // Increments the program reference count. 
Twice + error = clRetainProgram(program); + RETURN_ON_CL_ERROR(error, "clRetainProgram") + error = clRetainProgram(program); + RETURN_ON_CL_ERROR(error, "clRetainProgram") + + // Should just decrement the program reference count. + error = clReleaseProgram(program); + RETURN_ON_CL_ERROR(error, "clReleaseProgram") + error = clFinish(queue); + RETURN_ON_CL_ERROR(error, "clFinish") + + // Should just decrement the program reference count. + error = clReleaseProgram(program); + RETURN_ON_CL_ERROR(error, "clReleaseProgram") + error = clFinish(queue); + RETURN_ON_CL_ERROR(error, "clFinish") + +#ifndef USE_OPENCLC_KERNELS + // At this point global scope variables should not be destroyed, + // values in output buffer should not be modified. + + // Read output buffer + error = clEnqueueReadBuffer( + queue, output_buffer, CL_TRUE, + 0, sizeof(cl_uint) * output.size(), + static_cast(output.data()), + 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer") + for(auto& i : output) + { + if(i != init_value) + { + log_error("ERROR: Test test_global_scope_dtors_executed_once failed."); + log_error("\tDestructors were executed prematurely.\n"); + RETURN_ON_ERROR(-1) + } + } +#endif + + // Release kernel and program, destructors should be called now + error = clReleaseKernel(kernel); + RETURN_ON_CL_ERROR(error, "clReleaseKernel") + error = clReleaseProgram(program); + RETURN_ON_CL_ERROR(error, "clReleaseProgram") + + // Finish + error = clFinish(queue); + RETURN_ON_CL_ERROR(error, "clFinish") + + // Read output buffer + error = clEnqueueReadBuffer( + queue, output_buffer, CL_TRUE, + 0, sizeof(cl_uint) * output.size(), + static_cast(output.data()), + 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer") + + size_t sum = std::accumulate(output.begin(), output.end(), size_t(0)); + if(sum != 0) + { + log_error("ERROR: Test test_global_scope_dtors_executed_once failed."); + // Maybe some dtors were not run? 
+ for(auto& i : output) + { + if(i == init_value) + { + log_error("\tSome dtors were not executed."); + break; + } + } + log_error("\n"); + RETURN_ON_ERROR(-1) + } + + // Clean + clReleaseMemObject(output_buffer); + return error; +} + +// TEST3 +// Verify that ND-range during destructor execution is set to (1,1,1) + +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) +const char * program_test_dtor_ndrange = + "__kernel void test_dtor_ndrange(global uint *output)\n" + "{\n" + " ulong gid = get_global_id(0);\n" + " output[gid] = 0;\n" + "}\n" +; +#else +const char * program_test_dtor_ndrange = + "#include \n" + "#include \n" + "using namespace cl;\n" + // struct + "struct dtor_test_class {\n" + // non-trivial dtor + // set all values in buffer to 0 only if ND-range is (1, 1, 1) + " ~dtor_test_class() {\n" + " if(check()){\n" + " for(ulong i = 0; i < size; i++)\n" + " buffer[i] = 0;\n" + " }\n" + " };\n" + // return true if the ND-range is (1, 1, 1); otherwise - false + " bool check() {\n" + " return (get_global_size(0) == 1)" + " && (get_global_size(1) == 1)" + " && (get_global_size(2) == 1);\n" + " }" + " ulong size;\n" + " global_ptr buffer;\n" + "};\n" + // global scope program variable + "dtor_test_class global_var;\n" + + // values in output __MUST BE__ greater than 0 for the test to work correctly + "__kernel void test_dtor_ndrange(global_ptr output)\n" + "{\n" + " ulong gid = get_global_id(0);\n" + // set buffer and size in global var + " if(gid == 0){\n" + " global_var.buffer = output;\n" + " global_var.size = get_global_size(0);\n" + " }\n" + "}\n" +; +#endif + +AUTO_TEST_CASE(test_global_scope_dtor_ndrange) +(cl_device_id device, cl_context context, cl_command_queue queue, int count) +{ + 
int error = CL_SUCCESS; + + cl_mem output_buffer; + cl_program program; + cl_kernel kernel; + + size_t dim = 1; + size_t work_size[1]; +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +// Only OpenCL C++ to SPIR-V compilation +#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) + error = create_opencl_kernel( + context, &program, &kernel, + program_test_dtor_ndrange, "test_dtor_ndrange" + ); + RETURN_ON_ERROR(error) + return error; +// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) +#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + error = create_opencl_kernel( + context, &program, &kernel, + program_test_dtor_ndrange, "test_dtor_ndrange", "", false + ); + RETURN_ON_ERROR(error) +// Normal run +#else + error = create_opencl_kernel( + context, &program, &kernel, + program_test_dtor_ndrange, "test_dtor_ndrange" + ); + RETURN_ON_ERROR(error) +#endif + + // host vector, size == count, output[0...count-1] == 0xbeefbeef (3203383023) + // values in output __MUST BE__ greater than 0 for the test to work correctly + std::vector output(count, cl_uint(0xbeefbeef)); + output_buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * output.size(), NULL, &error); + RETURN_ON_CL_ERROR(error, "clCreateBuffer") + + error = clEnqueueWriteBuffer( + queue, output_buffer, CL_TRUE, + 0, sizeof(cl_uint) * output.size(), + static_cast(output.data()), + 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(error, "clEnqueueWriteBuffer") + + error = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer); + RETURN_ON_CL_ERROR(error, "clSetKernelArg") + + work_size[0] = output.size(); + error = clEnqueueNDRangeKernel( + queue, kernel, + dim, NULL, work_size, NULL, + 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(error, 
"clEnqueueNDRangeKernel") + + // Release kernel and program + // Dtor should be called now + error = clReleaseKernel(kernel); + RETURN_ON_CL_ERROR(error, "clReleaseKernel") + error = clReleaseProgram(program); + RETURN_ON_CL_ERROR(error, "clReleaseProgram") + + // Finish + error = clFinish(queue); + RETURN_ON_CL_ERROR(error, "clFinish") + + // Read output buffer + error = clEnqueueReadBuffer( + queue, output_buffer, CL_TRUE, + 0, sizeof(cl_uint) * output.size(), + static_cast(output.data()), + 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer") + + size_t sum = std::accumulate(output.begin(), output.end(), size_t(0)); + if(sum != 0) + { + error = -1; + CHECK_ERROR_MSG(error, "Test test_dtor_ndrange failed."); + } + + clReleaseMemObject(output_buffer); + return error; +} + +#endif // TEST_CONFORMANCE_CLCPP_API_TEST_DTORS_HPP diff --git a/test_conformance/clcpp/api/test_spec_consts.hpp b/test_conformance/clcpp/api/test_spec_consts.hpp new file mode 100644 index 00000000..1d061683 --- /dev/null +++ b/test_conformance/clcpp/api/test_spec_consts.hpp @@ -0,0 +1,474 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef TEST_CONFORMANCE_CLCPP_API_TEST_SPEC_CONSTS_HPP +#define TEST_CONFORMANCE_CLCPP_API_TEST_SPEC_CONSTS_HPP + +#include +#include +#include + +#include "../common.hpp" + +// TEST 1 +// Verify that if left unset the specialization constant defaults to the default value set in SPIR-V (zero). + +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) +const char * kernel_test_spec_consts_defaults = + "__kernel void test_spec_consts_defaults(global int *output)\n" + "{\n" + " ulong gid = get_global_id(0);\n" + " output[gid] = 0;\n" + "}\n" +; +#else +const char * kernel_test_spec_consts_defaults = + "#include \n" + "#include \n" + "#include \n" + "using namespace cl;\n" + "spec_constant spec1(0);\n" + "spec_constant spec2(0);\n" + "spec_constant spec3(0);\n" + "spec_constant spec4(0);\n" + "spec_constant spec5(0);\n" + "spec_constant spec6(0);\n" + "spec_constant spec7(0);\n" + "spec_constant spec8(0);\n" + "spec_constant spec9(0.0f);\n" + "#ifdef cl_khr_fp64\n" + "spec_constant spec10(0.0);\n" + "#endif\n" + "#ifdef cl_khr_fp16\n" + "spec_constant spec11(0.0h);\n" + "#endif\n" + "__kernel void test_spec_consts_defaults(global_ptr output)\n" + "{\n" + " ulong gid = get_global_id(0);\n" + " int result = 0;\n" + " if(get(spec1) != char(0)) result = 1;\n" + " if(get(spec2) != uchar(0)) result = 1;\n" + " if(get(spec3) != short(0)) result = 1;\n" + " if(get(spec4) != ushort(0)) result = 1;\n" + " if(get(spec5) != int(0)) result = 1;\n" + " if(get(spec6) != uint(0)) result = 1;\n" + " if(get(spec7) != long(0)) result = 1;\n" + " if(get(spec8) != ulong(0)) result = 1;\n" + " if(get(spec9) != float(0)) result = 1;\n" + "#ifdef cl_khr_fp64\n" + " if(get(spec10) != double(0)) result = 1;\n" + "#endif\n" + 
"#ifdef cl_khr_fp16\n" + " if(get(spec11) != half(0)) result = 1;\n" + "#endif\n" + " output[gid] = result;\n" + "}\n" +; +#endif + +AUTO_TEST_CASE(test_spec_consts_defaults) +(cl_device_id device, cl_context context, cl_command_queue queue, int count) +{ + int error = CL_SUCCESS; + + cl_mem output_buffer; + cl_program program; + cl_kernel kernel; + + size_t dim = 1; + size_t work_size[1]; + + std::string options = ""; + if(is_extension_available(device, "cl_khr_fp16")) + { + options += " -cl-fp16-enable"; + } + if(is_extension_available(device, "cl_khr_fp64")) + { + options += " -cl-fp64-enable"; + } +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +// Only OpenCL C++ to SPIR-V compilation +#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) + error = create_opencl_kernel(context, &program, &kernel, kernel_test_spec_consts_defaults, "test_spec_consts_defaults", options); + RETURN_ON_ERROR(error) + return error; +// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) +#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + error = create_opencl_kernel(context, &program, &kernel, kernel_test_spec_consts_defaults, "test_spec_consts_defaults", "", false); + RETURN_ON_ERROR(error) +// Normal run +#else + // Spec constants are NOT set before clBuildProgram (called in create_opencl_kernel), so + // they all should default to the default value set in SPIR-V (zero). 
+ error = create_opencl_kernel(context, &program, &kernel, kernel_test_spec_consts_defaults, "test_spec_consts_defaults", options); + RETURN_ON_ERROR(error) +#endif + + // host vector, size == 1, output[0] == 1 + std::vector output(1, cl_int(1)); + output_buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * output.size(), NULL, &error); + RETURN_ON_CL_ERROR(error, "clCreateBuffer") + + error = clEnqueueWriteBuffer(queue, output_buffer, CL_TRUE, 0, sizeof(cl_int) * output.size(), static_cast(output.data()), 0, NULL, NULL); + RETURN_ON_CL_ERROR(error, "clEnqueueWriteBuffer") + + error = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer); + RETURN_ON_CL_ERROR(error, "clSetKernelArg") + + work_size[0] = output.size(); + error = clEnqueueNDRangeKernel(queue, kernel, dim, NULL, work_size, NULL, 0, NULL, NULL); + RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKerne") + + error = clEnqueueReadBuffer(queue, output_buffer, CL_TRUE, 0, sizeof(cl_int) * output.size(), static_cast(output.data()), 0, NULL, NULL); + RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer") + + // if output[0] != 0, then some spec constant(s) did not default to zero. + if(output[0] != 0) + { + RETURN_ON_ERROR_MSG(-1, "Test test_spec_consts_defaults failed, output[0]: %d.", output[0]) + } + + clReleaseMemObject(output_buffer); + clReleaseKernel(kernel); + clReleaseProgram(program); + return error; +} + +// TEST 2 +// Verify that setting an existing specialization constant affects only +// the value of that constant and not of other specialization constants. 
+ +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) +const char * kernel_test_spec_consts_many_constants = + "__kernel void test_spec_consts_many_constants(global int *output)\n" + "{\n" + " ulong gid = get_global_id(0);\n" + " output[gid] = 0;\n" + "}\n" +; +#else +const char * kernel_test_spec_consts_many_constants = + "#include \n" + "#include \n" + "#include \n" + "using namespace cl;\n" + "spec_constant spec1(0);\n" + "spec_constant spec2(0);\n" + "spec_constant spec3(0);\n" + "__kernel void test_spec_consts_defaults(global_ptr output)\n" + "{\n" + " ulong gid = get_global_id(0);\n" + " int result = 0;\n" + " if(get(spec1) != int(-1024)) result += 1;\n" + " if(get(spec2) != int(0)) result += 2;\n" + " if(get(spec3) != int(1024)) result += 4;\n" + " output[gid] = result;\n" + "}\n" +; +#endif + +AUTO_TEST_CASE(test_spec_consts_many_constants) +(cl_device_id device, cl_context context, cl_command_queue queue, int count) +{ + int error = CL_SUCCESS; + + cl_mem output_buffer; + cl_program program; + cl_kernel kernel; + + size_t dim = 1; + size_t work_size[1]; + +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +// Only OpenCL C++ to SPIR-V compilation +#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) + error = create_opencl_kernel( + context, &program, &kernel, + kernel_test_spec_consts_many_constants, "test_spec_consts_many_constants" + ); + RETURN_ON_ERROR(error) + return error; +// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) +#elif 
defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + error = create_opencl_kernel( + context, &program, &kernel, + kernel_test_spec_consts_many_constants, "test_spec_consts_many_constants", "", false + ); + RETURN_ON_ERROR(error) +// Normal run +#else + // Create program + error = create_openclcpp_program(context, &program, 1, &kernel_test_spec_consts_many_constants); + RETURN_ON_ERROR(error) + + // Set specialization constants + + // clSetProgramSpecializationConstant( + // cl_program /* program */, cl_uint /* spec_id */, size_t /* spec_size */,const void* /* spec_value */ + // ) + cl_int spec1 = -1024; + cl_int spec3 = 1024; + // Set spec1 + error = clSetProgramSpecializationConstant(program, cl_uint(1), sizeof(cl_int), static_cast(&spec1)); + RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant") + // Specialization constant spec2 should default to zero + // Set spec3 + error = clSetProgramSpecializationConstant(program, cl_uint(3), sizeof(cl_int), static_cast(&spec3)); + RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant") + + // Build program and create kernel + error = build_program_create_kernel_helper( + context, &program, &kernel, 1, &kernel_test_spec_consts_many_constants, "test_spec_consts_many_constants" + ); + RETURN_ON_ERROR(error) +#endif + + // host vector, size == 1, output[0] == 1 + std::vector output(1, cl_int(1)); + output_buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * output.size(), NULL, &error); + RETURN_ON_CL_ERROR(error, "clCreateBuffer") + + error = clEnqueueWriteBuffer(queue, output_buffer, CL_TRUE, 0, sizeof(cl_int) * output.size(), static_cast(output.data()), 0, NULL, NULL); + RETURN_ON_CL_ERROR(error, "clEnqueueWriteBuffer") + + error = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer); + RETURN_ON_CL_ERROR(error, "clSetKernelArg") + + work_size[0] = output.size(); + error = clEnqueueNDRangeKernel(queue, kernel, dim, NULL, work_size, NULL, 0, NULL, NULL); + 
RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel") + + error = clEnqueueReadBuffer(queue, output_buffer, CL_TRUE, 0, sizeof(cl_int) * output.size(), static_cast(output.data()), 0, NULL, NULL); + RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer") + + // if output[0] != 0, then values of spec constants were incorrect + if(output[0] != 0) + { + RETURN_ON_ERROR_MSG(-1, "Test test_spec_consts_many_constants failed, output[0]: %d.", output[0]); + } + + clReleaseMemObject(output_buffer); + clReleaseKernel(kernel); + clReleaseProgram(program); + return error; +} + +// TEST 3 +// Verify that the API correctly handles the size of a specialization constant by exercising +// the API for specialization constants of different types (int, bool, float, etc.) + +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) +const char * kernel_test_spec_consts_different_types = + "__kernel void test_spec_consts_different_types(global int *output)\n" + "{\n" + " ulong gid = get_global_id(0);\n" + " output[gid] = 0;\n" + "}\n" +; +#else +const char * kernel_test_spec_consts_different_types = + "#include \n" + "#include \n" + "#include \n" + "#include \n" + "using namespace cl;\n" + "spec_constant spec1(0);\n" + "spec_constant spec2(0);\n" + "spec_constant spec3(0);\n" + "spec_constant spec4(0);\n" + "spec_constant spec5(0);\n" + "spec_constant spec6(0);\n" + "spec_constant spec7(0);\n" + "spec_constant spec8(0);\n" + "spec_constant spec9(0.0f);\n" + "#ifdef cl_khr_fp64\n" + "spec_constant spec10(0.0);\n" + "#endif\n" + "#ifdef cl_khr_fp16\n" + "spec_constant spec11(0.0h);\n" + "#endif\n" + "__kernel void test_spec_consts_different_types(global_ptr output)\n" + "{\n" + " ulong gid = get_global_id(0);\n" + " int result = 0;\n" + " 
if(get(spec1) != char(CHAR_MAX)) result += 1;\n" + " if(get(spec2) != uchar(UCHAR_MAX)) result += 2;\n" + " if(get(spec3) != short(SHRT_MAX)) result += 4;\n" + " if(get(spec4) != ushort(USHRT_MAX)) result += 8;\n" + " if(get(spec5) != int(INT_MAX)) result += 16;\n" + " if(get(spec6) != uint(UINT_MAX)) result += 32;\n" + " if(get(spec7) != long(LONG_MAX)) result += 64;\n" + " if(get(spec8) != ulong(ULONG_MAX)) result += 128;\n" + " if(get(spec9) != float(FLT_MAX)) result += 256;\n" + "#ifdef cl_khr_fp64\n" + " if(get(spec10) != double(DBL_MAX)) result += 512;\n" + "#endif\n" + "#ifdef cl_khr_fp16\n" + " if(get(spec11) != half(HALF_MAX)) result += 1024;\n" + "#endif\n" + " output[gid] = result;\n" + "}\n" +; +#endif + + +AUTO_TEST_CASE(test_spec_consts_different_types) +(cl_device_id device, cl_context context, cl_command_queue queue, int count) +{ + int error = CL_SUCCESS; + + cl_mem output_buffer; + cl_program program; + cl_kernel kernel; + + size_t dim = 1; + size_t work_size[1]; + + std::string options = ""; + if(is_extension_available(device, "cl_khr_fp16")) + { + options += " -cl-fp16-enable"; + } + if(is_extension_available(device, "cl_khr_fp64")) + { + options += " -cl-fp64-enable"; + } +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +// Only OpenCL C++ to SPIR-V compilation +#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) + error = create_opencl_kernel(context, &program, &kernel, kernel_test_spec_consts_different_types, "test_spec_consts_different_types", options); + RETURN_ON_ERROR(error) + return error; +// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) +#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + error = create_opencl_kernel(context, &program, &kernel, 
kernel_test_spec_consts_different_types, "test_spec_consts_different_types", "", false); + RETURN_ON_ERROR(error) +// Normal run +#else + // Create program + error = create_openclcpp_program(context, &program, 1, &kernel_test_spec_consts_different_types, options.c_str()); + RETURN_ON_ERROR(error) + + // Set specialization constants + cl_uint spec_id = 1; + + cl_char spec1 = CL_CHAR_MAX; + cl_uchar spec2 = CL_UCHAR_MAX; + cl_short spec3 = CL_SHRT_MAX; + cl_ushort spec4 = CL_USHRT_MAX; + cl_int spec5 = CL_INT_MAX; + cl_uint spec6 = CL_UINT_MAX; + cl_long spec7 = CL_LONG_MAX; + cl_ulong spec8 = CL_ULONG_MAX; + cl_float spec9 = CL_FLT_MAX; + cl_double spec10 = CL_DBL_MAX; + cl_half spec11 = CL_HALF_MAX; + + // Set spec1 + error = clSetProgramSpecializationConstant(program, spec_id++, sizeof(cl_char), static_cast(&spec1)); + RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant") + // Set spec2 + error = clSetProgramSpecializationConstant(program, spec_id++, sizeof(cl_uchar), static_cast(&spec2)); + RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant") + // Set spec3 + error = clSetProgramSpecializationConstant(program, spec_id++, sizeof(cl_short), static_cast(&spec3)); + RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant") + // Set spec4 + error = clSetProgramSpecializationConstant(program, spec_id++, sizeof(cl_ushort), static_cast(&spec4)); + RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant") + // Set spec5 + error = clSetProgramSpecializationConstant(program, spec_id++, sizeof(cl_int), static_cast(&spec5)); + RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant") + // Set spec6 + error = clSetProgramSpecializationConstant(program, spec_id++, sizeof(cl_uint), static_cast(&spec6)); + RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant") + // Set spec7 + error = clSetProgramSpecializationConstant(program, spec_id++, sizeof(cl_long), static_cast(&spec7)); + RETURN_ON_CL_ERROR(error, 
"clSetProgramSpecializationConstant") + // Set spec8 + error = clSetProgramSpecializationConstant(program, spec_id++, sizeof(cl_ulong), static_cast(&spec8)); + RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant") + // Set spec9 + error = clSetProgramSpecializationConstant(program, spec_id++, sizeof(cl_float), static_cast(&spec9)); + RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant") + // Set spec10 + if(is_extension_available(device, "cl_khr_fp64")) + { + error = clSetProgramSpecializationConstant(program, cl_uint(10), sizeof(cl_double), static_cast(&spec10)); + RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant") + } + // Set spec11 + if(is_extension_available(device, "cl_khr_fp16")) + { + error = clSetProgramSpecializationConstant(program, cl_uint(11), sizeof(cl_half), static_cast(&spec11)); + RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant") + } + + // Build program and create kernel + error = build_program_create_kernel_helper( + context, &program, &kernel, 1, &kernel_test_spec_consts_many_constants, "test_spec_consts_many_constants" + ); + RETURN_ON_ERROR(error) +#endif + + // Copy output to output_buffer, run kernel, copy output_buffer back to output, check result + + // host vector, size == 1, output[0] == 1 + std::vector output(1, cl_int(1)); + output_buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * output.size(), NULL, &error); + RETURN_ON_CL_ERROR(error, "clCreateBuffer") + + error = clEnqueueWriteBuffer(queue, output_buffer, CL_TRUE, 0, sizeof(cl_int) * output.size(), static_cast(output.data()), 0, NULL, NULL); + RETURN_ON_CL_ERROR(error, "clEnqueueWriteBuffer") + + error = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer); + RETURN_ON_CL_ERROR(error, "clSetKernelArg") + + work_size[0] = output.size(); + error = clEnqueueNDRangeKernel(queue, kernel, dim, NULL, work_size, NULL, 0, NULL, NULL); + RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel") + + error 
= clEnqueueReadBuffer(queue, output_buffer, CL_TRUE, 0, sizeof(cl_int) * output.size(), static_cast(output.data()), 0, NULL, NULL); + RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer") + + // if output[0] != 0, then some spec constants had incorrect values + if(output[0] != 0) + { + RETURN_ON_ERROR_MSG(-1, "Test test_spec_consts_different_types failed, output[0]: %d.", output[0]) + } + + clReleaseMemObject(output_buffer); + clReleaseKernel(kernel); + clReleaseProgram(program); + return error; +} + +#endif // TEST_CONFORMANCE_CLCPP_API_TEST_SPEC_CONSTS_HPP diff --git a/test_conformance/clcpp/atomics/CMakeLists.txt b/test_conformance/clcpp/atomics/CMakeLists.txt new file mode 100644 index 00000000..7f77110c --- /dev/null +++ b/test_conformance/clcpp/atomics/CMakeLists.txt @@ -0,0 +1,12 @@ +set(MODULE_NAME CPP_ATOMICS) + +set(${MODULE_NAME}_SOURCES + main.cpp + ../../../test_common/harness/errorHelpers.c + ../../../test_common/harness/testHarness.c + ../../../test_common/harness/kernelHelpers.c + ../../../test_common/harness/msvc9.c + ../../../test_common/harness/parseParameters.cpp +) + +include(../../CMakeCommon.txt) diff --git a/test_conformance/clcpp/atomics/atomic_fetch.hpp b/test_conformance/clcpp/atomics/atomic_fetch.hpp new file mode 100644 index 00000000..5618375e --- /dev/null +++ b/test_conformance/clcpp/atomics/atomic_fetch.hpp @@ -0,0 +1,306 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef TEST_CONFORMANCE_CLCPP_ATOMICS_ATOMIC_FETCH_HPP +#define TEST_CONFORMANCE_CLCPP_ATOMICS_ATOMIC_FETCH_HPP + +#include "../common.hpp" +#include "../funcs_test_utils.hpp" + + +const size_t atomic_bucket_size = 100; + +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) +template +std::string generate_kernel_atomic_fetch(func_type func) +{ + std::string in1_value = "input[gid]"; + std::string out1_value = "output[gid / " + std::to_string(atomic_bucket_size) + "]"; + std::string function_call = "atomic_" + func.str() + "(&" + out1_value + ", " + in1_value + ")"; + return + "" + func.defs() + + "__kernel void test_" + func.str() + "(global " + type_name() + " *input, global atomic_" + type_name() + " *output)\n" + "{\n" + " size_t gid = get_global_id(0);\n" + " " + function_call + ";\n" + "}\n"; +} +#else +template +std::string generate_kernel_atomic_fetch(func_type func) +{ + std::string in1_value = "input[gid]"; + std::string out1_value = "output[gid / " + std::to_string(atomic_bucket_size) + "]"; + std::string function_call = func.str() + "(" + in1_value + ")"; + return + "" + func.defs() + + "" + func.headers() + + "#include \n" + "#include \n" + "using namespace cl;\n" + "__kernel void test_" + func.str() + "(global_ptr<" + type_name() + "[]> input," + "global_ptr() + ">[]> output)\n" + "{\n" + " size_t gid = get_global_id(0);\n" + " " + out1_value + "." 
+ function_call + ";\n" + "}\n"; +} +#endif + +template +bool verify_atomic_fetch(const std::vector &in, const std::vector &out, atomic_fetch op) +{ + for (size_t i = 0; i < out.size(); i++) + { + TYPE expected = op.init_out(); + for (size_t k = 0; k < atomic_bucket_size; k++) + { + const size_t in_i = i * atomic_bucket_size + k; + if (in_i >= in.size()) + break; + expected = op(expected, in[in_i]); + } + if (expected != out[i]) + { + print_error_msg(expected, out[i], i, op); + return false; + } + } + return true; +} + +template +int test_atomic_fetch_func(cl_device_id device, cl_context context, cl_command_queue queue, size_t count, atomic_fetch op) +{ + cl_mem buffers[2]; + cl_program program; + cl_kernel kernel; + size_t work_size[1]; + int err; + + typedef typename atomic_fetch::in_type TYPE; + + // Don't run test for unsupported types + if (!(type_supported(device))) + { + return CL_SUCCESS; + } + if (sizeof(TYPE) == 8 && + (!is_extension_available(device, "cl_khr_int64_base_atomics") || + !is_extension_available(device, "cl_khr_int64_extended_atomics"))) + { + return CL_SUCCESS; + } + + std::string code_str = generate_kernel_atomic_fetch(op); + std::string kernel_name("test_"); kernel_name += op.str(); + +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +// Only OpenCL C++ to SPIR-V compilation +#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) + err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name); + RETURN_ON_ERROR(err) + return err; +// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) +#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name, "-cl-std=CL2.0", false); + RETURN_ON_ERROR(err) +#else + err = 
create_opencl_kernel(context, &program, &kernel, code_str, kernel_name); + RETURN_ON_ERROR(err) +#endif + + std::vector input = generate_input(count, op.min1(), op.max1(), std::vector()); + std::vector output = generate_output((count - 1) / atomic_bucket_size + 1); + + buffers[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(TYPE) * input.size(), NULL, &err); + RETURN_ON_CL_ERROR(err, "clCreateBuffer") + + buffers[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(TYPE) * output.size(), NULL, &err); + RETURN_ON_CL_ERROR(err, "clCreateBuffer") + + err = clEnqueueWriteBuffer( + queue, buffers[0], CL_TRUE, 0, sizeof(TYPE) * input.size(), + static_cast(input.data()), 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer") + + const TYPE pattern = op.init_out(); + err = clEnqueueFillBuffer(queue, buffers[1], &pattern, sizeof(pattern), 0, sizeof(TYPE) * output.size(), 0, NULL, NULL); + RETURN_ON_CL_ERROR(err, "clEnqueueFillBuffer") + + err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]); + RETURN_ON_CL_ERROR(err, "clSetKernelArg") + err = clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]); + RETURN_ON_CL_ERROR(err, "clSetKernelArg") + + work_size[0] = count; + err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, NULL, 0, NULL, NULL); + RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel") + + err = clEnqueueReadBuffer( + queue, buffers[1], CL_TRUE, 0, sizeof(TYPE) * output.size(), + static_cast(output.data()), 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer") + + if (!verify_atomic_fetch(input, output, op)) + { + RETURN_ON_ERROR_MSG(-1, "test_%s %s failed", op.str().c_str(), type_name().c_str()); + } + log_info("test_%s %s passed\n", op.str().c_str(), type_name().c_str()); + + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseKernel(kernel); + clReleaseProgram(program); + return err; +} + + +template +struct atomic_fetch +{ + typedef TYPE 
in_type; + + std::string decl_str() + { + return type_name(); + } + + std::string defs() + { + std::string defs; + if (sizeof(TYPE) == 8) + { + defs += "#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable\n"; + defs += "#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable\n"; + } + return defs; + } + + std::string headers() + { + return "#include \n"; + } + + TYPE min1() + { + return 0; + } + + TYPE max1() + { + return 1000; + } +}; + + +#define DEF_ATOMIC_FETCH_FUNC(CLASS_NAME, FUNC_NAME, HOST_FUNC_EXPRESSION, INIT_OUT) \ +template \ +struct CLASS_NAME : public atomic_fetch \ +{ \ + std::string str() \ + { \ + return #FUNC_NAME; \ + } \ + \ + TYPE init_out() \ + { \ + return INIT_OUT; \ + } \ + \ + TYPE operator()(const TYPE& x, const TYPE& y) \ + { \ + return HOST_FUNC_EXPRESSION; \ + } \ +}; + +DEF_ATOMIC_FETCH_FUNC(atomic_fetch_add, fetch_add, x + y, 0) +DEF_ATOMIC_FETCH_FUNC(atomic_fetch_sub, fetch_sub, x - y, (std::numeric_limits::max)()) + +DEF_ATOMIC_FETCH_FUNC(atomic_fetch_and, fetch_and, x & y, (std::numeric_limits::max)()) +DEF_ATOMIC_FETCH_FUNC(atomic_fetch_or, fetch_or, x | y, 0) +DEF_ATOMIC_FETCH_FUNC(atomic_fetch_xor, fetch_xor, x ^ y, 0) + +DEF_ATOMIC_FETCH_FUNC(atomic_fetch_max, fetch_max, (std::max)(x, y), 0) +DEF_ATOMIC_FETCH_FUNC(atomic_fetch_min, fetch_min, (std::min)(x, y), (std::numeric_limits::max)()) + +#undef DEF_ATOMIC_FETCH_FUNC + + +AUTO_TEST_CASE(test_atomic_fetch) +(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + int error = CL_SUCCESS; + int last_error = CL_SUCCESS; + +#define TEST_ATOMIC_MACRO(TEST_CLASS) \ + last_error = test_atomic_fetch_func( \ + device, context, queue, n_elems, TEST_CLASS \ + ); \ + CHECK_ERROR(last_error) \ + error |= last_error; + + TEST_ATOMIC_MACRO((atomic_fetch_add())) + TEST_ATOMIC_MACRO((atomic_fetch_add())) + TEST_ATOMIC_MACRO((atomic_fetch_add())) + TEST_ATOMIC_MACRO((atomic_fetch_add())) + + TEST_ATOMIC_MACRO((atomic_fetch_sub())) + 
TEST_ATOMIC_MACRO((atomic_fetch_sub())) + TEST_ATOMIC_MACRO((atomic_fetch_sub())) + TEST_ATOMIC_MACRO((atomic_fetch_sub())) + + TEST_ATOMIC_MACRO((atomic_fetch_and())) + TEST_ATOMIC_MACRO((atomic_fetch_and())) + TEST_ATOMIC_MACRO((atomic_fetch_and())) + TEST_ATOMIC_MACRO((atomic_fetch_and())) + + TEST_ATOMIC_MACRO((atomic_fetch_or())) + TEST_ATOMIC_MACRO((atomic_fetch_or())) + TEST_ATOMIC_MACRO((atomic_fetch_or())) + TEST_ATOMIC_MACRO((atomic_fetch_or())) + + TEST_ATOMIC_MACRO((atomic_fetch_xor())) + TEST_ATOMIC_MACRO((atomic_fetch_xor())) + TEST_ATOMIC_MACRO((atomic_fetch_xor())) + TEST_ATOMIC_MACRO((atomic_fetch_xor())) + + TEST_ATOMIC_MACRO((atomic_fetch_max())) + TEST_ATOMIC_MACRO((atomic_fetch_max())) + TEST_ATOMIC_MACRO((atomic_fetch_max())) + TEST_ATOMIC_MACRO((atomic_fetch_max())) + + TEST_ATOMIC_MACRO((atomic_fetch_min())) + TEST_ATOMIC_MACRO((atomic_fetch_min())) + TEST_ATOMIC_MACRO((atomic_fetch_min())) + TEST_ATOMIC_MACRO((atomic_fetch_min())) + +#undef TEST_ATOMIC_MACRO + + if (error != CL_SUCCESS) + { + return -1; + } + return error; +} + +#endif // TEST_CONFORMANCE_CLCPP_ATOMICS_ATOMIC_FETCH_HPP diff --git a/test_conformance/clcpp/atomics/main.cpp b/test_conformance/clcpp/atomics/main.cpp new file mode 100644 index 00000000..0cf3d77d --- /dev/null +++ b/test_conformance/clcpp/atomics/main.cpp @@ -0,0 +1,30 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../common.hpp" + +#include "atomic_fetch.hpp" + + +int main(int argc, const char *argv[]) +{ + // Get list to all test functions + std::vector testfn_list = autotest::test_suite::get_test_functions(); + // Get names of all test functions + std::vector testfn_names = autotest::test_suite::get_test_names(); + // Create a vector of pointers to the names test functions + std::vector testfn_names_c_str = autotest::get_strings_ptrs(testfn_names); + return runTestHarness(argc, argv, testfn_list.size(), testfn_list.data(), testfn_names_c_str.data(), false, false, 0); +} diff --git a/test_conformance/clcpp/attributes/CMakeLists.txt b/test_conformance/clcpp/attributes/CMakeLists.txt new file mode 100644 index 00000000..eee2785f --- /dev/null +++ b/test_conformance/clcpp/attributes/CMakeLists.txt @@ -0,0 +1,12 @@ +set(MODULE_NAME CPP_ATTRIBUTES) + +set(${MODULE_NAME}_SOURCES + main.cpp + ../../../test_common/harness/errorHelpers.c + ../../../test_common/harness/testHarness.c + ../../../test_common/harness/kernelHelpers.c + ../../../test_common/harness/msvc9.c + ../../../test_common/harness/parseParameters.cpp +) + +include(../../CMakeCommon.txt) diff --git a/test_conformance/clcpp/attributes/main.cpp b/test_conformance/clcpp/attributes/main.cpp new file mode 100644 index 00000000..cd834a71 --- /dev/null +++ b/test_conformance/clcpp/attributes/main.cpp @@ -0,0 +1,32 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../common.hpp" + +#include "test_ivdep.hpp" +#include "test_max_size.hpp" +#include "test_required_num_sub_groups.hpp" + + +int main(int argc, const char *argv[]) +{ + // Get list to all test functions + std::vector testfn_list = autotest::test_suite::get_test_functions(); + // Get names of all test functions + std::vector testfn_names = autotest::test_suite::get_test_names(); + // Create a vector of pointers to the names test functions + std::vector testfn_names_c_str = autotest::get_strings_ptrs(testfn_names); + return runTestHarness(argc, argv, testfn_list.size(), testfn_list.data(), testfn_names_c_str.data(), false, false, 0); +} diff --git a/test_conformance/clcpp/attributes/test_ivdep.hpp b/test_conformance/clcpp/attributes/test_ivdep.hpp new file mode 100644 index 00000000..17b1f586 --- /dev/null +++ b/test_conformance/clcpp/attributes/test_ivdep.hpp @@ -0,0 +1,418 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef TEST_CONFORMANCE_CLCPP_ATTRIBUTES_TEST_IVDEP_HPP +#define TEST_CONFORMANCE_CLCPP_ATTRIBUTES_TEST_IVDEP_HPP + +#include +#include +#include +#include + +// Common for all OpenCL C++ tests +#include "../common.hpp" + + +namespace test_ivdep { + +enum class loop_kind +{ + for_loop, + while_loop, + do_loop +}; + +struct test_options +{ + loop_kind loop; + int ivdep_length; + int offset1; + int offset2; + int iter_count; + bool offset1_param; + bool offset2_param; + bool iter_count_param; + bool cond_in_header; + bool init_in_header; + bool incr_in_header; +}; + +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) +std::string generate_source(test_options options) +{ + std::string offset1s = options.offset1_param ? "offset1" : std::to_string(options.offset1); + std::string offset2s = options.offset2_param ? "offset2" : std::to_string(options.offset2); + + std::string init = "i = 0"; + std::string cond = std::string("i < ") + (options.iter_count_param ? "iter_count" : std::to_string(options.iter_count)); + std::string incr = "i += 2"; + + std::stringstream s; + s << R"( + kernel void test(global int *a, global int *b, global int *c, int offset1, int offset2, int iter_count) + { + int i; + )"; + + // Loop #1 + if (!options.init_in_header) s << init << ";" << std::endl; + if (options.loop == loop_kind::for_loop) + s << "for (" << + (options.init_in_header ? init : "") << ";" << + (options.cond_in_header ? cond : "") << ";" << + (options.incr_in_header ? incr : "") << ")"; + else if (options.loop == loop_kind::while_loop) + s << "while (" << (options.cond_in_header ? 
cond : "true") << ")"; + else if (options.loop == loop_kind::do_loop) + s << "do"; + s << "{" << std::endl; + if (!options.cond_in_header) s << "if (!(" << cond << ")) break;" << std::endl; + s << "a[i + " << offset1s << "] = b[i + " << offset1s << "] * c[i + " << offset1s << "];" << std::endl; + if (!options.incr_in_header) s << incr << ";" << std::endl; + s << "}" << std::endl; + if (options.loop == loop_kind::do_loop) + s << "while (" << (options.cond_in_header ? cond : "true") << ");" << std::endl; + + // Loop #2 + if (!options.init_in_header) s << init << ";" << std::endl; + if (options.loop == loop_kind::for_loop) + s << "for (" << + (options.init_in_header ? init : "") << ";" << + (options.cond_in_header ? cond : "") << ";" << + (options.incr_in_header ? incr : "") << ")"; + else if (options.loop == loop_kind::while_loop) + s << "while (" << (options.cond_in_header ? cond : "true") << ")"; + else if (options.loop == loop_kind::do_loop) + s << "do"; + s << "{" << std::endl; + if (!options.cond_in_header) s << "if (!(" << cond << ")) break;" << std::endl; + s << "a[i + " << offset2s << "] = a[i] + b[i];" << std::endl; + if (!options.incr_in_header) s << incr << ";" << std::endl; + s << "}" << std::endl; + if (options.loop == loop_kind::do_loop) + s << "while (" << (options.cond_in_header ? cond : "true") << ");" << std::endl; + + s << "}" << std::endl; + + return s.str(); +} +#else +std::string generate_source(test_options options) +{ + std::string offset1s = options.offset1_param ? "offset1" : std::to_string(options.offset1); + std::string offset2s = options.offset2_param ? "offset2" : std::to_string(options.offset2); + + std::string init = "i = 0"; + std::string cond = std::string("i < ") + (options.iter_count_param ? 
"iter_count" : std::to_string(options.iter_count)); + std::string incr = "i += 2"; + + std::stringstream s; + s << R"( + #include + #include + + using namespace cl; + )"; + s << R"( + kernel void test(global_ptr a, global_ptr b, global_ptr c, int offset1, int offset2, int iter_count) + { + int i; + )"; + + // Loop #1 + if (!options.init_in_header) s << init << ";" << std::endl; + if (options.ivdep_length > 0) s << "[[cl::ivdep]]" << std::endl; + if (options.loop == loop_kind::for_loop) + s << "for (" << + (options.init_in_header ? init : "") << ";" << + (options.cond_in_header ? cond : "") << ";" << + (options.incr_in_header ? incr : "") << ")"; + else if (options.loop == loop_kind::while_loop) + s << "while (" << (options.cond_in_header ? cond : "true") << ")"; + else if (options.loop == loop_kind::do_loop) + s << "do"; + s << "{" << std::endl; + if (!options.cond_in_header) s << "if (!(" << cond << ")) break;" << std::endl; + s << "a[i + " << offset1s << "] = b[i + " << offset1s << "] * c[i + " << offset1s << "];" << std::endl; + if (!options.incr_in_header) s << incr << ";" << std::endl; + s << "}" << std::endl; + if (options.loop == loop_kind::do_loop) + s << "while (" << (options.cond_in_header ? cond : "true") << ");" << std::endl; + + // Loop #2 + if (!options.init_in_header) s << init << ";" << std::endl; + if (options.ivdep_length > 0) s << "[[cl::ivdep(" << options.ivdep_length << ")]]" << std::endl; + if (options.loop == loop_kind::for_loop) + s << "for (" << + (options.init_in_header ? init : "") << ";" << + (options.cond_in_header ? cond : "") << ";" << + (options.incr_in_header ? incr : "") << ")"; + else if (options.loop == loop_kind::while_loop) + s << "while (" << (options.cond_in_header ? 
cond : "true") << ")"; + else if (options.loop == loop_kind::do_loop) + s << "do"; + s << "{" << std::endl; + if (!options.cond_in_header) s << "if (!(" << cond << ")) break;" << std::endl; + s << "a[i + " << offset2s << "] = a[i] + b[i];" << std::endl; + if (!options.incr_in_header) s << incr << ";" << std::endl; + s << "}" << std::endl; + if (options.loop == loop_kind::do_loop) + s << "while (" << (options.cond_in_header ? cond : "true") << ");" << std::endl; + + s << "}" << std::endl; + + return s.str(); +} +#endif + +int test(cl_device_id device, cl_context context, cl_command_queue queue, test_options options) +{ + int error = CL_SUCCESS; + + cl_program program; + cl_kernel kernel; + + std::string kernel_name = "test"; + std::string source = generate_source(options); + +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +// Only OpenCL C++ to SPIR-V compilation +#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) + error = create_opencl_kernel( + context, &program, &kernel, + source, kernel_name + ); + RETURN_ON_ERROR(error) + return error; +// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) +#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + error = create_opencl_kernel( + context, &program, &kernel, + source, kernel_name, "", false + ); + RETURN_ON_ERROR(error) +// Normal run +#else + error = create_opencl_kernel( + context, &program, &kernel, + source, kernel_name + ); + RETURN_ON_ERROR(error) +#endif + + const size_t count = 100; + const size_t global_size = 1; + + std::vector a(count); + std::vector b(count); + std::vector c(count); + for (size_t i = 0; i < count; i++) + { + a[i] = 0; + b[i] = i; + c[i] = 1; + } + + cl_mem a_buffer; + a_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE | 
CL_MEM_COPY_HOST_PTR, + sizeof(int) * count, static_cast(a.data()), &error + ); + RETURN_ON_CL_ERROR(error, "clCreateBuffer") + + cl_mem b_buffer; + b_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, + sizeof(int) * count, static_cast(b.data()), &error + ); + RETURN_ON_CL_ERROR(error, "clCreateBuffer") + + cl_mem c_buffer; + c_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, + sizeof(int) * count, static_cast(c.data()),&error + ); + RETURN_ON_CL_ERROR(error, "clCreateBuffer") + + error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &a_buffer); + RETURN_ON_CL_ERROR(error, "clSetKernelArg") + error = clSetKernelArg(kernel, 1, sizeof(cl_mem), &b_buffer); + RETURN_ON_CL_ERROR(error, "clSetKernelArg") + error = clSetKernelArg(kernel, 2, sizeof(cl_mem), &c_buffer); + RETURN_ON_CL_ERROR(error, "clSetKernelArg") + error = clSetKernelArg(kernel, 3, sizeof(cl_int), &options.offset1); + RETURN_ON_CL_ERROR(error, "clSetKernelArg") + error = clSetKernelArg(kernel, 4, sizeof(cl_int), &options.offset2); + RETURN_ON_CL_ERROR(error, "clSetKernelArg") + error = clSetKernelArg(kernel, 5, sizeof(cl_int), &options.iter_count); + RETURN_ON_CL_ERROR(error, "clSetKernelArg") + + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, NULL, 0, NULL, NULL); + RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel") + + std::vector a_output(count); + error = clEnqueueReadBuffer( + queue, a_buffer, CL_TRUE, + 0, sizeof(int) * count, + static_cast(a_output.data()), + 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer") + + for (int i = 0; i < options.iter_count; i += 2) + { + a[i + options.offset1] = b[i + options.offset1] * c[i + options.offset1]; + } + + for (int i = 0; i < options.iter_count; i += 2) + { + a[i + options.offset2] = a[i] + b[i]; + } + + for (size_t i = 0; i < count; i++) + { + const int value = a_output[i]; + const int expected = a[i]; + if (value != expected) + { + RETURN_ON_ERROR_MSG(-1, + "Test 
failed. Element %lu: %d should be: %d", + i, value, expected + ); + } + } + + clReleaseMemObject(a_buffer); + clReleaseMemObject(b_buffer); + clReleaseMemObject(c_buffer); + clReleaseKernel(kernel); + clReleaseProgram(program); + return error; +} + +const std::vector> params{ + std::make_tuple( -1, 0, 0 ), + std::make_tuple( -1, 3, 4 ), + std::make_tuple( 1, 1, 1 ), + std::make_tuple( 3, 4, 2 ), + std::make_tuple( 3, 4, 3 ), + std::make_tuple( 8, 10, 7 ), + std::make_tuple( 16, 16, 16 ) +}; +const std::vector iter_counts{ { 1, 4, 12, 40 } }; + +AUTO_TEST_CASE(test_ivdep_for) +(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + int error = CL_SUCCESS; + + for (auto param : params) + for (auto iter_count : iter_counts) + for (bool offset1_param : { false, true }) + for (bool offset2_param : { false, true }) + for (bool iter_count_param : { false, true }) + for (bool cond_in_header : { false, true }) + for (bool init_in_header : { false, true }) + for (bool incr_in_header : { false, true }) + { + test_options options; + options.loop = loop_kind::for_loop; + options.ivdep_length = std::get<0>(param); + options.offset1 = std::get<1>(param); + options.offset2 = std::get<2>(param); + options.iter_count = iter_count; + options.offset1_param = offset1_param; + options.offset2_param = offset2_param; + options.iter_count_param = iter_count_param; + options.cond_in_header = cond_in_header; + options.init_in_header = init_in_header; + options.incr_in_header = incr_in_header; + + error = test(device, context, queue, options); + RETURN_ON_ERROR(error) + } + + return error; +} + +AUTO_TEST_CASE(test_ivdep_while) +(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + int error = CL_SUCCESS; + + for (auto param : params) + for (auto iter_count : iter_counts) + for (bool offset1_param : { false, true }) + for (bool offset2_param : { false, true }) + for (bool iter_count_param : { false, true }) + for (bool 
cond_in_header : { false, true }) + { + test_options options; + options.loop = loop_kind::while_loop; + options.ivdep_length = std::get<0>(param); + options.offset1 = std::get<1>(param); + options.offset2 = std::get<2>(param); + options.iter_count = iter_count; + options.offset1_param = offset1_param; + options.offset2_param = offset2_param; + options.iter_count_param = iter_count_param; + options.cond_in_header = cond_in_header; + options.init_in_header = false; + options.incr_in_header = false; + + error = test(device, context, queue, options); + RETURN_ON_ERROR(error) + } + + return error; +} + +AUTO_TEST_CASE(test_ivdep_do) +(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + int error = CL_SUCCESS; + + for (auto param : params) + for (auto iter_count : iter_counts) + for (bool offset1_param : { false, true }) + for (bool offset2_param : { false, true }) + for (bool iter_count_param : { false, true }) + for (bool cond_in_header : { false, true }) + { + test_options options; + options.loop = loop_kind::do_loop; + options.ivdep_length = std::get<0>(param); + options.offset1 = std::get<1>(param); + options.offset2 = std::get<2>(param); + options.iter_count = iter_count; + options.offset1_param = offset1_param; + options.offset2_param = offset2_param; + options.iter_count_param = iter_count_param; + options.cond_in_header = cond_in_header; + options.init_in_header = false; + options.incr_in_header = false; + + error = test(device, context, queue, options); + RETURN_ON_ERROR(error) + } + + return error; +} + +} // namespace + +#endif // TEST_CONFORMANCE_CLCPP_ATTRIBUTES_TEST_IVDEP_HPP diff --git a/test_conformance/clcpp/attributes/test_max_size.hpp b/test_conformance/clcpp/attributes/test_max_size.hpp new file mode 100644 index 00000000..15e7ead6 --- /dev/null +++ b/test_conformance/clcpp/attributes/test_max_size.hpp @@ -0,0 +1,266 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef TEST_CONFORMANCE_CLCPP_ATTRIBUTES_TEST_MAX_SIZE_HPP +#define TEST_CONFORMANCE_CLCPP_ATTRIBUTES_TEST_MAX_SIZE_HPP + +#include +#include +#include + +// Common for all OpenCL C++ tests +#include "../common.hpp" + + +namespace test_max_size { + +enum class address_space +{ + constant, + local +}; + +enum class param_kind +{ + ptr_type, // constant_ptr + ptr, // constant* + ref // constant& +}; + +const param_kind param_kinds[] = +{ + param_kind::ptr_type, + param_kind::ptr, + param_kind::ref +}; + +struct test_options +{ + address_space space; + int max_size; + bool spec_const; + param_kind kind; + bool array; +}; + +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) +std::string generate_source(test_options options) +{ + std::stringstream s; + s << "kernel void test("; + s << (options.space == address_space::constant ? 
"constant" : "local"); + s << " int2 *input) { }" << std::endl; + + return s.str(); +} +#else +std::string generate_source(test_options options) +{ + std::string type_str = "int2"; + if (options.array) + type_str += "[]"; + + std::stringstream s; + s << "#include " << std::endl; + + if (options.spec_const) + { + s << "#include " << std::endl; + s << "cl::spec_constant max_size_spec{ 1234567890 };" << std::endl; + } + + s << "kernel void test("; + s << "[[cl::max_size(" << (options.spec_const ? "max_size_spec" : std::to_string(options.max_size)) << ")]] "; + s << (options.space == address_space::constant ? "cl::constant" : "cl::local"); + if (options.kind == param_kind::ptr_type) + s << "_ptr<" << type_str << ">"; + else if (options.kind == param_kind::ptr) + s << "<" << type_str << ">*"; + else if (options.kind == param_kind::ref) + s << "<" << type_str << ">&"; + s << " input) { }" << std::endl; + + return s.str(); +} +#endif + +int test(cl_device_id device, cl_context context, cl_command_queue queue, test_options options) +{ + int error = CL_SUCCESS; + + cl_program program; + cl_kernel kernel; + + std::string kernel_name = "test"; + std::string source = generate_source(options); + +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +// Only OpenCL C++ to SPIR-V compilation +#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) + error = create_opencl_kernel( + context, &program, &kernel, + source, kernel_name + ); + RETURN_ON_ERROR(error) + return error; +// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) +#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + error = create_opencl_kernel( + context, &program, &kernel, + source, kernel_name, "", false + ); + RETURN_ON_ERROR(error) +// Normal run +#else + const char 
*source_c_str = source.c_str(); + error = create_openclcpp_program(context, &program, 1, &source_c_str, ""); + RETURN_ON_ERROR(error) + + if (options.spec_const) + { + error = clSetProgramSpecializationConstant(program, 1, sizeof(cl_int), &options.max_size); + RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant") + } + + error = build_program_create_kernel_helper( + context, &program, &kernel, 1, &source_c_str, kernel_name.c_str() + ); + RETURN_ON_ERROR(error) +#endif + + const int max_size = options.max_size; + const int sizes[] = { + 1, + max_size / 2, + max_size, + max_size + 1, + max_size * 2 + }; + + for (int size : sizes) + { + cl_mem const_buffer; + if (options.space == address_space::constant) + { + const_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY, size, NULL, &error); + RETURN_ON_CL_ERROR(error, "clCreateBuffer") + + error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &const_buffer); + // Check the status later (depending on size and max_size values) + } + else if (options.space == address_space::local) + { + error = clSetKernelArg(kernel, 0, size, NULL); + // Check the status later (depending on size and max_size values) + } + + if (size <= max_size) + { + // Correct value, must not fail + RETURN_ON_CL_ERROR(error, "clSetKernelArg") + + const size_t global_size = 123; + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, NULL, 0, NULL, NULL); + RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel") + + error = clFinish(queue); + RETURN_ON_CL_ERROR(error, "clFinish") + } + else + { + // Incorrect value, must fail + if (error != CL_MAX_SIZE_RESTRICTION_EXCEEDED) + { + RETURN_ON_ERROR_MSG(-1, + "clSetKernelArg must fail with CL_MAX_SIZE_RESTRICTION_EXCEEDED," + " but returned %s (%d)", get_cl_error_string(error).c_str(), error + ); + } + } + + if (options.space == address_space::constant) + { + error = clReleaseMemObject(const_buffer); + RETURN_ON_CL_ERROR(error, "clReleaseMemObject") + } + } + + clReleaseKernel(kernel); + 
clReleaseProgram(program); + return error; +} + +AUTO_TEST_CASE(test_max_size_constant) +(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + int error = CL_SUCCESS; + + cl_ulong max_size; + error = clGetDeviceInfo(device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof(max_size), &max_size, NULL); + RETURN_ON_CL_ERROR(error, "clGetDeviceInfo") + + for (bool spec_const : { false, true }) + for (auto kind : param_kinds) + for (bool array : { false, true }) + { + test_options options; + options.space = address_space::constant; + options.max_size = max_size / 2; + options.spec_const = spec_const; + options.kind = kind; + options.array = array; + + error = test(device, context, queue, options); + RETURN_ON_ERROR(error) + } + + return error; +} + +AUTO_TEST_CASE(test_max_size_local) +(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + int error = CL_SUCCESS; + + cl_ulong max_size; + error = clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(max_size), &max_size, NULL); + RETURN_ON_CL_ERROR(error, "clGetDeviceInfo") + + for (bool spec_const : { false, true }) + for (auto kind : param_kinds) + for (bool array : { false, true }) + { + test_options options; + options.space = address_space::local; + options.max_size = max_size / 2; + options.spec_const = spec_const; + options.kind = kind; + options.array = array; + + error = test(device, context, queue, options); + RETURN_ON_ERROR(error) + } + + return error; +} + +} // namespace + +#endif // TEST_CONFORMANCE_CLCPP_ATTRIBUTES_TEST_MAX_SIZE_HPP diff --git a/test_conformance/clcpp/attributes/test_required_num_sub_groups.hpp b/test_conformance/clcpp/attributes/test_required_num_sub_groups.hpp new file mode 100644 index 00000000..2380eafe --- /dev/null +++ b/test_conformance/clcpp/attributes/test_required_num_sub_groups.hpp @@ -0,0 +1,285 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef TEST_CONFORMANCE_CLCPP_ATTRIBUTES_TEST_REQUIRED_NUM_SUB_GROUPS_HPP +#define TEST_CONFORMANCE_CLCPP_ATTRIBUTES_TEST_REQUIRED_NUM_SUB_GROUPS_HPP + +#include +#include +#include +#include + +// Common for all OpenCL C++ tests +#include "../common.hpp" + + +namespace test_required_num_sub_groups { + +struct test_options +{ + size_t num_sub_groups; + bool spec_const; + size_t max_count; + size_t num_tests; +}; + +struct output_type +{ + cl_ulong num_sub_groups; + cl_ulong enqueued_num_sub_groups; +}; + +const std::string source_common = R"( +struct output_type +{ + ulong num_sub_groups; + ulong enqueued_num_sub_groups; +}; +)"; + +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) +std::string generate_source(test_options options) +{ + std::stringstream s; + s << source_common; + s << R"( + #pragma OPENCL EXTENSION cl_khr_subgroups : enable + + kernel void test(global struct output_type *output) + { + const ulong gid = get_global_linear_id(); + output[gid].num_sub_groups = get_num_sub_groups(); + output[gid].enqueued_num_sub_groups = get_enqueued_num_sub_groups(); + } + )"; + + return s.str(); +} +#else +std::string 
generate_source(test_options options) +{ + std::stringstream s; + s << R"( + #include + #include + using namespace cl; + )"; + + if (options.spec_const) + { + s << "#include " << std::endl; + s << "cl::spec_constant num_sub_groups_spec{ 1234567890 };" << std::endl; + } + + s << source_common << std::endl; + s << "[[cl::required_num_sub_groups(" << (options.spec_const ? "num_sub_groups_spec" : std::to_string(options.num_sub_groups)) << ")]]"; + s << R"( + kernel void test(global_ptr output) + { + const ulong gid = get_global_linear_id(); + output[gid].num_sub_groups = get_num_sub_groups(); + output[gid].enqueued_num_sub_groups = get_enqueued_num_sub_groups(); + } + )"; + + return s.str(); +} +#endif + +int test(cl_device_id device, cl_context context, cl_command_queue queue, test_options options) +{ + int error = CL_SUCCESS; + +#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + if (!is_extension_available(device, "cl_khr_subgroups")) + { + log_info("SKIPPED: Extension `cl_khr_subgroups` is not supported. 
Skipping tests.\n"); + return CL_SUCCESS; + } +#endif + + cl_program program; + cl_kernel kernel; + + std::string kernel_name = "test"; + std::string source = generate_source(options); + +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +// Only OpenCL C++ to SPIR-V compilation +#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) + error = create_opencl_kernel( + context, &program, &kernel, + source, kernel_name + ); + RETURN_ON_ERROR(error) + return error; +// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) +#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + error = create_opencl_kernel( + context, &program, &kernel, + source, kernel_name, "-cl-std=CL2.0", false + ); + RETURN_ON_ERROR(error) +// Normal run +#else + const char *source_c_str = source.c_str(); + error = create_openclcpp_program(context, &program, 1, &source_c_str, ""); + RETURN_ON_ERROR(error) + + if (options.spec_const) + { + cl_uint spec_num_sub_groups = static_cast(options.num_sub_groups); + error = clSetProgramSpecializationConstant(program, 1, sizeof(cl_uint), &spec_num_sub_groups); + RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant") + } + + error = build_program_create_kernel_helper( + context, &program, &kernel, 1, &source_c_str, kernel_name.c_str() + ); + RETURN_ON_ERROR(error) +#endif + + size_t compile_num_sub_groups; + error = clGetKernelSubGroupInfo(kernel, device, CL_KERNEL_COMPILE_NUM_SUB_GROUPS, + 0, NULL, + sizeof(size_t), &compile_num_sub_groups, NULL); + RETURN_ON_CL_ERROR(error, "clGetKernelSubGroupInfo") + if (compile_num_sub_groups != options.num_sub_groups) + { + RETURN_ON_ERROR_MSG(-1, + "CL_KERNEL_COMPILE_NUM_SUB_GROUPS did not return correct value (expected %lu, got %lu)", + options.num_sub_groups, 
compile_num_sub_groups + ) + } + + cl_mem output_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(output_type) * options.max_count, NULL, &error); + RETURN_ON_CL_ERROR(error, "clCreateBuffer") + + error = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer); + RETURN_ON_CL_ERROR(error, "clSetKernelArg") + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution count_dis(1, options.max_count); + + for (size_t test = 0; test < options.num_tests; test++) + { + for (size_t dim = 1; dim <= 3; dim++) + { + size_t global_size[3] = { 1, 1, 1 }; + size_t count = count_dis(gen); + std::uniform_int_distribution global_size_dis(1, static_cast(pow(count, 1.0 / dim))); + for (size_t d = 0; d < dim; d++) + { + global_size[d] = global_size_dis(gen); + } + count = global_size[0] * global_size[1] * global_size[2]; + + size_t local_size[3] = { 1, 1, 1 }; + error = clGetKernelSubGroupInfo(kernel, device, CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT, + sizeof(size_t), &options.num_sub_groups, + sizeof(size_t) * dim, local_size, NULL); + RETURN_ON_CL_ERROR(error, "clGetKernelSubGroupInfo") + if (local_size[0] == 0 || local_size[1] != 1 || local_size[2] != 1) + { + RETURN_ON_ERROR_MSG(-1, + "CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT did not return correct value" + ) + } + + size_t sub_group_count_for_ndrange; + error = clGetKernelSubGroupInfo(kernel, device, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE, + sizeof(size_t) * dim, local_size, + sizeof(size_t), &sub_group_count_for_ndrange, NULL); + RETURN_ON_CL_ERROR(error, "clGetKernelSubGroupInfo") + if (sub_group_count_for_ndrange != options.num_sub_groups) + { + RETURN_ON_ERROR_MSG(-1, + "CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE did not return correct value (expected %lu, got %lu)", + options.num_sub_groups, sub_group_count_for_ndrange + ) + } + + const char pattern = 0; + error = clEnqueueFillBuffer(queue, output_buffer, &pattern, sizeof(pattern), 0, sizeof(output_type) * count, 0, NULL, NULL); + 
RETURN_ON_CL_ERROR(error, "clEnqueueFillBuffer") + + error = clEnqueueNDRangeKernel(queue, kernel, dim, NULL, global_size, local_size, 0, NULL, NULL); + RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel") + + std::vector output(count); + error = clEnqueueReadBuffer( + queue, output_buffer, CL_TRUE, + 0, sizeof(output_type) * count, + static_cast(output.data()), + 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer") + + for (size_t gid = 0; gid < count; gid++) + { + const output_type &o = output[gid]; + + if (o.enqueued_num_sub_groups != options.num_sub_groups) + { + RETURN_ON_ERROR_MSG(-1, "get_enqueued_num_sub_groups does not equal to required_num_sub_groups") + } + if (o.num_sub_groups > options.num_sub_groups) + { + RETURN_ON_ERROR_MSG(-1, "get_num_sub_groups did not return correct value") + } + } + } + } + + clReleaseMemObject(output_buffer); + clReleaseKernel(kernel); + clReleaseProgram(program); + return error; +} + +AUTO_TEST_CASE(test_required_num_sub_groups) +(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + int error = CL_SUCCESS; + + cl_uint max_num_sub_groups; + error = clGetDeviceInfo(device, CL_DEVICE_MAX_NUM_SUB_GROUPS, sizeof(max_num_sub_groups), &max_num_sub_groups, NULL); + RETURN_ON_CL_ERROR(error, "clGetDeviceInfo") + + for (bool spec_const : { false, true }) + for (size_t num_sub_groups = 1; num_sub_groups <= max_num_sub_groups; num_sub_groups++) + { + test_options options; + options.spec_const = spec_const; + options.num_sub_groups = num_sub_groups; + options.num_tests = 100; + options.max_count = num_elements; + + error = test(device, context, queue, options); + RETURN_ON_ERROR(error) + } + + return error; +} + +} // namespace + +#endif // TEST_CONFORMANCE_CLCPP_ATTRIBUTES_TEST_REQUIRED_NUM_SUB_GROUPS_HPP diff --git a/test_conformance/clcpp/common.hpp b/test_conformance/clcpp/common.hpp new file mode 100644 index 00000000..ff92f029 --- /dev/null +++ b/test_conformance/clcpp/common.hpp @@ 
-0,0 +1,51 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef TEST_CONFORMANCE_CLCPP_COMMON_INC_HPP +#define TEST_CONFORMANCE_CLCPP_COMMON_INC_HPP + +#include +#include +#include +#include +#include +#include + +// harness framework +#include "../../test_common/harness/compat.h" +#include "../../test_common/harness/testHarness.h" +#include "../../test_common/harness/errorHelpers.h" +#include "../../test_common/harness/kernelHelpers.h" + +// autotest +#include "../../test_common/autotest/autotest.hpp" + +// utils_common +#include "utils_common/is_vector_type.hpp" +#include "utils_common/scalar_type.hpp" +#include "utils_common/make_vector_type.hpp" +#include "utils_common/type_name.hpp" +#include "utils_common/type_supported.hpp" +#include "utils_common/vector_size.hpp" +#include "utils_common/kernel_helpers.hpp" +#include "utils_common/errors.hpp" +#include "utils_common/string.hpp" + +size_t get_uniform_global_size(size_t global_size, size_t local_size) +{ + return static_cast(std::ceil(static_cast(global_size) / local_size)) * local_size; +} + +#endif // TEST_CONFORMANCE_CLCPP_COMMON_INC_HPP diff --git a/test_conformance/clcpp/common_funcs/CMakeLists.txt b/test_conformance/clcpp/common_funcs/CMakeLists.txt new file mode 100644 index 00000000..bc192a5c --- /dev/null +++ b/test_conformance/clcpp/common_funcs/CMakeLists.txt @@ -0,0 +1,12 @@ +set(MODULE_NAME CPP_COMMON_FUNCS) + 
+set(${MODULE_NAME}_SOURCES + main.cpp + ../../../test_common/harness/errorHelpers.c + ../../../test_common/harness/testHarness.c + ../../../test_common/harness/kernelHelpers.c + ../../../test_common/harness/msvc9.c + ../../../test_common/harness/parseParameters.cpp +) + +include(../../CMakeCommon.txt) diff --git a/test_conformance/clcpp/common_funcs/common_funcs.hpp b/test_conformance/clcpp/common_funcs/common_funcs.hpp new file mode 100644 index 00000000..d6f8c897 --- /dev/null +++ b/test_conformance/clcpp/common_funcs/common_funcs.hpp @@ -0,0 +1,417 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef TEST_CONFORMANCE_CLCPP_COMMON_FUNCS_COMMON_FUNCS_HPP +#define TEST_CONFORMANCE_CLCPP_COMMON_FUNCS_COMMON_FUNCS_HPP + +#include "../common.hpp" +#include "../funcs_test_utils.hpp" + +#include +#include + +// floatn clamp(floatn x, floatn min, floatn max) (only scalars) +template +struct common_func_clamp : public ternary_func +{ + std::string str() + { + return "clamp"; + } + + std::string headers() + { + return "#include \n"; + } + + OUT1 operator()(const IN1& x, const IN2& minval, const IN3& maxval) + { + static_assert( + std::is_same::value + && std::is_same::value + && std::is_same::value, + "All types must be the same" + ); + return (std::min)((std::max)(x, minval), maxval); + } + + IN2 min2() + { + return (std::numeric_limits::min)(); + } + + IN2 max2() + { + return (std::numeric_limits::max)() / IN2(4000.0f); + } + + IN3 min3() + { + return IN3(1) + ((std::numeric_limits::max)() / IN3(4000.0f)); + } + + IN3 max3() + { + return (std::numeric_limits::max)() / IN3(2000.0f); + } + + float ulp() + { + return 0.0f; + } +}; + +// floatn degrees(floatn t) +template +struct common_func_degrees : public unary_func +{ + std::string str() + { + return "degrees"; + } + + std::string headers() + { + return "#include \n"; + } + + REFERENCE operator()(const IN1& x) + { + static_assert( + std::is_same::value, + "All types must be the same" + ); + return (REFERENCE(180.0) / CL_M_PI) * static_cast(x); + } + + float ulp() + { + return 2.5f; + } +}; + +// floatn max(floatn x, floatn y) +template +struct common_func_max : public binary_func +{ + std::string str() + { + return "max"; + } + + std::string headers() + { + return "#include \n"; + } + + OUT1 operator()(const IN1& x, const IN2& y) + { + static_assert( + std::is_same::value && std::is_same::value, + "All types must be the same" + ); + return (std::max)(x, y); + } + + float ulp() + { + return 0.0f; + } +}; + +// floatn min(floatn x, floatn y) +template +struct common_func_min : public binary_func +{ + 
std::string str() + { + return "min"; + } + + std::string headers() + { + return "#include \n"; + } + + OUT1 operator()(const IN1& x, const IN2& y) + { + static_assert( + std::is_same::value && std::is_same::value, + "All types must be the same" + ); + return (std::min)(x, y); + } + + float ulp() + { + return 0.0f; + } +}; + +// floatn mix(floatn x, floatn y, floatn a); +template +struct common_func_mix : public ternary_func +{ + std::string str() + { + return "mix"; + } + + std::string headers() + { + return "#include \n"; + } + + OUT1 operator()(const IN1& x, const IN2& y, const IN3& a) + { + static_assert( + std::is_same::value + && std::is_same::value + && std::is_same::value, + "All types must be the same" + ); + return static_cast(x) + ((static_cast(y) - static_cast(x)) * static_cast(a)); + } + + IN3 min3() + { + return IN3(0.0f + CL_FLT_EPSILON); + } + + IN3 max3() + { + return IN3(1.0f - CL_FLT_EPSILON); + } + + bool use_ulp() + { + return false; + } +}; + +// floatn radians(floatn t) +template +struct common_func_radians : public unary_func +{ + std::string str() + { + return "radians"; + } + + std::string headers() + { + return "#include \n"; + } + + REFERENCE operator()(const IN1& x) + { + static_assert( + std::is_same::value, + "All types must be the same" + ); + return (CL_M_PI / REFERENCE(180.0)) * static_cast(x); + } + + float ulp() + { + return 2.5f; + } +}; + +// floatn step(floatn edge, floatn x) +template +struct common_func_step : public binary_func +{ + std::string str() + { + return "step"; + } + + std::string headers() + { + return "#include \n"; + } + + OUT1 operator()(const IN1& edge, const IN2& x) + { + static_assert( + std::is_same::value && std::is_same::value, + "All types must be the same" + ); + if(x < edge) + return OUT1(0.0f); + return OUT1(1.0f); + } + + float ulp() + { + return 0.0f; + } +}; + +// floatn smoothstep(floatn edge0, floatn edge1, floatn x); +template +struct common_func_smoothstep : public ternary_func +{ + 
std::string str() + { + return "smoothstep"; + } + + std::string headers() + { + return "#include \n"; + } + + OUT1 operator()(const IN1& edge0, const IN2& edge1, const IN3& x) + { + static_assert( + std::is_same::value + && std::is_same::value + && std::is_same::value, + "All types must be the same" + ); + if(x <= edge0) + { + return OUT1(0.0f); + } + if(x >= edge1) + { + return OUT1(1.0f); + } + OUT1 t = (x - edge0) / (edge1 - edge0); + t = t * t * (3.0f - 2.0f * t); + return t; + } + + // edge0 must be < edge1 + IN1 min1() + { + return (std::numeric_limits::min)(); + } + + IN1 max1() + { + return (std::numeric_limits::max)() / IN1(8000.0f); + } + + IN2 min2() + { + return IN3(1) + ((std::numeric_limits::max)() / IN2(4000.0f)); + } + + IN2 max2() + { + return (std::numeric_limits::max)() / IN2(2000.0f); + } + + bool use_ulp() + { + return false; + } +}; + +// floatn sign(floatn t) +template +struct common_func_sign : public unary_func +{ + std::string str() + { + return "sign"; + } + + std::string headers() + { + return "#include \n"; + } + + OUT1 operator()(const IN1& x) + { + static_assert( + std::is_same::value, + "All types must be the same" + ); + if(x == IN1(-0.0f)) + { + return IN1(-0.0f); + } + if(x == IN1(+0.0f)) + { + return IN1(+0.0f); + } + if(x > IN1(0.0f)) + { + return IN1(1.0f); + } + return IN1(-1.0f); + } + + bool use_ulp() + { + return false; + } + + float ulp() + { + return 0.0f; + } + + std::vector in_special_cases() + { + return { -0.0f, +0.0f }; + } +}; + +AUTO_TEST_CASE(test_common_funcs) +(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + int error = CL_SUCCESS; + int last_error = CL_SUCCESS; + + // floatn clamp(floatn x, floatn min, floatn max) + TEST_TERNARY_FUNC_MACRO((common_func_clamp())) + + // floatn degrees(floatn t) + TEST_UNARY_FUNC_MACRO((common_func_degrees())) + + // floatn max(floatn x, floatn y); + TEST_BINARY_FUNC_MACRO((common_func_max())) + + // floatn min(floatn x, floatn y); + 
TEST_BINARY_FUNC_MACRO((common_func_min())) + + // floatn mix(floatn x, floatn y, floatn a); + TEST_TERNARY_FUNC_MACRO((common_func_mix())) + + // floatn radians(floatn t) + TEST_UNARY_FUNC_MACRO((common_func_radians())) + + // floatn step(floatn edge, floatn x) + TEST_BINARY_FUNC_MACRO((common_func_step())) + + // floatn smoothstep(floatn edge0, floatn edge1, floatn x) + TEST_TERNARY_FUNC_MACRO((common_func_smoothstep())) + + // floatn sign(floatn t); + TEST_UNARY_FUNC_MACRO((common_func_sign())) + + if(error != CL_SUCCESS) + { + return -1; + } + return error; +} + +#endif // TEST_CONFORMANCE_CLCPP_COMMON_FUNCS_COMMON_FUNCS_HPP diff --git a/test_conformance/clcpp/common_funcs/main.cpp b/test_conformance/clcpp/common_funcs/main.cpp new file mode 100644 index 00000000..ff9443ec --- /dev/null +++ b/test_conformance/clcpp/common_funcs/main.cpp @@ -0,0 +1,48 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include + +#include "../common.hpp" + +#include "common_funcs.hpp" + +int main(int argc, const char *argv[]) +{ + // Check if cl_float (float) and cl_double (double) fulfill the requirements of + // IEC 559 (IEEE 754) standard. This is required for the tests to run correctly. + if(!std::numeric_limits::is_iec559) + { + RETURN_ON_ERROR_MSG(-1, + "cl_float (float) does not fulfill the requirements of IEC 559 (IEEE 754) standard. " + "Tests won't run correctly." 
+ ); + } + if(!std::numeric_limits::is_iec559) + { + RETURN_ON_ERROR_MSG(-1, + "cl_double (double) does not fulfill the requirements of IEC 559 (IEEE 754) standard. " + "Tests won't run correctly." + ); + } + + // Get list to all test functions + std::vector testfn_list = autotest::test_suite::get_test_functions(); + // Get names of all test functions + std::vector testfn_names = autotest::test_suite::get_test_names(); + // Create a vector of pointers to the names test functions + std::vector testfn_names_c_str = autotest::get_strings_ptrs(testfn_names); + return runTestHarness(argc, argv, testfn_list.size(), testfn_list.data(), testfn_names_c_str.data(), false, false, 0); +} diff --git a/test_conformance/clcpp/convert/CMakeLists.txt b/test_conformance/clcpp/convert/CMakeLists.txt new file mode 100644 index 00000000..4fb956e5 --- /dev/null +++ b/test_conformance/clcpp/convert/CMakeLists.txt @@ -0,0 +1,12 @@ +set(MODULE_NAME CPP_CONVERT) + +set(${MODULE_NAME}_SOURCES + main.cpp + ../../../test_common/harness/errorHelpers.c + ../../../test_common/harness/testHarness.c + ../../../test_common/harness/kernelHelpers.c + ../../../test_common/harness/msvc9.c + ../../../test_common/harness/parseParameters.cpp +) + +include(../../CMakeCommon.txt) diff --git a/test_conformance/clcpp/convert/convert_cast.hpp b/test_conformance/clcpp/convert/convert_cast.hpp new file mode 100644 index 00000000..81fcca63 --- /dev/null +++ b/test_conformance/clcpp/convert/convert_cast.hpp @@ -0,0 +1,309 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef TEST_CONFORMANCE_CLCPP_CONVERT_CONVERT_CAST_HPP +#define TEST_CONFORMANCE_CLCPP_CONVERT_CONVERT_CAST_HPP + +#include "../common.hpp" +#include "../funcs_test_utils.hpp" + +#include + + +enum class rounding_mode +{ + def, + /*rte, not implemented here */ + rtz, + rtp, + rtn +}; + +enum class saturate { def, off, on }; + +std::string rounding_mode_name(rounding_mode rmode) +{ + switch (rmode) + { + case rounding_mode::rtz: return "rtz"; + case rounding_mode::rtp: return "rtp"; + case rounding_mode::rtn: return "rtn"; + default: return ""; + } +} + +std::string saturate_name(saturate smode) +{ + switch (smode) + { + case saturate::off: return "off"; + case saturate::on: return "on"; + default: return ""; + } +} + +template +T clamp(T x, T a, T b) +{ + return (std::min)(b, (std::max)(a, x)); +} + +template +struct convert_cast : public unary_func +{ + static_assert(vector_size::value == vector_size::value, "The operand and result type must have the same number of elements"); + + typedef typename scalar_type::type in_scalar_type; + typedef typename scalar_type::type out_scalar_type; + + in_scalar_type in_min; + in_scalar_type in_max; + rounding_mode rmode; + saturate smode; + + convert_cast(in_scalar_type min, in_scalar_type max, rounding_mode rmode, saturate smode) + : in_min(min), in_max(max), rmode(rmode), smode(smode) + { + } + + std::string str() + { + return "convert_cast"; + } + + std::string headers() + { + return "#include \n"; + } + + IN1 min1() + { + return detail::def_limit(in_min); + } + + IN1 max1() + { + return 
detail::def_limit(in_max); + } + + OUT1 operator()(const IN1& x) + { + OUT1 y; + for (size_t i = 0; i < vector_size::value; i++) + { + in_scalar_type v; + if (smode == saturate::on) + v = clamp(x.s[i], + static_cast((std::numeric_limits::min)()), + static_cast((std::numeric_limits::max)()) + ); + else + v = x.s[i]; + + if (std::is_integral::value) + { + switch (rmode) + { + case rounding_mode::rtp: + y.s[i] = static_cast(std::ceil(v)); + break; + case rounding_mode::rtn: + y.s[i] = static_cast(std::floor(v)); + break; + default: + y.s[i] = static_cast(v); + } + } + else + { + y.s[i] = static_cast(v); + } + } + return y; + } +}; + +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) +template +std::string generate_kernel_convert_cast(func_type func) +{ + std::string in1_value = "input[gid]"; + std::string function_call = "convert_" + type_name(); + if (func.smode == saturate::on) + function_call += "_sat"; + if (func.rmode != rounding_mode::def) + function_call += "_" + rounding_mode_name(func.rmode); + function_call += "(" + in1_value + ")"; + return + "__kernel void test_" + func.str() + "(global " + type_name() + " *input, global " + type_name() + " *output)\n" + "{\n" + " size_t gid = get_global_id(0);\n" + " output[gid] = " + function_call + ";\n" + "}\n"; +} +#else +template +std::string generate_kernel_convert_cast(func_type func) +{ + std::string headers = func.headers(); + std::string in1_value = "input[gid]"; + std::string function_call = "convert_cast<" + type_name(); + if (func.rmode != rounding_mode::def) + function_call += ", rounding_mode::" + rounding_mode_name(func.rmode); + if (func.smode != saturate::def) + function_call += ", saturate::" + saturate_name(func.smode); + function_call 
+= ">(" + in1_value + ")"; + return + "" + func.defs() + + "" + headers + + "#include \n" + "#include \n" + "using namespace cl;\n" + "__kernel void test_" + func.str() + "(global_ptr<" + type_name() + "[]> input," + "global_ptr<" + type_name() + "[]> output)\n" + "{\n" + " size_t gid = get_global_id(0);\n" + " output[gid] = " + function_call + ";\n" + "}\n"; +} +#endif + +template +int test_convert_cast_func(cl_device_id device, cl_context context, cl_command_queue queue, size_t count, convert_cast_op op) +{ + cl_mem buffers[2]; + cl_program program; + cl_kernel kernel; + size_t work_size[1]; + int error; + + typedef typename convert_cast_op::in_type INPUT; + typedef typename convert_cast_op::out_type OUTPUT; + + // Don't run test for unsupported types + if (!(type_supported(device) && type_supported(device))) + { + return CL_SUCCESS; + } + + std::string code_str = generate_kernel_convert_cast(op); + std::string kernel_name("test_"); kernel_name += op.str(); + +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +// Only OpenCL C++ to SPIR-V compilation +#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) + error = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name); + RETURN_ON_ERROR(error) + return error; +// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) +#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + error = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name, "-cl-std=CL2.0", false); + RETURN_ON_ERROR(error) +#else + error = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name); + RETURN_ON_ERROR(error) +#endif + + std::vector input = generate_input(count, op.min1(), op.max1(), op.in_special_cases()); + std::vector output = generate_output(count); + 
+ buffers[0] = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(INPUT) * input.size(), NULL, &error); + RETURN_ON_CL_ERROR(error, "clCreateBuffer") + + buffers[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(OUTPUT) * output.size(), NULL, &error); + RETURN_ON_CL_ERROR(error, "clCreateBuffer") + + error = clEnqueueWriteBuffer( + queue, buffers[0], CL_TRUE, 0, sizeof(INPUT) * input.size(), + static_cast(input.data()), 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(error, "clEnqueueWriteBuffer") + + error = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]); + RETURN_ON_CL_ERROR(error, "clSetKernelArg") + error = clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]); + RETURN_ON_CL_ERROR(error, "clSetKernelArg") + + work_size[0] = count; + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, NULL, 0, NULL, NULL); + RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel") + + error = clEnqueueReadBuffer( + queue, buffers[1], CL_TRUE, 0, sizeof(OUTPUT) * output.size(), + static_cast(output.data()), 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer") + + if (!verify_unary(input, output, op)) + { + RETURN_ON_ERROR_MSG(-1, "test_%s %s(%s) failed", op.str().c_str(), type_name().c_str(), type_name().c_str()); + } + log_info("test_%s %s(%s) passed\n", op.str().c_str(), type_name().c_str(), type_name().c_str()); + + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseKernel(kernel); + clReleaseProgram(program); + return error; +} + + +AUTO_TEST_CASE(test_convert_cast) +(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + int error = CL_SUCCESS; + int last_error = CL_SUCCESS; + +#define TEST_CONVERT_CAST_MACRO(OP) \ + last_error = test_convert_cast_func( \ + device, context, queue, n_elems, OP \ + ); \ + CHECK_ERROR(last_error) \ + error |= last_error; + + // No-op + TEST_CONVERT_CAST_MACRO((convert_cast(-100.0f, +100.0f, rounding_mode::rtn, saturate::def))) + 
TEST_CONVERT_CAST_MACRO((convert_cast(0, 255, rounding_mode::def, saturate::def))) + + // int to int + TEST_CONVERT_CAST_MACRO((convert_cast(40000, 40000, rounding_mode::def, saturate::on))) + TEST_CONVERT_CAST_MACRO((convert_cast(0, 127, rounding_mode::def, saturate::off))) + TEST_CONVERT_CAST_MACRO((convert_cast(-100, 100, rounding_mode::def, saturate::off))) + + // float to int + TEST_CONVERT_CAST_MACRO((convert_cast(-100.0f, +400.0f, rounding_mode::def, saturate::on))) + TEST_CONVERT_CAST_MACRO((convert_cast(-127.0, +127.0, rounding_mode::rtp, saturate::off))) + TEST_CONVERT_CAST_MACRO((convert_cast(-1000.0f, +10000.0f, rounding_mode::rtp, saturate::on))) + TEST_CONVERT_CAST_MACRO((convert_cast(-10000.0f, +70000.0f, rounding_mode::rtn, saturate::on))) + + // int to float + TEST_CONVERT_CAST_MACRO((convert_cast(0, 12345, rounding_mode::def, saturate::def))) + TEST_CONVERT_CAST_MACRO((convert_cast(-1000000, +1000000, rounding_mode::rtz, saturate::def))) + +#undef TEST_CONVERT_CAST_MACRO + + if (error != CL_SUCCESS) + { + return -1; + } + return error; +} + +#endif // TEST_CONFORMANCE_CLCPP_CONVERT_CONVERT_CAST_HPP diff --git a/test_conformance/clcpp/convert/main.cpp b/test_conformance/clcpp/convert/main.cpp new file mode 100644 index 00000000..979156bd --- /dev/null +++ b/test_conformance/clcpp/convert/main.cpp @@ -0,0 +1,30 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../common.hpp" + +#include "convert_cast.hpp" + + +int main(int argc, const char *argv[]) +{ + // Get list to all test functions + std::vector testfn_list = autotest::test_suite::get_test_functions(); + // Get names of all test functions + std::vector testfn_names = autotest::test_suite::get_test_names(); + // Create a vector of pointers to the names test functions + std::vector testfn_names_c_str = autotest::get_strings_ptrs(testfn_names); + return runTestHarness(argc, argv, testfn_list.size(), testfn_list.data(), testfn_names_c_str.data(), false, false, 0); +} diff --git a/test_conformance/clcpp/device_queue/CMakeLists.txt b/test_conformance/clcpp/device_queue/CMakeLists.txt new file mode 100644 index 00000000..d9260c2c --- /dev/null +++ b/test_conformance/clcpp/device_queue/CMakeLists.txt @@ -0,0 +1,12 @@ +set(MODULE_NAME CPP_DEVICE_QUEUE) + +set(${MODULE_NAME}_SOURCES + main.cpp + ../../../test_common/harness/errorHelpers.c + ../../../test_common/harness/testHarness.c + ../../../test_common/harness/kernelHelpers.c + ../../../test_common/harness/msvc9.c + ../../../test_common/harness/parseParameters.cpp +) + +include(../../CMakeCommon.txt) diff --git a/test_conformance/clcpp/device_queue/main.cpp b/test_conformance/clcpp/device_queue/main.cpp new file mode 100644 index 00000000..5d754b78 --- /dev/null +++ b/test_conformance/clcpp/device_queue/main.cpp @@ -0,0 +1,30 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../common.hpp" + +#include "test_enqueue.hpp" + + +int main(int argc, const char *argv[]) +{ + // Get list to all test functions + std::vector testfn_list = autotest::test_suite::get_test_functions(); + // Get names of all test functions + std::vector testfn_names = autotest::test_suite::get_test_names(); + // Create a vector of pointers to the names test functions + std::vector testfn_names_c_str = autotest::get_strings_ptrs(testfn_names); + return runTestHarness(argc, argv, testfn_list.size(), testfn_list.data(), testfn_names_c_str.data(), false, false, 0); +} diff --git a/test_conformance/clcpp/device_queue/test_enqueue.hpp b/test_conformance/clcpp/device_queue/test_enqueue.hpp new file mode 100644 index 00000000..f5d4e6dc --- /dev/null +++ b/test_conformance/clcpp/device_queue/test_enqueue.hpp @@ -0,0 +1,699 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef TEST_CONFORMANCE_CLCPP_DEVICE_QUEUE_TEST_ENQUEUE_HPP +#define TEST_CONFORMANCE_CLCPP_DEVICE_QUEUE_TEST_ENQUEUE_HPP + +#include +#include +#include +#include + +// Common for all OpenCL C++ tests +#include "../common.hpp" + + +namespace test_enqueue { + +struct test_options +{ + int test; +}; + +struct output_type +{ + cl_int enqueue_kernel1_success; + cl_int enqueue_kernel2_success; + cl_int enqueue_kernel3_success; + cl_int enqueue_marker_success; + cl_int event1_is_valid; + cl_int event2_is_valid; + cl_int event3_is_valid; + cl_int user_event1_is_valid; + cl_int user_event2_is_valid; + cl_int values[10000]; +}; + +const std::string source_common = R"( +struct output_type +{ + int enqueue_kernel1_success; + int enqueue_kernel2_success; + int enqueue_kernel3_success; + int enqueue_marker_success; + int event1_is_valid; + int event2_is_valid; + int event3_is_valid; + int user_event1_is_valid; + int user_event2_is_valid; + int values[10000]; +}; +)"; + +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) +std::string generate_source(test_options options) +{ + std::stringstream s; + s << source_common; + if (options.test == 0) + { + s << R"( + kernel void test(queue_t queue, global struct output_type *output) + { + const ulong gid = get_global_id(0); + + if (gid != 0) + return; + + output->enqueue_kernel2_success = 1; + output->enqueue_kernel3_success = 1; + output->enqueue_marker_success = 1; + output->event2_is_valid = 1; + output->event3_is_valid = 1; + output->user_event1_is_valid = 1; + output->user_event2_is_valid = 1; + + queue_t default_queue = get_default_queue(); + + ndrange_t ndrange1 = ndrange_1D(get_global_size(0)); + clk_event_t event1; + int status1 = 
enqueue_kernel(default_queue, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange1, 0, NULL, &event1, + ^{ + const ulong gid = get_global_id(0); + output->values[gid] = 1; + }); + output->enqueue_kernel1_success = status1 == CLK_SUCCESS; + output->event1_is_valid = is_valid_event(event1); + + release_event(event1); + } + )"; + } + else if (options.test == 1) + { + s << R"( + kernel void test(queue_t queue, global struct output_type *output) + { + const ulong gid = get_global_id(0); + + if (gid != 0) + return; + + output->enqueue_kernel3_success = 1; + output->enqueue_marker_success = 1; + output->event3_is_valid = 1; + output->user_event1_is_valid = 1; + output->user_event2_is_valid = 1; + + queue_t default_queue = get_default_queue(); + + ndrange_t ndrange1 = ndrange_1D(get_global_size(0) / 2); + clk_event_t event1; + int status1 = enqueue_kernel(default_queue, CLK_ENQUEUE_FLAGS_WAIT_WORK_GROUP, ndrange1, 0, NULL, &event1, + ^{ + const ulong gid = get_global_id(0); + output->values[gid * 2] = 1; + }); + output->enqueue_kernel1_success = status1 == CLK_SUCCESS; + output->event1_is_valid = is_valid_event(event1); + + ndrange_t ndrange2 = ndrange_1D(1, get_global_size(0) / 2, 1); + clk_event_t event2; + int status2 = enqueue_kernel(queue, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange2, 1, &event1, &event2, + ^{ + const ulong gid = get_global_id(0); + output->values[(gid - 1) * 2 + 1] = 1; + }); + output->enqueue_kernel2_success = status2 == CLK_SUCCESS; + output->event2_is_valid = is_valid_event(event2); + + release_event(event1); + release_event(event2); + } + )"; + } + else if (options.test == 2) + { + s << R"( + kernel void test(queue_t queue, global struct output_type *output) + { + const ulong gid = get_global_id(0); + + if (gid != 0) + return; + + output->enqueue_marker_success = 1; + output->event3_is_valid = 1; + output->enqueue_kernel3_success = 1; + + queue_t default_queue = get_default_queue(); + + clk_event_t user_event1 = create_user_event(); + retain_event(user_event1); + 
output->user_event1_is_valid = is_valid_event(user_event1); + + ndrange_t ndrange1 = ndrange_1D(get_global_size(0) / 2); + clk_event_t event1; + int status1 = enqueue_kernel(queue, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange1, 1, &user_event1, &event1, + ^{ + const ulong gid = get_global_id(0); + output->values[gid * 2] = 1; + }); + output->enqueue_kernel1_success = status1 == CLK_SUCCESS; + output->event1_is_valid = is_valid_event(event1); + release_event(user_event1); + + clk_event_t user_event2 = create_user_event(); + output->user_event2_is_valid = is_valid_event(user_event2); + + clk_event_t events[2]; + events[0] = user_event2; + events[1] = user_event1; + + ndrange_t ndrange2 = ndrange_1D(1, get_global_size(0) / 2, get_local_size(0)); + clk_event_t event2; + int status2 = enqueue_kernel(default_queue, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange2, 2, events, &event2, + ^(local void *p0, local void *p1, local void *p2) { + const ulong gid = get_global_id(0); + const ulong lid = get_local_id(0); + local int2 *l0 = (local int2 *)p0; + local int *l1 = (local int *)p1; + local int *l2 = (local int *)p2; + l1[get_local_size(0) - lid - 1] = gid > 0 ? 
1 : 0; + work_group_barrier(CLK_LOCAL_MEM_FENCE); + if (lid < 5) l0[lid] = (int2)(3, 4); + if (lid < 3) l2[lid] = 5; + work_group_barrier(CLK_LOCAL_MEM_FENCE); + output->values[(gid - 1) * 2 + 1] = min(l1[lid], min(l0[0].x, l2[0])); + }, sizeof(int2) * 5, sizeof(int) * get_local_size(0), sizeof(int) * 3); + output->enqueue_kernel2_success = status2 == CLK_SUCCESS; + output->event2_is_valid = is_valid_event(event2); + + set_user_event_status(user_event1, CL_COMPLETE); + set_user_event_status(user_event2, CL_COMPLETE); + + release_event(user_event1); + release_event(user_event2); + release_event(event1); + release_event(event2); + } + )"; + } + else if (options.test == 3) + { + s << R"( + kernel void test(queue_t queue, global struct output_type *output) + { + const ulong gid = get_global_id(0); + + if (gid != 0) + return; + + output->user_event2_is_valid = 1; + + queue_t default_queue = get_default_queue(); + + ndrange_t ndrange1 = ndrange_1D(get_global_size(0) / 2); + clk_event_t event1; + int status1 = enqueue_kernel(default_queue, CLK_ENQUEUE_FLAGS_WAIT_WORK_GROUP, ndrange1, 0, NULL, &event1, + ^{ + const ulong gid = get_global_id(0); + output->values[gid * 2] = 20; + }); + output->enqueue_kernel1_success = status1 == CLK_SUCCESS; + output->event1_is_valid = is_valid_event(event1); + + ndrange_t ndrange2 = ndrange_1D(1, get_global_size(0) / 2, 1); + clk_event_t event2; + int status2 = enqueue_kernel(queue, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange2, 0, NULL, &event2, + ^{ + const ulong gid = get_global_id(0); + output->values[(gid - 1) * 2 + 1] = 20; + }); + output->enqueue_kernel2_success = status2 == CLK_SUCCESS; + output->event2_is_valid = is_valid_event(event2); + + clk_event_t user_event1 = create_user_event(); + output->user_event1_is_valid = is_valid_event(user_event1); + + clk_event_t events[3]; + events[0] = event2; + events[1] = user_event1; + events[2] = event1; + + clk_event_t event3; + int status3 = enqueue_marker(queue, 3, events, &event3); + 
output->enqueue_marker_success = status3 == CLK_SUCCESS; + output->event3_is_valid = is_valid_event(event3); + + int status4 = enqueue_kernel(default_queue, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange_1D(get_global_size(0)), 1, &event3, NULL, + ^{ + const ulong gid = get_global_id(0); + output->values[gid] /= 20; + }); + output->enqueue_kernel3_success = status4 == CLK_SUCCESS; + + set_user_event_status(user_event1, CL_COMPLETE); + + release_event(user_event1); + release_event(event1); + release_event(event2); + release_event(event3); + } + )"; + } + + return s.str(); +} +#else +std::string generate_source(test_options options) +{ + std::stringstream s; + s << R"( + #include + #include + #include + #include + #include + using namespace cl; + )"; + + s << source_common; + if (options.test == 0) + { + s << R"( + kernel void test(device_queue queue, global *output) + { + const ulong gid = get_global_id(0); + + if (gid != 0) + return; + + output->enqueue_kernel2_success = 1; + output->enqueue_kernel3_success = 1; + output->enqueue_marker_success = 1; + output->event2_is_valid = 1; + output->event3_is_valid = 1; + output->user_event1_is_valid = 1; + output->user_event2_is_valid = 1; + + device_queue default_queue = get_default_device_queue(); + + ndrange ndrange1(get_global_size(0)); + event event1; + enqueue_status status1 = default_queue.enqueue_kernel(enqueue_policy::no_wait, 0, nullptr, &event1, ndrange1, + [](global *output) { + const ulong gid = get_global_id(0); + output->values[gid] = 1; + }, output); + output->enqueue_kernel1_success = status1 == enqueue_status::success; + output->event1_is_valid = event1.is_valid(); + + event1.release(); + } + )"; + } + else if (options.test == 1) + { + s << R"( + kernel void test(device_queue queue, global *output) + { + const ulong gid = get_global_id(0); + + if (gid != 0) + return; + + output->enqueue_kernel3_success = 1; + output->enqueue_marker_success = 1; + output->event3_is_valid = 1; + output->user_event1_is_valid = 1; + 
output->user_event2_is_valid = 1; + + device_queue default_queue = get_default_device_queue(); + + ndrange ndrange1(get_global_size(0) / 2); + event event1; + enqueue_status status1 = default_queue.enqueue_kernel(enqueue_policy::wait_work_group, 0, nullptr, &event1, ndrange1, + [](global *output) { + const ulong gid = get_global_id(0); + output->values[gid * 2] = 1; + }, output); + output->enqueue_kernel1_success = status1 == enqueue_status::success; + output->event1_is_valid = event1.is_valid(); + + ndrange ndrange2(1, get_global_size(0) / 2, 1); + event event2; + enqueue_status status2 = queue.enqueue_kernel(enqueue_policy::wait_kernel, 1, &event1, &event2, ndrange2, + [](global *output) { + const ulong gid = get_global_id(0); + output->values[(gid - 1) * 2 + 1] = 1; + }, output); + output->enqueue_kernel2_success = status2 == enqueue_status::success; + output->event2_is_valid = event2.is_valid(); + + event1.release(); + event2.release(); + } + )"; + } + else if (options.test == 2) + { + s << R"( + kernel void test(device_queue queue, global *output) + { + const ulong gid = get_global_id(0); + + if (gid != 0) + return; + + output->enqueue_marker_success = 1; + output->event3_is_valid = 1; + output->enqueue_kernel3_success = 1; + + device_queue default_queue = get_default_device_queue(); + + event user_event1 = make_user_event(); + user_event1.retain(); + output->user_event1_is_valid = user_event1.is_valid(); + + ndrange ndrange1(get_global_size(0) / 2); + event event1; + enqueue_status status1 = queue.enqueue_kernel(enqueue_policy::wait_kernel, 1, &user_event1, &event1, ndrange1, + [](global *output){ + const ulong gid = get_global_id(0); + output->values[gid * 2] = 1; + }, output); + output->enqueue_kernel1_success = status1 == enqueue_status::success; + output->event1_is_valid = event1.is_valid(); + user_event1.release(); + + event user_event2 = make_user_event(); + output->user_event2_is_valid = user_event2.is_valid(); + + event events[2]; + events[0] = 
user_event2; + events[1] = user_event1; + + ndrange ndrange2(1, get_global_size(0) / 2, get_local_size(0)); + event event2; + enqueue_status status2 = default_queue.enqueue_kernel(enqueue_policy::no_wait, 2, events, &event2, ndrange2, + [](global *output, local_ptr l0, local_ptr l1, local_ptr l2) { + const ulong gid = get_global_id(0); + const ulong lid = get_local_id(0); + l1[get_local_size(0) - lid - 1] = gid > 0 ? 1 : 0; + work_group_barrier(mem_fence::local); + if (lid < 5) l0[lid] = int2(3, 4); + if (lid < 3) l2[lid] = 5; + work_group_barrier(mem_fence::local); + output->values[(gid - 1) * 2 + 1] = min(l1[lid], min(l0[0].x, l2[0])); + }, output, local_ptr::size_type(5), local_ptr::size_type(get_local_size(0)), local_ptr::size_type(3)); + output->enqueue_kernel2_success = status2 == enqueue_status::success; + output->event2_is_valid = event2.is_valid(); + + user_event1.set_status(event_status::complete); + user_event2.set_status(event_status::complete); + + user_event1.release(); + user_event2.release(); + event1.release(); + event2.release(); + } + )"; + } + else if (options.test == 3) + { + s << R"( + kernel void test(device_queue queue, global *output) + { + const ulong gid = get_global_id(0); + + if (gid != 0) + return; + + output->user_event2_is_valid = 1; + + device_queue default_queue = get_default_device_queue(); + + ndrange ndrange1(get_global_size(0) / 2); + event event1; + enqueue_status status1 = default_queue.enqueue_kernel(enqueue_policy::wait_work_group, 0, nullptr, &event1, ndrange1, + [](global *output) { + const ulong gid = get_global_id(0); + output->values[gid * 2] = 20; + }, output); + output->enqueue_kernel1_success = status1 == enqueue_status::success; + output->event1_is_valid = event1.is_valid(); + + ndrange ndrange2(1, get_global_size(0) / 2, 1); + event event2; + enqueue_status status2 = queue.enqueue_kernel(enqueue_policy::wait_kernel, 0, nullptr, &event2, ndrange2, + [](global *output) { + const ulong gid = get_global_id(0); + 
output->values[(gid - 1) * 2 + 1] = 20; + }, output); + output->enqueue_kernel2_success = status2 == enqueue_status::success; + output->event2_is_valid = event2.is_valid(); + + event user_event1 = make_user_event(); + output->user_event1_is_valid = user_event1.is_valid(); + + event events[3]; + events[0] = event2; + events[1] = user_event1; + events[2] = event1; + + event event3; + enqueue_status status3 = queue.enqueue_marker(3, events, &event3); + output->enqueue_marker_success = status3 == enqueue_status::success; + output->event3_is_valid = event3.is_valid(); + + enqueue_status status4 = default_queue.enqueue_kernel(enqueue_policy::no_wait, 1, &event3, nullptr, ndrange(get_global_size(0)), + [](global *output) { + const ulong gid = get_global_id(0); + output->values[gid] /= 20; + }, output); + output->enqueue_kernel3_success = status4 == enqueue_status::success; + + user_event1.set_status(event_status::complete); + + user_event1.release(); + event1.release(); + event2.release(); + event3.release(); + } + )"; + } + + return s.str(); +} +#endif + +int test(cl_device_id device, cl_context context, cl_command_queue queue, test_options options) +{ + int error = CL_SUCCESS; + + cl_program program; + cl_kernel kernel; + + std::string kernel_name = "test"; + std::string source = generate_source(options); + +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +// Only OpenCL C++ to SPIR-V compilation +#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) + error = create_opencl_kernel( + context, &program, &kernel, + source, kernel_name + ); + RETURN_ON_ERROR(error) + return error; +// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) +#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + error = create_opencl_kernel( + context, 
&program, &kernel, + source, kernel_name, "-cl-std=CL2.0", false + ); + RETURN_ON_ERROR(error) +// Normal run +#else + error = create_opencl_kernel( + context, &program, &kernel, + source, kernel_name + ); + RETURN_ON_ERROR(error) +#endif + + cl_uint max_queues; + error = clGetDeviceInfo(device, CL_DEVICE_MAX_ON_DEVICE_QUEUES, sizeof(cl_uint), &max_queues, NULL); + RETURN_ON_CL_ERROR(error, "clGetDeviceInfo") + + cl_uint max_events; + error = clGetDeviceInfo(device, CL_DEVICE_MAX_ON_DEVICE_EVENTS, sizeof(cl_uint), &max_events, NULL); + RETURN_ON_CL_ERROR(error, "clGetDeviceInfo") + + cl_command_queue device_queue1 = NULL; + cl_command_queue device_queue2 = NULL; + + cl_queue_properties queue_properties1[] = + { + CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT, + 0 + }; + device_queue1 = clCreateCommandQueueWithProperties(context, device, queue_properties1, &error); + RETURN_ON_CL_ERROR(error, "clCreateCommandQueueWithProperties") + + if (max_queues > 1) + { + cl_queue_properties queue_properties2[] = + { + CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE, + 0 + }; + device_queue2 = clCreateCommandQueueWithProperties(context, device, queue_properties2, &error); + RETURN_ON_CL_ERROR(error, "clCreateCommandQueueWithProperties") + } + + cl_mem output_buffer; + output_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(output_type), NULL, &error); + RETURN_ON_CL_ERROR(error, "clCreateBuffer") + + error = clSetKernelArg(kernel, 0, sizeof(cl_command_queue), device_queue2 != NULL ? 
&device_queue2 : &device_queue1); + RETURN_ON_CL_ERROR(error, "clSetKernelArg") + error = clSetKernelArg(kernel, 1, sizeof(output_buffer), &output_buffer); + RETURN_ON_CL_ERROR(error, "clSetKernelArg") + + const char pattern = 0; + error = clEnqueueFillBuffer(queue, output_buffer, &pattern, sizeof(pattern), 0, sizeof(output_type), 0, NULL, NULL); + RETURN_ON_CL_ERROR(error, "clEnqueueFillBuffer") + + size_t max_work_group_size; + error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), &max_work_group_size, NULL); + RETURN_ON_CL_ERROR(error, "clGetDeviceInfo") + + const size_t local_size = (std::min)((size_t)256, max_work_group_size); + const size_t global_size = 10000 / local_size * local_size; + const size_t count = global_size; + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, &local_size, 0, NULL, NULL); + RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel") + + output_type output; + error = clEnqueueReadBuffer( + queue, output_buffer, CL_TRUE, + 0, sizeof(output_type), + static_cast(&output), + 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer") + + if (!output.enqueue_kernel1_success) + { + RETURN_ON_ERROR_MSG(-1, "enqueue_kernel did not succeed") + } + if (!output.enqueue_kernel2_success) + { + RETURN_ON_ERROR_MSG(-1, "enqueue_kernel did not succeed") + } + if (!output.enqueue_kernel3_success) + { + RETURN_ON_ERROR_MSG(-1, "enqueue_kernel did not succeed") + } + if (!output.enqueue_marker_success) + { + RETURN_ON_ERROR_MSG(-1, "enqueue_marker did not succeed") + } + if (!output.event1_is_valid) + { + RETURN_ON_ERROR_MSG(-1, "event1 is not valid") + } + if (!output.event2_is_valid) + { + RETURN_ON_ERROR_MSG(-1, "event2 is not valid") + } + if (!output.event3_is_valid) + { + RETURN_ON_ERROR_MSG(-1, "event3 is not valid") + } + if (!output.user_event1_is_valid) + { + RETURN_ON_ERROR_MSG(-1, "user_event1 is not valid") + } + if (!output.user_event2_is_valid) + { + RETURN_ON_ERROR_MSG(-1, "user_event2 is 
not valid") + } + + for (size_t i = 0; i < count; i++) + { + const cl_int result = output.values[i]; + const cl_int expected = 1; + + if (result != expected) + { + RETURN_ON_ERROR_MSG(-1, + "kernel did not return correct value. Expected: %s, got: %s", + format_value(expected).c_str(), format_value(result).c_str() + ) + } + } + + clReleaseMemObject(output_buffer); + clReleaseCommandQueue(device_queue1); + if (device_queue2 != NULL) + clReleaseCommandQueue(device_queue2); + clReleaseKernel(kernel); + clReleaseProgram(program); + return error; +} + +AUTO_TEST_CASE(test_enqueue_one_kernel) +(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + test_options options; + options.test = 0; + return test(device, context, queue, options); +} + +AUTO_TEST_CASE(test_enqueue_two_kernels) +(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + test_options options; + options.test = 1; + return test(device, context, queue, options); +} + +AUTO_TEST_CASE(test_enqueue_user_events_and_locals) +(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + test_options options; + options.test = 2; + return test(device, context, queue, options); +} + +AUTO_TEST_CASE(test_enqueue_marker) +(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + test_options options; + options.test = 3; + return test(device, context, queue, options); +} + +} // namespace + +#endif // TEST_CONFORMANCE_CLCPP_DEVICE_QUEUE_TEST_ENQUEUE_HPP diff --git a/test_conformance/clcpp/funcs_test_utils.hpp b/test_conformance/clcpp/funcs_test_utils.hpp new file mode 100644 index 00000000..e839231c --- /dev/null +++ b/test_conformance/clcpp/funcs_test_utils.hpp @@ -0,0 +1,72 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef TEST_CONFORMANCE_CLCPP_FUNCS_TEST_UTILS_HPP +#define TEST_CONFORMANCE_CLCPP_FUNCS_TEST_UTILS_HPP + +// This file contains helper classes and functions for testing various unary, binary +// and ternary OpenCL functions (for example cl::abs(x) or cl::abs_diff(x, y)), +// as well as other helper functions/classes. + +#include "common.hpp" + +#define TEST_UNARY_FUNC_MACRO(TEST_CLASS) \ + last_error = test_unary_func( \ + device, context, queue, n_elems, TEST_CLASS \ + ); \ + CHECK_ERROR(last_error) \ + error |= last_error; + +#define TEST_BINARY_FUNC_MACRO(TEST_CLASS) \ + last_error = test_binary_func( \ + device, context, queue, n_elems, TEST_CLASS \ + ); \ + CHECK_ERROR(last_error) \ + error |= last_error; + +#define TEST_TERNARY_FUNC_MACRO(TEST_CLASS) \ + last_error = test_ternary_func( \ + device, context, queue, n_elems, TEST_CLASS \ + ); \ + CHECK_ERROR(last_error) \ + error |= last_error; + +#include "utils_test/compare.hpp" +#include "utils_test/generate_inputs.hpp" + +// HOWTO: +// +// unary_func, binary_func, ternary_func - base classes wrapping OpenCL functions that +// you want to test. 
+// +// To create a wrapper class for given function, you need to create a class derived from correct +// base class (unary_func, binary_func, ternary_func), and define: +// +// * std::string str() method which should return class name in OpenCL ("abs", "abs_diff"), +// * operator(x), operator(x, y) or operator(x,y,z) depending on arity of the function you wish +// to test, method should work exactly as the tested function works in OpenCL +// * if it's needed you can overload min1, max1, min2, max2, min3, max3 methods with returns min +// and max values that can be generated for given input (function argument) [required for vec +// arguments], +// * if you want to use vector arguments (for example: cl_int2, cl_ulong16), you should look at +// how int_func_clamp<> is implemented in integer_funcs/numeric_funcs.hpp. +// +// To see how you should use class you've just created see AUTO_TEST_CASE(test_int_numeric_funcs) +// in integer_funcs/numeric_funcs.hpp. +#include "utils_test/unary.hpp" +#include "utils_test/binary.hpp" +#include "utils_test/ternary.hpp" + +#endif // TEST_CONFORMANCE_CLCPP_FUNCS_TEST_UTILS_HPP diff --git a/test_conformance/clcpp/geometric_funcs/CMakeLists.txt b/test_conformance/clcpp/geometric_funcs/CMakeLists.txt new file mode 100644 index 00000000..3314863e --- /dev/null +++ b/test_conformance/clcpp/geometric_funcs/CMakeLists.txt @@ -0,0 +1,12 @@ +set(MODULE_NAME CPP_GEOMETRIC_FUNCS) + +set(${MODULE_NAME}_SOURCES + main.cpp + ../../../test_common/harness/errorHelpers.c + ../../../test_common/harness/testHarness.c + ../../../test_common/harness/kernelHelpers.c + ../../../test_common/harness/msvc9.c + ../../../test_common/harness/parseParameters.cpp +) + +include(../../CMakeCommon.txt) diff --git a/test_conformance/clcpp/geometric_funcs/fast_geometric_funcs.hpp b/test_conformance/clcpp/geometric_funcs/fast_geometric_funcs.hpp new file mode 100644 index 00000000..c1797288 --- /dev/null +++ 
b/test_conformance/clcpp/geometric_funcs/fast_geometric_funcs.hpp @@ -0,0 +1,229 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef TEST_CONFORMANCE_CLCPP_GEOMETRIC_FUNCS_FAST_GEOMETRIC_FUNCS_HPP +#define TEST_CONFORMANCE_CLCPP_GEOMETRIC_FUNCS_FAST_GEOMETRIC_FUNCS_HPP + +#include "../common.hpp" +#include "../funcs_test_utils.hpp" + +#include + +// float fast_distance(float4 p0, float4 p1); +struct geometric_func_fast_distance : public binary_func +{ + + std::string str() + { + return "fast_distance"; + } + + std::string headers() + { + return "#include \n"; + } + + cl_float operator()(const cl_float4& p0, const cl_float4& p1) + { + cl_double r = 0.0f; + cl_double t; + for(size_t i = 0; i < 4; i++) + { + t = static_cast(p0.s[i]) - static_cast(p1.s[i]); + r += t * t; + } + return std::sqrt(r); + } + + cl_float4 min1() + { + return detail::def_limit(-512.0f); + } + + cl_float4 max1() + { + return detail::def_limit(512.0f); + } + + cl_float4 min2() + { + return detail::def_limit(-512.0f); + } + + cl_float4 max2() + { + return detail::def_limit(512.0f); + } + + cl_double delta(const cl_float4& p0, const cl_float4& p1, const cl_float& expected) + { + (void) p0; (void) p1; + return 0.01f * expected; + } + + float ulp() + { + return + 8192.0f + // error in sqrt + (1.5f * 4.0f) + // cumulative error for multiplications + (0.5f * 3.0f); // cumulative error for additions + } +}; + +// float 
fast_length(float4 p); +struct geometric_func_fast_length : public unary_func +{ + std::string str() + { + return "fast_length"; + } + + std::string headers() + { + return "#include \n"; + } + + cl_float operator()(const cl_float4& p) + { + cl_double r = 0.0f; + for(size_t i = 0; i < 4; i++) + { + r += static_cast(p.s[i]) * static_cast(p.s[i]); + } + return std::sqrt(r); + } + + cl_float4 min1() + { + return detail::def_limit(-512.0f); + } + + cl_float4 max1() + { + return detail::def_limit(512.0f); + } + + cl_double delta(const cl_float4& p, const cl_float& expected) + { + (void) p; + return 0.01f * expected; + } + + float ulp() + { + return + 8192.0f + // error in sqrt + 0.5f * // effect on e of taking sqrt( x + e ) + ((0.5f * 4.0f) + // cumulative error for multiplications + (0.5f * 3.0f)); // cumulative error for additions + } +}; + +// float4 fast_normalize(float4 p); +struct geometric_func_fast_normalize : public unary_func +{ + std::string str() + { + return "fast_normalize"; + } + + std::string headers() + { + return "#include \n"; + } + + cl_float4 operator()(const cl_float4& p) + { + cl_double t = 0.0f; + cl_float4 r; + for(size_t i = 0; i < 4; i++) + { + t += static_cast(p.s[i]) * static_cast(p.s[i]); + } + + if(t == 0.0f) + { + for(size_t i = 0; i < 4; i++) + { + r.s[i] = 0.0f; + } + return r; + } + + t = std::sqrt(t); + for(size_t i = 0; i < 4; i++) + { + r.s[i] = static_cast(p.s[i]) / t; + } + return r; + } + + cl_float4 min1() + { + return detail::def_limit(-512.0f); + } + + cl_float4 max1() + { + return detail::def_limit(512.0f); + } + + std::vector in_special_cases() + { + return { + {0.0f, 0.0f, 0.0f, 0.0f} + }; + } + + + cl_double4 delta(const cl_float4& p, const cl_float4& expected) + { + (void) p; + auto e = detail::make_value(0.01f); + return detail::multiply(e, expected); + } + + float ulp() + { + return + 8192.5f + // error in rsqrt + error in multiply + (0.5f * 4.0f) + // cumulative error for multiplications + (0.5f * 3.0f); // cumulative 
error for additions + } +}; + +AUTO_TEST_CASE(test_fast_geometric_funcs) +(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + int error = CL_SUCCESS; + int last_error = CL_SUCCESS; + + // float fast_distance(float4 p0, float4 p1) + TEST_BINARY_FUNC_MACRO((geometric_func_fast_distance())) + + // float fast_length(float4 p) + TEST_UNARY_FUNC_MACRO((geometric_func_fast_length())) + + // float4 fast_normalize(float4 p) + TEST_UNARY_FUNC_MACRO((geometric_func_fast_normalize())) + + if(error != CL_SUCCESS) + { + return -1; + } + return error; +} + +#endif // TEST_CONFORMANCE_CLCPP_GEOMETRIC_FUNCS_FAST_GEOMETRIC_FUNCS_HPP diff --git a/test_conformance/clcpp/geometric_funcs/geometric_funcs.hpp b/test_conformance/clcpp/geometric_funcs/geometric_funcs.hpp new file mode 100644 index 00000000..561f9e9b --- /dev/null +++ b/test_conformance/clcpp/geometric_funcs/geometric_funcs.hpp @@ -0,0 +1,389 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef TEST_CONFORMANCE_CLCPP_GEOMETRIC_FUNCS_GEOMETRIC_FUNCS_HPP +#define TEST_CONFORMANCE_CLCPP_GEOMETRIC_FUNCS_GEOMETRIC_FUNCS_HPP + +#include "../common.hpp" +#include "../funcs_test_utils.hpp" + +#include + +// float4 cross(float4 p0, float4 p1) +struct geometric_func_cross : public binary_func +{ + geometric_func_cross(cl_device_id device) + { + // On an embedded device w/ round-to-zero, 3 ulps is the worst-case tolerance for cross product + this->m_delta = 3.0f * CL_FLT_EPSILON; + // RTZ devices accrue approximately double the amount of error per operation. Allow for that. + if(get_default_rounding_mode(device) == CL_FP_ROUND_TO_ZERO) + { + this->m_delta *= 2.0f; + } + } + + std::string str() + { + return "cross"; + } + + std::string headers() + { + return "#include \n"; + } + + cl_float4 operator()(const cl_float4& p0, const cl_float4& p1) + { + cl_float4 r; + r.s[0] = (p0.s[1] * p1.s[2]) - (p0.s[2] * p1.s[1]); + r.s[1] = (p0.s[2] * p1.s[0]) - (p0.s[0] * p1.s[2]); + r.s[2] = (p0.s[0] * p1.s[1]) - (p0.s[1] * p1.s[0]); + r.s[3] = 0.0f; + return r; + } + + cl_float4 max1() + { + return detail::def_limit(1000.0f); + } + + cl_float4 max2() + { + return detail::def_limit(1000.0f); + } + + cl_float4 min1() + { + return detail::def_limit(-1000.0f); + } + + cl_float4 min2() + { + return detail::def_limit(-1000.0f); + } + + bool use_ulp() + { + return false; + } + + cl_double4 delta(const cl_float4& p0, const cl_float4& p1, const cl_float4& expected) + { + (void) p0; (void) p1; + auto e = detail::make_value(m_delta); + return detail::multiply(e, expected); + } + +private: + cl_double m_delta; +}; + +// float dot(float4 p0, float4 p1); +struct geometric_func_dot : public binary_func +{ + + std::string str() + { + return "dot"; + } + + std::string headers() + { + return "#include \n"; + } + + cl_float operator()(const cl_float4& p0, const cl_float4& p1) + { + cl_float r; + r = p0.s[0] * p1.s[0]; + r += p0.s[1] * p1.s[1]; + r += p0.s[2] * p1.s[2]; + r += p0.s[3] * 
p1.s[3]; + return r; + } + + cl_float4 max1() + { + return detail::def_limit(1000.0f); + } + + cl_float4 max2() + { + return detail::def_limit(1000.0f); + } + + cl_float4 min1() + { + return detail::def_limit(-1000.0f); + } + + cl_float4 min2() + { + return detail::def_limit(-1000.0f); + } + + bool use_ulp() + { + return false; + } + + cl_double delta(const cl_float4& p0, const cl_float4& p1, cl_float expected) + { + (void) p0; (void) p1; + return expected * ((4.0f + (4.0f - 1.0f)) * CL_FLT_EPSILON); + } +}; + +// float distance(float4 p0, float4 p1); +struct geometric_func_distance : public binary_func +{ + + std::string str() + { + return "distance"; + } + + std::string headers() + { + return "#include \n"; + } + + cl_float operator()(const cl_float4& p0, const cl_float4& p1) + { + cl_double r = 0.0f; + cl_double t; + for(size_t i = 0; i < 4; i++) + { + t = static_cast(p0.s[i]) - static_cast(p1.s[i]); + r += t * t; + } + return std::sqrt(r); + } + + cl_float4 max1() + { + return detail::def_limit(1000.0f); + } + + cl_float4 max2() + { + return detail::def_limit(1000.0f); + } + + cl_float4 min1() + { + return detail::def_limit(-1000.0f); + } + + cl_float4 min2() + { + return detail::def_limit(-1000.0f); + } + + float ulp() + { + return + 3.0f + // error in sqrt + (1.5f * 4.0f) + // cumulative error for multiplications + (0.5f * 3.0f); // cumulative error for additions + } +}; + +// float length(float4 p); +struct geometric_func_length : public unary_func +{ + + std::string str() + { + return "length"; + } + + std::string headers() + { + return "#include \n"; + } + + cl_float operator()(const cl_float4& p) + { + cl_double r = 0.0f; + for(size_t i = 0; i < 4; i++) + { + r += static_cast(p.s[i]) * static_cast(p.s[i]); + } + return std::sqrt(r); + } + + cl_float4 max1() + { + return detail::def_limit(1000.0f); + } + + cl_float4 min1() + { + return detail::def_limit(-1000.0f); + } + + float ulp() + { + return + 3.0f + // error in sqrt + 0.5f * // effect on e of taking 
sqrt( x + e ) + ((0.5f * 4.0f) + // cumulative error for multiplications + (0.5f * 3.0f)); // cumulative error for additions + } +}; + +// float4 normalize(float4 p); +struct geometric_func_normalize : public unary_func +{ + std::string str() + { + return "normalize"; + } + + std::string headers() + { + return "#include \n"; + } + + cl_float4 operator()(const cl_float4& p) + { + cl_double t = 0.0f; + cl_float4 r; + + // normalize( v ) returns a vector full of NaNs if any element is a NaN. + for(size_t i = 0; i < 4; i++) + { + if((std::isnan)(p.s[i])) + { + for(size_t j = 0; j < 4; j++) + { + r.s[j] = p.s[i]; + } + return r; + } + } + + // normalize( v ) for which any element in v is infinite shall proceed as + // if the elements in v were replaced as follows: + // for( i = 0; i < sizeof(v) / sizeof(v[0] ); i++ ) + // v[i] = isinf(v[i]) ? copysign(1.0, v[i]) : 0.0 * v [i]; + for(size_t i = 0; i < 4; i++) + { + if((std::isinf)(p.s[i])) + { + for(size_t j = 0; j < 4; j++) + { + r.s[j] = (std::isinf)(p.s[j]) ? (std::copysign)(1.0, p.s[j]) : 0.0 * p.s[j]; + } + r = (*this)(r); + return r; + } + } + + for(size_t i = 0; i < 4; i++) + { + t += static_cast(p.s[i]) * static_cast(p.s[i]); + } + + // normalize( v ) returns v if all elements of v are zero. 
+ if(t == 0.0f) + { + for(size_t i = 0; i < 4; i++) + { + r.s[i] = 0.0f; + } + return r; + } + + t = std::sqrt(t); + for(size_t i = 0; i < 4; i++) + { + r.s[i] = static_cast(p.s[i]) / t; + } + + return r; + } + + cl_float4 max1() + { + return detail::def_limit(1000.0f); + } + + cl_float4 min1() + { + return detail::def_limit(-1000.0f); + } + + std::vector in_special_cases() + { + return { + {0.0f, 0.0f, 0.0f, 0.0f}, + {std::numeric_limits::infinity(), 0.0f, 0.0f, 0.0f}, + { + std::numeric_limits::infinity(), + std::numeric_limits::infinity(), + std::numeric_limits::infinity(), + std::numeric_limits::infinity() + }, + { + std::numeric_limits::infinity(), + 1.0f, + 0.0f, + std::numeric_limits::quiet_NaN() + }, + {-1.0f, -1.0f, 0.0f,-300.0f} + }; + } + + float ulp() + { + return + 2.5f + // error in rsqrt + error in multiply + (0.5f * 4.0f) + // cumulative error for multiplications + (0.5f * 3.0f); // cumulative error for additions + } +}; + +AUTO_TEST_CASE(test_geometric_funcs) +(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + int error = CL_SUCCESS; + int last_error = CL_SUCCESS; + + // float4 cross(float4 p0, float4 p1) + TEST_BINARY_FUNC_MACRO((geometric_func_cross(device))) + + // float dot(float4 p0, float4 p1) + TEST_BINARY_FUNC_MACRO((geometric_func_dot())) + + // float distance(float4 p0, float4 p1) + TEST_BINARY_FUNC_MACRO((geometric_func_distance())) + + // float length(float4 p) + TEST_UNARY_FUNC_MACRO((geometric_func_length())) + + // float4 normalize(float4 p) + TEST_UNARY_FUNC_MACRO((geometric_func_normalize())) + + if(error != CL_SUCCESS) + { + return -1; + } + return error; +} + +#endif // TEST_CONFORMANCE_CLCPP_GEOMETRIC_FUNCS_GEOMETRIC_FUNCS_HPP diff --git a/test_conformance/clcpp/geometric_funcs/main.cpp b/test_conformance/clcpp/geometric_funcs/main.cpp new file mode 100644 index 00000000..fb522af6 --- /dev/null +++ b/test_conformance/clcpp/geometric_funcs/main.cpp @@ -0,0 +1,49 @@ +// +// Copyright (c) 2017 The 
Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include + +#include "../common.hpp" + +#include "geometric_funcs.hpp" +#include "fast_geometric_funcs.hpp" + +int main(int argc, const char *argv[]) +{ + // Check if cl_float (float) and cl_double (double) fulfill the requirements of + // IEC 559 (IEEE 754) standard. This is required for the tests to run correctly. + if(!std::numeric_limits::is_iec559) + { + RETURN_ON_ERROR_MSG(-1, + "cl_float (float) does not fulfill the requirements of IEC 559 (IEEE 754) standard. " + "Tests won't run correctly." + ); + } + if(!std::numeric_limits::is_iec559) + { + RETURN_ON_ERROR_MSG(-1, + "cl_double (double) does not fulfill the requirements of IEC 559 (IEEE 754) standard. " + "Tests won't run correctly." 
+ ); + } + + // Get list to all test functions + std::vector testfn_list = autotest::test_suite::get_test_functions(); + // Get names of all test functions + std::vector testfn_names = autotest::test_suite::get_test_names(); + // Create a vector of pointers to the names test functions + std::vector testfn_names_c_str = autotest::get_strings_ptrs(testfn_names); + return runTestHarness(argc, argv, testfn_list.size(), testfn_list.data(), testfn_names_c_str.data(), false, false, 0); +} diff --git a/test_conformance/clcpp/images/CMakeLists.txt b/test_conformance/clcpp/images/CMakeLists.txt new file mode 100644 index 00000000..e6b282d8 --- /dev/null +++ b/test_conformance/clcpp/images/CMakeLists.txt @@ -0,0 +1,15 @@ +set(MODULE_NAME CPP_IMAGES) + +set(${MODULE_NAME}_SOURCES + main.cpp + ../../../test_common/harness/errorHelpers.c + ../../../test_common/harness/testHarness.c + ../../../test_common/harness/kernelHelpers.c + ../../../test_common/harness/msvc9.c + ../../../test_common/harness/parseParameters.cpp + ../../../test_common/harness/mt19937.c + ../../../test_common/harness/conversions.c + ../../../test_common/harness/imageHelpers.cpp +) + +include(../../CMakeCommon.txt) diff --git a/test_conformance/clcpp/images/common.hpp b/test_conformance/clcpp/images/common.hpp new file mode 100644 index 00000000..f975d7ed --- /dev/null +++ b/test_conformance/clcpp/images/common.hpp @@ -0,0 +1,198 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef TEST_CONFORMANCE_CLCPP_IMAGES_COMMON_HPP +#define TEST_CONFORMANCE_CLCPP_IMAGES_COMMON_HPP + +#include + +#include "../common.hpp" +#include "../funcs_test_utils.hpp" + +// This global variable is used by read_image_pixel from harness/imageHelpers +bool gTestRounding = false; + +#include "../../../test_common/harness/imageHelpers.h" + + +namespace detail +{ + +template +struct channel_info; + +template<> +struct channel_info +{ + typedef cl_char channel_type; + typedef cl_int4 element_type; + static std::string function_suffix() { return "i"; } + + channel_type channel_min() { return (std::numeric_limits::min)(); } + channel_type channel_max() { return (std::numeric_limits::max)(); } +}; + +template<> +struct channel_info +{ + typedef cl_short channel_type; + typedef cl_int4 element_type; + static std::string function_suffix() { return "i"; } + + channel_type channel_min() { return (std::numeric_limits::min)(); } + channel_type channel_max() { return (std::numeric_limits::max)(); } +}; + +template<> +struct channel_info +{ + typedef cl_int channel_type; + typedef cl_int4 element_type; + static std::string function_suffix() { return "i"; } + + channel_type channel_min() { return (std::numeric_limits::min)(); } + channel_type channel_max() { return (std::numeric_limits::max)(); } +}; + +template<> +struct channel_info +{ + typedef cl_uchar channel_type; + typedef cl_uint4 element_type; + static std::string function_suffix() { return "ui"; } + + channel_type channel_min() { return (std::numeric_limits::min)(); } + channel_type channel_max() { return (std::numeric_limits::max)(); } +}; + +template<> +struct channel_info +{ + typedef cl_ushort channel_type; + typedef cl_uint4 element_type; + static std::string function_suffix() { return "ui"; } + + channel_type channel_min() { return (std::numeric_limits::min)(); } + channel_type channel_max() { return 
(std::numeric_limits::max)(); } +}; + +template<> +struct channel_info +{ + typedef cl_uint channel_type; + typedef cl_uint4 element_type; + static std::string function_suffix() { return "ui"; } + + channel_type channel_min() { return (std::numeric_limits::min)(); } + channel_type channel_max() { return (std::numeric_limits::max)(); } +}; + +template<> +struct channel_info +{ + typedef cl_float channel_type; + typedef cl_float4 element_type; + static std::string function_suffix() { return "f"; } + + channel_type channel_min() { return -1e-3f; } + channel_type channel_max() { return +1e+3f; } +}; + +template +struct image_info; + +template<> +struct image_info +{ + static std::string image_type_name() { return "image1d"; } + static std::string coord_accessor() { return "x"; } +}; + +template<> +struct image_info +{ + static std::string image_type_name() { return "image2d"; } + static std::string coord_accessor() { return "xy"; } +}; + +template<> +struct image_info +{ + static std::string image_type_name() { return "image3d"; } +#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + static std::string coord_accessor() { return "xyzw"; } +#else + static std::string coord_accessor() { return "xyz"; } +#endif +}; + +} // namespace + +template +struct image_test_base : + detail::channel_info, + detail::image_info +{ }; + +// Create image_descriptor (used by harness/imageHelpers functions) +image_descriptor create_image_descriptor(cl_image_desc &image_desc, cl_image_format *image_format) +{ + image_descriptor image_info; + image_info.width = image_desc.image_width; + image_info.height = image_desc.image_height; + image_info.depth = image_desc.image_depth; + image_info.arraySize = image_desc.image_array_size; + image_info.rowPitch = image_desc.image_row_pitch; + image_info.slicePitch = image_desc.image_slice_pitch; + image_info.format = image_format; + image_info.buffer = image_desc.mem_object; + image_info.type = image_desc.image_type; + image_info.num_mip_levels = 
image_desc.num_mip_levels; + return image_info; +} + +const std::vector get_channel_orders(cl_device_id device) +{ + // According to "Minimum List of Supported Image Formats" of OpenCL specification: + return { CL_R, CL_RG, CL_RGBA }; +} + +bool is_test_supported(cl_device_id device) +{ + // Check for image support + if (checkForImageSupport(device) == CL_IMAGE_FORMAT_NOT_SUPPORTED) + { + log_info("SKIPPED: Device does not support images. Skipping test.\n"); + return false; + } + return true; +} + +// Checks if x is equal to y. +template +inline bool are_equal(const type& x, + const type& y) +{ + for(size_t i = 0; i < vector_size::value; i++) + { + if(!(x.s[i] == y.s[i])) + { + return false; + } + } + return true; +} + +#endif // TEST_CONFORMANCE_CLCPP_IMAGES_COMMON_HPP diff --git a/test_conformance/clcpp/images/main.cpp b/test_conformance/clcpp/images/main.cpp new file mode 100644 index 00000000..5cf221c8 --- /dev/null +++ b/test_conformance/clcpp/images/main.cpp @@ -0,0 +1,32 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../common.hpp" + +#include "test_read.hpp" +#include "test_sample.hpp" +#include "test_write.hpp" + + +int main(int argc, const char *argv[]) +{ + // Get list to all test functions + std::vector testfn_list = autotest::test_suite::get_test_functions(); + // Get names of all test functions + std::vector testfn_names = autotest::test_suite::get_test_names(); + // Create a vector of pointers to the names test functions + std::vector testfn_names_c_str = autotest::get_strings_ptrs(testfn_names); + return runTestHarness(argc, argv, testfn_list.size(), testfn_list.data(), testfn_names_c_str.data(), false, false, 0); +} diff --git a/test_conformance/clcpp/images/test_read.hpp b/test_conformance/clcpp/images/test_read.hpp new file mode 100644 index 00000000..3bc7b5ef --- /dev/null +++ b/test_conformance/clcpp/images/test_read.hpp @@ -0,0 +1,307 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef TEST_CONFORMANCE_CLCPP_IMAGES_TEST_READ_HPP +#define TEST_CONFORMANCE_CLCPP_IMAGES_TEST_READ_HPP + +#include +#include +#include +#include + +#include "common.hpp" + + +namespace test_images_read { + +template +struct image_test : image_test_base +{ + cl_channel_order channel_order; + + image_test(cl_channel_order channel_order) : + channel_order(channel_order) + { } +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + std::string generate_source() + { + std::stringstream s; + s << R"( + typedef )" << type_name() << R"( element_type; + + kernel void test( + read_only )" << image_test::image_type_name() << R"(_t img, + const global int4 *coords, + global element_type *output + ) { + const ulong gid = get_global_linear_id(); + + output[gid] = read_image)" << image_test::function_suffix() << + "(img, coords[gid]." 
<< image_test::coord_accessor() << R"(); + } + )"; + + return s.str(); + } +#else + std::string generate_source() + { + std::stringstream s; + s << R"( + #include + #include + #include + #include + using namespace cl; + )"; + + s << R"( + typedef )" << type_name() << R"( element_type; + + kernel void test( + const )" << image_test::image_type_name() << R"( img, + const global_ptr coords, + global_ptr output + ) { + const ulong gid = get_global_linear_id(); + + output[gid] = img.read(coords[gid].)" << image_test::coord_accessor() << R"(); + } + )"; + + return s.str(); + } +#endif + + int run(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) + { + int error = CL_SUCCESS; + + cl_program program; + cl_kernel kernel; + + std::string kernel_name = "test"; + std::string source = generate_source(); + +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +// Only OpenCL C++ to SPIR-V compilation +#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) + error = create_opencl_kernel( + context, &program, &kernel, + source, kernel_name + ); + RETURN_ON_ERROR(error) + return error; +// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) +#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + error = create_opencl_kernel( + context, &program, &kernel, + source, kernel_name, "-cl-std=CL2.0", false + ); + RETURN_ON_ERROR(error) +// Normal run +#else + error = create_opencl_kernel( + context, &program, &kernel, + source, kernel_name + ); + RETURN_ON_ERROR(error) +#endif + + using element_type = typename image_test::element_type; + using coord_type = cl_int4; + using scalar_element_type = typename scalar_type::type; + using channel_type = typename image_test::channel_type; + + cl_image_format image_format; + 
image_format.image_channel_order = channel_order; + image_format.image_channel_data_type = ChannelType; + + const size_t pixel_size = get_pixel_size(&image_format); + const size_t channel_count = get_channel_order_channel_count(image_format.image_channel_order); + + cl_image_desc image_desc; + image_desc.image_type = ImageType; + if (ImageType == CL_MEM_OBJECT_IMAGE1D) + { + image_desc.image_width = 2048; + image_desc.image_height = 1; + image_desc.image_depth = 1; + } + else if (ImageType == CL_MEM_OBJECT_IMAGE2D) + { + image_desc.image_width = 256; + image_desc.image_height = 256; + image_desc.image_depth = 1; + } + else if (ImageType == CL_MEM_OBJECT_IMAGE3D) + { + image_desc.image_width = 64; + image_desc.image_height = 64; + image_desc.image_depth = 64; + } + image_desc.image_array_size = 0; + image_desc.image_row_pitch = image_desc.image_width * pixel_size; + image_desc.image_slice_pitch = image_desc.image_row_pitch * image_desc.image_height; + image_desc.num_mip_levels = 0; + image_desc.num_samples = 0; + image_desc.mem_object = NULL; + + image_descriptor image_info = create_image_descriptor(image_desc, &image_format); + + std::vector image_values = generate_input( + image_desc.image_width * image_desc.image_height * image_desc.image_depth * channel_count, + image_test::channel_min(), image_test::channel_max(), + std::vector() + ); + + const size_t count = num_elements; + + std::vector coords = generate_input( + count, + detail::make_value(0), + coord_type { + static_cast(image_desc.image_width - 1), + static_cast(image_desc.image_height - 1), + static_cast(image_desc.image_depth - 1), + 0 + }, + std::vector() + ); + + cl_mem img = clCreateImage(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, + &image_format, &image_desc, static_cast(image_values.data()), &error); + RETURN_ON_CL_ERROR(error, "clCreateImage") + + cl_mem coords_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, + sizeof(coord_type) * count, static_cast(coords.data()), 
&error); + RETURN_ON_CL_ERROR(error, "clCreateBuffer") + + cl_mem output_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(element_type) * count, NULL, &error); + RETURN_ON_CL_ERROR(error, "clCreateBuffer") + + error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &img); + RETURN_ON_CL_ERROR(error, "clSetKernelArg") + error = clSetKernelArg(kernel, 1, sizeof(coords_buffer), &coords_buffer); + RETURN_ON_CL_ERROR(error, "clSetKernelArg") + error = clSetKernelArg(kernel, 2, sizeof(output_buffer), &output_buffer); + RETURN_ON_CL_ERROR(error, "clSetKernelArg") + + const size_t global_size = count; + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, NULL, 0, NULL, NULL); + RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel") + + std::vector output(count); + error = clEnqueueReadBuffer( + queue, output_buffer, CL_TRUE, + 0, sizeof(element_type) * count, + static_cast(output.data()), + 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer") + + for (size_t i = 0; i < count; i++) + { + const coord_type c = coords[i]; + const element_type result = output[i]; + + element_type expected; + read_image_pixel(static_cast(image_values.data()), &image_info, + c.s[0], c.s[1], c.s[2], + expected.s); + + if (!are_equal(result, expected)) + { + RETURN_ON_ERROR_MSG(-1, + "Reading from coordinates %s failed. 
Expected: %s, got: %s", + format_value(c).c_str(), format_value(expected).c_str(), format_value(result).c_str() + ); + } + } + + clReleaseMemObject(img); + clReleaseMemObject(coords_buffer); + clReleaseMemObject(output_buffer); + clReleaseKernel(kernel); + clReleaseProgram(program); + return error; + } +}; + +template +int run_test_cases(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + if (!is_test_supported(device)) + return CL_SUCCESS; + + int error = CL_SUCCESS; + + for (auto channel_order : get_channel_orders(device)) + { + error = image_test(channel_order) + .run(device, context, queue, num_elements); + RETURN_ON_ERROR(error) + error = image_test(channel_order) + .run(device, context, queue, num_elements); + RETURN_ON_ERROR(error) + error = image_test(channel_order) + .run(device, context, queue, num_elements); + RETURN_ON_ERROR(error) + + error = image_test(channel_order) + .run(device, context, queue, num_elements); + RETURN_ON_ERROR(error) + error = image_test(channel_order) + .run(device, context, queue, num_elements); + RETURN_ON_ERROR(error) + error = image_test(channel_order) + .run(device, context, queue, num_elements); + RETURN_ON_ERROR(error) + + error = image_test(channel_order) + .run(device, context, queue, num_elements); + RETURN_ON_ERROR(error) + } + + return error; +} + + +AUTO_TEST_CASE(test_images_read_1d) +(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + return run_test_cases(device, context, queue, num_elements); +} + +AUTO_TEST_CASE(test_images_read_2d) +(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + return run_test_cases(device, context, queue, num_elements); +} + +AUTO_TEST_CASE(test_images_read_3d) +(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + return run_test_cases(device, context, queue, num_elements); +} + +} // namespace + +#endif // 
TEST_CONFORMANCE_CLCPP_IMAGES_TEST_READ_HPP diff --git a/test_conformance/clcpp/images/test_sample.hpp b/test_conformance/clcpp/images/test_sample.hpp new file mode 100644 index 00000000..a96a563a --- /dev/null +++ b/test_conformance/clcpp/images/test_sample.hpp @@ -0,0 +1,363 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef TEST_CONFORMANCE_CLCPP_IMAGES_TEST_SAMPLE_HPP +#define TEST_CONFORMANCE_CLCPP_IMAGES_TEST_SAMPLE_HPP + +#include +#include +#include +#include + +#include "common.hpp" + + +namespace test_images_sample { + +enum class sampler_source +{ + param, + program_scope +}; + +const sampler_source sampler_sources[] = { sampler_source::param, sampler_source::program_scope }; + +template +struct image_test : image_test_base +{ + cl_channel_order channel_order; + sampler_source source; + + image_test(cl_channel_order channel_order, sampler_source source) : + channel_order(channel_order), + source(source) + { } + +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + std::string generate_source() + { + std::stringstream s; + s << R"( + typedef )" << type_name() << R"( element_type; + )"; + + std::string sampler; + if 
(source == sampler_source::program_scope) + { + s << R"( + constant sampler_t sampler_program_scope = CLK_FILTER_NEAREST | CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE; + )"; + sampler = "sampler_program_scope"; + } + else if (source == sampler_source::param) + { + sampler = "sampler_param"; + } + + s << R"( + kernel void test( + read_only )" << image_test::image_type_name() << R"(_t img, + const global int4 *coords, + global element_type *output, + sampler_t sampler_param + ) { + const ulong gid = get_global_linear_id(); + + output[gid] = read_image)" << image_test::function_suffix() << + "(img, " << sampler << ", coords[gid]." << image_test::coord_accessor() << R"(); + } + )"; + + return s.str(); + } +#else + std::string generate_source() + { + std::stringstream s; + s << R"( + #include + #include + #include + #include + using namespace cl; + )"; + + s << R"( + typedef )" << type_name() << R"( element_type; + )"; + + std::string sampler; + if (source == sampler_source::program_scope) + { + s << R"( + sampler sampler_program_scope = make_sampler(); + )"; + sampler = "sampler_program_scope"; + } + else if (source == sampler_source::param) + { + sampler = "sampler_param"; + } + + s << R"( + kernel void test( + const )" << image_test::image_type_name() << R"( img, + const global_ptr coords, + global_ptr output, + sampler sampler_param + ) { + const ulong gid = get_global_linear_id(); + + output[gid] = img.sample()" << sampler << ", coords[gid]." 
<< image_test::coord_accessor() << R"(); + } + )"; + + return s.str(); + } +#endif + + int run(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) + { + int error = CL_SUCCESS; + + cl_program program; + cl_kernel kernel; + + std::string kernel_name = "test"; + std::string source = generate_source(); + +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +// Only OpenCL C++ to SPIR-V compilation +#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) + error = create_opencl_kernel( + context, &program, &kernel, + source, kernel_name + ); + RETURN_ON_ERROR(error) + return error; +// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) +#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + error = create_opencl_kernel( + context, &program, &kernel, + source, kernel_name, "-cl-std=CL2.0", false + ); + RETURN_ON_ERROR(error) +// Normal run +#else + error = create_opencl_kernel( + context, &program, &kernel, + source, kernel_name + ); + RETURN_ON_ERROR(error) +#endif + + using element_type = typename image_test::element_type; + using coord_type = cl_int4; + using scalar_element_type = typename scalar_type::type; + using channel_type = typename image_test::channel_type; + + cl_image_format image_format; + image_format.image_channel_order = channel_order; + image_format.image_channel_data_type = ChannelType; + + const size_t pixel_size = get_pixel_size(&image_format); + const size_t channel_count = get_channel_order_channel_count(image_format.image_channel_order); + + cl_image_desc image_desc; + image_desc.image_type = ImageType; + if (ImageType == CL_MEM_OBJECT_IMAGE1D) + { + image_desc.image_width = 2048; + image_desc.image_height = 1; + image_desc.image_depth = 1; + } + else if (ImageType == 
CL_MEM_OBJECT_IMAGE2D) + { + image_desc.image_width = 256; + image_desc.image_height = 256; + image_desc.image_depth = 1; + } + else if (ImageType == CL_MEM_OBJECT_IMAGE3D) + { + image_desc.image_width = 64; + image_desc.image_height = 64; + image_desc.image_depth = 64; + } + image_desc.image_array_size = 0; + image_desc.image_row_pitch = image_desc.image_width * pixel_size; + image_desc.image_slice_pitch = image_desc.image_row_pitch * image_desc.image_height; + image_desc.num_mip_levels = 0; + image_desc.num_samples = 0; + image_desc.mem_object = NULL; + + image_descriptor image_info = create_image_descriptor(image_desc, &image_format); + + std::vector image_values = generate_input( + image_desc.image_width * image_desc.image_height * image_desc.image_depth * channel_count, + image_test::channel_min(), image_test::channel_max(), + std::vector() + ); + + const size_t count = num_elements; + + std::vector coords = generate_input( + count, + detail::make_value(0), + coord_type { + static_cast(image_desc.image_width - 1), + static_cast(image_desc.image_height - 1), + static_cast(image_desc.image_depth - 1), + 0 + }, + std::vector() + ); + + cl_mem img = clCreateImage(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, + &image_format, &image_desc, static_cast(image_values.data()), &error); + RETURN_ON_CL_ERROR(error, "clCreateImage") + + cl_mem coords_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, + sizeof(coord_type) * count, static_cast(coords.data()), &error); + RETURN_ON_CL_ERROR(error, "clCreateBuffer") + + cl_mem output_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(element_type) * count, NULL, &error); + RETURN_ON_CL_ERROR(error, "clCreateBuffer") + + const cl_sampler_properties sampler_properties[] = { + CL_SAMPLER_NORMALIZED_COORDS, CL_FALSE, + CL_SAMPLER_ADDRESSING_MODE, CL_ADDRESS_NONE, + CL_SAMPLER_FILTER_MODE, CL_FILTER_NEAREST, + 0 + }; + cl_sampler sampler = clCreateSamplerWithProperties(context, sampler_properties, 
&error); + RETURN_ON_CL_ERROR(error, "clCreateSamplerWithProperties") + + error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &img); + RETURN_ON_CL_ERROR(error, "clSetKernelArg") + error = clSetKernelArg(kernel, 1, sizeof(coords_buffer), &coords_buffer); + RETURN_ON_CL_ERROR(error, "clSetKernelArg") + error = clSetKernelArg(kernel, 2, sizeof(output_buffer), &output_buffer); + RETURN_ON_CL_ERROR(error, "clSetKernelArg") + error = clSetKernelArg(kernel, 3, sizeof(sampler), &sampler); + RETURN_ON_CL_ERROR(error, "clSetKernelArg") + + const size_t global_size = count; + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, NULL, 0, NULL, NULL); + RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel") + + std::vector output(count); + error = clEnqueueReadBuffer( + queue, output_buffer, CL_TRUE, + 0, sizeof(element_type) * count, + static_cast(output.data()), + 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer") + + for (size_t i = 0; i < count; i++) + { + const coord_type c = coords[i]; + const element_type result = output[i]; + + element_type expected; + read_image_pixel(static_cast(image_values.data()), &image_info, + c.s[0], c.s[1], c.s[2], + expected.s); + + if (!are_equal(result, expected)) + { + RETURN_ON_ERROR_MSG(-1, + "Sampling from coordinates %s failed. 
Expected: %s, got: %s", + format_value(c).c_str(), format_value(expected).c_str(), format_value(result).c_str() + ); + } + } + + clReleaseMemObject(img); + clReleaseMemObject(coords_buffer); + clReleaseMemObject(output_buffer); + clReleaseSampler(sampler); + clReleaseKernel(kernel); + clReleaseProgram(program); + return error; + } +}; + +template +int run_test_cases(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + if (!is_test_supported(device)) + return CL_SUCCESS; + + int error = CL_SUCCESS; + + for (auto channel_order : get_channel_orders(device)) + for (auto source : sampler_sources) + { + error = image_test(channel_order, source) + .run(device, context, queue, num_elements); + RETURN_ON_ERROR(error) + error = image_test(channel_order, source) + .run(device, context, queue, num_elements); + RETURN_ON_ERROR(error) + error = image_test(channel_order, source) + .run(device, context, queue, num_elements); + RETURN_ON_ERROR(error) + + error = image_test(channel_order, source) + .run(device, context, queue, num_elements); + RETURN_ON_ERROR(error) + error = image_test(channel_order, source) + .run(device, context, queue, num_elements); + RETURN_ON_ERROR(error) + error = image_test(channel_order, source) + .run(device, context, queue, num_elements); + RETURN_ON_ERROR(error) + + error = image_test(channel_order, source) + .run(device, context, queue, num_elements); + RETURN_ON_ERROR(error) + } + + return error; +} + + +AUTO_TEST_CASE(test_images_sample_1d) +(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + return run_test_cases(device, context, queue, num_elements); +} + +AUTO_TEST_CASE(test_images_sample_2d) +(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + return run_test_cases(device, context, queue, num_elements); +} + +AUTO_TEST_CASE(test_images_sample_3d) +(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + 
return run_test_cases(device, context, queue, num_elements); +} + +} // namespace + +#endif // TEST_CONFORMANCE_CLCPP_IMAGES_TEST_SAMPLE_HPP diff --git a/test_conformance/clcpp/images/test_write.hpp b/test_conformance/clcpp/images/test_write.hpp new file mode 100644 index 00000000..0f544874 --- /dev/null +++ b/test_conformance/clcpp/images/test_write.hpp @@ -0,0 +1,327 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef TEST_CONFORMANCE_CLCPP_IMAGES_TEST_WRITE_HPP +#define TEST_CONFORMANCE_CLCPP_IMAGES_TEST_WRITE_HPP + +#include +#include +#include +#include +#include + +#include "common.hpp" + + +namespace test_images_write { + +template +struct image_test : image_test_base +{ + cl_channel_order channel_order; + + image_test(cl_channel_order channel_order) : + channel_order(channel_order) + { } +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + std::string generate_source() + { + std::stringstream s; + s << R"( + typedef )" << type_name() << R"( element_type; + + kernel void test( + write_only )" << image_test::image_type_name() << R"(_t img, + const global int4 *coords, + const global element_type *input + ) { + const ulong 
gid = get_global_linear_id(); + + write_image)" << image_test::function_suffix() << + "(img, coords[gid]." << image_test::coord_accessor() << R"(, input[gid]); + } + )"; + + return s.str(); + } +#else + std::string generate_source() + { + std::stringstream s; + s << R"( + #include + #include + #include + #include + using namespace cl; + )"; + + s << R"( + typedef )" << type_name() << R"( element_type; + + kernel void test( + )" << image_test::image_type_name() << R"( img, + const global_ptr coords, + const global_ptr input + ) { + const ulong gid = get_global_linear_id(); + + img.write(coords[gid].)" << image_test::coord_accessor() << R"(, input[gid]); + } + )"; + + return s.str(); + } +#endif + + int run(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) + { + int error = CL_SUCCESS; + + cl_program program; + cl_kernel kernel; + + std::string kernel_name = "test"; + std::string source = generate_source(); + +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +// Only OpenCL C++ to SPIR-V compilation +#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) + error = create_opencl_kernel( + context, &program, &kernel, + source, kernel_name + ); + RETURN_ON_ERROR(error) + return error; +// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) +#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + error = create_opencl_kernel( + context, &program, &kernel, + source, kernel_name, "-cl-std=CL2.0", false + ); + RETURN_ON_ERROR(error) +// Normal run +#else + error = create_opencl_kernel( + context, &program, &kernel, + source, kernel_name + ); + RETURN_ON_ERROR(error) +#endif + + using element_type = typename image_test::element_type; + using coord_type = cl_int4; + using scalar_element_type = typename 
scalar_type::type; + using channel_type = typename image_test::channel_type; + + cl_image_format image_format; + image_format.image_channel_order = channel_order; + image_format.image_channel_data_type = ChannelType; + + const size_t pixel_size = get_pixel_size(&image_format); + const size_t channel_count = get_channel_order_channel_count(image_format.image_channel_order); + + cl_image_desc image_desc; + image_desc.image_type = ImageType; + if (ImageType == CL_MEM_OBJECT_IMAGE1D) + { + image_desc.image_width = 2048; + image_desc.image_height = 1; + image_desc.image_depth = 1; + } + else if (ImageType == CL_MEM_OBJECT_IMAGE2D) + { + image_desc.image_width = 256; + image_desc.image_height = 256; + image_desc.image_depth = 1; + } + else if (ImageType == CL_MEM_OBJECT_IMAGE3D) + { + image_desc.image_width = 64; + image_desc.image_height = 64; + image_desc.image_depth = 64; + } + image_desc.image_array_size = 0; + image_desc.image_row_pitch = image_desc.image_width * pixel_size; + image_desc.image_slice_pitch = image_desc.image_row_pitch * image_desc.image_height; + image_desc.num_mip_levels = 0; + image_desc.num_samples = 0; + image_desc.mem_object = NULL; + + image_descriptor image_info = create_image_descriptor(image_desc, &image_format); + + std::vector random_image_values = generate_input( + image_desc.image_width * image_desc.image_height * image_desc.image_depth * channel_count, + image_test::channel_min(), image_test::channel_max(), + std::vector() + ); + + const size_t count = num_elements; + + std::vector coords = generate_input( + count, + detail::make_value(0), + coord_type { + static_cast(image_desc.image_width - 1), + static_cast(image_desc.image_height - 1), + static_cast(image_desc.image_depth - 1), + 0 + }, + std::vector() + ); + + std::vector input(count); + for (size_t i = 0; i < count; i++) + { + const coord_type c = coords[i]; + + // Use read_image_pixel from harness/imageHelpers to fill input values + // (it will deal with correct channels, orders 
etc.) + read_image_pixel(static_cast(random_image_values.data()), &image_info, + c.s[0], c.s[1], c.s[2], + input[i].s); + } + + // image_row_pitch and image_slice_pitch must be 0, when clCreateImage is used with host_ptr = NULL + image_desc.image_row_pitch = 0; + image_desc.image_slice_pitch = 0; + cl_mem img = clCreateImage(context, CL_MEM_WRITE_ONLY, + &image_format, &image_desc, NULL, &error); + RETURN_ON_CL_ERROR(error, "clCreateImage") + + cl_mem coords_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, + sizeof(coord_type) * count, static_cast(coords.data()), &error); + RETURN_ON_CL_ERROR(error, "clCreateBuffer") + + cl_mem input_buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, + sizeof(element_type) * count, static_cast(input.data()), &error); + RETURN_ON_CL_ERROR(error, "clCreateBuffer") + + error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &img); + RETURN_ON_CL_ERROR(error, "clSetKernelArg") + error = clSetKernelArg(kernel, 1, sizeof(coords_buffer), &coords_buffer); + RETURN_ON_CL_ERROR(error, "clSetKernelArg") + error = clSetKernelArg(kernel, 2, sizeof(input_buffer), &input_buffer); + RETURN_ON_CL_ERROR(error, "clSetKernelArg") + + const size_t global_size = count; + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, NULL, 0, NULL, NULL); + RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel") + + std::vector image_values(image_desc.image_width * image_desc.image_height * image_desc.image_depth * channel_count); + + const size_t origin[3] = { 0 }; + const size_t region[3] = { image_desc.image_width, image_desc.image_height, image_desc.image_depth }; + error = clEnqueueReadImage( + queue, img, CL_TRUE, + origin, region, 0, 0, + static_cast(image_values.data()), + 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer") + + for (size_t i = 0; i < count; i++) + { + const coord_type c = coords[i]; + const element_type expected = input[i]; + + element_type result; + 
read_image_pixel(static_cast(image_values.data()), &image_info, + c.s[0], c.s[1], c.s[2], + result.s); + + if (!are_equal(result, expected)) + { + RETURN_ON_ERROR_MSG(-1, + "Writing to coordinates %s failed. Expected: %s, got: %s", + format_value(c).c_str(), format_value(expected).c_str(), format_value(result).c_str() + ); + } + } + + clReleaseMemObject(img); + clReleaseMemObject(coords_buffer); + clReleaseMemObject(input_buffer); + clReleaseKernel(kernel); + clReleaseProgram(program); + return error; + } +}; + +template +int run_test_cases(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + if (!is_test_supported(device)) + return CL_SUCCESS; + + int error = CL_SUCCESS; + + for (auto channel_order : get_channel_orders(device)) + { + error = image_test(channel_order) + .run(device, context, queue, num_elements); + RETURN_ON_ERROR(error) + error = image_test(channel_order) + .run(device, context, queue, num_elements); + RETURN_ON_ERROR(error) + error = image_test(channel_order) + .run(device, context, queue, num_elements); + RETURN_ON_ERROR(error) + + error = image_test(channel_order) + .run(device, context, queue, num_elements); + RETURN_ON_ERROR(error) + error = image_test(channel_order) + .run(device, context, queue, num_elements); + RETURN_ON_ERROR(error) + error = image_test(channel_order) + .run(device, context, queue, num_elements); + RETURN_ON_ERROR(error) + + error = image_test(channel_order) + .run(device, context, queue, num_elements); + RETURN_ON_ERROR(error) + } + + return error; +} + + +AUTO_TEST_CASE(test_images_write_1d) +(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + return run_test_cases(device, context, queue, num_elements); +} + +AUTO_TEST_CASE(test_images_write_2d) +(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + return run_test_cases(device, context, queue, num_elements); +} + +AUTO_TEST_CASE(test_images_write_3d) 
+(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + return run_test_cases(device, context, queue, num_elements); +} + +} // namespace + +#endif // TEST_CONFORMANCE_CLCPP_IMAGES_TEST_WRITE_HPP diff --git a/test_conformance/clcpp/integer_funcs/24bit_funcs.hpp b/test_conformance/clcpp/integer_funcs/24bit_funcs.hpp new file mode 100644 index 00000000..98da450b --- /dev/null +++ b/test_conformance/clcpp/integer_funcs/24bit_funcs.hpp @@ -0,0 +1,142 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef TEST_CONFORMANCE_CLCPP_INTEGER_FUNCS_24BIT_HPP +#define TEST_CONFORMANCE_CLCPP_INTEGER_FUNCS_24BIT_HPP + +#include "common.hpp" +#include + +template +struct int_func_mad24 : public ternary_func +{ + std::string str() + { + return "mad24"; + } + + std::string headers() + { + return "#include \n"; + } + + OUT1 operator()(const IN1& x, const IN2& y, const IN3& z) + { + static_assert( + std::is_same::value + && std::is_same::value + && std::is_same::value, + "All types must be the same" + ); + static_assert( + std::is_same::value || std::is_same::value, + "Function takes only signed/unsigned integers." 
+ ); + return (x * y) + z; + } + + IN1 min1() + { + return 0; + } + + IN1 max1() + { + return (std::numeric_limits::max)() & IN1(0x00FFFF); + } + + IN2 min2() + { + return 0; + } + + IN2 max2() + { + return (std::numeric_limits::max)() & IN2(0x00FFFF); + } +}; + +template +struct int_func_mul24 : public binary_func +{ + std::string str() + { + return "mul24"; + } + + std::string headers() + { + return "#include \n"; + } + + OUT1 operator()(const IN1& x, const IN2& y) + { + static_assert( + std::is_same::value + && std::is_same::value, + "All types must be the same" + ); + static_assert( + std::is_same::value || std::is_same::value, + "Function takes only signed/unsigned integers." + ); + return x * y; + } + + IN1 min1() + { + return 0; + } + + IN1 max1() + { + return (std::numeric_limits::max)() & IN1(0x00FFFF); + } + + IN2 min2() + { + return 0; + } + + IN2 max2() + { + return (std::numeric_limits::max)() & IN2(0x00FFFF); + } +}; + +AUTO_TEST_CASE(test_int_24bit_funcs) +(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + int error = CL_SUCCESS; + int last_error = CL_SUCCESS; + + // intn mad24(intn x, intn y, intn z); + // uintn mad24(uintn x, uintn y, uintn z); + TEST_TERNARY_FUNC_MACRO((int_func_mad24())) + TEST_TERNARY_FUNC_MACRO((int_func_mad24())) + + // intn mul24(intn x, intn y); + // uintn mul24(uintn x, uintn y); + TEST_BINARY_FUNC_MACRO((int_func_mul24())) + TEST_BINARY_FUNC_MACRO((int_func_mul24())) + + if(error != CL_SUCCESS) + { + return -1; + } + return error; +} + +#endif // TEST_CONFORMANCE_CLCPP_INTEGER_FUNCS_24BIT_HPP diff --git a/test_conformance/clcpp/integer_funcs/CMakeLists.txt b/test_conformance/clcpp/integer_funcs/CMakeLists.txt new file mode 100644 index 00000000..88eb2a49 --- /dev/null +++ b/test_conformance/clcpp/integer_funcs/CMakeLists.txt @@ -0,0 +1,12 @@ +set(MODULE_NAME CPP_INTEGER_FUNCS) + +set(${MODULE_NAME}_SOURCES + main.cpp + ../../../test_common/harness/errorHelpers.c + 
../../../test_common/harness/testHarness.c + ../../../test_common/harness/kernelHelpers.c + ../../../test_common/harness/msvc9.c + ../../../test_common/harness/parseParameters.cpp +) + +include(../../CMakeCommon.txt) diff --git a/test_conformance/clcpp/integer_funcs/bitwise_funcs.hpp b/test_conformance/clcpp/integer_funcs/bitwise_funcs.hpp new file mode 100644 index 00000000..13ca1563 --- /dev/null +++ b/test_conformance/clcpp/integer_funcs/bitwise_funcs.hpp @@ -0,0 +1,232 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef TEST_CONFORMANCE_CLCPP_INTEGER_FUNCS_BITWISE_HPP +#define TEST_CONFORMANCE_CLCPP_INTEGER_FUNCS_BITWISE_HPP + +#include "common.hpp" +#include + +template +struct int_func_popcount : public unary_func +{ + std::string str() + { + return "popcount"; + } + + std::string headers() + { + return "#include \n"; + } + + OUT1 operator()(IN1 x) + { + OUT1 count = 0; + for (count = 0; x != 0; count++) + { + x &= x - 1; + } + return count; + } +}; + +template +struct int_func_clz : public unary_func +{ + std::string str() + { + return "clz"; + } + + std::string headers() + { + return "#include \n"; + } + + OUT1 operator()(IN1 x) + { + OUT1 count = 0; + if(std::is_unsigned::value) + { + cl_ulong value = x; + value <<= 8 * sizeof(value) - (8 * sizeof(x)); + for(count = 0; 0 == (value & (CL_LONG_MIN)); count++) + { + value <<= 1; + } + } + else + { + cl_long value = x; + value <<= 8 * sizeof(value) - (8 * sizeof(x)); + for(count = 0; 0 == (value & (CL_LONG_MIN)); count++) + { + value <<= 1; + } + } + return count; + } +}; + +template +struct int_func_ctz : public unary_func +{ + std::string str() + { + return "ctz"; + } + + std::string headers() + { + return "#include \n"; + } + + OUT1 operator()(IN1 x) + { + if(x == 0) + return sizeof(x); + + OUT1 count = 0; + IN1 value = x; + for(count = 0; 0 == (value & 0x1); count++) + { + value >>= 1; + } + return count; + } +}; + +template +struct int_func_rotate : public binary_func +{ + std::string str() + { + return "rotate"; + } + + std::string headers() + { + return "#include \n"; + } + + OUT1 operator()(IN1 value, IN2 shift) + { + static_assert( + std::is_unsigned::value, + "Only unsigned integers are supported" + ); + if ((shift &= sizeof(value)*8 - 1) == 0) + return value; + return (value << shift) | (value >> (sizeof(value)*8 - shift)); + } + + IN2 min2() + { + return 0; + } + + IN2 max2() + { + return sizeof(IN1) * 8; + } +}; + +template +struct int_func_upsample : public binary_func +{ + std::string str() + { + 
return "upsample"; + } + + std::string headers() + { + return "#include \n"; + } + + OUT1 operator()(IN1 hi, IN2 lo) + { + static_assert( + sizeof(IN1) == sizeof(IN2), + "sizeof(IN1) != sizeof(IN2)" + ); + static_assert( + sizeof(OUT1) == 2 * sizeof(IN1), + "sizeof(OUT1) != 2 * sizeof(IN1)" + ); + static_assert( + std::is_unsigned::value, + "IN2 type must be unsigned" + ); + return (static_cast(hi) << (8*sizeof(IN1))) | lo; + } + + IN2 min2() + { + return 0; + } + + IN2 max2() + { + return sizeof(IN1) * 8; + } +}; + +AUTO_TEST_CASE(test_int_bitwise_funcs) +(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + int error = CL_SUCCESS; + int last_error = CL_SUCCESS; + + TEST_UNARY_FUNC_MACRO((int_func_popcount())) + TEST_UNARY_FUNC_MACRO((int_func_popcount())) + TEST_UNARY_FUNC_MACRO((int_func_popcount())) + TEST_UNARY_FUNC_MACRO((int_func_popcount())) + + TEST_UNARY_FUNC_MACRO((int_func_clz())) + TEST_UNARY_FUNC_MACRO((int_func_clz())) + TEST_UNARY_FUNC_MACRO((int_func_clz())) + TEST_UNARY_FUNC_MACRO((int_func_clz())) + + TEST_UNARY_FUNC_MACRO((int_func_ctz())) + TEST_UNARY_FUNC_MACRO((int_func_ctz())) + TEST_UNARY_FUNC_MACRO((int_func_ctz())) + TEST_UNARY_FUNC_MACRO((int_func_ctz())) + + TEST_BINARY_FUNC_MACRO((int_func_rotate())) + TEST_BINARY_FUNC_MACRO((int_func_rotate())) + + // shortn upsample(charn hi, ucharn lo); + TEST_BINARY_FUNC_MACRO((int_func_upsample())) + // ushortn upsample(ucharn hi, ucharn lo); + TEST_BINARY_FUNC_MACRO((int_func_upsample())) + // intn upsample(shortn hi, ushortn lo); + TEST_BINARY_FUNC_MACRO((int_func_upsample())) + // uintn upsample(ushortn hi, ushortn lo); + TEST_BINARY_FUNC_MACRO((int_func_upsample())) + // longn upsample(intn hi, uintn lo); + TEST_BINARY_FUNC_MACRO((int_func_upsample())) + // ulongn upsample(uintn hi, uintn lo); + TEST_BINARY_FUNC_MACRO((int_func_upsample())) + + if(error != CL_SUCCESS) + { + return -1; + } + return error; +} + +#endif // 
TEST_CONFORMANCE_CLCPP_INTEGER_FUNCS_BITWISE_HPP diff --git a/test_conformance/clcpp/integer_funcs/common.hpp b/test_conformance/clcpp/integer_funcs/common.hpp new file mode 100644 index 00000000..f04811e1 --- /dev/null +++ b/test_conformance/clcpp/integer_funcs/common.hpp @@ -0,0 +1,26 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef TEST_CONFORMANCE_CLCPP_INTEGER_FUNCS_COMMON_HPP +#define TEST_CONFORMANCE_CLCPP_INTEGER_FUNCS_COMMON_HPP + +#include +#include +#include + +#include "../common.hpp" +#include "../funcs_test_utils.hpp" + +#endif // TEST_CONFORMANCE_CLCPP_INTEGER_FUNCS_COMMON_HPP diff --git a/test_conformance/clcpp/integer_funcs/main.cpp b/test_conformance/clcpp/integer_funcs/main.cpp new file mode 100644 index 00000000..ca5d54a9 --- /dev/null +++ b/test_conformance/clcpp/integer_funcs/main.cpp @@ -0,0 +1,31 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../common.hpp" + +#include "bitwise_funcs.hpp" +#include "numeric_funcs.hpp" +#include "24bit_funcs.hpp" + +int main(int argc, const char *argv[]) +{ + // Get list to all test functions + std::vector testfn_list = autotest::test_suite::get_test_functions(); + // Get names of all test functions + std::vector testfn_names = autotest::test_suite::get_test_names(); + // Create a vector of pointers to the names test functions + std::vector testfn_names_c_str = autotest::get_strings_ptrs(testfn_names); + return runTestHarness(argc, argv, testfn_list.size(), testfn_list.data(), testfn_names_c_str.data(), false, false, 0); +} diff --git a/test_conformance/clcpp/integer_funcs/numeric_funcs.hpp b/test_conformance/clcpp/integer_funcs/numeric_funcs.hpp new file mode 100644 index 00000000..21d75c5a --- /dev/null +++ b/test_conformance/clcpp/integer_funcs/numeric_funcs.hpp @@ -0,0 +1,703 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef TEST_CONFORMANCE_CLCPP_INTEGER_FUNCS_NUMERIC_HPP +#define TEST_CONFORMANCE_CLCPP_INTEGER_FUNCS_NUMERIC_HPP + +#include "common.hpp" +#include + +template +struct int_func_abs : public unary_func +{ + std::string str() + { + return "abs"; + } + + std::string headers() + { + return "#include \n"; + } + + OUT1 operator()(const IN1& x) + { + static_assert( + std::is_unsigned::value, + "OUT1 type must be unsigned" + ); + if(x < IN1(0)) + return static_cast(-x); + return static_cast(x); + } +}; + +template +struct int_func_abs_diff : public binary_func +{ + std::string str() + { + return "abs_diff"; + } + + std::string headers() + { + return "#include \n"; + } + + OUT1 operator()(const IN1& x, const IN2& y) + { + static_assert( + std::is_same::value, + "IN1 must be IN2" + ); + static_assert( + std::is_unsigned::value, + "OUT1 type must be unsigned" + ); + if(x < y) + return static_cast(y-x); + return static_cast(x-y); + } +}; + +template +struct int_func_add_sat : public binary_func +{ + std::string str() + { + return "add_sat"; + } + + std::string headers() + { + return "#include \n"; + } + + OUT1 operator()(const IN1& x, const IN2& y) + { + static_assert( + std::is_same::value, + "IN1 must be IN2" + ); + static_assert( + std::is_same::value, + "OUT1 must be IN2" + ); + // sat unsigned integers + if(std::is_unsigned::value) + { + OUT1 z = x + y; + if(z < x || z < y) + return (std::numeric_limits::max)(); + return z; + } + // sat signed integers + OUT1 z = x + y; + if(y > 0) + { + if(z < x) + return (std::numeric_limits::max)(); + } + else + { + if(z > x) + return (std::numeric_limits::min)(); + } + return z; + } +}; + +template +struct int_func_hadd : public binary_func +{ + std::string str() + { + return "hadd"; + } + + std::string headers() + { + return "#include \n"; + } + + OUT1 operator()(const IN1& x, const IN2& y) + { + static_assert( + std::is_same::value, + "IN1 must be IN2" + ); + static_assert( + std::is_same::value, + "OUT1 must be IN2" + ); + 
return (x >> OUT1(1)) + (y >> OUT1(1)) + (x & y & OUT1(1)); + } +}; + +template +struct int_func_rhadd : public binary_func +{ + std::string str() + { + return "rhadd"; + } + + std::string headers() + { + return "#include \n"; + } + + OUT1 operator()(const IN1& x, const IN2& y) + { + static_assert( + std::is_same::value, + "IN1 must be IN2" + ); + static_assert( + std::is_same::value, + "OUT1 must be IN2" + ); + return (x >> OUT1(1)) + (y >> OUT1(1)) + ((x | y) & OUT1(1)); + } +}; + +// clamp for scalars +template +struct int_func_clamp : public ternary_func +{ + std::string str() + { + return "clamp"; + } + + std::string headers() + { + return "#include \n"; + } + + OUT1 operator()(const IN1& x, const IN2& minval, const IN3& maxval) + { + static_assert( + std::is_same::value, + "IN3 must be IN2" + ); + static_assert( + std::is_same::value, + "OUT1 must be IN1" + ); + return (std::min)((std::max)(x, minval), maxval); + } + + IN2 min2() + { + return (std::numeric_limits::min)(); + } + + IN2 max2() + { + return (std::numeric_limits::max)() / IN2(2); + } + + IN3 min3() + { + return IN3(1) + ((std::numeric_limits::max)() / IN3(2)); + } + + IN3 max3() + { + return (std::numeric_limits::max)(); + } +}; + +// gentype clamp(gentype x, scalar minval, scalar maxval); +template +struct int_func_clamp::value>::type> : public ternary_func +{ + std::string str() + { + return "clamp"; + } + + std::string headers() + { + return "#include \n"; + } + + OUT1 operator()(const IN1& x, const IN2& minval, const IN3& maxval) + { + static_assert( + std::is_same::value, + "IN3 must be IN2" + ); + static_assert( + !is_vector_type::value && !is_vector_type::value, + "IN3 and IN2 must be scalar" + ); + static_assert( + std::is_same::value, + "OUT1 must be IN1" + ); + OUT1 result; + for(size_t i = 0; i < vector_size::value; i++) + { + result.s[i] = (std::min)((std::max)(x.s[i], minval), maxval); + } + return result; + } + + IN1 min1() + { + typedef typename scalar_type::type SCALAR1; + IN1 
min1; + for(size_t i = 0; i < vector_size::value; i++) + { + min1.s[i] = (std::numeric_limits::min)(); + } + return min1; + } + + IN1 max1() + { + typedef typename scalar_type::type SCALAR1; + IN1 max1; + for(size_t i = 0; i < vector_size::value; i++) + { + max1.s[i] = (std::numeric_limits::max)(); + } + return max1; + } + + IN2 min2() + { + return (std::numeric_limits::min)(); + } + + IN2 max2() + { + return (std::numeric_limits::max)() / IN2(2); + } + + IN3 min3() + { + return IN3(1) + ((std::numeric_limits::max)() / IN3(2)); + } + + IN3 max3() + { + return (std::numeric_limits::max)(); + } +}; + +template +struct int_func_mul_hi : public binary_func +{ + std::string str() + { + return "mul_hi"; + } + + std::string headers() + { + return "#include \n"; + } + + OUT1 operator()(const IN1& x, const IN2& y) + { + static_assert( + std::is_same::value + && std::is_same::value, + "Types must be the same" + ); + static_assert( + !std::is_same::value && !std::is_same::value, + "Operation unimplemented for 64-bit scalars" + ); + cl_long xl = static_cast(x); + cl_long yl = static_cast(y); + return static_cast((xl * yl) >> (8 * sizeof(OUT1))); + } +}; + +template +struct int_func_mad_hi : public ternary_func +{ + std::string str() + { + return "mad_hi"; + } + + std::string headers() + { + return "#include \n"; + } + + OUT1 operator()(const IN1& x, const IN2& y, const IN3& z) + { + static_assert( + std::is_same::value + && std::is_same::value + && std::is_same::value, + "Types must be the same" + ); + return int_func_mul_hi()(x, y) + z; + } +}; + +// This test is implemented only for unsigned integers +template +struct int_func_mad_sat : public ternary_func +{ + std::string str() + { + return "mad_sat"; + } + + std::string headers() + { + return "#include \n"; + } + + OUT1 operator()(const IN1& x, const IN2& y, const IN3& z) + { + static_assert( + std::is_same::value + && std::is_same::value + && std::is_same::value, + "Types must be the same" + ); + static_assert( + 
std::is_unsigned::value, + "Test operation is not implemented for signed integers" + ); + // mad_sat unsigned integers + OUT1 w1 = (x * y); + if (x != 0 && w1 / x != y) + return (std::numeric_limits::max)(); + OUT1 w2 = w1 + z; + if(w2 < w1) + return (std::numeric_limits::max)(); + return w2; + } +}; + +template +struct int_func_sub_sat : public binary_func +{ + std::string str() + { + return "sub_sat"; + } + + std::string headers() + { + return "#include \n"; + } + + OUT1 operator()(const IN1& x, const IN2& y) + { + static_assert( + std::is_same::value && std::is_same::value, + "IN1, IN2 and OUT1 must be the same types" + ); + // sat unsigned integers + if(std::is_unsigned::value) + { + OUT1 z = x - y; + if(x < y) + return (std::numeric_limits::min)(); + return z; + } + // sat signed integers + OUT1 z = x - y; + if(y < 0) + { + if(z < x) + return (std::numeric_limits::max)(); + } + else + { + if(z > x) + return (std::numeric_limits::min)(); + } + return z; + } +}; + +template +struct int_func_max : public binary_func +{ + std::string str() + { + return "max"; + } + + std::string headers() + { + return "#include \n"; + } + + OUT1 operator()(const IN1& x, const IN2& y) + { + static_assert( + std::is_same::value && std::is_same::value, + "IN1, IN2 and OUT1 must be the same types" + ); + return (std::max)(x, y); + } +}; + +template +struct int_func_max::value>::type> : public binary_func +{ + std::string str() + { + return "max"; + } + + std::string headers() + { + return "#include \n"; + } + + IN1 min1() + { + typedef typename scalar_type::type SCALAR1; + IN1 min1; + for(size_t i = 0; i < vector_size::value; i++) + { + min1.s[i] = (std::numeric_limits::min)(); + } + return min1; + } + + IN1 max1() + { + typedef typename scalar_type::type SCALAR1; + IN1 max1; + for(size_t i = 0; i < vector_size::value; i++) + { + max1.s[i] = (std::numeric_limits::max)(); + } + return max1; + } + + OUT1 operator()(const IN1& x, const IN2& y) + { + static_assert( + std::is_same::value, 
+ "IN1 and OUT1 must be the same types" + ); + static_assert( + !is_vector_type::value, + "IN2 must be scalar" + ); + static_assert( + std::is_same::type, IN2>::value, + "IN2 must match with OUT1 and IN1" + ); + IN1 result = x; + for(size_t i = 0; i < vector_size::value; i++) + { + result.s[i] = (std::max)(x.s[i], y); + } + return result; + } +}; + +template +struct int_func_min : public binary_func +{ + std::string str() + { + return "min"; + } + + std::string headers() + { + return "#include \n"; + } + + OUT1 operator()(const IN1& x, const IN2& y) + { + static_assert( + std::is_same::value && std::is_same::value, + "IN1, IN2 and OUT1 must be the same types" + ); + return (std::min)(x, y); + } +}; + +template +struct int_func_min::value>::type> : public binary_func +{ + std::string str() + { + return "min"; + } + + std::string headers() + { + return "#include \n"; + } + + IN1 min1() + { + typedef typename scalar_type::type SCALAR1; + IN1 min1; + for(size_t i = 0; i < vector_size::value; i++) + { + min1.s[i] = (std::numeric_limits::min)(); + } + return min1; + } + + IN1 max1() + { + typedef typename scalar_type::type SCALAR1; + IN1 max1; + for(size_t i = 0; i < vector_size::value; i++) + { + max1.s[i] = (std::numeric_limits::max)(); + } + return max1; + } + + OUT1 operator()(const IN1& x, const IN2& y) + { + static_assert( + std::is_same::value, + "IN1 and OUT1 must be the same types" + ); + static_assert( + !is_vector_type::value, + "IN2 must be scalar" + ); + static_assert( + std::is_same::type, IN2>::value, + "IN2 must match with OUT1 and IN1" + ); + IN1 result = x; + for(size_t i = 0; i < vector_size::value; i++) + { + result.s[i] = (std::min)(x.s[i], y); + } + return result; + } +}; + +AUTO_TEST_CASE(test_int_numeric_funcs) +(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + int error = CL_SUCCESS; + int last_error = CL_SUCCESS; + + // ugentype abs(gentype x); + TEST_UNARY_FUNC_MACRO((int_func_abs())) + 
TEST_UNARY_FUNC_MACRO((int_func_abs())) + TEST_UNARY_FUNC_MACRO((int_func_abs())) + TEST_UNARY_FUNC_MACRO((int_func_abs())) + + // ugentype abs_diff(gentype x, gentype y); + TEST_BINARY_FUNC_MACRO((int_func_abs_diff())) + TEST_BINARY_FUNC_MACRO((int_func_abs_diff())) + TEST_BINARY_FUNC_MACRO((int_func_abs_diff())) + TEST_BINARY_FUNC_MACRO((int_func_abs_diff())) + + // gentype add_sat(gentype x, gentype y); + TEST_BINARY_FUNC_MACRO((int_func_add_sat())) + TEST_BINARY_FUNC_MACRO((int_func_add_sat())) + TEST_BINARY_FUNC_MACRO((int_func_add_sat())) + TEST_BINARY_FUNC_MACRO((int_func_add_sat())) + + // gentype hadd(gentype x, gentype y); + TEST_BINARY_FUNC_MACRO((int_func_hadd())) + TEST_BINARY_FUNC_MACRO((int_func_hadd())) + TEST_BINARY_FUNC_MACRO((int_func_hadd())) + TEST_BINARY_FUNC_MACRO((int_func_hadd())) + + // gentype rhadd(gentype x, gentype y); + TEST_BINARY_FUNC_MACRO((int_func_rhadd())) + TEST_BINARY_FUNC_MACRO((int_func_rhadd())) + TEST_BINARY_FUNC_MACRO((int_func_rhadd())) + TEST_BINARY_FUNC_MACRO((int_func_rhadd())) + + // gentype clamp(gentype x, gentype minval, gentype maxval); + TEST_TERNARY_FUNC_MACRO((int_func_clamp())) + TEST_TERNARY_FUNC_MACRO((int_func_clamp())) + TEST_TERNARY_FUNC_MACRO((int_func_clamp())) + TEST_TERNARY_FUNC_MACRO((int_func_clamp())) + + // gentype clamp(gentype x, scalar minval, scalar maxval); + TEST_TERNARY_FUNC_MACRO((int_func_clamp())) + TEST_TERNARY_FUNC_MACRO((int_func_clamp())) + TEST_TERNARY_FUNC_MACRO((int_func_clamp())) + TEST_TERNARY_FUNC_MACRO((int_func_clamp())) + + // gentype mad_hi(gentype a, gentype b, gentype c); + TEST_TERNARY_FUNC_MACRO((int_func_mad_hi())) + TEST_TERNARY_FUNC_MACRO((int_func_mad_hi())) + TEST_TERNARY_FUNC_MACRO((int_func_mad_hi())) + TEST_TERNARY_FUNC_MACRO((int_func_mad_hi())) + + // gentype mad_sat(gentype a, gentype b, gentype c); + TEST_TERNARY_FUNC_MACRO((int_func_mad_sat())) + TEST_TERNARY_FUNC_MACRO((int_func_mad_sat())) + TEST_TERNARY_FUNC_MACRO((int_func_mad_sat())) + + // gentype 
max(gentype x, gentype y); + TEST_BINARY_FUNC_MACRO((int_func_max())) + TEST_BINARY_FUNC_MACRO((int_func_max())) + TEST_BINARY_FUNC_MACRO((int_func_max())) + TEST_BINARY_FUNC_MACRO((int_func_max())) + + // gentype max(gentype x, scalar y); + TEST_BINARY_FUNC_MACRO((int_func_max())) + TEST_BINARY_FUNC_MACRO((int_func_max())) + TEST_BINARY_FUNC_MACRO((int_func_max())) + TEST_BINARY_FUNC_MACRO((int_func_max())) + + // gentype min(gentype x, gentype y); + TEST_BINARY_FUNC_MACRO((int_func_min())) + TEST_BINARY_FUNC_MACRO((int_func_min())) + TEST_BINARY_FUNC_MACRO((int_func_min())) + TEST_BINARY_FUNC_MACRO((int_func_min())) + + // gentype min(gentype x, scalar y); + TEST_BINARY_FUNC_MACRO((int_func_min())) + TEST_BINARY_FUNC_MACRO((int_func_min())) + TEST_BINARY_FUNC_MACRO((int_func_min())) + TEST_BINARY_FUNC_MACRO((int_func_min())) + + // gentype mul_hi(gentype x, gentype y); + TEST_BINARY_FUNC_MACRO((int_func_mul_hi())) + TEST_BINARY_FUNC_MACRO((int_func_mul_hi())) + TEST_BINARY_FUNC_MACRO((int_func_mul_hi())) + TEST_BINARY_FUNC_MACRO((int_func_mul_hi())) + + // gentype sub_sat(gentype x, gentype y); + TEST_BINARY_FUNC_MACRO((int_func_sub_sat())) + TEST_BINARY_FUNC_MACRO((int_func_sub_sat())) + TEST_BINARY_FUNC_MACRO((int_func_sub_sat())) + TEST_BINARY_FUNC_MACRO((int_func_sub_sat())) + + if(error != CL_SUCCESS) + { + return -1; + } + return error; +} + +#endif // TEST_CONFORMANCE_CLCPP_INTEGER_FUNCS_NUMERIC_HPP diff --git a/test_conformance/clcpp/math_funcs/CMakeLists.txt b/test_conformance/clcpp/math_funcs/CMakeLists.txt new file mode 100644 index 00000000..8b48aac1 --- /dev/null +++ b/test_conformance/clcpp/math_funcs/CMakeLists.txt @@ -0,0 +1,12 @@ +set(MODULE_NAME CPP_MATH_FUNCS) + +set(${MODULE_NAME}_SOURCES + main.cpp + ../../../test_common/harness/errorHelpers.c + ../../../test_common/harness/testHarness.c + ../../../test_common/harness/kernelHelpers.c + ../../../test_common/harness/msvc9.c + ../../../test_common/harness/parseParameters.cpp +) + 
+include(../../CMakeCommon.txt) diff --git a/test_conformance/clcpp/math_funcs/common.hpp b/test_conformance/clcpp/math_funcs/common.hpp new file mode 100644 index 00000000..32249056 --- /dev/null +++ b/test_conformance/clcpp/math_funcs/common.hpp @@ -0,0 +1,347 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef TEST_CONFORMANCE_CLCPP_MATH_FUNCS_COMMON_FUNCS_HPP +#define TEST_CONFORMANCE_CLCPP_MATH_FUNCS_COMMON_FUNCS_HPP + +#include +#include + +#include "../common.hpp" +#include "../funcs_test_utils.hpp" + +#include "reference.hpp" + +#ifndef MATH_FUNCS_CLASS_NAME + #define MATH_FUNCS_CLASS_NAME(x, y) x ## _func_ ## y +#endif + +#define MATH_FUNCS_DEFINE_UNARY_FUNC1(GROUP_NAME, NAME, OCL_FUNC, HOST_FUNC, USE_ULP, ULP, ULP_EMBEDDED, DELTA, MIN1, MAX1) \ +struct MATH_FUNCS_CLASS_NAME(GROUP_NAME, NAME) : public unary_func \ +{ \ + MATH_FUNCS_CLASS_NAME(GROUP_NAME, NAME)(bool is_embedded) : m_is_embedded(is_embedded) \ + { \ + \ + } \ + \ + std::string str() \ + { \ + return #OCL_FUNC; \ + } \ + \ + std::string headers() \ + { \ + return "#include \n"; \ + } \ + /* Reference value type is cl_double */ \ + cl_double operator()(const cl_float& x) \ + { \ + return (HOST_FUNC)(static_cast(x)); \ + } \ + \ + cl_float min1() \ + { \ + return MIN1; \ + } \ + \ + cl_float max1() \ + { \ + return MAX1; \ + } \ + \ + std::vector in1_special_cases() \ + { \ + return { \ + cl_float(0.0f), \ + cl_float(-0.0f), 
\ + cl_float(1.0f), \ + cl_float(-1.0f), \ + cl_float(2.0f), \ + cl_float(-2.0f), \ + std::numeric_limits::infinity(), \ + -std::numeric_limits::infinity(), \ + std::numeric_limits::quiet_NaN() \ + }; \ + } \ + \ + bool use_ulp() \ + { \ + return USE_ULP; \ + } \ + \ + template \ + typename make_vector_type::value>::type \ + delta(const cl_float& in1, const T& expected) \ + { \ + typedef \ + typename make_vector_type::value>::type \ + delta_vector_type; \ + (void) in1; \ + auto e = detail::make_value(DELTA); \ + return detail::multiply(e, expected); \ + } \ + \ + float ulp() \ + { \ + if(m_is_embedded) \ + { \ + return ULP_EMBEDDED; \ + } \ + return ULP; \ + } \ +private: \ + bool m_is_embedded; \ +}; + +#define MATH_FUNCS_DEFINE_BINARY_FUNC1(GROUP_NAME, NAME, OCL_NAME, HOST_FUNC, USE_ULP, ULP, ULP_EMBEDDED, DELTA, MIN1, MAX1, MIN2, MAX2) \ +struct MATH_FUNCS_CLASS_NAME(GROUP_NAME, NAME) : public binary_func \ +{ \ + MATH_FUNCS_CLASS_NAME(GROUP_NAME, NAME)(bool is_embedded) : m_is_embedded(is_embedded) \ + { \ + \ + } \ + \ + std::string str() \ + { \ + return #OCL_NAME; \ + } \ + \ + std::string headers() \ + { \ + return "#include \n"; \ + } \ + \ + cl_float operator()(const cl_float& x, const cl_float& y) \ + { \ + return (HOST_FUNC)(x, y); \ + } \ + \ + cl_float min1() \ + { \ + return MIN1; \ + } \ + \ + cl_float max1() \ + { \ + return MAX1; \ + } \ + \ + cl_float min2() \ + { \ + return MIN2; \ + } \ + \ + cl_float max2() \ + { \ + return MAX2; \ + } \ + \ + std::vector in1_special_cases() \ + { \ + return { \ + cl_float(0.0f), \ + cl_float(-0.0f), \ + cl_float(1.0f), \ + cl_float(-1.0f), \ + cl_float(2.0f), \ + cl_float(-2.0f), \ + std::numeric_limits::infinity(), \ + -std::numeric_limits::infinity(), \ + std::numeric_limits::quiet_NaN() \ + }; \ + } \ + \ + std::vector in2_special_cases() \ + { \ + return { \ + cl_float(0.0f), \ + cl_float(-0.0f), \ + cl_float(1.0f), \ + cl_float(-1.0f), \ + cl_float(2.0f), \ + cl_float(-2.0f), \ + 
std::numeric_limits::infinity(), \ + -std::numeric_limits::infinity(), \ + std::numeric_limits::quiet_NaN() \ + }; \ + } \ + \ + template \ + typename make_vector_type::value>::type \ + delta(const cl_float& in1, const cl_float& in2, const T& expected) \ + { \ + typedef \ + typename make_vector_type::value>::type \ + delta_vector_type; \ + (void) in1; \ + (void) in2; \ + auto e = detail::make_value(DELTA); \ + return detail::multiply(e, expected); \ + } \ + \ + bool use_ulp() \ + { \ + return USE_ULP; \ + } \ + \ + float ulp() \ + { \ + if(m_is_embedded) \ + { \ + return ULP_EMBEDDED; \ + } \ + return ULP; \ + } \ +private: \ + bool m_is_embedded; \ +}; + +#define MATH_FUNCS_DEFINE_TERNARY_FUNC1(GROUP_NAME, NAME, OCL_NAME, HOST_FUNC, USE_ULP, ULP, ULP_EMBEDDED, DELTA, MIN1, MAX1, MIN2, MAX2, MIN3, MAX3) \ +struct MATH_FUNCS_CLASS_NAME(GROUP_NAME, NAME) : public ternary_func \ +{ \ + MATH_FUNCS_CLASS_NAME(GROUP_NAME, NAME)(bool is_embedded) : m_is_embedded(is_embedded) \ + { \ + \ + } \ + \ + std::string str() \ + { \ + return #OCL_NAME; \ + } \ + \ + std::string headers() \ + { \ + return "#include \n"; \ + } \ + \ + cl_double operator()(const cl_float& x, const cl_float& y, const cl_float& z) \ + { \ + return (HOST_FUNC)(static_cast(x), static_cast(y), static_cast(z)); \ + } \ + \ + cl_float min1() \ + { \ + return MIN1; \ + } \ + \ + cl_float max1() \ + { \ + return MAX1; \ + } \ + \ + cl_float min2() \ + { \ + return MIN2; \ + } \ + \ + cl_float max2() \ + { \ + return MAX2; \ + } \ + \ + cl_float min3() \ + { \ + return MIN3; \ + } \ + \ + cl_float max3() \ + { \ + return MAX3; \ + } \ + \ + std::vector in1_special_cases() \ + { \ + return { \ + cl_float(0.0f), \ + cl_float(-0.0f), \ + cl_float(1.0f), \ + cl_float(-1.0f), \ + cl_float(2.0f), \ + cl_float(-2.0f), \ + std::numeric_limits::infinity(), \ + -std::numeric_limits::infinity(), \ + std::numeric_limits::quiet_NaN() \ + }; \ + } \ + \ + std::vector in2_special_cases() \ + { \ + return { \ + 
cl_float(0.0f), \ + cl_float(-0.0f), \ + cl_float(1.0f), \ + cl_float(-1.0f), \ + cl_float(2.0f), \ + cl_float(-2.0f), \ + std::numeric_limits::infinity(), \ + -std::numeric_limits::infinity(), \ + std::numeric_limits::quiet_NaN() \ + }; \ + } \ + \ + std::vector in3_special_cases() \ + { \ + return { \ + cl_float(0.0f), \ + cl_float(-0.0f), \ + cl_float(1.0f), \ + cl_float(-1.0f), \ + cl_float(2.0f), \ + cl_float(-2.0f), \ + std::numeric_limits::infinity(), \ + -std::numeric_limits::infinity(), \ + std::numeric_limits::quiet_NaN() \ + }; \ + } \ + \ + template \ + typename make_vector_type::value>::type \ + delta(const cl_float& in1, const cl_float& in2, const cl_float& in3, const T& expected) \ + { \ + typedef \ + typename make_vector_type::value>::type \ + delta_vector_type; \ + (void) in1; \ + (void) in2; \ + (void) in3; \ + auto e = detail::make_value(DELTA); \ + return detail::multiply(e, expected); \ + } \ + \ + bool use_ulp() \ + { \ + return USE_ULP; \ + } \ + \ + float ulp() \ + { \ + if(m_is_embedded) \ + { \ + return ULP_EMBEDDED; \ + } \ + return ULP; \ + } \ +private: \ + bool m_is_embedded; \ +}; + +#define MATH_FUNCS_DEFINE_UNARY_FUNC(GROUP_NAME, NAME, HOST_FUNC, USE_ULP, ULP, ULP_EMBEDDED, DELTA, MIN1, MAX1) \ + MATH_FUNCS_DEFINE_UNARY_FUNC1(GROUP_NAME, NAME, NAME, HOST_FUNC, USE_ULP, ULP, ULP_EMBEDDED, DELTA, MIN1, MAX1) +#define MATH_FUNCS_DEFINE_BINARY_FUNC(GROUP_NAME, NAME, HOST_FUNC, USE_ULP, ULP, ULP_EMBEDDED, DELTA, MIN1, MAX1, MIN2, MAX2) \ + MATH_FUNCS_DEFINE_BINARY_FUNC1(GROUP_NAME, NAME, NAME, HOST_FUNC, USE_ULP, ULP, ULP_EMBEDDED, DELTA, MIN1, MAX1, MIN2, MAX2) +#define MATH_FUNCS_DEFINE_TERNARY_FUNC(GROUP_NAME, NAME, HOST_FUNC, USE_ULP, ULP, ULP_EMBEDDED, DELTA, MIN1, MAX1, MIN2, MAX2, MIN3, MAX3) \ + MATH_FUNCS_DEFINE_TERNARY_FUNC1(GROUP_NAME, NAME, NAME, HOST_FUNC, USE_ULP, ULP, ULP_EMBEDDED, DELTA, MIN1, MAX1, MIN2, MAX2, MIN3, MAX3) + +#endif // TEST_CONFORMANCE_CLCPP_MATH_FUNCS_COMMON_FUNCS_HPP diff --git 
a/test_conformance/clcpp/math_funcs/comparison_funcs.hpp b/test_conformance/clcpp/math_funcs/comparison_funcs.hpp new file mode 100644 index 00000000..0bd6ff91 --- /dev/null +++ b/test_conformance/clcpp/math_funcs/comparison_funcs.hpp @@ -0,0 +1,59 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef TEST_CONFORMANCE_CLCPP_MATH_FUNCS_COMPARISON_FUNCS_HPP +#define TEST_CONFORMANCE_CLCPP_MATH_FUNCS_COMPARISON_FUNCS_HPP + +#include +#include + +#include "common.hpp" + +// group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1 +MATH_FUNCS_DEFINE_BINARY_FUNC(comparison, fdim, std::fdim, true, 0.0f, 0.0f, 0.001f, -1000.0f, 1000.0f, -1000.0f, 1000.0f) +MATH_FUNCS_DEFINE_BINARY_FUNC(comparison, fmax, std::fmax, true, 0.0f, 0.0f, 0.001f, -1000.0f, 1000.0f, -1000.0f, 1000.0f) +MATH_FUNCS_DEFINE_BINARY_FUNC(comparison, fmin, std::fmin, true, 0.0f, 0.0f, 0.001f, -1000.0f, 1000.0f, -1000.0f, 1000.0f) +MATH_FUNCS_DEFINE_BINARY_FUNC(comparison, maxmag, reference::maxmag, true, 0.0f, 0.0f, 0.001f, -1000.0f, 1000.0f, -1000.0f, 1000.0f) +MATH_FUNCS_DEFINE_BINARY_FUNC(comparison, minmag, reference::minmag, true, 0.0f, 0.0f, 0.001f, -1000.0f, 1000.0f, -1000.0f, 1000.0f) + +// comparison functions +AUTO_TEST_CASE(test_comparison_funcs) +(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + int error = CL_SUCCESS; + int last_error = CL_SUCCESS; + + // 
Check for EMBEDDED_PROFILE + bool is_embedded_profile = false; + char profile[128]; + last_error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), (void *)&profile, NULL); + RETURN_ON_CL_ERROR(last_error, "clGetDeviceInfo") + if (std::strcmp(profile, "EMBEDDED_PROFILE") == 0) + is_embedded_profile = true; + + TEST_BINARY_FUNC_MACRO((comparison_func_fdim(is_embedded_profile))) + TEST_BINARY_FUNC_MACRO((comparison_func_fmax(is_embedded_profile))) + TEST_BINARY_FUNC_MACRO((comparison_func_fmin(is_embedded_profile))) + TEST_BINARY_FUNC_MACRO((comparison_func_maxmag(is_embedded_profile))) + TEST_BINARY_FUNC_MACRO((comparison_func_minmag(is_embedded_profile))) + + if(error != CL_SUCCESS) + { + return -1; + } + return error; +} + +#endif // TEST_CONFORMANCE_CLCPP_MATH_FUNCS_COMPARISON_FUNCS_HPP diff --git a/test_conformance/clcpp/math_funcs/exponential_funcs.hpp b/test_conformance/clcpp/math_funcs/exponential_funcs.hpp new file mode 100644 index 00000000..82a8247a --- /dev/null +++ b/test_conformance/clcpp/math_funcs/exponential_funcs.hpp @@ -0,0 +1,139 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef TEST_CONFORMANCE_CLCPP_MATH_FUNCS_EXP_FUNCS_HPP +#define TEST_CONFORMANCE_CLCPP_MATH_FUNCS_EXP_FUNCS_HPP + +#include +#include + +#include "common.hpp" + +// group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1 +MATH_FUNCS_DEFINE_UNARY_FUNC(exponential, exp, std::exp, true, 3.0f, 4.0f, 0.001f, -1000.0f, 1000.0f) +MATH_FUNCS_DEFINE_UNARY_FUNC(exponential, expm1, std::expm1, true, 3.0f, 4.0f, 0.001f, -1000.0f, 1000.0f) +MATH_FUNCS_DEFINE_UNARY_FUNC(exponential, exp2, std::exp2, true, 3.0f, 4.0f, 0.001f, -1000.0f, 1000.0f) +MATH_FUNCS_DEFINE_UNARY_FUNC(exponential, exp10, reference::exp10, true, 3.0f, 4.0f, 0.001f, -1000.0f, 1000.0f) + +struct exponential_func_ldexp : public binary_func +{ + exponential_func_ldexp(bool is_embedded) : m_is_embedded(is_embedded) + { + + } + + std::string str() + { + return "ldexp"; + } + + std::string headers() + { + return "#include \n"; + } + + /* Reference value type is cl_double */ + cl_double operator()(const cl_float& x, const cl_int& y) + { + return (std::ldexp)(static_cast(x), y); + } + + cl_float min1() + { + return -1000.0f; + } + + cl_float max1() + { + return 1000.0f; + } + + cl_int min2() + { + return -8; + } + + cl_int max2() + { + return 8; + } + + std::vector in1_special_cases() + { + return { + cl_float(0.0f), + cl_float(-0.0f), + cl_float(1.0f), + cl_float(-1.0f), + cl_float(2.0f), + cl_float(-2.0f), + std::numeric_limits::infinity(), + -std::numeric_limits::infinity(), + std::numeric_limits::quiet_NaN() + }; + } + + bool use_ulp() + { + return true; + } + + float ulp() + { + if(m_is_embedded) + { + return 0.0f; + } + return 0.0f; + } +private: + bool m_is_embedded; +}; + +// exponential functions +AUTO_TEST_CASE(test_exponential_funcs) +(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + int error = CL_SUCCESS; + int last_error = CL_SUCCESS; + + // Check for EMBEDDED_PROFILE + bool is_embedded_profile = false; + char profile[128]; + 
last_error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), (void *)&profile, NULL); + RETURN_ON_CL_ERROR(last_error, "clGetDeviceInfo") + if (std::strcmp(profile, "EMBEDDED_PROFILE") == 0) + is_embedded_profile = true; + + // auto exp(gentype x); + // auto expm1(gentype x); + // auto exp2(gentype x); + // auto exp10(gentype x); + TEST_UNARY_FUNC_MACRO((exponential_func_exp(is_embedded_profile))) + TEST_UNARY_FUNC_MACRO((exponential_func_expm1(is_embedded_profile))) + TEST_UNARY_FUNC_MACRO((exponential_func_exp2(is_embedded_profile))) + TEST_UNARY_FUNC_MACRO((exponential_func_exp10(is_embedded_profile))) + + // auto ldexp(gentype x, intn k); + TEST_BINARY_FUNC_MACRO((exponential_func_ldexp(is_embedded_profile))) + + if(error != CL_SUCCESS) + { + return -1; + } + return error; +} + +#endif // TEST_CONFORMANCE_CLCPP_MATH_FUNCS_EXP_FUNCS_HPP diff --git a/test_conformance/clcpp/math_funcs/floating_point_funcs.hpp b/test_conformance/clcpp/math_funcs/floating_point_funcs.hpp new file mode 100644 index 00000000..63b4c230 --- /dev/null +++ b/test_conformance/clcpp/math_funcs/floating_point_funcs.hpp @@ -0,0 +1,733 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+//
+#ifndef TEST_CONFORMANCE_CLCPP_MATH_FUNCS_FP_FUNCS_HPP
+#define TEST_CONFORMANCE_CLCPP_MATH_FUNCS_FP_FUNCS_HPP
+
+// NOTE(review): the original header names were lost here (bare "#include") —
+// restore the intended <...> targets before applying this patch.
+#include
+#include
+
+#include "common.hpp"
+
+// -------------- UNARY FUNCTIONS
+
+// Rounding functions. All of them are exactly-rounded per the spec, hence
+// ulp == 0.0f for both full and embedded profiles.
+// gentype ceil(gentype x);
+// gentype floor(gentype x);
+// gentype rint(gentype x);
+// gentype round(gentype x);
+// gentype trunc(gentype x);
+// group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1
+MATH_FUNCS_DEFINE_UNARY_FUNC(fp, ceil, std::ceil, true, 0.0f, 0.0f, 0.001f, -1000.0f, 1000.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC(fp, floor, std::floor, true, 0.0f, 0.0f, 0.001f, -1000.0f, 1000.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC(fp, rint, std::rint, true, 0.0f, 0.0f, 0.001f, -1000.0f, 1000.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC(fp, round, std::round, true, 0.0f, 0.0f, 0.001f, -1000.0f, 1000.0f)
+MATH_FUNCS_DEFINE_UNARY_FUNC(fp, trunc, std::trunc, true, 0.0f, 0.0f, 0.001f, -1000.0f, 1000.0f)
+
+// floatn nan(uintn nancode);
+// Reference implementation for nan(): builds a quiet NaN whose significand
+// carries the nancode bits (0x7fc00000U sets the exponent field and the
+// quiet bit; OR-ing x embeds the code in the remaining mantissa bits).
+struct fp_func_nan : public unary_func
+{
+    std::string str()
+    {
+        return "nan";
+    }
+
+    std::string headers()
+    {
+        return "#include \n";
+    }
+
+    // Returns the float whose bit pattern is (x | 0x7fc00000U), i.e. a
+    // quiet NaN tagged with nancode x.
+    cl_float operator()(const cl_uint& x)
+    {
+        cl_uint r = x | 0x7fc00000U;
+        // cl_float and cl_uint have the same size, so the bit reinterpretation is valid
+        cl_float rf = *reinterpret_cast(&r);
+        return rf;
+    }
+
+    // Input range for generated nancodes: [0, 100].
+    cl_uint min1()
+    {
+        return 0;
+    }
+
+    cl_uint max1()
+    {
+        return 100;
+    }
+
+    std::vector in1_special_cases()
+    {
+        return {
+            0, 1
+        };
+    }
+};
+
+// -------------- UNARY FUNCTIONS, 2ND ARG IS POINTER
+
+// gentype fract(gentype x, gentype* iptr);
+//
+// Function fract() returns an additional value via a pointer (2nd argument). In order
+// to test if it is correct, the output buffer type is cl_float2. In the first component
+// we store what the fract() function returns, and in the 2nd component we store what is
+// returned via its 2nd argument (gentype* iptr).
+struct fp_func_fract : public unary_func +{ + fp_func_fract(bool is_embedded) : m_is_embedded(is_embedded) + { + + } + + std::string str() + { + return "fract"; + } + + std::string headers() + { + return "#include \n"; + } + + cl_double2 operator()(const cl_float& x) + { + return reference::fract(static_cast(x)); + } + + cl_float min1() + { + return -1000.0f; + } + + cl_float max1() + { + return 1000.0f; + } + + std::vector in1_special_cases() + { + return { + cl_float(0.0f), + cl_float(-0.0f), + cl_float(1.0f), + cl_float(-1.0f), + cl_float(2.0f), + cl_float(-2.0f), + std::numeric_limits::infinity(), + -std::numeric_limits::infinity(), + std::numeric_limits::quiet_NaN() + }; + } + + bool use_ulp() + { + return true; + } + + float ulp() + { + if(m_is_embedded) + { + return 0.0f; + } + return 0.0f; + } +private: + bool m_is_embedded; +}; + +// We need to specialize generate_kernel_unary<>() function template for fp_func_fract. +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) +template <> +std::string generate_kernel_unary(fp_func_fract func) +{ + return + "__kernel void test_fract(global float *input, global float2 *output)\n" + "{\n" + " size_t gid = get_global_id(0);\n" + " float2 result;\n" + " float itpr = 0;\n" + " result.x = fract(input[gid], &itpr);\n" + " result.y = itpr;\n" + " output[gid] = result;\n" + "}\n"; +} +#else +template <> +std::string generate_kernel_unary(fp_func_fract func) +{ + return + "" + func.defs() + + "" + func.headers() + + "#include \n" + "#include \n" + "using namespace cl;\n" + "__kernel void test_fract(global_ptr input, global_ptr output)\n" + "{\n" + " size_t gid = get_global_id(0);\n" + " float2 result;\n" + " float itpr = 0;\n" + " result.x = fract(input[gid], 
&itpr);\n" + " result.y = itpr;\n" + " output[gid] = result;\n" + "}\n"; +} +#endif + +// gentype modf(gentype x, gentype* iptr); +// +// Fuction modf() returns additional value via pointer (2nd argument). In order to test +// if it's correct output buffer type is cl_float2. In first compontent we store what +// modf() function returns, and in the 2nd component we store what is returned via its +// 2nd argument (gentype* iptr). +struct fp_func_modf : public unary_func +{ + fp_func_modf(bool is_embedded) : m_is_embedded(is_embedded) + { + + } + + std::string str() + { + return "modf"; + } + + std::string headers() + { + return "#include \n"; + } + + cl_double2 operator()(const cl_float& x) + { + cl_double2 r; + r.s[0] = (std::modf)(static_cast(x), &(r.s[1])); + return r; + } + + cl_float min1() + { + return -1000.0f; + } + + cl_float max1() + { + return 1000.0f; + } + + std::vector in1_special_cases() + { + return { + cl_float(0.0f), + cl_float(-0.0f), + cl_float(1.0f), + cl_float(-1.0f), + cl_float(2.0f), + cl_float(-2.0f), + std::numeric_limits::infinity(), + -std::numeric_limits::infinity(), + std::numeric_limits::quiet_NaN() + }; + } + + bool use_ulp() + { + return true; + } + + float ulp() + { + if(m_is_embedded) + { + return 0.0f; + } + return 0.0f; + } +private: + bool m_is_embedded; +}; + +// We need to specialize generate_kernel_unary<>() function template for fp_func_modf. 
+// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) +template <> +std::string generate_kernel_unary(fp_func_modf func) +{ + return + "__kernel void test_modf(global float *input, global float2 *output)\n" + "{\n" + " size_t gid = get_global_id(0);\n" + " float2 result;\n" + " float itpr = 0;\n" + " result.x = modf(input[gid], &itpr);\n" + " result.y = itpr;\n" + " output[gid] = result;\n" + "}\n"; +} +#else +template <> +std::string generate_kernel_unary(fp_func_modf func) +{ + return + "" + func.defs() + + "" + func.headers() + + "#include \n" + "#include \n" + "using namespace cl;\n" + "__kernel void test_modf(global_ptr input, global_ptr output)\n" + "{\n" + " size_t gid = get_global_id(0);\n" + " float2 result;\n" + " float itpr = 0;\n" + " result.x = modf(input[gid], &itpr);\n" + " result.y = itpr;\n" + " output[gid] = result;\n" + "}\n"; +} +#endif + +// gentype frexp(gentype x, intn* exp); +// +// Fuction frexp() returns additional value via pointer (2nd argument). In order to test +// if it's correct output buffer type is cl_float2. In first compontent we store what +// modf() function returns, and in the 2nd component we store what is returned via its +// 2nd argument (intn* exp). 
+struct fp_func_frexp : public unary_func +{ + fp_func_frexp(bool is_embedded) : m_is_embedded(is_embedded) + { + + } + + std::string str() + { + return "frexp"; + } + + std::string headers() + { + return "#include \n"; + } + + cl_double2 operator()(const cl_float& x) + { + cl_double2 r; + cl_int e; + r.s[0] = (std::frexp)(static_cast(x), &e); + r.s[1] = static_cast(e); + return r; + } + + cl_float min1() + { + return -1000.0f; + } + + cl_float max1() + { + return 1000.0f; + } + + std::vector in1_special_cases() + { + return { + cl_float(0.0f), + cl_float(-0.0f), + cl_float(1.0f), + cl_float(-1.0f), + cl_float(2.0f), + cl_float(-2.0f), + std::numeric_limits::infinity(), + -std::numeric_limits::infinity(), + std::numeric_limits::quiet_NaN() + }; + } + + bool use_ulp() + { + return true; + } + + float ulp() + { + if(m_is_embedded) + { + return 0.0f; + } + return 0.0f; + } +private: + bool m_is_embedded; +}; + +// We need to specialize generate_kernel_unary<>() function template for fp_func_frexp. 
+// -----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Specialization of generate_kernel_unary<>() for fp_func_frexp: frexp() returns a
+// second value through its pointer argument, so the kernel packs both results into
+// a float2 output (x = value returned by frexp, y = the returned exponent).
+#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+template <>
+std::string generate_kernel_unary(fp_func_frexp func)
+{
+    return
+        "__kernel void test_frexp(global float *input, global float2 *output)\n"
+        "{\n"
+        " size_t gid = get_global_id(0);\n"
+        " float2 result;\n"
+        " int itpr = 0;\n"
+        " result.x = frexp(input[gid], &itpr);\n"
+        " result.y = itpr;\n"
+        " output[gid] = result;\n"
+        "}\n";
+}
+#else
+template <>
+std::string generate_kernel_unary(fp_func_frexp func)
+{
+    return
+        "" + func.defs() +
+        "" + func.headers() +
+        "#include \n"
+        "#include \n"
+        "using namespace cl;\n"
+        "__kernel void test_frexp(global_ptr input, global_ptr output)\n"
+        "{\n"
+        " size_t gid = get_global_id(0);\n"
+        " float2 result;\n"
+        " int itpr = 0;\n"
+        " result.x = frexp(input[gid], &itpr);\n"
+        " result.y = itpr;\n"
+        " output[gid] = result;\n"
+        "}\n";
+}
+#endif
+
+// -------------- BINARY FUNCTIONS
+
+// gentype copysign(gentype x, gentype y);
+// gentype fmod(gentype x, gentype y);
+// gentype remainder(gentype x, gentype y);
+// group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1, min2, max2
+MATH_FUNCS_DEFINE_BINARY_FUNC(fp, copysign, std::copysign, true, 0.0f, 0.0f, 0.001f, -100.0f, 100.0f, -10.0f, 10.0f)
+MATH_FUNCS_DEFINE_BINARY_FUNC(fp, fmod, std::fmod, true, 0.0f, 0.0f, 0.001f, -100.0f, 100.0f, -10.0f, 10.0f)
+// FIX(review): the ulp_for_embedded and max_delta arguments were transposed
+// (was: true, 0.0f, 0.001f, 0.0f) relative to the documented parameter order
+// above and to the sibling copysign/fmod entries; reordered to 0.0f, 0.0f, 0.001f.
+MATH_FUNCS_DEFINE_BINARY_FUNC(fp, remainder, std::remainder, true, 0.0f, 0.0f, 0.001f, -100.0f, 100.0f, -10.0f, 10.0f)
+
+// In case of function float nextafter(float, float) reference function must
+// operate on floats and return float.
+struct fp_func_nextafter : public binary_func +{ + fp_func_nextafter(bool is_embedded) : m_is_embedded(is_embedded) + { + + } + + std::string str() + { + return "nextafter"; + } + + std::string headers() + { + return "#include \n"; + } + + /* In this case reference value type MUST BE cl_float */ + cl_float operator()(const cl_float& x, const cl_float& y) + { + return (std::nextafter)(x, y); + } + + cl_float min1() + { + return -1000.0f; + } + + cl_float max1() + { + return 500.0f; + } + + cl_float min2() + { + return 501.0f; + } + + cl_float max2() + { + return 1000.0f; + } + + std::vector in1_special_cases() + { + return { + cl_float(0.0f), + cl_float(-0.0f), + cl_float(1.0f), + cl_float(-1.0f), + cl_float(2.0f), + cl_float(-2.0f), + std::numeric_limits::infinity(), + -std::numeric_limits::infinity(), + std::numeric_limits::quiet_NaN() + }; + } + + std::vector in2_special_cases() + { + return { + cl_float(0.0f), + cl_float(-0.0f), + cl_float(1.0f), + cl_float(-1.0f), + cl_float(2.0f), + cl_float(-2.0f), + std::numeric_limits::infinity(), + -std::numeric_limits::infinity(), + std::numeric_limits::quiet_NaN() + }; + } + + bool use_ulp() + { + return true; + } + + float ulp() + { + if(m_is_embedded) + { + return 0.0f; + } + return 0.0f; + } +private: + bool m_is_embedded; +}; + +// gentype remquo(gentype x, gentype y, intn* quo); +struct fp_func_remquo : public binary_func +{ + fp_func_remquo(bool is_embedded) : m_is_embedded(is_embedded) + { + + } + + std::string str() + { + return "remquo"; + } + + std::string headers() + { + return "#include \n"; + } + + cl_double2 operator()(const cl_float& x, const cl_float& y) + { + return reference::remquo(static_cast(x), static_cast(y)); + } + + cl_float min1() + { + return -1000.0f; + } + + cl_float max1() + { + return 1000.0f; + } + + cl_float min2() + { + return -1000.0f; + } + + cl_float max2() + { + return 1000.0f; + } + + std::vector in1_special_cases() + { + return { + cl_float(0.0f), + cl_float(-0.0f), + 
cl_float(1.0f), + cl_float(-1.0f), + std::numeric_limits::infinity(), + -std::numeric_limits::infinity(), + std::numeric_limits::quiet_NaN() + }; + } + + std::vector in2_special_cases() + { + return { + cl_float(0.0f), + cl_float(-0.0f), + cl_float(1.0f), + cl_float(-1.0f), + std::numeric_limits::infinity(), + -std::numeric_limits::infinity(), + std::numeric_limits::quiet_NaN() + }; + } + + bool use_ulp() + { + return true; + } + + float ulp() + { + if(m_is_embedded) + { + return 0.0f; + } + return 0.0f; + } +private: + bool m_is_embedded; +}; + + +// We need to specialize generate_kernel_binary<>() function template for fp_func_remquo. +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) +template <> +std::string generate_kernel_binary(fp_func_remquo func) +{ + return + "__kernel void test_remquo(global float *input1, global float *input2, global float2 *output)\n" + "{\n" + " size_t gid = get_global_id(0);\n" + " float2 result;\n" + " int quo = 0;\n" + " int sign = 0;\n" + " result.x = remquo(input1[gid], input2[gid], &quo);\n" + // Specification say: + // "remquo also calculates the lower seven bits of the integral quotient x/y, + // and gives that value the same sign as x/y. It stores this signed value in + // the object pointed to by quo." + // Implemenation may save into quo more than seven bits. We need to take + // care of that here. + " sign = (quo < 0) ? -1 : 1;\n" + " quo = (quo < 0) ? -quo : quo;\n" + " quo &= 0x0000007f;\n" + " result.y = (sign < 0) ? 
-quo : quo;\n" + " output[gid] = result;\n" + "}\n"; +} +#else +template <> +std::string generate_kernel_binary(fp_func_remquo func) +{ + return + "" + func.defs() + + "" + func.headers() + + "#include \n" + "#include \n" + "using namespace cl;\n" + "__kernel void test_remquo(global_ptr input1, global_ptr input2, global_ptr output)\n" + "{\n" + " size_t gid = get_global_id(0);\n" + " float2 result;\n" + " int quo = 0;\n" + " int sign = 0;\n" + " result.x = remquo(input1[gid], input2[gid], &quo);\n" + // Specification say: + // "remquo also calculates the lower seven bits of the integral quotient x/y, + // and gives that value the same sign as x/y. It stores this signed value in + // the object pointed to by quo." + // Implemenation may save into quo more than seven bits. We need to take + // care of that here. + " sign = (quo < 0) ? -1 : 1;\n" + " quo = (quo < 0) ? -quo : quo;\n" + " quo &= 0x0000007f;\n" + " result.y = (sign < 0) ? -quo : quo;\n" + " output[gid] = result;\n" + "}\n"; +} +#endif + +// -------------- TERNARY FUNCTIONS + +// gentype fma(gentype a, gentype b, gentype c); +// group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1, min2, max2, min3, max3 +MATH_FUNCS_DEFINE_TERNARY_FUNC(fp, fma, std::fma, true, 0.0f, 0.0f, 0.001f, -1000.0f, 1000.0f, -1000.0f, 1000.0f, -1000.0f, 1000.0f) + +// floating point functions +AUTO_TEST_CASE(test_fp_funcs) +(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + int error = CL_SUCCESS; + int last_error = CL_SUCCESS; + + // Check for EMBEDDED_PROFILE + bool is_embedded_profile = false; + char profile[128]; + last_error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), (void *)&profile, NULL); + RETURN_ON_CL_ERROR(last_error, "clGetDeviceInfo") + if (std::strcmp(profile, "EMBEDDED_PROFILE") == 0) + is_embedded_profile = true; + + // gentype ceil(gentype x); + TEST_UNARY_FUNC_MACRO((fp_func_ceil(is_embedded_profile))) + // gentype 
floor(gentype x); + TEST_UNARY_FUNC_MACRO((fp_func_floor(is_embedded_profile))) + // gentype rint(gentype x); + TEST_UNARY_FUNC_MACRO((fp_func_rint(is_embedded_profile))) + // gentype round(gentype x); + TEST_UNARY_FUNC_MACRO((fp_func_round(is_embedded_profile))) + // gentype trunc(gentype x); + TEST_UNARY_FUNC_MACRO((fp_func_trunc(is_embedded_profile))) + + // floatn nan(uintn nancode); + TEST_UNARY_FUNC_MACRO((fp_func_nan())) + + // gentype fract(gentype x, gentype* iptr); + TEST_UNARY_FUNC_MACRO((fp_func_fract(is_embedded_profile))) + // gentype modf(gentype x, gentype* iptr); + TEST_UNARY_FUNC_MACRO((fp_func_modf(is_embedded_profile))) + // gentype frexp(gentype x, intn* exp); + TEST_UNARY_FUNC_MACRO((fp_func_frexp(is_embedded_profile))) + + // gentype remainder(gentype x, gentype y); + TEST_BINARY_FUNC_MACRO((fp_func_remainder(is_embedded_profile))) + // gentype copysign(gentype x, gentype y); + TEST_BINARY_FUNC_MACRO((fp_func_copysign(is_embedded_profile))) + // gentype fmod(gentype x, gentype y); + TEST_BINARY_FUNC_MACRO((fp_func_fmod(is_embedded_profile))) + + // gentype nextafter(gentype x, gentype y); + TEST_BINARY_FUNC_MACRO((fp_func_nextafter(is_embedded_profile))) + + // gentype remquo(gentype x, gentype y, intn* quo); + TEST_BINARY_FUNC_MACRO((fp_func_remquo(is_embedded_profile))) + + // gentype fma(gentype a, gentype b, gentype c); + TEST_TERNARY_FUNC_MACRO((fp_func_fma(is_embedded_profile))) + + if(error != CL_SUCCESS) + { + return -1; + } + return error; +} + +#endif // TEST_CONFORMANCE_CLCPP_MATH_FUNCS_FP_FUNCS_HPP diff --git a/test_conformance/clcpp/math_funcs/half_math_funcs.hpp b/test_conformance/clcpp/math_funcs/half_math_funcs.hpp new file mode 100644 index 00000000..d72d717c --- /dev/null +++ b/test_conformance/clcpp/math_funcs/half_math_funcs.hpp @@ -0,0 +1,106 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef TEST_CONFORMANCE_CLCPP_MATH_FUNCS_HALF_MATH_FUNCS_HPP +#define TEST_CONFORMANCE_CLCPP_MATH_FUNCS_HALF_MATH_FUNCS_HPP + +#include +#include + +#include "common.hpp" + +#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) +// group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1 +MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, cos, half_cos, std::cos, true, 8192.0f, 8192.0f, 0.1f, -CL_M_PI_F, CL_M_PI_F) +MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, sin, half_sin, std::sin, true, 8192.0f, 8192.0f, 0.1f, -CL_M_PI_F, CL_M_PI_F) +MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, tan, half_tan, std::tan, true, 8192.0f, 8192.0f, 0.1f, -CL_M_PI_F, CL_M_PI_F) + +MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, exp, half_exp, std::exp, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f) +MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, exp2, half_exp2, std::exp2, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f) +MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, exp10, half_exp10, reference::exp10, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f) + +MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, log, half_log, std::log, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f) +MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, log2, half_log2, std::log2, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f) +MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, log10, half_log10, std::log10, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f) + 
+MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, rsqrt, half_rsqrt, reference::rsqrt, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f) +MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, sqrt, half_sqrt, std::sqrt, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f) + +MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, recip, half_recip, reference::recip, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f) + +// group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1, min2, max2 +MATH_FUNCS_DEFINE_BINARY_FUNC1(half_math, divide, half_divide, reference::divide, true, 8192.0f, 8192.0f, 0.1f, -1024.0f, 1024.0f, -1024.0f, 1024.0f) +MATH_FUNCS_DEFINE_BINARY_FUNC1(half_math, powr, half_powr, reference::powr, true, 8192.0f, 8192.0f, 0.1f, -1024.0f, 1024.0f, -1024.0f, 1024.0f) +#else +// group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1 +MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, cos, half_math::cos, std::cos, true, 8192.0f, 8192.0f, 0.1f, -CL_M_PI_F, CL_M_PI_F) +MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, sin, half_math::sin, std::sin, true, 8192.0f, 8192.0f, 0.1f, -CL_M_PI_F, CL_M_PI_F) +MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, tan, half_math::tan, std::tan, true, 8192.0f, 8192.0f, 0.1f, -CL_M_PI_F, CL_M_PI_F) + +MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, exp, half_math::exp, std::exp, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f) +MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, exp2, half_math::exp2, std::exp2, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f) +MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, exp10, half_math::exp10, reference::exp10, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f) + +MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, log, half_math::log, std::log, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f) +MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, log2, half_math::log2, std::log2, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f) +MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, log10, half_math::log10, std::log10, true, 8192.0f, 
8192.0f, 0.1f, -1000.0f, 1000.0f) + +MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, rsqrt, half_math::rsqrt, reference::rsqrt, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f) +MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, sqrt, half_math::sqrt, std::sqrt, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f) + +MATH_FUNCS_DEFINE_UNARY_FUNC1(half_math, recip, half_math::recip, reference::recip, true, 8192.0f, 8192.0f, 0.1f, -1000.0f, 1000.0f) + +// group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1, min2, max2 +MATH_FUNCS_DEFINE_BINARY_FUNC1(half_math, divide, half_math::divide, reference::divide, true, 8192.0f, 8192.0f, 0.1f, -1024.0f, 1024.0f, -1024.0f, 1024.0f) +MATH_FUNCS_DEFINE_BINARY_FUNC1(half_math, powr, half_math::powr, reference::powr, true, 8192.0f, 8192.0f, 0.1f, -1024.0f, 1024.0f, -1024.0f, 1024.0f) +#endif + +// comparison functions +AUTO_TEST_CASE(test_half_math_funcs) +(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + int error = CL_SUCCESS; + int last_error = CL_SUCCESS; + + // Check for EMBEDDED_PROFILE + bool is_embedded_profile = false; + char profile[128]; + last_error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), (void *)&profile, NULL); + RETURN_ON_CL_ERROR(last_error, "clGetDeviceInfo") + if (std::strcmp(profile, "EMBEDDED_PROFILE") == 0) + is_embedded_profile = true; + + TEST_UNARY_FUNC_MACRO((half_math_func_cos(is_embedded_profile))) + TEST_UNARY_FUNC_MACRO((half_math_func_sin(is_embedded_profile))) + TEST_UNARY_FUNC_MACRO((half_math_func_tan(is_embedded_profile))) + + TEST_UNARY_FUNC_MACRO((half_math_func_exp(is_embedded_profile))) + TEST_UNARY_FUNC_MACRO((half_math_func_exp2(is_embedded_profile))) + TEST_UNARY_FUNC_MACRO((half_math_func_exp10(is_embedded_profile))) + + TEST_UNARY_FUNC_MACRO((half_math_func_log(is_embedded_profile))) + TEST_UNARY_FUNC_MACRO((half_math_func_log2(is_embedded_profile))) + 
TEST_UNARY_FUNC_MACRO((half_math_func_log10(is_embedded_profile))) + + TEST_BINARY_FUNC_MACRO((half_math_func_divide(is_embedded_profile))) + + if(error != CL_SUCCESS) + { + return -1; + } + return error; +} + +#endif // TEST_CONFORMANCE_CLCPP_MATH_FUNCS_HALF_MATH_FUNCS_HPP diff --git a/test_conformance/clcpp/math_funcs/logarithmic_funcs.hpp b/test_conformance/clcpp/math_funcs/logarithmic_funcs.hpp new file mode 100644 index 00000000..cd25d8e8 --- /dev/null +++ b/test_conformance/clcpp/math_funcs/logarithmic_funcs.hpp @@ -0,0 +1,260 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef TEST_CONFORMANCE_CLCPP_MATH_FUNCS_LOG_FUNCS_HPP +#define TEST_CONFORMANCE_CLCPP_MATH_FUNCS_LOG_FUNCS_HPP + +#include +#include + +#include "common.hpp" + +namespace detail +{ + +// This function reads values of FP_ILOGB0 and FP_ILOGBNAN macros defined on the device. +// OpenCL C++ Spec: +// The value of FP_ILOGB0 shall be either {INT_MIN} or {INT_MAX}. The value of FP_ILOGBNAN +// shall be either {INT_MAX} or {INT_MIN}. 
+int get_ilogb_nan_zero(cl_device_id device, cl_context context, cl_command_queue queue, cl_int& ilogb_nan, cl_int& ilogb_zero) +{ + cl_mem buffers[1]; + cl_program program; + cl_kernel kernel; + size_t work_size[1]; + int err; + + std::string code_str = + "__kernel void get_ilogb_nan_zero(__global int *out)\n" + "{\n" + " out[0] = FP_ILOGB0;\n" + " out[1] = FP_ILOGBNAN;\n" + "}\n"; + std::string kernel_name("get_ilogb_nan_zero"); + + err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name, "-cl-std=CL2.0", false); + RETURN_ON_ERROR(err) + + std::vector output = generate_output(2); + + buffers[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * output.size(), NULL, &err); + RETURN_ON_CL_ERROR(err, "clCreateBuffer") + + err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]); + RETURN_ON_CL_ERROR(err, "clSetKernelArg"); + + work_size[0] = 1; + err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, NULL, 0, NULL, NULL); + RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel"); + + err = clEnqueueReadBuffer( + queue, buffers[0], CL_TRUE, 0, sizeof(cl_int) * output.size(), + static_cast(output.data()), 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer"); + + // Save + ilogb_zero = output[0]; + ilogb_nan = output[1]; + + clReleaseMemObject(buffers[0]); + clReleaseKernel(kernel); + clReleaseProgram(program); + return err; +} + +} // namespace detail + +struct logarithmic_func_ilogb : public unary_func +{ + logarithmic_func_ilogb(cl_int ilogb_nan, cl_int ilogb_zero) + : m_ilogb_nan(ilogb_nan), m_ilogb_zero(ilogb_zero) + { + + } + + std::string str() + { + return "ilogb"; + } + + std::string headers() + { + return "#include \n"; + } + + cl_int operator()(const cl_float& x) + { + if((std::isnan)(x)) + { + return m_ilogb_nan; + } + else if(x == 0.0 || x == -0.0) + { + return m_ilogb_zero; + } + static_assert( + sizeof(cl_int) == sizeof(int), + "Tests assumes that sizeof(cl_int) == sizeof(int)" + 
); + return (std::ilogb)(x); + } + + cl_float min1() + { + return -100.0f; + } + + cl_float max1() + { + return 1000.0f; + } + + std::vector in1_special_cases() + { + return { + cl_float(0.0f), + cl_float(-0.0f), + cl_float(1.0f), + cl_float(-1.0f), + cl_float(2.0f), + cl_float(-2.0f), + std::numeric_limits::infinity(), + -std::numeric_limits::infinity(), + std::numeric_limits::quiet_NaN() + }; + } +private: + cl_int m_ilogb_nan; + cl_int m_ilogb_zero; +}; + +// gentype log(gentype x); +// gentype logb(gentype x); +// gentype log2(gentype x); +// gentype log10(gentype x); +// gentype log1p(gentype x); +// group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1 +MATH_FUNCS_DEFINE_UNARY_FUNC(logarithmic, log, std::log, true, 3.0f, 4.0f, 0.001f, -10.0f, 1000.0f) +MATH_FUNCS_DEFINE_UNARY_FUNC(logarithmic, logb, std::logb, true, 0.0f, 0.0f, 0.001f, -10.0f, 1000.0f) +MATH_FUNCS_DEFINE_UNARY_FUNC(logarithmic, log2, std::log2, true, 3.0f, 4.0f, 0.001f, -10.0f, 1000.0f) +MATH_FUNCS_DEFINE_UNARY_FUNC(logarithmic, log10, std::log10, true, 3.0f, 4.0f, 0.001f, -10.0f, 1000.0f) +MATH_FUNCS_DEFINE_UNARY_FUNC(logarithmic, log1p, std::log1p, true, 2.0f, 4.0f, 0.001f, -10.0f, 1000.0f) + +// gentype lgamma(gentype x); +// OpenCL C++ Spec.: +// The ULP values for built-in math functions lgamma and lgamma_r is currently undefined. +// Because of that we don't check ULP and set acceptable delta to 0.2f (20%). +MATH_FUNCS_DEFINE_UNARY_FUNC(logarithmic, lgamma, std::lgamma, false, 0.0f, 0.0f, 0.2f, -10.0f, 1000.0f) + +// gentype lgamma_r(gentype x, intn* signp); +// OpenCL C++ Spec.: +// The ULP values for built-in math functions lgamma and lgamma_r is currently undefined. +// Because of that we don't check ULP and set acceptable delta to 0.2f (20%). +// +// Note: +// We DO NOT test if sign of the gamma function return by lgamma_r is correct. 
+MATH_FUNCS_DEFINE_UNARY_FUNC(logarithmic, lgamma_r, std::lgamma, false, 0.0f, 0.0f, 0.2f, -10.0f, 1000.0f) + +// We need to specialize generate_kernel_unary<>() function template for logarithmic_func_lgamma_r +// because it takes two arguments, but only one of it is input, the 2nd one is used to return +// the sign of the gamma function. +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) +template <> +std::string generate_kernel_unary(logarithmic_func_lgamma_r func) +{ + return + "__kernel void test_lgamma_r(global float *input, global float *output)\n" + "{\n" + " size_t gid = get_global_id(0);\n" + " int sign;\n" + " output[gid] = lgamma_r(input[gid], &sign);\n" + "}\n"; +} +#else +template <> +std::string generate_kernel_unary(logarithmic_func_lgamma_r func) +{ + return + "" + func.defs() + + "" + func.headers() + + "#include \n" + "#include \n" + "using namespace cl;\n" + "__kernel void test_lgamma_r(global_ptr input, global_ptr output)\n" + "{\n" + " size_t gid = get_global_id(0);\n" + " int sign;\n" + " output[gid] = lgamma_r(input[gid], &sign);\n" + "}\n"; +} +#endif + +// logarithmic functions +AUTO_TEST_CASE(test_logarithmic_funcs) +(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + int error = CL_SUCCESS; + int last_error = CL_SUCCESS; + + // Check for EMBEDDED_PROFILE + bool is_embedded_profile = false; + char profile[128]; + error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), (void *)&profile, NULL); + RETURN_ON_CL_ERROR(error, "clGetDeviceInfo") + if (std::strcmp(profile, "EMBEDDED_PROFILE") == 0) + is_embedded_profile = true; + + // Write values of FP_ILOGB0 and FP_ILOGBNAN, which are macros defined on the device, to + // ilogb_zero and 
ilogb_nan.
+    cl_int ilogb_nan = 0;
+    cl_int ilogb_zero = 0;
+    error = detail::get_ilogb_nan_zero(device, context, queue, ilogb_nan, ilogb_zero);
+    RETURN_ON_ERROR_MSG(error, "detail::get_ilogb_nan_zero function failed");
+
+    // intn ilogb(gentype x);
+    TEST_UNARY_FUNC_MACRO((logarithmic_func_ilogb(ilogb_nan, ilogb_zero)))
+
+    // gentype log(gentype x);
+    // gentype logb(gentype x);
+    // gentype log2(gentype x);
+    // gentype log10(gentype x);
+    // gentype log1p(gentype x);
+    TEST_UNARY_FUNC_MACRO((logarithmic_func_log(is_embedded_profile)))
+    TEST_UNARY_FUNC_MACRO((logarithmic_func_logb(is_embedded_profile)))
+    TEST_UNARY_FUNC_MACRO((logarithmic_func_log2(is_embedded_profile)))
+    TEST_UNARY_FUNC_MACRO((logarithmic_func_log10(is_embedded_profile)))
+    TEST_UNARY_FUNC_MACRO((logarithmic_func_log1p(is_embedded_profile)))
+
+    // gentype lgamma(gentype x);
+    TEST_UNARY_FUNC_MACRO((logarithmic_func_lgamma(is_embedded_profile)))
+
+    // gentype lgamma_r(gentype x, intn* signp);
+    //
+    // Note:
+    // We DO NOT test if the sign of the gamma function returned by lgamma_r is correct
+    TEST_UNARY_FUNC_MACRO((logarithmic_func_lgamma_r(is_embedded_profile)))
+
+    if(error != CL_SUCCESS)
+    {
+        return -1;
+    }
+    return error;
+}
+
+#endif // TEST_CONFORMANCE_CLCPP_MATH_FUNCS_LOG_FUNCS_HPP
diff --git a/test_conformance/clcpp/math_funcs/main.cpp b/test_conformance/clcpp/math_funcs/main.cpp
new file mode 100644
index 00000000..f2106253
--- /dev/null
+++ b/test_conformance/clcpp/math_funcs/main.cpp
@@ -0,0 +1,55 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include + +#include "../common.hpp" + +#include "comparison_funcs.hpp" +#include "exponential_funcs.hpp" +#include "floating_point_funcs.hpp" +#include "half_math_funcs.hpp" +#include "logarithmic_funcs.hpp" +#include "other_funcs.hpp" +#include "power_funcs.hpp" +#include "trigonometric_funcs.hpp" + +int main(int argc, const char *argv[]) +{ + // Check if cl_float (float) and cl_double (double) fulfill the requirements of + // IEC 559 (IEEE 754) standard. This is required for the tests to run correctly. + if(!std::numeric_limits::is_iec559) + { + RETURN_ON_ERROR_MSG(-1, + "cl_float (float) does not fulfill the requirements of IEC 559 (IEEE 754) standard. " + "Tests won't run correctly." + ); + } + if(!std::numeric_limits::is_iec559) + { + RETURN_ON_ERROR_MSG(-1, + "cl_double (double) does not fulfill the requirements of IEC 559 (IEEE 754) standard. " + "Tests won't run correctly." 
+ ); + } + + // Get list to all test functions + std::vector testfn_list = autotest::test_suite::get_test_functions(); + // Get names of all test functions + std::vector testfn_names = autotest::test_suite::get_test_names(); + // Create a vector of pointers to the names test functions + std::vector testfn_names_c_str = autotest::get_strings_ptrs(testfn_names); + return runTestHarness(argc, argv, testfn_list.size(), testfn_list.data(), testfn_names_c_str.data(), false, false, 0); +} diff --git a/test_conformance/clcpp/math_funcs/other_funcs.hpp b/test_conformance/clcpp/math_funcs/other_funcs.hpp new file mode 100644 index 00000000..f939a567 --- /dev/null +++ b/test_conformance/clcpp/math_funcs/other_funcs.hpp @@ -0,0 +1,75 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef TEST_CONFORMANCE_CLCPP_MATH_FUNCS_OTHER_FUNCS_HPP +#define TEST_CONFORMANCE_CLCPP_MATH_FUNCS_OTHER_FUNCS_HPP + +#include +#include + +#include "common.hpp" + +// group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1 +MATH_FUNCS_DEFINE_UNARY_FUNC(other, erfc, std::erfc, true, 16.0f, 16.0f, 0.001f, -1000.0f, 1000.0f) +MATH_FUNCS_DEFINE_UNARY_FUNC(other, erf, std::erf, true, 16.0f, 16.0f, 0.001f, -1000.0f, 1000.0f) +MATH_FUNCS_DEFINE_UNARY_FUNC(other, fabs, std::fabs, true, 0.0f, 0.0f, 0.001f, -1000.0f, 1000.0f) +MATH_FUNCS_DEFINE_UNARY_FUNC(other, tgamma, std::tgamma, true, 16.0f, 16.0f, 0.001f, -1000.0f, 1000.0f) + +// group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1, min2, max2 +MATH_FUNCS_DEFINE_BINARY_FUNC(other, hypot, std::hypot, true, 4.0f, 4.0f, 0.001f, -1000.0f, 1000.0f, -1000.0f, 1000.0f) + +// group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1, min2, max2, min3, max3 +MATH_FUNCS_DEFINE_TERNARY_FUNC(other, mad, reference::mad, false, 0.0f, 0.0f, 0.1f, -10.0f, 10.0f, -10.0f, 10.0f, -10.0f, 10.0f) + +// other functions +AUTO_TEST_CASE(test_other_funcs) +(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + int error = CL_SUCCESS; + int last_error = CL_SUCCESS; + + // Check for EMBEDDED_PROFILE + bool is_embedded_profile = false; + char profile[128]; + last_error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), (void *)&profile, NULL); + RETURN_ON_CL_ERROR(last_error, "clGetDeviceInfo") + if (std::strcmp(profile, "EMBEDDED_PROFILE") == 0) + is_embedded_profile = true; + + // gentype erf(gentype x); + // gentype erfc(gentype x); + TEST_UNARY_FUNC_MACRO((other_func_erfc(is_embedded_profile))) + TEST_UNARY_FUNC_MACRO((other_func_erf(is_embedded_profile))) + + // gentype fabs(gentype x); + TEST_UNARY_FUNC_MACRO((other_func_fabs(is_embedded_profile))) + + // gentype tgamma(gentype x); + 
TEST_UNARY_FUNC_MACRO((other_func_tgamma(is_embedded_profile))) + + // gentype hypot(gentype x, gentype y); + TEST_BINARY_FUNC_MACRO((other_func_hypot(is_embedded_profile))) + + // gentype mad(gentype a, gentype b, gentype c); + TEST_TERNARY_FUNC_MACRO((other_func_mad(is_embedded_profile))) + + if(error != CL_SUCCESS) + { + return -1; + } + return error; +} + +#endif // TEST_CONFORMANCE_CLCPP_MATH_FUNCS_OTHER_FUNCS_HPP diff --git a/test_conformance/clcpp/math_funcs/power_funcs.hpp b/test_conformance/clcpp/math_funcs/power_funcs.hpp new file mode 100644 index 00000000..2ace9b35 --- /dev/null +++ b/test_conformance/clcpp/math_funcs/power_funcs.hpp @@ -0,0 +1,153 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef TEST_CONFORMANCE_CLCPP_MATH_FUNCS_POWER_FUNCS_HPP +#define TEST_CONFORMANCE_CLCPP_MATH_FUNCS_POWER_FUNCS_HPP + +#include +#include +#include + +#include "common.hpp" + +#define DEFINE_BINARY_POWER_FUNC_INT(NAME, HOST_FUNC, USE_ULP, ULP, ULP_EMBEDDED, MIN1, MAX1, MIN2, MAX2) \ +struct power_func_ ## NAME : public binary_func \ +{ \ + power_func_ ## NAME(bool is_embedded) : m_is_embedded(is_embedded) \ + { \ + \ + } \ + \ + std::string str() \ + { \ + return #NAME; \ + } \ + \ + std::string headers() \ + { \ + return "#include \n"; \ + } \ + /* Reference value type is cl_double */ \ + cl_double operator()(const cl_float& x, const cl_int& y) \ + { \ + return (HOST_FUNC)(static_cast(x), y); \ + } \ + \ + cl_float min1() \ + { \ + return MIN1; \ + } \ + \ + cl_float max1() \ + { \ + return MAX1; \ + } \ + \ + cl_int min2() \ + { \ + return MIN2; \ + } \ + \ + cl_int max2() \ + { \ + return MAX2; \ + } \ + \ + std::vector in1_special_cases() \ + { \ + return { \ + cl_float(-1.0f), \ + cl_float(0.0f), \ + cl_float(-0.0f), \ + }; \ + } \ + \ + std::vector in2_special_cases() \ + { \ + return { \ + 2, 3, -1, 1, -2, 2 \ + }; \ + } \ + \ + bool use_ulp() \ + { \ + return USE_ULP; \ + } \ + \ + float ulp() \ + { \ + if(m_is_embedded) \ + { \ + return ULP_EMBEDDED; \ + } \ + return ULP; \ + } \ +private: \ + bool m_is_embedded; \ +}; + +// group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1 +MATH_FUNCS_DEFINE_UNARY_FUNC(power, cbrt, std::cbrt, true, 2.0f, 4.0f, 0.001f, -1000.0f, -9.0f) +MATH_FUNCS_DEFINE_UNARY_FUNC(power, rsqrt, reference::rsqrt, true, 2.0f, 4.0f, 0.001f, 1.0f, 100.0f) +MATH_FUNCS_DEFINE_UNARY_FUNC(power, sqrt, std::sqrt, true, 3.0f, 4.0f, 0.001f, 1.0f, 100.0f) + +// group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1, min2, max2 +MATH_FUNCS_DEFINE_BINARY_FUNC(power, pow, std::pow, true, 16.0f, 16.0f, 0.001f, 1.0f, 100.0f, 1.0f, 10.0f) 
+MATH_FUNCS_DEFINE_BINARY_FUNC(power, powr, reference::powr, true, 16.0f, 16.0f, 0.001f, 1.0f, 100.0f, 1.0f, 10.0f) + +// func_name, reference_func, use_ulp, ulp, ulp_for_embedded, min1, max1, min2, max2 +DEFINE_BINARY_POWER_FUNC_INT(pown, std::pow, true, 16.0f, 16.0f, 1.0f, 100.0f, 1, 10) +DEFINE_BINARY_POWER_FUNC_INT(rootn, reference::rootn, true, 16.0f, 16.0f, -100.0f, 100.0f, -10, 10) + +// power functions +AUTO_TEST_CASE(test_power_funcs) +(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + int error = CL_SUCCESS; + int last_error = CL_SUCCESS; + + // Check for EMBEDDED_PROFILE + bool is_embedded_profile = false; + char profile[128]; + last_error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), (void *)&profile, NULL); + RETURN_ON_CL_ERROR(last_error, "clGetDeviceInfo") + if (std::strcmp(profile, "EMBEDDED_PROFILE") == 0) + is_embedded_profile = true; + + // gentype cbrt(gentype x); + // gentype rsqrt(gentype x); + // gentype sqrt(gentype x); + TEST_UNARY_FUNC_MACRO((power_func_cbrt(is_embedded_profile))) + TEST_UNARY_FUNC_MACRO((power_func_sqrt(is_embedded_profile))) + TEST_UNARY_FUNC_MACRO((power_func_rsqrt(is_embedded_profile))) + + // gentype pow(gentype x, gentype y); + // gentype powr(gentype x, gentype y); + TEST_BINARY_FUNC_MACRO((power_func_pow(is_embedded_profile))) + TEST_BINARY_FUNC_MACRO((power_func_powr(is_embedded_profile))) + + // gentype pown(gentype x, intn y); + // gentype rootn(gentype x, intn y); + TEST_BINARY_FUNC_MACRO((power_func_pown(is_embedded_profile))) + TEST_BINARY_FUNC_MACRO((power_func_rootn(is_embedded_profile))) + + if(error != CL_SUCCESS) + { + return -1; + } + return error; +} + +#endif // TEST_CONFORMANCE_CLCPP_MATH_FUNCS_POWER_FUNCS_HPP diff --git a/test_conformance/clcpp/math_funcs/reference.hpp b/test_conformance/clcpp/math_funcs/reference.hpp new file mode 100644 index 00000000..0f5fc2fc --- /dev/null +++ b/test_conformance/clcpp/math_funcs/reference.hpp @@ -0,0 +1,315 @@ +// 
+// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef TEST_CONFORMANCE_CLCPP_MATH_FUNCS_REFERENCE_HPP +#define TEST_CONFORMANCE_CLCPP_MATH_FUNCS_REFERENCE_HPP + +#include +#include +#include + +#include "../common.hpp" + +namespace reference +{ + // Reference functions for OpenCL comparison functions that + // are not already defined in STL. + cl_float maxmag(const cl_float& x, const cl_float& y) + { + if((std::abs)(x) > (std::abs)(y)) + { + return x; + } + else if((std::abs)(y) > (std::abs)(x)) + { + return y; + } + return (std::fmax)(x, y); + } + + cl_float minmag(const cl_float& x, const cl_float& y) + { + if((std::abs)(x) < (std::abs)(y)) + { + return x; + } + else if((std::abs)(y) < (std::abs)(x)) + { + return y; + } + return (std::fmin)(x, y); + } + + // Reference functions for OpenCL exp functions that + // are not already defined in STL. + cl_double exp10(const cl_double& x) + { + // 10^x = exp2( x * log2(10) ) + auto log2_10 = (std::log2)(static_cast(10.0)); + cl_double x_log2_10 = static_cast(x * log2_10); + return (std::exp2)(x_log2_10); + } + + // Reference functions for OpenCL floating point functions that + // are not already defined in STL. 
+ cl_double2 fract(cl_double x) + { + // Copied from math_brute_force/reference_math.c + cl_double2 r; + if((std::isnan)(x)) + { + r.s[0] = std::numeric_limits::quiet_NaN(); + r.s[1] = std::numeric_limits::quiet_NaN(); + return r; + } + + r.s[0] = (std::modf)(x, &(r.s[1])); + if(r.s[0] < 0.0 ) + { + r.s[0] = 1.0f + r.s[0]; + r.s[1] -= 1.0f; + if( r.s[0] == 1.0f ) + r.s[0] = HEX_FLT(+, 1, fffffe, -, 1); + } + return r; + } + + cl_double2 remquo(cl_double x, cl_double y) + { + cl_double2 r; + // remquo return the same value that is returned by the + // remainder function + r.s[0] = (std::remainder)(x,y); + // calulcate quo + cl_double x_y = (x - r.s[0]) / y; + cl_uint quo = (std::abs)(x_y); + r.s[1] = quo & 0x0000007fU; + if(x_y < 0.0) + r.s[1] = -r.s[1]; + + // fix edge cases + if(!(std::isnan)(x) && y == 0.0) + { + r.s[1] = 0; + } + else if((std::isnan)(x) && (std::isnan)(y)) + { + r.s[1] = 0; + } + return r; + } + + // Reference functions for OpenCL half_math:: functions that + // are not already defined in STL. + cl_double divide(cl_double x, cl_double y) + { + return x / y; + } + + cl_double recip(cl_double x) + { + return 1.0 / x; + } + + // Reference functions for OpenCL other functions that + // are not already defined in STL. + cl_double mad(cl_double x, cl_double y, cl_double z) + { + return (x * y) + z; + } + + // Reference functions for OpenCL power functions that + // are not already defined in STL. + cl_double rsqrt(const cl_double& x) + { + return cl_double(1.0) / ((std::sqrt)(x)); + } + + cl_double powr(const cl_double& x, const cl_double& y) + { + //powr(x, y) returns NaN for x < 0. + if( x < 0.0 ) + return std::numeric_limits::quiet_NaN(); + + //powr ( x, NaN ) returns the NaN for x >= 0. + //powr ( NaN, y ) returns the NaN. + if((std::isnan)(x) || (std::isnan)(y) ) + return std::numeric_limits::quiet_NaN(); + + if( x == 1.0 ) + { + //powr ( +1, +-inf ) returns NaN. 
+ if((std::abs)(y) == INFINITY ) + return std::numeric_limits::quiet_NaN(); + + //powr ( +1, y ) is 1 for finite y. (NaN handled above) + return 1.0; + } + + if( y == 0.0 ) + { + //powr ( +inf, +-0 ) returns NaN. + //powr ( +-0, +-0 ) returns NaN. + if( x == 0.0 || x == std::numeric_limits::infinity()) + return std::numeric_limits::quiet_NaN(); + + //powr ( x, +-0 ) is 1 for finite x > 0. (x <= 0, NaN, INF already handled above) + return 1.0; + } + + if( x == 0.0 ) + { + //powr ( +-0, -inf) is +inf. + //powr ( +-0, y ) is +inf for finite y < 0. + if( y < 0.0 ) + return std::numeric_limits::infinity(); + + //powr ( +-0, y ) is +0 for y > 0. (NaN, y==0 handled above) + return 0.0; + } + + // x = +inf + if( (std::isinf)(x) ) + { + if( y < 0 ) + return 0; + return std::numeric_limits::infinity(); + } + + double fabsx = (std::abs)(x); + double fabsy = (std::abs)(y); + + //y = +-inf cases + if( (std::isinf)(fabsy) ) + { + if( y < 0.0 ) + { + if( fabsx < 1.0 ) + return std::numeric_limits::infinity(); + return 0; + } + if( fabsx < 1.0 ) + return 0.0; + return std::numeric_limits::infinity(); + } + return (std::pow)(x, y); + } + + cl_double rootn(const cl_double& x, const cl_int n) + { + //rootn (x, 0) returns a NaN. + if(n == 0) + return std::numeric_limits::quiet_NaN(); + + //rootn ( x, n ) returns a NaN for x < 0 and n is even. + if(x < 0 && 0 == (n & 1)) + return std::numeric_limits::quiet_NaN(); + + if(x == 0.0) + { + if(n > 0) + { + //rootn ( +-0, n ) is +0 for even n > 0. + if(0 == (n & 1)) + { + return cl_double(0.0); + } + //rootn ( +-0, n ) is +-0 for odd n > 0. + else + { + return x; + } + } + else + { + //rootn ( +-0, n ) is +inf for even n < 0. + if(0 == ((-n) & 1)) + { + return std::numeric_limits::infinity(); + } + //rootn ( +-0, n ) is +-inf for odd n < 0. 
+ else + { + return (std::copysign)( + std::numeric_limits::infinity(), x + ); + } + } + } + + cl_double r = (std::abs)(x); + r = (std::exp2)((std::log2)(r) / static_cast(n)); + return (std::copysign)(r, x); + } + + // Reference functions for OpenCL trigonometric functions that + // are not already defined in STL. + cl_double acospi(cl_double x) + { + return (std::acos)(x) / CL_M_PI; + } + + cl_double asinpi(cl_double x) + { + return (std::asin)(x) / CL_M_PI; + } + + cl_double atanpi(cl_double x) + { + return (std::atan)(x) / CL_M_PI; + } + + cl_double cospi(cl_double x) + { + return (std::cos)(x * CL_M_PI); + } + + cl_double sinpi(cl_double x) + { + return (std::sin)(x * CL_M_PI); + } + + cl_double tanpi(cl_double x) + { + return (std::tan)(x * CL_M_PI); + } + + cl_double atan2(cl_double x, cl_double y) + { + #if defined(WIN32) || defined(_WIN32) + // Fix edge cases for Windows + if ((std::isinf)(x) && (std::isinf)(y)) { + cl_double retval = (y > 0) ? CL_M_PI_4 : 3.f * CL_M_PI_4; + return (x > 0) ? retval : -retval; + } + #endif // defined(WIN32) || defined(_WIN32) + return (std::atan2)(x, y); + } + + cl_double atan2pi(cl_double x, cl_double y) + { + return ::reference::atan2(x, y) / CL_M_PI; + } + + cl_double2 sincos(cl_double x) + { + cl_double2 r; + r.s[0] = (std::sin)(x); + r.s[1] = (std::cos)(x); + return r; + } +} + +#endif // TEST_CONFORMANCE_CLCPP_MATH_FUNCS_REFERENCE_HPP diff --git a/test_conformance/clcpp/math_funcs/trigonometric_funcs.hpp b/test_conformance/clcpp/math_funcs/trigonometric_funcs.hpp new file mode 100644 index 00000000..343024a8 --- /dev/null +++ b/test_conformance/clcpp/math_funcs/trigonometric_funcs.hpp @@ -0,0 +1,222 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef TEST_CONFORMANCE_CLCPP_MATH_FUNCS_TRI_FUNCS_HPP +#define TEST_CONFORMANCE_CLCPP_MATH_FUNCS_TRI_FUNCS_HPP + +#include +#include + +#include "common.hpp" + +// group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1 +MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, acos, std::acos, true, 4.0f, 4.0f, 0.001f, -1.0f, 1.0f) +MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, acosh, std::acosh, true, 4.0f, 4.0f, 0.001f, -1.0f, 1.0f) +MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, acospi, reference::acospi, true, 5.0f, 5.0f, 0.001f, -1.0f, 1.0f) +MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, asin, std::asin, true, 4.0f, 4.0f, 0.001f, -1.0f, 1.0f) +MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, asinh, std::asinh, true, 4.0f, 4.0f, 0.001f, -1.0f, 1.0f) +MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, asinpi, reference::asinpi, true, 5.0f, 5.0f, 0.001f, -1.0f, 1.0f) +MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, atan, std::atan, true, 5.0f, 5.0f, 0.001f, -1.0f, 1.0f) +MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, atanh, std::atanh, true, 5.0f, 5.0f, 0.001f, -1.0f, 1.0f) +MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, atanpi, reference::atanpi, true, 5.0f, 5.0f, 0.001f, -1.0f, 1.0f) + +// For (sin/cos/tan)pi functions min input value is -0.24 and max input value is 0.24, +// so (CL_M_PI * x) is never greater than CL_M_PI_F. 
+// group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1 +MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, cos, std::cos, true, 4.0f, 4.0f, 0.001f, -CL_M_PI_F, CL_M_PI_F) +MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, cosh, std::cosh, true, 4.0f, 4.0f, 0.001f, -CL_M_PI_F, CL_M_PI_F) +MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, cospi, reference::cospi, true, 4.0f, 4.0f, 0.001f, -0.24, -0.24f) +MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, sin, std::sin, true, 4.0f, 4.0f, 0.001f, -CL_M_PI_F, CL_M_PI_F) +MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, sinh, std::sinh, true, 4.0f, 4.0f, 0.001f, -CL_M_PI_F, CL_M_PI_F) +MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, sinpi, reference::sinpi, true, 4.0f, 4.0f, 0.001f, -0.24, -0.24f) +MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, tan, std::tan, true, 5.0f, 5.0f, 0.001f, -CL_M_PI_F, CL_M_PI_F) +MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, tanh, std::tanh, true, 5.0f, 5.0f, 0.001f, -CL_M_PI_F, CL_M_PI_F) +MATH_FUNCS_DEFINE_UNARY_FUNC(trigonometric, tanpi, reference::tanpi, true, 6.0f, 6.0f, 0.001f, -0.24, -0.24f) + +// group_name, func_name, reference_func, use_ulp, ulp, ulp_for_embedded, max_delta, min1, max1, min2, max2 +MATH_FUNCS_DEFINE_BINARY_FUNC(trigonometric, atan2, reference::atan2, true, 6.0f, 6.0f, 0.001f, -1.0f, 1.0f, -1.0f, 1.0f) +MATH_FUNCS_DEFINE_BINARY_FUNC(trigonometric, atan2pi, reference::atan2pi, true, 6.0f, 6.0f, 0.001f, -1.0f, 1.0f, -1.0f, 1.0f) + +// gentype sincos(gentype x, gentype * cosval); +// +// Fact that second argument is a pointer is inconvenient. +// +// We don't want to modify all helper functions defined in funcs_test_utils.hpp +// that run test kernels generated based on this class and check if results are +// correct, so instead of having two output cl_float buffers, one for sines and +// one for cosines values, we use one cl_float2 output buffer (first component is +// sine, second is cosine). 
+// +// Below we also define specialization of generate_kernel_unary function template +// for trigonometric_func_sincos. +struct trigonometric_func_sincos : public unary_func +{ + trigonometric_func_sincos(bool is_embedded) : m_is_embedded(is_embedded) + { + + } + + std::string str() + { + return "sincos"; + } + + std::string headers() + { + return "#include \n"; + } + + /* Reference value type is cl_double */ + cl_double2 operator()(const cl_float& x) + { + return (reference::sincos)(static_cast(x)); + } + + cl_float min1() + { + return -CL_M_PI_F; + } + + cl_float max1() + { + return CL_M_PI_F; + } + + bool use_ulp() + { + return true; + } + + float ulp() + { + if(m_is_embedded) + { + return 4.0f; + } + return 4.0f; + } +private: + bool m_is_embedded; +}; + +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) +template <> +std::string generate_kernel_unary(trigonometric_func_sincos func) +{ + return + "__kernel void test_sincos(global float *input, global float2 *output)\n" + "{\n" + " size_t gid = get_global_id(0);\n" + " float2 sine_cosine_of_x;\n" + " float cosine_of_x = 0;\n" + " sine_cosine_of_x.x = sincos(input[gid], &(cosine_of_x));\n" + " sine_cosine_of_x.y = cosine_of_x;\n" + " output[gid] = sine_cosine_of_x;\n" + "}\n"; +} +#else +template <> +std::string generate_kernel_unary(trigonometric_func_sincos func) +{ + return + "" + func.defs() + + "" + func.headers() + + "#include \n" + "#include \n" + "using namespace cl;\n" + "__kernel void test_sincos(global_ptr input, global_ptr output)\n" + "{\n" + " size_t gid = get_global_id(0);\n" + " float2 sine_cosine_of_x;\n" + " float cosine_of_x = 0;\n" + " sine_cosine_of_x.x = sincos(input[gid], &(cosine_of_x));\n" + " sine_cosine_of_x.y = 
cosine_of_x;\n" + " output[gid] = sine_cosine_of_x;\n" + "}\n"; +} +#endif + +// trigonometric functions +AUTO_TEST_CASE(test_trigonometric_funcs) +(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + int error = CL_SUCCESS; + int last_error = CL_SUCCESS; + + // Check for EMBEDDED_PROFILE + bool is_embedded_profile = false; + char profile[128]; + last_error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), (void *)&profile, NULL); + RETURN_ON_CL_ERROR(last_error, "clGetDeviceInfo") + if (std::strcmp(profile, "EMBEDDED_PROFILE") == 0) + is_embedded_profile = true; + + // gentype acos(gentype x); + // gentype acosh(gentype x); + // gentype acospi(gentype x); + // gentype asin(gentype x); + // gentype asinh(gentype x); + // gentype asinpi(gentype x); + // gentype atan(gentype x); + // gentype atanh(gentype x); + // gentype atanpi(gentype x); + TEST_UNARY_FUNC_MACRO((trigonometric_func_acos(is_embedded_profile))) + TEST_UNARY_FUNC_MACRO((trigonometric_func_acosh(is_embedded_profile))) + TEST_UNARY_FUNC_MACRO((trigonometric_func_acospi(is_embedded_profile))) + TEST_UNARY_FUNC_MACRO((trigonometric_func_asin(is_embedded_profile))) + TEST_UNARY_FUNC_MACRO((trigonometric_func_asinh(is_embedded_profile))) + TEST_UNARY_FUNC_MACRO((trigonometric_func_asinpi(is_embedded_profile))) + TEST_UNARY_FUNC_MACRO((trigonometric_func_atan(is_embedded_profile))) + TEST_UNARY_FUNC_MACRO((trigonometric_func_atanh(is_embedded_profile))) + TEST_UNARY_FUNC_MACRO((trigonometric_func_atanpi(is_embedded_profile))) + + // gentype cos(gentype x); + // gentype cosh(gentype x); + // gentype cospi(gentype x); + // gentype sin(gentype x); + // gentype sinh(gentype x); + // gentype sinpi(gentype x); + // gentype tan(gentype x); + // gentype tanh(gentype x); + // gentype tanpi(gentype x); + TEST_UNARY_FUNC_MACRO((trigonometric_func_cos(is_embedded_profile))) + TEST_UNARY_FUNC_MACRO((trigonometric_func_cosh(is_embedded_profile))) + 
TEST_UNARY_FUNC_MACRO((trigonometric_func_cospi(is_embedded_profile))) + TEST_UNARY_FUNC_MACRO((trigonometric_func_sin(is_embedded_profile))) + TEST_UNARY_FUNC_MACRO((trigonometric_func_sinh(is_embedded_profile))) + TEST_UNARY_FUNC_MACRO((trigonometric_func_sinpi(is_embedded_profile))) + TEST_UNARY_FUNC_MACRO((trigonometric_func_tan(is_embedded_profile))) + TEST_UNARY_FUNC_MACRO((trigonometric_func_tanh(is_embedded_profile))) + TEST_UNARY_FUNC_MACRO((trigonometric_func_tanpi(is_embedded_profile))) + + // gentype atan2(gentype y, gentype x); + // gentype atan2pi(gentype y, gentype x); + TEST_BINARY_FUNC_MACRO((trigonometric_func_atan2(is_embedded_profile))) + TEST_BINARY_FUNC_MACRO((trigonometric_func_atan2pi(is_embedded_profile))) + + // gentype sincos(gentype x, gentype * cosval); + TEST_UNARY_FUNC_MACRO((trigonometric_func_sincos(is_embedded_profile))) + + if(error != CL_SUCCESS) + { + return -1; + } + return error; +} + +#endif // TEST_CONFORMANCE_CLCPP_MATH_FUNCS_TRI_FUNCS_HPP diff --git a/test_conformance/clcpp/pipes/CMakeLists.txt b/test_conformance/clcpp/pipes/CMakeLists.txt new file mode 100644 index 00000000..037b432d --- /dev/null +++ b/test_conformance/clcpp/pipes/CMakeLists.txt @@ -0,0 +1,12 @@ +set(MODULE_NAME CPP_PIPES) + +set(${MODULE_NAME}_SOURCES + main.cpp + ../../../test_common/harness/errorHelpers.c + ../../../test_common/harness/testHarness.c + ../../../test_common/harness/kernelHelpers.c + ../../../test_common/harness/msvc9.c + ../../../test_common/harness/parseParameters.cpp +) + +include(../../CMakeCommon.txt) diff --git a/test_conformance/clcpp/pipes/main.cpp b/test_conformance/clcpp/pipes/main.cpp new file mode 100644 index 00000000..91a0ea83 --- /dev/null +++ b/test_conformance/clcpp/pipes/main.cpp @@ -0,0 +1,30 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../common.hpp" + +#include "test_pipes.hpp" + + +int main(int argc, const char *argv[]) +{ + // Get list to all test functions + std::vector testfn_list = autotest::test_suite::get_test_functions(); + // Get names of all test functions + std::vector testfn_names = autotest::test_suite::get_test_names(); + // Create a vector of pointers to the names test functions + std::vector testfn_names_c_str = autotest::get_strings_ptrs(testfn_names); + return runTestHarness(argc, argv, testfn_list.size(), testfn_list.data(), testfn_names_c_str.data(), false, false, 0); +} diff --git a/test_conformance/clcpp/pipes/test_pipes.hpp b/test_conformance/clcpp/pipes/test_pipes.hpp new file mode 100644 index 00000000..3fc30dcd --- /dev/null +++ b/test_conformance/clcpp/pipes/test_pipes.hpp @@ -0,0 +1,632 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef TEST_CONFORMANCE_CLCPP_PIPES_TEST_PIPES_HPP +#define TEST_CONFORMANCE_CLCPP_PIPES_TEST_PIPES_HPP + +#include +#include +#include +#include +#include + +// Common for all OpenCL C++ tests +#include "../common.hpp" + + +namespace test_pipes { + +enum class pipe_source +{ + param, + storage +}; + +enum class pipe_operation +{ + work_item, + work_item_reservation, + work_group_reservation, + sub_group_reservation +}; + +struct test_options +{ + pipe_operation operation; + pipe_source source; + int max_packets; + int num_packets; +}; + +struct output_type +{ + cl_uint write_reservation_is_valid; + cl_uint write_success; + + cl_uint num_packets; + cl_uint max_packets; + cl_uint read_reservation_is_valid; + cl_uint read_success; + + cl_uint value; +}; + +const std::string source_common = R"( +struct output_type +{ + uint write_reservation_is_valid; + uint write_success; + + uint num_packets; + uint max_packets; + uint read_reservation_is_valid; + uint read_success; + + uint value; +}; +)"; + +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) +std::string generate_source(test_options options) +{ + std::stringstream s; + s << source_common; + if (options.operation == pipe_operation::work_item) + { + s << R"( + kernel void producer(write_only pipe uint out_pipe, global struct output_type *output) + { + const ulong gid = get_global_id(0); + + output[gid].write_reservation_is_valid = 1; + + uint value = gid; + output[gid].write_success = write_pipe(out_pipe, &value) == 0; + } + + kernel void consumer(read_only pipe uint in_pipe, global struct output_type *output) + { + const ulong gid = get_global_id(0); + + output[gid].num_packets = get_pipe_num_packets(in_pipe); + output[gid].max_packets = 
get_pipe_max_packets(in_pipe); + + output[gid].read_reservation_is_valid = 1; + + uint value; + output[gid].read_success = read_pipe(in_pipe, &value) == 0; + output[gid].value = value; + } + )"; + } + else if (options.operation == pipe_operation::work_item_reservation) + { + s << R"( + kernel void producer(write_only pipe uint out_pipe, global struct output_type *output) + { + const ulong gid = get_global_id(0); + if (gid % 2 == 1) return; + + reserve_id_t reservation = reserve_write_pipe(out_pipe, 2); + output[gid + 0].write_reservation_is_valid = is_valid_reserve_id(reservation); + output[gid + 1].write_reservation_is_valid = is_valid_reserve_id(reservation); + + uint value0 = gid + 0; + uint value1 = gid + 1; + output[gid + 0].write_success = write_pipe(out_pipe, reservation, 0, &value0) == 0; + output[gid + 1].write_success = write_pipe(out_pipe, reservation, 1, &value1) == 0; + commit_write_pipe(out_pipe, reservation); + } + + kernel void consumer(read_only pipe uint in_pipe, global struct output_type *output) + { + const ulong gid = get_global_id(0); + if (gid % 2 == 1) return; + + output[gid + 0].num_packets = get_pipe_num_packets(in_pipe); + output[gid + 0].max_packets = get_pipe_max_packets(in_pipe); + output[gid + 1].num_packets = get_pipe_num_packets(in_pipe); + output[gid + 1].max_packets = get_pipe_max_packets(in_pipe); + + reserve_id_t reservation = reserve_read_pipe(in_pipe, 2); + output[gid + 0].read_reservation_is_valid = is_valid_reserve_id(reservation); + output[gid + 1].read_reservation_is_valid = is_valid_reserve_id(reservation); + + uint value0; + uint value1; + output[gid + 0].read_success = read_pipe(in_pipe, reservation, 1, &value0) == 0; + output[gid + 1].read_success = read_pipe(in_pipe, reservation, 0, &value1) == 0; + commit_read_pipe(in_pipe, reservation); + output[gid + 0].value = value0; + output[gid + 1].value = value1; + } + )"; + } + else if (options.operation == pipe_operation::work_group_reservation) + { + s << R"( + kernel void 
producer(write_only pipe uint out_pipe, global struct output_type *output) + { + const ulong gid = get_global_id(0); + + reserve_id_t reservation = work_group_reserve_write_pipe(out_pipe, get_local_size(0)); + output[gid].write_reservation_is_valid = is_valid_reserve_id(reservation); + + uint value = gid; + output[gid].write_success = write_pipe(out_pipe, reservation, get_local_id(0), &value) == 0; + work_group_commit_write_pipe(out_pipe, reservation); + } + + kernel void consumer(read_only pipe uint in_pipe, global struct output_type *output) + { + const ulong gid = get_global_id(0); + + output[gid].num_packets = get_pipe_num_packets(in_pipe); + output[gid].max_packets = get_pipe_max_packets(in_pipe); + + reserve_id_t reservation = work_group_reserve_read_pipe(in_pipe, get_local_size(0)); + output[gid].read_reservation_is_valid = is_valid_reserve_id(reservation); + + uint value; + output[gid].read_success = read_pipe(in_pipe, reservation, get_local_size(0) - 1 - get_local_id(0), &value) == 0; + work_group_commit_read_pipe(in_pipe, reservation); + output[gid].value = value; + } + )"; + } + else if (options.operation == pipe_operation::sub_group_reservation) + { + s << R"( + #pragma OPENCL EXTENSION cl_khr_subgroups : enable + + kernel void producer(write_only pipe uint out_pipe, global struct output_type *output) + { + const ulong gid = get_global_id(0); + + reserve_id_t reservation = sub_group_reserve_write_pipe(out_pipe, get_sub_group_size()); + output[gid].write_reservation_is_valid = is_valid_reserve_id(reservation); + + uint value = gid; + output[gid].write_success = write_pipe(out_pipe, reservation, get_sub_group_local_id(), &value) == 0; + sub_group_commit_write_pipe(out_pipe, reservation); + } + + kernel void consumer(read_only pipe uint in_pipe, global struct output_type *output) + { + const ulong gid = get_global_id(0); + + output[gid].num_packets = get_pipe_num_packets(in_pipe); + output[gid].max_packets = get_pipe_max_packets(in_pipe); + + reserve_id_t 
reservation = sub_group_reserve_read_pipe(in_pipe, get_sub_group_size()); + output[gid].read_reservation_is_valid = is_valid_reserve_id(reservation); + + uint value; + output[gid].read_success = read_pipe(in_pipe, reservation, get_sub_group_size() - 1 - get_sub_group_local_id(), &value) == 0; + sub_group_commit_read_pipe(in_pipe, reservation); + output[gid].value = value; + } + )"; + } + + return s.str(); +} +#else +std::string generate_source(test_options options) +{ + std::stringstream s; + s << R"( + #include + #include + #include + #include + #include + using namespace cl; + )"; + + s << source_common; + + std::string init_out_pipe; + std::string init_in_pipe; + if (options.source == pipe_source::param) + { + init_out_pipe = "auto out_pipe = pipe_param;"; + init_in_pipe = "auto in_pipe = pipe_param;"; + } + else if (options.source == pipe_source::storage) + { + s << "pipe_storage storage;"; + init_out_pipe = "auto out_pipe = storage.get();"; + init_in_pipe = "auto in_pipe = make_pipe(storage);"; + } + + if (options.operation == pipe_operation::work_item) + { + s << R"( + kernel void producer(pipe pipe_param, global_ptr output) + { + )" << init_out_pipe << R"( + const ulong gid = get_global_id(0); + + output[gid].write_reservation_is_valid = 1; + + uint value = gid; + output[gid].write_success = out_pipe.write(value); + } + + kernel void consumer(pipe pipe_param, global_ptr output) + { + )" << init_in_pipe << R"( + const ulong gid = get_global_id(0); + + output[gid].num_packets = in_pipe.num_packets(); + output[gid].max_packets = in_pipe.max_packets(); + + output[gid].read_reservation_is_valid = 1; + + uint value; + output[gid].read_success = in_pipe.read(value); + output[gid].value = value; + } + )"; + } + else if (options.operation == pipe_operation::work_item_reservation) + { + s << R"( + kernel void producer(pipe pipe_param, global_ptr output) + { + )" << init_out_pipe << R"( + const ulong gid = get_global_id(0); + if (gid % 2 == 1) return; + + auto 
reservation = out_pipe.reserve(2); + output[gid + 0].write_reservation_is_valid = reservation.is_valid(); + output[gid + 1].write_reservation_is_valid = reservation.is_valid(); + + uint value0 = gid + 0; + uint value1 = gid + 1; + output[gid + 0].write_success = reservation.write(0, value0); + output[gid + 1].write_success = reservation.write(1, value1); + reservation.commit(); + } + + kernel void consumer(pipe pipe_param, global_ptr output) + { + )" << init_in_pipe << R"( + const ulong gid = get_global_id(0); + if (gid % 2 == 1) return; + + output[gid + 0].num_packets = in_pipe.num_packets(); + output[gid + 0].max_packets = in_pipe.max_packets(); + output[gid + 1].num_packets = in_pipe.num_packets(); + output[gid + 1].max_packets = in_pipe.max_packets(); + + auto reservation = in_pipe.reserve(2); + output[gid + 0].read_reservation_is_valid = reservation.is_valid(); + output[gid + 1].read_reservation_is_valid = reservation.is_valid(); + + uint value0; + uint value1; + output[gid + 0].read_success = reservation.read(1, value0); + output[gid + 1].read_success = reservation.read(0, value1); + reservation.commit(); + output[gid + 0].value = value0; + output[gid + 1].value = value1; + } + )"; + } + else if (options.operation == pipe_operation::work_group_reservation) + { + s << R"( + kernel void producer(pipe pipe_param, global_ptr output) + { + )" << init_out_pipe << R"( + const ulong gid = get_global_id(0); + + auto reservation = out_pipe.work_group_reserve(get_local_size(0)); + output[gid].write_reservation_is_valid = reservation.is_valid(); + + uint value = gid; + output[gid].write_success = reservation.write(get_local_id(0), value); + reservation.commit(); + } + + kernel void consumer(pipe pipe_param, global_ptr output) + { + )" << init_in_pipe << R"( + const ulong gid = get_global_id(0); + + output[gid].num_packets = in_pipe.num_packets(); + output[gid].max_packets = in_pipe.max_packets(); + + auto reservation = in_pipe.work_group_reserve(get_local_size(0)); + 
output[gid].read_reservation_is_valid = reservation.is_valid(); + + uint value; + output[gid].read_success = reservation.read(get_local_size(0) - 1 - get_local_id(0), value); + reservation.commit(); + output[gid].value = value; + } + )"; + } + else if (options.operation == pipe_operation::sub_group_reservation) + { + s << R"( + kernel void producer(pipe pipe_param, global_ptr output) + { + )" << init_out_pipe << R"( + const ulong gid = get_global_id(0); + + auto reservation = out_pipe.sub_group_reserve(get_sub_group_size()); + output[gid].write_reservation_is_valid = reservation.is_valid(); + + uint value = gid; + output[gid].write_success = reservation.write(get_sub_group_local_id(), value); + reservation.commit(); + } + + kernel void consumer(pipe pipe_param, global_ptr output) + { + )" << init_in_pipe << R"( + const ulong gid = get_global_id(0); + + output[gid].num_packets = in_pipe.num_packets(); + output[gid].max_packets = in_pipe.max_packets(); + + auto reservation = in_pipe.sub_group_reserve(get_sub_group_size()); + output[gid].read_reservation_is_valid = reservation.is_valid(); + + uint value; + output[gid].read_success = reservation.read(get_sub_group_size() - 1 - get_sub_group_local_id(), value); + reservation.commit(); + output[gid].value = value; + } + )"; + } + + return s.str(); +} +#endif + +int test(cl_device_id device, cl_context context, cl_command_queue queue, test_options options) +{ + int error = CL_SUCCESS; + + if (options.num_packets % 2 != 0 || options.max_packets < options.num_packets) + { + RETURN_ON_ERROR_MSG(-1, "Invalid test options") + } + +#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + if (options.operation == pipe_operation::sub_group_reservation && !is_extension_available(device, "cl_khr_subgroups")) + { + log_info("SKIPPED: Extension `cl_khr_subgroups` is not supported. 
Skipping tests.\n"); + return CL_SUCCESS; + } +#endif + + cl_program program; + cl_kernel producer_kernel; + cl_kernel consumer_kernel; + + std::string producer_kernel_name = "producer"; + std::string consumer_kernel_name = "consumer"; + std::string source = generate_source(options); + +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +// Only OpenCL C++ to SPIR-V compilation +#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) + error = create_opencl_kernel( + context, &program, &producer_kernel, + source, producer_kernel_name + ); + RETURN_ON_ERROR(error) + return error; +// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) +#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + error = create_opencl_kernel( + context, &program, &producer_kernel, + source, producer_kernel_name, "-cl-std=CL2.0", false + ); + RETURN_ON_ERROR(error) + consumer_kernel = clCreateKernel(program, consumer_kernel_name.c_str(), &error); + RETURN_ON_CL_ERROR(error, "clCreateKernel") +// Normal run +#else + error = create_opencl_kernel( + context, &program, &producer_kernel, + source, producer_kernel_name + ); + RETURN_ON_ERROR(error) + consumer_kernel = clCreateKernel(program, consumer_kernel_name.c_str(), &error); + RETURN_ON_CL_ERROR(error, "clCreateKernel") +#endif + + size_t max_work_group_size; + error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), &max_work_group_size, NULL); + RETURN_ON_CL_ERROR(error, "clGetDeviceInfo") + + const size_t count = options.num_packets; + const size_t local_size = (std::min)((size_t)256, max_work_group_size); + const size_t global_size = count; + + const cl_uint packet_size = sizeof(cl_uint); + + cl_mem pipe = clCreatePipe(context, 0, packet_size, options.max_packets, NULL, &error); 
+ RETURN_ON_CL_ERROR(error, "clCreatePipe") + + cl_mem output_buffer; + output_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(output_type) * count, NULL, &error); + RETURN_ON_CL_ERROR(error, "clCreateBuffer") + + const char pattern = 0; + error = clEnqueueFillBuffer(queue, output_buffer, &pattern, sizeof(pattern), 0, sizeof(output_type) * count, 0, NULL, NULL); + RETURN_ON_CL_ERROR(error, "clEnqueueFillBuffer") + + error = clSetKernelArg(producer_kernel, 0, sizeof(cl_mem), &pipe); + RETURN_ON_CL_ERROR(error, "clSetKernelArg") + error = clSetKernelArg(producer_kernel, 1, sizeof(output_buffer), &output_buffer); + RETURN_ON_CL_ERROR(error, "clSetKernelArg") + + error = clEnqueueNDRangeKernel(queue, producer_kernel, 1, NULL, &global_size, NULL, 0, NULL, NULL); + RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel") + + error = clSetKernelArg(consumer_kernel, 0, sizeof(cl_mem), &pipe); + RETURN_ON_CL_ERROR(error, "clSetKernelArg") + error = clSetKernelArg(consumer_kernel, 1, sizeof(output_buffer), &output_buffer); + RETURN_ON_CL_ERROR(error, "clSetKernelArg") + + error = clEnqueueNDRangeKernel(queue, consumer_kernel, 1, NULL, &global_size, &local_size, 0, NULL, NULL); + RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel") + + std::vector output(count); + error = clEnqueueReadBuffer( + queue, output_buffer, CL_TRUE, + 0, sizeof(output_type) * count, + static_cast(output.data()), + 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer") + + std::vector existing_values(count, false); + for (size_t gid = 0; gid < count; gid++) + { + const output_type &o = output[gid]; + + if (!o.write_reservation_is_valid) + { + RETURN_ON_ERROR_MSG(-1, "write reservation is not valid") + } + if (!o.write_success) + { + RETURN_ON_ERROR_MSG(-1, "write did not succeed") + } + + if (o.num_packets == 0 || o.num_packets > options.num_packets) + { + RETURN_ON_ERROR_MSG(-1, "num_packets did not return correct value") + } + if (o.max_packets != options.max_packets) + { + 
RETURN_ON_ERROR_MSG(-1, "max_packets did not return correct value") + } + if (!o.read_reservation_is_valid) + { + RETURN_ON_ERROR_MSG(-1, "read reservation is not valid") + } + if (!o.read_success) + { + RETURN_ON_ERROR_MSG(-1, "read did not succeed") + } + + // Every value must be presented once in any order + if (o.value >= count || existing_values[o.value]) + { + RETURN_ON_ERROR_MSG(-1, "kernel did not return correct value") + } + existing_values[o.value] = true; + } + + clReleaseMemObject(pipe); + clReleaseMemObject(output_buffer); + clReleaseKernel(producer_kernel); + clReleaseKernel(consumer_kernel); + clReleaseProgram(program); + return error; +} + +const pipe_operation pipe_operations[] = { + pipe_operation::work_item, + pipe_operation::work_item_reservation, + pipe_operation::work_group_reservation, + pipe_operation::sub_group_reservation +}; + +const std::tuple max_and_num_packets[] = { + std::make_tuple(2, 2), + std::make_tuple(10, 8), + std::make_tuple(256, 254), + std::make_tuple(1 << 16, 1 << 16), + std::make_tuple((1 << 16) + 5, 1 << 16), + std::make_tuple(12345, 12344), + std::make_tuple(1 << 18, 1 << 18) +}; + +AUTO_TEST_CASE(test_pipes_pipe) +(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + std::vector> ps; + for (auto p : max_and_num_packets) + { + if (std::get<0>(p) < num_elements) + ps.push_back(p); + } + ps.push_back(std::tuple(num_elements, num_elements)); + + int error = CL_SUCCESS; + + for (auto operation : pipe_operations) + for (auto p : ps) + { + test_options options; + options.source = pipe_source::param; + options.max_packets = std::get<0>(p); + options.num_packets = std::get<1>(p); + options.operation = operation; + + error = test(device, context, queue, options); + RETURN_ON_ERROR(error) + } + + return error; +} + +AUTO_TEST_CASE(test_pipes_pipe_storage) +(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + std::vector> ps; + for (auto p : 
max_and_num_packets) + { + if (std::get<0>(p) < num_elements) + ps.push_back(p); + } + ps.push_back(std::tuple(num_elements, num_elements)); + + int error = CL_SUCCESS; + + for (auto operation : pipe_operations) + for (auto p : ps) + { + test_options options; + options.source = pipe_source::storage; + options.max_packets = std::get<0>(p); + options.num_packets = std::get<1>(p); + options.operation = operation; + + error = test(device, context, queue, options); + RETURN_ON_ERROR(error) + } + + return error; +} + +} // namespace + +#endif // TEST_CONFORMANCE_CLCPP_PIPES_TEST_PIPES_HPP diff --git a/test_conformance/clcpp/program_scope_ctors_dtors/CMakeLists.txt b/test_conformance/clcpp/program_scope_ctors_dtors/CMakeLists.txt new file mode 100644 index 00000000..4318ca81 --- /dev/null +++ b/test_conformance/clcpp/program_scope_ctors_dtors/CMakeLists.txt @@ -0,0 +1,12 @@ +set(MODULE_NAME CPP_PROGRAM_SCOPE_CTORS_DTORS) + +set(${MODULE_NAME}_SOURCES + main.cpp + ../../../test_common/harness/errorHelpers.c + ../../../test_common/harness/testHarness.c + ../../../test_common/harness/kernelHelpers.c + ../../../test_common/harness/msvc9.c + ../../../test_common/harness/parseParameters.cpp +) + +include(../../CMakeCommon.txt) diff --git a/test_conformance/clcpp/program_scope_ctors_dtors/common.hpp b/test_conformance/clcpp/program_scope_ctors_dtors/common.hpp new file mode 100644 index 00000000..35bf81c9 --- /dev/null +++ b/test_conformance/clcpp/program_scope_ctors_dtors/common.hpp @@ -0,0 +1,284 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef TEST_CONFORMANCE_CLCPP_PS_CTORS_DTORS_COMMON_HPP +#define TEST_CONFORMANCE_CLCPP_PS_CTORS_DTORS_COMMON_HPP + +#include + +#include "../common.hpp" +#include "../funcs_test_utils.hpp" + +#define RUN_PS_CTORS_DTORS_TEST_MACRO(TEST_CLASS) \ + last_error = run_ps_ctor_dtor_test( \ + device, context, queue, count, TEST_CLASS \ + ); \ + CHECK_ERROR(last_error) \ + error |= last_error; + +// Base class for all tests for kernels with program scope object with +// non-trivial ctors and/or dtors +struct ps_ctors_dtors_test_base : public detail::base_func_type +{ + // ctor is true, if and only if OpenCL program of this test contains program + // scope variable with non-trivial ctor. + // dtor is true, if and only if OpenCL program of this test contains program + // scope variable with non-trivial dtor. 
+ ps_ctors_dtors_test_base(const bool ctor, + const bool dtor) + : m_ctor(ctor), m_dtor(dtor) + { + + } + virtual ~ps_ctors_dtors_test_base() { }; + // Returns test name + virtual std::string str() = 0; + // Returns OpenCL program source + virtual std::string generate_program() = 0; + // Returns kernel names IN ORDER + virtual std::vector get_kernel_names() + { + // Typical case, that is, only one kernel + return { this->get_kernel_name() }; + } + // Returns value that is expected to be in output_buffer[i] + virtual cl_uint operator()(size_t i) = 0; + // Executes kernels + // Typical case: execute every kernel once, every kernel has only + // one argument, that is, output buffer + virtual cl_int execute(const std::vector& kernels, + cl_mem& output_buffer, + cl_command_queue& queue, + size_t work_size) + { + cl_int err; + for(auto& k : kernels) + { + err = clSetKernelArg(k, 0, sizeof(output_buffer), &output_buffer); + RETURN_ON_CL_ERROR(err, "clSetKernelArg"); + + err = clEnqueueNDRangeKernel( + queue, k, 1, + NULL, &work_size, NULL, + 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel"); + } + return err; + } + // This method check if queries for CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT + // and CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT using clGetProgramInfo() + // return correct values + virtual cl_int ctors_dtors_present_queries(cl_program program) + { + cl_int error = CL_SUCCESS; + #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + return error; + #else + // CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT cl_bool + // This indicates that the program object contains non-trivial constructor(s) that will be + // executed by runtime before any kernel from the program is executed. + + // CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT cl_bool + // This indicates that the program object contains non-trivial destructor(s) that will be + // executed by runtime when program is destroyed. 
+ + // CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT + cl_bool ctors_present; + size_t cl_bool_size; + error = clGetProgramInfo( + program, + CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT, + sizeof(cl_bool), + static_cast(&ctors_present), + &cl_bool_size + ); + RETURN_ON_CL_ERROR(error, "clGetProgramInfo") + if(cl_bool_size != sizeof(cl_bool)) + { + error = -1; + CHECK_ERROR_MSG( + error, + "Test failed, param_value_size_ret != sizeof(cl_bool) (%lu != %lu).\n", + cl_bool_size, + sizeof(cl_bool) + ); + } + + // CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT + cl_bool dtors_present = 0; + error = clGetProgramInfo( + program, + CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT, + sizeof(cl_bool), + static_cast(&ctors_present), + &cl_bool_size + ); + RETURN_ON_CL_ERROR(error, "clGetProgramInfo") + if(cl_bool_size != sizeof(cl_bool)) + { + error = -1; + CHECK_ERROR_MSG( + error, + "Test failed, param_value_size_ret != sizeof(cl_bool) (%lu != %lu).\n", + cl_bool_size, + sizeof(cl_bool) + ); + } + + // check constructors + if(m_ctor && ctors_present != CL_TRUE) + { + error = -1; + CHECK_ERROR_MSG( + error, + "Test failed, CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT: 0, should be: 1.\n" + ); + } + else if(!m_ctor && ctors_present == CL_TRUE) + { + error = -1; + CHECK_ERROR_MSG( + error, + "Test failed, CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT: 1, should be: 0.\n" + ); + } + + // check destructors + if(m_dtor && dtors_present != CL_TRUE) + { + error = -1; + CHECK_ERROR_MSG( + error, + "Test failed, CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT: 0, should be: 1.\n" + ); + } + else if(!m_dtor && dtors_present == CL_TRUE) + { + error = -1; + CHECK_ERROR_MSG( + error, + "Test failed, CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT: 1, should be: 0.\n" + ); + } + return error; + #endif + } + +private: + bool m_ctor; + bool m_dtor; +}; + +template +int run_ps_ctor_dtor_test(cl_device_id device, cl_context context, cl_command_queue queue, size_t count, ps_ctor_dtor_test op) +{ + cl_mem buffers[1]; + cl_program program; + std::vector kernels; + 
size_t work_size[1]; + cl_int err; + + std::string code_str = op.generate_program(); + std::vector kernel_names = op.get_kernel_names(); + if(kernel_names.empty()) + { + RETURN_ON_ERROR_MSG(-1, "No kernel to run"); + } + kernels.resize(kernel_names.size()); + +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +// Only OpenCL C++ to SPIR-V compilation +#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) + err = create_opencl_kernel(context, &program, &(kernels[0]), code_str, kernel_names[0]); + return err; +// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) +#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + err = create_opencl_kernel(context, &program, &(kernels[0]), code_str, kernel_names[0], "-cl-std=CL2.0", false); + RETURN_ON_ERROR(err) + for(size_t i = 1; i < kernels.size(); i++) + { + kernels[i] = clCreateKernel(program, kernel_names[i].c_str(), &err); + RETURN_ON_CL_ERROR(err, "clCreateKernel"); + } +#else + err = create_opencl_kernel(context, &program, &(kernels[0]), code_str, kernel_names[0]); + RETURN_ON_ERROR(err) + for(size_t i = 1; i < kernels.size(); i++) + { + kernels[i] = clCreateKernel(program, kernel_names[i].c_str(), &err); + RETURN_ON_CL_ERROR(err, "clCreateKernel"); + } +#endif + + work_size[0] = count; + // host output vector + std::vector output = generate_output(work_size[0], 9999); + + // device output buffer + buffers[0] = clCreateBuffer( + context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * output.size(), NULL, &err + ); + RETURN_ON_CL_ERROR(err, "clCreateBuffer") + + // Execute test + err = op.execute(kernels, buffers[0], queue, work_size[0]); + RETURN_ON_ERROR(err) + + // Check if queries returns correct values + err = op.ctors_dtors_present_queries(program); + 
RETURN_ON_ERROR(err); + + // Release kernels and program + // Destructors should be called now + for(auto& k : kernels) + { + err = clReleaseKernel(k); + RETURN_ON_CL_ERROR(err, "clReleaseKernel"); + } + err = clReleaseProgram(program); + RETURN_ON_CL_ERROR(err, "clReleaseProgram"); + + // Finish + err = clFinish(queue); + RETURN_ON_CL_ERROR(err, "clFinish"); + + err = clEnqueueReadBuffer( + queue, buffers[0], CL_TRUE, 0, sizeof(cl_uint) * output.size(), + static_cast(output.data()), 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer"); + + // Check output values + for(size_t i = 0; i < output.size(); i++) + { + cl_uint v = op(i); + if(!(are_equal(v, output[i], detail::make_value(0), op))) + { + RETURN_ON_ERROR_MSG(-1, + "test_%s(%s) failed. Expected: %s, got: %s", op.str().c_str(), type_name().c_str(), + format_value(v).c_str(), format_value(output[i]).c_str() + ); + } + } + log_info("test_%s(%s) passed\n", op.str().c_str(), type_name().c_str()); + + clReleaseMemObject(buffers[0]); + return err; +} + +#endif // TEST_CONFORMANCE_CLCPP_PS_CTORS_DTORS_COMMON_HPP diff --git a/test_conformance/clcpp/program_scope_ctors_dtors/main.cpp b/test_conformance/clcpp/program_scope_ctors_dtors/main.cpp new file mode 100644 index 00000000..ef70cdd3 --- /dev/null +++ b/test_conformance/clcpp/program_scope_ctors_dtors/main.cpp @@ -0,0 +1,29 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../common.hpp" + +#include "test_ctors_dtors.hpp" + +int main(int argc, const char *argv[]) +{ + // Get list to all test functions + std::vector testfn_list = autotest::test_suite::get_test_functions(); + // Get names of all test functions + std::vector testfn_names = autotest::test_suite::get_test_names(); + // Create a vector of pointers to the names test functions + std::vector testfn_names_c_str = autotest::get_strings_ptrs(testfn_names); + return runTestHarness(argc, argv, testfn_list.size(), testfn_list.data(), testfn_names_c_str.data(), false, false, 0); +} diff --git a/test_conformance/clcpp/program_scope_ctors_dtors/test_ctors_dtors.hpp b/test_conformance/clcpp/program_scope_ctors_dtors/test_ctors_dtors.hpp new file mode 100644 index 00000000..c9ac0821 --- /dev/null +++ b/test_conformance/clcpp/program_scope_ctors_dtors/test_ctors_dtors.hpp @@ -0,0 +1,324 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef TEST_CONFORMANCE_CLCPP_PS_CTORS_DTORS_TEST_CTORS_DTORS_HPP +#define TEST_CONFORMANCE_CLCPP_PS_CTORS_DTORS_TEST_CTORS_DTORS_HPP + +#include "common.hpp" + +// Test for program scope variable with non-trivial ctor +struct ps_ctor_test : public ps_ctors_dtors_test_base +{ + ps_ctor_test(const cl_uint test_value) + : ps_ctors_dtors_test_base(true, false), + m_test_value(test_value) + { + + } + + std::string str() + { + return "ps_ctor_test"; + } + + std::vector get_kernel_names() + { + return { + this->str() + "_set", + this->str() + "_read" + }; + } + + // Returns value that is expected to be in output_buffer[i] + cl_uint operator()(size_t i) + { + if(i % 2 == 0) + return m_test_value; + return cl_uint(0xbeefbeef); + } + + // In 1st kernel 0th work-tem sets member m_x of program scope variable global_var to + // m_test_value and m_y to uint(0xbeefbeef), + // In 2nd kernel: + // 1) if global id is even, then work-item reads global_var.m_x and writes it to output[its-global-id]; + // 2) otherwise, work-item reads global_var.m_y and writes it to output[its-global-id]. 
+ std::string generate_program() + { + // ----------------------------------------------------------------------------------- + // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ + // ----------------------------------------------------------------------------------- + #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + return + "__kernel void " + this->get_kernel_names()[0] + "(global uint *output)\n" + "{\n" + " size_t gid = get_global_id(0);\n" + " output[gid] = 0xbeefbeef;\n" + "}\n" + "__kernel void " + this->get_kernel_names()[1] + "(global uint *output)\n" + "{\n" + " size_t gid = get_global_id(0);\n" + " if(gid % 2 == 0)\n" + " output[gid] = " + std::to_string(m_test_value) + ";\n" + "}\n"; + #else + return + "#include \n" + "#include \n" + "#include \n" + "using namespace cl;\n" + // struct template + "template\n" + "struct ctor_test_class_base {\n" + // non-trivial ctor + " ctor_test_class_base(T x) { m_x = x;};\n" + " T m_x;\n" + "};\n" + // struct template + "template\n" + "struct ctor_test_class : public ctor_test_class_base {\n" + // non-trivial ctor + " ctor_test_class(T x, T y) : ctor_test_class_base(x), m_y(y) { };\n" + " T m_y;\n" + "};\n" + // global scope program variables + "ctor_test_class global_var(uint(0), uint(0));\n" + + "__kernel void " + this->get_kernel_names()[0] + "(global_ptr output)\n" + "{\n" + " size_t gid = get_global_id(0);\n" + " if(gid == 0) {\n" + " global_var.m_x = " + std::to_string(m_test_value) + ";\n" + " global_var.m_y = 0xbeefbeef;\n" + " }\n" + "}\n" + + "__kernel void " + this->get_kernel_names()[1] + "(global_ptr output)\n" + "{\n" + " size_t gid = get_global_id(0);\n" + " if(gid % 2 == 0)\n" + " output[gid] = global_var.m_x;\n" + " else\n" + " output[gid] = global_var.m_y;\n" + "}\n"; + #endif + } + +private: + cl_uint m_test_value; +}; + +// Test for program scope variable with non-trivial dtor +struct ps_dtor_test : public ps_ctors_dtors_test_base +{ + 
ps_dtor_test(const cl_uint test_value) + : ps_ctors_dtors_test_base(false, true), + m_test_value(test_value) + { + + } + + std::string str() + { + return "ps_dtor_test"; + } + + // Returns value that is expected to be in output_buffer[i] + cl_uint operator()(size_t i) + { + if(i % 2 == 0) + return m_test_value; + return 1; + } + + // In 1st kernel 0th work-item saves pointer to output buffer and its size in program scope + // variable global_var, it also sets counter to 1; + // After global_var is destroyed all even elements of output buffer should equal m_test_value, + // and all odd should equal 1. + // If odd elements of output buffer are >1 it means dtor was executed more than once. + std::string generate_program() + { + // ----------------------------------------------------------------------------------- + // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ + // ----------------------------------------------------------------------------------- + #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + return + "__kernel void " + this->get_kernel_name() + "(global uint *output)\n" + "{\n" + " size_t gid = get_global_id(0);\n" + " if(gid % 2 == 0)\n" + " output[gid] = " + std::to_string(m_test_value) + ";\n" + " else\n" + " output[gid] = 1;\n" + "}\n"; + #else + return + "#include \n" + "#include \n" + "#include \n" + "using namespace cl;\n" + // struct template + "template\n" + "struct dtor_test_class_base {\n" + // non-trivial dtor + // set all odd elements in buffer to counter + " ~dtor_test_class_base() {\n" + " for(size_t i = 1; i < this->size; i+=2)\n" + " {\n" + " this->buffer[i] = counter;\n" + " }\n" + " counter++;\n" + " };\n" + " global_ptr buffer;\n" + " size_t size;\n" + " T counter;\n" + "};\n" + // struct + "struct dtor_test_class : public dtor_test_class_base {\n" + // non-trivial dtor + // set all values in buffer to m_test_value + " ~dtor_test_class() {\n" + " for(size_t i = 0; i < this->size; i+=2)\n" + " 
this->buffer[i] = " + std::to_string(m_test_value) + ";\n" + " };\n" + "};\n" + // global scope program variable + "dtor_test_class global_var;\n" + + // When global_var is being destroyed, first dtor ~dtor_test_class is called, + // and then ~dtor_test_class_base is called. + + "__kernel void " + this->get_kernel_name() + "(global_ptr output)\n" + "{\n" + " size_t gid = get_global_id(0);\n" + // set buffer and size in global var + " if(gid == 0){\n" + " global_var.buffer = output;\n" + " global_var.size = get_global_size(0);\n" + " global_var.counter = 1;\n" + " }\n" + "}\n"; + #endif + } + +private: + cl_uint m_test_value; +}; + +// Test for program scope variable with both non-trivial ctor +// and non-trivial dtor +struct ps_ctor_dtor_test : public ps_ctors_dtors_test_base +{ + ps_ctor_dtor_test(const cl_uint test_value) + : ps_ctors_dtors_test_base(false, true), + m_test_value(test_value) + { + + } + + std::string str() + { + return "ps_ctor_dtor_test"; + } + + // Returns value that is expected to be in output_buffer[i] + cl_uint operator()(size_t i) + { + return m_test_value; + } + + // In 1st kernel 0th work-item saves pointer to output buffer and its size in program scope + // variable global_var. + // After global_var is destroyed all even elements of output buffer should equal m_test_value, + // and all odd should equal 1. 
+ std::string generate_program() + { + // ----------------------------------------------------------------------------------- + // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ + // ----------------------------------------------------------------------------------- + #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + return + "__kernel void " + this->get_kernel_name() + "(global uint *output)\n" + "{\n" + " size_t gid = get_global_id(0);\n" + " output[gid] = " + std::to_string(m_test_value) + ";\n" + "}\n"; + #else + return + "#include \n" + "#include \n" + "#include \n" + "using namespace cl;\n" + // struct template + "template\n" + "struct ctor_test_class {\n" + // non-trivial ctor + " ctor_test_class(T value) : m_value(value) { };\n" + " T m_value;\n" + "};\n\n" + // struct + "struct ctor_dtor_test_class {\n" + // non-trivial ctor + " ctor_dtor_test_class(uint value) : ctor_test(value) { } \n" + // non-trivial dtor + // set all values in buffer to m_test_value + " ~ctor_dtor_test_class() {\n" + " for(size_t i = 0; i < this->size; i++)\n" + " {\n" + " this->buffer[i] = ctor_test.m_value;\n" + " }\n" + " };\n" + " ctor_test_class ctor_test;\n" + " global_ptr buffer;\n" + " size_t size;\n" + "};\n" + // global scope program variable + "ctor_dtor_test_class global_var(" + std::to_string(m_test_value) + ");\n" + + "__kernel void " + this->get_kernel_name() + "(global_ptr output)\n" + "{\n" + " size_t gid = get_global_id(0);\n" + // set buffer and size in global var + " if(gid == 0){\n" + " global_var.buffer = output;\n" + " global_var.size = get_global_size(0);\n" + " }\n" + "}\n"; + #endif + } + +private: + cl_uint m_test_value; +}; + +// This contains tests for program scope (global) constructors and destructors, more +// detailed tests are also in clcpp/api. 
+AUTO_TEST_CASE(test_program_scope_ctors_dtors) +(cl_device_id device, cl_context context, cl_command_queue queue, int count) +{ + int error = CL_SUCCESS; + int last_error = CL_SUCCESS; + + RUN_PS_CTORS_DTORS_TEST_MACRO(ps_ctor_test(0xdeadbeefU)) + RUN_PS_CTORS_DTORS_TEST_MACRO(ps_dtor_test(0xbeefdeadU)) + RUN_PS_CTORS_DTORS_TEST_MACRO(ps_ctor_dtor_test(0xdeaddeadU)) + + if(error != CL_SUCCESS) + { + return -1; + } + return error; +} + +#endif // TEST_CONFORMANCE_CLCPP_PS_CTORS_DTORS_TEST_CTORS_DTORS_HPP diff --git a/test_conformance/clcpp/reinterpret/CMakeLists.txt b/test_conformance/clcpp/reinterpret/CMakeLists.txt new file mode 100644 index 00000000..56564476 --- /dev/null +++ b/test_conformance/clcpp/reinterpret/CMakeLists.txt @@ -0,0 +1,12 @@ +set(MODULE_NAME CPP_REINTERPRET) + +set(${MODULE_NAME}_SOURCES + main.cpp + ../../../test_common/harness/errorHelpers.c + ../../../test_common/harness/testHarness.c + ../../../test_common/harness/kernelHelpers.c + ../../../test_common/harness/msvc9.c + ../../../test_common/harness/parseParameters.cpp +) + +include(../../CMakeCommon.txt) diff --git a/test_conformance/clcpp/reinterpret/as_type.hpp b/test_conformance/clcpp/reinterpret/as_type.hpp new file mode 100644 index 00000000..da088cfa --- /dev/null +++ b/test_conformance/clcpp/reinterpret/as_type.hpp @@ -0,0 +1,223 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef TEST_CONFORMANCE_CLCPP_REINTERPRET_AS_TYPE_HPP +#define TEST_CONFORMANCE_CLCPP_REINTERPRET_AS_TYPE_HPP + +#include "../common.hpp" +#include "../funcs_test_utils.hpp" + +#include + + +template +struct as_type : public unary_func +{ + static_assert(sizeof(IN1) == sizeof(OUT1), "It is an error to use the as_type operator to reinterpret data to a type of a different number of bytes"); + + std::string str() + { + return "as_type"; + } + + std::string headers() + { + return "#include \n"; + } + + OUT1 operator()(const IN1& x) + { + return *reinterpret_cast(&x); + } +}; + +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) +template +std::string generate_kernel_as_type(func_type func) +{ + std::string in1_value = "input[gid]"; + std::string function_call = "as_" + type_name() + "(" + in1_value + ");"; + return + "__kernel void test_" + func.str() + "(global " + type_name() + " *input, global " + type_name() + " *output)\n" + "{\n" + " size_t gid = get_global_id(0);\n" + " output[gid] = " + function_call + ";\n" + "}\n"; +} +#else +template +std::string generate_kernel_as_type(func_type func) +{ + std::string headers = func.headers(); + std::string in1_value = "input[gid]"; + std::string function_call = "as_type<" + type_name() + ">(" + in1_value + ")"; + return + "" + func.defs() + + "" + headers + + "#include \n" + "#include \n" + "using namespace cl;\n" + "__kernel void test_" + func.str() + "(global_ptr<" + type_name() + "[]> input," + "global_ptr<" + type_name() + "[]> output)\n" + "{\n" + " size_t gid = get_global_id(0);\n" + " output[gid] = " + function_call + ";\n" + "}\n"; +} +#endif + +template +bool verify_as_type(const std::vector &in, const std::vector &out, as_type_op op) +{ + // 
When the operand and result type contain a different number of elements, the result is implementation-defined, + // i.e. any result is correct + if (vector_size::value == vector_size::value) + { + for (size_t i = 0; i < in.size(); i++) + { + auto expected = op(in[i]); + if (std::memcmp(&expected, &out[i], sizeof(expected)) != 0) + { + print_error_msg(expected, out[i], i, op); + return false; + } + } + } + return true; +} + +template +int test_as_type_func(cl_device_id device, cl_context context, cl_command_queue queue, size_t count, as_type_op op) +{ + cl_mem buffers[2]; + cl_program program; + cl_kernel kernel; + size_t work_size[1]; + int error; + + typedef typename as_type_op::in_type INPUT; + typedef typename as_type_op::out_type OUTPUT; + + // Don't run test for unsupported types + if (!(type_supported(device) && type_supported(device))) + { + return CL_SUCCESS; + } + + std::string code_str = generate_kernel_as_type(op); + std::string kernel_name("test_"); kernel_name += op.str(); + +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +// Only OpenCL C++ to SPIR-V compilation +#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) + error = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name); + RETURN_ON_ERROR(error) + return error; +// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) +#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + error = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name, "-cl-std=CL2.0", false); + RETURN_ON_ERROR(error) +#else + error = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name); + RETURN_ON_ERROR(error) +#endif + + std::vector input = generate_input(count, op.min1(), op.max1(), op.in_special_cases()); + std::vector output = 
generate_output(count); + + buffers[0] = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(INPUT) * input.size(), NULL, &error); + RETURN_ON_CL_ERROR(error, "clCreateBuffer") + + buffers[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(OUTPUT) * output.size(), NULL, &error); + RETURN_ON_CL_ERROR(error, "clCreateBuffer") + + error = clEnqueueWriteBuffer( + queue, buffers[0], CL_TRUE, 0, sizeof(INPUT) * input.size(), + static_cast(input.data()), 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(error, "clEnqueueWriteBuffer") + + error = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]); + RETURN_ON_CL_ERROR(error, "clSetKernelArg") + error = clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]); + RETURN_ON_CL_ERROR(error, "clSetKernelArg") + + work_size[0] = count; + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, NULL, 0, NULL, NULL); + RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel") + + error = clEnqueueReadBuffer( + queue, buffers[1], CL_TRUE, 0, sizeof(OUTPUT) * output.size(), + static_cast(output.data()), 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer") + + if (!verify_as_type(input, output, op)) + { + RETURN_ON_ERROR_MSG(-1, "test_%s %s(%s) failed", op.str().c_str(), type_name().c_str(), type_name().c_str()); + } + log_info("test_%s %s(%s) passed\n", op.str().c_str(), type_name().c_str(), type_name().c_str()); + + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseKernel(kernel); + clReleaseProgram(program); + return error; +} + +AUTO_TEST_CASE(test_as_type) +(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + int error = CL_SUCCESS; + int last_error = CL_SUCCESS; + +#define TEST_AS_TYPE_MACRO(TYPE1, TYPE2) \ + last_error = test_as_type_func( \ + device, context, queue, n_elems, as_type() \ + ); \ + CHECK_ERROR(last_error) \ + error |= last_error; + + TEST_AS_TYPE_MACRO(cl_int, cl_int) + TEST_AS_TYPE_MACRO(cl_uint, cl_int) + TEST_AS_TYPE_MACRO(cl_int, 
cl_ushort2) + TEST_AS_TYPE_MACRO(cl_uchar, cl_uchar) + TEST_AS_TYPE_MACRO(cl_char4, cl_ushort2) + TEST_AS_TYPE_MACRO(cl_uchar16, cl_char16) + TEST_AS_TYPE_MACRO(cl_short8, cl_uchar16) + TEST_AS_TYPE_MACRO(cl_float4, cl_uint4) + TEST_AS_TYPE_MACRO(cl_float16, cl_int16) + TEST_AS_TYPE_MACRO(cl_long2, cl_float4) + TEST_AS_TYPE_MACRO(cl_ulong, cl_long) + TEST_AS_TYPE_MACRO(cl_ulong16, cl_double16) + TEST_AS_TYPE_MACRO(cl_uchar16, cl_double2) + TEST_AS_TYPE_MACRO(cl_ulong4, cl_short16) + +#undef TEST_AS_TYPE_MACRO + + if (error != CL_SUCCESS) + { + return -1; + } + return error; +} + + +#endif // TEST_CONFORMANCE_CLCPP_REINTERPRET_AS_TYPE_HPP diff --git a/test_conformance/clcpp/reinterpret/main.cpp b/test_conformance/clcpp/reinterpret/main.cpp new file mode 100644 index 00000000..3b5272df --- /dev/null +++ b/test_conformance/clcpp/reinterpret/main.cpp @@ -0,0 +1,30 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../common.hpp" + +#include "as_type.hpp" + + +int main(int argc, const char *argv[]) +{ + // Get list to all test functions + std::vector testfn_list = autotest::test_suite::get_test_functions(); + // Get names of all test functions + std::vector testfn_names = autotest::test_suite::get_test_names(); + // Create a vector of pointers to the names test functions + std::vector testfn_names_c_str = autotest::get_strings_ptrs(testfn_names); + return runTestHarness(argc, argv, testfn_list.size(), testfn_list.data(), testfn_names_c_str.data(), false, false, 0); +} diff --git a/test_conformance/clcpp/relational_funcs/CMakeLists.txt b/test_conformance/clcpp/relational_funcs/CMakeLists.txt new file mode 100644 index 00000000..7eca7a11 --- /dev/null +++ b/test_conformance/clcpp/relational_funcs/CMakeLists.txt @@ -0,0 +1,12 @@ +set(MODULE_NAME CPP_RELATIONAL_FUNCS) + +set(${MODULE_NAME}_SOURCES + main.cpp + ../../../test_common/harness/errorHelpers.c + ../../../test_common/harness/testHarness.c + ../../../test_common/harness/kernelHelpers.c + ../../../test_common/harness/msvc9.c + ../../../test_common/harness/parseParameters.cpp +) + +include(../../CMakeCommon.txt) diff --git a/test_conformance/clcpp/relational_funcs/common.hpp b/test_conformance/clcpp/relational_funcs/common.hpp new file mode 100644 index 00000000..a13f7bac --- /dev/null +++ b/test_conformance/clcpp/relational_funcs/common.hpp @@ -0,0 +1,112 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef TEST_CONFORMANCE_CLCPP_RELATIONAL_FUNCS_COMMON_HPP +#define TEST_CONFORMANCE_CLCPP_RELATIONAL_FUNCS_COMMON_HPP + +#include "../common.hpp" +#include "../funcs_test_utils.hpp" + +#include +#include + +template +OUT1 perform_function(const IN1& in1, const IN2& in2, const IN3& in3, F func, typename std::enable_if::value>::type* = 0) +{ + OUT1 result; + for(size_t i = 0; i < vector_size::value; i++) + { + result.s[i] = func(in1.s[i], in2.s[i], in3.s[i]); + } + return result; +} + +template +OUT1 perform_function(const IN1& in1, const IN2& in2, const IN3& in3, F func, typename std::enable_if::value>::type* = 0) +{ + OUT1 result = func(in1, in2, in3); + return result; +} + + +template +OUT1 perform_function(const IN1& in1, const IN2& in2, F func, typename std::enable_if::value>::type* = 0) +{ + OUT1 result; + for(size_t i = 0; i < vector_size::value; i++) + { + result.s[i] = func(in1.s[i], in2.s[i]); + } + return result; +} + +template +OUT1 perform_function(const IN1& in1, const IN2& in2, F func, typename std::enable_if::value>::type* = 0) +{ + OUT1 result = func(in1, in2); + return result; +} + +template +OUT1 perform_function(const IN1& in1, F func, typename std::enable_if::value>::type* = 0) +{ + OUT1 result; + for(size_t i = 0; i < vector_size::value; i++) + { + result.s[i] = func(in1.s[i]); + } + return result; +} + +template +OUT1 perform_function(const IN1& in1, F func, typename std::enable_if::value>::type* = 0) +{ + OUT1 result = func(in1); + return result; +} + +template +cl_int perform_all_function(const IN1& in1, typename std::enable_if::value>::type* = 0) +{ + cl_int result = 1; + for(size_t i = 0; i < vector_size::value; i++) + { + result = (in1.s[i] != 0) ? result : cl_int(0); + } + return result; +} + +cl_int perform_all_function(const cl_int& in1, typename std::enable_if::value>::type* = 0) +{ + return (in1 != 0) ? 
cl_int(1) : cl_int(0); +} + +template +cl_int perform_any_function(const IN1& in1, typename std::enable_if::value>::type* = 0) +{ + cl_int result = 0; + for(size_t i = 0; i < vector_size::value; i++) + { + result = (in1.s[i] != 0) ? cl_int(1) : result; + } + return result; +} + +cl_int perform_any_function(const cl_int& in1, typename std::enable_if::value>::type* = 0) +{ + return (in1 != 0) ? cl_int(1) : cl_int(0); +} + +#endif // TEST_CONFORMANCE_CLCPP_RELATIONAL_FUNCS_COMMON_HPP diff --git a/test_conformance/clcpp/relational_funcs/comparison_funcs.hpp b/test_conformance/clcpp/relational_funcs/comparison_funcs.hpp new file mode 100644 index 00000000..980d67c8 --- /dev/null +++ b/test_conformance/clcpp/relational_funcs/comparison_funcs.hpp @@ -0,0 +1,150 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef TEST_CONFORMANCE_CLCPP_RELATIONAL_FUNCS_COMPARISON_FUNCS_HPP +#define TEST_CONFORMANCE_CLCPP_RELATIONAL_FUNCS_COMPARISON_FUNCS_HPP + +#include "common.hpp" + +// This macro creates a class wrapper for comparison function we want to test. 
+#define DEF_COMPARISION_FUNC(CLASS_NAME, FUNC_NAME, HOST_FUNC_EXPRESSION) \ +template \ +struct CLASS_NAME : public binary_func< \ + typename make_vector_type::type, /* create cl_floatN type */ \ + typename make_vector_type::type, /* create cl_floatN type */ \ + typename make_vector_type::type /* create cl_intN type */ \ + > \ +{ \ + typedef typename make_vector_type::type input_type; \ + typedef typename make_vector_type::type result_type; \ + \ + std::string str() \ + { \ + return #FUNC_NAME; \ + } \ + \ + std::string headers() \ + { \ + return "#include \n"; \ + } \ + \ + result_type operator()(const input_type& x, const input_type& y) \ + { \ + typedef typename scalar_type::type SCALAR; \ + return perform_function( \ + x, y, \ + [](const SCALAR& a, const SCALAR& b) \ + { \ + if(HOST_FUNC_EXPRESSION) \ + { \ + return cl_int(1); \ + } \ + return cl_int(0); \ + } \ + ); \ + } \ + \ + bool is_out_bool() \ + { \ + return true; \ + } \ + \ + input_type min1() \ + { \ + return detail::def_limit(-10000.0f); \ + } \ + \ + input_type max1() \ + { \ + return detail::def_limit(10000.0f); \ + } \ + \ + input_type min2() \ + { \ + return detail::def_limit(-10000.0f); \ + } \ + \ + input_type max2() \ + { \ + return detail::def_limit(10000.0f); \ + } \ + \ + std::vector in1_special_cases() \ + { \ + typedef typename scalar_type::type SCALAR; \ + return { \ + detail::make_value(std::numeric_limits::infinity()), \ + detail::make_value(-std::numeric_limits::infinity()), \ + detail::make_value(std::numeric_limits::quiet_NaN()), \ + detail::make_value(0.0f), \ + detail::make_value(-0.0f) \ + }; \ + } \ + \ + std::vector in2_special_cases() \ + { \ + typedef typename scalar_type::type SCALAR; \ + return { \ + detail::make_value(std::numeric_limits::infinity()), \ + detail::make_value(-std::numeric_limits::infinity()), \ + detail::make_value(std::numeric_limits::quiet_NaN()), \ + detail::make_value(0.0f), \ + detail::make_value(-0.0f) \ + }; \ + } \ +}; + 
+DEF_COMPARISION_FUNC(comparison_func_isequal, isequal, (a == b)) +DEF_COMPARISION_FUNC(comparison_func_isnotequal, isnotequal, !(a == b)) +DEF_COMPARISION_FUNC(comparison_func_isgreater, isgreater, (std::isgreater)(a, b)) +DEF_COMPARISION_FUNC(comparison_func_isgreaterequal, isgreaterequal, ((std::isgreater)(a, b) || a == b)) +DEF_COMPARISION_FUNC(comparison_func_isless, isless, (std::isless)(a, b)) +DEF_COMPARISION_FUNC(comparison_func_islessequal, islessequal, ((std::isless)(a, b) || a == b)) +DEF_COMPARISION_FUNC(comparison_func_islessgreater, islessgreater, ((a < b) || (a > b))) + +#undef DEF_COMPARISION_FUNC + +AUTO_TEST_CASE(test_relational_comparison_funcs) +(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + int error = CL_SUCCESS; + int last_error = CL_SUCCESS; + +// Helper macro, so we don't have to repeat the same code. +#define TEST_BINARY_REL_FUNC_MACRO(CLASS_NAME) \ + TEST_BINARY_FUNC_MACRO(CLASS_NAME<1>()) \ + TEST_BINARY_FUNC_MACRO(CLASS_NAME<2>()) \ + TEST_BINARY_FUNC_MACRO(CLASS_NAME<4>()) \ + TEST_BINARY_FUNC_MACRO(CLASS_NAME<8>()) \ + TEST_BINARY_FUNC_MACRO(CLASS_NAME<16>()) + + TEST_BINARY_REL_FUNC_MACRO(comparison_func_isequal) + TEST_BINARY_REL_FUNC_MACRO(comparison_func_isnotequal) + TEST_BINARY_REL_FUNC_MACRO(comparison_func_isgreater) + TEST_BINARY_REL_FUNC_MACRO(comparison_func_isgreaterequal) + TEST_BINARY_REL_FUNC_MACRO(comparison_func_isless) + TEST_BINARY_REL_FUNC_MACRO(comparison_func_islessequal) + TEST_BINARY_REL_FUNC_MACRO(comparison_func_islessgreater) + +#undef TEST_BINARY_REL_FUNC_MACRO + + if(error != CL_SUCCESS) + { + return -1; + } + return error; +} + +#endif // TEST_CONFORMANCE_CLCPP_RELATIONAL_FUNCS_COMPARISON_FUNCS_HPP diff --git a/test_conformance/clcpp/relational_funcs/main.cpp b/test_conformance/clcpp/relational_funcs/main.cpp new file mode 100644 index 00000000..6d708920 --- /dev/null +++ b/test_conformance/clcpp/relational_funcs/main.cpp @@ -0,0 +1,31 @@ +// +// Copyright (c) 2017 
The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../common.hpp" + +#include "comparison_funcs.hpp" +#include "select_funcs.hpp" +#include "test_funcs.hpp" + +int main(int argc, const char *argv[]) +{ + // Get list to all test functions + std::vector testfn_list = autotest::test_suite::get_test_functions(); + // Get names of all test functions + std::vector testfn_names = autotest::test_suite::get_test_names(); + // Create a vector of pointers to the names test functions + std::vector testfn_names_c_str = autotest::get_strings_ptrs(testfn_names); + return runTestHarness(argc, argv, testfn_list.size(), testfn_list.data(), testfn_names_c_str.data(), false, false, 0); +} diff --git a/test_conformance/clcpp/relational_funcs/select_funcs.hpp b/test_conformance/clcpp/relational_funcs/select_funcs.hpp new file mode 100644 index 00000000..2e6f6bdd --- /dev/null +++ b/test_conformance/clcpp/relational_funcs/select_funcs.hpp @@ -0,0 +1,158 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef TEST_CONFORMANCE_CLCPP_RELATIONAL_FUNCS_SELECT_FUNCS_HPP +#define TEST_CONFORMANCE_CLCPP_RELATIONAL_FUNCS_SELECT_FUNCS_HPP + +#include "common.hpp" + +template +struct select_func_select : public ternary_func< + typename make_vector_type::type, /* create IN1N type */ + typename make_vector_type::type, /* create IN1N type */ + typename make_vector_type::type, /* create cl_intN type */ + typename make_vector_type::type /* create IN1N type */ + > +{ + typedef typename make_vector_type::type input1_type; + typedef typename make_vector_type::type input2_type; + typedef typename make_vector_type::type input3_type; + typedef typename make_vector_type::type result_type; + + std::string str() + { + return "select"; + } + + std::string headers() + { + return "#include \n"; + } + + result_type operator()(const input1_type& x, const input2_type& y, const input3_type& z) + { + typedef typename scalar_type::type SCALAR1; + typedef typename scalar_type::type SCALAR2; + typedef typename scalar_type::type SCALAR3; + + return perform_function( + x, y, z, + [](const SCALAR1& a, const SCALAR2& b, const SCALAR3& c) + { + return (c != 0) ? 
b : a; + } + ); + } + + bool is_in3_bool() + { + return true; + } + + std::vector in3_special_cases() + { + return { + detail::make_value(0), + detail::make_value(1), + detail::make_value(12), + detail::make_value(-12) + }; + } +}; + +template +struct select_func_bitselect : public ternary_func< + typename make_vector_type::type, /* create IN1N type */ + typename make_vector_type::type, /* create IN1N type */ + typename make_vector_type::type, /* create cl_intN type */ + typename make_vector_type::type /* create IN1N type */ + > +{ + typedef typename make_vector_type::type input1_type; + typedef typename make_vector_type::type input2_type; + typedef typename make_vector_type::type input3_type; + typedef typename make_vector_type::type result_type; + + std::string str() + { + return "bitselect"; + } + + std::string headers() + { + return "#include \n"; + } + + result_type operator()(const input1_type& x, const input2_type& y, const input3_type& z) + { + static_assert( + std::is_integral::value, + "bitselect test is implemented only for integers." + ); + static_assert( + std::is_unsigned::value, + "IN1 type should be unsigned, bitwise operations on signed int may cause problems." + ); + typedef typename scalar_type::type SCALAR1; + typedef typename scalar_type::type SCALAR2; + typedef typename scalar_type::type SCALAR3; + + return perform_function( + x, y, z, + [](const SCALAR1& a, const SCALAR2& b, const SCALAR3& c) + { + return (~c & a) | (c & b); + } + ); + } +}; + +AUTO_TEST_CASE(test_relational_select_funcs) +(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + int error = CL_SUCCESS; + int last_error = CL_SUCCESS; + +// Tests for select(gentype a, gentype b, booln c) are not run in USE_OPENCLC_KERNELS +// mode, because this functions in OpenCL C requires different reference functions on host +// compared to their equivalent in OpenCL C++. 
+// (In OpenCL C the result of select(), when gentype is vector type, is based on the most +// significant bits of c components) +#ifndef USE_OPENCLC_KERNELS + // gentype select(gentype a, gentype b, booln c) + TEST_TERNARY_FUNC_MACRO((select_func_select())) + TEST_TERNARY_FUNC_MACRO((select_func_select())) + TEST_TERNARY_FUNC_MACRO((select_func_select())) + TEST_TERNARY_FUNC_MACRO((select_func_select())) + TEST_TERNARY_FUNC_MACRO((select_func_select())) +#else + log_info("WARNING:\n\tTests for select(gentype a, gentype b, booln c) are not run in USE_OPENCLC_KERNELS mode\n"); +#endif + + // gentype bitselect(gentype a, gentype b, gentype c) + TEST_TERNARY_FUNC_MACRO((select_func_bitselect())) + TEST_TERNARY_FUNC_MACRO((select_func_bitselect())) + TEST_TERNARY_FUNC_MACRO((select_func_bitselect())) + TEST_TERNARY_FUNC_MACRO((select_func_bitselect())) + TEST_TERNARY_FUNC_MACRO((select_func_bitselect())) + + if(error != CL_SUCCESS) + { + return -1; + } + return error; +} + +#endif // TEST_CONFORMANCE_CLCPP_RELATIONAL_FUNCS_SELECT_FUNCS_HPP diff --git a/test_conformance/clcpp/relational_funcs/test_funcs.hpp b/test_conformance/clcpp/relational_funcs/test_funcs.hpp new file mode 100644 index 00000000..77e3d871 --- /dev/null +++ b/test_conformance/clcpp/relational_funcs/test_funcs.hpp @@ -0,0 +1,336 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef TEST_CONFORMANCE_CLCPP_RELATIONAL_FUNCS_TEST_FUNCS_HPP +#define TEST_CONFORMANCE_CLCPP_RELATIONAL_FUNCS_TEST_FUNCS_HPP + +#include "common.hpp" + +// This marco creates a class wrapper for unary test function we want to test. +#define DEF_UNARY_TEST_FUNC(CLASS_NAME, FUNC_NAME, HOST_FUNC_EXPRESSION) \ +template \ +struct CLASS_NAME : public unary_func< \ + typename make_vector_type::type, /* create cl_floatN type */ \ + typename make_vector_type::type /* create cl_intN type */ \ + > \ +{ \ + typedef typename make_vector_type::type input_type; \ + typedef typename make_vector_type::type result_type; \ + \ + std::string str() \ + { \ + return #FUNC_NAME; \ + } \ + \ + std::string headers() \ + { \ + return "#include \n"; \ + } \ + \ + result_type operator()(const input_type& x) \ + { \ + typedef typename scalar_type::type SCALAR; \ + return perform_function( \ + x, \ + [](const SCALAR& a) \ + { \ + if(HOST_FUNC_EXPRESSION) \ + { \ + return cl_int(1); \ + } \ + return cl_int(0); \ + } \ + ); \ + } \ + \ + bool is_out_bool() \ + { \ + return true; \ + } \ + \ + input_type min1() \ + { \ + return detail::def_limit(-10000.0f); \ + } \ + \ + input_type max1() \ + { \ + return detail::def_limit(10000.0f); \ + } \ + \ + std::vector in1_special_cases() \ + { \ + typedef typename scalar_type::type SCALAR; \ + return { \ + detail::make_value(std::numeric_limits::infinity()), \ + detail::make_value(-std::numeric_limits::infinity()), \ + detail::make_value(std::numeric_limits::quiet_NaN()), \ + detail::make_value(std::numeric_limits::signaling_NaN()), \ + detail::make_value(std::numeric_limits::denorm_min()), \ + detail::make_value(0.0f), \ + detail::make_value(-0.0f) \ + }; \ + } \ +}; + +// This marco creates a class wrapper for binary test function we want to test. 
+#define DEF_BINARY_TEST_FUNC(CLASS_NAME, FUNC_NAME, HOST_FUNC_EXPRESSION) \ +template \ +struct CLASS_NAME : public binary_func< \ + typename make_vector_type::type, /* create cl_floatN type */ \ + typename make_vector_type::type, /* create cl_floatN type */ \ + typename make_vector_type::type /* create cl_intN type */ \ + > \ +{ \ + typedef typename make_vector_type::type input_type; \ + typedef typename make_vector_type::type result_type; \ + \ + std::string str() \ + { \ + return #FUNC_NAME; \ + } \ + \ + std::string headers() \ + { \ + return "#include \n"; \ + } \ + \ + result_type operator()(const input_type& x, const input_type& y) \ + { \ + typedef typename scalar_type::type SCALAR; \ + return perform_function( \ + x, y, \ + [](const SCALAR& a, const SCALAR& b) \ + { \ + if(HOST_FUNC_EXPRESSION) \ + { \ + return cl_int(1); \ + } \ + return cl_int(0); \ + } \ + ); \ + } \ + \ + bool is_out_bool() \ + { \ + return true; \ + } \ + \ + input_type min1() \ + { \ + return detail::def_limit(-10000.0f); \ + } \ + \ + input_type max1() \ + { \ + return detail::def_limit(10000.0f); \ + } \ + \ + input_type min2() \ + { \ + return detail::def_limit(-10000.0f); \ + } \ + \ + input_type max2() \ + { \ + return detail::def_limit(10000.0f); \ + } \ + \ + std::vector in1_special_cases() \ + { \ + typedef typename scalar_type::type SCALAR; \ + return { \ + detail::make_value(std::numeric_limits::infinity()), \ + detail::make_value(-std::numeric_limits::infinity()), \ + detail::make_value(std::numeric_limits::quiet_NaN()), \ + detail::make_value(std::numeric_limits::signaling_NaN()), \ + detail::make_value(std::numeric_limits::denorm_min()), \ + detail::make_value(0.0f), \ + detail::make_value(-0.0f) \ + }; \ + } \ + \ + std::vector in2_special_cases() \ + { \ + typedef typename scalar_type::type SCALAR; \ + return { \ + detail::make_value(std::numeric_limits::infinity()), \ + detail::make_value(-std::numeric_limits::infinity()), \ + 
detail::make_value(std::numeric_limits::quiet_NaN()), \ + detail::make_value(std::numeric_limits::signaling_NaN()), \ + detail::make_value(std::numeric_limits::denorm_min()), \ + detail::make_value(0.0f), \ + detail::make_value(-0.0f) \ + }; \ + } \ +}; + +DEF_UNARY_TEST_FUNC(test_func_isfinite, isfinite, (std::isfinite)(a)) +DEF_UNARY_TEST_FUNC(test_func_isinf, isinf, (std::isinf)(a)) +DEF_UNARY_TEST_FUNC(test_func_isnan, isnan, (std::isnan)(a)) +DEF_UNARY_TEST_FUNC(test_func_isnormal, isnormal, (std::isnormal)(a)) +DEF_UNARY_TEST_FUNC(test_func_signbit, signbit , (std::signbit)(a)) + +DEF_BINARY_TEST_FUNC(test_func_isordered, isordered, !(std::isunordered)(a, b)) +DEF_BINARY_TEST_FUNC(test_func_isunordered, isunordered, (std::isunordered)(a, b)) + +#undef DEF_UNARY_TEST_FUNC +#undef DEF_BINARY_TEST_FUNC + +template +struct test_func_all : public unary_func< + typename make_vector_type::type, /* create cl_intN type */ + cl_int /* create cl_intN type */ + > +{ + typedef typename make_vector_type::type input_type; + typedef cl_int result_type; + + std::string str() + { + return "all"; + } + + std::string headers() + { + return "#include \n"; + } + + result_type operator()(const input_type& x) + { + return perform_all_function(x); + } + + bool is_out_bool() + { + return true; + } + + bool is_in1_bool() + { + return true; + } + + std::vector in1_special_cases() + { + return { + detail::make_value(0), + detail::make_value(1), + detail::make_value(12), + detail::make_value(-12) + }; + } +}; + +template +struct test_func_any : public unary_func< + typename make_vector_type::type, /* create cl_intN type */ + cl_int /* create cl_intN type */ + > +{ + typedef typename make_vector_type::type input_type; + typedef cl_int result_type; + + std::string str() + { + return "any"; + } + + std::string headers() + { + return "#include \n"; + } + + result_type operator()(const input_type& x) + { + return perform_any_function(x); + } + + bool is_out_bool() + { + return true; + } + + 
bool is_in1_bool() + { + return true; + } + + std::vector in1_special_cases() + { + return { + detail::make_value(0), + detail::make_value(1), + detail::make_value(12), + detail::make_value(-12) + }; + } +}; + +AUTO_TEST_CASE(test_relational_test_funcs) +(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + int error = CL_SUCCESS; + int last_error = CL_SUCCESS; + +// Helper macro, so we don't have to repreat the same code. +#define TEST_UNARY_REL_FUNC_MACRO(CLASS_NAME) \ + TEST_UNARY_FUNC_MACRO(CLASS_NAME<1>()) \ + TEST_UNARY_FUNC_MACRO(CLASS_NAME<2>()) \ + TEST_UNARY_FUNC_MACRO(CLASS_NAME<4>()) \ + TEST_UNARY_FUNC_MACRO(CLASS_NAME<8>()) \ + TEST_UNARY_FUNC_MACRO(CLASS_NAME<16>()) + + TEST_UNARY_REL_FUNC_MACRO(test_func_isfinite) + TEST_UNARY_REL_FUNC_MACRO(test_func_isinf) + TEST_UNARY_REL_FUNC_MACRO(test_func_isnan) + TEST_UNARY_REL_FUNC_MACRO(test_func_isnormal) + TEST_UNARY_REL_FUNC_MACRO(test_func_signbit) + +// Tests for all(booln x) and any(booln x) are not run in USE_OPENCLC_KERNELS mode, +// because those functions in OpenCL C require different reference functions on host +// compared to their equivalents from OpenCL C++. 
+// (In OpenCL C those functions returns true/false based on the most significant bits +// in any/all component/s of x) +#ifndef USE_OPENCLC_KERNELS + TEST_UNARY_REL_FUNC_MACRO(test_func_all) + TEST_UNARY_REL_FUNC_MACRO(test_func_any) +#else + log_info("WARNING:\n\tTests for bool all(booln x) are not run in USE_OPENCLC_KERNELS mode\n"); + log_info("WARNING:\n\tTests for bool any(booln x) are not run in USE_OPENCLC_KERNELS mode\n"); +#endif + +#undef TEST_UNARY_REL_FUNC_MACRO + +#define TEST_BINARY_REL_FUNC_MACRO(CLASS_NAME) \ + TEST_BINARY_FUNC_MACRO(CLASS_NAME<1>()) \ + TEST_BINARY_FUNC_MACRO(CLASS_NAME<2>()) \ + TEST_BINARY_FUNC_MACRO(CLASS_NAME<4>()) \ + TEST_BINARY_FUNC_MACRO(CLASS_NAME<8>()) \ + TEST_BINARY_FUNC_MACRO(CLASS_NAME<16>()) + + TEST_BINARY_REL_FUNC_MACRO(test_func_isordered) + TEST_BINARY_REL_FUNC_MACRO(test_func_isunordered) + +#undef TEST_BINARY_REL_FUNC_MACRO + + if(error != CL_SUCCESS) + { + return -1; + } + return error; +} + +#endif // TEST_CONFORMANCE_CLCPP_RELATIONAL_FUNCS_TEST_FUNCS_HPP diff --git a/test_conformance/clcpp/spec_constants/CMakeLists.txt b/test_conformance/clcpp/spec_constants/CMakeLists.txt new file mode 100644 index 00000000..80904bf7 --- /dev/null +++ b/test_conformance/clcpp/spec_constants/CMakeLists.txt @@ -0,0 +1,12 @@ +set(MODULE_NAME CPP_SPEC_CONSTANTS) + +set(${MODULE_NAME}_SOURCES + main.cpp + ../../../test_common/harness/errorHelpers.c + ../../../test_common/harness/testHarness.c + ../../../test_common/harness/kernelHelpers.c + ../../../test_common/harness/msvc9.c + ../../../test_common/harness/parseParameters.cpp +) + +include(../../CMakeCommon.txt) diff --git a/test_conformance/clcpp/spec_constants/common.hpp b/test_conformance/clcpp/spec_constants/common.hpp new file mode 100644 index 00000000..3846fe83 --- /dev/null +++ b/test_conformance/clcpp/spec_constants/common.hpp @@ -0,0 +1,257 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef TEST_CONFORMANCE_CLCPP_SPEC_CONSTANTS_COMMON_HPP +#define TEST_CONFORMANCE_CLCPP_SPEC_CONSTANTS_COMMON_HPP + +#include + +#include "../common.hpp" +#include "../funcs_test_utils.hpp" + +#define RUN_SPEC_CONSTANTS_TEST_MACRO(TEST_CLASS) \ + last_error = run_spec_constants_test( \ + device, context, queue, n_elems, TEST_CLASS \ + ); \ + CHECK_ERROR(last_error) \ + error |= last_error; + +// Base class for all tests of cl::spec_contatnt +template +struct spec_constants_test : public detail::base_func_type +{ + // Output buffer type + typedef T type; + + virtual ~spec_constants_test() {}; + // Returns test name + virtual std::string str() = 0; + // Returns OpenCL program source + virtual std::string generate_program() = 0; + + // Return names of test's kernels, in order. + // Typical case: one kernel. 
+ virtual std::vector get_kernel_names() + { + // Typical case, that is, only one kernel + return { this->get_kernel_name() }; + } + + // If local size has to be set in clEnqueueNDRangeKernel() + // this should return true; otherwise - false; + virtual bool set_local_size() + { + return false; + } + + // Calculates maximal work-group size (one dim) + virtual size_t get_max_local_size(const std::vector& kernels, + cl_device_id device, + size_t work_group_size, // default work-group size + cl_int& error) + { + size_t wg_size = work_group_size; + for(auto& k : kernels) + { + size_t max_wg_size; + error = clGetKernelWorkGroupInfo( + k, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &max_wg_size, NULL + ); + RETURN_ON_CL_ERROR(error, "clGetKernelWorkGroupInfo") + wg_size = (std::min)(wg_size, max_wg_size); + } + return wg_size; + } + + // Sets spec constants + // Typical case: no spec constants to set + virtual cl_int set_spec_constants(const cl_program& program) + { + return CL_SUCCESS; + } + + // This covers typical case: + // 1. each kernel is executed once, + // 2. the only argument in every kernel is output_buffer + virtual cl_int execute(const std::vector& kernels, + cl_mem& output_buffer, + cl_command_queue& queue, + size_t work_size, + size_t work_group_size) + { + cl_int err; + for(auto& k : kernels) + { + err = clSetKernelArg(k, 0, sizeof(output_buffer), &output_buffer); + RETURN_ON_CL_ERROR(err, "clSetKernelArg"); + + err = clEnqueueNDRangeKernel( + queue, k, 1, + NULL, &work_size, this->set_local_size() ? &work_group_size : NULL, + 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel"); + } + return err; + } + + // This is a function which performs additional queries and checks + // if the results are correct. This method is run after checking that + // test results (output values) are correct. 
+ virtual cl_int check_queries(const std::vector& kernels, + cl_device_id device, + cl_context context, + cl_command_queue queue) + { + (void) kernels; + (void) device; + (void) context; + (void) queue; + return CL_SUCCESS; + } +}; + +template +int run_spec_constants_test(cl_device_id device, cl_context context, cl_command_queue queue, size_t count, spec_constants_test op) +{ + cl_mem buffers[1]; + cl_program program; + std::vector kernels; + size_t wg_size; + size_t work_size[1]; + cl_int err; + + typedef typename spec_constants_test::type TYPE; + + // Don't run test for unsupported types + if(!(type_supported(device))) + { + return CL_SUCCESS; + } + + std::string code_str = op.generate_program(); + std::vector kernel_names = op.get_kernel_names(); + if(kernel_names.empty()) + { + RETURN_ON_ERROR_MSG(-1, "No kernel to run"); + } + kernels.resize(kernel_names.size()); + + std::string options = ""; + if(is_extension_available(device, "cl_khr_fp16")) + { + options += " -cl-fp16-enable"; + } + if(is_extension_available(device, "cl_khr_fp64")) + { + options += " -cl-fp64-enable"; + } +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +// Only OpenCL C++ to SPIR-V compilation +#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) + err = create_opencl_kernel(context, &program, &(kernels[0]), code_str, kernel_names[0], options); + return err; +// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) +#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + err = create_opencl_kernel(context, &program, &(kernels[0]), code_str, kernel_names[0], "-cl-std=CL2.0", false); + RETURN_ON_ERROR(err) + for(size_t i = 1; i < kernels.size(); i++) + { + kernels[i] = clCreateKernel(program, kernel_names[i].c_str(), &err); + RETURN_ON_CL_ERROR(err, 
"clCreateKernel"); + } +#else + const char * code_c_str = code_str.c_str(); + err = create_openclcpp_program(context, &program, 1, &(code_c_str), options.c_str()); + RETURN_ON_ERROR_MSG(err, "Creating OpenCL C++ program failed") + + // Set spec constants + err = op.set_spec_constants(program); + RETURN_ON_ERROR_MSG(err, "Setting Spec Constants failed") + + // Build program and create 1st kernel + err = build_program_create_kernel_helper( + context, &program, &(kernels[0]), 1, &(code_c_str), kernel_names[0].c_str() + ); + RETURN_ON_ERROR_MSG(err, "Unable to build program or to create kernel") + // Create other kernels + for(size_t i = 1; i < kernels.size(); i++) + { + kernels[i] = clCreateKernel(program, kernel_names[i].c_str(), &err); + RETURN_ON_CL_ERROR(err, "clCreateKernel"); + } +#endif + + // Find the max possible wg size for among all the kernels + wg_size = op.get_max_local_size(kernels, device, 1024, err); + RETURN_ON_ERROR(err); + + work_size[0] = count; + if(op.set_local_size()) + { + size_t wg_number = static_cast( + std::ceil(static_cast(count) / wg_size) + ); + work_size[0] = wg_number * wg_size; + } + + // host output vector + std::vector output = generate_output(work_size[0], 9999); + + // device output buffer + buffers[0] = clCreateBuffer( + context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(TYPE) * output.size(), NULL, &err + ); + RETURN_ON_CL_ERROR(err, "clCreateBuffer"); + + // Execute test + err = op.execute(kernels, buffers[0], queue, work_size[0], wg_size); + RETURN_ON_ERROR(err) + + err = clEnqueueReadBuffer( + queue, buffers[0], CL_TRUE, 0, sizeof(TYPE) * output.size(), + static_cast(output.data()), 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer"); + + // Check output values + for(size_t i = 0; i < output.size(); i++) + { + TYPE v = op(i, wg_size); + if(!(are_equal(v, output[i], detail::make_value(0), op))) + { + RETURN_ON_ERROR_MSG(-1, + "test_%s(%s) failed. 
Expected: %s, got: %s", op.str().c_str(), type_name().c_str(), + format_value(v).c_str(), format_value(output[i]).c_str() + ); + } + } + + // Check if queries returns correct values + err = op.check_queries(kernels, device, context, queue); + RETURN_ON_ERROR(err); + + log_info("test_%s(%s) passed\n", op.str().c_str(), type_name().c_str()); + + clReleaseMemObject(buffers[0]); + for(auto& k : kernels) + clReleaseKernel(k); + clReleaseProgram(program); + return err; +} + +#endif // TEST_CONFORMANCE_CLCPP_SPEC_CONSTANTS_COMMON_HPP diff --git a/test_conformance/clcpp/spec_constants/main.cpp b/test_conformance/clcpp/spec_constants/main.cpp new file mode 100644 index 00000000..ce505e8f --- /dev/null +++ b/test_conformance/clcpp/spec_constants/main.cpp @@ -0,0 +1,31 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../common.hpp" + +#include "test_spec_consts_attributes.hpp" +#include "test_spec_consts_if.hpp" +#include "test_spec_consts_init_vars.hpp" + +int main(int argc, const char *argv[]) +{ + // Get list to all test functions + std::vector testfn_list = autotest::test_suite::get_test_functions(); + // Get names of all test functions + std::vector testfn_names = autotest::test_suite::get_test_names(); + // Create a vector of pointers to the names test functions + std::vector testfn_names_c_str = autotest::get_strings_ptrs(testfn_names); + return runTestHarness(argc, argv, testfn_list.size(), testfn_list.data(), testfn_names_c_str.data(), false, false, 0); +} diff --git a/test_conformance/clcpp/spec_constants/test_spec_consts_attributes.hpp b/test_conformance/clcpp/spec_constants/test_spec_consts_attributes.hpp new file mode 100644 index 00000000..539167fd --- /dev/null +++ b/test_conformance/clcpp/spec_constants/test_spec_consts_attributes.hpp @@ -0,0 +1,281 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef TEST_CONFORMANCE_CLCPP_SPEC_CONSTANTS_TEST_SPEC_CONSTS_ATTRIBUTES_HPP +#define TEST_CONFORMANCE_CLCPP_SPEC_CONSTANTS_TEST_SPEC_CONSTS_ATTRIBUTES_HPP + +#include + +#include "common.hpp" + +// In this test we check if specialization constant can be successfully used +// in kernel attribute cl::required_work_group_size(X, Y, Z). 
+struct spec_const_required_work_group_size_test : public spec_constants_test +{ + // See generate_program() to know what set_spec_constant is for. + spec_const_required_work_group_size_test(const bool set_spec_constant, + const cl_uint work_group_size_0) + : m_set_spec_constant(set_spec_constant), + m_work_group_size_0(work_group_size_0) + { + + } + + std::string str() + { + if(m_set_spec_constant) + return "spec_const_in_required_work_group_size_" + std::to_string(m_work_group_size_0); + else + return "spec_const_in_required_work_group_size_not_set"; + } + + bool set_local_size() + { + return true; + } + + size_t get_max_local_size(const std::vector& kernels, + cl_device_id device, + size_t work_group_size, // default work-group size + cl_int& error) + { + if(m_set_spec_constant) + { + return m_work_group_size_0; + } + return size_t(1); + } + + cl_uint operator()(size_t i, size_t work_group_size) + { + (void) work_group_size; + if(m_set_spec_constant) + { + return m_work_group_size_0; + } + return cl_uint(1); + } + + // Check if query for CL_KERNEL_COMPILE_WORK_GROUP_SIZE using clGetKernelWorkGroupInfo + // return correct values. It should return the work-group size specified by the + // cl::required_work_group_size(X, Y, Z) qualifier. 
+ cl_int check_queries(const std::vector& kernels, + cl_device_id device, + cl_context context, + cl_command_queue queue) + { + (void) device; + (void) context; + size_t compile_wg_size[] = { 1, 1, 1 }; + cl_int error = clGetKernelWorkGroupInfo( + kernels[0], device, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, + sizeof(compile_wg_size), compile_wg_size, NULL + ); + RETURN_ON_CL_ERROR(error, "clGetKernelWorkGroupInfo") + if(m_set_spec_constant) + { + if(compile_wg_size[0] != m_work_group_size_0 + || compile_wg_size[1] != 1 + || compile_wg_size[2] != 1) + { + error = -1; + } + } + else + { + if(compile_wg_size[0] != 1 + || compile_wg_size[1] != 1 + || compile_wg_size[2] != 1) + { + error = -1; + } + } + return error; + } + + // Sets spec constant + cl_int set_spec_constants(const cl_program& program) + { + cl_int error = CL_SUCCESS; + if(m_set_spec_constant) + { + error = clSetProgramSpecializationConstant( + program, cl_uint(1), sizeof(cl_uint), static_cast(&m_work_group_size_0) + ); + RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant") + } + return error; + } + + // Each work-item writes get_local_size(0) to output[work-item-global-id] + std::string generate_program(bool with_attribute) + { + // ----------------------------------------------------------------------------------- + // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ + // ----------------------------------------------------------------------------------- + #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + std::string att = " "; + if(with_attribute) + { + std::string work_group_size_0 = "1"; + if(m_set_spec_constant) + { + work_group_size_0 = std::to_string(m_work_group_size_0); + } + att = "\n__attribute__((reqd_work_group_size(" + work_group_size_0 + ",1,1)))\n"; + } + return + "__kernel" + att + "void " + this->get_kernel_name() + "(global uint *output)\n" + "{\n" + " uint gid = get_global_id(0);\n" + " output[gid] = get_local_size(0);\n" + "}\n"; + + 
#else + std::string att = ""; + if(with_attribute) + { + att = "[[cl::required_work_group_size(spec1, 1, 1)]]\n"; + } + return + "#include \n" + "#include \n" + "#include \n" + "using namespace cl;\n" + "spec_constant spec1{1};\n" + + att + + "__kernel void " + this->get_kernel_name() + "(global_ptr output)\n" + "{\n" + " uint gid = get_global_id(0);\n" + " output[gid] = get_local_size(0);\n" + "}\n"; + #endif + } + + // Each work-item writes get_local_size(0) to output[work-item-global-id] + std::string generate_program() + { + return generate_program(true); + } + +private: + bool m_set_spec_constant; + cl_uint m_work_group_size_0; +}; + +// This function return max work-group size that can be used +// for kernels defined in source +size_t get_max_wg_size(const std::string& source, + const std::vector& kernel_names, + size_t work_group_size, // max wg size we want to have + cl_device_id device, + cl_context context, + cl_command_queue queue, + cl_int& err) +{ + cl_program program; + std::vector kernels; + if(kernel_names.empty()) + { + RETURN_ON_ERROR_MSG(-1, "No kernel to run"); + } + kernels.resize(kernel_names.size()); +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) +#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + err = create_opencl_kernel(context, &program, &(kernels[0]), source, kernel_names[0], "-cl-std=CL2.0", false); + RETURN_ON_ERROR(err) + for(size_t i = 1; i < kernels.size(); i++) + { + kernels[i] = clCreateKernel(program, kernel_names[i].c_str(), &err); + RETURN_ON_CL_ERROR(err, "clCreateKernel"); + } +#else + err = create_opencl_kernel(context, &program, &(kernels[0]), source, kernel_names[0]); + RETURN_ON_ERROR(err) + for(size_t i = 1; i < 
kernels.size(); i++) + { + kernels[i] = clCreateKernel(program, kernel_names[i].c_str(), &err); + RETURN_ON_CL_ERROR(err, "clCreateKernel"); + } +#endif + size_t wg_size = work_group_size; + for(auto& k : kernels) + { + size_t max_wg_size; + err = clGetKernelWorkGroupInfo( + k, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &max_wg_size, NULL + ); + RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo") + wg_size = (std::min)(wg_size, max_wg_size); + } + return wg_size; +} + +AUTO_TEST_CASE(test_spec_constants_in_kernel_attributes) +(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + int error = CL_SUCCESS; + int last_error = CL_SUCCESS; + +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +// If ONLY_SPIRV_COMPILATION is defined we can't check the max work-group size for the +// kernel because OpenCL kernel object is never created in that mode. +#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) + const size_t max_wg_size = 16; +#else + // Get max work-group size that can be used in [[cl::required_work_group_size(X, 1, 1)]] + // We do this by building kernel without this attribute and checking what is the max + // work-group size we can use with it. + auto test = spec_const_required_work_group_size_test(true, 1); + const size_t max_wg_size = get_max_wg_size( + test.generate_program(false), test.get_kernel_names(), + 1024, // max wg size we want to test + device, context, queue, + error + ); + RETURN_ON_ERROR_MSG(error, "Can't get max work-group size"); +#endif + + // Run tests when specialization constant spec1 is set (kernel + // attribute is [[cl::required_work_group_size(spec1, 1, 1)]]). 
+ for(size_t i = 1; i <= max_wg_size; i *=2) + { + RUN_SPEC_CONSTANTS_TEST_MACRO( + spec_const_required_work_group_size_test( + true, i + ) + ); + } + // This test does not set spec constant + RUN_SPEC_CONSTANTS_TEST_MACRO( + spec_const_required_work_group_size_test( + false, 9999999 // This value is incorrect, but won't be set and kernel + // attribute should be [[cl::required_work_group_size(1, 1, 1)]] + ) + ); + + if(error != CL_SUCCESS) + { + return -1; + } + return error; +} + +#endif // TEST_CONFORMANCE_CLCPP_SPEC_CONSTANTS_TEST_SPEC_CONSTS_ATTRIBUTES_HPP diff --git a/test_conformance/clcpp/spec_constants/test_spec_consts_if.hpp b/test_conformance/clcpp/spec_constants/test_spec_consts_if.hpp new file mode 100644 index 00000000..1c7cec2a --- /dev/null +++ b/test_conformance/clcpp/spec_constants/test_spec_consts_if.hpp @@ -0,0 +1,161 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef TEST_CONFORMANCE_CLCPP_SPEC_CONSTANTS_TEST_SPEC_CONSTS_IF_HPP +#define TEST_CONFORMANCE_CLCPP_SPEC_CONSTANTS_TEST_SPEC_CONSTS_IF_HPP + +#include + +#include "common.hpp" + +// This class tests using specialization constant in if statement +template +struct spec_const_in_if_test : public spec_constants_test +{ + // See generate_program() to know what set_spec_constant is for. 
+ spec_const_in_if_test(const bool set_spec_constant) + : m_set_spec_constant(set_spec_constant) + { + static_assert( + is_vector_type::value == false, + "Specialization constant can be only scalar int or float type" + ); + switch (sizeof(T)) + { + case 1: + m_test_value = T(127); + break; + case 2: + m_test_value = T(0xdeadU); + break; + // 4 and 8 + default: + m_test_value = T(0xdeaddeadU); + break; + } + } + + std::string str() + { + return "spec_const_in_if_" + type_name(); + } + + cl_uint operator()(size_t i, size_t work_group_size) + { + (void) work_group_size; + if(m_set_spec_constant) + { + return m_test_value; + } + return static_cast(i); + } + + // Sets spec constant + cl_int set_spec_constants(const cl_program& program) + { + cl_int error = CL_SUCCESS; + if(m_set_spec_constant) + { + T spec1 = static_cast(m_test_value); + error = clSetProgramSpecializationConstant( + program, cl_uint(1), sizeof(T), static_cast(&spec1) + ); + RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant") + } + return error; + } + + // IF set_spec_constant == true: + // each work-item writes T(m_test_value) to output[work-item-global-id] + // Otherwise: + // each work-item writes T(get_global_id(0)) to output[work-item-global-id] + std::string generate_program() + { + // ----------------------------------------------------------------------------------- + // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ + // ----------------------------------------------------------------------------------- + #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + std::string result = "gid"; + if(m_set_spec_constant) + result = std::to_string(m_test_value); + return + "__kernel void " + this->get_kernel_name() + "(global uint *output)\n" + "{\n" + " uint gid = get_global_id(0);\n" + " output[gid] = " + result + ";\n" + "}\n"; + + #else + return + "#include \n" + "#include \n" + "#include \n" + "using namespace cl;\n" + "typedef " + type_name() + " 
TYPE;\n" + "spec_constant spec1{TYPE(0)};\n" + "__kernel void " + this->get_kernel_name() + "(global_ptr output)\n" + "{\n" + " uint gid = get_global_id(0);\n" + " if(get(spec1) == TYPE(" + std::to_string(m_test_value) +"))\n" + " {\n" + " output[gid] = " + std::to_string(m_test_value) +";\n" + " }\n" + " else\n" + " {\n" + " output[gid] = gid;\n" + " }\n" + "}\n"; + #endif + } + +private: + bool m_set_spec_constant; + cl_uint m_test_value; +}; + +AUTO_TEST_CASE(test_spec_constants_in_if_statement) +(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + int error = CL_SUCCESS; + int last_error = CL_SUCCESS; + + const std::vector set_spec_const_options { true, false }; + for(auto option : set_spec_const_options) + { + RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_in_if_test(option)); + RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_in_if_test(option)); + RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_in_if_test(option)); + RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_in_if_test(option)); + RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_in_if_test(option)); + RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_in_if_test(option)); + RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_in_if_test(option)); + if(is_extension_available(device, "cl_khr_fp16")) + { + RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_in_if_test(option)); + } + if(is_extension_available(device, "cl_khr_fp64")) + { + RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_in_if_test(option)); + } + } + + if(error != CL_SUCCESS) + { + return -1; + } + return error; +} + +#endif // TEST_CONFORMANCE_CLCPP_SPEC_CONSTANTS_TEST_SPEC_CONSTS_IF_HPP diff --git a/test_conformance/clcpp/spec_constants/test_spec_consts_init_vars.hpp b/test_conformance/clcpp/spec_constants/test_spec_consts_init_vars.hpp new file mode 100644 index 00000000..20bbff06 --- /dev/null +++ b/test_conformance/clcpp/spec_constants/test_spec_consts_init_vars.hpp @@ -0,0 +1,174 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef TEST_CONFORMANCE_CLCPP_SPEC_CONSTANTS_TEST_SPEC_CONSTS_INIT_VARS_HPP +#define TEST_CONFORMANCE_CLCPP_SPEC_CONSTANTS_TEST_SPEC_CONSTS_INIT_VARS_HPP + +#include + +#include "common.hpp" + +// This class tests initializing variables with a specialization constant value. +template +struct spec_const_init_var : public spec_constants_test +{ + // See generate_program() to know what set_spec_constant is for. + spec_const_init_var(const bool set_spec_constant) + : m_set_spec_constant(set_spec_constant) + { + static_assert( + is_vector_type::value == false, + "Specialization constant can be only scalar int or float type" + ); + switch (sizeof(T)) + { + case 1: + m_test_value = T(127); + break; + case 2: + m_test_value = T(0xdeadU); + break; + // 4 and 8 + default: + m_test_value = T(0xdeaddeadU); + break; + } + } + + std::string str() + { + return "spec_const_init_var_" + type_name(); + } + + cl_uint operator()(size_t i, size_t work_group_size) + { + (void) work_group_size; + if(m_set_spec_constant) + { + return m_test_value; + } + return static_cast(i); + } + + // Sets spec constant + cl_int set_spec_constants(const cl_program& program) + { + cl_int error = CL_SUCCESS; + if(m_set_spec_constant) + { + T spec = static_cast(m_test_value); + // spec1 + error = clSetProgramSpecializationConstant( + program, cl_uint(1), sizeof(T), static_cast(&spec) + ); + RETURN_ON_CL_ERROR(error, 
"clSetProgramSpecializationConstant") + + // spec2 + error = clSetProgramSpecializationConstant( + program, cl_uint(2), sizeof(T), static_cast(&spec) + ); + RETURN_ON_CL_ERROR(error, "clSetProgramSpecializationConstant") + } + return error; + } + + // IF set_spec_constant == true: + // each work-item writes T(m_test_value) to output[work-item-global-id] + // Otherwise: + // each work-item writes T(get_global_id(0)) to output[work-item-global-id] + std::string generate_program() + { + // ----------------------------------------------------------------------------------- + // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ + // ----------------------------------------------------------------------------------- + #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + std::string result = "gid"; + if(m_set_spec_constant) + result = std::to_string(m_test_value); + return + "__kernel void " + this->get_kernel_name() + "(global uint *output)\n" + "{\n" + " uint gid = get_global_id(0);\n" + " output[gid] = " + result + ";\n" + "}\n"; + + #else + return + "#include \n" + "#include \n" + "#include \n" + "using namespace cl;\n" + "typedef " + type_name() + " TYPE;\n" + "spec_constant spec1{TYPE(0)};\n" + "spec_constant spec2{TYPE(0)};\n" + "__kernel void " + this->get_kernel_name() + "(global_ptr output)\n" + "{\n" + " uint gid = get_global_id(0);\n" + " TYPE var1(spec1.get());\n" + " TYPE var2(spec2);\n" + " TYPE var3; var3 = spec2;\n" + " if((var1 == TYPE(" + std::to_string(m_test_value) +")) " + "&& (var2 == TYPE(" + std::to_string(m_test_value) +"))\n" + "&& (var3 == TYPE(" + std::to_string(m_test_value) +")))\n" + " {\n" + " output[gid] = " + std::to_string(m_test_value) +";\n" + " }\n" + " else\n" + " {\n" + " output[gid] = gid;\n" + " }\n" + "}\n"; + #endif + } + +private: + bool m_set_spec_constant; + cl_uint m_test_value; +}; + +AUTO_TEST_CASE(test_spec_constants_initializing_variables) +(cl_device_id device, cl_context 
context, cl_command_queue queue, int n_elems) +{ + int error = CL_SUCCESS; + int last_error = CL_SUCCESS; + + const std::vector set_spec_const_options { true, false }; + for(auto option : set_spec_const_options) + { + RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_init_var(option)); + RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_init_var(option)); + RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_init_var(option)); + RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_init_var(option)); + RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_init_var(option)); + RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_init_var(option)); + RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_init_var(option)); + if(is_extension_available(device, "cl_khr_fp16")) + { + RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_init_var(option)); + } + if(is_extension_available(device, "cl_khr_fp64")) + { + RUN_SPEC_CONSTANTS_TEST_MACRO(spec_const_init_var(option)); + } + } + + if(error != CL_SUCCESS) + { + return -1; + } + return error; +} + +#endif // TEST_CONFORMANCE_CLCPP_SPEC_CONSTANTS_TEST_SPEC_CONSTS_INIT_VARS_HPP diff --git a/test_conformance/clcpp/subgroups/CMakeLists.txt b/test_conformance/clcpp/subgroups/CMakeLists.txt new file mode 100644 index 00000000..1d1e1907 --- /dev/null +++ b/test_conformance/clcpp/subgroups/CMakeLists.txt @@ -0,0 +1,12 @@ +set(MODULE_NAME CPP_SUBGROUPS) + +set(${MODULE_NAME}_SOURCES + main.cpp + ../../../test_common/harness/errorHelpers.c + ../../../test_common/harness/testHarness.c + ../../../test_common/harness/kernelHelpers.c + ../../../test_common/harness/msvc9.c + ../../../test_common/harness/parseParameters.cpp +) + +include(../../CMakeCommon.txt) diff --git a/test_conformance/clcpp/subgroups/common.hpp b/test_conformance/clcpp/subgroups/common.hpp new file mode 100644 index 00000000..2b05a3cb --- /dev/null +++ b/test_conformance/clcpp/subgroups/common.hpp @@ -0,0 +1,97 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef TEST_CONFORMANCE_CLCPP_SG_COMMON_HPP +#define TEST_CONFORMANCE_CLCPP_SG_COMMON_HPP + +#include +#include +#include + +enum class work_group_op : int { + add, min, max +}; + +std::string to_string(work_group_op op) +{ + switch (op) + { + case work_group_op::add: + return "add"; + case work_group_op::min: + return "min"; + case work_group_op::max: + return "max"; + default: + break; + } + return ""; +} + +template +std::vector generate_input(size_t count, size_t wg_size) +{ + std::vector input(count, CL_INT_TYPE(1)); + switch (op) + { + case work_group_op::add: + return input; + case work_group_op::min: + { + size_t j = wg_size; + for(size_t i = 0; i < count; i++) + { + input[i] = static_cast(j); + j--; + if(j == 0) + { + j = wg_size; + } + } + } + break; + case work_group_op::max: + { + size_t j = 0; + for(size_t i = 0; i < count; i++) + { + input[i] = static_cast(j); + j++; + if(j == wg_size) + { + j = 0; + } + } + } + } + return input; +} + +template +std::vector generate_output(size_t count, size_t wg_size) +{ + switch (op) + { + case work_group_op::add: + return std::vector(count, CL_INT_TYPE(0)); + case work_group_op::min: + return std::vector(count, (std::numeric_limits::max)()); + case work_group_op::max: + return std::vector(count, (std::numeric_limits::min)()); + } + return std::vector(count, CL_INT_TYPE(0)); +} + +#endif // TEST_CONFORMANCE_CLCPP_SG_COMMON_HPP diff --git 
a/test_conformance/clcpp/subgroups/main.cpp b/test_conformance/clcpp/subgroups/main.cpp new file mode 100644 index 00000000..3140a124 --- /dev/null +++ b/test_conformance/clcpp/subgroups/main.cpp @@ -0,0 +1,34 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../common.hpp" + +#include "test_sg_all.hpp" +#include "test_sg_any.hpp" +#include "test_sg_broadcast.hpp" +#include "test_sg_reduce.hpp" +#include "test_sg_scan_inclusive.hpp" +#include "test_sg_scan_exclusive.hpp" + +int main(int argc, const char *argv[]) +{ + // Get list to all test functions + std::vector testfn_list = autotest::test_suite::get_test_functions(); + // Get names of all test functions + std::vector testfn_names = autotest::test_suite::get_test_names(); + // Create a vector of pointers to the names test functions + std::vector testfn_names_c_str = autotest::get_strings_ptrs(testfn_names); + return runTestHarness(argc, argv, testfn_list.size(), testfn_list.data(), testfn_names_c_str.data(), false, false, 0); +} diff --git a/test_conformance/clcpp/subgroups/test_sg_all.hpp b/test_conformance/clcpp/subgroups/test_sg_all.hpp new file mode 100644 index 00000000..01d66386 --- /dev/null +++ b/test_conformance/clcpp/subgroups/test_sg_all.hpp @@ -0,0 +1,219 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_ALL_HPP +#define TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_ALL_HPP + +#include +#include +#include + +// Common for all OpenCL C++ tests +#include "../common.hpp" +// Common for tests of sub-group functions +#include "common.hpp" + +std::string generate_sg_all_kernel_code() +{ + return "#include \n" + "#include \n" + "#include \n" + "using namespace cl;\n" + "__kernel void test_sg_all(global_ptr input, global_ptr output)\n" + "{\n" + " ulong tid = get_global_id(0);\n" + " bool result = sub_group_all(input[tid] < input[tid+1]);\n" + " if(!result) {\n output[tid] = 0;\n return;\n }\n" + " output[tid] = 1;\n" + "}\n"; +} + +int verify_sg_all(const std::vector &in, const std::vector &out, size_t count, size_t wg_size, size_t sg_size) +{ + size_t i, j, k; + for (i = 0; i < count; i += wg_size) + { + for (j = 0; j < ((count - i) > wg_size ? wg_size : (count - i)); j+= sg_size) + { + // sub-group all + bool all = true; + for (k = 0; k < ((wg_size - j) > sg_size ? sg_size : (wg_size - j)); k++) + { + if(!(in[i+j+k] < in[i+j+k+1])) + { + all = false; + break; + } + } + + // Convert bool to uint + cl_uint all_uint = all ? 1 : 0; + // Check if all work-items in sub-group stored correct value + for (k = 0; k < ((wg_size - j) > sg_size ? 
sg_size : (wg_size - j)); k++) + { + if (all_uint != out[i + j + k]) + { + log_info( + "sub_group_all %s: Error at %lu: expected = %lu, got = %lu\n", + type_name().c_str(), + i + j, + static_cast(all_uint), + static_cast(out[i + j + k])); + return -1; + } + } + } + } + return CL_SUCCESS; +} + +std::vector generate_input_sg_all(size_t count, size_t wg_size) +{ + std::vector input(count, cl_uint(0)); + size_t j = wg_size; + for(size_t i = 0; i < count; i++) + { + input[i] = static_cast(i); + // In one place in ~half of work-groups (input[tid] < input[tid+1]) will + // generate false, it means that for sub_group_all(input[tid] < input[tid+1]) + // should return false for all sub-groups in that work-groups + if((j == wg_size/2) && (i > count/2)) + { + input[i] = input[i - 1]; + } + j--; + if(j == 0) + { + j = wg_size; + } + } + return input; +} + +std::vector generate_output_sg_all(size_t count, size_t wg_size) +{ + (void) wg_size; + return std::vector(count, cl_uint(1)); +} + +int sub_group_all(cl_device_id device, cl_context context, cl_command_queue queue, size_t count) +{ + cl_mem buffers[2]; + cl_program program; + cl_kernel kernel; + size_t wg_size; + size_t sg_max_size; + size_t work_size[1]; + int err; + + std::string code_str = generate_sg_all_kernel_code(); +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +// Only OpenCL C++ to SPIR-V compilation +#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) + err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_all"); + RETURN_ON_ERROR(err) + return err; +// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) +#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + log_info("SKIPPED: OpenCL C kernels not provided for this test. 
Skipping the test.\n"); + return CL_SUCCESS; +#else + err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_all"); + RETURN_ON_ERROR(err) +#endif + + err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, NULL); + RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo") + + size_t param_value_size = 0; + err = clGetKernelSubGroupInfo( + kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE, + sizeof(size_t), static_cast(&wg_size), + sizeof(size_t), static_cast(&sg_max_size), + ¶m_value_size + ); + RETURN_ON_CL_ERROR(err, "clGetKernelSubGroupInfo") + + // Verify size of returned param + if(param_value_size != sizeof(size_t)) + { + RETURN_ON_ERROR_MSG(-1, + "Returned size of max sub group size not valid! (Expected %lu, got %lu)\n", + sizeof(size_t), + param_value_size + ) + } + + // Calculate global work size + size_t flat_work_size; + size_t wg_number = static_cast( + std::ceil(static_cast(count) / wg_size) + ); + flat_work_size = wg_number * wg_size; + work_size[0] = flat_work_size; + + std::vector input = generate_input_sg_all(flat_work_size + 1, wg_size); + std::vector output = generate_output_sg_all(flat_work_size, wg_size); + + buffers[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * input.size(), NULL, &err); + RETURN_ON_CL_ERROR(err, "clCreateBuffer"); + + buffers[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * output.size(), NULL, &err); + RETURN_ON_CL_ERROR(err, "clCreateBuffer"); + + err = clEnqueueWriteBuffer( + queue, buffers[0], CL_TRUE, 0, sizeof(cl_uint) * input.size(), + static_cast(input.data()), 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer"); + + err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]); + err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]); + RETURN_ON_CL_ERROR(err, "clSetKernelArg"); + + err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, &wg_size, 
0, NULL, NULL); + RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel"); + + err = clEnqueueReadBuffer( + queue, buffers[1], CL_TRUE, 0, sizeof(cl_uint) * output.size(), + static_cast(output.data()), 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer"); + + if (verify_sg_all(input, output, flat_work_size, wg_size, sg_max_size) != CL_SUCCESS) + { + RETURN_ON_ERROR_MSG(-1, "sub_group_all failed"); + } + log_info("sub_group_all passed\n"); + + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseKernel(kernel); + clReleaseProgram(program); + return err; +} + +AUTO_TEST_CASE(test_sub_group_all) +(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + int err = CL_SUCCESS; + err = sub_group_all(device, context, queue, n_elems); + CHECK_ERROR(err) + return err; +} + +#endif // TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_ALL_HPP diff --git a/test_conformance/clcpp/subgroups/test_sg_any.hpp b/test_conformance/clcpp/subgroups/test_sg_any.hpp new file mode 100644 index 00000000..769bef06 --- /dev/null +++ b/test_conformance/clcpp/subgroups/test_sg_any.hpp @@ -0,0 +1,219 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_ANY_HPP +#define TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_ANY_HPP + +#include +#include +#include + +// Common for all OpenCL C++ tests +#include "../common.hpp" +// Common for tests of sub-group functions +#include "common.hpp" + +std::string generate_sg_any_kernel_code() +{ + return "#include \n" + "#include \n" + "#include \n" + "using namespace cl;\n" + "__kernel void test_sg_any(global_ptr input, global_ptr output)\n" + "{\n" + " ulong tid = get_global_id(0);\n" + " bool result = sub_group_any(input[tid] == input[tid+1]);\n" + " if(!result) {\n output[tid] = 0;\n return;\n }\n" + " output[tid] = 1;\n" + "}\n"; +} + +int verify_sg_any(const std::vector &in, const std::vector &out, size_t count, size_t wg_size, size_t sg_size) +{ + size_t i, j, k; + for (i = 0; i < count; i += wg_size) + { + for (j = 0; j < ((count - i) > wg_size ? wg_size : (count - i)); j+= sg_size) + { + // sub-group any + bool any = false; + for (k = 0; k < ((wg_size - j) > sg_size ? sg_size : (wg_size - j)); k++) + { + if(in[i+j+k] == in[i+j+k+1]) + { + any = true; + break; + } + } + + // Convert bool to uint + cl_uint any_uint = any ? 1 : 0; + // Check if all work-items in sub-group stored correct value + for (k = 0; k < ((wg_size - j) > sg_size ? 
sg_size : (wg_size - j)); k++) + { + if (any_uint != out[i + j + k]) + { + log_info( + "sub_group_any %s: Error at %lu: expected = %lu, got = %lu\n", + type_name().c_str(), + i + j, + static_cast(any_uint), + static_cast(out[i + j + k])); + return -1; + } + } + } + } + return CL_SUCCESS; +} + +std::vector generate_input_sg_any(size_t count, size_t wg_size) +{ + std::vector input(count, cl_uint(0)); + size_t j = wg_size; + for(size_t i = 0; i < count; i++) + { + input[i] = static_cast(i); + // In one place in ~half of work-groups (input[tid] == input[tid+1]) will + // generate true, it means that for sub_group_all(input[tid] == input[tid+1]) + // should return false for one sub-group in that work-groups + if((j == wg_size/2) && (i > count/2)) + { + input[i] = input[i - 1]; + } + j--; + if(j == 0) + { + j = wg_size; + } + } + return input; +} + +std::vector generate_output_sg_any(size_t count, size_t wg_size) +{ + (void) wg_size; + return std::vector(count, cl_uint(1)); +} + +int sub_group_any(cl_device_id device, cl_context context, cl_command_queue queue, size_t count) +{ + cl_mem buffers[2]; + cl_program program; + cl_kernel kernel; + size_t wg_size; + size_t sg_max_size; + size_t work_size[1]; + int err; + + std::string code_str = generate_sg_any_kernel_code(); +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +// Only OpenCL C++ to SPIR-V compilation +#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) + err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_any"); + RETURN_ON_ERROR(err) + return err; +// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) +#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + log_info("SKIPPED: OpenCL C kernels not provided for this test. 
Skipping the test.\n"); + return CL_SUCCESS; +#else + err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_any"); + RETURN_ON_ERROR(err) +#endif + + err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, NULL); + RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo") + + size_t param_value_size = 0; + err = clGetKernelSubGroupInfo( + kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE, + sizeof(size_t), static_cast(&wg_size), + sizeof(size_t), static_cast(&sg_max_size), + ¶m_value_size + ); + RETURN_ON_CL_ERROR(err, "clGetKernelSubGroupInfo") + + // Verify size of returned param + if(param_value_size != sizeof(size_t)) + { + RETURN_ON_ERROR_MSG(-1, + "Returned size of max sub group size not valid! (Expected %lu, got %lu)\n", + sizeof(size_t), + param_value_size + ) + } + + // Calculate global work size + size_t flat_work_size; + size_t wg_number = static_cast( + std::ceil(static_cast(count) / wg_size) + ); + flat_work_size = wg_number * wg_size; + work_size[0] = flat_work_size; + + std::vector input = generate_input_sg_any(flat_work_size + 1, wg_size); + std::vector output = generate_output_sg_any(flat_work_size, wg_size); + + buffers[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * input.size(), NULL, &err); + RETURN_ON_CL_ERROR(err, "clCreateBuffer"); + + buffers[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * output.size(), NULL, &err); + RETURN_ON_CL_ERROR(err, "clCreateBuffer"); + + err = clEnqueueWriteBuffer( + queue, buffers[0], CL_TRUE, 0, sizeof(cl_uint) * input.size(), + static_cast(input.data()), 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer"); + + err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]); + err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]); + RETURN_ON_CL_ERROR(err, "clSetKernelArg"); + + err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, &wg_size, 
0, NULL, NULL); + RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel"); + + err = clEnqueueReadBuffer( + queue, buffers[1], CL_TRUE, 0, sizeof(cl_uint) * output.size(), + static_cast(output.data()), 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer"); + + if (verify_sg_any(input, output, flat_work_size, wg_size, sg_max_size) != CL_SUCCESS) + { + RETURN_ON_ERROR_MSG(-1, "sub_group_any failed"); + } + log_info("sub_group_any passed\n"); + + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseKernel(kernel); + clReleaseProgram(program); + return err; +} + +AUTO_TEST_CASE(test_sub_group_any) +(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + int err = CL_SUCCESS; + err = sub_group_any(device, context, queue, n_elems); + CHECK_ERROR(err) + return err; +} + +#endif // TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_ANY_HPP diff --git a/test_conformance/clcpp/subgroups/test_sg_broadcast.hpp b/test_conformance/clcpp/subgroups/test_sg_broadcast.hpp new file mode 100644 index 00000000..39e420ac --- /dev/null +++ b/test_conformance/clcpp/subgroups/test_sg_broadcast.hpp @@ -0,0 +1,204 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_BROADCAST_HPP +#define TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_BROADCAST_HPP + +#include +#include +#include + +// Common for all OpenCL C++ tests +#include "../common.hpp" +// Common for tests of sub-group functions +#include "common.hpp" + +std::string generate_sg_broadcast_kernel_code() +{ + return + "#include \n" + "#include \n" + "#include \n" + "using namespace cl;\n" + "__kernel void test_sg_broadcast(global_ptr input, global_ptr output)\n" + "{\n" + " ulong tid = get_global_id(0);\n" + " uint result = sub_group_broadcast(input[tid], 0);\n" + " output[tid] = result;\n" + "}\n"; +} + +int +verify_sg_broadcast(const std::vector &in, const std::vector &out, size_t count, size_t wg_size, size_t sg_size) +{ + size_t i, j, k; + for (i = 0; i < count; i += wg_size) + { + for (j = 0; j < ((count - i) > wg_size ? wg_size : (count - i)); j+= sg_size) + { + // sub-group broadcast + cl_uint broadcast_result = in[i+j]; + + // Check if all work-items in sub-group stored correct value + for (k = 0; k < ((wg_size - j) > sg_size ? 
sg_size : (wg_size - j)); k++) + { + if (broadcast_result != out[i + j + k]) + { + log_info( + "sub_group_any %s: Error at %lu: expected = %lu, got = %lu\n", + type_name().c_str(), + i + j, + static_cast(broadcast_result), + static_cast(out[i + j + k])); + return -1; + } + } + } + } + return CL_SUCCESS; +} + +std::vector generate_input_sg_broadcast(size_t count, size_t wg_size) +{ + std::vector input(count, cl_uint(0)); + size_t j = wg_size; + for(size_t i = 0; i < count; i++) + { + input[i] = static_cast(j); + j--; + if(j == 0) + { + j = wg_size; + } + } + return input; +} + +std::vector generate_output_sg_broadcast(size_t count, size_t wg_size) +{ + (void) wg_size; + return std::vector(count, cl_uint(1)); +} + +int sub_group_broadcast(cl_device_id device, cl_context context, cl_command_queue queue, size_t count) +{ + cl_mem buffers[2]; + cl_program program; + cl_kernel kernel; + size_t wg_size; + size_t sg_max_size; + size_t work_size[] = { 1 }; + int err; + + // Get kernel source code + std::string code_str = generate_sg_broadcast_kernel_code(); + +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +// Only OpenCL C++ to SPIR-V compilation +#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) + err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_broadcast"); + RETURN_ON_ERROR(err) + return err; +// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) +#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + log_info("SKIPPED: OpenCL C kernels not provided for this test. 
Skipping the test.\n"); + return CL_SUCCESS; +#else + err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_broadcast"); + RETURN_ON_ERROR(err) +#endif + + // Get max flat workgroup size + err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, NULL); + RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo") + + size_t param_value_size = 0; + err = clGetKernelSubGroupInfo( + kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE, + sizeof(size_t), static_cast(&wg_size), + sizeof(size_t), static_cast(&sg_max_size), + ¶m_value_size + ); + RETURN_ON_CL_ERROR(err, "clGetKernelSubGroupInfo") + + // Verify size of returned param + if(param_value_size != sizeof(size_t)) + { + RETURN_ON_ERROR_MSG(-1, + "Returned size of max sub group size not valid! (Expected %lu, got %lu)\n", + sizeof(size_t), + param_value_size + ) + } + + // Calculate global work size + size_t flat_work_size = count; + size_t wg_number = static_cast( + std::ceil(static_cast(count) / wg_size) + ); + flat_work_size = wg_number * wg_size; + work_size[0] = flat_work_size; + + std::vector input = generate_input_sg_broadcast(flat_work_size, wg_size); + std::vector output = generate_output_sg_broadcast(flat_work_size, wg_size); + + buffers[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * input.size(), NULL,&err); + RETURN_ON_CL_ERROR(err, "clCreateBuffer"); + + buffers[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * output.size(), NULL, &err); + RETURN_ON_CL_ERROR(err, "clCreateBuffer"); + + err = clEnqueueWriteBuffer( + queue, buffers[0], CL_TRUE, 0, sizeof(cl_uint) * input.size(), + static_cast(input.data()), 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer"); + + err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]); + err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]); + RETURN_ON_CL_ERROR(err, "clSetKernelArg"); + + err = 
clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, &wg_size, 0, NULL, NULL); + RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel"); + + err = clEnqueueReadBuffer( + queue, buffers[1], CL_TRUE, 0, sizeof(cl_uint) * output.size(), + static_cast(output.data()), 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer"); + + int result = verify_sg_broadcast( input, output, work_size[0], wg_size, sg_max_size); + RETURN_ON_ERROR_MSG(result, "sub_group_broadcast failed") + log_info("sub_group_broadcast passed\n"); + + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseKernel(kernel); + clReleaseProgram(program); + return err; +} + +AUTO_TEST_CASE(test_sub_group_broadcast) +(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + int err = CL_SUCCESS; + err = sub_group_broadcast(device, context, queue, n_elems); + CHECK_ERROR(err) + return err; +} + +#endif // TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_BROADCAST_HPP diff --git a/test_conformance/clcpp/subgroups/test_sg_reduce.hpp b/test_conformance/clcpp/subgroups/test_sg_reduce.hpp new file mode 100644 index 00000000..6b20d507 --- /dev/null +++ b/test_conformance/clcpp/subgroups/test_sg_reduce.hpp @@ -0,0 +1,345 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_REDUCE_HPP +#define TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_REDUCE_HPP + +#include +#include +#include + +// Common for all OpenCL C++ tests +#include "../common.hpp" +// Common for tests of sub-group functions +#include "common.hpp" + +template +std::string generate_sg_reduce_kernel_code() +{ + return "#include \n" + "#include \n" + "#include \n" + "using namespace cl;\n" + "__kernel void test_sg_reduce(global_ptr<" + type_name() + "[]> input, " + "global_ptr<" + type_name() + "[]> output)\n" + "{\n" + " ulong tid = get_global_id(0);\n" + " " + type_name() + " result = sub_group_reduce(input[tid]);\n" + " output[tid] = result;\n" + "}\n"; +} + +template +int verify_sg_reduce_add(const std::vector &in, const std::vector &out, size_t wg_size, size_t sg_size) +{ + size_t i, j, k; + for (i = 0; i < in.size(); i += wg_size) + { + for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j+= sg_size) + { + CL_INT_TYPE sum = 0; + for (k = 0; k < ((wg_size - j) > sg_size ? sg_size : (wg_size - j)); k++) + { + sum += in[i + j + k]; + } + + // Check if all work-items in sub-group stored correct value + for (k = 0; k < ((wg_size - j) > sg_size ? sg_size : (wg_size - j)); k++) + { + if (sum != out[i + j + k]) + { + log_info( + "sub_group_reduce_add %s: Error at %lu: expected = %lu, got = %lu\n", + type_name().c_str(), + i + j, + static_cast(sum), + static_cast(out[i + j + k])); + return -1; + } + } + } + } + return CL_SUCCESS; +} + +template +int verify_sg_reduce_min(const std::vector &in, const std::vector &out, size_t wg_size, size_t sg_size) +{ + size_t i, j, k; + for (i = 0; i < in.size(); i += wg_size) + { + for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j+= sg_size) + { + CL_INT_TYPE min = (std::numeric_limits::max)(); + for (k = 0; k < ((wg_size - j) > sg_size ? 
sg_size : (wg_size - j)); k++) + { + min = std::min(min, in[i + j + k]); + } + + // Check if all work-items in sub-group stored correct value + for (k = 0; k < ((wg_size - j) > sg_size ? sg_size : (wg_size - j)); k++) + { + if (min != out[i + j + k]) + { + log_info( + "sub_group_reduce_min %s: Error at %lu: expected = %lu, got = %lu\n", + type_name().c_str(), + i + j, + static_cast(min), + static_cast(out[i + j + k])); + return -1; + } + } + } + } + return CL_SUCCESS; +} + +template +int verify_sg_reduce_max(const std::vector &in, const std::vector &out, size_t wg_size, size_t sg_size) +{ + size_t i, j, k; + for (i = 0; i < in.size(); i += wg_size) + { + for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j+= sg_size) + { + CL_INT_TYPE max = (std::numeric_limits::min)(); + for (k = 0; k < ((wg_size - j) > sg_size ? sg_size : (wg_size - j)); k++) + { + max = std::max(max, in[i + j + k]); + } + + // Check if all work-items in sub-group stored correct value + for (k = 0; k < ((wg_size - j) > sg_size ? 
sg_size : (wg_size - j)); k++) + { + if (max != out[i + j + k]) + { + log_info( + "sub_group_reduce_max %s: Error at %lu: expected = %lu, got = %lu\n", + type_name().c_str(), + i + j, + static_cast(max), + static_cast(out[i + j + k])); + return -1; + } + } + } + } + return CL_SUCCESS; +} + +template +int verify_sg_reduce(const std::vector &in, const std::vector &out, size_t wg_size, size_t sg_size) +{ + switch (op) + { + case work_group_op::add: + return verify_sg_reduce_add(in, out, wg_size, sg_size); + case work_group_op::min: + return verify_sg_reduce_min(in, out, wg_size, sg_size); + case work_group_op::max: + return verify_sg_reduce_max(in, out, wg_size, sg_size); + } + return -1; +} + +template +int sub_group_reduce(cl_device_id device, cl_context context, cl_command_queue queue, size_t count) +{ + // don't run test for unsupported types + if(!type_supported(device)) + { + return CL_SUCCESS; + } + + cl_mem buffers[2]; + cl_program program; + cl_kernel kernel; + size_t wg_size; + size_t sg_max_size; + size_t work_size[1]; + int err; + + std::string code_str = generate_sg_reduce_kernel_code(); +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +// Only OpenCL C++ to SPIR-V compilation +#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) + err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_reduce"); + RETURN_ON_ERROR(err) + return err; +// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) +#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + log_info("SKIPPED: OpenCL C kernels not provided for this test. 
Skipping the test.\n"); + return CL_SUCCESS; +#else + err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_reduce"); + RETURN_ON_ERROR(err) +#endif + + err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, NULL); + RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo") + + size_t param_value_size = 0; + err = clGetKernelSubGroupInfo( + kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE, + sizeof(size_t), static_cast(&wg_size), + sizeof(size_t), static_cast(&sg_max_size), + ¶m_value_size + ); + RETURN_ON_CL_ERROR(err, "clGetKernelSubGroupInfo") + + // Verify size of returned param + if(param_value_size != sizeof(size_t)) + { + RETURN_ON_ERROR_MSG(-1, + "Returned size of max sub group size not valid! (Expected %lu, got %lu)\n", + sizeof(size_t), + param_value_size + ) + } + + // Calculate global work size + size_t flat_work_size; + size_t wg_number = static_cast( + std::ceil(static_cast(count) / wg_size) + ); + flat_work_size = wg_number * wg_size; + work_size[0] = flat_work_size; + + std::vector input = generate_input(flat_work_size, wg_size); + std::vector output = generate_output(flat_work_size, wg_size); + + buffers[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(CL_INT_TYPE) * input.size(), NULL, &err); + RETURN_ON_CL_ERROR(err, "clCreateBuffer"); + + buffers[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(CL_INT_TYPE) * output.size(), NULL, &err); + RETURN_ON_CL_ERROR(err, "clCreateBuffer"); + + err = clEnqueueWriteBuffer( + queue, buffers[0], CL_TRUE, 0, sizeof(CL_INT_TYPE) * input.size(), + static_cast(input.data()), 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer"); + + err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]); + err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]); + RETURN_ON_CL_ERROR(err, "clSetKernelArg"); + + err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, &wg_size, 0, 
NULL, NULL); + RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel"); + + err = clEnqueueReadBuffer( + queue, buffers[1], CL_TRUE, 0, sizeof(CL_INT_TYPE) * output.size(), + static_cast(output.data()), 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer"); + + if (verify_sg_reduce(input, output, wg_size, sg_max_size) != CL_SUCCESS) + { + RETURN_ON_ERROR_MSG(-1, "sub_group_reduce_%s %s failed", to_string(op).c_str(), type_name().c_str()); + } + log_info("sub_group_reduce_%s %s passed\n", to_string(op).c_str(), type_name().c_str()); + + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseKernel(kernel); + clReleaseProgram(program); + return err; +} + +AUTO_TEST_CASE(test_sub_group_reduce_add) +(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + int error = CL_SUCCESS; + int local_error = CL_SUCCESS; + + local_error = sub_group_reduce(device, context, queue, n_elems); + CHECK_ERROR(local_error) + error |= local_error; + + local_error = sub_group_reduce(device, context, queue, n_elems); + CHECK_ERROR(local_error) + error |= local_error; + + local_error = sub_group_reduce(device, context, queue, n_elems); + CHECK_ERROR(local_error) + error |= local_error; + + local_error = sub_group_reduce(device, context, queue, n_elems); + CHECK_ERROR(local_error) + error |= local_error; + + if(error != CL_SUCCESS) + return -1; + return CL_SUCCESS; +} + +AUTO_TEST_CASE(test_sub_group_reduce_min) +(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + int error = CL_SUCCESS; + int local_error = CL_SUCCESS; + + local_error = sub_group_reduce(device, context, queue, n_elems); + CHECK_ERROR(local_error) + error |= local_error; + + local_error = sub_group_reduce(device, context, queue, n_elems); + CHECK_ERROR(local_error) + error |= local_error; + + local_error = sub_group_reduce(device, context, queue, n_elems); + CHECK_ERROR(local_error) + error |= local_error; + + local_error = 
sub_group_reduce(device, context, queue, n_elems); + CHECK_ERROR(local_error) + error |= local_error; + + if(error != CL_SUCCESS) + return -1; + return CL_SUCCESS; +} + +AUTO_TEST_CASE(test_sub_group_reduce_max) +(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + int error = CL_SUCCESS; + int local_error = CL_SUCCESS; + + local_error = sub_group_reduce(device, context, queue, n_elems); + CHECK_ERROR(local_error) + error |= local_error; + + local_error = sub_group_reduce(device, context, queue, n_elems); + CHECK_ERROR(local_error) + error |= local_error; + + local_error = sub_group_reduce(device, context, queue, n_elems); + CHECK_ERROR(local_error) + error |= local_error; + + local_error = sub_group_reduce(device, context, queue, n_elems); + CHECK_ERROR(local_error) + error |= local_error; + + if(error != CL_SUCCESS) + return -1; + return CL_SUCCESS; +} + +#endif // TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_REDUCE_HPP diff --git a/test_conformance/clcpp/subgroups/test_sg_scan_exclusive.hpp b/test_conformance/clcpp/subgroups/test_sg_scan_exclusive.hpp new file mode 100644 index 00000000..7bb2b18b --- /dev/null +++ b/test_conformance/clcpp/subgroups/test_sg_scan_exclusive.hpp @@ -0,0 +1,325 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_SCAN_EXCLUSIVE_HPP +#define TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_SCAN_EXCLUSIVE_HPP + +#include +#include + +// Common for all OpenCL C++ tests +#include "../common.hpp" +// Common for tests of sub-group functions +#include "common.hpp" + +template +std::string generate_sg_scan_exclusive_kernel_code() +{ + return "#include \n" + "#include \n" + "#include \n" + "using namespace cl;\n" + "__kernel void test_sg_scan_exclusive(global_ptr<" + type_name() + "[]> input, " + "global_ptr<" + type_name() + "[]> output)\n" + "{\n" + " ulong tid = get_global_id(0);\n" + " " + type_name() + " result = sub_group_scan_exclusive(input[tid]);\n" + " output[tid] = result;\n" + "}\n"; +} + +template +int verify_sg_scan_exclusive_add(const std::vector &in, const std::vector &out, size_t wg_size, size_t sg_size) +{ + size_t i, j, k; + for (i = 0; i < in.size(); i += wg_size) + { + for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j+= sg_size) + { + CL_INT_TYPE sum = 0; + // Check if all work-items in sub-group stored correct value + for (k = 0; k < ((wg_size - j) > sg_size ? sg_size : (wg_size - j)); k++) + { + if (sum != out[i + j + k]) + { + log_info( + "sub_group_scan_exclusive_add %s: Error at %lu: expected = %lu, got = %lu\n", + type_name().c_str(), + i + j, + static_cast(sum), + static_cast(out[i + j + k])); + return -1; + } + sum += in[i + j + k]; + } + } + } + return CL_SUCCESS; +} + +template +int verify_sg_scan_exclusive_min(const std::vector &in, const std::vector &out, size_t wg_size, size_t sg_size) +{ + size_t i, j, k; + for (i = 0; i < in.size(); i += wg_size) + { + for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j+= sg_size) + { + CL_INT_TYPE min = (std::numeric_limits::max)(); + // Check if all work-items in sub-group stored correct value + for (k = 0; k < ((wg_size - j) > sg_size ? 
sg_size : (wg_size - j)); k++) + { + if (min != out[i + j + k]) + { + log_info( + "sub_group_scan_exclusive_min %s: Error at %lu: expected = %lu, got = %lu\n", + type_name().c_str(), + i + j, + static_cast(min), + static_cast(out[i + j + k])); + return -1; + } + min = std::min(min, in[i + j + k]); + } + } + } + return CL_SUCCESS; +} + +template +int verify_sg_scan_exclusive_max(const std::vector &in, const std::vector &out, size_t wg_size, size_t sg_size) +{ + size_t i, j, k; + for (i = 0; i < in.size(); i += wg_size) + { + for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j+= sg_size) + { + CL_INT_TYPE max = (std::numeric_limits::min)(); + // Check if all work-items in sub-group stored correct value + for (k = 0; k < ((wg_size - j) > sg_size ? sg_size : (wg_size - j)); k++) + { + if (max != out[i + j + k]) + { + log_info( + "sub_group_scan_exclusive_max %s: Error at %lu: expected = %lu, got = %lu\n", + type_name().c_str(), + i + j, + static_cast(max), + static_cast(out[i + j + k])); + return -1; + } + max = std::max(max, in[i + j + k]); + } + } + } + return CL_SUCCESS; +} + +template +int verify_sg_scan_exclusive(const std::vector &in, const std::vector &out, size_t wg_size, size_t sg_size) +{ + switch (op) + { + case work_group_op::add: + return verify_sg_scan_exclusive_add(in, out, wg_size, sg_size); + case work_group_op::min: + return verify_sg_scan_exclusive_min(in, out, wg_size, sg_size); + case work_group_op::max: + return verify_sg_scan_exclusive_max(in, out, wg_size, sg_size); + } + return -1; +} + +template +int sub_group_scan_exclusive(cl_device_id device, cl_context context, cl_command_queue queue, size_t count) +{ + // don't run test for unsupported types + if(!type_supported(device)) + { + return CL_SUCCESS; + } + + cl_mem buffers[2]; + cl_program program; + cl_kernel kernel; + size_t wg_size; + size_t sg_max_size; + size_t work_size[1]; + int err; + + std::string code_str = generate_sg_scan_exclusive_kernel_code(); +// 
-----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
+ err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_scan_exclusive");
+ RETURN_ON_ERROR(err)
+ return err;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+ log_info("SKIPPED: OpenCL C kernels not provided for this test. Skipping the test.\n");
+ return CL_SUCCESS;
+#else
+ err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_scan_exclusive");
+ RETURN_ON_ERROR(err)
+#endif
+
+ err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, NULL);
+ RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo")
+
+ size_t param_value_size = 0;
+ err = clGetKernelSubGroupInfo(
+ kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE,
+ sizeof(size_t), static_cast<void *>(&wg_size),
+ sizeof(size_t), static_cast<void *>(&sg_max_size),
+ &param_value_size
+ );
+ RETURN_ON_CL_ERROR(err, "clGetKernelSubGroupInfo")
+
+ // Verify size of returned param
+ if(param_value_size != sizeof(size_t))
+ {
+ RETURN_ON_ERROR_MSG(-1,
+ "Returned size of max sub group size not valid! 
(Expected %lu, got %lu)\n", + sizeof(size_t), + param_value_size + ) + } + + // Calculate global work size + size_t flat_work_size; + size_t wg_number = static_cast( + std::ceil(static_cast(count) / wg_size) + ); + flat_work_size = wg_number * wg_size; + work_size[0] = flat_work_size; + + std::vector input = generate_input(flat_work_size, wg_size); + std::vector output = generate_output(flat_work_size, wg_size); + + buffers[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(CL_INT_TYPE) * input.size(), NULL, &err); + RETURN_ON_CL_ERROR(err, "clCreateBuffer"); + + buffers[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(CL_INT_TYPE) * output.size(), NULL, &err); + RETURN_ON_CL_ERROR(err, "clCreateBuffer"); + + err = clEnqueueWriteBuffer( + queue, buffers[0], CL_TRUE, 0, sizeof(CL_INT_TYPE) * input.size(), + static_cast(input.data()), 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer"); + + err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]); + err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]); + RETURN_ON_CL_ERROR(err, "clSetKernelArg"); + + err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, &wg_size, 0, NULL, NULL); + RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel"); + + err = clEnqueueReadBuffer( + queue, buffers[1], CL_TRUE, 0, sizeof(CL_INT_TYPE) * output.size(), + static_cast(output.data()), 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer"); + + if (verify_sg_scan_exclusive(input, output, wg_size, sg_max_size) != CL_SUCCESS) + { + RETURN_ON_ERROR_MSG(-1, "sub_group_scan_exclusive_%s %s failed", to_string(op).c_str(), type_name().c_str()); + } + log_info("sub_group_scan_exclusive_%s %s passed\n", to_string(op).c_str(), type_name().c_str()); + + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseKernel(kernel); + clReleaseProgram(program); + return err; +} + +AUTO_TEST_CASE(test_sub_group_scan_exclusive_add) 
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+ int error = CL_SUCCESS;
+ int local_error = CL_SUCCESS;
+
+ local_error = sub_group_scan_exclusive<cl_int, work_group_op::add>(device, context, queue, n_elems);
+ CHECK_ERROR(local_error)
+ error |= local_error;
+
+ local_error = sub_group_scan_exclusive<cl_uint, work_group_op::add>(device, context, queue, n_elems);
+ CHECK_ERROR(local_error)
+ error |= local_error;
+
+ local_error = sub_group_scan_exclusive<cl_long, work_group_op::add>(device, context, queue, n_elems);
+ CHECK_ERROR(local_error)
+ error |= local_error;
+
+ local_error = sub_group_scan_exclusive<cl_ulong, work_group_op::add>(device, context, queue, n_elems);
+ CHECK_ERROR(local_error)
+ error |= local_error;
+
+ if(error != CL_SUCCESS)
+ return -1;
+ return CL_SUCCESS;
+}
+
+AUTO_TEST_CASE(test_sub_group_scan_exclusive_min)
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+ int error = CL_SUCCESS;
+ int local_error = CL_SUCCESS;
+
+ local_error = sub_group_scan_exclusive<cl_int, work_group_op::min>(device, context, queue, n_elems);
+ CHECK_ERROR(local_error)
+ error |= local_error;
+
+ local_error = sub_group_scan_exclusive<cl_uint, work_group_op::min>(device, context, queue, n_elems);
+ CHECK_ERROR(local_error)
+ error |= local_error;
+
+ local_error = sub_group_scan_exclusive<cl_long, work_group_op::min>(device, context, queue, n_elems);
+ CHECK_ERROR(local_error)
+ error |= local_error;
+
+ local_error = sub_group_scan_exclusive<cl_ulong, work_group_op::min>(device, context, queue, n_elems);
+ CHECK_ERROR(local_error)
+ error |= local_error;
+
+ if(error != CL_SUCCESS)
+ return -1;
+ return CL_SUCCESS;
+}
+
+AUTO_TEST_CASE(test_sub_group_scan_exclusive_max)
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+ int error = CL_SUCCESS;
+ int local_error = CL_SUCCESS;
+
+ local_error = sub_group_scan_exclusive<cl_int, work_group_op::max>(device, context, queue, n_elems);
+ CHECK_ERROR(local_error)
+ error |= local_error;
+
+ local_error = sub_group_scan_exclusive<cl_uint, work_group_op::max>(device, context, queue, n_elems);
+ CHECK_ERROR(local_error)
+ error |= local_error;
+
+ local_error = sub_group_scan_exclusive<cl_long, work_group_op::max>(device, context, queue, n_elems);
+ CHECK_ERROR(local_error)
+ error |= local_error;
+
+ local_error = 
sub_group_scan_exclusive(device, context, queue, n_elems); + CHECK_ERROR(local_error) + error |= local_error; + + if(error != CL_SUCCESS) + return -1; + return CL_SUCCESS; +} + +#endif // TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_SCAN_EXCLUSIVE_HPP diff --git a/test_conformance/clcpp/subgroups/test_sg_scan_inclusive.hpp b/test_conformance/clcpp/subgroups/test_sg_scan_inclusive.hpp new file mode 100644 index 00000000..803daa00 --- /dev/null +++ b/test_conformance/clcpp/subgroups/test_sg_scan_inclusive.hpp @@ -0,0 +1,332 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+//
+#ifndef TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_SCAN_INCLUSIVE_HPP
+#define TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_SCAN_INCLUSIVE_HPP
+
+#include
+#include
+
+// Common for all OpenCL C++ tests
+#include "../common.hpp"
+// Common for tests of sub-group functions
+#include "common.hpp"
+
+template
+std::string generate_sg_scan_inclusive_kernel_code()
+{
+ return "#include \n"
+ "#include \n"
+ "#include \n"
+ "using namespace cl;\n"
+ "__kernel void test_sg_scan_inclusive(global_ptr<" + type_name() + "[]> input, "
+ "global_ptr<" + type_name() + "[]> output)\n"
+ "{\n"
+ " ulong tid = get_global_id(0);\n"
+ " " + type_name() + " result = sub_group_scan_inclusive(input[tid]);\n"
+ " output[tid] = result;\n"
+ "}\n";
+}
+
+template
+int verify_sg_scan_inclusive_add(const std::vector &in, const std::vector &out, size_t wg_size, size_t sg_size)
+{
+ size_t i, j, k;
+ for (i = 0; i < in.size(); i += wg_size)
+ {
+ for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j+= sg_size)
+ {
+ CL_INT_TYPE sum = 0;
+ // Check if all work-items in sub-group stored correct value
+ for (k = 0; k < ((wg_size - j) > sg_size ? sg_size : (wg_size - j)); k++)
+ {
+ sum += in[i + j + k];
+ if (sum != out[i + j + k])
+ {
+ log_info(
+ "sub_group_scan_inclusive_add %s: Error at %lu: expected = %lu, got = %lu\n",
+ type_name().c_str(),
+ i + j,
+ static_cast(sum),
+ static_cast(out[i + j + k]));
+ return -1;
+ }
+ }
+ }
+ }
+ return CL_SUCCESS;
+}
+
+template
+int verify_sg_scan_inclusive_min(const std::vector &in, const std::vector &out, size_t wg_size, size_t sg_size)
+{
+ size_t i, j, k;
+ for (i = 0; i < in.size(); i += wg_size)
+ {
+ for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j+= sg_size)
+ {
+ CL_INT_TYPE min = (std::numeric_limits::max)();
+ // Check if all work-items in sub-group stored correct value
+ for (k = 0; k < ((wg_size - j) > sg_size ? 
sg_size : (wg_size - j)); k++)
+ {
+ min = std::min(min, in[i + j + k]);
+ if (min != out[i + j + k])
+ {
+ log_info(
+ "sub_group_scan_inclusive_min %s: Error at %lu: expected = %lu, got = %lu\n",
+ type_name().c_str(),
+ i + j,
+ static_cast(min),
+ static_cast(out[i + j + k]));
+ return -1;
+ }
+ }
+ }
+ }
+ return CL_SUCCESS;
+}
+
+template
+int verify_sg_scan_inclusive_max(const std::vector &in, const std::vector &out, size_t wg_size, size_t sg_size)
+{
+ size_t i, j, k;
+ for (i = 0; i < in.size(); i += wg_size)
+ {
+ for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j+= sg_size)
+ {
+ CL_INT_TYPE max = (std::numeric_limits::min)();
+ // Check if all work-items in sub-group stored correct value
+ for (k = 0; k < ((wg_size - j) > sg_size ? sg_size : (wg_size - j)); k++)
+ {
+ max = std::max(max, in[i + j + k]);
+ if (max != out[i + j + k])
+ {
+ log_info(
+ "sub_group_scan_inclusive_max %s: Error at %lu: expected = %lu, got = %lu\n",
+ type_name().c_str(),
+ i + j,
+ static_cast(max),
+ static_cast(out[i + j + k]));
+ return -1;
+ }
+ }
+ }
+ }
+ return CL_SUCCESS;
+}
+
+template
+int verify_sg_scan_inclusive(const std::vector &in, const std::vector &out, size_t wg_size, size_t sg_size)
+{
+ switch (op)
+ {
+ case work_group_op::add:
+ return verify_sg_scan_inclusive_add(in, out, wg_size, sg_size);
+ case work_group_op::min:
+ return verify_sg_scan_inclusive_min(in, out, wg_size, sg_size);
+ case work_group_op::max:
+ return verify_sg_scan_inclusive_max(in, out, wg_size, sg_size);
+ }
+ return -1;
+}
+
+template
+int sub_group_scan_inclusive(cl_device_id device, cl_context context, cl_command_queue queue, size_t count)
+{
+ // don't run test for unsupported types
+ if(!type_supported(device))
+ {
+ return CL_SUCCESS;
+ }
+
+ cl_mem buffers[2];
+ cl_program program;
+ cl_kernel kernel;
+ size_t wg_size;
+ size_t sg_max_size;
+ size_t work_size[1];
+ int err;
+
+ std::string code_str = generate_sg_scan_inclusive_kernel_code();
+// 
-----------------------------------------------------------------------------------
+// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
+// -----------------------------------------------------------------------------------
+// Only OpenCL C++ to SPIR-V compilation
+#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
+ err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_scan_inclusive");
+ RETURN_ON_ERROR(err)
+ return err;
+// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code)
+#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS)
+ log_info("SKIPPED: OpenCL C kernels not provided for this test. Skipping the test.\n");
+ return CL_SUCCESS;
+#else
+ err = create_opencl_kernel(context, &program, &kernel, code_str, "test_sg_scan_inclusive");
+ RETURN_ON_ERROR(err)
+#endif
+
+ err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, NULL);
+ RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo")
+
+ size_t param_value_size = 0;
+ err = clGetKernelSubGroupInfo(
+ kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE,
+ sizeof(size_t), static_cast<void *>(&wg_size),
+ sizeof(size_t), static_cast<void *>(&sg_max_size),
+ &param_value_size
+ );
+ RETURN_ON_CL_ERROR(err, "clGetKernelSubGroupInfo")
+
+ // Verify size of returned param
+ if(param_value_size != sizeof(size_t))
+ {
+ RETURN_ON_ERROR_MSG(-1,
+ "Returned size of max sub group size not valid! 
(Expected %lu, got %lu)\n", + sizeof(size_t), + param_value_size + ) + } + + // Calculate global work size + size_t flat_work_size; + size_t wg_number = static_cast( + std::ceil(static_cast(count) / wg_size) + ); + flat_work_size = wg_number * wg_size; + work_size[0] = flat_work_size; + + std::vector input = generate_input(flat_work_size, wg_size); + std::vector output = generate_output(flat_work_size, wg_size); + + buffers[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(CL_INT_TYPE) * input.size(), NULL, &err); + RETURN_ON_CL_ERROR(err, "clCreateBuffer"); + + buffers[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(CL_INT_TYPE) * output.size(), NULL, &err); + RETURN_ON_CL_ERROR(err, "clCreateBuffer"); + + err = clEnqueueWriteBuffer( + queue, buffers[0], CL_TRUE, 0, sizeof(CL_INT_TYPE) * input.size(), + static_cast(input.data()), 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer"); + + err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]); + err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]); + RETURN_ON_CL_ERROR(err, "clSetKernelArg"); + + err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, &wg_size, 0, NULL, NULL); + RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel"); + + err = clEnqueueReadBuffer( + queue, buffers[1], CL_TRUE, 0, sizeof(CL_INT_TYPE) * output.size(), + static_cast(output.data()), 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer"); + + if (verify_sg_scan_inclusive(input, output, wg_size, sg_max_size) != CL_SUCCESS) + { + RETURN_ON_ERROR_MSG(-1, "sub_group_scan_inclusive_%s %s failed", to_string(op).c_str(), type_name().c_str()); + } + log_info("sub_group_scan_inclusive_%s %s passed\n", to_string(op).c_str(), type_name().c_str()); + + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseKernel(kernel); + clReleaseProgram(program); + return err; +} + +AUTO_TEST_CASE(test_sub_group_scan_inclusive_add) 
+(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + int error = CL_SUCCESS; + int local_error = CL_SUCCESS; + + local_error = sub_group_scan_inclusive(device, context, queue, n_elems); + CHECK_ERROR(local_error) + error |= local_error; + + local_error = sub_group_scan_inclusive(device, context, queue, n_elems); + CHECK_ERROR(local_error) + error |= local_error; + + local_error = sub_group_scan_inclusive(device, context, queue, n_elems); + CHECK_ERROR(local_error) + error |= local_error; + + local_error = sub_group_scan_inclusive(device, context, queue, n_elems); + CHECK_ERROR(local_error) + error |= local_error; + + if(error != CL_SUCCESS) + return -1; + return CL_SUCCESS; +} + +AUTO_TEST_CASE(test_sub_group_scan_inclusive_min) +(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + int error = CL_SUCCESS; + int local_error = CL_SUCCESS; + + local_error = sub_group_scan_inclusive(device, context, queue, n_elems); + CHECK_ERROR(local_error) + error |= local_error; + + local_error = sub_group_scan_inclusive(device, context, queue, n_elems); + CHECK_ERROR(local_error) + error |= local_error; + + local_error = sub_group_scan_inclusive(device, context, queue, n_elems); + CHECK_ERROR(local_error) + error |= local_error; + + local_error = sub_group_scan_inclusive(device, context, queue, n_elems); + CHECK_ERROR(local_error) + error |= local_error; + + if(error != CL_SUCCESS) + return -1; + return CL_SUCCESS; +} + +AUTO_TEST_CASE(test_sub_group_scan_inclusive_max) +(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + int error = CL_SUCCESS; + int local_error = CL_SUCCESS; + + local_error = sub_group_scan_inclusive(device, context, queue, n_elems); + CHECK_ERROR(local_error) + error |= local_error; + + local_error = sub_group_scan_inclusive(device, context, queue, n_elems); + CHECK_ERROR(local_error) + error |= local_error; + + local_error = sub_group_scan_inclusive(device, context, 
queue, n_elems); + CHECK_ERROR(local_error) + error |= local_error; + + local_error = sub_group_scan_inclusive(device, context, queue, n_elems); + CHECK_ERROR(local_error) + error |= local_error; + + if(error != CL_SUCCESS) + return -1; + return CL_SUCCESS; +} + +#endif // TEST_CONFORMANCE_CLCPP_SUBGROUPS_TEST_SG_SCAN_INCLUSIVE_HPP diff --git a/test_conformance/clcpp/synchronization/CMakeLists.txt b/test_conformance/clcpp/synchronization/CMakeLists.txt new file mode 100644 index 00000000..793b7bd1 --- /dev/null +++ b/test_conformance/clcpp/synchronization/CMakeLists.txt @@ -0,0 +1,12 @@ +set(MODULE_NAME CPP_SYNCHRONIZATION) + +set(${MODULE_NAME}_SOURCES + main.cpp + ../../../test_common/harness/errorHelpers.c + ../../../test_common/harness/testHarness.c + ../../../test_common/harness/kernelHelpers.c + ../../../test_common/harness/msvc9.c + ../../../test_common/harness/parseParameters.cpp +) + +include(../../CMakeCommon.txt) diff --git a/test_conformance/clcpp/synchronization/main.cpp b/test_conformance/clcpp/synchronization/main.cpp new file mode 100644 index 00000000..e2a72b07 --- /dev/null +++ b/test_conformance/clcpp/synchronization/main.cpp @@ -0,0 +1,32 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../common.hpp" + +#include "test_work_group_barrier.hpp" +#include "test_sub_group_barrier.hpp" +#include "named_barrier/test_spec_example.hpp" +#include "named_barrier/test_named_barrier.hpp" + +int main(int argc, const char *argv[]) +{ + // Get list to all test functions + std::vector testfn_list = autotest::test_suite::get_test_functions(); + // Get names of all test functions + std::vector testfn_names = autotest::test_suite::get_test_names(); + // Create a vector of pointers to the names test functions + std::vector testfn_names_c_str = autotest::get_strings_ptrs(testfn_names); + return runTestHarness(argc, argv, testfn_list.size(), testfn_list.data(), testfn_names_c_str.data(), false, false, 0); +} diff --git a/test_conformance/clcpp/synchronization/named_barrier/common.hpp b/test_conformance/clcpp/synchronization/named_barrier/common.hpp new file mode 100644 index 00000000..da34dceb --- /dev/null +++ b/test_conformance/clcpp/synchronization/named_barrier/common.hpp @@ -0,0 +1,171 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef TEST_CONFORMANCE_CLCPP_SYNCHRONIZATION_NAMED_BARRIER_COMMON_HPP +#define TEST_CONFORMANCE_CLCPP_SYNCHRONIZATION_NAMED_BARRIER_COMMON_HPP + +#include + +// Common for all OpenCL C++ tests +#include "../../common.hpp" +#include "../../funcs_test_utils.hpp" + +#define RUN_WG_NAMED_BARRIER_TEST_MACRO(TEST_CLASS) \ + last_error = run_work_group_named_barrier_barrier_test( \ + device, context, queue, num_elements, TEST_CLASS \ + ); \ + CHECK_ERROR(last_error) \ + error |= last_error; + +namespace named_barrier { + +struct work_group_named_barrier_test_base : public detail::base_func_type +{ + // Returns test name + virtual std::string str() = 0; + // Returns OpenCL program source + // It's assumed that this program has only one kernel. + virtual std::string generate_program() = 0; + // Return value that is expected to be in output_buffer[i] + virtual cl_uint operator()(size_t i, size_t work_group_size, size_t mas_sub_group_size) = 0; + // Kernel execution + // This covers typical case: kernel is executed once, kernel + // has only one argument which is output buffer + virtual cl_int execute(const cl_kernel kernel, + const cl_mem output_buffer, + const cl_command_queue& queue, + const size_t work_size, + const size_t work_group_size) + { + cl_int err; + err = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer); + RETURN_ON_CL_ERROR(err, "clSetKernelArg") + + err = clEnqueueNDRangeKernel( + queue, kernel, 1, + NULL, &work_size, &work_group_size, + 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel") + return err; + } + // Calculates maximal work-group size (one dim) + virtual size_t get_max_local_size(const cl_kernel kernel, + const cl_device_id device, + const size_t work_group_size, // default work-group size + cl_int& error) + { + size_t max_wg_size; + error = clGetKernelWorkGroupInfo( + kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &max_wg_size, NULL + ); + RETURN_ON_ERROR(error) + return 
(std::min)(work_group_size, max_wg_size); + } + // if work-groups should be uniform + virtual bool enforce_uniform() + { + return false; + } +}; + +template +int run_work_group_named_barrier_barrier_test(cl_device_id device, cl_context context, cl_command_queue queue, + size_t count, work_group_named_barrier_test test) +{ + cl_mem buffers[1]; + cl_program program; + cl_kernel kernel; + size_t work_group_size; + size_t work_size[1]; + cl_int err; + + std::string code_str = test.generate_program(); + std::string kernel_name = test.get_kernel_name(); +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +// Only OpenCL C++ to SPIR-V compilation +#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) + err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name); + return err; +// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) +#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name, "-cl-std=CL2.0", false); + RETURN_ON_ERROR(err) +#else + err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name); + RETURN_ON_ERROR(err) +#endif + + // Find the max possible wg size for among all the kernels + work_group_size = test.get_max_local_size(kernel, device, 256, err); + RETURN_ON_ERROR(err); + if(work_group_size == 0) + { + log_info("SKIPPED: Can't produce local size with enough sub-groups. 
Skipping tests.\n"); + return CL_SUCCESS; + } + + work_size[0] = count; + // uniform work-group + if(test.enforce_uniform()) + { + size_t wg_number = static_cast( + std::ceil(static_cast(work_size[0]) / work_group_size) + ); + work_size[0] = wg_number * work_group_size; + } + + // host output vector + std::vector output = generate_output(work_size[0], 9999); + + // device output buffer + buffers[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * output.size(), NULL, &err); + RETURN_ON_CL_ERROR(err, "clCreateBuffer") + + // Execute test kernels + err = test.execute(kernel, buffers[0], queue, work_size[0], work_group_size); + RETURN_ON_ERROR(err) + + err = clEnqueueReadBuffer( + queue, buffers[0], CL_TRUE, 0, sizeof(cl_uint) * output.size(), + static_cast(output.data()), 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer") + + // Check output values + for(size_t i = 0; i < output.size(); i++) + { + cl_uint v = test(i, work_group_size, i); + if(!(are_equal(v, output[i], ::detail::make_value(0), test))) + { + RETURN_ON_ERROR_MSG(-1, + "test_%s(%s) failed. Expected: %s, got: %s", test.str().c_str(), type_name().c_str(), + format_value(v).c_str(), format_value(output[i]).c_str() + ); + } + } + log_info("test_%s(%s) passed\n", test.str().c_str(), type_name().c_str()); + + clReleaseMemObject(buffers[0]); + clReleaseKernel(kernel); + clReleaseProgram(program); + return err; +} + +} // namespace named_barrier + +#endif // TEST_CONFORMANCE_CLCPP_SYNCHRONIZATION_NAMED_BARRIER_COMMON_HPP diff --git a/test_conformance/clcpp/synchronization/named_barrier/test_named_barrier.hpp b/test_conformance/clcpp/synchronization/named_barrier/test_named_barrier.hpp new file mode 100644 index 00000000..a4f9a04a --- /dev/null +++ b/test_conformance/clcpp/synchronization/named_barrier/test_named_barrier.hpp @@ -0,0 +1,490 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef TEST_CONFORMANCE_CLCPP_SYNCHRONIZATION_NAMED_BARRIER_TEST_NAMED_BARRIER_HPP +#define TEST_CONFORMANCE_CLCPP_SYNCHRONIZATION_NAMED_BARRIER_TEST_NAMED_BARRIER_HPP + +#include "common.hpp" + +namespace named_barrier { + +struct local_fence_named_barrier_test : public work_group_named_barrier_test_base +{ + std::string str() + { + return "local_fence"; + } + + // Return value that is expected to be in output_buffer[i] + cl_uint operator()(size_t i, size_t work_group_size, size_t max_sub_group_size) + { + return static_cast(i); + } + + // At the end every work-item writes its global id to ouput[work-item-global-id]. 
+ std::string generate_program() + { + // ----------------------------------------------------------------------------------- + // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ + // ----------------------------------------------------------------------------------- + #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + return + "__kernel void " + this->get_kernel_name() + "(global uint *output, " + "local uint * lmem)\n" + "{\n" + " size_t gid = get_global_id(0);\n" + " output[gid] = gid;\n" + "}\n"; + + #else + return + "#define cl_khr_subgroup_named_barrier\n" + "#include \n" + "#include \n" + "#include \n" + "using namespace cl;\n" + "__kernel void " + this->get_kernel_name() + "(global_ptr output, " + "local_ptr lmem)\n" + "{\n\n" + " local a(1);\n" + " local b(2);\n" + " size_t gid = get_global_id(0);\n" + " size_t lid = get_local_id(0);\n" + " size_t value;\n" + " if(get_num_sub_groups() == 1)\n" + " {\n" + " size_t other_lid = (lid + 1) % get_enqueued_local_size(0);\n" + " size_t other_gid = (gid - lid) + other_lid;\n" + " lmem[other_lid] = other_gid;\n" + " a.wait(mem_fence::local);\n" + " value = lmem[lid];" // lmem[lid] shoule be equal to gid + " }\n" + " else if(get_num_sub_groups() == 2)\n" + " {\n" + " size_t other_lid = (lid + get_max_sub_group_size()) % get_enqueued_local_size(0);\n" + " size_t other_gid = (gid - lid) + other_lid;\n" + " lmem[other_lid] = other_gid;\n" + " b.wait(mem_fence::local);\n" + " value = lmem[lid];" // lmem[lid] shoule be equal to gid + " }\n" + " else if(get_num_sub_groups() > 2)\n" + " {\n" + " if(get_sub_group_id() < 2)\n" + " {\n" + " const size_t two_first_subgroups = 2 * get_max_sub_group_size();" + // local and global id of some work-item outside of work-item subgroup, + // but within subgroups 0 and 1. 
+ " size_t other_lid = (lid + get_max_sub_group_size()) % two_first_subgroups;\n" + " size_t other_gid = (gid - lid) + other_lid;\n" + " lmem[other_lid] = other_gid;\n" + " b.wait(mem_fence::local);\n" // subgroup 0 and 1 are sync (local) + " value = lmem[lid];" // lmem[lid] should be equal to gid + " }\n" + " else\n" + " {\n" + " value = gid;\n" + " }\n" + " }\n" + " output[gid] = value;\n" + "}\n"; + #endif + } + + size_t get_max_local_size(const cl_kernel kernel, + const cl_device_id device, + const size_t work_group_size, // default work-group size + cl_int& error) + { + // Set size of the local memory, we need to do this to correctly calculate + // max possible work-group size. + size_t wg_size; + for(wg_size = work_group_size; wg_size > 1; wg_size /= 2) + { + error = clSetKernelArg(kernel, 1, wg_size * sizeof(cl_uint), NULL); + RETURN_ON_CL_ERROR(error, "clSetKernelArg") + + size_t max_wg_size; + error = clGetKernelWorkGroupInfo( + kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &max_wg_size, NULL + ); + RETURN_ON_CL_ERROR(error, "clGetKernelWorkGroupInfo") + if(max_wg_size >= wg_size) break; + } + return wg_size; + } + + cl_int execute(const cl_kernel kernel, + const cl_mem output_buffer, + const cl_command_queue queue, + const size_t work_size, + const size_t work_group_size) + { + cl_int err; + // Get context from queue + cl_context context; + err = clGetCommandQueueInfo(queue, CL_QUEUE_CONTEXT, sizeof(cl_context), &context, NULL); + RETURN_ON_CL_ERROR(err, "clGetCommandQueueInfo") + + err = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer); + err |= clSetKernelArg(kernel, 1, work_group_size * sizeof(cl_uint), NULL); + RETURN_ON_CL_ERROR(err, "clSetKernelArg") + + err = clEnqueueNDRangeKernel( + queue, kernel, 1, + NULL, &work_size, &work_group_size, + 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel") + + err = clFinish(queue); + return err; + } +}; + +struct global_fence_named_barrier_test : public 
work_group_named_barrier_test_base +{ + std::string str() + { + return "global_fence"; + } + + // Return value that is expected to be in output_buffer[i] + cl_uint operator()(size_t i, size_t work_group_size, size_t max_sub_group_size) + { + return static_cast(i % work_group_size); + } + + // At the end every work-item writes its local id to output[work-item-global-id]. + std::string generate_program() + { + // ----------------------------------------------------------------------------------- + // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ + // ----------------------------------------------------------------------------------- + #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + return + "__kernel void " + this->get_kernel_name() + "(global uint * output, " + "global uint * temp)\n" + "{\n" + "size_t gid = get_global_id(0);\n" + "output[gid] = get_local_id(0);\n" + "}\n"; + + #else + return + "#define cl_khr_subgroup_named_barrier\n" + "#include \n" + "#include \n" + "#include \n" + "using namespace cl;\n" + "__kernel void " + this->get_kernel_name() + "(global_ptr output, " + "global_ptr temp)\n" + "{\n\n" + " local a(1);\n" + " local b(2);\n" + " size_t gid = get_global_id(0);\n" + " size_t lid = get_local_id(0);\n" + " size_t value;\n" + " if(get_num_sub_groups() == 1)\n" + " {\n" + " size_t other_lid = (lid + 1) % get_enqueued_local_size(0);\n" + " size_t other_gid = (gid - lid) + other_lid;\n" + " temp[other_gid] = other_lid + 1;\n" + " a.wait(mem_fence::global);\n" + " size_t other_lid_same_subgroup = (lid + 2) % get_sub_group_size();\n" + " size_t other_gid_same_subgroup = (gid - lid) + other_lid_same_subgroup;\n" + " temp[other_gid_same_subgroup] = temp[other_gid_same_subgroup] - 1;\n" + " a.wait(mem_fence::global, memory_scope_sub_group);\n" + " value = temp[gid];" // temp[gid] should be equal to lid + " }\n" + " else if(get_num_sub_groups() == 2)\n" + " {\n" + " size_t other_lid = (lid + 
get_max_sub_group_size()) % get_enqueued_local_size(0);\n" + " size_t other_gid = (gid - lid) + other_lid;\n" + " temp[other_gid] = other_lid + 1;\n" + " b.wait(mem_fence::global);\n" // both subgroups wait, both are sync + " size_t other_lid_same_subgroup = " + "((lid + 1) % get_sub_group_size()) + (get_sub_group_id() * get_sub_group_size());\n" + " size_t other_gid_same_subgroup = (gid - lid) + other_lid_same_subgroup;\n" + " temp[other_gid_same_subgroup] = temp[other_gid_same_subgroup] - 1;\n" + " b.wait(mem_fence::global, memory_scope_sub_group);\n" // both subgroups wait, sync only within subgroup + " value = temp[gid];" // temp[gid] should be equal to lid + " }\n" + " else if(get_num_sub_groups() > 2)\n" + " {\n" + " if(get_sub_group_id() < 2)\n" + " {\n" + " const size_t two_first_subgroups = 2 * get_max_sub_group_size();" + // local and global id of some work-item outside of work-item subgroup, + // but within subgroups 0 and 1. + " size_t other_lid = (lid + get_max_sub_group_size()) % two_first_subgroups;\n" + " size_t other_gid = (gid - lid) + other_lid;\n" + " temp[other_gid] = other_lid + 1;\n" + " b.wait(mem_fence::global);\n" // both subgroups wait, both are sync + // local and global id of some other work-item within work-item subgroup + " size_t other_lid_same_subgroup = " + "((lid + 1) % get_sub_group_size()) + (get_sub_group_id() * get_sub_group_size());\n" + " size_t other_gid_same_subgroup = (gid - lid) + other_lid_same_subgroup;\n" + " temp[other_gid_same_subgroup] = temp[other_gid_same_subgroup] - 1;\n" + " b.wait(mem_fence::global, memory_scope_sub_group);\n" // both subgroups wait, sync only within subgroup + " value = temp[gid];" // temp[gid] should be equal to lid + " }\n" + " else\n" + " {\n" + " value = lid;\n" + " }\n" + " }\n" + " output[gid] = value;\n" + "}\n"; + #endif + } + + size_t get_max_local_size(const cl_kernel kernel, + const cl_device_id device, + const size_t work_group_size, // default work-group size + cl_int& error) + { 
+ size_t max_wg_size; + error = clGetKernelWorkGroupInfo( + kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &max_wg_size, NULL + ); + RETURN_ON_CL_ERROR(error, "clGetKernelWorkGroupInfo") + return (std::min)(max_wg_size, work_group_size); + } + + cl_int execute(const cl_kernel kernel, + const cl_mem output_buffer, + const cl_command_queue queue, + const size_t work_size, + const size_t work_group_size) + { + cl_int err; + // Get context from queue + cl_context context; + err = clGetCommandQueueInfo(queue, CL_QUEUE_CONTEXT, sizeof(cl_context), &context, NULL); + RETURN_ON_CL_ERROR(err, "clGetCommandQueueInfo") + + // create temp buffer + auto temp_buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * work_size, NULL, &err); + RETURN_ON_CL_ERROR(err, "clCreateBuffer") + + err = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer); + err |= clSetKernelArg(kernel, 1, sizeof(temp_buffer), &temp_buffer); + RETURN_ON_CL_ERROR(err, "clSetKernelArg") + + err = clEnqueueNDRangeKernel( + queue, kernel, 1, + NULL, &work_size, &work_group_size, + 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel") + + err = clFinish(queue); + err |= clReleaseMemObject(temp_buffer); + + return err; + } +}; + +struct global_local_fence_named_barrier_test : public work_group_named_barrier_test_base +{ + std::string str() + { + return "global_local_fence"; + } + + // Return value that is expected to be in output_buffer[i] + cl_uint operator()(size_t i, size_t work_group_size, size_t max_sub_group_size) + { + return static_cast(i % work_group_size); + } + + // At the end every work-item writes its local id to output[work-item-global-id]. 
+ std::string generate_program() + { + // ----------------------------------------------------------------------------------- + // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ + // ----------------------------------------------------------------------------------- + #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + return + "__kernel void " + this->get_kernel_name() + "(global uint * output, " + "global uint * temp," + "local uint * lmem)\n" + "{\n" + "size_t gid = get_global_id(0);\n" + "output[gid] = get_local_id(0);\n" + "}\n"; + + #else + return + "#define cl_khr_subgroup_named_barrier\n" + "#include \n" + "#include \n" + "#include \n" + "using namespace cl;\n" + "__kernel void " + this->get_kernel_name() + "(global_ptr output, " + "global_ptr temp," + "local_ptr lmem)\n" + "{\n\n" + " local a(1);\n" + " local b(2);\n" + " size_t gid = get_global_id(0);\n" + " size_t lid = get_local_id(0);\n" + " size_t value = 0;\n" + " if(get_num_sub_groups() == 1)\n" + " {\n" + " size_t other_lid = (lid + 1) % get_enqueued_local_size(0);\n" + " size_t other_gid = (gid - lid) + other_lid;\n" + " lmem[other_lid] = other_gid;\n" + " temp[other_gid] = other_lid;\n" + " a.wait(mem_fence::local | mem_fence::global);\n" + " if(lmem[lid] == gid) value = temp[gid];\n" + " }\n" + " else if(get_num_sub_groups() == 2)\n" + " {\n" + " size_t other_lid = (lid + get_max_sub_group_size()) % get_enqueued_local_size(0);\n" + " size_t other_gid = (gid - lid) + other_lid;\n" + " lmem[other_lid] = other_gid;\n" + " temp[other_gid] = other_lid;\n" + " b.wait(mem_fence::local | mem_fence::global);\n" + " if(lmem[lid] == gid) value = temp[gid];\n" + " }\n" + " else if(get_num_sub_groups() > 2)\n" + " {\n" + " if(get_sub_group_id() < 2)\n" + " {\n" + " const size_t two_first_subgroups = 2 * get_max_sub_group_size();" + // local and global id of some work-item outside of work-item subgroup, + // but within subgroups 0 and 1. 
+ " size_t other_lid = (lid + get_max_sub_group_size()) % two_first_subgroups;\n" + " size_t other_gid = (gid - lid) + other_lid;\n" + " lmem[other_lid] = other_gid;\n" + " temp[other_gid] = other_lid;\n" + " b.wait(mem_fence::local | mem_fence::global);\n" + " if(lmem[lid] == gid) value = temp[gid];\n" + " }\n" + " else\n" + " {\n" + " value = lid;\n" + " }\n" + " }\n" + " output[gid] = value;\n" + "}\n"; + #endif + } + + size_t get_max_local_size(const cl_kernel kernel, + const cl_device_id device, + const size_t work_group_size, // default work-group size + cl_int& error) + { + // Set size of the local memory, we need to do this to correctly calculate + // max possible work-group size. + size_t wg_size; + for(wg_size = work_group_size; wg_size > 1; wg_size /= 2) + { + error = clSetKernelArg(kernel, 2, wg_size * sizeof(cl_uint), NULL); + RETURN_ON_CL_ERROR(error, "clSetKernelArg") + + size_t max_wg_size; + error = clGetKernelWorkGroupInfo( + kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &max_wg_size, NULL + ); + RETURN_ON_CL_ERROR(error, "clGetKernelWorkGroupInfo") + if(max_wg_size >= wg_size) break; + } + return wg_size; + } + + cl_int execute(const cl_kernel kernel, + const cl_mem output_buffer, + const cl_command_queue queue, + const size_t work_size, + const size_t work_group_size) + { + cl_int err; + // Get context from queue + cl_context context; + err = clGetCommandQueueInfo(queue, CL_QUEUE_CONTEXT, sizeof(cl_context), &context, NULL); + RETURN_ON_CL_ERROR(err, "clGetCommandQueueInfo") + + // create temp buffer + auto temp_buffer = clCreateBuffer( + context, (cl_mem_flags)(CL_MEM_READ_WRITE), + sizeof(cl_uint) * work_size, NULL, &err + ); + RETURN_ON_CL_ERROR(err, "clCreateBuffer") + + err = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer); + err |= clSetKernelArg(kernel, 1, sizeof(temp_buffer), &temp_buffer); + err |= clSetKernelArg(kernel, 2, work_group_size * sizeof(cl_uint), NULL); + RETURN_ON_CL_ERROR(err, 
"clSetKernelArg") + + err = clEnqueueNDRangeKernel( + queue, kernel, 1, + NULL, &work_size, &work_group_size, + 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel") + + err = clFinish(queue); + err |= clReleaseMemObject(temp_buffer); + + return err; + } +}; + +// ------------------------------------------------------------------------------ +// -------------------------- RUN TESTS ----------------------------------------- +// ------------------------------------------------------------------------------ +AUTO_TEST_CASE(test_work_group_named_barrier) +(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + int error = CL_SUCCESS; + int last_error = CL_SUCCESS; + +#if !(defined(DEVELOPMENT) && (defined(USE_OPENCLC_KERNELS) || defined(ONLY_SPIRV_COMPILATION))) + if(!is_extension_available(device, "cl_khr_subgroup_named_barrier")) + { + log_info("SKIPPED: Extension `cl_khr_subgroup_named_barrier` is not supported. Skipping tests.\n"); + return CL_SUCCESS; + } + + // An implementation shall support at least 8 named barriers per work-group. The exact + // maximum number can be queried using clGetDeviceInfo with CL_DEVICE_MAX_NAMED_BARRIER_COUNT_KHR + // from the OpenCL 2.2 Extension Specification. 
+ cl_uint named_barrier_count; + error = clGetDeviceInfo(device, CL_DEVICE_MAX_NAMED_BARRIER_COUNT_KHR, sizeof(cl_uint), &named_barrier_count, NULL); + RETURN_ON_CL_ERROR(error, "clGetDeviceInfo") + + if(named_barrier_count < 8) + { + RETURN_ON_ERROR_MSG(-1, "Maximum number of named barriers must be at least 8."); + } +#endif + + RUN_WG_NAMED_BARRIER_TEST_MACRO(local_fence_named_barrier_test()) + RUN_WG_NAMED_BARRIER_TEST_MACRO(global_fence_named_barrier_test()) + RUN_WG_NAMED_BARRIER_TEST_MACRO(global_local_fence_named_barrier_test()) + + if(error != CL_SUCCESS) + { + return -1; + } + return error; +} + +} // namespace + +#endif // TEST_CONFORMANCE_CLCPP_SYNCHRONIZATION_NAMED_BARRIER_TEST_NAMED_BARRIER_HPP diff --git a/test_conformance/clcpp/synchronization/named_barrier/test_spec_example.hpp b/test_conformance/clcpp/synchronization/named_barrier/test_spec_example.hpp new file mode 100644 index 00000000..c80ab717 --- /dev/null +++ b/test_conformance/clcpp/synchronization/named_barrier/test_spec_example.hpp @@ -0,0 +1,323 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef TEST_CONFORMANCE_CLCPP_SYNCHRONIZATION_NAMED_BARRIER_TEST_SPEC_EXAMPLE_HPP +#define TEST_CONFORMANCE_CLCPP_SYNCHRONIZATION_NAMED_BARRIER_TEST_SPEC_EXAMPLE_HPP + +#include "common.hpp" + +namespace named_barrier { + +// ------------------------------------------------------------------------------ +// ----------------------- SPECIFICATION EXAMPLE TEST---------------------------- +// ------------------------------------------------------------------------------ +// This test is based on the example in OpenCL C++ 1.0 specification (OpenCL C++ +// Standard Library > Synchronization Functions > Named barriers > wait). +struct spec_example_work_group_named_barrier_test : public work_group_named_barrier_test_base +{ + std::string str() + { + return "spec_example"; + } + + // Return value that is expected to be in output_buffer[i] + cl_uint operator()(size_t i, size_t work_group_size, size_t mas_sub_group_size) + { + return static_cast(i); + } + + // At the end every work-item writes its global id to output[work-item-global-id]. + std::string generate_program() + { + // ----------------------------------------------------------------------------------- + // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ + // ----------------------------------------------------------------------------------- + #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + return + // In OpenCL C kernel we imitate subgroups by partitioning work-group (based on + // local ids of work-items), work_group_named_barrier.wait(..) calls are replaced + // with work_group_barriers. 
+ "__kernel void " + this->get_kernel_name() + "(global uint *output, " + "global uint * temp, " + "local uint * lmem)\n" + "{\n" + "size_t gid = get_global_id(0);\n" + "size_t lid = get_local_id(0);\n" + + // We divide work-group into ranges: + // [0; e_wg)[e_wg; q_wg)[q_wg; 3 * e_wg)[3 * e_wg; h_wg)[h_wg; get_local_size(0) - 1] + // to simulate 8 subgroups + "size_t h_wg = get_local_size(0) / 2;\n" // half of work-group + "size_t q_wg = get_local_size(0) / 4;\n" // quarter + "size_t e_wg = get_local_size(0) / 8;\n" // one-eighth + + "if(lid < h_wg) lmem[lid] = gid;\n" // [0; h_wg) + "else temp[gid] = gid;\n" // [h_wg; get_local_size(0) - 1) + "work_group_barrier(CLK_LOCAL_MEM_FENCE);\n" + + "size_t other_lid = (lid + q_wg) % h_wg;\n" + "size_t value = 0;\n" + "if(lmem[other_lid] == ((gid - lid) + other_lid)){\n" + " value = gid;\n" + "}\n" + "work_group_barrier(CLK_LOCAL_MEM_FENCE);\n" + + "if(lid < q_wg){\n" // [0; q_wg) + " if(lid < e_wg) lmem[lid + e_wg] = gid;\n" // [0; e_wg) + " else lmem[lid - e_wg] = gid;\n" // [e_wg; q_wg) + "}\n" + "else if(lid < h_wg) {\n" // [q_wg; h_wg) + " if(lid < (3 * e_wg)) lmem[lid + e_wg] = gid;\n" // [q_wg; q_wg + e_wg) + " else lmem[lid - e_wg] = gid;\n" // [q_wg + e_wg; h_wg) + "}\n" + "work_group_barrier(CLK_LOCAL_MEM_FENCE);\n" + + "if(lid < q_wg){\n" // [0; q_wg) + " output[gid + q_wg] = lmem[lid];\n" + "}\n" + "else if(lid < h_wg) {\n" // [q_wg; h_wg) + " output[gid - q_wg] = lmem[lid];\n" + "}\n" + "work_group_barrier(CLK_GLOBAL_MEM_FENCE);\n" + + "if(lid < q_wg){\n" // [0; q_wg) + " if(lid < e_wg) temp[gid] = output[gid + (3 * e_wg)];\n" // [0; e_wg) + " else temp[gid] = output[gid + e_wg];\n" // [e_wg; q_wg) + "}\n" + "else if(lid < h_wg) {\n" // [q_wg; h_wg) + " if(lid < (3 * e_wg)) temp[gid] = output[gid - e_wg];\n" // [q_wg; q_wg + e_wg) + " else temp[gid] = output[gid - (3 * e_wg)];\n" // [q_wg + e_wg; h_wg) + "}\n" + "work_group_barrier(CLK_GLOBAL_MEM_FENCE);\n" + + "output[gid] = temp[gid];\n" + "}\n"; + + #else 
+ return + "#define cl_khr_subgroup_named_barrier\n" + "#include \n" + "#include \n" + "#include \n" + "using namespace cl;\n" + + "void b_function(work_group_named_barrier &b, size_t value, local_ptr lmem)\n" + "{\n\n" + "size_t lid = get_local_id(0);\n" + // Work-items from the 1st subgroup write to local memory that will be + // later read by the 0th subgroup, and the other way around - 0th subgroup + // writes what 1st subgroup will later read. + // b.wait(mem_fence::local) should provide sync between those two subgroups. + "if(get_sub_group_id() < 1) lmem[lid + get_max_sub_group_size()] = value;\n" + "else lmem[lid - get_max_sub_group_size()] = value;\n" + "b.wait(mem_fence::local);\n\n" // sync writes to lmem for 2 subgroups (ids: 0, 1) + "}\n" + + "__kernel void " + this->get_kernel_name() + "(global_ptr output, " + "global_ptr temp, " + "local_ptr lmem)\n" + "{\n\n" + "local a(4);\n" + "local b(2);\n" + "local c(2);\n" + + "size_t gid = get_global_id(0);\n" + "size_t lid = get_local_id(0);\n" + "if(get_sub_group_id() < 4)" + "{\n" + " lmem[lid] = gid;\n" + " a.wait(mem_fence::local);\n" // sync writes to lmem for 4 subgroups (ids: 0, 1, 2, 3) + // Now all four subgroups should see changes in lmem. + " size_t other_lid = (lid + (2 * get_max_sub_group_size())) % (4 * get_max_sub_group_size());\n" + " size_t value = 0;\n" + " if(lmem[other_lid] == ((gid - lid) + other_lid)){\n" + " value = gid;\n" + " }\n" + " a.wait(mem_fence::local);\n" // sync reads from lmem for 4 subgroups (ids: 0, 1, 2, 3) + + " if(get_sub_group_id() < 2)" // ids: 0, 1 + " {\n" + " b_function(b, value, lmem);\n" + " }\n" + " else" // ids: 2, 3 + " {\n" + // Work-items from the 2nd subgroup write to local memory that will be + // later read by the 3rd subgroup, and the other way around - 3rd subgroup + // writes what 2nd subgroup will later read. + // c.wait(mem_fence::local) should provide sync between those two subgroups. 
+ " if(get_sub_group_id() < 3) lmem[lid + get_max_sub_group_size()] = value ;\n" + " else lmem[lid - get_max_sub_group_size()] = value;\n" + " c.wait(mem_fence::local);\n" // sync writes to lmem for 2 subgroups (2, 3) + " }\n" + + // Now (0, 1) are in sync (local mem), and (2, 3) are in sync (local mem). + // However, subgroups (0, 1) are not in sync with (2, 3). + " if(get_sub_group_id() < 4) {\n" // ids: 0, 1, 2, 3 + " if(get_sub_group_id() < 2) output[gid + (2 * get_max_sub_group_size())] = lmem[lid];\n" + " else output[gid - (2 * get_max_sub_group_size())] = lmem[lid];\n" + " a.wait(mem_fence::global);\n" // sync writes to global memory (output) + // for 4 subgroups (0, 1, 2, 3) + " }\n" + "}\n" + "else {\n" // subgroups with id >= 4 + " temp[gid] = gid;\n" + "}\n" + + // Now (0, 1, 2, 3) are in sync (global mem) + "if(get_sub_group_id() < 2) {\n" + " if(get_sub_group_id() < 1) temp[gid] = output[gid + (3 * get_max_sub_group_size())];\n" + " else temp[gid] = output[gid + (get_max_sub_group_size())];\n" + "}\n" + "else if(get_sub_group_id() < 4) {\n" + " if(get_sub_group_id() < 3) temp[gid] = output[gid - (get_max_sub_group_size())];\n" + " else temp[gid] = output[gid - (3 * get_max_sub_group_size())];\n" + "}\n" + + // Synchronize the entire work-group (in terms of accesses to global memory) + "work_group_barrier(mem_fence::global);\n" + "output[gid] = temp[gid];\n\n" + "}\n"; + #endif + } + + size_t get_max_local_size(const cl_kernel kernel, + const cl_device_id device, + const size_t work_group_size, // default work-group size + cl_int& error) + { + // Set size of the local memory, we need to do this to correctly calculate + // max possible work-group size. 
+ size_t wg_size; + for(wg_size = work_group_size; wg_size > 1; wg_size /= 2) + { + error = clSetKernelArg(kernel, 2, ((wg_size / 2) + 1) * sizeof(cl_uint), NULL); + RETURN_ON_CL_ERROR(error, "clSetKernelArg") + + size_t max_wg_size; + error = clGetKernelWorkGroupInfo( + kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &max_wg_size, NULL + ); + RETURN_ON_ERROR(error) + if(max_wg_size >= wg_size) break; + } + + // ----------------------------------------------------------------------------------- + // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ + // ----------------------------------------------------------------------------------- + #if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + // make sure wg_size is a multiple of 8 + if(wg_size % 8 > 0) wg_size -= (wg_size % 8); + return wg_size; + #else + // make sure that wg_size will produce at least min_num_sub_groups + // subgroups in each work-group + size_t local_size[3] = { 1, 1, 1 }; + size_t min_num_sub_groups = 8; + error = clGetKernelSubGroupInfo(kernel, device, CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT, + sizeof(size_t), &min_num_sub_groups, + sizeof(size_t) * 3, &local_size, NULL); + RETURN_ON_CL_ERROR(error, "clGetKernelSubGroupInfo") + if (local_size[0] == 0 || local_size[1] != 1 || local_size[2] != 1) + { + if(min_num_sub_groups == 1) + { + RETURN_ON_ERROR_MSG(-1, "Can't produce local size with one subgroup") + } + return 0; + } + local_size[0] = (std::min)(wg_size, local_size[0]); + + // double-check + size_t sub_group_count_for_ndrange; + error = clGetKernelSubGroupInfo(kernel, device, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE, + sizeof(size_t) * 3, local_size, + sizeof(size_t), &sub_group_count_for_ndrange, NULL); + RETURN_ON_CL_ERROR(error, "clGetKernelSubGroupInfo") + if (sub_group_count_for_ndrange < min_num_sub_groups) + { + RETURN_ON_ERROR_MSG(-1, + "CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE did not return correct value (expected >=%lu, got %lu)", + 
min_num_sub_groups, sub_group_count_for_ndrange + ) + } + + return local_size[0]; + #endif + } + + cl_int execute(const cl_kernel kernel, + const cl_mem output_buffer, + const cl_command_queue queue, + const size_t work_size, + const size_t work_group_size) + { + cl_int err; + // Get context from queue + cl_context context; + err = clGetCommandQueueInfo(queue, CL_QUEUE_CONTEXT, sizeof(cl_context), &context, NULL); + RETURN_ON_CL_ERROR(err, "clGetCommandQueueInfo") + + // create temp buffer + auto temp_buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * work_size, NULL, &err); + RETURN_ON_CL_ERROR(err, "clCreateBuffer") + + err = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer); + err |= clSetKernelArg(kernel, 1, sizeof(temp_buffer), &temp_buffer); + err |= clSetKernelArg(kernel, 2, work_group_size * sizeof(cl_uint), NULL); + RETURN_ON_CL_ERROR(err, "clSetKernelArg") + + err = clEnqueueNDRangeKernel( + queue, kernel, 1, + NULL, &work_size, &work_group_size, + 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel") + + err = clFinish(queue); + err |= clReleaseMemObject(temp_buffer); + + return err; + } +}; + +// ------------------------------------------------------------------------------ +// -------------------------- RUN TESTS ----------------------------------------- +// ------------------------------------------------------------------------------ +AUTO_TEST_CASE(test_work_group_named_barrier_spec_example) +(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ +#if !(defined(DEVELOPMENT) && (defined(USE_OPENCLC_KERNELS) || defined(ONLY_SPIRV_COMPILATION))) + if(!is_extension_available(device, "cl_khr_subgroup_named_barrier")) + { + log_info("SKIPPED: Extension `cl_khr_subgroup_named_barrier` is not supported. 
Skipping tests.\n"); + return CL_SUCCESS; + } +#endif + + int error = CL_SUCCESS; + int last_error = CL_SUCCESS; + + RUN_WG_NAMED_BARRIER_TEST_MACRO(spec_example_work_group_named_barrier_test()) + + if(error != CL_SUCCESS) + { + return -1; + } + return error; +} + +} // namespace + +#endif // TEST_CONFORMANCE_CLCPP_SYNCHRONIZATION_NAMED_BARRIER_TEST_SPEC_EXAMPLE_HPP diff --git a/test_conformance/clcpp/synchronization/test_sub_group_barrier.hpp b/test_conformance/clcpp/synchronization/test_sub_group_barrier.hpp new file mode 100644 index 00000000..20de1824 --- /dev/null +++ b/test_conformance/clcpp/synchronization/test_sub_group_barrier.hpp @@ -0,0 +1,342 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef TEST_CONFORMANCE_CLCPP_SYNCHRONIZATION_TEST_SUB_GROUP_BARRIER_HPP +#define TEST_CONFORMANCE_CLCPP_SYNCHRONIZATION_TEST_SUB_GROUP_BARRIER_HPP + +#include +#include +#include +#include +#include + +// Common for all OpenCL C++ tests +#include "../common.hpp" + + +namespace test_sub_group_barrier { + +enum class barrier_type +{ + local, + global +}; + +struct test_options +{ + barrier_type barrier; + size_t max_count; + size_t num_tests; +}; + +const std::string source_common = R"( + // Circular shift of sub-group local ids + size_t get_shifted_local_id(int sub_group_local_id_delta) + { + const int sub_group_size = (int)get_sub_group_size(); + return (get_local_id(0) - get_sub_group_local_id()) + + (((int)get_sub_group_local_id() + sub_group_local_id_delta) % sub_group_size + sub_group_size) % sub_group_size; + } + + // Get global ids from shifted local ids + size_t get_shifted_global_id(int sub_group_local_id_delta) + { + return get_group_id(0) * get_enqueued_local_size(0) + get_shifted_local_id(sub_group_local_id_delta); + } +)"; + +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) +std::string generate_source(test_options options) +{ + std::stringstream s; + s << R"( + #pragma OPENCL EXTENSION cl_khr_subgroups : enable + )"; + s << source_common; + if (options.barrier == barrier_type::global) + { + s << R"( + kernel void test(const int iter_lo, const int iter_hi, global long *output) + { + const size_t gid = get_shifted_global_id(0); + + output[gid] = gid; + sub_group_barrier(CLK_GLOBAL_MEM_FENCE); + + for (int i = iter_lo; i < iter_hi; i++) + { + const size_t other_gid = get_shifted_global_id(i); + + output[other_gid] += other_gid; + sub_group_barrier(CLK_GLOBAL_MEM_FENCE); + + 
output[gid] += gid; + sub_group_barrier(CLK_GLOBAL_MEM_FENCE); + } + } + )"; + } + else if (options.barrier == barrier_type::local) + { + s << R"( + kernel void test(const int iter_lo, const int iter_hi, global long *output, local long *values) + { + const size_t gid = get_shifted_global_id(0); + const size_t lid = get_shifted_local_id(0); + + values[lid] = gid; + sub_group_barrier(CLK_LOCAL_MEM_FENCE); + + for (int i = iter_lo; i < iter_hi; i++) + { + const size_t other_lid = get_shifted_local_id(i); + const size_t other_gid = get_shifted_global_id(i); + + values[other_lid] += other_gid; + sub_group_barrier(CLK_LOCAL_MEM_FENCE); + + values[lid] += gid; + sub_group_barrier(CLK_LOCAL_MEM_FENCE); + } + + output[gid] = values[lid]; + } + )"; + } + + return s.str(); +} +#else +std::string generate_source(test_options options) +{ + std::stringstream s; + s << R"( + #include + #include + #include + + using namespace cl; + + )"; + s << source_common; + + if (options.barrier == barrier_type::global) + { + s << R"( + kernel void test(const int iter_lo, const int iter_hi, global_ptr output) + { + const size_t gid = get_shifted_global_id(0); + + output[gid] = gid; + sub_group_barrier(mem_fence::global); + + for (int i = iter_lo; i < iter_hi; i++) + { + const size_t other_gid = get_shifted_global_id(i); + + output[other_gid] += other_gid; + sub_group_barrier(mem_fence::global); + + output[gid] += gid; + sub_group_barrier(mem_fence::global); + } + } + )"; + } + else if (options.barrier == barrier_type::local) + { + s << R"( + kernel void test(const int iter_lo, const int iter_hi, global_ptr output, local_ptr values) + { + const size_t gid = get_shifted_global_id(0); + const size_t lid = get_shifted_local_id(0); + + values[lid] = gid; + sub_group_barrier(mem_fence::local); + + for (int i = iter_lo; i < iter_hi; i++) + { + const size_t other_lid = get_shifted_local_id(i); + const size_t other_gid = get_shifted_global_id(i); + + values[other_lid] += other_gid; + 
sub_group_barrier(mem_fence::local); + + values[lid] += gid; + sub_group_barrier(mem_fence::local); + } + + output[gid] = values[lid]; + } + )"; + } + + return s.str(); +} +#endif + +int test(cl_device_id device, cl_context context, cl_command_queue queue, test_options options) +{ + int error = CL_SUCCESS; + +#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + if (!is_extension_available(device, "cl_khr_subgroups")) + { + log_info("SKIPPED: Extension `cl_khr_subgroups` is not supported. Skipping tests.\n"); + return CL_SUCCESS; + } +#endif + + cl_program program; + cl_kernel kernel; + + std::string kernel_name = "test"; + std::string source = generate_source(options); + +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +// Only OpenCL C++ to SPIR-V compilation +#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) + error = create_opencl_kernel( + context, &program, &kernel, + source, kernel_name + ); + RETURN_ON_ERROR(error) + return error; +// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) +#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + error = create_opencl_kernel( + context, &program, &kernel, + source, kernel_name, "-cl-std=CL2.0", false + ); + RETURN_ON_ERROR(error) +// Normal run +#else + error = create_opencl_kernel( + context, &program, &kernel, + source, kernel_name + ); + RETURN_ON_ERROR(error) +#endif + + size_t max_work_group_size; + error = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(max_work_group_size), &max_work_group_size, NULL); + RETURN_ON_CL_ERROR(error, "clGetKernelWorkGroupInfo") + + if (options.barrier == barrier_type::local) + { + cl_ulong kernel_local_mem_size; + error = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_LOCAL_MEM_SIZE, 
sizeof(kernel_local_mem_size), &kernel_local_mem_size, NULL); + RETURN_ON_CL_ERROR(error, "clGetKernelWorkGroupInfo") + + cl_ulong device_local_mem_size; + error = clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(device_local_mem_size), &device_local_mem_size, NULL); + RETURN_ON_CL_ERROR(error, "clGetDeviceInfo") + + max_work_group_size = (std::min)(max_work_group_size, (device_local_mem_size - kernel_local_mem_size) / sizeof(cl_long)); + } + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution global_size_dis(1, options.max_count); + std::uniform_int_distribution local_size_dis(1, max_work_group_size); + std::uniform_int_distribution iter_dis(0, 20); + + for (size_t test = 0; test < options.num_tests; test++) + { + const size_t global_size = global_size_dis(gen); + const size_t local_size = local_size_dis(gen); + const size_t count = global_size; + + const int iter_lo = -iter_dis(gen); + const int iter_hi = +iter_dis(gen); + + cl_mem output_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_long) * count, NULL, &error); + RETURN_ON_CL_ERROR(error, "clCreateBuffer") + + error = clSetKernelArg(kernel, 0, sizeof(iter_lo), &iter_lo); + RETURN_ON_CL_ERROR(error, "clSetKernelArg") + error = clSetKernelArg(kernel, 1, sizeof(iter_hi), &iter_hi); + RETURN_ON_CL_ERROR(error, "clSetKernelArg") + error = clSetKernelArg(kernel, 2, sizeof(output_buffer), &output_buffer); + RETURN_ON_CL_ERROR(error, "clSetKernelArg") + if (options.barrier == barrier_type::local) + { + error = clSetKernelArg(kernel, 3, sizeof(cl_long) * local_size, NULL); + RETURN_ON_CL_ERROR(error, "clSetKernelArg") + } + + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, &local_size, 0, NULL, NULL); + RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel") + + std::vector output(count); + error = clEnqueueReadBuffer( + queue, output_buffer, CL_TRUE, + 0, sizeof(cl_long) * count, + static_cast(output.data()), + 0, NULL, NULL + ); + 
RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer") + + error = clReleaseMemObject(output_buffer); + RETURN_ON_CL_ERROR(error, "clReleaseMemObject") + + for (size_t gid = 0; gid < count; gid++) + { + const long value = output[gid]; + const long expected = gid + 2 * gid * (iter_hi - iter_lo); + + if (value != expected) + { + RETURN_ON_ERROR_MSG(-1, + "Element %lu has incorrect value. Expected: %ld, got: %ld", + gid, expected, value + ); + } + } + } + + clReleaseKernel(kernel); + clReleaseProgram(program); + return error; +} + +AUTO_TEST_CASE(test_sub_group_barrier_global) +(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + test_options options; + options.barrier = barrier_type::global; + options.num_tests = 1000; + options.max_count = num_elements; + return test(device, context, queue, options); +} + +AUTO_TEST_CASE(test_sub_group_barrier_local) +(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + test_options options; + options.barrier = barrier_type::local; + options.num_tests = 1000; + options.max_count = num_elements; + return test(device, context, queue, options); +} + +} // namespace + +#endif // TEST_CONFORMANCE_CLCPP_SYNCHRONIZATION_TEST_SUB_GROUP_BARRIER_HPP diff --git a/test_conformance/clcpp/synchronization/test_work_group_barrier.hpp b/test_conformance/clcpp/synchronization/test_work_group_barrier.hpp new file mode 100644 index 00000000..79451fb8 --- /dev/null +++ b/test_conformance/clcpp/synchronization/test_work_group_barrier.hpp @@ -0,0 +1,330 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef TEST_CONFORMANCE_CLCPP_SYNCHRONIZATION_TEST_WORK_GROUP_BARRIER_HPP +#define TEST_CONFORMANCE_CLCPP_SYNCHRONIZATION_TEST_WORK_GROUP_BARRIER_HPP + +#include +#include +#include +#include +#include + +// Common for all OpenCL C++ tests +#include "../common.hpp" + + +namespace test_work_group_barrier { + +enum class barrier_type +{ + local, + global +}; + +struct test_options +{ + barrier_type barrier; + size_t max_count; + size_t num_tests; +}; + +const std::string source_common = R"( + // Circular shift of local ids + size_t get_shifted_local_id(int local_id_delta) + { + const int local_size = (int)get_local_size(0); + return (((int)get_local_id(0) + local_id_delta) % local_size + local_size) % local_size; + } + + // Get global ids from shifted local ids + size_t get_shifted_global_id(int local_id_delta) + { + return get_group_id(0) * get_enqueued_local_size(0) + get_shifted_local_id(local_id_delta); + } +)"; + +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) +std::string generate_source(test_options options) +{ + std::stringstream s; + s << source_common; + if (options.barrier == barrier_type::global) + { + s << R"( + kernel void test(const int iter_lo, const int iter_hi, global long *output) + { + const size_t gid = get_shifted_global_id(0); + + output[gid] = gid; + 
work_group_barrier(CLK_GLOBAL_MEM_FENCE); + + for (int i = iter_lo; i < iter_hi; i++) + { + const size_t other_gid = get_shifted_global_id(i); + + output[other_gid] += other_gid; + work_group_barrier(CLK_GLOBAL_MEM_FENCE); + + output[gid] += gid; + work_group_barrier(CLK_GLOBAL_MEM_FENCE); + } + } + )"; + } + else if (options.barrier == barrier_type::local) + { + s << R"( + kernel void test(const int iter_lo, const int iter_hi, global long *output, local long *values) + { + const size_t gid = get_shifted_global_id(0); + const size_t lid = get_shifted_local_id(0); + + values[lid] = gid; + work_group_barrier(CLK_LOCAL_MEM_FENCE); + + for (int i = iter_lo; i < iter_hi; i++) + { + const size_t other_lid = get_shifted_local_id(i); + const size_t other_gid = get_shifted_global_id(i); + + values[other_lid] += other_gid; + work_group_barrier(CLK_LOCAL_MEM_FENCE); + + values[lid] += gid; + work_group_barrier(CLK_LOCAL_MEM_FENCE); + } + + output[gid] = values[lid]; + } + )"; + } + + return s.str(); +} +#else +std::string generate_source(test_options options) +{ + std::stringstream s; + s << R"( + #include + #include + #include + + using namespace cl; + + )"; + s << source_common; + + if (options.barrier == barrier_type::global) + { + s << R"( + kernel void test(const int iter_lo, const int iter_hi, global_ptr output) + { + const size_t gid = get_shifted_global_id(0); + + output[gid] = gid; + work_group_barrier(mem_fence::global); + + for (int i = iter_lo; i < iter_hi; i++) + { + const size_t other_gid = get_shifted_global_id(i); + + output[other_gid] += other_gid; + work_group_barrier(mem_fence::global); + + output[gid] += gid; + work_group_barrier(mem_fence::global); + } + } + )"; + } + else if (options.barrier == barrier_type::local) + { + s << R"( + kernel void test(const int iter_lo, const int iter_hi, global_ptr output, local_ptr values) + { + const size_t gid = get_shifted_global_id(0); + const size_t lid = get_shifted_local_id(0); + + values[lid] = gid; + 
work_group_barrier(mem_fence::local); + + for (int i = iter_lo; i < iter_hi; i++) + { + const size_t other_lid = get_shifted_local_id(i); + const size_t other_gid = get_shifted_global_id(i); + + values[other_lid] += other_gid; + work_group_barrier(mem_fence::local); + + values[lid] += gid; + work_group_barrier(mem_fence::local); + } + + output[gid] = values[lid]; + } + )"; + } + + return s.str(); +} +#endif + +int test(cl_device_id device, cl_context context, cl_command_queue queue, test_options options) +{ + int error = CL_SUCCESS; + + cl_program program; + cl_kernel kernel; + + std::string kernel_name = "test"; + std::string source = generate_source(options); + +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +// Only OpenCL C++ to SPIR-V compilation +#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) + error = create_opencl_kernel( + context, &program, &kernel, + source, kernel_name + ); + RETURN_ON_ERROR(error) + return error; +// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) +#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + error = create_opencl_kernel( + context, &program, &kernel, + source, kernel_name, "-cl-std=CL2.0", false + ); + RETURN_ON_ERROR(error) +// Normal run +#else + error = create_opencl_kernel( + context, &program, &kernel, + source, kernel_name + ); + RETURN_ON_ERROR(error) +#endif + + size_t max_work_group_size; + error = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(max_work_group_size), &max_work_group_size, NULL); + RETURN_ON_CL_ERROR(error, "clGetKernelWorkGroupInfo") + + if (options.barrier == barrier_type::local) + { + cl_ulong kernel_local_mem_size; + error = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_LOCAL_MEM_SIZE, 
sizeof(kernel_local_mem_size), &kernel_local_mem_size, NULL); + RETURN_ON_CL_ERROR(error, "clGetKernelWorkGroupInfo") + + cl_ulong device_local_mem_size; + error = clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(device_local_mem_size), &device_local_mem_size, NULL); + RETURN_ON_CL_ERROR(error, "clGetDeviceInfo") + + max_work_group_size = (std::min)(max_work_group_size, (device_local_mem_size - kernel_local_mem_size) / sizeof(cl_long)); + } + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution global_size_dis(1, options.max_count); + std::uniform_int_distribution local_size_dis(1, max_work_group_size); + std::uniform_int_distribution iter_dis(0, 20); + + for (size_t test = 0; test < options.num_tests; test++) + { + const size_t global_size = global_size_dis(gen); + const size_t local_size = local_size_dis(gen); + const size_t count = global_size; + + const int iter_lo = -iter_dis(gen); + const int iter_hi = +iter_dis(gen); + + cl_mem output_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_long) * count, NULL, &error); + RETURN_ON_CL_ERROR(error, "clCreateBuffer") + + error = clSetKernelArg(kernel, 0, sizeof(iter_lo), &iter_lo); + RETURN_ON_CL_ERROR(error, "clSetKernelArg") + error = clSetKernelArg(kernel, 1, sizeof(iter_hi), &iter_hi); + RETURN_ON_CL_ERROR(error, "clSetKernelArg") + error = clSetKernelArg(kernel, 2, sizeof(output_buffer), &output_buffer); + RETURN_ON_CL_ERROR(error, "clSetKernelArg") + if (options.barrier == barrier_type::local) + { + error = clSetKernelArg(kernel, 3, sizeof(cl_long) * local_size, NULL); + RETURN_ON_CL_ERROR(error, "clSetKernelArg") + } + + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, &local_size, 0, NULL, NULL); + RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel") + + std::vector output(count); + error = clEnqueueReadBuffer( + queue, output_buffer, CL_TRUE, + 0, sizeof(cl_long) * count, + static_cast(output.data()), + 0, NULL, NULL + ); + 
RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer") + + error = clReleaseMemObject(output_buffer); + RETURN_ON_CL_ERROR(error, "clReleaseMemObject") + + for (size_t gid = 0; gid < count; gid++) + { + const long value = output[gid]; + const long expected = gid + 2 * gid * (iter_hi - iter_lo); + + if (value != expected) + { + RETURN_ON_ERROR_MSG(-1, + "Element %lu has incorrect value. Expected: %ld, got: %ld", + gid, expected, value + ); + } + } + } + + clReleaseKernel(kernel); + clReleaseProgram(program); + return error; +} + +AUTO_TEST_CASE(test_work_group_barrier_global) +(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + test_options options; + options.barrier = barrier_type::global; + options.num_tests = 1000; + options.max_count = num_elements; + return test(device, context, queue, options); +} + +AUTO_TEST_CASE(test_work_group_barrier_local) +(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + test_options options; + options.barrier = barrier_type::local; + options.num_tests = 1000; + options.max_count = num_elements; + return test(device, context, queue, options); +} + +} // namespace + +#endif // TEST_CONFORMANCE_CLCPP_SYNCHRONIZATION_TEST_WORK_GROUP_BARRIER_HPP diff --git a/test_conformance/clcpp/utils_common/errors.hpp b/test_conformance/clcpp/utils_common/errors.hpp new file mode 100644 index 00000000..e47eff3e --- /dev/null +++ b/test_conformance/clcpp/utils_common/errors.hpp @@ -0,0 +1,134 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef TEST_CONFORMANCE_CLCPP_UTILS_COMMON_ERRORS_HPP +#define TEST_CONFORMANCE_CLCPP_UTILS_COMMON_ERRORS_HPP + +#include + +#include "../../../test_common/harness/errorHelpers.h" + +// ------------- Check OpenCL error helpers (marcos) ----------------- + +std::string get_cl_error_string(cl_int error) +{ +#define CASE_CL_ERROR(x) case x: return #x; + switch (error) + { + CASE_CL_ERROR(CL_SUCCESS) + CASE_CL_ERROR(CL_DEVICE_NOT_FOUND) + CASE_CL_ERROR(CL_DEVICE_NOT_AVAILABLE) + CASE_CL_ERROR(CL_COMPILER_NOT_AVAILABLE) + CASE_CL_ERROR(CL_MEM_OBJECT_ALLOCATION_FAILURE) + CASE_CL_ERROR(CL_OUT_OF_RESOURCES) + CASE_CL_ERROR(CL_OUT_OF_HOST_MEMORY) + CASE_CL_ERROR(CL_PROFILING_INFO_NOT_AVAILABLE) + CASE_CL_ERROR(CL_MEM_COPY_OVERLAP) + CASE_CL_ERROR(CL_IMAGE_FORMAT_MISMATCH) + CASE_CL_ERROR(CL_IMAGE_FORMAT_NOT_SUPPORTED) + CASE_CL_ERROR(CL_BUILD_PROGRAM_FAILURE) + CASE_CL_ERROR(CL_MAP_FAILURE) + CASE_CL_ERROR(CL_MISALIGNED_SUB_BUFFER_OFFSET) + CASE_CL_ERROR(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST) + CASE_CL_ERROR(CL_COMPILE_PROGRAM_FAILURE) + CASE_CL_ERROR(CL_LINKER_NOT_AVAILABLE) + CASE_CL_ERROR(CL_LINK_PROGRAM_FAILURE) + CASE_CL_ERROR(CL_DEVICE_PARTITION_FAILED) + CASE_CL_ERROR(CL_KERNEL_ARG_INFO_NOT_AVAILABLE) + + CASE_CL_ERROR(CL_INVALID_VALUE) + CASE_CL_ERROR(CL_INVALID_DEVICE_TYPE) + CASE_CL_ERROR(CL_INVALID_PLATFORM) + CASE_CL_ERROR(CL_INVALID_DEVICE) + CASE_CL_ERROR(CL_INVALID_CONTEXT) + CASE_CL_ERROR(CL_INVALID_QUEUE_PROPERTIES) + CASE_CL_ERROR(CL_INVALID_COMMAND_QUEUE) + CASE_CL_ERROR(CL_INVALID_HOST_PTR) + 
CASE_CL_ERROR(CL_INVALID_MEM_OBJECT) + CASE_CL_ERROR(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR) + CASE_CL_ERROR(CL_INVALID_IMAGE_SIZE) + CASE_CL_ERROR(CL_INVALID_SAMPLER) + CASE_CL_ERROR(CL_INVALID_BINARY) + CASE_CL_ERROR(CL_INVALID_BUILD_OPTIONS) + CASE_CL_ERROR(CL_INVALID_PROGRAM) + CASE_CL_ERROR(CL_INVALID_PROGRAM_EXECUTABLE) + CASE_CL_ERROR(CL_INVALID_KERNEL_NAME) + CASE_CL_ERROR(CL_INVALID_KERNEL_DEFINITION) + CASE_CL_ERROR(CL_INVALID_KERNEL) + CASE_CL_ERROR(CL_INVALID_ARG_INDEX) + CASE_CL_ERROR(CL_INVALID_ARG_VALUE) + CASE_CL_ERROR(CL_INVALID_ARG_SIZE) + CASE_CL_ERROR(CL_INVALID_KERNEL_ARGS) + CASE_CL_ERROR(CL_INVALID_WORK_DIMENSION) + CASE_CL_ERROR(CL_INVALID_WORK_GROUP_SIZE) + CASE_CL_ERROR(CL_INVALID_WORK_ITEM_SIZE) + CASE_CL_ERROR(CL_INVALID_GLOBAL_OFFSET) + CASE_CL_ERROR(CL_INVALID_EVENT_WAIT_LIST) + CASE_CL_ERROR(CL_INVALID_EVENT) + CASE_CL_ERROR(CL_INVALID_OPERATION) + CASE_CL_ERROR(CL_INVALID_GL_OBJECT) + CASE_CL_ERROR(CL_INVALID_BUFFER_SIZE) + CASE_CL_ERROR(CL_INVALID_MIP_LEVEL) + CASE_CL_ERROR(CL_INVALID_GLOBAL_WORK_SIZE) + CASE_CL_ERROR(CL_INVALID_PROPERTY) + CASE_CL_ERROR(CL_INVALID_IMAGE_DESCRIPTOR) + CASE_CL_ERROR(CL_INVALID_COMPILER_OPTIONS) + CASE_CL_ERROR(CL_INVALID_LINKER_OPTIONS) + CASE_CL_ERROR(CL_INVALID_DEVICE_PARTITION_COUNT) + CASE_CL_ERROR(CL_INVALID_PIPE_SIZE) + CASE_CL_ERROR(CL_INVALID_DEVICE_QUEUE) + CASE_CL_ERROR(CL_INVALID_SPEC_ID) + CASE_CL_ERROR(CL_MAX_SIZE_RESTRICTION_EXCEEDED) + default: return "(unknown error code)"; + } +#undef CASE_CL_ERROR +} + +#define CHECK_ERROR(x) \ + if(x != CL_SUCCESS) \ + { \ + log_error("ERROR: %d, file: %s, line: %d\n", x, __FILE__, __LINE__);\ + } +#define CHECK_ERROR_MSG(x, ...) 
\ + if(x != CL_SUCCESS) \ + { \ + log_error("ERROR: " __VA_ARGS__);\ + log_error("\n");\ + log_error("ERROR: %d, file: %s, line: %d\n", x, __FILE__, __LINE__);\ + } +#define RETURN_ON_ERROR(x) \ + if(x != CL_SUCCESS) \ + { \ + log_error("ERROR: %d, file: %s, line: %d\n", x, __FILE__, __LINE__);\ + return x;\ + } +#define RETURN_ON_ERROR_MSG(x, ...) \ + if(x != CL_SUCCESS) \ + { \ + log_error("ERROR: " __VA_ARGS__);\ + log_error("\n");\ + log_error("ERROR: %d, file: %s, line: %d\n", x, __FILE__, __LINE__);\ + return x;\ + } + +#define RETURN_ON_CL_ERROR(x, cl_func_name) \ + if(x != CL_SUCCESS) \ + { \ + log_error("ERROR: %s failed: %s (%d)\n", cl_func_name, get_cl_error_string(x).c_str(), x);\ + log_error("ERROR: %d, file: %s, line: %d\n", x, __FILE__, __LINE__);\ + return x;\ + } + +#endif // TEST_CONFORMANCE_CLCPP_UTILS_TEST_ERRORS_HPP diff --git a/test_conformance/clcpp/utils_common/is_vector_type.hpp b/test_conformance/clcpp/utils_common/is_vector_type.hpp new file mode 100644 index 00000000..0232e513 --- /dev/null +++ b/test_conformance/clcpp/utils_common/is_vector_type.hpp @@ -0,0 +1,60 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef TEST_CONFORMANCE_CLCPP_UTILS_COMMON_IS_VECTOR_TYPE_HPP +#define TEST_CONFORMANCE_CLCPP_UTILS_COMMON_IS_VECTOR_TYPE_HPP + +#include "../common.hpp" + +// is_vector_type::value is true if Type is an OpenCL +// vector type; otherwise - false. 
+// +// Examples: +// * is_vector_type::value == false +// * is_vector_type::value == true +template +struct is_vector_type +{ + const static bool value = false; +}; + +#define ADD_VECTOR_TYPE(Type, n) \ + template<> \ + struct is_vector_type \ + { \ + const static bool value = true; \ + }; + +#define ADD_VECTOR_TYPES(Type) \ + ADD_VECTOR_TYPE(Type, 2) \ + ADD_VECTOR_TYPE(Type, 4) \ + ADD_VECTOR_TYPE(Type, 8) \ + ADD_VECTOR_TYPE(Type, 16) + +ADD_VECTOR_TYPES(cl_char) +ADD_VECTOR_TYPES(cl_uchar) +ADD_VECTOR_TYPES(cl_short) +ADD_VECTOR_TYPES(cl_ushort) +ADD_VECTOR_TYPES(cl_int) +ADD_VECTOR_TYPES(cl_uint) +ADD_VECTOR_TYPES(cl_long) +ADD_VECTOR_TYPES(cl_ulong) +ADD_VECTOR_TYPES(cl_float) +ADD_VECTOR_TYPES(cl_double) + +#undef ADD_VECTOR_TYPES +#undef ADD_VECTOR_TYPE + +#endif // TEST_CONFORMANCE_CLCPP_UTILS_COMMON_IS_VECTOR_TYPE_HPP diff --git a/test_conformance/clcpp/utils_common/kernel_helpers.hpp b/test_conformance/clcpp/utils_common/kernel_helpers.hpp new file mode 100644 index 00000000..189b8238 --- /dev/null +++ b/test_conformance/clcpp/utils_common/kernel_helpers.hpp @@ -0,0 +1,50 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef TEST_CONFORMANCE_CLCPP_UTILS_COMMON_KERNEL_HELPERS_HPP +#define TEST_CONFORMANCE_CLCPP_UTILS_COMMON_KERNEL_HELPERS_HPP + +#include "../common.hpp" + +// Creates a OpenCL C++/C program out_program and kernel out_kernel. 
+int create_opencl_kernel(cl_context context, + cl_program *out_program, + cl_kernel *out_kernel, + const char *source, + const std::string& kernel_name, + const std::string& build_options = "", + const bool openclCXX = true) +{ + return create_single_kernel_helper( + context, out_program, out_kernel, 1, &source, + kernel_name.c_str(), build_options.c_str(), openclCXX + ); +} + +int create_opencl_kernel(cl_context context, + cl_program *out_program, + cl_kernel *out_kernel, + const std::string& source, + const std::string& kernel_name, + const std::string& build_options = "", + const bool openclCXX = true) +{ + return create_opencl_kernel( + context, out_program, out_kernel, + source.c_str(), kernel_name, build_options, openclCXX + ); +} + +#endif // TEST_CONFORMANCE_CLCPP_UTILS_COMMON_KERNEL_HELPERS_HPP diff --git a/test_conformance/clcpp/utils_common/make_vector_type.hpp b/test_conformance/clcpp/utils_common/make_vector_type.hpp new file mode 100644 index 00000000..11b11856 --- /dev/null +++ b/test_conformance/clcpp/utils_common/make_vector_type.hpp @@ -0,0 +1,65 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef TEST_CONFORMANCE_CLCPP_UTILS_COMMON_MAKE_VECTOR_TYPE_HPP +#define TEST_CONFORMANCE_CLCPP_UTILS_COMMON_MAKE_VECTOR_TYPE_HPP + +#include "../common.hpp" + +// Using scalar_type and i creates a type scalar_typei. 
+// +// Example: +// * make_vector_type::type is cl_uint8 +// * make_vector_type::type is cl_uint +template +struct make_vector_type +{ + typedef void type; +}; + +#define ADD_MAKE_VECTOR_TYPE(Type, n) \ + template<> \ + struct make_vector_type \ + { \ + typedef Type ## n type; \ + }; + +#define ADD_MAKE_VECTOR_TYPES(Type) \ + template<> \ + struct make_vector_type \ + { \ + typedef Type type; \ + }; \ + ADD_MAKE_VECTOR_TYPE(Type, 2) \ + ADD_MAKE_VECTOR_TYPE(Type, 3) \ + ADD_MAKE_VECTOR_TYPE(Type, 4) \ + ADD_MAKE_VECTOR_TYPE(Type, 8) \ + ADD_MAKE_VECTOR_TYPE(Type, 16) + +ADD_MAKE_VECTOR_TYPES(cl_char) +ADD_MAKE_VECTOR_TYPES(cl_uchar) +ADD_MAKE_VECTOR_TYPES(cl_short) +ADD_MAKE_VECTOR_TYPES(cl_ushort) +ADD_MAKE_VECTOR_TYPES(cl_int) +ADD_MAKE_VECTOR_TYPES(cl_uint) +ADD_MAKE_VECTOR_TYPES(cl_long) +ADD_MAKE_VECTOR_TYPES(cl_ulong) +ADD_MAKE_VECTOR_TYPES(cl_float) +ADD_MAKE_VECTOR_TYPES(cl_double) + +#undef ADD_MAKE_VECTOR_TYPES +#undef ADD_MAKE_VECTOR_TYPE + +#endif // TEST_CONFORMANCE_CLCPP_UTILS_COMMON_MAKE_VECTOR_TYPE_HPP diff --git a/test_conformance/clcpp/utils_common/scalar_type.hpp b/test_conformance/clcpp/utils_common/scalar_type.hpp new file mode 100644 index 00000000..4c939bb2 --- /dev/null +++ b/test_conformance/clcpp/utils_common/scalar_type.hpp @@ -0,0 +1,64 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef TEST_CONFORMANCE_CLCPP_UTILS_COMMON_SCALAR_TYPE_HPP +#define TEST_CONFORMANCE_CLCPP_UTILS_COMMON_SCALAR_TYPE_HPP + +#include "../common.hpp" + +// scalar_type::type returns scalar type of Type. +// +// Examples: +// * scalar_type::type is cl_float +// * scalar_type::types is cl_float +template +struct scalar_type +{ + typedef void type; +}; + +#define ADD_VECTOR_TYPE(Type, n) \ + template<> \ + struct scalar_type \ + { \ + typedef Type type; \ + }; + +#define ADD_VECTOR_TYPES(Type) \ + template<> \ + struct scalar_type \ + { \ + typedef Type type; \ + }; \ + ADD_VECTOR_TYPE(Type, 2) \ + ADD_VECTOR_TYPE(Type, 4) \ + ADD_VECTOR_TYPE(Type, 8) \ + ADD_VECTOR_TYPE(Type, 16) + +ADD_VECTOR_TYPES(cl_char) +ADD_VECTOR_TYPES(cl_uchar) +ADD_VECTOR_TYPES(cl_short) +ADD_VECTOR_TYPES(cl_ushort) +ADD_VECTOR_TYPES(cl_int) +ADD_VECTOR_TYPES(cl_uint) +ADD_VECTOR_TYPES(cl_long) +ADD_VECTOR_TYPES(cl_ulong) +ADD_VECTOR_TYPES(cl_float) +ADD_VECTOR_TYPES(cl_double) + +#undef ADD_VECTOR_TYPES +#undef ADD_VECTOR_TYPE + +#endif // TEST_CONFORMANCE_CLCPP_UTILS_COMMON_SCALAR_TYPE_HPP diff --git a/test_conformance/clcpp/utils_common/string.hpp b/test_conformance/clcpp/utils_common/string.hpp new file mode 100644 index 00000000..ad5ac9f0 --- /dev/null +++ b/test_conformance/clcpp/utils_common/string.hpp @@ -0,0 +1,70 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef TEST_CONFORMANCE_CLCPP_UTILS_COMMON_STRING_HPP +#define TEST_CONFORMANCE_CLCPP_UTILS_COMMON_STRING_HPP + + +#include +#include +#include +#include + +#include "is_vector_type.hpp" +#include "scalar_type.hpp" +#include "type_name.hpp" + +#include "../common.hpp" + + +template +std::string format_value(const type& value, + typename std::enable_if::value>::type* = 0) +{ + std::stringstream s; + s << type_name() << "{ "; + s << std::scientific << std::setprecision(6); + for (size_t j = 0; j < vector_size::value; j++) + { + if (j > 0) + s << ", "; + s << value.s[j]; + } + s << " }"; + return s.str(); +} + +template +std::string format_value(const type& value, + typename std::enable_if::value>::type* = 0) +{ + std::stringstream s; + s << type_name() << "{ "; + s << std::scientific << std::setprecision(6); + s << value; + s << " }"; + return s.str(); +} + +void replace_all(std::string& str, const std::string& from, const std::string& to) +{ + size_t start_pos = 0; + while((start_pos = str.find(from, start_pos)) != std::string::npos) { + str.replace(start_pos, from.length(), to); + start_pos += to.length(); + } +} + +#endif // TEST_CONFORMANCE_CLCPP_UTILS_COMMON_STRING_HPP diff --git a/test_conformance/clcpp/utils_common/type_name.hpp b/test_conformance/clcpp/utils_common/type_name.hpp new file mode 100644 index 00000000..c66f6e49 --- /dev/null +++ b/test_conformance/clcpp/utils_common/type_name.hpp @@ -0,0 +1,65 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef TEST_CONFORMANCE_CLCPP_UTILS_COMMON_TYPE_NAME_HPP +#define TEST_CONFORMANCE_CLCPP_UTILS_COMMON_TYPE_NAME_HPP + +#include "../common.hpp" + +// Returns type name (in OpenCL device). +// cl_uint - "uint", cl_float2 -> "float2" +template +std::string type_name() +{ + return "unknown"; +} + +#define ADD_TYPE_NAME(Type, str) \ + template<> \ + std::string type_name() \ + { \ + return #str; \ + } + +#define ADD_TYPE_NAME2(Type) \ + ADD_TYPE_NAME(cl_ ## Type, Type) + +#define ADD_TYPE_NAME3(Type, x) \ + ADD_TYPE_NAME2(Type ## x) + +#define ADD_TYPE_NAMES(Type) \ + ADD_TYPE_NAME2(Type) \ + ADD_TYPE_NAME3(Type, 2) \ + ADD_TYPE_NAME3(Type, 4) \ + ADD_TYPE_NAME3(Type, 8) \ + ADD_TYPE_NAME3(Type, 16) + +ADD_TYPE_NAMES(char) +ADD_TYPE_NAMES(uchar) +ADD_TYPE_NAMES(short) +ADD_TYPE_NAMES(ushort) +ADD_TYPE_NAMES(int) +ADD_TYPE_NAMES(uint) +ADD_TYPE_NAMES(long) +ADD_TYPE_NAMES(ulong) +ADD_TYPE_NAMES(float) +ADD_TYPE_NAMES(double) + +#undef ADD_TYPE_NAMES +#undef ADD_TYPE_NAME3 +#undef ADD_TYPE_NAME2 +#undef ADD_TYPE_NAME + +#endif // TEST_CONFORMANCE_CLCPP_UTILS_COMMON_TYPE_NAME_HPP diff --git a/test_conformance/clcpp/utils_common/type_supported.hpp b/test_conformance/clcpp/utils_common/type_supported.hpp new file mode 100644 index 00000000..8d4f721b --- /dev/null +++ b/test_conformance/clcpp/utils_common/type_supported.hpp @@ -0,0 +1,106 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef TEST_CONFORMANCE_CLCPP_UTILS_COMMON_TYPE_SUPPORTED_HPP +#define TEST_CONFORMANCE_CLCPP_UTILS_COMMON_TYPE_SUPPORTED_HPP + +#include "../common.hpp" + +// Returns true if type is supported by device; otherwise - false; +template +bool type_supported(cl_device_id device) +{ + (void) device; + return false; +} + +#define ADD_SUPPORTED_TYPE(Type) \ + template<> \ + bool type_supported(cl_device_id device) \ + { \ + (void) device; \ + return true; \ + } + +ADD_SUPPORTED_TYPE(cl_char) +ADD_SUPPORTED_TYPE(cl_uchar) +ADD_SUPPORTED_TYPE(cl_short) +ADD_SUPPORTED_TYPE(cl_ushort) +ADD_SUPPORTED_TYPE(cl_int) +ADD_SUPPORTED_TYPE(cl_uint) + +// ulong +template<> +bool type_supported(cl_device_id device) +{ + // long types do not have to be supported in EMBEDDED_PROFILE. + char profile[128]; + int error; + + error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), (void *)&profile, NULL); + if (error != CL_SUCCESS) + { + log_error("ERROR: clGetDeviceInfo failed with CL_DEVICE_PROFILE\n"); + return false; + } + + if (std::strcmp(profile, "EMBEDDED_PROFILE") == 0) + return is_extension_available(device, "cles_khr_int64"); + + return true; +} +// long +template<> +bool type_supported(cl_device_id device) +{ + return type_supported(device); +} +ADD_SUPPORTED_TYPE(cl_float) +// double +template<> +bool type_supported(cl_device_id device) +{ + return is_extension_available(device, "cl_khr_fp64"); +} + +#define ADD_SUPPORTED_VEC_TYPE1(Type, n) \ + template<> \ + bool type_supported(cl_device_id device) \ + { \ + return type_supported(device); \ + } + +#define ADD_SUPPORTED_VEC_TYPE2(Type) \ + ADD_SUPPORTED_VEC_TYPE1(Type, 2) \ + ADD_SUPPORTED_VEC_TYPE1(Type, 4) \ + ADD_SUPPORTED_VEC_TYPE1(Type, 8) \ + ADD_SUPPORTED_VEC_TYPE1(Type, 16) + +ADD_SUPPORTED_VEC_TYPE2(cl_char) +ADD_SUPPORTED_VEC_TYPE2(cl_uchar) +ADD_SUPPORTED_VEC_TYPE2(cl_short) 
+ADD_SUPPORTED_VEC_TYPE2(cl_ushort) +ADD_SUPPORTED_VEC_TYPE2(cl_int) +ADD_SUPPORTED_VEC_TYPE2(cl_uint) +ADD_SUPPORTED_VEC_TYPE2(cl_long) +ADD_SUPPORTED_VEC_TYPE2(cl_ulong) +ADD_SUPPORTED_VEC_TYPE2(cl_float) +// ADD_SUPPORTED_VEC_TYPE2(cl_double) + +#undef ADD_SUPPORTED_VEC_TYPE2 +#undef ADD_SUPPORTED_VEC_TYPE1 +#undef ADD_SUPPORTED_TYPE + +#endif // TEST_CONFORMANCE_CLCPP_UTILS_COMMON_TYPE_SUPPORTED_HPP diff --git a/test_conformance/clcpp/utils_common/vector_size.hpp b/test_conformance/clcpp/utils_common/vector_size.hpp new file mode 100644 index 00000000..4817506e --- /dev/null +++ b/test_conformance/clcpp/utils_common/vector_size.hpp @@ -0,0 +1,61 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef TEST_CONFORMANCE_CLCPP_UTILS_COMMON_VECTOR_SIZE_HPP +#define TEST_CONFORMANCE_CLCPP_UTILS_COMMON_VECTOR_SIZE_HPP + +#include "../common.hpp" + +// Returns 1 if Type is a scalar type; otherwise if it's a vector type, +// it returns number of components in that Type. 
+template +struct vector_size +{ + const static size_t value = 1; +}; + +#define ADD_VECTOR_SIZE_TYPE(Type, n) \ + template<> \ + struct vector_size \ + { \ + const static size_t value = n; \ + }; + +#define ADD_VECTOR_SIZE_TYPES(Type) \ + template<> \ + struct vector_size \ + { \ + const static size_t value = 1; \ + }; \ + ADD_VECTOR_SIZE_TYPE(Type, 2) \ + ADD_VECTOR_SIZE_TYPE(Type, 4) \ + ADD_VECTOR_SIZE_TYPE(Type, 8) \ + ADD_VECTOR_SIZE_TYPE(Type, 16) + +ADD_VECTOR_SIZE_TYPES(cl_char) +ADD_VECTOR_SIZE_TYPES(cl_uchar) +ADD_VECTOR_SIZE_TYPES(cl_short) +ADD_VECTOR_SIZE_TYPES(cl_ushort) +ADD_VECTOR_SIZE_TYPES(cl_int) +ADD_VECTOR_SIZE_TYPES(cl_uint) +ADD_VECTOR_SIZE_TYPES(cl_long) +ADD_VECTOR_SIZE_TYPES(cl_ulong) +ADD_VECTOR_SIZE_TYPES(cl_float) +ADD_VECTOR_SIZE_TYPES(cl_double) + +#undef ADD_VECTOR_SIZE_TYPES +#undef ADD_VECTOR_SIZE_TYPE + +#endif // TEST_CONFORMANCE_CLCPP_UTILS_COMMON_VECTOR_SIZE_HPP diff --git a/test_conformance/clcpp/utils_test/binary.hpp b/test_conformance/clcpp/utils_test/binary.hpp new file mode 100644 index 00000000..5ff35c91 --- /dev/null +++ b/test_conformance/clcpp/utils_test/binary.hpp @@ -0,0 +1,308 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef TEST_CONFORMANCE_CLCPP_UTILS_TEST_BINARY_HPP +#define TEST_CONFORMANCE_CLCPP_UTILS_TEST_BINARY_HPP + +#include +#include +#include +#include + +#include "../common.hpp" + +#include "detail/base_func_type.hpp" +#include "generate_inputs.hpp" +#include "compare.hpp" + +template +struct binary_func : public detail::base_func_type +{ + typedef IN1 in1_type; + typedef IN2 in2_type; + typedef OUT1 out_type; + + virtual ~binary_func() {}; + virtual std::string str() = 0; + + std::string decl_str() + { + return type_name() + "(" + type_name() + ", " + type_name() + ")"; + } + + bool is_in1_bool() + { + return false; + } + + bool is_in2_bool() + { + return false; + } + + IN1 min1() + { + return detail::get_min(); + } + + IN1 max1() + { + return detail::get_max(); + } + + IN2 min2() + { + return detail::get_min(); + } + + IN2 max2() + { + return detail::get_max(); + } + + std::vector in1_special_cases() + { + return { }; + } + + std::vector in2_special_cases() + { + return { }; + } + + template + typename make_vector_type::value>::type + delta(const IN1& in1, const IN2& in2, const T& expected) + { + typedef + typename make_vector_type::value>::type + delta_vector_type; + // Take care of unused variable warning + (void) in1; + (void) in2; + auto e = detail::make_value(1e-3); + return detail::multiply(e, expected); + } +}; + +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) +template +std::string generate_kernel_binary(func_type func) +{ + std::string in1_value = "input1[gid]"; + if(func.is_in1_bool()) + { + std::string i = vector_size::value == 1 ? 
"" : std::to_string(vector_size::value); + in1_value = "(input1[gid] != (int" + i + ")(0))"; + } + std::string in2_value = "input2[gid]"; + if(func.is_in2_bool()) + { + std::string i = vector_size::value == 1 ? "" : std::to_string(vector_size::value); + in2_value = "(input2[gid] != (int" + i + ")(0))"; + } + std::string function_call = func.str() + "(" + in1_value + ", " + in2_value + ")"; + if(func.is_out_bool()) + { + std::string i = vector_size::value == 1 ? "" : std::to_string(vector_size::value); + function_call = "convert_int" + i + "(" + func.str() + "(" + in1_value + ", " + in2_value + "))"; + } + return + "__kernel void " + func.get_kernel_name() + "(global " + type_name() + " *input1,\n" + " global " + type_name() + " *input2,\n" + " global " + type_name() + " *output)\n" + "{\n" + " size_t gid = get_global_id(0);\n" + " output[gid] = " + function_call + ";\n" + "}\n"; +} +#else +template +std::string generate_kernel_binary(func_type func) +{ + std::string headers = func.headers(); + std::string in1_value = "input1[gid]"; + if(func.is_in1_bool()) + { + std::string i = vector_size::value == 1 ? "" : std::to_string(vector_size::value); + in1_value = "(input1[gid] != (int" + i + ")(0))"; + } + std::string in2_value = "input2[gid]"; + if(func.is_in2_bool()) + { + std::string i = vector_size::value == 1 ? "" : std::to_string(vector_size::value); + in2_value = "(input2[gid] != (int" + i + ")(0))"; + } + std::string function_call = func.str() + "(" + in1_value + ", " + in2_value + ")"; + if(func.is_out_bool()) + { + std::string i = vector_size::value == 1 ? 
"" : std::to_string(vector_size::value); + function_call = "convert_cast(" + func.str() + "(" + in1_value + ", " + in2_value + "))"; + } + if(func.is_out_bool() || func.is_in1_bool() || func.is_in2_bool()) + { + if(headers.find("#include ") == std::string::npos) + { + headers += "#include \n"; + } + } + return + "" + func.defs() + + "" + headers + + "#include \n" + "#include \n" + "using namespace cl;\n" + "__kernel void " + func.get_kernel_name() + "(global_ptr<" + type_name() + "[]> input1,\n" + " global_ptr<" + type_name() + "[]> input2,\n" + " global_ptr<" + type_name() + "[]> output)\n" + "{\n" + " size_t gid = get_global_id(0);\n" + " output[gid] = " + function_call + ";\n" + "}\n"; +} +#endif + +template +bool verify_binary(const std::vector &in1, + const std::vector &in2, + const std::vector &out, + binary_op op) +{ + for(size_t i = 0; i < in1.size(); i++) + { + auto expected = op(in1[i], in2[i]); + if(!are_equal(expected, out[i], op.delta(in1[i], in2[i], expected), op)) + { + print_error_msg(expected, out[i], i, op); + return false; + } + } + return true; +} + +template +int test_binary_func(cl_device_id device, cl_context context, cl_command_queue queue, size_t count, binary_op op) +{ + cl_mem buffers[3]; + cl_program program; + cl_kernel kernel; + size_t work_size[1]; + int err; + + typedef typename binary_op::in1_type INPUT1; + typedef typename binary_op::in2_type INPUT2; + typedef typename binary_op::out_type OUTPUT; + + // Don't run test for unsupported types + if(!(type_supported(device) + && type_supported(device) + && type_supported(device))) + { + return CL_SUCCESS; + } + + std::string code_str = generate_kernel_binary(op); + std::string kernel_name = op.get_kernel_name(); + +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +// Only 
OpenCL C++ to SPIR-V compilation +#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) + err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name); + RETURN_ON_ERROR(err) + return err; +// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) +#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name, "-cl-std=CL2.0", false); + RETURN_ON_ERROR(err) +#else + err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name); + RETURN_ON_ERROR(err) +#endif + + std::vector in1_spec_cases = op.in1_special_cases(); + std::vector in2_spec_cases = op.in2_special_cases(); + prepare_special_cases(in1_spec_cases, in2_spec_cases); + std::vector input1 = generate_input(count, op.min1(), op.max1(), in1_spec_cases); + std::vector input2 = generate_input(count, op.min2(), op.max2(), in2_spec_cases); + std::vector output = generate_output(count); + + buffers[0] = clCreateBuffer( + context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(INPUT1) * input1.size(), NULL, &err + ); + RETURN_ON_CL_ERROR(err, "clCreateBuffer") + + buffers[1] = clCreateBuffer( + context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(INPUT2) * input2.size(), NULL, &err + ); + RETURN_ON_CL_ERROR(err, "clCreateBuffer") + + buffers[2] = clCreateBuffer( + context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(OUTPUT) * output.size(), NULL, &err + ); + RETURN_ON_CL_ERROR(err, "clCreateBuffer") + + err = clEnqueueWriteBuffer( + queue, buffers[0], CL_TRUE, 0, sizeof(INPUT1) * input1.size(), + static_cast(input1.data()), 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer") + + err = clEnqueueWriteBuffer( + queue, buffers[1], CL_TRUE, 0, sizeof(INPUT2) * input2.size(), + static_cast(input2.data()), 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer") + + err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]); + err |= clSetKernelArg(kernel, 1, 
sizeof(buffers[1]), &buffers[1]); + err |= clSetKernelArg(kernel, 2, sizeof(buffers[2]), &buffers[2]); + RETURN_ON_CL_ERROR(err, "clSetKernelArg"); + + work_size[0] = count; + err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, NULL, 0, NULL, NULL); + RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel"); + + err = clEnqueueReadBuffer( + queue, buffers[2], CL_TRUE, 0, sizeof(OUTPUT) * output.size(), + static_cast(output.data()), 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer"); + + if (!verify_binary(input1, input2, output, op)) + { + RETURN_ON_ERROR_MSG(-1, + "test_%s %s(%s, %s) failed", op.str().c_str(), + type_name().c_str(), type_name().c_str(), type_name().c_str() + ); + } + log_info( + "test_%s %s(%s, %s) passed\n", op.str().c_str(), + type_name().c_str(), type_name().c_str(), type_name().c_str() + ); + + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseMemObject(buffers[2]); + clReleaseKernel(kernel); + clReleaseProgram(program); + return err; +} + +#endif // TEST_CONFORMANCE_CLCPP_UTILS_TEST_BINARY_HPP diff --git a/test_conformance/clcpp/utils_test/compare.hpp b/test_conformance/clcpp/utils_test/compare.hpp new file mode 100644 index 00000000..a22b88fd --- /dev/null +++ b/test_conformance/clcpp/utils_test/compare.hpp @@ -0,0 +1,161 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef TEST_CONFORMANCE_CLCPP_UTILS_TEST_COMPARE_HPP +#define TEST_CONFORMANCE_CLCPP_UTILS_TEST_COMPARE_HPP + +#include +#include +#include +#include + +#include + +#include "../common.hpp" + +// Checks if x is equal to y. +template +inline bool are_equal(const type& x, + const type& y, + const delta_type& delta, + op_type op, + typename std::enable_if< + is_vector_type::value + && std::is_integral::type>::value + >::type* = 0) +{ + (void) delta; + for(size_t i = 0; i < vector_size::value; i++) + { + if(op.is_out_bool()) + { + if(!((x.s[i] != 0) == (y.s[i] != 0))) + { + return false; + } + } + else if(!(x.s[i] == y.s[i])) + { + return false; + } + } + return true; +} + +template +inline bool are_equal(const type& x, + const type& y, + const delta_type& delta, + op_type op, + typename std::enable_if< + !is_vector_type::value + && std::is_integral::value + >::type* = 0) +{ + (void) delta; + if(op.is_out_bool()) + { + if(!((x != 0) == (y != 0))) + { + return false; + } + } + return x == y; +} + +template +inline bool are_equal(const type& x, + const type1& y, + const type2& delta, + op_type op, + typename std::enable_if< + !is_vector_type::value + && std::is_floating_point::value + >::type* = 0) +{ + // x - expected + // y - result + + // INFO: + // Whe don't care about subnormal values in OpenCL C++ tests + if(std::fpclassify(static_cast(x)) == FP_SUBNORMAL || std::fpclassify(y) == FP_SUBNORMAL) + { + return true; + } + + // both are NaN + if((std::isnan)(static_cast(x)) && (std::isnan)(y)) + { + return true; + } + // one is NaN + else if((std::isnan)(static_cast(x)) || (std::isnan)(y)) + { + return false; + } + + // Check for perfect match, it also covers inf, -inf + if(static_cast(x) != y) + { + // Check if values are close + if(std::abs(static_cast(x) - y) > (std::max)(std::numeric_limits::epsilon(), std::abs(delta))) + { + return false; + } + // Check ulp + if(op.use_ulp()) + { + return !(std::abs(Ulp_Error(x, y)) > op.ulp()); + } + } + return true; +} + 
+template +inline bool are_equal(const type& x, + const type1& y, + const type2& delta, + op_type op, + typename std::enable_if< + is_vector_type::value + && std::is_floating_point::type>::value + >::type* = 0) +{ + // x - expected + // y - result + for(size_t i = 0; i < vector_size::value; i++) + { + if(!are_equal(x.s[i], y.s[i], delta.s[i], op)) + { + return false; + } + } + return true; +} + +template +inline void print_error_msg(const type& expected, const type1& result, size_t i, func op) +{ + log_error( + "ERROR: test_%s %s failed. Error at %lu: Expected: %s, got: %s\n", + op.str().c_str(), + op.decl_str().c_str(), + i, + format_value(expected).c_str(), + format_value(result).c_str() + ); +} + +#endif // TEST_CONFORMANCE_CLCPP_UTILS_TEST_COMPARE_HPP diff --git a/test_conformance/clcpp/utils_test/detail/base_func_type.hpp b/test_conformance/clcpp/utils_test/detail/base_func_type.hpp new file mode 100644 index 00000000..92e375d0 --- /dev/null +++ b/test_conformance/clcpp/utils_test/detail/base_func_type.hpp @@ -0,0 +1,112 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef TEST_CONFORMANCE_CLCPP_UTILS_TEST_DETAIL_BASE_FUNC_TYPE_HPP +#define TEST_CONFORMANCE_CLCPP_UTILS_TEST_DETAIL_BASE_FUNC_TYPE_HPP + +#include +#include +#include +#include + +#include + +#include "../../common.hpp" + +#include "vec_helpers.hpp" + +namespace detail +{ + +template +struct base_func_type +{ + virtual ~base_func_type() {}; + + // Returns function name + virtual std::string str() = 0; + + // Returns name of the test kernel for that function + virtual std::string get_kernel_name() + { + std::string kn = this->str(); + replace_all(kn, "::", "_"); + return "test_" + kn; + } + + // Returns required defines and pragmas. + virtual std::string defs() + { + return ""; + } + + // Returns required OpenCL C++ headers. + virtual std::string headers() + { + return ""; + } + + // Return true if OUT1 type in OpenCL kernel should be treated + // as bool type; false otherwise. + bool is_out_bool() + { + return false; + } + + // Max ULP error, that is error should be raised when + // if Ulp_Error(result, expected) > ulp() + float ulp() + { + return 0.0f; + } + + // Should we check ULP error when verifing if the result is + // correct? + // + // (This effects how are_equal() function works, + // it may not have effect if verify() method in derived + // class does not use are_equal() function.) + // + // Only for FP numbers/vectors + bool use_ulp() + { + return true; + } + + // Max error. Error should be raised if + // abs(result - expected) > delta(.., expected) + // + // Default value: 0.001 * expected + // + // (This effects how are_equal() function works, + // it may not have effect if verify() method in derived + // class does not use are_equal() function.) 
+ // + // Only for FP numbers/vectors + template + typename make_vector_type::value>::type + delta(const T& expected) + { + typedef + typename make_vector_type::value>::type + delta_vector_type; + auto e = detail::make_value(1e-3); + return detail::multiply(e, expected); + } +}; + +} // detail namespace + +#endif // TEST_CONFORMANCE_CLCPP_UTILS_TEST_DETAIL_BASE_FUNC_TYPE_HPP diff --git a/test_conformance/clcpp/utils_test/detail/vec_helpers.hpp b/test_conformance/clcpp/utils_test/detail/vec_helpers.hpp new file mode 100644 index 00000000..05df42aa --- /dev/null +++ b/test_conformance/clcpp/utils_test/detail/vec_helpers.hpp @@ -0,0 +1,104 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef TEST_CONFORMANCE_CLCPP_UTILS_TEST_DETAIL_VEC_HELPERS_HPP +#define TEST_CONFORMANCE_CLCPP_UTILS_TEST_DETAIL_VEC_HELPERS_HPP + +#include +#include +#include +#include + +#include + +#include "../../common.hpp" + +namespace detail +{ + +template +T make_value(typename scalar_type::type x, typename std::enable_if::value>::type* = 0) +{ + T value; + for(size_t i = 0; i < vector_size::value; i++) + { + value.s[i] = x; + } + return value; +} + +template +T make_value(T x, typename std::enable_if::value>::type* = 0) +{ + return x; +} + +template +result_type multiply(const IN1& x, const IN2& y, typename std::enable_if::value>::type* = 0) +{ + static_assert( + (vector_size::value == vector_size::value) + && (vector_size::value == vector_size::value), + "Vector sizes must be the same." + ); + typedef typename scalar_type::type SCALAR; + result_type value; + for(size_t i = 0; i < vector_size::value; i++) + { + value.s[i] = static_cast(x.s[i]) * static_cast(y.s[i]); + } + return value; +} + +template +result_type multiply(const IN1& x, const IN2& y, typename std::enable_if::value>::type* = 0) +{ + static_assert( + !is_vector_type::value && !is_vector_type::value, + "IN1 and IN2 must be scalar types" + ); + return static_cast(x) * static_cast(y); +} + +template +T get_min() +{ + typedef typename scalar_type::type SCALAR; + return make_value((std::numeric_limits::min)()); +} + +template +T get_max() +{ + typedef typename scalar_type::type SCALAR; + return make_value((std::numeric_limits::max)()); +} + +template +T get_part_max(typename scalar_type::type x) +{ + typedef typename scalar_type::type SCALAR; + return make_value((std::numeric_limits::max)() / x); +} + +template +T def_limit(typename scalar_type::type x) +{ + return make_value(x); +} + +} // detail namespace + +#endif // TEST_CONFORMANCE_CLCPP_UTILS_TEST_DETAIL_VEC_HELPERS_HPP diff --git a/test_conformance/clcpp/utils_test/generate_inputs.hpp b/test_conformance/clcpp/utils_test/generate_inputs.hpp new file 
mode 100644 index 00000000..bb0d7506 --- /dev/null +++ b/test_conformance/clcpp/utils_test/generate_inputs.hpp @@ -0,0 +1,331 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef TEST_CONFORMANCE_CLCPP_UTILS_TEST_GENERATE_INPUTS_HPP +#define TEST_CONFORMANCE_CLCPP_UTILS_TEST_GENERATE_INPUTS_HPP + +#include +#include +#include +#include + +#include + +#include "../common.hpp" + +template +std::vector generate_input(size_t count, + const type& min, + const type& max, + const std::vector special_cases, + typename std::enable_if< + is_vector_type::value + && std::is_integral::type>::value + // std::uniform_int_distribution<> does not work in VS2015 for cl_uchar and cl_char, + // because VS2015 thinks that use cl_int, because VS2015 thinks cl_uchar cl_char are + // not int types + && !(std::is_same::type, cl_uchar>::value + || std::is_same::type, cl_char>::value) + >::type* = 0) +{ + typedef typename scalar_type::type SCALAR; + const size_t vec_size = vector_size::value; + + std::vector input(count); + std::random_device rd; + std::mt19937 gen(rd()); + std::vector> dists(vec_size); + for(size_t i = 0; i < vec_size; i++) + { + dists[i] = std::uniform_int_distribution(min.s[i], max.s[i]); + } + for(auto& i : input) + { + for(size_t j = 0; j < vec_size; j++) + { + i.s[j] = dists[j](gen); + } + } + + input.insert(input.begin(), special_cases.begin(), special_cases.end()); + input.resize(count); + return 
input; +} + +template +std::vector generate_input(size_t count, + const type& min, + const type& max, + const std::vector special_cases, + typename std::enable_if< + is_vector_type::value + && std::is_integral::type>::value + // std::uniform_int_distribution<> does not work in VS2015 for cl_uchar and cl_char, + // because VS2015 thinks that use cl_int, because VS2015 thinks cl_uchar cl_char are + // not int types + && (std::is_same::type, cl_uchar>::value + || std::is_same::type, cl_char>::value) + >::type* = 0) +{ + typedef typename scalar_type::type SCALAR; + const size_t vec_size = vector_size::value; + + std::vector input(count); + std::random_device rd; + std::mt19937 gen(rd()); + std::vector> dists(vec_size); + for(size_t i = 0; i < vec_size; i++) + { + dists[i] = std::uniform_int_distribution( + static_cast(min.s[i]), + static_cast(max.s[i]) + ); + } + for(auto& i : input) + { + for(size_t j = 0; j < vec_size; j++) + { + i.s[j] = static_cast(dists[j](gen)); + } + } + + input.insert(input.begin(), special_cases.begin(), special_cases.end()); + input.resize(count); + return input; +} + + +template +std::vector generate_input(size_t count, + const type& min, + const type& max, + const std::vector special_cases, + typename std::enable_if< + !is_vector_type::value + && std::is_integral::value + // std::uniform_int_distribution<> does not work in VS2015 for cl_uchar and cl_char, + // because VS2015 thinks that use cl_int, because VS2015 thinks cl_uchar cl_char are + // not int types + && !(std::is_same::value || std::is_same::value) + >::type* = 0) +{ + std::vector input(count); + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution dis(min, max); + for(auto& i : input) + { + i = dis(gen); + } + + input.insert(input.begin(), special_cases.begin(), special_cases.end()); + input.resize(count); + return input; +} + +template +std::vector generate_input(size_t count, + const type& min, + const type& max, + const std::vector special_cases, + 
typename std::enable_if< + !is_vector_type::value + && std::is_integral::value + // std::uniform_int_distribution<> does not work in VS2015 for cl_uchar and cl_char, + // because VS2015 thinks that use cl_int, because VS2015 thinks cl_uchar cl_char are + // not int types + && (std::is_same::value || std::is_same::value) + >::type* = 0) +{ + std::vector input(count); + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution dis( + static_cast(min), static_cast(max) + ); + for(auto& i : input) + { + i = static_cast(dis(gen)); + } + + input.insert(input.begin(), special_cases.begin(), special_cases.end()); + input.resize(count); + return input; +} + +template +std::vector generate_input(size_t count, + const type& min, + const type& max, + const std::vector special_cases, + typename std::enable_if< + is_vector_type::value + && std::is_floating_point::type>::value + >::type* = 0) +{ + typedef typename scalar_type::type SCALAR; + const size_t vec_size = vector_size::value; + + std::vector input(count); + std::random_device rd; + std::mt19937 gen(rd()); + std::vector> dists(vec_size); + for(size_t i = 0; i < vec_size; i++) + { + // Fatal error + if(std::fpclassify(max.s[i]) == FP_SUBNORMAL || std::fpclassify(min.s[i]) == FP_SUBNORMAL) + { + log_error("ERROR: min and max value for input generation CAN NOT BE subnormal\n"); + } + dists[i] = std::uniform_real_distribution(min.s[i], max.s[i]); + } + for(auto& i : input) + { + for(size_t j = 0; j < vec_size; j++) + { + SCALAR x = dists[j](gen); + while(std::fpclassify(x) == FP_SUBNORMAL) + { + x = dists[j](gen); + } + i.s[j] = x; + } + } + + input.insert(input.begin(), special_cases.begin(), special_cases.end()); + input.resize(count); + return input; +} + +template +std::vector generate_input(size_t count, + const type& min, + const type& max, + const std::vector special_cases, + typename std::enable_if< + !is_vector_type::value + && std::is_floating_point::value + >::type* = 0) +{ + // Fatal error + 
if(std::fpclassify(max) == FP_SUBNORMAL || std::fpclassify(min) == FP_SUBNORMAL) + { + log_error("ERROR: min and max value for input generation CAN NOT BE subnormal\n"); + } + std::vector input(count); + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution dis(min, max); + for(auto& i : input) + { + type x = dis(gen); + while(std::fpclassify(x) == FP_SUBNORMAL) + { + x = dis(gen); + } + i = x; + } + + input.insert(input.begin(), special_cases.begin(), special_cases.end()); + input.resize(count); + return input; +} + +template +std::vector generate_output(size_t count, + typename scalar_type::type svalue = typename scalar_type::type(0), + typename std::enable_if::value>::type* = 0) +{ + type value; + for(size_t i = 0; i < vector_size::value; i++) + value.s[i] = svalue; + return std::vector(count, value); +} + +template +std::vector generate_output(size_t count, + type svalue = type(0), + typename std::enable_if::value>::type* = 0) +{ + return std::vector(count, svalue); +} + +template +void prepare_special_cases(std::vector& in1_spec_cases, std::vector& in2_spec_cases) +{ + if(in1_spec_cases.empty() || in2_spec_cases.empty()) + { + return; + } + + size_t new_size = in1_spec_cases.size() * in2_spec_cases.size(); + std::vector new_in1(new_size); + std::vector new_in2(new_size); + for(size_t i = 0; i < in1_spec_cases.size(); i++) + { + for(size_t j = 0; j < in2_spec_cases.size(); j++) + { + new_in1[(i * in2_spec_cases.size()) + j] = in1_spec_cases[i]; + new_in2[(i * in2_spec_cases.size()) + j] = in2_spec_cases[j]; + } + } + in1_spec_cases = new_in1; + in2_spec_cases = new_in2; +} + +template +void prepare_special_cases(std::vector& in1_spec_cases, + std::vector& in2_spec_cases, + std::vector& in3_spec_cases) +{ + if(in3_spec_cases.empty()) + { + return prepare_special_cases(in1_spec_cases, in2_spec_cases); + } + else if (in2_spec_cases.empty()) + { + return prepare_special_cases(in1_spec_cases, in3_spec_cases); + } + else if 
(in1_spec_cases.empty()) + { + return prepare_special_cases(in2_spec_cases, in3_spec_cases); + } + + size_t new_size = in1_spec_cases.size() * in2_spec_cases.size() * in3_spec_cases.size(); + std::vector new_in1(new_size); + std::vector new_in2(new_size); + std::vector new_in3(new_size); + for(size_t i = 0; i < in1_spec_cases.size(); i++) + { + for(size_t j = 0; j < in2_spec_cases.size(); j++) + { + for(size_t k = 0; k < in3_spec_cases.size(); k++) + { + size_t idx = + (i * in2_spec_cases.size() * in3_spec_cases.size()) + + (j * in3_spec_cases.size()) + + k; + new_in1[idx] = in1_spec_cases[i]; + new_in2[idx] = in2_spec_cases[j]; + new_in3[idx] = in3_spec_cases[k]; + } + } + } + in1_spec_cases = new_in1; + in2_spec_cases = new_in2; + in3_spec_cases = new_in3; +} + +#endif // TEST_CONFORMANCE_CLCPP_UTILS_TEST_GENERATE_INPUTS_HPP diff --git a/test_conformance/clcpp/utils_test/ternary.hpp b/test_conformance/clcpp/utils_test/ternary.hpp new file mode 100644 index 00000000..342681e1 --- /dev/null +++ b/test_conformance/clcpp/utils_test/ternary.hpp @@ -0,0 +1,368 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef TEST_CONFORMANCE_CLCPP_UTILS_TEST_TERNARY_HPP +#define TEST_CONFORMANCE_CLCPP_UTILS_TEST_TERNARY_HPP + +#include +#include +#include +#include + +#include "../common.hpp" + +#include "detail/base_func_type.hpp" +#include "generate_inputs.hpp" +#include "compare.hpp" + +template +struct ternary_func : public detail::base_func_type +{ + typedef IN1 in1_type; + typedef IN2 in2_type; + typedef IN3 in3_type; + typedef OUT1 out_type; + + virtual ~ternary_func() {}; + virtual std::string str() = 0; + + std::string decl_str() + { + return type_name() + "(" + type_name() + ", " + type_name()+ ", " + type_name() + ")"; + } + + bool is_in1_bool() + { + return false; + } + + bool is_in2_bool() + { + return false; + } + + bool is_in3_bool() + { + return false; + } + + IN1 min1() + { + return detail::get_min(); + } + + IN1 max1() + { + return detail::get_max(); + } + + IN2 min2() + { + return detail::get_min(); + } + + IN2 max2() + { + return detail::get_max(); + } + + IN3 min3() + { + return detail::get_min(); + } + + IN3 max3() + { + return detail::get_max(); + } + + std::vector in1_special_cases() + { + return { }; + } + + std::vector in2_special_cases() + { + return { }; + } + + std::vector in3_special_cases() + { + return { }; + } + + template + typename make_vector_type::value>::type + delta(const IN1& in1, const IN2& in2, const IN3& in3, const T& expected) + { + typedef + typename make_vector_type::value>::type + delta_vector_type; + // Take care of unused variable warning + (void) in1; + (void) in2; + (void) in3; + auto e = detail::make_value(1e-3); + return detail::multiply(e, expected); + } +}; + +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) +template +std::string 
generate_kernel_ternary(func_type func) +{ + std::string in1_value = "input1[gid]"; + if(func.is_in1_bool()) + { + std::string i = vector_size::value == 1 ? "" : std::to_string(vector_size::value); + in1_value = "(input1[gid] != (int" + i + ")(0))"; + } + std::string in2_value = "input2[gid]"; + if(func.is_in2_bool()) + { + std::string i = vector_size::value == 1 ? "" : std::to_string(vector_size::value); + in2_value = "(input2[gid] != (int" + i + ")(0))"; + } + std::string in3_value = "input3[gid]"; + if(func.is_in3_bool()) + { + std::string i = vector_size::value == 1 ? "" : std::to_string(vector_size::value); + in3_value = "(input3[gid] != (int" + i + ")(0))"; + } + std::string function_call = func.str() + "(" + in1_value + ", " + in2_value + ", " + in3_value + ")"; + if(func.is_out_bool()) + { + std::string i = vector_size::value == 1 ? "" : std::to_string(vector_size::value); + function_call = "convert_int" + i + "(" + func.str() + "(" + in1_value + ", " + in2_value + ", " + in3_value + "))"; + } + return + "__kernel void " + func.get_kernel_name() + "(global " + type_name() + " *input1,\n" + " global " + type_name() + " *input2,\n" + " global " + type_name() + " *input3,\n" + " global " + type_name() + " *output)\n" + "{\n" + " size_t gid = get_global_id(0);\n" + " output[gid] = " + function_call + ";\n" + "}\n"; +} +#else +template +std::string generate_kernel_ternary(func_type func) +{ + std::string headers = func.headers(); + std::string in1_value = "input1[gid]"; + if(func.is_in1_bool()) + { + std::string i = vector_size::value == 1 ? "" : std::to_string(vector_size::value); + in1_value = "(input1[gid] != (int" + i + ")(0))"; + } + std::string in2_value = "input2[gid]"; + if(func.is_in2_bool()) + { + std::string i = vector_size::value == 1 ? "" : std::to_string(vector_size::value); + in2_value = "(input2[gid] != (int" + i + ")(0))"; + } + std::string in3_value = "input3[gid]"; + if(func.is_in3_bool()) + { + std::string i = vector_size::value == 1 ? 
"" : std::to_string(vector_size::value); + in3_value = "(input3[gid] != (int" + i + ")(0))"; + } + std::string function_call = func.str() + "(" + in1_value + ", " + in2_value + ", " + in3_value + ")"; + if(func.is_out_bool()) + { + std::string i = vector_size::value == 1 ? "" : std::to_string(vector_size::value); + function_call = "convert_cast(" + func.str() + "(" + in1_value + ", " + in2_value + ", " + in3_value + "))"; + } + if(func.is_out_bool() || func.is_in1_bool() || func.is_in2_bool() || func.is_in3_bool()) + { + if(headers.find("#include ") == std::string::npos) + { + headers += "#include \n"; + } + } + return + "" + func.defs() + + "" + headers + + "#include \n" + "#include \n" + "using namespace cl;\n" + "__kernel void " + func.get_kernel_name() + "(global_ptr<" + type_name() + "[]> input1,\n" + " global_ptr<" + type_name() + "[]> input2,\n" + " global_ptr<" + type_name() + "[]> input3,\n" + " global_ptr<" + type_name() + "[]> output)\n" + "{\n" + " size_t gid = get_global_id(0);\n" + " output[gid] = " + function_call + ";\n" + "}\n"; +} +#endif + +template +bool verify_ternary(const std::vector &in1, + const std::vector &in2, + const std::vector &in3, + const std::vector &out, + ternary_op op) +{ + for(size_t i = 0; i < in1.size(); i++) + { + auto expected = op(in1[i], in2[i], in3[i]); + if(!are_equal(expected, out[i], op.delta(in1[i], in2[i], in3[i], expected), op)) + { + print_error_msg(expected, out[i], i, op); + return false; + } + } + return true; +} + +template +int test_ternary_func(cl_device_id device, cl_context context, cl_command_queue queue, size_t count, ternary_op op) +{ + cl_mem buffers[4]; + cl_program program; + cl_kernel kernel; + size_t work_size[1]; + int err; + + typedef typename ternary_op::in1_type INPUT1; + typedef typename ternary_op::in2_type INPUT2; + typedef typename ternary_op::in3_type INPUT3; + typedef typename ternary_op::out_type OUTPUT; + + // Don't run test for unsupported types + if(!(type_supported(device) + && 
type_supported(device) + && type_supported(device) + && type_supported(device))) + { + return CL_SUCCESS; + } + + std::string code_str = generate_kernel_ternary(op); + std::string kernel_name = op.get_kernel_name(); + +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +// Only OpenCL C++ to SPIR-V compilation +#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) + err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name); + RETURN_ON_ERROR(err) + return err; +// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) +#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name, "-cl-std=CL2.0", false); + RETURN_ON_ERROR(err) +#else + err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name); + RETURN_ON_ERROR(err) +#endif + + std::vector in1_spec_cases = op.in1_special_cases(); + std::vector in2_spec_cases = op.in2_special_cases(); + std::vector in3_spec_cases = op.in3_special_cases(); + prepare_special_cases(in1_spec_cases, in2_spec_cases, in3_spec_cases); + std::vector input1 = generate_input(count, op.min1(), op.max1(), in1_spec_cases); + std::vector input2 = generate_input(count, op.min2(), op.max2(), in2_spec_cases); + std::vector input3 = generate_input(count, op.min3(), op.max3(), in3_spec_cases); + std::vector output = generate_output(count); + + buffers[0] = clCreateBuffer( + context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(INPUT1) * input1.size(), NULL, &err + ); + RETURN_ON_CL_ERROR(err, "clCreateBuffer") + + buffers[1] = clCreateBuffer( + context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(INPUT2) * input2.size(), NULL, &err + ); + RETURN_ON_CL_ERROR(err, "clCreateBuffer") + + buffers[2] = clCreateBuffer( 
+ context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(INPUT3) * input3.size(), NULL, &err + ); + RETURN_ON_CL_ERROR(err, "clCreateBuffer") + + buffers[3] = clCreateBuffer( + context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(OUTPUT) * output.size(), NULL, &err + ); + RETURN_ON_CL_ERROR(err, "clCreateBuffer") + + err = clEnqueueWriteBuffer( + queue, buffers[0], CL_TRUE, 0, sizeof(INPUT1) * input1.size(), + static_cast(input1.data()), 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer"); + + err = clEnqueueWriteBuffer( + queue, buffers[1], CL_TRUE, 0, sizeof(INPUT2) * input2.size(), + static_cast(input2.data()), 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer"); + + err = clEnqueueWriteBuffer( + queue, buffers[2], CL_TRUE, 0, sizeof(INPUT3) * input3.size(), + static_cast(input3.data()), 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer"); + + err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]); + err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]); + err |= clSetKernelArg(kernel, 2, sizeof(buffers[2]), &buffers[2]); + err |= clSetKernelArg(kernel, 3, sizeof(buffers[3]), &buffers[3]); + RETURN_ON_CL_ERROR(err, "clSetKernelArg"); + + work_size[0] = count; + err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, NULL, 0, NULL, NULL); + RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel"); + + err = clEnqueueReadBuffer( + queue, buffers[3], CL_TRUE, 0, sizeof(OUTPUT) * output.size(), + static_cast(output.data()), 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer"); + + if (!verify_ternary(input1, input2, input3, output, op)) + { + RETURN_ON_ERROR_MSG(-1, + "test_%s %s(%s, %s, %s) failed", op.str().c_str(), + type_name().c_str(), + type_name().c_str(), + type_name().c_str(), + type_name().c_str() + ); + } + log_info( + "test_%s %s(%s, %s, %s) passed\n", op.str().c_str(), + type_name().c_str(), + type_name().c_str(), + type_name().c_str(), + type_name().c_str() + ); + + 
clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseMemObject(buffers[2]); + clReleaseMemObject(buffers[3]); + clReleaseKernel(kernel); + clReleaseProgram(program); + return err; +} + +#endif // TEST_CONFORMANCE_CLCPP_UTILS_TEST_TERNARY_HPP diff --git a/test_conformance/clcpp/utils_test/unary.hpp b/test_conformance/clcpp/utils_test/unary.hpp new file mode 100644 index 00000000..2dbc6471 --- /dev/null +++ b/test_conformance/clcpp/utils_test/unary.hpp @@ -0,0 +1,261 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef TEST_CONFORMANCE_CLCPP_UTILS_TEST_UNARY_HPP +#define TEST_CONFORMANCE_CLCPP_UTILS_TEST_UNARY_HPP + +#include +#include +#include +#include + +#include "../common.hpp" + +#include "detail/base_func_type.hpp" +#include "generate_inputs.hpp" +#include "compare.hpp" + +template +struct unary_func : public detail::base_func_type +{ + typedef IN1 in_type; + typedef OUT1 out_type; + + virtual ~unary_func() {}; + virtual std::string str() = 0; + + // Return string with function type, for example: int(float). + std::string decl_str() + { + return type_name() + "(" + type_name() + ")"; + } + + // Return true if IN1 type in OpenCL kernel should be treated + // as bool type; false otherwise. + bool is_in1_bool() + { + return false; + } + + // Return min value that can be used as a first argument. 
+ IN1 min1() + { + return detail::get_min(); + } + + // Return max value that can be used as a first argument. + IN1 max1() + { + return detail::get_max(); + } + + // This returns a list of special cases input values we want to + // test. + std::vector in_special_cases() + { + return { }; + } + + // Max error. Error should be raised if + // abs(result - expected) > delta(.., expected) + // + // Default value: 0.001 * expected + // + // (This affects how are_equal() function works, + // it may not have effect if verify() method in derived + // class does not use are_equal() function.) + // + // Only for FP numbers/vectors + template + typename make_vector_type::value>::type + delta(const IN1& in1, const T& expected) + { + typedef + typename make_vector_type::value>::type + delta_vector_type; + // Take care of unused variable warning + (void) in1; + auto e = detail::make_value(1e-3); + return detail::multiply(e, expected); + } +}; + +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) +template +std::string generate_kernel_unary(func_type func) +{ + std::string in1_value = "input[gid]"; + // Convert uintN to boolN values + if(func.is_in1_bool()) + { + std::string i = vector_size::value == 1 ? "" : std::to_string(vector_size::value); + in1_value = "(input[gid] != (int" + i + ")(0))"; + } + std::string function_call = func.str() + "(" + in1_value + ")"; + // Convert boolN result of function func_type to uintN + if(func.is_out_bool()) + { + std::string i = vector_size::value == 1 ? 
"" : std::to_string(vector_size::value); + function_call = "convert_int" + i + "(" + func.str() + "(" + in1_value + "))"; + } + return + "__kernel void " + func.get_kernel_name() + "(global " + type_name() + " *input, global " + type_name() + " *output)\n" + "{\n" + " size_t gid = get_global_id(0);\n" + " output[gid] = " + function_call + ";\n" + "}\n"; +} +#else +template +std::string generate_kernel_unary(func_type func) +{ + std::string headers = func.headers(); + std::string in1_value = "input[gid]"; + if(func.is_in1_bool()) + { + std::string i = vector_size::value == 1 ? "" : std::to_string(vector_size::value); + in1_value = "(input[gid] != (int" + i + ")(0))"; + } + std::string function_call = func.str() + "(" + in1_value + ")"; + if(func.is_out_bool()) + { + std::string i = vector_size::value == 1 ? "" : std::to_string(vector_size::value); + function_call = "convert_cast(" + func.str() + "(" + in1_value + "))"; + } + if(func.is_out_bool() || func.is_in1_bool()) + { + if(headers.find("#include ") == std::string::npos) + { + headers += "#include \n"; + } + } + return + "" + func.defs() + + "" + headers + + "#include \n" + "#include \n" + "using namespace cl;\n" + "__kernel void " + func.get_kernel_name() + "(global_ptr<" + type_name() + "[]> input," + "global_ptr<" + type_name() + "[]> output)\n" + "{\n" + " size_t gid = get_global_id(0);\n" + " output[gid] = " + function_call + ";\n" + "}\n"; +} +#endif + +template +bool verify_unary(const std::vector &in, const std::vector &out, unary_op op) +{ + for(size_t i = 0; i < in.size(); i++) + { + auto expected = op(in[i]); + if(!are_equal(expected, out[i], op.delta(in[i], expected), op)) + { + print_error_msg(expected, out[i], i, op); + return false; + } + } + return true; +} + +template +int test_unary_func(cl_device_id device, cl_context context, cl_command_queue queue, size_t count, unary_op op) +{ + cl_mem buffers[2]; + cl_program program; + cl_kernel kernel; + size_t work_size[1]; + int err; + + typedef 
typename unary_op::in_type INPUT; + typedef typename unary_op::out_type OUTPUT; + + // Don't run test for unsupported types + if(!(type_supported(device) && type_supported(device))) + { + return CL_SUCCESS; + } + + std::string code_str = generate_kernel_unary(op); + std::string kernel_name = op.get_kernel_name(); + +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +// Only OpenCL C++ to SPIR-V compilation +#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) + err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name); + RETURN_ON_ERROR(err) + return err; +// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) +#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name, "-cl-std=CL2.0", false); + RETURN_ON_ERROR(err) +#else + err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name); + RETURN_ON_ERROR(err) +#endif + + std::vector input = generate_input(count, op.min1(), op.max1(), op.in_special_cases()); + std::vector output = generate_output(count); + + buffers[0] = clCreateBuffer( + context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(INPUT) * input.size(), NULL, &err + ); + RETURN_ON_CL_ERROR(err, "clCreateBuffer") + + buffers[1] = clCreateBuffer( + context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(OUTPUT) * output.size(), NULL, &err + ); + RETURN_ON_CL_ERROR(err, "clCreateBuffer") + + err = clEnqueueWriteBuffer( + queue, buffers[0], CL_TRUE, 0, sizeof(INPUT) * input.size(), + static_cast(input.data()), 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer"); + + err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]); + err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]); + 
RETURN_ON_CL_ERROR(err, "clSetKernelArg"); + + work_size[0] = count; + err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, NULL, 0, NULL, NULL); + RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel"); + + err = clEnqueueReadBuffer( + queue, buffers[1], CL_TRUE, 0, sizeof(OUTPUT) * output.size(), + static_cast(output.data()), 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer"); + + if (!verify_unary(input, output, op)) + { + RETURN_ON_ERROR_MSG(-1, "test_%s %s(%s) failed", op.str().c_str(), type_name().c_str(), type_name().c_str()); + } + log_info("test_%s %s(%s) passed\n", op.str().c_str(), type_name().c_str(), type_name().c_str()); + + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseKernel(kernel); + clReleaseProgram(program); + return err; +} + +#endif // TEST_CONFORMANCE_CLCPP_UTILS_TEST_UNARY_HPP diff --git a/test_conformance/clcpp/vload_vstore/CMakeLists.txt b/test_conformance/clcpp/vload_vstore/CMakeLists.txt new file mode 100644 index 00000000..0204dc5c --- /dev/null +++ b/test_conformance/clcpp/vload_vstore/CMakeLists.txt @@ -0,0 +1,12 @@ +set(MODULE_NAME CPP_VLOAD_VSTORE_FUNCS) + +set(${MODULE_NAME}_SOURCES + main.cpp + ../../../test_common/harness/errorHelpers.c + ../../../test_common/harness/testHarness.c + ../../../test_common/harness/kernelHelpers.c + ../../../test_common/harness/msvc9.c + ../../../test_common/harness/parseParameters.cpp +) + +include(../../CMakeCommon.txt) diff --git a/test_conformance/clcpp/vload_vstore/common.hpp b/test_conformance/clcpp/vload_vstore/common.hpp new file mode 100644 index 00000000..84cd539d --- /dev/null +++ b/test_conformance/clcpp/vload_vstore/common.hpp @@ -0,0 +1,81 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef TEST_CONFORMANCE_CLCPP_VLOAD_VSTORE_COMMON_HPP +#define TEST_CONFORMANCE_CLCPP_VLOAD_VSTORE_COMMON_HPP + +#include +#include + +#include "../common.hpp" +#include "../funcs_test_utils.hpp" + +#include "half_utils.hpp" + +// Generates cl_half input +std::vector generate_half_input(size_t count, + const cl_float& min, + const cl_float& max, + const std::vector special_cases) +{ + std::vector input(count); + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution dis(min, max); + for(auto& i : input) + { + i = float2half_rte(dis(gen)); + } + + input.insert(input.begin(), special_cases.begin(), special_cases.end()); + input.resize(count); + return input; +} + +// Generates input for vload_vstore tests; we can't just simply use function +// generate_input(...), because cl_half is typedef of cl_short (but generating +// cl_shorts and generating cl_halfs are different operations). +template +std::vector vload_vstore_generate_input(size_t count, + const type& min, + const type& max, + const std::vector special_cases, + const bool generate_half, + typename std::enable_if< + std::is_same::value + >::type* = 0) +{ + if(!generate_half) + { + return generate_input(count, min, max, special_cases); + } + return generate_half_input(count, -(CL_HALF_MAX/4.f), (CL_HALF_MAX/4.f), special_cases); +} + +// If !std::is_same::value, we can just use generate_input(...). 
+template +std::vector vload_vstore_generate_input(size_t count, + const type& min, + const type& max, + const std::vector special_cases, + const bool generate_half, + typename std::enable_if< + !std::is_same::value + >::type* = 0) +{ + return generate_input(count, min, max, special_cases); +} + +#endif // TEST_CONFORMANCE_CLCPP_VLOAD_VSTORE_COMMON_HPP diff --git a/test_conformance/clcpp/vload_vstore/half_utils.hpp b/test_conformance/clcpp/vload_vstore/half_utils.hpp new file mode 100644 index 00000000..5c60599d --- /dev/null +++ b/test_conformance/clcpp/vload_vstore/half_utils.hpp @@ -0,0 +1,136 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef TEST_CONFORMANCE_CLCPP_HALF_UTILS_HPP +#define TEST_CONFORMANCE_CLCPP_HALF_UTILS_HPP + +#include "../common.hpp" +#include "../funcs_test_utils.hpp" + +#include + +namespace detail +{ + +template +inline int clz(INT_TYPE x) +{ + int count = 0; + if(std::is_unsigned::value) + { + cl_ulong value = x; + value <<= 8 * sizeof(value) - (8 * sizeof(x)); + for(count = 0; 0 == (value & (CL_LONG_MIN)); count++) + { + value <<= 1; + } + } + else + { + cl_long value = x; + value <<= 8 * sizeof(value) - (8 * sizeof(x)); + for(count = 0; 0 == (value & (CL_LONG_MIN)); count++) + { + value <<= 1; + } + } + return count; +} + +} // namespace detail + +inline cl_float half2float(cl_half us) +{ + uint32_t u = us; + uint32_t sign = (u << 16) & 0x80000000; + int32_t exponent = (u & 0x7c00) >> 10; + uint32_t mantissa = (u & 0x03ff) << 13; + union{ cl_uint u; cl_float f;}uu; + + if( exponent == 0 ) + { + if( mantissa == 0 ) + return sign ? -0.0f : 0.0f; + + int shift = detail::clz( mantissa ) - 8; + exponent -= shift-1; + mantissa <<= shift; + mantissa &= 0x007fffff; + } + else + if( exponent == 31) + { + uu.u = mantissa | sign; + if( mantissa ) + uu.u |= 0x7fc00000; + else + uu.u |= 0x7f800000; + + return uu.f; + } + + exponent += 127 - 15; + exponent <<= 23; + + exponent |= mantissa; + uu.u = exponent | sign; + + return uu.f; +} + +inline cl_ushort float2half_rte(cl_float f) +{ + union{ cl_float f; cl_uint u; } u = {f}; + cl_uint sign = (u.u >> 16) & 0x8000; + cl_float x = fabsf(f); + + //Nan + if( x != x ) + { + u.u >>= (24-11); + u.u &= 0x7fff; + u.u |= 0x0200; //silence the NaN + return u.u | sign; + } + + // overflow + if( x >= MAKE_HEX_FLOAT(0x1.ffep15f, 0x1ffeL, 3) ) + return 0x7c00 | sign; + + // underflow + if( x <= MAKE_HEX_FLOAT(0x1.0p-25f, 0x1L, -25) ) + return sign; // The halfway case can return 0x0001 or 0. 0 is even. 
+ + // very small + if( x < MAKE_HEX_FLOAT(0x1.8p-24f, 0x18L, -28) ) + return sign | 1; + + // half denormal + if( x < MAKE_HEX_FLOAT(0x1.0p-14f, 0x1L, -14) ) + { + u.f = x * MAKE_HEX_FLOAT(0x1.0p-125f, 0x1L, -125); + return sign | u.u; + } + + u.f *= MAKE_HEX_FLOAT(0x1.0p13f, 0x1L, 13); + u.u &= 0x7f800000; + x += u.f; + u.f = x - u.f; + u.f *= MAKE_HEX_FLOAT(0x1.0p-112f, 0x1L, -112); + + return (u.u >> (24-11)) | sign; +} + +#endif // TEST_CONFORMANCE_CLCPP_HALF_UTILS_HPP diff --git a/test_conformance/clcpp/vload_vstore/main.cpp b/test_conformance/clcpp/vload_vstore/main.cpp new file mode 100644 index 00000000..6e5978c0 --- /dev/null +++ b/test_conformance/clcpp/vload_vstore/main.cpp @@ -0,0 +1,30 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../common.hpp" + +#include "vload_funcs.hpp" +#include "vstore_funcs.hpp" + +int main(int argc, const char *argv[]) +{ + // Get list to all test functions + std::vector testfn_list = autotest::test_suite::get_test_functions(); + // Get names of all test functions + std::vector testfn_names = autotest::test_suite::get_test_names(); + // Create a vector of pointers to the names test functions + std::vector testfn_names_c_str = autotest::get_strings_ptrs(testfn_names); + return runTestHarness(argc, argv, testfn_list.size(), testfn_list.data(), testfn_names_c_str.data(), false, false, 0); +} diff --git a/test_conformance/clcpp/vload_vstore/vload_funcs.hpp b/test_conformance/clcpp/vload_vstore/vload_funcs.hpp new file mode 100644 index 00000000..f0bbcfc5 --- /dev/null +++ b/test_conformance/clcpp/vload_vstore/vload_funcs.hpp @@ -0,0 +1,363 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef TEST_CONFORMANCE_CLCPP_VLOAD_VSTORE_FUNCS_VLOAD_FUNCS_HPP +#define TEST_CONFORMANCE_CLCPP_VLOAD_VSTORE_FUNCS_VLOAD_FUNCS_HPP + +#include + +#include "../common.hpp" +#include "../funcs_test_utils.hpp" + +#include "common.hpp" + +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) +template +std::string generate_kernel_vload(func_type func) +{ + std::string input1_type_str = type_name(); + if(func.is_in1_half()) + { + input1_type_str = "half"; + } + std::string output1_type_str = type_name(); + if(N == 3) + { + output1_type_str[output1_type_str.size() - 1] = '3'; + } + return + "__kernel void test_" + func.str() + "(global " + input1_type_str + " *input, global " + output1_type_str + " *output)\n" + "{\n" + " size_t gid = get_global_id(0);\n" + " output[gid] = " + func.str() + std::to_string(N) + "(gid, input);\n" + "}\n"; +} +#else +template +std::string generate_kernel_vload(func_type func) +{ + std::string input1_type_str = type_name(); + if(func.is_in1_half()) + { + input1_type_str = "half"; + } + std::string output1_type_str = type_name(); + if(N == 3) + { + output1_type_str[output1_type_str.size() - 1] = '3'; + } + return + "" + func.defs() + + "" + func.headers() + + "#include \n" + "#include \n" + "using namespace cl;\n" + "__kernel void test_" + func.str() + "(global_ptr<" + input1_type_str + "[]> input," + "global_ptr<" + output1_type_str + "[]> output)\n" + "{\n" + " size_t gid = get_global_id(0);\n" + " output[gid] = " + func.str() + "<" + std::to_string(N) + ">(gid, input.get());\n" + "}\n"; +} +#endif + +template +bool verify_vload(const std::vector &in, const std::vector &out, vload_op op) +{ + for(size_t i = 0; i < out.size(); i++) + { + auto expected = op(i, in.begin()); + 
for(size_t j = 0; j < vload_op::vector_size; j++) + { + size_t idx = (i * vector_size::value) + j; + if(!are_equal(expected.s[j], out[i].s[j], op.delta(in[idx], expected.s[j]), op)) + { + print_error_msg(expected, out[i], i, op); + return false; + } + } + } + return true; +} + +template +int test_vload_func(cl_device_id device, cl_context context, cl_command_queue queue, size_t count, vload_op op) +{ + cl_mem buffers[2]; + cl_program program; + cl_kernel kernel; + size_t work_size[1]; + int err; + + typedef typename vload_op::in_type INPUT; + typedef typename vload_op::out_type OUTPUT; + + // Don't run test for unsupported types + if(!(type_supported(device) && type_supported(device))) + { + return CL_SUCCESS; + } + + std::string code_str = generate_kernel_vload(op); + std::string kernel_name("test_"); kernel_name += op.str(); + +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +// Only OpenCL C++ to SPIR-V compilation +#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) + err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name); + RETURN_ON_ERROR(err) + return err; +// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) +#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name, "-cl-std=CL2.0", false); + RETURN_ON_ERROR(err) +#else + err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name); + RETURN_ON_ERROR(err) +#endif + + std::vector input = vload_vstore_generate_input( + count * vector_size::value, op.min1(), op.max1(), op.in_special_cases(), op.is_in1_half() + ); + std::vector output = generate_output(count); + + buffers[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(INPUT) * 
input.size(), NULL, &err); + RETURN_ON_CL_ERROR(err, "clCreateBuffer"); + + buffers[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(OUTPUT) * output.size(), NULL, &err); + RETURN_ON_CL_ERROR(err, "clCreateBuffer"); + + err = clEnqueueWriteBuffer( + queue, buffers[0], CL_TRUE, 0, sizeof(INPUT) * input.size(), + static_cast(input.data()), 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer"); + + err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]); + err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]); + RETURN_ON_CL_ERROR(err, "clSetKernelArg"); + + work_size[0] = count; + err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, NULL, 0, NULL, NULL); + RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel"); + + err = clEnqueueReadBuffer( + queue, buffers[1], CL_TRUE, 0, sizeof(OUTPUT) * output.size(), + static_cast(output.data()), 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer"); + + if (!verify_vload(input, output, op)) + { + RETURN_ON_ERROR_MSG(-1, "test_%s %s(%s) failed", + op.str().c_str(), + type_name().c_str(), + type_name().c_str() + ); + } + log_info("test_%s %s(%s) passed\n", op.str().c_str(), type_name().c_str(), type_name().c_str()); + + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseKernel(kernel); + clReleaseProgram(program); + return err; +} + +template +struct vload_func : public unary_func< + IN1, + typename make_vector_type::type /* create IN1N type */ + > +{ + typedef typename make_vector_type::type result_type; + const static size_t vector_size = N; + + std::string str() + { + return "vload"; + } + + std::string headers() + { + return "#include \n"; + } + + template + result_type operator()(const size_t offset, Iterator x) + { + static_assert( + !is_vector_type::value, + "IN1 must be scalar type" + ); + static_assert( + std::is_same::value_type, IN1>::value, + "std::iterator_traits::value_type must be IN1" + ); + + typedef typename 
std::iterator_traits::difference_type diff_type; + + result_type r; + Iterator temp = x + static_cast(offset * N); + for(size_t i = 0; i < N; i++) + { + r.s[i] = *temp; + temp++; + } + return r; + } + + bool is_in1_half() + { + return false; + } +}; + +template +struct vload_half_func : public unary_func< + cl_half, + typename make_vector_type::type /* create IN1N type */ + > +{ + typedef typename make_vector_type::type result_type; + const static size_t vector_size = N; + + std::string str() + { + return "vload_half"; + } + + std::string headers() + { + return "#include \n"; + } + + template + result_type operator()(const size_t offset, Iterator x) + { + static_assert( + std::is_same::value_type, cl_half>::value, + "std::iterator_traits::value_type must be cl_half" + ); + + typedef typename std::iterator_traits::difference_type diff_type; + + result_type r; + Iterator temp = x + static_cast(offset * N); + for(size_t i = 0; i < N; i++) + { + r.s[i] = half2float(*temp); + temp++; + } + return r; + } + + bool is_in1_half() + { + return true; + } +}; + +template +struct vloada_half_func : public unary_func< + cl_half, + typename make_vector_type::type /* create IN1N type */ + > +{ + typedef typename make_vector_type::type result_type; + const static size_t vector_size = N; + + std::string str() + { + return "vloada_half"; + } + + std::string headers() + { + return "#include \n"; + } + + template + result_type operator()(const size_t offset, Iterator x) + { + static_assert( + std::is_same::value_type, cl_half>::value, + "std::iterator_traits::value_type must be cl_half" + ); + + typedef typename std::iterator_traits::difference_type diff_type; + + result_type r; + size_t alignment = N == 3 ? 
4 : N; + Iterator temp = x + static_cast(offset * alignment); + for(size_t i = 0; i < N; i++) + { + r.s[i] = half2float(*temp); + temp++; + } + return r; + } + + bool is_in1_half() + { + return true; + } +}; + +AUTO_TEST_CASE(test_vload_funcs) +(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + int error = CL_SUCCESS; + int last_error = CL_SUCCESS; + +#define TEST_VLOAD_FUNC_MACRO(CLASS) \ + last_error = test_vload_func( \ + device, context, queue, n_elems, CLASS \ + ); \ + CHECK_ERROR(last_error) \ + error |= last_error; + + TEST_VLOAD_FUNC_MACRO((vload_func())) + TEST_VLOAD_FUNC_MACRO((vload_func())) + TEST_VLOAD_FUNC_MACRO((vload_func())) + TEST_VLOAD_FUNC_MACRO((vload_func())) + + TEST_VLOAD_FUNC_MACRO((vload_half_func<2>())) + TEST_VLOAD_FUNC_MACRO((vload_half_func<3>())) + TEST_VLOAD_FUNC_MACRO((vload_half_func<4>())) + TEST_VLOAD_FUNC_MACRO((vload_half_func<8>())) + TEST_VLOAD_FUNC_MACRO((vload_half_func<16>())) + + TEST_VLOAD_FUNC_MACRO((vloada_half_func<2>())) + TEST_VLOAD_FUNC_MACRO((vloada_half_func<3>())) + TEST_VLOAD_FUNC_MACRO((vloada_half_func<4>())) + TEST_VLOAD_FUNC_MACRO((vloada_half_func<8>())) + TEST_VLOAD_FUNC_MACRO((vloada_half_func<16>())) + +#undef TEST_VLOAD_FUNC_MACRO + + if(error != CL_SUCCESS) + { + return -1; + } + return error; +} + +#endif // TEST_CONFORMANCE_CLCPP_VLOAD_VSTORE_FUNCS_VLOAD_FUNCS_HPP diff --git a/test_conformance/clcpp/vload_vstore/vstore_funcs.hpp b/test_conformance/clcpp/vload_vstore/vstore_funcs.hpp new file mode 100644 index 00000000..5bf83f51 --- /dev/null +++ b/test_conformance/clcpp/vload_vstore/vstore_funcs.hpp @@ -0,0 +1,348 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef TEST_CONFORMANCE_CLCPP_VLOAD_VSTORE_FUNCS_VSTORE_FUNCS_HPP +#define TEST_CONFORMANCE_CLCPP_VLOAD_VSTORE_FUNCS_VSTORE_FUNCS_HPP + +#include "../common.hpp" +#include "../funcs_test_utils.hpp" + +#include + +#include "../common.hpp" +#include "../funcs_test_utils.hpp" + +#include "common.hpp" + +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) +template +std::string generate_kernel_vstore(func_type func) +{ + std::string input1_type_str = type_name(); + if(N == 3) + { + input1_type_str[input1_type_str.size() - 1] = '3'; + } + std::string output1_type_str = type_name(); + if(func.is_out_half()) + { + output1_type_str = "half"; + } + return + "__kernel void test_" + func.str() + "(global " + input1_type_str + " *input, global " + output1_type_str + " *output)\n" + "{\n" + " size_t gid = get_global_id(0);\n" + " " + func.str() + std::to_string(N) + "(input[gid], gid, output);\n" + "}\n"; +} +#else +template +std::string generate_kernel_vstore(func_type func) +{ + std::string input1_type_str = type_name(); + if(N == 3) + { + input1_type_str[input1_type_str.size() - 1] = '3'; + } + std::string output1_type_str = type_name(); + if(func.is_out_half()) + { + output1_type_str = "half"; + } + return + "" + func.defs() + + "" + func.headers() + + "#include \n" + "#include \n" + 
"using namespace cl;\n" + "__kernel void test_" + func.str() + "(global_ptr<" + input1_type_str + "[]> input," + "global_ptr<" + output1_type_str + "[]> output)\n" + "{\n" + " size_t gid = get_global_id(0);\n" + " " + func.str() + "(input[gid], gid, output.get());\n" + "}\n"; +} +#endif + +template +bool verify_vstore(const std::vector &in, const std::vector &out, vload_op op) +{ + for(size_t i = 0; i < in.size(); i++) + { + auto expected = op(in[i]); + for(size_t j = 0; j < vload_op::vector_size; j++) + { + size_t idx = (i * vload_op::vec_alignment) + j; + if(!are_equal(expected.s[j], out[idx], op.delta(in[i], expected).s[j], op)) + { + print_error_msg(expected.s[j], out[idx], idx, op); + return false; + } + } + } + return true; +} + +template +int test_vstore_func(cl_device_id device, cl_context context, cl_command_queue queue, size_t count, vload_op op) +{ + cl_mem buffers[2]; + cl_program program; + cl_kernel kernel; + size_t work_size[1]; + int err; + + typedef typename vload_op::in_type INPUT; + typedef typename vload_op::out_type OUTPUT; + + // Don't run test for unsupported types + if(!(type_supported(device) && type_supported(device))) + { + return CL_SUCCESS; + } + + std::string code_str = generate_kernel_vstore(op); + std::string kernel_name("test_"); kernel_name += op.str(); + +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +// Only OpenCL C++ to SPIR-V compilation +#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) + err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name); + RETURN_ON_ERROR(err) + return err; +// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) +#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + err = create_opencl_kernel(context, &program, &kernel, code_str, 
kernel_name, "-cl-std=CL2.0", false); + RETURN_ON_ERROR(err) +#else + err = create_opencl_kernel(context, &program, &kernel, code_str, kernel_name); + RETURN_ON_ERROR(err) +#endif + + std::vector input = generate_input(count, op.min1(), op.max1(), op.in_special_cases()); + std::vector output = generate_output(count * vector_size::value); + + buffers[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(INPUT) * input.size(), NULL, &err); + RETURN_ON_CL_ERROR(err, "clCreateBuffer"); + + buffers[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(OUTPUT) * output.size(), NULL, &err); + RETURN_ON_CL_ERROR(err, "clCreateBuffer"); + + err = clEnqueueWriteBuffer( + queue, buffers[0], CL_TRUE, 0, sizeof(INPUT) * input.size(), + static_cast(input.data()), 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer"); + + err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]); + err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]); + RETURN_ON_CL_ERROR(err, "clSetKernelArg"); + + work_size[0] = count; + err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, NULL, 0, NULL, NULL); + RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel"); + + err = clEnqueueReadBuffer( + queue, buffers[1], CL_TRUE, 0, sizeof(OUTPUT) * output.size(), + static_cast(output.data()), 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer"); + + if (!verify_vstore(input, output, op)) + { + RETURN_ON_ERROR_MSG(-1, "test_%s %s(%s) failed", op.str().c_str(), type_name().c_str(), type_name().c_str()); + } + log_info("test_%s %s(%s) passed\n", op.str().c_str(), type_name().c_str(), type_name().c_str()); + + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseKernel(kernel); + clReleaseProgram(program); + return err; +} + +template +struct vstore_func : public unary_func< + typename make_vector_type::type, + T + > +{ + typedef typename make_vector_type::type input1_type; + typedef typename 
make_vector_type::type result_type; + const static size_t vector_size = N; + const static size_t vec_alignment = N; + + std::string str() + { + return "vstore"; + } + + std::string headers() + { + return "#include \n"; + } + + result_type operator()(const input1_type& in) + { + static_assert( + !is_vector_type::value, + "T must be scalar type" + ); + return in; + } + + bool is_out_half() + { + return false; + } +}; + +template +struct vstore_half_func : public unary_func< + typename make_vector_type::type, + cl_half + > +{ + typedef typename make_vector_type::type input1_type; + typedef typename make_vector_type::type result_type; + const static size_t vector_size = N; + const static size_t vec_alignment = N; + + std::string str() + { + return "vstore_half"; + } + + std::string headers() + { + return "#include \n"; + } + + result_type operator()(const input1_type& in) + { + result_type r; + for(size_t i = 0; i < N; i++) + { + r.s[i] = float2half_rte(in.s[i]); + } + return r; + } + + input1_type min1() + { + return detail::make_value(-512.f); + } + + input1_type max1() + { + return detail::make_value(512.f); + } + + bool is_out_half() + { + return true; + } +}; + +template +struct vstorea_half_func : public unary_func< + typename make_vector_type::type, + cl_half + > +{ + typedef typename make_vector_type::type input1_type; + typedef typename make_vector_type::type result_type; + const static size_t vector_size = N; + const static size_t vec_alignment = N == 3 ? 
4 : N; + + std::string str() + { + return "vstorea_half"; + } + + std::string headers() + { + return "#include \n"; + } + + result_type operator()(const input1_type& in) + { + result_type r; + for(size_t i = 0; i < N; i++) + { + r.s[i] = float2half_rte(in.s[i]); + } + return r; + } + + input1_type min1() + { + return detail::make_value(-512.f); + } + + input1_type max1() + { + return detail::make_value(512.f); + } + + bool is_out_half() + { + return true; + } +}; + +AUTO_TEST_CASE(test_vstore_funcs) +(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + int error = CL_SUCCESS; + int last_error = CL_SUCCESS; + +#define TEST_VSTORE_FUNC_MACRO(CLASS) \ + last_error = test_vstore_func( \ + device, context, queue, n_elems, CLASS \ + ); \ + CHECK_ERROR(last_error) \ + error |= last_error; + + TEST_VSTORE_FUNC_MACRO((vstore_func())) + TEST_VSTORE_FUNC_MACRO((vstore_func())) + TEST_VSTORE_FUNC_MACRO((vstore_func())) + TEST_VSTORE_FUNC_MACRO((vstore_func())) + TEST_VSTORE_FUNC_MACRO((vstore_func())) + + TEST_VSTORE_FUNC_MACRO((vstore_half_func<2>())) + TEST_VSTORE_FUNC_MACRO((vstore_half_func<3>())) + TEST_VSTORE_FUNC_MACRO((vstore_half_func<4>())) + TEST_VSTORE_FUNC_MACRO((vstore_half_func<8>())) + TEST_VSTORE_FUNC_MACRO((vstore_half_func<16>())) + + TEST_VSTORE_FUNC_MACRO((vstorea_half_func<2>())) + TEST_VSTORE_FUNC_MACRO((vstorea_half_func<3>())) + +#undef TEST_VSTORE_FUNC_MACRO + + if(error != CL_SUCCESS) + { + return -1; + } + return error; +} + +#endif // TEST_CONFORMANCE_CLCPP_VLOAD_VSTORE_FUNCS_VSTORE_FUNCS_HPP diff --git a/test_conformance/clcpp/workgroups/CMakeLists.txt b/test_conformance/clcpp/workgroups/CMakeLists.txt new file mode 100644 index 00000000..de99e367 --- /dev/null +++ b/test_conformance/clcpp/workgroups/CMakeLists.txt @@ -0,0 +1,12 @@ +set(MODULE_NAME CPP_WORKGROUPS) + +set(${MODULE_NAME}_SOURCES + main.cpp + ../../../test_common/harness/errorHelpers.c + ../../../test_common/harness/testHarness.c + 
../../../test_common/harness/kernelHelpers.c + ../../../test_common/harness/msvc9.c + ../../../test_common/harness/parseParameters.cpp +) + +include(../../CMakeCommon.txt) diff --git a/test_conformance/clcpp/workgroups/common.hpp b/test_conformance/clcpp/workgroups/common.hpp new file mode 100644 index 00000000..ab7b100d --- /dev/null +++ b/test_conformance/clcpp/workgroups/common.hpp @@ -0,0 +1,97 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef TEST_CONFORMANCE_CLCPP_WG_COMMON_HPP +#define TEST_CONFORMANCE_CLCPP_WG_COMMON_HPP + +#include +#include +#include + +enum class work_group_op : int { + add, min, max +}; + +std::string to_string(work_group_op op) +{ + switch (op) + { + case work_group_op::add: + return "add"; + case work_group_op::min: + return "min"; + case work_group_op::max: + return "max"; + default: + break; + } + return ""; +} + +template +std::vector generate_input(size_t count, size_t wg_size) +{ + std::vector input(count, CL_INT_TYPE(1)); + switch (op) + { + case work_group_op::add: + return input; + case work_group_op::min: + { + size_t j = wg_size; + for(size_t i = 0; i < count; i++) + { + input[i] = static_cast(j); + j--; + if(j == 0) + { + j = wg_size; + } + } + } + break; + case work_group_op::max: + { + size_t j = 0; + for(size_t i = 0; i < count; i++) + { + input[i] = static_cast(j); + j++; + if(j == wg_size) + { + j = 0; + } + } + } + } + return input; +} + +template +std::vector generate_output(size_t count, size_t wg_size) +{ + switch (op) + { + case work_group_op::add: + return std::vector(count, CL_INT_TYPE(0)); + case work_group_op::min: + return std::vector(count, (std::numeric_limits::max)()); + case work_group_op::max: + return std::vector(count, (std::numeric_limits::min)()); + } + return std::vector(count, CL_INT_TYPE(0)); +} + +#endif // TEST_CONFORMANCE_CLCPP_WG_COMMON_HPP diff --git a/test_conformance/clcpp/workgroups/main.cpp b/test_conformance/clcpp/workgroups/main.cpp new file mode 100644 index 00000000..72d83e26 --- /dev/null +++ b/test_conformance/clcpp/workgroups/main.cpp @@ -0,0 +1,34 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../common.hpp" + +#include "test_wg_all.hpp" +#include "test_wg_any.hpp" +#include "test_wg_broadcast.hpp" +#include "test_wg_reduce.hpp" +#include "test_wg_scan_inclusive.hpp" +#include "test_wg_scan_exclusive.hpp" + +int main(int argc, const char *argv[]) +{ + // Get list to all test functions + std::vector testfn_list = autotest::test_suite::get_test_functions(); + // Get names of all test functions + std::vector testfn_names = autotest::test_suite::get_test_names(); + // Create a vector of pointers to the names test functions + std::vector testfn_names_c_str = autotest::get_strings_ptrs(testfn_names); + return runTestHarness(argc, argv, testfn_list.size(), testfn_list.data(), testfn_names_c_str.data(), false, false, 0); +} diff --git a/test_conformance/clcpp/workgroups/test_wg_all.hpp b/test_conformance/clcpp/workgroups/test_wg_all.hpp new file mode 100644 index 00000000..103ce2bf --- /dev/null +++ b/test_conformance/clcpp/workgroups/test_wg_all.hpp @@ -0,0 +1,218 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef TEST_CONFORMANCE_CLCPP_WG_TEST_WG_ALL_HPP +#define TEST_CONFORMANCE_CLCPP_WG_TEST_WG_ALL_HPP + +#include +#include +#include + +// Common for all OpenCL C++ tests +#include "../common.hpp" +// Common for tests of work-group functions +#include "common.hpp" + +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) +std::string generate_wg_all_kernel_code() +{ + return + "__kernel void test_wg_all(global uint *input, global uint *output)\n" + "{\n" + " ulong tid = get_global_id(0);\n" + "\n" + " int result = work_group_all(input[tid] < input[tid+1]);\n" + " if(result == 0) {\n output[tid] = 0;\n return;\n }\n" + " output[tid] = 1;\n" + "}\n"; +} +#else +std::string generate_wg_all_kernel_code() +{ + return "#include \n" + "#include \n" + "#include \n" + "using namespace cl;\n" + "__kernel void test_wg_all(global_ptr input, global_ptr output)\n" + "{\n" + " ulong tid = get_global_id(0);\n" + " bool result = work_group_all(input[tid] < input[tid+1]);\n" + " if(!result) {\n output[tid] = 0;\n return;\n }\n" + " output[tid] = 1;\n" + "}\n"; +} +#endif + +int verify_wg_all(const std::vector &in, const std::vector &out, size_t count, size_t wg_size) +{ + size_t i, j; + for (i = 0; i < count; i += wg_size) + { + // Work-group all + bool all = true; + for (j = 0; j < ((count - i) > wg_size ? wg_size : (count - i)); j++) + { + if(!(in[i+j] < in[i+j+1])) + { + all = false; + break; + } + } + + // Convert bool to uint + cl_uint all_uint = all ? 1 : 0; + // Check if all work-items in work-group stored correct value + for (j = 0; j < ((count - i) > wg_size ? 
wg_size : (count - i)); j++) + { + if (all_uint != out[i + j]) + { + log_info( + "work_group_all %s: Error at %lu: expected = %lu, got = %lu\n", + type_name().c_str(), + i + j, + static_cast(all_uint), + static_cast(out[i + j])); + return -1; + } + } + } + return CL_SUCCESS; +} + +std::vector generate_input_wg_all(size_t count, size_t wg_size) +{ + std::vector input(count, cl_uint(0)); + size_t j = wg_size; + for(size_t i = 0; i < count; i++) + { + input[i] = static_cast(i); + // In one place in ~half of workgroups input[tid] < input[tid+1] will + // generate false, that means for that workgroups work_group_all() + // should return false + if((j == wg_size/2) && (i > count/2)) + { + input[i] = input[i - 1]; + } + j--; + if(j == 0) + { + j = wg_size; + } + } + return input; +} + +std::vector generate_output_wg_all(size_t count, size_t wg_size) +{ + (void) wg_size; + return std::vector(count, cl_uint(1)); +} + +int work_group_all(cl_device_id device, cl_context context, cl_command_queue queue, size_t count) +{ + cl_mem buffers[2]; + cl_program program; + cl_kernel kernel; + size_t wg_size; + size_t work_size[1]; + int err; + + std::string code_str = generate_wg_all_kernel_code(); +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +// Only OpenCL C++ to SPIR-V compilation +#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) + err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_all"); + RETURN_ON_ERROR(err) + return err; +// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) +#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_all", "-cl-std=CL2.0", false); + RETURN_ON_ERROR(err) +#else + err = create_opencl_kernel(context, 
&program, &kernel, code_str, "test_wg_all"); + RETURN_ON_ERROR(err) +#endif + + err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, NULL); + RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo") + + // Calculate global work size + size_t flat_work_size; + size_t wg_number = static_cast( + std::ceil(static_cast(count) / wg_size) + ); + flat_work_size = wg_number * wg_size; + work_size[0] = flat_work_size; + + std::vector input = generate_input_wg_all(flat_work_size + 1, wg_size); + std::vector output = generate_output_wg_all(flat_work_size, wg_size); + + buffers[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * input.size(), NULL, &err); + RETURN_ON_CL_ERROR(err, "clCreateBuffer"); + + buffers[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * output.size(), NULL, &err); + RETURN_ON_CL_ERROR(err, "clCreateBuffer"); + + err = clEnqueueWriteBuffer( + queue, buffers[0], CL_TRUE, 0, sizeof(cl_uint) * input.size(), + static_cast(input.data()), 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer"); + + err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]); + err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]); + RETURN_ON_CL_ERROR(err, "clSetKernelArg"); + + err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, &wg_size, 0, NULL, NULL); + RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel"); + + err = clEnqueueReadBuffer( + queue, buffers[1], CL_TRUE, 0, sizeof(cl_uint) * output.size(), + static_cast(output.data()), 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer"); + + if (verify_wg_all(input, output, flat_work_size, wg_size) != CL_SUCCESS) + { + RETURN_ON_ERROR_MSG(-1, "work_group_all failed"); + } + log_info("work_group_all passed\n"); + + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseKernel(kernel); + clReleaseProgram(program); + return err; +} + 
+AUTO_TEST_CASE(test_work_group_all) +(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + int err = CL_SUCCESS; + + err = work_group_all(device, context, queue, n_elems); + CHECK_ERROR(err) + + if(err != CL_SUCCESS) + return -1; + return CL_SUCCESS; +} + +#endif // TEST_CONFORMANCE_CLCPP_WG_TEST_WG_ALL_HPP diff --git a/test_conformance/clcpp/workgroups/test_wg_any.hpp b/test_conformance/clcpp/workgroups/test_wg_any.hpp new file mode 100644 index 00000000..724b3ceb --- /dev/null +++ b/test_conformance/clcpp/workgroups/test_wg_any.hpp @@ -0,0 +1,218 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef TEST_CONFORMANCE_CLCPP_WG_TEST_WG_ANY_HPP +#define TEST_CONFORMANCE_CLCPP_WG_TEST_WG_ANY_HPP + +#include +#include +#include + +// Common for all OpenCL C++ tests +#include "../common.hpp" +// Common for tests of work-group functions +#include "common.hpp" + +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) +std::string generate_wg_any_kernel_code() +{ + return + "__kernel void test_wg_any(global uint *input, global uint *output)\n" + "{\n" + " ulong tid = get_global_id(0);\n" + "\n" + " int result = work_group_any(input[tid] == input[tid+1]);\n" + " if(result == 0) {\n output[tid] = 0;\n return;\n }\n" + " output[tid] = 1;\n" + "}\n"; +} +#else +std::string generate_wg_any_kernel_code() +{ + return "#include \n" + "#include \n" + "#include \n" + "using namespace cl;\n" + "__kernel void test_wg_any(global_ptr input, global_ptr output)\n" + "{\n" + " ulong tid = get_global_id(0);\n" + " bool result = work_group_any(input[tid] == input[tid+1]);\n" + " if(!result) {\n output[tid] = 0;\n return;\n }\n" + " output[tid] = 1;\n" + "}\n"; +} +#endif + +int verify_wg_any(const std::vector &in, const std::vector &out, size_t count, size_t wg_size) +{ + size_t i, j; + for (i = 0; i < count; i += wg_size) + { + // Work-group any + bool any = false; + for (j = 0; j < ((count - i) > wg_size ? wg_size : (count - i)); j++) + { + if(in[i+j] == in[i+j+1]) + { + any = true; + break; + } + } + + // Convert bool to uint + cl_uint any_uint = any ? 1 : 0; + // Check if all work-items in work-group stored correct value + for (j = 0; j < ((count - i) > wg_size ? 
wg_size : (count - i)); j++) + { + if (any_uint != out[i + j]) + { + log_info( + "work_group_any %s: Error at %lu: expected = %lu, got = %lu\n", + type_name().c_str(), + i + j, + static_cast(any_uint), + static_cast(out[i + j])); + return -1; + } + } + } + return CL_SUCCESS; +} + +std::vector generate_input_wg_any(size_t count, size_t wg_size) +{ + std::vector input(count, cl_uint(0)); + size_t j = wg_size; + for(size_t i = 0; i < count; i++) + { + input[i] = static_cast(i); + // In one place in ~half of workgroups input[tid] == input[tid+1] will + // generate true, that means for that workgroups work_group_any() + // should return true + if((j == wg_size/2) && (i > count/2)) + { + input[i] = input[i - 1]; + } + j--; + if(j == 0) + { + j = wg_size; + } + } + return input; +} + +std::vector generate_output_wg_any(size_t count, size_t wg_size) +{ + (void) wg_size; + return std::vector(count, cl_uint(1)); +} + +int work_group_any(cl_device_id device, cl_context context, cl_command_queue queue, size_t count) +{ + cl_mem buffers[2]; + cl_program program; + cl_kernel kernel; + size_t wg_size; + size_t work_size[1]; + int err; + + std::string code_str = generate_wg_any_kernel_code(); +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +// Only OpenCL C++ to SPIR-V compilation +#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) + err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_any"); + RETURN_ON_ERROR(err) + return err; +// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) +#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_any", "-cl-std=CL2.0", false); + RETURN_ON_ERROR(err) +#else + err = create_opencl_kernel(context, 
&program, &kernel, code_str, "test_wg_any"); + RETURN_ON_ERROR(err) +#endif + + err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, NULL); + RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo") + + // Calculate global work size + size_t flat_work_size; + size_t wg_number = static_cast( + std::ceil(static_cast(count) / wg_size) + ); + flat_work_size = wg_number * wg_size; + work_size[0] = flat_work_size; + + std::vector input = generate_input_wg_any(flat_work_size + 1, wg_size); + std::vector output = generate_output_wg_any(flat_work_size, wg_size); + + buffers[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * input.size(), NULL, &err); + RETURN_ON_CL_ERROR(err, "clCreateBuffer"); + + buffers[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * output.size(), NULL, &err); + RETURN_ON_CL_ERROR(err, "clCreateBuffer"); + + err = clEnqueueWriteBuffer( + queue, buffers[0], CL_TRUE, 0, sizeof(cl_uint) * input.size(), + static_cast(input.data()), 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer"); + + err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]); + err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]); + RETURN_ON_CL_ERROR(err, "clSetKernelArg"); + + err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, &wg_size, 0, NULL, NULL); + RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel"); + + err = clEnqueueReadBuffer( + queue, buffers[1], CL_TRUE, 0, sizeof(cl_uint) * output.size(), + static_cast(output.data()), 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer"); + + if (verify_wg_any(input, output, flat_work_size, wg_size) != CL_SUCCESS) + { + RETURN_ON_ERROR_MSG(-1, "work_group_any failed"); + } + log_info("work_group_any passed\n"); + + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseKernel(kernel); + clReleaseProgram(program); + return err; +} + 
+AUTO_TEST_CASE(test_work_group_any) +(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + int err = CL_SUCCESS; + + err = work_group_any(device, context, queue, n_elems); + CHECK_ERROR(err) + + if(err != CL_SUCCESS) + return -1; + return CL_SUCCESS; +} + +#endif // TEST_CONFORMANCE_CLCPP_WG_TEST_WG_ANY_HPP diff --git a/test_conformance/clcpp/workgroups/test_wg_broadcast.hpp b/test_conformance/clcpp/workgroups/test_wg_broadcast.hpp new file mode 100644 index 00000000..4dc5559e --- /dev/null +++ b/test_conformance/clcpp/workgroups/test_wg_broadcast.hpp @@ -0,0 +1,458 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef TEST_CONFORMANCE_CLCPP_WG_TEST_WG_BROADCAST_HPP +#define TEST_CONFORMANCE_CLCPP_WG_TEST_WG_BROADCAST_HPP + +#include +#include +#include + +// Common for all OpenCL C++ tests +#include "../common.hpp" +// Common for tests of work-group functions +#include "common.hpp" + +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) +std::string generate_wg_broadcast_1D_kernel_code() +{ + return + "__kernel void test_wg_broadcast(global uint *input, global uint *output)\n" + "{\n" + " ulong tid = get_global_id(0);\n" + " uint result = work_group_broadcast(input[tid], get_group_id(0) % get_local_size(0));\n" + " output[tid] = result;\n" + "}\n"; +} +std::string generate_wg_broadcast_2D_kernel_code() +{ + return + "__kernel void test_wg_broadcast(global uint *input, global uint *output)\n" + "{\n" + " ulong tid_x = get_global_id(0);\n" + " ulong tid_y = get_global_id(1);\n" + " size_t x = get_group_id(0) % get_local_size(0);\n" + " size_t y = get_group_id(1) % get_local_size(1);\n" + " size_t idx = (tid_y * get_global_size(0)) + tid_x;\n" + " uint result = work_group_broadcast(input[idx], x, y);\n" + " output[idx] = result;\n" + "}\n"; +} +std::string generate_wg_broadcast_3D_kernel_code() +{ + return + "__kernel void test_wg_broadcast(global uint *input, global uint *output)\n" + "{\n" + " ulong tid_x = get_global_id(0);\n" + " ulong tid_y = get_global_id(1);\n" + " ulong tid_z = get_global_id(2);\n" + " size_t x = get_group_id(0) % get_local_size(0);\n" + " size_t y = get_group_id(1) % get_local_size(1);\n" + " size_t z = get_group_id(2) % get_local_size(2);\n" + " ulong idx = (tid_z * get_global_size(1) * get_global_size(0)) + (tid_y * get_global_size(0)) + tid_x;\n" + " uint result = 
work_group_broadcast(input[idx], x, y, z);\n" + " output[idx] = result;\n" + "}\n"; +} +#else +std::string generate_wg_broadcast_1D_kernel_code() +{ + return "#include \n" + "#include \n" + "#include \n" + "using namespace cl;\n" + "__kernel void test_wg_broadcast(global_ptr input, global_ptr output)\n" + "{\n" + " ulong tid = get_global_id(0);\n" + " uint result = work_group_broadcast(input[tid], get_group_id(0) % get_local_size(0));\n" + " output[tid] = result;\n" + "}\n"; +} +std::string generate_wg_broadcast_2D_kernel_code() +{ + return "#include \n" + "#include \n" + "#include \n" + "using namespace cl;\n" + "__kernel void test_wg_broadcast(global_ptr input, global_ptr output)\n" + "{\n" + " ulong tid_x = get_global_id(0);\n" + " ulong tid_y = get_global_id(1);\n" + " size_t x = get_group_id(0) % get_local_size(0);\n" + " size_t y = get_group_id(1) % get_local_size(1);\n" + " size_t idx = (tid_y * get_global_size(0)) + tid_x;\n" + " uint result = work_group_broadcast(input[idx], x, y);\n" + " output[idx] = result;\n" + "}\n"; +} +std::string generate_wg_broadcast_3D_kernel_code() +{ + return "#include \n" + "#include \n" + "#include \n" + "using namespace cl;\n" + "__kernel void test_wg_broadcast(global_ptr input, global_ptr output)\n" + "{\n" + " ulong tid_x = get_global_id(0);\n" + " ulong tid_y = get_global_id(1);\n" + " ulong tid_z = get_global_id(2);\n" + " size_t x = get_group_id(0) % get_local_size(0);\n" + " size_t y = get_group_id(1) % get_local_size(1);\n" + " size_t z = get_group_id(2) % get_local_size(2);\n" + " ulong idx = (tid_z * get_global_size(1) * get_global_size(0)) + (tid_y * get_global_size(0)) + tid_x;\n" + " uint result = work_group_broadcast(input[idx], x, y, z);\n" + " output[idx] = result;\n" + "}\n"; +} +#endif + +int +verify_wg_broadcast_1D(const std::vector &in, const std::vector &out, size_t n, size_t wg_size) +{ + size_t i, j; + size_t group_id; + + for (i=0,group_id=0; i wg_size ? 
wg_size : (n-i); + cl_uint broadcast_result = in[i + (group_id % local_size)]; + for (j=0; j &in, const std::vector &out, + size_t nx, size_t ny, + size_t wg_size_x, size_t wg_size_y) +{ + size_t i, j, _i, _j; + size_t group_id_x, group_id_y; + + for (i=0,group_id_y=0; i wg_size_y ? wg_size_y : (ny-i); + for (_i=0; _i < local_size_y; _i++) + { + for (j=0,group_id_x=0; j wg_size_x ? wg_size_x : (nx-j); + cl_uint broadcast_result = in[(i + y) * nx + (j + x)]; + for (_j=0; _j < local_size_x; _j++) + { + size_t indx = (i + _i) * nx + (j + _j); + if ( broadcast_result != out[indx] ) + { + log_info("%lu\n", indx); + log_info("%lu\n", ((i + y) * nx + (j + x))); + log_info("%lu\n", out.size()); + log_info("work_group_broadcast: Error at (%lu, %lu): expected = %u, got = %u\n", j+_j, i+_i, broadcast_result, out[indx]); + return -1; + } + } + } + } + } + + return CL_SUCCESS; +} + +int +verify_wg_broadcast_3D(const std::vector &in, const std::vector &out, + size_t nx, size_t ny, size_t nz, + size_t wg_size_x, size_t wg_size_y, size_t wg_size_z) +{ + size_t i, j, k, _i, _j, _k; + size_t group_id_x, group_id_y, group_id_z; + + for (i=0,group_id_z=0; i wg_size_z ? wg_size_z : (nz-i); + for (_i=0; _i < local_size_z; _i++) + { + for (j=0,group_id_y=0; j wg_size_y ? wg_size_y : (ny-j); + for (_j=0; _j < local_size_y; _j++) + { + for (k=0,group_id_x=0; k wg_size_x ? 
wg_size_x : (nx-k); + cl_uint broadcast_result = in[(i + z) * ny * nz + (j + y) * nx + (k + x)]; + for (_k=0; _k < local_size_x; _k++) + { + size_t indx = (i + _i) * ny * nx + (j + _j) * nx + (k + _k); + if ( broadcast_result != out[indx] ) + { + log_info( + "work_group_broadcast: Error at (%lu, %lu, %lu): expected = %u, got = %u\n", + k+_k, j+_j, i+_i, + broadcast_result, out[indx]); + return -1; + } + } + } + } + } + } + } + return CL_SUCCESS; +} + +std::vector generate_input_wg_broadcast(size_t count, size_t wg_size) +{ + std::vector input(count, cl_uint(0)); + size_t j = wg_size; + for(size_t i = 0; i < count; i++) + { + input[i] = static_cast(j); + j--; + if(j == 0) + { + j = wg_size; + } + } + return input; +} + +std::vector generate_output_wg_broadcast(size_t count, size_t wg_size) +{ + (void) wg_size; + return std::vector(count, cl_uint(1)); +} + +int work_group_broadcast(cl_device_id device, cl_context context, cl_command_queue queue, size_t count, size_t dim) +{ + cl_mem buffers[2]; + cl_program program; + cl_kernel kernel; + size_t flat_wg_size; + size_t wg_size[] = { 1, 1, 1}; + size_t work_size[] = { 1, 1, 1}; + int err; + + // Get kernel source code + std::string code_str; + if(dim > 2) code_str = generate_wg_broadcast_3D_kernel_code(); + else if(dim > 1) code_str = generate_wg_broadcast_2D_kernel_code(); + else code_str = generate_wg_broadcast_1D_kernel_code(); + +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +// Only OpenCL C++ to SPIR-V compilation +#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) + err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_broadcast"); + RETURN_ON_ERROR(err) + return err; +// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) +#elif defined(DEVELOPMENT) 
&& defined(USE_OPENCLC_KERNELS) + err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_broadcast", "-cl-std=CL2.0", false); + RETURN_ON_ERROR(err) +#else + err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_broadcast"); + RETURN_ON_ERROR(err) +#endif + + // Get max flat workgroup size + err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &flat_wg_size, NULL); + RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo") + + // Set local work size + wg_size[0] = flat_wg_size; + if(dim > 2) + { + if (flat_wg_size >=512) + { + wg_size[0] = wg_size[1] = wg_size[2] = 8; + } + else if (flat_wg_size >= 64) + { + wg_size[0] = wg_size[1] = wg_size[2] = 4; + } + else if (flat_wg_size >= 8) + { + wg_size[0] = wg_size[1] = wg_size[2] = 2; + } + else + { + wg_size[0] = wg_size[1] = wg_size[2] = 1; + } + } + else if(dim > 1) + { + if (flat_wg_size >= 256) + { + wg_size[0] = wg_size[1] = 16; + } + else if (flat_wg_size >=64) + { + wg_size[0] = wg_size[1] = 8; + } + else if (flat_wg_size >= 16) + { + wg_size[0] = wg_size[1] = 4; + } + else + { + wg_size[0] = wg_size[1] = 1; + } + } + + // Calculate flat local work size + flat_wg_size = wg_size[0]; + if(dim > 1) flat_wg_size *= wg_size[1]; + if(dim > 2) flat_wg_size *= wg_size[2]; + + // Calculate global work size + size_t flat_work_size = count; + // 3D + if(dim > 2) + { + size_t wg_number = static_cast( + std::ceil(static_cast(count / 3) / (wg_size[0] * wg_size[1] * wg_size[2])) + ); + work_size[0] = wg_number * wg_size[0]; + work_size[1] = wg_number * wg_size[1]; + work_size[2] = wg_number * wg_size[2]; + flat_work_size = work_size[0] * work_size[1] * work_size[2]; + } + // 2D + else if(dim > 1) + { + size_t wg_number = static_cast( + std::ceil(static_cast(count / 2) / (wg_size[0] * wg_size[1])) + ); + work_size[0] = wg_number * wg_size[0]; + work_size[1] = wg_number * wg_size[1]; + flat_work_size = work_size[0] * work_size[1]; + } + // 1D + else + { + 
size_t wg_number = static_cast( + std::ceil(static_cast(count) / wg_size[0]) + ); + flat_work_size = wg_number * wg_size[0]; + work_size[0] = flat_work_size; + } + + std::vector input = generate_input_wg_broadcast(flat_work_size, flat_wg_size); + std::vector output = generate_output_wg_broadcast(flat_work_size, flat_wg_size); + + buffers[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * input.size(), NULL, &err); + RETURN_ON_CL_ERROR(err, "clCreateBuffer"); + + buffers[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * output.size(), NULL, &err); + RETURN_ON_CL_ERROR(err, "clCreateBuffer"); + + err = clEnqueueWriteBuffer( + queue, buffers[0], CL_TRUE, 0, sizeof(cl_uint) * input.size(), + static_cast(input.data()), 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer"); + + err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]); + err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]); + RETURN_ON_CL_ERROR(err, "clSetKernelArg"); + + err = clEnqueueNDRangeKernel(queue, kernel, dim, NULL, work_size, wg_size, 0, NULL, NULL); + RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel"); + + err = clEnqueueReadBuffer( + queue, buffers[1], CL_TRUE, 0, sizeof(cl_uint) * output.size(), + static_cast(output.data()), 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer"); + + int result = CL_SUCCESS; + // 3D + if(dim > 2) + { + result = verify_wg_broadcast_3D( + input, output, + work_size[0], work_size[1], work_size[2], + wg_size[0], wg_size[1], wg_size[2] + ); + } + // 2D + else if(dim > 1) + { + result = verify_wg_broadcast_2D( + input, output, + work_size[0], work_size[1], + wg_size[0], wg_size[1] + ); + } + // 1D + else + { + result = verify_wg_broadcast_1D( + input, output, + work_size[0], + wg_size[0] + ); + } + + RETURN_ON_ERROR_MSG(result, "work_group_broadcast_%luD failed", dim); + log_info("work_group_broadcast_%luD passed\n", dim); + + 
clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseKernel(kernel); + clReleaseProgram(program); + return err; +} + +AUTO_TEST_CASE(test_work_group_broadcast) +(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + int error = CL_SUCCESS; + int local_error = CL_SUCCESS; + + local_error = work_group_broadcast(device, context, queue, n_elems, 1); + CHECK_ERROR(local_error) + error |= local_error; + + local_error = work_group_broadcast(device, context, queue, n_elems, 2); + CHECK_ERROR(local_error) + error |= local_error; + + local_error = work_group_broadcast(device, context, queue, n_elems, 3); + CHECK_ERROR(local_error) + error |= local_error; + + if(error != CL_SUCCESS) + return -1; + return CL_SUCCESS; +} + +#endif // TEST_CONFORMANCE_CLCPP_WG_TEST_WG_BROADCAST_HPP diff --git a/test_conformance/clcpp/workgroups/test_wg_reduce.hpp b/test_conformance/clcpp/workgroups/test_wg_reduce.hpp new file mode 100644 index 00000000..616cbdb0 --- /dev/null +++ b/test_conformance/clcpp/workgroups/test_wg_reduce.hpp @@ -0,0 +1,331 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef TEST_CONFORMANCE_CLCPP_WG_TEST_WG_REDUCE_HPP +#define TEST_CONFORMANCE_CLCPP_WG_TEST_WG_REDUCE_HPP + +#include +#include +#include + +// Common for all OpenCL C++ tests +#include "../common.hpp" +// Common for tests of work-group functions +#include "common.hpp" + +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) +template +std::string generate_wg_reduce_kernel_code() +{ + return + "__kernel void test_wg_reduce(global " + type_name() + " *input, global " + type_name() + " *output)\n" + "{\n" + " ulong tid = get_global_id(0);\n" + "\n" + " " + type_name() + " result = work_group_reduce_" + to_string(op) + "(input[tid]);\n" + " output[tid] = result;\n" + "}\n"; +} +#else +template +std::string generate_wg_reduce_kernel_code() +{ + return "#include \n" + "#include \n" + "#include \n" + "using namespace cl;\n" + "__kernel void test_wg_reduce(global_ptr<" + type_name() + "[]> input, " + "global_ptr<" + type_name() + "[]> output)\n" + "{\n" + " ulong tid = get_global_id(0);\n" + " " + type_name() + " result = work_group_reduce(input[tid]);\n" + " output[tid] = result;\n" + "}\n"; +} +#endif + +template +int verify_wg_reduce_add(const std::vector &in, const std::vector &out, size_t wg_size) +{ + size_t i, j; + for (i = 0; i < in.size(); i += wg_size) + { + CL_INT_TYPE sum = 0; + // Work-group sum + for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j++) + sum += in[i + j]; + + // Check if all work-items in work-group stored correct value + for (j = 0; j < ((in.size() - i) > wg_size ? 
wg_size : (in.size() - i)); j++) + { + if (sum != out[i + j]) + { + log_info( + "work_group_reduce_add %s: Error at %lu: expected = %lu, got = %lu\n", + type_name().c_str(), + i + j, + static_cast(sum), + static_cast(out[i + j])); + return -1; + } + } + } + return 0; +} + +template +int verify_wg_reduce_min(const std::vector &in, const std::vector &out, size_t wg_size) +{ + size_t i, j; + for (i = 0; i < in.size(); i += wg_size) + { + CL_INT_TYPE min = (std::numeric_limits::max)(); + // Work-group min + for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j++) + min = std::min(min, in[i + j]); + + // Check if all work-items in work-group stored correct value + for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j++) + { + if (min != out[i + j]) + { + log_info( + "work_group_reduce_min %s: Error at %lu: expected = %lu, got = %lu\n", + type_name().c_str(), + i + j, + static_cast(min), + static_cast(out[i + j])); + return -1; + } + } + } + return 0; +} + +template +int verify_wg_reduce_max(const std::vector &in, const std::vector &out, size_t wg_size) +{ + size_t i, j; + for (i = 0; i < in.size(); i += wg_size) + { + CL_INT_TYPE max = (std::numeric_limits::min)(); + // Work-group max + for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j++) + max = std::max(max, in[i + j]); + + // Check if all work-items in work-group stored correct value + for (j = 0; j < ((in.size() - i) > wg_size ? 
wg_size : (in.size() - i)); j++) + { + if (max != out[i + j]) + { + log_info( + "work_group_reduce_max %s: Error at %lu: expected = %lu, got = %lu\n", + type_name().c_str(), + i + j, + static_cast(max), + static_cast(out[i + j])); + return -1; + } + } + } + return 0; +} + +template +int verify_wg_reduce(const std::vector &in, const std::vector &out, size_t wg_size) +{ + switch (op) + { + case work_group_op::add: + return verify_wg_reduce_add(in, out, wg_size); + case work_group_op::min: + return verify_wg_reduce_min(in, out, wg_size); + case work_group_op::max: + return verify_wg_reduce_max(in, out, wg_size); + } + return -1; +} + +template +int work_group_reduce(cl_device_id device, cl_context context, cl_command_queue queue, size_t count) +{ + // don't run test for unsupported types + if(!type_supported(device)) + { + return CL_SUCCESS; + } + + cl_mem buffers[2]; + cl_program program; + cl_kernel kernel; + size_t wg_size; + size_t work_size[1]; + int err; + + std::string code_str = generate_wg_reduce_kernel_code(); +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +// Only OpenCL C++ to SPIR-V compilation +#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) + err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_reduce"); + RETURN_ON_ERROR(err) + return err; +// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) +#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_reduce", "-cl-std=CL2.0", false); + RETURN_ON_ERROR(err) +#else + err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_reduce"); + RETURN_ON_ERROR(err) +#endif + + err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, 
sizeof(size_t), &wg_size, NULL); + RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo") + + // Calculate global work size + size_t flat_work_size; + size_t wg_number = static_cast( + std::ceil(static_cast(count) / wg_size) + ); + flat_work_size = wg_number * wg_size; + work_size[0] = flat_work_size; + + std::vector input = generate_input(flat_work_size, wg_size); + std::vector output = generate_output(flat_work_size, wg_size); + + buffers[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(CL_INT_TYPE) * input.size(), NULL, &err); + RETURN_ON_CL_ERROR(err, "clCreateBuffer"); + + buffers[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(CL_INT_TYPE) * output.size(), NULL, &err); + RETURN_ON_CL_ERROR(err, "clCreateBuffer"); + + err = clEnqueueWriteBuffer( + queue, buffers[0], CL_TRUE, 0, sizeof(CL_INT_TYPE) * input.size(), + static_cast(input.data()), 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer"); + + err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]); + err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]); + RETURN_ON_CL_ERROR(err, "clSetKernelArg"); + + err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, &wg_size, 0, NULL, NULL); + RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel"); + + err = clEnqueueReadBuffer( + queue, buffers[1], CL_TRUE, 0, sizeof(CL_INT_TYPE) * output.size(), + static_cast(output.data()), 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer"); + + if (verify_wg_reduce(input, output, wg_size) != CL_SUCCESS) + { + RETURN_ON_ERROR_MSG(-1, "work_group_reduce_%s %s failed", to_string(op).c_str(), type_name().c_str()); + } + log_info("work_group_reduce_%s %s passed\n", to_string(op).c_str(), type_name().c_str()); + + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseKernel(kernel); + clReleaseProgram(program); + return err; +} + +AUTO_TEST_CASE(test_work_group_reduce_add) +(cl_device_id device, cl_context 
context, cl_command_queue queue, int n_elems) +{ + int error = CL_SUCCESS; + int local_error = CL_SUCCESS; + + local_error = work_group_reduce(device, context, queue, n_elems); + CHECK_ERROR(local_error) + error |= local_error; + + local_error = work_group_reduce(device, context, queue, n_elems); + CHECK_ERROR(local_error) + error |= local_error; + + local_error = work_group_reduce(device, context, queue, n_elems); + CHECK_ERROR(local_error) + error |= local_error; + + local_error = work_group_reduce(device, context, queue, n_elems); + CHECK_ERROR(local_error) + error |= local_error; + + if(error != CL_SUCCESS) + return -1; + return CL_SUCCESS; +} + +AUTO_TEST_CASE(test_work_group_reduce_min) +(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + int error = CL_SUCCESS; + int local_error = CL_SUCCESS; + + local_error = work_group_reduce(device, context, queue, n_elems); + CHECK_ERROR(local_error) + error |= local_error; + + local_error = work_group_reduce(device, context, queue, n_elems); + CHECK_ERROR(local_error) + error |= local_error; + + local_error = work_group_reduce(device, context, queue, n_elems); + CHECK_ERROR(local_error) + error |= local_error; + + local_error = work_group_reduce(device, context, queue, n_elems); + CHECK_ERROR(local_error) + error |= local_error; + + if(error != CL_SUCCESS) + return -1; + return CL_SUCCESS; +} + +AUTO_TEST_CASE(test_work_group_reduce_max) +(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + int error = CL_SUCCESS; + int local_error = CL_SUCCESS; + + local_error = work_group_reduce(device, context, queue, n_elems); + CHECK_ERROR(local_error) + error |= local_error; + + local_error = work_group_reduce(device, context, queue, n_elems); + CHECK_ERROR(local_error) + error |= local_error; + + local_error = work_group_reduce(device, context, queue, n_elems); + CHECK_ERROR(local_error) + error |= local_error; + + local_error = work_group_reduce(device, context, 
queue, n_elems); + CHECK_ERROR(local_error) + error |= local_error; + + if(error != CL_SUCCESS) + return -1; + return CL_SUCCESS; +} + +#endif // TEST_CONFORMANCE_CLCPP_WG_TEST_WG_REDUCE_HPP diff --git a/test_conformance/clcpp/workgroups/test_wg_scan_exclusive.hpp b/test_conformance/clcpp/workgroups/test_wg_scan_exclusive.hpp new file mode 100644 index 00000000..35ec4b13 --- /dev/null +++ b/test_conformance/clcpp/workgroups/test_wg_scan_exclusive.hpp @@ -0,0 +1,324 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef TEST_CONFORMANCE_CLCPP_WG_TEST_WG_SCAN_EXCLUSIVE_HPP +#define TEST_CONFORMANCE_CLCPP_WG_TEST_WG_SCAN_EXCLUSIVE_HPP + +#include +#include + +// Common for all OpenCL C++ tests +#include "../common.hpp" +// Common for tests of work-group functions +#include "common.hpp" + +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) +template +std::string generate_wg_scan_exclusive_kernel_code() +{ + return + "__kernel void test_wg_scan_exclusive(global " + type_name() + " *input, global " + type_name() + " *output)\n" + "{\n" + " ulong tid = get_global_id(0);\n" + "\n" + " " + type_name() + " result = work_group_scan_exclusive_" + to_string(op) + "(input[tid]);\n" + " output[tid] = result;\n" + "}\n"; +} +#else +template +std::string generate_wg_scan_exclusive_kernel_code() +{ + return "#include \n" + "#include \n" + "#include \n" + "using namespace cl;\n" + "__kernel void test_wg_scan_exclusive(global_ptr<" + type_name() + "[]> input, " + "global_ptr<" + type_name() + "[]> output)\n" + "{\n" + " ulong tid = get_global_id(0);\n" + " " + type_name() + " result = work_group_scan_exclusive(input[tid]);\n" + " output[tid] = result;\n" + "}\n"; +} +#endif + +template +int verify_wg_scan_exclusive_add(const std::vector &in, const std::vector &out, size_t wg_size) +{ + size_t i, j; + for (i = 0; i < in.size(); i += wg_size) + { + CL_INT_TYPE sum = 0; + + // Check if all work-items in work-group wrote correct value + for (j = 0; j < ((in.size() - i) > wg_size ? 
wg_size : (in.size() - i)); j++) + { + if (sum != out[i + j]) + { + log_info( + "work_group_scan_exclusive_add %s: Error at %lu: expected = %lu, got = %lu\n", + type_name().c_str(), + i + j, + static_cast(sum), + static_cast(out[i + j])); + return -1; + } + sum += in[i + j]; + } + } + return CL_SUCCESS; +} + +template +int verify_wg_scan_exclusive_min(const std::vector &in, const std::vector &out, size_t wg_size) +{ + size_t i, j; + for (i = 0; i < in.size(); i += wg_size) + { + CL_INT_TYPE min = (std::numeric_limits::max)(); + + // Check if all work-items in work-group wrote correct value + for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j++) + { + if (min != out[i + j]) + { + log_info( + "work_group_scan_exclusive_min %s: Error at %lu: expected = %lu, got = %lu\n", + type_name().c_str(), + i + j, + static_cast(min), + static_cast(out[i + j])); + return -1; + } + min = (std::min)(min, in[i + j]); + } + } + return CL_SUCCESS; +} + +template +int verify_wg_scan_exclusive_max(const std::vector &in, const std::vector &out, size_t wg_size) +{ + size_t i, j; + for (i = 0; i < in.size(); i += wg_size) + { + CL_INT_TYPE max = (std::numeric_limits::min)(); + + // Check if all work-items in work-group wrote correct value + for (j = 0; j < ((in.size() - i) > wg_size ? 
wg_size : (in.size() - i)); j++) + { + if (max != out[i + j]) + { + log_info( + "work_group_scan_exclusive_max %s: Error at %lu: expected = %lu, got = %lu\n", + type_name().c_str(), + i + j, + static_cast(max), + static_cast(out[i + j])); + return -1; + } + max = (std::max)(max, in[i + j]); + } + } + return CL_SUCCESS; +} + +template +int verify_wg_scan_exclusive(const std::vector &in, const std::vector &out, size_t wg_size) +{ + switch (op) + { + case work_group_op::add: + return verify_wg_scan_exclusive_add(in, out, wg_size); + case work_group_op::min: + return verify_wg_scan_exclusive_min(in, out, wg_size); + case work_group_op::max: + return verify_wg_scan_exclusive_max(in, out, wg_size); + } + return -1; +} + +template +int work_group_scan_exclusive(cl_device_id device, cl_context context, cl_command_queue queue, size_t count) +{ + // don't run test for unsupported types + if(!type_supported(device)) + { + return CL_SUCCESS; + } + + cl_mem buffers[2]; + cl_program program; + cl_kernel kernel; + size_t wg_size; + size_t work_size[1]; + int err; + + std::string code_str = generate_wg_scan_exclusive_kernel_code(); +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +// Only OpenCL C++ to SPIR-V compilation +#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) + err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_scan_exclusive"); + RETURN_ON_ERROR(err) + return err; +// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) +#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_scan_exclusive", "-cl-std=CL2.0", false); + RETURN_ON_ERROR(err) +#else + err = create_opencl_kernel(context, &program, &kernel, code_str, 
"test_wg_scan_exclusive"); + RETURN_ON_ERROR(err) +#endif + + err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, NULL); + RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo") + + // Calculate global work size + size_t flat_work_size; + size_t wg_number = static_cast( + std::ceil(static_cast(count) / wg_size) + ); + flat_work_size = wg_number * wg_size; + work_size[0] = flat_work_size; + + std::vector input = generate_input(flat_work_size, wg_size); + std::vector output = generate_output(flat_work_size, wg_size); + + buffers[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(CL_INT_TYPE) * input.size(), NULL, &err); + RETURN_ON_CL_ERROR(err, "clCreateBuffer"); + + buffers[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(CL_INT_TYPE) * output.size(), NULL, &err); + RETURN_ON_CL_ERROR(err, "clCreateBuffer"); + + err = clEnqueueWriteBuffer( + queue, buffers[0], CL_TRUE, 0, sizeof(CL_INT_TYPE) * input.size(), + static_cast(input.data()), 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer"); + + err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]); + err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]); + RETURN_ON_CL_ERROR(err, "clSetKernelArg"); + + err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, &wg_size, 0, NULL, NULL); + RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel"); + + err = clEnqueueReadBuffer( + queue, buffers[1], CL_TRUE, 0, sizeof(CL_INT_TYPE) * output.size(), + static_cast(output.data()), 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer"); + + if (verify_wg_scan_exclusive(input, output, wg_size) != CL_SUCCESS) + { + RETURN_ON_ERROR_MSG(-1, "work_group_scan_exclusive_%s %s failed", to_string(op).c_str(), type_name().c_str()); + } + log_info("work_group_scan_exclusive_%s %s passed\n", to_string(op).c_str(), type_name().c_str()); + + clReleaseMemObject(buffers[0]); + 
clReleaseMemObject(buffers[1]); + clReleaseKernel(kernel); + clReleaseProgram(program); + return err; +} + +AUTO_TEST_CASE(test_work_group_scan_exclusive_add) +(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + int error = CL_SUCCESS; + int local_error = CL_SUCCESS; + + local_error = work_group_scan_exclusive(device, context, queue, n_elems); + CHECK_ERROR(local_error) + error |= local_error; + + local_error = work_group_scan_exclusive(device, context, queue, n_elems); + CHECK_ERROR(local_error) + error |= local_error; + + local_error = work_group_scan_exclusive(device, context, queue, n_elems); + CHECK_ERROR(local_error) + error |= local_error; + + local_error = work_group_scan_exclusive(device, context, queue, n_elems); + CHECK_ERROR(local_error) + error |= local_error; + + if(error != CL_SUCCESS) + return -1; + return CL_SUCCESS; +} + +AUTO_TEST_CASE(test_work_group_scan_exclusive_min) +(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + int error = CL_SUCCESS; + int local_error = CL_SUCCESS; + + local_error = work_group_scan_exclusive(device, context, queue, n_elems); + CHECK_ERROR(local_error) + error |= local_error; + + local_error = work_group_scan_exclusive(device, context, queue, n_elems); + CHECK_ERROR(local_error) + error |= local_error; + + local_error = work_group_scan_exclusive(device, context, queue, n_elems); + CHECK_ERROR(local_error) + error |= local_error; + + local_error = work_group_scan_exclusive(device, context, queue, n_elems); + CHECK_ERROR(local_error) + error |= local_error; + + if(error != CL_SUCCESS) + return -1; + return CL_SUCCESS; +} + +AUTO_TEST_CASE(test_work_group_scan_exclusive_max) +(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + int error = CL_SUCCESS; + int local_error = CL_SUCCESS; + + local_error = work_group_scan_exclusive(device, context, queue, n_elems); + CHECK_ERROR(local_error) + error |= local_error; + + local_error = 
work_group_scan_exclusive(device, context, queue, n_elems); + CHECK_ERROR(local_error) + error |= local_error; + + local_error = work_group_scan_exclusive(device, context, queue, n_elems); + CHECK_ERROR(local_error) + error |= local_error; + + local_error = work_group_scan_exclusive(device, context, queue, n_elems); + CHECK_ERROR(local_error) + error |= local_error; + + if(error != CL_SUCCESS) + return -1; + return CL_SUCCESS; +} + +#endif // TEST_CONFORMANCE_CLCPP_WG_TEST_WG_SCAN_EXCLUSIVE_HPP diff --git a/test_conformance/clcpp/workgroups/test_wg_scan_inclusive.hpp b/test_conformance/clcpp/workgroups/test_wg_scan_inclusive.hpp new file mode 100644 index 00000000..34096ebb --- /dev/null +++ b/test_conformance/clcpp/workgroups/test_wg_scan_inclusive.hpp @@ -0,0 +1,324 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef TEST_CONFORMANCE_CLCPP_WG_TEST_WG_SCAN_INCLUSIVE_HPP +#define TEST_CONFORMANCE_CLCPP_WG_TEST_WG_SCAN_INCLUSIVE_HPP + +#include +#include + +// Common for all OpenCL C++ tests +#include "../common.hpp" +// Common for tests of work-group functions +#include "common.hpp" + +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) +template +std::string generate_wg_scan_inclusive_kernel_code() +{ + return + "__kernel void test_wg_scan_inclusive(global " + type_name() + " *input, global " + type_name() + " *output)\n" + "{\n" + " ulong tid = get_global_id(0);\n" + "\n" + " " + type_name() + " result = work_group_scan_inclusive_" + to_string(op) + "(input[tid]);\n" + " output[tid] = result;\n" + "}\n"; +} +#else +template +std::string generate_wg_scan_inclusive_kernel_code() +{ + return "#include \n" + "#include \n" + "#include \n" + "using namespace cl;\n" + "__kernel void test_wg_scan_inclusive(global_ptr<" + type_name() + "[]> input, " + "global_ptr<" + type_name() + "[]> output)\n" + "{\n" + " ulong tid = get_global_id(0);\n" + " " + type_name() + " result = work_group_scan_inclusive(input[tid]);\n" + " output[tid] = result;\n" + "}\n"; +} +#endif + +template +int verify_wg_scan_inclusive_add(const std::vector &in, const std::vector &out, size_t wg_size) +{ + size_t i, j; + for (i = 0; i < in.size(); i += wg_size) + { + CL_INT_TYPE sum = 0; + + // Check if all work-items in work-group wrote correct value + for (j = 0; j < ((in.size() - i) > wg_size ? 
wg_size : (in.size() - i)); j++) + { + sum += in[i + j]; + if (sum != out[i + j]) + { + log_info( + "work_group_scan_inclusive_add %s: Error at %lu: expected = %lu, got = %lu\n", + type_name().c_str(), + i + j, + static_cast(sum), + static_cast(out[i + j])); + return -1; + } + } + } + return CL_SUCCESS; +} + +template +int verify_wg_scan_inclusive_min(const std::vector &in, const std::vector &out, size_t wg_size) +{ + size_t i, j; + for (i = 0; i < in.size(); i += wg_size) + { + CL_INT_TYPE min = (std::numeric_limits::max)(); + + // Check if all work-items in work-group wrote correct value + for (j = 0; j < ((in.size() - i) > wg_size ? wg_size : (in.size() - i)); j++) + { + min = (std::min)(min, in[i + j]); + if (min != out[i + j]) + { + log_info( + "work_group_scan_inclusive_min %s: Error at %lu: expected = %lu, got = %lu\n", + type_name().c_str(), + i + j, + static_cast(min), + static_cast(out[i + j])); + return -1; + } + } + } + return CL_SUCCESS; +} + +template +int verify_wg_scan_inclusive_max(const std::vector &in, const std::vector &out, size_t wg_size) +{ + size_t i, j; + for (i = 0; i < in.size(); i += wg_size) + { + CL_INT_TYPE max = (std::numeric_limits::min)(); + + // Check if all work-items in work-group wrote correct value + for (j = 0; j < ((in.size() - i) > wg_size ? 
wg_size : (in.size() - i)); j++) + { + max = (std::max)(max, in[i + j]); + if (max != out[i + j]) + { + log_info( + "work_group_scan_inclusive_max %s: Error at %lu: expected = %lu, got = %lu\n", + type_name().c_str(), + i + j, + static_cast(max), + static_cast(out[i + j])); + return -1; + } + } + } + return CL_SUCCESS; +} + +template +int verify_wg_scan_inclusive(const std::vector &in, const std::vector &out, size_t wg_size) +{ + switch (op) + { + case work_group_op::add: + return verify_wg_scan_inclusive_add(in, out, wg_size); + case work_group_op::min: + return verify_wg_scan_inclusive_min(in, out, wg_size); + case work_group_op::max: + return verify_wg_scan_inclusive_max(in, out, wg_size); + } + return -1; +} + +template +int work_group_scan_inclusive(cl_device_id device, cl_context context, cl_command_queue queue, size_t count) +{ + // don't run test for unsupported types + if(!type_supported(device)) + { + return CL_SUCCESS; + } + + cl_mem buffers[2]; + cl_program program; + cl_kernel kernel; + size_t wg_size; + size_t work_size[1]; + int err; + + std::string code_str = generate_wg_scan_inclusive_kernel_code(); +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +// Only OpenCL C++ to SPIR-V compilation +#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) + err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_scan_inclusive"); + RETURN_ON_ERROR(err) + return err; +// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) +#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + err = create_opencl_kernel(context, &program, &kernel, code_str, "test_wg_scan_inclusive", "-cl-std=CL2.0", false); + RETURN_ON_ERROR(err) +#else + err = create_opencl_kernel(context, &program, &kernel, code_str, 
"test_wg_scan_inclusive"); + RETURN_ON_ERROR(err) +#endif + + err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, NULL); + RETURN_ON_CL_ERROR(err, "clGetKernelWorkGroupInfo") + + // Calculate global work size + size_t flat_work_size; + size_t wg_number = static_cast( + std::ceil(static_cast(count) / wg_size) + ); + flat_work_size = wg_number * wg_size; + work_size[0] = flat_work_size; + + std::vector input = generate_input(flat_work_size, wg_size); + std::vector output = generate_output(flat_work_size, wg_size); + + buffers[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(CL_INT_TYPE) * input.size(), NULL, &err); + RETURN_ON_CL_ERROR(err, "clCreateBuffer"); + + buffers[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(CL_INT_TYPE) * output.size(), NULL, &err); + RETURN_ON_CL_ERROR(err, "clCreateBuffer"); + + err = clEnqueueWriteBuffer( + queue, buffers[0], CL_TRUE, 0, sizeof(CL_INT_TYPE) * input.size(), + static_cast(input.data()), 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(err, "clEnqueueWriteBuffer"); + + err = clSetKernelArg(kernel, 0, sizeof(buffers[0]), &buffers[0]); + err |= clSetKernelArg(kernel, 1, sizeof(buffers[1]), &buffers[1]); + RETURN_ON_CL_ERROR(err, "clSetKernelArg"); + + err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, &wg_size, 0, NULL, NULL); + RETURN_ON_CL_ERROR(err, "clEnqueueNDRangeKernel"); + + err = clEnqueueReadBuffer( + queue, buffers[1], CL_TRUE, 0, sizeof(CL_INT_TYPE) * output.size(), + static_cast(output.data()), 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(err, "clEnqueueReadBuffer"); + + if (verify_wg_scan_inclusive(input, output, wg_size) != CL_SUCCESS) + { + RETURN_ON_ERROR_MSG(-1, "work_group_scan_inclusive_%s %s failed", to_string(op).c_str(), type_name().c_str()); + } + log_info("work_group_scan_inclusive_%s %s passed\n", to_string(op).c_str(), type_name().c_str()); + + clReleaseMemObject(buffers[0]); + 
clReleaseMemObject(buffers[1]); + clReleaseKernel(kernel); + clReleaseProgram(program); + return err; +} + +AUTO_TEST_CASE(test_work_group_scan_inclusive_add) +(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + int error = CL_SUCCESS; + int local_error = CL_SUCCESS; + + local_error = work_group_scan_inclusive(device, context, queue, n_elems); + CHECK_ERROR(local_error) + error |= local_error; + + local_error = work_group_scan_inclusive(device, context, queue, n_elems); + CHECK_ERROR(local_error) + error |= local_error; + + local_error = work_group_scan_inclusive(device, context, queue, n_elems); + CHECK_ERROR(local_error) + error |= local_error; + + local_error = work_group_scan_inclusive(device, context, queue, n_elems); + CHECK_ERROR(local_error) + error |= local_error; + + if(error != CL_SUCCESS) + return -1; + return CL_SUCCESS; +} + +AUTO_TEST_CASE(test_work_group_scan_inclusive_min) +(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + int error = CL_SUCCESS; + int local_error = CL_SUCCESS; + + local_error = work_group_scan_inclusive(device, context, queue, n_elems); + CHECK_ERROR(local_error) + error |= local_error; + + local_error = work_group_scan_inclusive(device, context, queue, n_elems); + CHECK_ERROR(local_error) + error |= local_error; + + local_error = work_group_scan_inclusive(device, context, queue, n_elems); + CHECK_ERROR(local_error) + error |= local_error; + + local_error = work_group_scan_inclusive(device, context, queue, n_elems); + CHECK_ERROR(local_error) + error |= local_error; + + if(error != CL_SUCCESS) + return -1; + return CL_SUCCESS; +} + +AUTO_TEST_CASE(test_work_group_scan_inclusive_max) +(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + int error = CL_SUCCESS; + int local_error = CL_SUCCESS; + + local_error = work_group_scan_inclusive(device, context, queue, n_elems); + CHECK_ERROR(local_error) + error |= local_error; + + local_error = 
work_group_scan_inclusive(device, context, queue, n_elems); + CHECK_ERROR(local_error) + error |= local_error; + + local_error = work_group_scan_inclusive(device, context, queue, n_elems); + CHECK_ERROR(local_error) + error |= local_error; + + local_error = work_group_scan_inclusive(device, context, queue, n_elems); + CHECK_ERROR(local_error) + error |= local_error; + + if(error != CL_SUCCESS) + return -1; + return CL_SUCCESS; +} + +#endif // TEST_CONFORMANCE_CLCPP_WG_TEST_WG_SCAN_INCLUSIVE_HPP diff --git a/test_conformance/clcpp/workitems/CMakeLists.txt b/test_conformance/clcpp/workitems/CMakeLists.txt new file mode 100644 index 00000000..9aa2cdf3 --- /dev/null +++ b/test_conformance/clcpp/workitems/CMakeLists.txt @@ -0,0 +1,12 @@ +set(MODULE_NAME CPP_WORKITEMS) + +set(${MODULE_NAME}_SOURCES + main.cpp + ../../../test_common/harness/errorHelpers.c + ../../../test_common/harness/testHarness.c + ../../../test_common/harness/kernelHelpers.c + ../../../test_common/harness/msvc9.c + ../../../test_common/harness/parseParameters.cpp +) + +include(../../CMakeCommon.txt) diff --git a/test_conformance/clcpp/workitems/main.cpp b/test_conformance/clcpp/workitems/main.cpp new file mode 100644 index 00000000..412e4ea6 --- /dev/null +++ b/test_conformance/clcpp/workitems/main.cpp @@ -0,0 +1,30 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../common.hpp" + +#include "test_workitems.hpp" + + +int main(int argc, const char *argv[]) +{ + // Get list to all test functions + std::vector testfn_list = autotest::test_suite::get_test_functions(); + // Get names of all test functions + std::vector testfn_names = autotest::test_suite::get_test_names(); + // Create a vector of pointers to the names test functions + std::vector testfn_names_c_str = autotest::get_strings_ptrs(testfn_names); + return runTestHarness(argc, argv, testfn_list.size(), testfn_list.data(), testfn_names_c_str.data(), false, false, 0); +} diff --git a/test_conformance/clcpp/workitems/test_workitems.hpp b/test_conformance/clcpp/workitems/test_workitems.hpp new file mode 100644 index 00000000..eb5087c5 --- /dev/null +++ b/test_conformance/clcpp/workitems/test_workitems.hpp @@ -0,0 +1,418 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef TEST_CONFORMANCE_CLCPP_WI_TEST_WORKITEMS_HPP +#define TEST_CONFORMANCE_CLCPP_WI_TEST_WORKITEMS_HPP + +#include +#include +#include + +// Common for all OpenCL C++ tests +#include "../common.hpp" + + +namespace test_workitems { + +struct test_options +{ + bool uniform_work_group_size; + size_t max_count; + size_t num_tests; +}; + +struct output_type +{ + cl_uint work_dim; + cl_ulong global_size[3]; + cl_ulong global_id[3]; + cl_ulong local_size[3]; + cl_ulong enqueued_local_size[3]; + cl_ulong local_id[3]; + cl_ulong num_groups[3]; + cl_ulong group_id[3]; + cl_ulong global_offset[3]; + cl_ulong global_linear_id; + cl_ulong local_linear_id; + cl_ulong sub_group_size; + cl_ulong max_sub_group_size; + cl_ulong num_sub_groups; + cl_ulong enqueued_num_sub_groups; + cl_ulong sub_group_id; + cl_ulong sub_group_local_id; +}; + +const std::string source_common = R"( +struct output_type +{ + uint work_dim; + ulong global_size[3]; + ulong global_id[3]; + ulong local_size[3]; + ulong enqueued_local_size[3]; + ulong local_id[3]; + ulong num_groups[3]; + ulong group_id[3]; + ulong global_offset[3]; + ulong global_linear_id; + ulong local_linear_id; + ulong sub_group_size; + ulong max_sub_group_size; + ulong num_sub_groups; + ulong enqueued_num_sub_groups; + ulong sub_group_id; + ulong sub_group_local_id; +}; +)"; + +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) +const std::string source = + source_common + + R"( + #ifdef cl_khr_subgroups + #pragma OPENCL EXTENSION cl_khr_subgroups : enable + #endif + + kernel void test(global struct output_type *output) + { + const ulong gid = get_global_linear_id(); + output[gid].work_dim = get_work_dim(); + for (uint dimindx = 0; dimindx < 3; dimindx++) + { + 
output[gid].global_size[dimindx] = get_global_size(dimindx); + output[gid].global_id[dimindx] = get_global_id(dimindx); + output[gid].local_size[dimindx] = get_local_size(dimindx); + output[gid].enqueued_local_size[dimindx] = get_enqueued_local_size(dimindx); + output[gid].local_id[dimindx] = get_local_id(dimindx); + output[gid].num_groups[dimindx] = get_num_groups(dimindx); + output[gid].group_id[dimindx] = get_group_id(dimindx); + output[gid].global_offset[dimindx] = get_global_offset(dimindx); + } + output[gid].global_linear_id = get_global_linear_id(); + output[gid].local_linear_id = get_local_linear_id(); + #ifdef cl_khr_subgroups + output[gid].sub_group_size = get_sub_group_size(); + output[gid].max_sub_group_size = get_max_sub_group_size(); + output[gid].num_sub_groups = get_num_sub_groups(); + output[gid].enqueued_num_sub_groups = get_enqueued_num_sub_groups(); + output[gid].sub_group_id = get_sub_group_id(); + output[gid].sub_group_local_id = get_sub_group_local_id(); + #endif + } + )"; +#else +const std::string source = + R"( + #include + #include + using namespace cl; + )" + + source_common + + R"( + + kernel void test(global_ptr output) + { + const size_t gid = get_global_linear_id(); + output[gid].work_dim = get_work_dim(); + for (uint dimindx = 0; dimindx < 3; dimindx++) + { + output[gid].global_size[dimindx] = get_global_size(dimindx); + output[gid].global_id[dimindx] = get_global_id(dimindx); + output[gid].local_size[dimindx] = get_local_size(dimindx); + output[gid].enqueued_local_size[dimindx] = get_enqueued_local_size(dimindx); + output[gid].local_id[dimindx] = get_local_id(dimindx); + output[gid].num_groups[dimindx] = get_num_groups(dimindx); + output[gid].group_id[dimindx] = get_group_id(dimindx); + output[gid].global_offset[dimindx] = get_global_offset(dimindx); + } + output[gid].global_linear_id = get_global_linear_id(); + output[gid].local_linear_id = get_local_linear_id(); + output[gid].sub_group_size = get_sub_group_size(); + 
output[gid].max_sub_group_size = get_max_sub_group_size(); + output[gid].num_sub_groups = get_num_sub_groups(); + output[gid].enqueued_num_sub_groups = get_enqueued_num_sub_groups(); + output[gid].sub_group_id = get_sub_group_id(); + output[gid].sub_group_local_id = get_sub_group_local_id(); + } + + )"; +#endif + +#define CHECK_EQUAL(result, expected, func_name) \ + if (result != expected) \ + { \ + RETURN_ON_ERROR_MSG(-1, \ + "Function %s failed. Expected: %s, got: %s", func_name, \ + format_value(expected).c_str(), format_value(result).c_str() \ + ); \ + } + +#define CHECK(expression, func_name) \ + if (expression) \ + { \ + RETURN_ON_ERROR_MSG(-1, \ + "Function %s returned incorrect result", func_name \ + ); \ + } + +int test_workitems(cl_device_id device, cl_context context, cl_command_queue queue, test_options options) +{ + int error = CL_SUCCESS; + + cl_program program; + cl_kernel kernel; + + std::string kernel_name = "test"; + +// ----------------------------------------------------------------------------------- +// ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------ +// ----------------------------------------------------------------------------------- +// Only OpenCL C++ to SPIR-V compilation +#if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION) + error = create_opencl_kernel( + context, &program, &kernel, + source, kernel_name + ); + RETURN_ON_ERROR(error) + return error; +// Use OpenCL C kernels instead of OpenCL C++ kernels (test C++ host code) +#elif defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + error = create_opencl_kernel( + context, &program, &kernel, + source, kernel_name, "-cl-std=CL2.0", false + ); + RETURN_ON_ERROR(error) +// Normal run +#else + error = create_opencl_kernel( + context, &program, &kernel, + source, kernel_name + ); + RETURN_ON_ERROR(error) +#endif + + size_t max_work_group_size; + size_t max_local_sizes[3]; + error = get_max_allowed_work_group_size(context, kernel, 
&max_work_group_size, max_local_sizes); + RETURN_ON_ERROR(error) + + bool check_sub_groups = true; + bool check_sub_groups_limits = true; +#if defined(DEVELOPMENT) && defined(USE_OPENCLC_KERNELS) + check_sub_groups = false; + check_sub_groups_limits = false; + if (is_extension_available(device, "cl_khr_subgroups")) + { + size_t major, minor; + error = get_device_version(device, &major, &minor); + RETURN_ON_ERROR(error) + check_sub_groups_limits = (major > 2 || major == 2 && minor >= 1); // clGetKernelSubGroupInfo is from 2.1 + check_sub_groups = true; + } +#endif + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution count_dis(1, options.max_count); + + for (int test = 0; test < options.num_tests; test++) + { + for (size_t dim = 1; dim <= 3; dim++) + { + size_t global_size[3] = { 1, 1, 1 }; + size_t global_offset[3] = { 0, 0, 0 }; + size_t enqueued_local_size[3] = { 1, 1, 1 }; + size_t count = count_dis(gen); + std::uniform_int_distribution global_size_dis(1, static_cast(pow(count, 1.0 / dim))); + for (int d = 0; d < dim; d++) + { + std::uniform_int_distribution enqueued_local_size_dis(1, max_local_sizes[d]); + global_size[d] = global_size_dis(gen); + global_offset[d] = global_size_dis(gen); + enqueued_local_size[d] = enqueued_local_size_dis(gen); + } + // Local work size must not exceed CL_KERNEL_WORK_GROUP_SIZE for this kernel + while (enqueued_local_size[0] * enqueued_local_size[1] * enqueued_local_size[2] > max_work_group_size) + { + // otherwise decrease it until it fits + for (int d = 0; d < dim; d++) + { + enqueued_local_size[d] = (std::max)((size_t)1, enqueued_local_size[d] / 2); + } + } + if (options.uniform_work_group_size) + { + for (int d = 0; d < dim; d++) + { + global_size[d] = get_uniform_global_size(global_size[d], enqueued_local_size[d]); + } + } + count = global_size[0] * global_size[1] * global_size[2]; + + cl_mem output_buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(output_type) * count, NULL, &error); + 
RETURN_ON_CL_ERROR(error, "clCreateBuffer") + + const char pattern = 0; + error = clEnqueueFillBuffer(queue, output_buffer, &pattern, sizeof(pattern), 0, sizeof(output_type) * count, 0, NULL, NULL); + RETURN_ON_CL_ERROR(error, "clEnqueueFillBuffer") + + error = clSetKernelArg(kernel, 0, sizeof(output_buffer), &output_buffer); + RETURN_ON_CL_ERROR(error, "clSetKernelArg") + + error = clEnqueueNDRangeKernel(queue, kernel, dim, global_offset, global_size, enqueued_local_size, 0, NULL, NULL); + RETURN_ON_CL_ERROR(error, "clEnqueueNDRangeKernel") + + std::vector output(count); + error = clEnqueueReadBuffer( + queue, output_buffer, CL_TRUE, + 0, sizeof(output_type) * count, + static_cast(output.data()), + 0, NULL, NULL + ); + RETURN_ON_CL_ERROR(error, "clEnqueueReadBuffer") + + error = clReleaseMemObject(output_buffer); + RETURN_ON_CL_ERROR(error, "clReleaseMemObject") + + size_t sub_group_count_for_ndrange = 0; + size_t max_sub_group_size_for_ndrange = 0; + if (check_sub_groups_limits) + { + error = clGetKernelSubGroupInfo(kernel, device, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE, + sizeof(size_t) * dim, enqueued_local_size, + sizeof(size_t), &sub_group_count_for_ndrange, NULL); + RETURN_ON_CL_ERROR(error, "clGetKernelSubGroupInfo") + + error = clGetKernelSubGroupInfo(kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE, + sizeof(size_t) * dim, enqueued_local_size, + sizeof(size_t), &max_sub_group_size_for_ndrange, NULL); + RETURN_ON_CL_ERROR(error, "clGetKernelSubGroupInfo") + } + + size_t num_groups[3]; + for (int d = 0; d < 3; d++) + num_groups[d] = static_cast(std::ceil(static_cast(global_size[d]) / enqueued_local_size[d])); + + size_t group_id[3]; + for (group_id[0] = 0; group_id[0] < num_groups[0]; group_id[0]++) + for (group_id[1] = 0; group_id[1] < num_groups[1]; group_id[1]++) + for (group_id[2] = 0; group_id[2] < num_groups[2]; group_id[2]++) + { + size_t local_size[3]; + for (int d = 0; d < 3; d++) + { + if (group_id[d] == num_groups[d] - 1) + local_size[d] = 
global_size[d] - group_id[d] * enqueued_local_size[d]; + else + local_size[d] = enqueued_local_size[d]; + } + + size_t local_id[3]; + for (local_id[0] = 0; local_id[0] < local_size[0]; local_id[0]++) + for (local_id[1] = 0; local_id[1] < local_size[1]; local_id[1]++) + for (local_id[2] = 0; local_id[2] < local_size[2]; local_id[2]++) + { + size_t global_id_wo_offset[3]; + size_t global_id[3]; + for (int d = 0; d < 3; d++) + { + global_id_wo_offset[d] = group_id[d] * enqueued_local_size[d] + local_id[d]; + global_id[d] = global_id_wo_offset[d] + global_offset[d]; + } + + // Ignore if the current work-item is outside of global work size (i.e. the work-group is non-uniform) + if (global_id_wo_offset[0] >= global_size[0] || + global_id_wo_offset[1] >= global_size[1] || + global_id_wo_offset[2] >= global_size[2]) break; + + const size_t global_linear_id = + global_id_wo_offset[2] * global_size[1] * global_size[0] + + global_id_wo_offset[1] * global_size[0] + + global_id_wo_offset[0]; + const size_t local_linear_id = + local_id[2] * local_size[1] * local_size[0] + + local_id[1] * local_size[0] + + local_id[0]; + + const output_type &o = output[global_linear_id]; + + CHECK_EQUAL(o.work_dim, dim, "get_work_dim") + for (int d = 0; d < 3; d++) + { + CHECK_EQUAL(o.global_size[d], global_size[d], "get_global_size") + CHECK_EQUAL(o.global_id[d], global_id[d], "get_global_id") + CHECK_EQUAL(o.local_size[d], local_size[d], "get_local_size") + CHECK_EQUAL(o.enqueued_local_size[d], enqueued_local_size[d], "get_enqueued_local_size") + CHECK_EQUAL(o.local_id[d], local_id[d], "get_local_id") + CHECK_EQUAL(o.num_groups[d], num_groups[d], "get_num_groups") + CHECK_EQUAL(o.group_id[d], group_id[d], "get_group_id") + CHECK_EQUAL(o.global_offset[d], global_offset[d], "get_global_offset") + } + + CHECK_EQUAL(o.global_linear_id, global_linear_id, "get_global_linear_id") + CHECK_EQUAL(o.local_linear_id, local_linear_id, "get_local_linear_id") + + // A few (but not all possible) sub-groups 
related checks + if (check_sub_groups) + { + if (check_sub_groups_limits) + { + CHECK_EQUAL(o.max_sub_group_size, max_sub_group_size_for_ndrange, "get_max_sub_group_size") + CHECK_EQUAL(o.enqueued_num_sub_groups, sub_group_count_for_ndrange, "get_enqueued_num_sub_groups") + } + CHECK(o.sub_group_size == 0 || o.sub_group_size > o.max_sub_group_size, "get_sub_group_size or get_max_sub_group_size") + CHECK(o.num_sub_groups == 0 || o.num_sub_groups > o.enqueued_num_sub_groups, "get_enqueued_num_sub_groups") + CHECK(o.sub_group_id >= o.num_sub_groups, "get_sub_group_id or get_num_sub_groups") + CHECK(o.sub_group_local_id >= o.sub_group_size, "get_sub_group_local_id or get_sub_group_size") + } + } + } + } + } + + clReleaseKernel(kernel); + clReleaseProgram(program); + return error; +} + +#undef CHECK_EQUAL +#undef CHECK + +AUTO_TEST_CASE(test_workitems_uniform) +(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + test_options options; + options.uniform_work_group_size = true; + options.max_count = num_elements; + options.num_tests = 1000; + return test_workitems(device, context, queue, options); +} + +AUTO_TEST_CASE(test_workitems_non_uniform) +(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + test_options options; + options.uniform_work_group_size = false; + options.max_count = num_elements; + options.num_tests = 1000; + return test_workitems(device, context, queue, options); +} + +} // namespace + +#endif // TEST_CONFORMANCE_CLCPP_WI_TEST_WORKITEMS_HPP diff --git a/test_conformance/commonfns/CMakeLists.txt b/test_conformance/commonfns/CMakeLists.txt new file mode 100644 index 00000000..677bca15 --- /dev/null +++ b/test_conformance/commonfns/CMakeLists.txt @@ -0,0 +1,33 @@ +set(MODULE_NAME COMMONFNS) + +set(${MODULE_NAME}_SOURCES + main.c + test_clamp.c + test_degrees.c + test_max.c + test_maxf.c + test_min.c + test_minf.c + test_mix.c + test_radians.c + test_step.c + test_stepf.c + 
test_smoothstep.c + test_smoothstepf.c + test_sign.c + test_fmax.c + test_fmin.c + test_fmaxf.c + test_fminf.c + test_binary_fn.c + ../../test_common/harness/errorHelpers.c + ../../test_common/harness/threadTesting.c + ../../test_common/harness/testHarness.c + ../../test_common/harness/kernelHelpers.c + ../../test_common/harness/mt19937.c + ../../test_common/harness/conversions.c + ../../test_common/harness/msvc9.c + ../../test_common/harness/parseParameters.cpp +) + +include(../CMakeCommon.txt) diff --git a/test_conformance/commonfns/Jamfile b/test_conformance/commonfns/Jamfile new file mode 100644 index 00000000..48bdaed3 --- /dev/null +++ b/test_conformance/commonfns/Jamfile @@ -0,0 +1,33 @@ +project + : requirements + gcc:-xc++ + msvc:"/TP" + ; + +exe test_commonfns + : main.c + test_binary_fn.c + test_clamp.c + test_degrees.c + test_fmax.c + test_fmaxf.c + test_fmin.c + test_fminf.c + test_max.c + test_maxf.c + test_min.c + test_minf.c + test_mix.c + test_radians.c + test_sign.c + test_smoothstep.c + test_smoothstepf.c + test_step.c + test_stepf.c + ; + +install dist + : test_commonfns + : debug:$(DIST)/debug/tests/test_conformance/commonfns + release:$(DIST)/release/tests/test_conformance/commonfns + ; diff --git a/test_conformance/commonfns/Makefile b/test_conformance/commonfns/Makefile new file mode 100644 index 00000000..474066f7 --- /dev/null +++ b/test_conformance/commonfns/Makefile @@ -0,0 +1,45 @@ +ifdef BUILD_WITH_ATF +ATF = -framework ATF +USE_ATF = -DUSE_ATF +endif + +SRCS = main.c test_clamp.c test_degrees.c \ + test_max.c test_maxf.c test_min.c test_minf.c \ + test_mix.c test_radians.c test_step.c test_stepf.c\ + test_smoothstep.c test_smoothstepf.c test_sign.c \ + test_fmax.c test_fmin.c test_fmaxf.c test_fminf.c test_binary_fn.c \ + ../../test_common/harness/errorHelpers.c \ + ../../test_common/harness/threadTesting.c \ + ../../test_common/harness/testHarness.c \ + ../../test_common/harness/conversions.c \ + ../../test_common/harness/mt19937.c \ 
+ ../../test_common/harness/kernelHelpers.c + +SOURCES = $(abspath $(SRCS)) + +LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries +LIBPATH += -L. +FRAMEWORK = $(SOURCES) +HEADERS = +TARGET = test_commonfns +INCLUDE = +COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32 -Os +CC = c++ +CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} +CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE) +LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${RC_CFLAGS} ${ATF} + +OBJECTS := ${SOURCES:.c=.o} +OBJECTS := ${OBJECTS:.cpp=.o} + +TARGETOBJECT = +all: $(TARGET) + +$(TARGET): $(OBJECTS) + $(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES) + +clean: + rm -f $(TARGET) $(OBJECTS) + +.DEFAULT: + @echo The target \"$@\" does not exist in Makefile. diff --git a/test_conformance/commonfns/main.c b/test_conformance/commonfns/main.c new file mode 100644 index 00000000..44b0b510 --- /dev/null +++ b/test_conformance/commonfns/main.c @@ -0,0 +1,87 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/compat.h" + +#include +#include +#include "procs.h" + +int g_arrVecSizes[kVectorSizeCount + kStrangeVectorSizeCount]; +int g_arrStrangeVectorSizes[kStrangeVectorSizeCount] = {3}; + +static void initVecSizes() { + int i; + for(i = 0; i < kVectorSizeCount; ++i) { + g_arrVecSizes[i] = (1< +#include +#include +#include + +#include "procs.h" + +const char *binary_fn_code_pattern = +"%s\n" /* optional pragma */ +"__kernel void test_fn(__global %s%s *x, __global %s%s *y, __global %s%s *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = %s(x[tid], y[tid]);\n" +"}\n"; + +const char *binary_fn_code_pattern_v3 = +"%s\n" /* optional pragma */ +"__kernel void test_fn(__global %s *x, __global %s *y, __global %s *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" vstore3(%s(vload3(tid,x), vload3(tid,y) ), tid, dst);\n" +"}\n"; + +const char *binary_fn_code_pattern_v3_scalar = +"%s\n" /* optional pragma */ +"__kernel void test_fn(__global %s *x, __global %s *y, __global %s *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" vstore3(%s(vload3(tid,x), y[tid] ), tid, dst);\n" +"}\n"; + +int test_binary_fn( cl_device_id device, cl_context context, cl_command_queue queue, int n_elems, + const char *fnName, bool vectorSecondParam, + binary_verify_float_fn floatVerifyFn, binary_verify_double_fn doubleVerifyFn ) +{ + cl_mem streams[6]; + cl_float *input_ptr[2], *output_ptr; + cl_double *input_ptr_double[2], *output_ptr_double=NULL; + cl_program *program; + cl_kernel *kernel; + size_t threads[1]; + int num_elements; + int err; + int i, j; + MTdata d; + + program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount*2); + kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount*2); + + num_elements = n_elems * (1 << (kTotalVecCount-1)); + + int test_double = 0; + if(is_extension_available( device, "cl_khr_fp64" )) + { + log_info("Testing doubles.\n"); + test_double = 1; + } + + for( i = 0; i < 2; i++ ) + { + 
input_ptr[i] = (cl_float*)malloc(sizeof(cl_float) * num_elements); + if (test_double) input_ptr_double[i] = (cl_double*)malloc(sizeof(cl_double) * num_elements); + } + output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements); + if (test_double) output_ptr_double = (cl_double*)malloc(sizeof(cl_double) * num_elements); + + for( i = 0; i < 3; i++ ) + { + streams[ i ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * num_elements, NULL, &err ); + test_error( err, "clCreateBuffer failed"); + } + + if (test_double) + for( i = 3; i < 6; i++ ) + { + streams[ i ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_double) * num_elements, NULL, &err ); + test_error( err, "clCreateBuffer failed"); + } + + d = init_genrand( gRandomSeed ); + for( j = 0; j < num_elements; j++ ) + { + input_ptr[0][j] = get_random_float(-0x20000000, 0x20000000, d); + input_ptr[1][j] = get_random_float(-0x20000000, 0x20000000, d); + if (test_double) + { + input_ptr_double[0][j] = get_random_double(-0x20000000, 0x20000000, d); + input_ptr_double[1][j] = get_random_double(-0x20000000, 0x20000000, d); + } + } + free_mtdata(d); d = NULL; + + for( i = 0; i < 2; i++ ) + { + err = clEnqueueWriteBuffer( queue, streams[ i ], CL_TRUE, 0, sizeof( cl_float ) * num_elements, input_ptr[ i ], 0, NULL, NULL ); + test_error( err, "Unable to write input buffer" ); + + if (test_double) + { + err = clEnqueueWriteBuffer( queue, streams[ 3 + i ], CL_TRUE, 0, sizeof( cl_double ) * num_elements, input_ptr_double[ i ], 0, NULL, NULL ); + test_error( err, "Unable to write input buffer" ); + } + } + + for( i = 0; i < kTotalVecCount; i++ ) + { + char programSrc[ 10240 ]; + char vecSizeNames[][ 3 ] = { "", "2", "4", "8", "16", "3" }; + + if(i >= kVectorSizeCount) { + // do vec3 print + + if(vectorSecondParam) { + sprintf( programSrc,binary_fn_code_pattern_v3, "", "float", "float", "float", fnName ); + } else { + sprintf( programSrc,binary_fn_code_pattern_v3_scalar, "", 
"float", "float", "float", fnName ); + } + } else { + // do regular + sprintf( programSrc, binary_fn_code_pattern, "", "float", vecSizeNames[ i ], "float", vectorSecondParam ? vecSizeNames[ i ] : "", "float", vecSizeNames[ i ], fnName ); + } + const char *ptr = programSrc; + err = create_single_kernel_helper( context, &program[ i ], &kernel[ i ], 1, &ptr, "test_fn" ); + test_error( err, "Unable to create kernel" ); + + if (test_double) + { + if(i >= kVectorSizeCount) { + if(vectorSecondParam) { + sprintf( programSrc, binary_fn_code_pattern_v3, "#pragma OPENCL EXTENSION cl_khr_fp64 : enable", + "double", "double", "double", fnName ); + } else { + + sprintf( programSrc, binary_fn_code_pattern_v3_scalar, "#pragma OPENCL EXTENSION cl_khr_fp64 : enable", + "double", "double", "double", fnName ); + } + } else { + sprintf( programSrc, binary_fn_code_pattern, "#pragma OPENCL EXTENSION cl_khr_fp64 : enable", + "double", vecSizeNames[ i ], "double", vectorSecondParam ? vecSizeNames[ i ] : "", "double", vecSizeNames[ i ], fnName ); + } + ptr = programSrc; + err = create_single_kernel_helper( context, &program[ kTotalVecCount + i ], &kernel[ kTotalVecCount + i ], 1, &ptr, "test_fn" ); + test_error( err, "Unable to create kernel" ); + } + } + + for( i = 0; i < kTotalVecCount; i++ ) + { + for( j = 0; j < 3; j++ ) + { + err = clSetKernelArg( kernel[ i ], j, sizeof( streams[ j ] ), &streams[ j ] ); + test_error( err, "Unable to set kernel argument" ); + } + + threads[0] = (size_t)n_elems; + + err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL ); + test_error( err, "Unable to execute kernel" ); + + err = clEnqueueReadBuffer( queue, streams[2], true, 0, sizeof(cl_float)*num_elements, (void *)output_ptr, 0, NULL, NULL ); + test_error( err, "Unable to read results" ); + + + + if( floatVerifyFn( input_ptr[0], input_ptr[1], output_ptr, n_elems, ((g_arrVecSizes[i])) ) ) + { + log_error(" float%d%s test failed\n", ((g_arrVecSizes[i])), vectorSecondParam 
? "" : ", float"); + err = -1; + } + else + { + log_info(" float%d%s test passed\n", ((g_arrVecSizes[i])), vectorSecondParam ? "" : ", float"); + err = 0; + } + + if (err) + break; + } + + if (test_double) + { + for( i = 0; i < kTotalVecCount; i++ ) + { + for( j = 0; j < 3; j++ ) + { + err = clSetKernelArg( kernel[ kTotalVecCount + i ], j, sizeof( streams[ 3 + j ] ), &streams[ 3 + j ] ); + test_error( err, "Unable to set kernel argument" ); + } + + threads[0] = (size_t)n_elems; + + err = clEnqueueNDRangeKernel( queue, kernel[kTotalVecCount + i], 1, NULL, threads, NULL, 0, NULL, NULL ); + test_error( err, "Unable to execute kernel" ); + + err = clEnqueueReadBuffer( queue, streams[5], CL_TRUE, 0, sizeof(cl_double)*num_elements, (void *)output_ptr_double, 0, NULL, NULL ); + test_error( err, "Unable to read results" ); + + if( doubleVerifyFn( input_ptr_double[0], input_ptr_double[1], output_ptr_double, n_elems, ((g_arrVecSizes[i])))) + { + log_error(" double%d%s test failed\n", ((g_arrVecSizes[i])), vectorSecondParam ? "" : ", double"); + err = -1; + } + else + { + log_info(" double%d%s test passed\n", ((g_arrVecSizes[i])), vectorSecondParam ? "" : ", double"); + err = 0; + } + + if (err) + break; + } + } + + + for( i = 0; i < ((test_double) ? 6 : 3); i++ ) + { + clReleaseMemObject(streams[i]); + } + for (i=0; i < ((test_double) ? kTotalVecCount * 2 : kTotalVecCount) ; i++) + { + clReleaseKernel(kernel[i]); + clReleaseProgram(program[i]); + } + free(input_ptr[0]); + free(input_ptr[1]); + free(output_ptr); + free(program); + free(kernel); + + if (test_double) + { + free(input_ptr_double[0]); + free(input_ptr_double[1]); + free(output_ptr_double); + } + + return err; +} + + diff --git a/test_conformance/commonfns/test_clamp.c b/test_conformance/commonfns/test_clamp.c new file mode 100644 index 00000000..9fb46749 --- /dev/null +++ b/test_conformance/commonfns/test_clamp.c @@ -0,0 +1,311 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include + +#include "procs.h" + +#ifndef M_PI +#define M_PI 3.14159265358979323846264338327950288 +#endif + +#define CLAMP_KERNEL( type ) \ + const char *clamp_##type##_kernel_code = \ + EMIT_PRAGMA_DIRECTIVE \ + "__kernel void test_clamp(__global " #type " *x, __global " #type " *minval, __global " #type " *maxval, __global " #type " *dst)\n" \ + "{\n" \ + " int tid = get_global_id(0);\n" \ + "\n" \ + " dst[tid] = clamp(x[tid], minval[tid], maxval[tid]);\n" \ + "}\n"; + +#define CLAMP_KERNEL_V( type, size) \ + const char *clamp_##type##size##_kernel_code = \ + EMIT_PRAGMA_DIRECTIVE \ + "__kernel void test_clamp(__global " #type #size " *x, __global " #type #size " *minval, __global " #type #size " *maxval, __global " #type #size " *dst)\n" \ + "{\n" \ + " int tid = get_global_id(0);\n" \ + "\n" \ + " dst[tid] = clamp(x[tid], minval[tid], maxval[tid]);\n" \ + "}\n"; + +#define CLAMP_KERNEL_V3( type, size) \ + const char *clamp_##type##size##_kernel_code = \ + EMIT_PRAGMA_DIRECTIVE \ + "__kernel void test_clamp(__global " #type " *x, __global " #type " *minval, __global " #type " *maxval, __global " #type " *dst)\n" \ + "{\n" \ + " int tid = get_global_id(0);\n" \ + "\n" \ + " vstore3(clamp(vload3(tid, x), vload3(tid,minval), vload3(tid,maxval)), tid, dst);\n" \ + "}\n"; + +#define EMIT_PRAGMA_DIRECTIVE " " +CLAMP_KERNEL( float 
) +CLAMP_KERNEL_V( float, 2 ) +CLAMP_KERNEL_V( float, 4 ) +CLAMP_KERNEL_V( float, 8 ) +CLAMP_KERNEL_V( float, 16 ) +CLAMP_KERNEL_V3( float, 3) +#undef EMIT_PRAGMA_DIRECTIVE + +#define EMIT_PRAGMA_DIRECTIVE "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" +CLAMP_KERNEL( double ) +CLAMP_KERNEL_V( double, 2 ) +CLAMP_KERNEL_V( double, 4 ) +CLAMP_KERNEL_V( double, 8 ) +CLAMP_KERNEL_V( double, 16 ) +CLAMP_KERNEL_V3( double, 3 ) +#undef EMIT_PRAGMA_DIRECTIVE + +const char *clamp_float_codes[] = { clamp_float_kernel_code, clamp_float2_kernel_code, clamp_float4_kernel_code, clamp_float8_kernel_code, clamp_float16_kernel_code, clamp_float3_kernel_code }; +const char *clamp_double_codes[] = { clamp_double_kernel_code, clamp_double2_kernel_code, clamp_double4_kernel_code, clamp_double8_kernel_code, clamp_double16_kernel_code, clamp_double3_kernel_code }; + +static int verify_clamp(float *x, float *minval, float *maxval, float *outptr, int n) +{ + float t; + int i; + + for (i=0; i +#include +#include +#include + +#include "procs.h" + +#ifndef M_PI +#define M_PI 3.14159265358979323846264338327950288 +#endif + +static int test_degrees_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems); + + +const char *degrees_kernel_code = +"__kernel void test_degrees(__global float *src, __global float *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = degrees(src[tid]);\n" +"}\n"; + +const char *degrees2_kernel_code = +"__kernel void test_degrees2(__global float2 *src, __global float2 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = degrees(src[tid]);\n" +"}\n"; + +const char *degrees4_kernel_code = +"__kernel void test_degrees4(__global float4 *src, __global float4 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = degrees(src[tid]);\n" +"}\n"; + +const char *degrees8_kernel_code = +"__kernel void test_degrees8(__global float8 *src, __global float8 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" 
+"\n" +" dst[tid] = degrees(src[tid]);\n" +"}\n"; + +const char *degrees16_kernel_code = +"__kernel void test_degrees16(__global float16 *src, __global float16 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = degrees(src[tid]);\n" +"}\n"; + +const char *degrees3_kernel_code = +"__kernel void test_degrees3(__global float *src, __global float *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" vstore3(degrees(vload3(tid,src)),tid,dst);\n" +"}\n"; + + +#define MAX_ERR 2.0f + +static int +verify_degrees(float *inptr, float *outptr, int n) +{ + float error, max_error = 0.0f; + double r, max_val = NAN; + int i, j, max_index = 0; + + for (i=0,j=0; i max_error) + { + max_error = error; + max_index = i; + max_val = r; + if( fabsf(error) > MAX_ERR) + { + log_error( "%d) Error @ %a: *%a vs %a (*%g vs %g) ulps: %f\n", i, inptr[i], r, outptr[i], r, outptr[i], error ); + return 1; + } + } + } + + log_info( "degrees: Max error %f ulps at %d: *%a vs %a (*%g vs %g)\n", max_error, max_index, max_val, outptr[max_index], max_val, outptr[max_index] ); + + return 0; +} + +int +test_degrees(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + cl_mem streams[2]; + cl_float *input_ptr[1], *output_ptr, *p; + cl_program *program; + cl_kernel *kernel; + void *values[2]; + size_t threads[1]; + int num_elements; + int err; + int i; + MTdata d; + + program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount); + kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount); + + num_elements = n_elems * (1 << (kTotalVecCount-1)); + + input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements); + output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements); + streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * num_elements, NULL, NULL ); + if (!streams[0]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + + streams[1] = clCreateBuffer( context, 
(cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * num_elements, NULL, NULL ); + if (!streams[1]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + + p = input_ptr[0]; + d = init_genrand( gRandomSeed ); + for (i=0; i max_error) + { + max_error = error; + max_index = i; + max_val = r; + if( fabsf(error) > MAX_ERR) + { + log_error( "%d) Error @ %a: *%a vs %a (*%g vs %g) ulps: %f\n", i, inptr[i], r, outptr[i], r, outptr[i], error ); + return 1; + } + } + } + + log_info( "degreesd: Max error %f ulps at %d: *%a vs %a (*%g vs %g)\n", max_error, max_index, max_val, outptr[max_index], max_val, outptr[max_index] ); + + return 0; +} + +static int +test_degrees_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + cl_mem streams[2]; + cl_double *input_ptr[1], *output_ptr, *p; + cl_program *program; + cl_kernel *kernel; + void *values[2]; + size_t threads[1]; + int num_elements; + int err; + int i; + MTdata d; + + program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount); + kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount); + + // TODO: line below is clearly wrong + num_elements = n_elems * (1 << (kTotalVecCount-1)); + + input_ptr[0] = (cl_double*)malloc(sizeof(cl_double) * num_elements); + output_ptr = (cl_double*)malloc(sizeof(cl_double) * num_elements); + streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_double) * num_elements, NULL, NULL ); + if (!streams[0]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + + streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_double) * num_elements, NULL, NULL ); + if (!streams[1]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + + p = input_ptr[0]; + d = init_genrand( gRandomSeed ); + for (i=0; i +#include +#include +#include + +#include "procs.h" + +static const char *fmax_kernel_code = + "__kernel void test_fmax(__global float *srcA, __global float *srcB, __global float 
*dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + " dst[tid] = fmax(srcA[tid], srcB[tid]);\n" + "}\n"; + +static const char *fmax2_kernel_code = + "__kernel void test_fmax2(__global float2 *srcA, __global float2 *srcB, __global float2 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + " dst[tid] = fmax(srcA[tid], srcB[tid]);\n" + "}\n"; + +static const char *fmax4_kernel_code = + "__kernel void test_fmax4(__global float4 *srcA, __global float4 *srcB, __global float4 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + " dst[tid] = fmax(srcA[tid], srcB[tid]);\n" + "}\n"; + +static const char *fmax8_kernel_code = + "__kernel void test_fmax8(__global float8 *srcA, __global float8 *srcB, __global float8 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + " dst[tid] = fmax(srcA[tid], srcB[tid]);\n" + "}\n"; + +static const char *fmax16_kernel_code = + "__kernel void test_fmax16(__global float16 *srcA, __global float16 *srcB, __global float16 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + " dst[tid] = fmax(srcA[tid], srcB[tid]);\n" + "}\n"; + + +static const char *fmax3_kernel_code = + "__kernel void test_fmax3(__global float *srcA, __global float *srcB, __global float *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + " vstore3(fmax(vload3(tid,srcA), vload3(tid,srcB)),tid,dst);\n" + "}\n"; + +static int +verify_fmax(float *inptrA, float *inptrB, float *outptr, int n) +{ + float r; + int i; + + for (i=0; i= inptrB[i]) ? 
inptrA[i] : inptrB[i]; + if (r != outptr[i]) + return -1; + } + + return 0; +} + +int +test_fmax(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + cl_mem streams[3]; + cl_float *input_ptr[2], *output_ptr, *p; + cl_program *program; + cl_kernel *kernel; + void *values[3]; + size_t threads[1]; + int num_elements; + int err; + int i; + MTdata d; + + program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount); + kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount); + + num_elements = n_elems * (1 << (kTotalVecCount-1)); + + input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements); + input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements); + output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements); + streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * num_elements, NULL, NULL ); + if (!streams[0]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * num_elements, NULL, NULL ); + if (!streams[1]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * num_elements, NULL, NULL ); + if (!streams[2]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + + d = init_genrand( gRandomSeed ); + p = input_ptr[0]; + for (i=0; i +#include +#include +#include + +#include "procs.h" + +static const char *fmax_kernel_code = + "__kernel void test_fmax(__global float *srcA, __global float *srcB, __global float *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + " dst[tid] = fmax(srcA[tid], srcB[tid]);\n" + "}\n"; + +static const char *fmax2_kernel_code = + "__kernel void test_fmax2(__global float2 *srcA, __global float *srcB, __global float2 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + " dst[tid] = fmax(srcA[tid], srcB[tid]);\n" + "}\n"; + +static const char 
*fmax4_kernel_code = + "__kernel void test_fmax4(__global float4 *srcA, __global float *srcB, __global float4 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + " dst[tid] = fmax(srcA[tid], srcB[tid]);\n" + "}\n"; + +static const char *fmax8_kernel_code = + "__kernel void test_fmax8(__global float8 *srcA, __global float *srcB, __global float8 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + " dst[tid] = fmax(srcA[tid], srcB[tid]);\n" + "}\n"; + +static const char *fmax16_kernel_code = + "__kernel void test_fmax16(__global float16 *srcA, __global float *srcB, __global float16 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + " dst[tid] = fmax(srcA[tid], srcB[tid]);\n" + "}\n"; + +static const char *fmax3_kernel_code = + "__kernel void test_fmax3(__global float *srcA, __global float *srcB, __global float *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + " vstore3(fmax(vload3(tid,srcA), srcB[tid]),tid,dst);\n" + "}\n"; + +static int +verify_fmax(float *inptrA, float *inptrB, float *outptr, int n, int veclen) +{ + float r; + int i, j; + + for (i=0; i= inptrB[ii]) ? 
inptrA[i] : inptrB[ii]; + if (r != outptr[i]) { + log_info("Verify noted discrepancy at %d (of %d) (vec %d, pos %d)\n", + i,n,ii,j); + log_info("SHould be %f, is %f\n", r, outptr[i]); + log_info("Taking max of (%f,%f)\n", inptrA[i], inptrB[i]); + return -1; + } + } + } + + return 0; +} + +int +test_fmaxf(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + cl_mem streams[3]; + cl_float *input_ptr[2], *output_ptr, *p; + cl_program *program; + cl_kernel *kernel; + void *values[3]; + size_t threads[1]; + int num_elements; + int err; + int i; + MTdata d; + + program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount); + kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount); + + num_elements = n_elems * (1 << (kTotalVecCount-1)); + + input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements); + input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements); + output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements); + streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * num_elements, NULL, NULL ); + if (!streams[0]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * num_elements, NULL, NULL ); + if (!streams[1]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * num_elements, NULL, NULL ); + if (!streams[2]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + + d = init_genrand( gRandomSeed ); + p = input_ptr[0]; + for (i=0; i +#include +#include +#include + +#include "procs.h" + +static const char *fmin_kernel_code = + "__kernel void test_fmin(__global float *srcA, __global float *srcB, __global float *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = fmin(srcA[tid], srcB[tid]);\n" + "}\n"; + +static const char *fmin2_kernel_code = + 
"__kernel void test_fmin2(__global float2 *srcA, __global float2 *srcB, __global float2 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = fmin(srcA[tid], srcB[tid]);\n" + "}\n"; + +static const char *fmin4_kernel_code = + "__kernel void test_fmin4(__global float4 *srcA, __global float4 *srcB, __global float4 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = fmin(srcA[tid], srcB[tid]);\n" + "}\n"; + +static const char *fmin8_kernel_code = + "__kernel void test_fmin8(__global float8 *srcA, __global float8 *srcB, __global float8 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = fmin(srcA[tid], srcB[tid]);\n" + "}\n"; + +static const char *fmin16_kernel_code = + "__kernel void test_fmin16(__global float16 *srcA, __global float16 *srcB, __global float16 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = fmin(srcA[tid], srcB[tid]);\n" + "}\n"; + + +static const char *fmin3_kernel_code = + "__kernel void test_fmin3(__global float *srcA, __global float *srcB, __global float *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + " vstore3(fmin(vload3(tid,srcA), vload3(tid,srcB)),tid,dst);\n" + "}\n"; + +int +verify_fmin(float *inptrA, float *inptrB, float *outptr, int n) +{ + float r; + int i; + + for (i=0; i inptrB[i]) ? 
inptrB[i] : inptrA[i]; + if (r != outptr[i]) + return -1; + } + + return 0; +} + +int +test_fmin(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + cl_mem streams[3]; + cl_float *input_ptr[2], *output_ptr, *p; + cl_program *program; + cl_kernel *kernel; + void *values[3]; + size_t threads[1]; + int num_elements; + int err; + int i; + MTdata d; + + program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount); + kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount); + + num_elements = n_elems * (1 << (kTotalVecCount-1));; + + input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements); + input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements); + output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements); + streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * num_elements, NULL, NULL ); + if (!streams[0]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * num_elements, NULL, NULL ); + if (!streams[1]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + + streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * num_elements, NULL, NULL ); + if (!streams[2]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + + d = init_genrand( gRandomSeed ); + p = input_ptr[0]; + for (i=0; i +#include +#include +#include + +#include "procs.h" + +static const char *fmin_kernel_code = + "__kernel void test_fmin(__global float *srcA, __global float *srcB, __global float *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + " dst[tid] = fmin(srcA[tid], srcB[tid]);\n" + "}\n"; + +static const char *fmin2_kernel_code = + "__kernel void test_fmin2(__global float2 *srcA, __global float *srcB, __global float2 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + " dst[tid] = fmin(srcA[tid], srcB[tid]);\n" + "}\n"; + +static const 
char *fmin4_kernel_code = + "__kernel void test_fmin4(__global float4 *srcA, __global float *srcB, __global float4 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + " dst[tid] = fmin(srcA[tid], srcB[tid]);\n" + "}\n"; + +static const char *fmin8_kernel_code = + "__kernel void test_fmin8(__global float8 *srcA, __global float *srcB, __global float8 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + " dst[tid] = fmin(srcA[tid], srcB[tid]);\n" + "}\n"; + +static const char *fmin16_kernel_code = + "__kernel void test_fmin16(__global float16 *srcA, __global float *srcB, __global float16 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + " dst[tid] = fmin(srcA[tid], srcB[tid]);\n" + "}\n"; + +static const char *fmin3_kernel_code = + "__kernel void test_fmin3(__global float *srcA, __global float *srcB, __global float *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + " vstore3(fmin(vload3(tid,srcA), srcB[tid]),tid,dst);\n" + "}\n"; + +static int +verify_fmin(float *inptrA, float *inptrB, float *outptr, int n, int veclen) +{ + float r; + int i, j; + + for (i=0; i inptrB[ii]) ? 
inptrB[ii] : inptrA[i]; + if (r != outptr[i]) + return -1; + } + } + + return 0; +} + +int +test_fminf(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + cl_mem streams[3]; + cl_float *input_ptr[2], *output_ptr, *p; + cl_program *program; + cl_kernel *kernel; + void *values[3]; + size_t threads[1]; + int num_elements; + int err; + int i; + MTdata d; + + program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount); + kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount); + + num_elements = n_elems * (1 << (kTotalVecCount-1)); + + input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements); + input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements); + output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements); + streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * num_elements, NULL, NULL ); + if (!streams[0]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * num_elements, NULL, NULL ); + if (!streams[1]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * num_elements, NULL, NULL ); + if (!streams[2]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + + d = init_genrand( gRandomSeed ); + p = input_ptr[0]; + for (i=0; i +#include +#include +#include + +#include "procs.h" + +static int max_verify_float( float *x, float *y, float *out, int numElements, int vecSize ) +{ + for( int i = 0; i < numElements * vecSize; i++ ) + { + float v = ( x[ i ] < y[ i ] ) ? y[ i ] : x[ i ]; + if( v != out[ i ] ) + { + log_error("x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. 
(index %d is vector %d, element %d, for vector size %d)\n", + i, x[i], i, y[i], i, out[i], v, i, i/vecSize, i%vecSize, vecSize); + return -1; + } + } + return 0; +} + +static int max_verify_double( double *x, double *y, double *out, int numElements, int vecSize ) +{ + for( int i = 0; i < numElements * vecSize; i++ ) + { + double v = ( x[ i ] < y[ i ] ) ? y[ i ] : x[ i ]; + if( v != out[ i ] ) + { + log_error("x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. (index %d is vector %d, element %d, for vector size %d)\n", + i, x[i], i, y[i], i, out[i], v, i, i/vecSize, i%vecSize, vecSize); + return -1; + } + } + return 0; +} + +int test_max(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + return test_binary_fn( device, context, queue, n_elems, "max", true, max_verify_float, max_verify_double ); +} + + diff --git a/test_conformance/commonfns/test_maxf.c b/test_conformance/commonfns/test_maxf.c new file mode 100644 index 00000000..b3992e46 --- /dev/null +++ b/test_conformance/commonfns/test_maxf.c @@ -0,0 +1,64 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include + +#include "procs.h" + +static int max_verify_float( float *x, float *y, float *out, int numElements, int vecSize ) +{ + for( int i = 0; i < numElements; i++ ) + { + for( int j = 0; j < vecSize; j++ ) + { + float v = ( x[ i * vecSize + j ] < y[ i ] ) ? y[ i ] : x[ i * vecSize + j ]; + if( v != out[ i * vecSize + j ] ) + { + log_error( "Failure for vector size %d at position %d, element %d:\n\t max(%a, %a) = *%a vs %a\n", vecSize, i, j, x[ i * vecSize + j ], y[i], v, out[ i * vecSize + j ] ); + return -1; + } + } + } + return 0; +} + +static int max_verify_double( double *x, double *y, double *out, int numElements, int vecSize ) +{ + for( int i = 0; i < numElements; i++ ) + { + for( int j = 0; j < vecSize; j++ ) + { + double v = ( x[ i * vecSize + j ] < y[ i ] ) ? y[ i ] : x[ i * vecSize + j ]; + if( v != out[ i * vecSize + j ] ) + { + log_error( "Failure for vector size %d at position %d, element %d:\n\t max(%a, %a) = *%a vs %a\n", vecSize, i, j, x[ i * vecSize + j ], y[i], v, out[ i * vecSize + j ] ); + return -1; + } + } + } + return 0; +} + +int test_maxf(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + return test_binary_fn( device, context, queue, n_elems, "max", false, max_verify_float, max_verify_double ); +} + + diff --git a/test_conformance/commonfns/test_min.c b/test_conformance/commonfns/test_min.c new file mode 100644 index 00000000..e93b39f0 --- /dev/null +++ b/test_conformance/commonfns/test_min.c @@ -0,0 +1,56 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include + +#include "procs.h" + +static int min_verify_float( float *x, float *y, float *out, int numElements, int vecSize ) +{ + for( int i = 0; i < numElements * vecSize; i++ ) + { + float v = ( y[ i ] < x[ i ] ) ? y[ i ] : x[ i ]; + if( v != out[ i ] ) { + log_error("x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. (index %d is vector %d, element %d, for vector size %d)\n", i, x[i], i, y[i], i, out[i], v, i, i/vecSize, i%vecSize, vecSize); + return -1; + } + } + return 0; +} + +static int min_verify_double( double *x, double *y, double *out, int numElements, int vecSize ) +{ + for( int i = 0; i < numElements * vecSize; i++ ) + { + double v = ( y[ i ] < x[ i ] ) ? y[ i ] : x[ i ]; + if( v != out[ i ] ) { + log_error("x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. (index %d is vector %d, element %d, for vector size %d)\n", i, x[i], i, y[i], i, out[i], v, i, i/vecSize, i%vecSize, vecSize); + return -1; + } + } + return 0; +} + +int test_min(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + return test_binary_fn( device, context, queue, n_elems, "min", true, min_verify_float, min_verify_double ); +} + + diff --git a/test_conformance/commonfns/test_minf.c b/test_conformance/commonfns/test_minf.c new file mode 100644 index 00000000..16f38e4f --- /dev/null +++ b/test_conformance/commonfns/test_minf.c @@ -0,0 +1,70 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include + +#include "procs.h" +#include "../../test_common/harness/errorHelpers.h" + +static int min_verify_float( float *x, float *y, float *out, int numElements, int vecSize ) +{ + for( int i = 0; i < numElements; i++ ) + { + for( int j = 0; j < vecSize; j++ ) + { + float v = ( y[ i ] < x[ i * vecSize + j ] ) ? y[ i ] : x[ i * vecSize + j ]; + if( v != out[ i * vecSize + j ] ) + { + log_error( "Failure for vector size %d at position %d, element %d:\n\t min(%a, %a) = *%a vs %a\n", vecSize, i, j, x[ i * vecSize + j ], y[i], v, out[ i * vecSize + j ] ); + return -1; + } + } + } + return 0; +} + +static int min_verify_double( double *x, double *y, double *out, int numElements, int vecSize ) +{ + int maxFail = 1; + int numFails = 0; + for( int i = 0; i < numElements; i++ ) + { + for( int j = 0; j < vecSize; j++ ) + { + double v = ( y[ i ] < x[ i * vecSize + j ] ) ? 
y[ i ] : x[ i * vecSize + j ]; + if( v != out[ i * vecSize + j ] ) + { + log_error( "Failure for vector size %d at position %d, element %d:\n\t min(%a, %a) = *%a vs %a\n", vecSize, i, j, x[ i * vecSize + j ], y[i], v, out[ i * vecSize + j ] ); + ++numFails; + if(numFails >= maxFail) { + return -1; + } + } + } + } + return 0; +} + +int test_minf(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + return test_binary_fn( device, context, queue, n_elems, "min", false, min_verify_float, min_verify_double ); +} + + diff --git a/test_conformance/commonfns/test_mix.c b/test_conformance/commonfns/test_mix.c new file mode 100644 index 00000000..b7beb822 --- /dev/null +++ b/test_conformance/commonfns/test_mix.c @@ -0,0 +1,195 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include + +#include "procs.h" + +const char *mix_kernel_code = +"__kernel void test_mix(__global float *srcA, __global float *srcB, __global float *srcC, __global float *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = mix(srcA[tid], srcB[tid], srcC[tid]);\n" +"}\n"; + +#define MAX_ERR 1e-3 + +float +verify_mix(float *inptrA, float *inptrB, float *inptrC, float *outptr, int n) +{ + float r, delta, max_err = 0.0f; + int i; + + for (i=0; i max_err) max_err = delta; + } + return max_err; +} + +int +test_mix(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + cl_mem streams[4]; + cl_float *input_ptr[3], *output_ptr, *p; + cl_program program; + cl_kernel kernel; + void *values[4]; + size_t lengths[1]; + size_t threads[1]; + float max_err; + int err; + int i; + MTdata d; + + input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements); + input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements); + input_ptr[2] = (cl_float*)malloc(sizeof(cl_float) * num_elements); + output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements); + streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * num_elements, NULL, NULL ); + if (!streams[0]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * num_elements, NULL, NULL ); + if (!streams[1]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * num_elements, NULL, NULL ); + if (!streams[2]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + + streams[3] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * num_elements, NULL, NULL ); + if (!streams[3]) + { + log_error("clCreateBuffer failed\n"); + return -1; 
+ } + + p = input_ptr[0]; + d = init_genrand( gRandomSeed ); + for (i=0; i MAX_ERR) + { + log_error("MIX test failed %g max err\n", max_err); + err = -1; + } + else + { + log_info("MIX test passed %g max err\n", max_err); + err = 0; + } + + clReleaseMemObject(streams[0]); + clReleaseMemObject(streams[1]); + clReleaseMemObject(streams[2]); + clReleaseMemObject(streams[3]); + clReleaseKernel(kernel); + clReleaseProgram(program); + free(input_ptr[0]); + free(input_ptr[1]); + free(input_ptr[2]); + free(output_ptr); + + return err; +} + + + + + diff --git a/test_conformance/commonfns/test_radians.c b/test_conformance/commonfns/test_radians.c new file mode 100644 index 00000000..0f2dd9d5 --- /dev/null +++ b/test_conformance/commonfns/test_radians.c @@ -0,0 +1,470 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include + +#include "procs.h" + +#ifndef M_PI +#define M_PI 3.14159265358979323846264338327950288 +#endif + +static int test_radians_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems); + + +const char *radians_kernel_code = +"__kernel void test_radians(__global float *src, __global float *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = radians(src[tid]);\n" +"}\n"; + +const char *radians2_kernel_code = +"__kernel void test_radians2(__global float2 *src, __global float2 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = radians(src[tid]);\n" +"}\n"; + +const char *radians4_kernel_code = +"__kernel void test_radians4(__global float4 *src, __global float4 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = radians(src[tid]);\n" +"}\n"; + +const char *radians8_kernel_code = +"__kernel void test_radians8(__global float8 *src, __global float8 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = radians(src[tid]);\n" +"}\n"; + +const char *radians16_kernel_code = +"__kernel void test_radians16(__global float16 *src, __global float16 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = radians(src[tid]);\n" +"}\n"; + +const char *radians3_kernel_code = +"__kernel void test_radians3(__global float *src, __global float *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" vstore3(radians(vload3(tid,src)),tid,dst);\n" +"}\n"; + + +#define MAX_ERR 2.0f + +static float +verify_radians(float *inptr, float *outptr, int n) +{ + float error, max_error = 0.0f; + double r, max_val = NAN; + int i, j, max_index = 0; + + for (i=0,j=0; i max_error) + { + max_error = error; + max_index = i; + max_val = r; + if( fabsf(error) > MAX_ERR) + { + log_error( "%d) Error @ %a: *%a vs %a (*%g vs %g) ulps: %f\n", i, inptr[i], r, outptr[i], r, outptr[i], error ); + return 1; + } + } + } + 
+ log_info( "radians: Max error %f ulps at %d: *%a vs %a (*%g vs %g)\n", max_error, max_index, max_val, outptr[max_index], max_val, outptr[max_index] ); + + return 0; +} + + +int +test_radians(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + cl_mem streams[2]; + cl_float *input_ptr[1], *output_ptr, *p; + cl_program *program; + cl_kernel *kernel; + void *values[2]; + size_t threads[1]; + int num_elements; + int err; + int i; + MTdata d; + + program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount); + kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount); + + num_elements = n_elems * (1 << (kTotalVecCount-1)); + + input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements); + output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements); + streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * num_elements, NULL, NULL ); + if (!streams[0]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + + streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * num_elements, NULL, NULL ); + if (!streams[1]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + + p = input_ptr[0]; + d = init_genrand( gRandomSeed ); + for (i=0; i max_error) + { + max_error = error; + max_index = i; + max_val = r; + if( fabsf(error) > MAX_ERR) + { + log_error( "%d) Error @ %a: *%a vs %a (*%g vs %g) ulps: %f\n", i, inptr[i], r, outptr[i], r, outptr[i], error ); + return 1; + } + } + } + + log_info( "radiansd: Max error %f ulps at %d: *%a vs %a (*%g vs %g)\n", max_error, max_index, max_val, outptr[max_index], max_val, outptr[max_index] ); + + return 0; +} + + +int +test_radians_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + cl_mem streams[2]; + cl_double *input_ptr[1], *output_ptr, *p; + cl_program *program; + cl_kernel *kernel; + void *values[2]; + size_t threads[1]; + int num_elements; + int err; + int i; + 
MTdata d; + + + program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount); + kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount); + + //TODO: line below is clearly wrong + num_elements = n_elems * (1 << (kTotalVecCount-1)); + + input_ptr[0] = (cl_double*)malloc(sizeof(cl_double) * num_elements); + output_ptr = (cl_double*)malloc(sizeof(cl_double) * num_elements); + streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_double) * num_elements, NULL, NULL ); + if (!streams[0]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + + streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_double) * num_elements, NULL, NULL ); + if (!streams[1]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + + p = input_ptr[0]; + d = init_genrand( gRandomSeed ); + for (i=0; i +#include +#include +#include + +#include "procs.h" + +static int +test_sign_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems); + + +const char *sign_kernel_code = +"__kernel void test_sign(__global float *src, __global float *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = sign(src[tid]);\n" +"}\n"; + +const char *sign2_kernel_code = +"__kernel void test_sign2(__global float2 *src, __global float2 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = sign(src[tid]);\n" +"}\n"; + +const char *sign4_kernel_code = +"__kernel void test_sign4(__global float4 *src, __global float4 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = sign(src[tid]);\n" +"}\n"; + +const char *sign8_kernel_code = +"__kernel void test_sign8(__global float8 *src, __global float8 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = sign(src[tid]);\n" +"}\n"; + +const char *sign16_kernel_code = +"__kernel void test_sign16(__global float16 *src, __global float16 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = 
sign(src[tid]);\n" +"}\n"; + +const char *sign3_kernel_code = +"__kernel void test_sign3(__global float *src, __global float *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" vstore3(sign(vload3(tid,src)), tid, dst);\n" +"}\n"; + + + +static int +verify_sign(float *inptr, float *outptr, int n) +{ + float r; + int i; + + for (i=0; i 0.0f) + r = 1.0f; + else if (inptr[i] < 0.0f) + r = -1.0f; + else + r = 0.0f; + if (r != outptr[i]) + return -1; + } + + return 0; +} + +static const char *fn_names[] = { "SIGN float", "SIGN float2", "SIGN float4", "SIGN float8", "SIGN float16", "SIGN float3" }; + +int +test_sign(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + cl_mem streams[2]; + cl_float *input_ptr[1], *output_ptr, *p; + cl_program program[kTotalVecCount]; + cl_kernel kernel[kTotalVecCount]; + void *values[2]; + size_t threads[1]; + int num_elements; + int err; + int i; + MTdata d; + + num_elements = n_elems * 16; + + input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements); + output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements); + streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * num_elements, NULL, NULL ); + if (!streams[0]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + + streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * num_elements, NULL, NULL ); + if (!streams[1]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + + d = init_genrand( gRandomSeed ); + p = input_ptr[0]; + for (i=0; i 0.0) + r = 1.0; + else if (inptr[i] < 0.0) + r = -1.0; + else + r = 0.0f; + if (r != outptr[i]) + return -1; + } + + return 0; +} + +static const char *fn_names_double[] = { "SIGN double", "SIGN double2", "SIGN double4", "SIGN double8", "SIGN double16", "SIGN double3" }; + +int +test_sign_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + cl_mem streams[2]; + cl_double 
*input_ptr[1], *output_ptr, *p; + cl_program program[kTotalVecCount]; + cl_kernel kernel[kTotalVecCount]; + void *values[2]; + size_t threads[1]; + int num_elements; + int err; + int i; + MTdata d; + + num_elements = n_elems * 16; + + input_ptr[0] = (cl_double*)malloc(sizeof(cl_double) * num_elements); + output_ptr = (cl_double*)malloc(sizeof(cl_double) * num_elements); + streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_double) * num_elements, NULL, NULL ); + if (!streams[0]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + + streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_double) * num_elements, NULL, NULL ); + if (!streams[1]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + + d = init_genrand( gRandomSeed ); + p = input_ptr[0]; + for (i=0; i +#include +#include +#include + +#include "procs.h" + +static const char *smoothstep_kernel_code = +"__kernel void test_smoothstep(__global float *edge0, __global float *edge1, __global float *x, __global float *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = smoothstep(edge0[tid], edge1[tid], x[tid]);\n" +"}\n"; + +static const char *smoothstep2_kernel_code = +"__kernel void test_smoothstep2(__global float2 *edge0, __global float2 *edge1, __global float2 *x, __global float2 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = smoothstep(edge0[tid], edge1[tid], x[tid]);\n" +"}\n"; + +static const char *smoothstep4_kernel_code = +"__kernel void test_smoothstep4(__global float4 *edge0, __global float4 *edge1, __global float4 *x, __global float4 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = smoothstep(edge0[tid], edge1[tid], x[tid]);\n" +"}\n"; + +static const char *smoothstep8_kernel_code = +"__kernel void test_smoothstep8(__global float8 *edge0, __global float8 *edge1, __global float8 *x, __global float8 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" 
dst[tid] = smoothstep(edge0[tid], edge1[tid], x[tid]);\n" +"}\n"; + +static const char *smoothstep16_kernel_code = +"__kernel void test_smoothstep16(__global float16 *edge0, __global float16 *edge1, __global float16 *x, __global float16 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = smoothstep(edge0[tid], edge1[tid], x[tid]);\n" +"}\n"; + +static const char *smoothstep3_kernel_code = +"__kernel void test_smoothstep3(__global float *edge0, __global float *edge1, __global float *x, __global float *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" vstore3(smoothstep(vload3(tid,edge0),vload3(tid,edge1),vload3(tid,x)), tid, dst);\n" +"}\n"; + +#define MAX_ERR (1e-5f) + +static float +verify_smoothstep(float *edge0, float *edge1, float *x, float *outptr, int n) +{ + float r, t, delta, max_err = 0.0f; + int i; + + for (i=0; i 1.0f) + t = 1.0f; + r = t * t * (3.0f - 2.0f * t); + delta = (float)fabs(r - outptr[i]); + if (delta > max_err) + max_err = delta; + } + + return max_err; +} + +const static char *fn_names[] = { "SMOOTHSTEP float", "SMOOTHSTEP float2", "SMOOTHSTEP float4", "SMOOTHSTEP float8", "SMOOTHSTEP float16", "SMOOTHSTEP float3" }; + +int +test_smoothstep(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + cl_mem streams[4]; + cl_float *input_ptr[3], *output_ptr, *p, *p_edge0; + cl_program program[kTotalVecCount]; + cl_kernel kernel[kTotalVecCount]; + size_t threads[1]; + float max_err; + int num_elements; + int err; + int i; + MTdata d; + + num_elements = n_elems * 16; + + input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements); + input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements); + input_ptr[2] = (cl_float*)malloc(sizeof(cl_float) * num_elements); + output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements); + streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * num_elements, NULL, NULL ); + if (!streams[0]) + { + 
log_error("clCreateBuffer failed\n"); + return -1; + } + streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * num_elements, NULL, NULL ); + if (!streams[1]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * num_elements, NULL, NULL ); + if (!streams[2]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + + streams[3] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * num_elements, NULL, NULL ); + if (!streams[3]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + + p = input_ptr[0]; + d = init_genrand( gRandomSeed ); + for (i=0; i MAX_ERR) + { + log_error("%s test failed %g max err\n", fn_names[i], max_err); + err = -1; + } + else + { + log_info("%s test passed %g max err\n", fn_names[i], max_err); + err = 0; + } + + if (err) + break; + } + + clReleaseMemObject(streams[0]); + clReleaseMemObject(streams[1]); + clReleaseMemObject(streams[2]); + clReleaseMemObject(streams[3]); + for (i=0; i +#include +#include +#include + +#include "procs.h" + +static const char *smoothstep_kernel_code = +"__kernel void test_smoothstep(__global float *edge0, __global float *edge1, __global float *x, __global float *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = smoothstep(edge0[tid], edge1[tid], x[tid]);\n" +"}\n"; + +static const char *smoothstep2_kernel_code = +"__kernel void test_smoothstep2f(__global float *edge0, __global float *edge1, __global float2 *x, __global float2 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = smoothstep(edge0[tid], edge1[tid], x[tid]);\n" +"}\n"; + +static const char *smoothstep4_kernel_code = +"__kernel void test_smoothstep4f(__global float *edge0, __global float *edge1, __global float4 *x, __global float4 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = smoothstep(edge0[tid], edge1[tid], 
x[tid]);\n" +"}\n"; + +#define MAX_ERR (1e-5f) + +extern "C" float +verify_smoothstep(float *edge0, float *edge1, float *x, float *outptr, int n, int veclen) +{ + float r, t, delta, max_err = 0.0f; + int i, j; + + for (i = 0; i < n; ++i) { + int vi = i * veclen; + for (j = 0; j < veclen; ++j, ++vi) { + t = (x[vi] - edge0[i]) / (edge1[i] - edge0[i]); + if (t < 0.0f) + t = 0.0f; + else if (t > 1.0f) + t = 1.0f; + r = t * t * (3.0f - 2.0f * t); + delta = (float)fabs(r - outptr[vi]); + if (delta > max_err) + max_err = delta; + } + } + return max_err; +} + +const static char *fn_names[] = { "SMOOTHSTEP float", "SMOOTHSTEP float2", "SMOOTHSTEP float4"}; + +int +test_smoothstepf(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + cl_mem streams[4]; + cl_float *input_ptr[3], *output_ptr, *p, *p_edge0; + cl_program program[3]; + cl_kernel kernel[3]; + size_t threads[1]; + float max_err = 0.0f; + int num_elements; + int err; + int i; + MTdata d; + + num_elements = n_elems * 4; + + input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements); + input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements); + input_ptr[2] = (cl_float*)malloc(sizeof(cl_float) * num_elements); + output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements); + streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * num_elements, NULL, NULL ); + if (!streams[0]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * num_elements, NULL, NULL ); + if (!streams[1]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * num_elements, NULL, NULL ); + if (!streams[2]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + + streams[3] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * 
num_elements, NULL, NULL ); + if (!streams[3]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + + d = init_genrand( gRandomSeed ); + p = input_ptr[0]; + for (i=0; i MAX_ERR) + { + log_error("%s test failed %g max err\n", fn_names[i], max_err); + err = -1; + } + else + { + log_info("%s test passed %g max err\n", fn_names[i], max_err); + err = 0; + } + + if (err) + break; + } + + clReleaseMemObject(streams[0]); + clReleaseMemObject(streams[1]); + clReleaseMemObject(streams[2]); + clReleaseMemObject(streams[3]); + for (i=0; i<3; i++) + { + clReleaseKernel(kernel[i]); + clReleaseProgram(program[i]); + } + free(input_ptr[0]); + free(input_ptr[1]); + free(input_ptr[2]); + free(output_ptr); + + return err; +} + + diff --git a/test_conformance/commonfns/test_step.c b/test_conformance/commonfns/test_step.c new file mode 100644 index 00000000..b81a0d95 --- /dev/null +++ b/test_conformance/commonfns/test_step.c @@ -0,0 +1,541 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include + +#include "procs.h" + +static int +test_step_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems); + + +const char *step_kernel_code = +"__kernel void test_step(__global float *srcA, __global float *srcB, __global float *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = step(srcA[tid], srcB[tid]);\n" +"}\n"; + +const char *step2_kernel_code = +"__kernel void test_step2(__global float2 *srcA, __global float2 *srcB, __global float2 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = step(srcA[tid], srcB[tid]);\n" +"}\n"; + +const char *step4_kernel_code = +"__kernel void test_step4(__global float4 *srcA, __global float4 *srcB, __global float4 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = step(srcA[tid], srcB[tid]);\n" +"}\n"; + +const char *step8_kernel_code = +"__kernel void test_step8(__global float8 *srcA, __global float8 *srcB, __global float8 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = step(srcA[tid], srcB[tid]);\n" +"}\n"; + +const char *step16_kernel_code = +"__kernel void test_step16(__global float16 *srcA, __global float16 *srcB, __global float16 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = step(srcA[tid], srcB[tid]);\n" +"}\n"; + +const char *step3_kernel_code = +"__kernel void test_step3(__global float *srcA, __global float *srcB, __global float *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" vstore3(step(vload3(tid,srcA), vload3(tid,srcB)),tid,dst);\n" +"}\n"; + + +int +verify_step(float *inptrA, float *inptrB, float *outptr, int n) +{ + float r; + int i; + + for (i=0; i +#include +#include +#include + +#include "procs.h" + +static int test_stepf_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems); + + +static const char *step_kernel_code = +"__kernel void test_step(__global 
float *srcA, __global float *srcB, __global float *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = step(srcA[tid], srcB[tid]);\n" +"}\n"; + +static const char *step2_kernel_code = +"__kernel void test_step2(__global float *srcA, __global float2 *srcB, __global float2 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = step(srcA[tid], srcB[tid]);\n" +"}\n"; + +static const char *step4_kernel_code = +"__kernel void test_step4(__global float *srcA, __global float4 *srcB, __global float4 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = step(srcA[tid], srcB[tid]);\n" +"}\n"; + +static const char *step8_kernel_code = +"__kernel void test_step8(__global float *srcA, __global float8 *srcB, __global float8 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = step(srcA[tid], srcB[tid]);\n" +"}\n"; + +static const char *step16_kernel_code = +"__kernel void test_step16(__global float *srcA, __global float16 *srcB, __global float16 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = step(srcA[tid], srcB[tid]);\n" +"}\n"; + +static const char *step3_kernel_code = +"__kernel void test_step3(__global float *srcA, __global float *srcB, __global float *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" vstore3(step(srcA[tid], vload3(tid,srcB)) ,tid,dst);\n" +"}\n"; + + +static int +verify_step( cl_float *inptrA, cl_float *inptrB, cl_float *outptr, int n, int veclen) +{ + float r; + int i, j; + + for (i=0; i *%a vs %a\n", ii, j, inptrA[ii], inptrB[i], r, outptr[i] ); + return -1; + } + } + } + + return 0; +} + +int test_stepf(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + cl_mem streams[3]; + cl_float *input_ptr[2], *output_ptr, *p; + cl_program program[kTotalVecCount]; + cl_kernel kernel[kTotalVecCount]; + size_t threads[1]; + int num_elements; + int err; + int i; + MTdata d; + num_elements = n_elems * 16; + + input_ptr[0] = 
(cl_float*)malloc(sizeof(cl_float) * num_elements); + input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements); + output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements); + streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * num_elements, NULL, NULL ); + if (!streams[0]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * num_elements, NULL, NULL ); + if (!streams[1]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * num_elements, NULL, NULL ); + if (!streams[2]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + + p = input_ptr[0]; + d = init_genrand( gRandomSeed ); + for (i=0; i *%a vs %a\n", ii, j, inptrA[ii], inptrB[i], r, outptr[i] ); + return -1; + } + } + } + + return 0; +} + +int test_stepf_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + cl_mem streams[3]; + cl_double *input_ptr[2], *output_ptr, *p; + cl_program program[kTotalVecCount]; + cl_kernel kernel[kTotalVecCount]; + size_t threads[1]; + int num_elements; + int err; + int i; + MTdata d; + num_elements = n_elems * 16; + + input_ptr[0] = (cl_double*)malloc(sizeof(cl_double) * num_elements); + input_ptr[1] = (cl_double*)malloc(sizeof(cl_double) * num_elements); + output_ptr = (cl_double*)malloc(sizeof(cl_double) * num_elements); + streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_double) * num_elements, NULL, NULL ); + if (!streams[0]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_double) * num_elements, NULL, NULL ); + if (!streams[1]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + streams[2] = clCreateBuffer( context, 
(cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_double) * num_elements, NULL, NULL ); + if (!streams[2]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + + p = input_ptr[0]; + d = init_genrand( gRandomSeed ); + for (i=0; i +#if defined(CGL_VERSION_1_3) + #include + #include +#else + #include + #include +#endif + #include +#else +#ifdef _WIN32 + #include +#endif + #include + #include + #include +#ifdef _WIN32 + #include +#else + #include +#endif + +#endif + +#ifdef _WIN32 + GLboolean gluCheckExtension(const GLubyte *extName, const GLubyte *extString); + // No glutGetProcAddress in the standard glut v3.7. + #define glutGetProcAddress(procName) wglGetProcAddress(procName) +#endif + + +#endif // __gl_headers_h + diff --git a/test_conformance/compatibility/test_common/gl/helpers.cpp b/test_conformance/compatibility/test_common/gl/helpers.cpp new file mode 100644 index 00000000..1cc5bcb1 --- /dev/null +++ b/test_conformance/compatibility/test_common/gl/helpers.cpp @@ -0,0 +1,1622 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "helpers.h" + +// convert_float_to_half and convert_half_to_float may be found in test_conformance/images/image_helpers.cpp +cl_ushort convert_float_to_half( cl_float f ); +cl_float convert_half_to_float( cl_ushort h ); + +#if defined( __APPLE__ ) + #include +#else + #include +#endif + +#if defined(__linux__) +// On linux we dont link to GLU library to avoid comaptibility issues with +// libstdc++ +// FIXME: Implement this +const GLubyte* gluErrorString (GLenum error) +{ + const char* gl_Error = "OpenGL Error"; + return (const GLubyte*)gl_Error; +} +#endif + +void * CreateGLTexture1DArray(size_t width, size_t length, + GLenum target, GLenum glFormat, GLenum internalFormat, GLenum glType, + ExplicitType type, GLuint *outTextureID, int *outError, + bool allocateMem, MTdata d) +{ + *outError = 0; + GLenum err = 0; + + // width_in_pixels * pixel_width * number_of_images: + char* buffer = (char *)CreateRandomData(type, width * length * 4, d); + + glGenTextures( 1, outTextureID ); + glBindTexture( get_base_gl_target( target ), *outTextureID ); + err = glGetError(); + if( err != GL_NO_ERROR ) { + log_error( "ERROR: Failed to create GL texture object: %s!\n", gluErrorString( err )); + *outError = -1; + free( buffer ); + return NULL; + } + + glTexEnvi( GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE ); + glTexParameteri( get_base_gl_target( target ), GL_TEXTURE_MIN_FILTER, GL_NEAREST ); + glTexParameteri( get_base_gl_target( target ), GL_TEXTURE_MAG_FILTER, GL_NEAREST ); + + // use TexImage2D to pump the 1D array fill of bits: + glTexImage2D( get_base_gl_target(target), 0, internalFormat, (GLsizei)width, + (GLsizei)length, 0, glFormat, glType, buffer ); + + err = glGetError(); + if( err != GL_NO_ERROR ) { + log_error( "ERROR: Unable to load data using glTexImage2D for " + "TEXTURE_1D_ARRAY : %s : %s : %d : %d : %s : %s : Error %s\n", + GetGLTargetName(target), + GetGLFormatName(internalFormat), + (int)(width), (int)(length), + GetGLFormatName(glFormat), + 
GetGLTypeName(glType), + gluErrorString( err )); + + *outError = -1; + free( buffer ); + return NULL; + } + + + if( !allocateMem ) { + free( buffer ); + return NULL; + } + + if( glType == GL_UNSIGNED_INT_8_8_8_8_REV && glFormat == GL_BGRA && allocateMem ) + { + // Reverse and reorder to validate since in the + // kernel the read_imagef() call always returns RGBA + + cl_uchar *p = (cl_uchar *)buffer; + for( size_t i = 0; i < width * length; i++ ) { + cl_uchar uc0 = p[i * 4 + 0]; + cl_uchar uc1 = p[i * 4 + 1]; + cl_uchar uc2 = p[i * 4 + 2]; + cl_uchar uc3 = p[i * 4 + 3]; + + p[ i * 4 + 0 ] = uc2; + p[ i * 4 + 1 ] = uc1; + p[ i * 4 + 2 ] = uc0; + p[ i * 4 + 3 ] = uc3; + } + } + else if( glType == GL_UNSIGNED_INT_8_8_8_8 && glFormat == GL_BGRA && allocateMem ) + { + // Reverse and reorder to validate since in the + // kernel the read_imagef() call always returns RGBA + cl_uchar *p = (cl_uchar *)buffer; + for( size_t i = 0; i < width * length; i++ ) + { + cl_uchar uc0 = p[i * 4 + 0]; + cl_uchar uc1 = p[i * 4 + 1]; + cl_uchar uc2 = p[i * 4 + 2]; + cl_uchar uc3 = p[i * 4 + 3]; + + p[ i * 4 + 0 ] = uc1; + p[ i * 4 + 1 ] = uc2; + p[ i * 4 + 2 ] = uc3; + p[ i * 4 + 3 ] = uc0; + } + } + + + return buffer; +} + +void * CreateGLTexture2DArray(size_t width, size_t height, size_t length, + GLenum target, GLenum glFormat, GLenum internalFormat, GLenum glType, + ExplicitType type, GLuint *outTextureID, int *outError, + bool allocateMem, MTdata d) +{ + *outError = 0; + + char * buffer = (char *)create_random_data( type, d, width * height * length * 4 ); + + if( type == kFloat && allocateMem ) + { + // Re-fill the created buffer to just have [0-1] floats, since that's what it'd expect + cl_float *p = (cl_float *)buffer; + for( size_t i = 0; i < width * height * length * 4; i++ ) + { + p[ i ] = (float) genrand_real1( d ); + } + } + else if( !allocateMem ) + memset( buffer, 0, width * height * length * 4 * get_explicit_type_size( type ) ); + + glGenTextures( 1, outTextureID ); + + 
glBindTexture( target, *outTextureID ); + glTexEnvi( GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE ); + glTexParameteri( target, GL_TEXTURE_MIN_FILTER, GL_NEAREST ); + glTexParameteri( target, GL_TEXTURE_MAG_FILTER, GL_NEAREST ); + + glGetError(); + glTexImage3D( target, 0, internalFormat, (GLsizei)width, (GLsizei)height, + (GLsizei)length, 0, glFormat, glType, buffer ); + GLenum err = glGetError(); + if( err != GL_NO_ERROR ) + { + log_error( "ERROR: Unable to load data into GL texture (%s) format %s " + "type %s internal format %s\n", gluErrorString( err ), + GetGLFormatName( glFormat ), get_explicit_type_name( type ), + GetGLFormatName( internalFormat ) ); + *outError = -1; + delete [] buffer; + return NULL; + } + + if( !allocateMem ) + { + delete [] buffer; + return NULL; + } + + if( glType == GL_UNSIGNED_INT_8_8_8_8_REV && glFormat == GL_BGRA && allocateMem ) + { + // Reverse and reorder to validate since in the + // kernel the read_imagef() call always returns RGBA + + cl_uchar *p = (cl_uchar *)buffer; + for( size_t i = 0; i < width * height * length; i++ ) + { + cl_uchar uc0 = p[i * 4 + 0]; + cl_uchar uc1 = p[i * 4 + 1]; + cl_uchar uc2 = p[i * 4 + 2]; + cl_uchar uc3 = p[i * 4 + 3]; + + p[ i * 4 + 0 ] = uc2; + p[ i * 4 + 1 ] = uc1; + p[ i * 4 + 2 ] = uc0; + p[ i * 4 + 3 ] = uc3; + } + } + else if( glType == GL_UNSIGNED_INT_8_8_8_8 && glFormat == GL_BGRA && allocateMem ) + { + // Reverse and reorder to validate since in the + // kernel the read_imagef() call always returns RGBA + cl_uchar *p = (cl_uchar *)buffer; + for( size_t i = 0; i < width * length; i++ ) + { + cl_uchar uc0 = p[i * 4 + 0]; + cl_uchar uc1 = p[i * 4 + 1]; + cl_uchar uc2 = p[i * 4 + 2]; + cl_uchar uc3 = p[i * 4 + 3]; + + p[ i * 4 + 0 ] = uc1; + p[ i * 4 + 1 ] = uc2; + p[ i * 4 + 2 ] = uc3; + p[ i * 4 + 3 ] = uc0; + } + } + + + return buffer; +} + +void * CreateGLTextureBuffer(size_t width, GLenum target, + GLenum glFormat, GLenum internalFormat, GLenum glType, ExplicitType type, + GLuint 
*outTex, GLuint *outBuf, int *outError, bool allocateMem, MTdata d) +{ + // First, generate a regular GL Buffer from random data. + *outError = 0; + GLenum err = 0; + + char * buffer = (char*)CreateRandomData(type, width * 4, d); + + glGenBuffers(1, outBuf); + glBindBuffer(GL_TEXTURE_BUFFER, *outBuf); + + // The buffer should be the array width * number of elements * element pitch + size_t size = width * 4; + + // Need to multiply by the type size: + size *= ( GetGLTypeSize( GetGLTypeForExplicitType(type) ) ); + + glBufferData(GL_TEXTURE_BUFFER, size, buffer, GL_DYNAMIC_DRAW); + + // Now make a Texture out of this Buffer: + + glGenTextures(1, outTex); + glBindTexture(GL_TEXTURE_BUFFER, *outTex); + glTexBuffer(GL_TEXTURE_BUFFER, internalFormat, *outBuf); + + + + if( !allocateMem ) { + free( buffer ); + return NULL; + } + + if( glType == GL_UNSIGNED_INT_8_8_8_8_REV && glFormat == GL_BGRA && allocateMem ) + { + // Reverse and reorder to validate since in the + // kernel the read_imagef() call always returns RGBA + cl_uchar *p = (cl_uchar *)buffer; + for( size_t i = 0; i < width; i++ ) { + cl_uchar uc0 = p[i * 4 + 0]; + cl_uchar uc1 = p[i * 4 + 1]; + cl_uchar uc2 = p[i * 4 + 2]; + cl_uchar uc3 = p[i * 4 + 3]; + + p[ i * 4 + 0 ] = uc2; + p[ i * 4 + 1 ] = uc1; + p[ i * 4 + 2 ] = uc0; + p[ i * 4 + 3 ] = uc3; + } + } + else if( glType == GL_UNSIGNED_INT_8_8_8_8 && glFormat == GL_BGRA && allocateMem ) + { + // Reverse and reorder to validate since in the + // kernel the read_imagef() call always returns RGBA + cl_uchar *p = (cl_uchar *)buffer; + for( size_t i = 0; i < width; i++ ) + { + cl_uchar uc0 = p[i * 4 + 0]; + cl_uchar uc1 = p[i * 4 + 1]; + cl_uchar uc2 = p[i * 4 + 2]; + cl_uchar uc3 = p[i * 4 + 3]; + + p[ i * 4 + 0 ] = uc1; + p[ i * 4 + 1 ] = uc2; + p[ i * 4 + 2 ] = uc3; + p[ i * 4 + 3 ] = uc0; + } + } + + + return buffer; +} + +void* CreateGLTexture1D( size_t width, GLenum target, GLenum glFormat, + GLenum internalFormat, GLenum glType, ExplicitType type, + GLuint 
*outTextureID, int *outError, bool allocateMem, MTdata d ) +{ + *outError = 0; + GLenum err = 0; + + char * buffer = (char*)CreateRandomData(type, width * 4, d); + + glGenTextures( 1, outTextureID ); + glBindTexture( get_base_gl_target( target ), *outTextureID ); + err = glGetError(); + if( err != GL_NO_ERROR ) + { + log_error( "ERROR: Failed to create GL texture object: %s!\n", gluErrorString( err )); + *outError = -1; + free( buffer ); + return NULL; + } + + glTexEnvi( GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE ); + glTexParameteri( get_base_gl_target( target ), GL_TEXTURE_MIN_FILTER, GL_NEAREST ); + glTexParameteri( get_base_gl_target( target ), GL_TEXTURE_MAG_FILTER, GL_NEAREST ); + + glTexImage1D( get_base_gl_target(target), 0, internalFormat, (GLsizei)width, + 0, glFormat, glType, buffer ); + + err = glGetError(); + if( err != GL_NO_ERROR ) + { + log_error( "ERROR: Unable to load data into glTexImage1D : %s : %s : %d : %s : %s : Error %s\n", + GetGLTargetName(target), + GetGLFormatName(internalFormat), + (int)(width), + GetGLFormatName(glFormat), + GetGLTypeName(glType), + gluErrorString( err )); + + *outError = -1; + free( buffer ); + return NULL; + } + + if( !allocateMem ) { + free( buffer ); + return NULL; + } + + if( glType == GL_UNSIGNED_INT_8_8_8_8_REV && glFormat == GL_BGRA && allocateMem ) + { + // Reverse and reorder to validate since in the + // kernel the read_imagef() call always returns RGBA + cl_uchar *p = (cl_uchar *)buffer; + for( size_t i = 0; i < width; i++ ) { + cl_uchar uc0 = p[i * 4 + 0]; + cl_uchar uc1 = p[i * 4 + 1]; + cl_uchar uc2 = p[i * 4 + 2]; + cl_uchar uc3 = p[i * 4 + 3]; + + p[ i * 4 + 0 ] = uc2; + p[ i * 4 + 1 ] = uc1; + p[ i * 4 + 2 ] = uc0; + p[ i * 4 + 3 ] = uc3; + } + } + else if( glType == GL_UNSIGNED_INT_8_8_8_8 && glFormat == GL_BGRA && allocateMem ) + { + // Reverse and reorder to validate since in the + // kernel the read_imagef() call always returns RGBA + cl_uchar *p = (cl_uchar *)buffer; + for( size_t i = 0; i < 
width; i++ ) + { + cl_uchar uc0 = p[i * 4 + 0]; + cl_uchar uc1 = p[i * 4 + 1]; + cl_uchar uc2 = p[i * 4 + 2]; + cl_uchar uc3 = p[i * 4 + 3]; + + p[ i * 4 + 0 ] = uc1; + p[ i * 4 + 1 ] = uc2; + p[ i * 4 + 2 ] = uc3; + p[ i * 4 + 3 ] = uc0; + } + } + + return buffer; +} + +void * CreateGLTexture2D( size_t width, size_t height, + GLenum target, GLenum glFormat, + GLenum internalFormat, GLenum glType, + ExplicitType type, GLuint *outTextureID, + int *outError, bool allocateMem, MTdata d ) +{ + *outError = 0; + GLenum err = 0; + + char * buffer = (char *)CreateRandomData(type, width * height * 4, d); + + glGenTextures( 1, outTextureID ); + glBindTexture( get_base_gl_target( target ), *outTextureID ); + err = glGetError(); + if( err != GL_NO_ERROR ) + { + log_error( "ERROR: Failed to create GL texture object: %s!\n", gluErrorString( err )); + *outError = -1; + free( buffer ); + return NULL; + } + + glTexEnvi( GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE ); + glTexParameteri( get_base_gl_target( target ), GL_TEXTURE_MIN_FILTER, GL_NEAREST ); + glTexParameteri( get_base_gl_target( target ), GL_TEXTURE_MAG_FILTER, GL_NEAREST ); + + if( get_base_gl_target( target ) == GL_TEXTURE_CUBE_MAP ) + { + char * temp = (char *)malloc(width * height * 4 * get_explicit_type_size( type ) * sizeof(cl_char)); + if(allocateMem) + memcpy( temp, buffer, width * height * 4 * get_explicit_type_size( type ) ); + else + memset( temp, 0, width * height * 4 * get_explicit_type_size( type ) ); + + glTexImage2D( GL_TEXTURE_CUBE_MAP_POSITIVE_X, 0, internalFormat, (GLsizei)width, (GLsizei)height, 0, glFormat, glType, temp ); + glTexImage2D( GL_TEXTURE_CUBE_MAP_POSITIVE_Y, 0, internalFormat, (GLsizei)width, (GLsizei)height, 0, glFormat, glType, temp ); + glTexImage2D( GL_TEXTURE_CUBE_MAP_POSITIVE_Z, 0, internalFormat, (GLsizei)width, (GLsizei)height, 0, glFormat, glType, temp ); + glTexImage2D( GL_TEXTURE_CUBE_MAP_NEGATIVE_X, 0, internalFormat, (GLsizei)width, (GLsizei)height, 0, glFormat, glType, 
temp ); + glTexImage2D( GL_TEXTURE_CUBE_MAP_NEGATIVE_Y, 0, internalFormat, (GLsizei)width, (GLsizei)height, 0, glFormat, glType, temp ); + glTexImage2D( GL_TEXTURE_CUBE_MAP_NEGATIVE_Z, 0, internalFormat, (GLsizei)width, (GLsizei)height, 0, glFormat, glType, temp ); + free(temp); + } + else + { +#ifdef DEBUG + log_info("- glTexImage2D : %s : %s : %d : %d : %s : %s\n", + GetGLTargetName(target), + GetGLFormatName(internalFormat), + width, height, + GetGLFormatName(glFormat), + GetGLTypeName(glType)); + + DumpGLBuffer(glType, width, height, buffer); + +#endif + glTexImage2D( get_base_gl_target(target), 0, internalFormat, (GLsizei)width, (GLsizei)height, 0, glFormat, glType, buffer ); + } + + err = glGetError(); + if( err != GL_NO_ERROR ) + { + log_error( "ERROR: Unable to load data into glTexImage2D : %s : %s : %d : %d : %s : %s : Error %s\n", + GetGLTargetName(target), + GetGLFormatName(internalFormat), + (int)(width), (int)(height), + GetGLFormatName(glFormat), + GetGLTypeName(glType), + gluErrorString( err )); + + *outError = -1; + free( buffer ); + return NULL; + } + +#ifdef DEBUG + memset(buffer, 0, width * height * 4 * get_explicit_type_size( type )); + + log_info("- glGetTexImage : %s : %s : %s\n", + GetGLTargetName(target), + GetGLFormatName(glFormat), + GetGLTypeName(glType)); + + glGetTexImage(target, 0, glFormat, glType, buffer); + + DumpGLBuffer(type, width, height, buffer); + + err = glGetError(); + if( err != GL_NO_ERROR ) + { + log_error( "ERROR: Unable to read data from glGetTexImage : %s : %s : %s : Error %s\n", + GetGLTargetName(target), + GetGLFormatName(glFormat), + GetGLTypeName(glType), + gluErrorString( err )); + return NULL; + } +#endif + + if( !allocateMem ) + { + free( buffer ); + return NULL; + } + + if( glType == GL_UNSIGNED_INT_8_8_8_8_REV && glFormat == GL_BGRA && allocateMem ) + { + // Reverse and reorder to validate since in the + // kernel the read_imagef() call always returns RGBA + cl_uchar *p = (cl_uchar *)buffer; + for( size_t i = 
0; i < width * height; i++ ) + { + cl_uchar uc0 = p[i * 4 + 0]; + cl_uchar uc1 = p[i * 4 + 1]; + cl_uchar uc2 = p[i * 4 + 2]; + cl_uchar uc3 = p[i * 4 + 3]; + + p[ i * 4 + 0 ] = uc2; + p[ i * 4 + 1 ] = uc1; + p[ i * 4 + 2 ] = uc0; + p[ i * 4 + 3 ] = uc3; + } + } + else if( glType == GL_UNSIGNED_INT_8_8_8_8 && glFormat == GL_BGRA && allocateMem ) + { + // Reverse and reorder to validate since in the + // kernel the read_imagef() call always returns RGBA + cl_uchar *p = (cl_uchar *)buffer; + for( size_t i = 0; i < width * height; i++ ) + { + cl_uchar uc0 = p[i * 4 + 0]; + cl_uchar uc1 = p[i * 4 + 1]; + cl_uchar uc2 = p[i * 4 + 2]; + cl_uchar uc3 = p[i * 4 + 3]; + + p[ i * 4 + 0 ] = uc1; + p[ i * 4 + 1 ] = uc2; + p[ i * 4 + 2 ] = uc3; + p[ i * 4 + 3 ] = uc0; + } + } + + return buffer; +} + +void * CreateGLTexture3D( size_t width, size_t height, size_t depth, + GLenum target, GLenum glFormat, + GLenum internalFormat, GLenum glType, + ExplicitType type, GLuint *outTextureID, + int *outError, MTdata d, bool allocateMem) +{ + *outError = 0; + + char * buffer = (char *)create_random_data( type, d, width * height * depth * 4 ); + + if( type == kFloat && allocateMem ) + { + // Re-fill the created buffer to just have [0-1] floats, since that's what it'd expect + cl_float *p = (cl_float *)buffer; + for( size_t i = 0; i < width * height * depth * 4; i++ ) + { + p[ i ] = (float) genrand_real1( d ); + } + } + else if( !allocateMem ) + memset( buffer, 0, width * height * depth * 4 * get_explicit_type_size( type ) ); + + glGenTextures( 1, outTextureID ); + + glBindTexture( target, *outTextureID ); + glTexEnvi( GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE ); + glTexParameteri( target, GL_TEXTURE_MIN_FILTER, GL_NEAREST ); + glTexParameteri( target, GL_TEXTURE_MAG_FILTER, GL_NEAREST ); + + glGetError(); + glTexImage3D( target, 0, internalFormat, (GLsizei)width, (GLsizei)height, (GLsizei)depth, 0, glFormat, glType, buffer ); + GLenum err = glGetError(); + if( err != GL_NO_ERROR ) + 
{ + log_error( "ERROR: Unable to load data into GL texture (%s) format %s type %s internal format %s\n", gluErrorString( err ), GetGLFormatName( glFormat ), get_explicit_type_name( type ), GetGLFormatName( internalFormat ) ); + *outError = -1; + delete [] buffer; + return NULL; + } + + if( !allocateMem ) + { + delete [] buffer; + return NULL; + } + + if( glType == GL_UNSIGNED_INT_8_8_8_8_REV && glFormat == GL_BGRA && allocateMem ) + { + // Reverse and reorder to validate since in the + // kernel the read_imagef() call always returns RGBA + + cl_uchar *p = (cl_uchar *)buffer; + for( size_t i = 0; i < width * height * depth; i++ ) + { + cl_uchar uc0 = p[i * 4 + 0]; + cl_uchar uc1 = p[i * 4 + 1]; + cl_uchar uc2 = p[i * 4 + 2]; + cl_uchar uc3 = p[i * 4 + 3]; + + p[ i * 4 + 0 ] = uc2; + p[ i * 4 + 1 ] = uc1; + p[ i * 4 + 2 ] = uc0; + p[ i * 4 + 3 ] = uc3; + } + } + else if( glType == GL_UNSIGNED_INT_8_8_8_8 && glFormat == GL_BGRA && allocateMem ) + { + // Reverse and reorder to validate since in the + // kernel the read_imagef() call always returns RGBA + cl_uchar *p = (cl_uchar *)buffer; + for( size_t i = 0; i < width * height * depth; i++ ) + { + cl_uchar uc0 = p[i * 4 + 0]; + cl_uchar uc1 = p[i * 4 + 1]; + cl_uchar uc2 = p[i * 4 + 2]; + cl_uchar uc3 = p[i * 4 + 3]; + + p[ i * 4 + 0 ] = uc1; + p[ i * 4 + 1 ] = uc2; + p[ i * 4 + 2 ] = uc3; + p[ i * 4 + 3 ] = uc0; + } + } + + return buffer; +} + +void * ReadGLTexture( GLenum glTarget, GLuint glTexture, GLuint glBuf, GLint width, + GLenum glFormat, GLenum glInternalFormat, + GLenum glType, ExplicitType typeToReadAs, + size_t outWidth, size_t outHeight ) +{ + // Read results from the GL texture + glBindTexture(get_base_gl_target(glTarget), glTexture); + + GLint realWidth, realHeight, realDepth; + glGetTexLevelParameteriv( glTarget, 0, GL_TEXTURE_WIDTH, &realWidth ); + glGetTexLevelParameteriv( glTarget, 0, GL_TEXTURE_HEIGHT, &realHeight ); + glGetTexLevelParameteriv( glTarget, 0, GL_TEXTURE_WIDTH, &realDepth ); + + 
realDepth = realDepth == 0 ? realDepth : 1; + + GLint realInternalFormat; + glGetTexLevelParameteriv( glTarget, 0, GL_TEXTURE_INTERNAL_FORMAT, &realInternalFormat ); + +#ifdef DEBUG + log_info( "- Reading back from GL: %d x %d : %s : %s : %s : %s (stored as %s)\n", + realWidth, realHeight, + GetGLTargetName( glTarget), + GetGLFormatName( glInternalFormat ), + GetGLFormatName( glFormat ), + GetGLTypeName( glType ), + GetGLFormatName( realInternalFormat )); +#endif + + GLenum readBackFormat = glFormat == GL_RGBA_INTEGER_EXT ? GL_RGBA_INTEGER_EXT : GL_RGBA; + + GLenum readBackType; + switch (glType) { +#ifdef __APPLE__ + case GL_UNSIGNED_INT_8_8_8_8: + case GL_UNSIGNED_INT_8_8_8_8_REV: + readBackType = GL_UNSIGNED_BYTE; + break; +#endif + case GL_HALF_FLOAT: + case GL_UNSIGNED_BYTE: + case GL_UNSIGNED_SHORT: + case GL_UNSIGNED_INT: + case GL_BYTE: + case GL_SHORT: + case GL_INT: + case GL_FLOAT: + default: + readBackType = glType; + } + + size_t outBytes; + if (get_base_gl_target(glTarget) != GL_TEXTURE_BUFFER) { + outBytes = realWidth * realHeight * realDepth * 4 + * GetGLTypeSize(readBackType); + } + else { + outBytes = width * 4; + + outBytes *= ( GetGLTypeSize( GetGLTypeForExplicitType(typeToReadAs) ) ); + } + + cl_char *outBuffer = (cl_char *)malloc( outBytes ); + memset(outBuffer, 0, outBytes); + + if (get_base_gl_target(glTarget) != GL_TEXTURE_BUFFER) { + glGetTexImage( glTarget, 0, readBackFormat, readBackType, outBuffer ); + } + else { + glBindBuffer(GL_ARRAY_BUFFER, glBuf); + glGetBufferSubData(GL_ARRAY_BUFFER, 0, outBytes, outBuffer); + } + +#ifdef DEBUG + + log_info( "- glGetTexImage: %s : %s : %s \n", + GetGLTargetName( glTarget), + GetGLFormatName(readBackFormat), + GetGLTypeName(readBackType)); + + DumpGLBuffer(readBackType, realWidth, realHeight, (void*)outBuffer); + +#endif + + return (void *)outBuffer; +} + +int CreateGLRenderbufferRaw( GLsizei width, GLsizei height, + GLenum attachment, GLenum glFormat, + GLenum internalFormat, GLenum glType, + 
GLuint *outFramebuffer, + GLuint *outRenderbuffer ) +{ + GLenum err = 0; + + // Generate a renderbuffer and bind + glGenRenderbuffersEXT( 1, outRenderbuffer ); + glBindRenderbufferEXT( GL_RENDERBUFFER_EXT, *outRenderbuffer ); + + // Allocate storage to the renderbuffer + glGetError(); + glRenderbufferStorageEXT( GL_RENDERBUFFER_EXT, internalFormat, (GLsizei)width, (GLsizei)height ); + err = glGetError(); + if( err != GL_NO_ERROR ) + { + log_error("Failed to allocate render buffer storage!\n"); + return 1701; + } + + GLint realInternalFormat; + glGetRenderbufferParameterivEXT( GL_RENDERBUFFER_EXT, GL_RENDERBUFFER_INTERNAL_FORMAT_EXT, &realInternalFormat ); + internalFormat = realInternalFormat; + +#ifdef DEBUG + GLint rsize, gsize, bsize, asize; + glGetRenderbufferParameterivEXT(GL_RENDERBUFFER_EXT, GL_RENDERBUFFER_RED_SIZE_EXT,&rsize); + glGetRenderbufferParameterivEXT(GL_RENDERBUFFER_EXT, GL_RENDERBUFFER_GREEN_SIZE_EXT,&gsize); + glGetRenderbufferParameterivEXT(GL_RENDERBUFFER_EXT, GL_RENDERBUFFER_BLUE_SIZE_EXT,&bsize); + glGetRenderbufferParameterivEXT(GL_RENDERBUFFER_EXT, GL_RENDERBUFFER_ALPHA_SIZE_EXT,&asize); + + log_info("Renderbuffer internal format requested: %s actual: %s sizes: r=%d g=%d b=%d a=%d\n", + GetGLFormatName( internalFormat ), GetGLFormatName( realInternalFormat ), + rsize, gsize, bsize, asize ); +#endif + + // Create and bind a framebuffer to render with + glGenFramebuffersEXT( 1, outFramebuffer ); + glBindFramebufferEXT( GL_FRAMEBUFFER_EXT, *outFramebuffer ); + if( err != GL_NO_ERROR ) + { + log_error( "ERROR: Unable to bind framebuffer : Error %s\n", + gluErrorString( err )); + + return -1; + } + + // Attach to the framebuffer + glFramebufferRenderbufferEXT( GL_FRAMEBUFFER_EXT, attachment, GL_RENDERBUFFER_EXT, *outRenderbuffer ); + err = glGetError(); + GLint status = glCheckFramebufferStatusEXT( GL_FRAMEBUFFER_EXT ); + if( status != GL_FRAMEBUFFER_COMPLETE_EXT ) + { + log_error( "ERROR: Unable to attach renderbuffer to framebuffer (%s, 
status %x)\n", gluErrorString( err ), (int)status ); + return -1; + } + + return 0; +} + +void * CreateGLRenderbuffer( GLsizei width, GLsizei height, + GLenum attachment, GLenum glFormat, + GLenum internalFormat, GLenum glType, + ExplicitType type, + GLuint *outFramebuffer, + GLuint *outRenderbuffer, + int *outError, MTdata d, bool allocateMem ) +{ + *outError = CreateGLRenderbufferRaw( width, height, attachment, glFormat, internalFormat, + glType, outFramebuffer, outRenderbuffer ); + + if( *outError != 0 ) + return NULL; + + GLenum err = 0; + + // Generate a renderbuffer and bind + glGenRenderbuffersEXT( 1, outRenderbuffer ); + glBindRenderbufferEXT( GL_RENDERBUFFER_EXT, *outRenderbuffer ); + + // Allocate storage to the renderbuffer + glGetError(); + glRenderbufferStorageEXT( GL_RENDERBUFFER_EXT, internalFormat, (GLsizei)width, (GLsizei)height ); + err = glGetError(); + if( err != GL_NO_ERROR ) + { + *outError = 1701; + log_error("Failed to allocate render buffer storage!\n"); + return NULL; + } + + GLint realInternalFormat; + glGetRenderbufferParameterivEXT( GL_RENDERBUFFER_EXT, GL_RENDERBUFFER_INTERNAL_FORMAT_EXT, &realInternalFormat ); + internalFormat = realInternalFormat; + +#ifdef DEBUG + GLint rsize, gsize, bsize, asize; + glGetRenderbufferParameterivEXT(GL_RENDERBUFFER_EXT, GL_RENDERBUFFER_RED_SIZE_EXT,&rsize); + glGetRenderbufferParameterivEXT(GL_RENDERBUFFER_EXT, GL_RENDERBUFFER_GREEN_SIZE_EXT,&gsize); + glGetRenderbufferParameterivEXT(GL_RENDERBUFFER_EXT, GL_RENDERBUFFER_BLUE_SIZE_EXT,&bsize); + glGetRenderbufferParameterivEXT(GL_RENDERBUFFER_EXT, GL_RENDERBUFFER_ALPHA_SIZE_EXT,&asize); + + log_info("Renderbuffer internal format requested: %s actual: %s sizes: r=%d g=%d b=%d a=%d\n", + GetGLFormatName( internalFormat ), GetGLFormatName( realInternalFormat ), + rsize, gsize, bsize, asize ); +#endif + + // Create and bind a framebuffer to render with + glGenFramebuffersEXT( 1, outFramebuffer ); + glBindFramebufferEXT( GL_FRAMEBUFFER_EXT, *outFramebuffer 
); + if( err != GL_NO_ERROR ) + { + log_error( "ERROR: Unable to bind framebuffer : Error %s\n", + gluErrorString( err )); + + *outError = -1; + return NULL; + } + + // Attach to the framebuffer + glFramebufferRenderbufferEXT( GL_FRAMEBUFFER_EXT, attachment, GL_RENDERBUFFER_EXT, *outRenderbuffer ); + err = glGetError(); + GLint status = glCheckFramebufferStatusEXT( GL_FRAMEBUFFER_EXT ); + if( status != GL_FRAMEBUFFER_COMPLETE_EXT ) + { + *outError = -1; + log_error( "ERROR: Unable to attach renderbuffer to framebuffer (%s, status %x)\n", gluErrorString( err ), (int)status ); + return NULL; + } + + void* buffer = CreateRandomData(type, width * height * 4, d); + +#ifdef DEBUG + log_info( "- Fillling renderbuffer: %d : %d : %s : %s \n", + (int)width, (int)height, + GetGLFormatName(glFormat), + GetGLTypeName(glType)); + + DumpGLBuffer(glType, (int)width, (int)height, (void*)buffer); +#endif + + // Fill a texture with our input data + glTextureWrapper texture; + glGenTextures( 1, &texture ); + glBindTexture( GL_TEXTURE_RECTANGLE_ARB, texture ); + glTexParameteri( GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MIN_FILTER, GL_NEAREST ); + glTexParameteri( GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MAG_FILTER, GL_NEAREST ); + glTexParameteri( GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE ); + glTexParameteri( GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE ); + glTexImage2D( GL_TEXTURE_RECTANGLE_ARB, 0, internalFormat, width, height, 0, glFormat, glType, buffer ); + glEnable( GL_TEXTURE_RECTANGLE_ARB ); + + // Render fullscreen textured quad + glDisable( GL_LIGHTING ); + glViewport(0, 0, width, height); + glMatrixMode(GL_MODELVIEW); + glLoadIdentity(); + glMatrixMode( GL_TEXTURE ); + glLoadIdentity(); + glMatrixMode(GL_PROJECTION); + glLoadIdentity(); + glClear(GL_COLOR_BUFFER_BIT); + gluOrtho2D( -1.0, 1.0, -1.0, 1.0 ); + glMatrixMode( GL_MODELVIEW ); + glBegin( GL_QUADS ); + { + glColor3f(1.0f, 1.0f, 1.0f); + glTexCoord2f( 0.0f, 0.0f ); + glVertex3f( -1.0f, 
-1.0f, 0.0f ); + glTexCoord2f( 0.0f, height ); + glVertex3f( -1.0f, 1.0f, 0.0f ); + glTexCoord2f( width, height ); + glVertex3f( 1.0f, 1.0f, 0.0f ); + glTexCoord2f( width, 0.0f ); + glVertex3f( 1.0f, -1.0f, 0.0f ); + } + glEnd(); + glBindTexture( GL_TEXTURE_RECTANGLE_ARB, 0 ); + glDisable( GL_TEXTURE_RECTANGLE_ARB ); + + glFlush(); + + // Read back the data in the renderbuffer + memset(buffer, 0, width * height * 4 * get_explicit_type_size( type )); + glReadBuffer( attachment ); + glReadPixels( 0, 0, (GLsizei)width, (GLsizei)height, glFormat, glType, buffer ); + + err = glGetError(); + if( err != GL_NO_ERROR ) + { + log_error( "ERROR: Unable to read data via glReadPixels : %d : %d : %s : %s : Error %s\n", + (int)width, (int)height, + GetGLFormatName(glFormat), + GetGLTypeName(glType), + gluErrorString( err )); + *outError = -1; + } + +#ifdef DEBUG + log_info( "- glReadPixels: %d : %d : %s : %s \n", + (int)width, (int)height, + GetGLFormatName(glFormat), + GetGLTypeName(glType)); + + DumpGLBuffer(glType, (int)width, (int)height, (void*)buffer); +#endif + + if( !allocateMem ) + { + free( buffer ); + return NULL; + } + + if( glType == GL_UNSIGNED_INT_8_8_8_8_REV && glFormat == GL_BGRA && allocateMem ) + { + // Reverse and reorder to validate since in the + // kernel the read_imagef() call always returns RGBA + cl_uchar *p = (cl_uchar *)buffer; + for( size_t i = 0; i < (size_t)width * height; i++ ) + { + cl_uchar uc0 = p[i * 4 + 0]; + cl_uchar uc1 = p[i * 4 + 1]; + cl_uchar uc2 = p[i * 4 + 2]; + cl_uchar uc3 = p[i * 4 + 3]; + + p[ i * 4 + 0 ] = uc2; + p[ i * 4 + 1 ] = uc1; + p[ i * 4 + 2 ] = uc0; + p[ i * 4 + 3 ] = uc3; + } + } + else if( glType == GL_UNSIGNED_INT_8_8_8_8 && glFormat == GL_BGRA && allocateMem ) + { + // Reverse and reorder to validate since in the + // kernel the read_imagef() call always returns RGBA + cl_uchar *p = (cl_uchar *)buffer; + for( size_t i = 0; i < width * height; i++ ) + { + cl_uchar uc0 = p[i * 4 + 0]; + cl_uchar uc1 = p[i * 4 + 1]; + 
cl_uchar uc2 = p[i * 4 + 2]; + cl_uchar uc3 = p[i * 4 + 3]; + + p[ i * 4 + 0 ] = uc1; + p[ i * 4 + 1 ] = uc2; + p[ i * 4 + 2 ] = uc3; + p[ i * 4 + 3 ] = uc0; + } + } + + return buffer; +} + +void * ReadGLRenderbuffer( GLuint glFramebuffer, GLuint glRenderbuffer, + GLenum attachment, GLenum glFormat, + GLenum glInternalFormat, GLenum glType, + ExplicitType typeToReadAs, + size_t outWidth, size_t outHeight ) +{ + glBindFramebufferEXT( GL_FRAMEBUFFER_EXT, glFramebuffer ); + glFramebufferRenderbufferEXT( GL_FRAMEBUFFER_EXT, attachment, GL_RENDERBUFFER_EXT, glRenderbuffer ); + + // Attach to the framebuffer + GLint err = glGetError(); + if( glCheckFramebufferStatusEXT( GL_FRAMEBUFFER_EXT ) != GL_FRAMEBUFFER_COMPLETE_EXT ) + { + log_error( "ERROR: Unable to attach renderbuffer to framebuffer (%s)\n", gluErrorString( err ) ); + return NULL; + } + + // Read results from the GL renderbuffer +#ifdef DEBUG + log_info( "- Reading back from GL: %d x %d : %s : %s : %s\n", + (int)outWidth, (int)outHeight, + GetGLFormatName( glInternalFormat ), + GetGLFormatName( glFormat ), + GetGLTypeName( glType )); +#endif + + GLenum readBackFormat = glFormat == GL_RGBA_INTEGER_EXT ? 
GL_RGBA_INTEGER_EXT : GL_RGBA; + GLenum readBackType = glType; + + size_t outBytes = outWidth * outHeight * 4 * GetGLTypeSize(readBackType); + void *outBuffer = malloc( outBytes ); + memset(outBuffer, 0, outBytes); + + glReadPixels( 0, 0, (GLsizei)outWidth, (GLsizei)outHeight, readBackFormat, readBackType, outBuffer ); + +#ifdef DEBUG + log_info( "- glReadPixels: %d : %d : %s : %s \n", + (int)outWidth, (int)outHeight, + GetGLFormatName(readBackFormat), + GetGLTypeName(readBackType)); + + DumpGLBuffer(readBackType, outWidth, outHeight, outBuffer); +#endif + + return (void *)outBuffer; +} + +GLenum +GetGLFormat(GLenum internalFormat) +{ + GLenum glFormat; + switch (internalFormat) + { + case GL_BGRA: + case GL_RGBA8: + case GL_RGBA16: + case GL_RGBA32F_ARB: + glFormat = GL_RGBA; + break; + case GL_RGBA8I_EXT: + case GL_RGBA16I_EXT: + case GL_RGBA32I_EXT: + case GL_RGBA8UI_EXT: + case GL_RGBA16UI_EXT: + case GL_RGBA32UI_EXT: + glFormat = GL_RGBA_INTEGER_EXT; + break; + default: + glFormat = GL_RGBA; + break; + } + + return glFormat; +} + +GLenum GetGLTypeForExplicitType(ExplicitType type) +{ + switch( type ) + { + case kFloat: + return GL_FLOAT; + case kInt: + return GL_INT; + case kUInt: + return GL_UNSIGNED_INT; + case kShort: + return GL_SHORT; + case kUShort: + return GL_UNSIGNED_SHORT; + case kChar: + return GL_BYTE; + case kUChar: + return GL_UNSIGNED_BYTE; + case kHalf: +#if defined( __APPLE__ ) + return GL_HALF_FLOAT; +#else + return GL_HALF_FLOAT_ARB; +#endif + default: + return GL_INT; + }; +} + +size_t GetGLTypeSize(GLenum type) +{ + switch( type ) + { + case GL_FLOAT: + return sizeof(GLfloat); + case GL_INT: + return sizeof(GLint); + case GL_UNSIGNED_INT: + return sizeof(GLuint); + case GL_SHORT: + return sizeof(GLshort); + case GL_UNSIGNED_SHORT: + return sizeof(GLushort); + case GL_UNSIGNED_INT_8_8_8_8_REV: + case GL_BYTE: + return sizeof(GLbyte); + case GL_UNSIGNED_BYTE: + return sizeof(GLubyte); +#if defined( __APPLE__ ) + case GL_HALF_FLOAT: +#else + 
case GL_HALF_FLOAT_ARB: +#endif + return sizeof(GLhalf); + default: + log_error("Unknown type 0x%x\n",type); + return 0; + }; +} + +ExplicitType GetExplicitTypeForGLType(GLenum type) +{ + switch( type ) + { + case GL_FLOAT: + return kFloat; + case GL_INT: + return kInt; + case GL_UNSIGNED_INT: + return kUInt; + case GL_SHORT: + return kShort; + case GL_UNSIGNED_SHORT: + return kUShort; + case GL_BYTE: + return kChar; + case GL_UNSIGNED_BYTE: + return kUChar; +#if defined( __APPLE__ ) + case GL_HALF_FLOAT: +#else + case GL_HALF_FLOAT_ARB: +#endif + return kHalf; + default: + return kFloat; + }; +} + +GLenum get_base_gl_target( GLenum target ) +{ + switch( target ) + { + case GL_TEXTURE_CUBE_MAP_POSITIVE_X: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: + return GL_TEXTURE_CUBE_MAP; + default: + return target; + } +} + +const char *GetGLTypeName( GLenum type ) +{ + switch( type ) + { + case GL_BYTE: return "GL_BYTE"; + case GL_UNSIGNED_BYTE: return "GL_UNSIGNED_BYTE"; + case GL_INT: return "GL_INT"; + case GL_UNSIGNED_INT: return "GL_UNSIGNED_INT"; + case GL_SHORT: return "GL_SHORT"; + case GL_UNSIGNED_SHORT: return "GL_UNSIGNED_SHORT"; +#if defined( __APPLE__ ) + case GL_HALF_FLOAT: return "GL_HALF_FLOAT"; +#else + case GL_HALF_FLOAT_ARB: return "GL_HALF_FLOAT_ARB"; +#endif + case GL_FLOAT: return "GL_FLOAT"; + case GL_UNSIGNED_INT_8_8_8_8: return "GL_UNSIGNED_INT_8_8_8_8"; + case GL_UNSIGNED_INT_8_8_8_8_REV: return "GL_UNSIGNED_INT_8_8_8_8_REV"; + default: + { + static char foo[ 128 ]; + sprintf( foo, "(Unknown:0x%08x)", (int)type ); + return foo; + } + } +} + +const char *GetGLTargetName( GLenum tgt ) +{ + if( tgt == GL_TEXTURE_2D ) return "GL_TEXTURE_2D"; + if( tgt == GL_TEXTURE_3D ) return "GL_TEXTURE_3D"; + if( tgt == GL_TEXTURE_RECTANGLE_EXT ) return "GL_TEXTURE_RECTANGLE_EXT"; + if( tgt == 
GL_TEXTURE_CUBE_MAP_POSITIVE_X ) return "GL_TEXTURE_CUBE_MAP_POSITIVE_X"; + if( tgt == GL_TEXTURE_CUBE_MAP_POSITIVE_Y ) return "GL_TEXTURE_CUBE_MAP_POSITIVE_Y"; + if( tgt == GL_TEXTURE_CUBE_MAP_POSITIVE_Z ) return "GL_TEXTURE_CUBE_MAP_POSITIVE_Z"; + if( tgt == GL_TEXTURE_CUBE_MAP_NEGATIVE_X ) return "GL_TEXTURE_CUBE_MAP_NEGATIVE_X"; + if( tgt == GL_TEXTURE_CUBE_MAP_NEGATIVE_Y ) return "GL_TEXTURE_CUBE_MAP_NEGATIVE_Y"; + if( tgt == GL_TEXTURE_CUBE_MAP_NEGATIVE_Z ) return "GL_TEXTURE_CUBE_MAP_NEGATIVE_Z"; + return ""; +} + +const char *GetGLAttachmentName( GLenum att ) +{ + if( att == GL_COLOR_ATTACHMENT0_EXT ) return "GL_COLOR_ATTACHMENT0_EXT"; + if( att == GL_COLOR_ATTACHMENT1_EXT ) return "GL_COLOR_ATTACHMENT1_EXT"; + if( att == GL_COLOR_ATTACHMENT2_EXT ) return "GL_COLOR_ATTACHMENT2_EXT"; + if( att == GL_COLOR_ATTACHMENT3_EXT ) return "GL_COLOR_ATTACHMENT3_EXT"; + if( att == GL_COLOR_ATTACHMENT4_EXT ) return "GL_COLOR_ATTACHMENT4_EXT"; + if( att == GL_COLOR_ATTACHMENT5_EXT ) return "GL_COLOR_ATTACHMENT5_EXT"; + if( att == GL_COLOR_ATTACHMENT6_EXT ) return "GL_COLOR_ATTACHMENT6_EXT"; + if( att == GL_COLOR_ATTACHMENT7_EXT ) return "GL_COLOR_ATTACHMENT7_EXT"; + if( att == GL_COLOR_ATTACHMENT8_EXT ) return "GL_COLOR_ATTACHMENT8_EXT"; + if( att == GL_DEPTH_ATTACHMENT_EXT ) return "GL_DEPTH_ATTACHMENT_EXT"; + return ""; +} +const char *GetGLBaseFormatName( GLenum baseformat ) +{ + switch( baseformat ) + { + case GL_RGBA8: return "GL_RGBA"; + case GL_RGBA16: return "GL_RGBA"; + case GL_RGBA: return "GL_RGBA"; + case GL_BGRA: return "GL_BGRA"; + case GL_RGBA8I_EXT: return "GL_RGBA_INTEGER_EXT"; + case GL_RGBA16I_EXT: return "GL_RGBA_INTEGER_EXT"; + case GL_RGBA32I_EXT: return "GL_RGBA_INTEGER_EXT"; + case GL_RGBA8UI_EXT: return "GL_RGBA_INTEGER_EXT"; + case GL_RGBA16UI_EXT: return "GL_RGBA_INTEGER_EXT"; + case GL_RGBA32UI_EXT: return "GL_RGBA_INTEGER_EXT"; + case GL_RGBA32F_ARB: return "GL_RGBA"; + + case GL_RGBA_INTEGER_EXT: return "GL_RGBA_INTEGER_EXT"; + + case 
GL_ALPHA4: return "GL_ALPHA"; + case GL_ALPHA8: return "GL_ALPHA"; + case GL_ALPHA12: return "GL_ALPHA"; + case GL_ALPHA16: return "GL_ALPHA"; + case GL_LUMINANCE4: return "GL_LUMINANCE"; + case GL_LUMINANCE8: return "GL_LUMINANCE"; + case GL_LUMINANCE12: return "GL_LUMINANCE"; + case GL_LUMINANCE16: return "GL_LUMINANCE"; + case GL_LUMINANCE4_ALPHA4: return "GL_LUMINANCE_ALPHA"; + case GL_LUMINANCE6_ALPHA2: return "GL_LUMINANCE_ALPHA"; + case GL_LUMINANCE8_ALPHA8: return "GL_LUMINANCE_ALPHA"; + case GL_LUMINANCE12_ALPHA4: return "GL_LUMINANCE_ALPHA"; + case GL_LUMINANCE12_ALPHA12: return "GL_LUMINANCE_ALPHA"; + case GL_LUMINANCE16_ALPHA16: return "GL_LUMINANCE_ALPHA"; + case GL_INTENSITY: return "GL_INTENSITY"; + case GL_INTENSITY4: return "GL_INTENSITY"; + case GL_INTENSITY8: return "GL_INTENSITY"; + case GL_INTENSITY12: return "GL_INTENSITY"; + case GL_INTENSITY16: return "GL_INTENSITY"; + case GL_R3_G3_B2: return "GL_RGB"; + case GL_RGB4: return "GL_RGB"; + case GL_RGB5: return "GL_RGB"; + case GL_RGB8: return "GL_RGB"; + case GL_RGB10: return "GL_RGB"; + case GL_RGB12: return "GL_RGB"; + case GL_RGB16: return "GL_RGB"; + case GL_RGBA2: return "GL_RGBA"; + case GL_RGBA4: return "GL_RGBA"; + case GL_RGB5_A1: return "GL_RGBA"; + case GL_RGB10_A2: return "GL_RGBA"; + case GL_RGBA12: return "GL_RGBA"; + + default: + { + static char foo[ 128 ]; + sprintf( foo, "(Unknown:0x%08x)", (int)baseformat ); + return foo; + } + } +} + +const char *GetGLFormatName( GLenum format ) +{ + switch( format ) + { + case GL_RGBA8: return "GL_RGBA8"; + case GL_RGBA16: return "GL_RGBA16"; + case GL_RGBA: return "GL_RGBA"; + case GL_BGRA: return "GL_BGRA"; + case GL_RGBA8I_EXT: return "GL_RGBA8I_EXT"; + case GL_RGBA16I_EXT: return "GL_RGBA16I_EXT"; + case GL_RGBA32I_EXT: return "GL_RGBA32I_EXT"; + case GL_RGBA8UI_EXT: return "GL_RGBA8UI_EXT"; + case GL_RGBA16UI_EXT: return "GL_RGBA16UI_EXT"; + case GL_RGBA32UI_EXT: return "GL_RGBA32UI_EXT"; + case GL_RGBA16F_ARB: return "GL_RGBA16F_ARB"; 
+ case GL_RGBA32F_ARB: return "GL_RGBA32F_ARB"; + + case GL_RGBA_INTEGER_EXT: return "GL_RGBA_INTEGER_EXT"; + + case GL_ALPHA4: return "GL_ALPHA4"; + case GL_ALPHA8: return "GL_ALPHA8"; + case GL_ALPHA12: return "GL_ALPHA12"; + case GL_ALPHA16: return "GL_ALPHA16"; + case GL_LUMINANCE4: return "GL_LUMINANCE4"; + case GL_LUMINANCE8: return "GL_LUMINANCE8"; + case GL_LUMINANCE12: return "GL_LUMINANCE12"; + case GL_LUMINANCE16: return "GL_LUMINANCE16"; + case GL_LUMINANCE4_ALPHA4: return "GL_LUMINANCE4_ALPHA4"; + case GL_LUMINANCE6_ALPHA2: return "GL_LUMINANCE6_ALPHA2"; + case GL_LUMINANCE8_ALPHA8: return "GL_LUMINANCE8_ALPHA8"; + case GL_LUMINANCE12_ALPHA4: return "GL_LUMINANCE12_ALPHA4"; + case GL_LUMINANCE12_ALPHA12: return "GL_LUMINANCE12_ALPHA12"; + case GL_LUMINANCE16_ALPHA16: return "GL_LUMINANCE16_ALPHA16"; + case GL_INTENSITY: return "GL_INTENSITY"; + case GL_INTENSITY4: return "GL_INTENSITY4"; + case GL_INTENSITY8: return "GL_INTENSITY8"; + case GL_INTENSITY12: return "GL_INTENSITY12"; + case GL_INTENSITY16: return "GL_INTENSITY16"; + case GL_R3_G3_B2: return "GL_R3_G3_B2"; + case GL_RGB4: return "GL_RGB4"; + case GL_RGB5: return "GL_RGB5"; + case GL_RGB8: return "GL_RGB8"; + case GL_RGB10: return "GL_RGB10"; + case GL_RGB12: return "GL_RGB12"; + case GL_RGB16: return "GL_RGB16"; + case GL_RGBA2: return "GL_RGBA2"; + case GL_RGBA4: return "GL_RGBA4"; + case GL_RGB5_A1: return "GL_RGB5_A1"; + case GL_RGB10_A2: return "GL_RGB10_A2"; + case GL_RGBA12: return "GL_RGBA12"; + + case GL_INT: return "GL_INT"; + case GL_UNSIGNED_INT: return "GL_UNSIGNED_INT"; + case GL_SHORT: return "GL_SHORT"; + case GL_UNSIGNED_SHORT: return "GL_UNSIGNED_SHORT"; + case GL_BYTE: return "GL_BYTE"; + case GL_UNSIGNED_BYTE: return "GL_UNSIGNED_BYTE"; + case GL_FLOAT: return "GL_FLOAT"; +#if defined( __APPLE__ ) + case GL_HALF_FLOAT: return "GL_HALF_FLOAT"; +#else + case GL_HALF_FLOAT_ARB: return "GL_HALF_FLOAT_ARB"; +#endif + + default: + { + static char foo[ 128 ]; + sprintf( foo, 
"(Unknown:0x%08x)", (int)format ); + return foo; + } + } +} + +void* CreateRandomData( ExplicitType type, size_t count, MTdata d ) +{ + switch(type) + { + case (kChar): + { + cl_char *p = (cl_char *)malloc(count * sizeof(cl_char)); + if(!p) return 0; + + for( size_t i = 0; i < count; i++ ) + { + p[ i ] = (cl_char)genrand_int32(d); + } + return (void*)p; + } + case (kUChar): + { + cl_uchar *p = (cl_uchar *)malloc(count * sizeof(cl_uchar)); + if(!p) return 0; + + for( size_t i = 0; i < count; i++ ) + { + p[ i ] = (cl_uchar)genrand_int32(d); + } + + return (void*)p; + } + case (kShort): + { + cl_short *p = (cl_short *)malloc(count * sizeof(cl_short)); + if(!p) return 0; + + for( size_t i = 0; i < count; i++ ) + { + p[ i ] = (cl_short)genrand_int32(d); + } + + return (void*)p; + } + case (kUShort): + { + cl_ushort *p = (cl_ushort *)malloc(count * sizeof(cl_ushort)); + if(!p) return 0; + + for( size_t i = 0; i < count; i++ ) + { + p[ i ] = (cl_ushort)genrand_int32(d); + } + + return (void*)p; + } + case (kInt): + { + cl_int *p = (cl_int *)malloc(count * sizeof(cl_int)); + if(!p) return 0; + + for( size_t i = 0; i < count; i++ ) + { + p[ i ] = (cl_int)genrand_int32(d); + } + + return (void*)p; + } + case (kUInt): + { + cl_uint *p = (cl_uint *)malloc(count * sizeof(cl_uint)); + if(!p) return 0; + + for( size_t i = 0; i < count; i++ ) + { + p[ i ] = (cl_uint)genrand_int32(d); + } + + return (void*)p; + } + + case (kFloat): + { + cl_float *p = (cl_float *)malloc(count * sizeof(cl_float)); + if(!p) return 0; + + for( size_t i = 0; i < count; i++ ) + { + p[ i ] = get_random_float( 0.f, 1.f, d ); + } + + return (void*)p; + } + case (kHalf): + { + cl_half *p = (cl_half *)malloc(count * sizeof(cl_half)); + if(!p) return 0; + + for( size_t i = 0; i < count; i++ ) + { + p[ i ] = convert_float_to_half( get_random_float( 0.f, 1.f, d ) ); + } + + return (void*)p; + } + default: + { + log_error("Invalid explicit type specified for create random data!\n"); + return 0; + } + } + return 
0; +} + +void DumpGLBuffer(GLenum type, size_t width, size_t height, void* buffer) +{ + size_t i; + size_t count = width * height; + if(type == GL_BYTE) + { + cl_char* p = (cl_char*)buffer; + for(i = 0; i < count; i++) + log_info("[%4d] %3d %3d %3d %3d\n", (unsigned int)(i), + p[i* 4 + 0], + p[i* 4 + 1], + p[i* 4 + 2], + p[i* 4 + 3]); + } + else if(type == GL_UNSIGNED_BYTE) + { + cl_uchar* p = (cl_uchar*)buffer; + for(i = 0; i < count; i++) + log_info("[%4d] %3d %3d %3d %3d\n", (unsigned int)(i), + p[i* 4 + 0], + p[i* 4 + 1], + p[i* 4 + 2], + p[i* 4 + 3]); + } + else if(type == GL_INT) + { + cl_int* p = (cl_int*)buffer; + for(i = 0; i < count; i++) + log_info("[%4d] %3d %3d %3d %3d\n", (unsigned int)(i), + p[i* 4 + 0], + p[i* 4 + 1], + p[i* 4 + 2], + p[i* 4 + 3]); + } + else if(type == GL_UNSIGNED_INT) + { + cl_uint* p = (cl_uint*)buffer; + for(i = 0; i < count; i++) + log_info("[%4d] %3d %3d %3d %3d\n", (unsigned int)(i), + p[i* 4 + 0], + p[i* 4 + 1], + p[i* 4 + 2], + p[i* 4 + 3]); + } + else if(type == GL_SHORT) + { + cl_short* p = (cl_short*)buffer; + for(i = 0; i < count; i++) + log_info("[%4d] %3d %3d %3d %3d\n", (unsigned int)(i), + p[i* 4 + 0], + p[i* 4 + 1], + p[i* 4 + 2], + p[i* 4 + 3]); + } + else if(type == GL_UNSIGNED_SHORT) + { + cl_ushort* p = (cl_ushort*)buffer; + for(i = 0; i < count; i++) + log_info("[%4d] %3d %3d %3d %3d\n", (unsigned int)(i), + p[i* 4 + 0], + p[i* 4 + 1], + p[i* 4 + 2], + p[i* 4 + 3]); + } + else if(type == GL_FLOAT) + { + cl_float* p = (cl_float*)buffer; + for(i = 0; i < count; i++) + log_info("[%4d] %#f %#f %#f %#f\n", (unsigned int)(i), + p[i* 4 + 0], + p[i* 4 + 1], + p[i* 4 + 2], + p[i* 4 + 3]); + } +} + +#if defined(_WIN32) +#include + +GLboolean gluCheckExtension(const GLubyte *extName, const GLubyte *extString) +{ + const size_t len = strlen((const char*)extName); + const char* str = (const char*)extString; + + while (str != NULL) { + str = strstr(str, (const char*)extName); + if (str == NULL) { + break; + } + if ((str > 
(const char*)extString || str[-1] == ' ') + && (str[len] == ' ' || str[len] == '\0')) { + return GL_TRUE; + } + str = strchr(str + len, ' '); + } + + return GL_FALSE; +} + +#endif + +// Function pointers for the GL/CL calls +clCreateFromGLBuffer_fn clCreateFromGLBuffer_ptr; +clCreateFromGLTexture_fn clCreateFromGLTexture_ptr; +clCreateFromGLTexture2D_fn clCreateFromGLTexture2D_ptr; +clCreateFromGLTexture3D_fn clCreateFromGLTexture3D_ptr; +clCreateFromGLRenderbuffer_fn clCreateFromGLRenderbuffer_ptr; +clGetGLObjectInfo_fn clGetGLObjectInfo_ptr; +clGetGLTextureInfo_fn clGetGLTextureInfo_ptr; +clEnqueueAcquireGLObjects_fn clEnqueueAcquireGLObjects_ptr; +clEnqueueReleaseGLObjects_fn clEnqueueReleaseGLObjects_ptr; + +int init_clgl_ext() { + + // As OpenCL for the platforms. Warn if more than one platform found, + // since this might not be the platform we want. By default, we simply + // use the first returned platform. + + cl_uint nplatforms; + cl_platform_id platform; + clGetPlatformIDs(0, NULL, &nplatforms); + clGetPlatformIDs(1, &platform, NULL); + + if (nplatforms > 1) { + log_info("clGetPlatformIDs returned multiple values. This is not " + "an error, but might result in obtaining incorrect function " + "pointers if you do not want the first returned platform.\n"); + + // Show them the platform name, in case it is a problem. 
+ + size_t size; + char *name; + + clGetPlatformInfo(platform, CL_PLATFORM_NAME, 0, NULL, &size); + name = (char*)malloc(size); + clGetPlatformInfo(platform, CL_PLATFORM_NAME, size, name, NULL); + + log_info("Using platform with name: %s \n", name); + free(name); + } + + // Create the function pointer table + clCreateFromGLBuffer_ptr = (clCreateFromGLBuffer_fn)clGetExtensionFunctionAddressForPlatform(platform,"clCreateFromGLBuffer"); + if (clCreateFromGLBuffer_ptr == NULL) { + log_error("clGetExtensionFunctionAddressForPlatform(platform,clCreateFromGLBuffer) returned NULL.\n"); + return -1; + } + clCreateFromGLTexture2D_ptr = (clCreateFromGLTexture2D_fn)clGetExtensionFunctionAddressForPlatform(platform,"clCreateFromGLTexture2D"); + if (clCreateFromGLTexture2D_ptr == NULL) { + log_error("clGetExtensionFunctionAddressForPlatform(platform,clCreateFromGLTexture2D) returned NULL.\n"); + return -1; + } + clCreateFromGLTexture3D_ptr = (clCreateFromGLTexture3D_fn)clGetExtensionFunctionAddressForPlatform(platform,"clCreateFromGLTexture3D"); + if (clCreateFromGLTexture3D_ptr == NULL) { + log_error("clGetExtensionFunctionAddressForPlatform(platform,clCreateFromGLTexture3D\") returned NULL.\n"); + return -1; + } + clCreateFromGLTexture_ptr = (clCreateFromGLTexture_fn)clGetExtensionFunctionAddressForPlatform(platform,"clCreateFromGLTexture"); + if (clCreateFromGLTexture_ptr == NULL) { + log_error("clGetExtensionFunctionAddressForPlatform(platform,\"clCreateFromGLTexture\") returned NULL.\n"); + return -1; + } + clCreateFromGLRenderbuffer_ptr = (clCreateFromGLRenderbuffer_fn)clGetExtensionFunctionAddressForPlatform(platform,"clCreateFromGLRenderbuffer"); + if (clCreateFromGLRenderbuffer_ptr == NULL) { + log_error("clGetExtensionFunctionAddressForPlatform(platform,clCreateFromGLRenderbuffer) returned NULL.\n"); + return -1; + } + clGetGLObjectInfo_ptr = (clGetGLObjectInfo_fn)clGetExtensionFunctionAddressForPlatform(platform,"clGetGLObjectInfo"); + if (clGetGLObjectInfo_ptr == 
NULL) { + log_error("clGetExtensionFunctionAddressForPlatform(platform,clGetGLObjectInfo) returned NULL.\n"); + return -1; + } + clGetGLTextureInfo_ptr = (clGetGLTextureInfo_fn)clGetExtensionFunctionAddressForPlatform(platform,"clGetGLTextureInfo"); + if (clGetGLTextureInfo_ptr == NULL) { + log_error("clGetExtensionFunctionAddressForPlatform(platform,clGetGLTextureInfo) returned NULL.\n"); + return -1; + } + clEnqueueAcquireGLObjects_ptr = (clEnqueueAcquireGLObjects_fn)clGetExtensionFunctionAddressForPlatform(platform,"clEnqueueAcquireGLObjects"); + if (clEnqueueAcquireGLObjects_ptr == NULL) { + log_error("clGetExtensionFunctionAddressForPlatform(platform,clEnqueueAcquireGLObjects) returned NULL.\n"); + return -1; + } + clEnqueueReleaseGLObjects_ptr = (clEnqueueReleaseGLObjects_fn)clGetExtensionFunctionAddressForPlatform(platform,"clEnqueueReleaseGLObjects"); + if (clEnqueueReleaseGLObjects_ptr == NULL) { + log_error("clGetExtensionFunctionAddressForPlatform(platform,clEnqueueReleaseGLObjects) returned NULL.\n"); + return -1; + } + + return 0; +} + + diff --git a/test_conformance/compatibility/test_common/gl/helpers.h b/test_conformance/compatibility/test_common/gl/helpers.h new file mode 100644 index 00000000..86e7dde9 --- /dev/null +++ b/test_conformance/compatibility/test_common/gl/helpers.h @@ -0,0 +1,283 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef _helpers_h +#define _helpers_h + +#include "../harness/compat.h" + +#include +#include +#include +#include + +#if !defined (__APPLE__) +#include +#include "gl_headers.h" +#include +#else +#include "gl_headers.h" +#endif + +#include "../../test_common/harness/errorHelpers.h" +#include "../../test_common/harness/kernelHelpers.h" +#include "../../test_common/harness/threadTesting.h" +#include "../../test_common/harness/typeWrappers.h" +#include "../../test_common/harness/conversions.h" +#include "../../test_common/harness/mt19937.h" + +typedef cl_mem +(CL_API_CALL *clCreateFromGLBuffer_fn)(cl_context context, + cl_mem_flags flags, + GLuint bufobj, + int * errcode_ret); + +typedef cl_mem +(CL_API_CALL *clCreateFromGLTexture_fn)(cl_context context , + cl_mem_flags flags , + GLenum target , + GLint miplevel , + GLuint texture , + cl_int * errcode_ret) ; + +typedef cl_mem +(CL_API_CALL *clCreateFromGLTexture2D_fn)(cl_context context , + cl_mem_flags flags , + GLenum target , + GLint miplevel , + GLuint texture , + cl_int * errcode_ret) ; + +typedef cl_mem +(CL_API_CALL *clCreateFromGLTexture3D_fn)(cl_context context , + cl_mem_flags flags , + GLenum target , + GLint miplevel , + GLuint texture , + cl_int * errcode_ret) ; + +typedef cl_mem +(CL_API_CALL *clCreateFromGLRenderbuffer_fn)(cl_context context , + cl_mem_flags flags , + GLuint renderbuffer , + cl_int * errcode_ret) ; + +typedef cl_int +(CL_API_CALL *clGetGLObjectInfo_fn)(cl_mem memobj , + cl_gl_object_type * gl_object_type , + GLuint * gl_object_name) ; + +typedef cl_int +(CL_API_CALL *clGetGLTextureInfo_fn)(cl_mem memobj , + cl_gl_texture_info param_name , + size_t param_value_size , + void * param_value , + size_t * param_value_size_ret) ; + +typedef cl_int +(CL_API_CALL *clEnqueueAcquireGLObjects_fn)(cl_command_queue command_queue , + cl_uint num_objects , + const cl_mem * mem_objects , + cl_uint num_events_in_wait_list , + const cl_event * event_wait_list , + cl_event * event) ; + +typedef cl_int 
+(CL_API_CALL *clEnqueueReleaseGLObjects_fn)(cl_command_queue command_queue , + cl_uint num_objects , + const cl_mem * mem_objects , + cl_uint num_events_in_wait_list , + const cl_event * event_wait_list , + cl_event * event) ; + + +extern clCreateFromGLBuffer_fn clCreateFromGLBuffer_ptr; +extern clCreateFromGLTexture_fn clCreateFromGLTexture_ptr; +extern clCreateFromGLTexture2D_fn clCreateFromGLTexture2D_ptr; +extern clCreateFromGLTexture3D_fn clCreateFromGLTexture3D_ptr; +extern clCreateFromGLRenderbuffer_fn clCreateFromGLRenderbuffer_ptr; +extern clGetGLObjectInfo_fn clGetGLObjectInfo_ptr; +extern clGetGLTextureInfo_fn clGetGLTextureInfo_ptr; +extern clEnqueueAcquireGLObjects_fn clEnqueueAcquireGLObjects_ptr; +extern clEnqueueReleaseGLObjects_fn clEnqueueReleaseGLObjects_ptr; + + +class glBufferWrapper +{ + public: + glBufferWrapper() { mBuffer = 0; } + glBufferWrapper( GLuint b ) { mBuffer = b; } + ~glBufferWrapper() { if( mBuffer != 0 ) glDeleteBuffers( 1, &mBuffer ); } + + glBufferWrapper & operator=( const GLuint &rhs ) { mBuffer = rhs; return *this; } + operator GLuint() { return mBuffer; } + operator GLuint *() { return &mBuffer; } + + GLuint * operator&() { return &mBuffer; } + + bool operator==( GLuint rhs ) { return mBuffer == rhs; } + + protected: + + GLuint mBuffer; +}; + +class glTextureWrapper +{ + public: + glTextureWrapper() { mHandle = 0; } + glTextureWrapper( GLuint b ) { mHandle = b; } + ~glTextureWrapper() { + if( mHandle != 0 ) glDeleteTextures( 1, &mHandle ); + } + + glTextureWrapper & operator=( const GLuint &rhs ) { mHandle = rhs; return *this; } + operator GLuint() { return mHandle; } + operator GLuint *() { return &mHandle; } + + GLuint * operator&() { return &mHandle; } + + bool operator==( GLuint rhs ) { return mHandle == rhs; } + + protected: + + // The texture handle. 
+ GLuint mHandle; +}; + +class glRenderbufferWrapper +{ + public: + glRenderbufferWrapper() { mBuffer = 0; } + glRenderbufferWrapper( GLuint b ) { mBuffer = b; } + ~glRenderbufferWrapper() { if( mBuffer != 0 ) glDeleteRenderbuffersEXT( 1, &mBuffer ); } + + glRenderbufferWrapper & operator=( const GLuint &rhs ) { mBuffer = rhs; return *this; } + operator GLuint() { return mBuffer; } + operator GLuint *() { return &mBuffer; } + + GLuint * operator&() { return &mBuffer; } + + bool operator==( GLuint rhs ) { return mBuffer == rhs; } + + protected: + + GLuint mBuffer; +}; + +class glFramebufferWrapper +{ + public: + glFramebufferWrapper() { mBuffer = 0; } + glFramebufferWrapper( GLuint b ) { mBuffer = b; } + ~glFramebufferWrapper() { if( mBuffer != 0 ) glDeleteFramebuffersEXT( 1, &mBuffer ); } + + glFramebufferWrapper & operator=( const GLuint &rhs ) { mBuffer = rhs; return *this; } + operator GLuint() { return mBuffer; } + operator GLuint *() { return &mBuffer; } + + GLuint * operator&() { return &mBuffer; } + + bool operator==( GLuint rhs ) { return mBuffer == rhs; } + + protected: + + GLuint mBuffer; +}; + + +// Helper functions (defined in helpers.cpp) + +extern void * CreateGLTexture1DArray( size_t width, size_t length, + GLenum target, GLenum glFormat, GLenum internalFormat, GLenum glType, + ExplicitType type, GLuint *outTextureID, int *outError, + bool allocateMem, MTdata d); + +extern void * CreateGLTexture2DArray( size_t width, size_t height, size_t length, + GLenum target, GLenum glFormat, GLenum internalFormat, GLenum glType, + ExplicitType type, GLuint *outTextureID, int *outError, + bool allocateMem, MTdata d); + +extern void * CreateGLTextureBuffer( size_t width, + GLenum target, GLenum glFormat, GLenum internalFormat, GLenum glType, + ExplicitType type, GLuint *outTex, GLuint *outBuf, int *outError, + bool allocateMem, MTdata d); + +extern void * CreateGLTexture1D(size_t width, + GLenum target, GLenum glFormat, + GLenum internalFormat, GLenum glType, + 
ExplicitType type, GLuint *outTextureID, + int *outError, bool allocateMem, MTdata d ); + +extern void * CreateGLTexture2D( size_t width, size_t height, + GLenum target, GLenum glFormat, + GLenum internalFormat, GLenum glType, + ExplicitType type, GLuint *outTextureID, + int *outError, bool allocateMem, MTdata d ); + + +extern void * CreateGLTexture3D( size_t width, size_t height, size_t depth, + GLenum target, GLenum glFormat, + GLenum internalFormat, GLenum glType, + ExplicitType type, GLuint *outTextureID, + int *outError, MTdata d, bool allocateMem = true ); + +extern void * ReadGLTexture( GLenum glTarget, GLuint glTexture, GLuint glBuf, GLint width, + GLenum glFormat, GLenum glInternalFormat, + GLenum glType, ExplicitType typeToReadAs, + size_t outWidth, size_t outHeight ); + +extern int CreateGLRenderbufferRaw( GLsizei width, GLsizei height, + GLenum target, GLenum glFormat, + GLenum internalFormat, GLenum glType, + GLuint *outFramebuffer, + GLuint *outRenderbuffer ); + +extern void * CreateGLRenderbuffer( GLsizei width, GLsizei height, + GLenum target, GLenum glFormat, + GLenum internalFormat, GLenum glType, + ExplicitType type, + GLuint *outFramebuffer, + GLuint *outRenderbuffer, + int *outError, MTdata d, bool allocateMem ); + +extern void * ReadGLRenderbuffer( GLuint glFramebuffer, GLuint glRenderbuffer, + GLenum attachment, GLenum glFormat, + GLenum glInternalFormat, GLenum glType, + ExplicitType typeToReadAs, + size_t outWidth, size_t outHeight ); + +extern void DumpGLBuffer(GLenum type, size_t width, size_t height, void* buffer); +extern const char *GetGLTypeName( GLenum type ); +extern const char *GetGLAttachmentName( GLenum att ); +extern const char *GetGLTargetName( GLenum tgt ); +extern const char *GetGLBaseFormatName( GLenum baseformat ); +extern const char *GetGLFormatName( GLenum format ); + +extern void* CreateRandomData( ExplicitType type, size_t count, MTdata d ); + +extern GLenum GetGLFormat(GLenum internalFormat); +extern GLenum 
GetGLTypeForExplicitType(ExplicitType type); +extern size_t GetGLTypeSize(GLenum type); +extern ExplicitType GetExplicitTypeForGLType(GLenum type); + +extern GLenum get_base_gl_target( GLenum target ); + +extern int init_clgl_ext( void ); + +#endif // _helpers_h + + + diff --git a/test_conformance/compatibility/test_common/gl/setup.h b/test_conformance/compatibility/test_common/gl/setup.h new file mode 100644 index 00000000..6ee810bb --- /dev/null +++ b/test_conformance/compatibility/test_common/gl/setup.h @@ -0,0 +1,48 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef _setup_h +#define _setup_h + +#include +#include +#include +#include "gl_headers.h" +#ifdef __APPLE__ +#include +#else +#include +#endif + + +// Note: the idea here is to have every platform define their own setup.cpp file that implements a GLEnvironment +// subclass internally, then return it as a definition for GLEnvironment::Create + +class GLEnvironment +{ + public: + GLEnvironment() {} + virtual ~GLEnvironment() {} + + virtual int Init( int *argc, char **argv, int use_opengl_32 ) = 0; + virtual cl_context CreateCLContext( void ) = 0; + virtual int SupportsCLGLInterop( cl_device_type device_type) = 0; + + static GLEnvironment * Instance( void ); + + +}; + +#endif // _setup_h diff --git a/test_conformance/compatibility/test_common/gl/setup_osx.cpp b/test_conformance/compatibility/test_common/gl/setup_osx.cpp new file mode 100644 index 00000000..01641be0 --- /dev/null +++ b/test_conformance/compatibility/test_common/gl/setup_osx.cpp @@ -0,0 +1,156 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "setup.h" +#include "../../test_common/harness/errorHelpers.h" +#include + +class OSXGLEnvironment : public GLEnvironment +{ + public: + OSXGLEnvironment() + { + mCGLContext = NULL; + } + + virtual int Init( int *argc, char **argv, int use_opengl_32 ) + { + if (!use_opengl_32) { + + // Create a GLUT window to render into + glutInit( argc, argv ); + glutInitWindowSize( 512, 512 ); + glutInitDisplayMode( GLUT_RGB | GLUT_DOUBLE ); + glutCreateWindow( "OpenCL <-> OpenGL Test" ); + } + + else { + + CGLPixelFormatAttribute attribs[] = { + kCGLPFAOpenGLProfile, (CGLPixelFormatAttribute)kCGLOGLPVersion_3_2_Core, + kCGLPFAAllowOfflineRenderers, + kCGLPFANoRecovery, + kCGLPFAAccelerated, + kCGLPFADoubleBuffer, + (CGLPixelFormatAttribute)0 + }; + + CGLError err; + CGLPixelFormatObj pix; + GLint npix; + err = CGLChoosePixelFormat (attribs, &pix, &npix); + if(err != kCGLNoError) + { + log_error("Failed to choose pixel format\n"); + return -1; + } + err = CGLCreateContext(pix, NULL, &mCGLContext); + if(err != kCGLNoError) + { + log_error("Failed to create GL context\n"); + return -1; + } + CGLSetCurrentContext(mCGLContext); + } + + return 0; + } + + virtual cl_context CreateCLContext( void ) + { + int error; + + if( mCGLContext == NULL ) + mCGLContext = CGLGetCurrentContext(); + + CGLShareGroupObj share_group = CGLGetShareGroup(mCGLContext); + cl_context_properties properties[] = { CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE, (cl_context_properties)share_group, 0 }; + cl_context context = clCreateContext(properties, 0, 0, 0, 0, &error); + if (error) { + print_error(error, "clCreateContext failed"); + return NULL; + } + + // Verify that all devices in the context support the required extension + cl_device_id devices[64]; + size_t size_out; + error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices), devices, &size_out); + if (error) { + print_error(error, "clGetContextInfo failed"); + return NULL; + } + + char extensions[8192]; + for (int i=0; 
i<(int)(size_out/sizeof(cl_device_id)); i++) { + error = clGetDeviceInfo(devices[i], CL_DEVICE_EXTENSIONS, sizeof(extensions), extensions, NULL); + if (error) { + print_error(error, "clGetDeviceInfo failed"); + return NULL; + } + + if (strstr(extensions, "cl_APPLE_gl_sharing") == NULL) { + log_error("Device %d does not supporte required extension cl_APPLE_gl_sharing.\n", i); + return NULL; + } + } + return context; + } + + virtual int SupportsCLGLInterop( cl_device_type device_type ) + { + int found_valid_device = 0; + cl_device_id devices[64]; + cl_uint num_of_devices; + int error; + error = clGetDeviceIDs(NULL, device_type, 64, devices, &num_of_devices); + if (error) { + print_error(error, "clGetDeviceIDs failed"); + return -1; + } + + char extensions[8192]; + for (int i=0; i<(int)num_of_devices; i++) { + error = clGetDeviceInfo(devices[i], CL_DEVICE_EXTENSIONS, sizeof(extensions), extensions, NULL); + if (error) { + print_error(error, "clGetDeviceInfo failed"); + return -1; + } + + if (strstr(extensions, "cl_APPLE_gl_sharing") == NULL) { + log_info("Device %d of %d does not support required extension cl_APPLE_gl_sharing.\n", i, num_of_devices); + } else { + log_info("Device %d of %d does support required extension cl_APPLE_gl_sharing.\n", i, num_of_devices); + found_valid_device = 1; + } + } + return found_valid_device; + } + + virtual ~OSXGLEnvironment() + { + CGLDestroyContext( mCGLContext ); + } + + CGLContextObj mCGLContext; + +}; + +GLEnvironment * GLEnvironment::Instance( void ) +{ + static OSXGLEnvironment * env = NULL; + if( env == NULL ) + env = new OSXGLEnvironment(); + return env; +} diff --git a/test_conformance/compatibility/test_common/gl/setup_win32.cpp b/test_conformance/compatibility/test_common/gl/setup_win32.cpp new file mode 100644 index 00000000..ff40dc25 --- /dev/null +++ b/test_conformance/compatibility/test_common/gl/setup_win32.cpp @@ -0,0 +1,204 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#define GL_GLEXT_PROTOTYPES + +#include "setup.h" +#include "testBase.h" +#include "../../test_common/harness/errorHelpers.h" + +#include +#include +#include +#include +#include + +typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)( + const cl_context_properties *properties, + cl_gl_context_info param_name, + size_t param_value_size, + void *param_value, + size_t *param_value_size_ret); + +// Rename references to this dynamically linked function to avoid +// collision with static link version +#define clGetGLContextInfoKHR clGetGLContextInfoKHR_proc +static clGetGLContextInfoKHR_fn clGetGLContextInfoKHR; + +#define MAX_DEVICES 32 + +class WGLEnvironment : public GLEnvironment +{ +private: + cl_device_id m_devices[MAX_DEVICES]; + int m_device_count; + cl_platform_id m_platform; + +public: + WGLEnvironment() + { + m_device_count = 0; + m_platform = 0; + + } + virtual int Init( int *argc, char **argv, int use_opengl_32 ) + { + // Create a GLUT window to render into + glutInit( argc, argv ); + glutInitWindowSize( 512, 512 ); + glutInitDisplayMode( GLUT_RGB | GLUT_DOUBLE ); + glutCreateWindow( "OpenCL <-> OpenGL Test" ); + glewInit(); + return 0; + } + + virtual cl_context CreateCLContext( void ) + { + HGLRC hGLRC = wglGetCurrentContext(); + HDC hDC = wglGetCurrentDC(); + cl_context_properties properties[] = { + CL_CONTEXT_PLATFORM, (cl_context_properties) m_platform, + CL_GL_CONTEXT_KHR, 
(cl_context_properties) hGLRC, + CL_WGL_HDC_KHR, (cl_context_properties) hDC, + 0 + }; + cl_device_id devices[MAX_DEVICES]; + size_t dev_size; + cl_int status; + + if (!hGLRC || !hDC) { + print_error(CL_INVALID_CONTEXT, "No GL context bound"); + return 0; + } + + if (!clGetGLContextInfoKHR) { + // As OpenCL for the platforms. Warn if more than one platform found, + // since this might not be the platform we want. By default, we simply + // use the first returned platform. + + cl_uint nplatforms; + cl_platform_id platform; + clGetPlatformIDs(0, NULL, &nplatforms); + clGetPlatformIDs(1, &platform, NULL); + + if (nplatforms > 1) { + log_info("clGetPlatformIDs returned multiple values. This is not " + "an error, but might result in obtaining incorrect function " + "pointers if you do not want the first returned platform.\n"); + + // Show them the platform name, in case it is a problem. + + size_t size; + char *name; + + clGetPlatformInfo(platform, CL_PLATFORM_NAME, 0, NULL, &size); + name = (char*)malloc(size); + clGetPlatformInfo(platform, CL_PLATFORM_NAME, size, name, NULL); + + log_info("Using platform with name: %s \n", name); + free(name); + } + + clGetGLContextInfoKHR = (clGetGLContextInfoKHR_fn) clGetExtensionFunctionAddressForPlatform(platform, "clGetGLContextInfoKHR"); + if (!clGetGLContextInfoKHR) { + print_error(CL_INVALID_PLATFORM, "Failed to query proc address for clGetGLContextInfoKHR"); + } + } + + status = clGetGLContextInfoKHR(properties, + CL_DEVICES_FOR_GL_CONTEXT_KHR, + sizeof(devices), + devices, + &dev_size); + if (status != CL_SUCCESS) { + print_error(status, "clGetGLContextInfoKHR failed"); + return 0; + } + dev_size /= sizeof(cl_device_id); + log_info("GL context supports %d compute devices\n", dev_size); + + status = clGetGLContextInfoKHR(properties, + CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR, + sizeof(devices), + devices, + &dev_size); + if (status != CL_SUCCESS) { + print_error(status, "clGetGLContextInfoKHR failed"); + return 0; + } + + 
cl_device_id ctxDevice = m_devices[0]; + if (dev_size > 0) { + log_info("GL context current device: 0x%x\n", devices[0]); + for (int i = 0; i < m_device_count; i++) { + if (m_devices[i] == devices[0]) { + ctxDevice = devices[0]; + break; + } + } + } else { + log_info("GL context current device is not a CL device, using device %d.\n", ctxDevice); + } + + return clCreateContext(properties, 1, &ctxDevice, NULL, NULL, &status); + } + + virtual int SupportsCLGLInterop( cl_device_type device_type ) + { + cl_device_id devices[MAX_DEVICES]; + cl_uint num_of_devices; + int error; + error = clGetPlatformIDs(1, &m_platform, NULL); + if (error) { + print_error(error, "clGetPlatformIDs failed"); + return -1; + } + error = clGetDeviceIDs(m_platform, device_type, MAX_DEVICES, devices, &num_of_devices); + if (error) { + print_error(error, "clGetDeviceIDs failed"); + return -1; + } + + // Check all devices, search for one that supports cl_khr_gl_sharing + char extensions[8192]; + for (int i=0; i<(int)num_of_devices; i++) { + error = clGetDeviceInfo(devices[i], CL_DEVICE_EXTENSIONS, sizeof(extensions), extensions, NULL); + if (error) { + print_error(error, "clGetDeviceInfo failed"); + return -1; + } + + if (strstr(extensions, "cl_khr_gl_sharing") == NULL) { + log_info("Device %d of %d does not support required extension cl_khr_gl_sharing.\n", i+1, num_of_devices); + } else { + log_info("Device %d of %d supports required extension cl_khr_gl_sharing.\n", i+1, num_of_devices); + m_devices[m_device_count++] = devices[i]; + } + } + return m_device_count > 0; + } + + virtual ~WGLEnvironment() + { + } +}; + +GLEnvironment * GLEnvironment::Instance( void ) +{ + static WGLEnvironment * env = NULL; + if( env == NULL ) + env = new WGLEnvironment(); + return env; +} diff --git a/test_conformance/compatibility/test_common/gl/setup_x11.cpp b/test_conformance/compatibility/test_common/gl/setup_x11.cpp new file mode 100644 index 00000000..22bc7eed --- /dev/null +++ 
b/test_conformance/compatibility/test_common/gl/setup_x11.cpp @@ -0,0 +1,122 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#define GL_GLEXT_PROTOTYPES + +#include "setup.h" +#include "testBase.h" +#include "../../test_common/harness/errorHelpers.h" + +#include +#include +#include +#include +#include +#include + +class X11GLEnvironment : public GLEnvironment +{ +private: + cl_device_id m_devices[64]; + cl_uint m_device_count; + +public: + X11GLEnvironment() + { + m_device_count = 0; + } + virtual int Init( int *argc, char **argv, int use_opencl_32 ) + { + // Create a GLUT window to render into + glutInit( argc, argv ); + glutInitWindowSize( 512, 512 ); + glutInitDisplayMode( GLUT_RGB | GLUT_DOUBLE ); + glutCreateWindow( "OpenCL <-> OpenGL Test" ); + glewInit(); + return 0; + } + + virtual cl_context CreateCLContext( void ) + { + GLXContext context = glXGetCurrentContext(); + Display *dpy = glXGetCurrentDisplay(); + + cl_context_properties properties[] = { + CL_GL_CONTEXT_KHR, (cl_context_properties) context, + CL_GLX_DISPLAY_KHR, (cl_context_properties) dpy, + 0 + }; + cl_int status; + + if (!context || !dpy) { + print_error(CL_INVALID_CONTEXT, "No GL context bound"); + return 0; + } + + return clCreateContext(properties, 1, m_devices, NULL, NULL, &status); + } + + virtual int SupportsCLGLInterop( cl_device_type device_type ) + { + int found_valid_device = 0; + cl_platform_id platform; + 
cl_device_id devices[64]; + cl_uint num_of_devices; + int error; + error = clGetPlatformIDs(1, &platform, NULL); + if (error) { + print_error(error, "clGetPlatformIDs failed"); + return -1; + } + error = clGetDeviceIDs(platform, device_type, 64, devices, &num_of_devices); + // If this platform doesn't have any of the requested device_type (namely GPUs) then return 0 + if (error == CL_DEVICE_NOT_FOUND) + return 0; + if (error) { + print_error(error, "clGetDeviceIDs failed"); + return -1; + } + + char extensions[8192]; + for (int i=0; i<(int)num_of_devices; i++) { + error = clGetDeviceInfo(devices[i], CL_DEVICE_EXTENSIONS, sizeof(extensions), extensions, NULL); + if (error) { + print_error(error, "clGetDeviceInfo failed"); + return -1; + } + + if (strstr(extensions, "cl_khr_gl_sharing ") == NULL) { + log_info("Device %d of %d does not support required extension cl_khr_gl_sharing.\n", i+1, num_of_devices); + } else { + log_info("Device %d of %d supports required extension cl_khr_gl_sharing.\n", i+1, num_of_devices); + found_valid_device = 1; + m_devices[m_device_count++] = devices[i]; + } + } + return found_valid_device; + } + + virtual ~X11GLEnvironment() + { + } +}; + +GLEnvironment * GLEnvironment::Instance( void ) +{ + static X11GLEnvironment * env = NULL; + if( env == NULL ) + env = new X11GLEnvironment(); + return env; +} diff --git a/test_conformance/compatibility/test_common/harness/Jamfile b/test_conformance/compatibility/test_common/harness/Jamfile new file mode 100644 index 00000000..21ebcc24 --- /dev/null +++ b/test_conformance/compatibility/test_common/harness/Jamfile @@ -0,0 +1,18 @@ +project + : requirements . + gcc:"-xc++" + msvc:"/TP" + off + : usage-requirements . 
+ ; + +local harness.objs ; +for source in [ glob *.c *.cpp ] +{ + harness.objs += [ obj $(source:B).obj : $(source) ] ; +} + +alias harness : $(harness.objs) + : /Runtime//OpenCL.lib : + : /Runtime//OpenCL.lib + ; diff --git a/test_conformance/compatibility/test_common/harness/Makefile b/test_conformance/compatibility/test_common/harness/Makefile new file mode 100644 index 00000000..2ac60643 --- /dev/null +++ b/test_conformance/compatibility/test_common/harness/Makefile @@ -0,0 +1,41 @@ +ifdef BUILD_WITH_ATF +ATF = -framework ATF +USE_ATF = -DUSE_ATF +endif + +SRCS = conversions.c \ + errorHelpers.c \ + genericThread.cpp \ + imageHelpers.cpp \ + kernelHelpers.c \ + mt19937.c \ + rounding_mode.c \ + testHarness.c \ + testHarness.cpp \ + ThreadPool.c \ + threadTesting.c \ + typeWrappers.cpp + +DEFINES = DONT_TEST_GARBAGE_POINTERS + +SOURCES = $(abspath $(SRCS)) +LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries +LIBPATH += -L. +HEADERS = +INCLUDE = +COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32 +CC = c++ +CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE) +CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE) +LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF} + +OBJECTS := ${SOURCES:.c=.o} +OBJECTS := ${OBJECTS:.cpp=.o} + +all: $(OBJECTS) + +clean: + rm -f $(OBJECTS) + +.DEFAULT: + @echo The target \"$@\" does not exist in Makefile. diff --git a/test_conformance/compatibility/test_common/harness/ThreadPool.c b/test_conformance/compatibility/test_common/harness/ThreadPool.c new file mode 100644 index 00000000..a4598f2e --- /dev/null +++ b/test_conformance/compatibility/test_common/harness/ThreadPool.c @@ -0,0 +1,899 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "ThreadPool.h" +#include "errorHelpers.h" +#include "fpcontrol.h" +#include +#include + +#if defined( __APPLE__ ) || defined( __linux__ ) || defined( _WIN32 ) // or any other POSIX system + +#if defined( _WIN32 ) +#include +#if defined(_MSC_VER) +#include +#endif +#include "mingw_compat.h" +#include +#else // !_WIN32 +#include +#include +#include +#endif // !_WIN32 + +// declarations +#ifdef _WIN32 +void ThreadPool_WorkerFunc( void *p ); +#else +void *ThreadPool_WorkerFunc( void *p ); +#endif +void ThreadPool_Init(void); +void ThreadPool_Exit(void); + +#if defined (__MINGW32__) + // Mutex for implementing super heavy atomic operations if you don't have GCC or MSVC + CRITICAL_SECTION gAtomicLock; +#elif defined( __GNUC__ ) || defined( _MSC_VER) +#else + pthread_mutex_t gAtomicLock; +#endif + +// Atomic add operator with mem barrier. Mem barrier needed to protect state modified by the worker functions. +cl_int ThreadPool_AtomicAdd( volatile cl_int *a, cl_int b ) +{ +#if defined (__MINGW32__) + // No atomics on Mingw32 + EnterCriticalSection(&gAtomicLock); + cl_int old = *a; + *a = old + b; + LeaveCriticalSection(&gAtomicLock); + return old; +#elif defined( __GNUC__ ) + // GCC extension: http://gcc.gnu.org/onlinedocs/gcc/Atomic-Builtins.html#Atomic-Builtins + return __sync_fetch_and_add( a, b ); + // do we need __sync_synchronize() here, too? 
GCC docs are unclear whether __sync_fetch_and_add does a synchronize +#elif defined( _MSC_VER ) + return (cl_int) _InterlockedExchangeAdd( (volatile LONG*) a, (LONG) b ); +#else + #warning Please add a atomic add implementation here, with memory barrier. Fallback code is slow. + if( pthread_mutex_lock(&gAtomicLock) ) + log_error( "Atomic operation failed. pthread_mutex_lock(&gAtomicLock) returned an error\n"); + cl_int old = *a; + *a = old + b; + if( pthread_mutex_unlock(&gAtomicLock) ) + log_error( "Failed to release gAtomicLock. Further atomic operations may deadlock!\n"); + return old; +#endif +} + +#if defined( _WIN32 ) +// Uncomment the following line if Windows XP support is not required. +// #define HAS_INIT_ONCE_EXECUTE_ONCE 1 + +#if defined(HAS_INIT_ONCE_EXECUTE_ONCE) +#define _INIT_ONCE INIT_ONCE +#define _PINIT_ONCE PINIT_ONCE +#define _InitOnceExecuteOnce InitOnceExecuteOnce +#else // !HAS_INIT_ONCE_EXECUTE_ONCE + +typedef volatile LONG _INIT_ONCE; +typedef _INIT_ONCE *_PINIT_ONCE; +typedef BOOL (CALLBACK *_PINIT_ONCE_FN)(_PINIT_ONCE, PVOID, PVOID *); + +#define _INIT_ONCE_UNINITIALIZED 0 +#define _INIT_ONCE_IN_PROGRESS 1 +#define _INIT_ONCE_DONE 2 + +static BOOL _InitOnceExecuteOnce( + _PINIT_ONCE InitOnce, + _PINIT_ONCE_FN InitFn, + PVOID Parameter, + LPVOID *Context +) +{ + while ( *InitOnce != _INIT_ONCE_DONE ) + { + if (*InitOnce != _INIT_ONCE_IN_PROGRESS && _InterlockedCompareExchange( InitOnce, _INIT_ONCE_IN_PROGRESS, _INIT_ONCE_UNINITIALIZED ) == _INIT_ONCE_UNINITIALIZED ) + { + InitFn( InitOnce, Parameter, Context ); + *InitOnce = _INIT_ONCE_DONE; + return TRUE; + } + Sleep( 1 ); + } + return TRUE; +} +#endif // !HAS_INIT_ONCE_EXECUTE_ONCE + +// Uncomment the following line if Windows XP support is not required. 
+// #define HAS_CONDITION_VARIABLE 1 + +#if defined(HAS_CONDITION_VARIABLE) +#define _CONDITION_VARIABLE CONDITION_VARIABLE +#define _InitializeConditionVariable InitializeConditionVariable +#define _SleepConditionVariableCS SleepConditionVariableCS +#define _WakeAllConditionVariable WakeAllConditionVariable +#else // !HAS_CONDITION_VARIABLE +typedef struct +{ + HANDLE mEvent; // Used to park the thread. + CRITICAL_SECTION mLock[1]; // Used to protect mWaiters, mGeneration and mReleaseCount. + volatile cl_int mWaiters; // Number of threads waiting on this cond var. + volatile cl_int mGeneration; // Wait generation count. + volatile cl_int mReleaseCount; // Number of releases to execute before reseting the event. +} _CONDITION_VARIABLE; + +typedef _CONDITION_VARIABLE *_PCONDITION_VARIABLE; + +static void _InitializeConditionVariable( _PCONDITION_VARIABLE cond_var ) +{ + cond_var->mEvent = CreateEvent( NULL, TRUE, FALSE, NULL ); + InitializeCriticalSection( cond_var->mLock ); + cond_var->mWaiters = 0; + cond_var->mGeneration = 0; +#if !defined ( NDEBUG ) + cond_var->mReleaseCount = 0; +#endif // !NDEBUG +} + +static void _SleepConditionVariableCS( _PCONDITION_VARIABLE cond_var, PCRITICAL_SECTION cond_lock, DWORD ignored) +{ + EnterCriticalSection( cond_var->mLock ); + cl_int generation = cond_var->mGeneration; + ++cond_var->mWaiters; + LeaveCriticalSection( cond_var->mLock ); + LeaveCriticalSection( cond_lock ); + + while ( TRUE ) + { + WaitForSingleObject( cond_var->mEvent, INFINITE ); + EnterCriticalSection( cond_var->mLock ); + BOOL done = cond_var->mReleaseCount > 0 && cond_var->mGeneration != generation; + LeaveCriticalSection( cond_var->mLock ); + if ( done ) + { + break; + } + } + + EnterCriticalSection( cond_lock ); + EnterCriticalSection( cond_var->mLock ); + if ( --cond_var->mReleaseCount == 0 ) + { + ResetEvent( cond_var->mEvent ); + } + --cond_var->mWaiters; + LeaveCriticalSection( cond_var->mLock ); +} + +static void _WakeAllConditionVariable( 
_PCONDITION_VARIABLE cond_var ) +{ + EnterCriticalSection( cond_var->mLock ); + if (cond_var->mWaiters > 0 ) + { + ++cond_var->mGeneration; + cond_var->mReleaseCount = cond_var->mWaiters; + SetEvent( cond_var->mEvent ); + } + LeaveCriticalSection( cond_var->mLock ); +} +#endif // !HAS_CONDITION_VARIABLE +#endif // _WIN32 + +#define MAX_COUNT (1<<29) + +// Global state to coordinate whether the threads have been launched successfully or not +#if defined( _MSC_VER ) && (_WIN32_WINNT >= 0x600) +static _INIT_ONCE threadpool_init_control; +#elif defined (_WIN32) // MingW of XP +static int threadpool_init_control; +#else // Posix platforms +pthread_once_t threadpool_init_control = PTHREAD_ONCE_INIT; +#endif +cl_int threadPoolInitErr = -1; // set to CL_SUCCESS on successful thread launch + +// critical region lock around ThreadPool_Do. We can only run one ThreadPool_Do at a time, +// because we are too lazy to set up a queue here, and don't expect to need one. +#if defined( _WIN32 ) +CRITICAL_SECTION gThreadPoolLock[1]; +#else // !_WIN32 +pthread_mutex_t gThreadPoolLock; +#endif // !_WIN32 + +// Condition variable to park ThreadPool threads when not working +#if defined( _WIN32 ) +CRITICAL_SECTION cond_lock[1]; +_CONDITION_VARIABLE cond_var[1]; +#else // !_WIN32 +pthread_mutex_t cond_lock; +pthread_cond_t cond_var; +#endif // !_WIN32 +volatile cl_int gRunCount = 0; // Condition variable state. How many iterations on the function left to run. + // set to CL_INT_MAX to cause worker threads to exit. Note: this value might go negative. + +// State that only changes when the threadpool is not working. 
+volatile TPFuncPtr gFunc_ptr = NULL; +volatile void *gUserInfo = NULL; +volatile cl_int gJobCount = 0; + +// State that may change while the thread pool is working +volatile cl_int jobError = CL_SUCCESS; // err code return for the job as a whole + +// Condition variable to park caller while waiting +#if defined( _WIN32 ) +HANDLE caller_event; +#else // !_WIN32 +pthread_mutex_t caller_cond_lock; +pthread_cond_t caller_cond_var; +#endif // !_WIN32 +volatile cl_int gRunning = 0; // # of threads intended to be running. Running threads will decrement this as they discover they've run out of work to do. + +// The total number of threads launched. +volatile cl_int gThreadCount = 0; +#ifdef _WIN32 +void ThreadPool_WorkerFunc( void *p ) +#else +void *ThreadPool_WorkerFunc( void *p ) +#endif +{ + cl_uint threadID = ThreadPool_AtomicAdd( (volatile cl_int *) p, 1 ); + cl_int item = ThreadPool_AtomicAdd( &gRunCount, -1 ); +// log_info( "ThreadPool_WorkerFunc start: gRunning = %d\n", gRunning ); + + while( MAX_COUNT > item ) + { + cl_int err; + + // check for more work to do + if( 0 >= item ) + { +// log_info( "Thread %d has run out of work.\n", threadID ); + + // No work to do. Attempt to block waiting for work +#if defined( _WIN32 ) + EnterCriticalSection( cond_lock ); +#else // !_WIN32 + if((err = pthread_mutex_lock( &cond_lock) )) + { + log_error("Error %d from pthread_mutex_lock. Worker %d unable to block waiting for work. ThreadPool_WorkerFunc failed.\n", err, threadID ); + goto exit; + } +#endif // !_WIN32 + + cl_int remaining = ThreadPool_AtomicAdd( &gRunning, -1 ); +// log_info( "ThreadPool_WorkerFunc: gRunning = %d\n", remaining - 1 ); + if( 1 == remaining ) + { // last thread out signal the main thread to wake up +#if defined( _WIN32 ) + SetEvent( caller_event ); +#else // !_WIN32 + if((err = pthread_mutex_lock( &caller_cond_lock) )) + { + log_error("Error %d from pthread_mutex_lock. 
Unable to wake caller.\n", err ); + goto exit; + } + if( (err = pthread_cond_broadcast( &caller_cond_var ))) + { + log_error("Error %d from pthread_cond_broadcast. Unable to wake up main thread. ThreadPool_WorkerFunc failed.\n", err ); + goto exit; + } + if((err = pthread_mutex_unlock( &caller_cond_lock) )) + { + log_error("Error %d from pthread_mutex_lock. Unable to wake caller.\n", err ); + goto exit; + } +#endif // !_WIN32 + } + + // loop in case we are woken only to discover that some other thread already did all the work + while( 0 >= item ) + { +#if defined( _WIN32 ) + _SleepConditionVariableCS( cond_var, cond_lock, INFINITE ); +#else // !_WIN32 + if((err = pthread_cond_wait( &cond_var, &cond_lock) )) + { + log_error("Error %d from pthread_cond_wait. Unable to block for waiting for work. ThreadPool_WorkerFunc failed.\n", err ); + pthread_mutex_unlock( &cond_lock); + goto exit; + } +#endif // !_WIN32 + + // try again to get a valid item id + item = ThreadPool_AtomicAdd( &gRunCount, -1 ); + if( MAX_COUNT <= item ) // exit if we are done + { +#if defined( _WIN32 ) + LeaveCriticalSection( cond_lock ); +#else // !_WIN32 + pthread_mutex_unlock( &cond_lock); +#endif // !_WIN32 + goto exit; + } + } + + ThreadPool_AtomicAdd( &gRunning, 1 ); +// log_info( "Thread %d has found work.\n", threadID); + +#if defined( _WIN32 ) + LeaveCriticalSection( cond_lock ); +#else // !_WIN32 + if((err = pthread_mutex_unlock( &cond_lock) )) + { + log_error("Error %d from pthread_mutex_unlock. Unable to block for waiting for work. ThreadPool_WorkerFunc failed.\n", err ); + goto exit; + } +#endif // !_WIN32 + + } + + // we have a valid item, so do the work + if( CL_SUCCESS == jobError ) // but only if we haven't already encountered an error + { +// log_info( "Thread %d doing job %d\n", threadID, item - 1); + +#if defined(__APPLE__) && defined(__arm__) + // On most platforms which support denorm, default is FTZ off. 
However, + // on some hardware where the reference is computed, default might be flush denorms to zero e.g. arm. + // This creates issues in result verification. Since spec allows the implementation to either flush or + // not flush denorms to zero, an implementation may choose not be flush i.e. return denorm result whereas + // reference result may be zero (flushed denorm). Hence we need to disable denorm flushing on host side + // where reference is being computed to make sure we get non-flushed reference result. If implementation + // returns flushed result, we correctly take care of that in verification code. + FPU_mode_type oldMode; + DisableFTZ( &oldMode ); +#endif + + // Call the user's function with this item ID + err = gFunc_ptr( item - 1, threadID, (void*) gUserInfo ); +#if defined(__APPLE__) && defined(__arm__) + // Restore FP state + RestoreFPState( &oldMode ); +#endif + + if( err ) + { +#if (__MINGW32__) + EnterCriticalSection(&gAtomicLock); + if( jobError == CL_SUCCESS ); + jobError = err; + gRunCount = 0; + LeaveCriticalSection(&gAtomicLock); +#elif defined( __GNUC__ ) + // GCC extension: http://gcc.gnu.org/onlinedocs/gcc/Atomic-Builtins.html#Atomic-Builtins + // set the new error if we are the first one there. + __sync_val_compare_and_swap( &jobError, CL_SUCCESS, err ); + + // drop run count to 0 + gRunCount = 0; + __sync_synchronize(); +#elif defined( _MSC_VER ) + // set the new error if we are the first one there. + _InterlockedCompareExchange( (volatile LONG*) &jobError, err, CL_SUCCESS ); + + // drop run count to 0 + gRunCount = 0; + _mm_mfence(); +#else + if( pthread_mutex_lock(&gAtomicLock) ) + log_error( "Atomic operation failed. pthread_mutex_lock(&gAtomicLock) returned an error\n"); + if( jobError == CL_SUCCESS ); + jobError = err; + gRunCount = 0; + if( pthread_mutex_unlock(&gAtomicLock) ) + log_error( "Failed to release gAtomicLock. 
Further atomic operations may deadlock\n"); +#endif + } + } + + // get the next item + item = ThreadPool_AtomicAdd( &gRunCount, -1 ); + } + +exit: + log_info( "ThreadPool: thread %d exiting.\n", threadID ); + ThreadPool_AtomicAdd( &gThreadCount, -1 ); +#if !defined(_WIN32) + return NULL; +#endif +} + +// SetThreadCount() may be used to artifically set the number of worker threads +// If the value is 0 (the default) the number of threads will be determined based on +// the number of CPU cores. If it is a unicore machine, then 2 will be used, so +// that we still get some testing for thread safety. +// +// If count < 2 or the CL_TEST_SINGLE_THREADED environment variable is set then the +// code will run single threaded, but will report an error to indicate that the test +// is invalid. This option is intended for debugging purposes only. It is suggested +// as a convention that test apps set the thread count to 1 in response to the -m flag. +// +// SetThreadCount() must be called before the first call to GetThreadCount() or ThreadPool_Do(), +// otherwise the behavior is indefined. +void SetThreadCount( int count ) +{ + if( threadPoolInitErr == CL_SUCCESS ) + { + log_error( "Error: It is illegal to set the thread count after the first call to ThreadPool_Do or GetThreadCount\n" ); + abort(); + } + + gThreadCount = count; +} + +void ThreadPool_Init(void) +{ + cl_int i; + int err; + volatile cl_uint threadID = 0; + + // Check for manual override of multithreading code. We add this for better debuggability. + if( getenv( "CL_TEST_SINGLE_THREADED" ) ) + { + log_error("ERROR: CL_TEST_SINGLE_THREADED is set in the environment. Running single threaded.\n*** TEST IS INVALID! 
***\n"); + gThreadCount = 1; + return; + } + + // Figure out how many threads to run -- check first for non-zero to give the implementation the chance + if( 0 == gThreadCount ) + { +#if defined(_MSC_VER) || defined (__MINGW64__) + PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer = NULL; + DWORD length = 0; + + GetLogicalProcessorInformation( NULL, &length ); + buffer = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION) malloc( length ); + if( buffer != NULL && GetLogicalProcessorInformation( buffer, &length ) == TRUE ) + { + PSYSTEM_LOGICAL_PROCESSOR_INFORMATION ptr = buffer; + while( ptr < &buffer[ length / sizeof( SYSTEM_LOGICAL_PROCESSOR_INFORMATION ) ] ) + { + if( ptr->Relationship == RelationProcessorCore ) + { + // Count the number of bits in ProcessorMask (number of logical cores) + ULONG mask = ptr->ProcessorMask; + while( mask ) + { + ++gThreadCount; + mask &= mask - 1; // Remove 1 bit at a time + } + } + ++ptr; + } + free(buffer); + } +#elif defined (__MINGW32__) + { + #warning How about this, instead of hard coding it to 2? + SYSTEM_INFO sysinfo; + GetSystemInfo( &sysinfo ); + gThreadCount = sysinfo.dwNumberOfProcessors; + } +#else // !_WIN32 + gThreadCount = (cl_int) sysconf(_SC_NPROCESSORS_CONF); // Hopefully your system returns logical cpus here, as does MacOS X +#endif // !_WIN32 + + // Multithreaded tests are required to run multithreaded even on unicore systems so as to test thread safety + if( 1 == gThreadCount ) + gThreadCount = 2; + } + + //Allow the app to set thread count to <0 for debugging purposes. This will cause the test to run single threaded. + if( gThreadCount < 2 ) + { + log_error( "ERROR: Running single threaded because thread count < 2. \n*** TEST IS INVALID! 
***\n"); + gThreadCount = 1; + return; + } + +#if defined( _WIN32 ) + InitializeCriticalSection( gThreadPoolLock ); + InitializeCriticalSection( cond_lock ); + _InitializeConditionVariable( cond_var ); + caller_event = CreateEvent( NULL, FALSE, FALSE, NULL ); +#elif defined (__GNUC__) + // Dont rely on PTHREAD_MUTEX_INITIALIZER for intialization of a mutex since it might cause problem + // with some flavors of gcc compilers. + pthread_cond_init(&cond_var, NULL); + pthread_mutex_init(&cond_lock ,NULL); + pthread_cond_init(&caller_cond_var, NULL); + pthread_mutex_init(&caller_cond_lock, NULL); + pthread_mutex_init(&gThreadPoolLock, NULL); +#endif + +#if !(defined(__GNUC__) || defined(_MSC_VER) || defined(__MINGW32__)) + pthread_mutex_initialize(gAtomicLock); +#elif defined (__MINGW32__) + InitializeCriticalSection(&gAtomicLock); +#endif + // Make sure the last thread done in the work pool doesn't signal us to wake before we get to the point where we are supposed to wait + // That would cause a deadlock. +#if !defined( _WIN32 ) + if((err = pthread_mutex_lock( &caller_cond_lock) )) + { + log_error("Error %d from pthread_mutex_lock. Unable to block for work to finish. ThreadPool_Init failed.\n", err ); + gThreadCount = 1; + return; + } +#endif // !_WIN32 + + gRunning = gThreadCount; + // init threads + for( i = 0; i < gThreadCount; i++ ) + { +#if defined( _WIN32 ) + uintptr_t handle = _beginthread(ThreadPool_WorkerFunc, 0, (void*) &threadID); + err = ( handle == 0 ); +#else // !_WIN32 + pthread_t tid = 0; + err = pthread_create( &tid, NULL, ThreadPool_WorkerFunc, (void*) &threadID ); +#endif // !_WIN32 + if( err ) + { + log_error( "Error %d launching thread %d\n", err, i ); + threadPoolInitErr = err; + gThreadCount = i; + break; + } + } + + atexit( ThreadPool_Exit ); + +// block until they are done launching. 
+ do + { +#if defined( _WIN32 ) + WaitForSingleObject( caller_event, INFINITE ); +#else // !_WIN32 + if((err = pthread_cond_wait( &caller_cond_var, &caller_cond_lock) )) + { + log_error("Error %d from pthread_cond_wait. Unable to block for work to finish. ThreadPool_Init failed.\n", err ); + pthread_mutex_unlock( &caller_cond_lock); + return; + } +#endif // !_WIN32 + } + while( gRunCount != -gThreadCount ); +#if !defined( _WIN32 ) + if((err = pthread_mutex_unlock( &caller_cond_lock) )) + { + log_error("Error %d from pthread_mutex_unlock. Unable to block for work to finish. ThreadPool_Init failed.\n", err ); + return; + } +#endif // !_WIN32 + + threadPoolInitErr = CL_SUCCESS; +} + +#if defined(_MSC_VER) +static BOOL CALLBACK _ThreadPool_Init(_PINIT_ONCE InitOnce, PVOID Parameter, PVOID *lpContex) +{ + ThreadPool_Init(); + return TRUE; +} +#endif + +void ThreadPool_Exit(void) +{ + int err, count; + gRunCount = CL_INT_MAX; + +#if defined( __GNUC__ ) + // GCC extension: http://gcc.gnu.org/onlinedocs/gcc/Atomic-Builtins.html#Atomic-Builtins + __sync_synchronize(); +#elif defined( _MSC_VER ) + _mm_mfence(); +#else + #warning If this is a weakly ordered memory system, please add a memory barrier here to force this and everything else to memory before we proceed +#endif + + // spin waiting for threads to die + for (count = 0; 0 != gThreadCount && count < 1000; count++) + { +#if defined( _WIN32 ) + _WakeAllConditionVariable( cond_var ); + Sleep(1); +#else // !_WIN32 + if( (err = pthread_cond_broadcast( &cond_var ))) + { + log_error("Error %d from pthread_cond_broadcast. Unable to wake up work threads. ThreadPool_Exit failed.\n", err ); + break; + } + usleep(1000); +#endif // !_WIN32 + } + + if( gThreadCount ) + log_error( "Error: Thread pool timed out after 1 second with %d threads still active.\n", gThreadCount ); + else + log_info( "Thread pool exited in a orderly fashion.\n" ); +} + + +// Blocking API that farms out count jobs to a thread pool. 
+// It may return with some work undone if func_ptr() returns a non-zero +// result. +// +// This function obviously has its shortcommings. Only one call to ThreadPool_Do +// can be running at a time. It is not intended for general purpose use. +// If clEnqueueNativeKernelFn, out of order queues and a CL_DEVICE_TYPE_CPU were +// all available then it would make more sense to use those features. +cl_int ThreadPool_Do( TPFuncPtr func_ptr, + cl_uint count, + void *userInfo ) +{ + cl_int newErr; + cl_int err = 0; + // Lazily set up our threads +#if defined(_MSC_VER) && (_WIN32_WINNT >= 0x600) + err = !_InitOnceExecuteOnce( &threadpool_init_control, _ThreadPool_Init, NULL, NULL ); +#elif defined (_WIN32) + if (threadpool_init_control == 0) { + #warning This is buggy and race prone. Find a better way. + ThreadPool_Init(); + threadpool_init_control = 1; + } +#else //posix platform + err = pthread_once( &threadpool_init_control, ThreadPool_Init ); + if( err ) + { + log_error("Error %d from pthread_once. Unable to init threads. ThreadPool_Do failed.\n", err ); + return err; + } +#endif + // Single threaded code to handle case where threadpool wasn't allocated or was disabled by environment variable + if( threadPoolInitErr ) + { + cl_uint currentJob = 0; + cl_int result = CL_SUCCESS; + +#if defined(__APPLE__) && defined(__arm__) + // On most platforms which support denorm, default is FTZ off. However, + // on some hardware where the reference is computed, default might be flush denorms to zero e.g. arm. + // This creates issues in result verification. Since spec allows the implementation to either flush or + // not flush denorms to zero, an implementation may choose not be flush i.e. return denorm result whereas + // reference result may be zero (flushed denorm). Hence we need to disable denorm flushing on host side + // where reference is being computed to make sure we get non-flushed reference result. 
If implementation + // returns flushed result, we correctly take care of that in verification code. + FPU_mode_type oldMode; + DisableFTZ( &oldMode ); +#endif + for( currentJob = 0; currentJob < count; currentJob++ ) + if((result = func_ptr( currentJob, 0, userInfo ))) + { +#if defined(__APPLE__) && defined(__arm__) + // Restore FP state before leaving + RestoreFPState( &oldMode ); +#endif + return result; + } + +#if defined(__APPLE__) && defined(__arm__) + // Restore FP state before leaving + RestoreFPState( &oldMode ); +#endif + + return CL_SUCCESS; + } + + if( count >= MAX_COUNT ) + { + log_error("Error: ThreadPool_Do count %d >= max threadpool count of %d\n", count, MAX_COUNT ); + return -1; + } + + // Enter critical region +#if defined( _WIN32 ) + EnterCriticalSection( gThreadPoolLock ); +#else // !_WIN32 + if( (err = pthread_mutex_lock( &gThreadPoolLock ))) + { + switch (err) + { + case EDEADLK: + log_error("Error EDEADLK returned in ThreadPool_Do(). ThreadPool_Do is not designed to work recursively!\n" ); + break; + case EINVAL: + log_error("Error EINVAL returned in ThreadPool_Do(). How did we end up with an invalid gThreadPoolLock?\n" ); + break; + default: + break; + } + return err; + } +#endif // !_WIN32 + + // Start modifying the job state observable by worker threads +#if defined( _WIN32 ) + EnterCriticalSection( cond_lock ); +#else // !_WIN32 + if((err = pthread_mutex_lock( &cond_lock) )) + { + log_error("Error %d from pthread_mutex_lock. Unable to wake up work threads. ThreadPool_Do failed.\n", err ); + goto exit; + } +#endif // !_WIN32 + + // Make sure the last thread done in the work pool doesn't signal us to wake before we get to the point where we are supposed to wait + // That would cause a deadlock. +#if !defined( _WIN32 ) + if((err = pthread_mutex_lock( &caller_cond_lock) )) + { + log_error("Error %d from pthread_mutex_lock. Unable to block for work to finish. 
ThreadPool_Do failed.\n", err ); + goto exit; + } +#endif // !_WIN32 + + // Prime the worker threads to get going + jobError = CL_SUCCESS; + gRunCount = gJobCount = count; + gFunc_ptr = func_ptr; + gUserInfo = userInfo; + +#if defined( _WIN32 ) + _WakeAllConditionVariable( cond_var ); + LeaveCriticalSection( cond_lock ); +#else // !_WIN32 + if( (err = pthread_cond_broadcast( &cond_var ))) + { + log_error("Error %d from pthread_cond_broadcast. Unable to wake up work threads. ThreadPool_Do failed.\n", err ); + goto exit; + } + if((err = pthread_mutex_unlock( &cond_lock) )) + { + log_error("Error %d from pthread_mutex_unlock. Unable to wake up work threads. ThreadPool_Do failed.\n", err ); + goto exit; + } +#endif // !_WIN32 + +// block until they are done. It would be slightly more efficient to do some of the work here though. + do + { +#if defined( _WIN32 ) + WaitForSingleObject( caller_event, INFINITE ); +#else // !_WIN32 + if((err = pthread_cond_wait( &caller_cond_var, &caller_cond_lock) )) + { + log_error("Error %d from pthread_cond_wait. Unable to block for work to finish. ThreadPool_Do failed.\n", err ); + pthread_mutex_unlock( &caller_cond_lock); + goto exit; + } +#endif // !_WIN32 + } + while( gRunning ); +#if !defined(_WIN32) + if((err = pthread_mutex_unlock( &caller_cond_lock) )) + { + log_error("Error %d from pthread_mutex_unlock. Unable to block for work to finish. ThreadPool_Do failed.\n", err ); + goto exit; + } +#endif // !_WIN32 + + err = jobError; + +exit: + // exit critical region +#if defined( _WIN32 ) + LeaveCriticalSection( gThreadPoolLock ); +#else // !_WIN32 + newErr = pthread_mutex_unlock( &gThreadPoolLock ); + if( newErr) + { + log_error("Error %d from pthread_mutex_unlock. Unable to exit critical region. 
ThreadPool_Do failed.\n", newErr ); + return err; + } +#endif // !_WIN32 + + return err; +} + +cl_uint GetThreadCount( void ) +{ + // Lazily set up our threads +#if defined(_MSC_VER) && (_WIN32_WINNT >= 0x600) + cl_int err = !_InitOnceExecuteOnce( &threadpool_init_control, _ThreadPool_Init, NULL, NULL ); +#elif defined (_WIN32) + if (threadpool_init_control == 0) { + #warning This is buggy and race prone. Find a better way. + ThreadPool_Init(); + threadpool_init_control = 1; + } +#else + cl_int err = pthread_once( &threadpool_init_control, ThreadPool_Init ); + if( err ) + { + log_error("Error %d from pthread_once. Unable to init threads. ThreadPool_Do failed.\n", err ); + return err; + } +#endif // !_WIN32 + + if( gThreadCount < 1 ) + return 1; + + return gThreadCount; +} + +#else + +#ifndef MY_OS_REALLY_REALLY_DOESNT_SUPPORT_THREADS + #error ThreadPool implementation has not been multithreaded for this operating system. You must multithread this section. +#endif +// +// We require multithreading in parts of the test as a means of simultaneously testing reentrancy requirements +// of OpenCL API, while also checking +// +// A sample single threaded implementation follows, for documentation / bootstrapping purposes. +// It is not okay to use this for conformance testing!!! +// +// Exception: If your operating system does not support multithreaded execution of any kind, then you may use this code. +// + +cl_int ThreadPool_AtomicAdd( volatile cl_int *a, cl_int b ) +{ + cl_uint r = *a; + + // since this fallback code path is not multithreaded, we just do a regular add here + // If your operating system supports memory-barrier-atomics, use those here + *a = r + b; + + return r; +} + +// Blocking API that farms out count jobs to a thread pool. +// It may return with some work undone if func_ptr() returns a non-zero +// result. 
+cl_int ThreadPool_Do( TPFuncPtr func_ptr, + cl_uint count, + void *userInfo ) +{ + cl_uint currentJob = 0; + cl_int result = CL_SUCCESS; + +#ifndef MY_OS_REALLY_REALLY_DOESNT_SUPPORT_THREADS + // THIS FUNCTION IS NOT INTENDED FOR USE!! + log_error( "ERROR: Test must be multithreaded!\n" ); + exit(-1); +#else + static int spewCount = 0; + + if( 0 == spewCount ) + { + log_info( "\nWARNING: The operating system is claimed not to support threads of any sort. Running single threaded.\n" ); + spewCount = 1; + } +#endif + +// The multithreaded code should mimic this behavior: + for( currentJob = 0; currentJob < count; currentJob++ ) + if((result = func_ptr( currentJob, 0, userInfo ))) + return result; + + return CL_SUCCESS; +} + +cl_uint GetThreadCount( void ) +{ + return 1; +} + +void SetThreadCount( int count ) +{ + if( count > 1 ) + log_info( "WARNING: SetThreadCount(%d) ignored\n", count ); +} + +#endif diff --git a/test_conformance/compatibility/test_common/harness/ThreadPool.h b/test_conformance/compatibility/test_common/harness/ThreadPool.h new file mode 100644 index 00000000..7c392306 --- /dev/null +++ b/test_conformance/compatibility/test_common/harness/ThreadPool.h @@ -0,0 +1,76 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef THREAD_POOL_H +#define THREAD_POOL_H + +#if defined( __APPLE__ ) + #include +#else + #include +#endif + +#if defined(__cplusplus) + extern "C" { +#endif + +// +// An atomic add operator +cl_int ThreadPool_AtomicAdd( volatile cl_int *a, cl_int b ); // returns old value + +// Your function prototype +// +// A function pointer to the function you want to execute in a multithreaded context. No +// synchronization primitives are provided, other than the atomic add above. You may not +// call ThreadPool_Do from your function. ThreadPool_AtomicAdd() and GetThreadCount() should +// work, however. +// +// job ids and thread ids are 0 based. If number of jobs or threads was 8, they will numbered be 0 through 7. +// Note that while every job will be run, it is not guaranteed that every thread will wake up before +// the work is done. +typedef cl_int (*TPFuncPtr)( cl_uint /*job_id*/, cl_uint /* thread_id */, void *userInfo ); + +// returns first non-zero result from func_ptr, or CL_SUCCESS if all are zero. +// Some workitems may not run if a non-zero result is returned from func_ptr(). +// This function may not be called from a TPFuncPtr. +cl_int ThreadPool_Do( TPFuncPtr func_ptr, + cl_uint count, + void *userInfo ); + +// Returns the number of worker threads that underlie the threadpool. The value passed +// as the TPFuncPtrs thread_id will be between 0 and this value less one, inclusive. +// This is safe to call from a TPFuncPtr. +cl_uint GetThreadCount( void ); + +// SetThreadCount() may be used to artifically set the number of worker threads +// If the value is 0 (the default) the number of threads will be determined based on +// the number of CPU cores. If it is a unicore machine, then 2 will be used, so +// that we still get some testing for thread safety. +// +// If count < 2 or the CL_TEST_SINGLE_THREADED environment variable is set then the +// code will run single threaded, but will report an error to indicate that the test +// is invalid. 
This option is intended for debugging purposes only. It is suggested +// as a convention that test apps set the thread count to 1 in response to the -m flag. +// +// SetThreadCount() must be called before the first call to GetThreadCount() or ThreadPool_Do(), +// otherwise the behavior is indefined. It may not be called from a TPFuncPtr. +void SetThreadCount( int count ); + +#ifdef __cplusplus + } /* extern "C" */ +#endif + + +#endif /* THREAD_POOL_H */ diff --git a/test_conformance/compatibility/test_common/harness/clImageHelper.h b/test_conformance/compatibility/test_common/harness/clImageHelper.h new file mode 100644 index 00000000..8537ddcd --- /dev/null +++ b/test_conformance/compatibility/test_common/harness/clImageHelper.h @@ -0,0 +1,253 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef test_conformance_clImageHelper_h +#define test_conformance_clImageHelper_h + +#ifdef __APPLE__ +#include +#else +#include +#endif + +#include +#include "errorHelpers.h" + +#ifdef __cplusplus +extern "C" { +#endif + + + // helper function to replace clCreateImage2D , to make the existing code use + // the functions of version 1.2 and veriosn 1.1 respectively + + inline cl_mem create_image_2d (cl_context context, + cl_mem_flags flags, + const cl_image_format *image_format, + size_t image_width, + size_t image_height, + size_t image_row_pitch, + void *host_ptr, + cl_int *errcode_ret) + { + cl_mem mImage = NULL; + +#ifdef CL_VERSION_1_2 + cl_image_desc image_desc_dest; + image_desc_dest.image_type = CL_MEM_OBJECT_IMAGE2D;; + image_desc_dest.image_width = image_width; + image_desc_dest.image_height = image_height; + image_desc_dest.image_depth= 0;// not usedfor 2d + image_desc_dest.image_array_size = 0;// not used for 2d + image_desc_dest.image_row_pitch = image_row_pitch; + image_desc_dest.image_slice_pitch = 0; + image_desc_dest.num_mip_levels = 0; + image_desc_dest.num_samples = 0; + image_desc_dest.buffer = NULL;// no image type of CL_MEM_OBJECT_IMAGE1D_BUFFER in CL_VERSION_1_1, so always is NULL + mImage = clCreateImage( context, flags, image_format, &image_desc_dest, host_ptr, errcode_ret ); + if (errcode_ret && (*errcode_ret)) { + // Log an info message and rely on the calling function to produce an error + // if necessary. + log_info("clCreateImage failed (%d)\n", *errcode_ret); + } + +#else + mImage = clCreateImage2D( context, flags, image_format, image_width, image_height, image_row_pitch, host_ptr, errcode_ret ); + if (errcode_ret && (*errcode_ret)) { + // Log an info message and rely on the calling function to produce an error + // if necessary. 
+ log_info("clCreateImage2D failed (%d)\n", *errcode_ret); + } +#endif + + return mImage; + } + + inline cl_mem create_image_3d (cl_context context, + cl_mem_flags flags, + const cl_image_format *image_format, + size_t image_width, + size_t image_height, + size_t image_depth, + size_t image_row_pitch, + size_t image_slice_pitch, + void *host_ptr, + cl_int *errcode_ret) + { + cl_mem mImage; + +#ifdef CL_VERSION_1_2 + cl_image_desc image_desc; + image_desc.image_type = CL_MEM_OBJECT_IMAGE3D; + image_desc.image_width = image_width; + image_desc.image_height = image_height; + image_desc.image_depth = image_depth; + image_desc.image_array_size = 0;// not used for one image + image_desc.image_row_pitch = image_row_pitch; + image_desc.image_slice_pitch = image_slice_pitch; + image_desc.num_mip_levels = 0; + image_desc.num_samples = 0; + image_desc.buffer = NULL; // no image type of CL_MEM_OBJECT_IMAGE1D_BUFFER in CL_VERSION_1_1, so always is NULL + mImage = clCreateImage( context, + flags, + image_format, + &image_desc, + host_ptr, + errcode_ret ); + if (errcode_ret && (*errcode_ret)) { + // Log an info message and rely on the calling function to produce an error + // if necessary. + log_info("clCreateImage failed (%d)\n", *errcode_ret); + } + +#else + mImage = clCreateImage3D( context, + flags, image_format, + image_width, + image_height, + image_depth, + image_row_pitch, + image_slice_pitch, + host_ptr, + errcode_ret ); + if (errcode_ret && (*errcode_ret)) { + // Log an info message and rely on the calling function to produce an error + // if necessary. 
+ log_info("clCreateImage3D failed (%d)\n", *errcode_ret); + } +#endif + + return mImage; + } + + inline cl_mem create_image_2d_array (cl_context context, + cl_mem_flags flags, + const cl_image_format *image_format, + size_t image_width, + size_t image_height, + size_t image_array_size, + size_t image_row_pitch, + size_t image_slice_pitch, + void *host_ptr, + cl_int *errcode_ret) + { + cl_mem mImage; + + cl_image_desc image_desc; + image_desc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY; + image_desc.image_width = image_width; + image_desc.image_height = image_height; + image_desc.image_depth = 1; + image_desc.image_array_size = image_array_size; + image_desc.image_row_pitch = image_row_pitch; + image_desc.image_slice_pitch = image_slice_pitch; + image_desc.num_mip_levels = 0; + image_desc.num_samples = 0; + image_desc.buffer = NULL; + mImage = clCreateImage( context, + flags, + image_format, + &image_desc, + host_ptr, + errcode_ret ); + if (errcode_ret && (*errcode_ret)) { + // Log an info message and rely on the calling function to produce an error + // if necessary. 
+ log_info("clCreateImage failed (%d)\n", *errcode_ret); + } + + return mImage; + } + + inline cl_mem create_image_1d_array (cl_context context, + cl_mem_flags flags, + const cl_image_format *image_format, + size_t image_width, + size_t image_array_size, + size_t image_row_pitch, + size_t image_slice_pitch, + void *host_ptr, + cl_int *errcode_ret) + { + cl_mem mImage; + + cl_image_desc image_desc; + image_desc.image_type = CL_MEM_OBJECT_IMAGE1D_ARRAY; + image_desc.image_width = image_width; + image_desc.image_height = 1; + image_desc.image_depth = 1; + image_desc.image_array_size = image_array_size; + image_desc.image_row_pitch = image_row_pitch; + image_desc.image_slice_pitch = image_slice_pitch; + image_desc.num_mip_levels = 0; + image_desc.num_samples = 0; + image_desc.buffer = NULL; + mImage = clCreateImage( context, + flags, + image_format, + &image_desc, + host_ptr, + errcode_ret ); + if (errcode_ret && (*errcode_ret)) { + // Log an info message and rely on the calling function to produce an error + // if necessary. + log_info("clCreateImage failed (%d)\n", *errcode_ret); + } + + return mImage; + } + + inline cl_mem create_image_1d (cl_context context, + cl_mem_flags flags, + const cl_image_format *image_format, + size_t image_width, + size_t image_row_pitch, + void *host_ptr, + cl_mem buffer, + cl_int *errcode_ret) + { + cl_mem mImage; + + cl_image_desc image_desc; + image_desc.image_type = buffer ? 
CL_MEM_OBJECT_IMAGE1D_BUFFER: CL_MEM_OBJECT_IMAGE1D; + image_desc.image_width = image_width; + image_desc.image_height = 1; + image_desc.image_depth = 1; + image_desc.image_row_pitch = image_row_pitch; + image_desc.image_slice_pitch = 0; + image_desc.num_mip_levels = 0; + image_desc.num_samples = 0; + image_desc.buffer = buffer; + mImage = clCreateImage( context, + flags, + image_format, + &image_desc, + host_ptr, + errcode_ret ); + if (errcode_ret && (*errcode_ret)) { + // Log an info message and rely on the calling function to produce an error + // if necessary. + log_info("clCreateImage failed (%d)\n", *errcode_ret); + } + + return mImage; + } + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/test_conformance/compatibility/test_common/harness/compat.h b/test_conformance/compatibility/test_common/harness/compat.h new file mode 100644 index 00000000..22119c46 --- /dev/null +++ b/test_conformance/compatibility/test_common/harness/compat.h @@ -0,0 +1,216 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
//
#ifndef _COMPAT_H_
#define _COMPAT_H_

#if defined(_WIN32) && defined (_MSC_VER)

// NOTE(review): the six system header names below were lost in extraction;
// reconstructed from what the declarations in this file require - verify.
#include <windows.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <float.h>
#include <math.h>

// MSVC cannot parse hex-float literals; rebuild them from mantissa/exponent.
#define MAKE_HEX_FLOAT(x,y,z) ((float)ldexp( (float)(y), z))
#define MAKE_HEX_DOUBLE(x,y,z) ldexp( (double)(y), z)
#define MAKE_HEX_LONG(x,y,z) ((long double) ldexp( (long double)(y), z))

#define isfinite(x) _finite(x)

#if !defined(__cplusplus)
typedef char bool;
#define inline

#else
extern "C" {
#endif

// Fixed-width integer typedefs (no <stdint.h> on old MSVC).
typedef unsigned char uint8_t;
typedef char int8_t;
typedef unsigned short uint16_t;
typedef short int16_t;
typedef unsigned int uint32_t;
typedef int int32_t;
typedef unsigned long long uint64_t;
typedef long long int64_t;

#define MAXPATHLEN MAX_PATH

typedef unsigned short ushort;
typedef unsigned int uint;
typedef unsigned long ulong;


#define INFINITY (FLT_MAX + FLT_MAX)
//#define NAN (INFINITY | 1)
//const static int PINFBITPATT_SP32 = INFINITY;

#ifndef M_PI
    #define M_PI 3.14159265358979323846264338327950288
#endif


#define isnan( x ) ((x) != (x))
#define isinf( _x) ((_x) == INFINITY || (_x) == -INFINITY)

// C99 math functions missing from the old MSVC runtime; implemented elsewhere
// in the harness.
double rint( double x);
float rintf( float x);
long double rintl( long double x);

float cbrtf( float );
double cbrt( double );

int ilogb( double x);
int ilogbf (float x);
int ilogbl(long double x);

double fmax(double x, double y);
double fmin(double x, double y);
float fmaxf( float x, float y );
float fminf(float x, float y);

double log2(double x);
long double log2l(long double x);

double exp2(double x);
long double exp2l(long double x);

double fdim(double x, double y);
float fdimf(float x, float y);
long double fdiml(long double x, long double y);

double remquo( double x, double y, int *quo);
float remquof( float x, float y, int *quo);
long double remquol( long double x, long double y, int *quo);

long double scalblnl(long double x, long n);

inline long long
llabs(long long __x) { return __x >= 0 ? __x : -__x; }


// end of math functions

uint64_t ReadTime( void );
double SubtractTime( uint64_t endTime, uint64_t startTime );

#define sleep(X) Sleep(1000*X)
// snprintf added in _MSC_VER == 1900 (Visual Studio 2015)
#if _MSC_VER < 1900
    #define snprintf sprintf_s
#endif
//#define hypotl _hypot

float make_nan();
float nanf( const char* str);
double nan( const char* str);
long double nanl( const char* str);

//#if defined USE_BOOST
//#include
//double hypot(double x, double y);
float hypotf(float x, float y);
long double hypotl(long double x, long double y) ;
double lgamma(double x);
float lgammaf(float x);

double trunc(double x);
float truncf(float x);

double log1p(double x);
float log1pf(float x);
long double log1pl(long double x);

double copysign(double x, double y);
float copysignf(float x, float y);
long double copysignl(long double x, long double y);

long lround(double x);
long lroundf(float x);
//long lroundl(long double x)

double round(double x);
float roundf(float x);
long double roundl(long double x);

// Added in _MSC_VER == 1800 (Visual Studio 2013)
#if _MSC_VER < 1800
    int signbit(double x);
#endif
int signbitf(float x);

//bool signbitl(long double x) { return boost::math::tr1::signbit(x); }
//#endif // USE_BOOST

long int lrint (double flt);
long int lrintf (float flt);


float int2float (int32_t ix);
int32_t float2int (float fx);

/** Returns the number of leading 0-bits in x,
    starting at the most significant bit position.
    If x is 0, the result is undefined.
*/
int __builtin_clz(unsigned int pattern);


// NOTE(review): header-scope static const; every includer gets its own copy.
static const double zero= 0.00000000000000000000e+00;
#define NAN (INFINITY - INFINITY)
#define HUGE_VALF (float)HUGE_VAL

int usleep(int usec);

// Reimplement fenv.h because Windows doesn't have it.
#define FE_INEXACT 0x0020
#define FE_UNDERFLOW 0x0010
#define FE_OVERFLOW 0x0008
#define FE_DIVBYZERO 0x0004
#define FE_INVALID 0x0001
#define FE_ALL_EXCEPT 0x003D

int fetestexcept(int excepts);
int feclearexcept(int excepts);

#ifdef __cplusplus
}
#endif

#else // !((defined(_WIN32) && defined(_MSC_VER)
#if defined(__MINGW32__)
// NOTE(review): header name lost in extraction; reconstructed (Sleep needs
// <windows.h>) - verify.
#include <windows.h>
#define sleep(X) Sleep(1000*X)

#endif
#if defined(__linux__) || defined(__MINGW32__) || defined(__APPLE__)
#ifndef __STDC_LIMIT_MACROS
#define __STDC_LIMIT_MACROS
#endif
// NOTE(review): the four header names below were lost in extraction;
// reconstructed - verify against the upstream harness.
#include <stdint.h>
#include <stdbool.h>
#include <unistd.h>
#include <float.h>
#endif
// Non-MSVC compilers parse hex-float literals natively; pass them through.
#define MAKE_HEX_FLOAT(x,y,z) x
#define MAKE_HEX_DOUBLE(x,y,z) x
#define MAKE_HEX_LONG(x,y,z) x

#endif // !((defined(_WIN32) && defined(_MSC_VER)


#endif // _COMPAT_H_

// ---- new file: test_conformance/compatibility/test_common/harness/conversions.c ----
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
+// +#include "conversions.h" +#include +#include +#include +#include "mt19937.h" +#include "compat.h" + +#if defined( __SSE__ ) || defined (_MSC_VER) + #include +#endif +#if defined( __SSE2__ ) || defined (_MSC_VER) + #include +#endif + +void print_type_to_string(ExplicitType type, void *data, char* string) { + switch (type) { + case kBool: + if (*(char*)data) + sprintf(string, "true"); + else + sprintf(string, "false"); + return; + case kChar: + sprintf(string, "%d", (int)*((cl_char*)data)); + return; + case kUChar: + case kUnsignedChar: + sprintf(string, "%u", (int)*((cl_uchar*)data)); + return; + case kShort: + sprintf(string, "%d", (int)*((cl_short*)data)); + return; + case kUShort: + case kUnsignedShort: + sprintf(string, "%u", (int)*((cl_ushort*)data)); + return; + case kInt: + sprintf(string, "%d", *((cl_int*)data)); + return; + case kUInt: + case kUnsignedInt: + sprintf(string, "%u", *((cl_uint*)data)); + return; + case kLong: + sprintf(string, "%lld", *((cl_long*)data)); + return; + case kULong: + case kUnsignedLong: + sprintf(string, "%llu", *((cl_ulong*)data)); + return; + case kFloat: + sprintf(string, "%f", *((cl_float*)data)); + return; + case kHalf: + sprintf(string, "half"); + return; + case kDouble: + sprintf(string, "%g", *((cl_double*)data)); + return; + default: + sprintf(string, "INVALID"); + return; + } + +} + +size_t get_explicit_type_size( ExplicitType type ) +{ + /* Quick method to avoid branching: make sure the following array matches the Enum order */ + static size_t sExplicitTypeSizes[] = { + sizeof( cl_bool ), + sizeof( cl_char ), + sizeof( cl_uchar ), + sizeof( cl_uchar ), + sizeof( cl_short ), + sizeof( cl_ushort ), + sizeof( cl_ushort ), + sizeof( cl_int ), + sizeof( cl_uint ), + sizeof( cl_uint ), + sizeof( cl_long ), + sizeof( cl_ulong ), + sizeof( cl_ulong ), + sizeof( cl_float ), + sizeof( cl_half ), + sizeof( cl_double ) + }; + + return sExplicitTypeSizes[ type ]; +} + +const char * get_explicit_type_name( ExplicitType type ) 
+{ + /* Quick method to avoid branching: make sure the following array matches the Enum order */ + static const char *sExplicitTypeNames[] = { "bool", "char", "uchar", "unsigned char", "short", "ushort", "unsigned short", "int", + "uint", "unsigned int", "long", "ulong", "unsigned long", "float", "half", "double" }; + + return sExplicitTypeNames[ type ]; +} + +static long lrintf_clamped( float f ); +static long lrintf_clamped( float f ) +{ + static const float magic[2] = { MAKE_HEX_FLOAT( 0x1.0p23f, 0x1, 23), - MAKE_HEX_FLOAT( 0x1.0p23f, 0x1, 23) }; + + if( f >= -(float) LONG_MIN ) + return LONG_MAX; + + if( f <= (float) LONG_MIN ) + return LONG_MIN; + + // Round fractional values to integer in round towards nearest mode + if( fabsf(f) < MAKE_HEX_FLOAT( 0x1.0p23f, 0x1, 23 ) ) + { + volatile float x = f; + float magicVal = magic[ f < 0 ]; + +#if defined( __SSE__ ) || defined (_WIN32) + // Defeat x87 based arithmetic, which cant do FTZ, and will round this incorrectly + __m128 v = _mm_set_ss( x ); + __m128 m = _mm_set_ss( magicVal ); + v = _mm_add_ss( v, m ); + v = _mm_sub_ss( v, m ); + _mm_store_ss( (float*) &x, v ); +#else + x += magicVal; + x -= magicVal; +#endif + f = x; + } + + return (long) f; +} + +static long lrint_clamped( double f ); +static long lrint_clamped( double f ) +{ + static const double magic[2] = { MAKE_HEX_DOUBLE(0x1.0p52, 0x1LL, 52), MAKE_HEX_DOUBLE(-0x1.0p52, -0x1LL, 52) }; + + if( sizeof( long ) > 4 ) + { + if( f >= -(double) LONG_MIN ) + return LONG_MAX; + } + else + { + if( f >= LONG_MAX ) + return LONG_MAX; + } + + if( f <= (double) LONG_MIN ) + return LONG_MIN; + + // Round fractional values to integer in round towards nearest mode + if( fabs(f) < MAKE_HEX_DOUBLE(0x1.0p52, 0x1LL, 52) ) + { + volatile double x = f; + double magicVal = magic[ f < 0 ]; +#if defined( __SSE2__ ) || (defined (_MSC_VER)) + // Defeat x87 based arithmetic, which cant do FTZ, and will round this incorrectly + __m128d v = _mm_set_sd( x ); + __m128d m = _mm_set_sd( 
magicVal ); + v = _mm_add_sd( v, m ); + v = _mm_sub_sd( v, m ); + _mm_store_sd( (double*) &x, v ); +#else + x += magicVal; + x -= magicVal; +#endif + f = x; + } + + return (long) f; +} + + +typedef cl_long Long; +typedef cl_ulong ULong; + +static ULong sUpperLimits[ kNumExplicitTypes ] = + { + 0, + 127, 255, 255, + 32767, 65535, 65535, + 0x7fffffffLL, 0xffffffffLL, 0xffffffffLL, + 0x7fffffffffffffffLL, 0xffffffffffffffffLL, 0xffffffffffffffffLL, + 0, 0 }; // Last two values aren't stored here + +static Long sLowerLimits[ kNumExplicitTypes ] = + { + -1, + -128, 0, 0, + -32768, 0, 0, + 0xffffffff80000000LL, 0, 0, + 0x8000000000000000LL, 0, 0, + 0, 0 }; // Last two values aren't stored here + +#define BOOL_CASE(inType) \ + case kBool: \ + boolPtr = (bool *)outRaw; \ + *boolPtr = ( *inType##Ptr ) != 0 ? true : false; \ + break; + +#define SIMPLE_CAST_CASE(inType,outEnum,outType) \ + case outEnum: \ + outType##Ptr = (outType *)outRaw; \ + *outType##Ptr = (outType)(*inType##Ptr); \ + break; + +// Sadly, the ULong downcasting cases need a separate #define to get rid of signed/unsigned comparison warnings +#define DOWN_CAST_CASE(inType,outEnum,outType,sat) \ + case outEnum: \ + outType##Ptr = (outType *)outRaw; \ + if( sat ) \ + { \ + if( ( sLowerLimits[outEnum] < 0 && *inType##Ptr > (Long)sUpperLimits[outEnum] ) || ( sLowerLimits[outEnum] == 0 && (ULong)*inType##Ptr > sUpperLimits[outEnum] ) )\ + *outType##Ptr = (outType)sUpperLimits[outEnum];\ + else if( *inType##Ptr < sLowerLimits[outEnum] )\ + *outType##Ptr = (outType)sLowerLimits[outEnum]; \ + else \ + *outType##Ptr = (outType)*inType##Ptr; \ + } else { \ + *outType##Ptr = (outType)( *inType##Ptr & ( 0xffffffffffffffffLL >> ( 64 - ( sizeof( outType ) * 8 ) ) ) ); \ + } \ + break; + +#define U_DOWN_CAST_CASE(inType,outEnum,outType,sat) \ + case outEnum: \ + outType##Ptr = (outType *)outRaw; \ + if( sat ) \ + { \ + if( (ULong)*inType##Ptr > sUpperLimits[outEnum] )\ + *outType##Ptr = (outType)sUpperLimits[outEnum];\ + 
else \ + *outType##Ptr = (outType)*inType##Ptr; \ + } else { \ + *outType##Ptr = (outType)( *inType##Ptr & ( 0xffffffffffffffffLL >> ( 64 - ( sizeof( outType ) * 8 ) ) ) ); \ + } \ + break; + +#define TO_FLOAT_CASE(inType) \ + case kFloat: \ + floatPtr = (float *)outRaw; \ + *floatPtr = (float)(*inType##Ptr); \ + break; +#define TO_DOUBLE_CASE(inType) \ + case kDouble: \ + doublePtr = (double *)outRaw; \ + *doublePtr = (double)(*inType##Ptr); \ + break; + + +/* Note: we use lrintf here to force the rounding instead of whatever the processor's current rounding mode is */ +#define FLOAT_ROUND_TO_NEAREST_CASE(outEnum,outType) \ + case outEnum: \ + outType##Ptr = (outType *)outRaw; \ + *outType##Ptr = (outType)lrintf_clamped( *floatPtr ); \ + break; + +#define FLOAT_ROUND_CASE(outEnum,outType,rounding,sat) \ + case outEnum: \ + { \ + outType##Ptr = (outType *)outRaw; \ + /* Get the tens digit */ \ + Long wholeValue = (Long)*floatPtr;\ + float largeRemainder = ( *floatPtr - (float)wholeValue ) * 10.f; \ + /* What do we do based on that? 
*/ \ + if( rounding == kRoundToEven ) \ + { \ + if( wholeValue & 1LL ) /*between 1 and 1.99 */ \ + wholeValue += 1LL; /* round up to even */ \ + } \ + else if( rounding == kRoundToZero ) \ + { \ + /* Nothing to do, round-to-zero is what C casting does */ \ + } \ + else if( rounding == kRoundToPosInf ) \ + { \ + /* Only positive numbers are wrong */ \ + if( largeRemainder != 0.f && wholeValue >= 0 ) \ + wholeValue++; \ + } \ + else if( rounding == kRoundToNegInf ) \ + { \ + /* Only negative numbers are off */ \ + if( largeRemainder != 0.f && wholeValue < 0 ) \ + wholeValue--; \ + } \ + else \ + { /* Default is round-to-nearest */ \ + wholeValue = (Long)lrintf_clamped( *floatPtr ); \ + } \ + /* Now apply saturation rules */ \ + if( sat ) \ + { \ + if( ( sLowerLimits[outEnum] < 0 && wholeValue > (Long)sUpperLimits[outEnum] ) || ( sLowerLimits[outEnum] == 0 && (ULong)wholeValue > sUpperLimits[outEnum] ) )\ + *outType##Ptr = (outType)sUpperLimits[outEnum];\ + else if( wholeValue < sLowerLimits[outEnum] )\ + *outType##Ptr = (outType)sLowerLimits[outEnum]; \ + else \ + *outType##Ptr = (outType)wholeValue; \ + } else { \ + *outType##Ptr = (outType)( wholeValue & ( 0xffffffffffffffffLL >> ( 64 - ( sizeof( outType ) * 8 ) ) ) ); \ + } \ + } \ + break; + +#define DOUBLE_ROUND_CASE(outEnum,outType,rounding,sat) \ + case outEnum: \ + { \ + outType##Ptr = (outType *)outRaw; \ + /* Get the tens digit */ \ + Long wholeValue = (Long)*doublePtr;\ + double largeRemainder = ( *doublePtr - (double)wholeValue ) * 10.0; \ + /* What do we do based on that? 
*/ \ + if( rounding == kRoundToEven ) \ + { \ + if( wholeValue & 1LL ) /*between 1 and 1.99 */ \ + wholeValue += 1LL; /* round up to even */ \ + } \ + else if( rounding == kRoundToZero ) \ + { \ + /* Nothing to do, round-to-zero is what C casting does */ \ + } \ + else if( rounding == kRoundToPosInf ) \ + { \ + /* Only positive numbers are wrong */ \ + if( largeRemainder != 0.0 && wholeValue >= 0 ) \ + wholeValue++; \ + } \ + else if( rounding == kRoundToNegInf ) \ + { \ + /* Only negative numbers are off */ \ + if( largeRemainder != 0.0 && wholeValue < 0 ) \ + wholeValue--; \ + } \ + else \ + { /* Default is round-to-nearest */ \ + wholeValue = (Long)lrint_clamped( *doublePtr ); \ + } \ + /* Now apply saturation rules */ \ + if( sat ) \ + { \ + if( ( sLowerLimits[outEnum] < 0 && wholeValue > (Long)sUpperLimits[outEnum] ) || ( sLowerLimits[outEnum] == 0 && (ULong)wholeValue > sUpperLimits[outEnum] ) )\ + *outType##Ptr = (outType)sUpperLimits[outEnum];\ + else if( wholeValue < sLowerLimits[outEnum] )\ + *outType##Ptr = (outType)sLowerLimits[outEnum]; \ + else \ + *outType##Ptr = (outType)wholeValue; \ + } else { \ + *outType##Ptr = (outType)( wholeValue & ( 0xffffffffffffffffLL >> ( 64 - ( sizeof( outType ) * 8 ) ) ) ); \ + } \ + } \ + break; + +typedef unsigned char uchar; +typedef unsigned short ushort; +typedef unsigned int uint; +typedef unsigned long ulong; + +void convert_explicit_value( void *inRaw, void *outRaw, ExplicitType inType, bool saturate, RoundingType roundType, ExplicitType outType ) +{ + bool *boolPtr; + char *charPtr; + uchar *ucharPtr; + short *shortPtr; + ushort *ushortPtr; + int *intPtr; + uint *uintPtr; + Long *LongPtr; + ULong *ULongPtr; + float *floatPtr; + double *doublePtr; + + + switch( inType ) + { + case kBool: + boolPtr = (bool *)inRaw; + switch( outType ) + { + case kBool: + memcpy( outRaw, inRaw, get_explicit_type_size( inType ) ); + break; + + case kChar: + case kUChar: + case kUnsignedChar: + case kShort: + case kUShort: + case 
kUnsignedShort: + case kInt: + case kUInt: + case kUnsignedInt: + case kLong: + case kULong: + case kUnsignedLong: + memset( outRaw, *boolPtr ? 0xff : 0, get_explicit_type_size( outType ) ); + break; + + case kFloat: + floatPtr = (float *)outRaw; + *floatPtr = ( *boolPtr ) ? -1.f : 0.f; + break; + case kDouble: + doublePtr = (double *)outRaw; + *doublePtr = ( *boolPtr ) ? -1.0 : 0.0; + break; + default: + log_error( "ERROR: Invalid type given to convert_explicit_value!!\n" ); + break; + } + break; + + case kChar: + charPtr = (char *)inRaw; + switch( outType ) + { + BOOL_CASE(char) + + case kChar: + memcpy( outRaw, inRaw, get_explicit_type_size( inType ) ); + break; + + DOWN_CAST_CASE(char,kUChar,uchar,saturate) + SIMPLE_CAST_CASE(char,kUnsignedChar,uchar) + SIMPLE_CAST_CASE(char,kShort,short) + SIMPLE_CAST_CASE(char,kUShort,ushort) + SIMPLE_CAST_CASE(char,kUnsignedShort,ushort) + SIMPLE_CAST_CASE(char,kInt,int) + SIMPLE_CAST_CASE(char,kUInt,uint) + SIMPLE_CAST_CASE(char,kUnsignedInt,uint) + SIMPLE_CAST_CASE(char,kLong,Long) + SIMPLE_CAST_CASE(char,kULong,ULong) + SIMPLE_CAST_CASE(char,kUnsignedLong,ULong) + + TO_FLOAT_CASE(char) + TO_DOUBLE_CASE(char) + + default: + log_error( "ERROR: Invalid type given to convert_explicit_value!!\n" ); + break; + } + break; + + case kUChar: + ucharPtr = (uchar *)inRaw; + switch( outType ) + { + BOOL_CASE(uchar) + + case kUChar: + case kUnsignedChar: + memcpy( outRaw, inRaw, get_explicit_type_size( inType ) ); + break; + + DOWN_CAST_CASE(uchar,kChar,char,saturate) + SIMPLE_CAST_CASE(uchar,kShort,short) + SIMPLE_CAST_CASE(uchar,kUShort,ushort) + SIMPLE_CAST_CASE(uchar,kUnsignedShort,ushort) + SIMPLE_CAST_CASE(uchar,kInt,int) + SIMPLE_CAST_CASE(uchar,kUInt,uint) + SIMPLE_CAST_CASE(uchar,kUnsignedInt,uint) + SIMPLE_CAST_CASE(uchar,kLong,Long) + SIMPLE_CAST_CASE(uchar,kULong,ULong) + SIMPLE_CAST_CASE(uchar,kUnsignedLong,ULong) + + TO_FLOAT_CASE(uchar) + TO_DOUBLE_CASE(uchar) + + default: + log_error( "ERROR: Invalid type given to 
convert_explicit_value!!\n" ); + break; + } + break; + + case kUnsignedChar: + ucharPtr = (uchar *)inRaw; + switch( outType ) + { + BOOL_CASE(uchar) + + case kUChar: + case kUnsignedChar: + memcpy( outRaw, inRaw, get_explicit_type_size( inType ) ); + break; + + DOWN_CAST_CASE(uchar,kChar,char,saturate) + SIMPLE_CAST_CASE(uchar,kShort,short) + SIMPLE_CAST_CASE(uchar,kUShort,ushort) + SIMPLE_CAST_CASE(uchar,kUnsignedShort,ushort) + SIMPLE_CAST_CASE(uchar,kInt,int) + SIMPLE_CAST_CASE(uchar,kUInt,uint) + SIMPLE_CAST_CASE(uchar,kUnsignedInt,uint) + SIMPLE_CAST_CASE(uchar,kLong,Long) + SIMPLE_CAST_CASE(uchar,kULong,ULong) + SIMPLE_CAST_CASE(uchar,kUnsignedLong,ULong) + + TO_FLOAT_CASE(uchar) + TO_DOUBLE_CASE(uchar) + + default: + log_error( "ERROR: Invalid type given to convert_explicit_value!!\n" ); + break; + } + break; + + case kShort: + shortPtr = (short *)inRaw; + switch( outType ) + { + BOOL_CASE(short) + + case kShort: + memcpy( outRaw, inRaw, get_explicit_type_size( inType ) ); + break; + + DOWN_CAST_CASE(short,kChar,char,saturate) + DOWN_CAST_CASE(short,kUChar,uchar,saturate) + DOWN_CAST_CASE(short,kUnsignedChar,uchar,saturate) + DOWN_CAST_CASE(short,kUShort,ushort,saturate) + DOWN_CAST_CASE(short,kUnsignedShort,ushort,saturate) + SIMPLE_CAST_CASE(short,kInt,int) + SIMPLE_CAST_CASE(short,kUInt,uint) + SIMPLE_CAST_CASE(short,kUnsignedInt,uint) + SIMPLE_CAST_CASE(short,kLong,Long) + SIMPLE_CAST_CASE(short,kULong,ULong) + SIMPLE_CAST_CASE(short,kUnsignedLong,ULong) + + TO_FLOAT_CASE(short) + TO_DOUBLE_CASE(short) + + default: + log_error( "ERROR: Invalid type given to convert_explicit_value!!\n" ); + break; + } + break; + + case kUShort: + ushortPtr = (ushort *)inRaw; + switch( outType ) + { + BOOL_CASE(ushort) + + case kUShort: + case kUnsignedShort: + memcpy( outRaw, inRaw, get_explicit_type_size( inType ) ); + break; + + DOWN_CAST_CASE(ushort,kChar,char,saturate) + DOWN_CAST_CASE(ushort,kUChar,uchar,saturate) + DOWN_CAST_CASE(ushort,kUnsignedChar,uchar,saturate) 
+ DOWN_CAST_CASE(ushort,kShort,short,saturate) + SIMPLE_CAST_CASE(ushort,kInt,int) + SIMPLE_CAST_CASE(ushort,kUInt,uint) + SIMPLE_CAST_CASE(ushort,kUnsignedInt,uint) + SIMPLE_CAST_CASE(ushort,kLong,Long) + SIMPLE_CAST_CASE(ushort,kULong,ULong) + SIMPLE_CAST_CASE(ushort,kUnsignedLong,ULong) + + TO_FLOAT_CASE(ushort) + TO_DOUBLE_CASE(ushort) + + default: + log_error( "ERROR: Invalid type given to convert_explicit_value!!\n" ); + break; + } + break; + + case kUnsignedShort: + ushortPtr = (ushort *)inRaw; + switch( outType ) + { + BOOL_CASE(ushort) + + case kUShort: + case kUnsignedShort: + memcpy( outRaw, inRaw, get_explicit_type_size( inType ) ); + break; + + DOWN_CAST_CASE(ushort,kChar,char,saturate) + DOWN_CAST_CASE(ushort,kUChar,uchar,saturate) + DOWN_CAST_CASE(ushort,kUnsignedChar,uchar,saturate) + DOWN_CAST_CASE(ushort,kShort,short,saturate) + SIMPLE_CAST_CASE(ushort,kInt,int) + SIMPLE_CAST_CASE(ushort,kUInt,uint) + SIMPLE_CAST_CASE(ushort,kUnsignedInt,uint) + SIMPLE_CAST_CASE(ushort,kLong,Long) + SIMPLE_CAST_CASE(ushort,kULong,ULong) + SIMPLE_CAST_CASE(ushort,kUnsignedLong,ULong) + + TO_FLOAT_CASE(ushort) + TO_DOUBLE_CASE(ushort) + + default: + log_error( "ERROR: Invalid type given to convert_explicit_value!!\n" ); + break; + } + break; + + case kInt: + intPtr = (int *)inRaw; + switch( outType ) + { + BOOL_CASE(int) + + case kInt: + memcpy( outRaw, inRaw, get_explicit_type_size( inType ) ); + break; + + DOWN_CAST_CASE(int,kChar,char,saturate) + DOWN_CAST_CASE(int,kUChar,uchar,saturate) + DOWN_CAST_CASE(int,kUnsignedChar,uchar,saturate) + DOWN_CAST_CASE(int,kShort,short,saturate) + DOWN_CAST_CASE(int,kUShort,ushort,saturate) + DOWN_CAST_CASE(int,kUnsignedShort,ushort,saturate) + DOWN_CAST_CASE(int,kUInt,uint,saturate) + DOWN_CAST_CASE(int,kUnsignedInt,uint,saturate) + SIMPLE_CAST_CASE(int,kLong,Long) + SIMPLE_CAST_CASE(int,kULong,ULong) + SIMPLE_CAST_CASE(int,kUnsignedLong,ULong) + + TO_FLOAT_CASE(int) + TO_DOUBLE_CASE(int) + + default: + log_error( "ERROR: 
Invalid type given to convert_explicit_value!!\n" ); + break; + } + break; + + case kUInt: + uintPtr = (uint *)inRaw; + switch( outType ) + { + BOOL_CASE(uint) + + case kUInt: + case kUnsignedInt: + memcpy( outRaw, inRaw, get_explicit_type_size( inType ) ); + break; + + DOWN_CAST_CASE(uint,kChar,char,saturate) + DOWN_CAST_CASE(uint,kUChar,uchar,saturate) + DOWN_CAST_CASE(uint,kUnsignedChar,uchar,saturate) + DOWN_CAST_CASE(uint,kShort,short,saturate) + DOWN_CAST_CASE(uint,kUShort,ushort,saturate) + DOWN_CAST_CASE(uint,kUnsignedShort,ushort,saturate) + DOWN_CAST_CASE(uint,kInt,int,saturate) + SIMPLE_CAST_CASE(uint,kLong,Long) + SIMPLE_CAST_CASE(uint,kULong,ULong) + SIMPLE_CAST_CASE(uint,kUnsignedLong,ULong) + + TO_FLOAT_CASE(uint) + TO_DOUBLE_CASE(uint) + + default: + log_error( "ERROR: Invalid type given to convert_explicit_value!!\n" ); + break; + } + break; + + case kUnsignedInt: + uintPtr = (uint *)inRaw; + switch( outType ) + { + BOOL_CASE(uint) + + case kUInt: + case kUnsignedInt: + memcpy( outRaw, inRaw, get_explicit_type_size( inType ) ); + break; + + DOWN_CAST_CASE(uint,kChar,char,saturate) + DOWN_CAST_CASE(uint,kUChar,uchar,saturate) + DOWN_CAST_CASE(uint,kUnsignedChar,uchar,saturate) + DOWN_CAST_CASE(uint,kShort,short,saturate) + DOWN_CAST_CASE(uint,kUShort,ushort,saturate) + DOWN_CAST_CASE(uint,kUnsignedShort,ushort,saturate) + DOWN_CAST_CASE(uint,kInt,int,saturate) + SIMPLE_CAST_CASE(uint,kLong,Long) + SIMPLE_CAST_CASE(uint,kULong,ULong) + SIMPLE_CAST_CASE(uint,kUnsignedLong,ULong) + + TO_FLOAT_CASE(uint) + TO_DOUBLE_CASE(uint) + + default: + log_error( "ERROR: Invalid type given to convert_explicit_value!!\n" ); + break; + } + break; + + case kLong: + LongPtr = (Long *)inRaw; + switch( outType ) + { + BOOL_CASE(Long) + + case kLong: + memcpy( outRaw, inRaw, get_explicit_type_size( inType ) ); + break; + + DOWN_CAST_CASE(Long,kChar,char,saturate) + DOWN_CAST_CASE(Long,kUChar,uchar,saturate) + DOWN_CAST_CASE(Long,kUnsignedChar,uchar,saturate) + 
DOWN_CAST_CASE(Long,kShort,short,saturate) + DOWN_CAST_CASE(Long,kUShort,ushort,saturate) + DOWN_CAST_CASE(Long,kUnsignedShort,ushort,saturate) + DOWN_CAST_CASE(Long,kInt,int,saturate) + DOWN_CAST_CASE(Long,kUInt,uint,saturate) + DOWN_CAST_CASE(Long,kUnsignedInt,uint,saturate) + DOWN_CAST_CASE(Long,kULong,ULong,saturate) + DOWN_CAST_CASE(Long,kUnsignedLong,ULong,saturate) + + TO_FLOAT_CASE(Long) + TO_DOUBLE_CASE(Long) + + default: + log_error( "ERROR: Invalid type given to convert_explicit_value!!\n" ); + break; + } + break; + + case kULong: + ULongPtr = (ULong *)inRaw; + switch( outType ) + { + BOOL_CASE(ULong) + + case kUnsignedLong: + case kULong: + memcpy( outRaw, inRaw, get_explicit_type_size( inType ) ); + break; + + U_DOWN_CAST_CASE(ULong,kChar,char,saturate) + U_DOWN_CAST_CASE(ULong,kUChar,uchar,saturate) + U_DOWN_CAST_CASE(ULong,kUnsignedChar,uchar,saturate) + U_DOWN_CAST_CASE(ULong,kShort,short,saturate) + U_DOWN_CAST_CASE(ULong,kUShort,ushort,saturate) + U_DOWN_CAST_CASE(ULong,kUnsignedShort,ushort,saturate) + U_DOWN_CAST_CASE(ULong,kInt,int,saturate) + U_DOWN_CAST_CASE(ULong,kUInt,uint,saturate) + U_DOWN_CAST_CASE(ULong,kUnsignedInt,uint,saturate) + U_DOWN_CAST_CASE(ULong,kLong,Long,saturate) + + TO_FLOAT_CASE(ULong) + TO_DOUBLE_CASE(ULong) + + default: + log_error( "ERROR: Invalid type given to convert_explicit_value!!\n" ); + break; + } + break; + + case kUnsignedLong: + ULongPtr = (ULong *)inRaw; + switch( outType ) + { + BOOL_CASE(ULong) + + case kULong: + case kUnsignedLong: + memcpy( outRaw, inRaw, get_explicit_type_size( inType ) ); + break; + + U_DOWN_CAST_CASE(ULong,kChar,char,saturate) + U_DOWN_CAST_CASE(ULong,kUChar,uchar,saturate) + U_DOWN_CAST_CASE(ULong,kUnsignedChar,uchar,saturate) + U_DOWN_CAST_CASE(ULong,kShort,short,saturate) + U_DOWN_CAST_CASE(ULong,kUShort,ushort,saturate) + U_DOWN_CAST_CASE(ULong,kUnsignedShort,ushort,saturate) + U_DOWN_CAST_CASE(ULong,kInt,int,saturate) + U_DOWN_CAST_CASE(ULong,kUInt,uint,saturate) + 
U_DOWN_CAST_CASE(ULong,kUnsignedInt,uint,saturate) + U_DOWN_CAST_CASE(ULong,kLong,Long,saturate) + + TO_FLOAT_CASE(ULong) + TO_DOUBLE_CASE(ULong) + + default: + log_error( "ERROR: Invalid type given to convert_explicit_value!!\n" ); + break; + } + break; + + case kFloat: + floatPtr = (float *)inRaw; + switch( outType ) + { + BOOL_CASE(float) + + FLOAT_ROUND_CASE(kChar,char,roundType,saturate) + FLOAT_ROUND_CASE(kUChar,uchar,roundType,saturate) + FLOAT_ROUND_CASE(kUnsignedChar,uchar,roundType,saturate) + FLOAT_ROUND_CASE(kShort,short,roundType,saturate) + FLOAT_ROUND_CASE(kUShort,ushort,roundType,saturate) + FLOAT_ROUND_CASE(kUnsignedShort,ushort,roundType,saturate) + FLOAT_ROUND_CASE(kInt,int,roundType,saturate) + FLOAT_ROUND_CASE(kUInt,uint,roundType,saturate) + FLOAT_ROUND_CASE(kUnsignedInt,uint,roundType,saturate) + FLOAT_ROUND_CASE(kLong,Long,roundType,saturate) + FLOAT_ROUND_CASE(kULong,ULong,roundType,saturate) + FLOAT_ROUND_CASE(kUnsignedLong,ULong,roundType,saturate) + + case kFloat: + memcpy( outRaw, inRaw, get_explicit_type_size( inType ) ); + break; + + TO_DOUBLE_CASE(float); + + default: + log_error( "ERROR: Invalid type given to convert_explicit_value!!\n" ); + break; + } + break; + + case kDouble: + doublePtr = (double *)inRaw; + switch( outType ) + { + BOOL_CASE(double) + + DOUBLE_ROUND_CASE(kChar,char,roundType,saturate) + DOUBLE_ROUND_CASE(kUChar,uchar,roundType,saturate) + DOUBLE_ROUND_CASE(kUnsignedChar,uchar,roundType,saturate) + DOUBLE_ROUND_CASE(kShort,short,roundType,saturate) + DOUBLE_ROUND_CASE(kUShort,ushort,roundType,saturate) + DOUBLE_ROUND_CASE(kUnsignedShort,ushort,roundType,saturate) + DOUBLE_ROUND_CASE(kInt,int,roundType,saturate) + DOUBLE_ROUND_CASE(kUInt,uint,roundType,saturate) + DOUBLE_ROUND_CASE(kUnsignedInt,uint,roundType,saturate) + DOUBLE_ROUND_CASE(kLong,Long,roundType,saturate) + DOUBLE_ROUND_CASE(kULong,ULong,roundType,saturate) + DOUBLE_ROUND_CASE(kUnsignedLong,ULong,roundType,saturate) + + TO_FLOAT_CASE(double); + + case 
kDouble: + memcpy( outRaw, inRaw, get_explicit_type_size( inType ) ); + break; + + default: + log_error( "ERROR: Invalid type given to convert_explicit_value!!\n" ); + break; + } + break; + + default: + log_error( "ERROR: Invalid type given to convert_explicit_value!!\n" ); + break; + } +} + +void generate_random_data( ExplicitType type, size_t count, MTdata d, void *outData ) +{ + bool *boolPtr; + cl_char *charPtr; + cl_uchar *ucharPtr; + cl_short *shortPtr; + cl_ushort *ushortPtr; + cl_int *intPtr; + cl_uint *uintPtr; + cl_long *longPtr; + cl_ulong *ulongPtr; + cl_float *floatPtr; + cl_double *doublePtr; + cl_ushort *halfPtr; + size_t i; + cl_uint bits = genrand_int32(d); + cl_uint bitsLeft = 32; + + switch( type ) + { + case kBool: + boolPtr = (bool *)outData; + for( i = 0; i < count; i++ ) + { + if( 0 == bitsLeft) + { + bits = genrand_int32(d); + bitsLeft = 32; + } + boolPtr[i] = ( bits & 1 ) ? true : false; + bits >>= 1; bitsLeft -= 1; + } + break; + + case kChar: + charPtr = (cl_char *)outData; + for( i = 0; i < count; i++ ) + { + if( 0 == bitsLeft) + { + bits = genrand_int32(d); + bitsLeft = 32; + } + charPtr[i] = (cl_char)( (cl_int)(bits & 255 ) - 127 ); + bits >>= 8; bitsLeft -= 8; + } + break; + + case kUChar: + case kUnsignedChar: + ucharPtr = (cl_uchar *)outData; + for( i = 0; i < count; i++ ) + { + if( 0 == bitsLeft) + { + bits = genrand_int32(d); + bitsLeft = 32; + } + ucharPtr[i] = (cl_uchar)( bits & 255 ); + bits >>= 8; bitsLeft -= 8; + } + break; + + case kShort: + shortPtr = (cl_short *)outData; + for( i = 0; i < count; i++ ) + { + if( 0 == bitsLeft) + { + bits = genrand_int32(d); + bitsLeft = 32; + } + shortPtr[i] = (cl_short)( (cl_int)( bits & 65535 ) - 32767 ); + bits >>= 16; bitsLeft -= 16; + } + break; + + case kUShort: + case kUnsignedShort: + ushortPtr = (cl_ushort *)outData; + for( i = 0; i < count; i++ ) + { + if( 0 == bitsLeft) + { + bits = genrand_int32(d); + bitsLeft = 32; + } + ushortPtr[i] = (cl_ushort)( (cl_int)( bits & 65535 ) ); + 
bits >>= 16; bitsLeft -= 16; + } + break; + + case kInt: + intPtr = (cl_int *)outData; + for( i = 0; i < count; i++ ) + { + intPtr[i] = (cl_int)genrand_int32(d); + } + break; + + case kUInt: + case kUnsignedInt: + uintPtr = (cl_uint *)outData; + for( i = 0; i < count; i++ ) + { + uintPtr[i] = (unsigned int)genrand_int32(d); + } + break; + + case kLong: + longPtr = (cl_long *)outData; + for( i = 0; i < count; i++ ) + { + longPtr[i] = (cl_long)genrand_int32(d) | ( (cl_long)genrand_int32(d) << 32 ); + } + break; + + case kULong: + case kUnsignedLong: + ulongPtr = (cl_ulong *)outData; + for( i = 0; i < count; i++ ) + { + ulongPtr[i] = (cl_ulong)genrand_int32(d) | ( (cl_ulong)genrand_int32(d) << 32 ); + } + break; + + case kFloat: + floatPtr = (cl_float *)outData; + for( i = 0; i < count; i++ ) + { + // [ -(double) 0x7fffffff, (double) 0x7fffffff ] + double t = genrand_real1(d); + floatPtr[i] = (float) ((1.0 - t) * -(double) 0x7fffffff + t * (double) 0x7fffffff); + } + break; + + case kDouble: + doublePtr = (cl_double *)outData; + for( i = 0; i < count; i++ ) + { + cl_long u = (cl_long)genrand_int32(d) | ( (cl_long)genrand_int32(d) << 32 ); + double t = (double) u; + t *= MAKE_HEX_DOUBLE( 0x1.0p-32, 0x1, -32 ); // scale [-2**63, 2**63] to [-2**31, 2**31] + doublePtr[i] = t; + } + break; + + case kHalf: + halfPtr = (ushort *)outData; + for( i = 0; i < count; i++ ) + { + if( 0 == bitsLeft) + { + bits = genrand_int32(d); + bitsLeft = 32; + } + halfPtr[i] = bits & 65535; /* Kindly generates random bits for us */ + bits >>= 16; bitsLeft -= 16; + } + break; + + default: + log_error( "ERROR: Invalid type passed in to generate_random_data!\n" ); + break; + } +} + +void * create_random_data( ExplicitType type, MTdata d, size_t count ) +{ + void *data = malloc( get_explicit_type_size( type ) * count ); + generate_random_data( type, count, d, data ); + return data; +} + +cl_long read_upscale_signed( void *inRaw, ExplicitType inType ) +{ + switch( inType ) + { + case kChar: + 
return (cl_long)( *( (cl_char *)inRaw ) ); + case kUChar: + case kUnsignedChar: + return (cl_long)( *( (cl_uchar *)inRaw ) ); + case kShort: + return (cl_long)( *( (cl_short *)inRaw ) ); + case kUShort: + case kUnsignedShort: + return (cl_long)( *( (cl_ushort *)inRaw ) ); + case kInt: + return (cl_long)( *( (cl_int *)inRaw ) ); + case kUInt: + case kUnsignedInt: + return (cl_long)( *( (cl_uint *)inRaw ) ); + case kLong: + return (cl_long)( *( (cl_long *)inRaw ) ); + case kULong: + case kUnsignedLong: + return (cl_long)( *( (cl_ulong *)inRaw ) ); + default: + return 0; + } +} + +cl_ulong read_upscale_unsigned( void *inRaw, ExplicitType inType ) +{ + switch( inType ) + { + case kChar: + return (cl_ulong)( *( (cl_char *)inRaw ) ); + case kUChar: + case kUnsignedChar: + return (cl_ulong)( *( (cl_uchar *)inRaw ) ); + case kShort: + return (cl_ulong)( *( (cl_short *)inRaw ) ); + case kUShort: + case kUnsignedShort: + return (cl_ulong)( *( (cl_ushort *)inRaw ) ); + case kInt: + return (cl_ulong)( *( (cl_int *)inRaw ) ); + case kUInt: + case kUnsignedInt: + return (cl_ulong)( *( (cl_uint *)inRaw ) ); + case kLong: + return (cl_ulong)( *( (cl_long *)inRaw ) ); + case kULong: + case kUnsignedLong: + return (cl_ulong)( *( (cl_ulong *)inRaw ) ); + default: + return 0; + } +} + +float read_as_float( void *inRaw, ExplicitType inType ) +{ + switch( inType ) + { + case kChar: + return (float)( *( (cl_char *)inRaw ) ); + case kUChar: + case kUnsignedChar: + return (float)( *( (cl_char *)inRaw ) ); + case kShort: + return (float)( *( (cl_short *)inRaw ) ); + case kUShort: + case kUnsignedShort: + return (float)( *( (cl_ushort *)inRaw ) ); + case kInt: + return (float)( *( (cl_int *)inRaw ) ); + case kUInt: + case kUnsignedInt: + return (float)( *( (cl_uint *)inRaw ) ); + case kLong: + return (float)( *( (cl_long *)inRaw ) ); + case kULong: + case kUnsignedLong: + return (float)( *( (cl_ulong *)inRaw ) ); + case kFloat: + return *( (float *)inRaw ); + case kDouble: + return (float) 
*( (double*)inRaw ); + default: + return 0; + } +} + +float get_random_float(float low, float high, MTdata d) +{ + float t = (float)((double)genrand_int32(d) / (double)0xFFFFFFFF); + return (1.0f - t) * low + t * high; +} + +double get_random_double(double low, double high, MTdata d) +{ + cl_ulong u = (cl_ulong) genrand_int32(d) | ((cl_ulong) genrand_int32(d) << 32 ); + double t = (double) u * MAKE_HEX_DOUBLE( 0x1.0p-64, 0x1, -64); + return (1.0f - t) * low + t * high; +} + +float any_float( MTdata d ) +{ + union + { + float f; + cl_uint u; + }u; + + u.u = genrand_int32(d); + return u.f; +} + + +double any_double( MTdata d ) +{ + union + { + double f; + cl_ulong u; + }u; + + u.u = (cl_ulong) genrand_int32(d) | ((cl_ulong) genrand_int32(d) << 32); + return u.f; +} + +int random_in_range( int minV, int maxV, MTdata d ) +{ + cl_ulong r = ((cl_ulong) genrand_int32(d) ) * (maxV - minV + 1); + return (cl_uint)(r >> 32) + minV; +} + +size_t get_random_size_t(size_t low, size_t high, MTdata d) +{ + enum { N = sizeof(size_t)/sizeof(int) }; + + union { + int word[N]; + size_t size; + } u; + + for (unsigned i=0; i != N; ++i) { + u.word[i] = genrand_int32(d); + } + + assert(low <= high && "Invalid random number range specified"); + size_t range = high - low; + + return (range) ? low + ((u.size - low) % range) : low; +} + + diff --git a/test_conformance/compatibility/test_common/harness/conversions.h b/test_conformance/compatibility/test_common/harness/conversions.h new file mode 100644 index 00000000..aa3cb6b4 --- /dev/null +++ b/test_conformance/compatibility/test_common/harness/conversions.h @@ -0,0 +1,126 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef _conversions_h +#define _conversions_h + +#include "compat.h" + +#include "errorHelpers.h" +#include "mt19937.h" +#include +#include +#include +#include + +#if defined(__cplusplus) +extern "C" { +#endif + +/* Note: the next three all have to match in size and order!! */ + +enum ExplicitTypes +{ + kBool = 0, + kChar, + kUChar, + kUnsignedChar, + kShort, + kUShort, + kUnsignedShort, + kInt, + kUInt, + kUnsignedInt, + kLong, + kULong, + kUnsignedLong, + kFloat, + kHalf, + kDouble, + kNumExplicitTypes +}; + +typedef enum ExplicitTypes ExplicitType; + +enum RoundingTypes +{ + kRoundToEven = 0, + kRoundToZero, + kRoundToPosInf, + kRoundToNegInf, + kRoundToNearest, + + kNumRoundingTypes, + + kDefaultRoundingType = kRoundToNearest +}; + +typedef enum RoundingTypes RoundingType; + +extern void print_type_to_string(ExplicitType type, void *data, char* string); +extern size_t get_explicit_type_size( ExplicitType type ); +extern const char * get_explicit_type_name( ExplicitType type ); +extern void convert_explicit_value( void *inRaw, void *outRaw, ExplicitType inType, bool saturate, RoundingType roundType, ExplicitType outType ); + +extern void generate_random_data( ExplicitType type, size_t count, MTdata d, void *outData ); +extern void * create_random_data( ExplicitType type, MTdata d, size_t count ); + +extern cl_long read_upscale_signed( void *inRaw, ExplicitType inType ); +extern cl_ulong read_upscale_unsigned( void *inRaw, ExplicitType inType ); +extern float read_as_float( void *inRaw, ExplicitType inType ); + +extern float 
get_random_float(float low, float high, MTdata d); +extern double get_random_double(double low, double high, MTdata d); +extern float any_float( MTdata d ); +extern double any_double( MTdata d ); + +extern int random_in_range( int minV, int maxV, MTdata d ); + +size_t get_random_size_t(size_t low, size_t high, MTdata d); + +// Note: though this takes a double, this is for use with single precision tests +static inline int IsFloatSubnormal( float x ) +{ +#if 2 == FLT_RADIX + // Do this in integer to avoid problems with FTZ behavior + union{ float d; uint32_t u;}u; + u.d = fabsf(x); + return (u.u-1) < 0x007fffffU; +#else + // rely on floating point hardware for non-radix2 non-IEEE-754 hardware -- will fail if you flush subnormals to zero + return fabs(x) < (double) FLT_MIN && x != 0.0; +#endif +} + +static inline int IsDoubleSubnormal( double x ) +{ +#if 2 == FLT_RADIX + // Do this in integer to avoid problems with FTZ behavior + union{ double d; uint64_t u;}u; + u.d = fabs( x); + return (u.u-1) < 0x000fffffffffffffULL; +#else + // rely on floating point hardware for non-radix2 non-IEEE-754 hardware -- will fail if you flush subnormals to zero + return fabs(x) < (double) DBL_MIN && x != 0.0; +#endif +} + +#if defined(__cplusplus) +} +#endif + +#endif // _conversions_h + + diff --git a/test_conformance/compatibility/test_common/harness/errorHelpers.c b/test_conformance/compatibility/test_common/harness/errorHelpers.c new file mode 100644 index 00000000..79c4ca21 --- /dev/null +++ b/test_conformance/compatibility/test_common/harness/errorHelpers.c @@ -0,0 +1,579 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "compat.h" + +#include +#include +#include + +#include "errorHelpers.h" + +const char *IGetErrorString( int clErrorCode ) +{ + switch( clErrorCode ) + { + case CL_SUCCESS: return "CL_SUCCESS"; + case CL_DEVICE_NOT_FOUND: return "CL_DEVICE_NOT_FOUND"; + case CL_DEVICE_NOT_AVAILABLE: return "CL_DEVICE_NOT_AVAILABLE"; + case CL_COMPILER_NOT_AVAILABLE: return "CL_COMPILER_NOT_AVAILABLE"; + case CL_MEM_OBJECT_ALLOCATION_FAILURE: return "CL_MEM_OBJECT_ALLOCATION_FAILURE"; + case CL_OUT_OF_RESOURCES: return "CL_OUT_OF_RESOURCES"; + case CL_OUT_OF_HOST_MEMORY: return "CL_OUT_OF_HOST_MEMORY"; + case CL_PROFILING_INFO_NOT_AVAILABLE: return "CL_PROFILING_INFO_NOT_AVAILABLE"; + case CL_MEM_COPY_OVERLAP: return "CL_MEM_COPY_OVERLAP"; + case CL_IMAGE_FORMAT_MISMATCH: return "CL_IMAGE_FORMAT_MISMATCH"; + case CL_IMAGE_FORMAT_NOT_SUPPORTED: return "CL_IMAGE_FORMAT_NOT_SUPPORTED"; + case CL_BUILD_PROGRAM_FAILURE: return "CL_BUILD_PROGRAM_FAILURE"; + case CL_MAP_FAILURE: return "CL_MAP_FAILURE"; + case CL_MISALIGNED_SUB_BUFFER_OFFSET: return "CL_MISALIGNED_SUB_BUFFER_OFFSET"; + case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST: return "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST"; + case CL_COMPILE_PROGRAM_FAILURE: return "CL_COMPILE_PROGRAM_FAILURE"; + case CL_LINKER_NOT_AVAILABLE: return "CL_LINKER_NOT_AVAILABLE"; + case CL_LINK_PROGRAM_FAILURE: return "CL_LINK_PROGRAM_FAILURE"; + case CL_DEVICE_PARTITION_FAILED: return "CL_DEVICE_PARTITION_FAILED"; + case CL_KERNEL_ARG_INFO_NOT_AVAILABLE: return "CL_KERNEL_ARG_INFO_NOT_AVAILABLE"; + case 
CL_INVALID_VALUE: return "CL_INVALID_VALUE"; + case CL_INVALID_DEVICE_TYPE: return "CL_INVALID_DEVICE_TYPE"; + case CL_INVALID_DEVICE: return "CL_INVALID_DEVICE"; + case CL_INVALID_CONTEXT: return "CL_INVALID_CONTEXT"; + case CL_INVALID_QUEUE_PROPERTIES: return "CL_INVALID_QUEUE_PROPERTIES"; + case CL_INVALID_COMMAND_QUEUE: return "CL_INVALID_COMMAND_QUEUE"; + case CL_INVALID_HOST_PTR: return "CL_INVALID_HOST_PTR"; + case CL_INVALID_MEM_OBJECT: return "CL_INVALID_MEM_OBJECT"; + case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR"; + case CL_INVALID_IMAGE_SIZE: return "CL_INVALID_IMAGE_SIZE"; + case CL_INVALID_SAMPLER: return "CL_INVALID_SAMPLER"; + case CL_INVALID_BINARY: return "CL_INVALID_BINARY"; + case CL_INVALID_BUILD_OPTIONS: return "CL_INVALID_BUILD_OPTIONS"; + case CL_INVALID_PROGRAM: return "CL_INVALID_PROGRAM"; + case CL_INVALID_PROGRAM_EXECUTABLE: return "CL_INVALID_PROGRAM_EXECUTABLE"; + case CL_INVALID_KERNEL_NAME: return "CL_INVALID_KERNEL_NAME"; + case CL_INVALID_KERNEL_DEFINITION: return "CL_INVALID_KERNEL_DEFINITION"; + case CL_INVALID_KERNEL: return "CL_INVALID_KERNEL"; + case CL_INVALID_ARG_INDEX: return "CL_INVALID_ARG_INDEX"; + case CL_INVALID_ARG_VALUE: return "CL_INVALID_ARG_VALUE"; + case CL_INVALID_ARG_SIZE: return "CL_INVALID_ARG_SIZE"; + case CL_INVALID_KERNEL_ARGS: return "CL_INVALID_KERNEL_ARGS"; + case CL_INVALID_WORK_DIMENSION: return "CL_INVALID_WORK_DIMENSION"; + case CL_INVALID_WORK_GROUP_SIZE: return "CL_INVALID_WORK_GROUP_SIZE"; + case CL_INVALID_WORK_ITEM_SIZE: return "CL_INVALID_WORK_ITEM_SIZE"; + case CL_INVALID_GLOBAL_OFFSET: return "CL_INVALID_GLOBAL_OFFSET"; + case CL_INVALID_EVENT_WAIT_LIST: return "CL_INVALID_EVENT_WAIT_LIST"; + case CL_INVALID_EVENT: return "CL_INVALID_EVENT"; + case CL_INVALID_OPERATION: return "CL_INVALID_OPERATION"; + case CL_INVALID_GL_OBJECT: return "CL_INVALID_GL_OBJECT"; + case CL_INVALID_BUFFER_SIZE: return "CL_INVALID_BUFFER_SIZE"; + case CL_INVALID_MIP_LEVEL: 
return "CL_INVALID_MIP_LEVEL"; + case CL_INVALID_GLOBAL_WORK_SIZE: return "CL_INVALID_GLOBAL_WORK_SIZE"; + case CL_INVALID_PROPERTY: return "CL_INVALID_PROPERTY"; + case CL_INVALID_IMAGE_DESCRIPTOR: return "CL_INVALID_IMAGE_DESCRIPTOR"; + case CL_INVALID_COMPILER_OPTIONS: return "CL_INVALID_COMPILER_OPTIONS"; + case CL_INVALID_LINKER_OPTIONS: return "CL_INVALID_LINKER_OPTIONS"; + case CL_INVALID_DEVICE_PARTITION_COUNT: return "CL_INVALID_DEVICE_PARTITION_COUNT"; + default: return "(unknown)"; + } +} + +const char *GetChannelOrderName( cl_channel_order order ) +{ + switch( order ) + { + case CL_R: return "CL_R"; + case CL_A: return "CL_A"; + case CL_Rx: return "CL_Rx"; + case CL_RG: return "CL_RG"; + case CL_RA: return "CL_RA"; + case CL_RGx: return "CL_RGx"; + case CL_RGB: return "CL_RGB"; + case CL_RGBx: return "CL_RGBx"; + case CL_RGBA: return "CL_RGBA"; + case CL_ARGB: return "CL_ARGB"; + case CL_BGRA: return "CL_BGRA"; + case CL_INTENSITY: return "CL_INTENSITY"; + case CL_LUMINANCE: return "CL_LUMINANCE"; +#if defined CL_1RGB_APPLE + case CL_1RGB_APPLE: return "CL_1RGB_APPLE"; +#endif +#if defined CL_BGR1_APPLE + case CL_BGR1_APPLE: return "CL_BGR1_APPLE"; +#endif + default: return NULL; + } +} + +int IsChannelOrderSupported( cl_channel_order order ) +{ + switch( order ) + { + case CL_R: + case CL_A: + case CL_Rx: + case CL_RG: + case CL_RA: + case CL_RGx: + case CL_RGB: + case CL_RGBx: + case CL_RGBA: + case CL_ARGB: + case CL_BGRA: + case CL_INTENSITY: + case CL_LUMINANCE: + return 1; +#if defined CL_1RGB_APPLE + case CL_1RGB_APPLE: + return 1; +#endif +#if defined CL_BGR1_APPLE + case CL_BGR1_APPLE: + return 1; +#endif + default: + return 0; + } +} + +const char *GetChannelTypeName( cl_channel_type type ) +{ + switch( type ) + { + case CL_SNORM_INT8: return "CL_SNORM_INT8"; + case CL_SNORM_INT16: return "CL_SNORM_INT16"; + case CL_UNORM_INT8: return "CL_UNORM_INT8"; + case CL_UNORM_INT16: return "CL_UNORM_INT16"; + case CL_UNORM_SHORT_565: return 
"CL_UNORM_SHORT_565"; + case CL_UNORM_SHORT_555: return "CL_UNORM_SHORT_555"; + case CL_UNORM_INT_101010: return "CL_UNORM_INT_101010"; + case CL_SIGNED_INT8: return "CL_SIGNED_INT8"; + case CL_SIGNED_INT16: return "CL_SIGNED_INT16"; + case CL_SIGNED_INT32: return "CL_SIGNED_INT32"; + case CL_UNSIGNED_INT8: return "CL_UNSIGNED_INT8"; + case CL_UNSIGNED_INT16: return "CL_UNSIGNED_INT16"; + case CL_UNSIGNED_INT32: return "CL_UNSIGNED_INT32"; + case CL_HALF_FLOAT: return "CL_HALF_FLOAT"; + case CL_FLOAT: return "CL_FLOAT"; +#ifdef CL_SFIXED14_APPLE + case CL_SFIXED14_APPLE: return "CL_SFIXED14_APPLE"; +#endif + default: return NULL; + } +} + +int IsChannelTypeSupported( cl_channel_type type ) +{ + switch( type ) + { + case CL_SNORM_INT8: + case CL_SNORM_INT16: + case CL_UNORM_INT8: + case CL_UNORM_INT16: + case CL_UNORM_SHORT_565: + case CL_UNORM_SHORT_555: + case CL_UNORM_INT_101010: + case CL_SIGNED_INT8: + case CL_SIGNED_INT16: + case CL_SIGNED_INT32: + case CL_UNSIGNED_INT8: + case CL_UNSIGNED_INT16: + case CL_UNSIGNED_INT32: + case CL_HALF_FLOAT: + case CL_FLOAT: + return 1; +#ifdef CL_SFIXED14_APPLE + case CL_SFIXED14_APPLE: + return 1; +#endif + default: + return 0; + } +} + +const char *GetAddressModeName( cl_addressing_mode mode ) +{ + switch( mode ) + { + case CL_ADDRESS_NONE: return "CL_ADDRESS_NONE"; + case CL_ADDRESS_CLAMP_TO_EDGE: return "CL_ADDRESS_CLAMP_TO_EDGE"; + case CL_ADDRESS_CLAMP: return "CL_ADDRESS_CLAMP"; + case CL_ADDRESS_REPEAT: return "CL_ADDRESS_REPEAT"; + case CL_ADDRESS_MIRRORED_REPEAT: return "CL_ADDRESS_MIRRORED_REPEAT"; + default: return NULL; + } +} + +const char *GetDeviceTypeName( cl_device_type type ) +{ + switch( type ) + { + case CL_DEVICE_TYPE_GPU: return "CL_DEVICE_TYPE_GPU"; + case CL_DEVICE_TYPE_CPU: return "CL_DEVICE_TYPE_CPU"; + case CL_DEVICE_TYPE_ACCELERATOR: return "CL_DEVICE_TYPE_ACCELERATOR"; + case CL_DEVICE_TYPE_ALL: return "CL_DEVICE_TYPE_ALL"; + default: return NULL; + } +} + +const char *GetDataVectorString( void 
*dataBuffer, size_t typeSize, size_t vecSize, char *buffer ) +{ + static char scratch[ 1024 ]; + size_t i, j; + + if( buffer == NULL ) + buffer = scratch; + + unsigned char *p = (unsigned char *)dataBuffer; + char *bPtr; + + buffer[ 0 ] = 0; + bPtr = buffer; + for( i = 0; i < vecSize; i++ ) + { + if( i > 0 ) + { + bPtr[ 0 ] = ' '; + bPtr++; + } + for( j = 0; j < typeSize; j++ ) + { + sprintf( bPtr, "%02x", (unsigned int)p[ typeSize - j - 1 ] ); + bPtr += 2; + } + p += typeSize; + } + bPtr[ 0 ] = 0; + + return buffer; +} + +#ifndef MAX +#define MAX( _a, _b ) ((_a) > (_b) ? (_a) : (_b)) +#endif + +#if defined( _MSC_VER ) +#define scalbnf(_a, _i ) ldexpf( _a, _i ) +#define scalbn(_a, _i ) ldexp( _a, _i ) +#define scalbnl(_a, _i ) ldexpl( _a, _i ) +#endif + +static float Ulp_Error_Half_Float( float test, double reference ); +static inline float half2float( cl_ushort half ); + +// taken from math tests +#define HALF_MIN_EXP -13 +#define HALF_MANT_DIG 11 +static float Ulp_Error_Half_Float( float test, double reference ) +{ + union{ double d; uint64_t u; }u; u.d = reference; + + // Note: This function presumes that someone has already tested whether the result is correctly, + // rounded before calling this function. That test: + // + // if( (float) reference == test ) + // return 0.0f; + // + // would ensure that cases like fabs(reference) > FLT_MAX are weeded out before we get here. + // Otherwise, we'll return inf ulp error here, for what are otherwise correctly rounded + // results. 
+ + double testVal = test; + if( u.u & 0x000fffffffffffffULL ) + { // Non-power of two and NaN + if( isnan( reference ) && isnan( test ) ) + return 0.0f; // if we are expecting a NaN, any NaN is fine + + // The unbiased exponent of the ulp unit place + int ulp_exp = HALF_MANT_DIG - 1 - MAX( ilogb( reference), HALF_MIN_EXP-1 ); + + // Scale the exponent of the error + return (float) scalbn( testVal - reference, ulp_exp ); + } + + if( isinf( reference ) ) + { + if( (double) test == reference ) + return 0.0f; + + return (float) (testVal - reference ); + } + + // reference is a normal power of two or a zero + int ulp_exp = HALF_MANT_DIG - 1 - MAX( ilogb( reference) - 1, HALF_MIN_EXP-1 ); + + // Scale the exponent of the error + return (float) scalbn( testVal - reference, ulp_exp ); +} + +// Taken from vLoadHalf test +static inline float half2float( cl_ushort us ) +{ + uint32_t u = us; + uint32_t sign = (u << 16) & 0x80000000; + int32_t exponent = (u & 0x7c00) >> 10; + uint32_t mantissa = (u & 0x03ff) << 13; + union{ unsigned int u; float f;}uu; + + if( exponent == 0 ) + { + if( mantissa == 0 ) + return sign ? -0.0f : 0.0f; + + int shift = __builtin_clz( mantissa ) - 8; + exponent -= shift-1; + mantissa <<= shift; + mantissa &= 0x007fffff; + } + else + if( exponent == 31) + { + uu.u = mantissa | sign; + if( mantissa ) + uu.u |= 0x7fc00000; + else + uu.u |= 0x7f800000; + + return uu.f; + } + + exponent += 127 - 15; + exponent <<= 23; + + exponent |= mantissa; + uu.u = exponent | sign; + + return uu.f; +} + +float Ulp_Error_Half( cl_ushort test, float reference ) +{ + return Ulp_Error_Half_Float( half2float(test), reference ); +} + + +float Ulp_Error( float test, double reference ) +{ + union{ double d; uint64_t u; }u; u.d = reference; + double testVal = test; + + // Note: This function presumes that someone has already tested whether the result is correctly, + // rounded before calling this function. 
That test: + // + // if( (float) reference == test ) + // return 0.0f; + // + // would ensure that cases like fabs(reference) > FLT_MAX are weeded out before we get here. + // Otherwise, we'll return inf ulp error here, for what are otherwise correctly rounded + // results. + + + if( isinf( reference ) ) + { + if( testVal == reference ) + return 0.0f; + + return (float) (testVal - reference ); + } + + if( isinf( testVal) ) + { // infinite test value, but finite (but possibly overflowing in float) reference. + // + // The function probably overflowed prematurely here. Formally, the spec says this is + // an infinite ulp error and should not be tolerated. Unfortunately, this would mean + // that the internal precision of some half_pow implementations would have to be 29+ bits + // at half_powr( 0x1.fffffep+31, 4) to correctly determine that 4*log2( 0x1.fffffep+31 ) + // is not exactly 128.0. You might represent this for example as 4*(32 - ~2**-24), which + // after rounding to single is 4*32 = 128, which will ultimately result in premature + // overflow, even though a good faith representation would be correct to within 2**-29 + // interally. + + // In the interest of not requiring the implementation go to extraordinary lengths to + // deliver a half precision function, we allow premature overflow within the limit + // of the allowed ulp error. Towards, that end, we "pretend" the test value is actually + // 2**128, the next value that would appear in the number line if float had sufficient range. + testVal = copysign( MAKE_HEX_DOUBLE(0x1.0p128, 0x1LL, 128), testVal ); + + // Note that the same hack may not work in long double, which is not guaranteed to have + // more range than double. It is not clear that premature overflow should be tolerated for + // double. 
    }

    if( u.u & 0x000fffffffffffffULL )
    { // Non-power of two and NaN
        if( isnan( reference ) && isnan( test ) )
            return 0.0f;    // if we are expecting a NaN, any NaN is fine

        // The unbiased exponent of the ulp unit place
        int ulp_exp = FLT_MANT_DIG - 1 - MAX( ilogb( reference), FLT_MIN_EXP-1 );

        // Scale the exponent of the error
        return (float) scalbn( testVal - reference, ulp_exp );
    }

    // reference is a normal power of two or a zero
    // The unbiased exponent of the ulp unit place
    int ulp_exp = FLT_MANT_DIG - 1 - MAX( ilogb( reference) - 1, FLT_MIN_EXP-1 );

    // Scale the exponent of the error
    return (float) scalbn( testVal - reference, ulp_exp );
}

float Ulp_Error_Double( double test, long double reference )
{
    // Deal with long double = double
    // On most systems long double is a higher precision type than double. They provide either
    // a 80-bit or greater floating point type, or they provide a head-tail double double format.
    // That is sufficient to represent the accuracy of a floating point result to many more bits
    // than double and we can calculate sub-ulp errors. This is the standard system for which this
    // test suite is designed.
    //
    // On some systems double and long double are the same thing. Then we run into a problem,
    // because our representation of the infinitely precise result (passed in as reference above)
    // can be off by as much as a half double precision ulp itself. In this case, we inflate the
    // reported error by half an ulp to take this into account. A more correct and permanent fix
    // would be to undertake refactoring the reference code to return results in this format:
    //
    //    typedef struct DoubleReference
    //    { // true value = correctlyRoundedResult + ulps * ulp(correctlyRoundedResult)  (infinitely precise)
    //        double correctlyRoundedResult;    // as best we can
    //        double ulps;                      // plus a fractional amount to account for the difference
    //    }DoubleReference;                     // between infinitely precise result and correctlyRoundedResult, in units of ulps.
    //
    // This would provide a useful higher-than-double precision format for everyone that we can use,
    // and would solve a few problems with representing absolute errors below DBL_MIN and over DBL_MAX for systems
    // that use a head to tail double double for long double.

    // Note: This function presumes that someone has already tested whether the result is correctly,
    // rounded before calling this function. That test:
    //
    //    if( (double) reference == test )
    //        return 0.0f;
    //
    // (comment fixed: this is the double-precision variant, so the pre-test casts to double and
    // the overflow guard concerns DBL_MAX, not the float/FLT_MAX wording copy-pasted from Ulp_Error)
    // would ensure that cases like fabs(reference) > DBL_MAX are weeded out before we get here.
    // Otherwise, we'll return inf ulp error here, for what are otherwise correctly rounded
    // results.


    int x;
    long double testVal = test;
    if( 0.5L != frexpl( reference, &x) )
    { // Non-power of two and NaN
        if( isinf( reference ) )
        {
            if( testVal == reference )
                return 0.0f;

            return (float) ( testVal - reference );
        }

        if( isnan( reference ) && isnan( test ) )
            return 0.0f;    // if we are expecting a NaN, any NaN is fine

        // The unbiased exponent of the ulp unit place
        int ulp_exp = DBL_MANT_DIG - 1 - MAX( ilogbl( reference), DBL_MIN_EXP-1 );

        // Scale the exponent of the error
        float result = (float) scalbnl( testVal - reference, ulp_exp );

        // account for rounding error in reference result on systems that do not have a higher precision floating point type (see above)
        if( sizeof(long double) == sizeof( double ) )
            result += copysignf( 0.5f, result);

        return result;

    }

    // reference is a normal power of two or a zero
    // The unbiased exponent of the ulp unit place
    int ulp_exp = DBL_MANT_DIG - 1 - MAX( ilogbl( reference) - 1, DBL_MIN_EXP-1 );

    // Scale the exponent of the error
    float result = (float) scalbnl( testVal - reference, ulp_exp );

    // account for rounding error in reference result on systems that do not have a higher precision floating point type (see above)
    if( sizeof(long double) == sizeof( double ) )
        result += copysignf( 0.5f, result);

    return result;
}

// Prints the build status and build log for every device for which the given
// program failed to build.  If num_devices is zero, the device list is queried
// from the program's context; otherwise the caller-supplied device_list is used.
cl_int OutputBuildLogs(cl_program program, cl_uint num_devices, cl_device_id *device_list)
{
    int error;
    size_t size_ret;

    // Does the program object exist?
+ if (program != NULL) { + + // Was the number of devices given + if (num_devices == 0) { + + // If zero devices were specified then allocate and query the device list from the context + cl_context context; + error = clGetProgramInfo(program, CL_PROGRAM_CONTEXT, sizeof(context), &context, NULL); + test_error( error, "Unable to query program's context" ); + error = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &size_ret); + test_error( error, "Unable to query context's device size" ); + num_devices = size_ret / sizeof(cl_device_id); + device_list = (cl_device_id *) malloc(size_ret); + if (device_list == NULL) { + print_error( error, "malloc failed" ); + return CL_OUT_OF_HOST_MEMORY; + } + error = clGetContextInfo(context, CL_CONTEXT_DEVICES, size_ret, device_list, NULL); + test_error( error, "Unable to query context's devices" ); + + } + + // For each device in the device_list + unsigned int i; + for (i = 0; i < num_devices; i++) { + + // Get the build status + cl_build_status build_status; + error = clGetProgramBuildInfo(program, + device_list[i], + CL_PROGRAM_BUILD_STATUS, + sizeof(build_status), + &build_status, + &size_ret); + test_error( error, "Unable to query build status" ); + + // If the build failed then log the status, and allocate the build log, log it and free it + if (build_status != CL_BUILD_SUCCESS) { + + log_error("ERROR: CL_PROGRAM_BUILD_STATUS=%d\n", (int) build_status); + error = clGetProgramBuildInfo(program, device_list[i], CL_PROGRAM_BUILD_LOG, 0, NULL, &size_ret); + test_error( error, "Unable to query build log size" ); + char *build_log = (char *) malloc(size_ret); + error = clGetProgramBuildInfo(program, device_list[i], CL_PROGRAM_BUILD_LOG, size_ret, build_log, &size_ret); + test_error( error, "Unable to query build log" ); + log_error("ERROR: CL_PROGRAM_BUILD_LOG:\n%s\n", build_log); + free(build_log); + + } + + } + + // Was the number of devices given + if (num_devices == 0) { + + // If zero devices were specified then free the 
device list + free(device_list); + + } + + } + + return CL_SUCCESS; +} diff --git a/test_conformance/compatibility/test_common/harness/errorHelpers.h b/test_conformance/compatibility/test_common/harness/errorHelpers.h new file mode 100644 index 00000000..9b5d7097 --- /dev/null +++ b/test_conformance/compatibility/test_common/harness/errorHelpers.h @@ -0,0 +1,149 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef _errorHelpers_h +#define _errorHelpers_h + +#ifdef __APPLE__ +#include +#else +#include +#endif +#include +#ifdef __cplusplus +extern "C" { +#endif + +#define LOWER_IS_BETTER 0 +#define HIGHER_IS_BETTER 1 + +// If USE_ATF is defined, all log_error and log_info calls can be routed to test library +// functions as described below. This is helpful for integration into an automated testing +// system. +#if USE_ATF +// export BUILD_WITH_ATF=1 + #include + #define test_start() ATFTestStart() + #define log_info ATFLogInfo + #define log_error ATFLogError + #define log_perf(_number, _higherBetter, _numType, _format, ...) ATFLogPerformanceNumber(_number, _higherBetter, _numType, _format, ##__VA_ARGS__) + #define test_finish() ATFTestFinish() + #define vlog_perf(_number, _higherBetter, _numType, _format, ...) 
ATFLogPerformanceNumber(_number, _higherBetter, _numType, _format,##__VA_ARGS__) + #define vlog ATFLogInfo + #define vlog_error ATFLogError +#else + #define test_start() + #define log_info printf + #define log_error printf + #define log_perf(_number, _higherBetter, _numType, _format, ...) printf("Performance Number " _format " (in %s, %s): %g\n",##__VA_ARGS__, _numType, \ + _higherBetter?"higher is better":"lower is better", _number ) + #define test_finish() + #define vlog_perf(_number, _higherBetter, _numType, _format, ...) printf("Performance Number " _format " (in %s, %s): %g\n",##__VA_ARGS__, _numType, \ + _higherBetter?"higher is better":"lower is better" , _number) + #ifdef _WIN32 + #ifdef __MINGW32__ + // Use __mingw_printf since it supports "%a" format specifier + #define vlog __mingw_printf + #define vlog_error __mingw_printf + #else + // Use home-baked function that treats "%a" as "%f" + static int vlog_win32(const char *format, ...); + #define vlog vlog_win32 + #define vlog_error vlog_win32 + #endif + #else + #define vlog_error printf + #define vlog printf + #endif +#endif + +#define ct_assert(b) ct_assert_i(b, __LINE__) +#define ct_assert_i(b, line) ct_assert_ii(b, line) +#define ct_assert_ii(b, line) int _compile_time_assertion_on_line_##line[b ? 1 : -1]; + +#define test_error(errCode,msg) test_error_ret(errCode,msg,errCode) +#define test_error_ret(errCode,msg,retValue) { if( errCode != CL_SUCCESS ) { print_error( errCode, msg ); return retValue ; } } +#define print_error(errCode,msg) log_error( "ERROR: %s! (%s from %s:%d)\n", msg, IGetErrorString( errCode ), __FILE__, __LINE__ ); + +// expected error code vs. 
what we got +#define test_failure_error(errCode, expectedErrCode, msg) test_failure_error_ret(errCode, expectedErrCode, msg, errCode != expectedErrCode) +#define test_failure_error_ret(errCode, expectedErrCode, msg, retValue) { if( errCode != expectedErrCode ) { print_failure_error( errCode, expectedErrCode, msg ); return retValue ; } } +#define print_failure_error(errCode, expectedErrCode, msg) log_error( "ERROR: %s! (Got %s, expected %s from %s:%d)\n", msg, IGetErrorString( errCode ), IGetErrorString( expectedErrCode ), __FILE__, __LINE__ ); +#define test_failure_warning(errCode, expectedErrCode, msg) test_failure_warning_ret(errCode, expectedErrCode, msg, errCode != expectedErrCode) +#define test_failure_warning_ret(errCode, expectedErrCode, msg, retValue) { if( errCode != expectedErrCode ) { print_failure_warning( errCode, expectedErrCode, msg ); warnings++ ; } } +#define print_failure_warning(errCode, expectedErrCode, msg) log_error( "WARNING: %s! (Got %s, expected %s from %s:%d)\n", msg, IGetErrorString( errCode ), IGetErrorString( expectedErrCode ), __FILE__, __LINE__ ); + +extern const char *IGetErrorString( int clErrorCode ); + +extern float Ulp_Error_Half( cl_ushort test, float reference ); +extern float Ulp_Error( float test, double reference ); +extern float Ulp_Error_Double( double test, long double reference ); + +extern const char *GetChannelTypeName( cl_channel_type type ); +extern int IsChannelTypeSupported( cl_channel_type type ); +extern const char *GetChannelOrderName( cl_channel_order order ); +extern int IsChannelOrderSupported( cl_channel_order order ); +extern const char *GetAddressModeName( cl_addressing_mode mode ); + +extern const char *GetDeviceTypeName( cl_device_type type ); + +// NON-REENTRANT UNLESS YOU PROVIDE A BUFFER PTR (pass null to use static storage, but it's not reentrant then!) 
+extern const char *GetDataVectorString( void *dataBuffer, size_t typeSize, size_t vecSize, char *buffer ); + +#if defined (_WIN32) && !defined(__MINGW32__) +#include +#include +#include +static int vlog_win32(const char *format, ...) +{ + const char *new_format = format; + + if (strstr(format, "%a")) { + char *temp; + if ((temp = strdup(format)) == NULL) { + printf("vlog_win32: Failed to allocate memory for strdup\n"); + return -1; + } + new_format = temp; + while (*temp) { + // replace %a with %f + if ((*temp == '%') && (*(temp+1) == 'a')) { + *(temp+1) = 'f'; + } + temp++; + } + } + + va_list args; + va_start(args, format); + vprintf(new_format, args); + va_end(args); + + if (new_format != format) { + free((void*)new_format); + } + + return 0; +} +#endif + + +#ifdef __cplusplus +} +#endif + +#endif // _errorHelpers_h + + diff --git a/test_conformance/compatibility/test_common/harness/fpcontrol.h b/test_conformance/compatibility/test_common/harness/fpcontrol.h new file mode 100644 index 00000000..d6de0048 --- /dev/null +++ b/test_conformance/compatibility/test_common/harness/fpcontrol.h @@ -0,0 +1,104 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef _fpcontrol_h +#define _fpcontrol_h + +// In order to get tests for correctly rounded operations (e.g. multiply) to work properly we need to be able to set the reference hardware +// to FTZ mode if the device hardware is running in that mode. 
We have explored all other options short of writing correctly rounded operations +// in integer code, and have found this is the only way to correctly verify operation. +// +// Non-Apple implementations will need to provide their own implentation for these features. If the reference hardware and device are both +// running in the same state (either FTZ or IEEE compliant modes) then these functions may be empty. If the device is running in non-default +// rounding mode (e.g. round toward zero), then these functions should also set the reference device into that rounding mode. +#if defined( __APPLE__ ) || defined( _MSC_VER ) || defined( __linux__ ) || defined (__MINGW32__) + typedef int FPU_mode_type; +#if defined( __i386__ ) || defined( __x86_64__ ) + #include +#elif defined( __PPC__ ) + #include + extern __thread fpu_control_t fpu_control; +#endif + // Set the reference hardware floating point unit to FTZ mode + static inline void ForceFTZ( FPU_mode_type *mode ) + { +#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__) + *mode = _mm_getcsr(); + _mm_setcsr( *mode | 0x8040); +#elif defined( __PPC__ ) + *mode = fpu_control; + fpu_control |= _FPU_MASK_NI; +#elif defined ( __arm__ ) + unsigned fpscr; + __asm__ volatile ("fmrx %0, fpscr" : "=r"(fpscr)); + *mode = fpscr; + __asm__ volatile ("fmxr fpscr, %0" :: "r"(fpscr | (1U << 24))); + // Add 64 bit support +#elif defined (__aarch64__) + unsigned fpscr; + __asm__ volatile ("mrs %0, fpcr" : "=r"(fpscr)); + *mode = fpscr; + __asm__ volatile ("msr fpcr, %0" :: "r"(fpscr | (1U << 24))); +#else + #error ForceFTZ needs an implentation +#endif + } + + // Disable the denorm flush to zero + static inline void DisableFTZ( FPU_mode_type *mode ) + { +#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__) + *mode = _mm_getcsr(); + _mm_setcsr( *mode & ~0x8040); +#elif defined( __PPC__ ) + *mode = fpu_control; + fpu_control &= ~_FPU_MASK_NI; +#elif 
defined ( __arm__ ) + unsigned fpscr; + __asm__ volatile ("fmrx %0, fpscr" : "=r"(fpscr)); + *mode = fpscr; + __asm__ volatile ("fmxr fpscr, %0" :: "r"(fpscr & ~(1U << 24))); + // Add 64 bit support +#elif defined (__aarch64__) + unsigned fpscr; + __asm__ volatile ("mrs %0, fpcr" : "=r"(fpscr)); + *mode = fpscr; + __asm__ volatile ("msr fpcr, %0" :: "r"(fpscr & ~(1U << 24))); +#else +#error DisableFTZ needs an implentation +#endif + } + + // Restore the reference hardware to floating point state indicated by *mode + static inline void RestoreFPState( FPU_mode_type *mode ) + { +#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__) + _mm_setcsr( *mode ); +#elif defined( __PPC__) + fpu_control = *mode; +#elif defined (__arm__) + __asm__ volatile ("fmxr fpscr, %0" :: "r"(*mode)); + // Add 64 bit support +#elif defined (__aarch64__) + __asm__ volatile ("msr fpcr, %0" :: "r"(*mode)); +#else + #error RestoreFPState needs an implementation +#endif + } +#else + #error ForceFTZ and RestoreFPState need implentations +#endif + +#endif diff --git a/test_conformance/compatibility/test_common/harness/genericThread.cpp b/test_conformance/compatibility/test_common/harness/genericThread.cpp new file mode 100644 index 00000000..2b742fa3 --- /dev/null +++ b/test_conformance/compatibility/test_common/harness/genericThread.cpp @@ -0,0 +1,53 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "genericThread.h" + +#if defined(_WIN32) +#include +#else // !_WIN32 +#include +#endif + +void * genericThread::IStaticReflector( void * data ) +{ + genericThread *t = (genericThread *)data; + return t->IRun(); +} + +bool genericThread::Start( void ) +{ +#if defined(_WIN32) + mHandle = CreateThread( NULL, 0, (LPTHREAD_START_ROUTINE) IStaticReflector, this, 0, NULL ); + return ( mHandle != NULL ); +#else // !_WIN32 + int error = pthread_create( (pthread_t*)&mHandle, NULL, IStaticReflector, (void *)this ); + return ( error == 0 ); +#endif // !_WIN32 +} + +void * genericThread::Join( void ) +{ +#if defined(_WIN32) + WaitForSingleObject( (HANDLE)mHandle, INFINITE ); + return NULL; +#else // !_WIN32 + void * retVal; + int error = pthread_join( (pthread_t)mHandle, &retVal ); + if( error != 0 ) + retVal = NULL; + return retVal; +#endif // !_WIN32 +} diff --git a/test_conformance/compatibility/test_common/harness/genericThread.h b/test_conformance/compatibility/test_common/harness/genericThread.h new file mode 100644 index 00000000..168b7407 --- /dev/null +++ b/test_conformance/compatibility/test_common/harness/genericThread.h @@ -0,0 +1,42 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef _genericThread_h +#define _genericThread_h + +#include + +class genericThread +{ + public: + + virtual ~genericThread() {} + + bool Start( void ); + void * Join( void ); + + protected: + + virtual void * IRun( void ) = 0; + + private: + + void* mHandle; + + static void * IStaticReflector( void * data ); +}; + +#endif // _genericThread_h + diff --git a/test_conformance/compatibility/test_common/harness/imageHelpers.cpp b/test_conformance/compatibility/test_common/harness/imageHelpers.cpp new file mode 100644 index 00000000..60f744b4 --- /dev/null +++ b/test_conformance/compatibility/test_common/harness/imageHelpers.cpp @@ -0,0 +1,249 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "imageHelpers.h" + +size_t get_format_type_size( const cl_image_format *format ) +{ + return get_channel_data_type_size( format->image_channel_data_type ); +} + +size_t get_channel_data_type_size( cl_channel_type channelType ) +{ + switch( channelType ) + { + case CL_SNORM_INT8: + case CL_UNORM_INT8: + case CL_SIGNED_INT8: + case CL_UNSIGNED_INT8: + return 1; + + case CL_SNORM_INT16: + case CL_UNORM_INT16: + case CL_SIGNED_INT16: + case CL_UNSIGNED_INT16: + case CL_HALF_FLOAT: +#ifdef CL_SFIXED14_APPLE + case CL_SFIXED14_APPLE: +#endif + return sizeof( cl_short ); + + case CL_SIGNED_INT32: + case CL_UNSIGNED_INT32: + return sizeof( cl_int ); + + case CL_UNORM_SHORT_565: + case CL_UNORM_SHORT_555: +#ifdef OBSOLETE_FORAMT + case CL_UNORM_SHORT_565_REV: + case CL_UNORM_SHORT_555_REV: +#endif + return 2; + +#ifdef OBSOLETE_FORAMT + case CL_UNORM_INT_8888: + case CL_UNORM_INT_8888_REV: + return 4; +#endif + + case CL_UNORM_INT_101010: +#ifdef OBSOLETE_FORAMT + case CL_UNORM_INT_101010_REV: +#endif + return 4; + + case CL_FLOAT: + return sizeof( cl_float ); + + default: + return 0; + } +} + +size_t get_format_channel_count( const cl_image_format *format ) +{ + return get_channel_order_channel_count( format->image_channel_order ); +} + +size_t get_channel_order_channel_count( cl_channel_order order ) +{ + switch( order ) + { + case CL_R: + case CL_A: + case CL_Rx: + case CL_INTENSITY: + case CL_LUMINANCE: + return 1; + + case CL_RG: + case CL_RA: + case CL_RGx: + return 2; + + case CL_RGB: + case CL_RGBx: + return 3; + + case CL_RGBA: + case CL_ARGB: + case CL_BGRA: +#ifdef CL_1RGB_APPLE + case CL_1RGB_APPLE: +#endif +#ifdef CL_BGR1_APPLE + case CL_BGR1_APPLE: +#endif + return 4; + + default: + return 0; + } +} + +int is_format_signed( const cl_image_format *format ) +{ + switch( format->image_channel_data_type ) + { + case CL_SNORM_INT8: + case CL_SIGNED_INT8: + case CL_SNORM_INT16: + case CL_SIGNED_INT16: + case CL_SIGNED_INT32: + case CL_HALF_FLOAT: + 
case CL_FLOAT: +#ifdef CL_SFIXED14_APPLE + case CL_SFIXED14_APPLE: +#endif + return 1; + + default: + return 0; + } +} + +size_t get_pixel_size( cl_image_format *format ) +{ + switch( format->image_channel_data_type ) + { + case CL_SNORM_INT8: + case CL_UNORM_INT8: + case CL_SIGNED_INT8: + case CL_UNSIGNED_INT8: + return get_format_channel_count( format ); + + case CL_SNORM_INT16: + case CL_UNORM_INT16: + case CL_SIGNED_INT16: + case CL_UNSIGNED_INT16: + case CL_HALF_FLOAT: +#ifdef CL_SFIXED14_APPLE + case CL_SFIXED14_APPLE: +#endif + return get_format_channel_count( format ) * sizeof( cl_ushort ); + + case CL_SIGNED_INT32: + case CL_UNSIGNED_INT32: + return get_format_channel_count( format ) * sizeof( cl_int ); + + case CL_UNORM_SHORT_565: + case CL_UNORM_SHORT_555: +#ifdef OBSOLETE_FORAMT + case CL_UNORM_SHORT_565_REV: + case CL_UNORM_SHORT_555_REV: +#endif + return 2; + +#ifdef OBSOLETE_FORAMT + case CL_UNORM_INT_8888: + case CL_UNORM_INT_8888_REV: + return 4; +#endif + + case CL_UNORM_INT_101010: +#ifdef OBSOLETE_FORAMT + case CL_UNORM_INT_101010_REV: +#endif + return 4; + + case CL_FLOAT: + return get_format_channel_count( format ) * sizeof( cl_float ); + + default: + return 0; + } +} + +int get_8_bit_image_format( cl_context context, cl_mem_object_type objType, cl_mem_flags flags, size_t channelCount, cl_image_format *outFormat ) +{ + cl_image_format formatList[ 128 ]; + unsigned int outFormatCount, i; + int error; + + + /* Make sure each image format is supported */ + if ((error = clGetSupportedImageFormats( context, flags, objType, 128, formatList, &outFormatCount ))) + return error; + + + /* Look for one that is an 8-bit format */ + for( i = 0; i < outFormatCount; i++ ) + { + if( formatList[ i ].image_channel_data_type == CL_SNORM_INT8 || + formatList[ i ].image_channel_data_type == CL_UNORM_INT8 || + formatList[ i ].image_channel_data_type == CL_SIGNED_INT8 || + formatList[ i ].image_channel_data_type == CL_UNSIGNED_INT8 ) + { + if ( !channelCount || ( 
channelCount && ( get_format_channel_count( &formatList[ i ] ) == channelCount ) ) ) + { + *outFormat = formatList[ i ]; + return 0; + } + } + } + + return -1; +} + +int get_32_bit_image_format( cl_context context, cl_mem_object_type objType, cl_mem_flags flags, size_t channelCount, cl_image_format *outFormat ) +{ + cl_image_format formatList[ 128 ]; + unsigned int outFormatCount, i; + int error; + + + /* Make sure each image format is supported */ + if ((error = clGetSupportedImageFormats( context, flags, objType, 128, formatList, &outFormatCount ))) + return error; + + /* Look for one that is an 8-bit format */ + for( i = 0; i < outFormatCount; i++ ) + { + if( formatList[ i ].image_channel_data_type == CL_UNORM_INT_101010 || + formatList[ i ].image_channel_data_type == CL_FLOAT || + formatList[ i ].image_channel_data_type == CL_SIGNED_INT32 || + formatList[ i ].image_channel_data_type == CL_UNSIGNED_INT32 ) + { + if ( !channelCount || ( channelCount && ( get_format_channel_count( &formatList[ i ] ) == channelCount ) ) ) + { + *outFormat = formatList[ i ]; + return 0; + } + } + } + + return -1; +} + diff --git a/test_conformance/compatibility/test_common/harness/imageHelpers.h b/test_conformance/compatibility/test_common/harness/imageHelpers.h new file mode 100644 index 00000000..05b7832f --- /dev/null +++ b/test_conformance/compatibility/test_common/harness/imageHelpers.h @@ -0,0 +1,37 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef _imageHelpers_h +#define _imageHelpers_h + +#include "errorHelpers.h" + + +extern size_t get_format_type_size( const cl_image_format *format ); +extern size_t get_channel_data_type_size( cl_channel_type channelType ); +extern size_t get_format_channel_count( const cl_image_format *format ); +extern size_t get_channel_order_channel_count( cl_channel_order order ); +extern int is_format_signed( const cl_image_format *format ); +extern size_t get_pixel_size( cl_image_format *format ); + +/* Helper to get any ol image format as long as it is 8-bits-per-channel */ +extern int get_8_bit_image_format( cl_context context, cl_mem_object_type objType, cl_mem_flags flags, size_t channelCount, cl_image_format *outFormat ); + +/* Helper to get any ol image format as long as it is 32-bits-per-channel */ +extern int get_32_bit_image_format( cl_context context, cl_mem_object_type objType, cl_mem_flags flags, size_t channelCount, cl_image_format *outFormat ); + + +#endif // _imageHelpers_h + diff --git a/test_conformance/compatibility/test_common/harness/kernelHelpers.c b/test_conformance/compatibility/test_common/harness/kernelHelpers.c new file mode 100644 index 00000000..3680c06e --- /dev/null +++ b/test_conformance/compatibility/test_common/harness/kernelHelpers.c @@ -0,0 +1,684 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "kernelHelpers.h" +#include "errorHelpers.h" +#include "imageHelpers.h" + +#if defined(__MINGW32__) +#include "mingw_compat.h" +#endif + +int create_single_kernel_helper( cl_context context, cl_program *outProgram, cl_kernel *outKernel, unsigned int numKernelLines, const char **kernelProgram, const char *kernelName ) +{ + int error = CL_SUCCESS; + + /* Create the program object from source */ + *outProgram = clCreateProgramWithSource( context, numKernelLines, kernelProgram, NULL, &error ); + if( *outProgram == NULL || error != CL_SUCCESS) + { + print_error( error, "clCreateProgramWithSource failed" ); + return error; + } + + /* Compile the program */ + int buildProgramFailed = 0; + int printedSource = 0; + error = clBuildProgram( *outProgram, 0, NULL, NULL, NULL, NULL ); + if (error != CL_SUCCESS) + { + unsigned int i; + print_error(error, "clBuildProgram failed"); + buildProgramFailed = 1; + printedSource = 1; + log_error( "Original source is: ------------\n" ); + for( i = 0; i < numKernelLines; i++ ) + log_error( "%s", kernelProgram[ i ] ); + } + + // Verify the build status on all devices + cl_uint deviceCount = 0; + error = clGetProgramInfo( *outProgram, CL_PROGRAM_NUM_DEVICES, sizeof( deviceCount ), &deviceCount, NULL ); + if (error != CL_SUCCESS) { + print_error(error, "clGetProgramInfo CL_PROGRAM_NUM_DEVICES failed"); + return error; + } + + if (deviceCount == 0) { + log_error("No devices found for program.\n"); + return -1; + } + + cl_device_id *devices = (cl_device_id*) malloc( deviceCount * sizeof( cl_device_id ) ); + if( NULL == devices ) + return -1; + memset( devices, 0, deviceCount * sizeof( cl_device_id )); + error = clGetProgramInfo( *outProgram, CL_PROGRAM_DEVICES, sizeof( cl_device_id ) * deviceCount, devices, NULL ); + if (error != CL_SUCCESS) { + print_error(error, "clGetProgramInfo CL_PROGRAM_DEVICES failed"); + free( devices 
); + return error; + } + + cl_uint z; + for( z = 0; z < deviceCount; z++ ) + { + char deviceName[4096] = ""; + error = clGetDeviceInfo(devices[z], CL_DEVICE_NAME, sizeof( deviceName), deviceName, NULL); + if (error != CL_SUCCESS || deviceName[0] == '\0') { + log_error("Device \"%d\" failed to return a name\n", z); + print_error(error, "clGetDeviceInfo CL_DEVICE_NAME failed"); + } + + cl_build_status buildStatus; + error = clGetProgramBuildInfo(*outProgram, devices[z], CL_PROGRAM_BUILD_STATUS, sizeof(buildStatus), &buildStatus, NULL); + if (error != CL_SUCCESS) { + print_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_STATUS failed"); + free( devices ); + return error; + } + + if (buildStatus != CL_BUILD_SUCCESS || buildProgramFailed) { + char log[10240] = ""; + if (buildStatus == CL_BUILD_SUCCESS && buildProgramFailed) log_error("clBuildProgram returned an error, but buildStatus is marked as CL_BUILD_SUCCESS.\n"); + + char statusString[64] = ""; + if (buildStatus == (cl_build_status)CL_BUILD_SUCCESS) + sprintf(statusString, "CL_BUILD_SUCCESS"); + else if (buildStatus == (cl_build_status)CL_BUILD_NONE) + sprintf(statusString, "CL_BUILD_NONE"); + else if (buildStatus == (cl_build_status)CL_BUILD_ERROR) + sprintf(statusString, "CL_BUILD_ERROR"); + else if (buildStatus == (cl_build_status)CL_BUILD_IN_PROGRESS) + sprintf(statusString, "CL_BUILD_IN_PROGRESS"); + else + sprintf(statusString, "UNKNOWN (%d)", buildStatus); + + if (buildStatus != CL_BUILD_SUCCESS) log_error("Build not successful for device \"%s\", status: %s\n", deviceName, statusString); + error = clGetProgramBuildInfo( *outProgram, devices[z], CL_PROGRAM_BUILD_LOG, sizeof(log), log, NULL ); + if (error != CL_SUCCESS || log[0]=='\0'){ + log_error("Device %d (%s) failed to return a build log\n", z, deviceName); + if (error) { + print_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed"); + free( devices ); + return error; + } else { + log_error("clGetProgramBuildInfo returned an empty 
log.\n"); + free( devices ); + return -1; + } + } + // In this case we've already printed out the code above. + if (!printedSource) + { + unsigned int i; + log_error( "Original source is: ------------\n" ); + for( i = 0; i < numKernelLines; i++ ) + log_error( "%s", kernelProgram[ i ] ); + printedSource = 1; + } + log_error( "Build log for device \"%s\" is: ------------\n", deviceName ); + log_error( "%s\n", log ); + log_error( "\n----------\n" ); + free( devices ); + return -1; + } + } + + /* And create a kernel from it */ + *outKernel = clCreateKernel( *outProgram, kernelName, &error ); + if( *outKernel == NULL || error != CL_SUCCESS) + { + print_error( error, "Unable to create kernel" ); + free( devices ); + return error; + } + + free( devices ); + return 0; +} + +int get_device_version( cl_device_id id, size_t* major, size_t* minor) +{ + cl_char buffer[ 4098 ]; + size_t length; + + // Device version should fit the regex "OpenCL [0-9]+\.[0-9]+ *.*" + cl_int error = clGetDeviceInfo( id, CL_DEVICE_VERSION, sizeof( buffer ), buffer, &length ); + test_error( error, "Unable to get device version string" ); + + char *p1 = (char *)buffer + strlen( "OpenCL " ); + char *p2; + while( *p1 == ' ' ) + p1++; + *major = strtol( p1, &p2, 10 ); + error = *p2 != '.'; + test_error(error, "ERROR: Version number must contain a decimal point!"); + *minor = strtol( ++p2, NULL, 10 ); + return error; +} + +int get_max_allowed_work_group_size( cl_context context, cl_kernel kernel, size_t *outMaxSize, size_t *outLimits ) +{ + cl_device_id *devices; + size_t size, maxCommonSize = 0; + int numDevices, i, j, error; + cl_uint numDims; + size_t outSize; + size_t sizeLimit[]={1,1,1}; + + + /* Assume fewer than 16 devices will be returned */ + error = clGetContextInfo( context, CL_CONTEXT_DEVICES, 0, NULL, &outSize ); + test_error( error, "Unable to obtain list of devices size for context" ); + devices = (cl_device_id *)malloc(outSize); + error = clGetContextInfo( context, CL_CONTEXT_DEVICES, 
outSize, devices, NULL ); + test_error( error, "Unable to obtain list of devices for context" ); + + numDevices = (int)( outSize / sizeof( cl_device_id ) ); + + for( i = 0; i < numDevices; i++ ) + { + error = clGetDeviceInfo( devices[i], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof( size ), &size, NULL ); + test_error( error, "Unable to obtain max work group size for device" ); + if( size < maxCommonSize || maxCommonSize == 0) + maxCommonSize = size; + + error = clGetKernelWorkGroupInfo( kernel, devices[i], CL_KERNEL_WORK_GROUP_SIZE, sizeof( size ), &size, NULL ); + test_error( error, "Unable to obtain max work group size for device and kernel combo" ); + if( size < maxCommonSize || maxCommonSize == 0) + maxCommonSize = size; + + error= clGetDeviceInfo( devices[i], CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof( numDims ), &numDims, NULL); + test_error( error, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS"); + sizeLimit[0] = 1; + error= clGetDeviceInfo( devices[i], CL_DEVICE_MAX_WORK_ITEM_SIZES, numDims*sizeof(size_t), sizeLimit, NULL); + test_error( error, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES"); + + if (outLimits != NULL) + { + if (i == 0) { + for (j=0; j<3; j++) + outLimits[j] = sizeLimit[j]; + } else { + for (j=0; j<(int)numDims; j++) { + if (sizeLimit[j] < outLimits[j]) + outLimits[j] = sizeLimit[j]; + } + } + } + } + free(devices); + + *outMaxSize = (unsigned int)maxCommonSize; + return 0; +} + + +int get_max_common_work_group_size( cl_context context, cl_kernel kernel, + size_t globalThreadSize, size_t *outMaxSize ) +{ + size_t sizeLimit[3]; + int error = get_max_allowed_work_group_size( context, kernel, outMaxSize, sizeLimit ); + if( error != 0 ) + return error; + + /* Now find the largest factor of globalThreadSize that is <= maxCommonSize */ + /* Note for speed, we don't need to check the range of maxCommonSize, b/c once it gets to 1, + the modulo test will succeed and break the loop anyway */ + for( ; ( globalThreadSize % 
*outMaxSize ) != 0 || (*outMaxSize > sizeLimit[0]); (*outMaxSize)-- ) + ; + return 0; +} + +int get_max_common_2D_work_group_size( cl_context context, cl_kernel kernel, + size_t *globalThreadSizes, size_t *outMaxSizes ) +{ + size_t sizeLimit[3]; + size_t maxSize; + int error = get_max_allowed_work_group_size( context, kernel, &maxSize, sizeLimit ); + if( error != 0 ) + return error; + + /* Now find a set of factors, multiplied together less than maxSize, but each a factor of the global + sizes */ + + /* Simple case */ + if( globalThreadSizes[ 0 ] * globalThreadSizes[ 1 ] <= maxSize ) + { + if (globalThreadSizes[ 0 ] <= sizeLimit[0] && globalThreadSizes[ 1 ] <= sizeLimit[1]) { + outMaxSizes[ 0 ] = globalThreadSizes[ 0 ]; + outMaxSizes[ 1 ] = globalThreadSizes[ 1 ]; + return 0; + } + } + + size_t remainingSize, sizeForThisOne; + remainingSize = maxSize; + int i, j; + for (i=0 ; i<2; i++) { + if (globalThreadSizes[i] > remainingSize) + sizeForThisOne = remainingSize; + else + sizeForThisOne = globalThreadSizes[i]; + for (; (globalThreadSizes[i] % sizeForThisOne) != 0 || (sizeForThisOne > sizeLimit[i]); sizeForThisOne--) ; + outMaxSizes[i] = sizeForThisOne; + remainingSize = maxSize; + for (j=0; j<=i; j++) + remainingSize /=outMaxSizes[j]; + } + + return 0; +} + +int get_max_common_3D_work_group_size( cl_context context, cl_kernel kernel, + size_t *globalThreadSizes, size_t *outMaxSizes ) +{ + size_t sizeLimit[3]; + size_t maxSize; + int error = get_max_allowed_work_group_size( context, kernel, &maxSize, sizeLimit ); + if( error != 0 ) + return error; + /* Now find a set of factors, multiplied together less than maxSize, but each a factor of the global + sizes */ + + /* Simple case */ + if( globalThreadSizes[ 0 ] * globalThreadSizes[ 1 ] * globalThreadSizes[ 2 ] <= maxSize ) + { + if (globalThreadSizes[ 0 ] <= sizeLimit[0] && globalThreadSizes[ 1 ] <= sizeLimit[1] && globalThreadSizes[ 2 ] <= sizeLimit[2]) { + outMaxSizes[ 0 ] = globalThreadSizes[ 0 ]; + outMaxSizes[ 1 
] = globalThreadSizes[ 1 ]; + outMaxSizes[ 2 ] = globalThreadSizes[ 2 ]; + return 0; + } + } + + size_t remainingSize, sizeForThisOne; + remainingSize = maxSize; + int i, j; + for (i=0 ; i<3; i++) { + if (globalThreadSizes[i] > remainingSize) + sizeForThisOne = remainingSize; + else + sizeForThisOne = globalThreadSizes[i]; + for (; (globalThreadSizes[i] % sizeForThisOne) != 0 || (sizeForThisOne > sizeLimit[i]); sizeForThisOne--) ; + outMaxSizes[i] = sizeForThisOne; + remainingSize = maxSize; + for (j=0; j<=i; j++) + remainingSize /=outMaxSizes[j]; + } + + return 0; +} + +/* Helper to determine if an extension is supported by a device */ +int is_extension_available( cl_device_id device, const char *extensionName ) +{ + char *extString; + size_t size = 0; + int err; + int result = 0; + + if(( err = clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, 0, NULL, &size) )) + { + log_error( "Error: failed to determine size of device extensions string at %s:%d (err = %d)\n", __FILE__, __LINE__, err ); + return 0; + } + + if( 0 == size ) + return 0; + + extString = (char*) malloc( size ); + if( NULL == extString ) + { + log_error( "Error: unable to allocate %ld byte buffer for extension string at %s:%d (err = %d)\n", size, __FILE__, __LINE__, err ); + return 0; + } + + if(( err = clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, size, extString, NULL) )) + { + log_error( "Error: failed to obtain device extensions string at %s:%d (err = %d)\n", __FILE__, __LINE__, err ); + free( extString ); + return 0; + } + + if( strstr( extString, extensionName ) ) + result = 1; + + free( extString ); + return result; +} + +/* Helper to determine if a device supports an image format */ +int is_image_format_supported( cl_context context, cl_mem_flags flags, cl_mem_object_type image_type, const cl_image_format *fmt ) +{ + cl_image_format *list; + cl_uint count = 0; + cl_int err = clGetSupportedImageFormats( context, flags, image_type, 128, NULL, &count ); + if( count == 0 ) + return 0; + + list = 
(cl_image_format*) malloc( count * sizeof( cl_image_format ) ); + if( NULL == list ) + { + log_error( "Error: unable to allocate %ld byte buffer for image format list at %s:%d (err = %d)\n", count * sizeof( cl_image_format ), __FILE__, __LINE__, err ); + return 0; + } + + cl_int error = clGetSupportedImageFormats( context, flags, image_type, count, list, NULL ); + if( error ) + { + log_error( "Error: failed to obtain supported image type list at %s:%d (err = %d)\n", __FILE__, __LINE__, err ); + free( list ); + return 0; + } + + // iterate looking for a match. + cl_uint i; + for( i = 0; i < count; i++ ) + { + if( fmt->image_channel_data_type == list[ i ].image_channel_data_type && + fmt->image_channel_order == list[ i ].image_channel_order ) + break; + } + + free( list ); + return ( i < count ) ? true : false; +} + +size_t get_pixel_bytes( const cl_image_format *fmt ); +size_t get_pixel_bytes( const cl_image_format *fmt ) +{ + size_t chanCount; + switch( fmt->image_channel_order ) + { + case CL_R: + case CL_A: + case CL_Rx: + case CL_INTENSITY: + case CL_LUMINANCE: + chanCount = 1; + break; + case CL_RG: + case CL_RA: + case CL_RGx: + chanCount = 2; + break; + case CL_RGB: + case CL_RGBx: + chanCount = 3; + break; + case CL_RGBA: + case CL_ARGB: + case CL_BGRA: +#ifdef CL_1RGB_APPLE + case CL_1RGB_APPLE: +#endif +#ifdef CL_BGR1_APPLE + case CL_BGR1_APPLE: +#endif + chanCount = 4; + break; + default: + log_error("Unknown channel order at %s:%d!\n", __FILE__, __LINE__ ); + abort(); + break; + } + + switch( fmt->image_channel_data_type ) + { + case CL_UNORM_SHORT_565: + case CL_UNORM_SHORT_555: + return 2; + + case CL_UNORM_INT_101010: + return 4; + + case CL_SNORM_INT8: + case CL_UNORM_INT8: + case CL_SIGNED_INT8: + case CL_UNSIGNED_INT8: + return chanCount; + + case CL_SNORM_INT16: + case CL_UNORM_INT16: + case CL_HALF_FLOAT: + case CL_SIGNED_INT16: + case CL_UNSIGNED_INT16: +#ifdef CL_SFIXED14_APPLE + case CL_SFIXED14_APPLE: +#endif + return chanCount * 2; + + case 
CL_SIGNED_INT32: + case CL_UNSIGNED_INT32: + case CL_FLOAT: + return chanCount * 4; + + default: + log_error("Unknown channel data type at %s:%d!\n", __FILE__, __LINE__ ); + abort(); + } + + return 0; +} + +int verifyImageSupport( cl_device_id device ) +{ + if( checkForImageSupport( device ) ) + { + log_error( "ERROR: Device does not supported images as required by this test!\n" ); + return CL_IMAGE_FORMAT_NOT_SUPPORTED; + } + return 0; +} + +int checkForImageSupport( cl_device_id device ) +{ + cl_uint i; + int error; + + + /* Check the device props to see if images are supported at all first */ + error = clGetDeviceInfo( device, CL_DEVICE_IMAGE_SUPPORT, sizeof( i ), &i, NULL ); + test_error( error, "Unable to query device for image support" ); + if( i == 0 ) + { + return CL_IMAGE_FORMAT_NOT_SUPPORTED; + } + + /* So our support is good */ + return 0; +} + +int checkFor3DImageSupport( cl_device_id device ) +{ + cl_uint i; + int error; + + /* Check the device props to see if images are supported at all first */ + error = clGetDeviceInfo( device, CL_DEVICE_IMAGE_SUPPORT, sizeof( i ), &i, NULL ); + test_error( error, "Unable to query device for image support" ); + if( i == 0 ) + { + return CL_IMAGE_FORMAT_NOT_SUPPORTED; + } + + char profile[128]; + error = clGetDeviceInfo( device, CL_DEVICE_PROFILE, sizeof(profile ), profile, NULL ); + test_error( error, "Unable to query device for CL_DEVICE_PROFILE" ); + if( 0 == strcmp( profile, "EMBEDDED_PROFILE" ) ) + { + size_t width = -1L; + size_t height = -1L; + size_t depth = -1L; + error = clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof(width), &width, NULL ); + test_error( error, "Unable to get CL_DEVICE_IMAGE3D_MAX_WIDTH" ); + error = clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof(height), &height, NULL ); + test_error( error, "Unable to get CL_DEVICE_IMAGE3D_MAX_HEIGHT" ); + error = clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof(depth), &depth, NULL ); + test_error( error, "Unable 
to get CL_DEVICE_IMAGE3D_MAX_DEPTH" ); + + if( 0 == (height | width | depth )) + return CL_IMAGE_FORMAT_NOT_SUPPORTED; + } + + /* So our support is good */ + return 0; +} + +void * align_malloc(size_t size, size_t alignment) +{ +#if defined(_WIN32) && defined(_MSC_VER) + return _aligned_malloc(size, alignment); +#elif defined(__linux__) || defined (linux) || defined(__APPLE__) + void * ptr = NULL; + if (0 == posix_memalign(&ptr, alignment, size)) + return ptr; + return NULL; +#elif defined(__MINGW32__) + return __mingw_aligned_malloc(size, alignment); +#else + #error "Please add support OS for aligned malloc" +#endif +} + +void align_free(void * ptr) +{ +#if defined(_WIN32) && defined(_MSC_VER) + _aligned_free(ptr); +#elif defined(__linux__) || defined (linux) || defined(__APPLE__) + return free(ptr); +#elif defined(__MINGW32__) + return __mingw_aligned_free(ptr); +#else + #error "Please add support OS for aligned free" +#endif +} + +size_t get_min_alignment(cl_context context) +{ + static cl_uint align_size = 0; + + if( 0 == align_size ) + { + cl_device_id * devices; + size_t devices_size = 0; + cl_uint result = 0; + cl_int error; + int i; + + error = clGetContextInfo (context, + CL_CONTEXT_DEVICES, + 0, + NULL, + &devices_size); + test_error_ret(error, "clGetContextInfo failed", 0); + + devices = (cl_device_id*)malloc(devices_size); + if (devices == NULL) { + print_error( error, "malloc failed" ); + return 0; + } + + error = clGetContextInfo (context, + CL_CONTEXT_DEVICES, + devices_size, + (void*)devices, + NULL); + test_error_ret(error, "clGetContextInfo failed", 0); + + for (i = 0; i < (int)(devices_size/sizeof(cl_device_id)); i++) + { + cl_uint alignment = 0; + + error = clGetDeviceInfo (devices[i], + CL_DEVICE_MEM_BASE_ADDR_ALIGN, + sizeof(cl_uint), + (void*)&alignment, + NULL); + + if (error == CL_SUCCESS) + { + alignment >>= 3; // convert bits to bytes + result = (alignment > result) ? 
alignment : result; + } + else + print_error( error, "clGetDeviceInfo failed" ); + } + + align_size = result; + free(devices); + } + + return align_size; +} + +cl_device_fp_config get_default_rounding_mode( cl_device_id device ) +{ + char profileStr[128] = ""; + cl_device_fp_config single = 0; + int error = clGetDeviceInfo( device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof( single ), &single, NULL ); + if( error ) + test_error_ret( error, "Unable to get device CL_DEVICE_SINGLE_FP_CONFIG", 0 ); + + if( single & CL_FP_ROUND_TO_NEAREST ) + return CL_FP_ROUND_TO_NEAREST; + + if( 0 == (single & CL_FP_ROUND_TO_ZERO) ) + test_error_ret( -1, "FAILURE: device must support either CL_DEVICE_SINGLE_FP_CONFIG or CL_FP_ROUND_TO_NEAREST", 0 ); + + // Make sure we are an embedded device before allowing a pass + if( (error = clGetDeviceInfo( device, CL_DEVICE_PROFILE, sizeof( profileStr ), &profileStr, NULL ) )) + test_error_ret( error, "FAILURE: Unable to get CL_DEVICE_PROFILE", 0 ); + + if( strcmp( profileStr, "EMBEDDED_PROFILE" ) ) + test_error_ret( error, "FAILURE: non-EMBEDDED_PROFILE devices must support CL_FP_ROUND_TO_NEAREST", 0 ); + + return CL_FP_ROUND_TO_ZERO; +} + +int checkDeviceForQueueSupport( cl_device_id device, cl_command_queue_properties prop ) +{ + cl_command_queue_properties realProps; + cl_int error = clGetDeviceInfo( device, CL_DEVICE_QUEUE_PROPERTIES, sizeof( realProps ), &realProps, NULL ); + test_error_ret( error, "FAILURE: Unable to get device queue properties", 0 ); + + return ( realProps & prop ) ? 
1 : 0; +} + +int printDeviceHeader( cl_device_id device ) +{ + char deviceName[ 512 ], deviceVendor[ 512 ], deviceVersion[ 512 ], cLangVersion[ 512 ]; + int error; + + error = clGetDeviceInfo( device, CL_DEVICE_NAME, sizeof( deviceName ), deviceName, NULL ); + test_error( error, "Unable to get CL_DEVICE_NAME for device" ); + + error = clGetDeviceInfo( device, CL_DEVICE_VENDOR, sizeof( deviceVendor ), deviceVendor, NULL ); + test_error( error, "Unable to get CL_DEVICE_VENDOR for device" ); + + error = clGetDeviceInfo( device, CL_DEVICE_VERSION, sizeof( deviceVersion ), deviceVersion, NULL ); + test_error( error, "Unable to get CL_DEVICE_VERSION for device" ); + + error = clGetDeviceInfo( device, CL_DEVICE_OPENCL_C_VERSION, sizeof( cLangVersion ), cLangVersion, NULL ); + test_error( error, "Unable to get CL_DEVICE_OPENCL_C_VERSION for device" ); + + log_info("Compute Device Name = %s, Compute Device Vendor = %s, Compute Device Version = %s%s%s\n", + deviceName, deviceVendor, deviceVersion, ( error == CL_SUCCESS ) ? ", CL C Version = " : "", + ( error == CL_SUCCESS ) ? cLangVersion : "" ); + + return CL_SUCCESS; +} diff --git a/test_conformance/compatibility/test_common/harness/kernelHelpers.h b/test_conformance/compatibility/test_common/harness/kernelHelpers.h new file mode 100644 index 00000000..59c01b5f --- /dev/null +++ b/test_conformance/compatibility/test_common/harness/kernelHelpers.h @@ -0,0 +1,128 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef _kernelHelpers_h +#define _kernelHelpers_h + +#include "compat.h" + +#include +#include + +#if defined (__MINGW32__) +#include +#endif + +#include + +#ifdef __APPLE__ + #include +#else + #include +#endif + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +/* + * The below code is intended to be used at the top of kernels that appear inline in files to set line and file info for the kernel: + * + * const char *source = { + * INIT_OPENCL_DEBUG_INFO + * "__kernel void foo( int x )\n" + * "{\n" + * " ...\n" + * "}\n" + * }; + */ +#define INIT_OPENCL_DEBUG_INFO SET_OPENCL_LINE_INFO( __LINE__, __FILE__ ) +#define SET_OPENCL_LINE_INFO(_line, _file) "#line " STRINGIFY(_line) " " STRINGIFY(_file) "\n" +#ifndef STRINGIFY_VALUE + #define STRINGIFY_VALUE(_x) STRINGIFY(_x) +#endif +#ifndef STRINGIFY + #define STRINGIFY(_x) #_x +#endif + +/* Helper that creates a single program and kernel from a single-kernel program source */ +extern int create_single_kernel_helper( cl_context context, cl_program *outProgram, cl_kernel *outKernel, unsigned int numKernelLines, const char **kernelProgram, const char *kernelName ); + +/* Helper to obtain the biggest fit work group size for all the devices in a given group and for the given global thread size */ +extern int get_max_common_work_group_size( cl_context context, cl_kernel kernel, size_t globalThreadSize, size_t *outSize ); + +/* Helper to obtain the biggest fit work group size for all the devices in a given group and for the given global thread size */ +extern int get_max_common_2D_work_group_size( cl_context context, cl_kernel kernel, size_t *globalThreadSize, size_t *outSizes ); + +/* Helper to obtain the biggest fit work group size for all the devices in a given group and for the given global thread size */ +extern int get_max_common_3D_work_group_size( cl_context context, cl_kernel kernel, size_t 
*globalThreadSize, size_t *outSizes ); + +/* Helper to get major/minor number for a device */ +extern int get_device_version( cl_device_id id, size_t* major, size_t* minor); + +/* Helper to obtain the biggest allowed work group size for all the devices in a given group */ +extern int get_max_allowed_work_group_size( cl_context context, cl_kernel kernel, size_t *outSize, size_t *outLimits ); + +/* Helper to determine if an extension is supported by a device */ +extern int is_extension_available( cl_device_id device, const char *extensionName ); + +/* Helper to determine if a device supports an image format */ +extern int is_image_format_supported( cl_context context, cl_mem_flags flags, cl_mem_object_type image_type, const cl_image_format *fmt ); + +/* Helper to get pixel size for a pixel format */ +size_t get_pixel_bytes( const cl_image_format *fmt ); + +/* Verify the given device supports images. 0 means you're good to go, otherwise an error */ +extern int verifyImageSupport( cl_device_id device ); + +/* Checks that the given device supports images. Same as verify, but doesn't print an error */ +extern int checkForImageSupport( cl_device_id device ); +extern int checkFor3DImageSupport( cl_device_id device ); + +/* Checks that a given queue property is supported on the specified device. Returns 1 if supported, 0 if not or an error. */ +extern int checkDeviceForQueueSupport( cl_device_id device, cl_command_queue_properties prop ); + +/* Helper for aligned memory allocation */ +void * align_malloc(size_t size, size_t alignment); +void align_free(void *); + +/* Helper to obtain the min alignment for a given context, i.e the max of all min alignments for devices attached to the context*/ +size_t get_min_alignment(cl_context context); + +/* Helper to obtain the default rounding mode for single precision computation. (Double is always CL_FP_ROUND_TO_NEAREST.) Returns 0 on error. 
*/ +cl_device_fp_config get_default_rounding_mode( cl_device_id device ); + +#define PASSIVE_REQUIRE_IMAGE_SUPPORT( device ) \ + if( checkForImageSupport( device ) ) \ + { \ + log_info( "\n\tNote: device does not support images. Skipping test...\n" ); \ + return 0; \ + } + +#define PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( device ) \ + if( checkFor3DImageSupport( device ) ) \ + { \ + log_info( "\n\tNote: device does not support 3D images. Skipping test...\n" ); \ + return 0; \ + } + +/* Prints out the standard device header for all tests given the device to print for */ +extern int printDeviceHeader( cl_device_id device ); + +#ifdef __cplusplus +} +#endif // __cplusplus + +#endif // _kernelHelpers_h diff --git a/test_conformance/compatibility/test_common/harness/mingw_compat.c b/test_conformance/compatibility/test_common/harness/mingw_compat.c new file mode 100644 index 00000000..54c44635 --- /dev/null +++ b/test_conformance/compatibility/test_common/harness/mingw_compat.c @@ -0,0 +1,59 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#if defined(__MINGW32__) + +#include "mingw_compat.h" +#include +#include + +//This function is unavailable on various mingw compilers, +//especially 64 bit so implementing it here +const char *basename_dot="."; +char* +basename(char *path) +{ + char *p = path, *b = NULL; + int len = strlen(path); + + if (path == NULL) { + return (char*)basename_dot; + } + + // Not absolute path on windows + if (path[1] != ':') { + return path; + } + + // Trim trailing path seperators + if (path[len - 1] == '\\' || + path[len - 1] == '/' ) { + len--; + path[len] = '\0'; + } + + while (len) { + while((*p != '\\' || *p != '/') && len) { + p++; + len--; + } + p++; + b = p; + } + + return b; +} + +#endif \ No newline at end of file diff --git a/test_conformance/compatibility/test_common/harness/mingw_compat.h b/test_conformance/compatibility/test_common/harness/mingw_compat.h new file mode 100644 index 00000000..ab28f398 --- /dev/null +++ b/test_conformance/compatibility/test_common/harness/mingw_compat.h @@ -0,0 +1,31 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef MINGW_COMPAT_H +#define MINGW_COMPAT_H + +#if defined(__MINGW32__) +char *basename(char *path); +#include + +#if defined(__MINGW64__) +//mingw-w64 doesnot have __mingw_aligned_malloc, instead it has _aligned_malloc +#define __mingw_aligned_malloc _aligned_malloc +#define __mingw_aligned_free _aligned_free +#include +#endif //(__MINGW64__) + +#endif //(__MINGW32__) +#endif // MINGW_COMPAT_H diff --git a/test_conformance/compatibility/test_common/harness/msvc9.c b/test_conformance/compatibility/test_common/harness/msvc9.c new file mode 100644 index 00000000..811c6654 --- /dev/null +++ b/test_conformance/compatibility/test_common/harness/msvc9.c @@ -0,0 +1,754 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#if defined(_WIN32) && defined (_MSC_VER) + +#include "compat.h" +#include +#include +#include +#include + + + +/////////////////////////////////////////////////////////////////// +// +// rint, rintf +// +/////////////////////////////////////////////////////////////////// + +float copysignf( float x, float y ) +{ + union{ cl_uint u; float f; }ux, uy; + + ux.f = x; + uy.f = y; + + ux.u = (ux.u & 0x7fffffffU) | (uy.u & 0x80000000U); + + return ux.f; +} + +double copysign( double x, double y ) +{ + union{ cl_ulong u; double f; }ux, uy; + + ux.f = x; + uy.f = y; + + ux.u = (ux.u & 0x7fffffffffffffffULL) | (uy.u & 0x8000000000000000ULL); + + return ux.f; +} + +long double copysignl( long double x, long double y ) +{ + union + { + long double f; + struct{ cl_ulong m; cl_ushort sexp; }u; + }ux, uy; + + ux.f = x; + uy.f = y; + + ux.u.sexp = (ux.u.sexp & 0x7fff) | (uy.u.sexp & 0x8000); + + return ux.f; +} + +float rintf(float x) +{ + float absx = fabsf(x); + + if( absx < 8388608.0f /* 0x1.0p23f */ ) + { + float magic = copysignf( 8388608.0f /* 0x1.0p23f */, x ); + float rounded = x + magic; + rounded -= magic; + x = copysignf( rounded, x ); + } + + return x; +} + +double rint(double x) +{ + double absx = fabs(x); + + if( absx < 4503599627370496.0 /* 0x1.0p52f */ ) + { + double magic = copysign( 4503599627370496.0 /* 0x1.0p52 */, x ); + double rounded = x + magic; + rounded -= magic; + x = copysign( rounded, x ); + } + + return x; +} + +long double rintl(long double x) +{ + double absx = fabs(x); + + if( absx < 9223372036854775808.0L /* 0x1.0p64f */ ) + { + long double magic = copysignl( 9223372036854775808.0L /* 0x1.0p63L */, x ); + long double rounded = x + magic; + rounded -= magic; + x = copysignl( rounded, x ); + } + + return x; +} + + +/////////////////////////////////////////////////////////////////// +// +// ilogb, ilogbf, ilogbl +// +/////////////////////////////////////////////////////////////////// +#ifndef FP_ILOGB0 + #define FP_ILOGB0 INT_MIN +#endif + 
+#ifndef FP_ILOGBNAN + #define FP_ILOGBNAN INT_MIN +#endif + +int ilogb (double x) +{ + union{ double f; cl_ulong u;} u; + u.f = x; + + cl_ulong absx = u.u & CL_LONG_MAX; + if( absx - 0x0001000000000000ULL >= 0x7ff0000000000000ULL - 0x0001000000000000ULL) + { + switch( absx ) + { + case 0: + return FP_ILOGB0; + case 0x7ff0000000000000ULL: + return INT_MAX; + default: + if( absx > 0x7ff0000000000000ULL ) + return FP_ILOGBNAN; + + // subnormal + u.u = absx | 0x3ff0000000000000ULL; + u.f -= 1.0; + return (u.u >> 52) - (1023 + 1022); + } + } + + return (absx >> 52) - 1023; +} + + +int ilogbf (float x) +{ + union{ float f; cl_uint u;} u; + u.f = x; + + cl_uint absx = u.u & 0x7fffffff; + if( absx - 0x00800000U >= 0x7f800000U - 0x00800000U) + { + switch( absx ) + { + case 0: + return FP_ILOGB0; + case 0x7f800000U: + return INT_MAX; + default: + if( absx > 0x7f800000 ) + return FP_ILOGBNAN; + + // subnormal + u.u = absx | 0x3f800000U; + u.f -= 1.0f; + return (u.u >> 23) - (127 + 126); + } + } + + return (absx >> 23) - 127; +} + +int ilogbl (long double x) +{ + union + { + long double f; + struct{ cl_ulong m; cl_ushort sexp; }u; + } u; + u.f = x; + + int exp = u.u.sexp & 0x7fff; + if( 0 == exp ) + { + if( 0 == u.u.m ) + return FP_ILOGB0; + + //subnormal + u.u.sexp = 0x3fff; + u.f -= 1.0f; + exp = u.u.sexp & 0x7fff; + + return exp - (0x3fff + 0x3ffe); + } + else if( 0x7fff == exp ) + { + if( u.u.m & CL_LONG_MAX ) + return FP_ILOGBNAN; + + return INT_MAX; + } + + return exp - 0x3fff; +} + + + +/////////////////////////////////////////////////////////////////// +// +// fmax, fmin, fmaxf, fminf +// +/////////////////////////////////////////////////////////////////// + +static void GET_BITS_SP32(float fx, unsigned int* ux) +{ + volatile union {float f; unsigned int u;} _bitsy; + _bitsy.f = (fx); + *ux = _bitsy.u; +} +/* static void GET_BITS_SP32(float fx, unsigned int* ux) */ +/* { */ +/* volatile union {float f; unsigned int i;} _bitsy; */ +/* _bitsy.f = (fx); */ +/* *ux = 
_bitsy.i; */ +/* } */ +static void PUT_BITS_SP32(unsigned int ux, float* fx) +{ + volatile union {float f; unsigned int u;} _bitsy; + _bitsy.u = (ux); + *fx = _bitsy.f; +} +/* static void PUT_BITS_SP32(unsigned int ux, float* fx) */ +/* { */ +/* volatile union {float f; unsigned int i;} _bitsy; */ +/* _bitsy.i = (ux); */ +/* *fx = _bitsy.f; */ +/* } */ +static void GET_BITS_DP64(double dx, unsigned __int64* lx) +{ + volatile union {double d; unsigned __int64 l;} _bitsy; + _bitsy.d = (dx); + *lx = _bitsy.l; +} +static void PUT_BITS_DP64(unsigned __int64 lx, double* dx) +{ + volatile union {double d; unsigned __int64 l;} _bitsy; + _bitsy.l = (lx); + *dx = _bitsy.d; +} + +#if 0 +int SIGNBIT_DP64(double x ) +{ + int hx; + _GET_HIGH_WORD(hx,x); + return((hx>>31)); +} +#endif + +/* fmax(x, y) returns the larger (more positive) of x and y. + NaNs are treated as missing values: if one argument is NaN, + the other argument is returned. If both arguments are NaN, + the first argument is returned. */ + +/* This works so long as the compiler knows that (x != x) means + that x is NaN; gcc does. */ +double fmax(double x, double y) +{ + if( isnan(y) ) + return x; + + return x >= y ? x : y; +} + + +/* fmin(x, y) returns the smaller (more negative) of x and y. + NaNs are treated as missing values: if one argument is NaN, + the other argument is returned. If both arguments are NaN, + the first argument is returned. */ + +double fmin(double x, double y) +{ + if( isnan(y) ) + return x; + + return x <= y ? x : y; +} + + +float fmaxf( float x, float y ) +{ + if( isnan(y) ) + return x; + + return x >= y ? x : y; +} + +/* fminf(x, y) returns the smaller (more negative) of x and y. + NaNs are treated as missing values: if one argument is NaN, + the other argument is returned. If both arguments are NaN, + the first argument is returned. */ + +float fminf(float x, float y) +{ + if( isnan(y) ) + return x; + + return x <= y ? 
x : y; +} + +long double scalblnl(long double x, long n) +{ + union + { + long double d; + struct{ cl_ulong m; cl_ushort sexp;}u; + }u; + u.u.m = CL_LONG_MIN; + + if( x == 0.0L || n < -2200) + return copysignl( 0.0L, x ); + + if( n > 2200 ) + return INFINITY; + + if( n < 0 ) + { + u.u.sexp = 0x3fff - 1022; + while( n <= -1022 ) + { + x *= u.d; + n += 1022; + } + u.u.sexp = 0x3fff + n; + x *= u.d; + return x; + } + + if( n > 0 ) + { + u.u.sexp = 0x3fff + 1023; + while( n >= 1023 ) + { + x *= u.d; + n -= 1023; + } + u.u.sexp = 0x3fff + n; + x *= u.d; + return x; + } + + return x; +} + +/////////////////////////////////////////////////////////////////// +// +// log2 +// +/////////////////////////////////////////////////////////////////// +const static cl_double log_e_base2 = 1.4426950408889634074; +const static cl_double log_10_base2 = 3.3219280948873623478; + +//double log10(double x); + +double log2(double x) +{ + return 1.44269504088896340735992468100189214 * log(x); +} + +long double log2l(long double x) +{ + return 1.44269504088896340735992468100189214L * log(x); +} + +/////////////////////////////////////////////////////////////////// +// +// misc functions +// +/////////////////////////////////////////////////////////////////// + +/* +// This function is commented out because the Windows implementation should never call munmap. +// If it is calling it, we have a bug. Please file a bugzilla. +int munmap(void *addr, size_t len) +{ +// FIXME: this is not correct. 
munmap is like free() http://www.opengroup.org/onlinepubs/7990989775/xsh/munmap.html + + return (int)VirtualAlloc( (LPVOID)addr, len, + MEM_COMMIT|MEM_RESERVE, PAGE_NOACCESS ); +} +*/ + +uint64_t ReadTime( void ) +{ + LARGE_INTEGER current; + QueryPerformanceCounter(¤t); + return (uint64_t)current.QuadPart; +} + +double SubtractTime( uint64_t endTime, uint64_t startTime ) +{ + static double PerformanceFrequency = 0.0; + + if (PerformanceFrequency == 0.0) { + LARGE_INTEGER frequency; + QueryPerformanceFrequency(&frequency); + PerformanceFrequency = (double) frequency.QuadPart; + } + + return (double)(endTime - startTime) / PerformanceFrequency * 1e9; +} + +float make_nan() +{ +/* This is the IEEE 754 single-precision format: + unsigned int mantissa: 22; + unsigned int quiet_nan: 1; + unsigned int exponent: 8; + unsigned int negative: 1; +*/ + //const static unsigned + static const int32_t _nan = 0x7fc00000; + return *(const float*)(&_nan); +} + +float nanf( const char* str) +{ + cl_uint u = atoi( str ); + u |= 0x7fc00000U; + return *( float*)(&u); +} + + +double nan( const char* str) +{ + cl_ulong u = atoi( str ); + u |= 0x7ff8000000000000ULL; + return *( double*)(&u); +} + +// double check this implementatation +long double nanl( const char* str) +{ + union + { + long double f; + struct { cl_ulong m; cl_ushort sexp; }u; + }u; + u.u.sexp = 0x7fff; + u.u.m = 0x8000000000000000ULL | atoi( str ); + + return u.f; +} + +double trunc(double x) +{ + double absx = fabs(x); + + if( absx < 4503599627370496.0 /* 0x1.0p52f */ ) + { + cl_long rounded = x; + x = copysign( (double) rounded, x ); + } + + return x; +} + +float truncf(float x) +{ + float absx = fabsf(x); + + if( absx < 8388608.0f /* 0x1.0p23f */ ) + { + cl_int rounded = x; + x = copysignf( (float) rounded, x ); + } + + return x; +} + +long lround(double x) +{ + double absx = fabs(x); + + if( absx < 0.5 ) + return 0; + + if( absx < 4503599627370496.0 /* 0x1.0p52 */) + { + absx += 0.5; + cl_long rounded = absx; + absx 
= rounded; + x = copysign( absx, x ); + } + + if( x >= (double) LONG_MAX ) + return LONG_MAX; + + return (long) x; +} + +long lroundf(float x) +{ + float absx = fabsf(x); + + if( absx < 0.5f ) + return 0; + + if( absx < 8388608.0f ) + { + absx += 0.5f; + cl_int rounded = absx; + absx = rounded; + x = copysignf( absx, x ); + } + + if( x >= (float) LONG_MAX ) + return LONG_MAX; + + return (long) x; +} + +double round(double x) +{ + double absx = fabs(x); + + if( absx < 0.5 ) + return copysign( 0.0, x); + + if( absx < 4503599627370496.0 /* 0x1.0p52 */) + { + absx += 0.5; + cl_long rounded = absx; + absx = rounded; + x = copysign( absx, x ); + } + + return x; +} + +float roundf(float x) +{ + float absx = fabsf(x); + + if( absx < 0.5f ) + return copysignf( 0.0f, x); + + if( absx < 8388608.0f ) + { + absx += 0.5f; + cl_int rounded = absx; + absx = rounded; + x = copysignf( absx, x ); + } + + return x; +} + +long double roundl(long double x) +{ + long double absx = fabsl(x); + + if( absx < 0.5L ) + return copysignl( 0.0L, x); + + if( absx < 9223372036854775808.0L /*0x1.0p63L*/ ) + { + absx += 0.5L; + cl_ulong rounded = absx; + absx = rounded; + x = copysignl( absx, x ); + } + + return x; +} + +// Added in _MSC_VER == 1800 (Visual Studio 2013) +#if _MSC_VER < 1800 +int signbit(double x) +{ + union + { + double f; + cl_ulong u; + }u; + u.f = x; + return u.u >> 63; +} +#endif + +int signbitf(float x) +{ + union + { + float f; + cl_uint u; + }u; + u.f = x; + return u.u >> 31; +} + +float cbrtf( float x ) +{ + float z = pow( fabs((double) x), 1.0 / 3.0 ); + return copysignf( z, x ); +} + +double cbrt( double x ) +{ + return copysign( pow( fabs( x ), 1.0 / 3.0 ), x ); +} + +float int2float (int32_t ix) +{ + union { + float f; + int32_t i; + } u; + u.i = ix; + return u.f; +} + +int32_t float2int (float fx) +{ + union { + float f; + int32_t i; + } u; + u.f = fx; + return u.i; +} + +#if defined(_MSC_VER) && !defined(_WIN64) +/** Returns the number of leading 0-bits in x, + 
starting at the most significant bit position. + If x is 0, the result is undefined. +*/ +int __builtin_clz(unsigned int pattern) +{ +#if 0 + int res; + __asm { + mov eax, pattern + bsr eax, eax + mov res, eax + } + return 31 - res; +#endif + unsigned long index; + unsigned char res = _BitScanReverse( &index, pattern); + if (res) { + return 8*sizeof(int) - 1 - index; + } else { + return 8*sizeof(int); + } +} +#else +int __builtin_clz(unsigned int pattern) +{ + int count; + if (pattern == 0u) { + return 32; + } + count = 31; + if (pattern >= 1u<<16) { pattern >>= 16; count -= 16; } + if (pattern >= 1u<<8) { pattern >>= 8; count -= 8; } + if (pattern >= 1u<<4) { pattern >>= 4; count -= 4; } + if (pattern >= 1u<<2) { pattern >>= 2; count -= 2; } + if (pattern >= 1u<<1) { count -= 1; } + return count; +} + +#endif //defined(_MSC_VER) && !defined(_WIN64) + +#include +#include +long int lrint (double x) +{ + double absx = fabs(x); + + if( x >= (double) LONG_MAX ) + return LONG_MAX; + + if( absx < 4503599627370496.0 /* 0x1.0p52 */ ) + { + double magic = copysign( 4503599627370496.0 /* 0x1.0p52 */, x ); + double rounded = x + magic; + rounded -= magic; + return (long int) rounded; + } + + return (long int) x; +} + +long int lrintf (float x) +{ + float absx = fabsf(x); + + if( x >= (float) LONG_MAX ) + return LONG_MAX; + + if( absx < 8388608.0f /* 0x1.0p23f */ ) + { + float magic = copysignf( 8388608.0f /* 0x1.0p23f */, x ); + float rounded = x + magic; + rounded -= magic; + return (long int) rounded; + } + + return (long int) x; +} + +int usleep(int usec) +{ + Sleep((usec + 999) / 1000); + return 0; +} + +#if _MSC_VER < 1900 +int fetestexcept(int excepts) +{ + unsigned int status = _statusfp(); + return excepts & ( + ((status & _SW_INEXACT) ? FE_INEXACT : 0) | + ((status & _SW_UNDERFLOW) ? FE_UNDERFLOW : 0) | + ((status & _SW_OVERFLOW) ? FE_OVERFLOW : 0) | + ((status & _SW_ZERODIVIDE) ? FE_DIVBYZERO : 0) | + ((status & _SW_INVALID) ? 
FE_INVALID : 0) + ); +} + +int feclearexcept(int excepts) +{ + _clearfp(); + return 0; +} +#endif + +#endif //defined(_WIN32) diff --git a/test_conformance/compatibility/test_common/harness/mt19937.c b/test_conformance/compatibility/test_common/harness/mt19937.c new file mode 100644 index 00000000..75b76a74 --- /dev/null +++ b/test_conformance/compatibility/test_common/harness/mt19937.c @@ -0,0 +1,274 @@ +/* + A C-program for MT19937, with initialization improved 2002/1/26. + Coded by Takuji Nishimura and Makoto Matsumoto. + + Before using, initialize the state by using init_genrand(seed) + or init_by_array(init_key, key_length). + + Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura, + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. The names of its contributors may not be used to endorse or promote + products derived from this software without specific prior written + permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + + Any feedback is very welcome. + http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html + email: m-mat @ math.sci.hiroshima-u.ac.jp (remove space) + + Modifications for use in OpenCL by Ian Ollmann, Apple Inc. + +*/ + +#include +#include +#include "mt19937.h" +#include "mingw_compat.h" + +#ifdef __SSE2__ + #include +#endif + +static void * align_malloc(size_t size, size_t alignment) +{ +#if defined(_WIN32) && defined(_MSC_VER) + return _aligned_malloc(size, alignment); +#elif defined(__linux__) || defined (linux) || defined(__APPLE__) + void * ptr = NULL; + if (0 == posix_memalign(&ptr, alignment, size)) + return ptr; + return NULL; +#elif defined(__MINGW32__) + return __mingw_aligned_malloc(size, alignment); +#else + #error "Please add support OS for aligned malloc" +#endif +} + +static void align_free(void * ptr) +{ +#if defined(_WIN32) && defined(_MSC_VER) + _aligned_free(ptr); +#elif defined(__linux__) || defined (linux) || defined(__APPLE__) + return free(ptr); +#elif defined(__MINGW32__) + return __mingw_aligned_free(ptr); +#else + #error "Please add support OS for aligned free" +#endif +} + + +/* Period parameters */ +#define N 624 /* vector code requires multiple of 4 here */ +#define M 397 +#define MATRIX_A (cl_uint) 0x9908b0dfUL /* constant vector a */ +#define UPPER_MASK (cl_uint) 0x80000000UL /* most significant w-r bits */ +#define LOWER_MASK (cl_uint) 0x7fffffffUL /* least significant r bits */ + +typedef struct _MTdata +{ + 
cl_uint mt[N]; +#ifdef __SSE2__ + cl_uint cache[N]; +#endif + cl_int mti; +}_MTdata; + +/* initializes mt[N] with a seed */ +MTdata init_genrand(cl_uint s) +{ + MTdata r = (MTdata) align_malloc( sizeof( _MTdata ), 16 ); + if( NULL != r ) + { + cl_uint *mt = r->mt; + int mti = 0; + mt[0]= s; // & 0xffffffffUL; + for (mti=1; mti> 30)) + mti); + /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */ + /* In the previous versions, MSBs of the seed affect */ + /* only MSBs of the array mt[]. */ + /* 2002/01/09 modified by Makoto Matsumoto */ + // mt[mti] &= 0xffffffffUL; + /* for >32 bit machines */ + } + r->mti = mti; + } + + return r; +} + +void free_mtdata( MTdata d ) +{ + if(d) + align_free(d); +} + +/* generates a random number on [0,0xffffffff]-interval */ +cl_uint genrand_int32( MTdata d) +{ + /* mag01[x] = x * MATRIX_A for x=0,1 */ + static const cl_uint mag01[2]={0x0UL, MATRIX_A}; +#ifdef __SSE2__ + static volatile int init = 0; + static union{ __m128i v; cl_uint s[4]; } upper_mask, lower_mask, one, matrix_a, c0, c1; +#endif + + + cl_uint *mt = d->mt; + cl_uint y; + + if (d->mti == N) + { /* generate N words at one time */ + int kk; + +#ifdef __SSE2__ + if( 0 == init ) + { + upper_mask.s[0] = upper_mask.s[1] = upper_mask.s[2] = upper_mask.s[3] = UPPER_MASK; + lower_mask.s[0] = lower_mask.s[1] = lower_mask.s[2] = lower_mask.s[3] = LOWER_MASK; + one.s[0] = one.s[1] = one.s[2] = one.s[3] = 1; + matrix_a.s[0] = matrix_a.s[1] = matrix_a.s[2] = matrix_a.s[3] = MATRIX_A; + c0.s[0] = c0.s[1] = c0.s[2] = c0.s[3] = (cl_uint) 0x9d2c5680UL; + c1.s[0] = c1.s[1] = c1.s[2] = c1.s[3] = (cl_uint) 0xefc60000UL; + init = 1; + } +#endif + + kk = 0; +#ifdef __SSE2__ + // vector loop + for( ; kk + 4 <= N-M; kk += 4 ) + { + __m128i vy = _mm_or_si128( _mm_and_si128( _mm_load_si128( (__m128i*)(mt + kk) ), upper_mask.v ), + _mm_and_si128( _mm_loadu_si128( (__m128i*)(mt + kk + 1) ), lower_mask.v )); // ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK)) + + __m128i mask = _mm_cmpeq_epi32( 
_mm_and_si128( vy, one.v), one.v ); // y & 1 ? -1 : 0 + __m128i vmag01 = _mm_and_si128( mask, matrix_a.v ); // y & 1 ? MATRIX_A, 0 = mag01[y & (cl_uint) 0x1UL] + __m128i vr = _mm_xor_si128( _mm_loadu_si128( (__m128i*)(mt + kk + M)), (__m128i) _mm_srli_epi32( vy, 1 ) ); // mt[kk+M] ^ (y >> 1) + vr = _mm_xor_si128( vr, vmag01 ); // mt[kk+M] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL] + _mm_store_si128( (__m128i*) (mt + kk ), vr ); + } +#endif + for ( ;kk> 1) ^ mag01[y & (cl_uint) 0x1UL]; + } + +#ifdef __SSE2__ + // advance to next aligned location + for (;kk> 1) ^ mag01[y & (cl_uint) 0x1UL]; + } + + // vector loop + for( ; kk + 4 <= N-1; kk += 4 ) + { + __m128i vy = _mm_or_si128( _mm_and_si128( _mm_load_si128( (__m128i*)(mt + kk) ), upper_mask.v ), + _mm_and_si128( _mm_loadu_si128( (__m128i*)(mt + kk + 1) ), lower_mask.v )); // ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK)) + + __m128i mask = _mm_cmpeq_epi32( _mm_and_si128( vy, one.v), one.v ); // y & 1 ? -1 : 0 + __m128i vmag01 = _mm_and_si128( mask, matrix_a.v ); // y & 1 ? 
MATRIX_A, 0 = mag01[y & (cl_uint) 0x1UL] + __m128i vr = _mm_xor_si128( _mm_loadu_si128( (__m128i*)(mt + kk + M - N)), _mm_srli_epi32( vy, 1 ) ); // mt[kk+M-N] ^ (y >> 1) + vr = _mm_xor_si128( vr, vmag01 ); // mt[kk+M] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL] + _mm_store_si128( (__m128i*) (mt + kk ), vr ); + } +#endif + + for (;kk> 1) ^ mag01[y & (cl_uint) 0x1UL]; + } + y = (cl_uint)((mt[N-1]&UPPER_MASK)|(mt[0]&LOWER_MASK)); + mt[N-1] = mt[M-1] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL]; + +#ifdef __SSE2__ + // Do the tempering ahead of time in vector code + for( kk = 0; kk + 4 <= N; kk += 4 ) + { + __m128i vy = _mm_load_si128( (__m128i*)(mt + kk ) ); // y = mt[k]; + vy = _mm_xor_si128( vy, _mm_srli_epi32( vy, 11 ) ); // y ^= (y >> 11); + vy = _mm_xor_si128( vy, _mm_and_si128( _mm_slli_epi32( vy, 7 ), c0.v) ); // y ^= (y << 7) & (cl_uint) 0x9d2c5680UL; + vy = _mm_xor_si128( vy, _mm_and_si128( _mm_slli_epi32( vy, 15 ), c1.v) ); // y ^= (y << 15) & (cl_uint) 0xefc60000UL; + vy = _mm_xor_si128( vy, _mm_srli_epi32( vy, 18 ) ); // y ^= (y >> 18); + _mm_store_si128( (__m128i*)(d->cache+kk), vy ); + } +#endif + + d->mti = 0; + } +#ifdef __SSE2__ + y = d->cache[d->mti++]; +#else + y = mt[d->mti++]; + + /* Tempering */ + y ^= (y >> 11); + y ^= (y << 7) & (cl_uint) 0x9d2c5680UL; + y ^= (y << 15) & (cl_uint) 0xefc60000UL; + y ^= (y >> 18); +#endif + + + return y; +} + +cl_ulong genrand_int64( MTdata d) +{ + return ((cl_ulong) genrand_int32(d) << 32) | (cl_uint) genrand_int32(d); +} + +/* generates a random number on [0,1]-real-interval */ +double genrand_real1(MTdata d) +{ + return genrand_int32(d)*(1.0/4294967295.0); + /* divided by 2^32-1 */ +} + +/* generates a random number on [0,1)-real-interval */ +double genrand_real2(MTdata d) +{ + return genrand_int32(d)*(1.0/4294967296.0); + /* divided by 2^32 */ +} + +/* generates a random number on (0,1)-real-interval */ +double genrand_real3(MTdata d) +{ + return (((double)genrand_int32(d)) + 0.5)*(1.0/4294967296.0); + /* divided by 
2^32 */ +} + +/* generates a random number on [0,1) with 53-bit resolution*/ +double genrand_res53(MTdata d) +{ + unsigned long a=genrand_int32(d)>>5, b=genrand_int32(d)>>6; + return(a*67108864.0+b)*(1.0/9007199254740992.0); +} diff --git a/test_conformance/compatibility/test_common/harness/mt19937.h b/test_conformance/compatibility/test_common/harness/mt19937.h new file mode 100644 index 00000000..d05beed1 --- /dev/null +++ b/test_conformance/compatibility/test_common/harness/mt19937.h @@ -0,0 +1,99 @@ + +/* + * mt19937.h + * + * Mersenne Twister. + * + A C-program for MT19937, with initialization improved 2002/1/26. + Coded by Takuji Nishimura and Makoto Matsumoto. + + Before using, initialize the state by using init_genrand(seed) + or init_by_array(init_key, key_length). + + Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura, + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. The names of its contributors may not be used to endorse or promote + products derived from this software without specific prior written + permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + + Any feedback is very welcome. + http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html + email: m-mat @ math.sci.hiroshima-u.ac.jp (remove space) + */ + +#ifndef MT19937_H +#define MT19937_H 1 + +#if defined( __APPLE__ ) + #include +#else + #include +#endif + +#ifdef __cplusplus + extern "C" { +#endif + +/* + * Interfaces here have been modified from original sources so that they + * are safe to call reentrantly, so long as a different MTdata is used + * on each thread. 
+ */ + +typedef struct _MTdata *MTdata; + +/* Create the random number generator with seed */ +MTdata init_genrand( cl_uint /*seed*/ ); + +/* release memory used by a MTdata private data */ +void free_mtdata( MTdata /*data*/ ); + +/* generates a random number on [0,0xffffffff]-interval */ +cl_uint genrand_int32( MTdata /*data*/); + +/* generates a random number on [0,0xffffffffffffffffULL]-interval */ +cl_ulong genrand_int64( MTdata /*data*/); + +/* generates a random number on [0,1]-real-interval */ +double genrand_real1( MTdata /*data*/); + +/* generates a random number on [0,1)-real-interval */ +double genrand_real2( MTdata /*data*/); + +/* generates a random number on (0,1)-real-interval */ +double genrand_real3( MTdata /*data*/); + +/* generates a random number on [0,1) with 53-bit resolution*/ +double genrand_res53( MTdata /*data*/ ); + + +#ifdef __cplusplus + } +#endif + +#endif /* MT19937_H */ diff --git a/test_conformance/compatibility/test_common/harness/ref_counting.h b/test_conformance/compatibility/test_common/harness/ref_counting.h new file mode 100644 index 00000000..1a2aceee --- /dev/null +++ b/test_conformance/compatibility/test_common/harness/ref_counting.h @@ -0,0 +1,49 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef _ref_counting_h +#define _ref_counting_h + +#define MARK_REF_COUNT_BASE( c, type, bigType ) \ + cl_uint c##_refCount; \ + error = clGet##type##Info( c, CL_##bigType##_REFERENCE_COUNT, sizeof( c##_refCount ), &c##_refCount, NULL ); \ + test_error( error, "Unable to check reference count for " #type ); + +#define TEST_REF_COUNT_BASE( c, type, bigType ) \ + cl_uint c##_refCount_new; \ + error = clGet##type##Info( c, CL_##bigType##_REFERENCE_COUNT, sizeof( c##_refCount_new ), &c##_refCount_new, NULL ); \ + test_error( error, "Unable to check reference count for " #type ); \ + if( c##_refCount != c##_refCount_new ) \ + { \ + log_error( "ERROR: Reference count for " #type " changed! (was %d, now %d)\n", c##_refCount, c##_refCount_new ); \ + return -1; \ + } + +#define MARK_REF_COUNT_CONTEXT( c ) MARK_REF_COUNT_BASE( c, Context, CONTEXT ) +#define TEST_REF_COUNT_CONTEXT( c ) TEST_REF_COUNT_BASE( c, Context, CONTEXT ) + +#define MARK_REF_COUNT_DEVICE( c ) MARK_REF_COUNT_BASE( c, Device, DEVICE ) +#define TEST_REF_COUNT_DEVICE( c ) TEST_REF_COUNT_BASE( c, Device, DEVICE ) + +#define MARK_REF_COUNT_QUEUE( c ) MARK_REF_COUNT_BASE( c, CommandQueue, QUEUE ) +#define TEST_REF_COUNT_QUEUE( c ) TEST_REF_COUNT_BASE( c, CommandQueue, QUEUE ) + +#define MARK_REF_COUNT_PROGRAM( c ) MARK_REF_COUNT_BASE( c, Program, PROGRAM ) +#define TEST_REF_COUNT_PROGRAM( c ) TEST_REF_COUNT_BASE( c, Program, PROGRAM ) + +#define MARK_REF_COUNT_MEM( c ) MARK_REF_COUNT_BASE( c, MemObject, MEM ) +#define TEST_REF_COUNT_MEM( c ) TEST_REF_COUNT_BASE( c, MemObject, MEM ) + +#endif // _ref_counting_h diff --git a/test_conformance/compatibility/test_common/harness/rounding_mode.c b/test_conformance/compatibility/test_common/harness/rounding_mode.c new file mode 100644 index 00000000..f77da958 --- /dev/null +++ b/test_conformance/compatibility/test_common/harness/rounding_mode.c @@ -0,0 +1,175 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "rounding_mode.h" + +#if !(defined(_WIN32) && defined(_MSC_VER)) +RoundingMode set_round( RoundingMode r, Type outType ) +{ + static const int flt_rounds[ kRoundingModeCount ] = { FE_TONEAREST, FE_TONEAREST, FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO }; + static const int int_rounds[ kRoundingModeCount ] = { FE_TOWARDZERO, FE_TONEAREST, FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO }; + const int *p = int_rounds; + if( outType == kfloat || outType == kdouble ) + p = flt_rounds; + int oldRound = fegetround(); + fesetround( p[r] ); + + switch( oldRound ) + { + case FE_TONEAREST: + return kRoundToNearestEven; + case FE_UPWARD: + return kRoundUp; + case FE_DOWNWARD: + return kRoundDown; + case FE_TOWARDZERO: + return kRoundTowardZero; + default: + abort(); // ??! 
+ } + return kDefaultRoundingMode; //never happens +} + +RoundingMode get_round( void ) +{ + int oldRound = fegetround(); + + switch( oldRound ) + { + case FE_TONEAREST: + return kRoundToNearestEven; + case FE_UPWARD: + return kRoundUp; + case FE_DOWNWARD: + return kRoundDown; + case FE_TOWARDZERO: + return kRoundTowardZero; + } + + return kDefaultRoundingMode; +} + +#else +RoundingMode set_round( RoundingMode r, Type outType ) +{ + static const int flt_rounds[ kRoundingModeCount ] = { _RC_NEAR, _RC_NEAR, _RC_UP, _RC_DOWN, _RC_CHOP }; + static const int int_rounds[ kRoundingModeCount ] = { _RC_CHOP, _RC_NEAR, _RC_UP, _RC_DOWN, _RC_CHOP }; + const int *p = ( outType == kfloat || outType == kdouble )? flt_rounds : int_rounds; + unsigned int oldRound; + + int err = _controlfp_s(&oldRound, 0, 0); //get rounding mode into oldRound + if (err) { + vlog_error("\t\tERROR: -- cannot get rounding mode in %s:%d\n", __FILE__, __LINE__); + return kDefaultRoundingMode; //what else never happens + } + + oldRound &= _MCW_RC; + + RoundingMode old = + (oldRound == _RC_NEAR)? kRoundToNearestEven : + (oldRound == _RC_UP)? kRoundUp : + (oldRound == _RC_DOWN)? kRoundDown : + (oldRound == _RC_CHOP)? kRoundTowardZero: + kDefaultRoundingMode; + + _controlfp_s(&oldRound, p[r], _MCW_RC); //setting new rounding mode + return old; //returning old rounding mode +} + +RoundingMode get_round( void ) +{ + unsigned int oldRound; + + int err = _controlfp_s(&oldRound, 0, 0); //get rounding mode into oldRound + oldRound &= _MCW_RC; + return + (oldRound == _RC_NEAR)? kRoundToNearestEven : + (oldRound == _RC_UP)? kRoundUp : + (oldRound == _RC_DOWN)? kRoundDown : + (oldRound == _RC_CHOP)? kRoundTowardZero: + kDefaultRoundingMode; +} + +#endif + +// +// FlushToZero() sets the host processor into ftz mode. It is intended to have a remote effect on the behavior of the code in +// basic_test_conversions.c. 
Some host processors may not support this mode, which case you'll need to do some clamping in +// software by testing against FLT_MIN or DBL_MIN in that file. +// +// Note: IEEE-754 says conversions are basic operations. As such they do *NOT* have the behavior in section 7.5.3 of +// the OpenCL spec. They *ALWAYS* flush to zero for subnormal inputs or outputs when FTZ mode is on like other basic +// operators do (e.g. add, subtract, multiply, divide, etc.) +// +// Configuring hardware to FTZ mode varies by platform. +// CAUTION: Some C implementations may also fail to behave properly in this mode. +// +// On PowerPC, it is done by setting the FPSCR into non-IEEE mode. +// On Intel, you can do this by turning on the FZ and DAZ bits in the MXCSR -- provided that SSE/SSE2 +// is used for floating point computation! If your OS uses x87, you'll need to figure out how +// to turn that off for the conversions code in basic_test_conversions.c so that they flush to +// zero properly. Otherwise, you'll need to add appropriate software clamping to basic_test_conversions.c +// in which case, these function are at liberty to do nothing. +// +#if defined( __i386__ ) || defined( __x86_64__ ) || defined (_WIN32) + #include +#elif defined( __PPC__ ) + #include +#endif +void *FlushToZero( void ) +{ +#if defined( __APPLE__ ) || defined(__linux__) || defined (_WIN32) + #if defined( __i386__ ) || defined( __x86_64__ ) || defined(_MSC_VER) + union{ int i; void *p; }u = { _mm_getcsr() }; + _mm_setcsr( u.i | 0x8040 ); + return u.p; + #elif defined( __arm__ ) || defined(__aarch64__) + // processor is already in FTZ mode -- do nothing + return NULL; + #elif defined( __PPC__ ) + fpu_control_t flags = 0; + _FPU_GETCW(flags); + flags |= _FPU_MASK_NI; + _FPU_SETCW(flags); + return NULL; + #else + #error Unknown arch + #endif +#else + #error Please configure FlushToZero and UnFlushToZero to behave properly on this operating system. 
+#endif +} + +// Undo the effects of FlushToZero above, restoring the host to default behavior, using the information passed in p. +void UnFlushToZero( void *p) +{ +#if defined( __APPLE__ ) || defined(__linux__) || defined (_WIN32) + #if defined( __i386__ ) || defined( __x86_64__ ) || defined(_MSC_VER) + union{ void *p; int i; }u = { p }; + _mm_setcsr( u.i ); + #elif defined( __arm__ ) || defined(__aarch64__) + // processor is already in FTZ mode -- do nothing + #elif defined( __PPC__) + fpu_control_t flags = 0; + _FPU_GETCW(flags); + flags &= ~_FPU_MASK_NI; + _FPU_SETCW(flags); + #else + #error Unknown arch + #endif +#else + #error Please configure FlushToZero and UnFlushToZero to behave properly on this operating system. +#endif +} diff --git a/test_conformance/compatibility/test_common/harness/rounding_mode.h b/test_conformance/compatibility/test_common/harness/rounding_mode.h new file mode 100644 index 00000000..837ec687 --- /dev/null +++ b/test_conformance/compatibility/test_common/harness/rounding_mode.h @@ -0,0 +1,71 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef __ROUNDING_MODE_H__ +#define __ROUNDING_MODE_H__ + +#include "compat.h" + +#include + +#if (defined(_WIN32) && defined (_MSC_VER)) +#include "errorHelpers.h" +#include "testHarness.h" +#endif + +typedef enum +{ + kDefaultRoundingMode = 0, + kRoundToNearestEven, + kRoundUp, + kRoundDown, + kRoundTowardZero, + + kRoundingModeCount +}RoundingMode; + +typedef enum +{ + kuchar = 0, + kchar = 1, + kushort = 2, + kshort = 3, + kuint = 4, + kint = 5, + kfloat = 6, + kdouble = 7, + kulong = 8, + klong = 9, + + //This goes last + kTypeCount +}Type; + +#ifdef __cplusplus +extern "C" { +#endif + +extern RoundingMode set_round( RoundingMode r, Type outType ); +extern RoundingMode get_round( void ); +extern void *FlushToZero( void ); +extern void UnFlushToZero( void *p); + +#ifdef __cplusplus +} +#endif + + + +#endif /* __ROUNDING_MODE_H__ */ diff --git a/test_conformance/compatibility/test_common/harness/testHarness.c b/test_conformance/compatibility/test_common/harness/testHarness.c new file mode 100644 index 00000000..1994399d --- /dev/null +++ b/test_conformance/compatibility/test_common/harness/testHarness.c @@ -0,0 +1,842 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "testHarness.h" +#include "compat.h" +#include +#include + +#if !defined(_WIN32) +#include +#endif + +#include +#include "threadTesting.h" +#include "errorHelpers.h" +#include "kernelHelpers.h" +#include "fpcontrol.h" + +#if !defined(_WIN32) +#include +#endif + +#include + +#if !defined (__APPLE__) +#include +#endif + +int gTestsPassed = 0; +int gTestsFailed = 0; +cl_uint gRandomSeed = 0; +cl_uint gReSeed = 0; + +int gFlushDenormsToZero = 0; +int gInfNanSupport = 1; +int gIsEmbedded = 0; +int gIsOpenCL_C_1_0_Device = 0; +int gIsOpenCL_1_0_Device = 0; +int gHasLong = 1; + +#define DEFAULT_NUM_ELEMENTS 0x4000 + +int runTestHarness( int argc, const char *argv[], unsigned int num_fns, + basefn fnList[], const char *fnNames[], + int imageSupportRequired, int forceNoContextCreation, cl_command_queue_properties queueProps ) +{ + return runTestHarnessWithCheck( argc, argv, num_fns, fnList, fnNames, imageSupportRequired, forceNoContextCreation, queueProps, + ( imageSupportRequired ) ? 
verifyImageSupport : NULL ); +} + +int runTestHarnessWithCheck( int argc, const char *argv[], unsigned int num_fns, + basefn fnList[], const char *fnNames[], + int imageSupportRequired, int forceNoContextCreation, cl_command_queue_properties queueProps, + DeviceCheckFn deviceCheckFn ) +{ + test_start(); + log_info("*** Compatibility with Previous Versions test ***\n"); + + cl_device_type device_type = CL_DEVICE_TYPE_DEFAULT; + cl_uint num_platforms = 0; + cl_platform_id *platforms; + cl_device_id device; + int num_elements = DEFAULT_NUM_ELEMENTS; + cl_uint num_devices = 0; + cl_device_id *devices = NULL; + cl_uint choosen_device_index = 0; + cl_uint choosen_platform_index = 0; + + int err, ret; + char *endPtr; + unsigned int i; + int based_on_env_var = 0; + + + /* Check for environment variable to set device type */ + char *env_mode = getenv( "CL_DEVICE_TYPE" ); + if( env_mode != NULL ) + { + based_on_env_var = 1; + if( strcmp( env_mode, "gpu" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_GPU" ) == 0 ) + device_type = CL_DEVICE_TYPE_GPU; + else if( strcmp( env_mode, "cpu" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_CPU" ) == 0 ) + device_type = CL_DEVICE_TYPE_CPU; + else if( strcmp( env_mode, "accelerator" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_ACCELERATOR" ) == 0 ) + device_type = CL_DEVICE_TYPE_ACCELERATOR; + else if( strcmp( env_mode, "default" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_DEFAULT" ) == 0 ) + device_type = CL_DEVICE_TYPE_DEFAULT; + else + { + log_error( "Unknown CL_DEVICE_TYPE env variable setting: %s.\nAborting...\n", env_mode ); + abort(); + } + } + +#if defined( __APPLE__ ) + { + // report on any unusual library search path indirection + char *libSearchPath = getenv( "DYLD_LIBRARY_PATH"); + if( libSearchPath ) + log_info( "*** DYLD_LIBRARY_PATH = \"%s\"\n", libSearchPath ); + + // report on any unusual framework search path indirection + char *frameworkSearchPath = getenv( "DYLD_FRAMEWORK_PATH"); + if( libSearchPath ) + log_info( "*** 
DYLD_FRAMEWORK_PATH = \"%s\"\n", frameworkSearchPath ); + } +#endif + + env_mode = getenv( "CL_DEVICE_INDEX" ); + if( env_mode != NULL ) + { + choosen_device_index = atoi(env_mode); + } + + env_mode = getenv( "CL_PLATFORM_INDEX" ); + if( env_mode != NULL ) + { + choosen_platform_index = atoi(env_mode); + } + + /* Process the command line arguments */ + + /* Special case: just list the tests */ + if( ( argc > 1 ) && (!strcmp( argv[ 1 ], "-list" ) || !strcmp( argv[ 1 ], "-h" ) || !strcmp( argv[ 1 ], "--help" ))) + { + log_info( "Usage: %s [*] [pid] [id] []\n", argv[0] ); + log_info( "\t\tOne or more of: (wildcard character '*') (default *)\n"); + log_info( "\tpid\t\tIndicates platform at index should be used (default 0).\n" ); + log_info( "\tid\t\tIndicates device at index should be used (default 0).\n" ); + log_info( "\t\tcpu|gpu|accelerator| (default CL_DEVICE_TYPE_DEFAULT)\n" ); + + for( i = 0; i < num_fns - 1; i++ ) + { + log_info( "\t\t%s\n", fnNames[ i ] ); + } + test_finish(); + return 0; + } + + /* How are we supposed to seed the random # generators? */ + if( argc > 1 && strcmp( argv[ argc - 1 ], "randomize" ) == 0 ) + { + log_info(" Initializing random seed based on the clock.\n"); + gRandomSeed = (unsigned)clock(); + gReSeed = 1; + argc--; + } + else + { + log_info(" Initializing random seed to 0.\n"); + } + + /* Do we have an integer to specify the number of elements to pass to tests? */ + if( argc > 1 ) + { + ret = (int)strtol( argv[ argc - 1 ], &endPtr, 10 ); + if( endPtr != argv[ argc - 1 ] && *endPtr == 0 ) + { + /* By spec, this means the entire string was a valid integer, so we treat it as a num_elements spec */ + /* (hence why we stored the result in ret first) */ + num_elements = ret; + log_info( "Testing with num_elements of %d\n", num_elements ); + argc--; + } + } + + /* Do we have a CPU/GPU specification? 
*/ + if( argc > 1 ) + { + if( strcmp( argv[ argc - 1 ], "gpu" ) == 0 || strcmp( argv[ argc - 1 ], "CL_DEVICE_TYPE_GPU" ) == 0 ) + { + device_type = CL_DEVICE_TYPE_GPU; + argc--; + } + else if( strcmp( argv[ argc - 1 ], "cpu" ) == 0 || strcmp( argv[ argc - 1 ], "CL_DEVICE_TYPE_CPU" ) == 0 ) + { + device_type = CL_DEVICE_TYPE_CPU; + argc--; + } + else if( strcmp( argv[ argc - 1 ], "accelerator" ) == 0 || strcmp( argv[ argc - 1 ], "CL_DEVICE_TYPE_ACCELERATOR" ) == 0 ) + { + device_type = CL_DEVICE_TYPE_ACCELERATOR; + argc--; + } + else if( strcmp( argv[ argc - 1 ], "CL_DEVICE_TYPE_DEFAULT" ) == 0 ) + { + device_type = CL_DEVICE_TYPE_DEFAULT; + argc--; + } + } + + /* Did we choose a specific device index? */ + if( argc > 1 ) + { + if( strlen( argv[ argc - 1 ] ) >= 3 && argv[ argc - 1 ][0] == 'i' && argv[ argc - 1 ][1] == 'd' ) + { + choosen_device_index = atoi( &(argv[ argc - 1 ][2]) ); + argc--; + } + } + + /* Did we choose a specific platform index? */ + if( argc > 1 ) + { + if( strlen( argv[ argc - 1 ] ) >= 3 && argv[ argc - 1 ][0] == 'p' && argv[ argc - 1 ][1] == 'i' && argv[ argc - 1 ][2] == 'd') + { + choosen_platform_index = atoi( &(argv[ argc - 1 ][3]) ); + argc--; + } + } + + switch( device_type ) + { + case CL_DEVICE_TYPE_GPU: log_info( "Requesting GPU device " ); break; + case CL_DEVICE_TYPE_CPU: log_info( "Requesting CPU device " ); break; + case CL_DEVICE_TYPE_ACCELERATOR: log_info( "Requesting Accelerator device " ); break; + case CL_DEVICE_TYPE_DEFAULT: log_info( "Requesting Default device " ); break; + default: log_error( "Requesting unknown device "); return -1; + } + log_info( based_on_env_var ? 
"based on environment variable " : "based on command line " ); + log_info( "for platform index %d and device index %d\n", choosen_platform_index, choosen_device_index); + +#if defined( __APPLE__ ) +#if defined( __i386__ ) || defined( __x86_64__ ) +#define kHasSSE3 0x00000008 +#define kHasSupplementalSSE3 0x00000100 +#define kHasSSE4_1 0x00000400 +#define kHasSSE4_2 0x00000800 + /* check our environment for a hint to disable SSE variants */ + { + const char *env = getenv( "CL_MAX_SSE" ); + if( env ) + { + extern int _cpu_capabilities; + int mask = 0; + if( 0 == strcasecmp( env, "SSE4.1" ) ) + mask = kHasSSE4_2; + else if( 0 == strcasecmp( env, "SSSE3" ) ) + mask = kHasSSE4_2 | kHasSSE4_1; + else if( 0 == strcasecmp( env, "SSE3" ) ) + mask = kHasSSE4_2 | kHasSSE4_1 | kHasSupplementalSSE3; + else if( 0 == strcasecmp( env, "SSE2" ) ) + mask = kHasSSE4_2 | kHasSSE4_1 | kHasSupplementalSSE3 | kHasSSE3; + else + { + log_error( "Error: Unknown CL_MAX_SSE setting: %s\n", env ); + return -2; + } + + log_info( "*** Environment: CL_MAX_SSE = %s ***\n", env ); + _cpu_capabilities &= ~mask; + } + } +#endif +#endif + + /* Get the platform */ + err = clGetPlatformIDs(0, NULL, &num_platforms); + if (err) { + print_error(err, "clGetPlatformIDs failed"); + test_finish(); + return -1; + } + + platforms = (cl_platform_id *) malloc( num_platforms * sizeof( cl_platform_id ) ); + if (!platforms || choosen_platform_index >= num_platforms) { + log_error( "platform index out of range -- choosen_platform_index (%d) >= num_platforms (%d)\n", choosen_platform_index, num_platforms ); + test_finish(); + return -1; + } + + err = clGetPlatformIDs(num_platforms, platforms, NULL); + if (err) { + print_error(err, "clGetPlatformIDs failed"); + test_finish(); + return -1; + } + + /* Get the number of requested devices */ + err = clGetDeviceIDs(platforms[choosen_platform_index], device_type, 0, NULL, &num_devices ); + if (err) { + print_error(err, "clGetDeviceIDs failed"); + test_finish(); + return -1; + 
} + + devices = (cl_device_id *) malloc( num_devices * sizeof( cl_device_id ) ); + if (!devices || choosen_device_index >= num_devices) { + log_error( "device index out of range -- choosen_device_index (%d) >= num_devices (%d)\n", choosen_device_index, num_devices ); + test_finish(); + return -1; + } + + /* Get the requested device */ + err = clGetDeviceIDs(platforms[choosen_platform_index], device_type, num_devices, devices, NULL ); + if (err) { + print_error(err, "clGetDeviceIDs failed"); + test_finish(); + return -1; + } + + device = devices[choosen_device_index]; + free(devices); + devices = NULL; + free(platforms); + platforms = NULL; + + if( printDeviceHeader( device ) != CL_SUCCESS ) + { + test_finish(); + return -1; + } + + cl_device_fp_config fpconfig = 0; + err = clGetDeviceInfo( device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof( fpconfig ), &fpconfig, NULL ); + if (err) { + print_error(err, "clGetDeviceInfo for CL_DEVICE_SINGLE_FP_CONFIG failed"); + test_finish(); + return -1; + } + + gFlushDenormsToZero = ( 0 == (fpconfig & CL_FP_DENORM)); + log_info( "Supports single precision denormals: %s\n", gFlushDenormsToZero ? 
"NO" : "YES" ); + log_info( "sizeof( void*) = %d (host)\n", (int) sizeof( void* ) ); + + //detect whether profile of the device is embedded + char profile[1024] = ""; + err = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL); + if (err) + { + print_error(err, "clGetDeviceInfo for CL_DEVICE_PROFILE failed\n" ); + test_finish(); + return -1; + } + gIsEmbedded = NULL != strstr(profile, "EMBEDDED_PROFILE"); + + //detect the floating point capabilities + cl_device_fp_config floatCapabilities = 0; + err = clGetDeviceInfo(device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof(floatCapabilities), &floatCapabilities, NULL); + if (err) + { + print_error(err, "clGetDeviceInfo for CL_DEVICE_SINGLE_FP_CONFIG failed\n"); + test_finish(); + return -1; + } + + // Check for problems that only embedded will have + if( gIsEmbedded ) + { + //If the device is embedded, we need to detect if the device supports Infinity and NaN + if ((floatCapabilities & CL_FP_INF_NAN) == 0) + gInfNanSupport = 0; + + // check the extensions list to see if ulong and long are supported + size_t extensionsStringSize = 0; + if( (err = clGetDeviceInfo( device, CL_DEVICE_EXTENSIONS, 0, NULL, &extensionsStringSize ) )) + { + print_error( err, "Unable to get extensions string size for embedded device" ); + test_finish(); + return -1; + } + char *extensions_string = (char*) malloc(extensionsStringSize); + if( NULL == extensions_string ) + { + print_error( CL_OUT_OF_HOST_MEMORY, "Unable to allocate storage for extensions string for embedded device" ); + test_finish(); + return -1; + } + + if( (err = clGetDeviceInfo( device, CL_DEVICE_EXTENSIONS, extensionsStringSize, extensions_string, NULL ) )) + { + print_error( err, "Unable to get extensions string for embedded device" ); + test_finish(); + return -1; + } + + if( extensions_string[extensionsStringSize-1] != '\0' ) + { + log_error( "FAILURE: extensions string for embedded device is not NUL terminated" ); + test_finish(); + return -1; + } + + if( 
NULL == strstr( extensions_string, "cles_khr_int64" )) + gHasLong = 0; + + free(extensions_string); + } + + if( getenv( "OPENCL_1_0_DEVICE" ) ) + { + char c_version[1024]; + gIsOpenCL_1_0_Device = 1; + memset( c_version, 0, sizeof( c_version ) ); + + if( (err = clGetDeviceInfo( device, CL_DEVICE_OPENCL_C_VERSION, sizeof(c_version), c_version, NULL )) ) + { + log_error( "FAILURE: unable to get CL_DEVICE_OPENCL_C_VERSION on 1.0 device. (%d)\n", err ); + test_finish(); + return -1; + } + + if( 0 == strncmp( c_version, "OpenCL C 1.0 ", strlen( "OpenCL C 1.0 " ) ) ) + { + gIsOpenCL_C_1_0_Device = 1; + log_info( "Device is a OpenCL C 1.0 device\n" ); + } + else + log_info( "Device is a OpenCL 1.0 device, but supports OpenCL C 1.1\n" ); + } + + cl_uint device_address_bits = 0; + if( (err = clGetDeviceInfo( device, CL_DEVICE_ADDRESS_BITS, sizeof( device_address_bits ), &device_address_bits, NULL ) )) + { + print_error( err, "Unable to obtain device address bits" ); + test_finish(); + return -1; + } + if( device_address_bits ) + log_info( "sizeof( void*) = %d (device)\n", device_address_bits/8 ); + else + { + log_error("Invalid device address bit size returned by device.\n"); + test_finish(); + return -1; + } + + + /* If we have a device checking function, run it */ + if( ( deviceCheckFn != NULL ) && deviceCheckFn( device ) != CL_SUCCESS ) + { + test_finish(); + return -1; + } + + if (num_elements <= 0) + num_elements = DEFAULT_NUM_ELEMENTS; + + // On most platforms which support denorm, default is FTZ off. However, + // on some hardware where the reference is computed, default might be flush denorms to zero e.g. arm. + // This creates issues in result verification. Since spec allows the implementation to either flush or + // not flush denorms to zero, an implementation may choose not be flush i.e. return denorm result whereas + // reference result may be zero (flushed denorm). 
Hence we need to disable denorm flushing on host side + // where reference is being computed to make sure we get non-flushed reference result. If implementation + // returns flushed result, we correctly take care of that in verification code. +#if defined(__APPLE__) && defined(__arm__) + FPU_mode_type oldMode; + DisableFTZ( &oldMode ); +#endif + + int error = parseAndCallCommandLineTests( argc, argv, device, num_fns, fnList, fnNames, forceNoContextCreation, queueProps, num_elements ); + + #if defined(__APPLE__) && defined(__arm__) + // Restore the old FP mode before leaving. + RestoreFPState( &oldMode ); +#endif + + return error; +} + +static int find_wildcard_matching_functions( const char *fnNames[], unsigned char fnsToCall[], unsigned int num_fns, + const char *wildcard ) +{ + int found_tests = 0; + size_t wildcard_length = strlen( wildcard ) - 1; /* -1 for the asterisk */ + + for( unsigned int fnIndex = 0; fnIndex < num_fns; fnIndex++ ) + { + if( strncmp( fnNames[ fnIndex ], wildcard, wildcard_length ) == 0 ) + { + if( fnsToCall[ fnIndex ] ) + { + log_error( "ERROR: Test '%s' has already been selected.\n", fnNames[ fnIndex ] ); + return EXIT_FAILURE; + } + + fnsToCall[ fnIndex ] = 1; + found_tests = 1; + } + } + + if( !found_tests ) + { + log_error( "ERROR: The wildcard '%s' did not match any test names.\n", wildcard ); + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; +} + +static int find_argument_matching_function( const char *fnNames[], unsigned char *fnsToCall, unsigned int num_fns, + const char *argument ) +{ + unsigned int fnIndex; + + for( fnIndex = 0; fnIndex < num_fns; fnIndex++ ) + { + if( strcmp( argument, fnNames[ fnIndex ] ) == 0 ) + { + if( fnsToCall[ fnIndex ] ) + { + log_error( "ERROR: Test '%s' has already been selected.\n", fnNames[ fnIndex ] ); + return EXIT_FAILURE; + } + else + { + fnsToCall[ fnIndex ] = 1; + break; + } + } + } + + if( fnIndex == num_fns ) + { + log_error( "ERROR: The argument '%s' did not match any test names.\n", 
argument ); + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; +} + +int parseAndCallCommandLineTests( int argc, const char *argv[], cl_device_id device, unsigned int num_fns, + basefn fnList[], const char *fnNames[], int forceNoContextCreation, + cl_command_queue_properties queueProps, int num_elements ) +{ + int ret = EXIT_SUCCESS; + + unsigned char *fnsToCall = ( unsigned char* ) calloc( num_fns, 1 ); + + if( argc == 1 ) + { + /* No actual arguments, all tests will be run. */ + memset( fnsToCall, 1, num_fns ); + } + else + { + for( int argIndex = 1; argIndex < argc; argIndex++ ) + { + if( strchr( argv[ argIndex ], '*' ) != NULL ) + { + ret = find_wildcard_matching_functions( fnNames, fnsToCall, num_fns, argv[ argIndex ] ); + } + else + { + if( strcmp( argv[ argIndex ], "all" ) == 0 ) + { + memset( fnsToCall, 1, num_fns ); + break; + } + else + { + ret = find_argument_matching_function( fnNames, fnsToCall, num_fns, argv[ argIndex ] ); + } + } + + if( ret == EXIT_FAILURE ) + { + break; + } + } + } + + if( ret == EXIT_SUCCESS ) + { + ret = callTestFunctions( fnList, fnNames, fnsToCall, num_fns, device, forceNoContextCreation, num_elements, queueProps ); + + if( gTestsFailed == 0 ) + { + if( gTestsPassed > 1 ) + { + log_info("PASSED %d of %d tests.\n", gTestsPassed, gTestsPassed); + } + else if( gTestsPassed > 0 ) + { + log_info("PASSED test.\n"); + } + } + else if( gTestsFailed > 0 ) + { + if( gTestsFailed+gTestsPassed > 1 ) + { + log_error("FAILED %d of %d tests.\n", gTestsFailed, gTestsFailed+gTestsPassed); + } + else + { + log_error("FAILED test.\n"); + } + } + } + + test_finish(); + + free( fnsToCall ); + + return ret; +} + +int callTestFunctions( basefn functionList[], const char *functionNames[], unsigned char functionsToCall[], + int numFunctions, cl_device_id deviceToUse, int forceNoContextCreation, + int numElementsToUse, cl_command_queue_properties queueProps ) +{ + int numErrors = 0; + + for( int i = 0; i < numFunctions; ++i ) + { + if( functionsToCall[ 
i ] ) + { + /* Skip any unimplemented tests. */ + if( functionList[ i ] != NULL ) + { + numErrors += callSingleTestFunction( functionList[ i ], functionNames[ i ], deviceToUse, + forceNoContextCreation, numElementsToUse, queueProps ); + } + else + { + log_info( "%s test currently not implemented\n", functionNames[ i ] ); + } + } + } + + return numErrors; +} + +void CL_CALLBACK notify_callback(const char *errinfo, const void *private_info, size_t cb, void *user_data) +{ + log_info( "%s\n", errinfo ); +} + +// Actual function execution +int callSingleTestFunction( basefn functionToCall, const char *functionName, + cl_device_id deviceToUse, int forceNoContextCreation, + int numElementsToUse, cl_command_queue_properties queueProps ) +{ + int numErrors = 0, ret; + cl_int error; + cl_context context = NULL; + cl_command_queue queue = NULL; + + /* Create a context to work with, unless we're told not to */ + if( !forceNoContextCreation ) + { + context = clCreateContext(NULL, 1, &deviceToUse, notify_callback, NULL, &error ); + if (!context) + { + print_error( error, "Unable to create testing context" ); + return 1; + } + + queue = clCreateCommandQueue( context, deviceToUse, queueProps, &error ); + if( queue == NULL ) + { + print_error( error, "Unable to create testing command queue" ); + return 1; + } + } + + /* Run the test and print the result */ + log_info( "%s...\n", functionName ); + fflush( stdout ); + + ret = functionToCall( deviceToUse, context, queue, numElementsToUse); //test_threaded_function( ptr_basefn_list[i], group, context, num_elements); + if( ret == TEST_NOT_IMPLEMENTED ) + { + /* Tests can also let us know they're not implemented yet */ + log_info("%s test currently not implemented\n\n", functionName); + } + else + { + /* Print result */ + if( ret == 0 ) { + log_info( "%s passed\n", functionName ); + gTestsPassed++; + } + else + { + numErrors++; + log_error( "%s FAILED\n", functionName ); + gTestsFailed++; + } + } + + /* Release the context */ + if( 
!forceNoContextCreation ) + { + int error = clFinish(queue); + if (error) { + log_error("clFinish failed: %d", error); + numErrors++; + } + clReleaseCommandQueue( queue ); + clReleaseContext( context ); + } + + return numErrors; +} + +void checkDeviceTypeOverride( cl_device_type *inOutType ) +{ + /* Check if we are forced to CPU mode */ + char *force_cpu = getenv( "CL_DEVICE_TYPE" ); + if( force_cpu != NULL ) + { + if( strcmp( force_cpu, "gpu" ) == 0 || strcmp( force_cpu, "CL_DEVICE_TYPE_GPU" ) == 0 ) + *inOutType = CL_DEVICE_TYPE_GPU; + else if( strcmp( force_cpu, "cpu" ) == 0 || strcmp( force_cpu, "CL_DEVICE_TYPE_CPU" ) == 0 ) + *inOutType = CL_DEVICE_TYPE_CPU; + else if( strcmp( force_cpu, "accelerator" ) == 0 || strcmp( force_cpu, "CL_DEVICE_TYPE_ACCELERATOR" ) == 0 ) + *inOutType = CL_DEVICE_TYPE_ACCELERATOR; + else if( strcmp( force_cpu, "CL_DEVICE_TYPE_DEFAULT" ) == 0 ) + *inOutType = CL_DEVICE_TYPE_DEFAULT; + } + + switch( *inOutType ) + { + case CL_DEVICE_TYPE_GPU: log_info( "Requesting GPU device " ); break; + case CL_DEVICE_TYPE_CPU: log_info( "Requesting CPU device " ); break; + case CL_DEVICE_TYPE_ACCELERATOR: log_info( "Requesting Accelerator device " ); break; + case CL_DEVICE_TYPE_DEFAULT: log_info( "Requesting Default device " ); break; + default: break; + } + log_info( force_cpu != NULL ? "based on environment variable\n" : "based on command line\n" ); + +#if defined( __APPLE__ ) + { + // report on any unusual library search path indirection + char *libSearchPath = getenv( "DYLD_LIBRARY_PATH"); + if( libSearchPath ) + log_info( "*** DYLD_LIBRARY_PATH = \"%s\"\n", libSearchPath ); + + // report on any unusual framework search path indirection + char *frameworkSearchPath = getenv( "DYLD_FRAMEWORK_PATH"); + if( frameworkSearchPath ) + log_info( "*** DYLD_FRAMEWORK_PATH = \"%s\"\n", frameworkSearchPath ); + } +#endif + +} + +#if ! 
defined( __APPLE__ ) +void memset_pattern4(void *dest, const void *src_pattern, size_t bytes ) +{ + uint32_t pat = ((uint32_t*) src_pattern)[0]; + size_t count = bytes / 4; + size_t i; + uint32_t *d = (uint32_t*)dest; + + for( i = 0; i < count; i++ ) + d[i] = pat; + + d += i; + + bytes &= 3; + if( bytes ) + memcpy( d, src_pattern, bytes ); +} +#endif + +extern cl_device_type GetDeviceType( cl_device_id d ) +{ + cl_device_type result = -1; + cl_int err = clGetDeviceInfo( d, CL_DEVICE_TYPE, sizeof( result ), &result, NULL ); + if( CL_SUCCESS != err ) + log_error( "ERROR: Unable to get device type for device %p\n", d ); + return result; +} + + +cl_device_id GetOpposingDevice( cl_device_id device ) +{ + cl_int error; + cl_device_id *otherDevices; + cl_uint actualCount; + cl_platform_id plat; + + // Get the platform of the device to use for getting a list of devices + error = clGetDeviceInfo( device, CL_DEVICE_PLATFORM, sizeof( plat ), &plat, NULL ); + if( error != CL_SUCCESS ) + { + print_error( error, "Unable to get device's platform" ); + return NULL; + } + + // Get a list of all devices + error = clGetDeviceIDs( plat, CL_DEVICE_TYPE_ALL, 0, NULL, &actualCount ); + if( error != CL_SUCCESS ) + { + print_error( error, "Unable to get list of devices size" ); + return NULL; + } + otherDevices = (cl_device_id *)malloc(actualCount*sizeof(cl_device_id)); + error = clGetDeviceIDs( plat, CL_DEVICE_TYPE_ALL, actualCount, otherDevices, NULL ); + if( error != CL_SUCCESS ) + { + print_error( error, "Unable to get list of devices" ); + free(otherDevices); + return NULL; + } + + if( actualCount == 1 ) + { + free(otherDevices); + return device; // NULL means error, returning self means we couldn't find another one + } + + // Loop and just find one that isn't the one we were given + cl_uint i; + for( i = 0; i < actualCount; i++ ) + { + if( otherDevices[ i ] != device ) + { + cl_device_type newType; + error = clGetDeviceInfo( otherDevices[ i ], CL_DEVICE_TYPE, sizeof( newType ), 
&newType, NULL ); + if( error != CL_SUCCESS ) + { + print_error( error, "Unable to get device type for other device" ); + free(otherDevices); + return NULL; + } + cl_device_id result = otherDevices[ i ]; + free(otherDevices); + return result; + } + } + + // Should never get here + free(otherDevices); + return NULL; +} + + diff --git a/test_conformance/compatibility/test_common/harness/testHarness.h b/test_conformance/compatibility/test_common/harness/testHarness.h new file mode 100644 index 00000000..c2620647 --- /dev/null +++ b/test_conformance/compatibility/test_common/harness/testHarness.h @@ -0,0 +1,100 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef _testHarness_h +#define _testHarness_h + +#include "threadTesting.h" +#include "clImageHelper.h" + +#ifdef __cplusplus +extern "C" { +#endif + +extern cl_uint gReSeed; +extern cl_uint gRandomSeed; + +// Supply a list of functions to test here. This will allocate a CL device, create a context, all that +// setup work, and then call each function in turn as dictatated by the passed arguments. +extern int runTestHarness( int argc, const char *argv[], unsigned int num_fns, + basefn fnList[], const char *fnNames[], + int imageSupportRequired, int forceNoContextCreation, cl_command_queue_properties queueProps ); + +// Device checking function. See runTestHarnessWithCheck. 
If this function returns anything other than CL_SUCCESS (0), the harness exits. +typedef int (*DeviceCheckFn)( cl_device_id device ); + +// Same as runTestHarness, but also supplies a function that checks the created device for required functionality. +extern int runTestHarnessWithCheck( int argc, const char *argv[], unsigned int num_fns, + basefn fnList[], const char *fnNames[], + int imageSupportRequired, int forceNoContextCreation, cl_command_queue_properties queueProps, DeviceCheckFn deviceCheckFn ); + +// The command line parser used by runTestHarness to break up parameters into calls to callTestFunctions +extern int parseAndCallCommandLineTests( int argc, const char *argv[], cl_device_id device, unsigned int num_fns, + basefn *fnList, const char *fnNames[], + int forceNoContextCreation, cl_command_queue_properties queueProps, int num_elements ); + +// Call this function if you need to do all the setup work yourself, and just need the function list called/ +// managed. +// functionList is the actual array of functions +// functionNames is an array of strings representing the name of each function +// functionsToCall is an array of integers (treated as bools) which tell which function is to be called, +// each element at index i, corresponds to the element in functionList at index i +// numFunctions is the number of elements in the arrays +// contextProps are used to create a testing context for each test +// deviceToUse and numElementsToUse are all just passed to each test function +extern int callTestFunctions( basefn functionList[], const char *functionNames[], unsigned char functionsToCall[], + int numFunctions, cl_device_id deviceToUse, int forceNoContextCreation, + int numElementsToUse, cl_command_queue_properties queueProps ); + +// This function is called by callTestFunctions, once per function, to do setup, call, logging and cleanup +extern int callSingleTestFunction( basefn functionToCall, const char *functionName, + cl_device_id deviceToUse, int 
forceNoContextCreation, + int numElementsToUse, cl_command_queue_properties queueProps ); + +///// Miscellaneous steps + +// Given a pre-existing device type choice, check the environment for an override, then print what +// choice was made and how (and return the overridden choice, if there is one) +extern void checkDeviceTypeOverride( cl_device_type *inOutType ); + +// standard callback function for context pfn_notify +extern void CL_CALLBACK notify_callback(const char *errinfo, const void *private_info, size_t cb, void *user_data); + +extern cl_device_type GetDeviceType( cl_device_id ); + +// Given a device (most likely passed in by the harness, but not required), will attempt to find +// a DIFFERENT device and return it. Useful for finding another device to run multi-device tests against. +// Note that returning NULL means an error was hit, but if no error was hit and the device passed in +// is the only device available, the SAME device is returned, so check! +extern cl_device_id GetOpposingDevice( cl_device_id device ); + + +extern int gFlushDenormsToZero; // This is set to 1 if the device does not support denorms (CL_FP_DENORM) +extern int gInfNanSupport; // This is set to 1 if the device supports infinities and NaNs +extern int gIsEmbedded; // This is set to 1 if the device is an embedded device +extern int gHasLong; // This is set to 1 if the device suppots long and ulong types in OpenCL C. +extern int gIsOpenCL_C_1_0_Device; // This is set to 1 if the device supports only OpenCL C 1.0. + +#if ! 
defined( __APPLE__ ) + void memset_pattern4(void *, const void *, size_t); +#endif + +#ifdef __cplusplus +} +#endif + +#endif // _testHarness_h + + diff --git a/test_conformance/compatibility/test_common/harness/test_mt19937.c b/test_conformance/compatibility/test_common/harness/test_mt19937.c new file mode 100644 index 00000000..c0498ea9 --- /dev/null +++ b/test_conformance/compatibility/test_common/harness/test_mt19937.c @@ -0,0 +1,51 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "mt19937.h" +#include <stdio.h> + +int main( void ) +{ + MTdata d = init_genrand(42); + int i; + const cl_uint reference[16] = { 0x5fe1dc66, 0x8b255210, 0x0380b0c8, 0xc87d2ce4, + 0x55c31f24, 0x8bcd21ab, 0x14d5fef5, 0x9416d2b6, + 0xdf875de9, 0x00517d76, 0xd861c944, 0xa7676404, + 0x5491aff4, 0x67616209, 0xc368b3fb, 0x929dfc92 }; + int errcount = 0; + + for( i = 0; i < 65536; i++ ) + { + cl_uint u = genrand_int32( d ); + if( 0 == (i & 4095) ) + { + if( u != reference[i>>12] ) + { + printf("ERROR: expected *0x%8.8x at %d. 
Got 0x%8.8x\n", reference[i>>12], i, u ); + errcount++; + } + } + } + + free_mtdata(d); + + if( errcount ) + printf("mt19937 test failed.\n"); + else + printf("mt19937 test passed.\n"); + + + return 0; +} \ No newline at end of file diff --git a/test_conformance/compatibility/test_common/harness/threadTesting.c b/test_conformance/compatibility/test_common/harness/threadTesting.c new file mode 100644 index 00000000..2f16dcca --- /dev/null +++ b/test_conformance/compatibility/test_common/harness/threadTesting.c @@ -0,0 +1,106 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "threadTesting.h" +#include "errorHelpers.h" +#include +#include + +#if !defined(_WIN32) +#include +#endif + +#include +#include + +#if !defined(_WIN32) +#include +#endif + +#if 0 // Disabed for now + +typedef struct +{ + basefn mFunction; + cl_device_id mDevice; + cl_context mContext; + int mNumElements; +} TestFnArgs; + +//////////////////////////////////////////////////////////////////////////////// +// Thread-based testing. Spawns a new thread to run the given test function, +// then waits for it to complete. 
The entire idea is that, if the thread crashes, +// we can catch it and report it as a failure instead of crashing the entire suite +//////////////////////////////////////////////////////////////////////////////// + +void *test_thread_wrapper( void *data ) +{ + TestFnArgs *args; + int retVal; + cl_context context; + + args = (TestFnArgs *)data; + + /* Create a new context to use (contexts can't cross threads) */ + context = clCreateContext(NULL, args->mDeviceGroup); + if( context == NULL ) + { + log_error("clCreateContext failed for new thread\n"); + return (void *)(-1); + } + + /* Call function */ + retVal = args->mFunction( args->mDeviceGroup, args->mDevice, context, args->mNumElements ); + + clReleaseContext( context ); + + return (void *)retVal; +} + +int test_threaded_function( basefn fnToTest, cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) +{ + int error; + pthread_t threadHdl; + void *retVal; + TestFnArgs args; + + + args.mFunction = fnToTest; + args.mDeviceGroup = deviceGroup; + args.mDevice = device; + args.mContext = context; + args.mNumElements = numElements; + + + error = pthread_create( &threadHdl, NULL, test_thread_wrapper, (void *)&args ); + if( error != 0 ) + { + log_error( "ERROR: Unable to create thread for testing!\n" ); + return -1; + } + + /* Thread has been started, now just wait for it to complete (or crash) */ + error = pthread_join( threadHdl, &retVal ); + if( error != 0 ) + { + log_error( "ERROR: Unable to join testing thread!\n" ); + return -1; + } + + return (int)((intptr_t)retVal); +} +#endif + + diff --git a/test_conformance/compatibility/test_common/harness/threadTesting.h b/test_conformance/compatibility/test_common/harness/threadTesting.h new file mode 100644 index 00000000..81a5757b --- /dev/null +++ b/test_conformance/compatibility/test_common/harness/threadTesting.h @@ -0,0 +1,32 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef _threadTesting_h +#define _threadTesting_h + +#ifdef __APPLE__ + #include +#else + #include +#endif + +#define TEST_NOT_IMPLEMENTED -99 + +typedef int (*basefn)(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_threaded_function( basefn fnToTest, cl_device_id device, cl_context context, cl_command_queue queue, int numElements ); + +#endif // _threadTesting_h + + diff --git a/test_conformance/compatibility/test_common/harness/typeWrappers.cpp b/test_conformance/compatibility/test_common/harness/typeWrappers.cpp new file mode 100644 index 00000000..d4e08fb9 --- /dev/null +++ b/test_conformance/compatibility/test_common/harness/typeWrappers.cpp @@ -0,0 +1,481 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "typeWrappers.h" +#include "kernelHelpers.h" +#include "errorHelpers.h" +#include +#include "clImageHelper.h" + +#define ROUND_SIZE_UP( _size, _align ) (((size_t)(_size) + (size_t)(_align) - 1) & -((size_t)(_align))) + +#if defined( __APPLE__ ) + #define kPageSize 4096 + #include + #include +#elif defined(__linux__) + #include + #define kPageSize (getpagesize()) +#endif + +clProtectedImage::clProtectedImage( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, cl_int *errcode_ret ) +{ + cl_int err = Create( context, mem_flags, fmt, width ); + if( errcode_ret != NULL ) + *errcode_ret = err; +} + +cl_int clProtectedImage::Create( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width ) +{ + cl_int error; +#if defined( __APPLE__ ) + int protect_pages = 1; + cl_device_id devices[16]; + size_t number_of_devices; + error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices), devices, &number_of_devices); + test_error(error, "clGetContextInfo for CL_CONTEXT_DEVICES failed"); + + number_of_devices /= sizeof(cl_device_id); + for (int i=0; i<(int)number_of_devices; i++) { + cl_device_type type; + error = clGetDeviceInfo(devices[i], CL_DEVICE_TYPE, sizeof(type), &type, NULL); + test_error(error, "clGetDeviceInfo for CL_DEVICE_TYPE failed"); + if (type == CL_DEVICE_TYPE_GPU) { + protect_pages = 0; + break; + } + } + + if (protect_pages) { + size_t pixelBytes = get_pixel_bytes(fmt); + size_t rowBytes = ROUND_SIZE_UP( width * pixelBytes, kPageSize ); + size_t rowStride = rowBytes + kPageSize; + + // create backing store + backingStoreSize = rowStride + 8 * rowStride; + backingStore = mmap(0, backingStoreSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, 0, 0); + + // add guard pages + size_t row; + char *p = (char*) backingStore; + char *imagePtr = (char*) backingStore + 4 * rowStride; + for( row = 0; row < 4; row++ ) + { + mprotect( p, rowStride, PROT_NONE ); p += rowStride; + } + p += 
rowBytes; + mprotect( p, kPageSize, PROT_NONE ); p += rowStride; + p -= rowBytes; + for( row = 0; row < 4; row++ ) + { + mprotect( p, rowStride, PROT_NONE ); p += rowStride; + } + + if( getenv( "CL_ALIGN_RIGHT" ) ) + { + static int spewEnv = 1; + if(spewEnv) + { + log_info( "***CL_ALIGN_RIGHT is set. Aligning images at right edge of page\n" ); + spewEnv = 0; + } + imagePtr += rowBytes - pixelBytes * width; + } + + image = create_image_1d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, rowStride, imagePtr, NULL, &error ); + } else { + backingStore = NULL; + image = create_image_1d( context, mem_flags, fmt, width, 0, NULL, NULL, &error ); + + } +#else + + backingStore = NULL; + image = create_image_1d( context, mem_flags, fmt, width, 0, NULL, NULL, &error ); + +#endif + return error; +} + + +clProtectedImage::clProtectedImage( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, cl_int *errcode_ret ) +{ + cl_int err = Create( context, mem_flags, fmt, width, height ); + if( errcode_ret != NULL ) + *errcode_ret = err; +} + +cl_int clProtectedImage::Create( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height ) +{ + cl_int error; +#if defined( __APPLE__ ) + int protect_pages = 1; + cl_device_id devices[16]; + size_t number_of_devices; + error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices), devices, &number_of_devices); + test_error(error, "clGetContextInfo for CL_CONTEXT_DEVICES failed"); + + number_of_devices /= sizeof(cl_device_id); + for (int i=0; i<(int)number_of_devices; i++) { + cl_device_type type; + error = clGetDeviceInfo(devices[i], CL_DEVICE_TYPE, sizeof(type), &type, NULL); + test_error(error, "clGetDeviceInfo for CL_DEVICE_TYPE failed"); + if (type == CL_DEVICE_TYPE_GPU) { + protect_pages = 0; + break; + } + } + + if (protect_pages) { + size_t pixelBytes = get_pixel_bytes(fmt); + size_t rowBytes = ROUND_SIZE_UP( width * pixelBytes, 
kPageSize ); + size_t rowStride = rowBytes + kPageSize; + + // create backing store + backingStoreSize = height * rowStride + 8 * rowStride; + backingStore = mmap(0, backingStoreSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, 0, 0); + + // add guard pages + size_t row; + char *p = (char*) backingStore; + char *imagePtr = (char*) backingStore + 4 * rowStride; + for( row = 0; row < 4; row++ ) + { + mprotect( p, rowStride, PROT_NONE ); p += rowStride; + } + p += rowBytes; + for( row = 0; row < height; row++ ) + { + mprotect( p, kPageSize, PROT_NONE ); p += rowStride; + } + p -= rowBytes; + for( row = 0; row < 4; row++ ) + { + mprotect( p, rowStride, PROT_NONE ); p += rowStride; + } + + if( getenv( "CL_ALIGN_RIGHT" ) ) + { + static int spewEnv = 1; + if(spewEnv) + { + log_info( "***CL_ALIGN_RIGHT is set. Aligning images at right edge of page\n" ); + spewEnv = 0; + } + imagePtr += rowBytes - pixelBytes * width; + } + + image = create_image_2d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, height, rowStride, imagePtr, &error ); + } else { + backingStore = NULL; + image = create_image_2d( context, mem_flags, fmt, width, height, 0, NULL, &error ); + + } +#else + + backingStore = NULL; + image = create_image_2d( context, mem_flags, fmt, width, height, 0, NULL, &error ); + +#endif + return error; +} + +clProtectedImage::clProtectedImage( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, cl_int *errcode_ret ) +{ + cl_int err = Create( context, mem_flags, fmt, width, height, depth ); + if( errcode_ret != NULL ) + *errcode_ret = err; +} + +cl_int clProtectedImage::Create( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth ) +{ + cl_int error; + +#if defined( __APPLE__ ) + int protect_pages = 1; + cl_device_id devices[16]; + size_t number_of_devices; + error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices), devices, 
&number_of_devices); + test_error(error, "clGetContextInfo for CL_CONTEXT_DEVICES failed"); + + number_of_devices /= sizeof(cl_device_id); + for (int i=0; i<(int)number_of_devices; i++) { + cl_device_type type; + error = clGetDeviceInfo(devices[i], CL_DEVICE_TYPE, sizeof(type), &type, NULL); + test_error(error, "clGetDeviceInfo for CL_DEVICE_TYPE failed"); + if (type == CL_DEVICE_TYPE_GPU) { + protect_pages = 0; + break; + } + } + + if (protect_pages) { + size_t pixelBytes = get_pixel_bytes(fmt); + size_t rowBytes = ROUND_SIZE_UP( width * pixelBytes, kPageSize ); + size_t rowStride = rowBytes + kPageSize; + + // create backing store + backingStoreSize = height * depth * rowStride + 8 * rowStride; + backingStore = mmap(0, backingStoreSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, 0, 0); + + // add guard pages + size_t row; + char *p = (char*) backingStore; + char *imagePtr = (char*) backingStore + 4 * rowStride; + for( row = 0; row < 4; row++ ) + { + mprotect( p, rowStride, PROT_NONE ); p += rowStride; + } + p += rowBytes; + for( row = 0; row < height*depth; row++ ) + { + mprotect( p, kPageSize, PROT_NONE ); p += rowStride; + } + p -= rowBytes; + for( row = 0; row < 4; row++ ) + { + mprotect( p, rowStride, PROT_NONE ); p += rowStride; + } + + if( getenv( "CL_ALIGN_RIGHT" ) ) + { + static int spewEnv = 1; + if(spewEnv) + { + log_info( "***CL_ALIGN_RIGHT is set. 
Aligning images at right edge of page\n" ); + spewEnv = 0; + } + imagePtr += rowBytes - pixelBytes * width; + } + + image = create_image_3d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, height, depth, rowStride, height*rowStride, imagePtr, &error ); + } else { + backingStore = NULL; + image = create_image_3d( context, mem_flags, fmt, width, height, depth, 0, 0, NULL, &error ); + } +#else + + backingStore = NULL; + image = create_image_3d( context, mem_flags, fmt, width, height, depth, 0, 0, NULL, &error ); + +#endif + + return error; +} + + +clProtectedImage::clProtectedImage( cl_context context, cl_mem_object_type imageType, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, size_t arraySize, cl_int *errcode_ret ) +{ + cl_int err = Create( context, imageType, mem_flags, fmt, width, height, depth, arraySize ); + if( errcode_ret != NULL ) + *errcode_ret = err; +} + +cl_int clProtectedImage::Create( cl_context context, cl_mem_object_type imageType, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, size_t arraySize ) +{ + cl_int error; +#if defined( __APPLE__ ) + int protect_pages = 1; + cl_device_id devices[16]; + size_t number_of_devices; + error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices), devices, &number_of_devices); + test_error(error, "clGetContextInfo for CL_CONTEXT_DEVICES failed"); + + number_of_devices /= sizeof(cl_device_id); + for (int i=0; i<(int)number_of_devices; i++) { + cl_device_type type; + error = clGetDeviceInfo(devices[i], CL_DEVICE_TYPE, sizeof(type), &type, NULL); + test_error(error, "clGetDeviceInfo for CL_DEVICE_TYPE failed"); + if (type == CL_DEVICE_TYPE_GPU) { + protect_pages = 0; + break; + } + } + + if (protect_pages) { + size_t pixelBytes = get_pixel_bytes(fmt); + size_t rowBytes = ROUND_SIZE_UP( width * pixelBytes, kPageSize ); + size_t rowStride = rowBytes + kPageSize; + + // create backing store + switch (imageType) + 
{ + case CL_MEM_OBJECT_IMAGE1D: + backingStoreSize = rowStride + 8 * rowStride; + break; + case CL_MEM_OBJECT_IMAGE2D: + backingStoreSize = height * rowStride + 8 * rowStride; + break; + case CL_MEM_OBJECT_IMAGE3D: + backingStoreSize = height * depth * rowStride + 8 * rowStride; + break; + case CL_MEM_OBJECT_IMAGE1D_ARRAY: + backingStoreSize = arraySize * rowStride + 8 * rowStride; + break; + case CL_MEM_OBJECT_IMAGE2D_ARRAY: + backingStoreSize = height * arraySize * rowStride + 8 * rowStride; + break; + } + backingStore = mmap(0, backingStoreSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, 0, 0); + + // add guard pages + size_t row; + char *p = (char*) backingStore; + char *imagePtr = (char*) backingStore + 4 * rowStride; + for( row = 0; row < 4; row++ ) + { + mprotect( p, rowStride, PROT_NONE ); p += rowStride; + } + p += rowBytes; + size_t sz = (height > 0 ? height : 1) * (depth > 0 ? depth : 1) * (arraySize > 0 ? arraySize : 1); + for( row = 0; row < sz; row++ ) + { + mprotect( p, kPageSize, PROT_NONE ); p += rowStride; + } + p -= rowBytes; + for( row = 0; row < 4; row++ ) + { + mprotect( p, rowStride, PROT_NONE ); p += rowStride; + } + + if( getenv( "CL_ALIGN_RIGHT" ) ) + { + static int spewEnv = 1; + if(spewEnv) + { + log_info( "***CL_ALIGN_RIGHT is set. 
Aligning images at right edge of page\n" ); + spewEnv = 0; + } + imagePtr += rowBytes - pixelBytes * width; + } + + switch (imageType) + { + case CL_MEM_OBJECT_IMAGE1D: + image = create_image_1d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, rowStride, imagePtr, NULL, &error ); + break; + case CL_MEM_OBJECT_IMAGE2D: + image = create_image_2d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, height, rowStride, imagePtr, &error ); + break; + case CL_MEM_OBJECT_IMAGE3D: + image = create_image_3d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, height, depth, rowStride, height*rowStride, imagePtr, &error ); + break; + case CL_MEM_OBJECT_IMAGE1D_ARRAY: + image = create_image_1d_array( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, arraySize, rowStride, rowStride, imagePtr, &error ); + break; + case CL_MEM_OBJECT_IMAGE2D_ARRAY: + image = create_image_2d_array( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, height, arraySize, rowStride, height*rowStride, imagePtr, &error ); + break; + } + } else { + backingStore = NULL; + switch (imageType) + { + case CL_MEM_OBJECT_IMAGE1D: + image = create_image_1d( context, mem_flags, fmt, width, 0, NULL, NULL, &error ); + break; + case CL_MEM_OBJECT_IMAGE2D: + image = create_image_2d( context, mem_flags, fmt, width, height, 0, NULL, &error ); + break; + case CL_MEM_OBJECT_IMAGE3D: + image = create_image_3d( context, mem_flags, fmt, width, height, depth, 0, 0, NULL, &error );; + break; + case CL_MEM_OBJECT_IMAGE1D_ARRAY: + image = create_image_1d_array( context, mem_flags, fmt, width, arraySize, 0, 0, NULL, &error ); + break; + case CL_MEM_OBJECT_IMAGE2D_ARRAY: + image = create_image_2d_array( context, mem_flags, fmt, width, height, arraySize, 0, 0, NULL, &error ); + break; + } + + } +#else + + backingStore = NULL; + switch (imageType) + { + case CL_MEM_OBJECT_IMAGE1D: + image = create_image_1d( context, mem_flags, fmt, width, 0, NULL, NULL, &error ); + break; + case CL_MEM_OBJECT_IMAGE2D: + image = 
create_image_2d( context, mem_flags, fmt, width, height, 0, NULL, &error ); + break; + case CL_MEM_OBJECT_IMAGE3D: + image = create_image_3d( context, mem_flags, fmt, width, height, depth, 0, 0, NULL, &error );; + break; + case CL_MEM_OBJECT_IMAGE1D_ARRAY: + image = create_image_1d_array( context, mem_flags, fmt, width, arraySize, 0, 0, NULL, &error ); + break; + case CL_MEM_OBJECT_IMAGE2D_ARRAY: + image = create_image_2d_array( context, mem_flags, fmt, width, height, arraySize, 0, 0, NULL, &error ); + break; + } +#endif + return error; +} + + + +/******* + * clProtectedArray implementation + *******/ +clProtectedArray::clProtectedArray() +{ + mBuffer = mValidBuffer = NULL; +} + +clProtectedArray::clProtectedArray( size_t sizeInBytes ) +{ + mBuffer = mValidBuffer = NULL; + Allocate( sizeInBytes ); +} + +clProtectedArray::~clProtectedArray() +{ + if( mBuffer != NULL ) { +#if defined( __APPLE__ ) + int error = munmap( mBuffer, mRealSize ); + if (error) log_error("WARNING: munmap failed in clProtectedArray.\n"); +#else + free( mBuffer ); +#endif + } +} + +void clProtectedArray::Allocate( size_t sizeInBytes ) +{ + +#if defined( __APPLE__ ) + + // Allocate enough space to: round up our actual allocation to an even number of pages + // and allocate two pages on either side + mRoundedSize = ROUND_SIZE_UP( sizeInBytes, kPageSize ); + mRealSize = mRoundedSize + kPageSize * 2; + + // Use mmap here to ensure we start on a page boundary, so the mprotect calls will work OK + mBuffer = (char *)mmap(0, mRealSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, 0, 0); + + mValidBuffer = mBuffer + kPageSize; + + // Protect guard area from access + mprotect( mValidBuffer - kPageSize, kPageSize, PROT_NONE ); + mprotect( mValidBuffer + mRoundedSize, kPageSize, PROT_NONE ); +#else + mRoundedSize = mRealSize = sizeInBytes; + mBuffer = mValidBuffer = (char *)calloc(1, mRealSize); +#endif +} + + diff --git a/test_conformance/compatibility/test_common/harness/typeWrappers.h 
b/test_conformance/compatibility/test_common/harness/typeWrappers.h new file mode 100644 index 00000000..32f8966a --- /dev/null +++ b/test_conformance/compatibility/test_common/harness/typeWrappers.h @@ -0,0 +1,333 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef _typeWrappers_h +#define _typeWrappers_h + +#include <stdio.h> +#include <stdlib.h> + +#if !defined(_WIN32) +#include <sys/mman.h> +#endif + +#include "compat.h" +#include <string.h> /* NOTE(review): original header name lost to markup stripping; confirm against upstream harness */ +#include "mt19937.h" +#include "errorHelpers.h" +#include "kernelHelpers.h" + +extern "C" cl_uint gReSeed; +extern "C" cl_uint gRandomSeed; + +/* cl_context wrapper */ + +class clContextWrapper +{ + public: + clContextWrapper() { mContext = NULL; } + clContextWrapper( cl_context program ) { mContext = program; } + ~clContextWrapper() { if( mContext != NULL ) clReleaseContext( mContext ); } + + clContextWrapper & operator=( const cl_context &rhs ) { mContext = rhs; return *this; } + operator cl_context() { return mContext; } + + cl_context * operator&() { return &mContext; } + + bool operator==( const cl_context &rhs ) { return mContext == rhs; } + + protected: + + cl_context mContext; +}; + +/* cl_program wrapper */ + +class clProgramWrapper +{ + public: + clProgramWrapper() { mProgram = NULL; } + clProgramWrapper( cl_program program ) { mProgram = program; } + ~clProgramWrapper() { if( mProgram != NULL ) clReleaseProgram( mProgram ); } + + clProgramWrapper & operator=( const cl_program &rhs ) { 
mProgram = rhs; return *this; } + operator cl_program() { return mProgram; } + + cl_program * operator&() { return &mProgram; } + + bool operator==( const cl_program &rhs ) { return mProgram == rhs; } + + protected: + + cl_program mProgram; +}; + +/* cl_kernel wrapper */ + +class clKernelWrapper +{ + public: + clKernelWrapper() { mKernel = NULL; } + clKernelWrapper( cl_kernel kernel ) { mKernel = kernel; } + ~clKernelWrapper() { if( mKernel != NULL ) clReleaseKernel( mKernel ); } + + clKernelWrapper & operator=( const cl_kernel &rhs ) { mKernel = rhs; return *this; } + operator cl_kernel() { return mKernel; } + + cl_kernel * operator&() { return &mKernel; } + + bool operator==( const cl_kernel &rhs ) { return mKernel == rhs; } + + protected: + + cl_kernel mKernel; +}; + +/* cl_mem (stream) wrapper */ + +class clMemWrapper +{ + public: + clMemWrapper() { mMem = NULL; } + clMemWrapper( cl_mem mem ) { mMem = mem; } + ~clMemWrapper() { if( mMem != NULL ) clReleaseMemObject( mMem ); } + + clMemWrapper & operator=( const cl_mem &rhs ) { mMem = rhs; return *this; } + operator cl_mem() { return mMem; } + + cl_mem * operator&() { return &mMem; } + + bool operator==( const cl_mem &rhs ) { return mMem == rhs; } + + protected: + + cl_mem mMem; +}; + +class clProtectedImage +{ + public: + clProtectedImage() { image = NULL; backingStore = NULL; } + clProtectedImage( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, cl_int *errcode_ret ); + clProtectedImage( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, cl_int *errcode_ret ); + clProtectedImage( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, cl_int *errcode_ret ); + clProtectedImage( cl_context context, cl_mem_object_type imageType, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, size_t arraySize, cl_int *errcode_ret ); + ~clProtectedImage() + 
{ + if( image != NULL ) + clReleaseMemObject( image ); + +#if defined( __APPLE__ ) + if(backingStore) + munmap(backingStore, backingStoreSize); +#endif + } + + cl_int Create( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width ); + cl_int Create( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height ); + cl_int Create( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth ); + cl_int Create( cl_context context, cl_mem_object_type imageType, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, size_t arraySize ); + + clProtectedImage & operator=( const cl_mem &rhs ) { image = rhs; backingStore = NULL; return *this; } + operator cl_mem() { return image; } + + cl_mem * operator&() { return ℑ } + + bool operator==( const cl_mem &rhs ) { return image == rhs; } + + protected: + void *backingStore; + size_t backingStoreSize; + cl_mem image; +}; + +/* cl_command_queue wrapper */ + +class clCommandQueueWrapper +{ + public: + clCommandQueueWrapper() { mMem = NULL; } + clCommandQueueWrapper( cl_command_queue mem ) { mMem = mem; } + ~clCommandQueueWrapper() { if( mMem != NULL ) {int error = clFinish(mMem); if (error) print_error(error, "clFinish failed"); clReleaseCommandQueue( mMem );} } + + clCommandQueueWrapper & operator=( const cl_command_queue &rhs ) { mMem = rhs; return *this; } + operator cl_command_queue() { return mMem; } + + cl_command_queue * operator&() { return &mMem; } + + bool operator==( const cl_command_queue &rhs ) { return mMem == rhs; } + + protected: + + cl_command_queue mMem; +}; + +/* cl_sampler wrapper */ +class clSamplerWrapper +{ + public: + clSamplerWrapper() { mMem = NULL; } + clSamplerWrapper( cl_sampler mem ) { mMem = mem; } + ~clSamplerWrapper() { if( mMem != NULL ) clReleaseSampler( mMem ); } + + clSamplerWrapper & operator=( const cl_sampler &rhs ) { mMem = rhs; return *this; } + 
operator cl_sampler() { return mMem; } + + cl_sampler * operator&() { return &mMem; } + + bool operator==( const cl_sampler &rhs ) { return mMem == rhs; } + + protected: + + cl_sampler mMem; +}; + +/* cl_event wrapper */ +class clEventWrapper +{ + public: + clEventWrapper() { mMem = NULL; } + clEventWrapper( cl_event mem ) { mMem = mem; } + ~clEventWrapper() { if( mMem != NULL ) clReleaseEvent( mMem ); } + + clEventWrapper & operator=( const cl_event &rhs ) { mMem = rhs; return *this; } + operator cl_event() { return mMem; } + + cl_event * operator&() { return &mMem; } + + bool operator==( const cl_event &rhs ) { return mMem == rhs; } + + protected: + + cl_event mMem; +}; + +/* Generic protected memory buffer, for verifying access within bounds */ +class clProtectedArray +{ + public: + clProtectedArray(); + clProtectedArray( size_t sizeInBytes ); + virtual ~clProtectedArray(); + + void Allocate( size_t sizeInBytes ); + + operator void *() { return (void *)mValidBuffer; } + operator const void *() const { return (const void *)mValidBuffer; } + + protected: + + char * mBuffer; + char * mValidBuffer; + size_t mRealSize, mRoundedSize; +}; + +class RandomSeed +{ + public: + RandomSeed( cl_uint seed ){ if(seed) log_info( "(seed = %10.10u) ", seed ); mtData = init_genrand(seed); } + ~RandomSeed() + { + if( gReSeed ) + gRandomSeed = genrand_int32( mtData ); + free_mtdata(mtData); + } + + operator MTdata () {return mtData;} + + protected: + MTdata mtData; +}; + +template class BufferOwningPtr +{ + BufferOwningPtr(BufferOwningPtr const &); // do not implement + void operator=(BufferOwningPtr const &); // do not implement + + void *ptr; + void *map; + size_t mapsize; // Bytes allocated total, pointed to by map. + size_t allocsize; // Bytes allocated in unprotected pages, pointed to by ptr. 
+ bool aligned; + public: + explicit BufferOwningPtr(void *p = 0) : ptr(p), map(0), mapsize(0), allocsize(0), aligned(false) {} + explicit BufferOwningPtr(void *p, void *m, size_t s) + : ptr(p), map(m), mapsize(s), allocsize(0), aligned(false) + { +#if ! defined( __APPLE__ ) + if(m) + { + log_error( "ERROR: unhandled code path. BufferOwningPtr allocated with mapped buffer!" ); + abort(); + } +#endif + } + ~BufferOwningPtr() { + if (map) { +#if defined( __APPLE__ ) + int error = munmap(map, mapsize); + if (error) log_error("WARNING: munmap failed in BufferOwningPtr.\n"); +#endif + } else { + if ( aligned ) + { + align_free(ptr); + } + else + { + free(ptr); + } + } + } + void reset(void *p, void *m = 0, size_t mapsize_ = 0, size_t allocsize_ = 0, bool aligned_ = false) { + if (map){ +#if defined( __APPLE__ ) + int error = munmap(map, mapsize); + if (error) log_error("WARNING: munmap failed in BufferOwningPtr.\n"); +#else + log_error( "ERROR: unhandled code path. BufferOwningPtr reset with mapped buffer!" ); + abort(); +#endif + } else { + if ( aligned ) + { + align_free(ptr); + } + else + { + free(ptr); + } + } + ptr = p; + map = m; + mapsize = mapsize_; + allocsize = allocsize_; + aligned = aligned_; +#if ! defined( __APPLE__ ) + if(m) + { + log_error( "ERROR: unhandled code path. BufferOwningPtr allocated with mapped buffer!" 
); + abort(); + } +#endif + } + operator T*() { return (T*)ptr; } + + size_t getSize() const { return allocsize; }; +}; + +#endif // _typeWrappers_h + + diff --git a/test_conformance/compatibility/test_conformance/CMakeLists.txt b/test_conformance/compatibility/test_conformance/CMakeLists.txt new file mode 100644 index 00000000..84858574 --- /dev/null +++ b/test_conformance/compatibility/test_conformance/CMakeLists.txt @@ -0,0 +1,2 @@ +add_subdirectory(api) +add_subdirectory(basic) diff --git a/test_conformance/compatibility/test_conformance/api/CMakeLists.txt b/test_conformance/compatibility/test_conformance/api/CMakeLists.txt new file mode 100644 index 00000000..50dd64ef --- /dev/null +++ b/test_conformance/compatibility/test_conformance/api/CMakeLists.txt @@ -0,0 +1,34 @@ +set(MODULE_NAME COMPATIBILITY_API) + +set(${MODULE_NAME}_SOURCES + main.c + test_bool.c + test_retain.cpp + test_retain_program.c + test_queries.cpp + test_create_kernels.c + test_kernels.c + test_api_min_max.c + test_kernel_arg_changes.cpp + test_kernel_arg_multi_setup.cpp + test_binary.cpp + test_native_kernel.cpp + test_mem_objects.cpp + test_create_context_from_type.cpp + test_device_min_data_type_align_size_alignment.cpp + test_platform.cpp + test_mem_object_info.cpp + test_null_buffer_arg.c + test_kernel_arg_info.c + ../../test_common/harness/errorHelpers.c + ../../test_common/harness/threadTesting.c + ../../test_common/harness/testHarness.c + ../../test_common/harness/kernelHelpers.c + ../../test_common/harness/typeWrappers.cpp + ../../test_common/harness/conversions.c + ../../test_common/harness/mt19937.c + ../../test_common/harness/msvc9.c + ../../test_common/harness/imageHelpers.cpp +) + +include(../../../CMakeCommon.txt) diff --git a/test_conformance/compatibility/test_conformance/api/Jamfile b/test_conformance/compatibility/test_conformance/api/Jamfile new file mode 100644 index 00000000..704b5ce1 --- /dev/null +++ b/test_conformance/compatibility/test_conformance/api/Jamfile @@ 
-0,0 +1,27 @@ +project + : requirements + gcc:-xc++ + msvc:"/TP" + ; + + +exe test_api + : main.c + test_api_min_max.c + test_binary.cpp + test_create_kernels.c + test_create_context_from_type.cpp + test_kernel_arg_changes.cpp + test_kernel_arg_multi_setup.cpp + test_kernels.c + test_native_kernel.cpp + test_queries.cpp + test_retain_program.c + test_platform.cpp + ; + +install dist + : test_api #test.lst + : debug:$(DIST)/debug/tests/test_conformance/api + release:$(DIST)/release/tests/test_conformance/api + ; diff --git a/test_conformance/compatibility/test_conformance/api/Makefile b/test_conformance/compatibility/test_conformance/api/Makefile new file mode 100644 index 00000000..44cfaee3 --- /dev/null +++ b/test_conformance/compatibility/test_conformance/api/Makefile @@ -0,0 +1,61 @@ +ifdef BUILD_WITH_ATF +ATF = -framework ATF +USE_ATF = -DUSE_ATF +endif + +SRCS = main.c \ + test_retain_program.c \ + test_queries.cpp \ + test_create_kernels.c \ + test_kernels.c \ + test_kernel_arg_info.c \ + test_api_min_max.c \ + test_kernel_arg_changes.cpp \ + test_kernel_arg_multi_setup.cpp \ + test_binary.cpp \ + test_native_kernel.cpp \ + test_create_context_from_type.cpp \ + test_platform.cpp \ + test_retain.cpp \ + test_device_min_data_type_align_size_alignment.cpp \ + test_mem_objects.cpp \ + test_bool.c \ + test_null_buffer_arg.c \ + test_mem_object_info.cpp \ + ../../test_common/harness/errorHelpers.c \ + ../../test_common/harness/threadTesting.c \ + ../../test_common/harness/testHarness.c \ + ../../test_common/harness/imageHelpers.cpp \ + ../../test_common/harness/kernelHelpers.c \ + ../../test_common/harness/typeWrappers.cpp \ + ../../test_common/harness/mt19937.c \ + ../../test_common/harness/conversions.c + +DEFINES = DONT_TEST_GARBAGE_POINTERS + +SOURCES = $(abspath $(SRCS)) +LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries +LIBPATH += -L. 
+HEADERS = +TARGET = test_api +INCLUDE = +COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32 +CC = c++ +CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE) +CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE) +LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF} + +OBJECTS := ${SOURCES:.c=.o} +OBJECTS := ${OBJECTS:.cpp=.o} + +TARGETOBJECT = +all: $(TARGET) + +$(TARGET): $(OBJECTS) + $(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES) + +clean: + rm -f $(TARGET) $(OBJECTS) + +.DEFAULT: + @echo The target \"$@\" does not exist in Makefile. diff --git a/test_conformance/compatibility/test_conformance/api/main.c b/test_conformance/compatibility/test_conformance/api/main.c new file mode 100644 index 00000000..1870bf8f --- /dev/null +++ b/test_conformance/compatibility/test_conformance/api/main.c @@ -0,0 +1,214 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/compat.h" + +#include <stdio.h> +#include <string.h> +#include "procs.h" +#include "../../test_common/harness/testHarness.h" + +#if !defined(_WIN32) +#include <unistd.h> +#endif + +// FIXME: To use certain functions in ../../test_common/harness/imageHelpers.h +// (for example, generate_random_image_data()), the tests are required to declare +// the following variables: +cl_device_type gDeviceType = CL_DEVICE_TYPE_DEFAULT; +bool gTestRounding = false; + +basefn basefn_list[] = { + test_get_platform_info, + test_get_sampler_info, + test_get_command_queue_info, + test_get_context_info, + test_get_device_info, + test_enqueue_task, + test_binary_get, + test_program_binary_create, + test_kernel_required_group_size, + + test_release_kernel_order, + test_release_during_execute, + + test_load_single_kernel, + test_load_two_kernels, + test_load_two_kernels_in_one, + test_load_two_kernels_manually, + test_get_program_info_kernel_names, + test_get_kernel_arg_info, + test_create_kernels_in_program, + test_get_kernel_info, + test_execute_kernel_local_sizes, + test_set_kernel_arg_by_index, + test_set_kernel_arg_constant, + test_set_kernel_arg_struct_array, + test_kernel_global_constant, + + test_min_max_thread_dimensions, + test_min_max_work_items_sizes, + test_min_max_work_group_size, + test_min_max_read_image_args, + test_min_max_write_image_args, + test_min_max_mem_alloc_size, + test_min_max_image_2d_width, + test_min_max_image_2d_height, + test_min_max_image_3d_width, + test_min_max_image_3d_height, + test_min_max_image_3d_depth, + test_min_max_image_array_size, + test_min_max_image_buffer_size, + test_min_max_parameter_size, + test_min_max_samplers, + test_min_max_constant_buffer_size, + test_min_max_constant_args, + test_min_max_compute_units, + test_min_max_address_bits, + test_min_max_single_fp_config, + test_min_max_double_fp_config, + test_min_max_local_mem_size, + test_min_max_kernel_preferred_work_group_size_multiple, + test_min_max_execution_capabilities, + 
test_min_max_queue_properties, + test_min_max_device_version, + test_min_max_language_version, + + test_kernel_arg_changes, + test_kernel_arg_multi_setup_random, + + test_native_kernel, + + test_create_context_from_type, + + test_platform_extensions, + test_get_platform_ids, + test_for_bool_type, + + test_repeated_setup_cleanup, + + test_retain_queue_single, + test_retain_queue_multiple, + test_retain_mem_object_single, + test_retain_mem_object_multiple, + test_min_data_type_align_size_alignment, + + test_mem_object_destructor_callback, + test_null_buffer_arg, + test_get_buffer_info, + test_get_image2d_info, + test_get_image3d_info, + test_get_image1d_info, + test_get_image1d_array_info, + test_get_image2d_array_info, +}; + + +const char *basefn_names[] = { + "get_platform_info", + "get_sampler_info", + "get_command_queue_info", + "get_context_info", + "get_device_info", + "enqueue_task", + "binary_get", + "binary_create", + "kernel_required_group_size", + + "release_kernel_order", + "release_during_execute", + + "load_single_kernel", + "load_two_kernels", + "load_two_kernels_in_one", + "load_two_kernels_manually", + "get_program_info_kernel_names", + "get_kernel_arg_info", + "create_kernels_in_program", + "get_kernel_info", + "execute_kernel_local_sizes", + "set_kernel_arg_by_index", + "set_kernel_arg_constant", + "set_kernel_arg_struct_array", + "kernel_global_constant", + + "min_max_thread_dimensions", + "min_max_work_items_sizes", + "min_max_work_group_size", + "min_max_read_image_args", + "min_max_write_image_args", + "min_max_mem_alloc_size", + "min_max_image_2d_width", + "min_max_image_2d_height", + "min_max_image_3d_width", + "min_max_image_3d_height", + "min_max_image_3d_depth", + "min_max_image_array_size", + "min_max_image_buffer_size", + "min_max_parameter_size", + "min_max_samplers", + "min_max_constant_buffer_size", + "min_max_constant_args", + "min_max_compute_units", + "min_max_address_bits", + "min_max_single_fp_config", + 
"min_max_double_fp_config", + "min_max_local_mem_size", + "min_max_kernel_preferred_work_group_size_multiple", + "min_max_execution_capabilities", + "min_max_queue_properties", + "min_max_device_version", + "min_max_language_version", + + "kernel_arg_changes", + "kernel_arg_multi_setup_random", + + "native_kernel", + + "create_context_from_type", + "platform_extensions", + + "get_platform_ids", + "bool_type", + + "repeated_setup_cleanup", + + "retain_queue_single", + "retain_queue_multiple", + "retain_mem_object_single", + "retain_mem_object_multiple", + + "min_data_type_align_size_alignment", + + "mem_object_destructor_callback", + "null_buffer_arg", + "get_buffer_info", + "get_image2d_info", + "get_image3d_info", + "get_image1d_info", + "get_image1d_array_info", + "get_image2d_array_info", +}; + +ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_list) / sizeof(basefn_list[0]))); + +int num_fns = sizeof(basefn_names) / sizeof(char *); + +int main(int argc, const char *argv[]) +{ + return runTestHarness( argc, argv, num_fns, basefn_list, basefn_names, false, false, 0 ); +} + + diff --git a/test_conformance/compatibility/test_conformance/api/procs.h b/test_conformance/compatibility/test_conformance/api/procs.h new file mode 100644 index 00000000..ff796a38 --- /dev/null +++ b/test_conformance/compatibility/test_conformance/api/procs.h @@ -0,0 +1,108 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/errorHelpers.h" +#include "../../test_common/harness/kernelHelpers.h" +#include "../../test_common/harness/typeWrappers.h" +#include "../../test_common/harness/clImageHelper.h" +#include "../../test_common/harness/imageHelpers.h" +extern float calculate_ulperror(float a, float b); + +extern int test_load_single_kernel(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_load_two_kernels(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_load_two_kernels_in_one(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_load_two_kernels_manually(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_get_program_info_kernel_names( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_create_kernels_in_program(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_enqueue_task(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_repeated_setup_cleanup(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_for_bool_type(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_platform_extensions(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_get_platform_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_get_sampler_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_get_command_queue_info(cl_device_id deviceID, cl_context 
context, cl_command_queue queue, int num_elements); +extern int test_get_context_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_get_device_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_kernel_required_group_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_binary_get(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_program_binary_create(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_release_kernel_order(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_release_during_execute(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_get_kernel_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_execute_kernel_local_sizes(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_set_kernel_arg_by_index(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_set_kernel_arg_struct(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_set_kernel_arg_constant(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_set_kernel_arg_struct_array(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_kernel_global_constant(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_min_max_thread_dimensions(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_min_max_work_items_sizes(cl_device_id deviceID, 
cl_context context, cl_command_queue queue, int num_elements); +extern int test_min_max_work_group_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_min_max_read_image_args(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_min_max_write_image_args(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_min_max_mem_alloc_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_min_max_image_2d_width(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_min_max_image_2d_height(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_min_max_image_3d_width(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_min_max_image_3d_height(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_min_max_image_3d_depth(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_min_max_image_array_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_min_max_image_buffer_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_min_max_parameter_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_min_max_samplers(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_min_max_constant_buffer_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_min_max_constant_args(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int 
test_min_max_compute_units(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_min_max_address_bits(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_min_max_single_fp_config(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_min_max_double_fp_config(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_min_max_local_mem_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_min_max_kernel_preferred_work_group_size_multiple(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_min_max_execution_capabilities(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_min_max_queue_properties(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_min_max_device_version(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_min_max_language_version(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_native_kernel(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ); + +extern int test_create_context_from_type(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_get_platform_ids(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_kernel_arg_changes(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_kernel_arg_multi_setup_random(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_retain_queue_single(cl_device_id deviceID, cl_context context, 
cl_command_queue queue, int num_elements); +extern int test_retain_queue_multiple(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_retain_mem_object_single(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_retain_mem_object_multiple(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_min_data_type_align_size_alignment(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ); + +extern int test_mem_object_destructor_callback(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_null_buffer_arg( cl_device_id device_id, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_get_buffer_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements ); +extern int test_get_image2d_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements ); +extern int test_get_image3d_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements ); +extern int test_get_image1d_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements ); +extern int test_get_image1d_array_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements ); +extern int test_get_image2d_array_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements ); +extern int test_get_kernel_arg_info( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); + diff --git a/test_conformance/compatibility/test_conformance/api/testBase.h b/test_conformance/compatibility/test_conformance/api/testBase.h new file mode 100644 index 00000000..5073b21f --- /dev/null +++ b/test_conformance/compatibility/test_conformance/api/testBase.h @@ 
-0,0 +1,31 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef _testBase_h
+#define _testBase_h
+
+#include "../../test_common/harness/compat.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "procs.h"
+
+#endif // _testBase_h
+
+
+
diff --git a/test_conformance/compatibility/test_conformance/api/test_api_min_max.c b/test_conformance/compatibility/test_conformance/api/test_api_min_max.c
new file mode 100644
index 00000000..61225e51
--- /dev/null
+++ b/test_conformance/compatibility/test_conformance/api/test_api_min_max.c
@@ -0,0 +1,2059 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// +#include "testBase.h" +#include "../../test_common/harness/typeWrappers.h" +#include "../../test_common/harness/testHarness.h" +#include +#include + +extern cl_uint gRandomSeed; + +const char *sample_single_param_kernel[] = { + "__kernel void sample_test(__global int *src)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + "}\n" }; + +const char *sample_single_param_write_kernel[] = { + "__kernel void sample_test(__global int *src)\n" + "{\n" + " int tid = get_global_id(0);\n" + " src[tid] = tid;\n" + "\n" + "}\n" }; + +const char *sample_read_image_kernel_pattern[] = { + "__kernel void sample_test( __global float *result, ", " )\n" + "{\n" + " sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;\n" + " int tid = get_global_id(0);\n" + " result[0] = 0.0f;\n", + "\n" + "}\n" }; + +const char *sample_write_image_kernel_pattern[] = { + "__kernel void sample_test( ", " )\n" + "{\n" + " int tid = get_global_id(0);\n", + "\n" + "}\n" }; + + +const char *sample_large_parmam_kernel_pattern[] = { + "__kernel void sample_test(%s, __global long *result)\n" + "{\n" + "result[0] = 0;\n" + "%s" + "\n" + "}\n" }; + +const char *sample_large_int_parmam_kernel_pattern[] = { + "__kernel void sample_test(%s, __global int *result)\n" + "{\n" + "result[0] = 0;\n" + "%s" + "\n" + "}\n" }; + +const char *sample_sampler_kernel_pattern[] = { + "__kernel void sample_test( read_only image2d_t src, __global int4 *dst", ", sampler_t sampler%d", ")\n" + "{\n" + " int tid = get_global_id(0);\n", + " dst[ 0 ] = read_imagei( src, sampler%d, (int2)( 0, 0 ) );\n", + "\n" + "}\n" }; + +const char *sample_const_arg_kernel[] = { + "__kernel void sample_test(__constant int *src1, __global int *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src1[tid];\n" + "\n" + "}\n" }; + +const char *sample_local_arg_kernel[] = { + "__kernel void sample_test(__local int *src1, __global int *global_src, __global int *dst)\n" + "{\n" + " int tid = 
get_global_id(0);\n" + "\n" + " src1[tid] = global_src[tid];\n" + " barrier(CLK_GLOBAL_MEM_FENCE);\n" + " dst[tid] = src1[tid];\n" + "\n" + "}\n" }; + +const char *sample_const_max_arg_kernel_pattern = +"__kernel void sample_test(__constant int *src1 %s, __global int *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = src1[tid];\n" +"%s" +"\n" +"}\n"; + +int test_min_max_thread_dimensions(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error, retVal; + unsigned int maxThreadDim, threadDim, i; + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper streams[1]; + size_t *threads, *localThreads; + cl_event event; + cl_int event_status; + + + /* Get the max thread dimensions */ + error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof( maxThreadDim ), &maxThreadDim, NULL ); + test_error( error, "Unable to get max work item dimensions from device" ); + + if( maxThreadDim < 3 ) + { + log_error( "ERROR: Reported max work item dimensions is less than required! 
(%d)\n", maxThreadDim ); + return -1; + } + + log_info("Reported max thread dimensions of %d.\n", maxThreadDim); + + /* Create a kernel to test with */ + if( create_single_kernel_helper( context, &program, &kernel, 1, sample_single_param_kernel, "sample_test" ) != 0 ) + { + return -1; + } + + /* Create some I/O streams */ + streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 100, NULL, &error ); + if( streams[0] == NULL ) + { + log_error("ERROR: Creating test array failed!\n"); + return -1; + } + + /* Set the arguments */ + error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] ); + test_error( error, "Unable to set kernel arguments" ); + + retVal = 0; + + /* Now try running the kernel with up to that many threads */ + for (threadDim=1; threadDim <= maxThreadDim; threadDim++) + { + threads = (size_t *)malloc( sizeof( size_t ) * maxThreadDim ); + localThreads = (size_t *)malloc( sizeof( size_t ) * maxThreadDim ); + for( i = 0; i < maxThreadDim; i++ ) + { + threads[ i ] = 1; + localThreads[i] = 1; + } + + error = clEnqueueNDRangeKernel( queue, kernel, maxThreadDim, NULL, threads, localThreads, 0, NULL, &event ); + test_error( error, "Failed clEnqueueNDRangeKernel"); + + // Verify that the event does not return an error from the execution + error = clWaitForEvents(1, &event); + test_error( error, "clWaitForEvent failed"); + error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(event_status), &event_status, NULL); + test_error( error, "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed"); + clReleaseEvent(event); + if (event_status < 0) + test_error(error, "Kernel execution event returned error"); + + /* All done */ + free( threads ); + free( localThreads ); + } + + return retVal; +} + + +int test_min_max_work_items_sizes(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + size_t *deviceMaxWorkItemSize; + unsigned int maxWorkItemDim; + + /* 
Get the max work item dimensions */ + error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof( maxWorkItemDim ), &maxWorkItemDim, NULL ); + test_error( error, "Unable to get max work item dimensions from device" ); + + log_info("CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS returned %d\n", maxWorkItemDim); + deviceMaxWorkItemSize = (size_t*)malloc(sizeof(size_t)*maxWorkItemDim); + error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t)*maxWorkItemDim, deviceMaxWorkItemSize, NULL ); + test_error( error, "clDeviceInfo for CL_DEVICE_MAX_WORK_ITEM_SIZES failed" ); + + unsigned int i; + int errors = 0; + for(i=0; i= 128 && maxParameterSize == 1024) + { + error = clGetDeviceInfo( deviceID, CL_DEVICE_TYPE, sizeof( deviceType ), &deviceType, NULL ); + test_error( error, "Unable to get device type from device" ); + + if(deviceType != CL_DEVICE_TYPE_CUSTOM) + { + maxReadImages = 127; + } + } + + // Subtract the size of the result + maxParameterSize -= sizeof(cl_mem); + + // Calculate the number we can use + if (maxParameterSize/sizeof(cl_mem) < maxReadImages) { + log_info("WARNING: Max parameter size of %d bytes limits test to %d max image arguments.\n", (int)maxParameterSize, (int)(maxParameterSize/sizeof(cl_mem))); + maxReadImages = (unsigned int)(maxParameterSize/sizeof(cl_mem)); + } + + /* Create a program with that many read args */ + programSrc = (char *)malloc( strlen( sample_read_image_kernel_pattern[ 0 ] ) + ( strlen( readArgPattern ) + 6 ) * ( maxReadImages ) + + strlen( sample_read_image_kernel_pattern[ 1 ] ) + 1 + 40240); + + strcpy( programSrc, sample_read_image_kernel_pattern[ 0 ] ); + strcat( programSrc, "read_only image2d_t srcimg0" ); + for( i = 0; i < maxReadImages-1; i++ ) + { + sprintf( readArgLine, readArgPattern, i+1 ); + strcat( programSrc, readArgLine ); + } + strcat( programSrc, sample_read_image_kernel_pattern[ 1 ] ); + for ( i = 0; i < maxReadImages; i++) { + sprintf( readArgLine, "\tresult[0] += read_imagef( 
srcimg%d, sampler, (int2)(0,0)).x;\n", i);
+        strcat( programSrc, readArgLine );
+    }
+    strcat( programSrc, sample_read_image_kernel_pattern[ 2 ] );
+
+    error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&programSrc, "sample_test");
+    test_error( error, "Failed to create the program and kernel.");
+    free( programSrc );
+
+    result = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float), NULL, &error);
+    test_error( error, "clCreateBuffer failed");
+
+    /* Create some I/O streams */
+    streams = new clMemWrapper[maxReadImages + 1];
+    for( i = 0; i < maxReadImages; i++ )
+    {
+        image_data[0]=i;
+        image_result+= image_data[0];
+        streams[i] = create_image_2d( context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, &image_format_desc, 4, 4, 0, image_data, &error );
+        test_error( error, "Unable to allocate test image" );
+    }
+
+    error = clSetKernelArg( kernel, 0, sizeof( result ), &result );
+    test_error( error, "Unable to set kernel arguments" );
+
+    /* Set the arguments */
+    for( i = 1; i < maxReadImages+1; i++ )
+    {
+        error = clSetKernelArg( kernel, i, sizeof( streams[i-1] ), &streams[i-1] );
+        test_error( error, "Unable to set kernel arguments" );
+    }
+
+    /* Now try running the kernel */
+    threads[0] = threads[1] = 1;
+    error = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, &event );
+    test_error( error, "clEnqueueNDRangeKernel failed");
+
+    // Verify that the event does not return an error from the execution
+    error = clWaitForEvents(1, &event);
+    test_error( error, "clWaitForEvent failed");
+    error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(event_status), &event_status, NULL);
+    test_error( error, "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed");
+    clReleaseEvent(event);
+    if (event_status < 0)
+        test_error(error, "Kernel execution event returned error");
+
+    error = clEnqueueReadBuffer(queue, result, CL_TRUE, 0, sizeof(cl_float), &actual_image_result, 0, NULL,
NULL); + test_error(error, "clEnqueueReadBuffer failed"); + + delete[] streams; + + if (actual_image_result != image_result) { + log_error("Result failed to verify. Got %g, expected %g.\n", actual_image_result, image_result); + return 1; + } + + return 0; +} + +int test_min_max_write_image_args(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + unsigned int maxWriteImages, i; + clProgramWrapper program; + char writeArgLine[128], *programSrc; + const char *writeArgPattern = ", write_only image2d_t dstimg%d"; + clKernelWrapper kernel; + clMemWrapper *streams; + size_t threads[2]; + cl_image_format image_format_desc; + size_t maxParameterSize; + cl_event event; + cl_int event_status; + cl_uint minRequiredWriteImages = gIsEmbedded ? 1 : 8; + + + PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID ) + image_format_desc.image_channel_order = CL_RGBA; + image_format_desc.image_channel_data_type = CL_UNORM_INT8; + + /* Get the max read image arg count */ + error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_WRITE_IMAGE_ARGS, sizeof( maxWriteImages ), &maxWriteImages, NULL ); + test_error( error, "Unable to get max write image arg count from device" ); + + if( maxWriteImages == 0 ) + { + log_info( "WARNING: Device reports 0 for a max write image arg count (write image arguments unsupported). Skipping test (implicitly passes). This is only valid if the number of image formats is also 0.\n" ); + return 0; + } + + if( maxWriteImages < minRequiredWriteImages ) + { + log_error( "ERROR: Reported max write image arg count is less than required! 
(%d)\n", maxWriteImages ); + return -1; + } + + log_info("Reported %d max write image args.\n", maxWriteImages); + + error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof( maxParameterSize ), &maxParameterSize, NULL ); + test_error( error, "Unable to get max parameter size from device" ); + + // Calculate the number we can use + if (maxParameterSize/sizeof(cl_mem) < maxWriteImages) { + log_info("WARNING: Max parameter size of %d bytes limits test to %d max image arguments.\n", (int)maxParameterSize, (int)(maxParameterSize/sizeof(cl_mem))); + maxWriteImages = (unsigned int)(maxParameterSize/sizeof(cl_mem)); + } + + /* Create a program with that many write args + 1 */ + programSrc = (char *)malloc( strlen( sample_write_image_kernel_pattern[ 0 ] ) + ( strlen( writeArgPattern ) + 6 ) * ( maxWriteImages + 1 ) + + strlen( sample_write_image_kernel_pattern[ 1 ] ) + 1 + 40240 ); + + strcpy( programSrc, sample_write_image_kernel_pattern[ 0 ] ); + strcat( programSrc, "write_only image2d_t dstimg0" ); + for( i = 1; i < maxWriteImages; i++ ) + { + sprintf( writeArgLine, writeArgPattern, i ); + strcat( programSrc, writeArgLine ); + } + strcat( programSrc, sample_write_image_kernel_pattern[ 1 ] ); + for ( i = 0; i < maxWriteImages; i++) { + sprintf( writeArgLine, "\twrite_imagef( dstimg%d, (int2)(0,0), (float4)(0,0,0,0));\n", i); + strcat( programSrc, writeArgLine ); + } + strcat( programSrc, sample_write_image_kernel_pattern[ 2 ] ); + + error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&programSrc, "sample_test"); + test_error( error, "Failed to create the program and kernel."); + free( programSrc ); + + + /* Create some I/O streams */ + streams = new clMemWrapper[maxWriteImages + 1]; + for( i = 0; i < maxWriteImages; i++ ) + { + streams[i] = create_image_2d( context, CL_MEM_READ_WRITE, &image_format_desc, 16, 16, 0, NULL, &error ); + test_error( error, "Unable to allocate test image" ); + } + + /* Set the arguments */ + for( i 
= 0; i < maxWriteImages; i++ ) + { + error = clSetKernelArg( kernel, i, sizeof( streams[i] ), &streams[i] ); + test_error( error, "Unable to set kernel arguments" ); + } + + /* Now try running the kernel */ + threads[0] = threads[1] = 16; + error = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, &event ); + test_error( error, "clEnqueueNDRangeKernel failed."); + + // Verify that the event does not return an error from the execution + error = clWaitForEvents(1, &event); + test_error( error, "clWaitForEvent failed"); + error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(event_status), &event_status, NULL); + test_error( error, "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed"); + clReleaseEvent(event); + if (event_status < 0) + test_error(error, "Kernel execution event returned error"); + + /* All done */ + delete[] streams; + return 0; +} + +int test_min_max_mem_alloc_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + cl_ulong maxAllocSize, memSize, minSizeToTry; + clMemWrapper memHdl; + + cl_ulong requiredAllocSize; + + if (gIsEmbedded) + requiredAllocSize = 1 * 1024 * 1024; + else + requiredAllocSize = 128 * 1024 * 1024; + + /* Get the max mem alloc size */ + error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL ); + test_error( error, "Unable to get max mem alloc size from device" ); + + error = clGetDeviceInfo( deviceID, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL ); + test_error( error, "Unable to get global memory size from device" ); + + if( maxAllocSize < requiredAllocSize) + { + log_error( "ERROR: Reported max allocation size is less than required %lldMB! 
(%llu or %lluMB, from a total mem size of %lldMB)\n", (requiredAllocSize / 1024) / 1024, maxAllocSize, (maxAllocSize / 1024)/1024, (memSize / 1024)/1024 ); + return -1; + } + + if( maxAllocSize < memSize / 4 ) + { + log_error( "ERROR: Reported max allocation size is less than required 1/4 of total memory! (%llu or %lluMB, from a total mem size of %lluMB)\n", maxAllocSize, (maxAllocSize / 1024)/1024, (memSize / 1024)/1024 ); + return -1; + } + + log_info("Reported max allocation size of %lld bytes (%gMB) and global mem size of %lld bytes (%gMB).\n", + maxAllocSize, maxAllocSize/(1024.0*1024.0), memSize, memSize/(1024.0*1024.0)); + + if ( memSize < maxAllocSize ) { + log_info("Global memory size is less than max allocation size, using that.\n"); + maxAllocSize = memSize; + } + + if ( maxAllocSize > (cl_ulong)4 * 1024 * 1024 * 1024) { + log_info("Limiting max allocation size to 4GB for test.\n"); + maxAllocSize = (cl_ulong)4 * 1024 * 1024 * 1024; + } + + minSizeToTry = maxAllocSize/16; + while (maxAllocSize > (maxAllocSize/4)) { + + log_info("Trying to create a buffer of size of %lld bytes (%gMB).\n", maxAllocSize, (double)maxAllocSize/(1024.0*1024.0)); + memHdl = clCreateBuffer( context, CL_MEM_READ_ONLY, (size_t)maxAllocSize, NULL, &error ); + if (error == CL_MEM_OBJECT_ALLOCATION_FAILURE || error == CL_OUT_OF_RESOURCES || error == CL_OUT_OF_HOST_MEMORY) { + log_info("\tAllocation failed at size of %lld bytes (%gMB).\n", maxAllocSize, (double)maxAllocSize/(1024.0*1024.0)); + maxAllocSize -= minSizeToTry; + continue; + } + test_error( error, "clCreateBuffer failed for maximum sized buffer."); + return 0; + } + log_error("Failed to allocate even %lld bytes (%gMB).\n", maxAllocSize, (double)maxAllocSize/(1024.0*1024.0)); + return -1; +} + +int test_min_max_image_2d_width(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + size_t maxDimension; + clMemWrapper streams[1]; + cl_image_format image_format_desc; + cl_ulong 
maxAllocSize; + cl_uint minRequiredDimension; + cl_char buffer[ 4098 ]; + size_t length; + + + PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID ) + + // Device version should fit the regex "OpenCL [0-9]+\.[0-9]+ *.*" + error = clGetDeviceInfo( deviceID, CL_DEVICE_VERSION, sizeof( buffer ), buffer, &length ); + test_error( error, "Unable to get device version string" ); + if( memcmp( buffer, "OpenCL 2.0", strlen( "OpenCL 2.0" ) ) == 0 ) + minRequiredDimension = gIsEmbedded ? 2048 : 8192; + else if( memcmp( buffer, "OpenCL 1.2", strlen( "OpenCL 1.2" ) ) == 0 ) + minRequiredDimension = gIsEmbedded ? 2048 : 8192; + else if( memcmp( buffer, "OpenCL 1.1", strlen( "OpenCL 1.1" ) ) == 0 ) + minRequiredDimension = gIsEmbedded ? 2048 : 8192; + else if ( memcmp( buffer, "OpenCL 1.0", strlen( "OpenCL 1.0" ) ) == 0 ) + minRequiredDimension = gIsEmbedded ? 2048 : 4096; + else + { + log_error( "ERROR: device version string does not match required format! (returned: %s)\n", (char *)buffer ); + return -1; + } + + + /* Just get any ol format to test with */ + error = get_8_bit_image_format( context, CL_MEM_OBJECT_IMAGE2D, CL_MEM_READ_WRITE, 0, &image_format_desc ); + test_error( error, "Unable to obtain suitable image format to test with!" ); + + /* Get the max 2d image width */ + error = clGetDeviceInfo( deviceID, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxDimension ), &maxDimension, NULL ); + test_error( error, "Unable to get max image 2d width from device" ); + + if( maxDimension < minRequiredDimension ) + { + log_error( "ERROR: Reported max image 2d width is less than required! (%d)\n", (int)maxDimension ); + return -1; + } + log_info("Max reported width is %ld.\n", maxDimension); + + /* Verify we can use the format */ + image_format_desc.image_channel_data_type = CL_UNORM_INT8; + image_format_desc.image_channel_order = CL_RGBA; + if (!is_image_format_supported( context, CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE2D, &image_format_desc)) { + log_error("CL_UNORM_INT8 CL_RGBA not supported. 
Can not test."); + return -1; + } + + /* Verify that we can actually allocate an image that large */ + error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof ( maxAllocSize ), &maxAllocSize, NULL ); + test_error( error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE." ); + if ( (cl_ulong)maxDimension*1*4 > maxAllocSize ) { + log_error("Can not allocate a large enough image (min size: %lld bytes, max allowed: %lld bytes) to test.\n", + (cl_ulong)maxDimension*1*4, maxAllocSize); + return -1; + } + + log_info("Attempting to create an image of size %d x 1 = %gMB.\n", (int)maxDimension, ((float)maxDimension*4/1024.0/1024.0)); + + /* Try to allocate a very big image */ + streams[0] = create_image_2d( context, CL_MEM_READ_ONLY, &image_format_desc, maxDimension, 1, 0, NULL, &error ); + if( ( streams[0] == NULL ) || ( error != CL_SUCCESS )) + { + print_error( error, "Image 2D creation failed for maximum width" ); + return -1; + } + + return 0; +} + +int test_min_max_image_2d_height(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + size_t maxDimension; + clMemWrapper streams[1]; + cl_image_format image_format_desc; + cl_ulong maxAllocSize; + cl_uint minRequiredDimension; + cl_char buffer[ 4098 ]; + size_t length; + + PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID ) + + // Device version should fit the regex "OpenCL [0-9]+\.[0-9]+ *.*" + error = clGetDeviceInfo( deviceID, CL_DEVICE_VERSION, sizeof( buffer ), buffer, &length ); + test_error( error, "Unable to get device version string" ); + if( memcmp( buffer, "OpenCL 2.0", strlen( "OpenCL 2.0" ) ) == 0 ) + minRequiredDimension = gIsEmbedded ? 2048 : 8192; + else if( memcmp( buffer, "OpenCL 1.2", strlen( "OpenCL 1.2" ) ) == 0 ) + minRequiredDimension = gIsEmbedded ? 2048 : 8192; + else if( memcmp( buffer, "OpenCL 1.1", strlen( "OpenCL 1.1" ) ) == 0 ) + minRequiredDimension = gIsEmbedded ? 
2048 : 8192; + else if ( memcmp( buffer, "OpenCL 1.0", strlen( "OpenCL 1.0" ) ) == 0 ) + minRequiredDimension = gIsEmbedded ? 2048 : 4096; + else + { + log_error( "ERROR: device version string does not match required format! (returned: %s)\n", (char *)buffer ); + return -1; + } + + /* Just get any ol format to test with */ + error = get_8_bit_image_format( context, CL_MEM_OBJECT_IMAGE2D, CL_MEM_READ_WRITE, 0, &image_format_desc ); + test_error( error, "Unable to obtain suitable image format to test with!" ); + + /* Get the max 2d image width */ + error = clGetDeviceInfo( deviceID, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof( maxDimension ), &maxDimension, NULL ); + test_error( error, "Unable to get max image 2d height from device" ); + + if( maxDimension < minRequiredDimension ) + { + log_error( "ERROR: Reported max image 2d height is less than required! (%d)\n", (int)maxDimension ); + return -1; + } + log_info("Max reported height is %ld.\n", maxDimension); + + /* Verify we can use the format */ + image_format_desc.image_channel_data_type = CL_UNORM_INT8; + image_format_desc.image_channel_order = CL_RGBA; + if (!is_image_format_supported( context, CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE2D, &image_format_desc)) { + log_error("CL_UNORM_INT8 CL_RGBA not supported. Can not test."); + return -1; + } + + /* Verify that we can actually allocate an image that large */ + error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof ( maxAllocSize ), &maxAllocSize, NULL ); + test_error( error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE." 
); + if ( (cl_ulong)maxDimension*1*4 > maxAllocSize ) { + log_error("Can not allocate a large enough image (min size: %lld bytes, max allowed: %lld bytes) to test.\n", + (cl_ulong)maxDimension*1*4, maxAllocSize); + return -1; + } + + log_info("Attempting to create an image of size 1 x %d = %gMB.\n", (int)maxDimension, ((float)maxDimension*4/1024.0/1024.0)); + + /* Try to allocate a very big image */ + streams[0] = create_image_2d( context, CL_MEM_READ_ONLY, &image_format_desc, 1, maxDimension, 0, NULL, &error ); + if( ( streams[0] == NULL ) || ( error != CL_SUCCESS )) + { + print_error( error, "Image 2D creation failed for maximum height" ); + return -1; + } + + return 0; +} + +int test_min_max_image_3d_width(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + size_t maxDimension; + clMemWrapper streams[1]; + cl_image_format image_format_desc; + cl_ulong maxAllocSize; + + + PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( deviceID ) + + /* Just get any ol format to test with */ + error = get_8_bit_image_format( context, CL_MEM_OBJECT_IMAGE3D, CL_MEM_READ_WRITE, 0, &image_format_desc ); + test_error( error, "Unable to obtain suitable image format to test with!" ); + + /* Get the max 2d image width */ + error = clGetDeviceInfo( deviceID, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof( maxDimension ), &maxDimension, NULL ); + test_error( error, "Unable to get max image 3d width from device" ); + + if( maxDimension < 2048 ) + { + log_error( "ERROR: Reported max image 3d width is less than required! (%d)\n", (int)maxDimension ); + return -1; + } + log_info("Max reported width is %ld.\n", maxDimension); + + /* Verify we can use the format */ + image_format_desc.image_channel_data_type = CL_UNORM_INT8; + image_format_desc.image_channel_order = CL_RGBA; + if (!is_image_format_supported( context, CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE3D, &image_format_desc)) { + log_error("CL_UNORM_INT8 CL_RGBA not supported. 
Can not test."); + return -1; + } + + /* Verify that we can actually allocate an image that large */ + error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof ( maxAllocSize ), &maxAllocSize, NULL ); + test_error( error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE." ); + if ( (cl_ulong)maxDimension*2*4 > maxAllocSize ) { + log_error("Can not allocate a large enough image (min size: %lld bytes, max allowed: %lld bytes) to test.\n", + (cl_ulong)maxDimension*2*4, maxAllocSize); + return -1; + } + + log_info("Attempting to create an image of size %d x 1 x 2 = %gMB.\n", (int)maxDimension, (2*(float)maxDimension*4/1024.0/1024.0)); + + /* Try to allocate a very big image */ + streams[0] = create_image_3d( context, CL_MEM_READ_ONLY, &image_format_desc, maxDimension, 1, 2, 0, 0, NULL, &error ); + if( ( streams[0] == NULL ) || ( error != CL_SUCCESS )) + { + print_error( error, "Image 3D creation failed for maximum width" ); + return -1; + } + + return 0; +} + +int test_min_max_image_3d_height(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + size_t maxDimension; + clMemWrapper streams[1]; + cl_image_format image_format_desc; + cl_ulong maxAllocSize; + + + PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( deviceID ) + + /* Just get any ol format to test with */ + error = get_8_bit_image_format( context, CL_MEM_OBJECT_IMAGE3D, CL_MEM_READ_WRITE, 0, &image_format_desc ); + test_error( error, "Unable to obtain suitable image format to test with!" ); + + /* Get the max 2d image width */ + error = clGetDeviceInfo( deviceID, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof( maxDimension ), &maxDimension, NULL ); + test_error( error, "Unable to get max image 3d height from device" ); + + if( maxDimension < 2048 ) + { + log_error( "ERROR: Reported max image 3d height is less than required! 
(%d)\n", (int)maxDimension ); + return -1; + } + log_info("Max reported height is %ld.\n", maxDimension); + + /* Verify we can use the format */ + image_format_desc.image_channel_data_type = CL_UNORM_INT8; + image_format_desc.image_channel_order = CL_RGBA; + if (!is_image_format_supported( context, CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE3D, &image_format_desc)) { + log_error("CL_UNORM_INT8 CL_RGBA not supported. Can not test."); + return -1; + } + + /* Verify that we can actually allocate an image that large */ + error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof ( maxAllocSize ), &maxAllocSize, NULL ); + test_error( error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE." ); + if ( (cl_ulong)maxDimension*2*4 > maxAllocSize ) { + log_error("Can not allocate a large enough image (min size: %lld bytes, max allowed: %lld bytes) to test.\n", + (cl_ulong)maxDimension*2*4, maxAllocSize); + return -1; + } + + log_info("Attempting to create an image of size 1 x %d x 2 = %gMB.\n", (int)maxDimension, (2*(float)maxDimension*4/1024.0/1024.0)); + + /* Try to allocate a very big image */ + streams[0] = create_image_3d( context, CL_MEM_READ_ONLY, &image_format_desc, 1, maxDimension, 2, 0, 0, NULL, &error ); + if( ( streams[0] == NULL ) || ( error != CL_SUCCESS )) + { + print_error( error, "Image 3D creation failed for maximum height" ); + return -1; + } + + return 0; +} + + +int test_min_max_image_3d_depth(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + size_t maxDimension; + clMemWrapper streams[1]; + cl_image_format image_format_desc; + cl_ulong maxAllocSize; + + + PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( deviceID ) + + /* Just get any ol format to test with */ + error = get_8_bit_image_format( context, CL_MEM_OBJECT_IMAGE3D, CL_MEM_READ_WRITE, 0, &image_format_desc ); + test_error( error, "Unable to obtain suitable image format to test with!" 
); + + /* Get the max 2d image width */ + error = clGetDeviceInfo( deviceID, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof( maxDimension ), &maxDimension, NULL ); + test_error( error, "Unable to get max image 3d depth from device" ); + + if( maxDimension < 2048 ) + { + log_error( "ERROR: Reported max image 3d depth is less than required! (%d)\n", (int)maxDimension ); + return -1; + } + log_info("Max reported depth is %ld.\n", maxDimension); + + /* Verify we can use the format */ + image_format_desc.image_channel_data_type = CL_UNORM_INT8; + image_format_desc.image_channel_order = CL_RGBA; + if (!is_image_format_supported( context, CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE3D, &image_format_desc)) { + log_error("CL_UNORM_INT8 CL_RGBA not supported. Can not test."); + return -1; + } + + /* Verify that we can actually allocate an image that large */ + error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof ( maxAllocSize ), &maxAllocSize, NULL ); + test_error( error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE." 
); + if ( (cl_ulong)maxDimension*1*4 > maxAllocSize ) { + log_error("Can not allocate a large enough image (min size: %lld bytes, max allowed: %lld bytes) to test.\n", + (cl_ulong)maxDimension*1*4, maxAllocSize); + return -1; + } + + log_info("Attempting to create an image of size 1 x 1 x %d = %gMB.\n", (int)maxDimension, ((float)maxDimension*4/1024.0/1024.0)); + + /* Try to allocate a very big image */ + streams[0] = create_image_3d( context, CL_MEM_READ_ONLY, &image_format_desc, 1, 1, maxDimension, 0, 0, NULL, &error ); + if( ( streams[0] == NULL ) || ( error != CL_SUCCESS )) + { + print_error( error, "Image 3D creation failed for maximum depth" ); + return -1; + } + + return 0; +} + +int test_min_max_image_array_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + size_t maxDimension; + clMemWrapper streams[1]; + cl_image_format image_format_desc; + cl_ulong maxAllocSize; + size_t minRequiredDimension = gIsEmbedded ? 256 : 2048; + + PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID ); + + /* Just get any ol format to test with */ + error = get_8_bit_image_format( context, CL_MEM_OBJECT_IMAGE2D_ARRAY, CL_MEM_READ_WRITE, 0, &image_format_desc ); + test_error( error, "Unable to obtain suitable image format to test with!" ); + + /* Get the max image array width */ + error = clGetDeviceInfo( deviceID, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, sizeof( maxDimension ), &maxDimension, NULL ); + test_error( error, "Unable to get max image array size from device" ); + + if( maxDimension < minRequiredDimension ) + { + log_error( "ERROR: Reported max image array size is less than required! 
(%d)\n", (int)maxDimension ); + return -1; + } + log_info("Max reported image array size is %ld.\n", maxDimension); + + /* Verify we can use the format */ + image_format_desc.image_channel_data_type = CL_UNORM_INT8; + image_format_desc.image_channel_order = CL_RGBA; + if (!is_image_format_supported( context, CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE2D_ARRAY, &image_format_desc)) { + log_error("CL_UNORM_INT8 CL_RGBA not supported. Can not test."); + return -1; + } + + /* Verify that we can actually allocate an image that large */ + error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof ( maxAllocSize ), &maxAllocSize, NULL ); + test_error( error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE." ); + if ( (cl_ulong)maxDimension*1*4 > maxAllocSize ) { + log_error("Can not allocate a large enough image (min size: %lld bytes, max allowed: %lld bytes) to test.\n", + (cl_ulong)maxDimension*1*4, maxAllocSize); + return -1; + } + + log_info("Attempting to create an image of size 1 x 1 x %d = %gMB.\n", (int)maxDimension, ((float)maxDimension*4/1024.0/1024.0)); + + /* Try to allocate a very big image */ + streams[0] = create_image_2d_array( context, CL_MEM_READ_ONLY, &image_format_desc, 1, 1, maxDimension, 0, 0, NULL, &error ); + if( ( streams[0] == NULL ) || ( error != CL_SUCCESS )) + { + print_error( error, "2D Image Array creation failed for maximum array size" ); + return -1; + } + + return 0; +} + +int test_min_max_image_buffer_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + size_t maxDimensionPixels; + clMemWrapper streams[2]; + cl_image_format image_format_desc = {0}; + cl_ulong maxAllocSize; + size_t minRequiredDimension = gIsEmbedded ? 
2048 : 65536; + unsigned int i = 0; + size_t pixelBytes = 0; + + PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID ); + + /* Get the max memory allocation size */ + error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof ( maxAllocSize ), &maxAllocSize, NULL ); + test_error( error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE." ); + + /* Get the max image array width */ + error = clGetDeviceInfo( deviceID, CL_DEVICE_IMAGE_MAX_BUFFER_SIZE, sizeof( maxDimensionPixels ), &maxDimensionPixels, NULL ); + test_error( error, "Unable to get max image buffer size from device" ); + + if( maxDimensionPixels < minRequiredDimension ) + { + log_error( "ERROR: Reported max image buffer size is less than required! (%d)\n", (int)maxDimensionPixels ); + return -1; + } + log_info("Max reported image buffer size is %ld pixels.\n", maxDimensionPixels); + + pixelBytes = maxAllocSize / maxDimensionPixels; + if ( pixelBytes == 0 ) + { + log_error( "Value of CL_DEVICE_IMAGE_MAX_BUFFER_SIZE is greater than CL_MAX_MEM_ALLOC_SIZE so there is no way to allocate image of maximum size!\n" ); + return -1; + } + + error = -1; + for ( i = pixelBytes; i > 0; --i ) + { + error = get_8_bit_image_format( context, CL_MEM_OBJECT_IMAGE1D, CL_MEM_READ_ONLY, i, &image_format_desc ); + if ( error == CL_SUCCESS ) + { + pixelBytes = i; + break; + } + } + test_error( error, "Device does not support format to be used to allocate image of CL_DEVICE_IMAGE_MAX_BUFFER_SIZE\n" ); + + log_info("Attempting to create an 1D image with channel order %s from buffer of size %d = %gMB.\n", + GetChannelOrderName( image_format_desc.image_channel_order ), (int)maxDimensionPixels, ((float)maxDimensionPixels*pixelBytes/1024.0/1024.0)); + + /* Try to allocate a buffer */ + streams[0] = clCreateBuffer( context, CL_MEM_READ_ONLY, maxDimensionPixels*pixelBytes, NULL, &error ); + if( ( streams[0] == NULL ) || ( error != CL_SUCCESS )) + { + print_error( error, "Buffer creation failed for maximum image buffer size" ); + return -1; + 
} + + /* Try to allocate a 1D image array from buffer */ + streams[1] = create_image_1d( context, CL_MEM_READ_ONLY, &image_format_desc, maxDimensionPixels, 0, NULL, streams[0], &error ); + if( ( streams[0] == NULL ) || ( error != CL_SUCCESS )) + { + print_error( error, "1D Image from buffer creation failed for maximum image buffer size" ); + return -1; + } + + return 0; +} + + + +int test_min_max_parameter_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error, retVal, i; + size_t maxSize; + char *programSrc; + char *ptr; + size_t numberExpected; + long numberOfIntParametersToTry; + char *argumentLine, *codeLines; + void *data; + cl_long long_result, expectedResult; + cl_int int_result; + size_t decrement; + cl_event event; + cl_int event_status; + + + /* Get the max param size */ + error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof( maxSize ), &maxSize, NULL ); + test_error( error, "Unable to get max parameter size from device" ); + + if( ((!gIsEmbedded) && (maxSize < 1024)) || ((gIsEmbedded) && (maxSize < 256)) ) + { + log_error( "ERROR: Reported max parameter size is less than required! (%d)\n", (int)maxSize ); + return -1; + } + + /* The embedded profile does not require longs, so use ints */ + if(gIsEmbedded) + numberOfIntParametersToTry = numberExpected = (maxSize-sizeof(cl_mem))/sizeof(cl_int); + else + numberOfIntParametersToTry = numberExpected = (maxSize-sizeof(cl_mem))/sizeof(cl_long); + + decrement = (size_t)(numberOfIntParametersToTry/8); + if (decrement < 1) + decrement = 1; + log_info("Reported max parameter size of %d bytes.\n", (int)maxSize); + + while (numberOfIntParametersToTry > 0) { + // These need to be inside to be deallocated automatically on each loop iteration. 
+ clProgramWrapper program; + clMemWrapper mem; + clKernelWrapper kernel; + + if(gIsEmbedded) + { + log_info("Trying a kernel with %ld int arguments (%ld bytes) and one cl_mem (%ld bytes) for %ld bytes total.\n", + numberOfIntParametersToTry, sizeof(cl_int)*numberOfIntParametersToTry, sizeof(cl_mem), + sizeof(cl_mem)+numberOfIntParametersToTry*sizeof(cl_int)); + } + else + { + log_info("Trying a kernel with %ld long arguments (%ld bytes) and one cl_mem (%ld bytes) for %ld bytes total.\n", + numberOfIntParametersToTry, sizeof(cl_long)*numberOfIntParametersToTry, sizeof(cl_mem), + sizeof(cl_mem)+numberOfIntParametersToTry*sizeof(cl_long)); + } + + // Allocate memory for the program storage + data = malloc(sizeof(cl_long)*numberOfIntParametersToTry); + + argumentLine = (char*)malloc(sizeof(char)*numberOfIntParametersToTry*32); + codeLines = (char*)malloc(sizeof(char)*numberOfIntParametersToTry*32); + programSrc = (char*)malloc(sizeof(char)*(numberOfIntParametersToTry*64+1024)); + argumentLine[0] = '\0'; + codeLines[0] = '\0'; + programSrc[0] = '\0'; + + // Generate our results + expectedResult = 0; + for (i=0; i<(int)numberOfIntParametersToTry; i++) + { + if( gHasLong ) + { + ((cl_long *)data)[i] = i; + expectedResult += i; + } + else + { + ((cl_int *)data)[i] = i; + expectedResult += i; + } + } + + // Build the program + if( gHasLong) + sprintf(argumentLine, "%s", "long arg0"); + else + sprintf(argumentLine, "%s", "int arg0"); + + sprintf(codeLines, "%s", "result[0] += arg0;"); + for (i=1; i<(int)numberOfIntParametersToTry; i++) + { + if( gHasLong) + sprintf(argumentLine + strlen( argumentLine), ", long arg%d", i); + else + sprintf(argumentLine + strlen( argumentLine), ", int arg%d", i); + + sprintf(codeLines + strlen( codeLines), "\nresult[0] += arg%d;", i); + } + + /* Create a kernel to test with */ + sprintf( programSrc, gHasLong ? 
sample_large_parmam_kernel_pattern[0]: + sample_large_int_parmam_kernel_pattern[0], argumentLine, codeLines); + + ptr = programSrc; + if( create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&ptr, "sample_test" ) != 0 ) + { + log_info("Create program failed, decrementing number of parameters to try.\n"); + numberOfIntParametersToTry -= decrement; + continue; + } + + /* Try to set a large argument to the kernel */ + retVal = 0; + + mem = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_long), NULL, &error); + test_error(error, "clCreateBuffer failed"); + + for (i=0; i<(int)numberOfIntParametersToTry; i++) { + if(gHasLong) + error = clSetKernelArg(kernel, i, sizeof(cl_long), &(((cl_long*)data)[i])); + else + error = clSetKernelArg(kernel, i, sizeof(cl_int), &(((cl_int*)data)[i])); + + if (error != CL_SUCCESS) { + log_info( "clSetKernelArg failed (%s), decrementing number of parameters to try.\n", IGetErrorString(error)); + numberOfIntParametersToTry -= decrement; + break; + } + } + if (error != CL_SUCCESS) + continue; + + + error = clSetKernelArg(kernel, i, sizeof(cl_mem), &mem); + if (error != CL_SUCCESS) { + log_info( "clSetKernelArg failed (%s), decrementing number of parameters to try.\n", IGetErrorString(error)); + numberOfIntParametersToTry -= decrement; + continue; + } + + size_t globalDim[3]={1,1,1}, localDim[3]={1,1,1}; + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, globalDim, localDim, 0, NULL, &event); + if (error != CL_SUCCESS) { + log_info( "clEnqueueNDRangeKernel failed (%s), decrementing number of parameters to try.\n", IGetErrorString(error)); + numberOfIntParametersToTry -= decrement; + continue; + } + + // Verify that the event does not return an error from the execution + error = clWaitForEvents(1, &event); + test_error( error, "clWaitForEvent failed"); + error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(event_status), &event_status, NULL); + test_error( error, 
"clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed"); + clReleaseEvent(event); + if (event_status < 0) + test_error(error, "Kernel execution event returned error"); + + if(gHasLong) + error = clEnqueueReadBuffer(queue, mem, CL_TRUE, 0, sizeof(cl_long), &long_result, 0, NULL, NULL); + else + error = clEnqueueReadBuffer(queue, mem, CL_TRUE, 0, sizeof(cl_int), &int_result, 0, NULL, NULL); + + test_error(error, "clEnqueueReadBuffer failed") + + free(data); + free(argumentLine); + free(codeLines); + free(programSrc); + + if(gHasLong) + { + if (long_result != expectedResult) { + log_error("Expected result (%lld) does not equal actual result (%lld).\n", expectedResult, long_result); + numberOfIntParametersToTry -= decrement; + continue; + } else { + log_info("Results verified at %ld bytes of arguments.\n", sizeof(cl_mem)+numberOfIntParametersToTry*sizeof(cl_long)); + break; + } + } + else + { + if (int_result != expectedResult) { + log_error("Expected result (%lld) does not equal actual result (%d).\n", expectedResult, int_result); + numberOfIntParametersToTry -= decrement; + continue; + } else { + log_info("Results verified at %ld bytes of arguments.\n", sizeof(cl_mem)+numberOfIntParametersToTry*sizeof(cl_int)); + break; + } + } + } + + if (numberOfIntParametersToTry == (long)numberExpected) + return 0; + return -1; +} + +int test_min_max_samplers(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + cl_uint maxSamplers, i; + clProgramWrapper program; + clKernelWrapper kernel; + char *programSrc, samplerLine[1024]; + size_t maxParameterSize; + cl_event event; + cl_int event_status; + cl_uint minRequiredSamplers = gIsEmbedded ? 
8 : 16; + + + PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID ) + + /* Get the max value */ + error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_SAMPLERS, sizeof( maxSamplers ), &maxSamplers, NULL ); + test_error( error, "Unable to get max sampler count from device" ); + + if( maxSamplers < minRequiredSamplers ) + { + log_error( "ERROR: Reported max sampler count is less than required! (%d)\n", (int)maxSamplers ); + return -1; + } + + log_info("Reported max %d samplers.\n", maxSamplers); + + error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof( maxParameterSize ), &maxParameterSize, NULL ); + test_error( error, "Unable to get max parameter size from device" ); + + // Subtract the size of the result + maxParameterSize -= 2*sizeof(cl_mem); + + // Calculate the number we can use + if (maxParameterSize/sizeof(cl_sampler) < maxSamplers) { + log_info("WARNING: Max parameter size of %d bytes limits test to %d max sampler arguments.\n", (int)maxParameterSize, (int)(maxParameterSize/sizeof(cl_sampler))); + maxSamplers = (unsigned int)(maxParameterSize/sizeof(cl_sampler)); + } + + /* Create a kernel to test with */ + programSrc = (char *)malloc( ( strlen( sample_sampler_kernel_pattern[ 1 ] ) + 8 ) * ( maxSamplers ) + + strlen( sample_sampler_kernel_pattern[ 0 ] ) + strlen( sample_sampler_kernel_pattern[ 2 ] ) + + ( strlen( sample_sampler_kernel_pattern[ 3 ] ) + 8 ) * maxSamplers + + strlen( sample_sampler_kernel_pattern[ 4 ] ) ); + strcpy( programSrc, sample_sampler_kernel_pattern[ 0 ] ); + for( i = 0; i < maxSamplers; i++ ) + { + sprintf( samplerLine, sample_sampler_kernel_pattern[ 1 ], i ); + strcat( programSrc, samplerLine ); + } + strcat( programSrc, sample_sampler_kernel_pattern[ 2 ] ); + for( i = 0; i < maxSamplers; i++ ) + { + sprintf( samplerLine, sample_sampler_kernel_pattern[ 3 ], i ); + strcat( programSrc, samplerLine ); + } + strcat( programSrc, sample_sampler_kernel_pattern[ 4 ] ); + + + error = create_single_kernel_helper(context, &program, &kernel, 1, 
(const char **)&programSrc, "sample_test"); + test_error( error, "Failed to create the program and kernel."); + + // We have to set up some fake parameters so it'll work + clSamplerWrapper *samplers = new clSamplerWrapper[maxSamplers]; + + cl_image_format format = { CL_RGBA, CL_SIGNED_INT8 }; + + clMemWrapper image = create_image_2d( context, CL_MEM_READ_WRITE, &format, 16, 16, 0, NULL, &error ); + test_error( error, "Unable to create a test image" ); + + clMemWrapper stream = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), 16, NULL, &error ); + test_error( error, "Unable to create test buffer" ); + + error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &image ); + error |= clSetKernelArg( kernel, 1, sizeof( cl_mem ), &stream ); + test_error( error, "Unable to set kernel arguments" ); + for( i = 0; i < maxSamplers; i++ ) + { + samplers[ i ] = clCreateSampler( context, CL_FALSE, CL_ADDRESS_NONE, CL_FILTER_NEAREST, &error ); + test_error( error, "Unable to create sampler" ); + + error = clSetKernelArg( kernel, 2 + i, sizeof( cl_sampler ), &samplers[ i ] ); + test_error( error, "Unable to set sampler argument" ); + } + + size_t globalDim[3]={1,1,1}, localDim[3]={1,1,1}; + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, globalDim, localDim, 0, NULL, &event); + test_error(error, "clEnqueueNDRangeKernel failed with maximum number of samplers."); + + // Verify that the event does not return an error from the execution + error = clWaitForEvents(1, &event); + test_error( error, "clWaitForEvent failed"); + error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(event_status), &event_status, NULL); + test_error( error, "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed"); + clReleaseEvent(event); + if (event_status < 0) + test_error(error, "Kernel execution event returned error"); + + free( programSrc ); + delete[] samplers; + return 0; +} + +#define PASSING_FRACTION 4 +int test_min_max_constant_buffer_size(cl_device_id deviceID, 
cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper streams[3]; + size_t threads[1], localThreads[1]; + cl_int *constantData, *resultData; + cl_ulong maxSize, stepSize, currentSize; + int i; + cl_event event; + cl_int event_status; + MTdata d; + + /* Verify our test buffer won't be bigger than allowed */ + error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof( maxSize ), &maxSize, 0 ); + test_error( error, "Unable to get max constant buffer size" ); + + if( ( 0 == gIsEmbedded && maxSize < 64L * 1024L ) || maxSize < 1L * 1024L ) + { + log_error( "ERROR: Reported max constant buffer size less than required by OpenCL 1.0 (reported %d KB)\n", (int)( maxSize / 1024L ) ); + return -1; + } + + log_info("Reported max constant buffer size of %lld bytes.\n", maxSize); + + /* Create a kernel to test with */ + if( create_single_kernel_helper( context, &program, &kernel, 1, sample_const_arg_kernel, "sample_test" ) != 0 ) + { + return -1; + } + + /* Try the returned max size and decrease it until we get one that works. 
*/ + stepSize = maxSize/16; + currentSize = maxSize; + int allocPassed = 0; + d = init_genrand( gRandomSeed ); + while (!allocPassed && currentSize >= maxSize/PASSING_FRACTION) { + log_info("Attempting to allocate constant buffer of size %lld bytes\n", maxSize); + + /* Create some I/O streams */ + size_t sizeToAllocate = ((size_t)currentSize/sizeof( cl_int ))*sizeof(cl_int); + size_t numberOfInts = sizeToAllocate/sizeof(cl_int); + constantData = (cl_int *)malloc( sizeToAllocate); + for(i=0; i<(int)(numberOfInts); i++) + constantData[i] = (int)genrand_int32(d); + + streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeToAllocate, constantData, &error); + test_error( error, "Creating test array failed" ); + streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeToAllocate, NULL, &error); + test_error( error, "Creating test array failed" ); + + + /* Set the arguments */ + error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0]); + test_error( error, "Unable to set indexed kernel arguments" ); + error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]); + test_error( error, "Unable to set indexed kernel arguments" ); + + + /* Test running the kernel and verifying it */ + threads[0] = numberOfInts; + localThreads[0] = 1; + log_info("Filling constant buffer with %d cl_ints (%d bytes).\n", (int)threads[0], (int)(threads[0]*sizeof(cl_int))); + + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, &event ); + /* If we failed due to a resource issue, reduce the size and try again. 
*/ + if ((error == CL_OUT_OF_RESOURCES) || (error == CL_MEM_OBJECT_ALLOCATION_FAILURE) || (error == CL_OUT_OF_HOST_MEMORY)) { + log_info("Kernel enqueue failed at size %lld, trying at a reduced size.\n", currentSize); + currentSize -= stepSize; + free(constantData); + continue; + } + test_error( error, "clEnqueueNDRangeKernel with maximum constant buffer size failed."); + + // Verify that the event does not return an error from the execution + error = clWaitForEvents(1, &event); + test_error( error, "clWaitForEvent failed"); + error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(event_status), &event_status, NULL); + test_error( error, "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed"); + clReleaseEvent(event); + if (event_status < 0) { + if ((event_status == CL_OUT_OF_RESOURCES) || (event_status == CL_MEM_OBJECT_ALLOCATION_FAILURE) || (event_status == CL_OUT_OF_HOST_MEMORY)) { + log_info("Kernel event indicates failure at size %lld, trying at a reduced size.\n", currentSize); + currentSize -= stepSize; + free(constantData); + continue; + } else { + test_error(error, "Kernel execution event returned error"); + } + } + + /* Otherwise we did not fail due to resource issues. 
*/ + allocPassed = 1; + + resultData = (cl_int *)malloc(sizeToAllocate); + error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, sizeToAllocate, resultData, 0, NULL, NULL); + test_error( error, "clEnqueueReadBuffer failed"); + + for(i=0; i<(int)(numberOfInts); i++) + if (constantData[i] != resultData[i]) { + log_error("Data failed to verify: constantData[%d]=%d != resultData[%d]=%d\n", + i, constantData[i], i, resultData[i]); + free( constantData ); + free(resultData); + free_mtdata(d); d = NULL; + return -1; + } + + free( constantData ); + free(resultData); + } + free_mtdata(d); d = NULL; + + if (allocPassed) { + if (currentSize < maxSize/PASSING_FRACTION) { + log_error("Failed to allocate at least 1/4 of the reported constant size.\n"); + return -1; + } else if (currentSize != maxSize) { + log_info("Passed at reduced size. (%lld of %lld bytes)\n", currentSize, maxSize); + return 0; + } + return 0; + } + return -1; +} + +int test_min_max_constant_args(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper *streams; + size_t threads[1], localThreads[1]; + cl_uint i, maxArgs; + cl_ulong maxSize; + cl_ulong maxParameterSize; + size_t individualBufferSize; + char *programSrc, *constArgs, *str2; + char str[512]; + const char *ptr; + cl_event event; + cl_int event_status; + + + /* Verify our test buffer won't be bigger than allowed */ + error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_CONSTANT_ARGS, sizeof( maxArgs ), &maxArgs, 0 ); + test_error( error, "Unable to get max constant arg count" ); + + error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof( maxParameterSize ), &maxParameterSize, NULL ); + test_error( error, "Unable to get max parameter size from device" ); + + // Subtract the size of the result + maxParameterSize -= sizeof(cl_mem); + + // Calculate the number we can use + if (maxParameterSize/sizeof(cl_mem) < maxArgs) { + 
log_info("WARNING: Max parameter size of %d bytes limits test to %d max image arguments.\n", (int)maxParameterSize, (int)(maxParameterSize/sizeof(cl_mem))); + maxArgs = (unsigned int)(maxParameterSize/sizeof(cl_mem)); + } + + + if( maxArgs < (gIsEmbedded ? 4 : 8) ) + { + log_error( "ERROR: Reported max constant arg count less than required by OpenCL 1.0 (reported %d)\n", (int)maxArgs ); + return -1; + } + + error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof( maxSize ), &maxSize, 0 ); + test_error( error, "Unable to get max constant buffer size" ); + individualBufferSize = ((int)maxSize/2)/maxArgs; + + log_info("Reported max constant arg count of %d and max constant buffer size of %d. Test will attempt to allocate half of that, or %d buffers of size %d.\n", + (int)maxArgs, (int)maxSize, (int)maxArgs, (int)individualBufferSize); + + str2 = (char*)malloc(sizeof(char)*32*(maxArgs+2)); + constArgs = (char*)malloc(sizeof(char)*32*(maxArgs+2)); + programSrc = (char*)malloc(sizeof(char)*32*2*(maxArgs+2)+1024); + + /* Create a test program */ + constArgs[0] = 0; + str2[0] = 0; + for( i = 0; i < maxArgs-1; i++ ) + { + sprintf( str, ", __constant int *src%d", (int)( i + 2 ) ); + strcat( constArgs, str ); + sprintf( str2 + strlen( str2), "\tdst[tid] += src%d[tid];\n", (int)(i+2)); + if (strlen(str2) > (sizeof(char)*32*(maxArgs+2)-32) || strlen(constArgs) > (sizeof(char)*32*(maxArgs+2)-32)) { + log_info("Limiting number of arguments tested to %d due to test program allocation size.\n", i); + break; + } + } + sprintf( programSrc, sample_const_max_arg_kernel_pattern, constArgs, str2 ); + + /* Create a kernel to test with */ + ptr = programSrc; + if( create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "sample_test" ) != 0 ) + { + return -1; + } + + /* Create some I/O streams */ + streams = new clMemWrapper[ maxArgs + 1 ]; + for( i = 0; i < maxArgs + 1; i++ ) + { + streams[i] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), 
individualBufferSize, NULL, &error); + test_error( error, "Creating test array failed" ); + } + + /* Set the arguments */ + for( i = 0; i < maxArgs + 1; i++ ) + { + error = clSetKernelArg(kernel, i, sizeof( streams[i] ), &streams[i]); + test_error( error, "Unable to set kernel argument" ); + } + + /* Test running the kernel and verifying it */ + threads[0] = (size_t)10; + while (threads[0]*sizeof(cl_int) > individualBufferSize) + threads[0]--; + + error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] ); + test_error( error, "Unable to get work group size to use" ); + + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, &event ); + test_error( error, "clEnqueueNDRangeKernel failed"); + + // Verify that the event does not return an error from the execution + error = clWaitForEvents(1, &event); + test_error( error, "clWaitForEvent failed"); + error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(event_status), &event_status, NULL); + test_error( error, "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed"); + clReleaseEvent(event); + if (event_status < 0) + test_error(error, "Kernel execution event returned error"); + + error = clFinish(queue); + test_error( error, "clFinish failed."); + + delete [] streams; + free(str2); + free(constArgs); + free(programSrc); + return 0; +} + +int test_min_max_compute_units(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + cl_uint value; + + + error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof( value ), &value, 0 ); + test_error( error, "Unable to get compute unit count" ); + + if( value < 1 ) + { + log_error( "ERROR: Reported compute unit count less than required by OpenCL 1.0 (reported %d)\n", (int)value ); + return -1; + } + + log_info("Reported %d max compute units.\n", value); + + return 0; +} + +int test_min_max_address_bits(cl_device_id deviceID, cl_context 
context, cl_command_queue queue, int num_elements) +{ + int error; + cl_uint value; + + + error = clGetDeviceInfo( deviceID, CL_DEVICE_ADDRESS_BITS, sizeof( value ), &value, 0 ); + test_error( error, "Unable to get address bit count" ); + + if( value != 32 && value != 64 ) + { + log_error( "ERROR: Reported address bit count not valid by OpenCL 1.0 (reported %d)\n", (int)value ); + return -1; + } + + log_info("Reported %d device address bits.\n", value); + + return 0; +} + +int test_min_max_single_fp_config(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + cl_device_fp_config value; + char profile[128] = ""; + + error = clGetDeviceInfo( deviceID, CL_DEVICE_SINGLE_FP_CONFIG, sizeof( value ), &value, 0 ); + test_error( error, "Unable to get device single fp config" ); + + //Check to see if we are an embedded profile device + if((error = clGetDeviceInfo( deviceID, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL ))) + { + log_error( "FAILURE: Unable to get CL_DEVICE_PROFILE: error %d\n", error ); + return error; + } + + if( 0 == strcmp( profile, "EMBEDDED_PROFILE" )) + { // embedded device + + if( 0 == (value & (CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO))) + { + log_error( "FAILURE: embedded device supports neither CL_FP_ROUND_TO_NEAREST or CL_FP_ROUND_TO_ZERO\n" ); + return -1; + } + } + else + { // Full profile + if( ( value & ( CL_FP_ROUND_TO_NEAREST | CL_FP_INF_NAN )) != ( CL_FP_ROUND_TO_NEAREST | CL_FP_INF_NAN ) ) + { + log_error( "ERROR: Reported single fp config doesn't meet minimum set by OpenCL 1.0 (reported 0x%08x)\n", (int)value ); + return -1; + } + } + return 0; +} + +int test_min_max_double_fp_config(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + cl_device_fp_config value; + + error = clGetDeviceInfo( deviceID, CL_DEVICE_DOUBLE_FP_CONFIG, sizeof( value ), &value, 0 ); + test_error( error, "Unable to get device double fp config" ); + + if (value 
== 0) + return 0; + + if( ( value & (CL_FP_FMA | CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO | CL_FP_ROUND_TO_INF | CL_FP_INF_NAN | CL_FP_DENORM)) != ( CL_FP_FMA | CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO | CL_FP_ROUND_TO_INF | CL_FP_INF_NAN | CL_FP_DENORM) ) + { + log_error( "ERROR: Reported double fp config doesn't meet minimum set by OpenCL 1.0 (reported 0x%08x)\n", (int)value ); + return -1; + } + return 0; +} + +int test_min_max_local_mem_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper streams[3]; + size_t threads[1], localThreads[1]; + cl_int *localData, *resultData; + cl_ulong maxSize, kernelLocalUsage, min_max_local_mem_size; + cl_char buffer[ 4098 ]; + size_t length; + int i; + int err = 0; + MTdata d; + + /* Verify our test buffer won't be bigger than allowed */ + error = clGetDeviceInfo( deviceID, CL_DEVICE_LOCAL_MEM_SIZE, sizeof( maxSize ), &maxSize, 0 ); + test_error( error, "Unable to get max local buffer size" ); + + // Device version should fit the regex "OpenCL [0-9]+\.[0-9]+ *.*" + error = clGetDeviceInfo( deviceID, CL_DEVICE_VERSION, sizeof( buffer ), buffer, &length ); + test_error( error, "Unable to get device version string" ); + if (!gIsEmbedded) + { + if( memcmp( buffer, "OpenCL 2.0", strlen( "OpenCL 2.0" ) ) == 0 ) + min_max_local_mem_size = 16L * 1024L; + else if( memcmp( buffer, "OpenCL 1.2", strlen( "OpenCL 1.2" ) ) != 0 ) + min_max_local_mem_size = 16L * 1024L; + else if( memcmp( buffer, "OpenCL 1.1", strlen( "OpenCL 1.1" ) ) != 0 ) + min_max_local_mem_size = 16L * 1024L; + else if ( memcmp( buffer, "OpenCL 1.0", strlen( "OpenCL 1.0" ) ) != 0 ) + min_max_local_mem_size = 32L * 1024L; + else + { + log_error( "ERROR: device version string does not match required format! (returned: %s)\n", (char *)buffer ); + return -1; + } + } + + if( maxSize < (gIsEmbedded ? 
1L * 1024L : min_max_local_mem_size) ) + { + log_error( "ERROR: Reported local mem size less than required by OpenCL 1.1 (reported %dKb)\n", (int)( maxSize / 1024L ) ); + return -1; + } + + log_info("Reported max local buffer size for device: %lld bytes.\n", maxSize); + + /* Create a kernel to test with */ + if( create_single_kernel_helper( context, &program, &kernel, 1, sample_local_arg_kernel, "sample_test" ) != 0 ) + { + return -1; + } + + error = clGetKernelWorkGroupInfo(kernel, deviceID, CL_KERNEL_LOCAL_MEM_SIZE, sizeof(kernelLocalUsage), &kernelLocalUsage, NULL); + test_error(error, "clGetKernelWorkGroupInfo for CL_KERNEL_LOCAL_MEM_SIZE failed"); + + log_info("Reported local buffer usage for kernel (CL_KERNEL_LOCAL_MEM_SIZE): %lld bytes.\n", kernelLocalUsage); + + /* Create some I/O streams */ + size_t sizeToAllocate = ((size_t)(maxSize-kernelLocalUsage)/sizeof( cl_int ))*sizeof(cl_int); + size_t numberOfInts = sizeToAllocate/sizeof(cl_int); + + log_info("Attempting to use %lld bytes of local memory.\n", (cl_ulong)sizeToAllocate); + + localData = (cl_int *)malloc( sizeToAllocate ); + d = init_genrand( gRandomSeed ); + for(i=0; i<(int)(numberOfInts); i++) + localData[i] = (int)genrand_int32(d); + free_mtdata(d); d = NULL; + + streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeToAllocate, localData, &error); + test_error( error, "Creating test array failed" ); + streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeToAllocate, NULL, &error); + test_error( error, "Creating test array failed" ); + + + /* Set the arguments */ + error = clSetKernelArg(kernel, 0, sizeToAllocate, NULL); + test_error( error, "Unable to set indexed kernel arguments" ); + error = clSetKernelArg(kernel, 1, sizeof( streams[0] ), &streams[0]); + test_error( error, "Unable to set indexed kernel arguments" ); + error = clSetKernelArg(kernel, 2, sizeof( streams[1] ), &streams[1]); + test_error( error, "Unable to set indexed kernel arguments" 
); + + + /* Test running the kernel and verifying it */ + threads[0] = numberOfInts; + localThreads[0] = 1; + log_info("Creating local buffer with %d cl_ints (%d bytes).\n", (int)numberOfInts, (int)sizeToAllocate); + + cl_event evt; + cl_int evt_err; + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, &evt ); + test_error(error, "clEnqueueNDRangeKernel failed"); + + error = clFinish(queue); + test_error( error, "clFinish failed"); + + error = clGetEventInfo(evt, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof evt_err, &evt_err, NULL); + test_error( error, "clGetEventInfo with maximum local buffer size failed."); + + if (evt_err != CL_COMPLETE) { + print_error(evt_err, "Kernel event returned error"); + clReleaseEvent(evt); + return -1; + } + + resultData = (cl_int *)malloc(sizeToAllocate); + + error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, sizeToAllocate, resultData, 0, NULL, NULL); + test_error( error, "clEnqueueReadBuffer failed"); + + for(i=0; i<(int)(numberOfInts); i++) + if (localData[i] != resultData[i]) { + clReleaseEvent(evt); + free( localData ); + free(resultData); + log_error("Results failed to verify.\n"); + return -1; + } + clReleaseEvent(evt); + free( localData ); + free(resultData); + + return err; +} + +int test_min_max_kernel_preferred_work_group_size_multiple(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int err; + clProgramWrapper program; + clKernelWrapper kernel; + + size_t max_local_workgroup_size[3]; + size_t max_workgroup_size = 0, preferred_workgroup_size = 0; + + err = create_single_kernel_helper(context, &program, &kernel, 1, sample_local_arg_kernel, "sample_test" ); + test_error(err, "Failed to build kernel/program."); + + err = clGetKernelWorkGroupInfo(kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE, + sizeof(max_workgroup_size), &max_workgroup_size, NULL); + test_error(err, "clGetKernelWorkgroupInfo failed."); + + err = clGetKernelWorkGroupInfo(kernel, 
deviceID, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, + sizeof(preferred_workgroup_size), &preferred_workgroup_size, NULL); + test_error(err, "clGetKernelWorkgroupInfo failed."); + + err = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_local_workgroup_size), max_local_workgroup_size, NULL); + test_error(err, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES"); + + // Since the preferred size is only a performance hint, we can only really check that we get a sane value + // back + log_info( "size: %ld preferred: %ld max: %ld\n", max_workgroup_size, preferred_workgroup_size, max_local_workgroup_size[0] ); + + if( preferred_workgroup_size > max_workgroup_size ) + { + log_error( "ERROR: Reported preferred workgroup multiple larger than max workgroup size (preferred %ld, max %ld)\n", preferred_workgroup_size, max_workgroup_size ); + return -1; + } + + return 0; +} + +int test_min_max_execution_capabilities(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + cl_device_exec_capabilities value; + + + error = clGetDeviceInfo( deviceID, CL_DEVICE_EXECUTION_CAPABILITIES, sizeof( value ), &value, 0 ); + test_error( error, "Unable to get execution capabilities" ); + + if( ( value & CL_EXEC_KERNEL ) != CL_EXEC_KERNEL ) + { + log_error( "ERROR: Reported execution capabilities less than required by OpenCL 1.0 (reported 0x%08x)\n", (int)value ); + return -1; + } + return 0; +} + +int test_min_max_queue_properties(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + cl_command_queue_properties value; + + + error = clGetDeviceInfo( deviceID, CL_DEVICE_QUEUE_PROPERTIES, sizeof( value ), &value, 0 ); + test_error( error, "Unable to get queue properties" ); + + if( ( value & CL_QUEUE_PROFILING_ENABLE ) != CL_QUEUE_PROFILING_ENABLE ) + { + log_error( "ERROR: Reported queue properties less than required by OpenCL 1.0 (reported 0x%08x)\n", (int)value ); + 
return -1; + } + return 0; +} + +int test_min_max_device_version(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + cl_int error, i; + cl_char buffer[ 4098 ]; + size_t length; + + + // Device version should fit the regex "OpenCL [0-9]+\.[0-9]+ *.*" + error = clGetDeviceInfo( deviceID, CL_DEVICE_VERSION, sizeof( buffer ), buffer, &length ); + test_error( error, "Unable to get device version string" ); + if( memcmp( buffer, "OpenCL ", strlen( "OpenCL " ) ) != 0 ) + { + log_error( "ERROR: Initial part of device version string does not match required format! (returned: %s)\n", (char *)buffer ); + return -1; + } + + log_info("Returned version %s.\n", buffer); + + char *p1 = (char *)buffer + strlen( "OpenCL " ); + while( *p1 == ' ' ) + p1++; + char *p2 = p1; + if( ! isdigit(*p2) ) + { + log_error( "ERROR: Major revision number must follow space behind OpenCL! (returned %s)\n", (char*) buffer ); + return -1; + } + while( isdigit( *p2 ) ) + p2++; + if( *p2 != '.' ) + { + log_error( "ERROR: Version number must contain a decimal point! (returned: %s)\n", (char *)buffer ); + return -1; + } + char *p3 = p2 + 1; + if( ! isdigit(*p3) ) + { + log_error( "ERROR: Minor revision number is missing or does not abut the decimal point! (returned %s)\n", (char*) buffer ); + return -1; + } + while( isdigit( *p3 ) ) + p3++; + if( *p3 != ' ' ) + { + log_error( "ERROR: A space must appear after the minor version! (returned: %s)\n", (char *)buffer ); + return -1; + } + *p2 = ' '; // Put in a space for atoi below. + p2++; + + int major = atoi( p1 ); + int minor = atoi( p2 ); + int minor_revision = 2; + if( getenv("OPENCL_1_0_DEVICE")) + { + minor_revision = 0; + log_info( "WARNING: This test was run with OPENCL_1_0_DEVICE defined! This is not a OpenCL 1.1 or OpenCL 1.2 compatible device!!!\n" ); + } + else if( getenv("OPENCL_1_1_DEVICE")) + { + minor_revision = 1; + log_info( "WARNING: This test was run with OPENCL_1_1_DEVICE defined! 
This is not a OpenCL 1.2 compatible device!!!\n" ); + } + if( major * 10 + minor < 10 + minor_revision ) + { + log_error( "ERROR: OpenCL device version returned is less than 1.%d! (Returned: %s)\n", minor_revision, (char *)buffer ); + return -1; + } + + // Sanity checks on the returned values + if( length != (strlen( (char *)buffer ) + 1 )) + { + log_error( "ERROR: Returned length of version string does not match actual length (actual: %d, returned: %d)\n", (int)strlen( (char *)buffer ), (int)length ); + return -1; + } + + // Make sure 1.1 devices support required extensions for 1.1 + const char *requiredExtensions[] = + { + "cl_khr_global_int32_base_atomics", + "cl_khr_global_int32_extended_atomics", + "cl_khr_local_int32_base_atomics", + "cl_khr_local_int32_extended_atomics", + "cl_khr_byte_addressable_store", + NULL + }; + + if( major * 10 + minor >= 11 ) + { + char *extensions; + size_t extensions_size = 0; + + log_info( "Checking for required extensions for OpenCL 1.1 and later devices...\n" ); + + if( (error = clGetDeviceInfo(deviceID, CL_DEVICE_EXTENSIONS, 0, NULL, &extensions_size))) + { + log_error( "ERROR: could not get extensions size. Err # %d\n", error ); + return -1; + } + + if( extensions_size < 1 ) + { + log_error( "ERROR: invalid extensions size. Err # %d\n", error ); + return -1; + } + + extensions = (char*) malloc(extensions_size); + if( NULL == extensions ) + { + log_error( "ERROR: cannot allocate %ld bytes to hold extension string.\n", extensions_size ); + return -1; + } + memset( extensions, -1, extensions_size ); + + if( (error = clGetDeviceInfo(deviceID, CL_DEVICE_EXTENSIONS, extensions_size, extensions, NULL))) + { + log_error( "ERROR: could not get extensions. Err # %d\n", error ); + free( extensions ); + return -1; + } + + if( '\0' != extensions[ extensions_size - 1 ] ) + { + if( -1 == extensions[ extensions_size - 1 ] ) + log_error( "ERROR: extensions size reported incorrectly. Last byte is not NUL. Size too big. Reported: %ld. 
Should be: %ld\n", extensions_size, strlen(extensions) + 1 ); + else + log_error( "ERROR: extensions size reported incorrectly. Last byte is not NUL. Size too small. \n" ); + + free( extensions ); + return -1; + } + + for( i = 0; NULL != requiredExtensions[i]; i++ ) + { + if( NULL == strstr( extensions, requiredExtensions[i] ) ) + { + log_error( "ERROR: Required extension for 1.1 and greater devices is not in extension string: %s\n", requiredExtensions[i] ); + free( extensions ); + return -1; + } + else + log_info( "\t%s\n", requiredExtensions[i] ); + } + free( extensions ); + } + else + log_info( "WARNING: skipping required extension test -- OpenCL 1.0 device.\n" ); + + + return 0; +} + +int test_min_max_language_version(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + cl_int error; + cl_char buffer[ 4098 ]; + size_t length; + + // Device version should fit the regex "OpenCL [0-9]+\.[0-9]+ *.*" + error = clGetDeviceInfo( deviceID, CL_DEVICE_OPENCL_C_VERSION, sizeof( buffer ), buffer, &length ); + test_error( error, "Unable to get device opencl c version string" ); + if( memcmp( buffer, "OpenCL C ", strlen( "OpenCL C " ) ) != 0 ) + { + log_error( "ERROR: Initial part of device language version string does not match required format! (returned: \"%s\")\n", (char *)buffer ); + return -1; + } + + log_info("Returned version \"%s\".\n", buffer); + + char *p1 = (char *)buffer + strlen( "OpenCL C " ); + while( *p1 == ' ' ) + p1++; + char *p2 = p1; + if( ! isdigit(*p2) ) + { + log_error( "ERROR: Major revision number must follow space behind OpenCL C! (returned %s)\n", (char*) buffer ); + return -1; + } + while( isdigit( *p2 ) ) + p2++; + if( *p2 != '.' ) + { + log_error( "ERROR: Version number must contain a decimal point! (returned: %s)\n", (char *)buffer ); + return -1; + } + char *p3 = p2 + 1; + if( ! isdigit(*p3) ) + { + log_error( "ERROR: Minor revision number is missing or does not abut the decimal point! 
(returned %s)\n", (char*) buffer ); + return -1; + } + while( isdigit( *p3 ) ) + p3++; + if( *p3 != ' ' ) + { + log_error( "ERROR: A space must appear after the minor version! (returned: %s)\n", (char *)buffer ); + return -1; + } + *p2 = ' '; // Put in a space for atoi below. + p2++; + + int major = atoi( p1 ); + int minor = atoi( p2 ); + int minor_revision = 2; + + if( major * 10 + minor < 10 + minor_revision ) + { + // If the language version did not match, check to see if OPENCL_1_0_DEVICE is set. + if( getenv("OPENCL_1_0_DEVICE")) + { + log_info( "WARNING: This test was run with OPENCL_1_0_DEVICE defined! This is not a OpenCL 1.1 or OpenCL 1.2 compatible device!!!\n" ); + } + else if( getenv("OPENCL_1_1_DEVICE")) + { + log_info( "WARNING: This test was run with OPENCL_1_1_DEVICE defined! This is not a OpenCL 1.2 compatible device!!!\n" ); + } + else + { + log_error( "ERROR: OpenCL device language version returned is less than 1.%d! (Returned: %s)\n", minor_revision, (char *)buffer ); + return -1; + } + } + + // Sanity checks on the returned values + if( length != (strlen( (char *)buffer ) + 1 )) + { + log_error( "ERROR: Returned length of version string does not match actual length (actual: %d, returned: %d)\n", (int)strlen( (char *)buffer ), (int)length ); + return -1; + } + + return 0; +} + diff --git a/test_conformance/compatibility/test_conformance/api/test_binary.cpp b/test_conformance/compatibility/test_conformance/api/test_binary.cpp new file mode 100644 index 00000000..02edd49d --- /dev/null +++ b/test_conformance/compatibility/test_conformance/api/test_binary.cpp @@ -0,0 +1,226 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "testBase.h" + +static const char *sample_binary_kernel_source[] = { +"__kernel void sample_test(__global float *src, __global int *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = (int)src[tid] + 1;\n" +"\n" +"}\n" }; + + +int test_binary_get(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + clProgramWrapper program; + size_t binarySize; + + + program = clCreateProgramWithSource( context, 1, sample_binary_kernel_source, NULL, &error ); + test_error( error, "Unable to create program from source" ); + + // Build so we have a binary to get + error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL ); + test_error( error, "Unable to build test program" ); + + // Get the size of the resulting binary (only one device) + error = clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES, sizeof( binarySize ), &binarySize, NULL ); + test_error( error, "Unable to get binary size" ); + + // Sanity check + if( binarySize == 0 ) + { + log_error( "ERROR: Binary size of program is zero\n" ); + return -1; + } + + // Create a buffer and get the actual binary + unsigned char *binary; + binary = (unsigned char*)malloc(sizeof(unsigned char)*binarySize); + unsigned char *buffers[ 1 ] = { binary }; + + // Do another sanity check here first + size_t size; + error = clGetProgramInfo( program, CL_PROGRAM_BINARIES, 0, NULL, &size ); + test_error( error, "Unable to get expected size of binaries array" ); + if( size != sizeof( buffers ) ) + { + log_error( "ERROR: Expected size of binaries 
array in clGetProgramInfo is incorrect (should be %d, got %d)\n", (int)sizeof( buffers ), (int)size ); + free(binary); + return -1; + } + + error = clGetProgramInfo( program, CL_PROGRAM_BINARIES, sizeof( buffers ), &buffers, NULL ); + test_error( error, "Unable to get program binary" ); + + // No way to verify the binary is correct, so just be good with that + free(binary); + return 0; +} + + +int test_program_binary_create(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + /* To test this in a self-contained fashion, we have to create a program with + source, then get the binary, then use that binary to reload the program, and then verify */ + + int error; + clProgramWrapper program, program_from_binary; + size_t binarySize; + + + program = clCreateProgramWithSource( context, 1, sample_binary_kernel_source, NULL, &error ); + test_error( error, "Unable to create program from source" ); + + // Build so we have a binary to get + error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL ); + test_error( error, "Unable to build test program" ); + + // Get the size of the resulting binary (only one device) + error = clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES, sizeof( binarySize ), &binarySize, NULL ); + test_error( error, "Unable to get binary size" ); + + // Sanity check + if( binarySize == 0 ) + { + log_error( "ERROR: Binary size of program is zero\n" ); + return -1; + } + + // Create a buffer and get the actual binary + unsigned char *binary = (unsigned char*)malloc(binarySize); + const unsigned char *buffers[ 1 ] = { binary }; + + error = clGetProgramInfo( program, CL_PROGRAM_BINARIES, sizeof( buffers ), &buffers, NULL ); + test_error( error, "Unable to get program binary" ); + + cl_int loadErrors[ 1 ]; + program_from_binary = clCreateProgramWithBinary( context, 1, &deviceID, &binarySize, buffers, loadErrors, &error ); + test_error( error, "Unable to load valid program binary" ); + test_error( loadErrors[ 0 ], 
"Unable to load valid device binary into program" ); + + error = clBuildProgram( program_from_binary, 1, &deviceID, NULL, NULL, NULL ); + test_error( error, "Unable to build binary program" ); + + // Get the size of the binary built from the first binary + size_t binary2Size; + error = clGetProgramInfo( program_from_binary, CL_PROGRAM_BINARY_SIZES, sizeof( binary2Size ), &binary2Size, NULL ); + test_error( error, "Unable to get size for the binary program" ); + + // Now get the binary one more time and verify it loaded the right binary + unsigned char *binary2 = (unsigned char*)malloc(binary2Size); + buffers[ 0 ] = binary2; + error = clGetProgramInfo( program_from_binary, CL_PROGRAM_BINARIES, sizeof( buffers ), &buffers, NULL ); + test_error( error, "Unable to get program binary second time" ); + + // Try again, this time without passing the status ptr in, to make sure we still + // get a valid binary + clProgramWrapper programWithoutStatus = clCreateProgramWithBinary( context, 1, &deviceID, &binary2Size, buffers, NULL, &error ); + test_error( error, "Unable to load valid program binary when binary_status pointer is NULL" ); + + error = clBuildProgram( programWithoutStatus, 1, &deviceID, NULL, NULL, NULL ); + test_error( error, "Unable to build binary program created without binary_status" ); + + // Get the size of the binary created without passing binary_status + size_t binary3Size; + error = clGetProgramInfo( programWithoutStatus, CL_PROGRAM_BINARY_SIZES, sizeof( binary3Size ), &binary3Size, NULL ); + test_error( error, "Unable to get size for the binary program created without binary_status" ); + + // Now get the binary one more time + unsigned char *binary3 = (unsigned char*)malloc(binary3Size); + buffers[ 0 ] = binary3; + error = clGetProgramInfo( programWithoutStatus, CL_PROGRAM_BINARIES, sizeof( buffers ), &buffers, NULL ); + test_error( error, "Unable to get program binary from the program created without binary_status" ); + + // We no longer need these 
intermediate binaries + free(binary); + free(binary2); + free(binary3); + + // Now execute them both to see that they both do the same thing. + clMemWrapper in, out, out_binary; + clKernelWrapper kernel, kernel_binary; + cl_int *out_data, *out_data_binary; + cl_float *in_data; + size_t size_to_run = 1000; + + // Allocate some data + in_data = (cl_float*)malloc(sizeof(cl_float)*size_to_run); + out_data = (cl_int*)malloc(sizeof(cl_int)*size_to_run); + out_data_binary = (cl_int*)malloc(sizeof(cl_int)*size_to_run); + memset(out_data, 0, sizeof(cl_int)*size_to_run); + memset(out_data_binary, 0, sizeof(cl_int)*size_to_run); + for (size_t i=0; i -0.5f);\n" + " if(myBool)\n" + " {\n" + " dst[tid] = (int)src[tid];\n" + " }\n" + " else\n" + " {\n" + " dst[tid] = 0;\n" + " }\n" + "\n" + "}\n" +}; + +int test_for_bool_type(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + + cl_program program; + cl_kernel kernel; + + int err = create_single_kernel_helper(context, + &program, + &kernel, + 1, kernel_with_bool, + "kernel_with_bool" ); + return err; +} + diff --git a/test_conformance/compatibility/test_conformance/api/test_create_context_from_type.cpp b/test_conformance/compatibility/test_conformance/api/test_create_context_from_type.cpp new file mode 100644 index 00000000..2339aad8 --- /dev/null +++ b/test_conformance/compatibility/test_conformance/api/test_create_context_from_type.cpp @@ -0,0 +1,130 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "testBase.h" + +#ifndef _WIN32 +#include +#endif + +#include "../../test_common/harness/testHarness.h" +#include "../../test_common/harness/conversions.h" + +extern cl_uint gRandomSeed; + +int test_create_context_from_type(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper streams[2]; + clContextWrapper context_to_test; + clCommandQueueWrapper queue_to_test; + size_t threads[1], localThreads[1]; + cl_float inputData[10]; + cl_int outputData[10]; + int i; + RandomSeed seed( gRandomSeed ); + + const char *sample_single_test_kernel[] = { + "__kernel void sample_test(__global float *src, __global int *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = (int)src[tid];\n" + "\n" + "}\n" }; + + cl_device_type type; + error = clGetDeviceInfo(deviceID, CL_DEVICE_TYPE, sizeof(type), &type, NULL); + test_error(error, "clGetDeviceInfo for CL_DEVICE_TYPE failed\n"); + + cl_platform_id platform; + error = clGetDeviceInfo(deviceID, CL_DEVICE_PLATFORM, sizeof(platform), &platform, NULL); + test_error(error, "clGetDeviceInfo for CL_DEVICE_PLATFORM failed\n"); + + cl_context_properties properties[3] = { + (cl_context_properties)CL_CONTEXT_PLATFORM, + (cl_context_properties)platform, + NULL + }; + + context_to_test = clCreateContextFromType(properties, type, notify_callback, NULL, &error); + test_error(error, "clCreateContextFromType failed"); + if (context_to_test == NULL) { + log_error("clCreateContextFromType returned NULL, but error was CL_SUCCESS."); + return -1; + } + + queue_to_test = clCreateCommandQueue(context_to_test, deviceID, NULL, &error); + test_error(error, "clCreateCommandQueue failed"); + if (queue_to_test == NULL) { + log_error("clCreateCommandQueue returned NULL, but error was CL_SUCCESS."); + return -1; + } 
+ + /* Create a kernel to test with */ + if( create_single_kernel_helper( context_to_test, &program, &kernel, 1, sample_single_test_kernel, "sample_test" ) != 0 ) + { + return -1; + } + + /* Create some I/O streams */ + streams[0] = clCreateBuffer(context_to_test, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * 10, NULL, &error); + test_error( error, "Creating test array failed" ); + streams[1] = clCreateBuffer(context_to_test, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 10, NULL, &error); + test_error( error, "Creating test array failed" ); + + /* Write some test data */ + memset( outputData, 0, sizeof( outputData ) ); + + for (i=0; i<10; i++) + inputData[i] = get_random_float(-(float) 0x7fffffff, (float) 0x7fffffff, seed); + + error = clEnqueueWriteBuffer(queue_to_test, streams[0], CL_TRUE, 0, sizeof(cl_float)*10, (void *)inputData, 0, NULL, NULL); + test_error( error, "Unable to set testing kernel data" ); + + /* Test setting the arguments by index manually */ + error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]); + test_error( error, "Unable to set indexed kernel arguments" ); + error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0]); + test_error( error, "Unable to set indexed kernel arguments" ); + + + /* Test running the kernel and verifying it */ + threads[0] = (size_t)10; + + error = get_max_common_work_group_size( context_to_test, kernel, threads[0], &localThreads[0] ); + test_error( error, "Unable to get work group size to use" ); + + error = clEnqueueNDRangeKernel( queue_to_test, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); + test_error( error, "Kernel execution failed" ); + + error = clEnqueueReadBuffer( queue_to_test, streams[1], CL_TRUE, 0, sizeof(cl_int)*10, (void *)outputData, 0, NULL, NULL ); + test_error( error, "Unable to get result data" ); + + for (i=0; i<10; i++) + { + if (outputData[i] != (int)inputData[i]) + { + log_error( "ERROR: Data did not verify on first pass!\n" ); + return -1; + 
} + } + + return 0; +} + + diff --git a/test_conformance/compatibility/test_conformance/api/test_create_kernels.c b/test_conformance/compatibility/test_conformance/api/test_create_kernels.c new file mode 100644 index 00000000..438300b1 --- /dev/null +++ b/test_conformance/compatibility/test_conformance/api/test_create_kernels.c @@ -0,0 +1,643 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "testBase.h" +#include "../../test_common/harness/testHarness.h" + + +const char *sample_single_kernel[] = { + "__kernel void sample_test(__global float *src, __global int *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = (int)src[tid];\n" + "\n" + "}\n" }; + +size_t sample_single_kernel_lengths[1]; + +const char *sample_two_kernels[] = { + "__kernel void sample_test(__global float *src, __global int *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = (int)src[tid];\n" + "\n" + "}\n", + "__kernel void sample_test2(__global int *src, __global float *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = (float)src[tid];\n" + "\n" + "}\n" }; + +size_t sample_two_kernel_lengths[2]; + +const char *sample_two_kernels_in_1[] = { + "__kernel void sample_test(__global float *src, __global int *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = (int)src[tid];\n" + "\n" + "}\n" + "__kernel void sample_test2(__global int *src, 
__global float *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = (float)src[tid];\n" + "\n" + "}\n" }; + +size_t sample_two_kernels_in_1_lengths[1]; + + +const char *repeate_test_kernel = +"__kernel void test_kernel(__global int *src, __global int *dst)\n" +"{\n" +" dst[get_global_id(0)] = src[get_global_id(0)]+1;\n" +"}\n"; + + + +int test_load_single_kernel(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + clProgramWrapper program; + cl_program testProgram; + clKernelWrapper kernel; + cl_context testContext; + unsigned int numKernels; + cl_char testName[512]; + cl_uint testArgCount; + size_t realSize; + + + /* Preprocess: calc the length of each source file line */ + sample_single_kernel_lengths[ 0 ] = strlen( sample_single_kernel[ 0 ] ); + + /* Create a program */ + program = clCreateProgramWithSource( context, 1, sample_single_kernel, sample_single_kernel_lengths, &error ); + if( program == NULL || error != CL_SUCCESS ) + { + print_error( error, "Unable to create single kernel program" ); + return -1; + } + + error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL ); + test_error( error, "Unable to build single kernel program" ); + error = clCreateKernelsInProgram(program, 1, &kernel, &numKernels); + test_error( error, "Unable to create single kernel program" ); + + /* Check program and context pointers */ + error = clGetKernelInfo( kernel, CL_KERNEL_PROGRAM, sizeof( cl_program ), &testProgram, &realSize ); + test_error( error, "Unable to get kernel's program" ); + if( (cl_program)testProgram != (cl_program)program ) + { + log_error( "ERROR: Returned kernel's program does not match program used to create it! 
(Got %p, expected %p)\n", (cl_program)testProgram, (cl_program)program ); + return -1; + } + if( realSize != sizeof( cl_program ) ) + { + log_error( "ERROR: Returned size of kernel's program does not match expected size (expected %d, got %d)\n", (int)sizeof( cl_program ), (int)realSize ); + return -1; + } + + error = clGetKernelInfo( kernel, CL_KERNEL_CONTEXT, sizeof( cl_context ), &testContext, &realSize ); + test_error( error, "Unable to get kernel's context" ); + if( (cl_context)testContext != (cl_context)context ) + { + log_error( "ERROR: Returned kernel's context does not match program used to create it! (Got %p, expected %p)\n", (cl_context)testContext, (cl_context)context ); + return -1; + } + if( realSize != sizeof( cl_context ) ) + { + log_error( "ERROR: Returned size of kernel's context does not match expected size (expected %d, got %d)\n", (int)sizeof( cl_context ), (int)realSize ); + return -1; + } + + /* Test arg count */ + error = clGetKernelInfo( kernel, CL_KERNEL_NUM_ARGS, 0, NULL, &realSize ); + test_error( error, "Unable to get size of arg count info from kernel" ); + + if( realSize != sizeof( testArgCount ) ) + { + log_error( "ERROR: size of arg count not valid! 
%d\n", (int)realSize ); + return -1; + } + + error = clGetKernelInfo( kernel, CL_KERNEL_NUM_ARGS, sizeof( testArgCount ), &testArgCount, NULL ); + test_error( error, "Unable to get arg count from kernel" ); + + if( testArgCount != 2 ) + { + log_error( "ERROR: Kernel arg count does not match!\n" ); + return -1; + } + + + /* Test function name */ + error = clGetKernelInfo( kernel, CL_KERNEL_FUNCTION_NAME, sizeof( testName ), testName, &realSize ); + test_error( error, "Unable to get name from kernel" ); + + if( strcmp( (char *)testName, "sample_test" ) != 0 ) + { + log_error( "ERROR: Kernel names do not match!\n" ); + return -1; + } + if( realSize != strlen( (char *)testName ) + 1 ) + { + log_error( "ERROR: Length of kernel name returned does not validate (expected %d, got %d)\n", (int)strlen( (char *)testName ) + 1, (int)realSize ); + return -1; + } + + /* All done */ + + return 0; +} + +int test_load_two_kernels(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + clProgramWrapper program; + clKernelWrapper kernel[2]; + unsigned int numKernels; + cl_char testName[ 512 ]; + cl_uint testArgCount; + + + /* Preprocess: calc the length of each source file line */ + sample_two_kernel_lengths[ 0 ] = strlen( sample_two_kernels[ 0 ] ); + sample_two_kernel_lengths[ 1 ] = strlen( sample_two_kernels[ 1 ] ); + + /* Now create a test program */ + program = clCreateProgramWithSource( context, 2, sample_two_kernels, sample_two_kernel_lengths, &error ); + if( program == NULL || error != CL_SUCCESS ) + { + print_error( error, "Unable to create dual kernel program!" ); + return -1; + } + error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL ); + test_error( error, "Unable to build dual kernel program" ); + error = clCreateKernelsInProgram(program, 2, &kernel[0], &numKernels); + test_error( error, "Unable to create dual kernel program" ); + + if( numKernels != 2 ) + { + log_error( "ERROR: wrong # of kernels! 
(%d)\n", numKernels ); + return -1; + } + + /* Check first kernel */ + error = clGetKernelInfo( kernel[0], CL_KERNEL_FUNCTION_NAME, sizeof( testName ), testName, NULL ); + test_error( error, "Unable to get function name from kernel" ); + + int found_kernel1 = 0, found_kernel2 = 0; + + if( strcmp( (char *)testName, "sample_test" ) == 0 ) { + found_kernel1 = 1; + } else if( strcmp( (char *)testName, "sample_test2" ) == 0 ) { + found_kernel2 = 1; + } else { + log_error( "ERROR: Invalid kernel name returned: \"%s\" expected \"%s\" or \"%s\".\n", testName, "sample_test", "sample_test2"); + return -1; + } + + error = clGetKernelInfo( kernel[1], CL_KERNEL_FUNCTION_NAME, sizeof( testName ), testName, NULL ); + test_error( error, "Unable to get function name from second kernel" ); + + if( strcmp( (char *)testName, "sample_test" ) == 0 ) { + if (found_kernel1) { + log_error("Kernel \"%s\" returned twice.\n", (char *)testName); + return -1; + } + found_kernel1 = 1; + } else if( strcmp( (char *)testName, "sample_test2" ) == 0 ) { + if (found_kernel2) { + log_error("Kernel \"%s\" returned twice.\n", (char *)testName); + return -1; + } + found_kernel2 = 1; + } else { + log_error( "ERROR: Invalid kernel name returned: \"%s\" expected \"%s\" or \"%s\".\n", testName, "sample_test", "sample_test2"); + return -1; + } + + if( !found_kernel1 || !found_kernel2 ) + { + log_error( "ERROR: Kernel names do not match.\n" ); + if (!found_kernel1) + log_error("Kernel \"%s\" not returned.\n", "sample_test"); + if (!found_kernel2) + log_error("Kernel \"%s\" not returned.\n", "sample_test"); + return -1; + } + + error = clGetKernelInfo( kernel[0], CL_KERNEL_NUM_ARGS, sizeof( testArgCount ), &testArgCount, NULL ); + test_error( error, "Unable to get arg count from kernel" ); + + if( testArgCount != 2 ) + { + log_error( "ERROR: wrong # of args for kernel\n" ); + return -1; + } + + /* All done */ + return 0; +} + +int test_load_two_kernels_in_one(cl_device_id deviceID, cl_context context, 
cl_command_queue queue, int num_elements) +{ + int error; + clProgramWrapper program; + clKernelWrapper kernel[2]; + unsigned int numKernels; + cl_char testName[512]; + cl_uint testArgCount; + + + /* Preprocess: calc the length of each source file line */ + sample_two_kernels_in_1_lengths[ 0 ] = strlen( sample_two_kernels_in_1[ 0 ] ); + + /* Now create a test program */ + program = clCreateProgramWithSource( context, 1, sample_two_kernels_in_1, sample_two_kernels_in_1_lengths, &error ); + if( program == NULL || error != CL_SUCCESS ) + { + print_error( error, "Unable to create dual kernel program" ); + return -1; + } + error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL ); + test_error( error, "Unable to build dual kernel program" ); + error = clCreateKernelsInProgram(program, 2, &kernel[0], &numKernels); + test_error( error, "Unable to create dual kernel program" ); + + if( numKernels != 2 ) + { + log_error( "ERROR: wrong # of kernels! (%d)\n", numKernels ); + return -1; + } + + /* Check first kernel */ + error = clGetKernelInfo( kernel[0], CL_KERNEL_FUNCTION_NAME, sizeof( testName ), testName, NULL ); + test_error( error, "Unable to get function name from kernel" ); + + int found_kernel1 = 0, found_kernel2 = 0; + + if( strcmp( (char *)testName, "sample_test" ) == 0 ) { + found_kernel1 = 1; + } else if( strcmp( (char *)testName, "sample_test2" ) == 0 ) { + found_kernel2 = 1; + } else { + log_error( "ERROR: Invalid kernel name returned: \"%s\" expected \"%s\" or \"%s\".\n", testName, "sample_test", "sample_test2"); + return -1; + } + + error = clGetKernelInfo( kernel[0], CL_KERNEL_NUM_ARGS, sizeof( testArgCount ), &testArgCount, NULL ); + test_error( error, "Unable to get arg count from kernel" ); + + if( testArgCount != 2 ) + { + log_error( "ERROR: wrong # of args for kernel\n" ); + return -1; + } + + /* Check second kernel */ + error = clGetKernelInfo( kernel[1], CL_KERNEL_FUNCTION_NAME, sizeof( testName ), testName, NULL ); + test_error( error, 
"Unable to get function name from kernel" ); + + if( strcmp( (char *)testName, "sample_test" ) == 0 ) { + if (found_kernel1) { + log_error("Kernel \"%s\" returned twice.\n", (char *)testName); + return -1; + } + found_kernel1 = 1; + } else if( strcmp( (char *)testName, "sample_test2" ) == 0 ) { + if (found_kernel2) { + log_error("Kernel \"%s\" returned twice.\n", (char *)testName); + return -1; + } + found_kernel2 = 1; + } else { + log_error( "ERROR: Invalid kernel name returned: \"%s\" expected \"%s\" or \"%s\".\n", testName, "sample_test", "sample_test2"); + return -1; + } + + if( !found_kernel1 || !found_kernel2 ) + { + log_error( "ERROR: Kernel names do not match.\n" ); + if (!found_kernel1) + log_error("Kernel \"%s\" not returned.\n", "sample_test"); + if (!found_kernel2) + log_error("Kernel \"%s\" not returned.\n", "sample_test"); + return -1; + } + + /* All done */ + return 0; +} + +int test_load_two_kernels_manually( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + clProgramWrapper program; + clKernelWrapper kernel1, kernel2; + int error; + + + /* Now create a test program */ + program = clCreateProgramWithSource( context, 1, sample_two_kernels_in_1, NULL, &error ); + if( program == NULL || error != CL_SUCCESS ) + { + print_error( error, "Unable to create dual kernel program" ); + return -1; + } + + /* Compile the program */ + error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL ); + test_error( error, "Unable to build kernel program" ); + + /* Try manually creating kernels (backwards just in case) */ + kernel1 = clCreateKernel( program, "sample_test2", &error ); + + if( kernel1 == NULL || error != CL_SUCCESS ) + { + print_error( error, "Could not get kernel 1" ); + return -1; + } + + kernel2 = clCreateKernel( program, "sample_test", &error ); + + if( kernel2 == NULL ) + { + print_error( error, "Could not get kernel 2" ); + return -1; + } + + return 0; +} + +int test_get_program_info_kernel_names( 
cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + clProgramWrapper program; + clKernelWrapper kernel1, kernel2; + int error; + size_t i; + + /* Now create a test program */ + program = clCreateProgramWithSource( context, 1, sample_two_kernels_in_1, NULL, &error ); + if( program == NULL || error != CL_SUCCESS ) + { + print_error( error, "Unable to create dual kernel program" ); + return -1; + } + + /* Compile the program */ + error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL ); + test_error( error, "Unable to build kernel program" ); + + /* Lookup the number of kernels in the program. */ + size_t total_kernels = 0; + error = clGetProgramInfo(program, CL_PROGRAM_NUM_KERNELS, sizeof(size_t),&total_kernels,NULL); + test_error( error, "Unable to get program info num kernels"); + + if (total_kernels != 2) + { + print_error( error, "Program did not contain two kernels" ); + return -1; + } + + /* Lookup the kernel names. */ + const char* actual_names[] = { "sample_test;sample_test2", "sample_test2;sample_test"} ; + + size_t kernel_names_len = 0; + error = clGetProgramInfo(program,CL_PROGRAM_KERNEL_NAMES,0,NULL,&kernel_names_len); + test_error( error, "Unable to get length of kernel names list." ); + + if (kernel_names_len != (strlen(actual_names[0])+1)) + { + print_error( error, "Kernel names length did not match"); + return -1; + } + + const size_t len = (kernel_names_len+1)*sizeof(char); + char* kernel_names = (char*)malloc(len); + error = clGetProgramInfo(program,CL_PROGRAM_KERNEL_NAMES,len,kernel_names,&kernel_names_len); + test_error( error, "Unable to get kernel names list." ); + + /* Check to see if the kernel name array is null terminated. */ + if (kernel_names[kernel_names_len-1] != '\0') + { + free(kernel_names); + print_error( error, "Kernel name list was not null terminated"); + return -1; + } + + /* Check to see if the correct kernel name string was returned. 
*/ + for( i = 0; i < sizeof( actual_names ) / sizeof( actual_names[0] ); i++ ) + if( 0 == strcmp(actual_names[i],kernel_names) ) + break; + + if (i == sizeof( actual_names ) / sizeof( actual_names[0] ) ) + { + free(kernel_names); + log_error( "Kernel names \"%s\" did not match:\n", kernel_names ); + for( i = 0; i < sizeof( actual_names ) / sizeof( actual_names[0] ); i++ ) + log_error( "\t\t\"%s\"\n", actual_names[0] ); + return -1; + } + free(kernel_names); + + /* Try manually creating kernels (backwards just in case) */ + kernel1 = clCreateKernel( program, "sample_test", &error ); + if( kernel1 == NULL || error != CL_SUCCESS ) + { + print_error( error, "Could not get kernel 1" ); + return -1; + } + + kernel2 = clCreateKernel( program, "sample_test2", &error ); + if( kernel2 == NULL ) + { + print_error( error, "Could not get kernel 2" ); + return -1; + } + + return 0; +} + +static const char *single_task_kernel[] = { + "__kernel void sample_test(__global int *dst, int count)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " for( int i = 0; i < count; i++ )\n" + " dst[i] = tid + i;\n" + "\n" + "}\n" }; + +int test_enqueue_task(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper output; + cl_int count; + + + if( create_single_kernel_helper( context, &program, &kernel, 1, single_task_kernel, "sample_test" ) ) + return -1; + + // Create args + count = 100; + output = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof( cl_int ) * count, NULL, &error ); + test_error( error, "Unable to create output buffer" ); + + error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &output ); + test_error( error, "Unable to set kernel argument" ); + error = clSetKernelArg( kernel, 1, sizeof( cl_int ), &count ); + test_error( error, "Unable to set kernel argument" ); + + // Run task + error = clEnqueueTask( queue, kernel, 0, NULL, NULL ); + 
test_error( error, "Unable to run task" ); + + // Read results + cl_int *results = (cl_int*)malloc(sizeof(cl_int)*count); + error = clEnqueueReadBuffer( queue, output, CL_TRUE, 0, sizeof( cl_int ) * count, results, 0, NULL, NULL ); + test_error( error, "Unable to read results" ); + + // Validate + for( cl_int i = 0; i < count; i++ ) + { + if( results[ i ] != i ) + { + log_error( "ERROR: Task result value %d did not validate! Expected %d, got %d\n", (int)i, (int)i, (int)results[ i ] ); + free(results); + return -1; + } + } + + /* All done */ + free(results); + return 0; +} + + + +#define TEST_SIZE 1000 +int test_repeated_setup_cleanup(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + + cl_context local_context; + cl_command_queue local_queue; + cl_program local_program; + cl_kernel local_kernel; + cl_mem local_mem_in, local_mem_out; + cl_event local_event; + size_t global_dim[3]; + int i, j, error; + global_dim[0] = TEST_SIZE; + global_dim[1] = 1; global_dim[2] = 1; + cl_int *inData, *outData; + cl_int status; + + inData = (cl_int*)malloc(sizeof(cl_int)*TEST_SIZE); + outData = (cl_int*)malloc(sizeof(cl_int)*TEST_SIZE); + for (i=0; i +#endif + +int IsAPowerOfTwo( unsigned long x ) +{ + return 0 == (x & (x-1)); +} + + +int test_min_data_type_align_size_alignment(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ) +{ + cl_uint min_alignment; + + if (gHasLong) + min_alignment = sizeof(cl_long)*16; + else + min_alignment = sizeof(cl_int)*16; + + int error = 0; + cl_uint alignment; + + error = clGetDeviceInfo(device, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof(alignment), &alignment, NULL); + test_error(error, "clGetDeviceInfo for CL_DEVICE_MEM_BASE_ADDR_ALIGN failed"); + log_info("Device reported CL_DEVICE_MEM_BASE_ADDR_ALIGN = %lu bits.\n", (unsigned long)alignment); + + // Verify the size is large enough + if (alignment < min_alignment*8) { + log_error("ERROR: alignment too small. 
Minimum alignment for %s16 is %lu bits, device reported %lu bits.", + (gHasLong) ? "long" : "int", + (unsigned long)(min_alignment*8), (unsigned long)alignment); + return -1; + } + + // Verify the size is a power of two + if (!IsAPowerOfTwo((unsigned long)alignment)) { + log_error("ERROR: alignment is not a power of two.\n"); + return -1; + } + + return 0; + +} diff --git a/test_conformance/compatibility/test_conformance/api/test_kernel_arg_changes.cpp b/test_conformance/compatibility/test_conformance/api/test_kernel_arg_changes.cpp new file mode 100644 index 00000000..b7aba632 --- /dev/null +++ b/test_conformance/compatibility/test_conformance/api/test_kernel_arg_changes.cpp @@ -0,0 +1,141 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "testBase.h" + +extern "C" { extern cl_uint gRandomSeed;} + +// This test is designed to stress changing kernel arguments between execute calls (that are asynchronous and thus +// potentially overlapping) to make sure each kernel gets the right arguments + +// Note: put a delay loop in the kernel to make sure we have time to queue the next kernel before this one finishes +const char *inspect_image_kernel_source[] = { +"__kernel void sample_test(read_only image2d_t src, __global int *outDimensions )\n" +"{\n" +" int tid = get_global_id(0), i;\n" +" for( i = 0; i < 100000; i++ ); \n" +" outDimensions[tid * 2] = get_image_width(src) * tid;\n" +" outDimensions[tid * 2 + 1] = get_image_height(src) * tid;\n" +"\n" +"}\n" }; + +#define NUM_TRIES 100 +#define NUM_THREADS 2048 + +int test_kernel_arg_changes(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + clProgramWrapper program; + clKernelWrapper kernel; + int error, i; + clMemWrapper images[ NUM_TRIES ]; + size_t sizes[ NUM_TRIES ][ 2 ]; + clMemWrapper results[ NUM_TRIES ]; + cl_image_format imageFormat; + size_t maxWidth, maxHeight; + size_t threads[1], localThreads[1]; + cl_int resultArray[ NUM_THREADS * 2 ]; + char errStr[ 128 ]; + RandomSeed seed( gRandomSeed ); + + + PASSIVE_REQUIRE_IMAGE_SUPPORT( device ) + + // Just get any ol format to test with + error = get_8_bit_image_format( context, CL_MEM_OBJECT_IMAGE2D, CL_MEM_READ_WRITE, 0, &imageFormat ); + test_error( error, "Unable to obtain suitable image format to test with!" 
); + + // Create our testing kernel + error = create_single_kernel_helper( context, &program, &kernel, 1, inspect_image_kernel_source, "sample_test" ); + test_error( error, "Unable to create testing kernel" ); + + // Get max dimensions for each of our images + error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL ); + test_error( error, "Unable to get max image dimensions for device" ); + + // Get the number of threads we'll be able to run + threads[0] = NUM_THREADS; + error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] ); + test_error( error, "Unable to get work group size for kernel" ); + + // Create a variety of images and output arrays + for( i = 0; i < NUM_TRIES; i++ ) + { + sizes[ i ][ 0 ] = genrand_int32(seed) % (maxWidth/32) + 1; + sizes[ i ][ 1 ] = genrand_int32(seed) % (maxHeight/32) + 1; + + images[ i ] = create_image_2d( context, (cl_mem_flags)(CL_MEM_READ_ONLY), + &imageFormat, sizes[ i ][ 0], sizes[ i ][ 1 ], 0, NULL, &error ); + if( images[i] == NULL ) + { + log_error("Failed to create image %d of size %d x %d (%s).\n", i, (int)sizes[i][0], (int)sizes[i][1], IGetErrorString( error )); + return -1; + } + results[ i ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof( cl_int ) * threads[0] * 2, NULL, &error ); + if( results[i] == NULL) + { + log_error("Failed to create array %d of size %d.\n", i, (int)threads[0]*2); + return -1; + } + } + + // Start setting arguments and executing kernels + for( i = 0; i < NUM_TRIES; i++ ) + { + // Set the arguments for this try + error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &images[ i ] ); + sprintf( errStr, "Unable to set argument 0 for kernel try %d", i ); + test_error( error, errStr ); + + error = clSetKernelArg( kernel, 1, sizeof( cl_mem ), &results[ i ] ); + sprintf( errStr, "Unable to set argument 1 for 
kernel try %d", i ); + test_error( error, errStr ); + + // Queue up execution + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); + sprintf( errStr, "Unable to execute kernel try %d", i ); + test_error( error, errStr ); + } + + // Read the results back out, one at a time, and verify + for( i = 0; i < NUM_TRIES; i++ ) + { + error = clEnqueueReadBuffer( queue, results[ i ], CL_TRUE, 0, sizeof( cl_int ) * threads[0] * 2, resultArray, 0, NULL, NULL ); + sprintf( errStr, "Unable to read results for kernel try %d", i ); + test_error( error, errStr ); + + // Verify. Each entry should be n * the (width/height) of image i + for( int j = 0; j < NUM_THREADS; j++ ) + { + if( resultArray[ j * 2 + 0 ] != (int)sizes[ i ][ 0 ] * j ) + { + log_error( "ERROR: Verficiation for kernel try %d, sample %d FAILED, expected a width of %d, got %d\n", + i, j, (int)sizes[ i ][ 0 ] * j, resultArray[ j * 2 + 0 ] ); + return -1; + } + if( resultArray[ j * 2 + 1 ] != (int)sizes[ i ][ 1 ] * j ) + { + log_error( "ERROR: Verficiation for kernel try %d, sample %d FAILED, expected a height of %d, got %d\n", + i, j, (int)sizes[ i ][ 1 ] * j, resultArray[ j * 2 + 1 ] ); + return -1; + } + } + } + + // If we got here, everything verified successfully + return 0; +} + + diff --git a/test_conformance/compatibility/test_conformance/api/test_kernel_arg_info.c b/test_conformance/compatibility/test_conformance/api/test_kernel_arg_info.c new file mode 100644 index 00000000..5b1e5e6d --- /dev/null +++ b/test_conformance/compatibility/test_conformance/api/test_kernel_arg_info.c @@ -0,0 +1,5192 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "testBase.h" +#include +#include +#ifndef _WIN32 +#include +#endif + +#define ARG_INFO_FIELD_COUNT 5 + +#define ARG_INFO_ADDR_OFFSET 1 +#define ARG_INFO_ACCESS_OFFSET 2 +#define ARG_INFO_TYPE_QUAL_OFFSET 3 +#define ARG_INFO_TYPE_NAME_OFFSET 4 +#define ARG_INFO_ARG_NAME_OFFSET 5 + +typedef char const * kernel_args_t[]; + +kernel_args_t required_kernel_args = { + "typedef float4 typedef_type;\n" + "\n" + "typedef struct struct_type {\n" + " float4 float4d;\n" + " int intd;\n" + "} typedef_struct_type;\n" + "\n" + "typedef union union_type {\n" + " float4 float4d;\n" + " uint4 uint4d;\n" + "} typedef_union_type;\n" + "\n" + "typedef enum enum_type {\n" + " enum_type_zero,\n" + " enum_type_one,\n" + " enum_type_two\n" + "} typedef_enum_type;\n" + "\n" + "kernel void constant_scalar_p0(constant void*constantvoidp,\n" + " constant char *constantcharp,\n" + " constant uchar* constantucharp,\n" + " constant unsigned char * constantunsignedcharp)\n" + "{}\n", + "kernel void constant_scalar_p1(constant short*constantshortp,\n" + " constant ushort *constantushortp,\n" + " constant unsigned short* constantunsignedshortp,\n" + " constant int * constantintp)\n" + "{}\n", + "kernel void constant_scalar_p2(constant uint*constantuintp,\n" + " constant unsigned int *constantunsignedintp,\n" + " constant long* constantlongp,\n" + " constant ulong * constantulongp)\n" + "{}\n", + "kernel void constant_scalar_p3(constant unsigned long*constantunsignedlongp,\n" + " constant float *constantfloatp)\n" + "{}\n", + "\n" + "kernel void 
constant_scalar_restrict_p0(constant void* restrict constantvoidrestrictp,\n" + " constant char * restrict constantcharrestrictp,\n" + " constant uchar*restrict constantucharrestrictp,\n" + " constant unsigned char *restrict constantunsignedcharrestrictp)\n" + "{}\n", + "kernel void constant_scalar_restrict_p1(constant short* restrict constantshortrestrictp,\n" + " constant ushort * restrict constantushortrestrictp,\n" + " constant unsigned short*restrict constantunsignedshortrestrictp,\n" + " constant int *restrict constantintrestrictp)\n" + "{}\n", + "kernel void constant_scalar_restrict_p2(constant uint* restrict constantuintrestrictp,\n" + " constant unsigned int * restrict constantunsignedintrestrictp,\n" + " constant long*restrict constantlongrestrictp,\n" + " constant ulong *restrict constantulongrestrictp)\n" + "{}\n", + "kernel void constant_scalar_restrict_p3(constant unsigned long* restrict constantunsignedlongrestrictp,\n" + " constant float * restrict constantfloatrestrictp)\n" + "{}\n", + "\n" + "kernel void global_scalar_p(global void*globalvoidp,\n" + " global char *globalcharp,\n" + " global uchar* globalucharp,\n" + " global unsigned char * globalunsignedcharp,\n" + " global short*globalshortp,\n" + " global ushort *globalushortp,\n" + " global unsigned short* globalunsignedshortp,\n" + " global int * globalintp,\n" + " global uint*globaluintp,\n" + " global unsigned int *globalunsignedintp,\n" + " global long* globallongp,\n" + " global ulong * globalulongp,\n" + " global unsigned long*globalunsignedlongp,\n" + " global float *globalfloatp)\n" + "{}\n", + "\n" + "kernel void global_scalar_restrict_p(global void* restrict globalvoidrestrictp,\n" + " global char * restrict globalcharrestrictp,\n" + " global uchar*restrict globalucharrestrictp,\n" + " global unsigned char *restrict globalunsignedcharrestrictp,\n" + " global short* restrict globalshortrestrictp,\n" + " global ushort * restrict globalushortrestrictp,\n" + " global unsigned 
short*restrict globalunsignedshortrestrictp,\n" + " global int *restrict globalintrestrictp,\n" + " global uint* restrict globaluintrestrictp,\n" + " global unsigned int * restrict globalunsignedintrestrictp,\n" + " global long*restrict globallongrestrictp,\n" + " global ulong *restrict globalulongrestrictp,\n" + " global unsigned long* restrict globalunsignedlongrestrictp,\n" + " global float * restrict globalfloatrestrictp)\n" + "{}\n", + "\n" + "kernel void global_const_scalar_p(global const void*globalconstvoidp,\n" + " global const char *globalconstcharp,\n" + " global const uchar* globalconstucharp,\n" + " global const unsigned char * globalconstunsignedcharp,\n" + " global const short*globalconstshortp,\n" + " global const ushort *globalconstushortp,\n" + " global const unsigned short* globalconstunsignedshortp,\n" + " global const int * globalconstintp,\n" + " global const uint*globalconstuintp,\n" + " global const unsigned int *globalconstunsignedintp,\n" + " global const long* globalconstlongp,\n" + " global const ulong * globalconstulongp,\n" + " global const unsigned long*globalconstunsignedlongp,\n" + " global const float *globalconstfloatp)\n" + "{}\n", + "\n" + "kernel void global_const_scalar_restrict_p(global const void* restrict globalconstvoidrestrictp,\n" + " global const char * restrict globalconstcharrestrictp,\n" + " global const uchar*restrict globalconstucharrestrictp,\n" + " global const unsigned char *restrict globalconstunsignedcharrestrictp,\n" + " global const short* restrict globalconstshortrestrictp,\n" + " global const ushort * restrict globalconstushortrestrictp,\n" + " global const unsigned short*restrict globalconstunsignedshortrestrictp,\n" + " global const int *restrict globalconstintrestrictp,\n" + " global const uint* restrict globalconstuintrestrictp,\n" + " global const unsigned int * restrict globalconstunsignedintrestrictp,\n" + " global const long*restrict globalconstlongrestrictp,\n" + " global const ulong *restrict 
globalconstulongrestrictp,\n" + " global const unsigned long* restrict globalconstunsignedlongrestrictp,\n" + " global const float * restrict globalconstfloatrestrictp)\n" + "{}\n", + "\n" + "kernel void global_volatile_scalar_p(global volatile void*globalvolatilevoidp,\n" + " global volatile char *globalvolatilecharp,\n" + " global volatile uchar* globalvolatileucharp,\n" + " global volatile unsigned char * globalvolatileunsignedcharp,\n" + " global volatile short*globalvolatileshortp,\n" + " global volatile ushort *globalvolatileushortp,\n" + " global volatile unsigned short* globalvolatileunsignedshortp,\n" + " global volatile int * globalvolatileintp,\n" + " global volatile uint*globalvolatileuintp,\n" + " global volatile unsigned int *globalvolatileunsignedintp,\n" + " global volatile long* globalvolatilelongp,\n" + " global volatile ulong * globalvolatileulongp,\n" + " global volatile unsigned long*globalvolatileunsignedlongp,\n" + " global volatile float *globalvolatilefloatp)\n" + "{}\n", + "\n" + "kernel void global_volatile_scalar_restrict_p(global volatile void* restrict globalvolatilevoidrestrictp,\n" + " global volatile char * restrict globalvolatilecharrestrictp,\n" + " global volatile uchar*restrict globalvolatileucharrestrictp,\n" + " global volatile unsigned char *restrict globalvolatileunsignedcharrestrictp,\n" + " global volatile short* restrict globalvolatileshortrestrictp,\n" + " global volatile ushort * restrict globalvolatileushortrestrictp,\n" + " global volatile unsigned short*restrict globalvolatileunsignedshortrestrictp,\n" + " global volatile int *restrict globalvolatileintrestrictp,\n" + " global volatile uint* restrict globalvolatileuintrestrictp,\n" + " global volatile unsigned int * restrict globalvolatileunsignedintrestrictp,\n" + " global volatile long*restrict globalvolatilelongrestrictp,\n" + " global volatile ulong *restrict globalvolatileulongrestrictp,\n" + " global volatile unsigned long* restrict 
globalvolatileunsignedlongrestrictp,\n" + " global volatile float * restrict globalvolatilefloatrestrictp)\n" + "{}\n", + "\n" + "kernel void global_const_volatile_scalar_p(global const volatile void*globalconstvolatilevoidp,\n" + " global const volatile char *globalconstvolatilecharp,\n" + " global const volatile uchar* globalconstvolatileucharp,\n" + " global const volatile unsigned char * globalconstvolatileunsignedcharp,\n" + " global const volatile short*globalconstvolatileshortp,\n" + " global const volatile ushort *globalconstvolatileushortp,\n" + " global const volatile unsigned short* globalconstvolatileunsignedshortp,\n" + " global const volatile int * globalconstvolatileintp,\n" + " global const volatile uint*globalconstvolatileuintp,\n" + " global const volatile unsigned int *globalconstvolatileunsignedintp,\n" + " global const volatile long* globalconstvolatilelongp,\n" + " global const volatile ulong * globalconstvolatileulongp,\n" + " global const volatile unsigned long*globalconstvolatileunsignedlongp,\n" + " global const volatile float *globalconstvolatilefloatp)\n" + "{}\n", + "\n" + "kernel void global_const_volatile_scalar_restrict_p(global const volatile void* restrict globalconstvolatilevoidrestrictp,\n" + " global const volatile char * restrict globalconstvolatilecharrestrictp,\n" + " global const volatile uchar*restrict globalconstvolatileucharrestrictp,\n" + " global const volatile unsigned char *restrict globalconstvolatileunsignedcharrestrictp,\n" + " global const volatile short* restrict globalconstvolatileshortrestrictp,\n" + " global const volatile ushort * restrict globalconstvolatileushortrestrictp,\n" + " global const volatile unsigned short*restrict globalconstvolatileunsignedshortrestrictp,\n" + " global const volatile int *restrict globalconstvolatileintrestrictp,\n" + " global const volatile uint* restrict globalconstvolatileuintrestrictp,\n" + " global const volatile unsigned int * restrict 
globalconstvolatileunsignedintrestrictp,\n" + " global const volatile long*restrict globalconstvolatilelongrestrictp,\n" + " global const volatile ulong *restrict globalconstvolatileulongrestrictp,\n" + " global const volatile unsigned long* restrict globalconstvolatileunsignedlongrestrictp,\n" + " global const volatile float * restrict globalconstvolatilefloatrestrictp)\n" + "{}\n", + "\n" + "kernel void local_scalar_p(local void*localvoidp,\n" + " local char *localcharp,\n" + " local uchar* localucharp,\n" + " local unsigned char * localunsignedcharp,\n" + " local short*localshortp,\n" + " local ushort *localushortp,\n" + " local unsigned short* localunsignedshortp,\n" + " local int * localintp,\n" + " local uint*localuintp,\n" + " local unsigned int *localunsignedintp,\n" + " local long* locallongp,\n" + " local ulong * localulongp,\n" + " local unsigned long*localunsignedlongp,\n" + " local float *localfloatp)\n" + "{}\n", + "\n" + "kernel void local_scalar_restrict_p(local void* restrict localvoidrestrictp,\n" + " local char * restrict localcharrestrictp,\n" + " local uchar*restrict localucharrestrictp,\n" + " local unsigned char *restrict localunsignedcharrestrictp,\n" + " local short* restrict localshortrestrictp,\n" + " local ushort * restrict localushortrestrictp,\n" + " local unsigned short*restrict localunsignedshortrestrictp,\n" + " local int *restrict localintrestrictp,\n" + " local uint* restrict localuintrestrictp,\n" + " local unsigned int * restrict localunsignedintrestrictp,\n" + " local long*restrict locallongrestrictp,\n" + " local ulong *restrict localulongrestrictp,\n" + " local unsigned long* restrict localunsignedlongrestrictp,\n" + " local float * restrict localfloatrestrictp)\n" + "{}\n", + "\n" + "kernel void local_const_scalar_p(local const void*localconstvoidp,\n" + " local const char *localconstcharp,\n" + " local const uchar* localconstucharp,\n" + " local const unsigned char * localconstunsignedcharp,\n" + " local const 
short*localconstshortp,\n" + " local const ushort *localconstushortp,\n" + " local const unsigned short* localconstunsignedshortp,\n" + " local const int * localconstintp,\n" + " local const uint*localconstuintp,\n" + " local const unsigned int *localconstunsignedintp,\n" + " local const long* localconstlongp,\n" + " local const ulong * localconstulongp,\n" + " local const unsigned long*localconstunsignedlongp,\n" + " local const float *localconstfloatp)\n" + "{}\n", + "\n" + "kernel void local_const_scalar_restrict_p(local const void* restrict localconstvoidrestrictp,\n" + " local const char * restrict localconstcharrestrictp,\n" + " local const uchar*restrict localconstucharrestrictp,\n" + " local const unsigned char *restrict localconstunsignedcharrestrictp,\n" + " local const short* restrict localconstshortrestrictp,\n" + " local const ushort * restrict localconstushortrestrictp,\n" + " local const unsigned short*restrict localconstunsignedshortrestrictp,\n" + " local const int *restrict localconstintrestrictp,\n" + " local const uint* restrict localconstuintrestrictp,\n" + " local const unsigned int * restrict localconstunsignedintrestrictp,\n" + " local const long*restrict localconstlongrestrictp,\n" + " local const ulong *restrict localconstulongrestrictp,\n" + " local const unsigned long* restrict localconstunsignedlongrestrictp,\n" + " local const float * restrict localconstfloatrestrictp)\n" + "{}\n", + "\n" + "kernel void local_volatile_scalar_p(local volatile void*localvolatilevoidp,\n" + " local volatile char *localvolatilecharp,\n" + " local volatile uchar* localvolatileucharp,\n" + " local volatile unsigned char * localvolatileunsignedcharp,\n" + " local volatile short*localvolatileshortp,\n" + " local volatile ushort *localvolatileushortp,\n" + " local volatile unsigned short* localvolatileunsignedshortp,\n" + " local volatile int * localvolatileintp,\n" + " local volatile uint*localvolatileuintp,\n" + " local volatile unsigned int 
*localvolatileunsignedintp,\n" + " local volatile long* localvolatilelongp,\n" + " local volatile ulong * localvolatileulongp,\n" + " local volatile unsigned long*localvolatileunsignedlongp,\n" + " local volatile float *localvolatilefloatp)\n" + "{}\n", + "\n" + "kernel void local_volatile_scalar_restrict_p(local volatile void* restrict localvolatilevoidrestrictp,\n" + " local volatile char * restrict localvolatilecharrestrictp,\n" + " local volatile uchar*restrict localvolatileucharrestrictp,\n" + " local volatile unsigned char *restrict localvolatileunsignedcharrestrictp,\n" + " local volatile short* restrict localvolatileshortrestrictp,\n" + " local volatile ushort * restrict localvolatileushortrestrictp,\n" + " local volatile unsigned short*restrict localvolatileunsignedshortrestrictp,\n" + " local volatile int *restrict localvolatileintrestrictp,\n" + " local volatile uint* restrict localvolatileuintrestrictp,\n" + " local volatile unsigned int * restrict localvolatileunsignedintrestrictp,\n" + " local volatile long*restrict localvolatilelongrestrictp,\n" + " local volatile ulong *restrict localvolatileulongrestrictp,\n" + " local volatile unsigned long* restrict localvolatileunsignedlongrestrictp,\n" + " local volatile float * restrict localvolatilefloatrestrictp)\n" + "{}\n", + "\n" + "kernel void local_const_volatile_scalar_p(local const volatile void*localconstvolatilevoidp,\n" + " local const volatile char *localconstvolatilecharp,\n" + " local const volatile uchar* localconstvolatileucharp,\n" + " local const volatile unsigned char * localconstvolatileunsignedcharp,\n" + " local const volatile short*localconstvolatileshortp,\n" + " local const volatile ushort *localconstvolatileushortp,\n" + " local const volatile unsigned short* localconstvolatileunsignedshortp,\n" + " local const volatile int * localconstvolatileintp,\n" + " local const volatile uint*localconstvolatileuintp,\n" + " local const volatile unsigned int *localconstvolatileunsignedintp,\n" + 
" local const volatile long* localconstvolatilelongp,\n" + " local const volatile ulong * localconstvolatileulongp,\n" + " local const volatile unsigned long*localconstvolatileunsignedlongp,\n" + " local const volatile float *localconstvolatilefloatp)\n" + "{}\n", + "\n" + "kernel void local_const_volatile_scalar_restrict_p(local const volatile void* restrict localconstvolatilevoidrestrictp,\n" + " local const volatile char * restrict localconstvolatilecharrestrictp,\n" + " local const volatile uchar*restrict localconstvolatileucharrestrictp,\n" + " local const volatile unsigned char *restrict localconstvolatileunsignedcharrestrictp,\n" + " local const volatile short* restrict localconstvolatileshortrestrictp,\n" + " local const volatile ushort * restrict localconstvolatileushortrestrictp,\n" + " local const volatile unsigned short*restrict localconstvolatileunsignedshortrestrictp,\n" + " local const volatile int *restrict localconstvolatileintrestrictp,\n" + " local const volatile uint* restrict localconstvolatileuintrestrictp,\n" + " local const volatile unsigned int * restrict localconstvolatileunsignedintrestrictp,\n" + " local const volatile long*restrict localconstvolatilelongrestrictp,\n" + " local const volatile ulong *restrict localconstvolatileulongrestrictp,\n" + " local const volatile unsigned long* restrict localconstvolatileunsignedlongrestrictp,\n" + " local const volatile float * restrict localconstvolatilefloatrestrictp)\n" + "{}\n", + "\n" + "kernel void scalar_d(char chard,\n" + " uchar uchard,\n" + " unsigned char unsignedchard,\n" + " short shortd,\n" + " ushort ushortd,\n" + " unsigned short unsignedshortd,\n" + " int intd,\n" + " uint uintd,\n" + " unsigned int unsignedintd,\n" + " long longd,\n" + " ulong ulongd,\n" + " unsigned long unsignedlongd,\n" + " float floatd)\n" + "{}\n", + "\n" + "kernel void const_scalar_d(const char constchard,\n" + " const uchar constuchard,\n" + " const unsigned char constunsignedchard,\n" + " const short 
constshortd,\n" + " const ushort constushortd,\n" + " const unsigned short constunsignedshortd,\n" + " const int constintd,\n" + " const uint constuintd,\n" + " const unsigned int constunsignedintd,\n" + " const long constlongd,\n" + " const ulong constulongd,\n" + " const unsigned long constunsignedlongd,\n" + " const float constfloatd)\n" + "{}\n", + "\n" + "kernel void private_scalar_d(private char privatechard,\n" + " private uchar privateuchard,\n" + " private unsigned char privateunsignedchard,\n" + " private short privateshortd,\n" + " private ushort privateushortd,\n" + " private unsigned short privateunsignedshortd,\n" + " private int privateintd,\n" + " private uint privateuintd,\n" + " private unsigned int privateunsignedintd,\n" + " private long privatelongd,\n" + " private ulong privateulongd,\n" + " private unsigned long privateunsignedlongd,\n" + " private float privatefloatd)\n" + "{}\n", + "\n" + "kernel void private_const_scalar_d(private const char privateconstchard,\n" + " private const uchar privateconstuchard,\n" + " private const unsigned char privateconstunsignedchard,\n" + " private const short privateconstshortd,\n" + " private const ushort privateconstushortd,\n" + " private const unsigned short privateconstunsignedshortd,\n" + " private const int privateconstintd,\n" + " private const uint privateconstuintd,\n" + " private const unsigned int privateconstunsignedintd,\n" + " private const long privateconstlongd,\n" + " private const ulong privateconstulongd,\n" + " private const unsigned long privateconstunsignedlongd,\n" + " private const float privateconstfloatd)\n" + "{}\n", + "\n" + "kernel void constant_vector2_p0(constant char2*constantchar2p,\n" + " constant uchar2 *constantuchar2p,\n" + " constant short2* constantshort2p,\n" + " constant ushort2 * constantushort2p)\n" + "{}\n", + "\n" + "kernel void constant_vector2_p1(constant int2*constantint2p,\n" + " constant uint2 *constantuint2p,\n" + " constant long2* constantlong2p,\n" + " 
constant ulong2 * constantulong2p)\n" + "{}\n", + "\n" + "kernel void constant_vector2_p2(constant float2*constantfloat2p)\n" + "{}\n", + "\n" + "kernel void constant_vector2_restrict_p0(constant char2 *restrict constantchar2restrictp,\n" + " constant uchar2* restrict constantuchar2restrictp,\n" + " constant short2 * restrict constantshort2restrictp,\n" + " constant ushort2*restrict constantushort2restrictp)\n" + "{}\n", + "\n" + "kernel void constant_vector2_restrict_p1(constant int2 *restrict constantint2restrictp,\n" + " constant uint2* restrict constantuint2restrictp,\n" + " constant long2 * restrict constantlong2restrictp,\n" + " constant ulong2*restrict constantulong2restrictp)\n" + "{}\n", + "\n" + "kernel void constant_vector2_restrict_p2(constant float2 *restrict constantfloat2restrictp)\n" + "{}\n", + "\n" + "kernel void global_vector2_p(global char2*globalchar2p,\n" + " global uchar2 *globaluchar2p,\n" + " global short2* globalshort2p,\n" + " global ushort2 * globalushort2p,\n" + " global int2*globalint2p,\n" + " global uint2 *globaluint2p,\n" + " global long2* globallong2p,\n" + " global ulong2 * globalulong2p,\n" + " global float2*globalfloat2p)\n" + "{}\n", + "\n" + "kernel void global_vector2_restrict_p(global char2 *restrict globalchar2restrictp,\n" + " global uchar2* restrict globaluchar2restrictp,\n" + " global short2 * restrict globalshort2restrictp,\n" + " global ushort2*restrict globalushort2restrictp,\n" + " global int2 *restrict globalint2restrictp,\n" + " global uint2* restrict globaluint2restrictp,\n" + " global long2 * restrict globallong2restrictp,\n" + " global ulong2*restrict globalulong2restrictp,\n" + " global float2 *restrict globalfloat2restrictp)\n" + "{}\n", + "\n" + "kernel void global_const_vector2_p(global const char2* globalconstchar2p,\n" + " global const uchar2 * globalconstuchar2p,\n" + " global const short2*globalconstshort2p,\n" + " global const ushort2 *globalconstushort2p,\n" + " global const int2* globalconstint2p,\n" 
+ " global const uint2 * globalconstuint2p,\n" + " global const long2*globalconstlong2p,\n" + " global const ulong2 *globalconstulong2p,\n" + " global const float2* globalconstfloat2p)\n" + "{}\n", + "\n" + "kernel void global_const_vector2_restrict_p(global const char2 * restrict globalconstchar2restrictp,\n" + " global const uchar2*restrict globalconstuchar2restrictp,\n" + " global const short2 *restrict globalconstshort2restrictp,\n" + " global const ushort2* restrict globalconstushort2restrictp,\n" + " global const int2 * restrict globalconstint2restrictp,\n" + " global const uint2*restrict globalconstuint2restrictp,\n" + " global const long2 *restrict globalconstlong2restrictp,\n" + " global const ulong2* restrict globalconstulong2restrictp,\n" + " global const float2 * restrict globalconstfloat2restrictp)\n" + "{}\n", + "\n" + "kernel void global_volatile_vector2_p(global volatile char2*globalvolatilechar2p,\n" + " global volatile uchar2 *globalvolatileuchar2p,\n" + " global volatile short2* globalvolatileshort2p,\n" + " global volatile ushort2 * globalvolatileushort2p,\n" + " global volatile int2*globalvolatileint2p,\n" + " global volatile uint2 *globalvolatileuint2p,\n" + " global volatile long2* globalvolatilelong2p,\n" + " global volatile ulong2 * globalvolatileulong2p,\n" + " global volatile float2*globalvolatilefloat2p)\n" + "{}\n", + "\n" + "kernel void global_volatile_vector2_restrict_p(global volatile char2 *restrict globalvolatilechar2restrictp,\n" + " global volatile uchar2* restrict globalvolatileuchar2restrictp,\n" + " global volatile short2 * restrict globalvolatileshort2restrictp,\n" + " global volatile ushort2*restrict globalvolatileushort2restrictp,\n" + " global volatile int2 *restrict globalvolatileint2restrictp,\n" + " global volatile uint2* restrict globalvolatileuint2restrictp,\n" + " global volatile long2 * restrict globalvolatilelong2restrictp,\n" + " global volatile ulong2*restrict globalvolatileulong2restrictp,\n" + " global volatile 
float2 *restrict globalvolatilefloat2restrictp)\n" + "{}\n", + "\n" + "kernel void global_const_volatile_vector2_p(global const volatile char2* globalconstvolatilechar2p,\n" + " global const volatile uchar2 * globalconstvolatileuchar2p,\n" + " global const volatile short2*globalconstvolatileshort2p,\n" + " global const volatile ushort2 *globalconstvolatileushort2p,\n" + " global const volatile int2* globalconstvolatileint2p,\n" + " global const volatile uint2 * globalconstvolatileuint2p,\n" + " global const volatile long2*globalconstvolatilelong2p,\n" + " global const volatile ulong2 *globalconstvolatileulong2p,\n" + " global const volatile float2* globalconstvolatilefloat2p)\n" + "{}\n", + "\n" + "kernel void global_const_volatile_vector2_restrict_p(global const volatile char2 * restrict globalconstvolatilechar2restrictp,\n" + " global const volatile uchar2*restrict globalconstvolatileuchar2restrictp,\n" + " global const volatile short2 *restrict globalconstvolatileshort2restrictp,\n" + " global const volatile ushort2* restrict globalconstvolatileushort2restrictp,\n" + " global const volatile int2 * restrict globalconstvolatileint2restrictp,\n" + " global const volatile uint2*restrict globalconstvolatileuint2restrictp,\n" + " global const volatile long2 *restrict globalconstvolatilelong2restrictp,\n" + " global const volatile ulong2* restrict globalconstvolatileulong2restrictp,\n" + " global const volatile float2 * restrict globalconstvolatilefloat2restrictp)\n" + "{}\n", + "\n" + "kernel void local_vector2_p(local char2*localchar2p,\n" + " local uchar2 *localuchar2p,\n" + " local short2* localshort2p,\n" + " local ushort2 * localushort2p,\n" + " local int2*localint2p,\n" + " local uint2 *localuint2p,\n" + " local long2* locallong2p,\n" + " local ulong2 * localulong2p,\n" + " local float2*localfloat2p)\n" + "{}\n", + "\n" + "kernel void local_vector2_restrict_p(local char2 *restrict localchar2restrictp,\n" + " local uchar2* restrict localuchar2restrictp,\n" + " 
local short2 * restrict localshort2restrictp,\n" + " local ushort2*restrict localushort2restrictp,\n" + " local int2 *restrict localint2restrictp,\n" + " local uint2* restrict localuint2restrictp,\n" + " local long2 * restrict locallong2restrictp,\n" + " local ulong2*restrict localulong2restrictp,\n" + " local float2 *restrict localfloat2restrictp)\n" + "{}\n", + "\n" + "kernel void local_const_vector2_p(local const char2* localconstchar2p,\n" + " local const uchar2 * localconstuchar2p,\n" + " local const short2*localconstshort2p,\n" + " local const ushort2 *localconstushort2p,\n" + " local const int2* localconstint2p,\n" + " local const uint2 * localconstuint2p,\n" + " local const long2*localconstlong2p,\n" + " local const ulong2 *localconstulong2p,\n" + " local const float2* localconstfloat2p)\n" + "{}\n", + "\n" + "kernel void local_const_vector2_restrict_p(local const char2 * restrict localconstchar2restrictp,\n" + " local const uchar2*restrict localconstuchar2restrictp,\n" + " local const short2 *restrict localconstshort2restrictp,\n" + " local const ushort2* restrict localconstushort2restrictp,\n" + " local const int2 * restrict localconstint2restrictp,\n" + " local const uint2*restrict localconstuint2restrictp,\n" + " local const long2 *restrict localconstlong2restrictp,\n" + " local const ulong2* restrict localconstulong2restrictp,\n" + " local const float2 * restrict localconstfloat2restrictp)\n" + "{}\n", + "\n" + "kernel void local_volatile_vector2_p(local volatile char2*localvolatilechar2p,\n" + " local volatile uchar2 *localvolatileuchar2p,\n" + " local volatile short2* localvolatileshort2p,\n" + " local volatile ushort2 * localvolatileushort2p,\n" + " local volatile int2*localvolatileint2p,\n" + " local volatile uint2 *localvolatileuint2p,\n" + " local volatile long2* localvolatilelong2p,\n" + " local volatile ulong2 * localvolatileulong2p,\n" + " local volatile float2*localvolatilefloat2p)\n" + "{}\n", + "\n" + "kernel void 
local_volatile_vector2_restrict_p(local volatile char2 *restrict localvolatilechar2restrictp,\n" + " local volatile uchar2* restrict localvolatileuchar2restrictp,\n" + " local volatile short2 * restrict localvolatileshort2restrictp,\n" + " local volatile ushort2*restrict localvolatileushort2restrictp,\n" + " local volatile int2 *restrict localvolatileint2restrictp,\n" + " local volatile uint2* restrict localvolatileuint2restrictp,\n" + " local volatile long2 * restrict localvolatilelong2restrictp,\n" + " local volatile ulong2*restrict localvolatileulong2restrictp,\n" + " local volatile float2 *restrict localvolatilefloat2restrictp)\n" + "{}\n", + "\n" + "kernel void local_const_volatile_vector2_p(local const volatile char2* localconstvolatilechar2p,\n" + " local const volatile uchar2 * localconstvolatileuchar2p,\n" + " local const volatile short2*localconstvolatileshort2p,\n" + " local const volatile ushort2 *localconstvolatileushort2p,\n" + " local const volatile int2* localconstvolatileint2p,\n" + " local const volatile uint2 * localconstvolatileuint2p,\n" + " local const volatile long2*localconstvolatilelong2p,\n" + " local const volatile ulong2 *localconstvolatileulong2p,\n" + " local const volatile float2* localconstvolatilefloat2p)\n" + "{}\n", + "\n" + "kernel void local_const_volatile_vector2_restrict_p(local const volatile char2 * restrict localconstvolatilechar2restrictp,\n" + " local const volatile uchar2*restrict localconstvolatileuchar2restrictp,\n" + " local const volatile short2 *restrict localconstvolatileshort2restrictp,\n" + " local const volatile ushort2* restrict localconstvolatileushort2restrictp,\n" + " local const volatile int2 * restrict localconstvolatileint2restrictp,\n" + " local const volatile uint2*restrict localconstvolatileuint2restrictp,\n" + " local const volatile long2 *restrict localconstvolatilelong2restrictp,\n" + " local const volatile ulong2* restrict localconstvolatileulong2restrictp,\n" + " local const volatile float2 * 
restrict localconstvolatilefloat2restrictp)\n" + "{}\n", + "\n" + "kernel void vector2_d(char2 char2d,\n" + " uchar2 uchar2d,\n" + " short2 short2d,\n" + " ushort2 ushort2d,\n" + " int2 int2d,\n" + " uint2 uint2d,\n" + " long2 long2d,\n" + " ulong2 ulong2d,\n" + " float2 float2d)\n" + "{}\n", + "\n" + "kernel void const_vector2_d(const char2 constchar2d,\n" + " const uchar2 constuchar2d,\n" + " const short2 constshort2d,\n" + " const ushort2 constushort2d,\n" + " const int2 constint2d,\n" + " const uint2 constuint2d,\n" + " const long2 constlong2d,\n" + " const ulong2 constulong2d,\n" + " const float2 constfloat2d)\n" + "{}\n", + "\n" + "kernel void private_vector2_d(private char2 privatechar2d,\n" + " private uchar2 privateuchar2d,\n" + " private short2 privateshort2d,\n" + " private ushort2 privateushort2d,\n" + " private int2 privateint2d,\n" + " private uint2 privateuint2d,\n" + " private long2 privatelong2d,\n" + " private ulong2 privateulong2d,\n" + " private float2 privatefloat2d)\n" + "{}\n", + "\n" + "kernel void private_const_vector2_d(private const char2 privateconstchar2d,\n" + " private const uchar2 privateconstuchar2d,\n" + " private const short2 privateconstshort2d,\n" + " private const ushort2 privateconstushort2d,\n" + " private const int2 privateconstint2d,\n" + " private const uint2 privateconstuint2d,\n" + " private const long2 privateconstlong2d,\n" + " private const ulong2 privateconstulong2d,\n" + " private const float2 privateconstfloat2d)\n" + "{}\n", + "\n" + "kernel void constant_vector3_p0(constant char3*constantchar3p,\n" + " constant uchar3 *constantuchar3p,\n" + " constant short3* constantshort3p,\n" + " constant ushort3 * constantushort3p)\n" + "{}\n", + "\n" + "kernel void constant_vector3_p1(constant int3*constantint3p,\n" + " constant uint3 *constantuint3p,\n" + " constant long3* constantlong3p,\n" + " constant ulong3 * constantulong3p)\n" + "{}\n", + "\n" + "kernel void constant_vector3_p2(constant float3*constantfloat3p)\n" + 
"{}\n", + "\n" + "kernel void constant_vector3_restrict_p0(constant char3 *restrict constantchar3restrictp,\n" + " constant uchar3* restrict constantuchar3restrictp,\n" + " constant short3 * restrict constantshort3restrictp,\n" + " constant ushort3*restrict constantushort3restrictp)\n" + "{}\n", + "\n" + "kernel void constant_vector3_restrict_p1(constant int3 *restrict constantint3restrictp,\n" + " constant uint3* restrict constantuint3restrictp,\n" + " constant long3 * restrict constantlong3restrictp,\n" + " constant ulong3*restrict constantulong3restrictp)\n" + "{}\n", + "\n" + "kernel void constant_vector3_restrict_p2(constant float3 *restrict constantfloat3restrictp)\n" + "{}\n", + "\n" + "kernel void global_vector3_p(global char3*globalchar3p,\n" + " global uchar3 *globaluchar3p,\n" + " global short3* globalshort3p,\n" + " global ushort3 * globalushort3p,\n" + " global int3*globalint3p,\n" + " global uint3 *globaluint3p,\n" + " global long3* globallong3p,\n" + " global ulong3 * globalulong3p,\n" + " global float3*globalfloat3p)\n" + "{}\n", + "\n" + "kernel void global_vector3_restrict_p(global char3 *restrict globalchar3restrictp,\n" + " global uchar3* restrict globaluchar3restrictp,\n" + " global short3 * restrict globalshort3restrictp,\n" + " global ushort3*restrict globalushort3restrictp,\n" + " global int3 *restrict globalint3restrictp,\n" + " global uint3* restrict globaluint3restrictp,\n" + " global long3 * restrict globallong3restrictp,\n" + " global ulong3*restrict globalulong3restrictp,\n" + " global float3 *restrict globalfloat3restrictp)\n" + "{}\n", + "\n" + "kernel void global_const_vector3_p(global const char3* globalconstchar3p,\n" + " global const uchar3 * globalconstuchar3p,\n" + " global const short3*globalconstshort3p,\n" + " global const ushort3 *globalconstushort3p,\n" + " global const int3* globalconstint3p,\n" + " global const uint3 * globalconstuint3p,\n" + " global const long3*globalconstlong3p,\n" + " global const ulong3 
*globalconstulong3p,\n" + " global const float3* globalconstfloat3p)\n" + "{}\n", + "\n" + "kernel void global_const_vector3_restrict_p(global const char3 * restrict globalconstchar3restrictp,\n" + " global const uchar3*restrict globalconstuchar3restrictp,\n" + " global const short3 *restrict globalconstshort3restrictp,\n" + " global const ushort3* restrict globalconstushort3restrictp,\n" + " global const int3 * restrict globalconstint3restrictp,\n" + " global const uint3*restrict globalconstuint3restrictp,\n" + " global const long3 *restrict globalconstlong3restrictp,\n" + " global const ulong3* restrict globalconstulong3restrictp,\n" + " global const float3 * restrict globalconstfloat3restrictp)\n" + "{}\n", + "\n" + "kernel void global_volatile_vector3_p(global volatile char3*globalvolatilechar3p,\n" + " global volatile uchar3 *globalvolatileuchar3p,\n" + " global volatile short3* globalvolatileshort3p,\n" + " global volatile ushort3 * globalvolatileushort3p,\n" + " global volatile int3*globalvolatileint3p,\n" + " global volatile uint3 *globalvolatileuint3p,\n" + " global volatile long3* globalvolatilelong3p,\n" + " global volatile ulong3 * globalvolatileulong3p,\n" + " global volatile float3*globalvolatilefloat3p)\n" + "{}\n", + "\n" + "kernel void global_volatile_vector3_restrict_p(global volatile char3 *restrict globalvolatilechar3restrictp,\n" + " global volatile uchar3* restrict globalvolatileuchar3restrictp,\n" + " global volatile short3 * restrict globalvolatileshort3restrictp,\n" + " global volatile ushort3*restrict globalvolatileushort3restrictp,\n" + " global volatile int3 *restrict globalvolatileint3restrictp,\n" + " global volatile uint3* restrict globalvolatileuint3restrictp,\n" + " global volatile long3 * restrict globalvolatilelong3restrictp,\n" + " global volatile ulong3*restrict globalvolatileulong3restrictp,\n" + " global volatile float3 *restrict globalvolatilefloat3restrictp)\n" + "{}\n", + "\n" + "kernel void 
global_const_volatile_vector3_p(global const volatile char3* globalconstvolatilechar3p,\n" + " global const volatile uchar3 * globalconstvolatileuchar3p,\n" + " global const volatile short3*globalconstvolatileshort3p,\n" + " global const volatile ushort3 *globalconstvolatileushort3p,\n" + " global const volatile int3* globalconstvolatileint3p,\n" + " global const volatile uint3 * globalconstvolatileuint3p,\n" + " global const volatile long3*globalconstvolatilelong3p,\n" + " global const volatile ulong3 *globalconstvolatileulong3p,\n" + " global const volatile float3* globalconstvolatilefloat3p)\n" + "{}\n", + "\n" + "kernel void global_const_volatile_vector3_restrict_p(global const volatile char3 * restrict globalconstvolatilechar3restrictp,\n" + " global const volatile uchar3*restrict globalconstvolatileuchar3restrictp,\n" + " global const volatile short3 *restrict globalconstvolatileshort3restrictp,\n" + " global const volatile ushort3* restrict globalconstvolatileushort3restrictp,\n" + " global const volatile int3 * restrict globalconstvolatileint3restrictp,\n" + " global const volatile uint3*restrict globalconstvolatileuint3restrictp,\n" + " global const volatile long3 *restrict globalconstvolatilelong3restrictp,\n" + " global const volatile ulong3* restrict globalconstvolatileulong3restrictp,\n" + " global const volatile float3 * restrict globalconstvolatilefloat3restrictp)\n" + "{}\n", + "\n" + "kernel void local_vector3_p(local char3*localchar3p,\n" + " local uchar3 *localuchar3p,\n" + " local short3* localshort3p,\n" + " local ushort3 * localushort3p,\n" + " local int3*localint3p,\n" + " local uint3 *localuint3p,\n" + " local long3* locallong3p,\n" + " local ulong3 * localulong3p,\n" + " local float3*localfloat3p)\n" + "{}\n", + "\n" + "kernel void local_vector3_restrict_p(local char3 *restrict localchar3restrictp,\n" + " local uchar3* restrict localuchar3restrictp,\n" + " local short3 * restrict localshort3restrictp,\n" + " local ushort3*restrict 
localushort3restrictp,\n" + " local int3 *restrict localint3restrictp,\n" + " local uint3* restrict localuint3restrictp,\n" + " local long3 * restrict locallong3restrictp,\n" + " local ulong3*restrict localulong3restrictp,\n" + " local float3 *restrict localfloat3restrictp)\n" + "{}\n", + "\n" + "kernel void local_const_vector3_p(local const char3* localconstchar3p,\n" + " local const uchar3 * localconstuchar3p,\n" + " local const short3*localconstshort3p,\n" + " local const ushort3 *localconstushort3p,\n" + " local const int3* localconstint3p,\n" + " local const uint3 * localconstuint3p,\n" + " local const long3*localconstlong3p,\n" + " local const ulong3 *localconstulong3p,\n" + " local const float3* localconstfloat3p)\n" + "{}\n", + "\n" + "kernel void local_const_vector3_restrict_p(local const char3 * restrict localconstchar3restrictp,\n" + " local const uchar3*restrict localconstuchar3restrictp,\n" + " local const short3 *restrict localconstshort3restrictp,\n" + " local const ushort3* restrict localconstushort3restrictp,\n" + " local const int3 * restrict localconstint3restrictp,\n" + " local const uint3*restrict localconstuint3restrictp,\n" + " local const long3 *restrict localconstlong3restrictp,\n" + " local const ulong3* restrict localconstulong3restrictp,\n" + " local const float3 * restrict localconstfloat3restrictp)\n" + "{}\n", + "\n" + "kernel void local_volatile_vector3_p(local volatile char3*localvolatilechar3p,\n" + " local volatile uchar3 *localvolatileuchar3p,\n" + " local volatile short3* localvolatileshort3p,\n" + " local volatile ushort3 * localvolatileushort3p,\n" + " local volatile int3*localvolatileint3p,\n" + " local volatile uint3 *localvolatileuint3p,\n" + " local volatile long3* localvolatilelong3p,\n" + " local volatile ulong3 * localvolatileulong3p,\n" + " local volatile float3*localvolatilefloat3p)\n" + "{}\n", + "\n" + "kernel void local_volatile_vector3_restrict_p(local volatile char3 *restrict localvolatilechar3restrictp,\n" + " 
local volatile uchar3* restrict localvolatileuchar3restrictp,\n" + " local volatile short3 * restrict localvolatileshort3restrictp,\n" + " local volatile ushort3*restrict localvolatileushort3restrictp,\n" + " local volatile int3 *restrict localvolatileint3restrictp,\n" + " local volatile uint3* restrict localvolatileuint3restrictp,\n" + " local volatile long3 * restrict localvolatilelong3restrictp,\n" + " local volatile ulong3*restrict localvolatileulong3restrictp,\n" + " local volatile float3 *restrict localvolatilefloat3restrictp)\n" + "{}\n", + "\n" + "kernel void local_const_volatile_vector3_p(local const volatile char3* localconstvolatilechar3p,\n" + " local const volatile uchar3 * localconstvolatileuchar3p,\n" + " local const volatile short3*localconstvolatileshort3p,\n" + " local const volatile ushort3 *localconstvolatileushort3p,\n" + " local const volatile int3* localconstvolatileint3p,\n" + " local const volatile uint3 * localconstvolatileuint3p,\n" + " local const volatile long3*localconstvolatilelong3p,\n" + " local const volatile ulong3 *localconstvolatileulong3p,\n" + " local const volatile float3* localconstvolatilefloat3p)\n" + "{}\n", + "\n" + "kernel void local_const_volatile_vector3_restrict_p(local const volatile char3 * restrict localconstvolatilechar3restrictp,\n" + " local const volatile uchar3*restrict localconstvolatileuchar3restrictp,\n" + " local const volatile short3 *restrict localconstvolatileshort3restrictp,\n" + " local const volatile ushort3* restrict localconstvolatileushort3restrictp,\n" + " local const volatile int3 * restrict localconstvolatileint3restrictp,\n" + " local const volatile uint3*restrict localconstvolatileuint3restrictp,\n" + " local const volatile long3 *restrict localconstvolatilelong3restrictp,\n" + " local const volatile ulong3* restrict localconstvolatileulong3restrictp,\n" + " local const volatile float3 * restrict localconstvolatilefloat3restrictp)\n" + "{}\n", + "\n" + "kernel void vector3_d(char3 char3d,\n" 
+ " uchar3 uchar3d,\n" + " short3 short3d,\n" + " ushort3 ushort3d,\n" + " int3 int3d,\n" + " uint3 uint3d,\n" + " long3 long3d,\n" + " ulong3 ulong3d,\n" + " float3 float3d)\n" + "{}\n", + "\n" + "kernel void const_vector3_d(const char3 constchar3d,\n" + " const uchar3 constuchar3d,\n" + " const short3 constshort3d,\n" + " const ushort3 constushort3d,\n" + " const int3 constint3d,\n" + " const uint3 constuint3d,\n" + " const long3 constlong3d,\n" + " const ulong3 constulong3d,\n" + " const float3 constfloat3d)\n" + "{}\n", + "\n" + "kernel void private_vector3_d(private char3 privatechar3d,\n" + " private uchar3 privateuchar3d,\n" + " private short3 privateshort3d,\n" + " private ushort3 privateushort3d,\n" + " private int3 privateint3d,\n" + " private uint3 privateuint3d,\n" + " private long3 privatelong3d,\n" + " private ulong3 privateulong3d,\n" + " private float3 privatefloat3d)\n" + "{}\n", + "\n" + "kernel void private_const_vector3_d(private const char3 privateconstchar3d,\n" + " private const uchar3 privateconstuchar3d,\n" + " private const short3 privateconstshort3d,\n" + " private const ushort3 privateconstushort3d,\n" + " private const int3 privateconstint3d,\n" + " private const uint3 privateconstuint3d,\n" + " private const long3 privateconstlong3d,\n" + " private const ulong3 privateconstulong3d,\n" + " private const float3 privateconstfloat3d)\n" + "{}\n", + "\n" + "kernel void constant_vector4_p0(constant char4*constantchar4p,\n" + " constant uchar4 *constantuchar4p,\n" + " constant short4* constantshort4p,\n" + " constant ushort4 * constantushort4p)\n" + "{}\n", + "\n" + "kernel void constant_vector4_p1(constant int4*constantint4p,\n" + " constant uint4 *constantuint4p,\n" + " constant long4* constantlong4p,\n" + " constant ulong4 * constantulong4p)\n" + "{}\n", + "\n" + "kernel void constant_vector4_p2(constant float4*constantfloat4p)\n" + "{}\n", + "\n" + "kernel void constant_vector4_restrict_p0(constant char4 *restrict 
constantchar4restrictp,\n" + " constant uchar4* restrict constantuchar4restrictp,\n" + " constant short4 * restrict constantshort4restrictp,\n" + " constant ushort4*restrict constantushort4restrictp)\n" + "{}\n", + "\n" + "kernel void constant_vector4_restrict_p1(constant int4 *restrict constantint4restrictp,\n" + " constant uint4* restrict constantuint4restrictp,\n" + " constant long4 * restrict constantlong4restrictp,\n" + " constant ulong4*restrict constantulong4restrictp)\n" + "{}\n", + "\n" + "kernel void constant_vector4_restrict_p2(constant float4 *restrict constantfloat4restrictp)\n" + "{}\n", + "\n" + "kernel void global_vector4_p(global char4*globalchar4p,\n" + " global uchar4 *globaluchar4p,\n" + " global short4* globalshort4p,\n" + " global ushort4 * globalushort4p,\n" + " global int4*globalint4p,\n" + " global uint4 *globaluint4p,\n" + " global long4* globallong4p,\n" + " global ulong4 * globalulong4p,\n" + " global float4*globalfloat4p)\n" + "{}\n", + "\n" + "kernel void global_vector4_restrict_p(global char4 *restrict globalchar4restrictp,\n" + " global uchar4* restrict globaluchar4restrictp,\n" + " global short4 * restrict globalshort4restrictp,\n" + " global ushort4*restrict globalushort4restrictp,\n" + " global int4 *restrict globalint4restrictp,\n" + " global uint4* restrict globaluint4restrictp,\n" + " global long4 * restrict globallong4restrictp,\n" + " global ulong4*restrict globalulong4restrictp,\n" + " global float4 *restrict globalfloat4restrictp)\n" + "{}\n", + "\n" + "kernel void global_const_vector4_p(global const char4* globalconstchar4p,\n" + " global const uchar4 * globalconstuchar4p,\n" + " global const short4*globalconstshort4p,\n" + " global const ushort4 *globalconstushort4p,\n" + " global const int4* globalconstint4p,\n" + " global const uint4 * globalconstuint4p,\n" + " global const long4*globalconstlong4p,\n" + " global const ulong4 *globalconstulong4p,\n" + " global const float4* globalconstfloat4p)\n" + "{}\n", + "\n" + 
"kernel void global_const_vector4_restrict_p(global const char4 * restrict globalconstchar4restrictp,\n" + " global const uchar4*restrict globalconstuchar4restrictp,\n" + " global const short4 *restrict globalconstshort4restrictp,\n" + " global const ushort4* restrict globalconstushort4restrictp,\n" + " global const int4 * restrict globalconstint4restrictp,\n" + " global const uint4*restrict globalconstuint4restrictp,\n" + " global const long4 *restrict globalconstlong4restrictp,\n" + " global const ulong4* restrict globalconstulong4restrictp,\n" + " global const float4 * restrict globalconstfloat4restrictp)\n" + "{}\n", + "\n" + "kernel void global_volatile_vector4_p(global volatile char4*globalvolatilechar4p,\n" + " global volatile uchar4 *globalvolatileuchar4p,\n" + " global volatile short4* globalvolatileshort4p,\n" + " global volatile ushort4 * globalvolatileushort4p,\n" + " global volatile int4*globalvolatileint4p,\n" + " global volatile uint4 *globalvolatileuint4p,\n" + " global volatile long4* globalvolatilelong4p,\n" + " global volatile ulong4 * globalvolatileulong4p,\n" + " global volatile float4*globalvolatilefloat4p)\n" + "{}\n", + "\n" + "kernel void global_volatile_vector4_restrict_p(global volatile char4 *restrict globalvolatilechar4restrictp,\n" + " global volatile uchar4* restrict globalvolatileuchar4restrictp,\n" + " global volatile short4 * restrict globalvolatileshort4restrictp,\n" + " global volatile ushort4*restrict globalvolatileushort4restrictp,\n" + " global volatile int4 *restrict globalvolatileint4restrictp,\n" + " global volatile uint4* restrict globalvolatileuint4restrictp,\n" + " global volatile long4 * restrict globalvolatilelong4restrictp,\n" + " global volatile ulong4*restrict globalvolatileulong4restrictp,\n" + " global volatile float4 *restrict globalvolatilefloat4restrictp)\n" + "{}\n", + "\n" + "kernel void global_const_volatile_vector4_p(global const volatile char4* globalconstvolatilechar4p,\n" + " global const volatile uchar4 
* globalconstvolatileuchar4p,\n" + " global const volatile short4*globalconstvolatileshort4p,\n" + " global const volatile ushort4 *globalconstvolatileushort4p,\n" + " global const volatile int4* globalconstvolatileint4p,\n" + " global const volatile uint4 * globalconstvolatileuint4p,\n" + " global const volatile long4*globalconstvolatilelong4p,\n" + " global const volatile ulong4 *globalconstvolatileulong4p,\n" + " global const volatile float4* globalconstvolatilefloat4p)\n" + "{}\n", + "\n" + "kernel void global_const_volatile_vector4_restrict_p(global const volatile char4 * restrict globalconstvolatilechar4restrictp,\n" + " global const volatile uchar4*restrict globalconstvolatileuchar4restrictp,\n" + " global const volatile short4 *restrict globalconstvolatileshort4restrictp,\n" + " global const volatile ushort4* restrict globalconstvolatileushort4restrictp,\n" + " global const volatile int4 * restrict globalconstvolatileint4restrictp,\n" + " global const volatile uint4*restrict globalconstvolatileuint4restrictp,\n" + " global const volatile long4 *restrict globalconstvolatilelong4restrictp,\n" + " global const volatile ulong4* restrict globalconstvolatileulong4restrictp,\n" + " global const volatile float4 * restrict globalconstvolatilefloat4restrictp)\n" + "{}\n", + "\n" + "kernel void local_vector4_p(local char4*localchar4p,\n" + " local uchar4 *localuchar4p,\n" + " local short4* localshort4p,\n" + " local ushort4 * localushort4p,\n" + " local int4*localint4p,\n" + " local uint4 *localuint4p,\n" + " local long4* locallong4p,\n" + " local ulong4 * localulong4p,\n" + " local float4*localfloat4p)\n" + "{}\n", + "\n" + "kernel void local_vector4_restrict_p(local char4 *restrict localchar4restrictp,\n" + " local uchar4* restrict localuchar4restrictp,\n" + " local short4 * restrict localshort4restrictp,\n" + " local ushort4*restrict localushort4restrictp,\n" + " local int4 *restrict localint4restrictp,\n" + " local uint4* restrict localuint4restrictp,\n" + " local 
long4 * restrict locallong4restrictp,\n" + " local ulong4*restrict localulong4restrictp,\n" + " local float4 *restrict localfloat4restrictp)\n" + "{}\n", + "\n" + "kernel void local_const_vector4_p(local const char4* localconstchar4p,\n" + " local const uchar4 * localconstuchar4p,\n" + " local const short4*localconstshort4p,\n" + " local const ushort4 *localconstushort4p,\n" + " local const int4* localconstint4p,\n" + " local const uint4 * localconstuint4p,\n" + " local const long4*localconstlong4p,\n" + " local const ulong4 *localconstulong4p,\n" + " local const float4* localconstfloat4p)\n" + "{}\n", + "\n" + "kernel void local_const_vector4_restrict_p(local const char4 * restrict localconstchar4restrictp,\n" + " local const uchar4*restrict localconstuchar4restrictp,\n" + " local const short4 *restrict localconstshort4restrictp,\n" + " local const ushort4* restrict localconstushort4restrictp,\n" + " local const int4 * restrict localconstint4restrictp,\n" + " local const uint4*restrict localconstuint4restrictp,\n" + " local const long4 *restrict localconstlong4restrictp,\n" + " local const ulong4* restrict localconstulong4restrictp,\n" + " local const float4 * restrict localconstfloat4restrictp)\n" + "{}\n", + "\n" + "kernel void local_volatile_vector4_p(local volatile char4*localvolatilechar4p,\n" + " local volatile uchar4 *localvolatileuchar4p,\n" + " local volatile short4* localvolatileshort4p,\n" + " local volatile ushort4 * localvolatileushort4p,\n" + " local volatile int4*localvolatileint4p,\n" + " local volatile uint4 *localvolatileuint4p,\n" + " local volatile long4* localvolatilelong4p,\n" + " local volatile ulong4 * localvolatileulong4p,\n" + " local volatile float4*localvolatilefloat4p)\n" + "{}\n", + "\n" + "kernel void local_volatile_vector4_restrict_p(local volatile char4 *restrict localvolatilechar4restrictp,\n" + " local volatile uchar4* restrict localvolatileuchar4restrictp,\n" + " local volatile short4 * restrict localvolatileshort4restrictp,\n" 
+ " local volatile ushort4*restrict localvolatileushort4restrictp,\n" + " local volatile int4 *restrict localvolatileint4restrictp,\n" + " local volatile uint4* restrict localvolatileuint4restrictp,\n" + " local volatile long4 * restrict localvolatilelong4restrictp,\n" + " local volatile ulong4*restrict localvolatileulong4restrictp,\n" + " local volatile float4 *restrict localvolatilefloat4restrictp)\n" + "{}\n", + "\n" + "kernel void local_const_volatile_vector4_p(local const volatile char4* localconstvolatilechar4p,\n" + " local const volatile uchar4 * localconstvolatileuchar4p,\n" + " local const volatile short4*localconstvolatileshort4p,\n" + " local const volatile ushort4 *localconstvolatileushort4p,\n" + " local const volatile int4* localconstvolatileint4p,\n" + " local const volatile uint4 * localconstvolatileuint4p,\n" + " local const volatile long4*localconstvolatilelong4p,\n" + " local const volatile ulong4 *localconstvolatileulong4p,\n" + " local const volatile float4* localconstvolatilefloat4p)\n" + "{}\n", + "\n" + "kernel void local_const_volatile_vector4_restrict_p(local const volatile char4 * restrict localconstvolatilechar4restrictp,\n" + " local const volatile uchar4*restrict localconstvolatileuchar4restrictp,\n" + " local const volatile short4 *restrict localconstvolatileshort4restrictp,\n" + " local const volatile ushort4* restrict localconstvolatileushort4restrictp,\n" + " local const volatile int4 * restrict localconstvolatileint4restrictp,\n" + " local const volatile uint4*restrict localconstvolatileuint4restrictp,\n" + " local const volatile long4 *restrict localconstvolatilelong4restrictp,\n" + " local const volatile ulong4* restrict localconstvolatileulong4restrictp,\n" + " local const volatile float4 * restrict localconstvolatilefloat4restrictp)\n" + "{}\n", + "\n" + "kernel void vector4_d(char4 char4d,\n" + " uchar4 uchar4d,\n" + " short4 short4d,\n" + " ushort4 ushort4d,\n" + " int4 int4d,\n" + " uint4 uint4d,\n" + " long4 long4d,\n" + 
" ulong4 ulong4d,\n" + " float4 float4d)\n" + "{}\n", + "\n" + "kernel void const_vector4_d(const char4 constchar4d,\n" + " const uchar4 constuchar4d,\n" + " const short4 constshort4d,\n" + " const ushort4 constushort4d,\n" + " const int4 constint4d,\n" + " const uint4 constuint4d,\n" + " const long4 constlong4d,\n" + " const ulong4 constulong4d,\n" + " const float4 constfloat4d)\n" + "{}\n", + "\n" + "kernel void private_vector4_d(private char4 privatechar4d,\n" + " private uchar4 privateuchar4d,\n" + " private short4 privateshort4d,\n" + " private ushort4 privateushort4d,\n" + " private int4 privateint4d,\n" + " private uint4 privateuint4d,\n" + " private long4 privatelong4d,\n" + " private ulong4 privateulong4d,\n" + " private float4 privatefloat4d)\n" + "{}\n", + "\n" + "kernel void private_const_vector4_d(private const char4 privateconstchar4d,\n" + " private const uchar4 privateconstuchar4d,\n" + " private const short4 privateconstshort4d,\n" + " private const ushort4 privateconstushort4d,\n" + " private const int4 privateconstint4d,\n" + " private const uint4 privateconstuint4d,\n" + " private const long4 privateconstlong4d,\n" + " private const ulong4 privateconstulong4d,\n" + " private const float4 privateconstfloat4d)\n" + "{}\n", + "\n" + "kernel void constant_vector8_p0(constant char8*constantchar8p,\n" + " constant uchar8 *constantuchar8p,\n" + " constant short8* constantshort8p,\n" + " constant ushort8 * constantushort8p)\n" + "{}\n", + "\n" + "kernel void constant_vector8_p1(constant int8*constantint8p,\n" + " constant uint8 *constantuint8p,\n" + " constant long8* constantlong8p,\n" + " constant ulong8 * constantulong8p)\n" + "{}\n", + "\n" + "kernel void constant_vector8_p2(constant float8*constantfloat8p)\n" + "{}\n", + "\n" + "kernel void constant_vector8_restrict_p0(constant char8 *restrict constantchar8restrictp,\n" + " constant uchar8* restrict constantuchar8restrictp,\n" + " constant short8 * restrict constantshort8restrictp,\n" + " constant 
ushort8*restrict constantushort8restrictp)\n" + "{}\n", + "\n" + "kernel void constant_vector8_restrict_p1(constant int8 *restrict constantint8restrictp,\n" + " constant uint8* restrict constantuint8restrictp,\n" + " constant long8 * restrict constantlong8restrictp,\n" + " constant ulong8*restrict constantulong8restrictp)\n" + "{}\n", + "\n" + "kernel void constant_vector8_restrict_p2(constant float8 *restrict constantfloat8restrictp)\n" + "{}\n", + "\n" + "kernel void global_vector8_p(global char8*globalchar8p,\n" + " global uchar8 *globaluchar8p,\n" + " global short8* globalshort8p,\n" + " global ushort8 * globalushort8p,\n" + " global int8*globalint8p,\n" + " global uint8 *globaluint8p,\n" + " global long8* globallong8p,\n" + " global ulong8 * globalulong8p,\n" + " global float8*globalfloat8p)\n" + "{}\n", + "\n" + "kernel void global_vector8_restrict_p(global char8 *restrict globalchar8restrictp,\n" + " global uchar8* restrict globaluchar8restrictp,\n" + " global short8 * restrict globalshort8restrictp,\n" + " global ushort8*restrict globalushort8restrictp,\n" + " global int8 *restrict globalint8restrictp,\n" + " global uint8* restrict globaluint8restrictp,\n" + " global long8 * restrict globallong8restrictp,\n" + " global ulong8*restrict globalulong8restrictp,\n" + " global float8 *restrict globalfloat8restrictp)\n" + "{}\n", + "\n" + "kernel void global_const_vector8_p(global const char8* globalconstchar8p,\n" + " global const uchar8 * globalconstuchar8p,\n" + " global const short8*globalconstshort8p,\n" + " global const ushort8 *globalconstushort8p,\n" + " global const int8* globalconstint8p,\n" + " global const uint8 * globalconstuint8p,\n" + " global const long8*globalconstlong8p,\n" + " global const ulong8 *globalconstulong8p,\n" + " global const float8* globalconstfloat8p)\n" + "{}\n", + "\n" + "kernel void global_const_vector8_restrict_p(global const char8 * restrict globalconstchar8restrictp,\n" + " global const uchar8*restrict 
globalconstuchar8restrictp,\n" + " global const short8 *restrict globalconstshort8restrictp,\n" + " global const ushort8* restrict globalconstushort8restrictp,\n" + " global const int8 * restrict globalconstint8restrictp,\n" + " global const uint8*restrict globalconstuint8restrictp,\n" + " global const long8 *restrict globalconstlong8restrictp,\n" + " global const ulong8* restrict globalconstulong8restrictp,\n" + " global const float8 * restrict globalconstfloat8restrictp)\n" + "{}\n", + "\n" + "kernel void global_volatile_vector8_p(global volatile char8*globalvolatilechar8p,\n" + " global volatile uchar8 *globalvolatileuchar8p,\n" + " global volatile short8* globalvolatileshort8p,\n" + " global volatile ushort8 * globalvolatileushort8p,\n" + " global volatile int8*globalvolatileint8p,\n" + " global volatile uint8 *globalvolatileuint8p,\n" + " global volatile long8* globalvolatilelong8p,\n" + " global volatile ulong8 * globalvolatileulong8p,\n" + " global volatile float8*globalvolatilefloat8p)\n" + "{}\n", + "\n" + "kernel void global_volatile_vector8_restrict_p(global volatile char8 *restrict globalvolatilechar8restrictp,\n" + " global volatile uchar8* restrict globalvolatileuchar8restrictp,\n" + " global volatile short8 * restrict globalvolatileshort8restrictp,\n" + " global volatile ushort8*restrict globalvolatileushort8restrictp,\n" + " global volatile int8 *restrict globalvolatileint8restrictp,\n" + " global volatile uint8* restrict globalvolatileuint8restrictp,\n" + " global volatile long8 * restrict globalvolatilelong8restrictp,\n" + " global volatile ulong8*restrict globalvolatileulong8restrictp,\n" + " global volatile float8 *restrict globalvolatilefloat8restrictp)\n" + "{}\n", + "\n" + "kernel void global_const_volatile_vector8_p(global const volatile char8* globalconstvolatilechar8p,\n" + " global const volatile uchar8 * globalconstvolatileuchar8p,\n" + " global const volatile short8*globalconstvolatileshort8p,\n" + " global const volatile ushort8 
*globalconstvolatileushort8p,\n" + " global const volatile int8* globalconstvolatileint8p,\n" + " global const volatile uint8 * globalconstvolatileuint8p,\n" + " global const volatile long8*globalconstvolatilelong8p,\n" + " global const volatile ulong8 *globalconstvolatileulong8p,\n" + " global const volatile float8* globalconstvolatilefloat8p)\n" + "{}\n", + "\n" + "kernel void global_const_volatile_vector8_restrict_p(global const volatile char8 * restrict globalconstvolatilechar8restrictp,\n" + " global const volatile uchar8*restrict globalconstvolatileuchar8restrictp,\n" + " global const volatile short8 *restrict globalconstvolatileshort8restrictp,\n" + " global const volatile ushort8* restrict globalconstvolatileushort8restrictp,\n" + " global const volatile int8 * restrict globalconstvolatileint8restrictp,\n" + " global const volatile uint8*restrict globalconstvolatileuint8restrictp,\n" + " global const volatile long8 *restrict globalconstvolatilelong8restrictp,\n" + " global const volatile ulong8* restrict globalconstvolatileulong8restrictp,\n" + " global const volatile float8 * restrict globalconstvolatilefloat8restrictp)\n" + "{}\n", + "\n" + "kernel void local_vector8_p(local char8*localchar8p,\n" + " local uchar8 *localuchar8p,\n" + " local short8* localshort8p,\n" + " local ushort8 * localushort8p,\n" + " local int8*localint8p,\n" + " local uint8 *localuint8p,\n" + " local long8* locallong8p,\n" + " local ulong8 * localulong8p,\n" + " local float8*localfloat8p)\n" + "{}\n", + "\n" + "kernel void local_vector8_restrict_p(local char8 *restrict localchar8restrictp,\n" + " local uchar8* restrict localuchar8restrictp,\n" + " local short8 * restrict localshort8restrictp,\n" + " local ushort8*restrict localushort8restrictp,\n" + " local int8 *restrict localint8restrictp,\n" + " local uint8* restrict localuint8restrictp,\n" + " local long8 * restrict locallong8restrictp,\n" + " local ulong8*restrict localulong8restrictp,\n" + " local float8 *restrict 
localfloat8restrictp)\n" + "{}\n", + "\n" + "kernel void local_const_vector8_p(local const char8* localconstchar8p,\n" + " local const uchar8 * localconstuchar8p,\n" + " local const short8*localconstshort8p,\n" + " local const ushort8 *localconstushort8p,\n" + " local const int8* localconstint8p,\n" + " local const uint8 * localconstuint8p,\n" + " local const long8*localconstlong8p,\n" + " local const ulong8 *localconstulong8p,\n" + " local const float8* localconstfloat8p)\n" + "{}\n", + "\n" + "kernel void local_const_vector8_restrict_p(local const char8 * restrict localconstchar8restrictp,\n" + " local const uchar8*restrict localconstuchar8restrictp,\n" + " local const short8 *restrict localconstshort8restrictp,\n" + " local const ushort8* restrict localconstushort8restrictp,\n" + " local const int8 * restrict localconstint8restrictp,\n" + " local const uint8*restrict localconstuint8restrictp,\n" + " local const long8 *restrict localconstlong8restrictp,\n" + " local const ulong8* restrict localconstulong8restrictp,\n" + " local const float8 * restrict localconstfloat8restrictp)\n" + "{}\n", + "\n" + "kernel void local_volatile_vector8_p(local volatile char8*localvolatilechar8p,\n" + " local volatile uchar8 *localvolatileuchar8p,\n" + " local volatile short8* localvolatileshort8p,\n" + " local volatile ushort8 * localvolatileushort8p,\n" + " local volatile int8*localvolatileint8p,\n" + " local volatile uint8 *localvolatileuint8p,\n" + " local volatile long8* localvolatilelong8p,\n" + " local volatile ulong8 * localvolatileulong8p,\n" + " local volatile float8*localvolatilefloat8p)\n" + "{}\n", + "\n" + "kernel void local_volatile_vector8_restrict_p(local volatile char8 *restrict localvolatilechar8restrictp,\n" + " local volatile uchar8* restrict localvolatileuchar8restrictp,\n" + " local volatile short8 * restrict localvolatileshort8restrictp,\n" + " local volatile ushort8*restrict localvolatileushort8restrictp,\n" + " local volatile int8 *restrict 
localvolatileint8restrictp,\n" + " local volatile uint8* restrict localvolatileuint8restrictp,\n" + " local volatile long8 * restrict localvolatilelong8restrictp,\n" + " local volatile ulong8*restrict localvolatileulong8restrictp,\n" + " local volatile float8 *restrict localvolatilefloat8restrictp)\n" + "{}\n", + "\n" + "kernel void local_const_volatile_vector8_p(local const volatile char8* localconstvolatilechar8p,\n" + " local const volatile uchar8 * localconstvolatileuchar8p,\n" + " local const volatile short8*localconstvolatileshort8p,\n" + " local const volatile ushort8 *localconstvolatileushort8p,\n" + " local const volatile int8* localconstvolatileint8p,\n" + " local const volatile uint8 * localconstvolatileuint8p,\n" + " local const volatile long8*localconstvolatilelong8p,\n" + " local const volatile ulong8 *localconstvolatileulong8p,\n" + " local const volatile float8* localconstvolatilefloat8p)\n" + "{}\n", + "\n" + "kernel void local_const_volatile_vector8_restrict_p(local const volatile char8 * restrict localconstvolatilechar8restrictp,\n" + " local const volatile uchar8*restrict localconstvolatileuchar8restrictp,\n" + " local const volatile short8 *restrict localconstvolatileshort8restrictp,\n" + " local const volatile ushort8* restrict localconstvolatileushort8restrictp,\n" + " local const volatile int8 * restrict localconstvolatileint8restrictp,\n" + " local const volatile uint8*restrict localconstvolatileuint8restrictp,\n" + " local const volatile long8 *restrict localconstvolatilelong8restrictp,\n" + " local const volatile ulong8* restrict localconstvolatileulong8restrictp,\n" + " local const volatile float8 * restrict localconstvolatilefloat8restrictp)\n" + "{}\n", + "\n" + "kernel void vector8_d(char8 char8d,\n" + " uchar8 uchar8d,\n" + " short8 short8d,\n" + " ushort8 ushort8d,\n" + " int8 int8d,\n" + " uint8 uint8d,\n" + " long8 long8d,\n" + " ulong8 ulong8d,\n" + " float8 float8d)\n" + "{}\n", + "\n" + "kernel void const_vector8_d(const char8 
constchar8d,\n" + " const uchar8 constuchar8d,\n" + " const short8 constshort8d,\n" + " const ushort8 constushort8d,\n" + " const int8 constint8d,\n" + " const uint8 constuint8d,\n" + " const long8 constlong8d,\n" + " const ulong8 constulong8d,\n" + " const float8 constfloat8d)\n" + "{}\n", + "\n" + "kernel void private_vector8_d(private char8 privatechar8d,\n" + " private uchar8 privateuchar8d,\n" + " private short8 privateshort8d,\n" + " private ushort8 privateushort8d,\n" + " private int8 privateint8d,\n" + " private uint8 privateuint8d,\n" + " private long8 privatelong8d,\n" + " private ulong8 privateulong8d,\n" + " private float8 privatefloat8d)\n" + "{}\n", + "\n" + "kernel void private_const_vector8_d(private const char8 privateconstchar8d,\n" + " private const uchar8 privateconstuchar8d,\n" + " private const short8 privateconstshort8d,\n" + " private const ushort8 privateconstushort8d,\n" + " private const int8 privateconstint8d,\n" + " private const uint8 privateconstuint8d,\n" + " private const long8 privateconstlong8d,\n" + " private const ulong8 privateconstulong8d,\n" + " private const float8 privateconstfloat8d)\n" + "{}\n", + "\n" + "kernel void constant_vector16_p0(constant char16*constantchar16p,\n" + " constant uchar16 *constantuchar16p,\n" + " constant short16* constantshort16p,\n" + " constant ushort16 * constantushort16p)\n" + "{}\n", + "\n" + "kernel void constant_vector16_p1(constant int16*constantint16p,\n" + " constant uint16 *constantuint16p,\n" + " constant long16* constantlong16p,\n" + " constant ulong16 * constantulong16p)\n" + "{}\n", + "\n" + "kernel void constant_vector16_p2(constant float16*constantfloat16p)\n" + "{}\n", + "\n" + "kernel void constant_vector16_restrict_p0(constant char16 *restrict constantchar16restrictp,\n" + " constant uchar16* restrict constantuchar16restrictp,\n" + " constant short16 * restrict constantshort16restrictp,\n" + " constant ushort16*restrict constantushort16restrictp)\n" + "{}\n", + "\n" + "kernel 
void constant_vector16_restrict_p1(constant int16 *restrict constantint16restrictp,\n" + " constant uint16* restrict constantuint16restrictp,\n" + " constant long16 * restrict constantlong16restrictp,\n" + " constant ulong16*restrict constantulong16restrictp)\n" + "{}\n", + "\n" + "kernel void constant_vector16_restrict_p2(constant float16 *restrict constantfloat16restrictp)\n" + "{}\n", + "\n" + "kernel void global_vector16_p(global char16*globalchar16p,\n" + " global uchar16 *globaluchar16p,\n" + " global short16* globalshort16p,\n" + " global ushort16 * globalushort16p,\n" + " global int16*globalint16p,\n" + " global uint16 *globaluint16p,\n" + " global long16* globallong16p,\n" + " global ulong16 * globalulong16p,\n" + " global float16*globalfloat16p)\n" + "{}\n", + "\n" + "kernel void global_vector16_restrict_p(global char16 *restrict globalchar16restrictp,\n" + " global uchar16* restrict globaluchar16restrictp,\n" + " global short16 * restrict globalshort16restrictp,\n" + " global ushort16*restrict globalushort16restrictp,\n" + " global int16 *restrict globalint16restrictp,\n" + " global uint16* restrict globaluint16restrictp,\n" + " global long16 * restrict globallong16restrictp,\n" + " global ulong16*restrict globalulong16restrictp,\n" + " global float16 *restrict globalfloat16restrictp)\n" + "{}\n", + "\n" + "kernel void global_const_vector16_p(global const char16* globalconstchar16p,\n" + " global const uchar16 * globalconstuchar16p,\n" + " global const short16*globalconstshort16p,\n" + " global const ushort16 *globalconstushort16p,\n" + " global const int16* globalconstint16p,\n" + " global const uint16 * globalconstuint16p,\n" + " global const long16*globalconstlong16p,\n" + " global const ulong16 *globalconstulong16p,\n" + " global const float16* globalconstfloat16p)\n" + "{}\n", + "\n" + "kernel void global_const_vector16_restrict_p(global const char16 * restrict globalconstchar16restrictp,\n" + " global const uchar16*restrict 
globalconstuchar16restrictp,\n" + " global const short16 *restrict globalconstshort16restrictp,\n" + " global const ushort16* restrict globalconstushort16restrictp,\n" + " global const int16 * restrict globalconstint16restrictp,\n" + " global const uint16*restrict globalconstuint16restrictp,\n" + " global const long16 *restrict globalconstlong16restrictp,\n" + " global const ulong16* restrict globalconstulong16restrictp,\n" + " global const float16 * restrict globalconstfloat16restrictp)\n" + "{}\n", + "\n" + "kernel void global_volatile_vector16_p(global volatile char16*globalvolatilechar16p,\n" + " global volatile uchar16 *globalvolatileuchar16p,\n" + " global volatile short16* globalvolatileshort16p,\n" + " global volatile ushort16 * globalvolatileushort16p,\n" + " global volatile int16*globalvolatileint16p,\n" + " global volatile uint16 *globalvolatileuint16p,\n" + " global volatile long16* globalvolatilelong16p,\n" + " global volatile ulong16 * globalvolatileulong16p,\n" + " global volatile float16*globalvolatilefloat16p)\n" + "{}\n", + "\n" + "kernel void global_volatile_vector16_restrict_p(global volatile char16 *restrict globalvolatilechar16restrictp,\n" + " global volatile uchar16* restrict globalvolatileuchar16restrictp,\n" + " global volatile short16 * restrict globalvolatileshort16restrictp,\n" + " global volatile ushort16*restrict globalvolatileushort16restrictp,\n" + " global volatile int16 *restrict globalvolatileint16restrictp,\n" + " global volatile uint16* restrict globalvolatileuint16restrictp,\n" + " global volatile long16 * restrict globalvolatilelong16restrictp,\n" + " global volatile ulong16*restrict globalvolatileulong16restrictp,\n" + " global volatile float16 *restrict globalvolatilefloat16restrictp)\n" + "{}\n", + "\n" + "kernel void global_const_volatile_vector16_p(global const volatile char16* globalconstvolatilechar16p,\n" + " global const volatile uchar16 * globalconstvolatileuchar16p,\n" + " global const volatile 
short16*globalconstvolatileshort16p,\n" + " global const volatile ushort16 *globalconstvolatileushort16p,\n" + " global const volatile int16* globalconstvolatileint16p,\n" + " global const volatile uint16 * globalconstvolatileuint16p,\n" + " global const volatile long16*globalconstvolatilelong16p,\n" + " global const volatile ulong16 *globalconstvolatileulong16p,\n" + " global const volatile float16* globalconstvolatilefloat16p)\n" + "{}\n", + "\n" + "kernel void global_const_volatile_vector16_restrict_p(global const volatile char16 * restrict globalconstvolatilechar16restrictp,\n" + " global const volatile uchar16*restrict globalconstvolatileuchar16restrictp,\n" + " global const volatile short16 *restrict globalconstvolatileshort16restrictp,\n" + " global const volatile ushort16* restrict globalconstvolatileushort16restrictp,\n" + " global const volatile int16 * restrict globalconstvolatileint16restrictp,\n" + " global const volatile uint16*restrict globalconstvolatileuint16restrictp,\n" + " global const volatile long16 *restrict globalconstvolatilelong16restrictp,\n" + " global const volatile ulong16* restrict globalconstvolatileulong16restrictp,\n" + " global const volatile float16 * restrict globalconstvolatilefloat16restrictp)\n" + "{}\n", + "\n" + "kernel void local_vector16_p(local char16*localchar16p,\n" + " local uchar16 *localuchar16p,\n" + " local short16* localshort16p,\n" + " local ushort16 * localushort16p,\n" + " local int16*localint16p,\n" + " local uint16 *localuint16p,\n" + " local long16* locallong16p,\n" + " local ulong16 * localulong16p,\n" + " local float16*localfloat16p)\n" + "{}\n", + "\n" + "kernel void local_vector16_restrict_p(local char16 *restrict localchar16restrictp,\n" + " local uchar16* restrict localuchar16restrictp,\n" + " local short16 * restrict localshort16restrictp,\n" + " local ushort16*restrict localushort16restrictp,\n" + " local int16 *restrict localint16restrictp,\n" + " local uint16* restrict localuint16restrictp,\n" + " 
local long16 * restrict locallong16restrictp,\n" + " local ulong16*restrict localulong16restrictp,\n" + " local float16 *restrict localfloat16restrictp)\n" + "{}\n", + "\n" + "kernel void local_const_vector16_p(local const char16* localconstchar16p,\n" + " local const uchar16 * localconstuchar16p,\n" + " local const short16*localconstshort16p,\n" + " local const ushort16 *localconstushort16p,\n" + " local const int16* localconstint16p,\n" + " local const uint16 * localconstuint16p,\n" + " local const long16*localconstlong16p,\n" + " local const ulong16 *localconstulong16p,\n" + " local const float16* localconstfloat16p)\n" + "{}\n", + "\n" + "kernel void local_const_vector16_restrict_p(local const char16 * restrict localconstchar16restrictp,\n" + " local const uchar16*restrict localconstuchar16restrictp,\n" + " local const short16 *restrict localconstshort16restrictp,\n" + " local const ushort16* restrict localconstushort16restrictp,\n" + " local const int16 * restrict localconstint16restrictp,\n" + " local const uint16*restrict localconstuint16restrictp,\n" + " local const long16 *restrict localconstlong16restrictp,\n" + " local const ulong16* restrict localconstulong16restrictp,\n" + " local const float16 * restrict localconstfloat16restrictp)\n" + "{}\n", + "\n" + "kernel void local_volatile_vector16_p(local volatile char16*localvolatilechar16p,\n" + " local volatile uchar16 *localvolatileuchar16p,\n" + " local volatile short16* localvolatileshort16p,\n" + " local volatile ushort16 * localvolatileushort16p,\n" + " local volatile int16*localvolatileint16p,\n" + " local volatile uint16 *localvolatileuint16p,\n" + " local volatile long16* localvolatilelong16p,\n" + " local volatile ulong16 * localvolatileulong16p,\n" + " local volatile float16*localvolatilefloat16p)\n" + "{}\n", + "\n" + "kernel void local_volatile_vector16_restrict_p(local volatile char16 *restrict localvolatilechar16restrictp,\n" + " local volatile uchar16* restrict 
localvolatileuchar16restrictp,\n" + " local volatile short16 * restrict localvolatileshort16restrictp,\n" + " local volatile ushort16*restrict localvolatileushort16restrictp,\n" + " local volatile int16 *restrict localvolatileint16restrictp,\n" + " local volatile uint16* restrict localvolatileuint16restrictp,\n" + " local volatile long16 * restrict localvolatilelong16restrictp,\n" + " local volatile ulong16*restrict localvolatileulong16restrictp,\n" + " local volatile float16 *restrict localvolatilefloat16restrictp)\n" + "{}\n", + "\n" + "kernel void local_const_volatile_vector16_p(local const volatile char16* localconstvolatilechar16p,\n" + " local const volatile uchar16 * localconstvolatileuchar16p,\n" + " local const volatile short16*localconstvolatileshort16p,\n" + " local const volatile ushort16 *localconstvolatileushort16p,\n" + " local const volatile int16* localconstvolatileint16p,\n" + " local const volatile uint16 * localconstvolatileuint16p,\n" + " local const volatile long16*localconstvolatilelong16p,\n" + " local const volatile ulong16 *localconstvolatileulong16p,\n" + " local const volatile float16* localconstvolatilefloat16p)\n" + "{}\n", + "\n" + "kernel void local_const_volatile_vector16_restrict_p(local const volatile char16 * restrict localconstvolatilechar16restrictp,\n" + " local const volatile uchar16*restrict localconstvolatileuchar16restrictp,\n" + " local const volatile short16 *restrict localconstvolatileshort16restrictp,\n" + " local const volatile ushort16* restrict localconstvolatileushort16restrictp,\n" + " local const volatile int16 * restrict localconstvolatileint16restrictp,\n" + " local const volatile uint16*restrict localconstvolatileuint16restrictp,\n" + " local const volatile long16 *restrict localconstvolatilelong16restrictp,\n" + " local const volatile ulong16* restrict localconstvolatileulong16restrictp,\n" + " local const volatile float16 * restrict localconstvolatilefloat16restrictp)\n" + "{}\n", + "\n" + "kernel void 
vector16_d(char16 char16d,\n" + " uchar16 uchar16d,\n" + " short16 short16d,\n" + " ushort16 ushort16d,\n" + " int16 int16d,\n" + " uint16 uint16d,\n" + " long16 long16d,\n" + " ulong16 ulong16d,\n" + " float16 float16d)\n" + "{}\n", + "\n" + "kernel void const_vector16_d(const char16 constchar16d,\n" + " const uchar16 constuchar16d,\n" + " const short16 constshort16d,\n" + " const ushort16 constushort16d,\n" + " const int16 constint16d,\n" + " const uint16 constuint16d,\n" + " const long16 constlong16d,\n" + " const ulong16 constulong16d,\n" + " const float16 constfloat16d)\n" + "{}\n", + "\n" + "kernel void private_vector16_d(private char16 privatechar16d,\n" + " private uchar16 privateuchar16d,\n" + " private short16 privateshort16d,\n" + " private ushort16 privateushort16d,\n" + " private int16 privateint16d,\n" + " private uint16 privateuint16d,\n" + " private long16 privatelong16d,\n" + " private ulong16 privateulong16d,\n" + " private float16 privatefloat16d)\n" + "{}\n", + "\n" + "kernel void private_const_vector16_d(private const char16 privateconstchar16d,\n" + " private const uchar16 privateconstuchar16d,\n" + " private const short16 privateconstshort16d,\n" + " private const ushort16 privateconstushort16d,\n" + " private const int16 privateconstint16d,\n" + " private const uint16 privateconstuint16d,\n" + " private const long16 privateconstlong16d,\n" + " private const ulong16 privateconstulong16d,\n" + " private const float16 privateconstfloat16d)\n" + "{}\n", + "\n" + "kernel void constant_derived_p0(constant typedef_type*constanttypedef_typep,\n" + " constant struct struct_type *constantstructstruct_typep,\n" + " constant typedef_struct_type* constanttypedef_struct_typep,\n" + " constant union union_type * constantunionunion_typep)\n" + "{}\n", + "\n" + "kernel void constant_derived_p1(constant typedef_union_type*constanttypedef_union_typep,\n" + " constant enum enum_type *constantenumenum_typep,\n" + " constant typedef_enum_type* 
constanttypedef_enum_typep)\n" + "{}\n", + "\n" + "kernel void constant_derived_restrict_p0(constant typedef_type * restrict constanttypedef_typerestrictp,\n" + " constant struct struct_type*restrict constantstructstruct_typerestrictp,\n" + " constant typedef_struct_type *restrict constanttypedef_struct_typerestrictp,\n" + " constant union union_type* restrict constantunionunion_typerestrictp)\n" + "{}\n", + "\n" + "kernel void constant_derived_restrict_p1(constant typedef_union_type * restrict constanttypedef_union_typerestrictp,\n" + " constant enum enum_type*restrict constantenumenum_typerestrictp,\n" + " constant typedef_enum_type *restrict constanttypedef_enum_typerestrictp)\n" + "{}\n", + "\n" + "kernel void global_derived_p(global typedef_type*globaltypedef_typep,\n" + " global struct struct_type *globalstructstruct_typep,\n" + " global typedef_struct_type* globaltypedef_struct_typep,\n" + " global union union_type * globalunionunion_typep,\n" + " global typedef_union_type*globaltypedef_union_typep,\n" + " global enum enum_type *globalenumenum_typep,\n" + " global typedef_enum_type* globaltypedef_enum_typep)\n" + "{}\n", + "\n" + "kernel void global_derived_restrict_p(global typedef_type * restrict globaltypedef_typerestrictp,\n" + " global struct struct_type*restrict globalstructstruct_typerestrictp,\n" + " global typedef_struct_type *restrict globaltypedef_struct_typerestrictp,\n" + " global union union_type* restrict globalunionunion_typerestrictp,\n" + " global typedef_union_type * restrict globaltypedef_union_typerestrictp,\n" + " global enum enum_type*restrict globalenumenum_typerestrictp,\n" + " global typedef_enum_type *restrict globaltypedef_enum_typerestrictp)\n" + "{}\n", + "\n" + "kernel void global_const_derived_p(global const typedef_type* globalconsttypedef_typep,\n" + " global const struct struct_type * globalconststructstruct_typep,\n" + " global const typedef_struct_type*globalconsttypedef_struct_typep,\n" + " global const union union_type 
*globalconstunionunion_typep,\n" + " global const typedef_union_type* globalconsttypedef_union_typep,\n" + " global const enum enum_type * globalconstenumenum_typep,\n" + " global const typedef_enum_type*globalconsttypedef_enum_typep)\n" + "{}\n", + "\n" + "kernel void global_const_derived_restrict_p(global const typedef_type *restrict globalconsttypedef_typerestrictp,\n" + " global const struct struct_type* restrict globalconststructstruct_typerestrictp,\n" + " global const typedef_struct_type * restrict globalconsttypedef_struct_typerestrictp,\n" + " global const union union_type*restrict globalconstunionunion_typerestrictp,\n" + " global const typedef_union_type *restrict globalconsttypedef_union_typerestrictp,\n" + " global const enum enum_type* restrict globalconstenumenum_typerestrictp,\n" + " global const typedef_enum_type * restrict globalconsttypedef_enum_typerestrictp)\n" + "{}\n", + "\n" + "kernel void global_volatile_derived_p(global volatile typedef_type*globalvolatiletypedef_typep,\n" + " global volatile struct struct_type *globalvolatilestructstruct_typep,\n" + " global volatile typedef_struct_type* globalvolatiletypedef_struct_typep,\n" + " global volatile union union_type * globalvolatileunionunion_typep,\n" + " global volatile typedef_union_type*globalvolatiletypedef_union_typep,\n" + " global volatile enum enum_type *globalvolatileenumenum_typep,\n" + " global volatile typedef_enum_type* globalvolatiletypedef_enum_typep)\n" + "{}\n", + "\n" + "kernel void global_volatile_derived_restrict_p(global volatile typedef_type * restrict globalvolatiletypedef_typerestrictp,\n" + " global volatile struct struct_type*restrict globalvolatilestructstruct_typerestrictp,\n" + " global volatile typedef_struct_type *restrict globalvolatiletypedef_struct_typerestrictp,\n" + " global volatile union union_type* restrict globalvolatileunionunion_typerestrictp,\n" + " global volatile typedef_union_type * restrict globalvolatiletypedef_union_typerestrictp,\n" + " 
global volatile enum enum_type*restrict globalvolatileenumenum_typerestrictp,\n" + " global volatile typedef_enum_type *restrict globalvolatiletypedef_enum_typerestrictp)\n" + "{}\n", + "\n" + "kernel void global_const_volatile_derived_p(global const volatile typedef_type* globalconstvolatiletypedef_typep,\n" + " global const volatile struct struct_type * globalconstvolatilestructstruct_typep,\n" + " global const volatile typedef_struct_type*globalconstvolatiletypedef_struct_typep,\n" + " global const volatile union union_type *globalconstvolatileunionunion_typep,\n" + " global const volatile typedef_union_type* globalconstvolatiletypedef_union_typep,\n" + " global const volatile enum enum_type * globalconstvolatileenumenum_typep,\n" + " global const volatile typedef_enum_type*globalconstvolatiletypedef_enum_typep)\n" + "{}\n", + "\n" + "kernel void global_const_volatile_derived_restrict_p(global const volatile typedef_type *restrict globalconstvolatiletypedef_typerestrictp,\n" + " global const volatile struct struct_type* restrict globalconstvolatilestructstruct_typerestrictp,\n" + " global const volatile typedef_struct_type * restrict globalconstvolatiletypedef_struct_typerestrictp,\n" + " global const volatile union union_type*restrict globalconstvolatileunionunion_typerestrictp,\n" + " global const volatile typedef_union_type *restrict globalconstvolatiletypedef_union_typerestrictp,\n" + " global const volatile enum enum_type* restrict globalconstvolatileenumenum_typerestrictp,\n" + " global const volatile typedef_enum_type * restrict globalconstvolatiletypedef_enum_typerestrictp)\n" + "{}\n", + "\n" + "kernel void local_derived_p(local typedef_type*localtypedef_typep,\n" + " local struct struct_type *localstructstruct_typep,\n" + " local typedef_struct_type* localtypedef_struct_typep,\n" + " local union union_type * localunionunion_typep,\n" + " local typedef_union_type*localtypedef_union_typep,\n" + " local enum enum_type *localenumenum_typep,\n" + " local 
typedef_enum_type* localtypedef_enum_typep)\n" + "{}\n", + "\n" + "kernel void local_derived_restrict_p(local typedef_type * restrict localtypedef_typerestrictp,\n" + " local struct struct_type*restrict localstructstruct_typerestrictp,\n" + " local typedef_struct_type *restrict localtypedef_struct_typerestrictp,\n" + " local union union_type* restrict localunionunion_typerestrictp,\n" + " local typedef_union_type * restrict localtypedef_union_typerestrictp,\n" + " local enum enum_type*restrict localenumenum_typerestrictp,\n" + " local typedef_enum_type *restrict localtypedef_enum_typerestrictp)\n" + "{}\n", + "\n" + "kernel void local_const_derived_p(local const typedef_type* localconsttypedef_typep,\n" + " local const struct struct_type * localconststructstruct_typep,\n" + " local const typedef_struct_type*localconsttypedef_struct_typep,\n" + " local const union union_type *localconstunionunion_typep,\n" + " local const typedef_union_type* localconsttypedef_union_typep,\n" + " local const enum enum_type * localconstenumenum_typep,\n" + " local const typedef_enum_type*localconsttypedef_enum_typep)\n" + "{}\n", + "\n" + "kernel void local_const_derived_restrict_p(local const typedef_type *restrict localconsttypedef_typerestrictp,\n" + " local const struct struct_type* restrict localconststructstruct_typerestrictp,\n" + " local const typedef_struct_type * restrict localconsttypedef_struct_typerestrictp,\n" + " local const union union_type*restrict localconstunionunion_typerestrictp,\n" + " local const typedef_union_type *restrict localconsttypedef_union_typerestrictp,\n" + " local const enum enum_type* restrict localconstenumenum_typerestrictp,\n" + " local const typedef_enum_type * restrict localconsttypedef_enum_typerestrictp)\n" + "{}\n", + "\n" + "kernel void local_volatile_derived_p(local volatile typedef_type*localvolatiletypedef_typep,\n" + " local volatile struct struct_type *localvolatilestructstruct_typep,\n" + " local volatile typedef_struct_type* 
localvolatiletypedef_struct_typep,\n" + " local volatile union union_type * localvolatileunionunion_typep,\n" + " local volatile typedef_union_type*localvolatiletypedef_union_typep,\n" + " local volatile enum enum_type *localvolatileenumenum_typep,\n" + " local volatile typedef_enum_type* localvolatiletypedef_enum_typep)\n" + "{}\n", + "\n" + "kernel void local_volatile_derived_restrict_p(local volatile typedef_type * restrict localvolatiletypedef_typerestrictp,\n" + " local volatile struct struct_type*restrict localvolatilestructstruct_typerestrictp,\n" + " local volatile typedef_struct_type *restrict localvolatiletypedef_struct_typerestrictp,\n" + " local volatile union union_type* restrict localvolatileunionunion_typerestrictp,\n" + " local volatile typedef_union_type * restrict localvolatiletypedef_union_typerestrictp,\n" + " local volatile enum enum_type*restrict localvolatileenumenum_typerestrictp,\n" + " local volatile typedef_enum_type *restrict localvolatiletypedef_enum_typerestrictp)\n" + "{}\n", + "\n" + "kernel void local_const_volatile_derived_p(local const volatile typedef_type* localconstvolatiletypedef_typep,\n" + " local const volatile struct struct_type * localconstvolatilestructstruct_typep,\n" + " local const volatile typedef_struct_type*localconstvolatiletypedef_struct_typep,\n" + " local const volatile union union_type *localconstvolatileunionunion_typep,\n" + " local const volatile typedef_union_type* localconstvolatiletypedef_union_typep,\n" + " local const volatile enum enum_type * localconstvolatileenumenum_typep,\n" + " local const volatile typedef_enum_type*localconstvolatiletypedef_enum_typep)\n" + "{}\n", + "\n" + "kernel void local_const_volatile_derived_restrict_p(local const volatile typedef_type *restrict localconstvolatiletypedef_typerestrictp,\n" + " local const volatile struct struct_type* restrict localconstvolatilestructstruct_typerestrictp,\n" + " local const volatile typedef_struct_type * restrict 
localconstvolatiletypedef_struct_typerestrictp,\n" + " local const volatile union union_type*restrict localconstvolatileunionunion_typerestrictp,\n" + " local const volatile typedef_union_type *restrict localconstvolatiletypedef_union_typerestrictp,\n" + " local const volatile enum enum_type* restrict localconstvolatileenumenum_typerestrictp,\n" + " local const volatile typedef_enum_type * restrict localconstvolatiletypedef_enum_typerestrictp)\n" + "{}\n", + "\n" + "kernel void derived_d(typedef_type typedef_typed,\n" + " struct struct_type structstruct_typed,\n" + " typedef_struct_type typedef_struct_typed,\n" + " union union_type unionunion_typed,\n" + " typedef_union_type typedef_union_typed,\n" + " enum enum_type enumenum_typed,\n" + " typedef_enum_type typedef_enum_typed)\n" + "{}\n", + "\n" + "kernel void const_derived_d(const typedef_type consttypedef_typed,\n" + " const struct struct_type conststructstruct_typed,\n" + " const typedef_struct_type consttypedef_struct_typed,\n" + " const union union_type constunionunion_typed,\n" + " const typedef_union_type consttypedef_union_typed,\n" + " const enum enum_type constenumenum_typed,\n" + " const typedef_enum_type consttypedef_enum_typed)\n" + "{}\n", + "\n" + "kernel void private_derived_d(private typedef_type privatetypedef_typed,\n" + " private struct struct_type privatestructstruct_typed,\n" + " private typedef_struct_type privatetypedef_struct_typed,\n" + " private union union_type privateunionunion_typed,\n" + " private typedef_union_type privatetypedef_union_typed,\n" + " private enum enum_type privateenumenum_typed,\n" + " private typedef_enum_type privatetypedef_enum_typed)\n" + "{}\n", + "\n" + "kernel void private_const_derived_d(private const typedef_type privateconsttypedef_typed,\n" + " private const struct struct_type privateconststructstruct_typed,\n" + " private const typedef_struct_type privateconsttypedef_struct_typed,\n" + " private const union union_type privateconstunionunion_typed,\n" + " 
private const typedef_union_type privateconsttypedef_union_typed,\n" + " private const enum enum_type privateconstenumenum_typed,\n" + " private const typedef_enum_type privateconsttypedef_enum_typed)\n" + "{}\n", + "\n" +}; + +const char * required_arg_info[][72] = { + // The minimum value of CL_DEVICE_MAX_CONSTANT_ARGS is 4 + { + "constant_scalar_p0", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "void*", "constantvoidp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char*", "constantcharp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar*", "constantucharp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar*", "constantunsignedcharp", + NULL + }, + { + "constant_scalar_p1", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short*", "constantshortp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort*", "constantushortp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort*", "constantunsignedshortp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int*", "constantintp", + NULL + }, + { + "constant_scalar_p2", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint*", "constantuintp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST), "uint*", "constantunsignedintp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long*", "constantlongp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong*", "constantulongp", + NULL + }, + { + "constant_scalar_p3", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong*", "constantunsignedlongp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float*", "constantfloatp", + NULL + }, + { + "constant_scalar_restrict_p0", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "constantvoidrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "constantcharrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "constantucharrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "constantunsignedcharrestrictp", + NULL + }, + { + "constant_scalar_restrict_p1", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "constantshortrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", 
"constantushortrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "constantunsignedshortrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "constantintrestrictp", + NULL + }, + { + "constant_scalar_restrict_p2", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "constantuintrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "constantunsignedintrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "constantlongrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "constantulongrestrictp", + NULL + }, + { + "constant_scalar_restrict_p3", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "constantunsignedlongrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float*", "constantfloatrestrictp", + NULL + }, + { + "global_scalar_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "void*", "globalvoidp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_NONE), "char*", "globalcharp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar*", "globalucharp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar*", "globalunsignedcharp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short*", "globalshortp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort*", "globalushortp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort*", "globalunsignedshortp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int*", "globalintp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint*", "globaluintp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint*", "globalunsignedintp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long*", "globallongp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong*", "globalulongp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong*", "globalunsignedlongp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float*", "globalfloatp", + NULL + }, + { + "global_scalar_restrict_p", + (const char 
*)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "globalvoidrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "globalcharrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "globalucharrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "globalunsignedcharrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "globalshortrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "globalushortrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "globalunsignedshortrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "globalintrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "globaluintrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "globalunsignedintrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "globallongrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "globalulongrestrictp", + (const 
char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "globalunsignedlongrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float*", "globalfloatrestrictp", + NULL + }, + { + "global_const_scalar_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "void*", "globalconstvoidp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char*", "globalconstcharp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar*", "globalconstucharp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar*", "globalconstunsignedcharp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short*", "globalconstshortp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort*", "globalconstushortp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort*", "globalconstunsignedshortp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int*", "globalconstintp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint*", "globalconstuintp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint*", "globalconstunsignedintp", + (const char 
*)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long*", "globalconstlongp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong*", "globalconstulongp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong*", "globalconstunsignedlongp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float*", "globalconstfloatp", + NULL + }, + { + "global_const_scalar_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "globalconstvoidrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "globalconstcharrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "globalconstucharrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "globalconstunsignedcharrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "globalconstshortrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "globalconstushortrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "globalconstunsignedshortrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "globalconstintrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "globalconstuintrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "globalconstunsignedintrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "globalconstlongrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "globalconstulongrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "globalconstunsignedlongrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float*", "globalconstfloatrestrictp", + NULL + }, + { + "global_volatile_scalar_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "void*", "globalvolatilevoidp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char*", "globalvolatilecharp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar*", 
"globalvolatileucharp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar*", "globalvolatileunsignedcharp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short*", "globalvolatileshortp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort*", "globalvolatileushortp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort*", "globalvolatileunsignedshortp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int*", "globalvolatileintp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint*", "globalvolatileuintp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint*", "globalvolatileunsignedintp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long*", "globalvolatilelongp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong*", "globalvolatileulongp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong*", "globalvolatileunsignedlongp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float*", "globalvolatilefloatp", + NULL + }, + { + "global_volatile_scalar_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "globalvolatilevoidrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "globalvolatilecharrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "globalvolatileucharrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "globalvolatileunsignedcharrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "globalvolatileshortrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "globalvolatileushortrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "globalvolatileunsignedshortrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "globalvolatileintrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "globalvolatileuintrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "globalvolatileunsignedintrestrictp", + (const char 
*)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "globalvolatilelongrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "globalvolatileulongrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "globalvolatileunsignedlongrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float*", "globalvolatilefloatrestrictp", + NULL + }, + { + "global_const_volatile_scalar_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "void*", "globalconstvolatilevoidp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char*", "globalconstvolatilecharp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar*", "globalconstvolatileucharp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar*", "globalconstvolatileunsignedcharp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short*", "globalconstvolatileshortp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), 
"ushort*", "globalconstvolatileushortp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort*", "globalconstvolatileunsignedshortp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int*", "globalconstvolatileintp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint*", "globalconstvolatileuintp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint*", "globalconstvolatileunsignedintp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long*", "globalconstvolatilelongp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong*", "globalconstvolatileulongp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong*", "globalconstvolatileunsignedlongp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float*", "globalconstvolatilefloatp", + NULL + }, + { + "global_const_volatile_scalar_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "globalconstvolatilevoidrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, 
(const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "globalconstvolatilecharrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "globalconstvolatileucharrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "globalconstvolatileunsignedcharrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "globalconstvolatileshortrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "globalconstvolatileushortrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "globalconstvolatileunsignedshortrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "globalconstvolatileintrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "globalconstvolatileuintrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", 
"globalconstvolatileunsignedintrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "globalconstvolatilelongrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "globalconstvolatileulongrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "globalconstvolatileunsignedlongrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float*", "globalconstvolatilefloatrestrictp", + NULL + }, + { + "local_scalar_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "void*", "localvoidp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char*", "localcharp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar*", "localucharp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar*", "localunsignedcharp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short*", "localshortp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort*", "localushortp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort*", "localunsignedshortp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int*", "localintp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint*", "localuintp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint*", "localunsignedintp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long*", "locallongp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong*", "localulongp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong*", "localunsignedlongp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float*", "localfloatp", + NULL + }, + { + "local_scalar_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "localvoidrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "localcharrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "localucharrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "localunsignedcharrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "localshortrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "localushortrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "localunsignedshortrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "localintrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "localuintrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "localunsignedintrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "locallongrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "localulongrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "localunsignedlongrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float*", "localfloatrestrictp", + NULL + }, + { + "local_const_scalar_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "void*", "localconstvoidp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char*", "localconstcharp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, 
(const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar*", "localconstucharp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar*", "localconstunsignedcharp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short*", "localconstshortp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort*", "localconstushortp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort*", "localconstunsignedshortp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int*", "localconstintp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint*", "localconstuintp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint*", "localconstunsignedintp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long*", "localconstlongp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong*", "localconstulongp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong*", "localconstunsignedlongp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float*", "localconstfloatp", + NULL + }, + { + "local_const_scalar_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "localconstvoidrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "localconstcharrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "localconstucharrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "localconstunsignedcharrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "localconstshortrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "localconstushortrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "localconstunsignedshortrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "localconstintrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "localconstuintrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "localconstunsignedintrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "localconstlongrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "localconstulongrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "localconstunsignedlongrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float*", "localconstfloatrestrictp", + NULL + }, + { + "local_volatile_scalar_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "void*", "localvolatilevoidp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char*", "localvolatilecharp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar*", "localvolatileucharp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar*", "localvolatileunsignedcharp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short*", "localvolatileshortp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort*", "localvolatileushortp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort*", "localvolatileunsignedshortp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_VOLATILE), "int*", "localvolatileintp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint*", "localvolatileuintp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint*", "localvolatileunsignedintp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long*", "localvolatilelongp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong*", "localvolatileulongp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong*", "localvolatileunsignedlongp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float*", "localvolatilefloatp", + NULL + }, + { + "local_volatile_scalar_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "localvolatilevoidrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "localvolatilecharrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "localvolatileucharrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "localvolatileunsignedcharrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, 
(const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "localvolatileshortrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "localvolatileushortrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "localvolatileunsignedshortrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "localvolatileintrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "localvolatileuintrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "localvolatileunsignedintrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "localvolatilelongrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "localvolatileulongrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "localvolatileunsignedlongrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float*", "localvolatilefloatrestrictp", + NULL + }, + { + "local_const_volatile_scalar_p", + (const char 
*)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "void*", "localconstvolatilevoidp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char*", "localconstvolatilecharp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar*", "localconstvolatileucharp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar*", "localconstvolatileunsignedcharp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short*", "localconstvolatileshortp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort*", "localconstvolatileushortp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort*", "localconstvolatileunsignedshortp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int*", "localconstvolatileintp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint*", "localconstvolatileuintp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint*", "localconstvolatileunsignedintp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long*", "localconstvolatilelongp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong*", "localconstvolatileulongp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong*", "localconstvolatileunsignedlongp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float*", "localconstvolatilefloatp", + NULL + }, + { + "local_const_volatile_scalar_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "void*", "localconstvolatilevoidrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char*", "localconstvolatilecharrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "localconstvolatileucharrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar*", "localconstvolatileunsignedcharrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short*", "localconstvolatileshortrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "localconstvolatileushortrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort*", "localconstvolatileunsignedshortrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int*", "localconstvolatileintrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "localconstvolatileuintrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint*", "localconstvolatileunsignedintrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long*", "localconstvolatilelongrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "localconstvolatileulongrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong*", "localconstvolatileunsignedlongrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float*", 
"localconstvolatilefloatrestrictp", + NULL + }, + { + "scalar_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char", "chard", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar", "uchard", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar", "unsignedchard", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short", "shortd", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort", "ushortd", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort", "unsignedshortd", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int", "intd", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint", "uintd", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint", "unsignedintd", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long", "longd", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong", "ulongd", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong", "unsignedlongd", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float", 
"floatd", + NULL + }, + { + "const_scalar_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char", "constchard", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar", "constuchard", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar", "constunsignedchard", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short", "constshortd", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort", "constushortd", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort", "constunsignedshortd", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int", "constintd", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint", "constuintd", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint", "constunsignedintd", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long", "constlongd", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong", "constulongd", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong", "constunsignedlongd", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const 
char *)(CL_KERNEL_ARG_TYPE_NONE), "float", "constfloatd", + NULL + }, + { + "private_scalar_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char", "privatechard", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar", "privateuchard", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar", "privateunsignedchard", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short", "privateshortd", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort", "privateushortd", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort", "privateunsignedshortd", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int", "privateintd", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint", "privateuintd", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint", "privateunsignedintd", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long", "privatelongd", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong", "privateulongd", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong", "privateunsignedlongd", + (const char 
*)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float", "privatefloatd", + NULL + }, + { + "private_const_scalar_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char", "privateconstchard", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar", "privateconstuchard", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar", "privateconstunsignedchard", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short", "privateconstshortd", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort", "privateconstushortd", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort", "privateconstunsignedshortd", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int", "privateconstintd", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint", "privateconstuintd", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint", "privateconstunsignedintd", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long", "privateconstlongd", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong", "privateconstulongd", + (const char 
*)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong", "privateconstunsignedlongd", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float", "privateconstfloatd", + NULL + }, + { + "constant_vector2_p0", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char2*", "constantchar2p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar2*", "constantuchar2p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short2*", "constantshort2p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort2*", "constantushort2p", + NULL + }, + { + "constant_vector2_p1", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int2*", "constantint2p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint2*", "constantuint2p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long2*", "constantlong2p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong2*", "constantulong2p", + NULL + }, + { + "constant_vector2_p2", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float2*", "constantfloat2p", + NULL + }, + { + "constant_vector2_restrict_p0", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "constantchar2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "constantuchar2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "constantshort2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "constantushort2restrictp", + NULL + }, + { + "constant_vector2_restrict_p1", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "constantint2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "constantuint2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "constantlong2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "constantulong2restrictp", + NULL + }, + { + "constant_vector2_restrict_p2", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "constantfloat2restrictp", + NULL + }, + { + "global_vector2_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char2*", "globalchar2p", + 
(const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar2*", "globaluchar2p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short2*", "globalshort2p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort2*", "globalushort2p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int2*", "globalint2p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint2*", "globaluint2p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long2*", "globallong2p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong2*", "globalulong2p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float2*", "globalfloat2p", + NULL + }, + { + "global_vector2_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "globalchar2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "globaluchar2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "globalshort2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "globalushort2restrictp", + (const char 
*)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "globalint2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "globaluint2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "globallong2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "globalulong2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "globalfloat2restrictp", + NULL + }, + { + "global_const_vector2_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char2*", "globalconstchar2p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar2*", "globalconstuchar2p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short2*", "globalconstshort2p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort2*", "globalconstushort2p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int2*", "globalconstint2p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint2*", "globalconstuint2p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long2*", "globalconstlong2p", + (const 
char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong2*", "globalconstulong2p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float2*", "globalconstfloat2p", + NULL + }, + { + "global_const_vector2_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "globalconstchar2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "globalconstuchar2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "globalconstshort2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "globalconstushort2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "globalconstint2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "globalconstuint2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "globalconstlong2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "globalconstulong2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const 
char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "globalconstfloat2restrictp", + NULL + }, + { + "global_volatile_vector2_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char2*", "globalvolatilechar2p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar2*", "globalvolatileuchar2p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short2*", "globalvolatileshort2p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort2*", "globalvolatileushort2p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int2*", "globalvolatileint2p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint2*", "globalvolatileuint2p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long2*", "globalvolatilelong2p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong2*", "globalvolatileulong2p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float2*", "globalvolatilefloat2p", + NULL + }, + { + "global_volatile_vector2_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "globalvolatilechar2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, 
(const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "globalvolatileuchar2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "globalvolatileshort2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "globalvolatileushort2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "globalvolatileint2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "globalvolatileuint2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "globalvolatilelong2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "globalvolatileulong2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "globalvolatilefloat2restrictp", + NULL + }, + { + "global_const_volatile_vector2_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char2*", "globalconstvolatilechar2p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar2*", 
"globalconstvolatileuchar2p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short2*", "globalconstvolatileshort2p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort2*", "globalconstvolatileushort2p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int2*", "globalconstvolatileint2p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint2*", "globalconstvolatileuint2p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long2*", "globalconstvolatilelong2p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong2*", "globalconstvolatileulong2p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float2*", "globalconstvolatilefloat2p", + NULL + }, + { + "global_const_volatile_vector2_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "globalconstvolatilechar2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "globalconstvolatileuchar2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "globalconstvolatileshort2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "globalconstvolatileushort2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "globalconstvolatileint2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "globalconstvolatileuint2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "globalconstvolatilelong2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "globalconstvolatileulong2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "globalconstvolatilefloat2restrictp", + NULL + }, + { + "local_vector2_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char2*", "localchar2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar2*", "localuchar2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short2*", "localshort2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort2*", "localushort2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int2*", "localint2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint2*", "localuint2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long2*", "locallong2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong2*", "localulong2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float2*", "localfloat2p", + NULL + }, + { + "local_vector2_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "localchar2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "localuchar2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "localshort2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "localushort2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "localint2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "localuint2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "locallong2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "localulong2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "localfloat2restrictp", + NULL + }, + { + "local_const_vector2_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char2*", "localconstchar2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar2*", "localconstuchar2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short2*", "localconstshort2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort2*", "localconstushort2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int2*", "localconstint2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint2*", "localconstuint2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long2*", "localconstlong2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong2*", "localconstulong2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST), "float2*", "localconstfloat2p", + NULL + }, + { + "local_const_vector2_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "localconstchar2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "localconstuchar2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "localconstshort2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "localconstushort2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "localconstint2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "localconstuint2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "localconstlong2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "localconstulong2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "localconstfloat2restrictp", + NULL + }, + { + "local_volatile_vector2_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char2*", "localvolatilechar2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar2*", "localvolatileuchar2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short2*", "localvolatileshort2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort2*", "localvolatileushort2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int2*", "localvolatileint2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint2*", "localvolatileuint2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long2*", "localvolatilelong2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong2*", "localvolatileulong2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float2*", "localvolatilefloat2p", + NULL + }, + { + "local_volatile_vector2_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "localvolatilechar2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "localvolatileuchar2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "localvolatileshort2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "localvolatileushort2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "localvolatileint2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "localvolatileuint2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "localvolatilelong2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "localvolatileulong2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "localvolatilefloat2restrictp", + NULL + }, + { + "local_const_volatile_vector2_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char2*", "localconstvolatilechar2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar2*", "localconstvolatileuchar2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short2*", "localconstvolatileshort2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const 
char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort2*", "localconstvolatileushort2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int2*", "localconstvolatileint2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint2*", "localconstvolatileuint2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long2*", "localconstvolatilelong2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong2*", "localconstvolatileulong2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float2*", "localconstvolatilefloat2p", + NULL + }, + { + "local_const_volatile_vector2_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char2*", "localconstvolatilechar2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar2*", "localconstvolatileuchar2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short2*", "localconstvolatileshort2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort2*", "localconstvolatileushort2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int2*", "localconstvolatileint2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint2*", "localconstvolatileuint2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long2*", "localconstvolatilelong2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong2*", "localconstvolatileulong2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float2*", "localconstvolatilefloat2restrictp", + NULL + }, + { + "vector2_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char2", "char2d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar2", "uchar2d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short2", "short2d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort2", "ushort2d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int2", "int2d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint2", "uint2d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long2", "long2d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong2", "ulong2d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float2", "float2d", + NULL + }, + { + "const_vector2_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char2", "constchar2d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar2", "constuchar2d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short2", "constshort2d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort2", "constushort2d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int2", "constint2d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint2", "constuint2d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long2", "constlong2d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong2", "constulong2d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, 
(const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float2", "constfloat2d", + NULL + }, + { + "private_vector2_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char2", "privatechar2d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar2", "privateuchar2d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short2", "privateshort2d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort2", "privateushort2d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int2", "privateint2d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint2", "privateuint2d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long2", "privatelong2d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong2", "privateulong2d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float2", "privatefloat2d", + NULL + }, + { + "private_const_vector2_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char2", "privateconstchar2d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar2", "privateconstuchar2d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short2", "privateconstshort2d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort2", "privateconstushort2d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int2", "privateconstint2d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint2", "privateconstuint2d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long2", "privateconstlong2d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong2", "privateconstulong2d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float2", "privateconstfloat2d", + NULL + }, + { + "constant_vector3_p0", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char3*", "constantchar3p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar3*", "constantuchar3p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short3*", "constantshort3p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort3*", "constantushort3p", + NULL + }, + { + "constant_vector3_p1", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int3*", "constantint3p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, 
(const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint3*", "constantuint3p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long3*", "constantlong3p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong3*", "constantulong3p", + NULL + }, + { + "constant_vector3_p2", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float3*", "constantfloat3p", + NULL + }, + { + "constant_vector3_restrict_p0", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "constantchar3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "constantuchar3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "constantshort3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "constantushort3restrictp", + NULL + }, + { + "constant_vector3_restrict_p1", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "constantint3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "constantuint3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, 
(const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "constantlong3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "constantulong3restrictp", + NULL + }, + { + "constant_vector3_restrict_p2", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "constantfloat3restrictp", + NULL + }, + { + "global_vector3_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char3*", "globalchar3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar3*", "globaluchar3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short3*", "globalshort3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort3*", "globalushort3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int3*", "globalint3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint3*", "globaluint3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long3*", "globallong3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong3*", "globalulong3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float3*", "globalfloat3p", + NULL + }, + { 
+ "global_vector3_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "globalchar3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "globaluchar3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "globalshort3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "globalushort3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "globalint3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "globaluint3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "globallong3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "globalulong3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "globalfloat3restrictp", + NULL + }, + { + "global_const_vector3_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char3*", "globalconstchar3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar3*", "globalconstuchar3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST), "short3*", "globalconstshort3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort3*", "globalconstushort3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int3*", "globalconstint3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint3*", "globalconstuint3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long3*", "globalconstlong3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong3*", "globalconstulong3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float3*", "globalconstfloat3p", + NULL + }, + { + "global_const_vector3_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "globalconstchar3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "globalconstuchar3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "globalconstshort3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "globalconstushort3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "globalconstint3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "globalconstuint3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "globalconstlong3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "globalconstulong3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "globalconstfloat3restrictp", + NULL + }, + { + "global_volatile_vector3_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char3*", "globalvolatilechar3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar3*", "globalvolatileuchar3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short3*", "globalvolatileshort3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort3*", "globalvolatileushort3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int3*", "globalvolatileint3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint3*", "globalvolatileuint3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long3*", "globalvolatilelong3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong3*", "globalvolatileulong3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float3*", "globalvolatilefloat3p", + NULL + }, + { + "global_volatile_vector3_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "globalvolatilechar3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "globalvolatileuchar3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "globalvolatileshort3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "globalvolatileushort3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "globalvolatileint3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "globalvolatileuint3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "globalvolatilelong3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "globalvolatileulong3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "globalvolatilefloat3restrictp", + NULL + }, + { + "global_const_volatile_vector3_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char3*", "globalconstvolatilechar3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar3*", "globalconstvolatileuchar3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short3*", "globalconstvolatileshort3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort3*", "globalconstvolatileushort3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int3*", "globalconstvolatileint3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint3*", "globalconstvolatileuint3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long3*", "globalconstvolatilelong3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong3*", "globalconstvolatileulong3p", + (const char 
*)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float3*", "globalconstvolatilefloat3p", + NULL + }, + { + "global_const_volatile_vector3_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "globalconstvolatilechar3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "globalconstvolatileuchar3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "globalconstvolatileshort3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "globalconstvolatileushort3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "globalconstvolatileint3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "globalconstvolatileuint3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "globalconstvolatilelong3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "globalconstvolatileulong3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "globalconstvolatilefloat3restrictp", + NULL + }, + { + "local_vector3_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char3*", "localchar3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar3*", "localuchar3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short3*", "localshort3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort3*", "localushort3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int3*", "localint3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint3*", "localuint3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long3*", "locallong3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong3*", "localulong3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float3*", "localfloat3p", + NULL + }, + { + "local_vector3_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "localchar3restrictp", + 
(const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "localuchar3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "localshort3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "localushort3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "localint3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "localuint3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "locallong3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "localulong3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "localfloat3restrictp", + NULL + }, + { + "local_const_vector3_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char3*", "localconstchar3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar3*", "localconstuchar3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short3*", "localconstshort3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort3*", 
"localconstushort3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int3*", "localconstint3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint3*", "localconstuint3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long3*", "localconstlong3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong3*", "localconstulong3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float3*", "localconstfloat3p", + NULL + }, + { + "local_const_vector3_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "localconstchar3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "localconstuchar3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "localconstshort3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "localconstushort3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "localconstint3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), 
"uint3*", "localconstuint3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "localconstlong3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "localconstulong3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "localconstfloat3restrictp", + NULL + }, + { + "local_volatile_vector3_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char3*", "localvolatilechar3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar3*", "localvolatileuchar3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short3*", "localvolatileshort3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort3*", "localvolatileushort3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int3*", "localvolatileint3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint3*", "localvolatileuint3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long3*", "localvolatilelong3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong3*", "localvolatileulong3p", + (const 
char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float3*", "localvolatilefloat3p", + NULL + }, + { + "local_volatile_vector3_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "localvolatilechar3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "localvolatileuchar3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "localvolatileshort3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "localvolatileushort3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "localvolatileint3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "localvolatileuint3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "localvolatilelong3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "localvolatileulong3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", 
"localvolatilefloat3restrictp", + NULL + }, + { + "local_const_volatile_vector3_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char3*", "localconstvolatilechar3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar3*", "localconstvolatileuchar3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short3*", "localconstvolatileshort3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort3*", "localconstvolatileushort3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int3*", "localconstvolatileint3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint3*", "localconstvolatileuint3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long3*", "localconstvolatilelong3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong3*", "localconstvolatileulong3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float3*", "localconstvolatilefloat3p", + NULL + }, + { + "local_const_volatile_vector3_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char3*", "localconstvolatilechar3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar3*", "localconstvolatileuchar3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short3*", "localconstvolatileshort3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort3*", "localconstvolatileushort3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int3*", "localconstvolatileint3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint3*", "localconstvolatileuint3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long3*", "localconstvolatilelong3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong3*", "localconstvolatileulong3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float3*", "localconstvolatilefloat3restrictp", + NULL + }, + { + 
"vector3_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char3", "char3d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar3", "uchar3d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short3", "short3d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort3", "ushort3d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int3", "int3d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint3", "uint3d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long3", "long3d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong3", "ulong3d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float3", "float3d", + NULL + }, + { + "const_vector3_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char3", "constchar3d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar3", "constuchar3d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short3", "constshort3d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort3", 
"constushort3d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int3", "constint3d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint3", "constuint3d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long3", "constlong3d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong3", "constulong3d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float3", "constfloat3d", + NULL + }, + { + "private_vector3_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char3", "privatechar3d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar3", "privateuchar3d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short3", "privateshort3d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort3", "privateushort3d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int3", "privateint3d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint3", "privateuint3d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long3", "privatelong3d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong3", "privateulong3d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float3", "privatefloat3d", + NULL + }, + { + "private_const_vector3_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char3", "privateconstchar3d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar3", "privateconstuchar3d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short3", "privateconstshort3d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort3", "privateconstushort3d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int3", "privateconstint3d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint3", "privateconstuint3d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long3", "privateconstlong3d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong3", "privateconstulong3d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float3", "privateconstfloat3d", + NULL + }, + { + "constant_vector4_p0", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char4*", "constantchar4p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, 
(const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar4*", "constantuchar4p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short4*", "constantshort4p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort4*", "constantushort4p", + NULL + }, + { + "constant_vector4_p1", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int4*", "constantint4p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint4*", "constantuint4p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long4*", "constantlong4p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong4*", "constantulong4p", + NULL + }, + { + "constant_vector4_p2", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float4*", "constantfloat4p", + NULL + }, + { + "constant_vector4_restrict_p0", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "constantchar4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "constantuchar4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "constantshort4restrictp", + (const char 
*)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "constantushort4restrictp", + NULL + }, + { + "constant_vector4_restrict_p1", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "constantint4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "constantuint4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "constantlong4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "constantulong4restrictp", + NULL + }, + { + "constant_vector4_restrict_p2", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "constantfloat4restrictp", + NULL + }, + { + "global_vector4_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char4*", "globalchar4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar4*", "globaluchar4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short4*", "globalshort4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort4*", "globalushort4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int4*", "globalint4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint4*", "globaluint4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long4*", "globallong4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong4*", "globalulong4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float4*", "globalfloat4p", + NULL + }, + { + "global_vector4_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "globalchar4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "globaluchar4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "globalshort4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "globalushort4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "globalint4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "globaluint4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "globallong4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "globalulong4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "globalfloat4restrictp", + NULL + }, + { + "global_const_vector4_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char4*", "globalconstchar4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar4*", "globalconstuchar4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short4*", "globalconstshort4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort4*", "globalconstushort4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int4*", "globalconstint4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint4*", "globalconstuint4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long4*", "globalconstlong4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong4*", "globalconstulong4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float4*", "globalconstfloat4p", + NULL + }, + { + "global_const_vector4_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", 
"globalconstchar4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "globalconstuchar4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "globalconstshort4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "globalconstushort4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "globalconstint4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "globalconstuint4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "globalconstlong4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "globalconstulong4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "globalconstfloat4restrictp", + NULL + }, + { + "global_volatile_vector4_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char4*", "globalvolatilechar4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar4*", "globalvolatileuchar4p", + (const 
char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short4*", "globalvolatileshort4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort4*", "globalvolatileushort4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int4*", "globalvolatileint4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint4*", "globalvolatileuint4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long4*", "globalvolatilelong4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong4*", "globalvolatileulong4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float4*", "globalvolatilefloat4p", + NULL + }, + { + "global_volatile_vector4_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "globalvolatilechar4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "globalvolatileuchar4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "globalvolatileshort4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", 
"globalvolatileushort4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "globalvolatileint4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "globalvolatileuint4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "globalvolatilelong4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "globalvolatileulong4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "globalvolatilefloat4restrictp", + NULL + }, + { + "global_const_volatile_vector4_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char4*", "globalconstvolatilechar4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar4*", "globalconstvolatileuchar4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short4*", "globalconstvolatileshort4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort4*", "globalconstvolatileushort4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int4*", "globalconstvolatileint4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint4*", "globalconstvolatileuint4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long4*", "globalconstvolatilelong4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong4*", "globalconstvolatileulong4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float4*", "globalconstvolatilefloat4p", + NULL + }, + { + "global_const_volatile_vector4_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "globalconstvolatilechar4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "globalconstvolatileuchar4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "globalconstvolatileshort4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "globalconstvolatileushort4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "globalconstvolatileint4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "globalconstvolatileuint4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "globalconstvolatilelong4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "globalconstvolatileulong4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "globalconstvolatilefloat4restrictp", + NULL + }, + { + "local_vector4_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char4*", "localchar4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar4*", "localuchar4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short4*", "localshort4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort4*", "localushort4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int4*", "localint4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), 
"uint4*", "localuint4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long4*", "locallong4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong4*", "localulong4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float4*", "localfloat4p", + NULL + }, + { + "local_vector4_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "localchar4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "localuchar4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "localshort4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "localushort4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "localint4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "localuint4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "locallong4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "localulong4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", 
"localfloat4restrictp", + NULL + }, + { + "local_const_vector4_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char4*", "localconstchar4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar4*", "localconstuchar4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short4*", "localconstshort4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort4*", "localconstushort4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int4*", "localconstint4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint4*", "localconstuint4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long4*", "localconstlong4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong4*", "localconstulong4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float4*", "localconstfloat4p", + NULL + }, + { + "local_const_vector4_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "localconstchar4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "localconstuchar4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "localconstshort4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "localconstushort4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "localconstint4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "localconstuint4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "localconstlong4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "localconstulong4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "localconstfloat4restrictp", + NULL + }, + { + "local_volatile_vector4_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char4*", "localvolatilechar4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar4*", "localvolatileuchar4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short4*", "localvolatileshort4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort4*", 
"localvolatileushort4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int4*", "localvolatileint4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint4*", "localvolatileuint4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long4*", "localvolatilelong4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong4*", "localvolatileulong4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float4*", "localvolatilefloat4p", + NULL + }, + { + "local_volatile_vector4_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "localvolatilechar4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "localvolatileuchar4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "localvolatileshort4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "localvolatileushort4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "localvolatileint4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, 
(const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "localvolatileuint4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "localvolatilelong4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "localvolatileulong4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "localvolatilefloat4restrictp", + NULL + }, + { + "local_const_volatile_vector4_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char4*", "localconstvolatilechar4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar4*", "localconstvolatileuchar4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short4*", "localconstvolatileshort4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort4*", "localconstvolatileushort4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int4*", "localconstvolatileint4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint4*", "localconstvolatileuint4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long4*", "localconstvolatilelong4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong4*", "localconstvolatileulong4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float4*", "localconstvolatilefloat4p", + NULL + }, + { + "local_const_volatile_vector4_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char4*", "localconstvolatilechar4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar4*", "localconstvolatileuchar4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short4*", "localconstvolatileshort4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort4*", "localconstvolatileushort4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int4*", "localconstvolatileint4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint4*", "localconstvolatileuint4restrictp", + (const char 
*)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long4*", "localconstvolatilelong4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong4*", "localconstvolatileulong4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float4*", "localconstvolatilefloat4restrictp", + NULL + }, + { + "vector4_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char4", "char4d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar4", "uchar4d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short4", "short4d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort4", "ushort4d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int4", "int4d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint4", "uint4d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long4", "long4d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong4", "ulong4d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const 
char *)(CL_KERNEL_ARG_TYPE_NONE), "float4", "float4d", + NULL + }, + { + "const_vector4_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char4", "constchar4d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar4", "constuchar4d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short4", "constshort4d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort4", "constushort4d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int4", "constint4d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint4", "constuint4d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long4", "constlong4d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong4", "constulong4d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float4", "constfloat4d", + NULL + }, + { + "private_vector4_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char4", "privatechar4d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar4", "privateuchar4d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short4", "privateshort4d", + (const 
char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort4", "privateushort4d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int4", "privateint4d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint4", "privateuint4d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long4", "privatelong4d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong4", "privateulong4d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float4", "privatefloat4d", + NULL + }, + { + "private_const_vector4_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char4", "privateconstchar4d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar4", "privateconstuchar4d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short4", "privateconstshort4d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort4", "privateconstushort4d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int4", "privateconstint4d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint4", "privateconstuint4d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long4", "privateconstlong4d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong4", "privateconstulong4d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float4", "privateconstfloat4d", + NULL + }, + { + "constant_vector8_p0", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char8*", "constantchar8p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar8*", "constantuchar8p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short8*", "constantshort8p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort8*", "constantushort8p", + NULL + }, + { + "constant_vector8_p1", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int8*", "constantint8p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint8*", "constantuint8p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long8*", "constantlong8p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong8*", "constantulong8p", + NULL + }, + { + "constant_vector8_p2", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float8*", "constantfloat8p", + NULL + 
}, + { + "constant_vector8_restrict_p0", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", "constantchar8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "constantuchar8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "constantshort8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "constantushort8restrictp", + NULL + }, + { + "constant_vector8_restrict_p1", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "constantint8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "constantuint8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "constantlong8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "constantulong8restrictp", + NULL + }, + { + "constant_vector8_restrict_p2", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "constantfloat8restrictp", + NULL + }, + { + "global_vector8_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const 
char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char8*", "globalchar8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar8*", "globaluchar8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short8*", "globalshort8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort8*", "globalushort8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int8*", "globalint8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint8*", "globaluint8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long8*", "globallong8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong8*", "globalulong8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float8*", "globalfloat8p", + NULL + }, + { + "global_vector8_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", "globalchar8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "globaluchar8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "globalshort8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "globalushort8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "globalint8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "globaluint8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "globallong8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "globalulong8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "globalfloat8restrictp", + NULL + }, + { + "global_const_vector8_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char8*", "globalconstchar8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar8*", "globalconstuchar8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short8*", "globalconstshort8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort8*", "globalconstushort8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int8*", "globalconstint8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint8*", "globalconstuint8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, 
(const char *)(CL_KERNEL_ARG_TYPE_CONST), "long8*", "globalconstlong8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong8*", "globalconstulong8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float8*", "globalconstfloat8p", + NULL + }, + { + "global_const_vector8_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", "globalconstchar8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "globalconstuchar8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "globalconstshort8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "globalconstushort8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "globalconstint8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "globalconstuint8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "globalconstlong8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", 
"globalconstulong8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "globalconstfloat8restrictp", + NULL + }, + { + "global_volatile_vector8_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char8*", "globalvolatilechar8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar8*", "globalvolatileuchar8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short8*", "globalvolatileshort8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort8*", "globalvolatileushort8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int8*", "globalvolatileint8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint8*", "globalvolatileuint8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long8*", "globalvolatilelong8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong8*", "globalvolatileulong8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float8*", "globalvolatilefloat8p", + NULL + }, + { + "global_volatile_vector8_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), 
"char8*", "globalvolatilechar8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "globalvolatileuchar8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "globalvolatileshort8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "globalvolatileushort8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "globalvolatileint8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "globalvolatileuint8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "globalvolatilelong8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "globalvolatileulong8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "globalvolatilefloat8restrictp", + NULL + }, + { + "global_const_volatile_vector8_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char8*", "globalconstvolatilechar8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar8*", "globalconstvolatileuchar8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short8*", "globalconstvolatileshort8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort8*", "globalconstvolatileushort8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int8*", "globalconstvolatileint8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint8*", "globalconstvolatileuint8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long8*", "globalconstvolatilelong8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong8*", "globalconstvolatileulong8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float8*", "globalconstvolatilefloat8p", + NULL + }, + { + "global_const_volatile_vector8_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", "globalconstvolatilechar8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), 
"uchar8*", "globalconstvolatileuchar8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "globalconstvolatileshort8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "globalconstvolatileushort8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "globalconstvolatileint8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "globalconstvolatileuint8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "globalconstvolatilelong8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "globalconstvolatileulong8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "globalconstvolatilefloat8restrictp", + NULL + }, + { + "local_vector8_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char8*", "localchar8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), 
"uchar8*", "localuchar8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short8*", "localshort8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort8*", "localushort8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int8*", "localint8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint8*", "localuint8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long8*", "locallong8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong8*", "localulong8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float8*", "localfloat8p", + NULL + }, + { + "local_vector8_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", "localchar8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "localuchar8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "localshort8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "localushort8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "localint8restrictp", + (const char 
*)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "localuint8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "locallong8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "localulong8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "localfloat8restrictp", + NULL + }, + { + "local_const_vector8_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char8*", "localconstchar8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar8*", "localconstuchar8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short8*", "localconstshort8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort8*", "localconstushort8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int8*", "localconstint8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint8*", "localconstuint8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long8*", "localconstlong8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong8*", "localconstulong8p", + (const char 
*)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float8*", "localconstfloat8p", + NULL + }, + { + "local_const_vector8_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", "localconstchar8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "localconstuchar8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "localconstshort8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "localconstushort8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "localconstint8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "localconstuint8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "localconstlong8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "localconstulong8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "localconstfloat8restrictp", + NULL + }, + { + "local_volatile_vector8_p", 
+ (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char8*", "localvolatilechar8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar8*", "localvolatileuchar8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short8*", "localvolatileshort8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort8*", "localvolatileushort8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int8*", "localvolatileint8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint8*", "localvolatileuint8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long8*", "localvolatilelong8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong8*", "localvolatileulong8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float8*", "localvolatilefloat8p", + NULL + }, + { + "local_volatile_vector8_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", "localvolatilechar8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "localvolatileuchar8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "localvolatileshort8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "localvolatileushort8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "localvolatileint8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "localvolatileuint8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "localvolatilelong8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "localvolatileulong8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "localvolatilefloat8restrictp", + NULL + }, + { + "local_const_volatile_vector8_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char8*", "localconstvolatilechar8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar8*", "localconstvolatileuchar8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short8*", "localconstvolatileshort8p", + (const 
char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort8*", "localconstvolatileushort8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int8*", "localconstvolatileint8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint8*", "localconstvolatileuint8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long8*", "localconstvolatilelong8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong8*", "localconstvolatileulong8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float8*", "localconstvolatilefloat8p", + NULL + }, + { + "local_const_volatile_vector8_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char8*", "localconstvolatilechar8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar8*", "localconstvolatileuchar8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short8*", "localconstvolatileshort8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort8*", "localconstvolatileushort8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int8*", "localconstvolatileint8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint8*", "localconstvolatileuint8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long8*", "localconstvolatilelong8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong8*", "localconstvolatileulong8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float8*", "localconstvolatilefloat8restrictp", + NULL + }, + { + "vector8_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char8", "char8d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar8", "uchar8d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short8", "short8d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort8", "ushort8d", + (const char 
*)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int8", "int8d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint8", "uint8d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long8", "long8d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong8", "ulong8d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float8", "float8d", + NULL + }, + { + "const_vector8_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char8", "constchar8d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar8", "constuchar8d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short8", "constshort8d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort8", "constushort8d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int8", "constint8d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint8", "constuint8d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long8", "constlong8d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong8", "constulong8d", + 
(const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float8", "constfloat8d", + NULL + }, + { + "private_vector8_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char8", "privatechar8d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar8", "privateuchar8d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short8", "privateshort8d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort8", "privateushort8d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int8", "privateint8d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint8", "privateuint8d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long8", "privatelong8d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong8", "privateulong8d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float8", "privatefloat8d", + NULL + }, + { + "private_const_vector8_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char8", "privateconstchar8d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar8", "privateconstuchar8d", + (const char 
*)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short8", "privateconstshort8d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort8", "privateconstushort8d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int8", "privateconstint8d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint8", "privateconstuint8d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long8", "privateconstlong8d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong8", "privateconstulong8d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float8", "privateconstfloat8d", + NULL + }, + { + "constant_vector16_p0", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char16*", "constantchar16p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar16*", "constantuchar16p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short16*", "constantshort16p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort16*", "constantushort16p", + NULL + }, + { + "constant_vector16_p1", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int16*", 
"constantint16p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint16*", "constantuint16p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long16*", "constantlong16p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong16*", "constantulong16p", + NULL + }, + { + "constant_vector16_p2", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float16*", "constantfloat16p", + NULL + }, + { + "constant_vector16_restrict_p0", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "constantchar16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "constantuchar16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "constantshort16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "constantushort16restrictp", + NULL + }, + { + "constant_vector16_restrict_p1", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "constantint16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "constantuint16restrictp", + 
(const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "constantlong16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "constantulong16restrictp", + NULL + }, + { + "constant_vector16_restrict_p2", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "constantfloat16restrictp", + NULL + }, + { + "global_vector16_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char16*", "globalchar16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar16*", "globaluchar16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short16*", "globalshort16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort16*", "globalushort16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int16*", "globalint16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint16*", "globaluint16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long16*", "globallong16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong16*", "globalulong16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float16*", "globalfloat16p", + NULL + }, + { + "global_vector16_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "globalchar16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "globaluchar16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "globalshort16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "globalushort16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "globalint16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "globaluint16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "globallong16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "globalulong16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "globalfloat16restrictp", + NULL + }, + { + "global_const_vector16_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char16*", "globalconstchar16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST), "uchar16*", "globalconstuchar16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short16*", "globalconstshort16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort16*", "globalconstushort16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int16*", "globalconstint16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint16*", "globalconstuint16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long16*", "globalconstlong16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong16*", "globalconstulong16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float16*", "globalconstfloat16p", + NULL + }, + { + "global_const_vector16_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "globalconstchar16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "globalconstuchar16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "globalconstshort16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "globalconstushort16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "globalconstint16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "globalconstuint16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "globalconstlong16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "globalconstulong16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "globalconstfloat16restrictp", + NULL + }, + { + "global_volatile_vector16_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char16*", "globalvolatilechar16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar16*", "globalvolatileuchar16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short16*", "globalvolatileshort16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort16*", "globalvolatileushort16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int16*", "globalvolatileint16p", + (const 
char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint16*", "globalvolatileuint16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long16*", "globalvolatilelong16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong16*", "globalvolatileulong16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float16*", "globalvolatilefloat16p", + NULL + }, + { + "global_volatile_vector16_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "globalvolatilechar16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "globalvolatileuchar16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "globalvolatileshort16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "globalvolatileushort16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "globalvolatileint16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "globalvolatileuint16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, 
(const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "globalvolatilelong16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "globalvolatileulong16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "globalvolatilefloat16restrictp", + NULL + }, + { + "global_const_volatile_vector16_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char16*", "globalconstvolatilechar16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar16*", "globalconstvolatileuchar16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short16*", "globalconstvolatileshort16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort16*", "globalconstvolatileushort16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int16*", "globalconstvolatileint16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint16*", "globalconstvolatileuint16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long16*", 
"globalconstvolatilelong16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong16*", "globalconstvolatileulong16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float16*", "globalconstvolatilefloat16p", + NULL + }, + { + "global_const_volatile_vector16_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "globalconstvolatilechar16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "globalconstvolatileuchar16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "globalconstvolatileshort16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "globalconstvolatileushort16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "globalconstvolatileint16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "globalconstvolatileuint16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const 
char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "globalconstvolatilelong16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "globalconstvolatileulong16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "globalconstvolatilefloat16restrictp", + NULL + }, + { + "local_vector16_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char16*", "localchar16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar16*", "localuchar16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short16*", "localshort16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort16*", "localushort16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int16*", "localint16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint16*", "localuint16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long16*", "locallong16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong16*", "localulong16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_NONE), "float16*", "localfloat16p", + NULL + }, + { + "local_vector16_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "localchar16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "localuchar16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "localshort16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "localushort16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "localint16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "localuint16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "locallong16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "localulong16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "localfloat16restrictp", + NULL + }, + { + "local_const_vector16_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "char16*", "localconstchar16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uchar16*", "localconstuchar16p", + (const char 
*)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "short16*", "localconstshort16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ushort16*", "localconstushort16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "int16*", "localconstint16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "uint16*", "localconstuint16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "long16*", "localconstlong16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "ulong16*", "localconstulong16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "float16*", "localconstfloat16p", + NULL + }, + { + "local_const_vector16_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "localconstchar16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "localconstuchar16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "localconstshort16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "localconstushort16restrictp", + (const char 
*)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "localconstint16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "localconstuint16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "localconstlong16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "localconstulong16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "localconstfloat16restrictp", + NULL + }, + { + "local_volatile_vector16_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "char16*", "localvolatilechar16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uchar16*", "localvolatileuchar16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "short16*", "localvolatileshort16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ushort16*", "localvolatileushort16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "int16*", "localvolatileint16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "uint16*", 
"localvolatileuint16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "long16*", "localvolatilelong16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "ulong16*", "localvolatileulong16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "float16*", "localvolatilefloat16p", + NULL + }, + { + "local_volatile_vector16_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "localvolatilechar16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "localvolatileuchar16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "localvolatileshort16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "localvolatileushort16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "localvolatileint16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "localvolatileuint16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", 
"localvolatilelong16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "localvolatileulong16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "localvolatilefloat16restrictp", + NULL + }, + { + "local_const_volatile_vector16_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "char16*", "localconstvolatilechar16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uchar16*", "localconstvolatileuchar16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "short16*", "localconstvolatileshort16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ushort16*", "localconstvolatileushort16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "int16*", "localconstvolatileint16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "uint16*", "localconstvolatileuint16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "long16*", "localconstvolatilelong16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "ulong16*", "localconstvolatileulong16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "float16*", "localconstvolatilefloat16p", + NULL + }, + { + "local_const_volatile_vector16_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "char16*", "localconstvolatilechar16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uchar16*", "localconstvolatileuchar16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "short16*", "localconstvolatileshort16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ushort16*", "localconstvolatileushort16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "int16*", "localconstvolatileint16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "uint16*", "localconstvolatileuint16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "long16*", "localconstvolatilelong16restrictp", + (const 
char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "ulong16*", "localconstvolatileulong16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "float16*", "localconstvolatilefloat16restrictp", + NULL + }, + { + "vector16_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char16", "char16d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar16", "uchar16d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short16", "short16d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort16", "ushort16d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int16", "int16d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint16", "uint16d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long16", "long16d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong16", "ulong16d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float16", "float16d", + NULL + }, + { + "const_vector16_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_NONE), "char16", "constchar16d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar16", "constuchar16d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short16", "constshort16d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort16", "constushort16d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int16", "constint16d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint16", "constuint16d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long16", "constlong16d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong16", "constulong16d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float16", "constfloat16d", + NULL + }, + { + "private_vector16_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char16", "privatechar16d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar16", "privateuchar16d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short16", "privateshort16d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort16", "privateushort16d", + (const char 
*)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int16", "privateint16d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint16", "privateuint16d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long16", "privatelong16d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong16", "privateulong16d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float16", "privatefloat16d", + NULL + }, + { + "private_const_vector16_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "char16", "privateconstchar16d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uchar16", "privateconstuchar16d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "short16", "privateconstshort16d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ushort16", "privateconstushort16d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "int16", "privateconstint16d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "uint16", "privateconstuint16d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "long16", "privateconstlong16d", + (const char 
*)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "ulong16", "privateconstulong16d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "float16", "privateconstfloat16d", + NULL + }, + { + "constant_derived_p0", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_type*", "constanttypedef_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "struct struct_type*", "constantstructstruct_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_struct_type*", "constanttypedef_struct_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "union union_type*", "constantunionunion_typep", + NULL + }, + { + "constant_derived_p1", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_union_type*", "constanttypedef_union_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "enum enum_type*", "constantenumenum_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_enum_type*", "constanttypedef_enum_typep", + NULL + }, + { + "constant_derived_restrict_p0", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "constanttypedef_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "constantstructstruct_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "constanttypedef_struct_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "constantunionunion_typerestrictp", + NULL + }, + { + "constant_derived_restrict_p1", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "constanttypedef_union_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "constantenumenum_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "constanttypedef_enum_typerestrictp", + NULL + }, + { + "global_derived_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_type*", "globaltypedef_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "struct struct_type*", "globalstructstruct_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_struct_type*", "globaltypedef_struct_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), 
"union union_type*", "globalunionunion_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_union_type*", "globaltypedef_union_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "enum enum_type*", "globalenumenum_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_enum_type*", "globaltypedef_enum_typep", + NULL + }, + { + "global_derived_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "globaltypedef_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "globalstructstruct_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "globaltypedef_struct_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "globalunionunion_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "globaltypedef_union_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "globalenumenum_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "globaltypedef_enum_typerestrictp", + NULL + }, + { + "global_const_derived_p", + (const char 
*)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_type*", "globalconsttypedef_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "struct struct_type*", "globalconststructstruct_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_struct_type*", "globalconsttypedef_struct_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "union union_type*", "globalconstunionunion_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_union_type*", "globalconsttypedef_union_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "enum enum_type*", "globalconstenumenum_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_enum_type*", "globalconsttypedef_enum_typep", + NULL + }, + { + "global_const_derived_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "globalconsttypedef_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "globalconststructstruct_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "globalconsttypedef_struct_typerestrictp", + (const char 
*)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "globalconstunionunion_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "globalconsttypedef_union_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "globalconstenumenum_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "globalconsttypedef_enum_typerestrictp", + NULL + }, + { + "global_volatile_derived_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_type*", "globalvolatiletypedef_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "struct struct_type*", "globalvolatilestructstruct_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_struct_type*", "globalvolatiletypedef_struct_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "union union_type*", "globalvolatileunionunion_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_union_type*", "globalvolatiletypedef_union_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "enum enum_type*", 
"globalvolatileenumenum_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_enum_type*", "globalvolatiletypedef_enum_typep", + NULL + }, + { + "global_volatile_derived_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "globalvolatiletypedef_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "globalvolatilestructstruct_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "globalvolatiletypedef_struct_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "globalvolatileunionunion_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "globalvolatiletypedef_union_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "globalvolatileenumenum_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "globalvolatiletypedef_enum_typerestrictp", + NULL + }, + { + "global_const_volatile_derived_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_type*", "globalconstvolatiletypedef_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "struct struct_type*", "globalconstvolatilestructstruct_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_struct_type*", "globalconstvolatiletypedef_struct_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "union union_type*", "globalconstvolatileunionunion_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_union_type*", "globalconstvolatiletypedef_union_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "enum enum_type*", "globalconstvolatileenumenum_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_enum_type*", "globalconstvolatiletypedef_enum_typep", + NULL + }, + { + "global_const_volatile_derived_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "globalconstvolatiletypedef_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "globalconstvolatilestructstruct_typerestrictp", + 
(const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "globalconstvolatiletypedef_struct_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "globalconstvolatileunionunion_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "globalconstvolatiletypedef_union_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "globalconstvolatileenumenum_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "globalconstvolatiletypedef_enum_typerestrictp", + NULL + }, + { + "local_derived_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_type*", "localtypedef_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "struct struct_type*", "localstructstruct_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_struct_type*", "localtypedef_struct_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "union union_type*", 
"localunionunion_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_union_type*", "localtypedef_union_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "enum enum_type*", "localenumenum_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_enum_type*", "localtypedef_enum_typep", + NULL + }, + { + "local_derived_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "localtypedef_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "localstructstruct_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "localtypedef_struct_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "localunionunion_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "localtypedef_union_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "localenumenum_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "localtypedef_enum_typerestrictp", + NULL + }, + { + "local_const_derived_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_type*", "localconsttypedef_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "struct struct_type*", "localconststructstruct_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_struct_type*", "localconsttypedef_struct_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "union union_type*", "localconstunionunion_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_union_type*", "localconsttypedef_union_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "enum enum_type*", "localconstenumenum_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "typedef_enum_type*", "localconsttypedef_enum_typep", + NULL + }, + { + "local_const_derived_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "localconsttypedef_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "localconststructstruct_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "localconsttypedef_struct_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const 
char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "localconstunionunion_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "localconsttypedef_union_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "localconstenumenum_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "localconsttypedef_enum_typerestrictp", + NULL + }, + { + "local_volatile_derived_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_type*", "localvolatiletypedef_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "struct struct_type*", "localvolatilestructstruct_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_struct_type*", "localvolatiletypedef_struct_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "union union_type*", "localvolatileunionunion_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_union_type*", "localvolatiletypedef_union_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "enum enum_type*", "localvolatileenumenum_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_enum_type*", "localvolatiletypedef_enum_typep", + NULL + }, + { + "local_volatile_derived_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "localvolatiletypedef_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "localvolatilestructstruct_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "localvolatiletypedef_struct_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "localvolatileunionunion_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "localvolatiletypedef_union_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "localvolatileenumenum_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "localvolatiletypedef_enum_typerestrictp", + NULL + }, + { + "local_const_volatile_derived_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_type*", 
"localconstvolatiletypedef_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "struct struct_type*", "localconstvolatilestructstruct_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_struct_type*", "localconstvolatiletypedef_struct_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "union union_type*", "localconstvolatileunionunion_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_union_type*", "localconstvolatiletypedef_union_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "enum enum_type*", "localconstvolatileenumenum_typep", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "typedef_enum_type*", "localconstvolatiletypedef_enum_typep", + NULL + }, + { + "local_const_volatile_derived_restrict_p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_type*", "localconstvolatiletypedef_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "struct struct_type*", "localconstvolatilestructstruct_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_struct_type*", "localconstvolatiletypedef_struct_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "union union_type*", "localconstvolatileunionunion_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_union_type*", "localconstvolatiletypedef_union_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "enum enum_type*", "localconstvolatileenumenum_typerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "typedef_enum_type*", "localconstvolatiletypedef_enum_typerestrictp", + NULL + }, + { + "derived_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_type", "typedef_typed", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "struct struct_type", "structstruct_typed", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_struct_type", "typedef_struct_typed", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "union union_type", "unionunion_typed", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_NONE), "typedef_union_type", "typedef_union_typed", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "enum enum_type", "enumenum_typed", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_enum_type", "typedef_enum_typed", + NULL + }, + { + "const_derived_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_type", "consttypedef_typed", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "struct struct_type", "conststructstruct_typed", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_struct_type", "consttypedef_struct_typed", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "union union_type", "constunionunion_typed", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_union_type", "consttypedef_union_typed", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "enum enum_type", "constenumenum_typed", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_enum_type", "consttypedef_enum_typed", + NULL + }, + { + "private_derived_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_type", "privatetypedef_typed", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_NONE), "struct struct_type", "privatestructstruct_typed", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_struct_type", "privatetypedef_struct_typed", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "union union_type", "privateunionunion_typed", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_union_type", "privatetypedef_union_typed", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "enum enum_type", "privateenumenum_typed", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_enum_type", "privatetypedef_enum_typed", + NULL + }, + { + "private_const_derived_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_type", "privateconsttypedef_typed", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "struct struct_type", "privateconststructstruct_typed", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_struct_type", "privateconsttypedef_struct_typed", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "union union_type", "privateconstunionunion_typed", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_union_type", "privateconsttypedef_union_typed", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "enum enum_type", "privateconstenumenum_typed", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "typedef_enum_type", "privateconsttypedef_enum_typed", + NULL + }, +}; + +// Support for optional image data type +const char * image_kernel_args[] = { + "#pragma OPENCL EXTENSION cl_khr_3d_image_writes: enable\n" + "kernel void image_d(read_only image2d_t image2d_td0,\n" + " write_only image2d_t image2d_td1,\n" + " read_only image3d_t image3d_td2,\n" + " write_only image3d_t image3d_td3,\n" + " read_only image2d_array_t image2d_array_td4,\n" + " write_only image2d_array_t image2d_array_td5,\n" + " read_only image1d_t image1d_td6,\n" + " write_only image1d_t image1d_td7,\n" + " read_only image1d_buffer_t image1d_buffer_td8,\n" + " write_only image1d_buffer_t image1d_buffer_td9,\n" + " read_only image1d_array_t image1d_array_td10,\n" + " write_only image1d_array_t image1d_array_td11,\n" + " sampler_t sampler_td12)\n" + "{}\n", + "\n" +}; + +const char * image_arg_info[][67] = { + { + "image_d", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_READ_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image2d_t", "image2d_td0", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_WRITE_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image2d_t", "image2d_td1", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_READ_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image3d_t", "image3d_td2", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_WRITE_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image3d_t", "image3d_td3", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_READ_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image2d_array_t", "image2d_array_td4", + (const char 
*)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_WRITE_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image2d_array_t", "image2d_array_td5", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_READ_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image1d_t", "image1d_td6", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_WRITE_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image1d_t", "image1d_td7", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_READ_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image1d_buffer_t", "image1d_buffer_td8", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_WRITE_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image1d_buffer_t", "image1d_buffer_td9", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_READ_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image1d_array_t", "image1d_array_td10", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_WRITE_ONLY, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "image1d_array_t", "image1d_array_td11", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "sampler_t", "sampler_td12", + NULL + }, +}; + +// Support for optional double data type +const char * double_kernel_args[] = { + "kernel void double_scalar_p(constant double*constantdoublep,\n" + " constant double *restrict constantdoublerestrictp,\n" + " global double*globaldoublep,\n" + " global double *restrict globaldoublerestrictp,\n" + " global const double* globalconstdoublep,\n" + " global const double * restrict globalconstdoublerestrictp,\n" + " global volatile double*globalvolatiledoublep,\n" + " global volatile double *restrict globalvolatiledoublerestrictp,\n" + " global const volatile double* globalconstvolatiledoublep)\n" + "{}\n", + "\n" + "kernel void 
double_scalar_p2(global const volatile double * restrict globalconstvolatiledoublerestrictp,\n" + " local double*localdoublep,\n" + " local double *restrict localdoublerestrictp,\n" + " local const double* localconstdoublep,\n" + " local const double * restrict localconstdoublerestrictp,\n" + " local volatile double*localvolatiledoublep,\n" + " local volatile double *restrict localvolatiledoublerestrictp,\n" + " local const volatile double* localconstvolatiledoublep,\n" + " local const volatile double * restrict localconstvolatiledoublerestrictp)\n" + "{}\n", + "\n" + "kernel void double_scalar_d(double doubled,\n" + " const double constdoubled,\n" + " private double privatedoubled,\n" + " private const double privateconstdoubled)\n" + "{}\n", + "\n" + "kernel void double_vector2_p(constant double2*constantdouble2p,\n" + " constant double2 *restrict constantdouble2restrictp,\n" + " global double2*globaldouble2p,\n" + " global double2 *restrict globaldouble2restrictp,\n" + " global const double2* globalconstdouble2p,\n" + " global const double2 * restrict globalconstdouble2restrictp,\n" + " global volatile double2*globalvolatiledouble2p,\n" + " global volatile double2 *restrict globalvolatiledouble2restrictp,\n" + " global const volatile double2* globalconstvolatiledouble2p)\n" + "{}\n", + "\n" + "kernel void double_vector2_p2(global const volatile double2 * restrict globalconstvolatiledouble2restrictp,\n" + " local double2*localdouble2p,\n" + " local double2 *restrict localdouble2restrictp,\n" + " local const double2* localconstdouble2p,\n" + " local const double2 * restrict localconstdouble2restrictp,\n" + " local volatile double2*localvolatiledouble2p,\n" + " local volatile double2 *restrict localvolatiledouble2restrictp,\n" + " local const volatile double2* localconstvolatiledouble2p,\n" + " local const volatile double2 * restrict localconstvolatiledouble2restrictp)\n" + "{}\n", + "\n" + "kernel void double_vector2_d(double2 double2d,\n" + " const double2 
constdouble2d,\n" + " private double2 privatedouble2d,\n" + " private const double2 privateconstdouble2d)\n" + "{}\n", + "\n" + "kernel void double_vector3_p(constant double3*constantdouble3p,\n" + " constant double3 *restrict constantdouble3restrictp,\n" + " global double3*globaldouble3p,\n" + " global double3 *restrict globaldouble3restrictp,\n" + " global const double3* globalconstdouble3p,\n" + " global const double3 * restrict globalconstdouble3restrictp,\n" + " global volatile double3*globalvolatiledouble3p,\n" + " global volatile double3 *restrict globalvolatiledouble3restrictp,\n" + " global const volatile double3* globalconstvolatiledouble3p)\n" + "{}\n", + "\n" + "kernel void double_vector3_p2(global const volatile double3 * restrict globalconstvolatiledouble3restrictp,\n" + " local double3*localdouble3p,\n" + " local double3 *restrict localdouble3restrictp,\n" + " local const double3* localconstdouble3p,\n" + " local const double3 * restrict localconstdouble3restrictp,\n" + " local volatile double3*localvolatiledouble3p,\n" + " local volatile double3 *restrict localvolatiledouble3restrictp,\n" + " local const volatile double3* localconstvolatiledouble3p,\n" + " local const volatile double3 * restrict localconstvolatiledouble3restrictp)\n" + "{}\n", + "\n" + "kernel void double_vector3_d(double3 double3d,\n" + " const double3 constdouble3d,\n" + " private double3 privatedouble3d,\n" + " private const double3 privateconstdouble3d)\n" + "{}\n", + "\n" + "kernel void double_vector4_p(constant double4*constantdouble4p,\n" + " constant double4 *restrict constantdouble4restrictp,\n" + " global double4*globaldouble4p,\n" + " global double4 *restrict globaldouble4restrictp,\n" + " global const double4* globalconstdouble4p,\n" + " global const double4 * restrict globalconstdouble4restrictp,\n" + " global volatile double4*globalvolatiledouble4p,\n" + " global volatile double4 *restrict globalvolatiledouble4restrictp,\n" + " global const volatile double4* 
globalconstvolatiledouble4p)\n" + "{}\n", + "\n" + "kernel void double_vector4_p2(global const volatile double4 * restrict globalconstvolatiledouble4restrictp,\n" + " local double4*localdouble4p,\n" + " local double4 *restrict localdouble4restrictp,\n" + " local const double4* localconstdouble4p,\n" + " local const double4 * restrict localconstdouble4restrictp,\n" + " local volatile double4*localvolatiledouble4p,\n" + " local volatile double4 *restrict localvolatiledouble4restrictp,\n" + " local const volatile double4* localconstvolatiledouble4p,\n" + " local const volatile double4 * restrict localconstvolatiledouble4restrictp)\n" + "{}\n", + "\n" + "kernel void double_vector4_d(double4 double4d,\n" + " const double4 constdouble4d,\n" + " private double4 privatedouble4d,\n" + " private const double4 privateconstdouble4d)\n" + "{}\n", + "\n" + "kernel void double_vector8_p(constant double8*constantdouble8p,\n" + " constant double8 *restrict constantdouble8restrictp,\n" + " global double8*globaldouble8p,\n" + " global double8 *restrict globaldouble8restrictp,\n" + " global const double8* globalconstdouble8p,\n" + " global const double8 * restrict globalconstdouble8restrictp,\n" + " global volatile double8*globalvolatiledouble8p,\n" + " global volatile double8 *restrict globalvolatiledouble8restrictp,\n" + " global const volatile double8* globalconstvolatiledouble8p)\n" + "{}\n", + "\n" + "kernel void double_vector8_p2(global const volatile double8 * restrict globalconstvolatiledouble8restrictp,\n" + " local double8*localdouble8p,\n" + " local double8 *restrict localdouble8restrictp,\n" + " local const double8* localconstdouble8p,\n" + " local const double8 * restrict localconstdouble8restrictp,\n" + " local volatile double8*localvolatiledouble8p,\n" + " local volatile double8 *restrict localvolatiledouble8restrictp,\n" + " local const volatile double8* localconstvolatiledouble8p,\n" + " local const volatile double8 * restrict localconstvolatiledouble8restrictp)\n" + 
"{}\n", + "\n" + "kernel void double_vector8_d(double8 double8d,\n" + " const double8 constdouble8d,\n" + " private double8 privatedouble8d,\n" + " private const double8 privateconstdouble8d)\n" + "{}\n", + "\n" + "kernel void double_vector16_p(constant double16*constantdouble16p,\n" + " constant double16 *restrict constantdouble16restrictp,\n" + " global double16*globaldouble16p,\n" + " global double16 *restrict globaldouble16restrictp,\n" + " global const double16* globalconstdouble16p,\n" + " global const double16 * restrict globalconstdouble16restrictp,\n" + " global volatile double16*globalvolatiledouble16p,\n" + " global volatile double16 *restrict globalvolatiledouble16restrictp,\n" + " global const volatile double16* globalconstvolatiledouble16p)\n" + "{}\n", + "\n" + "kernel void double_vector16_p2(global const volatile double16 * restrict globalconstvolatiledouble16restrictp,\n" + " local double16*localdouble16p,\n" + " local double16 *restrict localdouble16restrictp,\n" + " local const double16* localconstdouble16p,\n" + " local const double16 * restrict localconstdouble16restrictp,\n" + " local volatile double16*localvolatiledouble16p,\n" + " local volatile double16 *restrict localvolatiledouble16restrictp,\n" + " local const volatile double16* localconstvolatiledouble16p,\n" + " local const volatile double16 * restrict localconstvolatiledouble16restrictp)\n" + "{}\n", + "\n" + "kernel void double_vector16_d(double16 double16d,\n" + " const double16 constdouble16d,\n" + " private double16 privatedouble16d,\n" + " private const double16 privateconstdouble16d)\n" + "{}\n", + "\n" +}; + +const char * double_arg_info[][77] = { + { + "double_scalar_p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double*", "constantdoublep", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "constantdoublerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double*", "globaldoublep", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "globaldoublerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double*", "globalconstdoublep", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "globalconstdoublerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double*", "globalvolatiledoublep", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "globalvolatiledoublerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double*", "globalconstvolatiledoublep", + NULL + }, + { + "double_scalar_p2", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "globalconstvolatiledoublerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double*", "localdoublep", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "localdoublerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double*", "localconstdoublep", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "localconstdoublerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double*", "localvolatiledoublep", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "localvolatiledoublerestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double*", "localconstvolatiledoublep", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double*", "localconstvolatiledoublerestrictp", + NULL + }, + { + "double_scalar_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double", "doubled", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double", "constdoubled", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double", "privatedoubled", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double", "privateconstdoubled", + NULL + }, + { + "double_vector2_p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double2*", "constantdouble2p", + (const char 
*)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "constantdouble2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double2*", "globaldouble2p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "globaldouble2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double2*", "globalconstdouble2p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "globalconstdouble2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double2*", "globalvolatiledouble2p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "globalvolatiledouble2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double2*", "globalconstvolatiledouble2p", + NULL + }, + { + "double_vector2_p2", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "globalconstvolatiledouble2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double2*", "localdouble2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "localdouble2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double2*", "localconstdouble2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "localconstdouble2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double2*", "localvolatiledouble2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "localvolatiledouble2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double2*", "localconstvolatiledouble2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double2*", "localconstvolatiledouble2restrictp", + NULL + }, + { + "double_vector2_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double2", "double2d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double2", "constdouble2d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double2", "privatedouble2d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double2", "privateconstdouble2d", + NULL + }, + { + "double_vector3_p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const 
char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double3*", "constantdouble3p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "constantdouble3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double3*", "globaldouble3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "globaldouble3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double3*", "globalconstdouble3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "globalconstdouble3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double3*", "globalvolatiledouble3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "globalvolatiledouble3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double3*", "globalconstvolatiledouble3p", + NULL + }, + { + "double_vector3_p2", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "globalconstvolatiledouble3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double3*", 
"localdouble3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "localdouble3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double3*", "localconstdouble3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "localconstdouble3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double3*", "localvolatiledouble3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "localvolatiledouble3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double3*", "localconstvolatiledouble3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double3*", "localconstvolatiledouble3restrictp", + NULL + }, + { + "double_vector3_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double3", "double3d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double3", "constdouble3d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double3", "privatedouble3d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double3", 
"privateconstdouble3d", + NULL + }, + { + "double_vector4_p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double4*", "constantdouble4p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "constantdouble4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double4*", "globaldouble4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "globaldouble4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double4*", "globalconstdouble4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "globalconstdouble4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double4*", "globalvolatiledouble4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "globalvolatiledouble4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double4*", "globalconstvolatiledouble4p", + NULL + }, + { + "double_vector4_p2", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "globalconstvolatiledouble4restrictp", + (const char 
*)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double4*", "localdouble4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "localdouble4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double4*", "localconstdouble4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "localconstdouble4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double4*", "localvolatiledouble4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "localvolatiledouble4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double4*", "localconstvolatiledouble4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double4*", "localconstvolatiledouble4restrictp", + NULL + }, + { + "double_vector4_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double4", "double4d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double4", "constdouble4d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double4", "privatedouble4d", + (const char 
*)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double4", "privateconstdouble4d", + NULL + }, + { + "double_vector8_p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double8*", "constantdouble8p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "constantdouble8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double8*", "globaldouble8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "globaldouble8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double8*", "globalconstdouble8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "globalconstdouble8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double8*", "globalvolatiledouble8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "globalvolatiledouble8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double8*", "globalconstvolatiledouble8p", + NULL + }, + { + "double_vector8_p2", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "globalconstvolatiledouble8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double8*", "localdouble8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "localdouble8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double8*", "localconstdouble8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "localconstdouble8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double8*", "localvolatiledouble8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "localvolatiledouble8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double8*", "localconstvolatiledouble8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double8*", "localconstvolatiledouble8restrictp", + NULL + }, + { + "double_vector8_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double8", "double8d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double8", "constdouble8d", + (const char 
*)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double8", "privatedouble8d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double8", "privateconstdouble8d", + NULL + }, + { + "double_vector16_p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double16*", "constantdouble16p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "constantdouble16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double16*", "globaldouble16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "globaldouble16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double16*", "globalconstdouble16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "globalconstdouble16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double16*", "globalvolatiledouble16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "globalvolatiledouble16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double16*", "globalconstvolatiledouble16p", + 
NULL + }, + { + "double_vector16_p2", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "globalconstvolatiledouble16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double16*", "localdouble16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "localdouble16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "double16*", "localconstdouble16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "localconstdouble16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "double16*", "localvolatiledouble16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "localvolatiledouble16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "double16*", "localconstvolatiledouble16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "double16*", "localconstvolatiledouble16restrictp", + NULL + }, + { + "double_vector16_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double16", "double16d", + (const char 
*)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double16", "constdouble16d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double16", "privatedouble16d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "double16", "privateconstdouble16d", + NULL + }, +}; + + +// Support for optional half data type +const char * half_kernel_args[] = { + "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n" + "\n" + "kernel void half_scalar_p(constant half*constanthalfp,\n" + " constant half *restrict constanthalfrestrictp,\n" + " global half*globalhalfp,\n" + " global half *restrict globalhalfrestrictp,\n" + " global const half* globalconsthalfp,\n" + " global const half * restrict globalconsthalfrestrictp,\n" + " global volatile half*globalvolatilehalfp,\n" + " global volatile half *restrict globalvolatilehalfrestrictp,\n" + " global const volatile half* globalconstvolatilehalfp)\n" + "{}\n", + "\n" + "kernel void half_scalar_p2(global const volatile half * restrict globalconstvolatilehalfrestrictp,\n" + " local half*localhalfp,\n" + " local half *restrict localhalfrestrictp,\n" + " local const half* localconsthalfp,\n" + " local const half * restrict localconsthalfrestrictp,\n" + " local volatile half*localvolatilehalfp,\n" + " local volatile half *restrict localvolatilehalfrestrictp,\n" + " local const volatile half* localconstvolatilehalfp,\n" + " local const volatile half * restrict localconstvolatilehalfrestrictp)\n" + "{}\n", + "\n" + "kernel void half_scalar_d(half halfd,\n" + " const half consthalfd,\n" + " private half privatehalfd,\n" + " private const half privateconsthalfd)\n" + "{}\n", + "\n" + "kernel void half_vector2_p(constant half2*constanthalf2p,\n" + " constant half2 *restrict constanthalf2restrictp,\n" + " global half2*globalhalf2p,\n" 
+ " global half2 *restrict globalhalf2restrictp,\n" + " global const half2* globalconsthalf2p,\n" + " global const half2 * restrict globalconsthalf2restrictp,\n" + " global volatile half2*globalvolatilehalf2p,\n" + " global volatile half2 *restrict globalvolatilehalf2restrictp,\n" + " global const volatile half2* globalconstvolatilehalf2p)\n" + "{}\n", + "\n" + "kernel void half_vector2_p2(global const volatile half2 * restrict globalconstvolatilehalf2restrictp,\n" + " local half2*localhalf2p,\n" + " local half2 *restrict localhalf2restrictp,\n" + " local const half2* localconsthalf2p,\n" + " local const half2 * restrict localconsthalf2restrictp,\n" + " local volatile half2*localvolatilehalf2p,\n" + " local volatile half2 *restrict localvolatilehalf2restrictp,\n" + " local const volatile half2* localconstvolatilehalf2p,\n" + " local const volatile half2 * restrict localconstvolatilehalf2restrictp)\n" + "{}\n", + "\n" + "kernel void half_vector2_d(half2 half2d,\n" + " const half2 consthalf2d,\n" + " private half2 privatehalf2d,\n" + " private const half2 privateconsthalf2d)\n" + "{}\n", + "\n" + "kernel void half_vector3_p(constant half3*constanthalf3p,\n" + " constant half3 *restrict constanthalf3restrictp,\n" + " global half3*globalhalf3p,\n" + " global half3 *restrict globalhalf3restrictp,\n" + " global const half3* globalconsthalf3p,\n" + " global const half3 * restrict globalconsthalf3restrictp,\n" + " global volatile half3*globalvolatilehalf3p,\n" + " global volatile half3 *restrict globalvolatilehalf3restrictp,\n" + " global const volatile half3* globalconstvolatilehalf3p)\n" + "{}\n", + "\n" + "kernel void half_vector3_p2(global const volatile half3 * restrict globalconstvolatilehalf3restrictp,\n" + " local half3*localhalf3p,\n" + " local half3 *restrict localhalf3restrictp,\n" + " local const half3* localconsthalf3p,\n" + " local const half3 * restrict localconsthalf3restrictp,\n" + " local volatile half3*localvolatilehalf3p,\n" + " local volatile half3 
*restrict localvolatilehalf3restrictp,\n" + " local const volatile half3* localconstvolatilehalf3p,\n" + " local const volatile half3 * restrict localconstvolatilehalf3restrictp)\n" + "{}\n", + "\n" + "kernel void half_vector3_d(half3 half3d,\n" + " const half3 consthalf3d,\n" + " private half3 privatehalf3d,\n" + " private const half3 privateconsthalf3d)\n" + "{}\n", + "\n" + "kernel void half_vector4_p(constant half4*constanthalf4p,\n" + " constant half4 *restrict constanthalf4restrictp,\n" + " global half4*globalhalf4p,\n" + " global half4 *restrict globalhalf4restrictp,\n" + " global const half4* globalconsthalf4p,\n" + " global const half4 * restrict globalconsthalf4restrictp,\n" + " global volatile half4*globalvolatilehalf4p,\n" + " global volatile half4 *restrict globalvolatilehalf4restrictp,\n" + " global const volatile half4* globalconstvolatilehalf4p)\n" + "{}\n", + "\n" + "kernel void half_vector4_p2(global const volatile half4 * restrict globalconstvolatilehalf4restrictp,\n" + " local half4*localhalf4p,\n" + " local half4 *restrict localhalf4restrictp,\n" + " local const half4* localconsthalf4p,\n" + " local const half4 * restrict localconsthalf4restrictp,\n" + " local volatile half4*localvolatilehalf4p,\n" + " local volatile half4 *restrict localvolatilehalf4restrictp,\n" + " local const volatile half4* localconstvolatilehalf4p,\n" + " local const volatile half4 * restrict localconstvolatilehalf4restrictp)\n" + "{}\n", + "\n" + "kernel void half_vector4_d(half4 half4d,\n" + " const half4 consthalf4d,\n" + " private half4 privatehalf4d,\n" + " private const half4 privateconsthalf4d)\n" + "{}\n", + "\n" + "kernel void half_vector8_p(constant half8*constanthalf8p,\n" + " constant half8 *restrict constanthalf8restrictp,\n" + " global half8*globalhalf8p,\n" + " global half8 *restrict globalhalf8restrictp,\n" + " global const half8* globalconsthalf8p,\n" + " global const half8 * restrict globalconsthalf8restrictp,\n" + " global volatile 
half8*globalvolatilehalf8p,\n" + " global volatile half8 *restrict globalvolatilehalf8restrictp,\n" + " global const volatile half8* globalconstvolatilehalf8p)\n" + "{}\n", + "\n" + "kernel void half_vector8_p2(global const volatile half8 * restrict globalconstvolatilehalf8restrictp,\n" + " local half8*localhalf8p,\n" + " local half8 *restrict localhalf8restrictp,\n" + " local const half8* localconsthalf8p,\n" + " local const half8 * restrict localconsthalf8restrictp,\n" + " local volatile half8*localvolatilehalf8p,\n" + " local volatile half8 *restrict localvolatilehalf8restrictp,\n" + " local const volatile half8* localconstvolatilehalf8p,\n" + " local const volatile half8 * restrict localconstvolatilehalf8restrictp)\n" + "{}\n", + "\n" + "kernel void half_vector8_d(half8 half8d,\n" + " const half8 consthalf8d,\n" + " private half8 privatehalf8d,\n" + " private const half8 privateconsthalf8d)\n" + "{}\n", + "\n" + "kernel void half_vector16_p(constant half16*constanthalf16p,\n" + " constant half16 *restrict constanthalf16restrictp,\n" + " global half16*globalhalf16p,\n" + " global half16 *restrict globalhalf16restrictp,\n" + " global const half16* globalconsthalf16p,\n" + " global const half16 * restrict globalconsthalf16restrictp,\n" + " global volatile half16*globalvolatilehalf16p,\n" + " global volatile half16 *restrict globalvolatilehalf16restrictp,\n" + " global const volatile half16* globalconstvolatilehalf16p)\n" + "{}\n", + "\n" + "kernel void half_vector16_p2(global const volatile half16 * restrict globalconstvolatilehalf16restrictp,\n" + " local half16*localhalf16p,\n" + " local half16 *restrict localhalf16restrictp,\n" + " local const half16* localconsthalf16p,\n" + " local const half16 * restrict localconsthalf16restrictp,\n" + " local volatile half16*localvolatilehalf16p,\n" + " local volatile half16 *restrict localvolatilehalf16restrictp,\n" + " local const volatile half16* localconstvolatilehalf16p,\n" + " local const volatile half16 * restrict 
localconstvolatilehalf16restrictp)\n" + "{}\n", + "\n" + "kernel void half_vector16_d(half16 half16d,\n" + " const half16 consthalf16d,\n" + " private half16 privatehalf16d,\n" + " private const half16 privateconsthalf16d)\n" + "{}\n", + "\n" +}; + +const char * half_arg_info[][77] = { + { + "half_scalar_p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half*", "constanthalfp", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "constanthalfrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half*", "globalhalfp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "globalhalfrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half*", "globalconsthalfp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "globalconsthalfrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half*", "globalvolatilehalfp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "globalvolatilehalfrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half*", "globalconstvolatilehalfp", + NULL + }, + { + "half_scalar_p2", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "globalconstvolatilehalfrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half*", "localhalfp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "localhalfrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half*", "localconsthalfp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "localconsthalfrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half*", "localvolatilehalfp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "localvolatilehalfrestrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half*", "localconstvolatilehalfp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half*", "localconstvolatilehalfrestrictp", + NULL + }, + { + "half_scalar_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half", "halfd", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half", "consthalfd", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const 
char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half", "privatehalfd", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half", "privateconsthalfd", + NULL + }, + { + "half_vector2_p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half2*", "constanthalf2p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "constanthalf2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half2*", "globalhalf2p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "globalhalf2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half2*", "globalconsthalf2p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "globalconsthalf2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half2*", "globalvolatilehalf2p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "globalvolatilehalf2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half2*", "globalconstvolatilehalf2p", + NULL + }, + { + "half_vector2_p2", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char 
*)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "globalconstvolatilehalf2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half2*", "localhalf2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "localhalf2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half2*", "localconsthalf2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "localconsthalf2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half2*", "localvolatilehalf2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "localvolatilehalf2restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half2*", "localconstvolatilehalf2p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half2*", "localconstvolatilehalf2restrictp", + NULL + }, + { + "half_vector2_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half2", "half2d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half2", "consthalf2d", + (const char 
*)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half2", "privatehalf2d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half2", "privateconsthalf2d", + NULL + }, + { + "half_vector3_p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half3*", "constanthalf3p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "constanthalf3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half3*", "globalhalf3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "globalhalf3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half3*", "globalconsthalf3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "globalconsthalf3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half3*", "globalvolatilehalf3p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "globalvolatilehalf3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half3*", "globalconstvolatilehalf3p", + NULL + }, + { + "half_vector3_p2", + (const char 
*)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "globalconstvolatilehalf3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half3*", "localhalf3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "localhalf3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half3*", "localconsthalf3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "localconsthalf3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half3*", "localvolatilehalf3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "localvolatilehalf3restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half3*", "localconstvolatilehalf3p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half3*", "localconstvolatilehalf3restrictp", + NULL + }, + { + "half_vector3_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half3", "half3d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half3", 
"consthalf3d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half3", "privatehalf3d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half3", "privateconsthalf3d", + NULL + }, + { + "half_vector4_p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half4*", "constanthalf4p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "constanthalf4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half4*", "globalhalf4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "globalhalf4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half4*", "globalconsthalf4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "globalconsthalf4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half4*", "globalvolatilehalf4p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "globalvolatilehalf4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half4*", "globalconstvolatilehalf4p", + NULL + }, + { + "half_vector4_p2", + 
(const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "globalconstvolatilehalf4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half4*", "localhalf4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "localhalf4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half4*", "localconsthalf4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "localconsthalf4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half4*", "localvolatilehalf4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "localvolatilehalf4restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half4*", "localconstvolatilehalf4p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half4*", "localconstvolatilehalf4restrictp", + NULL + }, + { + "half_vector4_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half4", "half4d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), 
"half4", "consthalf4d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half4", "privatehalf4d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half4", "privateconsthalf4d", + NULL + }, + { + "half_vector8_p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half8*", "constanthalf8p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "constanthalf8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half8*", "globalhalf8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "globalhalf8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half8*", "globalconsthalf8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "globalconsthalf8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half8*", "globalvolatilehalf8p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "globalvolatilehalf8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half8*", "globalconstvolatilehalf8p", + NULL + }, + { + 
"half_vector8_p2", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "globalconstvolatilehalf8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half8*", "localhalf8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "localhalf8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half8*", "localconsthalf8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "localconsthalf8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half8*", "localvolatilehalf8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "localvolatilehalf8restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half8*", "localconstvolatilehalf8p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half8*", "localconstvolatilehalf8restrictp", + NULL + }, + { + "half_vector8_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half8", "half8d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char 
*)(CL_KERNEL_ARG_TYPE_NONE), "half8", "consthalf8d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half8", "privatehalf8d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half8", "privateconsthalf8d", + NULL + }, + { + "half_vector16_p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half16*", "constanthalf16p", + (const char *)CL_KERNEL_ARG_ADDRESS_CONSTANT, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "constanthalf16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half16*", "globalhalf16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "globalhalf16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half16*", "globalconsthalf16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "globalconsthalf16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half16*", "globalvolatilehalf16p", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "globalvolatilehalf16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half16*", 
"globalconstvolatilehalf16p", + NULL + }, + { + "half_vector16_p2", + (const char *)CL_KERNEL_ARG_ADDRESS_GLOBAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "globalconstvolatilehalf16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half16*", "localhalf16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "localhalf16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST), "half16*", "localconsthalf16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "localconsthalf16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE), "half16*", "localvolatilehalf16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "localvolatilehalf16restrictp", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE), "half16*", "localconstvolatilehalf16p", + (const char *)CL_KERNEL_ARG_ADDRESS_LOCAL, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_CONST|CL_KERNEL_ARG_TYPE_VOLATILE|CL_KERNEL_ARG_TYPE_RESTRICT), "half16*", "localconstvolatilehalf16restrictp", + NULL + }, + { + "half_vector16_d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half16", "half16d", + (const char 
*)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half16", "consthalf16d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half16", "privatehalf16d", + (const char *)CL_KERNEL_ARG_ADDRESS_PRIVATE, (const char *)CL_KERNEL_ARG_ACCESS_NONE, (const char *)(CL_KERNEL_ARG_TYPE_NONE), "half16", "privateconsthalf16d", + NULL + }, +}; + + +template +int test(cl_device_id deviceID, cl_context context, kernel_args_t kernel_args, cl_uint lines_count, arg_info_t arg_info, size_t total_kernels_in_program) { + + cl_program program; + cl_kernel kernel; + const size_t max_name_len = 512; + cl_char name[ max_name_len ]; + cl_uint arg_count, numArgs; + size_t i, j, size; + int error; + + program = clCreateProgramWithSource( context, lines_count, kernel_args, NULL, &error ); + if ( program == NULL || error != CL_SUCCESS ) + { + print_error( error, "Unable to create required arguments kernel program" ); + return -1; + } + + // Compile the program + log_info( "Building kernels...\n" ); + clBuildProgram( program, 1, &deviceID, "-cl-kernel-arg-info", NULL, NULL ); + + // check for build errors and exit if things didn't work + size_t size_ret; + cl_build_status build_status; + error = clGetProgramBuildInfo(program, deviceID, CL_PROGRAM_BUILD_STATUS, sizeof(build_status), &build_status, &size_ret); + test_error( error, "Unable to query build status" ); + if (build_status == CL_BUILD_ERROR) { + printf("CL_PROGRAM_BUILD_STATUS=%d\n", (int) build_status); + error = clGetProgramBuildInfo(program, deviceID, CL_PROGRAM_BUILD_LOG, 0, NULL, &size_ret); + test_error( error, "Unable to get build log size" ); + char *build_log = (char *)malloc(size_ret); + error = clGetProgramBuildInfo(program,deviceID, CL_PROGRAM_BUILD_LOG, size_ret, build_log, &size_ret); + test_error( error, "Unable to get build log" ); + printf("CL_PROGRAM_BUILD_LOG:\n%s\n", 
build_log); + printf("CL_BUILD_ERROR. exiting\n"); + free(build_log); + return -1; + } + + // Lookup the number of kernels in the program. + log_info( "Testing kernels...\n" ); + size_t total_kernels = 0; + error = clGetProgramInfo( program, CL_PROGRAM_NUM_KERNELS, sizeof( size_t ), &total_kernels, NULL ); + test_error( error, "Unable to get program info num kernels" ); + + if ( total_kernels != total_kernels_in_program ) + { + print_error( error, "Program did not build all kernels" ); + return -1; + } + + // Lookup the kernel names. + size_t kernel_names_len = 0; + error = clGetProgramInfo( program, CL_PROGRAM_KERNEL_NAMES, 0, NULL, &kernel_names_len ); + test_error( error, "Unable to get length of kernel names list." ); + + size_t expected_kernel_names_len = 0; + for ( i = 0; i < total_kernels; ++i ) + { + expected_kernel_names_len += 1 + strlen( arg_info[ i ][ 0 ] ); + } + if ( kernel_names_len != expected_kernel_names_len ) + { + log_error( "Kernel names string is not the right length, expected %d, got %d\n", (int) expected_kernel_names_len, (int) kernel_names_len ); + return -1; + } + + const size_t len = ( kernel_names_len + 1 ) * sizeof( char ); + char* kernel_names = (char*) malloc( len ); + error = clGetProgramInfo( program, CL_PROGRAM_KERNEL_NAMES, len, kernel_names, &kernel_names_len ); + test_error( error, "Unable to get kernel names list." ); + + // Check to see if the kernel name array is null terminated. + if ( kernel_names[ kernel_names_len - 1 ] != '\0' ) + { + free( kernel_names ); + print_error( error, "Kernel name list was not null terminated" ); + return -1; + } + + // Check to see if the correct kernel name string was returned. + // Does the string contain each expected kernel name? 
+ for ( i = 0; i < total_kernels; ++i ) + if ( !strstr( kernel_names, arg_info[ i ][ 0 ] ) ) + break; + if ( i != total_kernels ) + { + log_error( "Kernel names string is missing \"%s\"\n", arg_info[ i ][ 0 ] ); + free( kernel_names ); + return -1; + } + + // Are the kernel names delimited by ';'? + if ( !strtok( kernel_names, ";" ) ) + { + error = -1; + } + else + { + for ( i = 1; i < total_kernels; ++i ) + { + if ( !strtok( NULL, ";" ) ) + { + error = -1; + } + } + } + if ( error ) + { + log_error( "Kernel names string was not properly delimited by ';'\n" ); + free( kernel_names ); + return -1; + } + free( kernel_names ); + + // Create kernel objects and query them. + int rc = 0; + for ( i = 0; i < total_kernels; ++i ) + { + int kernel_rc = 0; + const char* kernel_name = arg_info[ i ][ 0 ]; + kernel = clCreateKernel( program, kernel_name, &error ); + if( kernel == NULL || error != CL_SUCCESS ) + { + log_error( "ERROR: Could not get kernel: %s\n", kernel_name ); + kernel_rc = -1; + } + + if(kernel_rc == 0) + { + // Determine the expected number of arguments. + arg_count = 0; + while (arg_info[ i ][ (ARG_INFO_FIELD_COUNT * arg_count) + 1 ] != NULL) + ++arg_count; + + // Try to get the number of arguments. 
+ error = clGetKernelInfo( kernel, CL_KERNEL_NUM_ARGS, 0, NULL, &size ); + test_error( error, "Unable to get kernel arg count param size" ); + if( size != sizeof( numArgs ) ) + { + log_error( "ERROR: Kernel arg count param returns invalid size (expected %d, got %d) for kernel: %s\n", (int)sizeof( numArgs ), (int)size, kernel_name ); + kernel_rc = -1; + } + } + + + if(kernel_rc == 0) + { + error = clGetKernelInfo( kernel, CL_KERNEL_NUM_ARGS, sizeof( numArgs ), &numArgs, NULL ); + test_error( error, "Unable to get kernel arg count" ); + if( numArgs != arg_count ) + { + log_error( "ERROR: Kernel arg count returned invalid value (expected %d, got %d) for kernel: %s\n", arg_count, numArgs, kernel_name ); + kernel_rc = -1; + } + } + + if(kernel_rc == 0) + { + for ( j = 0; j < numArgs; ++j ) + { + + int arg_rc = 0; + cl_kernel_arg_address_qualifier expected_address_qualifier = (cl_kernel_arg_address_qualifier)(uintptr_t)arg_info[ i ][ (ARG_INFO_FIELD_COUNT * j) + ARG_INFO_ADDR_OFFSET ]; + cl_kernel_arg_access_qualifier expected_access_qualifier = (cl_kernel_arg_access_qualifier)(uintptr_t)arg_info[ i ][ (ARG_INFO_FIELD_COUNT * j) + ARG_INFO_ACCESS_OFFSET ]; + cl_kernel_arg_type_qualifier expected_type_qualifier = (cl_kernel_arg_type_qualifier)(uintptr_t)arg_info[ i ][ (ARG_INFO_FIELD_COUNT * j) + ARG_INFO_TYPE_QUAL_OFFSET ]; + const char* expected_type_name = arg_info[ i ][ (ARG_INFO_FIELD_COUNT * j) + ARG_INFO_TYPE_NAME_OFFSET ]; + const char* expected_arg_name = arg_info[ i ][ (ARG_INFO_FIELD_COUNT * j) + ARG_INFO_ARG_NAME_OFFSET ]; + + // Try to get the address qualifier of each argument. 
+ cl_kernel_arg_address_qualifier address_qualifier = 0; + error = clGetKernelArgInfo( kernel, (cl_uint)j, CL_KERNEL_ARG_ADDRESS_QUALIFIER, sizeof address_qualifier, &address_qualifier, &size ); + test_error( error, "Unable to get argument address qualifier" ); + error = (address_qualifier != expected_address_qualifier); + if ( error ) + { + log_error( "ERROR: Bad address qualifier, kernel: \"%s\", argument number: %d, expected \"0x%X\", got \"0x%X\"\n", kernel_name, (unsigned int)j, (unsigned int)expected_address_qualifier, (unsigned int)address_qualifier ); + arg_rc = -1; + } + + // Try to get the access qualifier of each argument. + cl_kernel_arg_access_qualifier access_qualifier = 0; + error = clGetKernelArgInfo( kernel, (cl_uint)j, CL_KERNEL_ARG_ACCESS_QUALIFIER, sizeof access_qualifier, &access_qualifier, &size ); + test_error( error, "Unable to get argument access qualifier" ); + error = (access_qualifier != expected_access_qualifier); + if ( error ) + { + log_error( "ERROR: Bad access qualifier, kernel: \"%s\", argument number: %d, expected \"0x%X\", got \"0x%X\"\n", kernel_name, (unsigned int)j, (unsigned int)expected_access_qualifier, (unsigned int)access_qualifier ); + arg_rc = -1; + } + + // Try to get the type qualifier of each argument. + cl_kernel_arg_type_qualifier arg_type_qualifier = 0; + error = clGetKernelArgInfo( kernel, (cl_uint)j, CL_KERNEL_ARG_TYPE_QUALIFIER, sizeof arg_type_qualifier, &arg_type_qualifier, &size ); + test_error( error, "Unable to get argument type qualifier" ); + error = (arg_type_qualifier != expected_type_qualifier); + if ( error ) + { + log_error( "ERROR: Bad type qualifier, kernel: \"%s\", argument number: %d, expected \"0x%X\", got \"0x%X\"\n", kernel_name, (unsigned int)j, (unsigned int)expected_type_qualifier, (unsigned int)arg_type_qualifier ); + arg_rc = -1; + } + + // Try to get the type of each argument. 
+ memset( name, 0, max_name_len ); + error = clGetKernelArgInfo(kernel, (cl_uint)j, CL_KERNEL_ARG_TYPE_NAME, max_name_len, name, &size ); + test_error( error, "Unable to get argument type name" ); + error = strcmp( (const char*) name, expected_type_name ); + if ( error ) + { + log_error( "ERROR: Bad argument type name, kernel: \"%s\", argument number: %d, expected \"%s\", got \"%s\"\n", kernel_name, (unsigned int)j, expected_type_name, name ); + arg_rc = -1; + } + + // Try to get the name of each argument. + memset( name, 0, max_name_len ); + error = clGetKernelArgInfo( kernel, (cl_uint)j, CL_KERNEL_ARG_NAME, max_name_len, name, &size ); + test_error( error, "Unable to get argument name" ); + error = strcmp( (const char*) name, expected_arg_name ); + if ( error ) + { + log_error( "ERROR: Bad argument name, kernel: \"%s\", argument number: %d, expected \"%s\", got \"%s\"\n", kernel_name, (unsigned int)j, expected_arg_name, name ); + arg_rc = -1; + } + + if(arg_rc != 0) { + kernel_rc = -1; + } + } + } + + //log_info( "%s ... %s\n",arg_info[i][0],kernel_rc == 0 ? 
"passed" : "failed" ); + if(kernel_rc != 0) { + rc = -1; + } + } + return rc; +} + + +int test_get_kernel_arg_info( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + clProgramWrapper program; + clKernelWrapper kernel; + size_t size; + int error; + + cl_bool supports_double = 0; // assume not + cl_bool supports_half = 0; // assume not + cl_bool supports_images = 0; // assume not + + // Check if this device supports images + error = clGetDeviceInfo(deviceID, CL_DEVICE_IMAGE_SUPPORT, sizeof supports_images, &supports_images, NULL); + test_error(error, "clGetDeviceInfo for CL_DEVICE_IMAGE_SUPPORT failed"); + + if (supports_images) { + log_info(" o Device supports images\n"); + log_info(" o Expecting SUCCESS when testing image kernel arguments.\n"); + } + else { + log_info(" o Device lacks image support\n"); + log_info(" o Not testing image kernel arguments.\n"); + } + + // Get the extensions string for the device + error = clGetDeviceInfo(deviceID, CL_DEVICE_EXTENSIONS, 0, NULL, &size); + test_error(error, "clGetDeviceInfo for CL_DEVICE_EXTENSIONS size failed"); + + char *extensions = (char*)malloc(sizeof(char)*(size + 1)); + if (extensions == 0) { + log_error("Failed to allocate memory for extensions string.\n"); + return -1; + } + memset( extensions, CHAR_MIN, sizeof(char)*(size+1) ); + + error = clGetDeviceInfo(deviceID, CL_DEVICE_EXTENSIONS, sizeof(char)*size, extensions, NULL); + test_error(error, "clGetDeviceInfo for CL_DEVICE_EXTENSIONS failed"); + + // Check to make sure the extension string is NUL terminated. + if( extensions[size] != CHAR_MIN ) + { + test_error( -1, "clGetDeviceInfo for CL_DEVICE_EXTENSIONS wrote past the end of the array!" 
); + return -1; + } + extensions[size] = '\0'; // set last char to NUL to avoid problems with string functions later + + // test for termination with '\0' + size_t stringSize = strlen( extensions ); + if( stringSize == size ) + { + test_error( -1, "clGetDeviceInfo for CL_DEVICE_EXTENSIONS is not NUL terminated!" ); + return -1; + } + + if (strstr(extensions, "cl_khr_fp64")) { + log_info(" o Device claims extension 'cl_khr_fp64'\n"); + log_info(" o Expecting SUCCESS when testing double kernel arguments.\n"); + supports_double = 1; + } else { + cl_device_fp_config double_fp_config; + error = clGetDeviceInfo(deviceID, CL_DEVICE_DOUBLE_FP_CONFIG, sizeof(double_fp_config), &double_fp_config, NULL); + test_error(error, "clGetDeviceInfo for CL_DEVICE_DOUBLE_FP_CONFIG failed"); + if (double_fp_config != 0) + supports_double = 1; + else { + log_info(" o Device lacks extension 'cl_khr_fp64'\n"); + log_info(" o Not testing double kernel arguments.\n"); + supports_double = 0; + } + } + + if (strstr(extensions, "cl_khr_fp16")) { + log_info(" o Device claims extension 'cl_khr_fp16'\n"); + log_info(" o Expecting SUCCESS when testing halfn* kernel arguments.\n"); + supports_half = 1; + } else { + log_info(" o Device lacks extension 'cl_khr_fp16'\n"); + log_info(" o Not testing halfn* kernel arguments.\n"); + supports_half = 0; + } + + + int test_failed = 0; + + // Now create a test program using required arguments + log_info("Testing required kernel arguments...\n"); + error = test(deviceID, context, required_kernel_args, sizeof(required_kernel_args)/sizeof(required_kernel_args[0]), required_arg_info, sizeof(required_arg_info)/sizeof(required_arg_info[0])); + test_failed = (error) ? -1 : test_failed; + + if ( supports_images ) { + log_info("Testing optional image arguments...\n"); + error = test(deviceID, context, image_kernel_args, sizeof(image_kernel_args)/sizeof(image_kernel_args[0]), image_arg_info, sizeof(image_arg_info)/sizeof(image_arg_info[0])); + test_failed = (error) ? 
-1 : test_failed; + } + + if ( supports_double ) { + log_info("Testing optional double arguments...\n"); + error = test(deviceID, context, double_kernel_args, sizeof(double_kernel_args)/sizeof(double_kernel_args[0]), double_arg_info, sizeof(double_arg_info)/sizeof(double_arg_info[0])); + test_failed = (error) ? -1 : test_failed; + } + + if ( supports_half ) { + log_info("Testing optional half arguments...\n"); + error = test(deviceID, context, half_kernel_args, sizeof(half_kernel_args)/sizeof(half_kernel_args[0]), half_arg_info, sizeof(half_arg_info)/sizeof(half_arg_info[0])); + test_failed = (error) ? -1 : test_failed; + } + + return test_failed; +} + + diff --git a/test_conformance/compatibility/test_conformance/api/test_kernel_arg_multi_setup.cpp b/test_conformance/compatibility/test_conformance/api/test_kernel_arg_multi_setup.cpp new file mode 100644 index 00000000..89c5eeb1 --- /dev/null +++ b/test_conformance/compatibility/test_conformance/api/test_kernel_arg_multi_setup.cpp @@ -0,0 +1,277 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "testBase.h" +#include "../../test_common/harness/conversions.h" + +// This test is designed to stress passing multiple vector parameters to kernels and verifying access between them all + +const char *multi_arg_kernel_source_pattern = +"__kernel void sample_test(__global %s *src1, __global %s *src2, __global %s *src3, __global %s *dst1, __global %s *dst2, __global %s *dst3 )\n" +"{\n" +" int tid = get_global_id(0);\n" +" dst1[tid] = src1[tid];\n" +" dst2[tid] = src2[tid];\n" +" dst3[tid] = src3[tid];\n" +"}\n"; + +extern cl_uint gRandomSeed; + +#define MAX_ERROR_TOLERANCE 0.0005f + +int test_multi_arg_set(cl_device_id device, cl_context context, cl_command_queue queue, + ExplicitType vec1Type, int vec1Size, + ExplicitType vec2Type, int vec2Size, + ExplicitType vec3Type, int vec3Size, MTdata d) +{ + clProgramWrapper program; + clKernelWrapper kernel; + int error, i, j; + clMemWrapper streams[ 6 ]; + size_t threads[1], localThreads[1]; + char programSrc[ 10248 ], vec1Name[ 64 ], vec2Name[ 64 ], vec3Name[ 64 ]; + char sizeNames[][ 4 ] = { "", "2", "3", "4", "", "", "", "8" }; + const char *ptr; + void *initData[3], *resultData[3]; + + + // Create the program source + sprintf( vec1Name, "%s%s", get_explicit_type_name( vec1Type ), sizeNames[ vec1Size - 1 ] ); + sprintf( vec2Name, "%s%s", get_explicit_type_name( vec2Type ), sizeNames[ vec2Size - 1 ] ); + sprintf( vec3Name, "%s%s", get_explicit_type_name( vec3Type ), sizeNames[ vec3Size - 1 ] ); + + sprintf( programSrc, multi_arg_kernel_source_pattern, + vec1Name, vec2Name, vec3Name, vec1Name, vec2Name, vec3Name, + vec1Size, vec1Size, vec2Size, vec2Size, vec3Size, vec3Size ); + ptr = programSrc; + + // Create our testing kernel + error = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "sample_test" ); + test_error( error, "Unable to create testing kernel" ); + + // Get thread dimensions + threads[0] = 1024; + error = get_max_common_work_group_size( context, kernel, threads[0], 
&localThreads[0] ); + test_error( error, "Unable to get work group size for kernel" ); + + // Create input streams + initData[ 0 ] = create_random_data( vec1Type, d, (unsigned int)threads[ 0 ] * vec1Size ); + streams[ 0 ] = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), get_explicit_type_size( vec1Type ) * threads[0] * vec1Size, initData[ 0 ], &error ); + test_error( error, "Unable to create testing stream" ); + + initData[ 1 ] = create_random_data( vec2Type, d, (unsigned int)threads[ 0 ] * vec2Size ); + streams[ 1 ] = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), get_explicit_type_size( vec2Type ) * threads[0] * vec2Size, initData[ 1 ], &error ); + test_error( error, "Unable to create testing stream" ); + + initData[ 2 ] = create_random_data( vec3Type, d, (unsigned int)threads[ 0 ] * vec3Size ); + streams[ 2 ] = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), get_explicit_type_size( vec3Type ) * threads[0] * vec3Size, initData[ 2 ], &error ); + test_error( error, "Unable to create testing stream" ); + + streams[ 3 ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), get_explicit_type_size( vec1Type ) * threads[0] * vec1Size, NULL, &error ); + test_error( error, "Unable to create testing stream" ); + + streams[ 4 ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), get_explicit_type_size( vec2Type ) * threads[0] * vec2Size, NULL, &error ); + test_error( error, "Unable to create testing stream" ); + + streams[ 5 ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), get_explicit_type_size( vec3Type ) * threads[0] * vec3Size, NULL, &error ); + test_error( error, "Unable to create testing stream" ); + + // Set the arguments + error = 0; + for( i = 0; i < 6; i++ ) + error |= clSetKernelArg( kernel, i, sizeof( cl_mem ), &streams[ i ] ); + test_error( error, "Unable to set arguments for kernel" ); + + // Execute! 
+ error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); + test_error( error, "Unable to execute kernel" ); + + // Read results + resultData[0] = malloc( get_explicit_type_size( vec1Type ) * vec1Size * threads[0] ); + resultData[1] = malloc( get_explicit_type_size( vec2Type ) * vec2Size * threads[0] ); + resultData[2] = malloc( get_explicit_type_size( vec3Type ) * vec3Size * threads[0] ); + error = clEnqueueReadBuffer( queue, streams[ 3 ], CL_TRUE, 0, get_explicit_type_size( vec1Type ) * vec1Size * threads[ 0 ], resultData[0], 0, NULL, NULL ); + error |= clEnqueueReadBuffer( queue, streams[ 4 ], CL_TRUE, 0, get_explicit_type_size( vec2Type ) * vec2Size * threads[ 0 ], resultData[1], 0, NULL, NULL ); + error |= clEnqueueReadBuffer( queue, streams[ 5 ], CL_TRUE, 0, get_explicit_type_size( vec3Type ) * vec3Size * threads[ 0 ], resultData[2], 0, NULL, NULL ); + test_error( error, "Unable to read result stream" ); + + // Verify + char *ptr1 = (char *)initData[ 0 ], *ptr2 = (char *)resultData[ 0 ]; + size_t span = get_explicit_type_size( vec1Type ); + for( i = 0; i < (int)threads[0]; i++ ) + { + for( j = 0; j < vec1Size; j++ ) + { + if( memcmp( ptr1 + span * j , ptr2 + span * j, span ) != 0 ) + { + log_error( "ERROR: Value did not validate for component %d of item %d of stream 0!\n", j, i ); + free( initData[ 0 ] ); + free( initData[ 1 ] ); + free( initData[ 2 ] ); + free( resultData[ 0 ] ); + free( resultData[ 1 ] ); + free( resultData[ 2 ] ); + return -1; + } + } + ptr1 += span * vec1Size; + ptr2 += span * vec1Size; + } + + ptr1 = (char *)initData[ 1 ]; + ptr2 = (char *)resultData[ 1 ]; + span = get_explicit_type_size( vec2Type ); + for( i = 0; i < (int)threads[0]; i++ ) + { + for( j = 0; j < vec2Size; j++ ) + { + if( memcmp( ptr1 + span * j , ptr2 + span * j, span ) != 0 ) + { + log_error( "ERROR: Value did not validate for component %d of item %d of stream 1!\n", j, i ); + free( initData[ 0 ] ); + free( initData[ 1 ] ); + 
free( initData[ 2 ] ); + free( resultData[ 0 ] ); + free( resultData[ 1 ] ); + free( resultData[ 2 ] ); + return -1; + } + } + ptr1 += span * vec2Size; + ptr2 += span * vec2Size; + } + + ptr1 = (char *)initData[ 2 ]; + ptr2 = (char *)resultData[ 2 ]; + span = get_explicit_type_size( vec3Type ); + for( i = 0; i < (int)threads[0]; i++ ) + { + for( j = 0; j < vec3Size; j++ ) + { + if( memcmp( ptr1 + span * j , ptr2 + span * j, span ) != 0 ) + { + log_error( "ERROR: Value did not validate for component %d of item %d of stream 2!\n", j, i ); + free( initData[ 0 ] ); + free( initData[ 1 ] ); + free( initData[ 2 ] ); + free( resultData[ 0 ] ); + free( resultData[ 1 ] ); + free( resultData[ 2 ] ); + return -1; + } + } + ptr1 += span * vec3Size; + ptr2 += span * vec3Size; + } + + // If we got here, everything verified successfully + free( initData[ 0 ] ); + free( initData[ 1 ] ); + free( initData[ 2 ] ); + free( resultData[ 0 ] ); + free( resultData[ 1 ] ); + free( resultData[ 2 ] ); + + return 0; +} + +int test_kernel_arg_multi_setup_exhaustive(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + // Loop through every combination of input and output types + ExplicitType types[] = { kChar, kShort, kInt, kFloat, kNumExplicitTypes }; + int type1, type2, type3; + int size1, size2, size3; + RandomSeed seed( gRandomSeed ); + + log_info( "\n" ); // for formatting + + for( type1 = 0; types[ type1 ] != kNumExplicitTypes; type1++ ) + { + for( type2 = 0; types[ type2 ] != kNumExplicitTypes; type2++ ) + { + for( type3 = 0; types[ type3 ] != kNumExplicitTypes; type3++ ) + { + log_info( "\n\ttesting %s, %s, %s...", get_explicit_type_name( types[ type1 ] ), get_explicit_type_name( types[ type2 ] ), get_explicit_type_name( types[ type3 ] ) ); + + // Loop through every combination of vector size + for( size1 = 2; size1 <= 8; size1 <<= 1 ) + { + for( size2 = 2; size2 <= 8; size2 <<= 1 ) + { + for( size3 = 2; size3 <= 8; size3 <<= 1 ) + { + log_info("."); + 
fflush( stdout); + if( test_multi_arg_set( device, context, queue, + types[ type1 ], size1, + types[ type2 ], size2, + types[ type3 ], size3, seed ) ) + return -1; + } + } + } + } + } + } + log_info( "\n" ); + return 0; +} + +int test_kernel_arg_multi_setup_random(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + // Loop through a selection of combinations + ExplicitType types[] = { kChar, kShort, kInt, kFloat, kNumExplicitTypes }; + int type1, type2, type3; + int size1, size2, size3; + RandomSeed seed( gRandomSeed ); + + num_elements = 3*3*3*4; + log_info( "Testing %d random configurations\n", num_elements ); + + // Loop through every combination of vector size + for( size1 = 2; size1 <= 8; size1 <<= 1 ) + { + for( size2 = 2; size2 <= 8; size2 <<= 1 ) + { + for( size3 = 2; size3 <= 8; size3 <<= 1 ) + { + // Loop through 4 type combinations for each size combination + int n; + for (n=0; n<4; n++) { + type1 = (int)get_random_float(0,4, seed); + type2 = (int)get_random_float(0,4, seed); + type3 = (int)get_random_float(0,4, seed); + + + log_info( "\ttesting %s%d, %s%d, %s%d...\n", + get_explicit_type_name( types[ type1 ] ), size1, + get_explicit_type_name( types[ type2 ] ), size2, + get_explicit_type_name( types[ type3 ] ), size3 ); + + if( test_multi_arg_set( device, context, queue, + types[ type1 ], size1, + types[ type2 ], size2, + types[ type3 ], size3, seed ) ) + return -1; + } + } + } + } + return 0; +} + + + + diff --git a/test_conformance/compatibility/test_conformance/api/test_kernels.c b/test_conformance/compatibility/test_conformance/api/test_kernels.c new file mode 100644 index 00000000..9cfbca27 --- /dev/null +++ b/test_conformance/compatibility/test_conformance/api/test_kernels.c @@ -0,0 +1,704 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "testBase.h" +#include "../../test_common/harness/typeWrappers.h" +#include "../../test_common/harness/conversions.h" + +extern cl_uint gRandomSeed; + +const char *sample_single_test_kernel[] = { +"__kernel void sample_test(__global float *src, __global int *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = (int)src[tid];\n" +"\n" +"}\n" }; + +const char *sample_struct_test_kernel[] = { +"typedef struct {\n" +"__global int *A;\n" +"__global int *B;\n" +"} input_pair_t;\n" +"\n" +"__kernel void sample_test(__global input_pair_t *src, __global int *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = src->A[tid] + src->B[tid];\n" +"\n" +"}\n" }; + +const char *sample_struct_array_test_kernel[] = { +"typedef struct {\n" +"int A;\n" +"int B;\n" +"} input_pair_t;\n" +"\n" +"__kernel void sample_test(__global input_pair_t *src, __global int *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = src[tid].A + src[tid].B;\n" +"\n" +"}\n" }; + +const char *sample_const_test_kernel[] = { +"__kernel void sample_test(__constant int *src1, __constant int *src2, __global int *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = src1[tid] + src2[tid];\n" +"\n" +"}\n" }; + +const char *sample_const_global_test_kernel[] = { +"__constant int addFactor = 1024;\n" +"__kernel void sample_test(__global int *src1, __global int *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = src1[tid] + addFactor;\n" +"\n" +"}\n" }; + +const char *sample_two_kernel_program[] = { +"__kernel 
void sample_test(__global float *src, __global int *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = (int)src[tid];\n" +"\n" +"}\n", +"__kernel void sample_test2(__global int *src, __global float *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = (float)src[tid];\n" +"\n" +"}\n" }; + + + + +int test_get_kernel_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + cl_program program, testProgram; + cl_context testContext; + cl_kernel kernel; + cl_char name[ 512 ]; + cl_uint numArgs, numInstances; + size_t paramSize; + + + /* Create reference */ + if( create_single_kernel_helper( context, &program, &kernel, 1, sample_single_test_kernel, "sample_test" ) != 0 ) + { + return -1; + } + + error = clGetKernelInfo( kernel, CL_KERNEL_FUNCTION_NAME, NULL, 0, ¶mSize ); + test_error( error, "Unable to get kernel function name param size" ); + if( paramSize != strlen( "sample_test" ) + 1 ) + { + log_error( "ERROR: Kernel function name param returns invalid size (expected %d, got %d)\n", (int)strlen( "sample_test" ) + 1, (int)paramSize ); + return -1; + } + + error = clGetKernelInfo( kernel, CL_KERNEL_FUNCTION_NAME, sizeof( name ), name, NULL ); + test_error( error, "Unable to get kernel function name" ); + if( strcmp( (char *)name, "sample_test" ) != 0 ) + { + log_error( "ERROR: Kernel function name returned invalid value (expected sample_test, got %s)\n", (char *)name ); + return -1; + } + + + error = clGetKernelInfo( kernel, CL_KERNEL_NUM_ARGS, 0, NULL, ¶mSize ); + test_error( error, "Unable to get kernel arg count param size" ); + if( paramSize != sizeof( numArgs ) ) + { + log_error( "ERROR: Kernel arg count param returns invalid size (expected %d, got %d)\n", (int)sizeof( numArgs ), (int)paramSize ); + return -1; + } + + error = clGetKernelInfo( kernel, CL_KERNEL_NUM_ARGS, sizeof( numArgs ), &numArgs, NULL ); + test_error( error, "Unable to get kernel arg count" ); + if( numArgs != 2 
) + { + log_error( "ERROR: Kernel arg count returned invalid value (expected %d, got %d)\n", 2, numArgs ); + return -1; + } + + + error = clGetKernelInfo( kernel, CL_KERNEL_REFERENCE_COUNT, 0, NULL, ¶mSize ); + test_error( error, "Unable to get kernel reference count param size" ); + if( paramSize != sizeof( numInstances ) ) + { + log_error( "ERROR: Kernel reference count param returns invalid size (expected %d, got %d)\n", (int)sizeof( numInstances ), (int)paramSize ); + return -1; + } + + error = clGetKernelInfo( kernel, CL_KERNEL_REFERENCE_COUNT, sizeof( numInstances ), &numInstances, NULL ); + test_error( error, "Unable to get kernel reference count" ); + + + error = clGetKernelInfo( kernel, CL_KERNEL_PROGRAM, NULL, 0, ¶mSize ); + test_error( error, "Unable to get kernel program param size" ); + if( paramSize != sizeof( testProgram ) ) + { + log_error( "ERROR: Kernel program param returns invalid size (expected %d, got %d)\n", (int)sizeof( testProgram ), (int)paramSize ); + return -1; + } + + error = clGetKernelInfo( kernel, CL_KERNEL_PROGRAM, sizeof( testProgram ), &testProgram, NULL ); + test_error( error, "Unable to get kernel program" ); + if( testProgram != program ) + { + log_error( "ERROR: Kernel program returned invalid value (expected %p, got %p)\n", program, testProgram ); + return -1; + } + + error = clGetKernelInfo( kernel, CL_KERNEL_CONTEXT, sizeof( testContext ), &testContext, NULL ); + test_error( error, "Unable to get kernel context" ); + if( testContext != context ) + { + log_error( "ERROR: Kernel context returned invalid value (expected %p, got %p)\n", context, testContext ); + return -1; + } + + /* Release memory */ + clReleaseKernel( kernel ); + clReleaseProgram( program ); + return 0; +} + +int test_execute_kernel_local_sizes(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper streams[2]; + size_t threads[1], 
localThreads[1]; + cl_float inputData[100]; + cl_int outputData[100]; + RandomSeed seed( gRandomSeed ); + int i; + + /* Create a kernel to test with */ + if( create_single_kernel_helper( context, &program, &kernel, 1, sample_single_test_kernel, "sample_test" ) != 0 ) + { + return -1; + } + + /* Create some I/O streams */ + streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * 100, NULL, &error); + test_error( error, "Creating test array failed" ); + streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 100, NULL, &error); + test_error( error, "Creating test array failed" ); + + /* Write some test data */ + memset( outputData, 0, sizeof( outputData ) ); + + for (i=0; i<100; i++) + inputData[i] = get_random_float(-(float) 0x7fffffff, (float) 0x7fffffff, seed); + + error = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, sizeof(cl_float)*100, (void *)inputData, 0, NULL, NULL); + test_error( error, "Unable to set testing kernel data" ); + + /* Set the arguments */ + error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] ); + test_error( error, "Unable to set kernel arguments" ); + error = clSetKernelArg( kernel, 1, sizeof( streams[1] ), &streams[1] ); + test_error( error, "Unable to set kernel arguments" ); + + /* Test running the kernel and verifying it */ + threads[0] = (size_t)100; + error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] ); + test_error( error, "Unable to get work group size to use" ); + + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); + test_error( error, "Kernel execution failed" ); + + error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*100, (void *)outputData, 0, NULL, NULL ); + test_error( error, "Unable to get result data" ); + + for (i=0; i<100; i++) + { + if (outputData[i] != (int)inputData[i]) + { + log_error( "ERROR: Data did not verify on first 
pass!\n" ); + return -1; + } + } + + /* Try again */ + if( localThreads[0] > 1 ) + localThreads[0] /= 2; + while( localThreads[0] > 1 && 0 != threads[0] % localThreads[0] ) + localThreads[0]--; + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); + test_error( error, "Kernel execution failed" ); + + error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*100, (void *)outputData, 0, NULL, NULL ); + test_error( error, "Unable to get result data" ); + + for (i=0; i<100; i++) + { + if (outputData[i] != (int)inputData[i]) + { + log_error( "ERROR: Data did not verify on first pass!\n" ); + return -1; + } + } + + /* And again */ + if( localThreads[0] > 1 ) + localThreads[0] /= 2; + while( localThreads[0] > 1 && 0 != threads[0] % localThreads[0] ) + localThreads[0]--; + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); + test_error( error, "Kernel execution failed" ); + + error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*100, (void *)outputData, 0, NULL, NULL ); + test_error( error, "Unable to get result data" ); + + for (i=0; i<100; i++) + { + if (outputData[i] != (int)inputData[i]) + { + log_error( "ERROR: Data did not verify on first pass!\n" ); + return -1; + } + } + + /* One more time */ + localThreads[0] = (unsigned int)1; + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); + test_error( error, "Kernel execution failed" ); + + error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*100, (void *)outputData, 0, NULL, NULL ); + test_error( error, "Unable to get result data" ); + + for (i=0; i<100; i++) + { + if (outputData[i] != (int)inputData[i]) + { + log_error( "ERROR: Data did not verify on first pass!\n" ); + return -1; + } + } + + return 0; +} + +int test_set_kernel_arg_by_index(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + 
int error; + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper streams[2]; + size_t threads[1], localThreads[1]; + cl_float inputData[10]; + cl_int outputData[10]; + RandomSeed seed( gRandomSeed ); + int i; + + /* Create a kernel to test with */ + if( create_single_kernel_helper( context, &program, &kernel, 1, sample_single_test_kernel, "sample_test" ) != 0 ) + { + return -1; + } + + /* Create some I/O streams */ + streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * 10, NULL, &error); + test_error( error, "Creating test array failed" ); + streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 10, NULL, &error); + test_error( error, "Creating test array failed" ); + + /* Write some test data */ + memset( outputData, 0, sizeof( outputData ) ); + + for (i=0; i<10; i++) + inputData[i] = get_random_float(-(float) 0x7fffffff, (float) 0x7fffffff, seed); + + error = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, sizeof(cl_float)*10, (void *)inputData, 0, NULL, NULL); + test_error( error, "Unable to set testing kernel data" ); + + /* Test setting the arguments by index manually */ + error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]); + test_error( error, "Unable to set indexed kernel arguments" ); + error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0]); + test_error( error, "Unable to set indexed kernel arguments" ); + + + /* Test running the kernel and verifying it */ + threads[0] = (size_t)10; + + error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] ); + test_error( error, "Unable to get work group size to use" ); + + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); + test_error( error, "Kernel execution failed" ); + + error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*10, (void *)outputData, 0, NULL, NULL ); + test_error( error, "Unable to get 
result data" ); + + for (i=0; i<10; i++) + { + if (outputData[i] != (int)inputData[i]) + { + log_error( "ERROR: Data did not verify on first pass!\n" ); + return -1; + } + } + + return 0; +} + +int test_set_kernel_arg_struct(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + cl_program program; + cl_kernel kernel; + void *args[2]; + cl_mem outStream; + size_t threads[1], localThreads[1]; + cl_int outputData[10]; + int i; + cl_int randomTestDataA[10], randomTestDataB[10]; + MTdata d; + + struct img_pair_t + { + cl_mem streamA; + cl_mem streamB; + } image_pair; + + + /* Create a kernel to test with */ + if( create_single_kernel_helper( context, &program, &kernel, 1, sample_struct_test_kernel, "sample_test" ) != 0 ) + { + return -1; + } + + /* Create some I/O streams */ + d = init_genrand( gRandomSeed ); + for( i = 0; i < 10; i++ ) + { + randomTestDataA[i] = (cl_int)genrand_int32(d); + randomTestDataB[i] = (cl_int)genrand_int32(d); + } + free_mtdata(d); d = NULL; + + image_pair.streamA = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_int) * 10, randomTestDataA, &error); + test_error( error, "Creating test array failed" ); + image_pair.streamB = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_int) * 10, randomTestDataB, &error); + test_error( error, "Creating test array failed" ); + outStream = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 10, NULL, &error); + test_error( error, "Creating test array failed" ); + + /* Set the arguments */ + args[0] = &image_pair; + args[1] = outStream; + + error = clSetKernelArg(kernel, 0, sizeof( image_pair ), &image_pair); + test_error( error, "Unable to set indexed kernel arguments" ); + error = clSetKernelArg(kernel, 1, sizeof( cl_mem ), &args[1]); + test_error( error, "Unable to set indexed kernel arguments" ); + + /* Test running the kernel and verifying it */ + threads[0] = (size_t)10; + + error = 
get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] ); + test_error( error, "Unable to get work group size to use" ); + + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); + test_error( error, "Kernel execution failed" ); + + error = clEnqueueReadBuffer( queue, outStream, CL_TRUE, 0, sizeof(cl_int)*10, (void *)outputData, 0, NULL, NULL ); + test_error( error, "Unable to get result data" ); + + for (i=0; i<10; i++) + { + if (outputData[i] != randomTestDataA[i] + randomTestDataB[i]) + { + log_error( "ERROR: Data did not verify!\n" ); + return -1; + } + } + + + clReleaseMemObject( image_pair.streamA ); + clReleaseMemObject( image_pair.streamB ); + clReleaseMemObject( outStream ); + clReleaseKernel( kernel ); + clReleaseProgram( program ); + + return 0; +} + +int test_set_kernel_arg_constant(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper streams[3]; + size_t threads[1], localThreads[1]; + cl_int outputData[10]; + int i; + cl_int randomTestDataA[10], randomTestDataB[10]; + cl_ulong maxSize; + MTdata d; + + /* Verify our test buffer won't be bigger than allowed */ + error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof( maxSize ), &maxSize, 0 ); + test_error( error, "Unable to get max constant buffer size" ); + if( maxSize < sizeof( cl_int ) * 10 ) + { + log_error( "ERROR: Unable to test constant argument to kernel: max size of constant buffer is reported as %d!\n", (int)maxSize ); + return -1; + } + + /* Create a kernel to test with */ + if( create_single_kernel_helper( context, &program, &kernel, 1, sample_const_test_kernel, "sample_test" ) != 0 ) + { + return -1; + } + + /* Create some I/O streams */ + d = init_genrand( gRandomSeed ); + for( i = 0; i < 10; i++ ) + { + randomTestDataA[i] = (cl_int)genrand_int32(d) & 0xffffff; /* Make sure values are 
positive, just so we don't have to */ + randomTestDataB[i] = (cl_int)genrand_int32(d) & 0xffffff; /* deal with overflow on the verification */ + } + free_mtdata(d); d = NULL; + + streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_int) * 10, randomTestDataA, &error); + test_error( error, "Creating test array failed" ); + streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_int) * 10, randomTestDataB, &error); + test_error( error, "Creating test array failed" ); + streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 10, NULL, &error); + test_error( error, "Creating test array failed" ); + + /* Set the arguments */ + error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0]); + test_error( error, "Unable to set indexed kernel arguments" ); + error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]); + test_error( error, "Unable to set indexed kernel arguments" ); + error = clSetKernelArg(kernel, 2, sizeof( streams[2] ), &streams[2]); + test_error( error, "Unable to set indexed kernel arguments" ); + + + /* Test running the kernel and verifying it */ + threads[0] = (size_t)10; + + error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] ); + test_error( error, "Unable to get work group size to use" ); + + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); + test_error( error, "Kernel execution failed" ); + + error = clEnqueueReadBuffer( queue, streams[2], CL_TRUE, 0, sizeof(cl_int)*10, (void *)outputData, 0, NULL, NULL ); + test_error( error, "Unable to get result data" ); + + for (i=0; i<10; i++) + { + if (outputData[i] != randomTestDataA[i] + randomTestDataB[i]) + { + log_error( "ERROR: Data sample %d did not verify! 
%d does not match %d + %d (%d)\n", i, outputData[i], randomTestDataA[i], randomTestDataB[i], ( randomTestDataA[i] + randomTestDataB[i] ) ); + return -1; + } + } + + return 0; +} + +int test_set_kernel_arg_struct_array(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper streams[2]; + size_t threads[1], localThreads[1]; + cl_int outputData[10]; + int i; + MTdata d; + + typedef struct img_pair_type + { + int A; + int B; + } image_pair_t; + + image_pair_t image_pair[ 10 ]; + + + /* Create a kernel to test with */ + if( create_single_kernel_helper( context, &program, &kernel, 1, sample_struct_array_test_kernel, "sample_test" ) != 0 ) + { + return -1; + } + + /* Create some I/O streams */ + d = init_genrand( gRandomSeed ); + for( i = 0; i < 10; i++ ) + { + image_pair[i].A = (cl_int)genrand_int32(d); + image_pair[i].A = (cl_int)genrand_int32(d); + } + free_mtdata(d); d = NULL; + + streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(image_pair_t) * 10, (void *)image_pair, &error); + test_error( error, "Creating test array failed" ); + streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 10, NULL, &error); + test_error( error, "Creating test array failed" ); + + /* Set the arguments */ + error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0]); + test_error( error, "Unable to set indexed kernel arguments" ); + error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]); + test_error( error, "Unable to set indexed kernel arguments" ); + + /* Test running the kernel and verifying it */ + threads[0] = (size_t)10; + + error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] ); + test_error( error, "Unable to get work group size to use" ); + + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); + 
test_error( error, "Kernel execution failed" ); + + error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*10, (void *)outputData, 0, NULL, NULL ); + test_error( error, "Unable to get result data" ); + + for (i=0; i<10; i++) + { + if (outputData[i] != image_pair[i].A + image_pair[i].B) + { + log_error( "ERROR: Data did not verify!\n" ); + return -1; + } + } + + return 0; +} + +int test_create_kernels_in_program(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + cl_program program; + cl_kernel kernel[3]; + unsigned int kernelCount; + + /* Create a test program */ + program = clCreateProgramWithSource( context, 2, sample_two_kernel_program, NULL, &error); + if( program == NULL || error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create test program!\n" ); + return -1; + } + + /* Build */ + error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL ); + test_error( error, "Unable to build test program" ); + + /* Try getting the kernel count */ + error = clCreateKernelsInProgram( program, 0, NULL, &kernelCount ); + test_error( error, "Unable to get kernel count for built program" ); + if( kernelCount != 2 ) + { + log_error( "ERROR: Returned kernel count from clCreateKernelsInProgram is incorrect! 
(got %d, expected 2)\n", kernelCount ); + return -1; + } + + /* Try actually getting the kernels */ + error = clCreateKernelsInProgram( program, 2, kernel, NULL ); + test_error( error, "Unable to get kernels for built program" ); + clReleaseKernel( kernel[0] ); + clReleaseKernel( kernel[1] ); + + clReleaseProgram( program ); + return 0; +} + +int test_kernel_global_constant(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper streams[2]; + size_t threads[1], localThreads[1]; + cl_int outputData[10]; + int i; + cl_int randomTestDataA[10]; + MTdata d; + + + /* Create a kernel to test with */ + if( create_single_kernel_helper( context, &program, &kernel, 1, sample_const_global_test_kernel, "sample_test" ) != 0 ) + { + return -1; + } + + /* Create some I/O streams */ + d = init_genrand( gRandomSeed ); + for( i = 0; i < 10; i++ ) + { + randomTestDataA[i] = (cl_int)genrand_int32(d) & 0xffff; /* Make sure values are positive and small, just so we don't have to */ + } + free_mtdata(d); d = NULL; + + streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_int) * 10, randomTestDataA, &error); + test_error( error, "Creating test array failed" ); + streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 10, NULL, &error); + test_error( error, "Creating test array failed" ); + + /* Set the arguments */ + error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0]); + test_error( error, "Unable to set indexed kernel arguments" ); + error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]); + test_error( error, "Unable to set indexed kernel arguments" ); + + + /* Test running the kernel and verifying it */ + threads[0] = (size_t)10; + + error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] ); + test_error( error, "Unable to get work group size to use" ); 
+ + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); + test_error( error, "Kernel execution failed" ); + + error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*10, (void *)outputData, 0, NULL, NULL ); + test_error( error, "Unable to get result data" ); + + for (i=0; i<10; i++) + { + if (outputData[i] != randomTestDataA[i] + 1024) + { + log_error( "ERROR: Data sample %d did not verify! %d does not match %d + 1024 (%d)\n", i, outputData[i], randomTestDataA[i], ( randomTestDataA[i] + 1024 ) ); + return -1; + } + } + + return 0; +} + + + diff --git a/test_conformance/compatibility/test_conformance/api/test_mem_object_info.cpp b/test_conformance/compatibility/test_conformance/api/test_mem_object_info.cpp new file mode 100644 index 00000000..b4e14569 --- /dev/null +++ b/test_conformance/compatibility/test_conformance/api/test_mem_object_info.cpp @@ -0,0 +1,750 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "testBase.h" +#include "../../test_common/harness/typeWrappers.h" +#include "../../test_common/harness/testHarness.h" + +extern cl_uint gRandomSeed; + + +#define TEST_MEM_OBJECT_PARAM( mem, paramName, val, expected, name, type, cast ) \ +error = clGetMemObjectInfo( mem, paramName, sizeof( val ), &val, &size ); \ +test_error( error, "Unable to get mem object " name ); \ +if( val != expected ) \ +{ \ +log_error( "ERROR: Mem object " name " did not validate! (expected " type ", got " type " from %s:%d)\n", \ +expected, (cast)val, __FILE__, __LINE__ ); \ +return -1; \ +} \ +if( size != sizeof( val ) ) \ +{ \ +log_error( "ERROR: Returned size of mem object " name " does not validate! (expected %d, got %d from %s:%d)\n", \ +(int)sizeof( val ), (int)size , __FILE__, __LINE__ ); \ +return -1; \ +} + +static void CL_CALLBACK mem_obj_destructor_callback( cl_mem, void * data ) +{ + free( data ); +} + +static unsigned int +get_image_dim(MTdata *d, unsigned int mod) +{ + unsigned int val = 0; + + do + { + val = (unsigned int)genrand_int32(*d) % mod; + } while (val == 0); + + return val; +} + + +int test_get_buffer_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements ) +{ + int error; + size_t size; + void * buffer = NULL; + + clMemWrapper bufferObject; + clMemWrapper subBufferObject; + + cl_mem_flags bufferFlags[] = { + CL_MEM_READ_WRITE, + CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, + CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, + CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, + CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, + CL_MEM_READ_ONLY, + CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, + CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, + CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, + CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, + CL_MEM_WRITE_ONLY, + CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, + CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, + CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, 
+ CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, + CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE, + CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, + CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, + CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY, + CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, + CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, + CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY, + CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, + CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, + CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE, + CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, + CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, + CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY, + CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, + CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, + CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY, + CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, + CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY | 
CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, + CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE, + CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, + CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, + CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY, + CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, + CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, + CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY, + CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, + CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, + }; + + cl_mem_flags subBufferFlags[] = { + CL_MEM_READ_WRITE, + CL_MEM_READ_ONLY, + CL_MEM_WRITE_ONLY, + 0, + CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE, + CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY, + CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY, + CL_MEM_HOST_READ_ONLY | 0, + CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE, + CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY, + CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY, + CL_MEM_HOST_WRITE_ONLY | 0, + CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE, + CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY, + CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY, + CL_MEM_HOST_NO_ACCESS | 0, + }; + + + // Get the address alignment, so we can make sure the sub-buffer test later works properly. 
+ cl_uint addressAlignBits; + error = clGetDeviceInfo( deviceID, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof(addressAlignBits), &addressAlignBits, NULL ); + test_error( error, "Unable to get device address alignment" ); + size_t addressAlign = addressAlignBits/8; + if ( addressAlign < 128 ) + { + addressAlign = 128; + } + + for ( unsigned int i = 0; i < sizeof(bufferFlags) / sizeof(cl_mem_flags); ++i ) + { + //printf("@@@ bufferFlags[%u]=0x%x\n", i, bufferFlags[ i ]); + if ( bufferFlags[ i ] & CL_MEM_USE_HOST_PTR ) + { + // Create a buffer object to test against. + buffer = malloc( addressAlign * 4 ); + bufferObject = clCreateBuffer( context, bufferFlags[ i ], addressAlign * 4, buffer, &error ); + if ( error ) + { + free( buffer ); + test_error( error, "Unable to create buffer (CL_MEM_USE_HOST_PTR) to test with" ); + } + + // Make sure buffer is cleaned up appropriately if we encounter an error in the rest of the calls. + error = clSetMemObjectDestructorCallback( bufferObject, mem_obj_destructor_callback, buffer ); + test_error( error, "Unable to set mem object destructor callback" ); + + void * ptr; + TEST_MEM_OBJECT_PARAM( bufferObject, CL_MEM_HOST_PTR, ptr, buffer, "host pointer", "%p", void * ) + } + else if ( (bufferFlags[ i ] & CL_MEM_ALLOC_HOST_PTR) && (bufferFlags[ i ] & CL_MEM_COPY_HOST_PTR) ) + { + // Create a buffer object to test against. + buffer = malloc( addressAlign * 4 ); + bufferObject = clCreateBuffer( context, bufferFlags[ i ], addressAlign * 4, buffer, &error ); + if ( error ) + { + free( buffer ); + test_error( error, "Unable to create buffer (CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR) to test with" ); + } + + // Make sure buffer is cleaned up appropriately if we encounter an error in the rest of the calls. + error = clSetMemObjectDestructorCallback( bufferObject, mem_obj_destructor_callback, buffer ); + test_error( error, "Unable to set mem object destructor callback" ); + } + else if ( bufferFlags[ i ] & CL_MEM_ALLOC_HOST_PTR ) + { + // Create a buffer object to test against. 
+ bufferObject = clCreateBuffer( context, bufferFlags[ i ], addressAlign * 4, NULL, &error ); + test_error( error, "Unable to create buffer (CL_MEM_ALLOC_HOST_PTR) to test with" ); + } + else if ( bufferFlags[ i ] & CL_MEM_COPY_HOST_PTR ) + { + // Create a buffer object to test against. + buffer = malloc( addressAlign * 4 ); + bufferObject = clCreateBuffer( context, bufferFlags[ i ], addressAlign * 4, buffer, &error ); + if ( error ) + { + free( buffer ); + test_error( error, "Unable to create buffer (CL_MEM_COPY_HOST_PTR) to test with" ); + } + + // Make sure buffer is cleaned up appropriately if we encounter an error in the rest of the calls. + error = clSetMemObjectDestructorCallback( bufferObject, mem_obj_destructor_callback, buffer ); + test_error( error, "Unable to set mem object destructor callback" ); + } + else + { + // Create a buffer object to test against. + bufferObject = clCreateBuffer( context, bufferFlags[ i ], addressAlign * 4, NULL, &error ); + test_error( error, "Unable to create buffer to test with" ); + } + + // Perform buffer object queries. + cl_mem_object_type type; + TEST_MEM_OBJECT_PARAM( bufferObject, CL_MEM_TYPE, type, CL_MEM_OBJECT_BUFFER, "type", "%d", int ) + + cl_mem_flags flags; + TEST_MEM_OBJECT_PARAM( bufferObject, CL_MEM_FLAGS, flags, (unsigned int)bufferFlags[ i ], "flags", "%d", unsigned int ) + + size_t sz; + TEST_MEM_OBJECT_PARAM( bufferObject, CL_MEM_SIZE, sz, (size_t)( addressAlign * 4 ), "size", "%ld", size_t ) + + cl_uint mapCount; + error = clGetMemObjectInfo( bufferObject, CL_MEM_MAP_COUNT, sizeof( mapCount ), &mapCount, &size ); + test_error( error, "Unable to get mem object map count" ); + if( size != sizeof( mapCount ) ) + { + log_error( "ERROR: Returned size of mem object map count does not validate! 
(expected %d, got %d from %s:%d)\n", + (int)sizeof( mapCount ), (int)size, __FILE__, __LINE__ ); + return -1; + } + + cl_uint refCount; + error = clGetMemObjectInfo( bufferObject, CL_MEM_REFERENCE_COUNT, sizeof( refCount ), &refCount, &size ); + test_error( error, "Unable to get mem object reference count" ); + if( size != sizeof( refCount ) ) + { + log_error( "ERROR: Returned size of mem object reference count does not validate! (expected %d, got %d from %s:%d)\n", + (int)sizeof( refCount ), (int)size, __FILE__, __LINE__ ); + return -1; + } + + cl_context otherCtx; + TEST_MEM_OBJECT_PARAM( bufferObject, CL_MEM_CONTEXT, otherCtx, context, "context", "%p", cl_context ) + + cl_mem origObj; + TEST_MEM_OBJECT_PARAM( bufferObject, CL_MEM_ASSOCIATED_MEMOBJECT, origObj, (void *)NULL, "associated mem object", "%p", void * ) + + size_t offset; + TEST_MEM_OBJECT_PARAM( bufferObject, CL_MEM_OFFSET, offset, 0L, "offset", "%ld", size_t ) + + cl_buffer_region region; + region.origin = addressAlign; + region.size = addressAlign; + + // Loop over possible sub-buffer objects to create. + for ( unsigned int j = 0; j < sizeof(subBufferFlags) / sizeof(cl_mem_flags); ++j ) + { + if ( subBufferFlags[ j ] & CL_MEM_READ_WRITE ) + { + if ( !(bufferFlags[ i ] & CL_MEM_READ_WRITE) ) + continue; // Buffer must be read_write for sub-buffer to be read_write. 
+ } + if ( subBufferFlags[ j ] & CL_MEM_READ_ONLY ) + { + if ( !(bufferFlags[ i ] & CL_MEM_READ_WRITE) && !(bufferFlags[ i ] & CL_MEM_READ_ONLY) ) + continue; // Buffer must be read_write or read_only for sub-buffer to be read_only + } + if ( subBufferFlags[ j ] & CL_MEM_WRITE_ONLY ) + { + if ( !(bufferFlags[ i ] & CL_MEM_READ_WRITE) && !(bufferFlags[ i ] & CL_MEM_WRITE_ONLY) ) + continue; // Buffer must be read_write or write_only for sub-buffer to be write_only + } + if ( subBufferFlags[ j ] & CL_MEM_HOST_READ_ONLY ) + { + if ( (bufferFlags[ i ] & CL_MEM_HOST_NO_ACCESS) || (bufferFlags[ i ] & CL_MEM_HOST_WRITE_ONLY) ) + continue; // Buffer must be host all access or host read_only for sub-buffer to be host read_only + } + if ( subBufferFlags[ j ] & CL_MEM_HOST_WRITE_ONLY ) + { + if ( (bufferFlags[ i ] & CL_MEM_HOST_NO_ACCESS) || (bufferFlags[ i ] & CL_MEM_HOST_READ_ONLY) ) + continue; // Buffer must be host all access or host write_only for sub-buffer to be host write_only + } + //printf("@@@ bufferFlags[%u]=0x%x subBufferFlags[%u]=0x%x\n", i, bufferFlags[ i ], j, subBufferFlags[ j ]); + + subBufferObject = clCreateSubBuffer( bufferObject, subBufferFlags[ j ], CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error ); + test_error( error, "Unable to create sub-buffer to test against" ); + + // Perform sub-buffer object queries. 
+ cl_mem_object_type type; + TEST_MEM_OBJECT_PARAM( subBufferObject, CL_MEM_TYPE, type, CL_MEM_OBJECT_BUFFER, "type", "%d", int ) + + cl_mem_flags flags; + cl_mem_flags inheritedFlags = subBufferFlags[ j ]; + if ( (subBufferFlags[ j ] & (CL_MEM_READ_WRITE | CL_MEM_READ_ONLY | CL_MEM_WRITE_ONLY)) == 0 ) + { + inheritedFlags |= bufferFlags[ i ] & (CL_MEM_READ_WRITE | CL_MEM_READ_ONLY | CL_MEM_WRITE_ONLY); + } + inheritedFlags |= bufferFlags[ i ] & (CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR); + if ( (subBufferFlags[ j ] & (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS)) == 0) + { + inheritedFlags |= bufferFlags[ i ] & (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS); + } + TEST_MEM_OBJECT_PARAM( subBufferObject, CL_MEM_FLAGS, flags, (unsigned int)inheritedFlags, "flags", "%d", unsigned int ) + + TEST_MEM_OBJECT_PARAM( subBufferObject, CL_MEM_SIZE, sz, (size_t)( addressAlign ), "size", "%ld", size_t ) + + if ( bufferFlags[ i ] & CL_MEM_USE_HOST_PTR ) + { + void * ptr; + void * offsetInBuffer = (char *)buffer + addressAlign; + + TEST_MEM_OBJECT_PARAM( subBufferObject, CL_MEM_HOST_PTR, ptr, offsetInBuffer, "host pointer", "%p", void * ) + } + + cl_uint mapCount; + error = clGetMemObjectInfo( subBufferObject, CL_MEM_MAP_COUNT, sizeof( mapCount ), &mapCount, &size ); + test_error( error, "Unable to get mem object map count" ); + if( size != sizeof( mapCount ) ) + { + log_error( "ERROR: Returned size of mem object map count does not validate! (expected %d, got %d from %s:%d)\n", + (int)sizeof( mapCount ), (int)size, __FILE__, __LINE__ ); + return -1; + } + + cl_uint refCount; + error = clGetMemObjectInfo( subBufferObject, CL_MEM_REFERENCE_COUNT, sizeof( refCount ), &refCount, &size ); + test_error( error, "Unable to get mem object reference count" ); + if( size != sizeof( refCount ) ) + { + log_error( "ERROR: Returned size of mem object reference count does not validate! 
(expected %d, got %d from %s:%d)\n", + (int)sizeof( refCount ), (int)size, __FILE__, __LINE__ ); + return -1; + } + + cl_context otherCtx; + TEST_MEM_OBJECT_PARAM( subBufferObject, CL_MEM_CONTEXT, otherCtx, context, "context", "%p", cl_context ) + + TEST_MEM_OBJECT_PARAM( subBufferObject, CL_MEM_ASSOCIATED_MEMOBJECT, origObj, (cl_mem)bufferObject, "associated mem object", "%p", void * ) + + TEST_MEM_OBJECT_PARAM( subBufferObject, CL_MEM_OFFSET, offset, (size_t)( addressAlign ), "offset", "%ld", size_t ) + + clReleaseMemObject( subBufferObject ); + subBufferObject = NULL; + + } + + clReleaseMemObject( bufferObject ); + bufferObject = NULL; + } + + return CL_SUCCESS; +} + + +int test_get_imageObject_info( cl_mem * image, cl_mem_flags objectFlags, cl_image_desc *imageInfo, cl_image_format *imageFormat, size_t pixelSize, cl_context context ) +{ + int error; + size_t size; + cl_mem_object_type type; + cl_mem_flags flags; + cl_uint mapCount; + cl_uint refCount; + size_t rowPitchMultiplier; + size_t slicePitchMultiplier; + cl_context otherCtx; + size_t offset; + size_t sz; + + TEST_MEM_OBJECT_PARAM( *image, CL_MEM_TYPE, type, imageInfo->image_type, "type", "%d", int ) + + TEST_MEM_OBJECT_PARAM( *image, CL_MEM_FLAGS, flags, (unsigned int)objectFlags, "flags", "%d", unsigned int ) + + error = clGetMemObjectInfo( *image, CL_MEM_SIZE, sizeof( sz ), &sz, NULL ); + test_error( error, "Unable to get mem size" ); + + // The size returned is not constrained by the spec. + + error = clGetMemObjectInfo( *image, CL_MEM_MAP_COUNT, sizeof( mapCount ), &mapCount, &size ); + test_error( error, "Unable to get mem object map count" ); + if( size != sizeof( mapCount ) ) + { + log_error( "ERROR: Returned size of mem object map count does not validate! 
(expected %d, got %d from %s:%d)\n", + (int)sizeof( mapCount ), (int)size, __FILE__, __LINE__ ); + return -1; + } + + error = clGetMemObjectInfo( *image, CL_MEM_REFERENCE_COUNT, sizeof( refCount ), &refCount, &size ); + test_error( error, "Unable to get mem object reference count" ); + if( size != sizeof( refCount ) ) + { + log_error( "ERROR: Returned size of mem object reference count does not validate! (expected %d, got %d from %s:%d)\n", + (int)sizeof( refCount ), (int)size, __FILE__, __LINE__ ); + return -1; + } + + TEST_MEM_OBJECT_PARAM( *image, CL_MEM_CONTEXT, otherCtx, context, "context", "%p", cl_context ) + + TEST_MEM_OBJECT_PARAM( *image, CL_MEM_OFFSET, offset, 0L, "offset", "%ld", size_t ) + + return CL_SUCCESS; +} + + +int test_get_image_info( cl_device_id deviceID, cl_context context, cl_mem_object_type type ) +{ + int error; + size_t size; + void * image = NULL; + + cl_mem imageObject; + cl_image_desc imageInfo; + + cl_mem_flags imageFlags[] = { + CL_MEM_READ_WRITE, + CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, + CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, + CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, + CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, + CL_MEM_READ_ONLY, + CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, + CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, + CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, + CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, + CL_MEM_WRITE_ONLY, + CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, + CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, + CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, + CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, + CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE, + CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, + CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, + CL_MEM_HOST_READ_ONLY | 
CL_MEM_READ_ONLY, + CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, + CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, + CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY, + CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, + CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, + CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE, + CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, + CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, + CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY, + CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, + CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, + CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY, + CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, + CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, + CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE, + CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, + CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_NO_ACCESS | 
CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, + CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY, + CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, + CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, + CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY, + CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, + CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, + CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, + }; + MTdata d; + + PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID ) + + cl_image_format imageFormat; + size_t pixelSize = 4; + + imageFormat.image_channel_order = CL_RGBA; + imageFormat.image_channel_data_type = CL_UNORM_INT8; + + imageInfo.image_width = imageInfo.image_height = imageInfo.image_depth = 1; + imageInfo.image_array_size = 0; + imageInfo.num_mip_levels = imageInfo.num_samples = 0; + imageInfo.buffer = NULL; + + d = init_genrand( gRandomSeed ); + + for ( unsigned int i = 0; i < sizeof(imageFlags) / sizeof(cl_mem_flags); ++i ) + { + imageInfo.image_row_pitch = 0; + imageInfo.image_slice_pitch = 0; + + switch (type) + { + case CL_MEM_OBJECT_IMAGE1D: + imageInfo.image_width = get_image_dim(&d, 1023); + imageInfo.image_type = CL_MEM_OBJECT_IMAGE1D; + break; + + case CL_MEM_OBJECT_IMAGE2D: + imageInfo.image_width = get_image_dim(&d, 1023); + imageInfo.image_height = get_image_dim(&d, 1023); + imageInfo.image_type = CL_MEM_OBJECT_IMAGE2D; + break; + + case CL_MEM_OBJECT_IMAGE3D: + imageInfo.image_width = get_image_dim(&d, 127); + imageInfo.image_height = get_image_dim(&d, 127); + imageInfo.image_depth = get_image_dim(&d, 127); + imageInfo.image_type = CL_MEM_OBJECT_IMAGE3D; + break; + + case CL_MEM_OBJECT_IMAGE1D_ARRAY: + imageInfo.image_width = 
get_image_dim(&d, 1023); + imageInfo.image_array_size = get_image_dim(&d, 1023); + imageInfo.image_type = CL_MEM_OBJECT_IMAGE1D_ARRAY; + break; + + case CL_MEM_OBJECT_IMAGE2D_ARRAY: + imageInfo.image_width = get_image_dim(&d, 255); + imageInfo.image_height = get_image_dim(&d, 255); + imageInfo.image_array_size = get_image_dim(&d, 255); + imageInfo.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY; + break; + } + + if ( imageFlags[i] & CL_MEM_USE_HOST_PTR ) + { + // Create an image object to test against. + image = malloc( imageInfo.image_width * imageInfo.image_height * imageInfo.image_depth * pixelSize * + ((imageInfo.image_array_size == 0) ? 1 : imageInfo.image_array_size) ); + imageObject = clCreateImage( context, imageFlags[i], &imageFormat, &imageInfo, image, &error ); + if ( error ) + { + free( image ); + test_error( error, "Unable to create image with (CL_MEM_USE_HOST_PTR) to test with" ); + } + + // Make sure image is cleaned up appropriately if we encounter an error in the rest of the calls. + error = clSetMemObjectDestructorCallback( imageObject, mem_obj_destructor_callback, image ); + test_error( error, "Unable to set mem object destructor callback" ); + + void * ptr; + TEST_MEM_OBJECT_PARAM( imageObject, CL_MEM_HOST_PTR, ptr, image, "host pointer", "%p", void * ) + int ret = test_get_imageObject_info( &imageObject, imageFlags[i], &imageInfo, &imageFormat, pixelSize, context ); + if (ret) + return ret; + + // release image object + clReleaseMemObject(imageObject); + + // Try again with non-zero rowPitch. + imageInfo.image_row_pitch = imageInfo.image_width * pixelSize; + switch (type) + { + case CL_MEM_OBJECT_IMAGE1D_ARRAY: + case CL_MEM_OBJECT_IMAGE2D_ARRAY: + case CL_MEM_OBJECT_IMAGE3D: + imageInfo.image_slice_pitch = imageInfo.image_row_pitch * imageInfo.image_height; + break; + } + + image = malloc( imageInfo.image_width * imageInfo.image_height * imageInfo.image_depth * pixelSize * + ((imageInfo.image_array_size == 0) ? 
1 : imageInfo.image_array_size) ); + imageObject = clCreateImage( context, imageFlags[i], &imageFormat, &imageInfo, image, &error ); + if ( error ) + { + free( image ); + test_error( error, "Unable to create image2d (CL_MEM_USE_HOST_PTR) to test with" ); + } + + // Make sure image2d is cleaned up appropriately if we encounter an error in the rest of the calls. + error = clSetMemObjectDestructorCallback( imageObject, mem_obj_destructor_callback, image ); + test_error( error, "Unable to set mem object destructor callback" ); + + TEST_MEM_OBJECT_PARAM( imageObject, CL_MEM_HOST_PTR, ptr, image, "host pointer", "%p", void * ) + ret = test_get_imageObject_info( &imageObject, imageFlags[i], &imageInfo, &imageFormat, pixelSize, context ); + if (ret) + return ret; + + } + else if ( (imageFlags[i] & CL_MEM_ALLOC_HOST_PTR) && (imageFlags[i] & CL_MEM_COPY_HOST_PTR) ) + { + // Create an image object to test against. + image = malloc( imageInfo.image_width * imageInfo.image_height * imageInfo.image_depth * pixelSize * + ((imageInfo.image_array_size == 0) ? 1 : imageInfo.image_array_size) ); + imageObject = clCreateImage( context, imageFlags[i], &imageFormat, &imageInfo, image, &error ); + if ( error ) + { + free( image ); + test_error( error, "Unable to create image with (CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR) to test with" ); + } + + // Make sure image is cleaned up appropriately if we encounter an error in the rest of the calls. + error = clSetMemObjectDestructorCallback( imageObject, mem_obj_destructor_callback, image ); + test_error( error, "Unable to set mem object destructor callback" ); + int ret = test_get_imageObject_info( &imageObject, imageFlags[ i ], &imageInfo, &imageFormat, pixelSize, context ); + if (ret) + return ret; + + // release image object + clReleaseMemObject(imageObject); + + // Try again with non-zero rowPitch. 
+ imageInfo.image_row_pitch = imageInfo.image_width * pixelSize; + switch (type) + { + case CL_MEM_OBJECT_IMAGE1D_ARRAY: + case CL_MEM_OBJECT_IMAGE2D_ARRAY: + case CL_MEM_OBJECT_IMAGE3D: + imageInfo.image_slice_pitch = imageInfo.image_row_pitch * imageInfo.image_height; + break; + } + + image = malloc( imageInfo.image_width * imageInfo.image_height * imageInfo.image_depth * pixelSize * + ((imageInfo.image_array_size == 0) ? 1 : imageInfo.image_array_size) ); + imageObject = clCreateImage( context, imageFlags[i], &imageFormat, &imageInfo, image, &error ); + if ( error ) + { + free( image ); + test_error( error, "Unable to create image with (CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR) to test with" ); + } + + // Make sure image is cleaned up appropriately if we encounter an error in the rest of the calls. + error = clSetMemObjectDestructorCallback( imageObject, mem_obj_destructor_callback, image ); + test_error( error, "Unable to set mem object destructor callback" ); + ret = test_get_imageObject_info( &imageObject, imageFlags[i], &imageInfo, &imageFormat, pixelSize, context ); + if (ret) + return ret; + + } + else if ( imageFlags[i] & CL_MEM_ALLOC_HOST_PTR ) + { + // Create an image object to test against. + imageObject = clCreateImage( context, imageFlags[i], &imageFormat, &imageInfo, NULL, &error ); + test_error( error, "Unable to create image with (CL_MEM_ALLOC_HOST_PTR) to test with" ); + int ret = test_get_imageObject_info( &imageObject, imageFlags[i], &imageInfo, &imageFormat, pixelSize, context ); + if (ret) + return ret; + + } + else if ( imageFlags[i] & CL_MEM_COPY_HOST_PTR ) + { + // Create an image object to test against. + image = malloc( imageInfo.image_width * imageInfo.image_height * imageInfo.image_depth * pixelSize * + ((imageInfo.image_array_size == 0) ? 
1 : imageInfo.image_array_size) ); + imageObject = clCreateImage( context, imageFlags[i], &imageFormat, &imageInfo, image, &error ); + if ( error ) + { + free( image ); + test_error( error, "Unable to create image with (CL_MEM_COPY_HOST_PTR) to test with" ); + } + + // Make sure image is cleaned up appropriately if we encounter an error in the rest of the calls. + error = clSetMemObjectDestructorCallback( imageObject, mem_obj_destructor_callback, image ); + test_error( error, "Unable to set mem object destructor callback" ); + int ret = test_get_imageObject_info( &imageObject, imageFlags[i], &imageInfo, &imageFormat, pixelSize, context ); + if (ret) + return ret; + + clReleaseMemObject(imageObject); + + // Try again with non-zero rowPitch. + imageInfo.image_row_pitch = imageInfo.image_width * pixelSize; + switch (type) + { + case CL_MEM_OBJECT_IMAGE1D_ARRAY: + case CL_MEM_OBJECT_IMAGE2D_ARRAY: + case CL_MEM_OBJECT_IMAGE3D: + imageInfo.image_slice_pitch = imageInfo.image_row_pitch * imageInfo.image_height; + break; + } + + image = malloc( imageInfo.image_width * imageInfo.image_height * imageInfo.image_depth * pixelSize * + ((imageInfo.image_array_size == 0) ? 1 : imageInfo.image_array_size) ); + imageObject = clCreateImage( context, imageFlags[i], &imageFormat, &imageInfo, image, &error ); + if ( error ) + { + free( image ); + test_error( error, "Unable to create image with (CL_MEM_COPY_HOST_PTR) to test with" ); + } + + // Make sure image is cleaned up appropriately if we encounter an error in the rest of the calls. + error = clSetMemObjectDestructorCallback( imageObject, mem_obj_destructor_callback, image ); + test_error( error, "Unable to set mem object destructor callback" ); + ret = test_get_imageObject_info( &imageObject, imageFlags[i], &imageInfo, &imageFormat, pixelSize, context ); + if (ret) + return ret; + + } + else + { + // Create an image object to test against. 
+        imageObject = clCreateImage( context, imageFlags[i], &imageFormat, &imageInfo, NULL, &error );
+        test_error( error, "Unable to create image to test with" );
+        int ret = test_get_imageObject_info( &imageObject, imageFlags[i], &imageInfo, &imageFormat, pixelSize, context );
+        if (ret)
+            return ret;
+
+    }
+
+    clReleaseMemObject( imageObject );
+    }
+
+    return CL_SUCCESS;
+}
+
+
+// Per-image-type harness entry points. Each one forwards to the shared
+// test_get_image_info driver above with the matching cl_mem_object_type;
+// the ignoreQueue and num_elements harness parameters are unused by the
+// driver.
+int test_get_image2d_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements )
+{
+    return test_get_image_info(deviceID, context, CL_MEM_OBJECT_IMAGE2D);
+}
+
+int test_get_image3d_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements )
+{
+    return test_get_image_info(deviceID, context, CL_MEM_OBJECT_IMAGE3D);
+}
+
+int test_get_image1d_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements )
+{
+    return test_get_image_info(deviceID, context, CL_MEM_OBJECT_IMAGE1D);
+}
+
+int test_get_image1d_array_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements )
+{
+    return test_get_image_info(deviceID, context, CL_MEM_OBJECT_IMAGE1D_ARRAY);
+}
+
+int test_get_image2d_array_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements )
+{
+    return test_get_image_info(deviceID, context, CL_MEM_OBJECT_IMAGE2D_ARRAY);
+}
+
+
diff --git a/test_conformance/compatibility/test_conformance/api/test_mem_objects.cpp b/test_conformance/compatibility/test_conformance/api/test_mem_objects.cpp
new file mode 100644
index 00000000..b0dc99d4
--- /dev/null
+++ b/test_conformance/compatibility/test_conformance/api/test_mem_objects.cpp
@@ -0,0 +1,108 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "testBase.h" + +static volatile cl_int sDestructorIndex; + +void CL_CALLBACK mem_destructor_callback( cl_mem memObject, void * userData ) +{ + int * userPtr = (int *)userData; + + // ordering of callbacks is guaranteed, meaning we don't need to do atomic operation here + *userPtr = ++sDestructorIndex; +} + +#ifndef ABS +#define ABS( x ) ( ( x < 0 ) ? -x : x ) +#endif + +int test_mem_object_destructor_callback_single( clMemWrapper &memObject ) +{ + cl_int error; + int i; + + // Set up some variables to catch the order in which callbacks are called + volatile int callbackOrders[ 3 ] = { 0, 0, 0 }; + sDestructorIndex = 0; + + // Set up the callbacks + error = clSetMemObjectDestructorCallback( memObject, mem_destructor_callback, (void*) &callbackOrders[ 0 ] ); + test_error( error, "Unable to set destructor callback" ); + + error = clSetMemObjectDestructorCallback( memObject, mem_destructor_callback, (void*) &callbackOrders[ 1 ] ); + test_error( error, "Unable to set destructor callback" ); + + error = clSetMemObjectDestructorCallback( memObject, mem_destructor_callback, (void*) &callbackOrders[ 2 ] ); + test_error( error, "Unable to set destructor callback" ); + + // Now release the buffer, which SHOULD call the callbacks + error = clReleaseMemObject( memObject ); + test_error( error, "Unable to release test buffer" ); + + // Note: since we manually released the mem wrapper, we need to set it to NULL to prevent a double-release + memObject = NULL; + + // At this point, all three callbacks should have already been called + int numErrors = 
0; + for( i = 0; i < 3; i++ ) + { + // Spin waiting for the release to finish. If you don't call the mem_destructor_callback, you will not + // pass the test. bugzilla 6316 + while( 0 == callbackOrders[i] ) + {} + + if( ABS( callbackOrders[ i ] ) != 3-i ) + { + log_error( "\tERROR: Callback %d was called in the wrong order! (Was called order %d, should have been order %d)\n", + i+1, ABS( callbackOrders[ i ] ), i ); + numErrors++; + } + } + + return ( numErrors > 0 ) ? -1 : 0; +} + +int test_mem_object_destructor_callback(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + clMemWrapper testBuffer, testImage; + cl_int error; + + + // Create a buffer and an image to test callbacks against + testBuffer = clCreateBuffer( context, CL_MEM_READ_WRITE, 1024, NULL, &error ); + test_error( error, "Unable to create testing buffer" ); + + if( test_mem_object_destructor_callback_single( testBuffer ) != 0 ) + { + log_error( "ERROR: Destructor callbacks for buffer object FAILED\n" ); + return -1; + } + + if( checkForImageSupport( deviceID ) == 0 ) + { + cl_image_format imageFormat = { CL_RGBA, CL_SIGNED_INT8 }; + testImage = create_image_2d( context, CL_MEM_READ_ONLY, &imageFormat, 16, 16, 0, NULL, &error ); + test_error( error, "Unable to create testing image" ); + + if( test_mem_object_destructor_callback_single( testImage ) != 0 ) + { + log_error( "ERROR: Destructor callbacks for image object FAILED\n" ); + return -1; + } + } + + return 0; +} diff --git a/test_conformance/compatibility/test_conformance/api/test_native_kernel.cpp b/test_conformance/compatibility/test_conformance/api/test_native_kernel.cpp new file mode 100644 index 00000000..2d52134b --- /dev/null +++ b/test_conformance/compatibility/test_conformance/api/test_native_kernel.cpp @@ -0,0 +1,121 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "testBase.h" + +#ifndef _WIN32 +#include +#endif + +#include "../../test_common/harness/conversions.h" + +extern cl_uint gRandomSeed; + +static void CL_CALLBACK test_native_kernel_fn( void *userData ) +{ + struct arg_struct { + cl_int * source; + cl_int * dest; + cl_int count; + } *args = (arg_struct *)userData; + + for( cl_int i = 0; i < args->count; i++ ) + args->dest[ i ] = args->source[ i ]; +} + +int test_native_kernel(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ) +{ + int error; + RandomSeed seed( gRandomSeed ); + // Check if we support native kernels + cl_device_exec_capabilities capabilities; + error = clGetDeviceInfo(device, CL_DEVICE_EXECUTION_CAPABILITIES, sizeof(capabilities), &capabilities, NULL); + if (!(capabilities & CL_EXEC_NATIVE_KERNEL)) { + log_info("Device does not support CL_EXEC_NATIVE_KERNEL.\n"); + return 0; + } + + clMemWrapper streams[ 2 ]; +#if !(defined (_WIN32) && defined (_MSC_VER)) + cl_int inBuffer[ n_elems ], outBuffer[ n_elems ]; +#else + cl_int* inBuffer = (cl_int *)_malloca( n_elems * sizeof(cl_int) ); + cl_int* outBuffer = (cl_int *)_malloca( n_elems * sizeof(cl_int) ); +#endif + clEventWrapper finishEvent; + + struct arg_struct + { + cl_mem inputStream; + cl_mem outputStream; + cl_int count; + } args; + + + // Create some input values + generate_random_data( kInt, n_elems, seed, inBuffer ); + + + // Create I/O streams + 
streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, n_elems * sizeof(cl_int), inBuffer, &error ); + test_error( error, "Unable to create I/O stream" ); + streams[ 1 ] = clCreateBuffer( context, 0, n_elems * sizeof(cl_int), NULL, &error ); + test_error( error, "Unable to create I/O stream" ); + + + // Set up the arrays to call with + args.inputStream = streams[ 0 ]; + args.outputStream = streams[ 1 ]; + args.count = n_elems; + + void * memLocs[ 2 ] = { &args.inputStream, &args.outputStream }; + + + // Run the kernel + error = clEnqueueNativeKernel( queue, test_native_kernel_fn, + &args, sizeof( args ), + 2, &streams[ 0 ], + (const void **)memLocs, + 0, NULL, &finishEvent ); + test_error( error, "Unable to queue native kernel" ); + + // Finish and wait for the kernel to complete + error = clFinish( queue ); + test_error(error, "clFinish failed"); + + error = clWaitForEvents( 1, &finishEvent ); + test_error(error, "clWaitForEvents failed"); + + // Now read the results and verify + error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, n_elems * sizeof(cl_int), outBuffer, 0, NULL, NULL ); + test_error( error, "Unable to read results" ); + + for( int i = 0; i < n_elems; i++ ) + { + if( inBuffer[ i ] != outBuffer[ i ] ) + { + log_error( "ERROR: Data sample %d for native kernel did not validate (expected %d, got %d)\n", + i, (int)inBuffer[ i ], (int)outBuffer[ i ] ); + return 1; + } + } + + return 0; +} + + + + + diff --git a/test_conformance/compatibility/test_conformance/api/test_null_buffer_arg.c b/test_conformance/compatibility/test_conformance/api/test_null_buffer_arg.c new file mode 100644 index 00000000..0d792136 --- /dev/null +++ b/test_conformance/compatibility/test_conformance/api/test_null_buffer_arg.c @@ -0,0 +1,162 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include +#if defined(__APPLE__) +#include +#include +#else +#include +#include +#endif +#include "procs.h" + + +enum { SUCCESS, FAILURE }; +typedef enum { NON_NULL_PATH, ADDROF_NULL_PATH, NULL_PATH } test_type; + +#define NITEMS 4096 + +/* places the casted long value of the src ptr into each element of the output + * array, to allow testing that the kernel actually _gets_ the NULL value */ +const char *kernel_string = +"kernel void test_kernel(global float *src, global long *dst)\n" +"{\n" +" uint tid = get_global_id(0);\n" +" dst[tid] = (long)src;\n" +"}\n"; + +/* + * The guts of the test: + * call setKernelArgs with a regular buffer, &NULL, or NULL depending on + * the value of 'test_type' + */ +static int test_setargs_and_execution(cl_command_queue queue, cl_kernel kernel, + cl_mem test_buf, cl_mem result_buf, test_type type) +{ + unsigned int test_success = 0; + + unsigned int i; + cl_int status; + char *typestr; + + if (type == NON_NULL_PATH) { + status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &test_buf); + typestr = "non-NULL"; + } else if (type == ADDROF_NULL_PATH) { + test_buf = NULL; + status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &test_buf); + typestr = "&NULL"; + } else if (type == NULL_PATH) { + status = clSetKernelArg(kernel, 0, sizeof(cl_mem), NULL); + typestr = "NULL"; + } + + log_info("Testing setKernelArgs with %s buffer.\n", typestr); + + if (status != CL_SUCCESS) { + log_error("clSetKernelArg failed with status: %d\n", status); + return FAILURE; // no point in continuing *this* test + } + + size_t global = NITEMS; 
+ status = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, + NULL, 0, NULL, NULL); + test_error(status, "NDRangeKernel failed."); + + cl_long* host_result = (cl_long*)malloc(NITEMS*sizeof(cl_long)); + status = clEnqueueReadBuffer(queue, result_buf, CL_TRUE, 0, + sizeof(cl_long)*NITEMS, host_result, 0, NULL, NULL); + test_error(status, "ReadBuffer failed."); + + // in the non-null case, we expect NONZERO values: + if (type == NON_NULL_PATH) { + for (i=0; i + +#define EXTENSION_NAME_BUF_SIZE 4096 + +#define PRINT_EXTENSION_INFO 0 + +int test_platform_extensions(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + const char * extensions[] = { + "cl_khr_byte_addressable_store", +// "cl_APPLE_SetMemObjectDestructor", + "cl_khr_global_int32_base_atomics", + "cl_khr_global_int32_extended_atomics", + "cl_khr_local_int32_base_atomics", + "cl_khr_local_int32_extended_atomics", + "cl_khr_int64_base_atomics", + "cl_khr_int64_extended_atomics", +// need to put in entires for various atomics + "cl_khr_3d_image_writes", + "cl_khr_fp16", + "cl_khr_fp64", + NULL + }; + + bool extensionsSupported[] = { + false, //"cl_khr_byte_addressable_store", + false, // need to put in entires for various atomics + false, // "cl_khr_global_int32_base_atomics", + false, // "cl_khr_global_int32_extended_atomics", + false, // "cl_khr_local_int32_base_atomics", + false, // "cl_khr_local_int32_extended_atomics", + false, // "cl_khr_int64_base_atomics", + false, // "cl_khr_int64_extended_atomics", + false, //"cl_khr_3d_image_writes", + false, //"cl_khr_fp16", + false, //"cl_khr_fp64", + false //NULL + }; + + int extensionIndex; + + cl_platform_id platformID; + cl_int err; + + char platform_extensions[EXTENSION_NAME_BUF_SIZE]; + char device_extensions[EXTENSION_NAME_BUF_SIZE]; + + // Okay, so what we're going to do is just check the device indicated by + // deviceID against the platform that includes this device + + + // pass CL_DEVICE_PLATFORM to 
clGetDeviceInfo + // to get a result of type cl_platform_id + + err = clGetDeviceInfo(deviceID, + CL_DEVICE_PLATFORM, + sizeof(cl_platform_id), + (void *)(&platformID), + NULL); + + if(err != CL_SUCCESS) + { + vlog_error("test_platform_extensions : could not get platformID from device\n"); + return -1; + } + + + // now we grab the set of extensions specified by the platform + err = clGetPlatformInfo(platformID, + CL_PLATFORM_EXTENSIONS, + sizeof(platform_extensions), + (void *)(&platform_extensions[0]), + NULL); + if(err != CL_SUCCESS) + { + vlog_error("test_platform_extensions : could not get extension string from platform\n"); + return -1; + } + +#if PRINT_EXTENSION_INFO + log_info("Platform extensions include \"%s\"\n\n", platform_extensions); +#endif + + // here we parse the platform extensions, to look for the "important" ones + for(extensionIndex=0; extensions[extensionIndex] != NULL; ++extensionIndex) + { + if(strstr(platform_extensions, extensions[extensionIndex]) != NULL) + { + // we found it +#if PRINT_EXTENSION_INFO + log_info("Found \"%s\" in platform extensions\n", + extensions[extensionIndex]); +#endif + extensionsSupported[extensionIndex] = true; + } + } + + // and then we grab the set of extensions specified by the device + // (this can be turned into a "loop over all devices in this platform") + err = clGetDeviceInfo(deviceID, + CL_DEVICE_EXTENSIONS, + sizeof(device_extensions), + (void *)(&device_extensions[0]), + NULL); + if(err != CL_SUCCESS) + { + vlog_error("test_platform_extensions : could not get extension string from device\n"); + return -1; + } + + +#if PRINT_EXTENSION_INFO + log_info("Device extensions include \"%s\"\n\n", device_extensions); +#endif + + for(extensionIndex=0; extensions[extensionIndex] != NULL; ++extensionIndex) + { + if(extensionsSupported[extensionIndex] == false) + { + continue; // skip this one + } + + if(strstr(device_extensions, extensions[extensionIndex]) == NULL) + { + // device does not support it + 
vlog_error("Platform supports extension \"%s\" but device does not\n", + extensions[extensionIndex]); + return -1; + } + } + return 0; +} + +int test_get_platform_ids(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { + cl_platform_id platforms[16]; + cl_uint num_platforms; + char *string_returned; + + string_returned = (char*)malloc(8192); + + int total_errors = 0; + int err = CL_SUCCESS; + + + err = clGetPlatformIDs(16, platforms, &num_platforms); + test_error(err, "clGetPlatformIDs failed"); + + if (num_platforms <= 16) { + // Try with NULL + err = clGetPlatformIDs(num_platforms, platforms, NULL); + test_error(err, "clGetPlatformIDs failed with NULL for return size"); + } + + if (num_platforms < 1) { + log_error("Found 0 platforms.\n"); + return -1; + } + log_info("Found %d platforms.\n", num_platforms); + + + for (int p=0; p<(int)num_platforms; p++) { + cl_device_id *devices; + cl_uint num_devices; + size_t size; + + + log_info("Platform %d (%p):\n", p, platforms[p]); + + memset(string_returned, 0, 8192); + err = clGetPlatformInfo(platforms[p], CL_PLATFORM_PROFILE, 8192, string_returned, &size); + test_error(err, "clGetPlatformInfo for CL_PLATFORM_PROFILE failed"); + log_info("\tCL_PLATFORM_PROFILE: %s\n", string_returned); + if (strlen(string_returned)+1 != size) { + log_error("Returned string length %ld does not equal reported one %ld.\n", strlen(string_returned)+1, size); + total_errors++; + } + + memset(string_returned, 0, 8192); + err = clGetPlatformInfo(platforms[p], CL_PLATFORM_VERSION, 8192, string_returned, &size); + test_error(err, "clGetPlatformInfo for CL_PLATFORM_VERSION failed"); + log_info("\tCL_PLATFORM_VERSION: %s\n", string_returned); + if (strlen(string_returned)+1 != size) { + log_error("Returned string length %ld does not equal reported one %ld.\n", strlen(string_returned)+1, size); + total_errors++; + } + + memset(string_returned, 0, 8192); + err = clGetPlatformInfo(platforms[p], CL_PLATFORM_NAME, 8192, 
string_returned, &size); + test_error(err, "clGetPlatformInfo for CL_PLATFORM_NAME failed"); + log_info("\tCL_PLATFORM_NAME: %s\n", string_returned); + if (strlen(string_returned)+1 != size) { + log_error("Returned string length %ld does not equal reported one %ld.\n", strlen(string_returned)+1, size); + total_errors++; + } + + memset(string_returned, 0, 8192); + err = clGetPlatformInfo(platforms[p], CL_PLATFORM_VENDOR, 8192, string_returned, &size); + test_error(err, "clGetPlatformInfo for CL_PLATFORM_VENDOR failed"); + log_info("\tCL_PLATFORM_VENDOR: %s\n", string_returned); + if (strlen(string_returned)+1 != size) { + log_error("Returned string length %ld does not equal reported one %ld.\n", strlen(string_returned)+1, size); + total_errors++; + } + + memset(string_returned, 0, 8192); + err = clGetPlatformInfo(platforms[p], CL_PLATFORM_EXTENSIONS, 8192, string_returned, &size); + test_error(err, "clGetPlatformInfo for CL_PLATFORM_EXTENSIONS failed"); + log_info("\tCL_PLATFORM_EXTENSIONS: %s\n", string_returned); + if (strlen(string_returned)+1 != size) { + log_error("Returned string length %ld does not equal reported one %ld.\n", strlen(string_returned)+1, size); + total_errors++; + } + + err = clGetDeviceIDs(platforms[p], CL_DEVICE_TYPE_ALL, 0, NULL, &num_devices); + test_error(err, "clGetDeviceIDs size failed.\n"); + devices = (cl_device_id *)malloc(num_devices*sizeof(cl_device_id)); + memset(devices, 0, sizeof(cl_device_id)*num_devices); + err = clGetDeviceIDs(platforms[p], CL_DEVICE_TYPE_ALL, num_devices, devices, NULL); + test_error(err, "clGetDeviceIDs failed.\n"); + + log_info("\tPlatform has %d devices.\n", (int)num_devices); + for (int d=0; d<(int)num_devices; d++) { + size_t returned_size; + cl_platform_id returned_platform; + cl_context context; + cl_context_properties properties[] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platforms[p], 0 }; + + err = clGetDeviceInfo(devices[d], CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &returned_platform, 
&returned_size); + test_error(err, "clGetDeviceInfo failed for CL_DEVICE_PLATFORM\n"); + if (returned_size != sizeof(cl_platform_id)) { + log_error("Reported return size (%ld) does not match expected size (%ld).\n", returned_size, sizeof(cl_platform_id)); + total_errors++; + } + + memset(string_returned, 0, 8192); + err = clGetDeviceInfo(devices[d], CL_DEVICE_NAME, 8192, string_returned, NULL); + test_error(err, "clGetDeviceInfo failed for CL_DEVICE_NAME\n"); + + log_info("\t\tPlatform for device %d (%s) is %p.\n", d, string_returned, returned_platform); + + log_info("\t\t\tTesting clCreateContext for the platform/device...\n"); + // Try creating a context for the platform + context = clCreateContext(properties, 1, &devices[d], NULL, NULL, &err); + test_error(err, "\t\tclCreateContext failed for device with platform properties\n"); + + memset(properties, 0, sizeof(cl_context_properties)*3); + + err = clGetContextInfo(context, CL_CONTEXT_PROPERTIES, sizeof(cl_context_properties)*3, properties, &returned_size); + test_error(err, "clGetContextInfo for CL_CONTEXT_PROPERTIES failed"); + if (returned_size != sizeof(cl_context_properties)*3) { + log_error("Invalid size returned from clGetContextInfo for CL_CONTEXT_PROPERTIES. Got %ld, expected %ld.\n", + returned_size, sizeof(cl_context_properties)*3); + total_errors++; + } + + if (properties[0] != (cl_context_properties)CL_CONTEXT_PLATFORM || properties[1] != (cl_context_properties)platforms[p]) { + log_error("Wrong properties returned. 
Expected: [%p %p], got [%p %p]\n", + (void*)CL_CONTEXT_PLATFORM, platforms[p], (void*)properties[0], (void*)properties[1]); + total_errors++; + } + + err = clReleaseContext(context); + test_error(err, "clReleaseContext failed"); + } + free(devices); + } + + free(string_returned); + + return total_errors; +} diff --git a/test_conformance/compatibility/test_conformance/api/test_queries.cpp b/test_conformance/compatibility/test_conformance/api/test_queries.cpp new file mode 100644 index 00000000..8e34f97d --- /dev/null +++ b/test_conformance/compatibility/test_conformance/api/test_queries.cpp @@ -0,0 +1,635 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "testBase.h" +#include "../../test_common/harness/imageHelpers.h" +#include +#include + +int test_get_platform_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + cl_platform_id platform; + cl_int error; + char buffer[ 4098 ]; + size_t length; + + // Get the platform to use + error = clGetPlatformIDs(1, &platform, NULL); + test_error( error, "Unable to get platform" ); + + // Platform profile should either be FULL_PROFILE or EMBEDDED_PROFILE + error = clGetPlatformInfo(platform, CL_PLATFORM_PROFILE, sizeof( buffer ), buffer, &length ); + test_error( error, "Unable to get platform profile string" ); + + log_info("Returned CL_PLATFORM_PROFILE %s.\n", buffer); + + if( strcmp( buffer, "FULL_PROFILE" ) != 0 && strcmp( buffer, "EMBEDDED_PROFILE" ) != 0 ) + { + log_error( "ERROR: Returned platform profile string is not a valid string by OpenCL 1.2! (Returned: %s)\n", buffer ); + return -1; + } + if( strlen( buffer )+1 != length ) + { + log_error( "ERROR: Returned length of profile string is incorrect (actual length: %d, returned length: %d)\n", + (int)strlen( buffer )+1, (int)length ); + return -1; + } + + // Check just length return + error = clGetPlatformInfo(platform, CL_PLATFORM_PROFILE, 0, NULL, &length ); + test_error( error, "Unable to get platform profile length" ); + if( strlen( (char *)buffer )+1 != length ) + { + log_error( "ERROR: Returned length of profile string is incorrect (actual length: %d, returned length: %d)\n", + (int)strlen( (char *)buffer )+1, (int)length ); + return -1; + } + + + // Platform version should fit the regex "OpenCL *[0-9]+\.[0-9]+" + error = clGetPlatformInfo(platform, CL_PLATFORM_VERSION, sizeof( buffer ), buffer, &length ); + test_error( error, "Unable to get platform version string" ); + + log_info("Returned CL_PLATFORM_VERSION %s.\n", buffer); + + if( memcmp( buffer, "OpenCL ", strlen( "OpenCL " ) ) != 0 ) + { + log_error( "ERROR: Initial part of platform version string does 
not match required format! (returned: %s)\n", (char *)buffer ); + return -1; + } + char *p1 = (char *)buffer + strlen( "OpenCL " ); + while( *p1 == ' ' ) + p1++; + char *p2 = p1; + while( isdigit( *p2 ) ) + p2++; + if( *p2 != '.' ) + { + log_error( "ERROR: Numeric part of platform version string does not match required format! (returned: %s)\n", (char *)buffer ); + return -1; + } + char *p3 = p2 + 1; + while( isdigit( *p3 ) ) + p3++; + if( *p3 != ' ' ) + { + log_error( "ERROR: space expected after minor version number! (returned: %s)\n", (char *)buffer ); + return -1; + } + *p2 = ' '; // Put in a space for atoi below. + p2++; + + // make sure it is null terminated + for( ; p3 != buffer + length; p3++ ) + if( *p3 == '\0' ) + break; + if( p3 == buffer + length ) + { + log_error( "ERROR: platform version string is not NUL terminated!\n" ); + return -1; + } + + int major = atoi( p1 ); + int minor = atoi( p2 ); + int minor_revision = 2; + if( major * 10 + minor < 10 + minor_revision ) + { + log_error( "ERROR: OpenCL profile version returned is less than 1.%d!\n", minor_revision ); + return -1; + } + + // Sanity checks on the returned values + if( length != strlen( (char *)buffer ) + 1) + { + log_error( "ERROR: Returned length of version string does not match actual length (actual: %d, returned: %d)\n", (int)strlen( (char *)buffer )+1, (int)length ); + return -1; + } + + // Check just length + error = clGetPlatformInfo(platform, CL_PLATFORM_VERSION, 0, NULL, &length ); + test_error( error, "Unable to get platform version length" ); + if( length != strlen( (char *)buffer )+1 ) + { + log_error( "ERROR: Returned length of version string does not match actual length (actual: %d, returned: %d)\n", (int)strlen( buffer )+1, (int)length ); + return -1; + } + + return 0; +} + +int test_get_sampler_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + size_t size; + + PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID ) + + 
clSamplerWrapper sampler = clCreateSampler( context, CL_TRUE, CL_ADDRESS_CLAMP, CL_FILTER_LINEAR, &error ); + test_error( error, "Unable to create sampler to test with" ); + + cl_uint refCount; + error = clGetSamplerInfo( sampler, CL_SAMPLER_REFERENCE_COUNT, sizeof( refCount ), &refCount, &size ); + test_error( error, "Unable to get sampler ref count" ); + if( size != sizeof( refCount ) ) + { + log_error( "ERROR: Returned size of sampler refcount does not validate! (expected %d, got %d)\n", (int)sizeof( refCount ), (int)size ); + return -1; + } + + cl_context otherCtx; + error = clGetSamplerInfo( sampler, CL_SAMPLER_CONTEXT, sizeof( otherCtx ), &otherCtx, &size ); + test_error( error, "Unable to get sampler context" ); + if( otherCtx != context ) + { + log_error( "ERROR: Sampler context does not validate! (expected %p, got %p)\n", context, otherCtx ); + return -1; + } + if( size != sizeof( otherCtx ) ) + { + log_error( "ERROR: Returned size of sampler context does not validate! (expected %d, got %d)\n", (int)sizeof( otherCtx ), (int)size ); + return -1; + } + + cl_addressing_mode mode; + error = clGetSamplerInfo( sampler, CL_SAMPLER_ADDRESSING_MODE, sizeof( mode ), &mode, &size ); + test_error( error, "Unable to get sampler addressing mode" ); + if( mode != CL_ADDRESS_CLAMP ) + { + log_error( "ERROR: Sampler addressing mode does not validate! (expected %d, got %d)\n", (int)CL_ADDRESS_CLAMP, (int)mode ); + return -1; + } + if( size != sizeof( mode ) ) + { + log_error( "ERROR: Returned size of sampler addressing mode does not validate! (expected %d, got %d)\n", (int)sizeof( mode ), (int)size ); + return -1; + } + + cl_filter_mode fmode; + error = clGetSamplerInfo( sampler, CL_SAMPLER_FILTER_MODE, sizeof( fmode ), &fmode, &size ); + test_error( error, "Unable to get sampler filter mode" ); + if( fmode != CL_FILTER_LINEAR ) + { + log_error( "ERROR: Sampler filter mode does not validate! 
(expected %d, got %d)\n", (int)CL_FILTER_LINEAR, (int)fmode ); + return -1; + } + if( size != sizeof( fmode ) ) + { + log_error( "ERROR: Returned size of sampler filter mode does not validate! (expected %d, got %d)\n", (int)sizeof( fmode ), (int)size ); + return -1; + } + + cl_int norm; + error = clGetSamplerInfo( sampler, CL_SAMPLER_NORMALIZED_COORDS, sizeof( norm ), &norm, &size ); + test_error( error, "Unable to get sampler normalized flag" ); + if( norm != CL_TRUE ) + { + log_error( "ERROR: Sampler normalized flag does not validate! (expected %d, got %d)\n", (int)CL_TRUE, (int)norm ); + return -1; + } + if( size != sizeof( norm ) ) + { + log_error( "ERROR: Returned size of sampler normalized flag does not validate! (expected %d, got %d)\n", (int)sizeof( norm ), (int)size ); + return -1; + } + + return 0; +} + +#define TEST_COMMAND_QUEUE_PARAM( queue, paramName, val, expected, name, type, cast ) \ +error = clGetCommandQueueInfo( queue, paramName, sizeof( val ), &val, &size ); \ +test_error( error, "Unable to get command queue " name ); \ +if( val != expected ) \ +{ \ +log_error( "ERROR: Command queue " name " did not validate! (expected " type ", got " type ")\n", (cast)expected, (cast)val ); \ +return -1; \ +} \ +if( size != sizeof( val ) ) \ +{ \ +log_error( "ERROR: Returned size of command queue " name " does not validate! 
(expected %d, got %d)\n", (int)sizeof( val ), (int)size ); \ +return -1; \ +} + +int test_get_command_queue_info(cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements) +{ + int error; + size_t size; + + cl_command_queue_properties device_props; + clGetDeviceInfo(deviceID, CL_DEVICE_QUEUE_PROPERTIES, sizeof(device_props), &device_props, NULL); + log_info("CL_DEVICE_QUEUE_PROPERTIES is %d\n", (int)device_props); + + clCommandQueueWrapper queue = clCreateCommandQueue( context, deviceID, device_props, &error ); + test_error( error, "Unable to create command queue to test with" ); + + cl_uint refCount; + error = clGetCommandQueueInfo( queue, CL_QUEUE_REFERENCE_COUNT, sizeof( refCount ), &refCount, &size ); + test_error( error, "Unable to get command queue reference count" ); + if( size != sizeof( refCount ) ) + { + log_error( "ERROR: Returned size of command queue reference count does not validate! (expected %d, got %d)\n", (int)sizeof( refCount ), (int)size ); + return -1; + } + + cl_context otherCtx; + TEST_COMMAND_QUEUE_PARAM( queue, CL_QUEUE_CONTEXT, otherCtx, context, "context", "%p", cl_context ) + + cl_device_id otherDevice; + error = clGetCommandQueueInfo( queue, CL_QUEUE_DEVICE, sizeof(otherDevice), &otherDevice, &size); + test_error(error, "clGetCommandQueue failed."); + + if (size != sizeof(cl_device_id)) { + log_error( " ERROR: Returned size of command queue CL_QUEUE_DEVICE does not validate! (expected %d, got %d)\n", (int)sizeof( otherDevice ), (int)size ); + return -1; + } + + /* Since the device IDs are opaque types we check the CL_DEVICE_VENDOR_ID which is unique for identical hardware. 
*/ + cl_uint otherDevice_vid, deviceID_vid; + error = clGetDeviceInfo(otherDevice, CL_DEVICE_VENDOR_ID, sizeof(otherDevice_vid), &otherDevice_vid, NULL ); + test_error( error, "Unable to get device CL_DEVICE_VENDOR_ID" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_VENDOR_ID, sizeof(deviceID_vid), &deviceID_vid, NULL ); + test_error( error, "Unable to get device CL_DEVICE_VENDOR_ID" ); + + if( otherDevice_vid != deviceID_vid ) + { + log_error( "ERROR: Incorrect device returned for queue! (Expected vendor ID 0x%x, got 0x%x)\n", deviceID_vid, otherDevice_vid ); + return -1; + } + + cl_command_queue_properties props; + TEST_COMMAND_QUEUE_PARAM( queue, CL_QUEUE_PROPERTIES, props, (unsigned int)( device_props ), "properties", "%d", unsigned int ) + + return 0; +} + +int test_get_context_info(cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements) +{ + int error; + size_t size; + cl_context_properties props; + + error = clGetContextInfo( context, CL_CONTEXT_PROPERTIES, sizeof( props ), &props, &size ); + test_error( error, "Unable to get context props" ); + + if (size == 0) { + // Valid size + return 0; + } else if (size == sizeof(cl_context_properties)) { + // Data must be NULL + if (props != 0) { + log_error("ERROR: Returned properties is no NULL.\n"); + return -1; + } + // Valid data and size + return 0; + } + // Size was not 0 or 1 + log_error( "ERROR: Returned size of context props is not valid! (expected 0 or %d, got %d)\n", + (int)sizeof(cl_context_properties), (int)size ); + return -1; +} + +#define TEST_MEM_OBJECT_PARAM( mem, paramName, val, expected, name, type, cast ) \ +error = clGetMemObjectInfo( mem, paramName, sizeof( val ), &val, &size ); \ +test_error( error, "Unable to get mem object " name ); \ +if( val != expected ) \ +{ \ +log_error( "ERROR: Mem object " name " did not validate! 
(expected " type ", got " type ")\n", (cast)(expected), (cast)val ); \ +return -1; \ +} \ +if( size != sizeof( val ) ) \ +{ \ +log_error( "ERROR: Returned size of mem object " name " does not validate! (expected %d, got %d)\n", (int)sizeof( val ), (int)size ); \ +return -1; \ +} + +void CL_CALLBACK mem_obj_destructor_callback( cl_mem, void *data ) +{ + free( data ); +} + +// All possible combinations of valid cl_mem_flags. +static cl_mem_flags all_flags[16] = { + 0, + CL_MEM_READ_WRITE, + CL_MEM_READ_ONLY, + CL_MEM_WRITE_ONLY, + CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, + CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, + CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, + CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, + CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, + CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, + CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, + CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, + CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, + CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, + CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, + CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, +}; + +#define TEST_DEVICE_PARAM( device, paramName, val, name, type, cast ) \ +error = clGetDeviceInfo( device, paramName, sizeof( val ), &val, &size ); \ +test_error( error, "Unable to get device " name ); \ +if( size != sizeof( val ) ) \ +{ \ +log_error( "ERROR: Returned size of device " name " does not validate! (expected %d, got %d)\n", (int)sizeof( val ), (int)size ); \ +return -1; \ +} \ +log_info( "\tReported device " name " : " type "\n", (cast)val ); + +#define TEST_DEVICE_PARAM_MEM( device, paramName, val, name, type, div ) \ +error = clGetDeviceInfo( device, paramName, sizeof( val ), &val, &size ); \ +test_error( error, "Unable to get device " name ); \ +if( size != sizeof( val ) ) \ +{ \ +log_error( "ERROR: Returned size of device " name " does not validate! 
(expected %d, got %d)\n", (int)sizeof( val ), (int)size ); \ +return -1; \ +} \ +log_info( "\tReported device " name " : " type "\n", (int)( val / div ) ); + +int test_get_device_info(cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements) +{ + int error; + size_t size; + + cl_uint vendorID; + TEST_DEVICE_PARAM( deviceID, CL_DEVICE_VENDOR_ID, vendorID, "vendor ID", "0x%08x", int ) + + char extensions[ 10240 ]; + error = clGetDeviceInfo( deviceID, CL_DEVICE_EXTENSIONS, sizeof( extensions ), &extensions, &size ); + test_error( error, "Unable to get device extensions" ); + if( size != strlen( extensions ) + 1 ) + { + log_error( "ERROR: Returned size of device extensions does not validate! (expected %d, got %d)\n", (int)( strlen( extensions ) + 1 ), (int)size ); + return -1; + } + log_info( "\tReported device extensions: %s \n", extensions ); + + cl_uint preferred; + TEST_DEVICE_PARAM( deviceID, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, preferred, "preferred vector char width", "%d", int ) + TEST_DEVICE_PARAM( deviceID, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, preferred, "preferred vector short width", "%d", int ) + TEST_DEVICE_PARAM( deviceID, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, preferred, "preferred vector int width", "%d", int ) + TEST_DEVICE_PARAM( deviceID, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, preferred, "preferred vector long width", "%d", int ) + TEST_DEVICE_PARAM( deviceID, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, preferred, "preferred vector float width", "%d", int ) + TEST_DEVICE_PARAM( deviceID, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, preferred, "preferred vector double width", "%d", int ) + + // Note that even if cl_khr_fp64, the preferred width for double can be non-zero. For example, vendors + // extensions can support double but may not support cl_khr_fp64, which implies math library support. 
+ + cl_uint baseAddrAlign; + TEST_DEVICE_PARAM( deviceID, CL_DEVICE_MEM_BASE_ADDR_ALIGN, baseAddrAlign, "base address alignment", "%d bytes", int ) + + cl_uint maxDataAlign; + TEST_DEVICE_PARAM( deviceID, CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE, maxDataAlign, "min data type alignment", "%d bytes", int ) + + cl_device_mem_cache_type cacheType; + error = clGetDeviceInfo( deviceID, CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, sizeof( cacheType ), &cacheType, &size ); + test_error( error, "Unable to get device global mem cache type" ); + if( size != sizeof( cacheType ) ) + { + log_error( "ERROR: Returned size of device global mem cache type does not validate! (expected %d, got %d)\n", (int)sizeof( cacheType ), (int)size ); + return -1; + } + const char *cacheTypeName = ( cacheType == CL_NONE ) ? "CL_NONE" : ( cacheType == CL_READ_ONLY_CACHE ) ? "CL_READ_ONLY_CACHE" : ( cacheType == CL_READ_WRITE_CACHE ) ? "CL_READ_WRITE_CACHE" : ""; + log_info( "\tReported device global mem cache type: %s \n", cacheTypeName ); + + cl_uint cachelineSize; + TEST_DEVICE_PARAM( deviceID, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, cachelineSize, "global mem cacheline size", "%d bytes", int ) + + cl_ulong cacheSize; + TEST_DEVICE_PARAM_MEM( deviceID, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, cacheSize, "global mem cache size", "%d KB", 1024 ) + + cl_ulong memSize; + TEST_DEVICE_PARAM_MEM( deviceID, CL_DEVICE_GLOBAL_MEM_SIZE, memSize, "global mem size", "%d MB", ( 1024 * 1024 ) ) + + cl_device_local_mem_type localMemType; + error = clGetDeviceInfo( deviceID, CL_DEVICE_LOCAL_MEM_TYPE, sizeof( localMemType ), &localMemType, &size ); + test_error( error, "Unable to get device local mem type" ); + if( size != sizeof( cacheType ) ) + { + log_error( "ERROR: Returned size of device local mem type does not validate! (expected %d, got %d)\n", (int)sizeof( localMemType ), (int)size ); + return -1; + } + const char *localMemTypeName = ( localMemType == CL_LOCAL ) ? "CL_LOCAL" : ( cacheType == CL_GLOBAL ) ? 
"CL_GLOBAL" : ""; + log_info( "\tReported device local mem type: %s \n", localMemTypeName ); + + + cl_bool errSupport; + TEST_DEVICE_PARAM( deviceID, CL_DEVICE_ERROR_CORRECTION_SUPPORT, errSupport, "error correction support", "%d", int ) + + size_t timerResolution; + TEST_DEVICE_PARAM( deviceID, CL_DEVICE_PROFILING_TIMER_RESOLUTION, timerResolution, "profiling timer resolution", "%ld nanoseconds", long ) + + cl_bool endian; + TEST_DEVICE_PARAM( deviceID, CL_DEVICE_ENDIAN_LITTLE, endian, "little endian flag", "%d", int ) + + cl_bool avail; + TEST_DEVICE_PARAM( deviceID, CL_DEVICE_AVAILABLE, avail, "available flag", "%d", int ) + + cl_bool compilerAvail; + TEST_DEVICE_PARAM( deviceID, CL_DEVICE_COMPILER_AVAILABLE, compilerAvail, "compiler available flag", "%d", int ) + + char profile[ 1024 ]; + error = clGetDeviceInfo( deviceID, CL_DEVICE_PROFILE, sizeof( profile ), &profile, &size ); + test_error( error, "Unable to get device profile" ); + if( size != strlen( profile ) + 1 ) + { + log_error( "ERROR: Returned size of device profile does not validate! (expected %d, got %d)\n", (int)( strlen( profile ) + 1 ), (int)size ); + return -1; + } + if( strcmp( profile, "FULL_PROFILE" ) != 0 && strcmp( profile, "EMBEDDED_PROFILE" ) != 0 ) + { + log_error( "ERROR: Returned profile of device not FULL or EMBEDDED as required by OpenCL 1.2! 
(Returned %s)\n", profile ); + return -1; + } + log_info( "\tReported device profile: %s \n", profile ); + + + return 0; +} + + + + +static const char *sample_compile_size[2] = { + "__kernel void sample_test(__global int *src, __global int *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + " dst[tid] = src[tid];\n" + "\n" + "}\n", + "__kernel __attribute__((reqd_work_group_size(%d,%d,%d))) void sample_test(__global int *src, __global int *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + " dst[tid] = src[tid];\n" + "\n" + "}\n" }; + +int test_kernel_required_group_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + size_t realSize; + size_t kernel_max_workgroup_size; + size_t global[] = {64,14,10}; + size_t local[] = {0,0,0}; + + cl_uint max_dimensions; + + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(max_dimensions), &max_dimensions, NULL); + test_error(error, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS"); + log_info("Device reported CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS = %d.\n", (int)max_dimensions); + + { + clProgramWrapper program; + clKernelWrapper kernel; + + error = create_single_kernel_helper( context, &program, &kernel, 1, &sample_compile_size[ 0 ], "sample_test" ); + if( error != 0 ) + return error; + + error = clGetKernelWorkGroupInfo(kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE, sizeof(kernel_max_workgroup_size), &kernel_max_workgroup_size, NULL); + test_error( error, "clGetKernelWorkGroupInfo failed for CL_KERNEL_WORK_GROUP_SIZE"); + log_info("The CL_KERNEL_WORK_GROUP_SIZE for the kernel is %d.\n", (int)kernel_max_workgroup_size); + + size_t size[ 3 ]; + error = clGetKernelWorkGroupInfo( kernel, deviceID, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, sizeof( size ), size, &realSize ); + test_error( error, "Unable to get work group info" ); + + if( size[ 0 ] != 0 || size[ 1 ] != 0 || size[ 2 ] != 0 ) + { + log_error( "ERROR: Nonzero compile work group size 
returned for nonspecified size! (returned %d,%d,%d)\n", (int)size[0], (int)size[1], (int)size[2] ); + return -1; + } + + if( realSize != sizeof( size ) ) + { + log_error( "ERROR: Returned size of compile work group size not valid! (Expected %d, got %d)\n", (int)sizeof( size ), (int)realSize ); + return -1; + } + + // Determine some local dimensions to use for the test. + if (max_dimensions == 1) { + error = get_max_common_work_group_size(context, kernel, global[0], &local[0]); + test_error( error, "get_max_common_work_group_size failed"); + log_info("For global dimension %d, kernel will require local dimension %d.\n", (int)global[0], (int)local[0]); + } else if (max_dimensions == 2) { + error = get_max_common_2D_work_group_size(context, kernel, global, local); + test_error( error, "get_max_common_2D_work_group_size failed"); + log_info("For global dimension %d x %d, kernel will require local dimension %d x %d.\n", (int)global[0], (int)global[1], (int)local[0], (int)local[1]); + } else { + error = get_max_common_3D_work_group_size(context, kernel, global, local); + test_error( error, "get_max_common_3D_work_group_size failed"); + log_info("For global dimension %d x %d x %d, kernel will require local dimension %d x %d x %d.\n", + (int)global[0], (int)global[1], (int)global[2], (int)local[0], (int)local[1], (int)local[2]); + } + } + + + { + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper in, out; + //char source[1024]; + char *source = (char*)malloc(1024); + source[0] = '\0'; + + sprintf(source, sample_compile_size[1], local[0], local[1], local[2]); + + error = create_single_kernel_helper( context, &program, &kernel, 1, (const char**)&source, "sample_test" ); + if( error != 0 ) + return error; + + size_t size[ 3 ]; + error = clGetKernelWorkGroupInfo( kernel, deviceID, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, sizeof( size ), size, &realSize ); + test_error( error, "Unable to get work group info" ); + + if( size[ 0 ] != local[0] || size[ 1 ] != local[1] || 
size[ 2 ] != local[2] ) + { + log_error( "ERROR: Incorrect compile work group size returned for specified size! (returned %d,%d,%d, expected %d,%d,%d)\n", + (int)size[0], (int)size[1], (int)size[2], (int)local[0], (int)local[1], (int)local[2]); + return -1; + } + + // Verify that the kernel will only execute with that size. + in = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(cl_int)*global[0], NULL, &error); + test_error(error, "clCreateBuffer failed"); + out = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(cl_int)*global[0], NULL, &error); + test_error(error, "clCreateBuffer failed"); + + error = clSetKernelArg(kernel, 0, sizeof(in), &in); + test_error(error, "clSetKernelArg failed"); + error = clSetKernelArg(kernel, 1, sizeof(out), &out); + test_error(error, "clSetKernelArg failed"); + + error = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global, local, 0, NULL, NULL); + test_error(error, "clEnqueueNDRangeKernel failed"); + + error = clFinish(queue); + test_error(error, "clFinish failed"); + + log_info("kernel_required_group_size may report spurious ERRORS in the conformance log.\n"); + + local[0]++; + error = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global, local, 0, NULL, NULL); + if (error != CL_INVALID_WORK_GROUP_SIZE) { + log_error("Incorrect error returned for executing a kernel with the wrong required local work group size. (used %d,%d,%d, required %d,%d,%d)\n", + (int)local[0], (int)local[1], (int)local[2], (int)local[0]-1, (int)local[1], (int)local[2] ); + print_error(error, "Expected: CL_INVALID_WORK_GROUP_SIZE."); + return -1; + } + + error = clFinish(queue); + test_error(error, "clFinish failed"); + + if (max_dimensions == 1) { + free(source); + return 0; + } + + local[0]--; local[1]++; + error = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global, local, 0, NULL, NULL); + if (error != CL_INVALID_WORK_GROUP_SIZE) { + log_error("Incorrect error returned for executing a kernel with the wrong required local work group size. 
(used %d,%d,%d, required %d,%d,%d)\n", + (int)local[0], (int)local[1], (int)local[2], (int)local[0]-1, (int)local[1], (int)local[2]); + print_error(error, "Expected: CL_INVALID_WORK_GROUP_SIZE."); + return -1; + } + + error = clFinish(queue); + test_error(error, "clFinish failed"); + + if (max_dimensions == 2) { + return 0; + free(source); + } + + local[1]--; local[2]++; + error = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global, local, 0, NULL, NULL); + if (error != CL_INVALID_WORK_GROUP_SIZE) { + log_error("Incorrect error returned for executing a kernel with the wrong required local work group size. (used %d,%d,%d, required %d,%d,%d)\n", + (int)local[0], (int)local[1], (int)local[2], (int)local[0]-1, (int)local[1], (int)local[2]); + print_error(error, "Expected: CL_INVALID_WORK_GROUP_SIZE."); + return -1; + } + + error = clFinish(queue); + test_error(error, "clFinish failed"); + free(source); + } + + return 0; +} + + diff --git a/test_conformance/compatibility/test_conformance/api/test_retain.cpp b/test_conformance/compatibility/test_conformance/api/test_retain.cpp new file mode 100644 index 00000000..0fa0ac6b --- /dev/null +++ b/test_conformance/compatibility/test_conformance/api/test_retain.cpp @@ -0,0 +1,234 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
//
#include "testBase.h"
#if !defined(_WIN32)
// NOTE(review): the header name after this #include is missing here — it
// appears to have been lost when this patch was extracted. Confirm against
// the upstream test_retain.cpp.
#include
#endif // !_WIN32

// Note: According to spec, the various functions to get instance counts should return an error when passed in an object
// that has already been released. However, the spec is out of date. If it gets re-updated to allow such action, re-enable
// this define.
//#define VERIFY_AFTER_RELEASE 1

// Query the reference count of queue/mem object p into numInstances.
// The API status lands in err; on failure numInstances is forced to 0.
#define GET_QUEUE_INSTANCE_COUNT(p) numInstances = ( (err = clGetCommandQueueInfo(p, CL_QUEUE_REFERENCE_COUNT, sizeof( numInstances ), &numInstances, NULL)) == CL_SUCCESS ? numInstances : 0 )
#define GET_MEM_INSTANCE_COUNT(p) numInstances = ( (err = clGetMemObjectInfo(p, CL_MEM_REFERENCE_COUNT, sizeof( numInstances ), &numInstances, NULL)) == CL_SUCCESS ? numInstances : 0 )

// Fail the enclosing test (-1) if the observed reference count c differs
// from rightValue.
#define VERIFY_INSTANCE_COUNT(c,rightValue) if( c != rightValue ) { \
log_error( "ERROR: Instance count for test object is not valid! (should be %d, really is %d)\n", rightValue, c ); \
return -1; }

// Create a queue, verify its initial reference count is 1, then release it.
int test_retain_queue_single(cl_device_id deviceID, cl_context context, cl_command_queue queueNotUsed, int num_elements)
{
    cl_command_queue queue;
    cl_uint numInstances;
    int err;


    /* Create a test queue */
    queue = clCreateCommandQueue( context, deviceID, 0, &err );
    test_error( err, "Unable to create command queue to test with" );

    /* Test the instance count */
    GET_QUEUE_INSTANCE_COUNT( queue );
    test_error( err, "Unable to get queue instance count" );
    VERIFY_INSTANCE_COUNT( numInstances, 1 );

    /* Now release the program */
    clReleaseCommandQueue( queue );
#ifdef VERIFY_AFTER_RELEASE
    /* We're not allowed to get the instance count after the object has been completely released. But that's
       exactly how we can tell the release worked--by making sure getting the instance count fails! */
    GET_QUEUE_INSTANCE_COUNT( queue );
    if( err != CL_INVALID_COMMAND_QUEUE )
    {
        print_error( err, "Command queue was not properly released" );
        return -1;
    }
#endif

    return 0;
}

// Exercise clRetainCommandQueue/clReleaseCommandQueue in mixed sequences
// (1 -> 10 -> 5 -> 8 -> 1 -> 0) and verify the reference count at each step.
int test_retain_queue_multiple(cl_device_id deviceID, cl_context context, cl_command_queue queueNotUsed, int num_elements)
{
    cl_command_queue queue;
    unsigned int numInstances, i;
    int err;


    /* Create a test program */
    queue = clCreateCommandQueue( context, deviceID, 0, &err );
    test_error( err, "Unable to create command queue to test with" );

    /* Increment 9 times, which should bring the count to 10 */
    for( i = 0; i < 9; i++ )
    {
        clRetainCommandQueue( queue );
    }

    /* Test the instance count */
    GET_QUEUE_INSTANCE_COUNT( queue );
    test_error( err, "Unable to get queue instance count" );
    VERIFY_INSTANCE_COUNT( numInstances, 10 );

    /* Now release 5 times, which should take us to 5 */
    for( i = 0; i < 5; i++ )
    {
        clReleaseCommandQueue( queue );
    }

    GET_QUEUE_INSTANCE_COUNT( queue );
    test_error( err, "Unable to get queue instance count" );
    VERIFY_INSTANCE_COUNT( numInstances, 5 );

    /* Retain again three times, which should take us to 8 */
    for( i = 0; i < 3; i++ )
    {
        clRetainCommandQueue( queue );
    }

    GET_QUEUE_INSTANCE_COUNT( queue );
    test_error( err, "Unable to get queue instance count" );
    VERIFY_INSTANCE_COUNT( numInstances, 8 );

    /* Release 7 times, which should take it to 1 */
    for( i = 0; i < 7; i++ )
    {
        clReleaseCommandQueue( queue );
    }

    GET_QUEUE_INSTANCE_COUNT( queue );
    test_error( err, "Unable to get queue instance count" );
    VERIFY_INSTANCE_COUNT( numInstances, 1 );

    /* And one last one */
    clReleaseCommandQueue( queue );

#ifdef VERIFY_AFTER_RELEASE
    /* We're not allowed to get the instance count after the object has been completely released. But that's
       exactly how we can tell the release worked--by making sure getting the instance count fails! */
    GET_QUEUE_INSTANCE_COUNT( queue );
    if( err != CL_INVALID_COMMAND_QUEUE )
    {
        print_error( err, "Command queue was not properly released" );
        return -1;
    }
#endif

    return 0;
}

// Create a buffer, verify its initial reference count is 1, then release it.
int test_retain_mem_object_single(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_mem object;
    cl_uint numInstances;
    int err;


    /* Create a test object */
    object = clCreateBuffer( context, CL_MEM_READ_ONLY, 32, NULL, &err );
    test_error( err, "Unable to create buffer to test with" );

    /* Test the instance count */
    GET_MEM_INSTANCE_COUNT( object );
    test_error( err, "Unable to get mem object count" );
    VERIFY_INSTANCE_COUNT( numInstances, 1 );

    /* Now release the program */
    clReleaseMemObject( object );
#ifdef VERIFY_AFTER_RELEASE
    /* We're not allowed to get the instance count after the object has been completely released. But that's
       exactly how we can tell the release worked--by making sure getting the instance count fails! */
    GET_MEM_INSTANCE_COUNT( object );
    if( err != CL_INVALID_MEM_OBJECT )
    {
        print_error( err, "Mem object was not properly released" );
        return -1;
    }
#endif

    return 0;
}

// Exercise clRetainMemObject/clReleaseMemObject in mixed sequences
// (1 -> 10 -> 5 -> 8 -> 1 -> 0) and verify the reference count at each step.
int test_retain_mem_object_multiple(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_mem object;
    unsigned int numInstances, i;
    int err;


    /* Create a test object */
    object = clCreateBuffer( context, CL_MEM_READ_ONLY, 32, NULL, &err );
    test_error( err, "Unable to create buffer to test with" );

    /* Increment 9 times, which should bring the count to 10 */
    for( i = 0; i < 9; i++ )
    {
        clRetainMemObject( object );
    }

    /* Test the instance count */
    GET_MEM_INSTANCE_COUNT( object );
    test_error( err, "Unable to get mem object count" );
    VERIFY_INSTANCE_COUNT( numInstances, 10 );

    /* Now release 5 times, which should take us to 5 */
    for( i = 0; i < 5; i++ )
    {
        clReleaseMemObject( object );
    }

    GET_MEM_INSTANCE_COUNT( object );
    test_error( err, "Unable to get mem object count" );
    VERIFY_INSTANCE_COUNT( numInstances, 5 );

    /* Retain again three times, which should take us to 8 */
    for( i = 0; i < 3; i++ )
    {
        clRetainMemObject( object );
    }

    GET_MEM_INSTANCE_COUNT( object );
    test_error( err, "Unable to get mem object count" );
    VERIFY_INSTANCE_COUNT( numInstances, 8 );

    /* Release 7 times, which should take it to 1 */
    for( i = 0; i < 7; i++ )
    {
        clReleaseMemObject( object );
    }

    GET_MEM_INSTANCE_COUNT( object );
    test_error( err, "Unable to get mem object count" );
    VERIFY_INSTANCE_COUNT( numInstances, 1 );

    /* And one last one */
    clReleaseMemObject( object );

#ifdef VERIFY_AFTER_RELEASE
    /* We're not allowed to get the instance count after the object has been completely released. But that's
       exactly how we can tell the release worked--by making sure getting the instance count fails! */
    GET_MEM_INSTANCE_COUNT( object );
    if( err != CL_INVALID_MEM_OBJECT )
    {
        print_error( err, "Mem object was not properly released" );
        return -1;
    }
#endif

    return 0;
}

diff --git a/test_conformance/compatibility/test_conformance/api/test_retain_program.c b/test_conformance/compatibility/test_conformance/api/test_retain_program.c
new file mode 100644
index 00000000..7fe22f0c
--- /dev/null
+++ b/test_conformance/compatibility/test_conformance/api/test_retain_program.c
@@ -0,0 +1,109 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"

#if !defined(_WIN32)
// NOTE(review): the header name after this #include is missing — appears to
// have been lost in patch extraction; confirm against upstream.
#include
#endif

#include "../../test_common/harness/compat.h"

// Releasing a program before the kernel created from it must be legal if
// the implementation's internal reference counting is correct.
int test_release_kernel_order(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_program program;
    cl_kernel kernel;
    int error;
    const char *testProgram[] = { "__kernel void sample_test(__global int *data){}" };

    /* Create a test program */
    program = clCreateProgramWithSource( context, 1, testProgram, NULL, &error);
    test_error( error, "Unable to create program to test with" );

    /* Compile the program */
    error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
    test_error( error, "Unable to build sample program to test with" );

    /* And create a kernel from it */
    kernel = clCreateKernel( program, "sample_test", &error );
    test_error( error, "Unable to create kernel" );

    /* Now try freeing the program first, then the kernel. If refcounts are right, this should work just fine */
    clReleaseProgram( program );
    clReleaseKernel( kernel );

    /* If we got here fine, we succeeded. If not, well, we won't be able to return an error :) */
    return 0;
}

// Kernel with a long busy-loop so it is still executing when the host
// releases the objects below.
const char *sample_delay_kernel[] = {
"__kernel void sample_test(__global float *src, __global int *dst)\n"
"{\n"
"    int  tid = get_global_id(0);\n"
"     for( int i = 0; i < 1000000; i++ ); \n"
"     dst[tid] = (int)src[tid];\n"
"\n"
"}\n" };

// Releasing buffers, kernel and program while the kernel is (potentially)
// still executing must be safe if internal refcounting is correct.
int test_release_during_execute( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    cl_program program;
    cl_kernel kernel;
    cl_mem streams[2];
    size_t threads[1] = { 10 }, localThreadSize;


    /* We now need an event to test. So we'll execute a kernel to get one */
    if( create_single_kernel_helper( context, &program, &kernel, 1, sample_delay_kernel, "sample_test" ) )
    {
        return -1;
    }

    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * 10, NULL, &error);
    test_error( error, "Creating test array failed" );
    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 10, NULL, &error);
    test_error( error, "Creating test array failed" );

    /* Set the arguments */
    error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[ 0 ]);
    test_error( error, "Unable to set indexed kernel arguments" );
    error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[ 1 ]);
    test_error( error, "Unable to set indexed kernel arguments" );

    error = get_max_common_work_group_size( context, kernel, threads[0], &localThreadSize );
    test_error( error, "Unable to calc local thread size" );


    /* Execute the kernel */
    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, &localThreadSize, 0, NULL, NULL );
    test_error( error, "Unable to execute test kernel" );

    /* The kernel should still be executing, but we should still be able to release it. It's not terribly
       useful, but we should be able to do it, if the internal refcounting is indeed correct. */

    clReleaseMemObject( streams[ 1 ] );
    clReleaseMemObject( streams[ 0 ] );
    clReleaseKernel( kernel );
    clReleaseProgram( program );

    /* Now make sure we're really finished before we go on. */
    error = clFinish(queue);
    test_error( error, "Unable to finish context.");

    return 0;
}


diff --git a/test_conformance/compatibility/test_conformance/basic/CMakeLists.txt b/test_conformance/compatibility/test_conformance/basic/CMakeLists.txt
new file mode 100644
index 00000000..d6a5fe65
--- /dev/null
+++ b/test_conformance/compatibility/test_conformance/basic/CMakeLists.txt
@@ -0,0 +1,65 @@
set(MODULE_NAME COMPATIBILITY_BASIC)

set(${MODULE_NAME}_SOURCES
    main.c
    test_fpmath_float.c test_fpmath_float2.c test_fpmath_float4.c
    test_intmath_int.c test_intmath_int2.c test_intmath_int4.c
    test_intmath_long.c test_intmath_long2.c test_intmath_long4.c
    test_hiloeo.c test_local.c test_pointercast.c
    test_if.c test_loop.c
    test_readimage.c test_readimage_int16.c test_readimage_fp32.c
    test_readimage3d.c test_readimage3d_int16.c test_readimage3d_fp32.c
    test_writeimage.c test_writeimage_int16.c test_writeimage_fp32.c
    test_multireadimageonefmt.c test_multireadimagemultifmt.c
    test_imagedim.c
    test_vloadstore.c
    test_int2float.c test_float2int.c
    test_createkernelsinprogram.c
    test_hostptr.c
    test_explicit_s2v.cpp
    test_constant.c
    test_image_multipass.c
    test_imagereadwrite.c test_imagereadwrite3d.c
    test_image_param.c
    test_imagenpot.c
    test_image_r8.c
    test_barrier.c
    test_basic_parameter_types.c
    test_arrayreadwrite.c
    test_arraycopy.c
    test_imagearraycopy.c
    test_imagearraycopy3d.c
    test_imagecopy.c
    test_imagerandomcopy.c
    test_arrayimagecopy.c
    test_arrayimagecopy3d.c
    test_imagecopy3d.c
    test_enqueue_map.cpp
    test_work_item_functions.cpp
    test_astype.cpp
    test_async_copy.cpp
    test_sizeof.c
    test_vector_creation.cpp
    test_vec_type_hint.c
    test_numeric_constants.cpp
    test_constant_source.cpp
    test_bufferreadwriterect.c
    test_async_strided_copy.cpp
    test_preprocessors.cpp
    test_kernel_memory_alignment.cpp
    test_global_work_offsets.cpp
    test_kernel_call_kernel_function.cpp
    test_local_kernel_scope.cpp
    # Shared test-harness sources compiled into this module (paths relative
    # to this directory).
    ../../test_common/harness/errorHelpers.c
    ../../test_common/harness/threadTesting.c
    ../../test_common/harness/testHarness.c
    ../../test_common/harness/kernelHelpers.c
    ../../test_common/harness/typeWrappers.cpp
    ../../test_common/harness/imageHelpers.cpp
    ../../test_common/harness/mt19937.c
    ../../test_common/harness/conversions.c
    ../../test_common/harness/rounding_mode.c
    ../../test_common/harness/msvc9.c
)

# Common add_executable/link boilerplate shared by all conformance modules.
include(../../../CMakeCommon.txt)
diff --git a/test_conformance/compatibility/test_conformance/basic/Jamfile b/test_conformance/compatibility/test_conformance/basic/Jamfile
new file mode 100644
index 00000000..02c3fca9
--- /dev/null
+++ b/test_conformance/compatibility/test_conformance/basic/Jamfile
@@ -0,0 +1,75 @@
# NOTE(review): the <toolset>/<cflags>/<variant> angle-bracket markup appears
# to have been stripped from the conditions below when this patch was
# extracted -- confirm against the upstream Jamfile.
project
    : requirements
    gcc:-xc++
    msvc:"/TP"
    ;

exe test_basic
    : main.c
    test_arraycopy.c
    test_arrayimagecopy3d.c
    test_arrayimagecopy.c
    test_arrayreadwrite.c
    test_astype.cpp
    test_async_copy.cpp
    test_barrier.c
    test_basic_parameter_types.c
    test_constant.c
    test_createkernelsinprogram.c
    test_enqueue_map.cpp
    test_explicit_s2v.cpp
    test_float2int.c
    test_fpmath_float2.c
    test_fpmath_float4.c
    test_fpmath_float.c
    test_hiloeo.c
    test_hostptr.c
    test_if.c
    test_imagearraycopy3d.c
    test_imagearraycopy.c
    test_imagecopy3d.c
    test_imagecopy.c
    test_imagedim.c
    test_image_multipass.c
    test_imagenpot.c
    test_image_param.c
    test_image_r8.c
    test_imagerandomcopy.c
    test_imagereadwrite3d.c
    test_imagereadwrite.c
    test_int2float.c
    test_intmath_int2.c
    test_intmath_int4.c
    test_intmath_int.c
    test_intmath_long2.c
    test_intmath_long4.c
    test_intmath_long.c
    test_local.c
    test_loop.c
    test_multireadimagemultifmt.c
    test_multireadimageonefmt.c
    test_pointercast.c
    test_readimage3d.c
    test_readimage3d_fp32.c
    test_readimage3d_int16.c
    test_readimage.c
    test_readimage_fp32.c
    test_readimage_int16.c
    test_sizeof.c
    test_vec_type_hint.c
    test_vector_creation.cpp
    test_vloadstore.c
    test_work_item_functions.cpp
    test_writeimage.c
    test_writeimage_fp32.c
    test_writeimage_int16.c
    test_numeric_constants.cpp
    test_kernel_call_kernel_function.cpp
    ;

install dist
    : test_basic
    : debug:$(DIST)/debug/tests/test_conformance/basic
    release:$(DIST)/release/tests/test_conformance/basic
    ;
diff --git a/test_conformance/compatibility/test_conformance/basic/Makefile b/test_conformance/compatibility/test_conformance/basic/Makefile
new file mode 100644
index 00000000..eaa45ce2
--- /dev/null
+++ b/test_conformance/compatibility/test_conformance/basic/Makefile
@@ -0,0 +1,94 @@
# Optional Apple Test Framework support.
ifdef BUILD_WITH_ATF
ATF = -framework ATF
USE_ATF = -DUSE_ATF
endif

SRCS = main.c \
	test_fpmath_float.c test_fpmath_float2.c test_fpmath_float4.c \
	test_intmath_int.c test_intmath_int2.c test_intmath_int4.c \
	test_intmath_long.c test_intmath_long2.c test_intmath_long4.c \
	test_hiloeo.c test_local.c test_local_kernel_scope.cpp test_pointercast.c \
	test_if.c test_sizeof.c test_loop.c \
	test_readimage.c test_readimage_int16.c test_readimage_fp32.c \
	test_readimage3d.c test_readimage3d_int16.c test_readimage3d_fp32.c \
	test_writeimage.c test_writeimage_int16.c test_writeimage_fp32.c \
	test_multireadimageonefmt.c test_multireadimagemultifmt.c \
	test_imagedim.c \
	test_vloadstore.c \
	test_int2float.c test_float2int.c \
	test_createkernelsinprogram.c \
	test_hostptr.c \
	test_explicit_s2v.cpp \
	test_constant.c \
	test_constant_source.cpp \
	test_image_multipass.c \
	test_imagereadwrite.c test_imagereadwrite3d.c \
	test_bufferreadwriterect.c \
	test_image_param.c \
	test_imagenpot.c \
	test_image_r8.c \
	test_barrier.c \
	test_arrayreadwrite.c \
	test_arraycopy.c \
	test_imagearraycopy.c \
	test_imagearraycopy3d.c \
	test_imagecopy.c \
	test_imagerandomcopy.c \
	test_arrayimagecopy.c \
	test_arrayimagecopy3d.c\
	test_imagecopy3d.c \
	test_enqueue_map.cpp \
	test_work_item_functions.cpp \
	test_astype.cpp \
	test_async_copy.cpp \
	test_async_strided_copy.cpp \
	test_numeric_constants.cpp \
	test_kernel_call_kernel_function.cpp \
	test_basic_parameter_types.c \
	test_vector_creation.cpp \
	test_vec_type_hint.c \
	test_preprocessors.cpp \
	test_kernel_memory_alignment.cpp \
	test_global_work_offsets.cpp \
	../../test_common/harness/errorHelpers.c \
	../../test_common/harness/threadTesting.c \
	../../test_common/harness/testHarness.c \
	../../test_common/harness/rounding_mode.c \
	../../test_common/harness/kernelHelpers.c \
	../../test_common/harness/typeWrappers.cpp \
	../../test_common/harness/imageHelpers.cpp \
	../../test_common/harness/mt19937.c \
	../../test_common/harness/conversions.c

DEFINES =

# Build configuration (macOS framework build; all sources are compiled with
# the C++ driver below).
SOURCES = $(abspath $(SRCS))
LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries
LIBPATH += -L.
FRAMEWORK = $(SOURCES)
HEADERS =
TARGET = test_basic
INCLUDE =
COMPILERFLAGS = -c -Wall -g -O0 -Wshorten-64-to-32
CC = c++
CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF}

OBJECTS := ${SOURCES:.c=.o}
OBJECTS := ${OBJECTS:.cpp=.o}

TARGETOBJECT =
all: $(TARGET)

$(TARGET): $(OBJECTS)
	$(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES)

clean:
	rm -f $(TARGET) $(OBJECTS)

.DEFAULT:
	@echo The target \"$@\" does not exist in Makefile.



diff --git a/test_conformance/compatibility/test_conformance/basic/main.c b/test_conformance/compatibility/test_conformance/basic/main.c
new file mode 100644
index 00000000..fd6ae953
--- /dev/null
+++ b/test_conformance/compatibility/test_conformance/basic/main.c
@@ -0,0 +1,263 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" + +#if !defined(_WIN32) +#include <unistd.h> +#endif + +#include <stdio.h> +#include <string.h> +#include "../../test_common/harness/testHarness.h" +#include "procs.h" + +// FIXME: To use certain functions in ../../test_common/harness/imageHelpers.h +// (for example, generate_random_image_data()), the tests are required to declare +// the following variables: +cl_device_type gDeviceType = CL_DEVICE_TYPE_DEFAULT; +bool gTestRounding = false; + +basefn basefn_list[] = { + test_hostptr, + test_fpmath_float, + test_fpmath_float2, + test_fpmath_float4, + test_intmath_int, + test_intmath_int2, + test_intmath_int4, + test_intmath_long, + test_intmath_long2, + test_intmath_long4, + test_hiloeo, + test_if, + test_sizeof, + test_loop, + test_pointer_cast, + test_local_arg_def, + test_local_kernel_def, + test_local_kernel_scope, + test_constant, + test_constant_source, + test_readimage, + test_readimage_int16, + test_readimage_fp32, + test_writeimage, + test_writeimage_int16, + test_writeimage_fp32, + test_multireadimageonefmt, + + test_multireadimagemultifmt, + test_image_r8, + test_barrier, + test_int2float, + test_float2int, + test_imagereadwrite, + test_imagereadwrite3d, + test_readimage3d, + test_readimage3d_int16, + test_readimage3d_fp32, + test_bufferreadwriterect, + test_arrayreadwrite, + test_arraycopy, + test_imagearraycopy, + test_imagearraycopy3d, + test_imagecopy, + test_imagecopy3d, + 
test_imagerandomcopy, + test_arrayimagecopy, + test_arrayimagecopy3d, + test_imagenpot, + + test_vload_global, + test_vload_local, + test_vload_constant, + test_vload_private, + test_vstore_global, + test_vstore_local, + test_vstore_private, + + test_createkernelsinprogram, + test_imagedim_pow2, + test_imagedim_non_pow2, + test_image_param, + test_image_multipass_integer_coord, + test_image_multipass_float_coord, + test_explicit_s2v_bool, + test_explicit_s2v_char, + test_explicit_s2v_uchar, + test_explicit_s2v_short, + test_explicit_s2v_ushort, + test_explicit_s2v_int, + test_explicit_s2v_uint, + test_explicit_s2v_long, + test_explicit_s2v_ulong, + test_explicit_s2v_float, + test_explicit_s2v_double, + + test_enqueue_map_buffer, + test_enqueue_map_image, + + test_work_item_functions, + + test_astype, + + test_async_copy_global_to_local, + test_async_copy_local_to_global, + test_async_strided_copy_global_to_local, + test_async_strided_copy_local_to_global, + test_prefetch, + + test_kernel_call_kernel_function, + test_host_numeric_constants, + test_kernel_numeric_constants, + test_kernel_limit_constants, + test_kernel_preprocessor_macros, + + test_basic_parameter_types, + test_vector_creation, + test_vec_type_hint, + test_kernel_memory_alignment_local, + test_kernel_memory_alignment_global, + test_kernel_memory_alignment_constant, + test_kernel_memory_alignment_private, + + test_global_work_offsets, + test_get_global_offset +}; + +const char *basefn_names[] = { + "hostptr", + "fpmath_float", + "fpmath_float2", + "fpmath_float4", + "intmath_int", + "intmath_int2", + "intmath_int4", + "intmath_long", + "intmath_long2", + "intmath_long4", + "hiloeo", + "if", + "sizeof", + "loop", + "pointer_cast", + "local_arg_def", + "local_kernel_def", + "local_kernel_scope", + "constant", + "constant_source", + "readimage", + "readimage_int16", + "readimage_fp32", + "writeimage", + "writeimage_int16", + "writeimage_fp32", + "mri_one", + + "mri_multiple", + "image_r8", + "barrier", + 
"int2float", + "float2int", + "imagereadwrite", + "imagereadwrite3d", + "readimage3d", + "readimage3d_int16", + "readimage3d_fp32", + "bufferreadwriterect", + "arrayreadwrite", + "arraycopy", + "imagearraycopy", + "imagearraycopy3d", + "imagecopy", + "imagecopy3d", + "imagerandomcopy", + "arrayimagecopy", + "arrayimagecopy3d", + "imagenpot", + + "vload_global", + "vload_local", + "vload_constant", + "vload_private", + "vstore_global", + "vstore_local", + "vstore_private", + + "createkernelsinprogram", + "imagedim_pow2", + "imagedim_non_pow2", + "image_param", + "image_multipass_integer_coord", + "image_multipass_float_coord", + "explicit_s2v_bool", + "explicit_s2v_char", + "explicit_s2v_uchar", + "explicit_s2v_short", + "explicit_s2v_ushort", + "explicit_s2v_int", + "explicit_s2v_uint", + "explicit_s2v_long", + "explicit_s2v_ulong", + "explicit_s2v_float", + "explicit_s2v_double", + + "enqueue_map_buffer", + "enqueue_map_image", + + "work_item_functions", + + "astype", + + "async_copy_global_to_local", + "async_copy_local_to_global", + "async_strided_copy_global_to_local", + "async_strided_copy_local_to_global", + "prefetch", + + "kernel_call_kernel_function", + "host_numeric_constants", + "kernel_numeric_constants", + "kernel_limit_constants", + "kernel_preprocessor_macros", + + "parameter_types", + + "vector_creation", + "vec_type_hint", + + "kernel_memory_alignment_local", + "kernel_memory_alignment_global", + "kernel_memory_alignment_constant", + "kernel_memory_alignment_private", + + "global_work_offsets", + "get_global_offset", +}; + +ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_list) / sizeof(basefn_list[0]))); + +int num_fns = sizeof(basefn_names) / sizeof(char *); + + +int main(int argc, const char *argv[]) +{ + int err = runTestHarness( argc, argv, num_fns, basefn_list, basefn_names, false, false, 0 ); + return err; +} + + + diff --git a/test_conformance/compatibility/test_conformance/basic/procs.h 
b/test_conformance/compatibility/test_conformance/basic/procs.h new file mode 100644 index 00000000..b5b5ce6f --- /dev/null +++ b/test_conformance/compatibility/test_conformance/basic/procs.h @@ -0,0 +1,142 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/kernelHelpers.h" +#include "../../test_common/harness/testHarness.h" +#include "../../test_common/harness/errorHelpers.h" +#include "../../test_common/harness/typeWrappers.h" +#include "../../test_common/harness/conversions.h" +#include "../../test_common/harness/rounding_mode.h" + +extern void memset_pattern4(void *dest, const void *src_pattern, size_t bytes ); + +extern int test_hostptr(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_fpmath_float(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_fpmath_float2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_fpmath_float4(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_intmath_int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_intmath_int2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_intmath_int4(cl_device_id deviceID, cl_context context, 
cl_command_queue queue, int num_elements); +extern int test_intmath_long(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_intmath_long2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_intmath_long4(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_hiloeo(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_if(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_sizeof(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_loop(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_pointer_cast(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_local_arg_def(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_local_kernel_def(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_local_kernel_scope(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_constant(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_constant_source(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_readimage(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_readimage_int16(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_readimage_fp32(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_writeimage(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); 
+extern int test_writeimage_int16(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_writeimage_fp32(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_multireadimageonefmt(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_multireadimagemultifmt(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_image_r8(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_simplebarrier(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_barrier(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_int2float(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_float2int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_imagearraycopy(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_imagearraycopy3d(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_imagereadwrite(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_imagereadwrite3d(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_readimage3d(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_readimage3d_int16(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_readimage3d_fp32(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_bufferreadwriterect(cl_device_id device, cl_context context, cl_command_queue queue_, int 
num_elements); +extern int test_imagecopy(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_imagecopy3d(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_imagerandomcopy(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_arraycopy(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems); +extern int test_arrayimagecopy(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_arrayimagecopy3d(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_imagenpot(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_sampler_float(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_sampler_int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_createkernelsinprogram(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_single_large_allocation(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_multiple_max_allocation(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_arrayreadwrite(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_imagedim_pow2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_imagedim_non_pow2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_image_param(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_image_multipass_integer_coord(cl_device_id deviceID, cl_context context, 
cl_command_queue queue, int num_elements); +extern int test_image_multipass_float_coord(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_vload_global(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_vload_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_vload_constant(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_vload_private(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_vstore_global(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_vstore_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_vstore_private(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_explicit_s2v_bool(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_explicit_s2v_char(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_explicit_s2v_uchar(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_explicit_s2v_short(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_explicit_s2v_ushort(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_explicit_s2v_int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_explicit_s2v_uint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_explicit_s2v_long(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int 
test_explicit_s2v_ulong(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_explicit_s2v_float(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_explicit_s2v_double(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_enqueue_map_buffer(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_enqueue_map_image(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_work_item_functions(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_astype(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_native_kernel(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_async_copy_global_to_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_async_copy_local_to_global(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_async_strided_copy_global_to_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_async_strided_copy_local_to_global(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_prefetch(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_host_numeric_constants(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_kernel_numeric_constants(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_kernel_limit_constants(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern 
int test_kernel_preprocessor_macros(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_kernel_call_kernel_function(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_basic_parameter_types(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); +extern int test_vector_creation(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_vec_type_hint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + + +extern int test_kernel_memory_alignment_local(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ); +extern int test_kernel_memory_alignment_global(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ); +extern int test_kernel_memory_alignment_constant(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ); +extern int test_kernel_memory_alignment_private(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ); + +extern int test_global_work_offsets(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ); +extern int test_get_global_offset(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ); + + + diff --git a/test_conformance/compatibility/test_conformance/basic/run_array b/test_conformance/compatibility/test_conformance/basic/run_array new file mode 100644 index 00000000..07d67892 --- /dev/null +++ b/test_conformance/compatibility/test_conformance/basic/run_array @@ -0,0 +1,3 @@ +#!/bin/sh +cd `dirname $0` +./test_basic arrayreadwrite arraycopy bufferreadwriterect $@ diff --git a/test_conformance/compatibility/test_conformance/basic/run_array_image_copy b/test_conformance/compatibility/test_conformance/basic/run_array_image_copy new file mode 100644 index 00000000..f88ec2a0 --- /dev/null +++ 
b/test_conformance/compatibility/test_conformance/basic/run_array_image_copy @@ -0,0 +1,3 @@ +#!/bin/sh +cd `dirname $0` +./test_basic arrayimagecopy arrayimagecopy3d imagearraycopy diff --git a/test_conformance/compatibility/test_conformance/basic/run_image b/test_conformance/compatibility/test_conformance/basic/run_image new file mode 100644 index 00000000..9bb5ee1b --- /dev/null +++ b/test_conformance/compatibility/test_conformance/basic/run_image @@ -0,0 +1,17 @@ +#!/bin/sh +cd `dirname $0` +./test_basic \ +imagecopy imagerandomcopy \ +imagearraycopy imagearraycopy3d \ +image_r8 \ +readimage readimage_int16 readimage_fp32 \ +writeimage writeimage_int16 writeimage_fp32 \ +imagenpot \ +image_param \ +image_multipass_integer_coord \ +readimage3d \ +readimage3d_int16 \ +readimage3d_fp32 \ +imagereadwrite3d \ +imagereadwrite \ +$@ diff --git a/test_conformance/compatibility/test_conformance/basic/run_multi_read_image b/test_conformance/compatibility/test_conformance/basic/run_multi_read_image new file mode 100644 index 00000000..aa87b1cd --- /dev/null +++ b/test_conformance/compatibility/test_conformance/basic/run_multi_read_image @@ -0,0 +1,4 @@ +#!/bin/sh +cd `dirname $0` +./test_basic mri_one mri_multiple + diff --git a/test_conformance/compatibility/test_conformance/basic/test_arraycopy.c b/test_conformance/compatibility/test_conformance/basic/test_arraycopy.c new file mode 100644 index 00000000..5104c49b --- /dev/null +++ b/test_conformance/compatibility/test_conformance/basic/test_arraycopy.c @@ -0,0 +1,201 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include + + +#include "procs.h" + +const char *copy_kernel_code = +"__kernel void test_copy(__global unsigned int *src, __global unsigned int *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = src[tid];\n" +"}\n"; + +int +test_arraycopy(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + cl_uint *input_ptr, *output_ptr; + cl_mem streams[4], results; + cl_program program; + cl_kernel kernel; + unsigned num_elements = 128 * 1024; + cl_uint num_copies = 1; + size_t delta_offset; + unsigned i; + cl_int err; + MTdata d; + + int error_count = 0; + + input_ptr = (cl_uint*)malloc(sizeof(cl_uint) * num_elements); + output_ptr = (cl_uint*)malloc(sizeof(cl_uint) * num_elements); + + // results + results = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * num_elements, NULL, &err); + test_error(err, "clCreateBuffer failed"); + +/*****************************************************************************************************************************************/ +#pragma mark client backing + + log_info("Testing CL_MEM_USE_HOST_PTR buffer with clEnqueueCopyBuffer\n"); + // randomize data + d = init_genrand( gRandomSeed ); + for (i=0; i +#include +#include +#include + +#include "procs.h" + +int test_arrayimagecopy_single_format(cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format) +{ + cl_uchar *bufptr, *imgptr; + clMemWrapper buffer, image; + int img_width = 512; 
+ int img_height = 512; + size_t elem_size; + size_t buffer_size; + int i; + cl_int err; + MTdata d; + cl_event copyevent; + + log_info("Testing %s %s\n", GetChannelOrderName(format->image_channel_order), GetChannelTypeName(format->image_channel_data_type)); + + image = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), format, img_width, img_height, 0, NULL, &err); + test_error(err, "create_image_2d failed"); + + err = clGetImageInfo(image, CL_IMAGE_ELEMENT_SIZE, sizeof(size_t), &elem_size, NULL); + test_error(err, "clGetImageInfo failed"); + + buffer_size = sizeof(cl_uchar) * elem_size * img_width * img_height; + + buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), buffer_size, NULL, &err); + test_error(err, "clCreateBuffer failed"); + + d = init_genrand( gRandomSeed ); + bufptr = (cl_uchar*)malloc(buffer_size); + for (i=0; i<(int)buffer_size; i++) { + bufptr[i] = (cl_uchar)genrand_int32(d); + } + free_mtdata(d); d = NULL; + + size_t origin[3]={0,0,0}, region[3]={img_width,img_height,1}; + err = clEnqueueWriteBuffer( queue, buffer, CL_TRUE, 0, buffer_size, bufptr, 0, NULL, NULL); + test_error(err, "clEnqueueWriteBuffer failed"); + + err = clEnqueueCopyBufferToImage( queue, buffer, image, 0, origin, region, 0, NULL, &copyevent ); + test_error(err, "clEnqueueCopyBufferToImage failed"); + + imgptr = (cl_uchar*)malloc(buffer_size); + + err = clEnqueueReadImage( queue, image, CL_TRUE, origin, region, 0, 0, imgptr, 1, &copyevent, NULL ); + test_error(err, "clEnqueueReadImage failed"); + + if (memcmp(bufptr, imgptr, buffer_size) != 0) { + log_error( "ERROR: Results did not validate!\n" ); + unsigned char * inchar = (unsigned char*)bufptr; + unsigned char * outchar = (unsigned char*)imgptr; + int failuresPrinted = 0; + int i; + for (i=0; i< (int)buffer_size; i+=(int)elem_size) { + int failed = 0; + int j; + for (j=0; j<(int)elem_size; j++) + if (inchar[i+j] != outchar[i+j]) + failed = 1; + char values[4096]; 
+ values[0] = 0; + if (failed) { + sprintf(values + strlen(values), "%d(0x%x) -> actual [", i, i); + int j; + for (j=0; j<(int)elem_size; j++) + sprintf(values + strlen( values), "0x%02x ", inchar[i+j]); + sprintf(values + strlen(values), "] != expected ["); + for (j=0; j<(int)elem_size; j++) + sprintf(values + strlen( values), "0x%02x ", outchar[i+j]); + sprintf(values + strlen(values), "]"); + log_error("%s\n", values); + failuresPrinted++; + } + if (failuresPrinted > 5) { + log_error("Not printing further failures...\n"); + break; + } + } + err = -1; + } + + free(bufptr); + free(imgptr); + + if (err) + log_error("ARRAY to IMAGE copy test failed for image_channel_order=0x%lx and image_channel_data_type=0x%lx\n", + (unsigned long)format->image_channel_order, (unsigned long)format->image_channel_data_type); + + return err; +} + +int test_arrayimagecopy(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + cl_int err; + cl_image_format *formats; + cl_uint num_formats; + cl_uint i; + + PASSIVE_REQUIRE_IMAGE_SUPPORT( device ) + + err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 0, NULL, &num_formats); + test_error(err, "clGetSupportedImageFormats failed"); + + formats = (cl_image_format *)malloc(num_formats * sizeof(cl_image_format)); + + err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, num_formats, formats, NULL); + test_error(err, "clGetSupportedImageFormats failed"); + + for (i = 0; i < num_formats; i++) { + err |= test_arrayimagecopy_single_format(device, context, queue, &formats[i]); + } + + if (err) + log_error("ARRAY to IMAGE copy test failed\n"); + else + log_info("ARRAY to IMAGE copy test passed\n"); + + return err; +} diff --git a/test_conformance/compatibility/test_conformance/basic/test_arrayimagecopy3d.c b/test_conformance/compatibility/test_conformance/basic/test_arrayimagecopy3d.c new file mode 100644 index 00000000..5b7ed5a7 --- /dev/null +++ 
b/test_conformance/compatibility/test_conformance/basic/test_arrayimagecopy3d.c @@ -0,0 +1,142 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" + +#include <stdio.h> +#include <string.h> +#include <sys/types.h> +#include <sys/stat.h> + +#include "procs.h" + +int test_arrayimagecopy3d_single_format(cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format) +{ + cl_uchar *bufptr, *imgptr; + clMemWrapper buffer, image; + int img_width = 128; + int img_height = 128; + int img_depth = 32; + size_t elem_size; + size_t buffer_size; + int i; + cl_int err; + MTdata d; + cl_event copyevent; + + log_info("Testing %s %s\n", GetChannelOrderName(format->image_channel_order), GetChannelTypeName(format->image_channel_data_type)); + + image = create_image_3d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), format, img_width, img_height, img_depth, 0, 0, NULL, &err); + test_error(err, "create_image_3d failed"); + + err = clGetImageInfo(image, CL_IMAGE_ELEMENT_SIZE, sizeof(size_t), &elem_size, NULL); + test_error(err, "clGetImageInfo failed"); + + buffer_size = sizeof(cl_uchar) * elem_size * img_width * img_height * img_depth; + + buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), buffer_size, NULL, &err); + test_error(err, "clCreateBuffer failed"); + + d = init_genrand( gRandomSeed ); + bufptr = (cl_uchar*)malloc(buffer_size); + for 
(i=0; i<(int)buffer_size; i++) { + bufptr[i] = (cl_uchar)genrand_int32(d); + } + free_mtdata(d); d = NULL; + + size_t origin[3]={0,0,0}, region[3]={img_width,img_height,img_depth}; + err = clEnqueueWriteBuffer( queue, buffer, CL_TRUE, 0, buffer_size, bufptr, 0, NULL, NULL); + test_error(err, "clEnqueueWriteBuffer failed"); + + err = clEnqueueCopyBufferToImage( queue, buffer, image, 0, origin, region, 0, NULL, &copyevent ); + test_error(err, "clEnqueueCopyBufferToImage failed"); + + imgptr = (cl_uchar*)malloc(buffer_size); + + err = clEnqueueReadImage( queue, image, CL_TRUE, origin, region, 0, 0, imgptr, 1, &copyevent, NULL ); + test_error(err, "clEnqueueReadImage failed"); + + if (memcmp(bufptr, imgptr, buffer_size) != 0) { + log_error( "ERROR: Results did not validate!\n" ); + unsigned char * inchar = (unsigned char*)bufptr; + unsigned char * outchar = (unsigned char*)imgptr; + int failuresPrinted = 0; + int i; + for (i=0; i< (int)buffer_size; i+=(int)elem_size) { + int failed = 0; + int j; + for (j=0; j<(int)elem_size; j++) + if (inchar[i+j] != outchar[i+j]) + failed = 1; + char values[4096]; + values[0] = 0; + if (failed) { + sprintf(values + strlen(values), "%d(0x%x) -> actual [", i, i); + int j; + for (j=0; j<(int)elem_size; j++) + sprintf(values + strlen( values), "0x%02x ", inchar[i+j]); + sprintf(values + strlen(values), "] != expected ["); + for (j=0; j<(int)elem_size; j++) + sprintf(values + strlen( values), "0x%02x ", outchar[i+j]); + sprintf(values + strlen(values), "]"); + log_error("%s\n", values); + failuresPrinted++; + } + if (failuresPrinted > 5) { + log_error("Not printing further failures...\n"); + break; + } + } + err = -1; + } + + free(bufptr); + free(imgptr); + + if (err) + log_error("ARRAY to IMAGE3D copy test failed for image_channel_order=0x%lx and image_channel_data_type=0x%lx\n", + (unsigned long)format->image_channel_order, (unsigned long)format->image_channel_data_type); + + return err; +} + +int test_arrayimagecopy3d(cl_device_id device, 
cl_context context, cl_command_queue queue, int num_elements) +{ + cl_int err; + cl_image_format *formats; + cl_uint num_formats; + cl_uint i; + + PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( device ) + + err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE3D, 0, NULL, &num_formats); + test_error(err, "clGetSupportedImageFormats failed"); + + formats = (cl_image_format *)malloc(num_formats * sizeof(cl_image_format)); + + err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE3D, num_formats, formats, NULL); + test_error(err, "clGetSupportedImageFormats failed"); + + for (i = 0; i < num_formats; i++) { + err |= test_arrayimagecopy3d_single_format(device, context, queue, &formats[i]); + } + + if (err) + log_error("ARRAY to IMAGE3D copy test failed\n"); + else + log_info("ARRAY to IMAGE3D copy test passed\n"); + + return err; +} diff --git a/test_conformance/compatibility/test_conformance/basic/test_arrayreadwrite.c b/test_conformance/compatibility/test_conformance/basic/test_arrayreadwrite.c new file mode 100644 index 00000000..3a58c6aa --- /dev/null +++ b/test_conformance/compatibility/test_conformance/basic/test_arrayreadwrite.c @@ -0,0 +1,94 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include + + +#include "procs.h" + + +int +test_arrayreadwrite(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + cl_uint *inptr, *outptr; + cl_mem streams[1]; + int num_tries = 400; + num_elements = 1024 * 1024 * 4; + int i, j, err; + MTdata d; + + inptr = (cl_uint*)malloc(num_elements*sizeof(cl_uint)); + outptr = (cl_uint*)malloc(num_elements*sizeof(cl_uint)); + + // randomize data + d = init_genrand( gRandomSeed ); + for (i=0; i 0 && offset < num_elements) + break; + } while (1); + cb = (int)(genrand_int32(d) & 0x7FFFFFFF); + if (cb > (num_elements - offset)) + cb = num_elements - offset; + + err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, offset*sizeof(cl_uint), sizeof(cl_uint)*cb,&inptr[offset], 0, NULL, NULL); + test_error(err, "clEnqueueWriteBuffer failed"); + + err = clEnqueueReadBuffer( queue, streams[0], CL_TRUE, offset*sizeof(cl_uint), cb*sizeof(cl_uint), &outptr[offset], 0, NULL, NULL ); + test_error(err, "clEnqueueReadBuffer failed"); + + for (j=offset; j +#include +#include +#include +#include +#include + + +#include "procs.h" +#include "../../test_common/harness/conversions.h" +#include "../../test_common/harness/typeWrappers.h" + + +static const char *astype_kernel_pattern = +"%s\n" +"__kernel void test_fn( __global %s%s *src, __global %s%s *dst )\n" +"{\n" +" int tid = get_global_id( 0 );\n" +" %s%s tmp = as_%s%s( src[ tid ] );\n" +" dst[ tid ] = tmp;\n" +"}\n"; + +static const char *astype_kernel_pattern_V3srcV3dst = +"%s\n" +"__kernel void test_fn( __global %s *src, __global %s *dst )\n" +"{\n" +" int tid = get_global_id( 0 );\n" +" %s%s tmp = as_%s%s( vload3(tid,src) );\n" +" vstore3(tmp,tid,dst);\n" +"}\n"; +// in the printf, remove the third and fifth argument, each of which +// should be a "3", when copying from the printf for astype_kernel_pattern + +static const char *astype_kernel_pattern_V3dst = +"%s\n" 
+"__kernel void test_fn( __global %s%s *src, __global %s *dst )\n" +"{\n" +" int tid = get_global_id( 0 );\n" +" %s3 tmp = as_%s3( src[ tid ] );\n" +" vstore3(tmp,tid,dst);\n" +"}\n"; +// in the printf, remove the fifth argument, which +// should be a "3", when copying from the printf for astype_kernel_pattern + + +static const char *astype_kernel_pattern_V3src = +"%s\n" +"__kernel void test_fn( __global %s *src, __global %s%s *dst )\n" +"{\n" +" int tid = get_global_id( 0 );\n" +" %s%s tmp = as_%s%s( vload3(tid,src) );\n" +" dst[ tid ] = tmp;\n" +"}\n"; +// in the printf, remove the third argument, which +// should be a "3", when copying from the printf for astype_kernel_pattern + + +int test_astype_set( cl_device_id device, cl_context context, cl_command_queue queue, ExplicitType inVecType, ExplicitType outVecType, + unsigned int vecSize, unsigned int outVecSize, + int numElements ) +{ + int error; + + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper streams[ 2 ]; + + char programSrc[ 10240 ]; + size_t threads[ 1 ], localThreads[ 1 ]; + size_t typeSize = get_explicit_type_size( inVecType ); + size_t outTypeSize = get_explicit_type_size(outVecType); + char sizeNames[][ 3 ] = { "", "", "2", "3", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" }; + MTdata d; + + + + // Create program + if(outVecSize == 3 && vecSize == 3) { + // astype_kernel_pattern_V3srcV3dst + sprintf( programSrc, astype_kernel_pattern_V3srcV3dst, + (outVecType == kDouble || inVecType == kDouble) ? 
"#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "", + get_explicit_type_name( inVecType ), // sizeNames[ vecSize ], + get_explicit_type_name( outVecType ), // sizeNames[ outVecSize ], + get_explicit_type_name( outVecType ), sizeNames[ outVecSize ], + get_explicit_type_name( outVecType ), sizeNames[ outVecSize ] ); + } else if(outVecSize == 3) { + // astype_kernel_pattern_V3dst + sprintf( programSrc, astype_kernel_pattern_V3dst, + (outVecType == kDouble || inVecType == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "", + get_explicit_type_name( inVecType ), sizeNames[ vecSize ], + get_explicit_type_name( outVecType ), + get_explicit_type_name( outVecType ), + get_explicit_type_name( outVecType )); + + } else if(vecSize == 3) { + // astype_kernel_pattern_V3src + sprintf( programSrc, astype_kernel_pattern_V3src, + (outVecType == kDouble || inVecType == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "", + get_explicit_type_name( inVecType ),// sizeNames[ vecSize ], + get_explicit_type_name( outVecType ), sizeNames[ outVecSize ], + get_explicit_type_name( outVecType ), sizeNames[ outVecSize ], + get_explicit_type_name( outVecType ), sizeNames[ outVecSize ]); + } else { + sprintf( programSrc, astype_kernel_pattern, + (outVecType == kDouble || inVecType == kDouble) ? 
"#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "", + get_explicit_type_name( inVecType ), sizeNames[ vecSize ], + get_explicit_type_name( outVecType ), sizeNames[ outVecSize ], + get_explicit_type_name( outVecType ), sizeNames[ outVecSize ], + get_explicit_type_name( outVecType ), sizeNames[ outVecSize ]); + } + + const char *ptr = programSrc; + error = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "test_fn" ); + test_error( error, "Unable to create testing kernel" ); + + + // Create some input values + size_t inBufferSize = sizeof(char)* numElements * get_explicit_type_size( inVecType ) * vecSize; + char *inBuffer = (char*)malloc( inBufferSize ); + size_t outBufferSize = sizeof(char)* numElements * get_explicit_type_size( outVecType ) *outVecSize; + char *outBuffer = (char*)malloc( outBufferSize ); + + d = init_genrand( gRandomSeed ); + generate_random_data( inVecType, numElements * vecSize, + d, inBuffer ); + free_mtdata(d); d = NULL; + + // Create I/O streams and set arguments + streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, inBufferSize, inBuffer, &error ); + test_error( error, "Unable to create I/O stream" ); + streams[ 1 ] = clCreateBuffer( context, CL_MEM_READ_WRITE, outBufferSize, NULL, &error ); + test_error( error, "Unable to create I/O stream" ); + + error = clSetKernelArg( kernel, 0, sizeof( streams[ 0 ] ), &streams[ 0 ] ); + test_error( error, "Unable to set kernel argument" ); + error = clSetKernelArg( kernel, 1, sizeof( streams[ 1 ] ), &streams[ 1 ] ); + test_error( error, "Unable to set kernel argument" ); + + + // Run the kernel + threads[ 0 ] = numElements; + error = get_max_common_work_group_size( context, kernel, threads[ 0 ], &localThreads[ 0 ] ); + test_error( error, "Unable to get group size to run with" ); + + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); + test_error( error, "Unable to run kernel" ); + + + // Get the results and compare + // The beauty 
is that astype is supposed to return the bit pattern as a different type, which means + // the output should have the exact same bit pattern as the input. No interpretation necessary! + error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, outBufferSize, outBuffer, 0, NULL, NULL ); + test_error( error, "Unable to read results" ); + + char *expected = inBuffer; + char *actual = outBuffer; + size_t compSize = typeSize*vecSize; + if(outTypeSize*outVecSize < compSize) { + compSize = outTypeSize*outVecSize; + } + + if(outVecSize == 4 && vecSize == 3) + { + // as_type4(vec3) should compile but produce undefined results?? + free(inBuffer); + free(outBuffer); + return 0; + } + + if(outVecSize != 3 && vecSize != 3 && outVecSize != vecSize) + { + // as_typen(vecm) should compile and run but produce + // implementation-defined results for m != n + // and n*sizeof(type) = sizeof(vecm) + free(inBuffer); + free(outBuffer); + return 0; + } + + for( int i = 0; i < numElements; i++ ) + { + if( memcmp( expected, actual, compSize ) != 0 ) + { + char expectedString[ 1024 ], actualString[ 1024 ]; + log_error( "ERROR: Data sample %d of %d for as_%s%d( %s%d ) did not validate (expected {%s}, got {%s})\n", + (int)i, (int)numElements, get_explicit_type_name( outVecType ), vecSize, get_explicit_type_name( inVecType ), vecSize, + GetDataVectorString( expected, typeSize, vecSize, expectedString ), + GetDataVectorString( actual, typeSize, vecSize, actualString ) ); + log_error("Src is :\n%s\n----\n%d threads %d localthreads\n", + programSrc, (int)threads[0],(int) localThreads[0]); + free(inBuffer); + free(outBuffer); + return 1; + } + expected += typeSize * vecSize; + actual += outTypeSize * outVecSize; + } + + free(inBuffer); + free(outBuffer); + return 0; +} + +int test_astype(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ) +{ + // Note: although casting to different vector element sizes that match the same size (i.e. 
short2 -> char4) is + // legal in OpenCL 1.0, the result is dependent on the device it runs on, which means there's no actual way + // for us to verify what is "valid". So the only thing we can test are types that match in size independent + // of the element count (char -> uchar, etc) + ExplicitType vecTypes[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble, kNumExplicitTypes }; + unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 }; + unsigned int inTypeIdx, outTypeIdx, sizeIdx, outSizeIdx; + size_t inTypeSize, outTypeSize; + int error = 0; + + for( inTypeIdx = 0; vecTypes[ inTypeIdx ] != kNumExplicitTypes; inTypeIdx++ ) + { + inTypeSize = get_explicit_type_size(vecTypes[inTypeIdx]); + + if( vecTypes[ inTypeIdx ] == kDouble && !is_extension_available( device, "cl_khr_fp64" ) ) + continue; + + if (( vecTypes[ inTypeIdx ] == kLong || vecTypes[ inTypeIdx ] == kULong ) && !gHasLong ) + continue; + + for( outTypeIdx = 0; vecTypes[ outTypeIdx ] != kNumExplicitTypes; outTypeIdx++ ) + { + outTypeSize = get_explicit_type_size(vecTypes[outTypeIdx]); + if( vecTypes[ outTypeIdx ] == kDouble && !is_extension_available( device, "cl_khr_fp64" ) ) { + continue; + } + + if (( vecTypes[ outTypeIdx ] == kLong || vecTypes[ outTypeIdx ] == kULong ) && !gHasLong ) + continue; + + // change this check + if( inTypeIdx == outTypeIdx ) { + continue; + } + + log_info( " (%s->%s)\n", get_explicit_type_name( vecTypes[ inTypeIdx ] ), get_explicit_type_name( vecTypes[ outTypeIdx ] ) ); + fflush( stdout ); + + for( sizeIdx = 0; vecSizes[ sizeIdx ] != 0; sizeIdx++ ) + { + + for(outSizeIdx = 0; vecSizes[outSizeIdx] != 0; outSizeIdx++) + { + if(vecSizes[sizeIdx]*inTypeSize != + vecSizes[outSizeIdx]*outTypeSize ) + { + continue; + } + error += test_astype_set( device, context, queue, vecTypes[ inTypeIdx ], vecTypes[ outTypeIdx ], vecSizes[ sizeIdx ], vecSizes[outSizeIdx], n_elems ); + + + } + + } + if(get_explicit_type_size(vecTypes[inTypeIdx]) == + 
get_explicit_type_size(vecTypes[outTypeIdx])) { + // as_type3(vec4) allowed, as_type4(vec3) not allowed + error += test_astype_set( device, context, queue, vecTypes[ inTypeIdx ], vecTypes[ outTypeIdx ], 3, 4, n_elems ); + error += test_astype_set( device, context, queue, vecTypes[ inTypeIdx ], vecTypes[ outTypeIdx ], 4, 3, n_elems ); + } + + } + } + return error; +} + + diff --git a/test_conformance/compatibility/test_conformance/basic/test_async_copy.cpp b/test_conformance/compatibility/test_conformance/basic/test_async_copy.cpp new file mode 100644 index 00000000..234ed698 --- /dev/null +++ b/test_conformance/compatibility/test_conformance/basic/test_async_copy.cpp @@ -0,0 +1,276 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include + + + +#include "procs.h" +#include "../../test_common/harness/conversions.h" + +static const char *async_global_to_local_kernel = +"%s\n" // optional pragma string +"__kernel void test_fn( const __global %s *src, __global %s *dst, __local %s *localBuffer, int copiesPerWorkgroup, int copiesPerWorkItem )\n" +"{\n" +" int i;\n" +// Zero the local storage first +" for(i=0; i max_local_workgroup_size[0]) + max_workgroup_size = max_local_workgroup_size[0]; + + size_t numberOfCopiesPerWorkitem = 13; + size_t localStorageSpacePerWorkitem = numberOfCopiesPerWorkitem*elementSize; + size_t maxLocalWorkgroupSize = (((int)max_local_mem_size/2)/localStorageSpacePerWorkitem); + + // Calculation can return 0 on embedded devices due to 1KB local mem limit + if(maxLocalWorkgroupSize == 0) + { + maxLocalWorkgroupSize = 1; + } + + size_t localWorkgroupSize = maxLocalWorkgroupSize; + if (maxLocalWorkgroupSize > max_workgroup_size) + localWorkgroupSize = max_workgroup_size; + + size_t localBufferSize = localWorkgroupSize*elementSize*numberOfCopiesPerWorkitem; + size_t numberOfLocalWorkgroups = 1111; + size_t globalBufferSize = numberOfLocalWorkgroups*localBufferSize; + size_t globalWorkgroupSize = numberOfLocalWorkgroups*localWorkgroupSize; + + inBuffer = (void*)malloc(globalBufferSize); + outBuffer = (void*)malloc(globalBufferSize); + memset(outBuffer, 0, globalBufferSize); + + cl_int copiesPerWorkItemInt, copiesPerWorkgroup; + copiesPerWorkItemInt = (int)numberOfCopiesPerWorkitem; + copiesPerWorkgroup = (int)(numberOfCopiesPerWorkitem*localWorkgroupSize); + + log_info("Global: %d, local %d, local buffer %db, global buffer %db, each work group will copy %d elements and each work item item will copy %d elements.\n", + (int) globalWorkgroupSize, (int)localWorkgroupSize, (int)localBufferSize, (int)globalBufferSize, copiesPerWorkgroup, copiesPerWorkItemInt); + + threads[0] = globalWorkgroupSize; + 
localThreads[0] = localWorkgroupSize; + + d = init_genrand( gRandomSeed ); + generate_random_data( vecType, globalBufferSize/get_explicit_type_size(vecType), d, inBuffer ); + free_mtdata(d); d = NULL; + + streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, globalBufferSize, inBuffer, &error ); + test_error( error, "Unable to create input buffer" ); + streams[ 1 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, globalBufferSize, outBuffer, &error ); + test_error( error, "Unable to create output buffer" ); + + error = clSetKernelArg( kernel, 0, sizeof( streams[ 0 ] ), &streams[ 0 ] ); + test_error( error, "Unable to set kernel argument" ); + error = clSetKernelArg( kernel, 1, sizeof( streams[ 1 ] ), &streams[ 1 ] ); + test_error( error, "Unable to set kernel argument" ); + error = clSetKernelArg( kernel, 2, localBufferSize, NULL ); + test_error( error, "Unable to set kernel argument" ); + error = clSetKernelArg( kernel, 3, sizeof(copiesPerWorkgroup), &copiesPerWorkgroup ); + test_error( error, "Unable to set kernel argument" ); + error = clSetKernelArg( kernel, 4, sizeof(copiesPerWorkItemInt), &copiesPerWorkItemInt ); + test_error( error, "Unable to set kernel argument" ); + + // Enqueue + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); + test_error( error, "Unable to queue kernel" ); + + // Read + error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, globalBufferSize, outBuffer, 0, NULL, NULL ); + test_error( error, "Unable to read results" ); + + // Verify + if( memcmp( inBuffer, outBuffer, globalBufferSize ) != 0 ) + { + log_error( "ERROR: Results of copy did not validate!\n" ); + unsigned char * inchar = (unsigned char*)inBuffer; + unsigned char * outchar = (unsigned char*)outBuffer; + int failuresPrinted = 0; + for (int i=0; i< (int)globalBufferSize; i+=(int)elementSize) { + int failed = 0; + for (int j=0; j<(int)elementSize; j++) + if (inchar[i+j] != outchar[i+j]) + failed = 1; + char 
values[4096]; + values[0] = 0; + if (failed) { + sprintf(values + strlen( values), "%d -> [", i); + for (int j=0; j<(int)elementSize; j++) + sprintf(values + strlen( values), "%2x ", inchar[i+j]); + sprintf(values + strlen(values), "] != ["); + for (int j=0; j<(int)elementSize; j++) + sprintf(values + strlen( values), "%2x ", outchar[i+j]); + sprintf(values + strlen(values), "]"); + log_error("%s\n", values); + failuresPrinted++; + } + if (failuresPrinted > 5) { + log_error("Not printing further failures...\n"); + break; + } + } + return -1; + } + + free(inBuffer); + free(outBuffer); + + return 0; +} + +int test_copy_all_types(cl_device_id deviceID, cl_context context, cl_command_queue queue, const char *kernelCode) { + ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble, kNumExplicitTypes }; + unsigned int vecSizes[] = { 1, 2, 4, 8, 16, 0 }; + unsigned int size, typeIndex; + + int errors = 0; + + for( typeIndex = 0; vecType[ typeIndex ] != kNumExplicitTypes; typeIndex++ ) + { + if( vecType[ typeIndex ] == kDouble && !is_extension_available( deviceID, "cl_khr_fp64" ) ) + continue; + + if (( vecType[ typeIndex ] == kLong || vecType[ typeIndex ] == kULong ) && !gHasLong ) + continue; + + for( size = 0; vecSizes[ size ] != 0; size++ ) + { + if (test_copy( deviceID, context, queue, kernelCode, vecType[typeIndex],vecSizes[size] )) { + errors++; + } + } + } + if (errors) + return -1; + return 0; +} + + + + +int test_async_copy_global_to_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return test_copy_all_types( deviceID, context, queue, async_global_to_local_kernel ); +} + +int test_async_copy_local_to_global(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return test_copy_all_types( deviceID, context, queue, async_local_to_global_kernel ); +} + +int test_prefetch(cl_device_id deviceID, cl_context context, cl_command_queue queue, 
int num_elements) +{ + return test_copy_all_types( deviceID, context, queue, prefetch_kernel ); +} + diff --git a/test_conformance/compatibility/test_conformance/basic/test_async_strided_copy.cpp b/test_conformance/compatibility/test_conformance/basic/test_async_strided_copy.cpp new file mode 100644 index 00000000..6db06398 --- /dev/null +++ b/test_conformance/compatibility/test_conformance/basic/test_async_strided_copy.cpp @@ -0,0 +1,267 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include + + + +#include "procs.h" +#include "../../test_common/harness/conversions.h" + +static const char *async_strided_global_to_local_kernel = +"%s\n" // optional pragma string +"%s__kernel void test_fn( const __global %s *src, __global %s *dst, __local %s *localBuffer, int copiesPerWorkgroup, int copiesPerWorkItem, int stride )\n" +"{\n" +" int i;\n" +// Zero the local storage first +" for(i=0; i max_local_workgroup_size[0]) + max_workgroup_size = max_local_workgroup_size[0]; + + cl_ulong max_global_mem_size; + error = clGetDeviceInfo(deviceID, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(max_global_mem_size), &max_global_mem_size, NULL); + test_error (error, "clGetDeviceInfo failed for CL_DEVICE_GLOBAL_MEM_SIZE"); + + cl_bool unified_mem; + error = clGetDeviceInfo(deviceID, CL_DEVICE_HOST_UNIFIED_MEMORY, sizeof(unified_mem), &unified_mem, NULL); + test_error (error, "clGetDeviceInfo failed for CL_DEVICE_HOST_UNIFIED_MEMORY"); + + int number_of_global_mem_buffers = (unified_mem) ? 4 : 2; + + size_t numberOfCopiesPerWorkitem = 3; + size_t localStorageSpacePerWorkitem = numberOfCopiesPerWorkitem*elementSize; + size_t maxLocalWorkgroupSize = (((int)max_local_mem_size/2)/localStorageSpacePerWorkitem); + + size_t localWorkgroupSize = maxLocalWorkgroupSize; + if (maxLocalWorkgroupSize > max_workgroup_size) + localWorkgroupSize = max_workgroup_size; + + size_t localBufferSize = localWorkgroupSize*elementSize*numberOfCopiesPerWorkitem; + size_t numberOfLocalWorkgroups = 579;//1111; + + // Reduce the numberOfLocalWorkgroups so that no more than 1/2 of CL_DEVICE_GLOBAL_MEM_SIZE is consumed + // by the allocated buffer. This is done to avoid resource errors resulting from address space fragmentation. 
+ size_t numberOfLocalWorkgroupsLimit = max_global_mem_size / (2 * number_of_global_mem_buffers * localBufferSize * stride); + if (numberOfLocalWorkgroups > numberOfLocalWorkgroupsLimit) numberOfLocalWorkgroups = numberOfLocalWorkgroupsLimit; + + size_t globalBufferSize = numberOfLocalWorkgroups*localBufferSize*stride; + size_t globalWorkgroupSize = numberOfLocalWorkgroups*localWorkgroupSize; + + inBuffer = (void*)malloc(globalBufferSize); + outBuffer = (void*)malloc(globalBufferSize); + memset(outBuffer, 0, globalBufferSize); + + cl_int copiesPerWorkItemInt, copiesPerWorkgroup; + copiesPerWorkItemInt = (int)numberOfCopiesPerWorkitem; + copiesPerWorkgroup = (int)(numberOfCopiesPerWorkitem*localWorkgroupSize); + + log_info("Global: %d, local %d, local buffer %db, global buffer %db, copy stride %d, each work group will copy %d elements and each work item item will copy %d elements.\n", + (int) globalWorkgroupSize, (int)localWorkgroupSize, (int)localBufferSize, (int)globalBufferSize, (int)stride, copiesPerWorkgroup, copiesPerWorkItemInt); + + threads[0] = globalWorkgroupSize; + localThreads[0] = localWorkgroupSize; + + d = init_genrand( gRandomSeed ); + generate_random_data( vecType, globalBufferSize/get_explicit_type_size(vecType), d, inBuffer ); + free_mtdata(d); d = NULL; + + streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, globalBufferSize, inBuffer, &error ); + test_error( error, "Unable to create input buffer" ); + streams[ 1 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, globalBufferSize, outBuffer, &error ); + test_error( error, "Unable to create output buffer" ); + + error = clSetKernelArg( kernel, 0, sizeof( streams[ 0 ] ), &streams[ 0 ] ); + test_error( error, "Unable to set kernel argument" ); + error = clSetKernelArg( kernel, 1, sizeof( streams[ 1 ] ), &streams[ 1 ] ); + test_error( error, "Unable to set kernel argument" ); + error = clSetKernelArg( kernel, 2, localBufferSize, NULL ); + test_error( error, "Unable to set kernel argument" 
); + error = clSetKernelArg( kernel, 3, sizeof(copiesPerWorkgroup), &copiesPerWorkgroup ); + test_error( error, "Unable to set kernel argument" ); + error = clSetKernelArg( kernel, 4, sizeof(copiesPerWorkItemInt), &copiesPerWorkItemInt ); + test_error( error, "Unable to set kernel argument" ); + error = clSetKernelArg( kernel, 5, sizeof(stride), &stride ); + test_error( error, "Unable to set kernel argument" ); + + // Enqueue + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); + test_error( error, "Unable to queue kernel" ); + + // Read + error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, globalBufferSize, outBuffer, 0, NULL, NULL ); + test_error( error, "Unable to read results" ); + + // Verify + for (int i=0; i<(int)globalBufferSize; i+=(int)elementSize*(int)stride) + { + if (memcmp( ((char *)inBuffer)+i, ((char *)outBuffer)+i, elementSize) != 0 ) + { + unsigned char * inchar = (unsigned char*)inBuffer + i; + unsigned char * outchar = (unsigned char*)outBuffer + i; + char values[4096]; + values[0] = 0; + + log_error( "ERROR: Results of copy did not validate!\n" ); + sprintf(values + strlen( values), "%d -> [", i); + for (int j=0; j<(int)elementSize; j++) + sprintf(values + strlen( values), "%2x ", inchar[i*elementSize+j]); + sprintf(values + strlen(values), "] != ["); + for (int j=0; j<(int)elementSize; j++) + sprintf(values + strlen( values), "%2x ", outchar[i*elementSize+j]); + sprintf(values + strlen(values), "]"); + log_error("%s\n", values); + + return -1; + } + } + + free(inBuffer); + free(outBuffer); + + return 0; +} + +int test_strided_copy_all_types(cl_device_id deviceID, cl_context context, cl_command_queue queue, const char *kernelCode) +{ + ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble, kNumExplicitTypes }; + unsigned int vecSizes[] = { 1, 2, 4, 8, 16, 0 }; + unsigned int strideSizes[] = { 1, 3, 4, 5, 0 }; + unsigned int size, typeIndex, 
stride; + + int errors = 0; + + for( typeIndex = 0; vecType[ typeIndex ] != kNumExplicitTypes; typeIndex++ ) + { + if( vecType[ typeIndex ] == kDouble && !is_extension_available( deviceID, "cl_khr_fp64" ) ) + continue; + + if (( vecType[ typeIndex ] == kLong || vecType[ typeIndex ] == kULong ) && !gHasLong ) + continue; + + for( size = 0; vecSizes[ size ] != 0; size++ ) + { + for( stride = 0; strideSizes[ stride ] != 0; stride++) + { + if (test_strided_copy( deviceID, context, queue, kernelCode, vecType[typeIndex], vecSizes[size], strideSizes[stride] )) + { + errors++; + } + } + } + } + if (errors) + return -1; + return 0; +} + + + + +int test_async_strided_copy_global_to_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return test_strided_copy_all_types( deviceID, context, queue, async_strided_global_to_local_kernel ); +} + +int test_async_strided_copy_local_to_global(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return test_strided_copy_all_types( deviceID, context, queue, async_strided_local_to_global_kernel ); +} + diff --git a/test_conformance/compatibility/test_conformance/basic/test_barrier.c b/test_conformance/compatibility/test_conformance/basic/test_barrier.c new file mode 100644 index 00000000..87d106d3 --- /dev/null +++ b/test_conformance/compatibility/test_conformance/basic/test_barrier.c @@ -0,0 +1,158 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include + + +#include "procs.h" + +const char *barrier_kernel_code = +"__kernel void compute_sum(__global int *a, int n, __global int *tmp_sum, __global int *sum)\n" +"{\n" +" int tid = get_local_id(0);\n" +" int lsize = get_local_size(0);\n" +" int i;\n" +"\n" +" tmp_sum[tid] = 0;\n" +" for (i=tid; i1; i = hadd(i,1))\n" +" {\n" +" barrier(CLK_GLOBAL_MEM_FENCE);\n" +" if (tid + i < lsize)\n" +" tmp_sum[tid] += tmp_sum[tid + i];\n" +" lsize = i; \n" +" }\n" +"\n" +" //no barrier is required here because last person to write to tmp_sum[0] was tid 0 \n" +" if (tid == 0)\n" +" *sum = tmp_sum[0];\n" +"}\n"; + + +static int +verify_sum(int *inptr, int *tmpptr, int *outptr, int n) +{ + int r = 0; + int i; + + for (i=0; i max_local_workgroup_size[0]) + max_threadgroup_size = max_local_workgroup_size[0]; + + // work group size must divide evenly into the global size + while( num_elements % max_threadgroup_size ) + max_threadgroup_size--; + + input_ptr = (int*)malloc(sizeof(int) * num_elements); + output_ptr = (int*)malloc(sizeof(int)); + + streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * num_elements, NULL, &err); + test_error(err, "clCreateBuffer failed."); + streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int), NULL, &err); + test_error(err, "clCreateBuffer failed."); + streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * max_threadgroup_size, NULL, &err); + test_error(err, "clCreateBuffer failed."); + + d = init_genrand( gRandomSeed ); + for (i=0; i +#include +#include +#include + +#include "procs.h" + +const char *kernel_code = +"__kernel void test_kernel(\n" +"char%s c, uchar%s uc, short%s s, ushort%s us, int%s i, uint%s ui, float%s f,\n" +"__global float%s *result)\n" 
+"{\n" +" result[0] = %s(c);\n" +" result[1] = %s(uc);\n" +" result[2] = %s(s);\n" +" result[3] = %s(us);\n" +" result[4] = %s(i);\n" +" result[5] = %s(ui);\n" +" result[6] = f;\n" +"}\n"; + +const char *kernel_code_long = +"__kernel void test_kernel_long(\n" +"long%s l, ulong%s ul,\n" +"__global float%s *result)\n" +"{\n" +" result[0] = %s(l);\n" +" result[1] = %s(ul);\n" +"}\n"; + +int +test_basic_parameter_types_long(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + clMemWrapper results; + int error; + size_t global[3] = {1, 1, 1}; + float results_back[2*16]; + int count, index; + const char* types[] = { "long", "ulong" }; + char kernel_string[8192]; + int sizes[] = {1, 2, 4, 8, 16}; + const char* size_strings[] = {"", "2", "4", "8", "16"}; + float expected; + int total_errors = 0; + int size_to_test; + char *ptr; + char convert_string[1024]; + size_t max_parameter_size; + + // We don't really care about the contents since we're just testing that the types work. + cl_long l[16]={-21,-1,2,-3,4,-5,6,-7,8,-9,10,-11,12,-13,14,-15}; + cl_ulong ul[16]={22,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; + + // Calculate how large our paramter size is to the kernel + size_t parameter_size = sizeof(cl_long) + sizeof(cl_ulong); + + // Init our strings. 
+ kernel_string[0] = '\0'; + convert_string[0] = '\0'; + + // Get the maximum parameter size allowed + error = clGetDeviceInfo( device, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof( max_parameter_size ), &max_parameter_size, NULL ); + test_error( error, "Unable to get max parameter size from device" ); + + // Create the results buffer + results = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float)*2*16, NULL, &error); + test_error(error, "clCreateBuffer failed"); + + // Go over all the vector sizes + for (size_to_test = 0; size_to_test < 5; size_to_test++) { + clProgramWrapper program; + clKernelWrapper kernel; + + size_t total_parameter_size = parameter_size*sizes[size_to_test] + sizeof(cl_mem); + if (total_parameter_size > max_parameter_size) { + log_info("Can not test with vector size %d because it would exceed the maximum allowed parameter size to the kernel. (%d > %d)\n", + (int)sizes[size_to_test], (int)total_parameter_size, (int)max_parameter_size); + continue; + } + + log_info("Testing vector size %d\n", sizes[size_to_test]); + + // If size is > 1, then we need a explicit convert call. 
+ if (sizes[size_to_test] > 1) { + sprintf(convert_string, "convert_float%s", size_strings[size_to_test]); + } else { + sprintf(convert_string, " "); + } + + // Build the kernel + sprintf(kernel_string, kernel_code_long, + size_strings[size_to_test], size_strings[size_to_test], size_strings[size_to_test], + convert_string, convert_string + ); + + ptr = kernel_string; + error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&ptr, "test_kernel_long"); + test_error(error, "create single kernel failed"); + + // Set the arguments + for (count = 0; count < 2; count++) { + switch (count) { + case 0: error = clSetKernelArg(kernel, count, sizeof(cl_long)*sizes[size_to_test], &l); break; + case 1: error = clSetKernelArg(kernel, count, sizeof(cl_ulong)*sizes[size_to_test], &ul); break; + default: log_error("Test error"); break; + } + if (error) + log_error("Setting kernel arg %d %s%s: ", count, types[count], size_strings[size_to_test]); + test_error(error, "clSetKernelArgs failed"); + } + error = clSetKernelArg(kernel, 2, sizeof(cl_mem), &results); + test_error(error, "clSetKernelArgs failed"); + + // Execute + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global, NULL, 0, NULL, NULL); + test_error(error, "clEnqueueNDRangeKernel failed"); + + // Read back the results + error = clEnqueueReadBuffer(queue, results, CL_TRUE, 0, sizeof(cl_float)*2*16, results_back, 0, NULL, NULL); + test_error(error, "clEnqueueReadBuffer failed"); + + // Verify the results + for (count = 0; count < 2; count++) { + for (index=0; index < sizes[size_to_test]; index++) { + switch (count) { + case 0: expected = (float)l[index]; break; + case 1: expected = (float)ul[index]; break; + default: log_error("Test error"); break; + } + + if (results_back[count*sizes[size_to_test]+index] != expected) { + total_errors++; + log_error("Conversion from %s%s failed: index %d got %g, expected %g.\n", types[count], size_strings[size_to_test], + index, 
results_back[count*sizes[size_to_test]+index], expected); + } + } + } + } + + return total_errors; +} + +int +test_basic_parameter_types(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + clMemWrapper results; + int error; + size_t global[3] = {1, 1, 1}; + float results_back[7*16]; + int count, index; + const char* types[] = {"char", "uchar", "short", "ushort", "int", "uint", "float"}; + char kernel_string[8192]; + int sizes[] = {1, 2, 4, 8, 16}; + const char* size_strings[] = {"", "2", "4", "8", "16"}; + float expected; + int total_errors = 0; + int size_to_test; + char *ptr; + char convert_string[1024]; + size_t max_parameter_size; + + // We don't really care about the contents since we're just testing that the types work. + cl_char c[16]={0,-1,2,-3,4,-5,6,-7,8,-9,10,-11,12,-13,14,-15}; + cl_uchar uc[16]={16,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; + cl_short s[16]={-17,-1,2,-3,4,-5,6,-7,8,-9,10,-11,12,-13,14,-15}; + cl_ushort us[16]={18,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; + cl_int i[16]={-19,-1,2,-3,4,-5,6,-7,8,-9,10,-11,12,-13,14,-15}; + cl_uint ui[16]={20,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; + cl_float f[16]={-23,-1,2,-3,4,-5,6,-7,8,-9,10,-11,12,-13,14,-15}; + + // Calculate how large our paramter size is to the kernel + size_t parameter_size = sizeof(cl_char) + sizeof(cl_uchar) + + sizeof(cl_short) +sizeof(cl_ushort) + + sizeof(cl_int) +sizeof(cl_uint) + + sizeof(cl_float); + + // Init our strings. 
+ kernel_string[0] = '\0'; + convert_string[0] = '\0'; + + // Get the maximum parameter size allowed + error = clGetDeviceInfo( device, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof( max_parameter_size ), &max_parameter_size, NULL ); + test_error( error, "Unable to get max parameter size from device" ); + + // Create the results buffer + results = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float)*7*16, NULL, &error); + test_error(error, "clCreateBuffer failed"); + + // Go over all the vector sizes + for (size_to_test = 0; size_to_test < 5; size_to_test++) { + clProgramWrapper program; + clKernelWrapper kernel; + + size_t total_parameter_size = parameter_size*sizes[size_to_test] + sizeof(cl_mem); + if (total_parameter_size > max_parameter_size) { + log_info("Can not test with vector size %d because it would exceed the maximum allowed parameter size to the kernel. (%d > %d)\n", + (int)sizes[size_to_test], (int)total_parameter_size, (int)max_parameter_size); + continue; + } + + log_info("Testing vector size %d\n", sizes[size_to_test]); + + // If size is > 1, then we need a explicit convert call. 
+ if (sizes[size_to_test] > 1) { + sprintf(convert_string, "convert_float%s", size_strings[size_to_test]); + } else { + sprintf(convert_string, " "); + } + + // Build the kernel + sprintf(kernel_string, kernel_code, + size_strings[size_to_test], size_strings[size_to_test], size_strings[size_to_test], + size_strings[size_to_test], size_strings[size_to_test], size_strings[size_to_test], + size_strings[size_to_test], size_strings[size_to_test], + convert_string, convert_string, convert_string, + convert_string, convert_string, convert_string + ); + + ptr = kernel_string; + error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&ptr, "test_kernel"); + test_error(error, "create single kernel failed"); + + // Set the arguments + for (count = 0; count < 7; count++) { + switch (count) { + case 0: error = clSetKernelArg(kernel, count, sizeof(cl_char)*sizes[size_to_test], &c); break; + case 1: error = clSetKernelArg(kernel, count, sizeof(cl_uchar)*sizes[size_to_test], &uc); break; + case 2: error = clSetKernelArg(kernel, count, sizeof(cl_short)*sizes[size_to_test], &s); break; + case 3: error = clSetKernelArg(kernel, count, sizeof(cl_ushort)*sizes[size_to_test], &us); break; + case 4: error = clSetKernelArg(kernel, count, sizeof(cl_int)*sizes[size_to_test], &i); break; + case 5: error = clSetKernelArg(kernel, count, sizeof(cl_uint)*sizes[size_to_test], &ui); break; + case 6: error = clSetKernelArg(kernel, count, sizeof(cl_float)*sizes[size_to_test], &f); break; + default: log_error("Test error"); break; + } + if (error) + log_error("Setting kernel arg %d %s%s: ", count, types[count], size_strings[size_to_test]); + test_error(error, "clSetKernelArgs failed"); + } + error = clSetKernelArg(kernel, 7, sizeof(cl_mem), &results); + test_error(error, "clSetKernelArgs failed"); + + // Execute + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global, NULL, 0, NULL, NULL); + test_error(error, "clEnqueueNDRangeKernel failed"); + + // Read back the 
results + error = clEnqueueReadBuffer(queue, results, CL_TRUE, 0, sizeof(cl_float)*7*16, results_back, 0, NULL, NULL); + test_error(error, "clEnqueueReadBuffer failed"); + + // Verify the results + for (count = 0; count < 7; count++) { + for (index=0; index < sizes[size_to_test]; index++) { + switch (count) { + case 0: expected = (float)c[index]; break; + case 1: expected = (float)uc[index]; break; + case 2: expected = (float)s[index]; break; + case 3: expected = (float)us[index]; break; + case 4: expected = (float)i[index]; break; + case 5: expected = (float)ui[index]; break; + case 6: expected = (float)f[index]; break; + default: log_error("Test error"); break; + } + + if (results_back[count*sizes[size_to_test]+index] != expected) { + total_errors++; + log_error("Conversion from %s%s failed: index %d got %g, expected %g.\n", types[count], size_strings[size_to_test], + index, results_back[count*sizes[size_to_test]+index], expected); + } + } + } + } + + if (gHasLong) { + log_info("Testing long types...\n"); + total_errors += test_basic_parameter_types_long( device, context, queue, num_elements ); + } + else { + log_info("Longs unsupported, skipping."); + } + + return total_errors; +} + + + diff --git a/test_conformance/compatibility/test_conformance/basic/test_bufferreadwriterect.c b/test_conformance/compatibility/test_conformance/basic/test_bufferreadwriterect.c new file mode 100644 index 00000000..e72ae708 --- /dev/null +++ b/test_conformance/compatibility/test_conformance/basic/test_bufferreadwriterect.c @@ -0,0 +1,529 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include + +#include "procs.h" + +#define CL_EXIT_ERROR(cmd,format,...) \ +{ \ +if ((cmd) != CL_SUCCESS) { \ +log_error("CL ERROR: %s %u: ", __FILE__,__LINE__); \ +log_error(format,## __VA_ARGS__ ); \ +log_error("\n"); \ +/*abort();*/ \ +} \ +} + +typedef unsigned char BufferType; + +// Globals for test +cl_command_queue queue; + +// Width and height of each pair of images. +enum { TotalImages = 8 }; +size_t width [TotalImages]; +size_t height [TotalImages]; +size_t depth [TotalImages]; + +// cl buffer and host buffer. +cl_mem buffer [TotalImages]; +BufferType* verify[TotalImages]; +BufferType* backing[TotalImages]; + +// Temporary buffer used for read and write operations. +BufferType* tmp_buffer; +size_t tmp_buffer_size; + +size_t num_tries = 50; // Number of randomly selected operations to perform. +size_t alloc_scale = 2; // Scale term applied buffer allocation size. +MTdata mt; + +// Initialize a buffer in host memory containing random values of the specified size. +static void initialize_image(BufferType* ptr, size_t w, size_t h, size_t d, MTdata mt) +{ + enum { ElementSize = sizeof(BufferType)/sizeof(unsigned char) }; + + unsigned char* buf = (unsigned char*)ptr; + size_t size = w*h*d*ElementSize; + + for (size_t i = 0; i != size; i++) { + buf[i] = (unsigned char)(genrand_int32(mt) % 0xff); + } +} + +// This function prints the contents of a buffer to standard error. 
+void print_buffer(BufferType* buf, size_t w, size_t h, size_t d) { + log_error("Size = %lux%lux%lu (%lu total)\n",w,h,d,w*h*d); + for (unsigned k=0; k!=d;++k) { + log_error("Slice: %u\n",k); + for (unsigned j=0; j!=h;++j) { + for (unsigned i=0;i!=w;++i) { + log_error("%02x",buf[k*(w*h)+j*w+i]); + } + log_error("\n"); + } + log_error("\n"); + } +} + +// Returns true if the two specified regions overlap. +bool check_overlap(const size_t src_offset[3], const size_t dst_offset[3], const size_t region[3]) { + + const size_t src_min[] = {src_offset[0], src_offset[1], src_offset[2]}; + const size_t src_max[] = {src_offset[0]+region[0], src_offset[1]+region[1], src_offset[2]+region[2]}; + + const size_t dst_min[] = {dst_offset[0], dst_offset[1], dst_offset[2]}; + const size_t dst_max[] = {dst_offset[0]+region[0], dst_offset[1]+region[1], dst_offset[2]+region[2]}; + + // Check for overlap, using the span space formulation. + bool overlap = true; + unsigned i; + for (i=0; i != 3; ++i) { + overlap = overlap && (src_min[i] < dst_max[i]) && (src_max[i] > dst_min[i]); + } + + return overlap; +} + +// This function invokes the CopyBufferRect CL command and then mirrors the operation on the host side verify buffers. +int copy_region(size_t src, size_t soffset[3], size_t sregion[3], size_t dst, size_t doffset[3], size_t dregion[3]) { + + // Copy between cl buffers. + size_t src_slice_pitch = (width[src]*height[src] != 1) ? width[src]*height[src] : 0; + size_t dst_slice_pitch = (width[dst]*height[dst] != 1) ? 
width[dst]*height[dst] : 0; + + cl_int err; + if (check_overlap(soffset,doffset,sregion)) { + log_info( "Copy overlap reported, skipping copy buffer rect\n" ); + return CL_SUCCESS; + } else { + if ((err = clEnqueueCopyBufferRect(queue, + buffer[src],buffer[dst], + soffset, doffset, + sregion,/*dregion,*/ + width[src], src_slice_pitch, + width[dst], dst_slice_pitch, + 0, NULL, NULL)) != CL_SUCCESS) + { + CL_EXIT_ERROR(err, "clEnqueueCopyBufferRect failed between %u and %u",(unsigned)src,(unsigned)dst); + } + } + + // Copy between host buffers. + size_t total = sregion[0] * sregion[1] * sregion[2]; + + size_t spitch = width[src]; + size_t sslice = width[src]*height[src]; + + size_t dpitch = width[dst]; + size_t dslice = width[dst]*height[dst]; + + for (size_t i = 0; i != total; ++i) { + + // Compute the coordinates of the element within the source and destination regions. + size_t rslice = sregion[0]*sregion[1]; + size_t sz = i / rslice; + size_t sy = (i % rslice) / sregion[0]; + size_t sx = (i % rslice) % sregion[0]; + + size_t dz = sz; + size_t dy = sy; + size_t dx = sx; + + // Compute the offset in bytes of the source and destination. + size_t s_idx = (soffset[2]+sz)*sslice + (soffset[1]+sy)*spitch + soffset[0]+sx; + size_t d_idx = (doffset[2]+dz)*dslice + (doffset[1]+dy)*dpitch + doffset[0]+dx; + + verify[dst][d_idx] = verify[src][s_idx]; + } + + return 0; +} + +// This function compares the destination region in the buffer pointed +// to by device, to the source region of the specified verify buffer. +int verify_region(BufferType* device, size_t src, size_t soffset[3], size_t sregion[3], size_t dst, size_t doffset[3]) { + + // Copy between host buffers. 
+ size_t spitch = width[src]; + size_t sslice = width[src]*height[src]; + + size_t dpitch = width[dst]; + size_t dslice = width[dst]*height[dst]; + + size_t total = sregion[0] * sregion[1] * sregion[2]; + for (size_t i = 0; i != total; ++i) { + + // Compute the coordinates of the element within the source and destination regions. + size_t rslice = sregion[0]*sregion[1]; + size_t sz = i / rslice; + size_t sy = (i % rslice) / sregion[0]; + size_t sx = (i % rslice) % sregion[0]; + + // Compute the offset in bytes of the source and destination. + size_t s_idx = (soffset[2]+sz)*sslice + (soffset[1]+sy)*spitch + soffset[0]+sx; + size_t d_idx = (doffset[2]+sz)*dslice + (doffset[1]+sy)*dpitch + doffset[0]+sx; + + if (device[d_idx] != verify[src][s_idx]) { + log_error("Verify failed on comparsion %lu: coordinate (%lu, %lu, %lu) of region\n",i,sx,sy,sz); + log_error("0x%02x != 0x%02x\n", device[d_idx], verify[src][s_idx]); +#if 0 + // Uncomment this section to print buffers. + log_error("Device (copy): [%lu]\n",dst); + print_buffer(device,width[dst],height[dst],depth[dst]); + log_error("\n"); + log_error("Verify: [%lu]\n",src); + print_buffer(verify[src],width[src],height[src],depth[src]); + log_error("\n"); + abort(); +#endif + return -1; + } + } + + return 0; +} + + +// This function invokes ReadBufferRect to read a region from the +// specified source buffer into a temporary destination buffer. The +// contents of the temporary buffer are then compared to the source +// region of the corresponding verify buffer. +int read_verify_region(size_t src, size_t soffset[3], size_t sregion[3], size_t dst, size_t doffset[3], size_t dregion[3]) { + + // Clear the temporary destination host buffer. + memset(tmp_buffer, 0xff, tmp_buffer_size); + + size_t src_slice_pitch = (width[src]*height[src] != 1) ? width[src]*height[src] : 0; + size_t dst_slice_pitch = (width[dst]*height[dst] != 1) ? 
width[dst]*height[dst] : 0; + + // Copy the source region of the cl buffer, to the destination region of the temporary buffer. + CL_EXIT_ERROR(clEnqueueReadBufferRect(queue, + buffer[src], + CL_TRUE, + soffset,doffset, + sregion, + width[src], src_slice_pitch, + width[dst], dst_slice_pitch, + tmp_buffer, + 0, NULL, NULL), "clEnqueueCopyBufferRect failed between %u and %u",(unsigned)src,(unsigned)dst); + + return verify_region(tmp_buffer,src,soffset,sregion,dst,doffset); +} + +// This function performs the same verification check as +// read_verify_region, except a MapBuffer command is used to access the +// device buffer data instead of a ReadBufferRect, and the whole +// buffer is checked. +int map_verify_region(size_t src) { + + size_t size_bytes = width[src]*height[src]*depth[src]*sizeof(BufferType); + + // Copy the source region of the cl buffer, to the destination region of the temporary buffer. + cl_int err; + BufferType* mapped = (BufferType*)clEnqueueMapBuffer(queue,buffer[src],CL_TRUE,CL_MAP_READ,0,size_bytes,0,NULL,NULL,&err); + CL_EXIT_ERROR(err, "clEnqueueMapBuffer failed for buffer %u",(unsigned)src); + + size_t soffset[] = { 0, 0, 0 }; + size_t sregion[] = { width[src], height[src], depth[src] }; + + int ret = verify_region(mapped,src,soffset,sregion,src,soffset); + + CL_EXIT_ERROR(clEnqueueUnmapMemObject(queue,buffer[src],mapped,0,NULL,NULL), + "clEnqueueUnmapMemObject failed for buffer %u",(unsigned)src); + + return ret; +} + +// This function generates a new temporary buffer and then writes a +// region of it to a region in the specified destination buffer. +int write_region(size_t src, size_t soffset[3], size_t sregion[3], size_t dst, size_t doffset[3], size_t dregion[3]) { + + initialize_image(tmp_buffer, tmp_buffer_size, 1, 1, mt); + // memset(tmp_buffer, 0xf0, tmp_buffer_size); + + size_t src_slice_pitch = (width[src]*height[src] != 1) ? width[src]*height[src] : 0; + size_t dst_slice_pitch = (width[dst]*height[dst] != 1) ? 
width[dst]*height[dst] : 0; + + // Copy the source region of the cl buffer, to the destination region of the temporary buffer. + CL_EXIT_ERROR(clEnqueueWriteBufferRect(queue, + buffer[dst], + CL_TRUE, + doffset,soffset, + /*sregion,*/dregion, + width[dst], dst_slice_pitch, + width[src], src_slice_pitch, + tmp_buffer, + 0, NULL, NULL), "clEnqueueWriteBufferRect failed between %u and %u",(unsigned)src,(unsigned)dst); + + // Copy from the temporary buffer to the host buffer. + size_t spitch = width[src]; + size_t sslice = width[src]*height[src]; + size_t dpitch = width[dst]; + size_t dslice = width[dst]*height[dst]; + + size_t total = sregion[0] * sregion[1] * sregion[2]; + for (size_t i = 0; i != total; ++i) { + + // Compute the coordinates of the element within the source and destination regions. + size_t rslice = sregion[0]*sregion[1]; + size_t sz = i / rslice; + size_t sy = (i % rslice) / sregion[0]; + size_t sx = (i % rslice) % sregion[0]; + + size_t dz = sz; + size_t dy = sy; + size_t dx = sx; + + // Compute the offset in bytes of the source and destination. + size_t s_idx = (soffset[2]+sz)*sslice + (soffset[1]+sy)*spitch + soffset[0]+sx; + size_t d_idx = (doffset[2]+dz)*dslice + (doffset[1]+dy)*dpitch + doffset[0]+dx; + + verify[dst][d_idx] = tmp_buffer[s_idx]; + } + return 0; +} + +void CL_CALLBACK mem_obj_destructor_callback( cl_mem, void *data ) +{ + free( data ); +} + +// This is the main test function for the conformance test. +int +test_bufferreadwriterect(cl_device_id device, cl_context context, cl_command_queue queue_, int num_elements) +{ + queue = queue_; + cl_int err; + + // Initialize the random number generator. + mt = init_genrand( gRandomSeed ); + + // Compute a maximum buffer size based on the number of test images and the device maximum. 
+ cl_ulong max_mem_alloc_size = 0; + CL_EXIT_ERROR(clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), &max_mem_alloc_size, NULL),"Could not get device info"); + log_info("CL_DEVICE_MAX_MEM_ALLOC_SIZE = %llu bytes.\n", max_mem_alloc_size); + + // Confirm that the maximum allocation size is not zero. + if (max_mem_alloc_size == 0) { + log_error("Error: CL_DEVICE_MAX_MEM_ALLOC_SIZE is zero bytes\n"); + return -1; + } + + // Guess at a reasonable maximum dimension. + size_t max_mem_alloc_dim = (size_t)cbrt((double)(max_mem_alloc_size/sizeof(BufferType)))/alloc_scale; + if (max_mem_alloc_dim == 0) { + max_mem_alloc_dim = max_mem_alloc_size; + } + + log_info("Using maximum dimension = %lu.\n", max_mem_alloc_dim); + + // Create pairs of cl buffers and host buffers on which operations will be mirrored. + log_info("Creating %u pairs of random sized host and cl buffers.\n", TotalImages); + + size_t max_size = 0; + size_t total_bytes = 0; + + for (unsigned i=0; i != TotalImages; ++i) { + + // Determine a width and height for this buffer. + size_t size_bytes; + size_t tries = 0; + size_t max_tries = 1048576; + do { + width[i] = get_random_size_t(1, max_mem_alloc_dim, mt); + height[i] = get_random_size_t(1, max_mem_alloc_dim, mt); + depth[i] = get_random_size_t(1, max_mem_alloc_dim, mt); + ++tries; + } while ((tries < max_tries) && (size_bytes = width[i]*height[i]*depth[i]*sizeof(BufferType)) > max_mem_alloc_size); + + // Check to see if adequately sized buffers were found. + if (tries >= max_tries) { + log_error("Error: Could not find random buffer sized less than %llu bytes in %lu tries.\n", + max_mem_alloc_size, max_tries); + return -1; + } + + // Keep track of the dimensions of the largest buffer. + max_size = (size_bytes > max_size) ? 
size_bytes : max_size; + total_bytes += size_bytes; + + log_info("Buffer[%u] is (%lu,%lu,%lu) = %lu MB (truncated)\n",i,width[i],height[i],depth[i],(size_bytes)/1048576); + } + + log_info( "Total size: %lu MB (truncated)\n", total_bytes/1048576 ); + + // Allocate a temporary buffer for read and write operations. + tmp_buffer_size = max_size; + tmp_buffer = (BufferType*)malloc(tmp_buffer_size); + + // Initialize cl buffers + log_info( "Initializing buffers\n" ); + for (unsigned i=0; i != TotalImages; ++i) { + + size_t size_bytes = width[i]*height[i]*depth[i]*sizeof(BufferType); + + // Allocate a host copy of the buffer for verification. + verify[i] = (BufferType*)malloc(size_bytes); + CL_EXIT_ERROR(verify[i] ? CL_SUCCESS : -1, "malloc of host buffer failed for buffer %u", i); + + // Allocate the buffer in host memory. + backing[i] = (BufferType*)malloc(size_bytes); + CL_EXIT_ERROR(backing[i] ? CL_SUCCESS : -1, "malloc of backing buffer failed for buffer %u", i); + + // Generate a random buffer. + log_info( "Initializing buffer %u\n", i ); + initialize_image(verify[i], width[i], height[i], depth[i], mt); + + // Copy the image into a buffer which will passed to CL. + memcpy(backing[i], verify[i], size_bytes); + + // Create the CL buffer. + buffer[i] = clCreateBuffer (context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE, size_bytes, backing[i], &err); + CL_EXIT_ERROR(err,"clCreateBuffer failed for buffer %u", i); + + // Make sure buffer is cleaned up appropriately if we encounter an error in the rest of the calls. + err = clSetMemObjectDestructorCallback( buffer[i], mem_obj_destructor_callback, backing[i] ); + CL_EXIT_ERROR(err, "Unable to set mem object destructor callback" ); + } + + // Main test loop, run num_tries times. + log_info( "Executing %u test operations selected at random.\n", (unsigned)num_tries ); + for (size_t iter = 0; iter < num_tries; ++iter) { + + // Determine a source and a destination. 
+ size_t src = get_random_size_t(0,TotalImages,mt); + size_t dst = get_random_size_t(0,TotalImages,mt); + + // Determine the minimum dimensions. + size_t min_width = width[src] < width[dst] ? width[src] : width[dst]; + size_t min_height = height[src] < height[dst] ? height[src] : height[dst]; + size_t min_depth = depth[src] < depth[dst] ? depth[src] : depth[dst]; + + // Generate a random source rectangle within the minimum dimensions. + size_t mx = get_random_size_t(0, min_width-1, mt); + size_t my = get_random_size_t(0, min_height-1, mt); + size_t mz = get_random_size_t(0, min_depth-1, mt); + + size_t sw = get_random_size_t(1, (min_width - mx), mt); + size_t sh = get_random_size_t(1, (min_height - my), mt); + size_t sd = get_random_size_t(1, (min_depth - mz), mt); + + size_t sx = get_random_size_t(0, width[src]-sw, mt); + size_t sy = get_random_size_t(0, height[src]-sh, mt); + size_t sz = get_random_size_t(0, depth[src]-sd, mt); + + size_t soffset[] = { sx, sy, sz }; + size_t sregion[] = { sw, sh, sd }; + + // Generate a destination rectangle of the same size. + size_t dw = sw; + size_t dh = sh; + size_t dd = sd; + + // Generate a random destination offset within the buffer. + size_t dx = get_random_size_t(0, (width[dst] - dw), mt); + size_t dy = get_random_size_t(0, (height[dst] - dh), mt); + size_t dz = get_random_size_t(0, (depth[dst] - dd), mt); + size_t doffset[] = { dx, dy, dz }; + size_t dregion[] = { dw, dh, dd }; + + // Execute one of three operations: + // - Copy: Copies between src and dst within each set of host, buffer, and images. + // - Read & verify: Reads src region from buffer and image, and compares to host. + // - Write: Generates new buffer with src dimensions, and writes to cl buffer and image. 
+ + enum { TotalOperations = 3 }; + size_t operation = get_random_size_t(0,TotalOperations,mt); + + switch (operation) { + case 0: + log_info("%lu Copy %lu offset (%lu,%lu,%lu) -> %lu offset (%lu,%lu,%lu) region (%lux%lux%lu = %lu)\n", + iter, + src, soffset[0], soffset[1], soffset[2], + dst, doffset[0], doffset[1], doffset[2], + sregion[0], sregion[1], sregion[2], + sregion[0]*sregion[1]*sregion[2]); + if ((err = copy_region(src, soffset, sregion, dst, doffset, dregion))) + return err; + break; + case 1: + log_info("%lu Read %lu offset (%lu,%lu,%lu) -> %lu offset (%lu,%lu,%lu) region (%lux%lux%lu = %lu)\n", + iter, + src, soffset[0], soffset[1], soffset[2], + dst, doffset[0], doffset[1], doffset[2], + sregion[0], sregion[1], sregion[2], + sregion[0]*sregion[1]*sregion[2]); + if ((err = read_verify_region(src, soffset, sregion, dst, doffset, dregion))) + return err; + break; + case 2: + log_info("%lu Write %lu offset (%lu,%lu,%lu) -> %lu offset (%lu,%lu,%lu) region (%lux%lux%lu = %lu)\n", + iter, + src, soffset[0], soffset[1], soffset[2], + dst, doffset[0], doffset[1], doffset[2], + sregion[0], sregion[1], sregion[2], + sregion[0]*sregion[1]*sregion[2]); + if ((err = write_region(src, soffset, sregion, dst, doffset, dregion))) + return err; + break; + } + +#if 0 + // Uncomment this section to verify each operation. + // If commented out, verification won't occur until the end of the + // test, and it will not be possible to determine which operation failed. + log_info("Verify src %lu offset (%u,%u,%u) region (%lux%lux%lu)\n", src, 0, 0, 0, width[src], height[src], depth[src]); + if (err = map_verify_region(src)) + return err; + + log_info("Verify dst %lu offset (%u,%u,%u) region (%lux%lux%lu)\n", dst, 0, 0, 0, width[dst], height[dst], depth[dst]); + if (err = map_verify_region(dst)) + return err; + + +#endif + + } // end main for loop. 
+ + for (unsigned i=0;i +#include +#include +#include + + +#include "procs.h" + +const char *constant_kernel_code = +"__kernel void constant_kernel(__global float *out, __constant float *tmpF, __constant int *tmpI)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" float ftmp = tmpF[tid]; \n" +" float Itmp = tmpI[tid]; \n" +" out[tid] = ftmp * Itmp; \n" +"}\n"; + +const char *loop_constant_kernel_code = +"kernel void loop_constant_kernel(global float *out, constant float *i_pos, int num)\n" +"{\n" +" int tid = get_global_id(0);\n" +" float sum = 0;\n" +" for (int i = 0; i < num; i++) {\n" +" float pos = i_pos[i*3];\n" +" sum += pos;\n" +" }\n" +" out[tid] = sum;\n" +"}\n"; + + +static int +verify(cl_float *tmpF, cl_int *tmpI, cl_float *out, int n) +{ + int i; + + for (i=0; i < n; i++) + { + float f = tmpF[i] * tmpI[i]; + if( out[i] != f ) + { + log_error("CONSTANT test failed\n"); + return -1; + } + } + + log_info("CONSTANT test passed\n"); + return 0; +} + + +static int +verify_loop_constant(const cl_float *tmp, cl_float *out, cl_int l, int n) +{ + int i; + cl_int j; + for (i=0; i < n; i++) + { + float sum = 0; + for (j=0; j < l; ++j) + sum += tmp[j*3]; + + if( out[i] != sum ) + { + log_error("loop CONSTANT test failed\n"); + return -1; + } + } + + log_info("loop CONSTANT test passed\n"); + return 0; +} + +int +test_constant(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + cl_mem streams[3]; + cl_int *tmpI; + cl_float *tmpF, *out; + cl_program program; + cl_kernel kernel; + size_t global_threads[3]; + int err; + unsigned int i; + cl_ulong maxSize; + size_t num_floats, num_ints, constant_values; + MTdata d; + RoundingMode oldRoundMode; + int isRTZ = 0; + + /* Verify our test buffer won't be bigger than allowed */ + err = clGetDeviceInfo( device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof( maxSize ), &maxSize, 0 ); + test_error( err, "Unable to get max constant buffer size" ); + + log_info("Device reports 
CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE %llu bytes.\n", maxSize); + maxSize/=4; + num_ints = (size_t)maxSize/sizeof(cl_int); + num_floats = (size_t)maxSize/sizeof(cl_float); + if (num_ints >= num_floats) { + constant_values = num_floats; + } else { + constant_values = num_ints; + } + + log_info("Test will attempt to use %lu bytes with one %lu byte constant int buffer and one %lu byte constant float buffer.\n", + constant_values*sizeof(cl_int) + constant_values*sizeof(cl_float), constant_values*sizeof(cl_int), constant_values*sizeof(cl_float)); + + tmpI = (cl_int*)malloc(sizeof(cl_int) * constant_values); + tmpF = (cl_float*)malloc(sizeof(cl_float) * constant_values); + out = (cl_float*)malloc(sizeof(cl_float) * constant_values); + streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * constant_values, NULL, NULL); + if (!streams[0]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * constant_values, NULL, NULL); + if (!streams[1]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * constant_values, NULL, NULL); + if (!streams[2]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + + d = init_genrand( gRandomSeed ); + for (i=0; i +#include +#include +#include + + +#include "procs.h" + +const char *constant_source_kernel_code[] = { +"__constant int outVal = 42;\n" +"__constant int outIndex = 7;\n" +"__constant int outValues[ 16 ] = { 17, 01, 11, 12, 1955, 11, 5, 1985, 113, 1, 24, 1984, 7, 23, 1979, 97 };\n" +"\n" +"__kernel void constant_kernel( __global int *out )\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" if( tid == 0 )\n" +" {\n" +" out[ 0 ] = outVal;\n" +" out[ 1 ] = outValues[ outIndex ];\n" +" }\n" +" else\n" +" {\n" +" out[ tid + 1 ] = outValues[ tid ];\n" +" }\n" +"}\n" }; + +int 
test_constant_source(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + clProgramWrapper program; + clKernelWrapper kernel; + + clMemWrapper outStream; + cl_int outValues[ 17 ]; + cl_int expectedValues[ 17 ] = { 42, 1985, 01, 11, 12, 1955, 11, 5, 1985, 113, 1, 24, 1984, 7, 23, 1979, 97 }; + + cl_int error; + + + // Create a kernel to test with + error = create_single_kernel_helper( context, &program, &kernel, 1, constant_source_kernel_code, "constant_kernel" ); + test_error( error, "Unable to create testing kernel" ); + + // Create our output buffer + outStream = clCreateBuffer( context, CL_MEM_WRITE_ONLY, sizeof( outValues ), NULL, &error ); + test_error( error, "Unable to create output buffer" ); + + // Set the argument + error = clSetKernelArg( kernel, 0, sizeof( outStream ), &outStream ); + test_error( error, "Unable to set kernel argument" ); + + // Run test kernel + size_t threads[ 1 ] = { 16 }; + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL ); + test_error( error, "Unable to enqueue kernel" ); + + // Read results + error = clEnqueueReadBuffer( queue, outStream, CL_TRUE, 0, sizeof( outValues ), outValues, 0, NULL, NULL ); + test_error( error, "Unable to read results" ); + + // Verify results + for( int i = 0; i < 17; i++ ) + { + if( expectedValues[ i ] != outValues[ i ] ) + { + if( i == 0 ) + log_error( "ERROR: Output value %d from constant source global did not validate! (Expected %d, got %d)\n", i, expectedValues[ i ], outValues[ i ] ); + else if( i == 1 ) + log_error( "ERROR: Output value %d from constant-indexed constant array did not validate! (Expected %d, got %d)\n", i, expectedValues[ i ], outValues[ i ] ); + else + log_error( "ERROR: Output value %d from variable-indexed constant array did not validate! 
(Expected %d, got %d)\n", i, expectedValues[ i ], outValues[ i ] ); + return -1; + } + } + + return 0; +} + + + + + diff --git a/test_conformance/compatibility/test_conformance/basic/test_createkernelsinprogram.c b/test_conformance/compatibility/test_conformance/basic/test_createkernelsinprogram.c new file mode 100644 index 00000000..58bcc810 --- /dev/null +++ b/test_conformance/compatibility/test_conformance/basic/test_createkernelsinprogram.c @@ -0,0 +1,121 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include + + +#include "procs.h" + +const char *sample_single_kernel = { +"__kernel void sample_test(__global float *src, __global int *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = (int)src[tid];\n" +"\n" +"}\n"}; + +const char *sample_double_kernel = { +"__kernel void sample_test(__global float *src, __global int *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = (int)src[tid];\n" +"\n" +"}\n" +"__kernel void sample_test2(__global float *src, __global int *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = (int)src[tid];\n" +"\n" +"}\n"}; + + +int +test_createkernelsinprogram(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + cl_program program; + cl_kernel kernel[2]; + unsigned int num_kernels; + size_t lengths[2]; + int err; + + lengths[0] = strlen(sample_single_kernel); + program = clCreateProgramWithSource(context, 1, &sample_single_kernel, lengths, NULL); + if (!program) + { + log_error("clCreateProgramWithSource failed\n"); + return -1; + } + + err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); + if (err != CL_SUCCESS) + { + log_error("clBuildProgramExecutable failed\n"); + return -1; + } + + err = clCreateKernelsInProgram(program, 1, kernel, &num_kernels); + if ( (err != CL_SUCCESS) || (num_kernels != 1) ) + { + log_error("clCreateKernelsInProgram test failed for a single kernel\n"); + return -1; + } + + clReleaseKernel(kernel[0]); + clReleaseProgram(program); + + lengths[0] = strlen(sample_double_kernel); + program = clCreateProgramWithSource(context, 1, &sample_double_kernel, lengths, NULL); + if (!program) + { + log_error("clCreateProgramWithSource failed\n"); + return -1; + } + + err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); + if (err != CL_SUCCESS) + { + log_error("clBuildProgramExecutable failed\n"); + return -1; + } + + err = 
clCreateKernelsInProgram(program, 2, kernel, &num_kernels); + if ( (err != CL_SUCCESS) || (num_kernels != 2) ) + { + log_error("clCreateKernelsInProgram test failed for two kernels\n"); + return -1; + } + + log_info("clCreateKernelsInProgram test passed\n"); + + clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + clReleaseProgram(program); + + + return err; +} + + + + + diff --git a/test_conformance/compatibility/test_conformance/basic/test_enqueue_map.cpp b/test_conformance/compatibility/test_conformance/basic/test_enqueue_map.cpp new file mode 100644 index 00000000..3bf7be58 --- /dev/null +++ b/test_conformance/compatibility/test_conformance/basic/test_enqueue_map.cpp @@ -0,0 +1,253 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include + + +#include "procs.h" +#include "../../test_common/harness/conversions.h" +#include "../../test_common/harness/typeWrappers.h" + +const cl_mem_flags flag_set[] = { + CL_MEM_ALLOC_HOST_PTR, + CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, + CL_MEM_USE_HOST_PTR, + CL_MEM_COPY_HOST_PTR, + 0 +}; +const char* flag_set_names[] = { + "CL_MEM_ALLOC_HOST_PTR", + "CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR", + "CL_MEM_USE_HOST_PTR", + "CL_MEM_COPY_HOST_PTR", + "0" +}; + +int test_enqueue_map_buffer(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + const size_t bufferSize = 256*256; + int src_flag_id; + MTdata d = init_genrand( gRandomSeed ); + cl_char *initialData = (cl_char*)malloc(bufferSize); + cl_char *finalData = (cl_char*)malloc(bufferSize); + + for (src_flag_id=0; src_flag_id < 5; src_flag_id++) + { + clMemWrapper memObject; + log_info("Testing with cl_mem_flags src: %s\n", flag_set_names[src_flag_id]); + + generate_random_data( kChar, (unsigned int)bufferSize, d, initialData ); + + if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR)) + memObject = clCreateBuffer(context, flag_set[src_flag_id], bufferSize * sizeof( cl_char ), initialData, &error); + else + memObject = clCreateBuffer(context, flag_set[src_flag_id], bufferSize * sizeof( cl_char ), NULL, &error); + test_error( error, "Unable to create testing buffer" ); + + if (!(flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) && !(flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR)) + { + error = clEnqueueWriteBuffer(queue, memObject, CL_TRUE, 0, bufferSize * sizeof( cl_char ), initialData, 0, NULL, NULL); + test_error( error, "clEnqueueWriteBuffer failed"); + } + + for( int i = 0; i < 128; i++ ) + { + + size_t offset = (size_t)random_in_range( 0, (int)bufferSize - 1, d ); + size_t length = (size_t)random_in_range( 1, (int)( bufferSize 
- offset ), d ); + + cl_char *mappedRegion = (cl_char *)clEnqueueMapBuffer( queue, memObject, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, + offset, length, 0, NULL, NULL, &error ); + if( error != CL_SUCCESS ) + { + print_error( error, "clEnqueueMapBuffer call failed" ); + log_error( "\tOffset: %d Length: %d\n", (int)offset, (int)length ); + free( initialData ); + free( finalData ); + free_mtdata(d); + return -1; + } + + // Write into the region + for( size_t j = 0; j < length; j++ ) + { + cl_char spin = (cl_char)genrand_int32( d ); + + // Test read AND write in one swipe + cl_char value = mappedRegion[ j ]; + value = spin - value; + mappedRegion[ j ] = value; + + // Also update the initial data array + value = initialData[ offset + j ]; + value = spin - value; + initialData[ offset + j ] = value; + } + + // Unmap + error = clEnqueueUnmapMemObject( queue, memObject, mappedRegion, 0, NULL, NULL ); + test_error( error, "Unable to unmap buffer" ); + } + + // Final validation: read actual values of buffer and compare against our reference + error = clEnqueueReadBuffer( queue, memObject, CL_TRUE, 0, sizeof( cl_char ) * bufferSize, finalData, 0, NULL, NULL ); + test_error( error, "Unable to read results" ); + + for( size_t q = 0; q < bufferSize; q++ ) + { + if( initialData[ q ] != finalData[ q ] ) + { + log_error( "ERROR: Sample %d did not validate! 
Got %d, expected %d\n", (int)q, (int)finalData[ q ], (int)initialData[ q ] ); + free( initialData ); + free( finalData ); + free_mtdata(d); + return -1; + } + } + } // cl_mem flags + + free( initialData ); + free( finalData ); + free_mtdata(d); + + return 0; +} + +int test_enqueue_map_image(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + cl_image_format format = { CL_RGBA, CL_UNSIGNED_INT32 }; + const size_t imageSize = 256; + int src_flag_id; + cl_uint *initialData; + cl_uint *finalData; + MTdata d; + + PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID ) + + initialData = (cl_uint*)malloc(imageSize * imageSize * 4 *sizeof(cl_uint)); + finalData = (cl_uint*)malloc(imageSize * imageSize * 4 *sizeof(cl_uint)); + + if( !is_image_format_supported( context, CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE2D, &format ) ) + { + log_error( "ERROR: Test requires basic OpenCL 1.0 format CL_RGBA:CL_UNSIGNED_INT32, which is unsupported by this device!\n" ); + free(initialData); + free(finalData); + return -1; + } + + d = init_genrand( gRandomSeed ); + for (src_flag_id=0; src_flag_id < 5; src_flag_id++) { + clMemWrapper memObject; + log_info("Testing with cl_mem_flags src: %s\n", flag_set_names[src_flag_id]); + + generate_random_data( kUInt, (unsigned int)( imageSize * imageSize ), d, initialData ); + + if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR)) + memObject = create_image_2d( context, CL_MEM_READ_WRITE | flag_set[src_flag_id], &format, + imageSize, imageSize, 0, initialData, &error ); + else + memObject = create_image_2d( context, CL_MEM_READ_WRITE | flag_set[src_flag_id], &format, + imageSize, imageSize, 0, NULL, &error ); + test_error( error, "Unable to create testing buffer" ); + + if (!(flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) && !(flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR)) { + size_t write_origin[3]={0,0,0}, write_region[3]={imageSize, imageSize, 1}; + error = 
clEnqueueWriteImage(queue, memObject, CL_TRUE, write_origin, write_region, NULL, NULL, initialData, 0, NULL, NULL); + test_error( error, "Unable to write to testing buffer" ); + } + + for( int i = 0; i < 128; i++ ) + { + + size_t offset[3], region[3]; + size_t rowPitch; + + offset[ 0 ] = (size_t)random_in_range( 0, (int)imageSize - 1, d ); + region[ 0 ] = (size_t)random_in_range( 1, (int)( imageSize - offset[ 0 ] - 1), d ); + offset[ 1 ] = (size_t)random_in_range( 0, (int)imageSize - 1, d ); + region[ 1 ] = (size_t)random_in_range( 1, (int)( imageSize - offset[ 1 ] - 1), d ); + offset[ 2 ] = 0; + region[ 2 ] = 1; + cl_uint *mappedRegion = (cl_uint *)clEnqueueMapImage( queue, memObject, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, + offset, region, &rowPitch, NULL, 0, NULL, NULL, &error ); + if( error != CL_SUCCESS ) + { + print_error( error, "clEnqueueMapImage call failed" ); + log_error( "\tOffset: %d,%d Region: %d,%d\n", (int)offset[0], (int)offset[1], (int)region[0], (int)region[1] ); + free(initialData); + free(finalData); + free_mtdata(d); + return -1; + } + + // Write into the region + cl_uint *mappedPtr = mappedRegion; + for( size_t y = 0; y < region[ 1 ]; y++ ) + { + for( size_t x = 0; x < region[ 0 ] * 4; x++ ) + { + cl_int spin = (cl_int)random_in_range( 16, 1024, d ); + + cl_int value; + // Test read AND write in one swipe + value = mappedPtr[ ( y * rowPitch/sizeof(cl_uint) ) + x ]; + value = spin - value; + mappedPtr[ ( y * rowPitch/sizeof(cl_uint) ) + x ] = value; + + // Also update the initial data array + value = initialData[ ( ( offset[ 1 ] + y ) * imageSize + offset[ 0 ] ) * 4 + x ]; + value = spin - value; + initialData[ ( ( offset[ 1 ] + y ) * imageSize + offset[ 0 ] ) * 4 + x ] = value; + } + } + + // Unmap + error = clEnqueueUnmapMemObject( queue, memObject, mappedRegion, 0, NULL, NULL ); + test_error( error, "Unable to unmap buffer" ); + } + + // Final validation: read actual values of buffer and compare against our reference + size_t finalOrigin[3] = 
{ 0, 0, 0 }, finalRegion[3] = { imageSize, imageSize, 1 }; + error = clEnqueueReadImage( queue, memObject, CL_TRUE, finalOrigin, finalRegion, 0, 0, finalData, 0, NULL, NULL ); + test_error( error, "Unable to read results" ); + + for( size_t q = 0; q < imageSize * imageSize * 4; q++ ) + { + if( initialData[ q ] != finalData[ q ] ) + { + log_error( "ERROR: Sample %d (coord %d,%d) did not validate! Got %d, expected %d\n", (int)q, (int)( ( q / 4 ) % imageSize ), (int)( ( q / 4 ) / imageSize ), + (int)finalData[ q ], (int)initialData[ q ] ); + free(initialData); + free(finalData); + free_mtdata(d); + return -1; + } + } + } // cl_mem_flags + + free(initialData); + free(finalData); + free_mtdata(d); + return 0; +} + + diff --git a/test_conformance/compatibility/test_conformance/basic/test_explicit_s2v.cpp b/test_conformance/compatibility/test_conformance/basic/test_explicit_s2v.cpp new file mode 100644 index 00000000..205e7bbd --- /dev/null +++ b/test_conformance/compatibility/test_conformance/basic/test_explicit_s2v.cpp @@ -0,0 +1,384 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include + + +#include "procs.h" +#include "../../test_common/harness/conversions.h" +#include "../../test_common/harness/typeWrappers.h" + +#define DECLARE_S2V_IDENT_KERNEL(srctype,dsttype,size) \ +"__kernel void test_conversion(__global " srctype " *sourceValues, __global " dsttype #size " *destValues )\n" \ +"{\n" \ +" int tid = get_global_id(0);\n" \ +" " srctype " src = sourceValues[tid];\n" \ +"\n" \ +" destValues[tid] = (" dsttype #size ")src;\n" \ +"\n" \ +"}\n" + +#define DECLARE_S2V_IDENT_KERNELS(srctype,dsttype) \ +{ \ +DECLARE_S2V_IDENT_KERNEL(srctype,#dsttype,2), \ +DECLARE_S2V_IDENT_KERNEL(srctype,#dsttype,4), \ +DECLARE_S2V_IDENT_KERNEL(srctype,#dsttype,8), \ +DECLARE_S2V_IDENT_KERNEL(srctype,#dsttype,16) \ +} + +#define DECLARE_EMPTY { NULL, NULL, NULL, NULL, NULL } + +/* Note: the next four arrays all must match in order and size to the ExplicitTypes enum in conversions.h!!! */ + +#define DECLARE_S2V_IDENT_KERNELS_SET(srctype) \ +{ \ +DECLARE_S2V_IDENT_KERNELS(#srctype,bool), \ + DECLARE_S2V_IDENT_KERNELS(#srctype,char), \ + DECLARE_S2V_IDENT_KERNELS(#srctype,uchar), \ + DECLARE_S2V_IDENT_KERNELS(#srctype,unsigned char), \ +DECLARE_S2V_IDENT_KERNELS(#srctype,short), \ +DECLARE_S2V_IDENT_KERNELS(#srctype,ushort), \ +DECLARE_S2V_IDENT_KERNELS(#srctype,unsigned short), \ +DECLARE_S2V_IDENT_KERNELS(#srctype,int), \ +DECLARE_S2V_IDENT_KERNELS(#srctype,uint), \ +DECLARE_S2V_IDENT_KERNELS(#srctype,unsigned int), \ +DECLARE_S2V_IDENT_KERNELS(#srctype,long), \ +DECLARE_S2V_IDENT_KERNELS(#srctype,ulong), \ +DECLARE_S2V_IDENT_KERNELS(#srctype,unsigned long), \ +DECLARE_S2V_IDENT_KERNELS(#srctype,float), \ +DECLARE_EMPTY \ +} + +#define DECLARE_EMPTY_SET \ +{ \ +DECLARE_EMPTY, \ +DECLARE_EMPTY, \ +DECLARE_EMPTY, \ +DECLARE_EMPTY, \ +DECLARE_EMPTY, \ +DECLARE_EMPTY, \ +DECLARE_EMPTY, \ +DECLARE_EMPTY, \ +DECLARE_EMPTY, \ +DECLARE_EMPTY, \ +DECLARE_EMPTY, \ +DECLARE_EMPTY, \ 
+DECLARE_EMPTY, \ +DECLARE_EMPTY, \ +DECLARE_EMPTY \ +} + + +/* The overall array */ +const char * kernel_explicit_s2v_set[kNumExplicitTypes][kNumExplicitTypes][5] = { + DECLARE_S2V_IDENT_KERNELS_SET(bool), + DECLARE_S2V_IDENT_KERNELS_SET(char), + DECLARE_S2V_IDENT_KERNELS_SET(uchar), + DECLARE_S2V_IDENT_KERNELS_SET(unsigned char), + DECLARE_S2V_IDENT_KERNELS_SET(short), + DECLARE_S2V_IDENT_KERNELS_SET(ushort), + DECLARE_S2V_IDENT_KERNELS_SET(unsigned short), + DECLARE_S2V_IDENT_KERNELS_SET(int), + DECLARE_S2V_IDENT_KERNELS_SET(uint), + DECLARE_S2V_IDENT_KERNELS_SET(unsigned int), + DECLARE_S2V_IDENT_KERNELS_SET(long), + DECLARE_S2V_IDENT_KERNELS_SET(ulong), + DECLARE_S2V_IDENT_KERNELS_SET(unsigned long), + DECLARE_S2V_IDENT_KERNELS_SET(float), + DECLARE_EMPTY_SET +}; + +int test_explicit_s2v_function(cl_device_id deviceID, cl_context context, cl_command_queue queue, const char *programSrc, + ExplicitType srcType, unsigned int count, ExplicitType destType, unsigned int vecSize, void *inputData ) +{ + clProgramWrapper program; + clKernelWrapper kernel; + int error; + clMemWrapper streams[2]; + void *outData; + unsigned char convertedData[ 8 ]; /* Max type size is 8 bytes */ + size_t threadSize[3], groupSize[3]; + unsigned int i, s; + unsigned char *inPtr, *outPtr; + size_t paramSize, destTypeSize; + + const char* finalProgramSrc[2] = { + "", // optional pragma + programSrc + }; + + if (srcType == kDouble || destType == kDouble) { + finalProgramSrc[0] = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; + } + + + if( programSrc == NULL ) + return 0; + + paramSize = get_explicit_type_size( srcType ); + destTypeSize = get_explicit_type_size( destType ); + + size_t destStride = destTypeSize * vecSize; + + outData = malloc( destStride * count ); + + if( create_single_kernel_helper( context, &program, &kernel, 2, finalProgramSrc, "test_conversion" ) ) + { + log_info( "****** %s%s *******\n", finalProgramSrc[0], finalProgramSrc[1] ); + return -1; + } + + streams[0] = 
clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), paramSize * count, inputData, &error); + test_error( error, "clCreateBuffer failed"); + streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), destStride * count, NULL, &error); + test_error( error, "clCreateBuffer failed"); + + /* Set the arguments */ + error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0] ); + test_error( error, "Unable to set indexed kernel arguments" ); + error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1] ); + test_error( error, "Unable to set indexed kernel arguments" ); + + /* Run the kernel */ + threadSize[0] = count; + + error = get_max_common_work_group_size( context, kernel, threadSize[0], &groupSize[0] ); + test_error( error, "Unable to get work group size to use" ); + + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threadSize, groupSize, 0, NULL, NULL ); + test_error( error, "Unable to execute test kernel" ); + + /* Now verify the results. Each value should have been duplicated four times, and we should be able to just + do a memcpy instead of relying on the actual type of data */ + error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, destStride * count, outData, 0, NULL, NULL ); + test_error( error, "Unable to read output values!" 
); + + inPtr = (unsigned char *)inputData; + outPtr = (unsigned char *)outData; + + for( i = 0; i < count; i++ ) + { + /* Convert the input data element to our output data type to compare against */ + convert_explicit_value( (void *)inPtr, (void *)convertedData, srcType, false, kDefaultRoundingType, destType ); + + /* Now compare every element of the vector */ + for( s = 0; s < vecSize; s++ ) + { + if( memcmp( convertedData, outPtr + destTypeSize * s, destTypeSize ) != 0 ) + { + unsigned int *p = (unsigned int *)outPtr; + log_error( "ERROR: Output value %d:%d does not validate for size %d:%d!\n", i, s, vecSize, (int)destTypeSize ); + log_error( " Input: 0x%0*x\n", (int)( paramSize * 2 ), *(unsigned int *)inPtr & ( 0xffffffff >> ( 32 - paramSize * 8 ) ) ); + log_error( " Actual: 0x%08x 0x%08x 0x%08x 0x%08x\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] ); + return -1; + } + } + inPtr += paramSize; + outPtr += destStride; + } + + free( outData ); + + return 0; +} + +int test_explicit_s2v_function_set(cl_device_id deviceID, cl_context context, cl_command_queue queue, ExplicitType srcType, + unsigned int count, void *inputData ) +{ + unsigned int sizes[] = { 2, 4, 8, 16, 0 }; + int i, dstType, failed = 0; + + + for( dstType = kBool; dstType < kNumExplicitTypes; dstType++ ) + { + if( dstType == kDouble && !is_extension_available( deviceID, "cl_khr_fp64" ) ) + continue; + + if (( dstType == kLong || dstType == kULong ) && !gHasLong ) + continue; + + for( i = 0; sizes[i] != 0; i++ ) + { + if( dstType != srcType ) + continue; + if( strchr( get_explicit_type_name( (ExplicitType)srcType ), ' ' ) != NULL || + strchr( get_explicit_type_name( (ExplicitType)dstType ), ' ' ) != NULL ) + continue; + + if( test_explicit_s2v_function( deviceID, context, queue, kernel_explicit_s2v_set[ srcType ][ dstType ][ i ], + srcType, count, (ExplicitType)dstType, sizes[ i ], inputData ) != 0 ) + { + log_error( "ERROR: Explicit cast of scalar %s to vector %s%d FAILED; skipping other %s vector tests\n", + 
get_explicit_type_name(srcType), get_explicit_type_name((ExplicitType)dstType), sizes[i], get_explicit_type_name((ExplicitType)dstType) ); + failed = -1; + break; + } + } + } + + return failed; +} + +int test_explicit_s2v_bool(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + log_info( "NOTE: Boolean vectors not defined in OpenCL 1.0. Skipping test.\n" ); + return 0; +#if 0 + bool data[128]; + + generate_random_data( kBool, 128, data ); + + return test_explicit_s2v_function_set( deviceID, context, queue, kBool, 128, data ); +#endif +} + +int test_explicit_s2v_char(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + char data[128]; + RandomSeed seed(gRandomSeed); + + generate_random_data( kChar, 128, seed, data ); + + return test_explicit_s2v_function_set( deviceID, context, queue, kChar, 128, data ); +} + +int test_explicit_s2v_uchar(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + unsigned char data[128]; + RandomSeed seed(gRandomSeed); + + generate_random_data( kUChar, 128, seed, data ); + + if( test_explicit_s2v_function_set( deviceID, context, queue, kUChar, 128, data ) != 0 ) + return -1; + if( test_explicit_s2v_function_set( deviceID, context, queue, kUnsignedChar, 128, data ) != 0 ) + return -1; + return 0; +} + +int test_explicit_s2v_short(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + short data[128]; + RandomSeed seed(gRandomSeed); + + generate_random_data( kShort, 128, seed, data ); + + if( test_explicit_s2v_function_set( deviceID, context, queue, kShort, 128, data ) != 0 ) + return -1; + return 0; +} + +int test_explicit_s2v_ushort(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + unsigned short data[128]; + RandomSeed seed(gRandomSeed); + + generate_random_data( kUShort, 128, seed, data ); + + if( test_explicit_s2v_function_set( deviceID, context, queue, 
kUShort, 128, data ) != 0 ) + return -1; + if( test_explicit_s2v_function_set( deviceID, context, queue, kUnsignedShort, 128, data ) != 0 ) + return -1; + return 0; +} + +int test_explicit_s2v_int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int data[128]; + RandomSeed seed(gRandomSeed); + + generate_random_data( kInt, 128, seed, data ); + + if( test_explicit_s2v_function_set( deviceID, context, queue, kInt, 128, data ) != 0 ) + return -1; + return 0; +} + +int test_explicit_s2v_uint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + unsigned int data[128]; + RandomSeed seed(gRandomSeed); + + generate_random_data( kUInt, 128, seed, data ); + + if( test_explicit_s2v_function_set( deviceID, context, queue, kUInt, 128, data ) != 0 ) + return -1; + if( test_explicit_s2v_function_set( deviceID, context, queue, kUnsignedInt, 128, data ) != 0 ) + return -1; + return 0; +} + +int test_explicit_s2v_long(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + cl_long data[128]; + RandomSeed seed(gRandomSeed); + + generate_random_data( kLong, 128, seed, data ); + + if( test_explicit_s2v_function_set( deviceID, context, queue, kLong, 128, data ) != 0 ) + return -1; + return 0; +} + +int test_explicit_s2v_ulong(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + cl_ulong data[128]; + RandomSeed seed(gRandomSeed); + + generate_random_data( kULong, 128, seed, data ); + + if( test_explicit_s2v_function_set( deviceID, context, queue, kULong, 128, data ) != 0 ) + return -1; + if( test_explicit_s2v_function_set( deviceID, context, queue, kUnsignedLong, 128, data ) != 0 ) + return -1; + return 0; +} + +int test_explicit_s2v_float(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + float data[128]; + RandomSeed seed(gRandomSeed); + + generate_random_data( kFloat, 128, seed, data ); + + if( 
test_explicit_s2v_function_set( deviceID, context, queue, kFloat, 128, data ) != 0 ) + return -1; + return 0; +} + + +int test_explicit_s2v_double(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + double data[128]; + RandomSeed seed(gRandomSeed); + + if( !is_extension_available( deviceID, "cl_khr_fp64" ) ) { + log_info("Extension cl_khr_fp64 not supported. Skipping test.\n"); + return 0; + } + + generate_random_data( kDouble, 128, seed, data ); + + if( test_explicit_s2v_function_set( deviceID, context, queue, kDouble, 128, data ) != 0 ) + return -1; + return 0; +} + + diff --git a/test_conformance/compatibility/test_conformance/basic/test_float2int.c b/test_conformance/compatibility/test_conformance/basic/test_float2int.c new file mode 100644 index 00000000..0191ceec --- /dev/null +++ b/test_conformance/compatibility/test_conformance/basic/test_float2int.c @@ -0,0 +1,160 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include + + +#include "procs.h" + +const char *float2int_kernel_code = +"__kernel void test_float2int(__global float *src, __global int *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = (int)src[tid];\n" +"\n" +"}\n"; + + +int +verify_float2int(cl_float *inptr, cl_int *outptr, int n) +{ + int i; + + for (i=0; i +#include +#include +#include +#include "../../test_common/harness/rounding_mode.h" + +#include "procs.h" + +const char *fpadd_kernel_code = +"__kernel void test_fpadd(__global float *srcA, __global float *srcB, __global float *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] + srcB[tid];\n" +"}\n"; + +const char *fpsub_kernel_code = +"__kernel void test_fpsub(__global float *srcA, __global float *srcB, __global float *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] - srcB[tid];\n" +"}\n"; + +const char *fpmul_kernel_code = +"__kernel void test_fpmul(__global float *srcA, __global float *srcB, __global float *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] * srcB[tid];\n" +"}\n"; + + +static const float MAX_ERR = 1e-5f; + +int +verify_fpadd(float *inptrA, float *inptrB, float *outptr, int n) +{ + float r; + int i; + + for (i=0; i +#include +#include +#include +#include "../../test_common/harness/rounding_mode.h" + + +#include "procs.h" + +const char *fpadd2_kernel_code = +"__kernel void test_fpadd2(__global float2 *srcA, __global float2 *srcB, __global float2 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] + srcB[tid];\n" +"}\n"; + +const char *fpsub2_kernel_code = +"__kernel void test_fpsub2(__global float2 *srcA, __global float2 *srcB, __global float2 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] - srcB[tid];\n" +"}\n"; + +const char *fpmul2_kernel_code = +"__kernel void test_fpmul2(__global float2 
*srcA, __global float2 *srcB, __global float2 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] * srcB[tid];\n" +"}\n"; + + +int +verify_fpadd2(float *inptrA, float *inptrB, float *outptr, int n) +{ + float r; + int i; + + for (i=0; i +#include +#include +#include + + +#include "procs.h" +#include "../../test_common/harness/rounding_mode.h" + +const char *fpadd4_kernel_code = +"__kernel void test_fpadd4(__global float4 *srcA, __global float4 *srcB, __global float4 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] + srcB[tid];\n" +"}\n"; + +const char *fpsub4_kernel_code = +"__kernel void test_fpsub4(__global float4 *srcA, __global float4 *srcB, __global float4 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] - srcB[tid];\n" +"}\n"; + +const char *fpmul4_kernel_code = +"__kernel void test_fpmul4(__global float4 *srcA, __global float4 *srcB, __global float4 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] * srcB[tid];\n" +"}\n"; + + +int +verify_fpadd4(float *inptrA, float *inptrB, float *outptr, int n) +{ + float r; + int i; + + for (i=0; i + + +const char *work_offset_test[] = { + "__kernel void test( __global int * outputID_A, \n" + " __global int * outputID_B, __global int * outputID_C )\n" + "{\n" + " size_t id0 = get_local_id( 0 ) + get_group_id( 0 ) * get_local_size( 0 );\n" + " size_t id1 = get_local_id( 1 ) + get_group_id( 1 ) * get_local_size( 1 );\n" + " size_t id2 = get_local_id( 2 ) + get_group_id( 2 ) * get_local_size( 2 );\n" + " size_t id = ( id2 * get_global_size( 0 ) * get_global_size( 1 ) ) + ( id1 * get_global_size( 0 ) ) + id0;\n" + "\n" + " outputID_A[ id ] = get_global_id( 0 );\n" + " outputID_B[ id ] = get_global_id( 1 );\n" + " outputID_C[ id ] = get_global_id( 2 );\n" + "}\n" + }; + +#define MAX_TEST_ITEMS 16 * 16 * 16 +#define NUM_TESTS 16 +#define MAX_OFFSET 256 + +#define CHECK_RANGE( v, m, c ) \ + if( ( v >= (cl_int)m 
) || ( v < 0 ) ) \ + { \ + log_error( "ERROR: ouputID_%c[%lu]: %d is < 0 or >= %lu\n", c, i, v, m ); \ + return -1; \ + } + +int check_results( size_t threads[], size_t offsets[], cl_int outputA[], cl_int outputB[], cl_int outputC[] ) +{ + size_t offsettedSizes[ 3 ] = { threads[ 0 ] + offsets[ 0 ], threads[ 1 ] + offsets[ 1 ], threads[ 2 ] + offsets[ 2 ] }; + size_t limit = threads[ 0 ] * threads[ 1 ] * threads[ 2 ]; + + static char counts[ MAX_OFFSET + 32 ][ MAX_OFFSET + 16 ][ MAX_OFFSET + 16 ]; + memset( counts, 0, sizeof( counts ) ); + + for( size_t i = 0; i < limit; i++ ) + { + // Check ranges first + CHECK_RANGE( outputA[ i ], offsettedSizes[ 0 ], 'A' ) + CHECK_RANGE( outputB[ i ], offsettedSizes[ 1 ], 'B' ) + CHECK_RANGE( outputC[ i ], offsettedSizes[ 2 ], 'C' ) + + // Now set the value in the map + counts[ outputA[ i ] ][ outputB[ i ] ][ outputC[ i ] ]++; + } + + // Now check the map + int missed = 0, multiple = 0, errored = 0, corrected = 0; + for( size_t x = 0; x < offsettedSizes[ 0 ]; x++ ) + { + for( size_t y = 0; y < offsettedSizes[ 1 ]; y++ ) + { + for( size_t z = 0; z < offsettedSizes[ 2 ]; z++ ) + { + const char * limitMsg = " (further errors of this type suppressed)"; + if( ( x >= offsets[ 0 ] ) && ( y >= offsets[ 1 ] ) && ( z >= offsets[ 2 ] ) ) + { + if( counts[ x ][ y ][ z ] < 1 ) + { + if( missed < 3 ) + log_error( "ERROR: Map value (%ld,%ld,%ld) was missed%s\n", x, y, z, ( missed == 2 ) ? limitMsg : "" ); + missed++; + } + else if( counts[ x ][ y ][ z ] > 1 ) + { + if( multiple < 3 ) + log_error( "ERROR: Map value (%ld,%ld,%ld) was returned multiple times%s\n", x, y, z, ( multiple == 2 ) ? limitMsg : "" ); + multiple++; + } + } + else + { + if( counts[ x ][ y ][ z ] > 0 ) + { + if( errored < 3 ) + log_error( "ERROR: Map value (%ld,%ld,%ld) was erroneously returned%s\n", x, y, z, ( errored == 2 ) ? limitMsg : "" ); + errored++; + } + } + } + } + } + + if( missed || multiple || errored ) + { + size_t diffs[3] = { ( offsets[ 0 ] > threads[ 0 ] ? 
0 : threads[ 0 ] - offsets[ 0 ] ), + ( offsets[ 1 ] > threads[ 1 ] ? 0 : threads[ 1 ] - offsets[ 1 ] ), + ( offsets[ 2 ] > threads[ 2 ] ? 0 : threads[ 2 ] - offsets[ 2 ] ) }; + int diff = (int)( ( threads[ 0 ] - diffs[ 0 ] ) * ( threads[ 1 ] - diffs[ 1 ] ) * ( threads[ 2 ] - diffs[ 2 ] ) ); + + if( ( multiple == 0 ) && ( missed == diff ) && ( errored == diff ) ) + log_error( "ERROR: Global work offset values are not being respected by get_global_id()\n" ); + else + log_error( "ERROR: Global work offset values did not function as expected (%d missed, %d reported multiple times, %d erroneously hit)\n", + missed, multiple, errored ); + } + return ( missed | multiple | errored | corrected ); +} + +int test_global_work_offsets(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper streams[ 7 ]; + + int error; + size_t threads[] = {1,1,1}, localThreads[] = {1,1,1}, offsets[] = {0,0,0}; + cl_int outputA[ MAX_TEST_ITEMS ], outputB[ MAX_TEST_ITEMS ], outputC[ MAX_TEST_ITEMS ]; + + + // Create the kernel + if( create_single_kernel_helper( context, &program, &kernel, 1, work_offset_test, "test" ) != 0 ) + { + return -1; + } + + //// Create some output streams + + // Use just one output array to init them all (no need to init every single stack storage here) + memset( outputA, 0xff, sizeof( outputA ) ); + for( int i = 0; i < 3; i++ ) + { + streams[ i ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR), sizeof(outputA), outputA, &error ); + test_error( error, "Unable to create output array" ); + } + + // Run a few different times + MTdata seed = init_genrand( gRandomSeed ); + for( int test = 0; test < NUM_TESTS; test++ ) + { + // Choose a random combination of thread size, but in total less than MAX_TEST_ITEMS + threads[ 0 ] = random_in_range( 1, 32, seed ); + threads[ 1 ] = random_in_range( 1, 16, seed ); + threads[ 2 ] = random_in_range( 1, 
MAX_TEST_ITEMS / (int)( threads[ 0 ] * threads[ 1 ] ), seed ); + + // Make sure we get the local thread count right + error = get_max_common_3D_work_group_size( context, kernel, threads, localThreads ); + test_error( error, "Unable to determine local work group sizes" ); + + // Randomize some offsets + for( int j = 0; j < 3; j++ ) + offsets[ j ] = random_in_range( 0, MAX_OFFSET, seed ); + + log_info( "\tTesting %ld,%ld,%ld (%ld,%ld,%ld) with offsets (%ld,%ld,%ld)...\n", + threads[ 0 ], threads[ 1 ], threads[ 2 ], localThreads[ 0 ], localThreads[ 1 ], localThreads[ 2 ], + offsets[ 0 ], offsets[ 1 ], offsets[ 2 ] ); + + // Now set up and run + for( int i = 0; i < 3; i++ ) + { + error = clSetKernelArg( kernel, i, sizeof( streams[i] ), &streams[i] ); + test_error( error, "Unable to set indexed kernel arguments" ); + } + + error = clEnqueueNDRangeKernel( queue, kernel, 3, offsets, threads, localThreads, 0, NULL, NULL ); + test_error( error, "Kernel execution failed" ); + + // Read our results back now + cl_int * resultBuffers[] = { outputA, outputB, outputC }; + for( int i = 0; i < 3; i++ ) + { + error = clEnqueueReadBuffer( queue, streams[ i ], CL_TRUE, 0, sizeof( outputA ), resultBuffers[ i ], 0, NULL, NULL ); + test_error( error, "Unable to get result data" ); + } + + // Now we need to check the results. The outputs should have one entry for each possible ID, + // but they won't be in order, so we need to construct a count map to determine what we got + if( check_results( threads, offsets, outputA, outputB, outputC ) ) + { + log_error( "\t(Test failed for global dim %ld,%ld,%ld, local dim %ld,%ld,%ld, offsets %ld,%ld,%ld)\n", + threads[ 0 ], threads[ 1 ], threads[ 2 ], localThreads[ 0 ], localThreads[ 1 ], localThreads[ 2 ], + offsets[ 0 ], offsets[ 1 ], offsets[ 2 ] ); + return -1; + } + } + + free_mtdata(seed); + + // All done! 
+ return 0; +} + +const char *get_offset_test[] = { + "__kernel void test( __global int * outOffsets )\n" + "{\n" + " // We use local ID here so we don't have to worry about offsets\n" + " // Also note that these should be the same for ALL threads, so we won't worry about contention\n" + " outOffsets[ 0 ] = (int)get_global_offset( 0 );\n" + " outOffsets[ 1 ] = (int)get_global_offset( 1 );\n" + " outOffsets[ 2 ] = (int)get_global_offset( 2 );\n" + "}\n" +}; + +int test_get_global_offset(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper streams[ 1 ]; + + int error; + size_t threads[] = {1,1,1}, localThreads[] = {1,1,1}, offsets[] = {0,0,0}; + cl_int outOffsets[ 3 ]; + + + // Create the kernel + if( create_single_kernel_helper( context, &program, &kernel, 1, get_offset_test, "test" ) != 0 ) + { + return -1; + } + + // Create some output streams, and storage for a single control ID + memset( outOffsets, 0xff, sizeof( outOffsets ) ); + streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR), sizeof( outOffsets ), outOffsets, &error ); + test_error( error, "Unable to create control ID buffer" ); + + // Run a few different times + MTdata seed = init_genrand( gRandomSeed ); + for( int test = 0; test < NUM_TESTS; test++ ) + { + // Choose a random combination of thread size, but in total less than MAX_TEST_ITEMS + threads[ 0 ] = random_in_range( 1, 32, seed ); + threads[ 1 ] = random_in_range( 1, 16, seed ); + threads[ 2 ] = random_in_range( 1, MAX_TEST_ITEMS / (int)( threads[ 0 ] * threads[ 1 ] ), seed ); + + // Make sure we get the local thread count right + error = get_max_common_3D_work_group_size( context, kernel, threads, localThreads ); + test_error( error, "Unable to determine local work group sizes" ); + + // Randomize some offsets + for( int j = 0; j < 3; j++ ) + offsets[ j ] = random_in_range( 0, MAX_OFFSET, seed ); + + 
log_info( "\tTesting %ld,%ld,%ld (%ld,%ld,%ld) with offsets (%ld,%ld,%ld)...\n", + threads[ 0 ], threads[ 1 ], threads[ 2 ], localThreads[ 0 ], localThreads[ 1 ], localThreads[ 2 ], + offsets[ 0 ], offsets[ 1 ], offsets[ 2 ] ); + + // Now set up and run + error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] ); + test_error( error, "Unable to set indexed kernel arguments" ); + + error = clEnqueueNDRangeKernel( queue, kernel, 3, offsets, threads, localThreads, 0, NULL, NULL ); + test_error( error, "Kernel execution failed" ); + + // Read our results back now + error = clEnqueueReadBuffer( queue, streams[ 0 ], CL_TRUE, 0, sizeof( outOffsets ), outOffsets, 0, NULL, NULL ); + test_error( error, "Unable to get result data" ); + + // And check! + int errors = 0; + for( int j = 0; j < 3; j++ ) + { + if( outOffsets[ j ] != (cl_int)offsets[ j ] ) + { + log_error( "ERROR: get_global_offset( %d ) did not return expected value (expected %ld, got %d)\n", j, offsets[ j ], outOffsets[ j ] ); + errors++; + } + } + if( errors > 0 ) + return errors; + } + free_mtdata(seed); + + // All done! + return 0; +} + diff --git a/test_conformance/compatibility/test_conformance/basic/test_hiloeo.c b/test_conformance/compatibility/test_conformance/basic/test_hiloeo.c new file mode 100644 index 00000000..72e97cee --- /dev/null +++ b/test_conformance/compatibility/test_conformance/basic/test_hiloeo.c @@ -0,0 +1,421 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include +#include + + +#include "procs.h" + +int hi_offset( int index, int vectorSize) { return index + vectorSize / 2; } +int lo_offset( int index, int vectorSize) { return index; } +int even_offset( int index, int vectorSize ) { return index * 2; } +int odd_offset( int index, int vectorSize ) { return index * 2 + 1; } + +typedef int (*OffsetFunc)( int index, int vectorSize ); +static const OffsetFunc offsetFuncs[4] = { hi_offset, lo_offset, even_offset, odd_offset }; +typedef int (*verifyFunc)( const void *, const void *, const void *, int n, const char *sizeName ); +static const char *operatorToUse_names[] = { "hi", "lo", "even", "odd" }; +static const char *test_str_names[] = { "char", "uchar", "short", "ushort", "int", "uint", "long", "ulong", "float", "double" }; + +static const unsigned int vector_sizes[] = { 1, 2, 3, 4, 8, 16}; +static const unsigned int vector_aligns[] = { 1, 2, 4, 4, 8, 16}; +static const unsigned int out_vector_idx[] = { 0, 0, 1, 1, 3, 4}; +// if input is size vector_sizes[i], output is size +// vector_sizes[out_vector_idx[i]] +// input type name is strcat(gentype, vector_size_names[i]); +// and output type name is +// strcat(gentype, vector_size_names[out_vector_idx[i]]); +static const int size_to_idx[] = {-1,0,1,2,3,-1,-1,-1,4, + -1,-1,-1,-1,-1,-1,-1,5}; +static const char *vector_size_names[] = { "", "2", "3", "4", "8", "16"}; + +static const size_t kSizes[] = { 1, 1, 2, 2, 4, 4, 8, 8, 4, 8 }; +static int CheckResults( void *in, void *out, size_t elementCount, int type, int vectorSize, int operatorToUse ); + +int test_hiloeo(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + cl_int *input_ptr, *output_ptr, *p; + int err; + cl_uint i; + int hasDouble = is_extension_available( device, "cl_khr_fp64" ); + cl_uint 
vectorSize, operatorToUse; + cl_uint type; + MTdata d; + + int expressionMode; + int numExpressionModes = 2; + + size_t length = sizeof(cl_int) * 4 * n_elems; + + input_ptr = (cl_int*)malloc(length); + output_ptr = (cl_int*)malloc(length); + + p = input_ptr; + d = init_genrand( gRandomSeed ); + for (i=0; i<4 * (cl_uint) n_elems; i++) + p[i] = genrand_int32(d); + free_mtdata(d); d = NULL; + + for( type = 0; type < sizeof( test_str_names ) / sizeof( test_str_names[0] ); type++ ) + { + // Note: restrict the element count here so we don't end up overrunning the output buffer if we're compensating for 32-bit writes + size_t elementCount = length / kSizes[type]; + cl_mem streams[2]; + + // skip double if unavailable + if( !hasDouble && ( 0 == strcmp( test_str_names[type], "double" ))) + continue; + + if( !gHasLong && + ( 0 == strcmp( test_str_names[type], "long" )) && + ( 0 == strcmp( test_str_names[type], "ulong" ))) + continue; + + log_info( "%s", test_str_names[type] ); + fflush( stdout ); + + // Set up data streams for the type + streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL); + if (!streams[0]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL); + if (!streams[1]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + + err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + log_error("clEnqueueWriteBuffer failed\n"); + return -1; + } + + for( operatorToUse = 0; operatorToUse < sizeof( operatorToUse_names ) / sizeof( operatorToUse_names[0] ); operatorToUse++ ) + { + log_info( " %s", operatorToUse_names[ operatorToUse ] ); + fflush( stdout ); + for( vectorSize = 1; vectorSize < sizeof( vector_size_names ) / sizeof( vector_size_names[0] ); vectorSize++ ) { + for(expressionMode = 0; expressionMode < numExpressionModes; ++expressionMode) { + + cl_program program = NULL; + 
cl_kernel kernel = NULL; + cl_uint outVectorSize = out_vector_idx[vectorSize]; + char expression[1024]; + + const char *source[] = { + "", // optional pragma string + "__kernel void test_", operatorToUse_names[ operatorToUse ], "_", test_str_names[type], vector_size_names[vectorSize], + "(__global ", test_str_names[type], vector_size_names[vectorSize], + " *srcA, __global ", test_str_names[type], vector_size_names[outVectorSize], + " *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " ", test_str_names[type], + vector_size_names[out_vector_idx[vectorSize]], + " tmp = ", expression, ".", operatorToUse_names[ operatorToUse ], ";\n" + " dst[tid] = tmp;\n" + "}\n" + }; + + if(expressionMode == 0) { + sprintf(expression, "srcA[tid]"); + } else if(expressionMode == 1) { + switch(vector_sizes[vectorSize]) { + case 16: + sprintf(expression, + "((%s16)(srcA[tid].s0, srcA[tid].s1, srcA[tid].s2, srcA[tid].s3, srcA[tid].s4, srcA[tid].s5, srcA[tid].s6, srcA[tid].s7, srcA[tid].s8, srcA[tid].s9, srcA[tid].sA, srcA[tid].sB, srcA[tid].sC, srcA[tid].sD, srcA[tid].sE, srcA[tid].sf))", + test_str_names[type] + ); + break; + case 8: + sprintf(expression, + "((%s8)(srcA[tid].s0, srcA[tid].s1, srcA[tid].s2, srcA[tid].s3, srcA[tid].s4, srcA[tid].s5, srcA[tid].s6, srcA[tid].s7))", + test_str_names[type] + ); + break; + case 4: + sprintf(expression, + "((%s4)(srcA[tid].s0, srcA[tid].s1, srcA[tid].s2, srcA[tid].s3))", + test_str_names[type] + ); + break; + case 3: + sprintf(expression, + "((%s3)(srcA[tid].s0, srcA[tid].s1, srcA[tid].s2))", + test_str_names[type] + ); + break; + case 2: + sprintf(expression, + "((%s2)(srcA[tid].s0, srcA[tid].s1))", + test_str_names[type] + ); + break; + default : + sprintf(expression, "srcA[tid]"); + log_info("Default\n"); + } + } else { + sprintf(expression, "srcA[tid]"); + } + + if (0 == strcmp( test_str_names[type], "double" )) + source[0] = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; + + char kernelName[128]; + snprintf( kernelName, 
sizeof( kernelName ), "test_%s_%s%s", operatorToUse_names[ operatorToUse ], test_str_names[type], vector_size_names[vectorSize] ); + err = create_single_kernel_helper(context, &program, &kernel, sizeof( source ) / sizeof( source[0] ), source, kernelName ); + if (err) + return -1; + + err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]); + err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]); + if (err != CL_SUCCESS) + { + log_error("clSetKernelArgs failed\n"); + return -1; + } + + //Wipe the output buffer clean + uint32_t pattern = 0xdeadbeef; + memset_pattern4( output_ptr, &pattern, length ); + err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + log_error("clEnqueueWriteBuffer failed\n"); + return -1; + } + + size_t size = elementCount / (vector_aligns[vectorSize]); + err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &size, NULL, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + log_error("clEnqueueNDRangeKernel failed\n"); + return -1; + } + + err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + log_error("clEnqueueReadBuffer failed\n"); + return -1; + } + + char *inP = (char *)input_ptr; + char *outP = (char *)output_ptr; + outP += kSizes[type] * ( ( vector_sizes[outVectorSize] ) - + ( vector_sizes[ out_vector_idx[vectorSize] ] ) ); + // was outP += kSizes[type] * ( ( 1 << outVectorSize ) - ( 1 << ( vectorSize - 1 ) ) ); + for( size_t e = 0; e < size; e++ ) + { + if( CheckResults( inP, outP, 1, type, vectorSize, operatorToUse ) ) { + + log_info("e is %d\n", (int)e); + fflush(stdout); + // break; + return -1; + } + inP += kSizes[type] * ( vector_aligns[vectorSize] ); + outP += kSizes[type] * ( vector_aligns[outVectorSize] ); + } + + clReleaseKernel( kernel ); + clReleaseProgram( program ); + log_info( "." 
); + fflush( stdout ); + } + } + } + + clReleaseMemObject( streams[0] ); + clReleaseMemObject( streams[1] ); + log_info( "done\n" ); + } + + log_info("HiLoEO test passed\n"); + + free(input_ptr); + free(output_ptr); + + return err; +} + +static int CheckResults( void *in, void *out, size_t elementCount, int type, int vectorSize, int operatorToUse ) +{ + cl_ulong array[8]; + void *p = array; + size_t halfVectorSize = vector_sizes[out_vector_idx[vectorSize]]; + size_t cmpVectorSize = vector_sizes[out_vector_idx[vectorSize]]; + // was 1 << (vectorSize-1); + OffsetFunc f = offsetFuncs[ operatorToUse ]; + size_t elementSize = kSizes[type]; + + if(vector_size_names[vectorSize][0] == '3') { + if(operatorToUse_names[operatorToUse][0] == 'h' || + operatorToUse_names[operatorToUse][0] == 'o') // hi or odd + { + cmpVectorSize = 1; // special case for vec3 ignored values + } + } + + switch( elementSize ) + { + case 1: + { + char *i = (char*)in; + char *o = (char*)out; + size_t j; + cl_uint k; + OffsetFunc f = offsetFuncs[ operatorToUse ]; + + for( k = 0; k < elementCount; k++ ) + { + char *o2 = (char*)p; + for( j = 0; j < halfVectorSize; j++ ) + o2[j] = i[ f((int)j, (int)halfVectorSize*2) ]; + + if( memcmp( o, o2, elementSize * cmpVectorSize ) ) + { + log_info( "\n%d) Failure for %s%s.%s { %d", k, test_str_names[type], vector_size_names[ vectorSize ], operatorToUse_names[ operatorToUse ], i[0] ); + for( j = 1; j < halfVectorSize * 2; j++ ) + log_info( ", %d", i[j] ); + log_info( " } --> { %d", o[0] ); + for( j = 1; j < halfVectorSize; j++ ) + log_info( ", %d", o[j] ); + log_info( " }\n" ); + return -1; + } + i += 2 * halfVectorSize; + o += halfVectorSize; + } + } + break; + + case 2: + { + short *i = (short*)in; + short *o = (short*)out; + size_t j; + cl_uint k; + + for( k = 0; k < elementCount; k++ ) + { + short *o2 = (short*)p; + for( j = 0; j < halfVectorSize; j++ ) + o2[j] = i[ f((int)j, (int)halfVectorSize*2) ]; + + if( memcmp( o, o2, elementSize * cmpVectorSize ) ) + { + 
log_info( "\n%d) Failure for %s%s.%s { %d", k, test_str_names[type], vector_size_names[ vectorSize ], operatorToUse_names[ operatorToUse ], i[0] ); + for( j = 1; j < halfVectorSize * 2; j++ ) + log_info( ", %d", i[j] ); + log_info( " } --> { %d", o[0] ); + for( j = 1; j < halfVectorSize; j++ ) + log_info( ", %d", o[j] ); + log_info( " }\n" ); + return -1; + } + i += 2 * halfVectorSize; + o += halfVectorSize; + } + } + break; + + case 4: + { + int *i = (int*)in; + int *o = (int*)out; + size_t j; + cl_uint k; + + for( k = 0; k < elementCount; k++ ) + { + int *o2 = (int *)p; + for( j = 0; j < halfVectorSize; j++ ) + o2[j] = i[ f((int)j, (int)halfVectorSize*2) ]; + + for( j = 0; j < cmpVectorSize; j++ ) + { + /* Allow float nans to be binary different */ + if( memcmp( &o[j], &o2[j], elementSize ) && !((strcmp(test_str_names[type], "float") == 0) && isnan(((float *)o)[j]) && isnan(((float *)o2)[j]))) + { + log_info( "\n%d) Failure for %s%s.%s { 0x%8.8x", k, test_str_names[type], vector_size_names[ vectorSize ], operatorToUse_names[ operatorToUse ], i[0] ); + for( j = 1; j < halfVectorSize * 2; j++ ) + log_info( ", 0x%8.8x", i[j] ); + log_info( " } --> { 0x%8.8x", o[0] ); + for( j = 1; j < halfVectorSize; j++ ) + log_info( ", 0x%8.8x", o[j] ); + log_info( " }\n" ); + return -1; + } + } + i += 2 * halfVectorSize; + o += halfVectorSize; + } + } + break; + + case 8: + { + cl_ulong *i = (cl_ulong*)in; + cl_ulong *o = (cl_ulong*)out; + size_t j; + cl_uint k; + + for( k = 0; k < elementCount; k++ ) + { + cl_ulong *o2 = (cl_ulong*)p; + for( j = 0; j < halfVectorSize; j++ ) + o2[j] = i[ f((int)j, (int)halfVectorSize*2) ]; + + if( memcmp( o, o2, elementSize * cmpVectorSize ) ) + { + log_info( "\n%d) Failure for %s%s.%s { 0x%16.16llx", k, test_str_names[type], vector_size_names[ vectorSize ], operatorToUse_names[ operatorToUse ], i[0] ); + for( j = 1; j < halfVectorSize * 2; j++ ) + log_info( ", 0x%16.16llx", i[j] ); + log_info( " } --> { 0x%16.16llx", o[0] ); + for( j = 1; j < 
halfVectorSize; j++ ) + log_info( ", 0x%16.16llx", o[j] ); + log_info( " }\n" ); + return -1; + } + i += 2 * halfVectorSize; + o += halfVectorSize; + } + } + break; + + default: + log_info( "Internal error. Unknown data type\n" ); + return -2; + } + + return 0; +} + + + diff --git a/test_conformance/compatibility/test_conformance/basic/test_hostptr.c b/test_conformance/compatibility/test_conformance/basic/test_hostptr.c new file mode 100644 index 00000000..fed42ff0 --- /dev/null +++ b/test_conformance/compatibility/test_conformance/basic/test_hostptr.c @@ -0,0 +1,276 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include + + +#include "procs.h" + +const char *hostptr_kernel_code = +"__kernel void test_hostptr(__global float *srcA, __global float *srcB, __global float *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] + srcB[tid];\n" +"}\n"; + +static const float MAX_ERR = 1e-5f; + +static int verify_hostptr(cl_float *inptrA, cl_float *inptrB, cl_float *outptr, int n) +{ + cl_float r; + int i; + + for (i=0; i +#include +#include +#include + + +#include "procs.h" + +const char *conditional_kernel_code = +"__kernel void test_if(__global int *src, __global int *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" if (src[tid] == 0)\n" +" dst[tid] = 0x12345678;\n" +" else if (src[tid] == 1)\n" +" dst[tid] = 0x23456781;\n" +" else if (src[tid] == 2)\n" +" dst[tid] = 0x34567812;\n" +" else if (src[tid] == 3)\n" +" dst[tid] = 0x45678123;\n" +" else if (src[tid] == 4)\n" +" dst[tid] = 0x56781234;\n" +" else if (src[tid] == 5)\n" +" dst[tid] = 0x67812345;\n" +" else if (src[tid] == 6)\n" +" dst[tid] = 0x78123456;\n" +" else if (src[tid] == 7)\n" +" dst[tid] = 0x81234567;\n" +" else\n" +" dst[tid] = 0x7FFFFFFF;\n" +"\n" +"}\n"; + +const int results[] = { + 0x12345678, + 0x23456781, + 0x34567812, + 0x45678123, + 0x56781234, + 0x67812345, + 0x78123456, + 0x81234567, +}; + +int +verify_if(int *inptr, int *outptr, int n) +{ + int r, i; + + for (i=0; i +#include +#include +#include + + +#include "procs.h" + +static const char *image_to_image_kernel_integer_coord_code = +"\n" +"__kernel void image_to_image_copy(read_only image2d_t srcimg, write_only image2d_t dstimg, sampler_t sampler)\n" +"{\n" +" int tid_x = get_global_id(0);\n" +" int tid_y = get_global_id(1);\n" +" float4 color;\n" +"\n" +" color = read_imagef(srcimg, sampler, (int2)(tid_x, tid_y));\n" +" write_imagef(dstimg, (int2)(tid_x, tid_y), color);\n" +"\n" +"}\n"; + +static const char 
*image_to_image_kernel_float_coord_code = +"\n" +"__kernel void image_to_image_copy(read_only image2d_t srcimg, write_only image2d_t dstimg, sampler_t sampler)\n" +"{\n" +" int tid_x = get_global_id(0);\n" +" int tid_y = get_global_id(1);\n" +" float4 color;\n" +"\n" +" color = read_imagef(srcimg, sampler, (float2)((float)tid_x, (float)tid_y));\n" +" write_imagef(dstimg, (int2)(tid_x, tid_y), color);\n" +"\n" +"}\n"; + + +static const char *image_sum_kernel_integer_coord_code = +"\n" +"__kernel void image_sum(read_only image2d_t srcimg0, read_only image2d_t srcimg1, write_only image2d_t dstimg, sampler_t sampler)\n" +"{\n" +" int tid_x = get_global_id(0);\n" +" int tid_y = get_global_id(1);\n" +" float4 color0;\n" +" float4 color1;\n" +"\n" +" color0 = read_imagef(srcimg0, sampler, (int2)(tid_x, tid_y));\n" +" color1 = read_imagef(srcimg1, sampler, (int2)(tid_x, tid_y));\n" +" write_imagef(dstimg, (int2)(tid_x, tid_y), color0 + color1);\n" +"\n" +"}\n"; + + +static const char *image_sum_kernel_float_coord_code = +"\n" +"__kernel void image_sum(read_only image2d_t srcimg0, read_only image2d_t srcimg1, write_only image2d_t dstimg, sampler_t sampler)\n" +"{\n" +" int tid_x = get_global_id(0);\n" +" int tid_y = get_global_id(1);\n" +" float4 color0;\n" +" float4 color1;\n" +"\n" +" color0 = read_imagef(srcimg0, sampler, (float2)((float)tid_x, (float)tid_y));\n" +" color1 = read_imagef(srcimg1, sampler, (float2)((float)tid_x, (float)tid_y));\n" +" write_imagef(dstimg,(int2)(tid_x, tid_y), color0 + color1);\n" +"\n" +"}\n"; + + +static unsigned char * +generate_initial_byte_image(int w, int h, int num_elements, unsigned char value) +{ + unsigned char *ptr = (unsigned char*)malloc(w * h * num_elements); + int i; + + for (i = 0; i < w*h*num_elements; i++) + ptr[i] = value; + + return ptr; +} + +static unsigned char * +generate_expected_byte_image(unsigned char **input_data, int num_inputs, int w, int h, int num_elements) +{ + unsigned char *ptr = (unsigned char*)malloc(w * 
h * num_elements); + int i; + + for (i = 0; i < w*h*num_elements; i++) + { + int j; + ptr[i] = 0; + for (j = 0; j < num_inputs; j++) + { + unsigned char *input = *(input_data + j); + ptr[i] += input[i]; + } + } + + return ptr; +} + + +static unsigned char * +generate_byte_image(int w, int h, int num_elements, MTdata d) +{ + unsigned char *ptr = (unsigned char*)malloc(w * h * num_elements); + int i; + + for (i = 0; i < w*h*num_elements; i++) + ptr[i] = (unsigned char)genrand_int32(d) & 31; + + return ptr; +} + +static int +verify_byte_image(unsigned char *image, unsigned char *outptr, int w, int h, int num_elements) +{ + int i; + + for (i = 0; i < w*h*num_elements; i++) + { + if (outptr[i] != image[i]) + { + return -1; + } + } + return 0; +} + +int +test_image_multipass_integer_coord(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + int img_width = 512; + int img_height = 512; + cl_image_format img_format; + + int num_input_streams = 8; + cl_mem *input_streams; + cl_mem accum_streams[2]; + unsigned char *expected_output; + unsigned char *output_ptr; + cl_kernel kernel[2]; + int err; + + PASSIVE_REQUIRE_IMAGE_SUPPORT( device ) + + img_format.image_channel_order = CL_RGBA; + img_format.image_channel_data_type = CL_UNORM_INT8; + + expected_output = (unsigned char*)malloc(sizeof(unsigned char) * 4 * img_width * img_height); + output_ptr = (unsigned char*)malloc(sizeof(unsigned char) * 4 * img_width * img_height); + + // Create the accum images with initial data. 
+ { + unsigned char *initial_data; + cl_mem_flags flags; + + initial_data = generate_initial_byte_image(img_width, img_height, 4, 0xF0); + flags = (cl_mem_flags)(CL_MEM_READ_WRITE); + + accum_streams[0] = create_image_2d(context, flags, &img_format, img_width, img_height, 0, NULL, NULL); + if (!accum_streams[0]) + { + log_error("create_image_2d failed\n"); + free(expected_output); + free(output_ptr); + return -1; + } + + size_t origin[3] = {0, 0, 0}, region[3] = {img_width, img_height, 1}; + err = clEnqueueWriteImage(queue, accum_streams[0], CL_TRUE, + origin, region, 0, 0, + initial_data, 0, NULL, NULL); + if (err) + { + log_error("clWriteImage failed: %d\n", err); + free(expected_output); + free(output_ptr); + return -1; + } + + accum_streams[1] = create_image_2d(context, flags, &img_format, img_width, img_height, 0, NULL, NULL); + if (!accum_streams[1]) + { + log_error("create_image_2d failed\n"); + free(expected_output); + free(output_ptr); + return -1; + } + err = clEnqueueWriteImage(queue, accum_streams[1], CL_TRUE, + origin, region, 0, 0, + initial_data, 0, NULL, NULL); + if (err) + { + log_error("clWriteImage failed: %d\n", err); + free(expected_output); + free(output_ptr); + return -1; + } + + free(initial_data); + } + + // Set up the input data. 
+ { + cl_mem_flags flags; + unsigned char **input_data = (unsigned char **)malloc(sizeof(unsigned char*) * num_input_streams); + MTdata d; + + input_streams = (cl_mem*)malloc(sizeof(cl_mem) * num_input_streams); + flags = (cl_mem_flags)(CL_MEM_READ_WRITE); + + int i; + d = init_genrand( gRandomSeed ); + for ( i = 0; i < num_input_streams; i++) + { + input_data[i] = generate_byte_image(img_width, img_height, 4, d); + input_streams[i] = create_image_2d(context, flags, &img_format, img_width, img_height, 0, NULL, NULL); + if (!input_streams[i]) + { + log_error("create_image_2d failed\n"); + free_mtdata(d); + free(expected_output); + free(output_ptr); + return -1; + } + + size_t origin[3] = {0, 0, 0}, region[3] = {img_width, img_height, 1}; + err = clEnqueueWriteImage(queue, input_streams[i], CL_TRUE, + origin, region, 0, 0, + input_data[i], 0, NULL, NULL); + if (err) + { + log_error("clWriteImage failed: %d\n", err); + free_mtdata(d); + free(expected_output); + free(output_ptr); + free(input_streams); + return -1; + } + + + } + free_mtdata(d); d = NULL; + expected_output = generate_expected_byte_image(input_data, num_input_streams, img_width, img_height, 4); + for ( i = 0; i < num_input_streams; i++) + { + free(input_data[i]); + } + free( input_data ); + } + + // Set up the kernels. 
+ { + cl_program program[4]; + + err = create_single_kernel_helper(context, &program[0], &kernel[0], 1, &image_to_image_kernel_integer_coord_code, "image_to_image_copy"); + if (err) + { + log_error("Failed to create kernel 0: %d\n", err); + return -1; + } + err = create_single_kernel_helper(context, &program[1], &kernel[1], 1, &image_sum_kernel_integer_coord_code, "image_sum"); + if (err) + { + log_error("Failed to create kernel 1: %d\n", err); + return -1; + } + clReleaseProgram(program[0]); + clReleaseProgram(program[1]); + } + + cl_sampler sampler = clCreateSampler(context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &err); + test_error(err, "clCreateSampler failed"); + + { + size_t threads[3] = {0, 0, 0}; + threads[0] = (size_t)img_width; + threads[1] = (size_t)img_height; + int i; + + { + cl_mem accum_input; + cl_mem accum_output; + + err = clSetKernelArg(kernel[0], 0, sizeof input_streams[0], &input_streams[0]); + err |= clSetKernelArg(kernel[0], 1, sizeof accum_streams[0], &accum_streams[0]); + err |= clSetKernelArg(kernel[0], 2, sizeof sampler, &sampler); + if (err != CL_SUCCESS) + { + log_error("clSetKernelArgs failed\n"); + return -1; + } + err = clEnqueueNDRangeKernel( queue, kernel[0], 2, NULL, threads, NULL, 0, NULL, NULL ); + if (err != CL_SUCCESS) + { + log_error("clEnqueueNDRangeKernel failed\n"); + return -1; + } + + for (i = 1; i < num_input_streams; i++) + { + accum_input = accum_streams[(i-1)%2]; + accum_output = accum_streams[i%2]; + + err = clSetKernelArg(kernel[1], 0, sizeof accum_input, &accum_input); + err |= clSetKernelArg(kernel[1], 1, sizeof input_streams[i], &input_streams[i]); + err |= clSetKernelArg(kernel[1], 2, sizeof accum_output, &accum_output); + err |= clSetKernelArg(kernel[1], 3, sizeof sampler, &sampler); + + if (err != CL_SUCCESS) + { + log_error("clSetKernelArgs failed\n"); + return -1; + } + err = clEnqueueNDRangeKernel( queue, kernel[1], 2, NULL, threads, NULL, 0, NULL, NULL ); + if (err != CL_SUCCESS) + { + 
log_error("clEnqueueNDRangeKernel failed\n"); + return -1; + } + } + + // Copy the last accum into the other one. + accum_input = accum_streams[(i-1)%2]; + accum_output = accum_streams[i%2]; + err = clSetKernelArg(kernel[0], 0, sizeof accum_input, &accum_input); + err |= clSetKernelArg(kernel[0], 1, sizeof accum_output, &accum_output); + if (err != CL_SUCCESS) + { + log_error("clSetKernelArgs failed\n"); + return -1; + } + err = clEnqueueNDRangeKernel( queue, kernel[0], 2, NULL, threads, NULL, 0, NULL, NULL ); + if (err != CL_SUCCESS) + { + log_error("clEnqueueNDRangeKernel failed\n"); + return -1; + } + + size_t origin[3] = {0, 0, 0}, region[3] = {img_width, img_height, 1}; + err = clEnqueueReadImage(queue, accum_output, CL_TRUE, + origin, region, 0, 0, + (void *)output_ptr, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + log_error("clReadImage failed\n"); + return -1; + } + err = verify_byte_image(expected_output, output_ptr, img_width, img_height, 4); + if (err) + { + log_error("IMAGE_MULTIPASS test failed.\n"); + } + else + { + log_info("IMAGE_MULTIPASS test passed\n"); + } + } + + clReleaseSampler(sampler); + } + + + // cleanup + clReleaseMemObject(accum_streams[0]); + clReleaseMemObject(accum_streams[1]); + { + int i; + for (i = 0; i < num_input_streams; i++) + { + clReleaseMemObject(input_streams[i]); + } + } + free(input_streams); + clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + free(expected_output); + free(output_ptr); + + return err; +} + +int +test_image_multipass_float_coord(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + int img_width = 512; + int img_height = 512; + cl_image_format img_format; + + int num_input_streams = 8; + cl_mem *input_streams; + cl_mem accum_streams[2]; + unsigned char *expected_output; + unsigned char *output_ptr; + cl_kernel kernel[2]; + int err; + + PASSIVE_REQUIRE_IMAGE_SUPPORT( device ) + + img_format.image_channel_order = CL_RGBA; + img_format.image_channel_data_type = 
CL_UNORM_INT8; + + output_ptr = (unsigned char*)malloc(sizeof(unsigned char) * 4 * img_width * img_height); + + // Create the accum images with initial data. + { + unsigned char *initial_data; + cl_mem_flags flags; + + initial_data = generate_initial_byte_image(img_width, img_height, 4, 0xF0); + flags = (cl_mem_flags)(CL_MEM_READ_WRITE); + + accum_streams[0] = create_image_2d(context, flags, &img_format, img_width, img_height, 0, NULL, NULL); + if (!accum_streams[0]) + { + log_error("create_image_2d failed\n"); + return -1; + } + + size_t origin[3] = {0, 0, 0}, region[3] = {img_width, img_height, 1}; + err = clEnqueueWriteImage(queue, accum_streams[0], CL_TRUE, + origin, region, 0, 0, + initial_data, 0, NULL, NULL); + if (err) + { + log_error("clWriteImage failed: %d\n", err); + return -1; + } + + accum_streams[1] = create_image_2d(context, flags, &img_format, img_width, img_height, 0, NULL, NULL); + if (!accum_streams[1]) + { + log_error("create_image_2d failed\n"); + return -1; + } + err = clEnqueueWriteImage(queue, accum_streams[1], CL_TRUE, + origin, region, 0, 0, + initial_data, 0, NULL, NULL); + if (err) + { + log_error("clWriteImage failed: %d\n", err); + return -1; + } + + free(initial_data); + } + + // Set up the input data. 
+ { + cl_mem_flags flags; + unsigned char **input_data = (unsigned char **)malloc(sizeof(unsigned char*) * num_input_streams); + MTdata d; + + input_streams = (cl_mem*)malloc(sizeof(cl_mem) * num_input_streams); + flags = (cl_mem_flags)(CL_MEM_READ_WRITE); + + int i; + d = init_genrand( gRandomSeed ); + for ( i = 0; i < num_input_streams; i++) + { + input_data[i] = generate_byte_image(img_width, img_height, 4, d); + input_streams[i] = create_image_2d(context, flags, &img_format, img_width, img_height, 0, NULL, NULL); + if (!input_streams[i]) + { + log_error("create_image_2d failed\n"); + free(input_data); + free(input_streams); + return -1; + } + + size_t origin[3] = {0, 0, 0}, region[3] = {img_width, img_height, 1}; + err = clEnqueueWriteImage(queue, input_streams[i], CL_TRUE, + origin, region, 0, 0, + input_data[i], 0, NULL, NULL); + if (err) + { + log_error("clWriteImage failed: %d\n", err); + free(input_data); + free(input_streams); + return -1; + } + } + free_mtdata(d); d = NULL; + expected_output = generate_expected_byte_image(input_data, num_input_streams, img_width, img_height, 4); + for ( i = 0; i < num_input_streams; i++) + { + free(input_data[i]); + } + free(input_data); + } + + // Set up the kernels. 
+ { + cl_program program[2]; + + err = create_single_kernel_helper(context, &program[0], &kernel[0], 1, &image_to_image_kernel_float_coord_code, "image_to_image_copy"); + if (err) + { + log_error("Failed to create kernel 2: %d\n", err); + return -1; + } + err = create_single_kernel_helper(context, &program[1], &kernel[1], 1, &image_sum_kernel_float_coord_code, "image_sum"); + if (err) + { + log_error("Failed to create kernel 3: %d\n", err); + return -1; + } + + clReleaseProgram(program[0]); + clReleaseProgram(program[1]); + } + + cl_sampler sampler = clCreateSampler(context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &err); + test_error(err, "clCreateSampler failed"); + + { + size_t threads[3] = {0, 0, 0}; + threads[0] = (size_t)img_width; + threads[1] = (size_t)img_height; + int i; + + { + cl_mem accum_input; + cl_mem accum_output; + + err = clSetKernelArg(kernel[0], 0, sizeof input_streams[0], &input_streams[0]); + err |= clSetKernelArg(kernel[0], 1, sizeof accum_streams[0], &accum_streams[0]); + err |= clSetKernelArg(kernel[0], 2, sizeof sampler, &sampler); + if (err != CL_SUCCESS) + { + log_error("clSetKernelArgs failed\n"); + return -1; + } + err = clEnqueueNDRangeKernel( queue, kernel[0], 2, NULL, threads, NULL, 0, NULL, NULL ); + if (err != CL_SUCCESS) + { + log_error("clEnqueueNDRangeKernel failed\n"); + return -1; + } + + for (i = 1; i < num_input_streams; i++) + { + accum_input = accum_streams[(i-1)%2]; + accum_output = accum_streams[i%2]; + + err = clSetKernelArg(kernel[1], 0, sizeof accum_input, &accum_input); + err |= clSetKernelArg(kernel[1], 1, sizeof input_streams[i], &input_streams[i]); + err |= clSetKernelArg(kernel[1], 2, sizeof accum_output, &accum_output); + err |= clSetKernelArg(kernel[1], 3, sizeof sampler, &sampler); + + if (err != CL_SUCCESS) + { + log_error("clSetKernelArgs failed\n"); + return -1; + } + err = clEnqueueNDRangeKernel( queue, kernel[1], 2, NULL, threads, NULL, 0, NULL, NULL ); + if (err != CL_SUCCESS) + { + 
log_error("clEnqueueNDRangeKernel failed\n"); + return -1; + } + } + + // Copy the last accum into the other one. + accum_input = accum_streams[(i-1)%2]; + accum_output = accum_streams[i%2]; + err = clSetKernelArg(kernel[0], 0, sizeof accum_input, &accum_input); + err |= clSetKernelArg(kernel[0], 1, sizeof accum_output, &accum_output); + if (err != CL_SUCCESS) + { + log_error("clSetKernelArgs failed\n"); + return -1; + } + err = clEnqueueNDRangeKernel( queue, kernel[0], 2, NULL, threads, NULL, 0, NULL, NULL ); + if (err != CL_SUCCESS) + { + log_error("clEnqueueNDRangeKernel failed\n"); + return -1; + } + + size_t origin[3] = {0, 0, 0}, region[3] = {img_width, img_height, 1}; + err = clEnqueueReadImage(queue, accum_output, CL_TRUE, + origin, region, 0, 0, + (void *)output_ptr, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + log_error("clReadImage failed\n"); + return -1; + } + err = verify_byte_image(expected_output, output_ptr, img_width, img_height, 4); + if (err) + { + log_error("IMAGE_MULTIPASS test failed.\n"); + } + else + { + log_info("IMAGE_MULTIPASS test passed\n"); + } + } + + } + + + // cleanup + clReleaseSampler(sampler); + clReleaseMemObject(accum_streams[0]); + clReleaseMemObject(accum_streams[1]); + { + int i; + for (i = 0; i < num_input_streams; i++) + { + clReleaseMemObject(input_streams[i]); + } + } + clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + free(expected_output); + free(output_ptr); + free(input_streams); + + return err; +} + + + + + diff --git a/test_conformance/compatibility/test_conformance/basic/test_image_param.c b/test_conformance/compatibility/test_conformance/basic/test_image_param.c new file mode 100644 index 00000000..567af7f2 --- /dev/null +++ b/test_conformance/compatibility/test_conformance/basic/test_image_param.c @@ -0,0 +1,251 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include + + +#include "procs.h" +#include "../../test_common/harness/typeWrappers.h" +#include "../../test_common/harness/imageHelpers.h" +#include "../../test_common/harness/conversions.h" + + +static const char *param_kernel[] = { +"__kernel void test_fn(read_only image2d_t srcimg, sampler_t sampler, __global float4 *results )\n" +"{\n" +" int tid_x = get_global_id(0);\n" +" int tid_y = get_global_id(1);\n" +" results[ tid_y * get_image_width( srcimg ) + tid_x ] = read_imagef(srcimg, sampler, (int2)(tid_x, tid_y));\n" +"\n" +"}\n" }; + +int validate_results( size_t width, size_t height, cl_image_format &format, char *inputData, cl_float *actualResults ) +{ + for( size_t i = 0; i < width * height; i++ ) + { + cl_float expected[ 4 ], tolerance; + + switch( format.image_channel_data_type ) + { + case CL_UNORM_INT8: + { + cl_uchar *p = (cl_uchar *)inputData; + expected[ 0 ] = p[ 0 ] / 255.f; + expected[ 1 ] = p[ 1 ] / 255.f; + expected[ 2 ] = p[ 2 ] / 255.f; + expected[ 3 ] = p[ 3 ] / 255.f; + tolerance = 1.f / 255.f; + break; + } + case CL_SNORM_INT8: + { + cl_char *p = (cl_char *)inputData; + expected[ 0 ] = fmaxf( p[ 0 ] / 127.f, -1.f ); + expected[ 1 ] = fmaxf( p[ 1 ] / 127.f, -1.f ); + expected[ 2 ] = fmaxf( p[ 2 ] / 127.f, -1.f ); + expected[ 3 ] = fmaxf( p[ 3 ] / 127.f, -1.f ); + tolerance = 1.f / 127.f; + break; + } + 
case CL_UNSIGNED_INT8: + { + cl_uchar *p = (cl_uchar *)inputData; + expected[ 0 ] = p[ 0 ]; + expected[ 1 ] = p[ 1 ]; + expected[ 2 ] = p[ 2 ]; + expected[ 3 ] = p[ 3 ]; + tolerance = 1.f / 127.f; + break; + } + case CL_SIGNED_INT8: + { + cl_short *p = (cl_short *)inputData; + expected[ 0 ] = p[ 0 ]; + expected[ 1 ] = p[ 1 ]; + expected[ 2 ] = p[ 2 ]; + expected[ 3 ] = p[ 3 ]; + tolerance = 1.f / 127.f; + break; + } + case CL_UNORM_INT16: + { + cl_ushort *p = (cl_ushort *)inputData; + expected[ 0 ] = p[ 0 ] / 65535.f; + expected[ 1 ] = p[ 1 ] / 65535.f; + expected[ 2 ] = p[ 2 ] / 65535.f; + expected[ 3 ] = p[ 3 ] / 65535.f; + tolerance = 1.f / 65535.f; + break; + } + case CL_UNSIGNED_INT32: + { + cl_uint *p = (cl_uint *)inputData; + expected[ 0 ] = p[ 0 ]; + expected[ 1 ] = p[ 1 ]; + expected[ 2 ] = p[ 2 ]; + expected[ 3 ] = p[ 3 ]; + tolerance = 0.0001f; + break; + } + case CL_FLOAT: + { + cl_float *p = (cl_float *)inputData; + expected[ 0 ] = p[ 0 ]; + expected[ 1 ] = p[ 1 ]; + expected[ 2 ] = p[ 2 ]; + expected[ 3 ] = p[ 3 ]; + tolerance = 0.0001f; + break; + } + default: + // Should never get here + break; + } + + if( format.image_channel_order == CL_BGRA ) + { + cl_float tmp = expected[ 0 ]; + expected[ 0 ] = expected[ 2 ]; + expected[ 2 ] = tmp; + } + + // Within an error tolerance, make sure the results match + cl_float error1 = fabsf( expected[ 0 ] - actualResults[ 0 ] ); + cl_float error2 = fabsf( expected[ 1 ] - actualResults[ 1 ] ); + cl_float error3 = fabsf( expected[ 2 ] - actualResults[ 2 ] ); + cl_float error4 = fabsf( expected[ 3 ] - actualResults[ 3 ] ); + + if( error1 > tolerance || error2 > tolerance || error3 > tolerance || error4 > tolerance ) + { + log_error( "ERROR: Sample %d did not validate against expected results for %d x %d %s:%s image\n", (int)i, (int)width, (int)height, + GetChannelOrderName( format.image_channel_order ), GetChannelTypeName( format.image_channel_data_type ) ); + log_error( " Expected: %f %f %f %f\n", (float)expected[ 0 
], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ] ); + log_error( " Actual: %f %f %f %f\n", (float)actualResults[ 0 ], (float)actualResults[ 1 ], (float)actualResults[ 2 ], (float)actualResults[ 3 ] ); + + // Check real quick a special case error here + cl_float error1 = fabsf( expected[ 3 ] - actualResults[ 0 ] ); + cl_float error2 = fabsf( expected[ 2 ] - actualResults[ 1 ] ); + cl_float error3 = fabsf( expected[ 1 ] - actualResults[ 2 ] ); + cl_float error4 = fabsf( expected[ 0 ] - actualResults[ 3 ] ); + if( error1 <= tolerance && error2 <= tolerance && error3 <= tolerance && error4 <= tolerance ) + { + log_error( "\t(Kernel did not respect change in channel order)\n" ); + } + return -1; + } + + // Increment and go + actualResults += 4; + inputData += get_format_type_size( &format ) * 4; + } + + return 0; +} + +int test_image_param(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + size_t sizes[] = { 64, 100, 128, 250, 512 }; + cl_image_format formats[] = { { CL_RGBA, CL_UNORM_INT8 }, { CL_RGBA, CL_UNORM_INT16 }, { CL_RGBA, CL_FLOAT }, { CL_BGRA, CL_UNORM_INT8 } }; + ExplicitType types[] = { kUChar, kUShort, kFloat, kUChar }; + int error; + size_t i, j, idx; + size_t threads[ 2 ]; + MTdata d; + + const size_t numSizes = sizeof( sizes ) / sizeof( sizes[ 0 ] ); + const size_t numFormats = sizeof( formats ) / sizeof( formats[ 0 ] ); + const size_t numAttempts = numSizes * numFormats; + + + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper streams[ numAttempts ][ 2 ]; + BufferOwningPtr inputs[ numAttempts ]; + + PASSIVE_REQUIRE_IMAGE_SUPPORT( device ) + + d = init_genrand( gRandomSeed ); + for( i = 0, idx = 0; i < numSizes; i++ ) + { + for( j = 0; j < numFormats; j++, idx++ ) + { + // For each attempt, we create a pair: an input image, whose parameters keep changing, and an output buffer + // that we can read values from. 
The output buffer will remain consistent to ensure that any changes we + // witness are due to the image changes + inputs[ idx ].reset(create_random_data( types[ j ], d, sizes[ i ] * sizes[ i ] * 4 )); + + streams[ idx ][ 0 ] = create_image_2d( context, CL_MEM_COPY_HOST_PTR, &formats[ j ], sizes[ i ], sizes[ i ], 0, inputs[ idx ], &error ); + { + char err_str[256]; + sprintf(err_str, "Unable to create input image for format %s order %s" , + GetChannelOrderName( formats[j].image_channel_order ), + GetChannelTypeName( formats[j].image_channel_data_type )); + test_error( error, err_str); + } + + streams[ idx ][ 1 ] = clCreateBuffer( context, CL_MEM_READ_WRITE, sizes[ i ] * sizes[ i ] * 4 * sizeof( cl_float ), NULL, &error ); + test_error( error, "Unable to create output buffer" ); + } + } + free_mtdata(d); d = NULL; + + // Create a single kernel to use for all the tests + error = create_single_kernel_helper( context, &program, &kernel, 1, param_kernel, "test_fn" ); + test_error( error, "Unable to create testing kernel" ); + + // Also create a sampler to use for all the runs + clSamplerWrapper sampler = clCreateSampler(context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &error ); + test_error( error, "clCreateSampler failed" ); + + // Set up the arguments for each and queue + for( i = 0, idx = 0; i < numSizes; i++ ) + { + for( j = 0; j < numFormats; j++, idx++ ) + { + error = clSetKernelArg( kernel, 0, sizeof( streams[ idx ][ 0 ] ), &streams[ idx ][ 0 ] ); + error |= clSetKernelArg( kernel, 1, sizeof( sampler ), &sampler ); + error |= clSetKernelArg( kernel, 2, sizeof( streams[ idx ][ 1 ] ), &streams[ idx ][ 1 ]); + test_error( error, "Unable to set kernel arguments" ); + + threads[ 0 ] = threads[ 1 ] = (size_t)sizes[ i ]; + + error = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, NULL ); + test_error( error, "clEnqueueNDRangeKernel failed" ); + } + } + + // Now go through each combo and validate the results + for( i = 0, idx = 0; i 
< numSizes; i++ ) + { + for( j = 0; j < numFormats; j++, idx++ ) + { + BufferOwningPtr output(malloc(sizeof(cl_float) * sizes[ i ] * sizes[ i ] * 4 )); + + error = clEnqueueReadBuffer( queue, streams[ idx ][ 1 ], CL_TRUE, 0, sizes[ i ] * sizes[ i ] * 4 * sizeof( cl_float ), output, 0, NULL, NULL ); + test_error( error, "Unable to read results" ); + + error = validate_results( sizes[ i ], sizes[ i ], formats[ j ], inputs[ idx ], output ); + if( error ) + return -1; + } + } + + return 0; +} diff --git a/test_conformance/compatibility/test_conformance/basic/test_image_r8.c b/test_conformance/compatibility/test_conformance/basic/test_image_r8.c new file mode 100644 index 00000000..2f9976f0 --- /dev/null +++ b/test_conformance/compatibility/test_conformance/basic/test_image_r8.c @@ -0,0 +1,176 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include + + +#include "procs.h" + +static const char *r_uint8_kernel_code = +"__kernel void test_r_uint8(read_only image2d_t srcimg, __global unsigned char *dst, sampler_t sampler)\n" +"{\n" +" int tid_x = get_global_id(0);\n" +" int tid_y = get_global_id(1);\n" +" int indx = tid_y * get_image_width(srcimg) + tid_x;\n" +" uint4 color;\n" +"\n" +" color = read_imageui(srcimg, sampler, (int2)(tid_x, tid_y));\n" +" dst[indx] = (unsigned char)(color.x);\n" +"\n" +"}\n"; + + +static unsigned char * +generate_8bit_image(int w, int h, MTdata d) +{ + unsigned char *ptr = (unsigned char*)malloc(w * h * sizeof(unsigned char)); + int i; + + for (i=0; i +#include +#include +#include +#include + +#include "procs.h" + +int test_imagearraycopy_single_format(cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format) +{ + cl_uchar *imgptr, *bufptr; + clMemWrapper image, buffer; + int img_width = 512; + int img_height = 512; + size_t elem_size; + size_t buffer_size; + int i; + cl_int err; + MTdata d; + cl_event copyevent; + + log_info("Testing %s %s\n", GetChannelOrderName(format->image_channel_order), GetChannelTypeName(format->image_channel_data_type)); + + image = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), format, img_width, img_height, 0, NULL, &err); + test_error(err, "create_image_2d failed"); + + err = clGetImageInfo(image, CL_IMAGE_ELEMENT_SIZE, sizeof(size_t), &elem_size, NULL); + test_error(err, "clGetImageInfo failed"); + + buffer_size = sizeof(cl_uchar) * elem_size * img_width * img_height; + + buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), buffer_size, NULL, &err); + test_error(err, "clCreateBuffer failed"); + + d = init_genrand( gRandomSeed ); + imgptr = (cl_uchar*)malloc(buffer_size); + for (i=0; i<(int)buffer_size; i++) { + imgptr[i] = (cl_uchar)genrand_int32(d); + } + free_mtdata(d); d = NULL; + + size_t 
origin[3]={0,0,0}, region[3]={img_width,img_height,1}; + err = clEnqueueWriteImage( queue, image, CL_TRUE, origin, region, 0, 0, imgptr, 0, NULL, NULL ); + test_error(err, "clEnqueueWriteBuffer failed"); + + err = clEnqueueCopyImageToBuffer( queue, image, buffer, origin, region, 0, 0, NULL, ©event ); + test_error(err, "clEnqueueCopyImageToBuffer failed"); + + bufptr = (cl_uchar*)malloc(buffer_size); + + err = clEnqueueReadBuffer( queue, buffer, CL_TRUE, 0, buffer_size, bufptr, 1, ©event, NULL); + test_error(err, "clEnqueueReadBuffer failed"); + + err = clReleaseEvent(copyevent); + test_error(err, "clReleaseEvent failed"); + + if (memcmp(imgptr, bufptr, buffer_size) != 0) { + log_error( "ERROR: Results did not validate!\n" ); + unsigned char * inchar = (unsigned char*)imgptr; + unsigned char * outchar = (unsigned char*)bufptr; + int failuresPrinted = 0; + int i; + for (i=0; i< (int)buffer_size; i+=(int)elem_size) { + int failed = 0; + int j; + for (j=0; j<(int)elem_size; j++) + if (inchar[i+j] != outchar[i+j]) + failed = 1; + char values[4096]; + values[0] = 0; + if (failed) { + sprintf(values + strlen(values), "%d(0x%x) -> actual [", i, i); + int j; + for (j=0; j<(int)elem_size; j++) + sprintf(values + strlen( values), "0x%02x ", inchar[i+j]); + sprintf(values + strlen(values), "] != expected ["); + for (j=0; j<(int)elem_size; j++) + sprintf(values + strlen( values), "0x%02x ", outchar[i+j]); + sprintf(values + strlen(values), "]"); + log_error("%s\n", values); + failuresPrinted++; + } + if (failuresPrinted > 5) { + log_error("Not printing further failures...\n"); + break; + } + } + err = -1; + } + + free(imgptr); + free(bufptr); + + if (err) + log_error("IMAGE to ARRAY copy test failed for image_channel_order=0x%lx and image_channel_data_type=0x%lx\n", + (unsigned long)format->image_channel_order, (unsigned long)format->image_channel_data_type); + + return err; +} + +int test_imagearraycopy(cl_device_id device, cl_context context, cl_command_queue queue, int 
num_elements) +{ + cl_int err; + cl_image_format *formats; + cl_uint num_formats; + cl_uint i; + + PASSIVE_REQUIRE_IMAGE_SUPPORT( device ) + + err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 0, NULL, &num_formats); + test_error(err, "clGetSupportedImageFormats failed"); + + formats = (cl_image_format *)malloc(num_formats * sizeof(cl_image_format)); + + err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, num_formats, formats, NULL); + test_error(err, "clGetSupportedImageFormats failed"); + + for (i = 0; i < num_formats; i++) { + err |= test_imagearraycopy_single_format(device, context, queue, &formats[i]); + } + + free(formats); + if (err) + log_error("IMAGE to ARRAY copy test failed\n"); + else + log_info("IMAGE to ARRAY copy test passed\n"); + + return err; +} diff --git a/test_conformance/compatibility/test_conformance/basic/test_imagearraycopy3d.c b/test_conformance/compatibility/test_conformance/basic/test_imagearraycopy3d.c new file mode 100644 index 00000000..d0fc05a3 --- /dev/null +++ b/test_conformance/compatibility/test_conformance/basic/test_imagearraycopy3d.c @@ -0,0 +1,144 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include + +#include "procs.h" + +int test_imagearraycopy3d_single_format(cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format) +{ + cl_uchar *imgptr, *bufptr; + clMemWrapper image, buffer; + int img_width = 128; + int img_height = 128; + int img_depth = 32; + size_t elem_size; + size_t buffer_size; + int i; + cl_int err; + MTdata d; + cl_event copyevent; + + log_info("Testing %s %s\n", GetChannelOrderName(format->image_channel_order), GetChannelTypeName(format->image_channel_data_type)); + + image = create_image_3d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), format, img_width, img_height, img_depth, 0, 0, NULL, &err); + test_error(err, "create_image_3d failed"); + + err = clGetImageInfo(image, CL_IMAGE_ELEMENT_SIZE, sizeof(size_t), &elem_size, NULL); + test_error(err, "clGetImageInfo failed"); + + buffer_size = sizeof(cl_uchar) * elem_size * img_width * img_height * img_depth; + + buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), buffer_size, NULL, &err); + test_error(err, "clCreateBuffer failed"); + + imgptr = (cl_uchar*)malloc(buffer_size); + + d = init_genrand( gRandomSeed ); + imgptr = (cl_uchar*)malloc(buffer_size); + for (i=0; i<(int)buffer_size; i++) { + imgptr[i] = (cl_uchar)genrand_int32(d); + } + free_mtdata(d); d = NULL; + + size_t origin[3]={0,0,0}, region[3]={img_width,img_height,img_depth}; + err = clEnqueueWriteImage( queue, image, CL_TRUE, origin, region, 0, 0, imgptr, 0, NULL, NULL ); + test_error(err, "clEnqueueWriteBuffer failed"); + + err = clEnqueueCopyImageToBuffer( queue, image, buffer, origin, region, 0, 0, NULL, ©event ); + test_error(err, "clEnqueueCopyImageToBuffer failed"); + + bufptr = (cl_uchar*)malloc(buffer_size); + + err = clEnqueueReadBuffer( queue, buffer, CL_TRUE, 0, buffer_size, bufptr, 1, ©event, NULL); + test_error(err, "clEnqueueReadBuffer failed"); + + if (memcmp(imgptr, bufptr, 
buffer_size) != 0) { + log_error( "ERROR: Results did not validate!\n" ); + unsigned char * inchar = (unsigned char*)imgptr; + unsigned char * outchar = (unsigned char*)bufptr; + int failuresPrinted = 0; + int i; + for (i=0; i< (int)buffer_size; i+=(int)elem_size) { + int failed = 0; + int j; + for (j=0; j<(int)elem_size; j++) + if (inchar[i+j] != outchar[i+j]) + failed = 1; + char values[4096]; + values[0] = 0; + if (failed) { + sprintf(values + strlen(values), "%d(0x%x) -> actual [", i, i); + int j; + for (j=0; j<(int)elem_size; j++) + sprintf(values + strlen( values), "0x%02x ", inchar[i+j]); + sprintf(values + strlen(values), "] != expected ["); + for (j=0; j<(int)elem_size; j++) + sprintf(values + strlen( values), "0x%02x ", outchar[i+j]); + sprintf(values + strlen(values), "]"); + log_error("%s\n", values); + failuresPrinted++; + } + if (failuresPrinted > 5) { + log_error("Not printing further failures...\n"); + break; + } + } + err = -1; + } + + free(imgptr); + free(bufptr); + + if (err) + log_error("IMAGE3D to ARRAY copy test failed for image_channel_order=0x%lx and image_channel_data_type=0x%lx\n", + (unsigned long)format->image_channel_order, (unsigned long)format->image_channel_data_type); + + return err; +} + +int test_imagearraycopy3d(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + cl_int err; + cl_image_format *formats; + cl_uint num_formats; + cl_uint i; + + PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( device ) + + err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE3D, 0, NULL, &num_formats); + test_error(err, "clGetSupportedImageFormats failed"); + + formats = (cl_image_format *)malloc(num_formats * sizeof(cl_image_format)); + + err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE3D, num_formats, formats, NULL); + test_error(err, "clGetSupportedImageFormats failed"); + + for (i = 0; i < num_formats; i++) { + err |= test_imagearraycopy3d_single_format(device, 
context, queue, &formats[i]); + } + + if (err) + log_error("IMAGE3D to ARRAY copy test failed\n"); + else + log_info("IMAGE3D to ARRAY copy test passed\n"); + + return err; +} diff --git a/test_conformance/compatibility/test_conformance/basic/test_imagecopy.c b/test_conformance/compatibility/test_conformance/basic/test_imagecopy.c new file mode 100644 index 00000000..d01ff5ff --- /dev/null +++ b/test_conformance/compatibility/test_conformance/basic/test_imagecopy.c @@ -0,0 +1,234 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include + + +#include "procs.h" + +static unsigned char * +generate_rgba8_image(int w, int h, MTdata d) +{ + unsigned char *ptr = (unsigned char*)malloc(w * h * 4); + int i; + + for (i=0; i +#include +#include +#include + + +#include "procs.h" + +static unsigned char * +generate_uint8_image(unsigned num_elements, MTdata d) +{ + unsigned char *ptr = (unsigned char*)malloc(num_elements); + unsigned i; + + for (i=0; i +#include +#include +#include +#include + + +#include "procs.h" + +static const char *image_dim_kernel_code = +"\n" +"__kernel void test_image_dim(read_only image2d_t srcimg, write_only image2d_t dstimg, sampler_t sampler)\n" +"{\n" +" int tid_x = get_global_id(0);\n" +" int tid_y = get_global_id(1);\n" +" float4 color;\n" +"\n" +" color = read_imagef(srcimg, sampler, (int2)(tid_x, tid_y));\n" +" write_imagef(dstimg, (int2)(tid_x, tid_y), color);\n" +"\n" +"}\n"; + + +static unsigned char * +generate_8888_image(int w, int h, MTdata d) +{ + unsigned char *ptr = (unsigned char*)malloc(w * h * 4); + int i; + + for (i=0; i (cl_ulong)SIZE_MAX) { + max_mem_size = (cl_ulong)SIZE_MAX; + } + + cl_sampler sampler = clCreateSampler(context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &err); + test_error(err, "clCreateSampler failed"); + + max_img_width = (int)max_image2d_width; + max_img_height = (int)max_image2d_height; + + // determine max image dim we can allocate - assume RGBA image, 4 bytes per pixel, + // and we want to consume 1/4 of global memory (this is the minimum required to be + // supported by the spec) + max_mem_size /= 4; // use 1/4 + max_mem_size /= 4; // 4 bytes per pixel + max_img_dim = (int)sqrt((double)max_mem_size); + // convert to a power of 2 + { + unsigned int n = (unsigned int)max_img_dim; + unsigned int m = 0x80000000; + + // round-down to the nearest power of 2 + while (m > n) + m >>= 1; + + max_img_dim = (int)m; + } + + if (max_img_width > 
max_img_dim) + max_img_width = max_img_dim; + if (max_img_height > max_img_dim) + max_img_height = max_img_dim; + + log_info("Adjusted maximum image size to test is %d x %d, which is a max mem size of %gMB.\n", + max_img_width, max_img_height, (max_img_width*max_img_height*4)/(1024.0*1024.0)); + + d = init_genrand( gRandomSeed ); + input_ptr = generate_8888_image(max_img_width, max_img_height, d); + output_ptr = (unsigned char*)malloc(sizeof(unsigned char) * 4 * max_img_width * max_img_height); + + // test power of 2 width, height starting at 1 to 4K + for (i=1,i2=0; i<=max_img_height; i<<=1,i2++) + { + img_height = (1 << i2); + for (j=1,j2=0; j<=max_img_width; j<<=1,j2++) + { + img_width = (1 << j2); + + img_format.image_channel_order = CL_RGBA; + img_format.image_channel_data_type = CL_UNORM_INT8; + streams[0] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, NULL); + if (!streams[0]) + { + log_error("create_image_2d failed. width = %d, height = %d\n", img_width, img_height); + free(input_ptr); + free(output_ptr); + free_mtdata(d); + return -1; + } + img_format.image_channel_order = CL_RGBA; + img_format.image_channel_data_type = CL_UNORM_INT8; + streams[1] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, NULL); + if (!streams[1]) + { + log_error("create_image_2d failed. 
width = %d, height = %d\n", img_width, img_height); + clReleaseMemObject(streams[0]); + free(input_ptr); + free(output_ptr); + free_mtdata(d); + return -1; + } + + size_t origin[3] = {0,0,0}; + size_t region[3] = {img_width, img_height, 1}; + err = clEnqueueWriteImage(queue, streams[0], CL_FALSE, origin, region, 0, 0, input_ptr, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + log_error("clWriteImage failed\n"); + clReleaseMemObject(streams[0]); + clReleaseMemObject(streams[1]); + free(input_ptr); + free(output_ptr); + free_mtdata(d); + return -1; + } + + err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]); + err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]); + err |= clSetKernelArg(kernel, 2, sizeof sampler, &sampler); + if (err != CL_SUCCESS) + { + log_error("clSetKernelArgs failed\n"); + clReleaseMemObject(streams[0]); + clReleaseMemObject(streams[1]); + free(input_ptr); + free(output_ptr); + free_mtdata(d); + return -1; + } + + threads[0] = (size_t)img_width; + threads[1] = (size_t)img_height; + log_info("Testing image dimensions %d x %d with local threads NULL.\n", img_width, img_height); + err = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, NULL ); + if (err != CL_SUCCESS) + { + log_error("clEnqueueNDRangeKernel failed\n"); + log_error("Image Dimension test failed. image width = %d, image height = %d, local NULL\n", + img_width, img_height); + clReleaseMemObject(streams[0]); + clReleaseMemObject(streams[1]); + free(input_ptr); + free(output_ptr); + free_mtdata(d); + return -1; + } + err = clEnqueueReadImage(queue, streams[1], CL_TRUE, origin, region, 0, 0, output_ptr, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + log_error("clReadImage failed\n"); + log_error("Image Dimension test failed. 
image width = %d, image height = %d, local NULL\n", + img_width, img_height); + clReleaseMemObject(streams[0]); + clReleaseMemObject(streams[1]); + free(input_ptr); + free(output_ptr); + free_mtdata(d); + return -1; + } + err = verify_8888_image(input_ptr, output_ptr, img_width, img_height); + if (err) + { + total_errors++; + log_error("Image Dimension test failed. image width = %d, image height = %d\n", img_width, img_height); + } + + clReleaseMemObject(streams[0]); + clReleaseMemObject(streams[1]); + } + } + + // cleanup + free(input_ptr); + free(output_ptr); + free_mtdata(d); + clReleaseSampler(sampler); + clReleaseKernel(kernel); + clReleaseProgram(program); + + return total_errors; +} + + + +int +test_imagedim_non_pow2(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + cl_mem streams[2]; + cl_image_format img_format; + unsigned char *input_ptr, *output_ptr; + cl_program program; + cl_kernel kernel; + size_t threads[2], local_threads[2]; + cl_ulong max_mem_size; + int img_width, max_img_width; + int img_height, max_img_height; + int max_img_dim; + int i, j, i2, j2, err=0; + size_t max_image2d_width, max_image2d_height; + int total_errors = 0; + size_t max_local_workgroup_size[3]; + MTdata d; + + PASSIVE_REQUIRE_IMAGE_SUPPORT( device ) + + err = create_single_kernel_helper( context, &program, &kernel, 1, &image_dim_kernel_code, "test_image_dim" ); + if (err) + { + log_error("create_program_and_kernel_with_sources failed\n"); + return -1; + } + + size_t work_group_size = 0; + err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(work_group_size), &work_group_size, NULL); + test_error(err, "clGetKerenlWorkgroupInfo failed for CL_KERNEL_WORK_GROUP_SIZE"); + + err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_local_workgroup_size), max_local_workgroup_size, NULL); + test_error(err, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES"); + + err = clGetDeviceInfo(device, 
CL_DEVICE_GLOBAL_MEM_SIZE,sizeof(max_mem_size), &max_mem_size, NULL); + if (err) + { + log_error("clGetDeviceInfo for CL_DEVICE_GLOBAL_MEM_SIZE failed (%d)\n", err); + return -1; + } + err = clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(max_image2d_width), &max_image2d_width, NULL); + if (err) + { + log_error("clGetDeviceInfo for CL_DEVICE_IMAGE2D_MAX_WIDTH failed (%d)\n", err); + return -1; + } + err = clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(max_image2d_width), &max_image2d_height, NULL); + if (err) + { + log_error("clGetDeviceInfo for CL_DEVICE_IMAGE2D_MAX_HEIGHT failed (%d)\n", err); + return -1; + } + log_info("Device reported max image sizes of %lu x %lu, and max mem size of %gMB.\n", + max_image2d_width, max_image2d_height, max_mem_size/(1024.0*1024.0)); + + cl_sampler sampler = clCreateSampler(context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &err); + test_error(err, "clCreateSampler failed"); + + max_img_width = (int)max_image2d_width; + max_img_height = (int)max_image2d_height; + + if (max_mem_size > (cl_ulong)SIZE_MAX) { + max_mem_size = (cl_ulong)SIZE_MAX; + } + + // determine max image dim we can allocate - assume RGBA image, 4 bytes per pixel, + // and we want to consume 1/4 of global memory (this is the minimum required to be + // supported by the spec) + max_mem_size /= 4; // use 1/4 + max_mem_size /= 4; // 4 bytes per pixel + max_img_dim = (int)sqrt((double)max_mem_size); + // convert to a power of 2 + { + unsigned int n = (unsigned int)max_img_dim; + unsigned int m = 0x80000000; + + // round-down to the nearest power of 2 + while (m > n) + m >>= 1; + + max_img_dim = (int)m; + } + + if (max_img_width > max_img_dim) + max_img_width = max_img_dim; + if (max_img_height > max_img_dim) + max_img_height = max_img_dim; + + log_info("Adjusted maximum image size to test is %d x %d, which is a max mem size of %gMB.\n", + max_img_width, max_img_height, (max_img_width*max_img_height*4)/(1024.0*1024.0)); + + d = 
init_genrand( gRandomSeed ); + input_ptr = generate_8888_image(max_img_width, max_img_height, d); + output_ptr = (unsigned char*)malloc(sizeof(unsigned char) * 4 * max_img_width * max_img_height); + + int plus_minus; + for (plus_minus=0; plus_minus < 3; plus_minus++) + { + + // test power of 2 width, height starting at 1 to 4K + for (i=2,i2=1; i<=max_img_height; i<<=1,i2++) + { + img_height = (1 << i2); + for (j=2,j2=1; j<=max_img_width; j<<=1,j2++) + { + img_width = (1 << j2); + + int effective_img_height = img_height; + int effective_img_width = img_width; + + local_threads[0] = 1; + local_threads[1] = 1; + + switch (plus_minus) { + case 0: + effective_img_height--; + local_threads[0] = work_group_size > max_local_workgroup_size[0] ? max_local_workgroup_size[0] : work_group_size; + while (img_width%local_threads[0] != 0) + local_threads[0]--; + break; + case 1: + effective_img_width--; + local_threads[1] = work_group_size > max_local_workgroup_size[1] ? max_local_workgroup_size[1] : work_group_size; + while (img_height%local_threads[1] != 0) + local_threads[1]--; + break; + case 2: + effective_img_width--; + effective_img_height--; + break; + default: + break; + } + + img_format.image_channel_order = CL_RGBA; + img_format.image_channel_data_type = CL_UNORM_INT8; + streams[0] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, effective_img_width, effective_img_height, 0, NULL, NULL); + if (!streams[0]) + { + log_error("create_image_2d failed. width = %d, height = %d\n", effective_img_width, effective_img_height); + free(input_ptr); + free(output_ptr); + free_mtdata(d); + return -1; + } + img_format.image_channel_order = CL_RGBA; + img_format.image_channel_data_type = CL_UNORM_INT8; + streams[1] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, effective_img_width, effective_img_height, 0, NULL, NULL); + if (!streams[1]) + { + log_error("create_image_2d failed. 
width = %d, height = %d\n", effective_img_width, effective_img_height); + clReleaseMemObject(streams[0]); + free(input_ptr); + free(output_ptr); + free_mtdata(d); + return -1; + } + + size_t origin[3] = {0,0,0}; + size_t region[3] = {effective_img_width, effective_img_height, 1}; + err = clEnqueueWriteImage(queue, streams[0], CL_FALSE, origin, region, 0, 0, input_ptr, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + log_error("clWriteImage failed\n"); + clReleaseMemObject(streams[0]); + clReleaseMemObject(streams[1]); + free(input_ptr); + free(output_ptr); + free_mtdata(d); + return -1; + } + + err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]); + err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]); + err |= clSetKernelArg(kernel, 2, sizeof sampler, &sampler); + if (err != CL_SUCCESS) + { + log_error("clSetKernelArgs failed\n"); + clReleaseMemObject(streams[0]); + clReleaseMemObject(streams[1]); + free(input_ptr); + free(output_ptr); + free_mtdata(d); + return -1; + } + + threads[0] = (size_t)effective_img_width; + threads[1] = (size_t)effective_img_height; + log_info("Testing image dimensions %d x %d with local threads %d x %d.\n", + effective_img_width, effective_img_height, (int)local_threads[0], (int)local_threads[1]); + err = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, local_threads, 0, NULL, NULL ); + if (err != CL_SUCCESS) + { + log_error("clEnqueueNDRangeKernel failed\n"); + log_error("Image Dimension test failed. image width = %d, image height = %d, local %d x %d\n", + effective_img_width, effective_img_height, (int)local_threads[0], (int)local_threads[1]); + clReleaseMemObject(streams[0]); + clReleaseMemObject(streams[1]); + free(input_ptr); + free(output_ptr); + free_mtdata(d); + return -1; + } + err = clEnqueueReadImage(queue, streams[1], CL_TRUE, origin, region, 0, 0, output_ptr, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + log_error("clReadImage failed\n"); + log_error("Image Dimension test failed. 
image width = %d, image height = %d, local %d x %d\n", + effective_img_width, effective_img_height, (int)local_threads[0], (int)local_threads[1]); + clReleaseMemObject(streams[0]); + clReleaseMemObject(streams[1]); + free(input_ptr); + free(output_ptr); + free_mtdata(d); + return -1; + } + err = verify_8888_image(input_ptr, output_ptr, effective_img_width, effective_img_height); + if (err) + { + total_errors++; + log_error("Image Dimension test failed. image width = %d, image height = %d\n", effective_img_width, effective_img_height); + } + + clReleaseMemObject(streams[0]); + clReleaseMemObject(streams[1]); + } + } + + } + + // cleanup + free(input_ptr); + free(output_ptr); + free_mtdata(d); + clReleaseSampler(sampler); + clReleaseKernel(kernel); + clReleaseProgram(program); + + return total_errors; +} + + + + diff --git a/test_conformance/compatibility/test_conformance/basic/test_imagenpot.c b/test_conformance/compatibility/test_conformance/basic/test_imagenpot.c new file mode 100644 index 00000000..acea61dc --- /dev/null +++ b/test_conformance/compatibility/test_conformance/basic/test_imagenpot.c @@ -0,0 +1,220 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include + + +#include "procs.h" + +static const char *rgba8888_kernel_code = +"\n" +"__kernel void test_rgba8888(read_only image2d_t srcimg, write_only image2d_t dstimg, sampler_t sampler)\n" +"{\n" +" int tid_x = get_global_id(0);\n" +" int tid_y = get_global_id(1);\n" +" float4 color;\n" +"\n" +" if ( (tid_x >= get_image_width(dstimg)) || (tid_y >= get_image_height(dstimg)) )\n" +" return;\n" +" color = read_imagef(srcimg, sampler, (int2)(tid_x, tid_y));\n" +" write_imagef(dstimg, (int2)(tid_x, tid_y), color);\n" +"\n" +"}\n"; + + +static unsigned char * +generate_8888_image(int w, int h, MTdata d) +{ + unsigned char *ptr = (unsigned char*)malloc(w * h * 4); + int i; + + for (i=0; i max_local_workgroup_size[0]) + local_workgroup_size = max_local_workgroup_size[0]; + + global_threads[0] = ((img_width + local_workgroup_size - 1) / local_workgroup_size) * local_workgroup_size; + global_threads[1] = img_height; + local_threads[0] = local_workgroup_size; + local_threads[1] = 1; + err = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, global_threads, local_threads, 0, NULL, NULL ); + + if (err != CL_SUCCESS) + { + log_error("%s clEnqueueNDRangeKernel failed\n", __FUNCTION__); + free_mtdata(d); + return -1; + } + err = clEnqueueReadImage(queue, streams[1], CL_TRUE, + origin, region, 0, 0, + (void *)output_ptr, + 0, NULL, NULL); + if (err != CL_SUCCESS) + { + log_error("clEnqueueReadBuffer failed\n"); + return -1; + } + + err = verify_rgba8888_image(input_ptr, output_ptr, img_width, img_height); + + // cleanup + clReleaseSampler(sampler); + clReleaseMemObject(streams[0]); + clReleaseMemObject(streams[1]); + clReleaseKernel(kernel); + clReleaseProgram(program); + free(input_ptr); + free(output_ptr); + + if (err) + break; + } + + free_mtdata(d); + + return err; +} + + + + + diff --git a/test_conformance/compatibility/test_conformance/basic/test_imagerandomcopy.c 
b/test_conformance/compatibility/test_conformance/basic/test_imagerandomcopy.c new file mode 100644 index 00000000..1b95f6a8 --- /dev/null +++ b/test_conformance/compatibility/test_conformance/basic/test_imagerandomcopy.c @@ -0,0 +1,269 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include + + +#include "procs.h" + +static unsigned char * +generate_rgba8_image(int w, int h, MTdata d) +{ + unsigned char *ptr = (unsigned char*)malloc(w * h * 4); + int i; + + for (i=0; i +#include +#include +#include + + +#include "procs.h" + +static unsigned char * +generate_rgba8_image(int w, int h, MTdata d) +{ + unsigned char *ptr = (unsigned char*)malloc(w * h * 4); + int i; + + for (i=0; i +#include +#include +#include + + +#include "procs.h" + +static unsigned char * +generate_rgba8_image(int w, int h, int d, MTdata mtData) +{ + unsigned char *ptr = (unsigned char*)malloc(w * h * d *4); + int i; + + for (i=0; i +#include +#include +#include + + +#include "procs.h" + +const char *int2float_kernel_code = +"__kernel void test_int2float(__global int *src, __global float *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = (float)src[tid];\n" +"\n" +"}\n"; + + +int +verify_int2float(cl_int *inptr, cl_float *outptr, int n) +{ + int i; + + for (i=0; i +#include +#include +#include + + +#include "procs.h" + +const char 
*int_add_kernel_code = +"__kernel void test_int_add(__global int *srcA, __global int *srcB, __global int *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] + srcB[tid];\n" +"}\n"; + +const char *int_sub_kernel_code = +"__kernel void test_int_sub(__global int *srcA, __global int *srcB, __global int *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] - srcB[tid];\n" +"}\n"; + +const char *int_mul_kernel_code = +"__kernel void test_int_mul(__global int *srcA, __global int *srcB, __global int *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] * srcB[tid];\n" +"}\n"; + +const char *int_mad_kernel_code = +"__kernel void test_int_mad(__global int *srcA, __global int *srcB, __global int *srcC, __global int *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] * srcB[tid] + srcC[tid];\n" +"}\n"; + +static const float MAX_ERR = 1e-5f; + +int +verify_int_add(int *inptrA, int *inptrB, int *outptr, int n) +{ + int r; + int i; + + for (i=0; i +#include +#include +#include + + +#include "procs.h" + +const char *int_add2_kernel_code = +"__kernel void test_int_add2(__global int2 *srcA, __global int2 *srcB, __global int2 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] + srcB[tid];\n" +"}\n"; + +const char *int_sub2_kernel_code = +"__kernel void test_int_sub2(__global int2 *srcA, __global int2 *srcB, __global int2 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] - srcB[tid];\n" +"}\n"; + +const char *int_mul2_kernel_code = +"__kernel void test_int_mul2(__global int2 *srcA, __global int2 *srcB, __global int2 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] * srcB[tid];\n" +"}\n"; + +const char *int_mad2_kernel_code = +"__kernel void test_int_mad2(__global int2 *srcA, __global int2 *srcB, __global int2 *srcC, __global int2 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" 
dst[tid] = srcA[tid] * srcB[tid] + srcC[tid];\n" +"}\n"; + +int +verify_int_add2(int *inptrA, int *inptrB, int *outptr, int n) +{ + int r; + int i; + + for (i=0; i +#include +#include +#include + + +#include "procs.h" + +const char *int_add4_kernel_code = +"__kernel void test_int_add4(__global int4 *srcA, __global int4 *srcB, __global int4 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] + srcB[tid];\n" +"}\n"; + +const char *int_sub4_kernel_code = +"__kernel void test_int_sub4(__global int4 *srcA, __global int4 *srcB, __global int4 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] - srcB[tid];\n" +"}\n"; + +const char *int_mul4_kernel_code = +"__kernel void test_int_mul4(__global int4 *srcA, __global int4 *srcB, __global int4 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] * srcB[tid];\n" +"}\n"; + +const char *int_mad4_kernel_code = +"__kernel void test_int_mad4(__global int4 *srcA, __global int4 *srcB, __global int4 *srcC, __global int4 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] * srcB[tid] + srcC[tid];\n" +"}\n"; + +int +verify_int_add4(int *inptrA, int *inptrB, int *outptr, int n) +{ + int r; + int i; + + for (i=0; i +#include +#include +#include + + +#include "procs.h" + +const char *long_add_kernel_code = +"__kernel void test_long_add(__global long *srcA, __global long *srcB, __global long *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] + srcB[tid];\n" +"}\n"; + +const char *long_sub_kernel_code = +"__kernel void test_long_sub(__global long *srcA, __global long *srcB, __global long *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] - srcB[tid];\n" +"}\n"; + +const char *long_mul_kernel_code = +"__kernel void test_long_mul(__global long *srcA, __global long *srcB, __global long *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] * srcB[tid];\n" 
+"}\n"; + +const char *long_mad_kernel_code = +"__kernel void test_long_mad(__global long *srcA, __global long *srcB, __global long *srcC, __global long *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] * srcB[tid] + srcC[tid];\n" +"}\n"; + +static const float MAX_ERR = 1e-5f; + +int +verify_long_add(cl_long *inptrA, cl_long *inptrB, cl_long *outptr, int n) +{ + cl_long r; + int i; + + for (i=0; i +#include +#include +#include + + +#include "procs.h" + +const char *long_add2_kernel_code = +"__kernel void test_long_add2(__global long2 *srcA, __global long2 *srcB, __global long2 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] + srcB[tid];\n" +"}\n"; + +const char *long_sub2_kernel_code = +"__kernel void test_long_sub2(__global long2 *srcA, __global long2 *srcB, __global long2 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] - srcB[tid];\n" +"}\n"; + +const char *long_mul2_kernel_code = +"__kernel void test_long_mul2(__global long2 *srcA, __global long2 *srcB, __global long2 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] * srcB[tid];\n" +"}\n"; + +const char *long_mad2_kernel_code = +"__kernel void test_long_mad2(__global long2 *srcA, __global long2 *srcB, __global long2 *srcC, __global long2 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] * srcB[tid] + srcC[tid];\n" +"}\n"; + +int +verify_long_add2(cl_long *inptrA, cl_long *inptrB, cl_long *outptr, int n) +{ + cl_long r; + int i; + + for (i=0; i +#include +#include +#include + + +#include "procs.h" + +const char *long_add4_kernel_code = +"__kernel void test_long_add4(__global long4 *srcA, __global long4 *srcB, __global long4 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] + srcB[tid];\n" +"}\n"; + +const char *long_sub4_kernel_code = +"__kernel void test_long_sub4(__global long4 *srcA, __global long4 *srcB, __global long4 *dst)\n" 
+"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] - srcB[tid];\n" +"}\n"; + +const char *long_mul4_kernel_code = +"__kernel void test_long_mul4(__global long4 *srcA, __global long4 *srcB, __global long4 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] * srcB[tid];\n" +"}\n"; + +const char *long_mad4_kernel_code = +"__kernel void test_long_mad4(__global long4 *srcA, __global long4 *srcB, __global long4 *srcC, __global long4 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] * srcB[tid] + srcC[tid];\n" +"}\n"; + +int +verify_long_add4(cl_long *inptrA, cl_long *inptrB, cl_long *outptr, int n) +{ + cl_long r; + int i; + + for (i=0; i 10) + continue; + if (errors == 10) { + log_error("Suppressing further results...\n"); + continue; + } + log_error("Results do not match: output[%d]=%d != expected[%d]=%d\n", i, output[i], i, expected[i]); + errors++; + pass = 0; + } + } + if (pass) log_info("Passed kernel calling kernel...\n"); + + + + // Test kernel calling a function + log_info("Testing kernel calling function...\n"); + // Reset the inputs + for (int i=0; i 10) + continue; + if (errors > 10) { + log_error("Suppressing further results...\n"); + continue; + } + log_error("Results do not match: output[%d]=%d != expected[%d]=%d\n", i, output[i], i, expected[i]); + errors++; + pass = 0; + } + } + if (pass) log_info("Passed kernel calling function...\n"); + + + // Test calling the kernel we called from another kernel + log_info("Testing calling the kernel we called from another kernel before...\n"); + // Reset the inputs + for (int i=0; i 10) + continue; + if (errors > 10) { + log_error("Suppressing further results...\n"); + continue; + } + log_error("Results do not match: output[%d]=%d != expected[%d]=%d\n", i, output[i], i, expected[i]); + errors++; + pass = 0; + } + } + if (pass) log_info("Passed calling the kernel we called from another kernel before...\n"); + + free( input ); + free( output 
); + free( expected ); + + return errors; +} + + diff --git a/test_conformance/compatibility/test_conformance/basic/test_kernel_memory_alignment.cpp b/test_conformance/compatibility/test_conformance/basic/test_kernel_memory_alignment.cpp new file mode 100644 index 00000000..1352e46a --- /dev/null +++ b/test_conformance/compatibility/test_conformance/basic/test_kernel_memory_alignment.cpp @@ -0,0 +1,572 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef _WIN32 +#include +#endif + +#include "procs.h" +#include "../../test_common/harness/conversions.h" +#include "../../test_common/harness/typeWrappers.h" +#include "../../test_common/harness/errorHelpers.h" + +// For global, local, and constant +const char *parameter_kernel_long = +"%s\n" // optional pragma +"kernel void test(global ulong *results, %s %s *mem0, %s %s2 *mem2, %s %s3 *mem3, %s %s4 *mem4, %s %s8 *mem8, %s %s16 *mem16)\n" +"{\n" +" results[0] = (ulong)&mem0[0];\n" +" results[1] = (ulong)&mem2[0];\n" +" results[2] = (ulong)&mem3[0];\n" +" results[3] = (ulong)&mem4[0];\n" +" results[4] = (ulong)&mem8[0];\n" +" results[5] = (ulong)&mem16[0];\n" +"}\n"; + +// For private and local +const char *local_kernel_long = +"%s\n" // optional pragma +"kernel void test(global ulong *results)\n" +"{\n" +" %s %s mem0[3];\n" +" %s %s2 mem2[3];\n" +" %s %s3 mem3[3];\n" +" %s %s4 mem4[3];\n" +" %s %s8 mem8[3];\n" +" %s %s16 mem16[3];\n" +" results[0] = (ulong)&mem0[0];\n" +" results[1] = (ulong)&mem2[0];\n" +" results[2] = (ulong)&mem3[0];\n" +" results[3] = (ulong)&mem4[0];\n" +" results[4] = (ulong)&mem8[0];\n" +" results[5] = (ulong)&mem16[0];\n" +"}\n"; + +// For constant +const char *constant_kernel_long = +"%s\n" // optional pragma +" constant %s mem0[3] = {0};\n" +" constant %s2 mem2[3] = {(%s2)(0)};\n" +" constant %s3 mem3[3] = {(%s3)(0)};\n" +" constant %s4 mem4[3] = {(%s4)(0)};\n" +" constant %s8 mem8[3] = {(%s8)(0)};\n" +" constant %s16 mem16[3] = {(%s16)(0)};\n" +"\n" +"kernel void test(global ulong *results)\n" +"{\n" +" results[0] = (ulong)&mem0;\n" +" results[1] = (ulong)&mem2;\n" +" results[2] = (ulong)&mem3;\n" +" results[3] = (ulong)&mem4;\n" +" results[4] = (ulong)&mem8;\n" +" results[5] = (ulong)&mem16;\n" +"}\n"; + + +// For global, local, and constant +const char *parameter_kernel_no_long = +"%s\n" // optional pragma +"kernel void test(global uint *results, %s %s *mem0, %s %s2 *mem2, %s %s3 *mem3, %s %s4 *mem4, %s %s8 *mem8, %s %s16 
*mem16)\n" +"{\n" +" results[0] = (uint)&mem0[0];\n" +" results[1] = (uint)&mem2[0];\n" +" results[2] = (uint)&mem3[0];\n" +" results[3] = (uint)&mem4[0];\n" +" results[4] = (uint)&mem8[0];\n" +" results[5] = (uint)&mem16[0];\n" +"}\n"; + +// For private and local +const char *local_kernel_no_long = +"%s\n" // optional pragma +"kernel void test(global uint *results)\n" +"{\n" +" %s %s mem0[3];\n" +" %s %s2 mem2[3];\n" +" %s %s3 mem3[3];\n" +" %s %s4 mem4[3];\n" +" %s %s8 mem8[3];\n" +" %s %s16 mem16[3];\n" +" results[0] = (uint)&mem0[0];\n" +" results[1] = (uint)&mem2[0];\n" +" results[2] = (uint)&mem3[0];\n" +" results[3] = (uint)&mem4[0];\n" +" results[4] = (uint)&mem8[0];\n" +" results[5] = (uint)&mem16[0];\n" +"}\n"; + +// For constant +const char *constant_kernel_no_long = +"%s\n" // optional pragma +" constant %s mem0[3] = {0};\n" +" constant %s2 mem2[3] = {(%s2)(0)};\n" +" constant %s3 mem3[3] = {(%s3)(0)};\n" +" constant %s4 mem4[3] = {(%s4)(0)};\n" +" constant %s8 mem8[3] = {(%s8)(0)};\n" +" constant %s16 mem16[3] = {(%s16)(0)};\n" +"\n" +"kernel void test(global uint *results)\n" +"{\n" +" results[0] = (uint)&mem0;\n" +" results[1] = (uint)&mem2;\n" +" results[2] = (uint)&mem3;\n" +" results[3] = (uint)&mem4;\n" +" results[4] = (uint)&mem8;\n" +" results[5] = (uint)&mem16;\n" +"}\n"; + +enum AddressSpaces +{ + kGlobal = 0, + kLocal, + kConstant, + kPrivate +}; + +typedef enum AddressSpaces AddressSpaces; + +#define DEBUG 0 + +const char * get_explicit_address_name( AddressSpaces address ) +{ + /* Quick method to avoid branching: make sure the following array matches the Enum order */ + static const char *sExplicitAddressNames[] = { "global", "local", "constant", "private"}; + + return sExplicitAddressNames[ address ]; +} + + +int test_kernel_memory_alignment(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems, AddressSpaces address ) +{ + const char *constant_kernel; + const char *parameter_kernel; + const char *local_kernel; + + 
if ( gHasLong ) + { + constant_kernel = constant_kernel_long; + parameter_kernel = parameter_kernel_long; + local_kernel = local_kernel_long; + } + else + { + constant_kernel = constant_kernel_no_long; + parameter_kernel = parameter_kernel_no_long; + local_kernel = local_kernel_no_long; + } + + ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble }; + char *kernel_code = (char*)malloc(4096); + cl_kernel kernel; + cl_program program; + int error; + int total_errors = 0; + cl_mem results; + cl_ulong *results_data; + cl_mem mem0, mem2, mem3, mem4, mem8, mem16; + + results_data = (cl_ulong*)malloc(sizeof(cl_ulong)*6); + results = clCreateBuffer(context, 0, sizeof(cl_ulong)*6, NULL, &error); + test_error(error, "clCreateBuffer failed"); + + mem0 = clCreateBuffer(context, 0, sizeof(cl_long), NULL, &error); + test_error(error, "clCreateBuffer failed"); + mem2 = clCreateBuffer(context, 0, sizeof(cl_long)*2, NULL, &error); + test_error(error, "clCreateBuffer failed"); + mem3 = clCreateBuffer(context, 0, sizeof(cl_long)*4, NULL, &error); + test_error(error, "clCreateBuffer failed"); + mem4 = clCreateBuffer(context, 0, sizeof(cl_long)*4, NULL, &error); + test_error(error, "clCreateBuffer failed"); + mem8 = clCreateBuffer(context, 0, sizeof(cl_long)*8, NULL, &error); + test_error(error, "clCreateBuffer failed"); + mem16 = clCreateBuffer(context, 0, sizeof(cl_long)*16, NULL, &error); + test_error(error, "clCreateBuffer failed"); + + + // For each type + + // Calculate alignment mask for each size + + // For global, local, constant, private + + // If global, local or constant -- do parameter_kernel + // If private or local -- do local_kernel + // If constant -- do constant kernel + + int numConstantArgs; + clGetDeviceInfo(device, CL_DEVICE_MAX_CONSTANT_ARGS, sizeof(numConstantArgs), &numConstantArgs, NULL); + + int typeIndex; + for (typeIndex = 0; typeIndex < 10; typeIndex++) { + // Skip double tests if we don't support doubles + 
if (vecType[typeIndex] == kDouble && !is_extension_available(device, "cl_khr_fp64")) { + log_info("Extension cl_khr_fp64 not supported; skipping double tests.\n"); + continue; + } + + if (( vecType[ typeIndex ] == kLong || vecType[ typeIndex ] == kULong ) && !gHasLong ) + continue; + + log_info("Testing %s...\n", get_explicit_type_name(vecType[typeIndex])); + + // Determine the expected alignment masks. + // E.g., if it is supposed to be 4 byte aligned, we should get 4-1=3 = ... 000011 + // We can then and the returned address with that and we should have 0. + cl_ulong alignments[6]; + alignments[0] = get_explicit_type_size(vecType[typeIndex])-1; + alignments[1] = (get_explicit_type_size(vecType[typeIndex])<<1)-1; + alignments[2] = (get_explicit_type_size(vecType[typeIndex])<<2)-1; + alignments[3] = (get_explicit_type_size(vecType[typeIndex])<<2)-1; + alignments[4] = (get_explicit_type_size(vecType[typeIndex])<<3)-1; + alignments[5] = (get_explicit_type_size(vecType[typeIndex])<<4)-1; + + // Parameter kernel + if (address == kGlobal || address == kLocal || address == kConstant) { + log_info("\tTesting parameter kernel...\n"); + + if ( (gIsEmbedded) && (address == kConstant) && (numConstantArgs < 6)) { + sprintf(kernel_code, parameter_kernel, + vecType[typeIndex] == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "", + get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]), + get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]), + get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]), + get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]), + get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]), + get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]) + ); + } + else { + sprintf(kernel_code, parameter_kernel, + vecType[typeIndex] == kDouble ? 
"#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "", + get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]), + get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]), + get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]), + get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]), + get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]), + get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]) + ); + } + //printf("Kernel is: \n%s\n", kernel_code); + + // Create the kernel + error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&kernel_code, "test"); + test_error(error, "create_single_kernel_helper failed"); + + // Initialize the results + memset(results_data, 0, sizeof(cl_long)*5); + error = clEnqueueWriteBuffer(queue, results, CL_TRUE, 0, sizeof(cl_long)*6, results_data, 0, NULL, NULL); + test_error(error, "clEnqueueWriteBuffer failed"); + + // Set the arguments + error = clSetKernelArg(kernel, 0, sizeof(results), &results); + test_error(error, "clSetKernelArg failed"); + if (address != kLocal) { + error = clSetKernelArg(kernel, 1, sizeof(mem0), &mem0); + test_error(error, "clSetKernelArg failed"); + error = clSetKernelArg(kernel, 2, sizeof(mem2), &mem2); + test_error(error, "clSetKernelArg failed"); + error = clSetKernelArg(kernel, 3, sizeof(mem3), &mem3); + test_error(error, "clSetKernelArg failed"); + error = clSetKernelArg(kernel, 4, sizeof(mem4), &mem4); + test_error(error, "clSetKernelArg failed"); + error = clSetKernelArg(kernel, 5, sizeof(mem8), &mem8); + test_error(error, "clSetKernelArg failed"); + error = clSetKernelArg(kernel, 6, sizeof(mem16), &mem16); + test_error(error, "clSetKernelArg failed"); + } else { + error = clSetKernelArg(kernel, 1, get_explicit_type_size(vecType[typeIndex]), NULL); + test_error(error, "clSetKernelArg failed"); + error = clSetKernelArg(kernel, 2, 
get_explicit_type_size(vecType[typeIndex])*2, NULL); + test_error(error, "clSetKernelArg failed"); + error = clSetKernelArg(kernel, 3, get_explicit_type_size(vecType[typeIndex])*4, NULL); + test_error(error, "clSetKernelArg failed"); + error = clSetKernelArg(kernel, 4, get_explicit_type_size(vecType[typeIndex])*4, NULL); + test_error(error, "clSetKernelArg failed"); + error = clSetKernelArg(kernel, 5, get_explicit_type_size(vecType[typeIndex])*8, NULL); + test_error(error, "clSetKernelArg failed"); + error = clSetKernelArg(kernel, 6, get_explicit_type_size(vecType[typeIndex])*16, NULL); + test_error(error, "clSetKernelArg failed"); + } + + // Enqueue the kernel + size_t global_size = 1; + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, NULL, 0, NULL, NULL); + test_error(error, "clEnqueueNDRangeKernel failed"); + + // Read back the results + error = clEnqueueReadBuffer(queue, results, CL_TRUE, 0, sizeof(cl_ulong)*6, results_data, 0, NULL, NULL); + test_error(error, "clEnqueueReadBuffer failed"); + + // Verify the results + if (gHasLong) { + for (int i = 0; i < 6; i++) { + if ((results_data[i] & alignments[i]) != 0) { + total_errors++; + log_error("\tVector size %d failed: 0x%llx is not properly aligned.\n", 1 << i, results_data[i]); + } else { + if (DEBUG) log_info("\tVector size %d passed: 0x%llx is properly aligned.\n", 1 << i, results_data[i]); + } + } + } + // Verify the results on devices that do not support longs + else { + cl_uint *results_data_no_long = (cl_uint *)results_data; + + for (int i = 0; i < 6; i++) { + if ((results_data_no_long[i] & alignments[i]) != 0) { + total_errors++; + log_error("\tVector size %d failed: 0x%llx is not properly aligned.\n", 1 << i, results_data_no_long[i]); + } else { + if (DEBUG) log_info("\tVector size %d passed: 0x%llx is properly aligned.\n", 1 << i, results_data_no_long[i]); + } + } + } + + clReleaseKernel(kernel); + clReleaseProgram(program); + } + + + + + // Local kernel + if (address == kLocal || 
address == kPrivate) { + log_info("\tTesting local kernel...\n"); + sprintf(kernel_code, local_kernel, + vecType[typeIndex] == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "", + get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]), + get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]), + get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]), + get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]), + get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]), + get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]) + ); + //printf("Kernel is: \n%s\n", kernel_code); + + // Create the kernel + error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&kernel_code, "test"); + test_error(error, "create_single_kernel_helper failed"); + + // Initialize the results + memset(results_data, 0, sizeof(cl_long)*5); + error = clEnqueueWriteBuffer(queue, results, CL_TRUE, 0, sizeof(cl_long)*5, results_data, 0, NULL, NULL); + test_error(error, "clEnqueueWriteBuffer failed"); + + // Set the arguments + error = clSetKernelArg(kernel, 0, sizeof(results), &results); + test_error(error, "clSetKernelArg failed"); + + // Enqueue the kernel + size_t global_size = 1; + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, NULL, 0, NULL, NULL); + test_error(error, "clEnqueueNDRangeKernel failed"); + + // Read back the results + error = clEnqueueReadBuffer(queue, results, CL_TRUE, 0, sizeof(cl_ulong)*5, results_data, 0, NULL, NULL); + test_error(error, "clEnqueueReadBuffer failed"); + + // Verify the results + if (gHasLong) { + for (int i = 0; i < 5; i++) { + if ((results_data[i] & alignments[i]) != 0) { + total_errors++; + log_error("\tVector size %d failed: 0x%llx is not properly aligned.\n", 1 << i, results_data[i]); + } else { + if (DEBUG) log_info("\tVector size %d passed: 0x%llx is 
properly aligned.\n", 1 << i, results_data[i]); + } + } + } + // Verify the results on devices that do not support longs + else { + cl_uint *results_data_no_long = (cl_uint *)results_data; + + for (int i = 0; i < 5; i++) { + if ((results_data_no_long[i] & alignments[i]) != 0) { + total_errors++; + log_error("\tVector size %d failed: 0x%llx is not properly aligned.\n", 1 << i, results_data_no_long[i]); + } else { + if (DEBUG) log_info("\tVector size %d passed: 0x%llx is properly aligned.\n", 1 << i, results_data_no_long[i]); + } + } + } + clReleaseKernel(kernel); + clReleaseProgram(program); + } + + + + // Constant kernel + if (address == kConstant) { + log_info("\tTesting constant kernel...\n"); + sprintf(kernel_code, constant_kernel, + vecType[typeIndex] == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "", + get_explicit_type_name(vecType[typeIndex]), + get_explicit_type_name(vecType[typeIndex]), + get_explicit_type_name(vecType[typeIndex]), + get_explicit_type_name(vecType[typeIndex]), + get_explicit_type_name(vecType[typeIndex]), + get_explicit_type_name(vecType[typeIndex]), + get_explicit_type_name(vecType[typeIndex]), + get_explicit_type_name(vecType[typeIndex]), + get_explicit_type_name(vecType[typeIndex]), + get_explicit_type_name(vecType[typeIndex]), + get_explicit_type_name(vecType[typeIndex]), + get_explicit_type_name(vecType[typeIndex]) + ); + //printf("Kernel is: \n%s\n", kernel_code); + + // Create the kernel + error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&kernel_code, "test"); + test_error(error, "create_single_kernel_helper failed"); + + // Initialize the results + memset(results_data, 0, sizeof(cl_long)*5); + error = clEnqueueWriteBuffer(queue, results, CL_TRUE, 0, sizeof(cl_long)*5, results_data, 0, NULL, NULL); + test_error(error, "clEnqueueWriteBuffer failed"); + + // Set the arguments + error = clSetKernelArg(kernel, 0, sizeof(results), &results); + test_error(error, "clSetKernelArg failed"); + 
+ // Enqueue the kernel + size_t global_size = 1; + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, NULL, 0, NULL, NULL); + test_error(error, "clEnqueueNDRangeKernel failed"); + + // Read back the results + error = clEnqueueReadBuffer(queue, results, CL_TRUE, 0, sizeof(cl_ulong)*5, results_data, 0, NULL, NULL); + test_error(error, "clEnqueueReadBuffer failed"); + + // Verify the results + if (gHasLong) { + for (int i = 0; i < 5; i++) { + if ((results_data[i] & alignments[i]) != 0) { + total_errors++; + log_error("\tVector size %d failed: 0x%llx is not properly aligned.\n", 1 << i, results_data[i]); + } else { + if (DEBUG) log_info("\tVector size %d passed: 0x%llx is properly aligned.\n", 1 << i, results_data[i]); + } + } + } + // Verify the results on devices that do not support longs + else { + cl_uint *results_data_no_long = (cl_uint *)results_data; + + for (int i = 0; i < 5; i++) { + if ((results_data_no_long[i] & alignments[i]) != 0) { + total_errors++; + log_error("\tVector size %d failed: 0x%llx is not properly aligned.\n", 1 << i, results_data_no_long[i]); + } else { + if (DEBUG) log_info("\tVector size %d passed: 0x%llx is properly aligned.\n", 1 << i, results_data_no_long[i]); + } + } + } + clReleaseKernel(kernel); + clReleaseProgram(program); + } + + + } + + clReleaseMemObject(results); + clReleaseMemObject(mem0); + clReleaseMemObject(mem2); + clReleaseMemObject(mem3); + clReleaseMemObject(mem4); + clReleaseMemObject(mem8); + clReleaseMemObject(mem16); + free( kernel_code ); + free( results_data ); + + if (total_errors != 0) + return -1; + return 0; + +} + + +int test_kernel_memory_alignment_local(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ) +{ + return test_kernel_memory_alignment( device, context, queue, n_elems, kLocal ); +} + +int test_kernel_memory_alignment_global(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ) +{ + return test_kernel_memory_alignment( device, 
context, queue, n_elems, kGlobal ); +} + +int test_kernel_memory_alignment_constant(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ) +{ + // There is a class of approved OpenCL 1.0 conformant devices out there that in some circumstances + // are unable to meaningfully take (or more precisely use) the address of constant data by virtue + // of limitations in their ISA design. This feature was not tested in 1.0, so they were declared + // conformant by Khronos. The failure is however caught here. + // + // Unfortunately, determining whether or not these devices are 1.0 conformant is not the jurisdiction + // of the 1.1 tests -- We can't fail them from 1.1 conformance here because they are not 1.1 + // devices. They are merely 1.0 conformant devices that interop with 1.1 devices in a 1.1 platform. + // To add new binding tests now to conformant 1.0 devices would violate the workingroup requirement + // of no new tests for 1.0 devices. So certain allowances have to be made in intractable cases + // such as this one. + // + // There is some precedent. Similar allowances are made for other 1.0 hardware features such as + // local memory size. The minimum required local memory size grew from 16 kB to 32 kB in OpenCL 1.1. + + // Detect 1.0 devices + // Get CL_DEVICE_VERSION size + size_t string_size = 0; /* size in bytes of the CL_DEVICE_VERSION string as reported by the driver */ + int err; /* clGetDeviceInfo returns CL_SUCCESS (0) on success; any non-zero value below is treated as failure */ + if( (err = clGetDeviceInfo( device, CL_DEVICE_VERSION, 0, NULL, &string_size ) ) ) + { + log_error( "FAILURE: Unable to get size of CL_DEVICE_VERSION string!" ); + return -1; + } + + //Allocate storage to hold the version string + char *version_string = (char*) malloc(string_size); + if( NULL == version_string ) + { + log_error( "FAILURE: Unable to allocate memory to hold CL_DEVICE_VERSION string!" ); + return -1; + } + + // Get CL_DEVICE_VERSION string + if( (err = clGetDeviceInfo( device, CL_DEVICE_VERSION, string_size, version_string, NULL ) ) ) + { + log_error( "FAILURE: Unable to read CL_DEVICE_VERSION string!" 
); + return -1; + } + + // easy out for 1.0 devices + const char *string_1_0 = "OpenCL 1.0 "; + if( 0 == strncmp( version_string, string_1_0, strlen(string_1_0)) ) + { + log_info( "WARNING: Allowing device to escape testing of difficult constant memory alignment case.\n\tDevice is not a OpenCL 1.1 device. CL_DEVICE_VERSION: \"%s\"\n", version_string ); + free(version_string); + return 0; + } + log_info( "Device version string: \"%s\"\n", version_string ); + free(version_string); + + // Everyone else is to be ground mercilessly under the wheels of progress + return test_kernel_memory_alignment( device, context, queue, n_elems, kConstant ); +} + +int test_kernel_memory_alignment_private(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ) +{ + return test_kernel_memory_alignment( device, context, queue, n_elems, kPrivate ); +} + + diff --git a/test_conformance/compatibility/test_conformance/basic/test_local.c b/test_conformance/compatibility/test_conformance/basic/test_local.c new file mode 100644 index 00000000..1be5926a --- /dev/null +++ b/test_conformance/compatibility/test_conformance/basic/test_local.c @@ -0,0 +1,372 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include + + +#include "procs.h" + +const char *barrier_with_localmem_kernel_code[] = { +"__kernel void compute_sum_with_localmem(__global int *a, int n, __local int *tmp_sum, __global int *sum)\n" +"{\n" +" int tid = get_local_id(0);\n" +" int lsize = get_local_size(0);\n" +" int i;\n" +"\n" +" tmp_sum[tid] = 0;\n" +" for (i=tid; i max_local_workgroup_size[0]) + kwgsize = max_local_workgroup_size[0]; + + // err = clSetKernelArgs(context, kernel, 4, NULL, values, sizes); + err = clSetKernelArg(kernel, 0, sizeof(streams[0]), &streams[0]); + err |= clSetKernelArg(kernel, 1, sizeof num_elements, &num_elements); + err |= clSetKernelArg(kernel, 2, wgsize * sizeof(cl_int), NULL); + err |= clSetKernelArg(kernel, 3, sizeof streams[1], &streams[1]); + if (err != CL_SUCCESS) + { + log_error("clSetKernelArgs failed\n"); + return -1; + } + + global_threads[0] = wgsize; + local_threads[0] = wgsize; + + // Adjust the local thread size to fit and be a nice multiple. 
+ if (kwgsize < wgsize) { + log_info("Adjusting wgsize down from %lu to %lu.\n", wgsize, kwgsize); + local_threads[0] = kwgsize; + } + while (global_threads[0] % local_threads[0] != 0) + local_threads[0]--; + + err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_threads, local_threads, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + log_error("clEnqueueNDRangeKernel failed\n"); + return -1; + } + + err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, out_length, output_ptr, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + log_error("clEnqueueReadBuffer failed\n"); + return -1; + } + + err = verify_sum(input_ptr, tmp_ptr, output_ptr, num_elements); + + // cleanup + clReleaseMemObject(streams[0]); + clReleaseMemObject(streams[1]); + clReleaseKernel(kernel); + clReleaseProgram(program); + free(input_ptr); + free(tmp_ptr); + free(output_ptr); + + return err; +} + +int test_local_kernel_def(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + cl_mem streams[2]; + cl_program program; + cl_kernel kernel; + + cl_int *input_ptr, *output_ptr, *tmp_ptr; + size_t global_threads[1], local_threads[1]; + size_t wgsize, kwgsize; + int err, i; + char *program_source = (char*)malloc(sizeof(char)*2048); + MTdata d = init_genrand( gRandomSeed ); + size_t max_local_workgroup_size[3]; + memset(program_source, 0, 2048); + + err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof wgsize, &wgsize, NULL); + if (err) { + log_error("clGetDeviceInfo failed, %d\n\n", err); + return -1; + } + wgsize/=2; + if (wgsize < 1) + wgsize = 1; + + size_t in_length = sizeof(cl_int) * num_elements; + size_t out_length = sizeof(cl_int) * wgsize; + + input_ptr = (cl_int *)malloc(in_length); + output_ptr = (cl_int *)malloc(out_length); + tmp_ptr = (cl_int *)malloc(out_length); + + streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, in_length, NULL, NULL); + if (!streams[0]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + streams[1] = 
clCreateBuffer(context, CL_MEM_READ_WRITE, out_length, NULL, NULL); + if (!streams[1]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + + for (i=0; i (localMemSize / (sizeof(cl_int)*sizeof(cl_int))) ) + { + wgsize = localMemSize / (sizeof(cl_int)*sizeof(cl_int)); + } + + sprintf(program_source, barrier_with_localmem_kernel_code[1], (int)(wgsize * sizeof(cl_int))); + + err = create_single_kernel_helper(context, &program, &kernel, 1, (const char**)&program_source, "compute_sum_with_localmem" ); + free(program_source); + if (err) + return -1; + + err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof kwgsize, &kwgsize, NULL); + test_error(err, "clGetKernelWorkGroupInfo failed for CL_KERNEL_WORK_GROUP_SIZE"); + + err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_local_workgroup_size), max_local_workgroup_size, NULL); + test_error(err, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES"); + + // Pick the minimum of the device and the kernel + if (kwgsize > max_local_workgroup_size[0]) + kwgsize = max_local_workgroup_size[0]; + + // err = clSetKernelArgs(context, kernel, 4, NULL, values, sizes); + err = clSetKernelArg(kernel, 0, sizeof(streams[0]), &streams[0]); + err |= clSetKernelArg(kernel, 1, sizeof num_elements, &num_elements); + err |= clSetKernelArg(kernel, 2, sizeof streams[1], &streams[1]); + if (err != CL_SUCCESS) + { + log_error("clSetKernelArgs failed\n"); + return -1; + } + + global_threads[0] = wgsize; + local_threads[0] = wgsize; + + // Adjust the local thread size to fit and be a nice multiple. 
+ if (kwgsize < wgsize) { + log_info("Adjusting wgsize down from %lu to %lu.\n", wgsize, kwgsize); + local_threads[0] = kwgsize; + } + while (global_threads[0] % local_threads[0] != 0) + local_threads[0]--; + + err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_threads, local_threads, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + log_error("clEnqueueNDRangeKernel failed\n"); + return -1; + } + + err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, out_length, output_ptr, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + log_error("clEnqueueReadBuffer failed\n"); + return -1; + } + + err = verify_sum(input_ptr, tmp_ptr, output_ptr, num_elements); + + // cleanup + clReleaseMemObject(streams[0]); + clReleaseMemObject(streams[1]); + clReleaseKernel(kernel); + clReleaseProgram(program); + free(input_ptr); + free(tmp_ptr); + free(output_ptr); + + return err; +} + + + diff --git a/test_conformance/compatibility/test_conformance/basic/test_local_kernel_scope.cpp b/test_conformance/compatibility/test_conformance/basic/test_local_kernel_scope.cpp new file mode 100644 index 00000000..a469751d --- /dev/null +++ b/test_conformance/compatibility/test_conformance/basic/test_local_kernel_scope.cpp @@ -0,0 +1,138 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include + + +#include "procs.h" + +#define MAX_LOCAL_STORAGE_SIZE 256 +#define MAX_LOCAL_STORAGE_SIZE_STRING "256" + +const char *kernelSource[] = { + "__kernel void test( __global unsigned int * input, __global unsigned int *outMaxes )\n" + "{\n" + " __local unsigned int localStorage[ " MAX_LOCAL_STORAGE_SIZE_STRING " ];\n" + " unsigned int theValue = input[ get_global_id( 0 ) ];\n" + "\n" + " // If we just write linearly, there's no verification that the items in a group share local data\n" + " // So we write reverse-linearly, which requires items to read the local data written by at least one\n" + " // different item\n" + " localStorage[ get_local_size( 0 ) - get_local_id( 0 ) - 1 ] = theValue;\n" + "\n" + " // The barrier ensures that all local items have written to the local storage\n" + " barrier( CLK_LOCAL_MEM_FENCE );\n" + "\n" + " // Now we loop back through the local storage and look for the max value. 
We only do this if\n" + " // we're the first item in a group\n" + " unsigned int max = 0;\n" + " if( get_local_id( 0 ) == 0 )\n" + " {\n" + " for( size_t i = 0; i < get_local_size( 0 ); i++ )\n" + " {\n" + " if( localStorage[ i ] > max )\n" + " max = localStorage[ i ];\n" + " }\n" + " outMaxes[ get_group_id( 0 ) ] = max;\n" + " }\n" + "}\n" +}; + +int test_local_kernel_scope(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + cl_int error; + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper streams[ 2 ]; + MTdata randSeed = init_genrand( gRandomSeed ); + + // Create a test kernel + error = create_single_kernel_helper( context, &program, &kernel, 1, kernelSource, "test" ); + test_error( error, "Unable to create test kernel" ); + + + // Determine an appropriate test size + size_t workGroupSize; + error = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof( workGroupSize ), &workGroupSize, NULL ); + test_error( error, "Unable to obtain kernel work group size" ); + + // Make sure the work group size doesn't overrun our local storage size in the kernel + while( workGroupSize > MAX_LOCAL_STORAGE_SIZE ) + workGroupSize >>= 1; + + size_t testSize = workGroupSize; + while( testSize < 1024 ) + testSize += workGroupSize; + size_t numGroups = testSize / workGroupSize; + log_info( "\tTesting with %ld groups, %ld elements per group...\n", numGroups, workGroupSize ); + + // Create two buffers for operation + cl_uint *inputData = (cl_uint*)malloc( testSize * sizeof(cl_uint) ); + generate_random_data( kUInt, testSize, randSeed, inputData ); + free_mtdata( randSeed ); + streams[ 0 ] = clCreateBuffer( context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, testSize * sizeof(cl_uint), inputData, &error ); + test_error( error, "Unable to create input buffer" ); + + cl_uint *outputData = (cl_uint*)malloc( numGroups *sizeof(cl_uint) ); + streams[ 1 ] = clCreateBuffer( context, CL_MEM_WRITE_ONLY, numGroups * 
sizeof(cl_uint), NULL, &error ); + test_error( error, "Unable to create output buffer" ); + + + // Set up the kernel args and run + error = clSetKernelArg( kernel, 0, sizeof( streams[ 0 ] ), &streams[ 0 ] ); + test_error( error, "Unable to set kernel arg" ); + error = clSetKernelArg( kernel, 1, sizeof( streams[ 1 ] ), &streams[ 1 ] ); + test_error( error, "Unable to set kernel arg" ); + + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, &testSize, &workGroupSize, 0, NULL, NULL ); + test_error( error, "Unable to enqueue kernel" ); + + + // Read results and verify + error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, numGroups * sizeof(cl_uint), outputData, 0, NULL, NULL ); + test_error( error, "Unable to read output data" ); + + // The MinGW compiler seems to have a bug that optimizes the code below incorrectly. + // Adding the volatile keyword to the size_t declarations avoids the aggressive optimization. + for( volatile size_t i = 0; i < numGroups; i++ ) + { + // Recompute the expected per-group max on the host from the original input data + cl_uint localMax = 0; + for( volatile size_t j = 0; j < workGroupSize; j++ ) + { + if( inputData[ i * workGroupSize + j ] > localMax ) + localMax = inputData[ i * workGroupSize + j ]; + } + + if( outputData[ i ] != localMax ) + { + log_error( "ERROR: Local max validation failed! (expected %u, got %u for i=%lu)\n", localMax, outputData[ i ] , i ); + free(inputData); + free(outputData); + return -1; + } + } + + free(inputData); + free(outputData); + return 0; +} + + diff --git a/test_conformance/compatibility/test_conformance/basic/test_loop.c b/test_conformance/compatibility/test_conformance/basic/test_loop.c new file mode 100644 index 00000000..a4f043b4 --- /dev/null +++ b/test_conformance/compatibility/test_conformance/basic/test_loop.c @@ -0,0 +1,184 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include + + +#include "procs.h" + +const char *loop_kernel_code = +"__kernel void test_loop(__global int *src, __global int *loopindx, __global int *loopcnt, __global int *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +" int n = get_global_size(0);\n" +" int i, j;\n" +"\n" +" dst[tid] = 0;\n" +" for (i=0,j=loopindx[tid]; i= n)\n" +" j = 0;\n" +" dst[tid] += src[j];\n" +" }\n" +"\n" +"}\n"; + + +int +verify_loop(int *inptr, int *loopindx, int *loopcnt, int *outptr, int n) +{ + int r, i, j, k; + + for (i=0; i= n) + k = 0; + r += inptr[k]; + } + + if (r != outptr[i]) + { + log_error("LOOP test failed: %d found, expected %d\n", outptr[i], r); + return -1; + } + } + + log_info("LOOP test passed\n"); + return 0; +} + +int test_loop(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + cl_mem streams[4]; + cl_int *input_ptr, *loop_indx, *loop_cnt, *output_ptr; + cl_program program; + cl_kernel kernel; + size_t threads[1]; + int err, i; + + size_t length = sizeof(cl_int) * num_elements; + input_ptr = (cl_int*)malloc(length); + loop_indx = (cl_int*)malloc(length); + loop_cnt = (cl_int*)malloc(length); + output_ptr = (cl_int*)malloc(length); + + streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL); + if (!streams[0]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL); + if (!streams[1]) + { + log_error("clCreateBuffer failed\n"); 
+ return -1; + } + streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL); + if (!streams[2]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + streams[3] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL); + if (!streams[3]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + + MTdata d = init_genrand( gRandomSeed ); + for (i=0; i +#include +#include +#include + + +#include "procs.h" + +static const char *multireadimage_kernel_code = +"__kernel void test_multireadimage(read_only image2d_t img0, read_only image2d_t img1, \n" +" read_only image2d_t img2, __global float4 *dst, sampler_t sampler)\n" +"{\n" +" int tid_x = get_global_id(0);\n" +" int tid_y = get_global_id(1);\n" +" int2 tid = (int2)(tid_x, tid_y);\n" +" int indx = tid_y * get_image_width(img1) + tid_x;\n" +" float4 sum;\n" +"\n" +" sum = read_imagef(img0, sampler, tid);\n" +" sum += read_imagef(img1, sampler, tid);\n" +" sum += read_imagef(img2, sampler, tid);\n" +"\n" +" dst[indx] = sum;\n" +"}\n"; + +#define MAX_ERR 1e-7f + +static unsigned char * +generate_8888_image(int w, int h, MTdata d) +{ + unsigned char *ptr = (unsigned char*)malloc(w * h * 4); + int i; + + for (i=0; i max_ulp) + max_ulp = ulp; + } + + if (max_ulp > max_ulp_allowed) { + log_error("READ_MULTIREADIMAGE_MULTIFORMAT test failed. Max ulp error = %g\n", max_ulp); + return -1; + } + + log_info("READ_MULTIREADIMAGE_MULTIFORMAT test passed. 
Max ulp error = %g\n", max_ulp); + return 0; +} + + +int +test_multireadimagemultifmt(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + cl_mem streams[4]; + cl_image_format img_format; + void *input_ptr[3], *output_ptr; + cl_program program; + cl_kernel kernel; + size_t threads[2]; + int img_width = 512; + int img_height = 512; + int i, err; + MTdata d; + + PASSIVE_REQUIRE_IMAGE_SUPPORT( device ) + + d = init_genrand( gRandomSeed ); + input_ptr[0] = (void *)generate_8888_image(img_width, img_height, d); + input_ptr[1] = (void *)generate_16bit_image(img_width, img_height, d); + input_ptr[2] = (void *)generate_float_image(img_width, img_height, d); + free_mtdata(d); d = NULL; + + output_ptr = (void *)malloc(sizeof(float) * 4 * img_width * img_height); + + img_format.image_channel_order = CL_RGBA; + img_format.image_channel_data_type = CL_UNORM_INT8; + streams[0] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, NULL); + if (!streams[0]) + { + log_error("create_image_2d failed\n"); + return -1; + } + img_format.image_channel_order = CL_RGBA; + img_format.image_channel_data_type = CL_UNORM_INT16; + streams[1] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, NULL); + if (!streams[1]) + { + log_error("create_image_2d failed\n"); + return -1; + } + img_format.image_channel_order = CL_RGBA; + img_format.image_channel_data_type = CL_FLOAT; + streams[2] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, NULL); + if (!streams[2]) + { + log_error("create_image_2d failed\n"); + return -1; + } + + streams[3] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(float)*4 * img_width*img_height, NULL, NULL); + if (!streams[3]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + + for (i=0; i<3; i++) + { + size_t origin[3] = {0,0,0}, 
region[3]={img_width, img_height,1}; + err = clEnqueueWriteImage(queue, streams[i], CL_TRUE, origin, region, 0, 0, input_ptr[i], 0, NULL, NULL); + if (err != CL_SUCCESS) + { + log_error("clWriteImage failed\n"); + return -1; + } + } + + err = create_single_kernel_helper( context, &program, &kernel, 1, &multireadimage_kernel_code, "test_multireadimage"); + if (err) + return -1; + + cl_sampler sampler = clCreateSampler(context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &err); + test_error(err, "clCreateSampler failed"); + + for (i=0; i<4; i++) + err |= clSetKernelArg(kernel, i,sizeof streams[i], &streams[i]); + err |= clSetKernelArg(kernel, 4, sizeof sampler, &sampler); + + if (err != CL_SUCCESS) + { + log_error("clSetKernelArgs failed\n"); + return -1; + } + + threads[0] = (size_t)img_width; + threads[1] = (size_t)img_height; + + err = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, NULL ); + if (err != CL_SUCCESS) + { + log_error("clEnqueueNDRangeKernel failed\n"); + return -1; + } + err = clEnqueueReadBuffer( queue, streams[3], CL_TRUE, 0, sizeof(float)*4*img_width*img_height, (void *)output_ptr, 0, NULL, NULL ); + if (err != CL_SUCCESS) + { + log_error("clEnqueueReadBuffer failed\n"); + return -1; + } + + err = verify_multireadimage(input_ptr, (float*)output_ptr, img_width, img_height); + + // cleanup + clReleaseSampler(sampler); + for (i=0; i<4; i++) + clReleaseMemObject(streams[i]); + clReleaseKernel(kernel); + clReleaseProgram(program); + for (i=0; i<3; i++) + free(input_ptr[i]); + free(output_ptr); + + return err; +} + + + + + diff --git a/test_conformance/compatibility/test_conformance/basic/test_multireadimageonefmt.c b/test_conformance/compatibility/test_conformance/basic/test_multireadimageonefmt.c new file mode 100644 index 00000000..e561176d --- /dev/null +++ b/test_conformance/compatibility/test_conformance/basic/test_multireadimageonefmt.c @@ -0,0 +1,198 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include + + +#include "procs.h" + +static const char *multireadimage_kernel_code = +"__kernel void test_multireadimage(int n, int m, sampler_t sampler, \n" +" read_only image2d_t img0, read_only image2d_t img1, \n" +" read_only image2d_t img2, read_only image2d_t img3, \n" +" read_only image2d_t img4, read_only image2d_t img5, \n" +" read_only image2d_t img6, __global float4 *dst)\n" +"{\n" +" int tid_x = get_global_id(0);\n" +" int tid_y = get_global_id(1);\n" +" int2 tid = (int2)(tid_x, tid_y);\n" +" int indx = tid_y * get_image_width(img5) + tid_x;\n" +" float4 sum;\n" +"\n" +" sum = read_imagef(img0, sampler, tid);\n" +" sum += read_imagef(img1, sampler, tid);\n" +" sum += read_imagef(img2, sampler, tid);\n" +" sum += read_imagef(img3, sampler, tid);\n" +" sum += read_imagef(img4, sampler, tid);\n" +" sum += read_imagef(img5, sampler, tid);\n" +" sum += read_imagef(img6, sampler, tid);\n" +"\n" +" dst[indx] = sum;\n" +"}\n"; + + +static unsigned char * +generate_8888_image(int w, int h, MTdata d) +{ + unsigned char *ptr = (unsigned char*)malloc(w * h * 4); + int i; + + for (i=0; i max_ulp) + max_ulp = ulp; + } + + if (max_ulp > max_ulp_allowed) + { + log_error("READ_MULTIREADIMAGE_RGBA8888 test failed. Max ULP err = %g\n", max_ulp); + return -1; + } + log_info("READ_MULTIREADIMAGE_RGBA8888 test passed. 
Max ULP err = %g\n", max_ulp); + return 0; +} + + +int test_multireadimageonefmt(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + cl_mem streams[8]; + cl_image_format img_format; + void *input_ptr[7], *output_ptr; + cl_program program; + cl_kernel kernel; + size_t threads[2]; + int img_width = 512; + int img_height = 512; + int i, err; + size_t origin[3] = {0, 0, 0}; + size_t region[3] = {img_width, img_height, 1}; + size_t length = img_width * img_height * 4 * sizeof(float); + MTdata d; + + PASSIVE_REQUIRE_IMAGE_SUPPORT( device ) + + output_ptr = malloc(length); + + d = init_genrand( gRandomSeed ); + for (i=0; i<7; i++) { + input_ptr[i] = (void *)generate_8888_image(img_width, img_height, d); + + img_format.image_channel_order = CL_RGBA; + img_format.image_channel_data_type = CL_UNORM_INT8; + streams[i] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format, img_width, img_height, 0, NULL, NULL); + if (!streams[i]) + { + log_error("create_image_2d failed\n"); + return -1; + } + + err = clEnqueueWriteImage(queue, streams[i], CL_TRUE, origin, region, 0, 0, input_ptr[i], 0, NULL, NULL); + if (err != CL_SUCCESS) + { + log_error("clWriteImage failed\n"); + return -1; + } + } + free_mtdata(d); d = NULL; + + + streams[7] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL); + if (!streams[7]) + { + log_error("clCreateArray failed\n"); + return -1; + } + + err = create_single_kernel_helper(context, &program, &kernel, 1, &multireadimage_kernel_code, "test_multireadimage"); + if (err) + return -1; + + cl_sampler sampler = clCreateSampler(context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &err); + test_error(err, "clCreateSampler failed"); + + err = clSetKernelArg(kernel, 0, sizeof i, &i); + err |= clSetKernelArg(kernel, 1, sizeof err, &err); + err |= clSetKernelArg(kernel, 2, sizeof sampler, &sampler); + for (i=0; i<8; i++) + err |= clSetKernelArg(kernel, 3+i, sizeof streams[i], &streams[i]); + + if (err != 
CL_SUCCESS) + { + log_error("clSetKernelArgs failed\n"); + return -1; + } + + threads[0] = (unsigned int)img_width; + threads[1] = (unsigned int)img_height; + + err = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, threads, NULL, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + log_error("clExecuteKernel failed\n"); + return -1; + } + err = clEnqueueReadBuffer(queue, streams[7], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + log_error("clReadArray failed\n"); + return -1; + } + + err = verify_multireadimage(input_ptr, 7, (float *)output_ptr, img_width, img_height); + + // cleanup + clReleaseSampler(sampler); + for (i=0; i<8; i++) + clReleaseMemObject(streams[i]); + clReleaseKernel(kernel); + clReleaseProgram(program); + for (i=0; i<7; i++) + free(input_ptr[i]); + free(output_ptr); + + return err; +} + + + + + diff --git a/test_conformance/compatibility/test_conformance/basic/test_numeric_constants.cpp b/test_conformance/compatibility/test_conformance/basic/test_numeric_constants.cpp new file mode 100644 index 00000000..5aeca0ed --- /dev/null +++ b/test_conformance/compatibility/test_conformance/basic/test_numeric_constants.cpp @@ -0,0 +1,710 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "procs.h" + +#define TEST_VALUE_POSITIVE( string_name, name, value ) \ +{ \ +if (name < value) { \ +log_error("FAILED: " string_name ": " #name " < " #value "\n"); \ +errors++;\ +} else { \ +log_info("\t" string_name ": " #name " >= " #value "\n"); \ +} \ +} + +#define TEST_VALUE_NEGATIVE( string_name, name, value ) \ +{ \ +if (name > value) { \ +log_error("FAILED: " string_name ": " #name " > " #value "\n"); \ +errors++;\ +} else { \ +log_info("\t" string_name ": " #name " <= " #value "\n"); \ +} \ +} + +#define TEST_VALUE_EQUAL_LITERAL( string_name, name, value ) \ +{ \ +if (name != value) { \ +log_error("FAILED: " string_name ": " #name " != " #value "\n"); \ +errors++;\ +} else { \ +log_info("\t" string_name ": " #name " = " #value "\n"); \ +} \ +} + +#define TEST_VALUE_EQUAL( string_name, name, value ) \ +{ \ +if (name != value) { \ +log_error("FAILED: " string_name ": " #name " != %a (%17.21g)\n", value, value); \ +errors++;\ +} else { \ +log_info("\t" string_name ": " #name " = %a (%17.21g)\n", value, value); \ +} \ +} + +int test_host_numeric_constants(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int errors = 0; + TEST_VALUE_EQUAL_LITERAL( "CL_CHAR_BIT", CL_CHAR_BIT, 8) + TEST_VALUE_EQUAL_LITERAL( "CL_SCHAR_MAX", CL_SCHAR_MAX, 127) + TEST_VALUE_EQUAL_LITERAL( "CL_SCHAR_MIN", CL_SCHAR_MIN, (-127-1)) + TEST_VALUE_EQUAL_LITERAL( "CL_CHAR_MAX", CL_CHAR_MAX, CL_SCHAR_MAX) + TEST_VALUE_EQUAL_LITERAL( "CL_CHAR_MIN", CL_CHAR_MIN, CL_SCHAR_MIN) + TEST_VALUE_EQUAL_LITERAL( "CL_UCHAR_MAX", CL_UCHAR_MAX, 255) + TEST_VALUE_EQUAL_LITERAL( "CL_SHRT_MAX", CL_SHRT_MAX, 32767) + TEST_VALUE_EQUAL_LITERAL( "CL_SHRT_MIN", CL_SHRT_MIN, (-32767-1)) + TEST_VALUE_EQUAL_LITERAL( "CL_USHRT_MAX", CL_USHRT_MAX, 65535) + TEST_VALUE_EQUAL_LITERAL( "CL_INT_MAX", CL_INT_MAX, 2147483647) + TEST_VALUE_EQUAL_LITERAL( "CL_INT_MIN", CL_INT_MIN, (-2147483647-1)) + TEST_VALUE_EQUAL_LITERAL( "CL_UINT_MAX", CL_UINT_MAX, 0xffffffffU) + 
TEST_VALUE_EQUAL_LITERAL( "CL_LONG_MAX", CL_LONG_MAX, ((cl_long) 0x7FFFFFFFFFFFFFFFLL)) + TEST_VALUE_EQUAL_LITERAL( "CL_LONG_MIN", CL_LONG_MIN, ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL)) + TEST_VALUE_EQUAL_LITERAL( "CL_ULONG_MAX", CL_ULONG_MAX, ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL)) + + TEST_VALUE_EQUAL_LITERAL( "CL_FLT_DIG", CL_FLT_DIG, 6) + TEST_VALUE_EQUAL_LITERAL( "CL_FLT_MANT_DIG", CL_FLT_MANT_DIG, 24) + TEST_VALUE_EQUAL_LITERAL( "CL_FLT_MAX_10_EXP", CL_FLT_MAX_10_EXP, +38) + TEST_VALUE_EQUAL_LITERAL( "CL_FLT_MAX_EXP", CL_FLT_MAX_EXP, +128) + TEST_VALUE_EQUAL_LITERAL( "CL_FLT_MIN_10_EXP", CL_FLT_MIN_10_EXP, -37) + TEST_VALUE_EQUAL_LITERAL( "CL_FLT_MIN_EXP", CL_FLT_MIN_EXP, -125) + TEST_VALUE_EQUAL_LITERAL( "CL_FLT_RADIX", CL_FLT_RADIX, 2) + TEST_VALUE_EQUAL_LITERAL( "CL_FLT_MAX", CL_FLT_MAX, MAKE_HEX_FLOAT( 0x1.fffffep127f, 0x1fffffeL, 103)) + TEST_VALUE_EQUAL_LITERAL( "CL_FLT_MIN", CL_FLT_MIN, MAKE_HEX_FLOAT(0x1.0p-126f, 0x1L, -126)) + TEST_VALUE_EQUAL_LITERAL( "CL_FLT_EPSILON", CL_FLT_EPSILON, MAKE_HEX_FLOAT(0x1.0p-23f, 0x1L, -23)) + + TEST_VALUE_EQUAL_LITERAL( "CL_DBL_DIG", CL_DBL_DIG, 15) + TEST_VALUE_EQUAL_LITERAL( "CL_DBL_MANT_DIG", CL_DBL_MANT_DIG, 53) + TEST_VALUE_EQUAL_LITERAL( "CL_DBL_MAX_10_EXP", CL_DBL_MAX_10_EXP, +308) + TEST_VALUE_EQUAL_LITERAL( "CL_DBL_MAX_EXP", CL_DBL_MAX_EXP, +1024) + TEST_VALUE_EQUAL_LITERAL( "CL_DBL_MIN_10_EXP", CL_DBL_MIN_10_EXP, -307) + TEST_VALUE_EQUAL_LITERAL( "CL_DBL_MIN_EXP", CL_DBL_MIN_EXP, -1021) + TEST_VALUE_EQUAL_LITERAL( "CL_DBL_RADIX", CL_DBL_RADIX, 2) + TEST_VALUE_EQUAL( "CL_DBL_MAX", CL_DBL_MAX, MAKE_HEX_DOUBLE(0x1.fffffffffffffp1023, 0x1fffffffffffffLL, 971)) + TEST_VALUE_EQUAL( "CL_DBL_MIN", CL_DBL_MIN, MAKE_HEX_DOUBLE(0x1.0p-1022, 0x1LL, -1022)) + TEST_VALUE_EQUAL( "CL_DBL_EPSILON", CL_DBL_EPSILON, MAKE_HEX_DOUBLE(0x1.0p-52, 0x1LL, -52)) + + TEST_VALUE_EQUAL( "CL_M_E", CL_M_E, MAKE_HEX_DOUBLE(0x1.5bf0a8b145769p+1, 0x15bf0a8b145769LL, -51) ); + TEST_VALUE_EQUAL( "CL_M_LOG2E", CL_M_LOG2E, 
MAKE_HEX_DOUBLE(0x1.71547652b82fep+0, 0x171547652b82feLL, -52) ); + TEST_VALUE_EQUAL( "CL_M_LOG10E", CL_M_LOG10E, MAKE_HEX_DOUBLE(0x1.bcb7b1526e50ep-2, 0x1bcb7b1526e50eLL, -54) ); + TEST_VALUE_EQUAL( "CL_M_LN2", CL_M_LN2, MAKE_HEX_DOUBLE(0x1.62e42fefa39efp-1, 0x162e42fefa39efLL, -53) ); + TEST_VALUE_EQUAL( "CL_M_LN10", CL_M_LN10, MAKE_HEX_DOUBLE(0x1.26bb1bbb55516p+1, 0x126bb1bbb55516LL, -51) ); + TEST_VALUE_EQUAL( "CL_M_PI", CL_M_PI, MAKE_HEX_DOUBLE(0x1.921fb54442d18p+1, 0x1921fb54442d18LL, -51) ); + TEST_VALUE_EQUAL( "CL_M_PI_2", CL_M_PI_2, MAKE_HEX_DOUBLE(0x1.921fb54442d18p+0, 0x1921fb54442d18LL, -52) ); + TEST_VALUE_EQUAL( "CL_M_PI_4", CL_M_PI_4, MAKE_HEX_DOUBLE(0x1.921fb54442d18p-1, 0x1921fb54442d18LL, -53) ); + TEST_VALUE_EQUAL( "CL_M_1_PI", CL_M_1_PI, MAKE_HEX_DOUBLE(0x1.45f306dc9c883p-2, 0x145f306dc9c883LL, -54) ); + TEST_VALUE_EQUAL( "CL_M_2_PI", CL_M_2_PI, MAKE_HEX_DOUBLE(0x1.45f306dc9c883p-1, 0x145f306dc9c883LL, -53) ); + TEST_VALUE_EQUAL( "CL_M_2_SQRTPI", CL_M_2_SQRTPI, MAKE_HEX_DOUBLE(0x1.20dd750429b6dp+0, 0x120dd750429b6dLL, -52) ); + TEST_VALUE_EQUAL( "CL_M_SQRT2", CL_M_SQRT2, MAKE_HEX_DOUBLE(0x1.6a09e667f3bcdp+0, 0x16a09e667f3bcdLL, -52) ); + TEST_VALUE_EQUAL( "CL_M_SQRT1_2", CL_M_SQRT1_2, MAKE_HEX_DOUBLE(0x1.6a09e667f3bcdp-1, 0x16a09e667f3bcdLL, -53) ); + + TEST_VALUE_EQUAL( "CL_M_E_F", CL_M_E_F, MAKE_HEX_FLOAT(0x1.5bf0a8p+1f, 0x15bf0a8L, -23)); + TEST_VALUE_EQUAL( "CL_M_LOG2E_F", CL_M_LOG2E_F, MAKE_HEX_FLOAT(0x1.715476p+0f, 0x1715476L, -24)); + TEST_VALUE_EQUAL( "CL_M_LOG10E_F", CL_M_LOG10E_F, MAKE_HEX_FLOAT(0x1.bcb7b2p-2f, 0x1bcb7b2L, -26)); + TEST_VALUE_EQUAL( "CL_M_LN2_F", CL_M_LN2_F, MAKE_HEX_FLOAT(0x1.62e43p-1f, 0x162e43L, -21) ); + TEST_VALUE_EQUAL( "CL_M_LN10_F", CL_M_LN10_F, MAKE_HEX_FLOAT(0x1.26bb1cp+1f, 0x126bb1cL, -23)); + TEST_VALUE_EQUAL( "CL_M_PI_F", CL_M_PI_F, MAKE_HEX_FLOAT(0x1.921fb6p+1f, 0x1921fb6L, -23)); + TEST_VALUE_EQUAL( "CL_M_PI_2_F", CL_M_PI_2_F, MAKE_HEX_FLOAT(0x1.921fb6p+0f, 0x1921fb6L, -24)); + TEST_VALUE_EQUAL( 
"CL_M_PI_4_F", CL_M_PI_4_F, MAKE_HEX_FLOAT(0x1.921fb6p-1f, 0x1921fb6L, -25)); + TEST_VALUE_EQUAL( "CL_M_1_PI_F", CL_M_1_PI_F, MAKE_HEX_FLOAT(0x1.45f306p-2f, 0x145f306L, -26)); + TEST_VALUE_EQUAL( "CL_M_2_PI_F", CL_M_2_PI_F, MAKE_HEX_FLOAT(0x1.45f306p-1f, 0x145f306L, -25)); + TEST_VALUE_EQUAL( "CL_M_2_SQRTPI_F", CL_M_2_SQRTPI_F,MAKE_HEX_FLOAT(0x1.20dd76p+0f, 0x120dd76L, -24)); + TEST_VALUE_EQUAL( "CL_M_SQRT2_F", CL_M_SQRT2_F, MAKE_HEX_FLOAT(0x1.6a09e6p+0f, 0x16a09e6L, -24)); + TEST_VALUE_EQUAL( "CL_M_SQRT1_2_F", CL_M_SQRT1_2_F, MAKE_HEX_FLOAT(0x1.6a09e6p-1f, 0x16a09e6L, -25)); + + return errors; +} + + +const char *kernel_int_float[] = { + "__kernel void test( __global float *float_out, __global int *int_out, __global uint *uint_out) \n" + "{\n" + " int_out[0] = CHAR_BIT;\n" + " int_out[1] = SCHAR_MAX;\n" + " int_out[2] = SCHAR_MIN;\n" + " int_out[3] = CHAR_MAX;\n" + " int_out[4] = CHAR_MIN;\n" + " int_out[5] = UCHAR_MAX;\n" + " int_out[6] = SHRT_MAX;\n" + " int_out[7] = SHRT_MIN;\n" + " int_out[8] = USHRT_MAX;\n" + " int_out[9] = INT_MAX;\n" + " int_out[10] = INT_MIN;\n" + " uint_out[0] = UINT_MAX;\n" + + " int_out[11] = FLT_DIG;\n" + " int_out[12] = FLT_MANT_DIG;\n" + " int_out[13] = FLT_MAX_10_EXP;\n" + " int_out[14] = FLT_MAX_EXP;\n" + " int_out[15] = FLT_MIN_10_EXP;\n" + " int_out[16] = FLT_MIN_EXP;\n" + " int_out[17] = FLT_RADIX;\n" + "#ifdef __IMAGE_SUPPORT__\n" + " int_out[18] = __IMAGE_SUPPORT__;\n" + "#else\n" + " int_out[18] = 0xf00baa;\n" + "#endif\n" + " float_out[0] = FLT_MAX;\n" + " float_out[1] = FLT_MIN;\n" + " float_out[2] = FLT_EPSILON;\n" + " float_out[3] = M_E_F;\n" + " float_out[4] = M_LOG2E_F;\n" + " float_out[5] = M_LOG10E_F;\n" + " float_out[6] = M_LN2_F;\n" + " float_out[7] = M_LN10_F;\n" + " float_out[8] = M_PI_F;\n" + " float_out[9] = M_PI_2_F;\n" + " float_out[10] = M_PI_4_F;\n" + " float_out[11] = M_1_PI_F;\n" + " float_out[12] = M_2_PI_F;\n" + " float_out[13] = M_2_SQRTPI_F;\n" + " float_out[14] = M_SQRT2_F;\n" + " float_out[15] = 
M_SQRT1_2_F;\n" + "}\n" +}; + +const char *kernel_long[] = { + "__kernel void test(__global long *long_out, __global ulong *ulong_out) \n" + "{\n" + " long_out[0] = LONG_MAX;\n" + " long_out[1] = LONG_MIN;\n" + " ulong_out[0] = ULONG_MAX;\n" + "}\n" +}; + +const char *kernel_double[] = { + "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" + "__kernel void test( __global double *double_out, __global long *long_out ) \n " + "{\n" + " long_out[0] = DBL_DIG;\n" + " long_out[1] = DBL_MANT_DIG;\n" + " long_out[2] = DBL_MAX_10_EXP;\n" + " long_out[3] = DBL_MAX_EXP;\n" + " long_out[4] = DBL_MIN_10_EXP;\n" + " long_out[5] = DBL_MIN_EXP;\n" + " long_out[6] = DBL_RADIX;\n" + " double_out[0] = DBL_MAX;\n" + " double_out[1] = DBL_MIN;\n" + " double_out[2] = DBL_EPSILON;\n" + " double_out[3] = M_E;\n" + " double_out[4] = M_LOG2E;\n" + " double_out[5] = M_LOG10E;\n" + " double_out[6] = M_LN2;\n" + " double_out[7] = M_LN10;\n" + " double_out[8] = M_PI;\n" + " double_out[9] = M_PI_2;\n" + " double_out[10] = M_PI_4;\n" + " double_out[11] = M_1_PI;\n" + " double_out[12] = M_2_PI;\n" + " double_out[13] = M_2_SQRTPI;\n" + " double_out[14] = M_SQRT2;\n" + " double_out[15] = M_SQRT1_2;\n" + "}\n" +}; + + +int test_kernel_numeric_constants(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error, errors = 0; + // clProgramWrapper program; + // clKernelWrapper kernel; + // clMemWrapper streams[3]; + cl_program program; + cl_kernel kernel; + cl_mem streams[3]; + + size_t threads[] = {1,1,1}; + cl_float float_out[16]; + cl_int int_out[19]; + cl_uint uint_out[1]; + cl_long long_out[7]; + cl_ulong ulong_out[1]; + cl_double double_out[16]; + + /** INTs and FLOATs **/ + + // Create the kernel + if( create_single_kernel_helper( context, &program, &kernel, 1, kernel_int_float, "test" ) != 0 ) + { + return -1; + } + + /* Create some I/O streams */ + streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(float_out), NULL, &error); + 
test_error( error, "Creating test array failed" ); + streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(int_out), NULL, &error); + test_error( error, "Creating test array failed" ); + streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(uint_out), NULL, &error); + test_error( error, "Creating test array failed" ); + + error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]); + test_error( error, "Unable to set indexed kernel arguments" ); + error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0]); + test_error( error, "Unable to set indexed kernel arguments" ); + error = clSetKernelArg(kernel, 2, sizeof( streams[2] ), &streams[2]); + test_error( error, "Unable to set indexed kernel arguments" ); + + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL ); + test_error( error, "Kernel execution failed" ); + + error = clEnqueueReadBuffer( queue, streams[0], CL_TRUE, 0, sizeof(float_out), (void*)float_out, 0, NULL, NULL ); + test_error( error, "Unable to get result data" ); + error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(int_out), (void*)int_out, 0, NULL, NULL ); + test_error( error, "Unable to get result data" ); + error = clEnqueueReadBuffer( queue, streams[2], CL_TRUE, 0, sizeof(uint_out), (void*)uint_out, 0, NULL, NULL ); + test_error( error, "Unable to get result data" ); + + TEST_VALUE_EQUAL_LITERAL( "CHAR_BIT", int_out[0], 8) + TEST_VALUE_EQUAL_LITERAL( "SCHAR_MAX", int_out[1], 127) + TEST_VALUE_EQUAL_LITERAL( "SCHAR_MIN", int_out[2], (-127-1)) + TEST_VALUE_EQUAL_LITERAL( "CHAR_MAX", int_out[3], CL_SCHAR_MAX) + TEST_VALUE_EQUAL_LITERAL( "CHAR_MIN", int_out[4], CL_SCHAR_MIN) + TEST_VALUE_EQUAL_LITERAL( "UCHAR_MAX", int_out[5], 255) + TEST_VALUE_EQUAL_LITERAL( "SHRT_MAX", int_out[6], 32767) + TEST_VALUE_EQUAL_LITERAL( "SHRT_MIN",int_out[7], (-32767-1)) + TEST_VALUE_EQUAL_LITERAL( "USHRT_MAX", int_out[8], 65535) + 
TEST_VALUE_EQUAL_LITERAL( "INT_MAX", int_out[9], 2147483647) + TEST_VALUE_EQUAL_LITERAL( "INT_MIN", int_out[10], (-2147483647-1)) + TEST_VALUE_EQUAL_LITERAL( "UINT_MAX", uint_out[0], 0xffffffffU) + + TEST_VALUE_EQUAL_LITERAL( "FLT_DIG", int_out[11], 6) + TEST_VALUE_EQUAL_LITERAL( "FLT_MANT_DIG", int_out[12], 24) + TEST_VALUE_EQUAL_LITERAL( "FLT_MAX_10_EXP", int_out[13], +38) + TEST_VALUE_EQUAL_LITERAL( "FLT_MAX_EXP", int_out[14], +128) + TEST_VALUE_EQUAL_LITERAL( "FLT_MIN_10_EXP", int_out[15], -37) + TEST_VALUE_EQUAL_LITERAL( "FLT_MIN_EXP", int_out[16], -125) + TEST_VALUE_EQUAL_LITERAL( "FLT_RADIX", int_out[17], 2) + TEST_VALUE_EQUAL( "FLT_MAX", float_out[0], MAKE_HEX_FLOAT(0x1.fffffep127f, 0x1fffffeL, 103)) + TEST_VALUE_EQUAL( "FLT_MIN", float_out[1], MAKE_HEX_FLOAT(0x1.0p-126f, 0x1L, -126)) + TEST_VALUE_EQUAL( "FLT_EPSILON", float_out[2], MAKE_HEX_FLOAT(0x1.0p-23f, 0x1L, -23)) + TEST_VALUE_EQUAL( "M_E_F", float_out[3], CL_M_E_F ) + TEST_VALUE_EQUAL( "M_LOG2E_F", float_out[4], CL_M_LOG2E_F ) + TEST_VALUE_EQUAL( "M_LOG10E_F", float_out[5], CL_M_LOG10E_F ) + TEST_VALUE_EQUAL( "M_LN2_F", float_out[6], CL_M_LN2_F ) + TEST_VALUE_EQUAL( "M_LN10_F", float_out[7], CL_M_LN10_F ) + TEST_VALUE_EQUAL( "M_PI_F", float_out[8], CL_M_PI_F ) + TEST_VALUE_EQUAL( "M_PI_2_F", float_out[9], CL_M_PI_2_F ) + TEST_VALUE_EQUAL( "M_PI_4_F", float_out[10], CL_M_PI_4_F ) + TEST_VALUE_EQUAL( "M_1_PI_F", float_out[11], CL_M_1_PI_F ) + TEST_VALUE_EQUAL( "M_2_PI_F", float_out[12], CL_M_2_PI_F ) + TEST_VALUE_EQUAL( "M_2_SQRTPI_F", float_out[13], CL_M_2_SQRTPI_F ) + TEST_VALUE_EQUAL( "M_SQRT2_F", float_out[14], CL_M_SQRT2_F ) + TEST_VALUE_EQUAL( "M_SQRT1_2_F", float_out[15], CL_M_SQRT1_2_F ) + + // We need to check these values against what we know is supported on the device + if( checkForImageSupport( deviceID ) == 0 ) + { // has images + // If images are supported, the constant should have been defined to the value 1 + if( int_out[18] == 0xf00baa ) + { + log_error( "FAILURE: __IMAGE_SUPPORT__ 
undefined even though images are supported\n" ); + return -1; + } + else if( int_out[18] != 1 ) + { + log_error( "FAILURE: __IMAGE_SUPPORT__ defined, but to the wrong value (defined as %d, spec states it should be 1)\n", int_out[18] ); + return -1; + } + } + else + { // no images + // If images aren't supported, the constant should be undefined + if( int_out[18] != 0xf00baa ) + { + log_error( "FAILURE: __IMAGE_SUPPORT__ defined to value %d even though images aren't supported", int_out[18] ); + return -1; + } + } + log_info( "\t__IMAGE_SUPPORT__: %d\n", int_out[18]); + + clReleaseMemObject(streams[0]); streams[0] = NULL; + clReleaseMemObject(streams[1]); streams[1] = NULL; + clReleaseMemObject(streams[2]); streams[2] = NULL; + clReleaseKernel(kernel); kernel = NULL; + clReleaseProgram(program); program = NULL; + + /** LONGs **/ + + if(!gHasLong) { + log_info("Longs not supported; skipping long tests.\n"); + } + else + { + // Create the kernel + if( create_single_kernel_helper( context, &program, &kernel, 1, kernel_long, "test" ) != 0 ) + { + return -1; + } + + streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(long_out), NULL, &error); + test_error( error, "Creating test array failed" ); + streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(ulong_out), NULL, &error); + test_error( error, "Creating test array failed" ); + + error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]); + test_error( error, "Unable to set indexed kernel arguments" ); + error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0]); + test_error( error, "Unable to set indexed kernel arguments" ); + + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL ); + test_error( error, "Kernel execution failed" ); + + error = clEnqueueReadBuffer( queue, streams[0], CL_TRUE, 0, sizeof(long_out), &long_out, 0, NULL, NULL ); + test_error( error, "Unable to get result data" ); + error = 
clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(ulong_out), &ulong_out, 0, NULL, NULL ); + test_error( error, "Unable to get result data" ); + + TEST_VALUE_EQUAL_LITERAL( "LONG_MAX", long_out[0], ((cl_long) 0x7FFFFFFFFFFFFFFFLL)) + TEST_VALUE_EQUAL_LITERAL( "LONG_MIN", long_out[1], ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL)) + TEST_VALUE_EQUAL_LITERAL( "ULONG_MAX", ulong_out[0], ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL)) + + clReleaseMemObject(streams[0]); streams[0] = NULL; + clReleaseMemObject(streams[1]); streams[1] = NULL; + clReleaseKernel(kernel); kernel = NULL; + clReleaseProgram(program); program = NULL; + } + + /** DOUBLEs **/ + + if(!is_extension_available(deviceID, "cl_khr_fp64")) { + log_info("Extension cl_khr_fp64 not supported; skipping double tests.\n"); + } + else + { + // Create the kernel + if( create_single_kernel_helper( context, &program, &kernel, 1, kernel_double, "test" ) != 0 ) + { + return -1; + } + + streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(double_out), NULL, &error); + test_error( error, "Creating test array failed" ); + streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(long_out), NULL, &error); + test_error( error, "Creating test array failed" ); + + error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]); + test_error( error, "Unable to set indexed kernel arguments" ); + error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0]); + test_error( error, "Unable to set indexed kernel arguments" ); + + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL ); + test_error( error, "Kernel execution failed" ); + + error = clEnqueueReadBuffer( queue, streams[0], CL_TRUE, 0, sizeof(double_out), &double_out, 0, NULL, NULL ); + test_error( error, "Unable to get result data" ); + error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(long_out), &long_out, 0, NULL, NULL ); + test_error( error, "Unable to get result 
data" ); + + TEST_VALUE_EQUAL_LITERAL( "DBL_DIG", long_out[0], 15) + TEST_VALUE_EQUAL_LITERAL( "DBL_MANT_DIG", long_out[1], 53) + TEST_VALUE_EQUAL_LITERAL( "DBL_MAX_10_EXP", long_out[2], +308) + TEST_VALUE_EQUAL_LITERAL( "DBL_MAX_EXP", long_out[3], +1024) + TEST_VALUE_EQUAL_LITERAL( "DBL_MIN_10_EXP", long_out[4], -307) + TEST_VALUE_EQUAL_LITERAL( "DBL_MIN_EXP", long_out[5], -1021) + TEST_VALUE_EQUAL_LITERAL( "DBL_RADIX", long_out[6], 2) + TEST_VALUE_EQUAL( "DBL_MAX", double_out[0], MAKE_HEX_DOUBLE(0x1.fffffffffffffp1023, 0x1fffffffffffffLL, 971)) + TEST_VALUE_EQUAL( "DBL_MIN", double_out[1], MAKE_HEX_DOUBLE(0x1.0p-1022, 0x1LL, -1022)) + TEST_VALUE_EQUAL( "DBL_EPSILON", double_out[2], MAKE_HEX_DOUBLE(0x1.0p-52, 0x1LL, -52)) + //TEST_VALUE_EQUAL( "M_E", double_out[3], CL_M_E ) + TEST_VALUE_EQUAL( "M_LOG2E", double_out[4], CL_M_LOG2E ) + TEST_VALUE_EQUAL( "M_LOG10E", double_out[5], CL_M_LOG10E ) + TEST_VALUE_EQUAL( "M_LN2", double_out[6], CL_M_LN2 ) + TEST_VALUE_EQUAL( "M_LN10", double_out[7], CL_M_LN10 ) + TEST_VALUE_EQUAL( "M_PI", double_out[8], CL_M_PI ) + TEST_VALUE_EQUAL( "M_PI_2", double_out[9], CL_M_PI_2 ) + TEST_VALUE_EQUAL( "M_PI_4", double_out[10], CL_M_PI_4 ) + TEST_VALUE_EQUAL( "M_1_PI", double_out[11], CL_M_1_PI ) + TEST_VALUE_EQUAL( "M_2_PI", double_out[12], CL_M_2_PI ) + TEST_VALUE_EQUAL( "M_2_SQRTPI", double_out[13], CL_M_2_SQRTPI ) + TEST_VALUE_EQUAL( "M_SQRT2", double_out[14], CL_M_SQRT2 ) + TEST_VALUE_EQUAL( "M_SQRT1_2", double_out[15], CL_M_SQRT1_2 ) + + clReleaseMemObject(streams[0]); streams[0] = NULL; + clReleaseMemObject(streams[1]); streams[1] = NULL; + clReleaseKernel(kernel); kernel = NULL; + clReleaseProgram(program); program = NULL; + } + + error = clFinish(queue); + test_error(error, "clFinish failed"); + + return errors; +} + + +const char *kernel_constant_limits[] = { + "__kernel void test( __global int *intOut, __global float *floatOut ) \n" + "{\n" + " intOut[0] = isinf( MAXFLOAT ) ? 1 : 0;\n" + " intOut[1] = isnormal( MAXFLOAT ) ? 
1 : 0;\n" + " intOut[2] = isnan( MAXFLOAT ) ? 1 : 0;\n" + " intOut[3] = sizeof( MAXFLOAT );\n" + " intOut[4] = ( MAXFLOAT == FLT_MAX ) ? 1 : 0;\n" + // " intOut[5] = ( MAXFLOAT == CL_FLT_MAX ) ? 1 : 0;\n" + " intOut[6] = ( MAXFLOAT == MAXFLOAT ) ? 1 : 0;\n" + " intOut[7] = ( MAXFLOAT == 0x1.fffffep127f ) ? 1 : 0;\n" + " floatOut[0] = MAXFLOAT;\n" + "}\n" +}; + +const char *kernel_constant_extended_limits[] = { + "__kernel void test( __global int *intOut, __global float *floatOut ) \n" + "{\n" + " intOut[0] = ( INFINITY == HUGE_VALF ) ? 1 : 0;\n" + " intOut[1] = sizeof( INFINITY );\n" + " intOut[2] = isinf( INFINITY ) ? 1 : 0;\n" + " intOut[3] = isnormal( INFINITY ) ? 1 : 0;\n" + " intOut[4] = isnan( INFINITY ) ? 1 : 0;\n" + " intOut[5] = ( INFINITY > MAXFLOAT ) ? 1 : 0;\n" + " intOut[6] = ( -INFINITY < -MAXFLOAT ) ? 1 : 0;\n" + " intOut[7] = ( ( MAXFLOAT + MAXFLOAT ) == INFINITY ) ? 1 : 0;\n" + " intOut[8] = ( nextafter( MAXFLOAT, INFINITY ) == INFINITY ) ? 1 : 0;\n" + " intOut[9] = ( nextafter( -MAXFLOAT, -INFINITY ) == -INFINITY ) ? 1 : 0;\n" + " intOut[10] = ( INFINITY == INFINITY ) ? 1 : 0;\n" + " intOut[11] = ( as_uint( INFINITY ) == 0x7f800000 ) ? 1 : 0;\n" + " floatOut[0] = INFINITY;\n" + "\n" + " intOut[12] = sizeof( HUGE_VALF );\n" + " intOut[13] = ( HUGE_VALF == INFINITY ) ? 1 : 0;\n" + " floatOut[1] = HUGE_VALF;\n" + "\n" + " intOut[14] = ( NAN == NAN ) ? 1 : 0;\n" + " intOut[15] = ( NAN != NAN ) ? 1 : 0;\n" + " intOut[16] = isnan( NAN ) ? 1 : 0;\n" + " intOut[17] = isinf( NAN ) ? 1 : 0;\n" + " intOut[18] = isnormal( NAN ) ? 1 : 0;\n" + " intOut[19] = ( ( as_uint( NAN ) & 0x7fffffff ) > 0x7f800000 ) ? 1 : 0;\n" + " intOut[20] = sizeof( NAN );\n" + " floatOut[2] = NAN;\n" + "\n" + " intOut[21] = isnan( INFINITY / INFINITY ) ? 1 : 0;\n" + " intOut[22] = isnan( INFINITY - INFINITY ) ? 1 : 0;\n" + " intOut[23] = isnan( 0.f / 0.f ) ? 1 : 0;\n" + " intOut[24] = isnan( INFINITY * 0.f ) ? 
1 : 0;\n" + " intOut[25] = ( INFINITY == NAN ); \n" + " intOut[26] = ( -INFINITY == NAN ); \n" + " intOut[27] = ( INFINITY > NAN ); \n" + " intOut[28] = ( -INFINITY < NAN ); \n" + " intOut[29] = ( INFINITY != NAN ); \n" + " intOut[30] = ( NAN > INFINITY ); \n" + " intOut[31] = ( NAN < -INFINITY ); \n" + + "}\n" +}; + +const char *kernel_constant_double_limits[] = { + "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" + "__kernel void test( __global int *intOut, __global double *doubleOut ) \n" + "{\n" + " intOut[0] = sizeof( HUGE_VAL );\n" + " intOut[1] = ( HUGE_VAL == INFINITY ) ? 1 : 0;\n" + " intOut[2] = isinf( HUGE_VAL ) ? 1 : 0;\n" + " intOut[3] = isnormal( HUGE_VAL ) ? 1 : 0;\n" + " intOut[4] = isnan( HUGE_VAL ) ? 1 : 0;\n" + " intOut[5] = ( HUGE_VAL == HUGE_VALF ) ? 1 : 0;\n" + " intOut[6] = ( as_ulong( HUGE_VAL ) == 0x7ff0000000000000UL ) ? 1 : 0;\n" + " doubleOut[0] = HUGE_VAL;\n" + "}\n" +}; + +#define TEST_FLOAT_ASSERTION( a, msg, f ) if( !( a ) ) { log_error( "ERROR: Float constant failed requirement: %s (bitwise value is 0x%8.8x)\n", msg, *( (uint32_t *)&f ) ); return -1; } +#define TEST_DOUBLE_ASSERTION( a, msg, f ) if( !( a ) ) { log_error( "ERROR: Double constant failed requirement: %s (bitwise value is 0x%16.16llx)\n", msg, *( (uint64_t *)&f ) ); return -1; } + +int test_kernel_limit_constants(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + size_t threads[] = {1,1,1}; + clMemWrapper intStream, floatStream, doubleStream; + cl_int intOut[ 32 ]; + cl_float floatOut[ 3 ]; + cl_double doubleOut[ 1 ]; + + + /* Create some I/O streams */ + intStream = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(intOut), NULL, &error ); + test_error( error, "Creating test array failed" ); + floatStream = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(floatOut), NULL, &error ); + test_error( error, "Creating test array failed" ); + + // Stage 1: basic limits on MAXFLOAT + { + 
clProgramWrapper program; + clKernelWrapper kernel; + + if( create_single_kernel_helper( context, &program, &kernel, 1, kernel_constant_limits, "test" ) != 0 ) + { + return -1; + } + + error = clSetKernelArg( kernel, 0, sizeof( intStream ), &intStream ); + test_error( error, "Unable to set indexed kernel arguments" ); + error = clSetKernelArg( kernel, 1, sizeof( floatStream ), &floatStream ); + test_error( error, "Unable to set indexed kernel arguments" ); + + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL ); + test_error( error, "Kernel execution failed" ); + + error = clEnqueueReadBuffer( queue, intStream, CL_TRUE, 0, sizeof(intOut), intOut, 0, NULL, NULL ); + test_error( error, "Unable to get result data" ); + error = clEnqueueReadBuffer( queue, floatStream, CL_TRUE, 0, sizeof(floatOut), floatOut, 0, NULL, NULL ); + test_error( error, "Unable to get result data" ); + + // Test MAXFLOAT properties + TEST_FLOAT_ASSERTION( intOut[0] == 0, "isinf( MAXFLOAT ) = false", floatOut[0] ) + TEST_FLOAT_ASSERTION( intOut[1] == 1, "isnormal( MAXFLOAT ) = true", floatOut[0] ) + TEST_FLOAT_ASSERTION( intOut[2] == 0, "isnan( MAXFLOAT ) = false", floatOut[0] ) + TEST_FLOAT_ASSERTION( intOut[3] == 4, "sizeof( MAXFLOAT ) = 4", floatOut[0] ) + TEST_FLOAT_ASSERTION( intOut[4] == 1, "MAXFLOAT = FLT_MAX", floatOut[0] ) + TEST_FLOAT_ASSERTION( floatOut[0] == CL_FLT_MAX, "MAXFLOAT = CL_FLT_MAX", floatOut[0] ) + TEST_FLOAT_ASSERTION( intOut[6] == 1, "MAXFLOAT = MAXFLOAT", floatOut[0] ) + TEST_FLOAT_ASSERTION( floatOut[0] == MAKE_HEX_FLOAT( 0x1.fffffep127f, 0x1fffffeL, 103), "MAXFLOAT = 0x1.fffffep127f", floatOut[0] ) + } + + // Stage 2: INFINITY and NAN + char profileStr[128] = ""; + error = clGetDeviceInfo( deviceID, CL_DEVICE_PROFILE, sizeof( profileStr ), &profileStr, NULL ); + test_error( error, "Unable to run INFINITY/NAN tests (unable to get CL_DEVICE_PROFILE" ); + + bool testInfNan = true; + if( strcmp( profileStr, "EMBEDDED_PROFILE" ) == 0 ) + 
{ + // We test if we're not an embedded profile, OR if the inf/nan flag in the config is set + cl_device_fp_config single = 0; + error = clGetDeviceInfo( deviceID, CL_DEVICE_SINGLE_FP_CONFIG, sizeof( single ), &single, NULL ); + test_error( error, "Unable to run INFINITY/NAN tests (unable to get FP_CONFIG bits)" ); + + if( ( single & CL_FP_INF_NAN ) == 0 ) + { + log_info( "Skipping INFINITY and NAN tests on embedded device (INF/NAN not supported on this device)" ); + testInfNan = false; + } + } + + if( testInfNan ) + { + clProgramWrapper program; + clKernelWrapper kernel; + + if( create_single_kernel_helper( context, &program, &kernel, 1, kernel_constant_extended_limits, "test" ) != 0 ) + { + return -1; + } + + error = clSetKernelArg( kernel, 0, sizeof( intStream ), &intStream ); + test_error( error, "Unable to set indexed kernel arguments" ); + error = clSetKernelArg( kernel, 1, sizeof( floatStream ), &floatStream ); + test_error( error, "Unable to set indexed kernel arguments" ); + + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL ); + test_error( error, "Kernel execution failed" ); + + error = clEnqueueReadBuffer( queue, intStream, CL_TRUE, 0, sizeof(intOut), intOut, 0, NULL, NULL ); + test_error( error, "Unable to get result data" ); + error = clEnqueueReadBuffer( queue, floatStream, CL_TRUE, 0, sizeof(floatOut), floatOut, 0, NULL, NULL ); + test_error( error, "Unable to get result data" ); + + TEST_FLOAT_ASSERTION( intOut[0] == 1, "INFINITY == HUGE_VALF", intOut[0] ) + TEST_FLOAT_ASSERTION( intOut[1] == 4, "sizeof( INFINITY ) == 4", intOut[1] ) + TEST_FLOAT_ASSERTION( intOut[2] == 1, "isinf( INFINITY ) == true", intOut[2] ) + TEST_FLOAT_ASSERTION( intOut[3] == 0, "isnormal( INFINITY ) == false", intOut[3] ) + TEST_FLOAT_ASSERTION( intOut[4] == 0, "isnan( INFINITY ) == false", intOut[4] ) + TEST_FLOAT_ASSERTION( intOut[5] == 1, "INFINITY > MAXFLOAT", intOut[5] ) + TEST_FLOAT_ASSERTION( intOut[6] == 1, "-INFINITY < 
-MAXFLOAT", intOut[6] ) + TEST_FLOAT_ASSERTION( intOut[7] == 1, "( MAXFLOAT + MAXFLOAT ) == INFINITY", intOut[7] ) + TEST_FLOAT_ASSERTION( intOut[8] == 1, "nextafter( MAXFLOAT, INFINITY ) == INFINITY", intOut[8] ) + TEST_FLOAT_ASSERTION( intOut[9] == 1, "nextafter( -MAXFLOAT, -INFINITY ) == -INFINITY", intOut[9] ) + TEST_FLOAT_ASSERTION( intOut[10] == 1, "INFINITY = INFINITY", intOut[10] ) + TEST_FLOAT_ASSERTION( intOut[11] == 1, "asuint( INFINITY ) == 0x7f800000", intOut[11] ) + TEST_FLOAT_ASSERTION( *( (uint32_t *)&floatOut[0] ) == 0x7f800000, "asuint( INFINITY ) == 0x7f800000", floatOut[0] ) + TEST_FLOAT_ASSERTION( floatOut[1] == INFINITY, "INFINITY == INFINITY", floatOut[1] ) + + TEST_FLOAT_ASSERTION( intOut[12] == 4, "sizeof( HUGE_VALF ) == 4", intOut[12] ) + TEST_FLOAT_ASSERTION( intOut[13] == 1, "HUGE_VALF == INFINITY", intOut[13] ) + TEST_FLOAT_ASSERTION( floatOut[1] == HUGE_VALF, "HUGE_VALF == HUGE_VALF", floatOut[1] ) + + TEST_FLOAT_ASSERTION( intOut[14] == 0, "(NAN == NAN) = false", intOut[14] ) + TEST_FLOAT_ASSERTION( intOut[15] == 1, "(NAN != NAN) = true", intOut[15] ) + TEST_FLOAT_ASSERTION( intOut[16] == 1, "isnan( NAN ) = true", intOut[16] ) + TEST_FLOAT_ASSERTION( intOut[17] == 0, "isinf( NAN ) = false", intOut[17] ) + TEST_FLOAT_ASSERTION( intOut[18] == 0, "isnormal( NAN ) = false", intOut[18] ) + TEST_FLOAT_ASSERTION( intOut[19] == 1, "( as_uint( NAN ) & 0x7fffffff ) > 0x7f800000", intOut[19] ) + TEST_FLOAT_ASSERTION( intOut[20] == 4, "sizeof( NAN ) = 4", intOut[20] ) + TEST_FLOAT_ASSERTION( ( *( (uint32_t *)&floatOut[2] ) & 0x7fffffff ) > 0x7f800000, "( as_uint( NAN ) & 0x7fffffff ) > 0x7f800000", floatOut[2] ) + + TEST_FLOAT_ASSERTION( intOut[ 21 ] == 1, "isnan( INFINITY / INFINITY ) = true", intOut[ 21 ] ) + TEST_FLOAT_ASSERTION( intOut[ 22 ] == 1, "isnan( INFINITY - INFINITY ) = true", intOut[ 22 ] ) + TEST_FLOAT_ASSERTION( intOut[ 23 ] == 1, "isnan( 0.f / 0.f ) = true", intOut[ 23 ] ) + TEST_FLOAT_ASSERTION( intOut[ 24 ] == 1, "isnan( 
INFINITY * 0.f ) = true", intOut[ 24 ] ) + TEST_FLOAT_ASSERTION( intOut[ 25 ] == 0, "( INFINITY == NAN ) = false", intOut[ 25 ] ) + TEST_FLOAT_ASSERTION( intOut[ 26 ] == 0, "(-INFINITY == NAN ) = false", intOut[ 26 ] ) + TEST_FLOAT_ASSERTION( intOut[ 27 ] == 0, "( INFINITY > NAN ) = false", intOut[ 27 ] ) + TEST_FLOAT_ASSERTION( intOut[ 28 ] == 0, "(-INFINITY < NAN ) = false", intOut[ 28 ] ) + TEST_FLOAT_ASSERTION( intOut[ 29 ] == 1, "( INFINITY != NAN ) = true", intOut[ 29 ] ) + TEST_FLOAT_ASSERTION( intOut[ 30 ] == 0, "( NAN < INFINITY ) = false", intOut[ 30 ] ) + TEST_FLOAT_ASSERTION( intOut[ 31 ] == 0, "( NAN > -INFINITY ) = false", intOut[ 31 ] ) + } + + // Stage 3: limits on HUGE_VAL (double) + if( !is_extension_available( deviceID, "cl_khr_fp64" ) ) + log_info( "Note: Skipping double HUGE_VAL tests (doubles unsupported on device)\n" ); + else + { + cl_device_fp_config config = 0; + error = clGetDeviceInfo( deviceID, CL_DEVICE_DOUBLE_FP_CONFIG, sizeof( config ), &config, NULL ); + test_error( error, "Unable to run INFINITY/NAN tests (unable to get double FP_CONFIG bits)" ); + + if( ( config & CL_FP_INF_NAN ) == 0 ) + log_info( "Skipping HUGE_VAL tests (INF/NAN not supported on this device)" ); + else + { + clProgramWrapper program; + clKernelWrapper kernel; + + if( create_single_kernel_helper( context, &program, &kernel, 1, kernel_constant_double_limits, "test" ) != 0 ) + { + return -1; + } + + doubleStream = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(doubleOut), NULL, &error ); + test_error( error, "Creating test array failed" ); + + error = clSetKernelArg( kernel, 0, sizeof( intStream ), &intStream ); + test_error( error, "Unable to set indexed kernel arguments" ); + error = clSetKernelArg( kernel, 1, sizeof( doubleStream ), &doubleStream ); + test_error( error, "Unable to set indexed kernel arguments" ); + + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL ); + test_error( error, "Kernel execution 
failed" ); + + error = clEnqueueReadBuffer( queue, intStream, CL_TRUE, 0, sizeof(intOut), intOut, 0, NULL, NULL ); + test_error( error, "Unable to get result data" ); + error = clEnqueueReadBuffer( queue, doubleStream, CL_TRUE, 0, sizeof(doubleOut), doubleOut, 0, NULL, NULL ); + test_error( error, "Unable to get result data" ); + + TEST_DOUBLE_ASSERTION( intOut[0] == 8, "sizeof( HUGE_VAL ) = 8", intOut[0] ) + TEST_DOUBLE_ASSERTION( intOut[1] == 1, "HUGE_VAL = INFINITY", intOut[1] ) + TEST_DOUBLE_ASSERTION( intOut[2] == 1, "isinf( HUGE_VAL ) = true", intOut[2] ) + TEST_DOUBLE_ASSERTION( intOut[3] == 0, "isnormal( HUGE_VAL ) = false", intOut[3] ) + TEST_DOUBLE_ASSERTION( intOut[4] == 0, "isnan( HUGE_VAL ) = false", intOut[4] ) + TEST_DOUBLE_ASSERTION( intOut[5] == 1, "HUGE_VAL = HUGE_VAL", intOut[5] ) + TEST_DOUBLE_ASSERTION( intOut[6] == 1, "as_ulong( HUGE_VAL ) = 0x7ff0000000000000UL", intOut[6] ) + TEST_DOUBLE_ASSERTION( *( (uint64_t *)&doubleOut[0] ) == 0x7ff0000000000000ULL, "as_ulong( HUGE_VAL ) = 0x7ff0000000000000UL", doubleOut[0] ) + } + } + + return 0; +} + + diff --git a/test_conformance/compatibility/test_conformance/basic/test_pointercast.c b/test_conformance/compatibility/test_conformance/basic/test_pointercast.c new file mode 100644 index 00000000..88a19aad --- /dev/null +++ b/test_conformance/compatibility/test_conformance/basic/test_pointercast.c @@ -0,0 +1,140 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include + + +#include "procs.h" + +static const char *pointer_cast_kernel_code = +"__kernel void test_pointer_cast(__global unsigned char *src, __global unsigned int *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +" __global unsigned int *p = (__global unsigned int *)src;\n" +"\n" +" dst[tid] = p[tid];\n" +"\n" +"}\n"; + + +int +verify_pointer_cast(unsigned char *inptr, unsigned int *outptr, int n) +{ + unsigned int *p = (unsigned int *)inptr; + int i; + cl_uint r; + + for (i=0; i + +// Test __FILE__, __LINE__, __OPENCL_VERSION__, __OPENCL_C_VERSION__, __ENDIAN_LITTLE__, __ROUNDING_MODE__, __IMAGE_SUPPORT__, __FAST_RELAXED_MATH__ +// __kernel_exec + +const char *preprocessor_test = { + "#line 2 \"%s\"\n" + "__kernel void test( __global int *results, __global char *outFileString, __global char *outRoundingString )\n" + "{\n" + + // Integer preprocessor macros + "#ifdef __IMAGE_SUPPORT__\n" + " results[0] = __IMAGE_SUPPORT__;\n" + "#else\n" + " results[0] = 0xf00baa;\n" + "#endif\n" + + "#ifdef __ENDIAN_LITTLE__\n" + " results[1] = __ENDIAN_LITTLE__;\n" + "#else\n" + " results[1] = 0xf00baa;\n" + "#endif\n" + + "#ifdef __OPENCL_VERSION__\n" + " results[2] = __OPENCL_VERSION__;\n" + "#else\n" + " results[2] = 0xf00baa;\n" + "#endif\n" + + "#ifdef __OPENCL_C_VERSION__\n" + " results[3] = __OPENCL_C_VERSION__;\n" + "#else\n" + " results[3] = 0xf00baa;\n" + "#endif\n" + + "#ifdef __LINE__\n" + " results[4] = __LINE__;\n" + "#else\n" + " results[4] = 0xf00baa;\n" + "#endif\n" + +#if 0 // Removed by Affie's request 2/24 + "#ifdef __FAST_RELAXED_MATH__\n" + " results[5] = __FAST_RELAXED_MATH__;\n" + "#else\n" + " results[5] = 0xf00baa;\n" + "#endif\n" +#endif + + "#ifdef __kernel_exec\n" + " results[6] = 1;\n" // By spec, we can only really evaluate that it is defined, not what it 
expands to + "#else\n" + " results[6] = 0xf00baa;\n" + "#endif\n" + + // String preprocessor macros. Technically, there are strings in OpenCL, but not really. + "#ifdef __FILE__\n" + " int i;\n" + " constant char *f = \"\" __FILE__;\n" + " for( i = 0; f[ i ] != 0 && i < 512; i++ )\n" + " outFileString[ i ] = f[ i ];\n" + " outFileString[ i ] = 0;\n" + "#else\n" + " outFileString[ 0 ] = 0;\n" + "#endif\n" + + "}\n" + }; + +int test_kernel_preprocessor_macros(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper streams[ 3 ]; + + int error; + size_t threads[] = {1,1,1}; + + cl_int results[ 7 ]; + cl_char fileString[ 512 ] = "", roundingString[ 128 ] = ""; + char programSource[4096]; + char curFileName[512]; + char *programPtr = programSource; + int i = 0; + snprintf(curFileName, 512, "%s", __FILE__); +#ifdef _WIN32 + // Replace "\" with "\\" + while(curFileName[i] != '\0') { + if (curFileName[i] == '\\') { + int j = i + 1; + char prev = '\\'; + while (curFileName[j - 1] != '\0') { + char tmp = curFileName[j]; + curFileName[j] = prev; + prev = tmp; + j++; + } + i++; + } + i++; + } +#endif + sprintf(programSource,preprocessor_test,curFileName); + + // Create the kernel + if( create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&programPtr, "test" ) != 0 ) + { + return -1; + } + + /* Create some I/O streams */ + streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(results), NULL, &error); + test_error( error, "Creating test array failed" ); + streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(fileString), NULL, &error); + test_error( error, "Creating test array failed" ); + streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(roundingString), NULL, &error); + test_error( error, "Creating test array failed" ); + + // Set up and run + for( int i = 0; i < 3; i++ ) + { + 
error = clSetKernelArg( kernel, i, sizeof( streams[i] ), &streams[i] ); + test_error( error, "Unable to set indexed kernel arguments" ); + } + + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL ); + test_error( error, "Kernel execution failed" ); + + error = clEnqueueReadBuffer( queue, streams[0], CL_TRUE, 0, sizeof(results), results, 0, NULL, NULL ); + test_error( error, "Unable to get result data" ); + error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(fileString), fileString, 0, NULL, NULL ); + test_error( error, "Unable to get result data" ); + error = clEnqueueReadBuffer( queue, streams[2], CL_TRUE, 0, sizeof(roundingString), roundingString, 0, NULL, NULL ); + test_error( error, "Unable to get result data" ); + + + /////// Check the integer results + + // We need to check these values against what we know is supported on the device + if( checkForImageSupport( deviceID ) == 0 ) + { + // If images are supported, the constant should have been defined to the value 1 + if( results[ 0 ] == 0xf00baa ) + { + log_error( "ERROR: __IMAGE_SUPPORT__ undefined even though images are supported\n" ); + return -1; + } + else if( results[ 0 ] != 1 ) + { + log_error( "ERROR: __IMAGE_SUPPORT__ defined, but to the wrong value (defined as %d, spec states it should be 1)\n", (int)results[ 0 ] ); + return -1; + } + } + else + { + // If images aren't supported, the constant should be undefined + if( results[ 0 ] != 0xf00baa ) + { + log_error( "ERROR: __IMAGE_SUPPORT__ defined to value %d even though images aren't supported", (int)results[ 0 ] ); + return -1; + } + } + + // __ENDIAN_LITTLE__ is similar to __IMAGE_SUPPORT__: 1 if it's true, undefined if it isn't + cl_bool deviceIsLittleEndian; + error = clGetDeviceInfo( deviceID, CL_DEVICE_ENDIAN_LITTLE, sizeof( deviceIsLittleEndian ), &deviceIsLittleEndian, NULL ); + test_error( error, "Unable to get endian property of device to validate against" ); + + if( deviceIsLittleEndian ) + { 
+ if( results[ 1 ] == 0xf00baa ) + { + log_error( "ERROR: __ENDIAN_LITTLE__ undefined even though the device is little endian\n" ); + return -1; + } + else if( results[ 1 ] != 1 ) + { + log_error( "ERROR: __ENDIAN_LITTLE__ defined, but to the wrong value (defined as %d, spec states it should be 1)\n", (int)results[ 1 ] ); + return -1; + } + } + else + { + if( results[ 1 ] != 0xf00baa ) + { + log_error( "ERROR: __ENDIAN_LITTLE__ defined to value %d even though the device is not little endian (should be undefined per spec)", (int)results[ 1 ] ); + return -1; + } + } + + // __OPENCL_VERSION__ + if( results[ 2 ] == 0xf00baa ) + { + log_error( "ERROR: Kernel preprocessor __OPENCL_VERSION__ undefined!" ); + return -1; + } + + // The OpenCL version reported by the macro reports the feature level supported by the compiler. Since + // this doesn't directly match any property we can query, we just check to see if it's a sane value + char versionBuffer[ 128 ]; + error = clGetDeviceInfo( deviceID, CL_DEVICE_VERSION, sizeof( versionBuffer ), versionBuffer, NULL ); + test_error( error, "Unable to get device's version to validate against" ); + + // We need to parse to get the version number to compare against + char *p1, *p2, *p3; + for( p1 = versionBuffer; ( *p1 != 0 ) && !isdigit( *p1 ); p1++ ) + ; + for( p2 = p1; ( *p2 != 0 ) && ( *p2 != '.' ); p2++ ) + ; + for( p3 = p2; ( *p3 != 0 ) && ( *p3 != ' ' ); p3++ ) + ; + + if( p2 == p3 ) + { + log_error( "ERROR: Unable to verify OpenCL version string (platform string is incorrect format)\n" ); + return -1; + } + *p2 = 0; + *p3 = 0; + int major = atoi( p1 ); + int minor = atoi( p2 + 1 ); + int realVersion = ( major * 100 ) + ( minor * 10 ); + if( ( results[ 2 ] < 100 ) || ( results[ 2 ] > realVersion ) ) + { + log_error( "ERROR: Kernel preprocessor __OPENCL_VERSION__ does not make sense w.r.t. device's version string! 
" + "(preprocessor states %d, real version is %d (%d.%d))\n", results[ 2 ], realVersion, major, minor ); + return -1; + } + + // __OPENCL_C_VERSION__ + if( results[ 3 ] == 0xf00baa ) + { + log_error( "ERROR: Kernel preprocessor __OPENCL_C_VERSION__ undefined!\n" ); + return -1; + } + + // The OpenCL C version reported by the macro reports the OpenCL C supported by the compiler for this OpenCL device. + char cVersionBuffer[ 128 ]; + error = clGetDeviceInfo( deviceID, CL_DEVICE_OPENCL_C_VERSION, sizeof( cVersionBuffer ), cVersionBuffer, NULL ); + test_error( error, "Unable to get device's OpenCL C version to validate against" ); + + // We need to parse to get the version number to compare against + for( p1 = cVersionBuffer; ( *p1 != 0 ) && !isdigit( *p1 ); p1++ ) + ; + for( p2 = p1; ( *p2 != 0 ) && ( *p2 != '.' ); p2++ ) + ; + for( p3 = p2; ( *p3 != 0 ) && ( *p3 != ' ' ); p3++ ) + ; + + if( p2 == p3 ) + { + log_error( "ERROR: Unable to verify OpenCL C version string (platform string is incorrect format)\n" ); + return -1; + } + *p2 = 0; + *p3 = 0; + major = atoi( p1 ); + minor = atoi( p2 + 1 ); + realVersion = ( major * 100 ) + ( minor * 10 ); + if( ( results[ 3 ] < 100 ) || ( results[ 3 ] > realVersion ) ) + { + log_error( "ERROR: Kernel preprocessor __OPENCL_C_VERSION__ does not make sense w.r.t. device's version string! " + "(preprocessor states %d, real version is %d (%d.%d))\n", results[ 2 ], realVersion, major, minor ); + return -1; + } + + // __LINE__ + if( results[ 4 ] == 0xf00baa ) + { + log_error( "ERROR: Kernel preprocessor __LINE__ undefined!" ); + return -1; + } + + // This is fun--we get to search for where __LINE__ actually is so we know what line it should define to! + // Note: it shows up twice, once for the #ifdef, and the other for the actual result output + const char *linePtr = strstr( preprocessor_test, "__LINE__" ); + if( linePtr == NULL ) + { + log_error( "ERROR: Nonsensical NULL pointer encountered!" 
); + return -2; + } + linePtr = strstr( linePtr + strlen( "__LINE__" ), "__LINE__" ); + if( linePtr == NULL ) + { + log_error( "ERROR: Nonsensical NULL pointer encountered!" ); + return -2; + } + + // Now count how many carriage returns are before the string + const char *retPtr = strchr( preprocessor_test, '\n' ); + int retCount = 1; + for( ; ( retPtr < linePtr ) && ( retPtr != NULL ); retPtr = strchr( retPtr + 1, '\n' ) ) + retCount++; + + if( retCount != results[ 4 ] ) + { + log_error( "ERROR: Kernel preprocessor __LINE__ does not expand to the actual line number! (expanded to %d, but was on line %d)\n", + results[ 4 ], retCount ); + return -1; + } + +#if 0 // Removed by Affie's request 2/24 + // __FAST_RELAXED_MATH__ + // Since create_single_kernel_helper does NOT define -cl-fast-relaxed-math, this should be undefined + if( results[ 5 ] != 0xf00baa ) + { + log_error( "ERROR: Kernel preprocessor __FAST_RELAXED_MATH__ defined even though build option was not used (should be undefined)\n" ); + return -1; + } +#endif + + // __kernel_exec + // We can ONLY check to verify that it is defined + if( results[ 6 ] == 0xf00baa ) + { + log_error( "ERROR: Kernel preprocessor __kernel_exec must be defined\n" ); + return -1; + } + + //// String preprocessors + + // Since we provided the program directly, __FILE__ should compile to "". 
+ if( fileString[ 0 ] == 0 ) + { + log_error( "ERROR: Kernel preprocessor __FILE__ undefined!\n" ); + return -1; + } + else if( strncmp( (char *)fileString, __FILE__, 512 ) != 0 ) + { + log_info( "WARNING: __FILE__ defined, but to an unexpected value (%s)\n\tShould be: \"%s\"", fileString, __FILE__ ); + return -1; + } + + +#if 0 // Removed by Affie's request 2/24 + // One more try through: try with -cl-fast-relaxed-math to make sure the appropriate preprocessor gets defined + clProgramWrapper programB = clCreateProgramWithSource( context, 1, preprocessor_test, NULL, &error ); + test_error( error, "Unable to create test program" ); + + // Try compiling + error = clBuildProgram( programB, 1, &deviceID, "-cl-fast-relaxed-math", NULL, NULL ); + test_error( error, "Unable to build program" ); + + // Create a kernel again to run against + clKernelWrapper kernelB = clCreateKernel( programB, "test", &error ); + test_error( error, "Unable to create testing kernel" ); + + // Set up and run + for( int i = 0; i < 3; i++ ) + { + error = clSetKernelArg( kernelB, i, sizeof( streams[i] ), &streams[i] ); + test_error( error, "Unable to set indexed kernel arguments" ); + } + + error = clEnqueueNDRangeKernel( queue, kernelB, 1, NULL, threads, NULL, 0, NULL, NULL ); + test_error( error, "Kernel execution failed" ); + + // Only need the one read + error = clEnqueueReadBuffer( queue, streams[0], CL_TRUE, 0, sizeof(results), results, 0, NULL, NULL ); + test_error( error, "Unable to get result data" ); + + // We only need to check the one result this time + if( results[ 5 ] == 0xf00baa ) + { + log_error( "ERROR: Kernel preprocessor __FAST_RELAXED_MATH__ not defined!\n" ); + return -1; + } + else if( results[ 5 ] != 1 ) + { + log_error( "ERROR: Kernel preprocessor __FAST_RELAXED_MATH__ not defined to 1 (was %d)\n", results[ 5 ] ); + return -1; + } +#endif + + return 0; +} + diff --git a/test_conformance/compatibility/test_conformance/basic/test_readimage.c 
b/test_conformance/compatibility/test_conformance/basic/test_readimage.c new file mode 100644 index 00000000..e9de33c7 --- /dev/null +++ b/test_conformance/compatibility/test_conformance/basic/test_readimage.c @@ -0,0 +1,244 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include + + +#include "procs.h" + +static const char *bgra8888_kernel_code = +"\n" +"__kernel void test_bgra8888(read_only image2d_t srcimg, __global uchar4 *dst, sampler_t sampler)\n" +"{\n" +" int tid_x = get_global_id(0);\n" +" int tid_y = get_global_id(1);\n" +" int indx = tid_y * get_image_width(srcimg) + tid_x;\n" +" float4 color;\n" +"\n" +" color = read_imagef(srcimg, sampler, (int2)(tid_x, tid_y)) * 255.0f;\n" +" dst[indx] = convert_uchar4_rte(color.zyxw);\n" +"\n" +"}\n"; + + +static const char *rgba8888_kernel_code = +"\n" +"__kernel void test_rgba8888(read_only image2d_t srcimg, __global uchar4 *dst, sampler_t sampler)\n" +"{\n" +" int tid_x = get_global_id(0);\n" +" int tid_y = get_global_id(1);\n" +" int indx = tid_y * get_image_width(srcimg) + tid_x;\n" +" float4 color;\n" +"\n" +" color = read_imagef(srcimg, sampler, (int2)(tid_x, tid_y)) * 255.0f;\n" +" dst[indx] = convert_uchar4_rte(color);\n" +"\n" +"}\n"; + + +static unsigned char * +generate_8888_image(int w, int h, MTdata d) +{ + unsigned char *ptr = (unsigned char*)malloc(w * h * 
4); + int i; + + for (i=0; i +#include +#include +#include + + +#include "procs.h" + +static const char *bgra8888_kernel_code = +"\n" +"__kernel void test_bgra8888(read_only image3d_t srcimg, __global float4 *dst, sampler_t sampler)\n" +"{\n" +" int tid_x = get_global_id(0);\n" +" int tid_y = get_global_id(1);\n" +" int tid_z = get_global_id(2);\n" +" int indx = (tid_z * get_image_height(srcimg) + tid_y) * get_image_width(srcimg) + tid_x;\n" +" float4 color;\n" +"\n" +" color = read_imagef(srcimg, sampler, (int4)(tid_x, tid_y, tid_z, 0));\n" +" dst[indx].x = color.z;\n" +" dst[indx].y = color.y;\n" +" dst[indx].z = color.x;\n" +" dst[indx].w = color.w;\n" +"\n" +"}\n"; + + +static const char *rgba8888_kernel_code = +"\n" +"__kernel void test_rgba8888(read_only image3d_t srcimg, __global float4 *dst, sampler_t sampler)\n" +"{\n" +" int tid_x = get_global_id(0);\n" +" int tid_y = get_global_id(1);\n" +" int tid_z = get_global_id(2);\n" +" int indx = (tid_z * get_image_height(srcimg) + tid_y) * get_image_width(srcimg) + tid_x;\n" +" float4 color;\n" +"\n" +" color = read_imagef(srcimg, sampler, (int4)(tid_x, tid_y, tid_z, 0));\n" +" //indx *= 4;\n" +" dst[indx].x = color.x;\n" +" dst[indx].y = color.y;\n" +" dst[indx].z = color.z;\n" +" dst[indx].w = color.w;\n" +"\n" +"}\n"; + + +static unsigned char * +generate_3d_image8(int w, int h, int d, MTdata data) +{ + unsigned char *ptr = (unsigned char*)malloc(w * h * d * 4); + int i; + + for (i=0; i +#include +#include +#include + + +#include "procs.h" + + +static const char *rgbaFFFF_kernel_code = +"__kernel void test_rgbaFFFF(read_only image3d_t srcimg, __global float *dst, sampler_t sampler)\n" +"{\n" +" int tid_x = get_global_id(0);\n" +" int tid_y = get_global_id(1);\n" +" int tid_z = get_global_id(2);\n" +" int indx = (tid_z * get_image_height(srcimg) + tid_y) * get_image_width(srcimg) + tid_x;\n" +" float4 color;\n" +"\n" +" color = read_imagef(srcimg, sampler, (int4)(tid_x, tid_y, tid_z, 0));\n" +" indx *= 4;\n" +" 
dst[indx+0] = color.x;\n" +" dst[indx+1] = color.y;\n" +" dst[indx+2] = color.z;\n" +" dst[indx+3] = color.w;\n" +"\n" +"}\n"; + + +static float * +generate_float_image(int w, int h, int d, MTdata data) +{ + float *ptr = (float*)malloc(w * h * d * 4 * sizeof(float)); + int i; + + for (i=0; i +#include +#include +#include + + +#include "procs.h" + +static const char *rgba16_kernel_code = +"__kernel void test_rgba16(read_only image3d_t srcimg, __global ushort4 *dst, sampler_t sampler)\n" +"{\n" +" int tid_x = get_global_id(0);\n" +" int tid_y = get_global_id(1);\n" +" int tid_z = get_global_id(2);\n" +" int indx = (tid_z * get_image_height(srcimg) + tid_y) * get_image_width(srcimg) + tid_x;\n" +" float4 color;\n" +"\n" +" color = read_imagef(srcimg, sampler, (int4)(tid_x, tid_y, tid_z, 0));\n" +" ushort4 dst_write;\n" +" dst_write.x = convert_ushort_rte(color.x * 65535.0f);\n" +" dst_write.y = convert_ushort_rte(color.y * 65535.0f);\n" +" dst_write.z = convert_ushort_rte(color.z * 65535.0f);\n" +" dst_write.w = convert_ushort_rte(color.w * 65535.0f);\n" +" dst[indx] = dst_write;\n" +"\n" +"}\n"; + + +static unsigned short * +generate_16bit_image(int w, int h, int d, MTdata data) +{ + unsigned short *ptr = (cl_ushort*)malloc(w * h * d * 4 * sizeof(cl_ushort)); + int i; + + for (i=0; i +#include +#include +#include + + +#include "procs.h" + + +static const char *rgbaFFFF_kernel_code = +"__kernel void test_rgbaFFFF(read_only image2d_t srcimg, __global float *dst, sampler_t smp)\n" +"{\n" +" int tid_x = get_global_id(0);\n" +" int tid_y = get_global_id(1);\n" +" int indx = tid_y * get_image_width(srcimg) + tid_x;\n" +" float4 color;\n" +"\n" +" color = read_imagef(srcimg, smp, (int2)(tid_x, tid_y));\n" +" indx *= 4;\n" +" dst[indx+0] = color.x;\n" +" dst[indx+1] = color.y;\n" +" dst[indx+2] = color.z;\n" +" dst[indx+3] = color.w;\n" +"\n" +"}\n"; + + +static float * +generate_float_image(int w, int h, MTdata d) +{ + float *ptr = (float*)malloc(w * h * 4 * sizeof(float)); 
+ int i; + + for (i=0; i +#include +#include +#include + + +#include "procs.h" + +static const char *rgba16_kernel_code = +"__kernel void test_rgba16(read_only image2d_t srcimg, __global ushort4 *dst, sampler_t smp)\n" +"{\n" +" int tid_x = get_global_id(0);\n" +" int tid_y = get_global_id(1);\n" +" int indx = tid_y * get_image_width(srcimg) + tid_x;\n" +" float4 color;\n" +"\n" +" color = read_imagef(srcimg, smp, (int2)(tid_x, tid_y));\n" +" ushort4 dst_write;\n" +" dst_write.x = convert_ushort_rte(color.x * 65535.0f);\n" +" dst_write.y = convert_ushort_rte(color.y * 65535.0f);\n" +" dst_write.z = convert_ushort_rte(color.z * 65535.0f);\n" +" dst_write.w = convert_ushort_rte(color.w * 65535.0f);\n" +" dst[indx] = dst_write;\n" +"\n" +"}\n"; + + +static unsigned short * +generate_16bit_image(int w, int h, MTdata d) +{ + cl_ushort *ptr = (cl_ushort*)malloc(w * h * 4 * sizeof(cl_ushort)); + int i; + + for (i=0; i +#include +#include +#include +#include "procs.h" + + + +cl_int get_type_size( cl_context context, cl_command_queue queue, const char *type, cl_ulong *size ) +{ + const char *sizeof_kernel_code[4] = + { + "", /* optional pragma string */ + "__kernel __attribute__((reqd_work_group_size(1,1,1))) void test_sizeof(__global uint *dst) \n" + "{\n" + " dst[0] = (uint) sizeof( ", type, " );\n" + "}\n" + }; + + cl_program p; + cl_kernel k; + cl_mem m; + cl_uint temp; + + + if (!strncmp(type, "double", 6)) + { + sizeof_kernel_code[0] = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; + } + else if (!strncmp(type, "half", 4)) + { + sizeof_kernel_code[0] = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"; + } + + cl_int err = create_single_kernel_helper( context, &p, &k, 4, sizeof_kernel_code, "test_sizeof" ); + if( err ) + return err; + + m = clCreateBuffer( context, CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, sizeof( cl_ulong ), size, &err ); + if( NULL == m ) + { + clReleaseProgram( p ); + clReleaseKernel( k ); + log_error("\nclCreateBuffer FAILED\n"); + return err; 
+ } + + err = clSetKernelArg( k, 0, sizeof( cl_mem ), &m ); + if( err ) + { + clReleaseProgram( p ); + clReleaseKernel( k ); + clReleaseMemObject( m ); + log_error("\nclSetKernelArg FAILED\n"); + return err; + } + + err = clEnqueueTask( queue, k, 0, NULL, NULL ); + clReleaseProgram( p ); + clReleaseKernel( k ); + if( err ) + { + clReleaseMemObject( m ); + log_error( "\nclEnqueueTask FAILED\n" ); + return err; + } + + err = clEnqueueReadBuffer( queue, m, CL_TRUE, 0, sizeof( cl_uint ), &temp, 0, NULL, NULL ); + clReleaseMemObject( m ); + if( err ) + log_error( "\nclEnqueueReadBuffer FAILED\n" ); + + *size = (cl_ulong) temp; + + return err; +} + +typedef struct size_table +{ + const char *name; + cl_ulong size; + cl_ulong cl_size; +}size_table; + +const size_table scalar_table[] = +{ + // Fixed size entries from table 6.1 + { "char", 1, sizeof( cl_char ) }, + { "uchar", 1, sizeof( cl_uchar) }, + { "unsigned char", 1, sizeof( cl_uchar) }, + { "short", 2, sizeof( cl_short) }, + { "ushort", 2, sizeof( cl_ushort) }, + { "unsigned short", 2, sizeof( cl_ushort) }, + { "int", 4, sizeof( cl_int ) }, + { "uint", 4, sizeof( cl_uint) }, + { "unsigned int", 4, sizeof( cl_uint) }, + { "float", 4, sizeof( cl_float) }, + { "long", 8, sizeof( cl_long ) }, + { "ulong", 8, sizeof( cl_ulong) }, + { "unsigned long", 8, sizeof( cl_ulong) } +}; + +const size_table vector_table[] = +{ + // Fixed size entries from table 6.1 + { "char", 1, sizeof( cl_char ) }, + { "uchar", 1, sizeof( cl_uchar) }, + { "short", 2, sizeof( cl_short) }, + { "ushort", 2, sizeof( cl_ushort) }, + { "int", 4, sizeof( cl_int ) }, + { "uint", 4, sizeof( cl_uint) }, + { "float", 4, sizeof( cl_float) }, + { "long", 8, sizeof( cl_long ) }, + { "ulong", 8, sizeof( cl_ulong) } +}; + +const char *ptr_table[] = +{ + "void*", + "size_t", + "sizeof(int)", // check return type of sizeof + "ptrdiff_t" +}; + +const char *other_types[] = +{ + "event_t", + "image2d_t", + "image3d_t", + "sampler_t" +}; + +static int IsPowerOfTwo( 
cl_ulong x ){ return 0 == (x & (x-1)); } + +int test_sizeof(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + size_t i, j; + cl_ulong test; + cl_uint ptr_size = CL_UINT_MAX; + cl_int err = CL_SUCCESS; + + // Check address space size + err = clGetDeviceInfo(device, CL_DEVICE_ADDRESS_BITS, sizeof(ptr_size), &ptr_size, NULL); + if( err || ptr_size > 64) + { + log_error( "FAILED: Unable to get CL_DEVICE_ADDRESS_BITS for device %p\n", device ); + return -1; + } + log_info( "\tCL_DEVICE_ADDRESS_BITS = %u\n", ptr_size ); + ptr_size /= 8; + + // Test standard scalar sizes + for( i = 0; i < sizeof( scalar_table ) / sizeof( scalar_table[0] ); i++ ) + { + if( ! gHasLong && + (0 == strcmp(scalar_table[i].name, "long") || + 0 == strcmp(scalar_table[i].name, "ulong") || + 0 == strcmp(scalar_table[i].name, "unsigned long"))) + { + log_info("\nLongs are not supported by this device. Skipping test.\t"); + continue; + } + + test = CL_ULONG_MAX; + err = get_type_size( context, queue, scalar_table[i].name, &test ); + if( err ) + return err; + if( test != scalar_table[i].size ) + { + log_error( "\nFAILED: Type %s has size %lld, but expected size %lld!\n", scalar_table[i].name, test, scalar_table[i].size ); + return -1; + } + if( test != scalar_table[i].cl_size ) + { + log_error( "\nFAILED: Type %s has size %lld, but cl_ size is %lld!\n", scalar_table[i].name, test, scalar_table[i].cl_size ); + return -2; + } + log_info( "%16s", scalar_table[i].name ); + } + log_info( "\n" ); + + // Test standard vector sizes + for( j = 2; j <= 16; j *= 2 ) + { + // For each vector size, iterate through types + for( i = 0; i < sizeof( vector_table ) / sizeof( vector_table[0] ); i++ ) + { + if( !gHasLong && + (0 == strcmp(vector_table[i].name, "long") || + 0 == strcmp(vector_table[i].name, "ulong"))) + { + log_info("\nLongs are not supported by this device. 
Skipping test.\t"); + continue; + } + + char name[32]; + sprintf( name, "%s%ld", vector_table[i].name, j ); + + test = CL_ULONG_MAX; + err = get_type_size( context, queue, name, &test ); + if( err ) + return err; + if( test != j * vector_table[i].size ) + { + log_error( "\nFAILED: Type %s has size %lld, but expected size %lld!\n", name, test, j * vector_table[i].size ); + return -1; + } + if( test != j * vector_table[i].cl_size ) + { + log_error( "\nFAILED: Type %s has size %lld, but cl_ size is %lld!\n", name, test, j * vector_table[i].cl_size ); + return -2; + } + log_info( "%16s", name ); + } + log_info( "\n" ); + } + + //Check that pointer sizes are correct + for( i = 0; i < sizeof( ptr_table ) / sizeof( ptr_table[0] ); i++ ) + { + test = CL_ULONG_MAX; + err = get_type_size( context, queue, ptr_table[i], &test ); + if( err ) + return err; + if( test != ptr_size ) + { + log_error( "\nFAILED: Type %s has size %lld, but expected size %u!\n", ptr_table[i], test, ptr_size ); + return -1; + } + log_info( "%16s", ptr_table[i] ); + } + + // Check that intptr_t is large enough + test = CL_ULONG_MAX; + err = get_type_size( context, queue, "intptr_t", &test ); + if( err ) + return err; + if( test < ptr_size ) + { + log_error( "\nFAILED: intptr_t has size %lld, but must be at least %u!\n", test, ptr_size ); + return -1; + } + if( ! IsPowerOfTwo( test ) ) + { + log_error( "\nFAILED: sizeof(intptr_t) is %lld, but must be a power of two!\n", test ); + return -2; + } + log_info( "%16s", "intptr_t" ); + + // Check that uintptr_t is large enough + test = CL_ULONG_MAX; + err = get_type_size( context, queue, "uintptr_t", &test ); + if( err ) + return err; + if( test < ptr_size ) + { + log_error( "\nFAILED: uintptr_t has size %lld, but must be at least %u!\n", test, ptr_size ); + return -1; + } + if( ! 
IsPowerOfTwo( test ) ) + { + log_error( "\nFAILED: sizeof(uintptr_t) is %lld, but must be a power of two!\n", test ); + return -2; + } + log_info( "%16s\n", "uintptr_t" ); + + //Check that other types are powers of two + for( i = 0; i < sizeof( other_types ) / sizeof( other_types[0] ); i++ ) + { + if( 0 == strcmp(other_types[i], "image2d_t") && + checkForImageSupport( device ) == CL_IMAGE_FORMAT_NOT_SUPPORTED) + { + log_info("\nimages are not supported by this device. Skipping test.\t"); + continue; + } + + if( gIsEmbedded && + 0 == strcmp(other_types[i], "image3d_t") && + checkFor3DImageSupport( device ) == CL_IMAGE_FORMAT_NOT_SUPPORTED) + { + log_info("\n3D images are not supported by this device. Skipping test.\t"); + continue; + } + + if( 0 == strcmp(other_types[i], "sampler_t") && + checkForImageSupport( device ) == CL_IMAGE_FORMAT_NOT_SUPPORTED) + { + log_info("\nimages are not supported by this device. Skipping test.\t"); + continue; + } + + test = CL_ULONG_MAX; + err = get_type_size( context, queue, other_types[i], &test ); + if( err ) + return err; + if( ! 
IsPowerOfTwo( test ) ) + { + log_error( "\nFAILED: Type %s has size %lld, which is not a power of two (section 6.1.5)!\n", other_types[i], test ); + return -1; + } + log_info( "%16s", other_types[i] ); + } + log_info( "\n" ); + + + //Check double + if( is_extension_available( device, "cl_khr_fp64" ) ) + { + log_info( "\tcl_khr_fp64:" ); + test = CL_ULONG_MAX; + err = get_type_size( context, queue, "double", &test ); + if( err ) + return err; + if( test != 8 ) + { + log_error( "\nFAILED: double has size %lld, but must be 8!\n", test ); + return -1; + } + log_info( "%16s", "double" ); + + // Test standard vector sizes + for( j = 2; j <= 16; j *= 2 ) + { + char name[32]; + sprintf( name, "double%ld", j ); + + test = CL_ULONG_MAX; + err = get_type_size( context, queue, name, &test ); + if( err ) + return err; + if( test != 8*j ) + { + log_error( "\nFAILED: %s has size %lld, but must be %ld!\n", name, test, 8 * j); + return -1; + } + log_info( "%16s", name ); + } + log_info( "\n" ); + } + + //Check half + if( is_extension_available( device, "cl_khr_fp16" ) ) + { + log_info( "\tcl_khr_fp16:" ); + test = CL_ULONG_MAX; + err = get_type_size( context, queue, "half", &test ); + if( err ) + return err; + if( test != 2 ) + { + log_error( "\nFAILED: half has size %lld, but must be 2!\n", test ); + return -1; + } + log_info( "%16s", "half" ); + + // Test standard vector sizes + for( j = 2; j <= 16; j *= 2 ) + { + char name[32]; + sprintf( name, "half%ld", j ); + + test = CL_ULONG_MAX; + err = get_type_size( context, queue, name, &test ); + if( err ) + return err; + if( test != 2*j ) + { + log_error( "\nFAILED: %s has size %lld, but must be %ld!\n", name, test, 2 * j); + return -1; + } + log_info( "%16s", name ); + } + log_info( "\n" ); + } + + return err; +} + + diff --git a/test_conformance/compatibility/test_conformance/basic/test_vec_type_hint.c b/test_conformance/compatibility/test_conformance/basic/test_vec_type_hint.c new file mode 100644 index 00000000..d3b5fa76 --- 
/dev/null +++ b/test_conformance/compatibility/test_conformance/basic/test_vec_type_hint.c @@ -0,0 +1,97 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include + + +#include "procs.h" +#include "../../test_common/harness/conversions.h" +#include "../../test_common/harness/typeWrappers.h" + + +static const char *sample_kernel = { + "%s\n" // optional pragma string + "__kernel __attribute__((vec_type_hint(%s%s))) void sample_test(__global int *src, __global int *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + " dst[tid] = src[tid];\n" + "\n" + "}\n" +}; + +int test_vec_type_hint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + int vec_type_index, vec_size_index; + + ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble }; + const char *size_names[] = {"", "2", "4", "8", "16"}; + char *program_source; + + program_source = (char*)malloc(sizeof(char)*4096); + + for (vec_type_index=0; vec_type_index<10; vec_type_index++) { + if (vecType[vec_type_index] == kDouble) { + if (!is_extension_available(deviceID, "cl_khr_fp64")) { + log_info("Extension cl_khr_fp64 not supported; skipping double tests.\n"); + continue; + } + log_info("Testing doubles.\n"); + } + + for (vec_size_index=0; vec_size_index<5; vec_size_index++) { + 
clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper in, out; + size_t global[] = {1,1,1}; + + log_info("Testing __attribute__((vec_type_hint(%s%s))...\n", get_explicit_type_name(vecType[vec_type_index]), size_names[vec_size_index]); + + program_source[0] = '\0'; + sprintf(program_source, sample_kernel, + (vecType[vec_type_index] == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "", + get_explicit_type_name(vecType[vec_type_index]), size_names[vec_size_index]); + + error = create_single_kernel_helper( context, &program, &kernel, 1, (const char**)&program_source, "sample_test" ); + if( error != 0 ) + return error; + + in = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(cl_int)*10, NULL, &error); + test_error(error, "clCreateBuffer failed"); + out = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(cl_int)*10, NULL, &error); + test_error(error, "clCreateBuffer failed"); + + error = clSetKernelArg(kernel, 0, sizeof(in), &in); + test_error(error, "clSetKernelArg failed"); + error = clSetKernelArg(kernel, 1, sizeof(out), &out); + test_error(error, "clSetKernelArg failed"); + + error = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global, NULL, 0, NULL, NULL); + test_error(error, "clEnqueueNDRangeKernel failed"); + + error = clFinish(queue); + test_error(error, "clFinish failed"); + } + } + + free(program_source); + + return 0; +} diff --git a/test_conformance/compatibility/test_conformance/basic/test_vector_creation.cpp b/test_conformance/compatibility/test_conformance/basic/test_vector_creation.cpp new file mode 100644 index 00000000..9ab0103a --- /dev/null +++ b/test_conformance/compatibility/test_conformance/basic/test_vector_creation.cpp @@ -0,0 +1,406 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "procs.h" +#include "../../test_common/harness/conversions.h" +#include "../../test_common/harness/typeWrappers.h" +#include "../../test_common/harness/errorHelpers.h" + + + + +#define DEBUG 0 +#define DEPTH 16 +// Limit the maximum code size for any given kernel. +#define MAX_CODE_SIZE (1024*32) + +const int sizes[] = {1, 2, 3, 4, 8, 16, -1, -1, -1, -1}; +const char *size_names[] = {"", "2", "3", "4", "8", "16" , "!!a", "!!b", "!!c", "!!d"}; + +// Creates a kernel by enumerating all possible ways of building the vector out of vloads +// skip_to_results will skip results up to a given number. If the amount of code generated +// is greater than MAX_CODE_SIZE, this function will return the number of results used, +// which can then be used as the skip_to_result value to continue where it left off. 
+int create_kernel(ExplicitType type, int output_size, char *program, int *number_of_results, int skip_to_result) { + + int number_of_sizes; + + switch (output_size) { + case 1: + number_of_sizes = 1; + break; + case 2: + number_of_sizes = 2; + break; + case 3: + number_of_sizes = 3; + break; + case 4: + number_of_sizes = 4; + break; + case 8: + number_of_sizes = 5; + break; + case 16: + number_of_sizes = 6; + break; + default: + log_error("Invalid size: %d\n", output_size); + return -1; + } + + int total_results = 0; + int current_result = 0; + int total_vloads = 0; + int total_program_length = 0; + int aborted_due_to_size = 0; + + if (skip_to_result < 0) + skip_to_result = 0; + + // The line of code for the vector creation + char line[1024]; + // Keep track of what size vector we are using in each position so we can iterate through all fo them + int pos[DEPTH]; + int max_size = output_size; + if (DEBUG > 1) log_info("max_size: %d\n", max_size); + + program[0] = '\0'; + sprintf(program, "%s\n__kernel void test_vector_creation(__global %s *src, __global %s%s *result) {\n", + type == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "", + get_explicit_type_name(type), get_explicit_type_name(type), ( number_of_sizes == 3 ) ? "" : size_names[number_of_sizes-1]); + total_program_length += (int)strlen(program); + + char storePrefix[ 128 ], storeSuffix[ 128 ]; + + // Start out trying sizes 1,1,1,1,1... + for (int i=0; i 1) { + log_info("pos size[] = ["); + for (int k=0; k 1) log_info("vloads: %d, size_so_far:%d\n", vloads, size_so_far); + + // If they did not fit the required size exactly it is too long, so there is no point in checking any other combinations + // of the sizes to the right. Prune them from the search. 
+ if (size_so_far != max_size) { + // Zero all the sizes to the right + for (int k=vloads+1; k=0; d--) { + pos[d]++; + if (pos[d] >= number_of_sizes) { + pos[d] = 0; + if (d == 0) { + // If we rolled over then we are done + done = 1; + break; + } + } else { + break; + } + } + // Go on to the next size since this one (and all others "under" it) didn't fit + continue; + } + + + // Generate the actual load line if we are building this part + line[0]= '\0'; + if (skip_to_result == 0 || total_results >= skip_to_result) { + if( number_of_sizes == 3 ) + { + sprintf( storePrefix, "vstore3( " ); + sprintf( storeSuffix, ", %d, result )", current_result ); + } + else + { + sprintf( storePrefix, "result[%d] = ", current_result ); + storeSuffix[ 0 ] = 0; + } + + sprintf(line, "\t%s(%s%d)(", storePrefix, get_explicit_type_name(type), output_size); + current_result++; + + int offset = 0; + for (int i=0; i MAX_CODE_SIZE) { + aborted_due_to_size = 1; + done = 1; + } + + + if (DEBUG) log_info("line is: %s", line); + + // If we did not use all of them, then we ignore any changes further to the right. + // We do this by causing those loops to skip on the next iteration. + if (vloads < DEPTH) { + if (DEBUG > 1) log_info("done with this depth\n"); + for (int k=vloads; k=0; d--) { + pos[d]++; + if (pos[d] >= number_of_sizes) { + pos[d] = 0; + if (d == 0) { + // If we rolled over at the far-left then we are done + done = 1; + break; + } + } else { + break; + } + } + if (done) + break; + + // Continue until we are done. 
+ } + strcat(program, "}\n\n"); //log_info("%s\n", program); + total_program_length += 3; + if (DEBUG) log_info("\t\t(Program for vector type %s%s contains %d vector creations, of total program length %gkB, with a total of %d vloads.)\n", + get_explicit_type_name(type), size_names[number_of_sizes-1], total_results, total_program_length/1024.0, total_vloads); + *number_of_results = current_result; + if (aborted_due_to_size) + return total_results; + return 0; +} + + + + +int test_vector_creation(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble }; + unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16}; + + char *program_source; + int error; + int total_errors = 0; + + cl_int input_data_int[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; + cl_double input_data_double[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; + void *input_data_converted; + void *output_data; + + int number_of_results;; + + input_data_converted = malloc(sizeof(cl_double)*16); + program_source = (char*)malloc(sizeof(char)*1024*1024*4); + + // Iterate over all the types + for (int type_index=0; type_index<10; type_index++) { + if(!gHasLong && ((vecType[type_index] == kLong) || (vecType[type_index] == kULong))) + { + log_info("Long/ULong data type not supported on this device\n"); + continue; + } + + clMemWrapper input; + + if (vecType[type_index] == kDouble) { + if (!is_extension_available(deviceID, "cl_khr_fp64")) { + log_info("Extension cl_khr_fp64 not supported; skipping double tests.\n"); + continue; + } + log_info("Testing doubles.\n"); + } + + // Convert the data to the right format for the test. 
+ memset(input_data_converted, 0xff, sizeof(cl_double)*16); + if (vecType[type_index] != kDouble) { + for (int j=0; j<16; j++) { + convert_explicit_value(&input_data_int[j], ((char*)input_data_converted)+get_explicit_type_size(vecType[type_index])*j, + kInt, 0, kRoundToEven, vecType[type_index]); + } + } else { + memcpy(input_data_converted, &input_data_double, sizeof(cl_double)*16); + } + + input = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, get_explicit_type_size(vecType[type_index])*16, + (vecType[type_index] != kDouble) ? input_data_converted : input_data_double, &error); + if (error) { + print_error(error, "clCreateBuffer failed"); + total_errors++; + continue; + } + + // Iterate over all the vector sizes. + for (int size_index=1; size_index< 5; size_index++) { + size_t global[] = {1,1,1}; + int number_generated = -1; + int previous_number_generated = 0; + + log_info("Testing %s%s...\n", get_explicit_type_name(vecType[type_index]), size_names[size_index]); + while (number_generated != 0) { + clMemWrapper output; + clKernelWrapper kernel; + clProgramWrapper program; + + number_generated = create_kernel(vecType[type_index], vecSizes[size_index], program_source, &number_of_results, number_generated); + if (number_generated != 0) { + if (previous_number_generated == 0) + log_info("Code size greater than %gkB; splitting test into multiple kernels.\n", MAX_CODE_SIZE/1024.0); + log_info("\tExecuting vector permutations %d to %d...\n", previous_number_generated, number_generated-1); + } + + error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&program_source, "test_vector_creation"); + if (error) { + log_error("create_single_kernel_helper failed.\n"); + total_errors++; + break; + } + + output = clCreateBuffer(context, CL_MEM_WRITE_ONLY, + number_of_results*get_explicit_type_size(vecType[type_index])*vecSizes[size_index], + NULL, &error); + if (error) { + print_error(error, "clCreateBuffer failed"); + total_errors++; + break; + } + + 
error = clSetKernelArg(kernel, 0, sizeof(input), &input); + error |= clSetKernelArg(kernel, 1, sizeof(output), &output); + if (error) { + print_error(error, "clSetKernelArg failed"); + total_errors++; + break; + } + + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global, NULL, 0, NULL, NULL); + if (error) { + print_error(error, "clEnqueueNDRangeKernel failed"); + total_errors++; + break; + } + + error = clFinish(queue); + if (error) { + print_error(error, "clFinish failed"); + total_errors++; + break; + } + + output_data = malloc(number_of_results*get_explicit_type_size(vecType[type_index])*vecSizes[size_index]); + if (output_data == NULL) { + log_error("Failed to allocate memory for output data.\n"); + total_errors++; + break; + } + memset(output_data, 0xff, number_of_results*get_explicit_type_size(vecType[type_index])*vecSizes[size_index]); + error = clEnqueueReadBuffer(queue, output, CL_TRUE, 0, + number_of_results*get_explicit_type_size(vecType[type_index])*vecSizes[size_index], + output_data, 0, NULL, NULL); + if (error) { + print_error(error, "clEnqueueReadBuffer failed"); + total_errors++; + free(output_data); + break; + } + + // Check the results + char *res = (char *)output_data; + char *exp = (char *)input_data_converted; + for (int i=0; i +#include +#include +#include +#include + + +#include "procs.h" +#include "../../test_common/harness/conversions.h" +#include "../../test_common/harness/typeWrappers.h" +#include "../../test_common/harness/errorHelpers.h" + +// Outputs debug information for stores +#define DEBUG 0 +// Forces stores/loads to be done with offsets = tid +#define LINEAR_OFFSETS 0 +#define NUM_LOADS 512 + +static const char *doubleExtensionPragma = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; + +#pragma mark -------------------- vload harness -------------------------- + +typedef void (*create_vload_program_fn)( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize, size_t outVectorSize ); + +int 
test_vload( cl_device_id device, cl_context context, cl_command_queue queue, ExplicitType type, unsigned int vecSize, + create_vload_program_fn createFn, size_t bufferSize, MTdata d ) +{ + int error; + + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper streams[ 4 ]; + const size_t numLoads = (DEBUG) ? 16 : NUM_LOADS; + + if (DEBUG) bufferSize = (bufferSize < 128) ? bufferSize : 128; + + size_t threads[ 1 ], localThreads[ 1 ]; + clProtectedArray inBuffer( bufferSize ); + char programSrc[ 10240 ]; + cl_uint offsets[ numLoads ], alignmentOffsets[ numLoads ]; + size_t numElements, typeSize, i; + unsigned int outVectorSize; + + + typeSize = get_explicit_type_size( type ); + numElements = bufferSize / ( typeSize * vecSize ); + bufferSize = numElements * typeSize * vecSize; // To account for rounding + + if (DEBUG) log_info("Testing: numLoads: %d, typeSize: %d, vecSize: %d, numElements: %d, bufferSize: %d\n", (int)numLoads, (int)typeSize, vecSize, (int)numElements, (int)bufferSize); + + // Create some random input data and random offsets to load from + generate_random_data( type, numElements * vecSize, d, (void *)inBuffer ); + for( i = 0; i < numLoads; i++ ) + { + offsets[ i ] = (cl_uint)random_in_range( 0, (int)numElements - 1, d ); + if( offsets[ i ] < numElements - 2 ) + alignmentOffsets[ i ] = (cl_uint)random_in_range( 0, (int)vecSize - 1, d ); + else + alignmentOffsets[ i ] = 0; + if (LINEAR_OFFSETS) offsets[i] = (cl_uint)i; + } + if (LINEAR_OFFSETS) log_info("Offsets set to thread IDs to simplify output.\n"); + + // 32-bit fixup + outVectorSize = vecSize; + + // Declare output buffers now +#if !(defined(_WIN32) && defined(_MSC_VER)) + char outBuffer[ numLoads * typeSize * outVectorSize ]; + char referenceBuffer[ numLoads * typeSize * vecSize ]; +#else + char* outBuffer = (char*)_malloca(numLoads * typeSize * outVectorSize * sizeof(cl_char)); + char* referenceBuffer = (char*)_malloca(numLoads * typeSize * vecSize * sizeof(cl_char)); +#endif + + // 
Create the program + + + createFn( programSrc, numElements, type, vecSize, outVectorSize); + + // Create our kernel + const char *ptr = programSrc; + + error = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "test_fn" ); + test_error( error, "Unable to create testing kernel" ); + if (DEBUG) log_info("Kernel: \n%s\n", programSrc); + + // Get the number of args to differentiate the kernels with local storage. (They have 5) + cl_uint numArgs; + error = clGetKernelInfo(kernel, CL_KERNEL_NUM_ARGS, sizeof(numArgs), &numArgs, NULL); + test_error( error, "clGetKernelInfo failed"); + + // Set up parameters + streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, bufferSize, (void *)inBuffer, &error ); + test_error( error, "Unable to create kernel stream" ); + streams[ 1 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, numLoads*sizeof(offsets[0]), offsets, &error ); + test_error( error, "Unable to create kernel stream" ); + streams[ 2 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, numLoads*sizeof(alignmentOffsets[0]), alignmentOffsets, &error ); + test_error( error, "Unable to create kernel stream" ); + streams[ 3 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, numLoads*typeSize*outVectorSize, (void *)outBuffer, &error ); + test_error( error, "Unable to create kernel stream" ); + + // Set parameters and run + if (numArgs == 5) { + // We need to set the size of the local storage + error = clSetKernelArg(kernel, 0, bufferSize, NULL); + test_error( error, "clSetKernelArg for buffer failed"); + for( i = 0; i < 4; i++ ) + { + error = clSetKernelArg( kernel, (int)i+1, sizeof( streams[ i ] ), &streams[ i ] ); + test_error( error, "Unable to set kernel argument" ); + } + } else { + // No local storage + for( i = 0; i < 4; i++ ) + { + error = clSetKernelArg( kernel, (int)i, sizeof( streams[ i ] ), &streams[ i ] ); + test_error( error, "Unable to set kernel argument" ); + } + } + + threads[ 0 ] = numLoads; + error = get_max_common_work_group_size( 
context, kernel, threads[ 0 ], &localThreads[ 0 ] ); + test_error( error, "Unable to get local thread size" ); + + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); + test_error( error, "Unable to exec kernel" ); + + // Get the results + error = clEnqueueReadBuffer( queue, streams[ 3 ], CL_TRUE, 0, numLoads * typeSize * outVectorSize * sizeof(cl_char), (void *)outBuffer, 0, NULL, NULL ); + test_error( error, "Unable to read results" ); + + + // Create the reference results + memset( referenceBuffer, 0, numLoads * typeSize * vecSize * sizeof(cl_char)); + for( i = 0; i < numLoads; i++ ) + { + memcpy( referenceBuffer + i * typeSize * vecSize, ( (char *)(void *)inBuffer ) + ( ( offsets[ i ] * vecSize ) + alignmentOffsets[ i ] ) * typeSize, + typeSize * vecSize ); + } + + // Validate the results now + char *expected = referenceBuffer; + char *actual = outBuffer; + char *in = (char *)(void *)inBuffer; + + if (DEBUG) { + log_info("Memory contents:\n"); + for (i=0; i 10240 ) + localSize = 10240; + if (localSize > 4096) + localSize -= 2048; + else + localSize /= 2; + + return test_vloadset( device, context, queue, create_local_load_code, (size_t)localSize ); +} + + +void create_constant_load_code( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize, size_t outVectorSize ) +{ + const char *pattern = + "%s%s" + "__kernel void test_fn( __constant %s *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s%d *results )\n" + "{\n" + " int tid = get_global_id( 0 );\n" + " %s%d tmp = vload%d( offsets[ tid ], ( (__constant %s *) src ) + alignmentOffsets[ tid ] );\n" + " results[ tid ] = tmp;\n" + "}\n"; + + const char *patternV3 = + "%s%s" + "__kernel void test_fn( __constant %s *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s *results )\n" + "{\n" + " int tid = get_global_id( 0 );\n" + " %s3 tmp = vload3( offsets[ tid ], ( (__constant %s *) src ) + alignmentOffsets[ 
tid ] );\n" + " results[ 3*tid ] = tmp.s0;\n" + " results[ 3*tid+1 ] = tmp.s1;\n" + " results[ 3*tid+2 ] = tmp.s2;\n" + "}\n"; + + const char *typeName = get_explicit_type_name(type); + if(inVectorSize == 3) { + sprintf( destBuffer, patternV3, + type == kDouble ? doubleExtensionPragma : "", + "", + typeName, typeName, typeName, + typeName ); + } else { + sprintf( destBuffer, pattern, + type == kDouble ? doubleExtensionPragma : "", + "", + typeName, typeName, (int)outVectorSize, typeName, (int)inVectorSize, + (int)inVectorSize, typeName ); + } +} + +int test_vload_constant(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ) +{ + // Determine the max size of a local buffer that we can test against + cl_ulong maxSize; + int error = clGetDeviceInfo( device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof( maxSize ), &maxSize, NULL ); + test_error( error, "Unable to get max size of constant memory buffer" ); + if( maxSize > 10240 ) + maxSize = 10240; + if (maxSize > 4096) + maxSize -= 2048; + else + maxSize /= 2; + + return test_vloadset( device, context, queue, create_constant_load_code, (size_t)maxSize ); +} + + +void create_private_load_code( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize, size_t outVectorSize ) +{ + const char *pattern = + "%s%s" + // Private memory is unique per thread, unlike local storage which is unique per local work group. 
Which means + // for this test, we have to copy the entire test buffer into private storage ON EACH THREAD to be an effective test + "#define PRIV_TYPE %s%d\n" + "#define PRIV_SIZE %d\n" + "__kernel void test_fn( __global %s%d *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s%d *results )\n" + "{\n" + " __private PRIV_TYPE sPrivateStorage[ PRIV_SIZE ];\n" + " int tid = get_global_id( 0 );\n" + "\n" + " for( int i = 0; i < %d; i++ )\n" + " sPrivateStorage[ i ] = src[ i ];\n" + // Note: unlike the local test, each thread runs the above copy loop independently, so nobody needs to wait for + // anybody else to sync up + "\n" + " %s%d tmp = vload%d( offsets[ tid ], ( (__private %s *) sPrivateStorage ) + alignmentOffsets[ tid ] );\n" + " results[ tid ] = tmp;\n" + "}\n"; + + const char *patternV3 = + "%s%s" + // Private memory is unique per thread, unlike local storage which is unique per local work group. Which means + // for this test, we have to copy the entire test buffer into private storage ON EACH THREAD to be an effective test + "#define PRIV_TYPE %s\n" + "#define PRIV_SIZE %d\n" + "__kernel void test_fn( __global %s *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s *results )\n" + "{\n" + " __private PRIV_TYPE sPrivateStorage[ PRIV_SIZE ];\n" + " int tid = get_global_id( 0 );\n" + "\n" + " for( int i = 0; i < PRIV_SIZE; i++ )\n" + " {\n" + " sPrivateStorage[ i ] = src[ i ];\n" + " }\n" + // Note: unlike the local test, each thread runs the above copy loop independently, so nobody needs to wait for + // anybody else to sync up + "\n" + " %s3 tmp = vload3( offsets[ tid ], ( sPrivateStorage ) + alignmentOffsets[ tid ] );\n" + " results[ 3*tid ] = tmp.s0;\n" + " results[ 3*tid+1 ] = tmp.s1;\n" + " results[ 3*tid+2 ] = tmp.s2;\n" + "}\n"; + + const char *typeName = get_explicit_type_name(type); + if(inVectorSize ==3) { + sprintf( destBuffer, patternV3, + type == kDouble ? 
doubleExtensionPragma : "", + "", + typeName, 3*((int)inBufferSize), + typeName, typeName, + typeName ); + // log_info("Src is \"\n%s\n\"\n", destBuffer); + } else { + sprintf( destBuffer, pattern, + type == kDouble ? doubleExtensionPragma : "", + "", + typeName, (int)inVectorSize, (int)inBufferSize, + typeName, (int)inVectorSize, typeName, (int)outVectorSize, + (int)inBufferSize, + typeName, (int)inVectorSize, (int)inVectorSize, typeName ); + } +} + +int test_vload_private(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ) +{ + // We have no idea how much actual private storage is available, so just pick a reasonable value, + // which is that we can fit at least two 16-element long, which is 2*8 bytes * 16 = 256 bytes + return test_vloadset( device, context, queue, create_private_load_code, 256 ); +} + + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +#pragma mark -------------------- vstore harness -------------------------- + +typedef void (*create_vstore_program_fn)( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize ); + +int test_vstore( cl_device_id device, cl_context context, cl_command_queue queue, ExplicitType type, unsigned int vecSize, + create_vstore_program_fn createFn, size_t bufferSize, MTdata d ) +{ + int error; + + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper streams[ 3 ]; + + size_t threads[ 1 ], localThreads[ 1 ]; + + size_t numElements, typeSize, numStores = (DEBUG) ? 16 : NUM_LOADS; + + if (DEBUG) + bufferSize = (bufferSize < 128) ? 
bufferSize : 128; + + typeSize = get_explicit_type_size( type ); + numElements = bufferSize / ( typeSize * vecSize ); + bufferSize = numElements * typeSize * vecSize; // To account for rounding + if( numStores > numElements * 2 / 3 ) + { + // Note: unlike load, we have to restrict the # of stores here, since all offsets must be unique for our test + // (Plus, we leave some room for extra values to make sure didn't get written) + numStores = numElements * 2 / 3; + if( numStores < 1 ) + numStores = 1; + } + if (DEBUG) + log_info("Testing: numStores: %d, typeSize: %d, vecSize: %d, numElements: %d, bufferSize: %d\n", (int)numStores, (int)typeSize, vecSize, (int)numElements, (int)bufferSize); +#if !(defined(_WIN32) && defined(_MSC_VER)) + cl_uint offsets[ numStores ]; +#else + cl_uint* offsets = (cl_uint*)_malloca(numStores * sizeof(cl_uint)); +#endif + char programSrc[ 10240 ]; + size_t i; + +#if !(defined(_WIN32) && defined(_MSC_VER)) + char inBuffer[ numStores * typeSize * vecSize ]; +#else + char* inBuffer = (char*)_malloca( numStores * typeSize * vecSize * sizeof(cl_char)); +#endif + clProtectedArray outBuffer( numElements * typeSize * vecSize ); +#if !(defined(_WIN32) && defined(_MSC_VER)) + char referenceBuffer[ numElements * typeSize * vecSize ]; +#else + char* referenceBuffer = (char*)_malloca(numElements * typeSize * vecSize * sizeof(cl_char)); +#endif + + // Create some random input data and random offsets to load from + generate_random_data( type, numStores * vecSize, d, (void *)inBuffer ); + + // Note: make sure no two offsets are the same, otherwise the output would depend on + // the order that threads ran in, and that would be next to impossible to verify +#if !(defined(_WIN32) && defined(_MSC_VER)) + char flags[ numElements ]; +#else + char* flags = (char*)_malloca( numElements * sizeof(char)); +#endif + + memset( flags, 0, numElements * sizeof(char) ); + for( i = 0; i < numStores; i++ ) + { + do + { + offsets[ i ] = (cl_uint)random_in_range( 0, 
(int)numElements - 2, d ); // Note: keep it one vec below the end for offset testing + } while( flags[ offsets[ i ] ] != 0 ); + flags[ offsets[ i ] ] = -1; + if (LINEAR_OFFSETS) + offsets[i] = (int)i; + } + if (LINEAR_OFFSETS) + log_info("Offsets set to thread IDs to simplify output.\n"); + + createFn( programSrc, numElements, type, vecSize ); + + // Create our kernel + const char *ptr = programSrc; + error = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "test_fn" ); + test_error( error, "Unable to create testing kernel" ); + if (DEBUG) log_info("Kernel: \n%s\n", programSrc); + + // Get the number of args to differentiate the kernels with local storage. (They have 5) + cl_uint numArgs; + error = clGetKernelInfo(kernel, CL_KERNEL_NUM_ARGS, sizeof(numArgs), &numArgs, NULL); + test_error( error, "clGetKernelInfo failed"); + + // Set up parameters + streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, numStores * typeSize * vecSize * sizeof(cl_char), (void *)inBuffer, &error ); + test_error( error, "Unable to create kernel stream" ); + streams[ 1 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, numStores * sizeof(cl_uint), offsets, &error ); + test_error( error, "Unable to create kernel stream" ); + streams[ 2 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, numElements * typeSize * vecSize, (void *)outBuffer, &error ); + test_error( error, "Unable to create kernel stream" ); + + // Set parameters and run + if (numArgs == 5) + { + // We need to set the size of the local storage + error = clSetKernelArg(kernel, 0, bufferSize, NULL); + test_error( error, "clSetKernelArg for buffer failed"); + for( i = 0; i < 3; i++ ) + { + error = clSetKernelArg( kernel, (int)i+1, sizeof( streams[ i ] ), &streams[ i ] ); + test_error( error, "Unable to set kernel argument" ); + } + } + else + { + // No local storage + for( i = 0; i < 3; i++ ) + { + error = clSetKernelArg( kernel, (int)i, sizeof( streams[ i ] ), &streams[ i ] ); + if (error) + 
log_info("%s\n", programSrc); + test_error( error, "Unable to set kernel argument" ); + } + } + + threads[ 0 ] = numStores; + error = get_max_common_work_group_size( context, kernel, threads[ 0 ], &localThreads[ 0 ] ); + test_error( error, "Unable to get local thread size" ); + + // Run in a loop, changing the address offset from 0 to ( vecSize - 1 ) each time, since + // otherwise stores might overlap each other, and it'd be a nightmare to test! + for( cl_uint addressOffset = 0; addressOffset < vecSize; addressOffset++ ) + { + if (DEBUG) + log_info("\tstore addressOffset is %d, executing with threads %d\n", addressOffset, (int)threads[0]); + + // Clear the results first + memset( outBuffer, 0, numElements * typeSize * vecSize ); + error = clEnqueueWriteBuffer( queue, streams[ 2 ], CL_TRUE, 0, numElements * typeSize * vecSize, (void *)outBuffer, 0, NULL, NULL ); + test_error( error, "Unable to erase result stream" ); + + // Set up the new offset and run + if (numArgs == 5) + error = clSetKernelArg( kernel, 3+1, sizeof( cl_uint ), &addressOffset ); + else + error = clSetKernelArg( kernel, 3, sizeof( cl_uint ), &addressOffset ); + test_error( error, "Unable to set address offset argument" ); + + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); + test_error( error, "Unable to exec kernel" ); + + // Get the results + error = clEnqueueReadBuffer( queue, streams[ 2 ], CL_TRUE, 0, numElements * typeSize * vecSize, (void *)outBuffer, 0, NULL, NULL ); + test_error( error, "Unable to read results" ); + + + // Create the reference results + memset( referenceBuffer, 0, numElements * typeSize * vecSize * sizeof(cl_char) ); + for( i = 0; i < numStores; i++ ) + { + memcpy( referenceBuffer + ( ( offsets[ i ] * vecSize ) + addressOffset ) * typeSize, inBuffer + i * typeSize * vecSize, typeSize * vecSize ); + } + + // Validate the results now + char *expected = referenceBuffer; + char *actual = (char *)(void *)outBuffer; + + if (DEBUG) 
+ { + log_info("Memory contents:\n"); + for (i=0; i>2) ], offsets[ tid ], destBuffer + alignmentOffset );\n" + " } else {\n" + " vstore3( vload3(tid, (__global %s *)srcValues), offsets[ tid ], destBuffer + alignmentOffset );\n" + " }\n" + "}\n"; + + const char *typeName = get_explicit_type_name(type); + + if(inVectorSize == 3) { + sprintf( destBuffer, patternV3, + type == kDouble ? doubleExtensionPragma : "", + typeName, typeName, typeName); + + } else { + sprintf( destBuffer, pattern, + type == kDouble ? doubleExtensionPragma : "", + typeName, (int)inVectorSize, typeName, (int)inVectorSize ); + } + // if(inVectorSize == 3 || inVectorSize == 4) { + // log_info("\n----\n%s\n----\n", destBuffer); + // } +} + +int test_vstore_global(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ) +{ + return test_vstoreset( device, context, queue, create_global_store_code, 10240 ); +} + + +void create_local_store_code( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize ) +{ + const char *pattern = + "%s" + "\n" + "__kernel void test_fn(__local %s%d *sSharedStorage, __global %s%d *srcValues, __global uint *offsets, __global %s%d *destBuffer, uint alignmentOffset )\n" + "{\n" + " int tid = get_global_id( 0 );\n" + // We need to zero the shared storage since any locations we don't write to will have garbage otherwise. 
+ " sSharedStorage[ offsets[tid] ] = (%s%d)(%s)0;\n" + " sSharedStorage[ offsets[tid] +1 ] = sSharedStorage[ offsets[tid] ];\n" + " barrier( CLK_LOCAL_MEM_FENCE );\n" + "\n" + " vstore%d( srcValues[ tid ], offsets[ tid ], ( (__local %s *)sSharedStorage ) + alignmentOffset );\n" + "\n" + // Note: Once all threads are done vstore'ing into our shared storage, we then copy into the global output + // buffer, but we have to make sure ALL threads are done vstore'ing before we do the copy + " barrier( CLK_LOCAL_MEM_FENCE );\n" + "\n" + // Note: we only copy the relevant portion of our local storage over to the dest buffer, because + // otherwise, local threads would be overwriting results from other local threads + " int i;\n" + " __local %s *sp = (__local %s*) (sSharedStorage + offsets[tid]) + alignmentOffset;\n" + " __global %s *dp = (__global %s*) (destBuffer + offsets[tid]) + alignmentOffset;\n" + " for( i = 0; (size_t)i < sizeof( sSharedStorage[0]) / sizeof( *sp ); i++ ) \n" + " dp[i] = sp[i];\n" + "}\n"; + + const char *patternV3 = + "%s" + "\n" + "__kernel void test_fn(__local %s *sSharedStorage, __global %s *srcValues, __global uint *offsets, __global %s *destBuffer, uint alignmentOffset )\n" + "{\n" + " int tid = get_global_id( 0 );\n" + // We need to zero the shared storage since any locations we don't write to will have garbage otherwise. 
+ " sSharedStorage[ 3*offsets[tid] ] = (%s)0;\n" + " sSharedStorage[ 3*offsets[tid] +1 ] = \n" + " sSharedStorage[ 3*offsets[tid] ];\n" + " sSharedStorage[ 3*offsets[tid] +2 ] = \n" + " sSharedStorage[ 3*offsets[tid]];\n" + " sSharedStorage[ 3*offsets[tid] +3 ] = \n" + " sSharedStorage[ 3*offsets[tid]];\n" + " sSharedStorage[ 3*offsets[tid] +4 ] = \n" + " sSharedStorage[ 3*offsets[tid] ];\n" + " sSharedStorage[ 3*offsets[tid] +5 ] = \n" + " sSharedStorage[ 3*offsets[tid]];\n" + " barrier( CLK_LOCAL_MEM_FENCE );\n" + "\n" + " vstore3( vload3(tid,srcValues), offsets[ tid ], sSharedStorage + alignmentOffset );\n" + "\n" + // Note: Once all threads are done vstore'ing into our shared storage, we then copy into the global output + // buffer, but we have to make sure ALL threads are done vstore'ing before we do the copy + " barrier( CLK_LOCAL_MEM_FENCE );\n" + "\n" + // Note: we only copy the relevant portion of our local storage over to the dest buffer, because + // otherwise, local threads would be overwriting results from other local threads + " int i;\n" + " __local %s *sp = (sSharedStorage + 3*offsets[tid]) + alignmentOffset;\n" + " __global %s *dp = (destBuffer + 3*offsets[tid]) + alignmentOffset;\n" + " for( i = 0; i < 3; i++ ) \n" + " dp[i] = sp[i];\n" + "}\n"; + + const char *typeName = get_explicit_type_name(type); + if(inVectorSize == 3) { + sprintf( destBuffer, patternV3, + type == kDouble ? doubleExtensionPragma : "", + typeName, + typeName, + typeName, typeName, + typeName, typeName, typeName ); + } else { + sprintf( destBuffer, pattern, + type == kDouble ? 
doubleExtensionPragma : "", + typeName, (int)inVectorSize, + typeName, (int)inVectorSize, typeName, (int)inVectorSize, + typeName, (int)inVectorSize, typeName, + (int)inVectorSize, typeName, typeName, + typeName, typeName, typeName ); + } + // log_info(destBuffer); +} + +int test_vstore_local(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ) +{ + // Determine the max size of a local buffer that we can test against + cl_ulong localSize; + int error = clGetDeviceInfo( device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof( localSize ), &localSize, NULL ); + test_error( error, "Unable to get max size of local memory buffer" ); + if( localSize > 10240 ) + localSize = 10240; + if (localSize > 4096) + localSize -= 2048; + else + localSize /= 2; + return test_vstoreset( device, context, queue, create_local_store_code, (size_t)localSize ); +} + + +void create_private_store_code( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize ) +{ + const char *pattern = + "%s" + // Private memory is unique per thread, unlike local storage which is unique per local work group. Which means + // for this test, we have to copy the entire test buffer into private storage ON EACH THREAD to be an effective test + "\n" + "__kernel void test_fn( __global %s%d *srcValues, __global uint *offsets, __global %s%d *destBuffer, uint alignmentOffset )\n" + "{\n" + " __private %s%d sPrivateStorage[ %d ];\n" + " int tid = get_global_id( 0 );\n" + // We need to zero the shared storage since any locations we don't write to will have garbage otherwise. 
+ " sPrivateStorage[tid] = (%s%d)(%s)0;\n" + "\n" + " vstore%d( srcValues[ tid ], offsets[ tid ], ( (__private %s *)sPrivateStorage ) + alignmentOffset );\n" + "\n" + // Note: we only copy the relevant portion of our local storage over to the dest buffer, because + // otherwise, local threads would be overwriting results from other local threads + " uint i;\n" + " __private %s *sp = (__private %s*) (sPrivateStorage + offsets[tid]) + alignmentOffset;\n" + " __global %s *dp = (__global %s*) (destBuffer + offsets[tid]) + alignmentOffset;\n" + " for( i = 0; i < sizeof( sPrivateStorage[0]) / sizeof( *sp ); i++ ) \n" + " dp[i] = sp[i];\n" + "}\n"; + + + const char *patternV3 = + "%s" + // Private memory is unique per thread, unlike local storage which is unique per local work group. Which means + // for this test, we have to copy the entire test buffer into private storage ON EACH THREAD to be an effective test + "\n" + "__kernel void test_fn( __global %s *srcValues, __global uint *offsets, __global %s3 *destBuffer, uint alignmentOffset )\n" + "{\n" + " __private %s3 sPrivateStorage[ %d ];\n" // keep this %d + " int tid = get_global_id( 0 );\n" + // We need to zero the shared storage since any locations we don't write to will have garbage otherwise. 
+ " sPrivateStorage[tid] = (%s3)(%s)0;\n" + "\n" + + " vstore3( vload3(tid,srcValues), offsets[ tid ], ( (__private %s *)sPrivateStorage ) + alignmentOffset );\n" + "\n" + // Note: we only copy the relevant portion of our local storage over to the dest buffer, because + // otherwise, local threads would be overwriting results from other local threads + " uint i;\n" + " __private %s *sp = ((__private %s*) sPrivateStorage) + 3*offsets[tid] + alignmentOffset;\n" + " __global %s *dp = ((__global %s*) destBuffer) + 3*offsets[tid] + alignmentOffset;\n" + " for( i = 0; i < 3; i++ ) \n" + " dp[i] = sp[i];\n" + "}\n"; + + const char *typeName = get_explicit_type_name(type); + if(inVectorSize == 3) { + sprintf( destBuffer, patternV3, + type == kDouble ? doubleExtensionPragma : "", + typeName, typeName, + typeName, (int)inBufferSize, + typeName, typeName, + typeName, typeName, typeName, typeName, typeName ); + } else { + sprintf( destBuffer, pattern, + type == kDouble ? doubleExtensionPragma : "", + typeName, (int)inVectorSize, typeName, (int)inVectorSize, + typeName, (int)inVectorSize, (int)inBufferSize, + typeName, (int)inVectorSize, typeName, + (int)inVectorSize, typeName, typeName, typeName, typeName, typeName ); + } +} + +int test_vstore_private(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ) +{ + // We have no idea how much actual private storage is available, so just pick a reasonable value, + // which is that we can fit at least two 16-element long, which is 2*8 bytes * 16 = 256 bytes + return test_vstoreset( device, context, queue, create_private_store_code, 256 ); +} + + + diff --git a/test_conformance/compatibility/test_conformance/basic/test_work_item_functions.cpp b/test_conformance/compatibility/test_conformance/basic/test_work_item_functions.cpp new file mode 100644 index 00000000..04de4e39 --- /dev/null +++ b/test_conformance/compatibility/test_conformance/basic/test_work_item_functions.cpp @@ -0,0 +1,177 @@ +// +// Copyright (c) 
2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include + + +#include "procs.h" +#include "../../test_common/harness/conversions.h" +#include "../../test_common/harness/typeWrappers.h" + +typedef struct work_item_data +{ + cl_uint workDim; + cl_uint globalSize[ 3 ]; + cl_uint globalID[ 3 ]; + cl_uint localSize[ 3 ]; + cl_uint localID[ 3 ]; + cl_uint numGroups[ 3 ]; + cl_uint groupID[ 3 ]; +}; + +static const char *workItemKernelCode = +"typedef struct {\n" +" uint workDim;\n" +" uint globalSize[ 3 ];\n" +" uint globalID[ 3 ];\n" +" uint localSize[ 3 ];\n" +" uint localID[ 3 ];\n" +" uint numGroups[ 3 ];\n" +" uint groupID[ 3 ];\n" +" } work_item_data;\n" +"\n" +"__kernel void sample_kernel( __global work_item_data *outData )\n" +"{\n" +" int id = get_global_id(0);\n" +" outData[ id ].workDim = (uint)get_work_dim();\n" +" for( uint i = 0; i < get_work_dim(); i++ )\n" +" {\n" +" outData[ id ].globalSize[ i ] = (uint)get_global_size( i );\n" +" outData[ id ].globalID[ i ] = (uint)get_global_id( i );\n" +" outData[ id ].localSize[ i ] = (uint)get_local_size( i );\n" +" outData[ id ].localID[ i ] = (uint)get_local_id( i );\n" +" outData[ id ].numGroups[ i ] = (uint)get_num_groups( i );\n" +" outData[ id ].groupID[ i ] = (uint)get_group_id( i );\n" +" }\n" +"}"; + +#define NUM_TESTS 1 + +int test_work_item_functions(cl_device_id deviceID, cl_context 
context, cl_command_queue queue, int num_elements) +{ + int error; + + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper outData; + work_item_data testData[ 10240 ]; + size_t threads[3], localThreads[3]; + MTdata d; + + + error = create_single_kernel_helper( context, &program, &kernel, 1, &workItemKernelCode, "sample_kernel" ); + test_error( error, "Unable to create testing kernel" ); + + outData = clCreateBuffer( context, CL_MEM_READ_WRITE, sizeof( testData ), NULL, &error ); + test_error( error, "Unable to create output buffer" ); + + error = clSetKernelArg( kernel, 0, sizeof( outData ), &outData ); + test_error( error, "Unable to set kernel arg" ); + + d = init_genrand( gRandomSeed ); + for( size_t dim = 1; dim <= 3; dim++ ) + { + for( int i = 0; i < NUM_TESTS; i++ ) + { + size_t numItems = 1; + for( size_t j = 0; j < dim; j++ ) + { + // All of our thread sizes should be within the max local sizes, since they're all <= 20 + threads[ j ] = (size_t)random_in_range( 1, 20, d ); + localThreads[ j ] = threads[ j ] / (size_t)random_in_range( 1, (int)threads[ j ], d ); + while( localThreads[ j ] > 1 && ( threads[ j ] % localThreads[ j ] != 0 ) ) + localThreads[ j ]--; + + numItems *= threads[ j ]; + + // Hack for now: localThreads > 1 are iffy + localThreads[ j ] = 1; + } + error = clEnqueueNDRangeKernel( queue, kernel, (cl_uint)dim, NULL, threads, localThreads, 0, NULL, NULL ); + test_error( error, "Unable to run kernel" ); + + error = clEnqueueReadBuffer( queue, outData, CL_TRUE, 0, sizeof( testData ), testData, 0, NULL, NULL ); + test_error( error, "Unable to read results" ); + + // Validate + for( size_t q = 0; q < threads[0]; q++ ) + { + // We can't really validate the actual value of each one, but we can validate that they're within a sane range + if( testData[ q ].workDim != (cl_uint)dim ) + { + log_error( "ERROR: get_work_dim() did not return proper value for %d dimensions (expected %d, got %d)\n", (int)dim, (int)dim, (int)testData[ q ].workDim 
); + free_mtdata(d); + return -1; + } + for( size_t j = 0; j < dim; j++ ) + { + if( testData[ q ].globalSize[ j ] != (cl_uint)threads[ j ] ) + { + log_error( "ERROR: get_global_size(%d) did not return proper value for %d dimensions (expected %d, got %d)\n", + (int)j, (int)dim, (int)threads[ j ], (int)testData[ q ].globalSize[ j ] ); + free_mtdata(d); + return -1; + } + if( testData[ q ].globalID[ j ] < 0 || testData[ q ].globalID[ j ] >= (cl_uint)threads[ j ] ) + { + log_error( "ERROR: get_global_id(%d) did not return proper value for %d dimensions (max %d, got %d)\n", + (int)j, (int)dim, (int)threads[ j ], (int)testData[ q ].globalID[ j ] ); + free_mtdata(d); + return -1; + } + if( testData[ q ].localSize[ j ] != (cl_uint)localThreads[ j ] ) + { + log_error( "ERROR: get_local_size(%d) did not return proper value for %d dimensions (expected %d, got %d)\n", + (int)j, (int)dim, (int)localThreads[ j ], (int)testData[ q ].localSize[ j ] ); + free_mtdata(d); + return -1; + } + if( testData[ q ].localID[ j ] < 0 && testData[ q ].localID[ j ] >= (cl_uint)localThreads[ j ] ) + { + log_error( "ERROR: get_local_id(%d) did not return proper value for %d dimensions (max %d, got %d)\n", + (int)j, (int)dim, (int)localThreads[ j ], (int)testData[ q ].localID[ j ] ); + free_mtdata(d); + return -1; + } + size_t groupCount = ( threads[ j ] + localThreads[ j ] - 1 ) / localThreads[ j ]; + if( testData[ q ].numGroups[ j ] != (cl_uint)groupCount ) + { + log_error( "ERROR: get_num_groups(%d) did not return proper value for %d dimensions (expected %d with global dim %d and local dim %d, got %d)\n", + (int)j, (int)dim, (int)groupCount, (int)threads[ j ], (int)localThreads[ j ], (int)testData[ q ].numGroups[ j ] ); + free_mtdata(d); + return -1; + } + if( testData[ q ].groupID[ j ] < 0 || testData[ q ].groupID[ j ] >= (cl_uint)groupCount ) + { + log_error( "ERROR: get_group_id(%d) did not return proper value for %d dimensions (max %d, got %d)\n", + (int)j, (int)dim, (int)groupCount, 
(int)testData[ q ].groupID[ j ] ); + free_mtdata(d); + return -1; + } + } + } + } + } + + free_mtdata(d); + return 0; +} + + diff --git a/test_conformance/compatibility/test_conformance/basic/test_writeimage.c b/test_conformance/compatibility/test_conformance/basic/test_writeimage.c new file mode 100644 index 00000000..26489a26 --- /dev/null +++ b/test_conformance/compatibility/test_conformance/basic/test_writeimage.c @@ -0,0 +1,300 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include + + +#include "procs.h" + +static const char *bgra8888_write_kernel_code = +"\n" +"__kernel void test_bgra8888_write(__global unsigned char *src, write_only image2d_t dstimg)\n" +"{\n" +" int tid_x = get_global_id(0);\n" +" int tid_y = get_global_id(1);\n" +" int indx = tid_y * get_image_width(dstimg) + tid_x;\n" +" float4 color;\n" +"\n" +" indx *= 4;\n" +" color = (float4)((float)src[indx+2], (float)src[indx+1], (float)src[indx+0], (float)src[indx+3]);\n" +" color /= (float4)(255.0f, 255.0f, 255.0f, 255.0f);\n" +" write_imagef(dstimg, (int2)(tid_x, tid_y), color);\n" +"\n" +"}\n"; + + +static const char *rgba8888_write_kernel_code = +"\n" +"__kernel void test_rgba8888_write(__global unsigned char *src, write_only image2d_t dstimg)\n" +"{\n" +" int tid_x = get_global_id(0);\n" +" int tid_y = get_global_id(1);\n" +" int indx = tid_y * get_image_width(dstimg) + tid_x;\n" +" float4 color;\n" +"\n" +" indx *= 4;\n" +" color = (float4)((float)src[indx+0], (float)src[indx+1], (float)src[indx+2], (float)src[indx+3]);\n" +" color /= (float4)(255.0f, 255.0f, 255.0f, 255.0f);\n" +" write_imagef(dstimg, (int2)(tid_x, tid_y), color);\n" +"\n" +"}\n"; + + +static unsigned char * +generate_8888_image(int w, int h, MTdata d) +{ + cl_uchar *ptr = (cl_uchar *)malloc(w * h * 4); + int i; + + for (i=0; i +#include +#include +#include + + +#include "procs.h" + + +static const char *rgbaFFFF_write_kernel_code = +"__kernel void test_rgbaFFFF_write(__global float *src, write_only image2d_t dstimg)\n" +"{\n" +" int tid_x = get_global_id(0);\n" +" int tid_y = get_global_id(1);\n" +" int indx = tid_y * get_image_width(dstimg) + tid_x;\n" +" float4 color;\n" +"\n" +" indx *= 4;\n" +" color = (float4)(src[indx+0], src[indx+1], src[indx+2], src[indx+3]);\n" +" write_imagef(dstimg, (int2)(tid_x, tid_y), color);\n" +"\n" +"}\n"; + + +static float * +generate_float_image(int w, int h, MTdata d) +{ + 
float *ptr = (float*)malloc(w * h * 4 * sizeof(float)); + int i; + + for (i=0; i +#include +#include +#include + + +#include "procs.h" + +static const char *rgba16_write_kernel_code = +"__kernel void test_rgba16_write(__global unsigned short *src, write_only image2d_t dstimg)\n" +"{\n" +" int tid_x = get_global_id(0);\n" +" int tid_y = get_global_id(1);\n" +" int indx = tid_y * get_image_width(dstimg) + tid_x;\n" +" float4 color;\n" +"\n" +" indx *= 4;\n" +" color = (float4)((float)src[indx+0], (float)src[indx+1], (float)src[indx+2], (float)src[indx+3]);\n" +" color /= 65535.0f;\n" +" write_imagef(dstimg, (int2)(tid_x, tid_y), color);\n" +"\n" +"}\n"; + + +static unsigned short * +generate_16bit_image(int w, int h, MTdata d) +{ + cl_ushort *ptr = (cl_ushort*)malloc(w * h * 4 * sizeof(cl_ushort)); + int i; + + for (i=0; i MAX_ERR) + { + log_error("%s failed\n", string); + return -1; + } + } + + log_info("%s passed\n", string); + return 0; +} + +int test_writeimage_int16(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + cl_mem streams[3]; + cl_program program; + cl_kernel kernel[2]; + cl_image_format img_format; + cl_ushort *input_ptr, *output_ptr; + size_t threads[2]; + int img_width = 512; + int img_height = 512; + int i, err, any_err = 0; + size_t origin[3] = {0, 0, 0}; + size_t region[3] = {img_width, img_height, 1}; + size_t length = img_width * img_height * 4 * sizeof(cl_ushort); + + PASSIVE_REQUIRE_IMAGE_SUPPORT( device ) + + MTdata d = init_genrand( gRandomSeed ); + input_ptr = generate_16bit_image(img_width, img_height, d); + free_mtdata(d); d = NULL; + + output_ptr = (cl_ushort*)malloc(length); + + img_format.image_channel_order = CL_RGBA; + img_format.image_channel_data_type = CL_UNORM_INT16; + streams[0] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format, img_width, img_height, 0, NULL, NULL); + if (!streams[0]) + { + log_error("create_image_2d failed\n"); + return -1; + } + + img_format.image_channel_order = 
CL_RGBA; + img_format.image_channel_data_type = CL_UNORM_INT16; + streams[1] = create_image_2d(context, CL_MEM_WRITE_ONLY, &img_format, img_width, img_height, 0, NULL, NULL); + if (!streams[1]) + { + log_error("create_image_2d failed\n"); + return -1; + } + streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL); + if (!streams[2]) + { + log_error("clCreateArray failed\n"); + return -1; + } + + err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, input_ptr, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + log_error("clEnqueueWriteBuffer failed\n"); + return -1; + } + + err = create_single_kernel_helper(context, &program, &kernel[0], 1, &rgba16_write_kernel_code, "test_rgba16_write" ); + if (err) + return -1; + kernel[1] = clCreateKernel(program, "test_rgba16_write", NULL); + if (!kernel[1]) + { + log_error("clCreateKernel failed\n"); + return -1; + } + + err = clSetKernelArg(kernel[0], 0, sizeof streams[2], &streams[2]); + err |= clSetKernelArg(kernel[0], 1, sizeof streams[0], &streams[0]); + if (err != CL_SUCCESS) + { + log_error("clSetKernelArgs failed\n"); + return -1; + } + + err = clSetKernelArg(kernel[1], 0, sizeof streams[2], &streams[2]); + err |= clSetKernelArg(kernel[1], 1, sizeof streams[1], &streams[1]); + if (err != CL_SUCCESS) + { + log_error("clSetKernelArgs failed\n"); + return -1; + } + + threads[0] = (unsigned int)img_width; + threads[1] = (unsigned int)img_height; + + for (i=0; i<2; i++) + { + err = clEnqueueNDRangeKernel(queue, kernel[i], 2, NULL, threads, NULL, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + log_error("clExecuteKernel failed\n"); + return -1; + } + + err = clEnqueueReadImage(queue, streams[i], CL_TRUE, origin, region, 0, 0, output_ptr, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + log_error("clReadImage failed\n"); + return -1; + } + + err = verify_16bit_image((i == 0) ? 
"WRITE_IMAGE_RGBA_UNORM_INT16 test with memflags = CL_MEM_READ_WRITE" : + "WRITE_IMAGE_RGBA_UNORM_INT16 test with memflags = CL_MEM_WRITE_ONLY", + input_ptr, output_ptr, img_width, img_height); + any_err |= err; + } + + // cleanup + clReleaseMemObject(streams[0]); + clReleaseMemObject(streams[1]); + clReleaseMemObject(streams[2]); + clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + clReleaseProgram(program); + free(input_ptr); + free(output_ptr); + + return any_err; +} + + diff --git a/test_conformance/compatibility/test_conformance/images/image_helpers.h b/test_conformance/compatibility/test_conformance/images/image_helpers.h new file mode 100644 index 00000000..d2132c7e --- /dev/null +++ b/test_conformance/compatibility/test_conformance/images/image_helpers.h @@ -0,0 +1,534 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef _image_helpers_h +#define _image_helpers_h + +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include +#include + +#include "../../test_common/harness/conversions.h" +#include "../../test_common/harness/typeWrappers.h" +#include "../../test_common/harness/kernelHelpers.h" +#include "../../test_common/harness/imageHelpers.h" +#include "../../test_common/harness/errorHelpers.h" +#include "../../test_common/harness/mt19937.h" +#include "../../test_common/harness/rounding_mode.h" +#include "../../test_common/harness/clImageHelper.h" + +extern int gTestCount; +extern int gTestFailure; +extern cl_device_type gDeviceType; + +// Number of iterations per image format to test if not testing max images, rounding, or small images +#define NUM_IMAGE_ITERATIONS 3 + +// Definition for our own sampler type, to mirror the cl_sampler internals +typedef struct { + cl_addressing_mode addressing_mode; + cl_filter_mode filter_mode; + bool normalized_coords; +} image_sampler_data; + +extern void print_read_header( cl_image_format *format, image_sampler_data *sampler, bool err = false, int t = 0 ); +extern void print_write_header( cl_image_format *format, bool err); +extern void print_header( cl_image_format *format, bool err ); +extern bool find_format( cl_image_format *formatList, unsigned int numFormats, cl_image_format *formatToFind ); +extern bool check_minimum_supported( cl_image_format *formatList, unsigned int numFormats, cl_mem_flags flags ); + +cl_channel_type get_channel_type_from_name( const char *name ); +cl_channel_order get_channel_order_from_name( const char *name ); +int random_in_range( int minV, int maxV, MTdata d ); +int random_log_in_range( int minV, int maxV, MTdata d ); + +typedef struct +{ + size_t width; + size_t height; + size_t depth; + size_t rowPitch; + size_t slicePitch; + size_t arraySize; + cl_image_format *format; + cl_mem buffer; + cl_mem_object_type type; +} image_descriptor; + +typedef struct +{ + float p[4]; 
+}FloatPixel; + +void get_max_sizes(size_t *numberOfSizes, const int maxNumberOfSizes, + size_t sizes[][3], size_t maxWidth, size_t maxHeight, size_t maxDepth, size_t maxArraySize, + const cl_ulong maxIndividualAllocSize, const cl_ulong maxTotalAllocSize, cl_mem_object_type image_type, cl_image_format *format); +extern size_t get_format_max_int( cl_image_format *format ); + +extern char * generate_random_image_data( image_descriptor *imageInfo, BufferOwningPtr &Owner, MTdata d ); + +extern int debug_find_vector_in_image( void *imagePtr, image_descriptor *imageInfo, + void *vectorToFind, size_t vectorSize, int *outX, int *outY, int *outZ ); + +extern int debug_find_pixel_in_image( void *imagePtr, image_descriptor *imageInfo, + unsigned int *valuesToFind, int *outX, int *outY, int *outZ ); +extern int debug_find_pixel_in_image( void *imagePtr, image_descriptor *imageInfo, + int *valuesToFind, int *outX, int *outY, int *outZ ); +extern int debug_find_pixel_in_image( void *imagePtr, image_descriptor *imageInfo, + float *valuesToFind, int *outX, int *outY, int *outZ ); + +extern void copy_image_data( image_descriptor *srcImageInfo, image_descriptor *dstImageInfo, void *imageValues, void *destImageValues, + const size_t sourcePos[], const size_t destPos[], const size_t regionSize[] ); + +int has_alpha(cl_image_format *format); + +inline float calculate_array_index( float coord, float extent ); + +template void read_image_pixel( void *imageData, image_descriptor *imageInfo, + int x, int y, int z, T *outData ) +{ + float convert_half_to_float( unsigned short halfValue ); + + if ( x < 0 || x >= (int)imageInfo->width + || ( imageInfo->height != 0 && ( y < 0 || y >= (int)imageInfo->height ) ) + || ( imageInfo->depth != 0 && ( z < 0 || z >= (int)imageInfo->depth ) ) + || ( imageInfo->arraySize != 0 && ( z < 0 || z >= (int)imageInfo->arraySize ) ) ) + { + // Border color + outData[ 0 ] = outData[ 1 ] = outData[ 2 ] = outData[ 3 ] = 0; + if (!has_alpha(imageInfo->format)) + 
outData[3] = 1; + return; + } + + cl_image_format *format = imageInfo->format; + + unsigned int i; + T tempData[ 4 ]; + + // Advance to the right spot + char *ptr = (char *)imageData; + size_t pixelSize = get_pixel_size( format ); + + ptr += z * imageInfo->slicePitch + y * imageInfo->rowPitch + x * pixelSize; + + // OpenCL only supports reading floats from certain formats + switch( format->image_channel_data_type ) + { + case CL_SNORM_INT8: + { + cl_char *dPtr = (cl_char *)ptr; + for( i = 0; i < get_format_channel_count( format ); i++ ) + tempData[ i ] = (T)dPtr[ i ]; + break; + } + + case CL_UNORM_INT8: + { + cl_uchar *dPtr = (cl_uchar *)ptr; + for( i = 0; i < get_format_channel_count( format ); i++ ) + tempData[ i ] = (T)dPtr[ i ]; + break; + } + + case CL_SIGNED_INT8: + { + cl_char *dPtr = (cl_char *)ptr; + for( i = 0; i < get_format_channel_count( format ); i++ ) + tempData[ i ] = (T)dPtr[ i ]; + break; + } + + case CL_UNSIGNED_INT8: + { + cl_uchar *dPtr = (cl_uchar*)ptr; + for( i = 0; i < get_format_channel_count( format ); i++ ) + tempData[ i ] = (T)dPtr[ i ]; + break; + } + + case CL_SNORM_INT16: + { + cl_short *dPtr = (cl_short *)ptr; + for( i = 0; i < get_format_channel_count( format ); i++ ) + tempData[ i ] = (T)dPtr[ i ]; + break; + } + + case CL_UNORM_INT16: + { + cl_ushort *dPtr = (cl_ushort *)ptr; + for( i = 0; i < get_format_channel_count( format ); i++ ) + tempData[ i ] = (T)dPtr[ i ]; + break; + } + + case CL_SIGNED_INT16: + { + cl_short *dPtr = (cl_short *)ptr; + for( i = 0; i < get_format_channel_count( format ); i++ ) + tempData[ i ] = (T)dPtr[ i ]; + break; + } + + case CL_UNSIGNED_INT16: + { + cl_ushort *dPtr = (cl_ushort *)ptr; + for( i = 0; i < get_format_channel_count( format ); i++ ) + tempData[ i ] = (T)dPtr[ i ]; + break; + } + + case CL_HALF_FLOAT: + { + cl_ushort *dPtr = (cl_ushort *)ptr; + for( i = 0; i < get_format_channel_count( format ); i++ ) + tempData[ i ] = (T)convert_half_to_float( dPtr[ i ] ); + break; + } + + case 
CL_SIGNED_INT32: + { + cl_int *dPtr = (cl_int *)ptr; + for( i = 0; i < get_format_channel_count( format ); i++ ) + tempData[ i ] = (T)dPtr[ i ]; + break; + } + + case CL_UNSIGNED_INT32: + { + cl_uint *dPtr = (cl_uint *)ptr; + for( i = 0; i < get_format_channel_count( format ); i++ ) + tempData[ i ] = (T)dPtr[ i ]; + break; + } + + case CL_UNORM_SHORT_565: + { + cl_ushort *dPtr = (cl_ushort*)ptr; + tempData[ 0 ] = (T)( dPtr[ 0 ] >> 11 ); + tempData[ 1 ] = (T)( ( dPtr[ 0 ] >> 5 ) & 63 ); + tempData[ 2 ] = (T)( dPtr[ 0 ] & 31 ); + break; + } + +#ifdef OBSOLETE_FORMAT + case CL_UNORM_SHORT_565_REV: + { + unsigned short *dPtr = (unsigned short *)ptr; + tempData[ 2 ] = (T)( dPtr[ 0 ] >> 11 ); + tempData[ 1 ] = (T)( ( dPtr[ 0 ] >> 5 ) & 63 ); + tempData[ 0 ] = (T)( dPtr[ 0 ] & 31 ); + break; + } + + case CL_UNORM_SHORT_555_REV: + { + unsigned short *dPtr = (unsigned short *)ptr; + tempData[ 2 ] = (T)( ( dPtr[ 0 ] >> 10 ) & 31 ); + tempData[ 1 ] = (T)( ( dPtr[ 0 ] >> 5 ) & 31 ); + tempData[ 0 ] = (T)( dPtr[ 0 ] & 31 ); + break; + } + + case CL_UNORM_INT_8888: + { + unsigned int *dPtr = (unsigned int *)ptr; + tempData[ 3 ] = (T)( dPtr[ 0 ] >> 24 ); + tempData[ 2 ] = (T)( ( dPtr[ 0 ] >> 16 ) & 0xff ); + tempData[ 1 ] = (T)( ( dPtr[ 0 ] >> 8 ) & 0xff ); + tempData[ 0 ] = (T)( dPtr[ 0 ] & 0xff ); + break; + } + case CL_UNORM_INT_8888_REV: + { + unsigned int *dPtr = (unsigned int *)ptr; + tempData[ 0 ] = (T)( dPtr[ 0 ] >> 24 ); + tempData[ 1 ] = (T)( ( dPtr[ 0 ] >> 16 ) & 0xff ); + tempData[ 2 ] = (T)( ( dPtr[ 0 ] >> 8 ) & 0xff ); + tempData[ 3 ] = (T)( dPtr[ 0 ] & 0xff ); + break; + } + + case CL_UNORM_INT_101010_REV: + { + unsigned int *dPtr = (unsigned int *)ptr; + tempData[ 2 ] = (T)( ( dPtr[ 0 ] >> 20 ) & 0x3ff ); + tempData[ 1 ] = (T)( ( dPtr[ 0 ] >> 10 ) & 0x3ff ); + tempData[ 0 ] = (T)( dPtr[ 0 ] & 0x3ff ); + break; + } +#endif + case CL_UNORM_SHORT_555: + { + cl_ushort *dPtr = (cl_ushort *)ptr; + tempData[ 0 ] = (T)( ( dPtr[ 0 ] >> 10 ) & 31 ); + tempData[ 1 ] = (T)( ( 
dPtr[ 0 ] >> 5 ) & 31 ); + tempData[ 2 ] = (T)( dPtr[ 0 ] & 31 ); + break; + } + + case CL_UNORM_INT_101010: + { + cl_uint *dPtr = (cl_uint *)ptr; + tempData[ 0 ] = (T)( ( dPtr[ 0 ] >> 20 ) & 0x3ff ); + tempData[ 1 ] = (T)( ( dPtr[ 0 ] >> 10 ) & 0x3ff ); + tempData[ 2 ] = (T)( dPtr[ 0 ] & 0x3ff ); + break; + } + + case CL_FLOAT: + { + cl_float *dPtr = (cl_float *)ptr; + for( i = 0; i < get_format_channel_count( format ); i++ ) + tempData[ i ] = (T)dPtr[ i ]; + break; + } +#ifdef CL_SFIXED14_APPLE + case CL_SFIXED14_APPLE: + { + cl_float *dPtr = (cl_float *)ptr; + for( i = 0; i < get_format_channel_count( format ); i++ ) + tempData[ i ] = (T)dPtr[ i ] + 0x4000; + break; + } +#endif + } + + + outData[ 0 ] = outData[ 1 ] = outData[ 2 ] = 0; + outData[ 3 ] = 1; + + if( format->image_channel_order == CL_A ) + { + outData[ 3 ] = tempData[ 0 ]; + } + else if( format->image_channel_order == CL_R ) + { + outData[ 0 ] = tempData[ 0 ]; + } + else if( format->image_channel_order == CL_Rx ) + { + outData[ 0 ] = tempData[ 0 ]; + } + else if( format->image_channel_order == CL_RA ) + { + outData[ 0 ] = tempData[ 0 ]; + outData[ 3 ] = tempData[ 1 ]; + } + else if( format->image_channel_order == CL_RG ) + { + outData[ 0 ] = tempData[ 0 ]; + outData[ 1 ] = tempData[ 1 ]; + } + else if( format->image_channel_order == CL_RGx ) + { + outData[ 0 ] = tempData[ 0 ]; + outData[ 1 ] = tempData[ 1 ]; + } + else if( format->image_channel_order == CL_RGB ) + { + outData[ 0 ] = tempData[ 0 ]; + outData[ 1 ] = tempData[ 1 ]; + outData[ 2 ] = tempData[ 2 ]; + } + else if( format->image_channel_order == CL_RGBx ) + { + outData[ 0 ] = tempData[ 0 ]; + outData[ 1 ] = tempData[ 1 ]; + outData[ 2 ] = tempData[ 2 ]; + } + else if( format->image_channel_order == CL_RGBA ) + { + outData[ 0 ] = tempData[ 0 ]; + outData[ 1 ] = tempData[ 1 ]; + outData[ 2 ] = tempData[ 2 ]; + outData[ 3 ] = tempData[ 3 ]; + } + else if( format->image_channel_order == CL_ARGB ) + { + outData[ 0 ] = tempData[ 1 ]; + outData[ 1 
] = tempData[ 2 ]; + outData[ 2 ] = tempData[ 3 ]; + outData[ 3 ] = tempData[ 0 ]; + } + else if( format->image_channel_order == CL_BGRA ) + { + outData[ 0 ] = tempData[ 2 ]; + outData[ 1 ] = tempData[ 1 ]; + outData[ 2 ] = tempData[ 0 ]; + outData[ 3 ] = tempData[ 3 ]; + } + else if( format->image_channel_order == CL_INTENSITY ) + { + outData[ 1 ] = tempData[ 0 ]; + outData[ 2 ] = tempData[ 0 ]; + outData[ 3 ] = tempData[ 0 ]; + } + else if( format->image_channel_order == CL_LUMINANCE ) + { + outData[ 1 ] = tempData[ 0 ]; + outData[ 2 ] = tempData[ 0 ]; + } +#ifdef CL_1RGB_APPLE + else if( format->image_channel_order == CL_1RGB_APPLE ) + { + outData[ 0 ] = tempData[ 1 ]; + outData[ 1 ] = tempData[ 2 ]; + outData[ 2 ] = tempData[ 3 ]; + outData[ 3 ] = 0xff; + } +#endif +#ifdef CL_BGR1_APPLE + else if( format->image_channel_order == CL_BGR1_APPLE ) + { + outData[ 0 ] = tempData[ 2 ]; + outData[ 1 ] = tempData[ 1 ]; + outData[ 2 ] = tempData[ 0 ]; + outData[ 3 ] = 0xff; + } +#endif + else + { + log_error("Invalid format:"); + print_header(format, true); + } +} + +// Stupid template rules +bool get_integer_coords( float x, float y, float z, + size_t width, size_t height, size_t depth, + image_sampler_data *imageSampler, image_descriptor *imageInfo, + int &outX, int &outY, int &outZ ); +bool get_integer_coords_offset( float x, float y, float z, + float xAddressOffset, float yAddressOffset, float zAddressOffset, + size_t width, size_t height, size_t depth, + image_sampler_data *imageSampler, image_descriptor *imageInfo, + int &outX, int &outY, int &outZ ); + + +template void sample_image_pixel_offset( void *imageData, image_descriptor *imageInfo, + float x, float y, float z, float xAddressOffset, float yAddressOffset, float zAddressOffset, + image_sampler_data *imageSampler, T *outData ) +{ + int iX, iY, iZ; + + float max_w = imageInfo->width; + float max_h; + float max_d; + + switch (imageInfo->type) { + case CL_MEM_OBJECT_IMAGE1D_ARRAY: + max_h = imageInfo->arraySize; 
+ max_d = 0; + break; + case CL_MEM_OBJECT_IMAGE2D_ARRAY: + max_h = imageInfo->height; + max_d = imageInfo->arraySize; + break; + default: + max_h = imageInfo->height; + max_d = imageInfo->depth; + break; + } + + get_integer_coords_offset( x, y, z, xAddressOffset, yAddressOffset, zAddressOffset, max_w, max_h, max_d, imageSampler, imageInfo, iX, iY, iZ ); + + read_image_pixel( imageData, imageInfo, iX, iY, iZ, outData ); +} + + +template void sample_image_pixel( void *imageData, image_descriptor *imageInfo, + float x, float y, float z, image_sampler_data *imageSampler, T *outData ) +{ + return sample_image_pixel_offset(imageData, imageInfo, x, y, z, 0.0f, 0.0f, 0.0f, imageSampler, outData); +} + +FloatPixel sample_image_pixel_float( void *imageData, image_descriptor *imageInfo, + float x, float y, float z, image_sampler_data *imageSampler, float *outData, int verbose, int *containsDenorms ); + +FloatPixel sample_image_pixel_float_offset( void *imageData, image_descriptor *imageInfo, + float x, float y, float z, float xAddressOffset, float yAddressOffset, float zAddressOffset, + image_sampler_data *imageSampler, float *outData, int verbose, int *containsDenorms ); + + +extern void pack_image_pixel( unsigned int *srcVector, const cl_image_format *imageFormat, void *outData ); +extern void pack_image_pixel( int *srcVector, const cl_image_format *imageFormat, void *outData ); +extern void pack_image_pixel( float *srcVector, const cl_image_format *imageFormat, void *outData ); +extern void pack_image_pixel_error( const float *srcVector, const cl_image_format *imageFormat, const void *results, float *errors ); + +extern char *create_random_image_data( ExplicitType dataType, image_descriptor *imageInfo, BufferOwningPtr &P, MTdata d ); + +// deprecated +// extern bool clamp_image_coord( image_sampler_data *imageSampler, float value, size_t max, int &outValue ); + +extern void get_sampler_kernel_code( image_sampler_data *imageSampler, char *outLine ); +extern float 
get_max_absolute_error( cl_image_format *format, image_sampler_data *sampler); +extern float get_max_relative_error( cl_image_format *format, image_sampler_data *sampler, int is3D, int isLinearFilter ); +extern int issubnormal(float); + + +#define errMax( _x , _y ) ( (_x) != (_x) ? (_x) : (_x) > (_y) ? (_x) : (_y) ) + +static inline cl_uint abs_diff_uint( cl_uint x, cl_uint y ) +{ + return y > x ? y - x : x - y; +} + +static inline cl_uint abs_diff_int( cl_int x, cl_int y ) +{ + return (cl_uint) (y > x ? y - x : x - y); +} + +static inline cl_float relative_error( float test, float expected ) +{ + // 0-0/0 is 0 in this case, not NaN + if( test == 0.0f && expected == 0.0f ) + return 0.0f; + + return (test - expected) / expected; +} + +extern float random_float(float low, float high); + +class CoordWalker +{ +public: + CoordWalker( void * coords, bool useFloats, size_t vecSize ); + ~CoordWalker(); + + cl_float Get( size_t idx, size_t el ); + +protected: + cl_float * mFloatCoords; + cl_int * mIntCoords; + size_t mVecSize; +}; + +extern int DetectFloatToHalfRoundingMode( cl_command_queue ); // Returns CL_SUCCESS on success + +int inline is_half_nan( cl_ushort half ){ return (half & 0x7fff) > 0x7c00; } + +cl_ushort convert_float_to_half( cl_float f ); +cl_float convert_half_to_float( cl_ushort h ); + + +#endif // _image_helpers_h + + diff --git a/test_conformance/compiler/CMakeLists.txt b/test_conformance/compiler/CMakeLists.txt new file mode 100644 index 00000000..861ae083 --- /dev/null +++ b/test_conformance/compiler/CMakeLists.txt @@ -0,0 +1,25 @@ +set(MODULE_NAME COMPILER) + +set(${MODULE_NAME}_SOURCES + main.c + test_build_helpers.c + test_compile.c + test_async_build.c + test_build_options.cpp + test_preprocessor.c + test_image_macro.c + test_compiler_defines_for_extensions.cpp + test_pragma_unroll.c + ../../test_common/harness/errorHelpers.c + ../../test_common/harness/threadTesting.c + ../../test_common/harness/testHarness.c + 
../../test_common/harness/kernelHelpers.c + ../../test_common/harness/typeWrappers.cpp + ../../test_common/harness/mt19937.c + ../../test_common/harness/conversions.c + ../../test_common/harness/msvc9.c + ../../test_common/harness/os_helpers.cpp + ../../test_common/harness/parseParameters.cpp +) + +include(../CMakeCommon.txt) diff --git a/test_conformance/compiler/Jamfile b/test_conformance/compiler/Jamfile new file mode 100644 index 00000000..d1324298 --- /dev/null +++ b/test_conformance/compiler/Jamfile @@ -0,0 +1,27 @@ +project + : requirements + gcc:-xc++ + msvc:"/TP" + ; + +exe test_compiler + : main.c + test_async_build.c + test_build_helpers.c + test_build_options.cpp + test_compile.c + test_preprocessor.c + test_pragma_unroll.c + ; + +install dist + : test_compiler + : debug:$(DIST)/debug/tests/test_conformance/compiler + release:$(DIST)/release/tests/test_conformance/compiler + ; + +install data + : includeTestDirectory/testIncludeFile.h + : debug:$(DIST)/debug/tests/test_conformance/compiler/includeTestDirectory + release:$(DIST)/release/tests/test_conformance/compiler/includeTestDirectory + ; diff --git a/test_conformance/compiler/Makefile b/test_conformance/compiler/Makefile new file mode 100644 index 00000000..aab43bf8 --- /dev/null +++ b/test_conformance/compiler/Makefile @@ -0,0 +1,51 @@ +ifdef BUILD_WITH_ATF +ATF = -framework ATF +USE_ATF = -DUSE_ATF +endif + +SRCS = main.c \ + test_build_helpers.c \ + test_compile.c \ + test_compiler_defines_for_extensions.cpp \ + test_async_build.c \ + test_build_options.cpp \ + test_preprocessor.c \ + test_image_macro.c \ + test_pragma_unroll.c \ + ../../test_common/harness/errorHelpers.c \ + ../../test_common/harness/threadTesting.c \ + ../../test_common/harness/testHarness.c \ + ../../test_common/harness/kernelHelpers.c \ + ../../test_common/harness/typeWrappers.cpp \ + ../../test_common/harness/mt19937.c \ + ../../test_common/harness/os_helpers.cpp \ + ../../test_common/harness/conversions.c + +DEFINES = 
DONT_TEST_GARBAGE_POINTERS + +SOURCES = $(abspath $(SRCS)) +LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries +LIBPATH += -L. +HEADERS = +TARGET = test_compiler +INCLUDE = +COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32 +CC = c++ +CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE) +CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE) +LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF} + +OBJECTS := ${SOURCES:.c=.o} +OBJECTS := ${OBJECTS:.cpp=.o} + +TARGETOBJECT = +all: $(TARGET) + +$(TARGET): $(OBJECTS) + $(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES) + +clean: + rm -f $(TARGET) $(OBJECTS) + +.DEFAULT: + @echo The target \"$@\" does not exist in Makefile. diff --git a/test_conformance/compiler/includeTestDirectory/testIncludeFile.h b/test_conformance/compiler/includeTestDirectory/testIncludeFile.h new file mode 100644 index 00000000..d8684fb4 --- /dev/null +++ b/test_conformance/compiler/includeTestDirectory/testIncludeFile.h @@ -0,0 +1,16 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#define HEADER_FOUND 12 diff --git a/test_conformance/compiler/main.c b/test_conformance/compiler/main.c new file mode 100644 index 00000000..a97b5829 --- /dev/null +++ b/test_conformance/compiler/main.c @@ -0,0 +1,168 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" + +#include +#include +#include "procs.h" +#include "../../test_common/harness/testHarness.h" + +#if !defined(_WIN32) +#include +#endif + +basefn basefn_list[] = { + test_load_program_source, + test_load_multistring_source, + test_load_two_kernel_source, + test_load_null_terminated_source, + test_load_null_terminated_multi_line_source, + test_load_null_terminated_partial_multi_line_source, + test_load_discreet_length_source, + test_get_program_source, + test_get_program_build_info, + test_get_program_info, + + test_large_compile, + test_async_build_pieces, + + test_options_optimizations, + test_options_build_macro, + test_options_build_macro_existence, + test_options_include_directory, + test_options_denorm_cache, + + test_preprocessor_define_udef, + test_preprocessor_include, + test_preprocessor_line_error, + test_preprocessor_pragma, + + test_compiler_defines_for_extensions, + test_image_macro, + + test_simple_compile_only, + test_simple_static_compile_only, + test_simple_extern_compile_only, + test_simple_compile_with_callback, + test_simple_embedded_header_compile, + test_simple_link_only, + test_two_file_regular_variable_access, + test_two_file_regular_struct_access, + test_two_file_regular_function_access, + test_simple_link_with_callback, + test_simple_embedded_header_link, + test_execute_after_simple_compile_and_link, + 
test_execute_after_simple_compile_and_link_no_device_info, + test_execute_after_simple_compile_and_link_with_defines, + test_execute_after_simple_compile_and_link_with_callbacks, + test_execute_after_simple_library_with_link, + test_execute_after_two_file_link, + test_execute_after_embedded_header_link, + test_execute_after_included_header_link, + test_execute_after_serialize_reload_object, + test_execute_after_serialize_reload_library, + test_simple_library_only, + test_simple_library_with_callback, + test_simple_library_with_link, + test_two_file_link, + test_multi_file_libraries, + test_multiple_files, + test_multiple_libraries, + test_multiple_files_multiple_libraries, + test_multiple_embedded_headers, + + test_program_binary_type, + test_compile_and_link_status_options_log, + + test_pragma_unroll +}; + + +const char *basefn_names[] = { + "load_program_source", + "load_multistring_source", + "load_two_kernel_source", + "load_null_terminated_source", + "load_null_terminated_multi_line_source", + "load_null_terminated_partial_multi_line_source", + "load_discreet_length_source", + "get_program_source", + "get_program_build_info", + "get_program_info", + + "large_compile", + "async_build", + + "options_build_optimizations", + "options_build_macro", + "options_build_macro_existence", + "options_include_directory", + "options_denorm_cache", + + "preprocessor_define_udef", + "preprocessor_include", + "preprocessor_line_error", + "preprocessor_pragma", + + "compiler_defines_for_extensions", + "image_macro", + + "simple_compile_only", + "simple_static_compile_only", + "simple_extern_compile_only", + "simple_compile_with_callback", + "simple_embedded_header_compile", + "simple_link_only", + "two_file_regular_variable_access", + "two_file_regular_struct_access", + "two_file_regular_function_access", + "simple_link_with_callback", + "simple_embedded_header_link", + "execute_after_simple_compile_and_link", + "execute_after_simple_compile_and_link_no_device_info", + 
"execute_after_simple_compile_and_link_with_defines", + "execute_after_simple_compile_and_link_with_callbacks", + "execute_after_simple_library_with_link", + "execute_after_two_file_link", + "execute_after_embedded_header_link", + "execute_after_included_header_link", + "execute_after_serialize_reload_object", + "execute_after_serialize_reload_library", + "simple_library_only", + "simple_library_with_callback", + "simple_library_with_link", + "two_file_link", + "multi_file_libraries", + "multiple_files", + "multiple_libraries", + "multiple_files_multiple_libraries", + "multiple_embedded_headers", + "program_binary_type", + "compile_and_link_status_options_log", + + "pragma_unroll", +}; + +ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_list) / sizeof(basefn_list[0]))); + +int num_fns = sizeof(basefn_names) / sizeof(char *); + +int main(int argc, const char *argv[]) +{ + return runTestHarness( argc, argv, num_fns, basefn_list, basefn_names, false, false, 0 ); +} + + diff --git a/test_conformance/compiler/procs.h b/test_conformance/compiler/procs.h new file mode 100644 index 00000000..54a93db3 --- /dev/null +++ b/test_conformance/compiler/procs.h @@ -0,0 +1,88 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/errorHelpers.h" +#include "../../test_common/harness/kernelHelpers.h" +#include "../../test_common/harness/typeWrappers.h" +#include "../../test_common/harness/conversions.h" +#include "../../test_common/harness/mt19937.h" + +extern int test_load_program_source(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_load_multistring_source(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_load_two_kernel_source(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_load_null_terminated_source(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_load_null_terminated_multi_line_source(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_load_null_terminated_partial_multi_line_source(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_load_discreet_length_source(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_get_program_source(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_get_program_build_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_get_program_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_large_compile(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_async_build_pieces(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_options_optimizations(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_options_build_macro(cl_device_id deviceID, 
cl_context context, cl_command_queue queue, int num_elements); +extern int test_options_build_macro_existence(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_options_include_directory(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_options_denorm_cache(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_preprocessor_define_udef(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_preprocessor_include(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_preprocessor_line_error(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_preprocessor_pragma(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_compiler_defines_for_extensions(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ); +extern int test_image_macro(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_simple_compile_only(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_simple_static_compile_only(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_simple_extern_compile_only(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_simple_compile_with_callback(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_simple_embedded_header_compile(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_simple_link_only(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int 
test_two_file_regular_variable_access(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_two_file_regular_struct_access(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_two_file_regular_function_access(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_simple_link_with_callback(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_simple_embedded_header_link(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_execute_after_simple_compile_and_link(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_execute_after_simple_compile_and_link_no_device_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_execute_after_simple_compile_and_link_with_defines(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_execute_after_simple_compile_and_link_with_callbacks(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_execute_after_simple_library_with_link(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_execute_after_two_file_link(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_execute_after_embedded_header_link(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_execute_after_included_header_link(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_execute_after_serialize_reload_object(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int 
test_execute_after_serialize_reload_library(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_simple_library_only(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_simple_library_with_callback(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_simple_library_with_link(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_two_file_link(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_multi_file_libraries(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_multiple_libraries(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_multiple_files(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_multiple_files_multiple_libraries(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_multiple_embedded_headers(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_program_binary_type(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_compile_and_link_status_options_log(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_pragma_unroll(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); \ No newline at end of file diff --git a/test_conformance/compiler/secondIncludeTestDirectory/testIncludeFile.h b/test_conformance/compiler/secondIncludeTestDirectory/testIncludeFile.h new file mode 100644 index 00000000..ef795d67 --- /dev/null +++ b/test_conformance/compiler/secondIncludeTestDirectory/testIncludeFile.h @@ -0,0 +1,16 @@ +// +// 
Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#define HEADER_FOUND 42 diff --git a/test_conformance/compiler/testBase.h b/test_conformance/compiler/testBase.h new file mode 100644 index 00000000..5073b21f --- /dev/null +++ b/test_conformance/compiler/testBase.h @@ -0,0 +1,31 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef _testBase_h +#define _testBase_h + +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include + +#include "procs.h" + +#endif // _testBase_h + + + diff --git a/test_conformance/compiler/test_async_build.c b/test_conformance/compiler/test_async_build.c new file mode 100644 index 00000000..e32c64ab --- /dev/null +++ b/test_conformance/compiler/test_async_build.c @@ -0,0 +1,94 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "testBase.h" + +#if !defined(_WIN32) +#include +#endif + + +const char *sample_async_kernel[] = { +"__kernel void sample_test(__global float *src, __global int *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = (int)src[tid];\n" +"\n" +"}\n" }; + +volatile int buildNotificationSent; + +void CL_CALLBACK test_notify_build_complete( cl_program program, void *userData ) +{ + if( userData == NULL || strcmp( (char *)userData, "userData" ) != 0 ) + { + log_error( "ERROR: User data passed in to build notify function was not correct!\n" ); + buildNotificationSent = -1; + } + else + buildNotificationSent = 1; + log_info( "\n <-- program successfully built\n" ); +} + +int test_async_build_pieces(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + cl_program program; + cl_build_status status; + + + buildNotificationSent = 0; + + /* First, test by doing the slow method of the individual calls */ + error = create_single_kernel_helper_create_program(context, &program, 1, sample_async_kernel); + test_error(error, "Unable to create program from source"); + + /* Compile the program */ + error = clBuildProgram( program, 1, &deviceID, NULL, test_notify_build_complete, (void *)"userData" ); + test_error( error, "Unable to build program source" ); + + /* Wait for build to complete (just keep polling, since we're just a test */ + if( ( error = 
clGetProgramBuildInfo( program, deviceID, CL_PROGRAM_BUILD_STATUS, sizeof( status ), &status, NULL ) ) != CL_SUCCESS ) + { + print_error( error, "Unable to get program build status" ); + return -1; + } + while( (int)status == CL_BUILD_IN_PROGRESS ) + { + log_info( "\n -- still waiting for build... (status is %d)", status ); + sleep( 1 ); + error = clGetProgramBuildInfo( program, deviceID, CL_PROGRAM_BUILD_STATUS, sizeof( status ), &status, NULL ); + test_error( error, "Unable to get program build status" ); + } + + if( status != CL_BUILD_SUCCESS ) + { + log_error( "ERROR: build failed! (status: %d)\n", (int)status ); + return -1; + } + + if( buildNotificationSent == 0 ) + { + log_error( "ERROR: Async build completed, but build notification was not sent!\n" ); + return -1; + } + + clReleaseProgram( program ); + + return 0; +} + + diff --git a/test_conformance/compiler/test_build_helpers.c b/test_conformance/compiler/test_build_helpers.c new file mode 100644 index 00000000..0fcb7103 --- /dev/null +++ b/test_conformance/compiler/test_build_helpers.c @@ -0,0 +1,571 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "testBase.h" +#include "../../test_common/harness/testHarness.h" +#include "../../test_common/harness/parseParameters.h" + +const char *sample_kernel_code_single_line[] = { +"__kernel void sample_test(__global float *src, __global int *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = (int)src[tid];\n" +"\n" +"}\n" }; + +size_t sample_single_line_lengths[1]; + +const char *sample_kernel_code_multi_line[] = { +"__kernel void sample_test(__global float *src, __global int *dst)", +"{", +" int tid = get_global_id(0);", +"", +" dst[tid] = (int)src[tid];", +"", +"}" }; + +size_t sample_multi_line_lengths[ 7 ]; + +const char *sample_kernel_code_two_line[] = { +"__kernel void sample_test(__global float *src, __global int *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = (int)src[tid];\n" +"\n" +"}\n", +"__kernel void sample_test2(__global int *src, __global float *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = (float)src[tid];\n" +"\n" +"}\n" }; + +size_t sample_two_line_lengths[1]; + +const char *sample_kernel_code_bad_multi_line[] = { +"__kernel void sample_test(__global float *src, __global int *dst)", +"{", +" int tid = get_global_id(0);thisisanerror", +"", +" dst[tid] = (int)src[tid];", +"", +"}" }; + +size_t sample_bad_multi_line_lengths[ 7 ]; + + +int test_load_program_source(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + clProgramWrapper program; + size_t length; + char *buffer; + + /* Preprocess: calc the length of each source file line */ + sample_single_line_lengths[ 0 ] = strlen( sample_kernel_code_single_line[ 0 ] ); + + /* New OpenCL API only has one entry point, so go ahead and just try it */ + program = clCreateProgramWithSource( context, 1, sample_kernel_code_single_line, sample_single_line_lengths, &error); + test_error( error, "Unable to create reference program" ); + + /* Now get the source and compare against our original 
*/ + error = clGetProgramInfo( program, CL_PROGRAM_SOURCE, NULL, NULL, &length ); + test_error( error, "Unable to get length of first program source" ); + + // Note: according to spec section 5.4.5, the length returned should include the null terminator + if( length != sample_single_line_lengths[0] + 1 ) + { + log_error( "ERROR: Length of program (%ld) does not match reference length (%ld)!\n", length, sample_single_line_lengths[0] + 1 ); + return -1; + } + + buffer = (char *)malloc( length ); + error = clGetProgramInfo( program, CL_PROGRAM_SOURCE, length, buffer, NULL ); + test_error( error, "Unable to get buffer of first program source" ); + + if( strcmp( (char *)buffer, sample_kernel_code_single_line[ 0 ] ) != 0 ) + { + log_error( "ERROR: Program sources do not match!\n" ); + return -1; + } + + /* All done */ + free( buffer ); + + return 0; +} + +int test_load_multistring_source(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + clProgramWrapper program; + + int i; + + + /* Preprocess: calc the length of each source file line */ + for( i = 0; i < 7; i++ ) + { + sample_multi_line_lengths[ i ] = strlen( sample_kernel_code_multi_line[ i ] ); + } + + /* Create another program using the macro function */ + program = clCreateProgramWithSource( context, 7, sample_kernel_code_multi_line, sample_multi_line_lengths, &error ); + if( program == NULL ) + { + log_error( "ERROR: Unable to create reference program!\n" ); + return -1; + } + + /* Try compiling */ + error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL ); + test_error( error, "Unable to build multi-line program source" ); + + /* Should probably check binary here to verify the same results... */ + + /* All done! 
*/ + + return 0; +} + +int test_load_two_kernel_source(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + cl_program program; + + int i; + + + /* Preprocess: calc the length of each source file line */ + for( i = 0; i < 2; i++ ) + { + sample_two_line_lengths[ i ] = strlen( sample_kernel_code_two_line[ i ] ); + } + + /* Now create a program using the macro function */ + program = clCreateProgramWithSource( context, 2, sample_kernel_code_two_line, sample_two_line_lengths, &error ); + if( program == NULL ) + { + log_error( "ERROR: Unable to create two-kernel program!\n" ); + return -1; + } + + /* Try compiling */ + error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL ); + test_error( error, "Unable to build two-kernel program source" ); + + /* Should probably check binary here to verify the same results... */ + + /* All done! */ + clReleaseProgram( program ); + + return 0; +} + +int test_load_null_terminated_source(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + cl_program program; + + + /* Now create a program using the macro function */ + program = clCreateProgramWithSource( context, 1, sample_kernel_code_single_line, NULL, &error ); + if( program == NULL ) + { + log_error( "ERROR: Unable to create null-terminated program!" ); + return -1; + } + + /* Try compiling */ + error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL ); + test_error( error, "Unable to build null-terminated program source" ); + + /* Should probably check binary here to verify the same results... */ + + /* All done! 
*/ + clReleaseProgram( program ); + + return 0; +} + +int test_load_null_terminated_multi_line_source(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + cl_program program; + + + /* Now create a program using the macro function */ + program = clCreateProgramWithSource( context, 7, sample_kernel_code_multi_line, NULL, &error ); + if( program == NULL ) + { + log_error( "ERROR: Unable to create null-terminated program!" ); + return -1; + } + + /* Try compiling */ + error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL ); + test_error( error, "Unable to build null-terminated program source" ); + + /* Should probably check binary here to verify the same results... */ + + /* All done! */ + clReleaseProgram( program ); + + return 0; +} + + +int test_load_discreet_length_source(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + cl_program program; + + int i; + + + /* Preprocess: calc the length of each source file line */ + for( i = 0; i < 7; i++ ) + { + sample_bad_multi_line_lengths[ i ] = strlen( sample_kernel_code_bad_multi_line[ i ] ); + } + + /* Now force the length of the third line to skip the actual error */ + sample_bad_multi_line_lengths[2] -= strlen("thisisanerror"); + + /* Now create a program using the macro function */ + program = clCreateProgramWithSource( context, 7, sample_kernel_code_bad_multi_line, sample_bad_multi_line_lengths, &error ); + if( program == NULL ) + { + log_error( "ERROR: Unable to create null-terminated program!" ); + return -1; + } + + /* Try compiling */ + error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL ); + test_error( error, "Unable to build null-terminated program source" ); + + /* Should probably check binary here to verify the same results... */ + + /* All done! 
*/ + clReleaseProgram( program ); + + return 0; +} + +int test_load_null_terminated_partial_multi_line_source(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + cl_program program; + int i; + + /* Preprocess: calc the length of each source file line */ + for( i = 0; i < 7; i++ ) + { + if( i & 0x01 ) + sample_multi_line_lengths[ i ] = 0; /* Should force for null-termination on this line only */ + else + sample_multi_line_lengths[ i ] = strlen( sample_kernel_code_multi_line[ i ] ); + } + + /* Now create a program using the macro function */ + program = clCreateProgramWithSource( context, 7, sample_kernel_code_multi_line, sample_multi_line_lengths, &error ); + if( program == NULL ) + { + log_error( "ERROR: Unable to create null-terminated program!" ); + return -1; + } + + /* Try compiling */ + error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL ); + test_error( error, "Unable to build null-terminated program source" ); + + /* Should probably check binary here to verify the same results... */ + + /* All done! */ + clReleaseProgram( program ); + + return 0; +} + +int test_get_program_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + cl_program program; + cl_device_id device1; + cl_context context1; + size_t paramSize; + cl_uint numInstances; + + + error = create_single_kernel_helper_create_program(context, &program, 1, sample_kernel_code_single_line); + if( program == NULL ) + { + log_error( "ERROR: Unable to create reference program!\n" ); + return -1; + } + + /* Test that getting the device works. */ + device1 = (cl_device_id)0xbaadfeed; + error = clGetProgramInfo( program, CL_PROGRAM_DEVICES, sizeof( device1 ), &device1, NULL ); + test_error( error, "Unable to get device of program" ); + + /* Since the device IDs are opaque types we check the CL_DEVICE_VENDOR_ID which is unique for identical hardware. 
*/ + cl_uint device1_vid, deviceID_vid; + error = clGetDeviceInfo(device1, CL_DEVICE_VENDOR_ID, sizeof(device1_vid), &device1_vid, NULL ); + test_error( error, "Unable to get device CL_DEVICE_VENDOR_ID" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_VENDOR_ID, sizeof(deviceID_vid), &deviceID_vid, NULL ); + test_error( error, "Unable to get device CL_DEVICE_VENDOR_ID" ); + + if( device1_vid != deviceID_vid ) + { + log_error( "ERROR: Incorrect device returned for program! (Expected vendor ID 0x%x, got 0x%x)\n", deviceID_vid, device1_vid ); + return -1; + } + + cl_uint devCount; + error = clGetProgramInfo( program, CL_PROGRAM_NUM_DEVICES, sizeof( devCount ), &devCount, NULL ); + test_error( error, "Unable to get device count of program" ); + + if( devCount != 1 ) + { + log_error( "ERROR: Invalid device count returned for program! (Expected 1, got %d)\n", (int)devCount ); + return -1; + } + + context1 = (cl_context)0xbaadfeed; + error = clGetProgramInfo( program, CL_PROGRAM_CONTEXT, sizeof( context1 ), &context1, NULL ); + test_error( error, "Unable to get device of program" ); + + if( context1 != context ) + { + log_error( "ERROR: Invalid context returned for program! 
(Expected %p, got %p)\n", context, context1 ); + return -1; + } + + error = clGetProgramInfo( program, CL_PROGRAM_REFERENCE_COUNT, sizeof( numInstances ), &numInstances, NULL ); + test_error( error, "Unable to get instance count" ); + + /* While we're at it, test the sizes of programInfo too */ + error = clGetProgramInfo( program, CL_PROGRAM_DEVICES, NULL, NULL, &paramSize ); + test_error( error, "Unable to get device param size" ); + if( paramSize != sizeof( cl_device_id ) ) + { + log_error( "ERROR: Size returned for device is wrong!\n" ); + return -1; + } + + error = clGetProgramInfo( program, CL_PROGRAM_CONTEXT, NULL, NULL, &paramSize ); + test_error( error, "Unable to get context param size" ); + if( paramSize != sizeof( cl_context ) ) + { + log_error( "ERROR: Size returned for context is wrong!\n" ); + return -1; + } + + error = clGetProgramInfo( program, CL_PROGRAM_REFERENCE_COUNT, NULL, NULL, &paramSize ); + test_error( error, "Unable to get instance param size" ); + if( paramSize != sizeof( cl_uint ) ) + { + log_error( "ERROR: Size returned for num instances is wrong!\n" ); + return -1; + } + + error = clGetProgramInfo( program, CL_PROGRAM_NUM_DEVICES, NULL, NULL, &paramSize ); + test_error( error, "Unable to get device count param size" ); + if( paramSize != sizeof( cl_uint ) ) + { + log_error( "ERROR: Size returned for device count is wrong!\n" ); + return -1; + } + + clReleaseProgram( program ); + + return 0; +} + +int test_get_program_source(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + cl_program program; + int error; + char buffer[10240]; + size_t length; + + + error = create_single_kernel_helper_create_program(context, &program, 1, sample_kernel_code_single_line); + if( program == NULL ) + { + log_error( "ERROR: Unable to create test program!\n" ); + return -1; + } + + /* Try getting the length */ + error = clGetProgramInfo( program, CL_PROGRAM_SOURCE, NULL, NULL, &length ); + test_error( error, "Unable to get program 
source length" ); + if (length != strlen(sample_kernel_code_single_line[0]) + 1 && !(gOfflineCompiler && gOfflineCompilerOutputType != kSource)) + { + log_error( "ERROR: Length returned for program source is incorrect!\n" ); + return -1; + } + + /* Try normal source */ + error = clGetProgramInfo( program, CL_PROGRAM_SOURCE, sizeof( buffer ), buffer, NULL ); + test_error( error, "Unable to get program source" ); + if (strlen(buffer) != strlen(sample_kernel_code_single_line[0]) && !(gOfflineCompiler && gOfflineCompilerOutputType != kSource)) + { + log_error( "ERROR: Length of program source is incorrect!\n" ); + return -1; + } + + /* Try both at once */ + error = clGetProgramInfo( program, CL_PROGRAM_SOURCE, sizeof( buffer ), buffer, &length ); + test_error( error, "Unable to get program source" ); + if (strlen(buffer) != strlen(sample_kernel_code_single_line[0]) && !(gOfflineCompiler && gOfflineCompilerOutputType != kSource)) + { + log_error( "ERROR: Length of program source is incorrect!\n" ); + return -1; + } + if (length != strlen(sample_kernel_code_single_line[0]) + 1 && !(gOfflineCompiler && gOfflineCompilerOutputType != kSource)) + { + log_error( "ERROR: Returned length of program source is incorrect!\n" ); + return -1; + } + + /* if we got here, everything passed */ + clReleaseProgram( program ); + return 0; +} + +int test_get_program_build_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + cl_program program; + int error; + char *buffer; + size_t length, newLength; + cl_build_status status; + + + error = create_single_kernel_helper_create_program(context, &program, 1, sample_kernel_code_single_line); + if( program == NULL ) + { + log_error( "ERROR: Unable to create test program!\n" ); + return -1; + } + + /* Make sure getting the length works */ + error = clGetProgramBuildInfo( program, deviceID, CL_PROGRAM_BUILD_STATUS, 0, NULL, &length ); + test_error( error, "Unable to get program build status length" ); + if( 
length != sizeof( status ) ) + { + log_error( "ERROR: Returned length of program build status is invalid! (Expected %d, got %d)\n", (int)sizeof( status ), (int)length ); + return -1; + } + + /* Now actually build it and verify the status */ + error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL ); + test_error( error, "Unable to build program source" ); + + error = clGetProgramBuildInfo( program, deviceID, CL_PROGRAM_BUILD_STATUS, sizeof( status ), &status, NULL ); + test_error( error, "Unable to get program build status" ); + if( status != CL_BUILD_SUCCESS ) + { + log_error( "ERROR: Getting built program build status did not return CL_BUILD_SUCCESS! (%d)\n", (int)status ); + return -1; + } + + /***** Build log *****/ + + /* Try getting the length */ + error = clGetProgramBuildInfo( program, deviceID, CL_PROGRAM_BUILD_LOG, 0, NULL, &length ); + test_error( error, "Unable to get program build log length" ); + + log_info("Build log is %ld long.\n", length); + + buffer = (char*)malloc(length); + + /* Try normal source */ + error = clGetProgramBuildInfo( program, deviceID, CL_PROGRAM_BUILD_LOG, length, buffer, NULL ); + test_error( error, "Unable to get program build log" ); + + if( buffer[length-1] != '\0' ) + { + log_error( "clGetProgramBuildInfo overwrote allocated space for build log! 
'%c'\n", buffer[length-1] ); + return -1; + } + + /* Try both at once */ + error = clGetProgramBuildInfo( program, deviceID, CL_PROGRAM_BUILD_LOG, length, buffer, &newLength ); + test_error( error, "Unable to get program build log" ); + + free(buffer); + + + /***** Build options *****/ + error = clGetProgramBuildInfo( program, deviceID, CL_PROGRAM_BUILD_OPTIONS, 0, NULL, &length ); + test_error( error, "Unable to get program build options length" ); + + buffer = (char*)malloc(length); + + /* Try normal source */ + error = clGetProgramBuildInfo( program, deviceID, CL_PROGRAM_BUILD_OPTIONS, length, buffer, NULL ); + test_error( error, "Unable to get program build options" ); + + /* Try both at once */ + error = clGetProgramBuildInfo( program, deviceID, CL_PROGRAM_BUILD_OPTIONS, length, buffer, &newLength ); + test_error( error, "Unable to get program build options" ); + + free(buffer); + + /* Try with a valid option */ + clReleaseProgram( program ); + program = clCreateProgramWithSource( context, 1, sample_kernel_code_single_line, NULL, &error ); + if( program == NULL ) + { + log_error( "ERROR: Unable to create test program!\n" ); + return -1; + } + + error = clBuildProgram( program, 1, &deviceID, "-cl-opt-disable", NULL, NULL ); + if( error != CL_SUCCESS ) + { + print_error( error, "Building with valid options failed!" 
); + return -1; + } + + error = clGetProgramBuildInfo( program, deviceID, CL_PROGRAM_BUILD_OPTIONS, NULL, NULL, &length ); + test_error( error, "Unable to get program build options" ); + + buffer = (char*)malloc(length); + + error = clGetProgramBuildInfo( program, deviceID, CL_PROGRAM_BUILD_OPTIONS, length, buffer, NULL ); + test_error( error, "Unable to get program build options" ); + if( strcmp( (char *)buffer, "-cl-opt-disable" ) != 0 ) + { + log_error( "ERROR: Getting program build options for program with -cl-opt-disable build options did not return expected value (got %s)\n", buffer ); + return -1; + } + + free(buffer); + + /* if we got here, everything passed */ + clReleaseProgram( program ); + return 0; +} + + + + diff --git a/test_conformance/compiler/test_build_options.cpp b/test_conformance/compiler/test_build_options.cpp new file mode 100644 index 00000000..791eafd9 --- /dev/null +++ b/test_conformance/compiler/test_build_options.cpp @@ -0,0 +1,410 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "testBase.h" +#include "../../test_common/harness/os_helpers.h" + +const char *preprocessor_test_kernel[] = { +"__kernel void sample_test(__global int *dst)\n" +"{\n" +" dst[0] = TEST_MACRO;\n" +"\n" +"}\n" }; + +const char *preprocessor_existence_test_kernel[] = { + "__kernel void sample_test(__global int *dst)\n" + "{\n" + "#ifdef TEST_MACRO\n" + " dst[0] = 42;\n" + "#else\n" + " dst[0] = 24;\n" + "#endif\n" + "\n" + "}\n" }; + +const char *include_test_kernel[] = { +"#include \"./testIncludeFile.h\"\n" +"__kernel void sample_test(__global int *dst)\n" +"{\n" +" dst[0] = HEADER_FOUND;\n" +"\n" +"}\n" }; + +const char *options_test_kernel[] = { +"__kernel void sample_test(__global float *src, __global int *dst)\n" +"{\n" +" size_t tid = get_global_id(0);\n" +" dst[tid] = src[tid];\n" +"}\n" }; + +const char *optimization_options[] = { + "-cl-single-precision-constant", + "-cl-denorms-are-zero", + "-cl-opt-disable", + "-cl-mad-enable", + "-cl-no-signed-zeros", + "-cl-unsafe-math-optimizations", + "-cl-finite-math-only", + "-cl-fast-relaxed-math", + "-w", + "-Werror", +#if defined( __APPLE__ ) + "-cl-opt-enable", + "-cl-auto-vectorize-enable" +#endif + }; + +cl_int get_result_from_program( cl_context context, cl_command_queue queue, cl_program program, cl_int *outValue ) +{ + cl_int error; + clKernelWrapper kernel = clCreateKernel( program, "sample_test", &error ); + test_error( error, "Unable to create kernel from program" ); + + clMemWrapper outStream; + outStream = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int), NULL, &error ); + test_error( error, "Unable to create test buffer" ); + + error = clSetKernelArg( kernel, 0, sizeof( outStream ), &outStream ); + test_error( error, "Unable to set kernel argument" ); + + size_t threads[1] = { 1 }; + + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL ); + test_error( error, "Unable to execute test kernel" ); + + error = clEnqueueReadBuffer( 
queue, outStream, true, 0, sizeof( cl_int ), outValue, 0, NULL, NULL ); + test_error( error, "Unable to read output array!" ); + + return CL_SUCCESS; +} + +int test_options_optimizations(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + cl_build_status status; + + for(size_t i = 0; i < sizeof(optimization_options) / (sizeof(char*)); i++) { + + clProgramWrapper program; + error = create_single_kernel_helper_create_program(context, &program, 1, options_test_kernel, optimization_options[i]); + if( program == NULL || error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create reference program!\n" ); + return -1; + } + + /* Build with the macro defined */ + log_info("Testing optimization option '%s'\n", optimization_options[i]); + error = clBuildProgram( program, 1, &deviceID, optimization_options[i], NULL, NULL ); + test_error( error, "Test program did not properly build" ); + + error = clGetProgramBuildInfo( program, deviceID, CL_PROGRAM_BUILD_STATUS, sizeof( status ), &status, NULL ); + test_error( error, "Unable to get program build status" ); + + if( (int)status != CL_BUILD_SUCCESS ) + { + log_info("Building with optimization option '%s' failed to compile!\n", optimization_options[i]); + print_error( error, "Failed to build with optimization defined") + return -1; + } + } + return 0; +} + +int test_options_build_macro(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + clProgramWrapper program; + cl_build_status status; + + + program = clCreateProgramWithSource( context, 1, preprocessor_test_kernel, NULL, &error ); + if( program == NULL || error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create reference program!\n" ); + return -1; + } + + /* Build with the macro defined */ + error = clBuildProgram( program, 1, &deviceID, "-DTEST_MACRO=1 ", NULL, NULL ); + test_error( error, "Test program did not properly build" ); + + error = clGetProgramBuildInfo( 
program, deviceID, CL_PROGRAM_BUILD_STATUS, sizeof( status ), &status, NULL ); + test_error( error, "Unable to get program build status" ); + + if( (int)status != CL_BUILD_SUCCESS ) + { + print_error( error, "Failed to build with macro defined" ); + return -1; + } + + + // Go ahead and run the program to verify results + cl_int firstResult, secondResult; + + error = get_result_from_program( context, queue, program, &firstResult ); + test_error( error, "Unable to get result from first program" ); + + if( firstResult != 1 ) + { + log_error( "ERROR: Result from first program did not validate! (Expected 1, got %d)\n", firstResult ); + return -1; + } + + // Rebuild with a different value for the define macro, to make sure caching behaves properly + error = clBuildProgram( program, 1, &deviceID, "-DTEST_MACRO=5 ", NULL, NULL ); + test_error( error, "Test program did not properly rebuild" ); + + error = get_result_from_program( context, queue, program, &secondResult ); + test_error( error, "Unable to get result from second program" ); + + if( secondResult != 5 ) + { + if( secondResult == firstResult ) + log_error( "ERROR: Program result did not change with device macro change (program was not recompiled)!\n" ); + else + log_error( "ERROR: Result from second program did not validate! 
(Expected 5, got %d)\n", secondResult ); + return -1; + } + + return 0; +} + +int test_options_build_macro_existence(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + clProgramWrapper program; + + + // In this case, the program should still run without the macro, but it should give a different result + program = clCreateProgramWithSource( context, 1, preprocessor_existence_test_kernel, NULL, &error ); + if( program == NULL || error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create reference program!\n" ); + return -1; + } + + /* Build without the macro defined */ + error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL ); + test_error( error, "Test program did not properly build" ); + + // Go ahead and run the program to verify results + cl_int firstResult, secondResult; + + error = get_result_from_program( context, queue, program, &firstResult ); + test_error( error, "Unable to get result from first program" ); + + if( firstResult != 24 ) + { + log_error( "ERROR: Result from first program did not validate! (Expected 24, got %d)\n", firstResult ); + return -1; + } + + // Now compile again with the macro defined and verify a change in results + error = clBuildProgram( program, 1, &deviceID, "-DTEST_MACRO", NULL, NULL ); + test_error( error, "Test program did not properly build" ); + + error = get_result_from_program( context, queue, program, &secondResult ); + test_error( error, "Unable to get result from second program" ); + + if( secondResult != 42 ) + { + if( secondResult == firstResult ) + log_error( "ERROR: Program result did not change with device macro addition (program was not recompiled)!\n" ); + else + log_error( "ERROR: Result from second program did not validate! 
(Expected 42, got %d)\n", secondResult ); + return -1; + } + + return 0; +} + +int test_options_include_directory(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + + std::string sep = dir_sep(); + std::string path = exe_dir(); // Directory where test executable is located. + std::string include_dir; + + clProgramWrapper program; + cl_build_status status; + + /* Try compiling the program first without the directory included Should fail. */ + program = clCreateProgramWithSource( context, 1, include_test_kernel, NULL, &error ); + if( program == NULL || error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create reference program!\n" ); + return -1; + } + + /* Build with the include directory defined */ + include_dir = "-I " + path + sep + "includeTestDirectory"; + +// log_info("%s\n", include_dir); + error = clBuildProgram( program, 1, &deviceID, include_dir.c_str(), NULL, NULL ); + test_error( error, "Test program did not properly build" ); + + error = clGetProgramBuildInfo( program, deviceID, CL_PROGRAM_BUILD_STATUS, sizeof( status ), &status, NULL ); + test_error( error, "Unable to get program build status" ); + + if( (int)status != CL_BUILD_SUCCESS ) + { + print_error( error, "Failed to build with include directory" ); + return -1; + } + + // Go ahead and run the program to verify results + cl_int firstResult, secondResult; + + error = get_result_from_program( context, queue, program, &firstResult ); + test_error( error, "Unable to get result from first program" ); + + if( firstResult != 12 ) + { + log_error( "ERROR: Result from first program did not validate! 
(Expected 12, got %d)\n", firstResult ); + return -1; + } + + // Rebuild with a different include directory + include_dir = "-I " + path + sep + "secondIncludeTestDirectory"; + error = clBuildProgram( program, 1, &deviceID, include_dir.c_str(), NULL, NULL ); + test_error( error, "Test program did not properly rebuild" ); + + error = get_result_from_program( context, queue, program, &secondResult ); + test_error( error, "Unable to get result from second program" ); + + if( secondResult != 42 ) + { + if( secondResult == firstResult ) + log_error( "ERROR: Program result did not change with include path change (program was not recompiled)!\n" ); + else + log_error( "ERROR: Result from second program did not validate! (Expected 42, got %d)\n", secondResult ); + return -1; + } + + return 0; +} + +const char *denorm_test_kernel[] = { + "__kernel void sample_test( float src1, float src2, __global float *dst)\n" + "{\n" + " dst[ 0 ] = src1 + src2;\n" + "\n" + "}\n" }; + +cl_int get_float_result_from_program( cl_context context, cl_command_queue queue, cl_program program, cl_float inA, cl_float inB, cl_float *outValue ) +{ + cl_int error; + + clKernelWrapper kernel = clCreateKernel( program, "sample_test", &error ); + test_error( error, "Unable to create kernel from program" ); + + clMemWrapper outStream = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float), NULL, &error ); + test_error( error, "Unable to create test buffer" ); + + error = clSetKernelArg( kernel, 0, sizeof( cl_float ), &inA ); + test_error( error, "Unable to set kernel argument" ); + + error = clSetKernelArg( kernel, 1, sizeof( cl_float ), &inB ); + test_error( error, "Unable to set kernel argument" ); + + error = clSetKernelArg( kernel, 2, sizeof( outStream ), &outStream ); + test_error( error, "Unable to set kernel argument" ); + + size_t threads[1] = { 1 }; + + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL ); + test_error( error, "Unable to 
execute test kernel" ); + + error = clEnqueueReadBuffer( queue, outStream, true, 0, sizeof( cl_float ), outValue, 0, NULL, NULL ); + test_error( error, "Unable to read output array!" ); + + return CL_SUCCESS; +} + +int test_options_denorm_cache(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + + clProgramWrapper program; + cl_build_status status; + + + // If denorms aren't even supported, testing this flag is pointless + cl_device_fp_config floatCaps = 0; + error = clGetDeviceInfo( deviceID, CL_DEVICE_SINGLE_FP_CONFIG, sizeof(floatCaps), &floatCaps, NULL); + test_error( error, "Unable to get device FP config" ); + if( ( floatCaps & CL_FP_DENORM ) == 0 ) + { + log_info( "Device does not support denormalized single-precision floats; skipping test.\n" ); + return 0; + } + + program = clCreateProgramWithSource( context, 1, denorm_test_kernel, NULL, &error ); + test_error( error, "Unable to create test program" ); + + // Build first WITH the denorm flush flag + error = clBuildProgram( program, 1, &deviceID, "-cl-denorms-are-zero", NULL, NULL ); + test_error( error, "Test program did not properly build" ); + + error = clGetProgramBuildInfo( program, deviceID, CL_PROGRAM_BUILD_STATUS, sizeof( status ), &status, NULL ); + test_error( error, "Unable to get program build status" ); + + if( (int)status != CL_BUILD_SUCCESS ) + { + print_error( error, "Failed to build with include directory" ); + return -1; + } + + // Note: the following in floating point is a subnormal number, thus adding two of them together + // should give us a subnormalized result. 
If denormals are flushed to zero, however, it'll give us zero instead + uint32_t intSubnormal = 0x00000001; + cl_float *input = (cl_float *)&intSubnormal; + cl_float firstResult, secondResult; + + error = get_float_result_from_program( context, queue, program, *input, *input, &firstResult ); + test_error( error, "Unable to get result from first program" ); + + // Note: since -cl-denorms-are-zero is a HINT, not a requirement, the result we got could + // either be subnormal (hint ignored) or zero (hint respected). Since either is technically + // valid, there isn't anything we can to do validate results for now + + // Rebuild without flushing flag set + error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL ); + test_error( error, "Test program did not properly rebuild" ); + + error = get_float_result_from_program( context, queue, program, *input, *input, &secondResult ); + test_error( error, "Unable to get result from second program" ); + + // Now, there are three possiblities here: + // 1. The denorms-are-zero hint is not respected, in which case the first and second result will be identical + // 2. The hint is respected, and the program was properly rebuilt, in which case the first result will be zero and the second non-zero + // 3. 
The hint is respected, but the program was not properly rebuilt, in which case both results will be zero + // 3 is the only error condition we need to look for + uint32_t *fPtr = (uint32_t *)&firstResult; + uint32_t *sPtr = (uint32_t *)&secondResult; + + if( ( *fPtr == 0 ) && ( *sPtr == 0 ) ) + { + log_error( "ERROR: Program result didn't change when -cl-denorms-are-zero flag was removed.\n" + "First result (should be zero): 0x%08x, Second result (should be non-zero): 0x%08x\n", + *fPtr, *sPtr ); + return -1; + } + + return 0; +} + diff --git a/test_conformance/compiler/test_compile.c b/test_conformance/compiler/test_compile.c new file mode 100644 index 00000000..926d69bd --- /dev/null +++ b/test_conformance/compiler/test_compile.c @@ -0,0 +1,3170 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "testBase.h" +#if defined(_WIN32) +#include +#elif defined(__linux__) || defined(__APPLE__) +#include +#include +#endif +#include "../../test_common/harness/conversions.h" + +extern cl_uint gRandomSeed; + +#define MAX_LINE_SIZE_IN_PROGRAM 1024 +#define MAX_LOG_SIZE_IN_PROGRAM 2048 + +const char *sample_kernel_start = +"__kernel void sample_test(__global float *src, __global int *dst)\n" +"{\n" +" float temp;\n" +" int tid = get_global_id(0);\n"; + +const char *sample_kernel_end = "}\n"; + +const char *sample_kernel_lines[] = { +"dst[tid] = src[tid];\n", +"dst[tid] = src[tid] * 3.f;\n", +"temp = src[tid] / 4.f;\n", +"dst[tid] = dot(temp,src[tid]);\n", +"dst[tid] = dst[tid] + temp;\n" }; + +/* I compile and link therefore I am. Robert Ioffe */ +/* The following kernels are used in testing Improved Compilation and Linking feature */ + +const char *simple_kernel = +"__kernel void\n" +"CopyBuffer(\n" +" __global float* src,\n" +" __global float* dst )\n" +"{\n" +" int id = (int)get_global_id(0);\n" +" dst[id] = src[id];\n" +"}\n"; + +const char *simple_kernel_with_defines = +"__kernel void\n" +"CopyBuffer(\n" +" __global float* src,\n" +" __global float* dst )\n" +"{\n" +" int id = (int)get_global_id(0);\n" +" float temp = src[id] - 42;\n" +" dst[id] = FIRST + temp + SECOND;\n" +"}\n"; + +const char *simple_kernel_template = +"__kernel void\n" +"CopyBuffer%d(\n" +" __global float* src,\n" +" __global float* dst )\n" +"{\n" +" int id = (int)get_global_id(0);\n" +" dst[id] = src[id];\n" +"}\n"; + +const char *composite_kernel_start = +"__kernel void\n" +"CompositeKernel(\n" +" __global float* src,\n" +" __global float* dst )\n" +"{\n"; + +const char *composite_kernel_end = "}\n"; + +const char *composite_kernel_template = +" CopyBuffer%d(src, dst);\n"; + +const char *composite_kernel_extern_template = +"extern __kernel void\n" +"CopyBuffer%d(\n" +" __global float* src,\n" +" __global float* dst );\n"; + +const char *another_simple_kernel = +"extern __kernel 
void\n" +"CopyBuffer(\n" +" __global float* src,\n" +" __global float* dst );\n" +"__kernel void\n" +"AnotherCopyBuffer(\n" +" __global float* src,\n" +" __global float* dst )\n" +"{\n" +" CopyBuffer(src, dst);\n" +"}\n"; + +const char* simple_header = +"extern __kernel void\n" +"CopyBuffer(\n" +" __global float* src,\n" +" __global float* dst );\n"; + +const char* simple_header_name = "simple_header.h"; + +const char* another_simple_kernel_with_header = +"#include \"simple_header.h\"\n" +"__kernel void\n" +"AnotherCopyBuffer(\n" +" __global float* src,\n" +" __global float* dst )\n" +"{\n" +" CopyBuffer(src, dst);\n" +"}\n"; + +const char* header_name_templates[4] = { "simple_header%d.h", + "foo/simple_header%d.h", + "foo/bar/simple_header%d.h", + "foo/bar/baz/simple_header%d.h"}; + +const char* include_header_name_templates[4] = { "#include \"simple_header%d.h\"\n", + "#include \"foo/simple_header%d.h\"\n", + "#include \"foo/bar/simple_header%d.h\"\n", + "#include \"foo/bar/baz/simple_header%d.h\"\n"}; + +const char* compile_extern_var = "extern constant float foo;\n"; +const char* compile_extern_struct = "extern constant struct bar bart;\n"; +const char* compile_extern_function = "extern int baz(int, int);\n"; + +const char* compile_static_var = "static constant float foo = 2.78;\n"; +const char* compile_static_struct = "static constant struct bar {float x, y, z, r; int color; } foo = {3.14159};\n"; +const char* compile_static_function = "static int foo(int x, int y) { return x*x + y*y; }\n"; + +const char* compile_regular_var = "constant float foo = 4.0f;\n"; +const char* compile_regular_struct = "constant struct bar {float x, y, z, r; int color; } foo = {0.f, 0.f, 0.f, 0.f, 0};\n"; +const char* compile_regular_function = "int foo(int x, int y) { return x*x + y*y; }\n"; + +const char* link_static_var_access = // use with compile_static_var +"extern constant float foo;\n" +"float access_foo() { return foo; }\n"; + +const char* link_static_struct_access = // use 
with compile_static_struct +"extern constant struct bar{float x, y, z, r; int color; } foo;\n" +"struct bar access_foo() {return foo; }\n"; + +const char* link_static_function_access = // use with compile_static_function +"extern int foo(int, int);\n" +"int access_foo() { int blah = foo(3, 4); return blah + 5; }\n"; + +int test_large_single_compile(cl_context context, cl_device_id deviceID, unsigned int numLines) +{ + int error; + cl_program program; + const char **lines; + unsigned int numChoices, i; + MTdata d; + + /* First, allocate the array for our line pointers */ + lines = (const char **)malloc( numLines * sizeof( const char * ) ); + if (lines == NULL) { + log_error( "ERROR: Unable to allocate lines array with %d lines! (in %s:%d)\n", numLines, __FILE__, __LINE__); + return -1; + } + + /* First and last lines are easy */ + lines[ 0 ] = sample_kernel_start; + lines[ numLines - 1 ] = sample_kernel_end; + + numChoices = sizeof( sample_kernel_lines ) / sizeof( sample_kernel_lines[ 0 ] ); + + /* Fill the rest with random lines to hopefully prevent much optimization */ + d = init_genrand( gRandomSeed ); + for( i = 1; i < numLines - 1; i++ ) + { + lines[ i ] = sample_kernel_lines[ genrand_int32(d) % numChoices ]; + } + free_mtdata(d); d = NULL; + + /* Try to create a program with these lines */ + error = create_single_kernel_helper_create_program(context, &program, numLines, lines); + if( program == NULL || error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create long test program with %d lines! (%s in %s:%d)", numLines, IGetErrorString( error ), __FILE__, __LINE__ ); + free( lines ); + if (program != NULL) + { + error = clReleaseProgram( program ); + test_error( error, "Unable to release a program object" ); + } + return -1; + } + + /* Build it */ + error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL ); + test_error( error, "Unable to build a long program" ); + + /* All done! 
*/ + error = clReleaseProgram( program ); + test_error( error, "Unable to release a program object" ); + + free( lines ); + + return 0; +} + +int test_large_compile(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + unsigned int toTest[] = { 64, 128, 256, 512, 1024, 2048, 4096, 0 }; //8192, 16384, 32768, 0 }; + unsigned int i; + + log_info( "Testing large compiles...this might take awhile...\n" ); + + for( i = 0; toTest[ i ] != 0; i++ ) + { + log_info( " %d...\n", toTest[ i ] ); + +#if defined(_WIN32) + clock_t start = clock(); +#elif defined(__linux__) || defined(__APPLE__) + timeval time1, time2; + gettimeofday(&time1, NULL); +#endif + + if( test_large_single_compile( context, deviceID, toTest[ i ] ) != 0 ) + { + log_error( "ERROR: long program test failed for %d lines! (in %s:%d)\n", toTest[ i ], __FILE__, __LINE__); + return -1; + } + +#if defined(_WIN32) + clock_t end = clock(); + log_perf( (float)( end - start ) / (float)CLOCKS_PER_SEC, false, "clock() time in secs", "%d lines", toTest[i] ); +#elif defined(__linux__) || defined(__APPLE__) + gettimeofday(&time2, NULL); + log_perf( (float)(float)(time2.tv_sec - time1.tv_sec) + 1.0e-6 * (time2.tv_usec - time1.tv_usec) , false, "wall time in secs", "%d lines", toTest[i] ); +#endif + } + + return 0; +} + +static int verifyCopyBuffer(cl_context context, cl_command_queue queue, cl_kernel kernel); + +#if defined(__APPLE__) || defined(__linux) +#define _strdup strdup +#endif + +int test_large_multi_file_library(cl_context context, cl_device_id deviceID, cl_command_queue queue, unsigned int numLines) +{ + int error; + cl_program program; + cl_program *simple_kernels; + const char **lines; + unsigned int i; + char buffer[MAX_LINE_SIZE_IN_PROGRAM]; + + simple_kernels = (cl_program*)malloc(numLines*sizeof(cl_program)); + if (simple_kernels == NULL) { + log_error( "ERROR: Unable to allocate kernels array with %d kernels! 
(in %s:%d)\n", numLines, __FILE__, __LINE__); + return -1; + } + /* First, allocate the array for our line pointers */ + lines = (const char **)malloc( (2*numLines + 2) * sizeof( const char * ) ); + if (lines == NULL) { + free(simple_kernels); + log_error( "ERROR: Unable to allocate lines array with %d lines! (in %s:%d)\n", (2*numLines + 2), __FILE__, __LINE__ ); + return -1; + } + + for( i = 0; i < numLines; i++) + { + sprintf(buffer, composite_kernel_extern_template, i); + lines[i] = _strdup(buffer); + } + /* First and last lines are easy */ + lines[ numLines ] = composite_kernel_start; + lines[ 2* numLines + 1] = composite_kernel_end; + + /* Fill the rest with templated kernels */ + for( i = numLines + 1; i < 2* numLines + 1; i++ ) + { + sprintf(buffer, composite_kernel_template, i - numLines - 1); + lines[ i ] = _strdup(buffer); + } + + /* Try to create a program with these lines */ + error = create_single_kernel_helper_create_program(context, &program, 2 * numLines + 2, lines); + if( program == NULL || error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create long test program with %d lines! 
(%s) (in %s:%d)\n", numLines, IGetErrorString( error ), __FILE__, __LINE__ ); + free( simple_kernels ); + for( i = 0; i < numLines; i++) + { + free( (void*)lines[i] ); + free( (void*)lines[i+numLines+1] ); + } + free( lines ); + if (program != NULL) + { + error = clReleaseProgram( program ); + test_error( error, "Unable to release program object" ); + } + + return -1; + } + + /* Compile it */ + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); + test_error( error, "Unable to compile a simple program" ); + + /* Create and compile templated kernels */ + for( i = 0; i < numLines; i++) + { + sprintf(buffer, simple_kernel_template, i); + const char* kernel_source = _strdup(buffer); + simple_kernels[i] = clCreateProgramWithSource( context, 1, &kernel_source, NULL, &error ); + if( simple_kernels[i] == NULL || error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create long test program with %d lines! (%s) (in %s:%d)\n", numLines, IGetErrorString( error ), __FILE__, __LINE__ ); + return -1; + } + + /* Compile it */ + error = clCompileProgram(simple_kernels[i], 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); + test_error( error, "Unable to compile a simple program" ); + + free((void*)kernel_source); + } + + /* Create library out of compiled templated kernels */ + cl_program my_newly_minted_library = clLinkProgram(context, 1, &deviceID, "-create-library", numLines, simple_kernels, NULL, NULL, &error); + test_error( error, "Unable to create a multi-line library" ); + + /* Link the program that calls the kernels and the library that contains them */ + cl_program programs[2] = { program, my_newly_minted_library }; + cl_program my_newly_linked_program = clLinkProgram(context, 1, &deviceID, NULL, 2, programs, NULL, NULL, &error); + test_error( error, "Unable to link a program with a library" ); + + // Create the composite kernel + cl_kernel kernel = clCreateKernel(my_newly_linked_program, "CompositeKernel", &error); + test_error( error, "Unable 
to create a composite kernel" ); + + // Run the composite kernel and verify the results + error = verifyCopyBuffer(context, queue, kernel); + if (error != CL_SUCCESS) + return error; + + /* All done! */ + error = clReleaseProgram( program ); + test_error( error, "Unable to release program object" ); + + for( i = 0; i < numLines; i++) + { + free( (void*)lines[i] ); + free( (void*)lines[i+numLines+1] ); + } + free( lines ); + + for(i = 0; i < numLines; i++) + { + error = clReleaseProgram( simple_kernels[i] ); + test_error( error, "Unable to release program object" ); + } + free( simple_kernels ); + + error = clReleaseKernel( kernel ); + test_error( error, "Unable to release kernel object" ); + + error = clReleaseProgram( my_newly_minted_library ); + test_error( error, "Unable to release program object" ); + + error = clReleaseProgram( my_newly_linked_program ); + test_error( error, "Unable to release program object" ); + + return 0; +} + +int test_multi_file_libraries(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + unsigned int toTest[] = { 2, 4, 8, 16, 32, 64, 128, 256, 0 }; // 512, 1024, 2048, 4096, 8192, 16384, 32768, 0 }; + unsigned int i; + + log_info( "Testing multi-file libraries ...this might take awhile...\n" ); + + for( i = 0; toTest[ i ] != 0; i++ ) + { + log_info( " %d...\n", toTest[ i ] ); + +#if defined(_WIN32) + clock_t start = clock(); +#elif defined(__linux__) || defined(__APPLE__) + timeval time1, time2; + gettimeofday(&time1, NULL); +#endif + + if( test_large_multi_file_library( context, deviceID, queue, toTest[ i ] ) != 0 ) + { + log_error( "ERROR: multi-file library program test failed for %d lines! 
(in %s:%d)\n\n", toTest[ i ], __FILE__, __LINE__ ); + return -1; + } + +#if defined(_WIN32) + clock_t end = clock(); + log_perf( (float)( end - start ) / (float)CLOCKS_PER_SEC, false, "clock() time in secs", "%d lines", toTest[i] ); +#elif defined(__linux__) || defined(__APPLE__) + gettimeofday(&time2, NULL); + log_perf( (float)(float)(time2.tv_sec - time1.tv_sec) + 1.0e-6 * (time2.tv_usec - time1.tv_usec) , false, "wall time in secs", "%d lines", toTest[i] ); +#endif + } + + return 0; +} + +int test_large_multiple_embedded_headers(cl_context context, cl_device_id deviceID, cl_command_queue queue, unsigned int numLines) +{ + int error; + cl_program program; + cl_program *simple_kernels; + cl_program *headers; + const char **header_names; + const char **lines; + unsigned int i; + char buffer[MAX_LINE_SIZE_IN_PROGRAM]; + + simple_kernels = (cl_program*)malloc(numLines*sizeof(cl_program)); + if (simple_kernels == NULL) { + log_error( "ERROR: Unable to allocate simple_kernels array with %d lines! (in %s:%d)\n", numLines, __FILE__, __LINE__ ); + return -1; + } + headers = (cl_program*)malloc(numLines*sizeof(cl_program)); + if (headers == NULL) { + log_error( "ERROR: Unable to allocate headers array with %d lines! (in %s:%d)\n", numLines, __FILE__, __LINE__ ); + return -1; + } + /* First, allocate the array for our line pointers */ + header_names = (const char**)malloc( numLines*sizeof( const char * ) ); + if (header_names == NULL) { + log_error( "ERROR: Unable to allocate header_names array with %d lines! (in %s:%d)\n", numLines, __FILE__, __LINE__ ); + return -1; + } + lines = (const char **)malloc( (2*numLines + 2)*sizeof( const char * ) ); + if (lines == NULL) { + log_error( "ERROR: Unable to allocate lines array with %d lines! 
(in %s:%d)\n", (2*numLines + 2), __FILE__, __LINE__ ); + return -1; + } + + for( i = 0; i < numLines; i++) + { + sprintf(buffer, include_header_name_templates[i % 4], i); + lines[i] = _strdup(buffer); + sprintf(buffer, header_name_templates[i % 4], i); + header_names[i] = _strdup(buffer); + + sprintf(buffer, composite_kernel_extern_template, i); + const char* line = _strdup(buffer); + error = create_single_kernel_helper_create_program(context, &headers[i], 1, &line); + if( headers[i] == NULL || error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create a simple header program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__); + return -1; + } + } + /* First and last lines are easy */ + lines[ numLines ] = composite_kernel_start; + lines[ 2* numLines + 1 ] = composite_kernel_end; + + /* Fill the rest with templated kernels */ + for( i = numLines + 1; i < 2* numLines + 1; i++ ) + { + sprintf(buffer, composite_kernel_template, i - numLines - 1); + lines[ i ] = _strdup(buffer); + } + + /* Try to create a program with these lines */ + error = create_single_kernel_helper_create_program(context, &program, 2 * numLines + 2, lines); + if( program == NULL || error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create long test program with %d lines! (%s) (in %s:%d)\n", numLines, IGetErrorString( error ), __FILE__, __LINE__ ); + return -1; + } + + /* Compile it */ + error = clCompileProgram(program, 1, &deviceID, NULL, numLines, headers, header_names, NULL, NULL); + test_error( error, "Unable to compile a simple program" ); + + /* Create and compile templated kernels */ + for( i = 0; i < numLines; i++) + { + sprintf(buffer, simple_kernel_template, i); + const char* kernel_source = _strdup(buffer); + error = create_single_kernel_helper_create_program(context, &simple_kernels[i], 1, &kernel_source); + if( simple_kernels[i] == NULL || error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create long test program with %d lines! 
(%s) (in %s:%d)\n", numLines, IGetErrorString( error ), __FILE__, __LINE__ ); + return -1; + } + + /* Compile it */ + error = clCompileProgram(simple_kernels[i], 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); + test_error( error, "Unable to compile a simple program" ); + + free((void*)kernel_source); + } + + /* Create library out of compiled templated kernels */ + cl_program my_newly_minted_library = clLinkProgram(context, 1, &deviceID, "-create-library", numLines, simple_kernels, NULL, NULL, &error); + test_error( error, "Unable to create a multi-line library" ); + + /* Link the program that calls the kernels and the library that contains them */ + cl_program programs[2] = { program, my_newly_minted_library }; + cl_program my_newly_linked_program = clLinkProgram(context, 1, &deviceID, NULL, 2, programs, NULL, NULL, &error); + test_error( error, "Unable to link a program with a library" ); + + // Create the composite kernel + cl_kernel kernel = clCreateKernel(my_newly_linked_program, "CompositeKernel", &error); + test_error( error, "Unable to create a composite kernel" ); + + // Run the composite kernel and verify the results + error = verifyCopyBuffer(context, queue, kernel); + if (error != CL_SUCCESS) + return error; + + /* All done! 
*/ + error = clReleaseProgram( program ); + test_error( error, "Unable to release program object" ); + + for( i = 0; i < numLines; i++) + { + free( (void*)lines[i] ); + free( (void*)header_names[i] ); + } + for( i = numLines + 1; i < 2* numLines + 1; i++ ) + { + free( (void*)lines[i] ); + } + free( lines ); + free( header_names ); + + for(i = 0; i < numLines; i++) + { + error = clReleaseProgram( simple_kernels[i] ); + test_error( error, "Unable to release program object" ); + error = clReleaseProgram( headers[i] ); + test_error( error, "Unable to release header program object" ); + } + free( simple_kernels ); + free( headers ); + + error = clReleaseProgram( my_newly_minted_library ); + test_error( error, "Unable to release program object" ); + + error = clReleaseProgram( my_newly_linked_program ); + test_error( error, "Unable to release program object" ); + + return 0; +} + +int test_multiple_embedded_headers(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + unsigned int toTest[] = { 2, 4, 8, 16, 32, 64, 128, 256, 0 }; // 512, 1024, 2048, 4096, 8192, 16384, 32768, 0 }; + unsigned int i; + + log_info( "Testing multiple embedded headers ...this might take awhile...\n" ); + + for( i = 0; toTest[ i ] != 0; i++ ) + { + log_info( " %d...\n", toTest[ i ] ); + +#if defined(_WIN32) + clock_t start = clock(); +#elif defined(__linux__) || defined(__APPLE__) + timeval time1, time2; + gettimeofday(&time1, NULL); +#endif + + if( test_large_multiple_embedded_headers( context, deviceID, queue, toTest[ i ] ) != 0 ) + { + log_error( "ERROR: multiple embedded headers program test failed for %d lines! 
(in %s:%d)\n", toTest[ i ], __FILE__, __LINE__ ); + return -1; + } + +#if defined(_WIN32) + clock_t end = clock(); + log_perf( (float)( end - start ) / (float)CLOCKS_PER_SEC, false, "clock() time in secs", "%d lines", toTest[i] ); +#elif defined(__linux__) || defined(__APPLE__) + gettimeofday(&time2, NULL); + log_perf( (float)(float)(time2.tv_sec - time1.tv_sec) + 1.0e-6 * (time2.tv_usec - time1.tv_usec) , false, "wall time in secs", "%d lines", toTest[i] ); +#endif + } + + return 0; +} + +double logbase(double a, double base) +{ + return log(a) / log(base); +} + +int test_large_multiple_libraries(cl_context context, cl_device_id deviceID, cl_command_queue queue, unsigned int numLines) +{ + int error; + cl_program *simple_kernels; + const char **lines; + unsigned int i; + char buffer[MAX_LINE_SIZE_IN_PROGRAM]; + /* I want to create (log2(N)+1)/2 libraries */ + unsigned int level = (unsigned int)(logbase(numLines, 2.0) + 1.000001)/2; + unsigned int numLibraries = (unsigned int)pow(2.0, level - 1.0); + unsigned int numFilesInLib = numLines/numLibraries; + cl_program *my_program_and_libraries = (cl_program*)malloc((1+numLibraries)*sizeof(cl_program)); + if (my_program_and_libraries == NULL) { + log_error( "ERROR: Unable to allocate program array with %d programs! (in %s:%d)\n", (1+numLibraries), __FILE__, __LINE__); + return -1; + } + + log_info("level - %d, numLibraries - %d, numFilesInLib - %d\n", level, numLibraries, numFilesInLib); + + simple_kernels = (cl_program*)malloc(numLines*sizeof(cl_program)); + if (simple_kernels == NULL) { + log_error( "ERROR: Unable to allocate kernels array with %d kernels! (in %s:%d)\n", numLines, __FILE__, __LINE__); + return -1; + } + /* First, allocate the array for our line pointers */ + lines = (const char **)malloc( (2*numLines + 2) * sizeof( const char * ) ); + if (lines == NULL) { + log_error( "ERROR: Unable to allocate lines array with %d lines! 
(in %s:%d)\n", (2*numLines + 2), __FILE__, __LINE__); + return -1; + } + + for(i = 0; i < numLines; i++) + { + sprintf(buffer, composite_kernel_extern_template, i); + lines[i] = _strdup(buffer); + } + /* First and last lines are easy */ + lines[ numLines ] = composite_kernel_start; + lines[ 2*numLines + 1] = composite_kernel_end; + + /* Fill the rest with templated kernels */ + for(i = numLines + 1; i < 2*numLines + 1; i++ ) + { + sprintf(buffer, composite_kernel_template, i - numLines - 1); + lines[ i ] = _strdup(buffer); + } + + /* Try to create a program with these lines */ + error = create_single_kernel_helper_create_program(context, &my_program_and_libraries[0], 2 * numLines + 2, lines); + if( my_program_and_libraries[0] == NULL || error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create long test program with %d lines! (%s in %s:%d)\n", numLines, IGetErrorString( error ), __FILE__, __LINE__ ); + return -1; + } + + /* Compile it */ + error = clCompileProgram(my_program_and_libraries[0], 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); + test_error( error, "Unable to compile a simple program" ); + + /* Create and compile templated kernels */ + for(i = 0; i < numLines; i++) + { + sprintf(buffer, simple_kernel_template, i); + const char* kernel_source = _strdup(buffer); + error = create_single_kernel_helper_create_program(context, &simple_kernels[i], 1, &kernel_source); + if( simple_kernels[i] == NULL || error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create long test program with %d lines! 
(%s in %s:%d)\n", numLines, IGetErrorString( error ), __FILE__, __LINE__ ); + return -1; + } + + /* Compile it */ + error = clCompileProgram(simple_kernels[i], 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); + test_error( error, "Unable to compile a simple program" ); + + free((void*)kernel_source); + } + + /* Create library out of compiled templated kernels */ + for(i = 0; i < numLibraries; i++) { + my_program_and_libraries[i+1] = clLinkProgram(context, 1, &deviceID, "-create-library", numFilesInLib, simple_kernels+i*numFilesInLib, NULL, NULL, &error); + test_error( error, "Unable to create a multi-line library" ); + } + + /* Link the program that calls the kernels and the library that contains them */ + cl_program my_newly_linked_program = clLinkProgram(context, 1, &deviceID, NULL, numLibraries+1, my_program_and_libraries, NULL, NULL, &error); + test_error( error, "Unable to link a program with a library" ); + + // Create the composite kernel + cl_kernel kernel = clCreateKernel(my_newly_linked_program, "CompositeKernel", &error); + test_error( error, "Unable to create a composite kernel" ); + + // Run the composite kernel and verify the results + error = verifyCopyBuffer(context, queue, kernel); + if (error != CL_SUCCESS) + return error; + + /* All done! 
*/ + for(i = 0; i <= numLibraries; i++) { + error = clReleaseProgram( my_program_and_libraries[i] ); + test_error( error, "Unable to release program object" ); + } + free( my_program_and_libraries ); + for(i = 0; i < numLines; i++) + { + free( (void*)lines[i] ); + } + for(i = numLines + 1; i < 2*numLines + 1; i++ ) + { + free( (void*)lines[i] ); + } + free( lines ); + + for(i = 0; i < numLines; i++) + { + error = clReleaseProgram( simple_kernels[i] ); + test_error( error, "Unable to release program object" ); + } + free( simple_kernels ); + + error = clReleaseProgram( my_newly_linked_program ); + test_error( error, "Unable to release program object" ); + + return 0; +} + +int test_multiple_libraries(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + unsigned int toTest[] = { 2, 8, 32, 128, 256, 0 }; // 512, 2048, 8192, 32768, 0 }; + unsigned int i; + + log_info( "Testing multiple libraries ...this might take awhile...\n" ); + + for( i = 0; toTest[ i ] != 0; i++ ) + { + log_info( " %d...\n", toTest[ i ] ); + +#if defined(_WIN32) + clock_t start = clock(); +#elif defined(__linux__) || defined(__APPLE__) + timeval time1, time2; + gettimeofday(&time1, NULL); +#endif + + if( test_large_multiple_libraries( context, deviceID, queue, toTest[ i ] ) != 0 ) + { + log_error( "ERROR: multiple library program test failed for %d lines! 
(in %s:%d)\n\n", toTest[ i ], __FILE__, __LINE__ ); + return -1; + } + +#if defined(_WIN32) + clock_t end = clock(); + log_perf( (float)( end - start ) / (float)CLOCKS_PER_SEC, false, "clock() time in secs", "%d lines", toTest[i] ); +#elif defined(__linux__) || defined(__APPLE__) + gettimeofday(&time2, NULL); + log_perf( (float)(float)(time2.tv_sec - time1.tv_sec) + 1.0e-6 * (time2.tv_usec - time1.tv_usec) , false, "wall time in secs", "%d lines", toTest[i] ); +#endif + } + + return 0; +} + +int test_large_multiple_files_multiple_libraries(cl_context context, cl_device_id deviceID, cl_command_queue queue, unsigned int numLines) +{ + int error; + cl_program *simple_kernels; + const char **lines; + unsigned int i; + char buffer[MAX_LINE_SIZE_IN_PROGRAM]; + /* I want to create (log2(N)+1)/4 libraries */ + unsigned int level = (unsigned int)(logbase(numLines, 2.0) + 1.000001)/2; + unsigned int numLibraries = (unsigned int)pow(2.0, level - 2.0); + unsigned int numFilesInLib = numLines/(2*numLibraries); + cl_program *my_programs_and_libraries = (cl_program*)malloc((1+numLibraries+numLibraries*numFilesInLib)*sizeof(cl_program)); + if (my_programs_and_libraries == NULL) { + log_error( "ERROR: Unable to allocate program array with %d programs! (in %s:%d)\n", (1+numLibraries+numLibraries*numFilesInLib), __FILE__, __LINE__ ); + return -1; + } + log_info("level - %d, numLibraries - %d, numFilesInLib - %d\n", level, numLibraries, numFilesInLib); + + simple_kernels = (cl_program*)malloc(numLines*sizeof(cl_program)); + if (simple_kernels == NULL) { + log_error( "ERROR: Unable to allocate kernels array with %d kernels! (in %s:%d)\n", numLines, __FILE__, __LINE__ ); + return -1; + } + /* First, allocate the array for our line pointers */ + lines = (const char **)malloc( (2*numLines + 2) * sizeof( const char * ) ); + if (lines == NULL) { + log_error( "ERROR: Unable to allocate lines array with %d lines! 
(in %s:%d)\n", (2*numLines + 2), __FILE__, __LINE__ ); + return -1; + } + + for(i = 0; i < numLines; i++) + { + sprintf(buffer, composite_kernel_extern_template, i); + lines[i] = _strdup(buffer); + } + /* First and last lines are easy */ + lines[ numLines ] = composite_kernel_start; + lines[ 2*numLines + 1] = composite_kernel_end; + + /* Fill the rest with templated kernels */ + for(i = numLines + 1; i < 2*numLines + 1; i++ ) + { + sprintf(buffer, composite_kernel_template, i - numLines - 1); + lines[ i ] = _strdup(buffer); + } + + /* Try to create a program with these lines */ + error = create_single_kernel_helper_create_program(context, &my_programs_and_libraries[0], 2 * numLines + 2, lines); + if( my_programs_and_libraries[0] == NULL || error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create long test program with %d lines! (%s in %s:%d)\n", numLines, IGetErrorString( error ), __FILE__, __LINE__ ); + return -1; + } + + /* Compile it */ + error = clCompileProgram(my_programs_and_libraries[0], 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); + test_error( error, "Unable to compile a simple program" ); + + /* Create and compile templated kernels */ + for(i = 0; i < numLines; i++) + { + sprintf(buffer, simple_kernel_template, i); + const char* kernel_source = _strdup(buffer); + error = create_single_kernel_helper_create_program(context, &simple_kernels[i], 1, &kernel_source); + if( simple_kernels[i] == NULL || error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create long test program with %d lines! 
(%s in %s:%d)\n", numLines, IGetErrorString( error ), __FILE__, __LINE__ ); + return -1; + } + + /* Compile it */ + error = clCompileProgram(simple_kernels[i], 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); + test_error( error, "Unable to compile a simple program" ); + + free((void*)kernel_source); + } + + /* Copy already compiled kernels */ + for( i = 0; i < numLibraries*numFilesInLib; i++) { + my_programs_and_libraries[i+1] = simple_kernels[i]; + } + + /* Create library out of compiled templated kernels */ + for( i = 0; i < numLibraries; i++) { + my_programs_and_libraries[i+1+numLibraries*numFilesInLib] = clLinkProgram(context, 1, &deviceID, "-create-library", numFilesInLib, simple_kernels+(i*numFilesInLib+numLibraries*numFilesInLib), NULL, NULL, &error); + test_error( error, "Unable to create a multi-line library" ); + } + + /* Link the program that calls the kernels and the library that contains them */ + cl_program my_newly_linked_program = clLinkProgram(context, 1, &deviceID, NULL, numLibraries+1+numLibraries*numFilesInLib, my_programs_and_libraries, NULL, NULL, &error); + test_error( error, "Unable to link a program with a library" ); + + // Create the composite kernel + cl_kernel kernel = clCreateKernel(my_newly_linked_program, "CompositeKernel", &error); + test_error( error, "Unable to create a composite kernel" ); + + // Run the composite kernel and verify the results + error = verifyCopyBuffer(context, queue, kernel); + if (error != CL_SUCCESS) + return error; + + /* All done! 
*/ + for(i = 0; i < numLibraries+1+numLibraries*numFilesInLib; i++) { + error = clReleaseProgram( my_programs_and_libraries[i] ); + test_error( error, "Unable to release program object" ); + } + free( my_programs_and_libraries ); + + for(i = 0; i < numLines; i++) + { + free( (void*)lines[i] ); + } + for(i = numLines + 1; i < 2*numLines + 1; i++ ) + { + free( (void*)lines[i] ); + } + free( lines ); + + for(i = numLibraries*numFilesInLib; i < numLines; i++) + { + error = clReleaseProgram( simple_kernels[i] ); + test_error( error, "Unable to release program object" ); + } + free( simple_kernels ); + + error = clReleaseProgram( my_newly_linked_program ); + test_error( error, "Unable to release program object" ); + + return 0; +} + +int test_multiple_files_multiple_libraries(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + unsigned int toTest[] = { 8, 32, 128, 256, 0 }; // 512, 2048, 8192, 32768, 0 }; + unsigned int i; + + log_info( "Testing multiple files and multiple libraries ...this might take awhile...\n" ); + + for( i = 0; toTest[ i ] != 0; i++ ) + { + log_info( " %d...\n", toTest[ i ] ); + +#if defined(_WIN32) + clock_t start = clock(); +#elif defined(__linux__) || defined(__APPLE__) + timeval time1, time2; + gettimeofday(&time1, NULL); +#endif + + if( test_large_multiple_files_multiple_libraries( context, deviceID, queue, toTest[ i ] ) != 0 ) + { + log_error( "ERROR: multiple files, multiple libraries program test failed for %d lines! 
(in %s:%d)\n\n", toTest[ i ], __FILE__, __LINE__ ); + return -1; + } + +#if defined(_WIN32) + clock_t end = clock(); + log_perf( (float)( end - start ) / (float)CLOCKS_PER_SEC, false, "clock() time in secs", "%d lines", toTest[i] ); +#elif defined(__linux__) || defined(__APPLE__) + gettimeofday(&time2, NULL); + log_perf( (float)(float)(time2.tv_sec - time1.tv_sec) + 1.0e-6 * (time2.tv_usec - time1.tv_usec) , false, "wall time in secs", "%d lines", toTest[i] ); +#endif + } + + return 0; +} + +int test_large_multiple_files(cl_context context, cl_device_id deviceID, cl_command_queue queue, unsigned int numLines) +{ + int error; + const char **lines; + unsigned int i; + char buffer[MAX_LINE_SIZE_IN_PROGRAM]; + cl_program *my_programs = (cl_program*)malloc((1+numLines)*sizeof(cl_program)); + + if (my_programs == NULL) { + log_error( "ERROR: Unable to allocate my_programs array with %d programs! (in %s:%d)\n", (1+numLines), __FILE__, __LINE__); + return -1; + } + /* First, allocate the array for our line pointers */ + lines = (const char **)malloc( (2*numLines + 2) * sizeof( const char * ) ); + if (lines == NULL) { + log_error( "ERROR: Unable to allocate lines array with %d lines! 
(in %s:%d)\n", (2*numLines + 2), __FILE__, __LINE__); + return -1; + } + + for(i = 0; i < numLines; i++) + { + sprintf(buffer, composite_kernel_extern_template, i); + lines[i] = _strdup(buffer); + } + /* First and last lines are easy */ + lines[ numLines ] = composite_kernel_start; + lines[ 2* numLines + 1] = composite_kernel_end; + + /* Fill the rest with templated kernels */ + for(i = numLines + 1; i < 2*numLines + 1; i++ ) + { + sprintf(buffer, composite_kernel_template, i - numLines - 1); + lines[ i ] = _strdup(buffer); + } + + /* Try to create a program with these lines */ + error = create_single_kernel_helper_create_program(context, &my_programs[0], 2 * numLines + 2, lines); + if( my_programs[0] == NULL || error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create long test program with %d lines! (%s in %s:%d)\n", numLines, IGetErrorString( error ), __FILE__, __LINE__ ); + return -1; + } + + /* Compile it */ + error = clCompileProgram(my_programs[0], 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); + test_error( error, "Unable to compile a simple program" ); + + /* Create and compile templated kernels */ + for( i = 0; i < numLines; i++) + { + sprintf(buffer, simple_kernel_template, i); + const char* kernel_source = _strdup(buffer); + error = create_single_kernel_helper_create_program(context, &my_programs[i + 1], 1, &kernel_source); + if( my_programs[i+1] == NULL || error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create long test program with %d lines! 
(%s in %s:%d)\n", numLines, IGetErrorString( error ), __FILE__, __LINE__ ); + return -1; + } + + /* Compile it */ + error = clCompileProgram(my_programs[i+1], 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); + test_error( error, "Unable to compile a simple program" ); + + free((void*)kernel_source); + } + + /* Link the program that calls the kernels and the library that contains them */ + cl_program my_newly_linked_program = clLinkProgram(context, 1, &deviceID, NULL, 1+numLines, my_programs, NULL, NULL, &error); + test_error( error, "Unable to link a program with a library" ); + + // Create the composite kernel + cl_kernel kernel = clCreateKernel(my_newly_linked_program, "CompositeKernel", &error); + test_error( error, "Unable to create a composite kernel" ); + + // Run the composite kernel and verify the results + error = verifyCopyBuffer(context, queue, kernel); + if (error != CL_SUCCESS) + return error; + + /* All done! */ + for(i = 0; i < 1+numLines; i++) { + error = clReleaseProgram( my_programs[i] ); + test_error( error, "Unable to release program object" ); + } + free( my_programs ); + for(i = 0; i < numLines; i++) + { + free( (void*)lines[i] ); + } + for(i = numLines + 1; i < 2*numLines + 1; i++ ) + { + free( (void*)lines[i] ); + } + free( lines ); + + error = clReleaseProgram( my_newly_linked_program ); + test_error( error, "Unable to release program object" ); + + return 0; +} + +int test_multiple_files(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + unsigned int toTest[] = { 8, 32, 128, 256, 0 }; // 512, 2048, 8192, 32768, 0 }; + unsigned int i; + + log_info( "Testing multiple files compilation and linking into a single executable ...this might take awhile...\n" ); + + for( i = 0; toTest[ i ] != 0; i++ ) + { + log_info( " %d...\n", toTest[ i ] ); + +#if defined(_WIN32) + clock_t start = clock(); +#elif defined(__linux__) || defined(__APPLE__) + timeval time1, time2; + gettimeofday(&time1, NULL); +#endif + + if( 
test_large_multiple_files( context, deviceID, queue, toTest[ i ] ) != 0 ) + { + log_error( "ERROR: multiple files program test failed for %d lines! (in %s:%d)\n\n", toTest[ i ], __FILE__, __LINE__ ); + return -1; + } + +#if defined(_WIN32) + clock_t end = clock(); + log_perf( (float)( end - start ) / (float)CLOCKS_PER_SEC, false, "clock() time in secs", "%d lines", toTest[i] ); +#elif defined(__linux__) || defined(__APPLE__) + gettimeofday(&time2, NULL); + log_perf( (float)(float)(time2.tv_sec - time1.tv_sec) + 1.0e-6 * (time2.tv_usec - time1.tv_usec) , false, "wall time in secs", "%d lines", toTest[i] ); +#endif + } + + return 0; +} + +int test_simple_compile_only(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + cl_program program; + + log_info("Testing a simple compilation only...\n"); + error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel); + if( program == NULL || error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + return -1; + } + + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); + test_error( error, "Unable to compile a simple program" ); + + /* All done! */ + error = clReleaseProgram( program ); + test_error( error, "Unable to release program object" ); + + return 0; +} + +int test_simple_static_compile_only(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + cl_program program; + + log_info("Testing a simple static compilations only...\n"); + + error = create_single_kernel_helper_create_program(context, &program, 1, &compile_static_var); + if( program == NULL || error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create a simple static variable test program! 
(%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + return -1; + } + + log_info("Compiling a static variable...\n"); + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); + test_error( error, "Unable to compile a simple static variable program" ); + + /* All done! */ + error = clReleaseProgram( program ); + test_error( error, "Unable to release program object" ); + + error = create_single_kernel_helper_create_program(context, &program, 1, &compile_static_struct); + if( program == NULL || error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create a simple static struct test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + return -1; + } + + log_info("Compiling a static struct...\n"); + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); + test_error( error, "Unable to compile a simple static variable program" ); + + /* All done! */ + error = clReleaseProgram( program ); + test_error( error, "Unable to release program object" ); + + error = create_single_kernel_helper_create_program(context, &program, 1, &compile_static_function); + if( program == NULL || error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create a simple static function test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + return -1; + } + + log_info("Compiling a static function...\n"); + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); + test_error( error, "Unable to compile a simple static function program" ); + + /* All done! 
*/ + error = clReleaseProgram( program ); + test_error( error, "Unable to release program object" ); + + return 0; +} + +int test_simple_extern_compile_only(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + cl_program program; + + log_info("Testing a simple extern compilations only...\n"); + error = create_single_kernel_helper_create_program(context, &program, 1, &simple_header); + if( program == NULL || error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create a simple extern kernel test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + return -1; + } + + log_info("Compiling an extern kernel...\n"); + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); + test_error( error, "Unable to compile a simple extern kernel program" ); + + /* All done! */ + error = clReleaseProgram( program ); + test_error( error, "Unable to release program object" ); + + error = create_single_kernel_helper_create_program(context, &program, 1, &compile_extern_var); + if( program == NULL || error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create a simple extern variable test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + return -1; + } + + log_info("Compiling an extern variable...\n"); + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); + test_error( error, "Unable to compile a simple extern variable program" ); + + /* All done! */ + error = clReleaseProgram( program ); + test_error( error, "Unable to release program object" ); + + error = create_single_kernel_helper_create_program(context, &program, 1, &compile_extern_struct); + if( program == NULL || error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create a simple extern struct test program! 
(%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + return -1; + } + + log_info("Compiling an extern struct...\n"); + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); + test_error( error, "Unable to compile a simple extern variable program" ); + + /* All done! */ + error = clReleaseProgram( program ); + test_error( error, "Unable to release program object" ); + + error = create_single_kernel_helper_create_program(context, &program, 1, &compile_extern_function); + if( program == NULL || error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create a simple extern function test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + return -1; + } + + log_info("Compiling an extern function...\n"); + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); + test_error( error, "Unable to compile a simple extern function program" ); + + /* All done! */ + error = clReleaseProgram( program ); + test_error( error, "Unable to release program object" ); + + return 0; +} + +struct simple_user_data { + const char* m_message; + cl_event m_event; +}; + +const char* once_upon_a_midnight_dreary = "Once upon a midnight dreary!"; + +static void CL_CALLBACK simple_compile_callback(cl_program program, void* user_data) +{ + simple_user_data* simple_compile_user_data = (simple_user_data*)user_data; + log_info("in the simple_compile_callback: program %p just completed compiling with '%s'\n", program, simple_compile_user_data->m_message); + if (strcmp(once_upon_a_midnight_dreary, simple_compile_user_data->m_message) != 0) + { + log_error("ERROR: in the simple_compile_callback: Expected '%s' and got %s (in %s:%d)!\n", once_upon_a_midnight_dreary, simple_compile_user_data->m_message, __FILE__, __LINE__); + } + + int error; + log_info("in the simple_compile_callback: program %p just completed compiling with '%p'\n", program, simple_compile_user_data->m_event); + + error = 
clSetUserEventStatus(simple_compile_user_data->m_event, CL_COMPLETE); + if (error != CL_SUCCESS) + { + log_error( "ERROR: in the simple_compile_callback: Unable to set user event status to CL_COMPLETE! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + exit(-1); + } + log_info("in the simple_compile_callback: Successfully signaled compile_program_completion_event!\n"); +} + +int test_simple_compile_with_callback(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + cl_program program; + cl_event compile_program_completion_event; + + log_info("Testing a simple compilation with callback...\n"); + error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel); + if( program == NULL || error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + return -1; + } + + compile_program_completion_event = clCreateUserEvent(context, &error); + test_error( error, "Unable to create a user event"); + + simple_user_data simple_compile_user_data = {once_upon_a_midnight_dreary, compile_program_completion_event}; + + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, simple_compile_callback, (void*)&simple_compile_user_data); + test_error( error, "Unable to compile a simple program with a callback" ); + + error = clWaitForEvents(1, &compile_program_completion_event); + test_error( error, "clWaitForEvents failed when waiting on compile_program_completion_event"); + + /* All done! 
*/ + error = clReleaseEvent(compile_program_completion_event); + test_error( error, "Unable to release event object" ); + + error = clReleaseProgram( program ); + test_error( error, "Unable to release program object" ); + + return 0; +} + +int test_simple_embedded_header_compile(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + cl_program program, header; + + log_info("Testing a simple embedded header compile only...\n"); + program = clCreateProgramWithSource(context, 1, &another_simple_kernel_with_header, NULL, &error); + if( program == NULL || error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + return -1; + } + + header = clCreateProgramWithSource(context, 1, &simple_header, NULL, &error); + if( header == NULL || error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create a simple header program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + return -1; + } + + error = clCompileProgram(program, 1, &deviceID, NULL, 1, &header, &simple_header_name, NULL, NULL); + test_error( error, "Unable to compile a simple program with embedded header" ); + + /* All done! */ + error = clReleaseProgram( program ); + test_error( error, "Unable to release program object" ); + + return 0; +} + +int test_simple_link_only(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + cl_program program; + + log_info("Testing a simple linking only...\n"); + error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel); + if( program == NULL || error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create a simple test program! 
(%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + return -1; + } + + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); + test_error( error, "Unable to compile a simple program" ); + + cl_program my_newly_linked_program = clLinkProgram(context, 1, &deviceID, NULL, 1, &program, NULL, NULL, &error); + test_error( error, "Unable to link a simple program" ); + + /* All done! */ + error = clReleaseProgram( program ); + test_error( error, "Unable to release program object" ); + + error = clReleaseProgram( my_newly_linked_program ); + test_error( error, "Unable to release program object" ); + + return 0; +} + +int test_two_file_regular_variable_access(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + cl_program program, second_program, my_newly_linked_program; + + const char* sources[2] = {simple_kernel, compile_regular_var}; // here we want to avoid linking error due to lack of kernels + log_info("Compiling and linking two program objects, where one tries to access regular variable from another...\n"); + error = create_single_kernel_helper_create_program(context, &program, 2, sources); + if( program == NULL || error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create a test program with regular variable! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + return -1; + } + + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); + test_error( error, "Unable to compile a simple program with regular function" ); + + error = create_single_kernel_helper_create_program(context, &second_program, 1, &link_static_var_access); + if( program == NULL || error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create a test program that tries to access a regular variable! 
(%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + return -1; + } + + error = clCompileProgram(second_program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); + test_error( error, "Unable to compile a program that tries to access a regular variable" ); + + cl_program two_programs[2] = { program, second_program }; + my_newly_linked_program = clLinkProgram(context, 1, &deviceID, NULL, 2, two_programs, NULL, NULL, &error); + test_error( error, "clLinkProgram: Expected a different error code while linking a program that tries to access a regular variable" ); + + /* All done! */ + error = clReleaseProgram( program ); + test_error( error, "Unable to release program object" ); + + error = clReleaseProgram( second_program ); + test_error( error, "Unable to release program object" ); + + error = clReleaseProgram( my_newly_linked_program ); + test_error( error, "Unable to release program object" ); + + return 0; +} + +int test_two_file_regular_struct_access(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + cl_program program, second_program, my_newly_linked_program; + + const char* sources[2] = {simple_kernel, compile_regular_struct}; // here we want to avoid linking error due to lack of kernels + log_info("Compiling and linking two program objects, where one tries to access regular struct from another...\n"); + error = create_single_kernel_helper_create_program(context, &program, 2, sources); + if( program == NULL || error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create a test program with regular struct! 
(%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + return -1; + } + + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); + test_error( error, "Unable to compile a simple program with regular struct" ); + + error = create_single_kernel_helper_create_program(context, &second_program, 1, &link_static_struct_access); + if( second_program == NULL || error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create a test program that tries to access a regular struct! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + return -1; + } + + error = clCompileProgram(second_program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); + test_error( error, "Unable to compile a program that tries to access a regular struct" ); + + cl_program two_programs[2] = { program, second_program }; + my_newly_linked_program = clLinkProgram(context, 1, &deviceID, NULL, 2, two_programs, NULL, NULL, &error); + test_error( error, "clLinkProgram: Expected a different error code while linking a program that tries to access a regular struct" ); + + /* All done! 
*/ + error = clReleaseProgram( program ); + test_error( error, "Unable to release program object" ); + + error = clReleaseProgram( second_program ); + test_error( error, "Unable to release program object" ); + + error = clReleaseProgram( my_newly_linked_program ); + test_error( error, "Unable to release program object" ); + + return 0; +} + + +int test_two_file_regular_function_access(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + cl_program program, second_program, my_newly_linked_program; + + const char* sources[2] = {simple_kernel, compile_regular_function}; // here we want to avoid linking error due to lack of kernels + log_info("Compiling and linking two program objects, where one tries to access regular function from another...\n"); + error = create_single_kernel_helper_create_program(context, &program, 2, sources); + if( program == NULL || error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create a test program with regular function! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + return -1; + } + + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); + test_error( error, "Unable to compile a simple program with regular function" ); + + error = create_single_kernel_helper_create_program(context, &second_program, 1, &link_static_function_access); + if( second_program == NULL || error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create a test program that tries to access a regular function! 
(%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + return -1; + } + + error = clCompileProgram(second_program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); + test_error( error, "Unable to compile a program that tries to access a regular function" ); + + cl_program two_programs[2] = { program, second_program }; + my_newly_linked_program = clLinkProgram(context, 1, &deviceID, NULL, 2, two_programs, NULL, NULL, &error); + test_error( error, "clLinkProgram: Expected a different error code while linking a program that tries to access a regular function" ); + + /* All done! */ + error = clReleaseProgram( program ); + test_error( error, "Unable to release program object" ); + + error = clReleaseProgram( second_program ); + test_error( error, "Unable to release program object" ); + + error = clReleaseProgram( my_newly_linked_program ); + test_error( error, "Unable to release program object" ); + + return 0; +} + +int test_simple_embedded_header_link(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + cl_program program, header, simple_program; + + log_info("Testing a simple embedded header link...\n"); + program = clCreateProgramWithSource(context, 1, &another_simple_kernel_with_header, NULL, &error); + if( program == NULL || error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + return -1; + } + + header = clCreateProgramWithSource(context, 1, &simple_header, NULL, &error); + if( header == NULL || error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create a simple header program! 
(%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + return -1; + } + + error = clCompileProgram(program, 1, &deviceID, NULL, 1, &header, &simple_header_name, NULL, NULL); + test_error( error, "Unable to compile a simple program with embedded header" ); + + error = create_single_kernel_helper_create_program(context, &simple_program, 1, &simple_kernel); + if( simple_program == NULL || error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + return -1; + } + + error = clCompileProgram(simple_program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); + test_error( error, "Unable to compile a simple program" ); + + cl_program two_programs[2] = { program, simple_program }; + cl_program fully_linked_program = clLinkProgram(context, 1, &deviceID, "", 2, two_programs, NULL, NULL, &error); + test_error( error, "Unable to create an executable from two binaries, one compiled with embedded header" ); + + /* All done! */ + error = clReleaseProgram( program ); + test_error( error, "Unable to release program object" ); + + error = clReleaseProgram( simple_program ); + test_error( error, "Unable to release program object" ); + + error = clReleaseProgram( fully_linked_program ); + test_error( error, "Unable to release program object" ); + + return 0; +} + +const char* when_i_pondered_weak_and_weary = "When I pondered weak and weary!"; + +static void CL_CALLBACK simple_link_callback(cl_program program, void* user_data) +{ + simple_user_data* simple_link_user_data = (simple_user_data*)user_data; + log_info("in the simple_link_callback: program %p just completed linking with '%s'\n", program, (const char*)simple_link_user_data->m_message); + if (strcmp(when_i_pondered_weak_and_weary, simple_link_user_data->m_message) != 0) + { + log_error("ERROR: in the simple_compile_callback: Expected '%s' and got %s! 
(in %s:%d)\n", when_i_pondered_weak_and_weary, simple_link_user_data->m_message, __FILE__, __LINE__); + } + + int error; + log_info("in the simple_link_callback: program %p just completed linking with '%p'\n", program, simple_link_user_data->m_event); + + error = clSetUserEventStatus(simple_link_user_data->m_event, CL_COMPLETE); + if (error != CL_SUCCESS) + { + log_error( "ERROR: simple_link_callback: Unable to set user event status to CL_COMPLETE! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + exit(-1); + } + log_info("in the simple_link_callback: Successfully signaled link_program_completion_event event!\n"); +} + +int test_simple_link_with_callback(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + cl_program program; + cl_event link_program_completion_event; + + log_info("Testing a simple linking with callback...\n"); + error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel); + if( program == NULL || error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + return -1; + } + + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); + test_error( error, "Unable to compile a simple program" ); + + link_program_completion_event = clCreateUserEvent(context, &error); + test_error( error, "Unable to create a user event"); + + simple_user_data simple_link_user_data = {when_i_pondered_weak_and_weary, link_program_completion_event}; + + clLinkProgram(context, 1, &deviceID, NULL, 1, &program, simple_link_callback, (void*)&simple_link_user_data, &error); + test_error( error, "Unable to link a simple program" ); + + error = clWaitForEvents(1, &link_program_completion_event); + test_error( error, "clWaitForEvents failed when waiting on link_program_completion_event"); + + /* All done! 
*/ + error = clReleaseEvent(link_program_completion_event); + test_error( error, "Unable to release event object" ); + + error = clReleaseProgram( program ); + test_error( error, "Unable to release program object" ); + + return 0; +} + +static void initBuffer(float* & srcBuffer, unsigned int cnDimension) +{ + float num = 0.0f; + + for( unsigned int i = 0; i < cnDimension; i++ ) + { + if( ( i % 10 ) == 0 ) + { + num = 0.0f; + } + + srcBuffer[ i ] = num; + num = num + 1.0f; + } +} + +static int verifyCopyBuffer(cl_context context, cl_command_queue queue, cl_kernel kernel) +{ + int error, result = CL_SUCCESS; + const size_t cnDimension = 32; + + // Allocate source buffer + float * srcBuffer = (float*)malloc(cnDimension * sizeof(float)); + float * dstBuffer = (float*)malloc(cnDimension * sizeof(float)); + + if (srcBuffer == NULL) { + log_error( "ERROR: Unable to allocate srcBuffer float array with %lu floats! (in %s:%d)\n", cnDimension, __FILE__, __LINE__); + return -1; + } + if (dstBuffer == NULL) { + log_error( "ERROR: Unable to allocate dstBuffer float array with %lu floats! 
(in %s:%d)\n", cnDimension, __FILE__, __LINE__); + return -1; + } + + if( srcBuffer && dstBuffer ) + { + // initialize host memory + initBuffer(srcBuffer, cnDimension ); + + // Allocate device memory + cl_mem deviceMemSrc = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, + cnDimension * sizeof( cl_float ), srcBuffer, &error); + test_error( error, "Unable to create a source memory buffer" ); + + cl_mem deviceMemDst = clCreateBuffer(context, CL_MEM_WRITE_ONLY, + cnDimension * sizeof( cl_float ), 0, &error); + test_error( error, "Unable to create a destination memory buffer" ); + + // Set kernel args + // Set parameter 0 to be the source buffer + error = clSetKernelArg(kernel, 0, sizeof( cl_mem ), ( void * )&deviceMemSrc ); + test_error( error, "Unable to set the first kernel argument" ); + + // Set parameter 1 to be the destination buffer + error = clSetKernelArg(kernel, 1, sizeof( cl_mem ), ( void * )&deviceMemDst ); + test_error( error, "Unable to set the second kernel argument" ); + + // Execute kernel + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, + &cnDimension, 0, 0, NULL, NULL ); + test_error( error, "Unable to enqueue kernel" ); + + error = clFlush( queue ); + test_error( error, "Unable to flush the queue" ); + + // copy results from device back to host + error = clEnqueueReadBuffer(queue, deviceMemDst, CL_TRUE, 0, cnDimension * sizeof( cl_float ), + dstBuffer, 0, NULL, NULL ); + test_error( error, "Unable to read the destination buffer" ); + + error = clFlush( queue ); + test_error( error, "Unable to flush the queue" ); + + // Compare the source and destination buffers + const int* pSrc = (int*)srcBuffer; + const int* pDst = (int*)dstBuffer; + int mismatch = 0; + + for( size_t i = 0; i < cnDimension; i++ ) + { + if( pSrc[i] != pDst[i] ) + { + if( mismatch < 4 ) + { + log_info("Offset %08lX: Expected %08X, Got %08X\n", i * 4, pSrc[i], pDst[i] ); + } + else + { + log_info("."); + } + mismatch++; + } + } + + if( mismatch ) + { + 
log_info("*** %d mismatches found, TEST FAILS! ***\n", mismatch ); + result = -1; + } + else + { + log_info("Buffers match, test passes.\n"); + } + + free( srcBuffer ); + srcBuffer = NULL; + free( dstBuffer ); + dstBuffer = NULL; + + if( deviceMemSrc ) + { + error = clReleaseMemObject( deviceMemSrc ); + test_error( error, "Unable to release memory object" ); + } + + if( deviceMemDst ) + { + error = clReleaseMemObject( deviceMemDst ); + test_error( error, "Unable to release memory object" ); + } + } + return result; +} + +int test_execute_after_simple_compile_and_link(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + cl_program program; + + log_info("Testing execution after a simple compile and link...\n"); + error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel); + if( program == NULL || error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + return -1; + } + + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); + test_error( error, "Unable to compile a simple program" ); + + cl_program my_newly_linked_program = clLinkProgram(context, 1, &deviceID, NULL, 1, &program, NULL, NULL, &error); + test_error( error, "Unable to link a simple program" ); + + cl_kernel kernel = clCreateKernel(my_newly_linked_program, "CopyBuffer", &error); + test_error( error, "Unable to create a simple kernel" ); + + error = verifyCopyBuffer(context, queue, kernel); + if (error != CL_SUCCESS) + return error; + + /* All done! 
*/ + error = clReleaseProgram( program ); + test_error( error, "Unable to release program object" ); + + error = clReleaseProgram( my_newly_linked_program ); + test_error( error, "Unable to release program object" ); + + return 0; +} + +int test_execute_after_simple_compile_and_link_no_device_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + cl_program program; + + log_info("Testing execution after a simple compile and link with no device information provided...\n"); + error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel); + if( program == NULL || error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + return -1; + } + + error = clCompileProgram(program, 0, NULL, NULL, 0, NULL, NULL, NULL, NULL); + test_error( error, "Unable to compile a simple program" ); + + cl_program my_newly_linked_program = clLinkProgram(context, 0, NULL, NULL, 1, &program, NULL, NULL, &error); + test_error( error, "Unable to link a simple program" ); + + cl_kernel kernel = clCreateKernel(my_newly_linked_program, "CopyBuffer", &error); + test_error( error, "Unable to create a simple kernel" ); + + error = verifyCopyBuffer(context, queue, kernel); + if (error != CL_SUCCESS) + return error; + + /* All done! 
*/ + error = clReleaseProgram( program ); + test_error( error, "Unable to release program object" ); + + error = clReleaseProgram( my_newly_linked_program ); + test_error( error, "Unable to release program object" ); + + return 0; +} + +int test_execute_after_simple_compile_and_link_with_defines(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + cl_program program; + + log_info("Testing execution after a simple compile and link with defines...\n"); + error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel_with_defines, "-DFIRST=5 -DSECOND=37"); + if( program == NULL || error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + return -1; + } + + error = clCompileProgram(program, 1, &deviceID, "-DFIRST=5 -DSECOND=37", 0, NULL, NULL, NULL, NULL); + test_error( error, "Unable to compile a simple program" ); + + cl_program my_newly_linked_program = clLinkProgram(context, 1, &deviceID, NULL, 1, &program, NULL, NULL, &error); + test_error( error, "Unable to link a simple program" ); + + cl_kernel kernel = clCreateKernel(my_newly_linked_program, "CopyBuffer", &error); + test_error( error, "Unable to create a simple kernel" ); + + error = verifyCopyBuffer(context, queue, kernel); + if (error != CL_SUCCESS) + return error; + + /* All done! 
*/ + error = clReleaseProgram( program ); + test_error( error, "Unable to release program object" ); + + error = clReleaseProgram( my_newly_linked_program ); + test_error( error, "Unable to release program object" ); + + return 0; +} + +int test_execute_after_serialize_reload_object(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + cl_program program; + size_t binarySize; + unsigned char *binary; + + log_info("Testing execution after serialization and reloading of the object...\n"); + error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel); + if( program == NULL || error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + return -1; + } + + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); + test_error( error, "Unable to compile a simple program" ); + + // Get the size of the resulting binary (only one device) + error = clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES, sizeof( binarySize ), &binarySize, NULL ); + test_error( error, "Unable to get binary size" ); + + // Sanity check + if( binarySize == 0 ) + { + log_error( "ERROR: Binary size of program is zero (in %s:%d)\n", __FILE__, __LINE__ ); + return -1; + } + + // Create a buffer and get the actual binary + binary = (unsigned char*)malloc(sizeof(unsigned char)*binarySize); + if (binary == NULL) { + log_error( "ERROR: Unable to allocate binary character array with %lu characters! 
(in %s:%d)\n", binarySize, __FILE__, __LINE__ ); + return -1; + } + + unsigned char *buffers[ 1 ] = { binary }; + cl_int loadErrors[ 1 ]; + + // Do another sanity check here first + size_t size; + error = clGetProgramInfo( program, CL_PROGRAM_BINARIES, 0, NULL, &size ); + test_error( error, "Unable to get expected size of binaries array" ); + if( size != sizeof( buffers ) ) + { + log_error( "ERROR: Expected size of binaries array in clGetProgramInfo is incorrect (should be %d, got %d) (in %s:%d)\n", (int)sizeof( buffers ), (int)size, __FILE__, __LINE__ ); + free(binary); + return -1; + } + + error = clGetProgramInfo( program, CL_PROGRAM_BINARIES, sizeof( buffers ), &buffers, NULL ); + test_error( error, "Unable to get program binary" ); + + // use clCreateProgramWithBinary + cl_program program_with_binary = clCreateProgramWithBinary(context, 1, &deviceID, &binarySize, (const unsigned char**)buffers, loadErrors, &error); + test_error( error, "Unable to create program with binary" ); + + cl_program my_newly_linked_program = clLinkProgram(context, 1, &deviceID, NULL, 1, &program_with_binary, NULL, NULL, &error); + test_error( error, "Unable to link a simple program" ); + + cl_kernel kernel = clCreateKernel(my_newly_linked_program, "CopyBuffer", &error); + test_error( error, "Unable to create a simple kernel" ); + + error = verifyCopyBuffer(context, queue, kernel); + if (error != CL_SUCCESS) + return error; + + /* All done! 
*/ + error = clReleaseProgram( program ); + test_error( error, "Unable to release program object" ); + + error = clReleaseProgram( my_newly_linked_program ); + test_error( error, "Unable to release program object" ); + + error = clReleaseProgram( program_with_binary ); + test_error( error, "Unable to release program object" ); + + free(binary); + + return 0; +} + +int test_execute_after_serialize_reload_library(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + cl_program program, another_program; + size_t binarySize; + unsigned char *binary; + + log_info("Testing execution after linking a binary with a simple library...\n"); + // we will test creation of a simple library from one file + error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel); + if( program == NULL || error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + return -1; + } + + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); + test_error( error, "Unable to compile a simple program" ); + + cl_program my_newly_minted_library = clLinkProgram(context, 1, &deviceID, "-create-library", 1, &program, NULL, NULL, &error); + test_error( error, "Unable to create a simple library" ); + + + // Get the size of the resulting library (only one device) + error = clGetProgramInfo( my_newly_minted_library, CL_PROGRAM_BINARY_SIZES, sizeof( binarySize ), &binarySize, NULL ); + test_error( error, "Unable to get binary size" ); + + // Sanity check + if( binarySize == 0 ) + { + log_error( "ERROR: Binary size of program is zero (in %s:%d)\n", __FILE__, __LINE__ ); + return -1; + } + + // Create a buffer and get the actual binary + binary = (unsigned char*)malloc(sizeof(unsigned char)*binarySize); + if (binary == NULL) { + log_error( "ERROR: Unable to allocate binary character array with %lu characters (in 
%s:%d)!", binarySize, __FILE__, __LINE__); + return -1; + } + unsigned char *buffers[ 1 ] = { binary }; + cl_int loadErrors[ 1 ]; + + // Do another sanity check here first + size_t size; + error = clGetProgramInfo( my_newly_minted_library, CL_PROGRAM_BINARIES, 0, NULL, &size ); + test_error( error, "Unable to get expected size of binaries array" ); + if( size != sizeof( buffers ) ) + { + log_error( "ERROR: Expected size of binaries array in clGetProgramInfo is incorrect (should be %d, got %d) (in %s:%d)\n", (int)sizeof( buffers ), (int)size, __FILE__, __LINE__ ); + free(binary); + return -1; + } + + error = clGetProgramInfo( my_newly_minted_library, CL_PROGRAM_BINARIES, sizeof( buffers ), &buffers, NULL ); + test_error( error, "Unable to get program binary" ); + + // use clCreateProgramWithBinary + cl_program library_with_binary = clCreateProgramWithBinary(context, 1, &deviceID, &binarySize, (const unsigned char**)buffers, loadErrors, &error); + test_error( error, "Unable to create program with binary" ); + + error = create_single_kernel_helper_create_program(context, &another_program, 1, &another_simple_kernel); + if( another_program == NULL || error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create a simple test program! 
(%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + return -1; + } + + error = clCompileProgram(another_program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); + test_error( error, "Unable to compile a simple program" ); + + cl_program program_and_archive[2] = { another_program, library_with_binary }; + cl_program fully_linked_program = clLinkProgram(context, 1, &deviceID, "", 2, program_and_archive, NULL, NULL, &error); + test_error( error, "Unable to create an executable from a binary and a library" ); + + cl_kernel kernel = clCreateKernel(fully_linked_program, "CopyBuffer", &error); + test_error( error, "Unable to create a simple kernel" ); + + error = verifyCopyBuffer(context, queue, kernel); + if (error != CL_SUCCESS) + return error; + + cl_kernel another_kernel = clCreateKernel(fully_linked_program, "AnotherCopyBuffer", &error); + test_error( error, "Unable to create another simple kernel" ); + + error = verifyCopyBuffer(context, queue, another_kernel); + if (error != CL_SUCCESS) + return error; + + /* All done! 
*/ + error = clReleaseProgram( program ); + test_error( error, "Unable to release program object" ); + + error = clReleaseProgram( another_program ); + test_error( error, "Unable to release program object" ); + + error = clReleaseProgram( my_newly_minted_library ); + test_error( error, "Unable to release program object" ); + + error = clReleaseProgram( library_with_binary ); + test_error( error, "Unable to release program object" ); + + error = clReleaseProgram( fully_linked_program ); + test_error( error, "Unable to release program object" ); + + free(binary); + + return 0; +} + +static void CL_CALLBACK program_compile_completion_callback(cl_program program, void* user_data) +{ + int error; + cl_event compile_program_completion_event = (cl_event)user_data; + log_info("in the program_compile_completion_callback: program %p just completed compiling with '%p'\n", program, compile_program_completion_event); + + error = clSetUserEventStatus(compile_program_completion_event, CL_COMPLETE); + if (error != CL_SUCCESS) + { + log_error( "ERROR: in the program_compile_completion_callback: Unable to set user event status to CL_COMPLETE! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + exit(-1); + } + log_info("in the program_compile_completion_callback: Successfully signaled compile_program_completion_event event!\n"); +} + +static void CL_CALLBACK program_link_completion_callback(cl_program program, void* user_data) +{ + int error; + cl_event link_program_completion_event = (cl_event)user_data; + log_info("in the program_link_completion_callback: program %p just completed linking with '%p'\n", program, link_program_completion_event); + + error = clSetUserEventStatus(link_program_completion_event, CL_COMPLETE); + if (error != CL_SUCCESS) + { + log_error( "ERROR: in the program_link_completion_callback: Unable to set user event status to CL_COMPLETE! 
(%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + exit(-1); + } + log_info("in the program_link_completion_callback: Successfully signaled link_program_completion_event event!\n"); +} + +int test_execute_after_simple_compile_and_link_with_callbacks(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + cl_program program; + cl_event compile_program_completion_event, link_program_completion_event; + + log_info("Testing execution after a simple compile and link with callbacks...\n"); + error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel); + if( program == NULL || error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + return -1; + } + + compile_program_completion_event = clCreateUserEvent(context, &error); + test_error( error, "Unable to create a user event"); + + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, + program_compile_completion_callback, (void*)compile_program_completion_event); + test_error( error, "Unable to compile a simple program" ); + + error = clWaitForEvents(1, &compile_program_completion_event); + test_error( error, "clWaitForEvents failed when waiting on compile_program_completion_event"); + + clReleaseEvent(compile_program_completion_event); + + link_program_completion_event = clCreateUserEvent(context, &error); + test_error( error, "Unable to create a user event"); + + cl_program my_newly_linked_program = clLinkProgram(context, 1, &deviceID, NULL, 1, &program, + program_link_completion_callback, (void*)link_program_completion_event, &error); + test_error( error, "Unable to link a simple program" ); + + error = clWaitForEvents(1, &link_program_completion_event); + test_error( error, "clWaitForEvents failed when waiting on link_program_completion_event"); + + clReleaseEvent(link_program_completion_event); + + cl_kernel kernel = 
clCreateKernel(my_newly_linked_program, "CopyBuffer", &error); + test_error( error, "Unable to create a simple kernel" ); + + error = verifyCopyBuffer(context, queue, kernel); + if (error != CL_SUCCESS) + return error; + + /* All done! */ + error = clReleaseProgram( program ); + test_error( error, "Unable to release program object" ); + + error = clReleaseProgram( my_newly_linked_program ); + test_error( error, "Unable to release program object" ); + + return 0; +} + +int test_simple_library_only(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + cl_program program; + + log_info("Testing creation of a simple library...\n"); + error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel); + if( program == NULL || error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + return -1; + } + + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); + test_error( error, "Unable to compile a simple program" ); + + cl_program my_newly_minted_library = clLinkProgram(context, 1, &deviceID, "-create-library", 1, &program, NULL, NULL, &error); + test_error( error, "Unable to create a simple library" ); + + /* All done! 
*/ + error = clReleaseProgram( program ); + test_error( error, "Unable to release program object" ); + + error = clReleaseProgram( my_newly_minted_library ); + test_error( error, "Unable to release program object" ); + + return 0; +} + +int test_simple_library_with_callback(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + cl_program program; + cl_event link_program_completion_event; + + log_info("Testing creation of a simple library with a callback...\n"); + error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel); + if( program == NULL || error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + return -1; + } + + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); + test_error( error, "Unable to compile a simple program" ); + + link_program_completion_event = clCreateUserEvent(context, &error); + test_error( error, "Unable to create a user event"); + + simple_user_data simple_link_user_data = {when_i_pondered_weak_and_weary, link_program_completion_event}; + + cl_program my_newly_minted_library = clLinkProgram(context, 1, &deviceID, "-create-library", 1, &program, + simple_link_callback, (void*)&simple_link_user_data, &error); + test_error( error, "Unable to create a simple library" ); + + error = clWaitForEvents(1, &link_program_completion_event); + test_error( error, "clWaitForEvents failed when waiting on link_program_completion_event"); + + /* All done! 
*/ + error = clReleaseEvent(link_program_completion_event); + test_error( error, "Unable to release event object" ); + + error = clReleaseProgram( program ); + test_error( error, "Unable to release program object" ); + + error = clReleaseProgram( my_newly_minted_library ); + test_error( error, "Unable to release program object" ); + + return 0; +} + +int test_simple_library_with_link(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + cl_program program, another_program; + + log_info("Testing creation and linking with a simple library...\n"); + error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel); + if( program == NULL || error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + return -1; + } + + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); + test_error( error, "Unable to compile a simple program" ); + + cl_program my_newly_minted_library = clLinkProgram(context, 1, &deviceID, "-create-library", 1, &program, NULL, NULL, &error); + test_error( error, "Unable to create a simple library" ); + + error = create_single_kernel_helper_create_program(context, &another_program, 1, &another_simple_kernel); + if( another_program == NULL || error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create a simple test program! 
(%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + return -1; + } + + error = clCompileProgram(another_program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); + test_error( error, "Unable to compile a simple program" ); + + cl_program program_and_archive[2] = { another_program, my_newly_minted_library }; + cl_program fully_linked_program = clLinkProgram(context, 1, &deviceID, "", 2, program_and_archive, NULL, NULL, &error); + test_error( error, "Unable to create an executable from a binary and a library" ); + + /* All done! */ + error = clReleaseProgram( program ); + test_error( error, "Unable to release program object" ); + + error = clReleaseProgram( another_program ); + test_error( error, "Unable to release program object" ); + + error = clReleaseProgram( my_newly_minted_library ); + test_error( error, "Unable to release program object" ); + + error = clReleaseProgram( fully_linked_program ); + test_error( error, "Unable to release program object" ); + + return 0; +} + +int test_execute_after_simple_library_with_link(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + cl_program program, another_program; + + log_info("Testing execution after linking a binary with a simple library...\n"); + error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel); + if( program == NULL || error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create a simple test program! 
(%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + return -1; + } + + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); + test_error( error, "Unable to compile a simple program" ); + + cl_program my_newly_minted_library = clLinkProgram(context, 1, &deviceID, "-create-library", 1, &program, NULL, NULL, &error); + test_error( error, "Unable to create a simple library" ); + + error = create_single_kernel_helper_create_program(context, &another_program, 1, &another_simple_kernel); + if( another_program == NULL || error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + return -1; + } + + error = clCompileProgram(another_program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); + test_error( error, "Unable to compile a simple program" ); + + cl_program program_and_archive[2] = { another_program, my_newly_minted_library }; + cl_program fully_linked_program = clLinkProgram(context, 1, &deviceID, "", 2, program_and_archive, NULL, NULL, &error); + test_error( error, "Unable to create an executable from a binary and a library" ); + + cl_kernel kernel = clCreateKernel(fully_linked_program, "CopyBuffer", &error); + test_error( error, "Unable to create a simple kernel" ); + + error = verifyCopyBuffer(context, queue, kernel); + if (error != CL_SUCCESS) + return error; + + cl_kernel another_kernel = clCreateKernel(fully_linked_program, "AnotherCopyBuffer", &error); + test_error( error, "Unable to create another simple kernel" ); + + error = verifyCopyBuffer(context, queue, another_kernel); + if (error != CL_SUCCESS) + return error; + + /* All done! 
*/ + error = clReleaseProgram( program ); + test_error( error, "Unable to release program object" ); + + error = clReleaseProgram( another_program ); + test_error( error, "Unable to release program object" ); + + error = clReleaseProgram( my_newly_minted_library ); + test_error( error, "Unable to release program object" ); + + error = clReleaseProgram( fully_linked_program ); + test_error( error, "Unable to release program object" ); + + return 0; +} + +int test_two_file_link(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + cl_program program, another_program; + + log_info("Testing two file compiling and linking...\n"); + error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel); + if( program == NULL || error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + return -1; + } + + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); + test_error( error, "Unable to compile a simple program" ); + + + error = create_single_kernel_helper_create_program(context, &another_program, 1, &another_simple_kernel); + if( another_program == NULL || error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + return -1; + } + + error = clCompileProgram(another_program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); + test_error( error, "Unable to compile a simple program" ); + + cl_program two_programs[2] = { program, another_program }; + cl_program fully_linked_program = clLinkProgram(context, 1, &deviceID, "", 2, two_programs, NULL, NULL, &error); + test_error( error, "Unable to create an executable from two binaries" ); + + /* All done! 
*/ + error = clReleaseProgram( program ); + test_error( error, "Unable to release program object" ); + + error = clReleaseProgram( another_program ); + test_error( error, "Unable to release program object" ); + + error = clReleaseProgram( fully_linked_program ); + test_error( error, "Unable to release program object" ); + + return 0; +} + +int test_execute_after_two_file_link(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + cl_program program, another_program; + + log_info("Testing two file compiling and linking and execution of two kernels afterwards ...\n"); + error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel); + if( program == NULL || error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + return -1; + } + + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); + test_error( error, "Unable to compile a simple program" ); + + error = create_single_kernel_helper_create_program(context, &another_program, 1, &another_simple_kernel); + if( another_program == NULL || error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create a simple test program! 
(%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + return -1; + } + + error = clCompileProgram(another_program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); + test_error( error, "Unable to compile a simple program" ); + + cl_program two_programs[2] = { program, another_program }; + cl_program fully_linked_program = clLinkProgram(context, 1, &deviceID, "", 2, two_programs, NULL, NULL, &error); + test_error( error, "Unable to create an executable from two binaries" ); + + cl_kernel kernel = clCreateKernel(fully_linked_program, "CopyBuffer", &error); + test_error( error, "Unable to create a simple kernel" ); + + error = verifyCopyBuffer(context, queue, kernel); + if (error != CL_SUCCESS) + return error; + + cl_kernel another_kernel = clCreateKernel(fully_linked_program, "AnotherCopyBuffer", &error); + test_error( error, "Unable to create another simple kernel" ); + + error = verifyCopyBuffer(context, queue, another_kernel); + if (error != CL_SUCCESS) + return error; + + /* All done! */ + error = clReleaseProgram( program ); + test_error( error, "Unable to release program object" ); + + error = clReleaseProgram( another_program ); + test_error( error, "Unable to release program object" ); + + error = clReleaseProgram( fully_linked_program ); + test_error( error, "Unable to release program object" ); + + return 0; +} + +int test_execute_after_embedded_header_link(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + cl_program program, header, simple_program; + + log_info("Testing execution after embedded header link...\n"); + // we will test execution after compiling and linking with embedded headers + program = clCreateProgramWithSource(context, 1, &another_simple_kernel_with_header, NULL, &error); + if( program == NULL || error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create a simple test program! 
(%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + return -1; + } + + header = clCreateProgramWithSource(context, 1, &simple_header, NULL, &error); + if( header == NULL || error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create a simple header program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + return -1; + } + + error = clCompileProgram(program, 1, &deviceID, NULL, 1, &header, &simple_header_name, NULL, NULL); + test_error( error, "Unable to compile a simple program with embedded header" ); + + simple_program = clCreateProgramWithSource(context, 1, &simple_kernel, NULL, &error); + if( simple_program == NULL || error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + return -1; + } + + error = clCompileProgram(simple_program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); + test_error( error, "Unable to compile a simple program" ); + + cl_program two_programs[2] = { program, simple_program }; + cl_program fully_linked_program = clLinkProgram(context, 1, &deviceID, "", 2, two_programs, NULL, NULL, &error); + test_error( error, "Unable to create an executable from two binaries, one compiled with embedded header" ); + + cl_kernel kernel = clCreateKernel(fully_linked_program, "CopyBuffer", &error); + test_error( error, "Unable to create a simple kernel" ); + + error = verifyCopyBuffer(context, queue, kernel); + if (error != CL_SUCCESS) + return error; + + cl_kernel another_kernel = clCreateKernel(fully_linked_program, "AnotherCopyBuffer", &error); + test_error( error, "Unable to create another simple kernel" ); + + error = verifyCopyBuffer(context, queue, another_kernel); + if (error != CL_SUCCESS) + return error; + + /* All done! 
*/ + error = clReleaseProgram( program ); + test_error( error, "Unable to release program object" ); + + error = clReleaseProgram( simple_program ); + test_error( error, "Unable to release program object" ); + + error = clReleaseProgram( fully_linked_program ); + test_error( error, "Unable to release program object" ); + + return 0; +} + +#if defined(__APPLE__) || defined(__linux) +#define _mkdir(x) mkdir(x,S_IRWXU) +#define _chdir chdir +#define _rmdir rmdir +#define _unlink unlink +#else +#include +#endif + +int test_execute_after_included_header_link(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + cl_program program, simple_program; + + log_info("Testing execution after included header link...\n"); + // we will test execution after compiling and linking with included headers + program = clCreateProgramWithSource(context, 1, &another_simple_kernel_with_header, NULL, &error); + if( program == NULL || error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + return -1; + } + + /* setup */ +#if (defined(__linux__) || defined(__APPLE__)) && (!defined( __ANDROID__ )) + /* Some tests systems doesn't allow one to write in the test directory */ + if (_chdir("/tmp") != 0) { + log_error( "ERROR: Unable to remove directory foo/bar! (in %s:%d)\n", __FILE__, __LINE__ ); + return -1; + } +#endif + if (_mkdir("foo") != 0) { + log_error( "ERROR: Unable to create directory foo! (in %s:%d)\n", __FILE__, __LINE__ ); + return -1; + } + if (_mkdir("foo/bar") != 0) { + log_error( "ERROR: Unable to create directory foo/bar! (in %s:%d)\n", __FILE__, __LINE__ ); + return -1; + } + if (_chdir("foo/bar") != 0) { + log_error( "ERROR: Unable to change to directory foo/bar! 
(in %s:%d)\n", __FILE__, __LINE__ ); + return -1; + } + FILE* simple_header_file = fopen(simple_header_name, "w"); + if (simple_header_file == NULL) { + log_error( "ERROR: Unable to create simple header file %s! (in %s:%d)\n", simple_header_name, __FILE__, __LINE__ ); + return -1; + } + if (fprintf(simple_header_file, "%s", simple_header) < 0) { + log_error( "ERROR: Unable to write to simple header file %s! (in %s:%d)\n", simple_header_name, __FILE__, __LINE__); + return -1; + } + if (fclose(simple_header_file) != 0) { + log_error( "ERROR: Unable to close simple header file %s! (in %s:%d)\n", simple_header_name, __FILE__, __LINE__); + return -1; + } + if (_chdir("../..") != 0) { + log_error( "ERROR: Unable to change to original working directory! (in %s:%d)\n", __FILE__, __LINE__); + return -1; + } +#if (defined(__linux__) || defined(__APPLE__)) && (!defined( __ANDROID__ )) + error = clCompileProgram(program, 1, &deviceID, "-I/tmp/foo/bar", 0, NULL, NULL, NULL, NULL); +#else + error = clCompileProgram(program, 1, &deviceID, "-Ifoo/bar", 0, NULL, NULL, NULL, NULL); +#endif + test_error( error, "Unable to compile a simple program with included header" ); + + /* cleanup */ + if (_chdir("foo/bar") != 0) { + log_error( "ERROR: Unable to change to directory foo/bar! (in %s:%d)\n", __FILE__, __LINE__ ); + return -1; + } + if (_unlink(simple_header_name) != 0) { + log_error( "ERROR: Unable to remove simple header file %s! (in %s:%d)\n", simple_header_name, __FILE__, __LINE__ ); + return -1; + } + if (_chdir("../..") != 0) { + log_error( "ERROR: Unable to change to original working directory! (in %s:%d)\n", __FILE__, __LINE__ ); + return -1; + } + if (_rmdir("foo/bar") != 0) { + log_error( "ERROR: Unable to remove directory foo/bar! (in %s:%d)\n", __FILE__, __LINE__ ); + return -1; + } + if (_rmdir("foo") != 0) { + log_error( "ERROR: Unable to remove directory foo! 
(in %s:%d)\n", __FILE__, __LINE__ ); + return -1; + } + + simple_program = clCreateProgramWithSource(context, 1, &simple_kernel, NULL, &error); + if( simple_program == NULL || error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + return -1; + } + + error = clCompileProgram(simple_program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); + test_error( error, "Unable to compile a simple program" ); + + cl_program two_programs[2] = { program, simple_program }; + cl_program fully_linked_program = clLinkProgram(context, 1, &deviceID, "", 2, two_programs, NULL, NULL, &error); + test_error( error, "Unable to create an executable from two binaries, one compiled with embedded header" ); + + cl_kernel kernel = clCreateKernel(fully_linked_program, "CopyBuffer", &error); + test_error( error, "Unable to create a simple kernel" ); + + error = verifyCopyBuffer(context, queue, kernel); + if (error != CL_SUCCESS) + return error; + + cl_kernel another_kernel = clCreateKernel(fully_linked_program, "AnotherCopyBuffer", &error); + test_error( error, "Unable to create another simple kernel" ); + + error = verifyCopyBuffer(context, queue, another_kernel); + if (error != CL_SUCCESS) + return error; + + /* All done! 
*/ + error = clReleaseProgram( program ); + test_error( error, "Unable to release program object" ); + + error = clReleaseProgram( simple_program ); + test_error( error, "Unable to release program object" ); + + error = clReleaseProgram( fully_linked_program ); + test_error( error, "Unable to release program object" ); + + return 0; +} + +int test_program_binary_type(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int error; + cl_program program, another_program, program_with_binary, fully_linked_program_with_binary; + cl_program_binary_type program_type = -1; + size_t size; + size_t binarySize; + unsigned char *binary; + + log_info("Testing querying of program binary type...\n"); + error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel); + if( program == NULL || error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + return -1; + } + + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); + test_error( error, "Unable to compile a simple program" ); + + error = clGetProgramBuildInfo (program, deviceID, CL_PROGRAM_BINARY_TYPE, sizeof(cl_program_binary_type), &program_type, NULL); + test_error( error, "Unable to get program binary type" ); + if (program_type != CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT) + { + log_error( "ERROR: Expected program type of a just compiled program to be CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT (in %s:%d)\n", __FILE__, __LINE__ ); + return -1; + } + program_type = -1; + + // Get the size of the resulting binary (only one device) + error = clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES, sizeof( binarySize ), &binarySize, NULL ); + test_error( error, "Unable to get binary size" ); + + // Sanity check + if( binarySize == 0 ) + { + log_error( "ERROR: Binary size of program is zero (in %s:%d)\n", __FILE__, __LINE__ ); + return -1; + } + + // 
Create a buffer and get the actual binary + { + binary = (unsigned char*)malloc(sizeof(unsigned char)*binarySize); + if (binary == NULL) { + log_error( "ERROR: Unable to allocate binary character array with %lu characters! (in %s:%d)\n", binarySize, __FILE__, __LINE__ ); + return -1; + } + unsigned char *buffers[ 1 ] = { binary }; + cl_int loadErrors[ 1 ]; + + // Do another sanity check here first + size_t size; + error = clGetProgramInfo( program, CL_PROGRAM_BINARIES, 0, NULL, &size ); + test_error( error, "Unable to get expected size of binaries array" ); + if( size != sizeof( buffers ) ) + { + log_error( "ERROR: Expected size of binaries array in clGetProgramInfo is incorrect (should be %d, got %d) (in %s:%d)\n", (int)sizeof( buffers ), (int)size, __FILE__, __LINE__ ); + free(binary); + return -1; + } + + error = clGetProgramInfo( program, CL_PROGRAM_BINARIES, sizeof( buffers ), &buffers, NULL ); + test_error( error, "Unable to get program binary" ); + + // use clCreateProgramWithBinary + program_with_binary = clCreateProgramWithBinary(context, 1, &deviceID, &binarySize, (const unsigned char**)buffers, loadErrors, &error); + test_error( error, "Unable to create program with binary" ); + + error = clGetProgramBuildInfo (program_with_binary, deviceID, CL_PROGRAM_BINARY_TYPE, sizeof(cl_program_binary_type), &program_type, NULL); + test_error( error, "Unable to get program binary type" ); + if (program_type != CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT) + { + log_error( "ERROR: Expected program type of a program created from compiled object to be CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT (in %s:%d)\n", __FILE__, __LINE__ ); + return -1; + } + program_type = -1; + free(binary); + } + + cl_program my_newly_minted_library = clLinkProgram(context, 1, &deviceID, "-create-library", 1, &program_with_binary, NULL, NULL, &error); + test_error( error, "Unable to create a simple library" ); + error = clGetProgramBuildInfo (my_newly_minted_library, deviceID, CL_PROGRAM_BINARY_TYPE, 
sizeof(cl_program_binary_type), &program_type, NULL); + test_error( error, "Unable to get program binary type" ); + if (program_type != CL_PROGRAM_BINARY_TYPE_LIBRARY) + { + log_error( "ERROR: Expected program type of a just linked library to be CL_PROGRAM_BINARY_TYPE_LIBRARY (in %s:%d)\n", __FILE__, __LINE__ ); + return -1; + } + program_type = -1; + + // Get the size of the resulting library (only one device) + error = clGetProgramInfo( my_newly_minted_library, CL_PROGRAM_BINARY_SIZES, sizeof( binarySize ), &binarySize, NULL ); + test_error( error, "Unable to get binary size" ); + + // Sanity check + if( binarySize == 0 ) + { + log_error( "ERROR: Binary size of program is zero (in %s:%d)\n", __FILE__, __LINE__ ); + return -1; + } + + // Create a buffer and get the actual binary + binary = (unsigned char*)malloc(sizeof(unsigned char)*binarySize); + if (binary == NULL) { + log_error( "ERROR: Unable to allocate binary character array with %lu characters! (in %s:%d)\n", binarySize, __FILE__, __LINE__); + return -1; + } + + unsigned char *buffers[ 1 ] = { binary }; + cl_int loadErrors[ 1 ]; + + // Do another sanity check here first + error = clGetProgramInfo( my_newly_minted_library, CL_PROGRAM_BINARIES, 0, NULL, &size ); + test_error( error, "Unable to get expected size of binaries array" ); + if( size != sizeof( buffers ) ) + { + log_error( "ERROR: Expected size of binaries array in clGetProgramInfo is incorrect (should be %d, got %d) (in %s:%d)\n", (int)sizeof( buffers ), (int)size, __FILE__, __LINE__ ); + free(binary); + return -1; + } + + error = clGetProgramInfo( my_newly_minted_library, CL_PROGRAM_BINARIES, sizeof( buffers ), &buffers, NULL ); + test_error( error, "Unable to get program binary" ); + + // use clCreateProgramWithBinary + cl_program library_with_binary = clCreateProgramWithBinary(context, 1, &deviceID, &binarySize, (const unsigned char**)buffers, loadErrors, &error); + test_error( error, "Unable to create program with binary" ); + error = 
clGetProgramBuildInfo (library_with_binary, deviceID, CL_PROGRAM_BINARY_TYPE, sizeof(cl_program_binary_type), &program_type, NULL); + test_error( error, "Unable to get program binary type" ); + if (program_type != CL_PROGRAM_BINARY_TYPE_LIBRARY) + { + log_error( "ERROR: Expected program type of a library loaded with binary to be CL_PROGRAM_BINARY_TYPE_LIBRARY (in %s:%d)\n", __FILE__, __LINE__ ); + return -1; + } + program_type = -1; + free(binary); + + error = create_single_kernel_helper_create_program(context, &another_program, 1, &another_simple_kernel); + if( another_program == NULL || error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + return -1; + } + + error = clCompileProgram(another_program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); + test_error( error, "Unable to compile a simple program" ); + + cl_program program_and_archive[2] = { another_program, library_with_binary }; + cl_program fully_linked_program = clLinkProgram(context, 1, &deviceID, "", 2, program_and_archive, NULL, NULL, &error); + test_error( error, "Unable to create an executable from a binary and a library" ); + + error = clGetProgramBuildInfo (fully_linked_program, deviceID, CL_PROGRAM_BINARY_TYPE, sizeof(cl_program_binary_type), &program_type, NULL); + test_error( error, "Unable to get program binary type" ); + if (program_type != CL_PROGRAM_BINARY_TYPE_EXECUTABLE) + { + log_error( "ERROR: Expected program type of a newly build executable to be CL_PROGRAM_BINARY_TYPE_EXECUTABLE (in %s:%d)\n", __FILE__, __LINE__ ); + return -1; + } + program_type = -1; + + // Get the size of the resulting binary (only one device) + error = clGetProgramInfo( fully_linked_program, CL_PROGRAM_BINARY_SIZES, sizeof( binarySize ), &binarySize, NULL ); + test_error( error, "Unable to get binary size" ); + + // Sanity check + if( binarySize == 0 ) + { + log_error( "ERROR: Binary size of program is zero (in 
%s:%d)\n", __FILE__, __LINE__ ); + return -1; + } + + // Create a buffer and get the actual binary + { + binary = (unsigned char*)malloc(sizeof(unsigned char)*binarySize); + if (binary == NULL) { + log_error( "ERROR: Unable to allocate binary character array with %lu characters! (in %s:%d)\n", binarySize, __FILE__, __LINE__ ); + return -1; + } + unsigned char *buffers[ 1 ] = { binary }; + cl_int loadErrors[ 1 ]; + + // Do another sanity check here first + size_t size; + error = clGetProgramInfo( fully_linked_program, CL_PROGRAM_BINARIES, 0, NULL, &size ); + test_error( error, "Unable to get expected size of binaries array" ); + if( size != sizeof( buffers ) ) + { + log_error( "ERROR: Expected size of binaries array in clGetProgramInfo is incorrect (should be %d, got %d) (in %s:%d)\n", (int)sizeof( buffers ), (int)size, __FILE__, __LINE__ ); + free(binary); + return -1; + } + + error = clGetProgramInfo( fully_linked_program, CL_PROGRAM_BINARIES, sizeof( buffers ), &buffers, NULL ); + test_error( error, "Unable to get program binary" ); + + // use clCreateProgramWithBinary + fully_linked_program_with_binary = clCreateProgramWithBinary(context, 1, &deviceID, &binarySize, (const unsigned char**)buffers, loadErrors, &error); + test_error( error, "Unable to create program with binary" ); + + error = clGetProgramBuildInfo (fully_linked_program_with_binary, deviceID, CL_PROGRAM_BINARY_TYPE, sizeof(cl_program_binary_type), &program_type, NULL); + test_error( error, "Unable to get program binary type" ); + if (program_type != CL_PROGRAM_BINARY_TYPE_EXECUTABLE) + { + log_error( "ERROR: Expected program type of a program created from a fully linked executable binary to be CL_PROGRAM_BINARY_TYPE_EXECUTABLE (in %s:%d)\n", __FILE__, __LINE__ ); + return -1; + } + program_type = -1; + free(binary); + } + + error = clBuildProgram(fully_linked_program_with_binary, 1, &deviceID, NULL, NULL, NULL); + test_error( error, "Unable to build a simple program" ); + + cl_kernel kernel = 
clCreateKernel(fully_linked_program_with_binary, "CopyBuffer", &error); + test_error( error, "Unable to create a simple kernel" ); + + error = verifyCopyBuffer(context, queue, kernel); + if (error != CL_SUCCESS) + return error; + + cl_kernel another_kernel = clCreateKernel(fully_linked_program_with_binary, "AnotherCopyBuffer", &error); + test_error( error, "Unable to create another simple kernel" ); + + error = verifyCopyBuffer(context, queue, another_kernel); + if (error != CL_SUCCESS) + return error; + + /* All done! */ + error = clReleaseProgram( program ); + test_error( error, "Unable to release program object" ); + + /* Oh, one more thing. Steve Jobs and apparently Herb Sutter. The question is "Who is copying whom?" */ + error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel); + if( program == NULL || error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + return -1; + } + + error = clBuildProgram(program, 1, &deviceID, NULL, NULL, NULL); + test_error( error, "Unable to build a simple program" ); + error = clGetProgramBuildInfo (program, deviceID, CL_PROGRAM_BINARY_TYPE, sizeof(cl_program_binary_type), &program_type, NULL); + test_error( error, "Unable to get program binary type" ); + if (program_type != CL_PROGRAM_BINARY_TYPE_EXECUTABLE) + { + log_error( "ERROR: Expected program type of a program created from compiled object to be CL_PROGRAM_BINARY_TYPE_EXECUTABLE (in %s:%d)\n", __FILE__, __LINE__ ); + return -1; + } + program_type = -1; + + /* All's well that ends well. 
William Shakespeare */ + error = clReleaseProgram( program ); + test_error( error, "Unable to release program object" ); + + error = clReleaseProgram( another_program ); + test_error( error, "Unable to release program object" ); + + error = clReleaseProgram( my_newly_minted_library ); + test_error( error, "Unable to release program object" ); + + error = clReleaseProgram( library_with_binary ); + test_error( error, "Unable to release program object" ); + + error = clReleaseProgram( fully_linked_program ); + test_error( error, "Unable to release program object" ); + + error = clReleaseProgram( fully_linked_program_with_binary ); + test_error( error, "Unable to release program object" ); + + error = clReleaseProgram( program_with_binary ); + test_error( error, "Unable to release program object" ); + + return 0; +} + +volatile int compileNotificationSent; + +void CL_CALLBACK test_notify_compile_complete( cl_program program, void *userData ) +{ + if( userData == NULL || strcmp( (char *)userData, "compilation" ) != 0 ) + { + log_error( "ERROR: User data passed in to compile notify function was not correct! (in %s:%d)\n", __FILE__, __LINE__ ); + compileNotificationSent = -1; + } + else + compileNotificationSent = 1; + log_info( "\n <-- program successfully compiled\n" ); +} + +volatile int libraryCreationNotificationSent; + +void CL_CALLBACK test_notify_create_library_complete( cl_program program, void *userData ) +{ + if( userData == NULL || strcmp( (char *)userData, "create library" ) != 0 ) + { + log_error( "ERROR: User data passed in to library creation notify function was not correct! 
(in %s:%d)\n", __FILE__, __LINE__ ); + libraryCreationNotificationSent = -1; + } + else + libraryCreationNotificationSent = 1; + log_info( "\n <-- library successfully created\n" ); +} + +volatile int linkNotificationSent; + +void CL_CALLBACK test_notify_link_complete( cl_program program, void *userData ) +{ + if( userData == NULL || strcmp( (char *)userData, "linking" ) != 0 ) + { + log_error( "ERROR: User data passed in to link notify function was not correct! (in %s:%d)\n", __FILE__, __LINE__ ); + linkNotificationSent = -1; + } + else + linkNotificationSent = 1; + log_info( "\n <-- program successfully linked\n" ); +} + +int test_large_compile_and_link_status_options_log(cl_context context, cl_device_id deviceID, cl_command_queue queue, unsigned int numLines) +{ + int error; + cl_program program; + cl_program * simple_kernels; + const char **lines; + unsigned int i; + char buffer[MAX_LINE_SIZE_IN_PROGRAM]; + char *compile_log; + char *compile_options; + char *library_log; + char *library_options; + char *linking_log; + char *linking_options; + cl_build_status status; + size_t size_ret; + + compileNotificationSent = libraryCreationNotificationSent = linkNotificationSent = 0; + + simple_kernels = (cl_program*)malloc(numLines*sizeof(cl_program)); + if (simple_kernels == NULL) { + log_error( "ERROR: Unable to allocate kernels array with %d kernels! (in %s:%d)\n", numLines, __FILE__, __LINE__); + return -1; + } + /* First, allocate the array for our line pointers */ + lines = (const char **)malloc( (2*numLines + 2) * sizeof( const char * ) ); + if (lines == NULL) { + log_error( "ERROR: Unable to allocate lines array with %d lines! 
(in %s:%d)\n", (2*numLines + 2), __FILE__, __LINE__); + return -1; + } + + for(i = 0; i < numLines; i++) + { + sprintf(buffer, composite_kernel_extern_template, i); + lines[i] = _strdup(buffer); + } + /* First and last lines are easy */ + lines[ numLines ] = composite_kernel_start; + lines[ 2*numLines + 1] = composite_kernel_end; + + /* Fill the rest with templated kernels */ + for(i = numLines + 1; i < 2*numLines + 1; i++ ) + { + sprintf(buffer, composite_kernel_template, i - numLines - 1); + lines[ i ] = _strdup(buffer); + } + + /* Try to create a program with these lines */ + error = create_single_kernel_helper_create_program(context, &program, 2 * numLines + 2, lines); + if( program == NULL || error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create long test program with %d lines! (%s) (in %s:%d)\n", numLines, IGetErrorString( error ), __FILE__, __LINE__ ); + return -1; + } + + /* Lets check that the compilation status is CL_BUILD_NONE */ + error = clGetProgramBuildInfo( program, deviceID, CL_PROGRAM_BUILD_STATUS, sizeof( status ), &status, NULL ); + test_error( error, "Unable to get program compile status" ); + if (status != CL_BUILD_NONE) + { + log_error( "ERROR: Expected compile status to be CL_BUILD_NONE prior to the beginning of the compilation! (status: %d in %s:%d)\n", (int)status, __FILE__, __LINE__ ); + return -1; + } + + /* Compile it */ + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, test_notify_compile_complete, (void *)"compilation"); + test_error( error, "Unable to compile a simple program" ); + + /* Wait for compile to complete (just keep polling, since we're just a test */ + error = clGetProgramBuildInfo( program, deviceID, CL_PROGRAM_BUILD_STATUS, sizeof( status ), &status, NULL ); + test_error( error, "Unable to get program compile status" ); + + while( (int)status == CL_BUILD_IN_PROGRESS ) + { + log_info( "\n -- still waiting for compile... 
(status is %d)", status ); + sleep( 1 ); + error = clGetProgramBuildInfo( program, deviceID, CL_PROGRAM_BUILD_STATUS, sizeof( status ), &status, NULL ); + test_error( error, "Unable to get program compile status" ); + } + if( status != CL_BUILD_SUCCESS ) + { + log_error( "ERROR: compile failed! (status: %d in %s:%d)\n", (int)status, __FILE__, __LINE__ ); + return -1; + } + + error = clGetProgramBuildInfo( program, deviceID, CL_PROGRAM_BUILD_LOG, 0, NULL, &size_ret ); + test_error( error, "Device failed to return compile log size" ); + compile_log = (char *)malloc(size_ret); + error = clGetProgramBuildInfo( program, deviceID, CL_PROGRAM_BUILD_LOG, size_ret, compile_log, NULL ); + if (error != CL_SUCCESS){ + log_error("Device failed to return a compile log (in %s:%d)\n", __FILE__, __LINE__); + test_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed"); + } + log_info("BUILD LOG: %s\n", compile_log); + free(compile_log); + + error = clGetProgramBuildInfo( program, deviceID, CL_PROGRAM_BUILD_OPTIONS, 0, NULL, &size_ret ); + test_error(error, "Device failed to return compile options size"); + compile_options = (char *)malloc(size_ret); + error = clGetProgramBuildInfo( program, deviceID, CL_PROGRAM_BUILD_OPTIONS, size_ret, compile_options, NULL ); + test_error(error, "Device failed to return compile options.\nclGetProgramBuildInfo CL_PROGRAM_BUILD_OPTIONS failed"); + + log_info("BUILD OPTIONS: %s\n", compile_options); + free(compile_options); + + /* Create and compile templated kernels */ + for( i = 0; i < numLines; i++) + { + sprintf(buffer, simple_kernel_template, i); + const char* kernel_source = _strdup(buffer); + error = create_single_kernel_helper_create_program(context, &simple_kernels[i], 1, &kernel_source); + if( simple_kernels[i] == NULL || error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create long test program with %d lines! 
(%s in %s:%d)", numLines, IGetErrorString( error ), __FILE__, __LINE__ ); + return -1; + } + + /* Compile it */ + error = clCompileProgram(simple_kernels[i], 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); + test_error( error, "Unable to compile a simple program" ); + + free((void*)kernel_source); + } + + /* Create library out of compiled templated kernels */ + cl_program my_newly_minted_library = clLinkProgram(context, 1, &deviceID, "-create-library", numLines, simple_kernels, test_notify_create_library_complete, (void *)"create library", &error); + test_error( error, "Unable to create a multi-line library" ); + + /* Wait for library creation to complete (just keep polling, since we're just a test */ + error = clGetProgramBuildInfo( my_newly_minted_library, deviceID, CL_PROGRAM_BUILD_STATUS, sizeof( status ), &status, NULL ); + test_error( error, "Unable to get library creation link status" ); + + while( (int)status == CL_BUILD_IN_PROGRESS ) + { + log_info( "\n -- still waiting for library creation... (status is %d)", status ); + sleep( 1 ); + error = clGetProgramBuildInfo( my_newly_minted_library, deviceID, CL_PROGRAM_BUILD_STATUS, sizeof( status ), &status, NULL ); + test_error( error, "Unable to get library creation link status" ); + } + if( status != CL_BUILD_SUCCESS ) + { + log_error( "ERROR: library creation failed! 
(status: %d in %s:%d)\n", (int)status, __FILE__, __LINE__ ); + return -1; + } + error = clGetProgramBuildInfo( my_newly_minted_library, deviceID, CL_PROGRAM_BUILD_LOG, 0, NULL, &size_ret ); + test_error( error, "Device failed to return a library creation log size" ); + library_log = (char *)malloc(size_ret); + error = clGetProgramBuildInfo( my_newly_minted_library, deviceID, CL_PROGRAM_BUILD_LOG, size_ret, library_log, NULL ); + if (error != CL_SUCCESS) { + log_error("Device failed to return a library creation log (in %s:%d)\n", __FILE__, __LINE__); + test_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed"); + } + log_info("CREATE LIBRARY LOG: %s\n", library_log); + free(library_log); + + error = clGetProgramBuildInfo( my_newly_minted_library, deviceID, CL_PROGRAM_BUILD_OPTIONS, 0, NULL, &size_ret ); + test_error(error, "Device failed to return library creation options size"); + library_options = (char *)malloc(size_ret); + error = clGetProgramBuildInfo( my_newly_minted_library, deviceID, CL_PROGRAM_BUILD_OPTIONS, size_ret, library_options, NULL ); + test_error(error, "Device failed to return library creation options.\nclGetProgramBuildInfo CL_PROGRAM_BUILD_OPTIONS failed"); + + log_info("CREATE LIBRARY OPTIONS: %s\n", library_options); + free(library_options); + + /* Link the program that calls the kernels and the library that contains them */ + cl_program programs[2] = { program, my_newly_minted_library }; + cl_program my_newly_linked_program = clLinkProgram(context, 1, &deviceID, NULL, 2, programs, test_notify_link_complete, (void *)"linking", &error); + test_error( error, "Unable to link a program with a library" ); + + /* Wait for linking to complete (just keep polling, since we're just a test */ + error = clGetProgramBuildInfo( my_newly_linked_program, deviceID, CL_PROGRAM_BUILD_STATUS, sizeof( status ), &status, NULL ); + test_error( error, "Unable to get program link status" ); + + while( (int)status == CL_BUILD_IN_PROGRESS ) + { + log_info( 
"\n -- still waiting for program linking... (status is %d)", status ); + sleep( 1 ); + error = clGetProgramBuildInfo( my_newly_linked_program, deviceID, CL_PROGRAM_BUILD_STATUS, sizeof( status ), &status, NULL ); + test_error( error, "Unable to get program link status" ); + } + if( status != CL_BUILD_SUCCESS ) + { + log_error( "ERROR: program linking failed! (status: %d in %s:%d)\n", (int)status, __FILE__, __LINE__ ); + return -1; + } + error = clGetProgramBuildInfo( my_newly_linked_program, deviceID, CL_PROGRAM_BUILD_LOG, 0, NULL, &size_ret ); + test_error( error, "Device failed to return a linking log size" ); + linking_log = (char *)malloc(size_ret); + error = clGetProgramBuildInfo( my_newly_linked_program, deviceID, CL_PROGRAM_BUILD_LOG, size_ret, linking_log, NULL ); + if (error != CL_SUCCESS){ + log_error("Device failed to return a linking log (in %s:%d).\n", __FILE__, __LINE__); + test_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed"); + } + log_info("BUILDING LOG: %s\n", linking_log); + free(linking_log); + + error = clGetProgramBuildInfo( my_newly_linked_program, deviceID, CL_PROGRAM_BUILD_OPTIONS, 0, NULL, &size_ret ); + test_error(error, "Device failed to return linking options size"); + linking_options = (char *)malloc(size_ret); + error = clGetProgramBuildInfo( my_newly_linked_program, deviceID, CL_PROGRAM_BUILD_OPTIONS, size_ret, linking_options, NULL ); + test_error(error, "Device failed to return linking options.\nclGetProgramBuildInfo CL_PROGRAM_BUILD_OPTIONS failed"); + + log_info("BUILDING OPTIONS: %s\n", linking_options); + free(linking_options); + + // Create the composite kernel + cl_kernel kernel = clCreateKernel(my_newly_linked_program, "CompositeKernel", &error); + test_error( error, "Unable to create a composite kernel" ); + + // Run the composite kernel and verify the results + error = verifyCopyBuffer(context, queue, kernel); + if (error != CL_SUCCESS) + return error; + + /* All done! 
*/ + error = clReleaseProgram( program ); + test_error( error, "Unable to release program object" ); + + for(i = 0; i < numLines; i++) + { + free( (void*)lines[i] ); + free( (void*)lines[i+numLines+1] ); + } + free( lines ); + + for(i = 0; i < numLines; i++) + { + error = clReleaseProgram( simple_kernels[i] ); + test_error( error, "Unable to release program object" ); + } + free( simple_kernels ); + + error = clReleaseProgram( my_newly_minted_library ); + test_error( error, "Unable to release program object" ); + + error = clReleaseProgram( my_newly_linked_program ); + test_error( error, "Unable to release program object" ); + + return 0; +} + +int test_compile_and_link_status_options_log(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + unsigned int toTest[] = { 256, 0 }; //512, 1024, 8192, 16384, 32768, 0 }; + unsigned int i; + + log_info( "Testing Compile and Link Status, Options and Logging ...this might take awhile...\n" ); + + for( i = 0; toTest[ i ] != 0; i++ ) + { + log_info( " %d...\n", toTest[ i ] ); + +#if defined(_WIN32) + clock_t start = clock(); +#elif defined(__linux__) || defined(__APPLE__) + timeval time1, time2; + gettimeofday(&time1, NULL); +#endif + + if( test_large_compile_and_link_status_options_log( context, deviceID, queue, toTest[ i ] ) != 0 ) + { + log_error( "ERROR: large program compilation, linking, status, options and logging test failed for %d lines! 
(in %s:%d)\n", toTest[ i ], __FILE__, __LINE__ ); + return -1; + } + +#if defined(_WIN32) + clock_t end = clock(); + log_perf( (float)( end - start ) / (float)CLOCKS_PER_SEC, false, "clock() time in secs", "%d lines", toTest[i] ); +#elif defined(__linux__) || defined(__APPLE__) + gettimeofday(&time2, NULL); + log_perf( (float)(float)(time2.tv_sec - time1.tv_sec) + 1.0e-6 * (time2.tv_usec - time1.tv_usec) , false, "wall time in secs", "%d lines", toTest[i] ); +#endif + } + + return 0; +} diff --git a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp new file mode 100644 index 00000000..a262d019 --- /dev/null +++ b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp @@ -0,0 +1,420 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "testBase.h" +#include +#include +#ifndef _WIN32 +#include +#endif + + + + +const char *known_extensions[] = { + "cl_khr_fp64", + "cl_khr_global_int32_base_atomics", + "cl_khr_global_int32_extended_atomics", + "cl_khr_local_int32_base_atomics", + "cl_khr_local_int32_extended_atomics", + "cl_khr_int64_base_atomics", + "cl_khr_int64_extended_atomics", + "cl_khr_3d_image_writes", + "cl_khr_byte_addressable_store", + "cl_khr_fp16", + "cl_khr_gl_sharing", + "cl_khr_gl_event", + "cl_khr_d3d10_sharing", + "cl_khr_d3d11_sharing", + "cl_khr_icd", + "cl_khr_dx9_media_sharing", + "cl_khr_depth_images", + "cl_khr_gl_depth_images", + "cl_khr_gl_msaa_sharing", + "cl_khr_image2d_from_buffer", + "cl_khr_initialize_memory", + "cl_khr_terminate_context", + "cl_khr_spir", + "cl_khr_srgb_image_writes", + "cl_khr_subgroups", + "cl_khr_mipmap_image", + "cl_khr_mipmap_image_writes", + "cl_khr_egl_image", + "cl_khr_egl_event", + "cl_khr_throttle_hints", + "cl_khr_priority_hints", +}; + +size_t num_known_extensions = sizeof(known_extensions)/sizeof(char*); +size_t first_API_extension = 10; + +const char *known_embedded_extensions[] = { + "cles_khr_int64", + NULL +}; + +typedef enum +{ + kUnsupported_extension = -1, + kVendor_extension = 0, + kLanguage_extension = 1, + kAPI_extension = 2 +}Extension_Type; + +const char *kernel_strings[] = { + "kernel void test(global int *defines)\n{\n", + "#pragma OPENCL EXTENSION %s : enable\n", + "#ifdef %s\n" + " defines[%d] = 1;\n" + "#else\n" + " defines[%d] = 0;\n" + "#endif\n", + "#pragma OPENCL EXTENSION %s : disable\n\n", + "}\n" +}; + +int test_compiler_defines_for_extensions(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ) +{ + + int error; + int total_errors = 0; + + + // Get the extensions string for the device + size_t size; + error = clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, 0, NULL, &size); + test_error(error, "clGetDeviceInfo for CL_DEVICE_EXTENSIONS size failed"); + + char *extensions = 
(char*)malloc(sizeof(char)*(size + 1)); + if (extensions == 0) { + log_error("Failed to allocate memory for extensions string.\n"); + return -1; + } + memset( extensions, CHAR_MIN, sizeof(char)*(size+1) ); + + error = clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, sizeof(char)*size, extensions, NULL); + test_error(error, "clGetDeviceInfo for CL_DEVICE_EXTENSIONS failed"); + + // Check to make sure the extension string is NUL terminated. + if( extensions[size] != CHAR_MIN ) + { + test_error( -1, "clGetDeviceInfo for CL_DEVICE_EXTENSIONS wrote past the end of the array!" ); + return -1; + } + extensions[size] = '\0'; // set last char to NUL to avoid problems with string functions later + + // test for termination with '\0' + size_t stringSize = strlen( extensions ); + if( stringSize == size ) + { + test_error( -1, "clGetDeviceInfo for CL_DEVICE_EXTENSIONS is not NUL terminated!" ); + return -1; + } + + // Break up the extensions + log_info("Device reports the following extensions:\n"); + char *extensions_supported[1024]; + Extension_Type extension_type[1024]; + int num_of_supported_extensions = 0; + char *currentP = extensions; + + memset( extension_type, 0, sizeof( extension_type) ); + + // loop over extension string + while (currentP != extensions + stringSize) + { + // skip leading white space + while( *currentP == ' ' ) + currentP++; + + // Exit if end of string + if( *currentP == '\0' ) + { + if( currentP != extensions + stringSize) + { + test_error( -1, "clGetDeviceInfo for CL_DEVICE_EXTENSIONS contains a NUL in the middle of the string!" 
); + return -1; + } + break; + } + + // Not space, not end of string, so extension + char *start = currentP; // start of extension name + + // loop looking for the end + while (*currentP != ' ' && currentP != extensions + stringSize) + { + // check for non-space white space in the extension name + if( isspace(*currentP) ) + { + test_error( -1, "clGetDeviceInfo for CL_DEVICE_EXTENSIONS contains a non-space whitespace in an extension name!" ); + return -1; + } + currentP++; + } + + // record the extension name + uintptr_t extension_length = (uintptr_t) currentP - (uintptr_t) start; + extensions_supported[ num_of_supported_extensions ] = (char*) malloc( (extension_length + 1) * sizeof( char ) ); + if( NULL == extensions_supported[ num_of_supported_extensions ] ) + { + log_error( "Error: unable to allocate memory to hold extension name: %ld chars\n", extension_length ); + return -1; + } + memcpy( extensions_supported[ num_of_supported_extensions ], start, extension_length * sizeof( char ) ); + extensions_supported[ num_of_supported_extensions ][extension_length] = '\0'; + + // If the extension is a cl_khr extension, make sure it is an approved cl_khr extension -- looking for misspellings here + if( extensions_supported[ num_of_supported_extensions ][0] == 'c' && + extensions_supported[ num_of_supported_extensions ][1] == 'l' && + extensions_supported[ num_of_supported_extensions ][2] == '_' && + extensions_supported[ num_of_supported_extensions ][3] == 'k' && + extensions_supported[ num_of_supported_extensions ][4] == 'h' && + extensions_supported[ num_of_supported_extensions ][5] == 'r' && + extensions_supported[ num_of_supported_extensions ][6] == '_' ) + { + size_t ii; + for( ii = 0; ii < num_known_extensions; ii++ ) + { + if( 0 == strcmp( known_extensions[ii], extensions_supported[ num_of_supported_extensions ] ) ) + break; + } + if( ii == num_known_extensions ) + { + log_error( "FAIL: Extension %s is not in the list of approved Khronos extensions!", 
extensions_supported[ num_of_supported_extensions ] ); + return -1; + } + } + // Is it an embedded extension? + else if( memcmp( extensions_supported[ num_of_supported_extensions ], "cles_khr_", 9 ) == 0 ) + { + // Yes, but is it a known one? + size_t ii; + for( ii = 0; known_embedded_extensions[ ii ] != NULL; ii++ ) + { + if( strcmp( known_embedded_extensions[ ii ], extensions_supported[ num_of_supported_extensions ] ) == 0 ) + break; + } + if( known_embedded_extensions[ ii ] == NULL ) + { + log_error( "FAIL: Extension %s is not in the list of approved Khronos embedded extensions!", extensions_supported[ num_of_supported_extensions ] ); + return -1; + } + + // It's approved, but are we even an embedded system? + char profileStr[128] = ""; + error = clGetDeviceInfo( device, CL_DEVICE_PROFILE, sizeof( profileStr ), &profileStr, NULL ); + test_error( error, "Unable to get CL_DEVICE_PROFILE to validate embedded extension name" ); + + if( strcmp( profileStr, "EMBEDDED_PROFILE" ) != 0 ) + { + log_error( "FAIL: Extension %s is an approved embedded extension, but on a non-embedded profile!", extensions_supported[ num_of_supported_extensions ] ); + return -1; + } + } + else + { // All other extensions must be of the form cl__ + if( extensions_supported[ num_of_supported_extensions ][0] != 'c' || + extensions_supported[ num_of_supported_extensions ][1] != 'l' || + extensions_supported[ num_of_supported_extensions ][2] != '_' ) + { + log_error( "FAIL: Extension %s doesn't start with \"cl_\"!", extensions_supported[ num_of_supported_extensions ] ); + return -1; + } + + if( extensions_supported[ num_of_supported_extensions ][3] == '_' || extensions_supported[ num_of_supported_extensions ][3] == '\0' ) + { + log_error( "FAIL: Vendor name is missing in extension %s!", extensions_supported[ num_of_supported_extensions ] ); + return -1; + } + + // look for the second underscore for name + char *p = extensions_supported[ num_of_supported_extensions ] + 4; + while( *p != '\0' && *p 
!= '_' ) + p++; + + if( *p != '_' || p[1] == '\0') + { + log_error( "FAIL: extension name is missing in extension %s!", extensions_supported[ num_of_supported_extensions ] ); + return -1; + } + } + + + num_of_supported_extensions++; + } + + // Build a list of the known extensions that are not supported by the device + char *extensions_not_supported[1024]; + int num_not_supported_extensions = 0; + for( int i = 0; i < num_of_supported_extensions; i++ ) + { + int is_supported = 0; + for( size_t j = 0; j < num_known_extensions; j++ ) + { + if( strcmp( extensions_supported[ i ], known_extensions[ j ] ) == 0 ) + { + extension_type[ i ] = ( j < first_API_extension ) ? kLanguage_extension : kAPI_extension; + is_supported = 1; + break; + } + } + if( !is_supported ) + { + for( int j = 0; known_embedded_extensions[ j ] != NULL; j++ ) + { + if( strcmp( extensions_supported[ i ], known_embedded_extensions[ j ] ) == 0 ) + { + extension_type[ i ] = kLanguage_extension; + is_supported = 1; + break; + } + } + } + if (!is_supported) { + extensions_not_supported[num_not_supported_extensions] = (char*)malloc(strlen(extensions_supported[i])+1); + strcpy(extensions_not_supported[num_not_supported_extensions], extensions_supported[i]); + num_not_supported_extensions++; + } + } + + for (int i=0; i +#endif + +const char * image_supported_source = "kernel void enabled(global int * buf) { \r\n" \ +"int n = get_global_id(0); \r\n"\ +"buf[n] = 0; \r\n "\ +"#ifndef __IMAGE_SUPPORT__ \r\n" \ +"ERROR; \r\n"\ +"#endif \r\n"\ +"\r\n } \r\n"; + + +const char * image_not_supported_source = "kernel void not_enabled(global int * buf) { \r\n" \ +"int n = get_global_id(0); \r\n"\ +"buf[n] = 0; \r\n "\ +"#ifdef __IMAGE_SUPPORT__ \r\n" \ +"ERROR; \r\n"\ +"#endif \r\n"\ +"\r\n } \r\n"; + + +int test_image_macro(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + cl_bool image_support; + char buf[256]; + int status; + cl_program program; + + status = clGetDeviceInfo( 
deviceID, CL_DEVICE_NAME, sizeof( buf ), buf, NULL ); + if( status ) + { + log_error( "getting device info (name): %d\n", status ); + exit(-1); + } + + status = clGetDeviceInfo( deviceID, CL_DEVICE_IMAGE_SUPPORT, sizeof( image_support ), &image_support, NULL ); + if( status ) + { + log_error( "getting device info (image support): %d\n", status ); + return status; + } + + if( (image_support == CL_TRUE) ) + { + status = create_single_kernel_helper_create_program(context, &program, 1, (const char**)&image_supported_source); + + if( status ) + { + log_error ("Failure creating program, [%d] \n", status ); + return status; + } + + status = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL ); + if( status ) + log_error("CL_DEVICE_IMAGE_SUPPORT is set, __IMAGE_SUPPORT__ macro not set \n"); + else + log_info("CL_DEVICE_IMAGE_SUPPORT is set, __IMAGE_SUPPORT__ macro is set \n"); + } + else + { + status = create_single_kernel_helper_create_program(context, &program, 1, (const char**)&image_not_supported_source); + if( status ) + { + log_error ("Failure creating program, [%d] \n", status ); + return status; + } + + status = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL ); + if( status ) + log_error("CL_DEVICE_IMAGE_SUPPORT not set, __IMAGE_SUPPORT__ macro is set \n"); + else + log_info("CL_DEVICE_IMAGE_SUPPORT not set, __IMAGE_SUPPORT__ macro not set \n"); + } + + clReleaseProgram( program ); + return status; +} + diff --git a/test_conformance/compiler/test_pragma_unroll.c b/test_conformance/compiler/test_pragma_unroll.c new file mode 100644 index 00000000..3a5cbf90 --- /dev/null +++ b/test_conformance/compiler/test_pragma_unroll.c @@ -0,0 +1,290 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "testBase.h" + +#include + +const char *pragma_unroll_kernels[] = { +"__kernel void pragma_unroll(__global uint *dst)\n" +"{\n" +" size_t tid = get_global_id(0);\n" +" __attribute__((opencl_unroll_hint))\n" +" for(size_t i = 0; i < 100; ++i)\n" +" dst[i] = i;\n" +"}\n", +"__kernel void pragma_unroll(__global uint *dst)\n" +"{\n" +" size_t tid = get_global_id(0);\n" +" __attribute__((opencl_unroll_hint(1)))\n" +" for(size_t i = 0; i < 100; ++i)\n" +" dst[i] = i;\n" +"}\n", +"__kernel void pragma_unroll(__global uint *dst)\n" +"{\n" +" size_t tid = get_global_id(0);\n" +" __attribute__((opencl_unroll_hint(10)))\n" +" for(size_t i = 0; i < 100; ++i)\n" +" dst[i] = i;\n" +"}\n", +"__kernel void pragma_unroll(__global uint *dst)\n" +"{\n" +" size_t tid = get_global_id(0);\n" +" __attribute__((opencl_unroll_hint(100)))\n" +" for(size_t i = 0; i < 100; ++i)\n" +" dst[i] = i;\n" +"}\n", +"__kernel void pragma_unroll(__global uint *dst)\n" +"{\n" +" size_t tid = get_global_id(0);\n" +" size_t n = (tid + 1) * 100;\n" +" __attribute__((opencl_unroll_hint))\n" +" for(size_t i = 0; i < n; ++i)\n" +" dst[i] = i;\n" +"}\n", +"__kernel void pragma_unroll(__global uint *dst)\n" +"{\n" +" size_t tid = get_global_id(0);\n" +" size_t n = (tid + 1) * 100;\n" +" __attribute__((opencl_unroll_hint(1)))\n" +" for(size_t i = 0; i < n; ++i)\n" +" dst[i] = i;\n" +"}\n", +"__kernel void pragma_unroll(__global uint *dst)\n" +"{\n" +" size_t tid = get_global_id(0);\n" +" size_t n = (tid + 1) * 100;\n" +" __attribute__((opencl_unroll_hint(10)))\n" +" for(size_t i = 
0; i < n; ++i)\n" +" dst[i] = i;\n" +"}\n", +"__kernel void pragma_unroll(__global uint *dst)\n" +"{\n" +" size_t tid = get_global_id(0);\n" +" size_t n = (tid + 1) * 100;\n" +" __attribute__((opencl_unroll_hint(100)))\n" +" for(size_t i = 0; i < n; ++i)\n" +" dst[i] = i;\n" +"}\n", +"__kernel void pragma_unroll(__global uint *dst)\n" +"{\n" +" size_t tid = get_global_id(0);\n" +" size_t i = 0;\n" +" __attribute__((opencl_unroll_hint))\n" +" while(i < 100) {\n" +" dst[i] = i;\n" +" ++i;\n" +" }\n" +"}\n", +"__kernel void pragma_unroll(__global uint *dst)\n" +"{\n" +" size_t tid = get_global_id(0);\n" +" size_t i = 0;\n" +" __attribute__((opencl_unroll_hint(1)))\n" +" while(i < 100) {\n" +" dst[i] = i;\n" +" ++i;\n" +" }\n" +"}\n", +"__kernel void pragma_unroll(__global uint *dst)\n" +"{\n" +" size_t tid = get_global_id(0);\n" +" size_t i = 0;\n" +" __attribute__((opencl_unroll_hint(10)))\n" +" while(i < 100) {\n" +" dst[i] = i;\n" +" ++i;\n" +" }\n" +"}\n", +"__kernel void pragma_unroll(__global uint *dst)\n" +"{\n" +" size_t tid = get_global_id(0);\n" +" size_t i = 0;\n" +" __attribute__((opencl_unroll_hint(100)))\n" +" while(i < 100) {\n" +" dst[i] = i;\n" +" ++i;\n" +" }\n" +"}\n", +"__kernel void pragma_unroll(__global uint *dst)\n" +"{\n" +" size_t tid = get_global_id(0);\n" +" size_t n = (tid + 1) * 100;\n" +" size_t i = 0;\n" +" __attribute__((opencl_unroll_hint))\n" +" while(i < n) {\n" +" dst[i] = i;\n" +" ++i;\n" +" }\n" +"}\n", +"__kernel void pragma_unroll(__global uint *dst)\n" +"{\n" +" size_t tid = get_global_id(0);\n" +" size_t n = (tid + 1) * 100;\n" +" size_t i = 0;\n" +" __attribute__((opencl_unroll_hint(1)))\n" +" while(i < n) {\n" +" dst[i] = i;\n" +" ++i;\n" +" }\n" +"}\n", +"__kernel void pragma_unroll(__global uint *dst)\n" +"{\n" +" size_t tid = get_global_id(0);\n" +" size_t n = (tid + 1) * 100;\n" +" size_t i = 0;\n" +" __attribute__((opencl_unroll_hint(10)))\n" +" while(i < n) {\n" +" dst[i] = i;\n" +" ++i;\n" +" }\n" +"}\n", +"__kernel 
void pragma_unroll(__global uint *dst)\n" +"{\n" +" size_t tid = get_global_id(0);\n" +" size_t n = (tid + 1) * 100;\n" +" size_t i = 0;\n" +" __attribute__((opencl_unroll_hint(100)))\n" +" while(i < n) {\n" +" dst[i] = i;\n" +" ++i;\n" +" }\n" +"}\n", +"__kernel void pragma_unroll(__global uint *dst)\n" +"{\n" +" size_t tid = get_global_id(0);\n" +" size_t i = 0;\n" +" __attribute__((opencl_unroll_hint))\n" +" do {\n" +" dst[i] = i;\n" +" ++i;\n" +" } while(i < 100);\n" +"}\n", +"__kernel void pragma_unroll(__global uint *dst)\n" +"{\n" +" size_t tid = get_global_id(0);\n" +" size_t i = 0;\n" +" __attribute__((opencl_unroll_hint(1)))\n" +" do {\n" +" dst[i] = i;\n" +" ++i;\n" +" } while(i < 100);\n" +"}\n", +"__kernel void pragma_unroll(__global uint *dst)\n" +"{\n" +" size_t tid = get_global_id(0);\n" +" size_t i = 0;\n" +" __attribute__((opencl_unroll_hint(10)))\n" +" do {\n" +" dst[i] = i;\n" +" ++i;\n" +" } while(i < 100);\n" +"}\n", +"__kernel void pragma_unroll(__global uint *dst)\n" +"{\n" +" size_t tid = get_global_id(0);\n" +" size_t i = 0;\n" +" __attribute__((opencl_unroll_hint(100)))\n" +" do {\n" +" dst[i] = i;\n" +" ++i;\n" +" } while(i < 100);\n" +"}\n", +"__kernel void pragma_unroll(__global uint *dst)\n" +"{\n" +" size_t tid = get_global_id(0);\n" +" size_t n = (tid + 1) * 100;\n" +" size_t i = 0;\n" +" __attribute__((opencl_unroll_hint))\n" +" do {\n" +" dst[i] = i;\n" +" ++i;\n" +" } while(i < n);\n" +"}\n", +"__kernel void pragma_unroll(__global uint *dst)\n" +"{\n" +" size_t tid = get_global_id(0);\n" +" size_t n = (tid + 1) * 100;\n" +" size_t i = 0;\n" +" __attribute__((opencl_unroll_hint(1)))\n" +" do {\n" +" dst[i] = i;\n" +" ++i;\n" +" } while(i < n);\n" +"}\n", +"__kernel void pragma_unroll(__global uint *dst)\n" +"{\n" +" size_t tid = get_global_id(0);\n" +" size_t n = (tid + 1) * 100;\n" +" size_t i = 0;\n" +" __attribute__((opencl_unroll_hint(10)))\n" +" do {\n" +" dst[i] = i;\n" +" ++i;\n" +" } while(i < n);\n" +"}\n", +"__kernel 
void pragma_unroll(__global uint *dst)\n" +"{\n" +" size_t tid = get_global_id(0);\n" +" size_t n = (tid + 1) * 100;\n" +" size_t i = 0;\n" +" __attribute__((opencl_unroll_hint(100)))\n" +" do {\n" +" dst[i] = i;\n" +" ++i;\n" +" } while(i < n);\n" +"}\n", +}; + +int test_pragma_unroll(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { + const size_t ELEMENT_NUM = 100; + const size_t KERNEL_NUM = 24; + + cl_int error; + + //execute all kernels and check if the results are as expected + for (size_t kernelIdx = 0; kernelIdx < KERNEL_NUM; ++kernelIdx) { + clProgramWrapper program; + clKernelWrapper kernel; + if( create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, (const char **)&pragma_unroll_kernels[kernelIdx], "pragma_unroll", "-cl-std=CL2.0" ) ) { + log_error("The program we attempted to compile was: \n%s\n", pragma_unroll_kernels[kernelIdx]); + return -1; + } + + clMemWrapper buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, ELEMENT_NUM * sizeof(cl_uint), NULL, &error); + test_error(error, "clCreateBuffer failed"); + + error = clSetKernelArg(kernel, 0, sizeof(buffer), &buffer); + test_error(error, "clSetKernelArg failed"); + + //only one thread should be enough to verify if kernel is fully functional + size_t workSize = 1; + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &workSize, NULL, 0, NULL, NULL); + test_error(error, "clEnqueueNDRangeKernel failed"); + + std::vector results(ELEMENT_NUM, 0); + error = clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, ELEMENT_NUM * sizeof(cl_uint), &results[0], 0, NULL, NULL); + test_error(error, "clEnqueueReadBuffer failed"); + + for (size_t i = 0; i < ELEMENT_NUM; ++i) { + if (results[i] != i) { + log_error("Kernel %d returned invalid result. 
Test: %d, expected: %d\n", kernelIdx + 1, results[i], i); + return -1; + } + } + } + + return 0; +} diff --git a/test_conformance/compiler/test_preprocessor.c b/test_conformance/compiler/test_preprocessor.c new file mode 100644 index 00000000..4f3277c6 --- /dev/null +++ b/test_conformance/compiler/test_preprocessor.c @@ -0,0 +1,342 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "testBase.h" +#include "../../test_common/harness/os_helpers.h" + +extern cl_uint gRandomSeed; + +const char *define_kernel_code[] = { +" #define VALUE\n" +"__kernel void define_test(__global int *src, __global int *dstA, __global int *dstB)\n" +"{\n" +" int tid = get_global_id(0);\n" +"#ifdef VALUE\n" +" dstA[tid] = src[tid] * 2;\n" +"#else\n" +" dstA[tid] = src[tid] * 4;\n" +"#endif\n" +"\n" +"#undef VALUE\n" +"#ifdef VALUE\n" +" dstB[tid] = src[tid] * 2;\n" +"#else\n" +" dstB[tid] = src[tid] * 4;\n" +"#endif\n" +"\n" +"}\n"}; + + + + +int test_preprocessor_define_udef(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { + + cl_int error; + clKernelWrapper kernel; + clProgramWrapper program; + clMemWrapper buffer[3]; + cl_int *srcData, *resultData; + int i; + MTdata d; + + error = create_single_kernel_helper(context, &program, &kernel, 1, define_kernel_code, "define_test"); + if (error) + return -1; + + buffer[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, 
num_elements*sizeof(cl_int), NULL, &error); + test_error( error, "clCreateBuffer failed"); + buffer[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, num_elements*sizeof(cl_int), NULL, &error); + test_error( error, "clCreateBuffer failed"); + buffer[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, num_elements*sizeof(cl_int), NULL, &error); + test_error( error, "clCreateBuffer failed"); + + srcData = (cl_int*)malloc(sizeof(cl_int)*num_elements); + if (srcData == NULL) { + log_error("Failed to allocate storage for source data (%d cl_ints).\n", num_elements); + return -1; + } + + d = init_genrand( gRandomSeed ); + for (i=0; igcc:-xc++ + msvc:"/TP" + ; + +exe computeinfo : main.c ; + +install dist + : computeinfo + : debug:$(DIST)/debug/tests/test_conformance/computeinfo + release:$(DIST)/release/tests/test_conformance/computeinfo + ; diff --git a/test_conformance/computeinfo/Makefile b/test_conformance/computeinfo/Makefile new file mode 100644 index 00000000..f9ff3d38 --- /dev/null +++ b/test_conformance/computeinfo/Makefile @@ -0,0 +1,32 @@ +ifdef BUILD_WITH_ATF +ATF = -framework ATF +USE_ATF = -DUSE_ATF +endif + +LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries +LIBPATH += -L. +FRAMEWORK = main.c ../../test_common/harness/errorHelpers.c ../../test_common/harness/kernelHelpers.c +HEADERS = +TARGET = computeinfo +INCLUDE = -I./../../OpenCL/cl_headers/public +COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32 +CC = c++ +CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} +LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${RC_CFLAGS} ${ATF} + +OBJECTS = main.o errorHelpers.o kernelHelpers.o + +TARGETOBJECT = +all: $(TARGET) + +$(OBJECTS): $(FRAMEWORK) $(HEADERS) + $(CC) $(CFLAGS) $(INCLUDE) $(FRAMEWORK) + +$(TARGET): $(OBJECTS) + $(CC) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES) + +clean: + rm -f $(TARGET) $(OBJECTS) + +.DEFAULT: + @echo The target \"$@\" does not exist in Makefile. 
diff --git a/test_conformance/computeinfo/main.c b/test_conformance/computeinfo/main.c new file mode 100644 index 00000000..8126c95a --- /dev/null +++ b/test_conformance/computeinfo/main.c @@ -0,0 +1,873 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include "../../test_common/harness/testHarness.h" +#include "../../test_common/harness/errorHelpers.h" +#include "../../test_common/harness/kernelHelpers.h" + +static int dump_supported_formats; + +typedef struct +{ + cl_device_type device_type; + const char* device_type_name; + unsigned num_devices; + cl_device_id* devices; + // more infos here +} device_info; + +device_info device_infos[] = +{ + {CL_DEVICE_TYPE_DEFAULT, "CL_DEVICE_TYPE_DEFAULT", -1, NULL}, + {CL_DEVICE_TYPE_CPU, "CL_DEVICE_TYPE_CPU", -1, NULL}, + {CL_DEVICE_TYPE_GPU, "CL_DEVICE_TYPE_GPU", -1, NULL}, + {CL_DEVICE_TYPE_ACCELERATOR, "CL_DEVICE_TYPE_ACCELERATOR", -1, NULL}, + {CL_DEVICE_TYPE_ALL, "CL_DEVICE_TYPE_ALL", -1, NULL}, +}; + +// config types +enum +{ + type_cl_device_type, + type_cl_device_fp_config, + type_cl_device_mem_cache_type, + type_cl_local_mem_type, + type_cl_device_exec_capabilities, + type_cl_command_queue_properties, + type_cl_device_id, + type_cl_device_affinity_domain, + type_cl_uint, + type_size_t, + type_size_t_arr, + type_cl_ulong, + type_string, + type_cl_device_svm_capabilities, 
+}; + +typedef union +{ + cl_device_type type; + cl_device_fp_config fp_config; + cl_device_mem_cache_type mem_cache_type; + cl_device_local_mem_type local_mem_type; + cl_device_exec_capabilities exec_capabilities; + cl_command_queue_properties queue_properties; + cl_device_id device_id; + cl_device_affinity_domain affinity_domain; + cl_int uint; + size_t sizet; + size_t sizet_arr[3]; + cl_ulong ull; + char string[1024]; + cl_device_svm_capabilities svmCapabilities; +} config_data; + +struct _version { + int major; + int minor; +}; +typedef struct _version version_t; + +struct _extensions { + int cl_khr_fp64; + int cl_khr_fp16; +}; +typedef struct _extensions extensions_t; + +// Compare two versions, return -1 (the first is lesser), 0 (equal), 1 (the first is greater). +int vercmp( version_t a, version_t b ) { + if ( a.major < b.major || a.major == b.major && a.minor < b.minor ) { + return -1; + } else if ( a.major == b.major && a.minor == b.minor ) { + return 0; + } else { + return 1; + } +} + +typedef struct +{ + version_t version; // Opcode is introduced in this version of OpenCL spec. + cl_device_info opcode; + const char* opcode_name; + int config_type; + config_data config; +} config_info; + +#define CONFIG_INFO( major, minor, opcode, type ) { { major, minor }, opcode, #opcode, type_ ## type, { 0 } } + +config_info image_buffer_config_infos[] = +{ +#ifdef CL_DEVICE_IMAGE_PITCH_ALIGNMENT + CONFIG_INFO( 1, 2, CL_DEVICE_IMAGE_PITCH_ALIGNMENT, cl_uint), + CONFIG_INFO( 1, 2, CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT, cl_uint), +#endif +}; + +config_info config_infos[] = +{ + // `CL_DEVICE_VERSION' must be the first item in the list! It's version must be 0, 0. + CONFIG_INFO( 0, 0, CL_DEVICE_VERSION, string), + // `CL_DEVICE_EXTENSIONS' must be the second! 
+ CONFIG_INFO( 1, 1, CL_DEVICE_EXTENSIONS, string), + + CONFIG_INFO( 1, 1, CL_DEVICE_TYPE, cl_device_type), + CONFIG_INFO( 1, 1, CL_DEVICE_VENDOR_ID, cl_uint), + CONFIG_INFO( 1, 1, CL_DEVICE_MAX_COMPUTE_UNITS, cl_uint), + CONFIG_INFO( 1, 1, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, cl_uint), + CONFIG_INFO( 1, 1, CL_DEVICE_MAX_WORK_ITEM_SIZES, size_t_arr), + CONFIG_INFO( 1, 1, CL_DEVICE_MAX_WORK_GROUP_SIZE, size_t), + CONFIG_INFO( 1, 1, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, cl_uint), + CONFIG_INFO( 1, 1, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, cl_uint), + CONFIG_INFO( 1, 1, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, cl_uint), + CONFIG_INFO( 1, 1, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, cl_uint), + CONFIG_INFO( 1, 1, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, cl_uint), + CONFIG_INFO( 1, 1, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, cl_uint), + CONFIG_INFO( 1, 1, CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF, cl_uint), + CONFIG_INFO( 1, 1, CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR, cl_uint), + CONFIG_INFO( 1, 1, CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT, cl_uint), + CONFIG_INFO( 1, 1, CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, cl_uint), + CONFIG_INFO( 1, 1, CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, cl_uint), + CONFIG_INFO( 1, 1, CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, cl_uint), + CONFIG_INFO( 1, 1, CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, cl_uint), + CONFIG_INFO( 1, 1, CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF, cl_uint), + + CONFIG_INFO( 1, 1, CL_DEVICE_MAX_CLOCK_FREQUENCY, cl_uint), + CONFIG_INFO( 1, 1, CL_DEVICE_ADDRESS_BITS, cl_uint), + CONFIG_INFO( 1, 1, CL_DEVICE_MAX_READ_IMAGE_ARGS, cl_uint), + CONFIG_INFO( 1, 1, CL_DEVICE_MAX_WRITE_IMAGE_ARGS, cl_uint), + CONFIG_INFO( 2, 0, CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS, cl_uint), + CONFIG_INFO( 1, 1, CL_DEVICE_MAX_MEM_ALLOC_SIZE, cl_ulong), + CONFIG_INFO( 1, 1, CL_DEVICE_IMAGE2D_MAX_WIDTH, size_t), + CONFIG_INFO( 1, 1, CL_DEVICE_IMAGE2D_MAX_HEIGHT, size_t), + CONFIG_INFO( 1, 1, CL_DEVICE_IMAGE3D_MAX_WIDTH, size_t), + CONFIG_INFO( 1, 1, CL_DEVICE_IMAGE3D_MAX_HEIGHT, size_t), + 
CONFIG_INFO( 1, 1, CL_DEVICE_IMAGE3D_MAX_DEPTH, size_t), + CONFIG_INFO( 1, 2, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, size_t), + CONFIG_INFO( 1, 2, CL_DEVICE_IMAGE_MAX_BUFFER_SIZE, size_t), + CONFIG_INFO( 1, 1, CL_DEVICE_IMAGE_SUPPORT, cl_uint), + CONFIG_INFO( 1, 1, CL_DEVICE_MAX_PARAMETER_SIZE, size_t), + CONFIG_INFO( 1, 1, CL_DEVICE_MAX_SAMPLERS, cl_uint), + CONFIG_INFO( 2, 0, CL_DEVICE_IMAGE_PITCH_ALIGNMENT, cl_uint), + CONFIG_INFO( 2, 0, CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT, cl_uint), + + CONFIG_INFO( 1, 1, CL_DEVICE_MEM_BASE_ADDR_ALIGN, cl_uint), + CONFIG_INFO( 1, 1, CL_DEVICE_SINGLE_FP_CONFIG, cl_device_fp_config), + CONFIG_INFO( 1, 1, CL_DEVICE_DOUBLE_FP_CONFIG, cl_device_fp_config), + CONFIG_INFO( 1, 1, CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, cl_device_mem_cache_type), + CONFIG_INFO( 1, 1, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, cl_uint), + CONFIG_INFO( 1, 1, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, cl_ulong), + CONFIG_INFO( 1, 1, CL_DEVICE_GLOBAL_MEM_SIZE, cl_ulong), + + CONFIG_INFO( 1, 1, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, cl_ulong), + CONFIG_INFO( 1, 1, CL_DEVICE_MAX_CONSTANT_ARGS, cl_uint), + CONFIG_INFO( 1, 1, CL_DEVICE_LOCAL_MEM_TYPE, cl_local_mem_type), + CONFIG_INFO( 1, 1, CL_DEVICE_LOCAL_MEM_SIZE, cl_ulong), + CONFIG_INFO( 1, 1, CL_DEVICE_ERROR_CORRECTION_SUPPORT, cl_uint), + CONFIG_INFO( 1, 1, CL_DEVICE_HOST_UNIFIED_MEMORY, cl_uint), + CONFIG_INFO( 1, 1, CL_DEVICE_PROFILING_TIMER_RESOLUTION, size_t), + CONFIG_INFO( 1, 1, CL_DEVICE_ENDIAN_LITTLE, cl_uint), + CONFIG_INFO( 1, 1, CL_DEVICE_AVAILABLE, cl_uint), + CONFIG_INFO( 1, 1, CL_DEVICE_COMPILER_AVAILABLE, cl_uint), + CONFIG_INFO( 1, 2, CL_DEVICE_LINKER_AVAILABLE, cl_uint), + + CONFIG_INFO( 1, 2, CL_DEVICE_BUILT_IN_KERNELS, string), + + CONFIG_INFO( 1, 2, CL_DEVICE_PRINTF_BUFFER_SIZE, size_t), + CONFIG_INFO( 1, 2, CL_DEVICE_PREFERRED_INTEROP_USER_SYNC, cl_uint), + + CONFIG_INFO( 1, 2, CL_DEVICE_PARENT_DEVICE, cl_device_id), + CONFIG_INFO( 1, 2, CL_DEVICE_PARTITION_MAX_SUB_DEVICES, cl_uint), + CONFIG_INFO( 1, 2, 
CL_DEVICE_PARTITION_AFFINITY_DOMAIN, cl_device_affinity_domain), + CONFIG_INFO( 1, 2, CL_DEVICE_REFERENCE_COUNT, cl_uint), + + CONFIG_INFO( 1, 1, CL_DEVICE_EXECUTION_CAPABILITIES, cl_device_exec_capabilities), + CONFIG_INFO( 1, 1, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES, cl_command_queue_properties), + CONFIG_INFO( 1, 1, CL_DEVICE_NAME, string), + CONFIG_INFO( 1, 1, CL_DEVICE_VENDOR, string), + CONFIG_INFO( 1, 1, CL_DRIVER_VERSION, string), + CONFIG_INFO( 1, 1, CL_DEVICE_PROFILE, string), + CONFIG_INFO( 1, 1, CL_DEVICE_VERSION, string), + CONFIG_INFO( 1, 1, CL_DEVICE_OPENCL_C_VERSION, string), + CONFIG_INFO( 1, 1, CL_DEVICE_EXTENSIONS, string), + + CONFIG_INFO( 2, 0, CL_DEVICE_MAX_PIPE_ARGS, cl_uint), + CONFIG_INFO( 2, 0, CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS, cl_uint), + CONFIG_INFO( 2, 0, CL_DEVICE_PIPE_MAX_PACKET_SIZE, cl_uint), + + CONFIG_INFO( 2, 0, CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE, size_t), + CONFIG_INFO( 2, 0, CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE, size_t), + + CONFIG_INFO( 2, 0, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES, cl_command_queue_properties), + CONFIG_INFO( 2, 0, CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES, cl_command_queue_properties), + CONFIG_INFO( 2, 0, CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE, cl_uint), + CONFIG_INFO( 2, 0, CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE, cl_uint), + CONFIG_INFO( 2, 0, CL_DEVICE_MAX_ON_DEVICE_QUEUES, cl_uint), + CONFIG_INFO( 2, 0, CL_DEVICE_MAX_ON_DEVICE_EVENTS, cl_uint), + + CONFIG_INFO( 2, 0, CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT, cl_uint), + CONFIG_INFO( 2, 0, CL_DEVICE_PREFERRED_GLOBAL_ATOMIC_ALIGNMENT, cl_uint), + CONFIG_INFO( 2, 0, CL_DEVICE_PREFERRED_LOCAL_ATOMIC_ALIGNMENT , cl_uint), + + CONFIG_INFO( 2, 0, CL_DEVICE_SVM_CAPABILITIES, cl_device_svm_capabilities), +}; + +#define ENTRY(T) { T, #T } +struct image_type_entry { + cl_mem_object_type val; + const char *str; +}; +static const struct image_type_entry image_types[] = { + ENTRY(CL_MEM_OBJECT_IMAGE1D), ENTRY(CL_MEM_OBJECT_IMAGE1D_BUFFER), + 
ENTRY(CL_MEM_OBJECT_IMAGE2D), ENTRY(CL_MEM_OBJECT_IMAGE3D), + ENTRY(CL_MEM_OBJECT_IMAGE1D_ARRAY), ENTRY(CL_MEM_OBJECT_IMAGE2D_ARRAY) +}; + +struct supported_flags_entry { + cl_mem_flags val; + const char *str; +}; + +static const struct supported_flags_entry supported_flags[] = { + ENTRY(CL_MEM_READ_ONLY), ENTRY(CL_MEM_WRITE_ONLY), + ENTRY(CL_MEM_READ_WRITE), ENTRY(CL_MEM_KERNEL_READ_AND_WRITE) +}; + +int getImageInfo(cl_device_id device) +{ + cl_context ctx; + cl_int err; + cl_uint i, num_supported; + cl_image_format *formats; + int num_errors; + int ii, ni = sizeof(image_types)/sizeof(image_types[0]); + int fi, nf = sizeof(supported_flags)/sizeof(supported_flags[0]); + + ctx = clCreateContext(NULL, 1, &device, notify_callback, NULL, &err); + if (!ctx) { + print_error(err, "Unable to create context from device"); + return 1; + } + + num_errors = 0; + for (ii=0; iiconfig_type) + { + case type_cl_device_type: + err = clGetDeviceInfo(device, info->opcode, sizeof(info->config.type), &info->config.type, &config_size_ret); + size_err = config_size_ret != sizeof(info->config.type); + break; + case type_cl_device_fp_config: + err = clGetDeviceInfo(device, info->opcode, sizeof(info->config.fp_config), &info->config.fp_config, &config_size_ret); + size_err = config_size_ret != sizeof(info->config.fp_config); + break; + case type_cl_device_mem_cache_type: + err = clGetDeviceInfo(device, info->opcode, sizeof(info->config.mem_cache_type), &info->config.mem_cache_type, &config_size_ret); + size_err = config_size_ret != sizeof(info->config.mem_cache_type); + break; + case type_cl_local_mem_type: + err = clGetDeviceInfo(device, info->opcode, sizeof(info->config.local_mem_type), &info->config.local_mem_type, &config_size_ret); + size_err = config_size_ret != sizeof(info->config.local_mem_type); + break; + case type_cl_device_exec_capabilities: + err = clGetDeviceInfo(device, info->opcode, sizeof(info->config.exec_capabilities), &info->config.exec_capabilities, &config_size_ret); + 
size_err = config_size_ret != sizeof(info->config.exec_capabilities); + break; + case type_cl_command_queue_properties: + err = clGetDeviceInfo(device, info->opcode, sizeof(info->config.queue_properties), &info->config.queue_properties, &config_size_ret); + size_err = config_size_ret != sizeof(info->config.queue_properties); + break; + case type_cl_device_id: + err = clGetDeviceInfo(device, info->opcode, sizeof(info->config.device_id), &info->config.device_id, &config_size_ret); + size_err = config_size_ret != sizeof(info->config.device_id); + break; + case type_cl_device_affinity_domain: + err = clGetDeviceInfo(device, info->opcode, sizeof(info->config.affinity_domain), &info->config.affinity_domain, &config_size_ret); + size_err = config_size_ret != sizeof(info->config.affinity_domain); + break; + case type_cl_uint: + err = clGetDeviceInfo(device, info->opcode, sizeof(info->config.uint), &info->config.uint, &config_size_ret); + size_err = config_size_ret != sizeof(info->config.uint); + break; + case type_size_t_arr: + err = clGetDeviceInfo(device, info->opcode, sizeof(info->config.sizet_arr), &info->config.sizet_arr, &config_size_ret); + size_err = config_size_ret != sizeof(info->config.sizet_arr); + break; + case type_size_t: + err = clGetDeviceInfo(device, info->opcode, sizeof(info->config.sizet), &info->config.sizet, &config_size_ret); + size_err = config_size_ret != sizeof(info->config.sizet); + break; + case type_cl_ulong: + err = clGetDeviceInfo(device, info->opcode, sizeof(info->config.ull), &info->config.ull, &config_size_ret); + size_err = config_size_ret != sizeof(info->config.ull); + break; + case type_string: + err = clGetDeviceInfo(device, info->opcode, sizeof(info->config.string), &info->config.string, &config_size_ret); + break; + case type_cl_device_svm_capabilities: + err = clGetDeviceInfo(device, info->opcode, sizeof(info->config.svmCapabilities), &info->config.svmCapabilities, &config_size_ret); + break; + default: + log_error("Unknown config 
type: %d\n", info->config_type); + break; + } + if (err || size_err) + log_error("\tFailed clGetDeviceInfo for %s.\n", info->opcode_name); + if(err) + print_error(err, "\t\tclGetDeviceInfo failed."); + if (size_err) + log_error("\t\tWrong size return from clGetDeviceInfo.\n"); + return err || size_err; +} + +void dumpConfigInfo(cl_device_id device, config_info* info) +{ + // We should not error if we find an unknown configuration since vendors + // may specify their own options beyond the list in the specification. + switch(info->config_type) + { + case type_cl_device_type: + log_info("\t%s == %s|%s|%s|%s\n", info->opcode_name, + (info->config.fp_config & CL_DEVICE_TYPE_CPU) ? "CL_DEVICE_TYPE_CPU":"", + (info->config.fp_config & CL_DEVICE_TYPE_GPU) ? "CL_DEVICE_TYPE_GPU":"", + (info->config.fp_config & CL_DEVICE_TYPE_ACCELERATOR) ? "CL_DEVICE_TYPE_ACCELERATOR":"", + (info->config.fp_config & CL_DEVICE_TYPE_DEFAULT) ? "CL_DEVICE_TYPE_DEFAULT":"" + ); + { + cl_device_type all_device_types = CL_DEVICE_TYPE_CPU | CL_DEVICE_TYPE_GPU | CL_DEVICE_TYPE_ACCELERATOR | CL_DEVICE_TYPE_DEFAULT; + if(info->config.fp_config & ~all_device_types) + { + log_info("WARNING: %s unknown bits found 0x%08llX", info->opcode_name, (info->config.fp_config & ~all_device_types)); + } + } + break; + case type_cl_device_fp_config: + log_info("\t%s == %s|%s|%s|%s|%s|%s|%s\n", info->opcode_name, + (info->config.fp_config & CL_FP_DENORM) ? "CL_FP_DENORM":"", + (info->config.fp_config & CL_FP_INF_NAN) ? "CL_FP_INF_NAN":"", + (info->config.fp_config & CL_FP_ROUND_TO_NEAREST) ? "CL_FP_ROUND_TO_NEAREST":"", + (info->config.fp_config & CL_FP_ROUND_TO_ZERO) ? "CL_FP_ROUND_TO_ZERO":"", + (info->config.fp_config & CL_FP_ROUND_TO_INF) ? "CL_FP_ROUND_TO_INF":"", + (info->config.fp_config & CL_FP_FMA) ? "CL_FP_FMA":"", + (info->config.fp_config & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT) ? 
"CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT":"" + ); + { + cl_device_fp_config all_fp_config = CL_FP_DENORM | CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST | + CL_FP_ROUND_TO_ZERO | CL_FP_ROUND_TO_INF | CL_FP_FMA | + CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT; + if(info->config.fp_config & ~all_fp_config) + log_info("WARNING: %s unknown bits found 0x%08llX", info->opcode_name, (info->config.fp_config & ~all_fp_config)); + } + break; + case type_cl_device_mem_cache_type: + switch(info->config.mem_cache_type) + { + case CL_NONE: + log_info("\t%s == CL_NONE\n", info->opcode_name); + break; + case CL_READ_ONLY_CACHE: + log_info("\t%s == CL_READ_ONLY_CACHE\n", info->opcode_name); + break; + case CL_READ_WRITE_CACHE: + log_info("\t%s == CL_READ_WRITE_CACHE\n", info->opcode_name); + break; + default: + log_error("ERROR: %s out of range, %d\n", info->opcode_name, info->config.mem_cache_type); + break; + } + break; + case type_cl_local_mem_type: + switch(info->config.local_mem_type) + { + case CL_NONE: + log_info("\t%s == CL_NONE\n", info->opcode_name); + break; + case CL_LOCAL: + log_info("\t%s == CL_LOCAL\n", info->opcode_name); + break; + case CL_GLOBAL: + log_info("\t%s == CL_GLOBAL\n", info->opcode_name); + break; + default: + log_info("WARNING: %s out of range, %d\n", info->opcode_name, info->config.local_mem_type); + break; + } + break; + case type_cl_device_exec_capabilities: + log_info("\t%s == %s|%s\n", info->opcode_name, + (info->config.exec_capabilities & CL_EXEC_KERNEL) ? "CL_EXEC_KERNEL":"", + (info->config.exec_capabilities & CL_EXEC_NATIVE_KERNEL) ? 
"CL_EXEC_NATIVE_KERNEL":"" ); + { + cl_device_exec_capabilities all_exec_cap = CL_EXEC_KERNEL | CL_EXEC_NATIVE_KERNEL; + if(info->config.exec_capabilities & ~all_exec_cap) + log_info("WARNING: %s unknown bits found 0x%08llX", info->opcode_name, (info->config.exec_capabilities & ~all_exec_cap)); + } + break; + case type_cl_command_queue_properties: + log_info("\t%s == %s|%s\n", info->opcode_name, + (info->config.queue_properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) ? "CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE":"", + (info->config.queue_properties & CL_QUEUE_PROFILING_ENABLE) ? "CL_QUEUE_PROFILING_ENABLE":""); + { + cl_command_queue_properties all_queue_properties = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_PROFILING_ENABLE; + if(info->config.queue_properties & ~all_queue_properties) + log_info("WARNING: %s unknown bits found 0x%08llX", info->opcode_name, (info->config.exec_capabilities & ~all_queue_properties)); + } + break; + case type_cl_device_id: + log_info("\t%s == %ld\n", info->opcode_name, (intptr_t)info->config.device_id); + break; + case type_cl_device_affinity_domain: + log_info("\t%s == %s|%s|%s|%s|%s|%s\n", info->opcode_name, + (info->config.affinity_domain & CL_DEVICE_AFFINITY_DOMAIN_NUMA) ? "CL_DEVICE_AFFINITY_DOMAIN_NUMA":"", + (info->config.affinity_domain & CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE) ? "CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE":"", + (info->config.affinity_domain & CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE) ? "CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE":"", + (info->config.affinity_domain & CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE) ? "CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE":"", + (info->config.affinity_domain & CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE) ? "CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE":"", + (info->config.affinity_domain & CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE) ? 
"CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE":"" + ); + { + cl_device_affinity_domain all_affinity_domain = CL_DEVICE_AFFINITY_DOMAIN_NUMA | + CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE | + CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE | + CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE | + CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE | + CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE; + if(info->config.affinity_domain & ~all_affinity_domain) + log_error("ERROR: %s unknown bits found 0x%08llX", info->opcode_name, (info->config.affinity_domain & ~all_affinity_domain)); + } + break; + case type_cl_uint: + log_info("\t%s == %u\n", info->opcode_name, info->config.uint); + break; + case type_size_t_arr: + log_info("\t%s == %d %d %d\n", info->opcode_name, info->config.sizet_arr[0], + info->config.sizet_arr[1], info->config.sizet_arr[2]); + break; + case type_size_t: + log_info("\t%s == %ld\n", info->opcode_name, info->config.sizet); + break; + case type_cl_ulong: + log_info("\t%s == %lld\n", info->opcode_name, info->config.ull); + break; + case type_string: + log_info("\t%s == \"%s\"\n", info->opcode_name, info->config.string); + break; + case type_cl_device_svm_capabilities: + log_info("\t%s == %s|%s|%s|%s\n", info->opcode_name, + (info->config.svmCapabilities & CL_DEVICE_SVM_COARSE_GRAIN_BUFFER) ? "CL_DEVICE_SVM_COARSE_GRAIN_BUFFER":"", + (info->config.svmCapabilities & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) ? "CL_DEVICE_SVM_FINE_GRAIN_BUFFER":"", + (info->config.svmCapabilities & CL_DEVICE_SVM_FINE_GRAIN_SYSTEM) ? "CL_DEVICE_SVM_FINE_GRAIN_SYSTEM":"", + (info->config.svmCapabilities & CL_DEVICE_SVM_ATOMICS) ? 
"CL_DEVICE_SVM_ATOMICS":"");
+        // Warn (but do not fail) if the device reports SVM capability bits
+        // outside the four defined by the OpenCL 2.x spec.
+        {
+            cl_device_svm_capabilities all_svm_capabilities = CL_DEVICE_SVM_COARSE_GRAIN_BUFFER |
+                                                              CL_DEVICE_SVM_FINE_GRAIN_BUFFER |
+                                                              CL_DEVICE_SVM_FINE_GRAIN_SYSTEM |
+                                                              CL_DEVICE_SVM_ATOMICS;
+            if(info->config.svmCapabilities & ~all_svm_capabilities)
+                log_info("WARNING: %s unknown bits found 0x%08llX", info->opcode_name, (info->config.svmCapabilities & ~all_svm_capabilities));
+        }
+        break;
+    }
+}
+
+// Queries one string-valued platform property via clGetPlatformInfo
+// (currently every platform selector returns a string), checks that the
+// returned value has non-zero size and is NUL terminated, and logs it.
+// Exits the process with -1 on any failure.
+void print_platform_string_selector( cl_platform_id platform, const char *selector_name, cl_platform_info selector )
+{
+    // Currently all the selectors are strings
+    size_t size = 0;
+    char *value;
+    int err;
+
+    // First query only the size of the property value.
+    if(( err = clGetPlatformInfo( platform, selector, 0, NULL, &size )))
+    {
+        log_error( "FAILURE: Unable to get platform info size for %s.\n", selector_name );
+        exit( -1 );
+    }
+
+    if( size == 0 )
+    {
+        log_error( "FAILURE: The size of %s was returned to be zero.\n", selector_name );
+        exit( -1 );
+    }
+
+    value = (char*) malloc( size );
+    if( NULL == value )
+    {
+        log_error( "Internal test failure: Unable to allocate %ld bytes\n", size );
+        exit(-1);
+    }
+
+    // Pre-fill with a non-zero pattern so a missing NUL terminator is
+    // detectable in the check below.
+    memset( value, -1, size );
+    if(( err = clGetPlatformInfo( platform, selector, size, value, NULL )))
+    {
+        log_error( "FAILURE: Unable to get platform info for %s.\n", selector_name );
+        free( value );
+        exit( -1 );
+    }
+
+    if( value[size-1] != '\0' )
+    {
+        log_error( "FAILURE: platform info for %s is either not NUL terminated, or the size is wrong.\n", selector_name );
+        free( value );
+        exit( -1 );
+    }
+
+    log_info( "\t%s: %s\n", selector_name, value );
+    free( value );
+}
+
+// Parses a version string of the form "OpenCL <major>.<minor>[ <vendor text>]"
+// into *version. Only the versions this test knows (1.1, 1.2, 2.0, 2.1) are
+// accepted. Returns 0 on success, -1 (with an error logged) otherwise.
+int parseVersion( char const * str, version_t * version )
+{
+    int rc = -1;
+    version->major = 0;
+    version->minor = 0;
+    if ( strncmp( str, "OpenCL 1.2", 10 ) == 0 && ( str[ 10 ] == 0 || str[ 10 ] == ' ' ) ) {
+        version->major = 1;
+        version->minor = 2;
+        rc = 0;
+    } else if ( strncmp( str, "OpenCL 1.1", 10 ) == 0 && ( str[ 10 ] == 0 || str[ 10 ] == ' ' ) ) {
+        version->major = 1;
+        version->minor = 1;
+        rc = 0;
+    } else if ( strncmp( str, "OpenCL 2.0", 10 ) == 0 && ( str[ 10 ] == 0 || str[ 10 ] == ' ' ) ) {
+        version->major = 2;
+        version->minor = 0;
+        rc = 0;
+    } else if ( strncmp( str, "OpenCL 2.1", 10 ) == 0 && ( str[ 10 ] == 0 || str[ 10 ] == ' ' ) ) {
+        version->major = 2;
+        version->minor = 1;
+        rc = 0;
+    } else {
+        log_error( "ERROR: Unexpected version string: `%s'.\n", str );
+    };
+    return rc;
+}
+
+// Scans a space-separated extension string and records in *extensions whether
+// cl_khr_fp64 and cl_khr_fp16 are present. Always returns 0.
+int parseExtensions( char const * str, extensions_t * extensions )
+{
+    char const * begin = NULL;
+    char const * space = NULL;
+    size_t length = 0;
+
+    memset( extensions, 0, sizeof ( extensions_t ) );
+
+    begin = str;
+    while ( begin[ 0 ] != 0 ) {
+        space = strchr( begin, ' ' ); // Find space position.
+        if ( space != NULL ) {        // Calculate length of word.
+            length = space - begin;
+        } else {
+            length = strlen( begin );
+        }
+        if ( strncmp( begin, "cl_khr_fp64", length ) == 0 ) {
+            extensions->cl_khr_fp64 = 1;
+        }
+        if ( strncmp( begin, "cl_khr_fp16", length ) == 0 ) {
+            extensions->cl_khr_fp16 = 1;
+        }
+        begin += length;              // Skip word.
+        if ( begin[ 0 ] == ' ' ) {    // Skip space, if any.
+            begin += 1;
+        }
+    }
+
+    return 0;
+}
+
+// Queries and dumps every property in config_infos for the given device,
+// honoring each property's minimum OpenCL version and the fp64/fp16 special
+// cases, then the cl_khr_image2d_from_buffer properties and the image info.
+// Returns the number of errors encountered.
+int getConfigInfos( cl_device_id device )
+{
+    int total_errors = 0;
+    unsigned onConfigInfo;
+    version_t version = { 0, 0 }; // Version of the device. Will get real value on the first loop iteration.
+    version_t const ver11 = { 1, 1 }; // Version 1.1.
+    extensions_t extensions = { 0 };
+    int get; // Boolean flag: true = get property, false = skip it.
+    int err;
+    for ( onConfigInfo = 0; onConfigInfo < sizeof(config_infos) / sizeof(config_infos[0]); onConfigInfo++) {
+        config_info info = config_infos[ onConfigInfo ];
+        // Get a property only if device version is equal or greater than property version.
+        get = ( vercmp( version, info.version ) >= 0 );
+        if ( info.opcode == CL_DEVICE_DOUBLE_FP_CONFIG && vercmp( version, ver11 ) <= 0 ) {
+            // CL_DEVICE_DOUBLE_FP_CONFIG is a special case. It was introduced in OpenCL 1.1, but
+            // device is required to report it only if doubles are supported. So, before querying
+            // it on device version 1.1, we have to check doubles are supported.
+            // In OpenCL 1.2 CL_DEVICE_DOUBLE_FP_CONFIG should be reported unconditionally.
+            get = extensions.cl_khr_fp64;
+        };
+        if ( info.opcode == CL_DEVICE_HALF_FP_CONFIG ) {
+            // CL_DEVICE_HALF_FP_CONFIG should be reported only when cl_khr_fp16 extension is available
+            get = extensions.cl_khr_fp16;
+        };
+        if ( get ) {
+            err = getConfigInfo(device, & info);
+            if ( ! err ) {
+                dumpConfigInfo(device, & info);
+                // CL_DEVICE_VERSION and CL_DEVICE_EXTENSIONS feed the version
+                // and extension state used to filter later properties, so
+                // they are parsed as soon as they are seen.
+                if ( info.opcode == CL_DEVICE_VERSION ) {
+                    err = parseVersion( info.config.string, & version );
+                    if ( err ) {
+                        total_errors++;
+                        break;
+                    }
+                } else if ( info.opcode == CL_DEVICE_EXTENSIONS ) {
+                    err = parseExtensions( info.config.string, & extensions );
+                    if ( err ) {
+                        total_errors++;
+                        break;
+                    }
+                }
+            } else {
+                total_errors++;
+            }
+        } else {
+            log_info( "\tSkipped: %s.\n", info.opcode_name );
+        }
+    }
+
+    if (is_extension_available(device, "cl_khr_image2d_from_buffer")){
+        for ( onConfigInfo = 0; onConfigInfo < sizeof(image_buffer_config_infos) / sizeof(image_buffer_config_infos[0]); onConfigInfo++) {
+            config_info info = image_buffer_config_infos[ onConfigInfo ];
+            get = ( vercmp( version, info.version ) >= 0 );
+            if ( get ) {
+                err = getConfigInfo(device, & info);
+                if ( ! err ) {
+                    dumpConfigInfo(device, & info);
+                }
+                else {
+                    total_errors++;
+                }
+            }
+        }
+    }
+
+    total_errors += getImageInfo(device);
+
+    return total_errors;
+}
+
+
+// Entry point: prints platform info, then device info either for the single
+// device selected via the CL_DEVICE_TYPE / CL_DEVICE_INDEX environment
+// variables, or for every device of every type on the first platform.
+// Pass "-v" to also dump supported image formats.
+int main(int argc, const char** argv)
+{
+    cl_platform_id platform;
+    test_start();
+
+    if (argc == 2) {
+        if (!strcmp(argv[1], "-v"))
+            dump_supported_formats = 1;
+        else {
+            log_info("Use option \"-v\" for verbose output\n");
+            return 0;
+        }
+    }
+
+    int err;
+    int total_errors = 0;
+
+    err = clGetPlatformIDs(1, &platform, NULL);
+    test_error(err, "clGetPlatformIDs failed");
+    // NOTE(review): test_error above appears to return on failure, which would
+    // make this increment unreachable — confirm the macro's semantics.
+    if (err != CL_SUCCESS) {
+        total_errors++;
+    }
+
+    // print platform info
+    log_info( "\nclGetPlatformInfo:\n------------------\n" );
+    print_platform_string_selector( platform, "CL_PLATFORM_PROFILE", CL_PLATFORM_PROFILE );
+    print_platform_string_selector( platform, "CL_PLATFORM_VERSION", CL_PLATFORM_VERSION );
+    print_platform_string_selector( platform, "CL_PLATFORM_NAME", CL_PLATFORM_NAME );
+    print_platform_string_selector( platform, "CL_PLATFORM_VENDOR", CL_PLATFORM_VENDOR );
+    print_platform_string_selector( platform, "CL_PLATFORM_EXTENSIONS", CL_PLATFORM_EXTENSIONS );
+    log_info( "\n" );
+
+    // Check to see if this test is being run on a specific device
+    char* device_type_env = getenv("CL_DEVICE_TYPE");
+    char* device_index_env = getenv("CL_DEVICE_INDEX");
+
+    if (device_type_env || device_index_env) {
+
+        cl_device_type device_type = CL_DEVICE_TYPE_DEFAULT;
+        size_t device_type_idx = 0;
+        size_t device_index = 0;
+
+        // Check to see if a device type was specified.
+        // Map the CL_DEVICE_TYPE environment value (short or
+        // CL_DEVICE_TYPE_* spelling) to a device type plus an index into the
+        // device_infos table (used only for the printed type name).
+        if (device_type_env) {
+            if (!strcmp(device_type_env,"default") || !strcmp(device_type_env,"CL_DEVICE_TYPE_DEFAULT")) {
+                device_type = CL_DEVICE_TYPE_DEFAULT;
+                device_type_idx = 0;
+            }
+            else if (!strcmp(device_type_env,"cpu") || !strcmp(device_type_env,"CL_DEVICE_TYPE_CPU")) {
+                device_type = CL_DEVICE_TYPE_CPU;
+                device_type_idx = 1;
+            }
+            else if (!strcmp(device_type_env,"gpu") || !strcmp(device_type_env,"CL_DEVICE_TYPE_GPU")) {
+                device_type = CL_DEVICE_TYPE_GPU;
+                device_type_idx = 2;
+            }
+            else if (!strcmp(device_type_env,"accelerator") || !strcmp(device_type_env,"CL_DEVICE_TYPE_ACCELERATOR")) {
+                device_type = CL_DEVICE_TYPE_ACCELERATOR;
+                device_type_idx = 3;
+            }
+            else {
+                log_error("CL_DEVICE_TYPE=\"%s\" is invalid\n",device_type_env);
+                return -1;
+            }
+        }
+
+        // Check to see if a device index was specified
+        if (device_index_env)
+            device_index = atoi(device_index_env);
+
+        // Look up the device
+        cl_uint num_devices;
+        err = clGetDeviceIDs(platform, device_type, 0, NULL, &num_devices);
+        if (err)
+        {
+            log_error("No devices of type %s found.\n", device_type_env);
+            return -1;
+        }
+
+        if (device_index >= num_devices) {
+            log_error("CL_DEVICE_INDEX=%d is greater than the number of matching devices %d\n",(unsigned)device_index,num_devices);
+            return -1;
+        }
+
+        if (num_devices == 0)
+        {
+            log_error("No devices of type %s found.\n", device_type_env);
+            return -1;
+        }
+
+        cl_device_id *devices = (cl_device_id *) malloc( num_devices * sizeof( cl_device_id ) );
+        err = clGetDeviceIDs(platform, device_type, num_devices, devices, NULL);
+        if (err)
+        {
+            log_error("No devices of type %s found.\n", device_type_env);
+            free(devices);
+            return -1;
+        }
+
+        // Only the selected device is kept; the id remains valid after the
+        // list is freed.
+        cl_device_id device = devices[device_index];
+        free(devices);
+
+        log_info("%s Device %d of %d Info:\n", device_infos[device_type_idx].device_type_name, (unsigned)device_index+1, num_devices);
+        total_errors += getConfigInfos( device );
+        log_info("\n");
+
+    }
+
+    // Otherwise iterate over all of the devices in the platform
+    else {
+        //print device info
+        int onInfo;
+        for(onInfo = 0; onInfo < sizeof(device_infos) / sizeof(device_infos[0]); onInfo++)
+        {
+            log_info("Getting device IDs for %s devices\n", device_infos[onInfo].device_type_name);
+            err = clGetDeviceIDs(platform, device_infos[onInfo].device_type, 0, NULL, &device_infos[onInfo].num_devices);
+            // CL_DEVICE_NOT_FOUND is expected for absent device types and is
+            // not treated as an error.
+            if (err == CL_DEVICE_NOT_FOUND)
+            {
+                log_info("No devices of type %s found.\n", device_infos[onInfo].device_type_name);
+                continue;
+            }
+            test_error(err, "clGetDeviceIDs failed");
+
+            log_info("Found %d %s devices:\n", device_infos[onInfo].num_devices, device_infos[onInfo].device_type_name);
+            if(device_infos[onInfo].num_devices)
+            {
+                device_infos[onInfo].devices = (cl_device_id *)malloc(sizeof(cl_device_id) * device_infos[onInfo].num_devices);
+                err = clGetDeviceIDs(platform, device_infos[onInfo].device_type, device_infos[onInfo].num_devices, device_infos[onInfo].devices, NULL);
+                test_error(err, "clGetDeviceIDs failed");
+            }
+
+            int onDevice;
+            for(onDevice = 0; onDevice < device_infos[onInfo].num_devices; onDevice++)
+            {
+                log_info("%s Device %d of %d Info:\n", device_infos[onInfo].device_type_name, onDevice+1, device_infos[onInfo].num_devices);
+                total_errors += getConfigInfos( device_infos[onInfo].devices[onDevice] );
+                log_info("\n");
+            }
+
+            if(device_infos[onInfo].num_devices)
+            {
+                free(device_infos[onInfo].devices);
+            }
+        }
+    }
+
+    if (total_errors)
+        log_error("FAILED computeinfo.\n");
+    else
+        log_info("PASSED computeinfo.\n");
+
+    test_finish();
+    if (total_errors)
+        return -1;
+    return 0;
+}
+
diff --git a/test_conformance/contractions/CMakeLists.txt b/test_conformance/contractions/CMakeLists.txt
new file mode 100644
index 00000000..ac407682
--- /dev/null
+++ b/test_conformance/contractions/CMakeLists.txt
@@ -0,0 +1,15 @@
+set(MODULE_NAME CONTRACTIONS)
+
+set(${MODULE_NAME}_SOURCES
+    contractions.c
+    ../../test_common/harness/mt19937.c
+    ../../test_common/harness/msvc9.c
+
../../test_common/harness/mingw_compat.c + ../../test_common/harness/errorHelpers.c + ../../test_common/harness/rounding_mode.c + ../../test_common/harness/kernelHelpers.c + ../../test_common/harness/testHarness.c + ../../test_common/harness/parseParameters.cpp +) + +include(../CMakeCommon.txt) diff --git a/test_conformance/contractions/Jamfile b/test_conformance/contractions/Jamfile new file mode 100644 index 00000000..a5bd7f19 --- /dev/null +++ b/test_conformance/contractions/Jamfile @@ -0,0 +1,13 @@ +project + : requirements +# gcc:-xc++ + msvc:"/TP" + ; + +exe contractions : contractions.c ; + +install dist + : contractions + : debug:$(DIST)/debug/tests/test_conformance/contractions + release:$(DIST)/release/tests/test_conformance/contractions + ; diff --git a/test_conformance/contractions/Makefile b/test_conformance/contractions/Makefile new file mode 100644 index 00000000..14e283dc --- /dev/null +++ b/test_conformance/contractions/Makefile @@ -0,0 +1,32 @@ +ifdef BUILD_WITH_ATF +ATF = -framework ATF +USE_ATF = -DUSE_ATF +endif + +CC = c++ +CFLAGS = -g -arch i386 -arch x86_64 -Wall $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} -I ../../test_common/harness +LIBRARIES = -framework OpenCL -framework ApplicationServices -framework IOKit -I/System/Library/Frameworks/OpenCL.framework/Headers ${RC_CFLAGS} ${ATF} + +release: + echo "Build Release" + $(CC) *.c ../../test_common/harness/rounding_mode.c ../../test_common/harness/kernelHelpers.c ../../test_common/harness/errorHelpers.c ../../test_common/harness/mt19937.c -Os $(CFLAGS) -o contractions $(LIBRARIES) + +debug: + echo "Build Debug" + $(CC) *.c ../../test_common/harness/rounding_mode.c ../../test_common/harness/kernelHelpers.c ../../test_common/harness/errorHelpers.c ../../test_common/harness/mt19937.c -O0 $(CFLAGS) -D_DEBUG=1 -o contractions_debug $(LIBRARIES) + +test: release + arch -i386 ./contractions -c > cpu.log; + arch -i386 ./contractions -g > gpu.log; + +test64: release + arch -x86_64 ./contractions_debug 
-c > cpu64.log; + arch -x86_64 ./contractions_debug -g > gpu64.log; + echo "Testing 64-bit mode in progress. This may take up to 1 day to complete. See cpu64.log and gpu64.log for results." + + +clean: + rm -f ./contractions + rm -f ./contractions_debug + +all: release diff --git a/test_conformance/contractions/contractions.c b/test_conformance/contractions/contractions.c new file mode 100644 index 00000000..164fde6c --- /dev/null +++ b/test_conformance/contractions/contractions.c @@ -0,0 +1,1217 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" + +#include +#include + +#if !defined(_WIN32) +#include +#include +#endif + +#include "mingw_compat.h" +#if defined (__MINGW32__) +#include +#endif + +#include +#include "errorHelpers.h" +#include "../../test_common/harness/compat.h" +#include "../../test_common/harness/mt19937.h" +#include "../../test_common/harness/kernelHelpers.h" +#include "../../test_common/harness/rounding_mode.h" +#include "../../test_common/harness/fpcontrol.h" +#include "../../test_common/harness/parseParameters.h" +#if defined( __APPLE__ ) +#include +#endif +#if defined( __linux__ ) +#include +#include +#include +#endif + +#if defined (_WIN32) +#include +#endif + +#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) +#include +#endif + +#if defined(__PPC__) +// Global varaiable used to hold the FPU control register state. 
The FPSCR register can not +// be used because not all Power implementations retain or observed the NI (non-IEEE +// mode) bit. +__thread fpu_control_t fpu_control = 0; +#endif + +#ifndef MAXPATHLEN +#define MAXPATHLEN 2048 +#endif + +char appName[ MAXPATHLEN ] = ""; +cl_device_type gDeviceType = CL_DEVICE_TYPE_DEFAULT; +cl_device_id gDevice = NULL; +cl_context gContext = NULL; +cl_command_queue gQueue = NULL; +cl_program gProgram[5] = { NULL, NULL, NULL, NULL, NULL }; +cl_program gProgram_double[5] = { NULL, NULL, NULL, NULL, NULL }; +int gForceFTZ = 0; +int gSeed = 0; +int gSeedSpecified = 0; +int gHasDouble = 0; +MTdata gMTdata = NULL; +int gSkipNanInf = 0; +int gIgnoreZeroSign = 0; + +cl_mem bufA = NULL; +cl_mem bufB = NULL; +cl_mem bufC = NULL; +cl_mem bufD = NULL; +cl_mem bufE = NULL; +cl_mem bufC_double = NULL; +cl_mem bufD_double = NULL; +float *buf1, *buf2, *buf3, *buf4, *buf5, *buf6; +float *correct[8]; +int *skipTest[8]; + +double *buf3_double, *buf4_double, *buf5_double, *buf6_double; +double *correct_double[8]; + +#define BUFFER_SIZE (1024*1024) + + +static int ParseArgs( int argc, const char **argv ); +static void PrintArch( void ); +static void PrintUsage( void ); +static int InitCL( void ); +static void ReleaseCL( void ); +static int RunTest( int testNumber ); +static int RunTest_Double( int testNumber ); + +#if defined(__ANDROID__) +#define nanf( X ) strtof( "NAN", ( char ** ) NULL ) +#define nan( X ) strtod( "NAN", ( char ** ) NULL ) +#endif + +#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) +// defeat x87 on MSVC +float sse_add(float x, float y) +{ + volatile float a = x; + volatile float b = y; + + // defeat x87 + __m128 va = _mm_set_ss( (float) a ); + __m128 vb = _mm_set_ss( (float) b ); + va = _mm_add_ss( va, vb ); + _mm_store_ss( (float*) &a, va ); + return a; +} + +double sse_add_sd(double x, double y) +{ + volatile double a = x; + volatile double b = y; + + // defeat x87 + __m128d va = _mm_set_sd( (double) a ); + __m128d vb = 
_mm_set_sd( (double) b ); + va = _mm_add_sd( va, vb ); + _mm_store_sd( (double*) &a, va ); + return a; +} + +float sse_sub(float x, float y) +{ + volatile float a = x; + volatile float b = y; + + // defeat x87 + __m128 va = _mm_set_ss( (float) a ); + __m128 vb = _mm_set_ss( (float) b ); + va = _mm_sub_ss( va, vb ); + _mm_store_ss( (float*) &a, va ); + return a; +} + +double sse_sub_sd(double x, double y) +{ + volatile double a = x; + volatile double b = y; + + // defeat x87 + __m128d va = _mm_set_sd( (double) a ); + __m128d vb = _mm_set_sd( (double) b ); + va = _mm_sub_sd( va, vb ); + _mm_store_sd( (double*) &a, va ); + return a; +} + +float sse_mul(float x, float y) +{ + volatile float a = x; + volatile float b = y; + + // defeat x87 + __m128 va = _mm_set_ss( (float) a ); + __m128 vb = _mm_set_ss( (float) b ); + va = _mm_mul_ss( va, vb ); + _mm_store_ss( (float*) &a, va ); + return a; +} + +double sse_mul_sd(double x, double y) +{ + volatile double a = x; + volatile double b = y; + + // defeat x87 + __m128d va = _mm_set_sd( (double) a ); + __m128d vb = _mm_set_sd( (double) b ); + va = _mm_mul_sd( va, vb ); + _mm_store_sd( (double*) &a, va ); + return a; +} +#endif + +#ifdef __PPC__ +float ppc_mul(float a, float b) +{ + float p; + + if (gForceFTZ) { + // Flush input a to zero if it is sub-normal + if (fabsf(a) < FLT_MIN) { + a = copysignf(0.0, a); + } + // Flush input b to zero if it is sub-normal + if (fabsf(b) < FLT_MIN) { + b = copysignf(0.0, b); + } + // Perform multiply + p = a * b; + // Flush the product if it is a sub-normal + if (fabs((double)a * (double)b) < FLT_MIN) { + p = copysignf(0.0, p); + } + } else { + p = a * b; + } + return p; +} +#endif + +int main( int argc, const char **argv ) +{ + int error = 0; + int i; + + test_start(); + argc = parseCustomParam(argc, argv); + if (argc == -1) + { + test_finish(); + return -1; + } + + error = ParseArgs( argc, argv ); + if( error ) + return error; + + // Init OpenCL + error = InitCL(); + if( error ) + return 
error; + + // run the tests + log_info( "Testing floats...\n" ); + for( i = 0; i < 8; i++ ) + error |= RunTest( i ); + + if( gHasDouble ) + { + log_info( "Testing doubles...\n" ); + for( i = 0; i < 8; i++ ) + error |= RunTest_Double( i ); + } + + + int flush_error = clFinish(gQueue); + if (flush_error) + log_error("clFinish failed: %d\n", flush_error); + + if( error ) + vlog_error( "Contractions test FAILED.\n" ); + else + vlog( "Contractions test PASSED.\n" ); + + ReleaseCL(); + test_finish(); + + return error; +} + + + +static int ParseArgs( int argc, const char **argv ) +{ + int i; + int length_of_seed = 0; + + { // Extract the app name + strncpy( appName, argv[0], MAXPATHLEN ); + +#if (defined( __APPLE__ ) || defined(__linux__) || defined(__MINGW32__)) + char baseName[MAXPATHLEN]; + char *base = NULL; + strncpy( baseName, argv[0], MAXPATHLEN ); + base = basename( baseName ); + if( NULL != base ) + { + strncpy( appName, base, sizeof( appName ) ); + appName[ sizeof( appName ) -1 ] = '\0'; + } +#elif defined (_WIN32) + char fname[_MAX_FNAME + _MAX_EXT + 1]; + char ext[_MAX_EXT]; + + errno_t err = _splitpath_s( argv[0], NULL, 0, NULL, 0, + fname, _MAX_FNAME, ext, _MAX_EXT ); + if (err == 0) { // no error + strcat (fname, ext); //just cat them, size of frame can keep both + strncpy (appName, fname, sizeof(appName)); + appName[ sizeof( appName ) -1 ] = '\0'; + } +#endif + } + + /* Check if we are forced to CPU mode */ + char *env_mode = getenv( "CL_DEVICE_TYPE" ); + if( env_mode != NULL ) + { + if( strcmp( env_mode, "gpu" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_GPU" ) == 0 ) + gDeviceType = CL_DEVICE_TYPE_GPU; + else if( strcmp( env_mode, "cpu" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_CPU" ) == 0 ) + gDeviceType = CL_DEVICE_TYPE_CPU; + else if( strcmp( env_mode, "accelerator" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_ACCELERATOR" ) == 0 ) + gDeviceType = CL_DEVICE_TYPE_ACCELERATOR; + else if( strcmp( env_mode, "default" ) == 0 || strcmp( env_mode, 
"CL_DEVICE_TYPE_DEFAULT" ) == 0 ) + gDeviceType = CL_DEVICE_TYPE_DEFAULT; + else + { + vlog_error( "Unknown CL_DEVICE_TYPE env variable setting: %s.\nAborting...\n", env_mode ); + abort(); + } + } + + vlog( "\n%s\t", appName ); + for( i = 1; i < argc; i++ ) + { + const char *arg = argv[i]; + if( NULL == arg ) + break; + + vlog( "\t%s", arg ); + int optionFound = 0; + if( arg[0] == '-' ) + { + while( arg[1] != '\0' ) + { + arg++; + optionFound = 1; + switch( *arg ) + { + case 'h': + PrintUsage(); + return -1; + + case 's': + arg++; + gSeed = atoi( arg ); + while (arg[length_of_seed] >='0' && arg[length_of_seed]<='9') + length_of_seed++; + gSeedSpecified = 1; + arg+=length_of_seed-1; + break; + + case 'z': + gForceFTZ ^= 1; + break; + + case ' ': + break; + + default: + vlog( " <-- unknown flag: %c (0x%2.2x)\n)", *arg, *arg ); + PrintUsage(); + return -1; + } + } + } + else if( 0 == strcmp( arg, "CL_DEVICE_TYPE_CPU" ) ) + gDeviceType = CL_DEVICE_TYPE_CPU; + else if( 0 == strcmp( arg, "CL_DEVICE_TYPE_GPU" ) ) + gDeviceType = CL_DEVICE_TYPE_GPU; + else if( 0 == strcmp( arg, "CL_DEVICE_TYPE_ACCELERATOR" ) ) + gDeviceType = CL_DEVICE_TYPE_ACCELERATOR; + else if( 0 == strcmp( arg, "CL_DEVICE_TYPE_DEFAULT" ) ) + gDeviceType = CL_DEVICE_TYPE_DEFAULT; + else + { + vlog( "ERROR -- unknown argument: %s\n", arg ); + abort(); + } + } + vlog( "\n\nTest binary built %s %s\n", __DATE__, __TIME__ ); + + PrintArch(); + + return 0; +} + +static void PrintArch( void ) +{ + vlog( "\nHost info:\n" ); + vlog( "\tsizeof( void*) = %ld\n", sizeof( void *) ); +#if defined( __ppc__ ) + vlog( "\tARCH:\tppc\n" ); +#elif defined( __ppc64__ ) + vlog( "\tARCH:\tppc64\n" ); +#elif defined( __PPC__ ) + vlog( "ARCH:\tppc\n" ); +#elif defined( __i386__ ) + vlog( "\tARCH:\ti386\n" ); +#elif defined( __x86_64__ ) + vlog( "\tARCH:\tx86_64\n" ); +#elif defined( __arm__ ) + vlog( "\tARCH:\tarm\n" ); +#else + vlog( "\tARCH:\tunknown\n" ); +#endif + +#if defined( __APPLE__ ) + int type = 0; + size_t typeSize 
= sizeof( type ); + sysctlbyname( "hw.cputype", &type, &typeSize, NULL, 0 ); + vlog( "\tcpu type:\t%d\n", type ); + typeSize = sizeof( type ); + sysctlbyname( "hw.cpusubtype", &type, &typeSize, NULL, 0 ); + vlog( "\tcpu subtype:\t%d\n", type ); + +#elif defined( __linux__ ) && !defined(__aarch64__) + int _sysctl(struct __sysctl_args *args ); +#define OSNAMESZ 100 + + struct __sysctl_args args; + char osname[OSNAMESZ]; + size_t osnamelth; + int name[] = { CTL_KERN, KERN_OSTYPE }; + memset(&args, 0, sizeof(struct __sysctl_args)); + args.name = name; + args.nlen = sizeof(name)/sizeof(name[0]); + args.oldval = osname; + args.oldlenp = &osnamelth; + + osnamelth = sizeof(osname); + + if (syscall(SYS__sysctl, &args) == -1) { + vlog( "_sysctl error\n" ); + } + else { + vlog("this machine is running %*s\n", osnamelth, osname); + } + +#endif +} + +static void PrintUsage( void ) +{ + vlog( "%s [-z]: \n", appName ); + vlog( "\toptions:\n" ); + vlog( "\t\t-z\tToggle FTZ mode (Section 6.5.3) for all functions. 
(Set by device capabilities by default.)\n" ); + vlog( "\t\t-sNUMBER set random seed.\n"); + vlog( "\n" ); +} + +const char *sizeNames[] = { "float", "float2", "float4", "float8", "float16" }; +const char *sizeNames_double[] = { "double", "double2", "double4", "double8", "double16" }; + +static void CL_CALLBACK notify_callback(const char *errinfo, const void *private_info, size_t cb, void *user_data) +{ + vlog( "%s\n", errinfo ); +} + +static int InitCL( void ) +{ + cl_platform_id platform = NULL; + int error; + uint32_t i, j; + int *bufSkip = NULL; + int isRTZ = 0; + int isEmbedded = 0; + RoundingMode oldRoundMode = kDefaultRoundingMode; + + if( (error = clGetPlatformIDs(1, &platform, NULL) ) ) + return error; + + if( (error = clGetDeviceIDs(platform, gDeviceType, 1, &gDevice, NULL )) ) + return error; + + cl_device_fp_config floatCapabilities = 0; + if( (error = clGetDeviceInfo(gDevice, CL_DEVICE_SINGLE_FP_CONFIG, sizeof(floatCapabilities), &floatCapabilities, NULL))) + floatCapabilities = 0; + if(0 == (CL_FP_DENORM & floatCapabilities) ) + gForceFTZ ^= 1; + + // check for cl_khr_fp64 + size_t extensions_size = 0; + if( (error = clGetDeviceInfo( gDevice, CL_DEVICE_EXTENSIONS, 0, NULL, &extensions_size ))) + { + vlog_error( "clGetDeviceInfo(CL_DEVICE_EXTENSIONS) failed. %d\n", error ); + return -1; + } + if( extensions_size ) + { + char *extensions = (char*)malloc(extensions_size); + if( NULL == extensions ) + { + vlog_error( "ERROR: Unable to allocate %ld bytes to hold extensions string\n", extensions_size ); + return -1; + } + + if( (error = clGetDeviceInfo( gDevice, CL_DEVICE_EXTENSIONS, extensions_size, extensions, NULL ))) + { + vlog_error( "clGetDeviceInfo(CL_DEVICE_EXTENSIONS) failed 2. 
%d\n", error ); + return -1; + } + + gHasDouble = NULL != strstr( extensions, "cl_khr_fp64" ); + free( extensions ); + } + + if(0 == (CL_FP_INF_NAN & floatCapabilities) ) + gSkipNanInf = 1; + + char profile[1024] = ""; + if ( (error = clGetDeviceInfo(gDevice, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL ) ) ) {} + else if (strstr(profile, "EMBEDDED_PROFILE")) + isEmbedded = 1; + + // Embedded devices that flush to zero are allowed to have an undefined sign. + if (isEmbedded && gForceFTZ) + gIgnoreZeroSign = 1; + + gContext = clCreateContext( NULL, 1, &gDevice, notify_callback, NULL, &error ); + if( NULL == gContext || error ) + { + vlog_error( "clCreateDeviceGroup failed. %d\n", error ); + return -1; + } + + gQueue = clCreateCommandQueueWithProperties( gContext, gDevice, 0, &error ); + if( NULL == gQueue || error ) + { + vlog_error( "clCreateContext failed. %d\n", error ); + return -2; + } + + // setup input buffers + bufA = clCreateBuffer( gContext, CL_MEM_READ_WRITE, BUFFER_SIZE, NULL, NULL ); + bufB = clCreateBuffer( gContext, CL_MEM_READ_WRITE, BUFFER_SIZE, NULL, NULL ); + bufC = clCreateBuffer( gContext, CL_MEM_READ_WRITE, BUFFER_SIZE, NULL, NULL ); + bufD = clCreateBuffer( gContext, CL_MEM_READ_WRITE, BUFFER_SIZE, NULL, NULL ); + bufE = clCreateBuffer( gContext, CL_MEM_READ_WRITE, BUFFER_SIZE, NULL, NULL ); + + if( bufA == NULL || + bufB == NULL || + bufC == NULL || + bufD == NULL || + bufE == NULL ) + { + vlog_error( "clCreateArray failed for input\n" ); + return -4; + } + + if( gHasDouble ) + { + bufC_double = clCreateBuffer( gContext, CL_MEM_READ_WRITE, BUFFER_SIZE, NULL, NULL ); + bufD_double = clCreateBuffer( gContext, CL_MEM_READ_WRITE, BUFFER_SIZE, NULL, NULL ); + if( bufC_double == NULL || + bufD_double == NULL ) + { + vlog_error( "clCreateArray failed for input DP\n" ); + return -4; + } + } + + const char *kernels[] = { + "", "#pragma OPENCL FP_CONTRACT OFF\n" + "__kernel void kernel1( __global ", NULL, " *out, const __global ", NULL, " *a, 
const __global ", NULL, " *b, const __global ", NULL, " *c )\n" + "{\n" + " int i = get_global_id(0);\n" + " out[i] = a[i] * b[i] + c[i];\n" + "}\n" + "\n" + "__kernel void kernel2( __global ", NULL, " *out, const __global ", NULL, " *a, const __global ", NULL, " *b, const __global ", NULL, " *c )\n" + "{\n" + " int i = get_global_id(0);\n" + " out[i] = a[i] * b[i] - c[i];\n" + "}\n" + "\n" + "__kernel void kernel3( __global ", NULL, " *out, const __global ", NULL, " *a, const __global ", NULL, " *b, const __global ", NULL, " *c )\n" + "{\n" + " int i = get_global_id(0);\n" + " out[i] = c[i] + a[i] * b[i];\n" + "}\n" + "\n" + "__kernel void kernel4( __global ", NULL, " *out, const __global ", NULL, " *a, const __global ", NULL, " *b, const __global ", NULL, " *c )\n" + "{\n" + " int i = get_global_id(0);\n" + " out[i] = c[i] - a[i] * b[i];\n" + "}\n" + "\n" + "__kernel void kernel5( __global ", NULL, " *out, const __global ", NULL, " *a, const __global ", NULL, " *b, const __global ", NULL, " *c )\n" + "{\n" + " int i = get_global_id(0);\n" + " out[i] = -(a[i] * b[i] + c[i]);\n" + "}\n" + "\n" + "__kernel void kernel6( __global ", NULL, " *out, const __global ", NULL, " *a, const __global ", NULL, " *b, const __global ", NULL, " *c )\n" + "{\n" + " int i = get_global_id(0);\n" + " out[i] = -(a[i] * b[i] - c[i]);\n" + "}\n" + "\n" + "__kernel void kernel7( __global ", NULL, " *out, const __global ", NULL, " *a, const __global ", NULL, " *b, const __global ", NULL, " *c )\n" + "{\n" + " int i = get_global_id(0);\n" + " out[i] = -(c[i] + a[i] * b[i]);\n" + "}\n" + "\n" + "__kernel void kernel8( __global ", NULL, " *out, const __global ", NULL, " *a, const __global ", NULL, " *b, const __global ", NULL, " *c )\n" + "{\n" + " int i = get_global_id(0);\n" + " out[i] = -(c[i] - a[i] * b[i]);\n" + "}\n" + "\n" }; + + for( i = 0; i < sizeof( sizeNames ) / sizeof( sizeNames[0] ); i++ ) + { + size_t strCount = sizeof( kernels ) / sizeof( kernels[0] ); + kernels[0] = ""; + + 
for( j = 2; j < strCount; j += 2 ) + kernels[j] = sizeNames[i]; + + error = create_single_kernel_helper_create_program(gContext, &gProgram[i], strCount, kernels); + if( NULL == gProgram[i] ) + { + vlog_error( "clCreateProgramWithSource failed\n" ); + return -5; + } + + if(( error = clBuildProgram(gProgram[i], 1, &gDevice, NULL, NULL, NULL) )) + { + vlog_error( "clBuildProgramExecutable failed\n" ); + char build_log[2048] = ""; + + clGetProgramBuildInfo(gProgram[i], gDevice, CL_PROGRAM_BUILD_LOG, sizeof(build_log), build_log, NULL); + vlog_error( "Log:\n%s\n", build_log ); + return -5; + } + } + + if( gHasDouble ) + { + kernels[0] = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; + for( i = 0; i < sizeof( sizeNames_double ) / sizeof( sizeNames_double[0] ); i++ ) + { + size_t strCount = sizeof( kernels ) / sizeof( kernels[0] ); + + for( j = 2; j < strCount; j += 2 ) + kernels[j] = sizeNames_double[i]; + + error = create_single_kernel_helper_create_program(gContext, &gProgram_double[i], strCount, kernels); + if( NULL == gProgram_double[i] ) + { + vlog_error( "clCreateProgramWithSource failed\n" ); + return -5; + } + + if(( error = clBuildProgram(gProgram_double[i], 1, &gDevice, NULL, NULL, NULL) )) + { + vlog_error( "clBuildProgramExecutable failed\n" ); + char build_log[2048] = ""; + + clGetProgramBuildInfo(gProgram_double[i], gDevice, CL_PROGRAM_BUILD_LOG, sizeof(build_log), build_log, NULL); + vlog_error( "Log:\n%s\n", build_log ); + return -5; + } + } + } + + if( 0 == gSeedSpecified ) + { + time_t currentTime = time( NULL ); + struct tm *t = localtime(¤tTime); + gSeed = t->tm_sec + 60 * ( t->tm_min + 60 * (t->tm_hour + 24 * (t->tm_yday + 365 * t->tm_year))); + gSeed = (uint32_t) (((uint64_t) gSeed * (uint64_t) gSeed ) >> 16); + } + gMTdata = init_genrand( gSeed ); + + + // Init bufA and bufB + { + buf1 = (float *)malloc( BUFFER_SIZE ); + buf2 = (float *)malloc( BUFFER_SIZE ); + buf3 = (float *)malloc( BUFFER_SIZE ); + buf4 = (float *)malloc( BUFFER_SIZE ); + 
buf5 = (float *)malloc( BUFFER_SIZE ); + buf6 = (float *)malloc( BUFFER_SIZE ); + + bufSkip = (int *)malloc( BUFFER_SIZE ); + + if( NULL == buf1 || NULL == buf2 || NULL == buf3 || NULL == buf4 || NULL == buf5 || NULL == buf6 || NULL == bufSkip) + { + vlog_error( "Out of memory initializing buffers\n" ); + return -15; + } + for( i = 0; i < sizeof( correct ) / sizeof( correct[0] ); i++ ) + { + correct[i] = (float *)malloc( BUFFER_SIZE ); + skipTest[i] = (int *)malloc( BUFFER_SIZE ); + if(( NULL == correct[i] ) || ( NULL == skipTest[i])) + { + vlog_error( "Out of memory initializing buffers 2\n" ); + return -15; + } + } + + for( i = 0; i < BUFFER_SIZE / sizeof(float); i++ ) + ((uint32_t*) buf1)[i] = genrand_int32( gMTdata ); + + if( (error = clEnqueueWriteBuffer(gQueue, bufA, CL_FALSE, 0, BUFFER_SIZE, buf1, 0, NULL, NULL) )) + { + vlog_error( "Failure %d at clEnqueueWriteBuffer1\n", error ); + return error; + } + + for( i = 0; i < BUFFER_SIZE / sizeof(float); i++ ) + ((uint32_t*) buf2)[i] = genrand_int32( gMTdata ); + + if( (error = clEnqueueWriteBuffer(gQueue, bufB, CL_FALSE, 0, BUFFER_SIZE, buf2, 0, NULL, NULL) )) + { + vlog_error( "Failure %d at clEnqueueWriteBuffer2\n", error ); + return error; + } + + void *ftzInfo = NULL; + if( gForceFTZ ) + ftzInfo = FlushToZero(); + if ((CL_FP_ROUND_TO_ZERO == get_default_rounding_mode(gDevice)) && isEmbedded) { + oldRoundMode = set_round(kRoundTowardZero, kfloat); + isRTZ = 1; + } + float *f = (float*) buf1; + float *f2 = (float*) buf2; + float *f3 = (float*) buf3; + float *f4 = (float*) buf4; + for( i = 0; i < BUFFER_SIZE / sizeof(float); i++ ) + { + float q = f[i]; + float q2 = f2[i]; + + feclearexcept(FE_OVERFLOW); +#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) + // VS2005 might use x87 for straight multiplies, and we can't + // turn that off + f3[i] = sse_mul(q, q2); + f4[i] = sse_mul(-q, q2); +#elif defined(__PPC__) + // None of the current generation PPC processors support HW + // FTZ, emulate it in sw. 
+ f3[i] = ppc_mul(q, q2); + f4[i] = ppc_mul(-q, q2); +#else + f3[i] = q * q2; + f4[i] = -q * q2; +#endif + // Skip test if the device doesn't support infinities and NaN AND the result overflows + // or either input is an infinity of NaN + bufSkip[i] = (gSkipNanInf && ((FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW))) || + (fabsf(q) == FLT_MAX) || (q != q) || + (fabsf(q2) == FLT_MAX) || (q2 != q2))); + } + + if( gForceFTZ ) + UnFlushToZero(ftzInfo); + + if (isRTZ) + (void)set_round(oldRoundMode, kfloat); + + + if( (error = clEnqueueWriteBuffer(gQueue, bufC, CL_FALSE, 0, BUFFER_SIZE, buf3, 0, NULL, NULL) )) + { + vlog_error( "Failure %d at clEnqueueWriteBuffer3\n", error ); + return error; + } + if( (error = clEnqueueWriteBuffer(gQueue, bufD, CL_FALSE, 0, BUFFER_SIZE, buf4, 0, NULL, NULL) )) + { + vlog_error( "Failure %d at clEnqueueWriteBuffer4\n", error ); + return error; + } + + // Fill the buffers with NaN + float *f5 = (float*) buf5; + float nan_val = nanf(""); + for( i = 0; i < BUFFER_SIZE / sizeof( float ); i++ ) + f5[i] = nan_val; + + // calculate reference results + for( i = 0; i < BUFFER_SIZE / sizeof( float ); i++ ) + { + for ( j=0; j<8; j++) + { + feclearexcept(FE_OVERFLOW); + switch (j) + { +#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) + // VS2005 might use x87 for straight add/sub, and we can't + // turn that off + case 0: + correct[0][i] = sse_add(buf3[i],buf4[i]); break; + case 1: + correct[1][i] = sse_sub(buf3[i],buf3[i]); break; + case 2: + correct[2][i] = sse_add(buf4[i],buf3[i]); break; + case 3: + correct[3][i] = sse_sub(buf3[i],buf3[i]); break; + case 4: + correct[4][i] = -sse_add(buf3[i],buf4[i]); break; + case 5: + correct[5][i] = -sse_sub(buf3[i],buf3[i]); break; + case 6: + correct[6][i] = -sse_add(buf4[i],buf3[i]); break; + case 7: + correct[7][i] = -sse_sub(buf3[i],buf3[i]); break; +#else + case 0: + correct[0][i] = buf3[i] + buf4[i]; break; + case 1: + correct[1][i] = buf3[i] - buf3[i]; break; + case 2: + 
correct[2][i] = buf4[i] + buf3[i]; break; + case 3: + correct[3][i] = buf3[i] - buf3[i]; break; + case 4: + correct[4][i] = -(buf3[i] + buf4[i]); break; + case 5: + correct[5][i] = -(buf3[i] - buf3[i]); break; + case 6: + correct[6][i] = -(buf4[i] + buf3[i]); break; + case 7: + correct[7][i] = -(buf3[i] - buf3[i]); break; +#endif + } + // Further skip test inputs if the device doesn support infinities AND NaNs + // resulting sum overflows + skipTest[j][i] = (bufSkip[i] || + (gSkipNanInf && (FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW))))); + +#if defined(__PPC__) + // Since the current Power processors don't emulate flush to zero in HW, + // it must be emulated in SW instead. + if (gForceFTZ) + { + if ((fabsf(correct[j][i]) < FLT_MIN) && (correct[j][i] != 0.0f)) + correct[j][i] = copysignf(0.0f, correct[j][i]); + } +#endif + } + } + if( gHasDouble ) + { + // Spec requires correct non-flushed results + // for doubles. We disable FTZ if this is default on + // the platform (like ARM) for reference result computation + // It is no-op if platform default is not FTZ (e.g. 
x86) + FPU_mode_type oldMode; + DisableFTZ( &oldMode ); + + buf3_double = (double *)malloc( BUFFER_SIZE ); + buf4_double = (double *)malloc( BUFFER_SIZE ); + buf5_double = (double *)malloc( BUFFER_SIZE ); + buf6_double = (double *)malloc( BUFFER_SIZE ); + if( NULL == buf3_double || NULL == buf4_double || NULL == buf5_double || NULL == buf6_double ) + { + vlog_error( "Out of memory initializing DP buffers\n" ); + return -15; + } + for( i = 0; i < sizeof( correct_double ) / sizeof( correct_double[0] ); i++ ) + { + correct_double[i] = (double *)malloc( BUFFER_SIZE ); + if( NULL == correct_double[i] ) + { + vlog_error( "Out of memory initializing DP buffers 2\n" ); + return -15; + } + } + + + double *f = (double*) buf1; + double *f2 = (double*) buf2; + double *f3 = (double*) buf3_double; + double *f4 = (double*) buf4_double; + for( i = 0; i < BUFFER_SIZE / sizeof(double); i++ ) + { + double q = f[i]; + double q2 = f2[i]; +#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) + // VS2005 might use x87 for straight multiplies, and we can't + // turn that off + f3[i] = sse_mul_sd(q, q2); + f4[i] = sse_mul_sd(-q, q2); +#else + f3[i] = q * q2; + f4[i] = -q * q2; +#endif + } + + if( (error = clEnqueueWriteBuffer(gQueue, bufC_double, CL_FALSE, 0, BUFFER_SIZE, buf3_double, 0, NULL, NULL) )) + { + vlog_error( "Failure %d at clEnqueueWriteBuffer3\n", error ); + return error; + } + if( (error = clEnqueueWriteBuffer(gQueue, bufD_double, CL_FALSE, 0, BUFFER_SIZE, buf4_double, 0, NULL, NULL) )) + { + vlog_error( "Failure %d at clEnqueueWriteBuffer4\n", error ); + return error; + } + + // Fill the buffers with NaN + double *f5 = (double*) buf5_double; + double nan_val = nanf(""); + for( i = 0; i < BUFFER_SIZE / sizeof( double ); i++ ) + f5[i] = nan_val; + + // calculate reference results + for( i = 0; i < BUFFER_SIZE / sizeof( double ); i++ ) + { +#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) + // VS2005 might use x87 for straight add/sub, and we can't + // 
turn that off + correct_double[0][i] = sse_add_sd(buf3_double[i],buf4_double[i]); + correct_double[1][i] = sse_sub_sd(buf3_double[i],buf3_double[i]); + correct_double[2][i] = sse_add_sd(buf4_double[i],buf3_double[i]); + correct_double[3][i] = sse_sub_sd(buf3_double[i],buf3_double[i]); + correct_double[4][i] = -sse_add_sd(buf3_double[i],buf4_double[i]); + correct_double[5][i] = -sse_sub_sd(buf3_double[i],buf3_double[i]); + correct_double[6][i] = -sse_add_sd(buf4_double[i],buf3_double[i]); + correct_double[7][i] = -sse_sub_sd(buf3_double[i],buf3_double[i]); +#else + correct_double[0][i] = buf3_double[i] + buf4_double[i]; + correct_double[1][i] = buf3_double[i] - buf3_double[i]; + correct_double[2][i] = buf4_double[i] + buf3_double[i]; + correct_double[3][i] = buf3_double[i] - buf3_double[i]; + correct_double[4][i] = -(buf3_double[i] + buf4_double[i]); + correct_double[5][i] = -(buf3_double[i] - buf3_double[i]); + correct_double[6][i] = -(buf4_double[i] + buf3_double[i]); + correct_double[7][i] = -(buf3_double[i] - buf3_double[i]); +#endif + } + + // Restore previous FP state since we modified it for + // reference result computation (see DisableFTZ call above) + RestoreFPState(&oldMode); + } + } + + char c[1000]; + static const char *no_yes[] = { "NO", "YES" }; + vlog( "\nCompute Device info:\n" ); + clGetDeviceInfo( gDevice, CL_DEVICE_NAME, sizeof(c), (void *)&c, NULL); + vlog( "\tDevice Name: %s\n", c ); + clGetDeviceInfo( gDevice, CL_DEVICE_VENDOR, sizeof(c), (void *)&c, NULL); + vlog( "\tVendor: %s\n", c ); + clGetDeviceInfo( gDevice, CL_DEVICE_VERSION, sizeof(c), (void *)&c, NULL); + vlog( "\tDevice Version: %s\n", c ); + clGetDeviceInfo( gDevice, CL_DEVICE_OPENCL_C_VERSION, sizeof(c), &c, NULL); + vlog( "\tCL C Version: %s\n", c ); + clGetDeviceInfo( gDevice, CL_DRIVER_VERSION, sizeof(c), (void *)&c, NULL); + vlog( "\tDriver Version: %s\n", c ); + vlog( "\tSubnormal values supported? 
%s\n", no_yes[0 != (CL_FP_DENORM & floatCapabilities)] ); + vlog( "\tTesting with FTZ mode ON? %s\n", no_yes[0 != gForceFTZ] ); + vlog( "\tTesting Doubles? %s\n", no_yes[0 != gHasDouble] ); + vlog( "\tRandom Number seed: 0x%8.8x\n", gSeed ); + vlog( "\n\n" ); + + return 0; +} + +static void ReleaseCL( void ) +{ + clReleaseMemObject(bufA); + clReleaseMemObject(bufB); + clReleaseMemObject(bufC); + clReleaseMemObject(bufD); + clReleaseMemObject(bufE); + clReleaseProgram(gProgram[0]); + clReleaseProgram(gProgram[1]); + clReleaseProgram(gProgram[2]); + clReleaseProgram(gProgram[3]); + clReleaseProgram(gProgram[4]); + if( gHasDouble ) + { + clReleaseMemObject(bufC_double); + clReleaseMemObject(bufD_double); + clReleaseProgram(gProgram_double[0]); + clReleaseProgram(gProgram_double[1]); + clReleaseProgram(gProgram_double[2]); + clReleaseProgram(gProgram_double[3]); + clReleaseProgram(gProgram_double[4]); + } + clReleaseCommandQueue(gQueue); + clReleaseContext(gContext); +} + + +static int RunTest( int testNumber ) +{ + size_t i; + int error = 0; + cl_mem args[4]; + float *c; + const char *kernelName[] = { "kernel1", "kernel2", "kernel3", "kernel4", + "kernel5", "kernel6", "kernel7", "kernel8" }; + switch( testNumber ) + { + case 0: args[0] = bufE; args[1] = bufA; args[2] = bufB; args[3] = bufD; c = buf4; break; // a * b + c + case 1: args[0] = bufE; args[1] = bufA; args[2] = bufB; args[3] = bufC; c = buf3; break; + case 2: args[0] = bufE; args[1] = bufA; args[2] = bufB; args[3] = bufD; c = buf4; break; + case 3: args[0] = bufE; args[1] = bufA; args[2] = bufB; args[3] = bufC; c = buf3; break; + case 4: args[0] = bufE; args[1] = bufA; args[2] = bufB; args[3] = bufD; c = buf4; break; + case 5: args[0] = bufE; args[1] = bufA; args[2] = bufB; args[3] = bufC; c = buf3; break; + case 6: args[0] = bufE; args[1] = bufA; args[2] = bufB; args[3] = bufD; c = buf4; break; + case 7: args[0] = bufE; args[1] = bufA; args[2] = bufB; args[3] = bufC; c = buf3; break; + default: + 
vlog_error( "Unknown test case %d passed to RunTest\n", testNumber ); + return -1; + } + + + int vectorSize; + for( vectorSize = 0; vectorSize < 5; vectorSize++ ) + { + cl_kernel k = clCreateKernel( gProgram[ vectorSize ], kernelName[ testNumber ], &error ); + if( NULL == k || error ) + { + vlog_error( "%d) Unable to find kernel \"%s\" for vector size: %d\n", error, kernelName[ testNumber ], 1 << vectorSize ); + return -2; + } + + // set the kernel args + for( i = 0; i < sizeof(args ) / sizeof( args[0]); i++ ) + if( (error = clSetKernelArg(k, i, sizeof( cl_mem ), args + i) )) + { + vlog_error( "Error %d setting kernel arg # %ld\n", error, i ); + return error; + } + + // write NaNs to the result array + if( (error = clEnqueueWriteBuffer(gQueue, bufE, CL_TRUE, 0, BUFFER_SIZE, buf5, 0, NULL, NULL) )) + { + vlog_error( "Failure %d at clWriteArray %d\n", error, testNumber ); + return error; + } + + // execute the kernel + size_t gDim[3] = { BUFFER_SIZE / (sizeof( cl_float ) * (1<gcc:-xc++ + msvc:"/TP" + ; + +exe test_conversions + : basic_test_conversions.c + Sleep.c + test_conversions.c + ; + +install dist + : test_conversions + : debug:$(DIST)/debug/tests/test_conformance/conversions + release:$(DIST)/release/tests/test_conformance/conversions + ; diff --git a/test_conformance/conversions/Makefile b/test_conformance/conversions/Makefile new file mode 100644 index 00000000..370224e3 --- /dev/null +++ b/test_conformance/conversions/Makefile @@ -0,0 +1,50 @@ +ifdef BUILD_WITH_ATF +ATF = -framework ATF +USE_ATF = -DUSE_ATF +endif + +# We do not use dependencies in this Makefile + +SRCFILES = Sleep.c test_conversions.c ../../test_common/harness/mt19937.c ../../test_common/harness/ThreadPool.c ../../test_common/harness/rounding_mode.c + +CC = c++ + +CFLAGS = -g -Wall -Wshorten-64-to-32 $(COMPILERFLAGS) \ + ${RC_CFLAGS} ${USE_ATF} + +INCLUDES = -I../../test_common/harness \ + ${RC_CFLAGS} ${ATF} + +LIBRARIES = -framework OpenCL -framework CoreFoundation -framework IOKit + 
+release: + echo "Build Release" + $(CC) -c basic_test_conversions.c -Os $(CFLAGS) -o basic_test_conversions.o + $(CC) $(SRCFILES) -Os $(CFLAGS) basic_test_conversions.o -o test_conversions $(INCLUDES) $(LIBRARIES) + +debug: + echo "Build Debug" + $(CC) -c basic_test_conversions.c -O0 $(CFLAGS) -o basic_test_conversions.o + $(CC) test_conversions.c -c -O0 -g $(CFLAGS) $(INCLUDES) -o test_conversions.o + $(CC) Sleep.c -c -O0 -g $(CFLAGS) $(INCLUDES) -o Sleep.o + $(CC) ../../test_common/harness/mt19937.c -c -O0 -g $(INCLUDES) $(CFLAGS) -o mt19937.o + $(CC) ../../test_common/harness/ThreadPool.c -c -O0 -g $(INCLUDES) $(CFLAGS) -o ThreadPool.o + $(CC) ../../test_common/harness/rounding_mode.c -c -O0 -g $(INCLUDES) $(CFLAGS) -o rounding_mode.o + $(CC) *.o -g -O0 -o test_conversions_debug $(LIBRARIES) -arch i386 -arch x86_64 + +clean: + rm -f test_conversions + rm -f *.o + rm -f test_conversions_debug + +test: release + arch -i386 ./test_conversions -c > cpu.out & + arch -i386 ./test_conversions -g > gpu.out & + echo "Testing 32-bit mode conversions for CPU and GPU, which takes a day or three. Results may be found in cpu.out and gpu.out\n" + +test64: release + arch -x86_64 ./test_conversions_64 -c > cpu64.out & + arch -x86_64 ./test_conversions_64 -g > gpu64.out & + echo "Testing 64-bit mode conversions for CPU and GPU, which takes a day or three. Results may be found in cpu64.out and gpu64.out\n" + +all: release diff --git a/test_conformance/conversions/Sleep.c b/test_conformance/conversions/Sleep.c new file mode 100644 index 00000000..8ed6ef34 --- /dev/null +++ b/test_conformance/conversions/Sleep.c @@ -0,0 +1,120 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#if defined( __APPLE__ ) + + #include "Sleep.h" + #include + #include + #include "basic_test_conversions.h" + + #define UNUSED __attribute__((unused)) + + struct + { + io_connect_t connection; + IONotificationPortRef port; + io_object_t iterator; + }sleepInfo; + + void sleepCallback( void * refcon, + io_service_t service, + natural_t messageType, + void * messageArgument ); + + void sleepCallback( void * refcon UNUSED, + io_service_t service UNUSED, + natural_t messageType, + void * messageArgument ) + { + + IOReturn result; + /* + service -- The IOService whose state has changed. + messageType -- A messageType enum, defined by IOKit/IOMessage.h or by the IOService's family. + messageArgument -- An argument for the message, dependent on the messageType. + */ + switch ( messageType ) + { + case kIOMessageSystemWillSleep: + // Handle demand sleep (such as sleep caused by running out of + // batteries, closing the lid of a laptop, or selecting + // sleep from the Apple menu. + IOAllowPowerChange(sleepInfo.connection,(long)messageArgument); + vlog( "Hard sleep occurred.\n" ); + break; + case kIOMessageCanSystemSleep: + // In this case, the computer has been idle for several minutes + // and will sleep soon so you must either allow or cancel + // this notification. Important: if you don’t respond, there will + // be a 30-second timeout before the computer sleeps. 
+ // IOCancelPowerChange(root_port,(long)messageArgument); + result = IOCancelPowerChange(sleepInfo.connection,(long)messageArgument); + if( kIOReturnSuccess != result ) + vlog( "sleep prevention failed. (%d)\n", result); + break; + case kIOMessageSystemHasPoweredOn: + // Handle wakeup. + break; + } + } + + void PreventSleep( void ) + { + vlog( "Disabling sleep... " ); + sleepInfo.iterator = (io_object_t) 0; + sleepInfo.port = NULL; + sleepInfo.connection = IORegisterForSystemPower + ( + &sleepInfo, //void * refcon, + &sleepInfo.port, //IONotificationPortRef * thePortRef, + sleepCallback, //IOServiceInterestCallback callback, + &sleepInfo.iterator //io_object_t * notifier + ); + + if( (io_connect_t) 0 == sleepInfo.connection ) + vlog( "failed.\n" ); + else + vlog( "done.\n" ); + + CFRunLoopAddSource(CFRunLoopGetCurrent(), + IONotificationPortGetRunLoopSource(sleepInfo.port), + kCFRunLoopDefaultMode); + } + + void ResumeSleep( void ) + { + IOReturn result = IODeregisterForSystemPower ( &sleepInfo.iterator ); + if( 0 != result ) + vlog( "Got error %d restoring sleep \n", result ); + else + vlog( "Sleep restored.\n" ); + } + +#else /* not __APPLE__ */ +#if defined(__cplusplus) +extern "C" { +#endif + + void PreventSleep( void ) {} + void ResumeSleep( void ) {} + +#if defined(__cplusplus) +} +#endif //__cplusplus + +#endif + + diff --git a/test_conformance/conversions/Sleep.h b/test_conformance/conversions/Sleep.h new file mode 100644 index 00000000..195e886f --- /dev/null +++ b/test_conformance/conversions/Sleep.h @@ -0,0 +1,32 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef SLEEP_H +#define SLEEP_H + +#if defined(__cplusplus) +extern "C" { +#endif + +void PreventSleep( void ); +void ResumeSleep( void ); + +#if defined(__cplusplus) +} +#endif //__cplusplus + +#endif /* SLEEP_H */ + + diff --git a/test_conformance/conversions/basic_test_conversions.c b/test_conformance/conversions/basic_test_conversions.c new file mode 100644 index 00000000..73f38633 --- /dev/null +++ b/test_conformance/conversions/basic_test_conversions.c @@ -0,0 +1,2246 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/compat.h" + +#include "basic_test_conversions.h" +#include +#include + +#include "../../test_common/harness/mt19937.h" + +#if defined( __arm__ ) && defined( __GNUC__ ) +#include "fplib.h" +#endif + +#if defined( __arm__ ) && defined( __GNUC__ ) +/* Rounding modes and saturation for use with qcom 64 bit to float conversion library */ + bool qcom_sat; + roundingMode qcom_rm; +#endif + +static inline cl_ulong random64( MTdata d ); + +#if defined (_WIN32) + #include + #include +#else // !_WIN32 +#if defined (__SSE__ ) + #include +#endif +#if defined (__SSE2__ ) + #include +#endif +#endif // _WIN32 + +const char *gTypeNames[ kTypeCount ] = { + "uchar", "char", + "ushort", "short", + "uint", "int", + "float", "double", + "ulong", "long" + }; + +const char *gRoundingModeNames[ kRoundingModeCount ] = { + "", + "_rte", + "_rtp", + "_rtn", + "_rtz" + }; + +const char *gSaturationNames[ 2 ] = { "", "_sat" }; + +size_t gTypeSizes[ kTypeCount ] = { + sizeof( cl_uchar ), sizeof( cl_char ), + sizeof( cl_ushort ), sizeof( cl_short ), + sizeof( cl_uint ), sizeof( cl_int ), + sizeof( cl_float ), sizeof( cl_double ), + sizeof( cl_ulong ), sizeof( cl_long ), + }; + +long lrintf_clamped( float f ); +long lrintf_clamped( float f ) +{ + static const float magic[2] = { MAKE_HEX_FLOAT( 0x1.0p23f, 0x1, 23), - MAKE_HEX_FLOAT( 0x1.0p23f, 0x1, 23) }; + + if( f >= -(float) LONG_MIN ) + return LONG_MAX; + + if( f <= (float) LONG_MIN ) + return LONG_MIN; + + // Round fractional values to integer in round towards nearest mode + if( fabsf(f) < MAKE_HEX_FLOAT( 0x1.0p23f, 0x1, 23 ) ) + { + volatile float x = f; + float magicVal = magic[ f < 0 ]; + +#if defined( __SSE__ ) || defined (_WIN32) + // Defeat x87 based arithmetic, which cant do FTZ, and will round this incorrectly + __m128 v = _mm_set_ss( x ); + __m128 m = _mm_set_ss( magicVal ); + v = _mm_add_ss( v, m ); + v = _mm_sub_ss( v, m ); + _mm_store_ss( (float*) &x, v ); +#else + x += magicVal; + x -= 
magicVal; +#endif + f = x; + } + + return (long) f; +} + +long long llrintf_clamped( float f ); +long long llrintf_clamped( float f ) +{ + static const float magic[2] = { MAKE_HEX_FLOAT( 0x1.0p23f, 0x1, 23), - MAKE_HEX_FLOAT( 0x1.0p23f, 0x1, 23) }; + + if( f >= -(float) LLONG_MIN ) + return LLONG_MAX; + + if( f <= (float) LLONG_MIN ) + return LLONG_MIN; + + // Round fractional values to integer in round towards nearest mode + if( fabsf(f) < MAKE_HEX_FLOAT(0x1.0p23f, 0x1L, 23) ) + { + volatile float x = f; + float magicVal = magic[ f < 0 ]; +#if defined( __SSE__ ) || defined (_WIN32) + // Defeat x87 based arithmetic, which cant do FTZ, and will round this incorrectly + __m128 v = _mm_set_ss( x ); + __m128 m = _mm_set_ss( magicVal ); + v = _mm_add_ss( v, m ); + v = _mm_sub_ss( v, m ); + _mm_store_ss( (float*) &x, v ); +#else + x += magicVal; + x -= magicVal; +#endif + f = x; + } + + return (long long) f; +} + +long lrint_clamped( double f ); +long lrint_clamped( double f ) +{ + static const double magic[2] = { MAKE_HEX_DOUBLE(0x1.0p52, 0x1LL, 52), MAKE_HEX_DOUBLE(-0x1.0p52, -0x1LL, 52) }; + + if( sizeof( long ) > 4 ) + { + if( f >= -(double) LONG_MIN ) + return LONG_MAX; + } + else + { + if( f >= LONG_MAX ) + return LONG_MAX; + } + + if( f <= (double) LONG_MIN ) + return LONG_MIN; + + // Round fractional values to integer in round towards nearest mode + if( fabs(f) < MAKE_HEX_DOUBLE(0x1.0p52, 0x1LL, 52) ) + { + volatile double x = f; + double magicVal = magic[ f < 0 ]; +#if defined( __SSE2__ ) || defined (_MSC_VER) + // Defeat x87 based arithmetic, which cant do FTZ, and will round this incorrectly + __m128d v = _mm_set_sd( x ); + __m128d m = _mm_set_sd( magicVal ); + v = _mm_add_sd( v, m ); + v = _mm_sub_sd( v, m ); + _mm_store_sd( (double*) &x, v ); +#else + x += magicVal; + x -= magicVal; +#endif + f = x; + } + + return (long) f; +} + +long long llrint_clamped( double f ); +long long llrint_clamped( double f ) +{ + static const double magic[2] = { 
MAKE_HEX_DOUBLE(0x1.0p52, 0x1LL, 52), MAKE_HEX_DOUBLE(-0x1.0p52, -0x1LL, 52) }; + + if( f >= -(double) LLONG_MIN ) + return LLONG_MAX; + + if( f <= (double) LLONG_MIN ) + return LLONG_MIN; + + // Round fractional values to integer in round towards nearest mode + if( fabs(f) < MAKE_HEX_DOUBLE(0x1.0p52, 0x1LL, 52) ) + { + volatile double x = f; + double magicVal = magic[ f < 0 ]; +#if defined( __SSE2__ ) || defined (_MSC_VER) + // Defeat x87 based arithmetic, which cant do FTZ, and will round this incorrectly + __m128d v = _mm_set_sd( x ); + __m128d m = _mm_set_sd( magicVal ); + v = _mm_add_sd( v, m ); + v = _mm_sub_sd( v, m ); + _mm_store_sd( (double*) &x, v ); +#else + x += magicVal; + x -= magicVal; +#endif + f = x; + } + + return (long long) f; +} + + +/* + Names created as: + + #include + + const char *names[] = { "uchar", "char", "ushort", "short", "uint", "int", "float", "double", "ulong", "long" }; + + int main( void ) + { + + int i,j; + + for( i = 0; i < sizeof( names ) / sizeof( names[0] ); i++ ) + for( j = 0; j < sizeof( names ) / sizeof( names[0] ); j++ ) + { + if( j == i ) + continue; + + vlog( "void %s2%s( void *, void *);\n", names[i], names[j] ); + } + + + return 0; + } +*/ + +static float my_fabsf( float x ); +static double my_fabs( double x ); + + + +static void uchar2char( void *, void *); +static void uchar2ushort( void *, void *); +static void uchar2short( void *, void *); +static void uchar2uint( void *, void *); +static void uchar2int( void *, void *); +static void uchar2float( void *, void *); +static void uchar2double( void *, void *); +static void uchar2ulong( void *, void *); +static void uchar2long( void *, void *); +static void char2uchar( void *, void *); +static void char2ushort( void *, void *); +static void char2short( void *, void *); +static void char2uint( void *, void *); +static void char2int( void *, void *); +static void char2float( void *, void *); +static void char2double( void *, void *); +static void char2ulong( void *, 
void *); +static void char2long( void *, void *); +static void ushort2uchar( void *, void *); +static void ushort2char( void *, void *); +static void ushort2short( void *, void *); +static void ushort2uint( void *, void *); +static void ushort2int( void *, void *); +static void ushort2float( void *, void *); +static void ushort2double( void *, void *); +static void ushort2ulong( void *, void *); +static void ushort2long( void *, void *); +static void short2uchar( void *, void *); +static void short2char( void *, void *); +static void short2ushort( void *, void *); +static void short2uint( void *, void *); +static void short2int( void *, void *); +static void short2float( void *, void *); +static void short2double( void *, void *); +static void short2ulong( void *, void *); +static void short2long( void *, void *); +static void uint2uchar( void *, void *); +static void uint2char( void *, void *); +static void uint2ushort( void *, void *); +static void uint2short( void *, void *); +static void uint2int( void *, void *); +static void uint2float( void *, void *); +static void uint2double( void *, void *); +static void uint2ulong( void *, void *); +static void uint2long( void *, void *); +static void int2uchar( void *, void *); +static void int2char( void *, void *); +static void int2ushort( void *, void *); +static void int2short( void *, void *); +static void int2uint( void *, void *); +static void int2float( void *, void *); +static void int2double( void *, void *); +static void int2ulong( void *, void *); +static void int2long( void *, void *); +static void float2uchar( void *, void *); +static void float2char( void *, void *); +static void float2ushort( void *, void *); +static void float2short( void *, void *); +static void float2uint( void *, void *); +static void float2int( void *, void *); +static void float2double( void *, void *); +static void float2ulong( void *, void *); +static void float2long( void *, void *); +static void double2uchar( void *, void *); 
+static void double2char( void *, void *); +static void double2ushort( void *, void *); +static void double2short( void *, void *); +static void double2uint( void *, void *); +static void double2int( void *, void *); +static void double2float( void *, void *); +static void double2ulong( void *, void *); +static void double2long( void *, void *); +static void ulong2uchar( void *, void *); +static void ulong2char( void *, void *); +static void ulong2ushort( void *, void *); +static void ulong2short( void *, void *); +static void ulong2uint( void *, void *); +static void ulong2int( void *, void *); +static void ulong2float( void *, void *); +static void ulong2double( void *, void *); +static void ulong2long( void *, void *); +static void long2uchar( void *, void *); +static void long2char( void *, void *); +static void long2ushort( void *, void *); +static void long2short( void *, void *); +static void long2uint( void *, void *); +static void long2int( void *, void *); +static void long2float( void *, void *); +static void long2double( void *, void *); +static void long2ulong( void *, void *); + +/* + Conversion list created as + + #include + + const char *names[] = { "uchar", "char", "ushort", "short", "uint", "int", "float", "double", "ulong", "long" }; + + int main( void ) + { + + int i,j; + + for( i = 0; i < sizeof( names ) / sizeof( names[0] ); i++ ) + { + vlog( "{ " ); + for( j = 0; j < sizeof( names ) / sizeof( names[0] ); j++ ) + { + if( j == i ) + vlog( " NULL, " ); + else + { + char s[64]; + sprintf( s, "%s2%s,", names[j], names[i] ); + vlog( "%15s ", s ); + } + } + vlog( "},\n" ); + } + + return 0; + } + + */ +/* +Convert gConversions[kTypeCount][kTypeCount] = { +{ NULL, char2uchar, ushort2uchar, short2uchar, uint2uchar, int2uchar, float2uchar, double2uchar, ulong2uchar, long2uchar, }, +{ uchar2char, NULL, ushort2char, short2char, uint2char, int2char, float2char, double2char, ulong2char, long2char, }, +{ uchar2ushort, char2ushort, NULL, short2ushort, 
uint2ushort, int2ushort, float2ushort, double2ushort, ulong2ushort, long2ushort, }, +{ uchar2short, char2short, ushort2short, NULL, uint2short, int2short, float2short, double2short, ulong2short, long2short, }, +{ uchar2uint, char2uint, ushort2uint, short2uint, NULL, int2uint, float2uint, double2uint, ulong2uint, long2uint, }, +{ uchar2int, char2int, ushort2int, short2int, uint2int, NULL, float2int, double2int, ulong2int, long2int, }, +{ uchar2float, char2float, ushort2float, short2float, uint2float, int2float, NULL, double2float, ulong2float, long2float, }, +{ uchar2double, char2double, ushort2double, short2double, uint2double, int2double, float2double, NULL, ulong2double, long2double, }, +{ uchar2ulong, char2ulong, ushort2ulong, short2ulong, uint2ulong, int2ulong, float2ulong, double2ulong, NULL, long2ulong, }, +{ uchar2long, char2long, ushort2long, short2long, uint2long, int2long, float2long, double2long, ulong2long, NULL, } }; +*/ + +static void uchar2char_sat( void *, void *); +static void uchar2ushort_sat( void *, void *); +static void uchar2short_sat( void *, void *); +static void uchar2uint_sat( void *, void *); +static void uchar2int_sat( void *, void *); +static void uchar2float_sat( void *, void *); +static void uchar2double_sat( void *, void *); +static void uchar2ulong_sat( void *, void *); +static void uchar2long_sat( void *, void *); +static void char2uchar_sat( void *, void *); +static void char2ushort_sat( void *, void *); +static void char2short_sat( void *, void *); +static void char2uint_sat( void *, void *); +static void char2int_sat( void *, void *); +static void char2float_sat( void *, void *); +static void char2double_sat( void *, void *); +static void char2ulong_sat( void *, void *); +static void char2long_sat( void *, void *); +static void ushort2uchar_sat( void *, void *); +static void ushort2char_sat( void *, void *); +static void ushort2short_sat( void *, void *); +static void ushort2uint_sat( void *, void *); +static void 
ushort2int_sat( void *, void *); +static void ushort2float_sat( void *, void *); +static void ushort2double_sat( void *, void *); +static void ushort2ulong_sat( void *, void *); +static void ushort2long_sat( void *, void *); +static void short2uchar_sat( void *, void *); +static void short2char_sat( void *, void *); +static void short2ushort_sat( void *, void *); +static void short2uint_sat( void *, void *); +static void short2int_sat( void *, void *); +static void short2float_sat( void *, void *); +static void short2double_sat( void *, void *); +static void short2ulong_sat( void *, void *); +static void short2long_sat( void *, void *); +static void uint2uchar_sat( void *, void *); +static void uint2char_sat( void *, void *); +static void uint2ushort_sat( void *, void *); +static void uint2short_sat( void *, void *); +static void uint2int_sat( void *, void *); +static void uint2float_sat( void *, void *); +static void uint2double_sat( void *, void *); +static void uint2ulong_sat( void *, void *); +static void uint2long_sat( void *, void *); +static void int2uchar_sat( void *, void *); +static void int2char_sat( void *, void *); +static void int2ushort_sat( void *, void *); +static void int2short_sat( void *, void *); +static void int2uint_sat( void *, void *); +static void int2float_sat( void *, void *); +static void int2double_sat( void *, void *); +static void int2ulong_sat( void *, void *); +static void int2long_sat( void *, void *); +static void float2uchar_sat( void *, void *); +static void float2char_sat( void *, void *); +static void float2ushort_sat( void *, void *); +static void float2short_sat( void *, void *); +static void float2uint_sat( void *, void *); +static void float2int_sat( void *, void *); +static void float2double_sat( void *, void *); +static void float2ulong_sat( void *, void *); +static void float2long_sat( void *, void *); +static void double2uchar_sat( void *, void *); +static void double2char_sat( void *, void *); +static void 
double2ushort_sat( void *, void *); +static void double2short_sat( void *, void *); +static void double2uint_sat( void *, void *); +static void double2int_sat( void *, void *); +static void double2float_sat( void *, void *); +static void double2ulong_sat( void *, void *); +static void double2long_sat( void *, void *); +static void ulong2uchar_sat( void *, void *); +static void ulong2char_sat( void *, void *); +static void ulong2ushort_sat( void *, void *); +static void ulong2short_sat( void *, void *); +static void ulong2uint_sat( void *, void *); +static void ulong2int_sat( void *, void *); +static void ulong2float_sat( void *, void *); +static void ulong2double_sat( void *, void *); +static void ulong2long_sat( void *, void *); +static void long2uchar_sat( void *, void *); +static void long2char_sat( void *, void *); +static void long2ushort_sat( void *, void *); +static void long2short_sat( void *, void *); +static void long2uint_sat( void *, void *); +static void long2int_sat( void *, void *); +static void long2float_sat( void *, void *); +static void long2double_sat( void *, void *); +static void long2ulong_sat( void *, void *); +/* + #include + + const char *names[] = { "uchar", "char", "ushort", "short", "uint", "int", "float", "double", "ulong", "long" }; + + int main( void ) + { + + int i,j; + + for( i = 0; i < sizeof( names ) / sizeof( names[0] ); i++ ) + { + vlog( "{ " ); + for( j = 0; j < sizeof( names ) / sizeof( names[0] ); j++ ) + { + if( j == i ) + vlog( " NULL, " ); + else + { + char s[64]; + sprintf( s, "%s2%s_sat,", names[j], names[i] ); + vlog( "%18s ", s ); + } + } + vlog( "},\n" ); + } + + return 0; + } + +Convert gSaturatedConversions[kTypeCount][kTypeCount] = { +{ NULL, char2uchar_sat, ushort2uchar_sat, short2uchar_sat, uint2uchar_sat, int2uchar_sat, float2uchar_sat, double2uchar_sat, ulong2uchar_sat, long2uchar_sat, }, +{ uchar2char_sat, NULL, ushort2char_sat, short2char_sat, uint2char_sat, int2char_sat, float2char_sat, double2char_sat, 
ulong2char_sat, long2char_sat, }, +{ uchar2ushort_sat, char2ushort_sat, NULL, short2ushort_sat, uint2ushort_sat, int2ushort_sat, float2ushort_sat, double2ushort_sat, ulong2ushort_sat, long2ushort_sat, }, +{ uchar2short_sat, char2short_sat, ushort2short_sat, NULL, uint2short_sat, int2short_sat, float2short_sat, double2short_sat, ulong2short_sat, long2short_sat, }, +{ uchar2uint_sat, char2uint_sat, ushort2uint_sat, short2uint_sat, NULL, int2uint_sat, float2uint_sat, double2uint_sat, ulong2uint_sat, long2uint_sat, }, +{ uchar2int_sat, char2int_sat, ushort2int_sat, short2int_sat, uint2int_sat, NULL, float2int_sat, double2int_sat, ulong2int_sat, long2int_sat, }, +{ uchar2float_sat, char2float_sat, ushort2float_sat, short2float_sat, uint2float_sat, int2float_sat, NULL, double2float_sat, ulong2float_sat, long2float_sat, }, +{ uchar2double_sat, char2double_sat, ushort2double_sat, short2double_sat, uint2double_sat, int2double_sat, float2double_sat, NULL, ulong2double_sat, long2double_sat, }, +{ uchar2ulong_sat, char2ulong_sat, ushort2ulong_sat, short2ulong_sat, uint2ulong_sat, int2ulong_sat, float2ulong_sat, double2ulong_sat, NULL, long2ulong_sat, }, +{ uchar2long_sat, char2long_sat, ushort2long_sat, short2long_sat, uint2long_sat, int2long_sat, float2long_sat, double2long_sat, ulong2long_sat, NULL, } +}; +*/ + +/* + #include + + const char *names[] = { "uchar", "char", "ushort", "short", "uint", "int", "float", "double", "ulong", "long" }; + const char *types[] = { "uchar", "char", "ushort", "short", "uint", "int", "float", "double", "ulong", "llong" }; + + int main( void ) + { + + int i,j; + + for( i = 0; i < sizeof( names ) / sizeof( names[0] ); i++ ) + for( j = 0; j < sizeof( names ) / sizeof( names[0] ); j++ ) + { + if( j == i ) + continue; + + switch( i ) + { + case 6: //float + if( j == 7 ) + vlog( "void %s2%s( void *out, void *in){ ((%s*) out)[0] = (%s) ((%s*) in)[0]; }\n", names[i], names[i], names[j], types[j], types[i] ); + else + vlog( "void %s2%s( void *out, 
void *in){ ((%s*) out)[0] = (%s) my_rintf(((%s*) in)[0]); }\n", names[i], names[i], names[j], types[j], types[i] ); + break; + case 7: //double + if( j == 6 ) + vlog( "void %s2%s( void *out, void *in){ ((%s*) out)[0] = (%s) ((%s*) in)[0]; }\n", names[i], names[i], names[j], types[j], types[i] ); + else + vlog( "void %s2%s( void *out, void *in){ ((%s*) out)[0] = (%s) rint(((%s*) in)[0]); }\n", names[i], names[i], names[j], types[j], types[i] ); + break; + default: + vlog( "void %s2%s( void *out, void *in){ ((%s*) out)[0] = (%s) + ((%s*) in)[0]; }\n", names[i], names[i], names[j], types[j], types[i] ); + break; + } + } + + + return 0; + } +*/ + +float my_fabsf( float x ) +{ + union{ cl_uint u; float f; }u; + u.f = x; + u.u &= 0x7fffffff; + return u.f; +} + +double my_fabs( double x ) +{ + union{ cl_ulong u; double f; }u; + u.f = x; + u.u &= 0x7fffffffffffffffULL; + return u.f; +} + +static float my_rintf( float f ); +static float my_rintf( float f ) +{ + static const float magic[2] = { MAKE_HEX_FLOAT( 0x1.0p23f, 0x1, 23), - MAKE_HEX_FLOAT( 0x1.0p23f, 0x1, 23) }; + + // Round fractional values to integer in round towards nearest mode + if( fabsf(f) < MAKE_HEX_FLOAT( 0x1.0p23f, 0x1, 23 ) ) + { + volatile float x = f; + float magicVal = magic[ f < 0 ]; + +#if defined( __SSE__ ) + // Defeat x87 based arithmetic, which cant do FTZ, and will round this incorrectly + __m128 v = _mm_set_ss( x ); + __m128 m = _mm_set_ss( magicVal ); + v = _mm_add_ss( v, m ); + v = _mm_sub_ss( v, m ); + _mm_store_ss( (float*) &x, v ); +#else + x += magicVal; + x -= magicVal; +#endif + f = x; + } + + return f; +} + +static void uchar2char( void *out, void *in){ ((char*) out)[0] = ((cl_uchar*) in)[0]; } +static void uchar2ushort( void *out, void *in){ ((cl_ushort*) out)[0] = ((cl_uchar*) in)[0]; } +static void uchar2short( void *out, void *in){ ((short*) out)[0] = ((cl_uchar*) in)[0]; } +static void uchar2uint( void *out, void *in){ ((cl_uint*) out)[0] = ((cl_uchar*) in)[0]; } +static void 
uchar2int( void *out, void *in){ ((int*) out)[0] = ((cl_uchar*) in)[0]; } +static void uchar2float( void *out, void *in) +{ + cl_uchar l = ((cl_uchar*) in)[0]; + ((float*) out)[0] = (l == 0 ? 0.0f : (float) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 +} +static void uchar2double( void *out, void *in) +{ + cl_uchar l = ((cl_uchar*) in)[0]; + ((double*) out)[0] = (l == 0 ? 0.0 : (double) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 +} +static void uchar2ulong( void *out, void *in){ ((cl_ulong*) out)[0] = ((cl_uchar*) in)[0]; } +static void uchar2long( void *out, void *in){ ((cl_long*) out)[0] = ((cl_uchar*) in)[0]; } +static void char2uchar( void *out, void *in){ ((cl_uchar*) out)[0] = ((cl_char*) in)[0]; } +static void char2ushort( void *out, void *in){ ((cl_ushort*) out)[0] = ((cl_char*) in)[0]; } +static void char2short( void *out, void *in){ ((short*) out)[0] = ((cl_char*) in)[0]; } +static void char2uint( void *out, void *in){ ((cl_uint*) out)[0] = ((cl_char*) in)[0]; } +static void char2int( void *out, void *in){ ((int*) out)[0] = ((cl_char*) in)[0]; } +static void char2float( void *out, void *in) +{ + cl_char l = ((cl_char*) in)[0]; + ((float*) out)[0] = (l == 0 ? 0.0f : (float) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 +} +static void char2double( void *out, void *in) +{ + cl_char l = ((cl_char*) in)[0]; + ((double*) out)[0] = (l == 0 ? 
0.0 : (double) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 +} +static void char2ulong( void *out, void *in){ ((cl_ulong*) out)[0] = ((cl_char*) in)[0]; } +static void char2long( void *out, void *in){ ((cl_long*) out)[0] = ((cl_char*) in)[0]; } +static void ushort2uchar( void *out, void *in){ ((cl_uchar*) out)[0] = ((cl_ushort*) in)[0]; } +static void ushort2char( void *out, void *in){ ((char*) out)[0] = ((cl_ushort*) in)[0]; } +static void ushort2short( void *out, void *in){ ((short*) out)[0] = ((cl_ushort*) in)[0]; } +static void ushort2uint( void *out, void *in){ ((cl_uint*) out)[0] = ((cl_ushort*) in)[0]; } +static void ushort2int( void *out, void *in){ ((int*) out)[0] = ((cl_ushort*) in)[0]; } +static void ushort2float( void *out, void *in) +{ + cl_ushort l = ((cl_ushort*) in)[0]; + ((float*) out)[0] = (l == 0 ? 0.0f : (float) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 +} +static void ushort2double( void *out, void *in) +{ + cl_ushort l = ((cl_ushort*) in)[0]; + ((double*) out)[0] = (l == 0 ? 0.0 : (double) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 +} +static void ushort2ulong( void *out, void *in){ ((cl_ulong*) out)[0] = ((cl_ushort*) in)[0]; } +static void ushort2long( void *out, void *in){ ((cl_long*) out)[0] = ((cl_ushort*) in)[0]; } +static void short2uchar( void *out, void *in){ ((cl_uchar*) out)[0] = ((cl_short*) in)[0]; } +static void short2char( void *out, void *in){ ((cl_char*) out)[0] = ((cl_short*) in)[0]; } +static void short2ushort( void *out, void *in){ ((cl_ushort*) out)[0] = ((cl_short*) in)[0]; } +static void short2uint( void *out, void *in){ ((cl_uint*) out)[0] = ((cl_short*) in)[0]; } +static void short2int( void *out, void *in){ ((cl_int*) out)[0] = ((cl_short*) in)[0]; } +static void short2float( void *out, void *in) +{ + cl_short l = ((cl_short*) in)[0]; + ((float*) out)[0] = (l == 0 ? 
0.0f : (float) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 +} +static void short2double( void *out, void *in) +{ + cl_short l = ((cl_short*) in)[0]; + ((double*) out)[0] = (l == 0 ? 0.0 : (double) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 +} +static void short2ulong( void *out, void *in){ ((cl_ulong*) out)[0] = ((cl_short*) in)[0]; } +static void short2long( void *out, void *in){ ((cl_long*) out)[0] = ((cl_short*) in)[0]; } +static void uint2uchar( void *out, void *in){ ((cl_uchar*) out)[0] = ((cl_uint*) in)[0]; } +static void uint2char( void *out, void *in){ ((cl_char*) out)[0] = ((cl_uint*) in)[0]; } +static void uint2ushort( void *out, void *in){ ((cl_ushort*) out)[0] = ((cl_uint*) in)[0]; } +static void uint2short( void *out, void *in){ ((short*) out)[0] = ((cl_uint*) in)[0]; } +static void uint2int( void *out, void *in){ ((cl_int*) out)[0] = ((cl_uint*) in)[0]; } +static void uint2float( void *out, void *in) +{ + cl_uint l = ((cl_uint*) in)[0]; + ((float*) out)[0] = (l == 0 ? 0.0f : (float) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 +} +static void uint2double( void *out, void *in) +{ + cl_uint l = ((cl_uint*) in)[0]; + ((double*) out)[0] = (l == 0 ? 
0.0 : (double) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 +} +static void uint2ulong( void *out, void *in){ ((cl_ulong*) out)[0] = ((cl_uint*) in)[0]; } +static void uint2long( void *out, void *in){ ((cl_long*) out)[0] = ((cl_uint*) in)[0]; } +static void int2uchar( void *out, void *in){ ((cl_uchar*) out)[0] = ((cl_int*) in)[0]; } +static void int2char( void *out, void *in){ ((cl_char*) out)[0] = ((cl_int*) in)[0]; } +static void int2ushort( void *out, void *in){ ((cl_ushort*) out)[0] = ((cl_int*) in)[0]; } +static void int2short( void *out, void *in){ ((cl_short*) out)[0] = ((cl_int*) in)[0]; } +static void int2uint( void *out, void *in){ ((cl_uint*) out)[0] = ((cl_int*) in)[0]; } +static void int2float( void *out, void *in) +{ + cl_int l = ((cl_int*) in)[0]; + ((float*) out)[0] = (l == 0 ? 0.0f : (float) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 +} +static void int2double( void *out, void *in) +{ + cl_int l = ((cl_int*) in)[0]; + ((double*) out)[0] = (l == 0 ? 
0.0 : (double) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 +} +static void int2ulong( void *out, void *in){ ((cl_ulong*) out)[0] = ((cl_int*) in)[0]; } +static void int2long( void *out, void *in){ ((cl_long*) out)[0] = ((cl_int*) in)[0]; } +static void float2uchar( void *out, void *in){ ((cl_uchar*) out)[0] = my_rintf(((cl_float*) in)[0]); } +static void float2char( void *out, void *in){ ((cl_char*) out)[0] = my_rintf(((cl_float*) in)[0]); } +static void float2ushort( void *out, void *in){ ((cl_ushort*) out)[0] = my_rintf(((cl_float*) in)[0]); } +static void float2short( void *out, void *in){ ((cl_short*) out)[0] = my_rintf(((cl_float*) in)[0]); } +static void float2uint( void *out, void *in){ ((cl_uint*) out)[0] = my_rintf(((cl_float*) in)[0]); } +static void float2int( void *out, void *in){ ((cl_int*) out)[0] = my_rintf(((cl_float*) in)[0]); } +static void float2double( void *out, void *in){ ((cl_double*) out)[0] = ((cl_float*) in)[0]; } +static void float2ulong( void *out, void *in) +{ +#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) + // VS2005 (at least) on x86 uses fistp to store the float as a 64-bit int. + // However, fistp stores it as a signed int, and some of the test values won't + // fit into a signed int. (These test values are >= 2^63.) The result on VS2005 + // is that these end up silently (at least by default settings) clamped to + // the max lowest ulong. 
+ cl_float x = my_rintf(((cl_float *)in)[0]); + if (x >= 9223372036854775808.0f) { + x -= 9223372036854775808.0f; + ((cl_ulong*) out)[0] = x; + ((cl_ulong*) out)[0] += 9223372036854775808ULL; + } else { + ((cl_ulong*) out)[0] = x; + } +#else + ((cl_ulong*) out)[0] = my_rintf(((cl_float*) in)[0]); +#endif +} + +static void float2long( void *out, void *in){ ((cl_long*) out)[0] = llrint_clamped( ((cl_float*) in)[0] ); } +static void double2uchar( void *out, void *in){ ((cl_uchar*) out)[0] = rint(((cl_double*) in)[0]); } +static void double2char( void *out, void *in){ ((cl_char*) out)[0] = rint(((cl_double*) in)[0]); } +static void double2ushort( void *out, void *in){ ((cl_ushort*) out)[0] = rint(((cl_double*) in)[0]); } +static void double2short( void *out, void *in){ ((cl_short*) out)[0] = rint(((cl_double*) in)[0]); } +static void double2uint( void *out, void *in){ ((cl_uint*) out)[0] = (cl_uint) rint(((cl_double*) in)[0]); } +static void double2int( void *out, void *in){ ((cl_int*) out)[0] = (int) rint(((cl_double*) in)[0]); } +static void double2float( void *out, void *in){ ((cl_float*) out)[0] = (float) ((cl_double*) in)[0]; } +static void double2ulong( void *out, void *in){ ((cl_ulong*) out)[0] = (cl_ulong) rint(((cl_double*) in)[0]); } +static void double2long( void *out, void *in){ ((cl_long*) out)[0] = (cl_long) rint(((cl_double*) in)[0]); } +static void ulong2uchar( void *out, void *in){ ((cl_uchar*) out)[0] = (cl_uchar) ((cl_ulong*) in)[0]; } +static void ulong2char( void *out, void *in){ ((cl_char*) out)[0] = (cl_char) ((cl_ulong*) in)[0]; } +static void ulong2ushort( void *out, void *in){ ((cl_ushort*) out)[0] = (cl_ushort) ((cl_ulong*) in)[0]; } +static void ulong2short( void *out, void *in){ ((cl_short*) out)[0] = (cl_short)((cl_ulong*) in)[0]; } +static void ulong2uint( void *out, void *in){ ((cl_uint*) out)[0] = (cl_uint) ((cl_ulong*) in)[0]; } +static void ulong2int( void *out, void *in){ ((cl_int*) out)[0] = (cl_int) ((cl_ulong*) in)[0]; } +static 
void ulong2float( void *out, void *in) +{ +#if defined(_MSC_VER) + cl_ulong l = ((cl_ulong*) in)[0]; + float result; + + cl_long sl = ((cl_long)l < 0) ? (cl_long)((l >> 1) | (l & 1)) : (cl_long)l; +#if defined(_M_X64) + _mm_store_ss(&result, _mm_cvtsi64_ss(_mm_setzero_ps(), sl)); +#else + result = sl; +#endif + ((float*) out)[0] = (l == 0 ? 0.0f : (((cl_long)l < 0) ? result * 2.0f : result)); +#else + cl_ulong l = ((cl_ulong*) in)[0]; +#if defined( __arm__ ) && defined( __GNUC__ ) + /* ARM VFP doesn't have hardware instruction for converting from 64-bit integer to float types, hence GCC ARM uses the floating-point emulation code + * despite which -mfloat-abi setting it is. But the emulation code in libgcc.a has only one rounding mode (round to nearest even in this case) + * and ignores the user rounding mode setting in hardware. + * As a result setting rounding modes in hardware won't give correct rounding results for type covert from 64-bit integer to float using GCC for ARM compiler + * so for testing different rounding modes, we need to use alternative reference function */ + ((float*) out)[0] = qcom_u64_2_f32(l, qcom_sat, qcom_rm); +#else + ((float*) out)[0] = (l == 0 ? 0.0f : (float) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 +#endif +#endif +} +static void ulong2double( void *out, void *in) +{ +#if defined(_MSC_VER) + cl_ulong l = ((cl_ulong*) in)[0]; + double result; + + cl_long sl = ((cl_long)l < 0) ? (cl_long)((l >> 1) | (l & 1)) : (cl_long)l; +#if defined(_M_X64) + _mm_store_sd(&result, _mm_cvtsi64_sd(_mm_setzero_pd(), sl)); +#else + result = sl; +#endif + ((double*) out)[0] = (l == 0 ? 0.0 : (((cl_long)l < 0) ? result * 2.0 : result)); +#else + cl_ulong l = ((cl_ulong*) in)[0]; + ((double*) out)[0] = (l == 0 ? 
0.0 : (double) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 +#endif +} +static void ulong2long( void *out, void *in){ ((cl_long*) out)[0] = ((cl_ulong*) in)[0]; } +static void long2uchar( void *out, void *in){ ((cl_uchar*) out)[0] = (cl_uchar) ((cl_long*) in)[0]; } +static void long2char( void *out, void *in){ ((cl_char*) out)[0] = (cl_char) ((cl_long*) in)[0]; } +static void long2ushort( void *out, void *in){ ((cl_ushort*) out)[0] = (cl_ushort) ((cl_long*) in)[0]; } +static void long2short( void *out, void *in){ ((cl_short*) out)[0] = (cl_short) ((cl_long*) in)[0]; } +static void long2uint( void *out, void *in){ ((cl_uint*) out)[0] = (cl_uint) ((cl_long*) in)[0]; } +static void long2int( void *out, void *in){ ((cl_int*) out)[0] = (cl_int) ((cl_long*) in)[0]; } +static void long2float( void *out, void *in) +{ +#if defined(_MSC_VER) && defined(_M_X64) + cl_long l = ((cl_long*) in)[0]; + float result; + + _mm_store_ss(&result, _mm_cvtsi64_ss(_mm_setzero_ps(), l)); + ((float*) out)[0] = (l == 0 ? 0.0f : result); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 +#else + cl_long l = ((cl_long*) in)[0]; +#if defined( __arm__ ) && defined( __GNUC__ ) + /* ARM VFP doesn't have hardware instruction for converting from 64-bit integer to float types, hence GCC ARM uses the floating-point emulation code + * despite which -mfloat-abi setting it is. But the emulation code in libgcc.a has only one rounding mode (round to nearest even in this case) + * and ignores the user rounding mode setting in hardware. + * As a result setting rounding modes in hardware won't give correct rounding results for type covert from 64-bit integer to float using GCC for ARM compiler + * so for testing different rounding modes, we need to use alternative reference function */ + ((float*) out)[0] = (l == 0 ? 0.0f : qcom_s64_2_f32(l, qcom_sat, qcom_rm)); +#else + ((float*) out)[0] = (l == 0 ? 
0.0f : (float) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 +#endif +#endif +} +static void long2double( void *out, void *in) +{ +#if defined(_MSC_VER) && defined(_M_X64) + cl_long l = ((cl_long*) in)[0]; + double result; + + _mm_store_sd(&result, _mm_cvtsi64_sd(_mm_setzero_pd(), l)); + ((double*) out)[0] = (l == 0 ? 0.0 : result); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 +#else + cl_long l = ((cl_long*) in)[0]; + ((double*) out)[0] = (l == 0 ? 0.0 : (double) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 +#endif +} +static void long2ulong( void *out, void *in){ ((cl_ulong*) out)[0] = ((cl_long*) in)[0]; } + +#define CLAMP( _lo, _x, _hi ) ( (_x) < (_lo) ? (_lo) : ((_x) > (_hi) ? (_hi) : (_x))) + +// Done by hand +static void uchar2char_sat( void *out, void *in){ cl_uchar c = ((cl_uchar*) in)[0]; ((cl_char*) out)[0] = c > 0x7f ? 0x7f : c; } +static void uchar2ushort_sat( void *out, void *in){ ((cl_ushort*) out)[0] = ((cl_uchar*) in)[0]; } +static void uchar2short_sat( void *out, void *in){ ((cl_short*) out)[0] = ((cl_uchar*) in)[0]; } +static void uchar2uint_sat( void *out, void *in){ ((cl_uint*) out)[0] = ((cl_uchar*) in)[0]; } +static void uchar2int_sat( void *out, void *in){ ((cl_int*) out)[0] = ((cl_uchar*) in)[0]; } +static void uchar2float_sat( void *out, void *in){ ((cl_float*) out)[0] = my_fabsf( (cl_float) ((cl_uchar*) in)[0]); } // my_fabs workaround for +static void uchar2double_sat( void *out, void *in){ ((cl_double*) out)[0] = my_fabs( (cl_double) ((cl_uchar*) in)[0]); } // my_fabs workaround for +static void uchar2ulong_sat( void *out, void *in){ ((cl_ulong*) out)[0] = ((cl_uchar*) in)[0]; } +static void uchar2long_sat( void *out, void *in){ ((cl_long*) out)[0] = ((cl_uchar*) in)[0]; } +static void char2uchar_sat( void *out, void *in){ cl_char c = ((cl_char*) in)[0]; ((cl_uchar*) out)[0] = c < 0 ? 
0 : c; } +static void char2ushort_sat( void *out, void *in){ cl_char c = ((cl_char*) in)[0]; ((cl_ushort*) out)[0] = c < 0 ? 0 : c; } +static void char2short_sat( void *out, void *in){ ((cl_short*) out)[0] = ((cl_char*) in)[0]; } +static void char2uint_sat( void *out, void *in){ cl_char c = ((cl_char*) in)[0]; ((cl_uint*) out)[0] = c < 0 ? 0 : c; } +static void char2int_sat( void *out, void *in){ ((cl_int*) out)[0] = ((cl_char*) in)[0]; } +static void char2float_sat( void *out, void *in){ ((cl_float*) out)[0] = ((cl_char*) in)[0]; } +static void char2double_sat( void *out, void *in){ ((cl_double*) out)[0] = ((cl_char*) in)[0]; } +static void char2ulong_sat( void *out, void *in){ cl_char c = ((cl_char*) in)[0]; ((cl_ulong*) out)[0] = c < 0 ? 0 : c; } +static void char2long_sat( void *out, void *in){ ((cl_long*) out)[0] = ((cl_char*) in)[0]; } +static void ushort2uchar_sat( void *out, void *in){ cl_ushort u = ((cl_ushort*) in)[0]; ((cl_uchar*) out)[0] = u > 0xff ? 0xFF : u; } +static void ushort2char_sat( void *out, void *in){ cl_ushort u = ((cl_ushort*) in)[0]; ((cl_char*) out)[0] = u > 0x7f ? 0x7F : u; } +static void ushort2short_sat( void *out, void *in){ cl_ushort u = ((cl_ushort*) in)[0]; ((cl_short*) out)[0] = u > 0x7fff ? 
0x7fFF : u; } +static void ushort2uint_sat( void *out, void *in){ ((cl_uint*) out)[0] = ((cl_ushort*) in)[0]; } +static void ushort2int_sat( void *out, void *in){ ((cl_int*) out)[0] = ((cl_ushort*) in)[0]; } +static void ushort2float_sat( void *out, void *in){ ((cl_float*) out)[0] = my_fabsf((cl_float)((cl_ushort*) in)[0]); } // my_fabs workaround for +static void ushort2double_sat( void *out, void *in){ ((cl_double*) out)[0] = my_fabs( (cl_double) ((cl_ushort*) in)[0]); } // my_fabs workaround for +static void ushort2ulong_sat( void *out, void *in){ ((cl_ulong*) out)[0] = ((cl_ushort*) in)[0]; } +static void ushort2long_sat( void *out, void *in){ ((cl_long*) out)[0] = ((cl_ushort*) in)[0]; } +static void short2uchar_sat( void *out, void *in){ cl_short s = ((cl_short*) in)[0]; ((cl_uchar*) out)[0] = CLAMP( 0, s, CL_UCHAR_MAX ); } +static void short2char_sat( void *out, void *in){ cl_short s = ((cl_short*) in)[0]; ((cl_char*) out)[0] = CLAMP( CL_CHAR_MIN, s, CL_CHAR_MAX ); } +static void short2ushort_sat( void *out, void *in){ cl_short s = ((cl_short*) in)[0]; ((cl_ushort*) out)[0] = s < 0 ? 0 : s; } +static void short2uint_sat( void *out, void *in){ cl_short s = ((cl_short*) in)[0]; ((cl_uint*) out)[0] = s < 0 ? 0 : s; } +static void short2int_sat( void *out, void *in){ ((cl_int*) out)[0] = ((cl_short*) in)[0]; } +static void short2float_sat( void *out, void *in){ ((cl_float*) out)[0] = ((cl_short*) in)[0]; } +static void short2double_sat( void *out, void *in){ ((cl_double*) out)[0] = ((cl_short*) in)[0]; } +static void short2ulong_sat( void *out, void *in){ cl_short s = ((cl_short*) in)[0]; ((cl_ulong*) out)[0] = s < 0 ? 
0 : s; } +static void short2long_sat( void *out, void *in){ ((cl_long*) out)[0] = ((cl_short*) in)[0]; } +static void uint2uchar_sat( void *out, void *in){ cl_uint u = ((cl_uint*) in)[0]; ((cl_uchar*) out)[0] = CLAMP( 0, u, CL_UCHAR_MAX); } +static void uint2char_sat( void *out, void *in){ cl_uint u = ((cl_uint*) in)[0]; ((cl_char*) out)[0] = CLAMP( 0, u, CL_CHAR_MAX ); } +static void uint2ushort_sat( void *out, void *in){ cl_uint u = ((cl_uint*) in)[0]; ((cl_ushort*) out)[0] = CLAMP( 0, u, CL_USHRT_MAX); } +static void uint2short_sat( void *out, void *in){ cl_uint u = ((cl_uint*) in)[0]; ((cl_short*) out)[0] = CLAMP( 0, u, CL_SHRT_MAX); } +static void uint2int_sat( void *out, void *in){ cl_uint u = ((cl_uint*) in)[0]; ((cl_int*) out)[0] = CLAMP( 0, u, CL_INT_MAX); } +static void uint2float_sat( void *out, void *in){ ((cl_float*) out)[0] = my_fabsf( (cl_float) ((cl_uint*) in)[0] ); } // my_fabs workaround for +static void uint2double_sat( void *out, void *in){ ((cl_double*) out)[0] = my_fabs( (cl_double) ((cl_uint*) in)[0]); } // my_fabs workaround for +static void uint2ulong_sat( void *out, void *in){ ((cl_ulong*) out)[0] = ((cl_uint*) in)[0]; } +static void uint2long_sat( void *out, void *in){ ((cl_long*) out)[0] = ((cl_uint*) in)[0]; } +static void int2uchar_sat( void *out, void *in){ cl_int i = ((cl_int*) in)[0]; ((cl_uchar*) out)[0] = CLAMP( 0, i, CL_UCHAR_MAX); } +static void int2char_sat( void *out, void *in){ cl_int i = ((cl_int*) in)[0]; ((cl_char*) out)[0] = CLAMP( CL_CHAR_MIN, i, CL_CHAR_MAX); } +static void int2ushort_sat( void *out, void *in){ cl_int i = ((cl_int*) in)[0]; ((cl_ushort*) out)[0] = CLAMP( 0, i, CL_USHRT_MAX); } +static void int2short_sat( void *out, void *in){ cl_int i = ((cl_int*) in)[0]; ((cl_short*) out)[0] = CLAMP( CL_SHRT_MIN, i, CL_SHRT_MAX); } +static void int2uint_sat( void *out, void *in){ cl_int i = ((cl_int*) in)[0]; ((cl_uint*) out)[0] = CLAMP( 0, i, CL_INT_MAX); } +static void int2float_sat( void *out, void *in){ 
((cl_float*) out)[0] = ((cl_int*) in)[0]; } +static void int2double_sat( void *out, void *in){ ((cl_double*) out)[0] = ((cl_int*) in)[0]; } +static void int2ulong_sat( void *out, void *in){ cl_int i = ((cl_int*) in)[0]; ((cl_ulong*) out)[0] = i < 0 ? 0 : i; } +static void int2long_sat( void *out, void *in){ ((cl_long*) out)[0] = ((cl_int*) in)[0]; } +static void float2uchar_sat( void *out, void *in){ ((cl_uchar*) out)[0] = CLAMP( 0, lrintf_clamped(((cl_float*) in)[0]), CL_UCHAR_MAX ); } +static void float2char_sat( void *out, void *in){ ((cl_char*) out)[0] = CLAMP( CL_CHAR_MIN, lrintf_clamped(((cl_float*) in)[0]), CL_CHAR_MAX); } +static void float2ushort_sat( void *out, void *in){ ((cl_ushort*) out)[0] = CLAMP( 0, lrintf_clamped(((cl_float*) in)[0]), CL_USHRT_MAX ); } +static void float2short_sat( void *out, void *in){ ((cl_short*) out)[0] = CLAMP( CL_SHRT_MIN, lrintf_clamped(((cl_float*) in)[0]), CL_SHRT_MAX ); } +static void float2uint_sat( void *out, void *in){ ((cl_uint*) out)[0] = (cl_uint) CLAMP( 0, llrintf_clamped(((cl_float*) in)[0]), CL_UINT_MAX ); } +static void float2int_sat( void *out, void *in){ ((cl_int*) out)[0] = (cl_int) CLAMP( CL_INT_MIN, lrintf_clamped(((cl_float*) in)[0]), CL_INT_MAX ); } +static void float2double_sat( void *out, void *in){ ((cl_double*) out)[0] = ((cl_float*) in)[0]; } +static void float2ulong_sat( void *out, void *in) +{ +#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) + // VS2005 (at least) on x86 uses fistp to store the float as a 64-bit int. + // However, fistp stores it as a signed int, and some of the test values won't + // fit into a signed int. (These test values are >= 2^63.) The result on VS2005 + // is that these end up silently (at least by default settings) clamped to + // the max lowest ulong. 
+ cl_float x = my_rintf(((cl_float *)in)[0]); + if (x >= 18446744073709551616.0f) { // 2^64 + ((cl_ulong*) out)[0] = 0xFFFFFFFFFFFFFFFFULL; + } else if (x < 0) { + ((cl_ulong*) out)[0] = 0; + } else if (x >= 9223372036854775808.0f) { // 2^63 + x -= 9223372036854775808.0f; + ((cl_ulong*) out)[0] = x; + ((cl_ulong*) out)[0] += 9223372036854775808ULL; + } else { + ((cl_ulong*) out)[0] = x; + } +#else + float f = my_rintf(((float*) in)[0]); ((cl_ulong*) out)[0] = f >= MAKE_HEX_DOUBLE(0x1.0p64, 0x1LL, 64) ? 0xFFFFFFFFFFFFFFFFULL : f < 0 ? 0 : (cl_ulong) f; +#endif +} +// The final cast used to be (cl_ulong) f, but on Linux (RHEL5 at least) +// if f = -1.0f, then (cl_ulong) f = 0xffffffff, which clearly isn't right. +// Switching it to (cl_long) f seems to fix that. +static void float2long_sat( void *out, void *in){ float f = my_rintf(((float*) in)[0]); ((cl_long*) out)[0] = f >= MAKE_HEX_DOUBLE(0x1.0p63, 0x1LL, 63) ? 0x7FFFFFFFFFFFFFFFULL : f < MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63) ? 0x8000000000000000LL : (cl_long) f; } +static void double2uchar_sat( void *out, void *in){ ((cl_uchar*) out)[0] = CLAMP( 0, lrint_clamped(((cl_double*) in)[0]), CL_UCHAR_MAX ); } +static void double2char_sat( void *out, void *in){ ((cl_char*) out)[0] = CLAMP( CL_CHAR_MIN, lrint_clamped(((cl_double*) in)[0]), CL_CHAR_MAX); } +static void double2ushort_sat( void *out, void *in){ ((cl_ushort*) out)[0] = CLAMP( 0, lrint_clamped(((cl_double*) in)[0]), CL_USHRT_MAX ); } +static void double2short_sat( void *out, void *in){ ((cl_short*) out)[0] = CLAMP( CL_SHRT_MIN, lrint_clamped(((cl_double*) in)[0]), CL_SHRT_MAX ); } +static void double2uint_sat( void *out, void *in){ ((cl_uint*) out)[0] = (cl_uint) CLAMP( 0, llrint_clamped(((cl_double*) in)[0]), CL_UINT_MAX ); } +static void double2int_sat( void *out, void *in){ ((cl_int*) out)[0] = (cl_int) CLAMP( CL_INT_MIN, lrint_clamped(((cl_double*) in)[0]), CL_INT_MAX ); } +static void double2float_sat( void *out, void *in){ ((cl_float*) out)[0] = 
(cl_float) ((double*) in)[0]; } +static void double2ulong_sat( void *out, void *in){ double f = rint(((double*) in)[0]); ((cl_ulong*) out)[0] = f >= MAKE_HEX_DOUBLE(0x1.0p64, 0x1LL, 64) ? 0xFFFFFFFFFFFFFFFFULL : f < 0 ? 0 : (cl_ulong) f; } +static void double2long_sat( void *out, void *in){ double f = rint(((double*) in)[0]); ((cl_long*) out)[0] = f >= MAKE_HEX_DOUBLE(0x1.0p63, 0x1LL, 63) ? 0x7FFFFFFFFFFFFFFFULL : f < MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63) ? 0x8000000000000000LL : (cl_long) f; } +static void ulong2uchar_sat( void *out, void *in){ cl_ulong u = ((cl_ulong*) in)[0]; ((cl_uchar*) out)[0] = CLAMP( 0, u, CL_UCHAR_MAX ); } +static void ulong2char_sat( void *out, void *in){ cl_ulong u = ((cl_ulong*) in)[0]; ((cl_char*) out)[0] = CLAMP( 0, u, CL_CHAR_MAX ); } +static void ulong2ushort_sat( void *out, void *in){ cl_ulong u = ((cl_ulong*) in)[0]; ((cl_ushort*) out)[0] = CLAMP( 0, u, CL_USHRT_MAX ); } +static void ulong2short_sat( void *out, void *in){ cl_ulong u = ((cl_ulong*) in)[0]; ((cl_short*) out)[0] = CLAMP( 0, u, CL_SHRT_MAX ); } +static void ulong2uint_sat( void *out, void *in){ cl_ulong u = ((cl_ulong*) in)[0]; ((cl_uint*) out)[0] = (cl_uint) CLAMP( 0, u, CL_UINT_MAX ); } +static void ulong2int_sat( void *out, void *in){ cl_ulong u = ((cl_ulong*) in)[0]; ((cl_int*) out)[0] = (cl_int) CLAMP( 0, u, CL_INT_MAX ); } +static void ulong2float_sat( void *out, void *in){ ((float*) out)[0] = my_fabsf((float) ((cl_ulong*) in)[0]); } // my_fabs workaround for +static void ulong2double_sat( void *out, void *in){ ((double*) out)[0] = my_fabs( ((cl_ulong*) in)[0]); } // my_fabs workaround for +static void ulong2long_sat( void *out, void *in){ cl_ulong u = ((cl_ulong*) in)[0]; ((cl_long*) out)[0] = CLAMP( 0, u, CL_LONG_MAX ); } +static void long2uchar_sat( void *out, void *in){ cl_long u = ((cl_long*) in)[0]; ((cl_uchar*) out)[0] = CLAMP( 0, u, CL_UCHAR_MAX ); } +static void long2char_sat( void *out, void *in){ cl_long u = ((cl_long*) in)[0]; ((cl_char*) out)[0] = 
CLAMP( CL_CHAR_MIN, u, CL_CHAR_MAX ); } +static void long2ushort_sat( void *out, void *in){ cl_long u = ((cl_long*) in)[0]; ((cl_ushort*) out)[0] = CLAMP( 0, u, CL_USHRT_MAX ); } +static void long2short_sat( void *out, void *in){ cl_long u = ((cl_long*) in)[0]; ((cl_short*) out)[0] = CLAMP( CL_SHRT_MIN, u, CL_SHRT_MAX ); } +static void long2uint_sat( void *out, void *in){ cl_long u = ((cl_long*) in)[0]; ((cl_uint*) out)[0] = (cl_uint) CLAMP( 0, u, CL_UINT_MAX ); } +static void long2int_sat( void *out, void *in){ cl_long u = ((cl_long*) in)[0]; ((cl_int*) out)[0] = (int) CLAMP( CL_INT_MIN, u, CL_INT_MAX ); } +static void long2float_sat( void *out, void *in){ ((float*) out)[0] = (float) ((cl_long*) in)[0]; } +static void long2double_sat( void *out, void *in){ ((double*) out)[0] = ((cl_long*) in)[0]; } +static void long2ulong_sat( void *out, void *in){ cl_long u = ((cl_long*) in)[0]; ((cl_ulong*) out)[0] = CLAMP( 0, u, CL_LONG_MAX ); } + +/* +#include + +char *ground[] = { "", + "_rte", + "_rtp", + "_rtn", + "_rtz" + }; + +const char *gTypeNames[ ] = { + "uchar", "char", + "ushort", "short", + "uint", "int", + "float", "double", + "ulong", "long" + }; + + +int main( void ) +{ + int i, j; + + for( i = 0; i < sizeof( gTypeNames ) / sizeof( gTypeNames[0] ); i++ ) + for( j = 0; j < sizeof( ground ) / sizeof( ground[0] ); j++ ) + { + vlog( "float clampf_%s%s( float );\n", gTypeNames[i], ground[j] ); + vlog( "double clampd_%s%s( double );\n", gTypeNames[i], ground[j] ); + } + + return 0; + +} +*/ + + +float clampf_uchar( float ); +double clampd_uchar( double ); +float clampf_uchar_rte( float ); +double clampd_uchar_rte( double ); +float clampf_uchar_rtp( float ); +double clampd_uchar_rtp( double ); +float clampf_uchar_rtn( float ); +double clampd_uchar_rtn( double ); +float clampf_uchar_rtz( float ); +double clampd_uchar_rtz( double ); +float clampf_char( float ); +double clampd_char( double ); +float clampf_char_rte( float ); +double clampd_char_rte( double ); +float 
clampf_char_rtp( float ); +double clampd_char_rtp( double ); +float clampf_char_rtn( float ); +double clampd_char_rtn( double ); +float clampf_char_rtz( float ); +double clampd_char_rtz( double ); +float clampf_ushort( float ); +double clampd_ushort( double ); +float clampf_ushort_rte( float ); +double clampd_ushort_rte( double ); +float clampf_ushort_rtp( float ); +double clampd_ushort_rtp( double ); +float clampf_ushort_rtn( float ); +double clampd_ushort_rtn( double ); +float clampf_ushort_rtz( float ); +double clampd_ushort_rtz( double ); +float clampf_short( float ); +double clampd_short( double ); +float clampf_short_rte( float ); +double clampd_short_rte( double ); +float clampf_short_rtp( float ); +double clampd_short_rtp( double ); +float clampf_short_rtn( float ); +double clampd_short_rtn( double ); +float clampf_short_rtz( float ); +double clampd_short_rtz( double ); +float clampf_uint( float ); +double clampd_uint( double ); +float clampf_uint_rte( float ); +double clampd_uint_rte( double ); +float clampf_uint_rtp( float ); +double clampd_uint_rtp( double ); +float clampf_uint_rtn( float ); +double clampd_uint_rtn( double ); +float clampf_uint_rtz( float ); +double clampd_uint_rtz( double ); +float clampf_int( float ); +double clampd_int( double ); +float clampf_int_rte( float ); +double clampd_int_rte( double ); +float clampf_int_rtp( float ); +double clampd_int_rtp( double ); +float clampf_int_rtn( float ); +double clampd_int_rtn( double ); +float clampf_int_rtz( float ); +double clampd_int_rtz( double ); +float clampf_float( float ); +double clampd_float( double ); +float clampf_float_rte( float ); +double clampd_float_rte( double ); +float clampf_float_rtp( float ); +double clampd_float_rtp( double ); +float clampf_float_rtn( float ); +double clampd_float_rtn( double ); +float clampf_float_rtz( float ); +double clampd_float_rtz( double ); +float clampf_double( float ); +double clampd_double( double ); +float clampf_double_rte( float ); +double 
clampd_double_rte( double ); +float clampf_double_rtp( float ); +double clampd_double_rtp( double ); +float clampf_double_rtn( float ); +double clampd_double_rtn( double ); +float clampf_double_rtz( float ); +double clampd_double_rtz( double ); +float clampf_ulong( float ); +double clampd_ulong( double ); +float clampf_ulong_rte( float ); +double clampd_ulong_rte( double ); +float clampf_ulong_rtp( float ); +double clampd_ulong_rtp( double ); +float clampf_ulong_rtn( float ); +double clampd_ulong_rtn( double ); +float clampf_ulong_rtz( float ); +double clampd_ulong_rtz( double ); +float clampf_long( float ); +double clampd_long( double ); +float clampf_long_rte( float ); +double clampd_long_rte( double ); +float clampf_long_rtp( float ); +double clampd_long_rtp( double ); +float clampf_long_rtn( float ); +double clampd_long_rtn( double ); +float clampf_long_rtz( float ); +double clampd_long_rtz( double ); + +/* +#include + +char *ground[] = { "", + "_rte", + "_rtp", + "_rtn", + "_rtz" + }; + +const char *gTypeNames[ ] = { + "uchar", "char", + "ushort", "short", + "uint", "int", + "float", "double", + "ulong", "long" + }; + + +int main( void ) +{ + int i, j; + + for( i = 0; i < sizeof( gTypeNames ) / sizeof( gTypeNames[0] ); i++ ) + { + vlog( "{\t" ); + for( j = 0; j < sizeof( ground ) / sizeof( ground[0] ); j++ ) + vlog( "clampf_%s%s,\t", gTypeNames[i], ground[j] ); + + vlog( "\t},\n" ); + } + + return 0; + +} +*/ +clampf gClampFloat[ kTypeCount ][kRoundingModeCount] = { + { clampf_uchar, clampf_uchar_rte, clampf_uchar_rtp, clampf_uchar_rtn, clampf_uchar_rtz, }, + { clampf_char, clampf_char_rte, clampf_char_rtp, clampf_char_rtn, clampf_char_rtz, }, + { clampf_ushort, clampf_ushort_rte, clampf_ushort_rtp, clampf_ushort_rtn, clampf_ushort_rtz, }, + { clampf_short, clampf_short_rte, clampf_short_rtp, clampf_short_rtn, clampf_short_rtz, }, + { clampf_uint, clampf_uint_rte, clampf_uint_rtp, clampf_uint_rtn, clampf_uint_rtz, }, + { clampf_int, clampf_int_rte, 
clampf_int_rtp, clampf_int_rtn, clampf_int_rtz, }, + { clampf_float, clampf_float_rte, clampf_float_rtp, clampf_float_rtn, clampf_float_rtz, }, + { clampf_double, clampf_double_rte, clampf_double_rtp, clampf_double_rtn, clampf_double_rtz, }, + { clampf_ulong, clampf_ulong_rte, clampf_ulong_rtp, clampf_ulong_rtn, clampf_ulong_rtz, }, + { clampf_long, clampf_long_rte, clampf_long_rtp, clampf_long_rtn, clampf_long_rtz, } +}; + +clampd gClampDouble[ kTypeCount ][kRoundingModeCount] = { + { clampd_uchar, clampd_uchar_rte, clampd_uchar_rtp, clampd_uchar_rtn, clampd_uchar_rtz, }, + { clampd_char, clampd_char_rte, clampd_char_rtp, clampd_char_rtn, clampd_char_rtz, }, + { clampd_ushort, clampd_ushort_rte, clampd_ushort_rtp, clampd_ushort_rtn, clampd_ushort_rtz, }, + { clampd_short, clampd_short_rte, clampd_short_rtp, clampd_short_rtn, clampd_short_rtz, }, + { clampd_uint, clampd_uint_rte, clampd_uint_rtp, clampd_uint_rtn, clampd_uint_rtz, }, + { clampd_int, clampd_int_rte, clampd_int_rtp, clampd_int_rtn, clampd_int_rtz, }, + { clampd_float, clampd_float_rte, clampd_float_rtp, clampd_float_rtn, clampd_float_rtz, }, + { clampd_double, clampd_double_rte, clampd_double_rtp, clampd_double_rtn, clampd_double_rtz, }, + { clampd_ulong, clampd_ulong_rte, clampd_ulong_rtp, clampd_ulong_rtn, clampd_ulong_rtz, }, + { clampd_long, clampd_long_rte, clampd_long_rtp, clampd_long_rtn, clampd_long_rtz, } +}; + +#if defined (_WIN32) +#define __attribute__(X) +#endif + +static inline float fclamp( float lo, float v, float hi ) __attribute__ ((always_inline)); +static inline double dclamp( double lo, double v, double hi ) __attribute__ ((always_inline)); + +static inline float fclamp( float lo, float v, float hi ){ v = v < lo ? lo : v; return v < hi ? v : hi; } +static inline double dclamp( double lo, double v, double hi ){ v = v < lo ? lo : v; return v < hi ? 
v : hi; }
+
+// Clamp unsaturated inputs into range so we don't get test errors:
+float clampf_uchar( float f ) { return fclamp( -0.5f, f, 255.5f - 128.0f * FLT_EPSILON ); }
+double clampd_uchar( double f ) { return dclamp( -0.5, f, 255.5 - 128.0 * DBL_EPSILON ); }
+float clampf_uchar_rte( float f ) { return fclamp( -0.5f, f, 255.5f - 128.0f * FLT_EPSILON ); }
+double clampd_uchar_rte( double f ) { return dclamp( -0.5, f, 255.5 - 128.0 * DBL_EPSILON ); }
+float clampf_uchar_rtp( float f ) { return fclamp( -1.0f + FLT_EPSILON/2.0f, f, 255.0f ); }
+double clampd_uchar_rtp( double f ) { return dclamp( -1.0 + DBL_EPSILON/2.0, f, 255.0 ); }
+float clampf_uchar_rtn( float f ) { return fclamp( -0.0f, f, 256.0f - 128.0f * FLT_EPSILON); }
+double clampd_uchar_rtn( double f ) { return dclamp( -0.0, f, 256.0 - 128.0 * DBL_EPSILON); }
+float clampf_uchar_rtz( float f ) { return fclamp( -1.0f + FLT_EPSILON/2.0f, f, 256.0f - 128.0f * FLT_EPSILON); }
+// fix: 128.0 (double literal) instead of 128.0f — keeps the DBL_EPSILON term consistent
+// with every other clampd_* bound above (value is unchanged; 128.0f converts exactly).
+double clampd_uchar_rtz( double f ) { return dclamp( -1.0 + DBL_EPSILON/2.0, f, 256.0 - 128.0 * DBL_EPSILON); }
+
+float clampf_char( float f ) { return fclamp( -128.5f, f, 127.5f - 64.f * FLT_EPSILON ); }
+double clampd_char( double f ) { return dclamp( -128.5, f, 127.5 - 64. * DBL_EPSILON ); }
+float clampf_char_rte( float f ) { return fclamp( -128.5f, f, 127.5f - 64.f * FLT_EPSILON ); }
+double clampd_char_rte( double f ) { return dclamp( -128.5, f, 127.5 - 64. * DBL_EPSILON ); }
+float clampf_char_rtp( float f ) { return fclamp( -129.0f + 128.f*FLT_EPSILON, f, 127.f ); }
+double clampd_char_rtp( double f ) { return dclamp( -129.0 + 128.*DBL_EPSILON, f, 127. ); }
+float clampf_char_rtn( float f ) { return fclamp( -128.0f, f, 128.f - 64.0f*FLT_EPSILON ); }
+double clampd_char_rtn( double f ) { return dclamp( -128.0, f, 128. 
- 64.0*DBL_EPSILON ); }
+float clampf_char_rtz( float f ) { return fclamp( -129.0f + 128.f*FLT_EPSILON, f, 128.f - 64.0f*FLT_EPSILON ); }
+double clampd_char_rtz( double f ) { return dclamp( -129.0 + 128.*DBL_EPSILON, f, 128. - 64.0*DBL_EPSILON ); }
+
+float clampf_ushort( float f ) { return fclamp( -0.5f, f, 65535.5f - 32768.0f * FLT_EPSILON ); }
+double clampd_ushort( double f ) { return dclamp( -0.5, f, 65535.5 - 32768.0 * DBL_EPSILON ); }
+float clampf_ushort_rte( float f ) { return fclamp( -0.5f, f, 65535.5f - 32768.0f * FLT_EPSILON ); }
+double clampd_ushort_rte( double f ) { return dclamp( -0.5, f, 65535.5 - 32768.0 * DBL_EPSILON ); }
+float clampf_ushort_rtp( float f ) { return fclamp( -1.0f + FLT_EPSILON/2.0f, f, 65535.0f ); }
+double clampd_ushort_rtp( double f ) { return dclamp( -1.0 + DBL_EPSILON/2.0, f, 65535.0 ); }
+float clampf_ushort_rtn( float f ) { return fclamp( -0.0f, f, 65536.0f - 32768.0f * FLT_EPSILON); }
+double clampd_ushort_rtn( double f ) { return dclamp( -0.0, f, 65536.0 - 32768.0 * DBL_EPSILON); }
+float clampf_ushort_rtz( float f ) { return fclamp( -1.0f + FLT_EPSILON/2.0f, f, 65536.0f - 32768.0f * FLT_EPSILON); }
+// fix: 32768.0 (double literal) instead of 32768.0f — matches clampd_ushort_rtn above
+// (value is unchanged; 32768.0f converts exactly, but the float suffix was a typo).
+double clampd_ushort_rtz( double f ) { return dclamp( -1.0 + DBL_EPSILON/2.0, f, 65536.0 - 32768.0 * DBL_EPSILON); }
+
+float clampf_short( float f ) { return fclamp( -32768.5f, f, 32767.5f - 16384.f * FLT_EPSILON ); }
+double clampd_short( double f ) { return dclamp( -32768.5, f, 32767.5 - 16384. * DBL_EPSILON ); }
+float clampf_short_rte( float f ) { return fclamp( -32768.5f, f, 32767.5f - 16384.f * FLT_EPSILON ); }
+double clampd_short_rte( double f ) { return dclamp( -32768.5, f, 32767.5 - 16384. * DBL_EPSILON ); }
+float clampf_short_rtp( float f ) { return fclamp( -32769.0f + 32768.f*FLT_EPSILON, f, 32767.f ); }
+double clampd_short_rtp( double f ) { return dclamp( -32769.0 + 32768.*DBL_EPSILON, f, 32767. 
); } +float clampf_short_rtn( float f ) { return fclamp( -32768.0f, f, 32768.f - 16384.0f*FLT_EPSILON ); } +double clampd_short_rtn( double f ) { return dclamp( -32768.0, f, 32768. - 16384.0*DBL_EPSILON ); } +float clampf_short_rtz( float f ) { return fclamp( -32769.0f + 32768.f*FLT_EPSILON, f, 32768.f - 16384.0f*FLT_EPSILON ); } +double clampd_short_rtz( double f ) { return dclamp( -32769.0 + 32768.*DBL_EPSILON, f, 32768. - 16384.0*DBL_EPSILON ); } + +float clampf_uint( float f ) { return fclamp( -0.5f, f, MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffeL, 7) ); } +double clampd_uint( double f ) { return dclamp( -0.5, f, CL_UINT_MAX + 0.5 - MAKE_HEX_DOUBLE(0x1.0p31, 0x1LL, 31) * DBL_EPSILON ); } +float clampf_uint_rte( float f ) { return fclamp( -0.5f, f, MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffeL, 7) ); } +double clampd_uint_rte( double f ) { return dclamp( -0.5, f, CL_UINT_MAX + 0.5 - MAKE_HEX_DOUBLE(0x1.0p31, 0x1LL, 31) * DBL_EPSILON ); } +float clampf_uint_rtp( float f ) { return fclamp( -1.0f + FLT_EPSILON/2.0f, f, MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffeL, 7) ); } +double clampd_uint_rtp( double f ) { return dclamp( -1.0 + DBL_EPSILON/2.0, f, CL_UINT_MAX ); } +float clampf_uint_rtn( float f ) { return fclamp( -0.0f, f, MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffeL, 7)); } +double clampd_uint_rtn( double f ) { return dclamp( -0.0, f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp31, 0x1fffffffffffffLL, -21) ); } +float clampf_uint_rtz( float f ) { return fclamp( -1.0f + FLT_EPSILON/2.0f, f, MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffeL, 7)); } +double clampd_uint_rtz( double f ) { return dclamp( -1.0 + DBL_EPSILON/2.0, f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp31, 0x1fffffffffffffLL, -21)); } + +float clampf_int( float f ) { return fclamp( INT_MIN, f, MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffeL, 6) ); } +double clampd_int( double f ) { return dclamp( INT_MIN - 0.5, f, CL_INT_MAX + 0.5 - MAKE_HEX_DOUBLE(0x1.0p30, 0x1LL, 30) * DBL_EPSILON ); } +float clampf_int_rte( float f ) { return fclamp( 
INT_MIN, f, MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffeL, 6) ); } +double clampd_int_rte( double f ) { return dclamp( INT_MIN - 0.5, f, CL_INT_MAX + 0.5 - MAKE_HEX_DOUBLE(0x1.0p30, 0x1LL, 30) * DBL_EPSILON ); } +float clampf_int_rtp( float f ) { return fclamp( INT_MIN, f, MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffeL, 6) ); } +double clampd_int_rtp( double f ) { return dclamp( INT_MIN - 1.0 + DBL_EPSILON * MAKE_HEX_DOUBLE(0x1.0p31, 0x1LL, 31), f, CL_INT_MAX ); } +float clampf_int_rtn( float f ) { return fclamp( INT_MIN, f, MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffeL, 6) ); } +double clampd_int_rtn( double f ) { return dclamp( INT_MIN, f, CL_INT_MAX + 1.0 - MAKE_HEX_DOUBLE(0x1.0p30, 0x1LL, 30) * DBL_EPSILON ); } +float clampf_int_rtz( float f ) { return fclamp( INT_MIN, f, MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffeL, 6) ); } +double clampd_int_rtz( double f ) { return dclamp( INT_MIN - 1.0 + DBL_EPSILON * MAKE_HEX_DOUBLE(0x1.0p31, 0x1LL, 31), f, CL_INT_MAX + 1.0 - MAKE_HEX_DOUBLE(0x1.0p30, 0x1LL, 30) * DBL_EPSILON ); } + +float clampf_float( float f ){ return f; } +double clampd_float( double f ){ return f; } +float clampf_float_rte( float f ){ return f; } +double clampd_float_rte( double f ){ return f; } +float clampf_float_rtp( float f ){ return f; } +double clampd_float_rtp( double f ){ return f; } +float clampf_float_rtn( float f ){ return f; } +double clampd_float_rtn( double f ){ return f; } +float clampf_float_rtz( float f ){ return f; } +double clampd_float_rtz( double f ){ return f; } + +float clampf_double( float f ){ return f; } +double clampd_double( double f ){ return f; } +float clampf_double_rte( float f ){ return f; } +double clampd_double_rte( double f ){ return f; } +float clampf_double_rtp( float f ){ return f; } +double clampd_double_rtp( double f ){ return f; } +float clampf_double_rtn( float f ){ return f; } +double clampd_double_rtn( double f ){ return f; } +float clampf_double_rtz( float f ){ return f; } +double clampd_double_rtz( double f ){ return f; 
} + +float clampf_ulong( float f ) { return fclamp( -0.5f, f, MAKE_HEX_FLOAT(0x1.fffffep63f, 0x1fffffeL, 39) ); } +double clampd_ulong( double f ) { return dclamp( -0.5, f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp63, 0x1fffffffffffffLL, 11) ); } +float clampf_ulong_rte( float f ) { return fclamp( -0.5f, f, MAKE_HEX_FLOAT(0x1.fffffep63f, 0x1fffffeL, 39) ); } +double clampd_ulong_rte( double f ) { return dclamp( -0.5, f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp63, 0x1fffffffffffffLL, 11) ); } +float clampf_ulong_rtp( float f ) { return fclamp( -1.0f + FLT_EPSILON/2.0f, f, MAKE_HEX_FLOAT(0x1.fffffep63f, 0x1fffffeL, 39) ); } +double clampd_ulong_rtp( double f ) { return dclamp( -1.0 + DBL_EPSILON/2.0, f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp63, 0x1fffffffffffffLL, 11) ); } +float clampf_ulong_rtn( float f ) { return fclamp( -0.0f, f, MAKE_HEX_FLOAT(0x1.fffffep63f, 0x1fffffeL, 39) ); } +double clampd_ulong_rtn( double f ) { return dclamp( -0.0, f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp63, 0x1fffffffffffffLL, 11) ); } +float clampf_ulong_rtz( float f ) { return fclamp( -1.0f + FLT_EPSILON/2.0f, f, MAKE_HEX_FLOAT(0x1.fffffep63f, 0x1fffffeL, 39) ); } +double clampd_ulong_rtz( double f ) { return dclamp( -1.0 + DBL_EPSILON/2.0, f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp63, 0x1fffffffffffffLL, 11) ); } + +float clampf_long( float f ) { return fclamp( MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), f, MAKE_HEX_FLOAT(0x1.fffffep62f, 0x1fffffeL, 38) ); } +double clampd_long( double f ) { return dclamp( MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp62, 0x1fffffffffffffLL, 10) ); } +float clampf_long_rte( float f ) { return fclamp( MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), f, MAKE_HEX_FLOAT(0x1.fffffep62f, 0x1fffffeL, 38) ); } +double clampd_long_rte( double f ) { return dclamp( MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp62, 0x1fffffffffffffLL, 10) ); } +float clampf_long_rtp( float f ) { return fclamp( MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), f, 
MAKE_HEX_FLOAT(0x1.fffffep62f, 0x1fffffeL, 38) ); } +double clampd_long_rtp( double f ) { return dclamp( MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp62, 0x1fffffffffffffLL, 10) ); } +float clampf_long_rtn( float f ) { return fclamp( MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), f, MAKE_HEX_FLOAT(0x1.fffffep62f, 0x1fffffeL, 38) ); } +double clampd_long_rtn( double f ) { return dclamp( MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp62, 0x1fffffffffffffLL, 10) ); } +float clampf_long_rtz( float f ) { return fclamp( MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), f, MAKE_HEX_FLOAT(0x1.fffffep62f, 0x1fffffeL, 38) ); } +double clampd_long_rtz( double f ) { return dclamp( MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp62, 0x1fffffffffffffLL, 10) ); } + +#pragma mark - + +int alwaysPass( void *out1, void *out2, void *allowZ, uint32_t count, int vectorSize ); +int alwaysFail( void *out1, void *out2, void *allowZ, uint32_t count, int vectorSize ); +int check_uchar( void *out1, void *out2, void *allowZ, uint32_t count, int vectorSize ); +int check_char( void *out1, void *out2, void *allowZ, uint32_t count, int vectorSize ); +int check_ushort( void *out1, void *out2, void *allowZ, uint32_t count, int vectorSize ); +int check_short( void *out1, void *out2, void *allowZ, uint32_t count, int vectorSize ); +int check_uint( void *out1, void *out2, void *allowZ, uint32_t count, int vectorSize ); +int check_int( void *out1, void *out2, void *allowZ, uint32_t count, int vectorSize ); +int check_ulong( void *out1, void *out2, void *allowZ, uint32_t count, int vectorSize ); +int check_long( void *out1, void *out2, void *allowZ, uint32_t count, int vectorSize ); +int check_float( void *out1, void *out2, void *allowZ, uint32_t count, int vectorSize ); +int check_double( void *out1, void *out2, void *allowZ, uint32_t count, int vectorSize ); + +void init_uchar( void *dest, SaturationMode, RoundingMode, Type 
destType, uint64_t start, int count, MTdata d ); +void init_char( void *dest, SaturationMode, RoundingMode, Type destType, uint64_t start, int count, MTdata d ); +void init_ushort( void *dest, SaturationMode, RoundingMode, Type destType, uint64_t start, int count, MTdata d ); +void init_short( void *dest, SaturationMode, RoundingMode, Type destType, uint64_t start, int count, MTdata d ); +void init_uint( void *dest, SaturationMode, RoundingMode, Type destType, uint64_t start, int count, MTdata d ); +void init_int( void *dest, SaturationMode, RoundingMode, Type destType, uint64_t start, int count, MTdata d ); +void init_float( void *dest, SaturationMode, RoundingMode, Type destType, uint64_t start, int count, MTdata d ); +void init_double( void *dest, SaturationMode, RoundingMode, Type destType, uint64_t start, int count, MTdata d ); +void init_ulong( void *dest, SaturationMode, RoundingMode, Type destType, uint64_t start, int count, MTdata d ); +void init_long( void *dest, SaturationMode, RoundingMode, Type destType, uint64_t start, int count, MTdata d ); + +InitDataFunc gInitFunctions[ kTypeCount ] = { + init_uchar, init_char, + init_ushort, init_short, + init_uint, init_int, + init_float, init_double, + init_ulong, init_long + }; + + +CheckResults gCheckResults[ kTypeCount ] = { + check_uchar, check_char, check_ushort, check_short, check_uint, + check_int, check_float, check_double, check_ulong, check_long + }; +#if !defined (__APPLE__) +#define UNUSED +#else +#define UNUSED __attribute__((unused)) +#endif + +int alwaysPass( void UNUSED *out1, void UNUSED *out2, void UNUSED *allowZ, uint32_t UNUSED count, int UNUSED vectorSize){ return 0; } +int alwaysFail( void UNUSED *out1, void UNUSED *out2, void UNUSED *allowZ, uint32_t UNUSED count, int UNUSED vectorSize ){ return -1; } + +int check_uchar( void *test, void *correct, void *allowZ, uint32_t count, int vectorSize ) +{ + const cl_uchar *t = (const cl_uchar*)test; + const cl_uchar *c = (const cl_uchar*)correct; + 
const cl_uchar *a = (const cl_uchar*)allowZ; + uint32_t i; + + for( i = 0; i < count; i++ ) + if( t[i] != c[i] && !(a[i] != (cl_uchar)0 && t[i] == (cl_uchar)0)) + { + vlog( "\nError for vector size %d found at 0x%8.8x: *0x%2.2x vs 0x%2.2x\n", vectorSize, i, c[i], t[i] ); + return i + 1; + } + + return 0; +} + +int check_char( void *test, void *correct, void *allowZ, uint32_t count, int vectorSize ) +{ + const cl_char *t = (const cl_char*)test; + const cl_char *c = (const cl_char*)correct; + const cl_uchar *a = (const cl_uchar*)allowZ; + uint32_t i; + + for( i = 0; i < count; i++ ) + if( t[i] != c[i] && !(a[i] != (cl_uchar)0 && t[i] == (cl_char)0)) + { + vlog( "\nError for vector size %d found at 0x%8.8x: *0x%2.2x vs 0x%2.2x\n", vectorSize, i, c[i], t[i] ); + return i + 1; + } + + return 0; +} + +int check_ushort( void *test, void *correct, void *allowZ, uint32_t count, int vectorSize ) +{ + const cl_ushort *t = (const cl_ushort*)test; + const cl_ushort *c = (const cl_ushort*)correct; + const cl_uchar *a = (const cl_uchar*)allowZ; + uint32_t i; + + for( i = 0; i < count; i++ ) + if( t[i] != c[i] && !(a[i] != (cl_uchar)0 && t[i] == (cl_ushort)0)) + { + vlog( "\nError for vector size %d found at 0x%8.8x: *0x%4.4x vs 0x%4.4x\n", vectorSize, i, c[i], t[i] ); + return i + 1; + } + + return 0; +} + +int check_short( void *test, void *correct, void *allowZ, uint32_t count, int vectorSize ) +{ + const cl_short *t = (const cl_short*)test; + const cl_short *c = (const cl_short*)correct; + const cl_uchar *a = (const cl_uchar*)allowZ; + uint32_t i; + + for( i = 0; i < count; i++ ) + if( t[i] != c[i] && !(a[i] != (cl_uchar)0 && t[i] == (cl_short)0)) + { + vlog( "\nError for vector size %d found at 0x%8.8x: *0x%4.4x vs 0x%4.4x\n", vectorSize, i, c[i], t[i] ); + return i + 1; + } + + return 0; +} + +int check_uint( void *test, void *correct, void *allowZ, uint32_t count, int vectorSize ) +{ + const cl_uint *t = (const cl_uint*)test; + const cl_uint *c = (const cl_uint*)correct; + 
const cl_uchar *a = (const cl_uchar*)allowZ; + uint32_t i; + + for( i = 0; i < count; i++ ) + if( t[i] != c[i] && !(a[i] != (cl_uchar)0 && t[i] == (cl_uint)0)) + { + vlog( "\nError for vector size %d found at 0x%8.8x: *0x%8.8x vs 0x%8.8x\n", vectorSize, i, c[i], t[i] ); + return i + 1; + } + + return 0; +} + +int check_int( void *test, void *correct, void *allowZ, uint32_t count, int vectorSize ) +{ + const cl_int *t = (const cl_int*)test; + const cl_int *c = (const cl_int*)correct; + const cl_uchar *a = (const cl_uchar*)allowZ; + uint32_t i; + + for( i = 0; i < count; i++ ) + if( t[i] != c[i] && !(a[i] != (cl_uchar)0 && t[i] == (cl_int)0)) + { + vlog( "\nError for vector size %d found at 0x%8.8x: *0x%8.8x vs 0x%8.8x\n", vectorSize, i, c[i], t[i] ); + return i + 1; + } + + return 0; +} + +int check_ulong( void *test, void *correct, void *allowZ, uint32_t count, int vectorSize ) +{ + const cl_ulong *t = (const cl_ulong*)test; + const cl_ulong *c = (const cl_ulong*)correct; + const cl_uchar *a = (const cl_uchar*)allowZ; + uint32_t i; + + for( i = 0; i < count; i++ ) + if( t[i] != c[i] && !(a[i] != (cl_uchar)0 && t[i] == (cl_ulong)0)) + { + vlog( "\nError for vector size %d found at 0x%8.8x: *0x%16.16llx vs 0x%16.16llx\n", vectorSize, i, c[i], t[i] ); + return i + 1; + } + + return 0; +} + +int check_long( void *test, void *correct, void *allowZ, uint32_t count, int vectorSize ) +{ + const cl_long *t = (const cl_long*)test; + const cl_long *c = (const cl_long*)correct; + const cl_uchar *a = (const cl_uchar*)allowZ; + uint32_t i; + + for( i = 0; i < count; i++ ) + if( t[i] != c[i] && !(a[i] != (cl_uchar)0 && t[i] == (cl_long)0)) + { + vlog( "\nError for vector size %d found at 0x%8.8x: *0x%16.16llx vs 0x%16.16llx\n", vectorSize, i, c[i], t[i] ); + return i + 1; + } + + return 0; +} + +int check_float( void *test, void *correct, void *allowZ, uint32_t count, int vectorSize ) +{ + const cl_uint *t = (const cl_uint*)test; + const cl_uint *c = (const cl_uint*)correct; + 
const cl_uchar *a = (const cl_uchar*)allowZ; + uint32_t i; + + for( i = 0; i < count; i++ ) + if (t[i] != c[i] && + // Allow nan's to be binary different + !((t[i] & 0x7fffffffU) > 0x7f800000U && + (c[i] & 0x7fffffffU) > 0x7f800000U) && + !(a[i] != (cl_uchar)0 && + t[i] == (c[i] & 0x80000000U))) { + vlog( "\nError for vector size %d found at 0x%8.8x: *%a vs %a\n", + vectorSize, i, ((float*)correct)[i], ((float*)test)[i] ); + return i + 1; + } + + return 0; +} + +int check_double( void *test, void *correct, void *allowZ, uint32_t count, int vectorSize ) +{ + const cl_ulong *t = (const cl_ulong*)test; + const cl_ulong *c = (const cl_ulong*)correct; + const cl_uchar *a = (const cl_uchar*)allowZ; + uint32_t i; + + for( i = 0; i < count; i++ ) + if (t[i] != c[i] && + // Allow nan's to be binary different + !((t[i] & 0x7fffffffffffffffULL) > 0x7ff0000000000000ULL && + (c[i] & 0x7fffffffffffffffULL) > 0x7f80000000000000ULL) && + !(a[i] != (cl_uchar)0 && + t[i] == (c[i] & 0x8000000000000000ULL))) { + vlog( "\nError for vector size %d found at 0x%8.8x: *%a vs %a\n", + vectorSize, i, ((double*)correct)[i], ((double*)test)[i] ); + return i + 1; + } + + return 0; +} + + +void init_uchar( void *out, SaturationMode UNUSED sat, RoundingMode UNUSED round, Type UNUSED destType, uint64_t start, int count, MTdata UNUSED d ) +{ + cl_uchar *o = (cl_uchar *)out; + int i; + + for( i = 0; i < count; i++ ) + o[i] = start++; +} + +void init_char( void *out, SaturationMode UNUSED sat, RoundingMode UNUSED round, Type UNUSED destType, uint64_t start, int count, MTdata UNUSED d ) +{ + char *o = (char *)out; + int i; + + for( i = 0; i < count; i++ ) + o[i] = start++; +} + +void init_ushort( void *out, SaturationMode UNUSED sat, RoundingMode UNUSED round, Type UNUSED destType, uint64_t start, int count, MTdata UNUSED d ) +{ + cl_ushort *o = (cl_ushort *)out; + int i; + + for( i = 0; i < count; i++ ) + o[i] = start++; +} + +void init_short( void *out, SaturationMode UNUSED sat, RoundingMode UNUSED 
round, UNUSED Type destType, uint64_t start, int count, MTdata UNUSED d ) +{ + short *o = (short *)out; + int i; + + for( i = 0; i < count; i++ ) + o[i] = start++; +} + +void init_uint( void *out, SaturationMode UNUSED sat, RoundingMode UNUSED round, Type UNUSED destType, uint64_t start, int count, MTdata d ) +{ + static const unsigned int specialValuesUInt[] = { + INT_MIN, INT_MIN + 1, INT_MIN + 2, + -(1<<30)-3,-(1<<30)-2,-(1<<30)-1, -(1<<30), -(1<<30)+1, -(1<<30)+2, -(1<<30)+3, + -(1<<24)-3,-(1<<24)-2,-(1<<24)-1, -(1<<24), -(1<<24)+1, -(1<<24)+2, -(1<<24)+3, + -(1<<23)-3,-(1<<23)-2,-(1<<23)-1, -(1<<23), -(1<<23)+1, -(1<<23)+2, -(1<<23)+3, + -(1<<22)-3,-(1<<22)-2,-(1<<22)-1, -(1<<22), -(1<<22)+1, -(1<<22)+2, -(1<<22)+3, + -(1<<21)-3,-(1<<21)-2,-(1<<21)-1, -(1<<21), -(1<<21)+1, -(1<<21)+2, -(1<<21)+3, + -(1<<16)-3,-(1<<16)-2,-(1<<16)-1, -(1<<16), -(1<<16)+1, -(1<<16)+2, -(1<<16)+3, + -(1<<15)-3,-(1<<15)-2,-(1<<15)-1, -(1<<15), -(1<<15)+1, -(1<<15)+2, -(1<<15)+3, + -(1<<8)-3,-(1<<8)-2,-(1<<8)-1, -(1<<8), -(1<<8)+1, -(1<<8)+2, -(1<<8)+3, + -(1<<7)-3,-(1<<7)-2,-(1<<7)-1, -(1<<7), -(1<<7)+1, -(1<<7)+2, -(1<<7)+3, + -4, -3, -2, -1, 0, 1, 2, 3, 4, + (1<<7)-3,(1<<7)-2,(1<<7)-1, (1<<7), (1<<7)+1, (1<<7)+2, (1<<7)+3, + (1<<8)-3,(1<<8)-2,(1<<8)-1, (1<<8), (1<<8)+1, (1<<8)+2, (1<<8)+3, + (1<<15)-3,(1<<15)-2,(1<<15)-1, (1<<15), (1<<15)+1, (1<<15)+2, (1<<15)+3, + (1<<16)-3,(1<<16)-2,(1<<16)-1, (1<<16), (1<<16)+1, (1<<16)+2, (1<<16)+3, + (1<<21)-3,(1<<21)-2,(1<<21)-1, (1<<21), (1<<21)+1, (1<<21)+2, (1<<21)+3, + (1<<22)-3,(1<<22)-2,(1<<22)-1, (1<<22), (1<<22)+1, (1<<22)+2, (1<<22)+3, + (1<<23)-3,(1<<23)-2,(1<<23)-1, (1<<23), (1<<23)+1, (1<<23)+2, (1<<23)+3, + (1<<24)-3,(1<<24)-2,(1<<24)-1, (1<<24), (1<<24)+1, (1<<24)+2, (1<<24)+3, + (1<<30)-3,(1<<30)-2,(1<<30)-1, (1<<30), (1<<30)+1, (1<<30)+2, (1<<30)+3, + INT_MAX-3, INT_MAX-2, INT_MAX-1, INT_MAX, // 0x80000000, 0x80000001 0x80000002 already covered above + UINT_MAX-3, UINT_MAX-2, UINT_MAX-1, UINT_MAX + }; + + cl_uint *o = 
(cl_uint *)out; + int i; + + for( i = 0; i < count; i++) { + if( gIsEmbedded ) + o[i] = (cl_uint) genrand_int32(d); + else + o[i] = (cl_uint)i + start; + } + + if( 0 == start ) + { + size_t tableSize = sizeof( specialValuesUInt ); + if( sizeof( cl_uint) * count < tableSize ) + tableSize = sizeof( cl_uint) * count; + memcpy( (char*)(o + i) - tableSize, specialValuesUInt, tableSize ); + } +} + +void init_int( void *out, SaturationMode UNUSED sat, RoundingMode UNUSED round, Type UNUSED destType, uint64_t start, int count, MTdata d ) +{ + static const unsigned int specialValuesInt[] = { + INT_MIN, INT_MIN + 1, INT_MIN + 2, + -(1<<30)-3,-(1<<30)-2,-(1<<30)-1, -(1<<30), -(1<<30)+1, -(1<<30)+2, -(1<<30)+3, + -(1<<24)-3,-(1<<24)-2,-(1<<24)-1, -(1<<24), -(1<<24)+1, -(1<<24)+2, -(1<<24)+3, + -(1<<23)-3,-(1<<23)-2,-(1<<23)-1, -(1<<23), -(1<<23)+1, -(1<<23)+2, -(1<<23)+3, + -(1<<22)-3,-(1<<22)-2,-(1<<22)-1, -(1<<22), -(1<<22)+1, -(1<<22)+2, -(1<<22)+3, + -(1<<21)-3,-(1<<21)-2,-(1<<21)-1, -(1<<21), -(1<<21)+1, -(1<<21)+2, -(1<<21)+3, + -(1<<16)-3,-(1<<16)-2,-(1<<16)-1, -(1<<16), -(1<<16)+1, -(1<<16)+2, -(1<<16)+3, + -(1<<15)-3,-(1<<15)-2,-(1<<15)-1, -(1<<15), -(1<<15)+1, -(1<<15)+2, -(1<<15)+3, + -(1<<8)-3,-(1<<8)-2,-(1<<8)-1, -(1<<8), -(1<<8)+1, -(1<<8)+2, -(1<<8)+3, + -(1<<7)-3,-(1<<7)-2,-(1<<7)-1, -(1<<7), -(1<<7)+1, -(1<<7)+2, -(1<<7)+3, + -4, -3, -2, -1, 0, 1, 2, 3, 4, + (1<<7)-3,(1<<7)-2,(1<<7)-1, (1<<7), (1<<7)+1, (1<<7)+2, (1<<7)+3, + (1<<8)-3,(1<<8)-2,(1<<8)-1, (1<<8), (1<<8)+1, (1<<8)+2, (1<<8)+3, + (1<<15)-3,(1<<15)-2,(1<<15)-1, (1<<15), (1<<15)+1, (1<<15)+2, (1<<15)+3, + (1<<16)-3,(1<<16)-2,(1<<16)-1, (1<<16), (1<<16)+1, (1<<16)+2, (1<<16)+3, + (1<<21)-3,(1<<21)-2,(1<<21)-1, (1<<21), (1<<21)+1, (1<<21)+2, (1<<21)+3, + (1<<22)-3,(1<<22)-2,(1<<22)-1, (1<<22), (1<<22)+1, (1<<22)+2, (1<<22)+3, + (1<<23)-3,(1<<23)-2,(1<<23)-1, (1<<23), (1<<23)+1, (1<<23)+2, (1<<23)+3, + (1<<24)-3,(1<<24)-2,(1<<24)-1, (1<<24), (1<<24)+1, (1<<24)+2, (1<<24)+3, + 
(1<<30)-3,(1<<30)-2,(1<<30)-1, (1<<30), (1<<30)+1, (1<<30)+2, (1<<30)+3, + INT_MAX-3, INT_MAX-2, INT_MAX-1, INT_MAX, // 0x80000000, 0x80000001 0x80000002 already covered above + UINT_MAX-3, UINT_MAX-2, UINT_MAX-1, UINT_MAX + }; + + int *o = (int *)out; + int i; + + for( i = 0; i < count; i++ ) { + if( gIsEmbedded ) { + o[i] = (int) genrand_int32(d); + } + else { + o[i] = (int) i + start; + } + } + + if( 0 == start ) + { + size_t tableSize = sizeof( specialValuesInt ); + if( sizeof( int) * count < tableSize ) + tableSize = sizeof( int) * count; + memcpy( (char*)(o + i) - tableSize, specialValuesInt, tableSize ); + } +} + +void init_float( void *out, SaturationMode sat, RoundingMode round, Type destType, uint64_t start, int count, MTdata d ) +{ + static const float specialValuesFloat[] = { + -NAN, -INFINITY, -FLT_MAX, MAKE_HEX_FLOAT(-0x1.000002p64f, -0x1000002L, 40), MAKE_HEX_FLOAT(-0x1.0p64f, -0x1L, 64), MAKE_HEX_FLOAT(-0x1.fffffep63f, -0x1fffffeL, 39), MAKE_HEX_FLOAT(-0x1.000002p63f, -0x1000002L, 39), MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(-0x1.fffffep62f, -0x1fffffeL, 38), + MAKE_HEX_FLOAT(-0x1.000002p32f, -0x1000002L, 8), MAKE_HEX_FLOAT(-0x1.0p32f, -0x1L, 32), MAKE_HEX_FLOAT(-0x1.fffffep31f, -0x1fffffeL, 7), MAKE_HEX_FLOAT(-0x1.000002p31f, -0x1000002L, 7), MAKE_HEX_FLOAT(-0x1.0p31f, -0x1L, 31), MAKE_HEX_FLOAT(-0x1.fffffep30f, -0x1fffffeL, 6), -1000.f, -100.f, -4.0f, -3.5f, + -3.0f, MAKE_HEX_FLOAT(-0x1.800002p1f, -0x1800002L, -23), -2.5f, MAKE_HEX_FLOAT(-0x1.7ffffep1f, -0x17ffffeL, -23), -2.0f, MAKE_HEX_FLOAT(-0x1.800002p0f, -0x1800002L, -24), -1.5f, MAKE_HEX_FLOAT(-0x1.7ffffep0f, -0x17ffffeL, -24),MAKE_HEX_FLOAT(-0x1.000002p0f, -0x1000002L, -24), -1.0f, MAKE_HEX_FLOAT(-0x1.fffffep-1f, -0x1fffffeL, -25), + MAKE_HEX_FLOAT(-0x1.000002p-1f, -0x1000002L, -25), -0.5f, MAKE_HEX_FLOAT(-0x1.fffffep-2f, -0x1fffffeL, -26), MAKE_HEX_FLOAT(-0x1.000002p-2f, -0x1000002L, -26), -0.25f, MAKE_HEX_FLOAT(-0x1.fffffep-3f, -0x1fffffeL, -27), + 
MAKE_HEX_FLOAT(-0x1.000002p-126f, -0x1000002L, -150), -FLT_MIN, MAKE_HEX_FLOAT(-0x0.fffffep-126f, -0x0fffffeL, -150), MAKE_HEX_FLOAT(-0x0.000ffep-126f, -0x0000ffeL, -150), MAKE_HEX_FLOAT(-0x0.0000fep-126f, -0x00000feL, -150), MAKE_HEX_FLOAT(-0x0.00000ep-126f, -0x000000eL, -150), MAKE_HEX_FLOAT(-0x0.00000cp-126f, -0x000000cL, -150), MAKE_HEX_FLOAT(-0x0.00000ap-126f, -0x000000aL, -150), + MAKE_HEX_FLOAT(-0x0.000008p-126f, -0x0000008L, -150), MAKE_HEX_FLOAT(-0x0.000006p-126f, -0x0000006L, -150), MAKE_HEX_FLOAT(-0x0.000004p-126f, -0x0000004L, -150), MAKE_HEX_FLOAT(-0x0.000002p-126f, -0x0000002L, -150), -0.0f, + +NAN, +INFINITY, +FLT_MAX, MAKE_HEX_FLOAT(+0x1.000002p64f, +0x1000002L, 40), MAKE_HEX_FLOAT(+0x1.0p64f, +0x1L, 64), MAKE_HEX_FLOAT(+0x1.fffffep63f, +0x1fffffeL, 39), MAKE_HEX_FLOAT(+0x1.000002p63f, +0x1000002L, 39), MAKE_HEX_FLOAT(+0x1.0p63f, +0x1L, 63), MAKE_HEX_FLOAT(+0x1.fffffep62f, +0x1fffffeL, 38), + MAKE_HEX_FLOAT(+0x1.000002p32f, +0x1000002L, 8), MAKE_HEX_FLOAT(+0x1.0p32f, +0x1L, 32), MAKE_HEX_FLOAT(+0x1.fffffep31f, +0x1fffffeL, 7), MAKE_HEX_FLOAT(+0x1.000002p31f, +0x1000002L, 7), MAKE_HEX_FLOAT(+0x1.0p31f, +0x1L, 31), MAKE_HEX_FLOAT(+0x1.fffffep30f, +0x1fffffeL, 6), +1000.f, +100.f, +4.0f, +3.5f, + +3.0f, MAKE_HEX_FLOAT(+0x1.800002p1f, +0x1800002L, -23), 2.5f, MAKE_HEX_FLOAT(+0x1.7ffffep1f, +0x17ffffeL, -23),+2.0f, MAKE_HEX_FLOAT(+0x1.800002p0f, +0x1800002L, -24), 1.5f, MAKE_HEX_FLOAT(+0x1.7ffffep0f, +0x17ffffeL, -24), MAKE_HEX_FLOAT(+0x1.000002p0f, +0x1000002L, -24), +1.0f, MAKE_HEX_FLOAT(+0x1.fffffep-1f, +0x1fffffeL, -25), + MAKE_HEX_FLOAT(+0x1.000002p-1f, +0x1000002L, -25), +0.5f, MAKE_HEX_FLOAT(+0x1.fffffep-2f, +0x1fffffeL, -26), MAKE_HEX_FLOAT(+0x1.000002p-2f, +0x1000002L, -26), +0.25f, MAKE_HEX_FLOAT(+0x1.fffffep-3f, +0x1fffffeL, -27), + MAKE_HEX_FLOAT(0x1.000002p-126f, 0x1000002L, -150), +FLT_MIN, MAKE_HEX_FLOAT(+0x0.fffffep-126f, +0x0fffffeL, -150), MAKE_HEX_FLOAT(+0x0.000ffep-126f, +0x0000ffeL, -150), MAKE_HEX_FLOAT(+0x0.0000fep-126f, 
+0x00000feL, -150), MAKE_HEX_FLOAT(+0x0.00000ep-126f, +0x000000eL, -150), MAKE_HEX_FLOAT(+0x0.00000cp-126f, +0x000000cL, -150), MAKE_HEX_FLOAT(+0x0.00000ap-126f, +0x000000aL, -150), + MAKE_HEX_FLOAT(+0x0.000008p-126f, +0x0000008L, -150), MAKE_HEX_FLOAT(+0x0.000006p-126f, +0x0000006L, -150), MAKE_HEX_FLOAT(+0x0.000004p-126f, +0x0000004L, -150), MAKE_HEX_FLOAT(+0x0.000002p-126f, +0x0000002L, -150), +0.0f + }; + + cl_uint *o = (cl_uint *)out; + int i; + + for( i = 0; i < count; i++ ) { + if( gIsEmbedded ) + o[i] = (cl_uint) genrand_int32(d); + else + o[i] = (cl_uint) i + start; + } + + if( 0 == start ) + { + size_t tableSize = sizeof( specialValuesFloat ); + if( sizeof( float) * count < tableSize ) + tableSize = sizeof( float) * count; + memcpy( (char*)(o + i) - tableSize, specialValuesFloat, tableSize ); + } + + if( kUnsaturated == sat ) + { + clampf func = gClampFloat[ destType ][round]; + float *f = (float *)out; + + for( i = 0; i < count; i++ ) + f[i] = func( f[i] ); + } +} + +// used to convert a bucket of bits into a search pattern through double +static inline double DoubleFromUInt32( uint32_t bits ); +static inline double DoubleFromUInt32( uint32_t bits ) +{ + union{ uint64_t u; double d;} u; + + // split 0x89abcdef to 0x89abc00000000def + u.u = bits & 0xfffU; + u.u |= (uint64_t) (bits & ~0xfffU) << 32; + + // sign extend the leading bit of def segment as sign bit so that the middle region consists of either all 1s or 0s + u.u -= (bits & 0x800U) << 1; + + // return result + return u.d; +} + +// A table of more difficult cases to get right +static const double specialValuesDouble[] = { + -NAN, -INFINITY, -DBL_MAX, MAKE_HEX_DOUBLE(-0x1.0000000000001p64, -0x10000000000001LL, 12), MAKE_HEX_DOUBLE(-0x1.0p64, -0x1LL, 64), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp63, -0x1fffffffffffffLL, 11), MAKE_HEX_DOUBLE(-0x1.80000000000001p64, -0x180000000000001LL, 8), + MAKE_HEX_DOUBLE(-0x1.8p64, -0x18LL, 60), MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp64, -0x17ffffffffffffLL, 12), 
MAKE_HEX_DOUBLE(-0x1.80000000000001p63, -0x180000000000001LL, 7), MAKE_HEX_DOUBLE(-0x1.8p63, -0x18LL, 59), MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp63, -0x17ffffffffffffLL, 11), + MAKE_HEX_DOUBLE(-0x1.0000000000001p63, -0x10000000000001LL, 11), MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10), MAKE_HEX_DOUBLE(-0x1.80000000000001p32, -0x180000000000001LL, -24), MAKE_HEX_DOUBLE(-0x1.8p32, -0x18LL, 28), MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp32, -0x17ffffffffffffLL, -20), + MAKE_HEX_DOUBLE(-0x1.000002p32, -0x1000002LL, 8), MAKE_HEX_DOUBLE(-0x1.0p32, -0x1LL, 32), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp31, -0x1fffffffffffffLL, -21), MAKE_HEX_DOUBLE(-0x1.80000000000001p31, -0x180000000000001LL, -25), MAKE_HEX_DOUBLE(-0x1.8p31, -0x18LL, 27), MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp31, -0x17ffffffffffffLL, -21), MAKE_HEX_DOUBLE(-0x1.0000000000001p31, -0x10000000000001LL, -21), MAKE_HEX_DOUBLE(-0x1.0p31, -0x1LL, 31), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp30, -0x1fffffffffffffLL, -22), -1000., -100., -4.0, -3.5, + -3.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p1, -0x18000000000001LL, -51), -2.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp1, -0x17ffffffffffffLL, -51), -2.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p0, -0x18000000000001LL, -52), -1.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp0, -0x17ffffffffffffLL, -52),MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52), -1.0, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-1, -0x1fffffffffffffLL, -53), + MAKE_HEX_DOUBLE(-0x1.0000000000001p-1, -0x10000000000001LL, -53), -0.5, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-2, -0x1fffffffffffffLL, -54), MAKE_HEX_DOUBLE(-0x1.0000000000001p-2, -0x10000000000001LL, -54), -0.25, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-3, -0x1fffffffffffffLL, -55), + MAKE_HEX_DOUBLE(-0x1.0000000000001p-1022, -0x10000000000001LL, -1074), -DBL_MIN, MAKE_HEX_DOUBLE(-0x0.fffffffffffffp-1022, -0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000fffp-1022, -0x00000000000fffLL, -1074), 
MAKE_HEX_DOUBLE(-0x0.00000000000fep-1022, -0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ep-1022, -0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000cp-1022, -0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ap-1022, -0x0000000000000aLL, -1074), + MAKE_HEX_DOUBLE(-0x0.0000000000008p-1022, -0x00000000000008LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000007p-1022, -0x00000000000007LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000006p-1022, -0x00000000000006LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000005p-1022, -0x00000000000005LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000004p-1022, -0x00000000000004LL, -1074), + MAKE_HEX_DOUBLE(-0x0.0000000000003p-1022, -0x00000000000003LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000002p-1022, -0x00000000000002LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000001p-1022, -0x00000000000001LL, -1074), -0.0, + + MAKE_HEX_DOUBLE(+0x1.fffffffffffffp63, +0x1fffffffffffffLL, 11), MAKE_HEX_DOUBLE(0x1.80000000000001p63, 0x180000000000001LL, 7), MAKE_HEX_DOUBLE(0x1.8p63, 0x18LL, 59), MAKE_HEX_DOUBLE(0x1.7ffffffffffffp63, 0x17ffffffffffffLL, 11), MAKE_HEX_DOUBLE(+0x1.0000000000001p63, +0x10000000000001LL, 11), MAKE_HEX_DOUBLE(+0x1.0p63, +0x1LL, 63), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10), + MAKE_HEX_DOUBLE(+0x1.80000000000001p32, +0x180000000000001LL, -24), MAKE_HEX_DOUBLE(+0x1.8p32, +0x18LL, 28), MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp32, +0x17ffffffffffffLL, -20), + MAKE_HEX_DOUBLE(+0x1.000002p32, +0x1000002LL, 8), MAKE_HEX_DOUBLE(+0x1.0p32, +0x1LL, 32), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp31, +0x1fffffffffffffLL, -21), MAKE_HEX_DOUBLE(+0x1.80000000000001p31, +0x180000000000001LL, -25), MAKE_HEX_DOUBLE(+0x1.8p31, +0x18LL, 27), MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp31, +0x17ffffffffffffLL, -21), MAKE_HEX_DOUBLE(+0x1.0000000000001p31, +0x10000000000001LL, -21), MAKE_HEX_DOUBLE(+0x1.0p31, +0x1LL, 31), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp30, +0x1fffffffffffffLL, -22), +1000., +100., +4.0, +3.5, + +3.0, 
MAKE_HEX_DOUBLE(+0x1.8000000000001p1, +0x18000000000001LL, -51), +2.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp1, +0x17ffffffffffffLL, -51), +2.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p0, +0x18000000000001LL, -52), +1.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp0, +0x17ffffffffffffLL, -52),MAKE_HEX_DOUBLE(+0x1.0000000000001p0, +0x10000000000001LL, -52), +1.0, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-1, +0x1fffffffffffffLL, -53), + MAKE_HEX_DOUBLE(+0x1.0000000000001p-1, +0x10000000000001LL, -53), +0.5, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-2, +0x1fffffffffffffLL, -54), MAKE_HEX_DOUBLE(+0x1.0000000000001p-2, +0x10000000000001LL, -54), +0.25, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-3, +0x1fffffffffffffLL, -55), + MAKE_HEX_DOUBLE(+0x1.0000000000001p-1022, +0x10000000000001LL, -1074), +DBL_MIN, MAKE_HEX_DOUBLE(+0x0.fffffffffffffp-1022, +0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000fffp-1022, +0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(+0x0.00000000000fep-1022, +0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ep-1022, +0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000cp-1022, +0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ap-1022, +0x0000000000000aLL, -1074), + MAKE_HEX_DOUBLE(+0x0.0000000000008p-1022, +0x00000000000008LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000007p-1022, +0x00000000000007LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000006p-1022, +0x00000000000006LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000005p-1022, +0x00000000000005LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000004p-1022, +0x00000000000004LL, -1074), + MAKE_HEX_DOUBLE(+0x0.0000000000003p-1022, +0x00000000000003LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000002p-1022, +0x00000000000002LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000001p-1022, +0x00000000000001LL, -1074), +0.0, + + MAKE_HEX_DOUBLE(-0x1.ffffffffffffep62, -0x1ffffffffffffeLL, 10), MAKE_HEX_DOUBLE(-0x1.ffffffffffffcp62, -0x1ffffffffffffcLL, 10), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10), 
MAKE_HEX_DOUBLE(+0x1.ffffffffffffep62, +0x1ffffffffffffeLL, 10), MAKE_HEX_DOUBLE(+0x1.ffffffffffffcp62, +0x1ffffffffffffcLL, 10), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10), + MAKE_HEX_DOUBLE(-0x1.ffffffffffffep51, -0x1ffffffffffffeLL, -1), MAKE_HEX_DOUBLE(-0x1.ffffffffffffcp51, -0x1ffffffffffffcLL, -1), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp51, -0x1fffffffffffffLL, -1), MAKE_HEX_DOUBLE(+0x1.ffffffffffffep51, +0x1ffffffffffffeLL, -1), MAKE_HEX_DOUBLE(+0x1.ffffffffffffcp51, +0x1ffffffffffffcLL, -1), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp51, +0x1fffffffffffffLL, -1), + MAKE_HEX_DOUBLE(-0x1.ffffffffffffep52, -0x1ffffffffffffeLL, 0), MAKE_HEX_DOUBLE(-0x1.ffffffffffffcp52, -0x1ffffffffffffcLL, 0), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp52, -0x1fffffffffffffLL, 0), MAKE_HEX_DOUBLE(+0x1.ffffffffffffep52, +0x1ffffffffffffeLL, 0), MAKE_HEX_DOUBLE(+0x1.ffffffffffffcp52, +0x1ffffffffffffcLL, 0), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp52, +0x1fffffffffffffLL, 0), + MAKE_HEX_DOUBLE(-0x1.ffffffffffffep53, -0x1ffffffffffffeLL, 1), MAKE_HEX_DOUBLE(-0x1.ffffffffffffcp53, -0x1ffffffffffffcLL, 1), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp53, -0x1fffffffffffffLL, 1), MAKE_HEX_DOUBLE(+0x1.ffffffffffffep53, +0x1ffffffffffffeLL, 1), MAKE_HEX_DOUBLE(+0x1.ffffffffffffcp53, +0x1ffffffffffffcLL, 1), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp53, +0x1fffffffffffffLL, 1), + MAKE_HEX_DOUBLE(-0x1.0000000000002p52, -0x10000000000002LL, 0), MAKE_HEX_DOUBLE(-0x1.0000000000001p52, -0x10000000000001LL, 0), MAKE_HEX_DOUBLE(-0x1.0p52, -0x1LL, 52), MAKE_HEX_DOUBLE(+0x1.0000000000002p52, +0x10000000000002LL, 0), MAKE_HEX_DOUBLE(+0x1.0000000000001p52, +0x10000000000001LL, 0), MAKE_HEX_DOUBLE(+0x1.0p52, +0x1LL, 52), + MAKE_HEX_DOUBLE(-0x1.0000000000002p53, -0x10000000000002LL, 1), MAKE_HEX_DOUBLE(-0x1.0000000000001p53, -0x10000000000001LL, 1), MAKE_HEX_DOUBLE(-0x1.0p53, -0x1LL, 53), MAKE_HEX_DOUBLE(+0x1.0000000000002p53, +0x10000000000002LL, 1), MAKE_HEX_DOUBLE(+0x1.0000000000001p53, +0x10000000000001LL, 1), 
MAKE_HEX_DOUBLE(+0x1.0p53, +0x1LL, 53), + MAKE_HEX_DOUBLE(-0x1.0000000000002p54, -0x10000000000002LL, 2), MAKE_HEX_DOUBLE(-0x1.0000000000001p54, -0x10000000000001LL, 2), MAKE_HEX_DOUBLE(-0x1.0p54, -0x1LL, 54), MAKE_HEX_DOUBLE(+0x1.0000000000002p54, +0x10000000000002LL, 2), MAKE_HEX_DOUBLE(+0x1.0000000000001p54, +0x10000000000001LL, 2), MAKE_HEX_DOUBLE(+0x1.0p54, +0x1LL, 54), + MAKE_HEX_DOUBLE(-0x1.fffffffefffffp62, -0x1fffffffefffffLL, 10), MAKE_HEX_DOUBLE(-0x1.ffffffffp62, -0x1ffffffffLL, 30), MAKE_HEX_DOUBLE(-0x1.ffffffff00001p62, -0x1ffffffff00001LL, 10), MAKE_HEX_DOUBLE(0x1.fffffffefffffp62, 0x1fffffffefffffLL, 10), MAKE_HEX_DOUBLE(0x1.ffffffffp62, 0x1ffffffffLL, 30), MAKE_HEX_DOUBLE(0x1.ffffffff00001p62, 0x1ffffffff00001LL, 10), +}; + + +void init_double( void *out, SaturationMode sat, RoundingMode round, Type destType, uint64_t start, int count, MTdata UNUSED d ) +{ + double *o = (double*)out; + int i; + + for( i = 0; i < count; i++ ) + { + uint64_t z = i + start; + o[i] = DoubleFromUInt32( (uint32_t) z ^ (uint32_t) (z >> 32)); + } + + if( 0 == start ) + { + size_t tableSize = sizeof( specialValuesDouble ); + if( sizeof( cl_double) * count < tableSize ) + tableSize = sizeof( cl_double) * count; + memcpy( (char*)(o + i) - tableSize, specialValuesDouble, tableSize ); + } + + if( 0 == sat ) + { + clampd func = gClampDouble[ destType ][round]; + + for( i = 0; i < count; i++ ) + o[i] = func( o[i] ); + } +} + +cl_ulong random64( MTdata d ) +{ + return (cl_ulong) genrand_int32(d) | ((cl_ulong) genrand_int32(d) << 32); +} + +void init_ulong( void *out, SaturationMode UNUSED sat, RoundingMode UNUSED round, Type UNUSED destType, uint64_t start, int count, MTdata d ) +{ + cl_ulong *o = (cl_ulong *)out; + cl_ulong i, j, k; + + i = 0; + if( start == 0 ) + { + //Try various powers of two + for( j = 0; j < (cl_ulong) count && j < 8 * sizeof(cl_ulong); j++ ) + o[j] = (cl_ulong) 1 << j; + i = j; + + // try the complement of those + for( j = 0; i < (cl_ulong) count && j < 8 * 
sizeof(cl_ulong); j++ ) + o[i++] = ~((cl_ulong) 1 << j); + + //Try various negative powers of two + for( j = 0; i < (cl_ulong) count && j < 8 * sizeof(cl_ulong); j++ ) + o[i++] = (cl_ulong) 0xFFFFFFFFFFFFFFFEULL << j; + + //try various powers of two plus 1, shifted by various amounts + for( j = 0; i < (cl_ulong)count && j < 8 * sizeof(cl_ulong); j++ ) + for( k = 0; i < (cl_ulong)count && k < 8 * sizeof(cl_ulong) - j; k++ ) + o[i++] = (((cl_ulong) 1 << j) + 1) << k; + + //try various powers of two minus 1 + for( j = 0; i < (cl_ulong)count && j < 8 * sizeof(cl_ulong); j++ ) + for( k = 0; i < (cl_ulong)count && k < 8 * sizeof(cl_ulong) - j; k++ ) + o[i++] = (((cl_ulong) 1 << j) - 1) << k; + + // Other patterns + cl_ulong pattern[] = { 0x3333333333333333ULL, 0x5555555555555555ULL, 0x9999999999999999ULL, 0x6666666666666666ULL, 0xccccccccccccccccULL, 0xaaaaaaaaaaaaaaaaULL }; + cl_ulong mask[] = { 0xffffffffffffffffULL, 0xff00ff00ff00ff00ULL, 0xffff0000ffff0000ULL, 0xffffffff00000000ULL }; + for( j = 0; i < (cl_ulong) count && j < sizeof(pattern) / sizeof( pattern[0]); j++ ) + for( k = 0; i + 2 <= (cl_ulong) count && k < sizeof(mask) / sizeof( mask[0]); k++ ) + { + o[i++] = pattern[j] & mask[k]; + o[i++] = pattern[j] & ~mask[k]; + } + } + + for( ; i < (cl_ulong) count; i++ ) + o[i] = random64(d); +} + +void init_long( void *out, SaturationMode sat, RoundingMode round, Type destType, uint64_t start, int count, MTdata d ) +{ + init_ulong( out, sat, round, destType, start, count, d ); +} + +// ====== + +void uchar2uchar_many( void *out, void *in, size_t n); +void uchar2uchar_sat_many( void *out, void *in, size_t n); +void char2uchar_many( void *out, void *in, size_t n); +void char2uchar_sat_many( void *out, void *in, size_t n); +void ushort2uchar_many( void *out, void *in, size_t n); +void ushort2uchar_sat_many( void *out, void *in, size_t n); +void short2uchar_many( void *out, void *in, size_t n); +void short2uchar_sat_many( void *out, void *in, size_t n); +void 
uint2uchar_many( void *out, void *in, size_t n); +void uint2uchar_sat_many( void *out, void *in, size_t n); +void int2uchar_many( void *out, void *in, size_t n); +void int2uchar_sat_many( void *out, void *in, size_t n); +void float2uchar_many( void *out, void *in, size_t n); +void float2uchar_sat_many( void *out, void *in, size_t n); +void double2uchar_many( void *out, void *in, size_t n); +void double2uchar_sat_many( void *out, void *in, size_t n); +void ulong2uchar_many( void *out, void *in, size_t n); +void ulong2uchar_sat_many( void *out, void *in, size_t n); +void long2uchar_many( void *out, void *in, size_t n); +void long2uchar_sat_many( void *out, void *in, size_t n); +void uchar2char_many( void *out, void *in, size_t n); +void uchar2char_sat_many( void *out, void *in, size_t n); +void char2char_many( void *out, void *in, size_t n); +void char2char_sat_many( void *out, void *in, size_t n); +void ushort2char_many( void *out, void *in, size_t n); +void ushort2char_sat_many( void *out, void *in, size_t n); +void short2char_many( void *out, void *in, size_t n); +void short2char_sat_many( void *out, void *in, size_t n); +void uint2char_many( void *out, void *in, size_t n); +void uint2char_sat_many( void *out, void *in, size_t n); +void int2char_many( void *out, void *in, size_t n); +void int2char_sat_many( void *out, void *in, size_t n); +void float2char_many( void *out, void *in, size_t n); +void float2char_sat_many( void *out, void *in, size_t n); +void double2char_many( void *out, void *in, size_t n); +void double2char_sat_many( void *out, void *in, size_t n); +void ulong2char_many( void *out, void *in, size_t n); +void ulong2char_sat_many( void *out, void *in, size_t n); +void long2char_many( void *out, void *in, size_t n); +void long2char_sat_many( void *out, void *in, size_t n); +void uchar2ushort_many( void *out, void *in, size_t n); +void uchar2ushort_sat_many( void *out, void *in, size_t n); +void char2ushort_many( void *out, void *in, size_t n); +void 
char2ushort_sat_many( void *out, void *in, size_t n); +void ushort2ushort_many( void *out, void *in, size_t n); +void ushort2ushort_sat_many( void *out, void *in, size_t n); +void short2ushort_many( void *out, void *in, size_t n); +void short2ushort_sat_many( void *out, void *in, size_t n); +void uint2ushort_many( void *out, void *in, size_t n); +void uint2ushort_sat_many( void *out, void *in, size_t n); +void int2ushort_many( void *out, void *in, size_t n); +void int2ushort_sat_many( void *out, void *in, size_t n); +void float2ushort_many( void *out, void *in, size_t n); +void float2ushort_sat_many( void *out, void *in, size_t n); +void double2ushort_many( void *out, void *in, size_t n); +void double2ushort_sat_many( void *out, void *in, size_t n); +void ulong2ushort_many( void *out, void *in, size_t n); +void ulong2ushort_sat_many( void *out, void *in, size_t n); +void long2ushort_many( void *out, void *in, size_t n); +void long2ushort_sat_many( void *out, void *in, size_t n); +void uchar2short_many( void *out, void *in, size_t n); +void uchar2short_sat_many( void *out, void *in, size_t n); +void char2short_many( void *out, void *in, size_t n); +void char2short_sat_many( void *out, void *in, size_t n); +void ushort2short_many( void *out, void *in, size_t n); +void ushort2short_sat_many( void *out, void *in, size_t n); +void short2short_many( void *out, void *in, size_t n); +void short2short_sat_many( void *out, void *in, size_t n); +void uint2short_many( void *out, void *in, size_t n); +void uint2short_sat_many( void *out, void *in, size_t n); +void int2short_many( void *out, void *in, size_t n); +void int2short_sat_many( void *out, void *in, size_t n); +void float2short_many( void *out, void *in, size_t n); +void float2short_sat_many( void *out, void *in, size_t n); +void double2short_many( void *out, void *in, size_t n); +void double2short_sat_many( void *out, void *in, size_t n); +void ulong2short_many( void *out, void *in, size_t n); +void 
ulong2short_sat_many( void *out, void *in, size_t n); +void long2short_many( void *out, void *in, size_t n); +void long2short_sat_many( void *out, void *in, size_t n); +void uchar2uint_many( void *out, void *in, size_t n); +void uchar2uint_sat_many( void *out, void *in, size_t n); +void char2uint_many( void *out, void *in, size_t n); +void char2uint_sat_many( void *out, void *in, size_t n); +void ushort2uint_many( void *out, void *in, size_t n); +void ushort2uint_sat_many( void *out, void *in, size_t n); +void short2uint_many( void *out, void *in, size_t n); +void short2uint_sat_many( void *out, void *in, size_t n); +void uint2uint_many( void *out, void *in, size_t n); +void uint2uint_sat_many( void *out, void *in, size_t n); +void int2uint_many( void *out, void *in, size_t n); +void int2uint_sat_many( void *out, void *in, size_t n); +void float2uint_many( void *out, void *in, size_t n); +void float2uint_sat_many( void *out, void *in, size_t n); +void double2uint_many( void *out, void *in, size_t n); +void double2uint_sat_many( void *out, void *in, size_t n); +void ulong2uint_many( void *out, void *in, size_t n); +void ulong2uint_sat_many( void *out, void *in, size_t n); +void long2uint_many( void *out, void *in, size_t n); +void long2uint_sat_many( void *out, void *in, size_t n); +void uchar2int_many( void *out, void *in, size_t n); +void uchar2int_sat_many( void *out, void *in, size_t n); +void char2int_many( void *out, void *in, size_t n); +void char2int_sat_many( void *out, void *in, size_t n); +void ushort2int_many( void *out, void *in, size_t n); +void ushort2int_sat_many( void *out, void *in, size_t n); +void short2int_many( void *out, void *in, size_t n); +void short2int_sat_many( void *out, void *in, size_t n); +void uint2int_many( void *out, void *in, size_t n); +void uint2int_sat_many( void *out, void *in, size_t n); +void int2int_many( void *out, void *in, size_t n); +void int2int_sat_many( void *out, void *in, size_t n); +void float2int_many( void 
*out, void *in, size_t n); +void float2int_sat_many( void *out, void *in, size_t n); +void double2int_many( void *out, void *in, size_t n); +void double2int_sat_many( void *out, void *in, size_t n); +void ulong2int_many( void *out, void *in, size_t n); +void ulong2int_sat_many( void *out, void *in, size_t n); +void long2int_many( void *out, void *in, size_t n); +void long2int_sat_many( void *out, void *in, size_t n); +void uchar2float_many( void *out, void *in, size_t n); +void uchar2float_sat_many( void *out, void *in, size_t n); +void char2float_many( void *out, void *in, size_t n); +void char2float_sat_many( void *out, void *in, size_t n); +void ushort2float_many( void *out, void *in, size_t n); +void ushort2float_sat_many( void *out, void *in, size_t n); +void short2float_many( void *out, void *in, size_t n); +void short2float_sat_many( void *out, void *in, size_t n); +void uint2float_many( void *out, void *in, size_t n); +void uint2float_sat_many( void *out, void *in, size_t n); +void int2float_many( void *out, void *in, size_t n); +void int2float_sat_many( void *out, void *in, size_t n); +void float2float_many( void *out, void *in, size_t n); +void float2float_sat_many( void *out, void *in, size_t n); +void double2float_many( void *out, void *in, size_t n); +void double2float_sat_many( void *out, void *in, size_t n); +void ulong2float_many( void *out, void *in, size_t n); +void ulong2float_sat_many( void *out, void *in, size_t n); +void long2float_many( void *out, void *in, size_t n); +void long2float_sat_many( void *out, void *in, size_t n); +void uchar2double_many( void *out, void *in, size_t n); +void uchar2double_sat_many( void *out, void *in, size_t n); +void char2double_many( void *out, void *in, size_t n); +void char2double_sat_many( void *out, void *in, size_t n); +void ushort2double_many( void *out, void *in, size_t n); +void ushort2double_sat_many( void *out, void *in, size_t n); +void short2double_many( void *out, void *in, size_t n); +void 
short2double_sat_many( void *out, void *in, size_t n); +void uint2double_many( void *out, void *in, size_t n); +void uint2double_sat_many( void *out, void *in, size_t n); +void int2double_many( void *out, void *in, size_t n); +void int2double_sat_many( void *out, void *in, size_t n); +void float2double_many( void *out, void *in, size_t n); +void float2double_sat_many( void *out, void *in, size_t n); +void double2double_many( void *out, void *in, size_t n); +void double2double_sat_many( void *out, void *in, size_t n); +void ulong2double_many( void *out, void *in, size_t n); +void ulong2double_sat_many( void *out, void *in, size_t n); +void long2double_many( void *out, void *in, size_t n); +void long2double_sat_many( void *out, void *in, size_t n); +void uchar2ulong_many( void *out, void *in, size_t n); +void uchar2ulong_sat_many( void *out, void *in, size_t n); +void char2ulong_many( void *out, void *in, size_t n); +void char2ulong_sat_many( void *out, void *in, size_t n); +void ushort2ulong_many( void *out, void *in, size_t n); +void ushort2ulong_sat_many( void *out, void *in, size_t n); +void short2ulong_many( void *out, void *in, size_t n); +void short2ulong_sat_many( void *out, void *in, size_t n); +void uint2ulong_many( void *out, void *in, size_t n); +void uint2ulong_sat_many( void *out, void *in, size_t n); +void int2ulong_many( void *out, void *in, size_t n); +void int2ulong_sat_many( void *out, void *in, size_t n); +void float2ulong_many( void *out, void *in, size_t n); +void float2ulong_sat_many( void *out, void *in, size_t n); +void double2ulong_many( void *out, void *in, size_t n); +void double2ulong_sat_many( void *out, void *in, size_t n); +void ulong2ulong_many( void *out, void *in, size_t n); +void ulong2ulong_sat_many( void *out, void *in, size_t n); +void long2ulong_many( void *out, void *in, size_t n); +void long2ulong_sat_many( void *out, void *in, size_t n); +void uchar2long_many( void *out, void *in, size_t n); +void uchar2long_sat_many( void 
*out, void *in, size_t n); +void char2long_many( void *out, void *in, size_t n); +void char2long_sat_many( void *out, void *in, size_t n); +void ushort2long_many( void *out, void *in, size_t n); +void ushort2long_sat_many( void *out, void *in, size_t n); +void short2long_many( void *out, void *in, size_t n); +void short2long_sat_many( void *out, void *in, size_t n); +void uint2long_many( void *out, void *in, size_t n); +void uint2long_sat_many( void *out, void *in, size_t n); +void int2long_many( void *out, void *in, size_t n); +void int2long_sat_many( void *out, void *in, size_t n); +void float2long_many( void *out, void *in, size_t n); +void float2long_sat_many( void *out, void *in, size_t n); +void double2long_many( void *out, void *in, size_t n); +void double2long_sat_many( void *out, void *in, size_t n); +void ulong2long_many( void *out, void *in, size_t n); +void ulong2long_sat_many( void *out, void *in, size_t n); +void long2long_many( void *out, void *in, size_t n); +void long2long_sat_many( void *out, void *in, size_t n); + +void uchar2uchar_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_uchar )); } +void uchar2uchar_sat_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_uchar )); } +void char2uchar_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2uchar( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_char)); }} +void char2uchar_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2uchar_sat( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_char)); }} +void ushort2uchar_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2uchar( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_ushort)); }} +void ushort2uchar_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2uchar_sat( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_ushort)); }} +void 
short2uchar_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2uchar( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_short)); }} +void short2uchar_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2uchar_sat( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_short)); }} +void uint2uchar_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2uchar( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_uint)); }} +void uint2uchar_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2uchar_sat( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_uint)); }} +void int2uchar_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2uchar( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_int)); }} +void int2uchar_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2uchar_sat( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_int)); }} +void float2uchar_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2uchar( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_float)); }} +void float2uchar_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2uchar_sat( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_float)); }} +void double2uchar_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2uchar( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_double)); }} +void double2uchar_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2uchar_sat( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_double)); }} +void ulong2uchar_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2uchar( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_ulong)); }} +void 
ulong2uchar_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2uchar_sat( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_ulong)); }} +void long2uchar_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2uchar( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_long)); }} +void long2uchar_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2uchar_sat( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_long)); }} +void uchar2char_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2char( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_uchar)); }} +void uchar2char_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2char_sat( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_uchar)); }} +void char2char_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_char )); } +void char2char_sat_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_char )); } +void ushort2char_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2char( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_ushort)); }} +void ushort2char_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2char_sat( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_ushort)); }} +void short2char_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2char( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_short)); }} +void short2char_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2char_sat( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_short)); }} +void uint2char_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2char( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_uint)); }} +void 
uint2char_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2char_sat( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_uint)); }} +void int2char_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2char( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_int)); }} +void int2char_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2char_sat( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_int)); }} +void float2char_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2char( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_float)); }} +void float2char_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2char_sat( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_float)); }} +void double2char_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2char( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_double)); }} +void double2char_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2char_sat( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_double)); }} +void ulong2char_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2char( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_ulong)); }} +void ulong2char_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2char_sat( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_ulong)); }} +void long2char_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2char( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_long)); }} +void long2char_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2char_sat( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_long)); }} +void uchar2ushort_many( void *out, void *in, 
size_t n){size_t i; for( i = 0; i < n; i++){ uchar2ushort( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_uchar)); }} +void uchar2ushort_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2ushort_sat( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_uchar)); }} +void char2ushort_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2ushort( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_char)); }} +void char2ushort_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2ushort_sat( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_char)); }} +void ushort2ushort_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_ushort )); } +void ushort2ushort_sat_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_ushort )); } +void short2ushort_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2ushort( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_short)); }} +void short2ushort_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2ushort_sat( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_short)); }} +void uint2ushort_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2ushort( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_uint)); }} +void uint2ushort_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2ushort_sat( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_uint)); }} +void int2ushort_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2ushort( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_int)); }} +void int2ushort_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2ushort_sat( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_int)); }} +void 
float2ushort_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2ushort( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_float)); }} +void float2ushort_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2ushort_sat( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_float)); }} +void double2ushort_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2ushort( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_double)); }} +void double2ushort_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2ushort_sat( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_double)); }} +void ulong2ushort_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2ushort( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_ulong)); }} +void ulong2ushort_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2ushort_sat( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_ulong)); }} +void long2ushort_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2ushort( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_long)); }} +void long2ushort_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2ushort_sat( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_long)); }} +void uchar2short_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2short( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_uchar)); }} +void uchar2short_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2short_sat( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_uchar)); }} +void char2short_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2short( (char*) out + i * sizeof(cl_short), (char*) in + i * 
sizeof(cl_char)); }} +void char2short_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2short_sat( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_char)); }} +void ushort2short_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2short( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_ushort)); }} +void ushort2short_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2short_sat( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_ushort)); }} +void short2short_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_short )); } +void short2short_sat_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_short )); } +void uint2short_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2short( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_uint)); }} +void uint2short_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2short_sat( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_uint)); }} +void int2short_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2short( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_int)); }} +void int2short_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2short_sat( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_int)); }} +void float2short_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2short( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_float)); }} +void float2short_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2short_sat( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_float)); }} +void double2short_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2short( (char*) out + i * sizeof(cl_short), 
(char*) in + i * sizeof(cl_double)); }} +void double2short_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2short_sat( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_double)); }} +void ulong2short_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2short( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_ulong)); }} +void ulong2short_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2short_sat( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_ulong)); }} +void long2short_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2short( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_long)); }} +void long2short_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2short_sat( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_long)); }} +void uchar2uint_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2uint( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_uchar)); }} +void uchar2uint_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2uint_sat( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_uchar)); }} +void char2uint_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2uint( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_char)); }} +void char2uint_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2uint_sat( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_char)); }} +void ushort2uint_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2uint( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_ushort)); }} +void ushort2uint_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2uint_sat( (char*) out + i * sizeof(cl_uint), (char*) in + i 
* sizeof(cl_ushort)); }} +void short2uint_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2uint( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_short)); }} +void short2uint_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2uint_sat( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_short)); }} +void uint2uint_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_uint )); } +void uint2uint_sat_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_uint )); } +void int2uint_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2uint( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_int)); }} +void int2uint_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2uint_sat( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_int)); }} +void float2uint_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2uint( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_float)); }} +void float2uint_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2uint_sat( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_float)); }} +void double2uint_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2uint( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_double)); }} +void double2uint_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2uint_sat( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_double)); }} +void ulong2uint_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2uint( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_ulong)); }} +void ulong2uint_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2uint_sat( (char*) out + i * sizeof(cl_uint), (char*) in + i * 
sizeof(cl_ulong)); }} +void long2uint_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2uint( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_long)); }} +void long2uint_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2uint_sat( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_long)); }} +void uchar2int_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2int( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_uchar)); }} +void uchar2int_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2int_sat( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_uchar)); }} +void char2int_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2int( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_char)); }} +void char2int_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2int_sat( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_char)); }} +void ushort2int_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2int( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_ushort)); }} +void ushort2int_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2int_sat( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_ushort)); }} +void short2int_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2int( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_short)); }} +void short2int_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2int_sat( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_short)); }} +void uint2int_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2int( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_uint)); }} +void uint2int_sat_many( void *out, void *in, 
size_t n){size_t i; for( i = 0; i < n; i++){ uint2int_sat( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_uint)); }} +void int2int_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_int )); } +void int2int_sat_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_int )); } +void float2int_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2int( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_float)); }} +void float2int_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2int_sat( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_float)); }} +void double2int_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2int( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_double)); }} +void double2int_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2int_sat( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_double)); }} +void ulong2int_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2int( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_ulong)); }} +void ulong2int_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2int_sat( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_ulong)); }} +void long2int_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2int( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_long)); }} +void long2int_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2int_sat( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_long)); }} +void uchar2float_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2float( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_uchar)); }} +void uchar2float_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; 
i++){ uchar2float_sat( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_uchar)); }} +void char2float_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2float( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_char)); }} +void char2float_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2float_sat( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_char)); }} +void ushort2float_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2float( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_ushort)); }} +void ushort2float_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2float_sat( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_ushort)); }} +void short2float_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2float( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_short)); }} +void short2float_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2float_sat( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_short)); }} +void uint2float_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2float( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_uint)); }} +void uint2float_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2float_sat( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_uint)); }} +void int2float_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2float( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_int)); }} +void int2float_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2float_sat( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_int)); }} +void float2float_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_float )); } 
+void float2float_sat_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_float )); } +void double2float_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2float( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_double)); }} +void double2float_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2float_sat( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_double)); }} +void ulong2float_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2float( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_ulong)); }} +void ulong2float_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2float_sat( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_ulong)); }} +void long2float_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2float( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_long)); }} +void long2float_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2float_sat( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_long)); }} +void uchar2double_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2double( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_uchar)); }} +void uchar2double_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2double_sat( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_uchar)); }} +void char2double_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2double( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_char)); }} +void char2double_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2double_sat( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_char)); }} +void ushort2double_many( void *out, void *in, size_t n){size_t i; 
for( i = 0; i < n; i++){ ushort2double( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_ushort)); }} +void ushort2double_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2double_sat( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_ushort)); }} +void short2double_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2double( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_short)); }} +void short2double_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2double_sat( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_short)); }} +void uint2double_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2double( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_uint)); }} +void uint2double_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2double_sat( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_uint)); }} +void int2double_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2double( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_int)); }} +void int2double_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2double_sat( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_int)); }} +void float2double_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2double( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_float)); }} +void float2double_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2double_sat( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_float)); }} +void double2double_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_double )); } +void double2double_sat_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_double )); } +void 
ulong2double_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2double( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_ulong)); }} +void ulong2double_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2double_sat( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_ulong)); }} +void long2double_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2double( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_long)); }} +void long2double_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2double_sat( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_long)); }} +void uchar2ulong_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2ulong( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_uchar)); }} +void uchar2ulong_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2ulong_sat( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_uchar)); }} +void char2ulong_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2ulong( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_char)); }} +void char2ulong_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2ulong_sat( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_char)); }} +void ushort2ulong_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2ulong( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_ushort)); }} +void ushort2ulong_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2ulong_sat( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_ushort)); }} +void short2ulong_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2ulong( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_short)); }} 
+void short2ulong_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2ulong_sat( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_short)); }} +void uint2ulong_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2ulong( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_uint)); }} +void uint2ulong_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2ulong_sat( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_uint)); }} +void int2ulong_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2ulong( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_int)); }} +void int2ulong_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2ulong_sat( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_int)); }} +void float2ulong_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2ulong( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_float)); }} +void float2ulong_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2ulong_sat( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_float)); }} +void double2ulong_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2ulong( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_double)); }} +void double2ulong_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2ulong_sat( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_double)); }} +void ulong2ulong_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_ulong )); } +void ulong2ulong_sat_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_ulong )); } +void long2ulong_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2ulong( (char*) out + i * sizeof(cl_ulong), (char*) in + i * 
sizeof(cl_long)); }} +void long2ulong_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2ulong_sat( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_long)); }} +void uchar2long_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2long( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_uchar)); }} +void uchar2long_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2long_sat( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_uchar)); }} +void char2long_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2long( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_char)); }} +void char2long_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2long_sat( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_char)); }} +void ushort2long_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2long( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_ushort)); }} +void ushort2long_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2long_sat( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_ushort)); }} +void short2long_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2long( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_short)); }} +void short2long_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2long_sat( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_short)); }} +void uint2long_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2long( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_uint)); }} +void uint2long_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2long_sat( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_uint)); }} +void 
int2long_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2long( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_int)); }} +void int2long_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2long_sat( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_int)); }} +void float2long_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2long( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_float)); }} +void float2long_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2long_sat( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_float)); }} +void double2long_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2long( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_double)); }} +void double2long_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2long_sat( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_double)); }} +void ulong2long_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2long( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_ulong)); }} +void ulong2long_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2long_sat( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_ulong)); }} +void long2long_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_long )); } +void long2long_sat_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_long )); } + +Convert gSaturatedConversions[kTypeCount][kTypeCount] = { + { uchar2uchar_sat_many, char2uchar_sat_many, ushort2uchar_sat_many, short2uchar_sat_many, uint2uchar_sat_many, int2uchar_sat_many, float2uchar_sat_many, double2uchar_sat_many, ulong2uchar_sat_many, long2uchar_sat_many, }, + { uchar2char_sat_many, char2char_sat_many, ushort2char_sat_many, short2char_sat_many, 
uint2char_sat_many, int2char_sat_many, float2char_sat_many, double2char_sat_many, ulong2char_sat_many, long2char_sat_many, }, + { uchar2ushort_sat_many, char2ushort_sat_many, ushort2ushort_sat_many, short2ushort_sat_many, uint2ushort_sat_many, int2ushort_sat_many, float2ushort_sat_many, double2ushort_sat_many, ulong2ushort_sat_many, long2ushort_sat_many, }, + { uchar2short_sat_many, char2short_sat_many, ushort2short_sat_many, short2short_sat_many, uint2short_sat_many, int2short_sat_many, float2short_sat_many, double2short_sat_many, ulong2short_sat_many, long2short_sat_many, }, + { uchar2uint_sat_many, char2uint_sat_many, ushort2uint_sat_many, short2uint_sat_many, uint2uint_sat_many, int2uint_sat_many, float2uint_sat_many, double2uint_sat_many, ulong2uint_sat_many, long2uint_sat_many, }, + { uchar2int_sat_many, char2int_sat_many, ushort2int_sat_many, short2int_sat_many, uint2int_sat_many, int2int_sat_many, float2int_sat_many, double2int_sat_many, ulong2int_sat_many,long2int_sat_many, }, + { uchar2float_sat_many, char2float_sat_many, ushort2float_sat_many, short2float_sat_many, uint2float_sat_many, int2float_sat_many, float2float_sat_many, double2float_sat_many, ulong2float_sat_many, long2float_sat_many, }, + { uchar2double_sat_many, char2double_sat_many, ushort2double_sat_many, short2double_sat_many, uint2double_sat_many, int2double_sat_many, float2double_sat_many, double2double_sat_many, ulong2double_sat_many, long2double_sat_many, }, + { uchar2ulong_sat_many, char2ulong_sat_many, ushort2ulong_sat_many, short2ulong_sat_many, uint2ulong_sat_many, int2ulong_sat_many, float2ulong_sat_many, double2ulong_sat_many, ulong2ulong_sat_many, long2ulong_sat_many, }, + { uchar2long_sat_many, char2long_sat_many, ushort2long_sat_many, short2long_sat_many, uint2long_sat_many, int2long_sat_many, float2long_sat_many, double2long_sat_many, ulong2long_sat_many, long2long_sat_many, }, +}; + +Convert gConversions[kTypeCount][kTypeCount] = { + { uchar2uchar_many, char2uchar_many, 
ushort2uchar_many, short2uchar_many, uint2uchar_many, int2uchar_many, float2uchar_many, double2uchar_many, ulong2uchar_many, long2uchar_many, }, + { uchar2char_many, char2char_many, ushort2char_many, short2char_many, uint2char_many, int2char_many, float2char_many, double2char_many, ulong2char_many, long2char_many, }, + { uchar2ushort_many, char2ushort_many, ushort2ushort_many, short2ushort_many, uint2ushort_many, int2ushort_many, float2ushort_many, double2ushort_many, ulong2ushort_many, long2ushort_many, }, + { uchar2short_many, char2short_many, ushort2short_many, short2short_many, uint2short_many, int2short_many, float2short_many, double2short_many, ulong2short_many, long2short_many, }, + { uchar2uint_many, char2uint_many, ushort2uint_many, short2uint_many, uint2uint_many, int2uint_many, float2uint_many, double2uint_many, ulong2uint_many, long2uint_many, }, + { uchar2int_many, char2int_many, ushort2int_many, short2int_many, uint2int_many, int2int_many, float2int_many, double2int_many, ulong2int_many, long2int_many, }, + { uchar2float_many, char2float_many, ushort2float_many, short2float_many, uint2float_many, int2float_many, float2float_many, double2float_many, ulong2float_many, long2float_many, }, + { uchar2double_many, char2double_many, ushort2double_many, short2double_many, uint2double_many, int2double_many, float2double_many, double2double_many, ulong2double_many, long2double_many, }, + { uchar2ulong_many, char2ulong_many, ushort2ulong_many, short2ulong_many, uint2ulong_many, int2ulong_many, float2ulong_many, double2ulong_many, ulong2ulong_many, long2ulong_many, }, + { uchar2long_many, char2long_many, ushort2long_many, short2long_many, uint2long_many, int2long_many, float2long_many, double2long_many, ulong2long_many, long2long_many, }, +}; diff --git a/test_conformance/conversions/basic_test_conversions.h b/test_conformance/conversions/basic_test_conversions.h new file mode 100644 index 00000000..ab8c68cd --- /dev/null +++ 
b/test_conformance/conversions/basic_test_conversions.h @@ -0,0 +1,73 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef BASIC_TEST_CONVERSIONS_H +#define BASIC_TEST_CONVERSIONS_H + +#include "../../test_common/harness/compat.h" + +#if !defined(_WIN32) +#include +#endif + +#include "../../test_common/harness/errorHelpers.h" +#include "../../test_common/harness/rounding_mode.h" + +#include +#if defined( __APPLE__ ) + #include +#else + #include +#endif + +#include "../../test_common/harness/mt19937.h" + +typedef void (*Convert)( void *dest, void *src, size_t ); + +#define kVectorSizeCount 6 +#define kMaxVectorSize 16 + +typedef enum +{ + kUnsaturated = 0, + kSaturated, + + kSaturationModeCount +}SaturationMode; + +extern Convert gConversions[kTypeCount][kTypeCount]; // [dest format][source format] +extern Convert gSaturatedConversions[kTypeCount][kTypeCount]; // [dest format][source format] +extern const char *gTypeNames[ kTypeCount ]; +extern const char *gRoundingModeNames[ kRoundingModeCount ]; // { "", "_rte", "_rtp", "_rtn", "_rtz" } +extern const char *gSaturationNames[ kSaturationModeCount ]; // { "", "_sat" } +extern const char *gVectorSizeNames[kVectorSizeCount]; // { "", "2", "4", "8", "16" } +extern size_t gTypeSizes[ kTypeCount ]; +extern int gIsEmbedded; + +//Functions for clamping floating point numbers into the representable range for the type +typedef float (*clampf)( float ); 
+typedef double (*clampd)( double ); + +extern clampf gClampFloat[ kTypeCount ][kRoundingModeCount]; +extern clampd gClampDouble[ kTypeCount ][kRoundingModeCount]; + +typedef void (*InitDataFunc)( void *dest, SaturationMode, RoundingMode, Type destType, uint64_t start, int count, MTdata d ); +extern InitDataFunc gInitFunctions[ kTypeCount ]; + +typedef int (*CheckResults)( void *out1, void *out2, void *allowZ, uint32_t count, int vectorSize ); +extern CheckResults gCheckResults[ kTypeCount ]; + +#endif /* BASIC_TEST_CONVERSIONS_H */ + diff --git a/test_conformance/conversions/fplib.c b/test_conformance/conversions/fplib.c new file mode 100644 index 00000000..a18b919c --- /dev/null +++ b/test_conformance/conversions/fplib.c @@ -0,0 +1,221 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include +#include +#include "fplib.h" + +#define FLT_MANT_DIG 24 +#define as_float(x) (*((float *)(&x))) +#define as_long(x) (*((int64_t *)(&x))) + +static uint32_t clz(uint64_t value) +{ + uint32_t num_zeros; + + for( num_zeros = 0; num_zeros < (sizeof(uint64_t)*8); num_zeros++) + { + if(0x8000000000000000 & (value << num_zeros)) + break; + } + return num_zeros; +} + +float qcom_s64_2_f32(int64_t data, bool sat, roundingMode rnd) +{ + switch (rnd) { + case qcomRTZ: { + int sign = 0; + if (!data) + return 0.0f; + if (data < 0){ + data = - data; + sign = 1; + } + uint32_t exponent = (127 + 64 - clz(data) - 1) << (FLT_MANT_DIG - 1); //add 1 for the implied 1.0 in normalized fp32 numbers + int mantShift = 40 - clz(data); + uint32_t mantissa; + if (mantShift >= 0) + mantissa = (uint32_t)((uint64_t)data >> mantShift); + else + mantissa = (uint32_t)((uint64_t)data << -mantShift); + mantissa &= 0x7fffff;//mask off the leading 1 + + uint32_t result = exponent | mantissa; + if (sign) + result |= 0x80000000; + return as_float(result); + break; + } + case qcomRTE: return (float)(data); break; + case qcomRTP: { + int sign = 0; + int inExact = 0; + uint32_t f = 0xdf000000; + if (!data) + return 0.0f; + if (data == 0x8000000000000000) + return as_float(f); + if (data < 0){ + data = - data; + sign = 1; + } + uint32_t exponent = (127 + 64 - clz(data) - 1) << (FLT_MANT_DIG - 1); //add 1 for the implied 1.0 in normalized fp32 numbers + int mantShift = 40 - clz(data); + uint32_t mantissa; + if (mantShift >= 0){ + uint64_t temp = (uint64_t)data >> mantShift; + uint64_t mask = (1 << mantShift) - 1; + if ((temp << mantShift) != data) + inExact = 1; + mantissa = (uint32_t)temp; + } + else + { + mantissa = (uint32_t)((uint64_t)data << -mantShift); + } + mantissa &= 0x7fffff;//mask off the leading 1 + + uint32_t result = exponent | mantissa; + if (sign) + result |= 0x80000000; + if (sign) + return as_float(result); // for negative inputs return rtz results + else + { + if(inExact) + 
{ // for positive inputs return higher next fp + uint32_t high_float = 0x7f7fffff; + return nextafterf(as_float(result), as_float(high_float)); // could be simplified with some inc and carry operation + } + else + return as_float(result); + } + } + break; + case qcomRTN: { + int sign = 0; + int inExact = 0; + uint32_t f = 0xdf000000; + if (!data) + return 0.0f; + if (data == 0x8000000000000000) + return as_float(f); + if (data < 0){ + data = - data; + sign = 1; + } + uint32_t exponent = (127 + 64 - clz(data) - 1) << (FLT_MANT_DIG - 1); //add 1 for the implied 1.0 in normalized fp32 numbers + int mantShift = 40 - clz(data); + uint32_t mantissa; + if (mantShift >= 0){ + uint64_t temp = (uint64_t)data >> mantShift; + uint64_t mask = (1 << mantShift) - 1; + if (temp << mantShift != data) + inExact = 1; + mantissa = (uint32_t)temp; + } + else + mantissa = (uint32_t)((uint64_t)data << -mantShift); + mantissa &= 0x7fffff;//mask off the leading 1 + + uint32_t result = exponent | mantissa; + if (sign) + result |= 0x80000000; + if (!sign) + return as_float(result); // for positive inputs return RTZ result + else{ + if(inExact){ // for negative inputs find the lower next fp number + uint32_t low_float = 0xff7fffff; + return nextafterf(as_float(result), as_float(low_float)); // could be simplified with some inc and carry operation + } + else + return as_float(result); + } + } + } + return 0.0f; +} + +float qcom_u64_2_f32(uint64_t data, bool sat, roundingMode rnd) +{ + switch (rnd) { + case qcomRTZ: { + if (!data) + return 0.0f; + uint32_t exponent = (127 + 64 - clz(data) - 1) << (FLT_MANT_DIG - 1); //add 1 for the implied 1.0 in normalized fp32 numbers + int mantShift = 40 - clz(data); + uint32_t mantissa; + if (mantShift >= 0) + mantissa = (uint32_t)(data >> mantShift); + else + mantissa = (uint32_t)(data << -mantShift); + mantissa &= 0x7fffff;//mask off the leading 1 + + uint32_t result = exponent | mantissa; + return as_float(result); + break; + } + case qcomRTE: return 
(float)(data); break; + case qcomRTP: { + int inExact = 0; + if (!data) + return 0.0f; + uint32_t exponent = (127 + 64 - clz(data) - 1) << (FLT_MANT_DIG - 1); //add 1 for the implied 1.0 in normalized fp32 numbers + int mantShift = 40 - clz(data); + uint32_t mantissa; + if (mantShift >= 0){ + uint64_t temp = data >> mantShift; + uint64_t mask = (1 << mantShift) - 1; + if (temp << mantShift != data) + inExact = 1; + mantissa = (uint32_t)temp; + } + else + mantissa = (uint32_t)(data << -mantShift); + mantissa &= 0x7fffff;//mask off the leading 1 + + uint32_t result = exponent | mantissa; + if(inExact){ // for positive inputs return higher next fp + uint32_t high_float = 0x7f7fffff; + return nextafterf(as_float(result), as_float(high_float)); // could be simplified with some inc and carry operation + } + else + return as_float(result); + } + case qcomRTN: { + int inExact = 0; + if (!data) + return 0.0f; + uint32_t exponent = (127 + 64 - clz(data) - 1) << (FLT_MANT_DIG - 1); //add 1 for the implied 1.0 in normalized fp32 numbers + int mantShift = 40 - clz(data); + uint32_t mantissa; + if (mantShift >= 0){ + uint64_t temp = (uint64_t)data >> mantShift; + uint64_t mask = (1 << mantShift) - 1; + if (temp << mantShift != data) + inExact = 1; + mantissa = (uint32_t)temp; + } + else + mantissa = (uint32_t)((uint64_t)data << -mantShift); + mantissa &= 0x7fffff;//mask off the leading 1 + + uint32_t result = exponent | mantissa; + return as_float(result); // for positive inputs return RTZ result + } + } + return 0.0f; +} diff --git a/test_conformance/conversions/fplib.h b/test_conformance/conversions/fplib.h new file mode 100644 index 00000000..576e4afe --- /dev/null +++ b/test_conformance/conversions/fplib.h @@ -0,0 +1,29 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include <stdint.h>
+
+typedef enum
+{
+    qcomRTZ = 0,
+    qcomRTE,
+    qcomRTP,
+    qcomRTN,
+
+    qcomRoundingModeCount
+}roundingMode;
+
+float qcom_u64_2_f32(uint64_t data, bool sat, roundingMode rnd);
+float qcom_s64_2_f32(int64_t data, bool sat, roundingMode rnd);
diff --git a/test_conformance/conversions/run_batch b/test_conformance/conversions/run_batch
new file mode 100644
index 00000000..a99abeee
--- /dev/null
+++ b/test_conformance/conversions/run_batch
@@ -0,0 +1,16 @@
+#!/usr/bin/csh
+#
+# This runs the conversions in 32- and 64-bit modes, split into 9 processes for better throughput.
+# It is intended to allow for quicker debugging turnaround for code development purposes +# + +/usr/bin/arch -i386 ./test_conversions CL_DEVICE_TYPE_CPU 0 100 > 0_32.txt; /usr/bin/arch -x86_64 ./test_conversions CL_DEVICE_TYPE_CPU 0 100 > 0_64.txt & +/usr/bin/arch -i386 ./test_conversions CL_DEVICE_TYPE_CPU 100 100 > 1_32.txt; /usr/bin/arch -x86_64 ./test_conversions CL_DEVICE_TYPE_CPU 100 100 > 1_64.txt & +/usr/bin/arch -i386 ./test_conversions CL_DEVICE_TYPE_CPU 200 100 > 2_32.txt; /usr/bin/arch -x86_64 ./test_conversions CL_DEVICE_TYPE_CPU 200 100 > 2_64.txt & +/usr/bin/arch -i386 ./test_conversions CL_DEVICE_TYPE_CPU 300 100 > 3_32.txt; /usr/bin/arch -x86_64 ./test_conversions CL_DEVICE_TYPE_CPU 300 100 > 3_64.txt & +/usr/bin/arch -i386 ./test_conversions CL_DEVICE_TYPE_CPU 400 100 > 4_32.txt; /usr/bin/arch -x86_64 ./test_conversions CL_DEVICE_TYPE_CPU 400 100 > 4_64.txt & +/usr/bin/arch -i386 ./test_conversions CL_DEVICE_TYPE_CPU 500 100 > 5_32.txt; /usr/bin/arch -x86_64 ./test_conversions CL_DEVICE_TYPE_CPU 500 100 > 5_64.txt & +/usr/bin/arch -i386 ./test_conversions CL_DEVICE_TYPE_CPU 600 100 > 6_32.txt; /usr/bin/arch -x86_64 ./test_conversions CL_DEVICE_TYPE_CPU 600 100 > 6_64.txt & +/usr/bin/arch -i386 ./test_conversions CL_DEVICE_TYPE_CPU 700 100 > 7_32.txt; /usr/bin/arch -x86_64 ./test_conversions CL_DEVICE_TYPE_CPU 700 100 > 7_64.txt & +/usr/bin/arch -i386 ./test_conversions CL_DEVICE_TYPE_CPU 800 100 > 8_32.txt; /usr/bin/arch -x86_64 ./test_conversions CL_DEVICE_TYPE_CPU 800 100 > 8_64.txt & + diff --git a/test_conformance/conversions/test_conversions.c b/test_conformance/conversions/test_conversions.c new file mode 100644 index 00000000..fc6f6315 --- /dev/null +++ b/test_conformance/conversions/test_conversions.c @@ -0,0 +1,1865 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "../../test_common/harness/compat.h"
+#include "../../test_common/harness/rounding_mode.h"
+#include "../../test_common/harness/ThreadPool.h"
+#include "../../test_common/harness/kernelHelpers.h"
+#include "../../test_common/harness/parseParameters.h"
+#if !defined(_WIN32) && !defined(__ANDROID__)
+#include <sys/sysctl.h>
+#endif
+
+#if defined( __linux__ )
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <linux/sysctl.h>
+#endif
+#if defined(__linux__)
+#include <sys/param.h>
+#include <sys/sysctl.h>
+#endif
+
+#include "mingw_compat.h"
+#if defined(__MINGW32__)
+#include <sys/param.h>
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#if !defined(_WIN32)
+#include <libgen.h>
+#include <sys/param.h>
+#endif
+#include <time.h>
+
+#include "Sleep.h"
+#include "basic_test_conversions.h"
+
+#pragma STDC FENV_ACCESS ON
+
+#if (defined(_WIN32) && defined (_MSC_VER))
+// need for _controlfp_s and rouinding modes in RoundingMode
+#include "../../test_common/harness/testHarness.h"
+#endif
+
+#pragma mark -
+#pragma mark globals
+
+#define BUFFER_SIZE (1024*1024)
+#define kPageSize 4096
+#define EMBEDDED_REDUCTION_FACTOR 16
+#define PERF_LOOP_COUNT 100
+
+#define kCallStyleCount (kVectorSizeCount + 1 /* for implicit scalar */)
+
+#if defined( __arm__ ) && defined( __GNUC__ )
+#include "fplib.h"
+ extern bool qcom_sat;
+ extern roundingMode qcom_rm;
+#endif
+
+const char ** argList = NULL;
+int argCount = 0;
+cl_device_type gDeviceType = CL_DEVICE_TYPE_DEFAULT;
+cl_device_id gDevice = NULL;
+cl_context gContext = NULL;
+cl_command_queue gQueue = NULL;
+char appName[64] = "ctest";
+int gTestCount = 0;
+int gFailCount = 0;
+int gStartTestNumber = -1;
+int gEndTestNumber = 0;
+#if
defined( __APPLE__ ) +int gTimeResults = 1; +#else +int gTimeResults = 0; +#endif +int gReportAverageTimes = 0; +void *gIn = NULL; +void *gRef = NULL; +void *gAllowZ = NULL; +void *gOut[ kCallStyleCount ] = { NULL }; +cl_mem gInBuffer; +cl_mem gOutBuffers[ kCallStyleCount ]; +size_t gComputeDevices = 0; +uint32_t gDeviceFrequency = 0; +int gWimpyMode = 0; +int gSkipTesting = 0; +int gForceFTZ = 0; +int gMultithread = 1; +int gIsRTZ = 0; +uint32_t gSimdSize = 1; +int gHasDouble = 0; +int gIsEmbedded = 0; +int gHasLong = 1; +int gTestDouble = 1; +cl_uint choosen_device_index = 0; +const char * sizeNames[] = { "", "", "2", "3", "4", "8", "16" }; +const int vectorSizes[] = { 1, 1, 2, 3, 4, 8, 16 }; +int gMinVectorSize = 0; +int gMaxVectorSize = sizeof(vectorSizes) / sizeof( vectorSizes[0] ); + +#pragma mark - +#pragma mark Declarations + +static int ParseArgs( int argc, const char **argv ); +static void PrintUsage( void ); +static void PrintArch(void); +static int InitCL( void ); +static int GetTestCase( const char *name, Type *outType, Type *inType, SaturationMode *sat, RoundingMode *round ); +static int DoTest( Type outType, Type inType, SaturationMode sat, RoundingMode round, MTdata d ); +static cl_program MakeProgram( Type outType, Type inType, SaturationMode sat, RoundingMode round, int vectorSize, cl_kernel *outKernel ); +static int RunKernel( cl_kernel kernel, void *inBuf, void *outBuf, size_t blockCount ); + +void *FlushToZero( void ); +void UnFlushToZero( void *); + +static cl_program CreateImplicitConvertProgram( Type outType, Type inType, SaturationMode sat, RoundingMode round, int vectorSize, char testName[256], cl_int *error ); +static cl_program CreateStandardProgram( Type outType, Type inType, SaturationMode sat, RoundingMode round, int vectorSize, char testName[256], cl_int *error ); + + +// Windows (since long double got deprecated) sets the x87 to 53-bit precision +// (that's x87 default state). 
This causes problems with the tests that +// convert long and ulong to float and double or otherwise deal with values +// that need more precision than 53-bit. So, set the x87 to 64-bit precision. +static inline void Force64BitFPUPrecision(void) +{ +#if __MINGW32__ + // The usual method is to use _controlfp as follows: + // #include + // _controlfp(_PC_64, _MCW_PC); + // + // _controlfp is available on MinGW32 but not on MinGW64. Instead of having + // divergent code just use inline assembly which works for both. + unsigned short int orig_cw = 0; + unsigned short int new_cw = 0; + __asm__ __volatile__ ("fstcw %0":"=m" (orig_cw)); + new_cw = orig_cw | 0x0300; // set precision to 64-bit + __asm__ __volatile__ ("fldcw %0"::"m" (new_cw)); +#else + /* Implement for other platforms if needed */ +#endif +} + + +#pragma mark - + +int main (int argc, const char **argv ) +{ + int error, i, testNumber = -1; + Type inType, outType; + RoundingMode round; + SaturationMode sat; + MTdata d = NULL; + cl_uint seed = (cl_uint) time( NULL ); + + test_start(); + argc = parseCustomParam(argc, argv); + if (argc == -1) + { + test_finish(); + return -1; + } + + if( (error = ParseArgs( argc, argv )) ) + return error; + + //Turn off sleep so our tests run to completion + PreventSleep(); + atexit( ResumeSleep ); + + // Init CL data structures + if( (error = InitCL()) ) + return error; + + if(!gMultithread) + SetThreadCount(1); + +#if defined(_MSC_VER) && defined(_M_IX86) + // VS2005 (and probably others, since long double got deprecated) sets + // the x87 to 53-bit precision. This causes problems with the tests + // that convert long and ulong to float and double, since they deal + // with values that need more precision than that. So, set the x87 + // to 64-bit precision. 
+ unsigned int ignored; + _controlfp_s(&ignored, _PC_64, _MCW_PC); +#endif + + vlog( "===========================================================\n" ); + vlog( "Random seed: %u\n", seed ); + d = init_genrand( seed ); + int startMinVectorSize = gMinVectorSize; + if( argCount ) + { + for( i = 0; i < argCount; i++ ) + { + if( GetTestCase( argList[i], &outType, &inType, &sat, &round ) ) + { + vlog_error( "\n\t\t**** ERROR: Unable to parse function name %s. Skipping.... *****\n\n", argList[i] ); + continue; + } + + // skip double if we don't have it + if( !gTestDouble && (inType == kdouble || outType == kdouble ) ) + { + if( gHasDouble ) + { + vlog_error( "\t *** convert_%sn%s%s( %sn ) FAILED ** \n", gTypeNames[ outType ], gSaturationNames[ sat ], gRoundingModeNames[round], gTypeNames[inType] ); + vlog( "\t\tcl_khr_fp64 enabled, but double testing turned off.\n" ); + } + + continue; + } + + // skip longs on embedded + if( ! gHasLong && + (inType == klong || outType == klong || inType == kulong || outType == kulong)) + continue; + + // Skip the implicit converts if the rounding mode is not default or test is saturated + if( 0 == startMinVectorSize ) + { + if( sat || round != kDefaultRoundingMode ) + gMinVectorSize = 1; + else + gMinVectorSize = 0; + } + + if( (error = DoTest( outType, inType, sat, round, d )) ) + vlog_error( "\t *** convert_%sn%s%s( %sn ) FAILED ** \n", gTypeNames[ outType ], gSaturationNames[ sat ], gRoundingModeNames[round], gTypeNames[inType] ); + } + } + else + { + + for( outType = (Type)0; outType < kTypeCount; outType = (Type)(outType+1) ) + { + for( inType = (Type)0; inType < kTypeCount; inType = (Type)(inType+1) ) + { + // skip longs on embedded + if( ! 
gHasLong && + (inType == klong || outType == klong || inType == kulong || outType == kulong)) + continue; + + for( sat = (SaturationMode)0; sat < kSaturationModeCount; sat = (SaturationMode)(sat+1) ) + { + //skip illegal saturated conversions to float type + if( kSaturated == sat && ( outType == kfloat || outType == kdouble ) ) + continue; + + for( round = (RoundingMode)0; round < kRoundingModeCount; round = (RoundingMode)(round+1) ) + { + if( ++testNumber < gStartTestNumber ) + { + // vlog( "%d) skipping convert_%sn%s%s( %sn )\n", testNumber, gTypeNames[ outType ], gSaturationNames[ sat ], gRoundingModeNames[round], gTypeNames[inType] ); + continue; + } + else + if( gEndTestNumber > 0 && testNumber >= gEndTestNumber ) + goto exit; + + vlog( "%d) Testing convert_%sn%s%s( %sn ):\n", testNumber, gTypeNames[ outType ], gSaturationNames[ sat ], gRoundingModeNames[round], gTypeNames[inType] ); + + // skip double if we don't have it + if( ! gTestDouble && (inType == kdouble || outType == kdouble ) ) + { + if( gHasDouble ) + { + vlog_error( "\t *** %d) convert_%sn%s%s( %sn ) FAILED ** \n", testNumber, gTypeNames[ outType ], gSaturationNames[ sat ], gRoundingModeNames[round], gTypeNames[inType] ); + vlog( "\t\tcl_khr_fp64 enabled, but double testing turned off.\n" ); + } + continue; + } + + // Skip the implicit converts if the rounding mode is not default or test is saturated + if( 0 == startMinVectorSize ) + { + if( sat || round != kDefaultRoundingMode ) + gMinVectorSize = 1; + else + gMinVectorSize = 0; + } + + if( (error = DoTest( outType, inType, sat, round, d) ) ) + vlog_error( "\t *** %d) convert_%sn%s%s( %sn ) FAILED ** \n", testNumber, gTypeNames[ outType ], gSaturationNames[ sat ], gRoundingModeNames[round], gTypeNames[inType] ); + } + } + } + } + } + +exit: + free_mtdata(d); + vlog( "\n\n" ); + vlog( "Tests completed: %d\n", gTestCount ); + + error = clFinish(gQueue); + if (error) + vlog_error("clFinish failed: %d\n", error); + + if (gFailCount == 0 && gTestCount 
>= 0) { + vlog("PASSED %d of %d tests.\n", gTestCount, gTestCount); + } else if (gFailCount > 0) { + vlog_error("FAILED %d of %d tests.\n", gFailCount, gTestCount); + } + + clReleaseMemObject(gInBuffer); + + for( i = 0; i < kCallStyleCount; i++ ) { + clReleaseMemObject(gOutBuffers[i]); + } + clReleaseCommandQueue(gQueue); + clReleaseContext(gContext); + + test_finish(); + if (gFailCount > 0) + return -1; + + return 0; +} + +#pragma mark - +#pragma mark setup + +static int ParseArgs( int argc, const char **argv ) +{ + int i; + argList = (const char **)calloc( argc - 1, sizeof( char*) ); + argCount = 0; + + if( NULL == argList && argc > 1 ) + return -1; + +#if (defined( __APPLE__ ) || defined(__linux__) || defined (__MINGW32__)) + { // Extract the app name + char baseName[ MAXPATHLEN ]; + strncpy( baseName, argv[0], MAXPATHLEN ); + char *base = basename( baseName ); + if( NULL != base ) + { + strncpy( appName, base, sizeof( appName ) ); + appName[ sizeof( appName ) -1 ] = '\0'; + } + } +#elif defined (_WIN32) + { + char fname[_MAX_FNAME + _MAX_EXT + 1]; + char ext[_MAX_EXT]; + + errno_t err = _splitpath_s( argv[0], NULL, 0, NULL, 0, + fname, _MAX_FNAME, ext, _MAX_EXT ); + if (err == 0) { // no error + strcat (fname, ext); //just cat them, size of frame can keep both + strncpy (appName, fname, sizeof(appName)); + appName[ sizeof( appName ) -1 ] = '\0'; + } + } +#endif + + /* Check for environment variable to set device type */ + char *env_mode = getenv( "CL_DEVICE_TYPE" ); + if( env_mode != NULL ) + { + vlog( "CL_DEVICE_TYPE: %s\n", env_mode ); + if( strcmp( env_mode, "gpu" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_GPU" ) == 0 ) + gDeviceType = CL_DEVICE_TYPE_GPU; + else if( strcmp( env_mode, "cpu" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_CPU" ) == 0 ) + gDeviceType = CL_DEVICE_TYPE_CPU; + else if( strcmp( env_mode, "accelerator" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_ACCELERATOR" ) == 0 ) + gDeviceType = CL_DEVICE_TYPE_ACCELERATOR; + else if( strcmp( 
env_mode, "default" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_DEFAULT" ) == 0 ) + gDeviceType = CL_DEVICE_TYPE_DEFAULT; + else + { + vlog_error( "Unknown CL_DEVICE_TYPE env variable setting: %s.\nAborting...\n", env_mode ); + abort(); + } + } + + + vlog( "\n%s", appName ); + for( i = 1; i < argc; i++ ) + { + const char *arg = argv[i]; + if( NULL == arg ) + break; + + vlog( "\t%s", arg ); + if( arg[0] == '-' ) + { + arg++; + while( *arg != '\0' ) + { + switch( *arg ) + { + case 'd': + gTestDouble ^= 1; + break; + case 'l': + gSkipTesting ^= 1; + break; + case 'm': + gMultithread ^= 1; + break; + case 'w': + gWimpyMode ^= 1; + break; + case 'z': + gForceFTZ ^= 1; + break; + case 't': + gTimeResults ^= 1; + break; + case 'a': + gReportAverageTimes ^= 1; + break; + case '1': + if( arg[1] == '6' ) + { + gMinVectorSize = 6; + gMaxVectorSize = 7; + arg++; + } + else + { + gMinVectorSize = 0; + gMaxVectorSize = 2; + } + break; + + case '2': + gMinVectorSize = 2; + gMaxVectorSize = 3; + break; + + case '3': + gMinVectorSize = 3; + gMaxVectorSize = 4; + break; + + case '4': + gMinVectorSize = 4; + gMaxVectorSize = 5; + break; + + case '8': + gMinVectorSize = 5; + gMaxVectorSize = 6; + break; + + default: + vlog( " <-- unknown flag: %c (0x%2.2x)\n)", *arg, *arg ); + PrintUsage(); + return -1; + } + arg++; + } + } + // Check if a particular device id was requested + else if (strlen(argv[i]) >= 3 && argv[i][0] == 'i' && argv[i][1] =='d') + { + choosen_device_index = atoi(&(argv[i][2])); + } + else + { + char *t = NULL; + long number = strtol( arg, &t, 0 ); + if( t != arg ) + { + if( gStartTestNumber != -1 ) + gEndTestNumber = gStartTestNumber + (int) number; + else + gStartTestNumber = (int) number; + } + else if( 0 == strcmp( arg, "CL_DEVICE_TYPE_CPU")) + gDeviceType = CL_DEVICE_TYPE_CPU; + else if( 0 == strcmp( arg, "CL_DEVICE_TYPE_GPU")) + gDeviceType = CL_DEVICE_TYPE_GPU; + else if( 0 == strcmp( arg, "CL_DEVICE_TYPE_ACCELERATOR")) + gDeviceType = 
CL_DEVICE_TYPE_ACCELERATOR; + else if( 0 == strcmp( arg, "CL_DEVICE_TYPE_DEFAULT")) + gDeviceType = CL_DEVICE_TYPE_DEFAULT; + else + { + argList[ argCount ] = arg; + argCount++; + } + } + } + + // Check for the wimpy mode environment variable + if (getenv("CL_WIMPY_MODE")) { + vlog( "\n" ); + vlog( "*** Detected CL_WIMPY_MODE env ***\n" ); + gWimpyMode = 1; + } + + vlog( "\n" ); + + vlog( "Test binary built %s %s\n", __DATE__, __TIME__ ); + + PrintArch(); + + if( gWimpyMode ) + { + vlog( "\n" ); + vlog( "*** WARNING: Testing in Wimpy mode! ***\n" ); + vlog( "*** Wimpy mode is not sufficient to verify correctness. ***\n" ); + vlog( "*** It gives warm fuzzy feelings and then nevers calls. ***\n\n" ); + } + + return 0; +} + +static void PrintUsage( void ) +{ + int i; + vlog( "%s [-wz#]: \n", appName ); + vlog( "\ttest names:\n" ); + vlog( "\t\tdestFormat<_sat><_round>_sourceFormat\n" ); + vlog( "\t\t\tPossible format types are:\n\t\t\t\t" ); + for( i = 0; i < kTypeCount; i++ ) + vlog( "%s, ", gTypeNames[i] ); + vlog( "\n\n\t\t\tPossible saturation values are: (empty) and _sat\n" ); + vlog( "\t\t\tPossible rounding values are:\n\t\t\t\t(empty), " ); + for( i = 1; i < kRoundingModeCount; i++ ) + vlog( "%s, ", gRoundingModeNames[i] ); + vlog( "\n\t\t\tExamples:\n" ); + vlog( "\t\t\t\tulong_short converts short to ulong\n" ); + vlog( "\t\t\t\tchar_sat_rte_float converts float to char with saturated clipping in round to nearest rounding mode\n\n" ); + vlog( "\toptions:\n" ); + vlog( "\t\t-d\tToggle testing of double precision. On by default if cl_khr_fp64 is enabled, ignored otherwise.\n" ); + vlog( "\t\t-l\tToggle link check mode. When on, testing is skipped, and we just check to see that the kernels build. (Off by default.)\n" ); + vlog( "\t\t-m\tToggle Multithreading. (On by default.)\n" ); + vlog( "\t\t-w\tToggle wimpy mode. When wimpy mode is on, we run a very small subset of the tests for each fn. NOT A VALID TEST! 
(Off by default.)\n" ); + vlog( "\t\t-z\tToggle flush to zero mode (Default: per device)\n" ); + vlog( "\t\t-#\tTest just vector size given by #, where # is an element of the set {1,2,3,4,8,16}\n" ); + vlog( "\n" ); + vlog( "You may also pass the number of the test on which to start.\nA second number can be then passed to indicate how many tests to run\n\n" ); +} + +static void PrintArch( void ) +{ + vlog( "sizeof( void*) = %ld\n", sizeof( void *) ); +#if defined( __ppc__ ) + vlog( "ARCH:\tppc\n" ); +#elif defined( __ppc64__ ) + vlog( "ARCH:\tppc64\n" ); +#elif defined( __PPC__ ) + vlog( "ARCH:\tppc\n" ); +#elif defined( __i386__ ) + vlog( "ARCH:\ti386\n" ); +#elif defined( __x86_64__ ) + vlog( "ARCH:\tx86_64\n" ); +#elif defined( __arm__ ) + vlog( "ARCH:\tarm\n" ); +// Add 64 bit support +#elif defined(__aarch64__) + vlog( "ARCH:\taarch64\n" ); +#elif defined (_WIN32) + vlog( "ARCH:\tWindows\n" ); +#else +#error unknown arch +#endif + +#if defined( __APPLE__ ) + + int type = 0; + size_t typeSize = sizeof( type ); + sysctlbyname( "hw.cputype", &type, &typeSize, NULL, 0 ); + vlog( "cpu type:\t%d\n", type ); + typeSize = sizeof( type ); + sysctlbyname( "hw.cpusubtype", &type, &typeSize, NULL, 0 ); + vlog( "cpu subtype:\t%d\n", type ); + +#elif defined( __linux__ ) && !defined(__aarch64__) +#define OSNAMESZ 100 + int _sysctl(struct __sysctl_args *args ); + + struct __sysctl_args args; + char osname[OSNAMESZ]; + size_t osnamelth; + int name[] = { CTL_KERN, KERN_OSTYPE }; + memset(&args, 0, sizeof(struct __sysctl_args)); + args.name = name; + args.nlen = sizeof(name)/sizeof(name[0]); + args.oldval = osname; + args.oldlenp = &osnamelth; + + osnamelth = sizeof(osname); + + if (syscall(SYS__sysctl, &args) == -1) { + vlog( "_sysctl error\n" ); + } + else { + vlog("this machine is running %*s\n", osnamelth, osname); + } + +#endif +} + + + + + +static int GetTestCase( const char *name, Type *outType, Type *inType, SaturationMode *sat, RoundingMode *round ) +{ + int i; + + 
//Find the return type + for( i = 0; i < kTypeCount; i++ ) + if( name == strstr( name, gTypeNames[i] ) ) + { + *outType = (Type)i; + name += strlen( gTypeNames[i] ); + + break; + } + + if( i == kTypeCount ) + return -1; + + // Check to see if _sat appears next + *sat = (SaturationMode)0; + for( i = 1; i < kSaturationModeCount; i++ ) + if( name == strstr( name, gSaturationNames[i] ) ) + { + *sat = (SaturationMode)i; + name += strlen( gSaturationNames[i] ); + break; + } + + *round = (RoundingMode)0; + for( i = 1; i < kRoundingModeCount; i++ ) + if( name == strstr( name, gRoundingModeNames[i] ) ) + { + *round = (RoundingMode)i; + name += strlen( gRoundingModeNames[i] ); + break; + } + + if( *name != '_' ) + return -2; + name++; + + for( i = 0; i < kTypeCount; i++ ) + if( name == strstr( name, gTypeNames[i] ) ) + { + *inType = (Type)i; + name += strlen( gTypeNames[i] ); + + break; + } + + if( i == kTypeCount ) + return -3; + + if( *name != '\0' ) + return -4; + + return 0; +} + +#pragma mark - +#pragma mark OpenCL + +static void CL_CALLBACK notify_callback(const char *errinfo, const void *private_info, size_t cb, void *user_data) +{ + vlog( "%s\n", errinfo ); +} + +static int InitCL( void ) +{ + int error, i; + size_t configSize = sizeof( gComputeDevices ); + + cl_platform_id platform = NULL; + cl_uint num_devices = 0; + cl_device_id *devices = NULL; + + /* Get the platform */ + error = clGetPlatformIDs(1, &platform, NULL); + if (error) { + vlog_error( "clGetPlatformIDs failed: %d\n", error ); + return error; + } + + /* Get the number of requested devices */ + error = clGetDeviceIDs(platform, gDeviceType, 0, NULL, &num_devices ); + if (error) { + vlog_error( "clGetDeviceIDs failed: %d\n", error ); + return error; + } + + devices = (cl_device_id *) malloc( num_devices * sizeof( cl_device_id ) ); + if (!devices || choosen_device_index >= num_devices) { + vlog_error( "device index out of range -- choosen_device_index (%d) >= num_devices (%d)\n", choosen_device_index, 
num_devices ); + return -1; + } + + /* Get the requested device */ + error = clGetDeviceIDs(platform, gDeviceType, num_devices, devices, NULL ); + if (error) { + vlog_error( "clGetDeviceIDs failed: %d\n", error ); + return error; + } + + gDevice = devices[choosen_device_index]; + free(devices); + devices = NULL; + + if( (error = clGetDeviceInfo( gDevice, CL_DEVICE_MAX_COMPUTE_UNITS, configSize, &gComputeDevices, NULL )) ) + gComputeDevices = 1; + + configSize = sizeof( gDeviceFrequency ); + if( (error = clGetDeviceInfo( gDevice, CL_DEVICE_MAX_CLOCK_FREQUENCY, configSize, &gDeviceFrequency, NULL )) ) + gDeviceFrequency = 0; + + cl_device_fp_config floatCapabilities = 0; + if( (error = clGetDeviceInfo(gDevice, CL_DEVICE_SINGLE_FP_CONFIG, sizeof(floatCapabilities), &floatCapabilities, NULL))) + floatCapabilities = 0; + if(0 == (CL_FP_DENORM & floatCapabilities) ) + gForceFTZ ^= 1; + + if( 0 == (floatCapabilities & CL_FP_ROUND_TO_NEAREST ) ) + { + char profileStr[128] = ""; + // Verify that we are an embedded profile device + if( (error = clGetDeviceInfo( gDevice, CL_DEVICE_PROFILE, sizeof( profileStr ), profileStr, NULL ) ) ) + { + vlog_error( "FAILURE: Could not get device profile: error %d\n", error ); + return -1; + } + + if( strcmp( profileStr, "EMBEDDED_PROFILE" ) ) + { + vlog_error( "FAILURE: non-embedded profile device does not support CL_FP_ROUND_TO_NEAREST\n" ); + return -1; + } + + if( 0 == (floatCapabilities & CL_FP_ROUND_TO_ZERO ) ) + { + vlog_error( "FAILURE: embedded profile device supports neither CL_FP_ROUND_TO_NEAREST or CL_FP_ROUND_TO_ZERO\n" ); + return -1; + } + + gIsRTZ = 1; + } + + char extensions[2048] = ""; + if( (error = clGetDeviceInfo( gDevice, CL_DEVICE_EXTENSIONS, sizeof( extensions ), extensions, NULL ) ) ) + { + vlog_error( "FAILURE: unable to get device info for CL_DEVICE_EXTENSIONS!" 
); + return -1; + } + else if( strstr( extensions, "cl_khr_fp64" ) ) + { + gHasDouble = 1; + } + gTestDouble &= gHasDouble; + + //detect whether profile of the device is embedded + char profile[1024] = ""; + if( (error = clGetDeviceInfo( gDevice, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL ) ) ){} + else if( strstr(profile, "EMBEDDED_PROFILE" ) ) + { + gIsEmbedded = 1; + if( !strstr( extensions, "cles_khr_int64" ) ) + gHasLong = 0; + } + + + gContext = clCreateContext( NULL, 1, &gDevice, notify_callback, NULL, &error ); + if( NULL == gDevice || error ) + { + vlog_error( "clCreateContext failed. (%d)\n", error ); + return error; + } + + gQueue = clCreateCommandQueueWithProperties(gContext, gDevice, 0, &error); + if( NULL == gQueue || error ) + { + vlog_error( "clCreateCommandQueue failed. (%d)\n", error ); + return error; + } + + //Allocate buffers + //FIXME: use clProtectedArray for guarded allocations? + gIn = malloc( BUFFER_SIZE + 2 * kPageSize ); + gAllowZ = malloc( BUFFER_SIZE + 2 * kPageSize ); + gRef = malloc( BUFFER_SIZE + 2 * kPageSize ); + for( i = 0; i < kCallStyleCount; i++ ) + { + gOut[i] = malloc( BUFFER_SIZE + 2 * kPageSize ); + if( NULL == gOut[i] ) + return -3; + } + + // setup input buffers + gInBuffer = clCreateBuffer(gContext, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, BUFFER_SIZE, NULL, &error); + if( gInBuffer == NULL || error) + { + vlog_error( "clCreateBuffer failed for input (%d)\n", error ); + return error; + } + + // setup output buffers + for( i = 0; i < kCallStyleCount; i++ ) + { + gOutBuffers[i] = clCreateBuffer( gContext, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, BUFFER_SIZE, NULL, &error ); + if( gOutBuffers[i] == NULL || error ) + { + vlog_error( "clCreateArray failed for output (%d)\n", error ); + return error; + } + } + +#if defined( __APPLE__ ) + +#if defined( __i386__ ) || defined( __x86_64__ ) +#define kHasSSE3 0x00000008 +#define kHasSupplementalSSE3 0x00000100 +#define kHasSSE4_1 0x00000400 +#define kHasSSE4_2 0x00000800 
+ /* check our environment for a hint to disable SSE variants */ + { + const char *env = getenv( "CL_MAX_SSE" ); + if( env ) + { + extern int _cpu_capabilities; + int mask = 0; + if( 0 == strcasecmp( env, "SSE4.1" ) ) + mask = kHasSSE4_2; + else if( 0 == strcasecmp( env, "SSSE3" ) ) + mask = kHasSSE4_2 | kHasSSE4_1; + else if( 0 == strcasecmp( env, "SSE3" ) ) + mask = kHasSSE4_2 | kHasSSE4_1 | kHasSupplementalSSE3; + else if( 0 == strcasecmp( env, "SSE2" ) ) + mask = kHasSSE4_2 | kHasSSE4_1 | kHasSupplementalSSE3 | kHasSSE3; + else + { + vlog_error( "Error: Unknown CL_MAX_SSE setting: %s\n", env ); + return -2; + } + + vlog( "*** Environment: CL_MAX_SSE = %s ***\n", env ); + _cpu_capabilities &= ~mask; + } + } +#endif +#endif + + + char c[1024]; + static const char *no_yes[] = { "NO", "YES" }; + vlog( "\nCompute Device info:\n" ); + clGetDeviceInfo(gDevice, CL_DEVICE_NAME, sizeof(c), c, NULL); + vlog( "\tDevice Name: %s\n", c ); + clGetDeviceInfo(gDevice, CL_DEVICE_VENDOR, sizeof(c), c, NULL); + vlog( "\tVendor: %s\n", c ); + clGetDeviceInfo(gDevice, CL_DEVICE_VERSION, sizeof(c), c, NULL); + vlog( "\tDevice Version: %s\n", c ); + clGetDeviceInfo(gDevice, CL_DEVICE_OPENCL_C_VERSION, sizeof(c), &c, NULL); + vlog( "\tCL C Version: %s\n", c ); + clGetDeviceInfo(gDevice, CL_DRIVER_VERSION, sizeof(c), c, NULL); + vlog( "\tDriver Version: %s\n", c ); + vlog( "\tProcessing with %ld devices\n", gComputeDevices ); + vlog( "\tDevice Frequency: %d MHz\n", gDeviceFrequency ); + vlog( "\tSubnormal values supported for floats? %s\n", no_yes[0 != (CL_FP_DENORM & floatCapabilities)] ); + vlog( "\tTesting with FTZ mode ON for floats? %s\n", no_yes[0 != gForceFTZ] ); + vlog( "\tTesting with default RTZ mode for floats? %s\n", no_yes[0 != gIsRTZ] ); + vlog( "\tHas Double? %s\n", no_yes[0 != gHasDouble] ); + if( gHasDouble ) + vlog( "\tTest Double? %s\n", no_yes[0 != gTestDouble] ); + vlog( "\tHas Long? 
%s\n", no_yes[0 != gHasLong] ); + vlog( "\tTesting vector sizes: " ); + for( i = gMinVectorSize; i < gMaxVectorSize; i++ ) + vlog("\t%d", vectorSizes[i]); + vlog( "\n" ); + return 0; +} + +static int RunKernel( cl_kernel kernel, void *inBuf, void *outBuf, size_t blockCount ) +{ + // The global dimensions are just the blockCount to execute since we haven't set up multiple queues for multiple devices. + int error; + + error = clSetKernelArg(kernel, 0, sizeof( inBuf ), &inBuf); + error |= clSetKernelArg(kernel, 1, sizeof(outBuf), &outBuf); + + if( error ) + { + vlog_error( "FAILED -- could not set kernel args (%d)\n", error ); + return error; + } + + if( (error = clEnqueueNDRangeKernel(gQueue, kernel, 1, NULL, &blockCount, NULL, 0, NULL, NULL))) + { + vlog_error( "FAILED -- could not execute kernel (%d)\n", error ); + return error; + } + + return 0; +} + +#if ! defined( __APPLE__ ) +static void memset_pattern4(void *dest, const void *src_pattern, size_t bytes ); +static void memset_pattern4(void *dest, const void *src_pattern, size_t bytes ) +{ + uint32_t pat = ((uint32_t*) src_pattern)[0]; + size_t count = bytes / 4; + size_t i; + uint32_t *d = (uint32_t *)dest; + + for( i = 0; i < count; i++ ) + d[i] = pat; + + d += i; + + bytes &= 3; + if( bytes ) + memcpy( d, src_pattern, bytes ); +} + +#endif + +#if defined( __APPLE__ ) +#include +#endif + +uint64_t GetTime( void ); +uint64_t GetTime( void ) +{ +#if defined( __APPLE__ ) + return mach_absolute_time(); +#elif defined(_MSC_VER) + return ReadTime(); +#else + //mach_absolute_time is a high precision timer with precision < 1 microsecond. +#warning need accurate clock here. Times are invalid. 
+ return 0; +#endif +} + + +#if defined (_MSC_VER) +/* function is defined in "compat.h" */ +#else +double SubtractTime( uint64_t endTime, uint64_t startTime ); +double SubtractTime( uint64_t endTime, uint64_t startTime ) +{ + uint64_t diff = endTime - startTime; + static double conversion = 0.0; + + if( 0.0 == conversion ) + { +#if defined( __APPLE__ ) + mach_timebase_info_data_t info = {0,0}; + kern_return_t err = mach_timebase_info( &info ); + if( 0 == err ) + conversion = 1e-9 * (double) info.numer / (double) info.denom; +#else + // This function consumes output from GetTime() above, and converts the time to secionds. +#warning need accurate ticks to seconds conversion factor here. Times are invalid. +#endif + } + + // strictly speaking we should also be subtracting out timer latency here + return conversion * (double) diff; +} +#endif + +typedef struct CalcReferenceValuesInfo +{ + struct WriteInputBufferInfo *parent; // pointer back to the parent WriteInputBufferInfo struct + cl_kernel kernel; // the kernel for this vector size + cl_program program; // the program for this vector size + cl_uint vectorSize; // the vector size for this callback chain + void *p; // the pointer to mapped result data for this vector size + cl_int result; +}CalcReferenceValuesInfo; + +typedef struct WriteInputBufferInfo +{ + volatile cl_event calcReferenceValues; // user event which signals when main thread is done calculating reference values + volatile cl_event doneBarrier; // user event which signals when worker threads are done + cl_uint count; // the number of elements in the array + Type outType; // the data type of the conversion result + Type inType; // the data type of the conversion input + volatile int barrierCount; + CalcReferenceValuesInfo calcInfo[kCallStyleCount]; +}WriteInputBufferInfo; + +cl_uint RoundUpToNextPowerOfTwo( cl_uint x ); +cl_uint RoundUpToNextPowerOfTwo( cl_uint x ) +{ + if( 0 == (x & (x-1))) + return x; + + while( x & (x-1) ) + x &= x-1; + + return x + 
x; +} + +void CL_CALLBACK WriteInputBufferComplete( cl_event, cl_int, void * ); + +typedef struct DataInitInfo +{ + cl_ulong start; + cl_uint size; + Type outType; + Type inType; + SaturationMode sat; + RoundingMode round; + MTdata *d; +}DataInitInfo; + +cl_int InitData( cl_uint job_id, cl_uint thread_id, void *p ); +cl_int InitData( cl_uint job_id, cl_uint thread_id, void *p ) +{ + DataInitInfo *info = (DataInitInfo*) p; + + gInitFunctions[ info->inType ]( (char*)gIn + job_id * info->size * gTypeSizes[info->inType], info->sat, info->round, + info->outType, info->start + job_id * info->size, info->size, info->d[thread_id] ); + return CL_SUCCESS; +} + +static void setAllowZ(uint8_t *allow, uint32_t *x, cl_uint count) +{ + cl_uint i; + for (i = 0; i < count; ++i) + allow[i] |= (uint8_t)((x[i] & 0x7f800000U) == 0); +} + +cl_int PrepareReference( cl_uint job_id, cl_uint thread_id, void *p ); +cl_int PrepareReference( cl_uint job_id, cl_uint thread_id, void *p ) +{ + DataInitInfo *info = (DataInitInfo*) p; + cl_uint count = info->size; + Type inType = info->inType; + Type outType = info->outType; + RoundingMode round = info->round; + size_t j; + + Force64BitFPUPrecision(); + + void *s = (cl_uchar*) gIn + job_id * count * gTypeSizes[info->inType]; + void *a = (cl_uchar*) gAllowZ + job_id * count; + void *d = (cl_uchar*) gRef + job_id * count * gTypeSizes[info->outType]; + + if (outType != inType) + { + //create the reference while we wait + Convert f = gConversions[ outType ][ inType ]; + if( info->sat ) + f = gSaturatedConversions[ outType ][ inType ]; + +#if defined( __arm__ ) && defined( __GNUC__ ) + /* ARM VFP doesn't have hardware instruction for converting from 64-bit integer to float types, hence GCC ARM uses the floating-point emulation code + * despite which -mfloat-abi setting it is. But the emulation code in libgcc.a has only one rounding mode (round to nearest even in this case) + * and ignores the user rounding mode setting in hardware. 
+ * As a result setting rounding modes in hardware won't give correct rounding results for type covert from 64-bit integer to float using GCC for ARM compiler + * so for testing different rounding modes, we need to use alternative reference function */ + switch (round) + { + /* conversions to floating-point type use the current rounding mode. + * The only default floating-point rounding mode supported is round to nearest even + * i.e the current rounding mode will be _rte for floating-point types. */ + case kDefaultRoundingMode: + qcom_rm = qcomRTE; + break; + case kRoundToNearestEven: + qcom_rm = qcomRTE; + break; + case kRoundUp: + qcom_rm = qcomRTP; + break; + case kRoundDown: + qcom_rm = qcomRTN; + break; + case kRoundTowardZero: + qcom_rm = qcomRTZ; + break; + default: + vlog_error("ERROR: undefined rounding mode %d\n", round); + break; + } + qcom_sat = info->sat; +#endif + + RoundingMode oldRound = set_round( round, outType ); + f( d, s, count ); + set_round( oldRound, outType ); + + // Decide if we allow a zero result in addition to the correctly rounded one + memset(a, 0, count); + if (gForceFTZ) { + if (inType == kfloat) + setAllowZ((uint8_t*)a, (uint32_t*)s, count); + if (outType == kfloat) + setAllowZ((uint8_t*)a, (uint32_t*)d, count); + } + } + else + { + // Copy the input to the reference + memcpy(d, s, info->size * gTypeSizes[inType]); + } + + //Patch up NaNs conversions to integer to zero -- these can be converted to any integer + if( info->outType != kfloat && info->outType != kdouble ) + { + if( inType == kfloat ) + { + float *inp = (float*) s; + for( j = 0; j < count; j++ ) + { + if( isnan( inp[j] ) ) + memset( (char*) d + j * gTypeSizes[ outType ], 0, gTypeSizes[ outType ] ); + } + } + if( inType == kdouble ) + { + double *inp = (double*) s; + for( j = 0; j < count; j++ ) + { + if( isnan( inp[j] ) ) + memset( (char*) d + j * gTypeSizes[ outType ], 0, gTypeSizes[ outType ] ); + } + } + } + else if( inType == kfloat || inType == kdouble ) + { // 
outtype and intype is float or double. NaN conversions for float <-> double can be any NaN + if( inType == kfloat && outType == kdouble ) + { + float *inp = (float*) s; + for( j = 0; j < count; j++ ) + { + if( isnan( inp[j] ) ) + ((double*) d)[j] = NAN; + } + } + if( inType == kdouble && outType == kfloat ) + { + double *inp = (double*) s; + for( j = 0; j < count; j++ ) + { + if( isnan( inp[j] ) ) + ((float*) d)[j] = NAN; + } + } + } + + return CL_SUCCESS; +} + +static int DoTest( Type outType, Type inType, SaturationMode sat, RoundingMode round, MTdata d ) +{ +#ifdef __APPLE__ + cl_ulong wall_start = mach_absolute_time(); +#endif + + DataInitInfo init_info = { 0, 0, outType, inType, sat, round, NULL }; + WriteInputBufferInfo writeInputBufferInfo; + int vectorSize; + int error = 0; + cl_uint threads = GetThreadCount(); + uint64_t i; + + gTestCount++; + size_t blockCount = BUFFER_SIZE / MAX( gTypeSizes[ inType ], gTypeSizes[ outType ] ); + size_t step = blockCount; + uint64_t lastCase = 1ULL << (8*gTypeSizes[ inType ]); + cl_event writeInputBuffer = NULL; + + memset( &writeInputBufferInfo, 0, sizeof( writeInputBufferInfo ) ); + init_info.d = (MTdata*)malloc( threads * sizeof( MTdata ) ); + if( NULL == init_info.d ) + { + vlog_error( "ERROR: Unable to allocate storage for random number generator!\n" ); + return -1; + } + for( i = 0; i < threads; i++ ) + { + init_info.d[i] = init_genrand( genrand_int32( d ) ); + if( NULL == init_info.d[i] ) + { + vlog_error( "ERROR: Unable to allocate storage for random number generator!\n" ); + return -1; + } + } + + writeInputBufferInfo.outType = outType; + writeInputBufferInfo.inType = inType; + + for( vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; vectorSize++) + { + writeInputBufferInfo.calcInfo[vectorSize].program = MakeProgram( outType, inType, sat, round, vectorSize, + &writeInputBufferInfo.calcInfo[vectorSize].kernel ); + if( NULL == writeInputBufferInfo.calcInfo[vectorSize].program ) + { + gFailCount++; + return 
-1; + } + if( NULL == writeInputBufferInfo.calcInfo[vectorSize].kernel ) + { + gFailCount++; + vlog_error( "\t\tFAILED -- Failed to create kernel.\n" ); + return -2; + } + + writeInputBufferInfo.calcInfo[vectorSize].parent = &writeInputBufferInfo; + writeInputBufferInfo.calcInfo[vectorSize].vectorSize = vectorSize; + writeInputBufferInfo.calcInfo[vectorSize].result = -1; + } + + if( gSkipTesting ) + goto exit; + + // Patch up rounding mode if default is RTZ + // We leave the part above in default rounding mode so that the right kernel is compiled. + if( round == kDefaultRoundingMode && gIsRTZ && (outType == kfloat) ) + init_info.round = round = kRoundTowardZero; + + // Figure out how many elements are in a work block + + // we handle 64-bit types a bit differently. + if( 8*gTypeSizes[ inType ] > 32 ) + lastCase = 0x100000000ULL; + + if ( !gWimpyMode && gIsEmbedded ) + step = blockCount * EMBEDDED_REDUCTION_FACTOR; + + vlog( "Testing... " ); + fflush(stdout); + for( i = 0; i < (uint64_t)lastCase; i += step ) + { + if (gWimpyMode) { + uint64_t blockIndex = (i / blockCount) & 0xFF; + if (blockIndex != 0 && blockIndex != 0xFF) + continue; + } + + if( 0 == ( i & ((lastCase >> 3) -1))) { + vlog("."); + fflush(stdout); + } + + cl_uint count = (uint32_t) MIN( blockCount, lastCase - i ); + writeInputBufferInfo.count = count; + + // Crate a user event to represent the status of the reference value computation completion + writeInputBufferInfo.calcReferenceValues = clCreateUserEvent( gContext, &error); + if( error || NULL == writeInputBufferInfo.calcReferenceValues ) + { + vlog_error( "ERROR: Unable to create user event. (%d)\n", error ); + gFailCount++; + goto exit; + } + + // retain for consumption by MapOutputBufferComplete + for( vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; vectorSize++) + { + if( (error = clRetainEvent(writeInputBufferInfo.calcReferenceValues) )) + { + vlog_error( "ERROR: Unable to retain user event. 
(%d)\n", error ); + gFailCount++; + goto exit; + } + } + + // Crate a user event to represent when the callbacks are done verifying correctness + writeInputBufferInfo.doneBarrier = clCreateUserEvent( gContext, &error); + if( error || NULL == writeInputBufferInfo.calcReferenceValues ) + { + vlog_error( "ERROR: Unable to create user event for barrier. (%d)\n", error ); + gFailCount++; + goto exit; + } + + // retain for use by the callback that calls this + if( (error = clRetainEvent(writeInputBufferInfo.doneBarrier) )) + { + vlog_error( "ERROR: Unable to retain user event doneBarrier. (%d)\n", error ); + gFailCount++; + goto exit; + } + + // Call this in a multithreaded manner + // gInitFunctions[ inType ]( gIn, sat, round, outType, i, count, d ); + cl_uint chunks = RoundUpToNextPowerOfTwo(threads) * 2; + init_info.start = i; + init_info.size = count / chunks; + if( init_info.size < 16384 ) + { + chunks = RoundUpToNextPowerOfTwo(threads); + init_info.size = count / chunks; + if( init_info.size < 16384 ) + { + init_info.size = count; + chunks = 1; + } + } + ThreadPool_Do(InitData, chunks, &init_info); + + // Copy the results to the device + writeInputBuffer = NULL; + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, count * gTypeSizes[inType], gIn, 0, NULL, &writeInputBuffer ))) + { + vlog_error( "ERROR: clEnqueueWriteBuffer failed. (%d)\n", error ); + gFailCount++; + goto exit; + } + + // Setup completion callback for the write, which will enqueue the rest of the work + // This is somewhat gratuitous. Because this is an in order queue, we didn't really need to + // do this work in a callback. We could have done it from the main thread. Here we are + // verifying that the implementation can enqueue work from a callback, while at the same time + // also checking to make sure that the conversions work. 
+ // + // Because the verification code is also moved to a callback, it is hoped that implementations will + // achieve a test performance improvement because they can verify the results in parallel. If the + // implementation serializes callbacks however, that won't happen. Consider it some motivation + // to do the right thing! :-) + if( (error = clSetEventCallback( writeInputBuffer, CL_COMPLETE, WriteInputBufferComplete, &writeInputBufferInfo)) ) + { + vlog_error( "ERROR: clSetEventCallback failed. (%d)\n", error ); + gFailCount++; + goto exit; + } + + // The event can't be destroyed until the callback is called, so we can release it now. + if( (error = clReleaseEvent(writeInputBuffer) )) + { + vlog_error( "ERROR: clReleaseEvent failed. (%d)\n", error ); + gFailCount++; + goto exit; + } + + // Make sure the work is actually running, so we don't deadlock + if( (error = clFlush( gQueue ) ) ) + { + vlog_error( "clFlush failed with error %d\n", error ); + gFailCount++; + goto exit; + } + + ThreadPool_Do(PrepareReference, chunks, &init_info); + + // signal we are done calculating the reference results + if( (error = clSetUserEventStatus( writeInputBufferInfo.calcReferenceValues, CL_COMPLETE ) ) ) + { + vlog_error( "Error: Failed to set user event status to CL_COMPLETE: %d\n", error ); + gFailCount++; + goto exit; + } + + // Wait for the event callbacks to finish verifying correctness. 
+ if( (error = clWaitForEvents( 1, (cl_event*) &writeInputBufferInfo.doneBarrier ) )) + { + vlog_error( "Error: Failed to wait for barrier: %d\n", error ); + gFailCount++; + goto exit; + } + + if( (error = clReleaseEvent(writeInputBufferInfo.calcReferenceValues ) )) + { + vlog_error( "Error: Failed to release calcReferenceValues: %d\n", error ); + gFailCount++; + goto exit; + } + + if( (error = clReleaseEvent(writeInputBufferInfo.doneBarrier ) )) + { + vlog_error( "Error: Failed to release done barrier: %d\n", error ); + gFailCount++; + goto exit; + } + + + for( vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; vectorSize++) + { + if( ( error = writeInputBufferInfo.calcInfo[ vectorSize ].result )) + { + switch( inType ) + { + case kuchar: + case kchar: + vlog( "Input value: 0x%2.2x ", ((unsigned char*)gIn)[error - 1] ); + break; + case kushort: + case kshort: + vlog( "Input value: 0x%4.4x ", ((unsigned short*)gIn)[error - 1] ); + break; + case kuint: + case kint: + vlog( "Input value: 0x%8.8x ", ((unsigned int*)gIn)[error - 1] ); + break; + case kfloat: + vlog( "Input value: %a ", ((float*)gIn)[error - 1] ); + break; + break; + case kulong: + case klong: + vlog( "Input value: 0x%16.16llx ", ((unsigned long long*)gIn)[error - 1] ); + break; + case kdouble: + vlog( "Input value: %a ", ((double*)gIn)[error - 1]); + break; + default: + vlog_error( "Internal error at %s: %d\n", __FILE__, __LINE__ ); + abort(); + break; + } + + // tell the user which conversion it was. 
+ if( 0 == vectorSize ) + vlog( " (implicit scalar conversion from %s to %s)\n", gTypeNames[ inType ], gTypeNames[ outType ] ); + else + vlog( " (convert_%s%s%s%s( %s%s ))\n", gTypeNames[outType], sizeNames[vectorSize], gSaturationNames[ sat ], + gRoundingModeNames[ round ], gTypeNames[inType], sizeNames[vectorSize] ); + + gFailCount++; + goto exit; + } + } + } + + log_info( "done.\n" ); + + if( gTimeResults ) + { + //Kick off tests for the various vector lengths + for( vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; vectorSize++) + { + size_t workItemCount = blockCount / vectorSizes[vectorSize]; + if( vectorSizes[vectorSize] * gTypeSizes[outType] < 4 ) + workItemCount /= 4 / (vectorSizes[vectorSize] * gTypeSizes[outType]); + + double sum = 0.0; + double bestTime = INFINITY; + cl_uint k; + for( k = 0; k < PERF_LOOP_COUNT; k++ ) + { + uint64_t startTime = GetTime(); + if( (error = RunKernel( writeInputBufferInfo.calcInfo[vectorSize].kernel, gInBuffer, gOutBuffers[ vectorSize ], workItemCount )) ) + { + gFailCount++; + goto exit; + } + + // Make sure OpenCL is done + if( (error = clFinish(gQueue) ) ) + { + vlog_error( "Error %d at clFinish\n", error ); + goto exit; + } + + uint64_t endTime = GetTime(); + double time = SubtractTime( endTime, startTime ); + sum += time; + if( time < bestTime ) + bestTime = time; + + } + + if( gReportAverageTimes ) + bestTime = sum / PERF_LOOP_COUNT; + double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (workItemCount * vectorSizes[vectorSize]); + if( 0 == vectorSize ) + vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "implicit convert %s -> %s", gTypeNames[ inType ], gTypeNames[ outType ] ); + else + vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "convert_%s%s%s%s( %s%s )", gTypeNames[ outType ], sizeNames[vectorSize], gSaturationNames[ sat ], gRoundingModeNames[round], gTypeNames[inType], sizeNames[vectorSize] ); + } + } + + if( gWimpyMode ) + vlog( 
"\tWimp pass" ); + else + vlog( "\tpassed" ); + +#ifdef __APPLE__ + // record the run time + vlog( "\t(%f s)", 1e-9 * ( mach_absolute_time() - wall_start ) ); +#endif + vlog( "\n\n" ); + fflush( stdout ); + + +exit: + //clean up + for( vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; vectorSize++) + { + clReleaseProgram( writeInputBufferInfo.calcInfo[vectorSize].program ); + clReleaseKernel( writeInputBufferInfo.calcInfo[vectorSize].kernel ); + } + + if( init_info.d ) + { + for( i = 0; i < threads; i++ ) + free_mtdata(init_info.d[i]); + free(init_info.d); + } + + return error; +} + +void CL_CALLBACK MapResultValuesComplete( cl_event e, cl_int status, void *data ); + +// Note: not called reentrantly +void CL_CALLBACK WriteInputBufferComplete( cl_event e, cl_int status, void *data ) +{ + WriteInputBufferInfo *info = (WriteInputBufferInfo*) data; + cl_uint count = info->count; + int vectorSize; + + if( CL_SUCCESS != status ) + { + vlog_error( "ERROR: WriteInputBufferComplete calback failed with status: %d\n", status ); + gFailCount++; + return; + } + + info->barrierCount = gMaxVectorSize - gMinVectorSize; + + // now that we know that the write buffer is complete, enqueue callbacks to wait for the main thread to + // finish calculating the reference results. 
+ for( vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; vectorSize++) + { + size_t workItemCount = (count + vectorSizes[vectorSize] - 1) / ( vectorSizes[vectorSize]); + cl_event mapComplete = NULL; + + if( (status = RunKernel( info->calcInfo[ vectorSize ].kernel, gInBuffer, gOutBuffers[ vectorSize ], workItemCount )) ) + { + gFailCount++; + return; + } + + info->calcInfo[vectorSize].p = clEnqueueMapBuffer( gQueue, gOutBuffers[ vectorSize ], CL_FALSE, CL_MAP_READ | CL_MAP_WRITE, + 0, count * gTypeSizes[ info->outType ], 0, NULL, &mapComplete, &status); + { + if( status ) + { + vlog_error( "ERROR: WriteInputBufferComplete calback failed with status: %d\n", status ); + gFailCount++; + return; + } + } + + if( (status = clSetEventCallback( mapComplete, CL_COMPLETE, MapResultValuesComplete, info->calcInfo + vectorSize))) + { + vlog_error( "ERROR: WriteInputBufferComplete calback failed with status: %d\n", status ); + gFailCount++; + return; + } + + if( (status = clReleaseEvent(mapComplete))) + { + vlog_error( "ERROR: clReleaseEvent calback failed in WriteInputBufferComplete for vector size %d with status: %d\n", vectorSize, status ); + gFailCount++; + return; + } + } + + // Make sure the work starts moving -- otherwise we may deadlock + if( (status = clFlush(gQueue))) + { + vlog_error( "ERROR: WriteInputBufferComplete calback failed with status: %d\n", status ); + gFailCount++; + return; + } + + // e was already released by the main thread. It should be destroyed automatically soon after we exit. 
+} + +void CL_CALLBACK CalcReferenceValuesComplete( cl_event e, cl_int status, void *data ); + +// Note: May be called reentrantly +void CL_CALLBACK MapResultValuesComplete( cl_event e, cl_int status, void *data ) +{ + CalcReferenceValuesInfo *info = (CalcReferenceValuesInfo*) data; + cl_event calcReferenceValues = info->parent->calcReferenceValues; + + if( CL_SUCCESS != status ) + { + vlog_error( "ERROR: MapResultValuesComplete calback failed with status: %d\n", status ); + gFailCount++; // not thread safe -- being lazy here + clReleaseEvent(calcReferenceValues); + return; + } + + // we know that the map is done, wait for the main thread to finish calculating the reference values + if( (status = clSetEventCallback( calcReferenceValues, CL_COMPLETE, CalcReferenceValuesComplete, data ))) + { + vlog_error( "ERROR: clSetEventCallback failed in MapResultValuesComplete with status: %d\n", status ); + gFailCount++; // not thread safe -- being lazy here + } + + // this thread no longer needs its reference to info->calcReferenceValues, so release it + if( (status = clReleaseEvent(calcReferenceValues) )) + { + vlog_error( "ERROR: clReleaseEvent(info->calcReferenceValues) failed with status: %d\n", status ); + gFailCount++; // not thread safe -- being lazy here + } + + // no need to flush since we didn't enqueue anything + + // e was already released by WriteInputBufferComplete. It should be destroyed automatically soon after we exit. 
+} + + +void CL_CALLBACK CalcReferenceValuesComplete( cl_event e, cl_int status, void *data ) +{ + CalcReferenceValuesInfo *info = (CalcReferenceValuesInfo*) data; + cl_uint vectorSize = info->vectorSize; + cl_uint count = info->parent->count; + Type outType = info->parent->outType; // the data type of the conversion result + Type inType = info->parent->inType; // the data type of the conversion input + size_t j; + cl_int error; + cl_event doneBarrier = info->parent->doneBarrier; + + // report spurious error condition + if( CL_SUCCESS != status ) + { + vlog_error( "ERROR: CalcReferenceValuesComplete did not succeed! (%d)\n", status ); + gFailCount++; // lazy about thread safety here + return; + } + + // Now we know that both results have been mapped back from the device, and the + // main thread is done calculating the reference results. It is now time to check + // the results. + + // verify results + void *mapped = info->p; + + //Patch up NaNs conversions to integer to zero -- these can be converted to any integer + if( outType != kfloat && outType != kdouble ) + { + if( inType == kfloat ) + { + float *inp = (float*) gIn; + for( j = 0; j < count; j++ ) + { + if( isnan( inp[j] ) ) + memset( (char*) mapped + j * gTypeSizes[ outType ], 0, gTypeSizes[ outType ] ); + } + } + if( inType == kdouble ) + { + double *inp = (double*) gIn; + for( j = 0; j < count; j++ ) + { + if( isnan( inp[j] ) ) + memset( (char*) mapped + j * gTypeSizes[ outType ], 0, gTypeSizes[ outType ] ); + } + } + } + else if( inType == kfloat || inType == kdouble ) + { // outtype and intype is float or double. 
NaN conversions for float <-> double can be any NaN + if( inType == kfloat && outType == kdouble ) + { + float *inp = (float*) gIn; + double *outp = (double*) mapped; + for( j = 0; j < count; j++ ) + { + if( isnan( inp[j] ) && isnan(outp[j]) ) + outp[j] = NAN; + } + } + if( inType == kdouble && outType == kfloat ) + { + double *inp = (double*) gIn; + float *outp = (float*) mapped; + for( j = 0; j < count; j++ ) + { + if( isnan( inp[j] ) && isnan(outp[j]) ) + outp[j] = NAN; + } + } + } + + if( memcmp( mapped, gRef, count * gTypeSizes[ outType ] ) ) + info->result = gCheckResults[outType]( mapped, gRef, gAllowZ, count, vectorSizes[vectorSize] ); + else + info->result = 0; + + // Fill the output buffer with junk and release it + { + cl_uint pattern = 0xffffdead; + memset_pattern4(mapped, &pattern, count * gTypeSizes[outType]); + if((error = clEnqueueUnmapMemObject(gQueue, gOutBuffers[ vectorSize ], mapped, 0, NULL, NULL))) + { + vlog_error( "ERROR: clEnqueueUnmapMemObject failed in CalcReferenceValuesComplete (%d)\n", error ); + gFailCount++; + } + } + + if( 1 == ThreadPool_AtomicAdd( &info->parent->barrierCount, -1) ) + { + if( (status = clSetUserEventStatus( doneBarrier, CL_COMPLETE) )) + { + vlog_error( "ERROR: clSetUserEventStatus failed in CalcReferenceValuesComplete (err: %d). We're probably going to deadlock.\n", status ); + gFailCount++; + return; + } + + if( (status = clReleaseEvent( doneBarrier ) ) ) + { + vlog_error( "ERROR: clReleaseEvent failed in CalcReferenceValuesComplete (err: %d).\n", status ); + gFailCount++; + return; + } + } + + + // e was already released by WriteInputBufferComplete. It should be destroyed automatically soon after + // all the calls to CalcReferenceValuesComplete exit. 
+} + +static cl_program MakeProgram( Type outType, Type inType, SaturationMode sat, RoundingMode round, int vectorSize, cl_kernel *outKernel ) +{ + cl_program program; + char testName[256]; + int error = 0; + const char **strings; + size_t stringCount = 0; + + // Create the program. This is a bit complicated because we are trying to avoid byte and short stores. + if (0 == vectorSize) + { + char inName[32]; + char outName[32]; + const char *programSource[] = + { + "", // optional pragma + "__kernel void ", testName, "( __global ", inName, " *src, __global ", outName, " *dest )\n" + "{\n" + " size_t i = get_global_id(0);\n" + " dest[i] = src[i];\n" + "}\n" + }; + stringCount = sizeof(programSource) / sizeof(programSource[0]); + strings = programSource; + + if (outType == kdouble || inType == kdouble) + programSource[0] = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; + + //create the type name + strncpy(inName, gTypeNames[inType], sizeof(inName)); + strncpy(outName, gTypeNames[outType], sizeof(outName)); + sprintf(testName, "test_implicit_%s_%s", outName, inName); + vlog("Building implicit %s -> %s conversion test\n", gTypeNames[inType], gTypeNames[outType]); + fflush(stdout); + } + else + { + int vectorSizetmp = vectorSizes[vectorSize]; + + char convertString[128]; + char inName[32]; + char outName[32]; + const char *programSource[] = + { + "", // optional pragma + "__kernel void ", testName, "( __global ", inName, " *src, __global ", outName, " *dest )\n" + "{\n" + " size_t i = get_global_id(0);\n" + " dest[i] = ", convertString, "( src[i] );\n" + "}\n" + }; + const char *programSourceV3[] = + { + "", // optional pragma + "__kernel void ", testName, "( __global ", inName, " *src, __global ", outName, " *dest )\n" + "{\n" + " size_t i = get_global_id(0);\n" + " if( i + 1 < get_global_size(0))\n" + " vstore3( ", convertString, "( vload3( i, src)), i, dest );\n" + " else\n" + " {\n" + " ", inName, "3 in;\n" + " ", outName, "3 out;\n" + " if( 0 == (i & 1) )\n" + " 
in.y = src[3*i+1];\n" + " in.x = src[3*i];\n" + " out = ", convertString, "( in ); \n" + " dest[3*i] = out.x;\n" + " if( 0 == (i & 1) )\n" + " dest[3*i+1] = out.y;\n" + " }\n" + "}\n" + }; + stringCount = 3 == vectorSizetmp ? sizeof(programSourceV3) / sizeof(programSourceV3[0]) : + sizeof(programSource) / sizeof(programSource[0]); + strings = 3 == vectorSizetmp ? programSourceV3 : programSource; + + if (outType == kdouble || inType == kdouble) { + programSource[0] = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; + programSourceV3[0] = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; + } + + //create the type name + switch (vectorSizetmp) + { + case 1: + strncpy(inName, gTypeNames[inType], sizeof(inName)); + strncpy(outName, gTypeNames[outType], sizeof(outName)); + snprintf(convertString, sizeof(convertString), "convert_%s%s%s", outName, gSaturationNames[sat], gRoundingModeNames[round]); + snprintf(testName, 256, "test_%s_%s", convertString, inName); + vlog("Building %s( %s ) test\n", convertString, inName); + break; + case 3: + strncpy(inName, gTypeNames[inType], sizeof(inName)); + strncpy(outName, gTypeNames[outType], sizeof(outName)); + snprintf(convertString, sizeof(convertString), "convert_%s3%s%s", outName, gSaturationNames[sat], gRoundingModeNames[round]); + snprintf(testName, 256, "test_%s_%s3", convertString, inName); + vlog("Building %s( %s3 ) test\n", convertString, inName); + break; + default: + snprintf(inName, sizeof(inName), "%s%d", gTypeNames[inType], vectorSizetmp); + snprintf(outName, sizeof(outName), "%s%d", gTypeNames[outType], vectorSizetmp); + snprintf(convertString, sizeof(convertString), "convert_%s%s%s", outName, gSaturationNames[sat], gRoundingModeNames[round]); + snprintf(testName, 256, "test_%s_%s", convertString, inName); + vlog("Building %s( %s ) test\n", convertString, inName); + break; + } + + fflush(stdout); + } + *outKernel = NULL; + + const char *flags = NULL; + if( gForceFTZ ) + flags = "-cl-denorms-are-zero"; + + // build 
it + error = create_single_kernel_helper(gContext, &program, outKernel, (cl_uint)stringCount, strings, testName, flags); + if (error) + { + char buffer[2048] = ""; + + vlog_error("Failed to build kernel/program.\n", error); + clReleaseProgram(program); + return NULL; + } + + return program; +} + diff --git a/test_conformance/d3d10/CMakeLists.txt b/test_conformance/d3d10/CMakeLists.txt new file mode 100644 index 00000000..dbe39f0e --- /dev/null +++ b/test_conformance/d3d10/CMakeLists.txt @@ -0,0 +1,47 @@ +if(WIN32) + +set(D3D10_INCLUDE_DIR $ENV{NV_TOOLS}/sdk/DirectX_Aug2009/Include) + +if(${ARCH} STREQUAL "i686") +set(D3D10_LIB_DIR $ENV{NV_TOOLS}/sdk/DirectX_Aug2009/Lib/x86) +endif(${ARCH} STREQUAL "i686") + +if(${ARCH} STREQUAL "x86_64") +set(D3D10_LIB_DIR $ENV{NV_TOOLS}/sdk/DirectX_Aug2009/Lib/x64) +endif(${ARCH} STREQUAL "x86_64") + +list(APPEND CLConform_INCLUDE_DIR ${D3D10_INCLUDE_DIR}) +include_directories (${CLConform_SOURCE_DIR}/test_common/harness + ${CLConform_INCLUDE_DIR} ) +link_directories(${CL_LIB_DIR}, ${D3D10_LIB_DIR}) + +list(APPEND CLConform_LIBRARIES d3d10 dxgi) + + +set(D3D10_SOURCES + buffer.cpp + texture2d.cpp + texture3d.cpp + misc.cpp + main.cpp + harness.cpp + ../../test_common/harness/errorHelpers.c + ../../test_common/harness/threadTesting.c + ../../test_common/harness/testHarness.c + ../../test_common/harness/kernelHelpers.c + ../../test_common/harness/mt19937.c + ../../test_common/harness/conversions.c + ../../test_common/harness/msvc9.c + ../../test_common/harness/parseParameters.cpp) + +add_executable(conformance_test_d3d10 + ${D3D10_SOURCES}) + +set_source_files_properties( + ${D3D10_SOURCES} + PROPERTIES LANGUAGE CXX) + +TARGET_LINK_LIBRARIES(conformance_test_d3d10 + ${CLConform_LIBRARIES}) + +endif(WIN32) diff --git a/test_conformance/d3d10/buffer.cpp b/test_conformance/d3d10/buffer.cpp new file mode 100644 index 00000000..ed411db0 --- /dev/null +++ b/test_conformance/d3d10/buffer.cpp @@ -0,0 +1,310 @@ +// +// Copyright (c) 2017 The 
Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "harness.h" + +#define ADD_BUFFER_PROPERTIES(w, x, y, z) \ + { w, x, y, z, #x, #y, #z, } + +BufferProperties bufferProperties[] = +{ + ADD_BUFFER_PROPERTIES( 0x100, D3D10_BIND_CONSTANT_BUFFER, D3D10_USAGE_DYNAMIC, D3D10_CPU_ACCESS_WRITE), + ADD_BUFFER_PROPERTIES( 0x1000, D3D10_BIND_CONSTANT_BUFFER, D3D10_USAGE_DYNAMIC, D3D10_CPU_ACCESS_WRITE), + ADD_BUFFER_PROPERTIES( 0x8000, D3D10_BIND_CONSTANT_BUFFER, D3D10_USAGE_DYNAMIC, D3D10_CPU_ACCESS_WRITE), + + ADD_BUFFER_PROPERTIES( 0x7FFFF, D3D10_BIND_SHADER_RESOURCE, D3D10_USAGE_DEFAULT, 0), + ADD_BUFFER_PROPERTIES( 0x100000, D3D10_BIND_SHADER_RESOURCE, D3D10_USAGE_DEFAULT, 0), + ADD_BUFFER_PROPERTIES( 0x100000, D3D10_BIND_STREAM_OUTPUT, D3D10_USAGE_DEFAULT, 0), + ADD_BUFFER_PROPERTIES( 0x100001, D3D10_BIND_STREAM_OUTPUT, D3D10_USAGE_DEFAULT, 0), + + ADD_BUFFER_PROPERTIES( 0x10, D3D10_BIND_VERTEX_BUFFER, D3D10_USAGE_DEFAULT, 0), + ADD_BUFFER_PROPERTIES( 0x11, D3D10_BIND_INDEX_BUFFER, D3D10_USAGE_DYNAMIC, D3D10_CPU_ACCESS_WRITE), + ADD_BUFFER_PROPERTIES( 0x121, D3D10_BIND_VERTEX_BUFFER, D3D10_USAGE_DEFAULT, 0), + ADD_BUFFER_PROPERTIES( 0x1234, D3D10_BIND_INDEX_BUFFER, D3D10_USAGE_DEFAULT, 0), + ADD_BUFFER_PROPERTIES( 0x12345, D3D10_BIND_VERTEX_BUFFER, D3D10_USAGE_DYNAMIC, D3D10_CPU_ACCESS_WRITE), + ADD_BUFFER_PROPERTIES( 0x123456, D3D10_BIND_INDEX_BUFFER, D3D10_USAGE_DEFAULT, 0), +#if 0 // avoid large sizes on automation + 
ADD_BUFFER_PROPERTIES( 0x1234567, D3D10_BIND_INDEX_BUFFER, D3D10_USAGE_DYNAMIC, D3D10_CPU_ACCESS_WRITE), + + ADD_BUFFER_PROPERTIES( 0x4000000, D3D10_BIND_VERTEX_BUFFER, D3D10_USAGE_DEFAULT, 0), + ADD_BUFFER_PROPERTIES( 0x4000004, D3D10_BIND_VERTEX_BUFFER, D3D10_USAGE_DEFAULT, 0), + ADD_BUFFER_PROPERTIES( 0x4000008, D3D10_BIND_VERTEX_BUFFER, D3D10_USAGE_DEFAULT, 0), + ADD_BUFFER_PROPERTIES( 0x4000010, D3D10_BIND_VERTEX_BUFFER, D3D10_USAGE_DEFAULT, 0), + ADD_BUFFER_PROPERTIES( 0x4000014, D3D10_BIND_VERTEX_BUFFER, D3D10_USAGE_DEFAULT, 0), +#endif +}; +UINT bufferPropertyCount = sizeof(bufferProperties)/sizeof(bufferProperties[0]); + +void SubTestBuffer( + cl_context context, + cl_command_queue command_queue, + ID3D10Device* pDevice, + const BufferProperties* props) +{ + ID3D10Buffer* pBuffer = NULL; + HRESULT hr = S_OK; + cl_mem mem = NULL; + cl_int result = CL_SUCCESS; + + HarnessD3D10_TestBegin("Buffer: Size=%d, BindFlags=%s, Usage=%s, CPUAccess=%s", + props->ByteWidth, + props->name_BindFlags, + props->name_Usage, + props->name_CPUAccess); + + // create the D3D10 resource + { + D3D10_BUFFER_DESC desc = {0}; + desc.ByteWidth = props->ByteWidth; + desc.Usage = props->Usage; + desc.CPUAccessFlags = props->CPUAccess; + desc.BindFlags = props->BindFlags; + desc.MiscFlags = 0; + hr = pDevice->CreateBuffer(&desc, NULL, &pBuffer); + TestRequire(SUCCEEDED(hr), "Creating vertex buffer failed!"); + } + + // populate the D3D10 resource with data + { + ID3D10Buffer* pStagingBuffer = NULL; + char *pStagingData = NULL; + + // create a staging buffer to use to copy data to the D3D buffer + D3D10_BUFFER_DESC desc = {0}; + desc.ByteWidth = 16; + desc.Usage = D3D10_USAGE_STAGING; + desc.CPUAccessFlags = D3D10_CPU_ACCESS_WRITE|D3D10_CPU_ACCESS_READ; + desc.BindFlags = 0; + desc.MiscFlags = 0; + hr = pDevice->CreateBuffer(&desc, NULL, &pStagingBuffer); + TestRequire(SUCCEEDED(hr), "Creating staging vertex buffer failed!"); + + // populate the staging buffer + hr = pStagingBuffer->Map( 
+ D3D10_MAP_READ_WRITE, + 0, + (void **)&pStagingData); + TestRequire(SUCCEEDED(hr), "Map failed!"); + memcpy(pStagingData, "abcdXXXXxxxx1234", 16); + pStagingBuffer->Unmap(); + TestRequire(SUCCEEDED(hr), "Unmap failed!"); + + // copy 'abcdXXXX' to the front of the buffer and 'xxxx1234' to the back + D3D10_BOX box = {0}; + box.front = 0; + box.back = 1; + box.top = 0; + box.bottom = 1; + + box.left = 0; + box.right = 8; + pDevice->CopySubresourceRegion( + pBuffer, + 0, + 0, + 0, + 0, + pStagingBuffer, + 0, + &box); + box.left = 8; + box.right = 16; + pDevice->CopySubresourceRegion( + pBuffer, + 0, + props->ByteWidth-8, + 0, + 0, + pStagingBuffer, + 0, + &box); + pStagingBuffer->Release(); + } + + // share the resource with OpenCL + { + mem = clCreateFromD3D10BufferKHR( + context, + 0, + pBuffer, + &result); + TestRequire(CL_SUCCESS == result, "clCreateFromD3D10BufferKHR failed"); + } + + // validate the OpenCL mem obj's properties + { + ID3D10Resource* clResource = NULL; + result = clGetMemObjectInfo( + mem, + CL_MEM_D3D10_RESOURCE_KHR, + sizeof(clResource), + &clResource, + NULL); + TestRequire(result == CL_SUCCESS, "clGetMemObjectInfo for CL_MEM_D3D10_RESOURCE_KHR failed."); + TestRequire(clResource == pBuffer, "clGetMemObjectInfo for CL_MEM_D3D10_RESOURCE_KHR returned incorrect value."); + } + + // acquire the resource from OpenCL + { + result = clEnqueueAcquireD3D10ObjectsKHR( + command_queue, + 1, + &mem, + 0, + NULL, + NULL); + TestRequire(result == CL_SUCCESS, "clEnqueueAcquireD3D10ObjectsKHR failed."); + } + + // read+write data from the buffer in OpenCL + { + // overwrite the 'XXXX' with '1234' and the 'xxxx' with 'abcd' so we now have + // 'abcd1234' at the beginning and end of the buffer + result = clEnqueueCopyBuffer( + command_queue, + mem, + mem, + 0, + props->ByteWidth-8, + 4, + 0, + NULL, + NULL); + TestRequire(result == CL_SUCCESS, "clEnqueueCopyBuffer failed."); + + result = clEnqueueCopyBuffer( + command_queue, + mem, + mem, + props->ByteWidth-4, 
+ 4, + 4, + 0, + NULL, + NULL); + TestRequire(result == CL_SUCCESS, "clEnqueueCopyBuffer failed."); + } + + // release the resource from OpenCL + { + result = clEnqueueReleaseD3D10ObjectsKHR( + command_queue, + 1, + &mem, + 0, + NULL, + NULL); + TestRequire(result == CL_SUCCESS, "clEnqueueReleaseD3D10ObjectsKHR failed."); + } + + // read data in D3D + { + ID3D10Buffer* pStagingBuffer = NULL; + char *pStagingData = NULL; + + // create a staging buffer to read the data back + D3D10_BUFFER_DESC desc = {0}; + desc.ByteWidth = 16; + desc.Usage = D3D10_USAGE_STAGING; + desc.CPUAccessFlags = D3D10_CPU_ACCESS_WRITE|D3D10_CPU_ACCESS_READ; + desc.BindFlags = 0; + desc.MiscFlags = 0; + hr = pDevice->CreateBuffer(&desc, NULL, &pStagingBuffer); + TestRequire(SUCCEEDED(hr), "Creating staging vertex buffer failed!"); + + // make sure the staging buffer doesn't get stale data + hr = pStagingBuffer->Map( + D3D10_MAP_READ_WRITE, + 0, + (void **)&pStagingData); + TestRequire(SUCCEEDED(hr), "Map failed!"); + memset(pStagingData, 0, 16); + pStagingBuffer->Unmap(); + + + // copy the 'abcd1234' from the front and back of the buffer to the staging buffer + D3D10_BOX box = {0}; + box.front = 0; + box.back = 1; + box.top = 0; + box.bottom = 1; + + box.left = 0; + box.right = 8; + pDevice->CopySubresourceRegion( + pStagingBuffer, + 0, + 0, + 0, + 0, + pBuffer, + 0, + &box); + box.left = props->ByteWidth-8; + box.right = props->ByteWidth; + pDevice->CopySubresourceRegion( + pStagingBuffer, + 0, + 8, + 0, + 0, + pBuffer, + 0, + &box); + TestRequire(SUCCEEDED(hr), "CopySubresourceRegion failed!"); + + // verify that we got the 'abcd1234' + hr = pStagingBuffer->Map( + D3D10_MAP_READ_WRITE, + 0, + (void **)&pStagingData); + TestRequire(SUCCEEDED(hr), "Map failed!"); + TestRequire(!memcmp(pStagingData, "abcd1234abcd1234", 16), "Data was not accurately"); + pStagingBuffer->Unmap(); + TestRequire(SUCCEEDED(hr), "Unmap failed!"); + + pStagingBuffer->Release(); + } + +Cleanup: + + if (pBuffer) + { + 
pBuffer->Release(); + } + if (mem) + { + clReleaseMemObject(mem); + } + + HarnessD3D10_TestEnd(); +} + + +void TestDeviceBuffer( + cl_context context, + cl_command_queue command_queue, + ID3D10Device* pDevice) +{ + for (UINT i = 0; i < bufferPropertyCount; ++i) + { + SubTestBuffer( + context, + command_queue, + pDevice, + &bufferProperties[i]); + } +} + diff --git a/test_conformance/d3d10/harness.cpp b/test_conformance/d3d10/harness.cpp new file mode 100644 index 00000000..e93cee94 --- /dev/null +++ b/test_conformance/d3d10/harness.cpp @@ -0,0 +1,413 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#define INITGUID +#include "harness.h" +#include + +#include + +/* + * OpenCL state + */ + +clGetDeviceIDsFromD3D10KHR_fn clGetDeviceIDsFromD3D10KHR = NULL; +clCreateFromD3D10BufferKHR_fn clCreateFromD3D10BufferKHR = NULL; +clCreateFromD3D10Texture2DKHR_fn clCreateFromD3D10Texture2DKHR = NULL; +clCreateFromD3D10Texture3DKHR_fn clCreateFromD3D10Texture3DKHR = NULL; +clEnqueueAcquireD3D10ObjectsKHR_fn clEnqueueAcquireD3D10ObjectsKHR = NULL; +clEnqueueReleaseD3D10ObjectsKHR_fn clEnqueueReleaseD3D10ObjectsKHR = NULL; + +#define INITPFN(x) \ + x = (x ## _fn)clGetExtensionFunctionAddressForPlatform(platform, #x); NonTestRequire(x, "Failed to get function pointer for %s", #x); + +void +HarnessD3D10_ExtensionCheck() +{ + bool extensionPresent = false; + cl_int result = CL_SUCCESS; + cl_platform_id platform = NULL; + char extensions[1024]; + + HarnessD3D10_TestBegin("Extension query"); + + result = clGetPlatformIDs(1, &platform, NULL); + NonTestRequire(result == CL_SUCCESS, "Failed to get any platforms."); + result = clGetPlatformInfo(platform, CL_PLATFORM_EXTENSIONS, sizeof(extensions), extensions, NULL); + NonTestRequire(result == CL_SUCCESS, "Failed to list extensions."); + extensionPresent = strstr(extensions, "cl_khr_d3d10_sharing") ? 
true : false; + + if (!extensionPresent) { + // platform is required to report the extension only if all devices support it + cl_uint devicesCount; + result = clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 0, NULL, &devicesCount); + NonTestRequire(result == CL_SUCCESS, "Failed to get devices count."); + std::vector devices(devicesCount); + result = clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, devicesCount, &devices[0], NULL); + NonTestRequire(result == CL_SUCCESS, "Failed to get devices count."); + + for (cl_uint i = 0; i < devicesCount; i++) { + clGetDeviceInfo(devices[i], CL_DEVICE_EXTENSIONS, sizeof(extensions), extensions, NULL); + NonTestRequire(result == CL_SUCCESS, "Failed to list extensions."); + extensionPresent = strstr(extensions, "cl_khr_d3d10_sharing") ? true : false; + if (extensionPresent) + break; + } + } + + OSVERSIONINFO osvi; + osvi.dwOSVersionInfoSize = sizeof(osvi); + GetVersionEx(&osvi); + if (osvi.dwMajorVersion <= 5) + { + TestRequire(!extensionPresent, "Extension should not be exported on Windows < 6"); + } + else + { + TestRequire(extensionPresent, "Extension should be exported on Windows >= 6"); + } + +Cleanup: + HarnessD3D10_TestEnd(); + + // early-out if the extension is not present + if (!extensionPresent) + { + HarnessD3D10_TestStats(); + } +} + +void +HarnessD3D10_Initialize(cl_platform_id platform) +{ + HarnessD3D10_ExtensionCheck(); + + // extract function pointers for exported functions + INITPFN(clGetDeviceIDsFromD3D10KHR); + INITPFN(clCreateFromD3D10BufferKHR); + INITPFN(clCreateFromD3D10Texture2DKHR); + INITPFN(clCreateFromD3D10Texture3DKHR); + INITPFN(clEnqueueAcquireD3D10ObjectsKHR); + INITPFN(clEnqueueReleaseD3D10ObjectsKHR); +} + +/* + * Window management + */ + +static IDXGISwapChain* HarnessD3D10_pSwapChain = NULL; +static ID3D10Device* HarnessD3D10_pDevice = NULL; +static HWND HarnessD3D10_hWnd = NULL; + +static LRESULT WINAPI HarnessD3D10_Proc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam) +{ + switch(msg) + { + case
WM_KEYDOWN: + return 0; + break; + case WM_DESTROY: + HarnessD3D10_hWnd = NULL; + PostQuitMessage(0); + return 0; + case WM_PAINT: + ValidateRect(hWnd, NULL); + return 0; + } + return DefWindowProc(hWnd, msg, wParam, lParam); +} + +static void HarnessD3D10_InteractiveLoop() +{ + MSG msg; + while(PeekMessage(&msg,HarnessD3D10_hWnd,0,0,PM_REMOVE)) + { + TranslateMessage(&msg); + DispatchMessage(&msg); + } +} + +cl_int HarnessD3D10_CreateDevice(IDXGIAdapter* pAdapter, ID3D10Device **ppDevice) +{ + HRESULT hr = S_OK; + unsigned int cuStatus = 1; + + *ppDevice = NULL; + + // create window + static WNDCLASSEX wc = + { + sizeof(WNDCLASSEX), + CS_CLASSDC, + HarnessD3D10_Proc, + 0L, + 0L, + GetModuleHandle(NULL), + NULL, + NULL, + NULL, + NULL, + _T( "cl_khr_d3d10_sharing_conformance" ), + NULL + }; + RegisterClassEx(&wc); + HarnessD3D10_hWnd = CreateWindow( + _T( "cl_khr_d3d10_sharing_conformance" ), + _T( "cl_khr_d3d10_sharing_conformance" ), + WS_OVERLAPPEDWINDOW, + 0, 0, 256, 256, + NULL, + NULL, + wc.hInstance, + NULL); + NonTestRequire(0 != HarnessD3D10_hWnd, "Failed to create window"); + + ShowWindow(HarnessD3D10_hWnd,SW_SHOWDEFAULT); + UpdateWindow(HarnessD3D10_hWnd); + + RECT rc; + GetClientRect(HarnessD3D10_hWnd, &rc); + UINT width = rc.right - rc.left; + UINT height = rc.bottom - rc.top; + + // Create device and swapchain + DXGI_SWAP_CHAIN_DESC sd; + ZeroMemory(&sd, sizeof(sd)); + sd.BufferCount = 1; + sd.BufferDesc.Width = width; + sd.BufferDesc.Height = height; + sd.BufferDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + sd.BufferDesc.RefreshRate.Numerator = 60; + sd.BufferDesc.RefreshRate.Denominator = 1; + sd.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; + sd.OutputWindow = HarnessD3D10_hWnd; + sd.SampleDesc.Count = 1; + sd.SampleDesc.Quality = 0; + sd.Windowed = TRUE; + hr = D3D10CreateDeviceAndSwapChain( + pAdapter, + D3D10_DRIVER_TYPE_HARDWARE, + NULL, + D3D10_CREATE_DEVICE_DEBUG, + D3D10_SDK_VERSION, + &sd, + &HarnessD3D10_pSwapChain, + &HarnessD3D10_pDevice); 
+ + if (FAILED(hr) ) { + return CL_DEVICE_NOT_FOUND; + } + + *ppDevice = HarnessD3D10_pDevice; + return CL_SUCCESS; +} + +void HarnessD3D10_DestroyDevice() +{ + HarnessD3D10_pSwapChain->Release(); + HarnessD3D10_pDevice->Release(); + + if (HarnessD3D10_hWnd) DestroyWindow(HarnessD3D10_hWnd); + HarnessD3D10_hWnd = 0; +} + +/* + * + * texture formats + * + */ + +#define ADD_TEXTURE_FORMAT(x,y,z,a,b,g) { x, y, z, a*b/8, g, #x, #y, #z, } +TextureFormat formats[] = +{ + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R32G32B32A32_FLOAT , CL_RGBA , CL_FLOAT , 32, 4, TextureFormat::GENERIC_FLOAT ), + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R32G32B32A32_UINT , CL_RGBA , CL_UNSIGNED_INT32 , 32, 4, TextureFormat::GENERIC_UINT ), + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R32G32B32A32_SINT , CL_RGBA , CL_SIGNED_INT32 , 32, 4, TextureFormat::GENERIC_SINT ), + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R16G16B16A16_FLOAT , CL_RGBA , CL_HALF_FLOAT , 16, 4, TextureFormat::GENERIC_FLOAT ), + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R16G16B16A16_UNORM , CL_RGBA , CL_UNORM_INT16 , 16, 4, TextureFormat::GENERIC_FLOAT ), + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R16G16B16A16_UINT , CL_RGBA , CL_UNSIGNED_INT16 , 16, 4, TextureFormat::GENERIC_UINT ), + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R16G16B16A16_SNORM , CL_RGBA , CL_SNORM_INT16 , 16, 4, TextureFormat::GENERIC_FLOAT ), + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R16G16B16A16_SINT , CL_RGBA , CL_SIGNED_INT16 , 16, 4, TextureFormat::GENERIC_SINT ), + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R8G8B8A8_UNORM , CL_RGBA , CL_UNORM_INT8 , 8, 4, TextureFormat::GENERIC_FLOAT ), + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R8G8B8A8_UINT , CL_RGBA , CL_UNSIGNED_INT8 , 8, 4, TextureFormat::GENERIC_UINT ), + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R8G8B8A8_SNORM , CL_RGBA , CL_SNORM_INT8 , 8, 4, TextureFormat::GENERIC_FLOAT ), + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R8G8B8A8_SINT , CL_RGBA , CL_SIGNED_INT8 , 8, 4, TextureFormat::GENERIC_SINT ), + + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R32G32_FLOAT , CL_RG , CL_FLOAT , 32, 2, TextureFormat::GENERIC_FLOAT ), + 
ADD_TEXTURE_FORMAT( DXGI_FORMAT_R32G32_UINT , CL_RG , CL_UNSIGNED_INT32 , 32, 2, TextureFormat::GENERIC_UINT ), + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R32G32_SINT , CL_RG , CL_SIGNED_INT32 , 32, 2, TextureFormat::GENERIC_SINT ), + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R16G16_FLOAT , CL_RG , CL_HALF_FLOAT , 16, 2, TextureFormat::GENERIC_FLOAT ), + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R16G16_UNORM , CL_RG , CL_UNORM_INT16 , 16, 2, TextureFormat::GENERIC_FLOAT ), + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R16G16_UINT , CL_RG , CL_UNSIGNED_INT16 , 16, 2, TextureFormat::GENERIC_UINT ), + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R16G16_SNORM , CL_RG , CL_SNORM_INT16 , 16, 2, TextureFormat::GENERIC_FLOAT ), + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R16G16_SINT , CL_RG , CL_SIGNED_INT16 , 16, 2, TextureFormat::GENERIC_SINT ), + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R8G8_UNORM , CL_RG , CL_UNORM_INT8 , 8, 2, TextureFormat::GENERIC_FLOAT ), + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R8G8_UINT , CL_RG , CL_UNSIGNED_INT8 , 8, 2, TextureFormat::GENERIC_UINT ), + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R8G8_SNORM , CL_RG , CL_SNORM_INT8 , 8, 2, TextureFormat::GENERIC_FLOAT ), + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R8G8_SINT , CL_RG , CL_SIGNED_INT8 , 8, 2, TextureFormat::GENERIC_SINT ), + + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R32_FLOAT , CL_R , CL_FLOAT , 32, 1, TextureFormat::GENERIC_FLOAT ), + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R32_UINT , CL_R , CL_UNSIGNED_INT32 , 32, 1, TextureFormat::GENERIC_UINT ), + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R32_SINT , CL_R , CL_SIGNED_INT32 , 32, 1, TextureFormat::GENERIC_SINT ), + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R16_FLOAT , CL_R , CL_HALF_FLOAT , 16, 1, TextureFormat::GENERIC_FLOAT ), + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R16_UNORM , CL_R , CL_UNORM_INT16 , 16, 1, TextureFormat::GENERIC_FLOAT ), + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R16_UINT , CL_R , CL_UNSIGNED_INT16 , 16, 1, TextureFormat::GENERIC_UINT ), + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R16_SNORM , CL_R , CL_SNORM_INT16 , 16, 1, TextureFormat::GENERIC_FLOAT ), + 
ADD_TEXTURE_FORMAT( DXGI_FORMAT_R16_SINT , CL_R , CL_SIGNED_INT16 , 16, 1, TextureFormat::GENERIC_SINT ), + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R8_UNORM , CL_R , CL_UNORM_INT8 , 8, 1, TextureFormat::GENERIC_FLOAT ), + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R8_UINT , CL_R , CL_UNSIGNED_INT8 , 8, 1, TextureFormat::GENERIC_UINT ), + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R8_SNORM , CL_R , CL_SNORM_INT8 , 8, 1, TextureFormat::GENERIC_FLOAT ), + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R8_SINT , CL_R , CL_SIGNED_INT8 , 8, 1, TextureFormat::GENERIC_SINT ), +}; +UINT formatCount = sizeof(formats)/sizeof(formats[0]); + +/* + * + * Logging and error reporting + * + */ + +static struct +{ + cl_int testCount; + cl_int passCount; + + cl_int nonTestFailures; + cl_int inTest; + cl_int currentTestPass; + + char currentTestName[1024]; +} HarnessD3D10_testStats = {0}; + +void HarnessD3D10_TestBegin(const char* fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vsprintf(HarnessD3D10_testStats.currentTestName, fmt, ap); + va_end(ap); + + TestPrint("[%s] ... ", HarnessD3D10_testStats.currentTestName); + + HarnessD3D10_testStats.inTest = 1; + HarnessD3D10_testStats.currentTestPass = 1; +} + +void HarnessD3D10_TestFail() +{ + if (HarnessD3D10_testStats.inTest) + { + HarnessD3D10_testStats.currentTestPass = 0; + } + else + { + ++HarnessD3D10_testStats.nonTestFailures; + } +} + +void HarnessD3D10_TestEnd() +{ + HarnessD3D10_testStats.inTest = 0; + + HarnessD3D10_testStats.testCount += 1; + HarnessD3D10_testStats.passCount += HarnessD3D10_testStats.currentTestPass; + + TestPrint("%s\n", + HarnessD3D10_testStats.currentTestPass ? 
"PASSED" : "FAILED"); +} + +void HarnessD3D10_TestStats() +{ + TestPrint("PASSED %d of %d tests.\n", HarnessD3D10_testStats.passCount, HarnessD3D10_testStats.testCount); + if (HarnessD3D10_testStats.testCount > HarnessD3D10_testStats.passCount) + { + TestPrint("***FAILED***\n"); + exit(1); + } + else + { + TestPrint("&&&& cl_khr_d3d10_sharing test PASSED\n"); + } + exit(0); +} + +/* + * + * Helper function + * + */ + +cl_int HarnessD3D10_CreateKernelFromSource( + cl_kernel *outKernel, + cl_device_id device, + cl_context context, + const char *source, + const char *entrypoint) +{ + cl_int status; + cl_program program = NULL; + cl_kernel kernel = NULL; + + // compile program + { + const char *sourceTexts[] = {source}; + size_t sourceLengths[] = {strlen(source) }; + + status = create_single_kernel_helper_create_program(context, &program, 1, &sourceTexts[0]); + TestRequire( + CL_SUCCESS == status, + "clCreateProgramWithSource failed"); + } + status = clBuildProgram( + program, + 0, + NULL, + NULL, + NULL, + NULL); + if (CL_SUCCESS != status) + { + char log[2048] = {0}; + status = clGetProgramBuildInfo( + program, + device, + CL_PROGRAM_BUILD_LOG, + sizeof(log), + log, + NULL); + TestPrint("error: %s\n", log); + TestRequire( + CL_SUCCESS == status, + "Compilation error log:\n%s\n", log); + } + + kernel = clCreateKernel( + program, + entrypoint, + &status); + TestRequire( + CL_SUCCESS == status, + "clCreateKernel failed"); + + clReleaseProgram(program); + *outKernel = kernel; + +Cleanup: + + return CL_SUCCESS; +} + + + diff --git a/test_conformance/d3d10/harness.h b/test_conformance/d3d10/harness.h new file mode 100644 index 00000000..a17839b2 --- /dev/null +++ b/test_conformance/d3d10/harness.h @@ -0,0 +1,213 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef _HARNESS_H_ +#define _HARNESS_H_ + +#define _CRT_SECURE_NO_WARNINGS + +#if defined (__MINGW32__) +#include +typedef unsigned char UINT8; +#define __out +#define __in +#define __inout +#define __out_bcount_opt(size) +#define __in_opt +#define __in_ecount(size) +#define __in_ecount_opt(size) +#define __out_opt +#define __out_ecount(size) +#define __out_ecount_opt(size) +#define __in_bcount_opt(size) +#define __inout_opt +#endif + +#include +#include +#include +#include +#include "errorHelpers.h" +#include "../test_common/harness/kernelHelpers.h" + +// #define log_info(...) printf(__VA_ARGS__) +// #define log_error(...) printf(__VA_ARGS__) + +#define NonTestRequire(x, ...) \ +do \ +{ \ + if (!(x) ) \ + { \ + log_info("\n[assertion failed: %s at %s:%d]\n", #x, __FILE__, __LINE__); \ + log_info("CATASTROPHIC NON-TEST ERROR: "); \ + log_error(__VA_ARGS__); \ + log_info("\n"); \ + log_info("***FAILED***\n"); \ + exit(1); \ + } \ +} while (0) + +#define TestRequire(x, ...) \ + do \ + { \ + if (!(x) ) \ + { \ + log_info("\n[assertion failed: %s at %s:%d]\n", #x, __FILE__, __LINE__); \ + log_info("ERROR: "); \ + log_error(__VA_ARGS__); \ + log_info("\n"); \ + HarnessD3D10_TestFail(); \ + goto Cleanup; \ + } \ + } while (0) + +#define TestPrint(...) 
\ + do \ + { \ + log_error(__VA_ARGS__); \ + } while (0) + +struct TextureFormat +{ + DXGI_FORMAT format; + cl_channel_order channel_order; + cl_channel_type channel_type; + UINT bytesPerPixel; + enum + { + GENERIC_FLOAT = 0, + GENERIC_UINT = 1, + GENERIC_SINT = 2, + } generic; + + const char *name_format; + const char *name_channel_order; + const char *name_channel_type; +}; +extern TextureFormat formats[]; +extern UINT formatCount; + + +#define MAX_REGISTERED_SUBRESOURCES 4 // limit to just make life easier + +struct BufferProperties +{ + UINT ByteWidth; + UINT BindFlags; + D3D10_USAGE Usage; + UINT CPUAccess; + const char* name_BindFlags; + const char* name_Usage; + const char* name_CPUAccess; +}; + +struct Texture2DSize +{ + UINT Width; + UINT Height; + UINT MipLevels; + UINT ArraySize; + UINT SubResourceCount; + struct + { + UINT MipLevel; + UINT ArraySlice; + } subResources[MAX_REGISTERED_SUBRESOURCES]; + UINT MiscFlags; +}; +struct Texture3DSize +{ + UINT Width; + UINT Height; + UINT Depth; + UINT MipLevels; + UINT SubResourceCount; + struct + { + UINT MipLevel; + } subResources[MAX_REGISTERED_SUBRESOURCES]; + UINT MiscFlags; +}; + +void HarnessD3D10_Initialize(cl_platform_id platform); +cl_int HarnessD3D10_CreateDevice(IDXGIAdapter* pAdapter, ID3D10Device **ppDevice); +void HarnessD3D10_DestroyDevice(); + +void HarnessD3D10_TestBegin(const char* fmt, ...); +void HarnessD3D10_TestFail(); +void HarnessD3D10_TestEnd(); +void HarnessD3D10_TestStats(); + + +void TestAdapterEnumeration( + cl_platform_id platform, + IDXGIAdapter* pAdapter, + ID3D10Device* pDevice, + cl_uint* num_devices); + +void TestAdapterDevices( + cl_platform_id platform, + IDXGIAdapter* pAdapter, + ID3D10Device* pDevice, + cl_uint num_devices); + +void TestDevice( + cl_device_id device, + ID3D10Device* pDevice); + +bool TestDeviceContextCreate( + cl_device_id device, + ID3D10Device* pDevice, + cl_context* out_context, + cl_command_queue* out_command_queue); + +void TestDeviceBuffer( + 
cl_context context, + cl_command_queue command_queue, + ID3D10Device* pDevice); + +void TestDeviceTexture2D( + cl_device_id device, + cl_context context, + cl_command_queue command_queue, + ID3D10Device* pDevice); + +void TestDeviceTexture3D( + cl_device_id device, + cl_context context, + cl_command_queue command_queue, + ID3D10Device* pDevice); + +void TestDeviceMisc( + cl_device_id device, + cl_context context, + cl_command_queue command_queue, + ID3D10Device* pDevice); + +cl_int HarnessD3D10_CreateKernelFromSource( + cl_kernel *outKernel, + cl_device_id device, + cl_context context, + const char *source, + const char *entrypoint); + +extern clGetDeviceIDsFromD3D10KHR_fn clGetDeviceIDsFromD3D10KHR; +extern clCreateFromD3D10BufferKHR_fn clCreateFromD3D10BufferKHR; +extern clCreateFromD3D10Texture2DKHR_fn clCreateFromD3D10Texture2DKHR; +extern clCreateFromD3D10Texture3DKHR_fn clCreateFromD3D10Texture3DKHR; +extern clEnqueueAcquireD3D10ObjectsKHR_fn clEnqueueAcquireD3D10ObjectsKHR; +extern clEnqueueReleaseD3D10ObjectsKHR_fn clEnqueueReleaseD3D10ObjectsKHR; + +#endif diff --git a/test_conformance/d3d10/main.cpp b/test_conformance/d3d10/main.cpp new file mode 100644 index 00000000..7c862e81 --- /dev/null +++ b/test_conformance/d3d10/main.cpp @@ -0,0 +1,433 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#if defined( _WIN32 ) + +#define _CRT_SECURE_NO_WARNINGS +#include "harness.h" +#include "../../test_common/harness/testHarness.h" +#include "../../test_common/harness/parseParameters.h" + +int main(int argc, const char* argv[]) +{ + cl_int result; + cl_platform_id platform = NULL; + cl_uint num_devices_tested = 0; + + argc = parseCustomParam(argc, argv); + + // get the platform to test + result = clGetPlatformIDs(1, &platform, NULL); NonTestRequire(result == CL_SUCCESS, "Failed to get any platforms."); + + HarnessD3D10_Initialize(platform); + + // for each adapter... + IDXGIFactory* pFactory = NULL; + HRESULT hr = CreateDXGIFactory(IID_IDXGIFactory, (void**)(&pFactory) ); + NonTestRequire(SUCCEEDED(hr), "Failed to create DXGI factory."); + for (UINT adapter = 0;; ++adapter) + { + IDXGIAdapter* pAdapter = NULL; + ID3D10Device* pDevice = NULL; + HRESULT hr = pFactory->EnumAdapters(adapter, &pAdapter); + if (FAILED(hr)) + { + break; + } + + // print data about the adapter + DXGI_ADAPTER_DESC desc; + hr = pAdapter->GetDesc(&desc); + NonTestRequire(SUCCEEDED(hr), "IDXGIAdapter::GetDesc failed."); + + TestPrint("=====================================\n"); + TestPrint("Testing DXGI Adapter and D3D10 Device\n"); + TestPrint("Description=%ls, VendorID=%x, DeviceID=%x\n", desc.Description, desc.VendorId, desc.DeviceId); + TestPrint("=====================================\n"); + + // run the test on the adapter + HarnessD3D10_CreateDevice(pAdapter, &pDevice); + + cl_uint num_devices = 0; + + // test adapter and device enumeration + TestAdapterEnumeration(platform, pAdapter, pDevice, &num_devices); + + // if there were any devices found in enumeration, run the tests on them + if (num_devices) + { + TestAdapterDevices(platform, pAdapter, pDevice, num_devices); + } + num_devices_tested += num_devices; + + // destroy the D3D10 device + if (pDevice) + { + HarnessD3D10_DestroyDevice(); + } + + pAdapter->Release(); + } + pFactory->Release(); + + 
NonTestRequire(num_devices_tested, "No D3D10 compatible cl_device_ids were found."); + + HarnessD3D10_TestStats(); +} + +void TestAdapterEnumeration(cl_platform_id platform, IDXGIAdapter* pAdapter, ID3D10Device* pDevice, cl_uint* num_devices) +{ + cl_uint num_adapter_devices = 0; + cl_device_id* adapter_devices = NULL; + + cl_uint num_device_devices = 0; + cl_device_id* device_devices = NULL; + + cl_int result; + + HarnessD3D10_TestBegin("cl_device_id Enumeration"); + + // get the cl_device_ids for the adapter + { + result = clGetDeviceIDsFromD3D10KHR( + platform, + CL_D3D10_DXGI_ADAPTER_KHR, + pAdapter, + CL_ALL_DEVICES_FOR_D3D10_KHR, + 0, + NULL, + &num_adapter_devices); + TestRequire( + (result == CL_SUCCESS || result == CL_DEVICE_NOT_FOUND), + "clGetDeviceIDsFromD3D10KHR failed."); + + if (result == CL_DEVICE_NOT_FOUND) + { + TestPrint("No devices found for adapter.\n"); + } + else + { + // if there were devices, query them + adapter_devices = new cl_device_id[num_adapter_devices]; + result = clGetDeviceIDsFromD3D10KHR( + platform, + CL_D3D10_DXGI_ADAPTER_KHR, + pAdapter, + CL_ALL_DEVICES_FOR_D3D10_KHR, + num_adapter_devices, + adapter_devices, + NULL); + TestRequire( + (result == CL_SUCCESS), + "clGetDeviceIDsFromD3D10KHR failed."); + } + } + + // get the cl_device_ids for the device (if it was successfully created) + if (pDevice) + { + result = clGetDeviceIDsFromD3D10KHR( + platform, + CL_D3D10_DEVICE_KHR, + pDevice, + CL_ALL_DEVICES_FOR_D3D10_KHR, + 0, + NULL, + &num_device_devices); + TestRequire( + (result == CL_SUCCESS || result == CL_DEVICE_NOT_FOUND), + "clGetDeviceIDsFromD3D10KHR failed."); + + if (result == CL_DEVICE_NOT_FOUND) + { + TestPrint("No devices found for D3D device.\n"); + } + else + { + // if there were devices, query them + device_devices = new cl_device_id[num_device_devices]; + result = clGetDeviceIDsFromD3D10KHR( + platform, + CL_D3D10_DEVICE_KHR, + pDevice, + CL_ALL_DEVICES_FOR_D3D10_KHR, + num_device_devices, + device_devices, + 
NULL); + TestRequire( + (result == CL_SUCCESS), + "clGetDeviceIDsFromD3D10KHR failed."); + } + + // require that each cl_device_id returned for the ID3D10Device was among the devices listed for the adapter + for (cl_uint device_device = 0; device_device < num_device_devices; ++device_device) + { + cl_uint adapter_device; + for (adapter_device = 0; adapter_device < num_adapter_devices; ++adapter_device) + { + if (device_devices[device_device] == adapter_devices[adapter_device]) + { + break; + } + } + TestRequire( + (adapter_device != num_adapter_devices), + "CL_D3D10_DEVICE_KHR devices not a subset of CL_D3D10_DXGI_ADAPTER_KHR devices"); + } + } + +Cleanup: + + if (adapter_devices) + { + delete[] adapter_devices; + } + if (device_devices) + { + delete[] device_devices; + } + + *num_devices = num_device_devices; + + HarnessD3D10_TestEnd(); +} + +void TestAdapterDevices(cl_platform_id platform, IDXGIAdapter* pAdapter, ID3D10Device* pDevice, cl_uint num_devices_expected) +{ + cl_int result; + cl_uint num_devices = 0; + cl_device_id* devices = NULL; + char extensions[8192]; + + devices = new cl_device_id[num_devices_expected]; + NonTestRequire( + devices, + "Memory allocation failure."); + + result = clGetDeviceIDsFromD3D10KHR( + platform, + CL_D3D10_DEVICE_KHR, + pDevice, + CL_ALL_DEVICES_FOR_D3D10_KHR, + num_devices_expected, + devices, + &num_devices); + NonTestRequire( + (result == CL_SUCCESS), + "clGetDeviceIDsFromD3D10KHR failed."); + NonTestRequire( + (num_devices == num_devices_expected), + "clGetDeviceIDsFromD3D10KHR returned an unexpected number of devices."); + + for (cl_uint i = 0; i < num_devices; ++i) + { + // make sure the device supports the extension + result = clGetDeviceInfo(devices[i], CL_DEVICE_EXTENSIONS, sizeof(extensions), extensions, NULL); NonTestRequire(result == CL_SUCCESS, "Failed to get extensions."); + if (strstr(extensions, "cl_khr_d3d10_sharing") == NULL) { + TestPrint("Device does not support cl_khr_d3d10_sharing extension\n"); + 
continue; + } + + TestDevice(devices[i], pDevice); + } +} + +void TestDevice(cl_device_id device, ID3D10Device* pDevice) +{ + char device_name[1024]; + cl_int result = CL_SUCCESS; + cl_context context = NULL; + cl_command_queue command_queue = NULL; + cl_bool prefer_shared_resources = CL_FALSE; + ID3D10Device* clDevice = NULL; + + result = clGetDeviceInfo( + device, + CL_DEVICE_NAME, + sizeof(device_name), + device_name, + NULL); + NonTestRequire(CL_SUCCESS == result, "clGetDeviceInfo with CL_DEVICE_NAME failed"); + TestPrint("--------------------\n"); + TestPrint("Testing cl_device_id\n"); + TestPrint("Name=%s\n", device_name); + TestPrint("--------------------\n"); + + if (!TestDeviceContextCreate(device, pDevice, &context, &command_queue) ) + { + return; + } + + // make sure that we can query the shared resource preference + result = clGetContextInfo( + context, + CL_CONTEXT_D3D10_PREFER_SHARED_RESOURCES_KHR, + sizeof(prefer_shared_resources), + &prefer_shared_resources, + NULL); + NonTestRequire(CL_SUCCESS == result, "clGetContextInfo with CL_CONTEXT_D3D10_PREFER_SHARED_RESOURCES_KHR failed"); + + // run buffer tests + TestDeviceBuffer( + context, + command_queue, + pDevice); + + // run 2D texture tests + TestDeviceTexture2D( + device, + context, + command_queue, + pDevice); + + // run 3D texture tests + TestDeviceTexture3D( + device, + context, + command_queue, + pDevice); + + // run misc tests + TestDeviceMisc( + device, + context, + command_queue, + pDevice); + + clReleaseContext(context); + clReleaseCommandQueue(command_queue); +} + +bool TestDeviceContextCreate( + cl_device_id device, + ID3D10Device* pDevice, + cl_context* out_context, + cl_command_queue* out_command_queue) +{ + cl_int result = CL_SUCCESS; + cl_context context = NULL; + cl_command_queue command_queue = NULL; + + ID3D10Device* clDevice = NULL; + + bool succeeded = false; + + HarnessD3D10_TestBegin("Context creation"); + + cl_context_properties properties[5]; + + // create the context + 
properties[0] = (cl_context_properties)CL_CONTEXT_D3D10_DEVICE_KHR; + properties[1] = (cl_context_properties)pDevice; + properties[2] = (cl_context_properties)CL_CONTEXT_INTEROP_USER_SYNC; + properties[3] = (cl_context_properties)CL_TRUE; + properties[4] = (cl_context_properties)0; + context = clCreateContext( + properties, + 1, + &device, + NULL, + NULL, + &result); + TestRequire( + (result == CL_SUCCESS), + "clCreateContext with CL_CONTEXT_D3D10_DEVICE_KHR failed"); + result = clReleaseContext(context); + TestRequire( + (result == CL_SUCCESS), + "clReleaseContext with CL_CONTEXT_D3D10_DEVICE_KHR failed"); + + // create the context + properties[0] = (cl_context_properties)CL_CONTEXT_D3D10_DEVICE_KHR; + properties[1] = (cl_context_properties)pDevice; + properties[2] = (cl_context_properties)CL_CONTEXT_INTEROP_USER_SYNC; + properties[3] = (cl_context_properties)CL_FALSE; + properties[4] = (cl_context_properties)0; + context = clCreateContext( + properties, + 1, + &device, + NULL, + NULL, + &result); + TestRequire( + (result == CL_SUCCESS), + "clCreateContext with CL_CONTEXT_D3D10_DEVICE_KHR failed"); + result = clReleaseContext(context); + TestRequire( + (result == CL_SUCCESS), + "clReleaseContext with CL_CONTEXT_D3D10_DEVICE_KHR failed"); + + // create the context + properties[0] = (cl_context_properties)CL_CONTEXT_D3D10_DEVICE_KHR; + properties[1] = (cl_context_properties)pDevice; + properties[2] = (cl_context_properties)0; + context = clCreateContext( + properties, + 1, + &device, + NULL, + NULL, + &result); + TestRequire( + (result == CL_SUCCESS), + "clCreateContext with CL_CONTEXT_D3D10_DEVICE_KHR failed"); + + // create the command queue + TestPrint("Creating a command queue.\n"); + command_queue = clCreateCommandQueueWithProperties( + context, + device, + NULL, + &result); + TestRequire( + (result == CL_SUCCESS), + "clCreateContext with CL_CONTEXT_D3D10_DEVICE_KHR failed"); + + succeeded = true; + +Cleanup: + + if (succeeded) + { + *out_context = context; + 
*out_command_queue = command_queue; + } + else + { + if (context) + { + clReleaseContext(context); + } + if (command_queue) + { + clReleaseCommandQueue(command_queue); + } + } + HarnessD3D10_TestEnd(); + return succeeded; +} + +#else + +#include "errorHelpers.h" + +int main(int argc, char* argv[]) +{ + log_info( "Windows-specific test skipped.\n" ); + return 0; +} + +#endif diff --git a/test_conformance/d3d10/misc.cpp b/test_conformance/d3d10/misc.cpp new file mode 100644 index 00000000..4ccd492e --- /dev/null +++ b/test_conformance/d3d10/misc.cpp @@ -0,0 +1,259 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "harness.h" + +void SubTestMiscMultipleCreates( + cl_context context, + cl_command_queue command_queue, + ID3D10Device* pDevice) +{ + ID3D10Buffer* pBuffer = NULL; + ID3D10Texture2D* pTexture = NULL; + HRESULT hr = S_OK; + cl_mem mem[5] = {NULL, NULL, NULL, NULL, NULL}; + cl_int result = CL_SUCCESS; + + HarnessD3D10_TestBegin("Misc: Multiple Creates"); + + // create the D3D10 resources + { + D3D10_TEXTURE2D_DESC desc; + memset(&desc, 0, sizeof(desc) ); + desc.Width = 256; + desc.Height = 256; + desc.MipLevels = 4; + desc.ArraySize = 4; + desc.Format = DXGI_FORMAT_R32G32B32A32_FLOAT; + desc.SampleDesc.Count = 1; + desc.SampleDesc.Quality = 0; + desc.Usage = D3D10_USAGE_DEFAULT; + desc.BindFlags = D3D10_BIND_SHADER_RESOURCE | D3D10_BIND_RENDER_TARGET; + desc.CPUAccessFlags = 0; + desc.MiscFlags = 0; + + hr = pDevice->CreateTexture2D(&desc, NULL, &pTexture); + TestRequire(SUCCEEDED(hr), "Failed to create texture."); + } + + // create the D3D10 buffer + { + D3D10_BUFFER_DESC desc = {0}; + desc.ByteWidth = 1024; + desc.Usage = D3D10_USAGE_DEFAULT; + desc.CPUAccessFlags = 0; + desc.BindFlags = D3D10_BIND_VERTEX_BUFFER; + desc.MiscFlags = 0; + hr = pDevice->CreateBuffer(&desc, NULL, &pBuffer); + TestRequire(SUCCEEDED(hr), "Creating vertex buffer failed!"); + } + + mem[0] = clCreateFromD3D10BufferKHR( + context, + 0, + pBuffer, + &result); + TestRequire(result == CL_SUCCESS, "clCreateFromD3D10BufferKHR"); + + mem[1] = clCreateFromD3D10BufferKHR( + context, + 0, + pBuffer, + &result); + TestRequire(result == CL_INVALID_D3D10_RESOURCE_KHR, "clCreateFromD3D10BufferKHR succeeded when it shouldn't"); + + mem[2] = clCreateFromD3D10Texture2DKHR( + context, + 0, + pTexture, + 1, + &result); + TestRequire(result == CL_SUCCESS, "clCreateFromD3D10Texture2DKHR failed"); + + mem[3] = clCreateFromD3D10Texture2DKHR( + context, + 0, + pTexture, + 1, + &result); + TestRequire(result == CL_INVALID_D3D10_RESOURCE_KHR, "clCreateFromD3D10Texture2DKHR succeeded when it 
shouldn't"); + + mem[4] = clCreateFromD3D10Texture2DKHR( + context, + 0, + pTexture, + 16, + &result); + TestRequire(result == CL_INVALID_VALUE, "clCreateFromD3D10Texture2DKHR succeeded when it shouldn't"); + + +Cleanup: + + for (UINT i = 0; i < 4; ++i) + { + if (mem[i]) + { + clReleaseMemObject(mem[i]); + } + } + if (pBuffer) + { + pBuffer->Release(); + } + if (pTexture) + { + pTexture->Release(); + } + + HarnessD3D10_TestEnd(); +} + +void SubTestMiscAcquireRelease( + cl_context context, + cl_command_queue command_queue, + ID3D10Device* pDevice) +{ + ID3D10Buffer* pBuffer = NULL; + ID3D10Texture2D* pTexture = NULL; + HRESULT hr = S_OK; + + cl_int result = CL_SUCCESS; + cl_mem mem[2] = {NULL, NULL}; + + HarnessD3D10_TestBegin("Misc: Acquire Release"); + + // create the D3D10 resources + { + D3D10_TEXTURE2D_DESC desc; + memset(&desc, 0, sizeof(desc) ); + desc.Width = 256; + desc.Height = 256; + desc.MipLevels = 4; + desc.ArraySize = 4; + desc.Format = DXGI_FORMAT_R32G32B32A32_FLOAT; + desc.SampleDesc.Count = 1; + desc.SampleDesc.Quality = 0; + desc.Usage = D3D10_USAGE_DEFAULT; + desc.BindFlags = D3D10_BIND_SHADER_RESOURCE | D3D10_BIND_RENDER_TARGET; + desc.CPUAccessFlags = 0; + desc.MiscFlags = 0; + + hr = pDevice->CreateTexture2D(&desc, NULL, &pTexture); + TestRequire(SUCCEEDED(hr), "Failed to create texture."); + } + + // create the D3D10 buffer + { + D3D10_BUFFER_DESC desc = {0}; + desc.ByteWidth = 1024; + desc.Usage = D3D10_USAGE_DEFAULT; + desc.CPUAccessFlags = 0; + desc.BindFlags = D3D10_BIND_VERTEX_BUFFER; + desc.MiscFlags = 0; + hr = pDevice->CreateBuffer(&desc, NULL, &pBuffer); + TestRequire(SUCCEEDED(hr), "Creating vertex buffer failed!"); + } + + // create cl_mem objects for the resources + mem[0] = clCreateFromD3D10BufferKHR( + context, + 0, + pBuffer, + &result); + TestRequire(result == CL_SUCCESS, "clCreateFromD3D10BufferKHR"); + mem[1] = clCreateFromD3D10Texture2DKHR( + context, + 0, + pTexture, + 1, + &result); + TestRequire(result == CL_SUCCESS, 
"clCreateFromD3D10Texture2DKHR failed"); + + // test some acquire/release patterns + result = clEnqueueAcquireD3D10ObjectsKHR( + command_queue, + 1, + &mem[1], + 0, + NULL, + NULL); + TestRequire(result == CL_SUCCESS, "clEnqueueAcquireD3D10ObjectsKHR failed."); + + result = clEnqueueAcquireD3D10ObjectsKHR( + command_queue, + 1, + &mem[0], + 0, + NULL, + NULL); + TestRequire(result == CL_SUCCESS, "clEnqueueAcquireD3D10ObjectsKHR failed."); + + result = clEnqueueReleaseD3D10ObjectsKHR( + command_queue, + 1, + &mem[1], + 0, + NULL, + NULL); + TestRequire(result == CL_SUCCESS, "clEnqueueReleaseD3D10ObjectsKHR failed."); + + + result = clEnqueueReleaseD3D10ObjectsKHR( + command_queue, + 1, + &mem[0], + 0, + NULL, + NULL); + TestRequire(result == CL_SUCCESS, "clEnqueueReleaseD3D10ObjectsKHR failed."); + +Cleanup: + + for (UINT i = 0; i < 2; ++i) + { + if (mem[i]) + { + clReleaseMemObject(mem[i]); + } + } + if (pBuffer) + { + pBuffer->Release(); + } + if (pTexture) + { + pTexture->Release(); + } + + HarnessD3D10_TestEnd(); +} + +void TestDeviceMisc( + cl_device_id device, + cl_context context, + cl_command_queue command_queue, + ID3D10Device* pDevice) +{ + SubTestMiscMultipleCreates( + context, + command_queue, + pDevice); + + SubTestMiscAcquireRelease( + context, + command_queue, + pDevice); +} + diff --git a/test_conformance/d3d10/texture2d.cpp b/test_conformance/d3d10/texture2d.cpp new file mode 100644 index 00000000..123ba3f8 --- /dev/null +++ b/test_conformance/d3d10/texture2d.cpp @@ -0,0 +1,694 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#define _CRT_SECURE_NO_WARNINGS +#include "harness.h" + +Texture2DSize texture2DSizes[] = +{ + { + 4, // Width + 4, // Height + 1, // MipLevels + 1, // ArraySize + 1, // SubResourceCount + { // SubResources + {0, 0}, // MipLevel, ArraySlice + {0, 0}, // MipLevel, ArraySlice + {0, 0}, // MipLevel, ArraySlice + {0, 0}, // MipLevel, ArraySlice + }, + 0, // MiscFlags + }, + { + 15, // Width + 37, // Height + 2, // MipLevels + 1, // ArraySize + 2, // SubResourceCount + { // SubResources + {0, 0}, // MipLevel, ArraySlice + {1, 0}, // MipLevel, ArraySlice + {0, 0}, // MipLevel, ArraySlice + {0, 0}, // MipLevel, ArraySlice + }, + 0, // MiscFlags + }, + { + 65, // Width + 17, // Height + 1, // MipLevels + 1, // ArraySize + 1, // SubResourceCount + { // SubResources + {0, 0}, // MipLevel, ArraySlice + {0, 0}, // MipLevel, ArraySlice + {0, 0}, // MipLevel, ArraySlice + {0, 0}, // MipLevel, ArraySlice + }, + D3D10_RESOURCE_MISC_SHARED, // MiscFlags + }, + + { + 127, // Width + 125, // Height + 4, // MipLevels + 1, // ArraySize + 4, // SubResourceCount + { // SubResources + {3, 0}, // MipLevel, ArraySlice + {2, 0}, // MipLevel, ArraySlice + {1, 0}, // MipLevel, ArraySlice + {0, 0}, // MipLevel, ArraySlice + }, + 0, // MiscFlags + }, + { + 128, // Width + 128, // Height + 4, // MipLevels + 6, // ArraySize + 4, // SubResourceCount + { // SubResources + {0, 1}, // MipLevel, ArraySlice + {1, 0}, // MipLevel, ArraySlice + {0, 2}, // MipLevel, ArraySlice + {3, 5}, // MipLevel, ArraySlice + }, + 0, // MiscFlags + }, + { + 256, // Width + 256, // Height + 0, // 
MipLevels + 256, // ArraySize + 4, // SubResourceCount + { // SubResources + {0, 0}, // MipLevel, ArraySlice + {1, 255}, // MipLevel, ArraySlice + {2, 127}, // MipLevel, ArraySlice + {3, 128}, // MipLevel, ArraySlice + }, + 0, // MiscFlags + }, + { + 258, // Width + 511, // Height + 1, // MipLevels + 1, // ArraySize + 1, // SubResourceCount + { // SubResources + {0, 0}, // MipLevel, ArraySlice + {0, 0}, // MipLevel, ArraySlice + {0, 0}, // MipLevel, ArraySlice + {0, 0}, // MipLevel, ArraySlice + }, + 0, // MiscFlags + }, + { + 767, // Width + 1025, // Height + 4, // MipLevels + 1, // ArraySize + 1, // SubResourceCount + { // SubResources + {0, 0}, // MipLevel, ArraySlice + {0, 0}, // MipLevel, ArraySlice + {0, 0}, // MipLevel, ArraySlice + {0, 0}, // MipLevel, ArraySlice + }, + 0, // MiscFlags + }, + { + 2048, // Width + 2048, // Height + 1, // MipLevels + 1, // ArraySize + 1, // SubResourceCount + { // SubResources + {0, 0}, // MipLevel, ArraySlice + {0, 0}, // MipLevel, ArraySlice + {0, 0}, // MipLevel, ArraySlice + {0, 0}, // MipLevel, ArraySlice + }, + 0, // MiscFlags + }, +}; +UINT texture2DSizeCount = sizeof(texture2DSizes)/sizeof(texture2DSizes[0]); + +const char * +texture2DPatterns[2][2] = +{ + {"aAbBcCdDeEfFgGhHiIjJ", "AaBbCcDdEeFfGgHhIiJj"}, + {"zZyYxXwWvVuUtTsSrRqQ", "ZzYyXxWwVvUuTtSsRrQq"}, +}; + +void SubTestTexture2D( + cl_context context, + cl_command_queue command_queue, + cl_kernel kernel, + ID3D10Device* pDevice, + const TextureFormat* format, + const Texture2DSize* size) +{ + ID3D10Texture2D* pTexture = NULL; + HRESULT hr = S_OK; + + cl_int result = CL_SUCCESS; + + HarnessD3D10_TestBegin("2D Texture: Format=%s, Width=%d, Height=%d, MipLevels=%d, ArraySize=%d", + format->name_format, + size->Width, + size->Height, + size->MipLevels, + size->ArraySize); + + struct + { + cl_mem mem; + UINT subResource; + UINT width; + UINT height; + } + subResourceInfo[4]; + + cl_event events[4] = {NULL, NULL, NULL, NULL}; + + // create the D3D10 resources + { + 
D3D10_TEXTURE2D_DESC desc; + memset(&desc, 0, sizeof(desc) ); + desc.Width = size->Width; + desc.Height = size->Height; + desc.MipLevels = size->MipLevels; + desc.ArraySize = size->ArraySize; + desc.Format = format->format; + desc.SampleDesc.Count = 1; + desc.SampleDesc.Quality = 0; + desc.Usage = D3D10_USAGE_DEFAULT; + desc.BindFlags = D3D10_BIND_SHADER_RESOURCE | D3D10_BIND_RENDER_TARGET; + desc.CPUAccessFlags = 0; + desc.MiscFlags = 0; + + hr = pDevice->CreateTexture2D(&desc, NULL, &pTexture); + TestRequire(SUCCEEDED(hr), "ID3D10Device::CreateTexture2D failed (non-OpenCL D3D error, but test is invalid)."); + } + + // initialize some useful variables + for (UINT i = 0; i < size->SubResourceCount; ++i) + { + // compute the expected values for the subresource + subResourceInfo[i].subResource = D3D10CalcSubresource( + size->subResources[i].MipLevel, + size->subResources[i].ArraySlice, + size->MipLevels); + subResourceInfo[i].width = size->Width; + subResourceInfo[i].height = size->Height; + for (UINT j = 0; j < size->subResources[i].MipLevel; ++j) { + subResourceInfo[i].width /= 2; + subResourceInfo[i].height /= 2; + } + } + + // copy a pattern into the corners of the image, coordinates + // (0,0), (w,0-1), (0,h-1), (w-1,h-1) + for (UINT i = 0; i < size->SubResourceCount; ++i) + for (UINT x = 0; x < 2; ++x) + for (UINT y = 0; y < 2; ++y) + { + // create the staging buffer + ID3D10Texture2D* pStagingBuffer = NULL; + { + D3D10_TEXTURE2D_DESC desc = {0}; + desc.Width = 1; + desc.Height = 1; + desc.MipLevels = 1; + desc.ArraySize = 1; + desc.Format = format->format; + desc.SampleDesc.Count = 1; + desc.SampleDesc.Quality = 0; + desc.Usage = D3D10_USAGE_STAGING; + desc.BindFlags = 0; + desc.CPUAccessFlags = D3D10_CPU_ACCESS_READ | D3D10_CPU_ACCESS_WRITE; + desc.MiscFlags = 0; + hr = pDevice->CreateTexture2D(&desc, NULL, &pStagingBuffer); + TestRequire(SUCCEEDED(hr), "ID3D10Device::CreateTexture2D failed (non-OpenCL D3D error, but test is invalid)."); + } + + // write the 
data to the staging buffer + { + D3D10_MAPPED_TEXTURE2D mappedTexture; + hr = pStagingBuffer->Map( + 0, + D3D10_MAP_READ_WRITE, + 0, + &mappedTexture); + memcpy(mappedTexture.pData, texture2DPatterns[x][y], format->bytesPerPixel); + pStagingBuffer->Unmap(0); + } + + // copy the data to to the texture + { + D3D10_BOX box = {0}; + box.front = 0; box.back = 1; + box.top = 0; box.bottom = 1; + box.left = 0; box.right = 1; + pDevice->CopySubresourceRegion( + pTexture, + subResourceInfo[i].subResource, + x ? subResourceInfo[i].width - 1 : 0, + y ? subResourceInfo[i].height - 1 : 0, + 0, + pStagingBuffer, + 0, + &box); + } + + pStagingBuffer->Release(); + } + + // create the cl_mem objects for the resources and verify its sanity + for (UINT i = 0; i < size->SubResourceCount; ++i) + { + // create a cl_mem for the resource + subResourceInfo[i].mem = clCreateFromD3D10Texture2DKHR( + context, + 0, + pTexture, + subResourceInfo[i].subResource, + &result); + TestRequire(result == CL_SUCCESS, "clCreateFromD3D10Texture2DKHR failed"); + + // query resource pointer and verify + ID3D10Resource* clResource = NULL; + result = clGetMemObjectInfo( + subResourceInfo[i].mem, + CL_MEM_D3D10_RESOURCE_KHR, + sizeof(clResource), + &clResource, + NULL); + TestRequire(result == CL_SUCCESS, "clGetMemObjectInfo for CL_MEM_D3D10_RESOURCE_KHR failed."); + TestRequire(clResource == pTexture, "clGetMemObjectInfo for CL_MEM_D3D10_RESOURCE_KHR returned incorrect value."); + + // query subresource and verify + UINT clSubResource; + result = clGetImageInfo( + subResourceInfo[i].mem, + CL_IMAGE_D3D10_SUBRESOURCE_KHR, + sizeof(clSubResource), + &clSubResource, + NULL); + TestRequire(result == CL_SUCCESS, "clGetImageInfo for CL_IMAGE_D3D10_SUBRESOURCE_KHR failed"); + TestRequire(clSubResource == subResourceInfo[i].subResource, "clGetImageInfo for CL_IMAGE_D3D10_SUBRESOURCE_KHR returned incorrect value."); + + // query format and verify + cl_image_format clFormat; + result = clGetImageInfo( + 
subResourceInfo[i].mem, + CL_IMAGE_FORMAT, + sizeof(clFormat), + &clFormat, + NULL); + TestRequire(result == CL_SUCCESS, "clGetImageInfo for CL_IMAGE_FORMAT failed"); + TestRequire(clFormat.image_channel_order == format->channel_order, "clGetImageInfo for CL_IMAGE_FORMAT returned incorrect channel order."); + TestRequire(clFormat.image_channel_data_type == format->channel_type, "clGetImageInfo for CL_IMAGE_FORMAT returned incorrect channel data type."); + + // query width + size_t width; + result = clGetImageInfo( + subResourceInfo[i].mem, + CL_IMAGE_WIDTH, + sizeof(width), + &width, + NULL); + TestRequire(result == CL_SUCCESS, "clGetImageInfo for CL_IMAGE_WIDTH failed"); + TestRequire(width == subResourceInfo[i].width, "clGetImageInfo for CL_IMAGE_HEIGHT returned incorrect value."); + + // query height + size_t height; + result = clGetImageInfo( + subResourceInfo[i].mem, + CL_IMAGE_HEIGHT, + sizeof(height), + &height, + NULL); + TestRequire(result == CL_SUCCESS, "clGetImageInfo for CL_IMAGE_HEIGHT failed"); + TestRequire(height == subResourceInfo[i].height, "clGetImageInfo for CL_IMAGE_HEIGHT returned incorrect value."); + + } + + // acquire the resources for OpenCL + for (UINT i = 0; i < 2; ++i) + { + cl_uint memCount = 0; + cl_mem memToAcquire[MAX_REGISTERED_SUBRESOURCES]; + + // cut the registered sub-resources into two sets and send the acquire calls for them separately + if (i == 0) + { + for(UINT j = 0; j < size->SubResourceCount/2; ++j) + { + memToAcquire[memCount++] = subResourceInfo[j].mem; + } + } + else + { + for(UINT j = size->SubResourceCount/2; j < size->SubResourceCount; ++j) + { + memToAcquire[memCount++] = subResourceInfo[j].mem; + } + } + if (!memCount) continue; + + // do the acquire + result = clEnqueueAcquireD3D10ObjectsKHR( + command_queue, + memCount, + memToAcquire, + 0, + NULL, + &events[0+i]); + TestRequire(result == CL_SUCCESS, "clEnqueueAcquireD3D10ObjectsKHR failed."); + TestRequire(events[0+i], "clEnqueueAcquireD3D10ObjectsKHR did not 
return an event."); + + // make sure the event type is correct + cl_uint eventType = 0; + result = clGetEventInfo( + events[0+i], + CL_EVENT_COMMAND_TYPE, + sizeof(eventType), + &eventType, + NULL); + TestRequire(result == CL_SUCCESS, "clGetEventInfo for event created by clEnqueueAcquireD3D10ObjectsKHR failed."); + TestRequire(eventType == CL_COMMAND_ACQUIRE_D3D10_OBJECTS_KHR, "clGetEventInfo for CL_EVENT_COMMAND_TYPE was not CL_COMMAND_ACQUIRE_D3D10_OBJECTS_KHR."); + } + + // download the data using OpenCL & compare with the expected results + for (UINT i = 0; i < size->SubResourceCount; ++i) + { + // copy (0,0) to (1,1) and (w-1,h-1) to (w-2,h-2) using a kernel + { + result = clSetKernelArg( + kernel, + 0, + sizeof(cl_mem), + (void *)&subResourceInfo[i].mem); + result = clSetKernelArg( + kernel, + 1, + sizeof(cl_mem), + (void *)&subResourceInfo[i].mem); + + TestRequire(CL_SUCCESS == result, "clSetKernelArg failed"); + + size_t localWorkSize[] = {1}; + size_t globalWorkSize[] = {1}; + result = clEnqueueNDRangeKernel( + command_queue, + kernel, + 1, + NULL, + globalWorkSize, + localWorkSize, + 0, + NULL, + NULL); + TestRequire(CL_SUCCESS == result, "clEnqueueNDRangeKernel failed"); + } + // copy (w-1,0) to (w-2,1) and (0,h) to (1,h-2) using a memcpy + for (UINT x = 0; x < 2; ++x) + for (UINT y = 0; y < 2; ++y) + { + if (x == y) + { + continue; + } + + size_t src[3] = + { + x ? subResourceInfo[i].width - 1 : 0, + y ? subResourceInfo[i].height - 1 : 0, + 0, + }; + size_t dst[3] = + { + x ? subResourceInfo[i].width - 2 : 1, + y ? 
subResourceInfo[i].height - 2 : 1, + 0, + }; + size_t region[3] = + { + 1, + 1, + 1, + }; + result = clEnqueueCopyImage( + command_queue, + subResourceInfo[i].mem, + subResourceInfo[i].mem, + src, + dst, + region, + 0, + NULL, + NULL); + TestRequire(result == CL_SUCCESS, "clEnqueueCopyImage failed."); + } + } + + // release the resource from OpenCL + for (UINT i = 0; i < 2; ++i) + { + cl_uint memCount = 0; + cl_mem memToAcquire[MAX_REGISTERED_SUBRESOURCES]; + + // cut the registered sub-resources into two sets and send the release calls for them separately + if (i == 0) + { + for(UINT j = size->SubResourceCount/4; j < size->SubResourceCount; ++j) + { + memToAcquire[memCount++] = subResourceInfo[j].mem; + } + } + else + { + for(UINT j = 0; j < size->SubResourceCount/4; ++j) + { + memToAcquire[memCount++] = subResourceInfo[j].mem; + } + } + if (!memCount) continue; + + // do the release + result = clEnqueueReleaseD3D10ObjectsKHR( + command_queue, + memCount, + memToAcquire, + 0, + NULL, + &events[2+i]); + TestRequire(result == CL_SUCCESS, "clEnqueueReleaseD3D10ObjectsKHR failed."); + TestRequire(events[2+i], "clEnqueueReleaseD3D10ObjectsKHR did not return an event."); + + // make sure the event type is correct + cl_uint eventType = 0; + result = clGetEventInfo( + events[2+i], + CL_EVENT_COMMAND_TYPE, + sizeof(eventType), + &eventType, + NULL); + TestRequire(result == CL_SUCCESS, "clGetEventInfo for event created by clEnqueueReleaseD3D10ObjectsKHR failed."); + TestRequire(eventType == CL_COMMAND_RELEASE_D3D10_OBJECTS_KHR, "clGetEventInfo for CL_EVENT_COMMAND_TYPE was not CL_COMMAND_RELEASE_D3D10_OBJECTS_KHR."); + } + + for (UINT i = 0; i < size->SubResourceCount; ++i) + for (UINT x = 0; x < 2; ++x) + for (UINT y = 0; y < 2; ++y) + { + // create the staging buffer + ID3D10Texture2D* pStagingBuffer = NULL; + { + D3D10_TEXTURE2D_DESC desc = {0}; + desc.Width = 1; + desc.Height = 1; + desc.MipLevels = 1; + desc.ArraySize = 1; + desc.Format = format->format; + 
desc.SampleDesc.Count = 1; + desc.SampleDesc.Quality = 0; + desc.Usage = D3D10_USAGE_STAGING; + desc.BindFlags = 0; + desc.CPUAccessFlags = D3D10_CPU_ACCESS_READ | D3D10_CPU_ACCESS_WRITE; + desc.MiscFlags = 0; + hr = pDevice->CreateTexture2D(&desc, NULL, &pStagingBuffer); + TestRequire(SUCCEEDED(hr), "Failed to create staging buffer."); + } + + // wipe out the staging buffer to make sure we don't get stale values + { + D3D10_MAPPED_TEXTURE2D mappedTexture; + hr = pStagingBuffer->Map( + 0, + D3D10_MAP_READ_WRITE, + 0, + &mappedTexture); + TestRequire(SUCCEEDED(hr), "Failed to map staging buffer"); + memset(mappedTexture.pData, 0, format->bytesPerPixel); + pStagingBuffer->Unmap(0); + } + + // copy the pixel to the staging buffer + { + D3D10_BOX box = {0}; + box.left = x ? subResourceInfo[i].width - 2 : 1; box.right = box.left + 1; + box.top = y ? subResourceInfo[i].height - 2 : 1; box.bottom = box.top + 1; + box.front = 0; box.back = 1; + pDevice->CopySubresourceRegion( + pStagingBuffer, + 0, + 0, + 0, + 0, + pTexture, + subResourceInfo[i].subResource, + &box); + } + + // make sure we read back what was written next door + { + D3D10_MAPPED_TEXTURE2D mappedTexture; + hr = pStagingBuffer->Map( + 0, + D3D10_MAP_READ_WRITE, + 0, + &mappedTexture); + TestRequire(SUCCEEDED(hr), "Failed to map staging buffer"); + + /* + // This can be helpful in debugging... 
+ printf("\n"); + for (UINT k = 0; k < format->bytesPerPixel; ++k) + { + printf("[%c %c]\n", + texture2DPatterns[x][y][k], + ( (char *)mappedTexture.pData )[k]); + } + */ + + TestRequire( + !memcmp(mappedTexture.pData, texture2DPatterns[x][y], format->bytesPerPixel), + "Failed to map staging buffer"); + + pStagingBuffer->Unmap(0); + } + + pStagingBuffer->Release(); + } + + +Cleanup: + + if (pTexture) + { + pTexture->Release(); + } + for (UINT i = 0; i < size->SubResourceCount; ++i) + { + clReleaseMemObject(subResourceInfo[i].mem); + } + for (UINT i = 0; i < 4; ++i) + { + if (events[i]) + { + result = clReleaseEvent(events[i]); + TestRequire(result == CL_SUCCESS, "clReleaseEvent for event failed."); + } + } + + HarnessD3D10_TestEnd(); +} + +void TestDeviceTexture2D( + cl_device_id device, + cl_context context, + cl_command_queue command_queue, + ID3D10Device* pDevice) +{ + cl_int result = CL_SUCCESS; + cl_kernel kernels[3] = {NULL, NULL, NULL}; + + const char *sourceRaw = + " \ + __kernel void texture2D\n\ + ( \n\ + __read_only image2d_t texIn, \n\ + __write_only image2d_t texOut \n\ + ) \n\ + { \n\ + const sampler_t smp = CLK_FILTER_NEAREST; \n\ + %s value; \n\ + int2 coordIn; \n\ + int2 coordOut; \n\ + int w = get_image_width(texIn); \n\ + int h = get_image_height(texIn); \n\ + \n\ + coordIn = (int2)(0, 0); \n\ + coordOut = (int2)(1, 1); \n\ + value = read_image%s(texIn, smp, coordIn); \n\ + write_image%s(texOut, coordOut, value); \n\ + \n\ + coordIn = (int2)(w-1, h-1); \n\ + coordOut = (int2)(w-2, h-2); \n\ + value = read_image%s(texIn, smp, coordIn); \n\ + write_image%s(texOut, coordOut, value); \n\ + } \n\ + "; + + char source[2048]; + sprintf(source, sourceRaw, "float4", "f", "f", "f", "f"); + result = HarnessD3D10_CreateKernelFromSource(&kernels[0], device, context, source, "texture2D"); + TestRequire(CL_SUCCESS == result, "HarnessD3D10_CreateKernelFromSource failed."); + + sprintf(source, sourceRaw, "uint4", "ui", "ui", "ui", "ui"); + result = 
HarnessD3D10_CreateKernelFromSource(&kernels[1], device, context, source, "texture2D"); + TestRequire(CL_SUCCESS == result, "HarnessD3D10_CreateKernelFromSource failed."); + + sprintf(source, sourceRaw, "int4", "i", "i", "i", "i"); + result = HarnessD3D10_CreateKernelFromSource(&kernels[2], device, context, source, "texture2D"); + TestRequire(CL_SUCCESS == result, "HarnessD3D10_CreateKernelFromSource failed."); + + for (UINT format = 0, size = 0; format < formatCount; ++size, ++format) + { + SubTestTexture2D( + context, + command_queue, + kernels[formats[format].generic], + pDevice, + &formats[format], + &texture2DSizes[size % texture2DSizeCount]); + } + +Cleanup: + + for (UINT i = 0; i < 3; ++i) + { + if (kernels[i]) + { + clReleaseKernel(kernels[i]); + } + } +} + diff --git a/test_conformance/d3d10/texture3d.cpp b/test_conformance/d3d10/texture3d.cpp new file mode 100644 index 00000000..39bcff47 --- /dev/null +++ b/test_conformance/d3d10/texture3d.cpp @@ -0,0 +1,491 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#define _CRT_SECURE_NO_WARNINGS +#include "harness.h" + +Texture3DSize texture3DSizes[] = +{ + { + 4, // Width + 4, // Height + 4, // Depth + 1, // MipLevels + 1, // SubResourceCount + { // SubResources + { 0 }, // MipLevel + { 0 }, // MipLevel + { 0 }, // MipLevel + { 0 }, // MipLevel + }, + 0, // MiscFlags + }, + { + 127, // Width + 25, // Height + 33, // Depth + 1, // MipLevels + 1, // SubResourceCount + { // SubResources + { 0 }, // MipLevel + { 0 }, // MipLevel + { 0 }, // MipLevel + { 0 }, // MipLevel + }, + 0, // MiscFlags + }, + { + 128, // Width + 256, // Height + 64, // Depth + 4, // MipLevels + 3, // SubResourceCount + { // SubResources + { 2 }, // MipLevel + { 1 }, // MipLevel + { 0 }, // MipLevel + { 0 }, // MipLevel + }, + 0, // MiscFlags + }, + { + 512, // Width + 64, // Height + 32, // Depth + 3, // MipLevels + 1, // SubResourceCount + { // SubResources + { 2 }, // MipLevel + { 0 }, // MipLevel + { 0 }, // MipLevel + { 0 }, // MipLevel + }, + 0, // MiscFlags + }, +}; +UINT texture3DSizeCount = sizeof(texture3DSizes)/sizeof(texture3DSizes[0]); + +const char * +texture3DPatterns[2][2][2] = +{ + { + {"PlaceTheCasseroleDis", "hInAColdOvenPlaceACh"}, + {"airFacingTheOvenAndS", "itInItForeverThinkAb"}, + }, + { + {"outHowHungryYouAreWh", "enNightFallsDoNotTur"}, + {"nOnTheLightMyEyeBeca", "meInflamedIHateCamus"}, + }, +}; + +void SubTestTexture3D( + cl_context context, + cl_command_queue command_queue, + ID3D10Device* pDevice, + const TextureFormat* format, + const Texture3DSize* size) +{ + ID3D10Texture3D* pTexture = NULL; + HRESULT hr = S_OK; + + cl_int result = CL_SUCCESS; + + HarnessD3D10_TestBegin("3D Texture: Format=%s, Width=%d, Height=%d, Depth=%d, MipLevels=%d", + format->name_format, + size->Width, + size->Height, + size->Depth, + size->MipLevels); + + struct + { + cl_mem mem; + UINT subResource; + UINT width; + UINT height; + UINT depth; + } + subResourceInfo[4]; + + // create the D3D10 resources + { + D3D10_TEXTURE3D_DESC desc; + 
memset(&desc, 0, sizeof(desc) ); + desc.Width = size->Width; + desc.Height = size->Height; + desc.Depth = size->Depth; + desc.MipLevels = size->MipLevels; + desc.Format = format->format; + desc.Usage = D3D10_USAGE_DEFAULT; + desc.BindFlags = D3D10_BIND_SHADER_RESOURCE; + desc.CPUAccessFlags = 0; + desc.MiscFlags = 0; + + hr = pDevice->CreateTexture3D(&desc, NULL, &pTexture); + TestRequire(SUCCEEDED(hr), "CreateTexture3D failed."); + } + + // initialize some useful variables + for (UINT i = 0; i < size->SubResourceCount; ++i) + { + // compute the expected values for the subresource + subResourceInfo[i].subResource = size->subResources[i].MipLevel; + subResourceInfo[i].width = size->Width; + subResourceInfo[i].height = size->Height; + subResourceInfo[i].depth = size->Depth; + for (UINT j = 0; j < size->subResources[i].MipLevel; ++j) { + subResourceInfo[i].width /= 2; + subResourceInfo[i].height /= 2; + subResourceInfo[i].depth /= 2; + } + } + + // copy a pattern into the corners of the image, coordinates + for (UINT i = 0; i < size->SubResourceCount; ++i) + for (UINT x = 0; x < 2; ++x) + for (UINT y = 0; y < 2; ++y) + for (UINT z = 0; z < 2; ++z) + { + // create the staging buffer + ID3D10Texture3D* pStagingBuffer = NULL; + { + D3D10_TEXTURE3D_DESC desc = {0}; + desc.Width = 1; + desc.Height = 1; + desc.Depth = 1; + desc.MipLevels = 1; + desc.Format = format->format; + desc.Usage = D3D10_USAGE_STAGING; + desc.BindFlags = 0; + desc.CPUAccessFlags = D3D10_CPU_ACCESS_READ | D3D10_CPU_ACCESS_WRITE; + desc.MiscFlags = 0; + hr = pDevice->CreateTexture3D(&desc, NULL, &pStagingBuffer); + TestRequire(SUCCEEDED(hr), "CreateTexture3D failed."); + } + + // write the data to the staging buffer + { + D3D10_MAPPED_TEXTURE3D mappedTexture; + hr = pStagingBuffer->Map( + 0, + D3D10_MAP_READ_WRITE, + 0, + &mappedTexture); + memcpy(mappedTexture.pData, texture3DPatterns[x][y][z], format->bytesPerPixel); + pStagingBuffer->Unmap(0); + } + + // copy the data to to the texture + { + 
D3D10_BOX box = {0}; + box.front = 0; box.back = 1; + box.top = 0; box.bottom = 1; + box.left = 0; box.right = 1; + pDevice->CopySubresourceRegion( + pTexture, + subResourceInfo[i].subResource, + x ? subResourceInfo[i].width - 1 : 0, + y ? subResourceInfo[i].height - 1 : 0, + z ? subResourceInfo[i].depth - 1 : 0, + pStagingBuffer, + 0, + &box); + } + + pStagingBuffer->Release(); + } + + // create the cl_mem objects for the resources and verify its sanity + for (UINT i = 0; i < size->SubResourceCount; ++i) + { + // create a cl_mem for the resource + subResourceInfo[i].mem = clCreateFromD3D10Texture3DKHR( + context, + 0, + pTexture, + subResourceInfo[i].subResource, + &result); + TestRequire(result == CL_SUCCESS, "clCreateFromD3D10Texture3DKHR failed"); + + // query resource pointer and verify + ID3D10Resource* clResource = NULL; + result = clGetMemObjectInfo( + subResourceInfo[i].mem, + CL_MEM_D3D10_RESOURCE_KHR, + sizeof(clResource), + &clResource, + NULL); + TestRequire(result == CL_SUCCESS, "clGetMemObjectInfo for CL_MEM_D3D10_RESOURCE_KHR failed."); + TestRequire(clResource == pTexture, "clGetMemObjectInfo for CL_MEM_D3D10_RESOURCE_KHR returned incorrect value."); + + // query subresource and verify + UINT clSubResource; + result = clGetImageInfo( + subResourceInfo[i].mem, + CL_IMAGE_D3D10_SUBRESOURCE_KHR, + sizeof(clSubResource), + &clSubResource, + NULL); + TestRequire(result == CL_SUCCESS, "clGetImageInfo for CL_IMAGE_D3D10_SUBRESOURCE_KHR failed"); + TestRequire(clSubResource == subResourceInfo[i].subResource, "clGetImageInfo for CL_IMAGE_D3D10_SUBRESOURCE_KHR returned incorrect value."); + + // query format and verify + cl_image_format clFormat; + result = clGetImageInfo( + subResourceInfo[i].mem, + CL_IMAGE_FORMAT, + sizeof(clFormat), + &clFormat, + NULL); + TestRequire(result == CL_SUCCESS, "clGetImageInfo for CL_IMAGE_FORMAT failed"); + TestRequire(clFormat.image_channel_order == format->channel_order, "clGetImageInfo for CL_IMAGE_FORMAT returned 
incorrect channel order."); + TestRequire(clFormat.image_channel_data_type == format->channel_type, "clGetImageInfo for CL_IMAGE_FORMAT returned incorrect channel data type."); + + // query width + size_t width; + result = clGetImageInfo( + subResourceInfo[i].mem, + CL_IMAGE_WIDTH, + sizeof(width), + &width, + NULL); + TestRequire(result == CL_SUCCESS, "clGetImageInfo for CL_IMAGE_WIDTH failed"); + TestRequire(width == subResourceInfo[i].width, "clGetImageInfo for CL_IMAGE_HEIGHT returned incorrect value."); + + // query height + size_t height; + result = clGetImageInfo( + subResourceInfo[i].mem, + CL_IMAGE_HEIGHT, + sizeof(height), + &height, + NULL); + TestRequire(result == CL_SUCCESS, "clGetImageInfo for CL_IMAGE_HEIGHT failed"); + TestRequire(height == subResourceInfo[i].height, "clGetImageInfo for CL_IMAGE_HEIGHT returned incorrect value."); + + // query depth + size_t depth; + result = clGetImageInfo( + subResourceInfo[i].mem, + CL_IMAGE_DEPTH, + sizeof(depth), + &depth, + NULL); + TestRequire(result == CL_SUCCESS, "clGetImageInfo for CL_IMAGE_DEPTH failed"); + TestRequire(depth == subResourceInfo[i].depth, "clGetImageInfo for CL_IMAGE_DEPTH returned incorrect value."); + + } + + // acquire the resources for OpenCL + { + cl_mem memToAcquire[MAX_REGISTERED_SUBRESOURCES]; + + // cut the registered sub-resources into two sets and send the acquire calls for them separately + for(UINT i = 0; i < size->SubResourceCount; ++i) + { + memToAcquire[i] = subResourceInfo[i].mem; + } + + // do the acquire + result = clEnqueueAcquireD3D10ObjectsKHR( + command_queue, + size->SubResourceCount, + memToAcquire, + 0, + NULL, + NULL); + TestRequire(result == CL_SUCCESS, "clEnqueueAcquireD3D10ObjectsKHR failed."); + } + + // download the data using OpenCL & compare with the expected results + // copy the corners of the image into the image + for (UINT i = 0; i < size->SubResourceCount; ++i) + for (UINT x = 0; x < 2; ++x) + for (UINT y = 0; y < 2; ++y) + for (UINT z = 0; z < 2; 
++z) + { + if (x == y && y == z && 0) + { + continue; + } + size_t src[3] = + { + x ? subResourceInfo[i].width - 1 : 0, + y ? subResourceInfo[i].height - 1 : 0, + z ? subResourceInfo[i].depth - 1 : 0, + }; + size_t dst[3] = + { + x ? subResourceInfo[i].width - 2 : 1, + y ? subResourceInfo[i].height - 2 : 1, + z ? subResourceInfo[i].depth - 2 : 1, + }; + size_t region[3] = + { + 1, + 1, + 1, + }; + result = clEnqueueCopyImage( + command_queue, + subResourceInfo[i].mem, + subResourceInfo[i].mem, + src, + dst, + region, + 0, + NULL, + NULL); + TestRequire(result == CL_SUCCESS, "clEnqueueCopyImage failed."); + } + + // release the resource from OpenCL + { + cl_mem memToAcquire[MAX_REGISTERED_SUBRESOURCES]; + for(UINT i = 0; i < size->SubResourceCount; ++i) + { + memToAcquire[i] = subResourceInfo[i].mem; + } + + // do the release + result = clEnqueueReleaseD3D10ObjectsKHR( + command_queue, + size->SubResourceCount, + memToAcquire, + 0, + NULL, + NULL); + TestRequire(result == CL_SUCCESS, "clEnqueueReleaseD3D10ObjectsKHR failed."); + } + + for (UINT i = 0; i < size->SubResourceCount; ++i) + for (UINT x = 0; x < 2; ++x) + for (UINT y = 0; y < 2; ++y) + for (UINT z = 0; z < 2; ++z) + { + // create the staging buffer + ID3D10Texture3D* pStagingBuffer = NULL; + { + D3D10_TEXTURE3D_DESC desc = {0}; + desc.Width = 1; + desc.Height = 1; + desc.Depth = 1; + desc.MipLevels = 1; + desc.Format = format->format; + desc.Usage = D3D10_USAGE_STAGING; + desc.BindFlags = 0; + desc.CPUAccessFlags = D3D10_CPU_ACCESS_READ | D3D10_CPU_ACCESS_WRITE; + desc.MiscFlags = 0; + hr = pDevice->CreateTexture3D(&desc, NULL, &pStagingBuffer); + TestRequire(SUCCEEDED(hr), "Failed to create staging buffer."); + } + + // wipe out the staging buffer to make sure we don't get stale values + { + D3D10_MAPPED_TEXTURE3D mappedTexture; + hr = pStagingBuffer->Map( + 0, + D3D10_MAP_READ_WRITE, + 0, + &mappedTexture); + TestRequire(SUCCEEDED(hr), "Failed to map staging buffer"); + memset(mappedTexture.pData, 0, 
format->bytesPerPixel); + pStagingBuffer->Unmap(0); + } + + // copy the pixel to the staging buffer + { + D3D10_BOX box = {0}; + box.left = x ? subResourceInfo[i].width - 2 : 1; box.right = box.left + 1; + box.top = y ? subResourceInfo[i].height - 2 : 1; box.bottom = box.top + 1; + box.front = z ? subResourceInfo[i].depth - 2 : 1; box.back = box.front + 1; + pDevice->CopySubresourceRegion( + pStagingBuffer, + 0, + 0, + 0, + 0, + pTexture, + subResourceInfo[i].subResource, + &box); + } + + // make sure we read back what was written next door + { + D3D10_MAPPED_TEXTURE3D mappedTexture; + hr = pStagingBuffer->Map( + 0, + D3D10_MAP_READ_WRITE, + 0, + &mappedTexture); + TestRequire(SUCCEEDED(hr), "Failed to map staging buffer"); + + /* + // This can be helpful in debugging... + printf("\n"); + for (UINT k = 0; k < format->bytesPerPixel; ++k) + { + printf("[%c %c]\n", + texture2DPatterns[x][y][k], + ( (char *)mappedTexture.pData )[k]); + } + */ + + TestRequire( + !memcmp(mappedTexture.pData, texture3DPatterns[x][y][z], format->bytesPerPixel), + "Failed to map staging buffer"); + + pStagingBuffer->Unmap(0); + } + + pStagingBuffer->Release(); + } + + +Cleanup: + + if (pTexture) + { + pTexture->Release(); + } + for (UINT i = 0; i < size->SubResourceCount; ++i) + { + clReleaseMemObject(subResourceInfo[i].mem); + } + + HarnessD3D10_TestEnd(); +} + + +void TestDeviceTexture3D( + cl_device_id device, + cl_context context, + cl_command_queue command_queue, + ID3D10Device* pDevice) +{ + cl_int result = CL_SUCCESS; + + + for (UINT format = 0, size = 0; format < formatCount; ++size, ++format) + { + SubTestTexture3D( + context, + command_queue, + pDevice, + &formats[format], + &texture3DSizes[size % texture3DSizeCount]); + } +} + diff --git a/test_conformance/d3d11/CMakeLists.txt b/test_conformance/d3d11/CMakeLists.txt new file mode 100644 index 00000000..21a035ed --- /dev/null +++ b/test_conformance/d3d11/CMakeLists.txt @@ -0,0 +1,47 @@ +if(WIN32) + +set(D3D11_INCLUDE_DIR 
$ENV{NV_TOOLS}/sdk/DirectX_Aug2009/Include) + +if(${ARCH} STREQUAL "i686") +set(D3D11_LIB_DIR $ENV{NV_TOOLS}/sdk/DirectX_Aug2009/Lib/x86) +endif(${ARCH} STREQUAL "i686") + +if(${ARCH} STREQUAL "x86_64") +set(D3D11_LIB_DIR $ENV{NV_TOOLS}/sdk/DirectX_Aug2009/Lib/x64) +endif(${ARCH} STREQUAL "x86_64") + +list(APPEND CLConform_INCLUDE_DIR ${D3D11_INCLUDE_DIR}) +include_directories (${CLConform_SOURCE_DIR}/test_common/harness + ${CLConform_INCLUDE_DIR} ) +link_directories(${CL_LIB_DIR}, ${D3D11_LIB_DIR}) + +list(APPEND CLConform_LIBRARIES d3d11 dxgi) + + +set(D3D11_SOURCES + buffer.cpp + texture2d.cpp + texture3d.cpp + misc.cpp + main.cpp + harness.cpp + ../../test_common/harness/errorHelpers.c + ../../test_common/harness/threadTesting.c + ../../test_common/harness/testHarness.c + ../../test_common/harness/kernelHelpers.c + ../../test_common/harness/mt19937.c + ../../test_common/harness/conversions.c + ../../test_common/harness/msvc9.c + ../../test_common/harness/parseParameters.cpp) + +add_executable(conformance_test_d3d11 + ${D3D11_SOURCES}) + +set_source_files_properties( + ${D3D11_SOURCES} + PROPERTIES LANGUAGE CXX) + +TARGET_LINK_LIBRARIES(conformance_test_d3d11 + ${CLConform_LIBRARIES}) + +endif(WIN32) diff --git a/test_conformance/d3d11/buffer.cpp b/test_conformance/d3d11/buffer.cpp new file mode 100644 index 00000000..e10e45ca --- /dev/null +++ b/test_conformance/d3d11/buffer.cpp @@ -0,0 +1,321 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "harness.h" + +#define ADD_BUFFER_PROPERTIES(w, x, y, z) \ + { w, x, y, z, #x, #y, #z, } + +BufferProperties bufferProperties[] = +{ + ADD_BUFFER_PROPERTIES( 0x110, D3D11_BIND_CONSTANT_BUFFER, D3D11_USAGE_DYNAMIC, D3D11_CPU_ACCESS_WRITE), + ADD_BUFFER_PROPERTIES( 0x1100, D3D11_BIND_CONSTANT_BUFFER, D3D11_USAGE_DYNAMIC, D3D11_CPU_ACCESS_WRITE), + ADD_BUFFER_PROPERTIES( 0x8000, D3D11_BIND_CONSTANT_BUFFER, D3D11_USAGE_DYNAMIC, D3D11_CPU_ACCESS_WRITE), + + ADD_BUFFER_PROPERTIES( 0x7FFFF, D3D11_BIND_SHADER_RESOURCE, D3D11_USAGE_DEFAULT, 0), + ADD_BUFFER_PROPERTIES( 0x110000, D3D11_BIND_SHADER_RESOURCE, D3D11_USAGE_DEFAULT, 0), + ADD_BUFFER_PROPERTIES( 0x110000, D3D11_BIND_STREAM_OUTPUT, D3D11_USAGE_DEFAULT, 0), + ADD_BUFFER_PROPERTIES( 0x110001, D3D11_BIND_STREAM_OUTPUT, D3D11_USAGE_DEFAULT, 0), + + ADD_BUFFER_PROPERTIES( 0x11, D3D11_BIND_VERTEX_BUFFER, D3D11_USAGE_DEFAULT, 0), + ADD_BUFFER_PROPERTIES( 0x11, D3D11_BIND_INDEX_BUFFER, D3D11_USAGE_DYNAMIC, D3D11_CPU_ACCESS_WRITE), + ADD_BUFFER_PROPERTIES( 0x121, D3D11_BIND_VERTEX_BUFFER, D3D11_USAGE_DEFAULT, 0), + ADD_BUFFER_PROPERTIES( 0x1234, D3D11_BIND_INDEX_BUFFER, D3D11_USAGE_DEFAULT, 0), + ADD_BUFFER_PROPERTIES( 0x12345, D3D11_BIND_VERTEX_BUFFER, D3D11_USAGE_DYNAMIC, D3D11_CPU_ACCESS_WRITE), + ADD_BUFFER_PROPERTIES( 0x123456, D3D11_BIND_INDEX_BUFFER, D3D11_USAGE_DEFAULT, 0), +#if 0 // avoid large sizes on automation + ADD_BUFFER_PROPERTIES( 0x1234567, D3D11_BIND_INDEX_BUFFER, D3D11_USAGE_DYNAMIC, D3D11_CPU_ACCESS_WRITE), + + ADD_BUFFER_PROPERTIES( 0x4000000, D3D11_BIND_VERTEX_BUFFER, D3D11_USAGE_DEFAULT, 0), + ADD_BUFFER_PROPERTIES( 0x4000004, D3D11_BIND_VERTEX_BUFFER, D3D11_USAGE_DEFAULT, 0), + ADD_BUFFER_PROPERTIES( 0x4000008, D3D11_BIND_VERTEX_BUFFER, D3D11_USAGE_DEFAULT, 0), + ADD_BUFFER_PROPERTIES( 0x4000011, D3D11_BIND_VERTEX_BUFFER, D3D11_USAGE_DEFAULT, 0), + ADD_BUFFER_PROPERTIES( 
0x4000014, D3D11_BIND_VERTEX_BUFFER, D3D11_USAGE_DEFAULT, 0), +#endif +}; +UINT bufferPropertyCount = sizeof(bufferProperties)/sizeof(bufferProperties[0]); + +void SubTestBuffer( + cl_context context, + cl_command_queue command_queue, + ID3D11Device* pDevice, + ID3D11DeviceContext* pDC, + const BufferProperties* props) +{ + ID3D11Buffer* pBuffer = NULL; + HRESULT hr = S_OK; + cl_mem mem = NULL; + cl_int result = CL_SUCCESS; + + HarnessD3D11_TestBegin("Buffer: Size=%d, BindFlags=%s, Usage=%s, CPUAccess=%s", + props->ByteWidth, + props->name_BindFlags, + props->name_Usage, + props->name_CPUAccess); + + // create the D3D11 resource + { + D3D11_BUFFER_DESC desc = {0}; + desc.ByteWidth = props->ByteWidth; + desc.Usage = props->Usage; + desc.CPUAccessFlags = props->CPUAccess; + desc.BindFlags = props->BindFlags; + desc.MiscFlags = 0; + hr = pDevice->CreateBuffer(&desc, NULL, &pBuffer); + TestRequire(SUCCEEDED(hr), "Creating vertex buffer failed!"); + } + + // populate the D3D11 resource with data + { + ID3D11Buffer* pStagingBuffer = NULL; + char *pStagingData = NULL; + D3D11_MAPPED_SUBRESOURCE map = {0}; + + // create a staging buffer to use to copy data to the D3D buffer + D3D11_BUFFER_DESC desc = {0}; + desc.ByteWidth = 16; + desc.Usage = D3D11_USAGE_STAGING; + desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE|D3D11_CPU_ACCESS_READ; + desc.BindFlags = 0; + desc.MiscFlags = 0; + hr = pDevice->CreateBuffer(&desc, NULL, &pStagingBuffer); + TestRequire(SUCCEEDED(hr), "Creating staging vertex buffer failed!"); + + // populate the staging buffer + hr = pDC->Map( + pStagingBuffer, + 0, + D3D11_MAP_READ_WRITE, + 0, + &map); + TestRequire(SUCCEEDED(hr), "Map failed!"); + memcpy(map.pData, "abcdXXXXxxxx1234", 16); + pDC->Unmap(pStagingBuffer, 0); + TestRequire(SUCCEEDED(hr), "Unmap failed!"); + + // copy 'abcdXXXX' to the front of the buffer and 'xxxx1234' to the back + D3D11_BOX box = {0}; + box.front = 0; + box.back = 1; + box.top = 0; + box.bottom = 1; + + box.left = 0; + 
box.right = 8; + pDC->CopySubresourceRegion( + pBuffer, + 0, + 0, + 0, + 0, + pStagingBuffer, + 0, + &box); + box.left = 8; + box.right = 16; + pDC->CopySubresourceRegion( + pBuffer, + 0, + props->ByteWidth-8, + 0, + 0, + pStagingBuffer, + 0, + &box); + pStagingBuffer->Release(); + } + + // share the resource with OpenCL + { + mem = clCreateFromD3D11BufferKHR( + context, + 0, + pBuffer, + &result); + TestRequire(CL_SUCCESS == result, "clCreateFromD3D11BufferKHR failed"); + } + + // validate the OpenCL mem obj's properties + { + ID3D11Resource* clResource = NULL; + result = clGetMemObjectInfo( + mem, + CL_MEM_D3D11_RESOURCE_KHR, + sizeof(clResource), + &clResource, + NULL); + TestRequire(result == CL_SUCCESS, "clGetMemObjectInfo for CL_MEM_D3D11_RESOURCE_KHR failed."); + TestRequire(clResource == pBuffer, "clGetMemObjectInfo for CL_MEM_D3D11_RESOURCE_KHR returned incorrect value."); + } + + // acquire the resource from OpenCL + { + result = clEnqueueAcquireD3D11ObjectsKHR( + command_queue, + 1, + &mem, + 0, + NULL, + NULL); + TestRequire(result == CL_SUCCESS, "clEnqueueAcquireD3D11ObjectsKHR failed."); + } + + // read+write data from the buffer in OpenCL + { + // overwrite the 'XXXX' with '1234' and the 'xxxx' with 'abcd' so we now have + // 'abcd1234' at the beginning and end of the buffer + result = clEnqueueCopyBuffer( + command_queue, + mem, + mem, + 0, + props->ByteWidth-8, + 4, + 0, + NULL, + NULL); + TestRequire(result == CL_SUCCESS, "clEnqueueCopyBuffer failed."); + + result = clEnqueueCopyBuffer( + command_queue, + mem, + mem, + props->ByteWidth-4, + 4, + 4, + 0, + NULL, + NULL); + TestRequire(result == CL_SUCCESS, "clEnqueueCopyBuffer failed."); + } + + // release the resource from OpenCL + { + result = clEnqueueReleaseD3D11ObjectsKHR( + command_queue, + 1, + &mem, + 0, + NULL, + NULL); + TestRequire(result == CL_SUCCESS, "clEnqueueReleaseD3D11ObjectsKHR failed."); + } + + // read data in D3D + { + ID3D11Buffer* pStagingBuffer = NULL; + char *pStagingData 
= NULL; + D3D11_MAPPED_SUBRESOURCE map = {0}; + + // create a staging buffer to read the data back + D3D11_BUFFER_DESC desc = {0}; + desc.ByteWidth = 16; + desc.Usage = D3D11_USAGE_STAGING; + desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE|D3D11_CPU_ACCESS_READ; + desc.BindFlags = 0; + desc.MiscFlags = 0; + hr = pDevice->CreateBuffer(&desc, NULL, &pStagingBuffer); + TestRequire(SUCCEEDED(hr), "Creating staging vertex buffer failed!"); + + // make sure the staging buffer doesn't get stale data + hr = pDC->Map( + pStagingBuffer, + 0, + D3D11_MAP_READ_WRITE, + 0, + &map); + TestRequire(SUCCEEDED(hr), "Map failed!"); + memset(map.pData, 0, 16); + pDC->Unmap(pStagingBuffer, 0); + TestRequire(SUCCEEDED(hr), "Unmap failed!"); + + // copy the 'abcd1234' from the front and back of the buffer to the staging buffer + D3D11_BOX box = {0}; + box.front = 0; + box.back = 1; + box.top = 0; + box.bottom = 1; + + box.left = 0; + box.right = 8; + pDC->CopySubresourceRegion( + pStagingBuffer, + 0, + 0, + 0, + 0, + pBuffer, + 0, + &box); + box.left = props->ByteWidth-8; + box.right = props->ByteWidth; + pDC->CopySubresourceRegion( + pStagingBuffer, + 0, + 8, + 0, + 0, + pBuffer, + 0, + &box); + TestRequire(SUCCEEDED(hr), "CopySubresourceRegion failed!"); + + // verify that we got the 'abcd1234' + hr = pDC->Map( + pStagingBuffer, + 0, + D3D11_MAP_READ_WRITE, + 0, + &map); + TestRequire(SUCCEEDED(hr), "Map failed!"); + TestRequire(!memcmp(map.pData, "abcd1234abcd1234", 16), "Data was not accurately"); + pDC->Unmap(pStagingBuffer, 0); + TestRequire(SUCCEEDED(hr), "Unmap failed!"); + + pStagingBuffer->Release(); + } + +Cleanup: + + if (pBuffer) + { + pBuffer->Release(); + } + if (mem) + { + clReleaseMemObject(mem); + } + + HarnessD3D11_TestEnd(); +} + + +void TestDeviceBuffer( + cl_context context, + cl_command_queue command_queue, + ID3D11Device* pDevice, + ID3D11DeviceContext* pDC) +{ + for (UINT i = 0; i < bufferPropertyCount; ++i) + { + SubTestBuffer( + context, + command_queue, + 
pDevice, + pDC, + &bufferProperties[i]); + } +} + diff --git a/test_conformance/d3d11/harness.cpp b/test_conformance/d3d11/harness.cpp new file mode 100644 index 00000000..7d33b7f0 --- /dev/null +++ b/test_conformance/d3d11/harness.cpp @@ -0,0 +1,456 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#define INITGUID +#include "harness.h" + +#include +#include +#include + +/* + * OpenCL state + */ + +clGetDeviceIDsFromD3D11KHR_fn clGetDeviceIDsFromD3D11KHR = NULL; +clCreateFromD3D11BufferKHR_fn clCreateFromD3D11BufferKHR = NULL; +clCreateFromD3D11Texture2DKHR_fn clCreateFromD3D11Texture2DKHR = NULL; +clCreateFromD3D11Texture3DKHR_fn clCreateFromD3D11Texture3DKHR = NULL; +clEnqueueAcquireD3D11ObjectsKHR_fn clEnqueueAcquireD3D11ObjectsKHR = NULL; +clEnqueueReleaseD3D11ObjectsKHR_fn clEnqueueReleaseD3D11ObjectsKHR = NULL; + +#define INITPFN(x) \ + x = (x ## _fn)clGetExtensionFunctionAddressForPlatform(platform, #x); NonTestRequire(x, "Failed to get function pointer for %s", #x); + +void +HarnessD3D11_ExtensionCheck() +{ + cl_int result = CL_SUCCESS; + cl_platform_id platform = NULL; + + HarnessD3D11_TestBegin("Extension query"); + + bool platform_d3d11 = false; // Does platform support the extension? + { + std::vector< char > buffer; + size_t size = 0; + result = clGetPlatformIDs( 1, &platform, NULL ); + NonTestRequire( result == CL_SUCCESS, "Failed to get any platforms." 
); + result = clGetPlatformInfo( platform, CL_PLATFORM_EXTENSIONS, 0, NULL, & size ); + NonTestRequire( result == CL_SUCCESS, "Failed to get size of extension string." ); + buffer.resize( size ); + result = clGetPlatformInfo( platform, CL_PLATFORM_EXTENSIONS, buffer.size(), & buffer.front(), & size ); + NonTestRequire( result == CL_SUCCESS, "Failed to get extension string." ); + std::string extensions = std::string( " " ) + & buffer.front() + " "; + platform_d3d11 = ( extensions.find( " cl_khr_d3d11_sharing " ) != std::string::npos ); + } + + // Platform is required to report the extension only if all devices support it, + // so let us iterate through all the devices and count devices supporting the extension. + + // Get list of all devices. + std::vector< cl_device_id > devices; + cl_uint num_devices = 0; + result = clGetDeviceIDs( platform, CL_DEVICE_TYPE_ALL, 0, NULL, & num_devices ); + NonTestRequire( result == CL_SUCCESS, "Failed to get number of devices." ); + devices.resize( num_devices ); + result = clGetDeviceIDs( platform, CL_DEVICE_TYPE_ALL, devices.size(), & devices.front(), & num_devices ); + NonTestRequire( result == CL_SUCCESS, "Failed to get list of device ids." ); + NonTestRequire( num_devices == devices.size(), "Failed to get list of device ids." ); + + // Iterate through the devices and count devices supporting the extension. + cl_uint num_devices_d3d11 = 0; // Number of devices supporting cl_khr_d3d11_sharing. + for ( cl_uint i = 0; i < devices.size(); ++ i ) + { + std::vector< char > buffer; + size_t size = 0; + result = clGetDeviceInfo( devices[ i ], CL_DEVICE_EXTENSIONS, 0, NULL, & size ); + NonTestRequire( result == CL_SUCCESS, "Failed to get size of extension string." ); + buffer.resize( size ); + result = clGetDeviceInfo( devices[ i ], CL_DEVICE_EXTENSIONS, buffer.size(), & buffer.front(), & size ); + NonTestRequire( result == CL_SUCCESS, "Failed to get extension string." 
); + std::string extensions = std::string( " " ) + & buffer.front() + " "; + if ( extensions.find( " cl_khr_d3d11_sharing " ) != std::string::npos ) + { + ++ num_devices_d3d11; + } + } + + OSVERSIONINFO osvi; + osvi.dwOSVersionInfoSize = sizeof(osvi); + GetVersionEx(&osvi); + if (osvi.dwMajorVersion <= 5) + { + // Neither platform nor devices should declare support. + TestRequire( ! platform_d3d11, "Platform should not declare extension on Windows < 6" ); + TestRequire( num_devices_d3d11 == 0, "Devices should not declare extension on Windows < 6" ); + } + else + { + if ( num_devices_d3d11 == num_devices ) + { + // All the devices declare support, so platform must declare support as well. + TestRequire( platform_d3d11, "Extension should be exported on Windows >= 6" ); + } + else + { + // Not all the devices support th eextension => platform should not declare it. + TestRequire( ! platform_d3d11, "Extension should not be exported on Windows >= 6" ); + } + } + +Cleanup: + HarnessD3D11_TestEnd(); + + // early-out of the extension is not present + if ( num_devices_d3d11 == 0 ) + { + HarnessD3D11_TestStats(); + } +} + +void +HarnessD3D11_Initialize(cl_platform_id platform) +{ + HarnessD3D11_ExtensionCheck(); + + // extract function pointers for exported functions + INITPFN(clGetDeviceIDsFromD3D11KHR); + INITPFN(clCreateFromD3D11BufferKHR); + INITPFN(clCreateFromD3D11Texture2DKHR); + INITPFN(clCreateFromD3D11Texture3DKHR); + INITPFN(clEnqueueAcquireD3D11ObjectsKHR); + INITPFN(clEnqueueReleaseD3D11ObjectsKHR); +} + +/* + * Window management + */ + +static IDXGISwapChain* HarnessD3D11_pSwapChain = NULL; +static ID3D11Device* HarnessD3D11_pDevice = NULL; +static ID3D11DeviceContext* HarnessD3D11_pDC = NULL; +static HWND HarnessD3D11_hWnd = NULL; + +static LRESULT WINAPI HarnessD3D11_Proc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam) +{ + switch(msg) + { + case WM_KEYDOWN: + return 0; + break; + case WM_DESTROY: + HarnessD3D11_hWnd = NULL; + PostQuitMessage(0); + return 
0; + case WM_PAINT: + ValidateRect(hWnd, NULL); + return 0; + } + return DefWindowProc(hWnd, msg, wParam, lParam); +} + +static void HarnessD3D11_InteractiveLoop() +{ + MSG msg; + while(PeekMessage(&msg,HarnessD3D11_hWnd,0,0,PM_REMOVE)) + { + TranslateMessage(&msg); + DispatchMessage(&msg); + } +} + +cl_int HarnessD3D11_CreateDevice( + IDXGIAdapter* pAdapter, + ID3D11Device **ppDevice, + ID3D11DeviceContext** ppDC) +{ + HRESULT hr = S_OK; + unsigned int cuStatus = 1; + + *ppDevice = NULL; + + // create window + static WNDCLASSEX wc = + { + sizeof(WNDCLASSEX), + CS_CLASSDC, + HarnessD3D11_Proc, + 0L, + 0L, + GetModuleHandle(NULL), + NULL, + NULL, + NULL, + NULL, + _T( "cl_khr_d3d11_sharing_conformance" ), + NULL + }; + RegisterClassEx(&wc); + HarnessD3D11_hWnd = CreateWindow( + _T( "cl_khr_d3d11_sharing_conformance" ), + _T( "cl_khr_d3d11_sharing_conformance" ), + WS_OVERLAPPEDWINDOW, + 0, 0, 256, 256, + NULL, + NULL, + wc.hInstance, + NULL); + NonTestRequire(0 != HarnessD3D11_hWnd, "Failed to create window"); + + ShowWindow(HarnessD3D11_hWnd,SW_SHOWDEFAULT); + UpdateWindow(HarnessD3D11_hWnd); + + RECT rc; + GetClientRect(HarnessD3D11_hWnd, &rc); + UINT width = rc.right - rc.left; + UINT height = rc.bottom - rc.top; + + // Create device and swapchain + DXGI_SWAP_CHAIN_DESC sd; + ZeroMemory( &sd, sizeof(sd) ); + sd.BufferCount = 1; + sd.BufferDesc.Width = width; + sd.BufferDesc.Height = height; + sd.BufferDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + sd.BufferDesc.RefreshRate.Numerator = 60; + sd.BufferDesc.RefreshRate.Denominator = 1; + sd.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; + sd.OutputWindow = HarnessD3D11_hWnd; + sd.SampleDesc.Count = 1; + sd.SampleDesc.Quality = 0; + sd.Windowed = TRUE; + D3D_FEATURE_LEVEL requestedFeatureLevels[] = {D3D_FEATURE_LEVEL_10_0}; + D3D_FEATURE_LEVEL featureLevel = D3D_FEATURE_LEVEL_11_0; + hr = D3D11CreateDeviceAndSwapChain( + NULL, // pAdapter, + D3D_DRIVER_TYPE_HARDWARE, + NULL, + 0, + requestedFeatureLevels, + 1, + 
D3D11_SDK_VERSION, + &sd, + &HarnessD3D11_pSwapChain, + &HarnessD3D11_pDevice, + &featureLevel, + &HarnessD3D11_pDC); + if (FAILED(hr) ) { + return CL_DEVICE_NOT_FOUND; + } + + *ppDevice = HarnessD3D11_pDevice; + *ppDC = HarnessD3D11_pDC; + return CL_SUCCESS; +} + +void HarnessD3D11_DestroyDevice() +{ + HarnessD3D11_pSwapChain->Release(); + HarnessD3D11_pDevice->Release(); + HarnessD3D11_pDC->Release(); + + if (HarnessD3D11_hWnd) DestroyWindow(HarnessD3D11_hWnd); + HarnessD3D11_hWnd = 0; +} + +/* + * + * texture formats + * + */ + +#define ADD_TEXTURE_FORMAT(x,y,z,a,b,g) { x, y, z, a*b/8, g, #x, #y, #z, } +TextureFormat formats[] = +{ + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R32G32B32A32_FLOAT , CL_RGBA , CL_FLOAT , 32, 4, TextureFormat::GENERIC_FLOAT ), + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R32G32B32A32_UINT , CL_RGBA , CL_UNSIGNED_INT32 , 32, 4, TextureFormat::GENERIC_UINT ), + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R32G32B32A32_SINT , CL_RGBA , CL_SIGNED_INT32 , 32, 4, TextureFormat::GENERIC_SINT ), + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R16G16B16A16_FLOAT , CL_RGBA , CL_HALF_FLOAT , 16, 4, TextureFormat::GENERIC_FLOAT ), + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R16G16B16A16_UNORM , CL_RGBA , CL_UNORM_INT16 , 16, 4, TextureFormat::GENERIC_FLOAT ), + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R16G16B16A16_UINT , CL_RGBA , CL_UNSIGNED_INT16 , 16, 4, TextureFormat::GENERIC_UINT ), + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R16G16B16A16_SNORM , CL_RGBA , CL_SNORM_INT16 , 16, 4, TextureFormat::GENERIC_FLOAT ), + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R16G16B16A16_SINT , CL_RGBA , CL_SIGNED_INT16 , 16, 4, TextureFormat::GENERIC_SINT ), + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R8G8B8A8_UNORM , CL_RGBA , CL_UNORM_INT8 , 8, 4, TextureFormat::GENERIC_FLOAT ), + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R8G8B8A8_UINT , CL_RGBA , CL_UNSIGNED_INT8 , 8, 4, TextureFormat::GENERIC_UINT ), + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R8G8B8A8_SNORM , CL_RGBA , CL_SNORM_INT8 , 8, 4, TextureFormat::GENERIC_FLOAT ), + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R8G8B8A8_SINT , 
CL_RGBA , CL_SIGNED_INT8 , 8, 4, TextureFormat::GENERIC_SINT ), + + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R32G32_FLOAT , CL_RG , CL_FLOAT , 32, 2, TextureFormat::GENERIC_FLOAT ), + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R32G32_UINT , CL_RG , CL_UNSIGNED_INT32 , 32, 2, TextureFormat::GENERIC_UINT ), + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R32G32_SINT , CL_RG , CL_SIGNED_INT32 , 32, 2, TextureFormat::GENERIC_SINT ), + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R16G16_FLOAT , CL_RG , CL_HALF_FLOAT , 16, 2, TextureFormat::GENERIC_FLOAT ), + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R16G16_UNORM , CL_RG , CL_UNORM_INT16 , 16, 2, TextureFormat::GENERIC_FLOAT ), + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R16G16_UINT , CL_RG , CL_UNSIGNED_INT16 , 16, 2, TextureFormat::GENERIC_UINT ), + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R16G16_SNORM , CL_RG , CL_SNORM_INT16 , 16, 2, TextureFormat::GENERIC_FLOAT ), + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R16G16_SINT , CL_RG , CL_SIGNED_INT16 , 16, 2, TextureFormat::GENERIC_SINT ), + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R8G8_UNORM , CL_RG , CL_UNORM_INT8 , 8, 2, TextureFormat::GENERIC_FLOAT ), + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R8G8_UINT , CL_RG , CL_UNSIGNED_INT8 , 8, 2, TextureFormat::GENERIC_UINT ), + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R8G8_SNORM , CL_RG , CL_SNORM_INT8 , 8, 2, TextureFormat::GENERIC_FLOAT ), + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R8G8_SINT , CL_RG , CL_SIGNED_INT8 , 8, 2, TextureFormat::GENERIC_SINT ), + + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R32_FLOAT , CL_R , CL_FLOAT , 32, 1, TextureFormat::GENERIC_FLOAT ), + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R32_UINT , CL_R , CL_UNSIGNED_INT32 , 32, 1, TextureFormat::GENERIC_UINT ), + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R32_SINT , CL_R , CL_SIGNED_INT32 , 32, 1, TextureFormat::GENERIC_SINT ), + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R16_FLOAT , CL_R , CL_HALF_FLOAT , 16, 1, TextureFormat::GENERIC_FLOAT ), + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R16_UNORM , CL_R , CL_UNORM_INT16 , 16, 1, TextureFormat::GENERIC_FLOAT ), + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R16_UINT , CL_R , 
CL_UNSIGNED_INT16 , 16, 1, TextureFormat::GENERIC_UINT ), + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R16_SNORM , CL_R , CL_SNORM_INT16 , 16, 1, TextureFormat::GENERIC_FLOAT ), + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R16_SINT , CL_R , CL_SIGNED_INT16 , 16, 1, TextureFormat::GENERIC_SINT ), + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R8_UNORM , CL_R , CL_UNORM_INT8 , 8, 1, TextureFormat::GENERIC_FLOAT ), + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R8_UINT , CL_R , CL_UNSIGNED_INT8 , 8, 1, TextureFormat::GENERIC_UINT ), + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R8_SNORM , CL_R , CL_SNORM_INT8 , 8, 1, TextureFormat::GENERIC_FLOAT ), + ADD_TEXTURE_FORMAT( DXGI_FORMAT_R8_SINT , CL_R , CL_SIGNED_INT8 , 8, 1, TextureFormat::GENERIC_SINT ), +}; +UINT formatCount = sizeof(formats)/sizeof(formats[0]); + +/* + * + * Logging and error reporting + * + */ + +static struct +{ + cl_int testCount; + cl_int passCount; + + cl_int nonTestFailures; + cl_int inTest; + cl_int currentTestPass; + + char currentTestName[1024]; +} HarnessD3D11_testStats = {0}; + +void HarnessD3D11_TestBegin(const char* fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vsprintf(HarnessD3D11_testStats.currentTestName, fmt, ap); + va_end(ap); + + printf("[%s] ... ", HarnessD3D11_testStats.currentTestName); + + HarnessD3D11_testStats.inTest = 1; + HarnessD3D11_testStats.currentTestPass = 1; +} + +void HarnessD3D11_TestFail() +{ + if (HarnessD3D11_testStats.inTest) + { + HarnessD3D11_testStats.currentTestPass = 0; + } + else + { + ++HarnessD3D11_testStats.nonTestFailures; + } +} + +void HarnessD3D11_TestEnd() +{ + HarnessD3D11_testStats.inTest = 0; + + HarnessD3D11_testStats.testCount += 1; + HarnessD3D11_testStats.passCount += HarnessD3D11_testStats.currentTestPass; + + TestPrint("%s\n", + HarnessD3D11_testStats.currentTestPass ? 
"PASSED" : "FAILED"); +} + +void HarnessD3D11_TestStats() +{ + TestPrint("PASSED %d of %d tests.\n", HarnessD3D11_testStats.passCount, HarnessD3D11_testStats.testCount); + if (HarnessD3D11_testStats.testCount > HarnessD3D11_testStats.passCount) + { + TestPrint("***FAILED***\n"); + exit(1); + } + else + { + TestPrint("&&&& cl_khr_d3d11_sharing test PASSED\n"); + } + exit(0); +} + +/* + * + * Helper function + * + */ + +cl_int HarnessD3D11_CreateKernelFromSource( + cl_kernel *outKernel, + cl_device_id device, + cl_context context, + const char *source, + const char *entrypoint) +{ + cl_int status; + cl_kernel kernel = NULL; + + // compile program + cl_program program = NULL; + { + const char *sourceTexts[] = {source}; + size_t sourceLengths[] = {strlen(source) }; + + status = create_single_kernel_helper_create_program(context, &program, 1, &sourceTexts[0]); + TestRequire( + CL_SUCCESS == status, + "clCreateProgramWithSource failed"); + } + status = clBuildProgram( + program, + 0, + NULL, + NULL, + NULL, + NULL); + if (CL_SUCCESS != status) + { + char log[2048] = {0}; + status = clGetProgramBuildInfo( + program, + device, + CL_PROGRAM_BUILD_LOG, + sizeof(log), + log, + NULL); + TestPrint("error: %s\n", log); + TestRequire( + CL_SUCCESS == status, + "Compilation error log:\n%s\n", log); + } + + kernel = clCreateKernel( + program, + entrypoint, + &status); + TestRequire( + CL_SUCCESS == status, + "clCreateKernel failed"); + + clReleaseProgram(program); + *outKernel = kernel; + +Cleanup: + + return CL_SUCCESS; +} + + + diff --git a/test_conformance/d3d11/harness.h b/test_conformance/d3d11/harness.h new file mode 100644 index 00000000..c1b66100 --- /dev/null +++ b/test_conformance/d3d11/harness.h @@ -0,0 +1,221 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef _HARNESS_H_ +#define _HARNESS_H_ + +#define _CRT_SECURE_NO_WARNINGS + +#if defined (__MINGW32__) +#include +typedef unsigned char UINT8; +#define __out +#define __in +#define __inout +#define __out_bcount(size) +#define __out_bcount_opt(size) +#define __in_opt +#define __in_ecount(size) +#define __in_ecount_opt(size) +#define __out_opt +#define __out_ecount(size) +#define __out_ecount_opt(size) +#define __in_bcount_opt(size) +#define __inout_opt +#endif + +#include +#include +#include +#include +#include "errorHelpers.h" +#include "../test_common/harness/kernelHelpers.h" + +// #define log_info(...) printf(__VA_ARGS__) +// #define log_error(...) printf(__VA_ARGS__) + +#define NonTestRequire(x, ...) \ +do \ +{ \ + if (!(x) ) \ + { \ + log_info("\n[assertion failed: %s at %s:%d]\n", #x, __FILE__, __LINE__); \ + log_info("CATASTROPHIC NON-TEST ERROR: "); \ + log_error(__VA_ARGS__); \ + log_info("\n"); \ + log_info("***FAILED***\n"); \ + exit(1); \ + } \ +} while (0) + +#define TestRequire(x, ...) \ + do \ + { \ + if (!(x) ) \ + { \ + log_info("\n[assertion failed: %s at %s:%d]\n", #x, __FILE__, __LINE__); \ + log_info("ERROR: "); \ + log_error(__VA_ARGS__); \ + log_info("\n"); \ + HarnessD3D11_TestFail(); \ + goto Cleanup; \ + } \ + } while (0) + +#define TestPrint(...) 
\ + do \ + { \ + log_error(__VA_ARGS__); \ + } while (0) + +struct TextureFormat +{ + DXGI_FORMAT format; + cl_channel_order channel_order; + cl_channel_type channel_type; + UINT bytesPerPixel; + enum + { + GENERIC_FLOAT = 0, + GENERIC_UINT = 1, + GENERIC_SINT = 2, + } generic; + + const char *name_format; + const char *name_channel_order; + const char *name_channel_type; +}; +extern TextureFormat formats[]; +extern UINT formatCount; + + +#define MAX_REGISTERED_SUBRESOURCES 4 // limit to just make life easier + +struct BufferProperties +{ + UINT ByteWidth; + UINT BindFlags; + D3D11_USAGE Usage; + UINT CPUAccess; + const char* name_BindFlags; + const char* name_Usage; + const char* name_CPUAccess; +}; + +struct Texture2DSize +{ + UINT Width; + UINT Height; + UINT MipLevels; + UINT ArraySize; + UINT SubResourceCount; + struct + { + UINT MipLevel; + UINT ArraySlice; + } subResources[MAX_REGISTERED_SUBRESOURCES]; + UINT MiscFlags; +}; +struct Texture3DSize +{ + UINT Width; + UINT Height; + UINT Depth; + UINT MipLevels; + UINT SubResourceCount; + struct + { + UINT MipLevel; + } subResources[MAX_REGISTERED_SUBRESOURCES]; + UINT MiscFlags; +}; + +void HarnessD3D11_Initialize(cl_platform_id platform); +cl_int HarnessD3D11_CreateDevice( + IDXGIAdapter* pAdapter, + ID3D11Device **ppDevice, + ID3D11DeviceContext** ppDC); +void HarnessD3D11_DestroyDevice(); + +void HarnessD3D11_TestBegin(const char* fmt, ...); +void HarnessD3D11_TestFail(); +void HarnessD3D11_TestEnd(); +void HarnessD3D11_TestStats(); + +void TestAdapterEnumeration( + cl_platform_id platform, + IDXGIAdapter* pAdapter, + ID3D11Device* pDevice, + cl_uint* num_devices); + +void TestAdapterDevices( + cl_platform_id platform, + IDXGIAdapter* pAdapter, + ID3D11Device* pDevice, + ID3D11DeviceContext* pDC, + cl_uint num_devices); + +void TestDevice( + cl_device_id device, + ID3D11Device* pDevice, + ID3D11DeviceContext* pDC); + +bool TestDeviceContextCreate( + cl_device_id device, + ID3D11Device* pDevice, + cl_context* 
out_context, + cl_command_queue* out_command_queue); + +void TestDeviceBuffer( + cl_context context, + cl_command_queue command_queue, + ID3D11Device* pDevice, + ID3D11DeviceContext* pDC); + +void TestDeviceTexture2D( + cl_device_id device, + cl_context context, + cl_command_queue command_queue, + ID3D11Device* pDevice, + ID3D11DeviceContext* pDC); + +void TestDeviceTexture3D( + cl_device_id device, + cl_context context, + cl_command_queue command_queue, + ID3D11Device* pDevice, + ID3D11DeviceContext* pDC); + +void TestDeviceMisc( + cl_device_id device, + cl_context context, + cl_command_queue command_queue, + ID3D11Device* pDevice); + +cl_int HarnessD3D11_CreateKernelFromSource( + cl_kernel *outKernel, + cl_device_id device, + cl_context context, + const char *source, + const char *entrypoint); + +extern clGetDeviceIDsFromD3D11KHR_fn clGetDeviceIDsFromD3D11KHR; +extern clCreateFromD3D11BufferKHR_fn clCreateFromD3D11BufferKHR; +extern clCreateFromD3D11Texture2DKHR_fn clCreateFromD3D11Texture2DKHR; +extern clCreateFromD3D11Texture3DKHR_fn clCreateFromD3D11Texture3DKHR; +extern clEnqueueAcquireD3D11ObjectsKHR_fn clEnqueueAcquireD3D11ObjectsKHR; +extern clEnqueueReleaseD3D11ObjectsKHR_fn clEnqueueReleaseD3D11ObjectsKHR; + +#endif diff --git a/test_conformance/d3d11/main.cpp b/test_conformance/d3d11/main.cpp new file mode 100644 index 00000000..f477d7bb --- /dev/null +++ b/test_conformance/d3d11/main.cpp @@ -0,0 +1,464 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +#if defined( _WIN32 ) + +#define _CRT_SECURE_NO_WARNINGS +#include +#include +#include "harness.h" +#include "../../test_common/harness/testHarness.h" +#include "../../test_common/harness/parseParameters.h" + +int main(int argc, const char* argv[]) +{ + cl_int result; + cl_platform_id platform = NULL; + cl_uint num_devices_tested = 0; + + argc = parseCustomParam(argc, argv); + + // get the platforms to test + result = clGetPlatformIDs(1, &platform, NULL); NonTestRequire(result == CL_SUCCESS, "Failed to get any platforms."); + + HarnessD3D11_Initialize(platform); + + // for each adapter... + IDXGIFactory* pFactory = NULL; + HRESULT hr = CreateDXGIFactory(IID_IDXGIFactory, (void**)(&pFactory) ); + NonTestRequire(SUCCEEDED(hr), "Failed to create DXGI factory."); + for (UINT adapter = 0;; ++adapter) + { + IDXGIAdapter* pAdapter = NULL; + ID3D11Device* pDevice = NULL; + ID3D11DeviceContext* pDC = NULL; + HRESULT hr = pFactory->EnumAdapters(adapter, &pAdapter); + if (FAILED(hr)) + { + break; + } + + // print data about the adapter + DXGI_ADAPTER_DESC desc; + hr = pAdapter->GetDesc(&desc); + NonTestRequire(SUCCEEDED(hr), "IDXGIAdapter::GetDesc failed."); + + TestPrint("=====================================\n"); + TestPrint("Testing DXGI Adapter and D3D11 Device\n"); + TestPrint("Description=%ls, VendorID=%x, DeviceID=%x\n", desc.Description, desc.VendorId, desc.DeviceId); + TestPrint("=====================================\n"); + + // run the test on the adapter + HarnessD3D11_CreateDevice(pAdapter, &pDevice, &pDC); + + cl_uint num_devices = 0; + + // test adapter and device enumeration + TestAdapterEnumeration(platform, pAdapter, pDevice, &num_devices); + + // if there were any devices found in enumeration, run the tests on them + if (num_devices) + { + TestAdapterDevices(platform, pAdapter, pDevice, pDC, num_devices); + } + num_devices_tested += num_devices; + 
+ // destroy the D3D11 device + if (pDevice) + { + HarnessD3D11_DestroyDevice(); + } + + pAdapter->Release(); + } + pFactory->Release(); + + // allow the test to be waived in automation + // NonTestRequire(num_devices_tested, "No D3D11 compatible cl_device_ids were found."); + + HarnessD3D11_TestStats(); +} + +void TestAdapterEnumeration( + cl_platform_id platform, + IDXGIAdapter* pAdapter, + ID3D11Device* pDevice, + cl_uint* num_devices) +{ + cl_uint num_adapter_devices = 0; + cl_device_id* adapter_devices = NULL; + + cl_uint num_device_devices = 0; + cl_device_id* device_devices = NULL; + + cl_int result; + + HarnessD3D11_TestBegin("cl_device_id Enumeration"); + + // get the cl_device_ids for the adapter + { + result = clGetDeviceIDsFromD3D11KHR( + platform, + CL_D3D11_DXGI_ADAPTER_KHR, + pAdapter, + CL_ALL_DEVICES_FOR_D3D11_KHR, + 0, + NULL, + &num_adapter_devices); + TestRequire( + (result == CL_SUCCESS || result == CL_DEVICE_NOT_FOUND), + "clGetDeviceIDsFromD3D11KHR failed."); + + if (result == CL_DEVICE_NOT_FOUND) + { + TestPrint("No devices found for adapter.\n"); + } + else + { + // if there were devices, query them + adapter_devices = new cl_device_id[num_adapter_devices]; + result = clGetDeviceIDsFromD3D11KHR( + platform, + CL_D3D11_DXGI_ADAPTER_KHR, + pAdapter, + CL_ALL_DEVICES_FOR_D3D11_KHR, + num_adapter_devices, + adapter_devices, + NULL); + TestRequire( + (result == CL_SUCCESS), + "clGetDeviceIDsFromD3D11KHR failed."); + } + } + + // get the cl_device_ids for the device (if it was successfully created) + if (pDevice) + { + result = clGetDeviceIDsFromD3D11KHR( + platform, + CL_D3D11_DEVICE_KHR, + pDevice, + CL_ALL_DEVICES_FOR_D3D11_KHR, + 0, + NULL, + &num_device_devices); + TestRequire( + (result == CL_SUCCESS || result == CL_DEVICE_NOT_FOUND), + "clGetDeviceIDsFromD3D11KHR failed."); + + if (result == CL_DEVICE_NOT_FOUND) + { + TestPrint("No devices found for D3D device.\n"); + } + else + { + // if there were devices, query them + device_devices = 
new cl_device_id[num_device_devices]; + result = clGetDeviceIDsFromD3D11KHR( + platform, + CL_D3D11_DEVICE_KHR, + pDevice, + CL_ALL_DEVICES_FOR_D3D11_KHR, + num_device_devices, + device_devices, + NULL); + TestRequire( + (result == CL_SUCCESS), + "clGetDeviceIDsFromD3D11KHR failed."); + } + + } + +Cleanup: + + if (adapter_devices) + { + delete[] adapter_devices; + } + if (device_devices) + { + delete[] device_devices; + } + + *num_devices = num_device_devices; + + HarnessD3D11_TestEnd(); +} + +void TestAdapterDevices( + cl_platform_id platform, + IDXGIAdapter* pAdapter, + ID3D11Device* pDevice, + ID3D11DeviceContext* pDC, + cl_uint num_devices_expected) +{ + cl_int result; + cl_uint num_devices = 0; + cl_device_id* devices = NULL; + + devices = new cl_device_id[num_devices_expected]; + NonTestRequire( + devices, + "Memory allocation failure."); + + result = clGetDeviceIDsFromD3D11KHR( + platform, + CL_D3D11_DEVICE_KHR, + pDevice, + CL_ALL_DEVICES_FOR_D3D11_KHR, + num_devices_expected, + devices, + &num_devices); + NonTestRequire( + (result == CL_SUCCESS), + "clGetDeviceIDsFromD3D11KHR failed."); + NonTestRequire( + (num_devices == num_devices_expected), + "clGetDeviceIDsFromD3D11KHR returned an unexpected number of devices."); + + for (cl_uint i = 0; i < num_devices; ++i) + { + TestDevice(devices[i], pDevice, pDC); + } +} + +void TestDevice( + cl_device_id device, + ID3D11Device* pDevice, + ID3D11DeviceContext* pDC) +{ + char device_name[1024]; + cl_int result = CL_SUCCESS; + cl_context context = NULL; + cl_command_queue command_queue = NULL; + ID3D11Device* clDevice = NULL; + cl_uint prefer_shared_resources; + + result = clGetDeviceInfo( + device, + CL_DEVICE_NAME, + sizeof(device_name), + device_name, + NULL); + NonTestRequire(CL_SUCCESS == result, "clGetDeviceInfo with CL_DEVICE_NAME failed"); + TestPrint("--------------------\n"); + TestPrint("Testing cl_device_id\n"); + TestPrint("Name=%s\n", device_name); + TestPrint("--------------------\n"); + + if 
(!TestDeviceContextCreate(device, pDevice, &context, &command_queue) ) + { + return; + } + + // make sure that we can query the shared resource preference + result = clGetContextInfo( + context, + CL_CONTEXT_D3D11_PREFER_SHARED_RESOURCES_KHR, + sizeof(prefer_shared_resources), + &prefer_shared_resources, + NULL); + NonTestRequire(CL_SUCCESS == result, "clGetContextInfo with CL_CONTEXT_D3D10_PREFER_SHARED_RESOURCES_KHR failed"); + + // run buffer tests + TestDeviceBuffer( + context, + command_queue, + pDevice, + pDC); + + // run 2D texture tests + TestDeviceTexture2D( + device, + context, + command_queue, + pDevice, + pDC); + + // run 3D texture tests + TestDeviceTexture3D( + device, + context, + command_queue, + pDevice, + pDC); + + // run misc tests + TestDeviceMisc( + device, + context, + command_queue, + pDevice); + + clReleaseContext(context); + clReleaseCommandQueue(command_queue); +} + +bool TestDeviceContextCreate( + cl_device_id device, + ID3D11Device* pDevice, + cl_context* out_context, + cl_command_queue* out_command_queue) +{ + cl_int result = CL_SUCCESS; + cl_context context = NULL; + cl_command_queue command_queue = NULL; + + ID3D11Device* clDevice = NULL; + + bool succeeded = false; + + HarnessD3D11_TestBegin("Context creation"); + + cl_context_properties properties[5]; + + // create the context + properties[0] = (cl_context_properties)CL_CONTEXT_D3D11_DEVICE_KHR; + properties[1] = (cl_context_properties)pDevice; + properties[2] = (cl_context_properties)CL_CONTEXT_INTEROP_USER_SYNC; + properties[3] = (cl_context_properties)CL_TRUE; + properties[4] = (cl_context_properties)0; + context = clCreateContext( + properties, + 1, + &device, + NULL, + NULL, + &result); + TestRequire( + (result == CL_SUCCESS), + "clCreateContext with CL_CONTEXT_D3D11_DEVICE_KHR failed"); + result = clReleaseContext(context); + TestRequire( + (result == CL_SUCCESS), + "clReleaseContext with CL_CONTEXT_D3D11_DEVICE_KHR failed"); + + // create the context + properties[0] = 
(cl_context_properties)CL_CONTEXT_D3D11_DEVICE_KHR; + properties[1] = (cl_context_properties)pDevice; + properties[2] = (cl_context_properties)CL_CONTEXT_INTEROP_USER_SYNC; + properties[3] = (cl_context_properties)CL_FALSE; + properties[4] = (cl_context_properties)0; + context = clCreateContext( + properties, + 1, + &device, + NULL, + NULL, + &result); + TestRequire( + (result == CL_SUCCESS), + "clCreateContext with CL_CONTEXT_D3D11_DEVICE_KHR failed"); + result = clReleaseContext(context); + TestRequire( + (result == CL_SUCCESS), + "clReleaseContext with CL_CONTEXT_D3D11_DEVICE_KHR failed"); + + // create the context + properties[0] = (cl_context_properties)CL_CONTEXT_D3D11_DEVICE_KHR; + properties[1] = (cl_context_properties)pDevice; + properties[2] = (cl_context_properties)0; + context = clCreateContext( + properties, + 1, + &device, + NULL, + NULL, + &result); + TestRequire( + (result == CL_SUCCESS), + "clCreateContext with CL_CONTEXT_D3D11_DEVICE_KHR failed"); + + // check CL_CONTEXT_D3D11_DEVICE_KHR + { + size_t param_value_size_ret; + result = clGetContextInfo(context, CL_CONTEXT_PROPERTIES, 0, NULL, ¶m_value_size_ret); + TestRequire( + (result == CL_SUCCESS), + "clGetContextInfo with CL_CONTEXT_PROPERTIES failed"); + + TestRequire( + ((param_value_size_ret % sizeof(cl_context_properties)) == 0), + "param_value_size_ret is not a multiple of sizeof(cl_context_properties)"); + + std::vector contextProperties(param_value_size_ret / sizeof(cl_context_properties)); + result = clGetContextInfo(context, CL_CONTEXT_PROPERTIES, param_value_size_ret, &contextProperties[0], NULL); + TestRequire( + (result == CL_SUCCESS), + "clGetContextInfo with CL_CONTEXT_PROPERTIES failed"); + + TestRequire(contextProperties.size() % 2 == 1, "Property list size is not odd."); + TestRequire(contextProperties[contextProperties.size() - 1] == 0, "last property is not zero"); + + std::vector::const_iterator iter; + for (iter = contextProperties.begin(); *iter != 0; iter+=2) + { + if 
(CL_CONTEXT_D3D11_DEVICE_KHR == *iter) + { + TestRequire((ID3D11Device*)*(iter+1) == pDevice, "CL_CONTEXT_D3D11_DEVICE_KHR returned invalid value"); + break; + } + } + + TestRequire((iter != contextProperties.end()), "CL_CONTEXT_PROPERTIES doesn't include CL_CONTEXT_D3D11_DEVICE_KHR"); + } + + + // create the command queue + TestPrint("Creating a command queue.\n"); + command_queue = clCreateCommandQueueWithProperties( + context, + device, + NULL, + &result); + TestRequire( + (result == CL_SUCCESS), + "clCreateContext with CL_CONTEXT_D3D11_DEVICE_KHR failed"); + + succeeded = true; + +Cleanup: + + if (succeeded) + { + *out_context = context; + *out_command_queue = command_queue; + } + else + { + if (context) + { + clReleaseContext(context); + } + if (command_queue) + { + clReleaseCommandQueue(command_queue); + } + } + HarnessD3D11_TestEnd(); + return succeeded; +} + +#else + +#include "errorHelpers.h" + +int main(int argc, char* argv[]) +{ + log_info( "Windows-specific test skipped.\n" ); + return 0; +} + +#endif diff --git a/test_conformance/d3d11/misc.cpp b/test_conformance/d3d11/misc.cpp new file mode 100644 index 00000000..1eba7e2d --- /dev/null +++ b/test_conformance/d3d11/misc.cpp @@ -0,0 +1,224 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#define _CRT_SECURE_NO_WARNINGS +#include "harness.h" + +void SubTestMiscMultipleCreates( + cl_context context, + cl_command_queue command_queue, + ID3D11Device* pDevice) +{ + cl_mem mem[5] = {NULL, NULL, NULL, NULL, NULL}; + ID3D11Buffer* pBuffer = NULL; + ID3D11Texture2D* pTexture = NULL; + HRESULT hr = S_OK; + + cl_int result = CL_SUCCESS; + + HarnessD3D11_TestBegin("Misc: Multiple Creates"); + + // create the D3D11 resources + { + D3D11_TEXTURE2D_DESC desc; + memset(&desc, 0, sizeof(desc) ); + desc.Width = 256; + desc.Height = 256; + desc.MipLevels = 4; + desc.ArraySize = 4; + desc.Format = DXGI_FORMAT_R32G32B32A32_FLOAT; + desc.SampleDesc.Count = 1; + desc.SampleDesc.Quality = 0; + desc.Usage = D3D11_USAGE_DEFAULT; + desc.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET; + desc.CPUAccessFlags = 0; + desc.MiscFlags = 0; + + hr = pDevice->CreateTexture2D(&desc, NULL, &pTexture); + TestRequire(SUCCEEDED(hr), "Failed to create texture."); + } + + // create the D3D11 buffer + { + D3D11_BUFFER_DESC desc = {0}; + desc.ByteWidth = 1124; + desc.Usage = D3D11_USAGE_DEFAULT; + desc.CPUAccessFlags = 0; + desc.BindFlags = D3D11_BIND_VERTEX_BUFFER; + desc.MiscFlags = 0; + hr = pDevice->CreateBuffer(&desc, NULL, &pBuffer); + TestRequire(SUCCEEDED(hr), "Creating vertex buffer failed!"); + } + + mem[0] = clCreateFromD3D11BufferKHR( + context, + 0, + pBuffer, + &result); + TestRequire(result == CL_SUCCESS, "clCreateFromD3D11BufferKHR"); + + mem[1] = clCreateFromD3D11BufferKHR( + context, + 0, + pBuffer, + &result); + TestRequire(result == CL_INVALID_D3D11_RESOURCE_KHR, "clCreateFromD3D11BufferKHR succeeded when it shouldn't"); + + mem[2] = clCreateFromD3D11Texture2DKHR( + context, + 0, + pTexture, + 1, + &result); + TestRequire(result == CL_SUCCESS, "clCreateFromD3D11Texture2DKHR failed"); + + mem[3] = clCreateFromD3D11Texture2DKHR( + context, + 0, + pTexture, + 1, + &result); + TestRequire(result == CL_INVALID_D3D11_RESOURCE_KHR, 
"clCreateFromD3D11Texture2DKHR succeeded when it shouldn't"); + + mem[4] = clCreateFromD3D11Texture2DKHR( + context, + 0, + pTexture, + 16, + &result); + TestRequire(result == CL_INVALID_VALUE, "clCreateFromD3D11Texture2DKHR succeeded when it shouldn't"); + + +Cleanup: + + for (UINT i = 0; i < 4; ++i) + { + if (mem[i]) + { + clReleaseMemObject(mem[i]); + } + } + if (pBuffer) + { + pBuffer->Release(); + } + if (pTexture) + { + pTexture->Release(); + } + + HarnessD3D11_TestEnd(); +} + +void SubTestMiscAcquireRelease( + cl_device_id device, + cl_context context, + ID3D11Device* pDevice) +{ + ID3D11Buffer* pBuffer = NULL; + ID3D11Texture2D* pTexture = NULL; + HRESULT hr = S_OK; + + cl_int result = CL_SUCCESS; + cl_mem mem[2] = {NULL, NULL}; + + HarnessD3D11_TestBegin("Misc: Acquire Release"); + + + // create the D3D11 resources + { + D3D11_TEXTURE2D_DESC desc; + memset(&desc, 0, sizeof(desc) ); + desc.Width = 256; + desc.Height = 256; + desc.MipLevels = 4; + desc.ArraySize = 4; + desc.Format = DXGI_FORMAT_R32G32B32A32_FLOAT; + desc.SampleDesc.Count = 1; + desc.SampleDesc.Quality = 0; + desc.Usage = D3D11_USAGE_DEFAULT; + desc.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET; + desc.CPUAccessFlags = 0; + desc.MiscFlags = 0; + + hr = pDevice->CreateTexture2D(&desc, NULL, &pTexture); + TestRequire(SUCCEEDED(hr), "Failed to create texture."); + } + + // create the D3D11 buffer + { + D3D11_BUFFER_DESC desc = {0}; + desc.ByteWidth = 1124; + desc.Usage = D3D11_USAGE_DEFAULT; + desc.CPUAccessFlags = 0; + desc.BindFlags = D3D11_BIND_VERTEX_BUFFER; + desc.MiscFlags = 0; + hr = pDevice->CreateBuffer(&desc, NULL, &pBuffer); + TestRequire(SUCCEEDED(hr), "Creating vertex buffer failed!"); + } + + // create cl_mem objects for the resources + mem[0] = clCreateFromD3D11BufferKHR( + context, + 0, + pBuffer, + &result); + TestRequire(result == CL_SUCCESS, "clCreateFromD3D11BufferKHR"); + mem[1] = clCreateFromD3D11Texture2DKHR( + context, + 0, + pTexture, + 1, + &result); 
+ TestRequire(result == CL_SUCCESS, "clCreateFromD3D11Texture2DKHR failed"); + +Cleanup: + for (UINT i = 0; i < 2; ++i) + { + if (mem[i]) + { + clReleaseMemObject(mem[i]); + } + } + if (pBuffer) + { + pBuffer->Release(); + } + if (pTexture) + { + pTexture->Release(); + } + + HarnessD3D11_TestEnd(); +} + +void TestDeviceMisc( + cl_device_id device, + cl_context context, + cl_command_queue command_queue, + ID3D11Device* pDevice) +{ + SubTestMiscMultipleCreates( + context, + command_queue, + pDevice); + + SubTestMiscAcquireRelease( + device, + context, + pDevice); +} + + diff --git a/test_conformance/d3d11/texture2d.cpp b/test_conformance/d3d11/texture2d.cpp new file mode 100644 index 00000000..abfdbf06 --- /dev/null +++ b/test_conformance/d3d11/texture2d.cpp @@ -0,0 +1,749 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#define _CRT_SECURE_NO_WARNINGS +#include "harness.h" +#include + +Texture2DSize texture2DSizes[] = +{ + { + 4, // Width + 4, // Height + 1, // MipLevels + 1, // ArraySize + 1, // SubResourceCount + { // SubResources + {0, 0}, // MipLevel, ArraySlice + {0, 0}, // MipLevel, ArraySlice + {0, 0}, // MipLevel, ArraySlice + {0, 0}, // MipLevel, ArraySlice + }, + 0, // MiscFlags + }, + { + 15, // Width + 37, // Height + 2, // MipLevels + 1, // ArraySize + 2, // SubResourceCount + { // SubResources + {0, 0}, // MipLevel, ArraySlice + {1, 0}, // MipLevel, ArraySlice + {0, 0}, // MipLevel, ArraySlice + {0, 0}, // MipLevel, ArraySlice + }, + 0, // MiscFlags + }, + { + 65, // Width + 17, // Height + 1, // MipLevels + 1, // ArraySize + 1, // SubResourceCount + { // SubResources + {0, 0}, // MipLevel, ArraySlice + {0, 0}, // MipLevel, ArraySlice + {0, 0}, // MipLevel, ArraySlice + {0, 0}, // MipLevel, ArraySlice + }, + D3D11_RESOURCE_MISC_SHARED, // MiscFlags + }, + + { + 127, // Width + 125, // Height + 4, // MipLevels + 1, // ArraySize + 4, // SubResourceCount + { // SubResources + {3, 0}, // MipLevel, ArraySlice + {2, 0}, // MipLevel, ArraySlice + {1, 0}, // MipLevel, ArraySlice + {0, 0}, // MipLevel, ArraySlice + }, + 0, // MiscFlags + }, + { + 128, // Width + 128, // Height + 4, // MipLevels + 6, // ArraySize + 4, // SubResourceCount + { // SubResources + {0, 1}, // MipLevel, ArraySlice + {1, 0}, // MipLevel, ArraySlice + {0, 2}, // MipLevel, ArraySlice + {3, 5}, // MipLevel, ArraySlice + }, + 0, // MiscFlags + }, + { + 256, // Width + 256, // Height + 0, // MipLevels + 256, // ArraySize + 4, // SubResourceCount + { // SubResources + {0, 0}, // MipLevel, ArraySlice + {1, 255}, // MipLevel, ArraySlice + {2, 127}, // MipLevel, ArraySlice + {3, 128}, // MipLevel, ArraySlice + }, + 0, // MiscFlags + }, + { + 258, // Width + 511, // Height + 1, // MipLevels + 1, // ArraySize + 1, // SubResourceCount + { // SubResources + {0, 0}, // MipLevel, ArraySlice + {0, 0}, // 
MipLevel, ArraySlice + {0, 0}, // MipLevel, ArraySlice + {0, 0}, // MipLevel, ArraySlice + }, + 0, // MiscFlags + }, + { + 767, // Width + 1025, // Height + 4, // MipLevels + 1, // ArraySize + 1, // SubResourceCount + { // SubResources + {0, 0}, // MipLevel, ArraySlice + {0, 0}, // MipLevel, ArraySlice + {0, 0}, // MipLevel, ArraySlice + {0, 0}, // MipLevel, ArraySlice + }, + 0, // MiscFlags + }, + { + 2048, // Width + 2048, // Height + 1, // MipLevels + 1, // ArraySize + 1, // SubResourceCount + { // SubResources + {0, 0}, // MipLevel, ArraySlice + {0, 0}, // MipLevel, ArraySlice + {0, 0}, // MipLevel, ArraySlice + {0, 0}, // MipLevel, ArraySlice + }, + 0, // MiscFlags + }, +}; +UINT texture2DSizeCount = sizeof(texture2DSizes)/sizeof(texture2DSizes[0]); + +const char * +texture2DPatterns[2][2] = +{ + {"aAbBcCdDeEfFgGhHiIjJ", "AaBbCcDdEeFfGgHhIiJj"}, + {"zZyYxXwWvVuUtTsSrRqQ", "ZzYyXxWwVvUuTtSsRrQq"}, +}; + +void SubTestTexture2D( + cl_context context, + cl_command_queue command_queue, + cl_kernel kernel, + ID3D11Device* pDevice, + ID3D11DeviceContext* pDC, + const TextureFormat* format, + const Texture2DSize* size) +{ + ID3D11Texture2D* pTexture = NULL; + HRESULT hr = S_OK; + cl_image_format clFormat; + cl_int result = CL_SUCCESS; + + HarnessD3D11_TestBegin("2D Texture: Format=%s, Width=%d, Height=%d, MipLevels=%d, ArraySize=%d", + format->name_format, + size->Width, + size->Height, + size->MipLevels, + size->ArraySize); + + struct + { + cl_mem mem; + UINT subResource; + UINT width; + UINT height; + } + subResourceInfo[4]; + + cl_event events[4] = {NULL, NULL, NULL, NULL}; + + // create the D3D11 resources + { + D3D11_TEXTURE2D_DESC desc; + memset(&desc, 0, sizeof(desc) ); + desc.Width = size->Width; + desc.Height = size->Height; + desc.MipLevels = size->MipLevels; + desc.ArraySize = size->ArraySize; + desc.Format = format->format; + desc.SampleDesc.Count = 1; + desc.SampleDesc.Quality = 0; + desc.Usage = D3D11_USAGE_DEFAULT; + desc.BindFlags = 
D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET; + desc.CPUAccessFlags = 0; + desc.MiscFlags = 0; + + hr = pDevice->CreateTexture2D(&desc, NULL, &pTexture); + TestRequire(SUCCEEDED(hr), "ID3D11Device::CreateTexture2D failed (non-OpenCL D3D error, but test is invalid)."); + } + + // initialize some useful variables + for (UINT i = 0; i < size->SubResourceCount; ++i) + { + // compute the expected values for the subresource + subResourceInfo[i].subResource = D3D11CalcSubresource( + size->subResources[i].MipLevel, + size->subResources[i].ArraySlice, + size->MipLevels); + subResourceInfo[i].width = size->Width; + subResourceInfo[i].height = size->Height; + for (UINT j = 0; j < size->subResources[i].MipLevel; ++j) { + subResourceInfo[i].width /= 2; + subResourceInfo[i].height /= 2; + } + subResourceInfo[i].mem = NULL; + } + + // copy a pattern into the corners of the image, coordinates + // (0,0), (w,0-1), (0,h-1), (w-1,h-1) + for (UINT i = 0; i < size->SubResourceCount; ++i) + for (UINT x = 0; x < 2; ++x) + for (UINT y = 0; y < 2; ++y) + { + // create the staging buffer + ID3D11Texture2D* pStagingBuffer = NULL; + { + D3D11_TEXTURE2D_DESC desc = {0}; + desc.Width = 1; + desc.Height = 1; + desc.MipLevels = 1; + desc.ArraySize = 1; + desc.Format = format->format; + desc.SampleDesc.Count = 1; + desc.SampleDesc.Quality = 0; + desc.Usage = D3D11_USAGE_STAGING; + desc.BindFlags = 0; + desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE; + desc.MiscFlags = 0; + hr = pDevice->CreateTexture2D(&desc, NULL, &pStagingBuffer); + TestRequire(SUCCEEDED(hr), "ID3D11Device::CreateTexture2D failed (non-OpenCL D3D error, but test is invalid)."); + } + + // write the data to the staging buffer + { + D3D11_MAPPED_SUBRESOURCE mappedTexture; + hr = pDC->Map( + pStagingBuffer, + 0, + D3D11_MAP_READ_WRITE, + 0, + &mappedTexture); + memcpy(mappedTexture.pData, texture2DPatterns[x][y], format->bytesPerPixel); + pDC->Unmap(pStagingBuffer, 0); + } + + // copy the data to to 
the texture + { + D3D11_BOX box = {0}; + box.front = 0; box.back = 1; + box.top = 0; box.bottom = 1; + box.left = 0; box.right = 1; + pDC->CopySubresourceRegion( + pTexture, + subResourceInfo[i].subResource, + x ? subResourceInfo[i].width - 1 : 0, + y ? subResourceInfo[i].height - 1 : 0, + 0, + pStagingBuffer, + 0, + &box); + } + + pStagingBuffer->Release(); + } + + // create the cl_mem objects for the resources and verify its sanity + for (UINT i = 0; i < size->SubResourceCount; ++i) + { + // create a cl_mem for the resource + subResourceInfo[i].mem = clCreateFromD3D11Texture2DKHR( + context, + 0, + pTexture, + subResourceInfo[i].subResource, + &result); + if (CL_IMAGE_FORMAT_NOT_SUPPORTED == result) + { + goto Cleanup; + } + TestRequire(result == CL_SUCCESS, "clCreateFromD3D11Texture2DKHR failed"); + + // query resource pointer and verify + ID3D11Resource* clResource = NULL; + result = clGetMemObjectInfo( + subResourceInfo[i].mem, + CL_MEM_D3D11_RESOURCE_KHR, + sizeof(clResource), + &clResource, + NULL); + TestRequire(result == CL_SUCCESS, "clGetMemObjectInfo for CL_MEM_D3D11_RESOURCE_KHR failed."); + TestRequire(clResource == pTexture, "clGetMemObjectInfo for CL_MEM_D3D11_RESOURCE_KHR returned incorrect value."); + + // query subresource and verify + UINT clSubResource; + result = clGetImageInfo( + subResourceInfo[i].mem, + CL_IMAGE_D3D11_SUBRESOURCE_KHR, + sizeof(clSubResource), + &clSubResource, + NULL); + TestRequire(result == CL_SUCCESS, "clGetImageInfo for CL_IMAGE_D3D11_SUBRESOURCE_KHR failed"); + TestRequire(clSubResource == subResourceInfo[i].subResource, "clGetImageInfo for CL_IMAGE_D3D11_SUBRESOURCE_KHR returned incorrect value."); + + // query format and verify + result = clGetImageInfo( + subResourceInfo[i].mem, + CL_IMAGE_FORMAT, + sizeof(clFormat), + &clFormat, + NULL); + TestRequire(result == CL_SUCCESS, "clGetImageInfo for CL_IMAGE_FORMAT failed"); + TestRequire(clFormat.image_channel_order == format->channel_order, "clGetImageInfo for 
CL_IMAGE_FORMAT returned incorrect channel order."); + TestRequire(clFormat.image_channel_data_type == format->channel_type, "clGetImageInfo for CL_IMAGE_FORMAT returned incorrect channel data type."); + + // query width + size_t width; + result = clGetImageInfo( + subResourceInfo[i].mem, + CL_IMAGE_WIDTH, + sizeof(width), + &width, + NULL); + TestRequire(result == CL_SUCCESS, "clGetImageInfo for CL_IMAGE_WIDTH failed"); + TestRequire(width == subResourceInfo[i].width, "clGetImageInfo for CL_IMAGE_HEIGHT returned incorrect value."); + + // query height + size_t height; + result = clGetImageInfo( + subResourceInfo[i].mem, + CL_IMAGE_HEIGHT, + sizeof(height), + &height, + NULL); + TestRequire(result == CL_SUCCESS, "clGetImageInfo for CL_IMAGE_HEIGHT failed"); + TestRequire(height == subResourceInfo[i].height, "clGetImageInfo for CL_IMAGE_HEIGHT returned incorrect value."); + + } + + // acquire the resources for OpenCL + for (UINT i = 0; i < 2; ++i) + { + cl_uint memCount = 0; + cl_mem memToAcquire[MAX_REGISTERED_SUBRESOURCES]; + + // cut the registered sub-resources into two sets and send the acquire calls for them separately + if (i == 0) + { + for(UINT j = 0; j < size->SubResourceCount/2; ++j) + { + memToAcquire[memCount++] = subResourceInfo[j].mem; + } + } + else + { + for(UINT j = size->SubResourceCount/2; j < size->SubResourceCount; ++j) + { + memToAcquire[memCount++] = subResourceInfo[j].mem; + } + } + if (!memCount) continue; + + // do the acquire + result = clEnqueueAcquireD3D11ObjectsKHR( + command_queue, + memCount, + memToAcquire, + 0, + NULL, + &events[0+i]); + TestRequire(result == CL_SUCCESS, "clEnqueueAcquireD3D11ObjectsKHR failed."); + TestRequire(events[0+i], "clEnqueueAcquireD3D11ObjectsKHR did not return an event."); + + // make sure the event type is correct + cl_uint eventType = 0; + result = clGetEventInfo( + events[0+i], + CL_EVENT_COMMAND_TYPE, + sizeof(eventType), + &eventType, + NULL); + TestRequire(result == CL_SUCCESS, "clGetEventInfo for 
event created by clEnqueueAcquireD3D11ObjectsKHR failed."); + TestRequire(eventType == CL_COMMAND_ACQUIRE_D3D11_OBJECTS_KHR, "clGetEventInfo for CL_EVENT_COMMAND_TYPE was not CL_COMMAND_ACQUIRE_D3D11_OBJECTS_KHR."); + } + + // download the data using OpenCL & compare with the expected results + for (UINT i = 0; i < size->SubResourceCount; ++i) + { + size_t origin[3] = {0,0,0}; + size_t region[3] = {subResourceInfo[i].width, subResourceInfo[i].height, 1}; + cl_mem tempImage; + cl_image_desc image_desc = { 0 }; + image_desc.image_depth = 1; + image_desc.image_height = subResourceInfo[i].height; + image_desc.image_width = subResourceInfo[i].width; + image_desc.image_type = CL_MEM_OBJECT_IMAGE2D; + + tempImage = clCreateImage(context, 0, &clFormat, &image_desc, NULL, &result); + TestRequire(result == CL_SUCCESS, "clCreateImage failed"); + + result = clEnqueueCopyImage(command_queue, subResourceInfo[i].mem, tempImage, + origin, origin, region, 0, NULL, NULL); + TestRequire(result == CL_SUCCESS, "clEnqueueCopyImage failed"); + + // copy (0,0) to (1,1) and (w-1,h-1) to (w-2,h-2) using a kernel + { + result = clSetKernelArg( + kernel, + 0, + sizeof(cl_mem), + (void *)&tempImage); + result = clSetKernelArg( + kernel, + 1, + sizeof(cl_mem), + (void *)&subResourceInfo[i].mem); + + TestRequire(CL_SUCCESS == result, "clSetKernelArg failed"); + + size_t localWorkSize[] = {1}; + size_t globalWorkSize[] = {1}; + result = clEnqueueNDRangeKernel( + command_queue, + kernel, + 1, + NULL, + globalWorkSize, + localWorkSize, + 0, + NULL, + NULL); + TestRequire(CL_SUCCESS == result, "clEnqueueNDRangeKernel failed"); + } + // copy (w-1,0) to (w-2,1) and (0,h) to (1,h-2) using a memcpy + for (UINT x = 0; x < 2; ++x) + for (UINT y = 0; y < 2; ++y) + { + if (x == y) + { + continue; + } + + size_t src[3] = + { + x ? subResourceInfo[i].width - 1 : 0, + y ? subResourceInfo[i].height - 1 : 0, + 0, + }; + size_t dst[3] = + { + x ? subResourceInfo[i].width - 2 : 1, + y ? 
subResourceInfo[i].height - 2 : 1, + 0, + }; + size_t region[3] = + { + 1, + 1, + 1, + }; + result = clEnqueueCopyImage( + command_queue, + subResourceInfo[i].mem, + subResourceInfo[i].mem, + src, + dst, + region, + 0, + NULL, + NULL); + TestRequire(result == CL_SUCCESS, "clEnqueueCopyImage failed."); + } + clReleaseMemObject(tempImage); + } + + // release the resource from OpenCL + for (UINT i = 0; i < 2; ++i) + { + cl_uint memCount = 0; + cl_mem memToAcquire[MAX_REGISTERED_SUBRESOURCES]; + + // cut the registered sub-resources into two sets and send the release calls for them separately + if (i == 0) + { + for(UINT j = size->SubResourceCount/4; j < size->SubResourceCount; ++j) + { + memToAcquire[memCount++] = subResourceInfo[j].mem; + } + } + else + { + for(UINT j = 0; j < size->SubResourceCount/4; ++j) + { + memToAcquire[memCount++] = subResourceInfo[j].mem; + } + } + if (!memCount) continue; + + // do the release + result = clEnqueueReleaseD3D11ObjectsKHR( + command_queue, + memCount, + memToAcquire, + 0, + NULL, + &events[2+i]); + TestRequire(result == CL_SUCCESS, "clEnqueueReleaseD3D11ObjectsKHR failed."); + TestRequire(events[2+i], "clEnqueueReleaseD3D11ObjectsKHR did not return an event."); + + // make sure the event type is correct + cl_uint eventType = 0; + result = clGetEventInfo( + events[2+i], + CL_EVENT_COMMAND_TYPE, + sizeof(eventType), + &eventType, + NULL); + TestRequire(result == CL_SUCCESS, "clGetEventInfo for event created by clEnqueueReleaseD3D11ObjectsKHR failed."); + TestRequire(eventType == CL_COMMAND_RELEASE_D3D11_OBJECTS_KHR, "clGetEventInfo for CL_EVENT_COMMAND_TYPE was not CL_COMMAND_RELEASE_D3D11_OBJECTS_KHR."); + } + + for (UINT i = 0; i < size->SubResourceCount; ++i) + for (UINT x = 0; x < 2; ++x) + for (UINT y = 0; y < 2; ++y) + { + // create the staging buffer + ID3D11Texture2D* pStagingBuffer = NULL; + { + D3D11_TEXTURE2D_DESC desc = {0}; + desc.Width = 1; + desc.Height = 1; + desc.MipLevels = 1; + desc.ArraySize = 1; + desc.Format 
= format->format; + desc.SampleDesc.Count = 1; + desc.SampleDesc.Quality = 0; + desc.Usage = D3D11_USAGE_STAGING; + desc.BindFlags = 0; + desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE; + desc.MiscFlags = 0; + hr = pDevice->CreateTexture2D(&desc, NULL, &pStagingBuffer); + TestRequire(SUCCEEDED(hr), "Failed to create staging buffer."); + } + + // wipe out the staging buffer to make sure we don't get stale values + { + D3D11_MAPPED_SUBRESOURCE mappedTexture; + hr = pDC->Map( + pStagingBuffer, + 0, + D3D11_MAP_READ_WRITE, + 0, + &mappedTexture); + TestRequire(SUCCEEDED(hr), "Failed to map staging buffer"); + memset(mappedTexture.pData, 0, format->bytesPerPixel); + pDC->Unmap(pStagingBuffer, 0); + } + + // copy the pixel to the staging buffer + { + D3D11_BOX box = {0}; + box.left = x ? subResourceInfo[i].width - 2 : 1; box.right = box.left + 1; + box.top = y ? subResourceInfo[i].height - 2 : 1; box.bottom = box.top + 1; + box.front = 0; box.back = 1; + pDC->CopySubresourceRegion( + pStagingBuffer, + 0, + 0, + 0, + 0, + pTexture, + subResourceInfo[i].subResource, + &box); + } + + // make sure we read back what was written next door + { + D3D11_MAPPED_SUBRESOURCE mappedTexture; + hr = pDC->Map( + pStagingBuffer, + 0, + D3D11_MAP_READ_WRITE, + 0, + &mappedTexture); + TestRequire(SUCCEEDED(hr), "Failed to map staging buffer"); + TestRequire( + !memcmp(mappedTexture.pData, texture2DPatterns[x][y], format->bytesPerPixel), + "Failed to map staging buffer"); + pDC->Unmap(pStagingBuffer, 0); + } + + pStagingBuffer->Release(); + } + + +Cleanup: + + if (pTexture) + { + pTexture->Release(); + } + for (UINT i = 0; i < size->SubResourceCount; ++i) + { + clReleaseMemObject(subResourceInfo[i].mem); + } + for (UINT i = 0; i < 4; ++i) + { + if (events[i]) + { + result = clReleaseEvent(events[i]); + TestRequire(result == CL_SUCCESS, "clReleaseEvent for event failed."); + } + } + + + HarnessD3D11_TestEnd(); +} + +bool is_format_supported( + cl_channel_order 
channel_order, + cl_channel_type channel_type, + const std::vector &supported_image_formats) +{ + for (std::vector::const_iterator it = supported_image_formats.begin(); it != supported_image_formats.end(); ++it) + if (it->image_channel_data_type == channel_type && it->image_channel_order == channel_order) + return true; + + return false; +} + +void TestDeviceTexture2D( + cl_device_id device, + cl_context context, + cl_command_queue command_queue, + ID3D11Device* pDevice, + ID3D11DeviceContext* pDC) +{ + cl_int result = CL_SUCCESS; + cl_kernel kernels[3] = {NULL, NULL, NULL}; + + const char *sourceRaw = + " \ + __kernel void texture2D\n\ + ( \n\ + __read_only image2d_t texIn, \n\ + __write_only image2d_t texOut \n\ + ) \n\ + { \n\ + const sampler_t smp = CLK_FILTER_NEAREST; \n\ + CLK_NORMALIZED_COORDS_FALSE |\n\ + CLK_ADDRESS_CLAMP_TO_EDGE; \n\ + %s value; \n\ + int2 coordIn; \n\ + int2 coordOut; \n\ + int w = get_image_width(texIn); \n\ + int h = get_image_height(texIn); \n\ + \n\ + coordIn = (int2)(0, 0); \n\ + coordOut = (int2)(1, 1); \n\ + value = read_image%s(texIn, smp, coordIn); \n\ + write_image%s(texOut, coordOut, value); \n\ + \n\ + coordIn = (int2)(w-1, h-1); \n\ + coordOut = (int2)(w-2, h-2); \n\ + value = read_image%s(texIn, smp, coordIn); \n\ + write_image%s(texOut, coordOut, value); \n\ + } \n\ + "; + + cl_uint supported_formats_count; + std::vector supported_image_formats; + result = clGetSupportedImageFormats(context, CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE2D, 0, NULL, &supported_formats_count); + TestRequire(CL_SUCCESS == result, "clGetSupportedImageFormats failed."); + + supported_image_formats.resize(supported_formats_count); + result = clGetSupportedImageFormats(context, CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE2D, supported_formats_count, &supported_image_formats[0], NULL); + TestRequire(CL_SUCCESS == result, "clGetSupportedImageFormats failed."); + + char source[2048]; + sprintf(source, sourceRaw, "float4", "f", "f", "f", "f"); + result = 
HarnessD3D11_CreateKernelFromSource(&kernels[0], device, context, source, "texture2D"); + TestRequire(CL_SUCCESS == result, "HarnessD3D11_CreateKernelFromSource failed."); + + sprintf(source, sourceRaw, "uint4", "ui", "ui", "ui", "ui"); + result = HarnessD3D11_CreateKernelFromSource(&kernels[1], device, context, source, "texture2D"); + TestRequire(CL_SUCCESS == result, "HarnessD3D11_CreateKernelFromSource failed."); + + sprintf(source, sourceRaw, "int4", "i", "i", "i", "i"); + result = HarnessD3D11_CreateKernelFromSource(&kernels[2], device, context, source, "texture2D"); + TestRequire(CL_SUCCESS == result, "HarnessD3D11_CreateKernelFromSource failed."); + + for (UINT format = 0, size = 0; format < formatCount; ++size, ++format) + { + if (!is_format_supported(formats[format].channel_order, formats[format].channel_type, supported_image_formats)) + { + HarnessD3D11_TestBegin("2D_texture: Format=%s, Width=%d, Height=%d, MipLevels=%d, ArraySize=%d\n", + formats[format].name_format, + texture2DSizes[size % texture2DSizeCount].Width, + texture2DSizes[size % texture2DSizeCount].Height, + texture2DSizes[size % texture2DSizeCount].MipLevels, + texture2DSizes[size % texture2DSizeCount].ArraySize); + log_info("\tFormat not supported, skipping test!\n"); + HarnessD3D11_TestEnd(); + + continue; + } + + SubTestTexture2D( + context, + command_queue, + kernels[formats[format].generic], + pDevice, + pDC, + &formats[format], + &texture2DSizes[size % texture2DSizeCount]); + } + +Cleanup: + + + for (UINT i = 0; i < 3; ++i) + { + if (kernels[i]) + { + clReleaseKernel(kernels[i]); + } + } +} + + diff --git a/test_conformance/d3d11/texture3d.cpp b/test_conformance/d3d11/texture3d.cpp new file mode 100644 index 00000000..05f53b74 --- /dev/null +++ b/test_conformance/d3d11/texture3d.cpp @@ -0,0 +1,489 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#define _CRT_SECURE_NO_WARNINGS +#include "harness.h" + +Texture3DSize texture3DSizes[] = +{ + { + 4, // Width + 4, // Height + 4, // Depth + 1, // MipLevels + 1, // SubResourceCount + { // SubResources + { 0 }, // MipLevel + { 0 }, // MipLevel + { 0 }, // MipLevel + { 0 }, // MipLevel + }, + 0, // MiscFlags + }, + { + 127, // Width + 55, // Height + 33, // Depth + 1, // MipLevels + 1, // SubResourceCount + { // SubResources + { 0 }, // MipLevel + { 0 }, // MipLevel + { 0 }, // MipLevel + { 0 }, // MipLevel + }, + 0, // MiscFlags + }, + { + 128, // Width + 256, // Height + 64, // Depth + 4, // MipLevels + 3, // SubResourceCount + { // SubResources + { 2 }, // MipLevel + { 1 }, // MipLevel + { 0 }, // MipLevel + { 0 }, // MipLevel + }, + 0, // MiscFlags + }, + { + 512, // Width + 64, // Height + 32, // Depth + 3, // MipLevels + 1, // SubResourceCount + { // SubResources + { 2 }, // MipLevel + { 0 }, // MipLevel + { 0 }, // MipLevel + { 0 }, // MipLevel + }, + 0, // MiscFlags + }, +}; +UINT texture3DSizeCount = sizeof(texture3DSizes)/sizeof(texture3DSizes[0]); + +const char * +texture3DPatterns[2][2][2] = +{ + { + {"PlaceTheCasseroleDis", "hInAColdOvenPlaceACh"}, + {"airFacingTheOvenAndS", "itInItForeverThinkAb"}, + }, + { + {"outHowHungryYouAreWh", "enNightFallsDoNotTur"}, + {"nOnTheLightMyEyeBeca", "meInflamedIHateCamus"}, + }, +}; + +void SubTestTexture3D( + cl_context context, + cl_command_queue 
command_queue, + ID3D11Device* pDevice, + ID3D11DeviceContext* pDC, + const TextureFormat* format, + const Texture3DSize* size) +{ + ID3D11Texture3D* pTexture = NULL; + HRESULT hr = S_OK; + + cl_int result = CL_SUCCESS; + + HarnessD3D11_TestBegin("3D Texture: Format=%s, Width=%d, Height=%d, Depth=%d, MipLevels=%d", + format->name_format, + size->Width, + size->Height, + size->Depth, + size->MipLevels); + + struct + { + cl_mem mem; + UINT subResource; + UINT width; + UINT height; + UINT depth; + } + subResourceInfo[4]; + + // create the D3D11 resources + { + D3D11_TEXTURE3D_DESC desc; + memset(&desc, 0, sizeof(desc) ); + desc.Width = size->Width; + desc.Height = size->Height; + desc.Depth = size->Depth; + desc.MipLevels = size->MipLevels; + desc.Format = format->format; + desc.Usage = D3D11_USAGE_DEFAULT; + desc.BindFlags = D3D11_BIND_SHADER_RESOURCE; + desc.CPUAccessFlags = 0; + desc.MiscFlags = 0; + + hr = pDevice->CreateTexture3D(&desc, NULL, &pTexture); + TestRequire(SUCCEEDED(hr), "CreateTexture3D failed."); + } + + // initialize some useful variables + for (UINT i = 0; i < size->SubResourceCount; ++i) + { + // compute the expected values for the subresource + subResourceInfo[i].subResource = size->subResources[i].MipLevel; + subResourceInfo[i].width = size->Width; + subResourceInfo[i].height = size->Height; + subResourceInfo[i].depth = size->Depth; + for (UINT j = 0; j < size->subResources[i].MipLevel; ++j) { + subResourceInfo[i].width /= 2; + subResourceInfo[i].height /= 2; + subResourceInfo[i].depth /= 2; + } + subResourceInfo[i].mem = NULL; + } + + // copy a pattern into the corners of the image, coordinates + for (UINT i = 0; i < size->SubResourceCount; ++i) + for (UINT x = 0; x < 2; ++x) + for (UINT y = 0; y < 2; ++y) + for (UINT z = 0; z < 2; ++z) + { + // create the staging buffer + ID3D11Texture3D* pStagingBuffer = NULL; + { + D3D11_TEXTURE3D_DESC desc = {0}; + desc.Width = 1; + desc.Height = 1; + desc.Depth = 1; + desc.MipLevels = 1; + desc.Format = 
format->format; + desc.Usage = D3D11_USAGE_STAGING; + desc.BindFlags = 0; + desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE; + desc.MiscFlags = 0; + hr = pDevice->CreateTexture3D(&desc, NULL, &pStagingBuffer); + TestRequire(SUCCEEDED(hr), "CreateTexture3D failed."); + } + + // write the data to the staging buffer + { + D3D11_MAPPED_SUBRESOURCE mappedTexture; + hr = pDC->Map( + pStagingBuffer, + 0, + D3D11_MAP_READ_WRITE, + 0, + &mappedTexture); + memcpy(mappedTexture.pData, texture3DPatterns[x][y][z], format->bytesPerPixel); + pDC->Unmap(pStagingBuffer, 0); + } + + // copy the data to to the texture + { + D3D11_BOX box = {0}; + box.front = 0; box.back = 1; + box.top = 0; box.bottom = 1; + box.left = 0; box.right = 1; + pDC->CopySubresourceRegion( + pTexture, + subResourceInfo[i].subResource, + x ? subResourceInfo[i].width - 1 : 0, + y ? subResourceInfo[i].height - 1 : 0, + z ? subResourceInfo[i].depth - 1 : 0, + pStagingBuffer, + 0, + &box); + } + + pStagingBuffer->Release(); + } + + // create the cl_mem objects for the resources and verify its sanity + for (UINT i = 0; i < size->SubResourceCount; ++i) + { + // create a cl_mem for the resource + subResourceInfo[i].mem = clCreateFromD3D11Texture3DKHR( + context, + 0, + pTexture, + subResourceInfo[i].subResource, + &result); + if (CL_IMAGE_FORMAT_NOT_SUPPORTED == result) + { + goto Cleanup; + } + TestRequire(result == CL_SUCCESS, "clCreateFromD3D11Texture3DKHR failed"); + + // query resource pointer and verify + ID3D11Resource* clResource = NULL; + result = clGetMemObjectInfo( + subResourceInfo[i].mem, + CL_MEM_D3D11_RESOURCE_KHR, + sizeof(clResource), + &clResource, + NULL); + TestRequire(result == CL_SUCCESS, "clGetMemObjectInfo for CL_MEM_D3D11_RESOURCE_KHR failed."); + TestRequire(clResource == pTexture, "clGetMemObjectInfo for CL_MEM_D3D11_RESOURCE_KHR returned incorrect value."); + + // query subresource and verify + UINT clSubResource; + result = clGetImageInfo( + subResourceInfo[i].mem, + 
CL_IMAGE_D3D11_SUBRESOURCE_KHR, + sizeof(clSubResource), + &clSubResource, + NULL); + TestRequire(result == CL_SUCCESS, "clGetImageInfo for CL_IMAGE_D3D11_SUBRESOURCE_KHR failed"); + TestRequire(clSubResource == subResourceInfo[i].subResource, "clGetImageInfo for CL_IMAGE_D3D11_SUBRESOURCE_KHR returned incorrect value."); + + // query format and verify + cl_image_format clFormat; + result = clGetImageInfo( + subResourceInfo[i].mem, + CL_IMAGE_FORMAT, + sizeof(clFormat), + &clFormat, + NULL); + TestRequire(result == CL_SUCCESS, "clGetImageInfo for CL_IMAGE_FORMAT failed"); + TestRequire(clFormat.image_channel_order == format->channel_order, "clGetImageInfo for CL_IMAGE_FORMAT returned incorrect channel order."); + TestRequire(clFormat.image_channel_data_type == format->channel_type, "clGetImageInfo for CL_IMAGE_FORMAT returned incorrect channel data type."); + + // query width + size_t width; + result = clGetImageInfo( + subResourceInfo[i].mem, + CL_IMAGE_WIDTH, + sizeof(width), + &width, + NULL); + TestRequire(result == CL_SUCCESS, "clGetImageInfo for CL_IMAGE_WIDTH failed"); + TestRequire(width == subResourceInfo[i].width, "clGetImageInfo for CL_IMAGE_HEIGHT returned incorrect value."); + + // query height + size_t height; + result = clGetImageInfo( + subResourceInfo[i].mem, + CL_IMAGE_HEIGHT, + sizeof(height), + &height, + NULL); + TestRequire(result == CL_SUCCESS, "clGetImageInfo for CL_IMAGE_HEIGHT failed"); + TestRequire(height == subResourceInfo[i].height, "clGetImageInfo for CL_IMAGE_HEIGHT returned incorrect value."); + + // query depth + size_t depth; + result = clGetImageInfo( + subResourceInfo[i].mem, + CL_IMAGE_DEPTH, + sizeof(depth), + &depth, + NULL); + TestRequire(result == CL_SUCCESS, "clGetImageInfo for CL_IMAGE_DEPTH failed"); + TestRequire(depth == subResourceInfo[i].depth, "clGetImageInfo for CL_IMAGE_DEPTH returned incorrect value."); + + } + + // acquire the resources for OpenCL + { + cl_mem memToAcquire[MAX_REGISTERED_SUBRESOURCES]; + + // 
cut the registered sub-resources into two sets and send the acquire calls for them separately + for(UINT i = 0; i < size->SubResourceCount; ++i) + { + memToAcquire[i] = subResourceInfo[i].mem; + } + + // do the acquire + result = clEnqueueAcquireD3D11ObjectsKHR( + command_queue, + size->SubResourceCount, + memToAcquire, + 0, + NULL, + NULL); + TestRequire(result == CL_SUCCESS, "clEnqueueAcquireD3D11ObjectsKHR failed."); + } + + // download the data using OpenCL & compare with the expected results + // copy the corners of the image into the image + for (UINT i = 0; i < size->SubResourceCount; ++i) + for (UINT x = 0; x < 2; ++x) + for (UINT y = 0; y < 2; ++y) + for (UINT z = 0; z < 2; ++z) + { + if (x == y && y == z && 0) + { + continue; + } + size_t src[3] = + { + x ? subResourceInfo[i].width - 1 : 0, + y ? subResourceInfo[i].height - 1 : 0, + z ? subResourceInfo[i].depth - 1 : 0, + }; + size_t dst[3] = + { + x ? subResourceInfo[i].width - 2 : 1, + y ? subResourceInfo[i].height - 2 : 1, + z ? 
subResourceInfo[i].depth - 2 : 1, + }; + size_t region[3] = + { + 1, + 1, + 1, + }; + result = clEnqueueCopyImage( + command_queue, + subResourceInfo[i].mem, + subResourceInfo[i].mem, + src, + dst, + region, + 0, + NULL, + NULL); + TestRequire(result == CL_SUCCESS, "clEnqueueCopyImage failed."); + } + + // release the resource from OpenCL + { + cl_mem memToAcquire[MAX_REGISTERED_SUBRESOURCES]; + for(UINT i = 0; i < size->SubResourceCount; ++i) + { + memToAcquire[i] = subResourceInfo[i].mem; + } + + // do the release + result = clEnqueueReleaseD3D11ObjectsKHR( + command_queue, + size->SubResourceCount, + memToAcquire, + 0, + NULL, + NULL); + TestRequire(result == CL_SUCCESS, "clEnqueueReleaseD3D11ObjectsKHR failed."); + } + + for (UINT i = 0; i < size->SubResourceCount; ++i) + for (UINT x = 0; x < 2; ++x) + for (UINT y = 0; y < 2; ++y) + for (UINT z = 0; z < 2; ++z) + { + // create the staging buffer + ID3D11Texture3D* pStagingBuffer = NULL; + { + D3D11_TEXTURE3D_DESC desc = {0}; + desc.Width = 1; + desc.Height = 1; + desc.Depth = 1; + desc.MipLevels = 1; + desc.Format = format->format; + desc.Usage = D3D11_USAGE_STAGING; + desc.BindFlags = 0; + desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE; + desc.MiscFlags = 0; + hr = pDevice->CreateTexture3D(&desc, NULL, &pStagingBuffer); + TestRequire(SUCCEEDED(hr), "Failed to create staging buffer."); + } + + // wipe out the staging buffer to make sure we don't get stale values + { + D3D11_MAPPED_SUBRESOURCE mappedTexture; + hr = pDC->Map( + pStagingBuffer, + 0, + D3D11_MAP_READ_WRITE, + 0, + &mappedTexture); + TestRequire(SUCCEEDED(hr), "Failed to map staging buffer"); + memset(mappedTexture.pData, 0, format->bytesPerPixel); + pDC->Unmap(pStagingBuffer, 0); + } + + // copy the pixel to the staging buffer + { + D3D11_BOX box = {0}; + box.left = x ? subResourceInfo[i].width - 2 : 1; box.right = box.left + 1; + box.top = y ? subResourceInfo[i].height - 2 : 1; box.bottom = box.top + 1; + box.front = z ? 
subResourceInfo[i].depth - 2 : 1; box.back = box.front + 1; + pDC->CopySubresourceRegion( + pStagingBuffer, + 0, + 0, + 0, + 0, + pTexture, + subResourceInfo[i].subResource, + &box); + } + + // make sure we read back what was written next door + { + D3D11_MAPPED_SUBRESOURCE mappedTexture; + hr = pDC->Map( + pStagingBuffer, + 0, + D3D11_MAP_READ_WRITE, + 0, + &mappedTexture); + TestRequire(SUCCEEDED(hr), "Failed to map staging buffer"); + TestRequire( + !memcmp(mappedTexture.pData, texture3DPatterns[x][y][z], format->bytesPerPixel), + "Failed to map staging buffer"); + pDC->Unmap(pStagingBuffer, 0); + } + + pStagingBuffer->Release(); + } + + +Cleanup: + + if (pTexture) + { + pTexture->Release(); + } + for (UINT i = 0; i < size->SubResourceCount; ++i) + { + clReleaseMemObject(subResourceInfo[i].mem); + } + + HarnessD3D11_TestEnd(); +} + + +void TestDeviceTexture3D( + cl_device_id device, + cl_context context, + cl_command_queue command_queue, + ID3D11Device* pDevice, + ID3D11DeviceContext* pDC) +{ + cl_int result = CL_SUCCESS; + + + for (UINT format = 0, size = 0; format < formatCount; ++size, ++format) + { + SubTestTexture3D( + context, + command_queue, + pDevice, + pDC, + &formats[format], + &texture3DSizes[size % texture3DSizeCount]); + } +} + diff --git a/test_conformance/device_execution/CMakeLists.txt b/test_conformance/device_execution/CMakeLists.txt new file mode 100644 index 00000000..450db838 --- /dev/null +++ b/test_conformance/device_execution/CMakeLists.txt @@ -0,0 +1,27 @@ +set(MODULE_NAME DEVICE_EXECUTION) + +set(DEVICE_EXECUTION_SOURCES + device_info.cpp + device_queue.cpp + enqueue_block.cpp + enqueue_flags.cpp + enqueue_multi_queue.cpp + enqueue_ndrange.cpp + enqueue_wg_size.cpp + execute_block.cpp + host_multi_queue.cpp + host_queue_order.cpp + main.c + nested_blocks.cpp + utils.cpp + ../../test_common/harness/errorHelpers.c + ../../test_common/harness/testHarness.c + ../../test_common/harness/mt19937.c + ../../test_common/harness/kernelHelpers.c + 
../../test_common/harness/msvc9.c + ../../test_common/harness/parseParameters.cpp +) + +include(../CMakeCommon.txt) + +# end of file # diff --git a/test_conformance/device_execution/Makefile b/test_conformance/device_execution/Makefile new file mode 100644 index 00000000..59271df0 --- /dev/null +++ b/test_conformance/device_execution/Makefile @@ -0,0 +1,52 @@ +ifdef BUILD_WITH_ATF +ATF = -framework ATF +USE_ATF = -DUSE_ATF +endif + +SRCS = main.c \ + device_info.cpp \ + device_queue.cpp \ + enqueue_block.cpp \ + enqueue_flags.cpp \ + enqueue_multi_queue.cpp \ + enqueue_ndrange.cpp \ + enqueue_wg_size.cpp \ + execute_block.cpp \ + host_multi_queue.cpp \ + host_queue_order.cpp \ + nested_blocks.cpp \ + utils.cpp \ + ../../test_common/harness/errorHelpers.c \ + ../../test_common/harness/testHarness.c \ + ../../test_common/harness/kernelHelpers.c \ + ../../test_common/harness/typeWrappers.cpp \ + ../../test_common/harness/mt19937.c \ + +DEFINES = DONT_TEST_GARBAGE_POINTERS + +SOURCES = $(abspath $(SRCS)) +LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries +LIBPATH += -L. +HEADERS = +TARGET = test_device_execution +INCLUDE = +COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32 +CC = c++ +CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE) +CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE) +LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF} + +OBJECTS := ${SOURCES:.c=.o} +OBJECTS := ${OBJECTS:.cpp=.o} + +TARGETOBJECT = +all: $(TARGET) + +$(TARGET): $(OBJECTS) + $(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES) + +clean: + rm -f $(TARGET) $(OBJECTS) + +.DEFAULT: + @echo The target \"$@\" does not exist in Makefile. 
diff --git a/test_conformance/device_execution/device_info.cpp b/test_conformance/device_execution/device_info.cpp new file mode 100644 index 00000000..fa37fe6b --- /dev/null +++ b/test_conformance/device_execution/device_info.cpp @@ -0,0 +1,106 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include +#include +#include "../../test_common/harness/testHarness.h" +#include "../../test_common/harness/typeWrappers.h" + +#include "procs.h" +#include "utils.h" + +static const cl_uint MIN_DEVICE_PREFFERED_QUEUE_SIZE = 16 * 1024; +static const cl_uint MAX_DEVICE_QUEUE_SIZE = 256 * 1024; +static const cl_uint MAX_DEVICE_EMBEDDED_QUEUE_SIZE = 64 * 1024; + +#ifdef CL_VERSION_2_0 + +int test_device_info(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + cl_int err_ret; + int embedded = 0; + size_t ret_len; + char profile[32] = {0}; + cl_command_queue_properties properties; + cl_uint size; + + err_ret = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), profile, &ret_len); + test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_PROFILE) failed"); + if(ret_len < sizeof(profile) && strcmp(profile, "FULL_PROFILE") == 0) embedded = 0; + else if(ret_len < sizeof(profile) && strcmp(profile, "EMBEDDED_PROFILE") == 0) embedded = 1; + else + { + log_error("Unknown device profile: %s\n", profile); + return -1; + } + + err_ret = clGetDeviceInfo(device, 
CL_DEVICE_QUEUE_ON_HOST_PROPERTIES, sizeof(properties), &properties, &ret_len); + test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_QUEUE_ON_HOST_PROPERTIES) failed"); + if(!(properties&CL_QUEUE_PROFILING_ENABLE)) + { + log_error("Host command-queue does not support mandated minimum capability: CL_QUEUE_PROFILING_ENABLE\n"); + return -1; + } + + err_ret = clGetDeviceInfo(device, CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES, sizeof(properties), &properties, &ret_len); + test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES) failed"); + if(!(properties&CL_QUEUE_PROFILING_ENABLE)) + { + log_error("Device command-queue does not support mandated minimum capability: CL_QUEUE_PROFILING_ENABLE\n"); + return -1; + } + if(!(properties&CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)) + { + log_error("Device command-queue does not support mandated minimum capability: CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE\n"); + return -1; + } + + err_ret = clGetDeviceInfo(device, CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE, sizeof(size), &size, &ret_len); + test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE) failed"); + if(size < MIN_DEVICE_PREFFERED_QUEUE_SIZE) + { + log_error("Device command-queue preferred size is less than minimum %dK: %dK\n", MIN_DEVICE_PREFFERED_QUEUE_SIZE/1024, size/1024); + return -1; + } + + err_ret = clGetDeviceInfo(device, CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE, sizeof(size), &size, &ret_len); + test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE) failed"); + if(size < (embedded ? MAX_DEVICE_EMBEDDED_QUEUE_SIZE : MAX_DEVICE_QUEUE_SIZE)) + { + log_error("Device command-queue maximum size is less than minimum %dK: %dK\n", (embedded ? 
MAX_DEVICE_EMBEDDED_QUEUE_SIZE : MAX_DEVICE_QUEUE_SIZE)/1024, size/1024); + return -1; + } + + err_ret = clGetDeviceInfo(device, CL_DEVICE_MAX_ON_DEVICE_QUEUES, sizeof(size), &size, &ret_len); + test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_MAX_ON_DEVICE_QUEUES) failed"); + if(size < 1) + { + log_error("Maximum number of device queues is less than minimum 1: %d\n", size); + return -1; + } + + err_ret = clGetDeviceInfo(device, CL_DEVICE_MAX_ON_DEVICE_EVENTS, sizeof(size), &size, &ret_len); + test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_MAX_ON_DEVICE_EVENTS) failed"); + if(size < 1024) + { + log_error("Maximum number of events in use by a device queue is less than minimum 1024: %d\n", size); + return -1; + } + + return 0; +} + +#endif + diff --git a/test_conformance/device_execution/device_queue.cpp b/test_conformance/device_execution/device_queue.cpp new file mode 100644 index 00000000..2a0e3961 --- /dev/null +++ b/test_conformance/device_execution/device_queue.cpp @@ -0,0 +1,188 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include +#include +#include "../../test_common/harness/testHarness.h" +#include "../../test_common/harness/typeWrappers.h" + +#include + +#include "procs.h" +#include "utils.h" + +static int check_device_queue(cl_device_id device, cl_context context, cl_command_queue queue, cl_uint size) +{ + cl_int err_ret; + cl_context q_context; + cl_device_id q_device; + cl_command_queue_properties q_properties; + cl_uint q_size; + size_t size_ret; + + err_ret = clRetainCommandQueue(queue); + test_error(err_ret, "clRetainCommandQueue() failed"); + + err_ret = clGetCommandQueueInfo(queue, CL_QUEUE_CONTEXT, sizeof(q_context), &q_context, &size_ret); + test_error(err_ret, "clGetCommandQueueInfo(CL_QUEUE_CONTEXT) failed"); + if(size_ret != sizeof(q_context) || q_context != context) + { + log_error("clGetCommandQueueInfo(CL_QUEUE_CONTEXT) returned invalid context\n"); + return -1; + } + + err_ret = clGetCommandQueueInfo(queue, CL_QUEUE_DEVICE, sizeof(q_device), &q_device, &size_ret); + test_error(err_ret, "clGetCommandQueueInfo(CL_QUEUE_DEVICE) failed"); + if(size_ret != sizeof(q_device) || q_device != device) + { + log_error("clGetCommandQueueInfo(CL_QUEUE_DEVICE) returned invalid device\n"); + return -1; + } + + err_ret = clGetCommandQueueInfo(queue, CL_QUEUE_PROPERTIES, sizeof(q_properties), &q_properties, &size_ret); + test_error(err_ret, "clGetCommandQueueInfo(CL_QUEUE_PROPERTIES) failed"); + if(size_ret != sizeof(q_properties) || !(q_properties & (CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE|CL_QUEUE_ON_DEVICE))) + { + log_error("clGetCommandQueueInfo(CL_QUEUE_PROPERTIES) returned invalid properties\n"); + return -1; + } + + err_ret = clGetCommandQueueInfo(queue, CL_QUEUE_SIZE, sizeof(q_size), &q_size, &size_ret); + test_error(err_ret, "clGetCommandQueueInfo(CL_QUEUE_SIZE) failed"); + if(size_ret != sizeof(q_size) || q_size < 1) + { + log_error("clGetCommandQueueInfo(CL_QUEUE_SIZE) returned invalid queue size\n"); + return -1; + } + + err_ret = clReleaseCommandQueue(queue); + 
test_error(err_ret, "clReleaseCommandQueue() failed"); + + + return 0; +} + +static int check_device_queues(cl_device_id device, cl_context context, cl_uint num_queues, cl_queue_properties *properties, cl_uint size) +{ + cl_int err_ret, res = 0; + cl_uint i; + std::vector queue(num_queues); + + // Create all queues + for(i = 0; i < num_queues; ++i) + { + queue[i] = clCreateCommandQueueWithProperties(context, device, properties, &err_ret); + test_error(err_ret, "clCreateCommandQueueWithProperties(CL_QUEUE_DEVICE) failed"); + } + + // Validate all queues + for(i = 0; i < num_queues; ++i) + { + err_ret = check_device_queue(device, context, queue[i], size); + if(check_error(err_ret, "Device queue[%d] validation failed", i)) res = -1; + + } + return res; +} + +int test_device_queue(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + cl_int err_ret, res = 0; + size_t ret_len; + clCommandQueueWrapper dev_queue; + cl_uint preffered_size, max_size, max_queues; + + cl_queue_properties queue_prop_def[] = + { + CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE|CL_QUEUE_ON_DEVICE|CL_QUEUE_ON_DEVICE_DEFAULT, + 0 + }; + + cl_queue_properties queue_prop[] = + { + CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE|CL_QUEUE_ON_DEVICE, + 0 + }; + + err_ret = clGetDeviceInfo(device, CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE, sizeof(preffered_size), &preffered_size, &ret_len); + test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE) failed"); + + err_ret = clGetDeviceInfo(device, CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE, sizeof(max_size), &max_size, &ret_len); + test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE) failed"); + + err_ret = clGetDeviceInfo(device, CL_DEVICE_MAX_ON_DEVICE_QUEUES, sizeof(max_queues), &max_queues, &ret_len); + test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_MAX_ON_DEVICE_QUEUES) failed"); + + if(max_queues > MAX_QUEUES) max_queues = MAX_QUEUES; + + dev_queue = 
clCreateCommandQueueWithProperties(context, device, queue_prop_def, &err_ret); + test_error(err_ret, "clCreateCommandQueueWithProperties(CL_QUEUE_DEVICE|CL_QUEUE_DEFAULT) failed"); + + err_ret = check_device_queue(device, context, dev_queue, preffered_size); + if(check_error(err_ret, "Default device queue validation failed")) res = -1; + + log_info("Default device queue is OK.\n"); + + if(max_queues > 1) // Check more queues if supported. + { + cl_uint q_size = preffered_size-1024; + cl_queue_properties queue_prop_size[] = + { + CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE|CL_QUEUE_ON_DEVICE, + CL_QUEUE_SIZE, q_size, + 0 + }; + + cl_queue_properties queue_prop_max[] = + { + CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE|CL_QUEUE_ON_DEVICE, + CL_QUEUE_SIZE, max_size, + 0 + }; + { + err_ret = check_device_queues(device, context, 1, queue_prop, preffered_size); + if(check_error(err_ret, "Second device queue validation failed")) res = -1; + else log_info("Second device queue is OK.\n"); + } + { + err_ret = check_device_queues(device, context, 1, queue_prop_size, q_size); + if(check_error(err_ret, "Device queue with size validation failed")) res = -1; + else log_info("Device queue with size is OK.\n"); + } + { + err_ret = check_device_queues(device, context, 1, queue_prop_max, max_size); + if(check_error(err_ret, "Device queue max size validation failed")) res = -1; + else log_info("Device queue max size is OK.\n"); + } + { + err_ret = check_device_queues(device, context, max_queues, queue_prop, preffered_size); + if(check_error(err_ret, "Max number device queue validation failed")) res = -1; + else log_info("Max number device queue is OK.\n"); + } + { + err_ret = check_device_queues(device, context, max_queues, queue_prop_size, q_size); + if(check_error(err_ret, "Max number device queue with size validation failed")) res = -1; + else log_info("Max number device queue with size is OK.\n"); + } + { + err_ret = check_device_queues(device, 
context, max_queues, queue_prop_max, max_size); + if(check_error(err_ret, "Max number device queue with max size validation failed")) res = -1; + else log_info("Max number device queue with max size is OK.\n"); + } + } + + return res; +} + diff --git a/test_conformance/device_execution/enqueue_block.cpp b/test_conformance/device_execution/enqueue_block.cpp new file mode 100644 index 00000000..b3966e57 --- /dev/null +++ b/test_conformance/device_execution/enqueue_block.cpp @@ -0,0 +1,684 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include +#include +#include "../../test_common/harness/testHarness.h" +#include "../../test_common/harness/typeWrappers.h" + +#include + +#include "procs.h" +#include "utils.h" +#include + + +#ifdef CL_VERSION_2_0 +extern int gWimpyMode; +static const char* enqueue_simple_block[] = +{ + NL, "void block_fn(size_t tid, int mul, __global int* res)" + NL, "{" + NL, " res[tid] = mul * 7 - 21;" + NL, "}" + NL, "" + NL, "kernel void enqueue_simple_block(__global int* res)" + NL, "{" + NL, " int multiplier = 3;" + NL, " size_t tid = get_global_id(0);" + NL, "" + NL, " void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); };" + NL, "" + NL, " res[tid] = -1;" + NL, " queue_t def_q = get_default_queue();" + NL, " ndrange_t ndrange = ndrange_1D(1);" + NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" + NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" + NL, "}" + NL +}; + +static const char* enqueue_block_with_local_arg1[] = +{ + NL, "#define LOCAL_MEM_SIZE 10" + NL, "" + NL, "void block_fn_local_arg1(size_t tid, int mul, __global int* res, __local int* tmp)" + NL, "{" + NL, " for(int i = 0; i < LOCAL_MEM_SIZE; i++)" + NL, " {" + NL, " tmp[i] = mul * 7 - 21;" + NL, " res[tid] += tmp[i];" + NL, " }" + NL, " res[tid] += 2;" + NL, "}" + NL, "" + NL, "kernel void enqueue_block_with_local_arg1(__global int* res)" + NL, "{" + NL, " int multiplier = 3;" + NL, " size_t tid = get_global_id(0);" + NL, "" + NL, " void (^kernelBlock)(__local void*) = ^(__local void* buf){ block_fn_local_arg1(tid, multiplier, res, (local int*)buf); };" + NL, "" + NL, " res[tid] = -2;" + NL, " queue_t def_q = get_default_queue();" + NL, " ndrange_t ndrange = ndrange_1D(1);" + NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock, (uint)(LOCAL_MEM_SIZE*sizeof(int)));" + NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" + NL, "}" + NL +}; + +static const char* 
enqueue_block_with_local_arg2[] = +{ + NL, "#define LOCAL_MEM_SIZE 10" + NL, "" + NL, "void block_fn_local_arg1(size_t tid, int mul, __global int* res, __local int* tmp1, __local float4* tmp2)" + NL, "{" + NL, " for(int i = 0; i < LOCAL_MEM_SIZE; i++)" + NL, " {" + NL, " tmp1[i] = mul * 7 - 21;" + NL, " tmp2[i].x = (float)(mul * 7 - 21);" + NL, " tmp2[i].y = (float)(mul * 7 - 21);" + NL, " tmp2[i].z = (float)(mul * 7 - 21);" + NL, " tmp2[i].w = (float)(mul * 7 - 21);" + NL, "" + NL, " res[tid] += tmp1[i];" + NL, " res[tid] += (int)(tmp2[i].x+tmp2[i].y+tmp2[i].z+tmp2[i].w);" + NL, " }" + NL, " res[tid] += 2;" + NL, "}" + NL, "" + NL, "kernel void enqueue_block_with_local_arg2(__global int* res)" + NL, "{" + NL, " int multiplier = 3;" + NL, " size_t tid = get_global_id(0);" + NL, "" + NL, " void (^kernelBlock)(__local void*, __local void*) = ^(__local void* buf1, __local void* buf2)" + NL, " { block_fn_local_arg1(tid, multiplier, res, (local int*)buf1, (local float4*)buf2); };" + NL, "" + NL, " res[tid] = -2;" + NL, " queue_t def_q = get_default_queue();" + NL, " ndrange_t ndrange = ndrange_1D(1);" + NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock, (uint)(LOCAL_MEM_SIZE*sizeof(int)), (uint)(LOCAL_MEM_SIZE*sizeof(float4)));" + NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" + NL, "}" + NL +}; + +static const char* enqueue_block_with_wait_list[] = +{ + NL, "#define BLOCK_SUBMITTED 1" + NL, "#define BLOCK_COMPLETED 2" + NL, "#define CHECK_SUCCESS 0" + NL, "" + NL, "kernel void enqueue_block_with_wait_list(__global int* res)" + NL, "{" + NL, " size_t tid = get_global_id(0);" + NL, "" + NL, " clk_event_t user_evt = create_user_event();" + NL, "" + NL, " res[tid] = BLOCK_SUBMITTED;" + NL, " queue_t def_q = get_default_queue();" + NL, " ndrange_t ndrange = ndrange_1D(1);" + NL, " clk_event_t block_evt;" + NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &user_evt, &block_evt," + NL, " 
^{" + NL, " res[tid] = BLOCK_COMPLETED;" + NL, " });" + NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" + NL, "" + NL, " retain_event(block_evt);" + NL, " release_event(block_evt);" + NL, "" + NL, " //check block is not started" + NL, " if(res[tid] == BLOCK_SUBMITTED)" + NL, " {" + NL, " clk_event_t my_evt;" + NL, " enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt, &my_evt, " + NL, " ^{" + NL, " //check block is completed" + NL, " if(res[tid] == BLOCK_COMPLETED) res[tid] = CHECK_SUCCESS;" + NL, " });" + NL, " release_event(my_evt);" + NL, " }" + NL, "" + NL, " set_user_event_status(user_evt, CL_COMPLETE);" + NL, "" + NL, " release_event(user_evt);" + NL, " release_event(block_evt);" + NL, "}" + NL +}; + +static const char* enqueue_block_with_wait_list_and_local_arg[] = +{ + NL, "#define LOCAL_MEM_SIZE 10" + NL, "#define BLOCK_COMPLETED 1" + NL, "#define BLOCK_SUBMITTED 2" + NL, "#define BLOCK_STARTED 3" + NL, "#define CHECK_SUCCESS 0" + NL, "" + NL, "void block_fn_local_arg(size_t tid, int mul, __global int* res, __local int* tmp)" + NL, "{" + NL, " res[tid] = BLOCK_STARTED;" + NL, " for(int i = 0; i < LOCAL_MEM_SIZE; i++)" + NL, " {" + NL, " tmp[i] = mul * 7 - 21;" + NL, " res[tid] += tmp[i];" + NL, " }" + NL, " if(res[tid] == BLOCK_STARTED) res[tid] = BLOCK_COMPLETED;" + NL, "}" + NL, "" + NL, "kernel void enqueue_block_with_wait_list_and_local_arg(__global int* res)" + NL, "{" + NL, " int multiplier = 3;" + NL, " size_t tid = get_global_id(0);" + NL, " clk_event_t user_evt = create_user_event();" + NL, "" + NL, " res[tid] = BLOCK_SUBMITTED;" + NL, " queue_t def_q = get_default_queue();" + NL, " ndrange_t ndrange = ndrange_1D(1);" + NL, " clk_event_t block_evt;" + NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &user_evt, &block_evt, " + NL, " ^(__local void* buf) {" + NL, " block_fn_local_arg(tid, multiplier, res, (__local int*)buf);" + NL, " }, LOCAL_MEM_SIZE*sizeof(int));" + NL, " if(enq_res 
!= CLK_SUCCESS) { res[tid] = -1; return; }" + NL, "" + NL, " retain_event(block_evt);" + NL, " release_event(block_evt);" + NL, "" + NL, " //check block is not started" + NL, " if(res[tid] == BLOCK_SUBMITTED)" + NL, " {" + NL, " clk_event_t my_evt;" + NL, " enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt, &my_evt, " + NL, " ^{" + NL, " //check block is completed" + NL, " if(res[tid] == BLOCK_COMPLETED) res[tid] = CHECK_SUCCESS;" + NL, " });" + NL, " release_event(my_evt);" + NL, " }" + NL, "" + NL, " set_user_event_status(user_evt, CL_COMPLETE);" + NL, "" + NL, " release_event(user_evt);" + NL, " release_event(block_evt);" + NL, "}" + NL +}; + +static const char* enqueue_block_get_kernel_work_group_size[] = +{ + NL, "void block_fn(size_t tid, int mul, __global int* res)" + NL, "{" + NL, " res[tid] = mul * 7 - 21;" + NL, "}" + NL, "" + NL, "kernel void enqueue_block_get_kernel_work_group_size(__global int* res)" + NL, "{" + NL, " int multiplier = 3;" + NL, " size_t tid = get_global_id(0);" + NL, "" + NL, " void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); };" + NL, "" + NL, " size_t local_work_size = get_kernel_work_group_size(kernelBlock);" + NL, " if (local_work_size <= 0){ res[tid] = -1; return; }" + NL, " size_t global_work_size = local_work_size * 4;" + NL, "" + NL, " res[tid] = -1;" + NL, " queue_t q1 = get_default_queue();" + NL, " ndrange_t ndrange = ndrange_1D(global_work_size, local_work_size);" + NL, "" + NL, " int enq_res = enqueue_kernel(q1, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" + NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" + NL, "}" +}; + +static const char* enqueue_block_get_kernel_preferred_work_group_size_multiple[] = +{ + NL, "void block_fn(size_t tid, int mul, __global int* res)" + NL, "{" + NL, " res[tid] = mul * 7 - 21;" + NL, "}" + NL, "" + NL, "kernel void enqueue_block_get_kernel_preferred_work_group_size_multiple(__global int* res)" + NL, "{" + NL, " int multiplier = 3;" 
+ NL, " size_t tid = get_global_id(0);" + NL, "" + NL, " void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); };" + NL, "" + NL, " size_t local_work_size = get_kernel_preferred_work_group_size_multiple(kernelBlock);" + NL, " if (local_work_size <= 0){ res[tid] = -1; return; }" + NL, " size_t global_work_size = local_work_size * 4;" + NL, "" + NL, " res[tid] = -1;" + NL, " queue_t q1 = get_default_queue();" + NL, " ndrange_t ndrange = ndrange_1D(global_work_size, local_work_size);" + NL, "" + NL, " int enq_res = enqueue_kernel(q1, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" + NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" + NL, "}" +}; + +static const char* enqueue_block_capture_event_profiling_info_after_execution[] = +{ + NL, "#define MAX_GWS " STRINGIFY_VALUE(MAX_GWS) + NL, "" + NL, "__global ulong value[MAX_GWS*2] = {0};" + NL, "" + NL, "void block_fn(size_t tid, __global int* res)" + NL, "{" + NL, " res[tid] = -2;" + NL, "}" + NL, "" + NL, "void check_res(size_t tid, const clk_event_t evt, __global int* res)" + NL, "{" + NL, " capture_event_profiling_info (evt, CLK_PROFILING_COMMAND_EXEC_TIME, &value[tid*2]);" + NL, "" + NL, " if (value[tid*2] > 0 && value[tid*2+1] > 0) res[tid] = 0;" + NL, " else res[tid] = -4;" + NL, " release_event(evt);" + NL, "}" + NL, "" + NL, "kernel void enqueue_block_capture_event_profiling_info_after_execution(__global int* res)" + NL, "{" + NL, " size_t tid = get_global_id(0);" + NL, "" + NL, " res[tid] = -1;" + NL, " queue_t def_q = get_default_queue();" + NL, " ndrange_t ndrange = ndrange_1D(1);" + NL, " clk_event_t block_evt1;" + NL, "" + NL, " void (^kernelBlock)(void) = ^{ block_fn (tid, res); };" + NL, "" + NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 0, NULL, &block_evt1, kernelBlock);" + NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" + NL, "" + NL, " void (^checkBlock) (void) = ^{ check_res(tid, block_evt1, res); };" + NL, "" + NL, " enq_res = 
enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt1, NULL, checkBlock);" + NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -3; return; }" + NL, "}" + NL +}; + +static const char* enqueue_block_capture_event_profiling_info_before_execution[] = +{ + NL, "#define MAX_GWS " STRINGIFY_VALUE(MAX_GWS) + NL, "" + NL, "__global ulong value[MAX_GWS*2] = {0};" + NL, "" + NL, "void block_fn(size_t tid, __global int* res)" + NL, "{" + NL, " res[tid] = -2;" + NL, "}" + NL, "" + NL, "void check_res(size_t tid, const ulong *value, __global int* res)" + NL, "{" + NL, " if (value[tid*2] > 0 && value[tid*2+1] > 0) res[tid] = 0;" + NL, " else res[tid] = -4;" + NL, "}" + NL, "" + NL, "kernel void enqueue_block_capture_event_profiling_info_before_execution(__global int* res)" + NL, "{" + NL, " int multiplier = 3;" + NL, " size_t tid = get_global_id(0);" + NL, " clk_event_t user_evt = create_user_event();" + NL, "" + NL, " res[tid] = -1;" + NL, " queue_t def_q = get_default_queue();" + NL, " ndrange_t ndrange = ndrange_1D(1);" + NL, " clk_event_t block_evt1;" + NL, " clk_event_t block_evt2;" + NL, "" + NL, " void (^kernelBlock)(void) = ^{ block_fn (tid, res); };" + NL, "" + NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &user_evt, &block_evt1, kernelBlock);" + NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" + NL, "" + NL, " capture_event_profiling_info (block_evt1, CLK_PROFILING_COMMAND_EXEC_TIME, &value[tid*2]);" + NL, "" + NL, " set_user_event_status(user_evt, CL_COMPLETE);" + NL, "" + NL, " void (^checkBlock) (void) = ^{ check_res(tid, &value, res); };" + NL, "" + NL, " enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt1, &block_evt2, checkBlock);" + NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -3; return; }" + NL, "" + NL, " release_event(user_evt);" + NL, " release_event(block_evt1);" + NL, " release_event(block_evt2);" + NL, "}" + NL +}; + +static const char* 
enqueue_block_with_barrier[] = +{ + NL, "void block_fn(size_t tid, int mul, __global int* res)" + NL, "{" + NL, " if(mul > 0) barrier(CLK_GLOBAL_MEM_FENCE);" + NL, " res[tid] = mul * 7 -21;" + NL, "}" + NL, "" + NL, "void loop_fn(size_t tid, int n, __global int* res)" + NL, "{" + NL, " while(n > 0)" + NL, " {" + NL, " barrier(CLK_GLOBAL_MEM_FENCE);" + NL, " res[tid] = 0;" + NL, " --n;" + NL, " }" + NL, "}" + NL, "" + NL, "kernel void enqueue_block_with_barrier(__global int* res)" + NL, "{" + NL, " int multiplier = 3;" + NL, " size_t tid = get_global_id(0);" + NL, " queue_t def_q = get_default_queue();" + NL, " res[tid] = -1;" + NL, " size_t n = 256;" + NL, "" + NL, " void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); };" + NL, "" + NL, " ndrange_t ndrange = ndrange_1D(n);" + NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" + NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" + NL, "" + NL, " void (^loopBlock)(void) = ^{ loop_fn(tid, n, res); };" + NL, "" + NL, " enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, loopBlock);" + NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" + NL, "}" + NL +}; + +static const char* enqueue_marker_with_block_event[] = +{ + NL, "#define BLOCK_COMPLETED 1" + NL, "#define BLOCK_SUBMITTED 2" + NL, "#define CHECK_SUCCESS 0" + NL, "" + NL, "kernel void enqueue_marker_with_block_event(__global int* res)" + NL, "{" + NL, " size_t tid = get_global_id(0);" + NL, "" + NL, " clk_event_t user_evt = create_user_event();" + NL, "" + NL, " res[tid] = BLOCK_SUBMITTED;" + NL, " queue_t def_q = get_default_queue();" + NL, " ndrange_t ndrange = ndrange_1D(1);" + NL, "" + NL, " clk_event_t block_evt1;" + NL, " clk_event_t marker_evt;" + NL, "" + NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &user_evt, &block_evt1," + NL, " ^{" + NL, " res[tid] = BLOCK_COMPLETED;" + NL, " });" + NL, " if(enq_res != CLK_SUCCESS) { 
res[tid] = -2; return; }" + NL, "" + NL, " enq_res = enqueue_marker(def_q, 1, &block_evt1, &marker_evt);" + NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -3; return; }" + NL, "" + NL, " retain_event(marker_evt);" + NL, " release_event(marker_evt);" + NL, "" + NL, " //check block is not started" + NL, " if(res[tid] == BLOCK_SUBMITTED)" + NL, " {" + NL, " clk_event_t my_evt;" + NL, " enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &marker_evt, &my_evt, " + NL, " ^{" + NL, " //check block is completed" + NL, " if(res[tid] == BLOCK_COMPLETED) res[tid] = CHECK_SUCCESS;" + NL, " });" + NL, " release_event(my_evt);" + NL, " }" + NL, "" + NL, " set_user_event_status(user_evt, CL_COMPLETE);" + NL, "" + NL, " release_event(block_evt1);" + NL, " release_event(marker_evt);" + NL, " release_event(user_evt);" + NL, "}" + NL +}; + +static const char* enqueue_marker_with_user_event[] = +{ + NL, "#define BLOCK_COMPLETED 1" + NL, "#define BLOCK_SUBMITTED 2" + NL, "#define CHECK_SUCCESS 0" + NL, "" + NL, "kernel void enqueue_marker_with_user_event(__global int* res)" + NL, "{" + NL, " size_t tid = get_global_id(0);" + NL, " uint multiplier = 7;" + NL, "" + NL, " clk_event_t user_evt = create_user_event();" + NL, "" + NL, " res[tid] = BLOCK_SUBMITTED;" + NL, " queue_t def_q = get_default_queue();" + NL, " ndrange_t ndrange = ndrange_1D(1);" + NL, "" + NL, " clk_event_t marker_evt;" + NL, " clk_event_t block_evt;" + NL, "" + NL, " int enq_res = enqueue_marker(def_q, 1, &user_evt, &marker_evt);" + NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" + NL, "" + NL, " retain_event(marker_evt);" + NL, " release_event(marker_evt);" + NL, "" + NL, " enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &marker_evt, &block_evt, " + NL, " ^{" + NL, " if(res[tid] == BLOCK_SUBMITTED) res[tid] = CHECK_SUCCESS;" + NL, " });" + NL, "" + NL, " //check block is not started" + NL, " if(res[tid] != BLOCK_SUBMITTED) { res[tid] = -2; return; }" + NL, "" + NL, " 
set_user_event_status(user_evt, CL_COMPLETE);" + NL, "" + NL, " release_event(block_evt);" + NL, " release_event(marker_evt);" + NL, " release_event(user_evt);" + NL, "}" + NL +}; + +static const char* enqueue_marker_with_mixed_events[] = +{ + NL, "#define BLOCK_COMPLETED 1" + NL, "#define BLOCK_SUBMITTED 2" + NL, "#define CHECK_SUCCESS 0" + NL, "" + NL, "kernel void enqueue_marker_with_mixed_events(__global int* res)" + NL, "{" + NL, " size_t tid = get_global_id(0);" + NL, "" + NL, " clk_event_t mix_ev[2];" + NL, " mix_ev[0] = create_user_event();" + NL, "" + NL, " res[tid] = BLOCK_SUBMITTED;" + NL, " queue_t def_q = get_default_queue();" + NL, " ndrange_t ndrange = ndrange_1D(1);" + NL, "" + NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &mix_ev[0], &mix_ev[1]," + NL, " ^{" + NL, " res[tid] = BLOCK_COMPLETED;" + NL, " });" + NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -2; return; }" + NL, "" + NL, " clk_event_t marker_evt;" + NL, "" + NL, " enq_res = enqueue_marker(def_q, 2, mix_ev, &marker_evt);" + NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -3; return; }" + NL, "" + NL, " retain_event(marker_evt);" + NL, " release_event(marker_evt);" + NL, "" + NL, " //check block is not started" + NL, " if(res[tid] == BLOCK_SUBMITTED)" + NL, " {" + NL, " clk_event_t my_evt;" + NL, " enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &marker_evt, &my_evt, " + NL, " ^{" + NL, " //check block is completed" + NL, " if(res[tid] == BLOCK_COMPLETED) res[tid] = CHECK_SUCCESS;" + NL, " });" + NL, " release_event(my_evt);" + NL, " }" + NL, "" + NL, " set_user_event_status(mix_ev[0], CL_COMPLETE);" + NL, "" + NL, " release_event(mix_ev[1]);" + NL, " release_event(marker_evt);" + NL, " release_event(mix_ev[0]);" + NL, "}" + NL +}; + +static const char* enqueue_block_with_mixed_events[] = +{ + NL, "kernel void enqueue_block_with_mixed_events(__global int* res)" + NL, "{" + NL, " int enq_res;" + NL, " size_t tid = get_global_id(0);" + NL, " 
clk_event_t mix_ev[3];" + NL, " mix_ev[0] = create_user_event();" + NL, " queue_t def_q = get_default_queue();" + NL, " ndrange_t ndrange = ndrange_1D(1);" + NL, " res[tid] = -2;" + NL, "" + NL, " enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &mix_ev[0], &mix_ev[1], ^{ res[tid]++; });" + NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" + NL, "" + NL, " enq_res = enqueue_marker(def_q, 1, &mix_ev[1], &mix_ev[2]);" + NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -3; return; }" + NL, "" + NL, " enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, sizeof(mix_ev)/sizeof(mix_ev[0]), mix_ev, NULL, ^{ res[tid]++; });" + NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -4; return; }" + NL, "" + NL, " set_user_event_status(mix_ev[0], CL_COMPLETE);" + NL, "" + NL, " release_event(mix_ev[0]);" + NL, " release_event(mix_ev[1]);" + NL, " release_event(mix_ev[2]);" + NL, "}" + NL +}; + +static const kernel_src sources_enqueue_block[] = +{ + KERNEL(enqueue_simple_block), + // Block with local mem + KERNEL(enqueue_block_with_local_arg1), + KERNEL(enqueue_block_with_local_arg2), + KERNEL(enqueue_block_with_wait_list), + KERNEL(enqueue_block_with_wait_list_and_local_arg), + // WG size built-ins + KERNEL(enqueue_block_get_kernel_work_group_size), + KERNEL(enqueue_block_get_kernel_preferred_work_group_size_multiple), + // Event profiling info + KERNEL(enqueue_block_capture_event_profiling_info_after_execution), + KERNEL(enqueue_block_capture_event_profiling_info_before_execution), + // Marker + KERNEL(enqueue_marker_with_block_event), + KERNEL(enqueue_marker_with_user_event), + // Mixed events + KERNEL(enqueue_marker_with_mixed_events), + KERNEL(enqueue_block_with_mixed_events), + // Barrier + KERNEL(enqueue_block_with_barrier), + +}; +static const size_t num_kernels_enqueue_block = arr_size(sources_enqueue_block); + +static int check_kernel_results(cl_int* results, cl_int len) +{ + for(cl_int i = 0; i < len; ++i) + { + if(results[i] != 
0) return i; + } + return -1; +} + +int test_enqueue_block(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + cl_uint i; + cl_int n, err_ret, res = 0; + clCommandQueueWrapper dev_queue; + cl_int kernel_results[MAX_GWS] = {0}; + + size_t ret_len; + cl_uint max_queues = 1; + cl_uint maxQueueSize = 0; + err_ret = clGetDeviceInfo(device, CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE, sizeof(maxQueueSize), &maxQueueSize, 0); + test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE) failed"); + + err_ret = clGetDeviceInfo(device, CL_DEVICE_MAX_ON_DEVICE_QUEUES, sizeof(max_queues), &max_queues, &ret_len); + test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_MAX_ON_DEVICE_QUEUES) failed"); + + size_t max_local_size = 1; + err_ret = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(max_local_size), &max_local_size, &ret_len); + test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_MAX_WORK_GROUP_SIZE) failed"); + + cl_queue_properties queue_prop_def[] = + { + CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE|CL_QUEUE_ON_DEVICE|CL_QUEUE_ON_DEVICE_DEFAULT|CL_QUEUE_PROFILING_ENABLE, + CL_QUEUE_SIZE, maxQueueSize, + 0 + }; + + dev_queue = clCreateCommandQueueWithProperties(context, device, queue_prop_def, &err_ret); + test_error(err_ret, "clCreateCommandQueueWithProperties(CL_QUEUE_DEVICE|CL_QUEUE_DEFAULT) failed"); + + size_t global_size = MAX_GWS; + size_t local_size = (max_local_size > global_size/16) ? 
global_size/16 : max_local_size; + if(gWimpyMode) + { + global_size = 4; + local_size = 2; + } + + size_t failCnt = 0; + for(i = 0; i < num_kernels_enqueue_block; ++i) + { + if (!gKernelName.empty() && gKernelName != sources_enqueue_block[i].kernel_name) + continue; + + log_info("Running '%s' kernel (%d of %d) ...\n", sources_enqueue_block[i].kernel_name, i + 1, num_kernels_enqueue_block); + err_ret = run_n_kernel_args(context, queue, sources_enqueue_block[i].lines, sources_enqueue_block[i].num_lines, sources_enqueue_block[i].kernel_name, local_size, global_size, kernel_results, sizeof(kernel_results), 0, NULL); + if(check_error(err_ret, "'%s' kernel execution failed", sources_enqueue_block[i].kernel_name)) { ++failCnt; res = -1; } + else if((n = check_kernel_results(kernel_results, arr_size(kernel_results))) >= 0 && check_error(-1, "'%s' kernel results validation failed: [%d] returned %d expected 0", sources_enqueue_block[i].kernel_name, n, kernel_results[n])) res = -1; + else log_info("'%s' kernel is OK.\n", sources_enqueue_block[i].kernel_name); + } + + if (failCnt > 0) + { + log_error("ERROR: %d of %d kernels failed.\n", failCnt, num_kernels_enqueue_block); + } + + return res; +} + + + +#endif + + diff --git a/test_conformance/device_execution/enqueue_flags.cpp b/test_conformance/device_execution/enqueue_flags.cpp new file mode 100644 index 00000000..d3858c66 --- /dev/null +++ b/test_conformance/device_execution/enqueue_flags.cpp @@ -0,0 +1,756 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +#include +#include +#include "../../test_common/harness/testHarness.h" +#include "../../test_common/harness/typeWrappers.h" + +#include + +#include "procs.h" +#include "utils.h" +#include + + +#ifdef CL_VERSION_2_0 +extern int gWimpyMode; +#define BITS_DEPTH 28 + +static const char* enqueue_flags_wait_kernel_simple[] = +{ + NL, "#define BITS_DEPTH " STRINGIFY_VALUE(BITS_DEPTH) + NL, "" + NL, "void block_fn(__global int* array, int index, size_t ls, size_t gs, __global int* res)" + NL, "{" + NL, " int val = 0;" + NL, " size_t lid = get_local_id(0);" + NL, " size_t tid = get_global_id(0);" + NL, "" + NL, " array[index * gs + tid] = array[(index - 1) * gs + tid] + 1;" + NL, "" + NL, " if(tid == 0)" + NL, " {" + NL, " if((index + 1) < BITS_DEPTH)" + NL, " {" + NL, " enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange_1D(gs, ls), " + NL, " ^{" + NL, " block_fn(array, index + 1, ls, gs, res);" + NL, " });" + NL, " }" + NL, " }" + NL, "" + NL, " if((index + 1) == BITS_DEPTH)" + NL, " {" + NL, " barrier(CLK_LOCAL_MEM_FENCE);" + NL, "" + NL, " if(lid == 0)" + NL, " {" + NL, " size_t gid = get_group_id(0);" + NL, " res[gid] = 1;" + NL, "" + NL, " for(int j = 0; j < BITS_DEPTH; j++)" + NL, " {" + NL, " for(int i = 0; i < ls; i++)" + NL, " {" + NL, " if(array[j * gs + ls * gid + i] != ((ls * gid + i) + j))" + NL, " {" + NL, " res[gid] = 2;" + NL, " break;" + NL, " }" + NL, " }" + NL, " }" + NL, " }" + NL, " }" + NL, "}" + NL, "" + NL, "kernel void enqueue_flags_wait_kernel_simple(__global int* res, __global int* array)" + NL, "{" + NL, " size_t ls = get_local_size(0);" + NL, " size_t gs = get_global_size(0);" + NL, " size_t tid = get_global_id(0);" + NL, "" + NL, " res[tid] = 0;" + NL, " array[tid] = tid;" + NL, "" + NL, " if(tid == 0)" + NL, " {" + NL, " enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange_1D(gs, ls), " + 
NL, " ^{" + NL, " block_fn(array, 1, ls, gs, res);" + NL, " });" + NL, " }" + NL, "}" + NL +}; + +static const char* enqueue_flags_wait_kernel_event[] = +{ + NL, "#define BITS_DEPTH " STRINGIFY_VALUE(BITS_DEPTH) + NL, "" + NL, "void block_fn(__global int* array, int index, size_t ls, size_t gs, __global int* res)" + NL, "{" + NL, " int val = 0;" + NL, " size_t lid = get_local_id(0);" + NL, " size_t tid = get_global_id(0);" + NL, "" + NL, " array[index * gs + tid] = array[(index - 1) * gs + tid] + 1;" + NL, "" + NL, " if(tid == 0)" + NL, " {" + NL, " if((index + 1) < BITS_DEPTH)" + NL, " {" + NL, " clk_event_t block_evt;" + NL, " clk_event_t user_evt = create_user_event();" + NL, " enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange_1D(gs, ls), 1, &user_evt, &block_evt, " + NL, " ^{" + NL, " block_fn(array, index + 1, ls, gs, res);" + NL, " });" + NL, " set_user_event_status(user_evt, CL_COMPLETE);" + NL, " release_event(user_evt);" + NL, " release_event(block_evt);" + NL, " }" + NL, " }" + NL, "" + NL, " if((index + 1) == BITS_DEPTH)" + NL, " {" + NL, " barrier(CLK_LOCAL_MEM_FENCE);" + NL, "" + NL, " if(lid == 0)" + NL, " {" + NL, " size_t gid = get_group_id(0);" + NL, " res[gid] = 1;" + NL, "" + NL, " for(int j = 0; j < BITS_DEPTH; j++)" + NL, " {" + NL, " for(int i = 0; i < ls; i++)" + NL, " {" + NL, " if(array[j * gs + ls * gid + i] != ((ls * gid + i) + j))" + NL, " {" + NL, " res[gid] = 2;" + NL, " break;" + NL, " }" + NL, " }" + NL, " }" + NL, " }" + NL, " }" + NL, "}" + NL, "" + NL, "kernel void enqueue_flags_wait_kernel_event(__global int* res, __global int* array)" + NL, "{" + NL, " size_t tid = get_global_id(0);" + NL, " size_t gs = get_global_size(0);" + NL, " size_t ls = get_local_size(0);" + NL, "" + NL, " res[tid] = 0;" + NL, " array[tid] = tid;" + NL, "" + NL, " if(tid == 0)" + NL, " {" + NL, " clk_event_t block_evt;" + NL, " clk_event_t user_evt = create_user_event();" + NL, " enqueue_kernel(get_default_queue(), 
CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange_1D(gs, ls), 1, &user_evt, &block_evt, " + NL, " ^{" + NL, " block_fn(array, 1, ls, gs, res);" + NL, " });" + NL, " set_user_event_status(user_evt, CL_COMPLETE);" + NL, " release_event(user_evt);" + NL, " release_event(block_evt);" + NL, " }" + NL, "}" + NL +}; + +static const char* enqueue_flags_wait_kernel_local[] = +{ + NL, "#define BITS_DEPTH " STRINGIFY_VALUE(BITS_DEPTH) + NL, "" + NL, "void block_fn(__global int* array, int index, size_t ls, size_t gs, __global int* res, __local int* sub_array)" + NL, "{" + NL, " int val = 0;" + NL, " size_t gid = get_group_id(0);" + NL, " size_t lid = get_local_id(0);" + NL, " size_t tid = get_global_id(0);" + NL, "" + NL, " sub_array[lid] = array[(index - 1) * gs + tid];" + NL, " barrier(CLK_LOCAL_MEM_FENCE);" + NL, "" + NL, " for(int i = 0; i < ls; i++)" + NL, " {" + NL, " int id = gid * ls + i;" + NL, " val += sub_array[i];" + NL, " val -= (tid == id)? 0: (id + index - 1);" + NL, " }" + NL, " array[index * gs + tid] = val + 1;" + NL, "" + NL, " if(tid == 0)" + NL, " {" + NL, " if((index + 1) < BITS_DEPTH)" + NL, " {" + NL, " enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange_1D(gs, ls), " + NL, " ^(__local void* sub_array){" + NL, " block_fn(array, index + 1, ls, gs, res, sub_array);" + NL, " }, ls * sizeof(int));" + NL, " }" + NL, " }" + NL, "" + NL, " if((index + 1) == BITS_DEPTH)" + NL, " {" + NL, " barrier(CLK_LOCAL_MEM_FENCE);" + NL, "" + NL, " if(lid == 0)" + NL, " {" + NL, " res[gid] = 1;" + NL, "" + NL, " for(int j = 0; j < BITS_DEPTH; j++)" + NL, " {" + NL, " for(int i = 0; i < ls; i++)" + NL, " {" + NL, " if(array[j * gs + ls * gid + i] != ((ls * gid + i) + j))" + NL, " {" + NL, " res[gid] = 2;" + NL, " break;" + NL, " }" + NL, " }" + NL, " }" + NL, " }" + NL, " }" + NL, "}" + NL, "" + NL, "kernel void enqueue_flags_wait_kernel_local(__global int* res, __global int* array)" + NL, "{" + NL, " size_t ls = get_local_size(0);" + NL, " size_t gs = 
get_global_size(0);" + NL, " size_t tid = get_global_id(0);" + NL, "" + NL, " res[tid] = 0;" + NL, " array[tid] = tid;" + NL, "" + NL, " if(tid == 0)" + NL, " {" + NL, " enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange_1D(gs, ls), " + NL, " ^(__local void* sub_array){" + NL, " block_fn(array, 1, ls, gs, res, sub_array);" + NL, " }, ls * sizeof(int));" + NL, " }" + NL, "}" + NL +}; + +static const char* enqueue_flags_wait_kernel_event_local[] = +{ + NL, "#define BITS_DEPTH " STRINGIFY_VALUE(BITS_DEPTH) + NL, "" + NL, "void block_fn(__global int* array, int index, size_t ls, size_t gs, __global int* res, __local int* sub_array)" + NL, "{" + NL, " int val = 0;" + NL, " size_t gid = get_group_id(0);" + NL, " size_t lid = get_local_id(0);" + NL, " size_t tid = get_global_id(0);" + NL, "" + NL, " sub_array[lid] = array[(index - 1) * gs + tid];" + NL, " barrier(CLK_LOCAL_MEM_FENCE);" + NL, "" + NL, " for(int i = 0; i < ls; i++)" + NL, " {" + NL, " int id = gid * ls + i;" + NL, " val += sub_array[i];" + NL, " val -= (tid == id)? 
0: (id + index - 1);" + NL, " }" + NL, " array[index * gs + tid] = val + 1;" + NL, "" + NL, " if(tid == 0)" + NL, " {" + NL, " if((index + 1) < BITS_DEPTH)" + NL, " {" + NL, " clk_event_t block_evt;" + NL, " clk_event_t user_evt = create_user_event();" + NL, " enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange_1D(gs, ls), 1, &user_evt, &block_evt, " + NL, " ^(__local void* sub_array){" + NL, " block_fn(array, index + 1, ls, gs, res, sub_array);" + NL, " }, ls * sizeof(int));" + NL, " set_user_event_status(user_evt, CL_COMPLETE);" + NL, " release_event(user_evt);" + NL, " release_event(block_evt);" + NL, " }" + NL, " }" + NL, "" + NL, " if((index + 1) == BITS_DEPTH)" + NL, " {" + NL, " barrier(CLK_LOCAL_MEM_FENCE);" + NL, "" + NL, " if(lid == 0)" + NL, " {" + NL, " res[gid] = 1;" + NL, "" + NL, " for(int j = 0; j < BITS_DEPTH; j++)" + NL, " {" + NL, " for(int i = 0; i < ls; i++)" + NL, " {" + NL, " if(array[j * gs + ls * gid + i] != ((ls * gid + i) + j))" + NL, " {" + NL, " res[gid] = 2;" + NL, " break;" + NL, " }" + NL, " }" + NL, " }" + NL, " }" + NL, " }" + NL, "}" + NL, "" + NL, "kernel void enqueue_flags_wait_kernel_event_local(__global int* res, __global int* array)" + NL, "{" + NL, " size_t ls = get_local_size(0);" + NL, " size_t gs = get_global_size(0);" + NL, " size_t tid = get_global_id(0);" + NL, "" + NL, " res[tid] = 0;" + NL, " array[tid] = tid;" + NL, "" + NL, " if(tid == 0)" + NL, " {" + NL, " clk_event_t block_evt;" + NL, " clk_event_t user_evt = create_user_event();" + NL, " enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange_1D(gs, ls), 1, &user_evt, &block_evt, " + NL, " ^(__local void* sub_array){" + NL, " block_fn(array, 1, ls, gs, res, sub_array);" + NL, " }, ls * sizeof(int));" + NL, " set_user_event_status(user_evt, CL_COMPLETE);" + NL, " release_event(user_evt);" + NL, " release_event(block_evt);" + NL, " }" + NL, "}" + NL +}; + +static const char* enqueue_flags_wait_work_group_simple[] = +{ + 
NL, "#define BITS_DEPTH " STRINGIFY_VALUE(BITS_DEPTH) + NL, "" + NL, "void block_fn(__global int* array, int index, size_t ls, __global int* res, int group_id)" + NL, "{" + NL, " size_t tid = get_global_id(0);" + NL, " size_t lid = get_local_id(0);" + NL, " size_t gs = get_global_size(0);" + NL, " size_t gid = get_group_id(0);" + NL, " " + NL, " if(gid == group_id)" + NL, " {" + NL, " array[index * gs + tid] = array[(index - 1) * gs + tid] + 1;" + NL, " " + NL, " if((index + 1) < BITS_DEPTH && lid == 0)" + NL, " {" + NL, " enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_WORK_GROUP, ndrange_1D(gs, ls), " + NL, " ^{" + NL, " block_fn(array, index + 1, ls, res, gid);" + NL, " });" + NL, " }" + NL, " }" + NL, "" + NL, " if((index + 1) == BITS_DEPTH)" + NL, " {" + NL, " barrier(CLK_LOCAL_MEM_FENCE);" + NL, "" + NL, " if(lid == 0)" + NL, " {" + NL, " res[gid] = 1;" + NL, "" + NL, " for(int j = 0; j < BITS_DEPTH; j++)" + NL, " {" + NL, " for(int i = 0; i < ls; i++)" + NL, " {" + NL, " if(array[j * gs + ls * gid + i] != ((ls * gid + i) + j))" + NL, " {" + NL, " res[gid] = 2;" + NL, " break;" + NL, " }" + NL, " }" + NL, " }" + NL, " }" + NL, " }" + NL, "}" + NL, "" + NL, "kernel void enqueue_flags_wait_work_group_simple(__global int* res, __global int* array)" + NL, "{" + NL, " size_t ls = get_local_size(0);" + NL, " size_t gs = get_global_size(0);" + NL, " size_t tid = get_global_id(0);" + NL, " size_t gid = get_group_id(0);" + NL, " size_t lid = get_local_id(0);" + NL, "" + NL, " res[tid] = 0;" + NL, " array[tid] = tid;" + NL, "" + NL, " if(lid == 0)" + NL, " {" + NL, " enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_WORK_GROUP, ndrange_1D(gs, ls), " + NL, " ^{" + NL, " block_fn(array, 1, ls, res, gid);" + NL, " });" + NL, " }" + NL, "}" + NL +}; + +static const char* enqueue_flags_wait_work_group_event[] = +{ + NL, "#define BITS_DEPTH " STRINGIFY_VALUE(BITS_DEPTH) + NL, "" + NL, "void block_fn(__global int* array, int index, size_t ls, __global int* 
res, int group_id)" + NL, "{" + NL, " size_t tid = get_global_id(0);" + NL, " size_t lid = get_local_id(0);" + NL, " size_t gs = get_global_size(0);" + NL, " size_t gid = get_group_id(0);" + NL, " " + NL, " if(gid == group_id)" + NL, " {" + NL, " array[index * gs + tid] = array[(index - 1) * gs + tid] + 1;" + NL, " " + NL, " if((index + 1) < BITS_DEPTH && lid == 0)" + NL, " {" + NL, " clk_event_t block_evt;" + NL, " clk_event_t user_evt = create_user_event();" + NL, " enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_WORK_GROUP, ndrange_1D(gs, ls), 1, &user_evt, &block_evt, " + NL, " ^{" + NL, " block_fn(array, index + 1, ls, res, gid);" + NL, " });" + NL, " set_user_event_status(user_evt, CL_COMPLETE);" + NL, " release_event(user_evt);" + NL, " release_event(block_evt);" + NL, " }" + NL, " }" + NL, "" + NL, "" + NL, " if((index + 1) == BITS_DEPTH)" + NL, " {" + NL, " barrier(CLK_LOCAL_MEM_FENCE);" + NL, "" + NL, " if(lid == 0)" + NL, " {" + NL, " res[gid] = 1;" + NL, "" + NL, " for(int j = 0; j < BITS_DEPTH; j++)" + NL, " {" + NL, " for(int i = 0; i < ls; i++)" + NL, " {" + NL, " if(array[j * gs + ls * gid + i] != ((ls * gid + i) + j))" + NL, " {" + NL, " res[gid] = 2;" + NL, " break;" + NL, " }" + NL, " }" + NL, " }" + NL, " }" + NL, " }" + NL, "}" + NL, "" + NL, "kernel void enqueue_flags_wait_work_group_event(__global int* res, __global int* array)" + NL, "{" + NL, " size_t ls = get_local_size(0);" + NL, " size_t gs = get_global_size(0);" + NL, " size_t tid = get_global_id(0);" + NL, " size_t gid = get_group_id(0);" + NL, " size_t lid = get_local_id(0);" + NL, "" + NL, " res[tid] = 0;" + NL, " array[tid] = tid;" + NL, "" + NL, " if(lid == 0)" + NL, " {" + NL, " clk_event_t block_evt;" + NL, " clk_event_t user_evt = create_user_event();" + NL, " enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_WORK_GROUP, ndrange_1D(gs, ls), 1, &user_evt, &block_evt, " + NL, " ^{" + NL, " block_fn(array, 1, ls, res, gid);" + NL, " });" + NL, " 
set_user_event_status(user_evt, CL_COMPLETE);" + NL, " release_event(user_evt);" + NL, " release_event(block_evt);" + NL, " }" + NL, "}" + NL +}; + +static const char* enqueue_flags_wait_work_group_local[] = +{ + NL, "#define BITS_DEPTH " STRINGIFY_VALUE(BITS_DEPTH) + NL, "" + NL, "void block_fn(__global int* array, int index, size_t ls, __global int* res, __local int* sub_array, int group_id)" + NL, "{" + NL, " int val = 0;" + NL, " size_t gid = get_group_id(0);" + NL, " size_t lid = get_local_id(0);" + NL, " size_t tid = get_global_id(0);" + NL, " size_t gs = get_global_size(0);" + NL, "" + NL, " sub_array[lid] = array[(index - 1) * gs + tid];" + NL, " barrier(CLK_LOCAL_MEM_FENCE);" + NL, "" + NL, " for(int i = 0; i < ls; i++)" + NL, " {" + NL, " int id = gid * ls + i;" + NL, " val += sub_array[i];" + NL, " val -= (tid == id)? 0: (id + index - 1);" + NL, " }" + NL, " " + NL, " if(gid == group_id)" + NL, " {" + NL, " array[index * gs + tid] = val + 1;" + NL, " " + NL, " if((index + 1) < BITS_DEPTH && lid == 0)" + NL, " {" + NL, " enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_WORK_GROUP, ndrange_1D(gs, ls), " + NL, " ^(__local void* sub_array){" + NL, " block_fn(array, index + 1, ls, res, sub_array, gid);" + NL, " }, ls * sizeof(int));" + NL, " }" + NL, " }" + NL, "" + NL, "" + NL, " if((index + 1) == BITS_DEPTH)" + NL, " {" + NL, " barrier(CLK_LOCAL_MEM_FENCE);" + NL, "" + NL, " if(lid == 0)" + NL, " {" + NL, " res[gid] = 1;" + NL, "" + NL, " for(int j = 0; j < BITS_DEPTH; j++)" + NL, " {" + NL, " for(int i = 0; i < ls; i++)" + NL, " {" + NL, " if(array[j * gs + ls * gid + i] != ((ls * gid + i) + j))" + NL, " {" + NL, " res[gid] = 2;" + NL, " break;" + NL, " }" + NL, " }" + NL, " }" + NL, " }" + NL, " }" + NL, "}" + NL, "" + NL, "kernel void enqueue_flags_wait_work_group_local(__global int* res, __global int* array)" + NL, "{" + NL, " size_t ls = get_local_size(0);" + NL, " size_t gs = get_global_size(0);" + NL, " size_t tid = get_global_id(0);" + NL, 
" size_t gid = get_group_id(0);" + NL, " size_t lid = get_local_id(0);" + NL, "" + NL, " res[tid] = 0;" + NL, " array[tid] = tid;" + NL, "" + NL, " if(lid == 0)" + NL, " {" + NL, " enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_WORK_GROUP, ndrange_1D(gs, ls), " + NL, " ^(__local void* sub_array){" + NL, " block_fn(array, 1, ls, res, sub_array, gid);" + NL, " }, ls * sizeof(int));" + NL, " }" + NL, "}" + NL +}; + +static const char* enqueue_flags_wait_work_group_event_local[] = +{ + NL, "#define BITS_DEPTH " STRINGIFY_VALUE(BITS_DEPTH) + NL, "" + NL, "void block_fn(__global int* array, int index, size_t ls, __global int* res, __local int* sub_array, int group_id)" + NL, "{" + NL, " int val = 0;" + NL, " size_t gid = get_group_id(0);" + NL, " size_t lid = get_local_id(0);" + NL, " size_t tid = get_global_id(0);" + NL, " size_t gs = get_global_size(0);" + NL, "" + NL, " sub_array[lid] = array[(index - 1) * gs + tid];" + NL, " barrier(CLK_LOCAL_MEM_FENCE);" + NL, "" + NL, " for(int i = 0; i < ls; i++)" + NL, " {" + NL, " int id = gid * ls + i;" + NL, " val += sub_array[i];" + NL, " val -= (tid == id)? 
0: (id + index - 1);" + NL, " }" + NL, "" + NL, " if(gid == group_id)" + NL, " {" + NL, " array[index * gs + tid] = val + 1;" + NL, " " + NL, " if((index + 1) < BITS_DEPTH && lid == 0)" + NL, " {" + NL, " clk_event_t block_evt;" + NL, " clk_event_t user_evt = create_user_event();" + NL, " enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_WORK_GROUP, ndrange_1D(gs, ls), 1, &user_evt, &block_evt, " + NL, " ^(__local void* sub_array){" + NL, " block_fn(array, index + 1, ls, res, sub_array, gid);" + NL, " }, ls * sizeof(int));" + NL, " set_user_event_status(user_evt, CL_COMPLETE);" + NL, " release_event(user_evt);" + NL, " release_event(block_evt);" + NL, " }" + NL, " }" + NL, "" + NL, " if((index + 1) == BITS_DEPTH)" + NL, " {" + NL, " barrier(CLK_LOCAL_MEM_FENCE);" + NL, "" + NL, " if(lid == 0)" + NL, " {" + NL, " res[gid] = 1;" + NL, "" + NL, " for(int j = 0; j < BITS_DEPTH; j++)" + NL, " {" + NL, " for(int i = 0; i < ls; i++)" + NL, " {" + NL, " if(array[j * gs + ls * gid + i] != ((ls * gid + i) + j))" + NL, " {" + NL, " res[gid] = 2;" + NL, " break;" + NL, " }" + NL, " }" + NL, " }" + NL, " }" + NL, " }" + NL, "}" + NL, "" + NL, "kernel void enqueue_flags_wait_work_group_event_local(__global int* res, __global int* array)" + NL, "{" + NL, " size_t ls = get_local_size(0);" + NL, " size_t gs = get_global_size(0);" + NL, " size_t tid = get_global_id(0);" + NL, " size_t gid = get_group_id(0);" + NL, " size_t lid = get_local_id(0);" + NL, "" + NL, " res[tid] = 0;" + NL, " array[tid] = tid;" + NL, "" + NL, " if(lid == 0)" + NL, " {" + NL, " clk_event_t block_evt;" + NL, " clk_event_t user_evt = create_user_event();" + NL, " enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_WORK_GROUP, ndrange_1D(gs, ls), 1, &user_evt, &block_evt, " + NL, " ^(__local void* sub_array){" + NL, " block_fn(array, 1, ls, res, sub_array, gid);" + NL, " }, ls * sizeof(int));" + NL, " set_user_event_status(user_evt, CL_COMPLETE);" + NL, " release_event(user_evt);" + NL, " 
release_event(block_evt);" + NL, " }" + NL, "}" + NL +}; + +static const kernel_src sources_enqueue_block_flags[] = +{ + KERNEL(enqueue_flags_wait_kernel_simple), + KERNEL(enqueue_flags_wait_kernel_event), + KERNEL(enqueue_flags_wait_kernel_local), + KERNEL(enqueue_flags_wait_kernel_event_local), + KERNEL(enqueue_flags_wait_work_group_simple), + KERNEL(enqueue_flags_wait_work_group_event), + KERNEL(enqueue_flags_wait_work_group_local), + KERNEL(enqueue_flags_wait_work_group_event_local) +}; +static const size_t num_enqueue_block_flags = arr_size(sources_enqueue_block_flags); + + +int test_enqueue_flags(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + cl_uint i; + cl_int err_ret, res = 0; + clCommandQueueWrapper dev_queue; + cl_int kernel_results[MAX_GWS] = { -1 }; + int buff[MAX_GWS * BITS_DEPTH] = { 0 }; + + size_t ret_len; + size_t max_local_size = 1; + cl_uint maxQueueSize = 0; + err_ret = clGetDeviceInfo(device, CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE, sizeof(maxQueueSize), &maxQueueSize, 0); + test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE) failed"); + + err_ret = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(max_local_size), &max_local_size, &ret_len); + test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_MAX_WORK_GROUP_SIZE) failed"); + + cl_queue_properties queue_prop_def[] = + { + CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE|CL_QUEUE_ON_DEVICE|CL_QUEUE_ON_DEVICE_DEFAULT, + CL_QUEUE_SIZE, maxQueueSize, + 0 + }; + + dev_queue = clCreateCommandQueueWithProperties(context, device, queue_prop_def, &err_ret); + test_error(err_ret, "clCreateCommandQueueWithProperties(CL_QUEUE_DEVICE|CL_QUEUE_DEFAULT) failed"); + + size_t global_size = MAX_GWS; + size_t local_size = (max_local_size > global_size/16) ? 
global_size/16 : max_local_size; + if(gWimpyMode) + { + global_size = 4; + local_size = 2; + } + + size_t failCnt = 0; + for(i = 0; i < num_enqueue_block_flags; ++i) + { + if (!gKernelName.empty() && gKernelName != sources_enqueue_block_flags[i].kernel_name) + continue; + + log_info("Running '%s' kernel (%d of %d) ...\n", sources_enqueue_block_flags[i].kernel_name, i + 1, num_enqueue_block_flags); + + clMemWrapper mem = clCreateBuffer(context, CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR, global_size * BITS_DEPTH * sizeof(cl_int), buff, &err_ret); + test_error(err_ret, "clCreateBuffer() failed"); + + kernel_arg args[] = + { + { sizeof(cl_mem), &mem } + }; + + err_ret = run_n_kernel_args(context, queue, sources_enqueue_block_flags[i].lines, sources_enqueue_block_flags[i].num_lines, sources_enqueue_block_flags[i].kernel_name, local_size, global_size, kernel_results, sizeof(kernel_results), arr_size(args), args); + if(check_error(err_ret, "'%s' kernel execution failed", sources_enqueue_block_flags[i].kernel_name)) { ++failCnt; res = -1; } + else + { + int r = 0; + for (int j=0; j= (global_size / local_size) && check_error(-1, "'%s' kernel result[idx: %d] validation failed (test) %d != (expected) 0", sources_enqueue_block_flags[i].kernel_name, j, kernel_results[j])) + { + r = -1; + break; + } + } + if(r == 0) log_info("'%s' kernel is OK.\n", sources_enqueue_block_flags[i].kernel_name); + else res = -1; + } + } + + if (failCnt > 0) + { + log_error("ERROR: %d of %d kernels failed.\n", failCnt, num_enqueue_block_flags); + } + + return res; +} + + + +#endif diff --git a/test_conformance/device_execution/enqueue_multi_queue.cpp b/test_conformance/device_execution/enqueue_multi_queue.cpp new file mode 100644 index 00000000..76d69168 --- /dev/null +++ b/test_conformance/device_execution/enqueue_multi_queue.cpp @@ -0,0 +1,198 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include +#include +#include "../../test_common/harness/testHarness.h" +#include "../../test_common/harness/typeWrappers.h" + +#include + +#include "procs.h" +#include "utils.h" +#include + + + + +#ifdef CL_VERSION_2_0 +extern int gWimpyMode; +static const char enqueue_block_multi_queue[] = + NL "#define BLOCK_COMPLETED 0" + NL "#define BLOCK_SUBMITTED 1" + NL "" + NL "kernel void enqueue_block_multi_queue(__global int* res, __global int* buff %s)" + NL "{" + NL " uint i, n = %d;" + NL " clk_event_t block_evt[%d];" + NL " queue_t q[] = { %s };" + NL " queue_t *queue = q;" + NL "" + NL " clk_event_t user_evt = create_user_event();" + NL " queue_t def_q = get_default_queue();" + NL " size_t tid = get_global_id(0);" + NL " res[tid] = -1;" + NL " __global int* b = buff + tid*n;" + NL " for(i=0; i 1) + { + n = (max_queues > MAX_QUEUES) ? 
MAX_QUEUES : max_queues-1; + clMemWrapper mem, buff, evt; + std::vector queues(n); + std::vector q(n); + cl_queue_properties queue_prop[] = + { + CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE|CL_QUEUE_ON_DEVICE, + CL_QUEUE_SIZE, maxQueueSize, + 0 + }; + + for(i = 0; i < n; ++i) + { + queues[i] = clCreateCommandQueueWithProperties(context, device, queue_prop, &err_ret); + test_error(err_ret, "clCreateCommandQueueWithProperties(CL_QUEUE_DEVICE) failed"); + q[i] = queues[i]; + } + + size_t global_size = MAX_GWS; + size_t local_size = (max_local_size > global_size/16) ? global_size/16 : max_local_size; + if(gWimpyMode) + { + global_size = 4; + local_size = 2; + } + + evt = clCreateBuffer(context, CL_MEM_READ_WRITE, n * sizeof(cl_event), NULL, &err_ret); + test_error(err_ret, "clCreateBuffer() failed"); + + mem = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR, n * sizeof(cl_command_queue), &q[0], &err_ret); + test_error(err_ret, "clCreateBuffer() failed"); + + buff = clCreateBuffer(context, CL_MEM_READ_WRITE, global_size * n * sizeof(cl_int), NULL, &err_ret); + test_error(err_ret, "clCreateBuffer() failed"); + + // Prepare CL source + char cl[65536] = { 0 }; + char q_args[16384] = { 0 }; + char q_list[8192] = { 0 }; + + kernel_arg arg_res = { sizeof(cl_mem), &buff }; + + std::vector args(n+1); + args[0] = arg_res; + + for(i = 0; i < n; ++i) + { + snprintf(q_args+strlen(q_args), sizeof(q_args)-strlen(q_args)-1, ", queue_t q%d", i); + snprintf(q_list+strlen(q_list), sizeof(q_list)-strlen(q_list)-1, "q%d, ", i); + kernel_arg arg_q = { sizeof(cl_command_queue), &q[i] }; + args[i+1] = arg_q; + } + + snprintf(cl, sizeof(cl)-1, enqueue_block_multi_queue, q_args, n, n, q_list); + const char *source = cl; + + err_ret = run_n_kernel_args(context, queue, &source, 1, "enqueue_block_multi_queue", local_size, global_size, kernel_results, sizeof(kernel_results), args.size(), &args[0]); + if(check_error(err_ret, "'%s' kernel execution failed", 
"enqueue_block_multi_queue")) res = -1; + else if((k = check_kernel_results(kernel_results, arr_size(kernel_results))) >= 0 && check_error(-1, "'%s' kernel results validation failed: [%d] returned %d expected 0", "enqueue_block_multi_queue", k, kernel_results[k])) res = -1; + else log_info("'%s' kernel is OK.\n", "enqueue_block_multi_queue"); + } + return res; +} + + + +#endif + + diff --git a/test_conformance/device_execution/enqueue_ndrange.cpp b/test_conformance/device_execution/enqueue_ndrange.cpp new file mode 100644 index 00000000..0055b618 --- /dev/null +++ b/test_conformance/device_execution/enqueue_ndrange.cpp @@ -0,0 +1,681 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include +#include +#include "../../test_common/harness/testHarness.h" +#include "../../test_common/harness/typeWrappers.h" + +#include + +#include "procs.h" +#include "utils.h" +#include + + +#ifdef CL_VERSION_2_0 +extern int gWimpyMode; +static const char* helper_ndrange_1d_glo[] = +{ + NL, "void block_fn(int len, __global atomic_uint* val)" + NL, "{" + NL, " atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1, memory_order_relaxed, memory_scope_device);" + NL, "}" + NL, "" + NL, "kernel void helper_ndrange_1d_glo(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global atomic_uint* val, __global uint* ofs_arr)" + NL, "{" + NL, " size_t tid = get_global_id(0);" + NL, " void (^kernelBlock)(void) = ^{ block_fn(len, val); };" + NL, "" + NL, " for(int i = 0; i < n; i++)" + NL, " {" + NL, " ndrange_t ndrange = ndrange_1D(glob_size_arr[i]);" + NL, " int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" + NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" + NL, " }" + NL, "}" + NL +}; + +static const char* helper_ndrange_1d_loc[] = +{ + NL, "void block_fn(int len, __global atomic_uint* val)" + NL, "{" + NL, " atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1, memory_order_relaxed, memory_scope_device);" + NL, "}" + NL, "" + NL, "kernel void helper_ndrange_1d_loc(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global atomic_uint* val, __global uint* ofs_arr)" + NL, "{" + NL, " size_t tid = get_global_id(0);" + NL, " void (^kernelBlock)(void) = ^{ block_fn(len, val); };" + NL, "" + NL, " for(int k = 0; k < n; k++)" + NL, " {" + NL, " for(int i = 0; i < n; i++)" + NL, " {" + NL, " if (glob_size_arr[i] >= loc_size_arr[k])" + NL, " {" + NL, " ndrange_t ndrange = ndrange_1D(glob_size_arr[i], loc_size_arr[k]);" + NL, " int enq_res = enqueue_kernel(get_default_queue(), 
CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" + NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" + NL, " }" + NL, " }" + NL, " }" + NL, "}" + NL +}; + +static const char* helper_ndrange_1d_ofs[] = +{ + NL, "void block_fn(int len, __global atomic_uint* val)" + NL, "{" + NL, " atomic_fetch_add_explicit(&val[(get_global_offset(0) + get_global_linear_id()) % len], 1, memory_order_relaxed, memory_scope_device);" + NL, "}" + NL, "" + NL, "kernel void helper_ndrange_1d_ofs(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global atomic_uint* val, __global uint* ofs_arr)" + NL, "{" + NL, " size_t tid = get_global_id(0);" + NL, " void (^kernelBlock)(void) = ^{ block_fn(len, val); };" + NL, "" + NL, " for(int l = 0; l < n; l++)" + NL, " {" + NL, " for(int k = 0; k < n; k++)" + NL, " {" + NL, " for(int i = 0; i < n; i++)" + NL, " {" + NL, " if (glob_size_arr[i] >= loc_size_arr[k])" + NL, " {" + NL, " ndrange_t ndrange = ndrange_1D(ofs_arr[l], glob_size_arr[i], loc_size_arr[k]);" + NL, " int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" + NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" + NL, " }" + NL, " }" + NL, " }" + NL, " }" + NL, "}" + NL +}; + +static const char* helper_ndrange_2d_glo[] = +{ + NL, "void block_fn(int len, __global atomic_uint* val)" + NL, "{" + NL, " atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1, memory_order_relaxed, memory_scope_device);" + NL, "}" + NL, "" + NL, "kernel void helper_ndrange_2d_glo(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global int* val, __global uint* ofs_arr)" + NL, "{" + NL, " size_t tid = get_global_id(0);" + NL, " void (^kernelBlock)(void) = ^{ block_fn(len, val); };" + NL, "" + NL, " for(int i = 0; i < n; i++)" + NL, " {" + NL, " size_t glob_size[2] = { glob_size_arr[i], glob_size_arr[(i + 1) % n] };" + NL, " ndrange_t 
ndrange = ndrange_2D(glob_size);" + NL, " int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" + NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" + NL, " }" + NL, "}" + NL +}; + +static const char* helper_ndrange_2d_loc[] = +{ + NL, "void block_fn(int len, __global atomic_uint* val)" + NL, "{" + NL, " atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1, memory_order_relaxed, memory_scope_device);" + NL, "}" + NL, "" + NL, "kernel void helper_ndrange_2d_loc(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global int* val, __global uint* ofs_arr)" + NL, "{" + NL, " size_t tid = get_global_id(0);" + NL, " void (^kernelBlock)(void) = ^{ block_fn(len, val); };" + NL, "" + NL, " for(int k = 0; k < n; k++)" + NL, " {" + NL, " for(int i = 0; i < n; i++)" + NL, " {" + NL, " if (glob_size_arr[(i + 1) % n] >= loc_size_arr[k])" + NL, " {" + NL, " size_t glob_size[] = { glob_size_arr[i], glob_size_arr[(i + 1) % n] };" + NL, " size_t loc_size[] = { 1, loc_size_arr[k] };" + NL, "" + NL, " ndrange_t ndrange = ndrange_2D(glob_size, loc_size);" + NL, " int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" + NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" + NL, " }" + NL, " }" + NL, " }" + NL, "}" + NL +}; + + +static const char* helper_ndrange_2d_ofs[] = +{ + NL, "void block_fn(int len, __global atomic_uint* val)" + NL, "{" + NL, " atomic_fetch_add_explicit(&val[(get_global_offset(1) * get_global_size(0) + get_global_offset(0) + get_global_linear_id()) % len], 1, memory_order_relaxed, memory_scope_device);" + NL, "}" + NL, "" + NL, "kernel void helper_ndrange_2d_ofs(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global int* val, __global uint* ofs_arr)" + NL, "{" + NL, " size_t tid = get_global_id(0);" + NL, " void (^kernelBlock)(void) = ^{ 
block_fn(len, val); };" + NL, "" + NL, " for(int l = 0; l < n; l++)" + NL, " {" + NL, " for(int k = 0; k < n; k++)" + NL, " {" + NL, " for(int i = 0; i < n; i++)" + NL, " {" + NL, " if (glob_size_arr[(i + 1) % n] >= loc_size_arr[k])" + NL, " {" + NL, " size_t glob_size[] = { glob_size_arr[i], glob_size_arr[(i + 1) % n]};" + NL, " size_t loc_size[] = { 1, loc_size_arr[k] };" + NL, " size_t ofs[] = { ofs_arr[l], ofs_arr[(l + 1) % n] };" + NL, "" + NL, " ndrange_t ndrange = ndrange_2D(ofs,glob_size,loc_size);" + NL, " int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" + NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" + NL, " }" + NL, " }" + NL, " }" + NL, " }" + NL, "}" + NL +}; + + +static const char* helper_ndrange_3d_glo[] = +{ + NL, "void block_fn(int len, __global atomic_uint* val)" + NL, "{" + NL, " atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1, memory_order_relaxed, memory_scope_device);" + NL, "}" + NL, "" + NL, "kernel void helper_ndrange_3d_glo(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global int* val, __global uint* ofs_arr)" + NL, "{" + NL, " size_t tid = get_global_id(0);" + NL, " void (^kernelBlock)(void) = ^{ block_fn(len, val); };" + NL, "" + NL, " for(int i = 0; i < n; i++)" + NL, " {" + NL, " uint global_work_size = glob_size_arr[i] * glob_size_arr[(i + 1) % n] * glob_size_arr[(i + 2) % n];" + NL, " if (global_work_size <= (len * len))" + NL, " {" + NL, " size_t glob_size[3] = { glob_size_arr[i], glob_size_arr[(i + 1) % n], glob_size_arr[(i + 2) % n] };" + NL, " ndrange_t ndrange = ndrange_3D(glob_size);" + NL, " int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" + NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" + NL, " }" + NL, " }" + NL, "}" + NL +}; + + +static const char* helper_ndrange_3d_loc[] = +{ + NL, "void block_fn(int len, __global 
atomic_uint* val)" + NL, "{" + NL, " atomic_fetch_add_explicit(&val[get_global_linear_id() % len], 1, memory_order_relaxed, memory_scope_device);" + NL, "}" + NL, "" + NL, "kernel void helper_ndrange_3d_loc(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global int* val, __global uint* ofs_arr)" + NL, "{" + NL, " size_t tid = get_global_id(0);" + NL, " void (^kernelBlock)(void) = ^{ block_fn(len, val); };" + NL, "" + NL, " for(int k = 0; k < n; k++)" + NL, " {" + NL, " for(int i = 0; i < n; i++)" + NL, " {" + NL, " uint global_work_size = glob_size_arr[i] * glob_size_arr[(i + 1) % n] * glob_size_arr[(i + 2) % n];" + NL, " if (glob_size_arr[(i + 2) % n] >= loc_size_arr[k] && global_work_size <= (len * len))" + NL, " {" + NL, " size_t glob_size[] = { glob_size_arr[i], glob_size_arr[(i + 1) % n], glob_size_arr[(i + 2) % n] };" + NL, " size_t loc_size[] = { 1, 1, loc_size_arr[k] };" + NL, " ndrange_t ndrange = ndrange_3D(glob_size,loc_size);" + NL, " int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" + NL, " " + NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" + NL, " }" + NL, " }" + NL, " }" + NL, "}" + NL +}; + +static const char* helper_ndrange_3d_ofs[] = +{ + NL, "void block_fn(int len, __global atomic_uint* val)" + NL, "{" + NL, " atomic_fetch_add_explicit(&val[(get_global_offset(2) * get_global_size(0) * get_global_size(1) + get_global_offset(1) * get_global_size(0) + get_global_offset(0) + get_global_linear_id()) % len], 1, memory_order_relaxed, memory_scope_device);" + NL, "}" + NL, "" + NL, "kernel void helper_ndrange_3d_ofs(__global int* res, uint n, uint len, __global uint* glob_size_arr, __global uint* loc_size_arr, __global int* val, __global uint* ofs_arr)" + NL, "{" + NL, " size_t tid = get_global_id(0);" + NL, " void (^kernelBlock)(void) = ^{ block_fn(len, val); };" + NL, "" + NL, " for(int l = 0; l < n; l++)" + NL, " {" + NL, " for(int k 
= 0; k < n; k++)" + NL, " {" + NL, " for(int i = 0; i < n; i++)" + NL, " {" + NL, " uint global_work_size = glob_size_arr[i] * glob_size_arr[(i + 1) % n] * glob_size_arr[(i + 2) % n];" + NL, " if (glob_size_arr[(i + 2) % n] >= loc_size_arr[k] && global_work_size <= (len * len))" + NL, " {" + NL, " size_t glob_size[3] = { glob_size_arr[i], glob_size_arr[(i + 1) % n], glob_size_arr[(i + 2) % n]};" + NL, " size_t loc_size[3] = { 1, 1, loc_size_arr[k] };" + NL, " size_t ofs[3] = { ofs_arr[l], ofs_arr[(l + 1) % n], ofs_arr[(l + 2) % n] };" + NL, " ndrange_t ndrange = ndrange_3D(ofs,glob_size,loc_size);" + NL, " int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" + NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" + NL, " }" + NL, " }" + NL, " }" + NL, " }" + NL, "}" + NL +}; + +static const kernel_src_dim_check sources_ndrange_Xd[] = +{ + { KERNEL(helper_ndrange_1d_glo), 1, CL_FALSE, CL_FALSE}, + { KERNEL(helper_ndrange_1d_loc), 1, CL_TRUE, CL_FALSE}, + { KERNEL(helper_ndrange_1d_ofs), 1, CL_TRUE, CL_TRUE}, + { KERNEL(helper_ndrange_2d_glo), 2, CL_FALSE, CL_FALSE}, + { KERNEL(helper_ndrange_2d_loc), 2, CL_TRUE, CL_FALSE}, + { KERNEL(helper_ndrange_2d_ofs), 2, CL_TRUE, CL_TRUE}, + { KERNEL(helper_ndrange_3d_glo), 3, CL_FALSE, CL_FALSE}, + { KERNEL(helper_ndrange_3d_loc), 3, CL_TRUE, CL_FALSE}, + { KERNEL(helper_ndrange_3d_ofs), 3, CL_TRUE, CL_TRUE}, +}; +static const size_t num_kernels_ndrange_Xd = arr_size(sources_ndrange_Xd); + +static int check_kernel_results(cl_int* results, cl_int len) +{ + for(cl_int i = 0; i < len; ++i) + { + if(results[i] != 0) return i; + } + return -1; +} + +void generate_reference_1D(std::vector &reference_results, std::vector &glob_size_arr) +{ + for (size_t g = 0; g < glob_size_arr.size(); ++g) + { + for (size_t w = 0; w < glob_size_arr[g]; ++w) + { + ++reference_results[w]; + } + } +} + +void generate_reference_1D_local(std::vector &reference_results, std::vector 
&glob_size_arr, std::vector &loc_size_arr) +{ + for (size_t g = 0; g < glob_size_arr.size(); ++g) + { + for (size_t l = 0; l < loc_size_arr.size(); ++l) + { + if (glob_size_arr[g] >= loc_size_arr[l]) + { + for (size_t w = 0; w < glob_size_arr[g]; ++w) + { + ++reference_results[w]; + } + } + } + } +} + +void generate_reference_1D_offset(std::vector &reference_results, std::vector &glob_size_arr, std::vector &loc_size_arr, std::vector &offset, cl_uint len) +{ + for (size_t g = 0; g < glob_size_arr.size(); ++g) + { + for (size_t l = 0; l < loc_size_arr.size(); ++l) + { + if (glob_size_arr[g] >= loc_size_arr[l]) + { + for (size_t o = 0; o < offset.size(); ++o) + { + for (size_t w = 0; w < glob_size_arr[g]; ++w) + { + ++reference_results[(offset[o] + w) % len]; + } + } + } + } + } +} + +void generate_reference_2D(std::vector &reference_results, std::vector &glob_size_arr, cl_uint len) +{ + for (size_t g = 0; g < glob_size_arr.size(); ++g) + { + for (size_t h = 0; h < glob_size_arr[(g + 1) % glob_size_arr.size()]; ++h) + { + for (size_t w = 0; w < glob_size_arr[g]; ++w) + { + ++reference_results[(h * glob_size_arr[g] + w) % len]; + } + } + } +} + +void generate_reference_2D_local(std::vector &reference_results, std::vector &glob_size_arr, std::vector &loc_size_arr, cl_uint len) +{ + size_t n = glob_size_arr.size(); + for (size_t g = 0; g < glob_size_arr.size(); ++g) + { + for (size_t l = 0; l < loc_size_arr.size(); ++l) + { + if (glob_size_arr[(g + 1) % n] >= loc_size_arr[l]) + { + for (size_t h = 0; h < glob_size_arr[(g + 1) % n]; ++h) + { + for (size_t w = 0; w < glob_size_arr[g]; ++w) + { + ++reference_results[(h * glob_size_arr[g] + w) % len]; + } + } + } + } + } +} + +void generate_reference_2D_offset(std::vector &reference_results, std::vector &glob_size_arr, std::vector &loc_size_arr, std::vector &offset, cl_uint len) +{ + size_t n = glob_size_arr.size(); + for (size_t g = 0; g < glob_size_arr.size(); ++g) + { + for (size_t l = 0; l < loc_size_arr.size(); ++l) + { 
+ if (glob_size_arr[(g + 1) % n] >= loc_size_arr[l]) + { + for (size_t o = 0; o < offset.size(); ++o) + { + for (size_t h = 0; h < glob_size_arr[(g + 1) % n]; ++h) + { + for (size_t w = 0; w < glob_size_arr[g]; ++w) + { + ++reference_results[(glob_size_arr[g] * offset[(o + 1) % n] + offset[o] + h * glob_size_arr[g] + w) % len]; + } + } + } + } + } + } +} + +void generate_reference_3D(std::vector &reference_results, std::vector &glob_size_arr, cl_uint len) +{ + size_t n = glob_size_arr.size(); + for (size_t g = 0; g < glob_size_arr.size(); ++g) + { + size_t global_work_size = glob_size_arr[(g + 2) % n] * glob_size_arr[(g + 1) % n] * glob_size_arr[g]; + if(global_work_size <= (len * len)) + { + for (size_t d = 0; d < glob_size_arr[(g + 2) % n]; ++d) + { + for (size_t h = 0; h < glob_size_arr[(g + 1) % n]; ++h) + { + for (size_t w = 0; w < glob_size_arr[g]; ++w) + { + ++reference_results[(d * glob_size_arr[(g + 1) % n] * glob_size_arr[g] + h * glob_size_arr[g] + w) % len]; + } + } + } + } + } +} + +void generate_reference_3D_local(std::vector &reference_results, std::vector &glob_size_arr, std::vector &loc_size_arr, cl_uint len) +{ + size_t n = glob_size_arr.size(); + for (size_t g = 0; g < glob_size_arr.size(); ++g) + { + for (size_t l = 0; l < loc_size_arr.size(); ++l) + { + size_t global_work_size = glob_size_arr[(g + 2) % n] * glob_size_arr[(g + 1) % n] * glob_size_arr[g]; + if (glob_size_arr[(g + 2) % n] >= loc_size_arr[l] && global_work_size <= (len * len)) + { + for (size_t d = 0; d < glob_size_arr[(g + 2) % n]; ++d) + { + for (size_t h = 0; h < glob_size_arr[(g + 1) % n]; ++h) + { + for (size_t w = 0; w < glob_size_arr[g]; ++w) + { + ++reference_results[(d * glob_size_arr[(g + 1) % n] * glob_size_arr[g] + h * glob_size_arr[g] + w) % len]; + } + } + } + } + } + } +} + +void generate_reference_3D_offset(std::vector &reference_results, std::vector &glob_size_arr, std::vector &loc_size_arr, std::vector &offset, cl_uint len) +{ + size_t n = glob_size_arr.size(); + 
for (size_t g = 0; g < glob_size_arr.size(); ++g) + { + for (size_t l = 0; l < loc_size_arr.size(); ++l) + { + size_t global_work_size = glob_size_arr[(g + 2) % n] * glob_size_arr[(g + 1) % n] * glob_size_arr[g]; + if (glob_size_arr[(g + 2) % n] >= loc_size_arr[l] && global_work_size <= (len * len)) + { + for (size_t o = 0; o < offset.size(); ++o) + { + for (size_t d = 0; d < glob_size_arr[(g + 2) % n]; ++d) + { + for (size_t h = 0; h < glob_size_arr[(g + 1) % n]; ++h) + { + for (size_t w = 0; w < glob_size_arr[g]; ++w) + { + ++reference_results[(glob_size_arr[g] * glob_size_arr[(g + 1) % n] * offset[(o + 2) % n] + glob_size_arr[g] * offset[(o + 1) % n] + offset[o] + d * glob_size_arr[(g + 1) % n] * glob_size_arr[g] + h * glob_size_arr[g] + w) % len]; + } + } + } + } + } + } + } +} + +static int check_kernel_results(cl_int* results, cl_int len, std::vector &glob_size_arr, std::vector &loc_size_arr, std::vector &offset, cl_int dim, cl_bool use_local, cl_bool use_offset) +{ + std::vector reference_results(len, 0); + switch (dim) + { + case 1: + if (use_local == CL_FALSE) + { + generate_reference_1D(reference_results, glob_size_arr); + } + else if(use_local == CL_TRUE && use_offset == CL_FALSE) + { + generate_reference_1D_local(reference_results, glob_size_arr, loc_size_arr); + } + else + { + generate_reference_1D_offset(reference_results, glob_size_arr, loc_size_arr, offset, len); + } + break; + case 2: + if (use_local == CL_FALSE) + { + generate_reference_2D(reference_results, glob_size_arr, len); + } + else if (use_local == CL_TRUE && use_offset == CL_FALSE) + { + generate_reference_2D_local(reference_results, glob_size_arr, loc_size_arr, len); + } + else + { + generate_reference_2D_offset(reference_results, glob_size_arr, loc_size_arr, offset, len); + } + break; + case 3: + if (use_local == CL_FALSE) + { + generate_reference_3D(reference_results, glob_size_arr, len); + } + else if (use_local == CL_TRUE && use_offset == CL_FALSE) + { + 
generate_reference_3D_local(reference_results, glob_size_arr, loc_size_arr, len); + } + else + { + generate_reference_3D_offset(reference_results, glob_size_arr, loc_size_arr, offset, len); + } + break; + default: + return 0; + break; + } + + for (cl_int i = 0; i < len; ++i) + { + if (results[i] != reference_results[i]) + { + log_error("ERROR: Kernel returned %d vs. expected %d\n", results[i], reference_results[i]); + return i; + } + } + + return -1; +} + +int test_enqueue_ndrange(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + MTdata d; + cl_uint i; + cl_int err_ret, res = 0; + clCommandQueueWrapper dev_queue; + cl_int k, kernel_results[MAX_GWS] = { 0 }; + + size_t ret_len; + cl_uint max_queues = 1; + cl_uint maxQueueSize = 0; + + d = init_genrand(gRandomSeed); + + err_ret = clGetDeviceInfo(device, CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE, sizeof(maxQueueSize), &maxQueueSize, 0); + test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE) failed"); + + err_ret = clGetDeviceInfo(device, CL_DEVICE_MAX_ON_DEVICE_QUEUES, sizeof(max_queues), &max_queues, &ret_len); + test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_MAX_ON_DEVICE_QUEUES) failed"); + + size_t max_local_size = 1; + err_ret = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(max_local_size), &max_local_size, &ret_len); + test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_MAX_WORK_GROUP_SIZE) failed"); + + cl_queue_properties queue_prop_def[] = + { + CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE|CL_QUEUE_ON_DEVICE|CL_QUEUE_ON_DEVICE_DEFAULT, + CL_QUEUE_SIZE, maxQueueSize, + 0 + }; + + dev_queue = clCreateCommandQueueWithProperties(context, device, queue_prop_def, &err_ret); + test_error(err_ret, "clCreateCommandQueueWithProperties(CL_QUEUE_DEVICE|CL_QUEUE_DEFAULT) failed"); + + max_local_size = (max_local_size > MAX_GWS)? 
MAX_GWS: max_local_size; + if(gWimpyMode) + { + max_local_size = MIN(8, max_local_size); + } + + cl_uint num = 10; + cl_uint global_work_size = max_local_size * 2; + std::vector glob_size_arr(num); + std::vector loc_size_arr(num); + std::vector ofs_arr(num); + std::vector glob_results(global_work_size, 0); + + glob_size_arr[0] = 1; + glob_size_arr[1] = global_work_size; + loc_size_arr[0] = 1; + loc_size_arr[1] = max_local_size; + ofs_arr[0] = 0; + ofs_arr[1] = 1; + + for(i = 2; i < num; ++i) + { + glob_size_arr[i] = genrand_int32(d) % global_work_size; + glob_size_arr[i] = glob_size_arr[i] ? glob_size_arr[i]: 1; + loc_size_arr[i] = genrand_int32(d) % max_local_size; + loc_size_arr[i] = loc_size_arr[i] ? loc_size_arr[i]: 1; + ofs_arr[i] = genrand_int32(d) % global_work_size; + } + + // check ndrange_dX functions + size_t failCnt = 0; + for(i = 0; i < num_kernels_ndrange_Xd; ++i) + { + if (!gKernelName.empty() && gKernelName != sources_ndrange_Xd[i].src.kernel_name) + continue; + + clMemWrapper mem1 = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR, glob_size_arr.size() * sizeof(cl_uint), &glob_size_arr[0], &err_ret); + test_error(err_ret, "clCreateBuffer() failed"); + clMemWrapper mem2 = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR, loc_size_arr.size() * sizeof(cl_uint), &loc_size_arr[0], &err_ret); + test_error(err_ret, "clCreateBuffer() failed"); + clMemWrapper mem3 = clCreateBuffer(context, CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR, glob_results.size() * sizeof(cl_int), &glob_results[0], &err_ret); + test_error(err_ret, "clCreateBuffer() failed"); + clMemWrapper mem4 = clCreateBuffer(context, CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR, ofs_arr.size() * sizeof(cl_uint), &ofs_arr[0], &err_ret); + test_error(err_ret, "clCreateBuffer() failed"); + + kernel_arg args[] = + { + { sizeof(cl_uint), &num }, + { sizeof(cl_uint), &global_work_size }, + { sizeof(cl_mem), &mem1 }, + { sizeof(cl_mem), &mem2 }, + { sizeof(cl_mem), &mem3 }, + { 
sizeof(cl_mem), &mem4 }, + }; + + log_info("Running '%s' kernel (%d of %d) ...\n", sources_ndrange_Xd[i].src.kernel_name, i + 1, num_kernels_ndrange_Xd); + err_ret = run_single_kernel_args(context, queue, sources_ndrange_Xd[i].src.lines, sources_ndrange_Xd[i].src.num_lines, sources_ndrange_Xd[i].src.kernel_name, kernel_results, sizeof(kernel_results), arr_size(args), args); + + cl_int *ptr = (cl_int *)clEnqueueMapBuffer(queue, mem3, CL_TRUE, CL_MAP_READ, 0, glob_results.size() * sizeof(cl_int), 0, 0, 0, &err_ret); + test_error(err_ret, "clEnqueueMapBuffer() failed"); + + if(check_error(err_ret, "'%s' kernel execution failed", sources_ndrange_Xd[i].src.kernel_name)) { ++failCnt; res = -1; } + else if((k = check_kernel_results(kernel_results, arr_size(kernel_results))) >= 0 && check_error(-1, "'%s' kernel results validation failed: [%d] returned %d expected 0", sources_ndrange_Xd[i].src.kernel_name, k, kernel_results[k])) res = -1; + else if((k = check_kernel_results(ptr, global_work_size, glob_size_arr, loc_size_arr, ofs_arr, sources_ndrange_Xd[i].dim, sources_ndrange_Xd[i].localSize, sources_ndrange_Xd[i].offset)) >= 0 && check_error(-1, "'%s' global kernel results validation failed: [%d] returned %d expected 0", sources_ndrange_Xd[i].src.kernel_name, k, glob_results[k])) res = -1; + else log_info("'%s' kernel is OK.\n", sources_ndrange_Xd[i].src.kernel_name); + + err_ret = clEnqueueUnmapMemObject(queue, mem3, ptr, 0, 0, 0); + test_error(err_ret, "clEnqueueUnmapMemObject() failed"); + + } + + if (failCnt > 0) + { + log_error("ERROR: %d of %d kernels failed.\n", failCnt, num_kernels_ndrange_Xd); + } + + return res; +} + + +#endif + diff --git a/test_conformance/device_execution/enqueue_wg_size.cpp b/test_conformance/device_execution/enqueue_wg_size.cpp new file mode 100644 index 00000000..e1da04ce --- /dev/null +++ b/test_conformance/device_execution/enqueue_wg_size.cpp @@ -0,0 +1,1724 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include +#include +#include "../../test_common/harness/testHarness.h" +#include "../../test_common/harness/typeWrappers.h" + +#include + +#include "procs.h" +#include "utils.h" +#include + + +#ifdef CL_VERSION_2_0 +extern int gWimpyMode; +static int nestingLevel = 3; + +static const char* enqueue_1D_wg_size_single[] = +{ + NL, "void block_fn(int level, int maxGlobalWorkSize, __global int* rnd, __global int* res)" + NL, "{" + NL, " size_t tidX = get_global_id(0);" + NL, " queue_t def_q = get_default_queue();" + NL, " if(--level < 0) return;" + NL, "" + NL, " void (^kernelBlock)(void) = ^{ block_fn(level, maxGlobalWorkSize, rnd, res); };" + NL, " uint wg = get_kernel_work_group_size(kernelBlock);" + NL, "" + NL, " const size_t gs = 64 * 64 * 64;" + NL, " size_t ls = rnd[tidX % maxGlobalWorkSize] % wg % gs;" + NL, " ls = ls? 
ls: 1;" + NL, "" + NL, " ndrange_t ndrange = ndrange_1D(gs, ls);" + NL, "" + NL, " // Only 1 work-item enqueues block" + NL, " if(tidX == 0)" + NL, " {" + NL, " atomic_inc(&res[tidX % maxGlobalWorkSize]);" + NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" + NL, " if(enq_res != CLK_SUCCESS) { res[tidX % maxGlobalWorkSize] = -1; return; }" + NL, " }" + NL, "}" + NL, "" + NL, "kernel void enqueue_1D_wg_size_single(__global int* res, int level, int maxGlobalWorkSize, __global int* rnd)" + NL, "{" + NL, " block_fn(level, maxGlobalWorkSize, rnd, res);" + NL, "}" + NL +}; + +static int check_single(cl_int* results, cl_int len, cl_int nesting_level) +{ + for(size_t i = 0; i < len; ++i) + { + if(i == 0 && results[i] != nestingLevel) + { + log_error("ERROR: Kernel returned %d vs. expected %d, index: %d\n", results[i], nestingLevel, i); + return (int)i; + } + + if(i > 0 && results[i] != 0) + { + log_error("ERROR: Kernel returned %d vs. expected 0, index: %d\n", results[i], i); + return (int)i; + } + } + + return -1; +} + +static const char* enqueue_1D_wg_size_some_eq[] = +{ + NL, "void block_fn(__global int* res, int level, int maxGlobalWorkSize, __global int* rnd)" + NL, "{" + NL, " size_t tidX = get_global_id(0);" + NL, " queue_t def_q = get_default_queue();" + NL, " if(--level < 0) return;" + NL, "" + NL, " void (^kernelBlock)(void) = ^{ block_fn(res, level, maxGlobalWorkSize, rnd); };" + NL, " uint wg = get_kernel_work_group_size(kernelBlock);" + NL, "" + NL, " const size_t gs = 8 * 8 * 2;" + NL, " size_t ls = rnd[tidX % maxGlobalWorkSize] % wg % gs;" + NL, " ls = ls? 
ls: 1;" + NL, "" + NL, " ndrange_t ndrange = ndrange_1D(gs, ls);" + NL, "" + NL, " // Some work-items enqueues nested blocks with the same level" + NL, " if((tidX % (maxGlobalWorkSize / 8)) == 0)" + NL, " {" + NL, " atomic_inc(&res[tidX % maxGlobalWorkSize]);" + NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" + NL, " if(enq_res != CLK_SUCCESS) { res[tidX % maxGlobalWorkSize] = -1; return; }" + NL, " }" + NL, "}" + NL, "" + NL, "kernel void enqueue_1D_wg_size_some_eq(__global int* res, int level, int maxGlobalWorkSize, __global int* rnd)" + NL, "{" + NL, " block_fn(res, level, maxGlobalWorkSize, rnd);" + NL, "}" + NL +}; + +void generate_reference_results_some_eq_1D(std::vector &referenceResults, cl_int maxGlobalWorkSize, cl_int level) +{ + size_t globalSize = (level == nestingLevel) ? maxGlobalWorkSize: (8 * 8 * 2); + if(--level < 0) + { + return; + } + + for (size_t tidX = 0; tidX < globalSize; ++tidX) + { + if ((tidX % (maxGlobalWorkSize / 8)) == 0) + { + ++referenceResults[tidX % maxGlobalWorkSize]; + generate_reference_results_some_eq_1D(referenceResults, maxGlobalWorkSize, level); + } + } +} + +static int check_some_eq_1D(cl_int* results, cl_int len, cl_int nesting_level) +{ + std::vector referenceResults(len, 0); + generate_reference_results_some_eq_1D(referenceResults, len, nesting_level); + + for(size_t i = 0; i < len; ++i) + { + if (results[i] != referenceResults[i]) + { + log_error("ERROR: Kernel returned %d vs. 
expected %d, index: %d\n", results[i], referenceResults[i], i); + return (int)i; + } + } + + return -1; +} + +static const char* enqueue_1D_wg_size_some_diff[] = +{ + NL, "void block_fn(__global int* res, int level, int maxGlobalWorkSize, __global int* rnd)" + NL, "{" + NL, " size_t tidX = get_global_id(0);" + NL, " queue_t def_q = get_default_queue();" + NL, " if(--level < 0) return;" + NL, "" + NL, " void (^kernelBlock)(void) = ^{ block_fn(res, level, maxGlobalWorkSize, rnd); };" + NL, " uint wg = get_kernel_work_group_size(kernelBlock);" + NL, "" + NL, " const size_t gs = 8 * 8 * 8;" + NL, " size_t ls = rnd[tidX % maxGlobalWorkSize] % wg % gs;" + NL, " ls = ls? ls: 1;" + NL, "" + NL, " ndrange_t ndrange = ndrange_1D(gs, ls);" + NL, "" + NL, " // Some work-items enqueues nested blocks with different levels" + NL, " if((tidX % 2) == 0)" + NL, " {" + NL, " atomic_inc(&res[tidX % maxGlobalWorkSize]);" + NL, " if(level >= tidX)" + NL, " {" + NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" + NL, " if(enq_res != CLK_SUCCESS) { res[tidX % maxGlobalWorkSize] = -1; return; }" + NL, " }" + NL, " }" + NL, "}" + NL, "" + NL, "kernel void enqueue_1D_wg_size_some_diff(__global int* res, int level, int maxGlobalWorkSize, __global int* rnd)" + NL, "{" + NL, " block_fn(res, level, maxGlobalWorkSize, rnd);" + NL, "}" + NL +}; + +void generate_reference_results_some_diff_1D(std::vector &referenceResults, cl_int maxGlobalWorkSize, cl_int level) +{ + size_t globalSize = (level == nestingLevel) ? 
maxGlobalWorkSize: (8 * 8 * 8); + if(--level < 0) + { + return; + } + + for (size_t tidX = 0; tidX < globalSize; ++tidX) + { + if ((tidX % 2) == 0) + { + ++referenceResults[tidX % maxGlobalWorkSize]; + if (level >= tidX) + { + generate_reference_results_some_diff_1D(referenceResults, maxGlobalWorkSize, level); + } + } + } +} + +static int check_some_diff_1D(cl_int* results, cl_int maxGlobalWorkSize, cl_int nesting_level) +{ + std::vector referenceResults(maxGlobalWorkSize, 0); + generate_reference_results_some_diff_1D(referenceResults, maxGlobalWorkSize, nesting_level); + + for(size_t i = 0; i < maxGlobalWorkSize; ++i) + { + if (results[i] != referenceResults[i]) + { + log_error("ERROR: Kernel returned %d vs. expected %d, index: %d\n", results[i], referenceResults[i], i); + return (int)i; + } + } + + return -1; +} + +static const char* enqueue_1D_wg_size_all_eq[] = +{ + NL, "void block_fn(int level, int maxGlobalWorkSize, __global int* rnd, __global int* res)" + NL, "{" + NL, " size_t tidX = get_global_id(0);" + NL, " queue_t def_q = get_default_queue();" + NL, " if(--level < 0) return;" + NL, "" + NL, " void (^kernelBlock)(void) = ^{ block_fn(level, maxGlobalWorkSize, rnd, res); };" + NL, " uint wg = get_kernel_work_group_size(kernelBlock);" + NL, "" + NL, " const size_t gs = 8;" + NL, " size_t ls = rnd[tidX % maxGlobalWorkSize] % wg % gs;" + NL, " ls = ls? 
ls: 1;" + NL, "" + NL, " ndrange_t ndrange = ndrange_1D(gs, ls);" + NL, "" + NL, " // All work-items enqueues nested blocks with the same level" + NL, " atomic_inc(&res[tidX % maxGlobalWorkSize]);" + NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" + NL, " if(enq_res != CLK_SUCCESS) { res[tidX % maxGlobalWorkSize] = -1; return; }" + NL, "}" + NL, "" + NL, "kernel void enqueue_1D_wg_size_all_eq(__global int* res, int level, int maxGlobalWorkSize, __global int* rnd)" + NL, "{" + NL, " block_fn(level, maxGlobalWorkSize, rnd, res);" + NL, "}" + NL +}; + +void generate_reference_results_all_eq_1D(std::vector &referenceResults, cl_int len, cl_int level) +{ + size_t globalSize = (level == nestingLevel) ? len: 8; + if(--level < 0) + { + return; + } + + for (size_t tidX = 0; tidX < globalSize; ++tidX) + { + ++referenceResults[tidX % len]; + generate_reference_results_all_eq_1D(referenceResults, len, level); + } +} + +static int check_all_eq_1D(cl_int* results, cl_int len, cl_int nesting_level) +{ + std::vector referenceResults(len, 0); + generate_reference_results_all_eq_1D(referenceResults, len, nesting_level); + + for(size_t i = 0; i < len; ++i) + { + if (results[i] != referenceResults[i]) + { + log_error("ERROR: Kernel returned %d vs. expected %d, index: %d\n", results[i], referenceResults[i], i); + return (int)i; + } + } + + return -1; +} + +static const char* enqueue_1D_wg_size_all_diff[] = +{ + NL, "void block_fn(__global int* res, int level, int maxGlobalWorkSize, __global int* rnd)" + NL, "{" + NL, " size_t tidX = get_global_id(0);" + NL, " queue_t def_q = get_default_queue();" + NL, " if((--level) < 0) return;" + NL, "" + NL, " void (^kernelBlock)(void) = ^{ block_fn(res, level, maxGlobalWorkSize, rnd); };" + NL, " uint wg = get_kernel_work_group_size(kernelBlock);" + NL, "" + NL, " const size_t gs = 8 * 8 * 8;" + NL, " size_t ls = rnd[tidX % maxGlobalWorkSize] % wg % gs;" + NL, " ls = ls? 
ls: 1;" + NL, "" + NL, " ndrange_t ndrange = ndrange_1D(gs, ls);" + NL, "" + NL, " // All work-items enqueues nested blocks with different levels" + NL, " atomic_inc(&res[tidX % maxGlobalWorkSize]);" + NL, " if(level >= tidX)" + NL, " {" + NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" + NL, " if(enq_res != CLK_SUCCESS) { res[tidX % maxGlobalWorkSize] = -1; return; }" + NL, " }" + NL, "}" + NL, "" + NL, "kernel void enqueue_1D_wg_size_all_diff(__global int* res, int level, int maxGlobalWorkSize, __global int* rnd)" + NL, "{" + NL, " block_fn(res, level, maxGlobalWorkSize, rnd);" + NL, "}" + NL +}; + +void generate_reference_results_all_diff_1D(std::vector &referenceResults, cl_int len, cl_int level) +{ + size_t globalSize = (level == nestingLevel) ? len: (8 * 8 * 8); + if((--level) < 0) + { + return; + } + + for (size_t threadIdx = 0; threadIdx < globalSize; ++threadIdx) + { + ++referenceResults[threadIdx % len]; + if (level >= threadIdx) + { + generate_reference_results_all_diff_1D(referenceResults, len, level); + } + } +} + +static int check_all_diff_1D(cl_int* results, cl_int len, cl_int nesting_level) +{ + std::vector referenceResults(len, 0); + generate_reference_results_all_diff_1D(referenceResults, len, nesting_level); + + for(size_t i = 0; i < len; ++i) + { + if (results[i] != referenceResults[i]) + { + log_error("ERROR: Kernel returned %d vs. 
expected %d, index: %d\n", results[i], referenceResults[i], i); + return (int)i; + } + } + + return -1; +} + +static const char* enqueue_2D_wg_size_single[] = +{ + NL, "void block_fn(int level, int maxGlobalWorkSize, __global int* rnd, __global int* res)" + NL, "{" + NL, " size_t tidX = get_global_id(0);" + NL, " size_t tidY = get_global_id(1);" + NL, " size_t linearId = get_global_linear_id();" + NL, " queue_t def_q = get_default_queue();" + NL, " if(--level < 0) return;" + NL, "" + NL, " void (^kernelBlock)(void) = ^{ block_fn(level, maxGlobalWorkSize, rnd, res); };" + NL, " uint wg = get_kernel_work_group_size(kernelBlock);" + NL, "" + NL, " const size_t gs[] = { 64, 64 * 64 };" + NL, " size_t ls[] = { 1, rnd[tidY % maxGlobalWorkSize] % wg % gs[1] };" + NL, " ls[1] = ls[1]? ls[1]: 1;" + NL, " " + NL, " ndrange_t ndrange = ndrange_2D(gs, ls);" + NL, "" + NL, " // Only 1 work-item enqueues block" + NL, " if(tidX == 0 && tidY == 0)" + NL, " {" + NL, " atomic_inc(&res[linearId % maxGlobalWorkSize]);" + NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" + NL, " if(enq_res != CLK_SUCCESS) { res[linearId % maxGlobalWorkSize] = -1; return; }" + NL, " }" + NL, "}" + NL, "" + NL, "kernel void enqueue_2D_wg_size_single(__global int* res, int level, int maxGlobalWorkSize, __global int* rnd)" + NL, "{" + NL, " block_fn(level, maxGlobalWorkSize, rnd, res);" + NL, "}" + NL +}; + +static const char* enqueue_2D_wg_size_some_eq[] = +{ + NL, "void block_fn(int level, int maxGlobalWorkSize, __global int* rnd, __global int* res)" + NL, "{" + NL, " size_t tidX = get_global_id(0);" + NL, " size_t tidY = get_global_id(1);" + NL, " size_t linearId = get_global_linear_id();" + NL, " queue_t def_q = get_default_queue();" + NL, " if(--level < 0) return;" + NL, "" + NL, " void (^kernelBlock)(void) = ^{ block_fn(level, maxGlobalWorkSize, rnd, res); };" + NL, " uint wg = get_kernel_work_group_size(kernelBlock);" + NL, "" + NL, " const size_t gs[] 
= { 4, 4 };" + NL, " size_t ls[] = { 1, rnd[tidY % maxGlobalWorkSize] % wg % gs[1] };" + NL, " ls[1] = ls[1]? ls[1]: 1;" + NL, " " + NL, " ndrange_t ndrange = ndrange_2D(gs, ls);" + NL, "" + NL, " // Some work-items enqueues nested blocks with the same level" + NL, " if((tidX < (get_global_size(0) >> 1)) && ((tidY < (get_global_size(1) >> 1)) || get_global_size(1) == 1))" + NL, " {" + NL, " atomic_inc(&res[linearId % maxGlobalWorkSize]);" + NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" + NL, " if(enq_res != CLK_SUCCESS) { res[linearId % maxGlobalWorkSize] = -1; return; }" + NL, " }" + NL, "}" + NL, "" + NL, "kernel void enqueue_2D_wg_size_some_eq(__global int* res, int level, int maxGlobalWorkSize, __global int* rnd)" + NL, "{" + NL, " block_fn(level, maxGlobalWorkSize, rnd, res);" + NL, "}" + NL +}; + +void generate_reference_results_some_eq_2D(std::vector &referenceResults, cl_int len, cl_int level) +{ + size_t globalSizeX = (level == nestingLevel) ? len: 4; + size_t globalSizeY = (level == nestingLevel) ? 1: 4; + if(--level < 0) + { + return; + } + + for (size_t tidY = 0; tidY < globalSizeY; ++tidY) + { + for (size_t tidX = 0; tidX < globalSizeX; ++tidX) + { + if ((tidX < (globalSizeX >> 1)) && ((tidY < (globalSizeY >> 1)) || globalSizeY == 1)) + { + ++referenceResults[(globalSizeX * tidY + tidX) % len]; + generate_reference_results_some_eq_2D(referenceResults, len, level); + } + } + } +} + +static int check_some_eq_2D(cl_int* results, cl_int len, cl_int nesting_level) +{ + std::vector referenceResults(len, 0); + generate_reference_results_some_eq_2D(referenceResults, len, nesting_level); + + for(size_t i = 0; i < len; ++i) + { + if (results[i] != referenceResults[i]) + { + log_error("ERROR: Kernel returned %d vs. 
expected %d, index: %d\n", results[i], referenceResults[i], i); + return (int)i; + } + } + + return -1; +} + +static const char* enqueue_2D_wg_size_some_diff[] = +{ + NL, "void block_fn(int level, int maxGlobalWorkSize, __global int* rnd, __global int* res)" + NL, "{" + NL, " size_t tidX = get_global_id(0);" + NL, " size_t tidY = get_global_id(1);" + NL, " size_t linearId = get_global_linear_id();" + NL, " queue_t def_q = get_default_queue();" + NL, " if(--level < 0) return;" + NL, "" + NL, " void (^kernelBlock)(void) = ^{ block_fn(level, maxGlobalWorkSize, rnd, res); };" + NL, " uint wg = get_kernel_work_group_size(kernelBlock);" + NL, "" + NL, " const size_t gs[] = { 8, 8 };" + NL, " size_t ls[] = { 1, rnd[tidY % maxGlobalWorkSize] % wg % gs[1] };" + NL, " ls[1] = ls[1]? ls[1]: 1;" + NL, " " + NL, " ndrange_t ndrange = ndrange_2D(gs, ls);" + NL, "" + NL, " // Some work-items enqueues nested blocks with different levels" + NL, " if((tidX % 2) == 0 && (tidY % 2) == 0)" + NL, " {" + NL, " atomic_inc(&res[linearId % maxGlobalWorkSize]);" + NL, " if(level >= tidX && level >= tidY)" + NL, " {" + NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" + NL, " if(enq_res != CLK_SUCCESS) { res[linearId % maxGlobalWorkSize] = -1; return; }" + NL, " }" + NL, " }" + NL, "}" + NL, "" + NL, "kernel void enqueue_2D_wg_size_some_diff(__global int* res, int level, int maxGlobalWorkSize, __global int* rnd)" + NL, "{" + NL, " block_fn(level, maxGlobalWorkSize, rnd, res);" + NL, "}" + NL +}; + +void generate_reference_results_some_diff_2D(std::vector &referenceResults, cl_int len, cl_int level) +{ + size_t globalSizeX = (level == nestingLevel) ? len: 8; + size_t globalSizeY = (level == nestingLevel) ? 
1: 8; + if(--level < 0) + { + return; + } + + for (size_t tidY = 0; tidY < globalSizeY; ++tidY) + { + for (size_t tidX = 0; tidX < globalSizeX; ++tidX) + { + if ((tidX % 2) == 0 && (tidY % 2) == 0) + { + ++referenceResults[(globalSizeX * tidY + tidX) % len]; + if (level >= tidX && level >= tidY) + { + generate_reference_results_some_diff_2D(referenceResults, len, level); + } + } + } + } +} + +static int check_some_diff_2D(cl_int* results, cl_int len, cl_int nesting_level) +{ + std::vector referenceResults(len, 0); + generate_reference_results_some_diff_2D(referenceResults, len, nesting_level); + + for(size_t i = 0; i < len; ++i) + { + if (results[i] != referenceResults[i]) + { + log_error("ERROR: Kernel returned %d vs. expected %d, index: %d\n", results[i], referenceResults[i], i); + return (int)i; + } + } + + return -1; +} + +static const char* enqueue_2D_wg_size_all_eq[] = +{ + NL, "void block_fn(int level, int maxGlobalWorkSize, __global int* rnd, __global int* res)" + NL, "{" + NL, " size_t tidX = get_global_id(0);" + NL, " size_t tidY = get_global_id(1);" + NL, " size_t linearId = get_global_linear_id();" + NL, " queue_t def_q = get_default_queue();" + NL, " if(--level < 0) return;" + NL, "" + NL, " void (^kernelBlock)(void) = ^{ block_fn(level, maxGlobalWorkSize, rnd, res); };" + NL, " uint wg = get_kernel_work_group_size(kernelBlock);" + NL, "" + NL, " const size_t gs[] = { 2, 2 };" + NL, " size_t ls[] = { 1, rnd[tidY % maxGlobalWorkSize] % wg % gs[1] };" + NL, " ls[1] = ls[1]? 
ls[1]: 1;" + NL, " " + NL, " ndrange_t ndrange = ndrange_2D(gs, ls);" + NL, "" + NL, " // All work-items enqueues nested blocks with the same level" + NL, " atomic_inc(&res[linearId % maxGlobalWorkSize]);" + NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" + NL, " if(enq_res != CLK_SUCCESS) { res[linearId % maxGlobalWorkSize] = -1; return; }" + NL, "}" + NL, "" + NL, "kernel void enqueue_2D_wg_size_all_eq(__global int* res, int level, int maxGlobalWorkSize, __global int* rnd)" + NL, "{" + NL, " block_fn(level, maxGlobalWorkSize, rnd, res);" + NL, "}" + NL +}; + +void generate_reference_results_all_eq_2D(std::vector &referenceResults, cl_int len, cl_int level) +{ + size_t globalSizeX = (level == nestingLevel) ? len: 2; + size_t globalSizeY = (level == nestingLevel) ? 1: 2; + if(--level < 0) + { + return; + } + + for (size_t tidY = 0; tidY < globalSizeY; ++tidY) + { + for (size_t tidX = 0; tidX < globalSizeX; ++tidX) + { + ++referenceResults[(globalSizeX * tidY + tidX) % len]; + generate_reference_results_all_eq_2D(referenceResults, len, level); + } + } +} + +static int check_all_eq_2D(cl_int* results, cl_int len, cl_int nesting_level) +{ + std::vector referenceResults(len, 0); + generate_reference_results_all_eq_2D(referenceResults, len, nesting_level); + + for(size_t i = 0; i < len; ++i) + { + if (results[i] != referenceResults[i]) + { + log_error("ERROR: Kernel returned %d vs. 
expected %d, index: %d\n", results[i], referenceResults[i], i); + return (int)i; + } + } + + return -1; +} + +static const char* enqueue_2D_wg_size_all_diff[] = +{ + NL, "void block_fn(int level, int maxGlobalWorkSize, __global int* rnd, __global int* res)" + NL, "{" + NL, " size_t tidX = get_global_id(0);" + NL, " size_t tidY = get_global_id(1);" + NL, " size_t linearId = get_global_linear_id();" + NL, " queue_t def_q = get_default_queue();" + NL, " if(--level < 0) return;" + NL, "" + NL, " void (^kernelBlock)(void) = ^{ block_fn(level, maxGlobalWorkSize, rnd, res); };" + NL, " uint wg = get_kernel_work_group_size(kernelBlock);" + NL, "" + NL, " size_t gs[] = { 8, 8 * 8 };" + NL, " size_t ls[] = { 1, rnd[tidY % maxGlobalWorkSize] % wg % gs[1] };" + NL, " ls[1] = ls[1]? ls[1]: 1;" + NL, " " + NL, " ndrange_t ndrange = ndrange_2D(gs, ls);" + NL, "" + NL, " // All work-items enqueues nested blocks with different levels" + NL, " atomic_inc(&res[linearId % maxGlobalWorkSize]);" + NL, " if(level >= tidX && level >= tidY)" + NL, " {" + NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" + NL, " if(enq_res != CLK_SUCCESS) { res[linearId % maxGlobalWorkSize] = -1; return; }" + NL, " }" + NL, "}" + NL, "" + NL, "kernel void enqueue_2D_wg_size_all_diff(__global int* res, int level, int maxGlobalWorkSize, __global int* rnd)" + NL, "{" + NL, " block_fn(level, maxGlobalWorkSize, rnd, res);" + NL, "}" + NL +}; + +void generate_reference_results_all_diff_2D(std::vector &referenceResults, cl_int len, cl_int level) +{ + size_t globalSizeX = (level == nestingLevel) ? len: 8; + size_t globalSizeY = (level == nestingLevel) ? 
1: (8 * 8); + if(--level < 0) + { + return; + } + + for (size_t tidY = 0; tidY < globalSizeY; ++tidY) + { + for (size_t tidX = 0; tidX < globalSizeX; ++tidX) + { + ++referenceResults[(globalSizeX * tidY + tidX) % len]; + if (level >= tidX && level >= tidY) + { + generate_reference_results_all_diff_2D(referenceResults, len, level); + } + } + } +} + +static int check_all_diff_2D(cl_int* results, cl_int len, cl_int nesting_level) +{ + std::vector referenceResults(len, 0); + generate_reference_results_all_diff_2D(referenceResults, len, nesting_level); + + for(size_t i = 0; i < len; ++i) + { + if (results[i] != referenceResults[i]) + { + log_error("ERROR: Kernel returned %d vs. expected %d, index: %d\n", results[i], referenceResults[i], i); + return (int)i; + } + } + + return -1; +} + +static const char* enqueue_3D_wg_size_single[] = +{ + NL, "void block_fn(int level, int maxGlobalWorkSize, __global int* rnd, __global int* res)" + NL, "{" + NL, " size_t tidX = get_global_id(0);" + NL, " size_t tidY = get_global_id(1);" + NL, " size_t tidZ = get_global_id(2);" + NL, " size_t linearId = get_global_linear_id();" + NL, " queue_t def_q = get_default_queue();" + NL, " if(--level < 0) return;" + NL, "" + NL, " void (^kernelBlock)(void) = ^{ block_fn(level, maxGlobalWorkSize, rnd, res); };" + NL, " uint wg = get_kernel_work_group_size(kernelBlock);" + NL, "" + NL, " const size_t gs[] = { 64, 64, 64 };" + NL, " size_t ls[] = { 1, 1, rnd[tidZ % maxGlobalWorkSize] % wg % gs[2] };" + NL, " ls[2] = ls[2]? 
ls[2]: 1;" + NL, " " + NL, " ndrange_t ndrange = ndrange_3D(gs, ls);" + NL, "" + NL, " // Only 1 work-item enqueues block" + NL, " if(tidX == 0 && tidY == 0 && tidZ == 0)" + NL, " {" + NL, " atomic_inc(&res[linearId % maxGlobalWorkSize]);" + NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" + NL, " if(enq_res != CLK_SUCCESS) { res[linearId % maxGlobalWorkSize] = -1; return; }" + NL, " }" + NL, "}" + NL, "" + NL, "kernel void enqueue_3D_wg_size_single(__global int* res, int level, int maxGlobalWorkSize, __global int* rnd)" + NL, "{" + NL, " block_fn(level, maxGlobalWorkSize, rnd, res);" + NL, "}" + NL +}; + +static const char* enqueue_3D_wg_size_some_eq[] = +{ + NL, "void block_fn(int level, int maxGlobalWorkSize, __global int* rnd, __global int* res)" + NL, "{" + NL, " size_t tidX = get_global_id(0);" + NL, " size_t tidY = get_global_id(1);" + NL, " size_t tidZ = get_global_id(2);" + NL, " size_t linearId = get_global_linear_id();" + NL, " queue_t def_q = get_default_queue();" + NL, " if(--level < 0) return;" + NL, "" + NL, " void (^kernelBlock)(void) = ^{ block_fn(level, maxGlobalWorkSize, rnd, res); };" + NL, " uint wg = get_kernel_work_group_size(kernelBlock);" + NL, "" + NL, " const size_t gs[] = { 4, 4, 4 };" + NL, " size_t ls[] = { 1, 1, rnd[tidZ % maxGlobalWorkSize] % wg % gs[2] };" + NL, " ls[2] = ls[2]? 
ls[2]: 1;" + NL, " " + NL, " ndrange_t ndrange = ndrange_3D(gs, ls);" + NL, "" + NL, " // Some work-items enqueues nested blocks with the same level" + NL, " if((tidX < (get_global_size(0) >> 1)) && " + NL, " ((tidY < (get_global_size(1) >> 1)) || get_global_size(1) == 1) &&" + NL, " ((tidZ < (get_global_size(2) >> 1)) || get_global_size(2) == 1))" + NL, " {" + NL, " atomic_inc(&res[linearId % maxGlobalWorkSize]);" + NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" + NL, " if(enq_res != CLK_SUCCESS) { res[linearId % maxGlobalWorkSize] = -1; return; }" + NL, " }" + NL, "}" + NL, "" + NL, "kernel void enqueue_3D_wg_size_some_eq(__global int* res, int level, int maxGlobalWorkSize, __global int* rnd)" + NL, "{" + NL, " block_fn(level, maxGlobalWorkSize, rnd, res);" + NL, "}" + NL +}; + +void generate_reference_results_some_eq_3D(std::vector &referenceResults, cl_int len, cl_int level) +{ + size_t globalSizeX = (level == nestingLevel) ? len: 4; + size_t globalSizeY = (level == nestingLevel) ? 1: 4; + size_t globalSizeZ = (level == nestingLevel) ? 1: 4; + if(--level < 0) + { + return; + } + + for (size_t tidZ = 0; tidZ < globalSizeZ; ++tidZ) + { + for (size_t tidY = 0; tidY < globalSizeY; ++tidY) + { + for (size_t tidX = 0; tidX < globalSizeX; ++tidX) + { + if ((tidX < (globalSizeX >> 1)) && ((tidY < (globalSizeY >> 1)) || globalSizeY == 1) && ((tidZ < (globalSizeZ >> 1)) || globalSizeZ == 1)) + { + ++referenceResults[(globalSizeX * globalSizeY * tidZ + globalSizeX * tidY + tidX) % len]; + generate_reference_results_some_eq_3D(referenceResults, len, level); + } + } + } + } +} + +static int check_some_eq_3D(cl_int* results, cl_int len, cl_int nesting_level) +{ + std::vector referenceResults(len, 0); + generate_reference_results_some_eq_3D(referenceResults, len, nesting_level); + + for(size_t i = 0; i < len; ++i) + { + if (results[i] != referenceResults[i]) + { + log_error("ERROR: Kernel returned %d vs. 
expected %d, index: %d\n", results[i], referenceResults[i], i); + return (int)i; + } + } + + return -1; +} + +static const char* enqueue_3D_wg_size_some_diff[] = +{ + NL, "void block_fn(int level, int maxGlobalWorkSize, __global int* rnd, __global int* res)" + NL, "{" + NL, " size_t tidX = get_global_id(0);" + NL, " size_t tidY = get_global_id(1);" + NL, " size_t tidZ = get_global_id(2);" + NL, " size_t linearId = get_global_linear_id();" + NL, " queue_t def_q = get_default_queue();" + NL, " if(--level < 0) return;" + NL, "" + NL, " void (^kernelBlock)(void) = ^{ block_fn(level, maxGlobalWorkSize, rnd, res); };" + NL, " uint wg = get_kernel_work_group_size(kernelBlock);" + NL, "" + NL, " const size_t gs[] = { 8, 8, 8 };" + NL, " size_t ls[] = { 1, 1, rnd[tidZ % maxGlobalWorkSize] % wg % gs[2] };" + NL, " ls[2] = ls[2]? ls[2]: 1;" + NL, " " + NL, " ndrange_t ndrange = ndrange_3D(gs, ls);" + NL, "" + NL, " // Some work-items enqueues nested blocks with different levels" + NL, " if((tidX % 2) == 0 && (tidY % 2) == 0 && (tidZ % 2) == 0)" + NL, " {" + NL, " atomic_inc(&res[linearId % maxGlobalWorkSize]);" + NL, " if(level >= tidX && level >= tidY && level >= tidZ)" + NL, " {" + NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" + NL, " if(enq_res != CLK_SUCCESS) { res[linearId % maxGlobalWorkSize] = -1; return; }" + NL, " }" + NL, " }" + NL, "}" + NL, "" + NL, "kernel void enqueue_3D_wg_size_some_diff(__global int* res, int level, int maxGlobalWorkSize, __global int* rnd)" + NL, "{" + NL, " block_fn(level, maxGlobalWorkSize, rnd, res);" + NL, "}" + NL +}; + +void generate_reference_results_some_diff_3D(std::vector &referenceResults, cl_int len, cl_int level) +{ + size_t globalSizeX = (level == nestingLevel) ? len: 8; + size_t globalSizeY = (level == nestingLevel) ? 1: 8; + size_t globalSizeZ = (level == nestingLevel) ? 
1: 8; + if(--level < 0) + { + return; + } + + for (size_t tidZ = 0; tidZ < globalSizeZ; ++tidZ) + { + for (size_t tidY = 0; tidY < globalSizeY; ++tidY) + { + for (size_t tidX = 0; tidX < globalSizeX; ++tidX) + { + if ((tidX % 2) == 0 && (tidY % 2) == 0 && (tidZ % 2) == 0) + { + ++referenceResults[(globalSizeX * globalSizeY * tidZ + globalSizeX * tidY + tidX) % len]; + if (level >= tidX && level >= tidY && level >= tidZ) + { + generate_reference_results_some_diff_3D(referenceResults, len, level); + } + } + } + } + } +} + +static int check_some_diff_3D(cl_int* results, cl_int len, cl_int nesting_level) +{ + std::vector referenceResults(len, 0); + generate_reference_results_some_diff_3D(referenceResults, len, nesting_level); + + for(size_t i = 0; i < len; ++i) + { + if (results[i] != referenceResults[i]) + { + log_error("ERROR: Kernel returned %d vs. expected %d, index: %d\n", results[i], referenceResults[i], i); + return (int)i; + } + } + + return -1; +} + +static const char* enqueue_3D_wg_size_all_eq[] = +{ + NL, "void block_fn(int level, int maxGlobalWorkSize, __global int* rnd, __global int* res)" + NL, "{" + NL, " size_t tidX = get_global_id(0);" + NL, " size_t tidY = get_global_id(1);" + NL, " size_t tidZ = get_global_id(2);" + NL, " size_t linearId = get_global_linear_id();" + NL, " queue_t def_q = get_default_queue();" + NL, " if(--level < 0) return;" + NL, "" + NL, " void (^kernelBlock)(void) = ^{ block_fn(level, maxGlobalWorkSize, rnd, res); };" + NL, " uint wg = get_kernel_work_group_size(kernelBlock);" + NL, "" + NL, " const size_t gs[] = { 2, 2, 2 };" + NL, " size_t ls[] = { 1, 1, rnd[tidZ % maxGlobalWorkSize] % wg % gs[2] };" + NL, " ls[2] = ls[2]? 
ls[2]: 1;" + NL, " " + NL, " ndrange_t ndrange = ndrange_3D(gs, ls);" + NL, "" + NL, " // All work-items enqueues nested blocks with the same level" + NL, " atomic_inc(&res[linearId % maxGlobalWorkSize]);" + NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" + NL, " if(enq_res != CLK_SUCCESS) { res[linearId % maxGlobalWorkSize] = -1; return; }" + NL, "}" + NL, "" + NL, "kernel void enqueue_3D_wg_size_all_eq(__global int* res, int level, int maxGlobalWorkSize, __global int* rnd)" + NL, "{" + NL, " block_fn(level, maxGlobalWorkSize, rnd, res);" + NL, "}" + NL +}; + +void generate_reference_results_all_eq_3D(std::vector &referenceResults, cl_int len, cl_int level) +{ + size_t globalSizeX = (level == nestingLevel) ? len: 2; + size_t globalSizeY = (level == nestingLevel) ? 1: 2; + size_t globalSizeZ = (level == nestingLevel) ? 1: 2; + if(--level < 0) + { + return; + } + + for (size_t tidZ = 0; tidZ < globalSizeZ; ++tidZ) + { + for (size_t tidY = 0; tidY < globalSizeY; ++tidY) + { + for (size_t tidX = 0; tidX < globalSizeX; ++tidX) + { + ++referenceResults[(globalSizeX * globalSizeY * tidZ + globalSizeX * tidY + tidX) % len]; + generate_reference_results_all_eq_3D(referenceResults, len, level); + } + } + } +} + +static int check_all_eq_3D(cl_int* results, cl_int len, cl_int nesting_level) +{ + std::vector referenceResults(len, 0); + generate_reference_results_all_eq_3D(referenceResults, len, nesting_level); + + for(size_t i = 0; i < len; ++i) + { + if (results[i] != referenceResults[i]) + { + log_error("ERROR: Kernel returned %d vs. 
expected %d, index: %d\n", results[i], referenceResults[i], i); + return (int)i; + } + } + + return -1; +} + +static const char* enqueue_3D_wg_size_all_diff[] = +{ + NL, "void block_fn(int level, int maxGlobalWorkSize, __global int* rnd, __global int* res)" + NL, "{" + NL, " size_t tidX = get_global_id(0);" + NL, " size_t tidY = get_global_id(1);" + NL, " size_t tidZ = get_global_id(2);" + NL, " size_t linearId = get_global_linear_id();" + NL, " queue_t def_q = get_default_queue();" + NL, " if(--level < 0) return;" + NL, "" + NL, " void (^kernelBlock)(void) = ^{ block_fn(level, maxGlobalWorkSize, rnd, res); };" + NL, " uint wg = get_kernel_work_group_size(kernelBlock);" + NL, "" + NL, " const size_t gs[] = { 8, 8, 8 };" + NL, " size_t ls[] = { 1, 1, rnd[tidZ % maxGlobalWorkSize] % wg % gs[2] };" + NL, " ls[2] = ls[2]? ls[2]: 1;" + NL, " " + NL, " ndrange_t ndrange = ndrange_3D(gs, ls);" + NL, "" + NL, " // All work-items enqueues nested blocks with different levels" + NL, " atomic_inc(&res[linearId % maxGlobalWorkSize]);" + NL, " if(level >= tidX && level >= tidY && level >= tidZ)" + NL, " {" + NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" + NL, " if(enq_res != CLK_SUCCESS) { res[linearId % maxGlobalWorkSize] = -1; return; }" + NL, " }" + NL, "}" + NL, "" + NL, "kernel void enqueue_3D_wg_size_all_diff(__global int* res, int level, int maxGlobalWorkSize, __global int* rnd)" + NL, "{" + NL, " block_fn(level, maxGlobalWorkSize, rnd, res);" + NL, "}" + NL +}; + +void generate_reference_results_all_diff_3D(std::vector &referenceResults, cl_int len, cl_int level) +{ + size_t globalSizeX = (level == nestingLevel) ? len: 8; + size_t globalSizeY = (level == nestingLevel) ? 1: 8; + size_t globalSizeZ = (level == nestingLevel) ? 
1: 8; + if(--level < 0) + { + return; + } + + for (size_t tidZ = 0; tidZ < globalSizeZ; ++tidZ) + { + for (size_t tidY = 0; tidY < globalSizeY; ++tidY) + { + for (size_t tidX = 0; tidX < globalSizeX; ++tidX) + { + ++referenceResults[(globalSizeX * globalSizeY * tidZ + globalSizeX * tidY + tidX) % len]; + if (level >= tidX && level >= tidY && level >= tidZ) + { + generate_reference_results_all_diff_3D(referenceResults, len, level); + } + } + } + } +} + +static int check_all_diff_3D(cl_int* results, cl_int len, cl_int nesting_level) +{ + std::vector referenceResults(len, 0); + generate_reference_results_all_diff_3D(referenceResults, len, nesting_level); + + for(size_t i = 0; i < len; ++i) + { + if (results[i] != referenceResults[i]) + { + log_error("ERROR: Kernel returned %d vs. expected %d, index: %d\n", results[i], referenceResults[i], i); + return (int)i; + } + } + + return -1; +} + +static const char* enqueue_mix_wg_size_single[] = +{ + NL, "void block_fn(int level, int maxGlobalWorkSize, __global int* rnd, __global int* res)" + NL, "{" + NL, " size_t tidX = get_global_id(0);" + NL, " size_t tidY = get_global_id(1);" + NL, " size_t tidZ = get_global_id(2);" + NL, " size_t linearId = get_global_linear_id();" + NL, " queue_t def_q = get_default_queue();" + NL, " if(--level < 0) return;" + NL, "" + NL, " void (^kernelBlock)(void) = ^{ block_fn(level, maxGlobalWorkSize, rnd, res); };" + NL, " uint wg = get_kernel_work_group_size(kernelBlock);" + NL, "" + NL, " ndrange_t ndrange;" + NL, " switch((linearId + level) % 3)" + NL, " {" + NL, " case 0:" + NL, " {" + NL, " const size_t gs = 64 * 64 * 64;" + NL, " size_t ls = rnd[tidX % maxGlobalWorkSize] % wg % gs;" + NL, " ls = ls? ls: 1;" + NL, " ndrange = ndrange_1D(gs, ls);" + NL, " }" + NL, " break;" + NL, " case 1:" + NL, " {" + NL, " const size_t gs[] = { 64, 64 * 64 };" + NL, " size_t ls[] = { 1, rnd[tidY % maxGlobalWorkSize] % wg % gs[1] };" + NL, " ls[1] = ls[1]? 
ls[1]: 1;" + NL, " ndrange = ndrange_2D(gs, ls);" + NL, " }" + NL, " break;" + NL, " case 2:" + NL, " {" + NL, " const size_t gs[] = { 64, 64, 64 };" + NL, " size_t ls[] = { 1, 1, rnd[tidZ % maxGlobalWorkSize] % wg % gs[2] };" + NL, " ls[2] = ls[2]? ls[2]: 1;" + NL, " ndrange = ndrange_3D(gs, ls);" + NL, " }" + NL, " break;" + NL, " default:" + NL, " break;" + NL, " }" + NL, "" + NL, " // Only 1 work-item enqueues block" + NL, " if(tidX == 0 && (tidY == 0 || get_global_size(1) == 1) && (tidZ == 0 || get_global_size(2) == 1))" + NL, " {" + NL, " atomic_inc(&res[linearId % maxGlobalWorkSize]);" + NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" + NL, " if(enq_res != CLK_SUCCESS) { res[linearId % maxGlobalWorkSize] = -1; return; }" + NL, " }" + NL, "}" + NL, "" + NL, "kernel void enqueue_mix_wg_size_single(__global int* res, int level, int maxGlobalWorkSize, __global int* rnd)" + NL, "{" + NL, " block_fn(level, maxGlobalWorkSize, rnd, res);" + NL, "}" + NL +}; + +static const char* enqueue_mix_wg_size_some_eq[] = +{ + NL, "void block_fn(int level, int maxGlobalWorkSize, __global int* rnd, __global int* res)" + NL, "{" + NL, " queue_t def_q = get_default_queue();" + NL, " size_t tidX = get_global_id(0);" + NL, " size_t tidY = get_global_id(1);" + NL, " size_t tidZ = get_global_id(2);" + NL, " size_t linearId = get_global_linear_id();" + NL, " if(--level < 0) return;" + NL, "" + NL, " void (^kernelBlock)(void) = ^{ block_fn(level, maxGlobalWorkSize, rnd, res); };" + NL, " uint wg = get_kernel_work_group_size(kernelBlock);" + NL, "" + NL, " ndrange_t ndrange;" + NL, " switch((linearId + level) % 3)" + NL, " {" + NL, " case 0:" + NL, " {" + NL, " const size_t gs = 2 * 4 * 4;" + NL, " size_t ls = rnd[tidX % maxGlobalWorkSize] % wg % gs;" + NL, " ls = ls? 
ls: 1;" + NL, " ndrange = ndrange_1D(gs, ls);" + NL, " }" + NL, " break;" + NL, " case 1:" + NL, " {" + NL, " const size_t gs[] = { 2, 4 * 4 };" + NL, " size_t ls[] = { 1, rnd[tidY % maxGlobalWorkSize] % wg % gs[1] };" + NL, " ls[1] = ls[1]? ls[1]: 1;" + NL, " ndrange = ndrange_2D(gs, ls);" + NL, " }" + NL, " break;" + NL, " case 2:" + NL, " {" + NL, " const size_t gs[] = { 2, 4, 4 };" + NL, " size_t ls[] = { 1, 1, rnd[tidZ % maxGlobalWorkSize] % wg % gs[2] };" + NL, " ls[2] = ls[2]? ls[2]: 1;" + NL, " ndrange = ndrange_3D(gs, ls);" + NL, " }" + NL, " break;" + NL, " default:" + NL, " break;" + NL, " }" + NL, "" + NL, " // Some work-items enqueues nested blocks with the same level" + NL, " size_t globalSizeX = get_global_size(0);" + NL, " size_t globalSizeY = get_global_size(1);" + NL, " size_t globalSizeZ = get_global_size(2);" + NL, " if((tidX < (globalSizeX >> 1)) && ((tidY < (globalSizeY >> 1)) || globalSizeY == 1) && ((tidZ < (globalSizeZ >> 1)) || globalSizeZ == 1))" + NL, " {" + NL, " atomic_inc(&res[linearId % maxGlobalWorkSize]);" + NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" + NL, " if(enq_res != CLK_SUCCESS) { res[linearId % maxGlobalWorkSize] = -1; return; }" + NL, " }" + NL, "}" + NL, "" + NL, "kernel void enqueue_mix_wg_size_some_eq(__global int* res, int level, int maxGlobalWorkSize, __global int* rnd)" + NL, "{" + NL, " block_fn(level, maxGlobalWorkSize, rnd, res);" + NL, "}" + NL +}; + +void generate_reference_results_some_eq_mix(std::vector &referenceResults, cl_int len, cl_int level, cl_int dim) +{ + size_t globalSizeX = 1, globalSizeY = 1, globalSizeZ = 1; + switch (dim) + { + case 0: + globalSizeX = (level == nestingLevel) ? 
len: (2 * 4 * 4); + break; + case 1: + globalSizeX = 2; + globalSizeY = 4 * 4; + break; + case 2: + globalSizeX = 2; + globalSizeY = 4; + globalSizeZ = 4; + break; + default: + break; + } + + if(--level < 0) + { + return; + } + + for (size_t tidZ = 0; tidZ < globalSizeZ; ++tidZ) + { + for (size_t tidY = 0; tidY < globalSizeY; ++tidY) + { + for (size_t tidX = 0; tidX < globalSizeX; ++tidX) + { + size_t linearID = globalSizeX * globalSizeY * tidZ + globalSizeX * tidY + tidX; + cl_int nextDim = (linearID + level) % 3; + if ((tidX < (globalSizeX >> 1)) && ((tidY < (globalSizeY >> 1)) || globalSizeY == 1) && ((tidZ < (globalSizeZ >> 1)) || globalSizeZ == 1)) + { + ++referenceResults[linearID % len]; + generate_reference_results_some_eq_mix(referenceResults, len, level, nextDim); + } + } + } + } +} + +static int check_some_eq_mix(cl_int* results, cl_int len, cl_int nesting_level) +{ + std::vector referenceResults(len, 0); + generate_reference_results_some_eq_mix(referenceResults, len, nesting_level, 0); + + for(size_t i = 0; i < len; ++i) + { + if (results[i] != referenceResults[i]) + { + log_error("ERROR: Kernel returned %d vs. 
expected %d, index: %d\n", results[i], referenceResults[i], i); + return (int)i; + } + } + + return -1; +} + +static const char* enqueue_mix_wg_size_some_diff[] = +{ + NL, "void block_fn(int level, int maxGlobalWorkSize, __global int* rnd, __global int* res)" + NL, "{" + NL, " queue_t def_q = get_default_queue();" + NL, " size_t tidX = get_global_id(0);" + NL, " size_t tidY = get_global_id(1);" + NL, " size_t tidZ = get_global_id(2);" + NL, " size_t linearId = get_global_linear_id();" + NL, " if(--level < 0) return;" + NL, "" + NL, " void (^kernelBlock)(void) = ^{ block_fn(level, maxGlobalWorkSize, rnd, res); };" + NL, " uint wg = get_kernel_work_group_size(kernelBlock);" + NL, "" + NL, " ndrange_t ndrange;" + NL, " switch((linearId + level) % 3)" + NL, " {" + NL, " case 0:" + NL, " {" + NL, " const size_t gs = 8 * 8 * 8;" + NL, " size_t ls = rnd[tidX % maxGlobalWorkSize] % wg % gs;" + NL, " ls = ls? ls: 1;" + NL, " ndrange = ndrange_1D(gs, ls);" + NL, " }" + NL, " break;" + NL, " case 1:" + NL, " {" + NL, " const size_t gs[] = { 8, 8 * 8 };" + NL, " size_t ls[] = { 1, rnd[tidY % maxGlobalWorkSize] % wg % gs[1] };" + NL, " ls[1] = ls[1]? ls[1]: 1;" + NL, " ndrange = ndrange_2D(gs, ls);" + NL, " }" + NL, " break;" + NL, " case 2:" + NL, " {" + NL, " const size_t gs[] = { 8, 8, 8 };" + NL, " size_t ls[] = { 1, 1, rnd[tidZ % maxGlobalWorkSize] % wg % gs[2] };" + NL, " ls[2] = ls[2]? 
ls[2]: 1;" + NL, " ndrange = ndrange_3D(gs, ls);" + NL, " }" + NL, " break;" + NL, " default:" + NL, " break;" + NL, " }" + NL, "" + NL, " // Some work-items enqueues nested blocks with different levels" + NL, " if((tidX % 2) == 0 && (tidY % 2) == 0 && (tidZ % 2) == 0)" + NL, " {" + NL, " atomic_inc(&res[linearId % maxGlobalWorkSize]);" + NL, " if(level >= tidX && level >= tidY && level >= tidZ)" + NL, " {" + NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" + NL, " if(enq_res != CLK_SUCCESS) { res[linearId % maxGlobalWorkSize] = -1; return; }" + NL, " }" + NL, " }" + NL, "}" + NL, "" + NL, "kernel void enqueue_mix_wg_size_some_diff(__global int* res, int level, int maxGlobalWorkSize, __global int* rnd)" + NL, "{" + NL, " block_fn(level, maxGlobalWorkSize, rnd, res);" + NL, "}" + NL +}; + +void generate_reference_results_some_diff_mix(std::vector &referenceResults, cl_int len, cl_int level, cl_int dim) +{ + size_t globalSizeX = 1, globalSizeY = 1, globalSizeZ = 1; + switch (dim) + { + case 0: + globalSizeX = (level == nestingLevel) ? 
len: (8 * 8 * 8); + break; + case 1: + globalSizeX = 8; + globalSizeY = 8 * 8; + break; + case 2: + globalSizeX = 8; + globalSizeY = 8; + globalSizeZ = 8; + break; + default: + return; + break; + } + + if(--level < 0) + { + return; + } + + for (size_t tidZ = 0; tidZ < globalSizeZ; ++tidZ) + { + for (size_t tidY = 0; tidY < globalSizeY; ++tidY) + { + for (size_t tidX = 0; tidX < globalSizeX; ++tidX) + { + size_t linearID = globalSizeX * globalSizeY * tidZ + globalSizeX * tidY + tidX; + cl_int nextDim = (linearID + level) % 3; + if ((tidX % 2) == 0 && (tidY % 2) == 0 && (tidZ % 2) == 0) + { + ++referenceResults[linearID % len]; + if (level >= tidX && level >= tidY && level >= tidZ) + { + generate_reference_results_some_diff_mix(referenceResults, len, level, nextDim); + } + } + } + } + } +} + +static int check_some_diff_mix(cl_int* results, cl_int len, cl_int nesting_level) +{ + std::vector referenceResults(len, 0); + generate_reference_results_some_diff_mix(referenceResults, len, nesting_level, 0); + + for(size_t i = 0; i < len; ++i) + { + if (results[i] != referenceResults[i]) + { + log_error("ERROR: Kernel returned %d vs. 
expected %d, index: %d\n", results[i], referenceResults[i], i); + return (int)i; + } + } + + return -1; +} + +static const char* enqueue_mix_wg_size_all_eq[] = +{ + NL, "void block_fn(int level, int maxGlobalWorkSize, __global int* rnd, __global int* res)" + NL, "{" + NL, " queue_t def_q = get_default_queue();" + NL, " size_t tidX = get_global_id(0);" + NL, " size_t tidY = get_global_id(1);" + NL, " size_t tidZ = get_global_id(2);" + NL, " size_t linearId = get_global_linear_id();" + NL, " if(--level < 0) return;" + NL, "" + NL, " void (^kernelBlock)(void) = ^{ block_fn(level, maxGlobalWorkSize, rnd, res); };" + NL, " uint wg = get_kernel_work_group_size(kernelBlock);" + NL, "" + NL, " ndrange_t ndrange;" + NL, " switch((linearId + level) % 3)" + NL, " {" + NL, " case 0:" + NL, " {" + NL, " const size_t gs = 2 * 2 * 2;" + NL, " size_t ls = rnd[tidX % maxGlobalWorkSize] % wg % gs;" + NL, " ls = ls? ls: 1;" + NL, " ndrange = ndrange_1D(gs, ls);" + NL, " }" + NL, " break;" + NL, " case 1:" + NL, " {" + NL, " const size_t gs[] = { 2, 2 * 2 };" + NL, " size_t ls[] = { 1, rnd[tidY % maxGlobalWorkSize] % wg % gs[1] };" + NL, " ls[1] = ls[1]? ls[1]: 1;" + NL, " ndrange = ndrange_2D(gs, ls);" + NL, " }" + NL, " break;" + NL, " case 2:" + NL, " {" + NL, " const size_t gs[] = { 2, 2, 2 };" + NL, " size_t ls[] = { 1, 1, rnd[tidZ % maxGlobalWorkSize] % wg % gs[2] };" + NL, " ls[2] = ls[2]? 
ls[2]: 1;" + NL, " ndrange = ndrange_3D(gs, ls);" + NL, " }" + NL, " break;" + NL, " default:" + NL, " break;" + NL, " }" + NL, "" + NL, " // All work-items enqueues nested blocks with the same level" + NL, " atomic_inc(&res[linearId % maxGlobalWorkSize]);" + NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" + NL, " if(enq_res != CLK_SUCCESS) { res[linearId % maxGlobalWorkSize] = -1; return; }" + NL, "}" + NL, "" + NL, "kernel void enqueue_mix_wg_size_all_eq(__global int* res, int level, int maxGlobalWorkSize, __global int* rnd)" + NL, "{" + NL, " block_fn(level, maxGlobalWorkSize, rnd, res);" + NL, "}" + NL +}; + +void generate_reference_results_all_eq_mix(std::vector &referenceResults, cl_int len, cl_int level, cl_int dim) +{ + size_t globalSizeX = 1, globalSizeY = 1, globalSizeZ = 1; + switch (dim) + { + case 0: + globalSizeX = (level == nestingLevel) ? len: (2 * 2 * 2); + break; + case 1: + globalSizeX = 2; + globalSizeY = 2 * 2; + break; + case 2: + globalSizeX = 2; + globalSizeY = 2; + globalSizeZ = 2; + break; + default: + break; + } + + if(--level < 0) + { + return; + } + + for (size_t tidZ = 0; tidZ < globalSizeZ; ++tidZ) + { + for (size_t tidY = 0; tidY < globalSizeY; ++tidY) + { + for (size_t tidX = 0; tidX < globalSizeX; ++tidX) + { + size_t linearID = globalSizeX * globalSizeY * tidZ + globalSizeX * tidY + tidX; + cl_int nextDim = (linearID + level) % 3; + ++referenceResults[linearID % len]; + generate_reference_results_all_eq_mix(referenceResults, len, level, nextDim); + } + } + } +} + +static int check_all_eq_mix(cl_int* results, cl_int len, cl_int nesting_level) +{ + std::vector referenceResults(len, 0); + generate_reference_results_all_eq_mix(referenceResults, len, nesting_level, 0); + + for(size_t i = 0; i < len; ++i) + { + if (results[i] != referenceResults[i]) + { + log_error("ERROR: Kernel returned %d vs. 
expected %d, index: %d\n", results[i], referenceResults[i], i); + return (int)i; + } + } + + return -1; +} + +static const char* enqueue_mix_wg_size_all_diff[] = +{ + NL, "void block_fn(int level, int maxGlobalWorkSize, __global int* rnd, __global int* res)" + NL, "{" + NL, " queue_t def_q = get_default_queue();" + NL, " size_t tidX = get_global_id(0);" + NL, " size_t tidY = get_global_id(1);" + NL, " size_t tidZ = get_global_id(2);" + NL, " size_t linearId = get_global_linear_id();" + NL, " if(--level < 0) return;" + NL, "" + NL, " void (^kernelBlock)(void) = ^{ block_fn(level, maxGlobalWorkSize, rnd, res); };" + NL, " uint wg = get_kernel_work_group_size(kernelBlock);" + NL, "" + NL, " ndrange_t ndrange;" + NL, " switch((linearId + level) % 3)" + NL, " {" + NL, " case 0:" + NL, " {" + NL, " const size_t gs = 8 * 8 * 8;" + NL, " size_t ls = rnd[tidX % maxGlobalWorkSize] % wg % gs;" + NL, " ls = ls? ls: 1;" + NL, " ndrange = ndrange_1D(gs, ls);" + NL, " }" + NL, " break;" + NL, " case 1:" + NL, " {" + NL, " const size_t gs[] = { 8, 8 * 8 };" + NL, " size_t ls[] = { 1, rnd[tidY % maxGlobalWorkSize] % wg % gs[1] };" + NL, " ls[1] = ls[1]? ls[1]: 1;" + NL, " ndrange = ndrange_2D(gs, ls);" + NL, " }" + NL, " break;" + NL, " case 2:" + NL, " {" + NL, " const size_t gs[] = { 8, 8, 8 };" + NL, " size_t ls[] = { 1, 1, rnd[tidZ % maxGlobalWorkSize] % wg % gs[2] };" + NL, " ls[2] = ls[2]? 
ls[2]: 1;" + NL, " ndrange = ndrange_3D(gs, ls);" + NL, " }" + NL, " break;" + NL, " default:" + NL, " break;" + NL, " }" + NL, "" + NL, " // All work-items enqueues nested blocks with different levels" + NL, " atomic_inc(&res[linearId % maxGlobalWorkSize]);" + NL, " if(level >= tidX && level >= tidY && level >= tidZ)" + NL, " {" + NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" + NL, " if(enq_res != CLK_SUCCESS) { res[linearId % maxGlobalWorkSize] = -1; return; }" + NL, " }" + NL, "}" + NL, "" + NL, "kernel void enqueue_mix_wg_size_all_diff(__global int* res, int level, int maxGlobalWorkSize, __global int* rnd)" + NL, "{" + NL, " block_fn(level, maxGlobalWorkSize, rnd, res);" + NL, "}" + NL +}; + +void generate_reference_results_all_diff_mix(std::vector &referenceResults, cl_int len, cl_int level, cl_int dim) +{ + size_t globalSizeX = 1, globalSizeY = 1, globalSizeZ = 1; + switch (dim) + { + case 0: + globalSizeX = (level == nestingLevel) ? 
len: (8 * 8 * 8); + break; + case 1: + globalSizeX = 8; + globalSizeY = 8 * 8; + break; + case 2: + globalSizeX = 8; + globalSizeY = 8; + globalSizeZ = 8; + break; + default: + break; + } + + if(--level < 0) + { + return; + } + + for (size_t tidZ = 0; tidZ < globalSizeZ; ++tidZ) + { + for (size_t tidY = 0; tidY < globalSizeY; ++tidY) + { + for (size_t tidX = 0; tidX < globalSizeX; ++tidX) + { + size_t linearID = globalSizeX * globalSizeY * tidZ + globalSizeX * tidY + tidX; + cl_int nextDim = (linearID + level) % 3; + ++referenceResults[linearID % len]; + if (level >= tidX && level >= tidY && level >= tidZ) + { + generate_reference_results_all_diff_mix(referenceResults, len, level, nextDim); + } + } + } + } +} + +static int check_all_diff_mix(cl_int* results, cl_int len, cl_int nesting_level) +{ + std::vector referenceResults(len, 0); + generate_reference_results_all_diff_mix(referenceResults, len, nesting_level, 0); + + for(size_t i = 0; i < len; ++i) + { + if (results[i] != referenceResults[i]) + { + log_error("ERROR: Kernel returned %d vs. 
expected %d, index: %d\n", results[i], referenceResults[i], i); + return (int)i; + } + } + + return -1; +} + +static const kernel_src_check sources_enqueue_wg_size[] = +{ + { KERNEL(enqueue_1D_wg_size_single), check_single }, + { KERNEL(enqueue_1D_wg_size_some_eq), check_some_eq_1D }, + { KERNEL(enqueue_1D_wg_size_some_diff), check_some_diff_1D }, + { KERNEL(enqueue_1D_wg_size_all_eq), check_all_eq_1D }, + { KERNEL(enqueue_1D_wg_size_all_diff), check_all_diff_1D }, + + { KERNEL(enqueue_2D_wg_size_single), check_single }, + { KERNEL(enqueue_2D_wg_size_some_eq), check_some_eq_2D }, + { KERNEL(enqueue_2D_wg_size_some_diff), check_some_diff_2D }, + { KERNEL(enqueue_2D_wg_size_all_eq), check_all_eq_2D }, + { KERNEL(enqueue_2D_wg_size_all_diff), check_all_diff_2D }, + + { KERNEL(enqueue_3D_wg_size_single), check_single }, + { KERNEL(enqueue_3D_wg_size_some_eq), check_some_eq_3D }, + { KERNEL(enqueue_3D_wg_size_some_diff), check_some_diff_3D }, + { KERNEL(enqueue_3D_wg_size_all_eq), check_all_eq_3D }, + { KERNEL(enqueue_3D_wg_size_all_diff), check_all_diff_3D }, + + { KERNEL(enqueue_mix_wg_size_single), check_single }, + { KERNEL(enqueue_mix_wg_size_some_eq), check_some_eq_mix }, + { KERNEL(enqueue_mix_wg_size_some_diff), check_some_diff_mix }, + { KERNEL(enqueue_mix_wg_size_all_eq), check_all_eq_mix }, + { KERNEL(enqueue_mix_wg_size_all_diff), check_all_diff_mix } +}; + +int test_enqueue_wg_size(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + MTdata d; + cl_uint i, k; + cl_int err_ret, res = 0; + clCommandQueueWrapper dev_queue; + const cl_int MAX_GLOBAL_WORK_SIZE = MAX_GWS / 4; + cl_int kernel_results[MAX_GLOBAL_WORK_SIZE] = { 0 }; + cl_uint vrnd[MAX_GLOBAL_WORK_SIZE] = { 0 }; + + size_t ret_len; + cl_uint max_queues = 1; + cl_uint maxQueueSize = 0; + d = init_genrand(gRandomSeed); + + if(gWimpyMode) + { + nestingLevel = 2; + vlog( "*** WARNING: Testing in Wimpy mode! 
***\n" ); + vlog( "*** Wimpy mode is not sufficient to verify correctness. ***\n" ); + } + + err_ret = clGetDeviceInfo(device, CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE, sizeof(maxQueueSize), &maxQueueSize, 0); + test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE) failed"); + + err_ret = clGetDeviceInfo(device, CL_DEVICE_MAX_ON_DEVICE_QUEUES, sizeof(max_queues), &max_queues, &ret_len); + test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_MAX_ON_DEVICE_QUEUES) failed"); + + size_t max_local_size = 1; + err_ret = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(max_local_size), &max_local_size, &ret_len); + test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_MAX_WORK_GROUP_SIZE) failed"); + + cl_queue_properties queue_prop_def[] = + { + CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE|CL_QUEUE_ON_DEVICE|CL_QUEUE_ON_DEVICE_DEFAULT, + CL_QUEUE_SIZE, maxQueueSize, + 0 + }; + + dev_queue = clCreateCommandQueueWithProperties(context, device, queue_prop_def, &err_ret); + test_error(err_ret, "clCreateCommandQueueWithProperties(CL_QUEUE_DEVICE|CL_QUEUE_DEFAULT) failed"); + + + size_t failCnt = 0; + for(k = 0; k < arr_size(sources_enqueue_wg_size); ++k) + { + if (!gKernelName.empty() && gKernelName != sources_enqueue_wg_size[k].src.kernel_name) + continue; + + log_info("Running '%s' kernel (%d of %d) ...\n", sources_enqueue_wg_size[k].src.kernel_name, k + 1, arr_size(sources_enqueue_wg_size)); + for(i = 0; i < MAX_GLOBAL_WORK_SIZE; ++i) + { + kernel_results[i] = 0; + vrnd[i] = genrand_int32(d); + } + + // Fill some elements with prime numbers + cl_uint prime[] = { 3, 5, 7, 11, 13, 17, 19, 23, + 29, 31, 37, 41, 43, 47, 53, 59, + 61, 67, 71, 73, 79, 83, 89, 97, + 101, 103, 107, 109, 113, 127 }; + + for(i = 0; i < arr_size(prime); ++i) + { + vrnd[genrand_int32(d) % MAX_GLOBAL_WORK_SIZE] = prime[i]; + } + + clMemWrapper mem; + mem = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR, sizeof(vrnd), vrnd, &err_ret); + test_error(err_ret, 
"clCreateBuffer() failed"); + + kernel_arg args[] = + { + { sizeof(cl_uint), &nestingLevel }, + { sizeof(cl_uint), &MAX_GLOBAL_WORK_SIZE }, + { sizeof(cl_mem), &mem } + }; + + size_t global_size = MAX_GLOBAL_WORK_SIZE; + size_t local_size = (max_local_size > global_size) ? global_size : max_local_size; + + err_ret = run_n_kernel_args(context, queue, sources_enqueue_wg_size[k].src.lines, sources_enqueue_wg_size[k].src.num_lines, sources_enqueue_wg_size[k].src.kernel_name, local_size, global_size, kernel_results, sizeof(kernel_results), arr_size(args), args); + + //check results + int fail = sources_enqueue_wg_size[k].check(kernel_results, global_size, nestingLevel); + + if(check_error(err_ret, "'%s' kernel execution failed", sources_enqueue_wg_size[k].src.kernel_name)) { ++failCnt; res = -1; continue; } + else if(fail >= 0 && check_error(-1, "'%s' kernel results validation failed: [%d]", sources_enqueue_wg_size[k].src.kernel_name, fail)) { ++failCnt; res = -1; continue; } + else log_info("'%s' kernel is OK.\n", sources_enqueue_wg_size[k].src.kernel_name); + } + + if (failCnt > 0) + { + log_error("ERROR: %d of %d kernels failed.\n", failCnt, arr_size(sources_enqueue_wg_size)); + } + + free_mtdata(d); + + return res; +} + +#endif + diff --git a/test_conformance/device_execution/execute_block.cpp b/test_conformance/device_execution/execute_block.cpp new file mode 100644 index 00000000..e10b7c6e --- /dev/null +++ b/test_conformance/device_execution/execute_block.cpp @@ -0,0 +1,1050 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include +#include +#include "../../test_common/harness/testHarness.h" +#include "../../test_common/harness/typeWrappers.h" + +#include + +#include "procs.h" +#include "utils.h" +#include + + +#ifdef CL_VERSION_2_0 + +static const char* block_global_scope[] = +{ + NL, "int __constant globalVar = 7;" + NL, "int (^__constant globalBlock)(int) = ^int(int num)" + NL, "{" + NL, " return globalVar * num * (1+ get_global_id(0));" + NL, "};" + NL, "kernel void block_global_scope(__global int* res)" + NL, "{" + NL, " size_t tid = get_global_id(0);" + NL, " res[tid] = -1;" + NL, " res[tid] = globalBlock(3) - 21*(tid + 1);" + NL, "}" + NL +}; + +static const char* block_kernel_scope[] = +{ + NL, "kernel void block_kernel_scope(__global int* res)" + NL, "{" + NL, " int multiplier = 3;" + NL, " int (^kernelBlock)(int) = ^(int num)" + NL, " {" + NL, " return num * multiplier;" + NL, " };" + NL, " size_t tid = get_global_id(0);" + NL, " res[tid] = -1;" + NL, " multiplier = 8;" + NL, " res[tid] = kernelBlock(7) - 21;" + NL, "}" + NL +}; + +static const char* block_statement_scope[] = +{ + NL, "kernel void block_statement_scope(__global int* res)" + NL, "{" + NL, " int multiplier = 0;" + NL, " size_t tid = get_global_id(0);" + NL, " res[tid] = -1;" + NL, " multiplier = 9;" + NL, " res[tid] = ^int(int num) { return multiplier * num; } (11) - 99;" + NL, "}" + NL +}; + +static const char* block_function_scope[] = +{ + NL, "int fnTest(int a)" + NL, "{" + NL, " int localVar = 17;" + NL, " int (^functionBlock)(int) = ^(int num)" + NL, " {" + NL, " return localVar * 
num;" + NL, " };" + NL, " return 111 - functionBlock(a+1);" + NL, "}" + NL, "kernel void block_function_scope(__global int* res)" + NL, "{" + NL, " size_t tid = get_global_id(0);" + NL, " res[tid] = -1;" + NL, " res[tid] = fnTest(5) - 9;" + NL, "}" + NL +}; + +static const char* block_nested_scope[] = +{ + NL, "kernel void block_nested_scope(__global int* res)" + NL, "{" + NL, " int multiplier = 3;" + NL, " int (^kernelBlock)(int) = ^(int num)" + NL, " {" + NL, " int (^innerBlock)(int) = ^(int n)" + NL, " {" + NL, " return multiplier * n;" + NL, " };" + NL, " return num * innerBlock(23);" + NL, " };" + NL, " size_t tid = get_global_id(0);" + NL, " res[tid] = -1;" + NL, " multiplier = 8;" + NL, " res[tid] = kernelBlock(13) - 897;" + NL, "}" + NL +}; + +static const char* block_arg_struct[] = +{ + NL, "struct two_ints {" + NL, " short x;" + NL, " long y;" + NL, "};" + NL, "struct two_structs {" + NL, " struct two_ints a;" + NL, " struct two_ints b;" + NL, "};" + NL, "kernel void block_arg_struct(__global int* res)" + NL, "{" + NL, " int (^kernelBlock)(struct two_ints, struct two_structs) = ^int(struct two_ints ti, struct two_structs ts)" + NL, " {" + NL, " return ti.x * ti.y * ts.a.x * ts.a.y * ts.b.x * ts.b.y;" + NL, " };" + NL, " struct two_ints i;" + NL, " i.x = 2;" + NL, " i.y = 3;" + NL, " struct two_structs s;" + NL, " s.a.x = 4;" + NL, " s.a.y = 5;" + NL, " s.b.x = 6;" + NL, " s.b.y = 7;" + NL, " size_t tid = get_global_id(0);" + NL, " res[tid] = -1;" + NL, " res[tid] = kernelBlock(i,s) - 5040;" + NL, "}" + NL +}; + +static const char* block_arg_types_mix[] = +{ + NL, "union number {" + NL, " long l;" + NL, " float f;" + NL, "};" + NL, "enum color {" + NL, " RED = 0," + NL, " GREEN," + NL, " BLUE" // Using this value - it is actualy "2" + NL, "};" + NL, "typedef int _INT ;" + NL, "typedef char _ACHAR[3] ;" + NL, "kernel void block_arg_types_mix(__global int* res)" + NL, "{" + NL, " int (^kernelBlock)(_INT, _ACHAR, union number, enum color, int, int, int, int, 
int, int, int, int, int, int, int, int, int) =" + NL, " ^int(_INT bi, _ACHAR bch, union number bn, enum color bc, int i1, int i2, int i3, int i4, int i5, int i6, int i7, int i8," + NL, " int i9, int i10, int i11, int i12, int i13)" + NL, " {" + NL, " return bi * bch[0] * bch[1] * bch[2] * bn.l * bc - i1 - i2 - i3 - i4 - i5 - i6 - i7 - i8 - i9 - i10 - i11 - i12 - i13;" + NL, " };" + NL, " size_t tid = get_global_id(0);" + NL, " res[tid] = -1;" + NL, " _INT x = -5;" + NL, " _ACHAR char_arr = { 1, 2, 3 };" + NL, " union number n;" + NL, " n.l = 4;" + NL, " enum color c = BLUE;" + NL, " res[tid] = kernelBlock(x,char_arr,n,c,1,2,3,4,5,6,7,8,9,10,11,12,13) + 331;" + NL, "}" + NL +}; + +static const char* block_arg_pointer[] = +{ + NL, "struct two_ints {" + NL, " short x;" + NL, " long y;" + NL, "};" + NL, "kernel void block_arg_pointer(__global int* res)" + NL, "{" + NL, " int (^kernelBlock)(struct two_ints*, struct two_ints*, int*, int*) = " + NL, " ^int(struct two_ints* bs1, struct two_ints* bs2, int* bi1, int* bi2)" + NL, " {" + NL, " return (*bs1).x * (*bs1).y * (*bs2).x * (*bs2).y * (*bi1) * (*bi2);" + NL, " };" + NL, " size_t tid = get_global_id(0);" + NL, " res[tid] = -1;" + NL, " struct two_ints s[2];" + NL, " s[0].x = 4;" + NL, " s[0].y = 5;" + NL, " struct two_ints* ps = s + 1;" + NL, " (*ps).x = 6;" + NL, " (*ps).y = 7;" + NL, " int i = 2;" + NL, " int * pi = &i;" + NL, " res[tid] = kernelBlock(s,ps,&i,pi) - 3360;" + NL, "}" + NL +}; + +static const char* block_arg_global_p[] = +{ + NL, "kernel void block_arg_global_p(__global int* res)" + NL, "{" + NL, " size_t tid = get_global_id(0);" + NL, " res[tid] = -1;" + NL, " typedef __global int* int_ptr_to_global_t;" + NL, " int_ptr_to_global_t (^kernelBlock)(__global int*, int) =^ int_ptr_to_global_t (__global int* bres, int btid)" + NL, " {" + NL, " bres[tid] = 5;" + NL, " return bres;" + NL, " };" + NL, " res = kernelBlock(res, tid);" + NL, " res[tid] -= 5;" + NL, "}" + NL +}; + +static const char* 
block_arg_const_p[] = +{ + NL, "constant int ci = 8;" + NL, "kernel void block_arg_const_p(__global int* res)" + NL, "{" + NL, " __constant int* (^kernelBlock)(__constant int*) = ^(__constant int* bpci)" + NL, " {" + NL, " return bpci;" + NL, " };" + NL, " constant int* pci = &ci;" + NL, " constant int* pci_check;" + NL, " pci_check = kernelBlock(pci);" + NL, " size_t tid = get_global_id(0);" + NL, " res[tid] = pci == pci_check ? 0 : -1;" + NL, "}" + NL +}; + +static const char* block_ret_struct[] = +{ + NL, "kernel void block_ret_struct(__global int* res)" + NL, "{" + NL, " struct A {" + NL, " int a;" + NL, " }; " + NL, " struct A (^kernelBlock)(struct A) = ^struct A(struct A a)" + NL, " { " + NL, " a.a = 6;" + NL, " return a;" + NL, " };" + NL, " size_t tid = get_global_id(0);" + NL, " res[tid] = -1;" + NL, " struct A aa;" + NL, " aa.a = 5;" + NL, " res[tid] = kernelBlock(aa).a - 6;" + NL, "}" + NL +}; + +static const char* block_arg_global_var[] = +{ + NL, "constant int gi = 8;" + NL, "kernel void block_arg_global_var(__global int* res)" + NL, "{" + NL, " int (^kernelBlock)(int) = ^(int bgi)" + NL, " {" + NL, " return bgi - 8;" + NL, " };" + NL, " size_t tid = get_global_id(0);" + NL, " res[tid] = kernelBlock(gi);" + NL, "}" + NL +}; + +static const char* block_in_for_init[] = +{ + NL, "kernel void block_in_for_init(__global int* res)" + NL, "{" + NL, " int multiplier = 3;" + NL, " int (^kernelBlock)(int) = ^(int num)" + NL, " {" + NL, " return num * multiplier;" + NL, " };" + NL, " size_t tid = get_global_id(0);" + NL, " res[tid] = 27;" + NL, " for(int i=kernelBlock(9); i>0; i--)" + NL, " {" + NL, " res[tid]--;" + NL, " }" + NL, "}" + NL +}; + +static const char* block_in_for_cond[] = +{ + NL, "kernel void block_in_for_cond(__global int* res)" + NL, "{" + NL, " int multiplier = 3;" + NL, " int (^kernelBlock)(int) = ^(int num)" + NL, " {" + NL, " return num * multiplier;" + NL, " };" + NL, " size_t tid = get_global_id(0);" + NL, " res[tid] = 39;" + NL, " for(int 
i=0; i 0)" + NL, " {" + NL, " typedef uint (^block_t)(uint);" + NL, " const block_t nestedBlock = ^(uint bi) { return (uint)(bi + 4); };" + NL, " a = nestedBlock(1) + nestedBlock(2);" + NL, " break;" + NL, " }" + NL, " } while(1); " + NL, " res[tid] = a - 11;" + NL, "}" + NL +}; + +static const char* block_typedef_mltpl_g[] = +{ + NL, "typedef int (^block1_t)(float, int); " + NL, "constant block1_t b1 = ^(float fi, int ii) { return (int)(ii + fi); };" + NL, "typedef int (^block2_t)(float, int);" + NL, "constant block2_t b2 = ^(float fi, int ii) { return (int)(ii + fi); };" + NL, "typedef float (^block3_t)(int, int);" + NL, "constant block3_t b3 = ^(int i1, int i2) { return (float)(i1 + i2); };" + NL, "typedef int (^block4_t)(float, float);" + NL, "kernel void block_typedef_mltpl_g(__global int* res)" + NL, "{" + NL, " size_t tid = get_global_id(0);" + NL, " res[tid] = -1;" + NL, " block4_t b4 = ^(float f1, float f2) { return (int)(f1 + f2); };" + NL, " res[tid] = b1(1.1, b2(1.1, 1)) - b4(b3(1,1), 1.1);" + NL, "}" + NL +}; + +static const char* block_literal[] = +{ + NL, "int func()" + NL, "{" + NL, " return ^(int i) {" + NL, " return ^(ushort us)" + NL, " {" + NL, " return (int)us + i;" + NL, " }(3);" + NL, " }(7) - 10;" + NL, "}" + NL, "kernel void block_literal(__global int* res)" + NL, "{" + NL, " size_t tid = get_global_id(0);" + NL, " res[tid] = -1;" + NL, " res[tid] = func();" + NL, "}" + NL +}; + +static const char* block_complex[] = +{ + NL, "kernel void block_complex(__global int* res)" + NL, "{" + NL, " int (^kernelBlock)(int) = ^(int num)" + NL, " {" + NL, " int result = 1;" + NL, " for (int i = 0; i < num; i++)" + NL, " {" + NL, " switch(i)" + NL, " {" + NL, " case 0:" + NL, " case 1:" + NL, " case 2:" + NL, " result += i;" + NL, " break;" + NL, " case 3:" + NL, " if (result < num)" + NL, " result += i;" + NL, " else" + NL, " result += i * 2;" + NL, " break;" + NL, " case 4:" + NL, " while (1)" + NL, " {" + NL, " result++;" + NL, " if (result)" + NL, " 
goto ret;" + NL, " }" + NL, " break;" + NL, " default:" + NL, " return 777;" + NL, " }" + NL, " }" + NL, " ret: ;" + NL, " while (num) {" + NL, " num--;" + NL, " if (num % 2 == 0)" + NL, " continue;" + NL, " result++;" + NL, " }" + NL, " return result;" + NL, " };" + NL, " size_t tid = get_global_id(0);" + NL, " res[tid] = -1;" + NL, " res[tid] = kernelBlock(7) - 11;" + NL, "}" + NL +}; + +static const char* block_empty[] = +{ + NL, "kernel void block_empty(__global int* res)" + NL, "{" + NL, " void (^kernelBlock)(void) = ^(){};" + NL, " size_t tid = get_global_id(0);" + NL, " res[tid] = -1;" + NL, " kernelBlock();" + NL, " res[tid] = 0;" + NL, "}" + NL +}; + +static const char* block_builtin[] = +{ + NL, "kernel void block_builtin(__global int* res)" + NL, "{" + NL, " int b = 3;" + NL, " int (^kernelBlock)(int) = ^(int a)" + NL, " {" + NL, " return (int)abs(a - b);" + NL, " };" + NL, " size_t tid = get_global_id(0);" + NL, " res[tid] = -1;" + NL, " res[tid] = kernelBlock(2) - 1;" + NL, "}" + NL +}; + +static const char* block_barrier[] = +{ + NL, "kernel void block_barrier(__global int* res)" + NL, "{" + NL, " int b = 3;" + NL, " size_t tid = get_global_id(0);" + NL, " size_t lsz = get_local_size(0);" + NL, " size_t gid = get_group_id(0);" + NL, " size_t idx = gid*lsz;" + NL, "" + NL, " int (^kernelBlock)(int) = ^(int a)" + NL, " {" + NL, " atomic_inc(res+idx);" + NL, " barrier(CLK_GLOBAL_MEM_FENCE);" + NL, " return (int)abs(a - b) - (res[idx] != lsz ? 
0 : 1);" + NL, " };" + NL, "" + NL, " int d = kernelBlock(2);" + NL, " barrier(CLK_GLOBAL_MEM_FENCE);" + NL, " res[tid] = d;" + NL, "}" + NL +}; + + + +static const kernel_src sources_execute_block[] = +{ + // Simple blocks + KERNEL(block_global_scope), + KERNEL(block_kernel_scope), + KERNEL(block_statement_scope), + KERNEL(block_function_scope), + KERNEL(block_nested_scope), + + // Kernels with Block in for/while/if/switch + KERNEL(block_in_for_init), + KERNEL(block_in_for_cond), + KERNEL(block_in_for_iter), + KERNEL(block_in_while_cond), + KERNEL(block_in_while_body), + KERNEL(block_in_do_while_body), + KERNEL(block_cond_statement), + KERNEL(block_in_if_cond), + KERNEL(block_in_if_branch), + KERNEL(block_switch_cond), + KERNEL(block_switch_case), + KERNEL(block_literal), + + // Accessing data from block + KERNEL(block_access_program_data), + KERNEL(block_access_kernel_data), + KERNEL(block_access_chained_data), + KERNEL(block_access_volatile_data), + + // Block args + KERNEL(block_arg_struct), + KERNEL(block_arg_types_mix), + KERNEL(block_arg_pointer), + KERNEL(block_arg_global_p), + KERNEL(block_arg_const_p), + KERNEL(block_ret_struct), + KERNEL(block_arg_global_var), + + // Block in typedef + KERNEL(block_typedef_kernel), + KERNEL(block_typedef_func), + KERNEL(block_typedef_stmnt_if), + KERNEL(block_typedef_loop), + KERNEL(block_typedef_mltpl_func), + KERNEL(block_typedef_mltpl_stmnt), + KERNEL(block_typedef_mltpl_g), + + // Non - trivial blocks + KERNEL(block_complex), + KERNEL(block_empty), + KERNEL(block_builtin), + KERNEL(block_barrier), + +}; +static const size_t num_kernels_execute_block = arr_size(sources_execute_block); + +static int check_kernel_results(cl_int* results, cl_int len) +{ + for(cl_int i = 0; i < len; ++i) + { + if(results[i] != 0) return i; + } + return -1; +} + +int test_execute_block(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + size_t i; + size_t ret_len; + cl_int n, err_ret, res = 0; + 
clCommandQueueWrapper dev_queue; + cl_int kernel_results[MAX_GWS] = {0xDEADBEEF}; + + size_t max_local_size = 1; + err_ret = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(max_local_size), &max_local_size, &ret_len); + test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_MAX_WORK_GROUP_SIZE) failed"); + + size_t global_size = MAX_GWS; + size_t local_size = (max_local_size > global_size/16) ? global_size/16 : max_local_size; + + size_t failCnt = 0; + for(i = 0; i < num_kernels_execute_block; ++i) + { + if (!gKernelName.empty() && gKernelName != sources_execute_block[i].kernel_name) + continue; + + log_info("Running '%s' kernel (%d of %d) ...\n", sources_execute_block[i].kernel_name, i + 1, num_kernels_execute_block); + err_ret = run_n_kernel_args(context, queue, sources_execute_block[i].lines, sources_execute_block[i].num_lines, sources_execute_block[i].kernel_name, local_size, global_size, kernel_results, sizeof(kernel_results), 0, NULL); + if(check_error(err_ret, "'%s' kernel execution failed", sources_execute_block[i].kernel_name)) { ++failCnt; res = -1; } + else if((n = check_kernel_results(kernel_results, arr_size(kernel_results))) >= 0 && check_error(-1, "'%s' kernel results validation failed: [%d] returned %d expected 0", sources_execute_block[i].kernel_name, n, kernel_results[n])) { ++failCnt; res = -1; } + else log_info("'%s' kernel is OK.\n", sources_execute_block[i].kernel_name); + } + + if (failCnt > 0) + { + log_error("ERROR: %d of %d kernels failed.\n", failCnt, num_kernels_execute_block); + } + + return res; +} + + +#endif + diff --git a/test_conformance/device_execution/host_multi_queue.cpp b/test_conformance/device_execution/host_multi_queue.cpp new file mode 100644 index 00000000..5b572d72 --- /dev/null +++ b/test_conformance/device_execution/host_multi_queue.cpp @@ -0,0 +1,228 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include +#include +#include "../../test_common/harness/testHarness.h" +#include "../../test_common/harness/typeWrappers.h" + +#include + +#include "procs.h" +#include "utils.h" +#include + + +#ifdef CL_VERSION_2_0 +extern int gWimpyMode; +static const char* multi_queue_simple_block1[] = +{ + NL, "void block_fn(size_t tid, int mul, __global int* res)" + NL, "{" + NL, " res[tid] = mul * 7 - 21;" + NL, "}" + NL, "" + NL, "kernel void multi_queue_simple_block1(__global int* res)" + NL, "{" + NL, " int multiplier = 3;" + NL, " size_t tid = get_global_id(0);" + NL, "" + NL, " void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); };" + NL, "" + NL, " res[tid] = -1;" + NL, " queue_t def_q = get_default_queue();" + NL, " ndrange_t ndrange = ndrange_1D(1);" + NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" + NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" + NL, "}" + NL +}; + +static const char* multi_queue_simple_block2[] = +{ + NL, "void block_fn(size_t tid, int mul, __global int* res)" + NL, "{" + NL, " res[tid] = mul * 7 - 21;" + NL, "}" + NL, "" + NL, "kernel void multi_queue_simple_block2(__global int* res)" + NL, "{" + NL, " int multiplier = 3;" + NL, " size_t tid = get_global_id(0);" + NL, "" + NL, " void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); };" + NL, "" + NL, " res[tid] = -1;" + NL, " queue_t def_q = 
get_default_queue();" + NL, " ndrange_t ndrange = ndrange_1D(1);" + NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" + NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" + NL, "}" + NL +}; + +static const char* multi_queue_simple_block3[] = +{ + NL, "void block_fn(size_t tid, int mul, __global int* res)" + NL, "{" + NL, " res[tid] = mul * 7 - 21;" + NL, "}" + NL, "" + NL, "kernel void multi_queue_simple_block3(__global int* res)" + NL, "{" + NL, " int multiplier = 3;" + NL, " size_t tid = get_global_id(0);" + NL, "" + NL, " void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); };" + NL, "" + NL, " res[tid] = -1;" + NL, " queue_t def_q = get_default_queue();" + NL, " ndrange_t ndrange = ndrange_1D(1);" + NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" + NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" + NL, "}" + NL +}; + +static const char* multi_queue_simple_block4[] = +{ + NL, "void block_fn(size_t tid, int mul, __global int* res)" + NL, "{" + NL, " res[tid] = mul * 7 - 21;" + NL, "}" + NL, "" + NL, "kernel void multi_queue_simple_block4(__global int* res)" + NL, "{" + NL, " int multiplier = 3;" + NL, " size_t tid = get_global_id(0);" + NL, "" + NL, " void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); };" + NL, "" + NL, " res[tid] = -1;" + NL, " queue_t def_q = get_default_queue();" + NL, " ndrange_t ndrange = ndrange_1D(1);" + NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" + NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" + NL, "}" + NL +}; + +static const kernel_src sources_multi_queue_block[] = +{ + KERNEL(multi_queue_simple_block1), + KERNEL(multi_queue_simple_block2), + KERNEL(multi_queue_simple_block3), + KERNEL(multi_queue_simple_block4), +}; +static const size_t num_kernels_multi_queue_block = arr_size(sources_multi_queue_block); + + +int 
test_host_multi_queue(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + cl_uint i; + cl_int err_ret, res = 0; + clCommandQueueWrapper dev_queue; + cl_int kernel_results[MAX_GWS] = {0}; + + size_t ret_len; + cl_uint max_queues = 1; + cl_uint maxQueueSize = 0; + err_ret = clGetDeviceInfo(device, CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE, sizeof(maxQueueSize), &maxQueueSize, 0); + test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE) failed"); + + err_ret = clGetDeviceInfo(device, CL_DEVICE_MAX_ON_DEVICE_QUEUES, sizeof(max_queues), &max_queues, &ret_len); + test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_MAX_ON_DEVICE_QUEUES) failed"); + + size_t max_local_size = 1; + err_ret = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(max_local_size), &max_local_size, &ret_len); + test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_MAX_WORK_GROUP_SIZE) failed"); + + cl_queue_properties queue_prop_def[] = + { + CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE|CL_QUEUE_ON_DEVICE|CL_QUEUE_ON_DEVICE_DEFAULT, + CL_QUEUE_SIZE, maxQueueSize, + 0 + }; + + dev_queue = clCreateCommandQueueWithProperties(context, device, queue_prop_def, &err_ret); + test_error(err_ret, "clCreateCommandQueueWithProperties(CL_QUEUE_DEVICE|CL_QUEUE_DEFAULT) failed"); + + cl_uint n = num_kernels_multi_queue_block; // Number of host queues + std::vector queues(n); + std::vector q(n); + std::vector program(n); + std::vector kernel(n); + std::vector mem(n); + std::vector event(n); + + for(i = 0; i < n; ++i) + { + queues[i] = clCreateCommandQueueWithProperties(context, device, NULL, &err_ret); + if(check_error(err_ret, "clCreateCommandQueueWithProperties() failed")) { res = -1; break; } + q[i] = queues[i]; + } + + if(err_ret == CL_SUCCESS) + { + for(i = 0; i < n; ++i) + { + size_t global = MAX_GWS; + if(gWimpyMode) + { + global = 16; + } + + err_ret |= create_single_kernel_helper_with_build_options(context, &program[i], &kernel[i], 
sources_multi_queue_block[i].num_lines, sources_multi_queue_block[i].lines, sources_multi_queue_block[i].kernel_name, "-cl-std=CL2.0"); + if(check_error(err_ret, "Create single kernel failed")) { res = -1; break; } + + mem[i] = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(kernel_results), kernel_results, &err_ret); + if(check_error(err_ret, "clCreateBuffer() failed")) { res = -1; break; } + + err_ret |= clSetKernelArg(kernel[i], 0, sizeof(cl_mem), &mem[i]); + if(check_error(err_ret, "clSetKernelArg(0) failed")) { res = -1; break; } + + err_ret |= clEnqueueNDRangeKernel(q[i], kernel[i], 1, NULL, &global, 0, 0, NULL, &event[i]); + if(check_error(err_ret, "clEnqueueNDRangeKernel() failed")) { res = -1; break; } + } + } + + if(err_ret == CL_SUCCESS) + { + for(i = 0; i < n; ++i) + { + cl_int status; + err_ret = clEnqueueReadBuffer(q[i], mem[i], CL_TRUE, 0, sizeof(kernel_results), kernel_results, 0, NULL, NULL); + if(check_error(err_ret, "clEnqueueReadBuffer() failed")) { res = -1; break; } + + err_ret = clGetEventInfo(event[i], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(status), &status, &ret_len); + if(check_error(err_ret, "clGetEventInfo() failed")) { res = -1; break; } + +#if CL_COMPLETE != CL_SUCCESS +#error Fix me! +#endif + // This hack is possible because both CL_COMPLETE and CL_SUCCESS defined as 0x00 + if(check_error(status, "Kernel execution status %d", status)) { err_ret = status; res = -1; break; } + else if(kernel_results[0] != 0 && check_error(-1, "'%s' kernel results validation failed = %d", sources_multi_queue_block[i].kernel_name, kernel_results[0])) { res = -1; break; } + } + } + + return res; +} + + + + +#endif + diff --git a/test_conformance/device_execution/host_queue_order.cpp b/test_conformance/device_execution/host_queue_order.cpp new file mode 100644 index 00000000..51a7b718 --- /dev/null +++ b/test_conformance/device_execution/host_queue_order.cpp @@ -0,0 +1,185 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include +#include +#include "../../test_common/harness/testHarness.h" +#include "../../test_common/harness/typeWrappers.h" + +#include + +#include "procs.h" +#include "utils.h" +#include + +extern int gWimpyMode; + +#ifdef CL_VERSION_2_0 + +static const char* enqueue_block_first_kernel[] = +{ + NL, "void block_fn(uint num, __global int* res)" + NL, "{" + NL, " size_t tid = get_global_id(0);" + NL, "" + NL, " for(int i = 1 ; i < tid ; i++)" + NL, " {" + NL, " for(int j = 0 ; j < num ; j++)" + NL, " atomic_add(res+tid, (int)sqrt((float)i*i) / i);" + NL, " }" + NL, "}" + NL, "" + NL, "kernel void enqueue_block_first_kernel(uint num, __global int* res)" + NL, "{" + NL, " void (^kernelBlock)(void) = ^{ block_fn(num, res); };" + NL, "" + NL, " ndrange_t ndrange = ndrange_1D(num, 1);" + NL, "" + NL, " int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, kernelBlock);" + NL, " if(enq_res != CLK_SUCCESS) { res[0] = -1; return; }" + NL, "" + NL, "}" + NL +}; + +static const char* enqueue_block_second_kernel[] = +{ + NL, "void block_fn(uint num, __global int* res)" + NL, "{" + NL, " for(int i = 2 ; i < num ; i++)" + NL, " {" + NL, " res[i] = res[i]/num - (i-1);" + NL, " }" + NL, "}" + NL, "" + NL, "kernel void enqueue_block_second_kernel(uint num, __global int* res)" + NL, "{" + NL, " void (^kernelBlock)(void) = ^{ block_fn(num, res); };" + NL, "" + NL, " ndrange_t ndrange = 
ndrange_1D(1);" + NL, "" + NL, " int enq_res = enqueue_kernel(get_default_queue(), CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" + NL, " if(enq_res != CLK_SUCCESS) { res[0] = -1; return; }" + NL, "" + NL, "}" + NL +}; + +static int check_kernel_results(cl_int* results, cl_int len) +{ + for(cl_int i = 0; i < len; ++i) + { + if(results[i] != 0) return i; + } + return -1; +} + +/* + Test checks kernel block execution order in case of two different kernels with enqueue block submitted to one ordered host queue. +*/ +int test_host_queue_order(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + cl_int k, err_ret, res = 0; + clCommandQueueWrapper dev_queue; + cl_int kernel_results[MAX_GWS] = {0}; + + size_t ret_len; + cl_uint max_queues = 1; + cl_uint maxQueueSize = 0; + err_ret = clGetDeviceInfo(device, CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE, sizeof(maxQueueSize), &maxQueueSize, 0); + test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE) failed"); + + err_ret = clGetDeviceInfo(device, CL_DEVICE_MAX_ON_DEVICE_QUEUES, sizeof(max_queues), &max_queues, &ret_len); + test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_MAX_ON_DEVICE_QUEUES) failed"); + + size_t max_local_size = 1; + err_ret = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(max_local_size), &max_local_size, &ret_len); + test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_MAX_WORK_GROUP_SIZE) failed"); + + cl_queue_properties queue_prop_def[] = + { + CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE|CL_QUEUE_ON_DEVICE|CL_QUEUE_ON_DEVICE_DEFAULT, + CL_QUEUE_SIZE, maxQueueSize, + 0 + }; + + dev_queue = clCreateCommandQueueWithProperties(context, device, queue_prop_def, &err_ret); + test_error(err_ret, "clCreateCommandQueueWithProperties(CL_QUEUE_DEVICE|CL_QUEUE_DEFAULT) failed"); + + cl_int status; + size_t size = 1; + cl_int result[MAX_GWS] = { 0 }; + cl_uint num = arr_size(result); + if( gWimpyMode ) + { + num = MAX(num / 16, 4); + } + + 
clMemWrapper res_mem; + clProgramWrapper program1, program2; + clKernelWrapper kernel1, kernel2; + + cl_event kernel_event; + + err_ret = create_single_kernel_helper_with_build_options(context, &program1, &kernel1, arr_size(enqueue_block_first_kernel), enqueue_block_first_kernel, "enqueue_block_first_kernel", "-cl-std=CL2.0"); + if(check_error(err_ret, "Create single kernel failed")) return -1; + + err_ret = create_single_kernel_helper_with_build_options(context, &program2, &kernel2, arr_size(enqueue_block_second_kernel), enqueue_block_second_kernel, "enqueue_block_second_kernel", "-cl-std=CL2.0"); + if(check_error(err_ret, "Create single kernel failed")) return -1; + + res_mem = clCreateBuffer(context, CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR, sizeof(kernel_results), kernel_results, &err_ret); + test_error(err_ret, "clCreateBuffer() failed"); + + // Enqueue first kernel + err_ret = clSetKernelArg(kernel1, 0, sizeof(num), &num); + test_error(err_ret, "clSetKernelArg(0) failed"); + err_ret = clSetKernelArg(kernel1, 1, sizeof(cl_mem), &res_mem); + test_error(err_ret, "clSetKernelArg(1) failed"); + + cl_event event1 = clCreateUserEvent(context, &err_ret); + if(check_error(err_ret, "Create user event failed")) return -1; + + err_ret = clEnqueueNDRangeKernel(queue, kernel1, 1, NULL, &size, &size, 1, &event1, NULL); + test_error(err_ret, "clEnqueueNDRangeKernel('enqueue_block_first_kernel') failed"); + + // Enqueue second kernel + err_ret = clSetKernelArg(kernel2, 0, sizeof(num), &num); + test_error(err_ret, "clSetKernelArg(0) failed"); + err_ret = clSetKernelArg(kernel2, 1, sizeof(cl_mem), &res_mem); + test_error(err_ret, "clSetKernelArg(1) failed"); + + err_ret = clEnqueueNDRangeKernel(queue, kernel2, 1, NULL, &size, &size, 0, NULL, &kernel_event); + test_error(err_ret, "clEnqueueNDRangeKernel('enqueue_block_second_kernel') failed"); + + //Triger execution of first kernel + err_ret = clSetUserEventStatus(event1, CL_COMPLETE); + test_error(err_ret, "clSetUserEventStatus() 
failed"); + + // Collect resulsts + err_ret = clEnqueueReadBuffer(queue, res_mem, CL_TRUE, 0, sizeof(result), result, 0, NULL, NULL); + test_error(err_ret, "clEnqueueReadBuffer() failed"); + + err_ret = clGetEventInfo(kernel_event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(status), &status, &ret_len); + test_error(err_ret, "clGetEventInfo() failed"); + + if(check_error(status, "Kernel execution status %d", status)) return status; + + if((k = check_kernel_results(result, num)) >= 0 && check_error(-1, "'%s' results validation failed: [%d] returned %d expected 0", "test_host_queue_order", k, result[k])) res = -1; + + return res; +} + +#endif + diff --git a/test_conformance/device_execution/main.c b/test_conformance/device_execution/main.c new file mode 100644 index 00000000..da2cdc42 --- /dev/null +++ b/test_conformance/device_execution/main.c @@ -0,0 +1,101 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include +#include + +#if !defined(_WIN32) +#include +#endif + +#include "../../test_common/harness/testHarness.h" +#include "../../test_common/harness/parseParameters.h" +#include "utils.h" +#include "procs.h" + +std::string gKernelName; +int gWimpyMode = 0; + +basefn basefn_list[] = +{ +#ifdef CL_VERSION_2_0 + test_device_info, + test_device_queue, + test_execute_block, + test_enqueue_block, + test_enqueue_nested_blocks, + test_enqueue_wg_size, + test_enqueue_flags, + test_enqueue_multi_queue, + test_host_multi_queue, + test_enqueue_ndrange, + test_host_queue_order, +#endif +}; + +const char *commonfn_names[] = +{ +#ifdef CL_VERSION_2_0 + "test_device_info", + "test_device_queue", + "test_execute_block", + "test_enqueue_block", + "test_enqueue_nested_blocks", + "test_enqueue_wg_size", + "test_enqueue_flags", + "test_enqueue_multi_queue", + "test_host_multi_queue", + "test_enqueue_ndrange", + "test_host_queue_order", +#endif +}; + +ct_assert(arr_size(commonfn_names) == arr_size(basefn_list)) + +static const int num_commonfns = arr_size(commonfn_names); + +int +main(int argc, const char *argv[]) +{ + argc = parseCustomParam(argc, argv); + + for (int i = 0; i < argc; ++i) { + int argsRemoveNum = 0; + if ( strcmp(argv[i], "-kernelName") == 0 ) { + if((i + 1) >= argc || argv[i + 1] == NULL) { + vlog( "Missing value for -kernelName argument\n"); + return -1; + } + + gKernelName = std::string(argv[i + 1]); + argsRemoveNum += 2; + } + if (strcmp(argv[i], "-w") == 0 ){ + gWimpyMode = 1; + argsRemoveNum += 1; + } + + + if (argsRemoveNum > 0) { + for (int j = i; j < (argc - argsRemoveNum); ++j) + argv[j] = argv[j + argsRemoveNum]; + + argc -= argsRemoveNum; + --i; + } + } + + return runTestHarness(argc, argv, num_commonfns, basefn_list, commonfn_names, false, false, 0); +} diff --git a/test_conformance/device_execution/nested_blocks.cpp b/test_conformance/device_execution/nested_blocks.cpp new file mode 100644 index 00000000..fd075527 --- /dev/null +++ 
b/test_conformance/device_execution/nested_blocks.cpp @@ -0,0 +1,374 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include +#include +#include "../../test_common/harness/testHarness.h" +#include "../../test_common/harness/typeWrappers.h" + +#include + +#include "procs.h" +#include "utils.h" +#include + + +#ifdef CL_VERSION_2_0 + +static int gNestingLevel = 4; +extern int gWimpyMode; + +static const char* enqueue_nested_blocks_single[] = +{ + NL, "void block_fn(__global int* res, int level)" + NL, "{" + NL, " size_t tid = get_global_id(0);" + NL, " queue_t def_q = get_default_queue();" + NL, " ndrange_t ndrange = ndrange_1D(3);" + NL, " if(--level < 0) return;" + NL, "" + NL, " void (^kernelBlock)(void) = ^{ block_fn(res, level); };" + NL, "" + NL, " // Only 1 work-item enqueues block" + NL, " if(tid == 1)" + NL, " {" + NL, " res[tid]++;" + NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" + NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" + NL, " }" + NL, "}" + NL, "" + NL, "kernel void enqueue_nested_blocks_single(__global int* res, int level)" + NL, "{" + NL, " block_fn(res, level);" + NL, "}" + NL +}; + +static const char* enqueue_nested_blocks_some_eq[] = +{ + NL, "void block_fn(int level, __global int* res)" + NL, "{" + NL, " size_t tid = get_global_id(0);" + NL, " queue_t def_q = get_default_queue();" + NL, " ndrange_t ndrange = 
ndrange_1D(10);" + NL, " if(--level < 0) return;" + NL, "" + NL, " void (^kernelBlock)(void) = ^{ block_fn(level, res); };" + NL, "" + NL, " // Some work-items enqueues nested blocks with the same level" + NL, " if(tid < (get_global_size(0) >> 1))" + NL, " {" + NL, " atomic_inc(&res[tid]);" + NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" + NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" + NL, " }" + NL, "}" + NL, "" + NL, "kernel void enqueue_nested_blocks_some_eq(__global int* res, int level)" + NL, "{" + NL, " block_fn(level, res);" + NL, "}" + NL +}; + +static const char* enqueue_nested_blocks_some_diff[] = +{ + NL, "void block_fn(int level, __global int* res)" + NL, "{" + NL, " size_t tid = get_global_id(0);" + NL, " queue_t def_q = get_default_queue();" + NL, " ndrange_t ndrange = ndrange_1D(10);" + NL, " if(--level < 0) return;" + NL, "" + NL, " void (^kernelBlock)(void) = ^{ block_fn(level, res); };" + NL, "" + NL, " // Some work-items enqueues nested blocks with different levels" + NL, " if(tid % 2)" + NL, " {" + NL, " atomic_inc(&res[tid]);" + NL, " if(level >= tid)" + NL, " {" + NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" + NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" + NL, " }" + NL, " }" + NL, "}" + NL, "" + NL, "kernel void enqueue_nested_blocks_some_diff(__global int* res, int level)" + NL, "{" + NL, " block_fn(level, res);" + NL, "}" + NL +}; + +static const char* enqueue_nested_blocks_all_eq[] = +{ + NL, "void block_fn(int level, __global int* res)" + NL, "{" + NL, " size_t tid = get_global_id(0);" + NL, " queue_t def_q = get_default_queue();" + NL, " ndrange_t ndrange = ndrange_1D(4);" + NL, " if(--level < 0) return;" + NL, "" + NL, " void (^kernelBlock)(void) = ^{ block_fn(level, res); };" + NL, "" + NL, " // All work-items enqueues nested blocks with the same level" + NL, " atomic_inc(&res[tid]);" + NL, " int 
enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" + NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" + NL, "}" + NL, "" + NL, "kernel void enqueue_nested_blocks_all_eq(__global int* res, int level)" + NL, "{" + NL, " block_fn(level, res);" + NL, "}" + NL +}; + +static const char* enqueue_nested_blocks_all_diff[] = +{ + NL, "void block_fn(int level, __global int* res)" + NL, "{" + NL, " size_t tid = get_global_id(0);" + NL, " queue_t def_q = get_default_queue();" + NL, " ndrange_t ndrange = ndrange_1D(10);" + NL, " if(--level < 0) return;" + NL, "" + NL, " void (^kernelBlock)(void) = ^{ block_fn(level, res); };" + NL, "" + NL, " // All work-items enqueues nested blocks with different levels" + NL, " atomic_inc(&res[tid]);" + NL, " if(level >= tid)" + NL, " {" + NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" + NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" + NL, " }" + NL, "}" + NL, "" + NL, "kernel void enqueue_nested_blocks_all_diff(__global int* res, int level)" + NL, "{" + NL, " block_fn(level, res);" + NL, "}" + NL +}; + +static int check_single(cl_int* results, cl_int len, cl_int nesting_level) +{ + int i, fail = -1; + const cl_uint tid = 1; + + for(i = 0; i < len; ++i) + { + if(i != tid && results[i] != 0) { fail = i; break; } + if(i == tid && results[i] != nesting_level) { fail = i; break; } + } + return fail; +} + +void generate_reference_some_eq(std::vector &referenceResults, cl_int len, cl_int nesting_level) +{ + size_t globalWorkSize = (nesting_level == gNestingLevel)? 
len: 10; + if(--nesting_level < 0) return; + + for (size_t tid = 0; tid < globalWorkSize; ++tid) + { + if (tid < (globalWorkSize >> 1)) + { + ++referenceResults[tid]; + generate_reference_some_eq(referenceResults, len, nesting_level); + } + } +} + +static int check_some_eq(cl_int* results, cl_int len, cl_int nesting_level) +{ + int i, fail = -1; + std::vector referenceResults(len, 0); + generate_reference_some_eq(referenceResults, len, nesting_level); + + for(i = 0; i < len; ++i) + { + if (results[i] != referenceResults[i]) { fail = i; break; } + } + + return fail; +} + +void generate_reference_some_diff(std::vector &referenceResults, cl_int len, cl_int nesting_level) +{ + size_t globalWorkSize = (nesting_level == gNestingLevel)? len: 10; + if(--nesting_level < 0) return; + + for (size_t tid = 0; tid < globalWorkSize; ++tid) + { + if (tid % 2) + { + ++referenceResults[tid]; + if (nesting_level >= tid) + { + generate_reference_some_diff(referenceResults, len, nesting_level); + } + } + } +} + +static int check_some_diff(cl_int* results, cl_int len, cl_int nesting_level) +{ + int i, fail = -1; + std::vector referenceResults(len, 0); + generate_reference_some_diff(referenceResults, len, nesting_level); + + for(i = 0; i < len; ++i) + { + if (results[i] != referenceResults[i]) { fail = i; break; } + } + + return fail; +} + +void generate_reference_all_eq(std::vector &referenceResults, cl_int len, cl_int nesting_level) +{ + size_t globalWorkSize = (nesting_level == gNestingLevel)? 
len: 4; + if(--nesting_level < 0) return; + + for (size_t tid = 0; tid < globalWorkSize; ++tid) + { + ++referenceResults[tid]; + generate_reference_all_eq(referenceResults, len, nesting_level); + } +} + +static int check_all_eq(cl_int* results, cl_int len, cl_int nesting_level) +{ + int i, fail = -1; + std::vector referenceResults(len, 0); + generate_reference_all_eq(referenceResults, len, nesting_level); + + for(i = 0; i < len; ++i) + { + if (results[i] != referenceResults[i]) { fail = i; break; } + } + + return fail; +} + +void generate_reference_all_diff(std::vector &referenceResults, cl_int len, cl_int nesting_level) +{ + size_t globalWorkSize = (nesting_level == gNestingLevel)? len: 10; + if(--nesting_level < 0) return; + + for (size_t tid = 0; tid < globalWorkSize; ++tid) + { + ++referenceResults[tid]; + if (nesting_level >= tid) + { + generate_reference_all_diff(referenceResults, len, nesting_level); + } + } +} + +static int check_all_diff(cl_int* results, cl_int len, cl_int nesting_level) +{ + int i, fail = -1; + std::vector referenceResults(len, 0); + generate_reference_all_diff(referenceResults, len, nesting_level); + + for(i = 0; i < len; ++i) + { + if (results[i] != referenceResults[i]) { fail = i; break; } + } + + return fail; +} + +static const kernel_src_check sources_nested_blocks[] = +{ + { KERNEL(enqueue_nested_blocks_single), check_single }, + { KERNEL(enqueue_nested_blocks_some_eq), check_some_eq }, + { KERNEL(enqueue_nested_blocks_some_diff), check_some_diff }, + { KERNEL(enqueue_nested_blocks_all_eq), check_all_eq }, + { KERNEL(enqueue_nested_blocks_all_diff), check_all_diff } +}; + +int test_enqueue_nested_blocks(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + cl_uint i, k; + cl_int err_ret, res = 0; + clCommandQueueWrapper dev_queue; + const size_t MAX_GLOBAL_WORK_SIZE = MAX_GWS / 4; + cl_int kernel_results[MAX_GLOBAL_WORK_SIZE] = {0}; + + if(gWimpyMode) + { + gNestingLevel = 2; + vlog( "*** WARNING: 
Testing in Wimpy mode! ***\n" ); + vlog( "*** Wimpy mode is not sufficient to verify correctness. ***\n" ); + } + + size_t ret_len; + cl_uint max_queues = 1; + cl_uint maxQueueSize = 0; + err_ret = clGetDeviceInfo(device, CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE, sizeof(maxQueueSize), &maxQueueSize, 0); + test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE) failed"); + + err_ret = clGetDeviceInfo(device, CL_DEVICE_MAX_ON_DEVICE_QUEUES, sizeof(max_queues), &max_queues, &ret_len); + test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_MAX_ON_DEVICE_QUEUES) failed"); + + cl_queue_properties queue_prop_def[] = + { + CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE|CL_QUEUE_ON_DEVICE|CL_QUEUE_ON_DEVICE_DEFAULT, + CL_QUEUE_SIZE, maxQueueSize, + 0 + }; + + dev_queue = clCreateCommandQueueWithProperties(context, device, queue_prop_def, &err_ret); + test_error(err_ret, "clCreateCommandQueueWithProperties(CL_QUEUE_DEVICE|CL_QUEUE_DEFAULT) failed"); + + kernel_arg args[] = + { + { sizeof(cl_int), &gNestingLevel } + }; + + size_t failCnt = 0; + for(k = 0; k < arr_size(sources_nested_blocks); ++k) + { + if (!gKernelName.empty() && gKernelName != sources_nested_blocks[k].src.kernel_name) + continue; + + log_info("Running '%s' kernel (%d of %d) ...\n", sources_nested_blocks[k].src.kernel_name, k + 1, arr_size(sources_nested_blocks)); + for(i = 0; i < MAX_GLOBAL_WORK_SIZE; ++i) kernel_results[i] = 0; + + err_ret = run_n_kernel_args(context, queue, sources_nested_blocks[k].src.lines, sources_nested_blocks[k].src.num_lines, sources_nested_blocks[k].src.kernel_name, 0, MAX_GLOBAL_WORK_SIZE, kernel_results, sizeof(kernel_results), arr_size(args), args); + if(check_error(err_ret, "'%s' kernel execution failed", sources_nested_blocks[k].src.kernel_name)) { res = -1; continue ; } + + //check results + int fail = sources_nested_blocks[k].check(kernel_results, MAX_GLOBAL_WORK_SIZE, gNestingLevel); + + if(check_error(err_ret, "'%s' kernel execution failed", 
sources_nested_blocks[k].src.kernel_name)) { ++failCnt; res = -1; continue; } + else if(fail >= 0 && check_error(-1, "'%s' kernel results validation failed: [%d] returned %d expected 0", sources_nested_blocks[k].src.kernel_name, fail, kernel_results[fail])) { ++failCnt; res = -1; continue; } + else log_info("'%s' kernel is OK.\n", sources_nested_blocks[k].src.kernel_name); + } + + if (failCnt > 0) + { + log_error("ERROR: %d of %d kernels failed.\n", failCnt, arr_size(sources_nested_blocks)); + } + + return res; +} + +#endif + diff --git a/test_conformance/device_execution/procs.h b/test_conformance/device_execution/procs.h new file mode 100644 index 00000000..53b6be41 --- /dev/null +++ b/test_conformance/device_execution/procs.h @@ -0,0 +1,40 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/testHarness.h" + +#ifdef __cplusplus +extern "C" { +#endif + +extern int test_device_info(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); +extern int test_device_queue(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); +extern int test_execute_block(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); +extern int test_enqueue_block(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); +extern int test_enqueue_nested_blocks(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); +extern int test_enqueue_wg_size(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); +extern int test_enqueue_flags(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); +extern int test_enqueue_multi_queue(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); +extern int test_host_multi_queue(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); +extern int test_enqueue_ndrange(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); +extern int test_host_queue_order(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_execution_stress(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); + +#ifdef __cplusplus +} +#endif + + diff --git a/test_conformance/device_execution/utils.cpp b/test_conformance/device_execution/utils.cpp new file mode 100644 index 00000000..b349b2d6 --- /dev/null +++ b/test_conformance/device_execution/utils.cpp @@ -0,0 +1,76 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include +#include +#include "../../test_common/harness/testHarness.h" +#include "../../test_common/harness/typeWrappers.h" + +#include "utils.h" + +int run_single_kernel(cl_context context, cl_command_queue queue, const char** source, unsigned int num_lines, const char* kernel_name, void* results, size_t res_size) +{ + return run_single_kernel_args(context, queue, source, num_lines, kernel_name, results, res_size, 0, NULL); +} + +int run_single_kernel_args(cl_context context, cl_command_queue queue, const char** source, unsigned int num_lines, const char* kernel_name, void* results, size_t res_size, cl_uint num_args, kernel_arg* args) +{ + return run_n_kernel_args(context, queue, source, num_lines, kernel_name, 1, 1, results, res_size, num_args, args); +} + +int run_n_kernel_args(cl_context context, cl_command_queue queue, const char** source, unsigned int num_lines, const char* kernel_name, size_t local, size_t global, void* results, size_t res_size, cl_uint num_args, kernel_arg* args) +{ + cl_int err_ret, status; + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper mem; + clEventWrapper event; + cl_uint i; + size_t ret_len; + + err_ret = create_single_kernel_helper_with_build_options(context, &program, &kernel, num_lines, source, kernel_name, "-cl-std=CL2.0"); + if(check_error(err_ret, "Create single kernel failed")) return -1; + + mem = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, res_size, results, &err_ret); + test_error(err_ret, "clCreateBuffer() failed"); + + err_ret = clSetKernelArg(kernel, 0, 
sizeof(cl_mem), &mem); + if(check_error(err_ret, "clSetKernelArg(%d, %d, %p) for kernel: '%s' failed: %d", 0, (int)sizeof(cl_mem), &mem, kernel_name, err_ret)) return err_ret; + + for(i = 0; i < num_args; ++i) + { + err_ret = clSetKernelArg(kernel, i+1, args[i].size, args[i].ptr); + if(check_error(err_ret, "clSetKernelArg(%d, %d, %p) for kernel: '%s' failed: %d", (int)(i+1), (int)args[i].size, args[i].ptr, kernel_name, err_ret)) return err_ret; + } + + err_ret = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, (local ? &local : NULL), 0, NULL, &event); + if(check_error(err_ret, "clEnqueueNDRangeKernel('%s', gws=%d, lws=%d) failed", kernel_name, (int)global, (int)local)) return err_ret; + + err_ret = clEnqueueReadBuffer(queue, mem, CL_TRUE, 0, res_size, results, 0, NULL, NULL); + test_error(err_ret, "clEnqueueReadBuffer() failed"); + + err_ret = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(status), &status, &ret_len); + test_error(err_ret, "clGetEventInfo() failed"); + +#if CL_COMPLETE != CL_SUCCESS +#error Fix me! +#endif + + // This hack is possible because CL_COMPLETE and CL_SUCCESS defined as 0x0 + if(check_error(status, "Kernel execution status %d", status)) return status; + + return 0; +} + diff --git a/test_conformance/device_execution/utils.h b/test_conformance/device_execution/utils.h new file mode 100644 index 00000000..02d7b6c7 --- /dev/null +++ b/test_conformance/device_execution/utils.h @@ -0,0 +1,73 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef _utils_h_ +#define _utils_h_ + +#include "../../test_common/harness/testHarness.h" +#include "../../test_common/harness/mt19937.h" + +#include + +#ifndef CL_VERSION_2_0 +#define CL_VERSION_2_0 +#endif + +#define MAX_QUEUES 1000 // Max number of queues to test +#define MAX_GWS 256 // Global Work Size (must be multiple of 16) + + +#define NL "\n" +#define arr_size(a) (sizeof(a)/sizeof(a[0])) +#define check_error(errCode,msg,...) ((errCode != CL_SUCCESS) ? (log_error("ERROR: " msg "! (%s:%d)\n", ## __VA_ARGS__, __FILE__, __LINE__), 1) : 0) + +#define KERNEL(name) { arr_size(name), name, #name } + +extern std::string gKernelName; + +typedef struct +{ + unsigned int num_lines; + const char** lines; + const char* kernel_name; +} kernel_src; + +typedef int (*fn_check)(cl_int*, cl_int, cl_int); + +typedef struct +{ + kernel_src src; + fn_check check; +} kernel_src_check; + +typedef struct +{ + size_t size; + const void* ptr; +} kernel_arg; + +typedef struct +{ + kernel_src src; + cl_int dim; + cl_bool localSize; + cl_bool offset; +} kernel_src_dim_check; + +int run_single_kernel(cl_context context, cl_command_queue queue, const char** source, unsigned int num_lines, const char* kernel_name, void* results, size_t res_size); +int run_single_kernel_args(cl_context context, cl_command_queue queue, const char** source, unsigned int num_lines, const char* kernel_name, void* results, size_t res_size, cl_uint num_args, kernel_arg* args); +int run_n_kernel_args(cl_context context, cl_command_queue queue, const char** source, unsigned int num_lines, const char* kernel_name, size_t local, size_t global, void* results, size_t res_size, cl_uint num_args, kernel_arg* args); + +#endif diff --git a/test_conformance/device_partition/CMakeLists.txt b/test_conformance/device_partition/CMakeLists.txt new file mode 100644 index 00000000..c391e4c0 --- /dev/null +++ 
b/test_conformance/device_partition/CMakeLists.txt @@ -0,0 +1,17 @@ +set(MODULE_NAME DEVICE_PARTITION) + +set(${MODULE_NAME}_SOURCES + main.c + test_device_partition.cpp + ../../test_common/harness/errorHelpers.c + ../../test_common/harness/threadTesting.c + ../../test_common/harness/testHarness.c + ../../test_common/harness/kernelHelpers.c + ../../test_common/harness/genericThread.cpp + ../../test_common/harness/mt19937.c + ../../test_common/harness/conversions.c + ../../test_common/harness/typeWrappers.cpp + ../../test_common/harness/msvc9.c + ../../test_common/harness/parseParameters.cpp +) +include(../CMakeCommon.txt) diff --git a/test_conformance/device_partition/Jamfile b/test_conformance/device_partition/Jamfile new file mode 100644 index 00000000..0e1d4d6a --- /dev/null +++ b/test_conformance/device_partition/Jamfile @@ -0,0 +1,32 @@ +project + : requirements + gcc:-xc++ + msvc:"/TP" + ; + +exe test_device_partition + : main.c + test_device_partition.cpp + ../../test_common/harness/errorHelpers.c + ../../test_common/harness/threadTesting.c + ../../test_common/harness/testHarness.c + ../../test_common/harness/kernelHelpers.c + ../../test_common/harness/genericThread.cpp + ../../test_common/harness/mt19937.c + ../../test_common/harness/conversions.c + ../../test_common/harness/typeWrappers.cpp + : windows:../../test_common/harness/msvc9.c + ; + +install dist + : test_device_partition + : debug:$(DIST)/debug/tests/conformance/1.2/x86/device_partition + release:$(DIST)/release/tests/conformance/1.2/x86/device_partition + ; + +install dist + : test_device_partition + : debug:$(DIST)/debug/tests/conformance/1.2/x86_64/device_partition + release:$(DIST)/release/tests/conformance/1.2/x86_64/device_partition + 64 + ; diff --git a/test_conformance/device_partition/Makefile b/test_conformance/device_partition/Makefile new file mode 100644 index 00000000..493b0609 --- /dev/null +++ b/test_conformance/device_partition/Makefile @@ -0,0 +1,45 @@ +ifdef BUILD_WITH_ATF +ATF 
= -framework ATF +USE_ATF = -DUSE_ATF +endif + +SRCS = main.c \ + test_device_partition.cpp \ + ../../test_common/harness/errorHelpers.c \ + ../../test_common/harness/threadTesting.c \ + ../../test_common/harness/testHarness.c \ + ../../test_common/harness/kernelHelpers.c \ + ../../test_common/harness/genericThread.cpp \ + ../../test_common/harness/mt19937.c \ + ../../test_common/harness/conversions.c \ + ../../test_common/harness/typeWrappers.cpp \ + ../../test_common/harness/msvc9.c +DEFINES = + +SOURCES = $(abspath $(SRCS)) +LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries +LIBPATH += -L. +FRAMEWORK = $(abspath $(SRCS)) +HEADERS = procs.h testBase.h +TARGET = test_device_partition +INCLUDE = +COMPILERFLAGS = -c -Wall -g -Os -Wshorten-64-to-32 +CC = c++ +CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE) +CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE) +LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${RC_CFLAGS} ${ATF} + +OBJECTS := ${SOURCES:.c=.o} +OBJECTS := ${OBJECTS:.cpp=.o} + +TARGETOBJECT = +all: $(TARGET) + +$(TARGET): $(OBJECTS) + $(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES) + +clean: + rm -f $(TARGET) $(OBJECTS) + +.DEFAULT: + @echo The target \"$@\" does not exist in Makefile. diff --git a/test_conformance/device_partition/main.c b/test_conformance/device_partition/main.c new file mode 100644 index 00000000..cb4173ec --- /dev/null +++ b/test_conformance/device_partition/main.c @@ -0,0 +1,60 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" + +#include +#include +#include "procs.h" +#include "../../test_common/harness/testHarness.h" +#include "../../test_common/harness/mt19937.h" + +#if !defined(_WIN32) +#include +#endif + +basefn basefn_list[] = { + test_partition_equally, + test_partition_by_counts, + test_partition_by_affinity_domain_numa, + test_partition_by_affinity_domain_l4_cache, + test_partition_by_affinity_domain_l3_cache, + test_partition_by_affinity_domain_l2_cache, + test_partition_by_affinity_domain_l1_cache, + test_partition_by_affinity_domain_next_partitionable, + test_partition +}; + + +const char *basefn_names[] = { + "device_partition_equally", + "device_partition_by_counts", + "device_partition_by_affinity_domain_numa", + "device_partition_by_affinity_domain_l4_cache", + "device_partition_by_affinity_domain_l3_cache", + "device_partition_by_affinity_domain_l2_cache", + "device_partition_by_affinity_domain_l1_cache", + "device_partition_by_affinity_domain_next_partitionable", + "device_partition_all", +}; + +ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_list) / sizeof(basefn_list[0]))); + +int num_fns = sizeof(basefn_names) / sizeof(char *); + +int main(int argc, const char *argv[]) +{ + return runTestHarness( argc, argv, num_fns, basefn_list, basefn_names, false, true, 0 ); +} diff --git a/test_conformance/device_partition/procs.h b/test_conformance/device_partition/procs.h new file mode 100644 index 00000000..0f9718f7 --- /dev/null +++ b/test_conformance/device_partition/procs.h @@ 
-0,0 +1,29 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/errorHelpers.h" +#include "../../test_common/harness/kernelHelpers.h" +#include "../../test_common/harness/typeWrappers.h" +#include "../../test_common/harness/mt19937.h" + +extern int test_partition(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_partition_equally(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_partition_by_counts(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_partition_by_affinity_domain_numa(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_partition_by_affinity_domain_l4_cache(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_partition_by_affinity_domain_l3_cache(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_partition_by_affinity_domain_l2_cache(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_partition_by_affinity_domain_l1_cache(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_partition_by_affinity_domain_next_partitionable(cl_device_id deviceID, 
cl_context context, cl_command_queue queue, int num_elements); diff --git a/test_conformance/device_partition/testBase.h b/test_conformance/device_partition/testBase.h new file mode 100644 index 00000000..5073b21f --- /dev/null +++ b/test_conformance/device_partition/testBase.h @@ -0,0 +1,31 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef _testBase_h +#define _testBase_h + +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include + +#include "procs.h" + +#endif // _testBase_h + + + diff --git a/test_conformance/device_partition/test_device_partition.cpp b/test_conformance/device_partition/test_device_partition.cpp new file mode 100644 index 00000000..bd012522 --- /dev/null +++ b/test_conformance/device_partition/test_device_partition.cpp @@ -0,0 +1,589 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "testBase.h" +#include "../../test_common/harness/typeWrappers.h" +#include "../../test_common/harness/testHarness.h" +#include "../../test_common/harness/conversions.h" + +#include + +typedef long long int lld; +typedef long long unsigned llu; + +const char *test_kernels[] = { +"__kernel void kernelA(__global int *dst)\n" +"{\n" +"\n" +" dst[get_global_id(0)]*=3;\n" +"\n" +"}\n" +"__kernel void kernelB(__global int *dst)\n" +"{\n" +"\n" +" dst[get_global_id(0)]++;\n" +"\n" +"}\n" +}; + +#define TEST_SIZE 512 +#define MAX_QUEUES 1000 + +const char *printPartition(cl_device_partition_property partition) +{ + switch (partition) { + case (0): return ""; + case (CL_DEVICE_PARTITION_EQUALLY): return "CL_DEVICE_PARTITION_EQUALLY"; + case (CL_DEVICE_PARTITION_BY_COUNTS): return "CL_DEVICE_PARTITION_BY_COUNTS"; + case (CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN): return "CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN"; + default: return ""; + } // switch +} + +const char *printAffinity(cl_device_affinity_domain affinity) +{ + switch (affinity) { + case (0): return ""; + case (CL_DEVICE_AFFINITY_DOMAIN_NUMA): return "CL_DEVICE_AFFINITY_DOMAIN_NUMA"; + case (CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE): return "CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE"; + case (CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE): return "CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE"; + case (CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE): return "CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE"; + case (CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE): return "CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE"; + case (CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE): return "CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE"; + default: return ""; + } // switch +} +int create_single_kernel_helper( cl_context context, cl_program *outProgram, cl_kernel *outKernel, unsigned int numKernelLines, const char **kernelProgram, const char *kernelName, const cl_device_id *parentDevice ) +{ + int error = CL_SUCCESS; + + /* Create the program object from source */ + error = 
create_single_kernel_helper_create_program(context, outProgram, numKernelLines, kernelProgram); + if( *outProgram == NULL || error != CL_SUCCESS) + { + print_error( error, "clCreateProgramWithSource failed" ); + return error; + } + + /* Compile the program */ + int buildProgramFailed = 0; + int printedSource = 0; + error = clBuildProgram( *outProgram, ((parentDevice == NULL) ? 0 : 1), parentDevice, NULL, NULL, NULL ); + if (error != CL_SUCCESS) + { + unsigned int i; + print_error(error, "clBuildProgram failed"); + buildProgramFailed = 1; + printedSource = 1; + log_error( "Original source is: ------------\n" ); + for( i = 0; i < numKernelLines; i++ ) + log_error( "%s", kernelProgram[ i ] ); + } + + // Verify the build status on all devices + cl_uint deviceCount = 0; + error = clGetProgramInfo( *outProgram, CL_PROGRAM_NUM_DEVICES, sizeof( deviceCount ), &deviceCount, NULL ); + if (error != CL_SUCCESS) { + print_error(error, "clGetProgramInfo CL_PROGRAM_NUM_DEVICES failed"); + return error; + } + + if (deviceCount == 0) { + log_error("No devices found for program.\n"); + return -1; + } + + cl_device_id *devices = (cl_device_id*) malloc( deviceCount * sizeof( cl_device_id ) ); + if( NULL == devices ) + return -1; + memset( devices, 0, deviceCount * sizeof( cl_device_id )); + error = clGetProgramInfo( *outProgram, CL_PROGRAM_DEVICES, sizeof( cl_device_id ) * deviceCount, devices, NULL ); + if (error != CL_SUCCESS) { + print_error(error, "clGetProgramInfo CL_PROGRAM_DEVICES failed"); + free( devices ); + return error; + } + + cl_uint z; + for( z = 0; z < deviceCount; z++ ) + { + char deviceName[4096] = ""; + error = clGetDeviceInfo(devices[z], CL_DEVICE_NAME, sizeof( deviceName), deviceName, NULL); + if (error != CL_SUCCESS || deviceName[0] == '\0') { + log_error("Device \"%d\" failed to return a name\n", z); + print_error(error, "clGetDeviceInfo CL_DEVICE_NAME failed"); + } + + cl_build_status buildStatus; + error = clGetProgramBuildInfo(*outProgram, devices[z], 
CL_PROGRAM_BUILD_STATUS, sizeof(buildStatus), &buildStatus, NULL); + if (error != CL_SUCCESS) { + print_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_STATUS failed"); + free( devices ); + return error; + } + + if (buildStatus != CL_BUILD_SUCCESS || buildProgramFailed) { + char log[10240] = ""; + if (buildStatus == CL_BUILD_SUCCESS && buildProgramFailed) log_error("clBuildProgram returned an error, but buildStatus is marked as CL_BUILD_SUCCESS.\n"); + + char statusString[64] = ""; + if (buildStatus == (cl_build_status)CL_BUILD_SUCCESS) + sprintf(statusString, "CL_BUILD_SUCCESS"); + else if (buildStatus == (cl_build_status)CL_BUILD_NONE) + sprintf(statusString, "CL_BUILD_NONE"); + else if (buildStatus == (cl_build_status)CL_BUILD_ERROR) + sprintf(statusString, "CL_BUILD_ERROR"); + else if (buildStatus == (cl_build_status)CL_BUILD_IN_PROGRESS) + sprintf(statusString, "CL_BUILD_IN_PROGRESS"); + else + sprintf(statusString, "UNKNOWN (%d)", buildStatus); + + if (buildStatus != CL_BUILD_SUCCESS) log_error("Build not successful for device \"%s\", status: %s\n", deviceName, statusString); + error = clGetProgramBuildInfo( *outProgram, devices[z], CL_PROGRAM_BUILD_LOG, sizeof(log), log, NULL ); + if (error != CL_SUCCESS || log[0]=='\0'){ + log_error("Device %d (%s) failed to return a build log\n", z, deviceName); + if (error) { + print_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed"); + free( devices ); + return error; + } else { + log_error("clGetProgramBuildInfo returned an empty log.\n"); + free( devices ); + return -1; + } + } + // In this case we've already printed out the code above. 
+ if (!printedSource) + { + unsigned int i; + log_error( "Original source is: ------------\n" ); + for( i = 0; i < numKernelLines; i++ ) + log_error( "%s", kernelProgram[ i ] ); + printedSource = 1; + } + log_error( "Build log for device \"%s\" is: ------------\n", deviceName ); + log_error( "%s\n", log ); + log_error( "\n----------\n" ); + free( devices ); + return -1; + } + } + + /* And create a kernel from it */ + *outKernel = clCreateKernel( *outProgram, kernelName, &error ); + if( *outKernel == NULL || error != CL_SUCCESS) + { + print_error( error, "Unable to create kernel" ); + free( devices ); + return error; + } + + free( devices ); + return 0; +} + +template +class AutoDestructArray +{ +public: + AutoDestructArray(T* arr) : m_arr(arr) {} + ~AutoDestructArray() { if (m_arr) delete [] m_arr; } + +private: + T* m_arr; +}; + +int test_device_set(size_t deviceCount, size_t queueCount, cl_device_id *devices, int num_elements, cl_device_id *parentDevice = NULL) +{ + int error; + clContextWrapper context; + clProgramWrapper program; + clKernelWrapper kernels[2]; + clMemWrapper stream; + clCommandQueueWrapper queues[MAX_QUEUES]; + size_t threads[1], localThreads[1]; + int data[TEST_SIZE]; + int outputData[TEST_SIZE]; + int expectedResults[TEST_SIZE]; + int *expectedResultsOneDeviceArray = new int[deviceCount * TEST_SIZE]; + int **expectedResultsOneDevice = (int**)alloca(sizeof(int**) * deviceCount); + size_t i; + AutoDestructArray autoDestruct(expectedResultsOneDeviceArray); + + for (i=0; i MAX_QUEUES) { + log_error("Number of queues (%ld) is greater than the number for which the test was written (%d).", queueCount, MAX_QUEUES); + return -1; + } + + log_info("Testing with %ld queues on %ld devices, %ld kernel executions.\n", queueCount, deviceCount, queueCount*num_elements/TEST_SIZE); + + for (i=0; i properties_t; + properties_t supportedProps( 3 ); // only 3 types defined in the spec (but implementation can define more) + size_t const propSize = sizeof( 
cl_device_partition_property ); // Size of one property in bytes. + size_t size; // size of all properties in bytes. + cl_int err; + size = 0; + err = clGetDeviceInfo( parentDevice, CL_DEVICE_PARTITION_PROPERTIES, 0, NULL, & size ); + if ( err == CL_SUCCESS ) { + if ( size % propSize != 0 ) { + log_error( "ERROR: clGetDeviceInfo: Bad size of returned partition properties (%llu), it must me a multiply of partition property size (%llu)\n", llu( size ), llu( propSize ) ); + return -1; + } + supportedProps.resize( size / propSize ); + size = 0; + err = clGetDeviceInfo( parentDevice, CL_DEVICE_PARTITION_PROPERTIES, supportedProps.size() * propSize, & supportedProps.front(), & size ); + test_error_ret( err, "Unable to get device partition properties (2)", -1 ); + } else if ( err == CL_INVALID_VALUE ) { + log_error( "ERROR: clGetDeviceInfo: CL_DEVICE_PARTITION_PROPERTIES is not supported.\n" ); + return -1; + } else { + test_error_ret( err, "Unable to get device partition properties (1)", -1 ); + }; + for ( int i = 0; i < supportedProps.size(); i++) + { + if (supportedProps[i] == partitionType) + { + if (partitionType == CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN) + { + cl_device_affinity_domain supportedAffinityDomain; + err = clGetDeviceInfo(parentDevice, CL_DEVICE_PARTITION_AFFINITY_DOMAIN, sizeof(supportedAffinityDomain), &supportedAffinityDomain, NULL); + test_error( err, "Unable to get supported affinity domains" ); + if (supportedAffinityDomain & affinityDomain) + return 0; + } + else + return 0; + } + } + + return -1; +} + +int test_partition_of_device(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, cl_device_partition_property *partition_type, + cl_uint starting_property, cl_uint ending_property) +{ + cl_uint maxComputeUnits; + cl_uint maxSubDevices; // maximal number of sub-devices that can be created in one call to clCreateSubDevices + int err = 0; + + if (init_device_partition_test(deviceID, maxComputeUnits, maxSubDevices) != 
0) + return -1; + + if (maxComputeUnits <= 1) + return 0; + // confirm that this devices reports how it was partitioned + if (partition_type != NULL) + { // if we're not the root device + size_t psize; + err = clGetDeviceInfo(deviceID, CL_DEVICE_PARTITION_TYPE, 0, NULL, &psize); + test_error( err, "Unable to get CL_DEVICE_PARTITION_TYPE" ); + cl_device_partition_property *properties_returned = (cl_device_partition_property *)alloca(psize); + err = clGetDeviceInfo(deviceID, CL_DEVICE_PARTITION_TYPE, psize, (void *) properties_returned, NULL); + test_error( err, "Unable to get CL_DEVICE_PARTITION_TYPE" ); + + // test returned type + for (cl_uint i = 0;i < psize / sizeof(cl_device_partition_property);i++) { + if (properties_returned[i] != partition_type[i]) { + if (!(partition_type[0] == CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN && + i == 1 && partition_type[1] == CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE && + (properties_returned[1] == CL_DEVICE_AFFINITY_DOMAIN_NUMA || + properties_returned[1] == CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE || + properties_returned[1] == CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE || + properties_returned[1] == CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE || + properties_returned[1] == CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE))) { + log_error("properties_returned[%d] 0x%x != 0x%x partition_type[%d].", i, properties_returned[i], partition_type[i], i); + return -1; + } + } + } // for + } + +#define PROPERTY_TYPES 8 + cl_device_partition_property partitionProp[PROPERTY_TYPES][5] = { + { CL_DEVICE_PARTITION_EQUALLY, maxComputeUnits / 2, 0, 0, 0 } , + { CL_DEVICE_PARTITION_BY_COUNTS, 1, maxComputeUnits - 1, CL_DEVICE_PARTITION_BY_COUNTS_LIST_END, 0 } , + { CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, CL_DEVICE_AFFINITY_DOMAIN_NUMA, 0, 0, 0 } , + { CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE, 0, 0, 0 } , + { CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE, 0, 0, 0 } , + { CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, 
CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE, 0, 0, 0 } , + { CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE, 0, 0, 0 } , + { CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE, 0, 0, 0 } + }; + + // loop thru each type, creating sub-devices for each type + for (cl_uint i = starting_property;i < ending_property;i++) { + + if (test_device_partition_type_support(deviceID, partitionProp[i][0], partitionProp[i][1]) != 0) + { + if (partitionProp[i][0] == CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN) + { + log_info( "Device partition type \"%s\" \"%s\" is not supported on device %p. Skipping test...\n", + printPartition(partitionProp[i][0]), + printAffinity(partitionProp[i][1]), deviceID); + } + else + { + log_info( "Device partition type \"%s\" is not supported on device %p. Skipping test...\n", + printPartition(partitionProp[i][0]), deviceID); + } + continue; + } + + if (partitionProp[i][0] == CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN) + { + log_info("Testing on device %p partition type \"%s\" \"%s\"\n", deviceID, printPartition(partitionProp[i][0]), + printAffinity(partitionProp[i][1])); + } + else + { + log_info("Testing on device %p partition type \"%s\" (%d,%d)\n", deviceID, printPartition(partitionProp[i][0]), + partitionProp[i][1], partitionProp[i][2]); + } + + cl_uint deviceCount; + + // how many sub-devices can we create? + err = clCreateSubDevices(deviceID, partitionProp[i], 0, NULL, &deviceCount); + if ( err == CL_DEVICE_PARTITION_FAILED ) { + log_info( "The device %p could not be further partitioned.\n", deviceID ); + continue; + } + test_error( err, "Failed to get number of sub-devices" ); + + // get the list of subDevices + // create room for 1 more device_id, so that we can put the parent device in there. 
+ cl_device_id *subDevices = (cl_device_id*)alloca(sizeof(cl_device_id) * (deviceCount + 1)); + err = clCreateSubDevices(deviceID, partitionProp[i], deviceCount, subDevices, &deviceCount); + test_error( err, "Actual creation of sub-devices failed" ); + + log_info("Testing on all devices in context\n"); + err = test_device_set(deviceCount, deviceCount, subDevices, num_elements); + if (err == 0) + { + log_info("Testing on a parent device for context\n"); + + // add the parent device + subDevices[deviceCount] = deviceID; + err = test_device_set(deviceCount + 1, deviceCount, subDevices, num_elements, &deviceID); + } + if (err != 0) + { + printf("error! returning %d\n",err); + return err; + } + + // now, recurse and test the FIRST of these sub-devices, to make sure it can be further partitioned + err = test_partition_of_device(subDevices[0], context, queue, num_elements, partitionProp[i], starting_property, ending_property); + if (err != 0) + { + printf("error! returning %d\n",err); + return err; + } + + for (cl_uint j=0;j < deviceCount;j++) + { + err = clReleaseDevice(subDevices[j]); + test_error( err, "\n Releasing sub-device failed \n" ); + } + + } // for + + log_info("Testing on all device %p finished\n", deviceID); + return 0; +} + + +int test_partition_equally(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return test_partition_of_device(deviceID, context, queue, num_elements, NULL, 0, 1); +} + +int test_partition_by_counts(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return test_partition_of_device(deviceID, context, queue, num_elements, NULL, 1, 2); +} + +int test_partition_by_affinity_domain_numa(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return test_partition_of_device(deviceID, context, queue, num_elements, NULL, 2, 3); +} + +int test_partition_by_affinity_domain_l4_cache(cl_device_id deviceID, cl_context context, 
cl_command_queue queue, int num_elements) +{ + return test_partition_of_device(deviceID, context, queue, num_elements, NULL, 3, 4); +} + +int test_partition_by_affinity_domain_l3_cache(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return test_partition_of_device(deviceID, context, queue, num_elements, NULL, 4, 5); +} + +int test_partition_by_affinity_domain_l2_cache(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return test_partition_of_device(deviceID, context, queue, num_elements, NULL, 5, 6); +} + +int test_partition_by_affinity_domain_l1_cache(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return test_partition_of_device(deviceID, context, queue, num_elements, NULL, 6, 7); +} + +int test_partition_by_affinity_domain_next_partitionable(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return test_partition_of_device(deviceID, context, queue, num_elements, NULL, 7, 8); +} + +int test_partition(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return test_partition_of_device(deviceID, context, queue, num_elements, NULL, 0, 8); +} diff --git a/test_conformance/device_timer/CMakeLists.txt b/test_conformance/device_timer/CMakeLists.txt new file mode 100644 index 00000000..b37f6382 --- /dev/null +++ b/test_conformance/device_timer/CMakeLists.txt @@ -0,0 +1,13 @@ +set(MODULE_NAME DEVICE_TIMER) + +set(${MODULE_NAME}_SOURCES + main.c + test_device_timer.c + ../../test_common/harness/errorHelpers.c + ../../test_common/harness/kernelHelpers.c + ../../test_common/harness/testHarness.c + ../../test_common/harness/parseParameters.cpp + ../../test_common/harness/msvc9.c +) + +include(../CMakeCommon.txt) diff --git a/test_conformance/device_timer/main.c b/test_conformance/device_timer/main.c new file mode 100644 index 00000000..c3cb2611 --- /dev/null +++ 
b/test_conformance/device_timer/main.c @@ -0,0 +1,45 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" +#include +#include +#include +#include "../../test_common/harness/testHarness.h" + +#if !defined(_WIN32) +#include +#endif + +#include "procs.h" + +basefn basefn_list[] = { + test_timer_resolution_queries, + test_device_and_host_timers +}; + +const char *basefn_names[] = { + "test_timer_resolution_queries", + "test_device_and_host_timers", + "all" +}; + +size_t num_fns = sizeof(basefn_names)/sizeof(basefn_names[0]); + +int main(int argc, const char *argv[]) +{ + return runTestHarness( argc, argv, num_fns, basefn_list, basefn_names, false, false, 0 ); +} + diff --git a/test_conformance/device_timer/procs.h b/test_conformance/device_timer/procs.h new file mode 100644 index 00000000..8157ec7f --- /dev/null +++ b/test_conformance/device_timer/procs.h @@ -0,0 +1,24 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef __PROCS_H__ +#define __PROCS_H__ + +extern int test_device_and_host_timers(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); + +extern int test_timer_resolution_queries(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +#endif // #ifndef __PROCS_H__ diff --git a/test_conformance/device_timer/test_device_timer.c b/test_conformance/device_timer/test_device_timer.c new file mode 100644 index 00000000..7b171c02 --- /dev/null +++ b/test_conformance/device_timer/test_device_timer.c @@ -0,0 +1,165 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include +#include +#include "../../test_common/harness/errorHelpers.h" +#include "../../test_common/harness/compat.h" + +#if !defined(_WIN32) + #include "unistd.h" // For "sleep" +#endif + +#define ALLOWED_ERROR 0.005f + +int test_device_and_host_timers(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int errors = 0; + cl_int result = CL_SUCCESS; + cl_ulong deviceStartTime, deviceEndTime, deviceTimeDiff; + cl_ulong hostStartTime, hostEndTime, hostTimeDiff; + cl_ulong hostOnlyStartTime, hostOnlyEndTime, hostOnlyTimeDiff; + cl_ulong observedDiff; + cl_ulong allowedDiff; + + result = clGetDeviceAndHostTimer(deviceID, &deviceStartTime, &hostStartTime); + if (result != CL_SUCCESS) { + log_error("clGetDeviceAndHostTimer failed with error %s\n", IGetErrorString(result)); + errors++; + goto End; + } + + result = clGetHostTimer(deviceID, &hostOnlyStartTime); + if (result != CL_SUCCESS) { + log_error("clGetHostTimer failed with error %s\n", IGetErrorString(result)); + errors++; + goto End; + } + + // Wait for a while to allow the timers to increment substantially. 
+ sleep(5); + + result = clGetDeviceAndHostTimer(deviceID, &deviceEndTime, &hostEndTime); + if (result != CL_SUCCESS) { + log_error("clGetDeviceAndHostTimer failed with error %s\n", IGetErrorString(result)); + errors++; + goto End; + } + + result = clGetHostTimer(deviceID, &hostOnlyEndTime); + if (result != CL_SUCCESS) { + log_error("clGetHostTimer failed with error %s\n", IGetErrorString(result)); + errors++; + goto End; + } + + deviceTimeDiff = deviceEndTime - deviceStartTime ; + hostTimeDiff = hostEndTime - hostStartTime ; + hostOnlyTimeDiff = hostOnlyEndTime - hostOnlyStartTime; + + log_info("Checking results from clGetDeviceAndHostTimer ...\n"); + + if (deviceEndTime <= deviceStartTime) { + log_error("Device timer is not monotonically increasing.\n"); + log_error(" deviceStartTime: %lu, deviceEndTime: %lu\n", deviceStartTime, deviceEndTime); + errors++; + } + + if (hostEndTime <= hostStartTime) { + log_error("Error: Host timer is not monotonically increasing.\n"); + log_error(" hostStartTime: %lu, hostEndTime: %lu\n", hostStartTime, hostEndTime); + errors++; + } + + if (deviceTimeDiff > hostTimeDiff) { + observedDiff = deviceTimeDiff - hostTimeDiff; + allowedDiff = (cl_ulong)(hostTimeDiff * ALLOWED_ERROR); + } + else { + observedDiff = hostTimeDiff - deviceTimeDiff; + allowedDiff = (cl_ulong)(deviceTimeDiff * ALLOWED_ERROR); + } + + if (observedDiff > allowedDiff) { + log_error("Error: Device and host timers did not increase by same amount\n"); + log_error(" Observed difference between timers %lu (max allowed %lu).\n", observedDiff, allowedDiff); + errors++; + } + + log_info("Cross-checking results with clGetHostTimer ...\n"); + + if (hostOnlyEndTime <= hostOnlyStartTime) { + log_error("Error: Host timer is not monotonically increasing.\n"); + log_error(" hostStartTime: %lu, hostEndTime: %lu\n", hostOnlyStartTime, hostOnlyEndTime); + errors++; + } + + if (hostOnlyStartTime < hostStartTime) { + log_error("Error: Host start times do not correlate.\n"); + 
log_error("clGetDeviceAndHostTimer was called before clGetHostTimer but timers are not in that order.\n"); + log_error(" clGetDeviceAndHostTimer: %lu, clGetHostTimer: %lu\n", hostStartTime, hostOnlyStartTime); + errors++; + } + + if (hostOnlyEndTime < hostEndTime) { + log_error("Error: Host end times do not correlate.\n"); + log_error("clGetDeviceAndHostTimer was called before clGetHostTimer but timers are not in that order.\n"); + log_error(" clGetDeviceAndHostTimer: %lu, clGetHostTimer: %lu\n", hostEndTime, hostOnlyEndTime); + errors++; + } + +End: + return errors; +} + +int test_timer_resolution_queries(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int errors = 0; + cl_int result = CL_SUCCESS; + cl_platform_id platform = 0; + cl_ulong deviceTimerResolution = 0; + cl_ulong hostTimerResolution = 0; + + result = clGetDeviceInfo(deviceID, CL_DEVICE_PLATFORM, sizeof(platform), &platform, NULL); + if (result != CL_SUCCESS) { + log_error("clGetDeviceInfo(CL_DEVICE_PLATFORM) failed with error %s.\n", IGetErrorString(result)); + errors++; + } + + result = clGetDeviceInfo(deviceID, CL_DEVICE_PROFILING_TIMER_RESOLUTION, sizeof(deviceTimerResolution), &deviceTimerResolution, NULL); + if (result != CL_SUCCESS) { + log_error("clGetDeviceInfo(CL_DEVICE_PROFILING_TIMER_RESOLUTION) failed with error %s.\n", IGetErrorString(result)); + errors++; + } + else { + log_info("CL_DEVICE_PROFILING_TIMER_RESOLUTION == %lu nanoseconds\n", deviceTimerResolution); + } + + if (platform) { + result = clGetPlatformInfo(platform, CL_PLATFORM_HOST_TIMER_RESOLUTION, sizeof(hostTimerResolution), &hostTimerResolution, NULL); + if (result != CL_SUCCESS) { + log_error("clGetPlatformInfo(CL_PLATFORM_HOST_TIMER_RESOLUTION) failed with error %s.\n", IGetErrorString(result)); + errors++; + } + else { + log_info("CL_PLATFORM_HOST_TIMER_RESOLUTION == %lu nanoseconds\n", hostTimerResolution); + } + } + else { + log_error("Could not find platform ID to query 
CL_PLATFORM_HOST_TIMER_RESOLUTION\n"); + } + + return errors; +} diff --git a/test_conformance/events/CMakeLists.txt b/test_conformance/events/CMakeLists.txt new file mode 100644 index 00000000..88f7d319 --- /dev/null +++ b/test_conformance/events/CMakeLists.txt @@ -0,0 +1,25 @@ +set(MODULE_NAME EVENTS) + +set(${MODULE_NAME}_SOURCES + main.c + test_events.cpp + test_event_dependencies.cpp + test_waitlists.cpp + test_userevents.cpp + test_userevents_multithreaded.cpp + action_classes.cpp + test_callbacks.cpp + ../../test_common/harness/genericThread.cpp + ../../test_common/harness/errorHelpers.c + ../../test_common/harness/threadTesting.c + ../../test_common/harness/testHarness.c + ../../test_common/harness/kernelHelpers.c + ../../test_common/harness/typeWrappers.cpp + ../../test_common/harness/mt19937.c + ../../test_common/harness/conversions.c + ../../test_common/harness/msvc9.c + ../../test_common/harness/ThreadPool.c + ../../test_common/harness/parseParameters.cpp +) + +include(../CMakeCommon.txt) diff --git a/test_conformance/events/Jamfile b/test_conformance/events/Jamfile new file mode 100644 index 00000000..45979f76 --- /dev/null +++ b/test_conformance/events/Jamfile @@ -0,0 +1,18 @@ +project + : requirements + gcc:-xc++ + msvc:"/TP" + ; + +exe test_events + : main.c + test_event_dependencies.cpp + test_events.cpp + test_waitlists.cpp + ; + +install dist + : test_events + : debug:$(DIST)/debug/tests/test_conformance/events + release:$(DIST)/release/tests/test_conformance/events + ; diff --git a/test_conformance/events/Makefile b/test_conformance/events/Makefile new file mode 100644 index 00000000..c2025261 --- /dev/null +++ b/test_conformance/events/Makefile @@ -0,0 +1,51 @@ +ifdef BUILD_WITH_ATF +ATF = -framework ATF +USE_ATF = -DUSE_ATF +endif + +SRCS = main.c \ + test_events.cpp \ + test_event_dependencies.cpp \ + test_userevents.cpp \ + test_waitlists.cpp \ + test_callbacks.cpp \ + action_classes.cpp \ + test_userevents_multithreaded.cpp \ + 
../../test_common/harness/errorHelpers.c \ + ../../test_common/harness/threadTesting.c \ + ../../test_common/harness/testHarness.c \ + ../../test_common/harness/genericThread.cpp \ + ../../test_common/harness/kernelHelpers.c \ + ../../test_common/harness/typeWrappers.cpp \ + ../../test_common/harness/mt19937.c \ + ../../test_common/harness/conversions.c \ + ../../test_common/harness/ThreadPool.c \ + +DEFINES = DONT_TEST_GARBAGE_POINTERS + +SOURCES = $(abspath $(SRCS)) +LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries +LIBPATH += -L. +HEADERS = +TARGET = test_events +INCLUDE = +COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32 +CC = c++ +CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE) +CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE) +LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF} + +OBJECTS := ${SOURCES:.c=.o} +OBJECTS := ${OBJECTS:.cpp=.o} + +TARGETOBJECT = +all: $(TARGET) + +$(TARGET): $(OBJECTS) + $(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES) + +clean: + rm -f $(TARGET) $(OBJECTS) + +.DEFAULT: + @echo The target \"$@\" does not exist in Makefile. diff --git a/test_conformance/events/action_classes.cpp b/test_conformance/events/action_classes.cpp new file mode 100644 index 00000000..122c21ff --- /dev/null +++ b/test_conformance/events/action_classes.cpp @@ -0,0 +1,658 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "action_classes.h" + +#pragma mark -------------------- Base Action Class ------------------------- + +const cl_uint BufferSizeReductionFactor = 20; + +cl_int Action::IGetPreferredImageSize2D( cl_device_id device, size_t &outWidth, size_t &outHeight ) +{ + cl_ulong maxAllocSize; + size_t maxWidth, maxHeight; + cl_int error; + + + // Get the largest possible buffer we could allocate + error = clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL ); + test_error( error, "Unable to get device config" ); + + // Create something of a decent size + if( maxWidth * maxHeight * 4 > maxAllocSize / BufferSizeReductionFactor ) + { + float rootSize = sqrtf( (float)( maxAllocSize / ( BufferSizeReductionFactor * 4 ) ) ); + + if( (size_t)rootSize > maxWidth ) + outWidth = maxWidth; + else + outWidth = (size_t)rootSize; + outHeight = (size_t)( ( maxAllocSize / ( BufferSizeReductionFactor * 4 ) ) / outWidth ); + if( outHeight > maxHeight ) + outHeight = maxHeight; + } + else + { + outWidth = maxWidth; + outHeight = maxHeight; + } + + outWidth /=2; + outHeight /=2; + + if (outWidth > 2048) + outWidth = 2048; + if (outHeight > 2048) + outHeight = 2048; + log_info("\tImage size: %d x %d (%gMB)\n", (int)outWidth, (int)outHeight, + (double)((int)outWidth*(int)outHeight*4)/(1024.0*1024.0)); + return CL_SUCCESS; +} + +cl_int Action::IGetPreferredImageSize3D( cl_device_id device, size_t &outWidth, size_t &outHeight, size_t &outDepth ) +{ + cl_ulong maxAllocSize; + size_t maxWidth, maxHeight, maxDepth; + cl_int error; + + + // Get the largest possible buffer we could allocate + error = clGetDeviceInfo( device, 
CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof( maxDepth ), &maxDepth, NULL ); + test_error( error, "Unable to get device config" ); + + // Create something of a decent size + if( (cl_ulong)maxWidth * maxHeight * maxDepth > maxAllocSize / ( BufferSizeReductionFactor * 4 ) ) + { + float rootSize = cbrtf( (float)( maxAllocSize / ( BufferSizeReductionFactor * 4 ) ) ); + + if( (size_t)rootSize > maxWidth ) + outWidth = maxWidth; + else + outWidth = (size_t)rootSize; + if( (size_t)rootSize > maxHeight ) + outHeight = maxHeight; + else + outHeight = (size_t)rootSize; + outDepth = (size_t)( ( maxAllocSize / ( BufferSizeReductionFactor * 4 ) ) / ( outWidth * outHeight ) ); + if( outDepth > maxDepth ) + outDepth = maxDepth; + } + else + { + outWidth = maxWidth; + outHeight = maxHeight; + outDepth = maxDepth; + } + + outWidth /=2; + outHeight /=2; + outDepth /=2; + + if (outWidth > 512) + outWidth = 512; + if (outHeight > 512) + outHeight = 512; + if (outDepth > 512) + outDepth = 512; + log_info("\tImage size: %d x %d x %d (%gMB)\n", (int)outWidth, (int)outHeight, (int)outDepth, + (double)((int)outWidth*(int)outHeight*(int)outDepth*4)/(1024.0*1024.0)); + + return CL_SUCCESS; +} + +#pragma mark -------------------- Execution Sub-Classes ------------------------- + +cl_int NDRangeKernelAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue ) +{ + const char *long_kernel[] = { + "__kernel void sample_test(__global float *src, __global int *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + " int i;\n" + "\n" + " for( i = 0; i < 100000; i++ )\n" + " {\n" + " dst[tid] = (int)src[tid] * 3;\n" + " }\n" + "\n" + "}\n" }; + + size_t threads[1] = { 
1000 }; + int error; + + if( create_single_kernel_helper( context, &mProgram, &mKernel, 1, long_kernel, "sample_test" ) ) + { + return -1; + } + + error = get_max_common_work_group_size( context, mKernel, threads[0], &mLocalThreads[0] ); + test_error( error, "Unable to get work group size to use" ); + + mStreams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * 1000, NULL, &error ); + test_error( error, "Creating test array failed" ); + mStreams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 1000, NULL, &error ); + test_error( error, "Creating test array failed" ); + + /* Set the arguments */ + error = clSetKernelArg( mKernel, 0, sizeof( mStreams[0] ), &mStreams[0] ); + test_error( error, "Unable to set kernel arguments" ); + error = clSetKernelArg( mKernel, 1, sizeof( mStreams[1] ), &mStreams[1] ); + test_error( error, "Unable to set kernel arguments" ); + + return CL_SUCCESS; +} + +cl_int NDRangeKernelAction::Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent ) +{ + size_t threads[1] = { 1000 }; + cl_int error = clEnqueueNDRangeKernel( queue, mKernel, 1, NULL, threads, mLocalThreads, numWaits, waits, outEvent ); + test_error( error, "Unable to execute kernel" ); + + return CL_SUCCESS; +} + +#pragma mark -------------------- Buffer Sub-Classes ------------------------- + +cl_int BufferAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue, bool allocate ) +{ + cl_int error; + cl_ulong maxAllocSize; + + + // Get the largest possible buffer we could allocate + error = clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL ); + + // Don't create a buffer quite that big, just so we have some space left over for other work + mSize = (size_t)( maxAllocSize / BufferSizeReductionFactor ); + + // Cap at 128M so tests complete in a reasonable amount of time. 
+ if (mSize > 128 << 20) + mSize = 128 << 20; + + mSize /=2; + + log_info("\tBuffer size: %gMB\n", (double)mSize/(1024.0*1024.0)); + + mBuffer = clCreateBuffer( context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, mSize, NULL, &error ); + test_error( error, "Unable to create buffer to test against" ); + + mOutBuffer = malloc( mSize ); + if( mOutBuffer == NULL ) + { + log_error( "ERROR: Unable to allocate temp buffer (out of memory)\n" ); + return CL_OUT_OF_RESOURCES; + } + + return CL_SUCCESS; +} + +cl_int ReadBufferAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue ) +{ + return BufferAction::Setup( device, context, queue, true ); +} + +cl_int ReadBufferAction::Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent ) +{ + cl_int error = clEnqueueReadBuffer( queue, mBuffer, CL_FALSE, 0, mSize, mOutBuffer, numWaits, waits, outEvent ); + test_error( error, "Unable to enqueue buffer read" ); + + return CL_SUCCESS; +} + +cl_int WriteBufferAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue ) +{ + return BufferAction::Setup( device, context, queue, true ); +} + +cl_int WriteBufferAction::Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent ) +{ + cl_int error = clEnqueueWriteBuffer( queue, mBuffer, CL_FALSE, 0, mSize, mOutBuffer, numWaits, waits, outEvent ); + test_error( error, "Unable to enqueue buffer write" ); + + return CL_SUCCESS; +} + +MapBufferAction::~MapBufferAction() +{ + if (mQueue) + clEnqueueUnmapMemObject( mQueue, mBuffer, mMappedPtr, 0, NULL, NULL ); +} + +cl_int MapBufferAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue ) +{ + return BufferAction::Setup( device, context, queue, false ); +} + +cl_int MapBufferAction::Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent ) +{ + cl_int error; + mQueue = queue; + mMappedPtr = clEnqueueMapBuffer( queue, mBuffer, CL_FALSE, 
CL_MAP_READ, 0, mSize, numWaits, waits, outEvent, &error ); + test_error( error, "Unable to enqueue buffer map" ); + + return CL_SUCCESS; +} + +cl_int UnmapBufferAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue ) +{ + cl_int error = BufferAction::Setup( device, context, queue, false ); + if( error != CL_SUCCESS ) + return error; + + mMappedPtr = clEnqueueMapBuffer( queue, mBuffer, CL_TRUE, CL_MAP_READ, 0, mSize, 0, NULL, NULL, &error ); + test_error( error, "Unable to enqueue buffer map" ); + + return CL_SUCCESS; +} + +cl_int UnmapBufferAction::Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent ) +{ + cl_int error = clEnqueueUnmapMemObject( queue, mBuffer, mMappedPtr, numWaits, waits, outEvent ); + test_error( error, "Unable to enqueue buffer unmap" ); + + return CL_SUCCESS; +} + + +#pragma mark -------------------- Read/Write Image Classes ------------------------- + +cl_int ReadImage2DAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue ) +{ + cl_int error; + + + if( ( error = IGetPreferredImageSize2D( device, mWidth, mHeight ) ) ) + return error; + + cl_image_format format = { CL_RGBA, CL_SIGNED_INT8 }; + mImage = create_image_2d( context, CL_MEM_READ_ONLY, &format, mWidth, mHeight, 0, NULL, &error ); + + test_error( error, "Unable to create image to test against" ); + + mOutput = malloc( mWidth * mHeight * 4 ); + if( mOutput == NULL ) + { + log_error( "ERROR: Unable to allocate buffer: out of memory\n" ); + return CL_OUT_OF_RESOURCES; + } + + return CL_SUCCESS; +} + +cl_int ReadImage2DAction::Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent ) +{ + size_t origin[ 3 ] = { 0, 0, 0 }, region[ 3 ] = { mWidth, mHeight, 1 }; + + cl_int error = clEnqueueReadImage( queue, mImage, CL_FALSE, origin, region, 0, 0, mOutput, numWaits, waits, outEvent ); + test_error( error, "Unable to enqueue image read" ); + + return CL_SUCCESS; +} + +cl_int 
ReadImage3DAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue )
+{
+    cl_int error;
+
+
+    if( ( error = IGetPreferredImageSize3D( device, mWidth, mHeight, mDepth ) ) )
+        return error;
+
+    cl_image_format format = { CL_RGBA, CL_SIGNED_INT8 };
+    mImage = create_image_3d( context, CL_MEM_READ_ONLY, &format, mWidth, mHeight, mDepth, 0, 0, NULL, &error );
+    test_error( error, "Unable to create image to test against" );
+
+    // 4 bytes per pixel matches the CL_RGBA / CL_SIGNED_INT8 format above.
+    mOutput = malloc( mWidth * mHeight * mDepth * 4 );
+    if( mOutput == NULL )
+    {
+        log_error( "ERROR: Unable to allocate buffer: out of memory\n" );
+        return CL_OUT_OF_RESOURCES;
+    }
+
+    return CL_SUCCESS;
+}
+
+cl_int ReadImage3DAction::Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent )
+{
+    size_t origin[ 3 ] = { 0, 0, 0 }, region[ 3 ] = { mWidth, mHeight, mDepth };
+
+    cl_int error = clEnqueueReadImage( queue, mImage, CL_FALSE, origin, region, 0, 0, mOutput, numWaits, waits, outEvent );
+    test_error( error, "Unable to enqueue image read" );
+
+    return CL_SUCCESS;
+}
+
+cl_int WriteImage2DAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue )
+{
+    cl_int error;
+
+
+    if( ( error = IGetPreferredImageSize2D( device, mWidth, mHeight ) ) )
+        return error;
+
+    cl_image_format format = { CL_RGBA, CL_SIGNED_INT8 };
+    mImage = create_image_2d( context, CL_MEM_WRITE_ONLY, &format, mWidth, mHeight, 0, NULL, &error );
+    test_error( error, "Unable to create image to test against" );
+
+    mOutput = malloc( mWidth * mHeight * 4 );
+    if( mOutput == NULL )
+    {
+        log_error( "ERROR: Unable to allocate buffer: out of memory\n" );
+        return CL_OUT_OF_RESOURCES;
+    }
+
+    return CL_SUCCESS;
+}
+
+cl_int WriteImage2DAction::Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent )
+{
+    size_t origin[ 3 ] = { 0, 0, 0 }, region[ 3 ] = { mWidth, mHeight, 1 };
+
+    cl_int error = clEnqueueWriteImage( queue, mImage, CL_FALSE, origin, region, 0, 0, mOutput, numWaits, waits, outEvent );
+    test_error( error, "Unable to enqueue image write" );
+
+    return CL_SUCCESS;
+}
+
+// NOTE(review): this image is created CL_MEM_READ_ONLY but Execute enqueues
+// clEnqueueWriteImage into it; WriteImage2DAction uses CL_MEM_WRITE_ONLY —
+// these should presumably match. TODO confirm and align the flags.
+cl_int WriteImage3DAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue )
+{
+    cl_int error;
+
+
+    if( ( error = IGetPreferredImageSize3D( device, mWidth, mHeight, mDepth ) ) )
+        return error;
+
+    cl_image_format format = { CL_RGBA, CL_SIGNED_INT8 };
+    mImage = create_image_3d( context, CL_MEM_READ_ONLY, &format, mWidth, mHeight, mDepth, 0, 0, NULL, &error );
+    test_error( error, "Unable to create image to test against" );
+
+    mOutput = malloc( mWidth * mHeight * mDepth * 4 );
+    if( mOutput == NULL )
+    {
+        log_error( "ERROR: Unable to allocate buffer: out of memory\n" );
+        return CL_OUT_OF_RESOURCES;
+    }
+
+    return CL_SUCCESS;
+}
+
+cl_int WriteImage3DAction::Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent )
+{
+    size_t origin[ 3 ] = { 0, 0, 0 }, region[ 3 ] = { mWidth, mHeight, mDepth };
+
+    cl_int error = clEnqueueWriteImage( queue, mImage, CL_FALSE, origin, region, 0, 0, mOutput, numWaits, waits, outEvent );
+    test_error( error, "Unable to enqueue image write" );
+
+    return CL_SUCCESS;
+}
+
+#pragma mark -------------------- Copy Image Classes -------------------------
+
+// Shared Execute for all copy-image subclasses; each subclass Setup decides
+// the concrete src/dst images and the mWidth/mHeight/mDepth region extents.
+cl_int CopyImageAction::Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent )
+{
+    size_t origin[ 3 ] = { 0, 0, 0 }, region[ 3 ] = { mWidth, mHeight, mDepth };
+
+    cl_int error = clEnqueueCopyImage( queue, mSrcImage, mDstImage, origin, origin, region, numWaits, waits, outEvent );
+    test_error( error, "Unable to enqueue image copy" );
+
+    return CL_SUCCESS;
+}
+
+cl_int CopyImage2Dto2DAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue )
+{
+    cl_int error;
+
+
+    if( ( error = IGetPreferredImageSize2D( device, mWidth, mHeight ) ) )
+        return error;
+
+    mWidth /= 2;
+
+    cl_image_format format = { CL_RGBA, CL_SIGNED_INT8 };
+    mSrcImage = create_image_2d( context,
CL_MEM_READ_ONLY, &format, mWidth, mHeight, 0, NULL, &error );
+    test_error( error, "Unable to create image to test against" );
+
+    mDstImage = create_image_2d( context, CL_MEM_WRITE_ONLY, &format, mWidth, mHeight, 0, NULL, &error );
+    test_error( error, "Unable to create image to test against" );
+
+    // Region depth for the shared CopyImageAction::Execute (2D copy → 1).
+    mDepth = 1;
+    return CL_SUCCESS;
+}
+
+// NOTE(review): dst is created CL_MEM_READ_ONLY although it is the copy
+// destination (CopyImage2Dto2DAction uses CL_MEM_WRITE_ONLY) — TODO confirm.
+// Also note mDepth is reset to 1 after the 3D dst is created with the larger
+// depth, so Execute copies a single slice into the 3D image.
+cl_int CopyImage2Dto3DAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue )
+{
+    cl_int error;
+
+
+    if( ( error = IGetPreferredImageSize3D( device, mWidth, mHeight, mDepth ) ) )
+        return error;
+
+    mDepth /= 2;
+
+    cl_image_format format = { CL_RGBA, CL_SIGNED_INT8 };
+    mSrcImage = create_image_2d( context, CL_MEM_READ_ONLY, &format, mWidth, mHeight, 0, NULL, &error );
+    test_error( error, "Unable to create image to test against" );
+
+    mDstImage = create_image_3d( context, CL_MEM_READ_ONLY, &format, mWidth, mHeight, mDepth, 0, 0, NULL, &error );
+    test_error( error, "Unable to create image to test against" );
+
+    mDepth = 1;
+    return CL_SUCCESS;
+}
+
+cl_int CopyImage3Dto2DAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue )
+{
+    cl_int error;
+
+
+    if( ( error = IGetPreferredImageSize3D( device, mWidth, mHeight, mDepth ) ) )
+        return error;
+
+    mDepth /= 2;
+
+    cl_image_format format = { CL_RGBA, CL_SIGNED_INT8 };
+    mSrcImage = create_image_3d( context, CL_MEM_READ_ONLY, &format, mWidth, mHeight, mDepth, 0, 0, NULL, &error );
+    test_error( error, "Unable to create image to test against" );
+
+    mDstImage = create_image_2d( context, CL_MEM_WRITE_ONLY, &format, mWidth, mHeight, 0, NULL, &error );
+    test_error( error, "Unable to create image to test against" );
+
+    // Copy a single slice of the 3D src into the 2D dst.
+    mDepth = 1;
+    return CL_SUCCESS;
+}
+
+// NOTE(review): dst is CL_MEM_READ_ONLY here as well, though it is written by
+// the copy — TODO confirm whether CL_MEM_WRITE_ONLY was intended.
+cl_int CopyImage3Dto3DAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue )
+{
+    cl_int error;
+
+
+    if( ( error = IGetPreferredImageSize3D( device, mWidth, mHeight, mDepth ) ) )
+        return error;
+
+    mDepth /= 2;
+
+    cl_image_format format = { CL_RGBA, CL_SIGNED_INT8 };
+    mSrcImage = create_image_3d( context, CL_MEM_READ_ONLY, &format, mWidth, mHeight, mDepth, 0, 0, NULL, &error );
+    test_error( error, "Unable to create image to test against" );
+
+    mDstImage = create_image_3d( context, CL_MEM_READ_ONLY, &format, mWidth, mHeight, mDepth, 0, 0, NULL, &error );
+    test_error( error, "Unable to create image to test against" );
+
+    return CL_SUCCESS;
+}
+
+#pragma mark -------------------- Copy Image/Buffer Classes -------------------------
+
+cl_int Copy2DImageToBufferAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue )
+{
+    cl_int error;
+
+
+    if( ( error = IGetPreferredImageSize2D( device, mWidth, mHeight ) ) )
+        return error;
+
+    mWidth /= 2;
+
+    cl_image_format format = { CL_RGBA, CL_SIGNED_INT8 };
+    mSrcImage = create_image_2d( context, CL_MEM_READ_ONLY, &format, mWidth, mHeight, 0, NULL, &error );
+    test_error( error, "Unable to create image to test against" );
+
+    // Buffer sized for the full image: 4 bytes/pixel (CL_RGBA, CL_SIGNED_INT8).
+    mDstBuffer = clCreateBuffer( context, CL_MEM_WRITE_ONLY, mWidth * mHeight * 4, NULL, &error );
+    test_error( error, "Unable to create buffer to test against" );
+
+    return CL_SUCCESS;
+}
+
+cl_int Copy2DImageToBufferAction::Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent )
+{
+    size_t origin[ 3 ] = { 0, 0, 0 }, region[ 3 ] = { mWidth, mHeight, 1 };
+
+    cl_int error = clEnqueueCopyImageToBuffer( queue, mSrcImage, mDstBuffer, origin, region, 0, numWaits, waits, outEvent );
+    test_error( error, "Unable to enqueue image to buffer copy" );
+
+    return CL_SUCCESS;
+}
+
+cl_int Copy3DImageToBufferAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue )
+{
+    cl_int error;
+
+
+    if( ( error = IGetPreferredImageSize3D( device, mWidth, mHeight, mDepth ) ) )
+        return error;
+
+    mDepth /= 2;
+
+    cl_image_format format = { CL_RGBA, CL_SIGNED_INT8 };
+    mSrcImage = create_image_3d( context, CL_MEM_READ_ONLY, &format, mWidth, mHeight, mDepth, 0, 0, NULL, &error );
+    test_error(
error, "Unable to create image to test against" );
+
+    mDstBuffer = clCreateBuffer( context, CL_MEM_WRITE_ONLY, mWidth * mHeight * mDepth * 4, NULL, &error );
+    test_error( error, "Unable to create buffer to test against" );
+
+    return CL_SUCCESS;
+}
+
+cl_int Copy3DImageToBufferAction::Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent )
+{
+    size_t origin[ 3 ] = { 0, 0, 0 }, region[ 3 ] = { mWidth, mHeight, mDepth };
+
+    cl_int error = clEnqueueCopyImageToBuffer( queue, mSrcImage, mDstBuffer, origin, region, 0, numWaits, waits, outEvent );
+    test_error( error, "Unable to enqueue image to buffer copy" );
+
+    return CL_SUCCESS;
+}
+
+cl_int CopyBufferTo2DImageAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue )
+{
+    cl_int error;
+
+
+    if( ( error = IGetPreferredImageSize2D( device, mWidth, mHeight ) ) )
+        return error;
+
+    mWidth /= 2;
+
+    cl_image_format format = { CL_RGBA, CL_SIGNED_INT8 };
+
+    mSrcBuffer = clCreateBuffer( context, CL_MEM_READ_ONLY, mWidth * mHeight * 4, NULL, &error );
+    test_error( error, "Unable to create buffer to test against" );
+
+    mDstImage = create_image_2d( context, CL_MEM_WRITE_ONLY, &format, mWidth, mHeight, 0, NULL, &error );
+    test_error( error, "Unable to create image to test against" );
+
+    return CL_SUCCESS;
+}
+
+cl_int CopyBufferTo2DImageAction::Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent )
+{
+    size_t origin[ 3 ] = { 0, 0, 0 }, region[ 3 ] = { mWidth, mHeight, 1 };
+
+    cl_int error = clEnqueueCopyBufferToImage( queue, mSrcBuffer, mDstImage, 0, origin, region, numWaits, waits, outEvent );
+    test_error( error, "Unable to enqueue buffer to image copy" );
+
+    return CL_SUCCESS;
+}
+
+// NOTE(review): mDstImage is created CL_MEM_READ_ONLY although the buffer copy
+// writes into it (the 2D variant uses CL_MEM_WRITE_ONLY) — TODO confirm.
+cl_int CopyBufferTo3DImageAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue )
+{
+    cl_int error;
+
+
+    if( ( error = IGetPreferredImageSize3D( device, mWidth, mHeight, mDepth ) ) )
+        return error;
+
+    mDepth /= 2;
+
+    mSrcBuffer = clCreateBuffer( context, CL_MEM_READ_ONLY, mWidth * mHeight * mDepth * 4, NULL, &error );
+    test_error( error, "Unable to create buffer to test against" );
+
+    cl_image_format format = { CL_RGBA, CL_SIGNED_INT8 };
+    mDstImage = create_image_3d( context, CL_MEM_READ_ONLY, &format, mWidth, mHeight, mDepth, 0, 0, NULL, &error );
+    test_error( error, "Unable to create image to test against" );
+
+    return CL_SUCCESS;
+}
+
+cl_int CopyBufferTo3DImageAction::Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent )
+{
+    size_t origin[ 3 ] = { 0, 0, 0 }, region[ 3 ] = { mWidth, mHeight, mDepth };
+
+    cl_int error = clEnqueueCopyBufferToImage( queue, mSrcBuffer, mDstImage, 0, origin, region, numWaits, waits, outEvent );
+    test_error( error, "Unable to enqueue buffer to image copy" );
+
+    return CL_SUCCESS;
+}
+
+#pragma mark -------------------- Map Image Class -------------------------
+
+// NOTE(review): same caveat as MapBufferAction — mQueue is set before the
+// (non-blocking) map in Execute, so a failed map can leave mMappedPtr invalid
+// while this dtor still attempts the unmap. TODO confirm.
+MapImageAction::~MapImageAction()
+{
+    if (mQueue)
+        clEnqueueUnmapMemObject( mQueue, mImage, mMappedPtr, 0, NULL, NULL );
+}
+
+cl_int MapImageAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue )
+{
+    cl_int error;
+
+
+    if( ( error = IGetPreferredImageSize2D( device, mWidth, mHeight ) ) )
+        return error;
+
+    cl_image_format format = { CL_RGBA, CL_SIGNED_INT8 };
+    mImage = create_image_2d( context, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, &format, mWidth, mHeight, 0, NULL, &error );
+    test_error( error, "Unable to create image to test against" );
+
+    return CL_SUCCESS;
+}
+
+cl_int MapImageAction::Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent )
+{
+    cl_int error;
+
+    size_t origin[ 3 ] = { 0, 0, 0 }, region[ 3 ] = { mWidth, mHeight, 1 };
+    size_t outPitch;
+
+    mQueue = queue;
+    mMappedPtr = clEnqueueMapImage( queue, mImage, CL_FALSE, CL_MAP_READ, origin, region, &outPitch, NULL, numWaits, waits, outEvent, &error );
+    test_error( error, "Unable to enqueue image map" );
+
+
return CL_SUCCESS;
+}
diff --git a/test_conformance/events/action_classes.h b/test_conformance/events/action_classes.h
new file mode 100644
index 00000000..069ed346
--- /dev/null
+++ b/test_conformance/events/action_classes.h
@@ -0,0 +1,326 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef _action_classes_h
+#define _action_classes_h
+
+#include "testBase.h"
+
+// This is a base class from which all actions are born
+// Note: No actions should actually feed I/O to each other, because then
+// it would potentially be possible for an implementation to make actions
+// wait on one another based on their shared I/O, not because of their
+// wait lists!
+class Action
+{
+    public:
+        Action() {}
+        virtual ~Action() {}
+
+        // Setup allocates resources; Execute enqueues the (non-blocking)
+        // operation gated on `waits` and reports completion via `outEvent`.
+        virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue ) = 0;
+        virtual cl_int Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent ) = 0;
+
+        virtual const char * GetName( void ) const = 0;
+
+    protected:
+
+        cl_int IGetPreferredImageSize2D( cl_device_id device, size_t &outWidth, size_t &outHeight );
+        cl_int IGetPreferredImageSize3D( cl_device_id device, size_t &outWidth, size_t &outHeight, size_t &outDepth );
+};
+
+// Simple NDRangeKernel execution that takes a noticable amount of time
+class NDRangeKernelAction : public Action
+{
+    public:
+        NDRangeKernelAction() {}
+        virtual ~NDRangeKernelAction() {}
+
+        size_t          mLocalThreads[ 1 ];
+        clMemWrapper    mStreams[ 2 ];
+        clProgramWrapper mProgram;
+        clKernelWrapper mKernel;
+
+        virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue );
+        virtual cl_int Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent );
+
+        virtual const char * GetName( void ) const { return "NDRangeKernel"; }
+};
+
+// Base action for buffer actions
+// NOTE(review): the ctor NULLs mOutBuffer so the dtor's free() is safe even if
+// Setup never ran; mBuffer/mSize are only valid after a successful Setup.
+class BufferAction : public Action
+{
+    public:
+        clMemWrapper mBuffer;
+        size_t       mSize;
+        void         *mOutBuffer;
+
+        BufferAction() { mOutBuffer = NULL; }
+        virtual ~BufferAction() { free( mOutBuffer ); }
+
+        virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue, bool allocate );
+};
+
+class ReadBufferAction : public BufferAction
+{
+    public:
+        ReadBufferAction() {}
+        virtual ~ReadBufferAction() {}
+
+        virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue );
+        virtual cl_int Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent );
+
+        virtual const char * GetName( void ) const { return "ReadBuffer"; }
+};
+
+class WriteBufferAction : public BufferAction
+{
+    public:
+        WriteBufferAction() {}
+        virtual
~WriteBufferAction() {}
+
+        virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue );
+        virtual cl_int Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent );
+
+        virtual const char * GetName( void ) const { return "WriteBuffer"; }
+};
+
+// NOTE(review): mMappedPtr is left uninitialized by the ctor; the dtor guards
+// only on mQueue (set in Execute) before unmapping — see the .cpp caveat.
+class MapBufferAction : public BufferAction
+{
+    public:
+        MapBufferAction() : mQueue(0) {}
+
+        cl_command_queue mQueue;
+        void             *mMappedPtr;
+
+        virtual ~MapBufferAction();
+        virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue );
+        virtual cl_int Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent );
+
+        virtual const char * GetName( void ) const { return "MapBuffer"; }
+};
+
+class UnmapBufferAction : public BufferAction
+{
+    public:
+        UnmapBufferAction() {}
+        virtual ~UnmapBufferAction() {}
+
+        void *mMappedPtr;
+
+        virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue );
+        virtual cl_int Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent );
+
+        virtual const char * GetName( void ) const { return "UnmapBuffer"; }
+};
+
+class ReadImage2DAction : public Action
+{
+    public:
+        ReadImage2DAction() { mOutput = NULL; }
+        virtual ~ReadImage2DAction() { free( mOutput ); }
+
+        clMemWrapper mImage;
+        size_t       mWidth, mHeight;
+        void         *mOutput;
+
+        virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue );
+        virtual cl_int Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent );
+
+        virtual const char * GetName( void ) const { return "ReadImage2D"; }
+};
+
+class ReadImage3DAction : public Action
+{
+    public:
+        ReadImage3DAction() { mOutput = NULL; }
+        virtual ~ReadImage3DAction() { free( mOutput ); }
+
+        clMemWrapper mImage;
+        size_t       mWidth, mHeight, mDepth;
+        void         *mOutput;
+
+        virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue );
+        virtual
cl_int Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent );
+
+        virtual const char * GetName( void ) const { return "ReadImage3D"; }
+};
+
+class WriteImage2DAction : public Action
+{
+    public:
+        clMemWrapper mImage;
+        size_t       mWidth, mHeight;
+        void         *mOutput;
+
+        WriteImage2DAction() { mOutput = NULL; }
+        virtual ~WriteImage2DAction() { free( mOutput ); }
+
+        virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue );
+        virtual cl_int Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent );
+
+        virtual const char * GetName( void ) const { return "WriteImage2D"; }
+};
+
+class WriteImage3DAction : public Action
+{
+    public:
+        clMemWrapper mImage;
+        size_t       mWidth, mHeight, mDepth;
+        void         *mOutput;
+
+        WriteImage3DAction() { mOutput = NULL; }
+        virtual ~WriteImage3DAction() { free( mOutput ); }
+
+        virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue );
+        virtual cl_int Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent );
+
+        virtual const char * GetName( void ) const { return "WriteImage3D"; }
+};
+
+// Common Execute for the copy-image tests; subclasses provide Setup (which
+// fills mSrcImage/mDstImage and the region extents) and GetName.
+class CopyImageAction : public Action
+{
+    public:
+        CopyImageAction() {}
+        virtual ~CopyImageAction() {}
+
+        clMemWrapper mSrcImage, mDstImage;
+        size_t       mWidth, mHeight, mDepth;
+
+        virtual cl_int Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent );
+};
+
+class CopyImage2Dto2DAction : public CopyImageAction
+{
+    public:
+        CopyImage2Dto2DAction() {}
+        virtual ~CopyImage2Dto2DAction() {}
+
+        virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue );
+
+        virtual const char * GetName( void ) const { return "CopyImage2Dto2D"; }
+};
+
+class CopyImage2Dto3DAction : public CopyImageAction
+{
+    public:
+        CopyImage2Dto3DAction() {}
+        virtual ~CopyImage2Dto3DAction() {}
+
+        virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue );
+
+        virtual const char * GetName( void ) const { return "CopyImage2Dto3D"; }
+};
+
+class CopyImage3Dto2DAction : public CopyImageAction
+{
+    public:
+        CopyImage3Dto2DAction() {}
+        virtual ~CopyImage3Dto2DAction() {}
+
+        virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue );
+
+        virtual const char * GetName( void ) const { return "CopyImage3Dto2D"; }
+};
+
+class CopyImage3Dto3DAction : public CopyImageAction
+{
+    public:
+        CopyImage3Dto3DAction() {}
+        virtual ~CopyImage3Dto3DAction() {}
+
+        virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue );
+
+        virtual const char * GetName( void ) const { return "CopyImage3Dto3D"; }
+};
+
+class Copy2DImageToBufferAction : public Action
+{
+    public:
+        Copy2DImageToBufferAction() {}
+        virtual ~Copy2DImageToBufferAction() {}
+
+        clMemWrapper mSrcImage, mDstBuffer;
+        size_t       mWidth, mHeight;
+
+        virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue );
+        virtual cl_int Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent );
+
+        virtual const char * GetName( void ) const { return "Copy2DImageToBuffer"; }
+};
+
+class Copy3DImageToBufferAction : public Action
+{
+    public:
+        Copy3DImageToBufferAction() {}
+        virtual ~Copy3DImageToBufferAction() {}
+
+        clMemWrapper mSrcImage, mDstBuffer;
+        size_t       mWidth, mHeight, mDepth;
+
+        virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue );
+        virtual cl_int Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent );
+
+        virtual const char * GetName( void ) const { return "Copy3DImageToBuffer"; }
+};
+
+class CopyBufferTo2DImageAction : public Action
+{
+    public:
+        CopyBufferTo2DImageAction() {}
+        virtual ~CopyBufferTo2DImageAction() {}
+
+        clMemWrapper mSrcBuffer, mDstImage;
+        size_t       mWidth, mHeight;
+
+        virtual cl_int Setup( cl_device_id device, cl_context context,
cl_command_queue queue );
+        virtual cl_int Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent );
+
+        virtual const char * GetName( void ) const { return "CopyBufferTo2D"; }
+};
+
+class CopyBufferTo3DImageAction : public Action
+{
+    public:
+        CopyBufferTo3DImageAction() {}
+        virtual ~CopyBufferTo3DImageAction() {}
+
+        clMemWrapper mSrcBuffer, mDstImage;
+        size_t       mWidth, mHeight, mDepth;
+
+        virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue );
+        virtual cl_int Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent );
+
+        virtual const char * GetName( void ) const { return "CopyBufferTo3D"; }
+};
+
+class MapImageAction : public Action
+{
+    public:
+        MapImageAction() : mQueue(0) {}
+
+        clMemWrapper     mImage;
+        size_t           mWidth, mHeight;
+        void             *mMappedPtr;
+        cl_command_queue mQueue;
+
+        virtual ~MapImageAction();
+        virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue );
+        virtual cl_int Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent );
+
+        virtual const char * GetName( void ) const { return "MapImage"; }
+};
+
+
+#endif // _action_classes_h
diff --git a/test_conformance/events/main.c b/test_conformance/events/main.c
new file mode 100644
index 00000000..5a6600c7
--- /dev/null
+++ b/test_conformance/events/main.c
@@ -0,0 +1,108 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "../../test_common/harness/compat.h"
+
+/* NOTE(review): the two bare #include lines below appear to have lost their
+   targets (likely angle-bracket system headers, e.g. <stdio.h>) in this copy
+   of the patch — restore before applying. */
+#include
+#include
+#include "procs.h"
+#include "../../test_common/harness/testHarness.h"
+#if !defined(_WIN32)
+#include
+#endif
+
+/* Test entry points, in the same order as basefn_names below; the ct_assert
+   further down keeps the two tables in sync. */
+basefn    basefn_list[] = {
+    test_event_get_execute_status,
+    test_event_get_write_array_status,
+    test_event_get_read_array_status,
+    test_event_get_info,
+    test_event_wait_for_execute,
+    test_event_wait_for_array,
+    test_event_flush,
+    test_event_finish_execute,
+    test_event_finish_array,
+    test_event_release_before_done,
+    test_event_enqueue_marker,
+    #ifdef CL_VERSION_1_2
+    test_event_enqueue_marker_with_list,
+    test_event_enqueue_barrier_with_list,
+    #endif
+
+
+    test_event_waitlist_single_queue,
+    test_event_waitlist_multi_queue,
+    test_event_waitlist_multi_queue_multi_device,
+    test_event_enqueue_wait_for_events_single_queue,
+    test_event_enqueue_wait_for_events_multi_queue,
+    test_event_enqueue_wait_for_events_multi_queue_multi_device,
+    test_event_enqueue_marker_single_queue,
+    test_event_enqueue_marker_multi_queue,
+    test_event_enqueue_marker_multi_queue_multi_device,
+    test_event_enqueue_barrier_single_queue,
+
+    test_waitlists,
+    test_userevents,
+    test_callbacks,
+    test_callbacks_simultaneous,
+    test_userevents_multithreaded,
+};
+
+const char    *basefn_names[] = {
+    "event_get_execute_status",
+    "event_get_write_array_status",
+    "event_get_read_array_status",
+    "event_get_info",
+    "event_wait_for_execute",
+    "event_wait_for_array",
+    "event_flush",
+    "event_finish_execute",
+    "event_finish_array",
+    "event_release_before_done",
+    "event_enqueue_marker",
+#ifdef CL_VERSION_1_2
+    "event_enqueue_marker_with_event_list",
+    "event_enqueue_barrier_with_event_list",
+#endif
+
+    "out_of_order_event_waitlist_single_queue",
+    "out_of_order_event_waitlist_multi_queue",
+    "out_of_order_event_waitlist_multi_queue_multi_device",
+    "out_of_order_event_enqueue_wait_for_events_single_queue",
+    "out_of_order_event_enqueue_wait_for_events_multi_queue",
+    "out_of_order_event_enqueue_wait_for_events_multi_queue_multi_device",
+    "out_of_order_event_enqueue_marker_single_queue",
+    "out_of_order_event_enqueue_marker_multi_queue",
+    "out_of_order_event_enqueue_marker_multi_queue_multi_device",
+    "out_of_order_event_enqueue_barrier_single_queue",
+
+    "waitlists",
+    "test_userevents",
+
+    "callbacks",
+    "callbacks_simultaneous",
+
+    "userevents_multithreaded",
+};
+
+/* Compile-time guard: the names table and the function table must stay the
+   same length (both are gated on the same CL_VERSION_1_2 condition). */
+ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_list) / sizeof(basefn_list[0])));
+
+int num_fns = sizeof(basefn_names) / sizeof(char *);
+
+int main(int argc, const char *argv[])
+{
+    return runTestHarness( argc, argv, num_fns, basefn_list, basefn_names, false, false, 0 );
+}
+
+
diff --git a/test_conformance/events/procs.h b/test_conformance/events/procs.h
new file mode 100644
index 00000000..4dcf7509
--- /dev/null
+++ b/test_conformance/events/procs.h
@@ -0,0 +1,61 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "../../test_common/harness/errorHelpers.h"
+#include "../../test_common/harness/kernelHelpers.h"
+#include "../../test_common/harness/typeWrappers.h"
+#include "../../test_common/harness/clImageHelper.h"
+
+extern float    random_float(float low, float high);
+extern float    calculate_ulperror(float a, float b);
+
+
+extern int        test_event_get_execute_status(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_event_get_write_array_status(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_event_get_read_array_status(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_event_get_info( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_event_wait_for_execute(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_event_wait_for_array(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_event_flush(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_event_finish_execute(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_event_finish_array(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_event_release_before_done(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_event_enqueue_marker(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+#ifdef CL_VERSION_1_2
+extern int        test_event_enqueue_marker_with_list(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_event_enqueue_barrier_with_list(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+#endif
+
+extern int        test_event_waitlist_single_queue(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_event_waitlist_multi_queue( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_event_waitlist_multi_queue_multi_device(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+
+extern int        test_event_enqueue_wait_for_events_single_queue(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_event_enqueue_wait_for_events_multi_queue( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_event_enqueue_wait_for_events_multi_queue_multi_device(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+
+extern int        test_event_enqueue_barrier_single_queue(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+
+extern int        test_event_enqueue_marker_single_queue(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_event_enqueue_marker_multi_queue( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_event_enqueue_marker_multi_queue_multi_device(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+
+extern int        test_waitlists( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
+extern int        test_userevents( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
+extern int        test_callbacks( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
+extern int        test_callbacks_simultaneous( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
+extern int        test_userevents_multithreaded( cl_device_id deviceID, cl_context context,
cl_command_queue queue, int num_elements );
+
+
diff --git a/test_conformance/events/testBase.h b/test_conformance/events/testBase.h
new file mode 100644
index 00000000..5073b21f
--- /dev/null
+++ b/test_conformance/events/testBase.h
@@ -0,0 +1,31 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef _testBase_h
+#define _testBase_h
+
+#include "../../test_common/harness/compat.h"
+
+// NOTE(review): the four bare #include lines below appear to have lost their
+// targets (likely angle-bracket system/CL headers) in this copy of the patch.
+#include
+#include
+#include
+#include
+
+#include "procs.h"
+
+#endif // _testBase_h
+
+
+
diff --git a/test_conformance/events/test_callbacks.cpp b/test_conformance/events/test_callbacks.cpp
new file mode 100644
index 00000000..3286fd6e
--- /dev/null
+++ b/test_conformance/events/test_callbacks.cpp
@@ -0,0 +1,341 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "testBase.h"
+#include "action_classes.h"
+#include "../../test_common/harness/conversions.h"
+#include "../../test_common/harness/ThreadPool.h"
+
+#if !defined (_MSC_VER)
+#include
+#endif // !_MSC_VER
+
+extern const char *IGetStatusString( cl_int status );
+
+#define PRINT_OPS 0
+
+// Yes, this is somewhat nasty, in that we're relying on the CPU (the real CPU, not the OpenCL device)
+// to be atomic w.r.t. boolean values. Although if it isn't, we'll just miss the check on this bool
+// until the next time around, so it's not that big of a deal. Ideally, we'd be using a semaphore with
+// a trywait on it, but then that introduces the fun issue of what to do on Win32, etc. This way is
+// far more portable, and worst case of failure is a slightly longer test run.
+// NOTE(review): single_event_callback_function_flags below only sets the
+// per-type flags array, never this bool — the `if( sCallbackTriggered )`
+// check later in this file can therefore never take its early-out path from
+// these callbacks. TODO confirm whether that is intended.
+static bool sCallbackTriggered = false;
+
+
+#define EVENT_CALLBACK_TYPE_TOTAL 3
+static bool sCallbackTriggered_flag[ EVENT_CALLBACK_TYPE_TOTAL ] ={ false,false, false };
+cl_int event_callback_types[EVENT_CALLBACK_TYPE_TOTAL] ={ CL_SUBMITTED, CL_RUNNING, CL_COMPLETE};
+
+// Our callback function
+// NOTE(review): dead commented-out code — consider deleting it.
+/*void CL_CALLBACK single_event_callback_function( cl_event event, cl_int commandStatus, void * userData )
+{
+    int i=*static_cast(userData);
+    log_info( "\tEvent callback %d triggered\n", i);
+    sCallbackTriggered_flag [ i ] = true;
+}*/
+
+/* use struct as call back para */
+// NOTE(review): member name "enevt_type" looks like a typo for "event_type";
+// renaming would touch every use below, so it is flagged rather than changed.
+typedef struct { cl_int enevt_type; int index; } CALL_BACK_USER_DATA;
+
+void CL_CALLBACK single_event_callback_function_flags( cl_event event, cl_int commandStatus, void * userData )
+{
+    // int i=*static_cast(userData);
+    CALL_BACK_USER_DATA *pdata= static_cast(userData);
+
+    log_info( "\tEvent callback %d of type %d triggered\n", pdata->index, pdata->enevt_type);
+    sCallbackTriggered_flag [pdata->index ] = true;
+}
+
+int test_callback_event_single( cl_device_id device, cl_context context, cl_command_queue queue, Action *actionToTest )
+{
+    // Note: we don't use the waiting feature here.
We just want to verify that we get a callback called
+    // when the given event finishes
+
+    cl_int error = actionToTest->Setup( device, context, queue );
+    test_error( error, "Unable to set up test action" );
+
+    // Set up a user event, which we use as a gate for the second event
+    clEventWrapper gateEvent = clCreateUserEvent( context, &error );
+    test_error( error, "Unable to set up user gate event" );
+
+    // Set up the execution of the action with its actual event
+    clEventWrapper actualEvent;
+    error = actionToTest->Execute( queue, 1, &gateEvent, &actualEvent );
+    test_error( error, "Unable to set up action execution" );
+
+    // Set up the callback on the actual event
+
+    /* use struct as call back para */
+    CALL_BACK_USER_DATA user_data[EVENT_CALLBACK_TYPE_TOTAL];
+    int index [EVENT_CALLBACK_TYPE_TOTAL]={ 0,1,2};
+    for( int i=0;i< EVENT_CALLBACK_TYPE_TOTAL; i++)
+    {
+        user_data[i].enevt_type=event_callback_types[i];
+        user_data[i].index =i;
+        // NOTE(review): `error` is assigned here but never checked inside the
+        // loop — consider a test_error() per iteration.
+        error = clSetEventCallback( actualEvent, event_callback_types[i], single_event_callback_function_flags, user_data+i );
+
+    }
+
+    // Now release the user event, which will allow our actual action to run
+    error = clSetUserEventStatus( gateEvent, CL_COMPLETE );
+    test_error( error, "Unable to trigger gate event" );
+
+    // Now we wait for completion. Note that we can actually wait on the event itself, at least at first
+    error = clWaitForEvents( 1, &actualEvent );
+    test_error( error, "Unable to wait for actual test event" );
+
+    // Note: we can check our callback now, and it MIGHT have been triggered, but that's not guaranteed
+    if( sCallbackTriggered )
+    {
+        // We're all good, so return success
+        return 0;
+    }
+
+    // The callback has not yet been called, but that doesn't mean it won't be. So wait for it
+    log_info( "\tWaiting for callback..."
); + fflush( stdout ); + for( int i = 0; i < 10 * 10; i++ ) + { + usleep( 100000 ); // 1/10th second + + int cc=0; + for( int k=0;k< EVENT_CALLBACK_TYPE_TOTAL;k++) + if (sCallbackTriggered_flag[k]) { + cc++; + } + + if (cc== EVENT_CALLBACK_TYPE_TOTAL ) + { + log_info( "\n" ); + return 0; + } + log_info( "." ); + fflush( stdout ); + } + + // If we got here, we never got the callback + log_error( "\nCallback not called within 10 seconds! (assuming failure)\n" ); + return -1; +} + +#define TEST_ACTION( name ) \ +{ \ + name##Action action; \ + log_info( "-- Testing " #name "...\n" ); \ + if( ( error = test_callback_event_single( deviceID, context, queue, &action ) ) != CL_SUCCESS ) \ + retVal++; \ + clFinish( queue ); \ +} + +int test_callbacks( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + cl_int error; + int retVal = 0; + + log_info( "\n" ); + + TEST_ACTION( NDRangeKernel ) + + TEST_ACTION( ReadBuffer ) + TEST_ACTION( WriteBuffer ) + TEST_ACTION( MapBuffer ) + TEST_ACTION( UnmapBuffer ) + + if( checkForImageSupport( deviceID ) == CL_IMAGE_FORMAT_NOT_SUPPORTED ) + { + log_info( "\nNote: device does not support images. Skipping remainder of callback tests...\n" ); + } + else + { + TEST_ACTION( ReadImage2D ) + TEST_ACTION( WriteImage2D ) + TEST_ACTION( CopyImage2Dto2D ) + TEST_ACTION( Copy2DImageToBuffer ) + TEST_ACTION( CopyBufferTo2DImage ) + TEST_ACTION( MapImage ) + + if( checkFor3DImageSupport( deviceID ) == CL_IMAGE_FORMAT_NOT_SUPPORTED ) + log_info( "\nNote: device does not support 3D images. 
Skipping remainder of waitlist tests...\n" ); + else + { + TEST_ACTION( ReadImage3D ) + TEST_ACTION( WriteImage3D ) + TEST_ACTION( CopyImage2Dto3D ) + TEST_ACTION( CopyImage3Dto2D ) + TEST_ACTION( CopyImage3Dto3D ) + TEST_ACTION( Copy3DImageToBuffer ) + TEST_ACTION( CopyBufferTo3DImage ) + } + } + + return retVal; +} + +#define SIMUTANEOUS_ACTION_TOTAL 18 +static bool sSimultaneousFlags[ 54 ];// for 18 actions with 3 callback status +static volatile int sSimultaneousCount; + +Action * actions[ 19 ] = { 0 }; + +// Callback for the simultaneous tests +void CL_CALLBACK simultaneous_event_callback_function( cl_event event, cl_int commandStatus, void * userData ) +{ + int eventIndex = (int)(size_t)userData; + int actionIndex = eventIndex/EVENT_CALLBACK_TYPE_TOTAL; + int statusIndex = eventIndex%EVENT_CALLBACK_TYPE_TOTAL; + log_info( "\tEvent callback triggered for action %s callback type %s \n", actions[actionIndex]->GetName(), IGetStatusString(statusIndex) ); + sSimultaneousFlags[ actionIndex ] = true; + ThreadPool_AtomicAdd(&sSimultaneousCount,1); +} + +int test_callbacks_simultaneous( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + cl_int error; + + // Unlike the singles test, in this one, we run a bunch of events all at once, to verify that + // the callbacks do get called once-and-only-once for each event, even if the run out of order or + // are dependent on each other + + // First, the list of actions to run + int actionCount = 0, index = 0; + + actions[ index++ ] = new NDRangeKernelAction(); + actions[ index++ ] = new ReadBufferAction(); + actions[ index++ ] = new WriteBufferAction(); + actions[ index++ ] = new MapBufferAction(); + actions[ index++ ] = new UnmapBufferAction(); + + if( checkForImageSupport( deviceID ) != CL_IMAGE_FORMAT_NOT_SUPPORTED ) + { + actions[ index++ ] = new ReadImage2DAction(); + actions[ index++ ] = new WriteImage2DAction(); + actions[ index++ ] = new CopyImage2Dto2DAction(); + actions[ index++ 
] = new Copy2DImageToBufferAction(); + actions[ index++ ] = new CopyBufferTo2DImageAction(); + actions[ index++ ] = new MapImageAction(); + + if( checkFor3DImageSupport( deviceID ) != CL_IMAGE_FORMAT_NOT_SUPPORTED ) + { + actions[ index++ ] = new ReadImage3DAction(); + actions[ index++ ] = new WriteImage3DAction(); + actions[ index++ ] = new CopyImage2Dto3DAction(); + actions[ index++ ] = new CopyImage3Dto2DAction(); + actions[ index++ ] = new CopyImage3Dto3DAction(); + actions[ index++ ] = new Copy3DImageToBufferAction(); + actions[ index++ ] = new CopyBufferTo3DImageAction(); + } + } + actionCount = index; + actions[ index++ ] = NULL; + + // Now set them all up + log_info( "\tSetting up test events...\n" ); + for( index = 0; actions[ index ] != NULL; index++ ) + { + error = actions[ index ]->Setup( deviceID, context, queue ); + test_error( error, "Unable to set up test action" ); + sSimultaneousFlags[ index ] = false; + } + sSimultaneousCount = 0; + + // Set up the user event to start them all + clEventWrapper gateEvent = clCreateUserEvent( context, &error ); + test_error( error, "Unable to set up user gate event" ); + + // Start executing, all tied to the gate event + //clEventWrapper actionEvents[ 18 ];// current actionCount is 18 + clEventWrapper *actionEvents= new clEventWrapper[actionCount]; + if (actionEvents == NULL) + { + log_error(" memory error in test_callbacks_simultaneous \n"); + for (size_t i=0;i<(sizeof(actions)/sizeof(actions[0]));++i) + if (actions[i]) delete actions[i]; + return -1; + } + + RandomSeed seed( gRandomSeed ); + for( index = 0; actions[ index ] != NULL; index++ ) + { + // Randomly choose to wait on the gate, or wait on the previous event + cl_event * eventPtr = &gateEvent; + if( ( index > 0 ) && ( random_in_range( 0, 255, seed ) & 1 ) ) + eventPtr = &actionEvents[ index - 1 ]; + + error = actions[ index ]->Execute( queue, 1, eventPtr, &actionEvents[ index ] ); + test_error( error, "Unable to execute test action" ); + + + for( int 
k=0; k< EVENT_CALLBACK_TYPE_TOTAL; k++) + { + error = clSetEventCallback( actionEvents[index], event_callback_types[k], simultaneous_event_callback_function, (void *)(size_t)(index*EVENT_CALLBACK_TYPE_TOTAL+k ) ); + test_error( error, "Unable to set event callback function" ); + + } + } + + int total_callbacks= actionCount * EVENT_CALLBACK_TYPE_TOTAL; + + // Now release the user event, which will allow our actual action to run + error = clSetUserEventStatus( gateEvent, CL_COMPLETE ); + test_error( error, "Unable to trigger gate event" ); + + // Wait on the actual action events now + log_info( "\tWaiting for test completions...\n" ); + error = clWaitForEvents( actionCount, &actionEvents[ 0 ] ); + test_error( error, "Unable to wait for actual test events" ); + + // Note: we can check our callback now, and it MIGHT have been triggered, but that's not guaranteed + int last_count = 0; + if( ((last_count = sSimultaneousCount)) == total_callbacks) + { + // We're all good, so return success + log_info( "\t%d of %d callbacks received\n", sSimultaneousCount, total_callbacks ); + + if (actionEvents) delete [] actionEvents; + for (size_t i=0;i<(sizeof(actions)/sizeof(actions[0]));++i) + if (actions[i]) delete actions[i]; + return 0; + } + + // We haven't gotten (all) of the callbacks, so wait for them + log_info( "\tWe've only received %d of the %d callbacks we expected; waiting for more...\n", last_count, total_callbacks ); + + for( int i = 0; i < 10 * 10; i++ ) + { + usleep( 100000 ); // 1/10th second + if( ((last_count = sSimultaneousCount)) == total_callbacks ) + { + // All of the callbacks were executed + if (actionEvents) delete [] actionEvents; + for (size_t i=0;i<(sizeof(actions)/sizeof(actions[0]));++i) + if (actions[i]) delete actions[i]; + return 0; + } + } + + // If we got here, some of the callbacks did not occur in time + log_error( "\nError: We only ever received %d of our %d callbacks!\n", last_count, total_callbacks ); + log_error( "Events that did not receive 
callbacks:\n" ); + for( index = 0; actions[ index ] != NULL; index++ ) + { + if( !sSimultaneousFlags[ index ] ) + log_error( "\t%s\n", actions[ index ]->GetName() ); + } + + if (actionEvents) delete [] actionEvents; + return -1; + +} + diff --git a/test_conformance/events/test_event_dependencies.cpp b/test_conformance/events/test_event_dependencies.cpp new file mode 100644 index 00000000..6665d8d5 --- /dev/null +++ b/test_conformance/events/test_event_dependencies.cpp @@ -0,0 +1,509 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "testBase.h" +#include "../../test_common/harness/testHarness.h" + +const char *write_kernels[] = { + "__kernel void write_up(__global int *dst, int length)\n" + "{\n" + "\n" + " dst[get_global_id(0)] *= 2;\n" + "\n" + "}\n" + "__kernel void write_down(__global int *dst, int length)\n" + "{\n" + "\n" + " dst[get_global_id(0)]--;\n" + "\n" + "}\n" +}; + +#define TEST_SIZE 10000 +#define TEST_COUNT 100 +#define RANDOMIZE 1 +#define DEBUG_OUT 0 + +/* + Tests event dependencies by running two kernels that use the same buffer. + If two_queues is set they are run in separate queues. + If test_enqueue_wait_for_events is set then clEnqueueWaitForEvent is called between them. + If test_barrier is set then clEnqueueBarrier is called between them (only for single queue). + If neither are set, nothing is done to prevent them from executing in the wrong order. 
This can be used for verification. + */ +int test_event_enqueue_wait_for_events_run_test( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, int two_queues, int two_devices, + int test_enqueue_wait_for_events, int test_barrier, int use_waitlist, int use_marker) +{ + cl_int error = CL_SUCCESS; + size_t threads[3] = {TEST_SIZE,0,0}; + int i, loop_count, event_count, expected_value, failed; + int expected_if_only_queue[2]; + int max_count = TEST_SIZE; + + cl_platform_id platform; + cl_command_queue queues[2]; // Not a wrapper so we don't autorelease if they are the same + clCommandQueueWrapper queueWrappers[2]; // If they are different, we use the wrapper so it will auto release + clContextWrapper context_to_use; + clMemWrapper data; + clProgramWrapper program; + clKernelWrapper kernel1[TEST_COUNT], kernel2[TEST_COUNT]; + clEventWrapper event[TEST_COUNT*4+2]; // If we usemarkers we get 2 more events per iteration + + if (test_enqueue_wait_for_events) + log_info("\tTesting with clEnqueueBarrierWithWaitList as barrier function.\n"); + if (test_barrier) + log_info("\tTesting with clEnqueueBarrierWithWaitList as barrier function.\n"); + if (use_waitlist) + log_info("\tTesting with waitlist-based depenednecies between kernels.\n"); + if (use_marker) + log_info("\tTesting with clEnqueueMarker as a barrier function.\n"); + if (test_barrier && (two_queues || two_devices)) { + log_error("\tTest requested with clEnqueueBarrier across two queues. This is not a valid combination.\n"); + return -1; + } + + error = clGetPlatformIDs(1, &platform, NULL); + test_error(error, "clGetPlatformIDs failed."); + + // If we are to use two devices, then get them and create a context with both. 
+ cl_device_id *two_device_ids; + if (two_devices) { + two_device_ids = (cl_device_id*)malloc(sizeof(cl_device_id)*2); + cl_uint number_returned; + error = clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 2, two_device_ids, &number_returned); + test_error( error, "clGetDeviceIDs for CL_DEVICE_TYPE_ALL failed."); + if (number_returned != 2) { + log_info("Failed to obtain two devices. Test can not run.\n"); + free(two_device_ids); + return 0; + } + + for (i=0; i<2; i++) { + cl_device_type type; + error = clGetDeviceInfo(two_device_ids[i], CL_DEVICE_TYPE, sizeof(cl_device_type), &type, NULL); + test_error( error, "clGetDeviceInfo failed."); + if (type & CL_DEVICE_TYPE_CPU) + log_info("\tDevice %d is CL_DEVICE_TYPE_CPU.\n", i); + if (type & CL_DEVICE_TYPE_GPU) + log_info("\tDevice %d is CL_DEVICE_TYPE_GPU.\n", i); + if (type & CL_DEVICE_TYPE_ACCELERATOR) + log_info("\tDevice %d is CL_DEVICE_TYPE_ACCELERATOR.\n", i); + if (type & CL_DEVICE_TYPE_DEFAULT) + log_info("\tDevice %d is CL_DEVICE_TYPE_DEFAULT.\n", i); + } + + context_to_use = clCreateContext(NULL, 2, two_device_ids, notify_callback, NULL, &error); + test_error(error, "clCreateContext failed for two devices."); + + log_info("\tTesting with two devices.\n"); + } else { + context_to_use = clCreateContext(NULL, 1, &deviceID, NULL, NULL, &error); + test_error(error, "clCreateContext failed for one device."); + + log_info("\tTesting with one device.\n"); + } + + // If we are using two queues then create them + cl_queue_properties props[] = {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0}; + if (two_queues) { + // Get a second queue + if (two_devices) + { + if( !checkDeviceForQueueSupport( two_device_ids[ 0 ], props[1] ) || + !checkDeviceForQueueSupport( two_device_ids[ 1 ], props[1] ) ) + { + log_info( "WARNING: One or more device for multi-device test does not support out-of-order exec mode; skipping test.\n" ); + return -1942; + } + + queueWrappers[0] = 
clCreateCommandQueueWithProperties(context_to_use, two_device_ids[0], &props[0], &error); + test_error(error, "clCreateCommandQueue for first queue on first device failed."); + queueWrappers[1] = clCreateCommandQueueWithProperties(context_to_use, two_device_ids[1], &props[0], &error); + test_error(error, "clCreateCommandQueue for second queue on second device failed."); + + } + else + { + // Single device has already been checked for out-of-order exec support + queueWrappers[0] = clCreateCommandQueueWithProperties(context_to_use, deviceID, &props[0], &error); + test_error(error, "clCreateCommandQueue for first queue failed."); + queueWrappers[1] = clCreateCommandQueueWithProperties(context_to_use, deviceID, &props[0], &error); + test_error(error, "clCreateCommandQueue for second queue failed."); + } + // Ugly hack to make sure we only have the wrapper auto-release if they are different queues + queues[0] = queueWrappers[0]; + queues[1] = queueWrappers[1]; + log_info("\tTesting with two queues.\n"); + } + else + { + // (Note: single device has already been checked for out-of-order exec support) + // Otherwise create one queue and have the second one be the same + queueWrappers[0] = clCreateCommandQueueWithProperties(context_to_use, deviceID, &props[0], &error); + test_error(error, "clCreateCommandQueue for first queue failed."); + queues[0] = queueWrappers[0]; + queues[1] = (cl_command_queue)queues[0]; + log_info("\tTesting with one queue.\n"); + } + + + // Setup - create a buffer and the two kernels + data = clCreateBuffer(context_to_use, CL_MEM_READ_WRITE, TEST_SIZE*sizeof(cl_int), NULL, &error); + test_error( error, "clCreateBuffer failed"); + + + // Initialize the values to zero + cl_int *values = (cl_int*)malloc(TEST_SIZE*sizeof(cl_int)); + for (i=0; i<(int)TEST_SIZE; i++) + values[i] = 0; + error = clEnqueueWriteBuffer(queues[0], data, CL_TRUE, 0, TEST_SIZE*sizeof(cl_int), values, 0, NULL, NULL); + test_error( error, "clEnqueueWriteBuffer failed"); + 
expected_value = 0; + + // Build the kernels + if (create_single_kernel_helper( context_to_use, &program, &kernel1[0], 1, write_kernels, "write_up" )) + return -1; + + error = clSetKernelArg(kernel1[0], 0, sizeof(data), &data); + error |= clSetKernelArg(kernel1[0], 1, sizeof(max_count), &max_count); + test_error( error, "clSetKernelArg 1 failed"); + + for (i=1; i", (int)status ); + return tempString; + } +} + +/* Note: tests clGetEventStatus and clReleaseEvent (implicitly) */ +int test_event_get_execute_status( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + cl_int status; + SETUP_EVENT( context, queue ); + + /* Now wait for it to be done */ + error = clWaitForEvents( 1, &event ); + test_error( error, "Unable to wait for event" ); + + error = clGetEventInfo( event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status ), &status, NULL ); + test_error( error, "Calling clGetEventStatus to wait for event completion failed" ); + if( status != CL_COMPLETE ) + { + log_error( "ERROR: Incorrect status returned from clGetErrorStatus after event complete (%d:%s)\n", status, IGetStatusString( status ) ); + return -1; + } + + FINISH_EVENT(queue); + return 0; +} + +int test_event_get_info( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + SETUP_EVENT( context, queue ); + + /* Verify parameters of clGetEventInfo not already tested by other tests */ + cl_command_queue otherQueue; + size_t size; + + error = clGetEventInfo( event, CL_EVENT_COMMAND_QUEUE, sizeof( otherQueue ), &otherQueue, &size ); + test_error( error, "Unable to get event info!" ); + // We can not check if this is the right queue because this is an opaque object. 
+ if( size != sizeof( queue ) ) + { + log_error( "ERROR: Returned command queue size does not validate (expected %d, got %d)\n", (int)sizeof( queue ), (int)size ); + return -1; + } + + cl_command_type type; + error = clGetEventInfo( event, CL_EVENT_COMMAND_TYPE, sizeof( type ), &type, &size ); + test_error( error, "Unable to get event info!" ); + if( type != CL_COMMAND_NDRANGE_KERNEL ) + { + log_error( "ERROR: Returned command type does not validate (expected %d, got %d)\n", (int)CL_COMMAND_NDRANGE_KERNEL, (int)type ); + return -1; + } + if( size != sizeof( type ) ) + { + log_error( "ERROR: Returned command type size does not validate (expected %d, got %d)\n", (int)sizeof( type ), (int)size ); + return -1; + } + + cl_uint count; + error = clGetEventInfo( event, CL_EVENT_REFERENCE_COUNT, sizeof( count ), &count, &size ); + test_error( error, "Unable to get event info for CL_EVENT_REFERENCE_COUNT!" ); + if( size != sizeof( count ) ) + { + log_error( "ERROR: Returned command type size does not validate (expected %d, got %d)\n", (int)sizeof( type ), (int)size ); + return -1; + } + + cl_context testCtx; + error = clGetEventInfo( event, CL_EVENT_CONTEXT, sizeof( testCtx ), &testCtx, &size ); + test_error( error, "Unable to get event context info!" 
); + if( size != sizeof( context ) ) + { + log_error( "ERROR: Returned context size does not validate (expected %d, got %d)\n", (int)sizeof( context ), (int)size ); + return -1; + } + if( testCtx != context ) + { + log_error( "ERROR: Returned context does not match (expected %p, got %p)\n", (void *)context, (void *)testCtx ); + return -1; + } + + FINISH_EVENT(queue); + return 0; +} + +int test_event_get_write_array_status( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + cl_mem stream; + cl_float testArray[ 1024 * 32 ]; + cl_event event; + int error; + cl_int status; + + + stream = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * 1024 * 32, NULL, &error ); + test_error( error, "Creating test array failed" ); + + error = clEnqueueWriteBuffer(queue, stream, CL_FALSE, 0, sizeof(cl_float)*1024*32, (void *)testArray, 0, NULL, &event); + test_error( error, "Unable to set testing kernel data" ); + + /* Now wait for it to be done */ + error = clWaitForEvents( 1, &event ); + test_error( error, "Unable to wait for event" ); + + error = clGetEventInfo( event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status ), &status, NULL ); + test_error( error, "Calling clGetEventStatus to wait for event completion failed" ); + if( status != CL_COMPLETE ) + { + log_error( "ERROR: Incorrect status returned from clGetErrorStatus after array write complete (%d:%s)\n", status, IGetStatusString( status ) ); + return -1; + } + + + clReleaseMemObject( stream ); + clReleaseEvent( event ); + + return 0; +} + +int test_event_get_read_array_status( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + cl_mem stream; + cl_float testArray[ 1024 * 32 ]; + cl_event event; + int error; + cl_int status; + + + stream = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * 1024 * 32, NULL, &error ); + test_error( error, "Creating test array failed" ); + + error = 
clEnqueueReadBuffer(queue, stream, CL_FALSE, 0, sizeof(cl_float)*1024*32, (void *)testArray, 0, NULL, &event); + test_error( error, "Unable to get testing kernel data" ); + + + /* It should still be running... */ + error = clGetEventInfo( event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status ), &status, NULL ); + test_error( error, "Calling clGetEventStatus didn't work!" ); + + if( status != CL_RUNNING && status != CL_QUEUED && status != CL_SUBMITTED && status != CL_COMPLETE) + { + log_error( "ERROR: Incorrect status returned from clGetErrorStatus during array read (%d:%s)\n", status, IGetStatusString( status ) ); + return -1; + } + + /* Now wait for it to be done */ + error = clWaitForEvents( 1, &event ); + test_error( error, "Unable to wait for event" ); + + error = clGetEventInfo( event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status ), &status, NULL ); + test_error( error, "Calling clGetEventStatus to wait for event completion failed" ); + if( status != CL_COMPLETE ) + { + log_error( "ERROR: Incorrect status returned from clGetErrorStatus after array read complete (%d:%s)\n", status, IGetStatusString( status ) ); + return -1; + } + + + clReleaseMemObject( stream ); + clReleaseEvent( event ); + + return 0; +} + +/* clGetEventStatus not implemented yet */ + +int test_event_wait_for_execute( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + cl_int status; + SETUP_EVENT( context, queue ); + + /* Now we wait for it to be done, then test the status again */ + error = clWaitForEvents( 1, &event ); + test_error( error, "Unable to wait for execute event" ); + + /* Make sure it worked */ + error = clGetEventInfo( event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status ), &status, NULL ); + test_error( error, "Calling clGetEventStatus didn't work!" 
); + if( status != CL_COMPLETE ) + { + log_error( "ERROR: Incorrect status returned from clGetErrorStatus after event complete (%d:%s)\n", status, IGetStatusString( status ) ); + return -1; + } + + FINISH_EVENT(queue); + return 0; +} + +int test_event_wait_for_array( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + cl_mem streams[2]; + cl_float readArray[ 1024 * 32 ]; + cl_float writeArray[ 1024 * 32 ]; + cl_event events[2]; + int error; + cl_int status; + + + streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * 1024 * 32, NULL, &error ); + test_error( error, "Creating test array failed" ); + streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * 1024 * 32, NULL, &error ); + test_error( error, "Creating test array failed" ); + + error = clEnqueueReadBuffer(queue, streams[0], CL_FALSE, 0, sizeof(cl_float)*1024*32, (void *)readArray, 0, NULL, &events[0]); + test_error( error, "Unable to read testing kernel data" ); + + error = clEnqueueWriteBuffer(queue, streams[1], CL_FALSE, 0, sizeof(cl_float)*1024*32, (void *)writeArray, 0, NULL, &events[1]); + test_error( error, "Unable to write testing kernel data" ); + + /* Both should still be running */ + error = clGetEventInfo( events[0], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status ), &status, NULL ); + test_error( error, "Calling clGetEventStatus didn't work!" ); + if( status != CL_RUNNING && status != CL_QUEUED && status != CL_SUBMITTED && status != CL_COMPLETE) + { + log_error( "ERROR: Incorrect status returned from clGetErrorStatus during array read (%d:%s)\n", status, IGetStatusString( status ) ); + return -1; + } + + error = clGetEventInfo( events[1], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status ), &status, NULL ); + test_error( error, "Calling clGetEventStatus didn't work!" 
); + if( status != CL_RUNNING && status != CL_QUEUED && status != CL_SUBMITTED && status != CL_COMPLETE) + { + log_error( "ERROR: Incorrect status returned from clGetErrorStatus during array write (%d:%s)\n", status, IGetStatusString( status ) ); + return -1; + } + + /* Now try waiting for both */ + error = clWaitForEvents( 2, events ); + test_error( error, "Unable to wait for array events" ); + + /* Double check status on both */ + error = clGetEventInfo( events[0], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status ), &status, NULL ); + test_error( error, "Calling clGetEventStatus didn't work!" ); + if( status != CL_COMPLETE ) + { + log_error( "ERROR: Incorrect status returned from clGetErrorStatus after array read complete (%d:%s)\n", status, IGetStatusString( status ) ); + return -1; + } + + error = clGetEventInfo( events[1], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status ), &status, NULL ); + test_error( error, "Calling clGetEventStatus didn't work!" ); + if( status != CL_COMPLETE ) + { + log_error( "ERROR: Incorrect status returned from clGetErrorStatus after array write complete (%d:%s)\n", status, IGetStatusString( status ) ); + return -1; + } + + clReleaseMemObject( streams[0] ); + clReleaseMemObject( streams[1] ); + clReleaseEvent( events[0] ); + clReleaseEvent( events[1] ); + + return 0; +} + +int test_event_flush( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int loopCount = 0; + cl_int status; + SETUP_EVENT( context, queue ); + + /* Now flush. Note that we can't guarantee this actually lets the op finish, but we can guarantee it's no longer queued */ + error = clFlush( queue ); + test_error( error, "Unable to flush events" ); + + /* Make sure it worked */ + while (1) { + error = clGetEventInfo( event, CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof( status ), &status, NULL ); + test_error( error, "Calling clGetEventStatus didn't work!" ); + + if( status != CL_QUEUED ) + break; + +#if ! 
defined( _WIN32 ) + sleep(1); // give it some time here. +#else // _WIN32 + Sleep(1000); +#endif + ++loopCount; + } + +/* +CL_QUEUED (command has been enqueued in the command-queue), +CL_SUBMITTED (enqueued command has been submitted by the host to the device associated with the command-queue), +CL_RUNNING (device is currently executing this command), +CL_COMPLETE (the command has completed), or +Error code given by a negative integer value. (command was abnormally terminated – this may be caused by a bad memory access etc.). +*/ + if(status != CL_COMPLETE && status != CL_SUBMITTED && + status != CL_RUNNING && status != CL_COMPLETE) + { + log_error( "ERROR: Incorrect status returned from clGetErrorStatus after event flush (%d:%s)\n", status, IGetStatusString( status ) ); + return -1; + } + + /* Now wait */ + error = clFinish( queue ); + test_error( error, "Unable to finish events" ); + + FINISH_EVENT(queue); + return 0; +} + + +int test_event_finish_execute( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + cl_int status; + SETUP_EVENT( context, queue ); + + /* Now flush and finish all ops */ + error = clFinish( queue ); + test_error( error, "Unable to finish all events" ); + + /* Make sure it worked */ + error = clGetEventInfo( event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status ), &status, NULL ); + test_error( error, "Calling clGetEventStatus didn't work!" 
); + if( status != CL_COMPLETE ) + { + log_error( "ERROR: Incorrect status returned from clGetErrorStatus after event complete (%d:%s)\n", status, IGetStatusString( status ) ); + return -1; + } + + FINISH_EVENT(queue); + return 0; +} + +int test_event_finish_array( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + cl_mem streams[2]; + cl_float readArray[ 1024 * 32 ]; + cl_float writeArray[ 1024 * 32 ]; + cl_event events[2]; + int error; + cl_int status; + + + streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * 1024 * 32, NULL, &error ); + test_error( error, "Creating test array failed" ); + streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * 1024 * 32, NULL, &error ); + test_error( error, "Creating test array failed" ); + + error = clEnqueueReadBuffer(queue, streams[0], CL_FALSE, 0, sizeof(cl_float)*1024*32, (void *)readArray, 0, NULL, &events[0]); + test_error( error, "Unable to read testing kernel data" ); + + error = clEnqueueWriteBuffer(queue, streams[1], CL_FALSE, 0, sizeof(cl_float)*1024*32, (void *)writeArray, 0, NULL, &events[1]); + test_error( error, "Unable to write testing kernel data" ); + + /* Both should still be running */ + error = clGetEventInfo( events[0], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status ), &status, NULL ); + test_error( error, "Calling clGetEventStatus didn't work!" ); + if( status != CL_RUNNING && status != CL_QUEUED && status != CL_SUBMITTED && status != CL_COMPLETE) + { + log_error( "ERROR: Incorrect status returned from clGetErrorStatus during array read (%d:%s)\n", status, IGetStatusString( status ) ); + return -1; + } + + error = clGetEventInfo( events[1], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status ), &status, NULL ); + test_error( error, "Calling clGetEventStatus didn't work!" 
); + if( status != CL_RUNNING && status != CL_QUEUED && status != CL_SUBMITTED && status != CL_COMPLETE) + { + log_error( "ERROR: Incorrect status returned from clGetErrorStatus during array write (%d:%s)\n", status, IGetStatusString( status ) ); + return -1; + } + + /* Now try finishing all ops */ + error = clFinish( queue ); + test_error( error, "Unable to finish all events" ); + + /* Double check status on both */ + error = clGetEventInfo( events[0], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status ), &status, NULL ); + test_error( error, "Calling clGetEventStatus didn't work!" ); + if( status != CL_COMPLETE ) + { + log_error( "ERROR: Incorrect status returned from clGetErrorStatus after array read complete (%d:%s)\n", status, IGetStatusString( status ) ); + return -1; + } + + error = clGetEventInfo( events[1], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status ), &status, NULL ); + test_error( error, "Calling clGetEventStatus didn't work!" ); + if( status != CL_COMPLETE ) + { + log_error( "ERROR: Incorrect status returned from clGetErrorStatus after array write complete (%d:%s)\n", status, IGetStatusString( status ) ); + return -1; + } + + clReleaseMemObject( streams[0] ); + clReleaseMemObject( streams[1] ); + clReleaseEvent( events[0] ); + clReleaseEvent( events[1] ); + + return 0; +} + + +#define NUM_EVENT_RUNS 100 + +int test_event_release_before_done( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + // Create a kernel to run + clProgramWrapper program; + clKernelWrapper kernel[NUM_EVENT_RUNS]; + size_t threads[1] = { 1000 }; + cl_event events[NUM_EVENT_RUNS]; + cl_int status; + clMemWrapper streams[NUM_EVENT_RUNS][2]; + int error, i; + + // Create a kernel + if( create_single_kernel_helper( context, &program, &kernel[0], 1, sample_long_test_kernel, "sample_test" ) ) + { + return -1; + } + + for( i = 1; i < NUM_EVENT_RUNS; i++ ) { + kernel[i] = clCreateKernel(program, "sample_test", &error); + test_error(error, 
"Unable to create kernel"); + } + + error = get_max_common_work_group_size( context, kernel[0], 1024, &threads[0] ); + test_error( error, "Unable to get work group size to use" ); + + // Create a set of streams to use as arguments + for( i = 0; i < NUM_EVENT_RUNS; i++ ) + { + streams[i][0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * threads[0], NULL, &error ); + streams[i][1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * threads[0], NULL, &error ); + if( ( streams[i][0] == NULL ) || ( streams[i][1] == NULL ) ) + { + log_error( "ERROR: Unable to allocate testing streams" ); + return -1; + } + } + + // Execute the kernels one by one, hopefully making sure they won't be done by the time we get to the end + for( i = 0; i < NUM_EVENT_RUNS; i++ ) + { + error = clSetKernelArg( kernel[i], 0, sizeof( cl_mem ), &streams[i][0] ); + error |= clSetKernelArg( kernel[i], 1, sizeof( cl_mem ), &streams[i][1] ); + test_error( error, "Unable to set kernel arguments" ); + + error = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, threads, 0, NULL, &events[i]); + test_error( error, "Unable to execute test kernel" ); + } + + // Free all but the last event + for( i = 0; i < NUM_EVENT_RUNS - 1; i++ ) + { + clReleaseEvent( events[ i ] ); + } + + // Get status on the last one, then free it + error = clGetEventInfo( events[ NUM_EVENT_RUNS - 1 ], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status ), &status, NULL ); + test_error( error, "Unable to get event status" ); + + clReleaseEvent( events[ NUM_EVENT_RUNS - 1 ] ); + + // Was the status still-running? + if( status == CL_COMPLETE ) + { + log_info( "WARNING: Events completed before they could be released, so test is a null-op. Increase workload and try again." 
); + } + else if( status == CL_RUNNING || status == CL_QUEUED || status == CL_SUBMITTED ) + { + log_info( "Note: Event status was running or queued when released, so test was good.\n" ); + } + + // If we didn't crash by now, the test succeeded + clFinish( queue ); + + return 0; +} + +int test_event_enqueue_marker( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + cl_int status; + SETUP_EVENT( context, queue ); + + /* Now we queue a marker and wait for that, which--since it queues afterwards--should guarantee the execute finishes too */ + clEventWrapper markerEvent; + //error = clEnqueueMarker( queue, &markerEvent ); + +#ifdef CL_VERSION_1_2 + error = clEnqueueMarkerWithWaitList(queue, 0, NULL, &markerEvent ); +#else + error = clEnqueueMarker( queue, &markerEvent ); +#endif + test_error( error, "Unable to queue marker" ); + /* Now we wait for it to be done, then test the status again */ + error = clWaitForEvents( 1, &markerEvent ); + test_error( error, "Unable to wait for marker event" ); + + /* Check the status of the first event */ + error = clGetEventInfo( event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status ), &status, NULL ); + test_error( error, "Calling clGetEventInfo didn't work!" 
); + if( status != CL_COMPLETE ) + { + log_error( "ERROR: Incorrect status returned from clGetEventInfo after event complete (%d:%s)\n", status, IGetStatusString( status ) ); + return -1; + } + + FINISH_EVENT(queue); + return 0; +} + +#ifdef CL_VERSION_1_2 +int test_event_enqueue_marker_with_list( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + + cl_int status; + SETUP_EVENT( context, queue ); + cl_event event_list[3]={ NULL, NULL, NULL}; + + size_t threads[1] = { 10 }, localThreads[1]={1}; + cl_uint event_count=2; + error= clEnqueueNDRangeKernel( queue,kernel,1,NULL, threads, localThreads, 0, NULL, &event_list[0]); + test_error( error, " clEnqueueMarkerWithWaitList 1 " ); + + error= clEnqueueNDRangeKernel( queue,kernel,1,NULL, threads, localThreads, 0, NULL, &event_list[1]); + test_error( error, " clEnqueueMarkerWithWaitList 2" ); + + error= clEnqueueNDRangeKernel( queue,kernel,1,NULL, threads, localThreads, 0, NULL, NULL); + test_error( error, " clEnqueueMarkerWithWaitList 3" ); + + // test the case event returned + error =clEnqueueMarkerWithWaitList(queue, event_count, event_list, &event_list[2]); + test_error( error, " clEnqueueMarkerWithWaitList " ); + + error = clReleaseEvent(event_list[0]); + error |= clReleaseEvent(event_list[1]); + test_error( error, "clReleaseEvent" ); + + error= clEnqueueNDRangeKernel( queue,kernel,1,NULL, threads, localThreads, 0, NULL, &event_list[0]); + test_error( error, " clEnqueueMarkerWithWaitList 1 -1 " ); + + error= clEnqueueNDRangeKernel( queue,kernel,1,NULL, threads, localThreads, 0, NULL, &event_list[1]); + test_error( error, " clEnqueueMarkerWithWaitList 2-2" ); + + // test the case event =NULL, caused [CL_INVALID_VALUE] : OpenCL Error : clEnqueueMarkerWithWaitList failed: event is a NULL value + error =clEnqueueMarkerWithWaitList(queue, event_count, event_list, NULL); + test_error( error, " clEnqueueMarkerWithWaitList " ); + + error = clReleaseEvent(event_list[0]); + error |= 
clReleaseEvent(event_list[1]); + error |= clReleaseEvent(event_list[2]); + test_error( error, "clReleaseEvent" ); + + FINISH_EVENT(queue); + return 0; +} +#endif + +#ifdef CL_VERSION_1_2 +int test_event_enqueue_barrier_with_list( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + + cl_int status; + SETUP_EVENT( context, queue ); + cl_event event_list[3]={ NULL, NULL, NULL}; + + size_t threads[1] = { 10 }, localThreads[1]={1}; + cl_uint event_count=2; + error= clEnqueueNDRangeKernel( queue,kernel,1,NULL, threads, localThreads, 0, NULL, &event_list[0]); + test_error( error, " clEnqueueBarrierWithWaitList 1 " ); + + error= clEnqueueNDRangeKernel( queue,kernel,1,NULL, threads, localThreads, 0, NULL, &event_list[1]); + test_error( error, " clEnqueueBarrierWithWaitList 2" ); + + error= clEnqueueNDRangeKernel( queue,kernel,1,NULL, threads, localThreads, 0, NULL, NULL); + test_error( error, " clEnqueueBarrierWithWaitList 20" ); + + // test the case event returned + error =clEnqueueBarrierWithWaitList(queue, event_count, event_list, &event_list[2]); + test_error( error, " clEnqueueBarrierWithWaitList " ); + + clReleaseEvent(event_list[0]); + clReleaseEvent(event_list[1]); + + error= clEnqueueNDRangeKernel( queue,kernel,1,NULL, threads, localThreads, 0, NULL, &event_list[0]); + test_error( error, " clEnqueueBarrierWithWaitList 1 " ); + + error= clEnqueueNDRangeKernel( queue,kernel,1,NULL, threads, localThreads, 0, NULL, &event_list[1]); + test_error( error, " clEnqueueBarrierWithWaitList 2" ); + + // test the case event =NULL, caused [CL_INVALID_VALUE] : OpenCL Error : clEnqueueMarkerWithWaitList failed: event is a NULL value + error = clEnqueueBarrierWithWaitList(queue, event_count, event_list, NULL); + test_error( error, " clEnqueueBarrierWithWaitList " ); + + clReleaseEvent(event_list[0]); + clReleaseEvent(event_list[1]); + clReleaseEvent(event_list[2]); + + FINISH_EVENT(queue); + return 0; +} +#endif diff --git 
a/test_conformance/events/test_userevents.cpp b/test_conformance/events/test_userevents.cpp new file mode 100644 index 00000000..39dea376 --- /dev/null +++ b/test_conformance/events/test_userevents.cpp @@ -0,0 +1,293 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#if defined(__APPLE__) + #include + #include +#else + #include + #include +#endif +#include +#include +#include +#include "../../test_common/harness/kernelHelpers.h" + +/////////////////////////////////////////////////////////////////////////////// +// ATF performance framework. + +#if USE_ATF +#include +#define test_start() ATFTestStart() +#define log_perf(_number, _higherBetter, _numType, _format, ...) ATFLogPerformanceNumber(_number, _higherBetter, _numType, _format,##__VA_ARGS__) +#define log_info ATFLogInfo +#define log_error ATFLogError +#define log_no_atf +#define test_finish() ATFTestFinish() +#else +#define test_start() +#define log_perf(_number, _higherBetter, _numType, _format, ...) printf("Performance Number " _format " (in %s, %s): %g\n",##__VA_ARGS__, _numType, _higherBetter?"higher is better":"lower is better" , _number) +#define log_info(...) fprintf(stdout, ## __VA_ARGS__ ) +#define log_error(...) fprintf(stderr, ## __VA_ARGS__ ) +#define log_info_no_atf(...) log_info(## __VA_ARGS__ ) +#define test_finish() +#endif + +/////////////////////////////////////////////////////////////////////////////// +// CL error checking. 
+ +#define CL_DEVICE_TYPE_ENV_MUST_BE( bitfield_ )\ +{\ +cl_device_type device_type = CL_DEVICE_TYPE_DEFAULT;\ +const char* device_env = getenv("CL_DEVICE_TYPE");\ +if (device_env != NULL) {\ +if (!strcmp( device_env, "gpu" ) || !strcmp( device_env, "CL_DEVICE_TYPE_GPU" ))\ +device_type = CL_DEVICE_TYPE_GPU;\ +else if(!strcmp( device_env, "cpu" ) || !strcmp( device_env, "CL_DEVICE_TYPE_CPU" ))\ +device_type = CL_DEVICE_TYPE_CPU;\ +else if(!strcmp( device_env, "default" ) || !strcmp( device_env, "CL_DEVICE_TYPE_DEFAULT" ))\ +device_type = CL_DEVICE_TYPE_DEFAULT;\ +if (!(device_type & bitfield_)) {\ +log_error( "CL_DEVICE_TYPE environment variable \"%s\" must be \"%s\".", device_env, #bitfield_ );\ +abort();\ +}\ +}\ +}\ + +#define CL_DEVICE_TYPE_ENV( device_type_ )\ +{\ +const char* device_env = getenv("CL_DEVICE_TYPE");\ +if (device_env != NULL) {\ +if (!strcmp( device_env, "gpu" ) || !strcmp( device_env, "CL_DEVICE_TYPE_GPU" ))\ +device_type_ = CL_DEVICE_TYPE_GPU;\ +else if(!strcmp( device_env, "cpu" ) || !strcmp( device_env, "CL_DEVICE_TYPE_CPU" ))\ +device_type_ = CL_DEVICE_TYPE_CPU;\ +else if(!strcmp( device_env, "default" ) || !strcmp( device_env, "CL_DEVICE_TYPE_DEFAULT" ))\ +device_type_ = CL_DEVICE_TYPE_DEFAULT;\ +}\ +} + +#if defined(_MSC_VER) +#define CL_EXIT_ERROR(cmd,...) \ +{ \ +if ((cmd) != CL_SUCCESS) { \ +log_error("CL ERROR: %s %u: ", __FILE__,__LINE__);\ +log_error(## __VA_ARGS__ );\ +log_error("\n");\ +return -1;\ +}\ +} +#else +#define CL_EXIT_ERROR(cmd,format,...) \ +{ \ +if ((cmd) != CL_SUCCESS) { \ +log_error("CL ERROR: %s %u: ", __FILE__,__LINE__);\ +log_error(format,## __VA_ARGS__ );\ +log_error("\n");\ +return -1;\ +}\ +} +#endif + +#define CL_EXIT_BUILD_ERROR(cmd,program,format,...) 
\ +{ \ +if ((cmd) != CL_SUCCESS) { \ +cl_uint num_devices_;\ +clGetProgramInfo(program,CL_PROGRAM_NUM_DEVICES,sizeof(num_devices_),&num_devices_,NULL);\ +cl_device_id *device_list;\ +device_list=(cl_device_id *)malloc(num_devices_*sizeof(cl_device_id));\ +clGetProgramInfo(program,CL_PROGRAM_DEVICES,num_devices_*sizeof(cl_device_id),device_list,NULL);\ +for (unsigned i=0;i= CL_SUBMITTED) ? CL_SUCCESS : -1,"clGetEventInfo %u returned wrong status before user event",i); + } + + log_info("Setting user event status to complete\n"); + CL_EXIT_ERROR(clSetUserEventStatus(u1,CL_COMPLETE),"clSetUserEventStatus failed"); + + log_info("Waiting for tasks to finish executing\n"); + CL_EXIT_ERROR(clWaitForEvents( 1, &e[N-1] ),"clWaitForEvent failed"); + + log_info("Checking task status after setting user event status\n"); + for (cl_uint i = 0; i != N; ++i) { + CL_EXIT_ERROR(clGetEventInfo(e[i],CL_EVENT_COMMAND_EXECUTION_STATUS,sizeof s,&s,0),"clGetEventInfo failed"); + CL_EXIT_ERROR((s != CL_QUEUED) ? CL_SUCCESS : -1,"clGetEventInfo %u returned wrong status %04x after successful user event",i,s); + } + + CL_EXIT_ERROR(clReleaseEvent(u1),"clReleaseEvent failed"); + + for (cl_uint i = 0; i != N; ++i) + CL_EXIT_ERROR(clReleaseEvent(e[i]),"clReleaseEvent failed"); + + log_info("Successful user event case passed.\n"); + + } + + // Test unsuccessful user event case. 
/////////////////////////////////////////////////////////////////// + { + cl_event u2 = clCreateUserEvent( context, &err ); + CL_EXIT_ERROR(err,"clCreateUserEvent failed"); + + cl_event e[4]; + cl_uint N = sizeof e / sizeof(cl_event); + + log_info("Enqueuing tasks\n"); + for (cl_uint i = 0; i != N; ++i) + CL_EXIT_ERROR(clEnqueueTask(queue,k0,1,&u2,&e[i]),"clEnqueueTaskFailed"); + + log_info("Checking task status before setting user event status\n"); + for (cl_uint i = 0; i != N; ++i) { + CL_EXIT_ERROR(clGetEventInfo(e[i],CL_EVENT_COMMAND_EXECUTION_STATUS,sizeof s,&s,0),"clGetEventInfo failed"); + CL_EXIT_ERROR((s == CL_QUEUED || s == CL_SUBMITTED) ? CL_SUCCESS : -1,"clGetEventInfo %u returned wrong status %d before user event",i, (int) s); + } + + log_info("Setting user event status to unsuccessful result\n"); + CL_EXIT_ERROR(clSetUserEventStatus(u2,-1),"clSetUserEventStatus failed"); + + log_info("Waiting for tasks to finish executing\n"); + CL_EXIT_ERROR((clWaitForEvents( N, &e[0] )!=CL_SUCCESS) ? CL_SUCCESS : -1,"clWaitForEvent succeeded when it should have failed"); + + log_info("Checking task status after setting user event status\n"); + for (cl_uint i = 0; i != N; ++i) { + CL_EXIT_ERROR(clGetEventInfo(e[i],CL_EVENT_COMMAND_EXECUTION_STATUS,sizeof s,&s,0),"clGetEventInfo failed"); + CL_EXIT_ERROR((s != CL_QUEUED) ? CL_SUCCESS : -1,"clGetEventInfo %u returned wrong status %04x after unsuccessful user event",i,s); + } + + CL_EXIT_ERROR(clReleaseEvent(u2),"clReleaseEvent failed"); + + for (cl_uint i = 0; i != N; ++i) + CL_EXIT_ERROR(clReleaseEvent(e[i]),"clReleaseEvent failed"); + + log_info("Unsuccessful user event case passed.\n"); + } + + return 0; + +} + +#if 0 +int main(int argc, char** argv) +{ + + cl_int err; + + test_start(); + + cl_device_type device_type; + CL_DEVICE_TYPE_ENV( device_type ); + + cl_device_id device_id; + CL_EXIT_ERROR(clGetDeviceIDs(NULL, device_type, 1, &device_id, NULL),"GetDeviceIDs"); + + // Create a context. 
+ cl_context context = clCreateContext(0, 1, &device_id, NULL, NULL, &err); + CL_EXIT_ERROR(err,"CreateContext"); + + // Create a command queue. + q = clCreateCommandQueueWithProperties(context,device_id,0,&err); + CL_EXIT_ERROR(err,"clCreateCommandQueue failed"); + + int ret = test_userevents( device_type, context, queue, 0 ); + + test_finish(); + + return ret; +} +#endif diff --git a/test_conformance/events/test_userevents_multithreaded.cpp b/test_conformance/events/test_userevents_multithreaded.cpp new file mode 100644 index 00000000..6c54e780 --- /dev/null +++ b/test_conformance/events/test_userevents_multithreaded.cpp @@ -0,0 +1,82 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "testBase.h" +#include "action_classes.h" +#include "../../test_common/harness/conversions.h" +#include "../../test_common/harness/genericThread.h" + +#if !defined (_MSC_VER) + #include +#endif // !_MSC_VER + +class releaseEvent_thread : public genericThread +{ + public: + releaseEvent_thread( cl_event *event ) : mEvent( event ) {} + + cl_event * mEvent; + + protected: + virtual void * IRun( void ) + { + usleep( 1000000 ); + log_info( "\tTriggering gate from separate thread...\n" ); + clSetUserEventStatus( *mEvent, CL_COMPLETE ); + return NULL; + } +}; + +int test_userevents_multithreaded( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + cl_int error; + + + // Set up a user event to act as a gate + clEventWrapper gateEvent = clCreateUserEvent( context, &error ); + test_error( error, "Unable to create user gate event" ); + + // Set up a few actions gated on the user event + NDRangeKernelAction action1; + ReadBufferAction action2; + WriteBufferAction action3; + + clEventWrapper actionEvents[ 3 ]; + Action * actions[] = { &action1, &action2, &action3, NULL }; + + for( int i = 0; actions[ i ] != NULL; i++ ) + { + error = actions[ i ]->Setup( deviceID, context, queue ); + test_error( error, "Unable to set up test action" ); + + error = actions[ i ]->Execute( queue, 1, &gateEvent, &actionEvents[ i ] ); + test_error( error, "Unable to execute test action" ); + } + + // Now, instead of releasing the gate, we spawn a separate thread to do so + releaseEvent_thread thread( &gateEvent ); + log_info( "\tStarting trigger thread...\n" ); + thread.Start(); + + log_info( "\tWaiting for actions...\n" ); + error = clWaitForEvents( 3, &actionEvents[ 0 ] ); + test_error( error, "Unable to wait for action events" ); + + log_info( "\tActions completed.\n" ); + + // If we got here without error, we're good + return 0; +} + diff --git a/test_conformance/events/test_waitlists.cpp b/test_conformance/events/test_waitlists.cpp new file 
mode 100644 index 00000000..fa5db3f2 --- /dev/null +++ b/test_conformance/events/test_waitlists.cpp @@ -0,0 +1,322 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "testBase.h" +#include "action_classes.h" + + +extern const char *IGetStatusString( cl_int status ); + +#define PRINT_OPS 0 + +int test_waitlist( cl_device_id device, cl_context context, cl_command_queue queue, Action *actionToTest, bool multiple ) +{ + NDRangeKernelAction actions[ 2 ]; + clEventWrapper events[ 3 ]; + cl_int status[ 3 ]; + cl_int error; + + if (multiple) + log_info("\tExecuting reference event 0, then reference event 1 with reference event 0 in its waitlist, then test event 2 with reference events 0 and 1 in its waitlist.\n"); + else + log_info("\tExecuting reference event 0, then test event 2 with reference event 0 in its waitlist.\n"); + + // Set up the first base action to wait against + error = actions[ 0 ].Setup( device, context, queue ); + test_error( error, "Unable to setup base event to wait against" ); + + if( multiple ) + { + // Set up a second event to wait against + error = actions[ 1 ].Setup( device, context, queue ); + test_error( error, "Unable to setup second base event to wait against" ); + } + + // Now set up the actual action to test + error = actionToTest->Setup( device, context, queue ); + test_error( error, "Unable to set up test event" ); + + // Execute all events now + if (PRINT_OPS) 
log_info("\tExecuting action 0...\n"); + error = actions[ 0 ].Execute( queue, 0, NULL, &events[ 0 ] ); + test_error( error, "Unable to execute first event" ); + + if( multiple ) + { + if (PRINT_OPS) log_info("\tExecuting action 1...\n"); + error = actions[ 1 ].Execute( queue, 1, &events[0], &events[ 1 ] ); + test_error( error, "Unable to execute second event" ); + } + + // Sanity check + if( multiple ) { + if (PRINT_OPS) log_info("\tChecking status of action 1...\n"); + error = clGetEventInfo( events[ 1 ], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status[ 1 ] ), &status[ 1 ], NULL ); + test_error( error, "Unable to get event status" ); + } + if (PRINT_OPS) log_info("\tChecking status of action 0...\n"); + error = clGetEventInfo( events[ 0 ], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status[ 0 ] ), &status[ 0 ], NULL ); + test_error( error, "Unable to get event status" ); + + log_info("\t\tEvent status after starting reference events: reference event 0: %s, reference event 1: %s, test event 2: %s.\n", + IGetStatusString( status[ 0 ] ), (multiple ? IGetStatusString( status[ 1 ] ) : "N/A"), "N/A"); + + if( ( status[ 0 ] == CL_COMPLETE ) || ( multiple && status[ 1 ] == CL_COMPLETE ) ) + { + log_info( "WARNING: Reference event(s) already completed before we could execute test event! Possible that the reference event blocked (implicitly passing)\n" ); + return 0; + } + + if (PRINT_OPS) log_info("\tExecuting action to test...\n"); + error = actionToTest->Execute( queue, ( multiple ) ? 
2 : 1, &events[ 0 ], &events[ 2 ] ); + test_error( error, "Unable to execute test event" ); + + // Hopefully, the first event is still running + if (PRINT_OPS) log_info("\tChecking status of action to test 2...\n"); + error = clGetEventInfo( events[ 2 ], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status[ 2 ] ), &status[ 2 ], NULL ); + test_error( error, "Unable to get event status" ); + if( multiple ) { + if (PRINT_OPS) log_info("\tChecking status of action 1...\n"); + error = clGetEventInfo( events[ 1 ], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status[ 1 ] ), &status[ 1 ], NULL ); + test_error( error, "Unable to get event status" ); + } + if (PRINT_OPS) log_info("\tChecking status of action 0...\n"); + error = clGetEventInfo( events[ 0 ], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status[ 0 ] ), &status[ 0 ], NULL ); + test_error( error, "Unable to get event status" ); + + log_info("\t\tEvent status after starting test event: reference event 0: %s, reference event 1: %s, test event 2: %s.\n", + IGetStatusString( status[ 0 ] ), (multiple ? 
IGetStatusString( status[ 1 ] ) : "N/A"), IGetStatusString( status[ 2 ] )); + + if( multiple ) + { + if( status[ 0 ] == CL_COMPLETE && status[ 1 ] == CL_COMPLETE ) + { + log_info( "WARNING: Both events completed, so unable to test further (implicitly passing).\n" ); + clFinish( queue ); + return 0; + } + + if(status[1] == CL_COMPLETE && status[0] != CL_COMPLETE) + { + log_error("ERROR: Test failed because the second wait event is complete and the first is not.(status: 0: %s and 1: %s)\n", IGetStatusString( status[ 0 ] ), IGetStatusString( status[ 1 ] ) ); + clFinish( queue ); + return -1; + } + } + else + { + if( status[ 0 ] == CL_COMPLETE ) + { + log_info( "WARNING: Reference event completed, so unable to test further (implicitly passing).\n" ); + clFinish( queue ); + return 0; + } + if( status[ 0 ] != CL_RUNNING && status[ 0 ] != CL_QUEUED && status[ 0 ] != CL_SUBMITTED ) + { + log_error( "ERROR: Test failed because first wait event is not currently running, queued, or submitted! (status: 0: %s)\n", IGetStatusString( status[ 0 ] ) ); + clFinish( queue ); + return -1; + } + } + + if( status[ 2 ] != CL_QUEUED && status[ 2 ] != CL_SUBMITTED ) + { + log_error( "ERROR: Test event is not waiting to run! 
(status: 2: %s)\n", IGetStatusString( status[ 2 ] ) ); + clFinish( queue ); + return -1; + } + + // Now wait for the first reference event + if (PRINT_OPS) log_info("\tWaiting for action 1 to finish...\n"); + error = clWaitForEvents( 1, &events[ 0 ] ); + test_error( error, "Unable to wait for reference event" ); + + // Grab statuses again + if (PRINT_OPS) log_info("\tChecking status of action to test 2...\n"); + error = clGetEventInfo( events[ 2 ], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status[ 2 ] ), &status[ 2 ], NULL ); + test_error( error, "Unable to get event status" ); + if( multiple ) { + if (PRINT_OPS) log_info("\tChecking status of action 1...\n"); + error = clGetEventInfo( events[ 1 ], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status[ 1 ] ), &status[ 1 ], NULL ); + test_error( error, "Unable to get event status" ); + } + if (PRINT_OPS) log_info("\tChecking status of action 0...\n"); + error = clGetEventInfo( events[ 0 ], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status[ 0 ] ), &status[ 0 ], NULL ); + test_error( error, "Unable to get event status" ); + + log_info("\t\tEvent status after waiting for reference event 0: reference event 0: %s, reference event 1: %s, test event 2: %s.\n", + IGetStatusString( status[ 0 ] ), (multiple ? 
IGetStatusString( status[ 1 ] ) : "N/A"), IGetStatusString( status[ 2 ] )); + + // Sanity + if( status[ 0 ] != CL_COMPLETE ) + { + log_error( "ERROR: Waited for first event but it's not complete (status: 0: %s)\n", IGetStatusString( status[ 0 ] ) ); + clFinish( queue ); + return -1; + } + + // If we're multiple, and the second event isn't complete, then our test event should still be queued + if( multiple && status[ 1 ] != CL_COMPLETE ) + { + if( status[ 1 ] == CL_RUNNING && status[ 2 ] == CL_RUNNING ) { + log_error("ERROR: Test event and second event are both running.\n"); + clFinish( queue ); + return -1; + } + if( status[ 2 ] != CL_QUEUED && status[ 2 ] != CL_SUBMITTED ) + { + log_error( "ERROR: Test event did not wait for second event before starting! (status of ref: 1: %s, of test: 2: %s)\n", IGetStatusString( status[ 1 ] ), IGetStatusString( status[ 2 ] ) ); + clFinish( queue ); + return -1; + } + + // Now wait for second event to complete, too + if (PRINT_OPS) log_info("\tWaiting for action 1 to finish...\n"); + error = clWaitForEvents( 1, &events[ 1 ] ); + test_error( error, "Unable to wait for second reference event" ); + + // Grab statuses again + if (PRINT_OPS) log_info("\tChecking status of action to test 2...\n"); + error = clGetEventInfo( events[ 2 ], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status[ 2 ] ), &status[ 2 ], NULL ); + test_error( error, "Unable to get event status" ); + if( multiple ) { + if (PRINT_OPS) log_info("\tChecking status of action 1...\n"); + error = clGetEventInfo( events[ 1 ], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status[ 1 ] ), &status[ 1 ], NULL ); + test_error( error, "Unable to get event status" ); + } + if (PRINT_OPS) log_info("\tChecking status of action 0...\n"); + error = clGetEventInfo( events[ 0 ], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status[ 0 ] ), &status[ 0 ], NULL ); + test_error( error, "Unable to get event status" ); + + log_info("\t\tEvent status after waiting for reference event 1: reference event 
0: %s, reference event 1: %s, test event 2: %s.\n", + IGetStatusString( status[ 0 ] ), (multiple ? IGetStatusString( status[ 1 ] ) : "N/A"), IGetStatusString( status[ 2 ] )); + + // Sanity + if( status[ 1 ] != CL_COMPLETE ) + { + log_error( "ERROR: Waited for second reference event but it didn't complete (status: 1: %s)\n", IGetStatusString( status[ 1 ] ) ); + clFinish( queue ); + return -1; + } + } + + // At this point, the test event SHOULD be running, but if it completed, we consider it a pass + if( status[ 2 ] == CL_COMPLETE ) + { + log_info( "WARNING: Test event already completed. Assumed valid.\n" ); + clFinish( queue ); + return 0; + } + if( status[ 2 ] != CL_RUNNING && status[ 2 ] != CL_SUBMITTED && status[ 2 ] != CL_QUEUED) + { + log_error( "ERROR: Second event did not start running after reference event(s) completed! (status: 2: %s)\n", IGetStatusString( status[ 2 ] ) ); + clFinish( queue ); + return -1; + } + + // Wait for the test event, then return + if (PRINT_OPS) log_info("\tWaiting for action 2 to test to finish...\n"); + error = clWaitForEvents( 1, &events[ 2 ] ); + test_error( error, "Unable to wait for test event" ); + + error |= clGetEventInfo( events[ 2 ], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status[ 2 ] ), &status[ 2 ], NULL ); + test_error( error, "Unable to get event status" ); + + log_info("\t\tEvent status after waiting for test event: reference event 0: %s, reference event 1: %s, test event 2: %s.\n", + IGetStatusString( status[ 0 ] ), (multiple ? 
IGetStatusString( status[ 1 ] ) : "N/A"), IGetStatusString( status[ 2 ] )); + + // Sanity + if( status[ 2 ] != CL_COMPLETE ) + { + log_error( "ERROR: Test event didn't complete (status: 2: %s)\n", IGetStatusString( status[ 2 ] ) ); + clFinish( queue ); + return -1; + } + + clFinish(queue); + return 0; +} + +#define TEST_ACTION( name ) \ + { \ + name##Action action; \ + log_info( "-- Testing " #name " (waiting on 1 event)...\n" ); \ + if( ( error = test_waitlist( deviceID, context, queue, &action, false ) ) != CL_SUCCESS ) \ + retVal++; \ + clFinish( queue ); \ + } \ + if( error == CL_SUCCESS ) /* Only run multiples test if single test passed */ \ + { \ + name##Action action; \ + log_info( "-- Testing " #name " (waiting on 2 events)...\n" ); \ + if( ( error = test_waitlist( deviceID, context, queue, &action, true ) ) != CL_SUCCESS ) \ + retVal++; \ + clFinish( queue ); \ + } + +int test_waitlists( cl_device_id deviceID, cl_context context, cl_command_queue oldQueue, int num_elements ) +{ + cl_int error; + int retVal = 0; + cl_queue_properties props[] = {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0}; + + if( !checkDeviceForQueueSupport( deviceID, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE ) ) + { + log_info( "WARNING: Device does not support out-of-order exec mode; skipping test.\n" ); + return 0; + } + + clCommandQueueWrapper queue = clCreateCommandQueueWithProperties( context, deviceID, &props[0], &error ); + test_error(error, "Unable to create out-of-order queue"); + + log_info( "\n" ); + + TEST_ACTION( NDRangeKernel ) + + TEST_ACTION( ReadBuffer ) + TEST_ACTION( WriteBuffer ) + TEST_ACTION( MapBuffer ) + TEST_ACTION( UnmapBuffer ) + + if( checkForImageSupport( deviceID ) == CL_IMAGE_FORMAT_NOT_SUPPORTED ) + { + log_info( "\nNote: device does not support images. 
Skipping remainder of waitlist tests...\n" ); + } + else + { + TEST_ACTION( ReadImage2D ) + TEST_ACTION( WriteImage2D ) + TEST_ACTION( CopyImage2Dto2D ) + TEST_ACTION( Copy2DImageToBuffer ) + TEST_ACTION( CopyBufferTo2DImage ) + TEST_ACTION( MapImage ) + + if( checkFor3DImageSupport( deviceID ) == CL_IMAGE_FORMAT_NOT_SUPPORTED ) + log_info("Device does not support 3D images. Skipping remainder of waitlist tests...\n"); + else + { + TEST_ACTION( ReadImage3D ) + TEST_ACTION( WriteImage3D ) + TEST_ACTION( CopyImage2Dto3D ) + TEST_ACTION( CopyImage3Dto2D ) + TEST_ACTION( CopyImage3Dto3D ) + TEST_ACTION( Copy3DImageToBuffer ) + TEST_ACTION( CopyBufferTo3DImage ) + } + } + + return retVal; +} + diff --git a/test_conformance/generic_address_space/CMakeLists.txt b/test_conformance/generic_address_space/CMakeLists.txt new file mode 100644 index 00000000..da347abc --- /dev/null +++ b/test_conformance/generic_address_space/CMakeLists.txt @@ -0,0 +1,18 @@ +set(MODULE_NAME GENERIC_ADDRESS_SPACE) + +set(${MODULE_NAME}_SOURCES + advanced_tests.cpp + basic_tests.cpp + main.cpp + stress_tests.cpp + ../../test_common/harness/testHarness.c + ../../test_common/harness/kernelHelpers.c + ../../test_common/harness/errorHelpers.c + ../../test_common/harness/mt19937.c + ../../test_common/harness/msvc9.c + ../../test_common/harness/parseParameters.cpp +) + +include(../CMakeCommon.txt) + +# end of file # diff --git a/test_conformance/generic_address_space/Makefile b/test_conformance/generic_address_space/Makefile new file mode 100644 index 00000000..b59fe4c6 --- /dev/null +++ b/test_conformance/generic_address_space/Makefile @@ -0,0 +1,44 @@ +ifdef BUILD_WITH_ATF +ATF = -framework ATF +USE_ATF = -DUSE_ATF +endif + +SRCS = main.c \ + basic_tests.cpp \ + advanced_tests.cpp \ + stress_tests.cpp \ + ../../test_common/harness/errorHelpers.c \ + ../../test_common/harness/threadTesting.c \ + ../../test_common/harness/testHarness.c \ + ../../test_common/harness/kernelHelpers.c \ + 
../../test_common/harness/typeWrappers.cpp \ + ../../test_common/harness/mt19937.c \ + +DEFINES = DONT_TEST_GARBAGE_POINTERS + +SOURCES = $(abspath $(SRCS)) +LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries +LIBPATH += -L. +HEADERS = +TARGET = test_uniform_address_space +INCLUDE = +COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32 +CC = c++ +CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE) +CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE) +LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF} + +OBJECTS := ${SOURCES:.c=.o} +OBJECTS := ${OBJECTS:.cpp=.o} + +TARGETOBJECT = +all: $(TARGET) + +$(TARGET): $(OBJECTS) + $(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES) + +clean: + rm -f $(TARGET) $(OBJECTS) + +.DEFAULT: + @echo The target \"$@\" does not exist in Makefile. diff --git a/test_conformance/generic_address_space/advanced_tests.cpp b/test_conformance/generic_address_space/advanced_tests.cpp new file mode 100644 index 00000000..05bd7ae9 --- /dev/null +++ b/test_conformance/generic_address_space/advanced_tests.cpp @@ -0,0 +1,1031 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/testHarness.h" +#include "../../test_common/harness/typeWrappers.h" +#include "base.h" + +#include +#include +#include +#include + +typedef enum { + ARG_TYPE_NONE, + + ARG_TYPE_HOST_PTR, + ARG_TYPE_HOST_LOCAL, + + ARG_TYPE_COARSE_GRAINED_SVM, + ARG_TYPE_FINE_GRAINED_BUFFER_SVM, + ARG_TYPE_FINE_GRAINED_SYSTEM_SVM, + ARG_TYPE_ATOMICS_SVM +} ExtraKernelArgMemType; + +class CSVMWrapper { +public: + CSVMWrapper() : ptr_(NULL), context_(NULL) { } + + void Attach(cl_context context, void *ptr) { + context_ = context; + ptr_ = ptr; + } + + ~CSVMWrapper() { + if (ptr_) + clSVMFree(context_, ptr_); + } + + operator void *() { + return ptr_; + } + +private: + void *ptr_; + cl_context context_; +}; + +class CAdvancedTest : public CTest { +public: + CAdvancedTest(const std::vector& kernel) : CTest(), _kernels(kernel), _extraKernelArgMemType(ARG_TYPE_NONE) { + + } + + CAdvancedTest(const std::string& library, const std::vector& kernel) : CTest(), _libraryCode(library), _kernels(kernel), _extraKernelArgMemType(ARG_TYPE_NONE) { + + } + + CAdvancedTest(const std::string& kernel, ExtraKernelArgMemType argType = ARG_TYPE_NONE) : CTest(), _kernels(1, kernel), _extraKernelArgMemType(argType) { + + } + + CAdvancedTest(const std::string& library, const std::string& kernel) : CTest(), _libraryCode(library), _kernels(1, kernel), _extraKernelArgMemType(ARG_TYPE_NONE) { + + } + + int PrintCompilationLog(cl_program program, cl_device_id device) { + cl_int error; + size_t buildLogSize = 0; + + error = clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &buildLogSize); + test_error(error, "clGetProgramBuildInfo failed"); + + std::string log; + log.resize(buildLogSize); + + error = clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, buildLogSize, &log[0], NULL); + test_error(error, "clGetProgramBuildInfo failed"); + + log_error("Build log for device is:\n------------\n"); + log_error("%s\n", log.c_str() ); + log_error( 
"\n----------\n" ); + + return CL_SUCCESS; + } + + int ExecuteSubcase(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, const std::string& src) { + cl_int error; + + clProgramWrapper program, preCompiledLibrary, library, finalProgram; + clKernelWrapper kernel; + + const char *srcPtr = src.c_str(); + + if (!_libraryCode.empty()) { + program = clCreateProgramWithSource(context, 1, &srcPtr, NULL, &error); + test_error(error, "clCreateProgramWithSource failed"); + + error = clCompileProgram(program, 1, &deviceID, "-cl-std=CL2.0", 0, NULL, NULL, NULL, NULL); + + if (error != CL_SUCCESS) + PrintCompilationLog(program, deviceID); + test_error(error, "clCompileProgram failed"); + + const char *srcPtrLibrary = _libraryCode.c_str(); + + preCompiledLibrary = clCreateProgramWithSource(context, 1, &srcPtrLibrary, NULL, &error); + test_error(error, "clCreateProgramWithSource failed"); + + error = clCompileProgram(preCompiledLibrary, 1, &deviceID, "-cl-std=CL2.0", 0, NULL, NULL, NULL, NULL); + + if (error != CL_SUCCESS) + PrintCompilationLog(preCompiledLibrary, deviceID); + test_error(error, "clCompileProgram failed"); + + library = clLinkProgram(context, 1, &deviceID, "-create-library", 1, &preCompiledLibrary, NULL, NULL, &error); + test_error(error, "clLinkProgram failed"); + + cl_program objects[] = { program, library }; + finalProgram = clLinkProgram(context, 1, &deviceID, "", 2, objects, NULL, NULL, &error); + test_error(error, "clLinkProgram failed"); + + kernel = clCreateKernel(finalProgram, "testKernel", &error); + test_error(error, "clCreateKernel failed"); + } + + else { + if (create_single_kernel_helper_with_build_options(context, &program, &kernel, 1, &srcPtr, "testKernel", "-cl-std=CL2.0")) { + log_error("create_single_kernel_helper failed\n"); + return -1; + } + } + + size_t bufferSize = num_elements * sizeof(cl_uint); + clMemWrapper buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, bufferSize, NULL, &error); + test_error(error, 
"clCreateBuffer failed"); + + error = clSetKernelArg(kernel, 0, sizeof(buffer), &buffer); + test_error(error, "clSetKernelArg(0) failed"); + + // Warning: the order below is very important as SVM buffer cannot be free'd before corresponding mem_object + CSVMWrapper svmWrapper; + clMemWrapper extraArg; + std::vector extraArgData(num_elements); + for (cl_uint i = 0; i < (cl_uint)num_elements; i++) + extraArgData[i] = i; + + if (_extraKernelArgMemType != ARG_TYPE_NONE) { + if (_extraKernelArgMemType == ARG_TYPE_HOST_PTR) { + extraArg = clCreateBuffer(context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE, bufferSize, &extraArgData[0], &error); + test_error(error, "clCreateBuffer failed"); + } + + else { + void *ptr = NULL; + + switch (_extraKernelArgMemType) { + case ARG_TYPE_COARSE_GRAINED_SVM: + ptr = clSVMAlloc(context, CL_MEM_READ_WRITE, bufferSize, 0); + break; + case ARG_TYPE_FINE_GRAINED_BUFFER_SVM: + ptr = clSVMAlloc(context, CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_READ_WRITE, bufferSize, 0); + break; + case ARG_TYPE_FINE_GRAINED_SYSTEM_SVM: + ptr = &extraArgData[0]; + break; + case ARG_TYPE_ATOMICS_SVM: + ptr = clSVMAlloc(context, CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS | CL_MEM_READ_WRITE, bufferSize, 0); + break; + default: + break; + } + + if(_extraKernelArgMemType != ARG_TYPE_HOST_LOCAL) { + if (!ptr) { + log_error("Allocation failed\n"); + return -1; + } + + if (_extraKernelArgMemType != ARG_TYPE_FINE_GRAINED_SYSTEM_SVM) { + svmWrapper.Attach(context, ptr); + } + + if (_extraKernelArgMemType == ARG_TYPE_COARSE_GRAINED_SVM) { + error = clEnqueueSVMMap(queue, CL_TRUE, CL_MAP_WRITE, ptr, bufferSize, 0, NULL, NULL); + test_error(error, "clEnqueueSVMMap failed"); + } + + memcpy(ptr, &extraArgData[0], bufferSize); + + if (_extraKernelArgMemType == ARG_TYPE_COARSE_GRAINED_SVM) { + error = clEnqueueSVMUnmap(queue, ptr, 0, NULL, NULL); + test_error(error, "clEnqueueSVMUnmap failed"); + clFinish(queue); + } + + extraArg = clCreateBuffer(context, 
CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE, bufferSize, ptr, &error); + test_error(error, "clCreateBuffer from SVM buffer failed"); + } + } + + if(_extraKernelArgMemType == ARG_TYPE_HOST_LOCAL) + error = clSetKernelArg(kernel, 1, bufferSize, NULL); + else + error = clSetKernelArg(kernel, 1, sizeof(extraArg), &extraArg); + + + test_error(error, "clSetKernelArg(1) failed"); + } + + size_t globalWorkGroupSize = num_elements; + size_t localWorkGroupSize = 0; + error = get_max_common_work_group_size(context, kernel, globalWorkGroupSize, &localWorkGroupSize); + test_error(error, "Unable to get common work group size"); + + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &globalWorkGroupSize, &localWorkGroupSize, 0, NULL, NULL); + test_error(error, "clEnqueueNDRangeKernel failed"); + + // verify results + std::vector results(num_elements); + + error = clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, bufferSize, &results[0], 0, NULL, NULL); + test_error(error, "clEnqueueReadBuffer failed"); + + size_t passCount = std::count(results.begin(), results.end(), 1); + if (passCount != results.size()) { + std::vector::iterator iter = std::find(results.begin(), results.end(), 0); + log_error("Verification on device failed at index %ld\n", std::distance(results.begin(), iter)); + log_error("%ld out of %ld failed\n", (results.size()-passCount), results.size()); + return -1; + } + + return CL_SUCCESS; + } + + int Execute(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { + cl_int result = CL_SUCCESS; + + for (std::vector::const_iterator it = _kernels.begin(); it != _kernels.end(); ++it) { + log_info("Executing subcase #%ld out of %ld\n", (it - _kernels.begin() + 1), _kernels.size()); + + result |= ExecuteSubcase(deviceID, context, queue, num_elements, *it); + } + + return result; + } + +private: + const std::string _libraryCode; + const std::vector _kernels; + const ExtraKernelArgMemType _extraKernelArgMemType; +}; + +int 
test_library_function(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { + const std::string LIBRARY_FUNCTION = common::CONFORMANCE_VERIFY_FENCE + + NL + NL "bool helperFunction(float *floatp, float val) {" + NL " if (!isFenceValid(get_fence(floatp)))" + NL " return false;" + NL + NL " if (*floatp != val)" + NL " return false;" + NL + NL " return true;" + NL "}" + NL; + + const std::string KERNEL_FUNCTION = + NL + NL "extern bool helperFunction(float *floatp, float val);" + NL + NL "__global float gfloat = 1.0f;" + NL + NL "__kernel void testKernel(__global uint *results) {" + NL " uint tid = get_global_id(0);" + NL + NL " __global float *gfloatp = &gfloat;" + NL " __local float lfloat;" + NL " lfloat = 2.0f;" + NL " __local float *lfloatp = &lfloat;" + NL " float pfloat = 3.0f;" + NL " __private float *pfloatp = &pfloat;" + NL + NL " uint failures = 0;" + NL + NL " failures += helperFunction(gfloatp, gfloat) ? 0 : 1;" + NL " failures += helperFunction(lfloatp, lfloat) ? 0 : 1;" + NL " failures += helperFunction(pfloatp, pfloat) ? 
0 : 1;" + NL + NL " results[tid] = failures == 0;" + NL "}" + NL; + + CAdvancedTest test(LIBRARY_FUNCTION, KERNEL_FUNCTION); + + return test.Execute(deviceID, context, queue, num_elements); +} + +int test_generic_variable_volatile(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { + std::vector KERNEL_FUNCTIONS; + + KERNEL_FUNCTIONS.push_back(common::CONFORMANCE_VERIFY_FENCE + + NL + NL "bool helperFunction(float *floatp, float val) {" + NL " if (!isFenceValid(get_fence(floatp)))" + NL " return false;" + NL + NL " if (*floatp != val)" + NL " return false;" + NL + NL " return true;" + NL "}" + NL + NL "__kernel void testKernel(__global uint *results) {" + NL " uint tid = get_global_id(0);" + NL + NL " static __global float val;" + NL " val = 0.1f;" + NL " float * volatile ptr = &val;" + NL + NL " results[tid] = helperFunction(ptr, val);" + NL "}" + NL + ); + + KERNEL_FUNCTIONS.push_back(common::CONFORMANCE_VERIFY_FENCE + + NL + NL "bool helperFunction(float *floatp, float val) {" + NL " if (!isFenceValid(get_fence(floatp)))" + NL " return false;" + NL + NL " if (*floatp != val)" + NL " return false;" + NL + NL " return true;" + NL "}" + NL + NL "__kernel void testKernel(__global uint *results) {" + NL " uint tid = get_global_id(0);" + NL + NL " __local float val;" + NL " val = 0.1f;" + NL " float * ptr = &val;" + NL + NL " results[tid] = helperFunction(ptr, val);" + NL "}" + NL + ); + + KERNEL_FUNCTIONS.push_back(common::CONFORMANCE_VERIFY_FENCE + + NL + NL "bool helperFunction(float *floatp, float val) {" + NL " if (!isFenceValid(get_fence(floatp)))" + NL " return false;" + NL + NL " if (*floatp != val)" + NL " return false;" + NL + NL " return true;" + NL "}" + NL + NL "__kernel void testKernel(__global uint *results) {" + NL " uint tid = get_global_id(0);" + NL + NL " __private float val;" + NL " val = 0.1f;" + NL " float * volatile ptr = &val;" + NL + NL " results[tid] = helperFunction(ptr, val);" + NL "}" + NL + ); + + 
CAdvancedTest test(KERNEL_FUNCTIONS); + + return test.Execute(deviceID, context, queue, num_elements); +} + +int test_generic_variable_const(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { + std::vector KERNEL_FUNCTIONS; + + KERNEL_FUNCTIONS.push_back(common::CONFORMANCE_VERIFY_FENCE + + NL + NL "bool helperFunction(const float *floatp, float val) {" + NL " if (!isFenceValid(get_fence(floatp)))" + NL " return false;" + NL + NL " if (*floatp != val)" + NL " return false;" + NL + NL " return true;" + NL "}" + NL + NL "__kernel void testKernel(__global uint *results) {" + NL " uint tid = get_global_id(0);" + NL + NL " const __private float val = 0.1f;" + NL " const float * ptr = &val;" + NL + NL " results[tid] = helperFunction(ptr, val);" + NL "}" + NL + ); + + KERNEL_FUNCTIONS.push_back(common::CONFORMANCE_VERIFY_FENCE + + NL + NL "bool helperFunction(const float *floatp, float val) {" + NL " if (!isFenceValid(get_fence(floatp)))" + NL " return false;" + NL + NL " if (*floatp != val)" + NL " return false;" + NL + NL " return true;" + NL "}" + NL + NL "__kernel void testKernel(__global uint *results) {" + NL " uint tid = get_global_id(0);" + NL + NL " const static __global float val = 0.1f;" + NL " const float * ptr = &val;" + NL + NL " results[tid] = helperFunction(ptr, val);" + NL "}" + NL + ); + + CAdvancedTest test(KERNEL_FUNCTIONS); + + return test.Execute(deviceID, context, queue, num_elements); +} + +int test_generic_variable_gentype(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { + const std::string KERNEL_FUNCTION_TEMPLATE = common::CONFORMANCE_VERIFY_FENCE + + NL + NL "%s" + NL + NL "bool helperFunction(const %s *%sp, %s val) {" + NL " if (!isFenceValid(get_fence(%sp)))" + NL " return false;" + NL + NL " return %s(*%sp == val);" + NL "}" + NL + NL "__kernel void testKernel(__global uint *results) {" + NL " uint tid = get_global_id(0);" + NL + NL " %s %s val = (%s)1;" + NL " %s * 
ptr = &val;" + NL + NL " results[tid] = helperFunction(ptr, val);" + NL "}" + NL; +/* Qualcomm fix: 12502 Gen Addr Space - Fix kernel for generic variable gentype (half) test + const std::string KERNEL_FUNCTION_TEMPLATE_HALF = common::CONFORMANCE_VERIFY_FENCE */ + const std::string vector_sizes[] = { "", "2", "3", "4", "8", "16" }; + const std::string gentype_base[] = { "float", "char", "uchar", "short", "ushort", "int", "uint", "long", "ulong" }; + const std::string gentype_others[] = { "bool", "size_t", "ptrdiff_t", "intptr_t", "uintptr_t" }; + + const std::string address_spaces[] = { "static __global", "__private" }; + + const std::string vector_cmp = "all"; + + std::vector KERNEL_FUNCTIONS; + + // Add base types plus theirs vector variants + for (size_t i = 0; i < sizeof(gentype_base) / sizeof(gentype_base[0]); i++) { + for (size_t j = 0; j < sizeof(vector_sizes) / sizeof(vector_sizes[0]); j++) { + for (size_t k = 0; k < sizeof(address_spaces) / sizeof(address_spaces[0]); k++) { + char temp_kernel[1024]; + const std::string fulltype = gentype_base[i] + vector_sizes[j]; + sprintf(temp_kernel, KERNEL_FUNCTION_TEMPLATE.c_str(), + "", + fulltype.c_str(), fulltype.c_str(), fulltype.c_str(), fulltype.c_str(), + (j > 0 ? 
vector_cmp.c_str() : ""), + fulltype.c_str(), address_spaces[k].c_str(), fulltype.c_str(), fulltype.c_str(), + fulltype.c_str()); + + KERNEL_FUNCTIONS.push_back(temp_kernel); + } + } + } + + const std::string cl_khr_fp64_pragma = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable"; + + // Add double floating types if they are supported + if (is_extension_available(deviceID, "cl_khr_fp64")) { + for (size_t j = 0; j < sizeof(vector_sizes) / sizeof(vector_sizes[0]); j++) { + for (size_t k = 0; k < sizeof(address_spaces) / sizeof(address_spaces[0]); k++) { + char temp_kernel[1024]; + const std::string fulltype = std::string("double") + vector_sizes[j]; + sprintf(temp_kernel, KERNEL_FUNCTION_TEMPLATE.c_str(), + cl_khr_fp64_pragma.c_str(), + fulltype.c_str(), fulltype.c_str(), fulltype.c_str(), fulltype.c_str(), + (j > 0 ? vector_cmp.c_str() : ""), + fulltype.c_str(), address_spaces[k].c_str(), fulltype.c_str(), fulltype.c_str(), + fulltype.c_str()); + + KERNEL_FUNCTIONS.push_back(temp_kernel); + } + } + } +/* Qualcomm fix: 12502 Gen Addr Space - Fix kernel for generic variable gentype (half) test */ + const std::string cl_khr_fp16_pragma = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable"; + + // Add half floating types if they are supported + if (is_extension_available(deviceID, "cl_khr_fp16")) { + for (size_t j = 0; j < sizeof(vector_sizes) / sizeof(vector_sizes[0]); j++) { + for (size_t k = 0; k < sizeof(address_spaces) / sizeof(address_spaces[0]); k++) { + char temp_kernel[1024]; + const std::string fulltype = std::string("half") + vector_sizes[j]; + sprintf(temp_kernel, KERNEL_FUNCTION_TEMPLATE.c_str(), + cl_khr_fp16_pragma.c_str(), + fulltype.c_str(), fulltype.c_str(), fulltype.c_str(), fulltype.c_str(), + (j > 0 ? 
vector_cmp.c_str() : ""), + fulltype.c_str(), address_spaces[k].c_str(), fulltype.c_str(), fulltype.c_str(), + fulltype.c_str()); +/* Qualcomm fix: end */ + KERNEL_FUNCTIONS.push_back(temp_kernel); + } + } + } + + // Add other types that do not have vector variants + for (size_t i = 0; i < sizeof(gentype_others) / sizeof(gentype_others[0]); i++) { + for (size_t k = 0; k < sizeof(address_spaces) / sizeof(address_spaces[0]); k++) { + char temp_kernel[1024]; + const std::string fulltype = gentype_others[i]; + sprintf(temp_kernel, KERNEL_FUNCTION_TEMPLATE.c_str(), + "", + fulltype.c_str(), fulltype.c_str(), fulltype.c_str(), fulltype.c_str(), + "", + fulltype.c_str(), address_spaces[k].c_str(), fulltype.c_str(), fulltype.c_str(), + fulltype.c_str()); + + KERNEL_FUNCTIONS.push_back(temp_kernel); + } + } + + CAdvancedTest test(KERNEL_FUNCTIONS); + + return test.Execute(deviceID, context, queue, num_elements); +} + +void create_math_kernels(std::vector& KERNEL_FUNCTIONS) { + const std::string KERNEL_FUNCTION_TEMPLATE = + NL + NL "__kernel void testKernel(__global uint *results) {" + NL " uint tid = get_global_id(0);" + NL + NL " const %s param1 = %s;" + NL " %s param2_generic;" + NL " %s param2_reference;" + NL " %s * ptr = ¶m2_generic;" + NL " %s return_value_generic;" + NL " %s return_value_reference;" + NL + NL " return_value_generic = %s(param1, ptr);" + NL " return_value_reference = %s(param1, ¶m2_reference);" + NL + NL " results[tid] = (%s(*ptr == param2_reference) && %s(return_value_generic == return_value_reference));" + NL "}" + NL; + + typedef struct { + std::string bulitin_name; + std::string base_gentype; + std::string pointer_gentype; + std::string first_param_value; + std::string compare_fn; + } BuiltinDescriptor; + + BuiltinDescriptor builtins[] = { + { "fract", "float", "float", "133.55f", "" }, + { "frexp", "float2", "int2", "(float2)(24.12f, 99999.7f)", "all" }, + { "frexp", "float", "int", "1234.5f", "" }, + { "lgamma_r", "float2", "int2", 
"(float2)(1000.0f, 9999.5f)", "all" }, + { "lgamma_r", "float", "int", "1000.0f", "" }, + { "modf", "float", "float", "1234.56789f", "" }, + { "sincos", "float", "float", "3.141592f", "" } + }; + + for (size_t i = 0; i < sizeof(builtins) / sizeof(builtins[0]); i++) { + char temp_kernel[1024]; + sprintf(temp_kernel, KERNEL_FUNCTION_TEMPLATE.c_str(), builtins[i].base_gentype.c_str(), builtins[i].first_param_value.c_str(), + builtins[i].pointer_gentype.c_str(), builtins[i].pointer_gentype.c_str(), builtins[i].pointer_gentype.c_str(), builtins[i].base_gentype.c_str(), + builtins[i].base_gentype.c_str(), builtins[i].bulitin_name.c_str(), builtins[i].bulitin_name.c_str(), + builtins[i].compare_fn.c_str(), builtins[i].compare_fn.c_str()); + + KERNEL_FUNCTIONS.push_back(temp_kernel); + } + + // add special case for remquo (3 params) + KERNEL_FUNCTIONS.push_back( + NL + NL "__kernel void testKernel(__global uint *results) {" + NL " uint tid = get_global_id(0);" + NL + NL " const float param1 = 1234.56789f;" + NL " const float param2 = 123.456789f;" + NL " int param3_generic;" + NL " int param3_reference;" + NL " int * ptr = ¶m3_generic;" + NL " float return_value_generic;" + NL " float return_value_reference;" + NL + NL " return_value_generic = remquo(param1, param2, ptr);" + NL " return_value_reference = remquo(param1, param2, ¶m3_reference);" + NL + NL " results[tid] = (*ptr == param3_reference && return_value_generic == return_value_reference);" + NL "}" + NL + ); +} + +std::string get_default_data_for_type(const std::string& type) { + std::string result; + + if (type == "float") { + for (int i = 0; i < 10; i++) { + for (int j = 0; j < 10; j++) { + char temp[10]; + sprintf(temp, "%d.%df, ", i, j); + result += std::string(temp); + } + } + } + + else if (type == "double") { + for (int i = 0; i < 10; i++) { + for (int j = 0; j < 10; j++) { + char temp[10]; + sprintf(temp, "%d.%d, ", i, j); + result += std::string(temp); + } + } + } + + else { + for (int i = 0; i < 100; i++) 
{ + char temp[10]; + sprintf(temp, "%d, ", i); + result += std::string(temp); + } + } + + return result; +} + +void create_vload_kernels(std::vector& KERNEL_FUNCTIONS, cl_device_id deviceID) { + const std::string KERNEL_FUNCTION_TEMPLATE_GLOBAL = + NL + NL "%s" + NL "__global %s data[] = { %s };" + NL + NL "__kernel void testKernel(__global uint *results) {" + NL " uint tid = get_global_id(0);" + NL + NL " // Testing: %s" + NL " const %s * ptr = data;" + NL " %s%s result_generic = vload%s(2, ptr);" + NL " %s%s result_reference = vload%s(2, data);" + NL + NL " results[tid] = all(result_generic == result_reference);" + NL "}" + NL; + + const std::string KERNEL_FUNCTION_TEMPLATE_LOCAL = + NL + NL "%s" + NL "__constant %s to_copy_from[] = { %s };" + NL + NL "__kernel void testKernel(__global uint *results) {" + NL " uint tid = get_global_id(0);" + NL + NL " __local %s data[100];" + NL " for (int i = 0; i < sizeof(to_copy_from) / sizeof(to_copy_from[0]); i++)" + NL " data[i] = to_copy_from[i];" + NL + NL " const %s * ptr = data;" + NL " %s%s result_generic = vload%s(2, ptr);" + NL " %s%s result_reference = vload%s(2, data);" + NL + NL " results[tid] = all(result_generic == result_reference);" + NL "}" + NL; + + const std::string KERNEL_FUNCTION_TEMPLATE_PRIVATE = + NL + NL "%s" + NL "__kernel void testKernel(__global uint *results) {" + NL " uint tid = get_global_id(0);" + NL + NL " %s data[] = { %s };" + NL " // Testing: %s" + NL " const %s * ptr = data;" + NL " %s%s result_generic = vload%s(2, ptr);" + NL " %s%s result_reference = vload%s(2, data);" + NL + NL " results[tid] = all(result_generic == result_reference);" + NL "}" + NL; + + const std::string vector_sizes[] = { "2", "3", "4", "8", "16" }; + const std::string gentype_base[] = { "double", "float", "char", "uchar", "short", "ushort", "int", "uint", "long", "ulong" }; + const std::string kernel_variants[] = { KERNEL_FUNCTION_TEMPLATE_GLOBAL, KERNEL_FUNCTION_TEMPLATE_LOCAL, KERNEL_FUNCTION_TEMPLATE_PRIVATE }; + 
+ const std::string cl_khr_fp64_pragma = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable"; + + for (size_t i = 0; i < sizeof(gentype_base) / sizeof(gentype_base[0]); i++) { + const char *pragma_str = ""; + + if (i == 0) { + if (!is_extension_available(deviceID, "cl_khr_fp64")) + continue; + else + pragma_str = cl_khr_fp64_pragma.c_str(); + } + + for (size_t j = 0; j < sizeof(vector_sizes) / sizeof(vector_sizes[0]); j++) { + for (size_t k = 0; k < sizeof(kernel_variants) / sizeof(kernel_variants[0]); k++) { + char temp_kernel[4098]; + sprintf(temp_kernel, kernel_variants[k].c_str(), + pragma_str, + gentype_base[i].c_str(), + get_default_data_for_type(gentype_base[i]).c_str(), + gentype_base[i].c_str(), + gentype_base[i].c_str(), + gentype_base[i].c_str(), vector_sizes[j].c_str(), vector_sizes[j].c_str(), + gentype_base[i].c_str(), vector_sizes[j].c_str(), vector_sizes[j].c_str() + ); + + KERNEL_FUNCTIONS.push_back(temp_kernel); + } + } + } +} + +void create_vstore_kernels(std::vector& KERNEL_FUNCTIONS, cl_device_id deviceID) { + const std::string KERNEL_FUNCTION_TEMPLATE_GLOBAL = + NL + NL "%s" + NL "__global %s data_generic[] = { %s };" + NL "__global %s data_reference[] = { %s };" + NL + NL "__kernel void testKernel(__global uint *results) {" + NL " uint tid = get_global_id(0);" + NL + NL " %s%s input = (%s%s)(1);" + NL " %s * ptr = data_generic;" + NL + NL " vstore%s(input, 2, ptr);" + NL " vstore%s(input, 2, data_reference);" + NL + NL " bool result = true;" + NL " for (int i = 0; i < sizeof(data_generic) / sizeof(data_generic[0]); i++)" + NL " if (data_generic[i] != data_reference[i])" + NL " result = false;" + NL + NL " results[tid] = result;" + NL "}" + NL; + + const std::string KERNEL_FUNCTION_TEMPLATE_LOCAL = + NL + NL "%s" + NL "__constant %s to_copy_from[] = { %s };" + NL + NL "__kernel void testKernel(__global uint *results) {" + NL " uint tid = get_global_id(0);" + NL + NL " __local %s data_generic[100];" + NL " for (int i = 0; i < sizeof(to_copy_from) / 
sizeof(to_copy_from[0]); i++)" + NL " data_generic[i] = to_copy_from[i];" + NL + NL " __local %s data_reference[100];" + NL " for (int i = 0; i < sizeof(to_copy_from) / sizeof(to_copy_from[0]); i++)" + NL " data_reference[i] = to_copy_from[i];" + NL + NL " %s%s input = (%s%s)(1);" + NL " %s * ptr = data_generic;" + NL + NL " vstore%s(input, 2, ptr);" + NL " vstore%s(input, 2, data_reference);" + NL + NL " work_group_barrier(CLK_LOCAL_MEM_FENCE);" + NL + NL " bool result = true;" + NL " for (int i = 0; i < sizeof(data_generic) / sizeof(data_generic[0]); i++)" + NL " if (data_generic[i] != data_reference[i])" + NL " result = false;" + NL + NL " results[tid] = result;" + NL "}" + NL; + + const std::string KERNEL_FUNCTION_TEMPLATE_PRIVATE = + NL + NL "%s" + NL "__kernel void testKernel(__global uint *results) {" + NL " uint tid = get_global_id(0);" + NL + NL " __private %s data_generic[] = { %s };" + NL " __private %s data_reference[] = { %s };" + NL + NL " %s%s input = (%s%s)(1);" + NL " %s * ptr = data_generic;" + NL + NL " vstore%s(input, 2, ptr);" + NL " vstore%s(input, 2, data_reference);" + NL + NL " bool result = true;" + NL " for (int i = 0; i < sizeof(data_generic) / sizeof(data_generic[0]); i++)" + NL " if (data_generic[i] != data_reference[i])" + NL " result = false;" + NL + NL " results[tid] = result;" + NL "}" + NL; + + const std::string vector_sizes[] = { "2", "3", "4", "8", "16" }; + const std::string gentype_base[] = { "double", "float", "char", "uchar", "short", "ushort", "int", "uint", "long", "ulong" }; + const std::string kernel_variants[] = { KERNEL_FUNCTION_TEMPLATE_GLOBAL, KERNEL_FUNCTION_TEMPLATE_LOCAL, KERNEL_FUNCTION_TEMPLATE_PRIVATE }; + + const std::string cl_khr_fp64_pragma = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable"; + + for (size_t i = 0; i < sizeof(gentype_base) / sizeof(gentype_base[0]); i++) { + const char *pragma_str = ""; + if (i == 0) { + if (!is_extension_available(deviceID, "cl_khr_fp64")) + continue; + else + pragma_str = 
cl_khr_fp64_pragma.c_str(); + } + + + for (size_t j = 0; j < sizeof(vector_sizes) / sizeof(vector_sizes[0]); j++) { + for (size_t k = 0; k < sizeof(kernel_variants) / sizeof(kernel_variants[0]); k++) { + char temp_kernel[4098]; + + switch (k) { + case 0: // global template + case 2: // private template + sprintf(temp_kernel, kernel_variants[k].c_str(), + pragma_str, + gentype_base[i].c_str(), get_default_data_for_type(gentype_base[i]).c_str(), + gentype_base[i].c_str(), get_default_data_for_type(gentype_base[i]).c_str(), + gentype_base[i].c_str(), vector_sizes[j].c_str(), gentype_base[i].c_str(), vector_sizes[j].c_str(), + gentype_base[i].c_str(), + vector_sizes[j].c_str(), + vector_sizes[j].c_str() + ); + break; + + case 1: // local template + sprintf(temp_kernel, kernel_variants[k].c_str(), + pragma_str, + gentype_base[i].c_str(), get_default_data_for_type(gentype_base[i]).c_str(), + gentype_base[i].c_str(), + gentype_base[i].c_str(), + gentype_base[i].c_str(), vector_sizes[j].c_str(), gentype_base[i].c_str(), vector_sizes[j].c_str(), + gentype_base[i].c_str(), + vector_sizes[j].c_str(), + vector_sizes[j].c_str() + ); + break; + } + + KERNEL_FUNCTIONS.push_back(temp_kernel); + } + } + } +} + +int test_builtin_functions(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { + std::vector KERNEL_FUNCTIONS; + + create_math_kernels(KERNEL_FUNCTIONS); + create_vload_kernels(KERNEL_FUNCTIONS, deviceID); + create_vstore_kernels(KERNEL_FUNCTIONS, deviceID); + + CAdvancedTest test(KERNEL_FUNCTIONS); + + return test.Execute(deviceID, context, queue, num_elements); +} + +int test_generic_advanced_casting(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { + std::vector KERNEL_FUNCTIONS; + + KERNEL_FUNCTIONS.push_back( + NL + NL "__global char arr[16] = { 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3 };" + NL + NL "__kernel void testKernel(__global uint *results) {" + NL " uint tid = get_global_id(0);" + NL 
+ NL " const int * volatile ptr = (const int *)arr;" + NL + NL " results[tid] = (ptr[0] == 0x00000000) && (ptr[1] == 0x01010101) && (ptr[2] == 0x02020202) && (ptr[3] == 0x03030303);" + NL "}" + NL + ); + + KERNEL_FUNCTIONS.push_back( + NL + NL "__kernel void testKernel(__global uint *results) {" + NL " uint tid = get_global_id(0);" + NL + NL " __local int i;" + NL " i = 0x11112222;" + NL " short *ptr = (short *)&i;" + NL " local int *lptr = (local int *)ptr;" + NL + NL " results[tid] = (lptr == &i) && (*lptr == i);" + NL "}" + NL + ); + + KERNEL_FUNCTIONS.push_back( + NL + NL "__kernel void testKernel(__global uint *results) {" + NL " uint tid = get_global_id(0);" + NL + NL " int i = 0x11112222;" + NL + NL " void *ptr = &i;" + NL " int copy = *((int *)ptr);" + NL + NL " results[tid] = (copy == i);" + NL "}" + NL + ); + + CAdvancedTest test(KERNEL_FUNCTIONS); + + return test.Execute(deviceID, context, queue, num_elements); +} + +int test_generic_ptr_to_host_mem(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { + cl_int result = CL_SUCCESS; + + const std::string GLOBAL_KERNEL_FUNCTION = common::CONFORMANCE_VERIFY_FENCE + + NL + NL "bool helperFunction(uint *ptr, uint tid) {" + NL " if (!isFenceValid(get_fence(ptr)))" + NL " return false;" + NL + NL " if (*ptr != tid)" + NL " return false;" + NL + NL " return true;" + NL "}" + NL + NL "__kernel void testKernel(__global uint *results, __global uint *buf) {" + NL " uint tid = get_global_id(0);" + NL + NL " results[tid] = helperFunction(&buf[tid], tid);" + NL "}" + NL; + + const std::string LOCAL_KERNEL_FUNCTION = common::CONFORMANCE_VERIFY_FENCE + + NL + NL "bool helperFunction(uint *ptr, uint tid) {" + NL " if (!isFenceValid(get_fence(ptr)))" + NL " return false;" + NL + NL " if (*ptr != tid)" + NL " return false;" + NL + NL " return true;" + NL "}" + NL + NL "__kernel void testKernel(__global uint *results, __local uint *buf) {" + NL " uint tid = get_global_id(0);" + NL " if 
(get_local_id(0) == 0) {" + NL " for (uint i = 0; i < get_local_size(0); ++i) {" + NL " uint idx = get_local_size(0) * get_group_id(0) + i;" + NL " buf[idx] = idx;" + NL " }" + NL " }" + NL + NL " work_group_barrier(CLK_LOCAL_MEM_FENCE);" + NL " results[tid] = helperFunction(&buf[tid], tid);" + NL "}" + NL; + + CAdvancedTest test_global_ptr(GLOBAL_KERNEL_FUNCTION, ARG_TYPE_HOST_PTR); + result |= test_global_ptr.Execute(deviceID, context, queue, num_elements); + + CAdvancedTest test_local_ptr(LOCAL_KERNEL_FUNCTION, ARG_TYPE_HOST_LOCAL); + result |= test_local_ptr.Execute(deviceID, context, queue, num_elements / 64); + + /* Test SVM capabilities and select matching tests */ + cl_device_svm_capabilities caps; + + cl_int error = clGetDeviceInfo(deviceID, CL_DEVICE_SVM_CAPABILITIES, sizeof(caps), &caps, NULL); + test_error(error, "clGetDeviceInfo(CL_DEVICE_SVM_CAPABILITIES) failed"); + + if (caps & CL_DEVICE_SVM_COARSE_GRAIN_BUFFER) { + CAdvancedTest test_global_svm_ptr(GLOBAL_KERNEL_FUNCTION, ARG_TYPE_COARSE_GRAINED_SVM); + result |= test_global_svm_ptr.Execute(deviceID, context, queue, num_elements); + } + + if (caps & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) { + CAdvancedTest test_global_svm_ptr(GLOBAL_KERNEL_FUNCTION, ARG_TYPE_FINE_GRAINED_BUFFER_SVM); + result |= test_global_svm_ptr.Execute(deviceID, context, queue, num_elements); + } + + if (caps & CL_DEVICE_SVM_FINE_GRAIN_SYSTEM) { + CAdvancedTest test_global_svm_ptr(GLOBAL_KERNEL_FUNCTION, ARG_TYPE_FINE_GRAINED_SYSTEM_SVM); + result |= test_global_svm_ptr.Execute(deviceID, context, queue, num_elements); + } + + if (caps & CL_DEVICE_SVM_ATOMICS) { + CAdvancedTest test_global_svm_ptr(GLOBAL_KERNEL_FUNCTION, ARG_TYPE_ATOMICS_SVM); + result |= test_global_svm_ptr.Execute(deviceID, context, queue, num_elements); + } + + return result; +} diff --git a/test_conformance/generic_address_space/base.h b/test_conformance/generic_address_space/base.h new file mode 100644 index 00000000..5ea71055 --- /dev/null +++ 
b/test_conformance/generic_address_space/base.h @@ -0,0 +1,39 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/testHarness.h" + +#include <string> + +// Abstract interface implemented by every test class in this suite; the +// test_* entry points construct a concrete subclass and call Execute(). +class CTest { +public: + // Virtual destructor: subclasses are used polymorphically through CTest*. + virtual ~CTest() {} + virtual int Execute(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) = 0; +}; + +// Line separator used to assemble OpenCL C kernel sources from string literals. +#define NL "\n" + +namespace common { + // OpenCL C helper prepended to kernels that call get_fence(); the current + // spec says get_fence can return any valid fence, so accept every legal value. + static const std::string CONFORMANCE_VERIFY_FENCE = + NL + NL "// current spec says get_fence can return any valid fence" + NL "bool isFenceValid(cl_mem_fence_flags fence) {" + NL " if ((fence == 0) || (fence == CLK_GLOBAL_MEM_FENCE) || (fence == CLK_LOCAL_MEM_FENCE) " + NL " || (fence == (CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE)))" + NL " return true;" + NL " else" + NL " return false;" + NL "}" + NL; +} diff --git a/test_conformance/generic_address_space/basic_tests.cpp new file mode 100644 index 00000000..ff42edd9 --- /dev/null +++ b/test_conformance/generic_address_space/basic_tests.cpp @@ -0,0 +1,877 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/testHarness.h" +#include "../../test_common/harness/typeWrappers.h" +#include "base.h" + +#include +#include +#include + +class CBasicTest : CTest { +public: + CBasicTest(const std::vector& kernel) : CTest(), _kernels(kernel) { + + } + + CBasicTest(const std::string& kernel) : CTest(), _kernels(1, kernel) { + + } + + int ExecuteSubcase(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, const std::string& src) { + cl_int error; + + clProgramWrapper program; + clKernelWrapper kernel; + + const char *srcPtr = src.c_str(); + + if (create_single_kernel_helper_with_build_options(context, &program, &kernel, 1, &srcPtr, "testKernel", "-cl-std=CL2.0")) { + log_error("create_single_kernel_helper failed"); + return -1; + } + + size_t bufferSize = num_elements * sizeof(cl_uint); + clMemWrapper buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, bufferSize, NULL, &error); + test_error(error, "clCreateBuffer failed"); + + error = clSetKernelArg(kernel, 0, sizeof(buffer), &buffer); + test_error(error, "clSetKernelArg failed"); + + size_t globalWorkGroupSize = num_elements; + size_t localWorkGroupSize = 0; + error = get_max_common_work_group_size(context, kernel, globalWorkGroupSize, &localWorkGroupSize); + test_error(error, "Unable to get common work group size"); + + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &globalWorkGroupSize, &localWorkGroupSize, 0, NULL, NULL); + test_error(error, "clEnqueueNDRangeKernel failed"); + + // verify results + std::vector 
results(num_elements); + + error = clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, bufferSize, &results[0], 0, NULL, NULL); + test_error(error, "clEnqueueReadBuffer failed"); + + size_t passCount = std::count(results.begin(), results.end(), 1); + if (passCount != results.size()) { + std::vector::iterator iter = std::find(results.begin(), results.end(), 0); + log_error("Verification on device failed at index %ld\n", std::distance(results.begin(), iter)); + log_error("%ld out of %ld failed\n", (results.size()-passCount), results.size()); + return -1; + } + + return CL_SUCCESS; + } + + int Execute(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { + cl_int result = CL_SUCCESS; + + for (std::vector::const_iterator it = _kernels.begin(); it != _kernels.end(); ++it) { + log_info("Executing subcase #%ld out of %ld\n", (it - _kernels.begin() + 1), _kernels.size()); + + result |= ExecuteSubcase(deviceID, context, queue, num_elements, *it); + } + + return result; + } + +private: + const std::vector _kernels; +}; + +int test_function_params_get_fence(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { + const std::string KERNEL_FUNCTION = common::CONFORMANCE_VERIFY_FENCE + + NL + NL "__global int gint = 1;" + NL "__global uchar guchar = 3;" + NL + NL "bool helperFunction(int *intp, float *floatp, uchar *ucharp, ushort *ushortp, long *longp) {" + NL " if (!isFenceValid(get_fence(intp)))" + NL " return false;" + NL " if (!isFenceValid(get_fence(floatp)))" + NL " return false;" + NL " if (!isFenceValid(get_fence(ucharp)))" + NL " return false;" + NL " if (!isFenceValid(get_fence(ushortp)))" + NL " return false;" + NL " if (!isFenceValid(get_fence(longp)))" + NL " return false;" + NL + NL " if (*intp != 1 || *floatp != 2.0f || *ucharp != 3 || *ushortp != 4 || *longp != 5)" + NL " return false;" + NL + NL " return true;" + NL "}" + NL + NL "__kernel void testKernel(__global uint *results) {" + NL " uint tid = 
get_global_id(0);" + NL + NL " __local float lfloat;" + NL " lfloat = 2.0f;" + NL " __local ushort lushort;" + NL " lushort = 4;" + NL " long plong = 5;" + NL + NL " __global int *gintp = &gint;" + NL " __local float *lfloatp = &lfloat;" + NL " __global uchar *gucharp = &guchar;" + NL " __local ushort *lushortp = &lushort;" + NL " __private long *plongp = &plong;" + NL + NL " results[tid] = helperFunction(gintp, lfloatp, gucharp, lushortp, plongp);" + NL "}" + NL; + + CBasicTest test(KERNEL_FUNCTION); + + return test.Execute(deviceID, context, queue, num_elements); +} + +int test_function_params_to_address_space(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { + const std::string KERNEL_FUNCTION = + NL + NL "__global int gint = 1;" + NL "__global uchar guchar = 3;" + NL + NL "bool helperFunction(int *gintp, float *lfloatp, uchar *gucharp, ushort *lushortp, long *plongp) {" + NL " if (to_global(gintp) == NULL)" + NL " return false;" + NL " if (to_local(lfloatp) == NULL)" + NL " return false;" + NL " if (to_global(gucharp) == NULL)" + NL " return false;" + NL " if (to_local(lushortp) == NULL)" + NL " return false;" + NL " if (to_private(plongp) == NULL)" + NL " return false;" + NL + NL " if (*gintp != 1 || *lfloatp != 2.0f || *gucharp != 3 || *lushortp != 4 || *plongp != 5)" + NL " return false;" + NL + NL " return true;" + NL "}" + NL + NL "__kernel void testKernel(__global uint *results) {" + NL " uint tid = get_global_id(0);" + NL + NL " __local float lfloat;" + NL " lfloat = 2.0f;" + NL " __local ushort lushort;" + NL " lushort = 4;" + NL " long plong = 5;" + NL + NL " __global int *gintp = &gint;" + NL " __local float *lfloatp = &lfloat;" + NL " __global uchar *gucharp = &guchar;" + NL " __local ushort *lushortp = &lushort;" + NL " __private long *plongp = &plong;" + NL + NL " results[tid] = helperFunction(gintp, lfloatp, gucharp, lushortp, plongp);" + NL "}" + NL; + + CBasicTest test(KERNEL_FUNCTION); + + return 
test.Execute(deviceID, context, queue, num_elements); +} + +int test_variable_get_fence(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { + const std::string KERNEL_FUNCTION = common::CONFORMANCE_VERIFY_FENCE + + NL + NL "__global int gint = 1;" + NL + NL "__kernel void testKernel(__global uint *results) {" + NL " uint tid = get_global_id(0);" + NL + NL " __local ushort lushort;" + NL " lushort = 2;" + NL " float pfloat = 3.0f;" + NL + NL " // tested pointers" + NL " __global int *gintp = &gint;" + NL " __local ushort *lushortp = &lushort;" + NL " __private float *pfloatp = &pfloat;" + NL + NL " int failures = 0;" + NL " if (!isFenceValid(get_fence(gintp)))" + NL " failures++;" + NL " if (!isFenceValid(get_fence(lushortp)))" + NL " failures++;" + NL " if (!isFenceValid(get_fence(pfloatp)))" + NL " failures++;" + NL " results[tid] = (failures == 0);" + NL "}" + NL; + + CBasicTest test(KERNEL_FUNCTION); + + return test.Execute(deviceID, context, queue, num_elements); +} + +int test_variable_to_address_space(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { + const std::string KERNEL_FUNCTION = + NL + NL "__global int gint = 1;" + NL + NL "__kernel void testKernel(__global uint *results) {" + NL " uint tid = get_global_id(0);" + NL + NL " __local ushort lushort;" + NL " lushort = 2;" + NL " float pfloat = 3.0f;" + NL + NL " // tested pointers" + NL " __global int * gintp = &gint;" + NL " __local ushort *lushortp = &lushort;" + NL " __private float *pfloatp = &pfloat;" + NL + NL " int failures = 0;" + NL " if (to_global(gintp) == NULL)" + NL " failures++;" + NL " if (to_local(lushortp) == NULL)" + NL " failures++;" + NL " if (to_private(pfloatp) == NULL)" + NL " failures++;" + NL " results[tid] = (failures == 0);" + NL "}" + NL; + + CBasicTest test(KERNEL_FUNCTION); + + return test.Execute(deviceID, context, queue, num_elements); +} + +int test_casting(cl_device_id deviceID, cl_context context, 
cl_command_queue queue, int num_elements) { + std::vector KERNEL_FUNCTIONS; + + // pointers to global, local or private are implicitly convertible to generic + KERNEL_FUNCTIONS.push_back(common::CONFORMANCE_VERIFY_FENCE + + NL + NL "__global int gint = 1;" + NL + NL "__kernel void testKernel(__global uint *results) {" + NL " uint tid = get_global_id(0);" + NL + NL " __local int lint;" + NL " lint = 2;" + NL " int pint = 3;" + NL + NL " // count mismatches with expected fence types" + NL " int failures = 0;" + NL + NL " // tested pointer" + NL " // generic can be reassigned to different named address spaces" + NL " int * intp;" + NL + NL " intp = &gint;" + NL " failures += !(isFenceValid(get_fence(intp)));" + NL " failures += !(to_global(intp));" + NL " failures += (*intp != 1);" + NL + NL " intp = &lint;" + NL " failures += !(isFenceValid(get_fence(intp)));" + NL " failures += !(to_local(intp));" + NL " failures += (*intp != 2);" + NL + NL " intp = &pint;" + NL " failures += !(isFenceValid(get_fence(intp)));" + NL " failures += !(to_private(intp));" + NL " failures += (*intp != 3);" + NL + NL " results[tid] = (failures == 0);" + NL "}" + NL + ); + + // converting from a generic pointer to a named address space is legal only with explicit casting + KERNEL_FUNCTIONS.push_back(common::CONFORMANCE_VERIFY_FENCE + + NL + NL "__global int gint = 1;" + NL + NL "__kernel void testKernel(__global uint *results) {" + NL " uint tid = get_global_id(0);" + NL + NL " __local int lint;" + NL " lint = 2;" + NL " int pint = 3;" + NL + NL " // count mismatches with expected fence types" + NL " int failures = 0;" + NL + NL " // tested pointer" + NL " // generic can be reassigned to different named address spaces" + NL " int * intp;" + NL + NL " intp = &gint;" + NL " global int * gintp = (global int *)intp;" + NL " failures += !(isFenceValid(get_fence(gintp)));" + NL " failures += !(to_global(gintp));" + NL " failures += (*gintp != 1);" + NL + NL " intp = &lint;" + NL " local int * 
lintp = (local int *)intp;" + NL " failures += !(isFenceValid(get_fence(lintp)));" + NL " failures += !(to_local(lintp));" + NL " failures += (*lintp != 2);" + NL + NL " intp = &pint;" + NL " private int * pintp = (private int *)intp;" + NL " failures += !(isFenceValid(get_fence(pintp)));" + NL " failures += !(to_private(pintp));" + NL " failures += (*pintp != 3);" + NL + NL " results[tid] = (failures == 0);" + NL "}" + NL + ); + + CBasicTest test(KERNEL_FUNCTIONS); + + return test.Execute(deviceID, context, queue, num_elements); +} + +int test_conditional_casting(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { + const std::string KERNEL_FUNCTION = common::CONFORMANCE_VERIFY_FENCE + + NL + NL "__global int gint = 1;" + NL + NL "__kernel void testKernel(__global uint *results) {" + NL " uint tid = get_global_id(0);" + NL + NL " int *ptr;" + NL " __local int lint;" + NL " lint = 2;" + NL + NL " if (tid % 2)" + NL " ptr = &gint;" + NL " else" + NL " ptr = &lint;" + NL + NL " barrier(CLK_GLOBAL_MEM_FENCE);" + NL + NL " if (tid % 2)" + NL " results[tid] = (isFenceValid(get_fence(ptr)) && to_global(ptr) && *ptr == 1);" + NL " else" + NL " results[tid] = (isFenceValid(get_fence(ptr)) && to_local(ptr) && *ptr == 2);" + NL "}" + NL; + + CBasicTest test(KERNEL_FUNCTION); + + return test.Execute(deviceID, context, queue, num_elements); +} + +int test_chain_casting(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { + const std::string KERNEL_FUNCTION = common::CONFORMANCE_VERIFY_FENCE + + NL + NL "__global int gint = 1;" + NL + NL "int f4(int val, int *ptr) { return (isFenceValid(get_fence(ptr)) && val == *ptr) ? 
0 : 1; }" + NL "int f3(int val, int *ptr) { return f4(val, ptr); }" + NL "int f2(int *ptr, int val) { return f3(val, ptr); }" + NL "int f1(int *ptr, int val) { return f2(ptr, val); }" + NL + NL "__kernel void testKernel(__global uint *results) {" + NL " uint tid = get_global_id(0);" + NL + NL " int *ptr;" + NL " __local int lint;" + NL " lint = 2;" + NL " __private int pint = 3;" + NL + NL " int failures = 0;" + NL " failures += f1(&gint, gint);" + NL " failures += f1(&lint, lint);" + NL " failures += f1(&pint, pint);" + NL + NL " results[tid] = (failures == 0);" + NL "}" + NL; + CBasicTest test(KERNEL_FUNCTION); + + return test.Execute(deviceID, context, queue, num_elements); +} + +int test_ternary_operator_casting(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { + const std::string KERNEL_FUNCTION = common::CONFORMANCE_VERIFY_FENCE + + NL + NL "__global int gint = 1;" + NL + NL "__kernel void testKernel(__global uint *results) {" + NL " uint tid = get_global_id(0);" + NL + NL " int *ptr;" + NL " __local int lint;" + NL " lint = 2;" + NL + NL " ptr = (tid % 2) ? 
&gint : (int *)&lint; // assuming there is an implicit conversion from named address space to generic" + NL + NL " barrier(CLK_GLOBAL_MEM_FENCE);" + NL + NL " if (tid % 2)" + NL " results[tid] = (isFenceValid(get_fence(ptr)) && to_global(ptr) && *ptr == gint);" + NL " else" + NL " results[tid] = (isFenceValid(get_fence(ptr)) && to_local(ptr) && *ptr == lint);" + NL "}" + NL; + + CBasicTest test(KERNEL_FUNCTION); + + return test.Execute(deviceID, context, queue, num_elements); +} + +int test_language_struct(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { + std::vector KERNEL_FUNCTIONS; + + // implicit private struct + KERNEL_FUNCTIONS.push_back(common::CONFORMANCE_VERIFY_FENCE + + NL + NL "__global int gint = 1;" + NL + NL "__kernel void testKernel(__global uint *results) {" + NL " uint tid = get_global_id(0);" + NL " int failures = 0;" + NL + NL " __local int lint;" + NL " lint = 2;" + NL " __private int pint = 3;" + NL + NL " struct {" + NL " __global int *gintp;" + NL " __local int *lintp;" + NL " __private int *pintp;" + NL " } structWithPointers;" + NL + NL " structWithPointers.gintp = &gint;" + NL " structWithPointers.lintp = &lint;" + NL " structWithPointers.pintp = &pint;" + NL + NL " failures += !(isFenceValid(get_fence(structWithPointers.gintp)));" + NL " failures += !(isFenceValid(get_fence(structWithPointers.lintp)));" + NL " failures += !(isFenceValid(get_fence(structWithPointers.pintp)));" + NL + NL " failures += !(to_global(structWithPointers.gintp));" + NL " failures += !(to_local(structWithPointers.lintp));" + NL " failures += !(to_private(structWithPointers.pintp));" + NL + NL " results[tid] = (failures == 0);" + NL "}" + NL + ); + + // explicit __private struct + KERNEL_FUNCTIONS.push_back(common::CONFORMANCE_VERIFY_FENCE + + NL + NL "__global int gint = 1;" + NL + NL "__kernel void testKernel(__global uint *results) {" + NL " uint tid = get_global_id(0);" + NL " int failures = 0;" + NL + NL " __local int 
lint;" + NL " lint = 2;" + NL " __private int pint = 3;" + NL + NL " typedef struct {" + NL " __global int * gintp;" + NL " __local int * lintp;" + NL " __private int * pintp;" + NL " } S;" + NL + NL " __private S structWithPointers;" + NL " structWithPointers.gintp = &gint;" + NL " structWithPointers.lintp = &lint;" + NL " structWithPointers.pintp = &pint;" + NL + NL " failures += !(isFenceValid(get_fence(structWithPointers.gintp)));" + NL " failures += !(isFenceValid(get_fence(structWithPointers.lintp)));" + NL " failures += !(isFenceValid(get_fence(structWithPointers.pintp)));" + NL + NL " failures += !(to_global(structWithPointers.gintp));" + NL " failures += !(to_local(structWithPointers.lintp));" + NL " failures += !(to_private(structWithPointers.pintp));" + NL + NL " results[tid] = (failures == 0);" + NL "}" + NL + ); + + KERNEL_FUNCTIONS.push_back(common::CONFORMANCE_VERIFY_FENCE + + NL + NL "__global int gint = 1;" + NL + NL "__kernel void testKernel(__global uint *results) {" + NL " uint tid = get_global_id(0);" + NL " int failures = 0;" + NL + NL " __local int lint;" + NL " lint = 2;" + NL " __private int pint = 3;" + NL + NL " typedef struct {" + NL " __global int * gintp;" + NL " __local int * lintp;" + NL " __private int * pintp;" + NL " } S;" + NL + NL " __local S structWithPointers;" + NL " structWithPointers.gintp = &gint;" + NL " structWithPointers.lintp = &lint;" + NL " structWithPointers.pintp = &pint;" + NL + NL " failures += !(isFenceValid(get_fence(structWithPointers.gintp)));" + NL " failures += !(isFenceValid(get_fence(structWithPointers.lintp)));" + NL " failures += !(isFenceValid(get_fence(structWithPointers.pintp)));" + NL + NL " failures += !(to_global(structWithPointers.gintp));" + NL " failures += !(to_local(structWithPointers.lintp));" + NL " failures += !(to_private(structWithPointers.pintp));" + NL + NL " results[tid] = (failures == 0);" + NL "}" + NL + ); + + KERNEL_FUNCTIONS.push_back(common::CONFORMANCE_VERIFY_FENCE + + NL + NL 
"typedef struct {" + NL " __global int *gintp;" + NL " __local int *lintp;" + NL " __private int *pintp;" + NL "} S;" + NL + NL "__global S structWithPointers;" + NL "__global int gint = 1;" + NL + NL "__kernel void testKernel(__global uint *results) {" + NL " uint tid = get_global_id(0);" + NL " int failures = 0;" + NL + NL " __local int lint;" + NL " lint = 2;" + NL " __private int pint = 3;" + NL + NL " structWithPointers.gintp = &gint;" + NL " structWithPointers.lintp = &lint;" + NL " structWithPointers.pintp = &pint;" + NL + NL " failures += !(isFenceValid(get_fence(structWithPointers.gintp)));" + NL " failures += !(isFenceValid(get_fence(structWithPointers.lintp)));" + NL " failures += !(isFenceValid(get_fence(structWithPointers.pintp)));" + NL + NL " failures += !(to_global(structWithPointers.gintp));" + NL " failures += !(to_local(structWithPointers.lintp));" + NL " failures += !(to_private(structWithPointers.pintp));" + NL + NL " results[tid] = (failures == 0);" + NL "}" + NL + ); + + CBasicTest test(KERNEL_FUNCTIONS); + + return test.Execute(deviceID, context, queue, num_elements); +} + +int test_language_union(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { + std::vector KERNEL_FUNCTIONS; + + KERNEL_FUNCTIONS.push_back(common::CONFORMANCE_VERIFY_FENCE + + NL + NL "__global int g = 1;" + NL + NL "__kernel void testKernel(__global uint *results) {" + NL " uint tid = get_global_id(0);" + NL " int failures = 0;" + NL + NL " __local int l;" + NL " l = 2;" + NL " int p = 3;" + NL + NL " union {" + NL " __global int *gintp;" + NL " __local int *lintp;" + NL " __private int *pintp;" + NL " } u;" + NL + NL " u.gintp = &g;" + NL " failures += !(isFenceValid(get_fence(u.gintp)));" + NL " failures += !to_global(u.gintp);" + NL " failures += (*(u.gintp) != 1);" + NL + NL " u.lintp = &l;" + NL " failures += !(isFenceValid(get_fence(u.lintp)));" + NL " failures += !to_local(u.lintp);" + NL " failures += (*(u.lintp) != 2);" + NL + 
NL " u.pintp = &p;" + NL " failures += !(isFenceValid(get_fence(u.pintp)));" + NL " failures += !to_private(u.pintp);" + NL " failures += (*(u.pintp) != 3);" + NL + NL " results[tid] = (failures == 0);" + NL "}" + NL + ); + + KERNEL_FUNCTIONS.push_back(common::CONFORMANCE_VERIFY_FENCE + + NL + NL "__global int g = 1;" + NL + NL "__kernel void testKernel(__global uint *results) {" + NL " uint tid = get_global_id(0);" + NL " int failures = 0;" + NL + NL " __local int l;" + NL " l = 2;" + NL " int p = 3;" + NL + NL " typedef union {" + NL " __global int * gintp;" + NL " __local int * lintp;" + NL " __private int * pintp;" + NL " } U;" + NL + NL " __local U u;" + NL + NL " u.gintp = &g;" + NL " failures += !(isFenceValid(get_fence(u.gintp)));" + NL " failures += !to_global(u.gintp);" + NL " failures += (*(u.gintp) != 1);" + NL + NL " u.lintp = &l;" + NL " failures += !(isFenceValid(get_fence(u.lintp)));" + NL " failures += !to_local(u.lintp);" + NL " failures += (*(u.lintp) != 2);" + NL + NL " u.pintp = &p;" + NL " failures += !(isFenceValid(get_fence(u.pintp)));" + NL " failures += !to_private(u.pintp);" + NL " failures += (*(u.pintp) != 3);" + NL + NL " results[tid] = (failures == 0);" + NL "}" + NL + ); + + KERNEL_FUNCTIONS.push_back(common::CONFORMANCE_VERIFY_FENCE + + NL + NL "typedef union {" + NL " __global int * gintp;" + NL " __local int * lintp;" + NL " __private int * pintp;" + NL "} U;" + NL + NL "__global U u;" + NL "__global int g = 1;" + NL + NL "__kernel void testKernel(__global uint *results) {" + NL " uint tid = get_global_id(0);" + NL + NL " // for global unions only one thread should modify union's content" + NL " if (tid != 0) {" + NL " results[tid] = 1;" + NL " return;" + NL " }" + NL + NL " int failures = 0;" + NL + NL " __local int l;" + NL " l = 2;" + NL " int p = 3;" + NL + NL " u.gintp = &g;" + NL " failures += !(isFenceValid(get_fence(u.gintp)));" + NL " failures += !to_global(u.gintp);" + NL " failures += (*(u.gintp) != 1);" + NL + NL " 
u.lintp = &l;" + NL " failures += !(isFenceValid(get_fence(u.lintp)));" + NL " failures += !to_local(u.lintp);" + NL " failures += (*(u.lintp) != 2);" + NL + NL " u.pintp = &p;" + NL " failures += !(isFenceValid(get_fence(u.pintp)));" + NL " failures += !to_private(u.pintp);" + NL " failures += (*(u.pintp) != 3);" + NL + NL " results[tid] = (failures == 0);" + NL "}" + NL + ); + + CBasicTest test(KERNEL_FUNCTIONS); + + return test.Execute(deviceID, context, queue, num_elements); +} + +int test_multiple_calls_same_function(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { + const std::string KERNEL_FUNCTION = + NL + NL "int shift2(const int *ptr, int arg) {" + NL " return *ptr << arg;" + NL "}" + NL + NL "__kernel void testKernel(__global uint *results) {" + NL " uint tid = get_global_id(0);" + NL " int failures = 0;" + NL + NL " __local int val;" + NL " val = get_group_id(0);" + NL + NL " for (int i = 0; i < 5; i++) {" + NL " if (shift2(&val, i) != (val << i))" + NL " failures++;" + NL " }" + NL + NL " for (int i = 10; i > 5; i--) {" + NL " if (shift2(&val, i) != (val << i))" + NL " failures++;" + NL " }" + NL + NL " results[tid] = (failures == 0);" + NL "}" + NL; + + CBasicTest test(KERNEL_FUNCTION); + + return test.Execute(deviceID, context, queue, num_elements); +} + +int test_compare_pointers(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { + std::vector KERNEL_FUNCTIONS; + + KERNEL_FUNCTIONS.push_back( + NL "__kernel void testKernel(__global uint *results) {" + NL " uint tid = get_global_id(0);" + NL + NL " int *ptr = NULL;" + NL + NL " results[tid] = (ptr == NULL);" + NL "}" + NL + ); + + KERNEL_FUNCTIONS.push_back( + NL "__kernel void testKernel(__global uint *results) {" + NL " uint tid = get_global_id(0);" + NL + NL " int *ptr = NULL;" + NL " __global int *gptr = NULL;" + NL + NL " results[tid] = (ptr == gptr);" + NL "}" + NL + ); + + KERNEL_FUNCTIONS.push_back( + NL "__kernel void 
testKernel(__global uint *results) {" + NL " uint tid = get_global_id(0);" + NL + NL " int *ptr = NULL;" + NL " __local int *lptr = NULL;" + NL + NL " results[tid] = (ptr == lptr);" + NL "}" + NL + ); + + KERNEL_FUNCTIONS.push_back( + NL "__kernel void testKernel(__global uint *results) {" + NL " uint tid = get_global_id(0);" + NL + NL " int *ptr = NULL;" + NL " __private int *pptr = NULL;" + NL + NL " results[tid] = (ptr == pptr);" + NL "}" + NL + ); + + KERNEL_FUNCTIONS.push_back( + NL "__kernel void testKernel(__global uint *results) {" + NL " uint tid = get_global_id(0);" + NL + NL " int *ptr = NULL;" + NL " __local int *lptr = NULL;" + NL " __global int *gptr = NULL;" + NL + NL " ptr = lptr;" + NL + NL " results[tid] = (gptr == ptr) && (lptr == ptr);" + NL "}" + NL + ); + + KERNEL_FUNCTIONS.push_back( + NL "__kernel void testKernel(__global uint *results) {" + NL " uint tid = get_global_id(0);" + NL + NL " int some_value = 7;" + NL " int *ptr = NULL;" + NL " __private int *pptr = &some_value;" + NL + NL " results[tid] = (ptr != pptr);" + NL "}" + NL + ); + + KERNEL_FUNCTIONS.push_back( + NL "__kernel void testKernel(__global uint *results) {" + NL " uint tid = get_global_id(0);" + NL + NL " __local int some_value;" + NL " some_value = 7;" + NL " int *ptr = NULL;" + NL " __local int *lptr = &some_value;" + NL + NL " results[tid] = (ptr != lptr);" + NL "}" + NL + ); + + KERNEL_FUNCTIONS.push_back( + NL "__global int some_value = 7;" + NL + NL "__kernel void testKernel(__global uint *results) {" + NL " uint tid = get_global_id(0);" + NL + NL " int *ptr = NULL;" + NL " __global int *gptr = &some_value;" + NL + NL " results[tid] = (ptr != gptr);" + NL "}" + NL + ); + + KERNEL_FUNCTIONS.push_back( + NL "__global int arr[5] = { 0, 1, 2, 3, 4 };" + NL + NL "__kernel void testKernel(__global uint *results) {" + NL " uint tid = get_global_id(0);" + NL + NL " int *ptr = &arr[1];" + NL " __global int *gptr = &arr[3];" + NL + NL " results[tid] = (gptr >= ptr);" + NL "}" + 
NL + ); + + CBasicTest test(KERNEL_FUNCTIONS); + + return test.Execute(deviceID, context, queue, num_elements); +} diff --git a/test_conformance/generic_address_space/main.cpp b/test_conformance/generic_address_space/main.cpp new file mode 100644 index 00000000..e59fd49e --- /dev/null +++ b/test_conformance/generic_address_space/main.cpp @@ -0,0 +1,106 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/testHarness.h" + +#include + +// basic tests +extern int test_function_params_get_fence(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_function_params_to_address_space(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_variable_get_fence(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_variable_to_address_space(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_casting(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_conditional_casting(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_chain_casting(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_ternary_operator_casting(cl_device_id deviceID, 
cl_context context, cl_command_queue queue, int num_elements); +extern int test_language_struct(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_language_union(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_multiple_calls_same_function(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_compare_pointers(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +// advanced tests +extern int test_library_function(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_generic_variable_volatile(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_generic_variable_const(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_generic_variable_gentype(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_builtin_functions(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_generic_advanced_casting(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_generic_ptr_to_host_mem(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_max_number_of_params(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +basefn basefn_list[] = { + // basic tests + test_function_params_get_fence, + test_function_params_to_address_space, + test_variable_get_fence, + test_variable_to_address_space, + test_casting, + test_conditional_casting, + test_chain_casting, + test_ternary_operator_casting, + test_language_struct, + test_language_union, + test_multiple_calls_same_function, + test_compare_pointers, + // advanced tests + 
test_library_function, + test_generic_variable_volatile, + test_generic_variable_const, + test_generic_variable_gentype, + test_builtin_functions, + test_generic_advanced_casting, + test_generic_ptr_to_host_mem, + test_max_number_of_params, +}; + +const char *basefn_names[] = { + //basic tests + "function_get_fence", + "function_to_address_space", + "variable_get_fence", + "variable_to_address_space", + "casting", + "conditional_casting", + "chain_casting", + "ternary_operator_casting", + "language_struct", + "language_union", + "multiple_calls_same_function", + "compare_pointers", + // advanced tests + "library_function", + "generic_variable_volatile", + "generic_variable_const", + "generic_variable_gentype", + "builtin_functions", + "generic_advanced_casting", + "generic_ptr_to_host_mem", + "max_number_of_params", +}; + +ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_list) / sizeof(basefn_list[0]))); + +int num_fns = sizeof(basefn_names) / sizeof(char *); + +/* + Generic Address Space + Tests for unnamed generic address space. This feature allows developers to create single generic functions + that are able to operate on pointers from various address spaces instead of writing separate instances for every combination. +*/ + +int main(int argc, const char *argv[]) +{ + return runTestHarness(argc, argv, num_fns, basefn_list, basefn_names, false, false, NULL); +} diff --git a/test_conformance/generic_address_space/stress_tests.cpp b/test_conformance/generic_address_space/stress_tests.cpp new file mode 100644 index 00000000..58ceb514 --- /dev/null +++ b/test_conformance/generic_address_space/stress_tests.cpp @@ -0,0 +1,173 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/testHarness.h" +#include "../../test_common/harness/typeWrappers.h" +#include "../../test_common/harness/mt19937.h" +#include "base.h" + +#include +#include +#include +#include + +class CStressTest : public CTest { +public: + CStressTest(const std::vector& kernel) : CTest(), _kernels(kernel) { + + } + + CStressTest(const std::string& kernel) : CTest(), _kernels(1, kernel) { + + } + + int ExecuteSubcase(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, const std::string& src) { + cl_int error; + + clProgramWrapper program; + clKernelWrapper kernel; + + const char *srcPtr = src.c_str(); + + if (create_single_kernel_helper_with_build_options(context, &program, &kernel, 1, &srcPtr, "testKernel", "-cl-std=CL2.0")) { + log_error("create_single_kernel_helper failed"); + return -1; + } + + size_t bufferSize = num_elements * sizeof(cl_uint); + clMemWrapper buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, bufferSize, NULL, &error); + test_error(error, "clCreateBuffer failed"); + + error = clSetKernelArg(kernel, 0, sizeof(buffer), &buffer); + test_error(error, "clSetKernelArg failed"); + + size_t globalWorkGroupSize = num_elements; + size_t localWorkGroupSize = 0; + error = get_max_common_work_group_size(context, kernel, globalWorkGroupSize, &localWorkGroupSize); + test_error(error, "Unable to get common work group size"); + + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &globalWorkGroupSize, &localWorkGroupSize, 0, NULL, NULL); + test_error(error, "clEnqueueNDRangeKernel 
failed"); + + // verify results + std::vector results(num_elements); + + error = clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, bufferSize, &results[0], 0, NULL, NULL); + test_error(error, "clEnqueueReadBuffer failed"); + + size_t passCount = std::count(results.begin(), results.end(), 1); + if (passCount != results.size()) { + std::vector::iterator iter = std::find(results.begin(), results.end(), 0); + log_error("Verification on device failed at index %ld\n", std::distance(results.begin(), iter)); + log_error("%ld out of %ld failed\n", (results.size()-passCount), results.size()); + return -1; + } + + return CL_SUCCESS; + } + + int Execute(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { + cl_int result = CL_SUCCESS; + + for (std::vector::const_iterator it = _kernels.begin(); it != _kernels.end(); ++it) { + log_info("Executing subcase #%ld out of %ld\n", (it - _kernels.begin() + 1), _kernels.size()); + + result |= ExecuteSubcase(deviceID, context, queue, num_elements, *it); + } + + return result; + } + +private: + const std::vector _kernels; +}; + +int test_max_number_of_params(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { + cl_int error; + + size_t deviceMaxParameterSize; + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof(deviceMaxParameterSize), &deviceMaxParameterSize, NULL); + test_error(error, "clGetDeviceInfo failed"); + + size_t deviceAddressBits; + error = clGetDeviceInfo(deviceID, CL_DEVICE_ADDRESS_BITS, sizeof(deviceAddressBits), &deviceAddressBits, NULL); + test_error(error, "clGetDeviceInfo failed"); + + size_t maxParams = deviceMaxParameterSize / (deviceAddressBits / 8); + + const std::string KERNEL_FUNCTION_TEMPLATE[] = { + common::CONFORMANCE_VERIFY_FENCE + + NL + NL "bool helperFunction(int *ptr0 ", + // the rest of arguments goes here + ") {" + NL " // check first pointer only" + NL " if (!isFenceValid(get_fence(ptr0)))" + NL " return false;" + NL + 
NL " return true;" + NL "}" + NL + NL "__kernel void testKernel(__global uint *results) {" + NL " uint tid = get_global_id(0);" + NL + NL " __global int * gptr;" + NL " __local int * lptr;" + NL " __private int * pptr;" + NL + NL " size_t failures = 0;" + NL + NL, + // the function body goes here + NL + NL " results[tid] = (failures == 0);" + NL "}" + NL + }; + + std::ostringstream type_params; + std::ostringstream function_calls; + + for (size_t i = 0; i < maxParams; i++) { + type_params << ", int *ptr" << i+1; + } + + // use pseudo random generator to shuffle params + MTdata d = init_genrand(gRandomSeed); + if (!d) + return -1; + + std::string pointers[] = { "gptr", "lptr", "pptr" }; + + size_t totalCalls = maxParams / 2; + for (size_t i = 0; i < totalCalls; i++) { + function_calls << "\tif (!helperFunction(gptr"; + + for (size_t j = 0; j < maxParams; j++) { + function_calls << ", " << pointers[genrand_int32(d)%3]; + } + + function_calls << ")) failures++;" << NL; + } + + free_mtdata(d); + d = NULL; + + const std::string KERNEL_FUNCTION = KERNEL_FUNCTION_TEMPLATE[0] + type_params.str() + KERNEL_FUNCTION_TEMPLATE[1] + function_calls.str() + KERNEL_FUNCTION_TEMPLATE[2]; + + CStressTest test(KERNEL_FUNCTION); + + return test.Execute(deviceID, context, queue, num_elements); +} diff --git a/test_conformance/geometrics/CMakeLists.txt b/test_conformance/geometrics/CMakeLists.txt new file mode 100644 index 00000000..f88f0364 --- /dev/null +++ b/test_conformance/geometrics/CMakeLists.txt @@ -0,0 +1,24 @@ +set(MODULE_NAME GEOMETRICS) + +set(${MODULE_NAME}_SOURCES + main.c + test_geometrics_double.cpp + test_geometrics.cpp + ../../test_common/harness/errorHelpers.c + ../../test_common/harness/threadTesting.c + ../../test_common/harness/testHarness.c + ../../test_common/harness/kernelHelpers.c + ../../test_common/harness/mt19937.c + ../../test_common/harness/conversions.c + ../../test_common/harness/msvc9.c + ../../test_common/harness/parseParameters.cpp +) + +# VS2005 
optimization workaround for geom_length +if(MSVC) +set(CMAKE_C_FLAGS_RELEASE "/Od /Ob0") +set(CMAKE_CXX_FLAGS_RELEASE "/Od /Ob0") +endif(MSVC) + +include(../CMakeCommon.txt) + diff --git a/test_conformance/geometrics/Jamfile b/test_conformance/geometrics/Jamfile new file mode 100644 index 00000000..3a9f200b --- /dev/null +++ b/test_conformance/geometrics/Jamfile @@ -0,0 +1,17 @@ +project + : requirements + gcc:-xc++ + msvc:"/TP" + ; + +exe test_geometrics + : main.c + test_geometrics.cpp + test_geometrics_double.cpp + ; + +install dist + : test_geometrics + : debug:$(DIST)/debug/tests/test_conformance/geometrics + release:$(DIST)/release/tests/test_conformance/geometrics + ; diff --git a/test_conformance/geometrics/Makefile b/test_conformance/geometrics/Makefile new file mode 100644 index 00000000..e35c93e0 --- /dev/null +++ b/test_conformance/geometrics/Makefile @@ -0,0 +1,44 @@ +ifdef BUILD_WITH_ATF +ATF = -framework ATF +USE_ATF = -DUSE_ATF +endif + +SRCS = main.c \ + test_geometrics.c \ + test_geometrics_double.c \ + ../../test_common/harness/errorHelpers.c \ + ../../test_common/harness/threadTesting.c \ + ../../test_common/harness/testHarness.c \ + ../../test_common/harness/conversions.c \ + ../../test_common/harness/mt19937.c \ + ../../test_common/harness/kernelHelpers.c + +DEFINES = + +SOURCES = $(abspath $(SRCS)) +LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries +LIBPATH += -L. 
+FRAMEWORK = $(SOURCES) +HEADERS = +TARGET = test_geometrics +INCLUDE = +COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32 -Os +CC = c++ +CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE) +CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE) +LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF} + +OBJECTS := ${SOURCES:.c=.o} +OBJECTS := ${OBJECTS:.cpp=.o} + +TARGETOBJECT = +all: $(TARGET) + +$(TARGET): $(OBJECTS) + $(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES) + +clean: + rm -f $(TARGET) $(OBJECTS) + +.DEFAULT: + @echo The target \"$@\" does not exist in Makefile. diff --git a/test_conformance/geometrics/main.c b/test_conformance/geometrics/main.c new file mode 100644 index 00000000..0a99f8ee --- /dev/null +++ b/test_conformance/geometrics/main.c @@ -0,0 +1,61 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/compat.h" + +#include +#include +#include "procs.h" +#include "../../test_common/harness/testHarness.h" +#if !defined(_WIN32) +#include +#endif + +basefn basefn_list[] = { + test_geom_cross, + test_geom_dot, + test_geom_distance, + test_geom_fast_distance, + test_geom_length, + test_geom_fast_length, + test_geom_normalize, + test_geom_fast_normalize +}; + + +const char *basefn_names[] = { + "geom_cross", + "geom_dot", + "geom_distance", + "geom_fast_distance", + "geom_length", + "geom_fast_length", + "geom_normalize", + "geom_fast_normalize", +}; + +ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_list) / sizeof(basefn_list[0]))); + +int num_fns = sizeof(basefn_names) / sizeof(char *); + +const unsigned int g_vecSizeof[] = {0,1,2,4,4,0,0,0,8, + 0,0,0,0,0,0,0,16}; + +int main(int argc, const char *argv[]) +{ + return runTestHarness( argc, argv, num_fns, basefn_list, basefn_names, false, false, 0 ); +} + + diff --git a/test_conformance/geometrics/procs.h b/test_conformance/geometrics/procs.h new file mode 100644 index 00000000..25af3e38 --- /dev/null +++ b/test_conformance/geometrics/procs.h @@ -0,0 +1,38 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/errorHelpers.h" +#include "../../test_common/harness/kernelHelpers.h" +#include "../../test_common/harness/threadTesting.h" +#include "../../test_common/harness/typeWrappers.h" + +extern const unsigned int g_vecSizeof[]; + +extern int create_program_and_kernel(const char *source, const char *kernel_name, cl_program *program_ret, cl_kernel *kernel_ret); + +extern int test_geom_cross(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_geom_dot(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_geom_distance(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_geom_fast_distance(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_geom_length(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_geom_fast_length(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_geom_normalize(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_geom_fast_normalize(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_geom_cross_double(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, MTdata d); +extern int test_geom_dot_double(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, MTdata d); +extern int test_geom_distance_double(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, MTdata d); +extern int test_geom_length_double(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, MTdata d); +extern int test_geom_normalize_double(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, MTdata 
d); diff --git a/test_conformance/geometrics/testBase.h b/test_conformance/geometrics/testBase.h new file mode 100644 index 00000000..5073b21f --- /dev/null +++ b/test_conformance/geometrics/testBase.h @@ -0,0 +1,31 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef _testBase_h +#define _testBase_h + +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include + +#include "procs.h" + +#endif // _testBase_h + + + diff --git a/test_conformance/geometrics/test_geometrics.cpp b/test_conformance/geometrics/test_geometrics.cpp new file mode 100644 index 00000000..ac16b2a4 --- /dev/null +++ b/test_conformance/geometrics/test_geometrics.cpp @@ -0,0 +1,1109 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/compat.h" + +#include "testBase.h" +#include "../../test_common/harness/testHarness.h" +#include "../../test_common/harness/typeWrappers.h" +#include "../../test_common/harness/conversions.h" +#include "../../test_common/harness/errorHelpers.h" +#include + +const char *crossKernelSource = +"__kernel void sample_test(__global float4 *sourceA, __global float4 *sourceB, __global float4 *destValues)\n" +"{\n" +" int tid = get_global_id(0);\n" +" destValues[tid] = cross( sourceA[tid], sourceB[tid] );\n" +"\n" +"}\n" ; + +const char *crossKernelSourceV3 = +"__kernel void sample_test(__global float *sourceA, __global float *sourceB, __global float *destValues)\n" +"{\n" +" int tid = get_global_id(0);\n" +" vstore3( cross( vload3( tid, sourceA), vload3( tid, sourceB) ), tid, destValues );\n" +"\n" +"}\n"; + +const char *twoToFloatKernelPattern = +"__kernel void sample_test(__global float%s *sourceA, __global float%s *sourceB, __global float *destValues)\n" +"{\n" +" int tid = get_global_id(0);\n" +" destValues[tid] = %s( sourceA[tid], sourceB[tid] );\n" +"\n" +"}\n"; + +const char *twoToFloatKernelPatternV3 = +"__kernel void sample_test(__global float%s *sourceA, __global float%s *sourceB, __global float *destValues)\n" +"{\n" +" int tid = get_global_id(0);\n" +" destValues[tid] = %s( vload3( tid, (__global float*) sourceA), vload3( tid, (__global float*) sourceB) );\n" +"\n" +"}\n"; + +const char *oneToFloatKernelPattern = +"__kernel void sample_test(__global float%s *sourceA, __global float *destValues)\n" +"{\n" +" int tid = get_global_id(0);\n" +" destValues[tid] = %s( sourceA[tid] );\n" +"\n" +"}\n"; + +const char *oneToFloatKernelPatternV3 = +"__kernel void sample_test(__global float%s *sourceA, __global float *destValues)\n" +"{\n" +" int tid = get_global_id(0);\n" +" destValues[tid] = %s( vload3( tid, (__global float*) sourceA) );\n" +"\n" +"}\n"; + +const char *oneToOneKernelPattern = +"__kernel void sample_test(__global 
float%s *sourceA, __global float%s *destValues)\n" +"{\n" +" int tid = get_global_id(0);\n" +" destValues[tid] = %s( sourceA[tid] );\n" +"\n" +"}\n"; + +const char *oneToOneKernelPatternV3 = +"__kernel void sample_test(__global float%s *sourceA, __global float%s *destValues)\n" +"{\n" +" int tid = get_global_id(0);\n" +" vstore3( %s( vload3( tid, (__global float*) sourceA) ), tid, (__global float*) destValues );\n" +"\n" +"}\n"; + +#define TEST_SIZE (1 << 20) + +double verifyFastDistance( float *srcA, float *srcB, size_t vecSize ); +double verifyFastLength( float *srcA, size_t vecSize ); + + + +void vector2string( char *string, float *vector, size_t elements ) +{ + *string++ = '{'; + *string++ = ' '; + string += sprintf( string, "%a", vector[0] ); + size_t i; + for( i = 1; i < elements; i++ ) + string += sprintf( string, ", %a", vector[i] ); + *string++ = ' '; + *string++ = '}'; + *string = '\0'; +} + +void fillWithTrickyNumbers( float *aVectors, float *bVectors, size_t vecSize ) +{ + static const cl_float trickyValues[] = { -FLT_EPSILON, FLT_EPSILON, + MAKE_HEX_FLOAT(0x1.0p63f, 0x1L, 63), MAKE_HEX_FLOAT(0x1.8p63f, 0x18L, 59), MAKE_HEX_FLOAT(0x1.0p64f, 0x1L, 64), MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(-0x1.8p-63f, -0x18L, -67), MAKE_HEX_FLOAT(-0x1.0p64f, -0x1L, 64), + MAKE_HEX_FLOAT(0x1.0p-63f, 0x1L, -63), MAKE_HEX_FLOAT(0x1.8p-63f, 0x18L, -67), MAKE_HEX_FLOAT(0x1.0p-64f, 0x1L, -64), MAKE_HEX_FLOAT(-0x1.0p-63f, -0x1L, -63), MAKE_HEX_FLOAT(-0x1.8p-63f, -0x18L, -67), MAKE_HEX_FLOAT(-0x1.0p-64f, -0x1L, -64), + FLT_MAX / 2.f, -FLT_MAX / 2.f, INFINITY, -INFINITY, 0.f, -0.f }; + static const size_t trickyCount = sizeof( trickyValues ) / sizeof( trickyValues[0] ); + static const size_t stride[4] = {1, trickyCount, trickyCount*trickyCount, trickyCount*trickyCount*trickyCount }; + size_t i, j, k; + + for( j = 0; j < vecSize; j++ ) + for( k = 0; k < vecSize; k++ ) + for( i = 0; i < trickyCount; i++ ) + aVectors[ j + stride[j] * (i + k*trickyCount)*vecSize] = 
trickyValues[i]; + + if( bVectors ) + { + size_t copySize = vecSize * vecSize * trickyCount; + memset( bVectors, 0, sizeof(float) * copySize ); + memset( aVectors + copySize, 0, sizeof(float) * copySize ); + memcpy( bVectors + copySize, aVectors, sizeof(float) * copySize ); + } +} + + +void cross_product( const float *vecA, const float *vecB, float *outVector, float *errorTolerances, float ulpTolerance ) +{ + outVector[ 0 ] = ( vecA[ 1 ] * vecB[ 2 ] ) - ( vecA[ 2 ] * vecB[ 1 ] ); + outVector[ 1 ] = ( vecA[ 2 ] * vecB[ 0 ] ) - ( vecA[ 0 ] * vecB[ 2 ] ); + outVector[ 2 ] = ( vecA[ 0 ] * vecB[ 1 ] ) - ( vecA[ 1 ] * vecB[ 0 ] ); + outVector[ 3 ] = 0.0f; + + errorTolerances[ 0 ] = fmaxf( fabsf( vecA[ 1 ] ), fmaxf( fabsf( vecB[ 2 ] ), fmaxf( fabsf( vecA[ 2 ] ), fabsf( vecB[ 1 ] ) ) ) ); + errorTolerances[ 1 ] = fmaxf( fabsf( vecA[ 2 ] ), fmaxf( fabsf( vecB[ 0 ] ), fmaxf( fabsf( vecA[ 0 ] ), fabsf( vecB[ 2 ] ) ) ) ); + errorTolerances[ 2 ] = fmaxf( fabsf( vecA[ 0 ] ), fmaxf( fabsf( vecB[ 1 ] ), fmaxf( fabsf( vecA[ 1 ] ), fabsf( vecB[ 0 ] ) ) ) ); + + errorTolerances[ 0 ] = errorTolerances[ 0 ] * errorTolerances[ 0 ] * ( ulpTolerance * FLT_EPSILON ); // This gives us max squared times ulp tolerance, i.e. 
the worst-case expected variance we could expect from this result + errorTolerances[ 1 ] = errorTolerances[ 1 ] * errorTolerances[ 1 ] * ( ulpTolerance * FLT_EPSILON ); + errorTolerances[ 2 ] = errorTolerances[ 2 ] * errorTolerances[ 2 ] * ( ulpTolerance * FLT_EPSILON ); +} + + + + +int test_geom_cross(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + int vecsize; + RandomSeed seed(gRandomSeed); + + /* Get the default rounding mode */ + cl_device_fp_config defaultRoundingMode = get_default_rounding_mode(deviceID); + if( 0 == defaultRoundingMode ) + return -1; + + + for(vecsize = 3; vecsize <= 4; ++vecsize) + { + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper streams[3]; + BufferOwningPtr A(malloc(sizeof(cl_float) * TEST_SIZE * vecsize)); + BufferOwningPtr B(malloc(sizeof(cl_float) * TEST_SIZE * vecsize)); + BufferOwningPtr C(malloc(sizeof(cl_float) * TEST_SIZE * vecsize)); + cl_float testVector[4]; + int error, i; + cl_float *inDataA = A; + cl_float *inDataB = B; + cl_float *outData = C; + size_t threads[1], localThreads[1]; + + /* Create kernels */ + if( create_single_kernel_helper( context, &program, &kernel, 1, vecsize == 3 ? &crossKernelSourceV3 : &crossKernelSource, "sample_test" ) ) + return -1; + + /* Generate some streams. 
Note: deliberately do some random data in w to verify that it gets ignored */ + for( i = 0; i < TEST_SIZE * vecsize; i++ ) + { + inDataA[ i ] = get_random_float( -512.f, 512.f, seed ); + inDataB[ i ] = get_random_float( -512.f, 512.f, seed ); + } + fillWithTrickyNumbers( inDataA, inDataB, vecsize ); + + streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_float) * vecsize * TEST_SIZE, inDataA, NULL); + if( streams[0] == NULL ) + { + log_error("ERROR: Creating input array A failed!\n"); + return -1; + } + streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_float) * vecsize * TEST_SIZE, inDataB, NULL); + if( streams[1] == NULL ) + { + log_error("ERROR: Creating input array B failed!\n"); + return -1; + } + streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * vecsize * TEST_SIZE, NULL, NULL); + if( streams[2] == NULL ) + { + log_error("ERROR: Creating output array failed!\n"); + return -1; + } + + /* Assign streams and execute */ + for( i = 0; i < 3; i++ ) + { + error = clSetKernelArg(kernel, i, sizeof( streams[i] ), &streams[i]); + test_error( error, "Unable to set indexed kernel arguments" ); + } + + /* Run the kernel */ + threads[0] = TEST_SIZE; + + error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] ); + test_error( error, "Unable to get work group size to use" ); + + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); + test_error( error, "Unable to execute test kernel" ); + + /* Now get the results */ + error = clEnqueueReadBuffer( queue, streams[2], true, 0, sizeof( cl_float ) * TEST_SIZE * vecsize, outData, 0, NULL, NULL ); + test_error( error, "Unable to read output array!" ); + + /* And verify! 
*/ + for( i = 0; i < TEST_SIZE; i++ ) + { + float errorTolerances[ 4 ]; + // On an embedded device w/ round-to-zero, 3 ulps is the worst-case tolerance for cross product + cross_product( inDataA + i * vecsize, inDataB + i * vecsize, testVector, errorTolerances, 3.f ); + + // RTZ devices accrue approximately double the amount of error per operation. Allow for that. + if( defaultRoundingMode == CL_FP_ROUND_TO_ZERO ) + { + errorTolerances[0] *= 2.0f; + errorTolerances[1] *= 2.0f; + errorTolerances[2] *= 2.0f; + errorTolerances[3] *= 2.0f; + } + + float errs[] = { fabsf( testVector[ 0 ] - outData[ i * vecsize + 0 ] ), + fabsf( testVector[ 1 ] - outData[ i * vecsize + 1 ] ), + fabsf( testVector[ 2 ] - outData[ i * vecsize + 2 ] ) }; + + if( errs[ 0 ] > errorTolerances[ 0 ] || errs[ 1 ] > errorTolerances[ 1 ] || errs[ 2 ] > errorTolerances[ 2 ] ) + { + log_error( "ERROR: Data sample %d does not validate! Expected (%a,%a,%a,%a), got (%a,%a,%a,%a)\n", + i, testVector[0], testVector[1], testVector[2], testVector[3], + outData[i*vecsize], outData[i*vecsize+1], outData[i*vecsize+2], outData[i*vecsize+3] ); + log_error( " Input: (%a %a %a) and (%a %a %a)\n", + inDataA[ i * vecsize + 0 ], inDataA[ i * vecsize + 1 ], inDataA[ i * vecsize + 2 ], + inDataB[ i * vecsize + 0 ], inDataB[ i * vecsize + 1 ], inDataB[ i * vecsize + 2 ] ); + log_error( " Errors: (%a out of %a), (%a out of %a), (%a out of %a)\n", + errs[ 0 ], errorTolerances[ 0 ], errs[ 1 ], errorTolerances[ 1 ], errs[ 2 ], errorTolerances[ 2 ] ); + log_error(" ulp %f\n", Ulp_Error( outData[ i * vecsize + 1 ], testVector[ 1 ] ) ); + return -1; + } + } + } // for(vecsize=... 
+ + if(!is_extension_available(deviceID, "cl_khr_fp64")) { + log_info("Extension cl_khr_fp64 not supported; skipping double tests.\n"); + return 0; + } else { + log_info("Testing doubles...\n"); + return test_geom_cross_double( deviceID, context, queue, num_elements, seed); + } +} + +float getMaxValue( float vecA[], float vecB[], size_t vecSize ) +{ + float a = fmaxf( fabsf( vecA[ 0 ] ), fabsf( vecB[ 0 ] ) ); + for( size_t i = 1; i < vecSize; i++ ) + a = fmaxf( fabsf( vecA[ i ] ), fmaxf( fabsf( vecB[ i ] ), a ) ); + return a; +} + +typedef double (*twoToFloatVerifyFn)( float *srcA, float *srcB, size_t vecSize ); + +int test_twoToFloat_kernel(cl_command_queue queue, cl_context context, const char *fnName, + size_t vecSize, twoToFloatVerifyFn verifyFn, float ulpLimit, MTdata d ) +{ + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper streams[3]; + int error; + size_t i, threads[1], localThreads[1]; + char kernelSource[10240]; + char *programPtr; + char sizeNames[][4] = { "", "2", "3", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" }; + int hasInfNan = 1; + cl_device_id device = NULL; + + error = clGetCommandQueueInfo( queue, CL_QUEUE_DEVICE, sizeof( device ), &device, NULL ); + test_error( error, "Unable to get command queue device" ); + + /* Check for embedded devices doing nutty stuff */ + error = clGetDeviceInfo( device, CL_DEVICE_PROFILE, sizeof( kernelSource ), kernelSource, NULL ); + test_error( error, "Unable to get device profile" ); + if( 0 == strcmp( kernelSource, "EMBEDDED_PROFILE" ) ) + { + cl_device_fp_config config = 0; + error = clGetDeviceInfo( device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof( config ), &config, NULL ); + test_error( error, "Unable to get CL_DEVICE_SINGLE_FP_CONFIG" ); + + if( CL_FP_ROUND_TO_ZERO == (config & (CL_FP_ROUND_TO_NEAREST|CL_FP_ROUND_TO_ZERO))) + ulpLimit *= 2.0f; // rtz operations average twice the accrued error of rte operations + + if( 0 == (config & CL_FP_INF_NAN) ) + hasInfNan = 0; + } + + 
BufferOwningPtr A(malloc(sizeof(cl_float) * TEST_SIZE * 4)); + BufferOwningPtr B(malloc(sizeof(cl_float) * TEST_SIZE * 4)); + BufferOwningPtr C(malloc(sizeof(cl_float) * TEST_SIZE)); + + cl_float *inDataA = A; + cl_float *inDataB = B; + cl_float *outData = C; + + /* Create the source */ + sprintf( kernelSource, vecSize == 3 ? twoToFloatKernelPatternV3 : twoToFloatKernelPattern, sizeNames[vecSize-1], sizeNames[vecSize-1], fnName ); + + /* Create kernels */ + programPtr = kernelSource; + if( create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&programPtr, "sample_test" ) ) + { + return -1; + } + /* Generate some streams */ + for( i = 0; i < TEST_SIZE * vecSize; i++ ) + { + inDataA[ i ] = get_random_float( -512.f, 512.f, d ); + inDataB[ i ] = get_random_float( -512.f, 512.f, d ); + } + fillWithTrickyNumbers( inDataA, inDataB, vecSize ); + + /* Clamp values to be in range for fast_ functions */ + if( verifyFn == verifyFastDistance ) + { + for( i = 0; i < TEST_SIZE * vecSize; i++ ) + { + if( fabsf( inDataA[i] ) > MAKE_HEX_FLOAT(0x1.0p62f, 0x1L, 62) || fabsf( inDataA[i] ) < MAKE_HEX_FLOAT(0x1.0p-62f, 0x1L, -62) ) + inDataA[ i ] = get_random_float( -512.f, 512.f, d ); + if( fabsf( inDataB[i] ) > MAKE_HEX_FLOAT(0x1.0p62f, 0x1L, 62) || fabsf( inDataB[i] ) < MAKE_HEX_FLOAT(0x1.0p-62f, 0x1L, -62) ) + inDataB[ i ] = get_random_float( -512.f, 512.f, d ); + } + } + + + streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_float) * vecSize * TEST_SIZE, inDataA, NULL); + if( streams[0] == NULL ) + { + log_error("ERROR: Creating input array A failed!\n"); + return -1; + } + streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_float) * vecSize * TEST_SIZE, inDataB, NULL); + if( streams[1] == NULL ) + { + log_error("ERROR: Creating input array B failed!\n"); + return -1; + } + streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * TEST_SIZE, NULL, NULL); + if( 
streams[2] == NULL ) + { + log_error("ERROR: Creating output array failed!\n"); + return -1; + } + + /* Assign streams and execute */ + for( i = 0; i < 3; i++ ) + { + error = clSetKernelArg(kernel, (int)i, sizeof( streams[i] ), &streams[i]); + test_error( error, "Unable to set indexed kernel arguments" ); + } + + /* Run the kernel */ + threads[0] = TEST_SIZE; + + error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] ); + test_error( error, "Unable to get work group size to use" ); + + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); + test_error( error, "Unable to execute test kernel" ); + + /* Now get the results */ + error = clEnqueueReadBuffer( queue, streams[2], true, 0, sizeof( cl_float ) * TEST_SIZE, outData, 0, NULL, NULL ); + test_error( error, "Unable to read output array!" ); + + + /* And verify! */ + int skipCount = 0; + for( i = 0; i < TEST_SIZE; i++ ) + { + cl_float *src1 = inDataA + i * vecSize; + cl_float *src2 = inDataB + i * vecSize; + double expected = verifyFn( src1, src2, vecSize ); + if( (float) expected != outData[ i ] ) + { + if( isnan(expected) && isnan( outData[i] ) ) + continue; + + if( ! hasInfNan ) + { + size_t ii; + for( ii = 0; ii < vecSize; ii++ ) + { + if( ! isfinite( src1[ii] ) || ! isfinite( src2[ii] ) ) + { + skipCount++; + continue; + } + } + if( ! 
isfinite( (cl_float) expected ) ) + { + skipCount++; + continue; + } + } + + if( ulpLimit < 0 ) + { + // Limit below zero means we need to test via a computed error (like cross product does) + float maxValue = + getMaxValue( inDataA + i * vecSize, inDataB + i * vecSize,vecSize ); + // In this case (dot is the only one that gets here), the ulp is 2*vecSize - 1 (n + n-1 max # of errors) + float errorTolerance = maxValue * maxValue * ( 2.f * (float)vecSize - 1.f ) * FLT_EPSILON; + + // Limit below zero means test via epsilon instead + double error = + fabs( (double)expected - (double)outData[ i ] ); + if( error > errorTolerance ) + { + + log_error( "ERROR: Data sample %d at size %d does not validate! Expected (%a), got (%a), sources (%a and %a) error of %g against tolerance %g\n", + (int)i, (int)vecSize, expected, + outData[ i ], + inDataA[i*vecSize], + inDataB[i*vecSize], + (float)error, + (float)errorTolerance ); + + char vecA[1000], vecB[1000]; + vector2string( vecA, inDataA +i * vecSize, vecSize ); + vector2string( vecB, inDataB + i * vecSize, vecSize ); + log_error( "\tvector A: %s, vector B: %s\n", vecA, vecB ); + return -1; + } + } + else + { + float error = Ulp_Error( outData[ i ], expected ); + if( fabsf(error) > ulpLimit ) + { + log_error( "ERROR: Data sample %d at size %d does not validate! 
Expected (%a), got (%a), sources (%a and %a) ulp of %f\n", + (int)i, (int)vecSize, expected, outData[ i ], inDataA[i*vecSize], inDataB[i*vecSize], error ); + + char vecA[1000], vecB[1000]; + vector2string( vecA, inDataA + i * vecSize, vecSize ); + vector2string( vecB, inDataB + i * vecSize, vecSize ); + log_error( "\tvector A: %s, vector B: %s\n", vecA, vecB ); + return -1; + } + } + } + } + + if( skipCount ) + log_info( "Skipped %d tests out of %d because they contained Infs or NaNs\n\tEMBEDDED_PROFILE Device does not support CL_FP_INF_NAN\n", skipCount, TEST_SIZE ); + + return 0; +} + +double verifyDot( float *srcA, float *srcB, size_t vecSize ) +{ + double total = 0.f; + + for( unsigned int i = 0; i < vecSize; i++ ) + total += (double)srcA[ i ] * (double)srcB[ i ]; + + return total; +} + +int test_geom_dot(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + size_t sizes[] = { 1, 2, 3, 4, 0 }; + unsigned int size; + int retVal = 0; + RandomSeed seed(gRandomSeed); + + for( size = 0; sizes[ size ] != 0 ; size++ ) + { + if( test_twoToFloat_kernel( queue, context, "dot", sizes[size], verifyDot, -1.0f /*magic value*/, seed ) != 0 ) + { + log_error( " dot vector size %d FAILED\n", (int)sizes[ size ] ); + retVal = -1; + } + } + + if (retVal) + return retVal; + + if(!is_extension_available(deviceID, "cl_khr_fp64")) + { + log_info("Extension cl_khr_fp64 not supported; skipping double tests.\n"); + return 0; + } + + log_info("Testing doubles...\n"); + return test_geom_dot_double( deviceID, context, queue, num_elements, seed); +} + +double verifyFastDistance( float *srcA, float *srcB, size_t vecSize ) +{ + double total = 0, value; + unsigned int i; + + // We calculate the distance as a double, to try and make up for the fact that + // the GPU has better precision distance since it's a single op + for( i = 0; i < vecSize; i++ ) + { + value = (double)srcA[i] - (double)srcB[i]; + total += value * value; + } + + return sqrt( total ); +} + 
+int test_geom_fast_distance(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + size_t sizes[] = { 1, 2, 3, 4, 0 }; + unsigned int size; + int retVal = 0; + RandomSeed seed(gRandomSeed); + + for( size = 0; sizes[ size ] != 0 ; size++ ) + { + float maxUlps = 8192.0f + // error in sqrt + ( 1.5f * (float) sizes[size] + // cumulative error for multiplications (a-b+0.5ulp)**2 = (a-b)**2 + a*0.5ulp + b*0.5 ulp + 0.5 ulp for multiplication + 0.5f * (float) (sizes[size]-1)); // cumulative error for additions + + if( test_twoToFloat_kernel( queue, context, "fast_distance", + sizes[ size ], verifyFastDistance, + maxUlps, seed ) != 0 ) + { + log_error( " fast_distance vector size %d FAILED\n", + (int)sizes[ size ] ); + retVal = -1; + } + else + { + log_info( " fast_distance vector size %d passed\n", + (int)sizes[ size ] ); + } + } + return retVal; +} + + +double verifyDistance( float *srcA, float *srcB, size_t vecSize ) +{ + double total = 0, value; + unsigned int i; + + // We calculate the distance as a double, to try and make up for the fact that + // the GPU has better precision distance since it's a single op + for( i = 0; i < vecSize; i++ ) + { + value = (double)srcA[i] - (double)srcB[i]; + total += value * value; + } + + return sqrt( total ); +} + +int test_geom_distance(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + size_t sizes[] = { 1, 2, 3, 4, 0 }; + unsigned int size; + int retVal = 0; + RandomSeed seed(gRandomSeed ); + + for( size = 0; sizes[ size ] != 0 ; size++ ) + { + float maxUlps = 3.0f + // error in sqrt + ( 1.5f * (float) sizes[size] + // cumulative error for multiplications (a-b+0.5ulp)**2 = (a-b)**2 + a*0.5ulp + b*0.5 ulp + 0.5 ulp for multiplication + 0.5f * (float) (sizes[size]-1)); // cumulative error for additions + + if( test_twoToFloat_kernel( queue, context, "distance", sizes[ size ], verifyDistance, maxUlps, seed ) != 0 ) + { + log_error( " distance vector size %d 
FAILED\n", + (int)sizes[ size ] ); + retVal = -1; + } + else + { + log_info( " distance vector size %d passed\n", (int)sizes[ size ] ); + } + } + if (retVal) + return retVal; + + if(!is_extension_available(deviceID, "cl_khr_fp64")) + { + log_info("Extension cl_khr_fp64 not supported; skipping double tests.\n"); + return 0; + } else { + log_info("Testing doubles...\n"); + return test_geom_distance_double( deviceID, context, queue, num_elements, seed); + } +} + +typedef double (*oneToFloatVerifyFn)( float *srcA, size_t vecSize ); + +int test_oneToFloat_kernel(cl_command_queue queue, cl_context context, const char *fnName, + size_t vecSize, oneToFloatVerifyFn verifyFn, float ulpLimit, MTdata d ) +{ + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper streams[2]; + BufferOwningPtr A(malloc(sizeof(cl_float) * TEST_SIZE * 4)); + BufferOwningPtr B(malloc(sizeof(cl_float) * TEST_SIZE)); + int error; + size_t i, threads[1], localThreads[1]; + char kernelSource[10240]; + char *programPtr; + char sizeNames[][4] = { "", "2", "3", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" }; + cl_float *inDataA = A; + cl_float *outData = B; + + /* Create the source */ + sprintf( kernelSource, vecSize == 3? 
oneToFloatKernelPatternV3 : oneToFloatKernelPattern, sizeNames[vecSize-1], fnName ); + + /* Create kernels */ + programPtr = kernelSource; + if( create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&programPtr, "sample_test" ) ) + { + return -1; + } + + /* Generate some streams */ + for( i = 0; i < TEST_SIZE * vecSize; i++ ) + { + inDataA[ i ] = get_random_float( -512.f, 512.f, d ); + } + fillWithTrickyNumbers( inDataA, NULL, vecSize ); + + /* Clamp values to be in range for fast_ functions */ + if( verifyFn == verifyFastLength ) + { + for( i = 0; i < TEST_SIZE * vecSize; i++ ) + { + if( fabsf( inDataA[i] ) > MAKE_HEX_FLOAT(0x1.0p62f, 0x1L, 62) || fabsf( inDataA[i] ) < MAKE_HEX_FLOAT(0x1.0p-62f, 0x1L, -62) ) + inDataA[ i ] = get_random_float( -512.f, 512.f, d ); + } + } + + streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), + sizeof(cl_float) * vecSize * TEST_SIZE, inDataA, NULL); + if( streams[0] == NULL ) + { + log_error("ERROR: Creating input array A failed!\n"); + return -1; + } + streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), + sizeof(cl_float) * TEST_SIZE, NULL, NULL); + if( streams[1] == NULL ) + { + log_error("ERROR: Creating output array failed!\n"); + return -1; + } + + /* Assign streams and execute */ + error = clSetKernelArg( kernel, 0, sizeof( streams[ 0 ] ), &streams[0] ); + test_error( error, "Unable to set indexed kernel arguments" ); + error = clSetKernelArg( kernel, 1, sizeof( streams[ 1 ] ), &streams[1] ); + test_error( error, "Unable to set indexed kernel arguments" ); + + /* Run the kernel */ + threads[0] = TEST_SIZE; + + error = get_max_common_work_group_size( context, kernel, threads[0], + &localThreads[0] ); + test_error( error, "Unable to get work group size to use" ); + + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, + localThreads, 0, NULL, NULL ); + test_error( error, "Unable to execute test kernel" ); + + /* Now get the results */ + error = 
clEnqueueReadBuffer( queue, streams[1], true, 0, + sizeof( cl_float ) * TEST_SIZE, outData, + 0, NULL, NULL ); + test_error( error, "Unable to read output array!" ); + + /* And verify! */ + for( i = 0; i < TEST_SIZE; i++ ) + { + double expected = verifyFn( inDataA + i * vecSize, vecSize ); + if( (float) expected != outData[ i ] ) + { + float ulps = Ulp_Error( outData[i], expected ); + if( fabsf( ulps ) <= ulpLimit ) + continue; + + // We have to special case NAN + if( isnan( outData[ i ] ) && isnan( expected ) ) + continue; + + if(! (fabsf(ulps) < ulpLimit) ) + { + log_error( "ERROR: Data sample %d at size %d does not validate! Expected (%a), got (%a), source (%a), ulp %f\n", + (int)i, (int)vecSize, expected, outData[ i ], inDataA[i*vecSize], ulps ); + char vecA[1000]; + vector2string( vecA, inDataA + i *vecSize, vecSize ); + log_error( "\tvector: %s", vecA ); + return -1; + } + } + } + + return 0; +} + +double verifyLength( float *srcA, size_t vecSize ) +{ + double total = 0; + unsigned int i; + + // We calculate the distance as a double, to try and make up for the fact that + // the GPU has better precision distance since it's a single op + for( i = 0; i < vecSize; i++ ) + { + total += (double)srcA[i] * (double)srcA[i]; + } + + return sqrt( total ); +} + +int test_geom_length(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + size_t sizes[] = { 1, 2, 3, 4, 0 }; + unsigned int size; + int retVal = 0; + RandomSeed seed( gRandomSeed ); + + for( size = 0; sizes[ size ] != 0 ; size++ ) + { + float maxUlps = 3.0f + // error in sqrt + 0.5f * // effect on e of taking sqrt( x + e ) + ( 0.5f * (float) sizes[size] + // cumulative error for multiplications + 0.5f * (float) (sizes[size]-1)); // cumulative error for additions + + if( test_oneToFloat_kernel( queue, context, "length", sizes[ size ], verifyLength, maxUlps, seed ) != 0 ) + { + log_error( " length vector size %d FAILED\n", (int)sizes[ size ] ); + retVal = -1; + } + else + { + 
log_info( " length vector vector size %d passed\n", (int)sizes[ size ] ); + } + } + if (retVal) + return retVal; + + if(!is_extension_available(deviceID, "cl_khr_fp64")) + { + log_info("Extension cl_khr_fp64 not supported; skipping double tests.\n"); + return 0; + } + else + { + log_info("Testing doubles...\n"); + return test_geom_length_double( deviceID, context, queue, num_elements, seed); + } +} + + +double verifyFastLength( float *srcA, size_t vecSize ) +{ + double total = 0; + unsigned int i; + + // We calculate the distance as a double, to try and make up for the fact that + // the GPU has better precision distance since it's a single op + for( i = 0; i < vecSize; i++ ) + { + total += (double)srcA[i] * (double)srcA[i]; + } + + return sqrt( total ); +} + +int test_geom_fast_length(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + size_t sizes[] = { 1, 2, 3, 4, 0 }; + unsigned int size; + int retVal = 0; + RandomSeed seed(gRandomSeed); + + for( size = 0; sizes[ size ] != 0 ; size++ ) + { + float maxUlps = 8192.0f + // error in half_sqrt + ( 0.5f * (float) sizes[size] + // cumulative error for multiplications + 0.5f * (float) (sizes[size]-1)); // cumulative error for additions + + if( test_oneToFloat_kernel( queue, context, "fast_length", sizes[ size ], verifyFastLength, maxUlps, seed ) != 0 ) + { + log_error( " fast_length vector size %d FAILED\n", (int)sizes[ size ] ); + retVal = -1; + } + else + { + log_info( " fast_length vector size %d passed\n", (int)sizes[ size ] ); + } + } + return retVal; +} + + +typedef void (*oneToOneVerifyFn)( float *srcA, float *dstA, size_t vecSize ); + + +int test_oneToOne_kernel(cl_command_queue queue, cl_context context, const char *fnName, + size_t vecSize, oneToOneVerifyFn verifyFn, float ulpLimit, int softball, MTdata d ) +{ + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper streams[2]; + BufferOwningPtr A(malloc(sizeof(cl_float) * TEST_SIZE + * vecSize)); + 
BufferOwningPtr B(malloc(sizeof(cl_float) * TEST_SIZE + * vecSize)); + int error; + size_t i, j, threads[1], localThreads[1]; + char kernelSource[10240]; + char *programPtr; + char sizeNames[][4] = { "", "2", "3", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" }; + cl_float *inDataA = A; + cl_float *outData = B; + float ulp_error = 0; + + /* Create the source */ + sprintf( kernelSource, vecSize == 3 ? oneToOneKernelPatternV3: oneToOneKernelPattern, sizeNames[vecSize-1], sizeNames[vecSize-1], fnName ); + + /* Create kernels */ + programPtr = kernelSource; + if( create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&programPtr, "sample_test" ) ) + return -1; + + /* Initialize data. First element always 0. */ + memset( inDataA, 0, sizeof(cl_float) * vecSize ); + if( 0 == strcmp( fnName, "fast_normalize" )) + { // keep problematic cases out of the fast function + for( i = vecSize; i < TEST_SIZE * vecSize; i++ ) + { + cl_float z = get_random_float( -MAKE_HEX_FLOAT( 0x1.0p60f, 1, 60), MAKE_HEX_FLOAT( 0x1.0p60f, 1, 60), d); + if( fabsf(z) < MAKE_HEX_FLOAT( 0x1.0p-60f, 1, -60) ) + z = copysignf( 0.0f, z ); + inDataA[i] = z; + } + } + else + { + for( i = vecSize; i < TEST_SIZE * vecSize; i++ ) + inDataA[i] = any_float(d); + } + + streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_float) * vecSize* TEST_SIZE, inDataA, NULL); + if( streams[0] == NULL ) + { + log_error("ERROR: Creating input array A failed!\n"); + return -1; + } + streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * vecSize * TEST_SIZE, NULL, NULL); + if( streams[1] == NULL ) + { + log_error("ERROR: Creating output array failed!\n"); + return -1; + } + + /* Assign streams and execute */ + error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0] ); + test_error( error, "Unable to set indexed kernel arguments" ); + error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1] ); + test_error( 
error, "Unable to set indexed kernel arguments" ); + + /* Run the kernel */ + threads[0] = TEST_SIZE; + + error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] ); + test_error( error, "Unable to get work group size to use" ); + + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); + test_error( error, "Unable to execute test kernel" ); + + /* Now get the results */ + error = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof( cl_float ) * TEST_SIZE * vecSize, outData, 0, NULL, NULL ); + test_error( error, "Unable to read output array!" ); + + /* And verify! */ + for( i = 0; i < TEST_SIZE; i++ ) + { + float expected[4]; + int fail = 0; + verifyFn( inDataA + i * vecSize, expected, vecSize ); + for( j = 0; j < vecSize; j++ ) + { + // We have to special case NAN + if( isnan( outData[ i * vecSize + j ] ) + && isnan( expected[ j ] ) ) + continue; + + if( expected[j] != outData[ i * vecSize + j ] ) { + ulp_error = Ulp_Error( outData[i*vecSize+j], expected[ j ] ); + + if( fabsf(ulp_error) > ulpLimit ) { + fail = 1; + break; + } + } + + } + + // try again with subnormals flushed to zero if the platform flushes + if( fail && gFlushDenormsToZero ) + { + float temp[4], expected2[4]; + for( j = 0; j < vecSize; j++ ) + { + if( IsFloatSubnormal(inDataA[i*vecSize+j] ) ) + temp[j] = copysignf( 0.0f, inDataA[i*vecSize+j] ); + else + temp[j] = inDataA[ i*vecSize +j]; + } + + verifyFn( temp, expected2, vecSize ); + fail = 0; + + for( j = 0; j < vecSize; j++ ) + { + // We have to special case NAN + if( isnan( outData[ i * vecSize + j ] ) && isnan( expected[ j ] ) ) + continue; + + if( expected2[j] != outData[ i * vecSize + j ] ) + { + ulp_error = Ulp_Error(outData[i*vecSize + j ], expected[ j ] ); + + if( fabsf(ulp_error) > ulpLimit ) + { + if( IsFloatSubnormal(expected2[j]) ) + { + expected2[j] = 0.0f; + if( expected2[j] != outData[i*vecSize + j ] ) + { + ulp_error = Ulp_Error( outData[ i * vecSize + j ], 
expected[ j ] ); + if( fabsf(ulp_error) > ulpLimit ) { + fail = 1; + break; + } + } + } + } + } + } + } + + if( fail ) + { + log_error( "ERROR: Data sample {%d,%d} at size %d does not validate! Expected %12.24f (%a), got %12.24f (%a), ulp %f\n", + (int)i, (int)j, (int)vecSize, expected[j], expected[j], outData[ i*vecSize+j], outData[ i*vecSize+j], ulp_error ); + log_error( " Source: " ); + for( size_t q = 0; q < vecSize; q++ ) + log_error( "%g ", inDataA[ i * vecSize+q]); + log_error( "\n : " ); + for( size_t q = 0; q < vecSize; q++ ) + log_error( "%a ", inDataA[i*vecSize +q] ); + log_error( "\n" ); + log_error( " Result: " ); + for( size_t q = 0; q < vecSize; q++ ) + log_error( "%g ", outData[ i *vecSize + q ] ); + log_error( "\n : " ); + for( size_t q = 0; q < vecSize; q++ ) + log_error( "%a ", outData[ i * vecSize + q ] ); + log_error( "\n" ); + log_error( " Expected: " ); + for( size_t q = 0; q < vecSize; q++ ) + log_error( "%g ", expected[ q ] ); + log_error( "\n : " ); + for( size_t q = 0; q < vecSize; q++ ) + log_error( "%a ", expected[ q ] ); + log_error( "\n" ); + return -1; + } + } + + return 0; +} + +void verifyNormalize( float *srcA, float *dst, size_t vecSize ) +{ + double total = 0, value; + unsigned int i; + + // We calculate everything as a double, to try and make up for the fact that + // the GPU has better precision distance since it's a single op + for( i = 0; i < vecSize; i++ ) + total += (double)srcA[i] * (double)srcA[i]; + + if( total == 0.f ) + { + // Special edge case: copy vector over without change + for( i = 0; i < vecSize; i++ ) + dst[i] = srcA[i]; + return; + } + + // Deal with infinities + if( total == INFINITY ) + { + total = 0.0f; + for( i = 0; i < vecSize; i++ ) + { + if( fabsf( srcA[i]) == INFINITY ) + dst[i] = copysignf( 1.0f, srcA[i] ); + else + dst[i] = copysignf( 0.0f, srcA[i] ); + total += (double)dst[i] * (double)dst[i]; + } + + srcA = dst; + } + + value = sqrt( total ); + for( i = 0; i < vecSize; i++ ) + dst[i] = (float)( 
(double)srcA[i] / value ); +} + +int test_geom_normalize(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + size_t sizes[] = { 1, 2, 3, 4, 0 }; + unsigned int size; + int retVal = 0; + RandomSeed seed(gRandomSeed); + + for( size = 0; sizes[ size ] != 0 ; size++ ) + { + float maxUlps = 2.5f + // error in rsqrt + error in multiply + ( 0.5f * (float) sizes[size] + // cumulative error for multiplications + 0.5f * (float) (sizes[size]-1)); // cumulative error for additions + if( test_oneToOne_kernel( queue, context, "normalize", sizes[ size ], verifyNormalize, maxUlps, 0, seed ) != 0 ) + { + log_error( " normalized vector size %d FAILED\n", (int)sizes[ size ] ); + retVal = -1; + } + else + { + log_info( " normalized vector size %d passed\n", (int)sizes[ size ] ); + } + } + if (retVal) + return retVal; + + if(!is_extension_available(deviceID, "cl_khr_fp64")) + { + log_info("Extension cl_khr_fp64 not supported; skipping double tests.\n"); + return 0; + } else { + log_info("Testing doubles...\n"); + return test_geom_normalize_double( deviceID, context, queue, num_elements, seed); + } +} + + +int test_geom_fast_normalize(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + size_t sizes[] = { 1, 2, 3, 4, 0 }; + unsigned int size; + int retVal = 0; + RandomSeed seed( gRandomSeed ); + + for( size = 0; sizes[ size ] != 0 ; size++ ) + { + float maxUlps = 8192.5f + // error in rsqrt + error in multiply + ( 0.5f * (float) sizes[size] + // cumulative error for multiplications + 0.5f * (float) (sizes[size]-1)); // cumulative error for additions + + if( test_oneToOne_kernel( queue, context, "fast_normalize", sizes[ size ], verifyNormalize, maxUlps, 1, seed ) != 0 ) + { + log_error( " fast_normalize vector size %d FAILED\n", (int)sizes[ size ] ); + retVal = -1; + } + else + { + log_info( " fast_normalize vector size %d passed\n", (int)sizes[ size ] ); + } + } + return retVal; +} + + + diff --git 
a/test_conformance/geometrics/test_geometrics_double.cpp b/test_conformance/geometrics/test_geometrics_double.cpp new file mode 100644 index 00000000..2d258d5d --- /dev/null +++ b/test_conformance/geometrics/test_geometrics_double.cpp @@ -0,0 +1,941 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "testBase.h" +#include "../../test_common/harness/typeWrappers.h" +#include "../../test_common/harness/conversions.h" +#include "../../test_common/harness/errorHelpers.h" + +const char *crossKernelSource_double = +"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" +"__kernel void sample_test(__global double4 *sourceA, __global double4 *sourceB, __global double4 *destValues)\n" +"{\n" +" int tid = get_global_id(0);\n" +" destValues[tid] = cross( sourceA[tid], sourceB[tid] );\n" +"\n" +"}\n"; + +const char *crossKernelSource_doubleV3 = +"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" +"__kernel void sample_test(__global double *sourceA, __global double *sourceB, __global double *destValues)\n" +"{\n" +" int tid = get_global_id(0);\n" +" vstore3( cross( vload3( tid, sourceA), vload3( tid, sourceB) ), tid, destValues);\n" +"\n" +"}\n"; + +const char *twoToFloatKernelPattern_double = +"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" +"__kernel void sample_test(__global double%s *sourceA, __global double%s *sourceB, __global double *destValues)\n" +"{\n" +" int tid = get_global_id(0);\n" +" 
destValues[tid] = %s( sourceA[tid], sourceB[tid] );\n" +"\n" +"}\n"; + +const char *twoToFloatKernelPattern_doubleV3 = +"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" +"__kernel void sample_test(__global double%s *sourceA, __global double%s *sourceB, __global double *destValues)\n" +"{\n" +" int tid = get_global_id(0);\n" +" destValues[tid] = %s( vload3( tid, (__global double*) sourceA), vload3( tid, (__global double*) sourceB ) );\n" +"\n" +"}\n"; + +const char *oneToFloatKernelPattern_double = +"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" +"__kernel void sample_test(__global double%s *sourceA, __global double *destValues)\n" +"{\n" +" int tid = get_global_id(0);\n" +" destValues[tid] = %s( sourceA[tid] );\n" +"\n" +"}\n"; + +const char *oneToFloatKernelPattern_doubleV3 = +"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" +"__kernel void sample_test(__global double%s *sourceA, __global double *destValues)\n" +"{\n" +" int tid = get_global_id(0);\n" +" destValues[tid] = %s( vload3( tid, (__global double*) sourceA) );\n" +"\n" +"}\n"; + +const char *oneToOneKernelPattern_double = +"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" +"__kernel void sample_test(__global double%s *sourceA, __global double%s *destValues)\n" +"{\n" +" int tid = get_global_id(0);\n" +" destValues[tid] = %s( sourceA[tid] );\n" +"\n" +"}\n"; + +const char *oneToOneKernelPattern_doubleV3 = +"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" +"__kernel void sample_test(__global double%s *sourceA, __global double%s *destValues)\n" +"{\n" +" int tid = get_global_id(0);\n" +" vstore3( %s( vload3( tid, (__global double*) sourceA) ), tid, (__global double*) destValues );\n" +"\n" +"}\n"; + +#define TEST_SIZE (1 << 20) + +double verifyLength_double( double *srcA, size_t vecSize ); +double verifyDistance_double( double *srcA, double *srcB, size_t vecSize ); + + + +void vector2string_double( char *string, double *vector, size_t elements ) +{ + *string++ = '{'; + *string++ = ' '; + string += 
sprintf( string, "%a", vector[0] ); + size_t i; + for( i = 1; i < elements; i++ ) + string += sprintf( string, ", %a", vector[i] ); + *string++ = ' '; + *string++ = '}'; + *string = '\0'; +} + +void fillWithTrickyNumbers_double( double *aVectors, double *bVectors, size_t vecSize ) +{ + static const cl_double trickyValues[] = { -FLT_EPSILON, FLT_EPSILON, + MAKE_HEX_DOUBLE(0x1.0p511, 0x1L, 511), MAKE_HEX_DOUBLE(0x1.8p511, 0x18L, 507), MAKE_HEX_DOUBLE(0x1.0p512, 0x1L, 512), MAKE_HEX_DOUBLE(-0x1.0p511, -0x1L, 511), MAKE_HEX_DOUBLE(-0x1.8p-511, -0x18L, -515), MAKE_HEX_DOUBLE(-0x1.0p512, -0x1L, 512), + MAKE_HEX_DOUBLE(0x1.0p-511, 0x1L, -511), MAKE_HEX_DOUBLE(0x1.8p-511, 0x18L, -515), MAKE_HEX_DOUBLE(0x1.0p-512, 0x1L, -512), MAKE_HEX_DOUBLE(-0x1.0p-511, -0x1L, -511), MAKE_HEX_DOUBLE(-0x1.8p-511, -0x18L, -515), MAKE_HEX_DOUBLE(-0x1.0p-512, -0x1L, -512), + DBL_MAX / 2., -DBL_MAX / 2., INFINITY, -INFINITY, 0., -0. }; + static const size_t trickyCount = sizeof( trickyValues ) / sizeof( trickyValues[0] ); + static const size_t stride[4] = {1, trickyCount, trickyCount*trickyCount, trickyCount*trickyCount*trickyCount }; + size_t i, j, k; + + for( j = 0; j < vecSize; j++ ) + for( k = 0; k < vecSize; k++ ) + for( i = 0; i < trickyCount; i++ ) + aVectors[ j + stride[j] * (i + k*trickyCount)*vecSize] = trickyValues[i]; + + if( bVectors ) + { + size_t copySize = vecSize * vecSize * trickyCount; + memset( bVectors, 0, sizeof(double) * copySize ); + memset( aVectors + copySize, 0, sizeof(double) * copySize ); + memcpy( bVectors + copySize, aVectors, sizeof(double) * copySize ); + } +} + + +void cross_product_double( const double *vecA, const double *vecB, double *outVector, double *errorTolerances, double ulpTolerance ) +{ + outVector[ 0 ] = ( vecA[ 1 ] * vecB[ 2 ] ) - ( vecA[ 2 ] * vecB[ 1 ] ); + outVector[ 1 ] = ( vecA[ 2 ] * vecB[ 0 ] ) - ( vecA[ 0 ] * vecB[ 2 ] ); + outVector[ 2 ] = ( vecA[ 0 ] * vecB[ 1 ] ) - ( vecA[ 1 ] * vecB[ 0 ] ); + outVector[ 3 ] = 0.0f; + + errorTolerances[ 
0 ] = fmax( fabs( vecA[ 1 ] ), fmax( fabs( vecB[ 2 ] ), fmax( fabs( vecA[ 2 ] ), fabs( vecB[ 1 ] ) ) ) ); + errorTolerances[ 1 ] = fmax( fabs( vecA[ 2 ] ), fmax( fabs( vecB[ 0 ] ), fmax( fabs( vecA[ 0 ] ), fabs( vecB[ 2 ] ) ) ) ); + errorTolerances[ 2 ] = fmax( fabs( vecA[ 0 ] ), fmax( fabs( vecB[ 1 ] ), fmax( fabs( vecA[ 1 ] ), fabs( vecB[ 0 ] ) ) ) ); + + errorTolerances[ 0 ] = errorTolerances[ 0 ] * errorTolerances[ 0 ] * ( ulpTolerance * FLT_EPSILON ); // This gives us max squared times ulp tolerance, i.e. the worst-case expected variance we could expect from this result + errorTolerances[ 1 ] = errorTolerances[ 1 ] * errorTolerances[ 1 ] * ( ulpTolerance * FLT_EPSILON ); + errorTolerances[ 2 ] = errorTolerances[ 2 ] * errorTolerances[ 2 ] * ( ulpTolerance * FLT_EPSILON ); +} + +int test_geom_cross_double(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, MTdata d) +{ + cl_int error; + cl_ulong maxAllocSize, maxGlobalMemSize; + + error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL ); + error |= clGetDeviceInfo( deviceID, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( maxGlobalMemSize ), &maxGlobalMemSize, NULL ); + test_error( error, "Unable to get device config" ); + + log_info("Device supports:\nCL_DEVICE_MAX_MEM_ALLOC_SIZE: %gMB\nCL_DEVICE_GLOBAL_MEM_SIZE: %gMB\n", + maxGlobalMemSize/(1024.0*1024.0), maxAllocSize/(1024.0*1024.0)); + + if (maxGlobalMemSize > (cl_ulong)SIZE_MAX) { + maxGlobalMemSize = (cl_ulong)SIZE_MAX; + } + + unsigned int size; + unsigned int bufSize; + unsigned int adjustment; + int vecsize; + + adjustment = 32*1024*1024; /* Try to allocate a bit less than the limits */ + for(vecsize = 3; vecsize <= 4; ++vecsize) + { + /* Make sure we adhere to the maximum individual allocation size and global memory size limits. 
*/ + size = TEST_SIZE; + bufSize = sizeof(cl_double) * TEST_SIZE * vecsize; + + while ((bufSize > (maxAllocSize - adjustment)) || (3*bufSize > (maxGlobalMemSize - adjustment))) { + size /= 2; + bufSize = sizeof(cl_double) * size * vecsize; + } + + /* Perform the test */ + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper streams[3]; + cl_double testVector[4]; + int error, i; + size_t threads[1], localThreads[1]; + BufferOwningPtr A(malloc(bufSize)); + BufferOwningPtr B(malloc(bufSize)); + BufferOwningPtr C(malloc(bufSize)); + cl_double *inDataA = A; + cl_double *inDataB = B; + cl_double *outData = C; + + /* Create kernels */ + if( create_single_kernel_helper( context, &program, &kernel, 1, vecsize == 3 ? &crossKernelSource_doubleV3 : &crossKernelSource_double, "sample_test" ) ) + return -1; + + /* Generate some streams. Note: deliberately do some random data in w to verify that it gets ignored */ + for( i = 0; i < TEST_SIZE * vecsize; i++ ) + { + inDataA[ i ] = get_random_double( -512.f, 512.f, d ); + inDataB[ i ] = get_random_double( -512.f, 512.f, d ); + } + fillWithTrickyNumbers_double( inDataA, inDataB, vecsize ); + + streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), bufSize, inDataA, NULL); + if( streams[0] == NULL ) + { + log_error("ERROR: Creating input array A failed!\n"); + return -1; + } + streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), bufSize, inDataB, NULL); + if( streams[1] == NULL ) + { + log_error("ERROR: Creating input array B failed!\n"); + return -1; + } + streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), bufSize, NULL, NULL); + if( streams[2] == NULL ) + { + log_error("ERROR: Creating output array failed!\n"); + return -1; + } + + /* Assign streams and execute */ + for( i = 0; i < 3; i++ ) + { + error = clSetKernelArg(kernel, i, sizeof( streams[i] ), &streams[i]); + test_error( error, "Unable to set indexed kernel arguments" ); + } + + /* Run the kernel */ 
+ threads[0] = TEST_SIZE; + + error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] ); + test_error( error, "Unable to get work group size to use" ); + + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); + test_error( error, "Unable to execute test kernel" ); + + /* Now get the results */ + error = clEnqueueReadBuffer( queue, streams[2], true, 0, bufSize, outData, 0, NULL, NULL ); + test_error( error, "Unable to read output array!" ); + + /* And verify! */ + for( i = 0; i < size; i++ ) + { + double errorTolerances[ 4 ]; + // On an embedded device w/ round-to-zero, 3 ulps is the worst-case tolerance for cross product + cross_product_double( inDataA + i * vecsize, inDataB + i * vecsize, testVector, errorTolerances, 3.f ); + + double errs[] = { fabs( testVector[ 0 ] - outData[ i * vecsize + 0 ] ), + fabs( testVector[ 1 ] - outData[ i * vecsize + 1 ] ), + fabs( testVector[ 2 ] - outData[ i * vecsize + 2 ] ) }; + + if( errs[ 0 ] > errorTolerances[ 0 ] || errs[ 1 ] > errorTolerances[ 1 ] || errs[ 2 ] > errorTolerances[ 2 ] ) + { + log_error( "ERROR: Data sample %d does not validate! 
Expected (%a,%a,%a,%a), got (%a,%a,%a,%a)\n", + i, testVector[0], testVector[1], testVector[2], testVector[3], + outData[i*vecsize], outData[i*vecsize+1], outData[i*vecsize+2], outData[i*vecsize+3] ); + log_error( " Input: (%a %a %a) and (%a %a %a)\n", + inDataA[ i * vecsize + 0 ], inDataA[ i * vecsize + 1 ], inDataA[ i * vecsize + 2 ], + inDataB[ i * vecsize + 0 ], inDataB[ i * vecsize + 1 ], inDataB[ i * vecsize + 2 ] ); + log_error( " Errors: (%a out of %a), (%a out of %a), (%a out of %a)\n", + errs[ 0 ], errorTolerances[ 0 ], errs[ 1 ], errorTolerances[ 1 ], errs[ 2 ], errorTolerances[ 2 ] ); + log_error(" ulp %g\n", Ulp_Error_Double( outData[ i * vecsize + 1 ], testVector[ 1 ] ) ); + return -1; + } + } + } + return 0; +} + +double getMaxValue_double( double vecA[], double vecB[], size_t vecSize ) +{ + double a = fmax( fabs( vecA[ 0 ] ), fabs( vecB[ 0 ] ) ); + for( size_t i = 1; i < vecSize; i++ ) + a = fmax( fabs( vecA[ i ] ), fmax( fabs( vecB[ i ] ), a ) ); + return a; +} + +typedef double (*twoToFloatVerifyFn_double)( double *srcA, double *srcB, size_t vecSize ); + +int test_twoToFloat_kernel_double(cl_command_queue queue, cl_context context, const char *fnName, + size_t vecSize, twoToFloatVerifyFn_double verifyFn, double ulpLimit, MTdata d ) +{ + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper streams[3]; + int error; + size_t i, threads[1], localThreads[1]; + char kernelSource[10240]; + char *programPtr; + char sizeNames[][4] = { "", "2", "3", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" }; + BufferOwningPtr A(malloc(sizeof(cl_double) * TEST_SIZE * vecSize)); + BufferOwningPtr B(malloc(sizeof(cl_double) * TEST_SIZE * vecSize)); + BufferOwningPtr C(malloc(sizeof(cl_double) * TEST_SIZE)); + + cl_double *inDataA = A; + cl_double *inDataB = B; + cl_double *outData = C; + + /* Create the source */ + sprintf( kernelSource, vecSize == 3 ? 
twoToFloatKernelPattern_doubleV3 : twoToFloatKernelPattern_double, sizeNames[vecSize-1], sizeNames[vecSize-1], fnName ); + + /* Create kernels */ + programPtr = kernelSource; + if( create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&programPtr, "sample_test" ) ) + return -1; + + /* Generate some streams */ + for( i = 0; i < TEST_SIZE * vecSize; i++ ) + { + inDataA[ i ] = any_double(d); + inDataB[ i ] = any_double(d); + } + fillWithTrickyNumbers_double( inDataA, inDataB, vecSize ); + + + streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_double) * vecSize * TEST_SIZE, inDataA, NULL); + if( streams[0] == NULL ) + { + log_error("ERROR: Creating input array A failed!\n"); + return -1; + } + streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_double) * vecSize * TEST_SIZE, inDataB, NULL); + if( streams[1] == NULL ) + { + log_error("ERROR: Creating input array B failed!\n"); + return -1; + } + streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_double) * TEST_SIZE, NULL, NULL); + if( streams[2] == NULL ) + { + log_error("ERROR: Creating output array failed!\n"); + return -1; + } + + /* Assign streams and execute */ + for( i = 0; i < 3; i++ ) + { + error = clSetKernelArg(kernel, (int)i, sizeof( streams[i] ), &streams[i]); + test_error( error, "Unable to set indexed kernel arguments" ); + } + + /* Run the kernel */ + threads[0] = TEST_SIZE; + + error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] ); + test_error( error, "Unable to get work group size to use" ); + + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); + test_error( error, "Unable to execute test kernel" ); + + /* Now get the results */ + error = clEnqueueReadBuffer( queue, streams[2], true, 0, sizeof( cl_double ) * TEST_SIZE, outData, 0, NULL, NULL ); + test_error( error, "Unable to read output array!" 
); + + /* And verify! */ + for( i = 0; i < TEST_SIZE; i++ ) + { + double expected = verifyFn( inDataA + i * vecSize, inDataB + i * vecSize, vecSize ); + if( (double) expected != outData[ i ] ) + { + if( isnan(expected) && isnan( outData[i] ) ) + continue; + + if( ulpLimit < 0 ) + { + // Limit below zero means we need to test via a computed error (like cross product does) + double maxValue = + getMaxValue_double( inDataA + i * vecSize, inDataB + i * vecSize, vecSize ); + + // In this case (dot is the only one that gets here), the ulp is 2*vecSize - 1 (n + n-1 max # of errors) + double errorTolerance = maxValue * maxValue * ( 2.f * (double)vecSize - 1.f ) * FLT_EPSILON; + + // Limit below zero means test via epsilon instead + double error = fabs( (double)expected - (double)outData[ i ] ); + if( error > errorTolerance ) + { + + log_error( "ERROR: Data sample %d at size %d does not validate! Expected (%a), got (%a), sources (%a and %a) error of %g against tolerance %g\n", + (int)i, (int)vecSize, expected, + outData[ i ], + inDataA[i*vecSize], + inDataB[i*vecSize], + (double)error, + (double)errorTolerance ); + + char vecA[1000], vecB[1000]; + vector2string_double( vecA, inDataA + i * vecSize, vecSize ); + vector2string_double( vecB, inDataB + i * vecSize, vecSize ); + log_error( "\tvector A: %s\n\tvector B: %s\n", vecA, vecB ); + return -1; + } + } + else + { + double error = Ulp_Error_Double( outData[ i ], + expected ); + if( fabs(error) > ulpLimit ) + { + log_error( "ERROR: Data sample %d at size %d does not validate! 
Expected (%a), got (%a), sources (%a and %a) ulp of %f\n", + (int)i, (int)vecSize, expected, + outData[ i ], + inDataA[i*vecSize], + inDataB[i*vecSize], + error ); + + char vecA[1000], vecB[1000]; + vector2string_double( vecA, inDataA + i * vecSize, vecSize ); + vector2string_double( vecB, inDataB + i * vecSize, vecSize ); + log_error( "\tvector A: %s\n\tvector B: %s\n", vecA, vecB ); + return -1; + } + } + } + } + return 0; +} + +double verifyDot_double( double *srcA, double *srcB, size_t vecSize ) +{ + double total = 0.f; + + for( unsigned int i = 0; i < vecSize; i++ ) + total += (double)srcA[ i ] * (double)srcB[ i ]; + + return total; +} + +int test_geom_dot_double(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, MTdata d) +{ + size_t sizes[] = { 1, 2, 3, 4, 0 }; + unsigned int size; + int retVal = 0; + + + for( size = 0; sizes[ size ] != 0 ; size++ ) + { + if( test_twoToFloat_kernel_double( queue, context, "dot", sizes[ size ], verifyDot_double, -1.0f /*magic value*/, d ) != 0 ) + { + log_error( " dot double vector size %d FAILED\n", (int)sizes[ size ] ); + retVal = -1; + } + } + return retVal; +} + + +int test_geom_fast_distance_double(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, MTdata d) +{ + size_t sizes[] = { 1, 2, 3, 4, 0 }; + unsigned int size; + int retVal = 0; + + abort(); //there is no double precision fast_distance + + for( size = 0; sizes[ size ] != 0 ; size++ ) + { + double maxUlps = 8192.0f + // error in sqrt + 0.5f * // effect on e of taking sqrt( x + e ) + ( 1.5f * (double) sizes[size] + // cumulative error for multiplications (a-b+0.5ulp)**2 = (a-b)**2 + a*0.5ulp + b*0.5 ulp + 0.5 ulp for multiplication + 0.5f * (double) (sizes[size]-1)); // cumulative error for additions + + if( test_twoToFloat_kernel_double( queue, context, "fast_distance", sizes[ size ], verifyDistance_double, maxUlps, d ) != 0 ) + { + log_error( " fast_distance double vector size %d FAILED\n", 
(int)sizes[ size ] ); + retVal = -1; + } + else + { + log_info( " fast_distance double vector size %d passed\n", (int)sizes[ size ] ); + } + } + return retVal; +} + + +double verifyDistance_double( double *srcA, double *srcB, size_t vecSize ) +{ + unsigned int i; + double diff[4]; + + for( i = 0; i < vecSize; i++ ) + diff[i] = srcA[i] - srcB[i]; + + return verifyLength_double( diff, vecSize ); +} + +int test_geom_distance_double(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, MTdata d) +{ + size_t sizes[] = { 1, 2, 3, 4, 0 }; + unsigned int size; + int retVal = 0; + + for( size = 0; sizes[ size ] != 0 ; size++ ) + { + double maxUlps = 3.0f + // error in sqrt + 0.5f * // effect on e of taking sqrt( x + e ) + ( 1.5f * (double) sizes[size] + // cumulative error for multiplications (a-b+0.5ulp)**2 = (a-b)**2 + a*0.5ulp + b*0.5 ulp + 0.5 ulp for multiplication + 0.5f * (double) (sizes[size]-1)); // cumulative error for additions + + maxUlps *= 2.0; // our reference code may be in error too + + if( test_twoToFloat_kernel_double( queue, context, "distance", sizes[ size ], verifyDistance_double, maxUlps, d ) != 0 ) + { + log_error( " distance double vector size %d FAILED\n", (int)sizes[ size ] ); + retVal = -1; + } + else + { + log_info( " distance double vector size %d passed\n", (int)sizes[ size ] ); + } + } + return retVal; +} + +typedef double (*oneToFloatVerifyFn_double)( double *srcA, size_t vecSize ); + +int test_oneToFloat_kernel_double(cl_command_queue queue, cl_context context, const char *fnName, + size_t vecSize, oneToFloatVerifyFn_double verifyFn, double ulpLimit, MTdata d ) +{ + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper streams[2]; + BufferOwningPtr A(malloc(sizeof(cl_double) * TEST_SIZE * vecSize)); + BufferOwningPtr B(malloc(sizeof(cl_double) * TEST_SIZE)); + int error; + size_t i, threads[1], localThreads[1]; + char kernelSource[10240]; + char *programPtr; + char sizeNames[][4] = { "", "2", "3", 
"4", "", "", "", "8", "", "", "", "", "", "", "", "16" }; + cl_double *inDataA = A; + cl_double *outData = B; + + /* Create the source */ + sprintf( kernelSource, vecSize == 3 ? oneToFloatKernelPattern_doubleV3 : oneToFloatKernelPattern_double, sizeNames[vecSize-1], fnName ); + + /* Create kernels */ + programPtr = kernelSource; + if( create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&programPtr, "sample_test" ) ) + return -1; + + /* Generate some streams */ + for( i = 0; i < TEST_SIZE * vecSize; i++ ) + inDataA[ i ] = any_double(d); + + fillWithTrickyNumbers_double( inDataA, NULL, vecSize ); + + streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_double) * vecSize * TEST_SIZE, inDataA, NULL); + if( streams[0] == NULL ) + { + log_error("ERROR: Creating input array A failed!\n"); + return -1; + } + streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_double) * TEST_SIZE, NULL, NULL); + if( streams[1] == NULL ) + { + log_error("ERROR: Creating output array failed!\n"); + return -1; + } + + /* Assign streams and execute */ + error = clSetKernelArg( kernel, 0, sizeof( streams[ 0 ] ), &streams[0] ); + test_error( error, "Unable to set indexed kernel arguments" ); + error = clSetKernelArg( kernel, 1, sizeof( streams[ 1 ] ), &streams[1] ); + test_error( error, "Unable to set indexed kernel arguments" ); + + /* Run the kernel */ + threads[0] = TEST_SIZE; + + error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] ); + test_error( error, "Unable to get work group size to use" ); + + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); + test_error( error, "Unable to execute test kernel" ); + + /* Now get the results */ + error = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof( cl_double ) * TEST_SIZE, outData, 0, NULL, NULL ); + test_error( error, "Unable to read output array!" ); + + /* And verify! 
*/ + for( i = 0; i < TEST_SIZE; i++ ) + { + double expected = verifyFn( inDataA + i * vecSize, vecSize ); + if( (double) expected != outData[ i ] ) + { + double ulps = Ulp_Error_Double( outData[i], expected ); + if( fabs( ulps ) <= ulpLimit ) + continue; + + // We have to special case NAN + if( isnan( outData[ i ] ) && isnan( expected ) ) + continue; + + if(! (fabs(ulps) < ulpLimit) ) + { + log_error( "ERROR: Data sample %d at size %d does not validate! Expected (%a), got (%a), source (%a), ulp %f\n", + (int)i, (int)vecSize, expected, outData[ i ], inDataA[i*vecSize], ulps ); + char vecA[1000]; + vector2string_double( vecA, inDataA + i * vecSize, vecSize ); + log_error( "\tvector: %s", vecA ); + return -1; + } + } + } + + return 0; +} + +double verifyLength_double( double *srcA, size_t vecSize ) +{ + double total = 0; + unsigned int i; + + // We calculate the distance as a double, to try and make up for the fact that + // the GPU has better precision distance since it's a single op + for( i = 0; i < vecSize; i++ ) + total += srcA[i] * srcA[i]; + + // Deal with spurious overflow + if( total == INFINITY ) + { + total = 0.0; + for( i = 0; i < vecSize; i++ ) + { + double f = srcA[i] * MAKE_HEX_DOUBLE(0x1.0p-600, 0x1LL, -600); + total += f * f; + } + + return sqrt( total ) * MAKE_HEX_DOUBLE(0x1.0p600, 0x1LL, 600); + } + + // Deal with spurious underflow + if( total < 4 /*max vector length*/ * DBL_MIN / DBL_EPSILON ) + { + total = 0.0; + for( i = 0; i < vecSize; i++ ) + { + double f = srcA[i] * MAKE_HEX_DOUBLE(0x1.0p700, 0x1LL, 700); + total += f * f; + } + + return sqrt( total ) * MAKE_HEX_DOUBLE(0x1.0p-700, 0x1LL, -700); + } + + return sqrt( total ); +} + +int test_geom_length_double(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, MTdata d) +{ + size_t sizes[] = { 1, 2, 3, 4, 0 }; + unsigned int size; + int retVal = 0; + + for( size = 0; sizes[ size ] != 0 ; size++ ) + { + double maxUlps = 3.0f + // error in sqrt + 0.5f * // effect 
on e of taking sqrt( x + e ) + ( 0.5f * (double) sizes[size] + // cumulative error for multiplications + 0.5f * (double) (sizes[size]-1)); // cumulative error for additions + + maxUlps *= 2.0; // our reference code may be in error too + if( test_oneToFloat_kernel_double( queue, context, "length", sizes[ size ], verifyLength_double, maxUlps, d ) != 0 ) + { + log_error( " length double vector size %d FAILED\n", (int)sizes[ size ] ); + retVal = -1; + } + else + { + log_info( " length double vector size %d passed\n", (int)sizes[ size ] ); + } + } + return retVal; +} + + +double verifyFastLength_double( double *srcA, size_t vecSize ) +{ + double total = 0; + unsigned int i; + + // We calculate the distance as a double, to try and make up for the fact that + // the GPU has better precision distance since it's a single op + for( i = 0; i < vecSize; i++ ) + { + total += (double)srcA[i] * (double)srcA[i]; + } + + return sqrt( total ); +} + +int test_geom_fast_length_double(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, MTdata d) +{ + size_t sizes[] = { 1, 2, 3, 4, 0 }; + unsigned int size; + int retVal = 0; + + abort(); //there is no double precision fast_length + + for( size = 0; sizes[ size ] != 0 ; size++ ) + { + double maxUlps = 8192.0f + // error in half_sqrt + 0.5f * // effect on e of taking sqrt( x + e ) + ( 0.5f * (double) sizes[size] + // cumulative error for multiplications + 0.5f * (double) (sizes[size]-1)); // cumulative error for additions + + if( test_oneToFloat_kernel_double( queue, context, "fast_length", sizes[ size ], verifyFastLength_double, maxUlps, d ) != 0 ) + { + log_error( " fast_length double vector size %d FAILED\n", (int)sizes[ size ] ); + retVal = -1; + } + else + { + log_info( " fast_length double vector size %d passed\n", (int)sizes[ size ] ); + } + } + return retVal; +} + + +typedef void (*oneToOneVerifyFn_double)( double *srcA, double *dstA, size_t vecSize ); + +int 
test_oneToOne_kernel_double(cl_command_queue queue, cl_context context, const char *fnName, + size_t vecSize, oneToOneVerifyFn_double verifyFn, double ulpLimit, MTdata d ) +{ + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper streams[2]; + BufferOwningPtr A(malloc(sizeof(cl_double) * TEST_SIZE * vecSize)); + BufferOwningPtr B(malloc(sizeof(cl_double) * TEST_SIZE * vecSize)); + int error; + size_t i, j, threads[1], localThreads[1]; + char kernelSource[10240]; + char *programPtr; + char sizeNames[][4] = { "", "2", "3", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" }; + cl_double *inDataA = A; + cl_double *outData = B; + + /* Create the source */ + sprintf( kernelSource, vecSize == 3 ? oneToOneKernelPattern_doubleV3 : oneToOneKernelPattern_double, sizeNames[vecSize-1], sizeNames[vecSize-1], fnName ); + + /* Create kernels */ + programPtr = kernelSource; + if( create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&programPtr, "sample_test" ) ) + return -1; + + /* initialize data */ + memset( inDataA, 0, vecSize * sizeof( cl_double ) ); + for( i = vecSize; i < TEST_SIZE * vecSize; i++ ) + inDataA[ i ] = any_double(d); + + + streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_double) * vecSize * TEST_SIZE, inDataA, NULL); + if( streams[0] == NULL ) + { + log_error("ERROR: Creating input array A failed!\n"); + return -1; + } + streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_double) * vecSize * TEST_SIZE, NULL, NULL); + if( streams[1] == NULL ) + { + log_error("ERROR: Creating output array failed!\n"); + return -1; + } + + /* Assign streams and execute */ + error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0] ); + test_error( error, "Unable to set indexed kernel arguments" ); + error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1] ); + test_error( error, "Unable to set indexed kernel arguments" ); + + /* Run the kernel */ + 
threads[0] = TEST_SIZE; + + error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] ); + test_error( error, "Unable to get work group size to use" ); + + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); + test_error( error, "Unable to execute test kernel" ); + + /* Now get the results */ + error = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof( cl_double ) * TEST_SIZE * vecSize, outData, 0, NULL, NULL ); + test_error( error, "Unable to read output array!" ); + + /* And verify! */ + for( i = 0; i < TEST_SIZE; i++ ) + { + double expected[4]; + verifyFn( inDataA + i * vecSize, expected, vecSize ); + for( j = 0; j < vecSize; j++ ) + { + // We have to special case NAN + if( isnan( outData[ i * vecSize + j ] ) && isnan( expected[ j ] ) ) + continue; + + if( expected[j] != outData[ i *vecSize+j ] ) + { + double error = + Ulp_Error_Double( outData[i*vecSize + j ], expected[ j ] ); + if( fabs(error) > ulpLimit ) + { + log_error( "ERROR: Data sample {%d,%d} at size %d does not validate! 
Expected %12.24f (%a), got %12.24f (%a), ulp %f\n", + (int)i, (int)j, (int)vecSize, + expected[j], expected[j], + outData[i*vecSize +j], + outData[i*vecSize +j], error ); + log_error( " Source: " ); + for( size_t q = 0; q < vecSize; q++ ) + log_error( "%g ", inDataA[ i * vecSize + q ] ); + log_error( "\n : " ); + for( size_t q = 0; q < vecSize; q++ ) + log_error( "%a ", inDataA[ i * vecSize + q ] ); + log_error( "\n" ); + log_error( " Result: " ); + for( size_t q = 0; q < vecSize; q++ ) + log_error( "%g ", outData[i * vecSize + q ] ); + log_error( "\n : " ); + for( size_t q = 0; q < vecSize; q++ ) + log_error( "%a ", outData[i * vecSize + q ] ); + log_error( "\n" ); + log_error( " Expected: " ); + for( size_t q = 0; q < vecSize; q++ ) + log_error( "%g ", expected[ q ] ); + log_error( "\n : " ); + for( size_t q = 0; q < vecSize; q++ ) + log_error( "%a ", expected[ q ] ); + log_error( "\n" ); + return -1; + } + } + } + } + + return 0; +} + +void verifyNormalize_double( double *srcA, double *dst, size_t vecSize ) +{ + double total = 0, value; + unsigned int i; + + // We calculate everything as a double, to try and make up for the fact that + // the GPU has better precision distance since it's a single op + for( i = 0; i < vecSize; i++ ) + total += (double)srcA[i] * (double)srcA[i]; + + if( total < vecSize * DBL_MIN / DBL_EPSILON ) + { //we may have incurred denormalization loss -- rescale + total = 0; + for( i = 0; i < vecSize; i++ ) + { + dst[i] = srcA[i] * MAKE_HEX_DOUBLE(0x1.0p700, 0x1LL, 700); //exact + total += dst[i] * dst[i]; + } + + //If still zero + if( total == 0.0 ) + { + // Special edge case: copy vector over without change + for( i = 0; i < vecSize; i++ ) + dst[i] = srcA[i]; + return; + } + + srcA = dst; + } + else if( total == INFINITY ) + { //we may have incurred spurious overflow + double scale = MAKE_HEX_DOUBLE(0x1.0p-512, 0x1LL, -512) / vecSize; + total = 0; + for( i = 0; i < vecSize; i++ ) + { + dst[i] = srcA[i] * scale; //exact + total += dst[i] * 
dst[i]; + } + + // If there are infinities here, handle those + if( total == INFINITY ) + { + total = 0; + for( i = 0; i < vecSize; i++ ) + { + if( isinf(dst[i]) ) + { + dst[i] = copysign( 1.0, srcA[i] ); + total += 1.0; + } + else + dst[i] = copysign( 0.0, srcA[i] ); + } + } + + srcA = dst; + } + + value = sqrt( total ); + + for( i = 0; i < vecSize; i++ ) + dst[i] = srcA[i] / value; +} + +int test_geom_normalize_double(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, MTdata d) +{ + size_t sizes[] = { 1, 2, 3, 4, 0 }; + unsigned int size; + int retVal = 0; + + for( size = 0; sizes[ size ] != 0 ; size++ ) + { + double maxUlps = 2.5f + // error in rsqrt + error in multiply + 0.5f * // effect on e of taking sqrt( x + e ) + ( 0.5f * (double) sizes[size] + // cumulative error for multiplications + 0.5f * (double) (sizes[size]-1)); // cumulative error for additions + + maxUlps *= 2.0; //our reference code is not infinitely precise and may have error of its own + if( test_oneToOne_kernel_double( queue, context, "normalize", sizes[ size ], verifyNormalize_double, maxUlps, d ) != 0 ) + { + log_error( " normalize double vector size %d FAILED\n", (int)sizes[ size ] ); + retVal = -1; + } + else + { + log_info( " normalize double vector size %d passed\n", (int)sizes[ size ] ); + } + } + return retVal; +} + + + + + diff --git a/test_conformance/gl/CMakeLists.txt b/test_conformance/gl/CMakeLists.txt new file mode 100644 index 00000000..3ebdf6c7 --- /dev/null +++ b/test_conformance/gl/CMakeLists.txt @@ -0,0 +1,63 @@ +if(WIN32) +list(APPEND CLConform_LIBRARIES glut32 opengl32 glu32 ) +else(WIN32) +list(APPEND CLConform_LIBRARIES GL glut GLEW GLU) +endif(WIN32) + +set (GL_SOURCES + main.cpp + test_buffers.cpp + test_images_2D.cpp + test_images_3D.cpp + test_renderbuffer.cpp + test_images_2D_info.cpp + test_images_3D_info.cpp + test_renderbuffer_info.cpp + test_fence_sync.cpp + helpers.cpp + ../../test_common/gl/helpers.cpp + 
../../test_common/harness/genericThread.cpp + ../../test_common/harness/errorHelpers.c + ../../test_common/harness/threadTesting.c + ../../test_common/harness/testHarness.c + ../../test_common/harness/kernelHelpers.c + ../../test_common/harness/mt19937.c + ../../test_common/harness/conversions.c + ../../test_common/harness/msvc9.c + ../../test_common/harness/parseParameters.cpp + ) + +if (WIN32) + list (APPEND GL_SOURCES ../../test_common/gl/setup_win32.cpp) +else(WIN32) + list (APPEND GL_SOURCES ../../test_common/gl/setup_x11.cpp) +endif(WIN32) + +# Compiling GLEW along with the project if the compiler is MINGW. +# The mingw linker was giving weird errors while linking to glew32.lib generated by +# MSVC. +if(MINGW) + list (APPEND GL_SOURCES GLEW/glew.c) + set_source_files_properties( + ${GL_SOURCES} + COMPILE_FLAGS -DGLEW_STATIC) + include_directories("./GLEW/") +endif(MINGW) + +if (MSVC) + if(CMAKE_CL_64) + list(APPEND CLConform_LIBRARIES glew64) + else(CMAKE_CL_64) + list(APPEND CLConform_LIBRARIES glew32) + endif(CMAKE_CL_64) +endif(MSVC) + +add_executable(conformance_test_gl + ${GL_SOURCES} ${GLUT_SOURCES}) + +set_source_files_properties( + ${GL_SOURCES} + PROPERTIES LANGUAGE CXX) + +TARGET_LINK_LIBRARIES(conformance_test_gl + ${CLConform_LIBRARIES}) diff --git a/test_conformance/gl/GLEW/GL/glew.h b/test_conformance/gl/GLEW/GL/glew.h new file mode 100644 index 00000000..13723b4e --- /dev/null +++ b/test_conformance/gl/GLEW/GL/glew.h @@ -0,0 +1,12262 @@ +/* +** The OpenGL Extension Wrangler Library +** Copyright (C) 2002-2008, Milan Ikits +** Copyright (C) 2002-2008, Marcelo E. Magallon +** Copyright (C) 2002, Lev Povalahev +** All rights reserved. +** +** Redistribution and use in source and binary forms, with or without +** modification, are permitted provided that the following conditions are met: +** +** * Redistributions of source code must retain the above copyright notice, +** this list of conditions and the following disclaimer. 
+** * Redistributions in binary form must reproduce the above copyright notice, +** this list of conditions and the following disclaimer in the documentation +** and/or other materials provided with the distribution. +** * The name of the author may be used to endorse or promote products +** derived from this software without specific prior written permission. +** +** THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +** AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +** IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +** ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +** LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +** CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +** SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +** INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +** CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +** ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +** THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/* + * Mesa 3-D graphics library + * Version: 7.0 + * + * Copyright (C) 1999-2007 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* +** Copyright (c) 2007 The Khronos Group Inc. +** +** Permission is hereby granted, free of charge, to any person obtaining a +** copy of this software and/or associated documentation files (the +** "Materials"), to deal in the Materials without restriction, including +** without limitation the rights to use, copy, modify, merge, publish, +** distribute, sublicense, and/or sell copies of the Materials, and to +** permit persons to whom the Materials are furnished to do so, subject to +** the following conditions: +** +** The above copyright notice and this permission notice shall be included +** in all copies or substantial portions of the Materials. +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. 
+*/ + +#ifndef __glew_h__ +#define __glew_h__ +#define __GLEW_H__ + +#if defined(__gl_h_) || defined(__GL_H__) +#error gl.h included before glew.h +#endif +#if defined(__glext_h_) || defined(__GLEXT_H_) +#error glext.h included before glew.h +#endif +#if defined(__gl_ATI_h_) +#error glATI.h included before glew.h +#endif + +#define __gl_h_ +#define __GL_H__ +#define __glext_h_ +#define __GLEXT_H_ +#define __gl_ATI_h_ + +#if defined(_WIN32) + +/* + * GLEW does not include to avoid name space pollution. + * GL needs GLAPI and GLAPIENTRY, GLU needs APIENTRY, CALLBACK, and wchar_t + * defined properly. + */ +/* */ +#ifndef APIENTRY +#define GLEW_APIENTRY_DEFINED +# if defined(__MINGW32__) +# define APIENTRY __stdcall +# elif (_MSC_VER >= 800) || defined(_STDCALL_SUPPORTED) || defined(__BORLANDC__) +# define APIENTRY __stdcall +# else +# define APIENTRY +# endif +#endif +#ifndef GLAPI +# if defined(__MINGW32__) +# define GLAPI extern +# endif +#endif +/* */ +#ifndef CALLBACK +#define GLEW_CALLBACK_DEFINED +# if defined(__MINGW32__) +# define CALLBACK __attribute__ ((__stdcall__)) +# elif (defined(_M_MRX000) || defined(_M_IX86) || defined(_M_ALPHA) || defined(_M_PPC)) && !defined(MIDL_PASS) +# define CALLBACK __stdcall +# else +# define CALLBACK +# endif +#endif +/* and */ +#ifndef WINGDIAPI +#define GLEW_WINGDIAPI_DEFINED +#define WINGDIAPI __declspec(dllimport) +#endif +/* */ +#if (defined(_MSC_VER) || defined(__BORLANDC__)) && !defined(_WCHAR_T_DEFINED) +typedef unsigned short wchar_t; +# define _WCHAR_T_DEFINED +#endif +/* */ +#if !defined(_W64) +# if !defined(__midl) && (defined(_X86_) || defined(_M_IX86)) && defined(_MSC_VER) && _MSC_VER >= 1300 +# define _W64 __w64 +# else +# define _W64 +# endif +#endif +#if !defined(_PTRDIFF_T_DEFINED) && !defined(_PTRDIFF_T_) +# ifdef _WIN64 +typedef __int64 ptrdiff_t; +# else +typedef _W64 int ptrdiff_t; +# endif +# define _PTRDIFF_T_DEFINED +# define _PTRDIFF_T_ +#endif + +#ifndef GLAPI +# if defined(__MINGW32__) +# define 
GLAPI extern +# else +# define GLAPI WINGDIAPI +# endif +#endif + +#ifndef GLAPIENTRY +#define GLAPIENTRY APIENTRY +#endif + +/* + * GLEW_STATIC needs to be set when using the static version. + * GLEW_BUILD is set when building the DLL version. + */ +#ifdef GLEW_STATIC +# define GLEWAPI extern +#else +# ifdef GLEW_BUILD +# define GLEWAPI extern __declspec(dllexport) +# else +# define GLEWAPI extern __declspec(dllimport) +# endif +#endif + +#else /* _UNIX */ + +/* + * Needed for ptrdiff_t in turn needed by VBO. This is defined by ISO + * C. On my system, this amounts to _3 lines_ of included code, all of + * them pretty much harmless. If you know of a way of detecting 32 vs + * 64 _targets_ at compile time you are free to replace this with + * something that's portable. For now, _this_ is the portable solution. + * (mem, 2004-01-04) + */ + +#include +#include + +#define GLEW_APIENTRY_DEFINED +#define APIENTRY +#define GLEWAPI extern + +/* */ +#ifndef GLAPI +#define GLAPI extern +#endif +#ifndef GLAPIENTRY +#define GLAPIENTRY +#endif + +#endif /* _WIN32 */ + +#ifdef __cplusplus +extern "C" { +#endif + +/* ----------------------------- GL_VERSION_1_1 ---------------------------- */ + +#ifndef GL_VERSION_1_1 +#define GL_VERSION_1_1 1 + +typedef unsigned int GLenum; +typedef unsigned int GLbitfield; +typedef unsigned int GLuint; +typedef int GLint; +typedef int GLsizei; +typedef unsigned char GLboolean; +typedef signed char GLbyte; +typedef short GLshort; +typedef unsigned char GLubyte; +typedef unsigned short GLushort; +typedef unsigned long GLulong; +typedef float GLfloat; +typedef float GLclampf; +typedef double GLdouble; +typedef double GLclampd; +typedef void GLvoid; +#if defined(_MSC_VER) +# if _MSC_VER < 1400 +typedef __int64 GLint64EXT; +typedef unsigned __int64 GLuint64EXT; +# else +typedef signed long long GLint64EXT; +typedef unsigned long long GLuint64EXT; +# endif +#else +# if defined(__MINGW32__) +#include +# endif +typedef int64_t GLint64EXT; +typedef 
uint64_t GLuint64EXT; +#endif + +#define GL_ACCUM 0x0100 +#define GL_LOAD 0x0101 +#define GL_RETURN 0x0102 +#define GL_MULT 0x0103 +#define GL_ADD 0x0104 +#define GL_NEVER 0x0200 +#define GL_LESS 0x0201 +#define GL_EQUAL 0x0202 +#define GL_LEQUAL 0x0203 +#define GL_GREATER 0x0204 +#define GL_NOTEQUAL 0x0205 +#define GL_GEQUAL 0x0206 +#define GL_ALWAYS 0x0207 +#define GL_CURRENT_BIT 0x00000001 +#define GL_POINT_BIT 0x00000002 +#define GL_LINE_BIT 0x00000004 +#define GL_POLYGON_BIT 0x00000008 +#define GL_POLYGON_STIPPLE_BIT 0x00000010 +#define GL_PIXEL_MODE_BIT 0x00000020 +#define GL_LIGHTING_BIT 0x00000040 +#define GL_FOG_BIT 0x00000080 +#define GL_DEPTH_BUFFER_BIT 0x00000100 +#define GL_ACCUM_BUFFER_BIT 0x00000200 +#define GL_STENCIL_BUFFER_BIT 0x00000400 +#define GL_VIEWPORT_BIT 0x00000800 +#define GL_TRANSFORM_BIT 0x00001000 +#define GL_ENABLE_BIT 0x00002000 +#define GL_COLOR_BUFFER_BIT 0x00004000 +#define GL_HINT_BIT 0x00008000 +#define GL_EVAL_BIT 0x00010000 +#define GL_LIST_BIT 0x00020000 +#define GL_TEXTURE_BIT 0x00040000 +#define GL_SCISSOR_BIT 0x00080000 +#define GL_ALL_ATTRIB_BITS 0x000fffff +#define GL_POINTS 0x0000 +#define GL_LINES 0x0001 +#define GL_LINE_LOOP 0x0002 +#define GL_LINE_STRIP 0x0003 +#define GL_TRIANGLES 0x0004 +#define GL_TRIANGLE_STRIP 0x0005 +#define GL_TRIANGLE_FAN 0x0006 +#define GL_QUADS 0x0007 +#define GL_QUAD_STRIP 0x0008 +#define GL_POLYGON 0x0009 +#define GL_ZERO 0 +#define GL_ONE 1 +#define GL_SRC_COLOR 0x0300 +#define GL_ONE_MINUS_SRC_COLOR 0x0301 +#define GL_SRC_ALPHA 0x0302 +#define GL_ONE_MINUS_SRC_ALPHA 0x0303 +#define GL_DST_ALPHA 0x0304 +#define GL_ONE_MINUS_DST_ALPHA 0x0305 +#define GL_DST_COLOR 0x0306 +#define GL_ONE_MINUS_DST_COLOR 0x0307 +#define GL_SRC_ALPHA_SATURATE 0x0308 +#define GL_TRUE 1 +#define GL_FALSE 0 +#define GL_CLIP_PLANE0 0x3000 +#define GL_CLIP_PLANE1 0x3001 +#define GL_CLIP_PLANE2 0x3002 +#define GL_CLIP_PLANE3 0x3003 +#define GL_CLIP_PLANE4 0x3004 +#define GL_CLIP_PLANE5 0x3005 +#define GL_BYTE 
0x1400 +#define GL_UNSIGNED_BYTE 0x1401 +#define GL_SHORT 0x1402 +#define GL_UNSIGNED_SHORT 0x1403 +#define GL_INT 0x1404 +#define GL_UNSIGNED_INT 0x1405 +#define GL_FLOAT 0x1406 +#define GL_2_BYTES 0x1407 +#define GL_3_BYTES 0x1408 +#define GL_4_BYTES 0x1409 +#define GL_DOUBLE 0x140A +#define GL_NONE 0 +#define GL_FRONT_LEFT 0x0400 +#define GL_FRONT_RIGHT 0x0401 +#define GL_BACK_LEFT 0x0402 +#define GL_BACK_RIGHT 0x0403 +#define GL_FRONT 0x0404 +#define GL_BACK 0x0405 +#define GL_LEFT 0x0406 +#define GL_RIGHT 0x0407 +#define GL_FRONT_AND_BACK 0x0408 +#define GL_AUX0 0x0409 +#define GL_AUX1 0x040A +#define GL_AUX2 0x040B +#define GL_AUX3 0x040C +#define GL_NO_ERROR 0 +#define GL_INVALID_ENUM 0x0500 +#define GL_INVALID_VALUE 0x0501 +#define GL_INVALID_OPERATION 0x0502 +#define GL_STACK_OVERFLOW 0x0503 +#define GL_STACK_UNDERFLOW 0x0504 +#define GL_OUT_OF_MEMORY 0x0505 +#define GL_2D 0x0600 +#define GL_3D 0x0601 +#define GL_3D_COLOR 0x0602 +#define GL_3D_COLOR_TEXTURE 0x0603 +#define GL_4D_COLOR_TEXTURE 0x0604 +#define GL_PASS_THROUGH_TOKEN 0x0700 +#define GL_POINT_TOKEN 0x0701 +#define GL_LINE_TOKEN 0x0702 +#define GL_POLYGON_TOKEN 0x0703 +#define GL_BITMAP_TOKEN 0x0704 +#define GL_DRAW_PIXEL_TOKEN 0x0705 +#define GL_COPY_PIXEL_TOKEN 0x0706 +#define GL_LINE_RESET_TOKEN 0x0707 +#define GL_EXP 0x0800 +#define GL_EXP2 0x0801 +#define GL_CW 0x0900 +#define GL_CCW 0x0901 +#define GL_COEFF 0x0A00 +#define GL_ORDER 0x0A01 +#define GL_DOMAIN 0x0A02 +#define GL_CURRENT_COLOR 0x0B00 +#define GL_CURRENT_INDEX 0x0B01 +#define GL_CURRENT_NORMAL 0x0B02 +#define GL_CURRENT_TEXTURE_COORDS 0x0B03 +#define GL_CURRENT_RASTER_COLOR 0x0B04 +#define GL_CURRENT_RASTER_INDEX 0x0B05 +#define GL_CURRENT_RASTER_TEXTURE_COORDS 0x0B06 +#define GL_CURRENT_RASTER_POSITION 0x0B07 +#define GL_CURRENT_RASTER_POSITION_VALID 0x0B08 +#define GL_CURRENT_RASTER_DISTANCE 0x0B09 +#define GL_POINT_SMOOTH 0x0B10 +#define GL_POINT_SIZE 0x0B11 +#define GL_POINT_SIZE_RANGE 0x0B12 +#define 
GL_POINT_SIZE_GRANULARITY 0x0B13 +#define GL_LINE_SMOOTH 0x0B20 +#define GL_LINE_WIDTH 0x0B21 +#define GL_LINE_WIDTH_RANGE 0x0B22 +#define GL_LINE_WIDTH_GRANULARITY 0x0B23 +#define GL_LINE_STIPPLE 0x0B24 +#define GL_LINE_STIPPLE_PATTERN 0x0B25 +#define GL_LINE_STIPPLE_REPEAT 0x0B26 +#define GL_LIST_MODE 0x0B30 +#define GL_MAX_LIST_NESTING 0x0B31 +#define GL_LIST_BASE 0x0B32 +#define GL_LIST_INDEX 0x0B33 +#define GL_POLYGON_MODE 0x0B40 +#define GL_POLYGON_SMOOTH 0x0B41 +#define GL_POLYGON_STIPPLE 0x0B42 +#define GL_EDGE_FLAG 0x0B43 +#define GL_CULL_FACE 0x0B44 +#define GL_CULL_FACE_MODE 0x0B45 +#define GL_FRONT_FACE 0x0B46 +#define GL_LIGHTING 0x0B50 +#define GL_LIGHT_MODEL_LOCAL_VIEWER 0x0B51 +#define GL_LIGHT_MODEL_TWO_SIDE 0x0B52 +#define GL_LIGHT_MODEL_AMBIENT 0x0B53 +#define GL_SHADE_MODEL 0x0B54 +#define GL_COLOR_MATERIAL_FACE 0x0B55 +#define GL_COLOR_MATERIAL_PARAMETER 0x0B56 +#define GL_COLOR_MATERIAL 0x0B57 +#define GL_FOG 0x0B60 +#define GL_FOG_INDEX 0x0B61 +#define GL_FOG_DENSITY 0x0B62 +#define GL_FOG_START 0x0B63 +#define GL_FOG_END 0x0B64 +#define GL_FOG_MODE 0x0B65 +#define GL_FOG_COLOR 0x0B66 +#define GL_DEPTH_RANGE 0x0B70 +#define GL_DEPTH_TEST 0x0B71 +#define GL_DEPTH_WRITEMASK 0x0B72 +#define GL_DEPTH_CLEAR_VALUE 0x0B73 +#define GL_DEPTH_FUNC 0x0B74 +#define GL_ACCUM_CLEAR_VALUE 0x0B80 +#define GL_STENCIL_TEST 0x0B90 +#define GL_STENCIL_CLEAR_VALUE 0x0B91 +#define GL_STENCIL_FUNC 0x0B92 +#define GL_STENCIL_VALUE_MASK 0x0B93 +#define GL_STENCIL_FAIL 0x0B94 +#define GL_STENCIL_PASS_DEPTH_FAIL 0x0B95 +#define GL_STENCIL_PASS_DEPTH_PASS 0x0B96 +#define GL_STENCIL_REF 0x0B97 +#define GL_STENCIL_WRITEMASK 0x0B98 +#define GL_MATRIX_MODE 0x0BA0 +#define GL_NORMALIZE 0x0BA1 +#define GL_VIEWPORT 0x0BA2 +#define GL_MODELVIEW_STACK_DEPTH 0x0BA3 +#define GL_PROJECTION_STACK_DEPTH 0x0BA4 +#define GL_TEXTURE_STACK_DEPTH 0x0BA5 +#define GL_MODELVIEW_MATRIX 0x0BA6 +#define GL_PROJECTION_MATRIX 0x0BA7 +#define GL_TEXTURE_MATRIX 0x0BA8 +#define GL_ATTRIB_STACK_DEPTH 
0x0BB0 +#define GL_CLIENT_ATTRIB_STACK_DEPTH 0x0BB1 +#define GL_ALPHA_TEST 0x0BC0 +#define GL_ALPHA_TEST_FUNC 0x0BC1 +#define GL_ALPHA_TEST_REF 0x0BC2 +#define GL_DITHER 0x0BD0 +#define GL_BLEND_DST 0x0BE0 +#define GL_BLEND_SRC 0x0BE1 +#define GL_BLEND 0x0BE2 +#define GL_LOGIC_OP_MODE 0x0BF0 +#define GL_INDEX_LOGIC_OP 0x0BF1 +#define GL_COLOR_LOGIC_OP 0x0BF2 +#define GL_AUX_BUFFERS 0x0C00 +#define GL_DRAW_BUFFER 0x0C01 +#define GL_READ_BUFFER 0x0C02 +#define GL_SCISSOR_BOX 0x0C10 +#define GL_SCISSOR_TEST 0x0C11 +#define GL_INDEX_CLEAR_VALUE 0x0C20 +#define GL_INDEX_WRITEMASK 0x0C21 +#define GL_COLOR_CLEAR_VALUE 0x0C22 +#define GL_COLOR_WRITEMASK 0x0C23 +#define GL_INDEX_MODE 0x0C30 +#define GL_RGBA_MODE 0x0C31 +#define GL_DOUBLEBUFFER 0x0C32 +#define GL_STEREO 0x0C33 +#define GL_RENDER_MODE 0x0C40 +#define GL_PERSPECTIVE_CORRECTION_HINT 0x0C50 +#define GL_POINT_SMOOTH_HINT 0x0C51 +#define GL_LINE_SMOOTH_HINT 0x0C52 +#define GL_POLYGON_SMOOTH_HINT 0x0C53 +#define GL_FOG_HINT 0x0C54 +#define GL_TEXTURE_GEN_S 0x0C60 +#define GL_TEXTURE_GEN_T 0x0C61 +#define GL_TEXTURE_GEN_R 0x0C62 +#define GL_TEXTURE_GEN_Q 0x0C63 +#define GL_PIXEL_MAP_I_TO_I 0x0C70 +#define GL_PIXEL_MAP_S_TO_S 0x0C71 +#define GL_PIXEL_MAP_I_TO_R 0x0C72 +#define GL_PIXEL_MAP_I_TO_G 0x0C73 +#define GL_PIXEL_MAP_I_TO_B 0x0C74 +#define GL_PIXEL_MAP_I_TO_A 0x0C75 +#define GL_PIXEL_MAP_R_TO_R 0x0C76 +#define GL_PIXEL_MAP_G_TO_G 0x0C77 +#define GL_PIXEL_MAP_B_TO_B 0x0C78 +#define GL_PIXEL_MAP_A_TO_A 0x0C79 +#define GL_PIXEL_MAP_I_TO_I_SIZE 0x0CB0 +#define GL_PIXEL_MAP_S_TO_S_SIZE 0x0CB1 +#define GL_PIXEL_MAP_I_TO_R_SIZE 0x0CB2 +#define GL_PIXEL_MAP_I_TO_G_SIZE 0x0CB3 +#define GL_PIXEL_MAP_I_TO_B_SIZE 0x0CB4 +#define GL_PIXEL_MAP_I_TO_A_SIZE 0x0CB5 +#define GL_PIXEL_MAP_R_TO_R_SIZE 0x0CB6 +#define GL_PIXEL_MAP_G_TO_G_SIZE 0x0CB7 +#define GL_PIXEL_MAP_B_TO_B_SIZE 0x0CB8 +#define GL_PIXEL_MAP_A_TO_A_SIZE 0x0CB9 +#define GL_UNPACK_SWAP_BYTES 0x0CF0 +#define GL_UNPACK_LSB_FIRST 0x0CF1 +#define 
GL_UNPACK_ROW_LENGTH 0x0CF2 +#define GL_UNPACK_SKIP_ROWS 0x0CF3 +#define GL_UNPACK_SKIP_PIXELS 0x0CF4 +#define GL_UNPACK_ALIGNMENT 0x0CF5 +#define GL_PACK_SWAP_BYTES 0x0D00 +#define GL_PACK_LSB_FIRST 0x0D01 +#define GL_PACK_ROW_LENGTH 0x0D02 +#define GL_PACK_SKIP_ROWS 0x0D03 +#define GL_PACK_SKIP_PIXELS 0x0D04 +#define GL_PACK_ALIGNMENT 0x0D05 +#define GL_MAP_COLOR 0x0D10 +#define GL_MAP_STENCIL 0x0D11 +#define GL_INDEX_SHIFT 0x0D12 +#define GL_INDEX_OFFSET 0x0D13 +#define GL_RED_SCALE 0x0D14 +#define GL_RED_BIAS 0x0D15 +#define GL_ZOOM_X 0x0D16 +#define GL_ZOOM_Y 0x0D17 +#define GL_GREEN_SCALE 0x0D18 +#define GL_GREEN_BIAS 0x0D19 +#define GL_BLUE_SCALE 0x0D1A +#define GL_BLUE_BIAS 0x0D1B +#define GL_ALPHA_SCALE 0x0D1C +#define GL_ALPHA_BIAS 0x0D1D +#define GL_DEPTH_SCALE 0x0D1E +#define GL_DEPTH_BIAS 0x0D1F +#define GL_MAX_EVAL_ORDER 0x0D30 +#define GL_MAX_LIGHTS 0x0D31 +#define GL_MAX_CLIP_PLANES 0x0D32 +#define GL_MAX_TEXTURE_SIZE 0x0D33 +#define GL_MAX_PIXEL_MAP_TABLE 0x0D34 +#define GL_MAX_ATTRIB_STACK_DEPTH 0x0D35 +#define GL_MAX_MODELVIEW_STACK_DEPTH 0x0D36 +#define GL_MAX_NAME_STACK_DEPTH 0x0D37 +#define GL_MAX_PROJECTION_STACK_DEPTH 0x0D38 +#define GL_MAX_TEXTURE_STACK_DEPTH 0x0D39 +#define GL_MAX_VIEWPORT_DIMS 0x0D3A +#define GL_MAX_CLIENT_ATTRIB_STACK_DEPTH 0x0D3B +#define GL_SUBPIXEL_BITS 0x0D50 +#define GL_INDEX_BITS 0x0D51 +#define GL_RED_BITS 0x0D52 +#define GL_GREEN_BITS 0x0D53 +#define GL_BLUE_BITS 0x0D54 +#define GL_ALPHA_BITS 0x0D55 +#define GL_DEPTH_BITS 0x0D56 +#define GL_STENCIL_BITS 0x0D57 +#define GL_ACCUM_RED_BITS 0x0D58 +#define GL_ACCUM_GREEN_BITS 0x0D59 +#define GL_ACCUM_BLUE_BITS 0x0D5A +#define GL_ACCUM_ALPHA_BITS 0x0D5B +#define GL_NAME_STACK_DEPTH 0x0D70 +#define GL_AUTO_NORMAL 0x0D80 +#define GL_MAP1_COLOR_4 0x0D90 +#define GL_MAP1_INDEX 0x0D91 +#define GL_MAP1_NORMAL 0x0D92 +#define GL_MAP1_TEXTURE_COORD_1 0x0D93 +#define GL_MAP1_TEXTURE_COORD_2 0x0D94 +#define GL_MAP1_TEXTURE_COORD_3 0x0D95 +#define GL_MAP1_TEXTURE_COORD_4 0x0D96 
+#define GL_MAP1_VERTEX_3 0x0D97 +#define GL_MAP1_VERTEX_4 0x0D98 +#define GL_MAP2_COLOR_4 0x0DB0 +#define GL_MAP2_INDEX 0x0DB1 +#define GL_MAP2_NORMAL 0x0DB2 +#define GL_MAP2_TEXTURE_COORD_1 0x0DB3 +#define GL_MAP2_TEXTURE_COORD_2 0x0DB4 +#define GL_MAP2_TEXTURE_COORD_3 0x0DB5 +#define GL_MAP2_TEXTURE_COORD_4 0x0DB6 +#define GL_MAP2_VERTEX_3 0x0DB7 +#define GL_MAP2_VERTEX_4 0x0DB8 +#define GL_MAP1_GRID_DOMAIN 0x0DD0 +#define GL_MAP1_GRID_SEGMENTS 0x0DD1 +#define GL_MAP2_GRID_DOMAIN 0x0DD2 +#define GL_MAP2_GRID_SEGMENTS 0x0DD3 +#define GL_TEXTURE_1D 0x0DE0 +#define GL_TEXTURE_2D 0x0DE1 +#define GL_FEEDBACK_BUFFER_POINTER 0x0DF0 +#define GL_FEEDBACK_BUFFER_SIZE 0x0DF1 +#define GL_FEEDBACK_BUFFER_TYPE 0x0DF2 +#define GL_SELECTION_BUFFER_POINTER 0x0DF3 +#define GL_SELECTION_BUFFER_SIZE 0x0DF4 +#define GL_TEXTURE_WIDTH 0x1000 +#define GL_TEXTURE_HEIGHT 0x1001 +#define GL_TEXTURE_INTERNAL_FORMAT 0x1003 +#define GL_TEXTURE_BORDER_COLOR 0x1004 +#define GL_TEXTURE_BORDER 0x1005 +#define GL_DONT_CARE 0x1100 +#define GL_FASTEST 0x1101 +#define GL_NICEST 0x1102 +#define GL_LIGHT0 0x4000 +#define GL_LIGHT1 0x4001 +#define GL_LIGHT2 0x4002 +#define GL_LIGHT3 0x4003 +#define GL_LIGHT4 0x4004 +#define GL_LIGHT5 0x4005 +#define GL_LIGHT6 0x4006 +#define GL_LIGHT7 0x4007 +#define GL_AMBIENT 0x1200 +#define GL_DIFFUSE 0x1201 +#define GL_SPECULAR 0x1202 +#define GL_POSITION 0x1203 +#define GL_SPOT_DIRECTION 0x1204 +#define GL_SPOT_EXPONENT 0x1205 +#define GL_SPOT_CUTOFF 0x1206 +#define GL_CONSTANT_ATTENUATION 0x1207 +#define GL_LINEAR_ATTENUATION 0x1208 +#define GL_QUADRATIC_ATTENUATION 0x1209 +#define GL_COMPILE 0x1300 +#define GL_COMPILE_AND_EXECUTE 0x1301 +#define GL_CLEAR 0x1500 +#define GL_AND 0x1501 +#define GL_AND_REVERSE 0x1502 +#define GL_COPY 0x1503 +#define GL_AND_INVERTED 0x1504 +#define GL_NOOP 0x1505 +#define GL_XOR 0x1506 +#define GL_OR 0x1507 +#define GL_NOR 0x1508 +#define GL_EQUIV 0x1509 +#define GL_INVERT 0x150A +#define GL_OR_REVERSE 0x150B +#define 
GL_COPY_INVERTED 0x150C +#define GL_OR_INVERTED 0x150D +#define GL_NAND 0x150E +#define GL_SET 0x150F +#define GL_EMISSION 0x1600 +#define GL_SHININESS 0x1601 +#define GL_AMBIENT_AND_DIFFUSE 0x1602 +#define GL_COLOR_INDEXES 0x1603 +#define GL_MODELVIEW 0x1700 +#define GL_PROJECTION 0x1701 +#define GL_TEXTURE 0x1702 +#define GL_COLOR 0x1800 +#define GL_DEPTH 0x1801 +#define GL_STENCIL 0x1802 +#define GL_COLOR_INDEX 0x1900 +#define GL_STENCIL_INDEX 0x1901 +#define GL_DEPTH_COMPONENT 0x1902 +#define GL_RED 0x1903 +#define GL_GREEN 0x1904 +#define GL_BLUE 0x1905 +#define GL_ALPHA 0x1906 +#define GL_RGB 0x1907 +#define GL_RGBA 0x1908 +#define GL_LUMINANCE 0x1909 +#define GL_LUMINANCE_ALPHA 0x190A +#define GL_BITMAP 0x1A00 +#define GL_POINT 0x1B00 +#define GL_LINE 0x1B01 +#define GL_FILL 0x1B02 +#define GL_RENDER 0x1C00 +#define GL_FEEDBACK 0x1C01 +#define GL_SELECT 0x1C02 +#define GL_FLAT 0x1D00 +#define GL_SMOOTH 0x1D01 +#define GL_KEEP 0x1E00 +#define GL_REPLACE 0x1E01 +#define GL_INCR 0x1E02 +#define GL_DECR 0x1E03 +#define GL_VENDOR 0x1F00 +#define GL_RENDERER 0x1F01 +#define GL_VERSION 0x1F02 +#define GL_EXTENSIONS 0x1F03 +#define GL_S 0x2000 +#define GL_T 0x2001 +#define GL_R 0x2002 +#define GL_Q 0x2003 +#define GL_MODULATE 0x2100 +#define GL_DECAL 0x2101 +#define GL_TEXTURE_ENV_MODE 0x2200 +#define GL_TEXTURE_ENV_COLOR 0x2201 +#define GL_TEXTURE_ENV 0x2300 +#define GL_EYE_LINEAR 0x2400 +#define GL_OBJECT_LINEAR 0x2401 +#define GL_SPHERE_MAP 0x2402 +#define GL_TEXTURE_GEN_MODE 0x2500 +#define GL_OBJECT_PLANE 0x2501 +#define GL_EYE_PLANE 0x2502 +#define GL_NEAREST 0x2600 +#define GL_LINEAR 0x2601 +#define GL_NEAREST_MIPMAP_NEAREST 0x2700 +#define GL_LINEAR_MIPMAP_NEAREST 0x2701 +#define GL_NEAREST_MIPMAP_LINEAR 0x2702 +#define GL_LINEAR_MIPMAP_LINEAR 0x2703 +#define GL_TEXTURE_MAG_FILTER 0x2800 +#define GL_TEXTURE_MIN_FILTER 0x2801 +#define GL_TEXTURE_WRAP_S 0x2802 +#define GL_TEXTURE_WRAP_T 0x2803 +#define GL_CLAMP 0x2900 +#define GL_REPEAT 0x2901 +#define 
GL_CLIENT_PIXEL_STORE_BIT 0x00000001 +#define GL_CLIENT_VERTEX_ARRAY_BIT 0x00000002 +#define GL_CLIENT_ALL_ATTRIB_BITS 0xffffffff +#define GL_POLYGON_OFFSET_FACTOR 0x8038 +#define GL_POLYGON_OFFSET_UNITS 0x2A00 +#define GL_POLYGON_OFFSET_POINT 0x2A01 +#define GL_POLYGON_OFFSET_LINE 0x2A02 +#define GL_POLYGON_OFFSET_FILL 0x8037 +#define GL_ALPHA4 0x803B +#define GL_ALPHA8 0x803C +#define GL_ALPHA12 0x803D +#define GL_ALPHA16 0x803E +#define GL_LUMINANCE4 0x803F +#define GL_LUMINANCE8 0x8040 +#define GL_LUMINANCE12 0x8041 +#define GL_LUMINANCE16 0x8042 +#define GL_LUMINANCE4_ALPHA4 0x8043 +#define GL_LUMINANCE6_ALPHA2 0x8044 +#define GL_LUMINANCE8_ALPHA8 0x8045 +#define GL_LUMINANCE12_ALPHA4 0x8046 +#define GL_LUMINANCE12_ALPHA12 0x8047 +#define GL_LUMINANCE16_ALPHA16 0x8048 +#define GL_INTENSITY 0x8049 +#define GL_INTENSITY4 0x804A +#define GL_INTENSITY8 0x804B +#define GL_INTENSITY12 0x804C +#define GL_INTENSITY16 0x804D +#define GL_R3_G3_B2 0x2A10 +#define GL_RGB4 0x804F +#define GL_RGB5 0x8050 +#define GL_RGB8 0x8051 +#define GL_RGB10 0x8052 +#define GL_RGB12 0x8053 +#define GL_RGB16 0x8054 +#define GL_RGBA2 0x8055 +#define GL_RGBA4 0x8056 +#define GL_RGB5_A1 0x8057 +#define GL_RGBA8 0x8058 +#define GL_RGB10_A2 0x8059 +#define GL_RGBA12 0x805A +#define GL_RGBA16 0x805B +#define GL_TEXTURE_RED_SIZE 0x805C +#define GL_TEXTURE_GREEN_SIZE 0x805D +#define GL_TEXTURE_BLUE_SIZE 0x805E +#define GL_TEXTURE_ALPHA_SIZE 0x805F +#define GL_TEXTURE_LUMINANCE_SIZE 0x8060 +#define GL_TEXTURE_INTENSITY_SIZE 0x8061 +#define GL_PROXY_TEXTURE_1D 0x8063 +#define GL_PROXY_TEXTURE_2D 0x8064 +#define GL_TEXTURE_PRIORITY 0x8066 +#define GL_TEXTURE_RESIDENT 0x8067 +#define GL_TEXTURE_BINDING_1D 0x8068 +#define GL_TEXTURE_BINDING_2D 0x8069 +#define GL_VERTEX_ARRAY 0x8074 +#define GL_NORMAL_ARRAY 0x8075 +#define GL_COLOR_ARRAY 0x8076 +#define GL_INDEX_ARRAY 0x8077 +#define GL_TEXTURE_COORD_ARRAY 0x8078 +#define GL_EDGE_FLAG_ARRAY 0x8079 +#define GL_VERTEX_ARRAY_SIZE 0x807A +#define 
GL_VERTEX_ARRAY_TYPE 0x807B +#define GL_VERTEX_ARRAY_STRIDE 0x807C +#define GL_NORMAL_ARRAY_TYPE 0x807E +#define GL_NORMAL_ARRAY_STRIDE 0x807F +#define GL_COLOR_ARRAY_SIZE 0x8081 +#define GL_COLOR_ARRAY_TYPE 0x8082 +#define GL_COLOR_ARRAY_STRIDE 0x8083 +#define GL_INDEX_ARRAY_TYPE 0x8085 +#define GL_INDEX_ARRAY_STRIDE 0x8086 +#define GL_TEXTURE_COORD_ARRAY_SIZE 0x8088 +#define GL_TEXTURE_COORD_ARRAY_TYPE 0x8089 +#define GL_TEXTURE_COORD_ARRAY_STRIDE 0x808A +#define GL_EDGE_FLAG_ARRAY_STRIDE 0x808C +#define GL_VERTEX_ARRAY_POINTER 0x808E +#define GL_NORMAL_ARRAY_POINTER 0x808F +#define GL_COLOR_ARRAY_POINTER 0x8090 +#define GL_INDEX_ARRAY_POINTER 0x8091 +#define GL_TEXTURE_COORD_ARRAY_POINTER 0x8092 +#define GL_EDGE_FLAG_ARRAY_POINTER 0x8093 +#define GL_V2F 0x2A20 +#define GL_V3F 0x2A21 +#define GL_C4UB_V2F 0x2A22 +#define GL_C4UB_V3F 0x2A23 +#define GL_C3F_V3F 0x2A24 +#define GL_N3F_V3F 0x2A25 +#define GL_C4F_N3F_V3F 0x2A26 +#define GL_T2F_V3F 0x2A27 +#define GL_T4F_V4F 0x2A28 +#define GL_T2F_C4UB_V3F 0x2A29 +#define GL_T2F_C3F_V3F 0x2A2A +#define GL_T2F_N3F_V3F 0x2A2B +#define GL_T2F_C4F_N3F_V3F 0x2A2C +#define GL_T4F_C4F_N3F_V4F 0x2A2D +#define GL_LOGIC_OP GL_INDEX_LOGIC_OP +#define GL_TEXTURE_COMPONENTS GL_TEXTURE_INTERNAL_FORMAT +#define GL_COLOR_INDEX1_EXT 0x80E2 +#define GL_COLOR_INDEX2_EXT 0x80E3 +#define GL_COLOR_INDEX4_EXT 0x80E4 +#define GL_COLOR_INDEX8_EXT 0x80E5 +#define GL_COLOR_INDEX12_EXT 0x80E6 +#define GL_COLOR_INDEX16_EXT 0x80E7 + +GLAPI void GLAPIENTRY glAccum (GLenum op, GLfloat value); +GLAPI void GLAPIENTRY glAlphaFunc (GLenum func, GLclampf ref); +GLAPI GLboolean GLAPIENTRY glAreTexturesResident (GLsizei n, const GLuint *textures, GLboolean *residences); +GLAPI void GLAPIENTRY glArrayElement (GLint i); +GLAPI void GLAPIENTRY glBegin (GLenum mode); +GLAPI void GLAPIENTRY glBindTexture (GLenum target, GLuint texture); +GLAPI void GLAPIENTRY glBitmap (GLsizei width, GLsizei height, GLfloat xorig, GLfloat yorig, GLfloat xmove, GLfloat ymove, 
const GLubyte *bitmap); +GLAPI void GLAPIENTRY glBlendFunc (GLenum sfactor, GLenum dfactor); +GLAPI void GLAPIENTRY glCallList (GLuint list); +GLAPI void GLAPIENTRY glCallLists (GLsizei n, GLenum type, const GLvoid *lists); +GLAPI void GLAPIENTRY glClear (GLbitfield mask); +GLAPI void GLAPIENTRY glClearAccum (GLfloat red, GLfloat green, GLfloat blue, GLfloat alpha); +GLAPI void GLAPIENTRY glClearColor (GLclampf red, GLclampf green, GLclampf blue, GLclampf alpha); +GLAPI void GLAPIENTRY glClearDepth (GLclampd depth); +GLAPI void GLAPIENTRY glClearIndex (GLfloat c); +GLAPI void GLAPIENTRY glClearStencil (GLint s); +GLAPI void GLAPIENTRY glClipPlane (GLenum plane, const GLdouble *equation); +GLAPI void GLAPIENTRY glColor3b (GLbyte red, GLbyte green, GLbyte blue); +GLAPI void GLAPIENTRY glColor3bv (const GLbyte *v); +GLAPI void GLAPIENTRY glColor3d (GLdouble red, GLdouble green, GLdouble blue); +GLAPI void GLAPIENTRY glColor3dv (const GLdouble *v); +GLAPI void GLAPIENTRY glColor3f (GLfloat red, GLfloat green, GLfloat blue); +GLAPI void GLAPIENTRY glColor3fv (const GLfloat *v); +GLAPI void GLAPIENTRY glColor3i (GLint red, GLint green, GLint blue); +GLAPI void GLAPIENTRY glColor3iv (const GLint *v); +GLAPI void GLAPIENTRY glColor3s (GLshort red, GLshort green, GLshort blue); +GLAPI void GLAPIENTRY glColor3sv (const GLshort *v); +GLAPI void GLAPIENTRY glColor3ub (GLubyte red, GLubyte green, GLubyte blue); +GLAPI void GLAPIENTRY glColor3ubv (const GLubyte *v); +GLAPI void GLAPIENTRY glColor3ui (GLuint red, GLuint green, GLuint blue); +GLAPI void GLAPIENTRY glColor3uiv (const GLuint *v); +GLAPI void GLAPIENTRY glColor3us (GLushort red, GLushort green, GLushort blue); +GLAPI void GLAPIENTRY glColor3usv (const GLushort *v); +GLAPI void GLAPIENTRY glColor4b (GLbyte red, GLbyte green, GLbyte blue, GLbyte alpha); +GLAPI void GLAPIENTRY glColor4bv (const GLbyte *v); +GLAPI void GLAPIENTRY glColor4d (GLdouble red, GLdouble green, GLdouble blue, GLdouble alpha); +GLAPI void 
GLAPIENTRY glColor4dv (const GLdouble *v); +GLAPI void GLAPIENTRY glColor4f (GLfloat red, GLfloat green, GLfloat blue, GLfloat alpha); +GLAPI void GLAPIENTRY glColor4fv (const GLfloat *v); +GLAPI void GLAPIENTRY glColor4i (GLint red, GLint green, GLint blue, GLint alpha); +GLAPI void GLAPIENTRY glColor4iv (const GLint *v); +GLAPI void GLAPIENTRY glColor4s (GLshort red, GLshort green, GLshort blue, GLshort alpha); +GLAPI void GLAPIENTRY glColor4sv (const GLshort *v); +GLAPI void GLAPIENTRY glColor4ub (GLubyte red, GLubyte green, GLubyte blue, GLubyte alpha); +GLAPI void GLAPIENTRY glColor4ubv (const GLubyte *v); +GLAPI void GLAPIENTRY glColor4ui (GLuint red, GLuint green, GLuint blue, GLuint alpha); +GLAPI void GLAPIENTRY glColor4uiv (const GLuint *v); +GLAPI void GLAPIENTRY glColor4us (GLushort red, GLushort green, GLushort blue, GLushort alpha); +GLAPI void GLAPIENTRY glColor4usv (const GLushort *v); +GLAPI void GLAPIENTRY glColorMask (GLboolean red, GLboolean green, GLboolean blue, GLboolean alpha); +GLAPI void GLAPIENTRY glColorMaterial (GLenum face, GLenum mode); +GLAPI void GLAPIENTRY glColorPointer (GLint size, GLenum type, GLsizei stride, const GLvoid *pointer); +GLAPI void GLAPIENTRY glCopyPixels (GLint x, GLint y, GLsizei width, GLsizei height, GLenum type); +GLAPI void GLAPIENTRY glCopyTexImage1D (GLenum target, GLint level, GLenum internalFormat, GLint x, GLint y, GLsizei width, GLint border); +GLAPI void GLAPIENTRY glCopyTexImage2D (GLenum target, GLint level, GLenum internalFormat, GLint x, GLint y, GLsizei width, GLsizei height, GLint border); +GLAPI void GLAPIENTRY glCopyTexSubImage1D (GLenum target, GLint level, GLint xoffset, GLint x, GLint y, GLsizei width); +GLAPI void GLAPIENTRY glCopyTexSubImage2D (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint x, GLint y, GLsizei width, GLsizei height); +GLAPI void GLAPIENTRY glCullFace (GLenum mode); +GLAPI void GLAPIENTRY glDeleteLists (GLuint list, GLsizei range); +GLAPI void GLAPIENTRY 
glDeleteTextures (GLsizei n, const GLuint *textures); +GLAPI void GLAPIENTRY glDepthFunc (GLenum func); +GLAPI void GLAPIENTRY glDepthMask (GLboolean flag); +GLAPI void GLAPIENTRY glDepthRange (GLclampd zNear, GLclampd zFar); +GLAPI void GLAPIENTRY glDisable (GLenum cap); +GLAPI void GLAPIENTRY glDisableClientState (GLenum array); +GLAPI void GLAPIENTRY glDrawArrays (GLenum mode, GLint first, GLsizei count); +GLAPI void GLAPIENTRY glDrawBuffer (GLenum mode); +GLAPI void GLAPIENTRY glDrawElements (GLenum mode, GLsizei count, GLenum type, const GLvoid *indices); +GLAPI void GLAPIENTRY glDrawPixels (GLsizei width, GLsizei height, GLenum format, GLenum type, const GLvoid *pixels); +GLAPI void GLAPIENTRY glEdgeFlag (GLboolean flag); +GLAPI void GLAPIENTRY glEdgeFlagPointer (GLsizei stride, const GLvoid *pointer); +GLAPI void GLAPIENTRY glEdgeFlagv (const GLboolean *flag); +GLAPI void GLAPIENTRY glEnable (GLenum cap); +GLAPI void GLAPIENTRY glEnableClientState (GLenum array); +GLAPI void GLAPIENTRY glEnd (void); +GLAPI void GLAPIENTRY glEndList (void); +GLAPI void GLAPIENTRY glEvalCoord1d (GLdouble u); +GLAPI void GLAPIENTRY glEvalCoord1dv (const GLdouble *u); +GLAPI void GLAPIENTRY glEvalCoord1f (GLfloat u); +GLAPI void GLAPIENTRY glEvalCoord1fv (const GLfloat *u); +GLAPI void GLAPIENTRY glEvalCoord2d (GLdouble u, GLdouble v); +GLAPI void GLAPIENTRY glEvalCoord2dv (const GLdouble *u); +GLAPI void GLAPIENTRY glEvalCoord2f (GLfloat u, GLfloat v); +GLAPI void GLAPIENTRY glEvalCoord2fv (const GLfloat *u); +GLAPI void GLAPIENTRY glEvalMesh1 (GLenum mode, GLint i1, GLint i2); +GLAPI void GLAPIENTRY glEvalMesh2 (GLenum mode, GLint i1, GLint i2, GLint j1, GLint j2); +GLAPI void GLAPIENTRY glEvalPoint1 (GLint i); +GLAPI void GLAPIENTRY glEvalPoint2 (GLint i, GLint j); +GLAPI void GLAPIENTRY glFeedbackBuffer (GLsizei size, GLenum type, GLfloat *buffer); +GLAPI void GLAPIENTRY glFinish (void); +GLAPI void GLAPIENTRY glFlush (void); +GLAPI void GLAPIENTRY glFogf (GLenum pname, 
GLfloat param); +GLAPI void GLAPIENTRY glFogfv (GLenum pname, const GLfloat *params); +GLAPI void GLAPIENTRY glFogi (GLenum pname, GLint param); +GLAPI void GLAPIENTRY glFogiv (GLenum pname, const GLint *params); +GLAPI void GLAPIENTRY glFrontFace (GLenum mode); +GLAPI void GLAPIENTRY glFrustum (GLdouble left, GLdouble right, GLdouble bottom, GLdouble top, GLdouble zNear, GLdouble zFar); +GLAPI GLuint GLAPIENTRY glGenLists (GLsizei range); +GLAPI void GLAPIENTRY glGenTextures (GLsizei n, GLuint *textures); +GLAPI void GLAPIENTRY glGetBooleanv (GLenum pname, GLboolean *params); +GLAPI void GLAPIENTRY glGetClipPlane (GLenum plane, GLdouble *equation); +GLAPI void GLAPIENTRY glGetDoublev (GLenum pname, GLdouble *params); +GLAPI GLenum GLAPIENTRY glGetError (void); +GLAPI void GLAPIENTRY glGetFloatv (GLenum pname, GLfloat *params); +GLAPI void GLAPIENTRY glGetIntegerv (GLenum pname, GLint *params); +GLAPI void GLAPIENTRY glGetLightfv (GLenum light, GLenum pname, GLfloat *params); +GLAPI void GLAPIENTRY glGetLightiv (GLenum light, GLenum pname, GLint *params); +GLAPI void GLAPIENTRY glGetMapdv (GLenum target, GLenum query, GLdouble *v); +GLAPI void GLAPIENTRY glGetMapfv (GLenum target, GLenum query, GLfloat *v); +GLAPI void GLAPIENTRY glGetMapiv (GLenum target, GLenum query, GLint *v); +GLAPI void GLAPIENTRY glGetMaterialfv (GLenum face, GLenum pname, GLfloat *params); +GLAPI void GLAPIENTRY glGetMaterialiv (GLenum face, GLenum pname, GLint *params); +GLAPI void GLAPIENTRY glGetPixelMapfv (GLenum map, GLfloat *values); +GLAPI void GLAPIENTRY glGetPixelMapuiv (GLenum map, GLuint *values); +GLAPI void GLAPIENTRY glGetPixelMapusv (GLenum map, GLushort *values); +GLAPI void GLAPIENTRY glGetPointerv (GLenum pname, GLvoid* *params); +GLAPI void GLAPIENTRY glGetPolygonStipple (GLubyte *mask); +GLAPI const GLubyte * GLAPIENTRY glGetString (GLenum name); +GLAPI void GLAPIENTRY glGetTexEnvfv (GLenum target, GLenum pname, GLfloat *params); +GLAPI void GLAPIENTRY glGetTexEnviv 
(GLenum target, GLenum pname, GLint *params); +GLAPI void GLAPIENTRY glGetTexGendv (GLenum coord, GLenum pname, GLdouble *params); +GLAPI void GLAPIENTRY glGetTexGenfv (GLenum coord, GLenum pname, GLfloat *params); +GLAPI void GLAPIENTRY glGetTexGeniv (GLenum coord, GLenum pname, GLint *params); +GLAPI void GLAPIENTRY glGetTexImage (GLenum target, GLint level, GLenum format, GLenum type, GLvoid *pixels); +GLAPI void GLAPIENTRY glGetTexLevelParameterfv (GLenum target, GLint level, GLenum pname, GLfloat *params); +GLAPI void GLAPIENTRY glGetTexLevelParameteriv (GLenum target, GLint level, GLenum pname, GLint *params); +GLAPI void GLAPIENTRY glGetTexParameterfv (GLenum target, GLenum pname, GLfloat *params); +GLAPI void GLAPIENTRY glGetTexParameteriv (GLenum target, GLenum pname, GLint *params); +GLAPI void GLAPIENTRY glHint (GLenum target, GLenum mode); +GLAPI void GLAPIENTRY glIndexMask (GLuint mask); +GLAPI void GLAPIENTRY glIndexPointer (GLenum type, GLsizei stride, const GLvoid *pointer); +GLAPI void GLAPIENTRY glIndexd (GLdouble c); +GLAPI void GLAPIENTRY glIndexdv (const GLdouble *c); +GLAPI void GLAPIENTRY glIndexf (GLfloat c); +GLAPI void GLAPIENTRY glIndexfv (const GLfloat *c); +GLAPI void GLAPIENTRY glIndexi (GLint c); +GLAPI void GLAPIENTRY glIndexiv (const GLint *c); +GLAPI void GLAPIENTRY glIndexs (GLshort c); +GLAPI void GLAPIENTRY glIndexsv (const GLshort *c); +GLAPI void GLAPIENTRY glIndexub (GLubyte c); +GLAPI void GLAPIENTRY glIndexubv (const GLubyte *c); +GLAPI void GLAPIENTRY glInitNames (void); +GLAPI void GLAPIENTRY glInterleavedArrays (GLenum format, GLsizei stride, const GLvoid *pointer); +GLAPI GLboolean GLAPIENTRY glIsEnabled (GLenum cap); +GLAPI GLboolean GLAPIENTRY glIsList (GLuint list); +GLAPI GLboolean GLAPIENTRY glIsTexture (GLuint texture); +GLAPI void GLAPIENTRY glLightModelf (GLenum pname, GLfloat param); +GLAPI void GLAPIENTRY glLightModelfv (GLenum pname, const GLfloat *params); +GLAPI void GLAPIENTRY glLightModeli (GLenum pname, 
GLint param); +GLAPI void GLAPIENTRY glLightModeliv (GLenum pname, const GLint *params); +GLAPI void GLAPIENTRY glLightf (GLenum light, GLenum pname, GLfloat param); +GLAPI void GLAPIENTRY glLightfv (GLenum light, GLenum pname, const GLfloat *params); +GLAPI void GLAPIENTRY glLighti (GLenum light, GLenum pname, GLint param); +GLAPI void GLAPIENTRY glLightiv (GLenum light, GLenum pname, const GLint *params); +GLAPI void GLAPIENTRY glLineStipple (GLint factor, GLushort pattern); +GLAPI void GLAPIENTRY glLineWidth (GLfloat width); +GLAPI void GLAPIENTRY glListBase (GLuint base); +GLAPI void GLAPIENTRY glLoadIdentity (void); +GLAPI void GLAPIENTRY glLoadMatrixd (const GLdouble *m); +GLAPI void GLAPIENTRY glLoadMatrixf (const GLfloat *m); +GLAPI void GLAPIENTRY glLoadName (GLuint name); +GLAPI void GLAPIENTRY glLogicOp (GLenum opcode); +GLAPI void GLAPIENTRY glMap1d (GLenum target, GLdouble u1, GLdouble u2, GLint stride, GLint order, const GLdouble *points); +GLAPI void GLAPIENTRY glMap1f (GLenum target, GLfloat u1, GLfloat u2, GLint stride, GLint order, const GLfloat *points); +GLAPI void GLAPIENTRY glMap2d (GLenum target, GLdouble u1, GLdouble u2, GLint ustride, GLint uorder, GLdouble v1, GLdouble v2, GLint vstride, GLint vorder, const GLdouble *points); +GLAPI void GLAPIENTRY glMap2f (GLenum target, GLfloat u1, GLfloat u2, GLint ustride, GLint uorder, GLfloat v1, GLfloat v2, GLint vstride, GLint vorder, const GLfloat *points); +GLAPI void GLAPIENTRY glMapGrid1d (GLint un, GLdouble u1, GLdouble u2); +GLAPI void GLAPIENTRY glMapGrid1f (GLint un, GLfloat u1, GLfloat u2); +GLAPI void GLAPIENTRY glMapGrid2d (GLint un, GLdouble u1, GLdouble u2, GLint vn, GLdouble v1, GLdouble v2); +GLAPI void GLAPIENTRY glMapGrid2f (GLint un, GLfloat u1, GLfloat u2, GLint vn, GLfloat v1, GLfloat v2); +GLAPI void GLAPIENTRY glMaterialf (GLenum face, GLenum pname, GLfloat param); +GLAPI void GLAPIENTRY glMaterialfv (GLenum face, GLenum pname, const GLfloat *params); +GLAPI void GLAPIENTRY 
glMateriali (GLenum face, GLenum pname, GLint param); +GLAPI void GLAPIENTRY glMaterialiv (GLenum face, GLenum pname, const GLint *params); +GLAPI void GLAPIENTRY glMatrixMode (GLenum mode); +GLAPI void GLAPIENTRY glMultMatrixd (const GLdouble *m); +GLAPI void GLAPIENTRY glMultMatrixf (const GLfloat *m); +GLAPI void GLAPIENTRY glNewList (GLuint list, GLenum mode); +GLAPI void GLAPIENTRY glNormal3b (GLbyte nx, GLbyte ny, GLbyte nz); +GLAPI void GLAPIENTRY glNormal3bv (const GLbyte *v); +GLAPI void GLAPIENTRY glNormal3d (GLdouble nx, GLdouble ny, GLdouble nz); +GLAPI void GLAPIENTRY glNormal3dv (const GLdouble *v); +GLAPI void GLAPIENTRY glNormal3f (GLfloat nx, GLfloat ny, GLfloat nz); +GLAPI void GLAPIENTRY glNormal3fv (const GLfloat *v); +GLAPI void GLAPIENTRY glNormal3i (GLint nx, GLint ny, GLint nz); +GLAPI void GLAPIENTRY glNormal3iv (const GLint *v); +GLAPI void GLAPIENTRY glNormal3s (GLshort nx, GLshort ny, GLshort nz); +GLAPI void GLAPIENTRY glNormal3sv (const GLshort *v); +GLAPI void GLAPIENTRY glNormalPointer (GLenum type, GLsizei stride, const GLvoid *pointer); +GLAPI void GLAPIENTRY glOrtho (GLdouble left, GLdouble right, GLdouble bottom, GLdouble top, GLdouble zNear, GLdouble zFar); +GLAPI void GLAPIENTRY glPassThrough (GLfloat token); +GLAPI void GLAPIENTRY glPixelMapfv (GLenum map, GLsizei mapsize, const GLfloat *values); +GLAPI void GLAPIENTRY glPixelMapuiv (GLenum map, GLsizei mapsize, const GLuint *values); +GLAPI void GLAPIENTRY glPixelMapusv (GLenum map, GLsizei mapsize, const GLushort *values); +GLAPI void GLAPIENTRY glPixelStoref (GLenum pname, GLfloat param); +GLAPI void GLAPIENTRY glPixelStorei (GLenum pname, GLint param); +GLAPI void GLAPIENTRY glPixelTransferf (GLenum pname, GLfloat param); +GLAPI void GLAPIENTRY glPixelTransferi (GLenum pname, GLint param); +GLAPI void GLAPIENTRY glPixelZoom (GLfloat xfactor, GLfloat yfactor); +GLAPI void GLAPIENTRY glPointSize (GLfloat size); +GLAPI void GLAPIENTRY glPolygonMode (GLenum face, GLenum mode); 
+GLAPI void GLAPIENTRY glPolygonOffset (GLfloat factor, GLfloat units); +GLAPI void GLAPIENTRY glPolygonStipple (const GLubyte *mask); +GLAPI void GLAPIENTRY glPopAttrib (void); +GLAPI void GLAPIENTRY glPopClientAttrib (void); +GLAPI void GLAPIENTRY glPopMatrix (void); +GLAPI void GLAPIENTRY glPopName (void); +GLAPI void GLAPIENTRY glPrioritizeTextures (GLsizei n, const GLuint *textures, const GLclampf *priorities); +GLAPI void GLAPIENTRY glPushAttrib (GLbitfield mask); +GLAPI void GLAPIENTRY glPushClientAttrib (GLbitfield mask); +GLAPI void GLAPIENTRY glPushMatrix (void); +GLAPI void GLAPIENTRY glPushName (GLuint name); +GLAPI void GLAPIENTRY glRasterPos2d (GLdouble x, GLdouble y); +GLAPI void GLAPIENTRY glRasterPos2dv (const GLdouble *v); +GLAPI void GLAPIENTRY glRasterPos2f (GLfloat x, GLfloat y); +GLAPI void GLAPIENTRY glRasterPos2fv (const GLfloat *v); +GLAPI void GLAPIENTRY glRasterPos2i (GLint x, GLint y); +GLAPI void GLAPIENTRY glRasterPos2iv (const GLint *v); +GLAPI void GLAPIENTRY glRasterPos2s (GLshort x, GLshort y); +GLAPI void GLAPIENTRY glRasterPos2sv (const GLshort *v); +GLAPI void GLAPIENTRY glRasterPos3d (GLdouble x, GLdouble y, GLdouble z); +GLAPI void GLAPIENTRY glRasterPos3dv (const GLdouble *v); +GLAPI void GLAPIENTRY glRasterPos3f (GLfloat x, GLfloat y, GLfloat z); +GLAPI void GLAPIENTRY glRasterPos3fv (const GLfloat *v); +GLAPI void GLAPIENTRY glRasterPos3i (GLint x, GLint y, GLint z); +GLAPI void GLAPIENTRY glRasterPos3iv (const GLint *v); +GLAPI void GLAPIENTRY glRasterPos3s (GLshort x, GLshort y, GLshort z); +GLAPI void GLAPIENTRY glRasterPos3sv (const GLshort *v); +GLAPI void GLAPIENTRY glRasterPos4d (GLdouble x, GLdouble y, GLdouble z, GLdouble w); +GLAPI void GLAPIENTRY glRasterPos4dv (const GLdouble *v); +GLAPI void GLAPIENTRY glRasterPos4f (GLfloat x, GLfloat y, GLfloat z, GLfloat w); +GLAPI void GLAPIENTRY glRasterPos4fv (const GLfloat *v); +GLAPI void GLAPIENTRY glRasterPos4i (GLint x, GLint y, GLint z, GLint w); +GLAPI void 
GLAPIENTRY glRasterPos4iv (const GLint *v); +GLAPI void GLAPIENTRY glRasterPos4s (GLshort x, GLshort y, GLshort z, GLshort w); +GLAPI void GLAPIENTRY glRasterPos4sv (const GLshort *v); +GLAPI void GLAPIENTRY glReadBuffer (GLenum mode); +GLAPI void GLAPIENTRY glReadPixels (GLint x, GLint y, GLsizei width, GLsizei height, GLenum format, GLenum type, GLvoid *pixels); +GLAPI void GLAPIENTRY glRectd (GLdouble x1, GLdouble y1, GLdouble x2, GLdouble y2); +GLAPI void GLAPIENTRY glRectdv (const GLdouble *v1, const GLdouble *v2); +GLAPI void GLAPIENTRY glRectf (GLfloat x1, GLfloat y1, GLfloat x2, GLfloat y2); +GLAPI void GLAPIENTRY glRectfv (const GLfloat *v1, const GLfloat *v2); +GLAPI void GLAPIENTRY glRecti (GLint x1, GLint y1, GLint x2, GLint y2); +GLAPI void GLAPIENTRY glRectiv (const GLint *v1, const GLint *v2); +GLAPI void GLAPIENTRY glRects (GLshort x1, GLshort y1, GLshort x2, GLshort y2); +GLAPI void GLAPIENTRY glRectsv (const GLshort *v1, const GLshort *v2); +GLAPI GLint GLAPIENTRY glRenderMode (GLenum mode); +GLAPI void GLAPIENTRY glRotated (GLdouble angle, GLdouble x, GLdouble y, GLdouble z); +GLAPI void GLAPIENTRY glRotatef (GLfloat angle, GLfloat x, GLfloat y, GLfloat z); +GLAPI void GLAPIENTRY glScaled (GLdouble x, GLdouble y, GLdouble z); +GLAPI void GLAPIENTRY glScalef (GLfloat x, GLfloat y, GLfloat z); +GLAPI void GLAPIENTRY glScissor (GLint x, GLint y, GLsizei width, GLsizei height); +GLAPI void GLAPIENTRY glSelectBuffer (GLsizei size, GLuint *buffer); +GLAPI void GLAPIENTRY glShadeModel (GLenum mode); +GLAPI void GLAPIENTRY glStencilFunc (GLenum func, GLint ref, GLuint mask); +GLAPI void GLAPIENTRY glStencilMask (GLuint mask); +GLAPI void GLAPIENTRY glStencilOp (GLenum fail, GLenum zfail, GLenum zpass); +GLAPI void GLAPIENTRY glTexCoord1d (GLdouble s); +GLAPI void GLAPIENTRY glTexCoord1dv (const GLdouble *v); +GLAPI void GLAPIENTRY glTexCoord1f (GLfloat s); +GLAPI void GLAPIENTRY glTexCoord1fv (const GLfloat *v); +GLAPI void GLAPIENTRY glTexCoord1i (GLint 
s); +GLAPI void GLAPIENTRY glTexCoord1iv (const GLint *v); +GLAPI void GLAPIENTRY glTexCoord1s (GLshort s); +GLAPI void GLAPIENTRY glTexCoord1sv (const GLshort *v); +GLAPI void GLAPIENTRY glTexCoord2d (GLdouble s, GLdouble t); +GLAPI void GLAPIENTRY glTexCoord2dv (const GLdouble *v); +GLAPI void GLAPIENTRY glTexCoord2f (GLfloat s, GLfloat t); +GLAPI void GLAPIENTRY glTexCoord2fv (const GLfloat *v); +GLAPI void GLAPIENTRY glTexCoord2i (GLint s, GLint t); +GLAPI void GLAPIENTRY glTexCoord2iv (const GLint *v); +GLAPI void GLAPIENTRY glTexCoord2s (GLshort s, GLshort t); +GLAPI void GLAPIENTRY glTexCoord2sv (const GLshort *v); +GLAPI void GLAPIENTRY glTexCoord3d (GLdouble s, GLdouble t, GLdouble r); +GLAPI void GLAPIENTRY glTexCoord3dv (const GLdouble *v); +GLAPI void GLAPIENTRY glTexCoord3f (GLfloat s, GLfloat t, GLfloat r); +GLAPI void GLAPIENTRY glTexCoord3fv (const GLfloat *v); +GLAPI void GLAPIENTRY glTexCoord3i (GLint s, GLint t, GLint r); +GLAPI void GLAPIENTRY glTexCoord3iv (const GLint *v); +GLAPI void GLAPIENTRY glTexCoord3s (GLshort s, GLshort t, GLshort r); +GLAPI void GLAPIENTRY glTexCoord3sv (const GLshort *v); +GLAPI void GLAPIENTRY glTexCoord4d (GLdouble s, GLdouble t, GLdouble r, GLdouble q); +GLAPI void GLAPIENTRY glTexCoord4dv (const GLdouble *v); +GLAPI void GLAPIENTRY glTexCoord4f (GLfloat s, GLfloat t, GLfloat r, GLfloat q); +GLAPI void GLAPIENTRY glTexCoord4fv (const GLfloat *v); +GLAPI void GLAPIENTRY glTexCoord4i (GLint s, GLint t, GLint r, GLint q); +GLAPI void GLAPIENTRY glTexCoord4iv (const GLint *v); +GLAPI void GLAPIENTRY glTexCoord4s (GLshort s, GLshort t, GLshort r, GLshort q); +GLAPI void GLAPIENTRY glTexCoord4sv (const GLshort *v); +GLAPI void GLAPIENTRY glTexCoordPointer (GLint size, GLenum type, GLsizei stride, const GLvoid *pointer); +GLAPI void GLAPIENTRY glTexEnvf (GLenum target, GLenum pname, GLfloat param); +GLAPI void GLAPIENTRY glTexEnvfv (GLenum target, GLenum pname, const GLfloat *params); +GLAPI void GLAPIENTRY glTexEnvi 
(GLenum target, GLenum pname, GLint param); +GLAPI void GLAPIENTRY glTexEnviv (GLenum target, GLenum pname, const GLint *params); +GLAPI void GLAPIENTRY glTexGend (GLenum coord, GLenum pname, GLdouble param); +GLAPI void GLAPIENTRY glTexGendv (GLenum coord, GLenum pname, const GLdouble *params); +GLAPI void GLAPIENTRY glTexGenf (GLenum coord, GLenum pname, GLfloat param); +GLAPI void GLAPIENTRY glTexGenfv (GLenum coord, GLenum pname, const GLfloat *params); +GLAPI void GLAPIENTRY glTexGeni (GLenum coord, GLenum pname, GLint param); +GLAPI void GLAPIENTRY glTexGeniv (GLenum coord, GLenum pname, const GLint *params); +GLAPI void GLAPIENTRY glTexImage1D (GLenum target, GLint level, GLint internalformat, GLsizei width, GLint border, GLenum format, GLenum type, const GLvoid *pixels); +GLAPI void GLAPIENTRY glTexImage2D (GLenum target, GLint level, GLint internalformat, GLsizei width, GLsizei height, GLint border, GLenum format, GLenum type, const GLvoid *pixels); +GLAPI void GLAPIENTRY glTexParameterf (GLenum target, GLenum pname, GLfloat param); +GLAPI void GLAPIENTRY glTexParameterfv (GLenum target, GLenum pname, const GLfloat *params); +GLAPI void GLAPIENTRY glTexParameteri (GLenum target, GLenum pname, GLint param); +GLAPI void GLAPIENTRY glTexParameteriv (GLenum target, GLenum pname, const GLint *params); +GLAPI void GLAPIENTRY glTexSubImage1D (GLenum target, GLint level, GLint xoffset, GLsizei width, GLenum format, GLenum type, const GLvoid *pixels); +GLAPI void GLAPIENTRY glTexSubImage2D (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLenum type, const GLvoid *pixels); +GLAPI void GLAPIENTRY glTranslated (GLdouble x, GLdouble y, GLdouble z); +GLAPI void GLAPIENTRY glTranslatef (GLfloat x, GLfloat y, GLfloat z); +GLAPI void GLAPIENTRY glVertex2d (GLdouble x, GLdouble y); +GLAPI void GLAPIENTRY glVertex2dv (const GLdouble *v); +GLAPI void GLAPIENTRY glVertex2f (GLfloat x, GLfloat y); +GLAPI void GLAPIENTRY 
glVertex2fv (const GLfloat *v); +GLAPI void GLAPIENTRY glVertex2i (GLint x, GLint y); +GLAPI void GLAPIENTRY glVertex2iv (const GLint *v); +GLAPI void GLAPIENTRY glVertex2s (GLshort x, GLshort y); +GLAPI void GLAPIENTRY glVertex2sv (const GLshort *v); +GLAPI void GLAPIENTRY glVertex3d (GLdouble x, GLdouble y, GLdouble z); +GLAPI void GLAPIENTRY glVertex3dv (const GLdouble *v); +GLAPI void GLAPIENTRY glVertex3f (GLfloat x, GLfloat y, GLfloat z); +GLAPI void GLAPIENTRY glVertex3fv (const GLfloat *v); +GLAPI void GLAPIENTRY glVertex3i (GLint x, GLint y, GLint z); +GLAPI void GLAPIENTRY glVertex3iv (const GLint *v); +GLAPI void GLAPIENTRY glVertex3s (GLshort x, GLshort y, GLshort z); +GLAPI void GLAPIENTRY glVertex3sv (const GLshort *v); +GLAPI void GLAPIENTRY glVertex4d (GLdouble x, GLdouble y, GLdouble z, GLdouble w); +GLAPI void GLAPIENTRY glVertex4dv (const GLdouble *v); +GLAPI void GLAPIENTRY glVertex4f (GLfloat x, GLfloat y, GLfloat z, GLfloat w); +GLAPI void GLAPIENTRY glVertex4fv (const GLfloat *v); +GLAPI void GLAPIENTRY glVertex4i (GLint x, GLint y, GLint z, GLint w); +GLAPI void GLAPIENTRY glVertex4iv (const GLint *v); +GLAPI void GLAPIENTRY glVertex4s (GLshort x, GLshort y, GLshort z, GLshort w); +GLAPI void GLAPIENTRY glVertex4sv (const GLshort *v); +GLAPI void GLAPIENTRY glVertexPointer (GLint size, GLenum type, GLsizei stride, const GLvoid *pointer); +GLAPI void GLAPIENTRY glViewport (GLint x, GLint y, GLsizei width, GLsizei height); + +#define GLEW_VERSION_1_1 GLEW_GET_VAR(__GLEW_VERSION_1_1) + +#endif /* GL_VERSION_1_1 */ + +/* ---------------------------------- GLU ---------------------------------- */ + +/* this is where we can safely include GLU */ +#if defined(__APPLE__) && defined(__MACH__) +#include +#else +#include +#endif + +/* ----------------------------- GL_VERSION_1_2 ---------------------------- */ + +#ifndef GL_VERSION_1_2 +#define GL_VERSION_1_2 1 + +#define GL_SMOOTH_POINT_SIZE_RANGE 0x0B12 +#define GL_SMOOTH_POINT_SIZE_GRANULARITY 
0x0B13 +#define GL_SMOOTH_LINE_WIDTH_RANGE 0x0B22 +#define GL_SMOOTH_LINE_WIDTH_GRANULARITY 0x0B23 +#define GL_UNSIGNED_BYTE_3_3_2 0x8032 +#define GL_UNSIGNED_SHORT_4_4_4_4 0x8033 +#define GL_UNSIGNED_SHORT_5_5_5_1 0x8034 +#define GL_UNSIGNED_INT_8_8_8_8 0x8035 +#define GL_UNSIGNED_INT_10_10_10_2 0x8036 +#define GL_RESCALE_NORMAL 0x803A +#define GL_TEXTURE_BINDING_3D 0x806A +#define GL_PACK_SKIP_IMAGES 0x806B +#define GL_PACK_IMAGE_HEIGHT 0x806C +#define GL_UNPACK_SKIP_IMAGES 0x806D +#define GL_UNPACK_IMAGE_HEIGHT 0x806E +#define GL_TEXTURE_3D 0x806F +#define GL_PROXY_TEXTURE_3D 0x8070 +#define GL_TEXTURE_DEPTH 0x8071 +#define GL_TEXTURE_WRAP_R 0x8072 +#define GL_MAX_3D_TEXTURE_SIZE 0x8073 +#define GL_BGR 0x80E0 +#define GL_BGRA 0x80E1 +#define GL_MAX_ELEMENTS_VERTICES 0x80E8 +#define GL_MAX_ELEMENTS_INDICES 0x80E9 +#define GL_CLAMP_TO_EDGE 0x812F +#define GL_TEXTURE_MIN_LOD 0x813A +#define GL_TEXTURE_MAX_LOD 0x813B +#define GL_TEXTURE_BASE_LEVEL 0x813C +#define GL_TEXTURE_MAX_LEVEL 0x813D +#define GL_LIGHT_MODEL_COLOR_CONTROL 0x81F8 +#define GL_SINGLE_COLOR 0x81F9 +#define GL_SEPARATE_SPECULAR_COLOR 0x81FA +#define GL_UNSIGNED_BYTE_2_3_3_REV 0x8362 +#define GL_UNSIGNED_SHORT_5_6_5 0x8363 +#define GL_UNSIGNED_SHORT_5_6_5_REV 0x8364 +#define GL_UNSIGNED_SHORT_4_4_4_4_REV 0x8365 +#define GL_UNSIGNED_SHORT_1_5_5_5_REV 0x8366 +#define GL_UNSIGNED_INT_8_8_8_8_REV 0x8367 +#define GL_UNSIGNED_INT_2_10_10_10_REV 0x8368 +#define GL_ALIASED_POINT_SIZE_RANGE 0x846D +#define GL_ALIASED_LINE_WIDTH_RANGE 0x846E + +typedef void (GLAPIENTRY * PFNGLCOPYTEXSUBIMAGE3DPROC) (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLint x, GLint y, GLsizei width, GLsizei height); +typedef void (GLAPIENTRY * PFNGLDRAWRANGEELEMENTSPROC) (GLenum mode, GLuint start, GLuint end, GLsizei count, GLenum type, const GLvoid *indices); +typedef void (GLAPIENTRY * PFNGLTEXIMAGE3DPROC) (GLenum target, GLint level, GLint internalFormat, GLsizei width, GLsizei height, GLsizei depth, 
GLint border, GLenum format, GLenum type, const GLvoid *pixels); +typedef void (GLAPIENTRY * PFNGLTEXSUBIMAGE3DPROC) (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type, const GLvoid *pixels); + +#define glCopyTexSubImage3D GLEW_GET_FUN(__glewCopyTexSubImage3D) +#define glDrawRangeElements GLEW_GET_FUN(__glewDrawRangeElements) +#define glTexImage3D GLEW_GET_FUN(__glewTexImage3D) +#define glTexSubImage3D GLEW_GET_FUN(__glewTexSubImage3D) + +#define GLEW_VERSION_1_2 GLEW_GET_VAR(__GLEW_VERSION_1_2) + +#endif /* GL_VERSION_1_2 */ + +/* ----------------------------- GL_VERSION_1_3 ---------------------------- */ + +#ifndef GL_VERSION_1_3 +#define GL_VERSION_1_3 1 + +#define GL_MULTISAMPLE 0x809D +#define GL_SAMPLE_ALPHA_TO_COVERAGE 0x809E +#define GL_SAMPLE_ALPHA_TO_ONE 0x809F +#define GL_SAMPLE_COVERAGE 0x80A0 +#define GL_SAMPLE_BUFFERS 0x80A8 +#define GL_SAMPLES 0x80A9 +#define GL_SAMPLE_COVERAGE_VALUE 0x80AA +#define GL_SAMPLE_COVERAGE_INVERT 0x80AB +#define GL_CLAMP_TO_BORDER 0x812D +#define GL_TEXTURE0 0x84C0 +#define GL_TEXTURE1 0x84C1 +#define GL_TEXTURE2 0x84C2 +#define GL_TEXTURE3 0x84C3 +#define GL_TEXTURE4 0x84C4 +#define GL_TEXTURE5 0x84C5 +#define GL_TEXTURE6 0x84C6 +#define GL_TEXTURE7 0x84C7 +#define GL_TEXTURE8 0x84C8 +#define GL_TEXTURE9 0x84C9 +#define GL_TEXTURE10 0x84CA +#define GL_TEXTURE11 0x84CB +#define GL_TEXTURE12 0x84CC +#define GL_TEXTURE13 0x84CD +#define GL_TEXTURE14 0x84CE +#define GL_TEXTURE15 0x84CF +#define GL_TEXTURE16 0x84D0 +#define GL_TEXTURE17 0x84D1 +#define GL_TEXTURE18 0x84D2 +#define GL_TEXTURE19 0x84D3 +#define GL_TEXTURE20 0x84D4 +#define GL_TEXTURE21 0x84D5 +#define GL_TEXTURE22 0x84D6 +#define GL_TEXTURE23 0x84D7 +#define GL_TEXTURE24 0x84D8 +#define GL_TEXTURE25 0x84D9 +#define GL_TEXTURE26 0x84DA +#define GL_TEXTURE27 0x84DB +#define GL_TEXTURE28 0x84DC +#define GL_TEXTURE29 0x84DD +#define GL_TEXTURE30 0x84DE +#define 
GL_TEXTURE31 0x84DF +#define GL_ACTIVE_TEXTURE 0x84E0 +#define GL_CLIENT_ACTIVE_TEXTURE 0x84E1 +#define GL_MAX_TEXTURE_UNITS 0x84E2 +#define GL_TRANSPOSE_MODELVIEW_MATRIX 0x84E3 +#define GL_TRANSPOSE_PROJECTION_MATRIX 0x84E4 +#define GL_TRANSPOSE_TEXTURE_MATRIX 0x84E5 +#define GL_TRANSPOSE_COLOR_MATRIX 0x84E6 +#define GL_SUBTRACT 0x84E7 +#define GL_COMPRESSED_ALPHA 0x84E9 +#define GL_COMPRESSED_LUMINANCE 0x84EA +#define GL_COMPRESSED_LUMINANCE_ALPHA 0x84EB +#define GL_COMPRESSED_INTENSITY 0x84EC +#define GL_COMPRESSED_RGB 0x84ED +#define GL_COMPRESSED_RGBA 0x84EE +#define GL_TEXTURE_COMPRESSION_HINT 0x84EF +#define GL_NORMAL_MAP 0x8511 +#define GL_REFLECTION_MAP 0x8512 +#define GL_TEXTURE_CUBE_MAP 0x8513 +#define GL_TEXTURE_BINDING_CUBE_MAP 0x8514 +#define GL_TEXTURE_CUBE_MAP_POSITIVE_X 0x8515 +#define GL_TEXTURE_CUBE_MAP_NEGATIVE_X 0x8516 +#define GL_TEXTURE_CUBE_MAP_POSITIVE_Y 0x8517 +#define GL_TEXTURE_CUBE_MAP_NEGATIVE_Y 0x8518 +#define GL_TEXTURE_CUBE_MAP_POSITIVE_Z 0x8519 +#define GL_TEXTURE_CUBE_MAP_NEGATIVE_Z 0x851A +#define GL_PROXY_TEXTURE_CUBE_MAP 0x851B +#define GL_MAX_CUBE_MAP_TEXTURE_SIZE 0x851C +#define GL_COMBINE 0x8570 +#define GL_COMBINE_RGB 0x8571 +#define GL_COMBINE_ALPHA 0x8572 +#define GL_RGB_SCALE 0x8573 +#define GL_ADD_SIGNED 0x8574 +#define GL_INTERPOLATE 0x8575 +#define GL_CONSTANT 0x8576 +#define GL_PRIMARY_COLOR 0x8577 +#define GL_PREVIOUS 0x8578 +#define GL_SOURCE0_RGB 0x8580 +#define GL_SOURCE1_RGB 0x8581 +#define GL_SOURCE2_RGB 0x8582 +#define GL_SOURCE0_ALPHA 0x8588 +#define GL_SOURCE1_ALPHA 0x8589 +#define GL_SOURCE2_ALPHA 0x858A +#define GL_OPERAND0_RGB 0x8590 +#define GL_OPERAND1_RGB 0x8591 +#define GL_OPERAND2_RGB 0x8592 +#define GL_OPERAND0_ALPHA 0x8598 +#define GL_OPERAND1_ALPHA 0x8599 +#define GL_OPERAND2_ALPHA 0x859A +#define GL_TEXTURE_COMPRESSED_IMAGE_SIZE 0x86A0 +#define GL_TEXTURE_COMPRESSED 0x86A1 +#define GL_NUM_COMPRESSED_TEXTURE_FORMATS 0x86A2 +#define GL_COMPRESSED_TEXTURE_FORMATS 0x86A3 +#define GL_DOT3_RGB 0x86AE 
+#define GL_DOT3_RGBA 0x86AF +#define GL_MULTISAMPLE_BIT 0x20000000 + +typedef void (GLAPIENTRY * PFNGLACTIVETEXTUREPROC) (GLenum texture); +typedef void (GLAPIENTRY * PFNGLCLIENTACTIVETEXTUREPROC) (GLenum texture); +typedef void (GLAPIENTRY * PFNGLCOMPRESSEDTEXIMAGE1DPROC) (GLenum target, GLint level, GLenum internalformat, GLsizei width, GLint border, GLsizei imageSize, const GLvoid *data); +typedef void (GLAPIENTRY * PFNGLCOMPRESSEDTEXIMAGE2DPROC) (GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLint border, GLsizei imageSize, const GLvoid *data); +typedef void (GLAPIENTRY * PFNGLCOMPRESSEDTEXIMAGE3DPROC) (GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth, GLint border, GLsizei imageSize, const GLvoid *data); +typedef void (GLAPIENTRY * PFNGLCOMPRESSEDTEXSUBIMAGE1DPROC) (GLenum target, GLint level, GLint xoffset, GLsizei width, GLenum format, GLsizei imageSize, const GLvoid *data); +typedef void (GLAPIENTRY * PFNGLCOMPRESSEDTEXSUBIMAGE2DPROC) (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLsizei imageSize, const GLvoid *data); +typedef void (GLAPIENTRY * PFNGLCOMPRESSEDTEXSUBIMAGE3DPROC) (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLsizei imageSize, const GLvoid *data); +typedef void (GLAPIENTRY * PFNGLGETCOMPRESSEDTEXIMAGEPROC) (GLenum target, GLint lod, GLvoid *img); +typedef void (GLAPIENTRY * PFNGLLOADTRANSPOSEMATRIXDPROC) (const GLdouble m[16]); +typedef void (GLAPIENTRY * PFNGLLOADTRANSPOSEMATRIXFPROC) (const GLfloat m[16]); +typedef void (GLAPIENTRY * PFNGLMULTTRANSPOSEMATRIXDPROC) (const GLdouble m[16]); +typedef void (GLAPIENTRY * PFNGLMULTTRANSPOSEMATRIXFPROC) (const GLfloat m[16]); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD1DPROC) (GLenum target, GLdouble s); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD1DVPROC) (GLenum 
target, const GLdouble *v); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD1FPROC) (GLenum target, GLfloat s); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD1FVPROC) (GLenum target, const GLfloat *v); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD1IPROC) (GLenum target, GLint s); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD1IVPROC) (GLenum target, const GLint *v); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD1SPROC) (GLenum target, GLshort s); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD1SVPROC) (GLenum target, const GLshort *v); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD2DPROC) (GLenum target, GLdouble s, GLdouble t); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD2DVPROC) (GLenum target, const GLdouble *v); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD2FPROC) (GLenum target, GLfloat s, GLfloat t); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD2FVPROC) (GLenum target, const GLfloat *v); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD2IPROC) (GLenum target, GLint s, GLint t); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD2IVPROC) (GLenum target, const GLint *v); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD2SPROC) (GLenum target, GLshort s, GLshort t); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD2SVPROC) (GLenum target, const GLshort *v); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD3DPROC) (GLenum target, GLdouble s, GLdouble t, GLdouble r); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD3DVPROC) (GLenum target, const GLdouble *v); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD3FPROC) (GLenum target, GLfloat s, GLfloat t, GLfloat r); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD3FVPROC) (GLenum target, const GLfloat *v); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD3IPROC) (GLenum target, GLint s, GLint t, GLint r); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD3IVPROC) (GLenum target, const GLint *v); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD3SPROC) (GLenum target, GLshort s, GLshort t, GLshort r); +typedef void (GLAPIENTRY * 
PFNGLMULTITEXCOORD3SVPROC) (GLenum target, const GLshort *v); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD4DPROC) (GLenum target, GLdouble s, GLdouble t, GLdouble r, GLdouble q); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD4DVPROC) (GLenum target, const GLdouble *v); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD4FPROC) (GLenum target, GLfloat s, GLfloat t, GLfloat r, GLfloat q); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD4FVPROC) (GLenum target, const GLfloat *v); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD4IPROC) (GLenum target, GLint s, GLint t, GLint r, GLint q); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD4IVPROC) (GLenum target, const GLint *v); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD4SPROC) (GLenum target, GLshort s, GLshort t, GLshort r, GLshort q); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD4SVPROC) (GLenum target, const GLshort *v); +typedef void (GLAPIENTRY * PFNGLSAMPLECOVERAGEPROC) (GLclampf value, GLboolean invert); + +#define glActiveTexture GLEW_GET_FUN(__glewActiveTexture) +#define glClientActiveTexture GLEW_GET_FUN(__glewClientActiveTexture) +#define glCompressedTexImage1D GLEW_GET_FUN(__glewCompressedTexImage1D) +#define glCompressedTexImage2D GLEW_GET_FUN(__glewCompressedTexImage2D) +#define glCompressedTexImage3D GLEW_GET_FUN(__glewCompressedTexImage3D) +#define glCompressedTexSubImage1D GLEW_GET_FUN(__glewCompressedTexSubImage1D) +#define glCompressedTexSubImage2D GLEW_GET_FUN(__glewCompressedTexSubImage2D) +#define glCompressedTexSubImage3D GLEW_GET_FUN(__glewCompressedTexSubImage3D) +#define glGetCompressedTexImage GLEW_GET_FUN(__glewGetCompressedTexImage) +#define glLoadTransposeMatrixd GLEW_GET_FUN(__glewLoadTransposeMatrixd) +#define glLoadTransposeMatrixf GLEW_GET_FUN(__glewLoadTransposeMatrixf) +#define glMultTransposeMatrixd GLEW_GET_FUN(__glewMultTransposeMatrixd) +#define glMultTransposeMatrixf GLEW_GET_FUN(__glewMultTransposeMatrixf) +#define glMultiTexCoord1d GLEW_GET_FUN(__glewMultiTexCoord1d) +#define 
glMultiTexCoord1dv GLEW_GET_FUN(__glewMultiTexCoord1dv) +#define glMultiTexCoord1f GLEW_GET_FUN(__glewMultiTexCoord1f) +#define glMultiTexCoord1fv GLEW_GET_FUN(__glewMultiTexCoord1fv) +#define glMultiTexCoord1i GLEW_GET_FUN(__glewMultiTexCoord1i) +#define glMultiTexCoord1iv GLEW_GET_FUN(__glewMultiTexCoord1iv) +#define glMultiTexCoord1s GLEW_GET_FUN(__glewMultiTexCoord1s) +#define glMultiTexCoord1sv GLEW_GET_FUN(__glewMultiTexCoord1sv) +#define glMultiTexCoord2d GLEW_GET_FUN(__glewMultiTexCoord2d) +#define glMultiTexCoord2dv GLEW_GET_FUN(__glewMultiTexCoord2dv) +#define glMultiTexCoord2f GLEW_GET_FUN(__glewMultiTexCoord2f) +#define glMultiTexCoord2fv GLEW_GET_FUN(__glewMultiTexCoord2fv) +#define glMultiTexCoord2i GLEW_GET_FUN(__glewMultiTexCoord2i) +#define glMultiTexCoord2iv GLEW_GET_FUN(__glewMultiTexCoord2iv) +#define glMultiTexCoord2s GLEW_GET_FUN(__glewMultiTexCoord2s) +#define glMultiTexCoord2sv GLEW_GET_FUN(__glewMultiTexCoord2sv) +#define glMultiTexCoord3d GLEW_GET_FUN(__glewMultiTexCoord3d) +#define glMultiTexCoord3dv GLEW_GET_FUN(__glewMultiTexCoord3dv) +#define glMultiTexCoord3f GLEW_GET_FUN(__glewMultiTexCoord3f) +#define glMultiTexCoord3fv GLEW_GET_FUN(__glewMultiTexCoord3fv) +#define glMultiTexCoord3i GLEW_GET_FUN(__glewMultiTexCoord3i) +#define glMultiTexCoord3iv GLEW_GET_FUN(__glewMultiTexCoord3iv) +#define glMultiTexCoord3s GLEW_GET_FUN(__glewMultiTexCoord3s) +#define glMultiTexCoord3sv GLEW_GET_FUN(__glewMultiTexCoord3sv) +#define glMultiTexCoord4d GLEW_GET_FUN(__glewMultiTexCoord4d) +#define glMultiTexCoord4dv GLEW_GET_FUN(__glewMultiTexCoord4dv) +#define glMultiTexCoord4f GLEW_GET_FUN(__glewMultiTexCoord4f) +#define glMultiTexCoord4fv GLEW_GET_FUN(__glewMultiTexCoord4fv) +#define glMultiTexCoord4i GLEW_GET_FUN(__glewMultiTexCoord4i) +#define glMultiTexCoord4iv GLEW_GET_FUN(__glewMultiTexCoord4iv) +#define glMultiTexCoord4s GLEW_GET_FUN(__glewMultiTexCoord4s) +#define glMultiTexCoord4sv GLEW_GET_FUN(__glewMultiTexCoord4sv) +#define 
glSampleCoverage GLEW_GET_FUN(__glewSampleCoverage) + +#define GLEW_VERSION_1_3 GLEW_GET_VAR(__GLEW_VERSION_1_3) + +#endif /* GL_VERSION_1_3 */ + +/* ----------------------------- GL_VERSION_1_4 ---------------------------- */ + +#ifndef GL_VERSION_1_4 +#define GL_VERSION_1_4 1 + +#define GL_BLEND_DST_RGB 0x80C8 +#define GL_BLEND_SRC_RGB 0x80C9 +#define GL_BLEND_DST_ALPHA 0x80CA +#define GL_BLEND_SRC_ALPHA 0x80CB +#define GL_POINT_SIZE_MIN 0x8126 +#define GL_POINT_SIZE_MAX 0x8127 +#define GL_POINT_FADE_THRESHOLD_SIZE 0x8128 +#define GL_POINT_DISTANCE_ATTENUATION 0x8129 +#define GL_GENERATE_MIPMAP 0x8191 +#define GL_GENERATE_MIPMAP_HINT 0x8192 +#define GL_DEPTH_COMPONENT16 0x81A5 +#define GL_DEPTH_COMPONENT24 0x81A6 +#define GL_DEPTH_COMPONENT32 0x81A7 +#define GL_MIRRORED_REPEAT 0x8370 +#define GL_FOG_COORDINATE_SOURCE 0x8450 +#define GL_FOG_COORDINATE 0x8451 +#define GL_FRAGMENT_DEPTH 0x8452 +#define GL_CURRENT_FOG_COORDINATE 0x8453 +#define GL_FOG_COORDINATE_ARRAY_TYPE 0x8454 +#define GL_FOG_COORDINATE_ARRAY_STRIDE 0x8455 +#define GL_FOG_COORDINATE_ARRAY_POINTER 0x8456 +#define GL_FOG_COORDINATE_ARRAY 0x8457 +#define GL_COLOR_SUM 0x8458 +#define GL_CURRENT_SECONDARY_COLOR 0x8459 +#define GL_SECONDARY_COLOR_ARRAY_SIZE 0x845A +#define GL_SECONDARY_COLOR_ARRAY_TYPE 0x845B +#define GL_SECONDARY_COLOR_ARRAY_STRIDE 0x845C +#define GL_SECONDARY_COLOR_ARRAY_POINTER 0x845D +#define GL_SECONDARY_COLOR_ARRAY 0x845E +#define GL_MAX_TEXTURE_LOD_BIAS 0x84FD +#define GL_TEXTURE_FILTER_CONTROL 0x8500 +#define GL_TEXTURE_LOD_BIAS 0x8501 +#define GL_INCR_WRAP 0x8507 +#define GL_DECR_WRAP 0x8508 +#define GL_TEXTURE_DEPTH_SIZE 0x884A +#define GL_DEPTH_TEXTURE_MODE 0x884B +#define GL_TEXTURE_COMPARE_MODE 0x884C +#define GL_TEXTURE_COMPARE_FUNC 0x884D +#define GL_COMPARE_R_TO_TEXTURE 0x884E + +typedef void (GLAPIENTRY * PFNGLBLENDCOLORPROC) (GLclampf red, GLclampf green, GLclampf blue, GLclampf alpha); +typedef void (GLAPIENTRY * PFNGLBLENDEQUATIONPROC) (GLenum mode); +typedef void 
(GLAPIENTRY * PFNGLBLENDFUNCSEPARATEPROC) (GLenum sfactorRGB, GLenum dfactorRGB, GLenum sfactorAlpha, GLenum dfactorAlpha); +typedef void (GLAPIENTRY * PFNGLFOGCOORDPOINTERPROC) (GLenum type, GLsizei stride, const GLvoid *pointer); +typedef void (GLAPIENTRY * PFNGLFOGCOORDDPROC) (GLdouble coord); +typedef void (GLAPIENTRY * PFNGLFOGCOORDDVPROC) (const GLdouble *coord); +typedef void (GLAPIENTRY * PFNGLFOGCOORDFPROC) (GLfloat coord); +typedef void (GLAPIENTRY * PFNGLFOGCOORDFVPROC) (const GLfloat *coord); +typedef void (GLAPIENTRY * PFNGLMULTIDRAWARRAYSPROC) (GLenum mode, GLint *first, GLsizei *count, GLsizei primcount); +typedef void (GLAPIENTRY * PFNGLMULTIDRAWELEMENTSPROC) (GLenum mode, GLsizei *count, GLenum type, const GLvoid **indices, GLsizei primcount); +typedef void (GLAPIENTRY * PFNGLPOINTPARAMETERFPROC) (GLenum pname, GLfloat param); +typedef void (GLAPIENTRY * PFNGLPOINTPARAMETERFVPROC) (GLenum pname, GLfloat *params); +typedef void (GLAPIENTRY * PFNGLPOINTPARAMETERIPROC) (GLenum pname, GLint param); +typedef void (GLAPIENTRY * PFNGLPOINTPARAMETERIVPROC) (GLenum pname, GLint *params); +typedef void (GLAPIENTRY * PFNGLSECONDARYCOLOR3BPROC) (GLbyte red, GLbyte green, GLbyte blue); +typedef void (GLAPIENTRY * PFNGLSECONDARYCOLOR3BVPROC) (const GLbyte *v); +typedef void (GLAPIENTRY * PFNGLSECONDARYCOLOR3DPROC) (GLdouble red, GLdouble green, GLdouble blue); +typedef void (GLAPIENTRY * PFNGLSECONDARYCOLOR3DVPROC) (const GLdouble *v); +typedef void (GLAPIENTRY * PFNGLSECONDARYCOLOR3FPROC) (GLfloat red, GLfloat green, GLfloat blue); +typedef void (GLAPIENTRY * PFNGLSECONDARYCOLOR3FVPROC) (const GLfloat *v); +typedef void (GLAPIENTRY * PFNGLSECONDARYCOLOR3IPROC) (GLint red, GLint green, GLint blue); +typedef void (GLAPIENTRY * PFNGLSECONDARYCOLOR3IVPROC) (const GLint *v); +typedef void (GLAPIENTRY * PFNGLSECONDARYCOLOR3SPROC) (GLshort red, GLshort green, GLshort blue); +typedef void (GLAPIENTRY * PFNGLSECONDARYCOLOR3SVPROC) (const GLshort *v); +typedef void 
(GLAPIENTRY * PFNGLSECONDARYCOLOR3UBPROC) (GLubyte red, GLubyte green, GLubyte blue); +typedef void (GLAPIENTRY * PFNGLSECONDARYCOLOR3UBVPROC) (const GLubyte *v); +typedef void (GLAPIENTRY * PFNGLSECONDARYCOLOR3UIPROC) (GLuint red, GLuint green, GLuint blue); +typedef void (GLAPIENTRY * PFNGLSECONDARYCOLOR3UIVPROC) (const GLuint *v); +typedef void (GLAPIENTRY * PFNGLSECONDARYCOLOR3USPROC) (GLushort red, GLushort green, GLushort blue); +typedef void (GLAPIENTRY * PFNGLSECONDARYCOLOR3USVPROC) (const GLushort *v); +typedef void (GLAPIENTRY * PFNGLSECONDARYCOLORPOINTERPROC) (GLint size, GLenum type, GLsizei stride, GLvoid *pointer); +typedef void (GLAPIENTRY * PFNGLWINDOWPOS2DPROC) (GLdouble x, GLdouble y); +typedef void (GLAPIENTRY * PFNGLWINDOWPOS2DVPROC) (const GLdouble *p); +typedef void (GLAPIENTRY * PFNGLWINDOWPOS2FPROC) (GLfloat x, GLfloat y); +typedef void (GLAPIENTRY * PFNGLWINDOWPOS2FVPROC) (const GLfloat *p); +typedef void (GLAPIENTRY * PFNGLWINDOWPOS2IPROC) (GLint x, GLint y); +typedef void (GLAPIENTRY * PFNGLWINDOWPOS2IVPROC) (const GLint *p); +typedef void (GLAPIENTRY * PFNGLWINDOWPOS2SPROC) (GLshort x, GLshort y); +typedef void (GLAPIENTRY * PFNGLWINDOWPOS2SVPROC) (const GLshort *p); +typedef void (GLAPIENTRY * PFNGLWINDOWPOS3DPROC) (GLdouble x, GLdouble y, GLdouble z); +typedef void (GLAPIENTRY * PFNGLWINDOWPOS3DVPROC) (const GLdouble *p); +typedef void (GLAPIENTRY * PFNGLWINDOWPOS3FPROC) (GLfloat x, GLfloat y, GLfloat z); +typedef void (GLAPIENTRY * PFNGLWINDOWPOS3FVPROC) (const GLfloat *p); +typedef void (GLAPIENTRY * PFNGLWINDOWPOS3IPROC) (GLint x, GLint y, GLint z); +typedef void (GLAPIENTRY * PFNGLWINDOWPOS3IVPROC) (const GLint *p); +typedef void (GLAPIENTRY * PFNGLWINDOWPOS3SPROC) (GLshort x, GLshort y, GLshort z); +typedef void (GLAPIENTRY * PFNGLWINDOWPOS3SVPROC) (const GLshort *p); + +#define glBlendColor GLEW_GET_FUN(__glewBlendColor) +#define glBlendEquation GLEW_GET_FUN(__glewBlendEquation) +#define glBlendFuncSeparate 
GLEW_GET_FUN(__glewBlendFuncSeparate) +#define glFogCoordPointer GLEW_GET_FUN(__glewFogCoordPointer) +#define glFogCoordd GLEW_GET_FUN(__glewFogCoordd) +#define glFogCoorddv GLEW_GET_FUN(__glewFogCoorddv) +#define glFogCoordf GLEW_GET_FUN(__glewFogCoordf) +#define glFogCoordfv GLEW_GET_FUN(__glewFogCoordfv) +#define glMultiDrawArrays GLEW_GET_FUN(__glewMultiDrawArrays) +#define glMultiDrawElements GLEW_GET_FUN(__glewMultiDrawElements) +#define glPointParameterf GLEW_GET_FUN(__glewPointParameterf) +#define glPointParameterfv GLEW_GET_FUN(__glewPointParameterfv) +#define glPointParameteri GLEW_GET_FUN(__glewPointParameteri) +#define glPointParameteriv GLEW_GET_FUN(__glewPointParameteriv) +#define glSecondaryColor3b GLEW_GET_FUN(__glewSecondaryColor3b) +#define glSecondaryColor3bv GLEW_GET_FUN(__glewSecondaryColor3bv) +#define glSecondaryColor3d GLEW_GET_FUN(__glewSecondaryColor3d) +#define glSecondaryColor3dv GLEW_GET_FUN(__glewSecondaryColor3dv) +#define glSecondaryColor3f GLEW_GET_FUN(__glewSecondaryColor3f) +#define glSecondaryColor3fv GLEW_GET_FUN(__glewSecondaryColor3fv) +#define glSecondaryColor3i GLEW_GET_FUN(__glewSecondaryColor3i) +#define glSecondaryColor3iv GLEW_GET_FUN(__glewSecondaryColor3iv) +#define glSecondaryColor3s GLEW_GET_FUN(__glewSecondaryColor3s) +#define glSecondaryColor3sv GLEW_GET_FUN(__glewSecondaryColor3sv) +#define glSecondaryColor3ub GLEW_GET_FUN(__glewSecondaryColor3ub) +#define glSecondaryColor3ubv GLEW_GET_FUN(__glewSecondaryColor3ubv) +#define glSecondaryColor3ui GLEW_GET_FUN(__glewSecondaryColor3ui) +#define glSecondaryColor3uiv GLEW_GET_FUN(__glewSecondaryColor3uiv) +#define glSecondaryColor3us GLEW_GET_FUN(__glewSecondaryColor3us) +#define glSecondaryColor3usv GLEW_GET_FUN(__glewSecondaryColor3usv) +#define glSecondaryColorPointer GLEW_GET_FUN(__glewSecondaryColorPointer) +#define glWindowPos2d GLEW_GET_FUN(__glewWindowPos2d) +#define glWindowPos2dv GLEW_GET_FUN(__glewWindowPos2dv) +#define glWindowPos2f 
GLEW_GET_FUN(__glewWindowPos2f) +#define glWindowPos2fv GLEW_GET_FUN(__glewWindowPos2fv) +#define glWindowPos2i GLEW_GET_FUN(__glewWindowPos2i) +#define glWindowPos2iv GLEW_GET_FUN(__glewWindowPos2iv) +#define glWindowPos2s GLEW_GET_FUN(__glewWindowPos2s) +#define glWindowPos2sv GLEW_GET_FUN(__glewWindowPos2sv) +#define glWindowPos3d GLEW_GET_FUN(__glewWindowPos3d) +#define glWindowPos3dv GLEW_GET_FUN(__glewWindowPos3dv) +#define glWindowPos3f GLEW_GET_FUN(__glewWindowPos3f) +#define glWindowPos3fv GLEW_GET_FUN(__glewWindowPos3fv) +#define glWindowPos3i GLEW_GET_FUN(__glewWindowPos3i) +#define glWindowPos3iv GLEW_GET_FUN(__glewWindowPos3iv) +#define glWindowPos3s GLEW_GET_FUN(__glewWindowPos3s) +#define glWindowPos3sv GLEW_GET_FUN(__glewWindowPos3sv) + +#define GLEW_VERSION_1_4 GLEW_GET_VAR(__GLEW_VERSION_1_4) + +#endif /* GL_VERSION_1_4 */ + +/* ----------------------------- GL_VERSION_1_5 ---------------------------- */ + +#ifndef GL_VERSION_1_5 +#define GL_VERSION_1_5 1 + +#define GL_FOG_COORD_SRC GL_FOG_COORDINATE_SOURCE +#define GL_FOG_COORD GL_FOG_COORDINATE +#define GL_FOG_COORD_ARRAY GL_FOG_COORDINATE_ARRAY +#define GL_SRC0_RGB GL_SOURCE0_RGB +#define GL_FOG_COORD_ARRAY_POINTER GL_FOG_COORDINATE_ARRAY_POINTER +#define GL_FOG_COORD_ARRAY_TYPE GL_FOG_COORDINATE_ARRAY_TYPE +#define GL_SRC1_ALPHA GL_SOURCE1_ALPHA +#define GL_CURRENT_FOG_COORD GL_CURRENT_FOG_COORDINATE +#define GL_FOG_COORD_ARRAY_STRIDE GL_FOG_COORDINATE_ARRAY_STRIDE +#define GL_SRC0_ALPHA GL_SOURCE0_ALPHA +#define GL_SRC1_RGB GL_SOURCE1_RGB +#define GL_FOG_COORD_ARRAY_BUFFER_BINDING GL_FOG_COORDINATE_ARRAY_BUFFER_BINDING +#define GL_SRC2_ALPHA GL_SOURCE2_ALPHA +#define GL_SRC2_RGB GL_SOURCE2_RGB +#define GL_BUFFER_SIZE 0x8764 +#define GL_BUFFER_USAGE 0x8765 +#define GL_QUERY_COUNTER_BITS 0x8864 +#define GL_CURRENT_QUERY 0x8865 +#define GL_QUERY_RESULT 0x8866 +#define GL_QUERY_RESULT_AVAILABLE 0x8867 +#define GL_ARRAY_BUFFER 0x8892 +#define GL_ELEMENT_ARRAY_BUFFER 0x8893 +#define 
GL_ARRAY_BUFFER_BINDING 0x8894 +#define GL_ELEMENT_ARRAY_BUFFER_BINDING 0x8895 +#define GL_VERTEX_ARRAY_BUFFER_BINDING 0x8896 +#define GL_NORMAL_ARRAY_BUFFER_BINDING 0x8897 +#define GL_COLOR_ARRAY_BUFFER_BINDING 0x8898 +#define GL_INDEX_ARRAY_BUFFER_BINDING 0x8899 +#define GL_TEXTURE_COORD_ARRAY_BUFFER_BINDING 0x889A +#define GL_EDGE_FLAG_ARRAY_BUFFER_BINDING 0x889B +#define GL_SECONDARY_COLOR_ARRAY_BUFFER_BINDING 0x889C +#define GL_FOG_COORDINATE_ARRAY_BUFFER_BINDING 0x889D +#define GL_WEIGHT_ARRAY_BUFFER_BINDING 0x889E +#define GL_VERTEX_ATTRIB_ARRAY_BUFFER_BINDING 0x889F +#define GL_READ_ONLY 0x88B8 +#define GL_WRITE_ONLY 0x88B9 +#define GL_READ_WRITE 0x88BA +#define GL_BUFFER_ACCESS 0x88BB +#define GL_BUFFER_MAPPED 0x88BC +#define GL_BUFFER_MAP_POINTER 0x88BD +#define GL_STREAM_DRAW 0x88E0 +#define GL_STREAM_READ 0x88E1 +#define GL_STREAM_COPY 0x88E2 +#define GL_STATIC_DRAW 0x88E4 +#define GL_STATIC_READ 0x88E5 +#define GL_STATIC_COPY 0x88E6 +#define GL_DYNAMIC_DRAW 0x88E8 +#define GL_DYNAMIC_READ 0x88E9 +#define GL_DYNAMIC_COPY 0x88EA +#define GL_SAMPLES_PASSED 0x8914 + +typedef ptrdiff_t GLsizeiptr; +typedef ptrdiff_t GLintptr; + +typedef void (GLAPIENTRY * PFNGLBEGINQUERYPROC) (GLenum target, GLuint id); +typedef void (GLAPIENTRY * PFNGLBINDBUFFERPROC) (GLenum target, GLuint buffer); +typedef void (GLAPIENTRY * PFNGLBUFFERDATAPROC) (GLenum target, GLsizeiptr size, const GLvoid* data, GLenum usage); +typedef void (GLAPIENTRY * PFNGLBUFFERSUBDATAPROC) (GLenum target, GLintptr offset, GLsizeiptr size, const GLvoid* data); +typedef void (GLAPIENTRY * PFNGLDELETEBUFFERSPROC) (GLsizei n, const GLuint* buffers); +typedef void (GLAPIENTRY * PFNGLDELETEQUERIESPROC) (GLsizei n, const GLuint* ids); +typedef void (GLAPIENTRY * PFNGLENDQUERYPROC) (GLenum target); +typedef void (GLAPIENTRY * PFNGLGENBUFFERSPROC) (GLsizei n, GLuint* buffers); +typedef void (GLAPIENTRY * PFNGLGENQUERIESPROC) (GLsizei n, GLuint* ids); +typedef void (GLAPIENTRY * 
PFNGLGETBUFFERPARAMETERIVPROC) (GLenum target, GLenum pname, GLint* params); +typedef void (GLAPIENTRY * PFNGLGETBUFFERPOINTERVPROC) (GLenum target, GLenum pname, GLvoid** params); +typedef void (GLAPIENTRY * PFNGLGETBUFFERSUBDATAPROC) (GLenum target, GLintptr offset, GLsizeiptr size, GLvoid* data); +typedef void (GLAPIENTRY * PFNGLGETQUERYOBJECTIVPROC) (GLuint id, GLenum pname, GLint* params); +typedef void (GLAPIENTRY * PFNGLGETQUERYOBJECTUIVPROC) (GLuint id, GLenum pname, GLuint* params); +typedef void (GLAPIENTRY * PFNGLGETQUERYIVPROC) (GLenum target, GLenum pname, GLint* params); +typedef GLboolean (GLAPIENTRY * PFNGLISBUFFERPROC) (GLuint buffer); +typedef GLboolean (GLAPIENTRY * PFNGLISQUERYPROC) (GLuint id); +typedef GLvoid* (GLAPIENTRY * PFNGLMAPBUFFERPROC) (GLenum target, GLenum access); +typedef GLboolean (GLAPIENTRY * PFNGLUNMAPBUFFERPROC) (GLenum target); + +#define glBeginQuery GLEW_GET_FUN(__glewBeginQuery) +#define glBindBuffer GLEW_GET_FUN(__glewBindBuffer) +#define glBufferData GLEW_GET_FUN(__glewBufferData) +#define glBufferSubData GLEW_GET_FUN(__glewBufferSubData) +#define glDeleteBuffers GLEW_GET_FUN(__glewDeleteBuffers) +#define glDeleteQueries GLEW_GET_FUN(__glewDeleteQueries) +#define glEndQuery GLEW_GET_FUN(__glewEndQuery) +#define glGenBuffers GLEW_GET_FUN(__glewGenBuffers) +#define glGenQueries GLEW_GET_FUN(__glewGenQueries) +#define glGetBufferParameteriv GLEW_GET_FUN(__glewGetBufferParameteriv) +#define glGetBufferPointerv GLEW_GET_FUN(__glewGetBufferPointerv) +#define glGetBufferSubData GLEW_GET_FUN(__glewGetBufferSubData) +#define glGetQueryObjectiv GLEW_GET_FUN(__glewGetQueryObjectiv) +#define glGetQueryObjectuiv GLEW_GET_FUN(__glewGetQueryObjectuiv) +#define glGetQueryiv GLEW_GET_FUN(__glewGetQueryiv) +#define glIsBuffer GLEW_GET_FUN(__glewIsBuffer) +#define glIsQuery GLEW_GET_FUN(__glewIsQuery) +#define glMapBuffer GLEW_GET_FUN(__glewMapBuffer) +#define glUnmapBuffer GLEW_GET_FUN(__glewUnmapBuffer) + +#define GLEW_VERSION_1_5 
GLEW_GET_VAR(__GLEW_VERSION_1_5) + +#endif /* GL_VERSION_1_5 */ + +/* ----------------------------- GL_VERSION_2_0 ---------------------------- */ + +#ifndef GL_VERSION_2_0 +#define GL_VERSION_2_0 1 + +#define GL_BLEND_EQUATION_RGB GL_BLEND_EQUATION +#define GL_VERTEX_ATTRIB_ARRAY_ENABLED 0x8622 +#define GL_VERTEX_ATTRIB_ARRAY_SIZE 0x8623 +#define GL_VERTEX_ATTRIB_ARRAY_STRIDE 0x8624 +#define GL_VERTEX_ATTRIB_ARRAY_TYPE 0x8625 +#define GL_CURRENT_VERTEX_ATTRIB 0x8626 +#define GL_VERTEX_PROGRAM_POINT_SIZE 0x8642 +#define GL_VERTEX_PROGRAM_TWO_SIDE 0x8643 +#define GL_VERTEX_ATTRIB_ARRAY_POINTER 0x8645 +#define GL_STENCIL_BACK_FUNC 0x8800 +#define GL_STENCIL_BACK_FAIL 0x8801 +#define GL_STENCIL_BACK_PASS_DEPTH_FAIL 0x8802 +#define GL_STENCIL_BACK_PASS_DEPTH_PASS 0x8803 +#define GL_MAX_DRAW_BUFFERS 0x8824 +#define GL_DRAW_BUFFER0 0x8825 +#define GL_DRAW_BUFFER1 0x8826 +#define GL_DRAW_BUFFER2 0x8827 +#define GL_DRAW_BUFFER3 0x8828 +#define GL_DRAW_BUFFER4 0x8829 +#define GL_DRAW_BUFFER5 0x882A +#define GL_DRAW_BUFFER6 0x882B +#define GL_DRAW_BUFFER7 0x882C +#define GL_DRAW_BUFFER8 0x882D +#define GL_DRAW_BUFFER9 0x882E +#define GL_DRAW_BUFFER10 0x882F +#define GL_DRAW_BUFFER11 0x8830 +#define GL_DRAW_BUFFER12 0x8831 +#define GL_DRAW_BUFFER13 0x8832 +#define GL_DRAW_BUFFER14 0x8833 +#define GL_DRAW_BUFFER15 0x8834 +#define GL_BLEND_EQUATION_ALPHA 0x883D +#define GL_POINT_SPRITE 0x8861 +#define GL_COORD_REPLACE 0x8862 +#define GL_MAX_VERTEX_ATTRIBS 0x8869 +#define GL_VERTEX_ATTRIB_ARRAY_NORMALIZED 0x886A +#define GL_MAX_TEXTURE_COORDS 0x8871 +#define GL_MAX_TEXTURE_IMAGE_UNITS 0x8872 +#define GL_FRAGMENT_SHADER 0x8B30 +#define GL_VERTEX_SHADER 0x8B31 +#define GL_MAX_FRAGMENT_UNIFORM_COMPONENTS 0x8B49 +#define GL_MAX_VERTEX_UNIFORM_COMPONENTS 0x8B4A +#define GL_MAX_VARYING_FLOATS 0x8B4B +#define GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS 0x8B4C +#define GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS 0x8B4D +#define GL_SHADER_TYPE 0x8B4F +#define GL_FLOAT_VEC2 0x8B50 +#define GL_FLOAT_VEC3 
0x8B51 +#define GL_FLOAT_VEC4 0x8B52 +#define GL_INT_VEC2 0x8B53 +#define GL_INT_VEC3 0x8B54 +#define GL_INT_VEC4 0x8B55 +#define GL_BOOL 0x8B56 +#define GL_BOOL_VEC2 0x8B57 +#define GL_BOOL_VEC3 0x8B58 +#define GL_BOOL_VEC4 0x8B59 +#define GL_FLOAT_MAT2 0x8B5A +#define GL_FLOAT_MAT3 0x8B5B +#define GL_FLOAT_MAT4 0x8B5C +#define GL_SAMPLER_1D 0x8B5D +#define GL_SAMPLER_2D 0x8B5E +#define GL_SAMPLER_3D 0x8B5F +#define GL_SAMPLER_CUBE 0x8B60 +#define GL_SAMPLER_1D_SHADOW 0x8B61 +#define GL_SAMPLER_2D_SHADOW 0x8B62 +#define GL_DELETE_STATUS 0x8B80 +#define GL_COMPILE_STATUS 0x8B81 +#define GL_LINK_STATUS 0x8B82 +#define GL_VALIDATE_STATUS 0x8B83 +#define GL_INFO_LOG_LENGTH 0x8B84 +#define GL_ATTACHED_SHADERS 0x8B85 +#define GL_ACTIVE_UNIFORMS 0x8B86 +#define GL_ACTIVE_UNIFORM_MAX_LENGTH 0x8B87 +#define GL_SHADER_SOURCE_LENGTH 0x8B88 +#define GL_ACTIVE_ATTRIBUTES 0x8B89 +#define GL_ACTIVE_ATTRIBUTE_MAX_LENGTH 0x8B8A +#define GL_FRAGMENT_SHADER_DERIVATIVE_HINT 0x8B8B +#define GL_SHADING_LANGUAGE_VERSION 0x8B8C +#define GL_CURRENT_PROGRAM 0x8B8D +#define GL_POINT_SPRITE_COORD_ORIGIN 0x8CA0 +#define GL_LOWER_LEFT 0x8CA1 +#define GL_UPPER_LEFT 0x8CA2 +#define GL_STENCIL_BACK_REF 0x8CA3 +#define GL_STENCIL_BACK_VALUE_MASK 0x8CA4 +#define GL_STENCIL_BACK_WRITEMASK 0x8CA5 + +typedef char GLchar; + +typedef void (GLAPIENTRY * PFNGLATTACHSHADERPROC) (GLuint program, GLuint shader); +typedef void (GLAPIENTRY * PFNGLBINDATTRIBLOCATIONPROC) (GLuint program, GLuint index, const GLchar* name); +typedef void (GLAPIENTRY * PFNGLBLENDEQUATIONSEPARATEPROC) (GLenum, GLenum); +typedef void (GLAPIENTRY * PFNGLCOMPILESHADERPROC) (GLuint shader); +typedef GLuint (GLAPIENTRY * PFNGLCREATEPROGRAMPROC) (void); +typedef GLuint (GLAPIENTRY * PFNGLCREATESHADERPROC) (GLenum type); +typedef void (GLAPIENTRY * PFNGLDELETEPROGRAMPROC) (GLuint program); +typedef void (GLAPIENTRY * PFNGLDELETESHADERPROC) (GLuint shader); +typedef void (GLAPIENTRY * PFNGLDETACHSHADERPROC) (GLuint program, GLuint shader); 
+typedef void (GLAPIENTRY * PFNGLDISABLEVERTEXATTRIBARRAYPROC) (GLuint); +typedef void (GLAPIENTRY * PFNGLDRAWBUFFERSPROC) (GLsizei n, const GLenum* bufs); +typedef void (GLAPIENTRY * PFNGLENABLEVERTEXATTRIBARRAYPROC) (GLuint); +typedef void (GLAPIENTRY * PFNGLGETACTIVEATTRIBPROC) (GLuint program, GLuint index, GLsizei maxLength, GLsizei* length, GLint* size, GLenum* type, GLchar* name); +typedef void (GLAPIENTRY * PFNGLGETACTIVEUNIFORMPROC) (GLuint program, GLuint index, GLsizei maxLength, GLsizei* length, GLint* size, GLenum* type, GLchar* name); +typedef void (GLAPIENTRY * PFNGLGETATTACHEDSHADERSPROC) (GLuint program, GLsizei maxCount, GLsizei* count, GLuint* shaders); +typedef GLint (GLAPIENTRY * PFNGLGETATTRIBLOCATIONPROC) (GLuint program, const GLchar* name); +typedef void (GLAPIENTRY * PFNGLGETPROGRAMINFOLOGPROC) (GLuint program, GLsizei bufSize, GLsizei* length, GLchar* infoLog); +typedef void (GLAPIENTRY * PFNGLGETPROGRAMIVPROC) (GLuint program, GLenum pname, GLint* param); +typedef void (GLAPIENTRY * PFNGLGETSHADERINFOLOGPROC) (GLuint shader, GLsizei bufSize, GLsizei* length, GLchar* infoLog); +typedef void (GLAPIENTRY * PFNGLGETSHADERSOURCEPROC) (GLint obj, GLsizei maxLength, GLsizei* length, GLchar* source); +typedef void (GLAPIENTRY * PFNGLGETSHADERIVPROC) (GLuint shader, GLenum pname, GLint* param); +typedef GLint (GLAPIENTRY * PFNGLGETUNIFORMLOCATIONPROC) (GLuint program, const GLchar* name); +typedef void (GLAPIENTRY * PFNGLGETUNIFORMFVPROC) (GLuint program, GLint location, GLfloat* params); +typedef void (GLAPIENTRY * PFNGLGETUNIFORMIVPROC) (GLuint program, GLint location, GLint* params); +typedef void (GLAPIENTRY * PFNGLGETVERTEXATTRIBPOINTERVPROC) (GLuint, GLenum, GLvoid*); +typedef void (GLAPIENTRY * PFNGLGETVERTEXATTRIBDVPROC) (GLuint, GLenum, GLdouble*); +typedef void (GLAPIENTRY * PFNGLGETVERTEXATTRIBFVPROC) (GLuint, GLenum, GLfloat*); +typedef void (GLAPIENTRY * PFNGLGETVERTEXATTRIBIVPROC) (GLuint, GLenum, GLint*); +typedef GLboolean 
(GLAPIENTRY * PFNGLISPROGRAMPROC) (GLuint program); +typedef GLboolean (GLAPIENTRY * PFNGLISSHADERPROC) (GLuint shader); +typedef void (GLAPIENTRY * PFNGLLINKPROGRAMPROC) (GLuint program); +typedef void (GLAPIENTRY * PFNGLSHADERSOURCEPROC) (GLuint shader, GLsizei count, const GLchar** strings, const GLint* lengths); +typedef void (GLAPIENTRY * PFNGLSTENCILFUNCSEPARATEPROC) (GLenum frontfunc, GLenum backfunc, GLint ref, GLuint mask); +typedef void (GLAPIENTRY * PFNGLSTENCILMASKSEPARATEPROC) (GLenum, GLuint); +typedef void (GLAPIENTRY * PFNGLSTENCILOPSEPARATEPROC) (GLenum face, GLenum sfail, GLenum dpfail, GLenum dppass); +typedef void (GLAPIENTRY * PFNGLUNIFORM1FPROC) (GLint location, GLfloat v0); +typedef void (GLAPIENTRY * PFNGLUNIFORM1FVPROC) (GLint location, GLsizei count, const GLfloat* value); +typedef void (GLAPIENTRY * PFNGLUNIFORM1IPROC) (GLint location, GLint v0); +typedef void (GLAPIENTRY * PFNGLUNIFORM1IVPROC) (GLint location, GLsizei count, const GLint* value); +typedef void (GLAPIENTRY * PFNGLUNIFORM2FPROC) (GLint location, GLfloat v0, GLfloat v1); +typedef void (GLAPIENTRY * PFNGLUNIFORM2FVPROC) (GLint location, GLsizei count, const GLfloat* value); +typedef void (GLAPIENTRY * PFNGLUNIFORM2IPROC) (GLint location, GLint v0, GLint v1); +typedef void (GLAPIENTRY * PFNGLUNIFORM2IVPROC) (GLint location, GLsizei count, const GLint* value); +typedef void (GLAPIENTRY * PFNGLUNIFORM3FPROC) (GLint location, GLfloat v0, GLfloat v1, GLfloat v2); +typedef void (GLAPIENTRY * PFNGLUNIFORM3FVPROC) (GLint location, GLsizei count, const GLfloat* value); +typedef void (GLAPIENTRY * PFNGLUNIFORM3IPROC) (GLint location, GLint v0, GLint v1, GLint v2); +typedef void (GLAPIENTRY * PFNGLUNIFORM3IVPROC) (GLint location, GLsizei count, const GLint* value); +typedef void (GLAPIENTRY * PFNGLUNIFORM4FPROC) (GLint location, GLfloat v0, GLfloat v1, GLfloat v2, GLfloat v3); +typedef void (GLAPIENTRY * PFNGLUNIFORM4FVPROC) (GLint location, GLsizei count, const GLfloat* value); 
+typedef void (GLAPIENTRY * PFNGLUNIFORM4IPROC) (GLint location, GLint v0, GLint v1, GLint v2, GLint v3); +typedef void (GLAPIENTRY * PFNGLUNIFORM4IVPROC) (GLint location, GLsizei count, const GLint* value); +typedef void (GLAPIENTRY * PFNGLUNIFORMMATRIX2FVPROC) (GLint location, GLsizei count, GLboolean transpose, const GLfloat* value); +typedef void (GLAPIENTRY * PFNGLUNIFORMMATRIX3FVPROC) (GLint location, GLsizei count, GLboolean transpose, const GLfloat* value); +typedef void (GLAPIENTRY * PFNGLUNIFORMMATRIX4FVPROC) (GLint location, GLsizei count, GLboolean transpose, const GLfloat* value); +typedef void (GLAPIENTRY * PFNGLUSEPROGRAMPROC) (GLuint program); +typedef void (GLAPIENTRY * PFNGLVALIDATEPROGRAMPROC) (GLuint program); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB1DPROC) (GLuint index, GLdouble x); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB1DVPROC) (GLuint index, const GLdouble* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB1FPROC) (GLuint index, GLfloat x); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB1FVPROC) (GLuint index, const GLfloat* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB1SPROC) (GLuint index, GLshort x); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB1SVPROC) (GLuint index, const GLshort* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB2DPROC) (GLuint index, GLdouble x, GLdouble y); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB2DVPROC) (GLuint index, const GLdouble* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB2FPROC) (GLuint index, GLfloat x, GLfloat y); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB2FVPROC) (GLuint index, const GLfloat* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB2SPROC) (GLuint index, GLshort x, GLshort y); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB2SVPROC) (GLuint index, const GLshort* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB3DPROC) (GLuint index, GLdouble x, GLdouble y, GLdouble z); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB3DVPROC) (GLuint index, const GLdouble* v); +typedef void 
(GLAPIENTRY * PFNGLVERTEXATTRIB3FPROC) (GLuint index, GLfloat x, GLfloat y, GLfloat z); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB3FVPROC) (GLuint index, const GLfloat* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB3SPROC) (GLuint index, GLshort x, GLshort y, GLshort z); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB3SVPROC) (GLuint index, const GLshort* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB4NBVPROC) (GLuint index, const GLbyte* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB4NIVPROC) (GLuint index, const GLint* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB4NSVPROC) (GLuint index, const GLshort* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB4NUBPROC) (GLuint index, GLubyte x, GLubyte y, GLubyte z, GLubyte w); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB4NUBVPROC) (GLuint index, const GLubyte* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB4NUIVPROC) (GLuint index, const GLuint* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB4NUSVPROC) (GLuint index, const GLushort* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB4BVPROC) (GLuint index, const GLbyte* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB4DPROC) (GLuint index, GLdouble x, GLdouble y, GLdouble z, GLdouble w); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB4DVPROC) (GLuint index, const GLdouble* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB4FPROC) (GLuint index, GLfloat x, GLfloat y, GLfloat z, GLfloat w); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB4FVPROC) (GLuint index, const GLfloat* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB4IVPROC) (GLuint index, const GLint* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB4SPROC) (GLuint index, GLshort x, GLshort y, GLshort z, GLshort w); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB4SVPROC) (GLuint index, const GLshort* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB4UBVPROC) (GLuint index, const GLubyte* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB4UIVPROC) (GLuint index, const GLuint* v); +typedef void (GLAPIENTRY 
* PFNGLVERTEXATTRIB4USVPROC) (GLuint index, const GLushort* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIBPOINTERPROC) (GLuint index, GLint size, GLenum type, GLboolean normalized, GLsizei stride, const GLvoid* pointer); + +#define glAttachShader GLEW_GET_FUN(__glewAttachShader) +#define glBindAttribLocation GLEW_GET_FUN(__glewBindAttribLocation) +#define glBlendEquationSeparate GLEW_GET_FUN(__glewBlendEquationSeparate) +#define glCompileShader GLEW_GET_FUN(__glewCompileShader) +#define glCreateProgram GLEW_GET_FUN(__glewCreateProgram) +#define glCreateShader GLEW_GET_FUN(__glewCreateShader) +#define glDeleteProgram GLEW_GET_FUN(__glewDeleteProgram) +#define glDeleteShader GLEW_GET_FUN(__glewDeleteShader) +#define glDetachShader GLEW_GET_FUN(__glewDetachShader) +#define glDisableVertexAttribArray GLEW_GET_FUN(__glewDisableVertexAttribArray) +#define glDrawBuffers GLEW_GET_FUN(__glewDrawBuffers) +#define glEnableVertexAttribArray GLEW_GET_FUN(__glewEnableVertexAttribArray) +#define glGetActiveAttrib GLEW_GET_FUN(__glewGetActiveAttrib) +#define glGetActiveUniform GLEW_GET_FUN(__glewGetActiveUniform) +#define glGetAttachedShaders GLEW_GET_FUN(__glewGetAttachedShaders) +#define glGetAttribLocation GLEW_GET_FUN(__glewGetAttribLocation) +#define glGetProgramInfoLog GLEW_GET_FUN(__glewGetProgramInfoLog) +#define glGetProgramiv GLEW_GET_FUN(__glewGetProgramiv) +#define glGetShaderInfoLog GLEW_GET_FUN(__glewGetShaderInfoLog) +#define glGetShaderSource GLEW_GET_FUN(__glewGetShaderSource) +#define glGetShaderiv GLEW_GET_FUN(__glewGetShaderiv) +#define glGetUniformLocation GLEW_GET_FUN(__glewGetUniformLocation) +#define glGetUniformfv GLEW_GET_FUN(__glewGetUniformfv) +#define glGetUniformiv GLEW_GET_FUN(__glewGetUniformiv) +#define glGetVertexAttribPointerv GLEW_GET_FUN(__glewGetVertexAttribPointerv) +#define glGetVertexAttribdv GLEW_GET_FUN(__glewGetVertexAttribdv) +#define glGetVertexAttribfv GLEW_GET_FUN(__glewGetVertexAttribfv) +#define glGetVertexAttribiv 
GLEW_GET_FUN(__glewGetVertexAttribiv) +#define glIsProgram GLEW_GET_FUN(__glewIsProgram) +#define glIsShader GLEW_GET_FUN(__glewIsShader) +#define glLinkProgram GLEW_GET_FUN(__glewLinkProgram) +#define glShaderSource GLEW_GET_FUN(__glewShaderSource) +#define glStencilFuncSeparate GLEW_GET_FUN(__glewStencilFuncSeparate) +#define glStencilMaskSeparate GLEW_GET_FUN(__glewStencilMaskSeparate) +#define glStencilOpSeparate GLEW_GET_FUN(__glewStencilOpSeparate) +#define glUniform1f GLEW_GET_FUN(__glewUniform1f) +#define glUniform1fv GLEW_GET_FUN(__glewUniform1fv) +#define glUniform1i GLEW_GET_FUN(__glewUniform1i) +#define glUniform1iv GLEW_GET_FUN(__glewUniform1iv) +#define glUniform2f GLEW_GET_FUN(__glewUniform2f) +#define glUniform2fv GLEW_GET_FUN(__glewUniform2fv) +#define glUniform2i GLEW_GET_FUN(__glewUniform2i) +#define glUniform2iv GLEW_GET_FUN(__glewUniform2iv) +#define glUniform3f GLEW_GET_FUN(__glewUniform3f) +#define glUniform3fv GLEW_GET_FUN(__glewUniform3fv) +#define glUniform3i GLEW_GET_FUN(__glewUniform3i) +#define glUniform3iv GLEW_GET_FUN(__glewUniform3iv) +#define glUniform4f GLEW_GET_FUN(__glewUniform4f) +#define glUniform4fv GLEW_GET_FUN(__glewUniform4fv) +#define glUniform4i GLEW_GET_FUN(__glewUniform4i) +#define glUniform4iv GLEW_GET_FUN(__glewUniform4iv) +#define glUniformMatrix2fv GLEW_GET_FUN(__glewUniformMatrix2fv) +#define glUniformMatrix3fv GLEW_GET_FUN(__glewUniformMatrix3fv) +#define glUniformMatrix4fv GLEW_GET_FUN(__glewUniformMatrix4fv) +#define glUseProgram GLEW_GET_FUN(__glewUseProgram) +#define glValidateProgram GLEW_GET_FUN(__glewValidateProgram) +#define glVertexAttrib1d GLEW_GET_FUN(__glewVertexAttrib1d) +#define glVertexAttrib1dv GLEW_GET_FUN(__glewVertexAttrib1dv) +#define glVertexAttrib1f GLEW_GET_FUN(__glewVertexAttrib1f) +#define glVertexAttrib1fv GLEW_GET_FUN(__glewVertexAttrib1fv) +#define glVertexAttrib1s GLEW_GET_FUN(__glewVertexAttrib1s) +#define glVertexAttrib1sv GLEW_GET_FUN(__glewVertexAttrib1sv) +#define glVertexAttrib2d 
GLEW_GET_FUN(__glewVertexAttrib2d) +#define glVertexAttrib2dv GLEW_GET_FUN(__glewVertexAttrib2dv) +#define glVertexAttrib2f GLEW_GET_FUN(__glewVertexAttrib2f) +#define glVertexAttrib2fv GLEW_GET_FUN(__glewVertexAttrib2fv) +#define glVertexAttrib2s GLEW_GET_FUN(__glewVertexAttrib2s) +#define glVertexAttrib2sv GLEW_GET_FUN(__glewVertexAttrib2sv) +#define glVertexAttrib3d GLEW_GET_FUN(__glewVertexAttrib3d) +#define glVertexAttrib3dv GLEW_GET_FUN(__glewVertexAttrib3dv) +#define glVertexAttrib3f GLEW_GET_FUN(__glewVertexAttrib3f) +#define glVertexAttrib3fv GLEW_GET_FUN(__glewVertexAttrib3fv) +#define glVertexAttrib3s GLEW_GET_FUN(__glewVertexAttrib3s) +#define glVertexAttrib3sv GLEW_GET_FUN(__glewVertexAttrib3sv) +#define glVertexAttrib4Nbv GLEW_GET_FUN(__glewVertexAttrib4Nbv) +#define glVertexAttrib4Niv GLEW_GET_FUN(__glewVertexAttrib4Niv) +#define glVertexAttrib4Nsv GLEW_GET_FUN(__glewVertexAttrib4Nsv) +#define glVertexAttrib4Nub GLEW_GET_FUN(__glewVertexAttrib4Nub) +#define glVertexAttrib4Nubv GLEW_GET_FUN(__glewVertexAttrib4Nubv) +#define glVertexAttrib4Nuiv GLEW_GET_FUN(__glewVertexAttrib4Nuiv) +#define glVertexAttrib4Nusv GLEW_GET_FUN(__glewVertexAttrib4Nusv) +#define glVertexAttrib4bv GLEW_GET_FUN(__glewVertexAttrib4bv) +#define glVertexAttrib4d GLEW_GET_FUN(__glewVertexAttrib4d) +#define glVertexAttrib4dv GLEW_GET_FUN(__glewVertexAttrib4dv) +#define glVertexAttrib4f GLEW_GET_FUN(__glewVertexAttrib4f) +#define glVertexAttrib4fv GLEW_GET_FUN(__glewVertexAttrib4fv) +#define glVertexAttrib4iv GLEW_GET_FUN(__glewVertexAttrib4iv) +#define glVertexAttrib4s GLEW_GET_FUN(__glewVertexAttrib4s) +#define glVertexAttrib4sv GLEW_GET_FUN(__glewVertexAttrib4sv) +#define glVertexAttrib4ubv GLEW_GET_FUN(__glewVertexAttrib4ubv) +#define glVertexAttrib4uiv GLEW_GET_FUN(__glewVertexAttrib4uiv) +#define glVertexAttrib4usv GLEW_GET_FUN(__glewVertexAttrib4usv) +#define glVertexAttribPointer GLEW_GET_FUN(__glewVertexAttribPointer) + +#define GLEW_VERSION_2_0 
GLEW_GET_VAR(__GLEW_VERSION_2_0) + +#endif /* GL_VERSION_2_0 */ + +/* ----------------------------- GL_VERSION_2_1 ---------------------------- */ + +#ifndef GL_VERSION_2_1 +#define GL_VERSION_2_1 1 + +#define GL_CURRENT_RASTER_SECONDARY_COLOR 0x845F +#define GL_PIXEL_PACK_BUFFER 0x88EB +#define GL_PIXEL_UNPACK_BUFFER 0x88EC +#define GL_PIXEL_PACK_BUFFER_BINDING 0x88ED +#define GL_PIXEL_UNPACK_BUFFER_BINDING 0x88EF +#define GL_FLOAT_MAT2x3 0x8B65 +#define GL_FLOAT_MAT2x4 0x8B66 +#define GL_FLOAT_MAT3x2 0x8B67 +#define GL_FLOAT_MAT3x4 0x8B68 +#define GL_FLOAT_MAT4x2 0x8B69 +#define GL_FLOAT_MAT4x3 0x8B6A +#define GL_SRGB 0x8C40 +#define GL_SRGB8 0x8C41 +#define GL_SRGB_ALPHA 0x8C42 +#define GL_SRGB8_ALPHA8 0x8C43 +#define GL_SLUMINANCE_ALPHA 0x8C44 +#define GL_SLUMINANCE8_ALPHA8 0x8C45 +#define GL_SLUMINANCE 0x8C46 +#define GL_SLUMINANCE8 0x8C47 +#define GL_COMPRESSED_SRGB 0x8C48 +#define GL_COMPRESSED_SRGB_ALPHA 0x8C49 +#define GL_COMPRESSED_SLUMINANCE 0x8C4A +#define GL_COMPRESSED_SLUMINANCE_ALPHA 0x8C4B + +typedef void (GLAPIENTRY * PFNGLUNIFORMMATRIX2X3FVPROC) (GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +typedef void (GLAPIENTRY * PFNGLUNIFORMMATRIX2X4FVPROC) (GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +typedef void (GLAPIENTRY * PFNGLUNIFORMMATRIX3X2FVPROC) (GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +typedef void (GLAPIENTRY * PFNGLUNIFORMMATRIX3X4FVPROC) (GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +typedef void (GLAPIENTRY * PFNGLUNIFORMMATRIX4X2FVPROC) (GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +typedef void (GLAPIENTRY * PFNGLUNIFORMMATRIX4X3FVPROC) (GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); + +#define glUniformMatrix2x3fv GLEW_GET_FUN(__glewUniformMatrix2x3fv) +#define glUniformMatrix2x4fv GLEW_GET_FUN(__glewUniformMatrix2x4fv) +#define glUniformMatrix3x2fv 
GLEW_GET_FUN(__glewUniformMatrix3x2fv) +#define glUniformMatrix3x4fv GLEW_GET_FUN(__glewUniformMatrix3x4fv) +#define glUniformMatrix4x2fv GLEW_GET_FUN(__glewUniformMatrix4x2fv) +#define glUniformMatrix4x3fv GLEW_GET_FUN(__glewUniformMatrix4x3fv) + +#define GLEW_VERSION_2_1 GLEW_GET_VAR(__GLEW_VERSION_2_1) + +#endif /* GL_VERSION_2_1 */ + +/* ----------------------------- GL_VERSION_3_0 ---------------------------- */ + +#ifndef GL_VERSION_3_0 +#define GL_VERSION_3_0 1 + +#define GL_MAX_CLIP_DISTANCES GL_MAX_CLIP_PLANES +#define GL_CLIP_DISTANCE5 GL_CLIP_PLANE5 +#define GL_CLIP_DISTANCE1 GL_CLIP_PLANE1 +#define GL_CLIP_DISTANCE3 GL_CLIP_PLANE3 +#define GL_COMPARE_REF_TO_TEXTURE GL_COMPARE_R_TO_TEXTURE_ARB +#define GL_CLIP_DISTANCE0 GL_CLIP_PLANE0 +#define GL_CLIP_DISTANCE4 GL_CLIP_PLANE4 +#define GL_CLIP_DISTANCE2 GL_CLIP_PLANE2 +#define GL_MAX_VARYING_COMPONENTS GL_MAX_VARYING_FLOATS +#define GL_CONTEXT_FLAG_FORWARD_COMPATIBLE_BIT 0x0001 +#define GL_MAJOR_VERSION 0x821B +#define GL_MINOR_VERSION 0x821C +#define GL_NUM_EXTENSIONS 0x821D +#define GL_CONTEXT_FLAGS 0x821E +#define GL_DEPTH_BUFFER 0x8223 +#define GL_STENCIL_BUFFER 0x8224 +#define GL_COMPRESSED_RED 0x8225 +#define GL_COMPRESSED_RG 0x8226 +#define GL_RGBA32F 0x8814 +#define GL_RGB32F 0x8815 +#define GL_RGBA16F 0x881A +#define GL_RGB16F 0x881B +#define GL_VERTEX_ATTRIB_ARRAY_INTEGER 0x88FD +#define GL_MAX_ARRAY_TEXTURE_LAYERS 0x88FF +#define GL_MIN_PROGRAM_TEXEL_OFFSET 0x8904 +#define GL_MAX_PROGRAM_TEXEL_OFFSET 0x8905 +#define GL_CLAMP_VERTEX_COLOR 0x891A +#define GL_CLAMP_FRAGMENT_COLOR 0x891B +#define GL_CLAMP_READ_COLOR 0x891C +#define GL_FIXED_ONLY 0x891D +#define GL_TEXTURE_RED_TYPE 0x8C10 +#define GL_TEXTURE_GREEN_TYPE 0x8C11 +#define GL_TEXTURE_BLUE_TYPE 0x8C12 +#define GL_TEXTURE_ALPHA_TYPE 0x8C13 +#define GL_TEXTURE_LUMINANCE_TYPE 0x8C14 +#define GL_TEXTURE_INTENSITY_TYPE 0x8C15 +#define GL_TEXTURE_DEPTH_TYPE 0x8C16 +#define GL_UNSIGNED_NORMALIZED 0x8C17 +#define GL_TEXTURE_1D_ARRAY 0x8C18 
+#define GL_PROXY_TEXTURE_1D_ARRAY 0x8C19 +#define GL_TEXTURE_2D_ARRAY 0x8C1A +#define GL_PROXY_TEXTURE_2D_ARRAY 0x8C1B +#define GL_TEXTURE_BINDING_1D_ARRAY 0x8C1C +#define GL_TEXTURE_BINDING_2D_ARRAY 0x8C1D +#define GL_R11F_G11F_B10F 0x8C3A +#define GL_UNSIGNED_INT_10F_11F_11F_REV 0x8C3B +#define GL_RGB9_E5 0x8C3D +#define GL_UNSIGNED_INT_5_9_9_9_REV 0x8C3E +#define GL_TEXTURE_SHARED_SIZE 0x8C3F +#define GL_TRANSFORM_FEEDBACK_VARYING_MAX_LENGTH 0x8C76 +#define GL_TRANSFORM_FEEDBACK_BUFFER_MODE 0x8C7F +#define GL_MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS 0x8C80 +#define GL_TRANSFORM_FEEDBACK_VARYINGS 0x8C83 +#define GL_TRANSFORM_FEEDBACK_BUFFER_START 0x8C84 +#define GL_TRANSFORM_FEEDBACK_BUFFER_SIZE 0x8C85 +#define GL_PRIMITIVES_GENERATED 0x8C87 +#define GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN 0x8C88 +#define GL_RASTERIZER_DISCARD 0x8C89 +#define GL_MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS 0x8C8A +#define GL_MAX_TRANSFORM_FEEDBACK_SEPARATE_ATTRIBS 0x8C8B +#define GL_INTERLEAVED_ATTRIBS 0x8C8C +#define GL_SEPARATE_ATTRIBS 0x8C8D +#define GL_TRANSFORM_FEEDBACK_BUFFER 0x8C8E +#define GL_TRANSFORM_FEEDBACK_BUFFER_BINDING 0x8C8F +#define GL_RGBA32UI 0x8D70 +#define GL_RGB32UI 0x8D71 +#define GL_RGBA16UI 0x8D76 +#define GL_RGB16UI 0x8D77 +#define GL_RGBA8UI 0x8D7C +#define GL_RGB8UI 0x8D7D +#define GL_RGBA32I 0x8D82 +#define GL_RGB32I 0x8D83 +#define GL_RGBA16I 0x8D88 +#define GL_RGB16I 0x8D89 +#define GL_RGBA8I 0x8D8E +#define GL_RGB8I 0x8D8F +#define GL_RED_INTEGER 0x8D94 +#define GL_GREEN_INTEGER 0x8D95 +#define GL_BLUE_INTEGER 0x8D96 +#define GL_ALPHA_INTEGER 0x8D97 +#define GL_RGB_INTEGER 0x8D98 +#define GL_RGBA_INTEGER 0x8D99 +#define GL_BGR_INTEGER 0x8D9A +#define GL_BGRA_INTEGER 0x8D9B +#define GL_SAMPLER_1D_ARRAY 0x8DC0 +#define GL_SAMPLER_2D_ARRAY 0x8DC1 +#define GL_SAMPLER_1D_ARRAY_SHADOW 0x8DC3 +#define GL_SAMPLER_2D_ARRAY_SHADOW 0x8DC4 +#define GL_SAMPLER_CUBE_SHADOW 0x8DC5 +#define GL_UNSIGNED_INT_VEC2 0x8DC6 +#define GL_UNSIGNED_INT_VEC3 0x8DC7 
+#define GL_UNSIGNED_INT_VEC4 0x8DC8 +#define GL_INT_SAMPLER_1D 0x8DC9 +#define GL_INT_SAMPLER_2D 0x8DCA +#define GL_INT_SAMPLER_3D 0x8DCB +#define GL_INT_SAMPLER_CUBE 0x8DCC +#define GL_INT_SAMPLER_1D_ARRAY 0x8DCE +#define GL_INT_SAMPLER_2D_ARRAY 0x8DCF +#define GL_UNSIGNED_INT_SAMPLER_1D 0x8DD1 +#define GL_UNSIGNED_INT_SAMPLER_2D 0x8DD2 +#define GL_UNSIGNED_INT_SAMPLER_3D 0x8DD3 +#define GL_UNSIGNED_INT_SAMPLER_CUBE 0x8DD4 +#define GL_UNSIGNED_INT_SAMPLER_1D_ARRAY 0x8DD6 +#define GL_UNSIGNED_INT_SAMPLER_2D_ARRAY 0x8DD7 +#define GL_QUERY_WAIT 0x8E13 +#define GL_QUERY_NO_WAIT 0x8E14 +#define GL_QUERY_BY_REGION_WAIT 0x8E15 +#define GL_QUERY_BY_REGION_NO_WAIT 0x8E16 + +typedef void (GLAPIENTRY * PFNGLBEGINCONDITIONALRENDERPROC) (GLuint, GLenum); +typedef void (GLAPIENTRY * PFNGLBEGINTRANSFORMFEEDBACKPROC) (GLenum); +typedef void (GLAPIENTRY * PFNGLBINDBUFFERBASEPROC) (GLenum, GLuint, GLuint); +typedef void (GLAPIENTRY * PFNGLBINDBUFFERRANGEPROC) (GLenum, GLuint, GLuint, GLintptr, GLsizeiptr); +typedef void (GLAPIENTRY * PFNGLBINDFRAGDATALOCATIONPROC) (GLuint, GLuint, const GLchar*); +typedef void (GLAPIENTRY * PFNGLCLAMPCOLORPROC) (GLenum, GLenum); +typedef void (GLAPIENTRY * PFNGLCLEARBUFFERFIPROC) (GLenum, GLint, GLfloat, GLint); +typedef void (GLAPIENTRY * PFNGLCLEARBUFFERFVPROC) (GLenum, GLint, const GLfloat*); +typedef void (GLAPIENTRY * PFNGLCLEARBUFFERIVPROC) (GLenum, GLint, const GLint*); +typedef void (GLAPIENTRY * PFNGLCLEARBUFFERUIVPROC) (GLenum, GLint, const GLuint*); +typedef void (GLAPIENTRY * PFNGLCOLORMASKIPROC) (GLuint, GLboolean, GLboolean, GLboolean, GLboolean); +typedef void (GLAPIENTRY * PFNGLDISABLEIPROC) (GLenum, GLuint); +typedef void (GLAPIENTRY * PFNGLENABLEIPROC) (GLenum, GLuint); +typedef void (GLAPIENTRY * PFNGLENDCONDITIONALRENDERPROC) (void); +typedef void (GLAPIENTRY * PFNGLENDTRANSFORMFEEDBACKPROC) (void); +typedef void (GLAPIENTRY * PFNGLGETBOOLEANI_VPROC) (GLenum, GLuint, GLboolean*); +typedef GLint (GLAPIENTRY * 
PFNGLGETFRAGDATALOCATIONPROC) (GLuint, const GLchar*); +typedef void (GLAPIENTRY * PFNGLGETINTEGERI_VPROC) (GLenum, GLuint, GLint*); +typedef const GLubyte* (GLAPIENTRY * PFNGLGETSTRINGIPROC) (GLenum, GLuint); +typedef void (GLAPIENTRY * PFNGLGETTEXPARAMETERIIVPROC) (GLenum, GLenum, GLint*); +typedef void (GLAPIENTRY * PFNGLGETTEXPARAMETERIUIVPROC) (GLenum, GLenum, GLuint*); +typedef void (GLAPIENTRY * PFNGLGETTRANSFORMFEEDBACKVARYINGPROC) (GLuint, GLuint, GLint*); +typedef void (GLAPIENTRY * PFNGLGETUNIFORMUIVPROC) (GLuint, GLint, GLuint*); +typedef void (GLAPIENTRY * PFNGLGETVERTEXATTRIBIIVPROC) (GLuint, GLenum, GLint*); +typedef void (GLAPIENTRY * PFNGLGETVERTEXATTRIBIUIVPROC) (GLuint, GLenum, GLuint*); +typedef GLboolean (GLAPIENTRY * PFNGLISENABLEDIPROC) (GLenum, GLuint); +typedef void (GLAPIENTRY * PFNGLTEXPARAMETERIIVPROC) (GLenum, GLenum, const GLint*); +typedef void (GLAPIENTRY * PFNGLTEXPARAMETERIUIVPROC) (GLenum, GLenum, const GLuint*); +typedef void (GLAPIENTRY * PFNGLTRANSFORMFEEDBACKVARYINGSPROC) (GLuint, GLsizei, const GLint*, GLenum); +typedef void (GLAPIENTRY * PFNGLUNIFORM1UIPROC) (GLint, GLuint); +typedef void (GLAPIENTRY * PFNGLUNIFORM1UIVPROC) (GLint, GLsizei, const GLuint*); +typedef void (GLAPIENTRY * PFNGLUNIFORM2UIPROC) (GLint, GLuint, GLuint); +typedef void (GLAPIENTRY * PFNGLUNIFORM2UIVPROC) (GLint, GLsizei, const GLuint*); +typedef void (GLAPIENTRY * PFNGLUNIFORM3UIPROC) (GLint, GLuint, GLuint, GLuint); +typedef void (GLAPIENTRY * PFNGLUNIFORM3UIVPROC) (GLint, GLsizei, const GLuint*); +typedef void (GLAPIENTRY * PFNGLUNIFORM4UIPROC) (GLint, GLuint, GLuint, GLuint, GLuint); +typedef void (GLAPIENTRY * PFNGLUNIFORM4UIVPROC) (GLint, GLsizei, const GLuint*); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIBI1IPROC) (GLuint, GLint); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIBI1IVPROC) (GLuint, const GLint*); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIBI1UIPROC) (GLuint, GLuint); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIBI1UIVPROC) (GLuint, 
const GLuint*); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIBI2IPROC) (GLuint, GLint, GLint); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIBI2IVPROC) (GLuint, const GLint*); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIBI2UIPROC) (GLuint, GLuint, GLuint); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIBI2UIVPROC) (GLuint, const GLuint*); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIBI3IPROC) (GLuint, GLint, GLint, GLint); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIBI3IVPROC) (GLuint, const GLint*); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIBI3UIPROC) (GLuint, GLuint, GLuint, GLuint); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIBI3UIVPROC) (GLuint, const GLuint*); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIBI4BVPROC) (GLuint, const GLbyte*); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIBI4IPROC) (GLuint, GLint, GLint, GLint, GLint); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIBI4IVPROC) (GLuint, const GLint*); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIBI4SVPROC) (GLuint, const GLshort*); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIBI4UBVPROC) (GLuint, const GLubyte*); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIBI4UIPROC) (GLuint, GLuint, GLuint, GLuint, GLuint); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIBI4UIVPROC) (GLuint, const GLuint*); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIBI4USVPROC) (GLuint, const GLushort*); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIBIPOINTERPROC) (GLuint, GLint, GLenum, GLsizei, const GLvoid*); + +#define glBeginConditionalRender GLEW_GET_FUN(__glewBeginConditionalRender) +#define glBeginTransformFeedback GLEW_GET_FUN(__glewBeginTransformFeedback) +#define glBindBufferBase GLEW_GET_FUN(__glewBindBufferBase) +#define glBindBufferRange GLEW_GET_FUN(__glewBindBufferRange) +#define glBindFragDataLocation GLEW_GET_FUN(__glewBindFragDataLocation) +#define glClampColor GLEW_GET_FUN(__glewClampColor) +#define glClearBufferfi GLEW_GET_FUN(__glewClearBufferfi) +#define glClearBufferfv GLEW_GET_FUN(__glewClearBufferfv) +#define 
glClearBufferiv GLEW_GET_FUN(__glewClearBufferiv) +#define glClearBufferuiv GLEW_GET_FUN(__glewClearBufferuiv) +#define glColorMaski GLEW_GET_FUN(__glewColorMaski) +#define glDisablei GLEW_GET_FUN(__glewDisablei) +#define glEnablei GLEW_GET_FUN(__glewEnablei) +#define glEndConditionalRender GLEW_GET_FUN(__glewEndConditionalRender) +#define glEndTransformFeedback GLEW_GET_FUN(__glewEndTransformFeedback) +#define glGetBooleani_v GLEW_GET_FUN(__glewGetBooleani_v) +#define glGetFragDataLocation GLEW_GET_FUN(__glewGetFragDataLocation) +#define glGetIntegeri_v GLEW_GET_FUN(__glewGetIntegeri_v) +#define glGetStringi GLEW_GET_FUN(__glewGetStringi) +#define glGetTexParameterIiv GLEW_GET_FUN(__glewGetTexParameterIiv) +#define glGetTexParameterIuiv GLEW_GET_FUN(__glewGetTexParameterIuiv) +#define glGetTransformFeedbackVarying GLEW_GET_FUN(__glewGetTransformFeedbackVarying) +#define glGetUniformuiv GLEW_GET_FUN(__glewGetUniformuiv) +#define glGetVertexAttribIiv GLEW_GET_FUN(__glewGetVertexAttribIiv) +#define glGetVertexAttribIuiv GLEW_GET_FUN(__glewGetVertexAttribIuiv) +#define glIsEnabledi GLEW_GET_FUN(__glewIsEnabledi) +#define glTexParameterIiv GLEW_GET_FUN(__glewTexParameterIiv) +#define glTexParameterIuiv GLEW_GET_FUN(__glewTexParameterIuiv) +#define glTransformFeedbackVaryings GLEW_GET_FUN(__glewTransformFeedbackVaryings) +#define glUniform1ui GLEW_GET_FUN(__glewUniform1ui) +#define glUniform1uiv GLEW_GET_FUN(__glewUniform1uiv) +#define glUniform2ui GLEW_GET_FUN(__glewUniform2ui) +#define glUniform2uiv GLEW_GET_FUN(__glewUniform2uiv) +#define glUniform3ui GLEW_GET_FUN(__glewUniform3ui) +#define glUniform3uiv GLEW_GET_FUN(__glewUniform3uiv) +#define glUniform4ui GLEW_GET_FUN(__glewUniform4ui) +#define glUniform4uiv GLEW_GET_FUN(__glewUniform4uiv) +#define glVertexAttribI1i GLEW_GET_FUN(__glewVertexAttribI1i) +#define glVertexAttribI1iv GLEW_GET_FUN(__glewVertexAttribI1iv) +#define glVertexAttribI1ui GLEW_GET_FUN(__glewVertexAttribI1ui) +#define glVertexAttribI1uiv 
GLEW_GET_FUN(__glewVertexAttribI1uiv) +#define glVertexAttribI2i GLEW_GET_FUN(__glewVertexAttribI2i) +#define glVertexAttribI2iv GLEW_GET_FUN(__glewVertexAttribI2iv) +#define glVertexAttribI2ui GLEW_GET_FUN(__glewVertexAttribI2ui) +#define glVertexAttribI2uiv GLEW_GET_FUN(__glewVertexAttribI2uiv) +#define glVertexAttribI3i GLEW_GET_FUN(__glewVertexAttribI3i) +#define glVertexAttribI3iv GLEW_GET_FUN(__glewVertexAttribI3iv) +#define glVertexAttribI3ui GLEW_GET_FUN(__glewVertexAttribI3ui) +#define glVertexAttribI3uiv GLEW_GET_FUN(__glewVertexAttribI3uiv) +#define glVertexAttribI4bv GLEW_GET_FUN(__glewVertexAttribI4bv) +#define glVertexAttribI4i GLEW_GET_FUN(__glewVertexAttribI4i) +#define glVertexAttribI4iv GLEW_GET_FUN(__glewVertexAttribI4iv) +#define glVertexAttribI4sv GLEW_GET_FUN(__glewVertexAttribI4sv) +#define glVertexAttribI4ubv GLEW_GET_FUN(__glewVertexAttribI4ubv) +#define glVertexAttribI4ui GLEW_GET_FUN(__glewVertexAttribI4ui) +#define glVertexAttribI4uiv GLEW_GET_FUN(__glewVertexAttribI4uiv) +#define glVertexAttribI4usv GLEW_GET_FUN(__glewVertexAttribI4usv) +#define glVertexAttribIPointer GLEW_GET_FUN(__glewVertexAttribIPointer) + +#define GLEW_VERSION_3_0 GLEW_GET_VAR(__GLEW_VERSION_3_0) + +#endif /* GL_VERSION_3_0 */ + +/* -------------------------- GL_3DFX_multisample -------------------------- */ + +#ifndef GL_3DFX_multisample +#define GL_3DFX_multisample 1 + +#define GL_MULTISAMPLE_3DFX 0x86B2 +#define GL_SAMPLE_BUFFERS_3DFX 0x86B3 +#define GL_SAMPLES_3DFX 0x86B4 +#define GL_MULTISAMPLE_BIT_3DFX 0x20000000 + +#define GLEW_3DFX_multisample GLEW_GET_VAR(__GLEW_3DFX_multisample) + +#endif /* GL_3DFX_multisample */ + +/* ---------------------------- GL_3DFX_tbuffer ---------------------------- */ + +#ifndef GL_3DFX_tbuffer +#define GL_3DFX_tbuffer 1 + +typedef void (GLAPIENTRY * PFNGLTBUFFERMASK3DFXPROC) (GLuint mask); + +#define glTbufferMask3DFX GLEW_GET_FUN(__glewTbufferMask3DFX) + +#define GLEW_3DFX_tbuffer GLEW_GET_VAR(__GLEW_3DFX_tbuffer) + +#endif 
/* GL_3DFX_tbuffer */ + +/* -------------------- GL_3DFX_texture_compression_FXT1 ------------------- */ + +#ifndef GL_3DFX_texture_compression_FXT1 +#define GL_3DFX_texture_compression_FXT1 1 + +#define GL_COMPRESSED_RGB_FXT1_3DFX 0x86B0 +#define GL_COMPRESSED_RGBA_FXT1_3DFX 0x86B1 + +#define GLEW_3DFX_texture_compression_FXT1 GLEW_GET_VAR(__GLEW_3DFX_texture_compression_FXT1) + +#endif /* GL_3DFX_texture_compression_FXT1 */ + +/* ------------------------ GL_APPLE_client_storage ------------------------ */ + +#ifndef GL_APPLE_client_storage +#define GL_APPLE_client_storage 1 + +#define GL_UNPACK_CLIENT_STORAGE_APPLE 0x85B2 + +#define GLEW_APPLE_client_storage GLEW_GET_VAR(__GLEW_APPLE_client_storage) + +#endif /* GL_APPLE_client_storage */ + +/* ------------------------- GL_APPLE_element_array ------------------------ */ + +#ifndef GL_APPLE_element_array +#define GL_APPLE_element_array 1 + +#define GL_ELEMENT_ARRAY_APPLE 0x8768 +#define GL_ELEMENT_ARRAY_TYPE_APPLE 0x8769 +#define GL_ELEMENT_ARRAY_POINTER_APPLE 0x876A + +typedef void (GLAPIENTRY * PFNGLDRAWELEMENTARRAYAPPLEPROC) (GLenum mode, GLint first, GLsizei count); +typedef void (GLAPIENTRY * PFNGLDRAWRANGEELEMENTARRAYAPPLEPROC) (GLenum mode, GLuint start, GLuint end, GLint first, GLsizei count); +typedef void (GLAPIENTRY * PFNGLELEMENTPOINTERAPPLEPROC) (GLenum type, const void* pointer); +typedef void (GLAPIENTRY * PFNGLMULTIDRAWELEMENTARRAYAPPLEPROC) (GLenum mode, const GLint* first, const GLsizei *count, GLsizei primcount); +typedef void (GLAPIENTRY * PFNGLMULTIDRAWRANGEELEMENTARRAYAPPLEPROC) (GLenum mode, GLuint start, GLuint end, const GLint* first, const GLsizei *count, GLsizei primcount); + +#define glDrawElementArrayAPPLE GLEW_GET_FUN(__glewDrawElementArrayAPPLE) +#define glDrawRangeElementArrayAPPLE GLEW_GET_FUN(__glewDrawRangeElementArrayAPPLE) +#define glElementPointerAPPLE GLEW_GET_FUN(__glewElementPointerAPPLE) +#define glMultiDrawElementArrayAPPLE GLEW_GET_FUN(__glewMultiDrawElementArrayAPPLE) 
+#define glMultiDrawRangeElementArrayAPPLE GLEW_GET_FUN(__glewMultiDrawRangeElementArrayAPPLE) + +#define GLEW_APPLE_element_array GLEW_GET_VAR(__GLEW_APPLE_element_array) + +#endif /* GL_APPLE_element_array */ + +/* ----------------------------- GL_APPLE_fence ---------------------------- */ + +#ifndef GL_APPLE_fence +#define GL_APPLE_fence 1 + +#define GL_DRAW_PIXELS_APPLE 0x8A0A +#define GL_FENCE_APPLE 0x8A0B + +typedef void (GLAPIENTRY * PFNGLDELETEFENCESAPPLEPROC) (GLsizei n, const GLuint* fences); +typedef void (GLAPIENTRY * PFNGLFINISHFENCEAPPLEPROC) (GLuint fence); +typedef void (GLAPIENTRY * PFNGLFINISHOBJECTAPPLEPROC) (GLenum object, GLint name); +typedef void (GLAPIENTRY * PFNGLGENFENCESAPPLEPROC) (GLsizei n, GLuint* fences); +typedef GLboolean (GLAPIENTRY * PFNGLISFENCEAPPLEPROC) (GLuint fence); +typedef void (GLAPIENTRY * PFNGLSETFENCEAPPLEPROC) (GLuint fence); +typedef GLboolean (GLAPIENTRY * PFNGLTESTFENCEAPPLEPROC) (GLuint fence); +typedef GLboolean (GLAPIENTRY * PFNGLTESTOBJECTAPPLEPROC) (GLenum object, GLuint name); + +#define glDeleteFencesAPPLE GLEW_GET_FUN(__glewDeleteFencesAPPLE) +#define glFinishFenceAPPLE GLEW_GET_FUN(__glewFinishFenceAPPLE) +#define glFinishObjectAPPLE GLEW_GET_FUN(__glewFinishObjectAPPLE) +#define glGenFencesAPPLE GLEW_GET_FUN(__glewGenFencesAPPLE) +#define glIsFenceAPPLE GLEW_GET_FUN(__glewIsFenceAPPLE) +#define glSetFenceAPPLE GLEW_GET_FUN(__glewSetFenceAPPLE) +#define glTestFenceAPPLE GLEW_GET_FUN(__glewTestFenceAPPLE) +#define glTestObjectAPPLE GLEW_GET_FUN(__glewTestObjectAPPLE) + +#define GLEW_APPLE_fence GLEW_GET_VAR(__GLEW_APPLE_fence) + +#endif /* GL_APPLE_fence */ + +/* ------------------------- GL_APPLE_float_pixels ------------------------- */ + +#ifndef GL_APPLE_float_pixels +#define GL_APPLE_float_pixels 1 + +#define GL_HALF_APPLE 0x140B +#define GL_RGBA_FLOAT32_APPLE 0x8814 +#define GL_RGB_FLOAT32_APPLE 0x8815 +#define GL_ALPHA_FLOAT32_APPLE 0x8816 +#define GL_INTENSITY_FLOAT32_APPLE 0x8817 +#define 
GL_LUMINANCE_FLOAT32_APPLE 0x8818 +#define GL_LUMINANCE_ALPHA_FLOAT32_APPLE 0x8819 +#define GL_RGBA_FLOAT16_APPLE 0x881A +#define GL_RGB_FLOAT16_APPLE 0x881B +#define GL_ALPHA_FLOAT16_APPLE 0x881C +#define GL_INTENSITY_FLOAT16_APPLE 0x881D +#define GL_LUMINANCE_FLOAT16_APPLE 0x881E +#define GL_LUMINANCE_ALPHA_FLOAT16_APPLE 0x881F +#define GL_COLOR_FLOAT_APPLE 0x8A0F + +#define GLEW_APPLE_float_pixels GLEW_GET_VAR(__GLEW_APPLE_float_pixels) + +#endif /* GL_APPLE_float_pixels */ + +/* ---------------------- GL_APPLE_flush_buffer_range ---------------------- */ + +#ifndef GL_APPLE_flush_buffer_range +#define GL_APPLE_flush_buffer_range 1 + +#define GL_BUFFER_SERIALIZED_MODIFY_APPLE 0x8A12 +#define GL_BUFFER_FLUSHING_UNMAP_APPLE 0x8A13 + +typedef void (GLAPIENTRY * PFNGLBUFFERPARAMETERIAPPLEPROC) (GLenum target, GLenum pname, GLint param); +typedef void (GLAPIENTRY * PFNGLFLUSHMAPPEDBUFFERRANGEAPPLEPROC) (GLenum target, GLintptr offset, GLsizeiptr size); + +#define glBufferParameteriAPPLE GLEW_GET_FUN(__glewBufferParameteriAPPLE) +#define glFlushMappedBufferRangeAPPLE GLEW_GET_FUN(__glewFlushMappedBufferRangeAPPLE) + +#define GLEW_APPLE_flush_buffer_range GLEW_GET_VAR(__GLEW_APPLE_flush_buffer_range) + +#endif /* GL_APPLE_flush_buffer_range */ + +/* ------------------------- GL_APPLE_pixel_buffer ------------------------- */ + +#ifndef GL_APPLE_pixel_buffer +#define GL_APPLE_pixel_buffer 1 + +#define GL_MIN_PBUFFER_VIEWPORT_DIMS_APPLE 0x8A10 + +#define GLEW_APPLE_pixel_buffer GLEW_GET_VAR(__GLEW_APPLE_pixel_buffer) + +#endif /* GL_APPLE_pixel_buffer */ + +/* ------------------------ GL_APPLE_specular_vector ----------------------- */ + +#ifndef GL_APPLE_specular_vector +#define GL_APPLE_specular_vector 1 + +#define GL_LIGHT_MODEL_SPECULAR_VECTOR_APPLE 0x85B0 + +#define GLEW_APPLE_specular_vector GLEW_GET_VAR(__GLEW_APPLE_specular_vector) + +#endif /* GL_APPLE_specular_vector */ + +/* ------------------------- GL_APPLE_texture_range ------------------------ */ + 
+#ifndef GL_APPLE_texture_range +#define GL_APPLE_texture_range 1 + +#define GL_TEXTURE_RANGE_LENGTH_APPLE 0x85B7 +#define GL_TEXTURE_RANGE_POINTER_APPLE 0x85B8 +#define GL_TEXTURE_STORAGE_HINT_APPLE 0x85BC +#define GL_STORAGE_PRIVATE_APPLE 0x85BD +#define GL_STORAGE_CACHED_APPLE 0x85BE +#define GL_STORAGE_SHARED_APPLE 0x85BF + +typedef void (GLAPIENTRY * PFNGLGETTEXPARAMETERPOINTERVAPPLEPROC) (GLenum target, GLenum pname, GLvoid **params); +typedef void (GLAPIENTRY * PFNGLTEXTURERANGEAPPLEPROC) (GLenum target, GLsizei length, GLvoid *pointer); + +#define glGetTexParameterPointervAPPLE GLEW_GET_FUN(__glewGetTexParameterPointervAPPLE) +#define glTextureRangeAPPLE GLEW_GET_FUN(__glewTextureRangeAPPLE) + +#define GLEW_APPLE_texture_range GLEW_GET_VAR(__GLEW_APPLE_texture_range) + +#endif /* GL_APPLE_texture_range */ + +/* ------------------------ GL_APPLE_transform_hint ------------------------ */ + +#ifndef GL_APPLE_transform_hint +#define GL_APPLE_transform_hint 1 + +#define GL_TRANSFORM_HINT_APPLE 0x85B1 + +#define GLEW_APPLE_transform_hint GLEW_GET_VAR(__GLEW_APPLE_transform_hint) + +#endif /* GL_APPLE_transform_hint */ + +/* ---------------------- GL_APPLE_vertex_array_object --------------------- */ + +#ifndef GL_APPLE_vertex_array_object +#define GL_APPLE_vertex_array_object 1 + +#define GL_VERTEX_ARRAY_BINDING_APPLE 0x85B5 + +typedef void (GLAPIENTRY * PFNGLBINDVERTEXARRAYAPPLEPROC) (GLuint array); +typedef void (GLAPIENTRY * PFNGLDELETEVERTEXARRAYSAPPLEPROC) (GLsizei n, const GLuint* arrays); +typedef void (GLAPIENTRY * PFNGLGENVERTEXARRAYSAPPLEPROC) (GLsizei n, const GLuint* arrays); +typedef GLboolean (GLAPIENTRY * PFNGLISVERTEXARRAYAPPLEPROC) (GLuint array); + +#define glBindVertexArrayAPPLE GLEW_GET_FUN(__glewBindVertexArrayAPPLE) +#define glDeleteVertexArraysAPPLE GLEW_GET_FUN(__glewDeleteVertexArraysAPPLE) +#define glGenVertexArraysAPPLE GLEW_GET_FUN(__glewGenVertexArraysAPPLE) +#define glIsVertexArrayAPPLE GLEW_GET_FUN(__glewIsVertexArrayAPPLE) + 
+#define GLEW_APPLE_vertex_array_object GLEW_GET_VAR(__GLEW_APPLE_vertex_array_object) + +#endif /* GL_APPLE_vertex_array_object */ + +/* ---------------------- GL_APPLE_vertex_array_range ---------------------- */ + +#ifndef GL_APPLE_vertex_array_range +#define GL_APPLE_vertex_array_range 1 + +#define GL_VERTEX_ARRAY_RANGE_APPLE 0x851D +#define GL_VERTEX_ARRAY_RANGE_LENGTH_APPLE 0x851E +#define GL_VERTEX_ARRAY_STORAGE_HINT_APPLE 0x851F +#define GL_MAX_VERTEX_ARRAY_RANGE_ELEMENT_APPLE 0x8520 +#define GL_VERTEX_ARRAY_RANGE_POINTER_APPLE 0x8521 +#define GL_STORAGE_CACHED_APPLE 0x85BE +#define GL_STORAGE_SHARED_APPLE 0x85BF + +typedef void (GLAPIENTRY * PFNGLFLUSHVERTEXARRAYRANGEAPPLEPROC) (GLsizei length, void* pointer); +typedef void (GLAPIENTRY * PFNGLVERTEXARRAYPARAMETERIAPPLEPROC) (GLenum pname, GLint param); +typedef void (GLAPIENTRY * PFNGLVERTEXARRAYRANGEAPPLEPROC) (GLsizei length, void* pointer); + +#define glFlushVertexArrayRangeAPPLE GLEW_GET_FUN(__glewFlushVertexArrayRangeAPPLE) +#define glVertexArrayParameteriAPPLE GLEW_GET_FUN(__glewVertexArrayParameteriAPPLE) +#define glVertexArrayRangeAPPLE GLEW_GET_FUN(__glewVertexArrayRangeAPPLE) + +#define GLEW_APPLE_vertex_array_range GLEW_GET_VAR(__GLEW_APPLE_vertex_array_range) + +#endif /* GL_APPLE_vertex_array_range */ + +/* --------------------------- GL_APPLE_ycbcr_422 -------------------------- */ + +#ifndef GL_APPLE_ycbcr_422 +#define GL_APPLE_ycbcr_422 1 + +#define GL_YCBCR_422_APPLE 0x85B9 +#define GL_UNSIGNED_SHORT_8_8_APPLE 0x85BA +#define GL_UNSIGNED_SHORT_8_8_REV_APPLE 0x85BB + +#define GLEW_APPLE_ycbcr_422 GLEW_GET_VAR(__GLEW_APPLE_ycbcr_422) + +#endif /* GL_APPLE_ycbcr_422 */ + +/* ----------------------- GL_ARB_color_buffer_float ----------------------- */ + +#ifndef GL_ARB_color_buffer_float +#define GL_ARB_color_buffer_float 1 + +#define GL_RGBA_FLOAT_MODE_ARB 0x8820 +#define GL_CLAMP_VERTEX_COLOR_ARB 0x891A +#define GL_CLAMP_FRAGMENT_COLOR_ARB 0x891B +#define GL_CLAMP_READ_COLOR_ARB 0x891C 
+#define GL_FIXED_ONLY_ARB 0x891D + +typedef void (GLAPIENTRY * PFNGLCLAMPCOLORARBPROC) (GLenum target, GLenum clamp); + +#define glClampColorARB GLEW_GET_FUN(__glewClampColorARB) + +#define GLEW_ARB_color_buffer_float GLEW_GET_VAR(__GLEW_ARB_color_buffer_float) + +#endif /* GL_ARB_color_buffer_float */ + +/* ----------------------- GL_ARB_depth_buffer_float ----------------------- */ + +#ifndef GL_ARB_depth_buffer_float +#define GL_ARB_depth_buffer_float 1 + +#define GL_DEPTH_COMPONENT32F 0x8CAC +#define GL_DEPTH32F_STENCIL8 0x8CAD +#define GL_FLOAT_32_UNSIGNED_INT_24_8_REV 0x8DAD + +#define GLEW_ARB_depth_buffer_float GLEW_GET_VAR(__GLEW_ARB_depth_buffer_float) + +#endif /* GL_ARB_depth_buffer_float */ + +/* -------------------------- GL_ARB_depth_texture ------------------------- */ + +#ifndef GL_ARB_depth_texture +#define GL_ARB_depth_texture 1 + +#define GL_DEPTH_COMPONENT16_ARB 0x81A5 +#define GL_DEPTH_COMPONENT24_ARB 0x81A6 +#define GL_DEPTH_COMPONENT32_ARB 0x81A7 +#define GL_TEXTURE_DEPTH_SIZE_ARB 0x884A +#define GL_DEPTH_TEXTURE_MODE_ARB 0x884B + +#define GLEW_ARB_depth_texture GLEW_GET_VAR(__GLEW_ARB_depth_texture) + +#endif /* GL_ARB_depth_texture */ + +/* -------------------------- GL_ARB_draw_buffers -------------------------- */ + +#ifndef GL_ARB_draw_buffers +#define GL_ARB_draw_buffers 1 + +#define GL_MAX_DRAW_BUFFERS_ARB 0x8824 +#define GL_DRAW_BUFFER0_ARB 0x8825 +#define GL_DRAW_BUFFER1_ARB 0x8826 +#define GL_DRAW_BUFFER2_ARB 0x8827 +#define GL_DRAW_BUFFER3_ARB 0x8828 +#define GL_DRAW_BUFFER4_ARB 0x8829 +#define GL_DRAW_BUFFER5_ARB 0x882A +#define GL_DRAW_BUFFER6_ARB 0x882B +#define GL_DRAW_BUFFER7_ARB 0x882C +#define GL_DRAW_BUFFER8_ARB 0x882D +#define GL_DRAW_BUFFER9_ARB 0x882E +#define GL_DRAW_BUFFER10_ARB 0x882F +#define GL_DRAW_BUFFER11_ARB 0x8830 +#define GL_DRAW_BUFFER12_ARB 0x8831 +#define GL_DRAW_BUFFER13_ARB 0x8832 +#define GL_DRAW_BUFFER14_ARB 0x8833 +#define GL_DRAW_BUFFER15_ARB 0x8834 + +typedef void (GLAPIENTRY * 
PFNGLDRAWBUFFERSARBPROC) (GLsizei n, const GLenum* bufs); + +#define glDrawBuffersARB GLEW_GET_FUN(__glewDrawBuffersARB) + +#define GLEW_ARB_draw_buffers GLEW_GET_VAR(__GLEW_ARB_draw_buffers) + +#endif /* GL_ARB_draw_buffers */ + +/* ------------------------- GL_ARB_draw_instanced ------------------------- */ + +#ifndef GL_ARB_draw_instanced +#define GL_ARB_draw_instanced 1 + +typedef void (GLAPIENTRY * PFNGLDRAWARRAYSINSTANCEDARBPROC) (GLenum mode, GLint first, GLsizei count, GLsizei primcount); +typedef void (GLAPIENTRY * PFNGLDRAWELEMENTSINSTANCEDARBPROC) (GLenum mode, GLsizei count, GLenum type, const void* indices, GLsizei primcount); + +#define glDrawArraysInstancedARB GLEW_GET_FUN(__glewDrawArraysInstancedARB) +#define glDrawElementsInstancedARB GLEW_GET_FUN(__glewDrawElementsInstancedARB) + +#define GLEW_ARB_draw_instanced GLEW_GET_VAR(__GLEW_ARB_draw_instanced) + +#endif /* GL_ARB_draw_instanced */ + +/* ------------------------ GL_ARB_fragment_program ------------------------ */ + +#ifndef GL_ARB_fragment_program +#define GL_ARB_fragment_program 1 + +#define GL_FRAGMENT_PROGRAM_ARB 0x8804 +#define GL_PROGRAM_ALU_INSTRUCTIONS_ARB 0x8805 +#define GL_PROGRAM_TEX_INSTRUCTIONS_ARB 0x8806 +#define GL_PROGRAM_TEX_INDIRECTIONS_ARB 0x8807 +#define GL_PROGRAM_NATIVE_ALU_INSTRUCTIONS_ARB 0x8808 +#define GL_PROGRAM_NATIVE_TEX_INSTRUCTIONS_ARB 0x8809 +#define GL_PROGRAM_NATIVE_TEX_INDIRECTIONS_ARB 0x880A +#define GL_MAX_PROGRAM_ALU_INSTRUCTIONS_ARB 0x880B +#define GL_MAX_PROGRAM_TEX_INSTRUCTIONS_ARB 0x880C +#define GL_MAX_PROGRAM_TEX_INDIRECTIONS_ARB 0x880D +#define GL_MAX_PROGRAM_NATIVE_ALU_INSTRUCTIONS_ARB 0x880E +#define GL_MAX_PROGRAM_NATIVE_TEX_INSTRUCTIONS_ARB 0x880F +#define GL_MAX_PROGRAM_NATIVE_TEX_INDIRECTIONS_ARB 0x8810 +#define GL_MAX_TEXTURE_COORDS_ARB 0x8871 +#define GL_MAX_TEXTURE_IMAGE_UNITS_ARB 0x8872 + +#define GLEW_ARB_fragment_program GLEW_GET_VAR(__GLEW_ARB_fragment_program) + +#endif /* GL_ARB_fragment_program */ + +/* --------------------- 
GL_ARB_fragment_program_shadow -------------------- */ + +#ifndef GL_ARB_fragment_program_shadow +#define GL_ARB_fragment_program_shadow 1 + +#define GLEW_ARB_fragment_program_shadow GLEW_GET_VAR(__GLEW_ARB_fragment_program_shadow) + +#endif /* GL_ARB_fragment_program_shadow */ + +/* ------------------------- GL_ARB_fragment_shader ------------------------ */ + +#ifndef GL_ARB_fragment_shader +#define GL_ARB_fragment_shader 1 + +#define GL_FRAGMENT_SHADER_ARB 0x8B30 +#define GL_MAX_FRAGMENT_UNIFORM_COMPONENTS_ARB 0x8B49 +#define GL_FRAGMENT_SHADER_DERIVATIVE_HINT_ARB 0x8B8B + +#define GLEW_ARB_fragment_shader GLEW_GET_VAR(__GLEW_ARB_fragment_shader) + +#endif /* GL_ARB_fragment_shader */ + +/* ----------------------- GL_ARB_framebuffer_object ----------------------- */ + +#ifndef GL_ARB_framebuffer_object +#define GL_ARB_framebuffer_object 1 + +#define GL_INVALID_FRAMEBUFFER_OPERATION 0x0506 +#define GL_FRAMEBUFFER_ATTACHMENT_COLOR_ENCODING 0x8210 +#define GL_FRAMEBUFFER_ATTACHMENT_COMPONENT_TYPE 0x8211 +#define GL_FRAMEBUFFER_ATTACHMENT_RED_SIZE 0x8212 +#define GL_FRAMEBUFFER_ATTACHMENT_GREEN_SIZE 0x8213 +#define GL_FRAMEBUFFER_ATTACHMENT_BLUE_SIZE 0x8214 +#define GL_FRAMEBUFFER_ATTACHMENT_ALPHA_SIZE 0x8215 +#define GL_FRAMEBUFFER_ATTACHMENT_DEPTH_SIZE 0x8216 +#define GL_FRAMEBUFFER_ATTACHMENT_STENCIL_SIZE 0x8217 +#define GL_FRAMEBUFFER_DEFAULT 0x8218 +#define GL_FRAMEBUFFER_UNDEFINED 0x8219 +#define GL_DEPTH_STENCIL_ATTACHMENT 0x821A +#define GL_INDEX 0x8222 +#define GL_MAX_RENDERBUFFER_SIZE 0x84E8 +#define GL_DEPTH_STENCIL 0x84F9 +#define GL_UNSIGNED_INT_24_8 0x84FA +#define GL_DEPTH24_STENCIL8 0x88F0 +#define GL_TEXTURE_STENCIL_SIZE 0x88F1 +#define GL_UNSIGNED_NORMALIZED 0x8C17 +#define GL_SRGB 0x8C40 +#define GL_DRAW_FRAMEBUFFER_BINDING 0x8CA6 +#define GL_FRAMEBUFFER_BINDING 0x8CA6 +#define GL_RENDERBUFFER_BINDING 0x8CA7 +#define GL_READ_FRAMEBUFFER 0x8CA8 +#define GL_DRAW_FRAMEBUFFER 0x8CA9 +#define GL_READ_FRAMEBUFFER_BINDING 0x8CAA +#define 
GL_RENDERBUFFER_SAMPLES 0x8CAB +#define GL_FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE 0x8CD0 +#define GL_FRAMEBUFFER_ATTACHMENT_OBJECT_NAME 0x8CD1 +#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_LEVEL 0x8CD2 +#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_CUBE_MAP_FACE 0x8CD3 +#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_LAYER 0x8CD4 +#define GL_FRAMEBUFFER_COMPLETE 0x8CD5 +#define GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT 0x8CD6 +#define GL_FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT 0x8CD7 +#define GL_FRAMEBUFFER_INCOMPLETE_DRAW_BUFFER 0x8CDB +#define GL_FRAMEBUFFER_INCOMPLETE_READ_BUFFER 0x8CDC +#define GL_FRAMEBUFFER_UNSUPPORTED 0x8CDD +#define GL_MAX_COLOR_ATTACHMENTS 0x8CDF +#define GL_COLOR_ATTACHMENT0 0x8CE0 +#define GL_COLOR_ATTACHMENT1 0x8CE1 +#define GL_COLOR_ATTACHMENT2 0x8CE2 +#define GL_COLOR_ATTACHMENT3 0x8CE3 +#define GL_COLOR_ATTACHMENT4 0x8CE4 +#define GL_COLOR_ATTACHMENT5 0x8CE5 +#define GL_COLOR_ATTACHMENT6 0x8CE6 +#define GL_COLOR_ATTACHMENT7 0x8CE7 +#define GL_COLOR_ATTACHMENT8 0x8CE8 +#define GL_COLOR_ATTACHMENT9 0x8CE9 +#define GL_COLOR_ATTACHMENT10 0x8CEA +#define GL_COLOR_ATTACHMENT11 0x8CEB +#define GL_COLOR_ATTACHMENT12 0x8CEC +#define GL_COLOR_ATTACHMENT13 0x8CED +#define GL_COLOR_ATTACHMENT14 0x8CEE +#define GL_COLOR_ATTACHMENT15 0x8CEF +#define GL_DEPTH_ATTACHMENT 0x8D00 +#define GL_STENCIL_ATTACHMENT 0x8D20 +#define GL_FRAMEBUFFER 0x8D40 +#define GL_RENDERBUFFER 0x8D41 +#define GL_RENDERBUFFER_WIDTH 0x8D42 +#define GL_RENDERBUFFER_HEIGHT 0x8D43 +#define GL_RENDERBUFFER_INTERNAL_FORMAT 0x8D44 +#define GL_STENCIL_INDEX1 0x8D46 +#define GL_STENCIL_INDEX4 0x8D47 +#define GL_STENCIL_INDEX8 0x8D48 +#define GL_STENCIL_INDEX16 0x8D49 +#define GL_RENDERBUFFER_RED_SIZE 0x8D50 +#define GL_RENDERBUFFER_GREEN_SIZE 0x8D51 +#define GL_RENDERBUFFER_BLUE_SIZE 0x8D52 +#define GL_RENDERBUFFER_ALPHA_SIZE 0x8D53 +#define GL_RENDERBUFFER_DEPTH_SIZE 0x8D54 +#define GL_RENDERBUFFER_STENCIL_SIZE 0x8D55 +#define GL_FRAMEBUFFER_INCOMPLETE_MULTISAMPLE 0x8D56 +#define GL_MAX_SAMPLES 0x8D57 
+ +typedef void (GLAPIENTRY * PFNGLBINDFRAMEBUFFERPROC) (GLenum target, GLuint framebuffer); +typedef void (GLAPIENTRY * PFNGLBINDRENDERBUFFERPROC) (GLenum target, GLuint renderbuffer); +typedef void (GLAPIENTRY * PFNGLBLITFRAMEBUFFERPROC) (GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, GLbitfield mask, GLenum filter); +typedef GLenum (GLAPIENTRY * PFNGLCHECKFRAMEBUFFERSTATUSPROC) (GLenum target); +typedef void (GLAPIENTRY * PFNGLDELETEFRAMEBUFFERSPROC) (GLsizei n, const GLuint* framebuffers); +typedef void (GLAPIENTRY * PFNGLDELETERENDERBUFFERSPROC) (GLsizei n, const GLuint* renderbuffers); +typedef void (GLAPIENTRY * PFNGLFRAMEBUFFERRENDERBUFFERPROC) (GLenum target, GLenum attachment, GLenum renderbuffertarget, GLuint renderbuffer); +typedef void (GLAPIENTRY * PFNGLFRAMEBUFFERTEXTURLAYERPROC) (GLenum target,GLenum attachment, GLuint texture,GLint level,GLint layer); +typedef void (GLAPIENTRY * PFNGLFRAMEBUFFERTEXTURE1DPROC) (GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level); +typedef void (GLAPIENTRY * PFNGLFRAMEBUFFERTEXTURE2DPROC) (GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level); +typedef void (GLAPIENTRY * PFNGLFRAMEBUFFERTEXTURE3DPROC) (GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level, GLint layer); +typedef void (GLAPIENTRY * PFNGLGENFRAMEBUFFERSPROC) (GLsizei n, GLuint* framebuffers); +typedef void (GLAPIENTRY * PFNGLGENRENDERBUFFERSPROC) (GLsizei n, GLuint* renderbuffers); +typedef void (GLAPIENTRY * PFNGLGENERATEMIPMAPPROC) (GLenum target); +typedef void (GLAPIENTRY * PFNGLGETFRAMEBUFFERATTACHMENTPARAMETERIVPROC) (GLenum target, GLenum attachment, GLenum pname, GLint* params); +typedef void (GLAPIENTRY * PFNGLGETRENDERBUFFERPARAMETERIVPROC) (GLenum target, GLenum pname, GLint* params); +typedef GLboolean (GLAPIENTRY * PFNGLISFRAMEBUFFERPROC) (GLuint framebuffer); +typedef GLboolean (GLAPIENTRY * 
PFNGLISRENDERBUFFERPROC) (GLuint renderbuffer); +typedef void (GLAPIENTRY * PFNGLRENDERBUFFERSTORAGEPROC) (GLenum target, GLenum internalformat, GLsizei width, GLsizei height); +typedef void (GLAPIENTRY * PFNGLRENDERBUFFERSTORAGEMULTISAMPLEPROC) (GLenum target, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height); + +#define glBindFramebuffer GLEW_GET_FUN(__glewBindFramebuffer) +#define glBindRenderbuffer GLEW_GET_FUN(__glewBindRenderbuffer) +#define glBlitFramebuffer GLEW_GET_FUN(__glewBlitFramebuffer) +#define glCheckFramebufferStatus GLEW_GET_FUN(__glewCheckFramebufferStatus) +#define glDeleteFramebuffers GLEW_GET_FUN(__glewDeleteFramebuffers) +#define glDeleteRenderbuffers GLEW_GET_FUN(__glewDeleteRenderbuffers) +#define glFramebufferRenderbuffer GLEW_GET_FUN(__glewFramebufferRenderbuffer) +#define glFramebufferTexturLayer GLEW_GET_FUN(__glewFramebufferTexturLayer) +#define glFramebufferTexture1D GLEW_GET_FUN(__glewFramebufferTexture1D) +#define glFramebufferTexture2D GLEW_GET_FUN(__glewFramebufferTexture2D) +#define glFramebufferTexture3D GLEW_GET_FUN(__glewFramebufferTexture3D) +#define glGenFramebuffers GLEW_GET_FUN(__glewGenFramebuffers) +#define glGenRenderbuffers GLEW_GET_FUN(__glewGenRenderbuffers) +#define glGenerateMipmap GLEW_GET_FUN(__glewGenerateMipmap) +#define glGetFramebufferAttachmentParameteriv GLEW_GET_FUN(__glewGetFramebufferAttachmentParameteriv) +#define glGetRenderbufferParameteriv GLEW_GET_FUN(__glewGetRenderbufferParameteriv) +#define glIsFramebuffer GLEW_GET_FUN(__glewIsFramebuffer) +#define glIsRenderbuffer GLEW_GET_FUN(__glewIsRenderbuffer) +#define glRenderbufferStorage GLEW_GET_FUN(__glewRenderbufferStorage) +#define glRenderbufferStorageMultisample GLEW_GET_FUN(__glewRenderbufferStorageMultisample) + +#define GLEW_ARB_framebuffer_object GLEW_GET_VAR(__GLEW_ARB_framebuffer_object) + +#endif /* GL_ARB_framebuffer_object */ + +/* ------------------------ GL_ARB_framebuffer_sRGB ------------------------ */ + +#ifndef 
GL_ARB_framebuffer_sRGB +#define GL_ARB_framebuffer_sRGB 1 + +#define GL_FRAMEBUFFER_SRGB 0x8DB9 + +#define GLEW_ARB_framebuffer_sRGB GLEW_GET_VAR(__GLEW_ARB_framebuffer_sRGB) + +#endif /* GL_ARB_framebuffer_sRGB */ + +/* ------------------------ GL_ARB_geometry_shader4 ------------------------ */ + +#ifndef GL_ARB_geometry_shader4 +#define GL_ARB_geometry_shader4 1 + +#define GL_LINES_ADJACENCY_ARB 0xA +#define GL_LINE_STRIP_ADJACENCY_ARB 0xB +#define GL_TRIANGLES_ADJACENCY_ARB 0xC +#define GL_TRIANGLE_STRIP_ADJACENCY_ARB 0xD +#define GL_PROGRAM_POINT_SIZE_ARB 0x8642 +#define GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS_ARB 0x8C29 +#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_LAYER 0x8CD4 +#define GL_FRAMEBUFFER_ATTACHMENT_LAYERED_ARB 0x8DA7 +#define GL_FRAMEBUFFER_INCOMPLETE_LAYER_TARGETS_ARB 0x8DA8 +#define GL_FRAMEBUFFER_INCOMPLETE_LAYER_COUNT_ARB 0x8DA9 +#define GL_GEOMETRY_SHADER_ARB 0x8DD9 +#define GL_GEOMETRY_VERTICES_OUT_ARB 0x8DDA +#define GL_GEOMETRY_INPUT_TYPE_ARB 0x8DDB +#define GL_GEOMETRY_OUTPUT_TYPE_ARB 0x8DDC +#define GL_MAX_GEOMETRY_VARYING_COMPONENTS_ARB 0x8DDD +#define GL_MAX_VERTEX_VARYING_COMPONENTS_ARB 0x8DDE +#define GL_MAX_GEOMETRY_UNIFORM_COMPONENTS_ARB 0x8DDF +#define GL_MAX_GEOMETRY_OUTPUT_VERTICES_ARB 0x8DE0 +#define GL_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS_ARB 0x8DE1 + +typedef void (GLAPIENTRY * PFNGLFRAMEBUFFERTEXTUREARBPROC) (GLenum target, GLenum attachment, GLuint texture, GLint level); +typedef void (GLAPIENTRY * PFNGLFRAMEBUFFERTEXTUREFACEARBPROC) (GLenum target, GLenum attachment, GLuint texture, GLint level, GLenum face); +typedef void (GLAPIENTRY * PFNGLFRAMEBUFFERTEXTURELAYERARBPROC) (GLenum target, GLenum attachment, GLuint texture, GLint level, GLint layer); +typedef void (GLAPIENTRY * PFNGLPROGRAMPARAMETERIARBPROC) (GLuint program, GLenum pname, GLint value); + +#define glFramebufferTextureARB GLEW_GET_FUN(__glewFramebufferTextureARB) +#define glFramebufferTextureFaceARB GLEW_GET_FUN(__glewFramebufferTextureFaceARB) +#define 
glFramebufferTextureLayerARB GLEW_GET_FUN(__glewFramebufferTextureLayerARB) +#define glProgramParameteriARB GLEW_GET_FUN(__glewProgramParameteriARB) + +#define GLEW_ARB_geometry_shader4 GLEW_GET_VAR(__GLEW_ARB_geometry_shader4) + +#endif /* GL_ARB_geometry_shader4 */ + +/* ------------------------ GL_ARB_half_float_pixel ------------------------ */ + +#ifndef GL_ARB_half_float_pixel +#define GL_ARB_half_float_pixel 1 + +#define GL_HALF_FLOAT_ARB 0x140B + +#define GLEW_ARB_half_float_pixel GLEW_GET_VAR(__GLEW_ARB_half_float_pixel) + +#endif /* GL_ARB_half_float_pixel */ + +/* ------------------------ GL_ARB_half_float_vertex ----------------------- */ + +#ifndef GL_ARB_half_float_vertex +#define GL_ARB_half_float_vertex 1 + +#define GL_HALF_FLOAT 0x140B + +#define GLEW_ARB_half_float_vertex GLEW_GET_VAR(__GLEW_ARB_half_float_vertex) + +#endif /* GL_ARB_half_float_vertex */ + +/* ----------------------------- GL_ARB_imaging ---------------------------- */ + +#ifndef GL_ARB_imaging +#define GL_ARB_imaging 1 + +#define GL_CONSTANT_COLOR 0x8001 +#define GL_ONE_MINUS_CONSTANT_COLOR 0x8002 +#define GL_CONSTANT_ALPHA 0x8003 +#define GL_ONE_MINUS_CONSTANT_ALPHA 0x8004 +#define GL_BLEND_COLOR 0x8005 +#define GL_FUNC_ADD 0x8006 +#define GL_MIN 0x8007 +#define GL_MAX 0x8008 +#define GL_BLEND_EQUATION 0x8009 +#define GL_FUNC_SUBTRACT 0x800A +#define GL_FUNC_REVERSE_SUBTRACT 0x800B +#define GL_CONVOLUTION_1D 0x8010 +#define GL_CONVOLUTION_2D 0x8011 +#define GL_SEPARABLE_2D 0x8012 +#define GL_CONVOLUTION_BORDER_MODE 0x8013 +#define GL_CONVOLUTION_FILTER_SCALE 0x8014 +#define GL_CONVOLUTION_FILTER_BIAS 0x8015 +#define GL_REDUCE 0x8016 +#define GL_CONVOLUTION_FORMAT 0x8017 +#define GL_CONVOLUTION_WIDTH 0x8018 +#define GL_CONVOLUTION_HEIGHT 0x8019 +#define GL_MAX_CONVOLUTION_WIDTH 0x801A +#define GL_MAX_CONVOLUTION_HEIGHT 0x801B +#define GL_POST_CONVOLUTION_RED_SCALE 0x801C +#define GL_POST_CONVOLUTION_GREEN_SCALE 0x801D +#define GL_POST_CONVOLUTION_BLUE_SCALE 0x801E +#define 
GL_POST_CONVOLUTION_ALPHA_SCALE 0x801F +#define GL_POST_CONVOLUTION_RED_BIAS 0x8020 +#define GL_POST_CONVOLUTION_GREEN_BIAS 0x8021 +#define GL_POST_CONVOLUTION_BLUE_BIAS 0x8022 +#define GL_POST_CONVOLUTION_ALPHA_BIAS 0x8023 +#define GL_HISTOGRAM 0x8024 +#define GL_PROXY_HISTOGRAM 0x8025 +#define GL_HISTOGRAM_WIDTH 0x8026 +#define GL_HISTOGRAM_FORMAT 0x8027 +#define GL_HISTOGRAM_RED_SIZE 0x8028 +#define GL_HISTOGRAM_GREEN_SIZE 0x8029 +#define GL_HISTOGRAM_BLUE_SIZE 0x802A +#define GL_HISTOGRAM_ALPHA_SIZE 0x802B +#define GL_HISTOGRAM_LUMINANCE_SIZE 0x802C +#define GL_HISTOGRAM_SINK 0x802D +#define GL_MINMAX 0x802E +#define GL_MINMAX_FORMAT 0x802F +#define GL_MINMAX_SINK 0x8030 +#define GL_TABLE_TOO_LARGE 0x8031 +#define GL_COLOR_MATRIX 0x80B1 +#define GL_COLOR_MATRIX_STACK_DEPTH 0x80B2 +#define GL_MAX_COLOR_MATRIX_STACK_DEPTH 0x80B3 +#define GL_POST_COLOR_MATRIX_RED_SCALE 0x80B4 +#define GL_POST_COLOR_MATRIX_GREEN_SCALE 0x80B5 +#define GL_POST_COLOR_MATRIX_BLUE_SCALE 0x80B6 +#define GL_POST_COLOR_MATRIX_ALPHA_SCALE 0x80B7 +#define GL_POST_COLOR_MATRIX_RED_BIAS 0x80B8 +#define GL_POST_COLOR_MATRIX_GREEN_BIAS 0x80B9 +#define GL_POST_COLOR_MATRIX_BLUE_BIAS 0x80BA +#define GL_POST_COLOR_MATRIX_ALPHA_BIAS 0x80BB +#define GL_COLOR_TABLE 0x80D0 +#define GL_POST_CONVOLUTION_COLOR_TABLE 0x80D1 +#define GL_POST_COLOR_MATRIX_COLOR_TABLE 0x80D2 +#define GL_PROXY_COLOR_TABLE 0x80D3 +#define GL_PROXY_POST_CONVOLUTION_COLOR_TABLE 0x80D4 +#define GL_PROXY_POST_COLOR_MATRIX_COLOR_TABLE 0x80D5 +#define GL_COLOR_TABLE_SCALE 0x80D6 +#define GL_COLOR_TABLE_BIAS 0x80D7 +#define GL_COLOR_TABLE_FORMAT 0x80D8 +#define GL_COLOR_TABLE_WIDTH 0x80D9 +#define GL_COLOR_TABLE_RED_SIZE 0x80DA +#define GL_COLOR_TABLE_GREEN_SIZE 0x80DB +#define GL_COLOR_TABLE_BLUE_SIZE 0x80DC +#define GL_COLOR_TABLE_ALPHA_SIZE 0x80DD +#define GL_COLOR_TABLE_LUMINANCE_SIZE 0x80DE +#define GL_COLOR_TABLE_INTENSITY_SIZE 0x80DF +#define GL_IGNORE_BORDER 0x8150 +#define GL_CONSTANT_BORDER 0x8151 +#define GL_WRAP_BORDER 
0x8152 +#define GL_REPLICATE_BORDER 0x8153 +#define GL_CONVOLUTION_BORDER_COLOR 0x8154 + +typedef void (GLAPIENTRY * PFNGLCOLORSUBTABLEPROC) (GLenum target, GLsizei start, GLsizei count, GLenum format, GLenum type, const GLvoid *data); +typedef void (GLAPIENTRY * PFNGLCOLORTABLEPROC) (GLenum target, GLenum internalformat, GLsizei width, GLenum format, GLenum type, const GLvoid *table); +typedef void (GLAPIENTRY * PFNGLCOLORTABLEPARAMETERFVPROC) (GLenum target, GLenum pname, const GLfloat *params); +typedef void (GLAPIENTRY * PFNGLCOLORTABLEPARAMETERIVPROC) (GLenum target, GLenum pname, const GLint *params); +typedef void (GLAPIENTRY * PFNGLCONVOLUTIONFILTER1DPROC) (GLenum target, GLenum internalformat, GLsizei width, GLenum format, GLenum type, const GLvoid *image); +typedef void (GLAPIENTRY * PFNGLCONVOLUTIONFILTER2DPROC) (GLenum target, GLenum internalformat, GLsizei width, GLsizei height, GLenum format, GLenum type, const GLvoid *image); +typedef void (GLAPIENTRY * PFNGLCONVOLUTIONPARAMETERFPROC) (GLenum target, GLenum pname, GLfloat params); +typedef void (GLAPIENTRY * PFNGLCONVOLUTIONPARAMETERFVPROC) (GLenum target, GLenum pname, const GLfloat *params); +typedef void (GLAPIENTRY * PFNGLCONVOLUTIONPARAMETERIPROC) (GLenum target, GLenum pname, GLint params); +typedef void (GLAPIENTRY * PFNGLCONVOLUTIONPARAMETERIVPROC) (GLenum target, GLenum pname, const GLint *params); +typedef void (GLAPIENTRY * PFNGLCOPYCOLORSUBTABLEPROC) (GLenum target, GLsizei start, GLint x, GLint y, GLsizei width); +typedef void (GLAPIENTRY * PFNGLCOPYCOLORTABLEPROC) (GLenum target, GLenum internalformat, GLint x, GLint y, GLsizei width); +typedef void (GLAPIENTRY * PFNGLCOPYCONVOLUTIONFILTER1DPROC) (GLenum target, GLenum internalformat, GLint x, GLint y, GLsizei width); +typedef void (GLAPIENTRY * PFNGLCOPYCONVOLUTIONFILTER2DPROC) (GLenum target, GLenum internalformat, GLint x, GLint y, GLsizei width, GLsizei height); +typedef void (GLAPIENTRY * PFNGLGETCOLORTABLEPROC) (GLenum target, 
GLenum format, GLenum type, GLvoid *table); +typedef void (GLAPIENTRY * PFNGLGETCOLORTABLEPARAMETERFVPROC) (GLenum target, GLenum pname, GLfloat *params); +typedef void (GLAPIENTRY * PFNGLGETCOLORTABLEPARAMETERIVPROC) (GLenum target, GLenum pname, GLint *params); +typedef void (GLAPIENTRY * PFNGLGETCONVOLUTIONFILTERPROC) (GLenum target, GLenum format, GLenum type, GLvoid *image); +typedef void (GLAPIENTRY * PFNGLGETCONVOLUTIONPARAMETERFVPROC) (GLenum target, GLenum pname, GLfloat *params); +typedef void (GLAPIENTRY * PFNGLGETCONVOLUTIONPARAMETERIVPROC) (GLenum target, GLenum pname, GLint *params); +typedef void (GLAPIENTRY * PFNGLGETHISTOGRAMPROC) (GLenum target, GLboolean reset, GLenum format, GLenum type, GLvoid *values); +typedef void (GLAPIENTRY * PFNGLGETHISTOGRAMPARAMETERFVPROC) (GLenum target, GLenum pname, GLfloat *params); +typedef void (GLAPIENTRY * PFNGLGETHISTOGRAMPARAMETERIVPROC) (GLenum target, GLenum pname, GLint *params); +typedef void (GLAPIENTRY * PFNGLGETMINMAXPROC) (GLenum target, GLboolean reset, GLenum format, GLenum types, GLvoid *values); +typedef void (GLAPIENTRY * PFNGLGETMINMAXPARAMETERFVPROC) (GLenum target, GLenum pname, GLfloat *params); +typedef void (GLAPIENTRY * PFNGLGETMINMAXPARAMETERIVPROC) (GLenum target, GLenum pname, GLint *params); +typedef void (GLAPIENTRY * PFNGLGETSEPARABLEFILTERPROC) (GLenum target, GLenum format, GLenum type, GLvoid *row, GLvoid *column, GLvoid *span); +typedef void (GLAPIENTRY * PFNGLHISTOGRAMPROC) (GLenum target, GLsizei width, GLenum internalformat, GLboolean sink); +typedef void (GLAPIENTRY * PFNGLMINMAXPROC) (GLenum target, GLenum internalformat, GLboolean sink); +typedef void (GLAPIENTRY * PFNGLRESETHISTOGRAMPROC) (GLenum target); +typedef void (GLAPIENTRY * PFNGLRESETMINMAXPROC) (GLenum target); +typedef void (GLAPIENTRY * PFNGLSEPARABLEFILTER2DPROC) (GLenum target, GLenum internalformat, GLsizei width, GLsizei height, GLenum format, GLenum type, const GLvoid *row, const GLvoid *column); + +#define 
glColorSubTable GLEW_GET_FUN(__glewColorSubTable) +#define glColorTable GLEW_GET_FUN(__glewColorTable) +#define glColorTableParameterfv GLEW_GET_FUN(__glewColorTableParameterfv) +#define glColorTableParameteriv GLEW_GET_FUN(__glewColorTableParameteriv) +#define glConvolutionFilter1D GLEW_GET_FUN(__glewConvolutionFilter1D) +#define glConvolutionFilter2D GLEW_GET_FUN(__glewConvolutionFilter2D) +#define glConvolutionParameterf GLEW_GET_FUN(__glewConvolutionParameterf) +#define glConvolutionParameterfv GLEW_GET_FUN(__glewConvolutionParameterfv) +#define glConvolutionParameteri GLEW_GET_FUN(__glewConvolutionParameteri) +#define glConvolutionParameteriv GLEW_GET_FUN(__glewConvolutionParameteriv) +#define glCopyColorSubTable GLEW_GET_FUN(__glewCopyColorSubTable) +#define glCopyColorTable GLEW_GET_FUN(__glewCopyColorTable) +#define glCopyConvolutionFilter1D GLEW_GET_FUN(__glewCopyConvolutionFilter1D) +#define glCopyConvolutionFilter2D GLEW_GET_FUN(__glewCopyConvolutionFilter2D) +#define glGetColorTable GLEW_GET_FUN(__glewGetColorTable) +#define glGetColorTableParameterfv GLEW_GET_FUN(__glewGetColorTableParameterfv) +#define glGetColorTableParameteriv GLEW_GET_FUN(__glewGetColorTableParameteriv) +#define glGetConvolutionFilter GLEW_GET_FUN(__glewGetConvolutionFilter) +#define glGetConvolutionParameterfv GLEW_GET_FUN(__glewGetConvolutionParameterfv) +#define glGetConvolutionParameteriv GLEW_GET_FUN(__glewGetConvolutionParameteriv) +#define glGetHistogram GLEW_GET_FUN(__glewGetHistogram) +#define glGetHistogramParameterfv GLEW_GET_FUN(__glewGetHistogramParameterfv) +#define glGetHistogramParameteriv GLEW_GET_FUN(__glewGetHistogramParameteriv) +#define glGetMinmax GLEW_GET_FUN(__glewGetMinmax) +#define glGetMinmaxParameterfv GLEW_GET_FUN(__glewGetMinmaxParameterfv) +#define glGetMinmaxParameteriv GLEW_GET_FUN(__glewGetMinmaxParameteriv) +#define glGetSeparableFilter GLEW_GET_FUN(__glewGetSeparableFilter) +#define glHistogram GLEW_GET_FUN(__glewHistogram) +#define glMinmax 
GLEW_GET_FUN(__glewMinmax) +#define glResetHistogram GLEW_GET_FUN(__glewResetHistogram) +#define glResetMinmax GLEW_GET_FUN(__glewResetMinmax) +#define glSeparableFilter2D GLEW_GET_FUN(__glewSeparableFilter2D) + +#define GLEW_ARB_imaging GLEW_GET_VAR(__GLEW_ARB_imaging) + +#endif /* GL_ARB_imaging */ + +/* ------------------------ GL_ARB_instanced_arrays ------------------------ */ + +#ifndef GL_ARB_instanced_arrays +#define GL_ARB_instanced_arrays 1 + +#define GL_VERTEX_ATTRIB_ARRAY_DIVISOR_ARB 0x88FE + +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIBDIVISORARBPROC) (GLuint index, GLuint divisor); + +#define glVertexAttribDivisorARB GLEW_GET_FUN(__glewVertexAttribDivisorARB) + +#define GLEW_ARB_instanced_arrays GLEW_GET_VAR(__GLEW_ARB_instanced_arrays) + +#endif /* GL_ARB_instanced_arrays */ + +/* ------------------------ GL_ARB_map_buffer_range ------------------------ */ + +#ifndef GL_ARB_map_buffer_range +#define GL_ARB_map_buffer_range 1 + +#define GL_MAP_READ_BIT 0x0001 +#define GL_MAP_WRITE_BIT 0x0002 +#define GL_MAP_INVALIDATE_RANGE_BIT 0x0004 +#define GL_MAP_INVALIDATE_BUFFER_BIT 0x0008 +#define GL_MAP_FLUSH_EXPLICIT_BIT 0x0010 +#define GL_MAP_UNSYNCHRONIZED_BIT 0x0020 + +typedef void (GLAPIENTRY * PFNGLFLUSHMAPPEDBUFFERRANGEPROC) (GLenum target, GLintptr offset, GLsizeiptr length); +typedef GLvoid * (GLAPIENTRY * PFNGLMAPBUFFERRANGEPROC) (GLenum target, GLintptr offset, GLsizeiptr length, GLbitfield access); + +#define glFlushMappedBufferRange GLEW_GET_FUN(__glewFlushMappedBufferRange) +#define glMapBufferRange GLEW_GET_FUN(__glewMapBufferRange) + +#define GLEW_ARB_map_buffer_range GLEW_GET_VAR(__GLEW_ARB_map_buffer_range) + +#endif /* GL_ARB_map_buffer_range */ + +/* ------------------------- GL_ARB_matrix_palette ------------------------- */ + +#ifndef GL_ARB_matrix_palette +#define GL_ARB_matrix_palette 1 + +#define GL_MATRIX_PALETTE_ARB 0x8840 +#define GL_MAX_MATRIX_PALETTE_STACK_DEPTH_ARB 0x8841 +#define GL_MAX_PALETTE_MATRICES_ARB 0x8842 +#define 
GL_CURRENT_PALETTE_MATRIX_ARB 0x8843 +#define GL_MATRIX_INDEX_ARRAY_ARB 0x8844 +#define GL_CURRENT_MATRIX_INDEX_ARB 0x8845 +#define GL_MATRIX_INDEX_ARRAY_SIZE_ARB 0x8846 +#define GL_MATRIX_INDEX_ARRAY_TYPE_ARB 0x8847 +#define GL_MATRIX_INDEX_ARRAY_STRIDE_ARB 0x8848 +#define GL_MATRIX_INDEX_ARRAY_POINTER_ARB 0x8849 + +typedef void (GLAPIENTRY * PFNGLCURRENTPALETTEMATRIXARBPROC) (GLint index); +typedef void (GLAPIENTRY * PFNGLMATRIXINDEXPOINTERARBPROC) (GLint size, GLenum type, GLsizei stride, GLvoid *pointer); +typedef void (GLAPIENTRY * PFNGLMATRIXINDEXUBVARBPROC) (GLint size, GLubyte *indices); +typedef void (GLAPIENTRY * PFNGLMATRIXINDEXUIVARBPROC) (GLint size, GLuint *indices); +typedef void (GLAPIENTRY * PFNGLMATRIXINDEXUSVARBPROC) (GLint size, GLushort *indices); + +#define glCurrentPaletteMatrixARB GLEW_GET_FUN(__glewCurrentPaletteMatrixARB) +#define glMatrixIndexPointerARB GLEW_GET_FUN(__glewMatrixIndexPointerARB) +#define glMatrixIndexubvARB GLEW_GET_FUN(__glewMatrixIndexubvARB) +#define glMatrixIndexuivARB GLEW_GET_FUN(__glewMatrixIndexuivARB) +#define glMatrixIndexusvARB GLEW_GET_FUN(__glewMatrixIndexusvARB) + +#define GLEW_ARB_matrix_palette GLEW_GET_VAR(__GLEW_ARB_matrix_palette) + +#endif /* GL_ARB_matrix_palette */ + +/* --------------------------- GL_ARB_multisample -------------------------- */ + +#ifndef GL_ARB_multisample +#define GL_ARB_multisample 1 + +#define GL_MULTISAMPLE_ARB 0x809D +#define GL_SAMPLE_ALPHA_TO_COVERAGE_ARB 0x809E +#define GL_SAMPLE_ALPHA_TO_ONE_ARB 0x809F +#define GL_SAMPLE_COVERAGE_ARB 0x80A0 +#define GL_SAMPLE_BUFFERS_ARB 0x80A8 +#define GL_SAMPLES_ARB 0x80A9 +#define GL_SAMPLE_COVERAGE_VALUE_ARB 0x80AA +#define GL_SAMPLE_COVERAGE_INVERT_ARB 0x80AB +#define GL_MULTISAMPLE_BIT_ARB 0x20000000 + +typedef void (GLAPIENTRY * PFNGLSAMPLECOVERAGEARBPROC) (GLclampf value, GLboolean invert); + +#define glSampleCoverageARB GLEW_GET_FUN(__glewSampleCoverageARB) + +#define GLEW_ARB_multisample GLEW_GET_VAR(__GLEW_ARB_multisample) + 
+#endif /* GL_ARB_multisample */ + +/* -------------------------- GL_ARB_multitexture -------------------------- */ + +#ifndef GL_ARB_multitexture +#define GL_ARB_multitexture 1 + +#define GL_TEXTURE0_ARB 0x84C0 +#define GL_TEXTURE1_ARB 0x84C1 +#define GL_TEXTURE2_ARB 0x84C2 +#define GL_TEXTURE3_ARB 0x84C3 +#define GL_TEXTURE4_ARB 0x84C4 +#define GL_TEXTURE5_ARB 0x84C5 +#define GL_TEXTURE6_ARB 0x84C6 +#define GL_TEXTURE7_ARB 0x84C7 +#define GL_TEXTURE8_ARB 0x84C8 +#define GL_TEXTURE9_ARB 0x84C9 +#define GL_TEXTURE10_ARB 0x84CA +#define GL_TEXTURE11_ARB 0x84CB +#define GL_TEXTURE12_ARB 0x84CC +#define GL_TEXTURE13_ARB 0x84CD +#define GL_TEXTURE14_ARB 0x84CE +#define GL_TEXTURE15_ARB 0x84CF +#define GL_TEXTURE16_ARB 0x84D0 +#define GL_TEXTURE17_ARB 0x84D1 +#define GL_TEXTURE18_ARB 0x84D2 +#define GL_TEXTURE19_ARB 0x84D3 +#define GL_TEXTURE20_ARB 0x84D4 +#define GL_TEXTURE21_ARB 0x84D5 +#define GL_TEXTURE22_ARB 0x84D6 +#define GL_TEXTURE23_ARB 0x84D7 +#define GL_TEXTURE24_ARB 0x84D8 +#define GL_TEXTURE25_ARB 0x84D9 +#define GL_TEXTURE26_ARB 0x84DA +#define GL_TEXTURE27_ARB 0x84DB +#define GL_TEXTURE28_ARB 0x84DC +#define GL_TEXTURE29_ARB 0x84DD +#define GL_TEXTURE30_ARB 0x84DE +#define GL_TEXTURE31_ARB 0x84DF +#define GL_ACTIVE_TEXTURE_ARB 0x84E0 +#define GL_CLIENT_ACTIVE_TEXTURE_ARB 0x84E1 +#define GL_MAX_TEXTURE_UNITS_ARB 0x84E2 + +typedef void (GLAPIENTRY * PFNGLACTIVETEXTUREARBPROC) (GLenum texture); +typedef void (GLAPIENTRY * PFNGLCLIENTACTIVETEXTUREARBPROC) (GLenum texture); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD1DARBPROC) (GLenum target, GLdouble s); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD1DVARBPROC) (GLenum target, const GLdouble *v); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD1FARBPROC) (GLenum target, GLfloat s); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD1FVARBPROC) (GLenum target, const GLfloat *v); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD1IARBPROC) (GLenum target, GLint s); +typedef void (GLAPIENTRY * 
PFNGLMULTITEXCOORD1IVARBPROC) (GLenum target, const GLint *v); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD1SARBPROC) (GLenum target, GLshort s); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD1SVARBPROC) (GLenum target, const GLshort *v); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD2DARBPROC) (GLenum target, GLdouble s, GLdouble t); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD2DVARBPROC) (GLenum target, const GLdouble *v); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD2FARBPROC) (GLenum target, GLfloat s, GLfloat t); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD2FVARBPROC) (GLenum target, const GLfloat *v); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD2IARBPROC) (GLenum target, GLint s, GLint t); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD2IVARBPROC) (GLenum target, const GLint *v); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD2SARBPROC) (GLenum target, GLshort s, GLshort t); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD2SVARBPROC) (GLenum target, const GLshort *v); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD3DARBPROC) (GLenum target, GLdouble s, GLdouble t, GLdouble r); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD3DVARBPROC) (GLenum target, const GLdouble *v); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD3FARBPROC) (GLenum target, GLfloat s, GLfloat t, GLfloat r); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD3FVARBPROC) (GLenum target, const GLfloat *v); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD3IARBPROC) (GLenum target, GLint s, GLint t, GLint r); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD3IVARBPROC) (GLenum target, const GLint *v); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD3SARBPROC) (GLenum target, GLshort s, GLshort t, GLshort r); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD3SVARBPROC) (GLenum target, const GLshort *v); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD4DARBPROC) (GLenum target, GLdouble s, GLdouble t, GLdouble r, GLdouble q); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD4DVARBPROC) (GLenum target, const 
GLdouble *v); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD4FARBPROC) (GLenum target, GLfloat s, GLfloat t, GLfloat r, GLfloat q); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD4FVARBPROC) (GLenum target, const GLfloat *v); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD4IARBPROC) (GLenum target, GLint s, GLint t, GLint r, GLint q); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD4IVARBPROC) (GLenum target, const GLint *v); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD4SARBPROC) (GLenum target, GLshort s, GLshort t, GLshort r, GLshort q); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD4SVARBPROC) (GLenum target, const GLshort *v); + +#define glActiveTextureARB GLEW_GET_FUN(__glewActiveTextureARB) +#define glClientActiveTextureARB GLEW_GET_FUN(__glewClientActiveTextureARB) +#define glMultiTexCoord1dARB GLEW_GET_FUN(__glewMultiTexCoord1dARB) +#define glMultiTexCoord1dvARB GLEW_GET_FUN(__glewMultiTexCoord1dvARB) +#define glMultiTexCoord1fARB GLEW_GET_FUN(__glewMultiTexCoord1fARB) +#define glMultiTexCoord1fvARB GLEW_GET_FUN(__glewMultiTexCoord1fvARB) +#define glMultiTexCoord1iARB GLEW_GET_FUN(__glewMultiTexCoord1iARB) +#define glMultiTexCoord1ivARB GLEW_GET_FUN(__glewMultiTexCoord1ivARB) +#define glMultiTexCoord1sARB GLEW_GET_FUN(__glewMultiTexCoord1sARB) +#define glMultiTexCoord1svARB GLEW_GET_FUN(__glewMultiTexCoord1svARB) +#define glMultiTexCoord2dARB GLEW_GET_FUN(__glewMultiTexCoord2dARB) +#define glMultiTexCoord2dvARB GLEW_GET_FUN(__glewMultiTexCoord2dvARB) +#define glMultiTexCoord2fARB GLEW_GET_FUN(__glewMultiTexCoord2fARB) +#define glMultiTexCoord2fvARB GLEW_GET_FUN(__glewMultiTexCoord2fvARB) +#define glMultiTexCoord2iARB GLEW_GET_FUN(__glewMultiTexCoord2iARB) +#define glMultiTexCoord2ivARB GLEW_GET_FUN(__glewMultiTexCoord2ivARB) +#define glMultiTexCoord2sARB GLEW_GET_FUN(__glewMultiTexCoord2sARB) +#define glMultiTexCoord2svARB GLEW_GET_FUN(__glewMultiTexCoord2svARB) +#define glMultiTexCoord3dARB GLEW_GET_FUN(__glewMultiTexCoord3dARB) +#define 
glMultiTexCoord3dvARB GLEW_GET_FUN(__glewMultiTexCoord3dvARB) +#define glMultiTexCoord3fARB GLEW_GET_FUN(__glewMultiTexCoord3fARB) +#define glMultiTexCoord3fvARB GLEW_GET_FUN(__glewMultiTexCoord3fvARB) +#define glMultiTexCoord3iARB GLEW_GET_FUN(__glewMultiTexCoord3iARB) +#define glMultiTexCoord3ivARB GLEW_GET_FUN(__glewMultiTexCoord3ivARB) +#define glMultiTexCoord3sARB GLEW_GET_FUN(__glewMultiTexCoord3sARB) +#define glMultiTexCoord3svARB GLEW_GET_FUN(__glewMultiTexCoord3svARB) +#define glMultiTexCoord4dARB GLEW_GET_FUN(__glewMultiTexCoord4dARB) +#define glMultiTexCoord4dvARB GLEW_GET_FUN(__glewMultiTexCoord4dvARB) +#define glMultiTexCoord4fARB GLEW_GET_FUN(__glewMultiTexCoord4fARB) +#define glMultiTexCoord4fvARB GLEW_GET_FUN(__glewMultiTexCoord4fvARB) +#define glMultiTexCoord4iARB GLEW_GET_FUN(__glewMultiTexCoord4iARB) +#define glMultiTexCoord4ivARB GLEW_GET_FUN(__glewMultiTexCoord4ivARB) +#define glMultiTexCoord4sARB GLEW_GET_FUN(__glewMultiTexCoord4sARB) +#define glMultiTexCoord4svARB GLEW_GET_FUN(__glewMultiTexCoord4svARB) + +#define GLEW_ARB_multitexture GLEW_GET_VAR(__GLEW_ARB_multitexture) + +#endif /* GL_ARB_multitexture */ + +/* ------------------------- GL_ARB_occlusion_query ------------------------ */ + +#ifndef GL_ARB_occlusion_query +#define GL_ARB_occlusion_query 1 + +#define GL_QUERY_COUNTER_BITS_ARB 0x8864 +#define GL_CURRENT_QUERY_ARB 0x8865 +#define GL_QUERY_RESULT_ARB 0x8866 +#define GL_QUERY_RESULT_AVAILABLE_ARB 0x8867 +#define GL_SAMPLES_PASSED_ARB 0x8914 + +typedef void (GLAPIENTRY * PFNGLBEGINQUERYARBPROC) (GLenum target, GLuint id); +typedef void (GLAPIENTRY * PFNGLDELETEQUERIESARBPROC) (GLsizei n, const GLuint* ids); +typedef void (GLAPIENTRY * PFNGLENDQUERYARBPROC) (GLenum target); +typedef void (GLAPIENTRY * PFNGLGENQUERIESARBPROC) (GLsizei n, GLuint* ids); +typedef void (GLAPIENTRY * PFNGLGETQUERYOBJECTIVARBPROC) (GLuint id, GLenum pname, GLint* params); +typedef void (GLAPIENTRY * PFNGLGETQUERYOBJECTUIVARBPROC) (GLuint id, GLenum pname, 
GLuint* params); +typedef void (GLAPIENTRY * PFNGLGETQUERYIVARBPROC) (GLenum target, GLenum pname, GLint* params); +typedef GLboolean (GLAPIENTRY * PFNGLISQUERYARBPROC) (GLuint id); + +#define glBeginQueryARB GLEW_GET_FUN(__glewBeginQueryARB) +#define glDeleteQueriesARB GLEW_GET_FUN(__glewDeleteQueriesARB) +#define glEndQueryARB GLEW_GET_FUN(__glewEndQueryARB) +#define glGenQueriesARB GLEW_GET_FUN(__glewGenQueriesARB) +#define glGetQueryObjectivARB GLEW_GET_FUN(__glewGetQueryObjectivARB) +#define glGetQueryObjectuivARB GLEW_GET_FUN(__glewGetQueryObjectuivARB) +#define glGetQueryivARB GLEW_GET_FUN(__glewGetQueryivARB) +#define glIsQueryARB GLEW_GET_FUN(__glewIsQueryARB) + +#define GLEW_ARB_occlusion_query GLEW_GET_VAR(__GLEW_ARB_occlusion_query) + +#endif /* GL_ARB_occlusion_query */ + +/* ----------------------- GL_ARB_pixel_buffer_object ---------------------- */ + +#ifndef GL_ARB_pixel_buffer_object +#define GL_ARB_pixel_buffer_object 1 + +#define GL_PIXEL_PACK_BUFFER_ARB 0x88EB +#define GL_PIXEL_UNPACK_BUFFER_ARB 0x88EC +#define GL_PIXEL_PACK_BUFFER_BINDING_ARB 0x88ED +#define GL_PIXEL_UNPACK_BUFFER_BINDING_ARB 0x88EF + +#define GLEW_ARB_pixel_buffer_object GLEW_GET_VAR(__GLEW_ARB_pixel_buffer_object) + +#endif /* GL_ARB_pixel_buffer_object */ + +/* ------------------------ GL_ARB_point_parameters ------------------------ */ + +#ifndef GL_ARB_point_parameters +#define GL_ARB_point_parameters 1 + +#define GL_POINT_SIZE_MIN_ARB 0x8126 +#define GL_POINT_SIZE_MAX_ARB 0x8127 +#define GL_POINT_FADE_THRESHOLD_SIZE_ARB 0x8128 +#define GL_POINT_DISTANCE_ATTENUATION_ARB 0x8129 + +typedef void (GLAPIENTRY * PFNGLPOINTPARAMETERFARBPROC) (GLenum pname, GLfloat param); +typedef void (GLAPIENTRY * PFNGLPOINTPARAMETERFVARBPROC) (GLenum pname, GLfloat* params); + +#define glPointParameterfARB GLEW_GET_FUN(__glewPointParameterfARB) +#define glPointParameterfvARB GLEW_GET_FUN(__glewPointParameterfvARB) + +#define GLEW_ARB_point_parameters GLEW_GET_VAR(__GLEW_ARB_point_parameters) 
+ +#endif /* GL_ARB_point_parameters */ + +/* -------------------------- GL_ARB_point_sprite -------------------------- */ + +#ifndef GL_ARB_point_sprite +#define GL_ARB_point_sprite 1 + +#define GL_POINT_SPRITE_ARB 0x8861 +#define GL_COORD_REPLACE_ARB 0x8862 + +#define GLEW_ARB_point_sprite GLEW_GET_VAR(__GLEW_ARB_point_sprite) + +#endif /* GL_ARB_point_sprite */ + +/* ------------------------- GL_ARB_shader_objects ------------------------- */ + +#ifndef GL_ARB_shader_objects +#define GL_ARB_shader_objects 1 + +#define GL_PROGRAM_OBJECT_ARB 0x8B40 +#define GL_SHADER_OBJECT_ARB 0x8B48 +#define GL_OBJECT_TYPE_ARB 0x8B4E +#define GL_OBJECT_SUBTYPE_ARB 0x8B4F +#define GL_FLOAT_VEC2_ARB 0x8B50 +#define GL_FLOAT_VEC3_ARB 0x8B51 +#define GL_FLOAT_VEC4_ARB 0x8B52 +#define GL_INT_VEC2_ARB 0x8B53 +#define GL_INT_VEC3_ARB 0x8B54 +#define GL_INT_VEC4_ARB 0x8B55 +#define GL_BOOL_ARB 0x8B56 +#define GL_BOOL_VEC2_ARB 0x8B57 +#define GL_BOOL_VEC3_ARB 0x8B58 +#define GL_BOOL_VEC4_ARB 0x8B59 +#define GL_FLOAT_MAT2_ARB 0x8B5A +#define GL_FLOAT_MAT3_ARB 0x8B5B +#define GL_FLOAT_MAT4_ARB 0x8B5C +#define GL_SAMPLER_1D_ARB 0x8B5D +#define GL_SAMPLER_2D_ARB 0x8B5E +#define GL_SAMPLER_3D_ARB 0x8B5F +#define GL_SAMPLER_CUBE_ARB 0x8B60 +#define GL_SAMPLER_1D_SHADOW_ARB 0x8B61 +#define GL_SAMPLER_2D_SHADOW_ARB 0x8B62 +#define GL_SAMPLER_2D_RECT_ARB 0x8B63 +#define GL_SAMPLER_2D_RECT_SHADOW_ARB 0x8B64 +#define GL_OBJECT_DELETE_STATUS_ARB 0x8B80 +#define GL_OBJECT_COMPILE_STATUS_ARB 0x8B81 +#define GL_OBJECT_LINK_STATUS_ARB 0x8B82 +#define GL_OBJECT_VALIDATE_STATUS_ARB 0x8B83 +#define GL_OBJECT_INFO_LOG_LENGTH_ARB 0x8B84 +#define GL_OBJECT_ATTACHED_OBJECTS_ARB 0x8B85 +#define GL_OBJECT_ACTIVE_UNIFORMS_ARB 0x8B86 +#define GL_OBJECT_ACTIVE_UNIFORM_MAX_LENGTH_ARB 0x8B87 +#define GL_OBJECT_SHADER_SOURCE_LENGTH_ARB 0x8B88 + +typedef char GLcharARB; +typedef unsigned int GLhandleARB; + +typedef void (GLAPIENTRY * PFNGLATTACHOBJECTARBPROC) (GLhandleARB containerObj, GLhandleARB obj); +typedef void 
(GLAPIENTRY * PFNGLCOMPILESHADERARBPROC) (GLhandleARB shaderObj); +typedef GLhandleARB (GLAPIENTRY * PFNGLCREATEPROGRAMOBJECTARBPROC) (void); +typedef GLhandleARB (GLAPIENTRY * PFNGLCREATESHADEROBJECTARBPROC) (GLenum shaderType); +typedef void (GLAPIENTRY * PFNGLDELETEOBJECTARBPROC) (GLhandleARB obj); +typedef void (GLAPIENTRY * PFNGLDETACHOBJECTARBPROC) (GLhandleARB containerObj, GLhandleARB attachedObj); +typedef void (GLAPIENTRY * PFNGLGETACTIVEUNIFORMARBPROC) (GLhandleARB programObj, GLuint index, GLsizei maxLength, GLsizei* length, GLint *size, GLenum *type, GLcharARB *name); +typedef void (GLAPIENTRY * PFNGLGETATTACHEDOBJECTSARBPROC) (GLhandleARB containerObj, GLsizei maxCount, GLsizei* count, GLhandleARB *obj); +typedef GLhandleARB (GLAPIENTRY * PFNGLGETHANDLEARBPROC) (GLenum pname); +typedef void (GLAPIENTRY * PFNGLGETINFOLOGARBPROC) (GLhandleARB obj, GLsizei maxLength, GLsizei* length, GLcharARB *infoLog); +typedef void (GLAPIENTRY * PFNGLGETOBJECTPARAMETERFVARBPROC) (GLhandleARB obj, GLenum pname, GLfloat* params); +typedef void (GLAPIENTRY * PFNGLGETOBJECTPARAMETERIVARBPROC) (GLhandleARB obj, GLenum pname, GLint* params); +typedef void (GLAPIENTRY * PFNGLGETSHADERSOURCEARBPROC) (GLhandleARB obj, GLsizei maxLength, GLsizei* length, GLcharARB *source); +typedef GLint (GLAPIENTRY * PFNGLGETUNIFORMLOCATIONARBPROC) (GLhandleARB programObj, const GLcharARB* name); +typedef void (GLAPIENTRY * PFNGLGETUNIFORMFVARBPROC) (GLhandleARB programObj, GLint location, GLfloat* params); +typedef void (GLAPIENTRY * PFNGLGETUNIFORMIVARBPROC) (GLhandleARB programObj, GLint location, GLint* params); +typedef void (GLAPIENTRY * PFNGLLINKPROGRAMARBPROC) (GLhandleARB programObj); +typedef void (GLAPIENTRY * PFNGLSHADERSOURCEARBPROC) (GLhandleARB shaderObj, GLsizei count, const GLcharARB ** string, const GLint *length); +typedef void (GLAPIENTRY * PFNGLUNIFORM1FARBPROC) (GLint location, GLfloat v0); +typedef void (GLAPIENTRY * PFNGLUNIFORM1FVARBPROC) (GLint location, GLsizei 
count, const GLfloat* value); +typedef void (GLAPIENTRY * PFNGLUNIFORM1IARBPROC) (GLint location, GLint v0); +typedef void (GLAPIENTRY * PFNGLUNIFORM1IVARBPROC) (GLint location, GLsizei count, const GLint* value); +typedef void (GLAPIENTRY * PFNGLUNIFORM2FARBPROC) (GLint location, GLfloat v0, GLfloat v1); +typedef void (GLAPIENTRY * PFNGLUNIFORM2FVARBPROC) (GLint location, GLsizei count, const GLfloat* value); +typedef void (GLAPIENTRY * PFNGLUNIFORM2IARBPROC) (GLint location, GLint v0, GLint v1); +typedef void (GLAPIENTRY * PFNGLUNIFORM2IVARBPROC) (GLint location, GLsizei count, const GLint* value); +typedef void (GLAPIENTRY * PFNGLUNIFORM3FARBPROC) (GLint location, GLfloat v0, GLfloat v1, GLfloat v2); +typedef void (GLAPIENTRY * PFNGLUNIFORM3FVARBPROC) (GLint location, GLsizei count, const GLfloat* value); +typedef void (GLAPIENTRY * PFNGLUNIFORM3IARBPROC) (GLint location, GLint v0, GLint v1, GLint v2); +typedef void (GLAPIENTRY * PFNGLUNIFORM3IVARBPROC) (GLint location, GLsizei count, const GLint* value); +typedef void (GLAPIENTRY * PFNGLUNIFORM4FARBPROC) (GLint location, GLfloat v0, GLfloat v1, GLfloat v2, GLfloat v3); +typedef void (GLAPIENTRY * PFNGLUNIFORM4FVARBPROC) (GLint location, GLsizei count, const GLfloat* value); +typedef void (GLAPIENTRY * PFNGLUNIFORM4IARBPROC) (GLint location, GLint v0, GLint v1, GLint v2, GLint v3); +typedef void (GLAPIENTRY * PFNGLUNIFORM4IVARBPROC) (GLint location, GLsizei count, const GLint* value); +typedef void (GLAPIENTRY * PFNGLUNIFORMMATRIX2FVARBPROC) (GLint location, GLsizei count, GLboolean transpose, const GLfloat* value); +typedef void (GLAPIENTRY * PFNGLUNIFORMMATRIX3FVARBPROC) (GLint location, GLsizei count, GLboolean transpose, const GLfloat* value); +typedef void (GLAPIENTRY * PFNGLUNIFORMMATRIX4FVARBPROC) (GLint location, GLsizei count, GLboolean transpose, const GLfloat* value); +typedef void (GLAPIENTRY * PFNGLUSEPROGRAMOBJECTARBPROC) (GLhandleARB programObj); +typedef void (GLAPIENTRY * 
PFNGLVALIDATEPROGRAMARBPROC) (GLhandleARB programObj); + +#define glAttachObjectARB GLEW_GET_FUN(__glewAttachObjectARB) +#define glCompileShaderARB GLEW_GET_FUN(__glewCompileShaderARB) +#define glCreateProgramObjectARB GLEW_GET_FUN(__glewCreateProgramObjectARB) +#define glCreateShaderObjectARB GLEW_GET_FUN(__glewCreateShaderObjectARB) +#define glDeleteObjectARB GLEW_GET_FUN(__glewDeleteObjectARB) +#define glDetachObjectARB GLEW_GET_FUN(__glewDetachObjectARB) +#define glGetActiveUniformARB GLEW_GET_FUN(__glewGetActiveUniformARB) +#define glGetAttachedObjectsARB GLEW_GET_FUN(__glewGetAttachedObjectsARB) +#define glGetHandleARB GLEW_GET_FUN(__glewGetHandleARB) +#define glGetInfoLogARB GLEW_GET_FUN(__glewGetInfoLogARB) +#define glGetObjectParameterfvARB GLEW_GET_FUN(__glewGetObjectParameterfvARB) +#define glGetObjectParameterivARB GLEW_GET_FUN(__glewGetObjectParameterivARB) +#define glGetShaderSourceARB GLEW_GET_FUN(__glewGetShaderSourceARB) +#define glGetUniformLocationARB GLEW_GET_FUN(__glewGetUniformLocationARB) +#define glGetUniformfvARB GLEW_GET_FUN(__glewGetUniformfvARB) +#define glGetUniformivARB GLEW_GET_FUN(__glewGetUniformivARB) +#define glLinkProgramARB GLEW_GET_FUN(__glewLinkProgramARB) +#define glShaderSourceARB GLEW_GET_FUN(__glewShaderSourceARB) +#define glUniform1fARB GLEW_GET_FUN(__glewUniform1fARB) +#define glUniform1fvARB GLEW_GET_FUN(__glewUniform1fvARB) +#define glUniform1iARB GLEW_GET_FUN(__glewUniform1iARB) +#define glUniform1ivARB GLEW_GET_FUN(__glewUniform1ivARB) +#define glUniform2fARB GLEW_GET_FUN(__glewUniform2fARB) +#define glUniform2fvARB GLEW_GET_FUN(__glewUniform2fvARB) +#define glUniform2iARB GLEW_GET_FUN(__glewUniform2iARB) +#define glUniform2ivARB GLEW_GET_FUN(__glewUniform2ivARB) +#define glUniform3fARB GLEW_GET_FUN(__glewUniform3fARB) +#define glUniform3fvARB GLEW_GET_FUN(__glewUniform3fvARB) +#define glUniform3iARB GLEW_GET_FUN(__glewUniform3iARB) +#define glUniform3ivARB GLEW_GET_FUN(__glewUniform3ivARB) +#define glUniform4fARB 
GLEW_GET_FUN(__glewUniform4fARB) +#define glUniform4fvARB GLEW_GET_FUN(__glewUniform4fvARB) +#define glUniform4iARB GLEW_GET_FUN(__glewUniform4iARB) +#define glUniform4ivARB GLEW_GET_FUN(__glewUniform4ivARB) +#define glUniformMatrix2fvARB GLEW_GET_FUN(__glewUniformMatrix2fvARB) +#define glUniformMatrix3fvARB GLEW_GET_FUN(__glewUniformMatrix3fvARB) +#define glUniformMatrix4fvARB GLEW_GET_FUN(__glewUniformMatrix4fvARB) +#define glUseProgramObjectARB GLEW_GET_FUN(__glewUseProgramObjectARB) +#define glValidateProgramARB GLEW_GET_FUN(__glewValidateProgramARB) + +#define GLEW_ARB_shader_objects GLEW_GET_VAR(__GLEW_ARB_shader_objects) + +#endif /* GL_ARB_shader_objects */ + +/* ---------------------- GL_ARB_shading_language_100 ---------------------- */ + +#ifndef GL_ARB_shading_language_100 +#define GL_ARB_shading_language_100 1 + +#define GL_SHADING_LANGUAGE_VERSION_ARB 0x8B8C + +#define GLEW_ARB_shading_language_100 GLEW_GET_VAR(__GLEW_ARB_shading_language_100) + +#endif /* GL_ARB_shading_language_100 */ + +/* ----------------------------- GL_ARB_shadow ----------------------------- */ + +#ifndef GL_ARB_shadow +#define GL_ARB_shadow 1 + +#define GL_TEXTURE_COMPARE_MODE_ARB 0x884C +#define GL_TEXTURE_COMPARE_FUNC_ARB 0x884D +#define GL_COMPARE_R_TO_TEXTURE_ARB 0x884E + +#define GLEW_ARB_shadow GLEW_GET_VAR(__GLEW_ARB_shadow) + +#endif /* GL_ARB_shadow */ + +/* ------------------------- GL_ARB_shadow_ambient ------------------------- */ + +#ifndef GL_ARB_shadow_ambient +#define GL_ARB_shadow_ambient 1 + +#define GL_TEXTURE_COMPARE_FAIL_VALUE_ARB 0x80BF + +#define GLEW_ARB_shadow_ambient GLEW_GET_VAR(__GLEW_ARB_shadow_ambient) + +#endif /* GL_ARB_shadow_ambient */ + +/* ---------------------- GL_ARB_texture_border_clamp ---------------------- */ + +#ifndef GL_ARB_texture_border_clamp +#define GL_ARB_texture_border_clamp 1 + +#define GL_CLAMP_TO_BORDER_ARB 0x812D + +#define GLEW_ARB_texture_border_clamp GLEW_GET_VAR(__GLEW_ARB_texture_border_clamp) + +#endif /* 
GL_ARB_texture_border_clamp */ + +/* ---------------------- GL_ARB_texture_buffer_object --------------------- */ + +#ifndef GL_ARB_texture_buffer_object +#define GL_ARB_texture_buffer_object 1 + +#define GL_TEXTURE_BUFFER_ARB 0x8C2A +#define GL_MAX_TEXTURE_BUFFER_SIZE_ARB 0x8C2B +#define GL_TEXTURE_BINDING_BUFFER_ARB 0x8C2C +#define GL_TEXTURE_BUFFER_DATA_STORE_BINDING_ARB 0x8C2D +#define GL_TEXTURE_BUFFER_FORMAT_ARB 0x8C2E + +typedef void (GLAPIENTRY * PFNGLTEXBUFFERARBPROC) (GLenum target, GLenum internalformat, GLuint buffer); + +#define glTexBufferARB GLEW_GET_FUN(__glewTexBufferARB) + +#define GLEW_ARB_texture_buffer_object GLEW_GET_VAR(__GLEW_ARB_texture_buffer_object) + +#endif /* GL_ARB_texture_buffer_object */ + +/* ----------------------- GL_ARB_texture_compression ---------------------- */ + +#ifndef GL_ARB_texture_compression +#define GL_ARB_texture_compression 1 + +#define GL_COMPRESSED_ALPHA_ARB 0x84E9 +#define GL_COMPRESSED_LUMINANCE_ARB 0x84EA +#define GL_COMPRESSED_LUMINANCE_ALPHA_ARB 0x84EB +#define GL_COMPRESSED_INTENSITY_ARB 0x84EC +#define GL_COMPRESSED_RGB_ARB 0x84ED +#define GL_COMPRESSED_RGBA_ARB 0x84EE +#define GL_TEXTURE_COMPRESSION_HINT_ARB 0x84EF +#define GL_TEXTURE_COMPRESSED_IMAGE_SIZE_ARB 0x86A0 +#define GL_TEXTURE_COMPRESSED_ARB 0x86A1 +#define GL_NUM_COMPRESSED_TEXTURE_FORMATS_ARB 0x86A2 +#define GL_COMPRESSED_TEXTURE_FORMATS_ARB 0x86A3 + +typedef void (GLAPIENTRY * PFNGLCOMPRESSEDTEXIMAGE1DARBPROC) (GLenum target, GLint level, GLenum internalformat, GLsizei width, GLint border, GLsizei imageSize, const void* data); +typedef void (GLAPIENTRY * PFNGLCOMPRESSEDTEXIMAGE2DARBPROC) (GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLint border, GLsizei imageSize, const void* data); +typedef void (GLAPIENTRY * PFNGLCOMPRESSEDTEXIMAGE3DARBPROC) (GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth, GLint border, GLsizei imageSize, const void* data); +typedef void 
(GLAPIENTRY * PFNGLCOMPRESSEDTEXSUBIMAGE1DARBPROC) (GLenum target, GLint level, GLint xoffset, GLsizei width, GLenum format, GLsizei imageSize, const void* data); +typedef void (GLAPIENTRY * PFNGLCOMPRESSEDTEXSUBIMAGE2DARBPROC) (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLsizei imageSize, const void* data); +typedef void (GLAPIENTRY * PFNGLCOMPRESSEDTEXSUBIMAGE3DARBPROC) (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLsizei imageSize, const void* data); +typedef void (GLAPIENTRY * PFNGLGETCOMPRESSEDTEXIMAGEARBPROC) (GLenum target, GLint lod, void* img); + +#define glCompressedTexImage1DARB GLEW_GET_FUN(__glewCompressedTexImage1DARB) +#define glCompressedTexImage2DARB GLEW_GET_FUN(__glewCompressedTexImage2DARB) +#define glCompressedTexImage3DARB GLEW_GET_FUN(__glewCompressedTexImage3DARB) +#define glCompressedTexSubImage1DARB GLEW_GET_FUN(__glewCompressedTexSubImage1DARB) +#define glCompressedTexSubImage2DARB GLEW_GET_FUN(__glewCompressedTexSubImage2DARB) +#define glCompressedTexSubImage3DARB GLEW_GET_FUN(__glewCompressedTexSubImage3DARB) +#define glGetCompressedTexImageARB GLEW_GET_FUN(__glewGetCompressedTexImageARB) + +#define GLEW_ARB_texture_compression GLEW_GET_VAR(__GLEW_ARB_texture_compression) + +#endif /* GL_ARB_texture_compression */ + +/* -------------------- GL_ARB_texture_compression_rgtc -------------------- */ + +#ifndef GL_ARB_texture_compression_rgtc +#define GL_ARB_texture_compression_rgtc 1 + +#define GL_COMPRESSED_RED_RGTC1 0x8DBB +#define GL_COMPRESSED_SIGNED_RED_RGTC1 0x8DBC +#define GL_COMPRESSED_RG_RGTC2 0x8DBD +#define GL_COMPRESSED_SIGNED_RG_RGTC2 0x8DBE + +#define GLEW_ARB_texture_compression_rgtc GLEW_GET_VAR(__GLEW_ARB_texture_compression_rgtc) + +#endif /* GL_ARB_texture_compression_rgtc */ + +/* ------------------------ GL_ARB_texture_cube_map ------------------------ */ + +#ifndef 
GL_ARB_texture_cube_map +#define GL_ARB_texture_cube_map 1 + +#define GL_NORMAL_MAP_ARB 0x8511 +#define GL_REFLECTION_MAP_ARB 0x8512 +#define GL_TEXTURE_CUBE_MAP_ARB 0x8513 +#define GL_TEXTURE_BINDING_CUBE_MAP_ARB 0x8514 +#define GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB 0x8515 +#define GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB 0x8516 +#define GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB 0x8517 +#define GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB 0x8518 +#define GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB 0x8519 +#define GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB 0x851A +#define GL_PROXY_TEXTURE_CUBE_MAP_ARB 0x851B +#define GL_MAX_CUBE_MAP_TEXTURE_SIZE_ARB 0x851C + +#define GLEW_ARB_texture_cube_map GLEW_GET_VAR(__GLEW_ARB_texture_cube_map) + +#endif /* GL_ARB_texture_cube_map */ + +/* ------------------------- GL_ARB_texture_env_add ------------------------ */ + +#ifndef GL_ARB_texture_env_add +#define GL_ARB_texture_env_add 1 + +#define GLEW_ARB_texture_env_add GLEW_GET_VAR(__GLEW_ARB_texture_env_add) + +#endif /* GL_ARB_texture_env_add */ + +/* ----------------------- GL_ARB_texture_env_combine ---------------------- */ + +#ifndef GL_ARB_texture_env_combine +#define GL_ARB_texture_env_combine 1 + +#define GL_SUBTRACT_ARB 0x84E7 +#define GL_COMBINE_ARB 0x8570 +#define GL_COMBINE_RGB_ARB 0x8571 +#define GL_COMBINE_ALPHA_ARB 0x8572 +#define GL_RGB_SCALE_ARB 0x8573 +#define GL_ADD_SIGNED_ARB 0x8574 +#define GL_INTERPOLATE_ARB 0x8575 +#define GL_CONSTANT_ARB 0x8576 +#define GL_PRIMARY_COLOR_ARB 0x8577 +#define GL_PREVIOUS_ARB 0x8578 +#define GL_SOURCE0_RGB_ARB 0x8580 +#define GL_SOURCE1_RGB_ARB 0x8581 +#define GL_SOURCE2_RGB_ARB 0x8582 +#define GL_SOURCE0_ALPHA_ARB 0x8588 +#define GL_SOURCE1_ALPHA_ARB 0x8589 +#define GL_SOURCE2_ALPHA_ARB 0x858A +#define GL_OPERAND0_RGB_ARB 0x8590 +#define GL_OPERAND1_RGB_ARB 0x8591 +#define GL_OPERAND2_RGB_ARB 0x8592 +#define GL_OPERAND0_ALPHA_ARB 0x8598 +#define GL_OPERAND1_ALPHA_ARB 0x8599 +#define GL_OPERAND2_ALPHA_ARB 0x859A + +#define GLEW_ARB_texture_env_combine 
GLEW_GET_VAR(__GLEW_ARB_texture_env_combine) + +#endif /* GL_ARB_texture_env_combine */ + +/* ---------------------- GL_ARB_texture_env_crossbar ---------------------- */ + +#ifndef GL_ARB_texture_env_crossbar +#define GL_ARB_texture_env_crossbar 1 + +#define GLEW_ARB_texture_env_crossbar GLEW_GET_VAR(__GLEW_ARB_texture_env_crossbar) + +#endif /* GL_ARB_texture_env_crossbar */ + +/* ------------------------ GL_ARB_texture_env_dot3 ------------------------ */ + +#ifndef GL_ARB_texture_env_dot3 +#define GL_ARB_texture_env_dot3 1 + +#define GL_DOT3_RGB_ARB 0x86AE +#define GL_DOT3_RGBA_ARB 0x86AF + +#define GLEW_ARB_texture_env_dot3 GLEW_GET_VAR(__GLEW_ARB_texture_env_dot3) + +#endif /* GL_ARB_texture_env_dot3 */ + +/* -------------------------- GL_ARB_texture_float ------------------------- */ + +#ifndef GL_ARB_texture_float +#define GL_ARB_texture_float 1 + +#define GL_RGBA32F_ARB 0x8814 +#define GL_RGB32F_ARB 0x8815 +#define GL_ALPHA32F_ARB 0x8816 +#define GL_INTENSITY32F_ARB 0x8817 +#define GL_LUMINANCE32F_ARB 0x8818 +#define GL_LUMINANCE_ALPHA32F_ARB 0x8819 +#define GL_RGBA16F_ARB 0x881A +#define GL_RGB16F_ARB 0x881B +#define GL_ALPHA16F_ARB 0x881C +#define GL_INTENSITY16F_ARB 0x881D +#define GL_LUMINANCE16F_ARB 0x881E +#define GL_LUMINANCE_ALPHA16F_ARB 0x881F +#define GL_TEXTURE_RED_TYPE_ARB 0x8C10 +#define GL_TEXTURE_GREEN_TYPE_ARB 0x8C11 +#define GL_TEXTURE_BLUE_TYPE_ARB 0x8C12 +#define GL_TEXTURE_ALPHA_TYPE_ARB 0x8C13 +#define GL_TEXTURE_LUMINANCE_TYPE_ARB 0x8C14 +#define GL_TEXTURE_INTENSITY_TYPE_ARB 0x8C15 +#define GL_TEXTURE_DEPTH_TYPE_ARB 0x8C16 +#define GL_UNSIGNED_NORMALIZED_ARB 0x8C17 + +#define GLEW_ARB_texture_float GLEW_GET_VAR(__GLEW_ARB_texture_float) + +#endif /* GL_ARB_texture_float */ + +/* --------------------- GL_ARB_texture_mirrored_repeat -------------------- */ + +#ifndef GL_ARB_texture_mirrored_repeat +#define GL_ARB_texture_mirrored_repeat 1 + +#define GL_MIRRORED_REPEAT_ARB 0x8370 + +#define GLEW_ARB_texture_mirrored_repeat 
GLEW_GET_VAR(__GLEW_ARB_texture_mirrored_repeat) + +#endif /* GL_ARB_texture_mirrored_repeat */ + +/* -------------------- GL_ARB_texture_non_power_of_two -------------------- */ + +#ifndef GL_ARB_texture_non_power_of_two +#define GL_ARB_texture_non_power_of_two 1 + +#define GLEW_ARB_texture_non_power_of_two GLEW_GET_VAR(__GLEW_ARB_texture_non_power_of_two) + +#endif /* GL_ARB_texture_non_power_of_two */ + +/* ------------------------ GL_ARB_texture_rectangle ----------------------- */ + +#ifndef GL_ARB_texture_rectangle +#define GL_ARB_texture_rectangle 1 + +#define GL_TEXTURE_RECTANGLE_ARB 0x84F5 +#define GL_TEXTURE_BINDING_RECTANGLE_ARB 0x84F6 +#define GL_PROXY_TEXTURE_RECTANGLE_ARB 0x84F7 +#define GL_MAX_RECTANGLE_TEXTURE_SIZE_ARB 0x84F8 +#define GL_SAMPLER_2D_RECT_ARB 0x8B63 +#define GL_SAMPLER_2D_RECT_SHADOW_ARB 0x8B64 + +#define GLEW_ARB_texture_rectangle GLEW_GET_VAR(__GLEW_ARB_texture_rectangle) + +#endif /* GL_ARB_texture_rectangle */ + +/* --------------------------- GL_ARB_texture_rg --------------------------- */ + +#ifndef GL_ARB_texture_rg +#define GL_ARB_texture_rg 1 + +#define GL_RED 0x1903 +#define GL_RG 0x8227 +#define GL_RG_INTEGER 0x8228 +#define GL_R8 0x8229 +#define GL_R16 0x822A +#define GL_RG8 0x822B +#define GL_RG16 0x822C +#define GL_R16F 0x822D +#define GL_R32F 0x822E +#define GL_RG16F 0x822F +#define GL_RG32F 0x8230 +#define GL_R8I 0x8231 +#define GL_R8UI 0x8232 +#define GL_R16I 0x8233 +#define GL_R16UI 0x8234 +#define GL_R32I 0x8235 +#define GL_R32UI 0x8236 +#define GL_RG8I 0x8237 +#define GL_RG8UI 0x8238 +#define GL_RG16I 0x8239 +#define GL_RG16UI 0x823A +#define GL_RG32I 0x823B +#define GL_RG32UI 0x823C + +#define GLEW_ARB_texture_rg GLEW_GET_VAR(__GLEW_ARB_texture_rg) + +#endif /* GL_ARB_texture_rg */ + +/* ------------------------ GL_ARB_transpose_matrix ------------------------ */ + +#ifndef GL_ARB_transpose_matrix +#define GL_ARB_transpose_matrix 1 + +#define GL_TRANSPOSE_MODELVIEW_MATRIX_ARB 0x84E3 +#define 
GL_TRANSPOSE_PROJECTION_MATRIX_ARB 0x84E4 +#define GL_TRANSPOSE_TEXTURE_MATRIX_ARB 0x84E5 +#define GL_TRANSPOSE_COLOR_MATRIX_ARB 0x84E6 + +typedef void (GLAPIENTRY * PFNGLLOADTRANSPOSEMATRIXDARBPROC) (GLdouble m[16]); +typedef void (GLAPIENTRY * PFNGLLOADTRANSPOSEMATRIXFARBPROC) (GLfloat m[16]); +typedef void (GLAPIENTRY * PFNGLMULTTRANSPOSEMATRIXDARBPROC) (GLdouble m[16]); +typedef void (GLAPIENTRY * PFNGLMULTTRANSPOSEMATRIXFARBPROC) (GLfloat m[16]); + +#define glLoadTransposeMatrixdARB GLEW_GET_FUN(__glewLoadTransposeMatrixdARB) +#define glLoadTransposeMatrixfARB GLEW_GET_FUN(__glewLoadTransposeMatrixfARB) +#define glMultTransposeMatrixdARB GLEW_GET_FUN(__glewMultTransposeMatrixdARB) +#define glMultTransposeMatrixfARB GLEW_GET_FUN(__glewMultTransposeMatrixfARB) + +#define GLEW_ARB_transpose_matrix GLEW_GET_VAR(__GLEW_ARB_transpose_matrix) + +#endif /* GL_ARB_transpose_matrix */ + +/* ----------------------- GL_ARB_vertex_array_object ---------------------- */ + +#ifndef GL_ARB_vertex_array_object +#define GL_ARB_vertex_array_object 1 + +#define GL_VERTEX_ARRAY_BINDING 0x85B5 + +typedef void (GLAPIENTRY * PFNGLBINDVERTEXARRAYPROC) (GLuint array); +typedef void (GLAPIENTRY * PFNGLDELETEVERTEXARRAYSPROC) (GLsizei n, const GLuint* arrays); +typedef void (GLAPIENTRY * PFNGLGENVERTEXARRAYSPROC) (GLsizei n, GLuint* arrays); +typedef GLboolean (GLAPIENTRY * PFNGLISVERTEXARRAYPROC) (GLuint array); + +#define glBindVertexArray GLEW_GET_FUN(__glewBindVertexArray) +#define glDeleteVertexArrays GLEW_GET_FUN(__glewDeleteVertexArrays) +#define glGenVertexArrays GLEW_GET_FUN(__glewGenVertexArrays) +#define glIsVertexArray GLEW_GET_FUN(__glewIsVertexArray) + +#define GLEW_ARB_vertex_array_object GLEW_GET_VAR(__GLEW_ARB_vertex_array_object) + +#endif /* GL_ARB_vertex_array_object */ + +/* -------------------------- GL_ARB_vertex_blend -------------------------- */ + +#ifndef GL_ARB_vertex_blend +#define GL_ARB_vertex_blend 1 + +#define GL_MODELVIEW0_ARB 0x1700 +#define 
GL_MODELVIEW1_ARB 0x850A +#define GL_MAX_VERTEX_UNITS_ARB 0x86A4 +#define GL_ACTIVE_VERTEX_UNITS_ARB 0x86A5 +#define GL_WEIGHT_SUM_UNITY_ARB 0x86A6 +#define GL_VERTEX_BLEND_ARB 0x86A7 +#define GL_CURRENT_WEIGHT_ARB 0x86A8 +#define GL_WEIGHT_ARRAY_TYPE_ARB 0x86A9 +#define GL_WEIGHT_ARRAY_STRIDE_ARB 0x86AA +#define GL_WEIGHT_ARRAY_SIZE_ARB 0x86AB +#define GL_WEIGHT_ARRAY_POINTER_ARB 0x86AC +#define GL_WEIGHT_ARRAY_ARB 0x86AD +#define GL_MODELVIEW2_ARB 0x8722 +#define GL_MODELVIEW3_ARB 0x8723 +#define GL_MODELVIEW4_ARB 0x8724 +#define GL_MODELVIEW5_ARB 0x8725 +#define GL_MODELVIEW6_ARB 0x8726 +#define GL_MODELVIEW7_ARB 0x8727 +#define GL_MODELVIEW8_ARB 0x8728 +#define GL_MODELVIEW9_ARB 0x8729 +#define GL_MODELVIEW10_ARB 0x872A +#define GL_MODELVIEW11_ARB 0x872B +#define GL_MODELVIEW12_ARB 0x872C +#define GL_MODELVIEW13_ARB 0x872D +#define GL_MODELVIEW14_ARB 0x872E +#define GL_MODELVIEW15_ARB 0x872F +#define GL_MODELVIEW16_ARB 0x8730 +#define GL_MODELVIEW17_ARB 0x8731 +#define GL_MODELVIEW18_ARB 0x8732 +#define GL_MODELVIEW19_ARB 0x8733 +#define GL_MODELVIEW20_ARB 0x8734 +#define GL_MODELVIEW21_ARB 0x8735 +#define GL_MODELVIEW22_ARB 0x8736 +#define GL_MODELVIEW23_ARB 0x8737 +#define GL_MODELVIEW24_ARB 0x8738 +#define GL_MODELVIEW25_ARB 0x8739 +#define GL_MODELVIEW26_ARB 0x873A +#define GL_MODELVIEW27_ARB 0x873B +#define GL_MODELVIEW28_ARB 0x873C +#define GL_MODELVIEW29_ARB 0x873D +#define GL_MODELVIEW30_ARB 0x873E +#define GL_MODELVIEW31_ARB 0x873F + +typedef void (GLAPIENTRY * PFNGLVERTEXBLENDARBPROC) (GLint count); +typedef void (GLAPIENTRY * PFNGLWEIGHTPOINTERARBPROC) (GLint size, GLenum type, GLsizei stride, GLvoid *pointer); +typedef void (GLAPIENTRY * PFNGLWEIGHTBVARBPROC) (GLint size, GLbyte *weights); +typedef void (GLAPIENTRY * PFNGLWEIGHTDVARBPROC) (GLint size, GLdouble *weights); +typedef void (GLAPIENTRY * PFNGLWEIGHTFVARBPROC) (GLint size, GLfloat *weights); +typedef void (GLAPIENTRY * PFNGLWEIGHTIVARBPROC) (GLint size, GLint *weights); +typedef void 
(GLAPIENTRY * PFNGLWEIGHTSVARBPROC) (GLint size, GLshort *weights); +typedef void (GLAPIENTRY * PFNGLWEIGHTUBVARBPROC) (GLint size, GLubyte *weights); +typedef void (GLAPIENTRY * PFNGLWEIGHTUIVARBPROC) (GLint size, GLuint *weights); +typedef void (GLAPIENTRY * PFNGLWEIGHTUSVARBPROC) (GLint size, GLushort *weights); + +#define glVertexBlendARB GLEW_GET_FUN(__glewVertexBlendARB) +#define glWeightPointerARB GLEW_GET_FUN(__glewWeightPointerARB) +#define glWeightbvARB GLEW_GET_FUN(__glewWeightbvARB) +#define glWeightdvARB GLEW_GET_FUN(__glewWeightdvARB) +#define glWeightfvARB GLEW_GET_FUN(__glewWeightfvARB) +#define glWeightivARB GLEW_GET_FUN(__glewWeightivARB) +#define glWeightsvARB GLEW_GET_FUN(__glewWeightsvARB) +#define glWeightubvARB GLEW_GET_FUN(__glewWeightubvARB) +#define glWeightuivARB GLEW_GET_FUN(__glewWeightuivARB) +#define glWeightusvARB GLEW_GET_FUN(__glewWeightusvARB) + +#define GLEW_ARB_vertex_blend GLEW_GET_VAR(__GLEW_ARB_vertex_blend) + +#endif /* GL_ARB_vertex_blend */ + +/* ---------------------- GL_ARB_vertex_buffer_object ---------------------- */ + +#ifndef GL_ARB_vertex_buffer_object +#define GL_ARB_vertex_buffer_object 1 + +#define GL_BUFFER_SIZE_ARB 0x8764 +#define GL_BUFFER_USAGE_ARB 0x8765 +#define GL_ARRAY_BUFFER_ARB 0x8892 +#define GL_ELEMENT_ARRAY_BUFFER_ARB 0x8893 +#define GL_ARRAY_BUFFER_BINDING_ARB 0x8894 +#define GL_ELEMENT_ARRAY_BUFFER_BINDING_ARB 0x8895 +#define GL_VERTEX_ARRAY_BUFFER_BINDING_ARB 0x8896 +#define GL_NORMAL_ARRAY_BUFFER_BINDING_ARB 0x8897 +#define GL_COLOR_ARRAY_BUFFER_BINDING_ARB 0x8898 +#define GL_INDEX_ARRAY_BUFFER_BINDING_ARB 0x8899 +#define GL_TEXTURE_COORD_ARRAY_BUFFER_BINDING_ARB 0x889A +#define GL_EDGE_FLAG_ARRAY_BUFFER_BINDING_ARB 0x889B +#define GL_SECONDARY_COLOR_ARRAY_BUFFER_BINDING_ARB 0x889C +#define GL_FOG_COORDINATE_ARRAY_BUFFER_BINDING_ARB 0x889D +#define GL_WEIGHT_ARRAY_BUFFER_BINDING_ARB 0x889E +#define GL_VERTEX_ATTRIB_ARRAY_BUFFER_BINDING_ARB 0x889F +#define GL_READ_ONLY_ARB 0x88B8 +#define 
GL_WRITE_ONLY_ARB 0x88B9 +#define GL_READ_WRITE_ARB 0x88BA +#define GL_BUFFER_ACCESS_ARB 0x88BB +#define GL_BUFFER_MAPPED_ARB 0x88BC +#define GL_BUFFER_MAP_POINTER_ARB 0x88BD +#define GL_STREAM_DRAW_ARB 0x88E0 +#define GL_STREAM_READ_ARB 0x88E1 +#define GL_STREAM_COPY_ARB 0x88E2 +#define GL_STATIC_DRAW_ARB 0x88E4 +#define GL_STATIC_READ_ARB 0x88E5 +#define GL_STATIC_COPY_ARB 0x88E6 +#define GL_DYNAMIC_DRAW_ARB 0x88E8 +#define GL_DYNAMIC_READ_ARB 0x88E9 +#define GL_DYNAMIC_COPY_ARB 0x88EA + +typedef ptrdiff_t GLsizeiptrARB; +typedef ptrdiff_t GLintptrARB; + +typedef void (GLAPIENTRY * PFNGLBINDBUFFERARBPROC) (GLenum target, GLuint buffer); +typedef void (GLAPIENTRY * PFNGLBUFFERDATAARBPROC) (GLenum target, GLsizeiptrARB size, const GLvoid* data, GLenum usage); +typedef void (GLAPIENTRY * PFNGLBUFFERSUBDATAARBPROC) (GLenum target, GLintptrARB offset, GLsizeiptrARB size, const GLvoid* data); +typedef void (GLAPIENTRY * PFNGLDELETEBUFFERSARBPROC) (GLsizei n, const GLuint* buffers); +typedef void (GLAPIENTRY * PFNGLGENBUFFERSARBPROC) (GLsizei n, GLuint* buffers); +typedef void (GLAPIENTRY * PFNGLGETBUFFERPARAMETERIVARBPROC) (GLenum target, GLenum pname, GLint* params); +typedef void (GLAPIENTRY * PFNGLGETBUFFERPOINTERVARBPROC) (GLenum target, GLenum pname, GLvoid** params); +typedef void (GLAPIENTRY * PFNGLGETBUFFERSUBDATAARBPROC) (GLenum target, GLintptrARB offset, GLsizeiptrARB size, GLvoid* data); +typedef GLboolean (GLAPIENTRY * PFNGLISBUFFERARBPROC) (GLuint buffer); +typedef GLvoid * (GLAPIENTRY * PFNGLMAPBUFFERARBPROC) (GLenum target, GLenum access); +typedef GLboolean (GLAPIENTRY * PFNGLUNMAPBUFFERARBPROC) (GLenum target); + +#define glBindBufferARB GLEW_GET_FUN(__glewBindBufferARB) +#define glBufferDataARB GLEW_GET_FUN(__glewBufferDataARB) +#define glBufferSubDataARB GLEW_GET_FUN(__glewBufferSubDataARB) +#define glDeleteBuffersARB GLEW_GET_FUN(__glewDeleteBuffersARB) +#define glGenBuffersARB GLEW_GET_FUN(__glewGenBuffersARB) +#define glGetBufferParameterivARB 
GLEW_GET_FUN(__glewGetBufferParameterivARB) +#define glGetBufferPointervARB GLEW_GET_FUN(__glewGetBufferPointervARB) +#define glGetBufferSubDataARB GLEW_GET_FUN(__glewGetBufferSubDataARB) +#define glIsBufferARB GLEW_GET_FUN(__glewIsBufferARB) +#define glMapBufferARB GLEW_GET_FUN(__glewMapBufferARB) +#define glUnmapBufferARB GLEW_GET_FUN(__glewUnmapBufferARB) + +#define GLEW_ARB_vertex_buffer_object GLEW_GET_VAR(__GLEW_ARB_vertex_buffer_object) + +#endif /* GL_ARB_vertex_buffer_object */ + +/* ------------------------- GL_ARB_vertex_program ------------------------- */ + +#ifndef GL_ARB_vertex_program +#define GL_ARB_vertex_program 1 + +#define GL_COLOR_SUM_ARB 0x8458 +#define GL_VERTEX_PROGRAM_ARB 0x8620 +#define GL_VERTEX_ATTRIB_ARRAY_ENABLED_ARB 0x8622 +#define GL_VERTEX_ATTRIB_ARRAY_SIZE_ARB 0x8623 +#define GL_VERTEX_ATTRIB_ARRAY_STRIDE_ARB 0x8624 +#define GL_VERTEX_ATTRIB_ARRAY_TYPE_ARB 0x8625 +#define GL_CURRENT_VERTEX_ATTRIB_ARB 0x8626 +#define GL_PROGRAM_LENGTH_ARB 0x8627 +#define GL_PROGRAM_STRING_ARB 0x8628 +#define GL_MAX_PROGRAM_MATRIX_STACK_DEPTH_ARB 0x862E +#define GL_MAX_PROGRAM_MATRICES_ARB 0x862F +#define GL_CURRENT_MATRIX_STACK_DEPTH_ARB 0x8640 +#define GL_CURRENT_MATRIX_ARB 0x8641 +#define GL_VERTEX_PROGRAM_POINT_SIZE_ARB 0x8642 +#define GL_VERTEX_PROGRAM_TWO_SIDE_ARB 0x8643 +#define GL_VERTEX_ATTRIB_ARRAY_POINTER_ARB 0x8645 +#define GL_PROGRAM_ERROR_POSITION_ARB 0x864B +#define GL_PROGRAM_BINDING_ARB 0x8677 +#define GL_MAX_VERTEX_ATTRIBS_ARB 0x8869 +#define GL_VERTEX_ATTRIB_ARRAY_NORMALIZED_ARB 0x886A +#define GL_PROGRAM_ERROR_STRING_ARB 0x8874 +#define GL_PROGRAM_FORMAT_ASCII_ARB 0x8875 +#define GL_PROGRAM_FORMAT_ARB 0x8876 +#define GL_PROGRAM_INSTRUCTIONS_ARB 0x88A0 +#define GL_MAX_PROGRAM_INSTRUCTIONS_ARB 0x88A1 +#define GL_PROGRAM_NATIVE_INSTRUCTIONS_ARB 0x88A2 +#define GL_MAX_PROGRAM_NATIVE_INSTRUCTIONS_ARB 0x88A3 +#define GL_PROGRAM_TEMPORARIES_ARB 0x88A4 +#define GL_MAX_PROGRAM_TEMPORARIES_ARB 0x88A5 +#define 
GL_PROGRAM_NATIVE_TEMPORARIES_ARB 0x88A6 +#define GL_MAX_PROGRAM_NATIVE_TEMPORARIES_ARB 0x88A7 +#define GL_PROGRAM_PARAMETERS_ARB 0x88A8 +#define GL_MAX_PROGRAM_PARAMETERS_ARB 0x88A9 +#define GL_PROGRAM_NATIVE_PARAMETERS_ARB 0x88AA +#define GL_MAX_PROGRAM_NATIVE_PARAMETERS_ARB 0x88AB +#define GL_PROGRAM_ATTRIBS_ARB 0x88AC +#define GL_MAX_PROGRAM_ATTRIBS_ARB 0x88AD +#define GL_PROGRAM_NATIVE_ATTRIBS_ARB 0x88AE +#define GL_MAX_PROGRAM_NATIVE_ATTRIBS_ARB 0x88AF +#define GL_PROGRAM_ADDRESS_REGISTERS_ARB 0x88B0 +#define GL_MAX_PROGRAM_ADDRESS_REGISTERS_ARB 0x88B1 +#define GL_PROGRAM_NATIVE_ADDRESS_REGISTERS_ARB 0x88B2 +#define GL_MAX_PROGRAM_NATIVE_ADDRESS_REGISTERS_ARB 0x88B3 +#define GL_MAX_PROGRAM_LOCAL_PARAMETERS_ARB 0x88B4 +#define GL_MAX_PROGRAM_ENV_PARAMETERS_ARB 0x88B5 +#define GL_PROGRAM_UNDER_NATIVE_LIMITS_ARB 0x88B6 +#define GL_TRANSPOSE_CURRENT_MATRIX_ARB 0x88B7 +#define GL_MATRIX0_ARB 0x88C0 +#define GL_MATRIX1_ARB 0x88C1 +#define GL_MATRIX2_ARB 0x88C2 +#define GL_MATRIX3_ARB 0x88C3 +#define GL_MATRIX4_ARB 0x88C4 +#define GL_MATRIX5_ARB 0x88C5 +#define GL_MATRIX6_ARB 0x88C6 +#define GL_MATRIX7_ARB 0x88C7 +#define GL_MATRIX8_ARB 0x88C8 +#define GL_MATRIX9_ARB 0x88C9 +#define GL_MATRIX10_ARB 0x88CA +#define GL_MATRIX11_ARB 0x88CB +#define GL_MATRIX12_ARB 0x88CC +#define GL_MATRIX13_ARB 0x88CD +#define GL_MATRIX14_ARB 0x88CE +#define GL_MATRIX15_ARB 0x88CF +#define GL_MATRIX16_ARB 0x88D0 +#define GL_MATRIX17_ARB 0x88D1 +#define GL_MATRIX18_ARB 0x88D2 +#define GL_MATRIX19_ARB 0x88D3 +#define GL_MATRIX20_ARB 0x88D4 +#define GL_MATRIX21_ARB 0x88D5 +#define GL_MATRIX22_ARB 0x88D6 +#define GL_MATRIX23_ARB 0x88D7 +#define GL_MATRIX24_ARB 0x88D8 +#define GL_MATRIX25_ARB 0x88D9 +#define GL_MATRIX26_ARB 0x88DA +#define GL_MATRIX27_ARB 0x88DB +#define GL_MATRIX28_ARB 0x88DC +#define GL_MATRIX29_ARB 0x88DD +#define GL_MATRIX30_ARB 0x88DE +#define GL_MATRIX31_ARB 0x88DF + +typedef void (GLAPIENTRY * PFNGLBINDPROGRAMARBPROC) (GLenum target, GLuint program); +typedef void 
(GLAPIENTRY * PFNGLDELETEPROGRAMSARBPROC) (GLsizei n, const GLuint* programs); +typedef void (GLAPIENTRY * PFNGLDISABLEVERTEXATTRIBARRAYARBPROC) (GLuint index); +typedef void (GLAPIENTRY * PFNGLENABLEVERTEXATTRIBARRAYARBPROC) (GLuint index); +typedef void (GLAPIENTRY * PFNGLGENPROGRAMSARBPROC) (GLsizei n, GLuint* programs); +typedef void (GLAPIENTRY * PFNGLGETPROGRAMENVPARAMETERDVARBPROC) (GLenum target, GLuint index, GLdouble* params); +typedef void (GLAPIENTRY * PFNGLGETPROGRAMENVPARAMETERFVARBPROC) (GLenum target, GLuint index, GLfloat* params); +typedef void (GLAPIENTRY * PFNGLGETPROGRAMLOCALPARAMETERDVARBPROC) (GLenum target, GLuint index, GLdouble* params); +typedef void (GLAPIENTRY * PFNGLGETPROGRAMLOCALPARAMETERFVARBPROC) (GLenum target, GLuint index, GLfloat* params); +typedef void (GLAPIENTRY * PFNGLGETPROGRAMSTRINGARBPROC) (GLenum target, GLenum pname, void* string); +typedef void (GLAPIENTRY * PFNGLGETPROGRAMIVARBPROC) (GLenum target, GLenum pname, GLint* params); +typedef void (GLAPIENTRY * PFNGLGETVERTEXATTRIBPOINTERVARBPROC) (GLuint index, GLenum pname, GLvoid** pointer); +typedef void (GLAPIENTRY * PFNGLGETVERTEXATTRIBDVARBPROC) (GLuint index, GLenum pname, GLdouble* params); +typedef void (GLAPIENTRY * PFNGLGETVERTEXATTRIBFVARBPROC) (GLuint index, GLenum pname, GLfloat* params); +typedef void (GLAPIENTRY * PFNGLGETVERTEXATTRIBIVARBPROC) (GLuint index, GLenum pname, GLint* params); +typedef GLboolean (GLAPIENTRY * PFNGLISPROGRAMARBPROC) (GLuint program); +typedef void (GLAPIENTRY * PFNGLPROGRAMENVPARAMETER4DARBPROC) (GLenum target, GLuint index, GLdouble x, GLdouble y, GLdouble z, GLdouble w); +typedef void (GLAPIENTRY * PFNGLPROGRAMENVPARAMETER4DVARBPROC) (GLenum target, GLuint index, const GLdouble* params); +typedef void (GLAPIENTRY * PFNGLPROGRAMENVPARAMETER4FARBPROC) (GLenum target, GLuint index, GLfloat x, GLfloat y, GLfloat z, GLfloat w); +typedef void (GLAPIENTRY * PFNGLPROGRAMENVPARAMETER4FVARBPROC) (GLenum target, GLuint index, const 
GLfloat* params); +typedef void (GLAPIENTRY * PFNGLPROGRAMLOCALPARAMETER4DARBPROC) (GLenum target, GLuint index, GLdouble x, GLdouble y, GLdouble z, GLdouble w); +typedef void (GLAPIENTRY * PFNGLPROGRAMLOCALPARAMETER4DVARBPROC) (GLenum target, GLuint index, const GLdouble* params); +typedef void (GLAPIENTRY * PFNGLPROGRAMLOCALPARAMETER4FARBPROC) (GLenum target, GLuint index, GLfloat x, GLfloat y, GLfloat z, GLfloat w); +typedef void (GLAPIENTRY * PFNGLPROGRAMLOCALPARAMETER4FVARBPROC) (GLenum target, GLuint index, const GLfloat* params); +typedef void (GLAPIENTRY * PFNGLPROGRAMSTRINGARBPROC) (GLenum target, GLenum format, GLsizei len, const void* string); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB1DARBPROC) (GLuint index, GLdouble x); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB1DVARBPROC) (GLuint index, const GLdouble* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB1FARBPROC) (GLuint index, GLfloat x); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB1FVARBPROC) (GLuint index, const GLfloat* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB1SARBPROC) (GLuint index, GLshort x); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB1SVARBPROC) (GLuint index, const GLshort* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB2DARBPROC) (GLuint index, GLdouble x, GLdouble y); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB2DVARBPROC) (GLuint index, const GLdouble* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB2FARBPROC) (GLuint index, GLfloat x, GLfloat y); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB2FVARBPROC) (GLuint index, const GLfloat* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB2SARBPROC) (GLuint index, GLshort x, GLshort y); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB2SVARBPROC) (GLuint index, const GLshort* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB3DARBPROC) (GLuint index, GLdouble x, GLdouble y, GLdouble z); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB3DVARBPROC) (GLuint index, const GLdouble* v); +typedef void (GLAPIENTRY * 
PFNGLVERTEXATTRIB3FARBPROC) (GLuint index, GLfloat x, GLfloat y, GLfloat z); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB3FVARBPROC) (GLuint index, const GLfloat* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB3SARBPROC) (GLuint index, GLshort x, GLshort y, GLshort z); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB3SVARBPROC) (GLuint index, const GLshort* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB4NBVARBPROC) (GLuint index, const GLbyte* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB4NIVARBPROC) (GLuint index, const GLint* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB4NSVARBPROC) (GLuint index, const GLshort* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB4NUBARBPROC) (GLuint index, GLubyte x, GLubyte y, GLubyte z, GLubyte w); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB4NUBVARBPROC) (GLuint index, const GLubyte* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB4NUIVARBPROC) (GLuint index, const GLuint* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB4NUSVARBPROC) (GLuint index, const GLushort* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB4BVARBPROC) (GLuint index, const GLbyte* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB4DARBPROC) (GLuint index, GLdouble x, GLdouble y, GLdouble z, GLdouble w); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB4DVARBPROC) (GLuint index, const GLdouble* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB4FARBPROC) (GLuint index, GLfloat x, GLfloat y, GLfloat z, GLfloat w); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB4FVARBPROC) (GLuint index, const GLfloat* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB4IVARBPROC) (GLuint index, const GLint* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB4SARBPROC) (GLuint index, GLshort x, GLshort y, GLshort z, GLshort w); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB4SVARBPROC) (GLuint index, const GLshort* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB4UBVARBPROC) (GLuint index, const GLubyte* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB4UIVARBPROC) (GLuint 
index, const GLuint* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB4USVARBPROC) (GLuint index, const GLushort* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIBPOINTERARBPROC) (GLuint index, GLint size, GLenum type, GLboolean normalized, GLsizei stride, const void* pointer); + +#define glBindProgramARB GLEW_GET_FUN(__glewBindProgramARB) +#define glDeleteProgramsARB GLEW_GET_FUN(__glewDeleteProgramsARB) +#define glDisableVertexAttribArrayARB GLEW_GET_FUN(__glewDisableVertexAttribArrayARB) +#define glEnableVertexAttribArrayARB GLEW_GET_FUN(__glewEnableVertexAttribArrayARB) +#define glGenProgramsARB GLEW_GET_FUN(__glewGenProgramsARB) +#define glGetProgramEnvParameterdvARB GLEW_GET_FUN(__glewGetProgramEnvParameterdvARB) +#define glGetProgramEnvParameterfvARB GLEW_GET_FUN(__glewGetProgramEnvParameterfvARB) +#define glGetProgramLocalParameterdvARB GLEW_GET_FUN(__glewGetProgramLocalParameterdvARB) +#define glGetProgramLocalParameterfvARB GLEW_GET_FUN(__glewGetProgramLocalParameterfvARB) +#define glGetProgramStringARB GLEW_GET_FUN(__glewGetProgramStringARB) +#define glGetProgramivARB GLEW_GET_FUN(__glewGetProgramivARB) +#define glGetVertexAttribPointervARB GLEW_GET_FUN(__glewGetVertexAttribPointervARB) +#define glGetVertexAttribdvARB GLEW_GET_FUN(__glewGetVertexAttribdvARB) +#define glGetVertexAttribfvARB GLEW_GET_FUN(__glewGetVertexAttribfvARB) +#define glGetVertexAttribivARB GLEW_GET_FUN(__glewGetVertexAttribivARB) +#define glIsProgramARB GLEW_GET_FUN(__glewIsProgramARB) +#define glProgramEnvParameter4dARB GLEW_GET_FUN(__glewProgramEnvParameter4dARB) +#define glProgramEnvParameter4dvARB GLEW_GET_FUN(__glewProgramEnvParameter4dvARB) +#define glProgramEnvParameter4fARB GLEW_GET_FUN(__glewProgramEnvParameter4fARB) +#define glProgramEnvParameter4fvARB GLEW_GET_FUN(__glewProgramEnvParameter4fvARB) +#define glProgramLocalParameter4dARB GLEW_GET_FUN(__glewProgramLocalParameter4dARB) +#define glProgramLocalParameter4dvARB GLEW_GET_FUN(__glewProgramLocalParameter4dvARB) 
+#define glProgramLocalParameter4fARB GLEW_GET_FUN(__glewProgramLocalParameter4fARB) +#define glProgramLocalParameter4fvARB GLEW_GET_FUN(__glewProgramLocalParameter4fvARB) +#define glProgramStringARB GLEW_GET_FUN(__glewProgramStringARB) +#define glVertexAttrib1dARB GLEW_GET_FUN(__glewVertexAttrib1dARB) +#define glVertexAttrib1dvARB GLEW_GET_FUN(__glewVertexAttrib1dvARB) +#define glVertexAttrib1fARB GLEW_GET_FUN(__glewVertexAttrib1fARB) +#define glVertexAttrib1fvARB GLEW_GET_FUN(__glewVertexAttrib1fvARB) +#define glVertexAttrib1sARB GLEW_GET_FUN(__glewVertexAttrib1sARB) +#define glVertexAttrib1svARB GLEW_GET_FUN(__glewVertexAttrib1svARB) +#define glVertexAttrib2dARB GLEW_GET_FUN(__glewVertexAttrib2dARB) +#define glVertexAttrib2dvARB GLEW_GET_FUN(__glewVertexAttrib2dvARB) +#define glVertexAttrib2fARB GLEW_GET_FUN(__glewVertexAttrib2fARB) +#define glVertexAttrib2fvARB GLEW_GET_FUN(__glewVertexAttrib2fvARB) +#define glVertexAttrib2sARB GLEW_GET_FUN(__glewVertexAttrib2sARB) +#define glVertexAttrib2svARB GLEW_GET_FUN(__glewVertexAttrib2svARB) +#define glVertexAttrib3dARB GLEW_GET_FUN(__glewVertexAttrib3dARB) +#define glVertexAttrib3dvARB GLEW_GET_FUN(__glewVertexAttrib3dvARB) +#define glVertexAttrib3fARB GLEW_GET_FUN(__glewVertexAttrib3fARB) +#define glVertexAttrib3fvARB GLEW_GET_FUN(__glewVertexAttrib3fvARB) +#define glVertexAttrib3sARB GLEW_GET_FUN(__glewVertexAttrib3sARB) +#define glVertexAttrib3svARB GLEW_GET_FUN(__glewVertexAttrib3svARB) +#define glVertexAttrib4NbvARB GLEW_GET_FUN(__glewVertexAttrib4NbvARB) +#define glVertexAttrib4NivARB GLEW_GET_FUN(__glewVertexAttrib4NivARB) +#define glVertexAttrib4NsvARB GLEW_GET_FUN(__glewVertexAttrib4NsvARB) +#define glVertexAttrib4NubARB GLEW_GET_FUN(__glewVertexAttrib4NubARB) +#define glVertexAttrib4NubvARB GLEW_GET_FUN(__glewVertexAttrib4NubvARB) +#define glVertexAttrib4NuivARB GLEW_GET_FUN(__glewVertexAttrib4NuivARB) +#define glVertexAttrib4NusvARB GLEW_GET_FUN(__glewVertexAttrib4NusvARB) +#define glVertexAttrib4bvARB 
GLEW_GET_FUN(__glewVertexAttrib4bvARB) +#define glVertexAttrib4dARB GLEW_GET_FUN(__glewVertexAttrib4dARB) +#define glVertexAttrib4dvARB GLEW_GET_FUN(__glewVertexAttrib4dvARB) +#define glVertexAttrib4fARB GLEW_GET_FUN(__glewVertexAttrib4fARB) +#define glVertexAttrib4fvARB GLEW_GET_FUN(__glewVertexAttrib4fvARB) +#define glVertexAttrib4ivARB GLEW_GET_FUN(__glewVertexAttrib4ivARB) +#define glVertexAttrib4sARB GLEW_GET_FUN(__glewVertexAttrib4sARB) +#define glVertexAttrib4svARB GLEW_GET_FUN(__glewVertexAttrib4svARB) +#define glVertexAttrib4ubvARB GLEW_GET_FUN(__glewVertexAttrib4ubvARB) +#define glVertexAttrib4uivARB GLEW_GET_FUN(__glewVertexAttrib4uivARB) +#define glVertexAttrib4usvARB GLEW_GET_FUN(__glewVertexAttrib4usvARB) +#define glVertexAttribPointerARB GLEW_GET_FUN(__glewVertexAttribPointerARB) + +#define GLEW_ARB_vertex_program GLEW_GET_VAR(__GLEW_ARB_vertex_program) + +#endif /* GL_ARB_vertex_program */ + +/* -------------------------- GL_ARB_vertex_shader ------------------------- */ + +#ifndef GL_ARB_vertex_shader +#define GL_ARB_vertex_shader 1 + +#define GL_VERTEX_SHADER_ARB 0x8B31 +#define GL_MAX_VERTEX_UNIFORM_COMPONENTS_ARB 0x8B4A +#define GL_MAX_VARYING_FLOATS_ARB 0x8B4B +#define GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS_ARB 0x8B4C +#define GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS_ARB 0x8B4D +#define GL_OBJECT_ACTIVE_ATTRIBUTES_ARB 0x8B89 +#define GL_OBJECT_ACTIVE_ATTRIBUTE_MAX_LENGTH_ARB 0x8B8A + +typedef void (GLAPIENTRY * PFNGLBINDATTRIBLOCATIONARBPROC) (GLhandleARB programObj, GLuint index, const GLcharARB* name); +typedef void (GLAPIENTRY * PFNGLGETACTIVEATTRIBARBPROC) (GLhandleARB programObj, GLuint index, GLsizei maxLength, GLsizei* length, GLint *size, GLenum *type, GLcharARB *name); +typedef GLint (GLAPIENTRY * PFNGLGETATTRIBLOCATIONARBPROC) (GLhandleARB programObj, const GLcharARB* name); + +#define glBindAttribLocationARB GLEW_GET_FUN(__glewBindAttribLocationARB) +#define glGetActiveAttribARB GLEW_GET_FUN(__glewGetActiveAttribARB) +#define 
glGetAttribLocationARB GLEW_GET_FUN(__glewGetAttribLocationARB) + +#define GLEW_ARB_vertex_shader GLEW_GET_VAR(__GLEW_ARB_vertex_shader) + +#endif /* GL_ARB_vertex_shader */ + +/* --------------------------- GL_ARB_window_pos --------------------------- */ + +#ifndef GL_ARB_window_pos +#define GL_ARB_window_pos 1 + +typedef void (GLAPIENTRY * PFNGLWINDOWPOS2DARBPROC) (GLdouble x, GLdouble y); +typedef void (GLAPIENTRY * PFNGLWINDOWPOS2DVARBPROC) (const GLdouble* p); +typedef void (GLAPIENTRY * PFNGLWINDOWPOS2FARBPROC) (GLfloat x, GLfloat y); +typedef void (GLAPIENTRY * PFNGLWINDOWPOS2FVARBPROC) (const GLfloat* p); +typedef void (GLAPIENTRY * PFNGLWINDOWPOS2IARBPROC) (GLint x, GLint y); +typedef void (GLAPIENTRY * PFNGLWINDOWPOS2IVARBPROC) (const GLint* p); +typedef void (GLAPIENTRY * PFNGLWINDOWPOS2SARBPROC) (GLshort x, GLshort y); +typedef void (GLAPIENTRY * PFNGLWINDOWPOS2SVARBPROC) (const GLshort* p); +typedef void (GLAPIENTRY * PFNGLWINDOWPOS3DARBPROC) (GLdouble x, GLdouble y, GLdouble z); +typedef void (GLAPIENTRY * PFNGLWINDOWPOS3DVARBPROC) (const GLdouble* p); +typedef void (GLAPIENTRY * PFNGLWINDOWPOS3FARBPROC) (GLfloat x, GLfloat y, GLfloat z); +typedef void (GLAPIENTRY * PFNGLWINDOWPOS3FVARBPROC) (const GLfloat* p); +typedef void (GLAPIENTRY * PFNGLWINDOWPOS3IARBPROC) (GLint x, GLint y, GLint z); +typedef void (GLAPIENTRY * PFNGLWINDOWPOS3IVARBPROC) (const GLint* p); +typedef void (GLAPIENTRY * PFNGLWINDOWPOS3SARBPROC) (GLshort x, GLshort y, GLshort z); +typedef void (GLAPIENTRY * PFNGLWINDOWPOS3SVARBPROC) (const GLshort* p); + +#define glWindowPos2dARB GLEW_GET_FUN(__glewWindowPos2dARB) +#define glWindowPos2dvARB GLEW_GET_FUN(__glewWindowPos2dvARB) +#define glWindowPos2fARB GLEW_GET_FUN(__glewWindowPos2fARB) +#define glWindowPos2fvARB GLEW_GET_FUN(__glewWindowPos2fvARB) +#define glWindowPos2iARB GLEW_GET_FUN(__glewWindowPos2iARB) +#define glWindowPos2ivARB GLEW_GET_FUN(__glewWindowPos2ivARB) +#define glWindowPos2sARB GLEW_GET_FUN(__glewWindowPos2sARB) 
+#define glWindowPos2svARB GLEW_GET_FUN(__glewWindowPos2svARB) +#define glWindowPos3dARB GLEW_GET_FUN(__glewWindowPos3dARB) +#define glWindowPos3dvARB GLEW_GET_FUN(__glewWindowPos3dvARB) +#define glWindowPos3fARB GLEW_GET_FUN(__glewWindowPos3fARB) +#define glWindowPos3fvARB GLEW_GET_FUN(__glewWindowPos3fvARB) +#define glWindowPos3iARB GLEW_GET_FUN(__glewWindowPos3iARB) +#define glWindowPos3ivARB GLEW_GET_FUN(__glewWindowPos3ivARB) +#define glWindowPos3sARB GLEW_GET_FUN(__glewWindowPos3sARB) +#define glWindowPos3svARB GLEW_GET_FUN(__glewWindowPos3svARB) + +#define GLEW_ARB_window_pos GLEW_GET_VAR(__GLEW_ARB_window_pos) + +#endif /* GL_ARB_window_pos */ + +/* ------------------------- GL_ATIX_point_sprites ------------------------- */ + +#ifndef GL_ATIX_point_sprites +#define GL_ATIX_point_sprites 1 + +#define GL_TEXTURE_POINT_MODE_ATIX 0x60B0 +#define GL_TEXTURE_POINT_ONE_COORD_ATIX 0x60B1 +#define GL_TEXTURE_POINT_SPRITE_ATIX 0x60B2 +#define GL_POINT_SPRITE_CULL_MODE_ATIX 0x60B3 +#define GL_POINT_SPRITE_CULL_CENTER_ATIX 0x60B4 +#define GL_POINT_SPRITE_CULL_CLIP_ATIX 0x60B5 + +#define GLEW_ATIX_point_sprites GLEW_GET_VAR(__GLEW_ATIX_point_sprites) + +#endif /* GL_ATIX_point_sprites */ + +/* ---------------------- GL_ATIX_texture_env_combine3 --------------------- */ + +#ifndef GL_ATIX_texture_env_combine3 +#define GL_ATIX_texture_env_combine3 1 + +#define GL_MODULATE_ADD_ATIX 0x8744 +#define GL_MODULATE_SIGNED_ADD_ATIX 0x8745 +#define GL_MODULATE_SUBTRACT_ATIX 0x8746 + +#define GLEW_ATIX_texture_env_combine3 GLEW_GET_VAR(__GLEW_ATIX_texture_env_combine3) + +#endif /* GL_ATIX_texture_env_combine3 */ + +/* ----------------------- GL_ATIX_texture_env_route ----------------------- */ + +#ifndef GL_ATIX_texture_env_route +#define GL_ATIX_texture_env_route 1 + +#define GL_SECONDARY_COLOR_ATIX 0x8747 +#define GL_TEXTURE_OUTPUT_RGB_ATIX 0x8748 +#define GL_TEXTURE_OUTPUT_ALPHA_ATIX 0x8749 + +#define GLEW_ATIX_texture_env_route GLEW_GET_VAR(__GLEW_ATIX_texture_env_route) + 
+#endif /* GL_ATIX_texture_env_route */ + +/* ---------------- GL_ATIX_vertex_shader_output_point_size ---------------- */ + +#ifndef GL_ATIX_vertex_shader_output_point_size +#define GL_ATIX_vertex_shader_output_point_size 1 + +#define GL_OUTPUT_POINT_SIZE_ATIX 0x610E + +#define GLEW_ATIX_vertex_shader_output_point_size GLEW_GET_VAR(__GLEW_ATIX_vertex_shader_output_point_size) + +#endif /* GL_ATIX_vertex_shader_output_point_size */ + +/* -------------------------- GL_ATI_draw_buffers -------------------------- */ + +#ifndef GL_ATI_draw_buffers +#define GL_ATI_draw_buffers 1 + +#define GL_MAX_DRAW_BUFFERS_ATI 0x8824 +#define GL_DRAW_BUFFER0_ATI 0x8825 +#define GL_DRAW_BUFFER1_ATI 0x8826 +#define GL_DRAW_BUFFER2_ATI 0x8827 +#define GL_DRAW_BUFFER3_ATI 0x8828 +#define GL_DRAW_BUFFER4_ATI 0x8829 +#define GL_DRAW_BUFFER5_ATI 0x882A +#define GL_DRAW_BUFFER6_ATI 0x882B +#define GL_DRAW_BUFFER7_ATI 0x882C +#define GL_DRAW_BUFFER8_ATI 0x882D +#define GL_DRAW_BUFFER9_ATI 0x882E +#define GL_DRAW_BUFFER10_ATI 0x882F +#define GL_DRAW_BUFFER11_ATI 0x8830 +#define GL_DRAW_BUFFER12_ATI 0x8831 +#define GL_DRAW_BUFFER13_ATI 0x8832 +#define GL_DRAW_BUFFER14_ATI 0x8833 +#define GL_DRAW_BUFFER15_ATI 0x8834 + +typedef void (GLAPIENTRY * PFNGLDRAWBUFFERSATIPROC) (GLsizei n, const GLenum* bufs); + +#define glDrawBuffersATI GLEW_GET_FUN(__glewDrawBuffersATI) + +#define GLEW_ATI_draw_buffers GLEW_GET_VAR(__GLEW_ATI_draw_buffers) + +#endif /* GL_ATI_draw_buffers */ + +/* -------------------------- GL_ATI_element_array ------------------------- */ + +#ifndef GL_ATI_element_array +#define GL_ATI_element_array 1 + +#define GL_ELEMENT_ARRAY_ATI 0x8768 +#define GL_ELEMENT_ARRAY_TYPE_ATI 0x8769 +#define GL_ELEMENT_ARRAY_POINTER_ATI 0x876A + +typedef void (GLAPIENTRY * PFNGLDRAWELEMENTARRAYATIPROC) (GLenum mode, GLsizei count); +typedef void (GLAPIENTRY * PFNGLDRAWRANGEELEMENTARRAYATIPROC) (GLenum mode, GLuint start, GLuint end, GLsizei count); +typedef void (GLAPIENTRY * 
PFNGLELEMENTPOINTERATIPROC) (GLenum type, const void* pointer); + +#define glDrawElementArrayATI GLEW_GET_FUN(__glewDrawElementArrayATI) +#define glDrawRangeElementArrayATI GLEW_GET_FUN(__glewDrawRangeElementArrayATI) +#define glElementPointerATI GLEW_GET_FUN(__glewElementPointerATI) + +#define GLEW_ATI_element_array GLEW_GET_VAR(__GLEW_ATI_element_array) + +#endif /* GL_ATI_element_array */ + +/* ------------------------- GL_ATI_envmap_bumpmap ------------------------- */ + +#ifndef GL_ATI_envmap_bumpmap +#define GL_ATI_envmap_bumpmap 1 + +#define GL_BUMP_ROT_MATRIX_ATI 0x8775 +#define GL_BUMP_ROT_MATRIX_SIZE_ATI 0x8776 +#define GL_BUMP_NUM_TEX_UNITS_ATI 0x8777 +#define GL_BUMP_TEX_UNITS_ATI 0x8778 +#define GL_DUDV_ATI 0x8779 +#define GL_DU8DV8_ATI 0x877A +#define GL_BUMP_ENVMAP_ATI 0x877B +#define GL_BUMP_TARGET_ATI 0x877C + +typedef void (GLAPIENTRY * PFNGLGETTEXBUMPPARAMETERFVATIPROC) (GLenum pname, GLfloat *param); +typedef void (GLAPIENTRY * PFNGLGETTEXBUMPPARAMETERIVATIPROC) (GLenum pname, GLint *param); +typedef void (GLAPIENTRY * PFNGLTEXBUMPPARAMETERFVATIPROC) (GLenum pname, GLfloat *param); +typedef void (GLAPIENTRY * PFNGLTEXBUMPPARAMETERIVATIPROC) (GLenum pname, GLint *param); + +#define glGetTexBumpParameterfvATI GLEW_GET_FUN(__glewGetTexBumpParameterfvATI) +#define glGetTexBumpParameterivATI GLEW_GET_FUN(__glewGetTexBumpParameterivATI) +#define glTexBumpParameterfvATI GLEW_GET_FUN(__glewTexBumpParameterfvATI) +#define glTexBumpParameterivATI GLEW_GET_FUN(__glewTexBumpParameterivATI) + +#define GLEW_ATI_envmap_bumpmap GLEW_GET_VAR(__GLEW_ATI_envmap_bumpmap) + +#endif /* GL_ATI_envmap_bumpmap */ + +/* ------------------------- GL_ATI_fragment_shader ------------------------ */ + +#ifndef GL_ATI_fragment_shader +#define GL_ATI_fragment_shader 1 + +#define GL_RED_BIT_ATI 0x00000001 +#define GL_2X_BIT_ATI 0x00000001 +#define GL_4X_BIT_ATI 0x00000002 +#define GL_GREEN_BIT_ATI 0x00000002 +#define GL_COMP_BIT_ATI 0x00000002 +#define GL_BLUE_BIT_ATI 
0x00000004 +#define GL_8X_BIT_ATI 0x00000004 +#define GL_NEGATE_BIT_ATI 0x00000004 +#define GL_BIAS_BIT_ATI 0x00000008 +#define GL_HALF_BIT_ATI 0x00000008 +#define GL_QUARTER_BIT_ATI 0x00000010 +#define GL_EIGHTH_BIT_ATI 0x00000020 +#define GL_SATURATE_BIT_ATI 0x00000040 +#define GL_FRAGMENT_SHADER_ATI 0x8920 +#define GL_REG_0_ATI 0x8921 +#define GL_REG_1_ATI 0x8922 +#define GL_REG_2_ATI 0x8923 +#define GL_REG_3_ATI 0x8924 +#define GL_REG_4_ATI 0x8925 +#define GL_REG_5_ATI 0x8926 +#define GL_CON_0_ATI 0x8941 +#define GL_CON_1_ATI 0x8942 +#define GL_CON_2_ATI 0x8943 +#define GL_CON_3_ATI 0x8944 +#define GL_CON_4_ATI 0x8945 +#define GL_CON_5_ATI 0x8946 +#define GL_CON_6_ATI 0x8947 +#define GL_CON_7_ATI 0x8948 +#define GL_MOV_ATI 0x8961 +#define GL_ADD_ATI 0x8963 +#define GL_MUL_ATI 0x8964 +#define GL_SUB_ATI 0x8965 +#define GL_DOT3_ATI 0x8966 +#define GL_DOT4_ATI 0x8967 +#define GL_MAD_ATI 0x8968 +#define GL_LERP_ATI 0x8969 +#define GL_CND_ATI 0x896A +#define GL_CND0_ATI 0x896B +#define GL_DOT2_ADD_ATI 0x896C +#define GL_SECONDARY_INTERPOLATOR_ATI 0x896D +#define GL_NUM_FRAGMENT_REGISTERS_ATI 0x896E +#define GL_NUM_FRAGMENT_CONSTANTS_ATI 0x896F +#define GL_NUM_PASSES_ATI 0x8970 +#define GL_NUM_INSTRUCTIONS_PER_PASS_ATI 0x8971 +#define GL_NUM_INSTRUCTIONS_TOTAL_ATI 0x8972 +#define GL_NUM_INPUT_INTERPOLATOR_COMPONENTS_ATI 0x8973 +#define GL_NUM_LOOPBACK_COMPONENTS_ATI 0x8974 +#define GL_COLOR_ALPHA_PAIRING_ATI 0x8975 +#define GL_SWIZZLE_STR_ATI 0x8976 +#define GL_SWIZZLE_STQ_ATI 0x8977 +#define GL_SWIZZLE_STR_DR_ATI 0x8978 +#define GL_SWIZZLE_STQ_DQ_ATI 0x8979 +#define GL_SWIZZLE_STRQ_ATI 0x897A +#define GL_SWIZZLE_STRQ_DQ_ATI 0x897B + +typedef void (GLAPIENTRY * PFNGLALPHAFRAGMENTOP1ATIPROC) (GLenum op, GLuint dst, GLuint dstMod, GLuint arg1, GLuint arg1Rep, GLuint arg1Mod); +typedef void (GLAPIENTRY * PFNGLALPHAFRAGMENTOP2ATIPROC) (GLenum op, GLuint dst, GLuint dstMod, GLuint arg1, GLuint arg1Rep, GLuint arg1Mod, GLuint arg2, GLuint arg2Rep, GLuint arg2Mod); +typedef 
void (GLAPIENTRY * PFNGLALPHAFRAGMENTOP3ATIPROC) (GLenum op, GLuint dst, GLuint dstMod, GLuint arg1, GLuint arg1Rep, GLuint arg1Mod, GLuint arg2, GLuint arg2Rep, GLuint arg2Mod, GLuint arg3, GLuint arg3Rep, GLuint arg3Mod); +typedef void (GLAPIENTRY * PFNGLBEGINFRAGMENTSHADERATIPROC) (void); +typedef void (GLAPIENTRY * PFNGLBINDFRAGMENTSHADERATIPROC) (GLuint id); +typedef void (GLAPIENTRY * PFNGLCOLORFRAGMENTOP1ATIPROC) (GLenum op, GLuint dst, GLuint dstMask, GLuint dstMod, GLuint arg1, GLuint arg1Rep, GLuint arg1Mod); +typedef void (GLAPIENTRY * PFNGLCOLORFRAGMENTOP2ATIPROC) (GLenum op, GLuint dst, GLuint dstMask, GLuint dstMod, GLuint arg1, GLuint arg1Rep, GLuint arg1Mod, GLuint arg2, GLuint arg2Rep, GLuint arg2Mod); +typedef void (GLAPIENTRY * PFNGLCOLORFRAGMENTOP3ATIPROC) (GLenum op, GLuint dst, GLuint dstMask, GLuint dstMod, GLuint arg1, GLuint arg1Rep, GLuint arg1Mod, GLuint arg2, GLuint arg2Rep, GLuint arg2Mod, GLuint arg3, GLuint arg3Rep, GLuint arg3Mod); +typedef void (GLAPIENTRY * PFNGLDELETEFRAGMENTSHADERATIPROC) (GLuint id); +typedef void (GLAPIENTRY * PFNGLENDFRAGMENTSHADERATIPROC) (void); +typedef GLuint (GLAPIENTRY * PFNGLGENFRAGMENTSHADERSATIPROC) (GLuint range); +typedef void (GLAPIENTRY * PFNGLPASSTEXCOORDATIPROC) (GLuint dst, GLuint coord, GLenum swizzle); +typedef void (GLAPIENTRY * PFNGLSAMPLEMAPATIPROC) (GLuint dst, GLuint interp, GLenum swizzle); +typedef void (GLAPIENTRY * PFNGLSETFRAGMENTSHADERCONSTANTATIPROC) (GLuint dst, const GLfloat* value); + +#define glAlphaFragmentOp1ATI GLEW_GET_FUN(__glewAlphaFragmentOp1ATI) +#define glAlphaFragmentOp2ATI GLEW_GET_FUN(__glewAlphaFragmentOp2ATI) +#define glAlphaFragmentOp3ATI GLEW_GET_FUN(__glewAlphaFragmentOp3ATI) +#define glBeginFragmentShaderATI GLEW_GET_FUN(__glewBeginFragmentShaderATI) +#define glBindFragmentShaderATI GLEW_GET_FUN(__glewBindFragmentShaderATI) +#define glColorFragmentOp1ATI GLEW_GET_FUN(__glewColorFragmentOp1ATI) +#define glColorFragmentOp2ATI 
GLEW_GET_FUN(__glewColorFragmentOp2ATI) +#define glColorFragmentOp3ATI GLEW_GET_FUN(__glewColorFragmentOp3ATI) +#define glDeleteFragmentShaderATI GLEW_GET_FUN(__glewDeleteFragmentShaderATI) +#define glEndFragmentShaderATI GLEW_GET_FUN(__glewEndFragmentShaderATI) +#define glGenFragmentShadersATI GLEW_GET_FUN(__glewGenFragmentShadersATI) +#define glPassTexCoordATI GLEW_GET_FUN(__glewPassTexCoordATI) +#define glSampleMapATI GLEW_GET_FUN(__glewSampleMapATI) +#define glSetFragmentShaderConstantATI GLEW_GET_FUN(__glewSetFragmentShaderConstantATI) + +#define GLEW_ATI_fragment_shader GLEW_GET_VAR(__GLEW_ATI_fragment_shader) + +#endif /* GL_ATI_fragment_shader */ + +/* ------------------------ GL_ATI_map_object_buffer ----------------------- */ + +#ifndef GL_ATI_map_object_buffer +#define GL_ATI_map_object_buffer 1 + +typedef void* (GLAPIENTRY * PFNGLMAPOBJECTBUFFERATIPROC) (GLuint buffer); +typedef void (GLAPIENTRY * PFNGLUNMAPOBJECTBUFFERATIPROC) (GLuint buffer); + +#define glMapObjectBufferATI GLEW_GET_FUN(__glewMapObjectBufferATI) +#define glUnmapObjectBufferATI GLEW_GET_FUN(__glewUnmapObjectBufferATI) + +#define GLEW_ATI_map_object_buffer GLEW_GET_VAR(__GLEW_ATI_map_object_buffer) + +#endif /* GL_ATI_map_object_buffer */ + +/* -------------------------- GL_ATI_pn_triangles -------------------------- */ + +#ifndef GL_ATI_pn_triangles +#define GL_ATI_pn_triangles 1 + +#define GL_PN_TRIANGLES_ATI 0x87F0 +#define GL_MAX_PN_TRIANGLES_TESSELATION_LEVEL_ATI 0x87F1 +#define GL_PN_TRIANGLES_POINT_MODE_ATI 0x87F2 +#define GL_PN_TRIANGLES_NORMAL_MODE_ATI 0x87F3 +#define GL_PN_TRIANGLES_TESSELATION_LEVEL_ATI 0x87F4 +#define GL_PN_TRIANGLES_POINT_MODE_LINEAR_ATI 0x87F5 +#define GL_PN_TRIANGLES_POINT_MODE_CUBIC_ATI 0x87F6 +#define GL_PN_TRIANGLES_NORMAL_MODE_LINEAR_ATI 0x87F7 +#define GL_PN_TRIANGLES_NORMAL_MODE_QUADRATIC_ATI 0x87F8 + +typedef void (GLAPIENTRY * PFNGLPNTRIANGLESFATIPROC) (GLenum pname, GLfloat param); +typedef void (GLAPIENTRY * PFNGLPNTRIANGLESIATIPROC) (GLenum 
pname, GLint param); + +#define glPNTrianglesfATI GLEW_GET_FUN(__glewPNTrianglesfATI) +#define glPNTrianglesiATI GLEW_GET_FUN(__glewPNTrianglesiATI) + +#define GLEW_ATI_pn_triangles GLEW_GET_VAR(__GLEW_ATI_pn_triangles) + +#endif /* GL_ATI_pn_triangles */ + +/* ------------------------ GL_ATI_separate_stencil ------------------------ */ + +#ifndef GL_ATI_separate_stencil +#define GL_ATI_separate_stencil 1 + +#define GL_STENCIL_BACK_FUNC_ATI 0x8800 +#define GL_STENCIL_BACK_FAIL_ATI 0x8801 +#define GL_STENCIL_BACK_PASS_DEPTH_FAIL_ATI 0x8802 +#define GL_STENCIL_BACK_PASS_DEPTH_PASS_ATI 0x8803 + +typedef void (GLAPIENTRY * PFNGLSTENCILFUNCSEPARATEATIPROC) (GLenum frontfunc, GLenum backfunc, GLint ref, GLuint mask); +typedef void (GLAPIENTRY * PFNGLSTENCILOPSEPARATEATIPROC) (GLenum face, GLenum sfail, GLenum dpfail, GLenum dppass); + +#define glStencilFuncSeparateATI GLEW_GET_FUN(__glewStencilFuncSeparateATI) +#define glStencilOpSeparateATI GLEW_GET_FUN(__glewStencilOpSeparateATI) + +#define GLEW_ATI_separate_stencil GLEW_GET_VAR(__GLEW_ATI_separate_stencil) + +#endif /* GL_ATI_separate_stencil */ + +/* ----------------------- GL_ATI_shader_texture_lod ----------------------- */ + +#ifndef GL_ATI_shader_texture_lod +#define GL_ATI_shader_texture_lod 1 + +#define GLEW_ATI_shader_texture_lod GLEW_GET_VAR(__GLEW_ATI_shader_texture_lod) + +#endif /* GL_ATI_shader_texture_lod */ + +/* ---------------------- GL_ATI_text_fragment_shader ---------------------- */ + +#ifndef GL_ATI_text_fragment_shader +#define GL_ATI_text_fragment_shader 1 + +#define GL_TEXT_FRAGMENT_SHADER_ATI 0x8200 + +#define GLEW_ATI_text_fragment_shader GLEW_GET_VAR(__GLEW_ATI_text_fragment_shader) + +#endif /* GL_ATI_text_fragment_shader */ + +/* --------------------- GL_ATI_texture_compression_3dc -------------------- */ + +#ifndef GL_ATI_texture_compression_3dc +#define GL_ATI_texture_compression_3dc 1 + +#define GL_COMPRESSED_LUMINANCE_ALPHA_3DC_ATI 0x8837 + +#define GLEW_ATI_texture_compression_3dc 
GLEW_GET_VAR(__GLEW_ATI_texture_compression_3dc) + +#endif /* GL_ATI_texture_compression_3dc */ + +/* ---------------------- GL_ATI_texture_env_combine3 ---------------------- */ + +#ifndef GL_ATI_texture_env_combine3 +#define GL_ATI_texture_env_combine3 1 + +#define GL_MODULATE_ADD_ATI 0x8744 +#define GL_MODULATE_SIGNED_ADD_ATI 0x8745 +#define GL_MODULATE_SUBTRACT_ATI 0x8746 + +#define GLEW_ATI_texture_env_combine3 GLEW_GET_VAR(__GLEW_ATI_texture_env_combine3) + +#endif /* GL_ATI_texture_env_combine3 */ + +/* -------------------------- GL_ATI_texture_float ------------------------- */ + +#ifndef GL_ATI_texture_float +#define GL_ATI_texture_float 1 + +#define GL_RGBA_FLOAT32_ATI 0x8814 +#define GL_RGB_FLOAT32_ATI 0x8815 +#define GL_ALPHA_FLOAT32_ATI 0x8816 +#define GL_INTENSITY_FLOAT32_ATI 0x8817 +#define GL_LUMINANCE_FLOAT32_ATI 0x8818 +#define GL_LUMINANCE_ALPHA_FLOAT32_ATI 0x8819 +#define GL_RGBA_FLOAT16_ATI 0x881A +#define GL_RGB_FLOAT16_ATI 0x881B +#define GL_ALPHA_FLOAT16_ATI 0x881C +#define GL_INTENSITY_FLOAT16_ATI 0x881D +#define GL_LUMINANCE_FLOAT16_ATI 0x881E +#define GL_LUMINANCE_ALPHA_FLOAT16_ATI 0x881F + +#define GLEW_ATI_texture_float GLEW_GET_VAR(__GLEW_ATI_texture_float) + +#endif /* GL_ATI_texture_float */ + +/* ----------------------- GL_ATI_texture_mirror_once ---------------------- */ + +#ifndef GL_ATI_texture_mirror_once +#define GL_ATI_texture_mirror_once 1 + +#define GL_MIRROR_CLAMP_ATI 0x8742 +#define GL_MIRROR_CLAMP_TO_EDGE_ATI 0x8743 + +#define GLEW_ATI_texture_mirror_once GLEW_GET_VAR(__GLEW_ATI_texture_mirror_once) + +#endif /* GL_ATI_texture_mirror_once */ + +/* ----------------------- GL_ATI_vertex_array_object ---------------------- */ + +#ifndef GL_ATI_vertex_array_object +#define GL_ATI_vertex_array_object 1 + +#define GL_STATIC_ATI 0x8760 +#define GL_DYNAMIC_ATI 0x8761 +#define GL_PRESERVE_ATI 0x8762 +#define GL_DISCARD_ATI 0x8763 +#define GL_OBJECT_BUFFER_SIZE_ATI 0x8764 +#define GL_OBJECT_BUFFER_USAGE_ATI 0x8765 +#define 
GL_ARRAY_OBJECT_BUFFER_ATI 0x8766 +#define GL_ARRAY_OBJECT_OFFSET_ATI 0x8767 + +typedef void (GLAPIENTRY * PFNGLARRAYOBJECTATIPROC) (GLenum array, GLint size, GLenum type, GLsizei stride, GLuint buffer, GLuint offset); +typedef void (GLAPIENTRY * PFNGLFREEOBJECTBUFFERATIPROC) (GLuint buffer); +typedef void (GLAPIENTRY * PFNGLGETARRAYOBJECTFVATIPROC) (GLenum array, GLenum pname, GLfloat* params); +typedef void (GLAPIENTRY * PFNGLGETARRAYOBJECTIVATIPROC) (GLenum array, GLenum pname, GLint* params); +typedef void (GLAPIENTRY * PFNGLGETOBJECTBUFFERFVATIPROC) (GLuint buffer, GLenum pname, GLfloat* params); +typedef void (GLAPIENTRY * PFNGLGETOBJECTBUFFERIVATIPROC) (GLuint buffer, GLenum pname, GLint* params); +typedef void (GLAPIENTRY * PFNGLGETVARIANTARRAYOBJECTFVATIPROC) (GLuint id, GLenum pname, GLfloat* params); +typedef void (GLAPIENTRY * PFNGLGETVARIANTARRAYOBJECTIVATIPROC) (GLuint id, GLenum pname, GLint* params); +typedef GLboolean (GLAPIENTRY * PFNGLISOBJECTBUFFERATIPROC) (GLuint buffer); +typedef GLuint (GLAPIENTRY * PFNGLNEWOBJECTBUFFERATIPROC) (GLsizei size, const void* pointer, GLenum usage); +typedef void (GLAPIENTRY * PFNGLUPDATEOBJECTBUFFERATIPROC) (GLuint buffer, GLuint offset, GLsizei size, const void* pointer, GLenum preserve); +typedef void (GLAPIENTRY * PFNGLVARIANTARRAYOBJECTATIPROC) (GLuint id, GLenum type, GLsizei stride, GLuint buffer, GLuint offset); + +#define glArrayObjectATI GLEW_GET_FUN(__glewArrayObjectATI) +#define glFreeObjectBufferATI GLEW_GET_FUN(__glewFreeObjectBufferATI) +#define glGetArrayObjectfvATI GLEW_GET_FUN(__glewGetArrayObjectfvATI) +#define glGetArrayObjectivATI GLEW_GET_FUN(__glewGetArrayObjectivATI) +#define glGetObjectBufferfvATI GLEW_GET_FUN(__glewGetObjectBufferfvATI) +#define glGetObjectBufferivATI GLEW_GET_FUN(__glewGetObjectBufferivATI) +#define glGetVariantArrayObjectfvATI GLEW_GET_FUN(__glewGetVariantArrayObjectfvATI) +#define glGetVariantArrayObjectivATI GLEW_GET_FUN(__glewGetVariantArrayObjectivATI) +#define 
glIsObjectBufferATI GLEW_GET_FUN(__glewIsObjectBufferATI) +#define glNewObjectBufferATI GLEW_GET_FUN(__glewNewObjectBufferATI) +#define glUpdateObjectBufferATI GLEW_GET_FUN(__glewUpdateObjectBufferATI) +#define glVariantArrayObjectATI GLEW_GET_FUN(__glewVariantArrayObjectATI) + +#define GLEW_ATI_vertex_array_object GLEW_GET_VAR(__GLEW_ATI_vertex_array_object) + +#endif /* GL_ATI_vertex_array_object */ + +/* ------------------- GL_ATI_vertex_attrib_array_object ------------------- */ + +#ifndef GL_ATI_vertex_attrib_array_object +#define GL_ATI_vertex_attrib_array_object 1 + +typedef void (GLAPIENTRY * PFNGLGETVERTEXATTRIBARRAYOBJECTFVATIPROC) (GLuint index, GLenum pname, GLfloat* params); +typedef void (GLAPIENTRY * PFNGLGETVERTEXATTRIBARRAYOBJECTIVATIPROC) (GLuint index, GLenum pname, GLint* params); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIBARRAYOBJECTATIPROC) (GLuint index, GLint size, GLenum type, GLboolean normalized, GLsizei stride, GLuint buffer, GLuint offset); + +#define glGetVertexAttribArrayObjectfvATI GLEW_GET_FUN(__glewGetVertexAttribArrayObjectfvATI) +#define glGetVertexAttribArrayObjectivATI GLEW_GET_FUN(__glewGetVertexAttribArrayObjectivATI) +#define glVertexAttribArrayObjectATI GLEW_GET_FUN(__glewVertexAttribArrayObjectATI) + +#define GLEW_ATI_vertex_attrib_array_object GLEW_GET_VAR(__GLEW_ATI_vertex_attrib_array_object) + +#endif /* GL_ATI_vertex_attrib_array_object */ + +/* ------------------------- GL_ATI_vertex_streams ------------------------- */ + +#ifndef GL_ATI_vertex_streams +#define GL_ATI_vertex_streams 1 + +#define GL_MAX_VERTEX_STREAMS_ATI 0x876B +#define GL_VERTEX_SOURCE_ATI 0x876C +#define GL_VERTEX_STREAM0_ATI 0x876D +#define GL_VERTEX_STREAM1_ATI 0x876E +#define GL_VERTEX_STREAM2_ATI 0x876F +#define GL_VERTEX_STREAM3_ATI 0x8770 +#define GL_VERTEX_STREAM4_ATI 0x8771 +#define GL_VERTEX_STREAM5_ATI 0x8772 +#define GL_VERTEX_STREAM6_ATI 0x8773 +#define GL_VERTEX_STREAM7_ATI 0x8774 + +typedef void (GLAPIENTRY * 
PFNGLCLIENTACTIVEVERTEXSTREAMATIPROC) (GLenum stream); +typedef void (GLAPIENTRY * PFNGLNORMALSTREAM3BATIPROC) (GLenum stream, GLbyte x, GLbyte y, GLbyte z); +typedef void (GLAPIENTRY * PFNGLNORMALSTREAM3BVATIPROC) (GLenum stream, const GLbyte *v); +typedef void (GLAPIENTRY * PFNGLNORMALSTREAM3DATIPROC) (GLenum stream, GLdouble x, GLdouble y, GLdouble z); +typedef void (GLAPIENTRY * PFNGLNORMALSTREAM3DVATIPROC) (GLenum stream, const GLdouble *v); +typedef void (GLAPIENTRY * PFNGLNORMALSTREAM3FATIPROC) (GLenum stream, GLfloat x, GLfloat y, GLfloat z); +typedef void (GLAPIENTRY * PFNGLNORMALSTREAM3FVATIPROC) (GLenum stream, const GLfloat *v); +typedef void (GLAPIENTRY * PFNGLNORMALSTREAM3IATIPROC) (GLenum stream, GLint x, GLint y, GLint z); +typedef void (GLAPIENTRY * PFNGLNORMALSTREAM3IVATIPROC) (GLenum stream, const GLint *v); +typedef void (GLAPIENTRY * PFNGLNORMALSTREAM3SATIPROC) (GLenum stream, GLshort x, GLshort y, GLshort z); +typedef void (GLAPIENTRY * PFNGLNORMALSTREAM3SVATIPROC) (GLenum stream, const GLshort *v); +typedef void (GLAPIENTRY * PFNGLVERTEXBLENDENVFATIPROC) (GLenum pname, GLfloat param); +typedef void (GLAPIENTRY * PFNGLVERTEXBLENDENVIATIPROC) (GLenum pname, GLint param); +typedef void (GLAPIENTRY * PFNGLVERTEXSTREAM2DATIPROC) (GLenum stream, GLdouble x, GLdouble y); +typedef void (GLAPIENTRY * PFNGLVERTEXSTREAM2DVATIPROC) (GLenum stream, const GLdouble *v); +typedef void (GLAPIENTRY * PFNGLVERTEXSTREAM2FATIPROC) (GLenum stream, GLfloat x, GLfloat y); +typedef void (GLAPIENTRY * PFNGLVERTEXSTREAM2FVATIPROC) (GLenum stream, const GLfloat *v); +typedef void (GLAPIENTRY * PFNGLVERTEXSTREAM2IATIPROC) (GLenum stream, GLint x, GLint y); +typedef void (GLAPIENTRY * PFNGLVERTEXSTREAM2IVATIPROC) (GLenum stream, const GLint *v); +typedef void (GLAPIENTRY * PFNGLVERTEXSTREAM2SATIPROC) (GLenum stream, GLshort x, GLshort y); +typedef void (GLAPIENTRY * PFNGLVERTEXSTREAM2SVATIPROC) (GLenum stream, const GLshort *v); +typedef void (GLAPIENTRY * 
PFNGLVERTEXSTREAM3DATIPROC) (GLenum stream, GLdouble x, GLdouble y, GLdouble z); +typedef void (GLAPIENTRY * PFNGLVERTEXSTREAM3DVATIPROC) (GLenum stream, const GLdouble *v); +typedef void (GLAPIENTRY * PFNGLVERTEXSTREAM3FATIPROC) (GLenum stream, GLfloat x, GLfloat y, GLfloat z); +typedef void (GLAPIENTRY * PFNGLVERTEXSTREAM3FVATIPROC) (GLenum stream, const GLfloat *v); +typedef void (GLAPIENTRY * PFNGLVERTEXSTREAM3IATIPROC) (GLenum stream, GLint x, GLint y, GLint z); +typedef void (GLAPIENTRY * PFNGLVERTEXSTREAM3IVATIPROC) (GLenum stream, const GLint *v); +typedef void (GLAPIENTRY * PFNGLVERTEXSTREAM3SATIPROC) (GLenum stream, GLshort x, GLshort y, GLshort z); +typedef void (GLAPIENTRY * PFNGLVERTEXSTREAM3SVATIPROC) (GLenum stream, const GLshort *v); +typedef void (GLAPIENTRY * PFNGLVERTEXSTREAM4DATIPROC) (GLenum stream, GLdouble x, GLdouble y, GLdouble z, GLdouble w); +typedef void (GLAPIENTRY * PFNGLVERTEXSTREAM4DVATIPROC) (GLenum stream, const GLdouble *v); +typedef void (GLAPIENTRY * PFNGLVERTEXSTREAM4FATIPROC) (GLenum stream, GLfloat x, GLfloat y, GLfloat z, GLfloat w); +typedef void (GLAPIENTRY * PFNGLVERTEXSTREAM4FVATIPROC) (GLenum stream, const GLfloat *v); +typedef void (GLAPIENTRY * PFNGLVERTEXSTREAM4IATIPROC) (GLenum stream, GLint x, GLint y, GLint z, GLint w); +typedef void (GLAPIENTRY * PFNGLVERTEXSTREAM4IVATIPROC) (GLenum stream, const GLint *v); +typedef void (GLAPIENTRY * PFNGLVERTEXSTREAM4SATIPROC) (GLenum stream, GLshort x, GLshort y, GLshort z, GLshort w); +typedef void (GLAPIENTRY * PFNGLVERTEXSTREAM4SVATIPROC) (GLenum stream, const GLshort *v); + +#define glClientActiveVertexStreamATI GLEW_GET_FUN(__glewClientActiveVertexStreamATI) +#define glNormalStream3bATI GLEW_GET_FUN(__glewNormalStream3bATI) +#define glNormalStream3bvATI GLEW_GET_FUN(__glewNormalStream3bvATI) +#define glNormalStream3dATI GLEW_GET_FUN(__glewNormalStream3dATI) +#define glNormalStream3dvATI GLEW_GET_FUN(__glewNormalStream3dvATI) +#define glNormalStream3fATI 
GLEW_GET_FUN(__glewNormalStream3fATI) +#define glNormalStream3fvATI GLEW_GET_FUN(__glewNormalStream3fvATI) +#define glNormalStream3iATI GLEW_GET_FUN(__glewNormalStream3iATI) +#define glNormalStream3ivATI GLEW_GET_FUN(__glewNormalStream3ivATI) +#define glNormalStream3sATI GLEW_GET_FUN(__glewNormalStream3sATI) +#define glNormalStream3svATI GLEW_GET_FUN(__glewNormalStream3svATI) +#define glVertexBlendEnvfATI GLEW_GET_FUN(__glewVertexBlendEnvfATI) +#define glVertexBlendEnviATI GLEW_GET_FUN(__glewVertexBlendEnviATI) +#define glVertexStream2dATI GLEW_GET_FUN(__glewVertexStream2dATI) +#define glVertexStream2dvATI GLEW_GET_FUN(__glewVertexStream2dvATI) +#define glVertexStream2fATI GLEW_GET_FUN(__glewVertexStream2fATI) +#define glVertexStream2fvATI GLEW_GET_FUN(__glewVertexStream2fvATI) +#define glVertexStream2iATI GLEW_GET_FUN(__glewVertexStream2iATI) +#define glVertexStream2ivATI GLEW_GET_FUN(__glewVertexStream2ivATI) +#define glVertexStream2sATI GLEW_GET_FUN(__glewVertexStream2sATI) +#define glVertexStream2svATI GLEW_GET_FUN(__glewVertexStream2svATI) +#define glVertexStream3dATI GLEW_GET_FUN(__glewVertexStream3dATI) +#define glVertexStream3dvATI GLEW_GET_FUN(__glewVertexStream3dvATI) +#define glVertexStream3fATI GLEW_GET_FUN(__glewVertexStream3fATI) +#define glVertexStream3fvATI GLEW_GET_FUN(__glewVertexStream3fvATI) +#define glVertexStream3iATI GLEW_GET_FUN(__glewVertexStream3iATI) +#define glVertexStream3ivATI GLEW_GET_FUN(__glewVertexStream3ivATI) +#define glVertexStream3sATI GLEW_GET_FUN(__glewVertexStream3sATI) +#define glVertexStream3svATI GLEW_GET_FUN(__glewVertexStream3svATI) +#define glVertexStream4dATI GLEW_GET_FUN(__glewVertexStream4dATI) +#define glVertexStream4dvATI GLEW_GET_FUN(__glewVertexStream4dvATI) +#define glVertexStream4fATI GLEW_GET_FUN(__glewVertexStream4fATI) +#define glVertexStream4fvATI GLEW_GET_FUN(__glewVertexStream4fvATI) +#define glVertexStream4iATI GLEW_GET_FUN(__glewVertexStream4iATI) +#define glVertexStream4ivATI 
GLEW_GET_FUN(__glewVertexStream4ivATI) +#define glVertexStream4sATI GLEW_GET_FUN(__glewVertexStream4sATI) +#define glVertexStream4svATI GLEW_GET_FUN(__glewVertexStream4svATI) + +#define GLEW_ATI_vertex_streams GLEW_GET_VAR(__GLEW_ATI_vertex_streams) + +#endif /* GL_ATI_vertex_streams */ + +/* --------------------------- GL_EXT_422_pixels --------------------------- */ + +#ifndef GL_EXT_422_pixels +#define GL_EXT_422_pixels 1 + +#define GL_422_EXT 0x80CC +#define GL_422_REV_EXT 0x80CD +#define GL_422_AVERAGE_EXT 0x80CE +#define GL_422_REV_AVERAGE_EXT 0x80CF + +#define GLEW_EXT_422_pixels GLEW_GET_VAR(__GLEW_EXT_422_pixels) + +#endif /* GL_EXT_422_pixels */ + +/* ---------------------------- GL_EXT_Cg_shader --------------------------- */ + +#ifndef GL_EXT_Cg_shader +#define GL_EXT_Cg_shader 1 + +#define GL_CG_VERTEX_SHADER_EXT 0x890E +#define GL_CG_FRAGMENT_SHADER_EXT 0x890F + +#define GLEW_EXT_Cg_shader GLEW_GET_VAR(__GLEW_EXT_Cg_shader) + +#endif /* GL_EXT_Cg_shader */ + +/* ------------------------------ GL_EXT_abgr ------------------------------ */ + +#ifndef GL_EXT_abgr +#define GL_EXT_abgr 1 + +#define GL_ABGR_EXT 0x8000 + +#define GLEW_EXT_abgr GLEW_GET_VAR(__GLEW_EXT_abgr) + +#endif /* GL_EXT_abgr */ + +/* ------------------------------ GL_EXT_bgra ------------------------------ */ + +#ifndef GL_EXT_bgra +#define GL_EXT_bgra 1 + +#define GL_BGR_EXT 0x80E0 +#define GL_BGRA_EXT 0x80E1 + +#define GLEW_EXT_bgra GLEW_GET_VAR(__GLEW_EXT_bgra) + +#endif /* GL_EXT_bgra */ + +/* ------------------------ GL_EXT_bindable_uniform ------------------------ */ + +#ifndef GL_EXT_bindable_uniform +#define GL_EXT_bindable_uniform 1 + +#define GL_MAX_VERTEX_BINDABLE_UNIFORMS_EXT 0x8DE2 +#define GL_MAX_FRAGMENT_BINDABLE_UNIFORMS_EXT 0x8DE3 +#define GL_MAX_GEOMETRY_BINDABLE_UNIFORMS_EXT 0x8DE4 +#define GL_MAX_BINDABLE_UNIFORM_SIZE_EXT 0x8DED +#define GL_UNIFORM_BUFFER_EXT 0x8DEE +#define GL_UNIFORM_BUFFER_BINDING_EXT 0x8DEF + +typedef GLint (GLAPIENTRY * 
PFNGLGETUNIFORMBUFFERSIZEEXTPROC) (GLuint program, GLint location); +typedef GLintptr (GLAPIENTRY * PFNGLGETUNIFORMOFFSETEXTPROC) (GLuint program, GLint location); +typedef void (GLAPIENTRY * PFNGLUNIFORMBUFFEREXTPROC) (GLuint program, GLint location, GLuint buffer); + +#define glGetUniformBufferSizeEXT GLEW_GET_FUN(__glewGetUniformBufferSizeEXT) +#define glGetUniformOffsetEXT GLEW_GET_FUN(__glewGetUniformOffsetEXT) +#define glUniformBufferEXT GLEW_GET_FUN(__glewUniformBufferEXT) + +#define GLEW_EXT_bindable_uniform GLEW_GET_VAR(__GLEW_EXT_bindable_uniform) + +#endif /* GL_EXT_bindable_uniform */ + +/* --------------------------- GL_EXT_blend_color -------------------------- */ + +#ifndef GL_EXT_blend_color +#define GL_EXT_blend_color 1 + +#define GL_CONSTANT_COLOR_EXT 0x8001 +#define GL_ONE_MINUS_CONSTANT_COLOR_EXT 0x8002 +#define GL_CONSTANT_ALPHA_EXT 0x8003 +#define GL_ONE_MINUS_CONSTANT_ALPHA_EXT 0x8004 +#define GL_BLEND_COLOR_EXT 0x8005 + +typedef void (GLAPIENTRY * PFNGLBLENDCOLOREXTPROC) (GLclampf red, GLclampf green, GLclampf blue, GLclampf alpha); + +#define glBlendColorEXT GLEW_GET_FUN(__glewBlendColorEXT) + +#define GLEW_EXT_blend_color GLEW_GET_VAR(__GLEW_EXT_blend_color) + +#endif /* GL_EXT_blend_color */ + +/* --------------------- GL_EXT_blend_equation_separate -------------------- */ + +#ifndef GL_EXT_blend_equation_separate +#define GL_EXT_blend_equation_separate 1 + +#define GL_BLEND_EQUATION_RGB_EXT 0x8009 +#define GL_BLEND_EQUATION_ALPHA_EXT 0x883D + +typedef void (GLAPIENTRY * PFNGLBLENDEQUATIONSEPARATEEXTPROC) (GLenum modeRGB, GLenum modeAlpha); + +#define glBlendEquationSeparateEXT GLEW_GET_FUN(__glewBlendEquationSeparateEXT) + +#define GLEW_EXT_blend_equation_separate GLEW_GET_VAR(__GLEW_EXT_blend_equation_separate) + +#endif /* GL_EXT_blend_equation_separate */ + +/* ----------------------- GL_EXT_blend_func_separate ---------------------- */ + +#ifndef GL_EXT_blend_func_separate +#define GL_EXT_blend_func_separate 1 + +#define 
GL_BLEND_DST_RGB_EXT 0x80C8 +#define GL_BLEND_SRC_RGB_EXT 0x80C9 +#define GL_BLEND_DST_ALPHA_EXT 0x80CA +#define GL_BLEND_SRC_ALPHA_EXT 0x80CB + +typedef void (GLAPIENTRY * PFNGLBLENDFUNCSEPARATEEXTPROC) (GLenum sfactorRGB, GLenum dfactorRGB, GLenum sfactorAlpha, GLenum dfactorAlpha); + +#define glBlendFuncSeparateEXT GLEW_GET_FUN(__glewBlendFuncSeparateEXT) + +#define GLEW_EXT_blend_func_separate GLEW_GET_VAR(__GLEW_EXT_blend_func_separate) + +#endif /* GL_EXT_blend_func_separate */ + +/* ------------------------- GL_EXT_blend_logic_op ------------------------- */ + +#ifndef GL_EXT_blend_logic_op +#define GL_EXT_blend_logic_op 1 + +#define GLEW_EXT_blend_logic_op GLEW_GET_VAR(__GLEW_EXT_blend_logic_op) + +#endif /* GL_EXT_blend_logic_op */ + +/* -------------------------- GL_EXT_blend_minmax -------------------------- */ + +#ifndef GL_EXT_blend_minmax +#define GL_EXT_blend_minmax 1 + +#define GL_FUNC_ADD_EXT 0x8006 +#define GL_MIN_EXT 0x8007 +#define GL_MAX_EXT 0x8008 +#define GL_BLEND_EQUATION_EXT 0x8009 + +typedef void (GLAPIENTRY * PFNGLBLENDEQUATIONEXTPROC) (GLenum mode); + +#define glBlendEquationEXT GLEW_GET_FUN(__glewBlendEquationEXT) + +#define GLEW_EXT_blend_minmax GLEW_GET_VAR(__GLEW_EXT_blend_minmax) + +#endif /* GL_EXT_blend_minmax */ + +/* ------------------------- GL_EXT_blend_subtract ------------------------- */ + +#ifndef GL_EXT_blend_subtract +#define GL_EXT_blend_subtract 1 + +#define GL_FUNC_SUBTRACT_EXT 0x800A +#define GL_FUNC_REVERSE_SUBTRACT_EXT 0x800B + +#define GLEW_EXT_blend_subtract GLEW_GET_VAR(__GLEW_EXT_blend_subtract) + +#endif /* GL_EXT_blend_subtract */ + +/* ------------------------ GL_EXT_clip_volume_hint ------------------------ */ + +#ifndef GL_EXT_clip_volume_hint +#define GL_EXT_clip_volume_hint 1 + +#define GL_CLIP_VOLUME_CLIPPING_HINT_EXT 0x80F0 + +#define GLEW_EXT_clip_volume_hint GLEW_GET_VAR(__GLEW_EXT_clip_volume_hint) + +#endif /* GL_EXT_clip_volume_hint */ + +/* ------------------------------ GL_EXT_cmyka 
----------------------------- */ + +#ifndef GL_EXT_cmyka +#define GL_EXT_cmyka 1 + +#define GL_CMYK_EXT 0x800C +#define GL_CMYKA_EXT 0x800D +#define GL_PACK_CMYK_HINT_EXT 0x800E +#define GL_UNPACK_CMYK_HINT_EXT 0x800F + +#define GLEW_EXT_cmyka GLEW_GET_VAR(__GLEW_EXT_cmyka) + +#endif /* GL_EXT_cmyka */ + +/* ------------------------- GL_EXT_color_subtable ------------------------- */ + +#ifndef GL_EXT_color_subtable +#define GL_EXT_color_subtable 1 + +typedef void (GLAPIENTRY * PFNGLCOLORSUBTABLEEXTPROC) (GLenum target, GLsizei start, GLsizei count, GLenum format, GLenum type, const void* data); +typedef void (GLAPIENTRY * PFNGLCOPYCOLORSUBTABLEEXTPROC) (GLenum target, GLsizei start, GLint x, GLint y, GLsizei width); + +#define glColorSubTableEXT GLEW_GET_FUN(__glewColorSubTableEXT) +#define glCopyColorSubTableEXT GLEW_GET_FUN(__glewCopyColorSubTableEXT) + +#define GLEW_EXT_color_subtable GLEW_GET_VAR(__GLEW_EXT_color_subtable) + +#endif /* GL_EXT_color_subtable */ + +/* ---------------------- GL_EXT_compiled_vertex_array --------------------- */ + +#ifndef GL_EXT_compiled_vertex_array +#define GL_EXT_compiled_vertex_array 1 + +#define GL_ARRAY_ELEMENT_LOCK_FIRST_EXT 0x81A8 +#define GL_ARRAY_ELEMENT_LOCK_COUNT_EXT 0x81A9 + +typedef void (GLAPIENTRY * PFNGLLOCKARRAYSEXTPROC) (GLint first, GLsizei count); +typedef void (GLAPIENTRY * PFNGLUNLOCKARRAYSEXTPROC) (void); + +#define glLockArraysEXT GLEW_GET_FUN(__glewLockArraysEXT) +#define glUnlockArraysEXT GLEW_GET_FUN(__glewUnlockArraysEXT) + +#define GLEW_EXT_compiled_vertex_array GLEW_GET_VAR(__GLEW_EXT_compiled_vertex_array) + +#endif /* GL_EXT_compiled_vertex_array */ + +/* --------------------------- GL_EXT_convolution -------------------------- */ + +#ifndef GL_EXT_convolution +#define GL_EXT_convolution 1 + +#define GL_CONVOLUTION_1D_EXT 0x8010 +#define GL_CONVOLUTION_2D_EXT 0x8011 +#define GL_SEPARABLE_2D_EXT 0x8012 +#define GL_CONVOLUTION_BORDER_MODE_EXT 0x8013 +#define GL_CONVOLUTION_FILTER_SCALE_EXT 0x8014 
+#define GL_CONVOLUTION_FILTER_BIAS_EXT 0x8015 +#define GL_REDUCE_EXT 0x8016 +#define GL_CONVOLUTION_FORMAT_EXT 0x8017 +#define GL_CONVOLUTION_WIDTH_EXT 0x8018 +#define GL_CONVOLUTION_HEIGHT_EXT 0x8019 +#define GL_MAX_CONVOLUTION_WIDTH_EXT 0x801A +#define GL_MAX_CONVOLUTION_HEIGHT_EXT 0x801B +#define GL_POST_CONVOLUTION_RED_SCALE_EXT 0x801C +#define GL_POST_CONVOLUTION_GREEN_SCALE_EXT 0x801D +#define GL_POST_CONVOLUTION_BLUE_SCALE_EXT 0x801E +#define GL_POST_CONVOLUTION_ALPHA_SCALE_EXT 0x801F +#define GL_POST_CONVOLUTION_RED_BIAS_EXT 0x8020 +#define GL_POST_CONVOLUTION_GREEN_BIAS_EXT 0x8021 +#define GL_POST_CONVOLUTION_BLUE_BIAS_EXT 0x8022 +#define GL_POST_CONVOLUTION_ALPHA_BIAS_EXT 0x8023 + +typedef void (GLAPIENTRY * PFNGLCONVOLUTIONFILTER1DEXTPROC) (GLenum target, GLenum internalformat, GLsizei width, GLenum format, GLenum type, const void* image); +typedef void (GLAPIENTRY * PFNGLCONVOLUTIONFILTER2DEXTPROC) (GLenum target, GLenum internalformat, GLsizei width, GLsizei height, GLenum format, GLenum type, const void* image); +typedef void (GLAPIENTRY * PFNGLCONVOLUTIONPARAMETERFEXTPROC) (GLenum target, GLenum pname, GLfloat param); +typedef void (GLAPIENTRY * PFNGLCONVOLUTIONPARAMETERFVEXTPROC) (GLenum target, GLenum pname, const GLfloat* params); +typedef void (GLAPIENTRY * PFNGLCONVOLUTIONPARAMETERIEXTPROC) (GLenum target, GLenum pname, GLint param); +typedef void (GLAPIENTRY * PFNGLCONVOLUTIONPARAMETERIVEXTPROC) (GLenum target, GLenum pname, const GLint* params); +typedef void (GLAPIENTRY * PFNGLCOPYCONVOLUTIONFILTER1DEXTPROC) (GLenum target, GLenum internalformat, GLint x, GLint y, GLsizei width); +typedef void (GLAPIENTRY * PFNGLCOPYCONVOLUTIONFILTER2DEXTPROC) (GLenum target, GLenum internalformat, GLint x, GLint y, GLsizei width, GLsizei height); +typedef void (GLAPIENTRY * PFNGLGETCONVOLUTIONFILTEREXTPROC) (GLenum target, GLenum format, GLenum type, void* image); +typedef void (GLAPIENTRY * PFNGLGETCONVOLUTIONPARAMETERFVEXTPROC) (GLenum target, GLenum 
pname, GLfloat* params); +typedef void (GLAPIENTRY * PFNGLGETCONVOLUTIONPARAMETERIVEXTPROC) (GLenum target, GLenum pname, GLint* params); +typedef void (GLAPIENTRY * PFNGLGETSEPARABLEFILTEREXTPROC) (GLenum target, GLenum format, GLenum type, void* row, void* column, void* span); +typedef void (GLAPIENTRY * PFNGLSEPARABLEFILTER2DEXTPROC) (GLenum target, GLenum internalformat, GLsizei width, GLsizei height, GLenum format, GLenum type, const void* row, const void* column); + +#define glConvolutionFilter1DEXT GLEW_GET_FUN(__glewConvolutionFilter1DEXT) +#define glConvolutionFilter2DEXT GLEW_GET_FUN(__glewConvolutionFilter2DEXT) +#define glConvolutionParameterfEXT GLEW_GET_FUN(__glewConvolutionParameterfEXT) +#define glConvolutionParameterfvEXT GLEW_GET_FUN(__glewConvolutionParameterfvEXT) +#define glConvolutionParameteriEXT GLEW_GET_FUN(__glewConvolutionParameteriEXT) +#define glConvolutionParameterivEXT GLEW_GET_FUN(__glewConvolutionParameterivEXT) +#define glCopyConvolutionFilter1DEXT GLEW_GET_FUN(__glewCopyConvolutionFilter1DEXT) +#define glCopyConvolutionFilter2DEXT GLEW_GET_FUN(__glewCopyConvolutionFilter2DEXT) +#define glGetConvolutionFilterEXT GLEW_GET_FUN(__glewGetConvolutionFilterEXT) +#define glGetConvolutionParameterfvEXT GLEW_GET_FUN(__glewGetConvolutionParameterfvEXT) +#define glGetConvolutionParameterivEXT GLEW_GET_FUN(__glewGetConvolutionParameterivEXT) +#define glGetSeparableFilterEXT GLEW_GET_FUN(__glewGetSeparableFilterEXT) +#define glSeparableFilter2DEXT GLEW_GET_FUN(__glewSeparableFilter2DEXT) + +#define GLEW_EXT_convolution GLEW_GET_VAR(__GLEW_EXT_convolution) + +#endif /* GL_EXT_convolution */ + +/* ------------------------ GL_EXT_coordinate_frame ------------------------ */ + +#ifndef GL_EXT_coordinate_frame +#define GL_EXT_coordinate_frame 1 + +#define GL_TANGENT_ARRAY_EXT 0x8439 +#define GL_BINORMAL_ARRAY_EXT 0x843A +#define GL_CURRENT_TANGENT_EXT 0x843B +#define GL_CURRENT_BINORMAL_EXT 0x843C +#define GL_TANGENT_ARRAY_TYPE_EXT 0x843E +#define 
GL_TANGENT_ARRAY_STRIDE_EXT 0x843F +#define GL_BINORMAL_ARRAY_TYPE_EXT 0x8440 +#define GL_BINORMAL_ARRAY_STRIDE_EXT 0x8441 +#define GL_TANGENT_ARRAY_POINTER_EXT 0x8442 +#define GL_BINORMAL_ARRAY_POINTER_EXT 0x8443 +#define GL_MAP1_TANGENT_EXT 0x8444 +#define GL_MAP2_TANGENT_EXT 0x8445 +#define GL_MAP1_BINORMAL_EXT 0x8446 +#define GL_MAP2_BINORMAL_EXT 0x8447 + +typedef void (GLAPIENTRY * PFNGLBINORMALPOINTEREXTPROC) (GLenum type, GLsizei stride, void* pointer); +typedef void (GLAPIENTRY * PFNGLTANGENTPOINTEREXTPROC) (GLenum type, GLsizei stride, void* pointer); + +#define glBinormalPointerEXT GLEW_GET_FUN(__glewBinormalPointerEXT) +#define glTangentPointerEXT GLEW_GET_FUN(__glewTangentPointerEXT) + +#define GLEW_EXT_coordinate_frame GLEW_GET_VAR(__GLEW_EXT_coordinate_frame) + +#endif /* GL_EXT_coordinate_frame */ + +/* -------------------------- GL_EXT_copy_texture -------------------------- */ + +#ifndef GL_EXT_copy_texture +#define GL_EXT_copy_texture 1 + +typedef void (GLAPIENTRY * PFNGLCOPYTEXIMAGE1DEXTPROC) (GLenum target, GLint level, GLenum internalformat, GLint x, GLint y, GLsizei width, GLint border); +typedef void (GLAPIENTRY * PFNGLCOPYTEXIMAGE2DEXTPROC) (GLenum target, GLint level, GLenum internalformat, GLint x, GLint y, GLsizei width, GLsizei height, GLint border); +typedef void (GLAPIENTRY * PFNGLCOPYTEXSUBIMAGE1DEXTPROC) (GLenum target, GLint level, GLint xoffset, GLint x, GLint y, GLsizei width); +typedef void (GLAPIENTRY * PFNGLCOPYTEXSUBIMAGE2DEXTPROC) (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint x, GLint y, GLsizei width, GLsizei height); +typedef void (GLAPIENTRY * PFNGLCOPYTEXSUBIMAGE3DEXTPROC) (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLint x, GLint y, GLsizei width, GLsizei height); + +#define glCopyTexImage1DEXT GLEW_GET_FUN(__glewCopyTexImage1DEXT) +#define glCopyTexImage2DEXT GLEW_GET_FUN(__glewCopyTexImage2DEXT) +#define glCopyTexSubImage1DEXT GLEW_GET_FUN(__glewCopyTexSubImage1DEXT) 
+#define glCopyTexSubImage2DEXT GLEW_GET_FUN(__glewCopyTexSubImage2DEXT) +#define glCopyTexSubImage3DEXT GLEW_GET_FUN(__glewCopyTexSubImage3DEXT) + +#define GLEW_EXT_copy_texture GLEW_GET_VAR(__GLEW_EXT_copy_texture) + +#endif /* GL_EXT_copy_texture */ + +/* --------------------------- GL_EXT_cull_vertex -------------------------- */ + +#ifndef GL_EXT_cull_vertex +#define GL_EXT_cull_vertex 1 + +typedef void (GLAPIENTRY * PFNGLCULLPARAMETERDVEXTPROC) (GLenum pname, GLdouble* params); +typedef void (GLAPIENTRY * PFNGLCULLPARAMETERFVEXTPROC) (GLenum pname, GLfloat* params); + +#define glCullParameterdvEXT GLEW_GET_FUN(__glewCullParameterdvEXT) +#define glCullParameterfvEXT GLEW_GET_FUN(__glewCullParameterfvEXT) + +#define GLEW_EXT_cull_vertex GLEW_GET_VAR(__GLEW_EXT_cull_vertex) + +#endif /* GL_EXT_cull_vertex */ + +/* ------------------------ GL_EXT_depth_bounds_test ----------------------- */ + +#ifndef GL_EXT_depth_bounds_test +#define GL_EXT_depth_bounds_test 1 + +#define GL_DEPTH_BOUNDS_TEST_EXT 0x8890 +#define GL_DEPTH_BOUNDS_EXT 0x8891 + +typedef void (GLAPIENTRY * PFNGLDEPTHBOUNDSEXTPROC) (GLclampd zmin, GLclampd zmax); + +#define glDepthBoundsEXT GLEW_GET_FUN(__glewDepthBoundsEXT) + +#define GLEW_EXT_depth_bounds_test GLEW_GET_VAR(__GLEW_EXT_depth_bounds_test) + +#endif /* GL_EXT_depth_bounds_test */ + +/* ----------------------- GL_EXT_direct_state_access ---------------------- */ + +#ifndef GL_EXT_direct_state_access +#define GL_EXT_direct_state_access 1 + +#define GL_PROGRAM_MATRIX_EXT 0x8E2D +#define GL_TRANSPOSE_PROGRAM_MATRIX_EXT 0x8E2E +#define GL_PROGRAM_MATRIX_STACK_DEPTH_EXT 0x8E2F + +typedef void (GLAPIENTRY * PFNGLBINDMULTITEXTUREEXTPROC) (GLenum texunit, GLenum target, GLuint texture); +typedef GLenum (GLAPIENTRY * PFNGLCHECKNAMEDFRAMEBUFFERSTATUSEXTPROC) (GLuint framebuffer, GLenum target); +typedef void (GLAPIENTRY * PFNGLCLIENTATTRIBDEFAULTEXTPROC) (GLbitfield mask); +typedef void (GLAPIENTRY * PFNGLCOMPRESSEDMULTITEXIMAGE1DEXTPROC) (GLenum 
texunit, GLenum target, GLint level, GLenum internalformat, GLsizei width, GLint border, GLsizei imageSize, const void* data); +typedef void (GLAPIENTRY * PFNGLCOMPRESSEDMULTITEXIMAGE2DEXTPROC) (GLenum texunit, GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLint border, GLsizei imageSize, const void* data); +typedef void (GLAPIENTRY * PFNGLCOMPRESSEDMULTITEXIMAGE3DEXTPROC) (GLenum texunit, GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth, GLint border, GLsizei imageSize, const void* data); +typedef void (GLAPIENTRY * PFNGLCOMPRESSEDMULTITEXSUBIMAGE1DEXTPROC) (GLenum texunit, GLenum target, GLint level, GLint xoffset, GLsizei width, GLenum format, GLsizei imageSize, const void* data); +typedef void (GLAPIENTRY * PFNGLCOMPRESSEDMULTITEXSUBIMAGE2DEXTPROC) (GLenum texunit, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLsizei imageSize, const void* data); +typedef void (GLAPIENTRY * PFNGLCOMPRESSEDMULTITEXSUBIMAGE3DEXTPROC) (GLenum texunit, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLsizei imageSize, const void* data); +typedef void (GLAPIENTRY * PFNGLCOMPRESSEDTEXTUREIMAGE1DEXTPROC) (GLuint texture, GLenum target, GLint level, GLenum internalformat, GLsizei width, GLint border, GLsizei imageSize, const void* data); +typedef void (GLAPIENTRY * PFNGLCOMPRESSEDTEXTUREIMAGE2DEXTPROC) (GLuint texture, GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLint border, GLsizei imageSize, const void* data); +typedef void (GLAPIENTRY * PFNGLCOMPRESSEDTEXTUREIMAGE3DEXTPROC) (GLuint texture, GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth, GLint border, GLsizei imageSize, const void* data); +typedef void (GLAPIENTRY * PFNGLCOMPRESSEDTEXTURESUBIMAGE1DEXTPROC) (GLuint texture, 
GLenum target, GLint level, GLint xoffset, GLsizei width, GLenum format, GLsizei imageSize, const void* data); +typedef void (GLAPIENTRY * PFNGLCOMPRESSEDTEXTURESUBIMAGE2DEXTPROC) (GLuint texture, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLsizei imageSize, const void* data); +typedef void (GLAPIENTRY * PFNGLCOMPRESSEDTEXTURESUBIMAGE3DEXTPROC) (GLuint texture, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLsizei imageSize, const void* data); +typedef void (GLAPIENTRY * PFNGLCOPYMULTITEXIMAGE1DEXTPROC) (GLenum texunit, GLenum target, GLint level, GLenum internalformat, GLint x, GLint y, GLsizei width, GLint border); +typedef void (GLAPIENTRY * PFNGLCOPYMULTITEXIMAGE2DEXTPROC) (GLenum texunit, GLenum target, GLint level, GLenum internalformat, GLint x, GLint y, GLsizei width, GLsizei height, GLint border); +typedef void (GLAPIENTRY * PFNGLCOPYMULTITEXSUBIMAGE1DEXTPROC) (GLenum texunit, GLenum target, GLint level, GLint xoffset, GLint x, GLint y, GLsizei width); +typedef void (GLAPIENTRY * PFNGLCOPYMULTITEXSUBIMAGE2DEXTPROC) (GLenum texunit, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint x, GLint y, GLsizei width, GLsizei height); +typedef void (GLAPIENTRY * PFNGLCOPYMULTITEXSUBIMAGE3DEXTPROC) (GLenum texunit, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLint x, GLint y, GLsizei width, GLsizei height); +typedef void (GLAPIENTRY * PFNGLCOPYTEXTUREIMAGE1DEXTPROC) (GLuint texture, GLenum target, GLint level, GLenum internalformat, GLint x, GLint y, GLsizei width, GLint border); +typedef void (GLAPIENTRY * PFNGLCOPYTEXTUREIMAGE2DEXTPROC) (GLuint texture, GLenum target, GLint level, GLenum internalformat, GLint x, GLint y, GLsizei width, GLsizei height, GLint border); +typedef void (GLAPIENTRY * PFNGLCOPYTEXTURESUBIMAGE1DEXTPROC) (GLuint texture, GLenum target, GLint level, GLint 
xoffset, GLint x, GLint y, GLsizei width); +typedef void (GLAPIENTRY * PFNGLCOPYTEXTURESUBIMAGE2DEXTPROC) (GLuint texture, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint x, GLint y, GLsizei width, GLsizei height); +typedef void (GLAPIENTRY * PFNGLCOPYTEXTURESUBIMAGE3DEXTPROC) (GLuint texture, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLint x, GLint y, GLsizei width, GLsizei height); +typedef void (GLAPIENTRY * PFNGLDISABLECLIENTSTATEINDEXEDEXTPROC) (GLenum array, GLuint index); +typedef void (GLAPIENTRY * PFNGLENABLECLIENTSTATEINDEXEDEXTPROC) (GLenum array, GLuint index); +typedef void (GLAPIENTRY * PFNGLFRAMEBUFFERDRAWBUFFEREXTPROC) (GLuint framebuffer, GLenum mode); +typedef void (GLAPIENTRY * PFNGLFRAMEBUFFERDRAWBUFFERSEXTPROC) (GLuint framebuffer, GLsizei n, const GLenum* bufs); +typedef void (GLAPIENTRY * PFNGLFRAMEBUFFERREADBUFFEREXTPROC) (GLuint framebuffer, GLenum mode); +typedef void (GLAPIENTRY * PFNGLGENERATEMULTITEXMIPMAPEXTPROC) (GLenum texunit, GLenum target); +typedef void (GLAPIENTRY * PFNGLGENERATETEXTUREMIPMAPEXTPROC) (GLuint texture, GLenum target); +typedef void (GLAPIENTRY * PFNGLGETCOMPRESSEDMULTITEXIMAGEEXTPROC) (GLenum texunit, GLenum target, GLint level, void* img); +typedef void (GLAPIENTRY * PFNGLGETCOMPRESSEDTEXTUREIMAGEEXTPROC) (GLuint texture, GLenum target, GLint level, void* img); +typedef void (GLAPIENTRY * PFNGLGETDOUBLEINDEXEDVEXTPROC) (GLenum pname, GLuint index, GLdouble* params); +typedef void (GLAPIENTRY * PFNGLGETFLOATINDEXEDVEXTPROC) (GLenum pname, GLuint index, GLfloat* params); +typedef void (GLAPIENTRY * PFNGLGETFRAMEBUFFERPARAMETERIVEXTPROC) (GLuint framebuffer, GLenum pname, GLint* param); +typedef void (GLAPIENTRY * PFNGLGETMULTITEXENVFVEXTPROC) (GLenum texunit, GLenum target, GLenum pname, GLfloat* params); +typedef void (GLAPIENTRY * PFNGLGETMULTITEXENVIVEXTPROC) (GLenum texunit, GLenum target, GLenum pname, GLint* params); +typedef void (GLAPIENTRY * 
PFNGLGETMULTITEXGENDVEXTPROC) (GLenum texunit, GLenum coord, GLenum pname, GLdouble* params); +typedef void (GLAPIENTRY * PFNGLGETMULTITEXGENFVEXTPROC) (GLenum texunit, GLenum coord, GLenum pname, GLfloat* params); +typedef void (GLAPIENTRY * PFNGLGETMULTITEXGENIVEXTPROC) (GLenum texunit, GLenum coord, GLenum pname, GLint* params); +typedef void (GLAPIENTRY * PFNGLGETMULTITEXIMAGEEXTPROC) (GLenum texunit, GLenum target, GLint level, GLenum format, GLenum type, void* pixels); +typedef void (GLAPIENTRY * PFNGLGETMULTITEXLEVELPARAMETERFVEXTPROC) (GLenum texunit, GLenum target, GLint level, GLenum pname, GLfloat* params); +typedef void (GLAPIENTRY * PFNGLGETMULTITEXLEVELPARAMETERIVEXTPROC) (GLenum texunit, GLenum target, GLint level, GLenum pname, GLint* params); +typedef void (GLAPIENTRY * PFNGLGETMULTITEXPARAMETERIIVEXTPROC) (GLenum texunit, GLenum target, GLenum pname, GLint* params); +typedef void (GLAPIENTRY * PFNGLGETMULTITEXPARAMETERIUIVEXTPROC) (GLenum texunit, GLenum target, GLenum pname, GLuint* params); +typedef void (GLAPIENTRY * PFNGLGETMULTITEXPARAMETERFVEXTPROC) (GLenum texunit, GLenum target, GLenum pname, GLfloat* params); +typedef void (GLAPIENTRY * PFNGLGETMULTITEXPARAMETERIVEXTPROC) (GLenum texunit, GLenum target, GLenum pname, GLint* params); +typedef void (GLAPIENTRY * PFNGLGETNAMEDBUFFERPARAMETERIVEXTPROC) (GLuint buffer, GLenum pname, GLint* params); +typedef void (GLAPIENTRY * PFNGLGETNAMEDBUFFERPOINTERVEXTPROC) (GLuint buffer, GLenum pname, void** params); +typedef void (GLAPIENTRY * PFNGLGETNAMEDBUFFERSUBDATAEXTPROC) (GLuint buffer, GLintptr offset, GLsizeiptr size, void* data); +typedef void (GLAPIENTRY * PFNGLGETNAMEDFRAMEBUFFERATTACHMENTPARAMETERIVEXTPROC) (GLuint framebuffer, GLenum attachment, GLenum pname, GLint* params); +typedef void (GLAPIENTRY * PFNGLGETNAMEDPROGRAMLOCALPARAMETERIIVEXTPROC) (GLuint program, GLenum target, GLuint index, GLint* params); +typedef void (GLAPIENTRY * PFNGLGETNAMEDPROGRAMLOCALPARAMETERIUIVEXTPROC) (GLuint 
program, GLenum target, GLuint index, GLuint* params); +typedef void (GLAPIENTRY * PFNGLGETNAMEDPROGRAMLOCALPARAMETERDVEXTPROC) (GLuint program, GLenum target, GLuint index, GLdouble* params); +typedef void (GLAPIENTRY * PFNGLGETNAMEDPROGRAMLOCALPARAMETERFVEXTPROC) (GLuint program, GLenum target, GLuint index, GLfloat* params); +typedef void (GLAPIENTRY * PFNGLGETNAMEDPROGRAMSTRINGEXTPROC) (GLuint program, GLenum target, GLenum pname, void* string); +typedef void (GLAPIENTRY * PFNGLGETNAMEDPROGRAMIVEXTPROC) (GLuint program, GLenum target, GLenum pname, GLint* params); +typedef void (GLAPIENTRY * PFNGLGETNAMEDRENDERBUFFERPARAMETERIVEXTPROC) (GLuint renderbuffer, GLenum pname, GLint* params); +typedef void (GLAPIENTRY * PFNGLGETPOINTERINDEXEDVEXTPROC) (GLenum pname, GLuint index, GLvoid** params); +typedef void (GLAPIENTRY * PFNGLGETTEXTUREIMAGEEXTPROC) (GLuint texture, GLenum target, GLint level, GLenum format, GLenum type, void* pixels); +typedef void (GLAPIENTRY * PFNGLGETTEXTURELEVELPARAMETERFVEXTPROC) (GLuint texture, GLenum target, GLint level, GLenum pname, GLfloat* params); +typedef void (GLAPIENTRY * PFNGLGETTEXTURELEVELPARAMETERIVEXTPROC) (GLuint texture, GLenum target, GLint level, GLenum pname, GLint* params); +typedef void (GLAPIENTRY * PFNGLGETTEXTUREPARAMETERIIVEXTPROC) (GLuint texture, GLenum target, GLenum pname, GLint* params); +typedef void (GLAPIENTRY * PFNGLGETTEXTUREPARAMETERIUIVEXTPROC) (GLuint texture, GLenum target, GLenum pname, GLuint* params); +typedef void (GLAPIENTRY * PFNGLGETTEXTUREPARAMETERFVEXTPROC) (GLuint texture, GLenum target, GLenum pname, GLfloat* params); +typedef void (GLAPIENTRY * PFNGLGETTEXTUREPARAMETERIVEXTPROC) (GLuint texture, GLenum target, GLenum pname, GLint* params); +typedef GLvoid * (GLAPIENTRY * PFNGLMAPNAMEDBUFFEREXTPROC) (GLuint buffer, GLenum access); +typedef void (GLAPIENTRY * PFNGLMATRIXFRUSTUMEXTPROC) (GLenum matrixMode, GLdouble l, GLdouble r, GLdouble b, GLdouble t, GLdouble n, GLdouble f); +typedef void 
(GLAPIENTRY * PFNGLMATRIXLOADIDENTITYEXTPROC) (GLenum matrixMode); +typedef void (GLAPIENTRY * PFNGLMATRIXLOADTRANSPOSEDEXTPROC) (GLenum matrixMode, const GLdouble* m); +typedef void (GLAPIENTRY * PFNGLMATRIXLOADTRANSPOSEFEXTPROC) (GLenum matrixMode, const GLfloat* m); +typedef void (GLAPIENTRY * PFNGLMATRIXLOADDEXTPROC) (GLenum matrixMode, const GLdouble* m); +typedef void (GLAPIENTRY * PFNGLMATRIXLOADFEXTPROC) (GLenum matrixMode, const GLfloat* m); +typedef void (GLAPIENTRY * PFNGLMATRIXMULTTRANSPOSEDEXTPROC) (GLenum matrixMode, const GLdouble* m); +typedef void (GLAPIENTRY * PFNGLMATRIXMULTTRANSPOSEFEXTPROC) (GLenum matrixMode, const GLfloat* m); +typedef void (GLAPIENTRY * PFNGLMATRIXMULTDEXTPROC) (GLenum matrixMode, const GLdouble* m); +typedef void (GLAPIENTRY * PFNGLMATRIXMULTFEXTPROC) (GLenum matrixMode, const GLfloat* m); +typedef void (GLAPIENTRY * PFNGLMATRIXORTHOEXTPROC) (GLenum matrixMode, GLdouble l, GLdouble r, GLdouble b, GLdouble t, GLdouble n, GLdouble f); +typedef void (GLAPIENTRY * PFNGLMATRIXPOPEXTPROC) (GLenum matrixMode); +typedef void (GLAPIENTRY * PFNGLMATRIXPUSHEXTPROC) (GLenum matrixMode); +typedef void (GLAPIENTRY * PFNGLMATRIXROTATEDEXTPROC) (GLenum matrixMode, GLdouble angle, GLdouble x, GLdouble y, GLdouble z); +typedef void (GLAPIENTRY * PFNGLMATRIXROTATEFEXTPROC) (GLenum matrixMode, GLfloat angle, GLfloat x, GLfloat y, GLfloat z); +typedef void (GLAPIENTRY * PFNGLMATRIXSCALEDEXTPROC) (GLenum matrixMode, GLdouble x, GLdouble y, GLdouble z); +typedef void (GLAPIENTRY * PFNGLMATRIXSCALEFEXTPROC) (GLenum matrixMode, GLfloat x, GLfloat y, GLfloat z); +typedef void (GLAPIENTRY * PFNGLMATRIXTRANSLATEDEXTPROC) (GLenum matrixMode, GLdouble x, GLdouble y, GLdouble z); +typedef void (GLAPIENTRY * PFNGLMATRIXTRANSLATEFEXTPROC) (GLenum matrixMode, GLfloat x, GLfloat y, GLfloat z); +typedef void (GLAPIENTRY * PFNGLMULTITEXBUFFEREXTPROC) (GLenum texunit, GLenum target, GLenum internalformat, GLuint buffer); +typedef void (GLAPIENTRY * 
PFNGLMULTITEXCOORDPOINTEREXTPROC) (GLenum texunit, GLint size, GLenum type, GLsizei stride, const void* pointer); +typedef void (GLAPIENTRY * PFNGLMULTITEXENVFEXTPROC) (GLenum texunit, GLenum target, GLenum pname, GLfloat param); +typedef void (GLAPIENTRY * PFNGLMULTITEXENVFVEXTPROC) (GLenum texunit, GLenum target, GLenum pname, const GLfloat* params); +typedef void (GLAPIENTRY * PFNGLMULTITEXENVIEXTPROC) (GLenum texunit, GLenum target, GLenum pname, GLint param); +typedef void (GLAPIENTRY * PFNGLMULTITEXENVIVEXTPROC) (GLenum texunit, GLenum target, GLenum pname, const GLint* params); +typedef void (GLAPIENTRY * PFNGLMULTITEXGENDEXTPROC) (GLenum texunit, GLenum coord, GLenum pname, GLdouble param); +typedef void (GLAPIENTRY * PFNGLMULTITEXGENDVEXTPROC) (GLenum texunit, GLenum coord, GLenum pname, const GLdouble* params); +typedef void (GLAPIENTRY * PFNGLMULTITEXGENFEXTPROC) (GLenum texunit, GLenum coord, GLenum pname, GLfloat param); +typedef void (GLAPIENTRY * PFNGLMULTITEXGENFVEXTPROC) (GLenum texunit, GLenum coord, GLenum pname, const GLfloat* params); +typedef void (GLAPIENTRY * PFNGLMULTITEXGENIEXTPROC) (GLenum texunit, GLenum coord, GLenum pname, GLint param); +typedef void (GLAPIENTRY * PFNGLMULTITEXGENIVEXTPROC) (GLenum texunit, GLenum coord, GLenum pname, const GLint* params); +typedef void (GLAPIENTRY * PFNGLMULTITEXIMAGE1DEXTPROC) (GLenum texunit, GLenum target, GLint level, GLint internalformat, GLsizei width, GLint border, GLenum format, GLenum type, const void* pixels); +typedef void (GLAPIENTRY * PFNGLMULTITEXIMAGE2DEXTPROC) (GLenum texunit, GLenum target, GLint level, GLint internalformat, GLsizei width, GLsizei height, GLint border, GLenum format, GLenum type, const void* pixels); +typedef void (GLAPIENTRY * PFNGLMULTITEXIMAGE3DEXTPROC) (GLenum texunit, GLenum target, GLint level, GLint internalformat, GLsizei width, GLsizei height, GLsizei depth, GLint border, GLenum format, GLenum type, const void* pixels); +typedef void (GLAPIENTRY * 
PFNGLMULTITEXPARAMETERIIVEXTPROC) (GLenum texunit, GLenum target, GLenum pname, const GLint* params); +typedef void (GLAPIENTRY * PFNGLMULTITEXPARAMETERIUIVEXTPROC) (GLenum texunit, GLenum target, GLenum pname, const GLuint* params); +typedef void (GLAPIENTRY * PFNGLMULTITEXPARAMETERFEXTPROC) (GLenum texunit, GLenum target, GLenum pname, GLfloat param); +typedef void (GLAPIENTRY * PFNGLMULTITEXPARAMETERFVEXTPROC) (GLenum texunit, GLenum target, GLenum pname, const GLfloat* param); +typedef void (GLAPIENTRY * PFNGLMULTITEXPARAMETERIEXTPROC) (GLenum texunit, GLenum target, GLenum pname, GLint param); +typedef void (GLAPIENTRY * PFNGLMULTITEXPARAMETERIVEXTPROC) (GLenum texunit, GLenum target, GLenum pname, const GLint* param); +typedef void (GLAPIENTRY * PFNGLMULTITEXRENDERBUFFEREXTPROC) (GLenum texunit, GLenum target, GLuint renderbuffer); +typedef void (GLAPIENTRY * PFNGLMULTITEXSUBIMAGE1DEXTPROC) (GLenum texunit, GLenum target, GLint level, GLint xoffset, GLsizei width, GLenum format, GLenum type, const void* pixels); +typedef void (GLAPIENTRY * PFNGLMULTITEXSUBIMAGE2DEXTPROC) (GLenum texunit, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLenum type, const void* pixels); +typedef void (GLAPIENTRY * PFNGLMULTITEXSUBIMAGE3DEXTPROC) (GLenum texunit, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type, const void* pixels); +typedef void (GLAPIENTRY * PFNGLNAMEDBUFFERDATAEXTPROC) (GLuint buffer, GLsizeiptr size, const void* data, GLenum usage); +typedef void (GLAPIENTRY * PFNGLNAMEDBUFFERSUBDATAEXTPROC) (GLuint buffer, GLintptr offset, GLsizeiptr size, const void* data); +typedef void (GLAPIENTRY * PFNGLNAMEDFRAMEBUFFERRENDERBUFFEREXTPROC) (GLuint framebuffer, GLenum attachment, GLenum renderbuffertarget, GLuint renderbuffer); +typedef void (GLAPIENTRY * PFNGLNAMEDFRAMEBUFFERTEXTURE1DEXTPROC) (GLuint framebuffer, GLenum 
attachment, GLenum textarget, GLuint texture, GLint level); +typedef void (GLAPIENTRY * PFNGLNAMEDFRAMEBUFFERTEXTURE2DEXTPROC) (GLuint framebuffer, GLenum attachment, GLenum textarget, GLuint texture, GLint level); +typedef void (GLAPIENTRY * PFNGLNAMEDFRAMEBUFFERTEXTURE3DEXTPROC) (GLuint framebuffer, GLenum attachment, GLenum textarget, GLuint texture, GLint level, GLint zoffset); +typedef void (GLAPIENTRY * PFNGLNAMEDFRAMEBUFFERTEXTUREEXTPROC) (GLuint framebuffer, GLenum attachment, GLuint texture, GLint level); +typedef void (GLAPIENTRY * PFNGLNAMEDFRAMEBUFFERTEXTUREFACEEXTPROC) (GLuint framebuffer, GLenum attachment, GLuint texture, GLint level, GLenum face); +typedef void (GLAPIENTRY * PFNGLNAMEDFRAMEBUFFERTEXTURELAYEREXTPROC) (GLuint framebuffer, GLenum attachment, GLuint texture, GLint level, GLint layer); +typedef void (GLAPIENTRY * PFNGLNAMEDPROGRAMLOCALPARAMETER4DEXTPROC) (GLuint program, GLenum target, GLuint index, GLdouble x, GLdouble y, GLdouble z, GLdouble w); +typedef void (GLAPIENTRY * PFNGLNAMEDPROGRAMLOCALPARAMETER4DVEXTPROC) (GLuint program, GLenum target, GLuint index, const GLdouble* params); +typedef void (GLAPIENTRY * PFNGLNAMEDPROGRAMLOCALPARAMETER4FEXTPROC) (GLuint program, GLenum target, GLuint index, GLfloat x, GLfloat y, GLfloat z, GLfloat w); +typedef void (GLAPIENTRY * PFNGLNAMEDPROGRAMLOCALPARAMETER4FVEXTPROC) (GLuint program, GLenum target, GLuint index, const GLfloat* params); +typedef void (GLAPIENTRY * PFNGLNAMEDPROGRAMLOCALPARAMETERI4IEXTPROC) (GLuint program, GLenum target, GLuint index, GLint x, GLint y, GLint z, GLint w); +typedef void (GLAPIENTRY * PFNGLNAMEDPROGRAMLOCALPARAMETERI4IVEXTPROC) (GLuint program, GLenum target, GLuint index, const GLint* params); +typedef void (GLAPIENTRY * PFNGLNAMEDPROGRAMLOCALPARAMETERI4UIEXTPROC) (GLuint program, GLenum target, GLuint index, GLuint x, GLuint y, GLuint z, GLuint w); +typedef void (GLAPIENTRY * PFNGLNAMEDPROGRAMLOCALPARAMETERI4UIVEXTPROC) (GLuint program, GLenum target, GLuint 
index, const GLuint* params); +typedef void (GLAPIENTRY * PFNGLNAMEDPROGRAMLOCALPARAMETERS4FVEXTPROC) (GLuint program, GLenum target, GLuint index, GLsizei count, const GLfloat* params); +typedef void (GLAPIENTRY * PFNGLNAMEDPROGRAMLOCALPARAMETERSI4IVEXTPROC) (GLuint program, GLenum target, GLuint index, GLsizei count, const GLint* params); +typedef void (GLAPIENTRY * PFNGLNAMEDPROGRAMLOCALPARAMETERSI4UIVEXTPROC) (GLuint program, GLenum target, GLuint index, GLsizei count, const GLuint* params); +typedef void (GLAPIENTRY * PFNGLNAMEDPROGRAMSTRINGEXTPROC) (GLuint program, GLenum target, GLenum format, GLsizei len, const void* string); +typedef void (GLAPIENTRY * PFNGLNAMEDRENDERBUFFERSTORAGEEXTPROC) (GLuint renderbuffer, GLenum internalformat, GLsizei width, GLsizei height); +typedef void (GLAPIENTRY * PFNGLNAMEDRENDERBUFFERSTORAGEMULTISAMPLECOVERAGEEXTPROC) (GLuint renderbuffer, GLsizei coverageSamples, GLsizei colorSamples, GLenum internalformat, GLsizei width, GLsizei height); +typedef void (GLAPIENTRY * PFNGLNAMEDRENDERBUFFERSTORAGEMULTISAMPLEEXTPROC) (GLuint renderbuffer, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height); +typedef void (GLAPIENTRY * PFNGLPROGRAMUNIFORM1FEXTPROC) (GLuint program, GLint location, GLfloat v0); +typedef void (GLAPIENTRY * PFNGLPROGRAMUNIFORM1FVEXTPROC) (GLuint program, GLint location, GLsizei count, const GLfloat* value); +typedef void (GLAPIENTRY * PFNGLPROGRAMUNIFORM1IEXTPROC) (GLuint program, GLint location, GLint v0); +typedef void (GLAPIENTRY * PFNGLPROGRAMUNIFORM1IVEXTPROC) (GLuint program, GLint location, GLsizei count, const GLint* value); +typedef void (GLAPIENTRY * PFNGLPROGRAMUNIFORM1UIEXTPROC) (GLuint program, GLint location, GLuint v0); +typedef void (GLAPIENTRY * PFNGLPROGRAMUNIFORM1UIVEXTPROC) (GLuint program, GLint location, GLsizei count, const GLuint* value); +typedef void (GLAPIENTRY * PFNGLPROGRAMUNIFORM2FEXTPROC) (GLuint program, GLint location, GLfloat v0, GLfloat v1); +typedef void 
(GLAPIENTRY * PFNGLPROGRAMUNIFORM2FVEXTPROC) (GLuint program, GLint location, GLsizei count, const GLfloat* value); +typedef void (GLAPIENTRY * PFNGLPROGRAMUNIFORM2IEXTPROC) (GLuint program, GLint location, GLint v0, GLint v1); +typedef void (GLAPIENTRY * PFNGLPROGRAMUNIFORM2IVEXTPROC) (GLuint program, GLint location, GLsizei count, const GLint* value); +typedef void (GLAPIENTRY * PFNGLPROGRAMUNIFORM2UIEXTPROC) (GLuint program, GLint location, GLuint v0, GLuint v1); +typedef void (GLAPIENTRY * PFNGLPROGRAMUNIFORM2UIVEXTPROC) (GLuint program, GLint location, GLsizei count, const GLuint* value); +typedef void (GLAPIENTRY * PFNGLPROGRAMUNIFORM3FEXTPROC) (GLuint program, GLint location, GLfloat v0, GLfloat v1, GLfloat v2); +typedef void (GLAPIENTRY * PFNGLPROGRAMUNIFORM3FVEXTPROC) (GLuint program, GLint location, GLsizei count, const GLfloat* value); +typedef void (GLAPIENTRY * PFNGLPROGRAMUNIFORM3IEXTPROC) (GLuint program, GLint location, GLint v0, GLint v1, GLint v2); +typedef void (GLAPIENTRY * PFNGLPROGRAMUNIFORM3IVEXTPROC) (GLuint program, GLint location, GLsizei count, const GLint* value); +typedef void (GLAPIENTRY * PFNGLPROGRAMUNIFORM3UIEXTPROC) (GLuint program, GLint location, GLuint v0, GLuint v1, GLuint v2); +typedef void (GLAPIENTRY * PFNGLPROGRAMUNIFORM3UIVEXTPROC) (GLuint program, GLint location, GLsizei count, const GLuint* value); +typedef void (GLAPIENTRY * PFNGLPROGRAMUNIFORM4FEXTPROC) (GLuint program, GLint location, GLfloat v0, GLfloat v1, GLfloat v2, GLfloat v3); +typedef void (GLAPIENTRY * PFNGLPROGRAMUNIFORM4FVEXTPROC) (GLuint program, GLint location, GLsizei count, const GLfloat* value); +typedef void (GLAPIENTRY * PFNGLPROGRAMUNIFORM4IEXTPROC) (GLuint program, GLint location, GLint v0, GLint v1, GLint v2, GLint v3); +typedef void (GLAPIENTRY * PFNGLPROGRAMUNIFORM4IVEXTPROC) (GLuint program, GLint location, GLsizei count, const GLint* value); +typedef void (GLAPIENTRY * PFNGLPROGRAMUNIFORM4UIEXTPROC) (GLuint program, GLint location, GLuint v0, 
GLuint v1, GLuint v2, GLuint v3); +typedef void (GLAPIENTRY * PFNGLPROGRAMUNIFORM4UIVEXTPROC) (GLuint program, GLint location, GLsizei count, const GLuint* value); +typedef void (GLAPIENTRY * PFNGLPROGRAMUNIFORMMATRIX2FVEXTPROC) (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat* value); +typedef void (GLAPIENTRY * PFNGLPROGRAMUNIFORMMATRIX2X3FVEXTPROC) (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat* value); +typedef void (GLAPIENTRY * PFNGLPROGRAMUNIFORMMATRIX2X4FVEXTPROC) (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat* value); +typedef void (GLAPIENTRY * PFNGLPROGRAMUNIFORMMATRIX3FVEXTPROC) (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat* value); +typedef void (GLAPIENTRY * PFNGLPROGRAMUNIFORMMATRIX3X2FVEXTPROC) (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat* value); +typedef void (GLAPIENTRY * PFNGLPROGRAMUNIFORMMATRIX3X4FVEXTPROC) (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat* value); +typedef void (GLAPIENTRY * PFNGLPROGRAMUNIFORMMATRIX4FVEXTPROC) (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat* value); +typedef void (GLAPIENTRY * PFNGLPROGRAMUNIFORMMATRIX4X2FVEXTPROC) (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat* value); +typedef void (GLAPIENTRY * PFNGLPROGRAMUNIFORMMATRIX4X3FVEXTPROC) (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat* value); +typedef void (GLAPIENTRY * PFNGLPUSHCLIENTATTRIBDEFAULTEXTPROC) (GLbitfield mask); +typedef void (GLAPIENTRY * PFNGLTEXTUREBUFFEREXTPROC) (GLuint texture, GLenum target, GLenum internalformat, GLuint buffer); +typedef void (GLAPIENTRY * PFNGLTEXTUREIMAGE1DEXTPROC) (GLuint texture, GLenum target, GLint level, GLint internalformat, GLsizei width, GLint border, GLenum format, GLenum type, const void* pixels); 
+typedef void (GLAPIENTRY * PFNGLTEXTUREIMAGE2DEXTPROC) (GLuint texture, GLenum target, GLint level, GLint internalformat, GLsizei width, GLsizei height, GLint border, GLenum format, GLenum type, const void* pixels); +typedef void (GLAPIENTRY * PFNGLTEXTUREIMAGE3DEXTPROC) (GLuint texture, GLenum target, GLint level, GLint internalformat, GLsizei width, GLsizei height, GLsizei depth, GLint border, GLenum format, GLenum type, const void* pixels); +typedef void (GLAPIENTRY * PFNGLTEXTUREPARAMETERIIVEXTPROC) (GLuint texture, GLenum target, GLenum pname, const GLint* params); +typedef void (GLAPIENTRY * PFNGLTEXTUREPARAMETERIUIVEXTPROC) (GLuint texture, GLenum target, GLenum pname, const GLuint* params); +typedef void (GLAPIENTRY * PFNGLTEXTUREPARAMETERFEXTPROC) (GLuint texture, GLenum target, GLenum pname, GLfloat param); +typedef void (GLAPIENTRY * PFNGLTEXTUREPARAMETERFVEXTPROC) (GLuint texture, GLenum target, GLenum pname, const GLfloat* param); +typedef void (GLAPIENTRY * PFNGLTEXTUREPARAMETERIEXTPROC) (GLuint texture, GLenum target, GLenum pname, GLint param); +typedef void (GLAPIENTRY * PFNGLTEXTUREPARAMETERIVEXTPROC) (GLuint texture, GLenum target, GLenum pname, const GLint* param); +typedef void (GLAPIENTRY * PFNGLTEXTURERENDERBUFFEREXTPROC) (GLuint texture, GLenum target, GLuint renderbuffer); +typedef void (GLAPIENTRY * PFNGLTEXTURESUBIMAGE1DEXTPROC) (GLuint texture, GLenum target, GLint level, GLint xoffset, GLsizei width, GLenum format, GLenum type, const void* pixels); +typedef void (GLAPIENTRY * PFNGLTEXTURESUBIMAGE2DEXTPROC) (GLuint texture, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLenum type, const void* pixels); +typedef void (GLAPIENTRY * PFNGLTEXTURESUBIMAGE3DEXTPROC) (GLuint texture, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type, const void* pixels); +typedef GLboolean (GLAPIENTRY * 
PFNGLUNMAPNAMEDBUFFEREXTPROC) (GLuint buffer); + +#define glBindMultiTextureEXT GLEW_GET_FUN(__glewBindMultiTextureEXT) +#define glCheckNamedFramebufferStatusEXT GLEW_GET_FUN(__glewCheckNamedFramebufferStatusEXT) +#define glClientAttribDefaultEXT GLEW_GET_FUN(__glewClientAttribDefaultEXT) +#define glCompressedMultiTexImage1DEXT GLEW_GET_FUN(__glewCompressedMultiTexImage1DEXT) +#define glCompressedMultiTexImage2DEXT GLEW_GET_FUN(__glewCompressedMultiTexImage2DEXT) +#define glCompressedMultiTexImage3DEXT GLEW_GET_FUN(__glewCompressedMultiTexImage3DEXT) +#define glCompressedMultiTexSubImage1DEXT GLEW_GET_FUN(__glewCompressedMultiTexSubImage1DEXT) +#define glCompressedMultiTexSubImage2DEXT GLEW_GET_FUN(__glewCompressedMultiTexSubImage2DEXT) +#define glCompressedMultiTexSubImage3DEXT GLEW_GET_FUN(__glewCompressedMultiTexSubImage3DEXT) +#define glCompressedTextureImage1DEXT GLEW_GET_FUN(__glewCompressedTextureImage1DEXT) +#define glCompressedTextureImage2DEXT GLEW_GET_FUN(__glewCompressedTextureImage2DEXT) +#define glCompressedTextureImage3DEXT GLEW_GET_FUN(__glewCompressedTextureImage3DEXT) +#define glCompressedTextureSubImage1DEXT GLEW_GET_FUN(__glewCompressedTextureSubImage1DEXT) +#define glCompressedTextureSubImage2DEXT GLEW_GET_FUN(__glewCompressedTextureSubImage2DEXT) +#define glCompressedTextureSubImage3DEXT GLEW_GET_FUN(__glewCompressedTextureSubImage3DEXT) +#define glCopyMultiTexImage1DEXT GLEW_GET_FUN(__glewCopyMultiTexImage1DEXT) +#define glCopyMultiTexImage2DEXT GLEW_GET_FUN(__glewCopyMultiTexImage2DEXT) +#define glCopyMultiTexSubImage1DEXT GLEW_GET_FUN(__glewCopyMultiTexSubImage1DEXT) +#define glCopyMultiTexSubImage2DEXT GLEW_GET_FUN(__glewCopyMultiTexSubImage2DEXT) +#define glCopyMultiTexSubImage3DEXT GLEW_GET_FUN(__glewCopyMultiTexSubImage3DEXT) +#define glCopyTextureImage1DEXT GLEW_GET_FUN(__glewCopyTextureImage1DEXT) +#define glCopyTextureImage2DEXT GLEW_GET_FUN(__glewCopyTextureImage2DEXT) +#define glCopyTextureSubImage1DEXT 
GLEW_GET_FUN(__glewCopyTextureSubImage1DEXT) +#define glCopyTextureSubImage2DEXT GLEW_GET_FUN(__glewCopyTextureSubImage2DEXT) +#define glCopyTextureSubImage3DEXT GLEW_GET_FUN(__glewCopyTextureSubImage3DEXT) +#define glDisableClientStateIndexedEXT GLEW_GET_FUN(__glewDisableClientStateIndexedEXT) +#define glEnableClientStateIndexedEXT GLEW_GET_FUN(__glewEnableClientStateIndexedEXT) +#define glFramebufferDrawBufferEXT GLEW_GET_FUN(__glewFramebufferDrawBufferEXT) +#define glFramebufferDrawBuffersEXT GLEW_GET_FUN(__glewFramebufferDrawBuffersEXT) +#define glFramebufferReadBufferEXT GLEW_GET_FUN(__glewFramebufferReadBufferEXT) +#define glGenerateMultiTexMipmapEXT GLEW_GET_FUN(__glewGenerateMultiTexMipmapEXT) +#define glGenerateTextureMipmapEXT GLEW_GET_FUN(__glewGenerateTextureMipmapEXT) +#define glGetCompressedMultiTexImageEXT GLEW_GET_FUN(__glewGetCompressedMultiTexImageEXT) +#define glGetCompressedTextureImageEXT GLEW_GET_FUN(__glewGetCompressedTextureImageEXT) +#define glGetDoubleIndexedvEXT GLEW_GET_FUN(__glewGetDoubleIndexedvEXT) +#define glGetFloatIndexedvEXT GLEW_GET_FUN(__glewGetFloatIndexedvEXT) +#define glGetFramebufferParameterivEXT GLEW_GET_FUN(__glewGetFramebufferParameterivEXT) +#define glGetMultiTexEnvfvEXT GLEW_GET_FUN(__glewGetMultiTexEnvfvEXT) +#define glGetMultiTexEnvivEXT GLEW_GET_FUN(__glewGetMultiTexEnvivEXT) +#define glGetMultiTexGendvEXT GLEW_GET_FUN(__glewGetMultiTexGendvEXT) +#define glGetMultiTexGenfvEXT GLEW_GET_FUN(__glewGetMultiTexGenfvEXT) +#define glGetMultiTexGenivEXT GLEW_GET_FUN(__glewGetMultiTexGenivEXT) +#define glGetMultiTexImageEXT GLEW_GET_FUN(__glewGetMultiTexImageEXT) +#define glGetMultiTexLevelParameterfvEXT GLEW_GET_FUN(__glewGetMultiTexLevelParameterfvEXT) +#define glGetMultiTexLevelParameterivEXT GLEW_GET_FUN(__glewGetMultiTexLevelParameterivEXT) +#define glGetMultiTexParameterIivEXT GLEW_GET_FUN(__glewGetMultiTexParameterIivEXT) +#define glGetMultiTexParameterIuivEXT GLEW_GET_FUN(__glewGetMultiTexParameterIuivEXT) +#define 
glGetMultiTexParameterfvEXT GLEW_GET_FUN(__glewGetMultiTexParameterfvEXT) +#define glGetMultiTexParameterivEXT GLEW_GET_FUN(__glewGetMultiTexParameterivEXT) +#define glGetNamedBufferParameterivEXT GLEW_GET_FUN(__glewGetNamedBufferParameterivEXT) +#define glGetNamedBufferPointervEXT GLEW_GET_FUN(__glewGetNamedBufferPointervEXT) +#define glGetNamedBufferSubDataEXT GLEW_GET_FUN(__glewGetNamedBufferSubDataEXT) +#define glGetNamedFramebufferAttachmentParameterivEXT GLEW_GET_FUN(__glewGetNamedFramebufferAttachmentParameterivEXT) +#define glGetNamedProgramLocalParameterIivEXT GLEW_GET_FUN(__glewGetNamedProgramLocalParameterIivEXT) +#define glGetNamedProgramLocalParameterIuivEXT GLEW_GET_FUN(__glewGetNamedProgramLocalParameterIuivEXT) +#define glGetNamedProgramLocalParameterdvEXT GLEW_GET_FUN(__glewGetNamedProgramLocalParameterdvEXT) +#define glGetNamedProgramLocalParameterfvEXT GLEW_GET_FUN(__glewGetNamedProgramLocalParameterfvEXT) +#define glGetNamedProgramStringEXT GLEW_GET_FUN(__glewGetNamedProgramStringEXT) +#define glGetNamedProgramivEXT GLEW_GET_FUN(__glewGetNamedProgramivEXT) +#define glGetNamedRenderbufferParameterivEXT GLEW_GET_FUN(__glewGetNamedRenderbufferParameterivEXT) +#define glGetPointerIndexedvEXT GLEW_GET_FUN(__glewGetPointerIndexedvEXT) +#define glGetTextureImageEXT GLEW_GET_FUN(__glewGetTextureImageEXT) +#define glGetTextureLevelParameterfvEXT GLEW_GET_FUN(__glewGetTextureLevelParameterfvEXT) +#define glGetTextureLevelParameterivEXT GLEW_GET_FUN(__glewGetTextureLevelParameterivEXT) +#define glGetTextureParameterIivEXT GLEW_GET_FUN(__glewGetTextureParameterIivEXT) +#define glGetTextureParameterIuivEXT GLEW_GET_FUN(__glewGetTextureParameterIuivEXT) +#define glGetTextureParameterfvEXT GLEW_GET_FUN(__glewGetTextureParameterfvEXT) +#define glGetTextureParameterivEXT GLEW_GET_FUN(__glewGetTextureParameterivEXT) +#define glMapNamedBufferEXT GLEW_GET_FUN(__glewMapNamedBufferEXT) +#define glMatrixFrustumEXT GLEW_GET_FUN(__glewMatrixFrustumEXT) +#define 
glMatrixLoadIdentityEXT GLEW_GET_FUN(__glewMatrixLoadIdentityEXT) +#define glMatrixLoadTransposedEXT GLEW_GET_FUN(__glewMatrixLoadTransposedEXT) +#define glMatrixLoadTransposefEXT GLEW_GET_FUN(__glewMatrixLoadTransposefEXT) +#define glMatrixLoaddEXT GLEW_GET_FUN(__glewMatrixLoaddEXT) +#define glMatrixLoadfEXT GLEW_GET_FUN(__glewMatrixLoadfEXT) +#define glMatrixMultTransposedEXT GLEW_GET_FUN(__glewMatrixMultTransposedEXT) +#define glMatrixMultTransposefEXT GLEW_GET_FUN(__glewMatrixMultTransposefEXT) +#define glMatrixMultdEXT GLEW_GET_FUN(__glewMatrixMultdEXT) +#define glMatrixMultfEXT GLEW_GET_FUN(__glewMatrixMultfEXT) +#define glMatrixOrthoEXT GLEW_GET_FUN(__glewMatrixOrthoEXT) +#define glMatrixPopEXT GLEW_GET_FUN(__glewMatrixPopEXT) +#define glMatrixPushEXT GLEW_GET_FUN(__glewMatrixPushEXT) +#define glMatrixRotatedEXT GLEW_GET_FUN(__glewMatrixRotatedEXT) +#define glMatrixRotatefEXT GLEW_GET_FUN(__glewMatrixRotatefEXT) +#define glMatrixScaledEXT GLEW_GET_FUN(__glewMatrixScaledEXT) +#define glMatrixScalefEXT GLEW_GET_FUN(__glewMatrixScalefEXT) +#define glMatrixTranslatedEXT GLEW_GET_FUN(__glewMatrixTranslatedEXT) +#define glMatrixTranslatefEXT GLEW_GET_FUN(__glewMatrixTranslatefEXT) +#define glMultiTexBufferEXT GLEW_GET_FUN(__glewMultiTexBufferEXT) +#define glMultiTexCoordPointerEXT GLEW_GET_FUN(__glewMultiTexCoordPointerEXT) +#define glMultiTexEnvfEXT GLEW_GET_FUN(__glewMultiTexEnvfEXT) +#define glMultiTexEnvfvEXT GLEW_GET_FUN(__glewMultiTexEnvfvEXT) +#define glMultiTexEnviEXT GLEW_GET_FUN(__glewMultiTexEnviEXT) +#define glMultiTexEnvivEXT GLEW_GET_FUN(__glewMultiTexEnvivEXT) +#define glMultiTexGendEXT GLEW_GET_FUN(__glewMultiTexGendEXT) +#define glMultiTexGendvEXT GLEW_GET_FUN(__glewMultiTexGendvEXT) +#define glMultiTexGenfEXT GLEW_GET_FUN(__glewMultiTexGenfEXT) +#define glMultiTexGenfvEXT GLEW_GET_FUN(__glewMultiTexGenfvEXT) +#define glMultiTexGeniEXT GLEW_GET_FUN(__glewMultiTexGeniEXT) +#define glMultiTexGenivEXT GLEW_GET_FUN(__glewMultiTexGenivEXT) +#define 
glMultiTexImage1DEXT GLEW_GET_FUN(__glewMultiTexImage1DEXT) +#define glMultiTexImage2DEXT GLEW_GET_FUN(__glewMultiTexImage2DEXT) +#define glMultiTexImage3DEXT GLEW_GET_FUN(__glewMultiTexImage3DEXT) +#define glMultiTexParameterIivEXT GLEW_GET_FUN(__glewMultiTexParameterIivEXT) +#define glMultiTexParameterIuivEXT GLEW_GET_FUN(__glewMultiTexParameterIuivEXT) +#define glMultiTexParameterfEXT GLEW_GET_FUN(__glewMultiTexParameterfEXT) +#define glMultiTexParameterfvEXT GLEW_GET_FUN(__glewMultiTexParameterfvEXT) +#define glMultiTexParameteriEXT GLEW_GET_FUN(__glewMultiTexParameteriEXT) +#define glMultiTexParameterivEXT GLEW_GET_FUN(__glewMultiTexParameterivEXT) +#define glMultiTexRenderbufferEXT GLEW_GET_FUN(__glewMultiTexRenderbufferEXT) +#define glMultiTexSubImage1DEXT GLEW_GET_FUN(__glewMultiTexSubImage1DEXT) +#define glMultiTexSubImage2DEXT GLEW_GET_FUN(__glewMultiTexSubImage2DEXT) +#define glMultiTexSubImage3DEXT GLEW_GET_FUN(__glewMultiTexSubImage3DEXT) +#define glNamedBufferDataEXT GLEW_GET_FUN(__glewNamedBufferDataEXT) +#define glNamedBufferSubDataEXT GLEW_GET_FUN(__glewNamedBufferSubDataEXT) +#define glNamedFramebufferRenderbufferEXT GLEW_GET_FUN(__glewNamedFramebufferRenderbufferEXT) +#define glNamedFramebufferTexture1DEXT GLEW_GET_FUN(__glewNamedFramebufferTexture1DEXT) +#define glNamedFramebufferTexture2DEXT GLEW_GET_FUN(__glewNamedFramebufferTexture2DEXT) +#define glNamedFramebufferTexture3DEXT GLEW_GET_FUN(__glewNamedFramebufferTexture3DEXT) +#define glNamedFramebufferTextureEXT GLEW_GET_FUN(__glewNamedFramebufferTextureEXT) +#define glNamedFramebufferTextureFaceEXT GLEW_GET_FUN(__glewNamedFramebufferTextureFaceEXT) +#define glNamedFramebufferTextureLayerEXT GLEW_GET_FUN(__glewNamedFramebufferTextureLayerEXT) +#define glNamedProgramLocalParameter4dEXT GLEW_GET_FUN(__glewNamedProgramLocalParameter4dEXT) +#define glNamedProgramLocalParameter4dvEXT GLEW_GET_FUN(__glewNamedProgramLocalParameter4dvEXT) +#define glNamedProgramLocalParameter4fEXT 
GLEW_GET_FUN(__glewNamedProgramLocalParameter4fEXT) +#define glNamedProgramLocalParameter4fvEXT GLEW_GET_FUN(__glewNamedProgramLocalParameter4fvEXT) +#define glNamedProgramLocalParameterI4iEXT GLEW_GET_FUN(__glewNamedProgramLocalParameterI4iEXT) +#define glNamedProgramLocalParameterI4ivEXT GLEW_GET_FUN(__glewNamedProgramLocalParameterI4ivEXT) +#define glNamedProgramLocalParameterI4uiEXT GLEW_GET_FUN(__glewNamedProgramLocalParameterI4uiEXT) +#define glNamedProgramLocalParameterI4uivEXT GLEW_GET_FUN(__glewNamedProgramLocalParameterI4uivEXT) +#define glNamedProgramLocalParameters4fvEXT GLEW_GET_FUN(__glewNamedProgramLocalParameters4fvEXT) +#define glNamedProgramLocalParametersI4ivEXT GLEW_GET_FUN(__glewNamedProgramLocalParametersI4ivEXT) +#define glNamedProgramLocalParametersI4uivEXT GLEW_GET_FUN(__glewNamedProgramLocalParametersI4uivEXT) +#define glNamedProgramStringEXT GLEW_GET_FUN(__glewNamedProgramStringEXT) +#define glNamedRenderbufferStorageEXT GLEW_GET_FUN(__glewNamedRenderbufferStorageEXT) +#define glNamedRenderbufferStorageMultisampleCoverageEXT GLEW_GET_FUN(__glewNamedRenderbufferStorageMultisampleCoverageEXT) +#define glNamedRenderbufferStorageMultisampleEXT GLEW_GET_FUN(__glewNamedRenderbufferStorageMultisampleEXT) +#define glProgramUniform1fEXT GLEW_GET_FUN(__glewProgramUniform1fEXT) +#define glProgramUniform1fvEXT GLEW_GET_FUN(__glewProgramUniform1fvEXT) +#define glProgramUniform1iEXT GLEW_GET_FUN(__glewProgramUniform1iEXT) +#define glProgramUniform1ivEXT GLEW_GET_FUN(__glewProgramUniform1ivEXT) +#define glProgramUniform1uiEXT GLEW_GET_FUN(__glewProgramUniform1uiEXT) +#define glProgramUniform1uivEXT GLEW_GET_FUN(__glewProgramUniform1uivEXT) +#define glProgramUniform2fEXT GLEW_GET_FUN(__glewProgramUniform2fEXT) +#define glProgramUniform2fvEXT GLEW_GET_FUN(__glewProgramUniform2fvEXT) +#define glProgramUniform2iEXT GLEW_GET_FUN(__glewProgramUniform2iEXT) +#define glProgramUniform2ivEXT GLEW_GET_FUN(__glewProgramUniform2ivEXT) +#define glProgramUniform2uiEXT 
GLEW_GET_FUN(__glewProgramUniform2uiEXT) +#define glProgramUniform2uivEXT GLEW_GET_FUN(__glewProgramUniform2uivEXT) +#define glProgramUniform3fEXT GLEW_GET_FUN(__glewProgramUniform3fEXT) +#define glProgramUniform3fvEXT GLEW_GET_FUN(__glewProgramUniform3fvEXT) +#define glProgramUniform3iEXT GLEW_GET_FUN(__glewProgramUniform3iEXT) +#define glProgramUniform3ivEXT GLEW_GET_FUN(__glewProgramUniform3ivEXT) +#define glProgramUniform3uiEXT GLEW_GET_FUN(__glewProgramUniform3uiEXT) +#define glProgramUniform3uivEXT GLEW_GET_FUN(__glewProgramUniform3uivEXT) +#define glProgramUniform4fEXT GLEW_GET_FUN(__glewProgramUniform4fEXT) +#define glProgramUniform4fvEXT GLEW_GET_FUN(__glewProgramUniform4fvEXT) +#define glProgramUniform4iEXT GLEW_GET_FUN(__glewProgramUniform4iEXT) +#define glProgramUniform4ivEXT GLEW_GET_FUN(__glewProgramUniform4ivEXT) +#define glProgramUniform4uiEXT GLEW_GET_FUN(__glewProgramUniform4uiEXT) +#define glProgramUniform4uivEXT GLEW_GET_FUN(__glewProgramUniform4uivEXT) +#define glProgramUniformMatrix2fvEXT GLEW_GET_FUN(__glewProgramUniformMatrix2fvEXT) +#define glProgramUniformMatrix2x3fvEXT GLEW_GET_FUN(__glewProgramUniformMatrix2x3fvEXT) +#define glProgramUniformMatrix2x4fvEXT GLEW_GET_FUN(__glewProgramUniformMatrix2x4fvEXT) +#define glProgramUniformMatrix3fvEXT GLEW_GET_FUN(__glewProgramUniformMatrix3fvEXT) +#define glProgramUniformMatrix3x2fvEXT GLEW_GET_FUN(__glewProgramUniformMatrix3x2fvEXT) +#define glProgramUniformMatrix3x4fvEXT GLEW_GET_FUN(__glewProgramUniformMatrix3x4fvEXT) +#define glProgramUniformMatrix4fvEXT GLEW_GET_FUN(__glewProgramUniformMatrix4fvEXT) +#define glProgramUniformMatrix4x2fvEXT GLEW_GET_FUN(__glewProgramUniformMatrix4x2fvEXT) +#define glProgramUniformMatrix4x3fvEXT GLEW_GET_FUN(__glewProgramUniformMatrix4x3fvEXT) +#define glPushClientAttribDefaultEXT GLEW_GET_FUN(__glewPushClientAttribDefaultEXT) +#define glTextureBufferEXT GLEW_GET_FUN(__glewTextureBufferEXT) +#define glTextureImage1DEXT GLEW_GET_FUN(__glewTextureImage1DEXT) 
+#define glTextureImage2DEXT GLEW_GET_FUN(__glewTextureImage2DEXT) +#define glTextureImage3DEXT GLEW_GET_FUN(__glewTextureImage3DEXT) +#define glTextureParameterIivEXT GLEW_GET_FUN(__glewTextureParameterIivEXT) +#define glTextureParameterIuivEXT GLEW_GET_FUN(__glewTextureParameterIuivEXT) +#define glTextureParameterfEXT GLEW_GET_FUN(__glewTextureParameterfEXT) +#define glTextureParameterfvEXT GLEW_GET_FUN(__glewTextureParameterfvEXT) +#define glTextureParameteriEXT GLEW_GET_FUN(__glewTextureParameteriEXT) +#define glTextureParameterivEXT GLEW_GET_FUN(__glewTextureParameterivEXT) +#define glTextureRenderbufferEXT GLEW_GET_FUN(__glewTextureRenderbufferEXT) +#define glTextureSubImage1DEXT GLEW_GET_FUN(__glewTextureSubImage1DEXT) +#define glTextureSubImage2DEXT GLEW_GET_FUN(__glewTextureSubImage2DEXT) +#define glTextureSubImage3DEXT GLEW_GET_FUN(__glewTextureSubImage3DEXT) +#define glUnmapNamedBufferEXT GLEW_GET_FUN(__glewUnmapNamedBufferEXT) + +#define GLEW_EXT_direct_state_access GLEW_GET_VAR(__GLEW_EXT_direct_state_access) + +#endif /* GL_EXT_direct_state_access */ + +/* -------------------------- GL_EXT_draw_buffers2 ------------------------- */ + +#ifndef GL_EXT_draw_buffers2 +#define GL_EXT_draw_buffers2 1 + +typedef void (GLAPIENTRY * PFNGLCOLORMASKINDEXEDEXTPROC) (GLuint buf, GLboolean r, GLboolean g, GLboolean b, GLboolean a); +typedef void (GLAPIENTRY * PFNGLDISABLEINDEXEDEXTPROC) (GLenum target, GLuint index); +typedef void (GLAPIENTRY * PFNGLENABLEINDEXEDEXTPROC) (GLenum target, GLuint index); +typedef void (GLAPIENTRY * PFNGLGETBOOLEANINDEXEDVEXTPROC) (GLenum value, GLuint index, GLboolean* data); +typedef void (GLAPIENTRY * PFNGLGETINTEGERINDEXEDVEXTPROC) (GLenum value, GLuint index, GLint* data); +typedef GLboolean (GLAPIENTRY * PFNGLISENABLEDINDEXEDEXTPROC) (GLenum target, GLuint index); + +#define glColorMaskIndexedEXT GLEW_GET_FUN(__glewColorMaskIndexedEXT) +#define glDisableIndexedEXT GLEW_GET_FUN(__glewDisableIndexedEXT) +#define glEnableIndexedEXT 
GLEW_GET_FUN(__glewEnableIndexedEXT) +#define glGetBooleanIndexedvEXT GLEW_GET_FUN(__glewGetBooleanIndexedvEXT) +#define glGetIntegerIndexedvEXT GLEW_GET_FUN(__glewGetIntegerIndexedvEXT) +#define glIsEnabledIndexedEXT GLEW_GET_FUN(__glewIsEnabledIndexedEXT) + +#define GLEW_EXT_draw_buffers2 GLEW_GET_VAR(__GLEW_EXT_draw_buffers2) + +#endif /* GL_EXT_draw_buffers2 */ + +/* ------------------------- GL_EXT_draw_instanced ------------------------- */ + +#ifndef GL_EXT_draw_instanced +#define GL_EXT_draw_instanced 1 + +typedef void (GLAPIENTRY * PFNGLDRAWARRAYSINSTANCEDEXTPROC) (GLenum mode, GLint start, GLsizei count, GLsizei primcount); +typedef void (GLAPIENTRY * PFNGLDRAWELEMENTSINSTANCEDEXTPROC) (GLenum mode, GLsizei count, GLenum type, const GLvoid *indices, GLsizei primcount); + +#define glDrawArraysInstancedEXT GLEW_GET_FUN(__glewDrawArraysInstancedEXT) +#define glDrawElementsInstancedEXT GLEW_GET_FUN(__glewDrawElementsInstancedEXT) + +#define GLEW_EXT_draw_instanced GLEW_GET_VAR(__GLEW_EXT_draw_instanced) + +#endif /* GL_EXT_draw_instanced */ + +/* ----------------------- GL_EXT_draw_range_elements ---------------------- */ + +#ifndef GL_EXT_draw_range_elements +#define GL_EXT_draw_range_elements 1 + +#define GL_MAX_ELEMENTS_VERTICES 0x80E8 +#define GL_MAX_ELEMENTS_INDICES 0x80E9 + +typedef void (GLAPIENTRY * PFNGLDRAWRANGEELEMENTSEXTPROC) (GLenum mode, GLuint start, GLuint end, GLsizei count, GLenum type, const GLvoid *indices); + +#define glDrawRangeElementsEXT GLEW_GET_FUN(__glewDrawRangeElementsEXT) + +#define GLEW_EXT_draw_range_elements GLEW_GET_VAR(__GLEW_EXT_draw_range_elements) + +#endif /* GL_EXT_draw_range_elements */ + +/* ---------------------------- GL_EXT_fog_coord --------------------------- */ + +#ifndef GL_EXT_fog_coord +#define GL_EXT_fog_coord 1 + +#define GL_FOG_COORDINATE_SOURCE_EXT 0x8450 +#define GL_FOG_COORDINATE_EXT 0x8451 +#define GL_FRAGMENT_DEPTH_EXT 0x8452 +#define GL_CURRENT_FOG_COORDINATE_EXT 0x8453 +#define 
GL_FOG_COORDINATE_ARRAY_TYPE_EXT 0x8454 +#define GL_FOG_COORDINATE_ARRAY_STRIDE_EXT 0x8455 +#define GL_FOG_COORDINATE_ARRAY_POINTER_EXT 0x8456 +#define GL_FOG_COORDINATE_ARRAY_EXT 0x8457 + +typedef void (GLAPIENTRY * PFNGLFOGCOORDPOINTEREXTPROC) (GLenum type, GLsizei stride, const GLvoid *pointer); +typedef void (GLAPIENTRY * PFNGLFOGCOORDDEXTPROC) (GLdouble coord); +typedef void (GLAPIENTRY * PFNGLFOGCOORDDVEXTPROC) (const GLdouble *coord); +typedef void (GLAPIENTRY * PFNGLFOGCOORDFEXTPROC) (GLfloat coord); +typedef void (GLAPIENTRY * PFNGLFOGCOORDFVEXTPROC) (const GLfloat *coord); + +#define glFogCoordPointerEXT GLEW_GET_FUN(__glewFogCoordPointerEXT) +#define glFogCoorddEXT GLEW_GET_FUN(__glewFogCoorddEXT) +#define glFogCoorddvEXT GLEW_GET_FUN(__glewFogCoorddvEXT) +#define glFogCoordfEXT GLEW_GET_FUN(__glewFogCoordfEXT) +#define glFogCoordfvEXT GLEW_GET_FUN(__glewFogCoordfvEXT) + +#define GLEW_EXT_fog_coord GLEW_GET_VAR(__GLEW_EXT_fog_coord) + +#endif /* GL_EXT_fog_coord */ + +/* ------------------------ GL_EXT_fragment_lighting ----------------------- */ + +#ifndef GL_EXT_fragment_lighting +#define GL_EXT_fragment_lighting 1 + +#define GL_FRAGMENT_LIGHTING_EXT 0x8400 +#define GL_FRAGMENT_COLOR_MATERIAL_EXT 0x8401 +#define GL_FRAGMENT_COLOR_MATERIAL_FACE_EXT 0x8402 +#define GL_FRAGMENT_COLOR_MATERIAL_PARAMETER_EXT 0x8403 +#define GL_MAX_FRAGMENT_LIGHTS_EXT 0x8404 +#define GL_MAX_ACTIVE_LIGHTS_EXT 0x8405 +#define GL_CURRENT_RASTER_NORMAL_EXT 0x8406 +#define GL_LIGHT_ENV_MODE_EXT 0x8407 +#define GL_FRAGMENT_LIGHT_MODEL_LOCAL_VIEWER_EXT 0x8408 +#define GL_FRAGMENT_LIGHT_MODEL_TWO_SIDE_EXT 0x8409 +#define GL_FRAGMENT_LIGHT_MODEL_AMBIENT_EXT 0x840A +#define GL_FRAGMENT_LIGHT_MODEL_NORMAL_INTERPOLATION_EXT 0x840B +#define GL_FRAGMENT_LIGHT0_EXT 0x840C +#define GL_FRAGMENT_LIGHT7_EXT 0x8413 + +typedef void (GLAPIENTRY * PFNGLFRAGMENTCOLORMATERIALEXTPROC) (GLenum face, GLenum mode); +typedef void (GLAPIENTRY * PFNGLFRAGMENTLIGHTMODELFEXTPROC) (GLenum pname, GLfloat 
param); +typedef void (GLAPIENTRY * PFNGLFRAGMENTLIGHTMODELFVEXTPROC) (GLenum pname, GLfloat* params); +typedef void (GLAPIENTRY * PFNGLFRAGMENTLIGHTMODELIEXTPROC) (GLenum pname, GLint param); +typedef void (GLAPIENTRY * PFNGLFRAGMENTLIGHTMODELIVEXTPROC) (GLenum pname, GLint* params); +typedef void (GLAPIENTRY * PFNGLFRAGMENTLIGHTFEXTPROC) (GLenum light, GLenum pname, GLfloat param); +typedef void (GLAPIENTRY * PFNGLFRAGMENTLIGHTFVEXTPROC) (GLenum light, GLenum pname, GLfloat* params); +typedef void (GLAPIENTRY * PFNGLFRAGMENTLIGHTIEXTPROC) (GLenum light, GLenum pname, GLint param); +typedef void (GLAPIENTRY * PFNGLFRAGMENTLIGHTIVEXTPROC) (GLenum light, GLenum pname, GLint* params); +typedef void (GLAPIENTRY * PFNGLFRAGMENTMATERIALFEXTPROC) (GLenum face, GLenum pname, const GLfloat param); +typedef void (GLAPIENTRY * PFNGLFRAGMENTMATERIALFVEXTPROC) (GLenum face, GLenum pname, const GLfloat* params); +typedef void (GLAPIENTRY * PFNGLFRAGMENTMATERIALIEXTPROC) (GLenum face, GLenum pname, const GLint param); +typedef void (GLAPIENTRY * PFNGLFRAGMENTMATERIALIVEXTPROC) (GLenum face, GLenum pname, const GLint* params); +typedef void (GLAPIENTRY * PFNGLGETFRAGMENTLIGHTFVEXTPROC) (GLenum light, GLenum pname, GLfloat* params); +typedef void (GLAPIENTRY * PFNGLGETFRAGMENTLIGHTIVEXTPROC) (GLenum light, GLenum pname, GLint* params); +typedef void (GLAPIENTRY * PFNGLGETFRAGMENTMATERIALFVEXTPROC) (GLenum face, GLenum pname, const GLfloat* params); +typedef void (GLAPIENTRY * PFNGLGETFRAGMENTMATERIALIVEXTPROC) (GLenum face, GLenum pname, const GLint* params); +typedef void (GLAPIENTRY * PFNGLLIGHTENVIEXTPROC) (GLenum pname, GLint param); + +#define glFragmentColorMaterialEXT GLEW_GET_FUN(__glewFragmentColorMaterialEXT) +#define glFragmentLightModelfEXT GLEW_GET_FUN(__glewFragmentLightModelfEXT) +#define glFragmentLightModelfvEXT GLEW_GET_FUN(__glewFragmentLightModelfvEXT) +#define glFragmentLightModeliEXT GLEW_GET_FUN(__glewFragmentLightModeliEXT) +#define 
glFragmentLightModelivEXT GLEW_GET_FUN(__glewFragmentLightModelivEXT) +#define glFragmentLightfEXT GLEW_GET_FUN(__glewFragmentLightfEXT) +#define glFragmentLightfvEXT GLEW_GET_FUN(__glewFragmentLightfvEXT) +#define glFragmentLightiEXT GLEW_GET_FUN(__glewFragmentLightiEXT) +#define glFragmentLightivEXT GLEW_GET_FUN(__glewFragmentLightivEXT) +#define glFragmentMaterialfEXT GLEW_GET_FUN(__glewFragmentMaterialfEXT) +#define glFragmentMaterialfvEXT GLEW_GET_FUN(__glewFragmentMaterialfvEXT) +#define glFragmentMaterialiEXT GLEW_GET_FUN(__glewFragmentMaterialiEXT) +#define glFragmentMaterialivEXT GLEW_GET_FUN(__glewFragmentMaterialivEXT) +#define glGetFragmentLightfvEXT GLEW_GET_FUN(__glewGetFragmentLightfvEXT) +#define glGetFragmentLightivEXT GLEW_GET_FUN(__glewGetFragmentLightivEXT) +#define glGetFragmentMaterialfvEXT GLEW_GET_FUN(__glewGetFragmentMaterialfvEXT) +#define glGetFragmentMaterialivEXT GLEW_GET_FUN(__glewGetFragmentMaterialivEXT) +#define glLightEnviEXT GLEW_GET_FUN(__glewLightEnviEXT) + +#define GLEW_EXT_fragment_lighting GLEW_GET_VAR(__GLEW_EXT_fragment_lighting) + +#endif /* GL_EXT_fragment_lighting */ + +/* ------------------------ GL_EXT_framebuffer_blit ------------------------ */ + +#ifndef GL_EXT_framebuffer_blit +#define GL_EXT_framebuffer_blit 1 + +#define GL_DRAW_FRAMEBUFFER_BINDING_EXT 0x8CA6 +#define GL_READ_FRAMEBUFFER_EXT 0x8CA8 +#define GL_DRAW_FRAMEBUFFER_EXT 0x8CA9 +#define GL_READ_FRAMEBUFFER_BINDING_EXT 0x8CAA + +typedef void (GLAPIENTRY * PFNGLBLITFRAMEBUFFEREXTPROC) (GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, GLbitfield mask, GLenum filter); + +#define glBlitFramebufferEXT GLEW_GET_FUN(__glewBlitFramebufferEXT) + +#define GLEW_EXT_framebuffer_blit GLEW_GET_VAR(__GLEW_EXT_framebuffer_blit) + +#endif /* GL_EXT_framebuffer_blit */ + +/* --------------------- GL_EXT_framebuffer_multisample -------------------- */ + +#ifndef GL_EXT_framebuffer_multisample +#define 
GL_EXT_framebuffer_multisample 1 + +#define GL_RENDERBUFFER_SAMPLES_EXT 0x8CAB +#define GL_FRAMEBUFFER_INCOMPLETE_MULTISAMPLE_EXT 0x8D56 +#define GL_MAX_SAMPLES_EXT 0x8D57 + +typedef void (GLAPIENTRY * PFNGLRENDERBUFFERSTORAGEMULTISAMPLEEXTPROC) (GLenum target, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height); + +#define glRenderbufferStorageMultisampleEXT GLEW_GET_FUN(__glewRenderbufferStorageMultisampleEXT) + +#define GLEW_EXT_framebuffer_multisample GLEW_GET_VAR(__GLEW_EXT_framebuffer_multisample) + +#endif /* GL_EXT_framebuffer_multisample */ + +/* ----------------------- GL_EXT_framebuffer_object ----------------------- */ + +#ifndef GL_EXT_framebuffer_object +#define GL_EXT_framebuffer_object 1 + +#define GL_INVALID_FRAMEBUFFER_OPERATION_EXT 0x0506 +#define GL_MAX_RENDERBUFFER_SIZE_EXT 0x84E8 +#define GL_FRAMEBUFFER_BINDING_EXT 0x8CA6 +#define GL_RENDERBUFFER_BINDING_EXT 0x8CA7 +#define GL_FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE_EXT 0x8CD0 +#define GL_FRAMEBUFFER_ATTACHMENT_OBJECT_NAME_EXT 0x8CD1 +#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_LEVEL_EXT 0x8CD2 +#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_CUBE_MAP_FACE_EXT 0x8CD3 +#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_3D_ZOFFSET_EXT 0x8CD4 +#define GL_FRAMEBUFFER_COMPLETE_EXT 0x8CD5 +#define GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT_EXT 0x8CD6 +#define GL_FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT_EXT 0x8CD7 +#define GL_FRAMEBUFFER_INCOMPLETE_DIMENSIONS_EXT 0x8CD9 +#define GL_FRAMEBUFFER_INCOMPLETE_FORMATS_EXT 0x8CDA +#define GL_FRAMEBUFFER_INCOMPLETE_DRAW_BUFFER_EXT 0x8CDB +#define GL_FRAMEBUFFER_INCOMPLETE_READ_BUFFER_EXT 0x8CDC +#define GL_FRAMEBUFFER_UNSUPPORTED_EXT 0x8CDD +#define GL_MAX_COLOR_ATTACHMENTS_EXT 0x8CDF +#define GL_COLOR_ATTACHMENT0_EXT 0x8CE0 +#define GL_COLOR_ATTACHMENT1_EXT 0x8CE1 +#define GL_COLOR_ATTACHMENT2_EXT 0x8CE2 +#define GL_COLOR_ATTACHMENT3_EXT 0x8CE3 +#define GL_COLOR_ATTACHMENT4_EXT 0x8CE4 +#define GL_COLOR_ATTACHMENT5_EXT 0x8CE5 +#define GL_COLOR_ATTACHMENT6_EXT 0x8CE6 
+#define GL_COLOR_ATTACHMENT7_EXT 0x8CE7 +#define GL_COLOR_ATTACHMENT8_EXT 0x8CE8 +#define GL_COLOR_ATTACHMENT9_EXT 0x8CE9 +#define GL_COLOR_ATTACHMENT10_EXT 0x8CEA +#define GL_COLOR_ATTACHMENT11_EXT 0x8CEB +#define GL_COLOR_ATTACHMENT12_EXT 0x8CEC +#define GL_COLOR_ATTACHMENT13_EXT 0x8CED +#define GL_COLOR_ATTACHMENT14_EXT 0x8CEE +#define GL_COLOR_ATTACHMENT15_EXT 0x8CEF +#define GL_DEPTH_ATTACHMENT_EXT 0x8D00 +#define GL_STENCIL_ATTACHMENT_EXT 0x8D20 +#define GL_FRAMEBUFFER_EXT 0x8D40 +#define GL_RENDERBUFFER_EXT 0x8D41 +#define GL_RENDERBUFFER_WIDTH_EXT 0x8D42 +#define GL_RENDERBUFFER_HEIGHT_EXT 0x8D43 +#define GL_RENDERBUFFER_INTERNAL_FORMAT_EXT 0x8D44 +#define GL_STENCIL_INDEX1_EXT 0x8D46 +#define GL_STENCIL_INDEX4_EXT 0x8D47 +#define GL_STENCIL_INDEX8_EXT 0x8D48 +#define GL_STENCIL_INDEX16_EXT 0x8D49 +#define GL_RENDERBUFFER_RED_SIZE_EXT 0x8D50 +#define GL_RENDERBUFFER_GREEN_SIZE_EXT 0x8D51 +#define GL_RENDERBUFFER_BLUE_SIZE_EXT 0x8D52 +#define GL_RENDERBUFFER_ALPHA_SIZE_EXT 0x8D53 +#define GL_RENDERBUFFER_DEPTH_SIZE_EXT 0x8D54 +#define GL_RENDERBUFFER_STENCIL_SIZE_EXT 0x8D55 + +typedef void (GLAPIENTRY * PFNGLBINDFRAMEBUFFEREXTPROC) (GLenum target, GLuint framebuffer); +typedef void (GLAPIENTRY * PFNGLBINDRENDERBUFFEREXTPROC) (GLenum target, GLuint renderbuffer); +typedef GLenum (GLAPIENTRY * PFNGLCHECKFRAMEBUFFERSTATUSEXTPROC) (GLenum target); +typedef void (GLAPIENTRY * PFNGLDELETEFRAMEBUFFERSEXTPROC) (GLsizei n, const GLuint* framebuffers); +typedef void (GLAPIENTRY * PFNGLDELETERENDERBUFFERSEXTPROC) (GLsizei n, const GLuint* renderbuffers); +typedef void (GLAPIENTRY * PFNGLFRAMEBUFFERRENDERBUFFEREXTPROC) (GLenum target, GLenum attachment, GLenum renderbuffertarget, GLuint renderbuffer); +typedef void (GLAPIENTRY * PFNGLFRAMEBUFFERTEXTURE1DEXTPROC) (GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level); +typedef void (GLAPIENTRY * PFNGLFRAMEBUFFERTEXTURE2DEXTPROC) (GLenum target, GLenum attachment, GLenum textarget, GLuint 
texture, GLint level); +typedef void (GLAPIENTRY * PFNGLFRAMEBUFFERTEXTURE3DEXTPROC) (GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level, GLint zoffset); +typedef void (GLAPIENTRY * PFNGLGENFRAMEBUFFERSEXTPROC) (GLsizei n, GLuint* framebuffers); +typedef void (GLAPIENTRY * PFNGLGENRENDERBUFFERSEXTPROC) (GLsizei n, GLuint* renderbuffers); +typedef void (GLAPIENTRY * PFNGLGENERATEMIPMAPEXTPROC) (GLenum target); +typedef void (GLAPIENTRY * PFNGLGETFRAMEBUFFERATTACHMENTPARAMETERIVEXTPROC) (GLenum target, GLenum attachment, GLenum pname, GLint* params); +typedef void (GLAPIENTRY * PFNGLGETRENDERBUFFERPARAMETERIVEXTPROC) (GLenum target, GLenum pname, GLint* params); +typedef GLboolean (GLAPIENTRY * PFNGLISFRAMEBUFFEREXTPROC) (GLuint framebuffer); +typedef GLboolean (GLAPIENTRY * PFNGLISRENDERBUFFEREXTPROC) (GLuint renderbuffer); +typedef void (GLAPIENTRY * PFNGLRENDERBUFFERSTORAGEEXTPROC) (GLenum target, GLenum internalformat, GLsizei width, GLsizei height); + +#define glBindFramebufferEXT GLEW_GET_FUN(__glewBindFramebufferEXT) +#define glBindRenderbufferEXT GLEW_GET_FUN(__glewBindRenderbufferEXT) +#define glCheckFramebufferStatusEXT GLEW_GET_FUN(__glewCheckFramebufferStatusEXT) +#define glDeleteFramebuffersEXT GLEW_GET_FUN(__glewDeleteFramebuffersEXT) +#define glDeleteRenderbuffersEXT GLEW_GET_FUN(__glewDeleteRenderbuffersEXT) +#define glFramebufferRenderbufferEXT GLEW_GET_FUN(__glewFramebufferRenderbufferEXT) +#define glFramebufferTexture1DEXT GLEW_GET_FUN(__glewFramebufferTexture1DEXT) +#define glFramebufferTexture2DEXT GLEW_GET_FUN(__glewFramebufferTexture2DEXT) +#define glFramebufferTexture3DEXT GLEW_GET_FUN(__glewFramebufferTexture3DEXT) +#define glGenFramebuffersEXT GLEW_GET_FUN(__glewGenFramebuffersEXT) +#define glGenRenderbuffersEXT GLEW_GET_FUN(__glewGenRenderbuffersEXT) +#define glGenerateMipmapEXT GLEW_GET_FUN(__glewGenerateMipmapEXT) +#define glGetFramebufferAttachmentParameterivEXT 
GLEW_GET_FUN(__glewGetFramebufferAttachmentParameterivEXT) +#define glGetRenderbufferParameterivEXT GLEW_GET_FUN(__glewGetRenderbufferParameterivEXT) +#define glIsFramebufferEXT GLEW_GET_FUN(__glewIsFramebufferEXT) +#define glIsRenderbufferEXT GLEW_GET_FUN(__glewIsRenderbufferEXT) +#define glRenderbufferStorageEXT GLEW_GET_FUN(__glewRenderbufferStorageEXT) + +#define GLEW_EXT_framebuffer_object GLEW_GET_VAR(__GLEW_EXT_framebuffer_object) + +#endif /* GL_EXT_framebuffer_object */ + +/* ------------------------ GL_EXT_framebuffer_sRGB ------------------------ */ + +#ifndef GL_EXT_framebuffer_sRGB +#define GL_EXT_framebuffer_sRGB 1 + +#define GL_FRAMEBUFFER_SRGB_EXT 0x8DB9 +#define GL_FRAMEBUFFER_SRGB_CAPABLE_EXT 0x8DBA + +#define GLEW_EXT_framebuffer_sRGB GLEW_GET_VAR(__GLEW_EXT_framebuffer_sRGB) + +#endif /* GL_EXT_framebuffer_sRGB */ + +/* ------------------------ GL_EXT_geometry_shader4 ------------------------ */ + +#ifndef GL_EXT_geometry_shader4 +#define GL_EXT_geometry_shader4 1 + +#define GL_LINES_ADJACENCY_EXT 0xA +#define GL_LINE_STRIP_ADJACENCY_EXT 0xB +#define GL_TRIANGLES_ADJACENCY_EXT 0xC +#define GL_TRIANGLE_STRIP_ADJACENCY_EXT 0xD +#define GL_PROGRAM_POINT_SIZE_EXT 0x8642 +#define GL_MAX_VARYING_COMPONENTS_EXT 0x8B4B +#define GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS_EXT 0x8C29 +#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_LAYER_EXT 0x8CD4 +#define GL_FRAMEBUFFER_ATTACHMENT_LAYERED_EXT 0x8DA7 +#define GL_FRAMEBUFFER_INCOMPLETE_LAYER_TARGETS_EXT 0x8DA8 +#define GL_FRAMEBUFFER_INCOMPLETE_LAYER_COUNT_EXT 0x8DA9 +#define GL_GEOMETRY_SHADER_EXT 0x8DD9 +#define GL_GEOMETRY_VERTICES_OUT_EXT 0x8DDA +#define GL_GEOMETRY_INPUT_TYPE_EXT 0x8DDB +#define GL_GEOMETRY_OUTPUT_TYPE_EXT 0x8DDC +#define GL_MAX_GEOMETRY_VARYING_COMPONENTS_EXT 0x8DDD +#define GL_MAX_VERTEX_VARYING_COMPONENTS_EXT 0x8DDE +#define GL_MAX_GEOMETRY_UNIFORM_COMPONENTS_EXT 0x8DDF +#define GL_MAX_GEOMETRY_OUTPUT_VERTICES_EXT 0x8DE0 +#define GL_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS_EXT 0x8DE1 + +typedef 
void (GLAPIENTRY * PFNGLFRAMEBUFFERTEXTUREEXTPROC) (GLenum target, GLenum attachment, GLuint texture, GLint level); +typedef void (GLAPIENTRY * PFNGLFRAMEBUFFERTEXTUREFACEEXTPROC) (GLenum target, GLenum attachment, GLuint texture, GLint level, GLenum face); +typedef void (GLAPIENTRY * PFNGLFRAMEBUFFERTEXTURELAYEREXTPROC) (GLenum target, GLenum attachment, GLuint texture, GLint level, GLint layer); +typedef void (GLAPIENTRY * PFNGLPROGRAMPARAMETERIEXTPROC) (GLuint program, GLenum pname, GLint value); + +#define glFramebufferTextureEXT GLEW_GET_FUN(__glewFramebufferTextureEXT) +#define glFramebufferTextureFaceEXT GLEW_GET_FUN(__glewFramebufferTextureFaceEXT) +#define glFramebufferTextureLayerEXT GLEW_GET_FUN(__glewFramebufferTextureLayerEXT) +#define glProgramParameteriEXT GLEW_GET_FUN(__glewProgramParameteriEXT) + +#define GLEW_EXT_geometry_shader4 GLEW_GET_VAR(__GLEW_EXT_geometry_shader4) + +#endif /* GL_EXT_geometry_shader4 */ + +/* --------------------- GL_EXT_gpu_program_parameters --------------------- */ + +#ifndef GL_EXT_gpu_program_parameters +#define GL_EXT_gpu_program_parameters 1 + +typedef void (GLAPIENTRY * PFNGLPROGRAMENVPARAMETERS4FVEXTPROC) (GLenum target, GLuint index, GLsizei count, const GLfloat* params); +typedef void (GLAPIENTRY * PFNGLPROGRAMLOCALPARAMETERS4FVEXTPROC) (GLenum target, GLuint index, GLsizei count, const GLfloat* params); + +#define glProgramEnvParameters4fvEXT GLEW_GET_FUN(__glewProgramEnvParameters4fvEXT) +#define glProgramLocalParameters4fvEXT GLEW_GET_FUN(__glewProgramLocalParameters4fvEXT) + +#define GLEW_EXT_gpu_program_parameters GLEW_GET_VAR(__GLEW_EXT_gpu_program_parameters) + +#endif /* GL_EXT_gpu_program_parameters */ + +/* --------------------------- GL_EXT_gpu_shader4 -------------------------- */ + +#ifndef GL_EXT_gpu_shader4 +#define GL_EXT_gpu_shader4 1 + +#define GL_VERTEX_ATTRIB_ARRAY_INTEGER_EXT 0x88FD +#define GL_SAMPLER_1D_ARRAY_EXT 0x8DC0 +#define GL_SAMPLER_2D_ARRAY_EXT 0x8DC1 +#define GL_SAMPLER_BUFFER_EXT 
0x8DC2 +#define GL_SAMPLER_1D_ARRAY_SHADOW_EXT 0x8DC3 +#define GL_SAMPLER_2D_ARRAY_SHADOW_EXT 0x8DC4 +#define GL_SAMPLER_CUBE_SHADOW_EXT 0x8DC5 +#define GL_UNSIGNED_INT_VEC2_EXT 0x8DC6 +#define GL_UNSIGNED_INT_VEC3_EXT 0x8DC7 +#define GL_UNSIGNED_INT_VEC4_EXT 0x8DC8 +#define GL_INT_SAMPLER_1D_EXT 0x8DC9 +#define GL_INT_SAMPLER_2D_EXT 0x8DCA +#define GL_INT_SAMPLER_3D_EXT 0x8DCB +#define GL_INT_SAMPLER_CUBE_EXT 0x8DCC +#define GL_INT_SAMPLER_2D_RECT_EXT 0x8DCD +#define GL_INT_SAMPLER_1D_ARRAY_EXT 0x8DCE +#define GL_INT_SAMPLER_2D_ARRAY_EXT 0x8DCF +#define GL_INT_SAMPLER_BUFFER_EXT 0x8DD0 +#define GL_UNSIGNED_INT_SAMPLER_1D_EXT 0x8DD1 +#define GL_UNSIGNED_INT_SAMPLER_2D_EXT 0x8DD2 +#define GL_UNSIGNED_INT_SAMPLER_3D_EXT 0x8DD3 +#define GL_UNSIGNED_INT_SAMPLER_CUBE_EXT 0x8DD4 +#define GL_UNSIGNED_INT_SAMPLER_2D_RECT_EXT 0x8DD5 +#define GL_UNSIGNED_INT_SAMPLER_1D_ARRAY_EXT 0x8DD6 +#define GL_UNSIGNED_INT_SAMPLER_2D_ARRAY_EXT 0x8DD7 +#define GL_UNSIGNED_INT_SAMPLER_BUFFER_EXT 0x8DD8 + +typedef void (GLAPIENTRY * PFNGLBINDFRAGDATALOCATIONEXTPROC) (GLuint program, GLuint color, const GLchar *name); +typedef GLint (GLAPIENTRY * PFNGLGETFRAGDATALOCATIONEXTPROC) (GLuint program, const GLchar *name); +typedef void (GLAPIENTRY * PFNGLGETUNIFORMUIVEXTPROC) (GLuint program, GLint location, GLuint *params); +typedef void (GLAPIENTRY * PFNGLGETVERTEXATTRIBIIVEXTPROC) (GLuint index, GLenum pname, GLint *params); +typedef void (GLAPIENTRY * PFNGLGETVERTEXATTRIBIUIVEXTPROC) (GLuint index, GLenum pname, GLuint *params); +typedef void (GLAPIENTRY * PFNGLUNIFORM1UIEXTPROC) (GLint location, GLuint v0); +typedef void (GLAPIENTRY * PFNGLUNIFORM1UIVEXTPROC) (GLint location, GLsizei count, const GLuint *value); +typedef void (GLAPIENTRY * PFNGLUNIFORM2UIEXTPROC) (GLint location, GLuint v0, GLuint v1); +typedef void (GLAPIENTRY * PFNGLUNIFORM2UIVEXTPROC) (GLint location, GLsizei count, const GLuint *value); +typedef void (GLAPIENTRY * PFNGLUNIFORM3UIEXTPROC) (GLint location, GLuint v0, GLuint 
v1, GLuint v2); +typedef void (GLAPIENTRY * PFNGLUNIFORM3UIVEXTPROC) (GLint location, GLsizei count, const GLuint *value); +typedef void (GLAPIENTRY * PFNGLUNIFORM4UIEXTPROC) (GLint location, GLuint v0, GLuint v1, GLuint v2, GLuint v3); +typedef void (GLAPIENTRY * PFNGLUNIFORM4UIVEXTPROC) (GLint location, GLsizei count, const GLuint *value); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIBI1IEXTPROC) (GLuint index, GLint x); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIBI1IVEXTPROC) (GLuint index, const GLint *v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIBI1UIEXTPROC) (GLuint index, GLuint x); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIBI1UIVEXTPROC) (GLuint index, const GLuint *v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIBI2IEXTPROC) (GLuint index, GLint x, GLint y); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIBI2IVEXTPROC) (GLuint index, const GLint *v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIBI2UIEXTPROC) (GLuint index, GLuint x, GLuint y); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIBI2UIVEXTPROC) (GLuint index, const GLuint *v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIBI3IEXTPROC) (GLuint index, GLint x, GLint y, GLint z); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIBI3IVEXTPROC) (GLuint index, const GLint *v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIBI3UIEXTPROC) (GLuint index, GLuint x, GLuint y, GLuint z); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIBI3UIVEXTPROC) (GLuint index, const GLuint *v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIBI4BVEXTPROC) (GLuint index, const GLbyte *v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIBI4IEXTPROC) (GLuint index, GLint x, GLint y, GLint z, GLint w); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIBI4IVEXTPROC) (GLuint index, const GLint *v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIBI4SVEXTPROC) (GLuint index, const GLshort *v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIBI4UBVEXTPROC) (GLuint index, const GLubyte *v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIBI4UIEXTPROC) (GLuint index, GLuint 
x, GLuint y, GLuint z, GLuint w); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIBI4UIVEXTPROC) (GLuint index, const GLuint *v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIBI4USVEXTPROC) (GLuint index, const GLushort *v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIBIPOINTEREXTPROC) (GLuint index, GLint size, GLenum type, GLsizei stride, const GLvoid *pointer); + +#define glBindFragDataLocationEXT GLEW_GET_FUN(__glewBindFragDataLocationEXT) +#define glGetFragDataLocationEXT GLEW_GET_FUN(__glewGetFragDataLocationEXT) +#define glGetUniformuivEXT GLEW_GET_FUN(__glewGetUniformuivEXT) +#define glGetVertexAttribIivEXT GLEW_GET_FUN(__glewGetVertexAttribIivEXT) +#define glGetVertexAttribIuivEXT GLEW_GET_FUN(__glewGetVertexAttribIuivEXT) +#define glUniform1uiEXT GLEW_GET_FUN(__glewUniform1uiEXT) +#define glUniform1uivEXT GLEW_GET_FUN(__glewUniform1uivEXT) +#define glUniform2uiEXT GLEW_GET_FUN(__glewUniform2uiEXT) +#define glUniform2uivEXT GLEW_GET_FUN(__glewUniform2uivEXT) +#define glUniform3uiEXT GLEW_GET_FUN(__glewUniform3uiEXT) +#define glUniform3uivEXT GLEW_GET_FUN(__glewUniform3uivEXT) +#define glUniform4uiEXT GLEW_GET_FUN(__glewUniform4uiEXT) +#define glUniform4uivEXT GLEW_GET_FUN(__glewUniform4uivEXT) +#define glVertexAttribI1iEXT GLEW_GET_FUN(__glewVertexAttribI1iEXT) +#define glVertexAttribI1ivEXT GLEW_GET_FUN(__glewVertexAttribI1ivEXT) +#define glVertexAttribI1uiEXT GLEW_GET_FUN(__glewVertexAttribI1uiEXT) +#define glVertexAttribI1uivEXT GLEW_GET_FUN(__glewVertexAttribI1uivEXT) +#define glVertexAttribI2iEXT GLEW_GET_FUN(__glewVertexAttribI2iEXT) +#define glVertexAttribI2ivEXT GLEW_GET_FUN(__glewVertexAttribI2ivEXT) +#define glVertexAttribI2uiEXT GLEW_GET_FUN(__glewVertexAttribI2uiEXT) +#define glVertexAttribI2uivEXT GLEW_GET_FUN(__glewVertexAttribI2uivEXT) +#define glVertexAttribI3iEXT GLEW_GET_FUN(__glewVertexAttribI3iEXT) +#define glVertexAttribI3ivEXT GLEW_GET_FUN(__glewVertexAttribI3ivEXT) +#define glVertexAttribI3uiEXT GLEW_GET_FUN(__glewVertexAttribI3uiEXT) 
+#define glVertexAttribI3uivEXT GLEW_GET_FUN(__glewVertexAttribI3uivEXT) +#define glVertexAttribI4bvEXT GLEW_GET_FUN(__glewVertexAttribI4bvEXT) +#define glVertexAttribI4iEXT GLEW_GET_FUN(__glewVertexAttribI4iEXT) +#define glVertexAttribI4ivEXT GLEW_GET_FUN(__glewVertexAttribI4ivEXT) +#define glVertexAttribI4svEXT GLEW_GET_FUN(__glewVertexAttribI4svEXT) +#define glVertexAttribI4ubvEXT GLEW_GET_FUN(__glewVertexAttribI4ubvEXT) +#define glVertexAttribI4uiEXT GLEW_GET_FUN(__glewVertexAttribI4uiEXT) +#define glVertexAttribI4uivEXT GLEW_GET_FUN(__glewVertexAttribI4uivEXT) +#define glVertexAttribI4usvEXT GLEW_GET_FUN(__glewVertexAttribI4usvEXT) +#define glVertexAttribIPointerEXT GLEW_GET_FUN(__glewVertexAttribIPointerEXT) + +#define GLEW_EXT_gpu_shader4 GLEW_GET_VAR(__GLEW_EXT_gpu_shader4) + +#endif /* GL_EXT_gpu_shader4 */ + +/* ---------------------------- GL_EXT_histogram --------------------------- */ + +#ifndef GL_EXT_histogram +#define GL_EXT_histogram 1 + +#define GL_HISTOGRAM_EXT 0x8024 +#define GL_PROXY_HISTOGRAM_EXT 0x8025 +#define GL_HISTOGRAM_WIDTH_EXT 0x8026 +#define GL_HISTOGRAM_FORMAT_EXT 0x8027 +#define GL_HISTOGRAM_RED_SIZE_EXT 0x8028 +#define GL_HISTOGRAM_GREEN_SIZE_EXT 0x8029 +#define GL_HISTOGRAM_BLUE_SIZE_EXT 0x802A +#define GL_HISTOGRAM_ALPHA_SIZE_EXT 0x802B +#define GL_HISTOGRAM_LUMINANCE_SIZE_EXT 0x802C +#define GL_HISTOGRAM_SINK_EXT 0x802D +#define GL_MINMAX_EXT 0x802E +#define GL_MINMAX_FORMAT_EXT 0x802F +#define GL_MINMAX_SINK_EXT 0x8030 + +typedef void (GLAPIENTRY * PFNGLGETHISTOGRAMEXTPROC) (GLenum target, GLboolean reset, GLenum format, GLenum type, void* values); +typedef void (GLAPIENTRY * PFNGLGETHISTOGRAMPARAMETERFVEXTPROC) (GLenum target, GLenum pname, GLfloat* params); +typedef void (GLAPIENTRY * PFNGLGETHISTOGRAMPARAMETERIVEXTPROC) (GLenum target, GLenum pname, GLint* params); +typedef void (GLAPIENTRY * PFNGLGETMINMAXEXTPROC) (GLenum target, GLboolean reset, GLenum format, GLenum type, void* values); +typedef void (GLAPIENTRY * 
PFNGLGETMINMAXPARAMETERFVEXTPROC) (GLenum target, GLenum pname, GLfloat* params); +typedef void (GLAPIENTRY * PFNGLGETMINMAXPARAMETERIVEXTPROC) (GLenum target, GLenum pname, GLint* params); +typedef void (GLAPIENTRY * PFNGLHISTOGRAMEXTPROC) (GLenum target, GLsizei width, GLenum internalformat, GLboolean sink); +typedef void (GLAPIENTRY * PFNGLMINMAXEXTPROC) (GLenum target, GLenum internalformat, GLboolean sink); +typedef void (GLAPIENTRY * PFNGLRESETHISTOGRAMEXTPROC) (GLenum target); +typedef void (GLAPIENTRY * PFNGLRESETMINMAXEXTPROC) (GLenum target); + +#define glGetHistogramEXT GLEW_GET_FUN(__glewGetHistogramEXT) +#define glGetHistogramParameterfvEXT GLEW_GET_FUN(__glewGetHistogramParameterfvEXT) +#define glGetHistogramParameterivEXT GLEW_GET_FUN(__glewGetHistogramParameterivEXT) +#define glGetMinmaxEXT GLEW_GET_FUN(__glewGetMinmaxEXT) +#define glGetMinmaxParameterfvEXT GLEW_GET_FUN(__glewGetMinmaxParameterfvEXT) +#define glGetMinmaxParameterivEXT GLEW_GET_FUN(__glewGetMinmaxParameterivEXT) +#define glHistogramEXT GLEW_GET_FUN(__glewHistogramEXT) +#define glMinmaxEXT GLEW_GET_FUN(__glewMinmaxEXT) +#define glResetHistogramEXT GLEW_GET_FUN(__glewResetHistogramEXT) +#define glResetMinmaxEXT GLEW_GET_FUN(__glewResetMinmaxEXT) + +#define GLEW_EXT_histogram GLEW_GET_VAR(__GLEW_EXT_histogram) + +#endif /* GL_EXT_histogram */ + +/* ----------------------- GL_EXT_index_array_formats ---------------------- */ + +#ifndef GL_EXT_index_array_formats +#define GL_EXT_index_array_formats 1 + +#define GLEW_EXT_index_array_formats GLEW_GET_VAR(__GLEW_EXT_index_array_formats) + +#endif /* GL_EXT_index_array_formats */ + +/* --------------------------- GL_EXT_index_func --------------------------- */ + +#ifndef GL_EXT_index_func +#define GL_EXT_index_func 1 + +typedef void (GLAPIENTRY * PFNGLINDEXFUNCEXTPROC) (GLenum func, GLfloat ref); + +#define glIndexFuncEXT GLEW_GET_FUN(__glewIndexFuncEXT) + +#define GLEW_EXT_index_func GLEW_GET_VAR(__GLEW_EXT_index_func) + +#endif /* 
GL_EXT_index_func */ + +/* ------------------------- GL_EXT_index_material ------------------------- */ + +#ifndef GL_EXT_index_material +#define GL_EXT_index_material 1 + +typedef void (GLAPIENTRY * PFNGLINDEXMATERIALEXTPROC) (GLenum face, GLenum mode); + +#define glIndexMaterialEXT GLEW_GET_FUN(__glewIndexMaterialEXT) + +#define GLEW_EXT_index_material GLEW_GET_VAR(__GLEW_EXT_index_material) + +#endif /* GL_EXT_index_material */ + +/* -------------------------- GL_EXT_index_texture ------------------------- */ + +#ifndef GL_EXT_index_texture +#define GL_EXT_index_texture 1 + +#define GLEW_EXT_index_texture GLEW_GET_VAR(__GLEW_EXT_index_texture) + +#endif /* GL_EXT_index_texture */ + +/* -------------------------- GL_EXT_light_texture ------------------------- */ + +#ifndef GL_EXT_light_texture +#define GL_EXT_light_texture 1 + +#define GL_FRAGMENT_MATERIAL_EXT 0x8349 +#define GL_FRAGMENT_NORMAL_EXT 0x834A +#define GL_FRAGMENT_COLOR_EXT 0x834C +#define GL_ATTENUATION_EXT 0x834D +#define GL_SHADOW_ATTENUATION_EXT 0x834E +#define GL_TEXTURE_APPLICATION_MODE_EXT 0x834F +#define GL_TEXTURE_LIGHT_EXT 0x8350 +#define GL_TEXTURE_MATERIAL_FACE_EXT 0x8351 +#define GL_TEXTURE_MATERIAL_PARAMETER_EXT 0x8352 +#define GL_FRAGMENT_DEPTH_EXT 0x8452 + +typedef void (GLAPIENTRY * PFNGLAPPLYTEXTUREEXTPROC) (GLenum mode); +typedef void (GLAPIENTRY * PFNGLTEXTURELIGHTEXTPROC) (GLenum pname); +typedef void (GLAPIENTRY * PFNGLTEXTUREMATERIALEXTPROC) (GLenum face, GLenum mode); + +#define glApplyTextureEXT GLEW_GET_FUN(__glewApplyTextureEXT) +#define glTextureLightEXT GLEW_GET_FUN(__glewTextureLightEXT) +#define glTextureMaterialEXT GLEW_GET_FUN(__glewTextureMaterialEXT) + +#define GLEW_EXT_light_texture GLEW_GET_VAR(__GLEW_EXT_light_texture) + +#endif /* GL_EXT_light_texture */ + +/* ------------------------- GL_EXT_misc_attribute ------------------------- */ + +#ifndef GL_EXT_misc_attribute +#define GL_EXT_misc_attribute 1 + +#define GLEW_EXT_misc_attribute 
GLEW_GET_VAR(__GLEW_EXT_misc_attribute) + +#endif /* GL_EXT_misc_attribute */ + +/* ------------------------ GL_EXT_multi_draw_arrays ----------------------- */ + +#ifndef GL_EXT_multi_draw_arrays +#define GL_EXT_multi_draw_arrays 1 + +typedef void (GLAPIENTRY * PFNGLMULTIDRAWARRAYSEXTPROC) (GLenum mode, GLint* first, GLsizei *count, GLsizei primcount); +typedef void (GLAPIENTRY * PFNGLMULTIDRAWELEMENTSEXTPROC) (GLenum mode, GLsizei* count, GLenum type, const GLvoid **indices, GLsizei primcount); + +#define glMultiDrawArraysEXT GLEW_GET_FUN(__glewMultiDrawArraysEXT) +#define glMultiDrawElementsEXT GLEW_GET_FUN(__glewMultiDrawElementsEXT) + +#define GLEW_EXT_multi_draw_arrays GLEW_GET_VAR(__GLEW_EXT_multi_draw_arrays) + +#endif /* GL_EXT_multi_draw_arrays */ + +/* --------------------------- GL_EXT_multisample -------------------------- */ + +#ifndef GL_EXT_multisample +#define GL_EXT_multisample 1 + +#define GL_MULTISAMPLE_EXT 0x809D +#define GL_SAMPLE_ALPHA_TO_MASK_EXT 0x809E +#define GL_SAMPLE_ALPHA_TO_ONE_EXT 0x809F +#define GL_SAMPLE_MASK_EXT 0x80A0 +#define GL_1PASS_EXT 0x80A1 +#define GL_2PASS_0_EXT 0x80A2 +#define GL_2PASS_1_EXT 0x80A3 +#define GL_4PASS_0_EXT 0x80A4 +#define GL_4PASS_1_EXT 0x80A5 +#define GL_4PASS_2_EXT 0x80A6 +#define GL_4PASS_3_EXT 0x80A7 +#define GL_SAMPLE_BUFFERS_EXT 0x80A8 +#define GL_SAMPLES_EXT 0x80A9 +#define GL_SAMPLE_MASK_VALUE_EXT 0x80AA +#define GL_SAMPLE_MASK_INVERT_EXT 0x80AB +#define GL_SAMPLE_PATTERN_EXT 0x80AC +#define GL_MULTISAMPLE_BIT_EXT 0x20000000 + +typedef void (GLAPIENTRY * PFNGLSAMPLEMASKEXTPROC) (GLclampf value, GLboolean invert); +typedef void (GLAPIENTRY * PFNGLSAMPLEPATTERNEXTPROC) (GLenum pattern); + +#define glSampleMaskEXT GLEW_GET_FUN(__glewSampleMaskEXT) +#define glSamplePatternEXT GLEW_GET_FUN(__glewSamplePatternEXT) + +#define GLEW_EXT_multisample GLEW_GET_VAR(__GLEW_EXT_multisample) + +#endif /* GL_EXT_multisample */ + +/* ---------------------- GL_EXT_packed_depth_stencil ---------------------- */ + 
+#ifndef GL_EXT_packed_depth_stencil +#define GL_EXT_packed_depth_stencil 1 + +#define GL_DEPTH_STENCIL_EXT 0x84F9 +#define GL_UNSIGNED_INT_24_8_EXT 0x84FA +#define GL_DEPTH24_STENCIL8_EXT 0x88F0 +#define GL_TEXTURE_STENCIL_SIZE_EXT 0x88F1 + +#define GLEW_EXT_packed_depth_stencil GLEW_GET_VAR(__GLEW_EXT_packed_depth_stencil) + +#endif /* GL_EXT_packed_depth_stencil */ + +/* -------------------------- GL_EXT_packed_float -------------------------- */ + +#ifndef GL_EXT_packed_float +#define GL_EXT_packed_float 1 + +#define GL_R11F_G11F_B10F_EXT 0x8C3A +#define GL_UNSIGNED_INT_10F_11F_11F_REV_EXT 0x8C3B +#define GL_RGBA_SIGNED_COMPONENTS_EXT 0x8C3C + +#define GLEW_EXT_packed_float GLEW_GET_VAR(__GLEW_EXT_packed_float) + +#endif /* GL_EXT_packed_float */ + +/* -------------------------- GL_EXT_packed_pixels ------------------------- */ + +#ifndef GL_EXT_packed_pixels +#define GL_EXT_packed_pixels 1 + +#define GL_UNSIGNED_BYTE_3_3_2_EXT 0x8032 +#define GL_UNSIGNED_SHORT_4_4_4_4_EXT 0x8033 +#define GL_UNSIGNED_SHORT_5_5_5_1_EXT 0x8034 +#define GL_UNSIGNED_INT_8_8_8_8_EXT 0x8035 +#define GL_UNSIGNED_INT_10_10_10_2_EXT 0x8036 + +#define GLEW_EXT_packed_pixels GLEW_GET_VAR(__GLEW_EXT_packed_pixels) + +#endif /* GL_EXT_packed_pixels */ + +/* ------------------------ GL_EXT_paletted_texture ------------------------ */ + +#ifndef GL_EXT_paletted_texture +#define GL_EXT_paletted_texture 1 + +#define GL_TEXTURE_1D 0x0DE0 +#define GL_TEXTURE_2D 0x0DE1 +#define GL_PROXY_TEXTURE_1D 0x8063 +#define GL_PROXY_TEXTURE_2D 0x8064 +#define GL_TEXTURE_3D_EXT 0x806F +#define GL_PROXY_TEXTURE_3D_EXT 0x8070 +#define GL_COLOR_TABLE_FORMAT_EXT 0x80D8 +#define GL_COLOR_TABLE_WIDTH_EXT 0x80D9 +#define GL_COLOR_TABLE_RED_SIZE_EXT 0x80DA +#define GL_COLOR_TABLE_GREEN_SIZE_EXT 0x80DB +#define GL_COLOR_TABLE_BLUE_SIZE_EXT 0x80DC +#define GL_COLOR_TABLE_ALPHA_SIZE_EXT 0x80DD +#define GL_COLOR_TABLE_LUMINANCE_SIZE_EXT 0x80DE +#define GL_COLOR_TABLE_INTENSITY_SIZE_EXT 0x80DF +#define GL_COLOR_INDEX1_EXT 
0x80E2 +#define GL_COLOR_INDEX2_EXT 0x80E3 +#define GL_COLOR_INDEX4_EXT 0x80E4 +#define GL_COLOR_INDEX8_EXT 0x80E5 +#define GL_COLOR_INDEX12_EXT 0x80E6 +#define GL_COLOR_INDEX16_EXT 0x80E7 +#define GL_TEXTURE_INDEX_SIZE_EXT 0x80ED +#define GL_TEXTURE_CUBE_MAP_ARB 0x8513 +#define GL_PROXY_TEXTURE_CUBE_MAP_ARB 0x851B + +typedef void (GLAPIENTRY * PFNGLCOLORTABLEEXTPROC) (GLenum target, GLenum internalFormat, GLsizei width, GLenum format, GLenum type, const void* data); +typedef void (GLAPIENTRY * PFNGLGETCOLORTABLEEXTPROC) (GLenum target, GLenum format, GLenum type, void* data); +typedef void (GLAPIENTRY * PFNGLGETCOLORTABLEPARAMETERFVEXTPROC) (GLenum target, GLenum pname, GLfloat* params); +typedef void (GLAPIENTRY * PFNGLGETCOLORTABLEPARAMETERIVEXTPROC) (GLenum target, GLenum pname, GLint* params); + +#define glColorTableEXT GLEW_GET_FUN(__glewColorTableEXT) +#define glGetColorTableEXT GLEW_GET_FUN(__glewGetColorTableEXT) +#define glGetColorTableParameterfvEXT GLEW_GET_FUN(__glewGetColorTableParameterfvEXT) +#define glGetColorTableParameterivEXT GLEW_GET_FUN(__glewGetColorTableParameterivEXT) + +#define GLEW_EXT_paletted_texture GLEW_GET_VAR(__GLEW_EXT_paletted_texture) + +#endif /* GL_EXT_paletted_texture */ + +/* ----------------------- GL_EXT_pixel_buffer_object ---------------------- */ + +#ifndef GL_EXT_pixel_buffer_object +#define GL_EXT_pixel_buffer_object 1 + +#define GL_PIXEL_PACK_BUFFER_EXT 0x88EB +#define GL_PIXEL_UNPACK_BUFFER_EXT 0x88EC +#define GL_PIXEL_PACK_BUFFER_BINDING_EXT 0x88ED +#define GL_PIXEL_UNPACK_BUFFER_BINDING_EXT 0x88EF + +#define GLEW_EXT_pixel_buffer_object GLEW_GET_VAR(__GLEW_EXT_pixel_buffer_object) + +#endif /* GL_EXT_pixel_buffer_object */ + +/* ------------------------- GL_EXT_pixel_transform ------------------------ */ + +#ifndef GL_EXT_pixel_transform +#define GL_EXT_pixel_transform 1 + +#define GL_PIXEL_TRANSFORM_2D_EXT 0x8330 +#define GL_PIXEL_MAG_FILTER_EXT 0x8331 +#define GL_PIXEL_MIN_FILTER_EXT 0x8332 +#define 
GL_PIXEL_CUBIC_WEIGHT_EXT 0x8333 +#define GL_CUBIC_EXT 0x8334 +#define GL_AVERAGE_EXT 0x8335 +#define GL_PIXEL_TRANSFORM_2D_STACK_DEPTH_EXT 0x8336 +#define GL_MAX_PIXEL_TRANSFORM_2D_STACK_DEPTH_EXT 0x8337 +#define GL_PIXEL_TRANSFORM_2D_MATRIX_EXT 0x8338 + +typedef void (GLAPIENTRY * PFNGLGETPIXELTRANSFORMPARAMETERFVEXTPROC) (GLenum target, GLenum pname, const GLfloat* params); +typedef void (GLAPIENTRY * PFNGLGETPIXELTRANSFORMPARAMETERIVEXTPROC) (GLenum target, GLenum pname, const GLint* params); +typedef void (GLAPIENTRY * PFNGLPIXELTRANSFORMPARAMETERFEXTPROC) (GLenum target, GLenum pname, const GLfloat param); +typedef void (GLAPIENTRY * PFNGLPIXELTRANSFORMPARAMETERFVEXTPROC) (GLenum target, GLenum pname, const GLfloat* params); +typedef void (GLAPIENTRY * PFNGLPIXELTRANSFORMPARAMETERIEXTPROC) (GLenum target, GLenum pname, const GLint param); +typedef void (GLAPIENTRY * PFNGLPIXELTRANSFORMPARAMETERIVEXTPROC) (GLenum target, GLenum pname, const GLint* params); + +#define glGetPixelTransformParameterfvEXT GLEW_GET_FUN(__glewGetPixelTransformParameterfvEXT) +#define glGetPixelTransformParameterivEXT GLEW_GET_FUN(__glewGetPixelTransformParameterivEXT) +#define glPixelTransformParameterfEXT GLEW_GET_FUN(__glewPixelTransformParameterfEXT) +#define glPixelTransformParameterfvEXT GLEW_GET_FUN(__glewPixelTransformParameterfvEXT) +#define glPixelTransformParameteriEXT GLEW_GET_FUN(__glewPixelTransformParameteriEXT) +#define glPixelTransformParameterivEXT GLEW_GET_FUN(__glewPixelTransformParameterivEXT) + +#define GLEW_EXT_pixel_transform GLEW_GET_VAR(__GLEW_EXT_pixel_transform) + +#endif /* GL_EXT_pixel_transform */ + +/* ------------------- GL_EXT_pixel_transform_color_table ------------------ */ + +#ifndef GL_EXT_pixel_transform_color_table +#define GL_EXT_pixel_transform_color_table 1 + +#define GLEW_EXT_pixel_transform_color_table GLEW_GET_VAR(__GLEW_EXT_pixel_transform_color_table) + +#endif /* GL_EXT_pixel_transform_color_table */ + +/* ------------------------ 
GL_EXT_point_parameters ------------------------ */ + +#ifndef GL_EXT_point_parameters +#define GL_EXT_point_parameters 1 + +#define GL_POINT_SIZE_MIN_EXT 0x8126 +#define GL_POINT_SIZE_MAX_EXT 0x8127 +#define GL_POINT_FADE_THRESHOLD_SIZE_EXT 0x8128 +#define GL_DISTANCE_ATTENUATION_EXT 0x8129 + +typedef void (GLAPIENTRY * PFNGLPOINTPARAMETERFEXTPROC) (GLenum pname, GLfloat param); +typedef void (GLAPIENTRY * PFNGLPOINTPARAMETERFVEXTPROC) (GLenum pname, GLfloat* params); + +#define glPointParameterfEXT GLEW_GET_FUN(__glewPointParameterfEXT) +#define glPointParameterfvEXT GLEW_GET_FUN(__glewPointParameterfvEXT) + +#define GLEW_EXT_point_parameters GLEW_GET_VAR(__GLEW_EXT_point_parameters) + +#endif /* GL_EXT_point_parameters */ + +/* ------------------------- GL_EXT_polygon_offset ------------------------- */ + +#ifndef GL_EXT_polygon_offset +#define GL_EXT_polygon_offset 1 + +#define GL_POLYGON_OFFSET_EXT 0x8037 +#define GL_POLYGON_OFFSET_FACTOR_EXT 0x8038 +#define GL_POLYGON_OFFSET_BIAS_EXT 0x8039 + +typedef void (GLAPIENTRY * PFNGLPOLYGONOFFSETEXTPROC) (GLfloat factor, GLfloat bias); + +#define glPolygonOffsetEXT GLEW_GET_FUN(__glewPolygonOffsetEXT) + +#define GLEW_EXT_polygon_offset GLEW_GET_VAR(__GLEW_EXT_polygon_offset) + +#endif /* GL_EXT_polygon_offset */ + +/* ------------------------- GL_EXT_rescale_normal ------------------------- */ + +#ifndef GL_EXT_rescale_normal +#define GL_EXT_rescale_normal 1 + +#define GL_RESCALE_NORMAL_EXT 0x803A + +#define GLEW_EXT_rescale_normal GLEW_GET_VAR(__GLEW_EXT_rescale_normal) + +#endif /* GL_EXT_rescale_normal */ + +/* -------------------------- GL_EXT_scene_marker -------------------------- */ + +#ifndef GL_EXT_scene_marker +#define GL_EXT_scene_marker 1 + +typedef void (GLAPIENTRY * PFNGLBEGINSCENEEXTPROC) (void); +typedef void (GLAPIENTRY * PFNGLENDSCENEEXTPROC) (void); + +#define glBeginSceneEXT GLEW_GET_FUN(__glewBeginSceneEXT) +#define glEndSceneEXT GLEW_GET_FUN(__glewEndSceneEXT) + +#define GLEW_EXT_scene_marker 
GLEW_GET_VAR(__GLEW_EXT_scene_marker) + +#endif /* GL_EXT_scene_marker */ + +/* ------------------------- GL_EXT_secondary_color ------------------------ */ + +#ifndef GL_EXT_secondary_color +#define GL_EXT_secondary_color 1 + +#define GL_COLOR_SUM_EXT 0x8458 +#define GL_CURRENT_SECONDARY_COLOR_EXT 0x8459 +#define GL_SECONDARY_COLOR_ARRAY_SIZE_EXT 0x845A +#define GL_SECONDARY_COLOR_ARRAY_TYPE_EXT 0x845B +#define GL_SECONDARY_COLOR_ARRAY_STRIDE_EXT 0x845C +#define GL_SECONDARY_COLOR_ARRAY_POINTER_EXT 0x845D +#define GL_SECONDARY_COLOR_ARRAY_EXT 0x845E + +typedef void (GLAPIENTRY * PFNGLSECONDARYCOLOR3BEXTPROC) (GLbyte red, GLbyte green, GLbyte blue); +typedef void (GLAPIENTRY * PFNGLSECONDARYCOLOR3BVEXTPROC) (const GLbyte *v); +typedef void (GLAPIENTRY * PFNGLSECONDARYCOLOR3DEXTPROC) (GLdouble red, GLdouble green, GLdouble blue); +typedef void (GLAPIENTRY * PFNGLSECONDARYCOLOR3DVEXTPROC) (const GLdouble *v); +typedef void (GLAPIENTRY * PFNGLSECONDARYCOLOR3FEXTPROC) (GLfloat red, GLfloat green, GLfloat blue); +typedef void (GLAPIENTRY * PFNGLSECONDARYCOLOR3FVEXTPROC) (const GLfloat *v); +typedef void (GLAPIENTRY * PFNGLSECONDARYCOLOR3IEXTPROC) (GLint red, GLint green, GLint blue); +typedef void (GLAPIENTRY * PFNGLSECONDARYCOLOR3IVEXTPROC) (const GLint *v); +typedef void (GLAPIENTRY * PFNGLSECONDARYCOLOR3SEXTPROC) (GLshort red, GLshort green, GLshort blue); +typedef void (GLAPIENTRY * PFNGLSECONDARYCOLOR3SVEXTPROC) (const GLshort *v); +typedef void (GLAPIENTRY * PFNGLSECONDARYCOLOR3UBEXTPROC) (GLubyte red, GLubyte green, GLubyte blue); +typedef void (GLAPIENTRY * PFNGLSECONDARYCOLOR3UBVEXTPROC) (const GLubyte *v); +typedef void (GLAPIENTRY * PFNGLSECONDARYCOLOR3UIEXTPROC) (GLuint red, GLuint green, GLuint blue); +typedef void (GLAPIENTRY * PFNGLSECONDARYCOLOR3UIVEXTPROC) (const GLuint *v); +typedef void (GLAPIENTRY * PFNGLSECONDARYCOLOR3USEXTPROC) (GLushort red, GLushort green, GLushort blue); +typedef void (GLAPIENTRY * PFNGLSECONDARYCOLOR3USVEXTPROC) (const GLushort 
*v); +typedef void (GLAPIENTRY * PFNGLSECONDARYCOLORPOINTEREXTPROC) (GLint size, GLenum type, GLsizei stride, GLvoid *pointer); + +#define glSecondaryColor3bEXT GLEW_GET_FUN(__glewSecondaryColor3bEXT) +#define glSecondaryColor3bvEXT GLEW_GET_FUN(__glewSecondaryColor3bvEXT) +#define glSecondaryColor3dEXT GLEW_GET_FUN(__glewSecondaryColor3dEXT) +#define glSecondaryColor3dvEXT GLEW_GET_FUN(__glewSecondaryColor3dvEXT) +#define glSecondaryColor3fEXT GLEW_GET_FUN(__glewSecondaryColor3fEXT) +#define glSecondaryColor3fvEXT GLEW_GET_FUN(__glewSecondaryColor3fvEXT) +#define glSecondaryColor3iEXT GLEW_GET_FUN(__glewSecondaryColor3iEXT) +#define glSecondaryColor3ivEXT GLEW_GET_FUN(__glewSecondaryColor3ivEXT) +#define glSecondaryColor3sEXT GLEW_GET_FUN(__glewSecondaryColor3sEXT) +#define glSecondaryColor3svEXT GLEW_GET_FUN(__glewSecondaryColor3svEXT) +#define glSecondaryColor3ubEXT GLEW_GET_FUN(__glewSecondaryColor3ubEXT) +#define glSecondaryColor3ubvEXT GLEW_GET_FUN(__glewSecondaryColor3ubvEXT) +#define glSecondaryColor3uiEXT GLEW_GET_FUN(__glewSecondaryColor3uiEXT) +#define glSecondaryColor3uivEXT GLEW_GET_FUN(__glewSecondaryColor3uivEXT) +#define glSecondaryColor3usEXT GLEW_GET_FUN(__glewSecondaryColor3usEXT) +#define glSecondaryColor3usvEXT GLEW_GET_FUN(__glewSecondaryColor3usvEXT) +#define glSecondaryColorPointerEXT GLEW_GET_FUN(__glewSecondaryColorPointerEXT) + +#define GLEW_EXT_secondary_color GLEW_GET_VAR(__GLEW_EXT_secondary_color) + +#endif /* GL_EXT_secondary_color */ + +/* --------------------- GL_EXT_separate_specular_color -------------------- */ + +#ifndef GL_EXT_separate_specular_color +#define GL_EXT_separate_specular_color 1 + +#define GL_LIGHT_MODEL_COLOR_CONTROL_EXT 0x81F8 +#define GL_SINGLE_COLOR_EXT 0x81F9 +#define GL_SEPARATE_SPECULAR_COLOR_EXT 0x81FA + +#define GLEW_EXT_separate_specular_color GLEW_GET_VAR(__GLEW_EXT_separate_specular_color) + +#endif /* GL_EXT_separate_specular_color */ + +/* -------------------------- GL_EXT_shadow_funcs 
-------------------------- */ + +#ifndef GL_EXT_shadow_funcs +#define GL_EXT_shadow_funcs 1 + +#define GLEW_EXT_shadow_funcs GLEW_GET_VAR(__GLEW_EXT_shadow_funcs) + +#endif /* GL_EXT_shadow_funcs */ + +/* --------------------- GL_EXT_shared_texture_palette --------------------- */ + +#ifndef GL_EXT_shared_texture_palette +#define GL_EXT_shared_texture_palette 1 + +#define GL_SHARED_TEXTURE_PALETTE_EXT 0x81FB + +#define GLEW_EXT_shared_texture_palette GLEW_GET_VAR(__GLEW_EXT_shared_texture_palette) + +#endif /* GL_EXT_shared_texture_palette */ + +/* ------------------------ GL_EXT_stencil_clear_tag ----------------------- */ + +#ifndef GL_EXT_stencil_clear_tag +#define GL_EXT_stencil_clear_tag 1 + +#define GL_STENCIL_TAG_BITS_EXT 0x88F2 +#define GL_STENCIL_CLEAR_TAG_VALUE_EXT 0x88F3 + +#define GLEW_EXT_stencil_clear_tag GLEW_GET_VAR(__GLEW_EXT_stencil_clear_tag) + +#endif /* GL_EXT_stencil_clear_tag */ + +/* ------------------------ GL_EXT_stencil_two_side ------------------------ */ + +#ifndef GL_EXT_stencil_two_side +#define GL_EXT_stencil_two_side 1 + +#define GL_STENCIL_TEST_TWO_SIDE_EXT 0x8910 +#define GL_ACTIVE_STENCIL_FACE_EXT 0x8911 + +typedef void (GLAPIENTRY * PFNGLACTIVESTENCILFACEEXTPROC) (GLenum face); + +#define glActiveStencilFaceEXT GLEW_GET_FUN(__glewActiveStencilFaceEXT) + +#define GLEW_EXT_stencil_two_side GLEW_GET_VAR(__GLEW_EXT_stencil_two_side) + +#endif /* GL_EXT_stencil_two_side */ + +/* -------------------------- GL_EXT_stencil_wrap -------------------------- */ + +#ifndef GL_EXT_stencil_wrap +#define GL_EXT_stencil_wrap 1 + +#define GL_INCR_WRAP_EXT 0x8507 +#define GL_DECR_WRAP_EXT 0x8508 + +#define GLEW_EXT_stencil_wrap GLEW_GET_VAR(__GLEW_EXT_stencil_wrap) + +#endif /* GL_EXT_stencil_wrap */ + +/* --------------------------- GL_EXT_subtexture --------------------------- */ + +#ifndef GL_EXT_subtexture +#define GL_EXT_subtexture 1 + +typedef void (GLAPIENTRY * PFNGLTEXSUBIMAGE1DEXTPROC) (GLenum target, GLint level, GLint xoffset, GLsizei 
width, GLenum format, GLenum type, const void* pixels); +typedef void (GLAPIENTRY * PFNGLTEXSUBIMAGE2DEXTPROC) (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLenum type, const void* pixels); +typedef void (GLAPIENTRY * PFNGLTEXSUBIMAGE3DEXTPROC) (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type, const void* pixels); + +#define glTexSubImage1DEXT GLEW_GET_FUN(__glewTexSubImage1DEXT) +#define glTexSubImage2DEXT GLEW_GET_FUN(__glewTexSubImage2DEXT) +#define glTexSubImage3DEXT GLEW_GET_FUN(__glewTexSubImage3DEXT) + +#define GLEW_EXT_subtexture GLEW_GET_VAR(__GLEW_EXT_subtexture) + +#endif /* GL_EXT_subtexture */ + +/* ----------------------------- GL_EXT_texture ---------------------------- */ + +#ifndef GL_EXT_texture +#define GL_EXT_texture 1 + +#define GL_ALPHA4_EXT 0x803B +#define GL_ALPHA8_EXT 0x803C +#define GL_ALPHA12_EXT 0x803D +#define GL_ALPHA16_EXT 0x803E +#define GL_LUMINANCE4_EXT 0x803F +#define GL_LUMINANCE8_EXT 0x8040 +#define GL_LUMINANCE12_EXT 0x8041 +#define GL_LUMINANCE16_EXT 0x8042 +#define GL_LUMINANCE4_ALPHA4_EXT 0x8043 +#define GL_LUMINANCE6_ALPHA2_EXT 0x8044 +#define GL_LUMINANCE8_ALPHA8_EXT 0x8045 +#define GL_LUMINANCE12_ALPHA4_EXT 0x8046 +#define GL_LUMINANCE12_ALPHA12_EXT 0x8047 +#define GL_LUMINANCE16_ALPHA16_EXT 0x8048 +#define GL_INTENSITY_EXT 0x8049 +#define GL_INTENSITY4_EXT 0x804A +#define GL_INTENSITY8_EXT 0x804B +#define GL_INTENSITY12_EXT 0x804C +#define GL_INTENSITY16_EXT 0x804D +#define GL_RGB2_EXT 0x804E +#define GL_RGB4_EXT 0x804F +#define GL_RGB5_EXT 0x8050 +#define GL_RGB8_EXT 0x8051 +#define GL_RGB10_EXT 0x8052 +#define GL_RGB12_EXT 0x8053 +#define GL_RGB16_EXT 0x8054 +#define GL_RGBA2_EXT 0x8055 +#define GL_RGBA4_EXT 0x8056 +#define GL_RGB5_A1_EXT 0x8057 +#define GL_RGBA8_EXT 0x8058 +#define GL_RGB10_A2_EXT 0x8059 +#define GL_RGBA12_EXT 0x805A +#define GL_RGBA16_EXT 0x805B 
+#define GL_TEXTURE_RED_SIZE_EXT 0x805C +#define GL_TEXTURE_GREEN_SIZE_EXT 0x805D +#define GL_TEXTURE_BLUE_SIZE_EXT 0x805E +#define GL_TEXTURE_ALPHA_SIZE_EXT 0x805F +#define GL_TEXTURE_LUMINANCE_SIZE_EXT 0x8060 +#define GL_TEXTURE_INTENSITY_SIZE_EXT 0x8061 +#define GL_REPLACE_EXT 0x8062 +#define GL_PROXY_TEXTURE_1D_EXT 0x8063 +#define GL_PROXY_TEXTURE_2D_EXT 0x8064 + +#define GLEW_EXT_texture GLEW_GET_VAR(__GLEW_EXT_texture) + +#endif /* GL_EXT_texture */ + +/* ---------------------------- GL_EXT_texture3D --------------------------- */ + +#ifndef GL_EXT_texture3D +#define GL_EXT_texture3D 1 + +#define GL_PACK_SKIP_IMAGES_EXT 0x806B +#define GL_PACK_IMAGE_HEIGHT_EXT 0x806C +#define GL_UNPACK_SKIP_IMAGES_EXT 0x806D +#define GL_UNPACK_IMAGE_HEIGHT_EXT 0x806E +#define GL_TEXTURE_3D_EXT 0x806F +#define GL_PROXY_TEXTURE_3D_EXT 0x8070 +#define GL_TEXTURE_DEPTH_EXT 0x8071 +#define GL_TEXTURE_WRAP_R_EXT 0x8072 +#define GL_MAX_3D_TEXTURE_SIZE_EXT 0x8073 + +typedef void (GLAPIENTRY * PFNGLTEXIMAGE3DEXTPROC) (GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth, GLint border, GLenum format, GLenum type, const void* pixels); + +#define glTexImage3DEXT GLEW_GET_FUN(__glewTexImage3DEXT) + +#define GLEW_EXT_texture3D GLEW_GET_VAR(__GLEW_EXT_texture3D) + +#endif /* GL_EXT_texture3D */ + +/* -------------------------- GL_EXT_texture_array ------------------------- */ + +#ifndef GL_EXT_texture_array +#define GL_EXT_texture_array 1 + +#define GL_COMPARE_REF_DEPTH_TO_TEXTURE_EXT 0x884E +#define GL_MAX_ARRAY_TEXTURE_LAYERS_EXT 0x88FF +#define GL_TEXTURE_1D_ARRAY_EXT 0x8C18 +#define GL_PROXY_TEXTURE_1D_ARRAY_EXT 0x8C19 +#define GL_TEXTURE_2D_ARRAY_EXT 0x8C1A +#define GL_PROXY_TEXTURE_2D_ARRAY_EXT 0x8C1B +#define GL_TEXTURE_BINDING_1D_ARRAY_EXT 0x8C1C +#define GL_TEXTURE_BINDING_2D_ARRAY_EXT 0x8C1D + +#define GLEW_EXT_texture_array GLEW_GET_VAR(__GLEW_EXT_texture_array) + +#endif /* GL_EXT_texture_array */ + +/* ---------------------- 
GL_EXT_texture_buffer_object --------------------- */ + +#ifndef GL_EXT_texture_buffer_object +#define GL_EXT_texture_buffer_object 1 + +#define GL_TEXTURE_BUFFER_EXT 0x8C2A +#define GL_MAX_TEXTURE_BUFFER_SIZE_EXT 0x8C2B +#define GL_TEXTURE_BINDING_BUFFER_EXT 0x8C2C +#define GL_TEXTURE_BUFFER_DATA_STORE_BINDING_EXT 0x8C2D +#define GL_TEXTURE_BUFFER_FORMAT_EXT 0x8C2E + +typedef void (GLAPIENTRY * PFNGLTEXBUFFEREXTPROC) (GLenum target, GLenum internalformat, GLuint buffer); + +#define glTexBufferEXT GLEW_GET_FUN(__glewTexBufferEXT) + +#define GLEW_EXT_texture_buffer_object GLEW_GET_VAR(__GLEW_EXT_texture_buffer_object) + +#endif /* GL_EXT_texture_buffer_object */ + +/* -------------------- GL_EXT_texture_compression_dxt1 -------------------- */ + +#ifndef GL_EXT_texture_compression_dxt1 +#define GL_EXT_texture_compression_dxt1 1 + +#define GL_COMPRESSED_RGB_S3TC_DXT1_EXT 0x83F0 +#define GL_COMPRESSED_RGBA_S3TC_DXT1_EXT 0x83F1 + +#define GLEW_EXT_texture_compression_dxt1 GLEW_GET_VAR(__GLEW_EXT_texture_compression_dxt1) + +#endif /* GL_EXT_texture_compression_dxt1 */ + +/* -------------------- GL_EXT_texture_compression_latc -------------------- */ + +#ifndef GL_EXT_texture_compression_latc +#define GL_EXT_texture_compression_latc 1 + +#define GL_COMPRESSED_LUMINANCE_LATC1_EXT 0x8C70 +#define GL_COMPRESSED_SIGNED_LUMINANCE_LATC1_EXT 0x8C71 +#define GL_COMPRESSED_LUMINANCE_ALPHA_LATC2_EXT 0x8C72 +#define GL_COMPRESSED_SIGNED_LUMINANCE_ALPHA_LATC2_EXT 0x8C73 + +#define GLEW_EXT_texture_compression_latc GLEW_GET_VAR(__GLEW_EXT_texture_compression_latc) + +#endif /* GL_EXT_texture_compression_latc */ + +/* -------------------- GL_EXT_texture_compression_rgtc -------------------- */ + +#ifndef GL_EXT_texture_compression_rgtc +#define GL_EXT_texture_compression_rgtc 1 + +#define GL_COMPRESSED_RED_RGTC1_EXT 0x8DBB +#define GL_COMPRESSED_SIGNED_RED_RGTC1_EXT 0x8DBC +#define GL_COMPRESSED_RED_GREEN_RGTC2_EXT 0x8DBD +#define GL_COMPRESSED_SIGNED_RED_GREEN_RGTC2_EXT 0x8DBE + 
+#define GLEW_EXT_texture_compression_rgtc GLEW_GET_VAR(__GLEW_EXT_texture_compression_rgtc) + +#endif /* GL_EXT_texture_compression_rgtc */ + +/* -------------------- GL_EXT_texture_compression_s3tc -------------------- */ + +#ifndef GL_EXT_texture_compression_s3tc +#define GL_EXT_texture_compression_s3tc 1 + +#define GL_COMPRESSED_RGB_S3TC_DXT1_EXT 0x83F0 +#define GL_COMPRESSED_RGBA_S3TC_DXT1_EXT 0x83F1 +#define GL_COMPRESSED_RGBA_S3TC_DXT3_EXT 0x83F2 +#define GL_COMPRESSED_RGBA_S3TC_DXT5_EXT 0x83F3 + +#define GLEW_EXT_texture_compression_s3tc GLEW_GET_VAR(__GLEW_EXT_texture_compression_s3tc) + +#endif /* GL_EXT_texture_compression_s3tc */ + +/* ------------------------ GL_EXT_texture_cube_map ------------------------ */ + +#ifndef GL_EXT_texture_cube_map +#define GL_EXT_texture_cube_map 1 + +#define GL_NORMAL_MAP_EXT 0x8511 +#define GL_REFLECTION_MAP_EXT 0x8512 +#define GL_TEXTURE_CUBE_MAP_EXT 0x8513 +#define GL_TEXTURE_BINDING_CUBE_MAP_EXT 0x8514 +#define GL_TEXTURE_CUBE_MAP_POSITIVE_X_EXT 0x8515 +#define GL_TEXTURE_CUBE_MAP_NEGATIVE_X_EXT 0x8516 +#define GL_TEXTURE_CUBE_MAP_POSITIVE_Y_EXT 0x8517 +#define GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT 0x8518 +#define GL_TEXTURE_CUBE_MAP_POSITIVE_Z_EXT 0x8519 +#define GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT 0x851A +#define GL_PROXY_TEXTURE_CUBE_MAP_EXT 0x851B +#define GL_MAX_CUBE_MAP_TEXTURE_SIZE_EXT 0x851C + +#define GLEW_EXT_texture_cube_map GLEW_GET_VAR(__GLEW_EXT_texture_cube_map) + +#endif /* GL_EXT_texture_cube_map */ + +/* ----------------------- GL_EXT_texture_edge_clamp ----------------------- */ + +#ifndef GL_EXT_texture_edge_clamp +#define GL_EXT_texture_edge_clamp 1 + +#define GL_CLAMP_TO_EDGE_EXT 0x812F + +#define GLEW_EXT_texture_edge_clamp GLEW_GET_VAR(__GLEW_EXT_texture_edge_clamp) + +#endif /* GL_EXT_texture_edge_clamp */ + +/* --------------------------- GL_EXT_texture_env -------------------------- */ + +#ifndef GL_EXT_texture_env +#define GL_EXT_texture_env 1 + +#define GL_TEXTURE_ENV0_EXT 0 +#define 
GL_ENV_BLEND_EXT 0 +#define GL_TEXTURE_ENV_SHIFT_EXT 0 +#define GL_ENV_REPLACE_EXT 0 +#define GL_ENV_ADD_EXT 0 +#define GL_ENV_SUBTRACT_EXT 0 +#define GL_TEXTURE_ENV_MODE_ALPHA_EXT 0 +#define GL_ENV_REVERSE_SUBTRACT_EXT 0 +#define GL_ENV_REVERSE_BLEND_EXT 0 +#define GL_ENV_COPY_EXT 0 +#define GL_ENV_MODULATE_EXT 0 + +#define GLEW_EXT_texture_env GLEW_GET_VAR(__GLEW_EXT_texture_env) + +#endif /* GL_EXT_texture_env */ + +/* ------------------------- GL_EXT_texture_env_add ------------------------ */ + +#ifndef GL_EXT_texture_env_add +#define GL_EXT_texture_env_add 1 + +#define GLEW_EXT_texture_env_add GLEW_GET_VAR(__GLEW_EXT_texture_env_add) + +#endif /* GL_EXT_texture_env_add */ + +/* ----------------------- GL_EXT_texture_env_combine ---------------------- */ + +#ifndef GL_EXT_texture_env_combine +#define GL_EXT_texture_env_combine 1 + +#define GL_COMBINE_EXT 0x8570 +#define GL_COMBINE_RGB_EXT 0x8571 +#define GL_COMBINE_ALPHA_EXT 0x8572 +#define GL_RGB_SCALE_EXT 0x8573 +#define GL_ADD_SIGNED_EXT 0x8574 +#define GL_INTERPOLATE_EXT 0x8575 +#define GL_CONSTANT_EXT 0x8576 +#define GL_PRIMARY_COLOR_EXT 0x8577 +#define GL_PREVIOUS_EXT 0x8578 +#define GL_SOURCE0_RGB_EXT 0x8580 +#define GL_SOURCE1_RGB_EXT 0x8581 +#define GL_SOURCE2_RGB_EXT 0x8582 +#define GL_SOURCE0_ALPHA_EXT 0x8588 +#define GL_SOURCE1_ALPHA_EXT 0x8589 +#define GL_SOURCE2_ALPHA_EXT 0x858A +#define GL_OPERAND0_RGB_EXT 0x8590 +#define GL_OPERAND1_RGB_EXT 0x8591 +#define GL_OPERAND2_RGB_EXT 0x8592 +#define GL_OPERAND0_ALPHA_EXT 0x8598 +#define GL_OPERAND1_ALPHA_EXT 0x8599 +#define GL_OPERAND2_ALPHA_EXT 0x859A + +#define GLEW_EXT_texture_env_combine GLEW_GET_VAR(__GLEW_EXT_texture_env_combine) + +#endif /* GL_EXT_texture_env_combine */ + +/* ------------------------ GL_EXT_texture_env_dot3 ------------------------ */ + +#ifndef GL_EXT_texture_env_dot3 +#define GL_EXT_texture_env_dot3 1 + +#define GL_DOT3_RGB_EXT 0x8740 +#define GL_DOT3_RGBA_EXT 0x8741 + +#define GLEW_EXT_texture_env_dot3 
GLEW_GET_VAR(__GLEW_EXT_texture_env_dot3) + +#endif /* GL_EXT_texture_env_dot3 */ + +/* ------------------- GL_EXT_texture_filter_anisotropic ------------------- */ + +#ifndef GL_EXT_texture_filter_anisotropic +#define GL_EXT_texture_filter_anisotropic 1 + +#define GL_TEXTURE_MAX_ANISOTROPY_EXT 0x84FE +#define GL_MAX_TEXTURE_MAX_ANISOTROPY_EXT 0x84FF + +#define GLEW_EXT_texture_filter_anisotropic GLEW_GET_VAR(__GLEW_EXT_texture_filter_anisotropic) + +#endif /* GL_EXT_texture_filter_anisotropic */ + +/* ------------------------- GL_EXT_texture_integer ------------------------ */ + +#ifndef GL_EXT_texture_integer +#define GL_EXT_texture_integer 1 + +#define GL_RGBA32UI_EXT 0x8D70 +#define GL_RGB32UI_EXT 0x8D71 +#define GL_ALPHA32UI_EXT 0x8D72 +#define GL_INTENSITY32UI_EXT 0x8D73 +#define GL_LUMINANCE32UI_EXT 0x8D74 +#define GL_LUMINANCE_ALPHA32UI_EXT 0x8D75 +#define GL_RGBA16UI_EXT 0x8D76 +#define GL_RGB16UI_EXT 0x8D77 +#define GL_ALPHA16UI_EXT 0x8D78 +#define GL_INTENSITY16UI_EXT 0x8D79 +#define GL_LUMINANCE16UI_EXT 0x8D7A +#define GL_LUMINANCE_ALPHA16UI_EXT 0x8D7B +#define GL_RGBA8UI_EXT 0x8D7C +#define GL_RGB8UI_EXT 0x8D7D +#define GL_ALPHA8UI_EXT 0x8D7E +#define GL_INTENSITY8UI_EXT 0x8D7F +#define GL_LUMINANCE8UI_EXT 0x8D80 +#define GL_LUMINANCE_ALPHA8UI_EXT 0x8D81 +#define GL_RGBA32I_EXT 0x8D82 +#define GL_RGB32I_EXT 0x8D83 +#define GL_ALPHA32I_EXT 0x8D84 +#define GL_INTENSITY32I_EXT 0x8D85 +#define GL_LUMINANCE32I_EXT 0x8D86 +#define GL_LUMINANCE_ALPHA32I_EXT 0x8D87 +#define GL_RGBA16I_EXT 0x8D88 +#define GL_RGB16I_EXT 0x8D89 +#define GL_ALPHA16I_EXT 0x8D8A +#define GL_INTENSITY16I_EXT 0x8D8B +#define GL_LUMINANCE16I_EXT 0x8D8C +#define GL_LUMINANCE_ALPHA16I_EXT 0x8D8D +#define GL_RGBA8I_EXT 0x8D8E +#define GL_RGB8I_EXT 0x8D8F +#define GL_ALPHA8I_EXT 0x8D90 +#define GL_INTENSITY8I_EXT 0x8D91 +#define GL_LUMINANCE8I_EXT 0x8D92 +#define GL_LUMINANCE_ALPHA8I_EXT 0x8D93 +#define GL_RED_INTEGER_EXT 0x8D94 +#define GL_GREEN_INTEGER_EXT 0x8D95 +#define 
GL_BLUE_INTEGER_EXT 0x8D96 +#define GL_ALPHA_INTEGER_EXT 0x8D97 +#define GL_RGB_INTEGER_EXT 0x8D98 +#define GL_RGBA_INTEGER_EXT 0x8D99 +#define GL_BGR_INTEGER_EXT 0x8D9A +#define GL_BGRA_INTEGER_EXT 0x8D9B +#define GL_LUMINANCE_INTEGER_EXT 0x8D9C +#define GL_LUMINANCE_ALPHA_INTEGER_EXT 0x8D9D +#define GL_RGBA_INTEGER_MODE_EXT 0x8D9E + +typedef void (GLAPIENTRY * PFNGLCLEARCOLORIIEXTPROC) (GLint red, GLint green, GLint blue, GLint alpha); +typedef void (GLAPIENTRY * PFNGLCLEARCOLORIUIEXTPROC) (GLuint red, GLuint green, GLuint blue, GLuint alpha); +typedef void (GLAPIENTRY * PFNGLGETTEXPARAMETERIIVEXTPROC) (GLenum target, GLenum pname, GLint *params); +typedef void (GLAPIENTRY * PFNGLGETTEXPARAMETERIUIVEXTPROC) (GLenum target, GLenum pname, GLuint *params); +typedef void (GLAPIENTRY * PFNGLTEXPARAMETERIIVEXTPROC) (GLenum target, GLenum pname, const GLint *params); +typedef void (GLAPIENTRY * PFNGLTEXPARAMETERIUIVEXTPROC) (GLenum target, GLenum pname, const GLuint *params); + +#define glClearColorIiEXT GLEW_GET_FUN(__glewClearColorIiEXT) +#define glClearColorIuiEXT GLEW_GET_FUN(__glewClearColorIuiEXT) +#define glGetTexParameterIivEXT GLEW_GET_FUN(__glewGetTexParameterIivEXT) +#define glGetTexParameterIuivEXT GLEW_GET_FUN(__glewGetTexParameterIuivEXT) +#define glTexParameterIivEXT GLEW_GET_FUN(__glewTexParameterIivEXT) +#define glTexParameterIuivEXT GLEW_GET_FUN(__glewTexParameterIuivEXT) + +#define GLEW_EXT_texture_integer GLEW_GET_VAR(__GLEW_EXT_texture_integer) + +#endif /* GL_EXT_texture_integer */ + +/* ------------------------ GL_EXT_texture_lod_bias ------------------------ */ + +#ifndef GL_EXT_texture_lod_bias +#define GL_EXT_texture_lod_bias 1 + +#define GL_MAX_TEXTURE_LOD_BIAS_EXT 0x84FD +#define GL_TEXTURE_FILTER_CONTROL_EXT 0x8500 +#define GL_TEXTURE_LOD_BIAS_EXT 0x8501 + +#define GLEW_EXT_texture_lod_bias GLEW_GET_VAR(__GLEW_EXT_texture_lod_bias) + +#endif /* GL_EXT_texture_lod_bias */ + +/* ---------------------- GL_EXT_texture_mirror_clamp 
---------------------- */ + +#ifndef GL_EXT_texture_mirror_clamp +#define GL_EXT_texture_mirror_clamp 1 + +#define GL_MIRROR_CLAMP_EXT 0x8742 +#define GL_MIRROR_CLAMP_TO_EDGE_EXT 0x8743 +#define GL_MIRROR_CLAMP_TO_BORDER_EXT 0x8912 + +#define GLEW_EXT_texture_mirror_clamp GLEW_GET_VAR(__GLEW_EXT_texture_mirror_clamp) + +#endif /* GL_EXT_texture_mirror_clamp */ + +/* ------------------------- GL_EXT_texture_object ------------------------- */ + +#ifndef GL_EXT_texture_object +#define GL_EXT_texture_object 1 + +#define GL_TEXTURE_PRIORITY_EXT 0x8066 +#define GL_TEXTURE_RESIDENT_EXT 0x8067 +#define GL_TEXTURE_1D_BINDING_EXT 0x8068 +#define GL_TEXTURE_2D_BINDING_EXT 0x8069 +#define GL_TEXTURE_3D_BINDING_EXT 0x806A + +typedef GLboolean (GLAPIENTRY * PFNGLARETEXTURESRESIDENTEXTPROC) (GLsizei n, const GLuint* textures, GLboolean* residences); +typedef void (GLAPIENTRY * PFNGLBINDTEXTUREEXTPROC) (GLenum target, GLuint texture); +typedef void (GLAPIENTRY * PFNGLDELETETEXTURESEXTPROC) (GLsizei n, const GLuint* textures); +typedef void (GLAPIENTRY * PFNGLGENTEXTURESEXTPROC) (GLsizei n, GLuint* textures); +typedef GLboolean (GLAPIENTRY * PFNGLISTEXTUREEXTPROC) (GLuint texture); +typedef void (GLAPIENTRY * PFNGLPRIORITIZETEXTURESEXTPROC) (GLsizei n, const GLuint* textures, const GLclampf* priorities); + +#define glAreTexturesResidentEXT GLEW_GET_FUN(__glewAreTexturesResidentEXT) +#define glBindTextureEXT GLEW_GET_FUN(__glewBindTextureEXT) +#define glDeleteTexturesEXT GLEW_GET_FUN(__glewDeleteTexturesEXT) +#define glGenTexturesEXT GLEW_GET_FUN(__glewGenTexturesEXT) +#define glIsTextureEXT GLEW_GET_FUN(__glewIsTextureEXT) +#define glPrioritizeTexturesEXT GLEW_GET_FUN(__glewPrioritizeTexturesEXT) + +#define GLEW_EXT_texture_object GLEW_GET_VAR(__GLEW_EXT_texture_object) + +#endif /* GL_EXT_texture_object */ + +/* --------------------- GL_EXT_texture_perturb_normal --------------------- */ + +#ifndef GL_EXT_texture_perturb_normal +#define GL_EXT_texture_perturb_normal 1 + +#define 
GL_PERTURB_EXT 0x85AE +#define GL_TEXTURE_NORMAL_EXT 0x85AF + +typedef void (GLAPIENTRY * PFNGLTEXTURENORMALEXTPROC) (GLenum mode); + +#define glTextureNormalEXT GLEW_GET_FUN(__glewTextureNormalEXT) + +#define GLEW_EXT_texture_perturb_normal GLEW_GET_VAR(__GLEW_EXT_texture_perturb_normal) + +#endif /* GL_EXT_texture_perturb_normal */ + +/* ------------------------ GL_EXT_texture_rectangle ----------------------- */ + +#ifndef GL_EXT_texture_rectangle +#define GL_EXT_texture_rectangle 1 + +#define GL_TEXTURE_RECTANGLE_EXT 0x84F5 +#define GL_TEXTURE_BINDING_RECTANGLE_EXT 0x84F6 +#define GL_PROXY_TEXTURE_RECTANGLE_EXT 0x84F7 +#define GL_MAX_RECTANGLE_TEXTURE_SIZE_EXT 0x84F8 + +#define GLEW_EXT_texture_rectangle GLEW_GET_VAR(__GLEW_EXT_texture_rectangle) + +#endif /* GL_EXT_texture_rectangle */ + +/* -------------------------- GL_EXT_texture_sRGB -------------------------- */ + +#ifndef GL_EXT_texture_sRGB +#define GL_EXT_texture_sRGB 1 + +#define GL_SRGB_EXT 0x8C40 +#define GL_SRGB8_EXT 0x8C41 +#define GL_SRGB_ALPHA_EXT 0x8C42 +#define GL_SRGB8_ALPHA8_EXT 0x8C43 +#define GL_SLUMINANCE_ALPHA_EXT 0x8C44 +#define GL_SLUMINANCE8_ALPHA8_EXT 0x8C45 +#define GL_SLUMINANCE_EXT 0x8C46 +#define GL_SLUMINANCE8_EXT 0x8C47 +#define GL_COMPRESSED_SRGB_EXT 0x8C48 +#define GL_COMPRESSED_SRGB_ALPHA_EXT 0x8C49 +#define GL_COMPRESSED_SLUMINANCE_EXT 0x8C4A +#define GL_COMPRESSED_SLUMINANCE_ALPHA_EXT 0x8C4B +#define GL_COMPRESSED_SRGB_S3TC_DXT1_EXT 0x8C4C +#define GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT 0x8C4D +#define GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT 0x8C4E +#define GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT 0x8C4F + +#define GLEW_EXT_texture_sRGB GLEW_GET_VAR(__GLEW_EXT_texture_sRGB) + +#endif /* GL_EXT_texture_sRGB */ + +/* --------------------- GL_EXT_texture_shared_exponent -------------------- */ + +#ifndef GL_EXT_texture_shared_exponent +#define GL_EXT_texture_shared_exponent 1 + +#define GL_RGB9_E5_EXT 0x8C3D +#define GL_UNSIGNED_INT_5_9_9_9_REV_EXT 0x8C3E +#define 
GL_TEXTURE_SHARED_SIZE_EXT 0x8C3F + +#define GLEW_EXT_texture_shared_exponent GLEW_GET_VAR(__GLEW_EXT_texture_shared_exponent) + +#endif /* GL_EXT_texture_shared_exponent */ + +/* ------------------------- GL_EXT_texture_swizzle ------------------------ */ + +#ifndef GL_EXT_texture_swizzle +#define GL_EXT_texture_swizzle 1 + +#define GL_TEXTURE_SWIZZLE_R_EXT 0x8E42 +#define GL_TEXTURE_SWIZZLE_G_EXT 0x8E43 +#define GL_TEXTURE_SWIZZLE_B_EXT 0x8E44 +#define GL_TEXTURE_SWIZZLE_A_EXT 0x8E45 +#define GL_TEXTURE_SWIZZLE_RGBA_EXT 0x8E46 + +#define GLEW_EXT_texture_swizzle GLEW_GET_VAR(__GLEW_EXT_texture_swizzle) + +#endif /* GL_EXT_texture_swizzle */ + +/* --------------------------- GL_EXT_timer_query -------------------------- */ + +#ifndef GL_EXT_timer_query +#define GL_EXT_timer_query 1 + +#define GL_TIME_ELAPSED_EXT 0x88BF + +typedef void (GLAPIENTRY * PFNGLGETQUERYOBJECTI64VEXTPROC) (GLuint id, GLenum pname, GLint64EXT *params); +typedef void (GLAPIENTRY * PFNGLGETQUERYOBJECTUI64VEXTPROC) (GLuint id, GLenum pname, GLuint64EXT *params); + +#define glGetQueryObjecti64vEXT GLEW_GET_FUN(__glewGetQueryObjecti64vEXT) +#define glGetQueryObjectui64vEXT GLEW_GET_FUN(__glewGetQueryObjectui64vEXT) + +#define GLEW_EXT_timer_query GLEW_GET_VAR(__GLEW_EXT_timer_query) + +#endif /* GL_EXT_timer_query */ + +/* ----------------------- GL_EXT_transform_feedback ----------------------- */ + +#ifndef GL_EXT_transform_feedback +#define GL_EXT_transform_feedback 1 + +#define GL_TRANSFORM_FEEDBACK_VARYING_MAX_LENGTH_EXT 0x8C76 +#define GL_TRANSFORM_FEEDBACK_BUFFER_MODE_EXT 0x8C7F +#define GL_MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS_EXT 0x8C80 +#define GL_TRANSFORM_FEEDBACK_VARYINGS_EXT 0x8C83 +#define GL_TRANSFORM_FEEDBACK_BUFFER_START_EXT 0x8C84 +#define GL_TRANSFORM_FEEDBACK_BUFFER_SIZE_EXT 0x8C85 +#define GL_PRIMITIVES_GENERATED_EXT 0x8C87 +#define GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN_EXT 0x8C88 +#define GL_RASTERIZER_DISCARD_EXT 0x8C89 +#define 
GL_MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS_EXT 0x8C8A +#define GL_MAX_TRANSFORM_FEEDBACK_SEPARATE_ATTRIBS_EXT 0x8C8B +#define GL_INTERLEAVED_ATTRIBS_EXT 0x8C8C +#define GL_SEPARATE_ATTRIBS_EXT 0x8C8D +#define GL_TRANSFORM_FEEDBACK_BUFFER_EXT 0x8C8E +#define GL_TRANSFORM_FEEDBACK_BUFFER_BINDING_EXT 0x8C8F + +typedef void (GLAPIENTRY * PFNGLBEGINTRANSFORMFEEDBACKEXTPROC) (GLenum primitiveMode); +typedef void (GLAPIENTRY * PFNGLBINDBUFFERBASEEXTPROC) (GLenum target, GLuint index, GLuint buffer); +typedef void (GLAPIENTRY * PFNGLBINDBUFFEROFFSETEXTPROC) (GLenum target, GLuint index, GLuint buffer, GLintptr offset); +typedef void (GLAPIENTRY * PFNGLBINDBUFFERRANGEEXTPROC) (GLenum target, GLuint index, GLuint buffer, GLintptr offset, GLsizeiptr size); +typedef void (GLAPIENTRY * PFNGLENDTRANSFORMFEEDBACKEXTPROC) (void); +typedef void (GLAPIENTRY * PFNGLGETTRANSFORMFEEDBACKVARYINGEXTPROC) (GLuint program, GLuint index, GLsizei bufSize, GLsizei* length, GLsizei *size, GLenum *type, char *name); +typedef void (GLAPIENTRY * PFNGLTRANSFORMFEEDBACKVARYINGSEXTPROC) (GLuint program, GLsizei count, const char ** varyings, GLenum bufferMode); + +#define glBeginTransformFeedbackEXT GLEW_GET_FUN(__glewBeginTransformFeedbackEXT) +#define glBindBufferBaseEXT GLEW_GET_FUN(__glewBindBufferBaseEXT) +#define glBindBufferOffsetEXT GLEW_GET_FUN(__glewBindBufferOffsetEXT) +#define glBindBufferRangeEXT GLEW_GET_FUN(__glewBindBufferRangeEXT) +#define glEndTransformFeedbackEXT GLEW_GET_FUN(__glewEndTransformFeedbackEXT) +#define glGetTransformFeedbackVaryingEXT GLEW_GET_FUN(__glewGetTransformFeedbackVaryingEXT) +#define glTransformFeedbackVaryingsEXT GLEW_GET_FUN(__glewTransformFeedbackVaryingsEXT) + +#define GLEW_EXT_transform_feedback GLEW_GET_VAR(__GLEW_EXT_transform_feedback) + +#endif /* GL_EXT_transform_feedback */ + +/* -------------------------- GL_EXT_vertex_array -------------------------- */ + +#ifndef GL_EXT_vertex_array +#define GL_EXT_vertex_array 1 + +#define GL_DOUBLE_EXT 
0x140A +#define GL_VERTEX_ARRAY_EXT 0x8074 +#define GL_NORMAL_ARRAY_EXT 0x8075 +#define GL_COLOR_ARRAY_EXT 0x8076 +#define GL_INDEX_ARRAY_EXT 0x8077 +#define GL_TEXTURE_COORD_ARRAY_EXT 0x8078 +#define GL_EDGE_FLAG_ARRAY_EXT 0x8079 +#define GL_VERTEX_ARRAY_SIZE_EXT 0x807A +#define GL_VERTEX_ARRAY_TYPE_EXT 0x807B +#define GL_VERTEX_ARRAY_STRIDE_EXT 0x807C +#define GL_VERTEX_ARRAY_COUNT_EXT 0x807D +#define GL_NORMAL_ARRAY_TYPE_EXT 0x807E +#define GL_NORMAL_ARRAY_STRIDE_EXT 0x807F +#define GL_NORMAL_ARRAY_COUNT_EXT 0x8080 +#define GL_COLOR_ARRAY_SIZE_EXT 0x8081 +#define GL_COLOR_ARRAY_TYPE_EXT 0x8082 +#define GL_COLOR_ARRAY_STRIDE_EXT 0x8083 +#define GL_COLOR_ARRAY_COUNT_EXT 0x8084 +#define GL_INDEX_ARRAY_TYPE_EXT 0x8085 +#define GL_INDEX_ARRAY_STRIDE_EXT 0x8086 +#define GL_INDEX_ARRAY_COUNT_EXT 0x8087 +#define GL_TEXTURE_COORD_ARRAY_SIZE_EXT 0x8088 +#define GL_TEXTURE_COORD_ARRAY_TYPE_EXT 0x8089 +#define GL_TEXTURE_COORD_ARRAY_STRIDE_EXT 0x808A +#define GL_TEXTURE_COORD_ARRAY_COUNT_EXT 0x808B +#define GL_EDGE_FLAG_ARRAY_STRIDE_EXT 0x808C +#define GL_EDGE_FLAG_ARRAY_COUNT_EXT 0x808D +#define GL_VERTEX_ARRAY_POINTER_EXT 0x808E +#define GL_NORMAL_ARRAY_POINTER_EXT 0x808F +#define GL_COLOR_ARRAY_POINTER_EXT 0x8090 +#define GL_INDEX_ARRAY_POINTER_EXT 0x8091 +#define GL_TEXTURE_COORD_ARRAY_POINTER_EXT 0x8092 +#define GL_EDGE_FLAG_ARRAY_POINTER_EXT 0x8093 + +typedef void (GLAPIENTRY * PFNGLARRAYELEMENTEXTPROC) (GLint i); +typedef void (GLAPIENTRY * PFNGLCOLORPOINTEREXTPROC) (GLint size, GLenum type, GLsizei stride, GLsizei count, const void* pointer); +typedef void (GLAPIENTRY * PFNGLDRAWARRAYSEXTPROC) (GLenum mode, GLint first, GLsizei count); +typedef void (GLAPIENTRY * PFNGLEDGEFLAGPOINTEREXTPROC) (GLsizei stride, GLsizei count, const GLboolean* pointer); +typedef void (GLAPIENTRY * PFNGLGETPOINTERVEXTPROC) (GLenum pname, void** params); +typedef void (GLAPIENTRY * PFNGLINDEXPOINTEREXTPROC) (GLenum type, GLsizei stride, GLsizei count, const void* pointer); +typedef void 
(GLAPIENTRY * PFNGLNORMALPOINTEREXTPROC) (GLenum type, GLsizei stride, GLsizei count, const void* pointer); +typedef void (GLAPIENTRY * PFNGLTEXCOORDPOINTEREXTPROC) (GLint size, GLenum type, GLsizei stride, GLsizei count, const void* pointer); +typedef void (GLAPIENTRY * PFNGLVERTEXPOINTEREXTPROC) (GLint size, GLenum type, GLsizei stride, GLsizei count, const void* pointer); + +#define glArrayElementEXT GLEW_GET_FUN(__glewArrayElementEXT) +#define glColorPointerEXT GLEW_GET_FUN(__glewColorPointerEXT) +#define glDrawArraysEXT GLEW_GET_FUN(__glewDrawArraysEXT) +#define glEdgeFlagPointerEXT GLEW_GET_FUN(__glewEdgeFlagPointerEXT) +#define glGetPointervEXT GLEW_GET_FUN(__glewGetPointervEXT) +#define glIndexPointerEXT GLEW_GET_FUN(__glewIndexPointerEXT) +#define glNormalPointerEXT GLEW_GET_FUN(__glewNormalPointerEXT) +#define glTexCoordPointerEXT GLEW_GET_FUN(__glewTexCoordPointerEXT) +#define glVertexPointerEXT GLEW_GET_FUN(__glewVertexPointerEXT) + +#define GLEW_EXT_vertex_array GLEW_GET_VAR(__GLEW_EXT_vertex_array) + +#endif /* GL_EXT_vertex_array */ + +/* ------------------------ GL_EXT_vertex_array_bgra ----------------------- */ + +#ifndef GL_EXT_vertex_array_bgra +#define GL_EXT_vertex_array_bgra 1 + +#define GL_BGRA 0x80E1 + +#define GLEW_EXT_vertex_array_bgra GLEW_GET_VAR(__GLEW_EXT_vertex_array_bgra) + +#endif /* GL_EXT_vertex_array_bgra */ + +/* -------------------------- GL_EXT_vertex_shader ------------------------- */ + +#ifndef GL_EXT_vertex_shader +#define GL_EXT_vertex_shader 1 + +#define GL_VERTEX_SHADER_EXT 0x8780 +#define GL_VERTEX_SHADER_BINDING_EXT 0x8781 +#define GL_OP_INDEX_EXT 0x8782 +#define GL_OP_NEGATE_EXT 0x8783 +#define GL_OP_DOT3_EXT 0x8784 +#define GL_OP_DOT4_EXT 0x8785 +#define GL_OP_MUL_EXT 0x8786 +#define GL_OP_ADD_EXT 0x8787 +#define GL_OP_MADD_EXT 0x8788 +#define GL_OP_FRAC_EXT 0x8789 +#define GL_OP_MAX_EXT 0x878A +#define GL_OP_MIN_EXT 0x878B +#define GL_OP_SET_GE_EXT 0x878C +#define GL_OP_SET_LT_EXT 0x878D +#define GL_OP_CLAMP_EXT 
0x878E +#define GL_OP_FLOOR_EXT 0x878F +#define GL_OP_ROUND_EXT 0x8790 +#define GL_OP_EXP_BASE_2_EXT 0x8791 +#define GL_OP_LOG_BASE_2_EXT 0x8792 +#define GL_OP_POWER_EXT 0x8793 +#define GL_OP_RECIP_EXT 0x8794 +#define GL_OP_RECIP_SQRT_EXT 0x8795 +#define GL_OP_SUB_EXT 0x8796 +#define GL_OP_CROSS_PRODUCT_EXT 0x8797 +#define GL_OP_MULTIPLY_MATRIX_EXT 0x8798 +#define GL_OP_MOV_EXT 0x8799 +#define GL_OUTPUT_VERTEX_EXT 0x879A +#define GL_OUTPUT_COLOR0_EXT 0x879B +#define GL_OUTPUT_COLOR1_EXT 0x879C +#define GL_OUTPUT_TEXTURE_COORD0_EXT 0x879D +#define GL_OUTPUT_TEXTURE_COORD1_EXT 0x879E +#define GL_OUTPUT_TEXTURE_COORD2_EXT 0x879F +#define GL_OUTPUT_TEXTURE_COORD3_EXT 0x87A0 +#define GL_OUTPUT_TEXTURE_COORD4_EXT 0x87A1 +#define GL_OUTPUT_TEXTURE_COORD5_EXT 0x87A2 +#define GL_OUTPUT_TEXTURE_COORD6_EXT 0x87A3 +#define GL_OUTPUT_TEXTURE_COORD7_EXT 0x87A4 +#define GL_OUTPUT_TEXTURE_COORD8_EXT 0x87A5 +#define GL_OUTPUT_TEXTURE_COORD9_EXT 0x87A6 +#define GL_OUTPUT_TEXTURE_COORD10_EXT 0x87A7 +#define GL_OUTPUT_TEXTURE_COORD11_EXT 0x87A8 +#define GL_OUTPUT_TEXTURE_COORD12_EXT 0x87A9 +#define GL_OUTPUT_TEXTURE_COORD13_EXT 0x87AA +#define GL_OUTPUT_TEXTURE_COORD14_EXT 0x87AB +#define GL_OUTPUT_TEXTURE_COORD15_EXT 0x87AC +#define GL_OUTPUT_TEXTURE_COORD16_EXT 0x87AD +#define GL_OUTPUT_TEXTURE_COORD17_EXT 0x87AE +#define GL_OUTPUT_TEXTURE_COORD18_EXT 0x87AF +#define GL_OUTPUT_TEXTURE_COORD19_EXT 0x87B0 +#define GL_OUTPUT_TEXTURE_COORD20_EXT 0x87B1 +#define GL_OUTPUT_TEXTURE_COORD21_EXT 0x87B2 +#define GL_OUTPUT_TEXTURE_COORD22_EXT 0x87B3 +#define GL_OUTPUT_TEXTURE_COORD23_EXT 0x87B4 +#define GL_OUTPUT_TEXTURE_COORD24_EXT 0x87B5 +#define GL_OUTPUT_TEXTURE_COORD25_EXT 0x87B6 +#define GL_OUTPUT_TEXTURE_COORD26_EXT 0x87B7 +#define GL_OUTPUT_TEXTURE_COORD27_EXT 0x87B8 +#define GL_OUTPUT_TEXTURE_COORD28_EXT 0x87B9 +#define GL_OUTPUT_TEXTURE_COORD29_EXT 0x87BA +#define GL_OUTPUT_TEXTURE_COORD30_EXT 0x87BB +#define GL_OUTPUT_TEXTURE_COORD31_EXT 0x87BC +#define GL_OUTPUT_FOG_EXT 0x87BD 
+#define GL_SCALAR_EXT 0x87BE +#define GL_VECTOR_EXT 0x87BF +#define GL_MATRIX_EXT 0x87C0 +#define GL_VARIANT_EXT 0x87C1 +#define GL_INVARIANT_EXT 0x87C2 +#define GL_LOCAL_CONSTANT_EXT 0x87C3 +#define GL_LOCAL_EXT 0x87C4 +#define GL_MAX_VERTEX_SHADER_INSTRUCTIONS_EXT 0x87C5 +#define GL_MAX_VERTEX_SHADER_VARIANTS_EXT 0x87C6 +#define GL_MAX_VERTEX_SHADER_INVARIANTS_EXT 0x87C7 +#define GL_MAX_VERTEX_SHADER_LOCAL_CONSTANTS_EXT 0x87C8 +#define GL_MAX_VERTEX_SHADER_LOCALS_EXT 0x87C9 +#define GL_MAX_OPTIMIZED_VERTEX_SHADER_INSTRUCTIONS_EXT 0x87CA +#define GL_MAX_OPTIMIZED_VERTEX_SHADER_VARIANTS_EXT 0x87CB +#define GL_MAX_OPTIMIZED_VERTEX_SHADER_INVARIANTS_EXT 0x87CC +#define GL_MAX_OPTIMIZED_VERTEX_SHADER_LOCAL_CONSTANTS_EXT 0x87CD +#define GL_MAX_OPTIMIZED_VERTEX_SHADER_LOCALS_EXT 0x87CE +#define GL_VERTEX_SHADER_INSTRUCTIONS_EXT 0x87CF +#define GL_VERTEX_SHADER_VARIANTS_EXT 0x87D0 +#define GL_VERTEX_SHADER_INVARIANTS_EXT 0x87D1 +#define GL_VERTEX_SHADER_LOCAL_CONSTANTS_EXT 0x87D2 +#define GL_VERTEX_SHADER_LOCALS_EXT 0x87D3 +#define GL_VERTEX_SHADER_OPTIMIZED_EXT 0x87D4 +#define GL_X_EXT 0x87D5 +#define GL_Y_EXT 0x87D6 +#define GL_Z_EXT 0x87D7 +#define GL_W_EXT 0x87D8 +#define GL_NEGATIVE_X_EXT 0x87D9 +#define GL_NEGATIVE_Y_EXT 0x87DA +#define GL_NEGATIVE_Z_EXT 0x87DB +#define GL_NEGATIVE_W_EXT 0x87DC +#define GL_ZERO_EXT 0x87DD +#define GL_ONE_EXT 0x87DE +#define GL_NEGATIVE_ONE_EXT 0x87DF +#define GL_NORMALIZED_RANGE_EXT 0x87E0 +#define GL_FULL_RANGE_EXT 0x87E1 +#define GL_CURRENT_VERTEX_EXT 0x87E2 +#define GL_MVP_MATRIX_EXT 0x87E3 +#define GL_VARIANT_VALUE_EXT 0x87E4 +#define GL_VARIANT_DATATYPE_EXT 0x87E5 +#define GL_VARIANT_ARRAY_STRIDE_EXT 0x87E6 +#define GL_VARIANT_ARRAY_TYPE_EXT 0x87E7 +#define GL_VARIANT_ARRAY_EXT 0x87E8 +#define GL_VARIANT_ARRAY_POINTER_EXT 0x87E9 +#define GL_INVARIANT_VALUE_EXT 0x87EA +#define GL_INVARIANT_DATATYPE_EXT 0x87EB +#define GL_LOCAL_CONSTANT_VALUE_EXT 0x87EC +#define GL_LOCAL_CONSTANT_DATATYPE_EXT 0x87ED + +typedef void (GLAPIENTRY 
* PFNGLBEGINVERTEXSHADEREXTPROC) (void); +typedef GLuint (GLAPIENTRY * PFNGLBINDLIGHTPARAMETEREXTPROC) (GLenum light, GLenum value); +typedef GLuint (GLAPIENTRY * PFNGLBINDMATERIALPARAMETEREXTPROC) (GLenum face, GLenum value); +typedef GLuint (GLAPIENTRY * PFNGLBINDPARAMETEREXTPROC) (GLenum value); +typedef GLuint (GLAPIENTRY * PFNGLBINDTEXGENPARAMETEREXTPROC) (GLenum unit, GLenum coord, GLenum value); +typedef GLuint (GLAPIENTRY * PFNGLBINDTEXTUREUNITPARAMETEREXTPROC) (GLenum unit, GLenum value); +typedef void (GLAPIENTRY * PFNGLBINDVERTEXSHADEREXTPROC) (GLuint id); +typedef void (GLAPIENTRY * PFNGLDELETEVERTEXSHADEREXTPROC) (GLuint id); +typedef void (GLAPIENTRY * PFNGLDISABLEVARIANTCLIENTSTATEEXTPROC) (GLuint id); +typedef void (GLAPIENTRY * PFNGLENABLEVARIANTCLIENTSTATEEXTPROC) (GLuint id); +typedef void (GLAPIENTRY * PFNGLENDVERTEXSHADEREXTPROC) (void); +typedef void (GLAPIENTRY * PFNGLEXTRACTCOMPONENTEXTPROC) (GLuint res, GLuint src, GLuint num); +typedef GLuint (GLAPIENTRY * PFNGLGENSYMBOLSEXTPROC) (GLenum dataType, GLenum storageType, GLenum range, GLuint components); +typedef GLuint (GLAPIENTRY * PFNGLGENVERTEXSHADERSEXTPROC) (GLuint range); +typedef void (GLAPIENTRY * PFNGLGETINVARIANTBOOLEANVEXTPROC) (GLuint id, GLenum value, GLboolean *data); +typedef void (GLAPIENTRY * PFNGLGETINVARIANTFLOATVEXTPROC) (GLuint id, GLenum value, GLfloat *data); +typedef void (GLAPIENTRY * PFNGLGETINVARIANTINTEGERVEXTPROC) (GLuint id, GLenum value, GLint *data); +typedef void (GLAPIENTRY * PFNGLGETLOCALCONSTANTBOOLEANVEXTPROC) (GLuint id, GLenum value, GLboolean *data); +typedef void (GLAPIENTRY * PFNGLGETLOCALCONSTANTFLOATVEXTPROC) (GLuint id, GLenum value, GLfloat *data); +typedef void (GLAPIENTRY * PFNGLGETLOCALCONSTANTINTEGERVEXTPROC) (GLuint id, GLenum value, GLint *data); +typedef void (GLAPIENTRY * PFNGLGETVARIANTBOOLEANVEXTPROC) (GLuint id, GLenum value, GLboolean *data); +typedef void (GLAPIENTRY * PFNGLGETVARIANTFLOATVEXTPROC) (GLuint id, GLenum value, GLfloat 
*data); +typedef void (GLAPIENTRY * PFNGLGETVARIANTINTEGERVEXTPROC) (GLuint id, GLenum value, GLint *data); +typedef void (GLAPIENTRY * PFNGLGETVARIANTPOINTERVEXTPROC) (GLuint id, GLenum value, GLvoid **data); +typedef void (GLAPIENTRY * PFNGLINSERTCOMPONENTEXTPROC) (GLuint res, GLuint src, GLuint num); +typedef GLboolean (GLAPIENTRY * PFNGLISVARIANTENABLEDEXTPROC) (GLuint id, GLenum cap); +typedef void (GLAPIENTRY * PFNGLSETINVARIANTEXTPROC) (GLuint id, GLenum type, GLvoid *addr); +typedef void (GLAPIENTRY * PFNGLSETLOCALCONSTANTEXTPROC) (GLuint id, GLenum type, GLvoid *addr); +typedef void (GLAPIENTRY * PFNGLSHADEROP1EXTPROC) (GLenum op, GLuint res, GLuint arg1); +typedef void (GLAPIENTRY * PFNGLSHADEROP2EXTPROC) (GLenum op, GLuint res, GLuint arg1, GLuint arg2); +typedef void (GLAPIENTRY * PFNGLSHADEROP3EXTPROC) (GLenum op, GLuint res, GLuint arg1, GLuint arg2, GLuint arg3); +typedef void (GLAPIENTRY * PFNGLSWIZZLEEXTPROC) (GLuint res, GLuint in, GLenum outX, GLenum outY, GLenum outZ, GLenum outW); +typedef void (GLAPIENTRY * PFNGLVARIANTPOINTEREXTPROC) (GLuint id, GLenum type, GLuint stride, GLvoid *addr); +typedef void (GLAPIENTRY * PFNGLVARIANTBVEXTPROC) (GLuint id, GLbyte *addr); +typedef void (GLAPIENTRY * PFNGLVARIANTDVEXTPROC) (GLuint id, GLdouble *addr); +typedef void (GLAPIENTRY * PFNGLVARIANTFVEXTPROC) (GLuint id, GLfloat *addr); +typedef void (GLAPIENTRY * PFNGLVARIANTIVEXTPROC) (GLuint id, GLint *addr); +typedef void (GLAPIENTRY * PFNGLVARIANTSVEXTPROC) (GLuint id, GLshort *addr); +typedef void (GLAPIENTRY * PFNGLVARIANTUBVEXTPROC) (GLuint id, GLubyte *addr); +typedef void (GLAPIENTRY * PFNGLVARIANTUIVEXTPROC) (GLuint id, GLuint *addr); +typedef void (GLAPIENTRY * PFNGLVARIANTUSVEXTPROC) (GLuint id, GLushort *addr); +typedef void (GLAPIENTRY * PFNGLWRITEMASKEXTPROC) (GLuint res, GLuint in, GLenum outX, GLenum outY, GLenum outZ, GLenum outW); + +#define glBeginVertexShaderEXT GLEW_GET_FUN(__glewBeginVertexShaderEXT) +#define glBindLightParameterEXT 
GLEW_GET_FUN(__glewBindLightParameterEXT) +#define glBindMaterialParameterEXT GLEW_GET_FUN(__glewBindMaterialParameterEXT) +#define glBindParameterEXT GLEW_GET_FUN(__glewBindParameterEXT) +#define glBindTexGenParameterEXT GLEW_GET_FUN(__glewBindTexGenParameterEXT) +#define glBindTextureUnitParameterEXT GLEW_GET_FUN(__glewBindTextureUnitParameterEXT) +#define glBindVertexShaderEXT GLEW_GET_FUN(__glewBindVertexShaderEXT) +#define glDeleteVertexShaderEXT GLEW_GET_FUN(__glewDeleteVertexShaderEXT) +#define glDisableVariantClientStateEXT GLEW_GET_FUN(__glewDisableVariantClientStateEXT) +#define glEnableVariantClientStateEXT GLEW_GET_FUN(__glewEnableVariantClientStateEXT) +#define glEndVertexShaderEXT GLEW_GET_FUN(__glewEndVertexShaderEXT) +#define glExtractComponentEXT GLEW_GET_FUN(__glewExtractComponentEXT) +#define glGenSymbolsEXT GLEW_GET_FUN(__glewGenSymbolsEXT) +#define glGenVertexShadersEXT GLEW_GET_FUN(__glewGenVertexShadersEXT) +#define glGetInvariantBooleanvEXT GLEW_GET_FUN(__glewGetInvariantBooleanvEXT) +#define glGetInvariantFloatvEXT GLEW_GET_FUN(__glewGetInvariantFloatvEXT) +#define glGetInvariantIntegervEXT GLEW_GET_FUN(__glewGetInvariantIntegervEXT) +#define glGetLocalConstantBooleanvEXT GLEW_GET_FUN(__glewGetLocalConstantBooleanvEXT) +#define glGetLocalConstantFloatvEXT GLEW_GET_FUN(__glewGetLocalConstantFloatvEXT) +#define glGetLocalConstantIntegervEXT GLEW_GET_FUN(__glewGetLocalConstantIntegervEXT) +#define glGetVariantBooleanvEXT GLEW_GET_FUN(__glewGetVariantBooleanvEXT) +#define glGetVariantFloatvEXT GLEW_GET_FUN(__glewGetVariantFloatvEXT) +#define glGetVariantIntegervEXT GLEW_GET_FUN(__glewGetVariantIntegervEXT) +#define glGetVariantPointervEXT GLEW_GET_FUN(__glewGetVariantPointervEXT) +#define glInsertComponentEXT GLEW_GET_FUN(__glewInsertComponentEXT) +#define glIsVariantEnabledEXT GLEW_GET_FUN(__glewIsVariantEnabledEXT) +#define glSetInvariantEXT GLEW_GET_FUN(__glewSetInvariantEXT) +#define glSetLocalConstantEXT 
GLEW_GET_FUN(__glewSetLocalConstantEXT) +#define glShaderOp1EXT GLEW_GET_FUN(__glewShaderOp1EXT) +#define glShaderOp2EXT GLEW_GET_FUN(__glewShaderOp2EXT) +#define glShaderOp3EXT GLEW_GET_FUN(__glewShaderOp3EXT) +#define glSwizzleEXT GLEW_GET_FUN(__glewSwizzleEXT) +#define glVariantPointerEXT GLEW_GET_FUN(__glewVariantPointerEXT) +#define glVariantbvEXT GLEW_GET_FUN(__glewVariantbvEXT) +#define glVariantdvEXT GLEW_GET_FUN(__glewVariantdvEXT) +#define glVariantfvEXT GLEW_GET_FUN(__glewVariantfvEXT) +#define glVariantivEXT GLEW_GET_FUN(__glewVariantivEXT) +#define glVariantsvEXT GLEW_GET_FUN(__glewVariantsvEXT) +#define glVariantubvEXT GLEW_GET_FUN(__glewVariantubvEXT) +#define glVariantuivEXT GLEW_GET_FUN(__glewVariantuivEXT) +#define glVariantusvEXT GLEW_GET_FUN(__glewVariantusvEXT) +#define glWriteMaskEXT GLEW_GET_FUN(__glewWriteMaskEXT) + +#define GLEW_EXT_vertex_shader GLEW_GET_VAR(__GLEW_EXT_vertex_shader) + +#endif /* GL_EXT_vertex_shader */ + +/* ------------------------ GL_EXT_vertex_weighting ------------------------ */ + +#ifndef GL_EXT_vertex_weighting +#define GL_EXT_vertex_weighting 1 + +#define GL_MODELVIEW0_STACK_DEPTH_EXT 0x0BA3 +#define GL_MODELVIEW0_MATRIX_EXT 0x0BA6 +#define GL_MODELVIEW0_EXT 0x1700 +#define GL_MODELVIEW1_STACK_DEPTH_EXT 0x8502 +#define GL_MODELVIEW1_MATRIX_EXT 0x8506 +#define GL_VERTEX_WEIGHTING_EXT 0x8509 +#define GL_MODELVIEW1_EXT 0x850A +#define GL_CURRENT_VERTEX_WEIGHT_EXT 0x850B +#define GL_VERTEX_WEIGHT_ARRAY_EXT 0x850C +#define GL_VERTEX_WEIGHT_ARRAY_SIZE_EXT 0x850D +#define GL_VERTEX_WEIGHT_ARRAY_TYPE_EXT 0x850E +#define GL_VERTEX_WEIGHT_ARRAY_STRIDE_EXT 0x850F +#define GL_VERTEX_WEIGHT_ARRAY_POINTER_EXT 0x8510 + +typedef void (GLAPIENTRY * PFNGLVERTEXWEIGHTPOINTEREXTPROC) (GLint size, GLenum type, GLsizei stride, void* pointer); +typedef void (GLAPIENTRY * PFNGLVERTEXWEIGHTFEXTPROC) (GLfloat weight); +typedef void (GLAPIENTRY * PFNGLVERTEXWEIGHTFVEXTPROC) (GLfloat* weight); + +#define glVertexWeightPointerEXT 
GLEW_GET_FUN(__glewVertexWeightPointerEXT) +#define glVertexWeightfEXT GLEW_GET_FUN(__glewVertexWeightfEXT) +#define glVertexWeightfvEXT GLEW_GET_FUN(__glewVertexWeightfvEXT) + +#define GLEW_EXT_vertex_weighting GLEW_GET_VAR(__GLEW_EXT_vertex_weighting) + +#endif /* GL_EXT_vertex_weighting */ + +/* ---------------------- GL_GREMEDY_frame_terminator ---------------------- */ + +#ifndef GL_GREMEDY_frame_terminator +#define GL_GREMEDY_frame_terminator 1 + +typedef void (GLAPIENTRY * PFNGLFRAMETERMINATORGREMEDYPROC) (void); + +#define glFrameTerminatorGREMEDY GLEW_GET_FUN(__glewFrameTerminatorGREMEDY) + +#define GLEW_GREMEDY_frame_terminator GLEW_GET_VAR(__GLEW_GREMEDY_frame_terminator) + +#endif /* GL_GREMEDY_frame_terminator */ + +/* ------------------------ GL_GREMEDY_string_marker ----------------------- */ + +#ifndef GL_GREMEDY_string_marker +#define GL_GREMEDY_string_marker 1 + +typedef void (GLAPIENTRY * PFNGLSTRINGMARKERGREMEDYPROC) (GLsizei len, const void* string); + +#define glStringMarkerGREMEDY GLEW_GET_FUN(__glewStringMarkerGREMEDY) + +#define GLEW_GREMEDY_string_marker GLEW_GET_VAR(__GLEW_GREMEDY_string_marker) + +#endif /* GL_GREMEDY_string_marker */ + +/* --------------------- GL_HP_convolution_border_modes -------------------- */ + +#ifndef GL_HP_convolution_border_modes +#define GL_HP_convolution_border_modes 1 + +#define GLEW_HP_convolution_border_modes GLEW_GET_VAR(__GLEW_HP_convolution_border_modes) + +#endif /* GL_HP_convolution_border_modes */ + +/* ------------------------- GL_HP_image_transform ------------------------- */ + +#ifndef GL_HP_image_transform +#define GL_HP_image_transform 1 + +typedef void (GLAPIENTRY * PFNGLGETIMAGETRANSFORMPARAMETERFVHPPROC) (GLenum target, GLenum pname, const GLfloat* params); +typedef void (GLAPIENTRY * PFNGLGETIMAGETRANSFORMPARAMETERIVHPPROC) (GLenum target, GLenum pname, const GLint* params); +typedef void (GLAPIENTRY * PFNGLIMAGETRANSFORMPARAMETERFHPPROC) (GLenum target, GLenum pname, const GLfloat param); 
+typedef void (GLAPIENTRY * PFNGLIMAGETRANSFORMPARAMETERFVHPPROC) (GLenum target, GLenum pname, const GLfloat* params); +typedef void (GLAPIENTRY * PFNGLIMAGETRANSFORMPARAMETERIHPPROC) (GLenum target, GLenum pname, const GLint param); +typedef void (GLAPIENTRY * PFNGLIMAGETRANSFORMPARAMETERIVHPPROC) (GLenum target, GLenum pname, const GLint* params); + +#define glGetImageTransformParameterfvHP GLEW_GET_FUN(__glewGetImageTransformParameterfvHP) +#define glGetImageTransformParameterivHP GLEW_GET_FUN(__glewGetImageTransformParameterivHP) +#define glImageTransformParameterfHP GLEW_GET_FUN(__glewImageTransformParameterfHP) +#define glImageTransformParameterfvHP GLEW_GET_FUN(__glewImageTransformParameterfvHP) +#define glImageTransformParameteriHP GLEW_GET_FUN(__glewImageTransformParameteriHP) +#define glImageTransformParameterivHP GLEW_GET_FUN(__glewImageTransformParameterivHP) + +#define GLEW_HP_image_transform GLEW_GET_VAR(__GLEW_HP_image_transform) + +#endif /* GL_HP_image_transform */ + +/* -------------------------- GL_HP_occlusion_test ------------------------- */ + +#ifndef GL_HP_occlusion_test +#define GL_HP_occlusion_test 1 + +#define GL_OCCLUSION_TEST_HP 0x8165 +#define GL_OCCLUSION_TEST_RESULT_HP 0x8166 + +#define GLEW_HP_occlusion_test GLEW_GET_VAR(__GLEW_HP_occlusion_test) + +#endif /* GL_HP_occlusion_test */ + +/* ------------------------- GL_HP_texture_lighting ------------------------ */ + +#ifndef GL_HP_texture_lighting +#define GL_HP_texture_lighting 1 + +#define GLEW_HP_texture_lighting GLEW_GET_VAR(__GLEW_HP_texture_lighting) + +#endif /* GL_HP_texture_lighting */ + +/* --------------------------- GL_IBM_cull_vertex -------------------------- */ + +#ifndef GL_IBM_cull_vertex +#define GL_IBM_cull_vertex 1 + +#define GL_CULL_VERTEX_IBM 103050 + +#define GLEW_IBM_cull_vertex GLEW_GET_VAR(__GLEW_IBM_cull_vertex) + +#endif /* GL_IBM_cull_vertex */ + +/* ---------------------- GL_IBM_multimode_draw_arrays --------------------- */ + +#ifndef 
GL_IBM_multimode_draw_arrays +#define GL_IBM_multimode_draw_arrays 1 + +typedef void (GLAPIENTRY * PFNGLMULTIMODEDRAWARRAYSIBMPROC) (const GLenum* mode, const GLint *first, const GLsizei *count, GLsizei primcount, GLint modestride); +typedef void (GLAPIENTRY * PFNGLMULTIMODEDRAWELEMENTSIBMPROC) (const GLenum* mode, const GLsizei *count, GLenum type, const GLvoid * const *indices, GLsizei primcount, GLint modestride); + +#define glMultiModeDrawArraysIBM GLEW_GET_FUN(__glewMultiModeDrawArraysIBM) +#define glMultiModeDrawElementsIBM GLEW_GET_FUN(__glewMultiModeDrawElementsIBM) + +#define GLEW_IBM_multimode_draw_arrays GLEW_GET_VAR(__GLEW_IBM_multimode_draw_arrays) + +#endif /* GL_IBM_multimode_draw_arrays */ + +/* ------------------------- GL_IBM_rasterpos_clip ------------------------- */ + +#ifndef GL_IBM_rasterpos_clip +#define GL_IBM_rasterpos_clip 1 + +#define GL_RASTER_POSITION_UNCLIPPED_IBM 103010 + +#define GLEW_IBM_rasterpos_clip GLEW_GET_VAR(__GLEW_IBM_rasterpos_clip) + +#endif /* GL_IBM_rasterpos_clip */ + +/* --------------------------- GL_IBM_static_data -------------------------- */ + +#ifndef GL_IBM_static_data +#define GL_IBM_static_data 1 + +#define GL_ALL_STATIC_DATA_IBM 103060 +#define GL_STATIC_VERTEX_ARRAY_IBM 103061 + +#define GLEW_IBM_static_data GLEW_GET_VAR(__GLEW_IBM_static_data) + +#endif /* GL_IBM_static_data */ + +/* --------------------- GL_IBM_texture_mirrored_repeat -------------------- */ + +#ifndef GL_IBM_texture_mirrored_repeat +#define GL_IBM_texture_mirrored_repeat 1 + +#define GL_MIRRORED_REPEAT_IBM 0x8370 + +#define GLEW_IBM_texture_mirrored_repeat GLEW_GET_VAR(__GLEW_IBM_texture_mirrored_repeat) + +#endif /* GL_IBM_texture_mirrored_repeat */ + +/* ----------------------- GL_IBM_vertex_array_lists ----------------------- */ + +#ifndef GL_IBM_vertex_array_lists +#define GL_IBM_vertex_array_lists 1 + +#define GL_VERTEX_ARRAY_LIST_IBM 103070 +#define GL_NORMAL_ARRAY_LIST_IBM 103071 +#define GL_COLOR_ARRAY_LIST_IBM 103072 +#define 
GL_INDEX_ARRAY_LIST_IBM 103073 +#define GL_TEXTURE_COORD_ARRAY_LIST_IBM 103074 +#define GL_EDGE_FLAG_ARRAY_LIST_IBM 103075 +#define GL_FOG_COORDINATE_ARRAY_LIST_IBM 103076 +#define GL_SECONDARY_COLOR_ARRAY_LIST_IBM 103077 +#define GL_VERTEX_ARRAY_LIST_STRIDE_IBM 103080 +#define GL_NORMAL_ARRAY_LIST_STRIDE_IBM 103081 +#define GL_COLOR_ARRAY_LIST_STRIDE_IBM 103082 +#define GL_INDEX_ARRAY_LIST_STRIDE_IBM 103083 +#define GL_TEXTURE_COORD_ARRAY_LIST_STRIDE_IBM 103084 +#define GL_EDGE_FLAG_ARRAY_LIST_STRIDE_IBM 103085 +#define GL_FOG_COORDINATE_ARRAY_LIST_STRIDE_IBM 103086 +#define GL_SECONDARY_COLOR_ARRAY_LIST_STRIDE_IBM 103087 + +typedef void (GLAPIENTRY * PFNGLCOLORPOINTERLISTIBMPROC) (GLint size, GLenum type, GLint stride, const GLvoid ** pointer, GLint ptrstride); +typedef void (GLAPIENTRY * PFNGLEDGEFLAGPOINTERLISTIBMPROC) (GLint stride, const GLboolean ** pointer, GLint ptrstride); +typedef void (GLAPIENTRY * PFNGLFOGCOORDPOINTERLISTIBMPROC) (GLenum type, GLint stride, const GLvoid ** pointer, GLint ptrstride); +typedef void (GLAPIENTRY * PFNGLINDEXPOINTERLISTIBMPROC) (GLenum type, GLint stride, const GLvoid ** pointer, GLint ptrstride); +typedef void (GLAPIENTRY * PFNGLNORMALPOINTERLISTIBMPROC) (GLenum type, GLint stride, const GLvoid ** pointer, GLint ptrstride); +typedef void (GLAPIENTRY * PFNGLSECONDARYCOLORPOINTERLISTIBMPROC) (GLint size, GLenum type, GLint stride, const GLvoid ** pointer, GLint ptrstride); +typedef void (GLAPIENTRY * PFNGLTEXCOORDPOINTERLISTIBMPROC) (GLint size, GLenum type, GLint stride, const GLvoid ** pointer, GLint ptrstride); +typedef void (GLAPIENTRY * PFNGLVERTEXPOINTERLISTIBMPROC) (GLint size, GLenum type, GLint stride, const GLvoid ** pointer, GLint ptrstride); + +#define glColorPointerListIBM GLEW_GET_FUN(__glewColorPointerListIBM) +#define glEdgeFlagPointerListIBM GLEW_GET_FUN(__glewEdgeFlagPointerListIBM) +#define glFogCoordPointerListIBM GLEW_GET_FUN(__glewFogCoordPointerListIBM) +#define glIndexPointerListIBM 
GLEW_GET_FUN(__glewIndexPointerListIBM) +#define glNormalPointerListIBM GLEW_GET_FUN(__glewNormalPointerListIBM) +#define glSecondaryColorPointerListIBM GLEW_GET_FUN(__glewSecondaryColorPointerListIBM) +#define glTexCoordPointerListIBM GLEW_GET_FUN(__glewTexCoordPointerListIBM) +#define glVertexPointerListIBM GLEW_GET_FUN(__glewVertexPointerListIBM) + +#define GLEW_IBM_vertex_array_lists GLEW_GET_VAR(__GLEW_IBM_vertex_array_lists) + +#endif /* GL_IBM_vertex_array_lists */ + +/* -------------------------- GL_INGR_color_clamp -------------------------- */ + +#ifndef GL_INGR_color_clamp +#define GL_INGR_color_clamp 1 + +#define GL_RED_MIN_CLAMP_INGR 0x8560 +#define GL_GREEN_MIN_CLAMP_INGR 0x8561 +#define GL_BLUE_MIN_CLAMP_INGR 0x8562 +#define GL_ALPHA_MIN_CLAMP_INGR 0x8563 +#define GL_RED_MAX_CLAMP_INGR 0x8564 +#define GL_GREEN_MAX_CLAMP_INGR 0x8565 +#define GL_BLUE_MAX_CLAMP_INGR 0x8566 +#define GL_ALPHA_MAX_CLAMP_INGR 0x8567 + +#define GLEW_INGR_color_clamp GLEW_GET_VAR(__GLEW_INGR_color_clamp) + +#endif /* GL_INGR_color_clamp */ + +/* ------------------------- GL_INGR_interlace_read ------------------------ */ + +#ifndef GL_INGR_interlace_read +#define GL_INGR_interlace_read 1 + +#define GL_INTERLACE_READ_INGR 0x8568 + +#define GLEW_INGR_interlace_read GLEW_GET_VAR(__GLEW_INGR_interlace_read) + +#endif /* GL_INGR_interlace_read */ + +/* ------------------------ GL_INTEL_parallel_arrays ----------------------- */ + +#ifndef GL_INTEL_parallel_arrays +#define GL_INTEL_parallel_arrays 1 + +#define GL_PARALLEL_ARRAYS_INTEL 0x83F4 +#define GL_VERTEX_ARRAY_PARALLEL_POINTERS_INTEL 0x83F5 +#define GL_NORMAL_ARRAY_PARALLEL_POINTERS_INTEL 0x83F6 +#define GL_COLOR_ARRAY_PARALLEL_POINTERS_INTEL 0x83F7 +#define GL_TEXTURE_COORD_ARRAY_PARALLEL_POINTERS_INTEL 0x83F8 + +typedef void (GLAPIENTRY * PFNGLCOLORPOINTERVINTELPROC) (GLint size, GLenum type, const void** pointer); +typedef void (GLAPIENTRY * PFNGLNORMALPOINTERVINTELPROC) (GLenum type, const void** pointer); +typedef void 
(GLAPIENTRY * PFNGLTEXCOORDPOINTERVINTELPROC) (GLint size, GLenum type, const void** pointer); +typedef void (GLAPIENTRY * PFNGLVERTEXPOINTERVINTELPROC) (GLint size, GLenum type, const void** pointer); + +#define glColorPointervINTEL GLEW_GET_FUN(__glewColorPointervINTEL) +#define glNormalPointervINTEL GLEW_GET_FUN(__glewNormalPointervINTEL) +#define glTexCoordPointervINTEL GLEW_GET_FUN(__glewTexCoordPointervINTEL) +#define glVertexPointervINTEL GLEW_GET_FUN(__glewVertexPointervINTEL) + +#define GLEW_INTEL_parallel_arrays GLEW_GET_VAR(__GLEW_INTEL_parallel_arrays) + +#endif /* GL_INTEL_parallel_arrays */ + +/* ------------------------ GL_INTEL_texture_scissor ----------------------- */ + +#ifndef GL_INTEL_texture_scissor +#define GL_INTEL_texture_scissor 1 + +typedef void (GLAPIENTRY * PFNGLTEXSCISSORFUNCINTELPROC) (GLenum target, GLenum lfunc, GLenum hfunc); +typedef void (GLAPIENTRY * PFNGLTEXSCISSORINTELPROC) (GLenum target, GLclampf tlow, GLclampf thigh); + +#define glTexScissorFuncINTEL GLEW_GET_FUN(__glewTexScissorFuncINTEL) +#define glTexScissorINTEL GLEW_GET_FUN(__glewTexScissorINTEL) + +#define GLEW_INTEL_texture_scissor GLEW_GET_VAR(__GLEW_INTEL_texture_scissor) + +#endif /* GL_INTEL_texture_scissor */ + +/* -------------------------- GL_KTX_buffer_region ------------------------- */ + +#ifndef GL_KTX_buffer_region +#define GL_KTX_buffer_region 1 + +#define GL_KTX_FRONT_REGION 0x0 +#define GL_KTX_BACK_REGION 0x1 +#define GL_KTX_Z_REGION 0x2 +#define GL_KTX_STENCIL_REGION 0x3 + +typedef GLuint (GLAPIENTRY * PFNGLBUFFERREGIONENABLEDEXTPROC) (void); +typedef void (GLAPIENTRY * PFNGLDELETEBUFFERREGIONEXTPROC) (GLenum region); +typedef void (GLAPIENTRY * PFNGLDRAWBUFFERREGIONEXTPROC) (GLuint region, GLint x, GLint y, GLsizei width, GLsizei height, GLint xDest, GLint yDest); +typedef GLuint (GLAPIENTRY * PFNGLNEWBUFFERREGIONEXTPROC) (GLenum region); +typedef void (GLAPIENTRY * PFNGLREADBUFFERREGIONEXTPROC) (GLuint region, GLint x, GLint y, GLsizei width, 
GLsizei height); + +#define glBufferRegionEnabledEXT GLEW_GET_FUN(__glewBufferRegionEnabledEXT) +#define glDeleteBufferRegionEXT GLEW_GET_FUN(__glewDeleteBufferRegionEXT) +#define glDrawBufferRegionEXT GLEW_GET_FUN(__glewDrawBufferRegionEXT) +#define glNewBufferRegionEXT GLEW_GET_FUN(__glewNewBufferRegionEXT) +#define glReadBufferRegionEXT GLEW_GET_FUN(__glewReadBufferRegionEXT) + +#define GLEW_KTX_buffer_region GLEW_GET_VAR(__GLEW_KTX_buffer_region) + +#endif /* GL_KTX_buffer_region */ + +/* ------------------------- GL_MESAX_texture_stack ------------------------ */ + +#ifndef GL_MESAX_texture_stack +#define GL_MESAX_texture_stack 1 + +#define GL_TEXTURE_1D_STACK_MESAX 0x8759 +#define GL_TEXTURE_2D_STACK_MESAX 0x875A +#define GL_PROXY_TEXTURE_1D_STACK_MESAX 0x875B +#define GL_PROXY_TEXTURE_2D_STACK_MESAX 0x875C +#define GL_TEXTURE_1D_STACK_BINDING_MESAX 0x875D +#define GL_TEXTURE_2D_STACK_BINDING_MESAX 0x875E + +#define GLEW_MESAX_texture_stack GLEW_GET_VAR(__GLEW_MESAX_texture_stack) + +#endif /* GL_MESAX_texture_stack */ + +/* -------------------------- GL_MESA_pack_invert -------------------------- */ + +#ifndef GL_MESA_pack_invert +#define GL_MESA_pack_invert 1 + +#define GL_PACK_INVERT_MESA 0x8758 + +#define GLEW_MESA_pack_invert GLEW_GET_VAR(__GLEW_MESA_pack_invert) + +#endif /* GL_MESA_pack_invert */ + +/* ------------------------- GL_MESA_resize_buffers ------------------------ */ + +#ifndef GL_MESA_resize_buffers +#define GL_MESA_resize_buffers 1 + +typedef void (GLAPIENTRY * PFNGLRESIZEBUFFERSMESAPROC) (void); + +#define glResizeBuffersMESA GLEW_GET_FUN(__glewResizeBuffersMESA) + +#define GLEW_MESA_resize_buffers GLEW_GET_VAR(__GLEW_MESA_resize_buffers) + +#endif /* GL_MESA_resize_buffers */ + +/* --------------------------- GL_MESA_window_pos -------------------------- */ + +#ifndef GL_MESA_window_pos +#define GL_MESA_window_pos 1 + +typedef void (GLAPIENTRY * PFNGLWINDOWPOS2DMESAPROC) (GLdouble x, GLdouble y); +typedef void (GLAPIENTRY * 
PFNGLWINDOWPOS2DVMESAPROC) (const GLdouble* p); +typedef void (GLAPIENTRY * PFNGLWINDOWPOS2FMESAPROC) (GLfloat x, GLfloat y); +typedef void (GLAPIENTRY * PFNGLWINDOWPOS2FVMESAPROC) (const GLfloat* p); +typedef void (GLAPIENTRY * PFNGLWINDOWPOS2IMESAPROC) (GLint x, GLint y); +typedef void (GLAPIENTRY * PFNGLWINDOWPOS2IVMESAPROC) (const GLint* p); +typedef void (GLAPIENTRY * PFNGLWINDOWPOS2SMESAPROC) (GLshort x, GLshort y); +typedef void (GLAPIENTRY * PFNGLWINDOWPOS2SVMESAPROC) (const GLshort* p); +typedef void (GLAPIENTRY * PFNGLWINDOWPOS3DMESAPROC) (GLdouble x, GLdouble y, GLdouble z); +typedef void (GLAPIENTRY * PFNGLWINDOWPOS3DVMESAPROC) (const GLdouble* p); +typedef void (GLAPIENTRY * PFNGLWINDOWPOS3FMESAPROC) (GLfloat x, GLfloat y, GLfloat z); +typedef void (GLAPIENTRY * PFNGLWINDOWPOS3FVMESAPROC) (const GLfloat* p); +typedef void (GLAPIENTRY * PFNGLWINDOWPOS3IMESAPROC) (GLint x, GLint y, GLint z); +typedef void (GLAPIENTRY * PFNGLWINDOWPOS3IVMESAPROC) (const GLint* p); +typedef void (GLAPIENTRY * PFNGLWINDOWPOS3SMESAPROC) (GLshort x, GLshort y, GLshort z); +typedef void (GLAPIENTRY * PFNGLWINDOWPOS3SVMESAPROC) (const GLshort* p); +typedef void (GLAPIENTRY * PFNGLWINDOWPOS4DMESAPROC) (GLdouble x, GLdouble y, GLdouble z, GLdouble); +typedef void (GLAPIENTRY * PFNGLWINDOWPOS4DVMESAPROC) (const GLdouble* p); +typedef void (GLAPIENTRY * PFNGLWINDOWPOS4FMESAPROC) (GLfloat x, GLfloat y, GLfloat z, GLfloat w); +typedef void (GLAPIENTRY * PFNGLWINDOWPOS4FVMESAPROC) (const GLfloat* p); +typedef void (GLAPIENTRY * PFNGLWINDOWPOS4IMESAPROC) (GLint x, GLint y, GLint z, GLint w); +typedef void (GLAPIENTRY * PFNGLWINDOWPOS4IVMESAPROC) (const GLint* p); +typedef void (GLAPIENTRY * PFNGLWINDOWPOS4SMESAPROC) (GLshort x, GLshort y, GLshort z, GLshort w); +typedef void (GLAPIENTRY * PFNGLWINDOWPOS4SVMESAPROC) (const GLshort* p); + +#define glWindowPos2dMESA GLEW_GET_FUN(__glewWindowPos2dMESA) +#define glWindowPos2dvMESA GLEW_GET_FUN(__glewWindowPos2dvMESA) +#define 
glWindowPos2fMESA GLEW_GET_FUN(__glewWindowPos2fMESA) +#define glWindowPos2fvMESA GLEW_GET_FUN(__glewWindowPos2fvMESA) +#define glWindowPos2iMESA GLEW_GET_FUN(__glewWindowPos2iMESA) +#define glWindowPos2ivMESA GLEW_GET_FUN(__glewWindowPos2ivMESA) +#define glWindowPos2sMESA GLEW_GET_FUN(__glewWindowPos2sMESA) +#define glWindowPos2svMESA GLEW_GET_FUN(__glewWindowPos2svMESA) +#define glWindowPos3dMESA GLEW_GET_FUN(__glewWindowPos3dMESA) +#define glWindowPos3dvMESA GLEW_GET_FUN(__glewWindowPos3dvMESA) +#define glWindowPos3fMESA GLEW_GET_FUN(__glewWindowPos3fMESA) +#define glWindowPos3fvMESA GLEW_GET_FUN(__glewWindowPos3fvMESA) +#define glWindowPos3iMESA GLEW_GET_FUN(__glewWindowPos3iMESA) +#define glWindowPos3ivMESA GLEW_GET_FUN(__glewWindowPos3ivMESA) +#define glWindowPos3sMESA GLEW_GET_FUN(__glewWindowPos3sMESA) +#define glWindowPos3svMESA GLEW_GET_FUN(__glewWindowPos3svMESA) +#define glWindowPos4dMESA GLEW_GET_FUN(__glewWindowPos4dMESA) +#define glWindowPos4dvMESA GLEW_GET_FUN(__glewWindowPos4dvMESA) +#define glWindowPos4fMESA GLEW_GET_FUN(__glewWindowPos4fMESA) +#define glWindowPos4fvMESA GLEW_GET_FUN(__glewWindowPos4fvMESA) +#define glWindowPos4iMESA GLEW_GET_FUN(__glewWindowPos4iMESA) +#define glWindowPos4ivMESA GLEW_GET_FUN(__glewWindowPos4ivMESA) +#define glWindowPos4sMESA GLEW_GET_FUN(__glewWindowPos4sMESA) +#define glWindowPos4svMESA GLEW_GET_FUN(__glewWindowPos4svMESA) + +#define GLEW_MESA_window_pos GLEW_GET_VAR(__GLEW_MESA_window_pos) + +#endif /* GL_MESA_window_pos */ + +/* ------------------------- GL_MESA_ycbcr_texture ------------------------- */ + +#ifndef GL_MESA_ycbcr_texture +#define GL_MESA_ycbcr_texture 1 + +#define GL_UNSIGNED_SHORT_8_8_MESA 0x85BA +#define GL_UNSIGNED_SHORT_8_8_REV_MESA 0x85BB +#define GL_YCBCR_MESA 0x8757 + +#define GLEW_MESA_ycbcr_texture GLEW_GET_VAR(__GLEW_MESA_ycbcr_texture) + +#endif /* GL_MESA_ycbcr_texture */ + +/* --------------------------- GL_NV_blend_square -------------------------- */ + +#ifndef GL_NV_blend_square 
+#define GL_NV_blend_square 1 + +#define GLEW_NV_blend_square GLEW_GET_VAR(__GLEW_NV_blend_square) + +#endif /* GL_NV_blend_square */ + +/* ------------------------ GL_NV_conditional_render ----------------------- */ + +#ifndef GL_NV_conditional_render +#define GL_NV_conditional_render 1 + +#define GL_QUERY_WAIT_NV 0x8E13 +#define GL_QUERY_NO_WAIT_NV 0x8E14 +#define GL_QUERY_BY_REGION_WAIT_NV 0x8E15 +#define GL_QUERY_BY_REGION_NO_WAIT_NV 0x8E16 + +typedef void (GLAPIENTRY * PFNGLBEGINCONDITIONALRENDERNVPROC) (GLuint id, GLenum mode); +typedef void (GLAPIENTRY * PFNGLENDCONDITIONALRENDERNVPROC) (void); + +#define glBeginConditionalRenderNV GLEW_GET_FUN(__glewBeginConditionalRenderNV) +#define glEndConditionalRenderNV GLEW_GET_FUN(__glewEndConditionalRenderNV) + +#define GLEW_NV_conditional_render GLEW_GET_VAR(__GLEW_NV_conditional_render) + +#endif /* GL_NV_conditional_render */ + +/* ----------------------- GL_NV_copy_depth_to_color ----------------------- */ + +#ifndef GL_NV_copy_depth_to_color +#define GL_NV_copy_depth_to_color 1 + +#define GL_DEPTH_STENCIL_TO_RGBA_NV 0x886E +#define GL_DEPTH_STENCIL_TO_BGRA_NV 0x886F + +#define GLEW_NV_copy_depth_to_color GLEW_GET_VAR(__GLEW_NV_copy_depth_to_color) + +#endif /* GL_NV_copy_depth_to_color */ + +/* ------------------------ GL_NV_depth_buffer_float ----------------------- */ + +#ifndef GL_NV_depth_buffer_float +#define GL_NV_depth_buffer_float 1 + +#define GL_DEPTH_COMPONENT32F_NV 0x8DAB +#define GL_DEPTH32F_STENCIL8_NV 0x8DAC +#define GL_FLOAT_32_UNSIGNED_INT_24_8_REV_NV 0x8DAD +#define GL_DEPTH_BUFFER_FLOAT_MODE_NV 0x8DAF + +typedef void (GLAPIENTRY * PFNGLCLEARDEPTHDNVPROC) (GLdouble depth); +typedef void (GLAPIENTRY * PFNGLDEPTHBOUNDSDNVPROC) (GLdouble zmin, GLdouble zmax); +typedef void (GLAPIENTRY * PFNGLDEPTHRANGEDNVPROC) (GLdouble zNear, GLdouble zFar); + +#define glClearDepthdNV GLEW_GET_FUN(__glewClearDepthdNV) +#define glDepthBoundsdNV GLEW_GET_FUN(__glewDepthBoundsdNV) +#define glDepthRangedNV 
GLEW_GET_FUN(__glewDepthRangedNV) + +#define GLEW_NV_depth_buffer_float GLEW_GET_VAR(__GLEW_NV_depth_buffer_float) + +#endif /* GL_NV_depth_buffer_float */ + +/* --------------------------- GL_NV_depth_clamp --------------------------- */ + +#ifndef GL_NV_depth_clamp +#define GL_NV_depth_clamp 1 + +#define GL_DEPTH_CLAMP_NV 0x864F + +#define GLEW_NV_depth_clamp GLEW_GET_VAR(__GLEW_NV_depth_clamp) + +#endif /* GL_NV_depth_clamp */ + +/* ---------------------- GL_NV_depth_range_unclamped ---------------------- */ + +#ifndef GL_NV_depth_range_unclamped +#define GL_NV_depth_range_unclamped 1 + +#define GL_SAMPLE_COUNT_BITS_NV 0x8864 +#define GL_CURRENT_SAMPLE_COUNT_QUERY_NV 0x8865 +#define GL_QUERY_RESULT_NV 0x8866 +#define GL_QUERY_RESULT_AVAILABLE_NV 0x8867 +#define GL_SAMPLE_COUNT_NV 0x8914 + +#define GLEW_NV_depth_range_unclamped GLEW_GET_VAR(__GLEW_NV_depth_range_unclamped) + +#endif /* GL_NV_depth_range_unclamped */ + +/* ---------------------------- GL_NV_evaluators --------------------------- */ + +#ifndef GL_NV_evaluators +#define GL_NV_evaluators 1 + +#define GL_EVAL_2D_NV 0x86C0 +#define GL_EVAL_TRIANGULAR_2D_NV 0x86C1 +#define GL_MAP_TESSELLATION_NV 0x86C2 +#define GL_MAP_ATTRIB_U_ORDER_NV 0x86C3 +#define GL_MAP_ATTRIB_V_ORDER_NV 0x86C4 +#define GL_EVAL_FRACTIONAL_TESSELLATION_NV 0x86C5 +#define GL_EVAL_VERTEX_ATTRIB0_NV 0x86C6 +#define GL_EVAL_VERTEX_ATTRIB1_NV 0x86C7 +#define GL_EVAL_VERTEX_ATTRIB2_NV 0x86C8 +#define GL_EVAL_VERTEX_ATTRIB3_NV 0x86C9 +#define GL_EVAL_VERTEX_ATTRIB4_NV 0x86CA +#define GL_EVAL_VERTEX_ATTRIB5_NV 0x86CB +#define GL_EVAL_VERTEX_ATTRIB6_NV 0x86CC +#define GL_EVAL_VERTEX_ATTRIB7_NV 0x86CD +#define GL_EVAL_VERTEX_ATTRIB8_NV 0x86CE +#define GL_EVAL_VERTEX_ATTRIB9_NV 0x86CF +#define GL_EVAL_VERTEX_ATTRIB10_NV 0x86D0 +#define GL_EVAL_VERTEX_ATTRIB11_NV 0x86D1 +#define GL_EVAL_VERTEX_ATTRIB12_NV 0x86D2 +#define GL_EVAL_VERTEX_ATTRIB13_NV 0x86D3 +#define GL_EVAL_VERTEX_ATTRIB14_NV 0x86D4 +#define GL_EVAL_VERTEX_ATTRIB15_NV 0x86D5 
+#define GL_MAX_MAP_TESSELLATION_NV 0x86D6 +#define GL_MAX_RATIONAL_EVAL_ORDER_NV 0x86D7 + +typedef void (GLAPIENTRY * PFNGLEVALMAPSNVPROC) (GLenum target, GLenum mode); +typedef void (GLAPIENTRY * PFNGLGETMAPATTRIBPARAMETERFVNVPROC) (GLenum target, GLuint index, GLenum pname, GLfloat* params); +typedef void (GLAPIENTRY * PFNGLGETMAPATTRIBPARAMETERIVNVPROC) (GLenum target, GLuint index, GLenum pname, GLint* params); +typedef void (GLAPIENTRY * PFNGLGETMAPCONTROLPOINTSNVPROC) (GLenum target, GLuint index, GLenum type, GLsizei ustride, GLsizei vstride, GLboolean packed, void* points); +typedef void (GLAPIENTRY * PFNGLGETMAPPARAMETERFVNVPROC) (GLenum target, GLenum pname, GLfloat* params); +typedef void (GLAPIENTRY * PFNGLGETMAPPARAMETERIVNVPROC) (GLenum target, GLenum pname, GLint* params); +typedef void (GLAPIENTRY * PFNGLMAPCONTROLPOINTSNVPROC) (GLenum target, GLuint index, GLenum type, GLsizei ustride, GLsizei vstride, GLint uorder, GLint vorder, GLboolean packed, const void* points); +typedef void (GLAPIENTRY * PFNGLMAPPARAMETERFVNVPROC) (GLenum target, GLenum pname, const GLfloat* params); +typedef void (GLAPIENTRY * PFNGLMAPPARAMETERIVNVPROC) (GLenum target, GLenum pname, const GLint* params); + +#define glEvalMapsNV GLEW_GET_FUN(__glewEvalMapsNV) +#define glGetMapAttribParameterfvNV GLEW_GET_FUN(__glewGetMapAttribParameterfvNV) +#define glGetMapAttribParameterivNV GLEW_GET_FUN(__glewGetMapAttribParameterivNV) +#define glGetMapControlPointsNV GLEW_GET_FUN(__glewGetMapControlPointsNV) +#define glGetMapParameterfvNV GLEW_GET_FUN(__glewGetMapParameterfvNV) +#define glGetMapParameterivNV GLEW_GET_FUN(__glewGetMapParameterivNV) +#define glMapControlPointsNV GLEW_GET_FUN(__glewMapControlPointsNV) +#define glMapParameterfvNV GLEW_GET_FUN(__glewMapParameterfvNV) +#define glMapParameterivNV GLEW_GET_FUN(__glewMapParameterivNV) + +#define GLEW_NV_evaluators GLEW_GET_VAR(__GLEW_NV_evaluators) + +#endif /* GL_NV_evaluators */ + +/* ----------------------- 
GL_NV_explicit_multisample ---------------------- */ + +#ifndef GL_NV_explicit_multisample +#define GL_NV_explicit_multisample 1 + +#define GL_SAMPLE_POSITION_NV 0x8E50 +#define GL_SAMPLE_MASK_NV 0x8E51 +#define GL_SAMPLE_MASK_VALUE_NV 0x8E52 +#define GL_TEXTURE_BINDING_RENDERBUFFER_NV 0x8E53 +#define GL_TEXTURE_RENDERBUFFER_DATA_STORE_BINDING_NV 0x8E54 +#define GL_TEXTURE_RENDERBUFFER_NV 0x8E55 +#define GL_SAMPLER_RENDERBUFFER_NV 0x8E56 +#define GL_INT_SAMPLER_RENDERBUFFER_NV 0x8E57 +#define GL_UNSIGNED_INT_SAMPLER_RENDERBUFFER_NV 0x8E58 +#define GL_MAX_SAMPLE_MASK_WORDS_NV 0x8E59 + +typedef void (GLAPIENTRY * PFNGLGETMULTISAMPLEFVNVPROC) (GLenum pname, GLuint index, GLfloat* val); +typedef void (GLAPIENTRY * PFNGLSAMPLEMASKINDEXEDNVPROC) (GLuint index, GLbitfield mask); +typedef void (GLAPIENTRY * PFNGLTEXRENDERBUFFERNVPROC) (GLenum target, GLuint renderbuffer); + +#define glGetMultisamplefvNV GLEW_GET_FUN(__glewGetMultisamplefvNV) +#define glSampleMaskIndexedNV GLEW_GET_FUN(__glewSampleMaskIndexedNV) +#define glTexRenderbufferNV GLEW_GET_FUN(__glewTexRenderbufferNV) + +#define GLEW_NV_explicit_multisample GLEW_GET_VAR(__GLEW_NV_explicit_multisample) + +#endif /* GL_NV_explicit_multisample */ + +/* ------------------------------ GL_NV_fence ------------------------------ */ + +#ifndef GL_NV_fence +#define GL_NV_fence 1 + +#define GL_ALL_COMPLETED_NV 0x84F2 +#define GL_FENCE_STATUS_NV 0x84F3 +#define GL_FENCE_CONDITION_NV 0x84F4 + +typedef void (GLAPIENTRY * PFNGLDELETEFENCESNVPROC) (GLsizei n, const GLuint* fences); +typedef void (GLAPIENTRY * PFNGLFINISHFENCENVPROC) (GLuint fence); +typedef void (GLAPIENTRY * PFNGLGENFENCESNVPROC) (GLsizei n, GLuint* fences); +typedef void (GLAPIENTRY * PFNGLGETFENCEIVNVPROC) (GLuint fence, GLenum pname, GLint* params); +typedef GLboolean (GLAPIENTRY * PFNGLISFENCENVPROC) (GLuint fence); +typedef void (GLAPIENTRY * PFNGLSETFENCENVPROC) (GLuint fence, GLenum condition); +typedef GLboolean (GLAPIENTRY * PFNGLTESTFENCENVPROC) 
(GLuint fence); + +#define glDeleteFencesNV GLEW_GET_FUN(__glewDeleteFencesNV) +#define glFinishFenceNV GLEW_GET_FUN(__glewFinishFenceNV) +#define glGenFencesNV GLEW_GET_FUN(__glewGenFencesNV) +#define glGetFenceivNV GLEW_GET_FUN(__glewGetFenceivNV) +#define glIsFenceNV GLEW_GET_FUN(__glewIsFenceNV) +#define glSetFenceNV GLEW_GET_FUN(__glewSetFenceNV) +#define glTestFenceNV GLEW_GET_FUN(__glewTestFenceNV) + +#define GLEW_NV_fence GLEW_GET_VAR(__GLEW_NV_fence) + +#endif /* GL_NV_fence */ + +/* --------------------------- GL_NV_float_buffer -------------------------- */ + +#ifndef GL_NV_float_buffer +#define GL_NV_float_buffer 1 + +#define GL_FLOAT_R_NV 0x8880 +#define GL_FLOAT_RG_NV 0x8881 +#define GL_FLOAT_RGB_NV 0x8882 +#define GL_FLOAT_RGBA_NV 0x8883 +#define GL_FLOAT_R16_NV 0x8884 +#define GL_FLOAT_R32_NV 0x8885 +#define GL_FLOAT_RG16_NV 0x8886 +#define GL_FLOAT_RG32_NV 0x8887 +#define GL_FLOAT_RGB16_NV 0x8888 +#define GL_FLOAT_RGB32_NV 0x8889 +#define GL_FLOAT_RGBA16_NV 0x888A +#define GL_FLOAT_RGBA32_NV 0x888B +#define GL_TEXTURE_FLOAT_COMPONENTS_NV 0x888C +#define GL_FLOAT_CLEAR_COLOR_VALUE_NV 0x888D +#define GL_FLOAT_RGBA_MODE_NV 0x888E + +#define GLEW_NV_float_buffer GLEW_GET_VAR(__GLEW_NV_float_buffer) + +#endif /* GL_NV_float_buffer */ + +/* --------------------------- GL_NV_fog_distance -------------------------- */ + +#ifndef GL_NV_fog_distance +#define GL_NV_fog_distance 1 + +#define GL_FOG_DISTANCE_MODE_NV 0x855A +#define GL_EYE_RADIAL_NV 0x855B +#define GL_EYE_PLANE_ABSOLUTE_NV 0x855C + +#define GLEW_NV_fog_distance GLEW_GET_VAR(__GLEW_NV_fog_distance) + +#endif /* GL_NV_fog_distance */ + +/* ------------------------- GL_NV_fragment_program ------------------------ */ + +#ifndef GL_NV_fragment_program +#define GL_NV_fragment_program 1 + +#define GL_MAX_FRAGMENT_PROGRAM_LOCAL_PARAMETERS_NV 0x8868 +#define GL_FRAGMENT_PROGRAM_NV 0x8870 +#define GL_MAX_TEXTURE_COORDS_NV 0x8871 +#define GL_MAX_TEXTURE_IMAGE_UNITS_NV 0x8872 +#define 
GL_FRAGMENT_PROGRAM_BINDING_NV 0x8873 +#define GL_PROGRAM_ERROR_STRING_NV 0x8874 + +typedef void (GLAPIENTRY * PFNGLGETPROGRAMNAMEDPARAMETERDVNVPROC) (GLuint id, GLsizei len, const GLubyte* name, GLdouble *params); +typedef void (GLAPIENTRY * PFNGLGETPROGRAMNAMEDPARAMETERFVNVPROC) (GLuint id, GLsizei len, const GLubyte* name, GLfloat *params); +typedef void (GLAPIENTRY * PFNGLPROGRAMNAMEDPARAMETER4DNVPROC) (GLuint id, GLsizei len, const GLubyte* name, GLdouble x, GLdouble y, GLdouble z, GLdouble w); +typedef void (GLAPIENTRY * PFNGLPROGRAMNAMEDPARAMETER4DVNVPROC) (GLuint id, GLsizei len, const GLubyte* name, const GLdouble v[]); +typedef void (GLAPIENTRY * PFNGLPROGRAMNAMEDPARAMETER4FNVPROC) (GLuint id, GLsizei len, const GLubyte* name, GLfloat x, GLfloat y, GLfloat z, GLfloat w); +typedef void (GLAPIENTRY * PFNGLPROGRAMNAMEDPARAMETER4FVNVPROC) (GLuint id, GLsizei len, const GLubyte* name, const GLfloat v[]); + +#define glGetProgramNamedParameterdvNV GLEW_GET_FUN(__glewGetProgramNamedParameterdvNV) +#define glGetProgramNamedParameterfvNV GLEW_GET_FUN(__glewGetProgramNamedParameterfvNV) +#define glProgramNamedParameter4dNV GLEW_GET_FUN(__glewProgramNamedParameter4dNV) +#define glProgramNamedParameter4dvNV GLEW_GET_FUN(__glewProgramNamedParameter4dvNV) +#define glProgramNamedParameter4fNV GLEW_GET_FUN(__glewProgramNamedParameter4fNV) +#define glProgramNamedParameter4fvNV GLEW_GET_FUN(__glewProgramNamedParameter4fvNV) + +#define GLEW_NV_fragment_program GLEW_GET_VAR(__GLEW_NV_fragment_program) + +#endif /* GL_NV_fragment_program */ + +/* ------------------------ GL_NV_fragment_program2 ------------------------ */ + +#ifndef GL_NV_fragment_program2 +#define GL_NV_fragment_program2 1 + +#define GL_MAX_PROGRAM_EXEC_INSTRUCTIONS_NV 0x88F4 +#define GL_MAX_PROGRAM_CALL_DEPTH_NV 0x88F5 +#define GL_MAX_PROGRAM_IF_DEPTH_NV 0x88F6 +#define GL_MAX_PROGRAM_LOOP_DEPTH_NV 0x88F7 +#define GL_MAX_PROGRAM_LOOP_COUNT_NV 0x88F8 + +#define GLEW_NV_fragment_program2 
GLEW_GET_VAR(__GLEW_NV_fragment_program2) + +#endif /* GL_NV_fragment_program2 */ + +/* ------------------------ GL_NV_fragment_program4 ------------------------ */ + +#ifndef GL_NV_fragment_program4 +#define GL_NV_fragment_program4 1 + +#define GLEW_NV_fragment_program4 GLEW_GET_VAR(__GLEW_NV_fragment_program4) + +#endif /* GL_NV_fragment_program4 */ + +/* --------------------- GL_NV_fragment_program_option --------------------- */ + +#ifndef GL_NV_fragment_program_option +#define GL_NV_fragment_program_option 1 + +#define GLEW_NV_fragment_program_option GLEW_GET_VAR(__GLEW_NV_fragment_program_option) + +#endif /* GL_NV_fragment_program_option */ + +/* ----------------- GL_NV_framebuffer_multisample_coverage ---------------- */ + +#ifndef GL_NV_framebuffer_multisample_coverage +#define GL_NV_framebuffer_multisample_coverage 1 + +#define GL_RENDERBUFFER_COVERAGE_SAMPLES_NV 0x8CAB +#define GL_RENDERBUFFER_COLOR_SAMPLES_NV 0x8E10 +#define GL_MAX_MULTISAMPLE_COVERAGE_MODES_NV 0x8E11 +#define GL_MULTISAMPLE_COVERAGE_MODES_NV 0x8E12 + +typedef void (GLAPIENTRY * PFNGLRENDERBUFFERSTORAGEMULTISAMPLECOVERAGENVPROC) (GLenum target, GLsizei coverageSamples, GLsizei colorSamples, GLenum internalformat, GLsizei width, GLsizei height); + +#define glRenderbufferStorageMultisampleCoverageNV GLEW_GET_FUN(__glewRenderbufferStorageMultisampleCoverageNV) + +#define GLEW_NV_framebuffer_multisample_coverage GLEW_GET_VAR(__GLEW_NV_framebuffer_multisample_coverage) + +#endif /* GL_NV_framebuffer_multisample_coverage */ + +/* ------------------------ GL_NV_geometry_program4 ------------------------ */ + +#ifndef GL_NV_geometry_program4 +#define GL_NV_geometry_program4 1 + +#define GL_GEOMETRY_PROGRAM_NV 0x8C26 +#define GL_MAX_PROGRAM_OUTPUT_VERTICES_NV 0x8C27 +#define GL_MAX_PROGRAM_TOTAL_OUTPUT_COMPONENTS_NV 0x8C28 + +typedef void (GLAPIENTRY * PFNGLPROGRAMVERTEXLIMITNVPROC) (GLenum target, GLint limit); + +#define glProgramVertexLimitNV GLEW_GET_FUN(__glewProgramVertexLimitNV) + 
+#define GLEW_NV_geometry_program4 GLEW_GET_VAR(__GLEW_NV_geometry_program4) + +#endif /* GL_NV_geometry_program4 */ + +/* ------------------------- GL_NV_geometry_shader4 ------------------------ */ + +#ifndef GL_NV_geometry_shader4 +#define GL_NV_geometry_shader4 1 + +#define GLEW_NV_geometry_shader4 GLEW_GET_VAR(__GLEW_NV_geometry_shader4) + +#endif /* GL_NV_geometry_shader4 */ + +/* --------------------------- GL_NV_gpu_program4 -------------------------- */ + +#ifndef GL_NV_gpu_program4 +#define GL_NV_gpu_program4 1 + +#define GL_MIN_PROGRAM_TEXEL_OFFSET_NV 0x8904 +#define GL_MAX_PROGRAM_TEXEL_OFFSET_NV 0x8905 +#define GL_PROGRAM_ATTRIB_COMPONENTS_NV 0x8906 +#define GL_PROGRAM_RESULT_COMPONENTS_NV 0x8907 +#define GL_MAX_PROGRAM_ATTRIB_COMPONENTS_NV 0x8908 +#define GL_MAX_PROGRAM_RESULT_COMPONENTS_NV 0x8909 +#define GL_MAX_PROGRAM_GENERIC_ATTRIBS_NV 0x8DA5 +#define GL_MAX_PROGRAM_GENERIC_RESULTS_NV 0x8DA6 + +typedef void (GLAPIENTRY * PFNGLPROGRAMENVPARAMETERI4INVPROC) (GLenum target, GLuint index, GLint x, GLint y, GLint z, GLint w); +typedef void (GLAPIENTRY * PFNGLPROGRAMENVPARAMETERI4IVNVPROC) (GLenum target, GLuint index, const GLint *params); +typedef void (GLAPIENTRY * PFNGLPROGRAMENVPARAMETERI4UINVPROC) (GLenum target, GLuint index, GLuint x, GLuint y, GLuint z, GLuint w); +typedef void (GLAPIENTRY * PFNGLPROGRAMENVPARAMETERI4UIVNVPROC) (GLenum target, GLuint index, const GLuint *params); +typedef void (GLAPIENTRY * PFNGLPROGRAMENVPARAMETERSI4IVNVPROC) (GLenum target, GLuint index, GLsizei count, const GLint *params); +typedef void (GLAPIENTRY * PFNGLPROGRAMENVPARAMETERSI4UIVNVPROC) (GLenum target, GLuint index, GLsizei count, const GLuint *params); +typedef void (GLAPIENTRY * PFNGLPROGRAMLOCALPARAMETERI4INVPROC) (GLenum target, GLuint index, GLint x, GLint y, GLint z, GLint w); +typedef void (GLAPIENTRY * PFNGLPROGRAMLOCALPARAMETERI4IVNVPROC) (GLenum target, GLuint index, const GLint *params); +typedef void (GLAPIENTRY * 
PFNGLPROGRAMLOCALPARAMETERI4UINVPROC) (GLenum target, GLuint index, GLuint x, GLuint y, GLuint z, GLuint w); +typedef void (GLAPIENTRY * PFNGLPROGRAMLOCALPARAMETERI4UIVNVPROC) (GLenum target, GLuint index, const GLuint *params); +typedef void (GLAPIENTRY * PFNGLPROGRAMLOCALPARAMETERSI4IVNVPROC) (GLenum target, GLuint index, GLsizei count, const GLint *params); +typedef void (GLAPIENTRY * PFNGLPROGRAMLOCALPARAMETERSI4UIVNVPROC) (GLenum target, GLuint index, GLsizei count, const GLuint *params); + +#define glProgramEnvParameterI4iNV GLEW_GET_FUN(__glewProgramEnvParameterI4iNV) +#define glProgramEnvParameterI4ivNV GLEW_GET_FUN(__glewProgramEnvParameterI4ivNV) +#define glProgramEnvParameterI4uiNV GLEW_GET_FUN(__glewProgramEnvParameterI4uiNV) +#define glProgramEnvParameterI4uivNV GLEW_GET_FUN(__glewProgramEnvParameterI4uivNV) +#define glProgramEnvParametersI4ivNV GLEW_GET_FUN(__glewProgramEnvParametersI4ivNV) +#define glProgramEnvParametersI4uivNV GLEW_GET_FUN(__glewProgramEnvParametersI4uivNV) +#define glProgramLocalParameterI4iNV GLEW_GET_FUN(__glewProgramLocalParameterI4iNV) +#define glProgramLocalParameterI4ivNV GLEW_GET_FUN(__glewProgramLocalParameterI4ivNV) +#define glProgramLocalParameterI4uiNV GLEW_GET_FUN(__glewProgramLocalParameterI4uiNV) +#define glProgramLocalParameterI4uivNV GLEW_GET_FUN(__glewProgramLocalParameterI4uivNV) +#define glProgramLocalParametersI4ivNV GLEW_GET_FUN(__glewProgramLocalParametersI4ivNV) +#define glProgramLocalParametersI4uivNV GLEW_GET_FUN(__glewProgramLocalParametersI4uivNV) + +#define GLEW_NV_gpu_program4 GLEW_GET_VAR(__GLEW_NV_gpu_program4) + +#endif /* GL_NV_gpu_program4 */ + +/* ---------------------------- GL_NV_half_float --------------------------- */ + +#ifndef GL_NV_half_float +#define GL_NV_half_float 1 + +#define GL_HALF_FLOAT_NV 0x140B + +typedef unsigned short GLhalf; + +typedef void (GLAPIENTRY * PFNGLCOLOR3HNVPROC) (GLhalf red, GLhalf green, GLhalf blue); +typedef void (GLAPIENTRY * PFNGLCOLOR3HVNVPROC) (const GLhalf* 
v); +typedef void (GLAPIENTRY * PFNGLCOLOR4HNVPROC) (GLhalf red, GLhalf green, GLhalf blue, GLhalf alpha); +typedef void (GLAPIENTRY * PFNGLCOLOR4HVNVPROC) (const GLhalf* v); +typedef void (GLAPIENTRY * PFNGLFOGCOORDHNVPROC) (GLhalf fog); +typedef void (GLAPIENTRY * PFNGLFOGCOORDHVNVPROC) (const GLhalf* fog); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD1HNVPROC) (GLenum target, GLhalf s); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD1HVNVPROC) (GLenum target, const GLhalf* v); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD2HNVPROC) (GLenum target, GLhalf s, GLhalf t); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD2HVNVPROC) (GLenum target, const GLhalf* v); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD3HNVPROC) (GLenum target, GLhalf s, GLhalf t, GLhalf r); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD3HVNVPROC) (GLenum target, const GLhalf* v); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD4HNVPROC) (GLenum target, GLhalf s, GLhalf t, GLhalf r, GLhalf q); +typedef void (GLAPIENTRY * PFNGLMULTITEXCOORD4HVNVPROC) (GLenum target, const GLhalf* v); +typedef void (GLAPIENTRY * PFNGLNORMAL3HNVPROC) (GLhalf nx, GLhalf ny, GLhalf nz); +typedef void (GLAPIENTRY * PFNGLNORMAL3HVNVPROC) (const GLhalf* v); +typedef void (GLAPIENTRY * PFNGLSECONDARYCOLOR3HNVPROC) (GLhalf red, GLhalf green, GLhalf blue); +typedef void (GLAPIENTRY * PFNGLSECONDARYCOLOR3HVNVPROC) (const GLhalf* v); +typedef void (GLAPIENTRY * PFNGLTEXCOORD1HNVPROC) (GLhalf s); +typedef void (GLAPIENTRY * PFNGLTEXCOORD1HVNVPROC) (const GLhalf* v); +typedef void (GLAPIENTRY * PFNGLTEXCOORD2HNVPROC) (GLhalf s, GLhalf t); +typedef void (GLAPIENTRY * PFNGLTEXCOORD2HVNVPROC) (const GLhalf* v); +typedef void (GLAPIENTRY * PFNGLTEXCOORD3HNVPROC) (GLhalf s, GLhalf t, GLhalf r); +typedef void (GLAPIENTRY * PFNGLTEXCOORD3HVNVPROC) (const GLhalf* v); +typedef void (GLAPIENTRY * PFNGLTEXCOORD4HNVPROC) (GLhalf s, GLhalf t, GLhalf r, GLhalf q); +typedef void (GLAPIENTRY * PFNGLTEXCOORD4HVNVPROC) (const GLhalf* v); +typedef 
void (GLAPIENTRY * PFNGLVERTEX2HNVPROC) (GLhalf x, GLhalf y); +typedef void (GLAPIENTRY * PFNGLVERTEX2HVNVPROC) (const GLhalf* v); +typedef void (GLAPIENTRY * PFNGLVERTEX3HNVPROC) (GLhalf x, GLhalf y, GLhalf z); +typedef void (GLAPIENTRY * PFNGLVERTEX3HVNVPROC) (const GLhalf* v); +typedef void (GLAPIENTRY * PFNGLVERTEX4HNVPROC) (GLhalf x, GLhalf y, GLhalf z, GLhalf w); +typedef void (GLAPIENTRY * PFNGLVERTEX4HVNVPROC) (const GLhalf* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB1HNVPROC) (GLuint index, GLhalf x); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB1HVNVPROC) (GLuint index, const GLhalf* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB2HNVPROC) (GLuint index, GLhalf x, GLhalf y); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB2HVNVPROC) (GLuint index, const GLhalf* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB3HNVPROC) (GLuint index, GLhalf x, GLhalf y, GLhalf z); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB3HVNVPROC) (GLuint index, const GLhalf* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB4HNVPROC) (GLuint index, GLhalf x, GLhalf y, GLhalf z, GLhalf w); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB4HVNVPROC) (GLuint index, const GLhalf* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIBS1HVNVPROC) (GLuint index, GLsizei n, const GLhalf* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIBS2HVNVPROC) (GLuint index, GLsizei n, const GLhalf* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIBS3HVNVPROC) (GLuint index, GLsizei n, const GLhalf* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIBS4HVNVPROC) (GLuint index, GLsizei n, const GLhalf* v); +typedef void (GLAPIENTRY * PFNGLVERTEXWEIGHTHNVPROC) (GLhalf weight); +typedef void (GLAPIENTRY * PFNGLVERTEXWEIGHTHVNVPROC) (const GLhalf* weight); + +#define glColor3hNV GLEW_GET_FUN(__glewColor3hNV) +#define glColor3hvNV GLEW_GET_FUN(__glewColor3hvNV) +#define glColor4hNV GLEW_GET_FUN(__glewColor4hNV) +#define glColor4hvNV GLEW_GET_FUN(__glewColor4hvNV) +#define glFogCoordhNV GLEW_GET_FUN(__glewFogCoordhNV) 
+#define glFogCoordhvNV GLEW_GET_FUN(__glewFogCoordhvNV) +#define glMultiTexCoord1hNV GLEW_GET_FUN(__glewMultiTexCoord1hNV) +#define glMultiTexCoord1hvNV GLEW_GET_FUN(__glewMultiTexCoord1hvNV) +#define glMultiTexCoord2hNV GLEW_GET_FUN(__glewMultiTexCoord2hNV) +#define glMultiTexCoord2hvNV GLEW_GET_FUN(__glewMultiTexCoord2hvNV) +#define glMultiTexCoord3hNV GLEW_GET_FUN(__glewMultiTexCoord3hNV) +#define glMultiTexCoord3hvNV GLEW_GET_FUN(__glewMultiTexCoord3hvNV) +#define glMultiTexCoord4hNV GLEW_GET_FUN(__glewMultiTexCoord4hNV) +#define glMultiTexCoord4hvNV GLEW_GET_FUN(__glewMultiTexCoord4hvNV) +#define glNormal3hNV GLEW_GET_FUN(__glewNormal3hNV) +#define glNormal3hvNV GLEW_GET_FUN(__glewNormal3hvNV) +#define glSecondaryColor3hNV GLEW_GET_FUN(__glewSecondaryColor3hNV) +#define glSecondaryColor3hvNV GLEW_GET_FUN(__glewSecondaryColor3hvNV) +#define glTexCoord1hNV GLEW_GET_FUN(__glewTexCoord1hNV) +#define glTexCoord1hvNV GLEW_GET_FUN(__glewTexCoord1hvNV) +#define glTexCoord2hNV GLEW_GET_FUN(__glewTexCoord2hNV) +#define glTexCoord2hvNV GLEW_GET_FUN(__glewTexCoord2hvNV) +#define glTexCoord3hNV GLEW_GET_FUN(__glewTexCoord3hNV) +#define glTexCoord3hvNV GLEW_GET_FUN(__glewTexCoord3hvNV) +#define glTexCoord4hNV GLEW_GET_FUN(__glewTexCoord4hNV) +#define glTexCoord4hvNV GLEW_GET_FUN(__glewTexCoord4hvNV) +#define glVertex2hNV GLEW_GET_FUN(__glewVertex2hNV) +#define glVertex2hvNV GLEW_GET_FUN(__glewVertex2hvNV) +#define glVertex3hNV GLEW_GET_FUN(__glewVertex3hNV) +#define glVertex3hvNV GLEW_GET_FUN(__glewVertex3hvNV) +#define glVertex4hNV GLEW_GET_FUN(__glewVertex4hNV) +#define glVertex4hvNV GLEW_GET_FUN(__glewVertex4hvNV) +#define glVertexAttrib1hNV GLEW_GET_FUN(__glewVertexAttrib1hNV) +#define glVertexAttrib1hvNV GLEW_GET_FUN(__glewVertexAttrib1hvNV) +#define glVertexAttrib2hNV GLEW_GET_FUN(__glewVertexAttrib2hNV) +#define glVertexAttrib2hvNV GLEW_GET_FUN(__glewVertexAttrib2hvNV) +#define glVertexAttrib3hNV GLEW_GET_FUN(__glewVertexAttrib3hNV) +#define glVertexAttrib3hvNV 
GLEW_GET_FUN(__glewVertexAttrib3hvNV) +#define glVertexAttrib4hNV GLEW_GET_FUN(__glewVertexAttrib4hNV) +#define glVertexAttrib4hvNV GLEW_GET_FUN(__glewVertexAttrib4hvNV) +#define glVertexAttribs1hvNV GLEW_GET_FUN(__glewVertexAttribs1hvNV) +#define glVertexAttribs2hvNV GLEW_GET_FUN(__glewVertexAttribs2hvNV) +#define glVertexAttribs3hvNV GLEW_GET_FUN(__glewVertexAttribs3hvNV) +#define glVertexAttribs4hvNV GLEW_GET_FUN(__glewVertexAttribs4hvNV) +#define glVertexWeighthNV GLEW_GET_FUN(__glewVertexWeighthNV) +#define glVertexWeighthvNV GLEW_GET_FUN(__glewVertexWeighthvNV) + +#define GLEW_NV_half_float GLEW_GET_VAR(__GLEW_NV_half_float) + +#endif /* GL_NV_half_float */ + +/* ------------------------ GL_NV_light_max_exponent ----------------------- */ + +#ifndef GL_NV_light_max_exponent +#define GL_NV_light_max_exponent 1 + +#define GL_MAX_SHININESS_NV 0x8504 +#define GL_MAX_SPOT_EXPONENT_NV 0x8505 + +#define GLEW_NV_light_max_exponent GLEW_GET_VAR(__GLEW_NV_light_max_exponent) + +#endif /* GL_NV_light_max_exponent */ + +/* --------------------- GL_NV_multisample_filter_hint --------------------- */ + +#ifndef GL_NV_multisample_filter_hint +#define GL_NV_multisample_filter_hint 1 + +#define GL_MULTISAMPLE_FILTER_HINT_NV 0x8534 + +#define GLEW_NV_multisample_filter_hint GLEW_GET_VAR(__GLEW_NV_multisample_filter_hint) + +#endif /* GL_NV_multisample_filter_hint */ + +/* ------------------------- GL_NV_occlusion_query ------------------------- */ + +#ifndef GL_NV_occlusion_query +#define GL_NV_occlusion_query 1 + +#define GL_PIXEL_COUNTER_BITS_NV 0x8864 +#define GL_CURRENT_OCCLUSION_QUERY_ID_NV 0x8865 +#define GL_PIXEL_COUNT_NV 0x8866 +#define GL_PIXEL_COUNT_AVAILABLE_NV 0x8867 + +typedef void (GLAPIENTRY * PFNGLBEGINOCCLUSIONQUERYNVPROC) (GLuint id); +typedef void (GLAPIENTRY * PFNGLDELETEOCCLUSIONQUERIESNVPROC) (GLsizei n, const GLuint* ids); +typedef void (GLAPIENTRY * PFNGLENDOCCLUSIONQUERYNVPROC) (void); +typedef void (GLAPIENTRY * PFNGLGENOCCLUSIONQUERIESNVPROC) 
(GLsizei n, GLuint* ids); +typedef void (GLAPIENTRY * PFNGLGETOCCLUSIONQUERYIVNVPROC) (GLuint id, GLenum pname, GLint* params); +typedef void (GLAPIENTRY * PFNGLGETOCCLUSIONQUERYUIVNVPROC) (GLuint id, GLenum pname, GLuint* params); +typedef GLboolean (GLAPIENTRY * PFNGLISOCCLUSIONQUERYNVPROC) (GLuint id); + +#define glBeginOcclusionQueryNV GLEW_GET_FUN(__glewBeginOcclusionQueryNV) +#define glDeleteOcclusionQueriesNV GLEW_GET_FUN(__glewDeleteOcclusionQueriesNV) +#define glEndOcclusionQueryNV GLEW_GET_FUN(__glewEndOcclusionQueryNV) +#define glGenOcclusionQueriesNV GLEW_GET_FUN(__glewGenOcclusionQueriesNV) +#define glGetOcclusionQueryivNV GLEW_GET_FUN(__glewGetOcclusionQueryivNV) +#define glGetOcclusionQueryuivNV GLEW_GET_FUN(__glewGetOcclusionQueryuivNV) +#define glIsOcclusionQueryNV GLEW_GET_FUN(__glewIsOcclusionQueryNV) + +#define GLEW_NV_occlusion_query GLEW_GET_VAR(__GLEW_NV_occlusion_query) + +#endif /* GL_NV_occlusion_query */ + +/* ----------------------- GL_NV_packed_depth_stencil ---------------------- */ + +#ifndef GL_NV_packed_depth_stencil +#define GL_NV_packed_depth_stencil 1 + +#define GL_DEPTH_STENCIL_NV 0x84F9 +#define GL_UNSIGNED_INT_24_8_NV 0x84FA + +#define GLEW_NV_packed_depth_stencil GLEW_GET_VAR(__GLEW_NV_packed_depth_stencil) + +#endif /* GL_NV_packed_depth_stencil */ + +/* --------------------- GL_NV_parameter_buffer_object --------------------- */ + +#ifndef GL_NV_parameter_buffer_object +#define GL_NV_parameter_buffer_object 1 + +#define GL_MAX_PROGRAM_PARAMETER_BUFFER_BINDINGS_NV 0x8DA0 +#define GL_MAX_PROGRAM_PARAMETER_BUFFER_SIZE_NV 0x8DA1 +#define GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV 0x8DA2 +#define GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV 0x8DA3 +#define GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV 0x8DA4 + +typedef void (GLAPIENTRY * PFNGLPROGRAMBUFFERPARAMETERSIIVNVPROC) (GLenum target, GLuint buffer, GLuint index, GLsizei count, const GLint *params); +typedef void (GLAPIENTRY * PFNGLPROGRAMBUFFERPARAMETERSIUIVNVPROC) (GLenum target, 
GLuint buffer, GLuint index, GLsizei count, const GLuint *params); +typedef void (GLAPIENTRY * PFNGLPROGRAMBUFFERPARAMETERSFVNVPROC) (GLenum target, GLuint buffer, GLuint index, GLsizei count, const GLfloat *params); + +#define glProgramBufferParametersIivNV GLEW_GET_FUN(__glewProgramBufferParametersIivNV) +#define glProgramBufferParametersIuivNV GLEW_GET_FUN(__glewProgramBufferParametersIuivNV) +#define glProgramBufferParametersfvNV GLEW_GET_FUN(__glewProgramBufferParametersfvNV) + +#define GLEW_NV_parameter_buffer_object GLEW_GET_VAR(__GLEW_NV_parameter_buffer_object) + +#endif /* GL_NV_parameter_buffer_object */ + +/* ------------------------- GL_NV_pixel_data_range ------------------------ */ + +#ifndef GL_NV_pixel_data_range +#define GL_NV_pixel_data_range 1 + +#define GL_WRITE_PIXEL_DATA_RANGE_NV 0x8878 +#define GL_READ_PIXEL_DATA_RANGE_NV 0x8879 +#define GL_WRITE_PIXEL_DATA_RANGE_LENGTH_NV 0x887A +#define GL_READ_PIXEL_DATA_RANGE_LENGTH_NV 0x887B +#define GL_WRITE_PIXEL_DATA_RANGE_POINTER_NV 0x887C +#define GL_READ_PIXEL_DATA_RANGE_POINTER_NV 0x887D + +typedef void (GLAPIENTRY * PFNGLFLUSHPIXELDATARANGENVPROC) (GLenum target); +typedef void (GLAPIENTRY * PFNGLPIXELDATARANGENVPROC) (GLenum target, GLsizei length, void* pointer); + +#define glFlushPixelDataRangeNV GLEW_GET_FUN(__glewFlushPixelDataRangeNV) +#define glPixelDataRangeNV GLEW_GET_FUN(__glewPixelDataRangeNV) + +#define GLEW_NV_pixel_data_range GLEW_GET_VAR(__GLEW_NV_pixel_data_range) + +#endif /* GL_NV_pixel_data_range */ + +/* --------------------------- GL_NV_point_sprite -------------------------- */ + +#ifndef GL_NV_point_sprite +#define GL_NV_point_sprite 1 + +#define GL_POINT_SPRITE_NV 0x8861 +#define GL_COORD_REPLACE_NV 0x8862 +#define GL_POINT_SPRITE_R_MODE_NV 0x8863 + +typedef void (GLAPIENTRY * PFNGLPOINTPARAMETERINVPROC) (GLenum pname, GLint param); +typedef void (GLAPIENTRY * PFNGLPOINTPARAMETERIVNVPROC) (GLenum pname, const GLint* params); + +#define glPointParameteriNV 
GLEW_GET_FUN(__glewPointParameteriNV) +#define glPointParameterivNV GLEW_GET_FUN(__glewPointParameterivNV) + +#define GLEW_NV_point_sprite GLEW_GET_VAR(__GLEW_NV_point_sprite) + +#endif /* GL_NV_point_sprite */ + +/* -------------------------- GL_NV_present_video -------------------------- */ + +#ifndef GL_NV_present_video +#define GL_NV_present_video 1 + +#define GL_FRAME_NV 0x8E26 +#define GL_FIELDS_NV 0x8E27 +#define GL_CURRENT_TIME_NV 0x8E28 +#define GL_NUM_FILL_STREAMS_NV 0x8E29 +#define GL_PRESENT_TIME_NV 0x8E2A +#define GL_PRESENT_DURATION_NV 0x8E2B + +typedef void (GLAPIENTRY * PFNGLGETVIDEOI64VNVPROC) (GLuint video_slot, GLenum pname, GLint64EXT* params); +typedef void (GLAPIENTRY * PFNGLGETVIDEOIVNVPROC) (GLuint video_slot, GLenum pname, GLint* params); +typedef void (GLAPIENTRY * PFNGLGETVIDEOUI64VNVPROC) (GLuint video_slot, GLenum pname, GLuint64EXT* params); +typedef void (GLAPIENTRY * PFNGLGETVIDEOUIVNVPROC) (GLuint video_slot, GLenum pname, GLuint* params); +typedef void (GLAPIENTRY * PFNGLPRESENTFRAMEDUALFILLNVPROC) (GLuint video_slot, GLuint64EXT minPresentTime, GLuint beginPresentTimeId, GLuint presentDurationId, GLenum type, GLenum target0, GLuint fill0, GLenum target1, GLuint fill1, GLenum target2, GLuint fill2, GLenum target3, GLuint fill3); +typedef void (GLAPIENTRY * PFNGLPRESENTFRAMEKEYEDNVPROC) (GLuint video_slot, GLuint64EXT minPresentTime, GLuint beginPresentTimeId, GLuint presentDurationId, GLenum type, GLenum target0, GLuint fill0, GLuint key0, GLenum target1, GLuint fill1, GLuint key1); +typedef void (GLAPIENTRY * PFNGLVIDEOPARAMETERIVNVPROC) (GLuint video_slot, GLenum pname, const GLint* params); + +#define glGetVideoi64vNV GLEW_GET_FUN(__glewGetVideoi64vNV) +#define glGetVideoivNV GLEW_GET_FUN(__glewGetVideoivNV) +#define glGetVideoui64vNV GLEW_GET_FUN(__glewGetVideoui64vNV) +#define glGetVideouivNV GLEW_GET_FUN(__glewGetVideouivNV) +#define glPresentFrameDualFillNV GLEW_GET_FUN(__glewPresentFrameDualFillNV) +#define 
glPresentFrameKeyedNV GLEW_GET_FUN(__glewPresentFrameKeyedNV) +#define glVideoParameterivNV GLEW_GET_FUN(__glewVideoParameterivNV) + +#define GLEW_NV_present_video GLEW_GET_VAR(__GLEW_NV_present_video) + +#endif /* GL_NV_present_video */ + +/* ------------------------ GL_NV_primitive_restart ------------------------ */ + +#ifndef GL_NV_primitive_restart +#define GL_NV_primitive_restart 1 + +#define GL_PRIMITIVE_RESTART_NV 0x8558 +#define GL_PRIMITIVE_RESTART_INDEX_NV 0x8559 + +typedef void (GLAPIENTRY * PFNGLPRIMITIVERESTARTINDEXNVPROC) (GLuint index); +typedef void (GLAPIENTRY * PFNGLPRIMITIVERESTARTNVPROC) (void); + +#define glPrimitiveRestartIndexNV GLEW_GET_FUN(__glewPrimitiveRestartIndexNV) +#define glPrimitiveRestartNV GLEW_GET_FUN(__glewPrimitiveRestartNV) + +#define GLEW_NV_primitive_restart GLEW_GET_VAR(__GLEW_NV_primitive_restart) + +#endif /* GL_NV_primitive_restart */ + +/* ------------------------ GL_NV_register_combiners ----------------------- */ + +#ifndef GL_NV_register_combiners +#define GL_NV_register_combiners 1 + +#define GL_REGISTER_COMBINERS_NV 0x8522 +#define GL_VARIABLE_A_NV 0x8523 +#define GL_VARIABLE_B_NV 0x8524 +#define GL_VARIABLE_C_NV 0x8525 +#define GL_VARIABLE_D_NV 0x8526 +#define GL_VARIABLE_E_NV 0x8527 +#define GL_VARIABLE_F_NV 0x8528 +#define GL_VARIABLE_G_NV 0x8529 +#define GL_CONSTANT_COLOR0_NV 0x852A +#define GL_CONSTANT_COLOR1_NV 0x852B +#define GL_PRIMARY_COLOR_NV 0x852C +#define GL_SECONDARY_COLOR_NV 0x852D +#define GL_SPARE0_NV 0x852E +#define GL_SPARE1_NV 0x852F +#define GL_DISCARD_NV 0x8530 +#define GL_E_TIMES_F_NV 0x8531 +#define GL_SPARE0_PLUS_SECONDARY_COLOR_NV 0x8532 +#define GL_UNSIGNED_IDENTITY_NV 0x8536 +#define GL_UNSIGNED_INVERT_NV 0x8537 +#define GL_EXPAND_NORMAL_NV 0x8538 +#define GL_EXPAND_NEGATE_NV 0x8539 +#define GL_HALF_BIAS_NORMAL_NV 0x853A +#define GL_HALF_BIAS_NEGATE_NV 0x853B +#define GL_SIGNED_IDENTITY_NV 0x853C +#define GL_SIGNED_NEGATE_NV 0x853D +#define GL_SCALE_BY_TWO_NV 0x853E +#define 
GL_SCALE_BY_FOUR_NV 0x853F +#define GL_SCALE_BY_ONE_HALF_NV 0x8540 +#define GL_BIAS_BY_NEGATIVE_ONE_HALF_NV 0x8541 +#define GL_COMBINER_INPUT_NV 0x8542 +#define GL_COMBINER_MAPPING_NV 0x8543 +#define GL_COMBINER_COMPONENT_USAGE_NV 0x8544 +#define GL_COMBINER_AB_DOT_PRODUCT_NV 0x8545 +#define GL_COMBINER_CD_DOT_PRODUCT_NV 0x8546 +#define GL_COMBINER_MUX_SUM_NV 0x8547 +#define GL_COMBINER_SCALE_NV 0x8548 +#define GL_COMBINER_BIAS_NV 0x8549 +#define GL_COMBINER_AB_OUTPUT_NV 0x854A +#define GL_COMBINER_CD_OUTPUT_NV 0x854B +#define GL_COMBINER_SUM_OUTPUT_NV 0x854C +#define GL_MAX_GENERAL_COMBINERS_NV 0x854D +#define GL_NUM_GENERAL_COMBINERS_NV 0x854E +#define GL_COLOR_SUM_CLAMP_NV 0x854F +#define GL_COMBINER0_NV 0x8550 +#define GL_COMBINER1_NV 0x8551 +#define GL_COMBINER2_NV 0x8552 +#define GL_COMBINER3_NV 0x8553 +#define GL_COMBINER4_NV 0x8554 +#define GL_COMBINER5_NV 0x8555 +#define GL_COMBINER6_NV 0x8556 +#define GL_COMBINER7_NV 0x8557 + +typedef void (GLAPIENTRY * PFNGLCOMBINERINPUTNVPROC) (GLenum stage, GLenum portion, GLenum variable, GLenum input, GLenum mapping, GLenum componentUsage); +typedef void (GLAPIENTRY * PFNGLCOMBINEROUTPUTNVPROC) (GLenum stage, GLenum portion, GLenum abOutput, GLenum cdOutput, GLenum sumOutput, GLenum scale, GLenum bias, GLboolean abDotProduct, GLboolean cdDotProduct, GLboolean muxSum); +typedef void (GLAPIENTRY * PFNGLCOMBINERPARAMETERFNVPROC) (GLenum pname, GLfloat param); +typedef void (GLAPIENTRY * PFNGLCOMBINERPARAMETERFVNVPROC) (GLenum pname, const GLfloat* params); +typedef void (GLAPIENTRY * PFNGLCOMBINERPARAMETERINVPROC) (GLenum pname, GLint param); +typedef void (GLAPIENTRY * PFNGLCOMBINERPARAMETERIVNVPROC) (GLenum pname, const GLint* params); +typedef void (GLAPIENTRY * PFNGLFINALCOMBINERINPUTNVPROC) (GLenum variable, GLenum input, GLenum mapping, GLenum componentUsage); +typedef void (GLAPIENTRY * PFNGLGETCOMBINERINPUTPARAMETERFVNVPROC) (GLenum stage, GLenum portion, GLenum variable, GLenum pname, GLfloat* params); +typedef 
void (GLAPIENTRY * PFNGLGETCOMBINERINPUTPARAMETERIVNVPROC) (GLenum stage, GLenum portion, GLenum variable, GLenum pname, GLint* params); +typedef void (GLAPIENTRY * PFNGLGETCOMBINEROUTPUTPARAMETERFVNVPROC) (GLenum stage, GLenum portion, GLenum pname, GLfloat* params); +typedef void (GLAPIENTRY * PFNGLGETCOMBINEROUTPUTPARAMETERIVNVPROC) (GLenum stage, GLenum portion, GLenum pname, GLint* params); +typedef void (GLAPIENTRY * PFNGLGETFINALCOMBINERINPUTPARAMETERFVNVPROC) (GLenum variable, GLenum pname, GLfloat* params); +typedef void (GLAPIENTRY * PFNGLGETFINALCOMBINERINPUTPARAMETERIVNVPROC) (GLenum variable, GLenum pname, GLint* params); + +#define glCombinerInputNV GLEW_GET_FUN(__glewCombinerInputNV) +#define glCombinerOutputNV GLEW_GET_FUN(__glewCombinerOutputNV) +#define glCombinerParameterfNV GLEW_GET_FUN(__glewCombinerParameterfNV) +#define glCombinerParameterfvNV GLEW_GET_FUN(__glewCombinerParameterfvNV) +#define glCombinerParameteriNV GLEW_GET_FUN(__glewCombinerParameteriNV) +#define glCombinerParameterivNV GLEW_GET_FUN(__glewCombinerParameterivNV) +#define glFinalCombinerInputNV GLEW_GET_FUN(__glewFinalCombinerInputNV) +#define glGetCombinerInputParameterfvNV GLEW_GET_FUN(__glewGetCombinerInputParameterfvNV) +#define glGetCombinerInputParameterivNV GLEW_GET_FUN(__glewGetCombinerInputParameterivNV) +#define glGetCombinerOutputParameterfvNV GLEW_GET_FUN(__glewGetCombinerOutputParameterfvNV) +#define glGetCombinerOutputParameterivNV GLEW_GET_FUN(__glewGetCombinerOutputParameterivNV) +#define glGetFinalCombinerInputParameterfvNV GLEW_GET_FUN(__glewGetFinalCombinerInputParameterfvNV) +#define glGetFinalCombinerInputParameterivNV GLEW_GET_FUN(__glewGetFinalCombinerInputParameterivNV) + +#define GLEW_NV_register_combiners GLEW_GET_VAR(__GLEW_NV_register_combiners) + +#endif /* GL_NV_register_combiners */ + +/* ----------------------- GL_NV_register_combiners2 ----------------------- */ + +#ifndef GL_NV_register_combiners2 +#define GL_NV_register_combiners2 1 + 
+#define GL_PER_STAGE_CONSTANTS_NV 0x8535 + +typedef void (GLAPIENTRY * PFNGLCOMBINERSTAGEPARAMETERFVNVPROC) (GLenum stage, GLenum pname, const GLfloat* params); +typedef void (GLAPIENTRY * PFNGLGETCOMBINERSTAGEPARAMETERFVNVPROC) (GLenum stage, GLenum pname, GLfloat* params); + +#define glCombinerStageParameterfvNV GLEW_GET_FUN(__glewCombinerStageParameterfvNV) +#define glGetCombinerStageParameterfvNV GLEW_GET_FUN(__glewGetCombinerStageParameterfvNV) + +#define GLEW_NV_register_combiners2 GLEW_GET_VAR(__GLEW_NV_register_combiners2) + +#endif /* GL_NV_register_combiners2 */ + +/* -------------------------- GL_NV_texgen_emboss -------------------------- */ + +#ifndef GL_NV_texgen_emboss +#define GL_NV_texgen_emboss 1 + +#define GL_EMBOSS_LIGHT_NV 0x855D +#define GL_EMBOSS_CONSTANT_NV 0x855E +#define GL_EMBOSS_MAP_NV 0x855F + +#define GLEW_NV_texgen_emboss GLEW_GET_VAR(__GLEW_NV_texgen_emboss) + +#endif /* GL_NV_texgen_emboss */ + +/* ------------------------ GL_NV_texgen_reflection ------------------------ */ + +#ifndef GL_NV_texgen_reflection +#define GL_NV_texgen_reflection 1 + +#define GL_NORMAL_MAP_NV 0x8511 +#define GL_REFLECTION_MAP_NV 0x8512 + +#define GLEW_NV_texgen_reflection GLEW_GET_VAR(__GLEW_NV_texgen_reflection) + +#endif /* GL_NV_texgen_reflection */ + +/* --------------------- GL_NV_texture_compression_vtc --------------------- */ + +#ifndef GL_NV_texture_compression_vtc +#define GL_NV_texture_compression_vtc 1 + +#define GLEW_NV_texture_compression_vtc GLEW_GET_VAR(__GLEW_NV_texture_compression_vtc) + +#endif /* GL_NV_texture_compression_vtc */ + +/* ----------------------- GL_NV_texture_env_combine4 ---------------------- */ + +#ifndef GL_NV_texture_env_combine4 +#define GL_NV_texture_env_combine4 1 + +#define GL_COMBINE4_NV 0x8503 +#define GL_SOURCE3_RGB_NV 0x8583 +#define GL_SOURCE3_ALPHA_NV 0x858B +#define GL_OPERAND3_RGB_NV 0x8593 +#define GL_OPERAND3_ALPHA_NV 0x859B + +#define GLEW_NV_texture_env_combine4 
GLEW_GET_VAR(__GLEW_NV_texture_env_combine4) + +#endif /* GL_NV_texture_env_combine4 */ + +/* ---------------------- GL_NV_texture_expand_normal ---------------------- */ + +#ifndef GL_NV_texture_expand_normal +#define GL_NV_texture_expand_normal 1 + +#define GL_TEXTURE_UNSIGNED_REMAP_MODE_NV 0x888F + +#define GLEW_NV_texture_expand_normal GLEW_GET_VAR(__GLEW_NV_texture_expand_normal) + +#endif /* GL_NV_texture_expand_normal */ + +/* ------------------------ GL_NV_texture_rectangle ------------------------ */ + +#ifndef GL_NV_texture_rectangle +#define GL_NV_texture_rectangle 1 + +#define GL_TEXTURE_RECTANGLE_NV 0x84F5 +#define GL_TEXTURE_BINDING_RECTANGLE_NV 0x84F6 +#define GL_PROXY_TEXTURE_RECTANGLE_NV 0x84F7 +#define GL_MAX_RECTANGLE_TEXTURE_SIZE_NV 0x84F8 + +#define GLEW_NV_texture_rectangle GLEW_GET_VAR(__GLEW_NV_texture_rectangle) + +#endif /* GL_NV_texture_rectangle */ + +/* -------------------------- GL_NV_texture_shader ------------------------- */ + +#ifndef GL_NV_texture_shader +#define GL_NV_texture_shader 1 + +#define GL_OFFSET_TEXTURE_RECTANGLE_NV 0x864C +#define GL_OFFSET_TEXTURE_RECTANGLE_SCALE_NV 0x864D +#define GL_DOT_PRODUCT_TEXTURE_RECTANGLE_NV 0x864E +#define GL_RGBA_UNSIGNED_DOT_PRODUCT_MAPPING_NV 0x86D9 +#define GL_UNSIGNED_INT_S8_S8_8_8_NV 0x86DA +#define GL_UNSIGNED_INT_8_8_S8_S8_REV_NV 0x86DB +#define GL_DSDT_MAG_INTENSITY_NV 0x86DC +#define GL_SHADER_CONSISTENT_NV 0x86DD +#define GL_TEXTURE_SHADER_NV 0x86DE +#define GL_SHADER_OPERATION_NV 0x86DF +#define GL_CULL_MODES_NV 0x86E0 +#define GL_OFFSET_TEXTURE_2D_MATRIX_NV 0x86E1 +#define GL_OFFSET_TEXTURE_MATRIX_NV 0x86E1 +#define GL_OFFSET_TEXTURE_2D_SCALE_NV 0x86E2 +#define GL_OFFSET_TEXTURE_SCALE_NV 0x86E2 +#define GL_OFFSET_TEXTURE_BIAS_NV 0x86E3 +#define GL_OFFSET_TEXTURE_2D_BIAS_NV 0x86E3 +#define GL_PREVIOUS_TEXTURE_INPUT_NV 0x86E4 +#define GL_CONST_EYE_NV 0x86E5 +#define GL_PASS_THROUGH_NV 0x86E6 +#define GL_CULL_FRAGMENT_NV 0x86E7 +#define GL_OFFSET_TEXTURE_2D_NV 0x86E8 +#define 
GL_DEPENDENT_AR_TEXTURE_2D_NV 0x86E9 +#define GL_DEPENDENT_GB_TEXTURE_2D_NV 0x86EA +#define GL_DOT_PRODUCT_NV 0x86EC +#define GL_DOT_PRODUCT_DEPTH_REPLACE_NV 0x86ED +#define GL_DOT_PRODUCT_TEXTURE_2D_NV 0x86EE +#define GL_DOT_PRODUCT_TEXTURE_CUBE_MAP_NV 0x86F0 +#define GL_DOT_PRODUCT_DIFFUSE_CUBE_MAP_NV 0x86F1 +#define GL_DOT_PRODUCT_REFLECT_CUBE_MAP_NV 0x86F2 +#define GL_DOT_PRODUCT_CONST_EYE_REFLECT_CUBE_MAP_NV 0x86F3 +#define GL_HILO_NV 0x86F4 +#define GL_DSDT_NV 0x86F5 +#define GL_DSDT_MAG_NV 0x86F6 +#define GL_DSDT_MAG_VIB_NV 0x86F7 +#define GL_HILO16_NV 0x86F8 +#define GL_SIGNED_HILO_NV 0x86F9 +#define GL_SIGNED_HILO16_NV 0x86FA +#define GL_SIGNED_RGBA_NV 0x86FB +#define GL_SIGNED_RGBA8_NV 0x86FC +#define GL_SIGNED_RGB_NV 0x86FE +#define GL_SIGNED_RGB8_NV 0x86FF +#define GL_SIGNED_LUMINANCE_NV 0x8701 +#define GL_SIGNED_LUMINANCE8_NV 0x8702 +#define GL_SIGNED_LUMINANCE_ALPHA_NV 0x8703 +#define GL_SIGNED_LUMINANCE8_ALPHA8_NV 0x8704 +#define GL_SIGNED_ALPHA_NV 0x8705 +#define GL_SIGNED_ALPHA8_NV 0x8706 +#define GL_SIGNED_INTENSITY_NV 0x8707 +#define GL_SIGNED_INTENSITY8_NV 0x8708 +#define GL_DSDT8_NV 0x8709 +#define GL_DSDT8_MAG8_NV 0x870A +#define GL_DSDT8_MAG8_INTENSITY8_NV 0x870B +#define GL_SIGNED_RGB_UNSIGNED_ALPHA_NV 0x870C +#define GL_SIGNED_RGB8_UNSIGNED_ALPHA8_NV 0x870D +#define GL_HI_SCALE_NV 0x870E +#define GL_LO_SCALE_NV 0x870F +#define GL_DS_SCALE_NV 0x8710 +#define GL_DT_SCALE_NV 0x8711 +#define GL_MAGNITUDE_SCALE_NV 0x8712 +#define GL_VIBRANCE_SCALE_NV 0x8713 +#define GL_HI_BIAS_NV 0x8714 +#define GL_LO_BIAS_NV 0x8715 +#define GL_DS_BIAS_NV 0x8716 +#define GL_DT_BIAS_NV 0x8717 +#define GL_MAGNITUDE_BIAS_NV 0x8718 +#define GL_VIBRANCE_BIAS_NV 0x8719 +#define GL_TEXTURE_BORDER_VALUES_NV 0x871A +#define GL_TEXTURE_HI_SIZE_NV 0x871B +#define GL_TEXTURE_LO_SIZE_NV 0x871C +#define GL_TEXTURE_DS_SIZE_NV 0x871D +#define GL_TEXTURE_DT_SIZE_NV 0x871E +#define GL_TEXTURE_MAG_SIZE_NV 0x871F + +#define GLEW_NV_texture_shader 
GLEW_GET_VAR(__GLEW_NV_texture_shader) + +#endif /* GL_NV_texture_shader */ + +/* ------------------------- GL_NV_texture_shader2 ------------------------- */ + +#ifndef GL_NV_texture_shader2 +#define GL_NV_texture_shader2 1 + +#define GL_UNSIGNED_INT_S8_S8_8_8_NV 0x86DA +#define GL_UNSIGNED_INT_8_8_S8_S8_REV_NV 0x86DB +#define GL_DSDT_MAG_INTENSITY_NV 0x86DC +#define GL_DOT_PRODUCT_TEXTURE_3D_NV 0x86EF +#define GL_HILO_NV 0x86F4 +#define GL_DSDT_NV 0x86F5 +#define GL_DSDT_MAG_NV 0x86F6 +#define GL_DSDT_MAG_VIB_NV 0x86F7 +#define GL_HILO16_NV 0x86F8 +#define GL_SIGNED_HILO_NV 0x86F9 +#define GL_SIGNED_HILO16_NV 0x86FA +#define GL_SIGNED_RGBA_NV 0x86FB +#define GL_SIGNED_RGBA8_NV 0x86FC +#define GL_SIGNED_RGB_NV 0x86FE +#define GL_SIGNED_RGB8_NV 0x86FF +#define GL_SIGNED_LUMINANCE_NV 0x8701 +#define GL_SIGNED_LUMINANCE8_NV 0x8702 +#define GL_SIGNED_LUMINANCE_ALPHA_NV 0x8703 +#define GL_SIGNED_LUMINANCE8_ALPHA8_NV 0x8704 +#define GL_SIGNED_ALPHA_NV 0x8705 +#define GL_SIGNED_ALPHA8_NV 0x8706 +#define GL_SIGNED_INTENSITY_NV 0x8707 +#define GL_SIGNED_INTENSITY8_NV 0x8708 +#define GL_DSDT8_NV 0x8709 +#define GL_DSDT8_MAG8_NV 0x870A +#define GL_DSDT8_MAG8_INTENSITY8_NV 0x870B +#define GL_SIGNED_RGB_UNSIGNED_ALPHA_NV 0x870C +#define GL_SIGNED_RGB8_UNSIGNED_ALPHA8_NV 0x870D + +#define GLEW_NV_texture_shader2 GLEW_GET_VAR(__GLEW_NV_texture_shader2) + +#endif /* GL_NV_texture_shader2 */ + +/* ------------------------- GL_NV_texture_shader3 ------------------------- */ + +#ifndef GL_NV_texture_shader3 +#define GL_NV_texture_shader3 1 + +#define GL_OFFSET_PROJECTIVE_TEXTURE_2D_NV 0x8850 +#define GL_OFFSET_PROJECTIVE_TEXTURE_2D_SCALE_NV 0x8851 +#define GL_OFFSET_PROJECTIVE_TEXTURE_RECTANGLE_NV 0x8852 +#define GL_OFFSET_PROJECTIVE_TEXTURE_RECTANGLE_SCALE_NV 0x8853 +#define GL_OFFSET_HILO_TEXTURE_2D_NV 0x8854 +#define GL_OFFSET_HILO_TEXTURE_RECTANGLE_NV 0x8855 +#define GL_OFFSET_HILO_PROJECTIVE_TEXTURE_2D_NV 0x8856 +#define GL_OFFSET_HILO_PROJECTIVE_TEXTURE_RECTANGLE_NV 0x8857 
+#define GL_DEPENDENT_HILO_TEXTURE_2D_NV 0x8858 +#define GL_DEPENDENT_RGB_TEXTURE_3D_NV 0x8859 +#define GL_DEPENDENT_RGB_TEXTURE_CUBE_MAP_NV 0x885A +#define GL_DOT_PRODUCT_PASS_THROUGH_NV 0x885B +#define GL_DOT_PRODUCT_TEXTURE_1D_NV 0x885C +#define GL_DOT_PRODUCT_AFFINE_DEPTH_REPLACE_NV 0x885D +#define GL_HILO8_NV 0x885E +#define GL_SIGNED_HILO8_NV 0x885F +#define GL_FORCE_BLUE_TO_ONE_NV 0x8860 + +#define GLEW_NV_texture_shader3 GLEW_GET_VAR(__GLEW_NV_texture_shader3) + +#endif /* GL_NV_texture_shader3 */ + +/* ------------------------ GL_NV_transform_feedback ----------------------- */ + +#ifndef GL_NV_transform_feedback +#define GL_NV_transform_feedback 1 + +#define GL_BACK_PRIMARY_COLOR_NV 0x8C77 +#define GL_BACK_SECONDARY_COLOR_NV 0x8C78 +#define GL_TEXTURE_COORD_NV 0x8C79 +#define GL_CLIP_DISTANCE_NV 0x8C7A +#define GL_VERTEX_ID_NV 0x8C7B +#define GL_PRIMITIVE_ID_NV 0x8C7C +#define GL_GENERIC_ATTRIB_NV 0x8C7D +#define GL_TRANSFORM_FEEDBACK_ATTRIBS_NV 0x8C7E +#define GL_TRANSFORM_FEEDBACK_BUFFER_MODE_NV 0x8C7F +#define GL_MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS_NV 0x8C80 +#define GL_ACTIVE_VARYINGS_NV 0x8C81 +#define GL_ACTIVE_VARYING_MAX_LENGTH_NV 0x8C82 +#define GL_TRANSFORM_FEEDBACK_VARYINGS_NV 0x8C83 +#define GL_TRANSFORM_FEEDBACK_BUFFER_START_NV 0x8C84 +#define GL_TRANSFORM_FEEDBACK_BUFFER_SIZE_NV 0x8C85 +#define GL_TRANSFORM_FEEDBACK_RECORD_NV 0x8C86 +#define GL_PRIMITIVES_GENERATED_NV 0x8C87 +#define GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN_NV 0x8C88 +#define GL_RASTERIZER_DISCARD_NV 0x8C89 +#define GL_MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS_NV 0x8C8A +#define GL_MAX_TRANSFORM_FEEDBACK_SEPARATE_ATTRIBS_NV 0x8C8B +#define GL_INTERLEAVED_ATTRIBS_NV 0x8C8C +#define GL_SEPARATE_ATTRIBS_NV 0x8C8D +#define GL_TRANSFORM_FEEDBACK_BUFFER_NV 0x8C8E +#define GL_TRANSFORM_FEEDBACK_BUFFER_BINDING_NV 0x8C8F + +typedef void (GLAPIENTRY * PFNGLACTIVEVARYINGNVPROC) (GLuint program, const GLchar *name); +typedef void (GLAPIENTRY * 
PFNGLBEGINTRANSFORMFEEDBACKNVPROC) (GLenum primitiveMode); +typedef void (GLAPIENTRY * PFNGLBINDBUFFERBASENVPROC) (GLenum target, GLuint index, GLuint buffer); +typedef void (GLAPIENTRY * PFNGLBINDBUFFEROFFSETNVPROC) (GLenum target, GLuint index, GLuint buffer, GLintptr offset); +typedef void (GLAPIENTRY * PFNGLBINDBUFFERRANGENVPROC) (GLenum target, GLuint index, GLuint buffer, GLintptr offset, GLsizeiptr size); +typedef void (GLAPIENTRY * PFNGLENDTRANSFORMFEEDBACKNVPROC) (void); +typedef void (GLAPIENTRY * PFNGLGETACTIVEVARYINGNVPROC) (GLuint program, GLuint index, GLsizei bufSize, GLsizei *length, GLsizei *size, GLenum *type, GLchar *name); +typedef void (GLAPIENTRY * PFNGLGETTRANSFORMFEEDBACKVARYINGNVPROC) (GLuint program, GLuint index, GLint *location); +typedef GLint (GLAPIENTRY * PFNGLGETVARYINGLOCATIONNVPROC) (GLuint program, const GLchar *name); +typedef void (GLAPIENTRY * PFNGLTRANSFORMFEEDBACKATTRIBSNVPROC) (GLuint count, const GLint *attribs, GLenum bufferMode); +typedef void (GLAPIENTRY * PFNGLTRANSFORMFEEDBACKVARYINGSNVPROC) (GLuint program, GLsizei count, const GLint *locations, GLenum bufferMode); + +#define glActiveVaryingNV GLEW_GET_FUN(__glewActiveVaryingNV) +#define glBeginTransformFeedbackNV GLEW_GET_FUN(__glewBeginTransformFeedbackNV) +#define glBindBufferBaseNV GLEW_GET_FUN(__glewBindBufferBaseNV) +#define glBindBufferOffsetNV GLEW_GET_FUN(__glewBindBufferOffsetNV) +#define glBindBufferRangeNV GLEW_GET_FUN(__glewBindBufferRangeNV) +#define glEndTransformFeedbackNV GLEW_GET_FUN(__glewEndTransformFeedbackNV) +#define glGetActiveVaryingNV GLEW_GET_FUN(__glewGetActiveVaryingNV) +#define glGetTransformFeedbackVaryingNV GLEW_GET_FUN(__glewGetTransformFeedbackVaryingNV) +#define glGetVaryingLocationNV GLEW_GET_FUN(__glewGetVaryingLocationNV) +#define glTransformFeedbackAttribsNV GLEW_GET_FUN(__glewTransformFeedbackAttribsNV) +#define glTransformFeedbackVaryingsNV GLEW_GET_FUN(__glewTransformFeedbackVaryingsNV) + +#define GLEW_NV_transform_feedback 
GLEW_GET_VAR(__GLEW_NV_transform_feedback) + +#endif /* GL_NV_transform_feedback */ + +/* ------------------------ GL_NV_vertex_array_range ----------------------- */ + +#ifndef GL_NV_vertex_array_range +#define GL_NV_vertex_array_range 1 + +#define GL_VERTEX_ARRAY_RANGE_NV 0x851D +#define GL_VERTEX_ARRAY_RANGE_LENGTH_NV 0x851E +#define GL_VERTEX_ARRAY_RANGE_VALID_NV 0x851F +#define GL_MAX_VERTEX_ARRAY_RANGE_ELEMENT_NV 0x8520 +#define GL_VERTEX_ARRAY_RANGE_POINTER_NV 0x8521 + +typedef void (GLAPIENTRY * PFNGLFLUSHVERTEXARRAYRANGENVPROC) (void); +typedef void (GLAPIENTRY * PFNGLVERTEXARRAYRANGENVPROC) (GLsizei length, void* pointer); + +#define glFlushVertexArrayRangeNV GLEW_GET_FUN(__glewFlushVertexArrayRangeNV) +#define glVertexArrayRangeNV GLEW_GET_FUN(__glewVertexArrayRangeNV) + +#define GLEW_NV_vertex_array_range GLEW_GET_VAR(__GLEW_NV_vertex_array_range) + +#endif /* GL_NV_vertex_array_range */ + +/* ----------------------- GL_NV_vertex_array_range2 ----------------------- */ + +#ifndef GL_NV_vertex_array_range2 +#define GL_NV_vertex_array_range2 1 + +#define GL_VERTEX_ARRAY_RANGE_WITHOUT_FLUSH_NV 0x8533 + +#define GLEW_NV_vertex_array_range2 GLEW_GET_VAR(__GLEW_NV_vertex_array_range2) + +#endif /* GL_NV_vertex_array_range2 */ + +/* -------------------------- GL_NV_vertex_program ------------------------- */ + +#ifndef GL_NV_vertex_program +#define GL_NV_vertex_program 1 + +#define GL_VERTEX_PROGRAM_NV 0x8620 +#define GL_VERTEX_STATE_PROGRAM_NV 0x8621 +#define GL_ATTRIB_ARRAY_SIZE_NV 0x8623 +#define GL_ATTRIB_ARRAY_STRIDE_NV 0x8624 +#define GL_ATTRIB_ARRAY_TYPE_NV 0x8625 +#define GL_CURRENT_ATTRIB_NV 0x8626 +#define GL_PROGRAM_LENGTH_NV 0x8627 +#define GL_PROGRAM_STRING_NV 0x8628 +#define GL_MODELVIEW_PROJECTION_NV 0x8629 +#define GL_IDENTITY_NV 0x862A +#define GL_INVERSE_NV 0x862B +#define GL_TRANSPOSE_NV 0x862C +#define GL_INVERSE_TRANSPOSE_NV 0x862D +#define GL_MAX_TRACK_MATRIX_STACK_DEPTH_NV 0x862E +#define GL_MAX_TRACK_MATRICES_NV 0x862F +#define 
GL_MATRIX0_NV 0x8630 +#define GL_MATRIX1_NV 0x8631 +#define GL_MATRIX2_NV 0x8632 +#define GL_MATRIX3_NV 0x8633 +#define GL_MATRIX4_NV 0x8634 +#define GL_MATRIX5_NV 0x8635 +#define GL_MATRIX6_NV 0x8636 +#define GL_MATRIX7_NV 0x8637 +#define GL_CURRENT_MATRIX_STACK_DEPTH_NV 0x8640 +#define GL_CURRENT_MATRIX_NV 0x8641 +#define GL_VERTEX_PROGRAM_POINT_SIZE_NV 0x8642 +#define GL_VERTEX_PROGRAM_TWO_SIDE_NV 0x8643 +#define GL_PROGRAM_PARAMETER_NV 0x8644 +#define GL_ATTRIB_ARRAY_POINTER_NV 0x8645 +#define GL_PROGRAM_TARGET_NV 0x8646 +#define GL_PROGRAM_RESIDENT_NV 0x8647 +#define GL_TRACK_MATRIX_NV 0x8648 +#define GL_TRACK_MATRIX_TRANSFORM_NV 0x8649 +#define GL_VERTEX_PROGRAM_BINDING_NV 0x864A +#define GL_PROGRAM_ERROR_POSITION_NV 0x864B +#define GL_VERTEX_ATTRIB_ARRAY0_NV 0x8650 +#define GL_VERTEX_ATTRIB_ARRAY1_NV 0x8651 +#define GL_VERTEX_ATTRIB_ARRAY2_NV 0x8652 +#define GL_VERTEX_ATTRIB_ARRAY3_NV 0x8653 +#define GL_VERTEX_ATTRIB_ARRAY4_NV 0x8654 +#define GL_VERTEX_ATTRIB_ARRAY5_NV 0x8655 +#define GL_VERTEX_ATTRIB_ARRAY6_NV 0x8656 +#define GL_VERTEX_ATTRIB_ARRAY7_NV 0x8657 +#define GL_VERTEX_ATTRIB_ARRAY8_NV 0x8658 +#define GL_VERTEX_ATTRIB_ARRAY9_NV 0x8659 +#define GL_VERTEX_ATTRIB_ARRAY10_NV 0x865A +#define GL_VERTEX_ATTRIB_ARRAY11_NV 0x865B +#define GL_VERTEX_ATTRIB_ARRAY12_NV 0x865C +#define GL_VERTEX_ATTRIB_ARRAY13_NV 0x865D +#define GL_VERTEX_ATTRIB_ARRAY14_NV 0x865E +#define GL_VERTEX_ATTRIB_ARRAY15_NV 0x865F +#define GL_MAP1_VERTEX_ATTRIB0_4_NV 0x8660 +#define GL_MAP1_VERTEX_ATTRIB1_4_NV 0x8661 +#define GL_MAP1_VERTEX_ATTRIB2_4_NV 0x8662 +#define GL_MAP1_VERTEX_ATTRIB3_4_NV 0x8663 +#define GL_MAP1_VERTEX_ATTRIB4_4_NV 0x8664 +#define GL_MAP1_VERTEX_ATTRIB5_4_NV 0x8665 +#define GL_MAP1_VERTEX_ATTRIB6_4_NV 0x8666 +#define GL_MAP1_VERTEX_ATTRIB7_4_NV 0x8667 +#define GL_MAP1_VERTEX_ATTRIB8_4_NV 0x8668 +#define GL_MAP1_VERTEX_ATTRIB9_4_NV 0x8669 +#define GL_MAP1_VERTEX_ATTRIB10_4_NV 0x866A +#define GL_MAP1_VERTEX_ATTRIB11_4_NV 0x866B +#define 
GL_MAP1_VERTEX_ATTRIB12_4_NV 0x866C +#define GL_MAP1_VERTEX_ATTRIB13_4_NV 0x866D +#define GL_MAP1_VERTEX_ATTRIB14_4_NV 0x866E +#define GL_MAP1_VERTEX_ATTRIB15_4_NV 0x866F +#define GL_MAP2_VERTEX_ATTRIB0_4_NV 0x8670 +#define GL_MAP2_VERTEX_ATTRIB1_4_NV 0x8671 +#define GL_MAP2_VERTEX_ATTRIB2_4_NV 0x8672 +#define GL_MAP2_VERTEX_ATTRIB3_4_NV 0x8673 +#define GL_MAP2_VERTEX_ATTRIB4_4_NV 0x8674 +#define GL_MAP2_VERTEX_ATTRIB5_4_NV 0x8675 +#define GL_MAP2_VERTEX_ATTRIB6_4_NV 0x8676 +#define GL_MAP2_VERTEX_ATTRIB7_4_NV 0x8677 +#define GL_MAP2_VERTEX_ATTRIB8_4_NV 0x8678 +#define GL_MAP2_VERTEX_ATTRIB9_4_NV 0x8679 +#define GL_MAP2_VERTEX_ATTRIB10_4_NV 0x867A +#define GL_MAP2_VERTEX_ATTRIB11_4_NV 0x867B +#define GL_MAP2_VERTEX_ATTRIB12_4_NV 0x867C +#define GL_MAP2_VERTEX_ATTRIB13_4_NV 0x867D +#define GL_MAP2_VERTEX_ATTRIB14_4_NV 0x867E +#define GL_MAP2_VERTEX_ATTRIB15_4_NV 0x867F + +typedef GLboolean (GLAPIENTRY * PFNGLAREPROGRAMSRESIDENTNVPROC) (GLsizei n, const GLuint* ids, GLboolean *residences); +typedef void (GLAPIENTRY * PFNGLBINDPROGRAMNVPROC) (GLenum target, GLuint id); +typedef void (GLAPIENTRY * PFNGLDELETEPROGRAMSNVPROC) (GLsizei n, const GLuint* ids); +typedef void (GLAPIENTRY * PFNGLEXECUTEPROGRAMNVPROC) (GLenum target, GLuint id, const GLfloat* params); +typedef void (GLAPIENTRY * PFNGLGENPROGRAMSNVPROC) (GLsizei n, GLuint* ids); +typedef void (GLAPIENTRY * PFNGLGETPROGRAMPARAMETERDVNVPROC) (GLenum target, GLuint index, GLenum pname, GLdouble* params); +typedef void (GLAPIENTRY * PFNGLGETPROGRAMPARAMETERFVNVPROC) (GLenum target, GLuint index, GLenum pname, GLfloat* params); +typedef void (GLAPIENTRY * PFNGLGETPROGRAMSTRINGNVPROC) (GLuint id, GLenum pname, GLubyte* program); +typedef void (GLAPIENTRY * PFNGLGETPROGRAMIVNVPROC) (GLuint id, GLenum pname, GLint* params); +typedef void (GLAPIENTRY * PFNGLGETTRACKMATRIXIVNVPROC) (GLenum target, GLuint address, GLenum pname, GLint* params); +typedef void (GLAPIENTRY * PFNGLGETVERTEXATTRIBPOINTERVNVPROC) (GLuint index, 
GLenum pname, GLvoid** pointer); +typedef void (GLAPIENTRY * PFNGLGETVERTEXATTRIBDVNVPROC) (GLuint index, GLenum pname, GLdouble* params); +typedef void (GLAPIENTRY * PFNGLGETVERTEXATTRIBFVNVPROC) (GLuint index, GLenum pname, GLfloat* params); +typedef void (GLAPIENTRY * PFNGLGETVERTEXATTRIBIVNVPROC) (GLuint index, GLenum pname, GLint* params); +typedef GLboolean (GLAPIENTRY * PFNGLISPROGRAMNVPROC) (GLuint id); +typedef void (GLAPIENTRY * PFNGLLOADPROGRAMNVPROC) (GLenum target, GLuint id, GLsizei len, const GLubyte* program); +typedef void (GLAPIENTRY * PFNGLPROGRAMPARAMETER4DNVPROC) (GLenum target, GLuint index, GLdouble x, GLdouble y, GLdouble z, GLdouble w); +typedef void (GLAPIENTRY * PFNGLPROGRAMPARAMETER4DVNVPROC) (GLenum target, GLuint index, const GLdouble* params); +typedef void (GLAPIENTRY * PFNGLPROGRAMPARAMETER4FNVPROC) (GLenum target, GLuint index, GLfloat x, GLfloat y, GLfloat z, GLfloat w); +typedef void (GLAPIENTRY * PFNGLPROGRAMPARAMETER4FVNVPROC) (GLenum target, GLuint index, const GLfloat* params); +typedef void (GLAPIENTRY * PFNGLPROGRAMPARAMETERS4DVNVPROC) (GLenum target, GLuint index, GLuint num, const GLdouble* params); +typedef void (GLAPIENTRY * PFNGLPROGRAMPARAMETERS4FVNVPROC) (GLenum target, GLuint index, GLuint num, const GLfloat* params); +typedef void (GLAPIENTRY * PFNGLREQUESTRESIDENTPROGRAMSNVPROC) (GLsizei n, GLuint* ids); +typedef void (GLAPIENTRY * PFNGLTRACKMATRIXNVPROC) (GLenum target, GLuint address, GLenum matrix, GLenum transform); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB1DNVPROC) (GLuint index, GLdouble x); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB1DVNVPROC) (GLuint index, const GLdouble* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB1FNVPROC) (GLuint index, GLfloat x); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB1FVNVPROC) (GLuint index, const GLfloat* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB1SNVPROC) (GLuint index, GLshort x); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB1SVNVPROC) (GLuint index, 
const GLshort* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB2DNVPROC) (GLuint index, GLdouble x, GLdouble y); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB2DVNVPROC) (GLuint index, const GLdouble* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB2FNVPROC) (GLuint index, GLfloat x, GLfloat y); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB2FVNVPROC) (GLuint index, const GLfloat* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB2SNVPROC) (GLuint index, GLshort x, GLshort y); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB2SVNVPROC) (GLuint index, const GLshort* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB3DNVPROC) (GLuint index, GLdouble x, GLdouble y, GLdouble z); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB3DVNVPROC) (GLuint index, const GLdouble* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB3FNVPROC) (GLuint index, GLfloat x, GLfloat y, GLfloat z); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB3FVNVPROC) (GLuint index, const GLfloat* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB3SNVPROC) (GLuint index, GLshort x, GLshort y, GLshort z); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB3SVNVPROC) (GLuint index, const GLshort* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB4DNVPROC) (GLuint index, GLdouble x, GLdouble y, GLdouble z, GLdouble w); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB4DVNVPROC) (GLuint index, const GLdouble* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB4FNVPROC) (GLuint index, GLfloat x, GLfloat y, GLfloat z, GLfloat w); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB4FVNVPROC) (GLuint index, const GLfloat* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB4SNVPROC) (GLuint index, GLshort x, GLshort y, GLshort z, GLshort w); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB4SVNVPROC) (GLuint index, const GLshort* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB4UBNVPROC) (GLuint index, GLubyte x, GLubyte y, GLubyte z, GLubyte w); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIB4UBVNVPROC) (GLuint index, const GLubyte* v); +typedef void 
(GLAPIENTRY * PFNGLVERTEXATTRIBPOINTERNVPROC) (GLuint index, GLint size, GLenum type, GLsizei stride, const void* pointer); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIBS1DVNVPROC) (GLuint index, GLsizei n, const GLdouble* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIBS1FVNVPROC) (GLuint index, GLsizei n, const GLfloat* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIBS1SVNVPROC) (GLuint index, GLsizei n, const GLshort* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIBS2DVNVPROC) (GLuint index, GLsizei n, const GLdouble* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIBS2FVNVPROC) (GLuint index, GLsizei n, const GLfloat* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIBS2SVNVPROC) (GLuint index, GLsizei n, const GLshort* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIBS3DVNVPROC) (GLuint index, GLsizei n, const GLdouble* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIBS3FVNVPROC) (GLuint index, GLsizei n, const GLfloat* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIBS3SVNVPROC) (GLuint index, GLsizei n, const GLshort* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIBS4DVNVPROC) (GLuint index, GLsizei n, const GLdouble* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIBS4FVNVPROC) (GLuint index, GLsizei n, const GLfloat* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIBS4SVNVPROC) (GLuint index, GLsizei n, const GLshort* v); +typedef void (GLAPIENTRY * PFNGLVERTEXATTRIBS4UBVNVPROC) (GLuint index, GLsizei n, const GLubyte* v); + +#define glAreProgramsResidentNV GLEW_GET_FUN(__glewAreProgramsResidentNV) +#define glBindProgramNV GLEW_GET_FUN(__glewBindProgramNV) +#define glDeleteProgramsNV GLEW_GET_FUN(__glewDeleteProgramsNV) +#define glExecuteProgramNV GLEW_GET_FUN(__glewExecuteProgramNV) +#define glGenProgramsNV GLEW_GET_FUN(__glewGenProgramsNV) +#define glGetProgramParameterdvNV GLEW_GET_FUN(__glewGetProgramParameterdvNV) +#define glGetProgramParameterfvNV GLEW_GET_FUN(__glewGetProgramParameterfvNV) +#define glGetProgramStringNV 
GLEW_GET_FUN(__glewGetProgramStringNV) +#define glGetProgramivNV GLEW_GET_FUN(__glewGetProgramivNV) +#define glGetTrackMatrixivNV GLEW_GET_FUN(__glewGetTrackMatrixivNV) +#define glGetVertexAttribPointervNV GLEW_GET_FUN(__glewGetVertexAttribPointervNV) +#define glGetVertexAttribdvNV GLEW_GET_FUN(__glewGetVertexAttribdvNV) +#define glGetVertexAttribfvNV GLEW_GET_FUN(__glewGetVertexAttribfvNV) +#define glGetVertexAttribivNV GLEW_GET_FUN(__glewGetVertexAttribivNV) +#define glIsProgramNV GLEW_GET_FUN(__glewIsProgramNV) +#define glLoadProgramNV GLEW_GET_FUN(__glewLoadProgramNV) +#define glProgramParameter4dNV GLEW_GET_FUN(__glewProgramParameter4dNV) +#define glProgramParameter4dvNV GLEW_GET_FUN(__glewProgramParameter4dvNV) +#define glProgramParameter4fNV GLEW_GET_FUN(__glewProgramParameter4fNV) +#define glProgramParameter4fvNV GLEW_GET_FUN(__glewProgramParameter4fvNV) +#define glProgramParameters4dvNV GLEW_GET_FUN(__glewProgramParameters4dvNV) +#define glProgramParameters4fvNV GLEW_GET_FUN(__glewProgramParameters4fvNV) +#define glRequestResidentProgramsNV GLEW_GET_FUN(__glewRequestResidentProgramsNV) +#define glTrackMatrixNV GLEW_GET_FUN(__glewTrackMatrixNV) +#define glVertexAttrib1dNV GLEW_GET_FUN(__glewVertexAttrib1dNV) +#define glVertexAttrib1dvNV GLEW_GET_FUN(__glewVertexAttrib1dvNV) +#define glVertexAttrib1fNV GLEW_GET_FUN(__glewVertexAttrib1fNV) +#define glVertexAttrib1fvNV GLEW_GET_FUN(__glewVertexAttrib1fvNV) +#define glVertexAttrib1sNV GLEW_GET_FUN(__glewVertexAttrib1sNV) +#define glVertexAttrib1svNV GLEW_GET_FUN(__glewVertexAttrib1svNV) +#define glVertexAttrib2dNV GLEW_GET_FUN(__glewVertexAttrib2dNV) +#define glVertexAttrib2dvNV GLEW_GET_FUN(__glewVertexAttrib2dvNV) +#define glVertexAttrib2fNV GLEW_GET_FUN(__glewVertexAttrib2fNV) +#define glVertexAttrib2fvNV GLEW_GET_FUN(__glewVertexAttrib2fvNV) +#define glVertexAttrib2sNV GLEW_GET_FUN(__glewVertexAttrib2sNV) +#define glVertexAttrib2svNV GLEW_GET_FUN(__glewVertexAttrib2svNV) +#define glVertexAttrib3dNV 
GLEW_GET_FUN(__glewVertexAttrib3dNV) +#define glVertexAttrib3dvNV GLEW_GET_FUN(__glewVertexAttrib3dvNV) +#define glVertexAttrib3fNV GLEW_GET_FUN(__glewVertexAttrib3fNV) +#define glVertexAttrib3fvNV GLEW_GET_FUN(__glewVertexAttrib3fvNV) +#define glVertexAttrib3sNV GLEW_GET_FUN(__glewVertexAttrib3sNV) +#define glVertexAttrib3svNV GLEW_GET_FUN(__glewVertexAttrib3svNV) +#define glVertexAttrib4dNV GLEW_GET_FUN(__glewVertexAttrib4dNV) +#define glVertexAttrib4dvNV GLEW_GET_FUN(__glewVertexAttrib4dvNV) +#define glVertexAttrib4fNV GLEW_GET_FUN(__glewVertexAttrib4fNV) +#define glVertexAttrib4fvNV GLEW_GET_FUN(__glewVertexAttrib4fvNV) +#define glVertexAttrib4sNV GLEW_GET_FUN(__glewVertexAttrib4sNV) +#define glVertexAttrib4svNV GLEW_GET_FUN(__glewVertexAttrib4svNV) +#define glVertexAttrib4ubNV GLEW_GET_FUN(__glewVertexAttrib4ubNV) +#define glVertexAttrib4ubvNV GLEW_GET_FUN(__glewVertexAttrib4ubvNV) +#define glVertexAttribPointerNV GLEW_GET_FUN(__glewVertexAttribPointerNV) +#define glVertexAttribs1dvNV GLEW_GET_FUN(__glewVertexAttribs1dvNV) +#define glVertexAttribs1fvNV GLEW_GET_FUN(__glewVertexAttribs1fvNV) +#define glVertexAttribs1svNV GLEW_GET_FUN(__glewVertexAttribs1svNV) +#define glVertexAttribs2dvNV GLEW_GET_FUN(__glewVertexAttribs2dvNV) +#define glVertexAttribs2fvNV GLEW_GET_FUN(__glewVertexAttribs2fvNV) +#define glVertexAttribs2svNV GLEW_GET_FUN(__glewVertexAttribs2svNV) +#define glVertexAttribs3dvNV GLEW_GET_FUN(__glewVertexAttribs3dvNV) +#define glVertexAttribs3fvNV GLEW_GET_FUN(__glewVertexAttribs3fvNV) +#define glVertexAttribs3svNV GLEW_GET_FUN(__glewVertexAttribs3svNV) +#define glVertexAttribs4dvNV GLEW_GET_FUN(__glewVertexAttribs4dvNV) +#define glVertexAttribs4fvNV GLEW_GET_FUN(__glewVertexAttribs4fvNV) +#define glVertexAttribs4svNV GLEW_GET_FUN(__glewVertexAttribs4svNV) +#define glVertexAttribs4ubvNV GLEW_GET_FUN(__glewVertexAttribs4ubvNV) + +#define GLEW_NV_vertex_program GLEW_GET_VAR(__GLEW_NV_vertex_program) + +#endif /* GL_NV_vertex_program */ + +/* 
------------------------ GL_NV_vertex_program1_1 ------------------------ */ + +#ifndef GL_NV_vertex_program1_1 +#define GL_NV_vertex_program1_1 1 + +#define GLEW_NV_vertex_program1_1 GLEW_GET_VAR(__GLEW_NV_vertex_program1_1) + +#endif /* GL_NV_vertex_program1_1 */ + +/* ------------------------- GL_NV_vertex_program2 ------------------------- */ + +#ifndef GL_NV_vertex_program2 +#define GL_NV_vertex_program2 1 + +#define GLEW_NV_vertex_program2 GLEW_GET_VAR(__GLEW_NV_vertex_program2) + +#endif /* GL_NV_vertex_program2 */ + +/* ---------------------- GL_NV_vertex_program2_option --------------------- */ + +#ifndef GL_NV_vertex_program2_option +#define GL_NV_vertex_program2_option 1 + +#define GL_MAX_PROGRAM_EXEC_INSTRUCTIONS_NV 0x88F4 +#define GL_MAX_PROGRAM_CALL_DEPTH_NV 0x88F5 + +#define GLEW_NV_vertex_program2_option GLEW_GET_VAR(__GLEW_NV_vertex_program2_option) + +#endif /* GL_NV_vertex_program2_option */ + +/* ------------------------- GL_NV_vertex_program3 ------------------------- */ + +#ifndef GL_NV_vertex_program3 +#define GL_NV_vertex_program3 1 + +#define MAX_VERTEX_TEXTURE_IMAGE_UNITS_ARB 0x8B4C + +#define GLEW_NV_vertex_program3 GLEW_GET_VAR(__GLEW_NV_vertex_program3) + +#endif /* GL_NV_vertex_program3 */ + +/* ------------------------- GL_NV_vertex_program4 ------------------------- */ + +#ifndef GL_NV_vertex_program4 +#define GL_NV_vertex_program4 1 + +#define GLEW_NV_vertex_program4 GLEW_GET_VAR(__GLEW_NV_vertex_program4) + +#endif /* GL_NV_vertex_program4 */ + +/* ------------------------ GL_OES_byte_coordinates ------------------------ */ + +#ifndef GL_OES_byte_coordinates +#define GL_OES_byte_coordinates 1 + +#define GL_BYTE 0x1400 + +#define GLEW_OES_byte_coordinates GLEW_GET_VAR(__GLEW_OES_byte_coordinates) + +#endif /* GL_OES_byte_coordinates */ + +/* ------------------- GL_OES_compressed_paletted_texture ------------------ */ + +#ifndef GL_OES_compressed_paletted_texture +#define GL_OES_compressed_paletted_texture 1 + +#define 
GL_PALETTE4_RGB8_OES 0x8B90 +#define GL_PALETTE4_RGBA8_OES 0x8B91 +#define GL_PALETTE4_R5_G6_B5_OES 0x8B92 +#define GL_PALETTE4_RGBA4_OES 0x8B93 +#define GL_PALETTE4_RGB5_A1_OES 0x8B94 +#define GL_PALETTE8_RGB8_OES 0x8B95 +#define GL_PALETTE8_RGBA8_OES 0x8B96 +#define GL_PALETTE8_R5_G6_B5_OES 0x8B97 +#define GL_PALETTE8_RGBA4_OES 0x8B98 +#define GL_PALETTE8_RGB5_A1_OES 0x8B99 + +#define GLEW_OES_compressed_paletted_texture GLEW_GET_VAR(__GLEW_OES_compressed_paletted_texture) + +#endif /* GL_OES_compressed_paletted_texture */ + +/* --------------------------- GL_OES_read_format -------------------------- */ + +#ifndef GL_OES_read_format +#define GL_OES_read_format 1 + +#define GL_IMPLEMENTATION_COLOR_READ_TYPE_OES 0x8B9A +#define GL_IMPLEMENTATION_COLOR_READ_FORMAT_OES 0x8B9B + +#define GLEW_OES_read_format GLEW_GET_VAR(__GLEW_OES_read_format) + +#endif /* GL_OES_read_format */ + +/* ------------------------ GL_OES_single_precision ------------------------ */ + +#ifndef GL_OES_single_precision +#define GL_OES_single_precision 1 + +typedef void (GLAPIENTRY * PFNGLCLEARDEPTHFOESPROC) (GLclampd depth); +typedef void (GLAPIENTRY * PFNGLCLIPPLANEFOESPROC) (GLenum plane, const GLfloat* equation); +typedef void (GLAPIENTRY * PFNGLDEPTHRANGEFOESPROC) (GLclampf n, GLclampf f); +typedef void (GLAPIENTRY * PFNGLFRUSTUMFOESPROC) (GLfloat l, GLfloat r, GLfloat b, GLfloat t, GLfloat n, GLfloat f); +typedef void (GLAPIENTRY * PFNGLGETCLIPPLANEFOESPROC) (GLenum plane, GLfloat* equation); +typedef void (GLAPIENTRY * PFNGLORTHOFOESPROC) (GLfloat l, GLfloat r, GLfloat b, GLfloat t, GLfloat n, GLfloat f); + +#define glClearDepthfOES GLEW_GET_FUN(__glewClearDepthfOES) +#define glClipPlanefOES GLEW_GET_FUN(__glewClipPlanefOES) +#define glDepthRangefOES GLEW_GET_FUN(__glewDepthRangefOES) +#define glFrustumfOES GLEW_GET_FUN(__glewFrustumfOES) +#define glGetClipPlanefOES GLEW_GET_FUN(__glewGetClipPlanefOES) +#define glOrthofOES GLEW_GET_FUN(__glewOrthofOES) + +#define 
GLEW_OES_single_precision GLEW_GET_VAR(__GLEW_OES_single_precision) + +#endif /* GL_OES_single_precision */ + +/* ---------------------------- GL_OML_interlace --------------------------- */ + +#ifndef GL_OML_interlace +#define GL_OML_interlace 1 + +#define GL_INTERLACE_OML 0x8980 +#define GL_INTERLACE_READ_OML 0x8981 + +#define GLEW_OML_interlace GLEW_GET_VAR(__GLEW_OML_interlace) + +#endif /* GL_OML_interlace */ + +/* ---------------------------- GL_OML_resample ---------------------------- */ + +#ifndef GL_OML_resample +#define GL_OML_resample 1 + +#define GL_PACK_RESAMPLE_OML 0x8984 +#define GL_UNPACK_RESAMPLE_OML 0x8985 +#define GL_RESAMPLE_REPLICATE_OML 0x8986 +#define GL_RESAMPLE_ZERO_FILL_OML 0x8987 +#define GL_RESAMPLE_AVERAGE_OML 0x8988 +#define GL_RESAMPLE_DECIMATE_OML 0x8989 + +#define GLEW_OML_resample GLEW_GET_VAR(__GLEW_OML_resample) + +#endif /* GL_OML_resample */ + +/* ---------------------------- GL_OML_subsample --------------------------- */ + +#ifndef GL_OML_subsample +#define GL_OML_subsample 1 + +#define GL_FORMAT_SUBSAMPLE_24_24_OML 0x8982 +#define GL_FORMAT_SUBSAMPLE_244_244_OML 0x8983 + +#define GLEW_OML_subsample GLEW_GET_VAR(__GLEW_OML_subsample) + +#endif /* GL_OML_subsample */ + +/* --------------------------- GL_PGI_misc_hints --------------------------- */ + +#ifndef GL_PGI_misc_hints +#define GL_PGI_misc_hints 1 + +#define GL_PREFER_DOUBLEBUFFER_HINT_PGI 107000 +#define GL_CONSERVE_MEMORY_HINT_PGI 107005 +#define GL_RECLAIM_MEMORY_HINT_PGI 107006 +#define GL_NATIVE_GRAPHICS_HANDLE_PGI 107010 +#define GL_NATIVE_GRAPHICS_BEGIN_HINT_PGI 107011 +#define GL_NATIVE_GRAPHICS_END_HINT_PGI 107012 +#define GL_ALWAYS_FAST_HINT_PGI 107020 +#define GL_ALWAYS_SOFT_HINT_PGI 107021 +#define GL_ALLOW_DRAW_OBJ_HINT_PGI 107022 +#define GL_ALLOW_DRAW_WIN_HINT_PGI 107023 +#define GL_ALLOW_DRAW_FRG_HINT_PGI 107024 +#define GL_ALLOW_DRAW_MEM_HINT_PGI 107025 +#define GL_STRICT_DEPTHFUNC_HINT_PGI 107030 +#define GL_STRICT_LIGHTING_HINT_PGI 107031 +#define 
GL_STRICT_SCISSOR_HINT_PGI 107032 +#define GL_FULL_STIPPLE_HINT_PGI 107033 +#define GL_CLIP_NEAR_HINT_PGI 107040 +#define GL_CLIP_FAR_HINT_PGI 107041 +#define GL_WIDE_LINE_HINT_PGI 107042 +#define GL_BACK_NORMALS_HINT_PGI 107043 + +#define GLEW_PGI_misc_hints GLEW_GET_VAR(__GLEW_PGI_misc_hints) + +#endif /* GL_PGI_misc_hints */ + +/* -------------------------- GL_PGI_vertex_hints -------------------------- */ + +#ifndef GL_PGI_vertex_hints +#define GL_PGI_vertex_hints 1 + +#define GL_VERTEX23_BIT_PGI 0x00000004 +#define GL_VERTEX4_BIT_PGI 0x00000008 +#define GL_COLOR3_BIT_PGI 0x00010000 +#define GL_COLOR4_BIT_PGI 0x00020000 +#define GL_EDGEFLAG_BIT_PGI 0x00040000 +#define GL_INDEX_BIT_PGI 0x00080000 +#define GL_MAT_AMBIENT_BIT_PGI 0x00100000 +#define GL_VERTEX_DATA_HINT_PGI 107050 +#define GL_VERTEX_CONSISTENT_HINT_PGI 107051 +#define GL_MATERIAL_SIDE_HINT_PGI 107052 +#define GL_MAX_VERTEX_HINT_PGI 107053 +#define GL_MAT_AMBIENT_AND_DIFFUSE_BIT_PGI 0x00200000 +#define GL_MAT_DIFFUSE_BIT_PGI 0x00400000 +#define GL_MAT_EMISSION_BIT_PGI 0x00800000 +#define GL_MAT_COLOR_INDEXES_BIT_PGI 0x01000000 +#define GL_MAT_SHININESS_BIT_PGI 0x02000000 +#define GL_MAT_SPECULAR_BIT_PGI 0x04000000 +#define GL_NORMAL_BIT_PGI 0x08000000 +#define GL_TEXCOORD1_BIT_PGI 0x10000000 +#define GL_TEXCOORD2_BIT_PGI 0x20000000 +#define GL_TEXCOORD3_BIT_PGI 0x40000000 +#define GL_TEXCOORD4_BIT_PGI 0x80000000 + +#define GLEW_PGI_vertex_hints GLEW_GET_VAR(__GLEW_PGI_vertex_hints) + +#endif /* GL_PGI_vertex_hints */ + +/* ----------------------- GL_REND_screen_coordinates ---------------------- */ + +#ifndef GL_REND_screen_coordinates +#define GL_REND_screen_coordinates 1 + +#define GL_SCREEN_COORDINATES_REND 0x8490 +#define GL_INVERTED_SCREEN_W_REND 0x8491 + +#define GLEW_REND_screen_coordinates GLEW_GET_VAR(__GLEW_REND_screen_coordinates) + +#endif /* GL_REND_screen_coordinates */ + +/* ------------------------------- GL_S3_s3tc ------------------------------ */ + +#ifndef GL_S3_s3tc +#define 
GL_S3_s3tc 1 + +#define GL_RGB_S3TC 0x83A0 +#define GL_RGB4_S3TC 0x83A1 +#define GL_RGBA_S3TC 0x83A2 +#define GL_RGBA4_S3TC 0x83A3 +#define GL_RGBA_DXT5_S3TC 0x83A4 +#define GL_RGBA4_DXT5_S3TC 0x83A5 + +#define GLEW_S3_s3tc GLEW_GET_VAR(__GLEW_S3_s3tc) + +#endif /* GL_S3_s3tc */ + +/* -------------------------- GL_SGIS_color_range -------------------------- */ + +#ifndef GL_SGIS_color_range +#define GL_SGIS_color_range 1 + +#define GL_EXTENDED_RANGE_SGIS 0x85A5 +#define GL_MIN_RED_SGIS 0x85A6 +#define GL_MAX_RED_SGIS 0x85A7 +#define GL_MIN_GREEN_SGIS 0x85A8 +#define GL_MAX_GREEN_SGIS 0x85A9 +#define GL_MIN_BLUE_SGIS 0x85AA +#define GL_MAX_BLUE_SGIS 0x85AB +#define GL_MIN_ALPHA_SGIS 0x85AC +#define GL_MAX_ALPHA_SGIS 0x85AD + +#define GLEW_SGIS_color_range GLEW_GET_VAR(__GLEW_SGIS_color_range) + +#endif /* GL_SGIS_color_range */ + +/* ------------------------- GL_SGIS_detail_texture ------------------------ */ + +#ifndef GL_SGIS_detail_texture +#define GL_SGIS_detail_texture 1 + +typedef void (GLAPIENTRY * PFNGLDETAILTEXFUNCSGISPROC) (GLenum target, GLsizei n, const GLfloat* points); +typedef void (GLAPIENTRY * PFNGLGETDETAILTEXFUNCSGISPROC) (GLenum target, GLfloat* points); + +#define glDetailTexFuncSGIS GLEW_GET_FUN(__glewDetailTexFuncSGIS) +#define glGetDetailTexFuncSGIS GLEW_GET_FUN(__glewGetDetailTexFuncSGIS) + +#define GLEW_SGIS_detail_texture GLEW_GET_VAR(__GLEW_SGIS_detail_texture) + +#endif /* GL_SGIS_detail_texture */ + +/* -------------------------- GL_SGIS_fog_function ------------------------- */ + +#ifndef GL_SGIS_fog_function +#define GL_SGIS_fog_function 1 + +typedef void (GLAPIENTRY * PFNGLFOGFUNCSGISPROC) (GLsizei n, const GLfloat* points); +typedef void (GLAPIENTRY * PFNGLGETFOGFUNCSGISPROC) (GLfloat* points); + +#define glFogFuncSGIS GLEW_GET_FUN(__glewFogFuncSGIS) +#define glGetFogFuncSGIS GLEW_GET_FUN(__glewGetFogFuncSGIS) + +#define GLEW_SGIS_fog_function GLEW_GET_VAR(__GLEW_SGIS_fog_function) + +#endif /* GL_SGIS_fog_function */ + +/* 
------------------------ GL_SGIS_generate_mipmap ------------------------ */ + +#ifndef GL_SGIS_generate_mipmap +#define GL_SGIS_generate_mipmap 1 + +#define GL_GENERATE_MIPMAP_SGIS 0x8191 +#define GL_GENERATE_MIPMAP_HINT_SGIS 0x8192 + +#define GLEW_SGIS_generate_mipmap GLEW_GET_VAR(__GLEW_SGIS_generate_mipmap) + +#endif /* GL_SGIS_generate_mipmap */ + +/* -------------------------- GL_SGIS_multisample -------------------------- */ + +#ifndef GL_SGIS_multisample +#define GL_SGIS_multisample 1 + +#define GL_MULTISAMPLE_SGIS 0x809D +#define GL_SAMPLE_ALPHA_TO_MASK_SGIS 0x809E +#define GL_SAMPLE_ALPHA_TO_ONE_SGIS 0x809F +#define GL_SAMPLE_MASK_SGIS 0x80A0 +#define GL_1PASS_SGIS 0x80A1 +#define GL_2PASS_0_SGIS 0x80A2 +#define GL_2PASS_1_SGIS 0x80A3 +#define GL_4PASS_0_SGIS 0x80A4 +#define GL_4PASS_1_SGIS 0x80A5 +#define GL_4PASS_2_SGIS 0x80A6 +#define GL_4PASS_3_SGIS 0x80A7 +#define GL_SAMPLE_BUFFERS_SGIS 0x80A8 +#define GL_SAMPLES_SGIS 0x80A9 +#define GL_SAMPLE_MASK_VALUE_SGIS 0x80AA +#define GL_SAMPLE_MASK_INVERT_SGIS 0x80AB +#define GL_SAMPLE_PATTERN_SGIS 0x80AC +#define GL_MULTISAMPLE_BIT_EXT 0x20000000 + +typedef void (GLAPIENTRY * PFNGLSAMPLEMASKSGISPROC) (GLclampf value, GLboolean invert); +typedef void (GLAPIENTRY * PFNGLSAMPLEPATTERNSGISPROC) (GLenum pattern); + +#define glSampleMaskSGIS GLEW_GET_FUN(__glewSampleMaskSGIS) +#define glSamplePatternSGIS GLEW_GET_FUN(__glewSamplePatternSGIS) + +#define GLEW_SGIS_multisample GLEW_GET_VAR(__GLEW_SGIS_multisample) + +#endif /* GL_SGIS_multisample */ + +/* ------------------------- GL_SGIS_pixel_texture ------------------------- */ + +#ifndef GL_SGIS_pixel_texture +#define GL_SGIS_pixel_texture 1 + +#define GLEW_SGIS_pixel_texture GLEW_GET_VAR(__GLEW_SGIS_pixel_texture) + +#endif /* GL_SGIS_pixel_texture */ + +/* ----------------------- GL_SGIS_point_line_texgen ----------------------- */ + +#ifndef GL_SGIS_point_line_texgen +#define GL_SGIS_point_line_texgen 1 + +#define GL_EYE_DISTANCE_TO_POINT_SGIS 0x81F0 +#define 
GL_OBJECT_DISTANCE_TO_POINT_SGIS 0x81F1 +#define GL_EYE_DISTANCE_TO_LINE_SGIS 0x81F2 +#define GL_OBJECT_DISTANCE_TO_LINE_SGIS 0x81F3 +#define GL_EYE_POINT_SGIS 0x81F4 +#define GL_OBJECT_POINT_SGIS 0x81F5 +#define GL_EYE_LINE_SGIS 0x81F6 +#define GL_OBJECT_LINE_SGIS 0x81F7 + +#define GLEW_SGIS_point_line_texgen GLEW_GET_VAR(__GLEW_SGIS_point_line_texgen) + +#endif /* GL_SGIS_point_line_texgen */ + +/* ------------------------ GL_SGIS_sharpen_texture ------------------------ */ + +#ifndef GL_SGIS_sharpen_texture +#define GL_SGIS_sharpen_texture 1 + +typedef void (GLAPIENTRY * PFNGLGETSHARPENTEXFUNCSGISPROC) (GLenum target, GLfloat* points); +typedef void (GLAPIENTRY * PFNGLSHARPENTEXFUNCSGISPROC) (GLenum target, GLsizei n, const GLfloat* points); + +#define glGetSharpenTexFuncSGIS GLEW_GET_FUN(__glewGetSharpenTexFuncSGIS) +#define glSharpenTexFuncSGIS GLEW_GET_FUN(__glewSharpenTexFuncSGIS) + +#define GLEW_SGIS_sharpen_texture GLEW_GET_VAR(__GLEW_SGIS_sharpen_texture) + +#endif /* GL_SGIS_sharpen_texture */ + +/* --------------------------- GL_SGIS_texture4D --------------------------- */ + +#ifndef GL_SGIS_texture4D +#define GL_SGIS_texture4D 1 + +typedef void (GLAPIENTRY * PFNGLTEXIMAGE4DSGISPROC) (GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth, GLsizei extent, GLint border, GLenum format, GLenum type, const void* pixels); +typedef void (GLAPIENTRY * PFNGLTEXSUBIMAGE4DSGISPROC) (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLint woffset, GLsizei width, GLsizei height, GLsizei depth, GLsizei extent, GLenum format, GLenum type, const void* pixels); + +#define glTexImage4DSGIS GLEW_GET_FUN(__glewTexImage4DSGIS) +#define glTexSubImage4DSGIS GLEW_GET_FUN(__glewTexSubImage4DSGIS) + +#define GLEW_SGIS_texture4D GLEW_GET_VAR(__GLEW_SGIS_texture4D) + +#endif /* GL_SGIS_texture4D */ + +/* ---------------------- GL_SGIS_texture_border_clamp --------------------- */ + +#ifndef 
GL_SGIS_texture_border_clamp +#define GL_SGIS_texture_border_clamp 1 + +#define GL_CLAMP_TO_BORDER_SGIS 0x812D + +#define GLEW_SGIS_texture_border_clamp GLEW_GET_VAR(__GLEW_SGIS_texture_border_clamp) + +#endif /* GL_SGIS_texture_border_clamp */ + +/* ----------------------- GL_SGIS_texture_edge_clamp ---------------------- */ + +#ifndef GL_SGIS_texture_edge_clamp +#define GL_SGIS_texture_edge_clamp 1 + +#define GL_CLAMP_TO_EDGE_SGIS 0x812F + +#define GLEW_SGIS_texture_edge_clamp GLEW_GET_VAR(__GLEW_SGIS_texture_edge_clamp) + +#endif /* GL_SGIS_texture_edge_clamp */ + +/* ------------------------ GL_SGIS_texture_filter4 ------------------------ */ + +#ifndef GL_SGIS_texture_filter4 +#define GL_SGIS_texture_filter4 1 + +typedef void (GLAPIENTRY * PFNGLGETTEXFILTERFUNCSGISPROC) (GLenum target, GLenum filter, GLfloat* weights); +typedef void (GLAPIENTRY * PFNGLTEXFILTERFUNCSGISPROC) (GLenum target, GLenum filter, GLsizei n, const GLfloat* weights); + +#define glGetTexFilterFuncSGIS GLEW_GET_FUN(__glewGetTexFilterFuncSGIS) +#define glTexFilterFuncSGIS GLEW_GET_FUN(__glewTexFilterFuncSGIS) + +#define GLEW_SGIS_texture_filter4 GLEW_GET_VAR(__GLEW_SGIS_texture_filter4) + +#endif /* GL_SGIS_texture_filter4 */ + +/* -------------------------- GL_SGIS_texture_lod -------------------------- */ + +#ifndef GL_SGIS_texture_lod +#define GL_SGIS_texture_lod 1 + +#define GL_TEXTURE_MIN_LOD_SGIS 0x813A +#define GL_TEXTURE_MAX_LOD_SGIS 0x813B +#define GL_TEXTURE_BASE_LEVEL_SGIS 0x813C +#define GL_TEXTURE_MAX_LEVEL_SGIS 0x813D + +#define GLEW_SGIS_texture_lod GLEW_GET_VAR(__GLEW_SGIS_texture_lod) + +#endif /* GL_SGIS_texture_lod */ + +/* ------------------------- GL_SGIS_texture_select ------------------------ */ + +#ifndef GL_SGIS_texture_select +#define GL_SGIS_texture_select 1 + +#define GLEW_SGIS_texture_select GLEW_GET_VAR(__GLEW_SGIS_texture_select) + +#endif /* GL_SGIS_texture_select */ + +/* ----------------------------- GL_SGIX_async ----------------------------- */ + +#ifndef 
GL_SGIX_async +#define GL_SGIX_async 1 + +#define GL_ASYNC_MARKER_SGIX 0x8329 + +typedef void (GLAPIENTRY * PFNGLASYNCMARKERSGIXPROC) (GLuint marker); +typedef void (GLAPIENTRY * PFNGLDELETEASYNCMARKERSSGIXPROC) (GLuint marker, GLsizei range); +typedef GLint (GLAPIENTRY * PFNGLFINISHASYNCSGIXPROC) (GLuint* markerp); +typedef GLuint (GLAPIENTRY * PFNGLGENASYNCMARKERSSGIXPROC) (GLsizei range); +typedef GLboolean (GLAPIENTRY * PFNGLISASYNCMARKERSGIXPROC) (GLuint marker); +typedef GLint (GLAPIENTRY * PFNGLPOLLASYNCSGIXPROC) (GLuint* markerp); + +#define glAsyncMarkerSGIX GLEW_GET_FUN(__glewAsyncMarkerSGIX) +#define glDeleteAsyncMarkersSGIX GLEW_GET_FUN(__glewDeleteAsyncMarkersSGIX) +#define glFinishAsyncSGIX GLEW_GET_FUN(__glewFinishAsyncSGIX) +#define glGenAsyncMarkersSGIX GLEW_GET_FUN(__glewGenAsyncMarkersSGIX) +#define glIsAsyncMarkerSGIX GLEW_GET_FUN(__glewIsAsyncMarkerSGIX) +#define glPollAsyncSGIX GLEW_GET_FUN(__glewPollAsyncSGIX) + +#define GLEW_SGIX_async GLEW_GET_VAR(__GLEW_SGIX_async) + +#endif /* GL_SGIX_async */ + +/* ------------------------ GL_SGIX_async_histogram ------------------------ */ + +#ifndef GL_SGIX_async_histogram +#define GL_SGIX_async_histogram 1 + +#define GL_ASYNC_HISTOGRAM_SGIX 0x832C +#define GL_MAX_ASYNC_HISTOGRAM_SGIX 0x832D + +#define GLEW_SGIX_async_histogram GLEW_GET_VAR(__GLEW_SGIX_async_histogram) + +#endif /* GL_SGIX_async_histogram */ + +/* -------------------------- GL_SGIX_async_pixel -------------------------- */ + +#ifndef GL_SGIX_async_pixel +#define GL_SGIX_async_pixel 1 + +#define GL_ASYNC_TEX_IMAGE_SGIX 0x835C +#define GL_ASYNC_DRAW_PIXELS_SGIX 0x835D +#define GL_ASYNC_READ_PIXELS_SGIX 0x835E +#define GL_MAX_ASYNC_TEX_IMAGE_SGIX 0x835F +#define GL_MAX_ASYNC_DRAW_PIXELS_SGIX 0x8360 +#define GL_MAX_ASYNC_READ_PIXELS_SGIX 0x8361 + +#define GLEW_SGIX_async_pixel GLEW_GET_VAR(__GLEW_SGIX_async_pixel) + +#endif /* GL_SGIX_async_pixel */ + +/* ----------------------- GL_SGIX_blend_alpha_minmax ---------------------- */ + 
+#ifndef GL_SGIX_blend_alpha_minmax +#define GL_SGIX_blend_alpha_minmax 1 + +#define GL_ALPHA_MIN_SGIX 0x8320 +#define GL_ALPHA_MAX_SGIX 0x8321 + +#define GLEW_SGIX_blend_alpha_minmax GLEW_GET_VAR(__GLEW_SGIX_blend_alpha_minmax) + +#endif /* GL_SGIX_blend_alpha_minmax */ + +/* ---------------------------- GL_SGIX_clipmap ---------------------------- */ + +#ifndef GL_SGIX_clipmap +#define GL_SGIX_clipmap 1 + +#define GLEW_SGIX_clipmap GLEW_GET_VAR(__GLEW_SGIX_clipmap) + +#endif /* GL_SGIX_clipmap */ + +/* ---------------------- GL_SGIX_convolution_accuracy --------------------- */ + +#ifndef GL_SGIX_convolution_accuracy +#define GL_SGIX_convolution_accuracy 1 + +#define GL_CONVOLUTION_HINT_SGIX 0x8316 + +#define GLEW_SGIX_convolution_accuracy GLEW_GET_VAR(__GLEW_SGIX_convolution_accuracy) + +#endif /* GL_SGIX_convolution_accuracy */ + +/* ------------------------- GL_SGIX_depth_texture ------------------------- */ + +#ifndef GL_SGIX_depth_texture +#define GL_SGIX_depth_texture 1 + +#define GL_DEPTH_COMPONENT16_SGIX 0x81A5 +#define GL_DEPTH_COMPONENT24_SGIX 0x81A6 +#define GL_DEPTH_COMPONENT32_SGIX 0x81A7 + +#define GLEW_SGIX_depth_texture GLEW_GET_VAR(__GLEW_SGIX_depth_texture) + +#endif /* GL_SGIX_depth_texture */ + +/* -------------------------- GL_SGIX_flush_raster ------------------------- */ + +#ifndef GL_SGIX_flush_raster +#define GL_SGIX_flush_raster 1 + +typedef void (GLAPIENTRY * PFNGLFLUSHRASTERSGIXPROC) (void); + +#define glFlushRasterSGIX GLEW_GET_FUN(__glewFlushRasterSGIX) + +#define GLEW_SGIX_flush_raster GLEW_GET_VAR(__GLEW_SGIX_flush_raster) + +#endif /* GL_SGIX_flush_raster */ + +/* --------------------------- GL_SGIX_fog_offset -------------------------- */ + +#ifndef GL_SGIX_fog_offset +#define GL_SGIX_fog_offset 1 + +#define GL_FOG_OFFSET_SGIX 0x8198 +#define GL_FOG_OFFSET_VALUE_SGIX 0x8199 + +#define GLEW_SGIX_fog_offset GLEW_GET_VAR(__GLEW_SGIX_fog_offset) + +#endif /* GL_SGIX_fog_offset */ + +/* -------------------------- GL_SGIX_fog_texture 
-------------------------- */ + +#ifndef GL_SGIX_fog_texture +#define GL_SGIX_fog_texture 1 + +#define GL_TEXTURE_FOG_SGIX 0 +#define GL_FOG_PATCHY_FACTOR_SGIX 0 +#define GL_FRAGMENT_FOG_SGIX 0 + +typedef void (GLAPIENTRY * PFNGLTEXTUREFOGSGIXPROC) (GLenum pname); + +#define glTextureFogSGIX GLEW_GET_FUN(__glewTextureFogSGIX) + +#define GLEW_SGIX_fog_texture GLEW_GET_VAR(__GLEW_SGIX_fog_texture) + +#endif /* GL_SGIX_fog_texture */ + +/* ------------------- GL_SGIX_fragment_specular_lighting ------------------ */ + +#ifndef GL_SGIX_fragment_specular_lighting +#define GL_SGIX_fragment_specular_lighting 1 + +typedef void (GLAPIENTRY * PFNGLFRAGMENTCOLORMATERIALSGIXPROC) (GLenum face, GLenum mode); +typedef void (GLAPIENTRY * PFNGLFRAGMENTLIGHTMODELFSGIXPROC) (GLenum pname, GLfloat param); +typedef void (GLAPIENTRY * PFNGLFRAGMENTLIGHTMODELFVSGIXPROC) (GLenum pname, GLfloat* params); +typedef void (GLAPIENTRY * PFNGLFRAGMENTLIGHTMODELISGIXPROC) (GLenum pname, GLint param); +typedef void (GLAPIENTRY * PFNGLFRAGMENTLIGHTMODELIVSGIXPROC) (GLenum pname, GLint* params); +typedef void (GLAPIENTRY * PFNGLFRAGMENTLIGHTFSGIXPROC) (GLenum light, GLenum pname, GLfloat param); +typedef void (GLAPIENTRY * PFNGLFRAGMENTLIGHTFVSGIXPROC) (GLenum light, GLenum pname, GLfloat* params); +typedef void (GLAPIENTRY * PFNGLFRAGMENTLIGHTISGIXPROC) (GLenum light, GLenum pname, GLint param); +typedef void (GLAPIENTRY * PFNGLFRAGMENTLIGHTIVSGIXPROC) (GLenum light, GLenum pname, GLint* params); +typedef void (GLAPIENTRY * PFNGLFRAGMENTMATERIALFSGIXPROC) (GLenum face, GLenum pname, const GLfloat param); +typedef void (GLAPIENTRY * PFNGLFRAGMENTMATERIALFVSGIXPROC) (GLenum face, GLenum pname, const GLfloat* params); +typedef void (GLAPIENTRY * PFNGLFRAGMENTMATERIALISGIXPROC) (GLenum face, GLenum pname, const GLint param); +typedef void (GLAPIENTRY * PFNGLFRAGMENTMATERIALIVSGIXPROC) (GLenum face, GLenum pname, const GLint* params); +typedef void (GLAPIENTRY * PFNGLGETFRAGMENTLIGHTFVSGIXPROC) (GLenum 
light, GLenum value, GLfloat* data); +typedef void (GLAPIENTRY * PFNGLGETFRAGMENTLIGHTIVSGIXPROC) (GLenum light, GLenum value, GLint* data); +typedef void (GLAPIENTRY * PFNGLGETFRAGMENTMATERIALFVSGIXPROC) (GLenum face, GLenum pname, const GLfloat* data); +typedef void (GLAPIENTRY * PFNGLGETFRAGMENTMATERIALIVSGIXPROC) (GLenum face, GLenum pname, const GLint* data); + +#define glFragmentColorMaterialSGIX GLEW_GET_FUN(__glewFragmentColorMaterialSGIX) +#define glFragmentLightModelfSGIX GLEW_GET_FUN(__glewFragmentLightModelfSGIX) +#define glFragmentLightModelfvSGIX GLEW_GET_FUN(__glewFragmentLightModelfvSGIX) +#define glFragmentLightModeliSGIX GLEW_GET_FUN(__glewFragmentLightModeliSGIX) +#define glFragmentLightModelivSGIX GLEW_GET_FUN(__glewFragmentLightModelivSGIX) +#define glFragmentLightfSGIX GLEW_GET_FUN(__glewFragmentLightfSGIX) +#define glFragmentLightfvSGIX GLEW_GET_FUN(__glewFragmentLightfvSGIX) +#define glFragmentLightiSGIX GLEW_GET_FUN(__glewFragmentLightiSGIX) +#define glFragmentLightivSGIX GLEW_GET_FUN(__glewFragmentLightivSGIX) +#define glFragmentMaterialfSGIX GLEW_GET_FUN(__glewFragmentMaterialfSGIX) +#define glFragmentMaterialfvSGIX GLEW_GET_FUN(__glewFragmentMaterialfvSGIX) +#define glFragmentMaterialiSGIX GLEW_GET_FUN(__glewFragmentMaterialiSGIX) +#define glFragmentMaterialivSGIX GLEW_GET_FUN(__glewFragmentMaterialivSGIX) +#define glGetFragmentLightfvSGIX GLEW_GET_FUN(__glewGetFragmentLightfvSGIX) +#define glGetFragmentLightivSGIX GLEW_GET_FUN(__glewGetFragmentLightivSGIX) +#define glGetFragmentMaterialfvSGIX GLEW_GET_FUN(__glewGetFragmentMaterialfvSGIX) +#define glGetFragmentMaterialivSGIX GLEW_GET_FUN(__glewGetFragmentMaterialivSGIX) + +#define GLEW_SGIX_fragment_specular_lighting GLEW_GET_VAR(__GLEW_SGIX_fragment_specular_lighting) + +#endif /* GL_SGIX_fragment_specular_lighting */ + +/* --------------------------- GL_SGIX_framezoom --------------------------- */ + +#ifndef GL_SGIX_framezoom +#define GL_SGIX_framezoom 1 + +typedef void (GLAPIENTRY * 
PFNGLFRAMEZOOMSGIXPROC) (GLint factor); + +#define glFrameZoomSGIX GLEW_GET_FUN(__glewFrameZoomSGIX) + +#define GLEW_SGIX_framezoom GLEW_GET_VAR(__GLEW_SGIX_framezoom) + +#endif /* GL_SGIX_framezoom */ + +/* --------------------------- GL_SGIX_interlace --------------------------- */ + +#ifndef GL_SGIX_interlace +#define GL_SGIX_interlace 1 + +#define GL_INTERLACE_SGIX 0x8094 + +#define GLEW_SGIX_interlace GLEW_GET_VAR(__GLEW_SGIX_interlace) + +#endif /* GL_SGIX_interlace */ + +/* ------------------------- GL_SGIX_ir_instrument1 ------------------------ */ + +#ifndef GL_SGIX_ir_instrument1 +#define GL_SGIX_ir_instrument1 1 + +#define GLEW_SGIX_ir_instrument1 GLEW_GET_VAR(__GLEW_SGIX_ir_instrument1) + +#endif /* GL_SGIX_ir_instrument1 */ + +/* ------------------------- GL_SGIX_list_priority ------------------------- */ + +#ifndef GL_SGIX_list_priority +#define GL_SGIX_list_priority 1 + +#define GLEW_SGIX_list_priority GLEW_GET_VAR(__GLEW_SGIX_list_priority) + +#endif /* GL_SGIX_list_priority */ + +/* ------------------------- GL_SGIX_pixel_texture ------------------------- */ + +#ifndef GL_SGIX_pixel_texture +#define GL_SGIX_pixel_texture 1 + +typedef void (GLAPIENTRY * PFNGLPIXELTEXGENSGIXPROC) (GLenum mode); + +#define glPixelTexGenSGIX GLEW_GET_FUN(__glewPixelTexGenSGIX) + +#define GLEW_SGIX_pixel_texture GLEW_GET_VAR(__GLEW_SGIX_pixel_texture) + +#endif /* GL_SGIX_pixel_texture */ + +/* ----------------------- GL_SGIX_pixel_texture_bits ---------------------- */ + +#ifndef GL_SGIX_pixel_texture_bits +#define GL_SGIX_pixel_texture_bits 1 + +#define GLEW_SGIX_pixel_texture_bits GLEW_GET_VAR(__GLEW_SGIX_pixel_texture_bits) + +#endif /* GL_SGIX_pixel_texture_bits */ + +/* ------------------------ GL_SGIX_reference_plane ------------------------ */ + +#ifndef GL_SGIX_reference_plane +#define GL_SGIX_reference_plane 1 + +typedef void (GLAPIENTRY * PFNGLREFERENCEPLANESGIXPROC) (const GLdouble* equation); + +#define glReferencePlaneSGIX 
GLEW_GET_FUN(__glewReferencePlaneSGIX) + +#define GLEW_SGIX_reference_plane GLEW_GET_VAR(__GLEW_SGIX_reference_plane) + +#endif /* GL_SGIX_reference_plane */ + +/* ---------------------------- GL_SGIX_resample --------------------------- */ + +#ifndef GL_SGIX_resample +#define GL_SGIX_resample 1 + +#define GL_PACK_RESAMPLE_SGIX 0x842E +#define GL_UNPACK_RESAMPLE_SGIX 0x842F +#define GL_RESAMPLE_DECIMATE_SGIX 0x8430 +#define GL_RESAMPLE_REPLICATE_SGIX 0x8433 +#define GL_RESAMPLE_ZERO_FILL_SGIX 0x8434 + +#define GLEW_SGIX_resample GLEW_GET_VAR(__GLEW_SGIX_resample) + +#endif /* GL_SGIX_resample */ + +/* ----------------------------- GL_SGIX_shadow ---------------------------- */ + +#ifndef GL_SGIX_shadow +#define GL_SGIX_shadow 1 + +#define GL_TEXTURE_COMPARE_SGIX 0x819A +#define GL_TEXTURE_COMPARE_OPERATOR_SGIX 0x819B +#define GL_TEXTURE_LEQUAL_R_SGIX 0x819C +#define GL_TEXTURE_GEQUAL_R_SGIX 0x819D + +#define GLEW_SGIX_shadow GLEW_GET_VAR(__GLEW_SGIX_shadow) + +#endif /* GL_SGIX_shadow */ + +/* ------------------------- GL_SGIX_shadow_ambient ------------------------ */ + +#ifndef GL_SGIX_shadow_ambient +#define GL_SGIX_shadow_ambient 1 + +#define GL_SHADOW_AMBIENT_SGIX 0x80BF + +#define GLEW_SGIX_shadow_ambient GLEW_GET_VAR(__GLEW_SGIX_shadow_ambient) + +#endif /* GL_SGIX_shadow_ambient */ + +/* ----------------------------- GL_SGIX_sprite ---------------------------- */ + +#ifndef GL_SGIX_sprite +#define GL_SGIX_sprite 1 + +typedef void (GLAPIENTRY * PFNGLSPRITEPARAMETERFSGIXPROC) (GLenum pname, GLfloat param); +typedef void (GLAPIENTRY * PFNGLSPRITEPARAMETERFVSGIXPROC) (GLenum pname, GLfloat* params); +typedef void (GLAPIENTRY * PFNGLSPRITEPARAMETERISGIXPROC) (GLenum pname, GLint param); +typedef void (GLAPIENTRY * PFNGLSPRITEPARAMETERIVSGIXPROC) (GLenum pname, GLint* params); + +#define glSpriteParameterfSGIX GLEW_GET_FUN(__glewSpriteParameterfSGIX) +#define glSpriteParameterfvSGIX GLEW_GET_FUN(__glewSpriteParameterfvSGIX) +#define glSpriteParameteriSGIX 
GLEW_GET_FUN(__glewSpriteParameteriSGIX) +#define glSpriteParameterivSGIX GLEW_GET_FUN(__glewSpriteParameterivSGIX) + +#define GLEW_SGIX_sprite GLEW_GET_VAR(__GLEW_SGIX_sprite) + +#endif /* GL_SGIX_sprite */ + +/* ----------------------- GL_SGIX_tag_sample_buffer ----------------------- */ + +#ifndef GL_SGIX_tag_sample_buffer +#define GL_SGIX_tag_sample_buffer 1 + +typedef void (GLAPIENTRY * PFNGLTAGSAMPLEBUFFERSGIXPROC) (void); + +#define glTagSampleBufferSGIX GLEW_GET_FUN(__glewTagSampleBufferSGIX) + +#define GLEW_SGIX_tag_sample_buffer GLEW_GET_VAR(__GLEW_SGIX_tag_sample_buffer) + +#endif /* GL_SGIX_tag_sample_buffer */ + +/* ------------------------ GL_SGIX_texture_add_env ------------------------ */ + +#ifndef GL_SGIX_texture_add_env +#define GL_SGIX_texture_add_env 1 + +#define GLEW_SGIX_texture_add_env GLEW_GET_VAR(__GLEW_SGIX_texture_add_env) + +#endif /* GL_SGIX_texture_add_env */ + +/* -------------------- GL_SGIX_texture_coordinate_clamp ------------------- */ + +#ifndef GL_SGIX_texture_coordinate_clamp +#define GL_SGIX_texture_coordinate_clamp 1 + +#define GL_TEXTURE_MAX_CLAMP_S_SGIX 0x8369 +#define GL_TEXTURE_MAX_CLAMP_T_SGIX 0x836A +#define GL_TEXTURE_MAX_CLAMP_R_SGIX 0x836B + +#define GLEW_SGIX_texture_coordinate_clamp GLEW_GET_VAR(__GLEW_SGIX_texture_coordinate_clamp) + +#endif /* GL_SGIX_texture_coordinate_clamp */ + +/* ------------------------ GL_SGIX_texture_lod_bias ----------------------- */ + +#ifndef GL_SGIX_texture_lod_bias +#define GL_SGIX_texture_lod_bias 1 + +#define GLEW_SGIX_texture_lod_bias GLEW_GET_VAR(__GLEW_SGIX_texture_lod_bias) + +#endif /* GL_SGIX_texture_lod_bias */ + +/* ---------------------- GL_SGIX_texture_multi_buffer --------------------- */ + +#ifndef GL_SGIX_texture_multi_buffer +#define GL_SGIX_texture_multi_buffer 1 + +#define GL_TEXTURE_MULTI_BUFFER_HINT_SGIX 0x812E + +#define GLEW_SGIX_texture_multi_buffer GLEW_GET_VAR(__GLEW_SGIX_texture_multi_buffer) + +#endif /* GL_SGIX_texture_multi_buffer */ + +/* 
------------------------- GL_SGIX_texture_range ------------------------- */ + +#ifndef GL_SGIX_texture_range +#define GL_SGIX_texture_range 1 + +#define GL_RGB_SIGNED_SGIX 0x85E0 +#define GL_RGBA_SIGNED_SGIX 0x85E1 +#define GL_ALPHA_SIGNED_SGIX 0x85E2 +#define GL_LUMINANCE_SIGNED_SGIX 0x85E3 +#define GL_INTENSITY_SIGNED_SGIX 0x85E4 +#define GL_LUMINANCE_ALPHA_SIGNED_SGIX 0x85E5 +#define GL_RGB16_SIGNED_SGIX 0x85E6 +#define GL_RGBA16_SIGNED_SGIX 0x85E7 +#define GL_ALPHA16_SIGNED_SGIX 0x85E8 +#define GL_LUMINANCE16_SIGNED_SGIX 0x85E9 +#define GL_INTENSITY16_SIGNED_SGIX 0x85EA +#define GL_LUMINANCE16_ALPHA16_SIGNED_SGIX 0x85EB +#define GL_RGB_EXTENDED_RANGE_SGIX 0x85EC +#define GL_RGBA_EXTENDED_RANGE_SGIX 0x85ED +#define GL_ALPHA_EXTENDED_RANGE_SGIX 0x85EE +#define GL_LUMINANCE_EXTENDED_RANGE_SGIX 0x85EF +#define GL_INTENSITY_EXTENDED_RANGE_SGIX 0x85F0 +#define GL_LUMINANCE_ALPHA_EXTENDED_RANGE_SGIX 0x85F1 +#define GL_RGB16_EXTENDED_RANGE_SGIX 0x85F2 +#define GL_RGBA16_EXTENDED_RANGE_SGIX 0x85F3 +#define GL_ALPHA16_EXTENDED_RANGE_SGIX 0x85F4 +#define GL_LUMINANCE16_EXTENDED_RANGE_SGIX 0x85F5 +#define GL_INTENSITY16_EXTENDED_RANGE_SGIX 0x85F6 +#define GL_LUMINANCE16_ALPHA16_EXTENDED_RANGE_SGIX 0x85F7 +#define GL_MIN_LUMINANCE_SGIS 0x85F8 +#define GL_MAX_LUMINANCE_SGIS 0x85F9 +#define GL_MIN_INTENSITY_SGIS 0x85FA +#define GL_MAX_INTENSITY_SGIS 0x85FB + +#define GLEW_SGIX_texture_range GLEW_GET_VAR(__GLEW_SGIX_texture_range) + +#endif /* GL_SGIX_texture_range */ + +/* ----------------------- GL_SGIX_texture_scale_bias ---------------------- */ + +#ifndef GL_SGIX_texture_scale_bias +#define GL_SGIX_texture_scale_bias 1 + +#define GL_POST_TEXTURE_FILTER_BIAS_SGIX 0x8179 +#define GL_POST_TEXTURE_FILTER_SCALE_SGIX 0x817A +#define GL_POST_TEXTURE_FILTER_BIAS_RANGE_SGIX 0x817B +#define GL_POST_TEXTURE_FILTER_SCALE_RANGE_SGIX 0x817C + +#define GLEW_SGIX_texture_scale_bias GLEW_GET_VAR(__GLEW_SGIX_texture_scale_bias) + +#endif /* GL_SGIX_texture_scale_bias */ + +/* 
------------------------- GL_SGIX_vertex_preclip ------------------------ */ + +#ifndef GL_SGIX_vertex_preclip +#define GL_SGIX_vertex_preclip 1 + +#define GL_VERTEX_PRECLIP_SGIX 0x83EE +#define GL_VERTEX_PRECLIP_HINT_SGIX 0x83EF + +#define GLEW_SGIX_vertex_preclip GLEW_GET_VAR(__GLEW_SGIX_vertex_preclip) + +#endif /* GL_SGIX_vertex_preclip */ + +/* ---------------------- GL_SGIX_vertex_preclip_hint ---------------------- */ + +#ifndef GL_SGIX_vertex_preclip_hint +#define GL_SGIX_vertex_preclip_hint 1 + +#define GL_VERTEX_PRECLIP_SGIX 0x83EE +#define GL_VERTEX_PRECLIP_HINT_SGIX 0x83EF + +#define GLEW_SGIX_vertex_preclip_hint GLEW_GET_VAR(__GLEW_SGIX_vertex_preclip_hint) + +#endif /* GL_SGIX_vertex_preclip_hint */ + +/* ----------------------------- GL_SGIX_ycrcb ----------------------------- */ + +#ifndef GL_SGIX_ycrcb +#define GL_SGIX_ycrcb 1 + +#define GLEW_SGIX_ycrcb GLEW_GET_VAR(__GLEW_SGIX_ycrcb) + +#endif /* GL_SGIX_ycrcb */ + +/* -------------------------- GL_SGI_color_matrix -------------------------- */ + +#ifndef GL_SGI_color_matrix +#define GL_SGI_color_matrix 1 + +#define GL_COLOR_MATRIX_SGI 0x80B1 +#define GL_COLOR_MATRIX_STACK_DEPTH_SGI 0x80B2 +#define GL_MAX_COLOR_MATRIX_STACK_DEPTH_SGI 0x80B3 +#define GL_POST_COLOR_MATRIX_RED_SCALE_SGI 0x80B4 +#define GL_POST_COLOR_MATRIX_GREEN_SCALE_SGI 0x80B5 +#define GL_POST_COLOR_MATRIX_BLUE_SCALE_SGI 0x80B6 +#define GL_POST_COLOR_MATRIX_ALPHA_SCALE_SGI 0x80B7 +#define GL_POST_COLOR_MATRIX_RED_BIAS_SGI 0x80B8 +#define GL_POST_COLOR_MATRIX_GREEN_BIAS_SGI 0x80B9 +#define GL_POST_COLOR_MATRIX_BLUE_BIAS_SGI 0x80BA +#define GL_POST_COLOR_MATRIX_ALPHA_BIAS_SGI 0x80BB + +#define GLEW_SGI_color_matrix GLEW_GET_VAR(__GLEW_SGI_color_matrix) + +#endif /* GL_SGI_color_matrix */ + +/* --------------------------- GL_SGI_color_table -------------------------- */ + +#ifndef GL_SGI_color_table +#define GL_SGI_color_table 1 + +#define GL_COLOR_TABLE_SGI 0x80D0 +#define GL_POST_CONVOLUTION_COLOR_TABLE_SGI 0x80D1 +#define 
GL_POST_COLOR_MATRIX_COLOR_TABLE_SGI 0x80D2 +#define GL_PROXY_COLOR_TABLE_SGI 0x80D3 +#define GL_PROXY_POST_CONVOLUTION_COLOR_TABLE_SGI 0x80D4 +#define GL_PROXY_POST_COLOR_MATRIX_COLOR_TABLE_SGI 0x80D5 +#define GL_COLOR_TABLE_SCALE_SGI 0x80D6 +#define GL_COLOR_TABLE_BIAS_SGI 0x80D7 +#define GL_COLOR_TABLE_FORMAT_SGI 0x80D8 +#define GL_COLOR_TABLE_WIDTH_SGI 0x80D9 +#define GL_COLOR_TABLE_RED_SIZE_SGI 0x80DA +#define GL_COLOR_TABLE_GREEN_SIZE_SGI 0x80DB +#define GL_COLOR_TABLE_BLUE_SIZE_SGI 0x80DC +#define GL_COLOR_TABLE_ALPHA_SIZE_SGI 0x80DD +#define GL_COLOR_TABLE_LUMINANCE_SIZE_SGI 0x80DE +#define GL_COLOR_TABLE_INTENSITY_SIZE_SGI 0x80DF + +typedef void (GLAPIENTRY * PFNGLCOLORTABLEPARAMETERFVSGIPROC) (GLenum target, GLenum pname, const GLfloat* params); +typedef void (GLAPIENTRY * PFNGLCOLORTABLEPARAMETERIVSGIPROC) (GLenum target, GLenum pname, const GLint* params); +typedef void (GLAPIENTRY * PFNGLCOLORTABLESGIPROC) (GLenum target, GLenum internalformat, GLsizei width, GLenum format, GLenum type, const void* table); +typedef void (GLAPIENTRY * PFNGLCOPYCOLORTABLESGIPROC) (GLenum target, GLenum internalformat, GLint x, GLint y, GLsizei width); +typedef void (GLAPIENTRY * PFNGLGETCOLORTABLEPARAMETERFVSGIPROC) (GLenum target, GLenum pname, GLfloat* params); +typedef void (GLAPIENTRY * PFNGLGETCOLORTABLEPARAMETERIVSGIPROC) (GLenum target, GLenum pname, GLint* params); +typedef void (GLAPIENTRY * PFNGLGETCOLORTABLESGIPROC) (GLenum target, GLenum format, GLenum type, void* table); + +#define glColorTableParameterfvSGI GLEW_GET_FUN(__glewColorTableParameterfvSGI) +#define glColorTableParameterivSGI GLEW_GET_FUN(__glewColorTableParameterivSGI) +#define glColorTableSGI GLEW_GET_FUN(__glewColorTableSGI) +#define glCopyColorTableSGI GLEW_GET_FUN(__glewCopyColorTableSGI) +#define glGetColorTableParameterfvSGI GLEW_GET_FUN(__glewGetColorTableParameterfvSGI) +#define glGetColorTableParameterivSGI GLEW_GET_FUN(__glewGetColorTableParameterivSGI) +#define glGetColorTableSGI 
GLEW_GET_FUN(__glewGetColorTableSGI) + +#define GLEW_SGI_color_table GLEW_GET_VAR(__GLEW_SGI_color_table) + +#endif /* GL_SGI_color_table */ + +/* ----------------------- GL_SGI_texture_color_table ---------------------- */ + +#ifndef GL_SGI_texture_color_table +#define GL_SGI_texture_color_table 1 + +#define GL_TEXTURE_COLOR_TABLE_SGI 0x80BC +#define GL_PROXY_TEXTURE_COLOR_TABLE_SGI 0x80BD + +#define GLEW_SGI_texture_color_table GLEW_GET_VAR(__GLEW_SGI_texture_color_table) + +#endif /* GL_SGI_texture_color_table */ + +/* ------------------------- GL_SUNX_constant_data ------------------------- */ + +#ifndef GL_SUNX_constant_data +#define GL_SUNX_constant_data 1 + +#define GL_UNPACK_CONSTANT_DATA_SUNX 0x81D5 +#define GL_TEXTURE_CONSTANT_DATA_SUNX 0x81D6 + +typedef void (GLAPIENTRY * PFNGLFINISHTEXTURESUNXPROC) (void); + +#define glFinishTextureSUNX GLEW_GET_FUN(__glewFinishTextureSUNX) + +#define GLEW_SUNX_constant_data GLEW_GET_VAR(__GLEW_SUNX_constant_data) + +#endif /* GL_SUNX_constant_data */ + +/* -------------------- GL_SUN_convolution_border_modes -------------------- */ + +#ifndef GL_SUN_convolution_border_modes +#define GL_SUN_convolution_border_modes 1 + +#define GL_WRAP_BORDER_SUN 0x81D4 + +#define GLEW_SUN_convolution_border_modes GLEW_GET_VAR(__GLEW_SUN_convolution_border_modes) + +#endif /* GL_SUN_convolution_border_modes */ + +/* -------------------------- GL_SUN_global_alpha -------------------------- */ + +#ifndef GL_SUN_global_alpha +#define GL_SUN_global_alpha 1 + +#define GL_GLOBAL_ALPHA_SUN 0x81D9 +#define GL_GLOBAL_ALPHA_FACTOR_SUN 0x81DA + +typedef void (GLAPIENTRY * PFNGLGLOBALALPHAFACTORBSUNPROC) (GLbyte factor); +typedef void (GLAPIENTRY * PFNGLGLOBALALPHAFACTORDSUNPROC) (GLdouble factor); +typedef void (GLAPIENTRY * PFNGLGLOBALALPHAFACTORFSUNPROC) (GLfloat factor); +typedef void (GLAPIENTRY * PFNGLGLOBALALPHAFACTORISUNPROC) (GLint factor); +typedef void (GLAPIENTRY * PFNGLGLOBALALPHAFACTORSSUNPROC) (GLshort factor); +typedef void 
(GLAPIENTRY * PFNGLGLOBALALPHAFACTORUBSUNPROC) (GLubyte factor); +typedef void (GLAPIENTRY * PFNGLGLOBALALPHAFACTORUISUNPROC) (GLuint factor); +typedef void (GLAPIENTRY * PFNGLGLOBALALPHAFACTORUSSUNPROC) (GLushort factor); + +#define glGlobalAlphaFactorbSUN GLEW_GET_FUN(__glewGlobalAlphaFactorbSUN) +#define glGlobalAlphaFactordSUN GLEW_GET_FUN(__glewGlobalAlphaFactordSUN) +#define glGlobalAlphaFactorfSUN GLEW_GET_FUN(__glewGlobalAlphaFactorfSUN) +#define glGlobalAlphaFactoriSUN GLEW_GET_FUN(__glewGlobalAlphaFactoriSUN) +#define glGlobalAlphaFactorsSUN GLEW_GET_FUN(__glewGlobalAlphaFactorsSUN) +#define glGlobalAlphaFactorubSUN GLEW_GET_FUN(__glewGlobalAlphaFactorubSUN) +#define glGlobalAlphaFactoruiSUN GLEW_GET_FUN(__glewGlobalAlphaFactoruiSUN) +#define glGlobalAlphaFactorusSUN GLEW_GET_FUN(__glewGlobalAlphaFactorusSUN) + +#define GLEW_SUN_global_alpha GLEW_GET_VAR(__GLEW_SUN_global_alpha) + +#endif /* GL_SUN_global_alpha */ + +/* --------------------------- GL_SUN_mesh_array --------------------------- */ + +#ifndef GL_SUN_mesh_array +#define GL_SUN_mesh_array 1 + +#define GL_QUAD_MESH_SUN 0x8614 +#define GL_TRIANGLE_MESH_SUN 0x8615 + +#define GLEW_SUN_mesh_array GLEW_GET_VAR(__GLEW_SUN_mesh_array) + +#endif /* GL_SUN_mesh_array */ + +/* ------------------------ GL_SUN_read_video_pixels ----------------------- */ + +#ifndef GL_SUN_read_video_pixels +#define GL_SUN_read_video_pixels 1 + +typedef void (GLAPIENTRY * PFNGLREADVIDEOPIXELSSUNPROC) (GLint x, GLint y, GLsizei width, GLsizei height, GLenum format, GLenum type, GLvoid* pixels); + +#define glReadVideoPixelsSUN GLEW_GET_FUN(__glewReadVideoPixelsSUN) + +#define GLEW_SUN_read_video_pixels GLEW_GET_VAR(__GLEW_SUN_read_video_pixels) + +#endif /* GL_SUN_read_video_pixels */ + +/* --------------------------- GL_SUN_slice_accum -------------------------- */ + +#ifndef GL_SUN_slice_accum +#define GL_SUN_slice_accum 1 + +#define GL_SLICE_ACCUM_SUN 0x85CC + +#define GLEW_SUN_slice_accum 
GLEW_GET_VAR(__GLEW_SUN_slice_accum) + +#endif /* GL_SUN_slice_accum */ + +/* -------------------------- GL_SUN_triangle_list ------------------------- */ + +#ifndef GL_SUN_triangle_list +#define GL_SUN_triangle_list 1 + +#define GL_RESTART_SUN 0x01 +#define GL_REPLACE_MIDDLE_SUN 0x02 +#define GL_REPLACE_OLDEST_SUN 0x03 +#define GL_TRIANGLE_LIST_SUN 0x81D7 +#define GL_REPLACEMENT_CODE_SUN 0x81D8 +#define GL_REPLACEMENT_CODE_ARRAY_SUN 0x85C0 +#define GL_REPLACEMENT_CODE_ARRAY_TYPE_SUN 0x85C1 +#define GL_REPLACEMENT_CODE_ARRAY_STRIDE_SUN 0x85C2 +#define GL_REPLACEMENT_CODE_ARRAY_POINTER_SUN 0x85C3 +#define GL_R1UI_V3F_SUN 0x85C4 +#define GL_R1UI_C4UB_V3F_SUN 0x85C5 +#define GL_R1UI_C3F_V3F_SUN 0x85C6 +#define GL_R1UI_N3F_V3F_SUN 0x85C7 +#define GL_R1UI_C4F_N3F_V3F_SUN 0x85C8 +#define GL_R1UI_T2F_V3F_SUN 0x85C9 +#define GL_R1UI_T2F_N3F_V3F_SUN 0x85CA +#define GL_R1UI_T2F_C4F_N3F_V3F_SUN 0x85CB + +typedef void (GLAPIENTRY * PFNGLREPLACEMENTCODEPOINTERSUNPROC) (GLenum type, GLsizei stride, const void* pointer); +typedef void (GLAPIENTRY * PFNGLREPLACEMENTCODEUBSUNPROC) (GLubyte code); +typedef void (GLAPIENTRY * PFNGLREPLACEMENTCODEUBVSUNPROC) (const GLubyte* code); +typedef void (GLAPIENTRY * PFNGLREPLACEMENTCODEUISUNPROC) (GLuint code); +typedef void (GLAPIENTRY * PFNGLREPLACEMENTCODEUIVSUNPROC) (const GLuint* code); +typedef void (GLAPIENTRY * PFNGLREPLACEMENTCODEUSSUNPROC) (GLushort code); +typedef void (GLAPIENTRY * PFNGLREPLACEMENTCODEUSVSUNPROC) (const GLushort* code); + +#define glReplacementCodePointerSUN GLEW_GET_FUN(__glewReplacementCodePointerSUN) +#define glReplacementCodeubSUN GLEW_GET_FUN(__glewReplacementCodeubSUN) +#define glReplacementCodeubvSUN GLEW_GET_FUN(__glewReplacementCodeubvSUN) +#define glReplacementCodeuiSUN GLEW_GET_FUN(__glewReplacementCodeuiSUN) +#define glReplacementCodeuivSUN GLEW_GET_FUN(__glewReplacementCodeuivSUN) +#define glReplacementCodeusSUN GLEW_GET_FUN(__glewReplacementCodeusSUN) +#define glReplacementCodeusvSUN 
GLEW_GET_FUN(__glewReplacementCodeusvSUN) + +#define GLEW_SUN_triangle_list GLEW_GET_VAR(__GLEW_SUN_triangle_list) + +#endif /* GL_SUN_triangle_list */ + +/* ----------------------------- GL_SUN_vertex ----------------------------- */ + +#ifndef GL_SUN_vertex +#define GL_SUN_vertex 1 + +typedef void (GLAPIENTRY * PFNGLCOLOR3FVERTEX3FSUNPROC) (GLfloat r, GLfloat g, GLfloat b, GLfloat x, GLfloat y, GLfloat z); +typedef void (GLAPIENTRY * PFNGLCOLOR3FVERTEX3FVSUNPROC) (const GLfloat* c, const GLfloat *v); +typedef void (GLAPIENTRY * PFNGLCOLOR4FNORMAL3FVERTEX3FSUNPROC) (GLfloat r, GLfloat g, GLfloat b, GLfloat a, GLfloat nx, GLfloat ny, GLfloat nz, GLfloat x, GLfloat y, GLfloat z); +typedef void (GLAPIENTRY * PFNGLCOLOR4FNORMAL3FVERTEX3FVSUNPROC) (const GLfloat* c, const GLfloat *n, const GLfloat *v); +typedef void (GLAPIENTRY * PFNGLCOLOR4UBVERTEX2FSUNPROC) (GLubyte r, GLubyte g, GLubyte b, GLubyte a, GLfloat x, GLfloat y); +typedef void (GLAPIENTRY * PFNGLCOLOR4UBVERTEX2FVSUNPROC) (const GLubyte* c, const GLfloat *v); +typedef void (GLAPIENTRY * PFNGLCOLOR4UBVERTEX3FSUNPROC) (GLubyte r, GLubyte g, GLubyte b, GLubyte a, GLfloat x, GLfloat y, GLfloat z); +typedef void (GLAPIENTRY * PFNGLCOLOR4UBVERTEX3FVSUNPROC) (const GLubyte* c, const GLfloat *v); +typedef void (GLAPIENTRY * PFNGLNORMAL3FVERTEX3FSUNPROC) (GLfloat nx, GLfloat ny, GLfloat nz, GLfloat x, GLfloat y, GLfloat z); +typedef void (GLAPIENTRY * PFNGLNORMAL3FVERTEX3FVSUNPROC) (const GLfloat* n, const GLfloat *v); +typedef void (GLAPIENTRY * PFNGLREPLACEMENTCODEUICOLOR3FVERTEX3FSUNPROC) (GLuint rc, GLfloat r, GLfloat g, GLfloat b, GLfloat x, GLfloat y, GLfloat z); +typedef void (GLAPIENTRY * PFNGLREPLACEMENTCODEUICOLOR3FVERTEX3FVSUNPROC) (const GLuint* rc, const GLfloat *c, const GLfloat *v); +typedef void (GLAPIENTRY * PFNGLREPLACEMENTCODEUICOLOR4FNORMAL3FVERTEX3FSUNPROC) (GLuint rc, GLfloat r, GLfloat g, GLfloat b, GLfloat a, GLfloat nx, GLfloat ny, GLfloat nz, GLfloat x, GLfloat y, GLfloat z); +typedef void 
(GLAPIENTRY * PFNGLREPLACEMENTCODEUICOLOR4FNORMAL3FVERTEX3FVSUNPROC) (const GLuint* rc, const GLfloat *c, const GLfloat *n, const GLfloat *v); +typedef void (GLAPIENTRY * PFNGLREPLACEMENTCODEUICOLOR4UBVERTEX3FSUNPROC) (GLuint rc, GLubyte r, GLubyte g, GLubyte b, GLubyte a, GLfloat x, GLfloat y, GLfloat z); +typedef void (GLAPIENTRY * PFNGLREPLACEMENTCODEUICOLOR4UBVERTEX3FVSUNPROC) (const GLuint* rc, const GLubyte *c, const GLfloat *v); +typedef void (GLAPIENTRY * PFNGLREPLACEMENTCODEUINORMAL3FVERTEX3FSUNPROC) (GLuint rc, GLfloat nx, GLfloat ny, GLfloat nz, GLfloat x, GLfloat y, GLfloat z); +typedef void (GLAPIENTRY * PFNGLREPLACEMENTCODEUINORMAL3FVERTEX3FVSUNPROC) (const GLuint* rc, const GLfloat *n, const GLfloat *v); +typedef void (GLAPIENTRY * PFNGLREPLACEMENTCODEUITEXCOORD2FCOLOR4FNORMAL3FVERTEX3FSUNPROC) (GLuint rc, GLfloat s, GLfloat t, GLfloat r, GLfloat g, GLfloat b, GLfloat a, GLfloat nx, GLfloat ny, GLfloat nz, GLfloat x, GLfloat y, GLfloat z); +typedef void (GLAPIENTRY * PFNGLREPLACEMENTCODEUITEXCOORD2FCOLOR4FNORMAL3FVERTEX3FVSUNPROC) (const GLuint* rc, const GLfloat *tc, const GLfloat *c, const GLfloat *n, const GLfloat *v); +typedef void (GLAPIENTRY * PFNGLREPLACEMENTCODEUITEXCOORD2FNORMAL3FVERTEX3FSUNPROC) (GLuint rc, GLfloat s, GLfloat t, GLfloat nx, GLfloat ny, GLfloat nz, GLfloat x, GLfloat y, GLfloat z); +typedef void (GLAPIENTRY * PFNGLREPLACEMENTCODEUITEXCOORD2FNORMAL3FVERTEX3FVSUNPROC) (const GLuint* rc, const GLfloat *tc, const GLfloat *n, const GLfloat *v); +typedef void (GLAPIENTRY * PFNGLREPLACEMENTCODEUITEXCOORD2FVERTEX3FSUNPROC) (GLuint rc, GLfloat s, GLfloat t, GLfloat x, GLfloat y, GLfloat z); +typedef void (GLAPIENTRY * PFNGLREPLACEMENTCODEUITEXCOORD2FVERTEX3FVSUNPROC) (const GLuint* rc, const GLfloat *tc, const GLfloat *v); +typedef void (GLAPIENTRY * PFNGLREPLACEMENTCODEUIVERTEX3FSUNPROC) (GLuint rc, GLfloat x, GLfloat y, GLfloat z); +typedef void (GLAPIENTRY * PFNGLREPLACEMENTCODEUIVERTEX3FVSUNPROC) (const GLuint* rc, const GLfloat 
*v); +typedef void (GLAPIENTRY * PFNGLTEXCOORD2FCOLOR3FVERTEX3FSUNPROC) (GLfloat s, GLfloat t, GLfloat r, GLfloat g, GLfloat b, GLfloat x, GLfloat y, GLfloat z); +typedef void (GLAPIENTRY * PFNGLTEXCOORD2FCOLOR3FVERTEX3FVSUNPROC) (const GLfloat* tc, const GLfloat *c, const GLfloat *v); +typedef void (GLAPIENTRY * PFNGLTEXCOORD2FCOLOR4FNORMAL3FVERTEX3FSUNPROC) (GLfloat s, GLfloat t, GLfloat r, GLfloat g, GLfloat b, GLfloat a, GLfloat nx, GLfloat ny, GLfloat nz, GLfloat x, GLfloat y, GLfloat z); +typedef void (GLAPIENTRY * PFNGLTEXCOORD2FCOLOR4FNORMAL3FVERTEX3FVSUNPROC) (const GLfloat* tc, const GLfloat *c, const GLfloat *n, const GLfloat *v); +typedef void (GLAPIENTRY * PFNGLTEXCOORD2FCOLOR4UBVERTEX3FSUNPROC) (GLfloat s, GLfloat t, GLubyte r, GLubyte g, GLubyte b, GLubyte a, GLfloat x, GLfloat y, GLfloat z); +typedef void (GLAPIENTRY * PFNGLTEXCOORD2FCOLOR4UBVERTEX3FVSUNPROC) (const GLfloat* tc, const GLubyte *c, const GLfloat *v); +typedef void (GLAPIENTRY * PFNGLTEXCOORD2FNORMAL3FVERTEX3FSUNPROC) (GLfloat s, GLfloat t, GLfloat nx, GLfloat ny, GLfloat nz, GLfloat x, GLfloat y, GLfloat z); +typedef void (GLAPIENTRY * PFNGLTEXCOORD2FNORMAL3FVERTEX3FVSUNPROC) (const GLfloat* tc, const GLfloat *n, const GLfloat *v); +typedef void (GLAPIENTRY * PFNGLTEXCOORD2FVERTEX3FSUNPROC) (GLfloat s, GLfloat t, GLfloat x, GLfloat y, GLfloat z); +typedef void (GLAPIENTRY * PFNGLTEXCOORD2FVERTEX3FVSUNPROC) (const GLfloat* tc, const GLfloat *v); +typedef void (GLAPIENTRY * PFNGLTEXCOORD4FCOLOR4FNORMAL3FVERTEX4FSUNPROC) (GLfloat s, GLfloat t, GLfloat p, GLfloat q, GLfloat r, GLfloat g, GLfloat b, GLfloat a, GLfloat nx, GLfloat ny, GLfloat nz, GLfloat x, GLfloat y, GLfloat z, GLfloat w); +typedef void (GLAPIENTRY * PFNGLTEXCOORD4FCOLOR4FNORMAL3FVERTEX4FVSUNPROC) (const GLfloat* tc, const GLfloat *c, const GLfloat *n, const GLfloat *v); +typedef void (GLAPIENTRY * PFNGLTEXCOORD4FVERTEX4FSUNPROC) (GLfloat s, GLfloat t, GLfloat p, GLfloat q, GLfloat x, GLfloat y, GLfloat z, GLfloat w); 
+typedef void (GLAPIENTRY * PFNGLTEXCOORD4FVERTEX4FVSUNPROC) (const GLfloat* tc, const GLfloat *v); + +#define glColor3fVertex3fSUN GLEW_GET_FUN(__glewColor3fVertex3fSUN) +#define glColor3fVertex3fvSUN GLEW_GET_FUN(__glewColor3fVertex3fvSUN) +#define glColor4fNormal3fVertex3fSUN GLEW_GET_FUN(__glewColor4fNormal3fVertex3fSUN) +#define glColor4fNormal3fVertex3fvSUN GLEW_GET_FUN(__glewColor4fNormal3fVertex3fvSUN) +#define glColor4ubVertex2fSUN GLEW_GET_FUN(__glewColor4ubVertex2fSUN) +#define glColor4ubVertex2fvSUN GLEW_GET_FUN(__glewColor4ubVertex2fvSUN) +#define glColor4ubVertex3fSUN GLEW_GET_FUN(__glewColor4ubVertex3fSUN) +#define glColor4ubVertex3fvSUN GLEW_GET_FUN(__glewColor4ubVertex3fvSUN) +#define glNormal3fVertex3fSUN GLEW_GET_FUN(__glewNormal3fVertex3fSUN) +#define glNormal3fVertex3fvSUN GLEW_GET_FUN(__glewNormal3fVertex3fvSUN) +#define glReplacementCodeuiColor3fVertex3fSUN GLEW_GET_FUN(__glewReplacementCodeuiColor3fVertex3fSUN) +#define glReplacementCodeuiColor3fVertex3fvSUN GLEW_GET_FUN(__glewReplacementCodeuiColor3fVertex3fvSUN) +#define glReplacementCodeuiColor4fNormal3fVertex3fSUN GLEW_GET_FUN(__glewReplacementCodeuiColor4fNormal3fVertex3fSUN) +#define glReplacementCodeuiColor4fNormal3fVertex3fvSUN GLEW_GET_FUN(__glewReplacementCodeuiColor4fNormal3fVertex3fvSUN) +#define glReplacementCodeuiColor4ubVertex3fSUN GLEW_GET_FUN(__glewReplacementCodeuiColor4ubVertex3fSUN) +#define glReplacementCodeuiColor4ubVertex3fvSUN GLEW_GET_FUN(__glewReplacementCodeuiColor4ubVertex3fvSUN) +#define glReplacementCodeuiNormal3fVertex3fSUN GLEW_GET_FUN(__glewReplacementCodeuiNormal3fVertex3fSUN) +#define glReplacementCodeuiNormal3fVertex3fvSUN GLEW_GET_FUN(__glewReplacementCodeuiNormal3fVertex3fvSUN) +#define glReplacementCodeuiTexCoord2fColor4fNormal3fVertex3fSUN GLEW_GET_FUN(__glewReplacementCodeuiTexCoord2fColor4fNormal3fVertex3fSUN) +#define glReplacementCodeuiTexCoord2fColor4fNormal3fVertex3fvSUN GLEW_GET_FUN(__glewReplacementCodeuiTexCoord2fColor4fNormal3fVertex3fvSUN) 
+#define glReplacementCodeuiTexCoord2fNormal3fVertex3fSUN GLEW_GET_FUN(__glewReplacementCodeuiTexCoord2fNormal3fVertex3fSUN) +#define glReplacementCodeuiTexCoord2fNormal3fVertex3fvSUN GLEW_GET_FUN(__glewReplacementCodeuiTexCoord2fNormal3fVertex3fvSUN) +#define glReplacementCodeuiTexCoord2fVertex3fSUN GLEW_GET_FUN(__glewReplacementCodeuiTexCoord2fVertex3fSUN) +#define glReplacementCodeuiTexCoord2fVertex3fvSUN GLEW_GET_FUN(__glewReplacementCodeuiTexCoord2fVertex3fvSUN) +#define glReplacementCodeuiVertex3fSUN GLEW_GET_FUN(__glewReplacementCodeuiVertex3fSUN) +#define glReplacementCodeuiVertex3fvSUN GLEW_GET_FUN(__glewReplacementCodeuiVertex3fvSUN) +#define glTexCoord2fColor3fVertex3fSUN GLEW_GET_FUN(__glewTexCoord2fColor3fVertex3fSUN) +#define glTexCoord2fColor3fVertex3fvSUN GLEW_GET_FUN(__glewTexCoord2fColor3fVertex3fvSUN) +#define glTexCoord2fColor4fNormal3fVertex3fSUN GLEW_GET_FUN(__glewTexCoord2fColor4fNormal3fVertex3fSUN) +#define glTexCoord2fColor4fNormal3fVertex3fvSUN GLEW_GET_FUN(__glewTexCoord2fColor4fNormal3fVertex3fvSUN) +#define glTexCoord2fColor4ubVertex3fSUN GLEW_GET_FUN(__glewTexCoord2fColor4ubVertex3fSUN) +#define glTexCoord2fColor4ubVertex3fvSUN GLEW_GET_FUN(__glewTexCoord2fColor4ubVertex3fvSUN) +#define glTexCoord2fNormal3fVertex3fSUN GLEW_GET_FUN(__glewTexCoord2fNormal3fVertex3fSUN) +#define glTexCoord2fNormal3fVertex3fvSUN GLEW_GET_FUN(__glewTexCoord2fNormal3fVertex3fvSUN) +#define glTexCoord2fVertex3fSUN GLEW_GET_FUN(__glewTexCoord2fVertex3fSUN) +#define glTexCoord2fVertex3fvSUN GLEW_GET_FUN(__glewTexCoord2fVertex3fvSUN) +#define glTexCoord4fColor4fNormal3fVertex4fSUN GLEW_GET_FUN(__glewTexCoord4fColor4fNormal3fVertex4fSUN) +#define glTexCoord4fColor4fNormal3fVertex4fvSUN GLEW_GET_FUN(__glewTexCoord4fColor4fNormal3fVertex4fvSUN) +#define glTexCoord4fVertex4fSUN GLEW_GET_FUN(__glewTexCoord4fVertex4fSUN) +#define glTexCoord4fVertex4fvSUN GLEW_GET_FUN(__glewTexCoord4fVertex4fvSUN) + +#define GLEW_SUN_vertex GLEW_GET_VAR(__GLEW_SUN_vertex) + +#endif /* 
GL_SUN_vertex */ + +/* -------------------------- GL_WIN_phong_shading ------------------------- */ + +#ifndef GL_WIN_phong_shading +#define GL_WIN_phong_shading 1 + +#define GL_PHONG_WIN 0x80EA +#define GL_PHONG_HINT_WIN 0x80EB + +#define GLEW_WIN_phong_shading GLEW_GET_VAR(__GLEW_WIN_phong_shading) + +#endif /* GL_WIN_phong_shading */ + +/* -------------------------- GL_WIN_specular_fog -------------------------- */ + +#ifndef GL_WIN_specular_fog +#define GL_WIN_specular_fog 1 + +#define GL_FOG_SPECULAR_TEXTURE_WIN 0x80EC + +#define GLEW_WIN_specular_fog GLEW_GET_VAR(__GLEW_WIN_specular_fog) + +#endif /* GL_WIN_specular_fog */ + +/* ---------------------------- GL_WIN_swap_hint --------------------------- */ + +#ifndef GL_WIN_swap_hint +#define GL_WIN_swap_hint 1 + +typedef void (GLAPIENTRY * PFNGLADDSWAPHINTRECTWINPROC) (GLint x, GLint y, GLsizei width, GLsizei height); + +#define glAddSwapHintRectWIN GLEW_GET_FUN(__glewAddSwapHintRectWIN) + +#define GLEW_WIN_swap_hint GLEW_GET_VAR(__GLEW_WIN_swap_hint) + +#endif /* GL_WIN_swap_hint */ + +/* ------------------------------------------------------------------------- */ + +#if defined(GLEW_MX) && defined(_WIN32) +#define GLEW_FUN_EXPORT +#else +#define GLEW_FUN_EXPORT GLEWAPI +#endif /* GLEW_MX */ + +#if defined(GLEW_MX) +#define GLEW_VAR_EXPORT +#else +#define GLEW_VAR_EXPORT GLEWAPI +#endif /* GLEW_MX */ + +#if defined(GLEW_MX) && defined(_WIN32) +struct GLEWContextStruct +{ +#endif /* GLEW_MX */ + +GLEW_FUN_EXPORT PFNGLCOPYTEXSUBIMAGE3DPROC __glewCopyTexSubImage3D; +GLEW_FUN_EXPORT PFNGLDRAWRANGEELEMENTSPROC __glewDrawRangeElements; +GLEW_FUN_EXPORT PFNGLTEXIMAGE3DPROC __glewTexImage3D; +GLEW_FUN_EXPORT PFNGLTEXSUBIMAGE3DPROC __glewTexSubImage3D; + +GLEW_FUN_EXPORT PFNGLACTIVETEXTUREPROC __glewActiveTexture; +GLEW_FUN_EXPORT PFNGLCLIENTACTIVETEXTUREPROC __glewClientActiveTexture; +GLEW_FUN_EXPORT PFNGLCOMPRESSEDTEXIMAGE1DPROC __glewCompressedTexImage1D; +GLEW_FUN_EXPORT PFNGLCOMPRESSEDTEXIMAGE2DPROC 
__glewCompressedTexImage2D; +GLEW_FUN_EXPORT PFNGLCOMPRESSEDTEXIMAGE3DPROC __glewCompressedTexImage3D; +GLEW_FUN_EXPORT PFNGLCOMPRESSEDTEXSUBIMAGE1DPROC __glewCompressedTexSubImage1D; +GLEW_FUN_EXPORT PFNGLCOMPRESSEDTEXSUBIMAGE2DPROC __glewCompressedTexSubImage2D; +GLEW_FUN_EXPORT PFNGLCOMPRESSEDTEXSUBIMAGE3DPROC __glewCompressedTexSubImage3D; +GLEW_FUN_EXPORT PFNGLGETCOMPRESSEDTEXIMAGEPROC __glewGetCompressedTexImage; +GLEW_FUN_EXPORT PFNGLLOADTRANSPOSEMATRIXDPROC __glewLoadTransposeMatrixd; +GLEW_FUN_EXPORT PFNGLLOADTRANSPOSEMATRIXFPROC __glewLoadTransposeMatrixf; +GLEW_FUN_EXPORT PFNGLMULTTRANSPOSEMATRIXDPROC __glewMultTransposeMatrixd; +GLEW_FUN_EXPORT PFNGLMULTTRANSPOSEMATRIXFPROC __glewMultTransposeMatrixf; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD1DPROC __glewMultiTexCoord1d; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD1DVPROC __glewMultiTexCoord1dv; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD1FPROC __glewMultiTexCoord1f; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD1FVPROC __glewMultiTexCoord1fv; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD1IPROC __glewMultiTexCoord1i; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD1IVPROC __glewMultiTexCoord1iv; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD1SPROC __glewMultiTexCoord1s; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD1SVPROC __glewMultiTexCoord1sv; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD2DPROC __glewMultiTexCoord2d; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD2DVPROC __glewMultiTexCoord2dv; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD2FPROC __glewMultiTexCoord2f; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD2FVPROC __glewMultiTexCoord2fv; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD2IPROC __glewMultiTexCoord2i; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD2IVPROC __glewMultiTexCoord2iv; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD2SPROC __glewMultiTexCoord2s; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD2SVPROC __glewMultiTexCoord2sv; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD3DPROC __glewMultiTexCoord3d; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD3DVPROC __glewMultiTexCoord3dv; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD3FPROC __glewMultiTexCoord3f; +GLEW_FUN_EXPORT 
PFNGLMULTITEXCOORD3FVPROC __glewMultiTexCoord3fv; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD3IPROC __glewMultiTexCoord3i; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD3IVPROC __glewMultiTexCoord3iv; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD3SPROC __glewMultiTexCoord3s; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD3SVPROC __glewMultiTexCoord3sv; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD4DPROC __glewMultiTexCoord4d; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD4DVPROC __glewMultiTexCoord4dv; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD4FPROC __glewMultiTexCoord4f; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD4FVPROC __glewMultiTexCoord4fv; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD4IPROC __glewMultiTexCoord4i; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD4IVPROC __glewMultiTexCoord4iv; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD4SPROC __glewMultiTexCoord4s; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD4SVPROC __glewMultiTexCoord4sv; +GLEW_FUN_EXPORT PFNGLSAMPLECOVERAGEPROC __glewSampleCoverage; + +GLEW_FUN_EXPORT PFNGLBLENDCOLORPROC __glewBlendColor; +GLEW_FUN_EXPORT PFNGLBLENDEQUATIONPROC __glewBlendEquation; +GLEW_FUN_EXPORT PFNGLBLENDFUNCSEPARATEPROC __glewBlendFuncSeparate; +GLEW_FUN_EXPORT PFNGLFOGCOORDPOINTERPROC __glewFogCoordPointer; +GLEW_FUN_EXPORT PFNGLFOGCOORDDPROC __glewFogCoordd; +GLEW_FUN_EXPORT PFNGLFOGCOORDDVPROC __glewFogCoorddv; +GLEW_FUN_EXPORT PFNGLFOGCOORDFPROC __glewFogCoordf; +GLEW_FUN_EXPORT PFNGLFOGCOORDFVPROC __glewFogCoordfv; +GLEW_FUN_EXPORT PFNGLMULTIDRAWARRAYSPROC __glewMultiDrawArrays; +GLEW_FUN_EXPORT PFNGLMULTIDRAWELEMENTSPROC __glewMultiDrawElements; +GLEW_FUN_EXPORT PFNGLPOINTPARAMETERFPROC __glewPointParameterf; +GLEW_FUN_EXPORT PFNGLPOINTPARAMETERFVPROC __glewPointParameterfv; +GLEW_FUN_EXPORT PFNGLPOINTPARAMETERIPROC __glewPointParameteri; +GLEW_FUN_EXPORT PFNGLPOINTPARAMETERIVPROC __glewPointParameteriv; +GLEW_FUN_EXPORT PFNGLSECONDARYCOLOR3BPROC __glewSecondaryColor3b; +GLEW_FUN_EXPORT PFNGLSECONDARYCOLOR3BVPROC __glewSecondaryColor3bv; +GLEW_FUN_EXPORT PFNGLSECONDARYCOLOR3DPROC __glewSecondaryColor3d; +GLEW_FUN_EXPORT 
PFNGLSECONDARYCOLOR3DVPROC __glewSecondaryColor3dv; +GLEW_FUN_EXPORT PFNGLSECONDARYCOLOR3FPROC __glewSecondaryColor3f; +GLEW_FUN_EXPORT PFNGLSECONDARYCOLOR3FVPROC __glewSecondaryColor3fv; +GLEW_FUN_EXPORT PFNGLSECONDARYCOLOR3IPROC __glewSecondaryColor3i; +GLEW_FUN_EXPORT PFNGLSECONDARYCOLOR3IVPROC __glewSecondaryColor3iv; +GLEW_FUN_EXPORT PFNGLSECONDARYCOLOR3SPROC __glewSecondaryColor3s; +GLEW_FUN_EXPORT PFNGLSECONDARYCOLOR3SVPROC __glewSecondaryColor3sv; +GLEW_FUN_EXPORT PFNGLSECONDARYCOLOR3UBPROC __glewSecondaryColor3ub; +GLEW_FUN_EXPORT PFNGLSECONDARYCOLOR3UBVPROC __glewSecondaryColor3ubv; +GLEW_FUN_EXPORT PFNGLSECONDARYCOLOR3UIPROC __glewSecondaryColor3ui; +GLEW_FUN_EXPORT PFNGLSECONDARYCOLOR3UIVPROC __glewSecondaryColor3uiv; +GLEW_FUN_EXPORT PFNGLSECONDARYCOLOR3USPROC __glewSecondaryColor3us; +GLEW_FUN_EXPORT PFNGLSECONDARYCOLOR3USVPROC __glewSecondaryColor3usv; +GLEW_FUN_EXPORT PFNGLSECONDARYCOLORPOINTERPROC __glewSecondaryColorPointer; +GLEW_FUN_EXPORT PFNGLWINDOWPOS2DPROC __glewWindowPos2d; +GLEW_FUN_EXPORT PFNGLWINDOWPOS2DVPROC __glewWindowPos2dv; +GLEW_FUN_EXPORT PFNGLWINDOWPOS2FPROC __glewWindowPos2f; +GLEW_FUN_EXPORT PFNGLWINDOWPOS2FVPROC __glewWindowPos2fv; +GLEW_FUN_EXPORT PFNGLWINDOWPOS2IPROC __glewWindowPos2i; +GLEW_FUN_EXPORT PFNGLWINDOWPOS2IVPROC __glewWindowPos2iv; +GLEW_FUN_EXPORT PFNGLWINDOWPOS2SPROC __glewWindowPos2s; +GLEW_FUN_EXPORT PFNGLWINDOWPOS2SVPROC __glewWindowPos2sv; +GLEW_FUN_EXPORT PFNGLWINDOWPOS3DPROC __glewWindowPos3d; +GLEW_FUN_EXPORT PFNGLWINDOWPOS3DVPROC __glewWindowPos3dv; +GLEW_FUN_EXPORT PFNGLWINDOWPOS3FPROC __glewWindowPos3f; +GLEW_FUN_EXPORT PFNGLWINDOWPOS3FVPROC __glewWindowPos3fv; +GLEW_FUN_EXPORT PFNGLWINDOWPOS3IPROC __glewWindowPos3i; +GLEW_FUN_EXPORT PFNGLWINDOWPOS3IVPROC __glewWindowPos3iv; +GLEW_FUN_EXPORT PFNGLWINDOWPOS3SPROC __glewWindowPos3s; +GLEW_FUN_EXPORT PFNGLWINDOWPOS3SVPROC __glewWindowPos3sv; + +GLEW_FUN_EXPORT PFNGLBEGINQUERYPROC __glewBeginQuery; +GLEW_FUN_EXPORT PFNGLBINDBUFFERPROC __glewBindBuffer; 
+GLEW_FUN_EXPORT PFNGLBUFFERDATAPROC __glewBufferData; +GLEW_FUN_EXPORT PFNGLBUFFERSUBDATAPROC __glewBufferSubData; +GLEW_FUN_EXPORT PFNGLDELETEBUFFERSPROC __glewDeleteBuffers; +GLEW_FUN_EXPORT PFNGLDELETEQUERIESPROC __glewDeleteQueries; +GLEW_FUN_EXPORT PFNGLENDQUERYPROC __glewEndQuery; +GLEW_FUN_EXPORT PFNGLGENBUFFERSPROC __glewGenBuffers; +GLEW_FUN_EXPORT PFNGLGENQUERIESPROC __glewGenQueries; +GLEW_FUN_EXPORT PFNGLGETBUFFERPARAMETERIVPROC __glewGetBufferParameteriv; +GLEW_FUN_EXPORT PFNGLGETBUFFERPOINTERVPROC __glewGetBufferPointerv; +GLEW_FUN_EXPORT PFNGLGETBUFFERSUBDATAPROC __glewGetBufferSubData; +GLEW_FUN_EXPORT PFNGLGETQUERYOBJECTIVPROC __glewGetQueryObjectiv; +GLEW_FUN_EXPORT PFNGLGETQUERYOBJECTUIVPROC __glewGetQueryObjectuiv; +GLEW_FUN_EXPORT PFNGLGETQUERYIVPROC __glewGetQueryiv; +GLEW_FUN_EXPORT PFNGLISBUFFERPROC __glewIsBuffer; +GLEW_FUN_EXPORT PFNGLISQUERYPROC __glewIsQuery; +GLEW_FUN_EXPORT PFNGLMAPBUFFERPROC __glewMapBuffer; +GLEW_FUN_EXPORT PFNGLUNMAPBUFFERPROC __glewUnmapBuffer; + +GLEW_FUN_EXPORT PFNGLATTACHSHADERPROC __glewAttachShader; +GLEW_FUN_EXPORT PFNGLBINDATTRIBLOCATIONPROC __glewBindAttribLocation; +GLEW_FUN_EXPORT PFNGLBLENDEQUATIONSEPARATEPROC __glewBlendEquationSeparate; +GLEW_FUN_EXPORT PFNGLCOMPILESHADERPROC __glewCompileShader; +GLEW_FUN_EXPORT PFNGLCREATEPROGRAMPROC __glewCreateProgram; +GLEW_FUN_EXPORT PFNGLCREATESHADERPROC __glewCreateShader; +GLEW_FUN_EXPORT PFNGLDELETEPROGRAMPROC __glewDeleteProgram; +GLEW_FUN_EXPORT PFNGLDELETESHADERPROC __glewDeleteShader; +GLEW_FUN_EXPORT PFNGLDETACHSHADERPROC __glewDetachShader; +GLEW_FUN_EXPORT PFNGLDISABLEVERTEXATTRIBARRAYPROC __glewDisableVertexAttribArray; +GLEW_FUN_EXPORT PFNGLDRAWBUFFERSPROC __glewDrawBuffers; +GLEW_FUN_EXPORT PFNGLENABLEVERTEXATTRIBARRAYPROC __glewEnableVertexAttribArray; +GLEW_FUN_EXPORT PFNGLGETACTIVEATTRIBPROC __glewGetActiveAttrib; +GLEW_FUN_EXPORT PFNGLGETACTIVEUNIFORMPROC __glewGetActiveUniform; +GLEW_FUN_EXPORT PFNGLGETATTACHEDSHADERSPROC 
__glewGetAttachedShaders; +GLEW_FUN_EXPORT PFNGLGETATTRIBLOCATIONPROC __glewGetAttribLocation; +GLEW_FUN_EXPORT PFNGLGETPROGRAMINFOLOGPROC __glewGetProgramInfoLog; +GLEW_FUN_EXPORT PFNGLGETPROGRAMIVPROC __glewGetProgramiv; +GLEW_FUN_EXPORT PFNGLGETSHADERINFOLOGPROC __glewGetShaderInfoLog; +GLEW_FUN_EXPORT PFNGLGETSHADERSOURCEPROC __glewGetShaderSource; +GLEW_FUN_EXPORT PFNGLGETSHADERIVPROC __glewGetShaderiv; +GLEW_FUN_EXPORT PFNGLGETUNIFORMLOCATIONPROC __glewGetUniformLocation; +GLEW_FUN_EXPORT PFNGLGETUNIFORMFVPROC __glewGetUniformfv; +GLEW_FUN_EXPORT PFNGLGETUNIFORMIVPROC __glewGetUniformiv; +GLEW_FUN_EXPORT PFNGLGETVERTEXATTRIBPOINTERVPROC __glewGetVertexAttribPointerv; +GLEW_FUN_EXPORT PFNGLGETVERTEXATTRIBDVPROC __glewGetVertexAttribdv; +GLEW_FUN_EXPORT PFNGLGETVERTEXATTRIBFVPROC __glewGetVertexAttribfv; +GLEW_FUN_EXPORT PFNGLGETVERTEXATTRIBIVPROC __glewGetVertexAttribiv; +GLEW_FUN_EXPORT PFNGLISPROGRAMPROC __glewIsProgram; +GLEW_FUN_EXPORT PFNGLISSHADERPROC __glewIsShader; +GLEW_FUN_EXPORT PFNGLLINKPROGRAMPROC __glewLinkProgram; +GLEW_FUN_EXPORT PFNGLSHADERSOURCEPROC __glewShaderSource; +GLEW_FUN_EXPORT PFNGLSTENCILFUNCSEPARATEPROC __glewStencilFuncSeparate; +GLEW_FUN_EXPORT PFNGLSTENCILMASKSEPARATEPROC __glewStencilMaskSeparate; +GLEW_FUN_EXPORT PFNGLSTENCILOPSEPARATEPROC __glewStencilOpSeparate; +GLEW_FUN_EXPORT PFNGLUNIFORM1FPROC __glewUniform1f; +GLEW_FUN_EXPORT PFNGLUNIFORM1FVPROC __glewUniform1fv; +GLEW_FUN_EXPORT PFNGLUNIFORM1IPROC __glewUniform1i; +GLEW_FUN_EXPORT PFNGLUNIFORM1IVPROC __glewUniform1iv; +GLEW_FUN_EXPORT PFNGLUNIFORM2FPROC __glewUniform2f; +GLEW_FUN_EXPORT PFNGLUNIFORM2FVPROC __glewUniform2fv; +GLEW_FUN_EXPORT PFNGLUNIFORM2IPROC __glewUniform2i; +GLEW_FUN_EXPORT PFNGLUNIFORM2IVPROC __glewUniform2iv; +GLEW_FUN_EXPORT PFNGLUNIFORM3FPROC __glewUniform3f; +GLEW_FUN_EXPORT PFNGLUNIFORM3FVPROC __glewUniform3fv; +GLEW_FUN_EXPORT PFNGLUNIFORM3IPROC __glewUniform3i; +GLEW_FUN_EXPORT PFNGLUNIFORM3IVPROC __glewUniform3iv; +GLEW_FUN_EXPORT 
PFNGLUNIFORM4FPROC __glewUniform4f; +GLEW_FUN_EXPORT PFNGLUNIFORM4FVPROC __glewUniform4fv; +GLEW_FUN_EXPORT PFNGLUNIFORM4IPROC __glewUniform4i; +GLEW_FUN_EXPORT PFNGLUNIFORM4IVPROC __glewUniform4iv; +GLEW_FUN_EXPORT PFNGLUNIFORMMATRIX2FVPROC __glewUniformMatrix2fv; +GLEW_FUN_EXPORT PFNGLUNIFORMMATRIX3FVPROC __glewUniformMatrix3fv; +GLEW_FUN_EXPORT PFNGLUNIFORMMATRIX4FVPROC __glewUniformMatrix4fv; +GLEW_FUN_EXPORT PFNGLUSEPROGRAMPROC __glewUseProgram; +GLEW_FUN_EXPORT PFNGLVALIDATEPROGRAMPROC __glewValidateProgram; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB1DPROC __glewVertexAttrib1d; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB1DVPROC __glewVertexAttrib1dv; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB1FPROC __glewVertexAttrib1f; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB1FVPROC __glewVertexAttrib1fv; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB1SPROC __glewVertexAttrib1s; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB1SVPROC __glewVertexAttrib1sv; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB2DPROC __glewVertexAttrib2d; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB2DVPROC __glewVertexAttrib2dv; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB2FPROC __glewVertexAttrib2f; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB2FVPROC __glewVertexAttrib2fv; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB2SPROC __glewVertexAttrib2s; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB2SVPROC __glewVertexAttrib2sv; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB3DPROC __glewVertexAttrib3d; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB3DVPROC __glewVertexAttrib3dv; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB3FPROC __glewVertexAttrib3f; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB3FVPROC __glewVertexAttrib3fv; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB3SPROC __glewVertexAttrib3s; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB3SVPROC __glewVertexAttrib3sv; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB4NBVPROC __glewVertexAttrib4Nbv; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB4NIVPROC __glewVertexAttrib4Niv; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB4NSVPROC __glewVertexAttrib4Nsv; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB4NUBPROC __glewVertexAttrib4Nub; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB4NUBVPROC 
__glewVertexAttrib4Nubv; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB4NUIVPROC __glewVertexAttrib4Nuiv; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB4NUSVPROC __glewVertexAttrib4Nusv; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB4BVPROC __glewVertexAttrib4bv; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB4DPROC __glewVertexAttrib4d; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB4DVPROC __glewVertexAttrib4dv; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB4FPROC __glewVertexAttrib4f; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB4FVPROC __glewVertexAttrib4fv; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB4IVPROC __glewVertexAttrib4iv; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB4SPROC __glewVertexAttrib4s; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB4SVPROC __glewVertexAttrib4sv; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB4UBVPROC __glewVertexAttrib4ubv; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB4UIVPROC __glewVertexAttrib4uiv; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB4USVPROC __glewVertexAttrib4usv; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIBPOINTERPROC __glewVertexAttribPointer; + +GLEW_FUN_EXPORT PFNGLUNIFORMMATRIX2X3FVPROC __glewUniformMatrix2x3fv; +GLEW_FUN_EXPORT PFNGLUNIFORMMATRIX2X4FVPROC __glewUniformMatrix2x4fv; +GLEW_FUN_EXPORT PFNGLUNIFORMMATRIX3X2FVPROC __glewUniformMatrix3x2fv; +GLEW_FUN_EXPORT PFNGLUNIFORMMATRIX3X4FVPROC __glewUniformMatrix3x4fv; +GLEW_FUN_EXPORT PFNGLUNIFORMMATRIX4X2FVPROC __glewUniformMatrix4x2fv; +GLEW_FUN_EXPORT PFNGLUNIFORMMATRIX4X3FVPROC __glewUniformMatrix4x3fv; + +GLEW_FUN_EXPORT PFNGLBEGINCONDITIONALRENDERPROC __glewBeginConditionalRender; +GLEW_FUN_EXPORT PFNGLBEGINTRANSFORMFEEDBACKPROC __glewBeginTransformFeedback; +GLEW_FUN_EXPORT PFNGLBINDBUFFERBASEPROC __glewBindBufferBase; +GLEW_FUN_EXPORT PFNGLBINDBUFFERRANGEPROC __glewBindBufferRange; +GLEW_FUN_EXPORT PFNGLBINDFRAGDATALOCATIONPROC __glewBindFragDataLocation; +GLEW_FUN_EXPORT PFNGLCLAMPCOLORPROC __glewClampColor; +GLEW_FUN_EXPORT PFNGLCLEARBUFFERFIPROC __glewClearBufferfi; +GLEW_FUN_EXPORT PFNGLCLEARBUFFERFVPROC __glewClearBufferfv; +GLEW_FUN_EXPORT PFNGLCLEARBUFFERIVPROC __glewClearBufferiv; +GLEW_FUN_EXPORT 
PFNGLCLEARBUFFERUIVPROC __glewClearBufferuiv; +GLEW_FUN_EXPORT PFNGLCOLORMASKIPROC __glewColorMaski; +GLEW_FUN_EXPORT PFNGLDISABLEIPROC __glewDisablei; +GLEW_FUN_EXPORT PFNGLENABLEIPROC __glewEnablei; +GLEW_FUN_EXPORT PFNGLENDCONDITIONALRENDERPROC __glewEndConditionalRender; +GLEW_FUN_EXPORT PFNGLENDTRANSFORMFEEDBACKPROC __glewEndTransformFeedback; +GLEW_FUN_EXPORT PFNGLGETBOOLEANI_VPROC __glewGetBooleani_v; +GLEW_FUN_EXPORT PFNGLGETFRAGDATALOCATIONPROC __glewGetFragDataLocation; +GLEW_FUN_EXPORT PFNGLGETINTEGERI_VPROC __glewGetIntegeri_v; +GLEW_FUN_EXPORT PFNGLGETSTRINGIPROC __glewGetStringi; +GLEW_FUN_EXPORT PFNGLGETTEXPARAMETERIIVPROC __glewGetTexParameterIiv; +GLEW_FUN_EXPORT PFNGLGETTEXPARAMETERIUIVPROC __glewGetTexParameterIuiv; +GLEW_FUN_EXPORT PFNGLGETTRANSFORMFEEDBACKVARYINGPROC __glewGetTransformFeedbackVarying; +GLEW_FUN_EXPORT PFNGLGETUNIFORMUIVPROC __glewGetUniformuiv; +GLEW_FUN_EXPORT PFNGLGETVERTEXATTRIBIIVPROC __glewGetVertexAttribIiv; +GLEW_FUN_EXPORT PFNGLGETVERTEXATTRIBIUIVPROC __glewGetVertexAttribIuiv; +GLEW_FUN_EXPORT PFNGLISENABLEDIPROC __glewIsEnabledi; +GLEW_FUN_EXPORT PFNGLTEXPARAMETERIIVPROC __glewTexParameterIiv; +GLEW_FUN_EXPORT PFNGLTEXPARAMETERIUIVPROC __glewTexParameterIuiv; +GLEW_FUN_EXPORT PFNGLTRANSFORMFEEDBACKVARYINGSPROC __glewTransformFeedbackVaryings; +GLEW_FUN_EXPORT PFNGLUNIFORM1UIPROC __glewUniform1ui; +GLEW_FUN_EXPORT PFNGLUNIFORM1UIVPROC __glewUniform1uiv; +GLEW_FUN_EXPORT PFNGLUNIFORM2UIPROC __glewUniform2ui; +GLEW_FUN_EXPORT PFNGLUNIFORM2UIVPROC __glewUniform2uiv; +GLEW_FUN_EXPORT PFNGLUNIFORM3UIPROC __glewUniform3ui; +GLEW_FUN_EXPORT PFNGLUNIFORM3UIVPROC __glewUniform3uiv; +GLEW_FUN_EXPORT PFNGLUNIFORM4UIPROC __glewUniform4ui; +GLEW_FUN_EXPORT PFNGLUNIFORM4UIVPROC __glewUniform4uiv; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIBI1IPROC __glewVertexAttribI1i; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIBI1IVPROC __glewVertexAttribI1iv; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIBI1UIPROC __glewVertexAttribI1ui; +GLEW_FUN_EXPORT 
PFNGLVERTEXATTRIBI1UIVPROC __glewVertexAttribI1uiv; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIBI2IPROC __glewVertexAttribI2i; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIBI2IVPROC __glewVertexAttribI2iv; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIBI2UIPROC __glewVertexAttribI2ui; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIBI2UIVPROC __glewVertexAttribI2uiv; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIBI3IPROC __glewVertexAttribI3i; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIBI3IVPROC __glewVertexAttribI3iv; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIBI3UIPROC __glewVertexAttribI3ui; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIBI3UIVPROC __glewVertexAttribI3uiv; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIBI4BVPROC __glewVertexAttribI4bv; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIBI4IPROC __glewVertexAttribI4i; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIBI4IVPROC __glewVertexAttribI4iv; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIBI4SVPROC __glewVertexAttribI4sv; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIBI4UBVPROC __glewVertexAttribI4ubv; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIBI4UIPROC __glewVertexAttribI4ui; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIBI4UIVPROC __glewVertexAttribI4uiv; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIBI4USVPROC __glewVertexAttribI4usv; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIBIPOINTERPROC __glewVertexAttribIPointer; + +GLEW_FUN_EXPORT PFNGLTBUFFERMASK3DFXPROC __glewTbufferMask3DFX; + +GLEW_FUN_EXPORT PFNGLDRAWELEMENTARRAYAPPLEPROC __glewDrawElementArrayAPPLE; +GLEW_FUN_EXPORT PFNGLDRAWRANGEELEMENTARRAYAPPLEPROC __glewDrawRangeElementArrayAPPLE; +GLEW_FUN_EXPORT PFNGLELEMENTPOINTERAPPLEPROC __glewElementPointerAPPLE; +GLEW_FUN_EXPORT PFNGLMULTIDRAWELEMENTARRAYAPPLEPROC __glewMultiDrawElementArrayAPPLE; +GLEW_FUN_EXPORT PFNGLMULTIDRAWRANGEELEMENTARRAYAPPLEPROC __glewMultiDrawRangeElementArrayAPPLE; + +GLEW_FUN_EXPORT PFNGLDELETEFENCESAPPLEPROC __glewDeleteFencesAPPLE; +GLEW_FUN_EXPORT PFNGLFINISHFENCEAPPLEPROC __glewFinishFenceAPPLE; +GLEW_FUN_EXPORT PFNGLFINISHOBJECTAPPLEPROC __glewFinishObjectAPPLE; +GLEW_FUN_EXPORT PFNGLGENFENCESAPPLEPROC __glewGenFencesAPPLE; +GLEW_FUN_EXPORT 
PFNGLISFENCEAPPLEPROC __glewIsFenceAPPLE; +GLEW_FUN_EXPORT PFNGLSETFENCEAPPLEPROC __glewSetFenceAPPLE; +GLEW_FUN_EXPORT PFNGLTESTFENCEAPPLEPROC __glewTestFenceAPPLE; +GLEW_FUN_EXPORT PFNGLTESTOBJECTAPPLEPROC __glewTestObjectAPPLE; + +GLEW_FUN_EXPORT PFNGLBUFFERPARAMETERIAPPLEPROC __glewBufferParameteriAPPLE; +GLEW_FUN_EXPORT PFNGLFLUSHMAPPEDBUFFERRANGEAPPLEPROC __glewFlushMappedBufferRangeAPPLE; + +GLEW_FUN_EXPORT PFNGLGETTEXPARAMETERPOINTERVAPPLEPROC __glewGetTexParameterPointervAPPLE; +GLEW_FUN_EXPORT PFNGLTEXTURERANGEAPPLEPROC __glewTextureRangeAPPLE; + +GLEW_FUN_EXPORT PFNGLBINDVERTEXARRAYAPPLEPROC __glewBindVertexArrayAPPLE; +GLEW_FUN_EXPORT PFNGLDELETEVERTEXARRAYSAPPLEPROC __glewDeleteVertexArraysAPPLE; +GLEW_FUN_EXPORT PFNGLGENVERTEXARRAYSAPPLEPROC __glewGenVertexArraysAPPLE; +GLEW_FUN_EXPORT PFNGLISVERTEXARRAYAPPLEPROC __glewIsVertexArrayAPPLE; + +GLEW_FUN_EXPORT PFNGLFLUSHVERTEXARRAYRANGEAPPLEPROC __glewFlushVertexArrayRangeAPPLE; +GLEW_FUN_EXPORT PFNGLVERTEXARRAYPARAMETERIAPPLEPROC __glewVertexArrayParameteriAPPLE; +GLEW_FUN_EXPORT PFNGLVERTEXARRAYRANGEAPPLEPROC __glewVertexArrayRangeAPPLE; + +GLEW_FUN_EXPORT PFNGLCLAMPCOLORARBPROC __glewClampColorARB; + +GLEW_FUN_EXPORT PFNGLDRAWBUFFERSARBPROC __glewDrawBuffersARB; + +GLEW_FUN_EXPORT PFNGLDRAWARRAYSINSTANCEDARBPROC __glewDrawArraysInstancedARB; +GLEW_FUN_EXPORT PFNGLDRAWELEMENTSINSTANCEDARBPROC __glewDrawElementsInstancedARB; + +GLEW_FUN_EXPORT PFNGLBINDFRAMEBUFFERPROC __glewBindFramebuffer; +GLEW_FUN_EXPORT PFNGLBINDRENDERBUFFERPROC __glewBindRenderbuffer; +GLEW_FUN_EXPORT PFNGLBLITFRAMEBUFFERPROC __glewBlitFramebuffer; +GLEW_FUN_EXPORT PFNGLCHECKFRAMEBUFFERSTATUSPROC __glewCheckFramebufferStatus; +GLEW_FUN_EXPORT PFNGLDELETEFRAMEBUFFERSPROC __glewDeleteFramebuffers; +GLEW_FUN_EXPORT PFNGLDELETERENDERBUFFERSPROC __glewDeleteRenderbuffers; +GLEW_FUN_EXPORT PFNGLFRAMEBUFFERRENDERBUFFERPROC __glewFramebufferRenderbuffer; +GLEW_FUN_EXPORT PFNGLFRAMEBUFFERTEXTURLAYERPROC __glewFramebufferTexturLayer; 
+GLEW_FUN_EXPORT PFNGLFRAMEBUFFERTEXTURE1DPROC __glewFramebufferTexture1D; +GLEW_FUN_EXPORT PFNGLFRAMEBUFFERTEXTURE2DPROC __glewFramebufferTexture2D; +GLEW_FUN_EXPORT PFNGLFRAMEBUFFERTEXTURE3DPROC __glewFramebufferTexture3D; +GLEW_FUN_EXPORT PFNGLGENFRAMEBUFFERSPROC __glewGenFramebuffers; +GLEW_FUN_EXPORT PFNGLGENRENDERBUFFERSPROC __glewGenRenderbuffers; +GLEW_FUN_EXPORT PFNGLGENERATEMIPMAPPROC __glewGenerateMipmap; +GLEW_FUN_EXPORT PFNGLGETFRAMEBUFFERATTACHMENTPARAMETERIVPROC __glewGetFramebufferAttachmentParameteriv; +GLEW_FUN_EXPORT PFNGLGETRENDERBUFFERPARAMETERIVPROC __glewGetRenderbufferParameteriv; +GLEW_FUN_EXPORT PFNGLISFRAMEBUFFERPROC __glewIsFramebuffer; +GLEW_FUN_EXPORT PFNGLISRENDERBUFFERPROC __glewIsRenderbuffer; +GLEW_FUN_EXPORT PFNGLRENDERBUFFERSTORAGEPROC __glewRenderbufferStorage; +GLEW_FUN_EXPORT PFNGLRENDERBUFFERSTORAGEMULTISAMPLEPROC __glewRenderbufferStorageMultisample; + +GLEW_FUN_EXPORT PFNGLFRAMEBUFFERTEXTUREARBPROC __glewFramebufferTextureARB; +GLEW_FUN_EXPORT PFNGLFRAMEBUFFERTEXTUREFACEARBPROC __glewFramebufferTextureFaceARB; +GLEW_FUN_EXPORT PFNGLFRAMEBUFFERTEXTURELAYERARBPROC __glewFramebufferTextureLayerARB; +GLEW_FUN_EXPORT PFNGLPROGRAMPARAMETERIARBPROC __glewProgramParameteriARB; + +GLEW_FUN_EXPORT PFNGLCOLORSUBTABLEPROC __glewColorSubTable; +GLEW_FUN_EXPORT PFNGLCOLORTABLEPROC __glewColorTable; +GLEW_FUN_EXPORT PFNGLCOLORTABLEPARAMETERFVPROC __glewColorTableParameterfv; +GLEW_FUN_EXPORT PFNGLCOLORTABLEPARAMETERIVPROC __glewColorTableParameteriv; +GLEW_FUN_EXPORT PFNGLCONVOLUTIONFILTER1DPROC __glewConvolutionFilter1D; +GLEW_FUN_EXPORT PFNGLCONVOLUTIONFILTER2DPROC __glewConvolutionFilter2D; +GLEW_FUN_EXPORT PFNGLCONVOLUTIONPARAMETERFPROC __glewConvolutionParameterf; +GLEW_FUN_EXPORT PFNGLCONVOLUTIONPARAMETERFVPROC __glewConvolutionParameterfv; +GLEW_FUN_EXPORT PFNGLCONVOLUTIONPARAMETERIPROC __glewConvolutionParameteri; +GLEW_FUN_EXPORT PFNGLCONVOLUTIONPARAMETERIVPROC __glewConvolutionParameteriv; +GLEW_FUN_EXPORT 
PFNGLCOPYCOLORSUBTABLEPROC __glewCopyColorSubTable; +GLEW_FUN_EXPORT PFNGLCOPYCOLORTABLEPROC __glewCopyColorTable; +GLEW_FUN_EXPORT PFNGLCOPYCONVOLUTIONFILTER1DPROC __glewCopyConvolutionFilter1D; +GLEW_FUN_EXPORT PFNGLCOPYCONVOLUTIONFILTER2DPROC __glewCopyConvolutionFilter2D; +GLEW_FUN_EXPORT PFNGLGETCOLORTABLEPROC __glewGetColorTable; +GLEW_FUN_EXPORT PFNGLGETCOLORTABLEPARAMETERFVPROC __glewGetColorTableParameterfv; +GLEW_FUN_EXPORT PFNGLGETCOLORTABLEPARAMETERIVPROC __glewGetColorTableParameteriv; +GLEW_FUN_EXPORT PFNGLGETCONVOLUTIONFILTERPROC __glewGetConvolutionFilter; +GLEW_FUN_EXPORT PFNGLGETCONVOLUTIONPARAMETERFVPROC __glewGetConvolutionParameterfv; +GLEW_FUN_EXPORT PFNGLGETCONVOLUTIONPARAMETERIVPROC __glewGetConvolutionParameteriv; +GLEW_FUN_EXPORT PFNGLGETHISTOGRAMPROC __glewGetHistogram; +GLEW_FUN_EXPORT PFNGLGETHISTOGRAMPARAMETERFVPROC __glewGetHistogramParameterfv; +GLEW_FUN_EXPORT PFNGLGETHISTOGRAMPARAMETERIVPROC __glewGetHistogramParameteriv; +GLEW_FUN_EXPORT PFNGLGETMINMAXPROC __glewGetMinmax; +GLEW_FUN_EXPORT PFNGLGETMINMAXPARAMETERFVPROC __glewGetMinmaxParameterfv; +GLEW_FUN_EXPORT PFNGLGETMINMAXPARAMETERIVPROC __glewGetMinmaxParameteriv; +GLEW_FUN_EXPORT PFNGLGETSEPARABLEFILTERPROC __glewGetSeparableFilter; +GLEW_FUN_EXPORT PFNGLHISTOGRAMPROC __glewHistogram; +GLEW_FUN_EXPORT PFNGLMINMAXPROC __glewMinmax; +GLEW_FUN_EXPORT PFNGLRESETHISTOGRAMPROC __glewResetHistogram; +GLEW_FUN_EXPORT PFNGLRESETMINMAXPROC __glewResetMinmax; +GLEW_FUN_EXPORT PFNGLSEPARABLEFILTER2DPROC __glewSeparableFilter2D; + +GLEW_FUN_EXPORT PFNGLVERTEXATTRIBDIVISORARBPROC __glewVertexAttribDivisorARB; + +GLEW_FUN_EXPORT PFNGLFLUSHMAPPEDBUFFERRANGEPROC __glewFlushMappedBufferRange; +GLEW_FUN_EXPORT PFNGLMAPBUFFERRANGEPROC __glewMapBufferRange; + +GLEW_FUN_EXPORT PFNGLCURRENTPALETTEMATRIXARBPROC __glewCurrentPaletteMatrixARB; +GLEW_FUN_EXPORT PFNGLMATRIXINDEXPOINTERARBPROC __glewMatrixIndexPointerARB; +GLEW_FUN_EXPORT PFNGLMATRIXINDEXUBVARBPROC __glewMatrixIndexubvARB; 
+GLEW_FUN_EXPORT PFNGLMATRIXINDEXUIVARBPROC __glewMatrixIndexuivARB; +GLEW_FUN_EXPORT PFNGLMATRIXINDEXUSVARBPROC __glewMatrixIndexusvARB; + +GLEW_FUN_EXPORT PFNGLSAMPLECOVERAGEARBPROC __glewSampleCoverageARB; + +GLEW_FUN_EXPORT PFNGLACTIVETEXTUREARBPROC __glewActiveTextureARB; +GLEW_FUN_EXPORT PFNGLCLIENTACTIVETEXTUREARBPROC __glewClientActiveTextureARB; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD1DARBPROC __glewMultiTexCoord1dARB; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD1DVARBPROC __glewMultiTexCoord1dvARB; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD1FARBPROC __glewMultiTexCoord1fARB; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD1FVARBPROC __glewMultiTexCoord1fvARB; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD1IARBPROC __glewMultiTexCoord1iARB; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD1IVARBPROC __glewMultiTexCoord1ivARB; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD1SARBPROC __glewMultiTexCoord1sARB; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD1SVARBPROC __glewMultiTexCoord1svARB; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD2DARBPROC __glewMultiTexCoord2dARB; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD2DVARBPROC __glewMultiTexCoord2dvARB; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD2FARBPROC __glewMultiTexCoord2fARB; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD2FVARBPROC __glewMultiTexCoord2fvARB; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD2IARBPROC __glewMultiTexCoord2iARB; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD2IVARBPROC __glewMultiTexCoord2ivARB; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD2SARBPROC __glewMultiTexCoord2sARB; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD2SVARBPROC __glewMultiTexCoord2svARB; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD3DARBPROC __glewMultiTexCoord3dARB; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD3DVARBPROC __glewMultiTexCoord3dvARB; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD3FARBPROC __glewMultiTexCoord3fARB; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD3FVARBPROC __glewMultiTexCoord3fvARB; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD3IARBPROC __glewMultiTexCoord3iARB; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD3IVARBPROC __glewMultiTexCoord3ivARB; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD3SARBPROC 
__glewMultiTexCoord3sARB; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD3SVARBPROC __glewMultiTexCoord3svARB; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD4DARBPROC __glewMultiTexCoord4dARB; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD4DVARBPROC __glewMultiTexCoord4dvARB; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD4FARBPROC __glewMultiTexCoord4fARB; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD4FVARBPROC __glewMultiTexCoord4fvARB; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD4IARBPROC __glewMultiTexCoord4iARB; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD4IVARBPROC __glewMultiTexCoord4ivARB; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD4SARBPROC __glewMultiTexCoord4sARB; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD4SVARBPROC __glewMultiTexCoord4svARB; + +GLEW_FUN_EXPORT PFNGLBEGINQUERYARBPROC __glewBeginQueryARB; +GLEW_FUN_EXPORT PFNGLDELETEQUERIESARBPROC __glewDeleteQueriesARB; +GLEW_FUN_EXPORT PFNGLENDQUERYARBPROC __glewEndQueryARB; +GLEW_FUN_EXPORT PFNGLGENQUERIESARBPROC __glewGenQueriesARB; +GLEW_FUN_EXPORT PFNGLGETQUERYOBJECTIVARBPROC __glewGetQueryObjectivARB; +GLEW_FUN_EXPORT PFNGLGETQUERYOBJECTUIVARBPROC __glewGetQueryObjectuivARB; +GLEW_FUN_EXPORT PFNGLGETQUERYIVARBPROC __glewGetQueryivARB; +GLEW_FUN_EXPORT PFNGLISQUERYARBPROC __glewIsQueryARB; + +GLEW_FUN_EXPORT PFNGLPOINTPARAMETERFARBPROC __glewPointParameterfARB; +GLEW_FUN_EXPORT PFNGLPOINTPARAMETERFVARBPROC __glewPointParameterfvARB; + +GLEW_FUN_EXPORT PFNGLATTACHOBJECTARBPROC __glewAttachObjectARB; +GLEW_FUN_EXPORT PFNGLCOMPILESHADERARBPROC __glewCompileShaderARB; +GLEW_FUN_EXPORT PFNGLCREATEPROGRAMOBJECTARBPROC __glewCreateProgramObjectARB; +GLEW_FUN_EXPORT PFNGLCREATESHADEROBJECTARBPROC __glewCreateShaderObjectARB; +GLEW_FUN_EXPORT PFNGLDELETEOBJECTARBPROC __glewDeleteObjectARB; +GLEW_FUN_EXPORT PFNGLDETACHOBJECTARBPROC __glewDetachObjectARB; +GLEW_FUN_EXPORT PFNGLGETACTIVEUNIFORMARBPROC __glewGetActiveUniformARB; +GLEW_FUN_EXPORT PFNGLGETATTACHEDOBJECTSARBPROC __glewGetAttachedObjectsARB; +GLEW_FUN_EXPORT PFNGLGETHANDLEARBPROC __glewGetHandleARB; +GLEW_FUN_EXPORT 
PFNGLGETINFOLOGARBPROC __glewGetInfoLogARB; +GLEW_FUN_EXPORT PFNGLGETOBJECTPARAMETERFVARBPROC __glewGetObjectParameterfvARB; +GLEW_FUN_EXPORT PFNGLGETOBJECTPARAMETERIVARBPROC __glewGetObjectParameterivARB; +GLEW_FUN_EXPORT PFNGLGETSHADERSOURCEARBPROC __glewGetShaderSourceARB; +GLEW_FUN_EXPORT PFNGLGETUNIFORMLOCATIONARBPROC __glewGetUniformLocationARB; +GLEW_FUN_EXPORT PFNGLGETUNIFORMFVARBPROC __glewGetUniformfvARB; +GLEW_FUN_EXPORT PFNGLGETUNIFORMIVARBPROC __glewGetUniformivARB; +GLEW_FUN_EXPORT PFNGLLINKPROGRAMARBPROC __glewLinkProgramARB; +GLEW_FUN_EXPORT PFNGLSHADERSOURCEARBPROC __glewShaderSourceARB; +GLEW_FUN_EXPORT PFNGLUNIFORM1FARBPROC __glewUniform1fARB; +GLEW_FUN_EXPORT PFNGLUNIFORM1FVARBPROC __glewUniform1fvARB; +GLEW_FUN_EXPORT PFNGLUNIFORM1IARBPROC __glewUniform1iARB; +GLEW_FUN_EXPORT PFNGLUNIFORM1IVARBPROC __glewUniform1ivARB; +GLEW_FUN_EXPORT PFNGLUNIFORM2FARBPROC __glewUniform2fARB; +GLEW_FUN_EXPORT PFNGLUNIFORM2FVARBPROC __glewUniform2fvARB; +GLEW_FUN_EXPORT PFNGLUNIFORM2IARBPROC __glewUniform2iARB; +GLEW_FUN_EXPORT PFNGLUNIFORM2IVARBPROC __glewUniform2ivARB; +GLEW_FUN_EXPORT PFNGLUNIFORM3FARBPROC __glewUniform3fARB; +GLEW_FUN_EXPORT PFNGLUNIFORM3FVARBPROC __glewUniform3fvARB; +GLEW_FUN_EXPORT PFNGLUNIFORM3IARBPROC __glewUniform3iARB; +GLEW_FUN_EXPORT PFNGLUNIFORM3IVARBPROC __glewUniform3ivARB; +GLEW_FUN_EXPORT PFNGLUNIFORM4FARBPROC __glewUniform4fARB; +GLEW_FUN_EXPORT PFNGLUNIFORM4FVARBPROC __glewUniform4fvARB; +GLEW_FUN_EXPORT PFNGLUNIFORM4IARBPROC __glewUniform4iARB; +GLEW_FUN_EXPORT PFNGLUNIFORM4IVARBPROC __glewUniform4ivARB; +GLEW_FUN_EXPORT PFNGLUNIFORMMATRIX2FVARBPROC __glewUniformMatrix2fvARB; +GLEW_FUN_EXPORT PFNGLUNIFORMMATRIX3FVARBPROC __glewUniformMatrix3fvARB; +GLEW_FUN_EXPORT PFNGLUNIFORMMATRIX4FVARBPROC __glewUniformMatrix4fvARB; +GLEW_FUN_EXPORT PFNGLUSEPROGRAMOBJECTARBPROC __glewUseProgramObjectARB; +GLEW_FUN_EXPORT PFNGLVALIDATEPROGRAMARBPROC __glewValidateProgramARB; + +GLEW_FUN_EXPORT PFNGLTEXBUFFERARBPROC __glewTexBufferARB; + 
+GLEW_FUN_EXPORT PFNGLCOMPRESSEDTEXIMAGE1DARBPROC __glewCompressedTexImage1DARB; +GLEW_FUN_EXPORT PFNGLCOMPRESSEDTEXIMAGE2DARBPROC __glewCompressedTexImage2DARB; +GLEW_FUN_EXPORT PFNGLCOMPRESSEDTEXIMAGE3DARBPROC __glewCompressedTexImage3DARB; +GLEW_FUN_EXPORT PFNGLCOMPRESSEDTEXSUBIMAGE1DARBPROC __glewCompressedTexSubImage1DARB; +GLEW_FUN_EXPORT PFNGLCOMPRESSEDTEXSUBIMAGE2DARBPROC __glewCompressedTexSubImage2DARB; +GLEW_FUN_EXPORT PFNGLCOMPRESSEDTEXSUBIMAGE3DARBPROC __glewCompressedTexSubImage3DARB; +GLEW_FUN_EXPORT PFNGLGETCOMPRESSEDTEXIMAGEARBPROC __glewGetCompressedTexImageARB; + +GLEW_FUN_EXPORT PFNGLLOADTRANSPOSEMATRIXDARBPROC __glewLoadTransposeMatrixdARB; +GLEW_FUN_EXPORT PFNGLLOADTRANSPOSEMATRIXFARBPROC __glewLoadTransposeMatrixfARB; +GLEW_FUN_EXPORT PFNGLMULTTRANSPOSEMATRIXDARBPROC __glewMultTransposeMatrixdARB; +GLEW_FUN_EXPORT PFNGLMULTTRANSPOSEMATRIXFARBPROC __glewMultTransposeMatrixfARB; + +GLEW_FUN_EXPORT PFNGLBINDVERTEXARRAYPROC __glewBindVertexArray; +GLEW_FUN_EXPORT PFNGLDELETEVERTEXARRAYSPROC __glewDeleteVertexArrays; +GLEW_FUN_EXPORT PFNGLGENVERTEXARRAYSPROC __glewGenVertexArrays; +GLEW_FUN_EXPORT PFNGLISVERTEXARRAYPROC __glewIsVertexArray; + +GLEW_FUN_EXPORT PFNGLVERTEXBLENDARBPROC __glewVertexBlendARB; +GLEW_FUN_EXPORT PFNGLWEIGHTPOINTERARBPROC __glewWeightPointerARB; +GLEW_FUN_EXPORT PFNGLWEIGHTBVARBPROC __glewWeightbvARB; +GLEW_FUN_EXPORT PFNGLWEIGHTDVARBPROC __glewWeightdvARB; +GLEW_FUN_EXPORT PFNGLWEIGHTFVARBPROC __glewWeightfvARB; +GLEW_FUN_EXPORT PFNGLWEIGHTIVARBPROC __glewWeightivARB; +GLEW_FUN_EXPORT PFNGLWEIGHTSVARBPROC __glewWeightsvARB; +GLEW_FUN_EXPORT PFNGLWEIGHTUBVARBPROC __glewWeightubvARB; +GLEW_FUN_EXPORT PFNGLWEIGHTUIVARBPROC __glewWeightuivARB; +GLEW_FUN_EXPORT PFNGLWEIGHTUSVARBPROC __glewWeightusvARB; + +GLEW_FUN_EXPORT PFNGLBINDBUFFERARBPROC __glewBindBufferARB; +GLEW_FUN_EXPORT PFNGLBUFFERDATAARBPROC __glewBufferDataARB; +GLEW_FUN_EXPORT PFNGLBUFFERSUBDATAARBPROC __glewBufferSubDataARB; +GLEW_FUN_EXPORT 
PFNGLDELETEBUFFERSARBPROC __glewDeleteBuffersARB; +GLEW_FUN_EXPORT PFNGLGENBUFFERSARBPROC __glewGenBuffersARB; +GLEW_FUN_EXPORT PFNGLGETBUFFERPARAMETERIVARBPROC __glewGetBufferParameterivARB; +GLEW_FUN_EXPORT PFNGLGETBUFFERPOINTERVARBPROC __glewGetBufferPointervARB; +GLEW_FUN_EXPORT PFNGLGETBUFFERSUBDATAARBPROC __glewGetBufferSubDataARB; +GLEW_FUN_EXPORT PFNGLISBUFFERARBPROC __glewIsBufferARB; +GLEW_FUN_EXPORT PFNGLMAPBUFFERARBPROC __glewMapBufferARB; +GLEW_FUN_EXPORT PFNGLUNMAPBUFFERARBPROC __glewUnmapBufferARB; + +GLEW_FUN_EXPORT PFNGLBINDPROGRAMARBPROC __glewBindProgramARB; +GLEW_FUN_EXPORT PFNGLDELETEPROGRAMSARBPROC __glewDeleteProgramsARB; +GLEW_FUN_EXPORT PFNGLDISABLEVERTEXATTRIBARRAYARBPROC __glewDisableVertexAttribArrayARB; +GLEW_FUN_EXPORT PFNGLENABLEVERTEXATTRIBARRAYARBPROC __glewEnableVertexAttribArrayARB; +GLEW_FUN_EXPORT PFNGLGENPROGRAMSARBPROC __glewGenProgramsARB; +GLEW_FUN_EXPORT PFNGLGETPROGRAMENVPARAMETERDVARBPROC __glewGetProgramEnvParameterdvARB; +GLEW_FUN_EXPORT PFNGLGETPROGRAMENVPARAMETERFVARBPROC __glewGetProgramEnvParameterfvARB; +GLEW_FUN_EXPORT PFNGLGETPROGRAMLOCALPARAMETERDVARBPROC __glewGetProgramLocalParameterdvARB; +GLEW_FUN_EXPORT PFNGLGETPROGRAMLOCALPARAMETERFVARBPROC __glewGetProgramLocalParameterfvARB; +GLEW_FUN_EXPORT PFNGLGETPROGRAMSTRINGARBPROC __glewGetProgramStringARB; +GLEW_FUN_EXPORT PFNGLGETPROGRAMIVARBPROC __glewGetProgramivARB; +GLEW_FUN_EXPORT PFNGLGETVERTEXATTRIBPOINTERVARBPROC __glewGetVertexAttribPointervARB; +GLEW_FUN_EXPORT PFNGLGETVERTEXATTRIBDVARBPROC __glewGetVertexAttribdvARB; +GLEW_FUN_EXPORT PFNGLGETVERTEXATTRIBFVARBPROC __glewGetVertexAttribfvARB; +GLEW_FUN_EXPORT PFNGLGETVERTEXATTRIBIVARBPROC __glewGetVertexAttribivARB; +GLEW_FUN_EXPORT PFNGLISPROGRAMARBPROC __glewIsProgramARB; +GLEW_FUN_EXPORT PFNGLPROGRAMENVPARAMETER4DARBPROC __glewProgramEnvParameter4dARB; +GLEW_FUN_EXPORT PFNGLPROGRAMENVPARAMETER4DVARBPROC __glewProgramEnvParameter4dvARB; +GLEW_FUN_EXPORT PFNGLPROGRAMENVPARAMETER4FARBPROC 
__glewProgramEnvParameter4fARB; +GLEW_FUN_EXPORT PFNGLPROGRAMENVPARAMETER4FVARBPROC __glewProgramEnvParameter4fvARB; +GLEW_FUN_EXPORT PFNGLPROGRAMLOCALPARAMETER4DARBPROC __glewProgramLocalParameter4dARB; +GLEW_FUN_EXPORT PFNGLPROGRAMLOCALPARAMETER4DVARBPROC __glewProgramLocalParameter4dvARB; +GLEW_FUN_EXPORT PFNGLPROGRAMLOCALPARAMETER4FARBPROC __glewProgramLocalParameter4fARB; +GLEW_FUN_EXPORT PFNGLPROGRAMLOCALPARAMETER4FVARBPROC __glewProgramLocalParameter4fvARB; +GLEW_FUN_EXPORT PFNGLPROGRAMSTRINGARBPROC __glewProgramStringARB; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB1DARBPROC __glewVertexAttrib1dARB; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB1DVARBPROC __glewVertexAttrib1dvARB; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB1FARBPROC __glewVertexAttrib1fARB; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB1FVARBPROC __glewVertexAttrib1fvARB; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB1SARBPROC __glewVertexAttrib1sARB; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB1SVARBPROC __glewVertexAttrib1svARB; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB2DARBPROC __glewVertexAttrib2dARB; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB2DVARBPROC __glewVertexAttrib2dvARB; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB2FARBPROC __glewVertexAttrib2fARB; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB2FVARBPROC __glewVertexAttrib2fvARB; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB2SARBPROC __glewVertexAttrib2sARB; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB2SVARBPROC __glewVertexAttrib2svARB; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB3DARBPROC __glewVertexAttrib3dARB; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB3DVARBPROC __glewVertexAttrib3dvARB; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB3FARBPROC __glewVertexAttrib3fARB; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB3FVARBPROC __glewVertexAttrib3fvARB; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB3SARBPROC __glewVertexAttrib3sARB; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB3SVARBPROC __glewVertexAttrib3svARB; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB4NBVARBPROC __glewVertexAttrib4NbvARB; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB4NIVARBPROC __glewVertexAttrib4NivARB; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB4NSVARBPROC 
__glewVertexAttrib4NsvARB; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB4NUBARBPROC __glewVertexAttrib4NubARB; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB4NUBVARBPROC __glewVertexAttrib4NubvARB; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB4NUIVARBPROC __glewVertexAttrib4NuivARB; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB4NUSVARBPROC __glewVertexAttrib4NusvARB; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB4BVARBPROC __glewVertexAttrib4bvARB; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB4DARBPROC __glewVertexAttrib4dARB; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB4DVARBPROC __glewVertexAttrib4dvARB; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB4FARBPROC __glewVertexAttrib4fARB; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB4FVARBPROC __glewVertexAttrib4fvARB; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB4IVARBPROC __glewVertexAttrib4ivARB; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB4SARBPROC __glewVertexAttrib4sARB; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB4SVARBPROC __glewVertexAttrib4svARB; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB4UBVARBPROC __glewVertexAttrib4ubvARB; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB4UIVARBPROC __glewVertexAttrib4uivARB; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB4USVARBPROC __glewVertexAttrib4usvARB; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIBPOINTERARBPROC __glewVertexAttribPointerARB; + +GLEW_FUN_EXPORT PFNGLBINDATTRIBLOCATIONARBPROC __glewBindAttribLocationARB; +GLEW_FUN_EXPORT PFNGLGETACTIVEATTRIBARBPROC __glewGetActiveAttribARB; +GLEW_FUN_EXPORT PFNGLGETATTRIBLOCATIONARBPROC __glewGetAttribLocationARB; + +GLEW_FUN_EXPORT PFNGLWINDOWPOS2DARBPROC __glewWindowPos2dARB; +GLEW_FUN_EXPORT PFNGLWINDOWPOS2DVARBPROC __glewWindowPos2dvARB; +GLEW_FUN_EXPORT PFNGLWINDOWPOS2FARBPROC __glewWindowPos2fARB; +GLEW_FUN_EXPORT PFNGLWINDOWPOS2FVARBPROC __glewWindowPos2fvARB; +GLEW_FUN_EXPORT PFNGLWINDOWPOS2IARBPROC __glewWindowPos2iARB; +GLEW_FUN_EXPORT PFNGLWINDOWPOS2IVARBPROC __glewWindowPos2ivARB; +GLEW_FUN_EXPORT PFNGLWINDOWPOS2SARBPROC __glewWindowPos2sARB; +GLEW_FUN_EXPORT PFNGLWINDOWPOS2SVARBPROC __glewWindowPos2svARB; +GLEW_FUN_EXPORT PFNGLWINDOWPOS3DARBPROC __glewWindowPos3dARB; 
+GLEW_FUN_EXPORT PFNGLWINDOWPOS3DVARBPROC __glewWindowPos3dvARB; +GLEW_FUN_EXPORT PFNGLWINDOWPOS3FARBPROC __glewWindowPos3fARB; +GLEW_FUN_EXPORT PFNGLWINDOWPOS3FVARBPROC __glewWindowPos3fvARB; +GLEW_FUN_EXPORT PFNGLWINDOWPOS3IARBPROC __glewWindowPos3iARB; +GLEW_FUN_EXPORT PFNGLWINDOWPOS3IVARBPROC __glewWindowPos3ivARB; +GLEW_FUN_EXPORT PFNGLWINDOWPOS3SARBPROC __glewWindowPos3sARB; +GLEW_FUN_EXPORT PFNGLWINDOWPOS3SVARBPROC __glewWindowPos3svARB; + +GLEW_FUN_EXPORT PFNGLDRAWBUFFERSATIPROC __glewDrawBuffersATI; + +GLEW_FUN_EXPORT PFNGLDRAWELEMENTARRAYATIPROC __glewDrawElementArrayATI; +GLEW_FUN_EXPORT PFNGLDRAWRANGEELEMENTARRAYATIPROC __glewDrawRangeElementArrayATI; +GLEW_FUN_EXPORT PFNGLELEMENTPOINTERATIPROC __glewElementPointerATI; + +GLEW_FUN_EXPORT PFNGLGETTEXBUMPPARAMETERFVATIPROC __glewGetTexBumpParameterfvATI; +GLEW_FUN_EXPORT PFNGLGETTEXBUMPPARAMETERIVATIPROC __glewGetTexBumpParameterivATI; +GLEW_FUN_EXPORT PFNGLTEXBUMPPARAMETERFVATIPROC __glewTexBumpParameterfvATI; +GLEW_FUN_EXPORT PFNGLTEXBUMPPARAMETERIVATIPROC __glewTexBumpParameterivATI; + +GLEW_FUN_EXPORT PFNGLALPHAFRAGMENTOP1ATIPROC __glewAlphaFragmentOp1ATI; +GLEW_FUN_EXPORT PFNGLALPHAFRAGMENTOP2ATIPROC __glewAlphaFragmentOp2ATI; +GLEW_FUN_EXPORT PFNGLALPHAFRAGMENTOP3ATIPROC __glewAlphaFragmentOp3ATI; +GLEW_FUN_EXPORT PFNGLBEGINFRAGMENTSHADERATIPROC __glewBeginFragmentShaderATI; +GLEW_FUN_EXPORT PFNGLBINDFRAGMENTSHADERATIPROC __glewBindFragmentShaderATI; +GLEW_FUN_EXPORT PFNGLCOLORFRAGMENTOP1ATIPROC __glewColorFragmentOp1ATI; +GLEW_FUN_EXPORT PFNGLCOLORFRAGMENTOP2ATIPROC __glewColorFragmentOp2ATI; +GLEW_FUN_EXPORT PFNGLCOLORFRAGMENTOP3ATIPROC __glewColorFragmentOp3ATI; +GLEW_FUN_EXPORT PFNGLDELETEFRAGMENTSHADERATIPROC __glewDeleteFragmentShaderATI; +GLEW_FUN_EXPORT PFNGLENDFRAGMENTSHADERATIPROC __glewEndFragmentShaderATI; +GLEW_FUN_EXPORT PFNGLGENFRAGMENTSHADERSATIPROC __glewGenFragmentShadersATI; +GLEW_FUN_EXPORT PFNGLPASSTEXCOORDATIPROC __glewPassTexCoordATI; +GLEW_FUN_EXPORT PFNGLSAMPLEMAPATIPROC 
__glewSampleMapATI; +GLEW_FUN_EXPORT PFNGLSETFRAGMENTSHADERCONSTANTATIPROC __glewSetFragmentShaderConstantATI; + +GLEW_FUN_EXPORT PFNGLMAPOBJECTBUFFERATIPROC __glewMapObjectBufferATI; +GLEW_FUN_EXPORT PFNGLUNMAPOBJECTBUFFERATIPROC __glewUnmapObjectBufferATI; + +GLEW_FUN_EXPORT PFNGLPNTRIANGLESFATIPROC __glPNTrianglewesfATI; +GLEW_FUN_EXPORT PFNGLPNTRIANGLESIATIPROC __glPNTrianglewesiATI; + +GLEW_FUN_EXPORT PFNGLSTENCILFUNCSEPARATEATIPROC __glewStencilFuncSeparateATI; +GLEW_FUN_EXPORT PFNGLSTENCILOPSEPARATEATIPROC __glewStencilOpSeparateATI; + +GLEW_FUN_EXPORT PFNGLARRAYOBJECTATIPROC __glewArrayObjectATI; +GLEW_FUN_EXPORT PFNGLFREEOBJECTBUFFERATIPROC __glewFreeObjectBufferATI; +GLEW_FUN_EXPORT PFNGLGETARRAYOBJECTFVATIPROC __glewGetArrayObjectfvATI; +GLEW_FUN_EXPORT PFNGLGETARRAYOBJECTIVATIPROC __glewGetArrayObjectivATI; +GLEW_FUN_EXPORT PFNGLGETOBJECTBUFFERFVATIPROC __glewGetObjectBufferfvATI; +GLEW_FUN_EXPORT PFNGLGETOBJECTBUFFERIVATIPROC __glewGetObjectBufferivATI; +GLEW_FUN_EXPORT PFNGLGETVARIANTARRAYOBJECTFVATIPROC __glewGetVariantArrayObjectfvATI; +GLEW_FUN_EXPORT PFNGLGETVARIANTARRAYOBJECTIVATIPROC __glewGetVariantArrayObjectivATI; +GLEW_FUN_EXPORT PFNGLISOBJECTBUFFERATIPROC __glewIsObjectBufferATI; +GLEW_FUN_EXPORT PFNGLNEWOBJECTBUFFERATIPROC __glewNewObjectBufferATI; +GLEW_FUN_EXPORT PFNGLUPDATEOBJECTBUFFERATIPROC __glewUpdateObjectBufferATI; +GLEW_FUN_EXPORT PFNGLVARIANTARRAYOBJECTATIPROC __glewVariantArrayObjectATI; + +GLEW_FUN_EXPORT PFNGLGETVERTEXATTRIBARRAYOBJECTFVATIPROC __glewGetVertexAttribArrayObjectfvATI; +GLEW_FUN_EXPORT PFNGLGETVERTEXATTRIBARRAYOBJECTIVATIPROC __glewGetVertexAttribArrayObjectivATI; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIBARRAYOBJECTATIPROC __glewVertexAttribArrayObjectATI; + +GLEW_FUN_EXPORT PFNGLCLIENTACTIVEVERTEXSTREAMATIPROC __glewClientActiveVertexStreamATI; +GLEW_FUN_EXPORT PFNGLNORMALSTREAM3BATIPROC __glewNormalStream3bATI; +GLEW_FUN_EXPORT PFNGLNORMALSTREAM3BVATIPROC __glewNormalStream3bvATI; +GLEW_FUN_EXPORT 
PFNGLNORMALSTREAM3DATIPROC __glewNormalStream3dATI; +GLEW_FUN_EXPORT PFNGLNORMALSTREAM3DVATIPROC __glewNormalStream3dvATI; +GLEW_FUN_EXPORT PFNGLNORMALSTREAM3FATIPROC __glewNormalStream3fATI; +GLEW_FUN_EXPORT PFNGLNORMALSTREAM3FVATIPROC __glewNormalStream3fvATI; +GLEW_FUN_EXPORT PFNGLNORMALSTREAM3IATIPROC __glewNormalStream3iATI; +GLEW_FUN_EXPORT PFNGLNORMALSTREAM3IVATIPROC __glewNormalStream3ivATI; +GLEW_FUN_EXPORT PFNGLNORMALSTREAM3SATIPROC __glewNormalStream3sATI; +GLEW_FUN_EXPORT PFNGLNORMALSTREAM3SVATIPROC __glewNormalStream3svATI; +GLEW_FUN_EXPORT PFNGLVERTEXBLENDENVFATIPROC __glewVertexBlendEnvfATI; +GLEW_FUN_EXPORT PFNGLVERTEXBLENDENVIATIPROC __glewVertexBlendEnviATI; +GLEW_FUN_EXPORT PFNGLVERTEXSTREAM2DATIPROC __glewVertexStream2dATI; +GLEW_FUN_EXPORT PFNGLVERTEXSTREAM2DVATIPROC __glewVertexStream2dvATI; +GLEW_FUN_EXPORT PFNGLVERTEXSTREAM2FATIPROC __glewVertexStream2fATI; +GLEW_FUN_EXPORT PFNGLVERTEXSTREAM2FVATIPROC __glewVertexStream2fvATI; +GLEW_FUN_EXPORT PFNGLVERTEXSTREAM2IATIPROC __glewVertexStream2iATI; +GLEW_FUN_EXPORT PFNGLVERTEXSTREAM2IVATIPROC __glewVertexStream2ivATI; +GLEW_FUN_EXPORT PFNGLVERTEXSTREAM2SATIPROC __glewVertexStream2sATI; +GLEW_FUN_EXPORT PFNGLVERTEXSTREAM2SVATIPROC __glewVertexStream2svATI; +GLEW_FUN_EXPORT PFNGLVERTEXSTREAM3DATIPROC __glewVertexStream3dATI; +GLEW_FUN_EXPORT PFNGLVERTEXSTREAM3DVATIPROC __glewVertexStream3dvATI; +GLEW_FUN_EXPORT PFNGLVERTEXSTREAM3FATIPROC __glewVertexStream3fATI; +GLEW_FUN_EXPORT PFNGLVERTEXSTREAM3FVATIPROC __glewVertexStream3fvATI; +GLEW_FUN_EXPORT PFNGLVERTEXSTREAM3IATIPROC __glewVertexStream3iATI; +GLEW_FUN_EXPORT PFNGLVERTEXSTREAM3IVATIPROC __glewVertexStream3ivATI; +GLEW_FUN_EXPORT PFNGLVERTEXSTREAM3SATIPROC __glewVertexStream3sATI; +GLEW_FUN_EXPORT PFNGLVERTEXSTREAM3SVATIPROC __glewVertexStream3svATI; +GLEW_FUN_EXPORT PFNGLVERTEXSTREAM4DATIPROC __glewVertexStream4dATI; +GLEW_FUN_EXPORT PFNGLVERTEXSTREAM4DVATIPROC __glewVertexStream4dvATI; +GLEW_FUN_EXPORT PFNGLVERTEXSTREAM4FATIPROC 
__glewVertexStream4fATI; +GLEW_FUN_EXPORT PFNGLVERTEXSTREAM4FVATIPROC __glewVertexStream4fvATI; +GLEW_FUN_EXPORT PFNGLVERTEXSTREAM4IATIPROC __glewVertexStream4iATI; +GLEW_FUN_EXPORT PFNGLVERTEXSTREAM4IVATIPROC __glewVertexStream4ivATI; +GLEW_FUN_EXPORT PFNGLVERTEXSTREAM4SATIPROC __glewVertexStream4sATI; +GLEW_FUN_EXPORT PFNGLVERTEXSTREAM4SVATIPROC __glewVertexStream4svATI; + +GLEW_FUN_EXPORT PFNGLGETUNIFORMBUFFERSIZEEXTPROC __glewGetUniformBufferSizeEXT; +GLEW_FUN_EXPORT PFNGLGETUNIFORMOFFSETEXTPROC __glewGetUniformOffsetEXT; +GLEW_FUN_EXPORT PFNGLUNIFORMBUFFEREXTPROC __glewUniformBufferEXT; + +GLEW_FUN_EXPORT PFNGLBLENDCOLOREXTPROC __glewBlendColorEXT; + +GLEW_FUN_EXPORT PFNGLBLENDEQUATIONSEPARATEEXTPROC __glewBlendEquationSeparateEXT; + +GLEW_FUN_EXPORT PFNGLBLENDFUNCSEPARATEEXTPROC __glewBlendFuncSeparateEXT; + +GLEW_FUN_EXPORT PFNGLBLENDEQUATIONEXTPROC __glewBlendEquationEXT; + +GLEW_FUN_EXPORT PFNGLCOLORSUBTABLEEXTPROC __glewColorSubTableEXT; +GLEW_FUN_EXPORT PFNGLCOPYCOLORSUBTABLEEXTPROC __glewCopyColorSubTableEXT; + +GLEW_FUN_EXPORT PFNGLLOCKARRAYSEXTPROC __glewLockArraysEXT; +GLEW_FUN_EXPORT PFNGLUNLOCKARRAYSEXTPROC __glewUnlockArraysEXT; + +GLEW_FUN_EXPORT PFNGLCONVOLUTIONFILTER1DEXTPROC __glewConvolutionFilter1DEXT; +GLEW_FUN_EXPORT PFNGLCONVOLUTIONFILTER2DEXTPROC __glewConvolutionFilter2DEXT; +GLEW_FUN_EXPORT PFNGLCONVOLUTIONPARAMETERFEXTPROC __glewConvolutionParameterfEXT; +GLEW_FUN_EXPORT PFNGLCONVOLUTIONPARAMETERFVEXTPROC __glewConvolutionParameterfvEXT; +GLEW_FUN_EXPORT PFNGLCONVOLUTIONPARAMETERIEXTPROC __glewConvolutionParameteriEXT; +GLEW_FUN_EXPORT PFNGLCONVOLUTIONPARAMETERIVEXTPROC __glewConvolutionParameterivEXT; +GLEW_FUN_EXPORT PFNGLCOPYCONVOLUTIONFILTER1DEXTPROC __glewCopyConvolutionFilter1DEXT; +GLEW_FUN_EXPORT PFNGLCOPYCONVOLUTIONFILTER2DEXTPROC __glewCopyConvolutionFilter2DEXT; +GLEW_FUN_EXPORT PFNGLGETCONVOLUTIONFILTEREXTPROC __glewGetConvolutionFilterEXT; +GLEW_FUN_EXPORT PFNGLGETCONVOLUTIONPARAMETERFVEXTPROC 
__glewGetConvolutionParameterfvEXT; +GLEW_FUN_EXPORT PFNGLGETCONVOLUTIONPARAMETERIVEXTPROC __glewGetConvolutionParameterivEXT; +GLEW_FUN_EXPORT PFNGLGETSEPARABLEFILTEREXTPROC __glewGetSeparableFilterEXT; +GLEW_FUN_EXPORT PFNGLSEPARABLEFILTER2DEXTPROC __glewSeparableFilter2DEXT; + +GLEW_FUN_EXPORT PFNGLBINORMALPOINTEREXTPROC __glewBinormalPointerEXT; +GLEW_FUN_EXPORT PFNGLTANGENTPOINTEREXTPROC __glewTangentPointerEXT; + +GLEW_FUN_EXPORT PFNGLCOPYTEXIMAGE1DEXTPROC __glewCopyTexImage1DEXT; +GLEW_FUN_EXPORT PFNGLCOPYTEXIMAGE2DEXTPROC __glewCopyTexImage2DEXT; +GLEW_FUN_EXPORT PFNGLCOPYTEXSUBIMAGE1DEXTPROC __glewCopyTexSubImage1DEXT; +GLEW_FUN_EXPORT PFNGLCOPYTEXSUBIMAGE2DEXTPROC __glewCopyTexSubImage2DEXT; +GLEW_FUN_EXPORT PFNGLCOPYTEXSUBIMAGE3DEXTPROC __glewCopyTexSubImage3DEXT; + +GLEW_FUN_EXPORT PFNGLCULLPARAMETERDVEXTPROC __glewCullParameterdvEXT; +GLEW_FUN_EXPORT PFNGLCULLPARAMETERFVEXTPROC __glewCullParameterfvEXT; + +GLEW_FUN_EXPORT PFNGLDEPTHBOUNDSEXTPROC __glewDepthBoundsEXT; + +GLEW_FUN_EXPORT PFNGLBINDMULTITEXTUREEXTPROC __glewBindMultiTextureEXT; +GLEW_FUN_EXPORT PFNGLCHECKNAMEDFRAMEBUFFERSTATUSEXTPROC __glewCheckNamedFramebufferStatusEXT; +GLEW_FUN_EXPORT PFNGLCLIENTATTRIBDEFAULTEXTPROC __glewClientAttribDefaultEXT; +GLEW_FUN_EXPORT PFNGLCOMPRESSEDMULTITEXIMAGE1DEXTPROC __glewCompressedMultiTexImage1DEXT; +GLEW_FUN_EXPORT PFNGLCOMPRESSEDMULTITEXIMAGE2DEXTPROC __glewCompressedMultiTexImage2DEXT; +GLEW_FUN_EXPORT PFNGLCOMPRESSEDMULTITEXIMAGE3DEXTPROC __glewCompressedMultiTexImage3DEXT; +GLEW_FUN_EXPORT PFNGLCOMPRESSEDMULTITEXSUBIMAGE1DEXTPROC __glewCompressedMultiTexSubImage1DEXT; +GLEW_FUN_EXPORT PFNGLCOMPRESSEDMULTITEXSUBIMAGE2DEXTPROC __glewCompressedMultiTexSubImage2DEXT; +GLEW_FUN_EXPORT PFNGLCOMPRESSEDMULTITEXSUBIMAGE3DEXTPROC __glewCompressedMultiTexSubImage3DEXT; +GLEW_FUN_EXPORT PFNGLCOMPRESSEDTEXTUREIMAGE1DEXTPROC __glewCompressedTextureImage1DEXT; +GLEW_FUN_EXPORT PFNGLCOMPRESSEDTEXTUREIMAGE2DEXTPROC __glewCompressedTextureImage2DEXT; 
+GLEW_FUN_EXPORT PFNGLCOMPRESSEDTEXTUREIMAGE3DEXTPROC __glewCompressedTextureImage3DEXT; +GLEW_FUN_EXPORT PFNGLCOMPRESSEDTEXTURESUBIMAGE1DEXTPROC __glewCompressedTextureSubImage1DEXT; +GLEW_FUN_EXPORT PFNGLCOMPRESSEDTEXTURESUBIMAGE2DEXTPROC __glewCompressedTextureSubImage2DEXT; +GLEW_FUN_EXPORT PFNGLCOMPRESSEDTEXTURESUBIMAGE3DEXTPROC __glewCompressedTextureSubImage3DEXT; +GLEW_FUN_EXPORT PFNGLCOPYMULTITEXIMAGE1DEXTPROC __glewCopyMultiTexImage1DEXT; +GLEW_FUN_EXPORT PFNGLCOPYMULTITEXIMAGE2DEXTPROC __glewCopyMultiTexImage2DEXT; +GLEW_FUN_EXPORT PFNGLCOPYMULTITEXSUBIMAGE1DEXTPROC __glewCopyMultiTexSubImage1DEXT; +GLEW_FUN_EXPORT PFNGLCOPYMULTITEXSUBIMAGE2DEXTPROC __glewCopyMultiTexSubImage2DEXT; +GLEW_FUN_EXPORT PFNGLCOPYMULTITEXSUBIMAGE3DEXTPROC __glewCopyMultiTexSubImage3DEXT; +GLEW_FUN_EXPORT PFNGLCOPYTEXTUREIMAGE1DEXTPROC __glewCopyTextureImage1DEXT; +GLEW_FUN_EXPORT PFNGLCOPYTEXTUREIMAGE2DEXTPROC __glewCopyTextureImage2DEXT; +GLEW_FUN_EXPORT PFNGLCOPYTEXTURESUBIMAGE1DEXTPROC __glewCopyTextureSubImage1DEXT; +GLEW_FUN_EXPORT PFNGLCOPYTEXTURESUBIMAGE2DEXTPROC __glewCopyTextureSubImage2DEXT; +GLEW_FUN_EXPORT PFNGLCOPYTEXTURESUBIMAGE3DEXTPROC __glewCopyTextureSubImage3DEXT; +GLEW_FUN_EXPORT PFNGLDISABLECLIENTSTATEINDEXEDEXTPROC __glewDisableClientStateIndexedEXT; +GLEW_FUN_EXPORT PFNGLENABLECLIENTSTATEINDEXEDEXTPROC __glewEnableClientStateIndexedEXT; +GLEW_FUN_EXPORT PFNGLFRAMEBUFFERDRAWBUFFEREXTPROC __glewFramebufferDrawBufferEXT; +GLEW_FUN_EXPORT PFNGLFRAMEBUFFERDRAWBUFFERSEXTPROC __glewFramebufferDrawBuffersEXT; +GLEW_FUN_EXPORT PFNGLFRAMEBUFFERREADBUFFEREXTPROC __glewFramebufferReadBufferEXT; +GLEW_FUN_EXPORT PFNGLGENERATEMULTITEXMIPMAPEXTPROC __glewGenerateMultiTexMipmapEXT; +GLEW_FUN_EXPORT PFNGLGENERATETEXTUREMIPMAPEXTPROC __glewGenerateTextureMipmapEXT; +GLEW_FUN_EXPORT PFNGLGETCOMPRESSEDMULTITEXIMAGEEXTPROC __glewGetCompressedMultiTexImageEXT; +GLEW_FUN_EXPORT PFNGLGETCOMPRESSEDTEXTUREIMAGEEXTPROC __glewGetCompressedTextureImageEXT; +GLEW_FUN_EXPORT 
PFNGLGETDOUBLEINDEXEDVEXTPROC __glewGetDoubleIndexedvEXT; +GLEW_FUN_EXPORT PFNGLGETFLOATINDEXEDVEXTPROC __glewGetFloatIndexedvEXT; +GLEW_FUN_EXPORT PFNGLGETFRAMEBUFFERPARAMETERIVEXTPROC __glewGetFramebufferParameterivEXT; +GLEW_FUN_EXPORT PFNGLGETMULTITEXENVFVEXTPROC __glewGetMultiTexEnvfvEXT; +GLEW_FUN_EXPORT PFNGLGETMULTITEXENVIVEXTPROC __glewGetMultiTexEnvivEXT; +GLEW_FUN_EXPORT PFNGLGETMULTITEXGENDVEXTPROC __glewGetMultiTexGendvEXT; +GLEW_FUN_EXPORT PFNGLGETMULTITEXGENFVEXTPROC __glewGetMultiTexGenfvEXT; +GLEW_FUN_EXPORT PFNGLGETMULTITEXGENIVEXTPROC __glewGetMultiTexGenivEXT; +GLEW_FUN_EXPORT PFNGLGETMULTITEXIMAGEEXTPROC __glewGetMultiTexImageEXT; +GLEW_FUN_EXPORT PFNGLGETMULTITEXLEVELPARAMETERFVEXTPROC __glewGetMultiTexLevelParameterfvEXT; +GLEW_FUN_EXPORT PFNGLGETMULTITEXLEVELPARAMETERIVEXTPROC __glewGetMultiTexLevelParameterivEXT; +GLEW_FUN_EXPORT PFNGLGETMULTITEXPARAMETERIIVEXTPROC __glewGetMultiTexParameterIivEXT; +GLEW_FUN_EXPORT PFNGLGETMULTITEXPARAMETERIUIVEXTPROC __glewGetMultiTexParameterIuivEXT; +GLEW_FUN_EXPORT PFNGLGETMULTITEXPARAMETERFVEXTPROC __glewGetMultiTexParameterfvEXT; +GLEW_FUN_EXPORT PFNGLGETMULTITEXPARAMETERIVEXTPROC __glewGetMultiTexParameterivEXT; +GLEW_FUN_EXPORT PFNGLGETNAMEDBUFFERPARAMETERIVEXTPROC __glewGetNamedBufferParameterivEXT; +GLEW_FUN_EXPORT PFNGLGETNAMEDBUFFERPOINTERVEXTPROC __glewGetNamedBufferPointervEXT; +GLEW_FUN_EXPORT PFNGLGETNAMEDBUFFERSUBDATAEXTPROC __glewGetNamedBufferSubDataEXT; +GLEW_FUN_EXPORT PFNGLGETNAMEDFRAMEBUFFERATTACHMENTPARAMETERIVEXTPROC __glewGetNamedFramebufferAttachmentParameterivEXT; +GLEW_FUN_EXPORT PFNGLGETNAMEDPROGRAMLOCALPARAMETERIIVEXTPROC __glewGetNamedProgramLocalParameterIivEXT; +GLEW_FUN_EXPORT PFNGLGETNAMEDPROGRAMLOCALPARAMETERIUIVEXTPROC __glewGetNamedProgramLocalParameterIuivEXT; +GLEW_FUN_EXPORT PFNGLGETNAMEDPROGRAMLOCALPARAMETERDVEXTPROC __glewGetNamedProgramLocalParameterdvEXT; +GLEW_FUN_EXPORT PFNGLGETNAMEDPROGRAMLOCALPARAMETERFVEXTPROC __glewGetNamedProgramLocalParameterfvEXT; 
+GLEW_FUN_EXPORT PFNGLGETNAMEDPROGRAMSTRINGEXTPROC __glewGetNamedProgramStringEXT; +GLEW_FUN_EXPORT PFNGLGETNAMEDPROGRAMIVEXTPROC __glewGetNamedProgramivEXT; +GLEW_FUN_EXPORT PFNGLGETNAMEDRENDERBUFFERPARAMETERIVEXTPROC __glewGetNamedRenderbufferParameterivEXT; +GLEW_FUN_EXPORT PFNGLGETPOINTERINDEXEDVEXTPROC __glewGetPointerIndexedvEXT; +GLEW_FUN_EXPORT PFNGLGETTEXTUREIMAGEEXTPROC __glewGetTextureImageEXT; +GLEW_FUN_EXPORT PFNGLGETTEXTURELEVELPARAMETERFVEXTPROC __glewGetTextureLevelParameterfvEXT; +GLEW_FUN_EXPORT PFNGLGETTEXTURELEVELPARAMETERIVEXTPROC __glewGetTextureLevelParameterivEXT; +GLEW_FUN_EXPORT PFNGLGETTEXTUREPARAMETERIIVEXTPROC __glewGetTextureParameterIivEXT; +GLEW_FUN_EXPORT PFNGLGETTEXTUREPARAMETERIUIVEXTPROC __glewGetTextureParameterIuivEXT; +GLEW_FUN_EXPORT PFNGLGETTEXTUREPARAMETERFVEXTPROC __glewGetTextureParameterfvEXT; +GLEW_FUN_EXPORT PFNGLGETTEXTUREPARAMETERIVEXTPROC __glewGetTextureParameterivEXT; +GLEW_FUN_EXPORT PFNGLMAPNAMEDBUFFEREXTPROC __glewMapNamedBufferEXT; +GLEW_FUN_EXPORT PFNGLMATRIXFRUSTUMEXTPROC __glewMatrixFrustumEXT; +GLEW_FUN_EXPORT PFNGLMATRIXLOADIDENTITYEXTPROC __glewMatrixLoadIdentityEXT; +GLEW_FUN_EXPORT PFNGLMATRIXLOADTRANSPOSEDEXTPROC __glewMatrixLoadTransposedEXT; +GLEW_FUN_EXPORT PFNGLMATRIXLOADTRANSPOSEFEXTPROC __glewMatrixLoadTransposefEXT; +GLEW_FUN_EXPORT PFNGLMATRIXLOADDEXTPROC __glewMatrixLoaddEXT; +GLEW_FUN_EXPORT PFNGLMATRIXLOADFEXTPROC __glewMatrixLoadfEXT; +GLEW_FUN_EXPORT PFNGLMATRIXMULTTRANSPOSEDEXTPROC __glewMatrixMultTransposedEXT; +GLEW_FUN_EXPORT PFNGLMATRIXMULTTRANSPOSEFEXTPROC __glewMatrixMultTransposefEXT; +GLEW_FUN_EXPORT PFNGLMATRIXMULTDEXTPROC __glewMatrixMultdEXT; +GLEW_FUN_EXPORT PFNGLMATRIXMULTFEXTPROC __glewMatrixMultfEXT; +GLEW_FUN_EXPORT PFNGLMATRIXORTHOEXTPROC __glewMatrixOrthoEXT; +GLEW_FUN_EXPORT PFNGLMATRIXPOPEXTPROC __glewMatrixPopEXT; +GLEW_FUN_EXPORT PFNGLMATRIXPUSHEXTPROC __glewMatrixPushEXT; +GLEW_FUN_EXPORT PFNGLMATRIXROTATEDEXTPROC __glewMatrixRotatedEXT; +GLEW_FUN_EXPORT 
PFNGLMATRIXROTATEFEXTPROC __glewMatrixRotatefEXT; +GLEW_FUN_EXPORT PFNGLMATRIXSCALEDEXTPROC __glewMatrixScaledEXT; +GLEW_FUN_EXPORT PFNGLMATRIXSCALEFEXTPROC __glewMatrixScalefEXT; +GLEW_FUN_EXPORT PFNGLMATRIXTRANSLATEDEXTPROC __glewMatrixTranslatedEXT; +GLEW_FUN_EXPORT PFNGLMATRIXTRANSLATEFEXTPROC __glewMatrixTranslatefEXT; +GLEW_FUN_EXPORT PFNGLMULTITEXBUFFEREXTPROC __glewMultiTexBufferEXT; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORDPOINTEREXTPROC __glewMultiTexCoordPointerEXT; +GLEW_FUN_EXPORT PFNGLMULTITEXENVFEXTPROC __glewMultiTexEnvfEXT; +GLEW_FUN_EXPORT PFNGLMULTITEXENVFVEXTPROC __glewMultiTexEnvfvEXT; +GLEW_FUN_EXPORT PFNGLMULTITEXENVIEXTPROC __glewMultiTexEnviEXT; +GLEW_FUN_EXPORT PFNGLMULTITEXENVIVEXTPROC __glewMultiTexEnvivEXT; +GLEW_FUN_EXPORT PFNGLMULTITEXGENDEXTPROC __glewMultiTexGendEXT; +GLEW_FUN_EXPORT PFNGLMULTITEXGENDVEXTPROC __glewMultiTexGendvEXT; +GLEW_FUN_EXPORT PFNGLMULTITEXGENFEXTPROC __glewMultiTexGenfEXT; +GLEW_FUN_EXPORT PFNGLMULTITEXGENFVEXTPROC __glewMultiTexGenfvEXT; +GLEW_FUN_EXPORT PFNGLMULTITEXGENIEXTPROC __glewMultiTexGeniEXT; +GLEW_FUN_EXPORT PFNGLMULTITEXGENIVEXTPROC __glewMultiTexGenivEXT; +GLEW_FUN_EXPORT PFNGLMULTITEXIMAGE1DEXTPROC __glewMultiTexImage1DEXT; +GLEW_FUN_EXPORT PFNGLMULTITEXIMAGE2DEXTPROC __glewMultiTexImage2DEXT; +GLEW_FUN_EXPORT PFNGLMULTITEXIMAGE3DEXTPROC __glewMultiTexImage3DEXT; +GLEW_FUN_EXPORT PFNGLMULTITEXPARAMETERIIVEXTPROC __glewMultiTexParameterIivEXT; +GLEW_FUN_EXPORT PFNGLMULTITEXPARAMETERIUIVEXTPROC __glewMultiTexParameterIuivEXT; +GLEW_FUN_EXPORT PFNGLMULTITEXPARAMETERFEXTPROC __glewMultiTexParameterfEXT; +GLEW_FUN_EXPORT PFNGLMULTITEXPARAMETERFVEXTPROC __glewMultiTexParameterfvEXT; +GLEW_FUN_EXPORT PFNGLMULTITEXPARAMETERIEXTPROC __glewMultiTexParameteriEXT; +GLEW_FUN_EXPORT PFNGLMULTITEXPARAMETERIVEXTPROC __glewMultiTexParameterivEXT; +GLEW_FUN_EXPORT PFNGLMULTITEXRENDERBUFFEREXTPROC __glewMultiTexRenderbufferEXT; +GLEW_FUN_EXPORT PFNGLMULTITEXSUBIMAGE1DEXTPROC __glewMultiTexSubImage1DEXT; +GLEW_FUN_EXPORT 
PFNGLMULTITEXSUBIMAGE2DEXTPROC __glewMultiTexSubImage2DEXT; +GLEW_FUN_EXPORT PFNGLMULTITEXSUBIMAGE3DEXTPROC __glewMultiTexSubImage3DEXT; +GLEW_FUN_EXPORT PFNGLNAMEDBUFFERDATAEXTPROC __glewNamedBufferDataEXT; +GLEW_FUN_EXPORT PFNGLNAMEDBUFFERSUBDATAEXTPROC __glewNamedBufferSubDataEXT; +GLEW_FUN_EXPORT PFNGLNAMEDFRAMEBUFFERRENDERBUFFEREXTPROC __glewNamedFramebufferRenderbufferEXT; +GLEW_FUN_EXPORT PFNGLNAMEDFRAMEBUFFERTEXTURE1DEXTPROC __glewNamedFramebufferTexture1DEXT; +GLEW_FUN_EXPORT PFNGLNAMEDFRAMEBUFFERTEXTURE2DEXTPROC __glewNamedFramebufferTexture2DEXT; +GLEW_FUN_EXPORT PFNGLNAMEDFRAMEBUFFERTEXTURE3DEXTPROC __glewNamedFramebufferTexture3DEXT; +GLEW_FUN_EXPORT PFNGLNAMEDFRAMEBUFFERTEXTUREEXTPROC __glewNamedFramebufferTextureEXT; +GLEW_FUN_EXPORT PFNGLNAMEDFRAMEBUFFERTEXTUREFACEEXTPROC __glewNamedFramebufferTextureFaceEXT; +GLEW_FUN_EXPORT PFNGLNAMEDFRAMEBUFFERTEXTURELAYEREXTPROC __glewNamedFramebufferTextureLayerEXT; +GLEW_FUN_EXPORT PFNGLNAMEDPROGRAMLOCALPARAMETER4DEXTPROC __glewNamedProgramLocalParameter4dEXT; +GLEW_FUN_EXPORT PFNGLNAMEDPROGRAMLOCALPARAMETER4DVEXTPROC __glewNamedProgramLocalParameter4dvEXT; +GLEW_FUN_EXPORT PFNGLNAMEDPROGRAMLOCALPARAMETER4FEXTPROC __glewNamedProgramLocalParameter4fEXT; +GLEW_FUN_EXPORT PFNGLNAMEDPROGRAMLOCALPARAMETER4FVEXTPROC __glewNamedProgramLocalParameter4fvEXT; +GLEW_FUN_EXPORT PFNGLNAMEDPROGRAMLOCALPARAMETERI4IEXTPROC __glewNamedProgramLocalParameterI4iEXT; +GLEW_FUN_EXPORT PFNGLNAMEDPROGRAMLOCALPARAMETERI4IVEXTPROC __glewNamedProgramLocalParameterI4ivEXT; +GLEW_FUN_EXPORT PFNGLNAMEDPROGRAMLOCALPARAMETERI4UIEXTPROC __glewNamedProgramLocalParameterI4uiEXT; +GLEW_FUN_EXPORT PFNGLNAMEDPROGRAMLOCALPARAMETERI4UIVEXTPROC __glewNamedProgramLocalParameterI4uivEXT; +GLEW_FUN_EXPORT PFNGLNAMEDPROGRAMLOCALPARAMETERS4FVEXTPROC __glewNamedProgramLocalParameters4fvEXT; +GLEW_FUN_EXPORT PFNGLNAMEDPROGRAMLOCALPARAMETERSI4IVEXTPROC __glewNamedProgramLocalParametersI4ivEXT; +GLEW_FUN_EXPORT PFNGLNAMEDPROGRAMLOCALPARAMETERSI4UIVEXTPROC 
__glewNamedProgramLocalParametersI4uivEXT; +GLEW_FUN_EXPORT PFNGLNAMEDPROGRAMSTRINGEXTPROC __glewNamedProgramStringEXT; +GLEW_FUN_EXPORT PFNGLNAMEDRENDERBUFFERSTORAGEEXTPROC __glewNamedRenderbufferStorageEXT; +GLEW_FUN_EXPORT PFNGLNAMEDRENDERBUFFERSTORAGEMULTISAMPLECOVERAGEEXTPROC __glewNamedRenderbufferStorageMultisampleCoverageEXT; +GLEW_FUN_EXPORT PFNGLNAMEDRENDERBUFFERSTORAGEMULTISAMPLEEXTPROC __glewNamedRenderbufferStorageMultisampleEXT; +GLEW_FUN_EXPORT PFNGLPROGRAMUNIFORM1FEXTPROC __glewProgramUniform1fEXT; +GLEW_FUN_EXPORT PFNGLPROGRAMUNIFORM1FVEXTPROC __glewProgramUniform1fvEXT; +GLEW_FUN_EXPORT PFNGLPROGRAMUNIFORM1IEXTPROC __glewProgramUniform1iEXT; +GLEW_FUN_EXPORT PFNGLPROGRAMUNIFORM1IVEXTPROC __glewProgramUniform1ivEXT; +GLEW_FUN_EXPORT PFNGLPROGRAMUNIFORM1UIEXTPROC __glewProgramUniform1uiEXT; +GLEW_FUN_EXPORT PFNGLPROGRAMUNIFORM1UIVEXTPROC __glewProgramUniform1uivEXT; +GLEW_FUN_EXPORT PFNGLPROGRAMUNIFORM2FEXTPROC __glewProgramUniform2fEXT; +GLEW_FUN_EXPORT PFNGLPROGRAMUNIFORM2FVEXTPROC __glewProgramUniform2fvEXT; +GLEW_FUN_EXPORT PFNGLPROGRAMUNIFORM2IEXTPROC __glewProgramUniform2iEXT; +GLEW_FUN_EXPORT PFNGLPROGRAMUNIFORM2IVEXTPROC __glewProgramUniform2ivEXT; +GLEW_FUN_EXPORT PFNGLPROGRAMUNIFORM2UIEXTPROC __glewProgramUniform2uiEXT; +GLEW_FUN_EXPORT PFNGLPROGRAMUNIFORM2UIVEXTPROC __glewProgramUniform2uivEXT; +GLEW_FUN_EXPORT PFNGLPROGRAMUNIFORM3FEXTPROC __glewProgramUniform3fEXT; +GLEW_FUN_EXPORT PFNGLPROGRAMUNIFORM3FVEXTPROC __glewProgramUniform3fvEXT; +GLEW_FUN_EXPORT PFNGLPROGRAMUNIFORM3IEXTPROC __glewProgramUniform3iEXT; +GLEW_FUN_EXPORT PFNGLPROGRAMUNIFORM3IVEXTPROC __glewProgramUniform3ivEXT; +GLEW_FUN_EXPORT PFNGLPROGRAMUNIFORM3UIEXTPROC __glewProgramUniform3uiEXT; +GLEW_FUN_EXPORT PFNGLPROGRAMUNIFORM3UIVEXTPROC __glewProgramUniform3uivEXT; +GLEW_FUN_EXPORT PFNGLPROGRAMUNIFORM4FEXTPROC __glewProgramUniform4fEXT; +GLEW_FUN_EXPORT PFNGLPROGRAMUNIFORM4FVEXTPROC __glewProgramUniform4fvEXT; +GLEW_FUN_EXPORT PFNGLPROGRAMUNIFORM4IEXTPROC 
__glewProgramUniform4iEXT; +GLEW_FUN_EXPORT PFNGLPROGRAMUNIFORM4IVEXTPROC __glewProgramUniform4ivEXT; +GLEW_FUN_EXPORT PFNGLPROGRAMUNIFORM4UIEXTPROC __glewProgramUniform4uiEXT; +GLEW_FUN_EXPORT PFNGLPROGRAMUNIFORM4UIVEXTPROC __glewProgramUniform4uivEXT; +GLEW_FUN_EXPORT PFNGLPROGRAMUNIFORMMATRIX2FVEXTPROC __glewProgramUniformMatrix2fvEXT; +GLEW_FUN_EXPORT PFNGLPROGRAMUNIFORMMATRIX2X3FVEXTPROC __glewProgramUniformMatrix2x3fvEXT; +GLEW_FUN_EXPORT PFNGLPROGRAMUNIFORMMATRIX2X4FVEXTPROC __glewProgramUniformMatrix2x4fvEXT; +GLEW_FUN_EXPORT PFNGLPROGRAMUNIFORMMATRIX3FVEXTPROC __glewProgramUniformMatrix3fvEXT; +GLEW_FUN_EXPORT PFNGLPROGRAMUNIFORMMATRIX3X2FVEXTPROC __glewProgramUniformMatrix3x2fvEXT; +GLEW_FUN_EXPORT PFNGLPROGRAMUNIFORMMATRIX3X4FVEXTPROC __glewProgramUniformMatrix3x4fvEXT; +GLEW_FUN_EXPORT PFNGLPROGRAMUNIFORMMATRIX4FVEXTPROC __glewProgramUniformMatrix4fvEXT; +GLEW_FUN_EXPORT PFNGLPROGRAMUNIFORMMATRIX4X2FVEXTPROC __glewProgramUniformMatrix4x2fvEXT; +GLEW_FUN_EXPORT PFNGLPROGRAMUNIFORMMATRIX4X3FVEXTPROC __glewProgramUniformMatrix4x3fvEXT; +GLEW_FUN_EXPORT PFNGLPUSHCLIENTATTRIBDEFAULTEXTPROC __glewPushClientAttribDefaultEXT; +GLEW_FUN_EXPORT PFNGLTEXTUREBUFFEREXTPROC __glewTextureBufferEXT; +GLEW_FUN_EXPORT PFNGLTEXTUREIMAGE1DEXTPROC __glewTextureImage1DEXT; +GLEW_FUN_EXPORT PFNGLTEXTUREIMAGE2DEXTPROC __glewTextureImage2DEXT; +GLEW_FUN_EXPORT PFNGLTEXTUREIMAGE3DEXTPROC __glewTextureImage3DEXT; +GLEW_FUN_EXPORT PFNGLTEXTUREPARAMETERIIVEXTPROC __glewTextureParameterIivEXT; +GLEW_FUN_EXPORT PFNGLTEXTUREPARAMETERIUIVEXTPROC __glewTextureParameterIuivEXT; +GLEW_FUN_EXPORT PFNGLTEXTUREPARAMETERFEXTPROC __glewTextureParameterfEXT; +GLEW_FUN_EXPORT PFNGLTEXTUREPARAMETERFVEXTPROC __glewTextureParameterfvEXT; +GLEW_FUN_EXPORT PFNGLTEXTUREPARAMETERIEXTPROC __glewTextureParameteriEXT; +GLEW_FUN_EXPORT PFNGLTEXTUREPARAMETERIVEXTPROC __glewTextureParameterivEXT; +GLEW_FUN_EXPORT PFNGLTEXTURERENDERBUFFEREXTPROC __glewTextureRenderbufferEXT; +GLEW_FUN_EXPORT 
PFNGLTEXTURESUBIMAGE1DEXTPROC __glewTextureSubImage1DEXT; +GLEW_FUN_EXPORT PFNGLTEXTURESUBIMAGE2DEXTPROC __glewTextureSubImage2DEXT; +GLEW_FUN_EXPORT PFNGLTEXTURESUBIMAGE3DEXTPROC __glewTextureSubImage3DEXT; +GLEW_FUN_EXPORT PFNGLUNMAPNAMEDBUFFEREXTPROC __glewUnmapNamedBufferEXT; + +GLEW_FUN_EXPORT PFNGLCOLORMASKINDEXEDEXTPROC __glewColorMaskIndexedEXT; +GLEW_FUN_EXPORT PFNGLDISABLEINDEXEDEXTPROC __glewDisableIndexedEXT; +GLEW_FUN_EXPORT PFNGLENABLEINDEXEDEXTPROC __glewEnableIndexedEXT; +GLEW_FUN_EXPORT PFNGLGETBOOLEANINDEXEDVEXTPROC __glewGetBooleanIndexedvEXT; +GLEW_FUN_EXPORT PFNGLGETINTEGERINDEXEDVEXTPROC __glewGetIntegerIndexedvEXT; +GLEW_FUN_EXPORT PFNGLISENABLEDINDEXEDEXTPROC __glewIsEnabledIndexedEXT; + +GLEW_FUN_EXPORT PFNGLDRAWARRAYSINSTANCEDEXTPROC __glewDrawArraysInstancedEXT; +GLEW_FUN_EXPORT PFNGLDRAWELEMENTSINSTANCEDEXTPROC __glewDrawElementsInstancedEXT; + +GLEW_FUN_EXPORT PFNGLDRAWRANGEELEMENTSEXTPROC __glewDrawRangeElementsEXT; + +GLEW_FUN_EXPORT PFNGLFOGCOORDPOINTEREXTPROC __glewFogCoordPointerEXT; +GLEW_FUN_EXPORT PFNGLFOGCOORDDEXTPROC __glewFogCoorddEXT; +GLEW_FUN_EXPORT PFNGLFOGCOORDDVEXTPROC __glewFogCoorddvEXT; +GLEW_FUN_EXPORT PFNGLFOGCOORDFEXTPROC __glewFogCoordfEXT; +GLEW_FUN_EXPORT PFNGLFOGCOORDFVEXTPROC __glewFogCoordfvEXT; + +GLEW_FUN_EXPORT PFNGLFRAGMENTCOLORMATERIALEXTPROC __glewFragmentColorMaterialEXT; +GLEW_FUN_EXPORT PFNGLFRAGMENTLIGHTMODELFEXTPROC __glewFragmentLightModelfEXT; +GLEW_FUN_EXPORT PFNGLFRAGMENTLIGHTMODELFVEXTPROC __glewFragmentLightModelfvEXT; +GLEW_FUN_EXPORT PFNGLFRAGMENTLIGHTMODELIEXTPROC __glewFragmentLightModeliEXT; +GLEW_FUN_EXPORT PFNGLFRAGMENTLIGHTMODELIVEXTPROC __glewFragmentLightModelivEXT; +GLEW_FUN_EXPORT PFNGLFRAGMENTLIGHTFEXTPROC __glewFragmentLightfEXT; +GLEW_FUN_EXPORT PFNGLFRAGMENTLIGHTFVEXTPROC __glewFragmentLightfvEXT; +GLEW_FUN_EXPORT PFNGLFRAGMENTLIGHTIEXTPROC __glewFragmentLightiEXT; +GLEW_FUN_EXPORT PFNGLFRAGMENTLIGHTIVEXTPROC __glewFragmentLightivEXT; +GLEW_FUN_EXPORT 
PFNGLFRAGMENTMATERIALFEXTPROC __glewFragmentMaterialfEXT; +GLEW_FUN_EXPORT PFNGLFRAGMENTMATERIALFVEXTPROC __glewFragmentMaterialfvEXT; +GLEW_FUN_EXPORT PFNGLFRAGMENTMATERIALIEXTPROC __glewFragmentMaterialiEXT; +GLEW_FUN_EXPORT PFNGLFRAGMENTMATERIALIVEXTPROC __glewFragmentMaterialivEXT; +GLEW_FUN_EXPORT PFNGLGETFRAGMENTLIGHTFVEXTPROC __glewGetFragmentLightfvEXT; +GLEW_FUN_EXPORT PFNGLGETFRAGMENTLIGHTIVEXTPROC __glewGetFragmentLightivEXT; +GLEW_FUN_EXPORT PFNGLGETFRAGMENTMATERIALFVEXTPROC __glewGetFragmentMaterialfvEXT; +GLEW_FUN_EXPORT PFNGLGETFRAGMENTMATERIALIVEXTPROC __glewGetFragmentMaterialivEXT; +GLEW_FUN_EXPORT PFNGLLIGHTENVIEXTPROC __glewLightEnviEXT; + +GLEW_FUN_EXPORT PFNGLBLITFRAMEBUFFEREXTPROC __glewBlitFramebufferEXT; + +GLEW_FUN_EXPORT PFNGLRENDERBUFFERSTORAGEMULTISAMPLEEXTPROC __glewRenderbufferStorageMultisampleEXT; + +GLEW_FUN_EXPORT PFNGLBINDFRAMEBUFFEREXTPROC __glewBindFramebufferEXT; +GLEW_FUN_EXPORT PFNGLBINDRENDERBUFFEREXTPROC __glewBindRenderbufferEXT; +GLEW_FUN_EXPORT PFNGLCHECKFRAMEBUFFERSTATUSEXTPROC __glewCheckFramebufferStatusEXT; +GLEW_FUN_EXPORT PFNGLDELETEFRAMEBUFFERSEXTPROC __glewDeleteFramebuffersEXT; +GLEW_FUN_EXPORT PFNGLDELETERENDERBUFFERSEXTPROC __glewDeleteRenderbuffersEXT; +GLEW_FUN_EXPORT PFNGLFRAMEBUFFERRENDERBUFFEREXTPROC __glewFramebufferRenderbufferEXT; +GLEW_FUN_EXPORT PFNGLFRAMEBUFFERTEXTURE1DEXTPROC __glewFramebufferTexture1DEXT; +GLEW_FUN_EXPORT PFNGLFRAMEBUFFERTEXTURE2DEXTPROC __glewFramebufferTexture2DEXT; +GLEW_FUN_EXPORT PFNGLFRAMEBUFFERTEXTURE3DEXTPROC __glewFramebufferTexture3DEXT; +GLEW_FUN_EXPORT PFNGLGENFRAMEBUFFERSEXTPROC __glewGenFramebuffersEXT; +GLEW_FUN_EXPORT PFNGLGENRENDERBUFFERSEXTPROC __glewGenRenderbuffersEXT; +GLEW_FUN_EXPORT PFNGLGENERATEMIPMAPEXTPROC __glewGenerateMipmapEXT; +GLEW_FUN_EXPORT PFNGLGETFRAMEBUFFERATTACHMENTPARAMETERIVEXTPROC __glewGetFramebufferAttachmentParameterivEXT; +GLEW_FUN_EXPORT PFNGLGETRENDERBUFFERPARAMETERIVEXTPROC __glewGetRenderbufferParameterivEXT; +GLEW_FUN_EXPORT 
PFNGLISFRAMEBUFFEREXTPROC __glewIsFramebufferEXT; +GLEW_FUN_EXPORT PFNGLISRENDERBUFFEREXTPROC __glewIsRenderbufferEXT; +GLEW_FUN_EXPORT PFNGLRENDERBUFFERSTORAGEEXTPROC __glewRenderbufferStorageEXT; + +GLEW_FUN_EXPORT PFNGLFRAMEBUFFERTEXTUREEXTPROC __glewFramebufferTextureEXT; +GLEW_FUN_EXPORT PFNGLFRAMEBUFFERTEXTUREFACEEXTPROC __glewFramebufferTextureFaceEXT; +GLEW_FUN_EXPORT PFNGLFRAMEBUFFERTEXTURELAYEREXTPROC __glewFramebufferTextureLayerEXT; +GLEW_FUN_EXPORT PFNGLPROGRAMPARAMETERIEXTPROC __glewProgramParameteriEXT; + +GLEW_FUN_EXPORT PFNGLPROGRAMENVPARAMETERS4FVEXTPROC __glewProgramEnvParameters4fvEXT; +GLEW_FUN_EXPORT PFNGLPROGRAMLOCALPARAMETERS4FVEXTPROC __glewProgramLocalParameters4fvEXT; + +GLEW_FUN_EXPORT PFNGLBINDFRAGDATALOCATIONEXTPROC __glewBindFragDataLocationEXT; +GLEW_FUN_EXPORT PFNGLGETFRAGDATALOCATIONEXTPROC __glewGetFragDataLocationEXT; +GLEW_FUN_EXPORT PFNGLGETUNIFORMUIVEXTPROC __glewGetUniformuivEXT; +GLEW_FUN_EXPORT PFNGLGETVERTEXATTRIBIIVEXTPROC __glewGetVertexAttribIivEXT; +GLEW_FUN_EXPORT PFNGLGETVERTEXATTRIBIUIVEXTPROC __glewGetVertexAttribIuivEXT; +GLEW_FUN_EXPORT PFNGLUNIFORM1UIEXTPROC __glewUniform1uiEXT; +GLEW_FUN_EXPORT PFNGLUNIFORM1UIVEXTPROC __glewUniform1uivEXT; +GLEW_FUN_EXPORT PFNGLUNIFORM2UIEXTPROC __glewUniform2uiEXT; +GLEW_FUN_EXPORT PFNGLUNIFORM2UIVEXTPROC __glewUniform2uivEXT; +GLEW_FUN_EXPORT PFNGLUNIFORM3UIEXTPROC __glewUniform3uiEXT; +GLEW_FUN_EXPORT PFNGLUNIFORM3UIVEXTPROC __glewUniform3uivEXT; +GLEW_FUN_EXPORT PFNGLUNIFORM4UIEXTPROC __glewUniform4uiEXT; +GLEW_FUN_EXPORT PFNGLUNIFORM4UIVEXTPROC __glewUniform4uivEXT; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIBI1IEXTPROC __glewVertexAttribI1iEXT; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIBI1IVEXTPROC __glewVertexAttribI1ivEXT; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIBI1UIEXTPROC __glewVertexAttribI1uiEXT; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIBI1UIVEXTPROC __glewVertexAttribI1uivEXT; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIBI2IEXTPROC __glewVertexAttribI2iEXT; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIBI2IVEXTPROC 
__glewVertexAttribI2ivEXT; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIBI2UIEXTPROC __glewVertexAttribI2uiEXT; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIBI2UIVEXTPROC __glewVertexAttribI2uivEXT; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIBI3IEXTPROC __glewVertexAttribI3iEXT; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIBI3IVEXTPROC __glewVertexAttribI3ivEXT; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIBI3UIEXTPROC __glewVertexAttribI3uiEXT; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIBI3UIVEXTPROC __glewVertexAttribI3uivEXT; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIBI4BVEXTPROC __glewVertexAttribI4bvEXT; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIBI4IEXTPROC __glewVertexAttribI4iEXT; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIBI4IVEXTPROC __glewVertexAttribI4ivEXT; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIBI4SVEXTPROC __glewVertexAttribI4svEXT; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIBI4UBVEXTPROC __glewVertexAttribI4ubvEXT; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIBI4UIEXTPROC __glewVertexAttribI4uiEXT; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIBI4UIVEXTPROC __glewVertexAttribI4uivEXT; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIBI4USVEXTPROC __glewVertexAttribI4usvEXT; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIBIPOINTEREXTPROC __glewVertexAttribIPointerEXT; + +GLEW_FUN_EXPORT PFNGLGETHISTOGRAMEXTPROC __glewGetHistogramEXT; +GLEW_FUN_EXPORT PFNGLGETHISTOGRAMPARAMETERFVEXTPROC __glewGetHistogramParameterfvEXT; +GLEW_FUN_EXPORT PFNGLGETHISTOGRAMPARAMETERIVEXTPROC __glewGetHistogramParameterivEXT; +GLEW_FUN_EXPORT PFNGLGETMINMAXEXTPROC __glewGetMinmaxEXT; +GLEW_FUN_EXPORT PFNGLGETMINMAXPARAMETERFVEXTPROC __glewGetMinmaxParameterfvEXT; +GLEW_FUN_EXPORT PFNGLGETMINMAXPARAMETERIVEXTPROC __glewGetMinmaxParameterivEXT; +GLEW_FUN_EXPORT PFNGLHISTOGRAMEXTPROC __glewHistogramEXT; +GLEW_FUN_EXPORT PFNGLMINMAXEXTPROC __glewMinmaxEXT; +GLEW_FUN_EXPORT PFNGLRESETHISTOGRAMEXTPROC __glewResetHistogramEXT; +GLEW_FUN_EXPORT PFNGLRESETMINMAXEXTPROC __glewResetMinmaxEXT; + +GLEW_FUN_EXPORT PFNGLINDEXFUNCEXTPROC __glewIndexFuncEXT; + +GLEW_FUN_EXPORT PFNGLINDEXMATERIALEXTPROC __glewIndexMaterialEXT; + +GLEW_FUN_EXPORT 
PFNGLAPPLYTEXTUREEXTPROC __glewApplyTextureEXT; +GLEW_FUN_EXPORT PFNGLTEXTURELIGHTEXTPROC __glewTextureLightEXT; +GLEW_FUN_EXPORT PFNGLTEXTUREMATERIALEXTPROC __glewTextureMaterialEXT; + +GLEW_FUN_EXPORT PFNGLMULTIDRAWARRAYSEXTPROC __glewMultiDrawArraysEXT; +GLEW_FUN_EXPORT PFNGLMULTIDRAWELEMENTSEXTPROC __glewMultiDrawElementsEXT; + +GLEW_FUN_EXPORT PFNGLSAMPLEMASKEXTPROC __glewSampleMaskEXT; +GLEW_FUN_EXPORT PFNGLSAMPLEPATTERNEXTPROC __glewSamplePatternEXT; + +GLEW_FUN_EXPORT PFNGLCOLORTABLEEXTPROC __glewColorTableEXT; +GLEW_FUN_EXPORT PFNGLGETCOLORTABLEEXTPROC __glewGetColorTableEXT; +GLEW_FUN_EXPORT PFNGLGETCOLORTABLEPARAMETERFVEXTPROC __glewGetColorTableParameterfvEXT; +GLEW_FUN_EXPORT PFNGLGETCOLORTABLEPARAMETERIVEXTPROC __glewGetColorTableParameterivEXT; + +GLEW_FUN_EXPORT PFNGLGETPIXELTRANSFORMPARAMETERFVEXTPROC __glewGetPixelTransformParameterfvEXT; +GLEW_FUN_EXPORT PFNGLGETPIXELTRANSFORMPARAMETERIVEXTPROC __glewGetPixelTransformParameterivEXT; +GLEW_FUN_EXPORT PFNGLPIXELTRANSFORMPARAMETERFEXTPROC __glewPixelTransformParameterfEXT; +GLEW_FUN_EXPORT PFNGLPIXELTRANSFORMPARAMETERFVEXTPROC __glewPixelTransformParameterfvEXT; +GLEW_FUN_EXPORT PFNGLPIXELTRANSFORMPARAMETERIEXTPROC __glewPixelTransformParameteriEXT; +GLEW_FUN_EXPORT PFNGLPIXELTRANSFORMPARAMETERIVEXTPROC __glewPixelTransformParameterivEXT; + +GLEW_FUN_EXPORT PFNGLPOINTPARAMETERFEXTPROC __glewPointParameterfEXT; +GLEW_FUN_EXPORT PFNGLPOINTPARAMETERFVEXTPROC __glewPointParameterfvEXT; + +GLEW_FUN_EXPORT PFNGLPOLYGONOFFSETEXTPROC __glewPolygonOffsetEXT; + +GLEW_FUN_EXPORT PFNGLBEGINSCENEEXTPROC __glewBeginSceneEXT; +GLEW_FUN_EXPORT PFNGLENDSCENEEXTPROC __glewEndSceneEXT; + +GLEW_FUN_EXPORT PFNGLSECONDARYCOLOR3BEXTPROC __glewSecondaryColor3bEXT; +GLEW_FUN_EXPORT PFNGLSECONDARYCOLOR3BVEXTPROC __glewSecondaryColor3bvEXT; +GLEW_FUN_EXPORT PFNGLSECONDARYCOLOR3DEXTPROC __glewSecondaryColor3dEXT; +GLEW_FUN_EXPORT PFNGLSECONDARYCOLOR3DVEXTPROC __glewSecondaryColor3dvEXT; +GLEW_FUN_EXPORT 
PFNGLSECONDARYCOLOR3FEXTPROC __glewSecondaryColor3fEXT; +GLEW_FUN_EXPORT PFNGLSECONDARYCOLOR3FVEXTPROC __glewSecondaryColor3fvEXT; +GLEW_FUN_EXPORT PFNGLSECONDARYCOLOR3IEXTPROC __glewSecondaryColor3iEXT; +GLEW_FUN_EXPORT PFNGLSECONDARYCOLOR3IVEXTPROC __glewSecondaryColor3ivEXT; +GLEW_FUN_EXPORT PFNGLSECONDARYCOLOR3SEXTPROC __glewSecondaryColor3sEXT; +GLEW_FUN_EXPORT PFNGLSECONDARYCOLOR3SVEXTPROC __glewSecondaryColor3svEXT; +GLEW_FUN_EXPORT PFNGLSECONDARYCOLOR3UBEXTPROC __glewSecondaryColor3ubEXT; +GLEW_FUN_EXPORT PFNGLSECONDARYCOLOR3UBVEXTPROC __glewSecondaryColor3ubvEXT; +GLEW_FUN_EXPORT PFNGLSECONDARYCOLOR3UIEXTPROC __glewSecondaryColor3uiEXT; +GLEW_FUN_EXPORT PFNGLSECONDARYCOLOR3UIVEXTPROC __glewSecondaryColor3uivEXT; +GLEW_FUN_EXPORT PFNGLSECONDARYCOLOR3USEXTPROC __glewSecondaryColor3usEXT; +GLEW_FUN_EXPORT PFNGLSECONDARYCOLOR3USVEXTPROC __glewSecondaryColor3usvEXT; +GLEW_FUN_EXPORT PFNGLSECONDARYCOLORPOINTEREXTPROC __glewSecondaryColorPointerEXT; + +GLEW_FUN_EXPORT PFNGLACTIVESTENCILFACEEXTPROC __glewActiveStencilFaceEXT; + +GLEW_FUN_EXPORT PFNGLTEXSUBIMAGE1DEXTPROC __glewTexSubImage1DEXT; +GLEW_FUN_EXPORT PFNGLTEXSUBIMAGE2DEXTPROC __glewTexSubImage2DEXT; +GLEW_FUN_EXPORT PFNGLTEXSUBIMAGE3DEXTPROC __glewTexSubImage3DEXT; + +GLEW_FUN_EXPORT PFNGLTEXIMAGE3DEXTPROC __glewTexImage3DEXT; + +GLEW_FUN_EXPORT PFNGLTEXBUFFEREXTPROC __glewTexBufferEXT; + +GLEW_FUN_EXPORT PFNGLCLEARCOLORIIEXTPROC __glewClearColorIiEXT; +GLEW_FUN_EXPORT PFNGLCLEARCOLORIUIEXTPROC __glewClearColorIuiEXT; +GLEW_FUN_EXPORT PFNGLGETTEXPARAMETERIIVEXTPROC __glewGetTexParameterIivEXT; +GLEW_FUN_EXPORT PFNGLGETTEXPARAMETERIUIVEXTPROC __glewGetTexParameterIuivEXT; +GLEW_FUN_EXPORT PFNGLTEXPARAMETERIIVEXTPROC __glewTexParameterIivEXT; +GLEW_FUN_EXPORT PFNGLTEXPARAMETERIUIVEXTPROC __glewTexParameterIuivEXT; + +GLEW_FUN_EXPORT PFNGLARETEXTURESRESIDENTEXTPROC __glewAreTexturesResidentEXT; +GLEW_FUN_EXPORT PFNGLBINDTEXTUREEXTPROC __glewBindTextureEXT; +GLEW_FUN_EXPORT PFNGLDELETETEXTURESEXTPROC 
__glewDeleteTexturesEXT; +GLEW_FUN_EXPORT PFNGLGENTEXTURESEXTPROC __glewGenTexturesEXT; +GLEW_FUN_EXPORT PFNGLISTEXTUREEXTPROC __glewIsTextureEXT; +GLEW_FUN_EXPORT PFNGLPRIORITIZETEXTURESEXTPROC __glewPrioritizeTexturesEXT; + +GLEW_FUN_EXPORT PFNGLTEXTURENORMALEXTPROC __glewTextureNormalEXT; + +GLEW_FUN_EXPORT PFNGLGETQUERYOBJECTI64VEXTPROC __glewGetQueryObjecti64vEXT; +GLEW_FUN_EXPORT PFNGLGETQUERYOBJECTUI64VEXTPROC __glewGetQueryObjectui64vEXT; + +GLEW_FUN_EXPORT PFNGLBEGINTRANSFORMFEEDBACKEXTPROC __glewBeginTransformFeedbackEXT; +GLEW_FUN_EXPORT PFNGLBINDBUFFERBASEEXTPROC __glewBindBufferBaseEXT; +GLEW_FUN_EXPORT PFNGLBINDBUFFEROFFSETEXTPROC __glewBindBufferOffsetEXT; +GLEW_FUN_EXPORT PFNGLBINDBUFFERRANGEEXTPROC __glewBindBufferRangeEXT; +GLEW_FUN_EXPORT PFNGLENDTRANSFORMFEEDBACKEXTPROC __glewEndTransformFeedbackEXT; +GLEW_FUN_EXPORT PFNGLGETTRANSFORMFEEDBACKVARYINGEXTPROC __glewGetTransformFeedbackVaryingEXT; +GLEW_FUN_EXPORT PFNGLTRANSFORMFEEDBACKVARYINGSEXTPROC __glewTransformFeedbackVaryingsEXT; + +GLEW_FUN_EXPORT PFNGLARRAYELEMENTEXTPROC __glewArrayElementEXT; +GLEW_FUN_EXPORT PFNGLCOLORPOINTEREXTPROC __glewColorPointerEXT; +GLEW_FUN_EXPORT PFNGLDRAWARRAYSEXTPROC __glewDrawArraysEXT; +GLEW_FUN_EXPORT PFNGLEDGEFLAGPOINTEREXTPROC __glewEdgeFlagPointerEXT; +GLEW_FUN_EXPORT PFNGLGETPOINTERVEXTPROC __glewGetPointervEXT; +GLEW_FUN_EXPORT PFNGLINDEXPOINTEREXTPROC __glewIndexPointerEXT; +GLEW_FUN_EXPORT PFNGLNORMALPOINTEREXTPROC __glewNormalPointerEXT; +GLEW_FUN_EXPORT PFNGLTEXCOORDPOINTEREXTPROC __glewTexCoordPointerEXT; +GLEW_FUN_EXPORT PFNGLVERTEXPOINTEREXTPROC __glewVertexPointerEXT; + +GLEW_FUN_EXPORT PFNGLBEGINVERTEXSHADEREXTPROC __glewBeginVertexShaderEXT; +GLEW_FUN_EXPORT PFNGLBINDLIGHTPARAMETEREXTPROC __glewBindLightParameterEXT; +GLEW_FUN_EXPORT PFNGLBINDMATERIALPARAMETEREXTPROC __glewBindMaterialParameterEXT; +GLEW_FUN_EXPORT PFNGLBINDPARAMETEREXTPROC __glewBindParameterEXT; +GLEW_FUN_EXPORT PFNGLBINDTEXGENPARAMETEREXTPROC __glewBindTexGenParameterEXT; 
+GLEW_FUN_EXPORT PFNGLBINDTEXTUREUNITPARAMETEREXTPROC __glewBindTextureUnitParameterEXT; +GLEW_FUN_EXPORT PFNGLBINDVERTEXSHADEREXTPROC __glewBindVertexShaderEXT; +GLEW_FUN_EXPORT PFNGLDELETEVERTEXSHADEREXTPROC __glewDeleteVertexShaderEXT; +GLEW_FUN_EXPORT PFNGLDISABLEVARIANTCLIENTSTATEEXTPROC __glewDisableVariantClientStateEXT; +GLEW_FUN_EXPORT PFNGLENABLEVARIANTCLIENTSTATEEXTPROC __glewEnableVariantClientStateEXT; +GLEW_FUN_EXPORT PFNGLENDVERTEXSHADEREXTPROC __glewEndVertexShaderEXT; +GLEW_FUN_EXPORT PFNGLEXTRACTCOMPONENTEXTPROC __glewExtractComponentEXT; +GLEW_FUN_EXPORT PFNGLGENSYMBOLSEXTPROC __glewGenSymbolsEXT; +GLEW_FUN_EXPORT PFNGLGENVERTEXSHADERSEXTPROC __glewGenVertexShadersEXT; +GLEW_FUN_EXPORT PFNGLGETINVARIANTBOOLEANVEXTPROC __glewGetInvariantBooleanvEXT; +GLEW_FUN_EXPORT PFNGLGETINVARIANTFLOATVEXTPROC __glewGetInvariantFloatvEXT; +GLEW_FUN_EXPORT PFNGLGETINVARIANTINTEGERVEXTPROC __glewGetInvariantIntegervEXT; +GLEW_FUN_EXPORT PFNGLGETLOCALCONSTANTBOOLEANVEXTPROC __glewGetLocalConstantBooleanvEXT; +GLEW_FUN_EXPORT PFNGLGETLOCALCONSTANTFLOATVEXTPROC __glewGetLocalConstantFloatvEXT; +GLEW_FUN_EXPORT PFNGLGETLOCALCONSTANTINTEGERVEXTPROC __glewGetLocalConstantIntegervEXT; +GLEW_FUN_EXPORT PFNGLGETVARIANTBOOLEANVEXTPROC __glewGetVariantBooleanvEXT; +GLEW_FUN_EXPORT PFNGLGETVARIANTFLOATVEXTPROC __glewGetVariantFloatvEXT; +GLEW_FUN_EXPORT PFNGLGETVARIANTINTEGERVEXTPROC __glewGetVariantIntegervEXT; +GLEW_FUN_EXPORT PFNGLGETVARIANTPOINTERVEXTPROC __glewGetVariantPointervEXT; +GLEW_FUN_EXPORT PFNGLINSERTCOMPONENTEXTPROC __glewInsertComponentEXT; +GLEW_FUN_EXPORT PFNGLISVARIANTENABLEDEXTPROC __glewIsVariantEnabledEXT; +GLEW_FUN_EXPORT PFNGLSETINVARIANTEXTPROC __glewSetInvariantEXT; +GLEW_FUN_EXPORT PFNGLSETLOCALCONSTANTEXTPROC __glewSetLocalConstantEXT; +GLEW_FUN_EXPORT PFNGLSHADEROP1EXTPROC __glewShaderOp1EXT; +GLEW_FUN_EXPORT PFNGLSHADEROP2EXTPROC __glewShaderOp2EXT; +GLEW_FUN_EXPORT PFNGLSHADEROP3EXTPROC __glewShaderOp3EXT; +GLEW_FUN_EXPORT PFNGLSWIZZLEEXTPROC 
__glewSwizzleEXT; +GLEW_FUN_EXPORT PFNGLVARIANTPOINTEREXTPROC __glewVariantPointerEXT; +GLEW_FUN_EXPORT PFNGLVARIANTBVEXTPROC __glewVariantbvEXT; +GLEW_FUN_EXPORT PFNGLVARIANTDVEXTPROC __glewVariantdvEXT; +GLEW_FUN_EXPORT PFNGLVARIANTFVEXTPROC __glewVariantfvEXT; +GLEW_FUN_EXPORT PFNGLVARIANTIVEXTPROC __glewVariantivEXT; +GLEW_FUN_EXPORT PFNGLVARIANTSVEXTPROC __glewVariantsvEXT; +GLEW_FUN_EXPORT PFNGLVARIANTUBVEXTPROC __glewVariantubvEXT; +GLEW_FUN_EXPORT PFNGLVARIANTUIVEXTPROC __glewVariantuivEXT; +GLEW_FUN_EXPORT PFNGLVARIANTUSVEXTPROC __glewVariantusvEXT; +GLEW_FUN_EXPORT PFNGLWRITEMASKEXTPROC __glewWriteMaskEXT; + +GLEW_FUN_EXPORT PFNGLVERTEXWEIGHTPOINTEREXTPROC __glewVertexWeightPointerEXT; +GLEW_FUN_EXPORT PFNGLVERTEXWEIGHTFEXTPROC __glewVertexWeightfEXT; +GLEW_FUN_EXPORT PFNGLVERTEXWEIGHTFVEXTPROC __glewVertexWeightfvEXT; + +GLEW_FUN_EXPORT PFNGLFRAMETERMINATORGREMEDYPROC __glewFrameTerminatorGREMEDY; + +GLEW_FUN_EXPORT PFNGLSTRINGMARKERGREMEDYPROC __glewStringMarkerGREMEDY; + +GLEW_FUN_EXPORT PFNGLGETIMAGETRANSFORMPARAMETERFVHPPROC __glewGetImageTransformParameterfvHP; +GLEW_FUN_EXPORT PFNGLGETIMAGETRANSFORMPARAMETERIVHPPROC __glewGetImageTransformParameterivHP; +GLEW_FUN_EXPORT PFNGLIMAGETRANSFORMPARAMETERFHPPROC __glewImageTransformParameterfHP; +GLEW_FUN_EXPORT PFNGLIMAGETRANSFORMPARAMETERFVHPPROC __glewImageTransformParameterfvHP; +GLEW_FUN_EXPORT PFNGLIMAGETRANSFORMPARAMETERIHPPROC __glewImageTransformParameteriHP; +GLEW_FUN_EXPORT PFNGLIMAGETRANSFORMPARAMETERIVHPPROC __glewImageTransformParameterivHP; + +GLEW_FUN_EXPORT PFNGLMULTIMODEDRAWARRAYSIBMPROC __glewMultiModeDrawArraysIBM; +GLEW_FUN_EXPORT PFNGLMULTIMODEDRAWELEMENTSIBMPROC __glewMultiModeDrawElementsIBM; + +GLEW_FUN_EXPORT PFNGLCOLORPOINTERLISTIBMPROC __glewColorPointerListIBM; +GLEW_FUN_EXPORT PFNGLEDGEFLAGPOINTERLISTIBMPROC __glewEdgeFlagPointerListIBM; +GLEW_FUN_EXPORT PFNGLFOGCOORDPOINTERLISTIBMPROC __glewFogCoordPointerListIBM; +GLEW_FUN_EXPORT PFNGLINDEXPOINTERLISTIBMPROC 
__glewIndexPointerListIBM; +GLEW_FUN_EXPORT PFNGLNORMALPOINTERLISTIBMPROC __glewNormalPointerListIBM; +GLEW_FUN_EXPORT PFNGLSECONDARYCOLORPOINTERLISTIBMPROC __glewSecondaryColorPointerListIBM; +GLEW_FUN_EXPORT PFNGLTEXCOORDPOINTERLISTIBMPROC __glewTexCoordPointerListIBM; +GLEW_FUN_EXPORT PFNGLVERTEXPOINTERLISTIBMPROC __glewVertexPointerListIBM; + +GLEW_FUN_EXPORT PFNGLCOLORPOINTERVINTELPROC __glewColorPointervINTEL; +GLEW_FUN_EXPORT PFNGLNORMALPOINTERVINTELPROC __glewNormalPointervINTEL; +GLEW_FUN_EXPORT PFNGLTEXCOORDPOINTERVINTELPROC __glewTexCoordPointervINTEL; +GLEW_FUN_EXPORT PFNGLVERTEXPOINTERVINTELPROC __glewVertexPointervINTEL; + +GLEW_FUN_EXPORT PFNGLTEXSCISSORFUNCINTELPROC __glewTexScissorFuncINTEL; +GLEW_FUN_EXPORT PFNGLTEXSCISSORINTELPROC __glewTexScissorINTEL; + +GLEW_FUN_EXPORT PFNGLBUFFERREGIONENABLEDEXTPROC __glewBufferRegionEnabledEXT; +GLEW_FUN_EXPORT PFNGLDELETEBUFFERREGIONEXTPROC __glewDeleteBufferRegionEXT; +GLEW_FUN_EXPORT PFNGLDRAWBUFFERREGIONEXTPROC __glewDrawBufferRegionEXT; +GLEW_FUN_EXPORT PFNGLNEWBUFFERREGIONEXTPROC __glewNewBufferRegionEXT; +GLEW_FUN_EXPORT PFNGLREADBUFFERREGIONEXTPROC __glewReadBufferRegionEXT; + +GLEW_FUN_EXPORT PFNGLRESIZEBUFFERSMESAPROC __glewResizeBuffersMESA; + +GLEW_FUN_EXPORT PFNGLWINDOWPOS2DMESAPROC __glewWindowPos2dMESA; +GLEW_FUN_EXPORT PFNGLWINDOWPOS2DVMESAPROC __glewWindowPos2dvMESA; +GLEW_FUN_EXPORT PFNGLWINDOWPOS2FMESAPROC __glewWindowPos2fMESA; +GLEW_FUN_EXPORT PFNGLWINDOWPOS2FVMESAPROC __glewWindowPos2fvMESA; +GLEW_FUN_EXPORT PFNGLWINDOWPOS2IMESAPROC __glewWindowPos2iMESA; +GLEW_FUN_EXPORT PFNGLWINDOWPOS2IVMESAPROC __glewWindowPos2ivMESA; +GLEW_FUN_EXPORT PFNGLWINDOWPOS2SMESAPROC __glewWindowPos2sMESA; +GLEW_FUN_EXPORT PFNGLWINDOWPOS2SVMESAPROC __glewWindowPos2svMESA; +GLEW_FUN_EXPORT PFNGLWINDOWPOS3DMESAPROC __glewWindowPos3dMESA; +GLEW_FUN_EXPORT PFNGLWINDOWPOS3DVMESAPROC __glewWindowPos3dvMESA; +GLEW_FUN_EXPORT PFNGLWINDOWPOS3FMESAPROC __glewWindowPos3fMESA; +GLEW_FUN_EXPORT PFNGLWINDOWPOS3FVMESAPROC 
__glewWindowPos3fvMESA; +GLEW_FUN_EXPORT PFNGLWINDOWPOS3IMESAPROC __glewWindowPos3iMESA; +GLEW_FUN_EXPORT PFNGLWINDOWPOS3IVMESAPROC __glewWindowPos3ivMESA; +GLEW_FUN_EXPORT PFNGLWINDOWPOS3SMESAPROC __glewWindowPos3sMESA; +GLEW_FUN_EXPORT PFNGLWINDOWPOS3SVMESAPROC __glewWindowPos3svMESA; +GLEW_FUN_EXPORT PFNGLWINDOWPOS4DMESAPROC __glewWindowPos4dMESA; +GLEW_FUN_EXPORT PFNGLWINDOWPOS4DVMESAPROC __glewWindowPos4dvMESA; +GLEW_FUN_EXPORT PFNGLWINDOWPOS4FMESAPROC __glewWindowPos4fMESA; +GLEW_FUN_EXPORT PFNGLWINDOWPOS4FVMESAPROC __glewWindowPos4fvMESA; +GLEW_FUN_EXPORT PFNGLWINDOWPOS4IMESAPROC __glewWindowPos4iMESA; +GLEW_FUN_EXPORT PFNGLWINDOWPOS4IVMESAPROC __glewWindowPos4ivMESA; +GLEW_FUN_EXPORT PFNGLWINDOWPOS4SMESAPROC __glewWindowPos4sMESA; +GLEW_FUN_EXPORT PFNGLWINDOWPOS4SVMESAPROC __glewWindowPos4svMESA; + +GLEW_FUN_EXPORT PFNGLBEGINCONDITIONALRENDERNVPROC __glewBeginConditionalRenderNV; +GLEW_FUN_EXPORT PFNGLENDCONDITIONALRENDERNVPROC __glewEndConditionalRenderNV; + +GLEW_FUN_EXPORT PFNGLCLEARDEPTHDNVPROC __glewClearDepthdNV; +GLEW_FUN_EXPORT PFNGLDEPTHBOUNDSDNVPROC __glewDepthBoundsdNV; +GLEW_FUN_EXPORT PFNGLDEPTHRANGEDNVPROC __glewDepthRangedNV; + +GLEW_FUN_EXPORT PFNGLEVALMAPSNVPROC __glewEvalMapsNV; +GLEW_FUN_EXPORT PFNGLGETMAPATTRIBPARAMETERFVNVPROC __glewGetMapAttribParameterfvNV; +GLEW_FUN_EXPORT PFNGLGETMAPATTRIBPARAMETERIVNVPROC __glewGetMapAttribParameterivNV; +GLEW_FUN_EXPORT PFNGLGETMAPCONTROLPOINTSNVPROC __glewGetMapControlPointsNV; +GLEW_FUN_EXPORT PFNGLGETMAPPARAMETERFVNVPROC __glewGetMapParameterfvNV; +GLEW_FUN_EXPORT PFNGLGETMAPPARAMETERIVNVPROC __glewGetMapParameterivNV; +GLEW_FUN_EXPORT PFNGLMAPCONTROLPOINTSNVPROC __glewMapControlPointsNV; +GLEW_FUN_EXPORT PFNGLMAPPARAMETERFVNVPROC __glewMapParameterfvNV; +GLEW_FUN_EXPORT PFNGLMAPPARAMETERIVNVPROC __glewMapParameterivNV; + +GLEW_FUN_EXPORT PFNGLGETMULTISAMPLEFVNVPROC __glewGetMultisamplefvNV; +GLEW_FUN_EXPORT PFNGLSAMPLEMASKINDEXEDNVPROC __glewSampleMaskIndexedNV; +GLEW_FUN_EXPORT 
PFNGLTEXRENDERBUFFERNVPROC __glewTexRenderbufferNV; + +GLEW_FUN_EXPORT PFNGLDELETEFENCESNVPROC __glewDeleteFencesNV; +GLEW_FUN_EXPORT PFNGLFINISHFENCENVPROC __glewFinishFenceNV; +GLEW_FUN_EXPORT PFNGLGENFENCESNVPROC __glewGenFencesNV; +GLEW_FUN_EXPORT PFNGLGETFENCEIVNVPROC __glewGetFenceivNV; +GLEW_FUN_EXPORT PFNGLISFENCENVPROC __glewIsFenceNV; +GLEW_FUN_EXPORT PFNGLSETFENCENVPROC __glewSetFenceNV; +GLEW_FUN_EXPORT PFNGLTESTFENCENVPROC __glewTestFenceNV; + +GLEW_FUN_EXPORT PFNGLGETPROGRAMNAMEDPARAMETERDVNVPROC __glewGetProgramNamedParameterdvNV; +GLEW_FUN_EXPORT PFNGLGETPROGRAMNAMEDPARAMETERFVNVPROC __glewGetProgramNamedParameterfvNV; +GLEW_FUN_EXPORT PFNGLPROGRAMNAMEDPARAMETER4DNVPROC __glewProgramNamedParameter4dNV; +GLEW_FUN_EXPORT PFNGLPROGRAMNAMEDPARAMETER4DVNVPROC __glewProgramNamedParameter4dvNV; +GLEW_FUN_EXPORT PFNGLPROGRAMNAMEDPARAMETER4FNVPROC __glewProgramNamedParameter4fNV; +GLEW_FUN_EXPORT PFNGLPROGRAMNAMEDPARAMETER4FVNVPROC __glewProgramNamedParameter4fvNV; + +GLEW_FUN_EXPORT PFNGLRENDERBUFFERSTORAGEMULTISAMPLECOVERAGENVPROC __glewRenderbufferStorageMultisampleCoverageNV; + +GLEW_FUN_EXPORT PFNGLPROGRAMVERTEXLIMITNVPROC __glewProgramVertexLimitNV; + +GLEW_FUN_EXPORT PFNGLPROGRAMENVPARAMETERI4INVPROC __glewProgramEnvParameterI4iNV; +GLEW_FUN_EXPORT PFNGLPROGRAMENVPARAMETERI4IVNVPROC __glewProgramEnvParameterI4ivNV; +GLEW_FUN_EXPORT PFNGLPROGRAMENVPARAMETERI4UINVPROC __glewProgramEnvParameterI4uiNV; +GLEW_FUN_EXPORT PFNGLPROGRAMENVPARAMETERI4UIVNVPROC __glewProgramEnvParameterI4uivNV; +GLEW_FUN_EXPORT PFNGLPROGRAMENVPARAMETERSI4IVNVPROC __glewProgramEnvParametersI4ivNV; +GLEW_FUN_EXPORT PFNGLPROGRAMENVPARAMETERSI4UIVNVPROC __glewProgramEnvParametersI4uivNV; +GLEW_FUN_EXPORT PFNGLPROGRAMLOCALPARAMETERI4INVPROC __glewProgramLocalParameterI4iNV; +GLEW_FUN_EXPORT PFNGLPROGRAMLOCALPARAMETERI4IVNVPROC __glewProgramLocalParameterI4ivNV; +GLEW_FUN_EXPORT PFNGLPROGRAMLOCALPARAMETERI4UINVPROC __glewProgramLocalParameterI4uiNV; +GLEW_FUN_EXPORT 
PFNGLPROGRAMLOCALPARAMETERI4UIVNVPROC __glewProgramLocalParameterI4uivNV; +GLEW_FUN_EXPORT PFNGLPROGRAMLOCALPARAMETERSI4IVNVPROC __glewProgramLocalParametersI4ivNV; +GLEW_FUN_EXPORT PFNGLPROGRAMLOCALPARAMETERSI4UIVNVPROC __glewProgramLocalParametersI4uivNV; + +GLEW_FUN_EXPORT PFNGLCOLOR3HNVPROC __glewColor3hNV; +GLEW_FUN_EXPORT PFNGLCOLOR3HVNVPROC __glewColor3hvNV; +GLEW_FUN_EXPORT PFNGLCOLOR4HNVPROC __glewColor4hNV; +GLEW_FUN_EXPORT PFNGLCOLOR4HVNVPROC __glewColor4hvNV; +GLEW_FUN_EXPORT PFNGLFOGCOORDHNVPROC __glewFogCoordhNV; +GLEW_FUN_EXPORT PFNGLFOGCOORDHVNVPROC __glewFogCoordhvNV; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD1HNVPROC __glewMultiTexCoord1hNV; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD1HVNVPROC __glewMultiTexCoord1hvNV; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD2HNVPROC __glewMultiTexCoord2hNV; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD2HVNVPROC __glewMultiTexCoord2hvNV; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD3HNVPROC __glewMultiTexCoord3hNV; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD3HVNVPROC __glewMultiTexCoord3hvNV; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD4HNVPROC __glewMultiTexCoord4hNV; +GLEW_FUN_EXPORT PFNGLMULTITEXCOORD4HVNVPROC __glewMultiTexCoord4hvNV; +GLEW_FUN_EXPORT PFNGLNORMAL3HNVPROC __glewNormal3hNV; +GLEW_FUN_EXPORT PFNGLNORMAL3HVNVPROC __glewNormal3hvNV; +GLEW_FUN_EXPORT PFNGLSECONDARYCOLOR3HNVPROC __glewSecondaryColor3hNV; +GLEW_FUN_EXPORT PFNGLSECONDARYCOLOR3HVNVPROC __glewSecondaryColor3hvNV; +GLEW_FUN_EXPORT PFNGLTEXCOORD1HNVPROC __glewTexCoord1hNV; +GLEW_FUN_EXPORT PFNGLTEXCOORD1HVNVPROC __glewTexCoord1hvNV; +GLEW_FUN_EXPORT PFNGLTEXCOORD2HNVPROC __glewTexCoord2hNV; +GLEW_FUN_EXPORT PFNGLTEXCOORD2HVNVPROC __glewTexCoord2hvNV; +GLEW_FUN_EXPORT PFNGLTEXCOORD3HNVPROC __glewTexCoord3hNV; +GLEW_FUN_EXPORT PFNGLTEXCOORD3HVNVPROC __glewTexCoord3hvNV; +GLEW_FUN_EXPORT PFNGLTEXCOORD4HNVPROC __glewTexCoord4hNV; +GLEW_FUN_EXPORT PFNGLTEXCOORD4HVNVPROC __glewTexCoord4hvNV; +GLEW_FUN_EXPORT PFNGLVERTEX2HNVPROC __glewVertex2hNV; +GLEW_FUN_EXPORT PFNGLVERTEX2HVNVPROC __glewVertex2hvNV; 
+GLEW_FUN_EXPORT PFNGLVERTEX3HNVPROC __glewVertex3hNV; +GLEW_FUN_EXPORT PFNGLVERTEX3HVNVPROC __glewVertex3hvNV; +GLEW_FUN_EXPORT PFNGLVERTEX4HNVPROC __glewVertex4hNV; +GLEW_FUN_EXPORT PFNGLVERTEX4HVNVPROC __glewVertex4hvNV; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB1HNVPROC __glewVertexAttrib1hNV; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB1HVNVPROC __glewVertexAttrib1hvNV; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB2HNVPROC __glewVertexAttrib2hNV; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB2HVNVPROC __glewVertexAttrib2hvNV; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB3HNVPROC __glewVertexAttrib3hNV; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB3HVNVPROC __glewVertexAttrib3hvNV; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB4HNVPROC __glewVertexAttrib4hNV; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB4HVNVPROC __glewVertexAttrib4hvNV; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIBS1HVNVPROC __glewVertexAttribs1hvNV; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIBS2HVNVPROC __glewVertexAttribs2hvNV; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIBS3HVNVPROC __glewVertexAttribs3hvNV; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIBS4HVNVPROC __glewVertexAttribs4hvNV; +GLEW_FUN_EXPORT PFNGLVERTEXWEIGHTHNVPROC __glewVertexWeighthNV; +GLEW_FUN_EXPORT PFNGLVERTEXWEIGHTHVNVPROC __glewVertexWeighthvNV; + +GLEW_FUN_EXPORT PFNGLBEGINOCCLUSIONQUERYNVPROC __glewBeginOcclusionQueryNV; +GLEW_FUN_EXPORT PFNGLDELETEOCCLUSIONQUERIESNVPROC __glewDeleteOcclusionQueriesNV; +GLEW_FUN_EXPORT PFNGLENDOCCLUSIONQUERYNVPROC __glewEndOcclusionQueryNV; +GLEW_FUN_EXPORT PFNGLGENOCCLUSIONQUERIESNVPROC __glewGenOcclusionQueriesNV; +GLEW_FUN_EXPORT PFNGLGETOCCLUSIONQUERYIVNVPROC __glewGetOcclusionQueryivNV; +GLEW_FUN_EXPORT PFNGLGETOCCLUSIONQUERYUIVNVPROC __glewGetOcclusionQueryuivNV; +GLEW_FUN_EXPORT PFNGLISOCCLUSIONQUERYNVPROC __glewIsOcclusionQueryNV; + +GLEW_FUN_EXPORT PFNGLPROGRAMBUFFERPARAMETERSIIVNVPROC __glewProgramBufferParametersIivNV; +GLEW_FUN_EXPORT PFNGLPROGRAMBUFFERPARAMETERSIUIVNVPROC __glewProgramBufferParametersIuivNV; +GLEW_FUN_EXPORT PFNGLPROGRAMBUFFERPARAMETERSFVNVPROC __glewProgramBufferParametersfvNV; + 
+GLEW_FUN_EXPORT PFNGLFLUSHPIXELDATARANGENVPROC __glewFlushPixelDataRangeNV; +GLEW_FUN_EXPORT PFNGLPIXELDATARANGENVPROC __glewPixelDataRangeNV; + +GLEW_FUN_EXPORT PFNGLPOINTPARAMETERINVPROC __glewPointParameteriNV; +GLEW_FUN_EXPORT PFNGLPOINTPARAMETERIVNVPROC __glewPointParameterivNV; + +GLEW_FUN_EXPORT PFNGLGETVIDEOI64VNVPROC __glewGetVideoi64vNV; +GLEW_FUN_EXPORT PFNGLGETVIDEOIVNVPROC __glewGetVideoivNV; +GLEW_FUN_EXPORT PFNGLGETVIDEOUI64VNVPROC __glewGetVideoui64vNV; +GLEW_FUN_EXPORT PFNGLGETVIDEOUIVNVPROC __glewGetVideouivNV; +GLEW_FUN_EXPORT PFNGLPRESENTFRAMEDUALFILLNVPROC __glewPresentFrameDualFillNV; +GLEW_FUN_EXPORT PFNGLPRESENTFRAMEKEYEDNVPROC __glewPresentFrameKeyedNV; +GLEW_FUN_EXPORT PFNGLVIDEOPARAMETERIVNVPROC __glewVideoParameterivNV; + +GLEW_FUN_EXPORT PFNGLPRIMITIVERESTARTINDEXNVPROC __glewPrimitiveRestartIndexNV; +GLEW_FUN_EXPORT PFNGLPRIMITIVERESTARTNVPROC __glewPrimitiveRestartNV; + +GLEW_FUN_EXPORT PFNGLCOMBINERINPUTNVPROC __glewCombinerInputNV; +GLEW_FUN_EXPORT PFNGLCOMBINEROUTPUTNVPROC __glewCombinerOutputNV; +GLEW_FUN_EXPORT PFNGLCOMBINERPARAMETERFNVPROC __glewCombinerParameterfNV; +GLEW_FUN_EXPORT PFNGLCOMBINERPARAMETERFVNVPROC __glewCombinerParameterfvNV; +GLEW_FUN_EXPORT PFNGLCOMBINERPARAMETERINVPROC __glewCombinerParameteriNV; +GLEW_FUN_EXPORT PFNGLCOMBINERPARAMETERIVNVPROC __glewCombinerParameterivNV; +GLEW_FUN_EXPORT PFNGLFINALCOMBINERINPUTNVPROC __glewFinalCombinerInputNV; +GLEW_FUN_EXPORT PFNGLGETCOMBINERINPUTPARAMETERFVNVPROC __glewGetCombinerInputParameterfvNV; +GLEW_FUN_EXPORT PFNGLGETCOMBINERINPUTPARAMETERIVNVPROC __glewGetCombinerInputParameterivNV; +GLEW_FUN_EXPORT PFNGLGETCOMBINEROUTPUTPARAMETERFVNVPROC __glewGetCombinerOutputParameterfvNV; +GLEW_FUN_EXPORT PFNGLGETCOMBINEROUTPUTPARAMETERIVNVPROC __glewGetCombinerOutputParameterivNV; +GLEW_FUN_EXPORT PFNGLGETFINALCOMBINERINPUTPARAMETERFVNVPROC __glewGetFinalCombinerInputParameterfvNV; +GLEW_FUN_EXPORT PFNGLGETFINALCOMBINERINPUTPARAMETERIVNVPROC 
__glewGetFinalCombinerInputParameterivNV; + +GLEW_FUN_EXPORT PFNGLCOMBINERSTAGEPARAMETERFVNVPROC __glewCombinerStageParameterfvNV; +GLEW_FUN_EXPORT PFNGLGETCOMBINERSTAGEPARAMETERFVNVPROC __glewGetCombinerStageParameterfvNV; + +GLEW_FUN_EXPORT PFNGLACTIVEVARYINGNVPROC __glewActiveVaryingNV; +GLEW_FUN_EXPORT PFNGLBEGINTRANSFORMFEEDBACKNVPROC __glewBeginTransformFeedbackNV; +GLEW_FUN_EXPORT PFNGLBINDBUFFERBASENVPROC __glewBindBufferBaseNV; +GLEW_FUN_EXPORT PFNGLBINDBUFFEROFFSETNVPROC __glewBindBufferOffsetNV; +GLEW_FUN_EXPORT PFNGLBINDBUFFERRANGENVPROC __glewBindBufferRangeNV; +GLEW_FUN_EXPORT PFNGLENDTRANSFORMFEEDBACKNVPROC __glewEndTransformFeedbackNV; +GLEW_FUN_EXPORT PFNGLGETACTIVEVARYINGNVPROC __glewGetActiveVaryingNV; +GLEW_FUN_EXPORT PFNGLGETTRANSFORMFEEDBACKVARYINGNVPROC __glewGetTransformFeedbackVaryingNV; +GLEW_FUN_EXPORT PFNGLGETVARYINGLOCATIONNVPROC __glewGetVaryingLocationNV; +GLEW_FUN_EXPORT PFNGLTRANSFORMFEEDBACKATTRIBSNVPROC __glewTransformFeedbackAttribsNV; +GLEW_FUN_EXPORT PFNGLTRANSFORMFEEDBACKVARYINGSNVPROC __glewTransformFeedbackVaryingsNV; + +GLEW_FUN_EXPORT PFNGLFLUSHVERTEXARRAYRANGENVPROC __glewFlushVertexArrayRangeNV; +GLEW_FUN_EXPORT PFNGLVERTEXARRAYRANGENVPROC __glewVertexArrayRangeNV; + +GLEW_FUN_EXPORT PFNGLAREPROGRAMSRESIDENTNVPROC __glewAreProgramsResidentNV; +GLEW_FUN_EXPORT PFNGLBINDPROGRAMNVPROC __glewBindProgramNV; +GLEW_FUN_EXPORT PFNGLDELETEPROGRAMSNVPROC __glewDeleteProgramsNV; +GLEW_FUN_EXPORT PFNGLEXECUTEPROGRAMNVPROC __glewExecuteProgramNV; +GLEW_FUN_EXPORT PFNGLGENPROGRAMSNVPROC __glewGenProgramsNV; +GLEW_FUN_EXPORT PFNGLGETPROGRAMPARAMETERDVNVPROC __glewGetProgramParameterdvNV; +GLEW_FUN_EXPORT PFNGLGETPROGRAMPARAMETERFVNVPROC __glewGetProgramParameterfvNV; +GLEW_FUN_EXPORT PFNGLGETPROGRAMSTRINGNVPROC __glewGetProgramStringNV; +GLEW_FUN_EXPORT PFNGLGETPROGRAMIVNVPROC __glewGetProgramivNV; +GLEW_FUN_EXPORT PFNGLGETTRACKMATRIXIVNVPROC __glewGetTrackMatrixivNV; +GLEW_FUN_EXPORT PFNGLGETVERTEXATTRIBPOINTERVNVPROC 
__glewGetVertexAttribPointervNV; +GLEW_FUN_EXPORT PFNGLGETVERTEXATTRIBDVNVPROC __glewGetVertexAttribdvNV; +GLEW_FUN_EXPORT PFNGLGETVERTEXATTRIBFVNVPROC __glewGetVertexAttribfvNV; +GLEW_FUN_EXPORT PFNGLGETVERTEXATTRIBIVNVPROC __glewGetVertexAttribivNV; +GLEW_FUN_EXPORT PFNGLISPROGRAMNVPROC __glewIsProgramNV; +GLEW_FUN_EXPORT PFNGLLOADPROGRAMNVPROC __glewLoadProgramNV; +GLEW_FUN_EXPORT PFNGLPROGRAMPARAMETER4DNVPROC __glewProgramParameter4dNV; +GLEW_FUN_EXPORT PFNGLPROGRAMPARAMETER4DVNVPROC __glewProgramParameter4dvNV; +GLEW_FUN_EXPORT PFNGLPROGRAMPARAMETER4FNVPROC __glewProgramParameter4fNV; +GLEW_FUN_EXPORT PFNGLPROGRAMPARAMETER4FVNVPROC __glewProgramParameter4fvNV; +GLEW_FUN_EXPORT PFNGLPROGRAMPARAMETERS4DVNVPROC __glewProgramParameters4dvNV; +GLEW_FUN_EXPORT PFNGLPROGRAMPARAMETERS4FVNVPROC __glewProgramParameters4fvNV; +GLEW_FUN_EXPORT PFNGLREQUESTRESIDENTPROGRAMSNVPROC __glewRequestResidentProgramsNV; +GLEW_FUN_EXPORT PFNGLTRACKMATRIXNVPROC __glewTrackMatrixNV; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB1DNVPROC __glewVertexAttrib1dNV; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB1DVNVPROC __glewVertexAttrib1dvNV; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB1FNVPROC __glewVertexAttrib1fNV; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB1FVNVPROC __glewVertexAttrib1fvNV; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB1SNVPROC __glewVertexAttrib1sNV; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB1SVNVPROC __glewVertexAttrib1svNV; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB2DNVPROC __glewVertexAttrib2dNV; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB2DVNVPROC __glewVertexAttrib2dvNV; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB2FNVPROC __glewVertexAttrib2fNV; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB2FVNVPROC __glewVertexAttrib2fvNV; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB2SNVPROC __glewVertexAttrib2sNV; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB2SVNVPROC __glewVertexAttrib2svNV; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB3DNVPROC __glewVertexAttrib3dNV; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB3DVNVPROC __glewVertexAttrib3dvNV; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB3FNVPROC __glewVertexAttrib3fNV; 
+GLEW_FUN_EXPORT PFNGLVERTEXATTRIB3FVNVPROC __glewVertexAttrib3fvNV; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB3SNVPROC __glewVertexAttrib3sNV; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB3SVNVPROC __glewVertexAttrib3svNV; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB4DNVPROC __glewVertexAttrib4dNV; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB4DVNVPROC __glewVertexAttrib4dvNV; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB4FNVPROC __glewVertexAttrib4fNV; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB4FVNVPROC __glewVertexAttrib4fvNV; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB4SNVPROC __glewVertexAttrib4sNV; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB4SVNVPROC __glewVertexAttrib4svNV; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB4UBNVPROC __glewVertexAttrib4ubNV; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIB4UBVNVPROC __glewVertexAttrib4ubvNV; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIBPOINTERNVPROC __glewVertexAttribPointerNV; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIBS1DVNVPROC __glewVertexAttribs1dvNV; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIBS1FVNVPROC __glewVertexAttribs1fvNV; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIBS1SVNVPROC __glewVertexAttribs1svNV; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIBS2DVNVPROC __glewVertexAttribs2dvNV; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIBS2FVNVPROC __glewVertexAttribs2fvNV; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIBS2SVNVPROC __glewVertexAttribs2svNV; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIBS3DVNVPROC __glewVertexAttribs3dvNV; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIBS3FVNVPROC __glewVertexAttribs3fvNV; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIBS3SVNVPROC __glewVertexAttribs3svNV; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIBS4DVNVPROC __glewVertexAttribs4dvNV; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIBS4FVNVPROC __glewVertexAttribs4fvNV; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIBS4SVNVPROC __glewVertexAttribs4svNV; +GLEW_FUN_EXPORT PFNGLVERTEXATTRIBS4UBVNVPROC __glewVertexAttribs4ubvNV; + +GLEW_FUN_EXPORT PFNGLCLEARDEPTHFOESPROC __glewClearDepthfOES; +GLEW_FUN_EXPORT PFNGLCLIPPLANEFOESPROC __glewClipPlanefOES; +GLEW_FUN_EXPORT PFNGLDEPTHRANGEFOESPROC __glewDepthRangefOES; +GLEW_FUN_EXPORT PFNGLFRUSTUMFOESPROC 
__glewFrustumfOES; +GLEW_FUN_EXPORT PFNGLGETCLIPPLANEFOESPROC __glewGetClipPlanefOES; +GLEW_FUN_EXPORT PFNGLORTHOFOESPROC __glewOrthofOES; + +GLEW_FUN_EXPORT PFNGLDETAILTEXFUNCSGISPROC __glewDetailTexFuncSGIS; +GLEW_FUN_EXPORT PFNGLGETDETAILTEXFUNCSGISPROC __glewGetDetailTexFuncSGIS; + +GLEW_FUN_EXPORT PFNGLFOGFUNCSGISPROC __glewFogFuncSGIS; +GLEW_FUN_EXPORT PFNGLGETFOGFUNCSGISPROC __glewGetFogFuncSGIS; + +GLEW_FUN_EXPORT PFNGLSAMPLEMASKSGISPROC __glewSampleMaskSGIS; +GLEW_FUN_EXPORT PFNGLSAMPLEPATTERNSGISPROC __glewSamplePatternSGIS; + +GLEW_FUN_EXPORT PFNGLGETSHARPENTEXFUNCSGISPROC __glewGetSharpenTexFuncSGIS; +GLEW_FUN_EXPORT PFNGLSHARPENTEXFUNCSGISPROC __glewSharpenTexFuncSGIS; + +GLEW_FUN_EXPORT PFNGLTEXIMAGE4DSGISPROC __glewTexImage4DSGIS; +GLEW_FUN_EXPORT PFNGLTEXSUBIMAGE4DSGISPROC __glewTexSubImage4DSGIS; + +GLEW_FUN_EXPORT PFNGLGETTEXFILTERFUNCSGISPROC __glewGetTexFilterFuncSGIS; +GLEW_FUN_EXPORT PFNGLTEXFILTERFUNCSGISPROC __glewTexFilterFuncSGIS; + +GLEW_FUN_EXPORT PFNGLASYNCMARKERSGIXPROC __glewAsyncMarkerSGIX; +GLEW_FUN_EXPORT PFNGLDELETEASYNCMARKERSSGIXPROC __glewDeleteAsyncMarkersSGIX; +GLEW_FUN_EXPORT PFNGLFINISHASYNCSGIXPROC __glewFinishAsyncSGIX; +GLEW_FUN_EXPORT PFNGLGENASYNCMARKERSSGIXPROC __glewGenAsyncMarkersSGIX; +GLEW_FUN_EXPORT PFNGLISASYNCMARKERSGIXPROC __glewIsAsyncMarkerSGIX; +GLEW_FUN_EXPORT PFNGLPOLLASYNCSGIXPROC __glewPollAsyncSGIX; + +GLEW_FUN_EXPORT PFNGLFLUSHRASTERSGIXPROC __glewFlushRasterSGIX; + +GLEW_FUN_EXPORT PFNGLTEXTUREFOGSGIXPROC __glewTextureFogSGIX; + +GLEW_FUN_EXPORT PFNGLFRAGMENTCOLORMATERIALSGIXPROC __glewFragmentColorMaterialSGIX; +GLEW_FUN_EXPORT PFNGLFRAGMENTLIGHTMODELFSGIXPROC __glewFragmentLightModelfSGIX; +GLEW_FUN_EXPORT PFNGLFRAGMENTLIGHTMODELFVSGIXPROC __glewFragmentLightModelfvSGIX; +GLEW_FUN_EXPORT PFNGLFRAGMENTLIGHTMODELISGIXPROC __glewFragmentLightModeliSGIX; +GLEW_FUN_EXPORT PFNGLFRAGMENTLIGHTMODELIVSGIXPROC __glewFragmentLightModelivSGIX; +GLEW_FUN_EXPORT PFNGLFRAGMENTLIGHTFSGIXPROC 
__glewFragmentLightfSGIX; +GLEW_FUN_EXPORT PFNGLFRAGMENTLIGHTFVSGIXPROC __glewFragmentLightfvSGIX; +GLEW_FUN_EXPORT PFNGLFRAGMENTLIGHTISGIXPROC __glewFragmentLightiSGIX; +GLEW_FUN_EXPORT PFNGLFRAGMENTLIGHTIVSGIXPROC __glewFragmentLightivSGIX; +GLEW_FUN_EXPORT PFNGLFRAGMENTMATERIALFSGIXPROC __glewFragmentMaterialfSGIX; +GLEW_FUN_EXPORT PFNGLFRAGMENTMATERIALFVSGIXPROC __glewFragmentMaterialfvSGIX; +GLEW_FUN_EXPORT PFNGLFRAGMENTMATERIALISGIXPROC __glewFragmentMaterialiSGIX; +GLEW_FUN_EXPORT PFNGLFRAGMENTMATERIALIVSGIXPROC __glewFragmentMaterialivSGIX; +GLEW_FUN_EXPORT PFNGLGETFRAGMENTLIGHTFVSGIXPROC __glewGetFragmentLightfvSGIX; +GLEW_FUN_EXPORT PFNGLGETFRAGMENTLIGHTIVSGIXPROC __glewGetFragmentLightivSGIX; +GLEW_FUN_EXPORT PFNGLGETFRAGMENTMATERIALFVSGIXPROC __glewGetFragmentMaterialfvSGIX; +GLEW_FUN_EXPORT PFNGLGETFRAGMENTMATERIALIVSGIXPROC __glewGetFragmentMaterialivSGIX; + +GLEW_FUN_EXPORT PFNGLFRAMEZOOMSGIXPROC __glewFrameZoomSGIX; + +GLEW_FUN_EXPORT PFNGLPIXELTEXGENSGIXPROC __glewPixelTexGenSGIX; + +GLEW_FUN_EXPORT PFNGLREFERENCEPLANESGIXPROC __glewReferencePlaneSGIX; + +GLEW_FUN_EXPORT PFNGLSPRITEPARAMETERFSGIXPROC __glewSpriteParameterfSGIX; +GLEW_FUN_EXPORT PFNGLSPRITEPARAMETERFVSGIXPROC __glewSpriteParameterfvSGIX; +GLEW_FUN_EXPORT PFNGLSPRITEPARAMETERISGIXPROC __glewSpriteParameteriSGIX; +GLEW_FUN_EXPORT PFNGLSPRITEPARAMETERIVSGIXPROC __glewSpriteParameterivSGIX; + +GLEW_FUN_EXPORT PFNGLTAGSAMPLEBUFFERSGIXPROC __glewTagSampleBufferSGIX; + +GLEW_FUN_EXPORT PFNGLCOLORTABLEPARAMETERFVSGIPROC __glewColorTableParameterfvSGI; +GLEW_FUN_EXPORT PFNGLCOLORTABLEPARAMETERIVSGIPROC __glewColorTableParameterivSGI; +GLEW_FUN_EXPORT PFNGLCOLORTABLESGIPROC __glewColorTableSGI; +GLEW_FUN_EXPORT PFNGLCOPYCOLORTABLESGIPROC __glewCopyColorTableSGI; +GLEW_FUN_EXPORT PFNGLGETCOLORTABLEPARAMETERFVSGIPROC __glewGetColorTableParameterfvSGI; +GLEW_FUN_EXPORT PFNGLGETCOLORTABLEPARAMETERIVSGIPROC __glewGetColorTableParameterivSGI; +GLEW_FUN_EXPORT PFNGLGETCOLORTABLESGIPROC 
__glewGetColorTableSGI; + +GLEW_FUN_EXPORT PFNGLFINISHTEXTURESUNXPROC __glewFinishTextureSUNX; + +GLEW_FUN_EXPORT PFNGLGLOBALALPHAFACTORBSUNPROC __glewGlobalAlphaFactorbSUN; +GLEW_FUN_EXPORT PFNGLGLOBALALPHAFACTORDSUNPROC __glewGlobalAlphaFactordSUN; +GLEW_FUN_EXPORT PFNGLGLOBALALPHAFACTORFSUNPROC __glewGlobalAlphaFactorfSUN; +GLEW_FUN_EXPORT PFNGLGLOBALALPHAFACTORISUNPROC __glewGlobalAlphaFactoriSUN; +GLEW_FUN_EXPORT PFNGLGLOBALALPHAFACTORSSUNPROC __glewGlobalAlphaFactorsSUN; +GLEW_FUN_EXPORT PFNGLGLOBALALPHAFACTORUBSUNPROC __glewGlobalAlphaFactorubSUN; +GLEW_FUN_EXPORT PFNGLGLOBALALPHAFACTORUISUNPROC __glewGlobalAlphaFactoruiSUN; +GLEW_FUN_EXPORT PFNGLGLOBALALPHAFACTORUSSUNPROC __glewGlobalAlphaFactorusSUN; + +GLEW_FUN_EXPORT PFNGLREADVIDEOPIXELSSUNPROC __glewReadVideoPixelsSUN; + +GLEW_FUN_EXPORT PFNGLREPLACEMENTCODEPOINTERSUNPROC __glewReplacementCodePointerSUN; +GLEW_FUN_EXPORT PFNGLREPLACEMENTCODEUBSUNPROC __glewReplacementCodeubSUN; +GLEW_FUN_EXPORT PFNGLREPLACEMENTCODEUBVSUNPROC __glewReplacementCodeubvSUN; +GLEW_FUN_EXPORT PFNGLREPLACEMENTCODEUISUNPROC __glewReplacementCodeuiSUN; +GLEW_FUN_EXPORT PFNGLREPLACEMENTCODEUIVSUNPROC __glewReplacementCodeuivSUN; +GLEW_FUN_EXPORT PFNGLREPLACEMENTCODEUSSUNPROC __glewReplacementCodeusSUN; +GLEW_FUN_EXPORT PFNGLREPLACEMENTCODEUSVSUNPROC __glewReplacementCodeusvSUN; + +GLEW_FUN_EXPORT PFNGLCOLOR3FVERTEX3FSUNPROC __glewColor3fVertex3fSUN; +GLEW_FUN_EXPORT PFNGLCOLOR3FVERTEX3FVSUNPROC __glewColor3fVertex3fvSUN; +GLEW_FUN_EXPORT PFNGLCOLOR4FNORMAL3FVERTEX3FSUNPROC __glewColor4fNormal3fVertex3fSUN; +GLEW_FUN_EXPORT PFNGLCOLOR4FNORMAL3FVERTEX3FVSUNPROC __glewColor4fNormal3fVertex3fvSUN; +GLEW_FUN_EXPORT PFNGLCOLOR4UBVERTEX2FSUNPROC __glewColor4ubVertex2fSUN; +GLEW_FUN_EXPORT PFNGLCOLOR4UBVERTEX2FVSUNPROC __glewColor4ubVertex2fvSUN; +GLEW_FUN_EXPORT PFNGLCOLOR4UBVERTEX3FSUNPROC __glewColor4ubVertex3fSUN; +GLEW_FUN_EXPORT PFNGLCOLOR4UBVERTEX3FVSUNPROC __glewColor4ubVertex3fvSUN; +GLEW_FUN_EXPORT PFNGLNORMAL3FVERTEX3FSUNPROC 
__glewNormal3fVertex3fSUN; +GLEW_FUN_EXPORT PFNGLNORMAL3FVERTEX3FVSUNPROC __glewNormal3fVertex3fvSUN; +GLEW_FUN_EXPORT PFNGLREPLACEMENTCODEUICOLOR3FVERTEX3FSUNPROC __glewReplacementCodeuiColor3fVertex3fSUN; +GLEW_FUN_EXPORT PFNGLREPLACEMENTCODEUICOLOR3FVERTEX3FVSUNPROC __glewReplacementCodeuiColor3fVertex3fvSUN; +GLEW_FUN_EXPORT PFNGLREPLACEMENTCODEUICOLOR4FNORMAL3FVERTEX3FSUNPROC __glewReplacementCodeuiColor4fNormal3fVertex3fSUN; +GLEW_FUN_EXPORT PFNGLREPLACEMENTCODEUICOLOR4FNORMAL3FVERTEX3FVSUNPROC __glewReplacementCodeuiColor4fNormal3fVertex3fvSUN; +GLEW_FUN_EXPORT PFNGLREPLACEMENTCODEUICOLOR4UBVERTEX3FSUNPROC __glewReplacementCodeuiColor4ubVertex3fSUN; +GLEW_FUN_EXPORT PFNGLREPLACEMENTCODEUICOLOR4UBVERTEX3FVSUNPROC __glewReplacementCodeuiColor4ubVertex3fvSUN; +GLEW_FUN_EXPORT PFNGLREPLACEMENTCODEUINORMAL3FVERTEX3FSUNPROC __glewReplacementCodeuiNormal3fVertex3fSUN; +GLEW_FUN_EXPORT PFNGLREPLACEMENTCODEUINORMAL3FVERTEX3FVSUNPROC __glewReplacementCodeuiNormal3fVertex3fvSUN; +GLEW_FUN_EXPORT PFNGLREPLACEMENTCODEUITEXCOORD2FCOLOR4FNORMAL3FVERTEX3FSUNPROC __glewReplacementCodeuiTexCoord2fColor4fNormal3fVertex3fSUN; +GLEW_FUN_EXPORT PFNGLREPLACEMENTCODEUITEXCOORD2FCOLOR4FNORMAL3FVERTEX3FVSUNPROC __glewReplacementCodeuiTexCoord2fColor4fNormal3fVertex3fvSUN; +GLEW_FUN_EXPORT PFNGLREPLACEMENTCODEUITEXCOORD2FNORMAL3FVERTEX3FSUNPROC __glewReplacementCodeuiTexCoord2fNormal3fVertex3fSUN; +GLEW_FUN_EXPORT PFNGLREPLACEMENTCODEUITEXCOORD2FNORMAL3FVERTEX3FVSUNPROC __glewReplacementCodeuiTexCoord2fNormal3fVertex3fvSUN; +GLEW_FUN_EXPORT PFNGLREPLACEMENTCODEUITEXCOORD2FVERTEX3FSUNPROC __glewReplacementCodeuiTexCoord2fVertex3fSUN; +GLEW_FUN_EXPORT PFNGLREPLACEMENTCODEUITEXCOORD2FVERTEX3FVSUNPROC __glewReplacementCodeuiTexCoord2fVertex3fvSUN; +GLEW_FUN_EXPORT PFNGLREPLACEMENTCODEUIVERTEX3FSUNPROC __glewReplacementCodeuiVertex3fSUN; +GLEW_FUN_EXPORT PFNGLREPLACEMENTCODEUIVERTEX3FVSUNPROC __glewReplacementCodeuiVertex3fvSUN; +GLEW_FUN_EXPORT PFNGLTEXCOORD2FCOLOR3FVERTEX3FSUNPROC 
__glewTexCoord2fColor3fVertex3fSUN; +GLEW_FUN_EXPORT PFNGLTEXCOORD2FCOLOR3FVERTEX3FVSUNPROC __glewTexCoord2fColor3fVertex3fvSUN; +GLEW_FUN_EXPORT PFNGLTEXCOORD2FCOLOR4FNORMAL3FVERTEX3FSUNPROC __glewTexCoord2fColor4fNormal3fVertex3fSUN; +GLEW_FUN_EXPORT PFNGLTEXCOORD2FCOLOR4FNORMAL3FVERTEX3FVSUNPROC __glewTexCoord2fColor4fNormal3fVertex3fvSUN; +GLEW_FUN_EXPORT PFNGLTEXCOORD2FCOLOR4UBVERTEX3FSUNPROC __glewTexCoord2fColor4ubVertex3fSUN; +GLEW_FUN_EXPORT PFNGLTEXCOORD2FCOLOR4UBVERTEX3FVSUNPROC __glewTexCoord2fColor4ubVertex3fvSUN; +GLEW_FUN_EXPORT PFNGLTEXCOORD2FNORMAL3FVERTEX3FSUNPROC __glewTexCoord2fNormal3fVertex3fSUN; +GLEW_FUN_EXPORT PFNGLTEXCOORD2FNORMAL3FVERTEX3FVSUNPROC __glewTexCoord2fNormal3fVertex3fvSUN; +GLEW_FUN_EXPORT PFNGLTEXCOORD2FVERTEX3FSUNPROC __glewTexCoord2fVertex3fSUN; +GLEW_FUN_EXPORT PFNGLTEXCOORD2FVERTEX3FVSUNPROC __glewTexCoord2fVertex3fvSUN; +GLEW_FUN_EXPORT PFNGLTEXCOORD4FCOLOR4FNORMAL3FVERTEX4FSUNPROC __glewTexCoord4fColor4fNormal3fVertex4fSUN; +GLEW_FUN_EXPORT PFNGLTEXCOORD4FCOLOR4FNORMAL3FVERTEX4FVSUNPROC __glewTexCoord4fColor4fNormal3fVertex4fvSUN; +GLEW_FUN_EXPORT PFNGLTEXCOORD4FVERTEX4FSUNPROC __glewTexCoord4fVertex4fSUN; +GLEW_FUN_EXPORT PFNGLTEXCOORD4FVERTEX4FVSUNPROC __glewTexCoord4fVertex4fvSUN; + +GLEW_FUN_EXPORT PFNGLADDSWAPHINTRECTWINPROC __glewAddSwapHintRectWIN; + +#if defined(GLEW_MX) && !defined(_WIN32) +struct GLEWContextStruct +{ +#endif /* GLEW_MX */ + +GLEW_VAR_EXPORT GLboolean __GLEW_VERSION_1_1; +GLEW_VAR_EXPORT GLboolean __GLEW_VERSION_1_2; +GLEW_VAR_EXPORT GLboolean __GLEW_VERSION_1_3; +GLEW_VAR_EXPORT GLboolean __GLEW_VERSION_1_4; +GLEW_VAR_EXPORT GLboolean __GLEW_VERSION_1_5; +GLEW_VAR_EXPORT GLboolean __GLEW_VERSION_2_0; +GLEW_VAR_EXPORT GLboolean __GLEW_VERSION_2_1; +GLEW_VAR_EXPORT GLboolean __GLEW_VERSION_3_0; +GLEW_VAR_EXPORT GLboolean __GLEW_3DFX_multisample; +GLEW_VAR_EXPORT GLboolean __GLEW_3DFX_tbuffer; +GLEW_VAR_EXPORT GLboolean __GLEW_3DFX_texture_compression_FXT1; +GLEW_VAR_EXPORT GLboolean 
__GLEW_APPLE_client_storage; +GLEW_VAR_EXPORT GLboolean __GLEW_APPLE_element_array; +GLEW_VAR_EXPORT GLboolean __GLEW_APPLE_fence; +GLEW_VAR_EXPORT GLboolean __GLEW_APPLE_float_pixels; +GLEW_VAR_EXPORT GLboolean __GLEW_APPLE_flush_buffer_range; +GLEW_VAR_EXPORT GLboolean __GLEW_APPLE_pixel_buffer; +GLEW_VAR_EXPORT GLboolean __GLEW_APPLE_specular_vector; +GLEW_VAR_EXPORT GLboolean __GLEW_APPLE_texture_range; +GLEW_VAR_EXPORT GLboolean __GLEW_APPLE_transform_hint; +GLEW_VAR_EXPORT GLboolean __GLEW_APPLE_vertex_array_object; +GLEW_VAR_EXPORT GLboolean __GLEW_APPLE_vertex_array_range; +GLEW_VAR_EXPORT GLboolean __GLEW_APPLE_ycbcr_422; +GLEW_VAR_EXPORT GLboolean __GLEW_ARB_color_buffer_float; +GLEW_VAR_EXPORT GLboolean __GLEW_ARB_depth_buffer_float; +GLEW_VAR_EXPORT GLboolean __GLEW_ARB_depth_texture; +GLEW_VAR_EXPORT GLboolean __GLEW_ARB_draw_buffers; +GLEW_VAR_EXPORT GLboolean __GLEW_ARB_draw_instanced; +GLEW_VAR_EXPORT GLboolean __GLEW_ARB_fragment_program; +GLEW_VAR_EXPORT GLboolean __GLEW_ARB_fragment_program_shadow; +GLEW_VAR_EXPORT GLboolean __GLEW_ARB_fragment_shader; +GLEW_VAR_EXPORT GLboolean __GLEW_ARB_framebuffer_object; +GLEW_VAR_EXPORT GLboolean __GLEW_ARB_framebuffer_sRGB; +GLEW_VAR_EXPORT GLboolean __GLEW_ARB_geometry_shader4; +GLEW_VAR_EXPORT GLboolean __GLEW_ARB_half_float_pixel; +GLEW_VAR_EXPORT GLboolean __GLEW_ARB_half_float_vertex; +GLEW_VAR_EXPORT GLboolean __GLEW_ARB_imaging; +GLEW_VAR_EXPORT GLboolean __GLEW_ARB_instanced_arrays; +GLEW_VAR_EXPORT GLboolean __GLEW_ARB_map_buffer_range; +GLEW_VAR_EXPORT GLboolean __GLEW_ARB_matrix_palette; +GLEW_VAR_EXPORT GLboolean __GLEW_ARB_multisample; +GLEW_VAR_EXPORT GLboolean __GLEW_ARB_multitexture; +GLEW_VAR_EXPORT GLboolean __GLEW_ARB_occlusion_query; +GLEW_VAR_EXPORT GLboolean __GLEW_ARB_pixel_buffer_object; +GLEW_VAR_EXPORT GLboolean __GLEW_ARB_point_parameters; +GLEW_VAR_EXPORT GLboolean __GLEW_ARB_point_sprite; +GLEW_VAR_EXPORT GLboolean __GLEW_ARB_shader_objects; +GLEW_VAR_EXPORT GLboolean 
__GLEW_ARB_shading_language_100; +GLEW_VAR_EXPORT GLboolean __GLEW_ARB_shadow; +GLEW_VAR_EXPORT GLboolean __GLEW_ARB_shadow_ambient; +GLEW_VAR_EXPORT GLboolean __GLEW_ARB_texture_border_clamp; +GLEW_VAR_EXPORT GLboolean __GLEW_ARB_texture_buffer_object; +GLEW_VAR_EXPORT GLboolean __GLEW_ARB_texture_compression; +GLEW_VAR_EXPORT GLboolean __GLEW_ARB_texture_compression_rgtc; +GLEW_VAR_EXPORT GLboolean __GLEW_ARB_texture_cube_map; +GLEW_VAR_EXPORT GLboolean __GLEW_ARB_texture_env_add; +GLEW_VAR_EXPORT GLboolean __GLEW_ARB_texture_env_combine; +GLEW_VAR_EXPORT GLboolean __GLEW_ARB_texture_env_crossbar; +GLEW_VAR_EXPORT GLboolean __GLEW_ARB_texture_env_dot3; +GLEW_VAR_EXPORT GLboolean __GLEW_ARB_texture_float; +GLEW_VAR_EXPORT GLboolean __GLEW_ARB_texture_mirrored_repeat; +GLEW_VAR_EXPORT GLboolean __GLEW_ARB_texture_non_power_of_two; +GLEW_VAR_EXPORT GLboolean __GLEW_ARB_texture_rectangle; +GLEW_VAR_EXPORT GLboolean __GLEW_ARB_texture_rg; +GLEW_VAR_EXPORT GLboolean __GLEW_ARB_transpose_matrix; +GLEW_VAR_EXPORT GLboolean __GLEW_ARB_vertex_array_object; +GLEW_VAR_EXPORT GLboolean __GLEW_ARB_vertex_blend; +GLEW_VAR_EXPORT GLboolean __GLEW_ARB_vertex_buffer_object; +GLEW_VAR_EXPORT GLboolean __GLEW_ARB_vertex_program; +GLEW_VAR_EXPORT GLboolean __GLEW_ARB_vertex_shader; +GLEW_VAR_EXPORT GLboolean __GLEW_ARB_window_pos; +GLEW_VAR_EXPORT GLboolean __GLEW_ATIX_point_sprites; +GLEW_VAR_EXPORT GLboolean __GLEW_ATIX_texture_env_combine3; +GLEW_VAR_EXPORT GLboolean __GLEW_ATIX_texture_env_route; +GLEW_VAR_EXPORT GLboolean __GLEW_ATIX_vertex_shader_output_point_size; +GLEW_VAR_EXPORT GLboolean __GLEW_ATI_draw_buffers; +GLEW_VAR_EXPORT GLboolean __GLEW_ATI_element_array; +GLEW_VAR_EXPORT GLboolean __GLEW_ATI_envmap_bumpmap; +GLEW_VAR_EXPORT GLboolean __GLEW_ATI_fragment_shader; +GLEW_VAR_EXPORT GLboolean __GLEW_ATI_map_object_buffer; +GLEW_VAR_EXPORT GLboolean __GLEW_ATI_pn_triangles; +GLEW_VAR_EXPORT GLboolean __GLEW_ATI_separate_stencil; +GLEW_VAR_EXPORT GLboolean 
__GLEW_ATI_shader_texture_lod; +GLEW_VAR_EXPORT GLboolean __GLEW_ATI_text_fragment_shader; +GLEW_VAR_EXPORT GLboolean __GLEW_ATI_texture_compression_3dc; +GLEW_VAR_EXPORT GLboolean __GLEW_ATI_texture_env_combine3; +GLEW_VAR_EXPORT GLboolean __GLEW_ATI_texture_float; +GLEW_VAR_EXPORT GLboolean __GLEW_ATI_texture_mirror_once; +GLEW_VAR_EXPORT GLboolean __GLEW_ATI_vertex_array_object; +GLEW_VAR_EXPORT GLboolean __GLEW_ATI_vertex_attrib_array_object; +GLEW_VAR_EXPORT GLboolean __GLEW_ATI_vertex_streams; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_422_pixels; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_Cg_shader; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_abgr; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_bgra; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_bindable_uniform; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_blend_color; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_blend_equation_separate; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_blend_func_separate; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_blend_logic_op; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_blend_minmax; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_blend_subtract; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_clip_volume_hint; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_cmyka; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_color_subtable; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_compiled_vertex_array; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_convolution; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_coordinate_frame; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_copy_texture; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_cull_vertex; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_depth_bounds_test; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_direct_state_access; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_draw_buffers2; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_draw_instanced; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_draw_range_elements; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_fog_coord; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_fragment_lighting; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_framebuffer_blit; +GLEW_VAR_EXPORT GLboolean 
__GLEW_EXT_framebuffer_multisample; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_framebuffer_object; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_framebuffer_sRGB; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_geometry_shader4; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_gpu_program_parameters; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_gpu_shader4; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_histogram; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_index_array_formats; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_index_func; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_index_material; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_index_texture; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_light_texture; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_misc_attribute; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_multi_draw_arrays; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_multisample; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_packed_depth_stencil; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_packed_float; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_packed_pixels; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_paletted_texture; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_pixel_buffer_object; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_pixel_transform; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_pixel_transform_color_table; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_point_parameters; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_polygon_offset; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_rescale_normal; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_scene_marker; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_secondary_color; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_separate_specular_color; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_shadow_funcs; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_shared_texture_palette; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_stencil_clear_tag; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_stencil_two_side; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_stencil_wrap; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_subtexture; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_texture; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_texture3D; +GLEW_VAR_EXPORT GLboolean 
__GLEW_EXT_texture_array; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_texture_buffer_object; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_texture_compression_dxt1; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_texture_compression_latc; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_texture_compression_rgtc; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_texture_compression_s3tc; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_texture_cube_map; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_texture_edge_clamp; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_texture_env; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_texture_env_add; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_texture_env_combine; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_texture_env_dot3; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_texture_filter_anisotropic; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_texture_integer; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_texture_lod_bias; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_texture_mirror_clamp; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_texture_object; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_texture_perturb_normal; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_texture_rectangle; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_texture_sRGB; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_texture_shared_exponent; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_texture_swizzle; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_timer_query; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_transform_feedback; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_vertex_array; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_vertex_array_bgra; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_vertex_shader; +GLEW_VAR_EXPORT GLboolean __GLEW_EXT_vertex_weighting; +GLEW_VAR_EXPORT GLboolean __GLEW_GREMEDY_frame_terminator; +GLEW_VAR_EXPORT GLboolean __GLEW_GREMEDY_string_marker; +GLEW_VAR_EXPORT GLboolean __GLEW_HP_convolution_border_modes; +GLEW_VAR_EXPORT GLboolean __GLEW_HP_image_transform; +GLEW_VAR_EXPORT GLboolean __GLEW_HP_occlusion_test; +GLEW_VAR_EXPORT GLboolean __GLEW_HP_texture_lighting; +GLEW_VAR_EXPORT GLboolean __GLEW_IBM_cull_vertex; +GLEW_VAR_EXPORT 
GLboolean __GLEW_IBM_multimode_draw_arrays; +GLEW_VAR_EXPORT GLboolean __GLEW_IBM_rasterpos_clip; +GLEW_VAR_EXPORT GLboolean __GLEW_IBM_static_data; +GLEW_VAR_EXPORT GLboolean __GLEW_IBM_texture_mirrored_repeat; +GLEW_VAR_EXPORT GLboolean __GLEW_IBM_vertex_array_lists; +GLEW_VAR_EXPORT GLboolean __GLEW_INGR_color_clamp; +GLEW_VAR_EXPORT GLboolean __GLEW_INGR_interlace_read; +GLEW_VAR_EXPORT GLboolean __GLEW_INTEL_parallel_arrays; +GLEW_VAR_EXPORT GLboolean __GLEW_INTEL_texture_scissor; +GLEW_VAR_EXPORT GLboolean __GLEW_KTX_buffer_region; +GLEW_VAR_EXPORT GLboolean __GLEW_MESAX_texture_stack; +GLEW_VAR_EXPORT GLboolean __GLEW_MESA_pack_invert; +GLEW_VAR_EXPORT GLboolean __GLEW_MESA_resize_buffers; +GLEW_VAR_EXPORT GLboolean __GLEW_MESA_window_pos; +GLEW_VAR_EXPORT GLboolean __GLEW_MESA_ycbcr_texture; +GLEW_VAR_EXPORT GLboolean __GLEW_NV_blend_square; +GLEW_VAR_EXPORT GLboolean __GLEW_NV_conditional_render; +GLEW_VAR_EXPORT GLboolean __GLEW_NV_copy_depth_to_color; +GLEW_VAR_EXPORT GLboolean __GLEW_NV_depth_buffer_float; +GLEW_VAR_EXPORT GLboolean __GLEW_NV_depth_clamp; +GLEW_VAR_EXPORT GLboolean __GLEW_NV_depth_range_unclamped; +GLEW_VAR_EXPORT GLboolean __GLEW_NV_evaluators; +GLEW_VAR_EXPORT GLboolean __GLEW_NV_explicit_multisample; +GLEW_VAR_EXPORT GLboolean __GLEW_NV_fence; +GLEW_VAR_EXPORT GLboolean __GLEW_NV_float_buffer; +GLEW_VAR_EXPORT GLboolean __GLEW_NV_fog_distance; +GLEW_VAR_EXPORT GLboolean __GLEW_NV_fragment_program; +GLEW_VAR_EXPORT GLboolean __GLEW_NV_fragment_program2; +GLEW_VAR_EXPORT GLboolean __GLEW_NV_fragment_program4; +GLEW_VAR_EXPORT GLboolean __GLEW_NV_fragment_program_option; +GLEW_VAR_EXPORT GLboolean __GLEW_NV_framebuffer_multisample_coverage; +GLEW_VAR_EXPORT GLboolean __GLEW_NV_geometry_program4; +GLEW_VAR_EXPORT GLboolean __GLEW_NV_geometry_shader4; +GLEW_VAR_EXPORT GLboolean __GLEW_NV_gpu_program4; +GLEW_VAR_EXPORT GLboolean __GLEW_NV_half_float; +GLEW_VAR_EXPORT GLboolean __GLEW_NV_light_max_exponent; +GLEW_VAR_EXPORT GLboolean 
__GLEW_NV_multisample_filter_hint; +GLEW_VAR_EXPORT GLboolean __GLEW_NV_occlusion_query; +GLEW_VAR_EXPORT GLboolean __GLEW_NV_packed_depth_stencil; +GLEW_VAR_EXPORT GLboolean __GLEW_NV_parameter_buffer_object; +GLEW_VAR_EXPORT GLboolean __GLEW_NV_pixel_data_range; +GLEW_VAR_EXPORT GLboolean __GLEW_NV_point_sprite; +GLEW_VAR_EXPORT GLboolean __GLEW_NV_present_video; +GLEW_VAR_EXPORT GLboolean __GLEW_NV_primitive_restart; +GLEW_VAR_EXPORT GLboolean __GLEW_NV_register_combiners; +GLEW_VAR_EXPORT GLboolean __GLEW_NV_register_combiners2; +GLEW_VAR_EXPORT GLboolean __GLEW_NV_texgen_emboss; +GLEW_VAR_EXPORT GLboolean __GLEW_NV_texgen_reflection; +GLEW_VAR_EXPORT GLboolean __GLEW_NV_texture_compression_vtc; +GLEW_VAR_EXPORT GLboolean __GLEW_NV_texture_env_combine4; +GLEW_VAR_EXPORT GLboolean __GLEW_NV_texture_expand_normal; +GLEW_VAR_EXPORT GLboolean __GLEW_NV_texture_rectangle; +GLEW_VAR_EXPORT GLboolean __GLEW_NV_texture_shader; +GLEW_VAR_EXPORT GLboolean __GLEW_NV_texture_shader2; +GLEW_VAR_EXPORT GLboolean __GLEW_NV_texture_shader3; +GLEW_VAR_EXPORT GLboolean __GLEW_NV_transform_feedback; +GLEW_VAR_EXPORT GLboolean __GLEW_NV_vertex_array_range; +GLEW_VAR_EXPORT GLboolean __GLEW_NV_vertex_array_range2; +GLEW_VAR_EXPORT GLboolean __GLEW_NV_vertex_program; +GLEW_VAR_EXPORT GLboolean __GLEW_NV_vertex_program1_1; +GLEW_VAR_EXPORT GLboolean __GLEW_NV_vertex_program2; +GLEW_VAR_EXPORT GLboolean __GLEW_NV_vertex_program2_option; +GLEW_VAR_EXPORT GLboolean __GLEW_NV_vertex_program3; +GLEW_VAR_EXPORT GLboolean __GLEW_NV_vertex_program4; +GLEW_VAR_EXPORT GLboolean __GLEW_OES_byte_coordinates; +GLEW_VAR_EXPORT GLboolean __GLEW_OES_compressed_paletted_texture; +GLEW_VAR_EXPORT GLboolean __GLEW_OES_read_format; +GLEW_VAR_EXPORT GLboolean __GLEW_OES_single_precision; +GLEW_VAR_EXPORT GLboolean __GLEW_OML_interlace; +GLEW_VAR_EXPORT GLboolean __GLEW_OML_resample; +GLEW_VAR_EXPORT GLboolean __GLEW_OML_subsample; +GLEW_VAR_EXPORT GLboolean __GLEW_PGI_misc_hints; +GLEW_VAR_EXPORT 
GLboolean __GLEW_PGI_vertex_hints; +GLEW_VAR_EXPORT GLboolean __GLEW_REND_screen_coordinates; +GLEW_VAR_EXPORT GLboolean __GLEW_S3_s3tc; +GLEW_VAR_EXPORT GLboolean __GLEW_SGIS_color_range; +GLEW_VAR_EXPORT GLboolean __GLEW_SGIS_detail_texture; +GLEW_VAR_EXPORT GLboolean __GLEW_SGIS_fog_function; +GLEW_VAR_EXPORT GLboolean __GLEW_SGIS_generate_mipmap; +GLEW_VAR_EXPORT GLboolean __GLEW_SGIS_multisample; +GLEW_VAR_EXPORT GLboolean __GLEW_SGIS_pixel_texture; +GLEW_VAR_EXPORT GLboolean __GLEW_SGIS_point_line_texgen; +GLEW_VAR_EXPORT GLboolean __GLEW_SGIS_sharpen_texture; +GLEW_VAR_EXPORT GLboolean __GLEW_SGIS_texture4D; +GLEW_VAR_EXPORT GLboolean __GLEW_SGIS_texture_border_clamp; +GLEW_VAR_EXPORT GLboolean __GLEW_SGIS_texture_edge_clamp; +GLEW_VAR_EXPORT GLboolean __GLEW_SGIS_texture_filter4; +GLEW_VAR_EXPORT GLboolean __GLEW_SGIS_texture_lod; +GLEW_VAR_EXPORT GLboolean __GLEW_SGIS_texture_select; +GLEW_VAR_EXPORT GLboolean __GLEW_SGIX_async; +GLEW_VAR_EXPORT GLboolean __GLEW_SGIX_async_histogram; +GLEW_VAR_EXPORT GLboolean __GLEW_SGIX_async_pixel; +GLEW_VAR_EXPORT GLboolean __GLEW_SGIX_blend_alpha_minmax; +GLEW_VAR_EXPORT GLboolean __GLEW_SGIX_clipmap; +GLEW_VAR_EXPORT GLboolean __GLEW_SGIX_convolution_accuracy; +GLEW_VAR_EXPORT GLboolean __GLEW_SGIX_depth_texture; +GLEW_VAR_EXPORT GLboolean __GLEW_SGIX_flush_raster; +GLEW_VAR_EXPORT GLboolean __GLEW_SGIX_fog_offset; +GLEW_VAR_EXPORT GLboolean __GLEW_SGIX_fog_texture; +GLEW_VAR_EXPORT GLboolean __GLEW_SGIX_fragment_specular_lighting; +GLEW_VAR_EXPORT GLboolean __GLEW_SGIX_framezoom; +GLEW_VAR_EXPORT GLboolean __GLEW_SGIX_interlace; +GLEW_VAR_EXPORT GLboolean __GLEW_SGIX_ir_instrument1; +GLEW_VAR_EXPORT GLboolean __GLEW_SGIX_list_priority; +GLEW_VAR_EXPORT GLboolean __GLEW_SGIX_pixel_texture; +GLEW_VAR_EXPORT GLboolean __GLEW_SGIX_pixel_texture_bits; +GLEW_VAR_EXPORT GLboolean __GLEW_SGIX_reference_plane; +GLEW_VAR_EXPORT GLboolean __GLEW_SGIX_resample; +GLEW_VAR_EXPORT GLboolean __GLEW_SGIX_shadow; +GLEW_VAR_EXPORT 
GLboolean __GLEW_SGIX_shadow_ambient; +GLEW_VAR_EXPORT GLboolean __GLEW_SGIX_sprite; +GLEW_VAR_EXPORT GLboolean __GLEW_SGIX_tag_sample_buffer; +GLEW_VAR_EXPORT GLboolean __GLEW_SGIX_texture_add_env; +GLEW_VAR_EXPORT GLboolean __GLEW_SGIX_texture_coordinate_clamp; +GLEW_VAR_EXPORT GLboolean __GLEW_SGIX_texture_lod_bias; +GLEW_VAR_EXPORT GLboolean __GLEW_SGIX_texture_multi_buffer; +GLEW_VAR_EXPORT GLboolean __GLEW_SGIX_texture_range; +GLEW_VAR_EXPORT GLboolean __GLEW_SGIX_texture_scale_bias; +GLEW_VAR_EXPORT GLboolean __GLEW_SGIX_vertex_preclip; +GLEW_VAR_EXPORT GLboolean __GLEW_SGIX_vertex_preclip_hint; +GLEW_VAR_EXPORT GLboolean __GLEW_SGIX_ycrcb; +GLEW_VAR_EXPORT GLboolean __GLEW_SGI_color_matrix; +GLEW_VAR_EXPORT GLboolean __GLEW_SGI_color_table; +GLEW_VAR_EXPORT GLboolean __GLEW_SGI_texture_color_table; +GLEW_VAR_EXPORT GLboolean __GLEW_SUNX_constant_data; +GLEW_VAR_EXPORT GLboolean __GLEW_SUN_convolution_border_modes; +GLEW_VAR_EXPORT GLboolean __GLEW_SUN_global_alpha; +GLEW_VAR_EXPORT GLboolean __GLEW_SUN_mesh_array; +GLEW_VAR_EXPORT GLboolean __GLEW_SUN_read_video_pixels; +GLEW_VAR_EXPORT GLboolean __GLEW_SUN_slice_accum; +GLEW_VAR_EXPORT GLboolean __GLEW_SUN_triangle_list; +GLEW_VAR_EXPORT GLboolean __GLEW_SUN_vertex; +GLEW_VAR_EXPORT GLboolean __GLEW_WIN_phong_shading; +GLEW_VAR_EXPORT GLboolean __GLEW_WIN_specular_fog; +GLEW_VAR_EXPORT GLboolean __GLEW_WIN_swap_hint; + +#ifdef GLEW_MX +}; /* GLEWContextStruct */ +#endif /* GLEW_MX */ + +/* ------------------------------------------------------------------------- */ + +/* error codes */ +#define GLEW_OK 0 +#define GLEW_NO_ERROR 0 +#define GLEW_ERROR_NO_GL_VERSION 1 /* missing GL version */ +#define GLEW_ERROR_GL_VERSION_10_ONLY 2 /* GL 1.1 and up are not supported */ +#define GLEW_ERROR_GLX_VERSION_11_ONLY 3 /* GLX 1.2 and up are not supported */ + +/* string codes */ +#define GLEW_VERSION 1 +#define GLEW_VERSION_MAJOR 2 +#define GLEW_VERSION_MINOR 3 +#define GLEW_VERSION_MICRO 4 + +/* API */ +#ifdef 
GLEW_MX + +typedef struct GLEWContextStruct GLEWContext; +GLEWAPI GLenum glewContextInit (GLEWContext* ctx); +GLEWAPI GLboolean glewContextIsSupported (GLEWContext* ctx, const char* name); + +#define glewInit() glewContextInit(glewGetContext()) +#define glewIsSupported(x) glewContextIsSupported(glewGetContext(), x) +#define glewIsExtensionSupported(x) glewIsSupported(x) + +#define GLEW_GET_VAR(x) (*(const GLboolean*)&(glewGetContext()->x)) +#ifdef _WIN32 +# define GLEW_GET_FUN(x) glewGetContext()->x +#else +# define GLEW_GET_FUN(x) x +#endif + +#else /* GLEW_MX */ + +GLEWAPI GLenum glewInit (); +GLEWAPI GLboolean glewIsSupported (const char* name); +#define glewIsExtensionSupported(x) glewIsSupported(x) + +#define GLEW_GET_VAR(x) (*(const GLboolean*)&x) +#define GLEW_GET_FUN(x) x + +#endif /* GLEW_MX */ + +GLEWAPI GLboolean glewExperimental; +GLEWAPI GLboolean glewGetExtension (const char* name); +GLEWAPI const GLubyte* glewGetErrorString (GLenum error); +GLEWAPI const GLubyte* glewGetString (GLenum name); + +#ifdef __cplusplus +} +#endif + +#ifdef GLEW_APIENTRY_DEFINED +#undef GLEW_APIENTRY_DEFINED +#undef APIENTRY +#undef GLAPIENTRY +#endif + +#ifdef GLEW_CALLBACK_DEFINED +#undef GLEW_CALLBACK_DEFINED +#undef CALLBACK +#endif + +#ifdef GLEW_WINGDIAPI_DEFINED +#undef GLEW_WINGDIAPI_DEFINED +#undef WINGDIAPI +#endif + +#undef GLAPI +/* #undef GLEWAPI */ + +#endif /* __glew_h__ */ diff --git a/test_conformance/gl/GLEW/GL/glxew.h b/test_conformance/gl/GLEW/GL/glxew.h new file mode 100644 index 00000000..625ee0fb --- /dev/null +++ b/test_conformance/gl/GLEW/GL/glxew.h @@ -0,0 +1,1397 @@ +/* +** The OpenGL Extension Wrangler Library +** Copyright (C) 2002-2008, Milan Ikits +** Copyright (C) 2002-2008, Marcelo E. Magallon +** Copyright (C) 2002, Lev Povalahev +** All rights reserved. 
+** +** Redistribution and use in source and binary forms, with or without +** modification, are permitted provided that the following conditions are met: +** +** * Redistributions of source code must retain the above copyright notice, +** this list of conditions and the following disclaimer. +** * Redistributions in binary form must reproduce the above copyright notice, +** this list of conditions and the following disclaimer in the documentation +** and/or other materials provided with the distribution. +** * The name of the author may be used to endorse or promote products +** derived from this software without specific prior written permission. +** +** THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +** AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +** IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +** ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +** LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +** CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +** SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +** INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +** CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +** ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +** THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/* + * Mesa 3-D graphics library + * Version: 7.0 + * + * Copyright (C) 1999-2007 Brian Paul All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* +** Copyright (c) 2007 The Khronos Group Inc. +** +** Permission is hereby granted, free of charge, to any person obtaining a +** copy of this software and/or associated documentation files (the +** "Materials"), to deal in the Materials without restriction, including +** without limitation the rights to use, copy, modify, merge, publish, +** distribute, sublicense, and/or sell copies of the Materials, and to +** permit persons to whom the Materials are furnished to do so, subject to +** the following conditions: +** +** The above copyright notice and this permission notice shall be included +** in all copies or substantial portions of the Materials. +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. +*/ + +#ifndef __glxew_h__ +#define __glxew_h__ +#define __GLXEW_H__ + +#ifdef __glxext_h_ +#error glxext.h included before glxew.h +#endif +#ifdef GLX_H +#error glx.h included before glxew.h +#endif + +#define __glxext_h_ +#define __GLX_glx_h__ +#define GLX_H + +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* ---------------------------- GLX_VERSION_1_0 --------------------------- */ + +#ifndef GLX_VERSION_1_0 +#define GLX_VERSION_1_0 1 + +#define GLX_USE_GL 1 +#define GLX_BUFFER_SIZE 2 +#define GLX_LEVEL 3 +#define GLX_RGBA 4 +#define GLX_DOUBLEBUFFER 5 +#define GLX_STEREO 6 +#define GLX_AUX_BUFFERS 7 +#define GLX_RED_SIZE 8 +#define GLX_GREEN_SIZE 9 +#define GLX_BLUE_SIZE 10 +#define GLX_ALPHA_SIZE 11 +#define GLX_DEPTH_SIZE 12 +#define GLX_STENCIL_SIZE 13 +#define GLX_ACCUM_RED_SIZE 14 +#define GLX_ACCUM_GREEN_SIZE 15 +#define GLX_ACCUM_BLUE_SIZE 16 +#define GLX_ACCUM_ALPHA_SIZE 17 +#define GLX_BAD_SCREEN 1 +#define GLX_BAD_ATTRIBUTE 2 +#define GLX_NO_EXTENSION 3 +#define GLX_BAD_VISUAL 4 +#define GLX_BAD_CONTEXT 5 +#define GLX_BAD_VALUE 6 +#define GLX_BAD_ENUM 7 + +typedef XID GLXDrawable; +typedef XID GLXPixmap; +#ifdef __sun +typedef struct __glXContextRec *GLXContext; +#else +typedef struct __GLXcontextRec *GLXContext; +#endif + +typedef unsigned int GLXVideoDeviceNV; + +extern Bool glXQueryExtension (Display *dpy, int *errorBase, int *eventBase); +extern Bool glXQueryVersion (Display *dpy, int *major, int *minor); +extern int glXGetConfig (Display *dpy, XVisualInfo *vis, int attrib, int *value); +extern XVisualInfo* glXChooseVisual (Display *dpy, int screen, int *attribList); +extern GLXPixmap glXCreateGLXPixmap (Display *dpy, XVisualInfo *vis, 
Pixmap pixmap); +extern void glXDestroyGLXPixmap (Display *dpy, GLXPixmap pix); +extern GLXContext glXCreateContext (Display *dpy, XVisualInfo *vis, GLXContext shareList, Bool direct); +extern void glXDestroyContext (Display *dpy, GLXContext ctx); +extern Bool glXIsDirect (Display *dpy, GLXContext ctx); +extern void glXCopyContext (Display *dpy, GLXContext src, GLXContext dst, GLulong mask); +extern Bool glXMakeCurrent (Display *dpy, GLXDrawable drawable, GLXContext ctx); +extern GLXContext glXGetCurrentContext (void); +extern GLXDrawable glXGetCurrentDrawable (void); +extern void glXWaitGL (void); +extern void glXWaitX (void); +extern void glXSwapBuffers (Display *dpy, GLXDrawable drawable); +extern void glXUseXFont (Font font, int first, int count, int listBase); + +#define GLXEW_VERSION_1_0 GLXEW_GET_VAR(__GLXEW_VERSION_1_0) + +#endif /* GLX_VERSION_1_0 */ + +/* ---------------------------- GLX_VERSION_1_1 --------------------------- */ + +#ifndef GLX_VERSION_1_1 +#define GLX_VERSION_1_1 + +#define GLX_VENDOR 0x1 +#define GLX_VERSION 0x2 +#define GLX_EXTENSIONS 0x3 + +extern const char* glXQueryExtensionsString (Display *dpy, int screen); +extern const char* glXGetClientString (Display *dpy, int name); +extern const char* glXQueryServerString (Display *dpy, int screen, int name); + +#define GLXEW_VERSION_1_1 GLXEW_GET_VAR(__GLXEW_VERSION_1_1) + +#endif /* GLX_VERSION_1_1 */ + +/* ---------------------------- GLX_VERSION_1_2 ---------------------------- */ + +#ifndef GLX_VERSION_1_2 +#define GLX_VERSION_1_2 1 + +typedef Display* ( * PFNGLXGETCURRENTDISPLAYPROC) (void); + +#define glXGetCurrentDisplay GLXEW_GET_FUN(__glewXGetCurrentDisplay) + +#define GLXEW_VERSION_1_2 GLXEW_GET_VAR(__GLXEW_VERSION_1_2) + +#endif /* GLX_VERSION_1_2 */ + +/* ---------------------------- GLX_VERSION_1_3 ---------------------------- */ + +#ifndef GLX_VERSION_1_3 +#define GLX_VERSION_1_3 1 + +#define GLX_RGBA_BIT 0x00000001 +#define GLX_FRONT_LEFT_BUFFER_BIT 0x00000001 +#define 
GLX_WINDOW_BIT 0x00000001 +#define GLX_COLOR_INDEX_BIT 0x00000002 +#define GLX_PIXMAP_BIT 0x00000002 +#define GLX_FRONT_RIGHT_BUFFER_BIT 0x00000002 +#define GLX_BACK_LEFT_BUFFER_BIT 0x00000004 +#define GLX_PBUFFER_BIT 0x00000004 +#define GLX_BACK_RIGHT_BUFFER_BIT 0x00000008 +#define GLX_AUX_BUFFERS_BIT 0x00000010 +#define GLX_CONFIG_CAVEAT 0x20 +#define GLX_DEPTH_BUFFER_BIT 0x00000020 +#define GLX_X_VISUAL_TYPE 0x22 +#define GLX_TRANSPARENT_TYPE 0x23 +#define GLX_TRANSPARENT_INDEX_VALUE 0x24 +#define GLX_TRANSPARENT_RED_VALUE 0x25 +#define GLX_TRANSPARENT_GREEN_VALUE 0x26 +#define GLX_TRANSPARENT_BLUE_VALUE 0x27 +#define GLX_TRANSPARENT_ALPHA_VALUE 0x28 +#define GLX_STENCIL_BUFFER_BIT 0x00000040 +#define GLX_ACCUM_BUFFER_BIT 0x00000080 +#define GLX_NONE 0x8000 +#define GLX_SLOW_CONFIG 0x8001 +#define GLX_TRUE_COLOR 0x8002 +#define GLX_DIRECT_COLOR 0x8003 +#define GLX_PSEUDO_COLOR 0x8004 +#define GLX_STATIC_COLOR 0x8005 +#define GLX_GRAY_SCALE 0x8006 +#define GLX_STATIC_GRAY 0x8007 +#define GLX_TRANSPARENT_RGB 0x8008 +#define GLX_TRANSPARENT_INDEX 0x8009 +#define GLX_VISUAL_ID 0x800B +#define GLX_SCREEN 0x800C +#define GLX_NON_CONFORMANT_CONFIG 0x800D +#define GLX_DRAWABLE_TYPE 0x8010 +#define GLX_RENDER_TYPE 0x8011 +#define GLX_X_RENDERABLE 0x8012 +#define GLX_FBCONFIG_ID 0x8013 +#define GLX_RGBA_TYPE 0x8014 +#define GLX_COLOR_INDEX_TYPE 0x8015 +#define GLX_MAX_PBUFFER_WIDTH 0x8016 +#define GLX_MAX_PBUFFER_HEIGHT 0x8017 +#define GLX_MAX_PBUFFER_PIXELS 0x8018 +#define GLX_PRESERVED_CONTENTS 0x801B +#define GLX_LARGEST_PBUFFER 0x801C +#define GLX_WIDTH 0x801D +#define GLX_HEIGHT 0x801E +#define GLX_EVENT_MASK 0x801F +#define GLX_DAMAGED 0x8020 +#define GLX_SAVED 0x8021 +#define GLX_WINDOW 0x8022 +#define GLX_PBUFFER 0x8023 +#define GLX_PBUFFER_HEIGHT 0x8040 +#define GLX_PBUFFER_WIDTH 0x8041 +#define GLX_PBUFFER_CLOBBER_MASK 0x08000000 +#define GLX_DONT_CARE 0xFFFFFFFF + +typedef XID GLXFBConfigID; +typedef XID GLXWindow; +typedef XID GLXPbuffer; +typedef struct 
__GLXFBConfigRec *GLXFBConfig; + +typedef struct { + int event_type; + int draw_type; + unsigned long serial; + Bool send_event; + Display *display; + GLXDrawable drawable; + unsigned int buffer_mask; + unsigned int aux_buffer; + int x, y; + int width, height; + int count; +} GLXPbufferClobberEvent; +typedef union __GLXEvent { + GLXPbufferClobberEvent glxpbufferclobber; + long pad[24]; +} GLXEvent; + +typedef GLXFBConfig* ( * PFNGLXCHOOSEFBCONFIGPROC) (Display *dpy, int screen, const int *attrib_list, int *nelements); +typedef GLXContext ( * PFNGLXCREATENEWCONTEXTPROC) (Display *dpy, GLXFBConfig config, int render_type, GLXContext share_list, Bool direct); +typedef GLXPbuffer ( * PFNGLXCREATEPBUFFERPROC) (Display *dpy, GLXFBConfig config, const int *attrib_list); +typedef GLXPixmap ( * PFNGLXCREATEPIXMAPPROC) (Display *dpy, GLXFBConfig config, Pixmap pixmap, const int *attrib_list); +typedef GLXWindow ( * PFNGLXCREATEWINDOWPROC) (Display *dpy, GLXFBConfig config, Window win, const int *attrib_list); +typedef void ( * PFNGLXDESTROYPBUFFERPROC) (Display *dpy, GLXPbuffer pbuf); +typedef void ( * PFNGLXDESTROYPIXMAPPROC) (Display *dpy, GLXPixmap pixmap); +typedef void ( * PFNGLXDESTROYWINDOWPROC) (Display *dpy, GLXWindow win); +typedef GLXDrawable ( * PFNGLXGETCURRENTREADDRAWABLEPROC) (void); +typedef int ( * PFNGLXGETFBCONFIGATTRIBPROC) (Display *dpy, GLXFBConfig config, int attribute, int *value); +typedef GLXFBConfig* ( * PFNGLXGETFBCONFIGSPROC) (Display *dpy, int screen, int *nelements); +typedef void ( * PFNGLXGETSELECTEDEVENTPROC) (Display *dpy, GLXDrawable draw, unsigned long *event_mask); +typedef XVisualInfo* ( * PFNGLXGETVISUALFROMFBCONFIGPROC) (Display *dpy, GLXFBConfig config); +typedef Bool ( * PFNGLXMAKECONTEXTCURRENTPROC) (Display *display, GLXDrawable draw, GLXDrawable read, GLXContext ctx); +typedef int ( * PFNGLXQUERYCONTEXTPROC) (Display *dpy, GLXContext ctx, int attribute, int *value); +typedef void ( * PFNGLXQUERYDRAWABLEPROC) (Display *dpy, 
GLXDrawable draw, int attribute, unsigned int *value); +typedef void ( * PFNGLXSELECTEVENTPROC) (Display *dpy, GLXDrawable draw, unsigned long event_mask); + +#define glXChooseFBConfig GLXEW_GET_FUN(__glewXChooseFBConfig) +#define glXCreateNewContext GLXEW_GET_FUN(__glewXCreateNewContext) +#define glXCreatePbuffer GLXEW_GET_FUN(__glewXCreatePbuffer) +#define glXCreatePixmap GLXEW_GET_FUN(__glewXCreatePixmap) +#define glXCreateWindow GLXEW_GET_FUN(__glewXCreateWindow) +#define glXDestroyPbuffer GLXEW_GET_FUN(__glewXDestroyPbuffer) +#define glXDestroyPixmap GLXEW_GET_FUN(__glewXDestroyPixmap) +#define glXDestroyWindow GLXEW_GET_FUN(__glewXDestroyWindow) +#define glXGetCurrentReadDrawable GLXEW_GET_FUN(__glewXGetCurrentReadDrawable) +#define glXGetFBConfigAttrib GLXEW_GET_FUN(__glewXGetFBConfigAttrib) +#define glXGetFBConfigs GLXEW_GET_FUN(__glewXGetFBConfigs) +#define glXGetSelectedEvent GLXEW_GET_FUN(__glewXGetSelectedEvent) +#define glXGetVisualFromFBConfig GLXEW_GET_FUN(__glewXGetVisualFromFBConfig) +#define glXMakeContextCurrent GLXEW_GET_FUN(__glewXMakeContextCurrent) +#define glXQueryContext GLXEW_GET_FUN(__glewXQueryContext) +#define glXQueryDrawable GLXEW_GET_FUN(__glewXQueryDrawable) +#define glXSelectEvent GLXEW_GET_FUN(__glewXSelectEvent) + +#define GLXEW_VERSION_1_3 GLXEW_GET_VAR(__GLXEW_VERSION_1_3) + +#endif /* GLX_VERSION_1_3 */ + +/* ---------------------------- GLX_VERSION_1_4 ---------------------------- */ + +#ifndef GLX_VERSION_1_4 +#define GLX_VERSION_1_4 1 + +#define GLX_SAMPLE_BUFFERS 100000 +#define GLX_SAMPLES 100001 + +extern void ( * glXGetProcAddress (const GLubyte *procName)) (void); + +#define GLXEW_VERSION_1_4 GLXEW_GET_VAR(__GLXEW_VERSION_1_4) + +#endif /* GLX_VERSION_1_4 */ + +/* -------------------------- GLX_3DFX_multisample ------------------------- */ + +#ifndef GLX_3DFX_multisample +#define GLX_3DFX_multisample 1 + +#define GLX_SAMPLE_BUFFERS_3DFX 0x8050 +#define GLX_SAMPLES_3DFX 0x8051 + +#define GLXEW_3DFX_multisample 
GLXEW_GET_VAR(__GLXEW_3DFX_multisample) + +#endif /* GLX_3DFX_multisample */ + +/* ------------------------- GLX_ARB_create_context ------------------------ */ + +#ifndef GLX_ARB_create_context +#define GLX_ARB_create_context 1 + +#define GLX_CONTEXT_DEBUG_BIT_ARB 0x0001 +#define GLX_CONTEXT_FORWARD_COMPATIBLE_BIT_ARB 0x0002 +#define GLX_CONTEXT_MAJOR_VERSION_ARB 0x2091 +#define GLX_CONTEXT_MINOR_VERSION_ARB 0x2092 +#define GLX_CONTEXT_FLAGS_ARB 0x2094 + +typedef GLXContext ( * PFNGLXCREATECONTEXTATTRIBSARBPROC) (Display* dpy, GLXFBConfig config, GLXContext share_context, Bool direct, const int *attrib_list); + +#define glXCreateContextAttribsARB GLXEW_GET_FUN(__glewXCreateContextAttribsARB) + +#define GLXEW_ARB_create_context GLXEW_GET_VAR(__GLXEW_ARB_create_context) + +#endif /* GLX_ARB_create_context */ + +/* ------------------------- GLX_ARB_fbconfig_float ------------------------ */ + +#ifndef GLX_ARB_fbconfig_float +#define GLX_ARB_fbconfig_float 1 + +#define GLX_RGBA_FLOAT_BIT 0x00000004 +#define GLX_RGBA_FLOAT_TYPE 0x20B9 + +#define GLXEW_ARB_fbconfig_float GLXEW_GET_VAR(__GLXEW_ARB_fbconfig_float) + +#endif /* GLX_ARB_fbconfig_float */ + +/* ------------------------ GLX_ARB_framebuffer_sRGB ----------------------- */ + +#ifndef GLX_ARB_framebuffer_sRGB +#define GLX_ARB_framebuffer_sRGB 1 + +#define GLX_FRAMEBUFFER_SRGB_CAPABLE_ARB 0x20B2 + +#define GLXEW_ARB_framebuffer_sRGB GLXEW_GET_VAR(__GLXEW_ARB_framebuffer_sRGB) + +#endif /* GLX_ARB_framebuffer_sRGB */ + +/* ------------------------ GLX_ARB_get_proc_address ----------------------- */ + +#ifndef GLX_ARB_get_proc_address +#define GLX_ARB_get_proc_address 1 + +extern void ( * glXGetProcAddressARB (const GLubyte *procName)) (void); + +#define GLXEW_ARB_get_proc_address GLXEW_GET_VAR(__GLXEW_ARB_get_proc_address) + +#endif /* GLX_ARB_get_proc_address */ + +/* -------------------------- GLX_ARB_multisample -------------------------- */ + +#ifndef GLX_ARB_multisample +#define GLX_ARB_multisample 1 + 
+#define GLX_SAMPLE_BUFFERS_ARB 100000 +#define GLX_SAMPLES_ARB 100001 + +#define GLXEW_ARB_multisample GLXEW_GET_VAR(__GLXEW_ARB_multisample) + +#endif /* GLX_ARB_multisample */ + +/* ----------------------- GLX_ATI_pixel_format_float ---------------------- */ + +#ifndef GLX_ATI_pixel_format_float +#define GLX_ATI_pixel_format_float 1 + +#define GLX_RGBA_FLOAT_ATI_BIT 0x00000100 + +#define GLXEW_ATI_pixel_format_float GLXEW_GET_VAR(__GLXEW_ATI_pixel_format_float) + +#endif /* GLX_ATI_pixel_format_float */ + +/* ------------------------- GLX_ATI_render_texture ------------------------ */ + +#ifndef GLX_ATI_render_texture +#define GLX_ATI_render_texture 1 + +#define GLX_BIND_TO_TEXTURE_RGB_ATI 0x9800 +#define GLX_BIND_TO_TEXTURE_RGBA_ATI 0x9801 +#define GLX_TEXTURE_FORMAT_ATI 0x9802 +#define GLX_TEXTURE_TARGET_ATI 0x9803 +#define GLX_MIPMAP_TEXTURE_ATI 0x9804 +#define GLX_TEXTURE_RGB_ATI 0x9805 +#define GLX_TEXTURE_RGBA_ATI 0x9806 +#define GLX_NO_TEXTURE_ATI 0x9807 +#define GLX_TEXTURE_CUBE_MAP_ATI 0x9808 +#define GLX_TEXTURE_1D_ATI 0x9809 +#define GLX_TEXTURE_2D_ATI 0x980A +#define GLX_MIPMAP_LEVEL_ATI 0x980B +#define GLX_CUBE_MAP_FACE_ATI 0x980C +#define GLX_TEXTURE_CUBE_MAP_POSITIVE_X_ATI 0x980D +#define GLX_TEXTURE_CUBE_MAP_NEGATIVE_X_ATI 0x980E +#define GLX_TEXTURE_CUBE_MAP_POSITIVE_Y_ATI 0x980F +#define GLX_TEXTURE_CUBE_MAP_NEGATIVE_Y_ATI 0x9810 +#define GLX_TEXTURE_CUBE_MAP_POSITIVE_Z_ATI 0x9811 +#define GLX_TEXTURE_CUBE_MAP_NEGATIVE_Z_ATI 0x9812 +#define GLX_FRONT_LEFT_ATI 0x9813 +#define GLX_FRONT_RIGHT_ATI 0x9814 +#define GLX_BACK_LEFT_ATI 0x9815 +#define GLX_BACK_RIGHT_ATI 0x9816 +#define GLX_AUX0_ATI 0x9817 +#define GLX_AUX1_ATI 0x9818 +#define GLX_AUX2_ATI 0x9819 +#define GLX_AUX3_ATI 0x981A +#define GLX_AUX4_ATI 0x981B +#define GLX_AUX5_ATI 0x981C +#define GLX_AUX6_ATI 0x981D +#define GLX_AUX7_ATI 0x981E +#define GLX_AUX8_ATI 0x981F +#define GLX_AUX9_ATI 0x9820 +#define GLX_BIND_TO_TEXTURE_LUMINANCE_ATI 0x9821 +#define GLX_BIND_TO_TEXTURE_INTENSITY_ATI 
0x9822 + +typedef void ( * PFNGLXBINDTEXIMAGEATIPROC) (Display *dpy, GLXPbuffer pbuf, int buffer); +typedef void ( * PFNGLXDRAWABLEATTRIBATIPROC) (Display *dpy, GLXDrawable draw, const int *attrib_list); +typedef void ( * PFNGLXRELEASETEXIMAGEATIPROC) (Display *dpy, GLXPbuffer pbuf, int buffer); + +#define glXBindTexImageATI GLXEW_GET_FUN(__glewXBindTexImageATI) +#define glXDrawableAttribATI GLXEW_GET_FUN(__glewXDrawableAttribATI) +#define glXReleaseTexImageATI GLXEW_GET_FUN(__glewXReleaseTexImageATI) + +#define GLXEW_ATI_render_texture GLXEW_GET_VAR(__GLXEW_ATI_render_texture) + +#endif /* GLX_ATI_render_texture */ + +/* --------------------- GLX_EXT_fbconfig_packed_float --------------------- */ + +#ifndef GLX_EXT_fbconfig_packed_float +#define GLX_EXT_fbconfig_packed_float 1 + +#define GLX_RGBA_UNSIGNED_FLOAT_BIT_EXT 0x00000008 +#define GLX_RGBA_UNSIGNED_FLOAT_TYPE_EXT 0x20B1 + +#define GLXEW_EXT_fbconfig_packed_float GLXEW_GET_VAR(__GLXEW_EXT_fbconfig_packed_float) + +#endif /* GLX_EXT_fbconfig_packed_float */ + +/* ------------------------ GLX_EXT_framebuffer_sRGB ----------------------- */ + +#ifndef GLX_EXT_framebuffer_sRGB +#define GLX_EXT_framebuffer_sRGB 1 + +#define GLX_FRAMEBUFFER_SRGB_CAPABLE_EXT 0x20B2 + +#define GLXEW_EXT_framebuffer_sRGB GLXEW_GET_VAR(__GLXEW_EXT_framebuffer_sRGB) + +#endif /* GLX_EXT_framebuffer_sRGB */ + +/* ------------------------- GLX_EXT_import_context ------------------------ */ + +#ifndef GLX_EXT_import_context +#define GLX_EXT_import_context 1 + +#define GLX_SHARE_CONTEXT_EXT 0x800A +#define GLX_VISUAL_ID_EXT 0x800B +#define GLX_SCREEN_EXT 0x800C + +typedef XID GLXContextID; + +typedef void ( * PFNGLXFREECONTEXTEXTPROC) (Display* dpy, GLXContext context); +typedef GLXContextID ( * PFNGLXGETCONTEXTIDEXTPROC) (const GLXContext context); +typedef GLXContext ( * PFNGLXIMPORTCONTEXTEXTPROC) (Display* dpy, GLXContextID contextID); +typedef int ( * PFNGLXQUERYCONTEXTINFOEXTPROC) (Display* dpy, GLXContext context, int attribute,int 
*value); + +#define glXFreeContextEXT GLXEW_GET_FUN(__glewXFreeContextEXT) +#define glXGetContextIDEXT GLXEW_GET_FUN(__glewXGetContextIDEXT) +#define glXImportContextEXT GLXEW_GET_FUN(__glewXImportContextEXT) +#define glXQueryContextInfoEXT GLXEW_GET_FUN(__glewXQueryContextInfoEXT) + +#define GLXEW_EXT_import_context GLXEW_GET_VAR(__GLXEW_EXT_import_context) + +#endif /* GLX_EXT_import_context */ + +/* -------------------------- GLX_EXT_scene_marker ------------------------- */ + +#ifndef GLX_EXT_scene_marker +#define GLX_EXT_scene_marker 1 + +#define GLXEW_EXT_scene_marker GLXEW_GET_VAR(__GLXEW_EXT_scene_marker) + +#endif /* GLX_EXT_scene_marker */ + +/* ---------------------- GLX_EXT_texture_from_pixmap ---------------------- */ + +#ifndef GLX_EXT_texture_from_pixmap +#define GLX_EXT_texture_from_pixmap 1 + +#define GLX_TEXTURE_1D_BIT_EXT 0x00000001 +#define GLX_TEXTURE_2D_BIT_EXT 0x00000002 +#define GLX_TEXTURE_RECTANGLE_BIT_EXT 0x00000004 +#define GLX_BIND_TO_TEXTURE_RGB_EXT 0x20D0 +#define GLX_BIND_TO_TEXTURE_RGBA_EXT 0x20D1 +#define GLX_BIND_TO_MIPMAP_TEXTURE_EXT 0x20D2 +#define GLX_BIND_TO_TEXTURE_TARGETS_EXT 0x20D3 +#define GLX_Y_INVERTED_EXT 0x20D4 +#define GLX_TEXTURE_FORMAT_EXT 0x20D5 +#define GLX_TEXTURE_TARGET_EXT 0x20D6 +#define GLX_MIPMAP_TEXTURE_EXT 0x20D7 +#define GLX_TEXTURE_FORMAT_NONE_EXT 0x20D8 +#define GLX_TEXTURE_FORMAT_RGB_EXT 0x20D9 +#define GLX_TEXTURE_FORMAT_RGBA_EXT 0x20DA +#define GLX_TEXTURE_1D_EXT 0x20DB +#define GLX_TEXTURE_2D_EXT 0x20DC +#define GLX_TEXTURE_RECTANGLE_EXT 0x20DD +#define GLX_FRONT_LEFT_EXT 0x20DE +#define GLX_FRONT_RIGHT_EXT 0x20DF +#define GLX_BACK_LEFT_EXT 0x20E0 +#define GLX_BACK_RIGHT_EXT 0x20E1 +#define GLX_AUX0_EXT 0x20E2 +#define GLX_AUX1_EXT 0x20E3 +#define GLX_AUX2_EXT 0x20E4 +#define GLX_AUX3_EXT 0x20E5 +#define GLX_AUX4_EXT 0x20E6 +#define GLX_AUX5_EXT 0x20E7 +#define GLX_AUX6_EXT 0x20E8 +#define GLX_AUX7_EXT 0x20E9 +#define GLX_AUX8_EXT 0x20EA +#define GLX_AUX9_EXT 0x20EB + +typedef void ( * 
PFNGLXBINDTEXIMAGEEXTPROC) (Display* display, GLXDrawable drawable, int buffer, const int *attrib_list); +typedef void ( * PFNGLXRELEASETEXIMAGEEXTPROC) (Display* display, GLXDrawable drawable, int buffer); + +#define glXBindTexImageEXT GLXEW_GET_FUN(__glewXBindTexImageEXT) +#define glXReleaseTexImageEXT GLXEW_GET_FUN(__glewXReleaseTexImageEXT) + +#define GLXEW_EXT_texture_from_pixmap GLXEW_GET_VAR(__GLXEW_EXT_texture_from_pixmap) + +#endif /* GLX_EXT_texture_from_pixmap */ + +/* -------------------------- GLX_EXT_visual_info -------------------------- */ + +#ifndef GLX_EXT_visual_info +#define GLX_EXT_visual_info 1 + +#define GLX_X_VISUAL_TYPE_EXT 0x22 +#define GLX_TRANSPARENT_TYPE_EXT 0x23 +#define GLX_TRANSPARENT_INDEX_VALUE_EXT 0x24 +#define GLX_TRANSPARENT_RED_VALUE_EXT 0x25 +#define GLX_TRANSPARENT_GREEN_VALUE_EXT 0x26 +#define GLX_TRANSPARENT_BLUE_VALUE_EXT 0x27 +#define GLX_TRANSPARENT_ALPHA_VALUE_EXT 0x28 +#define GLX_NONE_EXT 0x8000 +#define GLX_TRUE_COLOR_EXT 0x8002 +#define GLX_DIRECT_COLOR_EXT 0x8003 +#define GLX_PSEUDO_COLOR_EXT 0x8004 +#define GLX_STATIC_COLOR_EXT 0x8005 +#define GLX_GRAY_SCALE_EXT 0x8006 +#define GLX_STATIC_GRAY_EXT 0x8007 +#define GLX_TRANSPARENT_RGB_EXT 0x8008 +#define GLX_TRANSPARENT_INDEX_EXT 0x8009 + +#define GLXEW_EXT_visual_info GLXEW_GET_VAR(__GLXEW_EXT_visual_info) + +#endif /* GLX_EXT_visual_info */ + +/* ------------------------- GLX_EXT_visual_rating ------------------------- */ + +#ifndef GLX_EXT_visual_rating +#define GLX_EXT_visual_rating 1 + +#define GLX_VISUAL_CAVEAT_EXT 0x20 +#define GLX_SLOW_VISUAL_EXT 0x8001 +#define GLX_NON_CONFORMANT_VISUAL_EXT 0x800D + +#define GLXEW_EXT_visual_rating GLXEW_GET_VAR(__GLXEW_EXT_visual_rating) + +#endif /* GLX_EXT_visual_rating */ + +/* -------------------------- GLX_MESA_agp_offset -------------------------- */ + +#ifndef GLX_MESA_agp_offset +#define GLX_MESA_agp_offset 1 + +typedef unsigned int ( * PFNGLXGETAGPOFFSETMESAPROC) (const void* pointer); + +#define 
glXGetAGPOffsetMESA GLXEW_GET_FUN(__glewXGetAGPOffsetMESA) + +#define GLXEW_MESA_agp_offset GLXEW_GET_VAR(__GLXEW_MESA_agp_offset) + +#endif /* GLX_MESA_agp_offset */ + +/* ------------------------ GLX_MESA_copy_sub_buffer ----------------------- */ + +#ifndef GLX_MESA_copy_sub_buffer +#define GLX_MESA_copy_sub_buffer 1 + +typedef void ( * PFNGLXCOPYSUBBUFFERMESAPROC) (Display* dpy, GLXDrawable drawable, int x, int y, int width, int height); + +#define glXCopySubBufferMESA GLXEW_GET_FUN(__glewXCopySubBufferMESA) + +#define GLXEW_MESA_copy_sub_buffer GLXEW_GET_VAR(__GLXEW_MESA_copy_sub_buffer) + +#endif /* GLX_MESA_copy_sub_buffer */ + +/* ------------------------ GLX_MESA_pixmap_colormap ----------------------- */ + +#ifndef GLX_MESA_pixmap_colormap +#define GLX_MESA_pixmap_colormap 1 + +typedef GLXPixmap ( * PFNGLXCREATEGLXPIXMAPMESAPROC) (Display* dpy, XVisualInfo *visual, Pixmap pixmap, Colormap cmap); + +#define glXCreateGLXPixmapMESA GLXEW_GET_FUN(__glewXCreateGLXPixmapMESA) + +#define GLXEW_MESA_pixmap_colormap GLXEW_GET_VAR(__GLXEW_MESA_pixmap_colormap) + +#endif /* GLX_MESA_pixmap_colormap */ + +/* ------------------------ GLX_MESA_release_buffers ----------------------- */ + +#ifndef GLX_MESA_release_buffers +#define GLX_MESA_release_buffers 1 + +typedef Bool ( * PFNGLXRELEASEBUFFERSMESAPROC) (Display* dpy, GLXDrawable d); + +#define glXReleaseBuffersMESA GLXEW_GET_FUN(__glewXReleaseBuffersMESA) + +#define GLXEW_MESA_release_buffers GLXEW_GET_VAR(__GLXEW_MESA_release_buffers) + +#endif /* GLX_MESA_release_buffers */ + +/* ------------------------- GLX_MESA_set_3dfx_mode ------------------------ */ + +#ifndef GLX_MESA_set_3dfx_mode +#define GLX_MESA_set_3dfx_mode 1 + +#define GLX_3DFX_WINDOW_MODE_MESA 0x1 +#define GLX_3DFX_FULLSCREEN_MODE_MESA 0x2 + +typedef GLboolean ( * PFNGLXSET3DFXMODEMESAPROC) (GLint mode); + +#define glXSet3DfxModeMESA GLXEW_GET_FUN(__glewXSet3DfxModeMESA) + +#define GLXEW_MESA_set_3dfx_mode GLXEW_GET_VAR(__GLXEW_MESA_set_3dfx_mode) + 
+#endif /* GLX_MESA_set_3dfx_mode */ + +/* -------------------------- GLX_NV_float_buffer -------------------------- */ + +#ifndef GLX_NV_float_buffer +#define GLX_NV_float_buffer 1 + +#define GLX_FLOAT_COMPONENTS_NV 0x20B0 + +#define GLXEW_NV_float_buffer GLXEW_GET_VAR(__GLXEW_NV_float_buffer) + +#endif /* GLX_NV_float_buffer */ + +/* -------------------------- GLX_NV_present_video ------------------------- */ + +#ifndef GLX_NV_present_video +#define GLX_NV_present_video 1 + +#define GLX_NUM_VIDEO_SLOTS_NV 0x20F0 + +typedef int ( * PFNGLXBINDVIDEODEVICENVPROC) (Display* dpy, unsigned int video_slot, unsigned int video_device, const int *attrib_list); +typedef unsigned int* ( * PFNGLXENUMERATEVIDEODEVICESNVPROC) (Display *dpy, int screen, int *nelements); + +#define glXBindVideoDeviceNV GLXEW_GET_FUN(__glewXBindVideoDeviceNV) +#define glXEnumerateVideoDevicesNV GLXEW_GET_FUN(__glewXEnumerateVideoDevicesNV) + +#define GLXEW_NV_present_video GLXEW_GET_VAR(__GLXEW_NV_present_video) + +#endif /* GLX_NV_present_video */ + +/* --------------------------- GLX_NV_swap_group --------------------------- */ + +#ifndef GLX_NV_swap_group +#define GLX_NV_swap_group 1 + +typedef Bool ( * PFNGLXBINDSWAPBARRIERNVPROC) (Display* dpy, GLuint group, GLuint barrier); +typedef Bool ( * PFNGLXJOINSWAPGROUPNVPROC) (Display* dpy, GLXDrawable drawable, GLuint group); +typedef Bool ( * PFNGLXQUERYFRAMECOUNTNVPROC) (Display* dpy, int screen, GLuint *count); +typedef Bool ( * PFNGLXQUERYMAXSWAPGROUPSNVPROC) (Display* dpy, int screen, GLuint *maxGroups, GLuint *maxBarriers); +typedef Bool ( * PFNGLXQUERYSWAPGROUPNVPROC) (Display* dpy, GLXDrawable drawable, GLuint *group, GLuint *barrier); +typedef Bool ( * PFNGLXRESETFRAMECOUNTNVPROC) (Display* dpy, int screen); + +#define glXBindSwapBarrierNV GLXEW_GET_FUN(__glewXBindSwapBarrierNV) +#define glXJoinSwapGroupNV GLXEW_GET_FUN(__glewXJoinSwapGroupNV) +#define glXQueryFrameCountNV GLXEW_GET_FUN(__glewXQueryFrameCountNV) +#define 
glXQueryMaxSwapGroupsNV GLXEW_GET_FUN(__glewXQueryMaxSwapGroupsNV) +#define glXQuerySwapGroupNV GLXEW_GET_FUN(__glewXQuerySwapGroupNV) +#define glXResetFrameCountNV GLXEW_GET_FUN(__glewXResetFrameCountNV) + +#define GLXEW_NV_swap_group GLXEW_GET_VAR(__GLXEW_NV_swap_group) + +#endif /* GLX_NV_swap_group */ + +/* ----------------------- GLX_NV_vertex_array_range ----------------------- */ + +#ifndef GLX_NV_vertex_array_range +#define GLX_NV_vertex_array_range 1 + +typedef void * ( * PFNGLXALLOCATEMEMORYNVPROC) (GLsizei size, GLfloat readFrequency, GLfloat writeFrequency, GLfloat priority); +typedef void ( * PFNGLXFREEMEMORYNVPROC) (void *pointer); + +#define glXAllocateMemoryNV GLXEW_GET_FUN(__glewXAllocateMemoryNV) +#define glXFreeMemoryNV GLXEW_GET_FUN(__glewXFreeMemoryNV) + +#define GLXEW_NV_vertex_array_range GLXEW_GET_VAR(__GLXEW_NV_vertex_array_range) + +#endif /* GLX_NV_vertex_array_range */ + +/* -------------------------- GLX_NV_video_output -------------------------- */ + +#ifndef GLX_NV_video_output +#define GLX_NV_video_output 1 + +#define GLX_VIDEO_OUT_COLOR_NV 0x20C3 +#define GLX_VIDEO_OUT_ALPHA_NV 0x20C4 +#define GLX_VIDEO_OUT_DEPTH_NV 0x20C5 +#define GLX_VIDEO_OUT_COLOR_AND_ALPHA_NV 0x20C6 +#define GLX_VIDEO_OUT_COLOR_AND_DEPTH_NV 0x20C7 +#define GLX_VIDEO_OUT_FRAME_NV 0x20C8 +#define GLX_VIDEO_OUT_FIELD_1_NV 0x20C9 +#define GLX_VIDEO_OUT_FIELD_2_NV 0x20CA +#define GLX_VIDEO_OUT_STACKED_FIELDS_1_2_NV 0x20CB +#define GLX_VIDEO_OUT_STACKED_FIELDS_2_1_NV 0x20CC + +typedef int ( * PFNGLXBINDVIDEOIMAGENVPROC) (Display* dpy, GLXVideoDeviceNV VideoDevice, GLXPbuffer pbuf, int iVideoBuffer); +typedef int ( * PFNGLXGETVIDEODEVICENVPROC) (Display* dpy, int screen, int numVideoDevices, GLXVideoDeviceNV *pVideoDevice); +typedef int ( * PFNGLXGETVIDEOINFONVPROC) (Display* dpy, int screen, GLXVideoDeviceNV VideoDevice, unsigned long *pulCounterOutputPbuffer, unsigned long *pulCounterOutputVideo); +typedef int ( * PFNGLXRELEASEVIDEODEVICENVPROC) (Display* dpy, int 
screen, GLXVideoDeviceNV VideoDevice); +typedef int ( * PFNGLXRELEASEVIDEOIMAGENVPROC) (Display* dpy, GLXPbuffer pbuf); +typedef int ( * PFNGLXSENDPBUFFERTOVIDEONVPROC) (Display* dpy, GLXPbuffer pbuf, int iBufferType, unsigned long *pulCounterPbuffer, GLboolean bBlock); + +#define glXBindVideoImageNV GLXEW_GET_FUN(__glewXBindVideoImageNV) +#define glXGetVideoDeviceNV GLXEW_GET_FUN(__glewXGetVideoDeviceNV) +#define glXGetVideoInfoNV GLXEW_GET_FUN(__glewXGetVideoInfoNV) +#define glXReleaseVideoDeviceNV GLXEW_GET_FUN(__glewXReleaseVideoDeviceNV) +#define glXReleaseVideoImageNV GLXEW_GET_FUN(__glewXReleaseVideoImageNV) +#define glXSendPbufferToVideoNV GLXEW_GET_FUN(__glewXSendPbufferToVideoNV) + +#define GLXEW_NV_video_output GLXEW_GET_VAR(__GLXEW_NV_video_output) + +#endif /* GLX_NV_video_output */ + +/* -------------------------- GLX_OML_swap_method -------------------------- */ + +#ifndef GLX_OML_swap_method +#define GLX_OML_swap_method 1 + +#define GLX_SWAP_METHOD_OML 0x8060 +#define GLX_SWAP_EXCHANGE_OML 0x8061 +#define GLX_SWAP_COPY_OML 0x8062 +#define GLX_SWAP_UNDEFINED_OML 0x8063 + +#define GLXEW_OML_swap_method GLXEW_GET_VAR(__GLXEW_OML_swap_method) + +#endif /* GLX_OML_swap_method */ + +/* -------------------------- GLX_OML_sync_control ------------------------- */ + +#if !defined(GLX_OML_sync_control) && defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) +#include +#define GLX_OML_sync_control 1 + +typedef Bool ( * PFNGLXGETMSCRATEOMLPROC) (Display* dpy, GLXDrawable drawable, int32_t* numerator, int32_t* denominator); +typedef Bool ( * PFNGLXGETSYNCVALUESOMLPROC) (Display* dpy, GLXDrawable drawable, int64_t* ust, int64_t* msc, int64_t* sbc); +typedef int64_t ( * PFNGLXSWAPBUFFERSMSCOMLPROC) (Display* dpy, GLXDrawable drawable, int64_t target_msc, int64_t divisor, int64_t remainder); +typedef Bool ( * PFNGLXWAITFORMSCOMLPROC) (Display* dpy, GLXDrawable drawable, int64_t target_msc, int64_t divisor, int64_t remainder, int64_t* ust, int64_t* msc, 
int64_t* sbc); +typedef Bool ( * PFNGLXWAITFORSBCOMLPROC) (Display* dpy, GLXDrawable drawable, int64_t target_sbc, int64_t* ust, int64_t* msc, int64_t* sbc); + +#define glXGetMscRateOML GLXEW_GET_FUN(__glewXGetMscRateOML) +#define glXGetSyncValuesOML GLXEW_GET_FUN(__glewXGetSyncValuesOML) +#define glXSwapBuffersMscOML GLXEW_GET_FUN(__glewXSwapBuffersMscOML) +#define glXWaitForMscOML GLXEW_GET_FUN(__glewXWaitForMscOML) +#define glXWaitForSbcOML GLXEW_GET_FUN(__glewXWaitForSbcOML) + +#define GLXEW_OML_sync_control GLXEW_GET_VAR(__GLXEW_OML_sync_control) + +#endif /* GLX_OML_sync_control */ + +/* ------------------------ GLX_SGIS_blended_overlay ----------------------- */ + +#ifndef GLX_SGIS_blended_overlay +#define GLX_SGIS_blended_overlay 1 + +#define GLX_BLENDED_RGBA_SGIS 0x8025 + +#define GLXEW_SGIS_blended_overlay GLXEW_GET_VAR(__GLXEW_SGIS_blended_overlay) + +#endif /* GLX_SGIS_blended_overlay */ + +/* -------------------------- GLX_SGIS_color_range ------------------------- */ + +#ifndef GLX_SGIS_color_range +#define GLX_SGIS_color_range 1 + +#define GLX_MIN_RED_SGIS 0 +#define GLX_MAX_GREEN_SGIS 0 +#define GLX_MIN_BLUE_SGIS 0 +#define GLX_MAX_ALPHA_SGIS 0 +#define GLX_MIN_GREEN_SGIS 0 +#define GLX_MIN_ALPHA_SGIS 0 +#define GLX_MAX_RED_SGIS 0 +#define GLX_EXTENDED_RANGE_SGIS 0 +#define GLX_MAX_BLUE_SGIS 0 + +#define GLXEW_SGIS_color_range GLXEW_GET_VAR(__GLXEW_SGIS_color_range) + +#endif /* GLX_SGIS_color_range */ + +/* -------------------------- GLX_SGIS_multisample ------------------------- */ + +#ifndef GLX_SGIS_multisample +#define GLX_SGIS_multisample 1 + +#define GLX_SAMPLE_BUFFERS_SGIS 100000 +#define GLX_SAMPLES_SGIS 100001 + +#define GLXEW_SGIS_multisample GLXEW_GET_VAR(__GLXEW_SGIS_multisample) + +#endif /* GLX_SGIS_multisample */ + +/* ---------------------- GLX_SGIS_shared_multisample ---------------------- */ + +#ifndef GLX_SGIS_shared_multisample +#define GLX_SGIS_shared_multisample 1 + +#define GLX_MULTISAMPLE_SUB_RECT_WIDTH_SGIS 0x8026 +#define 
GLX_MULTISAMPLE_SUB_RECT_HEIGHT_SGIS 0x8027 + +#define GLXEW_SGIS_shared_multisample GLXEW_GET_VAR(__GLXEW_SGIS_shared_multisample) + +#endif /* GLX_SGIS_shared_multisample */ + +/* --------------------------- GLX_SGIX_fbconfig --------------------------- */ + +#ifndef GLX_SGIX_fbconfig +#define GLX_SGIX_fbconfig 1 + +#define GLX_WINDOW_BIT_SGIX 0x00000001 +#define GLX_RGBA_BIT_SGIX 0x00000001 +#define GLX_PIXMAP_BIT_SGIX 0x00000002 +#define GLX_COLOR_INDEX_BIT_SGIX 0x00000002 +#define GLX_SCREEN_EXT 0x800C +#define GLX_DRAWABLE_TYPE_SGIX 0x8010 +#define GLX_RENDER_TYPE_SGIX 0x8011 +#define GLX_X_RENDERABLE_SGIX 0x8012 +#define GLX_FBCONFIG_ID_SGIX 0x8013 +#define GLX_RGBA_TYPE_SGIX 0x8014 +#define GLX_COLOR_INDEX_TYPE_SGIX 0x8015 + +typedef XID GLXFBConfigIDSGIX; +typedef struct __GLXFBConfigRec *GLXFBConfigSGIX; + +typedef GLXFBConfigSGIX* ( * PFNGLXCHOOSEFBCONFIGSGIXPROC) (Display *dpy, int screen, const int *attrib_list, int *nelements); +typedef GLXContext ( * PFNGLXCREATECONTEXTWITHCONFIGSGIXPROC) (Display* dpy, GLXFBConfig config, int render_type, GLXContext share_list, Bool direct); +typedef GLXPixmap ( * PFNGLXCREATEGLXPIXMAPWITHCONFIGSGIXPROC) (Display* dpy, GLXFBConfig config, Pixmap pixmap); +typedef int ( * PFNGLXGETFBCONFIGATTRIBSGIXPROC) (Display* dpy, GLXFBConfigSGIX config, int attribute, int *value); +typedef GLXFBConfigSGIX ( * PFNGLXGETFBCONFIGFROMVISUALSGIXPROC) (Display* dpy, XVisualInfo *vis); +typedef XVisualInfo* ( * PFNGLXGETVISUALFROMFBCONFIGSGIXPROC) (Display *dpy, GLXFBConfig config); + +#define glXChooseFBConfigSGIX GLXEW_GET_FUN(__glewXChooseFBConfigSGIX) +#define glXCreateContextWithConfigSGIX GLXEW_GET_FUN(__glewXCreateContextWithConfigSGIX) +#define glXCreateGLXPixmapWithConfigSGIX GLXEW_GET_FUN(__glewXCreateGLXPixmapWithConfigSGIX) +#define glXGetFBConfigAttribSGIX GLXEW_GET_FUN(__glewXGetFBConfigAttribSGIX) +#define glXGetFBConfigFromVisualSGIX GLXEW_GET_FUN(__glewXGetFBConfigFromVisualSGIX) +#define glXGetVisualFromFBConfigSGIX 
GLXEW_GET_FUN(__glewXGetVisualFromFBConfigSGIX) + +#define GLXEW_SGIX_fbconfig GLXEW_GET_VAR(__GLXEW_SGIX_fbconfig) + +#endif /* GLX_SGIX_fbconfig */ + +/* --------------------------- GLX_SGIX_hyperpipe -------------------------- */ + +#ifndef GLX_SGIX_hyperpipe +#define GLX_SGIX_hyperpipe 1 + +#define GLX_HYPERPIPE_DISPLAY_PIPE_SGIX 0x00000001 +#define GLX_PIPE_RECT_SGIX 0x00000001 +#define GLX_PIPE_RECT_LIMITS_SGIX 0x00000002 +#define GLX_HYPERPIPE_RENDER_PIPE_SGIX 0x00000002 +#define GLX_HYPERPIPE_STEREO_SGIX 0x00000003 +#define GLX_HYPERPIPE_PIXEL_AVERAGE_SGIX 0x00000004 +#define GLX_HYPERPIPE_PIPE_NAME_LENGTH_SGIX 80 +#define GLX_BAD_HYPERPIPE_CONFIG_SGIX 91 +#define GLX_BAD_HYPERPIPE_SGIX 92 +#define GLX_HYPERPIPE_ID_SGIX 0x8030 + +typedef struct { + char pipeName[GLX_HYPERPIPE_PIPE_NAME_LENGTH_SGIX]; + int networkId; +} GLXHyperpipeNetworkSGIX; +typedef struct { + char pipeName[GLX_HYPERPIPE_PIPE_NAME_LENGTH_SGIX]; + int XOrigin; + int YOrigin; + int maxHeight; + int maxWidth; +} GLXPipeRectLimits; +typedef struct { + char pipeName[GLX_HYPERPIPE_PIPE_NAME_LENGTH_SGIX]; + int channel; + unsigned int participationType; + int timeSlice; +} GLXHyperpipeConfigSGIX; +typedef struct { + char pipeName[GLX_HYPERPIPE_PIPE_NAME_LENGTH_SGIX]; + int srcXOrigin; + int srcYOrigin; + int srcWidth; + int srcHeight; + int destXOrigin; + int destYOrigin; + int destWidth; + int destHeight; +} GLXPipeRect; + +typedef int ( * PFNGLXBINDHYPERPIPESGIXPROC) (Display *dpy, int hpId); +typedef int ( * PFNGLXDESTROYHYPERPIPECONFIGSGIXPROC) (Display *dpy, int hpId); +typedef int ( * PFNGLXHYPERPIPEATTRIBSGIXPROC) (Display *dpy, int timeSlice, int attrib, int size, void *attribList); +typedef int ( * PFNGLXHYPERPIPECONFIGSGIXPROC) (Display *dpy, int networkId, int npipes, GLXHyperpipeConfigSGIX *cfg, int *hpId); +typedef int ( * PFNGLXQUERYHYPERPIPEATTRIBSGIXPROC) (Display *dpy, int timeSlice, int attrib, int size, void *returnAttribList); +typedef int ( * 
PFNGLXQUERYHYPERPIPEBESTATTRIBSGIXPROC) (Display *dpy, int timeSlice, int attrib, int size, void *attribList, void *returnAttribList); +typedef GLXHyperpipeConfigSGIX * ( * PFNGLXQUERYHYPERPIPECONFIGSGIXPROC) (Display *dpy, int hpId, int *npipes); +typedef GLXHyperpipeNetworkSGIX * ( * PFNGLXQUERYHYPERPIPENETWORKSGIXPROC) (Display *dpy, int *npipes); + +#define glXBindHyperpipeSGIX GLXEW_GET_FUN(__glewXBindHyperpipeSGIX) +#define glXDestroyHyperpipeConfigSGIX GLXEW_GET_FUN(__glewXDestroyHyperpipeConfigSGIX) +#define glXHyperpipeAttribSGIX GLXEW_GET_FUN(__glewXHyperpipeAttribSGIX) +#define glXHyperpipeConfigSGIX GLXEW_GET_FUN(__glewXHyperpipeConfigSGIX) +#define glXQueryHyperpipeAttribSGIX GLXEW_GET_FUN(__glewXQueryHyperpipeAttribSGIX) +#define glXQueryHyperpipeBestAttribSGIX GLXEW_GET_FUN(__glewXQueryHyperpipeBestAttribSGIX) +#define glXQueryHyperpipeConfigSGIX GLXEW_GET_FUN(__glewXQueryHyperpipeConfigSGIX) +#define glXQueryHyperpipeNetworkSGIX GLXEW_GET_FUN(__glewXQueryHyperpipeNetworkSGIX) + +#define GLXEW_SGIX_hyperpipe GLXEW_GET_VAR(__GLXEW_SGIX_hyperpipe) + +#endif /* GLX_SGIX_hyperpipe */ + +/* ---------------------------- GLX_SGIX_pbuffer --------------------------- */ + +#ifndef GLX_SGIX_pbuffer +#define GLX_SGIX_pbuffer 1 + +#define GLX_FRONT_LEFT_BUFFER_BIT_SGIX 0x00000001 +#define GLX_FRONT_RIGHT_BUFFER_BIT_SGIX 0x00000002 +#define GLX_PBUFFER_BIT_SGIX 0x00000004 +#define GLX_BACK_LEFT_BUFFER_BIT_SGIX 0x00000004 +#define GLX_BACK_RIGHT_BUFFER_BIT_SGIX 0x00000008 +#define GLX_AUX_BUFFERS_BIT_SGIX 0x00000010 +#define GLX_DEPTH_BUFFER_BIT_SGIX 0x00000020 +#define GLX_STENCIL_BUFFER_BIT_SGIX 0x00000040 +#define GLX_ACCUM_BUFFER_BIT_SGIX 0x00000080 +#define GLX_SAMPLE_BUFFERS_BIT_SGIX 0x00000100 +#define GLX_MAX_PBUFFER_WIDTH_SGIX 0x8016 +#define GLX_MAX_PBUFFER_HEIGHT_SGIX 0x8017 +#define GLX_MAX_PBUFFER_PIXELS_SGIX 0x8018 +#define GLX_OPTIMAL_PBUFFER_WIDTH_SGIX 0x8019 +#define GLX_OPTIMAL_PBUFFER_HEIGHT_SGIX 0x801A +#define GLX_PRESERVED_CONTENTS_SGIX 
0x801B +#define GLX_LARGEST_PBUFFER_SGIX 0x801C +#define GLX_WIDTH_SGIX 0x801D +#define GLX_HEIGHT_SGIX 0x801E +#define GLX_EVENT_MASK_SGIX 0x801F +#define GLX_DAMAGED_SGIX 0x8020 +#define GLX_SAVED_SGIX 0x8021 +#define GLX_WINDOW_SGIX 0x8022 +#define GLX_PBUFFER_SGIX 0x8023 +#define GLX_BUFFER_CLOBBER_MASK_SGIX 0x08000000 + +typedef XID GLXPbufferSGIX; +typedef struct { int type; unsigned long serial; Bool send_event; Display *display; GLXDrawable drawable; int event_type; int draw_type; unsigned int mask; int x, y; int width, height; int count; } GLXBufferClobberEventSGIX; + +typedef GLXPbuffer ( * PFNGLXCREATEGLXPBUFFERSGIXPROC) (Display* dpy, GLXFBConfig config, unsigned int width, unsigned int height, int *attrib_list); +typedef void ( * PFNGLXDESTROYGLXPBUFFERSGIXPROC) (Display* dpy, GLXPbuffer pbuf); +typedef void ( * PFNGLXGETSELECTEDEVENTSGIXPROC) (Display* dpy, GLXDrawable drawable, unsigned long *mask); +typedef void ( * PFNGLXQUERYGLXPBUFFERSGIXPROC) (Display* dpy, GLXPbuffer pbuf, int attribute, unsigned int *value); +typedef void ( * PFNGLXSELECTEVENTSGIXPROC) (Display* dpy, GLXDrawable drawable, unsigned long mask); + +#define glXCreateGLXPbufferSGIX GLXEW_GET_FUN(__glewXCreateGLXPbufferSGIX) +#define glXDestroyGLXPbufferSGIX GLXEW_GET_FUN(__glewXDestroyGLXPbufferSGIX) +#define glXGetSelectedEventSGIX GLXEW_GET_FUN(__glewXGetSelectedEventSGIX) +#define glXQueryGLXPbufferSGIX GLXEW_GET_FUN(__glewXQueryGLXPbufferSGIX) +#define glXSelectEventSGIX GLXEW_GET_FUN(__glewXSelectEventSGIX) + +#define GLXEW_SGIX_pbuffer GLXEW_GET_VAR(__GLXEW_SGIX_pbuffer) + +#endif /* GLX_SGIX_pbuffer */ + +/* ------------------------- GLX_SGIX_swap_barrier ------------------------- */ + +#ifndef GLX_SGIX_swap_barrier +#define GLX_SGIX_swap_barrier 1 + +typedef void ( * PFNGLXBINDSWAPBARRIERSGIXPROC) (Display *dpy, GLXDrawable drawable, int barrier); +typedef Bool ( * PFNGLXQUERYMAXSWAPBARRIERSSGIXPROC) (Display *dpy, int screen, int *max); + +#define glXBindSwapBarrierSGIX 
GLXEW_GET_FUN(__glewXBindSwapBarrierSGIX) +#define glXQueryMaxSwapBarriersSGIX GLXEW_GET_FUN(__glewXQueryMaxSwapBarriersSGIX) + +#define GLXEW_SGIX_swap_barrier GLXEW_GET_VAR(__GLXEW_SGIX_swap_barrier) + +#endif /* GLX_SGIX_swap_barrier */ + +/* -------------------------- GLX_SGIX_swap_group -------------------------- */ + +#ifndef GLX_SGIX_swap_group +#define GLX_SGIX_swap_group 1 + +typedef void ( * PFNGLXJOINSWAPGROUPSGIXPROC) (Display *dpy, GLXDrawable drawable, GLXDrawable member); + +#define glXJoinSwapGroupSGIX GLXEW_GET_FUN(__glewXJoinSwapGroupSGIX) + +#define GLXEW_SGIX_swap_group GLXEW_GET_VAR(__GLXEW_SGIX_swap_group) + +#endif /* GLX_SGIX_swap_group */ + +/* ------------------------- GLX_SGIX_video_resize ------------------------- */ + +#ifndef GLX_SGIX_video_resize +#define GLX_SGIX_video_resize 1 + +#define GLX_SYNC_FRAME_SGIX 0x00000000 +#define GLX_SYNC_SWAP_SGIX 0x00000001 + +typedef int ( * PFNGLXBINDCHANNELTOWINDOWSGIXPROC) (Display* display, int screen, int channel, Window window); +typedef int ( * PFNGLXCHANNELRECTSGIXPROC) (Display* display, int screen, int channel, int x, int y, int w, int h); +typedef int ( * PFNGLXCHANNELRECTSYNCSGIXPROC) (Display* display, int screen, int channel, GLenum synctype); +typedef int ( * PFNGLXQUERYCHANNELDELTASSGIXPROC) (Display* display, int screen, int channel, int *x, int *y, int *w, int *h); +typedef int ( * PFNGLXQUERYCHANNELRECTSGIXPROC) (Display* display, int screen, int channel, int *dx, int *dy, int *dw, int *dh); + +#define glXBindChannelToWindowSGIX GLXEW_GET_FUN(__glewXBindChannelToWindowSGIX) +#define glXChannelRectSGIX GLXEW_GET_FUN(__glewXChannelRectSGIX) +#define glXChannelRectSyncSGIX GLXEW_GET_FUN(__glewXChannelRectSyncSGIX) +#define glXQueryChannelDeltasSGIX GLXEW_GET_FUN(__glewXQueryChannelDeltasSGIX) +#define glXQueryChannelRectSGIX GLXEW_GET_FUN(__glewXQueryChannelRectSGIX) + +#define GLXEW_SGIX_video_resize GLXEW_GET_VAR(__GLXEW_SGIX_video_resize) + +#endif /* GLX_SGIX_video_resize */ + 
+/* ---------------------- GLX_SGIX_visual_select_group --------------------- */ + +#ifndef GLX_SGIX_visual_select_group +#define GLX_SGIX_visual_select_group 1 + +#define GLX_VISUAL_SELECT_GROUP_SGIX 0x8028 + +#define GLXEW_SGIX_visual_select_group GLXEW_GET_VAR(__GLXEW_SGIX_visual_select_group) + +#endif /* GLX_SGIX_visual_select_group */ + +/* ---------------------------- GLX_SGI_cushion ---------------------------- */ + +#ifndef GLX_SGI_cushion +#define GLX_SGI_cushion 1 + +typedef void ( * PFNGLXCUSHIONSGIPROC) (Display* dpy, Window window, float cushion); + +#define glXCushionSGI GLXEW_GET_FUN(__glewXCushionSGI) + +#define GLXEW_SGI_cushion GLXEW_GET_VAR(__GLXEW_SGI_cushion) + +#endif /* GLX_SGI_cushion */ + +/* ----------------------- GLX_SGI_make_current_read ----------------------- */ + +#ifndef GLX_SGI_make_current_read +#define GLX_SGI_make_current_read 1 + +typedef GLXDrawable ( * PFNGLXGETCURRENTREADDRAWABLESGIPROC) (void); +typedef Bool ( * PFNGLXMAKECURRENTREADSGIPROC) (Display* dpy, GLXDrawable draw, GLXDrawable read, GLXContext ctx); + +#define glXGetCurrentReadDrawableSGI GLXEW_GET_FUN(__glewXGetCurrentReadDrawableSGI) +#define glXMakeCurrentReadSGI GLXEW_GET_FUN(__glewXMakeCurrentReadSGI) + +#define GLXEW_SGI_make_current_read GLXEW_GET_VAR(__GLXEW_SGI_make_current_read) + +#endif /* GLX_SGI_make_current_read */ + +/* -------------------------- GLX_SGI_swap_control ------------------------- */ + +#ifndef GLX_SGI_swap_control +#define GLX_SGI_swap_control 1 + +typedef int ( * PFNGLXSWAPINTERVALSGIPROC) (int interval); + +#define glXSwapIntervalSGI GLXEW_GET_FUN(__glewXSwapIntervalSGI) + +#define GLXEW_SGI_swap_control GLXEW_GET_VAR(__GLXEW_SGI_swap_control) + +#endif /* GLX_SGI_swap_control */ + +/* --------------------------- GLX_SGI_video_sync -------------------------- */ + +#ifndef GLX_SGI_video_sync +#define GLX_SGI_video_sync 1 + +typedef int ( * PFNGLXGETVIDEOSYNCSGIPROC) (uint* count); +typedef int ( * PFNGLXWAITVIDEOSYNCSGIPROC) (int 
divisor, int remainder, unsigned int* count); + +#define glXGetVideoSyncSGI GLXEW_GET_FUN(__glewXGetVideoSyncSGI) +#define glXWaitVideoSyncSGI GLXEW_GET_FUN(__glewXWaitVideoSyncSGI) + +#define GLXEW_SGI_video_sync GLXEW_GET_VAR(__GLXEW_SGI_video_sync) + +#endif /* GLX_SGI_video_sync */ + +/* --------------------- GLX_SUN_get_transparent_index --------------------- */ + +#ifndef GLX_SUN_get_transparent_index +#define GLX_SUN_get_transparent_index 1 + +typedef Status ( * PFNGLXGETTRANSPARENTINDEXSUNPROC) (Display* dpy, Window overlay, Window underlay, unsigned long *pTransparentIndex); + +#define glXGetTransparentIndexSUN GLXEW_GET_FUN(__glewXGetTransparentIndexSUN) + +#define GLXEW_SUN_get_transparent_index GLXEW_GET_VAR(__GLXEW_SUN_get_transparent_index) + +#endif /* GLX_SUN_get_transparent_index */ + +/* -------------------------- GLX_SUN_video_resize ------------------------- */ + +#ifndef GLX_SUN_video_resize +#define GLX_SUN_video_resize 1 + +#define GLX_VIDEO_RESIZE_SUN 0x8171 +#define GL_VIDEO_RESIZE_COMPENSATION_SUN 0x85CD + +typedef int ( * PFNGLXGETVIDEORESIZESUNPROC) (Display* display, GLXDrawable window, float* factor); +typedef int ( * PFNGLXVIDEORESIZESUNPROC) (Display* display, GLXDrawable window, float factor); + +#define glXGetVideoResizeSUN GLXEW_GET_FUN(__glewXGetVideoResizeSUN) +#define glXVideoResizeSUN GLXEW_GET_FUN(__glewXVideoResizeSUN) + +#define GLXEW_SUN_video_resize GLXEW_GET_VAR(__GLXEW_SUN_video_resize) + +#endif /* GLX_SUN_video_resize */ + +/* ------------------------------------------------------------------------- */ + +#ifdef GLEW_MX +#define GLXEW_EXPORT +#else +#define GLXEW_EXPORT extern +#endif /* GLEW_MX */ + +extern PFNGLXGETCURRENTDISPLAYPROC __glewXGetCurrentDisplay; + +extern PFNGLXCHOOSEFBCONFIGPROC __glewXChooseFBConfig; +extern PFNGLXCREATENEWCONTEXTPROC __glewXCreateNewContext; +extern PFNGLXCREATEPBUFFERPROC __glewXCreatePbuffer; +extern PFNGLXCREATEPIXMAPPROC __glewXCreatePixmap; +extern PFNGLXCREATEWINDOWPROC 
__glewXCreateWindow; +extern PFNGLXDESTROYPBUFFERPROC __glewXDestroyPbuffer; +extern PFNGLXDESTROYPIXMAPPROC __glewXDestroyPixmap; +extern PFNGLXDESTROYWINDOWPROC __glewXDestroyWindow; +extern PFNGLXGETCURRENTREADDRAWABLEPROC __glewXGetCurrentReadDrawable; +extern PFNGLXGETFBCONFIGATTRIBPROC __glewXGetFBConfigAttrib; +extern PFNGLXGETFBCONFIGSPROC __glewXGetFBConfigs; +extern PFNGLXGETSELECTEDEVENTPROC __glewXGetSelectedEvent; +extern PFNGLXGETVISUALFROMFBCONFIGPROC __glewXGetVisualFromFBConfig; +extern PFNGLXMAKECONTEXTCURRENTPROC __glewXMakeContextCurrent; +extern PFNGLXQUERYCONTEXTPROC __glewXQueryContext; +extern PFNGLXQUERYDRAWABLEPROC __glewXQueryDrawable; +extern PFNGLXSELECTEVENTPROC __glewXSelectEvent; + +extern PFNGLXCREATECONTEXTATTRIBSARBPROC __glewXCreateContextAttribsARB; + +extern PFNGLXBINDTEXIMAGEATIPROC __glewXBindTexImageATI; +extern PFNGLXDRAWABLEATTRIBATIPROC __glewXDrawableAttribATI; +extern PFNGLXRELEASETEXIMAGEATIPROC __glewXReleaseTexImageATI; + +extern PFNGLXFREECONTEXTEXTPROC __glewXFreeContextEXT; +extern PFNGLXGETCONTEXTIDEXTPROC __glewXGetContextIDEXT; +extern PFNGLXIMPORTCONTEXTEXTPROC __glewXImportContextEXT; +extern PFNGLXQUERYCONTEXTINFOEXTPROC __glewXQueryContextInfoEXT; + +extern PFNGLXBINDTEXIMAGEEXTPROC __glewXBindTexImageEXT; +extern PFNGLXRELEASETEXIMAGEEXTPROC __glewXReleaseTexImageEXT; + +extern PFNGLXGETAGPOFFSETMESAPROC __glewXGetAGPOffsetMESA; + +extern PFNGLXCOPYSUBBUFFERMESAPROC __glewXCopySubBufferMESA; + +extern PFNGLXCREATEGLXPIXMAPMESAPROC __glewXCreateGLXPixmapMESA; + +extern PFNGLXRELEASEBUFFERSMESAPROC __glewXReleaseBuffersMESA; + +extern PFNGLXSET3DFXMODEMESAPROC __glewXSet3DfxModeMESA; + +extern PFNGLXBINDVIDEODEVICENVPROC __glewXBindVideoDeviceNV; +extern PFNGLXENUMERATEVIDEODEVICESNVPROC __glewXEnumerateVideoDevicesNV; + +extern PFNGLXBINDSWAPBARRIERNVPROC __glewXBindSwapBarrierNV; +extern PFNGLXJOINSWAPGROUPNVPROC __glewXJoinSwapGroupNV; +extern PFNGLXQUERYFRAMECOUNTNVPROC __glewXQueryFrameCountNV; +extern 
PFNGLXQUERYMAXSWAPGROUPSNVPROC __glewXQueryMaxSwapGroupsNV; +extern PFNGLXQUERYSWAPGROUPNVPROC __glewXQuerySwapGroupNV; +extern PFNGLXRESETFRAMECOUNTNVPROC __glewXResetFrameCountNV; + +extern PFNGLXALLOCATEMEMORYNVPROC __glewXAllocateMemoryNV; +extern PFNGLXFREEMEMORYNVPROC __glewXFreeMemoryNV; + +extern PFNGLXBINDVIDEOIMAGENVPROC __glewXBindVideoImageNV; +extern PFNGLXGETVIDEODEVICENVPROC __glewXGetVideoDeviceNV; +extern PFNGLXGETVIDEOINFONVPROC __glewXGetVideoInfoNV; +extern PFNGLXRELEASEVIDEODEVICENVPROC __glewXReleaseVideoDeviceNV; +extern PFNGLXRELEASEVIDEOIMAGENVPROC __glewXReleaseVideoImageNV; +extern PFNGLXSENDPBUFFERTOVIDEONVPROC __glewXSendPbufferToVideoNV; + +#ifdef GLX_OML_sync_control +extern PFNGLXGETMSCRATEOMLPROC __glewXGetMscRateOML; +extern PFNGLXGETSYNCVALUESOMLPROC __glewXGetSyncValuesOML; +extern PFNGLXSWAPBUFFERSMSCOMLPROC __glewXSwapBuffersMscOML; +extern PFNGLXWAITFORMSCOMLPROC __glewXWaitForMscOML; +extern PFNGLXWAITFORSBCOMLPROC __glewXWaitForSbcOML; +#endif + +extern PFNGLXCHOOSEFBCONFIGSGIXPROC __glewXChooseFBConfigSGIX; +extern PFNGLXCREATECONTEXTWITHCONFIGSGIXPROC __glewXCreateContextWithConfigSGIX; +extern PFNGLXCREATEGLXPIXMAPWITHCONFIGSGIXPROC __glewXCreateGLXPixmapWithConfigSGIX; +extern PFNGLXGETFBCONFIGATTRIBSGIXPROC __glewXGetFBConfigAttribSGIX; +extern PFNGLXGETFBCONFIGFROMVISUALSGIXPROC __glewXGetFBConfigFromVisualSGIX; +extern PFNGLXGETVISUALFROMFBCONFIGSGIXPROC __glewXGetVisualFromFBConfigSGIX; + +extern PFNGLXBINDHYPERPIPESGIXPROC __glewXBindHyperpipeSGIX; +extern PFNGLXDESTROYHYPERPIPECONFIGSGIXPROC __glewXDestroyHyperpipeConfigSGIX; +extern PFNGLXHYPERPIPEATTRIBSGIXPROC __glewXHyperpipeAttribSGIX; +extern PFNGLXHYPERPIPECONFIGSGIXPROC __glewXHyperpipeConfigSGIX; +extern PFNGLXQUERYHYPERPIPEATTRIBSGIXPROC __glewXQueryHyperpipeAttribSGIX; +extern PFNGLXQUERYHYPERPIPEBESTATTRIBSGIXPROC __glewXQueryHyperpipeBestAttribSGIX; +extern PFNGLXQUERYHYPERPIPECONFIGSGIXPROC __glewXQueryHyperpipeConfigSGIX; +extern 
PFNGLXQUERYHYPERPIPENETWORKSGIXPROC __glewXQueryHyperpipeNetworkSGIX; + +extern PFNGLXCREATEGLXPBUFFERSGIXPROC __glewXCreateGLXPbufferSGIX; +extern PFNGLXDESTROYGLXPBUFFERSGIXPROC __glewXDestroyGLXPbufferSGIX; +extern PFNGLXGETSELECTEDEVENTSGIXPROC __glewXGetSelectedEventSGIX; +extern PFNGLXQUERYGLXPBUFFERSGIXPROC __glewXQueryGLXPbufferSGIX; +extern PFNGLXSELECTEVENTSGIXPROC __glewXSelectEventSGIX; + +extern PFNGLXBINDSWAPBARRIERSGIXPROC __glewXBindSwapBarrierSGIX; +extern PFNGLXQUERYMAXSWAPBARRIERSSGIXPROC __glewXQueryMaxSwapBarriersSGIX; + +extern PFNGLXJOINSWAPGROUPSGIXPROC __glewXJoinSwapGroupSGIX; + +extern PFNGLXBINDCHANNELTOWINDOWSGIXPROC __glewXBindChannelToWindowSGIX; +extern PFNGLXCHANNELRECTSGIXPROC __glewXChannelRectSGIX; +extern PFNGLXCHANNELRECTSYNCSGIXPROC __glewXChannelRectSyncSGIX; +extern PFNGLXQUERYCHANNELDELTASSGIXPROC __glewXQueryChannelDeltasSGIX; +extern PFNGLXQUERYCHANNELRECTSGIXPROC __glewXQueryChannelRectSGIX; + +extern PFNGLXCUSHIONSGIPROC __glewXCushionSGI; + +extern PFNGLXGETCURRENTREADDRAWABLESGIPROC __glewXGetCurrentReadDrawableSGI; +extern PFNGLXMAKECURRENTREADSGIPROC __glewXMakeCurrentReadSGI; + +extern PFNGLXSWAPINTERVALSGIPROC __glewXSwapIntervalSGI; + +extern PFNGLXGETVIDEOSYNCSGIPROC __glewXGetVideoSyncSGI; +extern PFNGLXWAITVIDEOSYNCSGIPROC __glewXWaitVideoSyncSGI; + +extern PFNGLXGETTRANSPARENTINDEXSUNPROC __glewXGetTransparentIndexSUN; + +extern PFNGLXGETVIDEORESIZESUNPROC __glewXGetVideoResizeSUN; +extern PFNGLXVIDEORESIZESUNPROC __glewXVideoResizeSUN; + +#if defined(GLEW_MX) +struct GLXEWContextStruct +{ +#endif /* GLEW_MX */ + +GLXEW_EXPORT GLboolean __GLXEW_VERSION_1_0; +GLXEW_EXPORT GLboolean __GLXEW_VERSION_1_1; +GLXEW_EXPORT GLboolean __GLXEW_VERSION_1_2; +GLXEW_EXPORT GLboolean __GLXEW_VERSION_1_3; +GLXEW_EXPORT GLboolean __GLXEW_VERSION_1_4; +GLXEW_EXPORT GLboolean __GLXEW_3DFX_multisample; +GLXEW_EXPORT GLboolean __GLXEW_ARB_create_context; +GLXEW_EXPORT GLboolean __GLXEW_ARB_fbconfig_float; +GLXEW_EXPORT GLboolean 
__GLXEW_ARB_framebuffer_sRGB; +GLXEW_EXPORT GLboolean __GLXEW_ARB_get_proc_address; +GLXEW_EXPORT GLboolean __GLXEW_ARB_multisample; +GLXEW_EXPORT GLboolean __GLXEW_ATI_pixel_format_float; +GLXEW_EXPORT GLboolean __GLXEW_ATI_render_texture; +GLXEW_EXPORT GLboolean __GLXEW_EXT_fbconfig_packed_float; +GLXEW_EXPORT GLboolean __GLXEW_EXT_framebuffer_sRGB; +GLXEW_EXPORT GLboolean __GLXEW_EXT_import_context; +GLXEW_EXPORT GLboolean __GLXEW_EXT_scene_marker; +GLXEW_EXPORT GLboolean __GLXEW_EXT_texture_from_pixmap; +GLXEW_EXPORT GLboolean __GLXEW_EXT_visual_info; +GLXEW_EXPORT GLboolean __GLXEW_EXT_visual_rating; +GLXEW_EXPORT GLboolean __GLXEW_MESA_agp_offset; +GLXEW_EXPORT GLboolean __GLXEW_MESA_copy_sub_buffer; +GLXEW_EXPORT GLboolean __GLXEW_MESA_pixmap_colormap; +GLXEW_EXPORT GLboolean __GLXEW_MESA_release_buffers; +GLXEW_EXPORT GLboolean __GLXEW_MESA_set_3dfx_mode; +GLXEW_EXPORT GLboolean __GLXEW_NV_float_buffer; +GLXEW_EXPORT GLboolean __GLXEW_NV_present_video; +GLXEW_EXPORT GLboolean __GLXEW_NV_swap_group; +GLXEW_EXPORT GLboolean __GLXEW_NV_vertex_array_range; +GLXEW_EXPORT GLboolean __GLXEW_NV_video_output; +GLXEW_EXPORT GLboolean __GLXEW_OML_swap_method; +GLXEW_EXPORT GLboolean __GLXEW_OML_sync_control; +GLXEW_EXPORT GLboolean __GLXEW_SGIS_blended_overlay; +GLXEW_EXPORT GLboolean __GLXEW_SGIS_color_range; +GLXEW_EXPORT GLboolean __GLXEW_SGIS_multisample; +GLXEW_EXPORT GLboolean __GLXEW_SGIS_shared_multisample; +GLXEW_EXPORT GLboolean __GLXEW_SGIX_fbconfig; +GLXEW_EXPORT GLboolean __GLXEW_SGIX_hyperpipe; +GLXEW_EXPORT GLboolean __GLXEW_SGIX_pbuffer; +GLXEW_EXPORT GLboolean __GLXEW_SGIX_swap_barrier; +GLXEW_EXPORT GLboolean __GLXEW_SGIX_swap_group; +GLXEW_EXPORT GLboolean __GLXEW_SGIX_video_resize; +GLXEW_EXPORT GLboolean __GLXEW_SGIX_visual_select_group; +GLXEW_EXPORT GLboolean __GLXEW_SGI_cushion; +GLXEW_EXPORT GLboolean __GLXEW_SGI_make_current_read; +GLXEW_EXPORT GLboolean __GLXEW_SGI_swap_control; +GLXEW_EXPORT GLboolean __GLXEW_SGI_video_sync; +GLXEW_EXPORT 
GLboolean __GLXEW_SUN_get_transparent_index; +GLXEW_EXPORT GLboolean __GLXEW_SUN_video_resize; + +#ifdef GLEW_MX +}; /* GLXEWContextStruct */ +#endif /* GLEW_MX */ + +/* ------------------------------------------------------------------------ */ + +#ifdef GLEW_MX + +typedef struct GLXEWContextStruct GLXEWContext; +extern GLenum glxewContextInit (GLXEWContext* ctx); +extern GLboolean glxewContextIsSupported (GLXEWContext* ctx, const char* name); + +#define glxewInit() glxewContextInit(glxewGetContext()) +#define glxewIsSupported(x) glxewContextIsSupported(glxewGetContext(), x) + +#define GLXEW_GET_VAR(x) (*(const GLboolean*)&(glxewGetContext()->x)) +#define GLXEW_GET_FUN(x) x + +#else /* GLEW_MX */ + +#define GLXEW_GET_VAR(x) (*(const GLboolean*)&x) +#define GLXEW_GET_FUN(x) x + +extern GLboolean glxewIsSupported (const char* name); + +#endif /* GLEW_MX */ + +extern GLboolean glxewGetExtension (const char* name); + +#ifdef __cplusplus +} +#endif + +#endif /* __glxew_h__ */ diff --git a/test_conformance/gl/GLEW/GL/wglew.h b/test_conformance/gl/GLEW/GL/wglew.h new file mode 100644 index 00000000..7f802761 --- /dev/null +++ b/test_conformance/gl/GLEW/GL/wglew.h @@ -0,0 +1,1165 @@ +/* +** The OpenGL Extension Wrangler Library +** Copyright (C) 2002-2008, Milan Ikits +** Copyright (C) 2002-2008, Marcelo E. Magallon +** Copyright (C) 2002, Lev Povalahev +** All rights reserved. +** +** Redistribution and use in source and binary forms, with or without +** modification, are permitted provided that the following conditions are met: +** +** * Redistributions of source code must retain the above copyright notice, +** this list of conditions and the following disclaimer. +** * Redistributions in binary form must reproduce the above copyright notice, +** this list of conditions and the following disclaimer in the documentation +** and/or other materials provided with the distribution. 
+** * The name of the author may be used to endorse or promote products +** derived from this software without specific prior written permission. +** +** THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +** AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +** IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +** ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +** LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +** CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +** SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +** INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +** CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +** ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +** THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/* +** Copyright (c) 2007 The Khronos Group Inc. +** +** Permission is hereby granted, free of charge, to any person obtaining a +** copy of this software and/or associated documentation files (the +** "Materials"), to deal in the Materials without restriction, including +** without limitation the rights to use, copy, modify, merge, publish, +** distribute, sublicense, and/or sell copies of the Materials, and to +** permit persons to whom the Materials are furnished to do so, subject to +** the following conditions: +** +** The above copyright notice and this permission notice shall be included +** in all copies or substantial portions of the Materials. +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. +*/ + +#ifndef __wglew_h__ +#define __wglew_h__ +#define __WGLEW_H__ + +#ifdef __wglext_h_ +#error wglext.h included before wglew.h +#endif + +#define __wglext_h_ + +#if !defined(APIENTRY) && !defined(__CYGWIN__) +# ifndef WIN32_LEAN_AND_MEAN +# define WIN32_LEAN_AND_MEAN 1 +# endif +#include +#endif + +/* + * GLEW_STATIC needs to be set when using the static version. + * GLEW_BUILD is set when building the DLL version. + */ +#ifdef GLEW_STATIC +# define GLEWAPI extern +#else +# ifdef GLEW_BUILD +# define GLEWAPI extern __declspec(dllexport) +# else +# define GLEWAPI extern __declspec(dllimport) +# endif +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* -------------------------- WGL_3DFX_multisample ------------------------- */ + +#ifndef WGL_3DFX_multisample +#define WGL_3DFX_multisample 1 + +#define WGL_SAMPLE_BUFFERS_3DFX 0x2060 +#define WGL_SAMPLES_3DFX 0x2061 + +#define WGLEW_3DFX_multisample WGLEW_GET_VAR(__WGLEW_3DFX_multisample) + +#endif /* WGL_3DFX_multisample */ + +/* ------------------------- WGL_3DL_stereo_control ------------------------ */ + +#ifndef WGL_3DL_stereo_control +#define WGL_3DL_stereo_control 1 + +#define WGL_STEREO_EMITTER_ENABLE_3DL 0x2055 +#define WGL_STEREO_EMITTER_DISABLE_3DL 0x2056 +#define WGL_STEREO_POLARITY_NORMAL_3DL 0x2057 +#define WGL_STEREO_POLARITY_INVERT_3DL 0x2058 + +typedef BOOL (WINAPI * PFNWGLSETSTEREOEMITTERSTATE3DLPROC) (HDC hDC, UINT uState); + +#define wglSetStereoEmitterState3DL WGLEW_GET_FUN(__wglewSetStereoEmitterState3DL) + +#define WGLEW_3DL_stereo_control WGLEW_GET_VAR(__WGLEW_3DL_stereo_control) + +#endif /* WGL_3DL_stereo_control */ + +/* ------------------------- WGL_ARB_buffer_region ------------------------- */ + +#ifndef 
WGL_ARB_buffer_region +#define WGL_ARB_buffer_region 1 + +#define WGL_FRONT_COLOR_BUFFER_BIT_ARB 0x00000001 +#define WGL_BACK_COLOR_BUFFER_BIT_ARB 0x00000002 +#define WGL_DEPTH_BUFFER_BIT_ARB 0x00000004 +#define WGL_STENCIL_BUFFER_BIT_ARB 0x00000008 + +typedef HANDLE (WINAPI * PFNWGLCREATEBUFFERREGIONARBPROC) (HDC hDC, int iLayerPlane, UINT uType); +typedef VOID (WINAPI * PFNWGLDELETEBUFFERREGIONARBPROC) (HANDLE hRegion); +typedef BOOL (WINAPI * PFNWGLRESTOREBUFFERREGIONARBPROC) (HANDLE hRegion, int x, int y, int width, int height, int xSrc, int ySrc); +typedef BOOL (WINAPI * PFNWGLSAVEBUFFERREGIONARBPROC) (HANDLE hRegion, int x, int y, int width, int height); + +#define wglCreateBufferRegionARB WGLEW_GET_FUN(__wglewCreateBufferRegionARB) +#define wglDeleteBufferRegionARB WGLEW_GET_FUN(__wglewDeleteBufferRegionARB) +#define wglRestoreBufferRegionARB WGLEW_GET_FUN(__wglewRestoreBufferRegionARB) +#define wglSaveBufferRegionARB WGLEW_GET_FUN(__wglewSaveBufferRegionARB) + +#define WGLEW_ARB_buffer_region WGLEW_GET_VAR(__WGLEW_ARB_buffer_region) + +#endif /* WGL_ARB_buffer_region */ + +/* ------------------------- WGL_ARB_create_context ------------------------ */ + +#ifndef WGL_ARB_create_context +#define WGL_ARB_create_context 1 + +#define WGL_CONTEXT_DEBUG_BIT_ARB 0x0001 +#define WGL_CONTEXT_FORWARD_COMPATIBLE_BIT_ARB 0x0002 +#define WGL_CONTEXT_MAJOR_VERSION_ARB 0x2091 +#define WGL_CONTEXT_MINOR_VERSION_ARB 0x2092 +#define WGL_CONTEXT_LAYER_PLANE_ARB 0x2093 +#define WGL_CONTEXT_FLAGS_ARB 0x2094 + +typedef HGLRC (WINAPI * PFNWGLCREATECONTEXTATTRIBSARBPROC) (HDC hDC, HGLRC hShareContext, const int* attribList); + +#define wglCreateContextAttribsARB WGLEW_GET_FUN(__wglewCreateContextAttribsARB) + +#define WGLEW_ARB_create_context WGLEW_GET_VAR(__WGLEW_ARB_create_context) + +#endif /* WGL_ARB_create_context */ + +/* ----------------------- WGL_ARB_extensions_string ----------------------- */ + +#ifndef WGL_ARB_extensions_string +#define WGL_ARB_extensions_string 1 + 
+typedef const char* (WINAPI * PFNWGLGETEXTENSIONSSTRINGARBPROC) (HDC hdc); + +#define wglGetExtensionsStringARB WGLEW_GET_FUN(__wglewGetExtensionsStringARB) + +#define WGLEW_ARB_extensions_string WGLEW_GET_VAR(__WGLEW_ARB_extensions_string) + +#endif /* WGL_ARB_extensions_string */ + +/* ------------------------ WGL_ARB_framebuffer_sRGB ----------------------- */ + +#ifndef WGL_ARB_framebuffer_sRGB +#define WGL_ARB_framebuffer_sRGB 1 + +#define WGL_FRAMEBUFFER_SRGB_CAPABLE_ARB 0x20A9 + +#define WGLEW_ARB_framebuffer_sRGB WGLEW_GET_VAR(__WGLEW_ARB_framebuffer_sRGB) + +#endif /* WGL_ARB_framebuffer_sRGB */ + +/* ----------------------- WGL_ARB_make_current_read ----------------------- */ + +#ifndef WGL_ARB_make_current_read +#define WGL_ARB_make_current_read 1 + +#define ERROR_INVALID_PIXEL_TYPE_ARB 0x2043 +#define ERROR_INCOMPATIBLE_DEVICE_CONTEXTS_ARB 0x2054 + +typedef HDC (WINAPI * PFNWGLGETCURRENTREADDCARBPROC) (VOID); +typedef BOOL (WINAPI * PFNWGLMAKECONTEXTCURRENTARBPROC) (HDC hDrawDC, HDC hReadDC, HGLRC hglrc); + +#define wglGetCurrentReadDCARB WGLEW_GET_FUN(__wglewGetCurrentReadDCARB) +#define wglMakeContextCurrentARB WGLEW_GET_FUN(__wglewMakeContextCurrentARB) + +#define WGLEW_ARB_make_current_read WGLEW_GET_VAR(__WGLEW_ARB_make_current_read) + +#endif /* WGL_ARB_make_current_read */ + +/* -------------------------- WGL_ARB_multisample -------------------------- */ + +#ifndef WGL_ARB_multisample +#define WGL_ARB_multisample 1 + +#define WGL_SAMPLE_BUFFERS_ARB 0x2041 +#define WGL_SAMPLES_ARB 0x2042 + +#define WGLEW_ARB_multisample WGLEW_GET_VAR(__WGLEW_ARB_multisample) + +#endif /* WGL_ARB_multisample */ + +/* ---------------------------- WGL_ARB_pbuffer ---------------------------- */ + +#ifndef WGL_ARB_pbuffer +#define WGL_ARB_pbuffer 1 + +#define WGL_DRAW_TO_PBUFFER_ARB 0x202D +#define WGL_MAX_PBUFFER_PIXELS_ARB 0x202E +#define WGL_MAX_PBUFFER_WIDTH_ARB 0x202F +#define WGL_MAX_PBUFFER_HEIGHT_ARB 0x2030 +#define WGL_PBUFFER_LARGEST_ARB 0x2033 +#define 
WGL_PBUFFER_WIDTH_ARB 0x2034 +#define WGL_PBUFFER_HEIGHT_ARB 0x2035 +#define WGL_PBUFFER_LOST_ARB 0x2036 + +DECLARE_HANDLE(HPBUFFERARB); + +typedef HPBUFFERARB (WINAPI * PFNWGLCREATEPBUFFERARBPROC) (HDC hDC, int iPixelFormat, int iWidth, int iHeight, const int* piAttribList); +typedef BOOL (WINAPI * PFNWGLDESTROYPBUFFERARBPROC) (HPBUFFERARB hPbuffer); +typedef HDC (WINAPI * PFNWGLGETPBUFFERDCARBPROC) (HPBUFFERARB hPbuffer); +typedef BOOL (WINAPI * PFNWGLQUERYPBUFFERARBPROC) (HPBUFFERARB hPbuffer, int iAttribute, int* piValue); +typedef int (WINAPI * PFNWGLRELEASEPBUFFERDCARBPROC) (HPBUFFERARB hPbuffer, HDC hDC); + +#define wglCreatePbufferARB WGLEW_GET_FUN(__wglewCreatePbufferARB) +#define wglDestroyPbufferARB WGLEW_GET_FUN(__wglewDestroyPbufferARB) +#define wglGetPbufferDCARB WGLEW_GET_FUN(__wglewGetPbufferDCARB) +#define wglQueryPbufferARB WGLEW_GET_FUN(__wglewQueryPbufferARB) +#define wglReleasePbufferDCARB WGLEW_GET_FUN(__wglewReleasePbufferDCARB) + +#define WGLEW_ARB_pbuffer WGLEW_GET_VAR(__WGLEW_ARB_pbuffer) + +#endif /* WGL_ARB_pbuffer */ + +/* -------------------------- WGL_ARB_pixel_format ------------------------- */ + +#ifndef WGL_ARB_pixel_format +#define WGL_ARB_pixel_format 1 + +#define WGL_NUMBER_PIXEL_FORMATS_ARB 0x2000 +#define WGL_DRAW_TO_WINDOW_ARB 0x2001 +#define WGL_DRAW_TO_BITMAP_ARB 0x2002 +#define WGL_ACCELERATION_ARB 0x2003 +#define WGL_NEED_PALETTE_ARB 0x2004 +#define WGL_NEED_SYSTEM_PALETTE_ARB 0x2005 +#define WGL_SWAP_LAYER_BUFFERS_ARB 0x2006 +#define WGL_SWAP_METHOD_ARB 0x2007 +#define WGL_NUMBER_OVERLAYS_ARB 0x2008 +#define WGL_NUMBER_UNDERLAYS_ARB 0x2009 +#define WGL_TRANSPARENT_ARB 0x200A +#define WGL_SHARE_DEPTH_ARB 0x200C +#define WGL_SHARE_STENCIL_ARB 0x200D +#define WGL_SHARE_ACCUM_ARB 0x200E +#define WGL_SUPPORT_GDI_ARB 0x200F +#define WGL_SUPPORT_OPENGL_ARB 0x2010 +#define WGL_DOUBLE_BUFFER_ARB 0x2011 +#define WGL_STEREO_ARB 0x2012 +#define WGL_PIXEL_TYPE_ARB 0x2013 +#define WGL_COLOR_BITS_ARB 0x2014 +#define WGL_RED_BITS_ARB 
0x2015 +#define WGL_RED_SHIFT_ARB 0x2016 +#define WGL_GREEN_BITS_ARB 0x2017 +#define WGL_GREEN_SHIFT_ARB 0x2018 +#define WGL_BLUE_BITS_ARB 0x2019 +#define WGL_BLUE_SHIFT_ARB 0x201A +#define WGL_ALPHA_BITS_ARB 0x201B +#define WGL_ALPHA_SHIFT_ARB 0x201C +#define WGL_ACCUM_BITS_ARB 0x201D +#define WGL_ACCUM_RED_BITS_ARB 0x201E +#define WGL_ACCUM_GREEN_BITS_ARB 0x201F +#define WGL_ACCUM_BLUE_BITS_ARB 0x2020 +#define WGL_ACCUM_ALPHA_BITS_ARB 0x2021 +#define WGL_DEPTH_BITS_ARB 0x2022 +#define WGL_STENCIL_BITS_ARB 0x2023 +#define WGL_AUX_BUFFERS_ARB 0x2024 +#define WGL_NO_ACCELERATION_ARB 0x2025 +#define WGL_GENERIC_ACCELERATION_ARB 0x2026 +#define WGL_FULL_ACCELERATION_ARB 0x2027 +#define WGL_SWAP_EXCHANGE_ARB 0x2028 +#define WGL_SWAP_COPY_ARB 0x2029 +#define WGL_SWAP_UNDEFINED_ARB 0x202A +#define WGL_TYPE_RGBA_ARB 0x202B +#define WGL_TYPE_COLORINDEX_ARB 0x202C +#define WGL_TRANSPARENT_RED_VALUE_ARB 0x2037 +#define WGL_TRANSPARENT_GREEN_VALUE_ARB 0x2038 +#define WGL_TRANSPARENT_BLUE_VALUE_ARB 0x2039 +#define WGL_TRANSPARENT_ALPHA_VALUE_ARB 0x203A +#define WGL_TRANSPARENT_INDEX_VALUE_ARB 0x203B + +typedef BOOL (WINAPI * PFNWGLCHOOSEPIXELFORMATARBPROC) (HDC hdc, const int* piAttribIList, const FLOAT *pfAttribFList, UINT nMaxFormats, int *piFormats, UINT *nNumFormats); +typedef BOOL (WINAPI * PFNWGLGETPIXELFORMATATTRIBFVARBPROC) (HDC hdc, int iPixelFormat, int iLayerPlane, UINT nAttributes, const int* piAttributes, FLOAT *pfValues); +typedef BOOL (WINAPI * PFNWGLGETPIXELFORMATATTRIBIVARBPROC) (HDC hdc, int iPixelFormat, int iLayerPlane, UINT nAttributes, const int* piAttributes, int *piValues); + +#define wglChoosePixelFormatARB WGLEW_GET_FUN(__wglewChoosePixelFormatARB) +#define wglGetPixelFormatAttribfvARB WGLEW_GET_FUN(__wglewGetPixelFormatAttribfvARB) +#define wglGetPixelFormatAttribivARB WGLEW_GET_FUN(__wglewGetPixelFormatAttribivARB) + +#define WGLEW_ARB_pixel_format WGLEW_GET_VAR(__WGLEW_ARB_pixel_format) + +#endif /* WGL_ARB_pixel_format */ + +/* 
----------------------- WGL_ARB_pixel_format_float ---------------------- */ + +#ifndef WGL_ARB_pixel_format_float +#define WGL_ARB_pixel_format_float 1 + +#define WGL_TYPE_RGBA_FLOAT_ARB 0x21A0 + +#define WGLEW_ARB_pixel_format_float WGLEW_GET_VAR(__WGLEW_ARB_pixel_format_float) + +#endif /* WGL_ARB_pixel_format_float */ + +/* ------------------------- WGL_ARB_render_texture ------------------------ */ + +#ifndef WGL_ARB_render_texture +#define WGL_ARB_render_texture 1 + +#define WGL_BIND_TO_TEXTURE_RGB_ARB 0x2070 +#define WGL_BIND_TO_TEXTURE_RGBA_ARB 0x2071 +#define WGL_TEXTURE_FORMAT_ARB 0x2072 +#define WGL_TEXTURE_TARGET_ARB 0x2073 +#define WGL_MIPMAP_TEXTURE_ARB 0x2074 +#define WGL_TEXTURE_RGB_ARB 0x2075 +#define WGL_TEXTURE_RGBA_ARB 0x2076 +#define WGL_NO_TEXTURE_ARB 0x2077 +#define WGL_TEXTURE_CUBE_MAP_ARB 0x2078 +#define WGL_TEXTURE_1D_ARB 0x2079 +#define WGL_TEXTURE_2D_ARB 0x207A +#define WGL_MIPMAP_LEVEL_ARB 0x207B +#define WGL_CUBE_MAP_FACE_ARB 0x207C +#define WGL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB 0x207D +#define WGL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB 0x207E +#define WGL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB 0x207F +#define WGL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB 0x2080 +#define WGL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB 0x2081 +#define WGL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB 0x2082 +#define WGL_FRONT_LEFT_ARB 0x2083 +#define WGL_FRONT_RIGHT_ARB 0x2084 +#define WGL_BACK_LEFT_ARB 0x2085 +#define WGL_BACK_RIGHT_ARB 0x2086 +#define WGL_AUX0_ARB 0x2087 +#define WGL_AUX1_ARB 0x2088 +#define WGL_AUX2_ARB 0x2089 +#define WGL_AUX3_ARB 0x208A +#define WGL_AUX4_ARB 0x208B +#define WGL_AUX5_ARB 0x208C +#define WGL_AUX6_ARB 0x208D +#define WGL_AUX7_ARB 0x208E +#define WGL_AUX8_ARB 0x208F +#define WGL_AUX9_ARB 0x2090 + +typedef BOOL (WINAPI * PFNWGLBINDTEXIMAGEARBPROC) (HPBUFFERARB hPbuffer, int iBuffer); +typedef BOOL (WINAPI * PFNWGLRELEASETEXIMAGEARBPROC) (HPBUFFERARB hPbuffer, int iBuffer); +typedef BOOL (WINAPI * PFNWGLSETPBUFFERATTRIBARBPROC) (HPBUFFERARB hPbuffer, const int* 
piAttribList); + +#define wglBindTexImageARB WGLEW_GET_FUN(__wglewBindTexImageARB) +#define wglReleaseTexImageARB WGLEW_GET_FUN(__wglewReleaseTexImageARB) +#define wglSetPbufferAttribARB WGLEW_GET_FUN(__wglewSetPbufferAttribARB) + +#define WGLEW_ARB_render_texture WGLEW_GET_VAR(__WGLEW_ARB_render_texture) + +#endif /* WGL_ARB_render_texture */ + +/* ----------------------- WGL_ATI_pixel_format_float ---------------------- */ + +#ifndef WGL_ATI_pixel_format_float +#define WGL_ATI_pixel_format_float 1 + +#define WGL_TYPE_RGBA_FLOAT_ATI 0x21A0 +#define GL_RGBA_FLOAT_MODE_ATI 0x8820 +#define GL_COLOR_CLEAR_UNCLAMPED_VALUE_ATI 0x8835 + +#define WGLEW_ATI_pixel_format_float WGLEW_GET_VAR(__WGLEW_ATI_pixel_format_float) + +#endif /* WGL_ATI_pixel_format_float */ + +/* -------------------- WGL_ATI_render_texture_rectangle ------------------- */ + +#ifndef WGL_ATI_render_texture_rectangle +#define WGL_ATI_render_texture_rectangle 1 + +#define WGL_TEXTURE_RECTANGLE_ATI 0x21A5 + +#define WGLEW_ATI_render_texture_rectangle WGLEW_GET_VAR(__WGLEW_ATI_render_texture_rectangle) + +#endif /* WGL_ATI_render_texture_rectangle */ + +/* -------------------------- WGL_EXT_depth_float -------------------------- */ + +#ifndef WGL_EXT_depth_float +#define WGL_EXT_depth_float 1 + +#define WGL_DEPTH_FLOAT_EXT 0x2040 + +#define WGLEW_EXT_depth_float WGLEW_GET_VAR(__WGLEW_EXT_depth_float) + +#endif /* WGL_EXT_depth_float */ + +/* ---------------------- WGL_EXT_display_color_table ---------------------- */ + +#ifndef WGL_EXT_display_color_table +#define WGL_EXT_display_color_table 1 + +typedef GLboolean (WINAPI * PFNWGLBINDDISPLAYCOLORTABLEEXTPROC) (GLushort id); +typedef GLboolean (WINAPI * PFNWGLCREATEDISPLAYCOLORTABLEEXTPROC) (GLushort id); +typedef void (WINAPI * PFNWGLDESTROYDISPLAYCOLORTABLEEXTPROC) (GLushort id); +typedef GLboolean (WINAPI * PFNWGLLOADDISPLAYCOLORTABLEEXTPROC) (GLushort* table, GLuint length); + +#define wglBindDisplayColorTableEXT 
WGLEW_GET_FUN(__wglewBindDisplayColorTableEXT) +#define wglCreateDisplayColorTableEXT WGLEW_GET_FUN(__wglewCreateDisplayColorTableEXT) +#define wglDestroyDisplayColorTableEXT WGLEW_GET_FUN(__wglewDestroyDisplayColorTableEXT) +#define wglLoadDisplayColorTableEXT WGLEW_GET_FUN(__wglewLoadDisplayColorTableEXT) + +#define WGLEW_EXT_display_color_table WGLEW_GET_VAR(__WGLEW_EXT_display_color_table) + +#endif /* WGL_EXT_display_color_table */ + +/* ----------------------- WGL_EXT_extensions_string ----------------------- */ + +#ifndef WGL_EXT_extensions_string +#define WGL_EXT_extensions_string 1 + +typedef const char* (WINAPI * PFNWGLGETEXTENSIONSSTRINGEXTPROC) (void); + +#define wglGetExtensionsStringEXT WGLEW_GET_FUN(__wglewGetExtensionsStringEXT) + +#define WGLEW_EXT_extensions_string WGLEW_GET_VAR(__WGLEW_EXT_extensions_string) + +#endif /* WGL_EXT_extensions_string */ + +/* ------------------------ WGL_EXT_framebuffer_sRGB ----------------------- */ + +#ifndef WGL_EXT_framebuffer_sRGB +#define WGL_EXT_framebuffer_sRGB 1 + +#define WGL_FRAMEBUFFER_SRGB_CAPABLE_EXT 0x20A9 + +#define WGLEW_EXT_framebuffer_sRGB WGLEW_GET_VAR(__WGLEW_EXT_framebuffer_sRGB) + +#endif /* WGL_EXT_framebuffer_sRGB */ + +/* ----------------------- WGL_EXT_make_current_read ----------------------- */ + +#ifndef WGL_EXT_make_current_read +#define WGL_EXT_make_current_read 1 + +#define ERROR_INVALID_PIXEL_TYPE_EXT 0x2043 + +typedef HDC (WINAPI * PFNWGLGETCURRENTREADDCEXTPROC) (VOID); +typedef BOOL (WINAPI * PFNWGLMAKECONTEXTCURRENTEXTPROC) (HDC hDrawDC, HDC hReadDC, HGLRC hglrc); + +#define wglGetCurrentReadDCEXT WGLEW_GET_FUN(__wglewGetCurrentReadDCEXT) +#define wglMakeContextCurrentEXT WGLEW_GET_FUN(__wglewMakeContextCurrentEXT) + +#define WGLEW_EXT_make_current_read WGLEW_GET_VAR(__WGLEW_EXT_make_current_read) + +#endif /* WGL_EXT_make_current_read */ + +/* -------------------------- WGL_EXT_multisample -------------------------- */ + +#ifndef WGL_EXT_multisample +#define WGL_EXT_multisample 
1 + +#define WGL_SAMPLE_BUFFERS_EXT 0x2041 +#define WGL_SAMPLES_EXT 0x2042 + +#define WGLEW_EXT_multisample WGLEW_GET_VAR(__WGLEW_EXT_multisample) + +#endif /* WGL_EXT_multisample */ + +/* ---------------------------- WGL_EXT_pbuffer ---------------------------- */ + +#ifndef WGL_EXT_pbuffer +#define WGL_EXT_pbuffer 1 + +#define WGL_DRAW_TO_PBUFFER_EXT 0x202D +#define WGL_MAX_PBUFFER_PIXELS_EXT 0x202E +#define WGL_MAX_PBUFFER_WIDTH_EXT 0x202F +#define WGL_MAX_PBUFFER_HEIGHT_EXT 0x2030 +#define WGL_OPTIMAL_PBUFFER_WIDTH_EXT 0x2031 +#define WGL_OPTIMAL_PBUFFER_HEIGHT_EXT 0x2032 +#define WGL_PBUFFER_LARGEST_EXT 0x2033 +#define WGL_PBUFFER_WIDTH_EXT 0x2034 +#define WGL_PBUFFER_HEIGHT_EXT 0x2035 + +DECLARE_HANDLE(HPBUFFEREXT); + +typedef HPBUFFEREXT (WINAPI * PFNWGLCREATEPBUFFEREXTPROC) (HDC hDC, int iPixelFormat, int iWidth, int iHeight, const int* piAttribList); +typedef BOOL (WINAPI * PFNWGLDESTROYPBUFFEREXTPROC) (HPBUFFEREXT hPbuffer); +typedef HDC (WINAPI * PFNWGLGETPBUFFERDCEXTPROC) (HPBUFFEREXT hPbuffer); +typedef BOOL (WINAPI * PFNWGLQUERYPBUFFEREXTPROC) (HPBUFFEREXT hPbuffer, int iAttribute, int* piValue); +typedef int (WINAPI * PFNWGLRELEASEPBUFFERDCEXTPROC) (HPBUFFEREXT hPbuffer, HDC hDC); + +#define wglCreatePbufferEXT WGLEW_GET_FUN(__wglewCreatePbufferEXT) +#define wglDestroyPbufferEXT WGLEW_GET_FUN(__wglewDestroyPbufferEXT) +#define wglGetPbufferDCEXT WGLEW_GET_FUN(__wglewGetPbufferDCEXT) +#define wglQueryPbufferEXT WGLEW_GET_FUN(__wglewQueryPbufferEXT) +#define wglReleasePbufferDCEXT WGLEW_GET_FUN(__wglewReleasePbufferDCEXT) + +#define WGLEW_EXT_pbuffer WGLEW_GET_VAR(__WGLEW_EXT_pbuffer) + +#endif /* WGL_EXT_pbuffer */ + +/* -------------------------- WGL_EXT_pixel_format ------------------------- */ + +#ifndef WGL_EXT_pixel_format +#define WGL_EXT_pixel_format 1 + +#define WGL_NUMBER_PIXEL_FORMATS_EXT 0x2000 +#define WGL_DRAW_TO_WINDOW_EXT 0x2001 +#define WGL_DRAW_TO_BITMAP_EXT 0x2002 +#define WGL_ACCELERATION_EXT 0x2003 +#define WGL_NEED_PALETTE_EXT 
0x2004 +#define WGL_NEED_SYSTEM_PALETTE_EXT 0x2005 +#define WGL_SWAP_LAYER_BUFFERS_EXT 0x2006 +#define WGL_SWAP_METHOD_EXT 0x2007 +#define WGL_NUMBER_OVERLAYS_EXT 0x2008 +#define WGL_NUMBER_UNDERLAYS_EXT 0x2009 +#define WGL_TRANSPARENT_EXT 0x200A +#define WGL_TRANSPARENT_VALUE_EXT 0x200B +#define WGL_SHARE_DEPTH_EXT 0x200C +#define WGL_SHARE_STENCIL_EXT 0x200D +#define WGL_SHARE_ACCUM_EXT 0x200E +#define WGL_SUPPORT_GDI_EXT 0x200F +#define WGL_SUPPORT_OPENGL_EXT 0x2010 +#define WGL_DOUBLE_BUFFER_EXT 0x2011 +#define WGL_STEREO_EXT 0x2012 +#define WGL_PIXEL_TYPE_EXT 0x2013 +#define WGL_COLOR_BITS_EXT 0x2014 +#define WGL_RED_BITS_EXT 0x2015 +#define WGL_RED_SHIFT_EXT 0x2016 +#define WGL_GREEN_BITS_EXT 0x2017 +#define WGL_GREEN_SHIFT_EXT 0x2018 +#define WGL_BLUE_BITS_EXT 0x2019 +#define WGL_BLUE_SHIFT_EXT 0x201A +#define WGL_ALPHA_BITS_EXT 0x201B +#define WGL_ALPHA_SHIFT_EXT 0x201C +#define WGL_ACCUM_BITS_EXT 0x201D +#define WGL_ACCUM_RED_BITS_EXT 0x201E +#define WGL_ACCUM_GREEN_BITS_EXT 0x201F +#define WGL_ACCUM_BLUE_BITS_EXT 0x2020 +#define WGL_ACCUM_ALPHA_BITS_EXT 0x2021 +#define WGL_DEPTH_BITS_EXT 0x2022 +#define WGL_STENCIL_BITS_EXT 0x2023 +#define WGL_AUX_BUFFERS_EXT 0x2024 +#define WGL_NO_ACCELERATION_EXT 0x2025 +#define WGL_GENERIC_ACCELERATION_EXT 0x2026 +#define WGL_FULL_ACCELERATION_EXT 0x2027 +#define WGL_SWAP_EXCHANGE_EXT 0x2028 +#define WGL_SWAP_COPY_EXT 0x2029 +#define WGL_SWAP_UNDEFINED_EXT 0x202A +#define WGL_TYPE_RGBA_EXT 0x202B +#define WGL_TYPE_COLORINDEX_EXT 0x202C + +typedef BOOL (WINAPI * PFNWGLCHOOSEPIXELFORMATEXTPROC) (HDC hdc, const int* piAttribIList, const FLOAT *pfAttribFList, UINT nMaxFormats, int *piFormats, UINT *nNumFormats); +typedef BOOL (WINAPI * PFNWGLGETPIXELFORMATATTRIBFVEXTPROC) (HDC hdc, int iPixelFormat, int iLayerPlane, UINT nAttributes, int* piAttributes, FLOAT *pfValues); +typedef BOOL (WINAPI * PFNWGLGETPIXELFORMATATTRIBIVEXTPROC) (HDC hdc, int iPixelFormat, int iLayerPlane, UINT nAttributes, int* piAttributes, int 
*piValues); + +#define wglChoosePixelFormatEXT WGLEW_GET_FUN(__wglewChoosePixelFormatEXT) +#define wglGetPixelFormatAttribfvEXT WGLEW_GET_FUN(__wglewGetPixelFormatAttribfvEXT) +#define wglGetPixelFormatAttribivEXT WGLEW_GET_FUN(__wglewGetPixelFormatAttribivEXT) + +#define WGLEW_EXT_pixel_format WGLEW_GET_VAR(__WGLEW_EXT_pixel_format) + +#endif /* WGL_EXT_pixel_format */ + +/* ------------------- WGL_EXT_pixel_format_packed_float ------------------- */ + +#ifndef WGL_EXT_pixel_format_packed_float +#define WGL_EXT_pixel_format_packed_float 1 + +#define WGL_TYPE_RGBA_UNSIGNED_FLOAT_EXT 0x20A8 + +#define WGLEW_EXT_pixel_format_packed_float WGLEW_GET_VAR(__WGLEW_EXT_pixel_format_packed_float) + +#endif /* WGL_EXT_pixel_format_packed_float */ + +/* -------------------------- WGL_EXT_swap_control ------------------------- */ + +#ifndef WGL_EXT_swap_control +#define WGL_EXT_swap_control 1 + +typedef int (WINAPI * PFNWGLGETSWAPINTERVALEXTPROC) (void); +typedef BOOL (WINAPI * PFNWGLSWAPINTERVALEXTPROC) (int interval); + +#define wglGetSwapIntervalEXT WGLEW_GET_FUN(__wglewGetSwapIntervalEXT) +#define wglSwapIntervalEXT WGLEW_GET_FUN(__wglewSwapIntervalEXT) + +#define WGLEW_EXT_swap_control WGLEW_GET_VAR(__WGLEW_EXT_swap_control) + +#endif /* WGL_EXT_swap_control */ + +/* --------------------- WGL_I3D_digital_video_control --------------------- */ + +#ifndef WGL_I3D_digital_video_control +#define WGL_I3D_digital_video_control 1 + +#define WGL_DIGITAL_VIDEO_CURSOR_ALPHA_FRAMEBUFFER_I3D 0x2050 +#define WGL_DIGITAL_VIDEO_CURSOR_ALPHA_VALUE_I3D 0x2051 +#define WGL_DIGITAL_VIDEO_CURSOR_INCLUDED_I3D 0x2052 +#define WGL_DIGITAL_VIDEO_GAMMA_CORRECTED_I3D 0x2053 + +typedef BOOL (WINAPI * PFNWGLGETDIGITALVIDEOPARAMETERSI3DPROC) (HDC hDC, int iAttribute, int* piValue); +typedef BOOL (WINAPI * PFNWGLSETDIGITALVIDEOPARAMETERSI3DPROC) (HDC hDC, int iAttribute, const int* piValue); + +#define wglGetDigitalVideoParametersI3D WGLEW_GET_FUN(__wglewGetDigitalVideoParametersI3D) +#define 
wglSetDigitalVideoParametersI3D WGLEW_GET_FUN(__wglewSetDigitalVideoParametersI3D) + +#define WGLEW_I3D_digital_video_control WGLEW_GET_VAR(__WGLEW_I3D_digital_video_control) + +#endif /* WGL_I3D_digital_video_control */ + +/* ----------------------------- WGL_I3D_gamma ----------------------------- */ + +#ifndef WGL_I3D_gamma +#define WGL_I3D_gamma 1 + +#define WGL_GAMMA_TABLE_SIZE_I3D 0x204E +#define WGL_GAMMA_EXCLUDE_DESKTOP_I3D 0x204F + +typedef BOOL (WINAPI * PFNWGLGETGAMMATABLEI3DPROC) (HDC hDC, int iEntries, USHORT* puRed, USHORT *puGreen, USHORT *puBlue); +typedef BOOL (WINAPI * PFNWGLGETGAMMATABLEPARAMETERSI3DPROC) (HDC hDC, int iAttribute, int* piValue); +typedef BOOL (WINAPI * PFNWGLSETGAMMATABLEI3DPROC) (HDC hDC, int iEntries, const USHORT* puRed, const USHORT *puGreen, const USHORT *puBlue); +typedef BOOL (WINAPI * PFNWGLSETGAMMATABLEPARAMETERSI3DPROC) (HDC hDC, int iAttribute, const int* piValue); + +#define wglGetGammaTableI3D WGLEW_GET_FUN(__wglewGetGammaTableI3D) +#define wglGetGammaTableParametersI3D WGLEW_GET_FUN(__wglewGetGammaTableParametersI3D) +#define wglSetGammaTableI3D WGLEW_GET_FUN(__wglewSetGammaTableI3D) +#define wglSetGammaTableParametersI3D WGLEW_GET_FUN(__wglewSetGammaTableParametersI3D) + +#define WGLEW_I3D_gamma WGLEW_GET_VAR(__WGLEW_I3D_gamma) + +#endif /* WGL_I3D_gamma */ + +/* ---------------------------- WGL_I3D_genlock ---------------------------- */ + +#ifndef WGL_I3D_genlock +#define WGL_I3D_genlock 1 + +#define WGL_GENLOCK_SOURCE_MULTIVIEW_I3D 0x2044 +#define WGL_GENLOCK_SOURCE_EXTERNAL_SYNC_I3D 0x2045 +#define WGL_GENLOCK_SOURCE_EXTERNAL_FIELD_I3D 0x2046 +#define WGL_GENLOCK_SOURCE_EXTERNAL_TTL_I3D 0x2047 +#define WGL_GENLOCK_SOURCE_DIGITAL_SYNC_I3D 0x2048 +#define WGL_GENLOCK_SOURCE_DIGITAL_FIELD_I3D 0x2049 +#define WGL_GENLOCK_SOURCE_EDGE_FALLING_I3D 0x204A +#define WGL_GENLOCK_SOURCE_EDGE_RISING_I3D 0x204B +#define WGL_GENLOCK_SOURCE_EDGE_BOTH_I3D 0x204C + +typedef BOOL (WINAPI * PFNWGLDISABLEGENLOCKI3DPROC) (HDC hDC); 
+typedef BOOL (WINAPI * PFNWGLENABLEGENLOCKI3DPROC) (HDC hDC); +typedef BOOL (WINAPI * PFNWGLGENLOCKSAMPLERATEI3DPROC) (HDC hDC, UINT uRate); +typedef BOOL (WINAPI * PFNWGLGENLOCKSOURCEDELAYI3DPROC) (HDC hDC, UINT uDelay); +typedef BOOL (WINAPI * PFNWGLGENLOCKSOURCEEDGEI3DPROC) (HDC hDC, UINT uEdge); +typedef BOOL (WINAPI * PFNWGLGENLOCKSOURCEI3DPROC) (HDC hDC, UINT uSource); +typedef BOOL (WINAPI * PFNWGLGETGENLOCKSAMPLERATEI3DPROC) (HDC hDC, UINT* uRate); +typedef BOOL (WINAPI * PFNWGLGETGENLOCKSOURCEDELAYI3DPROC) (HDC hDC, UINT* uDelay); +typedef BOOL (WINAPI * PFNWGLGETGENLOCKSOURCEEDGEI3DPROC) (HDC hDC, UINT* uEdge); +typedef BOOL (WINAPI * PFNWGLGETGENLOCKSOURCEI3DPROC) (HDC hDC, UINT* uSource); +typedef BOOL (WINAPI * PFNWGLISENABLEDGENLOCKI3DPROC) (HDC hDC, BOOL* pFlag); +typedef BOOL (WINAPI * PFNWGLQUERYGENLOCKMAXSOURCEDELAYI3DPROC) (HDC hDC, UINT* uMaxLineDelay, UINT *uMaxPixelDelay); + +#define wglDisableGenlockI3D WGLEW_GET_FUN(__wglewDisableGenlockI3D) +#define wglEnableGenlockI3D WGLEW_GET_FUN(__wglewEnableGenlockI3D) +#define wglGenlockSampleRateI3D WGLEW_GET_FUN(__wglewGenlockSampleRateI3D) +#define wglGenlockSourceDelayI3D WGLEW_GET_FUN(__wglewGenlockSourceDelayI3D) +#define wglGenlockSourceEdgeI3D WGLEW_GET_FUN(__wglewGenlockSourceEdgeI3D) +#define wglGenlockSourceI3D WGLEW_GET_FUN(__wglewGenlockSourceI3D) +#define wglGetGenlockSampleRateI3D WGLEW_GET_FUN(__wglewGetGenlockSampleRateI3D) +#define wglGetGenlockSourceDelayI3D WGLEW_GET_FUN(__wglewGetGenlockSourceDelayI3D) +#define wglGetGenlockSourceEdgeI3D WGLEW_GET_FUN(__wglewGetGenlockSourceEdgeI3D) +#define wglGetGenlockSourceI3D WGLEW_GET_FUN(__wglewGetGenlockSourceI3D) +#define wglIsEnabledGenlockI3D WGLEW_GET_FUN(__wglewIsEnabledGenlockI3D) +#define wglQueryGenlockMaxSourceDelayI3D WGLEW_GET_FUN(__wglewQueryGenlockMaxSourceDelayI3D) + +#define WGLEW_I3D_genlock WGLEW_GET_VAR(__WGLEW_I3D_genlock) + +#endif /* WGL_I3D_genlock */ + +/* -------------------------- WGL_I3D_image_buffer 
------------------------- */ + +#ifndef WGL_I3D_image_buffer +#define WGL_I3D_image_buffer 1 + +#define WGL_IMAGE_BUFFER_MIN_ACCESS_I3D 0x00000001 +#define WGL_IMAGE_BUFFER_LOCK_I3D 0x00000002 + +typedef BOOL (WINAPI * PFNWGLASSOCIATEIMAGEBUFFEREVENTSI3DPROC) (HDC hdc, HANDLE* pEvent, LPVOID *pAddress, DWORD *pSize, UINT count); +typedef LPVOID (WINAPI * PFNWGLCREATEIMAGEBUFFERI3DPROC) (HDC hDC, DWORD dwSize, UINT uFlags); +typedef BOOL (WINAPI * PFNWGLDESTROYIMAGEBUFFERI3DPROC) (HDC hDC, LPVOID pAddress); +typedef BOOL (WINAPI * PFNWGLRELEASEIMAGEBUFFEREVENTSI3DPROC) (HDC hdc, LPVOID* pAddress, UINT count); + +#define wglAssociateImageBufferEventsI3D WGLEW_GET_FUN(__wglewAssociateImageBufferEventsI3D) +#define wglCreateImageBufferI3D WGLEW_GET_FUN(__wglewCreateImageBufferI3D) +#define wglDestroyImageBufferI3D WGLEW_GET_FUN(__wglewDestroyImageBufferI3D) +#define wglReleaseImageBufferEventsI3D WGLEW_GET_FUN(__wglewReleaseImageBufferEventsI3D) + +#define WGLEW_I3D_image_buffer WGLEW_GET_VAR(__WGLEW_I3D_image_buffer) + +#endif /* WGL_I3D_image_buffer */ + +/* ------------------------ WGL_I3D_swap_frame_lock ------------------------ */ + +#ifndef WGL_I3D_swap_frame_lock +#define WGL_I3D_swap_frame_lock 1 + +typedef BOOL (WINAPI * PFNWGLDISABLEFRAMELOCKI3DPROC) (VOID); +typedef BOOL (WINAPI * PFNWGLENABLEFRAMELOCKI3DPROC) (VOID); +typedef BOOL (WINAPI * PFNWGLISENABLEDFRAMELOCKI3DPROC) (BOOL* pFlag); +typedef BOOL (WINAPI * PFNWGLQUERYFRAMELOCKMASTERI3DPROC) (BOOL* pFlag); + +#define wglDisableFrameLockI3D WGLEW_GET_FUN(__wglewDisableFrameLockI3D) +#define wglEnableFrameLockI3D WGLEW_GET_FUN(__wglewEnableFrameLockI3D) +#define wglIsEnabledFrameLockI3D WGLEW_GET_FUN(__wglewIsEnabledFrameLockI3D) +#define wglQueryFrameLockMasterI3D WGLEW_GET_FUN(__wglewQueryFrameLockMasterI3D) + +#define WGLEW_I3D_swap_frame_lock WGLEW_GET_VAR(__WGLEW_I3D_swap_frame_lock) + +#endif /* WGL_I3D_swap_frame_lock */ + +/* ------------------------ WGL_I3D_swap_frame_usage 
----------------------- */ + +#ifndef WGL_I3D_swap_frame_usage +#define WGL_I3D_swap_frame_usage 1 + +typedef BOOL (WINAPI * PFNWGLBEGINFRAMETRACKINGI3DPROC) (void); +typedef BOOL (WINAPI * PFNWGLENDFRAMETRACKINGI3DPROC) (void); +typedef BOOL (WINAPI * PFNWGLGETFRAMEUSAGEI3DPROC) (float* pUsage); +typedef BOOL (WINAPI * PFNWGLQUERYFRAMETRACKINGI3DPROC) (DWORD* pFrameCount, DWORD *pMissedFrames, float *pLastMissedUsage); + +#define wglBeginFrameTrackingI3D WGLEW_GET_FUN(__wglewBeginFrameTrackingI3D) +#define wglEndFrameTrackingI3D WGLEW_GET_FUN(__wglewEndFrameTrackingI3D) +#define wglGetFrameUsageI3D WGLEW_GET_FUN(__wglewGetFrameUsageI3D) +#define wglQueryFrameTrackingI3D WGLEW_GET_FUN(__wglewQueryFrameTrackingI3D) + +#define WGLEW_I3D_swap_frame_usage WGLEW_GET_VAR(__WGLEW_I3D_swap_frame_usage) + +#endif /* WGL_I3D_swap_frame_usage */ + +/* -------------------------- WGL_NV_float_buffer -------------------------- */ + +#ifndef WGL_NV_float_buffer +#define WGL_NV_float_buffer 1 + +#define WGL_FLOAT_COMPONENTS_NV 0x20B0 +#define WGL_BIND_TO_TEXTURE_RECTANGLE_FLOAT_R_NV 0x20B1 +#define WGL_BIND_TO_TEXTURE_RECTANGLE_FLOAT_RG_NV 0x20B2 +#define WGL_BIND_TO_TEXTURE_RECTANGLE_FLOAT_RGB_NV 0x20B3 +#define WGL_BIND_TO_TEXTURE_RECTANGLE_FLOAT_RGBA_NV 0x20B4 +#define WGL_TEXTURE_FLOAT_R_NV 0x20B5 +#define WGL_TEXTURE_FLOAT_RG_NV 0x20B6 +#define WGL_TEXTURE_FLOAT_RGB_NV 0x20B7 +#define WGL_TEXTURE_FLOAT_RGBA_NV 0x20B8 + +#define WGLEW_NV_float_buffer WGLEW_GET_VAR(__WGLEW_NV_float_buffer) + +#endif /* WGL_NV_float_buffer */ + +/* -------------------------- WGL_NV_gpu_affinity -------------------------- */ + +#ifndef WGL_NV_gpu_affinity +#define WGL_NV_gpu_affinity 1 + +#define WGL_ERROR_INCOMPATIBLE_AFFINITY_MASKS_NV 0x20D0 +#define WGL_ERROR_MISSING_AFFINITY_MASK_NV 0x20D1 + +DECLARE_HANDLE(HGPUNV); +typedef struct _GPU_DEVICE { + DWORD cb; + CHAR DeviceName[32]; + CHAR DeviceString[128]; + DWORD Flags; + RECT rcVirtualScreen; +} GPU_DEVICE, *PGPU_DEVICE; + +typedef HDC 
(WINAPI * PFNWGLCREATEAFFINITYDCNVPROC) (const HGPUNV *phGpuList); +typedef BOOL (WINAPI * PFNWGLDELETEDCNVPROC) (HDC hdc); +typedef BOOL (WINAPI * PFNWGLENUMGPUDEVICESNVPROC) (HGPUNV hGpu, UINT iDeviceIndex, PGPU_DEVICE lpGpuDevice); +typedef BOOL (WINAPI * PFNWGLENUMGPUSFROMAFFINITYDCNVPROC) (HDC hAffinityDC, UINT iGpuIndex, HGPUNV *hGpu); +typedef BOOL (WINAPI * PFNWGLENUMGPUSNVPROC) (UINT iGpuIndex, HGPUNV *phGpu); + +#define wglCreateAffinityDCNV WGLEW_GET_FUN(__wglewCreateAffinityDCNV) +#define wglDeleteDCNV WGLEW_GET_FUN(__wglewDeleteDCNV) +#define wglEnumGpuDevicesNV WGLEW_GET_FUN(__wglewEnumGpuDevicesNV) +#define wglEnumGpusFromAffinityDCNV WGLEW_GET_FUN(__wglewEnumGpusFromAffinityDCNV) +#define wglEnumGpusNV WGLEW_GET_FUN(__wglewEnumGpusNV) + +#define WGLEW_NV_gpu_affinity WGLEW_GET_VAR(__WGLEW_NV_gpu_affinity) + +#endif /* WGL_NV_gpu_affinity */ + +/* -------------------------- WGL_NV_present_video ------------------------- */ + +#ifndef WGL_NV_present_video +#define WGL_NV_present_video 1 + +#define WGL_NUM_VIDEO_SLOTS_NV 0x20F0 + +DECLARE_HANDLE(HVIDEOOUTPUTDEVICENV); + +typedef BOOL (WINAPI * PFNWGLBINDVIDEODEVICENVPROC) (HDC hDc, unsigned int uVideoSlot, HVIDEOOUTPUTDEVICENV hVideoDevice, const int* piAttribList); +typedef int (WINAPI * PFNWGLENUMERATEVIDEODEVICESNVPROC) (HDC hDc, HVIDEOOUTPUTDEVICENV* phDeviceList); +typedef BOOL (WINAPI * PFNWGLQUERYCURRENTCONTEXTNVPROC) (int iAttribute, int* piValue); + +#define wglBindVideoDeviceNV WGLEW_GET_FUN(__wglewBindVideoDeviceNV) +#define wglEnumerateVideoDevicesNV WGLEW_GET_FUN(__wglewEnumerateVideoDevicesNV) +#define wglQueryCurrentContextNV WGLEW_GET_FUN(__wglewQueryCurrentContextNV) + +#define WGLEW_NV_present_video WGLEW_GET_VAR(__WGLEW_NV_present_video) + +#endif /* WGL_NV_present_video */ + +/* ---------------------- WGL_NV_render_depth_texture ---------------------- */ + +#ifndef WGL_NV_render_depth_texture +#define WGL_NV_render_depth_texture 1 + +#define WGL_NO_TEXTURE_ARB 0x2077 +#define 
WGL_BIND_TO_TEXTURE_DEPTH_NV 0x20A3 +#define WGL_BIND_TO_TEXTURE_RECTANGLE_DEPTH_NV 0x20A4 +#define WGL_DEPTH_TEXTURE_FORMAT_NV 0x20A5 +#define WGL_TEXTURE_DEPTH_COMPONENT_NV 0x20A6 +#define WGL_DEPTH_COMPONENT_NV 0x20A7 + +#define WGLEW_NV_render_depth_texture WGLEW_GET_VAR(__WGLEW_NV_render_depth_texture) + +#endif /* WGL_NV_render_depth_texture */ + +/* -------------------- WGL_NV_render_texture_rectangle -------------------- */ + +#ifndef WGL_NV_render_texture_rectangle +#define WGL_NV_render_texture_rectangle 1 + +#define WGL_BIND_TO_TEXTURE_RECTANGLE_RGB_NV 0x20A0 +#define WGL_BIND_TO_TEXTURE_RECTANGLE_RGBA_NV 0x20A1 +#define WGL_TEXTURE_RECTANGLE_NV 0x20A2 + +#define WGLEW_NV_render_texture_rectangle WGLEW_GET_VAR(__WGLEW_NV_render_texture_rectangle) + +#endif /* WGL_NV_render_texture_rectangle */ + +/* --------------------------- WGL_NV_swap_group --------------------------- */ + +#ifndef WGL_NV_swap_group +#define WGL_NV_swap_group 1 + +typedef BOOL (WINAPI * PFNWGLBINDSWAPBARRIERNVPROC) (GLuint group, GLuint barrier); +typedef BOOL (WINAPI * PFNWGLJOINSWAPGROUPNVPROC) (HDC hDC, GLuint group); +typedef BOOL (WINAPI * PFNWGLQUERYFRAMECOUNTNVPROC) (HDC hDC, GLuint* count); +typedef BOOL (WINAPI * PFNWGLQUERYMAXSWAPGROUPSNVPROC) (HDC hDC, GLuint* maxGroups, GLuint *maxBarriers); +typedef BOOL (WINAPI * PFNWGLQUERYSWAPGROUPNVPROC) (HDC hDC, GLuint* group); +typedef BOOL (WINAPI * PFNWGLRESETFRAMECOUNTNVPROC) (HDC hDC); + +#define wglBindSwapBarrierNV WGLEW_GET_FUN(__wglewBindSwapBarrierNV) +#define wglJoinSwapGroupNV WGLEW_GET_FUN(__wglewJoinSwapGroupNV) +#define wglQueryFrameCountNV WGLEW_GET_FUN(__wglewQueryFrameCountNV) +#define wglQueryMaxSwapGroupsNV WGLEW_GET_FUN(__wglewQueryMaxSwapGroupsNV) +#define wglQuerySwapGroupNV WGLEW_GET_FUN(__wglewQuerySwapGroupNV) +#define wglResetFrameCountNV WGLEW_GET_FUN(__wglewResetFrameCountNV) + +#define WGLEW_NV_swap_group WGLEW_GET_VAR(__WGLEW_NV_swap_group) + +#endif /* WGL_NV_swap_group */ + +/* 
----------------------- WGL_NV_vertex_array_range ----------------------- */ + +#ifndef WGL_NV_vertex_array_range +#define WGL_NV_vertex_array_range 1 + +typedef void * (WINAPI * PFNWGLALLOCATEMEMORYNVPROC) (GLsizei size, GLfloat readFrequency, GLfloat writeFrequency, GLfloat priority); +typedef void (WINAPI * PFNWGLFREEMEMORYNVPROC) (void *pointer); + +#define wglAllocateMemoryNV WGLEW_GET_FUN(__wglewAllocateMemoryNV) +#define wglFreeMemoryNV WGLEW_GET_FUN(__wglewFreeMemoryNV) + +#define WGLEW_NV_vertex_array_range WGLEW_GET_VAR(__WGLEW_NV_vertex_array_range) + +#endif /* WGL_NV_vertex_array_range */ + +/* -------------------------- WGL_NV_video_output -------------------------- */ + +#ifndef WGL_NV_video_output +#define WGL_NV_video_output 1 + +#define WGL_BIND_TO_VIDEO_RGB_NV 0x20C0 +#define WGL_BIND_TO_VIDEO_RGBA_NV 0x20C1 +#define WGL_BIND_TO_VIDEO_RGB_AND_DEPTH_NV 0x20C2 +#define WGL_VIDEO_OUT_COLOR_NV 0x20C3 +#define WGL_VIDEO_OUT_ALPHA_NV 0x20C4 +#define WGL_VIDEO_OUT_DEPTH_NV 0x20C5 +#define WGL_VIDEO_OUT_COLOR_AND_ALPHA_NV 0x20C6 +#define WGL_VIDEO_OUT_COLOR_AND_DEPTH_NV 0x20C7 +#define WGL_VIDEO_OUT_FRAME 0x20C8 +#define WGL_VIDEO_OUT_FIELD_1 0x20C9 +#define WGL_VIDEO_OUT_FIELD_2 0x20CA +#define WGL_VIDEO_OUT_STACKED_FIELDS_1_2 0x20CB +#define WGL_VIDEO_OUT_STACKED_FIELDS_2_1 0x20CC + +DECLARE_HANDLE(HPVIDEODEV); + +typedef BOOL (WINAPI * PFNWGLBINDVIDEOIMAGENVPROC) (HPVIDEODEV hVideoDevice, HPBUFFERARB hPbuffer, int iVideoBuffer); +typedef BOOL (WINAPI * PFNWGLGETVIDEODEVICENVPROC) (HDC hDC, int numDevices, HPVIDEODEV* hVideoDevice); +typedef BOOL (WINAPI * PFNWGLGETVIDEOINFONVPROC) (HPVIDEODEV hpVideoDevice, unsigned long* pulCounterOutputPbuffer, unsigned long *pulCounterOutputVideo); +typedef BOOL (WINAPI * PFNWGLRELEASEVIDEODEVICENVPROC) (HPVIDEODEV hVideoDevice); +typedef BOOL (WINAPI * PFNWGLRELEASEVIDEOIMAGENVPROC) (HPBUFFERARB hPbuffer, int iVideoBuffer); +typedef BOOL (WINAPI * PFNWGLSENDPBUFFERTOVIDEONVPROC) (HPBUFFERARB hPbuffer, int 
iBufferType, unsigned long* pulCounterPbuffer, BOOL bBlock); + +#define wglBindVideoImageNV WGLEW_GET_FUN(__wglewBindVideoImageNV) +#define wglGetVideoDeviceNV WGLEW_GET_FUN(__wglewGetVideoDeviceNV) +#define wglGetVideoInfoNV WGLEW_GET_FUN(__wglewGetVideoInfoNV) +#define wglReleaseVideoDeviceNV WGLEW_GET_FUN(__wglewReleaseVideoDeviceNV) +#define wglReleaseVideoImageNV WGLEW_GET_FUN(__wglewReleaseVideoImageNV) +#define wglSendPbufferToVideoNV WGLEW_GET_FUN(__wglewSendPbufferToVideoNV) + +#define WGLEW_NV_video_output WGLEW_GET_VAR(__WGLEW_NV_video_output) + +#endif /* WGL_NV_video_output */ + +/* -------------------------- WGL_OML_sync_control ------------------------- */ + +#ifndef WGL_OML_sync_control +#define WGL_OML_sync_control 1 + +typedef BOOL (WINAPI * PFNWGLGETMSCRATEOMLPROC) (HDC hdc, INT32* numerator, INT32 *denominator); +typedef BOOL (WINAPI * PFNWGLGETSYNCVALUESOMLPROC) (HDC hdc, INT64* ust, INT64 *msc, INT64 *sbc); +typedef INT64 (WINAPI * PFNWGLSWAPBUFFERSMSCOMLPROC) (HDC hdc, INT64 target_msc, INT64 divisor, INT64 remainder); +typedef INT64 (WINAPI * PFNWGLSWAPLAYERBUFFERSMSCOMLPROC) (HDC hdc, INT fuPlanes, INT64 target_msc, INT64 divisor, INT64 remainder); +typedef BOOL (WINAPI * PFNWGLWAITFORMSCOMLPROC) (HDC hdc, INT64 target_msc, INT64 divisor, INT64 remainder, INT64* ust, INT64 *msc, INT64 *sbc); +typedef BOOL (WINAPI * PFNWGLWAITFORSBCOMLPROC) (HDC hdc, INT64 target_sbc, INT64* ust, INT64 *msc, INT64 *sbc); + +#define wglGetMscRateOML WGLEW_GET_FUN(__wglewGetMscRateOML) +#define wglGetSyncValuesOML WGLEW_GET_FUN(__wglewGetSyncValuesOML) +#define wglSwapBuffersMscOML WGLEW_GET_FUN(__wglewSwapBuffersMscOML) +#define wglSwapLayerBuffersMscOML WGLEW_GET_FUN(__wglewSwapLayerBuffersMscOML) +#define wglWaitForMscOML WGLEW_GET_FUN(__wglewWaitForMscOML) +#define wglWaitForSbcOML WGLEW_GET_FUN(__wglewWaitForSbcOML) + +#define WGLEW_OML_sync_control WGLEW_GET_VAR(__WGLEW_OML_sync_control) + +#endif /* WGL_OML_sync_control */ + +/* 
------------------------------------------------------------------------- */ + +#ifdef GLEW_MX +#define WGLEW_EXPORT +#else +#define WGLEW_EXPORT GLEWAPI +#endif /* GLEW_MX */ + +#ifdef GLEW_MX +struct WGLEWContextStruct +{ +#endif /* GLEW_MX */ + +WGLEW_EXPORT PFNWGLSETSTEREOEMITTERSTATE3DLPROC __wglewSetStereoEmitterState3DL; + +WGLEW_EXPORT PFNWGLCREATEBUFFERREGIONARBPROC __wglewCreateBufferRegionARB; +WGLEW_EXPORT PFNWGLDELETEBUFFERREGIONARBPROC __wglewDeleteBufferRegionARB; +WGLEW_EXPORT PFNWGLRESTOREBUFFERREGIONARBPROC __wglewRestoreBufferRegionARB; +WGLEW_EXPORT PFNWGLSAVEBUFFERREGIONARBPROC __wglewSaveBufferRegionARB; + +WGLEW_EXPORT PFNWGLCREATECONTEXTATTRIBSARBPROC __wglewCreateContextAttribsARB; + +WGLEW_EXPORT PFNWGLGETEXTENSIONSSTRINGARBPROC __wglewGetExtensionsStringARB; + +WGLEW_EXPORT PFNWGLGETCURRENTREADDCARBPROC __wglewGetCurrentReadDCARB; +WGLEW_EXPORT PFNWGLMAKECONTEXTCURRENTARBPROC __wglewMakeContextCurrentARB; + +WGLEW_EXPORT PFNWGLCREATEPBUFFERARBPROC __wglewCreatePbufferARB; +WGLEW_EXPORT PFNWGLDESTROYPBUFFERARBPROC __wglewDestroyPbufferARB; +WGLEW_EXPORT PFNWGLGETPBUFFERDCARBPROC __wglewGetPbufferDCARB; +WGLEW_EXPORT PFNWGLQUERYPBUFFERARBPROC __wglewQueryPbufferARB; +WGLEW_EXPORT PFNWGLRELEASEPBUFFERDCARBPROC __wglewReleasePbufferDCARB; + +WGLEW_EXPORT PFNWGLCHOOSEPIXELFORMATARBPROC __wglewChoosePixelFormatARB; +WGLEW_EXPORT PFNWGLGETPIXELFORMATATTRIBFVARBPROC __wglewGetPixelFormatAttribfvARB; +WGLEW_EXPORT PFNWGLGETPIXELFORMATATTRIBIVARBPROC __wglewGetPixelFormatAttribivARB; + +WGLEW_EXPORT PFNWGLBINDTEXIMAGEARBPROC __wglewBindTexImageARB; +WGLEW_EXPORT PFNWGLRELEASETEXIMAGEARBPROC __wglewReleaseTexImageARB; +WGLEW_EXPORT PFNWGLSETPBUFFERATTRIBARBPROC __wglewSetPbufferAttribARB; + +WGLEW_EXPORT PFNWGLBINDDISPLAYCOLORTABLEEXTPROC __wglewBindDisplayColorTableEXT; +WGLEW_EXPORT PFNWGLCREATEDISPLAYCOLORTABLEEXTPROC __wglewCreateDisplayColorTableEXT; +WGLEW_EXPORT PFNWGLDESTROYDISPLAYCOLORTABLEEXTPROC __wglewDestroyDisplayColorTableEXT; 
+WGLEW_EXPORT PFNWGLLOADDISPLAYCOLORTABLEEXTPROC __wglewLoadDisplayColorTableEXT; + +WGLEW_EXPORT PFNWGLGETEXTENSIONSSTRINGEXTPROC __wglewGetExtensionsStringEXT; + +WGLEW_EXPORT PFNWGLGETCURRENTREADDCEXTPROC __wglewGetCurrentReadDCEXT; +WGLEW_EXPORT PFNWGLMAKECONTEXTCURRENTEXTPROC __wglewMakeContextCurrentEXT; + +WGLEW_EXPORT PFNWGLCREATEPBUFFEREXTPROC __wglewCreatePbufferEXT; +WGLEW_EXPORT PFNWGLDESTROYPBUFFEREXTPROC __wglewDestroyPbufferEXT; +WGLEW_EXPORT PFNWGLGETPBUFFERDCEXTPROC __wglewGetPbufferDCEXT; +WGLEW_EXPORT PFNWGLQUERYPBUFFEREXTPROC __wglewQueryPbufferEXT; +WGLEW_EXPORT PFNWGLRELEASEPBUFFERDCEXTPROC __wglewReleasePbufferDCEXT; + +WGLEW_EXPORT PFNWGLCHOOSEPIXELFORMATEXTPROC __wglewChoosePixelFormatEXT; +WGLEW_EXPORT PFNWGLGETPIXELFORMATATTRIBFVEXTPROC __wglewGetPixelFormatAttribfvEXT; +WGLEW_EXPORT PFNWGLGETPIXELFORMATATTRIBIVEXTPROC __wglewGetPixelFormatAttribivEXT; + +WGLEW_EXPORT PFNWGLGETSWAPINTERVALEXTPROC __wglewGetSwapIntervalEXT; +WGLEW_EXPORT PFNWGLSWAPINTERVALEXTPROC __wglewSwapIntervalEXT; + +WGLEW_EXPORT PFNWGLGETDIGITALVIDEOPARAMETERSI3DPROC __wglewGetDigitalVideoParametersI3D; +WGLEW_EXPORT PFNWGLSETDIGITALVIDEOPARAMETERSI3DPROC __wglewSetDigitalVideoParametersI3D; + +WGLEW_EXPORT PFNWGLGETGAMMATABLEI3DPROC __wglewGetGammaTableI3D; +WGLEW_EXPORT PFNWGLGETGAMMATABLEPARAMETERSI3DPROC __wglewGetGammaTableParametersI3D; +WGLEW_EXPORT PFNWGLSETGAMMATABLEI3DPROC __wglewSetGammaTableI3D; +WGLEW_EXPORT PFNWGLSETGAMMATABLEPARAMETERSI3DPROC __wglewSetGammaTableParametersI3D; + +WGLEW_EXPORT PFNWGLDISABLEGENLOCKI3DPROC __wglewDisableGenlockI3D; +WGLEW_EXPORT PFNWGLENABLEGENLOCKI3DPROC __wglewEnableGenlockI3D; +WGLEW_EXPORT PFNWGLGENLOCKSAMPLERATEI3DPROC __wglewGenlockSampleRateI3D; +WGLEW_EXPORT PFNWGLGENLOCKSOURCEDELAYI3DPROC __wglewGenlockSourceDelayI3D; +WGLEW_EXPORT PFNWGLGENLOCKSOURCEEDGEI3DPROC __wglewGenlockSourceEdgeI3D; +WGLEW_EXPORT PFNWGLGENLOCKSOURCEI3DPROC __wglewGenlockSourceI3D; +WGLEW_EXPORT PFNWGLGETGENLOCKSAMPLERATEI3DPROC 
__wglewGetGenlockSampleRateI3D; +WGLEW_EXPORT PFNWGLGETGENLOCKSOURCEDELAYI3DPROC __wglewGetGenlockSourceDelayI3D; +WGLEW_EXPORT PFNWGLGETGENLOCKSOURCEEDGEI3DPROC __wglewGetGenlockSourceEdgeI3D; +WGLEW_EXPORT PFNWGLGETGENLOCKSOURCEI3DPROC __wglewGetGenlockSourceI3D; +WGLEW_EXPORT PFNWGLISENABLEDGENLOCKI3DPROC __wglewIsEnabledGenlockI3D; +WGLEW_EXPORT PFNWGLQUERYGENLOCKMAXSOURCEDELAYI3DPROC __wglewQueryGenlockMaxSourceDelayI3D; + +WGLEW_EXPORT PFNWGLASSOCIATEIMAGEBUFFEREVENTSI3DPROC __wglewAssociateImageBufferEventsI3D; +WGLEW_EXPORT PFNWGLCREATEIMAGEBUFFERI3DPROC __wglewCreateImageBufferI3D; +WGLEW_EXPORT PFNWGLDESTROYIMAGEBUFFERI3DPROC __wglewDestroyImageBufferI3D; +WGLEW_EXPORT PFNWGLRELEASEIMAGEBUFFEREVENTSI3DPROC __wglewReleaseImageBufferEventsI3D; + +WGLEW_EXPORT PFNWGLDISABLEFRAMELOCKI3DPROC __wglewDisableFrameLockI3D; +WGLEW_EXPORT PFNWGLENABLEFRAMELOCKI3DPROC __wglewEnableFrameLockI3D; +WGLEW_EXPORT PFNWGLISENABLEDFRAMELOCKI3DPROC __wglewIsEnabledFrameLockI3D; +WGLEW_EXPORT PFNWGLQUERYFRAMELOCKMASTERI3DPROC __wglewQueryFrameLockMasterI3D; + +WGLEW_EXPORT PFNWGLBEGINFRAMETRACKINGI3DPROC __wglewBeginFrameTrackingI3D; +WGLEW_EXPORT PFNWGLENDFRAMETRACKINGI3DPROC __wglewEndFrameTrackingI3D; +WGLEW_EXPORT PFNWGLGETFRAMEUSAGEI3DPROC __wglewGetFrameUsageI3D; +WGLEW_EXPORT PFNWGLQUERYFRAMETRACKINGI3DPROC __wglewQueryFrameTrackingI3D; + +WGLEW_EXPORT PFNWGLCREATEAFFINITYDCNVPROC __wglewCreateAffinityDCNV; +WGLEW_EXPORT PFNWGLDELETEDCNVPROC __wglewDeleteDCNV; +WGLEW_EXPORT PFNWGLENUMGPUDEVICESNVPROC __wglewEnumGpuDevicesNV; +WGLEW_EXPORT PFNWGLENUMGPUSFROMAFFINITYDCNVPROC __wglewEnumGpusFromAffinityDCNV; +WGLEW_EXPORT PFNWGLENUMGPUSNVPROC __wglewEnumGpusNV; + +WGLEW_EXPORT PFNWGLBINDVIDEODEVICENVPROC __wglewBindVideoDeviceNV; +WGLEW_EXPORT PFNWGLENUMERATEVIDEODEVICESNVPROC __wglewEnumerateVideoDevicesNV; +WGLEW_EXPORT PFNWGLQUERYCURRENTCONTEXTNVPROC __wglewQueryCurrentContextNV; + +WGLEW_EXPORT PFNWGLBINDSWAPBARRIERNVPROC __wglewBindSwapBarrierNV; +WGLEW_EXPORT 
PFNWGLJOINSWAPGROUPNVPROC __wglewJoinSwapGroupNV; +WGLEW_EXPORT PFNWGLQUERYFRAMECOUNTNVPROC __wglewQueryFrameCountNV; +WGLEW_EXPORT PFNWGLQUERYMAXSWAPGROUPSNVPROC __wglewQueryMaxSwapGroupsNV; +WGLEW_EXPORT PFNWGLQUERYSWAPGROUPNVPROC __wglewQuerySwapGroupNV; +WGLEW_EXPORT PFNWGLRESETFRAMECOUNTNVPROC __wglewResetFrameCountNV; + +WGLEW_EXPORT PFNWGLALLOCATEMEMORYNVPROC __wglewAllocateMemoryNV; +WGLEW_EXPORT PFNWGLFREEMEMORYNVPROC __wglewFreeMemoryNV; + +WGLEW_EXPORT PFNWGLBINDVIDEOIMAGENVPROC __wglewBindVideoImageNV; +WGLEW_EXPORT PFNWGLGETVIDEODEVICENVPROC __wglewGetVideoDeviceNV; +WGLEW_EXPORT PFNWGLGETVIDEOINFONVPROC __wglewGetVideoInfoNV; +WGLEW_EXPORT PFNWGLRELEASEVIDEODEVICENVPROC __wglewReleaseVideoDeviceNV; +WGLEW_EXPORT PFNWGLRELEASEVIDEOIMAGENVPROC __wglewReleaseVideoImageNV; +WGLEW_EXPORT PFNWGLSENDPBUFFERTOVIDEONVPROC __wglewSendPbufferToVideoNV; + +WGLEW_EXPORT PFNWGLGETMSCRATEOMLPROC __wglewGetMscRateOML; +WGLEW_EXPORT PFNWGLGETSYNCVALUESOMLPROC __wglewGetSyncValuesOML; +WGLEW_EXPORT PFNWGLSWAPBUFFERSMSCOMLPROC __wglewSwapBuffersMscOML; +WGLEW_EXPORT PFNWGLSWAPLAYERBUFFERSMSCOMLPROC __wglewSwapLayerBuffersMscOML; +WGLEW_EXPORT PFNWGLWAITFORMSCOMLPROC __wglewWaitForMscOML; +WGLEW_EXPORT PFNWGLWAITFORSBCOMLPROC __wglewWaitForSbcOML; +WGLEW_EXPORT GLboolean __WGLEW_3DFX_multisample; +WGLEW_EXPORT GLboolean __WGLEW_3DL_stereo_control; +WGLEW_EXPORT GLboolean __WGLEW_ARB_buffer_region; +WGLEW_EXPORT GLboolean __WGLEW_ARB_create_context; +WGLEW_EXPORT GLboolean __WGLEW_ARB_extensions_string; +WGLEW_EXPORT GLboolean __WGLEW_ARB_framebuffer_sRGB; +WGLEW_EXPORT GLboolean __WGLEW_ARB_make_current_read; +WGLEW_EXPORT GLboolean __WGLEW_ARB_multisample; +WGLEW_EXPORT GLboolean __WGLEW_ARB_pbuffer; +WGLEW_EXPORT GLboolean __WGLEW_ARB_pixel_format; +WGLEW_EXPORT GLboolean __WGLEW_ARB_pixel_format_float; +WGLEW_EXPORT GLboolean __WGLEW_ARB_render_texture; +WGLEW_EXPORT GLboolean __WGLEW_ATI_pixel_format_float; +WGLEW_EXPORT GLboolean 
__WGLEW_ATI_render_texture_rectangle; +WGLEW_EXPORT GLboolean __WGLEW_EXT_depth_float; +WGLEW_EXPORT GLboolean __WGLEW_EXT_display_color_table; +WGLEW_EXPORT GLboolean __WGLEW_EXT_extensions_string; +WGLEW_EXPORT GLboolean __WGLEW_EXT_framebuffer_sRGB; +WGLEW_EXPORT GLboolean __WGLEW_EXT_make_current_read; +WGLEW_EXPORT GLboolean __WGLEW_EXT_multisample; +WGLEW_EXPORT GLboolean __WGLEW_EXT_pbuffer; +WGLEW_EXPORT GLboolean __WGLEW_EXT_pixel_format; +WGLEW_EXPORT GLboolean __WGLEW_EXT_pixel_format_packed_float; +WGLEW_EXPORT GLboolean __WGLEW_EXT_swap_control; +WGLEW_EXPORT GLboolean __WGLEW_I3D_digital_video_control; +WGLEW_EXPORT GLboolean __WGLEW_I3D_gamma; +WGLEW_EXPORT GLboolean __WGLEW_I3D_genlock; +WGLEW_EXPORT GLboolean __WGLEW_I3D_image_buffer; +WGLEW_EXPORT GLboolean __WGLEW_I3D_swap_frame_lock; +WGLEW_EXPORT GLboolean __WGLEW_I3D_swap_frame_usage; +WGLEW_EXPORT GLboolean __WGLEW_NV_float_buffer; +WGLEW_EXPORT GLboolean __WGLEW_NV_gpu_affinity; +WGLEW_EXPORT GLboolean __WGLEW_NV_present_video; +WGLEW_EXPORT GLboolean __WGLEW_NV_render_depth_texture; +WGLEW_EXPORT GLboolean __WGLEW_NV_render_texture_rectangle; +WGLEW_EXPORT GLboolean __WGLEW_NV_swap_group; +WGLEW_EXPORT GLboolean __WGLEW_NV_vertex_array_range; +WGLEW_EXPORT GLboolean __WGLEW_NV_video_output; +WGLEW_EXPORT GLboolean __WGLEW_OML_sync_control; + +#ifdef GLEW_MX +}; /* WGLEWContextStruct */ +#endif /* GLEW_MX */ + +/* ------------------------------------------------------------------------- */ + +#ifdef GLEW_MX + +typedef struct WGLEWContextStruct WGLEWContext; +GLEWAPI GLenum wglewContextInit (WGLEWContext* ctx); +GLEWAPI GLboolean wglewContextIsSupported (WGLEWContext* ctx, const char* name); + +#define wglewInit() wglewContextInit(wglewGetContext()) +#define wglewIsSupported(x) wglewContextIsSupported(wglewGetContext(), x) + +#define WGLEW_GET_VAR(x) (*(const GLboolean*)&(wglewGetContext()->x)) +#define WGLEW_GET_FUN(x) wglewGetContext()->x + +#else /* GLEW_MX */ + +#define WGLEW_GET_VAR(x) 
(*(const GLboolean*)&x) +#define WGLEW_GET_FUN(x) x + +GLEWAPI GLboolean wglewIsSupported (const char* name); + +#endif /* GLEW_MX */ + +GLEWAPI GLboolean wglewGetExtension (const char* name); + +#ifdef __cplusplus +} +#endif + +#undef GLEWAPI + +#endif /* __wglew_h__ */ diff --git a/test_conformance/gl/GLEW/glew.c b/test_conformance/gl/GLEW/glew.c new file mode 100644 index 00000000..1100800a --- /dev/null +++ b/test_conformance/gl/GLEW/glew.c @@ -0,0 +1,10835 @@ +/* +** The OpenGL Extension Wrangler Library +** Copyright (C) 2002-2008, Milan Ikits +** Copyright (C) 2002-2008, Marcelo E. Magallon +** Copyright (C) 2002, Lev Povalahev +** All rights reserved. +** +** Redistribution and use in source and binary forms, with or without +** modification, are permitted provided that the following conditions are met: +** +** * Redistributions of source code must retain the above copyright notice, +** this list of conditions and the following disclaimer. +** * Redistributions in binary form must reproduce the above copyright notice, +** this list of conditions and the following disclaimer in the documentation +** and/or other materials provided with the distribution. +** * The name of the author may be used to endorse or promote products +** derived from this software without specific prior written permission. +** +** THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +** AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +** IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +** ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +** LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +** CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +** SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +** INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +** CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +** ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +** THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include +#if defined(_WIN32) +# include +#elif !defined(__APPLE__) || defined(GLEW_APPLE_GLX) +# include +#endif + +/* + * Define glewGetContext and related helper macros. + */ +#ifdef GLEW_MX +# define glewGetContext() ctx +# ifdef _WIN32 +# define GLEW_CONTEXT_ARG_DEF_INIT GLEWContext* ctx +# define GLEW_CONTEXT_ARG_VAR_INIT ctx +# define wglewGetContext() ctx +# define WGLEW_CONTEXT_ARG_DEF_INIT WGLEWContext* ctx +# define WGLEW_CONTEXT_ARG_DEF_LIST WGLEWContext* ctx +# else /* _WIN32 */ +# define GLEW_CONTEXT_ARG_DEF_INIT void +# define GLEW_CONTEXT_ARG_VAR_INIT +# define glxewGetContext() ctx +# define GLXEW_CONTEXT_ARG_DEF_INIT void +# define GLXEW_CONTEXT_ARG_DEF_LIST GLXEWContext* ctx +# endif /* _WIN32 */ +# define GLEW_CONTEXT_ARG_DEF_LIST GLEWContext* ctx +#else /* GLEW_MX */ +# define GLEW_CONTEXT_ARG_DEF_INIT void +# define GLEW_CONTEXT_ARG_VAR_INIT +# define GLEW_CONTEXT_ARG_DEF_LIST void +# define WGLEW_CONTEXT_ARG_DEF_INIT void +# define WGLEW_CONTEXT_ARG_DEF_LIST void +# define GLXEW_CONTEXT_ARG_DEF_INIT void +# define GLXEW_CONTEXT_ARG_DEF_LIST void +#endif /* GLEW_MX */ + +#if defined(__APPLE__) +#include +#include +#include + +void* NSGLGetProcAddress (const GLubyte *name) +{ + static const struct mach_header* image = NULL; + NSSymbol symbol; + char* symbolName; + if (NULL == image) + { + image = NSAddImage("/System/Library/Frameworks/OpenGL.framework/Versions/Current/OpenGL", 
NSADDIMAGE_OPTION_RETURN_ON_ERROR); + } + /* prepend a '_' for the Unix C symbol mangling convention */ + symbolName = malloc(strlen((const char*)name) + 2); + strcpy(symbolName+1, (const char*)name); + symbolName[0] = '_'; + symbol = NULL; + /* if (NSIsSymbolNameDefined(symbolName)) + symbol = NSLookupAndBindSymbol(symbolName); */ + symbol = image ? NSLookupSymbolInImage(image, symbolName, NSLOOKUPSYMBOLINIMAGE_OPTION_BIND | NSLOOKUPSYMBOLINIMAGE_OPTION_RETURN_ON_ERROR) : NULL; + free(symbolName); + return symbol ? NSAddressOfSymbol(symbol) : NULL; +} +#endif /* __APPLE__ */ + +#if defined(__sgi) || defined (__sun) +#include +#include +#include + +void* dlGetProcAddress (const GLubyte* name) +{ + static void* h = NULL; + static void* gpa; + + if (h == NULL) + { + if ((h = dlopen(NULL, RTLD_LAZY | RTLD_LOCAL)) == NULL) return NULL; + gpa = dlsym(h, "glXGetProcAddress"); + } + + if (gpa != NULL) + return ((void*(*)(const GLubyte*))gpa)(name); + else + return dlsym(h, (const char*)name); +} +#endif /* __sgi || __sun */ + +/* + * Define glewGetProcAddress. + */ +#if defined(_WIN32) +# define glewGetProcAddress(name) wglGetProcAddress((LPCSTR)name) +#else +# if defined(__APPLE__) +# define glewGetProcAddress(name) NSGLGetProcAddress(name) +# else +# if defined(__sgi) || defined(__sun) +# define glewGetProcAddress(name) dlGetProcAddress(name) +# else /* __linux */ +# define glewGetProcAddress(name) (*glXGetProcAddressARB)(name) +# endif +# endif +#endif + +/* + * Define GLboolean const cast. + */ +#define CONST_CAST(x) (*(GLboolean*)&x) + +/* + * GLEW, just like OpenGL or GLU, does not rely on the standard C library. + * These functions implement the functionality required in this file. 
+ */ +static GLuint _glewStrLen (const GLubyte* s) +{ + GLuint i=0; + if (s == NULL) return 0; + while (s[i] != '\0') i++; + return i; +} + +static GLuint _glewStrCLen (const GLubyte* s, GLubyte c) +{ + GLuint i=0; + if (s == NULL) return 0; + while (s[i] != '\0' && s[i] != c) i++; + return s[i] == c ? i : 0; +} + +static GLboolean _glewStrSame (const GLubyte* a, const GLubyte* b, GLuint n) +{ + GLuint i=0; + if(a == NULL || b == NULL) + return (a == NULL && b == NULL && n == 0) ? GL_TRUE : GL_FALSE; + while (i < n && a[i] != '\0' && b[i] != '\0' && a[i] == b[i]) i++; + return i == n ? GL_TRUE : GL_FALSE; +} + +static GLboolean _glewStrSame1 (GLubyte** a, GLuint* na, const GLubyte* b, GLuint nb) +{ + while (*na > 0 && (**a == ' ' || **a == '\n' || **a == '\r' || **a == '\t')) + { + (*a)++; + (*na)--; + } + if(*na >= nb) + { + GLuint i=0; + while (i < nb && (*a)+i != NULL && b+i != NULL && (*a)[i] == b[i]) i++; + if(i == nb) + { + *a = *a + nb; + *na = *na - nb; + return GL_TRUE; + } + } + return GL_FALSE; +} + +static GLboolean _glewStrSame2 (GLubyte** a, GLuint* na, const GLubyte* b, GLuint nb) +{ + if(*na >= nb) + { + GLuint i=0; + while (i < nb && (*a)+i != NULL && b+i != NULL && (*a)[i] == b[i]) i++; + if(i == nb) + { + *a = *a + nb; + *na = *na - nb; + return GL_TRUE; + } + } + return GL_FALSE; +} + +static GLboolean _glewStrSame3 (GLubyte** a, GLuint* na, const GLubyte* b, GLuint nb) +{ + if(*na >= nb) + { + GLuint i=0; + while (i < nb && (*a)+i != NULL && b+i != NULL && (*a)[i] == b[i]) i++; + if (i == nb && (*na == nb || (*a)[i] == ' ' || (*a)[i] == '\n' || (*a)[i] == '\r' || (*a)[i] == '\t')) + { + *a = *a + nb; + *na = *na - nb; + return GL_TRUE; + } + } + return GL_FALSE; +} + +#if !defined(_WIN32) || !defined(GLEW_MX) + +PFNGLCOPYTEXSUBIMAGE3DPROC __glewCopyTexSubImage3D = NULL; +PFNGLDRAWRANGEELEMENTSPROC __glewDrawRangeElements = NULL; +PFNGLTEXIMAGE3DPROC __glewTexImage3D = NULL; +PFNGLTEXSUBIMAGE3DPROC __glewTexSubImage3D = NULL; + 
+PFNGLACTIVETEXTUREPROC __glewActiveTexture = NULL; +PFNGLCLIENTACTIVETEXTUREPROC __glewClientActiveTexture = NULL; +PFNGLCOMPRESSEDTEXIMAGE1DPROC __glewCompressedTexImage1D = NULL; +PFNGLCOMPRESSEDTEXIMAGE2DPROC __glewCompressedTexImage2D = NULL; +PFNGLCOMPRESSEDTEXIMAGE3DPROC __glewCompressedTexImage3D = NULL; +PFNGLCOMPRESSEDTEXSUBIMAGE1DPROC __glewCompressedTexSubImage1D = NULL; +PFNGLCOMPRESSEDTEXSUBIMAGE2DPROC __glewCompressedTexSubImage2D = NULL; +PFNGLCOMPRESSEDTEXSUBIMAGE3DPROC __glewCompressedTexSubImage3D = NULL; +PFNGLGETCOMPRESSEDTEXIMAGEPROC __glewGetCompressedTexImage = NULL; +PFNGLLOADTRANSPOSEMATRIXDPROC __glewLoadTransposeMatrixd = NULL; +PFNGLLOADTRANSPOSEMATRIXFPROC __glewLoadTransposeMatrixf = NULL; +PFNGLMULTTRANSPOSEMATRIXDPROC __glewMultTransposeMatrixd = NULL; +PFNGLMULTTRANSPOSEMATRIXFPROC __glewMultTransposeMatrixf = NULL; +PFNGLMULTITEXCOORD1DPROC __glewMultiTexCoord1d = NULL; +PFNGLMULTITEXCOORD1DVPROC __glewMultiTexCoord1dv = NULL; +PFNGLMULTITEXCOORD1FPROC __glewMultiTexCoord1f = NULL; +PFNGLMULTITEXCOORD1FVPROC __glewMultiTexCoord1fv = NULL; +PFNGLMULTITEXCOORD1IPROC __glewMultiTexCoord1i = NULL; +PFNGLMULTITEXCOORD1IVPROC __glewMultiTexCoord1iv = NULL; +PFNGLMULTITEXCOORD1SPROC __glewMultiTexCoord1s = NULL; +PFNGLMULTITEXCOORD1SVPROC __glewMultiTexCoord1sv = NULL; +PFNGLMULTITEXCOORD2DPROC __glewMultiTexCoord2d = NULL; +PFNGLMULTITEXCOORD2DVPROC __glewMultiTexCoord2dv = NULL; +PFNGLMULTITEXCOORD2FPROC __glewMultiTexCoord2f = NULL; +PFNGLMULTITEXCOORD2FVPROC __glewMultiTexCoord2fv = NULL; +PFNGLMULTITEXCOORD2IPROC __glewMultiTexCoord2i = NULL; +PFNGLMULTITEXCOORD2IVPROC __glewMultiTexCoord2iv = NULL; +PFNGLMULTITEXCOORD2SPROC __glewMultiTexCoord2s = NULL; +PFNGLMULTITEXCOORD2SVPROC __glewMultiTexCoord2sv = NULL; +PFNGLMULTITEXCOORD3DPROC __glewMultiTexCoord3d = NULL; +PFNGLMULTITEXCOORD3DVPROC __glewMultiTexCoord3dv = NULL; +PFNGLMULTITEXCOORD3FPROC __glewMultiTexCoord3f = NULL; +PFNGLMULTITEXCOORD3FVPROC __glewMultiTexCoord3fv = 
NULL; +PFNGLMULTITEXCOORD3IPROC __glewMultiTexCoord3i = NULL; +PFNGLMULTITEXCOORD3IVPROC __glewMultiTexCoord3iv = NULL; +PFNGLMULTITEXCOORD3SPROC __glewMultiTexCoord3s = NULL; +PFNGLMULTITEXCOORD3SVPROC __glewMultiTexCoord3sv = NULL; +PFNGLMULTITEXCOORD4DPROC __glewMultiTexCoord4d = NULL; +PFNGLMULTITEXCOORD4DVPROC __glewMultiTexCoord4dv = NULL; +PFNGLMULTITEXCOORD4FPROC __glewMultiTexCoord4f = NULL; +PFNGLMULTITEXCOORD4FVPROC __glewMultiTexCoord4fv = NULL; +PFNGLMULTITEXCOORD4IPROC __glewMultiTexCoord4i = NULL; +PFNGLMULTITEXCOORD4IVPROC __glewMultiTexCoord4iv = NULL; +PFNGLMULTITEXCOORD4SPROC __glewMultiTexCoord4s = NULL; +PFNGLMULTITEXCOORD4SVPROC __glewMultiTexCoord4sv = NULL; +PFNGLSAMPLECOVERAGEPROC __glewSampleCoverage = NULL; + +PFNGLBLENDCOLORPROC __glewBlendColor = NULL; +PFNGLBLENDEQUATIONPROC __glewBlendEquation = NULL; +PFNGLBLENDFUNCSEPARATEPROC __glewBlendFuncSeparate = NULL; +PFNGLFOGCOORDPOINTERPROC __glewFogCoordPointer = NULL; +PFNGLFOGCOORDDPROC __glewFogCoordd = NULL; +PFNGLFOGCOORDDVPROC __glewFogCoorddv = NULL; +PFNGLFOGCOORDFPROC __glewFogCoordf = NULL; +PFNGLFOGCOORDFVPROC __glewFogCoordfv = NULL; +PFNGLMULTIDRAWARRAYSPROC __glewMultiDrawArrays = NULL; +PFNGLMULTIDRAWELEMENTSPROC __glewMultiDrawElements = NULL; +PFNGLPOINTPARAMETERFPROC __glewPointParameterf = NULL; +PFNGLPOINTPARAMETERFVPROC __glewPointParameterfv = NULL; +PFNGLPOINTPARAMETERIPROC __glewPointParameteri = NULL; +PFNGLPOINTPARAMETERIVPROC __glewPointParameteriv = NULL; +PFNGLSECONDARYCOLOR3BPROC __glewSecondaryColor3b = NULL; +PFNGLSECONDARYCOLOR3BVPROC __glewSecondaryColor3bv = NULL; +PFNGLSECONDARYCOLOR3DPROC __glewSecondaryColor3d = NULL; +PFNGLSECONDARYCOLOR3DVPROC __glewSecondaryColor3dv = NULL; +PFNGLSECONDARYCOLOR3FPROC __glewSecondaryColor3f = NULL; +PFNGLSECONDARYCOLOR3FVPROC __glewSecondaryColor3fv = NULL; +PFNGLSECONDARYCOLOR3IPROC __glewSecondaryColor3i = NULL; +PFNGLSECONDARYCOLOR3IVPROC __glewSecondaryColor3iv = NULL; +PFNGLSECONDARYCOLOR3SPROC 
__glewSecondaryColor3s = NULL; +PFNGLSECONDARYCOLOR3SVPROC __glewSecondaryColor3sv = NULL; +PFNGLSECONDARYCOLOR3UBPROC __glewSecondaryColor3ub = NULL; +PFNGLSECONDARYCOLOR3UBVPROC __glewSecondaryColor3ubv = NULL; +PFNGLSECONDARYCOLOR3UIPROC __glewSecondaryColor3ui = NULL; +PFNGLSECONDARYCOLOR3UIVPROC __glewSecondaryColor3uiv = NULL; +PFNGLSECONDARYCOLOR3USPROC __glewSecondaryColor3us = NULL; +PFNGLSECONDARYCOLOR3USVPROC __glewSecondaryColor3usv = NULL; +PFNGLSECONDARYCOLORPOINTERPROC __glewSecondaryColorPointer = NULL; +PFNGLWINDOWPOS2DPROC __glewWindowPos2d = NULL; +PFNGLWINDOWPOS2DVPROC __glewWindowPos2dv = NULL; +PFNGLWINDOWPOS2FPROC __glewWindowPos2f = NULL; +PFNGLWINDOWPOS2FVPROC __glewWindowPos2fv = NULL; +PFNGLWINDOWPOS2IPROC __glewWindowPos2i = NULL; +PFNGLWINDOWPOS2IVPROC __glewWindowPos2iv = NULL; +PFNGLWINDOWPOS2SPROC __glewWindowPos2s = NULL; +PFNGLWINDOWPOS2SVPROC __glewWindowPos2sv = NULL; +PFNGLWINDOWPOS3DPROC __glewWindowPos3d = NULL; +PFNGLWINDOWPOS3DVPROC __glewWindowPos3dv = NULL; +PFNGLWINDOWPOS3FPROC __glewWindowPos3f = NULL; +PFNGLWINDOWPOS3FVPROC __glewWindowPos3fv = NULL; +PFNGLWINDOWPOS3IPROC __glewWindowPos3i = NULL; +PFNGLWINDOWPOS3IVPROC __glewWindowPos3iv = NULL; +PFNGLWINDOWPOS3SPROC __glewWindowPos3s = NULL; +PFNGLWINDOWPOS3SVPROC __glewWindowPos3sv = NULL; + +PFNGLBEGINQUERYPROC __glewBeginQuery = NULL; +PFNGLBINDBUFFERPROC __glewBindBuffer = NULL; +PFNGLBUFFERDATAPROC __glewBufferData = NULL; +PFNGLBUFFERSUBDATAPROC __glewBufferSubData = NULL; +PFNGLDELETEBUFFERSPROC __glewDeleteBuffers = NULL; +PFNGLDELETEQUERIESPROC __glewDeleteQueries = NULL; +PFNGLENDQUERYPROC __glewEndQuery = NULL; +PFNGLGENBUFFERSPROC __glewGenBuffers = NULL; +PFNGLGENQUERIESPROC __glewGenQueries = NULL; +PFNGLGETBUFFERPARAMETERIVPROC __glewGetBufferParameteriv = NULL; +PFNGLGETBUFFERPOINTERVPROC __glewGetBufferPointerv = NULL; +PFNGLGETBUFFERSUBDATAPROC __glewGetBufferSubData = NULL; +PFNGLGETQUERYOBJECTIVPROC __glewGetQueryObjectiv = NULL; 
+PFNGLGETQUERYOBJECTUIVPROC __glewGetQueryObjectuiv = NULL; +PFNGLGETQUERYIVPROC __glewGetQueryiv = NULL; +PFNGLISBUFFERPROC __glewIsBuffer = NULL; +PFNGLISQUERYPROC __glewIsQuery = NULL; +PFNGLMAPBUFFERPROC __glewMapBuffer = NULL; +PFNGLUNMAPBUFFERPROC __glewUnmapBuffer = NULL; + +PFNGLATTACHSHADERPROC __glewAttachShader = NULL; +PFNGLBINDATTRIBLOCATIONPROC __glewBindAttribLocation = NULL; +PFNGLBLENDEQUATIONSEPARATEPROC __glewBlendEquationSeparate = NULL; +PFNGLCOMPILESHADERPROC __glewCompileShader = NULL; +PFNGLCREATEPROGRAMPROC __glewCreateProgram = NULL; +PFNGLCREATESHADERPROC __glewCreateShader = NULL; +PFNGLDELETEPROGRAMPROC __glewDeleteProgram = NULL; +PFNGLDELETESHADERPROC __glewDeleteShader = NULL; +PFNGLDETACHSHADERPROC __glewDetachShader = NULL; +PFNGLDISABLEVERTEXATTRIBARRAYPROC __glewDisableVertexAttribArray = NULL; +PFNGLDRAWBUFFERSPROC __glewDrawBuffers = NULL; +PFNGLENABLEVERTEXATTRIBARRAYPROC __glewEnableVertexAttribArray = NULL; +PFNGLGETACTIVEATTRIBPROC __glewGetActiveAttrib = NULL; +PFNGLGETACTIVEUNIFORMPROC __glewGetActiveUniform = NULL; +PFNGLGETATTACHEDSHADERSPROC __glewGetAttachedShaders = NULL; +PFNGLGETATTRIBLOCATIONPROC __glewGetAttribLocation = NULL; +PFNGLGETPROGRAMINFOLOGPROC __glewGetProgramInfoLog = NULL; +PFNGLGETPROGRAMIVPROC __glewGetProgramiv = NULL; +PFNGLGETSHADERINFOLOGPROC __glewGetShaderInfoLog = NULL; +PFNGLGETSHADERSOURCEPROC __glewGetShaderSource = NULL; +PFNGLGETSHADERIVPROC __glewGetShaderiv = NULL; +PFNGLGETUNIFORMLOCATIONPROC __glewGetUniformLocation = NULL; +PFNGLGETUNIFORMFVPROC __glewGetUniformfv = NULL; +PFNGLGETUNIFORMIVPROC __glewGetUniformiv = NULL; +PFNGLGETVERTEXATTRIBPOINTERVPROC __glewGetVertexAttribPointerv = NULL; +PFNGLGETVERTEXATTRIBDVPROC __glewGetVertexAttribdv = NULL; +PFNGLGETVERTEXATTRIBFVPROC __glewGetVertexAttribfv = NULL; +PFNGLGETVERTEXATTRIBIVPROC __glewGetVertexAttribiv = NULL; +PFNGLISPROGRAMPROC __glewIsProgram = NULL; +PFNGLISSHADERPROC __glewIsShader = NULL; +PFNGLLINKPROGRAMPROC 
__glewLinkProgram = NULL; +PFNGLSHADERSOURCEPROC __glewShaderSource = NULL; +PFNGLSTENCILFUNCSEPARATEPROC __glewStencilFuncSeparate = NULL; +PFNGLSTENCILMASKSEPARATEPROC __glewStencilMaskSeparate = NULL; +PFNGLSTENCILOPSEPARATEPROC __glewStencilOpSeparate = NULL; +PFNGLUNIFORM1FPROC __glewUniform1f = NULL; +PFNGLUNIFORM1FVPROC __glewUniform1fv = NULL; +PFNGLUNIFORM1IPROC __glewUniform1i = NULL; +PFNGLUNIFORM1IVPROC __glewUniform1iv = NULL; +PFNGLUNIFORM2FPROC __glewUniform2f = NULL; +PFNGLUNIFORM2FVPROC __glewUniform2fv = NULL; +PFNGLUNIFORM2IPROC __glewUniform2i = NULL; +PFNGLUNIFORM2IVPROC __glewUniform2iv = NULL; +PFNGLUNIFORM3FPROC __glewUniform3f = NULL; +PFNGLUNIFORM3FVPROC __glewUniform3fv = NULL; +PFNGLUNIFORM3IPROC __glewUniform3i = NULL; +PFNGLUNIFORM3IVPROC __glewUniform3iv = NULL; +PFNGLUNIFORM4FPROC __glewUniform4f = NULL; +PFNGLUNIFORM4FVPROC __glewUniform4fv = NULL; +PFNGLUNIFORM4IPROC __glewUniform4i = NULL; +PFNGLUNIFORM4IVPROC __glewUniform4iv = NULL; +PFNGLUNIFORMMATRIX2FVPROC __glewUniformMatrix2fv = NULL; +PFNGLUNIFORMMATRIX3FVPROC __glewUniformMatrix3fv = NULL; +PFNGLUNIFORMMATRIX4FVPROC __glewUniformMatrix4fv = NULL; +PFNGLUSEPROGRAMPROC __glewUseProgram = NULL; +PFNGLVALIDATEPROGRAMPROC __glewValidateProgram = NULL; +PFNGLVERTEXATTRIB1DPROC __glewVertexAttrib1d = NULL; +PFNGLVERTEXATTRIB1DVPROC __glewVertexAttrib1dv = NULL; +PFNGLVERTEXATTRIB1FPROC __glewVertexAttrib1f = NULL; +PFNGLVERTEXATTRIB1FVPROC __glewVertexAttrib1fv = NULL; +PFNGLVERTEXATTRIB1SPROC __glewVertexAttrib1s = NULL; +PFNGLVERTEXATTRIB1SVPROC __glewVertexAttrib1sv = NULL; +PFNGLVERTEXATTRIB2DPROC __glewVertexAttrib2d = NULL; +PFNGLVERTEXATTRIB2DVPROC __glewVertexAttrib2dv = NULL; +PFNGLVERTEXATTRIB2FPROC __glewVertexAttrib2f = NULL; +PFNGLVERTEXATTRIB2FVPROC __glewVertexAttrib2fv = NULL; +PFNGLVERTEXATTRIB2SPROC __glewVertexAttrib2s = NULL; +PFNGLVERTEXATTRIB2SVPROC __glewVertexAttrib2sv = NULL; +PFNGLVERTEXATTRIB3DPROC __glewVertexAttrib3d = NULL; +PFNGLVERTEXATTRIB3DVPROC 
__glewVertexAttrib3dv = NULL; +PFNGLVERTEXATTRIB3FPROC __glewVertexAttrib3f = NULL; +PFNGLVERTEXATTRIB3FVPROC __glewVertexAttrib3fv = NULL; +PFNGLVERTEXATTRIB3SPROC __glewVertexAttrib3s = NULL; +PFNGLVERTEXATTRIB3SVPROC __glewVertexAttrib3sv = NULL; +PFNGLVERTEXATTRIB4NBVPROC __glewVertexAttrib4Nbv = NULL; +PFNGLVERTEXATTRIB4NIVPROC __glewVertexAttrib4Niv = NULL; +PFNGLVERTEXATTRIB4NSVPROC __glewVertexAttrib4Nsv = NULL; +PFNGLVERTEXATTRIB4NUBPROC __glewVertexAttrib4Nub = NULL; +PFNGLVERTEXATTRIB4NUBVPROC __glewVertexAttrib4Nubv = NULL; +PFNGLVERTEXATTRIB4NUIVPROC __glewVertexAttrib4Nuiv = NULL; +PFNGLVERTEXATTRIB4NUSVPROC __glewVertexAttrib4Nusv = NULL; +PFNGLVERTEXATTRIB4BVPROC __glewVertexAttrib4bv = NULL; +PFNGLVERTEXATTRIB4DPROC __glewVertexAttrib4d = NULL; +PFNGLVERTEXATTRIB4DVPROC __glewVertexAttrib4dv = NULL; +PFNGLVERTEXATTRIB4FPROC __glewVertexAttrib4f = NULL; +PFNGLVERTEXATTRIB4FVPROC __glewVertexAttrib4fv = NULL; +PFNGLVERTEXATTRIB4IVPROC __glewVertexAttrib4iv = NULL; +PFNGLVERTEXATTRIB4SPROC __glewVertexAttrib4s = NULL; +PFNGLVERTEXATTRIB4SVPROC __glewVertexAttrib4sv = NULL; +PFNGLVERTEXATTRIB4UBVPROC __glewVertexAttrib4ubv = NULL; +PFNGLVERTEXATTRIB4UIVPROC __glewVertexAttrib4uiv = NULL; +PFNGLVERTEXATTRIB4USVPROC __glewVertexAttrib4usv = NULL; +PFNGLVERTEXATTRIBPOINTERPROC __glewVertexAttribPointer = NULL; + +PFNGLUNIFORMMATRIX2X3FVPROC __glewUniformMatrix2x3fv = NULL; +PFNGLUNIFORMMATRIX2X4FVPROC __glewUniformMatrix2x4fv = NULL; +PFNGLUNIFORMMATRIX3X2FVPROC __glewUniformMatrix3x2fv = NULL; +PFNGLUNIFORMMATRIX3X4FVPROC __glewUniformMatrix3x4fv = NULL; +PFNGLUNIFORMMATRIX4X2FVPROC __glewUniformMatrix4x2fv = NULL; +PFNGLUNIFORMMATRIX4X3FVPROC __glewUniformMatrix4x3fv = NULL; + +PFNGLTBUFFERMASK3DFXPROC __glewTbufferMask3DFX = NULL; + +PFNGLDRAWELEMENTARRAYAPPLEPROC __glewDrawElementArrayAPPLE = NULL; +PFNGLDRAWRANGEELEMENTARRAYAPPLEPROC __glewDrawRangeElementArrayAPPLE = NULL; +PFNGLELEMENTPOINTERAPPLEPROC __glewElementPointerAPPLE = NULL; 
+PFNGLMULTIDRAWELEMENTARRAYAPPLEPROC __glewMultiDrawElementArrayAPPLE = NULL; +PFNGLMULTIDRAWRANGEELEMENTARRAYAPPLEPROC __glewMultiDrawRangeElementArrayAPPLE = NULL; + +PFNGLDELETEFENCESAPPLEPROC __glewDeleteFencesAPPLE = NULL; +PFNGLFINISHFENCEAPPLEPROC __glewFinishFenceAPPLE = NULL; +PFNGLFINISHOBJECTAPPLEPROC __glewFinishObjectAPPLE = NULL; +PFNGLGENFENCESAPPLEPROC __glewGenFencesAPPLE = NULL; +PFNGLISFENCEAPPLEPROC __glewIsFenceAPPLE = NULL; +PFNGLSETFENCEAPPLEPROC __glewSetFenceAPPLE = NULL; +PFNGLTESTFENCEAPPLEPROC __glewTestFenceAPPLE = NULL; +PFNGLTESTOBJECTAPPLEPROC __glewTestObjectAPPLE = NULL; + +PFNGLBUFFERPARAMETERIAPPLEPROC __glewBufferParameteriAPPLE = NULL; +PFNGLFLUSHMAPPEDBUFFERRANGEAPPLEPROC __glewFlushMappedBufferRangeAPPLE = NULL; + +PFNGLGETTEXPARAMETERPOINTERVAPPLEPROC __glewGetTexParameterPointervAPPLE = NULL; +PFNGLTEXTURERANGEAPPLEPROC __glewTextureRangeAPPLE = NULL; + +PFNGLBINDVERTEXARRAYAPPLEPROC __glewBindVertexArrayAPPLE = NULL; +PFNGLDELETEVERTEXARRAYSAPPLEPROC __glewDeleteVertexArraysAPPLE = NULL; +PFNGLGENVERTEXARRAYSAPPLEPROC __glewGenVertexArraysAPPLE = NULL; +PFNGLISVERTEXARRAYAPPLEPROC __glewIsVertexArrayAPPLE = NULL; + +PFNGLFLUSHVERTEXARRAYRANGEAPPLEPROC __glewFlushVertexArrayRangeAPPLE = NULL; +PFNGLVERTEXARRAYPARAMETERIAPPLEPROC __glewVertexArrayParameteriAPPLE = NULL; +PFNGLVERTEXARRAYRANGEAPPLEPROC __glewVertexArrayRangeAPPLE = NULL; + +PFNGLCLAMPCOLORARBPROC __glewClampColorARB = NULL; + +PFNGLDRAWBUFFERSARBPROC __glewDrawBuffersARB = NULL; + +PFNGLCOLORSUBTABLEPROC __glewColorSubTable = NULL; +PFNGLCOLORTABLEPROC __glewColorTable = NULL; +PFNGLCOLORTABLEPARAMETERFVPROC __glewColorTableParameterfv = NULL; +PFNGLCOLORTABLEPARAMETERIVPROC __glewColorTableParameteriv = NULL; +PFNGLCONVOLUTIONFILTER1DPROC __glewConvolutionFilter1D = NULL; +PFNGLCONVOLUTIONFILTER2DPROC __glewConvolutionFilter2D = NULL; +PFNGLCONVOLUTIONPARAMETERFPROC __glewConvolutionParameterf = NULL; +PFNGLCONVOLUTIONPARAMETERFVPROC 
__glewConvolutionParameterfv = NULL; +PFNGLCONVOLUTIONPARAMETERIPROC __glewConvolutionParameteri = NULL; +PFNGLCONVOLUTIONPARAMETERIVPROC __glewConvolutionParameteriv = NULL; +PFNGLCOPYCOLORSUBTABLEPROC __glewCopyColorSubTable = NULL; +PFNGLCOPYCOLORTABLEPROC __glewCopyColorTable = NULL; +PFNGLCOPYCONVOLUTIONFILTER1DPROC __glewCopyConvolutionFilter1D = NULL; +PFNGLCOPYCONVOLUTIONFILTER2DPROC __glewCopyConvolutionFilter2D = NULL; +PFNGLGETCOLORTABLEPROC __glewGetColorTable = NULL; +PFNGLGETCOLORTABLEPARAMETERFVPROC __glewGetColorTableParameterfv = NULL; +PFNGLGETCOLORTABLEPARAMETERIVPROC __glewGetColorTableParameteriv = NULL; +PFNGLGETCONVOLUTIONFILTERPROC __glewGetConvolutionFilter = NULL; +PFNGLGETCONVOLUTIONPARAMETERFVPROC __glewGetConvolutionParameterfv = NULL; +PFNGLGETCONVOLUTIONPARAMETERIVPROC __glewGetConvolutionParameteriv = NULL; +PFNGLGETHISTOGRAMPROC __glewGetHistogram = NULL; +PFNGLGETHISTOGRAMPARAMETERFVPROC __glewGetHistogramParameterfv = NULL; +PFNGLGETHISTOGRAMPARAMETERIVPROC __glewGetHistogramParameteriv = NULL; +PFNGLGETMINMAXPROC __glewGetMinmax = NULL; +PFNGLGETMINMAXPARAMETERFVPROC __glewGetMinmaxParameterfv = NULL; +PFNGLGETMINMAXPARAMETERIVPROC __glewGetMinmaxParameteriv = NULL; +PFNGLGETSEPARABLEFILTERPROC __glewGetSeparableFilter = NULL; +PFNGLHISTOGRAMPROC __glewHistogram = NULL; +PFNGLMINMAXPROC __glewMinmax = NULL; +PFNGLRESETHISTOGRAMPROC __glewResetHistogram = NULL; +PFNGLRESETMINMAXPROC __glewResetMinmax = NULL; +PFNGLSEPARABLEFILTER2DPROC __glewSeparableFilter2D = NULL; + +PFNGLCURRENTPALETTEMATRIXARBPROC __glewCurrentPaletteMatrixARB = NULL; +PFNGLMATRIXINDEXPOINTERARBPROC __glewMatrixIndexPointerARB = NULL; +PFNGLMATRIXINDEXUBVARBPROC __glewMatrixIndexubvARB = NULL; +PFNGLMATRIXINDEXUIVARBPROC __glewMatrixIndexuivARB = NULL; +PFNGLMATRIXINDEXUSVARBPROC __glewMatrixIndexusvARB = NULL; + +PFNGLSAMPLECOVERAGEARBPROC __glewSampleCoverageARB = NULL; + +PFNGLACTIVETEXTUREARBPROC __glewActiveTextureARB = NULL; 
+PFNGLCLIENTACTIVETEXTUREARBPROC __glewClientActiveTextureARB = NULL; +PFNGLMULTITEXCOORD1DARBPROC __glewMultiTexCoord1dARB = NULL; +PFNGLMULTITEXCOORD1DVARBPROC __glewMultiTexCoord1dvARB = NULL; +PFNGLMULTITEXCOORD1FARBPROC __glewMultiTexCoord1fARB = NULL; +PFNGLMULTITEXCOORD1FVARBPROC __glewMultiTexCoord1fvARB = NULL; +PFNGLMULTITEXCOORD1IARBPROC __glewMultiTexCoord1iARB = NULL; +PFNGLMULTITEXCOORD1IVARBPROC __glewMultiTexCoord1ivARB = NULL; +PFNGLMULTITEXCOORD1SARBPROC __glewMultiTexCoord1sARB = NULL; +PFNGLMULTITEXCOORD1SVARBPROC __glewMultiTexCoord1svARB = NULL; +PFNGLMULTITEXCOORD2DARBPROC __glewMultiTexCoord2dARB = NULL; +PFNGLMULTITEXCOORD2DVARBPROC __glewMultiTexCoord2dvARB = NULL; +PFNGLMULTITEXCOORD2FARBPROC __glewMultiTexCoord2fARB = NULL; +PFNGLMULTITEXCOORD2FVARBPROC __glewMultiTexCoord2fvARB = NULL; +PFNGLMULTITEXCOORD2IARBPROC __glewMultiTexCoord2iARB = NULL; +PFNGLMULTITEXCOORD2IVARBPROC __glewMultiTexCoord2ivARB = NULL; +PFNGLMULTITEXCOORD2SARBPROC __glewMultiTexCoord2sARB = NULL; +PFNGLMULTITEXCOORD2SVARBPROC __glewMultiTexCoord2svARB = NULL; +PFNGLMULTITEXCOORD3DARBPROC __glewMultiTexCoord3dARB = NULL; +PFNGLMULTITEXCOORD3DVARBPROC __glewMultiTexCoord3dvARB = NULL; +PFNGLMULTITEXCOORD3FARBPROC __glewMultiTexCoord3fARB = NULL; +PFNGLMULTITEXCOORD3FVARBPROC __glewMultiTexCoord3fvARB = NULL; +PFNGLMULTITEXCOORD3IARBPROC __glewMultiTexCoord3iARB = NULL; +PFNGLMULTITEXCOORD3IVARBPROC __glewMultiTexCoord3ivARB = NULL; +PFNGLMULTITEXCOORD3SARBPROC __glewMultiTexCoord3sARB = NULL; +PFNGLMULTITEXCOORD3SVARBPROC __glewMultiTexCoord3svARB = NULL; +PFNGLMULTITEXCOORD4DARBPROC __glewMultiTexCoord4dARB = NULL; +PFNGLMULTITEXCOORD4DVARBPROC __glewMultiTexCoord4dvARB = NULL; +PFNGLMULTITEXCOORD4FARBPROC __glewMultiTexCoord4fARB = NULL; +PFNGLMULTITEXCOORD4FVARBPROC __glewMultiTexCoord4fvARB = NULL; +PFNGLMULTITEXCOORD4IARBPROC __glewMultiTexCoord4iARB = NULL; +PFNGLMULTITEXCOORD4IVARBPROC __glewMultiTexCoord4ivARB = NULL; +PFNGLMULTITEXCOORD4SARBPROC 
__glewMultiTexCoord4sARB = NULL; +PFNGLMULTITEXCOORD4SVARBPROC __glewMultiTexCoord4svARB = NULL; + +PFNGLBEGINQUERYARBPROC __glewBeginQueryARB = NULL; +PFNGLDELETEQUERIESARBPROC __glewDeleteQueriesARB = NULL; +PFNGLENDQUERYARBPROC __glewEndQueryARB = NULL; +PFNGLGENQUERIESARBPROC __glewGenQueriesARB = NULL; +PFNGLGETQUERYOBJECTIVARBPROC __glewGetQueryObjectivARB = NULL; +PFNGLGETQUERYOBJECTUIVARBPROC __glewGetQueryObjectuivARB = NULL; +PFNGLGETQUERYIVARBPROC __glewGetQueryivARB = NULL; +PFNGLISQUERYARBPROC __glewIsQueryARB = NULL; + +PFNGLPOINTPARAMETERFARBPROC __glewPointParameterfARB = NULL; +PFNGLPOINTPARAMETERFVARBPROC __glewPointParameterfvARB = NULL; + +PFNGLATTACHOBJECTARBPROC __glewAttachObjectARB = NULL; +PFNGLCOMPILESHADERARBPROC __glewCompileShaderARB = NULL; +PFNGLCREATEPROGRAMOBJECTARBPROC __glewCreateProgramObjectARB = NULL; +PFNGLCREATESHADEROBJECTARBPROC __glewCreateShaderObjectARB = NULL; +PFNGLDELETEOBJECTARBPROC __glewDeleteObjectARB = NULL; +PFNGLDETACHOBJECTARBPROC __glewDetachObjectARB = NULL; +PFNGLGETACTIVEUNIFORMARBPROC __glewGetActiveUniformARB = NULL; +PFNGLGETATTACHEDOBJECTSARBPROC __glewGetAttachedObjectsARB = NULL; +PFNGLGETHANDLEARBPROC __glewGetHandleARB = NULL; +PFNGLGETINFOLOGARBPROC __glewGetInfoLogARB = NULL; +PFNGLGETOBJECTPARAMETERFVARBPROC __glewGetObjectParameterfvARB = NULL; +PFNGLGETOBJECTPARAMETERIVARBPROC __glewGetObjectParameterivARB = NULL; +PFNGLGETSHADERSOURCEARBPROC __glewGetShaderSourceARB = NULL; +PFNGLGETUNIFORMLOCATIONARBPROC __glewGetUniformLocationARB = NULL; +PFNGLGETUNIFORMFVARBPROC __glewGetUniformfvARB = NULL; +PFNGLGETUNIFORMIVARBPROC __glewGetUniformivARB = NULL; +PFNGLLINKPROGRAMARBPROC __glewLinkProgramARB = NULL; +PFNGLSHADERSOURCEARBPROC __glewShaderSourceARB = NULL; +PFNGLUNIFORM1FARBPROC __glewUniform1fARB = NULL; +PFNGLUNIFORM1FVARBPROC __glewUniform1fvARB = NULL; +PFNGLUNIFORM1IARBPROC __glewUniform1iARB = NULL; +PFNGLUNIFORM1IVARBPROC __glewUniform1ivARB = NULL; +PFNGLUNIFORM2FARBPROC 
__glewUniform2fARB = NULL; +PFNGLUNIFORM2FVARBPROC __glewUniform2fvARB = NULL; +PFNGLUNIFORM2IARBPROC __glewUniform2iARB = NULL; +PFNGLUNIFORM2IVARBPROC __glewUniform2ivARB = NULL; +PFNGLUNIFORM3FARBPROC __glewUniform3fARB = NULL; +PFNGLUNIFORM3FVARBPROC __glewUniform3fvARB = NULL; +PFNGLUNIFORM3IARBPROC __glewUniform3iARB = NULL; +PFNGLUNIFORM3IVARBPROC __glewUniform3ivARB = NULL; +PFNGLUNIFORM4FARBPROC __glewUniform4fARB = NULL; +PFNGLUNIFORM4FVARBPROC __glewUniform4fvARB = NULL; +PFNGLUNIFORM4IARBPROC __glewUniform4iARB = NULL; +PFNGLUNIFORM4IVARBPROC __glewUniform4ivARB = NULL; +PFNGLUNIFORMMATRIX2FVARBPROC __glewUniformMatrix2fvARB = NULL; +PFNGLUNIFORMMATRIX3FVARBPROC __glewUniformMatrix3fvARB = NULL; +PFNGLUNIFORMMATRIX4FVARBPROC __glewUniformMatrix4fvARB = NULL; +PFNGLUSEPROGRAMOBJECTARBPROC __glewUseProgramObjectARB = NULL; +PFNGLVALIDATEPROGRAMARBPROC __glewValidateProgramARB = NULL; + +PFNGLCOMPRESSEDTEXIMAGE1DARBPROC __glewCompressedTexImage1DARB = NULL; +PFNGLCOMPRESSEDTEXIMAGE2DARBPROC __glewCompressedTexImage2DARB = NULL; +PFNGLCOMPRESSEDTEXIMAGE3DARBPROC __glewCompressedTexImage3DARB = NULL; +PFNGLCOMPRESSEDTEXSUBIMAGE1DARBPROC __glewCompressedTexSubImage1DARB = NULL; +PFNGLCOMPRESSEDTEXSUBIMAGE2DARBPROC __glewCompressedTexSubImage2DARB = NULL; +PFNGLCOMPRESSEDTEXSUBIMAGE3DARBPROC __glewCompressedTexSubImage3DARB = NULL; +PFNGLGETCOMPRESSEDTEXIMAGEARBPROC __glewGetCompressedTexImageARB = NULL; + +PFNGLLOADTRANSPOSEMATRIXDARBPROC __glewLoadTransposeMatrixdARB = NULL; +PFNGLLOADTRANSPOSEMATRIXFARBPROC __glewLoadTransposeMatrixfARB = NULL; +PFNGLMULTTRANSPOSEMATRIXDARBPROC __glewMultTransposeMatrixdARB = NULL; +PFNGLMULTTRANSPOSEMATRIXFARBPROC __glewMultTransposeMatrixfARB = NULL; + +PFNGLVERTEXBLENDARBPROC __glewVertexBlendARB = NULL; +PFNGLWEIGHTPOINTERARBPROC __glewWeightPointerARB = NULL; +PFNGLWEIGHTBVARBPROC __glewWeightbvARB = NULL; +PFNGLWEIGHTDVARBPROC __glewWeightdvARB = NULL; +PFNGLWEIGHTFVARBPROC __glewWeightfvARB = NULL; 
+PFNGLWEIGHTIVARBPROC __glewWeightivARB = NULL; +PFNGLWEIGHTSVARBPROC __glewWeightsvARB = NULL; +PFNGLWEIGHTUBVARBPROC __glewWeightubvARB = NULL; +PFNGLWEIGHTUIVARBPROC __glewWeightuivARB = NULL; +PFNGLWEIGHTUSVARBPROC __glewWeightusvARB = NULL; + +PFNGLBINDBUFFERARBPROC __glewBindBufferARB = NULL; +PFNGLBUFFERDATAARBPROC __glewBufferDataARB = NULL; +PFNGLBUFFERSUBDATAARBPROC __glewBufferSubDataARB = NULL; +PFNGLDELETEBUFFERSARBPROC __glewDeleteBuffersARB = NULL; +PFNGLGENBUFFERSARBPROC __glewGenBuffersARB = NULL; +PFNGLGETBUFFERPARAMETERIVARBPROC __glewGetBufferParameterivARB = NULL; +PFNGLGETBUFFERPOINTERVARBPROC __glewGetBufferPointervARB = NULL; +PFNGLGETBUFFERSUBDATAARBPROC __glewGetBufferSubDataARB = NULL; +PFNGLISBUFFERARBPROC __glewIsBufferARB = NULL; +PFNGLMAPBUFFERARBPROC __glewMapBufferARB = NULL; +PFNGLUNMAPBUFFERARBPROC __glewUnmapBufferARB = NULL; + +PFNGLBINDPROGRAMARBPROC __glewBindProgramARB = NULL; +PFNGLDELETEPROGRAMSARBPROC __glewDeleteProgramsARB = NULL; +PFNGLDISABLEVERTEXATTRIBARRAYARBPROC __glewDisableVertexAttribArrayARB = NULL; +PFNGLENABLEVERTEXATTRIBARRAYARBPROC __glewEnableVertexAttribArrayARB = NULL; +PFNGLGENPROGRAMSARBPROC __glewGenProgramsARB = NULL; +PFNGLGETPROGRAMENVPARAMETERDVARBPROC __glewGetProgramEnvParameterdvARB = NULL; +PFNGLGETPROGRAMENVPARAMETERFVARBPROC __glewGetProgramEnvParameterfvARB = NULL; +PFNGLGETPROGRAMLOCALPARAMETERDVARBPROC __glewGetProgramLocalParameterdvARB = NULL; +PFNGLGETPROGRAMLOCALPARAMETERFVARBPROC __glewGetProgramLocalParameterfvARB = NULL; +PFNGLGETPROGRAMSTRINGARBPROC __glewGetProgramStringARB = NULL; +PFNGLGETPROGRAMIVARBPROC __glewGetProgramivARB = NULL; +PFNGLGETVERTEXATTRIBPOINTERVARBPROC __glewGetVertexAttribPointervARB = NULL; +PFNGLGETVERTEXATTRIBDVARBPROC __glewGetVertexAttribdvARB = NULL; +PFNGLGETVERTEXATTRIBFVARBPROC __glewGetVertexAttribfvARB = NULL; +PFNGLGETVERTEXATTRIBIVARBPROC __glewGetVertexAttribivARB = NULL; +PFNGLISPROGRAMARBPROC __glewIsProgramARB = NULL; 
+PFNGLPROGRAMENVPARAMETER4DARBPROC __glewProgramEnvParameter4dARB = NULL; +PFNGLPROGRAMENVPARAMETER4DVARBPROC __glewProgramEnvParameter4dvARB = NULL; +PFNGLPROGRAMENVPARAMETER4FARBPROC __glewProgramEnvParameter4fARB = NULL; +PFNGLPROGRAMENVPARAMETER4FVARBPROC __glewProgramEnvParameter4fvARB = NULL; +PFNGLPROGRAMLOCALPARAMETER4DARBPROC __glewProgramLocalParameter4dARB = NULL; +PFNGLPROGRAMLOCALPARAMETER4DVARBPROC __glewProgramLocalParameter4dvARB = NULL; +PFNGLPROGRAMLOCALPARAMETER4FARBPROC __glewProgramLocalParameter4fARB = NULL; +PFNGLPROGRAMLOCALPARAMETER4FVARBPROC __glewProgramLocalParameter4fvARB = NULL; +PFNGLPROGRAMSTRINGARBPROC __glewProgramStringARB = NULL; +PFNGLVERTEXATTRIB1DARBPROC __glewVertexAttrib1dARB = NULL; +PFNGLVERTEXATTRIB1DVARBPROC __glewVertexAttrib1dvARB = NULL; +PFNGLVERTEXATTRIB1FARBPROC __glewVertexAttrib1fARB = NULL; +PFNGLVERTEXATTRIB1FVARBPROC __glewVertexAttrib1fvARB = NULL; +PFNGLVERTEXATTRIB1SARBPROC __glewVertexAttrib1sARB = NULL; +PFNGLVERTEXATTRIB1SVARBPROC __glewVertexAttrib1svARB = NULL; +PFNGLVERTEXATTRIB2DARBPROC __glewVertexAttrib2dARB = NULL; +PFNGLVERTEXATTRIB2DVARBPROC __glewVertexAttrib2dvARB = NULL; +PFNGLVERTEXATTRIB2FARBPROC __glewVertexAttrib2fARB = NULL; +PFNGLVERTEXATTRIB2FVARBPROC __glewVertexAttrib2fvARB = NULL; +PFNGLVERTEXATTRIB2SARBPROC __glewVertexAttrib2sARB = NULL; +PFNGLVERTEXATTRIB2SVARBPROC __glewVertexAttrib2svARB = NULL; +PFNGLVERTEXATTRIB3DARBPROC __glewVertexAttrib3dARB = NULL; +PFNGLVERTEXATTRIB3DVARBPROC __glewVertexAttrib3dvARB = NULL; +PFNGLVERTEXATTRIB3FARBPROC __glewVertexAttrib3fARB = NULL; +PFNGLVERTEXATTRIB3FVARBPROC __glewVertexAttrib3fvARB = NULL; +PFNGLVERTEXATTRIB3SARBPROC __glewVertexAttrib3sARB = NULL; +PFNGLVERTEXATTRIB3SVARBPROC __glewVertexAttrib3svARB = NULL; +PFNGLVERTEXATTRIB4NBVARBPROC __glewVertexAttrib4NbvARB = NULL; +PFNGLVERTEXATTRIB4NIVARBPROC __glewVertexAttrib4NivARB = NULL; +PFNGLVERTEXATTRIB4NSVARBPROC __glewVertexAttrib4NsvARB = NULL; +PFNGLVERTEXATTRIB4NUBARBPROC 
__glewVertexAttrib4NubARB = NULL; +PFNGLVERTEXATTRIB4NUBVARBPROC __glewVertexAttrib4NubvARB = NULL; +PFNGLVERTEXATTRIB4NUIVARBPROC __glewVertexAttrib4NuivARB = NULL; +PFNGLVERTEXATTRIB4NUSVARBPROC __glewVertexAttrib4NusvARB = NULL; +PFNGLVERTEXATTRIB4BVARBPROC __glewVertexAttrib4bvARB = NULL; +PFNGLVERTEXATTRIB4DARBPROC __glewVertexAttrib4dARB = NULL; +PFNGLVERTEXATTRIB4DVARBPROC __glewVertexAttrib4dvARB = NULL; +PFNGLVERTEXATTRIB4FARBPROC __glewVertexAttrib4fARB = NULL; +PFNGLVERTEXATTRIB4FVARBPROC __glewVertexAttrib4fvARB = NULL; +PFNGLVERTEXATTRIB4IVARBPROC __glewVertexAttrib4ivARB = NULL; +PFNGLVERTEXATTRIB4SARBPROC __glewVertexAttrib4sARB = NULL; +PFNGLVERTEXATTRIB4SVARBPROC __glewVertexAttrib4svARB = NULL; +PFNGLVERTEXATTRIB4UBVARBPROC __glewVertexAttrib4ubvARB = NULL; +PFNGLVERTEXATTRIB4UIVARBPROC __glewVertexAttrib4uivARB = NULL; +PFNGLVERTEXATTRIB4USVARBPROC __glewVertexAttrib4usvARB = NULL; +PFNGLVERTEXATTRIBPOINTERARBPROC __glewVertexAttribPointerARB = NULL; + +PFNGLBINDATTRIBLOCATIONARBPROC __glewBindAttribLocationARB = NULL; +PFNGLGETACTIVEATTRIBARBPROC __glewGetActiveAttribARB = NULL; +PFNGLGETATTRIBLOCATIONARBPROC __glewGetAttribLocationARB = NULL; + +PFNGLWINDOWPOS2DARBPROC __glewWindowPos2dARB = NULL; +PFNGLWINDOWPOS2DVARBPROC __glewWindowPos2dvARB = NULL; +PFNGLWINDOWPOS2FARBPROC __glewWindowPos2fARB = NULL; +PFNGLWINDOWPOS2FVARBPROC __glewWindowPos2fvARB = NULL; +PFNGLWINDOWPOS2IARBPROC __glewWindowPos2iARB = NULL; +PFNGLWINDOWPOS2IVARBPROC __glewWindowPos2ivARB = NULL; +PFNGLWINDOWPOS2SARBPROC __glewWindowPos2sARB = NULL; +PFNGLWINDOWPOS2SVARBPROC __glewWindowPos2svARB = NULL; +PFNGLWINDOWPOS3DARBPROC __glewWindowPos3dARB = NULL; +PFNGLWINDOWPOS3DVARBPROC __glewWindowPos3dvARB = NULL; +PFNGLWINDOWPOS3FARBPROC __glewWindowPos3fARB = NULL; +PFNGLWINDOWPOS3FVARBPROC __glewWindowPos3fvARB = NULL; +PFNGLWINDOWPOS3IARBPROC __glewWindowPos3iARB = NULL; +PFNGLWINDOWPOS3IVARBPROC __glewWindowPos3ivARB = NULL; +PFNGLWINDOWPOS3SARBPROC __glewWindowPos3sARB 
= NULL; +PFNGLWINDOWPOS3SVARBPROC __glewWindowPos3svARB = NULL; + +PFNGLDRAWBUFFERSATIPROC __glewDrawBuffersATI = NULL; + +PFNGLDRAWELEMENTARRAYATIPROC __glewDrawElementArrayATI = NULL; +PFNGLDRAWRANGEELEMENTARRAYATIPROC __glewDrawRangeElementArrayATI = NULL; +PFNGLELEMENTPOINTERATIPROC __glewElementPointerATI = NULL; + +PFNGLGETTEXBUMPPARAMETERFVATIPROC __glewGetTexBumpParameterfvATI = NULL; +PFNGLGETTEXBUMPPARAMETERIVATIPROC __glewGetTexBumpParameterivATI = NULL; +PFNGLTEXBUMPPARAMETERFVATIPROC __glewTexBumpParameterfvATI = NULL; +PFNGLTEXBUMPPARAMETERIVATIPROC __glewTexBumpParameterivATI = NULL; + +PFNGLALPHAFRAGMENTOP1ATIPROC __glewAlphaFragmentOp1ATI = NULL; +PFNGLALPHAFRAGMENTOP2ATIPROC __glewAlphaFragmentOp2ATI = NULL; +PFNGLALPHAFRAGMENTOP3ATIPROC __glewAlphaFragmentOp3ATI = NULL; +PFNGLBEGINFRAGMENTSHADERATIPROC __glewBeginFragmentShaderATI = NULL; +PFNGLBINDFRAGMENTSHADERATIPROC __glewBindFragmentShaderATI = NULL; +PFNGLCOLORFRAGMENTOP1ATIPROC __glewColorFragmentOp1ATI = NULL; +PFNGLCOLORFRAGMENTOP2ATIPROC __glewColorFragmentOp2ATI = NULL; +PFNGLCOLORFRAGMENTOP3ATIPROC __glewColorFragmentOp3ATI = NULL; +PFNGLDELETEFRAGMENTSHADERATIPROC __glewDeleteFragmentShaderATI = NULL; +PFNGLENDFRAGMENTSHADERATIPROC __glewEndFragmentShaderATI = NULL; +PFNGLGENFRAGMENTSHADERSATIPROC __glewGenFragmentShadersATI = NULL; +PFNGLPASSTEXCOORDATIPROC __glewPassTexCoordATI = NULL; +PFNGLSAMPLEMAPATIPROC __glewSampleMapATI = NULL; +PFNGLSETFRAGMENTSHADERCONSTANTATIPROC __glewSetFragmentShaderConstantATI = NULL; + +PFNGLMAPOBJECTBUFFERATIPROC __glewMapObjectBufferATI = NULL; +PFNGLUNMAPOBJECTBUFFERATIPROC __glewUnmapObjectBufferATI = NULL; + +PFNGLPNTRIANGLESFATIPROC __glPNTrianglewesfATI = NULL; +PFNGLPNTRIANGLESIATIPROC __glPNTrianglewesiATI = NULL; + +PFNGLSTENCILFUNCSEPARATEATIPROC __glewStencilFuncSeparateATI = NULL; +PFNGLSTENCILOPSEPARATEATIPROC __glewStencilOpSeparateATI = NULL; + +PFNGLARRAYOBJECTATIPROC __glewArrayObjectATI = NULL; +PFNGLFREEOBJECTBUFFERATIPROC 
__glewFreeObjectBufferATI = NULL; +PFNGLGETARRAYOBJECTFVATIPROC __glewGetArrayObjectfvATI = NULL; +PFNGLGETARRAYOBJECTIVATIPROC __glewGetArrayObjectivATI = NULL; +PFNGLGETOBJECTBUFFERFVATIPROC __glewGetObjectBufferfvATI = NULL; +PFNGLGETOBJECTBUFFERIVATIPROC __glewGetObjectBufferivATI = NULL; +PFNGLGETVARIANTARRAYOBJECTFVATIPROC __glewGetVariantArrayObjectfvATI = NULL; +PFNGLGETVARIANTARRAYOBJECTIVATIPROC __glewGetVariantArrayObjectivATI = NULL; +PFNGLISOBJECTBUFFERATIPROC __glewIsObjectBufferATI = NULL; +PFNGLNEWOBJECTBUFFERATIPROC __glewNewObjectBufferATI = NULL; +PFNGLUPDATEOBJECTBUFFERATIPROC __glewUpdateObjectBufferATI = NULL; +PFNGLVARIANTARRAYOBJECTATIPROC __glewVariantArrayObjectATI = NULL; + +PFNGLGETVERTEXATTRIBARRAYOBJECTFVATIPROC __glewGetVertexAttribArrayObjectfvATI = NULL; +PFNGLGETVERTEXATTRIBARRAYOBJECTIVATIPROC __glewGetVertexAttribArrayObjectivATI = NULL; +PFNGLVERTEXATTRIBARRAYOBJECTATIPROC __glewVertexAttribArrayObjectATI = NULL; + +PFNGLCLIENTACTIVEVERTEXSTREAMATIPROC __glewClientActiveVertexStreamATI = NULL; +PFNGLNORMALSTREAM3BATIPROC __glewNormalStream3bATI = NULL; +PFNGLNORMALSTREAM3BVATIPROC __glewNormalStream3bvATI = NULL; +PFNGLNORMALSTREAM3DATIPROC __glewNormalStream3dATI = NULL; +PFNGLNORMALSTREAM3DVATIPROC __glewNormalStream3dvATI = NULL; +PFNGLNORMALSTREAM3FATIPROC __glewNormalStream3fATI = NULL; +PFNGLNORMALSTREAM3FVATIPROC __glewNormalStream3fvATI = NULL; +PFNGLNORMALSTREAM3IATIPROC __glewNormalStream3iATI = NULL; +PFNGLNORMALSTREAM3IVATIPROC __glewNormalStream3ivATI = NULL; +PFNGLNORMALSTREAM3SATIPROC __glewNormalStream3sATI = NULL; +PFNGLNORMALSTREAM3SVATIPROC __glewNormalStream3svATI = NULL; +PFNGLVERTEXBLENDENVFATIPROC __glewVertexBlendEnvfATI = NULL; +PFNGLVERTEXBLENDENVIATIPROC __glewVertexBlendEnviATI = NULL; +PFNGLVERTEXSTREAM2DATIPROC __glewVertexStream2dATI = NULL; +PFNGLVERTEXSTREAM2DVATIPROC __glewVertexStream2dvATI = NULL; +PFNGLVERTEXSTREAM2FATIPROC __glewVertexStream2fATI = NULL; +PFNGLVERTEXSTREAM2FVATIPROC 
__glewVertexStream2fvATI = NULL; +PFNGLVERTEXSTREAM2IATIPROC __glewVertexStream2iATI = NULL; +PFNGLVERTEXSTREAM2IVATIPROC __glewVertexStream2ivATI = NULL; +PFNGLVERTEXSTREAM2SATIPROC __glewVertexStream2sATI = NULL; +PFNGLVERTEXSTREAM2SVATIPROC __glewVertexStream2svATI = NULL; +PFNGLVERTEXSTREAM3DATIPROC __glewVertexStream3dATI = NULL; +PFNGLVERTEXSTREAM3DVATIPROC __glewVertexStream3dvATI = NULL; +PFNGLVERTEXSTREAM3FATIPROC __glewVertexStream3fATI = NULL; +PFNGLVERTEXSTREAM3FVATIPROC __glewVertexStream3fvATI = NULL; +PFNGLVERTEXSTREAM3IATIPROC __glewVertexStream3iATI = NULL; +PFNGLVERTEXSTREAM3IVATIPROC __glewVertexStream3ivATI = NULL; +PFNGLVERTEXSTREAM3SATIPROC __glewVertexStream3sATI = NULL; +PFNGLVERTEXSTREAM3SVATIPROC __glewVertexStream3svATI = NULL; +PFNGLVERTEXSTREAM4DATIPROC __glewVertexStream4dATI = NULL; +PFNGLVERTEXSTREAM4DVATIPROC __glewVertexStream4dvATI = NULL; +PFNGLVERTEXSTREAM4FATIPROC __glewVertexStream4fATI = NULL; +PFNGLVERTEXSTREAM4FVATIPROC __glewVertexStream4fvATI = NULL; +PFNGLVERTEXSTREAM4IATIPROC __glewVertexStream4iATI = NULL; +PFNGLVERTEXSTREAM4IVATIPROC __glewVertexStream4ivATI = NULL; +PFNGLVERTEXSTREAM4SATIPROC __glewVertexStream4sATI = NULL; +PFNGLVERTEXSTREAM4SVATIPROC __glewVertexStream4svATI = NULL; + +PFNGLGETUNIFORMBUFFERSIZEEXTPROC __glewGetUniformBufferSizeEXT = NULL; +PFNGLGETUNIFORMOFFSETEXTPROC __glewGetUniformOffsetEXT = NULL; +PFNGLUNIFORMBUFFEREXTPROC __glewUniformBufferEXT = NULL; + +PFNGLBLENDCOLOREXTPROC __glewBlendColorEXT = NULL; + +PFNGLBLENDEQUATIONSEPARATEEXTPROC __glewBlendEquationSeparateEXT = NULL; + +PFNGLBLENDFUNCSEPARATEEXTPROC __glewBlendFuncSeparateEXT = NULL; + +PFNGLBLENDEQUATIONEXTPROC __glewBlendEquationEXT = NULL; + +PFNGLCOLORSUBTABLEEXTPROC __glewColorSubTableEXT = NULL; +PFNGLCOPYCOLORSUBTABLEEXTPROC __glewCopyColorSubTableEXT = NULL; + +PFNGLLOCKARRAYSEXTPROC __glewLockArraysEXT = NULL; +PFNGLUNLOCKARRAYSEXTPROC __glewUnlockArraysEXT = NULL; + +PFNGLCONVOLUTIONFILTER1DEXTPROC 
__glewConvolutionFilter1DEXT = NULL; +PFNGLCONVOLUTIONFILTER2DEXTPROC __glewConvolutionFilter2DEXT = NULL; +PFNGLCONVOLUTIONPARAMETERFEXTPROC __glewConvolutionParameterfEXT = NULL; +PFNGLCONVOLUTIONPARAMETERFVEXTPROC __glewConvolutionParameterfvEXT = NULL; +PFNGLCONVOLUTIONPARAMETERIEXTPROC __glewConvolutionParameteriEXT = NULL; +PFNGLCONVOLUTIONPARAMETERIVEXTPROC __glewConvolutionParameterivEXT = NULL; +PFNGLCOPYCONVOLUTIONFILTER1DEXTPROC __glewCopyConvolutionFilter1DEXT = NULL; +PFNGLCOPYCONVOLUTIONFILTER2DEXTPROC __glewCopyConvolutionFilter2DEXT = NULL; +PFNGLGETCONVOLUTIONFILTEREXTPROC __glewGetConvolutionFilterEXT = NULL; +PFNGLGETCONVOLUTIONPARAMETERFVEXTPROC __glewGetConvolutionParameterfvEXT = NULL; +PFNGLGETCONVOLUTIONPARAMETERIVEXTPROC __glewGetConvolutionParameterivEXT = NULL; +PFNGLGETSEPARABLEFILTEREXTPROC __glewGetSeparableFilterEXT = NULL; +PFNGLSEPARABLEFILTER2DEXTPROC __glewSeparableFilter2DEXT = NULL; + +PFNGLBINORMALPOINTEREXTPROC __glewBinormalPointerEXT = NULL; +PFNGLTANGENTPOINTEREXTPROC __glewTangentPointerEXT = NULL; + +PFNGLCOPYTEXIMAGE1DEXTPROC __glewCopyTexImage1DEXT = NULL; +PFNGLCOPYTEXIMAGE2DEXTPROC __glewCopyTexImage2DEXT = NULL; +PFNGLCOPYTEXSUBIMAGE1DEXTPROC __glewCopyTexSubImage1DEXT = NULL; +PFNGLCOPYTEXSUBIMAGE2DEXTPROC __glewCopyTexSubImage2DEXT = NULL; +PFNGLCOPYTEXSUBIMAGE3DEXTPROC __glewCopyTexSubImage3DEXT = NULL; + +PFNGLCULLPARAMETERDVEXTPROC __glewCullParameterdvEXT = NULL; +PFNGLCULLPARAMETERFVEXTPROC __glewCullParameterfvEXT = NULL; + +PFNGLDEPTHBOUNDSEXTPROC __glewDepthBoundsEXT = NULL; + +PFNGLCOLORMASKINDEXEDEXTPROC __glewColorMaskIndexedEXT = NULL; +PFNGLDISABLEINDEXEDEXTPROC __glewDisableIndexedEXT = NULL; +PFNGLENABLEINDEXEDEXTPROC __glewEnableIndexedEXT = NULL; +PFNGLGETBOOLEANINDEXEDVEXTPROC __glewGetBooleanIndexedvEXT = NULL; +PFNGLGETINTEGERINDEXEDVEXTPROC __glewGetIntegerIndexedvEXT = NULL; +PFNGLISENABLEDINDEXEDEXTPROC __glewIsEnabledIndexedEXT = NULL; + +PFNGLDRAWARRAYSINSTANCEDEXTPROC 
__glewDrawArraysInstancedEXT = NULL; +PFNGLDRAWELEMENTSINSTANCEDEXTPROC __glewDrawElementsInstancedEXT = NULL; + +PFNGLDRAWRANGEELEMENTSEXTPROC __glewDrawRangeElementsEXT = NULL; + +PFNGLFOGCOORDPOINTEREXTPROC __glewFogCoordPointerEXT = NULL; +PFNGLFOGCOORDDEXTPROC __glewFogCoorddEXT = NULL; +PFNGLFOGCOORDDVEXTPROC __glewFogCoorddvEXT = NULL; +PFNGLFOGCOORDFEXTPROC __glewFogCoordfEXT = NULL; +PFNGLFOGCOORDFVEXTPROC __glewFogCoordfvEXT = NULL; + +PFNGLFRAGMENTCOLORMATERIALEXTPROC __glewFragmentColorMaterialEXT = NULL; +PFNGLFRAGMENTLIGHTMODELFEXTPROC __glewFragmentLightModelfEXT = NULL; +PFNGLFRAGMENTLIGHTMODELFVEXTPROC __glewFragmentLightModelfvEXT = NULL; +PFNGLFRAGMENTLIGHTMODELIEXTPROC __glewFragmentLightModeliEXT = NULL; +PFNGLFRAGMENTLIGHTMODELIVEXTPROC __glewFragmentLightModelivEXT = NULL; +PFNGLFRAGMENTLIGHTFEXTPROC __glewFragmentLightfEXT = NULL; +PFNGLFRAGMENTLIGHTFVEXTPROC __glewFragmentLightfvEXT = NULL; +PFNGLFRAGMENTLIGHTIEXTPROC __glewFragmentLightiEXT = NULL; +PFNGLFRAGMENTLIGHTIVEXTPROC __glewFragmentLightivEXT = NULL; +PFNGLFRAGMENTMATERIALFEXTPROC __glewFragmentMaterialfEXT = NULL; +PFNGLFRAGMENTMATERIALFVEXTPROC __glewFragmentMaterialfvEXT = NULL; +PFNGLFRAGMENTMATERIALIEXTPROC __glewFragmentMaterialiEXT = NULL; +PFNGLFRAGMENTMATERIALIVEXTPROC __glewFragmentMaterialivEXT = NULL; +PFNGLGETFRAGMENTLIGHTFVEXTPROC __glewGetFragmentLightfvEXT = NULL; +PFNGLGETFRAGMENTLIGHTIVEXTPROC __glewGetFragmentLightivEXT = NULL; +PFNGLGETFRAGMENTMATERIALFVEXTPROC __glewGetFragmentMaterialfvEXT = NULL; +PFNGLGETFRAGMENTMATERIALIVEXTPROC __glewGetFragmentMaterialivEXT = NULL; +PFNGLLIGHTENVIEXTPROC __glewLightEnviEXT = NULL; + +PFNGLBLITFRAMEBUFFEREXTPROC __glewBlitFramebufferEXT = NULL; + +PFNGLRENDERBUFFERSTORAGEMULTISAMPLEEXTPROC __glewRenderbufferStorageMultisampleEXT = NULL; + +PFNGLBINDFRAMEBUFFEREXTPROC __glewBindFramebufferEXT = NULL; +PFNGLBINDRENDERBUFFEREXTPROC __glewBindRenderbufferEXT = NULL; +PFNGLCHECKFRAMEBUFFERSTATUSEXTPROC 
__glewCheckFramebufferStatusEXT = NULL; +PFNGLDELETEFRAMEBUFFERSEXTPROC __glewDeleteFramebuffersEXT = NULL; +PFNGLDELETERENDERBUFFERSEXTPROC __glewDeleteRenderbuffersEXT = NULL; +PFNGLFRAMEBUFFERRENDERBUFFEREXTPROC __glewFramebufferRenderbufferEXT = NULL; +PFNGLFRAMEBUFFERTEXTURE1DEXTPROC __glewFramebufferTexture1DEXT = NULL; +PFNGLFRAMEBUFFERTEXTURE2DEXTPROC __glewFramebufferTexture2DEXT = NULL; +PFNGLFRAMEBUFFERTEXTURE3DEXTPROC __glewFramebufferTexture3DEXT = NULL; +PFNGLGENFRAMEBUFFERSEXTPROC __glewGenFramebuffersEXT = NULL; +PFNGLGENRENDERBUFFERSEXTPROC __glewGenRenderbuffersEXT = NULL; +PFNGLGENERATEMIPMAPEXTPROC __glewGenerateMipmapEXT = NULL; +PFNGLGETFRAMEBUFFERATTACHMENTPARAMETERIVEXTPROC __glewGetFramebufferAttachmentParameterivEXT = NULL; +PFNGLGETRENDERBUFFERPARAMETERIVEXTPROC __glewGetRenderbufferParameterivEXT = NULL; +PFNGLISFRAMEBUFFEREXTPROC __glewIsFramebufferEXT = NULL; +PFNGLISRENDERBUFFEREXTPROC __glewIsRenderbufferEXT = NULL; +PFNGLRENDERBUFFERSTORAGEEXTPROC __glewRenderbufferStorageEXT = NULL; + +PFNGLFRAMEBUFFERTEXTUREEXTPROC __glewFramebufferTextureEXT = NULL; +PFNGLFRAMEBUFFERTEXTUREFACEEXTPROC __glewFramebufferTextureFaceEXT = NULL; +PFNGLFRAMEBUFFERTEXTURELAYEREXTPROC __glewFramebufferTextureLayerEXT = NULL; +PFNGLPROGRAMPARAMETERIEXTPROC __glewProgramParameteriEXT = NULL; + +PFNGLPROGRAMENVPARAMETERS4FVEXTPROC __glewProgramEnvParameters4fvEXT = NULL; +PFNGLPROGRAMLOCALPARAMETERS4FVEXTPROC __glewProgramLocalParameters4fvEXT = NULL; + +PFNGLBINDFRAGDATALOCATIONEXTPROC __glewBindFragDataLocationEXT = NULL; +PFNGLGETFRAGDATALOCATIONEXTPROC __glewGetFragDataLocationEXT = NULL; +PFNGLGETUNIFORMUIVEXTPROC __glewGetUniformuivEXT = NULL; +PFNGLGETVERTEXATTRIBIIVEXTPROC __glewGetVertexAttribIivEXT = NULL; +PFNGLGETVERTEXATTRIBIUIVEXTPROC __glewGetVertexAttribIuivEXT = NULL; +PFNGLUNIFORM1UIEXTPROC __glewUniform1uiEXT = NULL; +PFNGLUNIFORM1UIVEXTPROC __glewUniform1uivEXT = NULL; +PFNGLUNIFORM2UIEXTPROC __glewUniform2uiEXT = NULL; 
+PFNGLUNIFORM2UIVEXTPROC __glewUniform2uivEXT = NULL; +PFNGLUNIFORM3UIEXTPROC __glewUniform3uiEXT = NULL; +PFNGLUNIFORM3UIVEXTPROC __glewUniform3uivEXT = NULL; +PFNGLUNIFORM4UIEXTPROC __glewUniform4uiEXT = NULL; +PFNGLUNIFORM4UIVEXTPROC __glewUniform4uivEXT = NULL; +PFNGLVERTEXATTRIBI1IEXTPROC __glewVertexAttribI1iEXT = NULL; +PFNGLVERTEXATTRIBI1IVEXTPROC __glewVertexAttribI1ivEXT = NULL; +PFNGLVERTEXATTRIBI1UIEXTPROC __glewVertexAttribI1uiEXT = NULL; +PFNGLVERTEXATTRIBI1UIVEXTPROC __glewVertexAttribI1uivEXT = NULL; +PFNGLVERTEXATTRIBI2IEXTPROC __glewVertexAttribI2iEXT = NULL; +PFNGLVERTEXATTRIBI2IVEXTPROC __glewVertexAttribI2ivEXT = NULL; +PFNGLVERTEXATTRIBI2UIEXTPROC __glewVertexAttribI2uiEXT = NULL; +PFNGLVERTEXATTRIBI2UIVEXTPROC __glewVertexAttribI2uivEXT = NULL; +PFNGLVERTEXATTRIBI3IEXTPROC __glewVertexAttribI3iEXT = NULL; +PFNGLVERTEXATTRIBI3IVEXTPROC __glewVertexAttribI3ivEXT = NULL; +PFNGLVERTEXATTRIBI3UIEXTPROC __glewVertexAttribI3uiEXT = NULL; +PFNGLVERTEXATTRIBI3UIVEXTPROC __glewVertexAttribI3uivEXT = NULL; +PFNGLVERTEXATTRIBI4BVEXTPROC __glewVertexAttribI4bvEXT = NULL; +PFNGLVERTEXATTRIBI4IEXTPROC __glewVertexAttribI4iEXT = NULL; +PFNGLVERTEXATTRIBI4IVEXTPROC __glewVertexAttribI4ivEXT = NULL; +PFNGLVERTEXATTRIBI4SVEXTPROC __glewVertexAttribI4svEXT = NULL; +PFNGLVERTEXATTRIBI4UBVEXTPROC __glewVertexAttribI4ubvEXT = NULL; +PFNGLVERTEXATTRIBI4UIEXTPROC __glewVertexAttribI4uiEXT = NULL; +PFNGLVERTEXATTRIBI4UIVEXTPROC __glewVertexAttribI4uivEXT = NULL; +PFNGLVERTEXATTRIBI4USVEXTPROC __glewVertexAttribI4usvEXT = NULL; +PFNGLVERTEXATTRIBIPOINTEREXTPROC __glewVertexAttribIPointerEXT = NULL; + +PFNGLGETHISTOGRAMEXTPROC __glewGetHistogramEXT = NULL; +PFNGLGETHISTOGRAMPARAMETERFVEXTPROC __glewGetHistogramParameterfvEXT = NULL; +PFNGLGETHISTOGRAMPARAMETERIVEXTPROC __glewGetHistogramParameterivEXT = NULL; +PFNGLGETMINMAXEXTPROC __glewGetMinmaxEXT = NULL; +PFNGLGETMINMAXPARAMETERFVEXTPROC __glewGetMinmaxParameterfvEXT = NULL; +PFNGLGETMINMAXPARAMETERIVEXTPROC 
__glewGetMinmaxParameterivEXT = NULL; +PFNGLHISTOGRAMEXTPROC __glewHistogramEXT = NULL; +PFNGLMINMAXEXTPROC __glewMinmaxEXT = NULL; +PFNGLRESETHISTOGRAMEXTPROC __glewResetHistogramEXT = NULL; +PFNGLRESETMINMAXEXTPROC __glewResetMinmaxEXT = NULL; + +PFNGLINDEXFUNCEXTPROC __glewIndexFuncEXT = NULL; + +PFNGLINDEXMATERIALEXTPROC __glewIndexMaterialEXT = NULL; + +PFNGLAPPLYTEXTUREEXTPROC __glewApplyTextureEXT = NULL; +PFNGLTEXTURELIGHTEXTPROC __glewTextureLightEXT = NULL; +PFNGLTEXTUREMATERIALEXTPROC __glewTextureMaterialEXT = NULL; + +PFNGLMULTIDRAWARRAYSEXTPROC __glewMultiDrawArraysEXT = NULL; +PFNGLMULTIDRAWELEMENTSEXTPROC __glewMultiDrawElementsEXT = NULL; + +PFNGLSAMPLEMASKEXTPROC __glewSampleMaskEXT = NULL; +PFNGLSAMPLEPATTERNEXTPROC __glewSamplePatternEXT = NULL; + +PFNGLCOLORTABLEEXTPROC __glewColorTableEXT = NULL; +PFNGLGETCOLORTABLEEXTPROC __glewGetColorTableEXT = NULL; +PFNGLGETCOLORTABLEPARAMETERFVEXTPROC __glewGetColorTableParameterfvEXT = NULL; +PFNGLGETCOLORTABLEPARAMETERIVEXTPROC __glewGetColorTableParameterivEXT = NULL; + +PFNGLGETPIXELTRANSFORMPARAMETERFVEXTPROC __glewGetPixelTransformParameterfvEXT = NULL; +PFNGLGETPIXELTRANSFORMPARAMETERIVEXTPROC __glewGetPixelTransformParameterivEXT = NULL; +PFNGLPIXELTRANSFORMPARAMETERFEXTPROC __glewPixelTransformParameterfEXT = NULL; +PFNGLPIXELTRANSFORMPARAMETERFVEXTPROC __glewPixelTransformParameterfvEXT = NULL; +PFNGLPIXELTRANSFORMPARAMETERIEXTPROC __glewPixelTransformParameteriEXT = NULL; +PFNGLPIXELTRANSFORMPARAMETERIVEXTPROC __glewPixelTransformParameterivEXT = NULL; + +PFNGLPOINTPARAMETERFEXTPROC __glewPointParameterfEXT = NULL; +PFNGLPOINTPARAMETERFVEXTPROC __glewPointParameterfvEXT = NULL; + +PFNGLPOLYGONOFFSETEXTPROC __glewPolygonOffsetEXT = NULL; + +PFNGLBEGINSCENEEXTPROC __glewBeginSceneEXT = NULL; +PFNGLENDSCENEEXTPROC __glewEndSceneEXT = NULL; + +PFNGLSECONDARYCOLOR3BEXTPROC __glewSecondaryColor3bEXT = NULL; +PFNGLSECONDARYCOLOR3BVEXTPROC __glewSecondaryColor3bvEXT = NULL; 
+PFNGLSECONDARYCOLOR3DEXTPROC __glewSecondaryColor3dEXT = NULL; +PFNGLSECONDARYCOLOR3DVEXTPROC __glewSecondaryColor3dvEXT = NULL; +PFNGLSECONDARYCOLOR3FEXTPROC __glewSecondaryColor3fEXT = NULL; +PFNGLSECONDARYCOLOR3FVEXTPROC __glewSecondaryColor3fvEXT = NULL; +PFNGLSECONDARYCOLOR3IEXTPROC __glewSecondaryColor3iEXT = NULL; +PFNGLSECONDARYCOLOR3IVEXTPROC __glewSecondaryColor3ivEXT = NULL; +PFNGLSECONDARYCOLOR3SEXTPROC __glewSecondaryColor3sEXT = NULL; +PFNGLSECONDARYCOLOR3SVEXTPROC __glewSecondaryColor3svEXT = NULL; +PFNGLSECONDARYCOLOR3UBEXTPROC __glewSecondaryColor3ubEXT = NULL; +PFNGLSECONDARYCOLOR3UBVEXTPROC __glewSecondaryColor3ubvEXT = NULL; +PFNGLSECONDARYCOLOR3UIEXTPROC __glewSecondaryColor3uiEXT = NULL; +PFNGLSECONDARYCOLOR3UIVEXTPROC __glewSecondaryColor3uivEXT = NULL; +PFNGLSECONDARYCOLOR3USEXTPROC __glewSecondaryColor3usEXT = NULL; +PFNGLSECONDARYCOLOR3USVEXTPROC __glewSecondaryColor3usvEXT = NULL; +PFNGLSECONDARYCOLORPOINTEREXTPROC __glewSecondaryColorPointerEXT = NULL; + +PFNGLACTIVESTENCILFACEEXTPROC __glewActiveStencilFaceEXT = NULL; + +PFNGLTEXSUBIMAGE1DEXTPROC __glewTexSubImage1DEXT = NULL; +PFNGLTEXSUBIMAGE2DEXTPROC __glewTexSubImage2DEXT = NULL; +PFNGLTEXSUBIMAGE3DEXTPROC __glewTexSubImage3DEXT = NULL; + +PFNGLTEXIMAGE3DEXTPROC __glewTexImage3DEXT = NULL; + +PFNGLTEXBUFFEREXTPROC __glewTexBufferEXT = NULL; + +PFNGLCLEARCOLORIIEXTPROC __glewClearColorIiEXT = NULL; +PFNGLCLEARCOLORIUIEXTPROC __glewClearColorIuiEXT = NULL; +PFNGLGETTEXPARAMETERIIVEXTPROC __glewGetTexParameterIivEXT = NULL; +PFNGLGETTEXPARAMETERIUIVEXTPROC __glewGetTexParameterIuivEXT = NULL; +PFNGLTEXPARAMETERIIVEXTPROC __glewTexParameterIivEXT = NULL; +PFNGLTEXPARAMETERIUIVEXTPROC __glewTexParameterIuivEXT = NULL; + +PFNGLARETEXTURESRESIDENTEXTPROC __glewAreTexturesResidentEXT = NULL; +PFNGLBINDTEXTUREEXTPROC __glewBindTextureEXT = NULL; +PFNGLDELETETEXTURESEXTPROC __glewDeleteTexturesEXT = NULL; +PFNGLGENTEXTURESEXTPROC __glewGenTexturesEXT = NULL; +PFNGLISTEXTUREEXTPROC 
__glewIsTextureEXT = NULL; +PFNGLPRIORITIZETEXTURESEXTPROC __glewPrioritizeTexturesEXT = NULL; + +PFNGLTEXTURENORMALEXTPROC __glewTextureNormalEXT = NULL; + +PFNGLGETQUERYOBJECTI64VEXTPROC __glewGetQueryObjecti64vEXT = NULL; +PFNGLGETQUERYOBJECTUI64VEXTPROC __glewGetQueryObjectui64vEXT = NULL; + +PFNGLARRAYELEMENTEXTPROC __glewArrayElementEXT = NULL; +PFNGLCOLORPOINTEREXTPROC __glewColorPointerEXT = NULL; +PFNGLDRAWARRAYSEXTPROC __glewDrawArraysEXT = NULL; +PFNGLEDGEFLAGPOINTEREXTPROC __glewEdgeFlagPointerEXT = NULL; +PFNGLGETPOINTERVEXTPROC __glewGetPointervEXT = NULL; +PFNGLINDEXPOINTEREXTPROC __glewIndexPointerEXT = NULL; +PFNGLNORMALPOINTEREXTPROC __glewNormalPointerEXT = NULL; +PFNGLTEXCOORDPOINTEREXTPROC __glewTexCoordPointerEXT = NULL; +PFNGLVERTEXPOINTEREXTPROC __glewVertexPointerEXT = NULL; + +PFNGLBEGINVERTEXSHADEREXTPROC __glewBeginVertexShaderEXT = NULL; +PFNGLBINDLIGHTPARAMETEREXTPROC __glewBindLightParameterEXT = NULL; +PFNGLBINDMATERIALPARAMETEREXTPROC __glewBindMaterialParameterEXT = NULL; +PFNGLBINDPARAMETEREXTPROC __glewBindParameterEXT = NULL; +PFNGLBINDTEXGENPARAMETEREXTPROC __glewBindTexGenParameterEXT = NULL; +PFNGLBINDTEXTUREUNITPARAMETEREXTPROC __glewBindTextureUnitParameterEXT = NULL; +PFNGLBINDVERTEXSHADEREXTPROC __glewBindVertexShaderEXT = NULL; +PFNGLDELETEVERTEXSHADEREXTPROC __glewDeleteVertexShaderEXT = NULL; +PFNGLDISABLEVARIANTCLIENTSTATEEXTPROC __glewDisableVariantClientStateEXT = NULL; +PFNGLENABLEVARIANTCLIENTSTATEEXTPROC __glewEnableVariantClientStateEXT = NULL; +PFNGLENDVERTEXSHADEREXTPROC __glewEndVertexShaderEXT = NULL; +PFNGLEXTRACTCOMPONENTEXTPROC __glewExtractComponentEXT = NULL; +PFNGLGENSYMBOLSEXTPROC __glewGenSymbolsEXT = NULL; +PFNGLGENVERTEXSHADERSEXTPROC __glewGenVertexShadersEXT = NULL; +PFNGLGETINVARIANTBOOLEANVEXTPROC __glewGetInvariantBooleanvEXT = NULL; +PFNGLGETINVARIANTFLOATVEXTPROC __glewGetInvariantFloatvEXT = NULL; +PFNGLGETINVARIANTINTEGERVEXTPROC __glewGetInvariantIntegervEXT = NULL; 
+PFNGLGETLOCALCONSTANTBOOLEANVEXTPROC __glewGetLocalConstantBooleanvEXT = NULL; +PFNGLGETLOCALCONSTANTFLOATVEXTPROC __glewGetLocalConstantFloatvEXT = NULL; +PFNGLGETLOCALCONSTANTINTEGERVEXTPROC __glewGetLocalConstantIntegervEXT = NULL; +PFNGLGETVARIANTBOOLEANVEXTPROC __glewGetVariantBooleanvEXT = NULL; +PFNGLGETVARIANTFLOATVEXTPROC __glewGetVariantFloatvEXT = NULL; +PFNGLGETVARIANTINTEGERVEXTPROC __glewGetVariantIntegervEXT = NULL; +PFNGLGETVARIANTPOINTERVEXTPROC __glewGetVariantPointervEXT = NULL; +PFNGLINSERTCOMPONENTEXTPROC __glewInsertComponentEXT = NULL; +PFNGLISVARIANTENABLEDEXTPROC __glewIsVariantEnabledEXT = NULL; +PFNGLSETINVARIANTEXTPROC __glewSetInvariantEXT = NULL; +PFNGLSETLOCALCONSTANTEXTPROC __glewSetLocalConstantEXT = NULL; +PFNGLSHADEROP1EXTPROC __glewShaderOp1EXT = NULL; +PFNGLSHADEROP2EXTPROC __glewShaderOp2EXT = NULL; +PFNGLSHADEROP3EXTPROC __glewShaderOp3EXT = NULL; +PFNGLSWIZZLEEXTPROC __glewSwizzleEXT = NULL; +PFNGLVARIANTPOINTEREXTPROC __glewVariantPointerEXT = NULL; +PFNGLVARIANTBVEXTPROC __glewVariantbvEXT = NULL; +PFNGLVARIANTDVEXTPROC __glewVariantdvEXT = NULL; +PFNGLVARIANTFVEXTPROC __glewVariantfvEXT = NULL; +PFNGLVARIANTIVEXTPROC __glewVariantivEXT = NULL; +PFNGLVARIANTSVEXTPROC __glewVariantsvEXT = NULL; +PFNGLVARIANTUBVEXTPROC __glewVariantubvEXT = NULL; +PFNGLVARIANTUIVEXTPROC __glewVariantuivEXT = NULL; +PFNGLVARIANTUSVEXTPROC __glewVariantusvEXT = NULL; +PFNGLWRITEMASKEXTPROC __glewWriteMaskEXT = NULL; + +PFNGLVERTEXWEIGHTPOINTEREXTPROC __glewVertexWeightPointerEXT = NULL; +PFNGLVERTEXWEIGHTFEXTPROC __glewVertexWeightfEXT = NULL; +PFNGLVERTEXWEIGHTFVEXTPROC __glewVertexWeightfvEXT = NULL; + +PFNGLFRAMETERMINATORGREMEDYPROC __glewFrameTerminatorGREMEDY = NULL; + +PFNGLSTRINGMARKERGREMEDYPROC __glewStringMarkerGREMEDY = NULL; + +PFNGLGETIMAGETRANSFORMPARAMETERFVHPPROC __glewGetImageTransformParameterfvHP = NULL; +PFNGLGETIMAGETRANSFORMPARAMETERIVHPPROC __glewGetImageTransformParameterivHP = NULL; 
+PFNGLIMAGETRANSFORMPARAMETERFHPPROC __glewImageTransformParameterfHP = NULL; +PFNGLIMAGETRANSFORMPARAMETERFVHPPROC __glewImageTransformParameterfvHP = NULL; +PFNGLIMAGETRANSFORMPARAMETERIHPPROC __glewImageTransformParameteriHP = NULL; +PFNGLIMAGETRANSFORMPARAMETERIVHPPROC __glewImageTransformParameterivHP = NULL; + +PFNGLMULTIMODEDRAWARRAYSIBMPROC __glewMultiModeDrawArraysIBM = NULL; +PFNGLMULTIMODEDRAWELEMENTSIBMPROC __glewMultiModeDrawElementsIBM = NULL; + +PFNGLCOLORPOINTERLISTIBMPROC __glewColorPointerListIBM = NULL; +PFNGLEDGEFLAGPOINTERLISTIBMPROC __glewEdgeFlagPointerListIBM = NULL; +PFNGLFOGCOORDPOINTERLISTIBMPROC __glewFogCoordPointerListIBM = NULL; +PFNGLINDEXPOINTERLISTIBMPROC __glewIndexPointerListIBM = NULL; +PFNGLNORMALPOINTERLISTIBMPROC __glewNormalPointerListIBM = NULL; +PFNGLSECONDARYCOLORPOINTERLISTIBMPROC __glewSecondaryColorPointerListIBM = NULL; +PFNGLTEXCOORDPOINTERLISTIBMPROC __glewTexCoordPointerListIBM = NULL; +PFNGLVERTEXPOINTERLISTIBMPROC __glewVertexPointerListIBM = NULL; + +PFNGLCOLORPOINTERVINTELPROC __glewColorPointervINTEL = NULL; +PFNGLNORMALPOINTERVINTELPROC __glewNormalPointervINTEL = NULL; +PFNGLTEXCOORDPOINTERVINTELPROC __glewTexCoordPointervINTEL = NULL; +PFNGLVERTEXPOINTERVINTELPROC __glewVertexPointervINTEL = NULL; + +PFNGLTEXSCISSORFUNCINTELPROC __glewTexScissorFuncINTEL = NULL; +PFNGLTEXSCISSORINTELPROC __glewTexScissorINTEL = NULL; + +PFNGLBUFFERREGIONENABLEDEXTPROC __glewBufferRegionEnabledEXT = NULL; +PFNGLDELETEBUFFERREGIONEXTPROC __glewDeleteBufferRegionEXT = NULL; +PFNGLDRAWBUFFERREGIONEXTPROC __glewDrawBufferRegionEXT = NULL; +PFNGLNEWBUFFERREGIONEXTPROC __glewNewBufferRegionEXT = NULL; +PFNGLREADBUFFERREGIONEXTPROC __glewReadBufferRegionEXT = NULL; + +PFNGLRESIZEBUFFERSMESAPROC __glewResizeBuffersMESA = NULL; + +PFNGLWINDOWPOS2DMESAPROC __glewWindowPos2dMESA = NULL; +PFNGLWINDOWPOS2DVMESAPROC __glewWindowPos2dvMESA = NULL; +PFNGLWINDOWPOS2FMESAPROC __glewWindowPos2fMESA = NULL; +PFNGLWINDOWPOS2FVMESAPROC 
__glewWindowPos2fvMESA = NULL; +PFNGLWINDOWPOS2IMESAPROC __glewWindowPos2iMESA = NULL; +PFNGLWINDOWPOS2IVMESAPROC __glewWindowPos2ivMESA = NULL; +PFNGLWINDOWPOS2SMESAPROC __glewWindowPos2sMESA = NULL; +PFNGLWINDOWPOS2SVMESAPROC __glewWindowPos2svMESA = NULL; +PFNGLWINDOWPOS3DMESAPROC __glewWindowPos3dMESA = NULL; +PFNGLWINDOWPOS3DVMESAPROC __glewWindowPos3dvMESA = NULL; +PFNGLWINDOWPOS3FMESAPROC __glewWindowPos3fMESA = NULL; +PFNGLWINDOWPOS3FVMESAPROC __glewWindowPos3fvMESA = NULL; +PFNGLWINDOWPOS3IMESAPROC __glewWindowPos3iMESA = NULL; +PFNGLWINDOWPOS3IVMESAPROC __glewWindowPos3ivMESA = NULL; +PFNGLWINDOWPOS3SMESAPROC __glewWindowPos3sMESA = NULL; +PFNGLWINDOWPOS3SVMESAPROC __glewWindowPos3svMESA = NULL; +PFNGLWINDOWPOS4DMESAPROC __glewWindowPos4dMESA = NULL; +PFNGLWINDOWPOS4DVMESAPROC __glewWindowPos4dvMESA = NULL; +PFNGLWINDOWPOS4FMESAPROC __glewWindowPos4fMESA = NULL; +PFNGLWINDOWPOS4FVMESAPROC __glewWindowPos4fvMESA = NULL; +PFNGLWINDOWPOS4IMESAPROC __glewWindowPos4iMESA = NULL; +PFNGLWINDOWPOS4IVMESAPROC __glewWindowPos4ivMESA = NULL; +PFNGLWINDOWPOS4SMESAPROC __glewWindowPos4sMESA = NULL; +PFNGLWINDOWPOS4SVMESAPROC __glewWindowPos4svMESA = NULL; + +PFNGLCLEARDEPTHDNVPROC __glewClearDepthdNV = NULL; +PFNGLDEPTHBOUNDSDNVPROC __glewDepthBoundsdNV = NULL; +PFNGLDEPTHRANGEDNVPROC __glewDepthRangedNV = NULL; + +PFNGLEVALMAPSNVPROC __glewEvalMapsNV = NULL; +PFNGLGETMAPATTRIBPARAMETERFVNVPROC __glewGetMapAttribParameterfvNV = NULL; +PFNGLGETMAPATTRIBPARAMETERIVNVPROC __glewGetMapAttribParameterivNV = NULL; +PFNGLGETMAPCONTROLPOINTSNVPROC __glewGetMapControlPointsNV = NULL; +PFNGLGETMAPPARAMETERFVNVPROC __glewGetMapParameterfvNV = NULL; +PFNGLGETMAPPARAMETERIVNVPROC __glewGetMapParameterivNV = NULL; +PFNGLMAPCONTROLPOINTSNVPROC __glewMapControlPointsNV = NULL; +PFNGLMAPPARAMETERFVNVPROC __glewMapParameterfvNV = NULL; +PFNGLMAPPARAMETERIVNVPROC __glewMapParameterivNV = NULL; + +PFNGLDELETEFENCESNVPROC __glewDeleteFencesNV = NULL; +PFNGLFINISHFENCENVPROC 
__glewFinishFenceNV = NULL; +PFNGLGENFENCESNVPROC __glewGenFencesNV = NULL; +PFNGLGETFENCEIVNVPROC __glewGetFenceivNV = NULL; +PFNGLISFENCENVPROC __glewIsFenceNV = NULL; +PFNGLSETFENCENVPROC __glewSetFenceNV = NULL; +PFNGLTESTFENCENVPROC __glewTestFenceNV = NULL; + +PFNGLGETPROGRAMNAMEDPARAMETERDVNVPROC __glewGetProgramNamedParameterdvNV = NULL; +PFNGLGETPROGRAMNAMEDPARAMETERFVNVPROC __glewGetProgramNamedParameterfvNV = NULL; +PFNGLPROGRAMNAMEDPARAMETER4DNVPROC __glewProgramNamedParameter4dNV = NULL; +PFNGLPROGRAMNAMEDPARAMETER4DVNVPROC __glewProgramNamedParameter4dvNV = NULL; +PFNGLPROGRAMNAMEDPARAMETER4FNVPROC __glewProgramNamedParameter4fNV = NULL; +PFNGLPROGRAMNAMEDPARAMETER4FVNVPROC __glewProgramNamedParameter4fvNV = NULL; + +PFNGLRENDERBUFFERSTORAGEMULTISAMPLECOVERAGENVPROC __glewRenderbufferStorageMultisampleCoverageNV = NULL; + +PFNGLPROGRAMVERTEXLIMITNVPROC __glewProgramVertexLimitNV = NULL; + +PFNGLPROGRAMENVPARAMETERI4INVPROC __glewProgramEnvParameterI4iNV = NULL; +PFNGLPROGRAMENVPARAMETERI4IVNVPROC __glewProgramEnvParameterI4ivNV = NULL; +PFNGLPROGRAMENVPARAMETERI4UINVPROC __glewProgramEnvParameterI4uiNV = NULL; +PFNGLPROGRAMENVPARAMETERI4UIVNVPROC __glewProgramEnvParameterI4uivNV = NULL; +PFNGLPROGRAMENVPARAMETERSI4IVNVPROC __glewProgramEnvParametersI4ivNV = NULL; +PFNGLPROGRAMENVPARAMETERSI4UIVNVPROC __glewProgramEnvParametersI4uivNV = NULL; +PFNGLPROGRAMLOCALPARAMETERI4INVPROC __glewProgramLocalParameterI4iNV = NULL; +PFNGLPROGRAMLOCALPARAMETERI4IVNVPROC __glewProgramLocalParameterI4ivNV = NULL; +PFNGLPROGRAMLOCALPARAMETERI4UINVPROC __glewProgramLocalParameterI4uiNV = NULL; +PFNGLPROGRAMLOCALPARAMETERI4UIVNVPROC __glewProgramLocalParameterI4uivNV = NULL; +PFNGLPROGRAMLOCALPARAMETERSI4IVNVPROC __glewProgramLocalParametersI4ivNV = NULL; +PFNGLPROGRAMLOCALPARAMETERSI4UIVNVPROC __glewProgramLocalParametersI4uivNV = NULL; + +PFNGLCOLOR3HNVPROC __glewColor3hNV = NULL; +PFNGLCOLOR3HVNVPROC __glewColor3hvNV = NULL; +PFNGLCOLOR4HNVPROC __glewColor4hNV = NULL; 
+PFNGLCOLOR4HVNVPROC __glewColor4hvNV = NULL; +PFNGLFOGCOORDHNVPROC __glewFogCoordhNV = NULL; +PFNGLFOGCOORDHVNVPROC __glewFogCoordhvNV = NULL; +PFNGLMULTITEXCOORD1HNVPROC __glewMultiTexCoord1hNV = NULL; +PFNGLMULTITEXCOORD1HVNVPROC __glewMultiTexCoord1hvNV = NULL; +PFNGLMULTITEXCOORD2HNVPROC __glewMultiTexCoord2hNV = NULL; +PFNGLMULTITEXCOORD2HVNVPROC __glewMultiTexCoord2hvNV = NULL; +PFNGLMULTITEXCOORD3HNVPROC __glewMultiTexCoord3hNV = NULL; +PFNGLMULTITEXCOORD3HVNVPROC __glewMultiTexCoord3hvNV = NULL; +PFNGLMULTITEXCOORD4HNVPROC __glewMultiTexCoord4hNV = NULL; +PFNGLMULTITEXCOORD4HVNVPROC __glewMultiTexCoord4hvNV = NULL; +PFNGLNORMAL3HNVPROC __glewNormal3hNV = NULL; +PFNGLNORMAL3HVNVPROC __glewNormal3hvNV = NULL; +PFNGLSECONDARYCOLOR3HNVPROC __glewSecondaryColor3hNV = NULL; +PFNGLSECONDARYCOLOR3HVNVPROC __glewSecondaryColor3hvNV = NULL; +PFNGLTEXCOORD1HNVPROC __glewTexCoord1hNV = NULL; +PFNGLTEXCOORD1HVNVPROC __glewTexCoord1hvNV = NULL; +PFNGLTEXCOORD2HNVPROC __glewTexCoord2hNV = NULL; +PFNGLTEXCOORD2HVNVPROC __glewTexCoord2hvNV = NULL; +PFNGLTEXCOORD3HNVPROC __glewTexCoord3hNV = NULL; +PFNGLTEXCOORD3HVNVPROC __glewTexCoord3hvNV = NULL; +PFNGLTEXCOORD4HNVPROC __glewTexCoord4hNV = NULL; +PFNGLTEXCOORD4HVNVPROC __glewTexCoord4hvNV = NULL; +PFNGLVERTEX2HNVPROC __glewVertex2hNV = NULL; +PFNGLVERTEX2HVNVPROC __glewVertex2hvNV = NULL; +PFNGLVERTEX3HNVPROC __glewVertex3hNV = NULL; +PFNGLVERTEX3HVNVPROC __glewVertex3hvNV = NULL; +PFNGLVERTEX4HNVPROC __glewVertex4hNV = NULL; +PFNGLVERTEX4HVNVPROC __glewVertex4hvNV = NULL; +PFNGLVERTEXATTRIB1HNVPROC __glewVertexAttrib1hNV = NULL; +PFNGLVERTEXATTRIB1HVNVPROC __glewVertexAttrib1hvNV = NULL; +PFNGLVERTEXATTRIB2HNVPROC __glewVertexAttrib2hNV = NULL; +PFNGLVERTEXATTRIB2HVNVPROC __glewVertexAttrib2hvNV = NULL; +PFNGLVERTEXATTRIB3HNVPROC __glewVertexAttrib3hNV = NULL; +PFNGLVERTEXATTRIB3HVNVPROC __glewVertexAttrib3hvNV = NULL; +PFNGLVERTEXATTRIB4HNVPROC __glewVertexAttrib4hNV = NULL; +PFNGLVERTEXATTRIB4HVNVPROC 
__glewVertexAttrib4hvNV = NULL; +PFNGLVERTEXATTRIBS1HVNVPROC __glewVertexAttribs1hvNV = NULL; +PFNGLVERTEXATTRIBS2HVNVPROC __glewVertexAttribs2hvNV = NULL; +PFNGLVERTEXATTRIBS3HVNVPROC __glewVertexAttribs3hvNV = NULL; +PFNGLVERTEXATTRIBS4HVNVPROC __glewVertexAttribs4hvNV = NULL; +PFNGLVERTEXWEIGHTHNVPROC __glewVertexWeighthNV = NULL; +PFNGLVERTEXWEIGHTHVNVPROC __glewVertexWeighthvNV = NULL; + +PFNGLBEGINOCCLUSIONQUERYNVPROC __glewBeginOcclusionQueryNV = NULL; +PFNGLDELETEOCCLUSIONQUERIESNVPROC __glewDeleteOcclusionQueriesNV = NULL; +PFNGLENDOCCLUSIONQUERYNVPROC __glewEndOcclusionQueryNV = NULL; +PFNGLGENOCCLUSIONQUERIESNVPROC __glewGenOcclusionQueriesNV = NULL; +PFNGLGETOCCLUSIONQUERYIVNVPROC __glewGetOcclusionQueryivNV = NULL; +PFNGLGETOCCLUSIONQUERYUIVNVPROC __glewGetOcclusionQueryuivNV = NULL; +PFNGLISOCCLUSIONQUERYNVPROC __glewIsOcclusionQueryNV = NULL; + +PFNGLPROGRAMBUFFERPARAMETERSIIVNVPROC __glewProgramBufferParametersIivNV = NULL; +PFNGLPROGRAMBUFFERPARAMETERSIUIVNVPROC __glewProgramBufferParametersIuivNV = NULL; +PFNGLPROGRAMBUFFERPARAMETERSFVNVPROC __glewProgramBufferParametersfvNV = NULL; + +PFNGLFLUSHPIXELDATARANGENVPROC __glewFlushPixelDataRangeNV = NULL; +PFNGLPIXELDATARANGENVPROC __glewPixelDataRangeNV = NULL; + +PFNGLPOINTPARAMETERINVPROC __glewPointParameteriNV = NULL; +PFNGLPOINTPARAMETERIVNVPROC __glewPointParameterivNV = NULL; + +PFNGLPRIMITIVERESTARTINDEXNVPROC __glewPrimitiveRestartIndexNV = NULL; +PFNGLPRIMITIVERESTARTNVPROC __glewPrimitiveRestartNV = NULL; + +PFNGLCOMBINERINPUTNVPROC __glewCombinerInputNV = NULL; +PFNGLCOMBINEROUTPUTNVPROC __glewCombinerOutputNV = NULL; +PFNGLCOMBINERPARAMETERFNVPROC __glewCombinerParameterfNV = NULL; +PFNGLCOMBINERPARAMETERFVNVPROC __glewCombinerParameterfvNV = NULL; +PFNGLCOMBINERPARAMETERINVPROC __glewCombinerParameteriNV = NULL; +PFNGLCOMBINERPARAMETERIVNVPROC __glewCombinerParameterivNV = NULL; +PFNGLFINALCOMBINERINPUTNVPROC __glewFinalCombinerInputNV = NULL; +PFNGLGETCOMBINERINPUTPARAMETERFVNVPROC 
__glewGetCombinerInputParameterfvNV = NULL; +PFNGLGETCOMBINERINPUTPARAMETERIVNVPROC __glewGetCombinerInputParameterivNV = NULL; +PFNGLGETCOMBINEROUTPUTPARAMETERFVNVPROC __glewGetCombinerOutputParameterfvNV = NULL; +PFNGLGETCOMBINEROUTPUTPARAMETERIVNVPROC __glewGetCombinerOutputParameterivNV = NULL; +PFNGLGETFINALCOMBINERINPUTPARAMETERFVNVPROC __glewGetFinalCombinerInputParameterfvNV = NULL; +PFNGLGETFINALCOMBINERINPUTPARAMETERIVNVPROC __glewGetFinalCombinerInputParameterivNV = NULL; + +PFNGLCOMBINERSTAGEPARAMETERFVNVPROC __glewCombinerStageParameterfvNV = NULL; +PFNGLGETCOMBINERSTAGEPARAMETERFVNVPROC __glewGetCombinerStageParameterfvNV = NULL; + +PFNGLACTIVEVARYINGNVPROC __glewActiveVaryingNV = NULL; +PFNGLBEGINTRANSFORMFEEDBACKNVPROC __glewBeginTransformFeedbackNV = NULL; +PFNGLBINDBUFFERBASENVPROC __glewBindBufferBaseNV = NULL; +PFNGLBINDBUFFEROFFSETNVPROC __glewBindBufferOffsetNV = NULL; +PFNGLBINDBUFFERRANGENVPROC __glewBindBufferRangeNV = NULL; +PFNGLENDTRANSFORMFEEDBACKNVPROC __glewEndTransformFeedbackNV = NULL; +PFNGLGETACTIVEVARYINGNVPROC __glewGetActiveVaryingNV = NULL; +PFNGLGETTRANSFORMFEEDBACKVARYINGNVPROC __glewGetTransformFeedbackVaryingNV = NULL; +PFNGLGETVARYINGLOCATIONNVPROC __glewGetVaryingLocationNV = NULL; +PFNGLTRANSFORMFEEDBACKATTRIBSNVPROC __glewTransformFeedbackAttribsNV = NULL; +PFNGLTRANSFORMFEEDBACKVARYINGSNVPROC __glewTransformFeedbackVaryingsNV = NULL; + +PFNGLFLUSHVERTEXARRAYRANGENVPROC __glewFlushVertexArrayRangeNV = NULL; +PFNGLVERTEXARRAYRANGENVPROC __glewVertexArrayRangeNV = NULL; + +PFNGLAREPROGRAMSRESIDENTNVPROC __glewAreProgramsResidentNV = NULL; +PFNGLBINDPROGRAMNVPROC __glewBindProgramNV = NULL; +PFNGLDELETEPROGRAMSNVPROC __glewDeleteProgramsNV = NULL; +PFNGLEXECUTEPROGRAMNVPROC __glewExecuteProgramNV = NULL; +PFNGLGENPROGRAMSNVPROC __glewGenProgramsNV = NULL; +PFNGLGETPROGRAMPARAMETERDVNVPROC __glewGetProgramParameterdvNV = NULL; +PFNGLGETPROGRAMPARAMETERFVNVPROC __glewGetProgramParameterfvNV = NULL; 
+PFNGLGETPROGRAMSTRINGNVPROC __glewGetProgramStringNV = NULL; +PFNGLGETPROGRAMIVNVPROC __glewGetProgramivNV = NULL; +PFNGLGETTRACKMATRIXIVNVPROC __glewGetTrackMatrixivNV = NULL; +PFNGLGETVERTEXATTRIBPOINTERVNVPROC __glewGetVertexAttribPointervNV = NULL; +PFNGLGETVERTEXATTRIBDVNVPROC __glewGetVertexAttribdvNV = NULL; +PFNGLGETVERTEXATTRIBFVNVPROC __glewGetVertexAttribfvNV = NULL; +PFNGLGETVERTEXATTRIBIVNVPROC __glewGetVertexAttribivNV = NULL; +PFNGLISPROGRAMNVPROC __glewIsProgramNV = NULL; +PFNGLLOADPROGRAMNVPROC __glewLoadProgramNV = NULL; +PFNGLPROGRAMPARAMETER4DNVPROC __glewProgramParameter4dNV = NULL; +PFNGLPROGRAMPARAMETER4DVNVPROC __glewProgramParameter4dvNV = NULL; +PFNGLPROGRAMPARAMETER4FNVPROC __glewProgramParameter4fNV = NULL; +PFNGLPROGRAMPARAMETER4FVNVPROC __glewProgramParameter4fvNV = NULL; +PFNGLPROGRAMPARAMETERS4DVNVPROC __glewProgramParameters4dvNV = NULL; +PFNGLPROGRAMPARAMETERS4FVNVPROC __glewProgramParameters4fvNV = NULL; +PFNGLREQUESTRESIDENTPROGRAMSNVPROC __glewRequestResidentProgramsNV = NULL; +PFNGLTRACKMATRIXNVPROC __glewTrackMatrixNV = NULL; +PFNGLVERTEXATTRIB1DNVPROC __glewVertexAttrib1dNV = NULL; +PFNGLVERTEXATTRIB1DVNVPROC __glewVertexAttrib1dvNV = NULL; +PFNGLVERTEXATTRIB1FNVPROC __glewVertexAttrib1fNV = NULL; +PFNGLVERTEXATTRIB1FVNVPROC __glewVertexAttrib1fvNV = NULL; +PFNGLVERTEXATTRIB1SNVPROC __glewVertexAttrib1sNV = NULL; +PFNGLVERTEXATTRIB1SVNVPROC __glewVertexAttrib1svNV = NULL; +PFNGLVERTEXATTRIB2DNVPROC __glewVertexAttrib2dNV = NULL; +PFNGLVERTEXATTRIB2DVNVPROC __glewVertexAttrib2dvNV = NULL; +PFNGLVERTEXATTRIB2FNVPROC __glewVertexAttrib2fNV = NULL; +PFNGLVERTEXATTRIB2FVNVPROC __glewVertexAttrib2fvNV = NULL; +PFNGLVERTEXATTRIB2SNVPROC __glewVertexAttrib2sNV = NULL; +PFNGLVERTEXATTRIB2SVNVPROC __glewVertexAttrib2svNV = NULL; +PFNGLVERTEXATTRIB3DNVPROC __glewVertexAttrib3dNV = NULL; +PFNGLVERTEXATTRIB3DVNVPROC __glewVertexAttrib3dvNV = NULL; +PFNGLVERTEXATTRIB3FNVPROC __glewVertexAttrib3fNV = NULL; +PFNGLVERTEXATTRIB3FVNVPROC 
__glewVertexAttrib3fvNV = NULL; +PFNGLVERTEXATTRIB3SNVPROC __glewVertexAttrib3sNV = NULL; +PFNGLVERTEXATTRIB3SVNVPROC __glewVertexAttrib3svNV = NULL; +PFNGLVERTEXATTRIB4DNVPROC __glewVertexAttrib4dNV = NULL; +PFNGLVERTEXATTRIB4DVNVPROC __glewVertexAttrib4dvNV = NULL; +PFNGLVERTEXATTRIB4FNVPROC __glewVertexAttrib4fNV = NULL; +PFNGLVERTEXATTRIB4FVNVPROC __glewVertexAttrib4fvNV = NULL; +PFNGLVERTEXATTRIB4SNVPROC __glewVertexAttrib4sNV = NULL; +PFNGLVERTEXATTRIB4SVNVPROC __glewVertexAttrib4svNV = NULL; +PFNGLVERTEXATTRIB4UBNVPROC __glewVertexAttrib4ubNV = NULL; +PFNGLVERTEXATTRIB4UBVNVPROC __glewVertexAttrib4ubvNV = NULL; +PFNGLVERTEXATTRIBPOINTERNVPROC __glewVertexAttribPointerNV = NULL; +PFNGLVERTEXATTRIBS1DVNVPROC __glewVertexAttribs1dvNV = NULL; +PFNGLVERTEXATTRIBS1FVNVPROC __glewVertexAttribs1fvNV = NULL; +PFNGLVERTEXATTRIBS1SVNVPROC __glewVertexAttribs1svNV = NULL; +PFNGLVERTEXATTRIBS2DVNVPROC __glewVertexAttribs2dvNV = NULL; +PFNGLVERTEXATTRIBS2FVNVPROC __glewVertexAttribs2fvNV = NULL; +PFNGLVERTEXATTRIBS2SVNVPROC __glewVertexAttribs2svNV = NULL; +PFNGLVERTEXATTRIBS3DVNVPROC __glewVertexAttribs3dvNV = NULL; +PFNGLVERTEXATTRIBS3FVNVPROC __glewVertexAttribs3fvNV = NULL; +PFNGLVERTEXATTRIBS3SVNVPROC __glewVertexAttribs3svNV = NULL; +PFNGLVERTEXATTRIBS4DVNVPROC __glewVertexAttribs4dvNV = NULL; +PFNGLVERTEXATTRIBS4FVNVPROC __glewVertexAttribs4fvNV = NULL; +PFNGLVERTEXATTRIBS4SVNVPROC __glewVertexAttribs4svNV = NULL; +PFNGLVERTEXATTRIBS4UBVNVPROC __glewVertexAttribs4ubvNV = NULL; + +PFNGLCLEARDEPTHFOESPROC __glewClearDepthfOES = NULL; +PFNGLCLIPPLANEFOESPROC __glewClipPlanefOES = NULL; +PFNGLDEPTHRANGEFOESPROC __glewDepthRangefOES = NULL; +PFNGLFRUSTUMFOESPROC __glewFrustumfOES = NULL; +PFNGLGETCLIPPLANEFOESPROC __glewGetClipPlanefOES = NULL; +PFNGLORTHOFOESPROC __glewOrthofOES = NULL; + +PFNGLDETAILTEXFUNCSGISPROC __glewDetailTexFuncSGIS = NULL; +PFNGLGETDETAILTEXFUNCSGISPROC __glewGetDetailTexFuncSGIS = NULL; + +PFNGLFOGFUNCSGISPROC __glewFogFuncSGIS = NULL; 
+PFNGLGETFOGFUNCSGISPROC __glewGetFogFuncSGIS = NULL; + +PFNGLSAMPLEMASKSGISPROC __glewSampleMaskSGIS = NULL; +PFNGLSAMPLEPATTERNSGISPROC __glewSamplePatternSGIS = NULL; + +PFNGLGETSHARPENTEXFUNCSGISPROC __glewGetSharpenTexFuncSGIS = NULL; +PFNGLSHARPENTEXFUNCSGISPROC __glewSharpenTexFuncSGIS = NULL; + +PFNGLTEXIMAGE4DSGISPROC __glewTexImage4DSGIS = NULL; +PFNGLTEXSUBIMAGE4DSGISPROC __glewTexSubImage4DSGIS = NULL; + +PFNGLGETTEXFILTERFUNCSGISPROC __glewGetTexFilterFuncSGIS = NULL; +PFNGLTEXFILTERFUNCSGISPROC __glewTexFilterFuncSGIS = NULL; + +PFNGLASYNCMARKERSGIXPROC __glewAsyncMarkerSGIX = NULL; +PFNGLDELETEASYNCMARKERSSGIXPROC __glewDeleteAsyncMarkersSGIX = NULL; +PFNGLFINISHASYNCSGIXPROC __glewFinishAsyncSGIX = NULL; +PFNGLGENASYNCMARKERSSGIXPROC __glewGenAsyncMarkersSGIX = NULL; +PFNGLISASYNCMARKERSGIXPROC __glewIsAsyncMarkerSGIX = NULL; +PFNGLPOLLASYNCSGIXPROC __glewPollAsyncSGIX = NULL; + +PFNGLFLUSHRASTERSGIXPROC __glewFlushRasterSGIX = NULL; + +PFNGLTEXTUREFOGSGIXPROC __glewTextureFogSGIX = NULL; + +PFNGLFRAGMENTCOLORMATERIALSGIXPROC __glewFragmentColorMaterialSGIX = NULL; +PFNGLFRAGMENTLIGHTMODELFSGIXPROC __glewFragmentLightModelfSGIX = NULL; +PFNGLFRAGMENTLIGHTMODELFVSGIXPROC __glewFragmentLightModelfvSGIX = NULL; +PFNGLFRAGMENTLIGHTMODELISGIXPROC __glewFragmentLightModeliSGIX = NULL; +PFNGLFRAGMENTLIGHTMODELIVSGIXPROC __glewFragmentLightModelivSGIX = NULL; +PFNGLFRAGMENTLIGHTFSGIXPROC __glewFragmentLightfSGIX = NULL; +PFNGLFRAGMENTLIGHTFVSGIXPROC __glewFragmentLightfvSGIX = NULL; +PFNGLFRAGMENTLIGHTISGIXPROC __glewFragmentLightiSGIX = NULL; +PFNGLFRAGMENTLIGHTIVSGIXPROC __glewFragmentLightivSGIX = NULL; +PFNGLFRAGMENTMATERIALFSGIXPROC __glewFragmentMaterialfSGIX = NULL; +PFNGLFRAGMENTMATERIALFVSGIXPROC __glewFragmentMaterialfvSGIX = NULL; +PFNGLFRAGMENTMATERIALISGIXPROC __glewFragmentMaterialiSGIX = NULL; +PFNGLFRAGMENTMATERIALIVSGIXPROC __glewFragmentMaterialivSGIX = NULL; +PFNGLGETFRAGMENTLIGHTFVSGIXPROC __glewGetFragmentLightfvSGIX = NULL; 
+PFNGLGETFRAGMENTLIGHTIVSGIXPROC __glewGetFragmentLightivSGIX = NULL; +PFNGLGETFRAGMENTMATERIALFVSGIXPROC __glewGetFragmentMaterialfvSGIX = NULL; +PFNGLGETFRAGMENTMATERIALIVSGIXPROC __glewGetFragmentMaterialivSGIX = NULL; + +PFNGLFRAMEZOOMSGIXPROC __glewFrameZoomSGIX = NULL; + +PFNGLPIXELTEXGENSGIXPROC __glewPixelTexGenSGIX = NULL; + +PFNGLREFERENCEPLANESGIXPROC __glewReferencePlaneSGIX = NULL; + +PFNGLSPRITEPARAMETERFSGIXPROC __glewSpriteParameterfSGIX = NULL; +PFNGLSPRITEPARAMETERFVSGIXPROC __glewSpriteParameterfvSGIX = NULL; +PFNGLSPRITEPARAMETERISGIXPROC __glewSpriteParameteriSGIX = NULL; +PFNGLSPRITEPARAMETERIVSGIXPROC __glewSpriteParameterivSGIX = NULL; + +PFNGLTAGSAMPLEBUFFERSGIXPROC __glewTagSampleBufferSGIX = NULL; + +PFNGLCOLORTABLEPARAMETERFVSGIPROC __glewColorTableParameterfvSGI = NULL; +PFNGLCOLORTABLEPARAMETERIVSGIPROC __glewColorTableParameterivSGI = NULL; +PFNGLCOLORTABLESGIPROC __glewColorTableSGI = NULL; +PFNGLCOPYCOLORTABLESGIPROC __glewCopyColorTableSGI = NULL; +PFNGLGETCOLORTABLEPARAMETERFVSGIPROC __glewGetColorTableParameterfvSGI = NULL; +PFNGLGETCOLORTABLEPARAMETERIVSGIPROC __glewGetColorTableParameterivSGI = NULL; +PFNGLGETCOLORTABLESGIPROC __glewGetColorTableSGI = NULL; + +PFNGLFINISHTEXTURESUNXPROC __glewFinishTextureSUNX = NULL; + +PFNGLGLOBALALPHAFACTORBSUNPROC __glewGlobalAlphaFactorbSUN = NULL; +PFNGLGLOBALALPHAFACTORDSUNPROC __glewGlobalAlphaFactordSUN = NULL; +PFNGLGLOBALALPHAFACTORFSUNPROC __glewGlobalAlphaFactorfSUN = NULL; +PFNGLGLOBALALPHAFACTORISUNPROC __glewGlobalAlphaFactoriSUN = NULL; +PFNGLGLOBALALPHAFACTORSSUNPROC __glewGlobalAlphaFactorsSUN = NULL; +PFNGLGLOBALALPHAFACTORUBSUNPROC __glewGlobalAlphaFactorubSUN = NULL; +PFNGLGLOBALALPHAFACTORUISUNPROC __glewGlobalAlphaFactoruiSUN = NULL; +PFNGLGLOBALALPHAFACTORUSSUNPROC __glewGlobalAlphaFactorusSUN = NULL; + +PFNGLREADVIDEOPIXELSSUNPROC __glewReadVideoPixelsSUN = NULL; + +PFNGLREPLACEMENTCODEPOINTERSUNPROC __glewReplacementCodePointerSUN = NULL; 
+PFNGLREPLACEMENTCODEUBSUNPROC __glewReplacementCodeubSUN = NULL; +PFNGLREPLACEMENTCODEUBVSUNPROC __glewReplacementCodeubvSUN = NULL; +PFNGLREPLACEMENTCODEUISUNPROC __glewReplacementCodeuiSUN = NULL; +PFNGLREPLACEMENTCODEUIVSUNPROC __glewReplacementCodeuivSUN = NULL; +PFNGLREPLACEMENTCODEUSSUNPROC __glewReplacementCodeusSUN = NULL; +PFNGLREPLACEMENTCODEUSVSUNPROC __glewReplacementCodeusvSUN = NULL; + +PFNGLCOLOR3FVERTEX3FSUNPROC __glewColor3fVertex3fSUN = NULL; +PFNGLCOLOR3FVERTEX3FVSUNPROC __glewColor3fVertex3fvSUN = NULL; +PFNGLCOLOR4FNORMAL3FVERTEX3FSUNPROC __glewColor4fNormal3fVertex3fSUN = NULL; +PFNGLCOLOR4FNORMAL3FVERTEX3FVSUNPROC __glewColor4fNormal3fVertex3fvSUN = NULL; +PFNGLCOLOR4UBVERTEX2FSUNPROC __glewColor4ubVertex2fSUN = NULL; +PFNGLCOLOR4UBVERTEX2FVSUNPROC __glewColor4ubVertex2fvSUN = NULL; +PFNGLCOLOR4UBVERTEX3FSUNPROC __glewColor4ubVertex3fSUN = NULL; +PFNGLCOLOR4UBVERTEX3FVSUNPROC __glewColor4ubVertex3fvSUN = NULL; +PFNGLNORMAL3FVERTEX3FSUNPROC __glewNormal3fVertex3fSUN = NULL; +PFNGLNORMAL3FVERTEX3FVSUNPROC __glewNormal3fVertex3fvSUN = NULL; +PFNGLREPLACEMENTCODEUICOLOR3FVERTEX3FSUNPROC __glewReplacementCodeuiColor3fVertex3fSUN = NULL; +PFNGLREPLACEMENTCODEUICOLOR3FVERTEX3FVSUNPROC __glewReplacementCodeuiColor3fVertex3fvSUN = NULL; +PFNGLREPLACEMENTCODEUICOLOR4FNORMAL3FVERTEX3FSUNPROC __glewReplacementCodeuiColor4fNormal3fVertex3fSUN = NULL; +PFNGLREPLACEMENTCODEUICOLOR4FNORMAL3FVERTEX3FVSUNPROC __glewReplacementCodeuiColor4fNormal3fVertex3fvSUN = NULL; +PFNGLREPLACEMENTCODEUICOLOR4UBVERTEX3FSUNPROC __glewReplacementCodeuiColor4ubVertex3fSUN = NULL; +PFNGLREPLACEMENTCODEUICOLOR4UBVERTEX3FVSUNPROC __glewReplacementCodeuiColor4ubVertex3fvSUN = NULL; +PFNGLREPLACEMENTCODEUINORMAL3FVERTEX3FSUNPROC __glewReplacementCodeuiNormal3fVertex3fSUN = NULL; +PFNGLREPLACEMENTCODEUINORMAL3FVERTEX3FVSUNPROC __glewReplacementCodeuiNormal3fVertex3fvSUN = NULL; +PFNGLREPLACEMENTCODEUITEXCOORD2FCOLOR4FNORMAL3FVERTEX3FSUNPROC 
__glewReplacementCodeuiTexCoord2fColor4fNormal3fVertex3fSUN = NULL; +PFNGLREPLACEMENTCODEUITEXCOORD2FCOLOR4FNORMAL3FVERTEX3FVSUNPROC __glewReplacementCodeuiTexCoord2fColor4fNormal3fVertex3fvSUN = NULL; +PFNGLREPLACEMENTCODEUITEXCOORD2FNORMAL3FVERTEX3FSUNPROC __glewReplacementCodeuiTexCoord2fNormal3fVertex3fSUN = NULL; +PFNGLREPLACEMENTCODEUITEXCOORD2FNORMAL3FVERTEX3FVSUNPROC __glewReplacementCodeuiTexCoord2fNormal3fVertex3fvSUN = NULL; +PFNGLREPLACEMENTCODEUITEXCOORD2FVERTEX3FSUNPROC __glewReplacementCodeuiTexCoord2fVertex3fSUN = NULL; +PFNGLREPLACEMENTCODEUITEXCOORD2FVERTEX3FVSUNPROC __glewReplacementCodeuiTexCoord2fVertex3fvSUN = NULL; +PFNGLREPLACEMENTCODEUIVERTEX3FSUNPROC __glewReplacementCodeuiVertex3fSUN = NULL; +PFNGLREPLACEMENTCODEUIVERTEX3FVSUNPROC __glewReplacementCodeuiVertex3fvSUN = NULL; +PFNGLTEXCOORD2FCOLOR3FVERTEX3FSUNPROC __glewTexCoord2fColor3fVertex3fSUN = NULL; +PFNGLTEXCOORD2FCOLOR3FVERTEX3FVSUNPROC __glewTexCoord2fColor3fVertex3fvSUN = NULL; +PFNGLTEXCOORD2FCOLOR4FNORMAL3FVERTEX3FSUNPROC __glewTexCoord2fColor4fNormal3fVertex3fSUN = NULL; +PFNGLTEXCOORD2FCOLOR4FNORMAL3FVERTEX3FVSUNPROC __glewTexCoord2fColor4fNormal3fVertex3fvSUN = NULL; +PFNGLTEXCOORD2FCOLOR4UBVERTEX3FSUNPROC __glewTexCoord2fColor4ubVertex3fSUN = NULL; +PFNGLTEXCOORD2FCOLOR4UBVERTEX3FVSUNPROC __glewTexCoord2fColor4ubVertex3fvSUN = NULL; +PFNGLTEXCOORD2FNORMAL3FVERTEX3FSUNPROC __glewTexCoord2fNormal3fVertex3fSUN = NULL; +PFNGLTEXCOORD2FNORMAL3FVERTEX3FVSUNPROC __glewTexCoord2fNormal3fVertex3fvSUN = NULL; +PFNGLTEXCOORD2FVERTEX3FSUNPROC __glewTexCoord2fVertex3fSUN = NULL; +PFNGLTEXCOORD2FVERTEX3FVSUNPROC __glewTexCoord2fVertex3fvSUN = NULL; +PFNGLTEXCOORD4FCOLOR4FNORMAL3FVERTEX4FSUNPROC __glewTexCoord4fColor4fNormal3fVertex4fSUN = NULL; +PFNGLTEXCOORD4FCOLOR4FNORMAL3FVERTEX4FVSUNPROC __glewTexCoord4fColor4fNormal3fVertex4fvSUN = NULL; +PFNGLTEXCOORD4FVERTEX4FSUNPROC __glewTexCoord4fVertex4fSUN = NULL; +PFNGLTEXCOORD4FVERTEX4FVSUNPROC __glewTexCoord4fVertex4fvSUN = NULL; + 
+PFNGLADDSWAPHINTRECTWINPROC __glewAddSwapHintRectWIN = NULL; + +#endif /* !WIN32 || !GLEW_MX */ + +#if !defined(GLEW_MX) + +GLboolean __GLEW_VERSION_1_1 = GL_FALSE; +GLboolean __GLEW_VERSION_1_2 = GL_FALSE; +GLboolean __GLEW_VERSION_1_3 = GL_FALSE; +GLboolean __GLEW_VERSION_1_4 = GL_FALSE; +GLboolean __GLEW_VERSION_1_5 = GL_FALSE; +GLboolean __GLEW_VERSION_2_0 = GL_FALSE; +GLboolean __GLEW_VERSION_2_1 = GL_FALSE; +GLboolean __GLEW_3DFX_multisample = GL_FALSE; +GLboolean __GLEW_3DFX_tbuffer = GL_FALSE; +GLboolean __GLEW_3DFX_texture_compression_FXT1 = GL_FALSE; +GLboolean __GLEW_APPLE_client_storage = GL_FALSE; +GLboolean __GLEW_APPLE_element_array = GL_FALSE; +GLboolean __GLEW_APPLE_fence = GL_FALSE; +GLboolean __GLEW_APPLE_float_pixels = GL_FALSE; +GLboolean __GLEW_APPLE_flush_buffer_range = GL_FALSE; +GLboolean __GLEW_APPLE_pixel_buffer = GL_FALSE; +GLboolean __GLEW_APPLE_specular_vector = GL_FALSE; +GLboolean __GLEW_APPLE_texture_range = GL_FALSE; +GLboolean __GLEW_APPLE_transform_hint = GL_FALSE; +GLboolean __GLEW_APPLE_vertex_array_object = GL_FALSE; +GLboolean __GLEW_APPLE_vertex_array_range = GL_FALSE; +GLboolean __GLEW_APPLE_ycbcr_422 = GL_FALSE; +GLboolean __GLEW_ARB_color_buffer_float = GL_FALSE; +GLboolean __GLEW_ARB_depth_texture = GL_FALSE; +GLboolean __GLEW_ARB_draw_buffers = GL_FALSE; +GLboolean __GLEW_ARB_fragment_program = GL_FALSE; +GLboolean __GLEW_ARB_fragment_program_shadow = GL_FALSE; +GLboolean __GLEW_ARB_fragment_shader = GL_FALSE; +GLboolean __GLEW_ARB_half_float_pixel = GL_FALSE; +GLboolean __GLEW_ARB_imaging = GL_FALSE; +GLboolean __GLEW_ARB_matrix_palette = GL_FALSE; +GLboolean __GLEW_ARB_multisample = GL_FALSE; +GLboolean __GLEW_ARB_multitexture = GL_FALSE; +GLboolean __GLEW_ARB_occlusion_query = GL_FALSE; +GLboolean __GLEW_ARB_pixel_buffer_object = GL_FALSE; +GLboolean __GLEW_ARB_point_parameters = GL_FALSE; +GLboolean __GLEW_ARB_point_sprite = GL_FALSE; +GLboolean __GLEW_ARB_shader_objects = GL_FALSE; +GLboolean 
__GLEW_ARB_shading_language_100 = GL_FALSE; +GLboolean __GLEW_ARB_shadow = GL_FALSE; +GLboolean __GLEW_ARB_shadow_ambient = GL_FALSE; +GLboolean __GLEW_ARB_texture_border_clamp = GL_FALSE; +GLboolean __GLEW_ARB_texture_compression = GL_FALSE; +GLboolean __GLEW_ARB_texture_cube_map = GL_FALSE; +GLboolean __GLEW_ARB_texture_env_add = GL_FALSE; +GLboolean __GLEW_ARB_texture_env_combine = GL_FALSE; +GLboolean __GLEW_ARB_texture_env_crossbar = GL_FALSE; +GLboolean __GLEW_ARB_texture_env_dot3 = GL_FALSE; +GLboolean __GLEW_ARB_texture_float = GL_FALSE; +GLboolean __GLEW_ARB_texture_mirrored_repeat = GL_FALSE; +GLboolean __GLEW_ARB_texture_non_power_of_two = GL_FALSE; +GLboolean __GLEW_ARB_texture_rectangle = GL_FALSE; +GLboolean __GLEW_ARB_transpose_matrix = GL_FALSE; +GLboolean __GLEW_ARB_vertex_blend = GL_FALSE; +GLboolean __GLEW_ARB_vertex_buffer_object = GL_FALSE; +GLboolean __GLEW_ARB_vertex_program = GL_FALSE; +GLboolean __GLEW_ARB_vertex_shader = GL_FALSE; +GLboolean __GLEW_ARB_window_pos = GL_FALSE; +GLboolean __GLEW_ATIX_point_sprites = GL_FALSE; +GLboolean __GLEW_ATIX_texture_env_combine3 = GL_FALSE; +GLboolean __GLEW_ATIX_texture_env_route = GL_FALSE; +GLboolean __GLEW_ATIX_vertex_shader_output_point_size = GL_FALSE; +GLboolean __GLEW_ATI_draw_buffers = GL_FALSE; +GLboolean __GLEW_ATI_element_array = GL_FALSE; +GLboolean __GLEW_ATI_envmap_bumpmap = GL_FALSE; +GLboolean __GLEW_ATI_fragment_shader = GL_FALSE; +GLboolean __GLEW_ATI_map_object_buffer = GL_FALSE; +GLboolean __GLEW_ATI_pn_triangles = GL_FALSE; +GLboolean __GLEW_ATI_separate_stencil = GL_FALSE; +GLboolean __GLEW_ATI_shader_texture_lod = GL_FALSE; +GLboolean __GLEW_ATI_text_fragment_shader = GL_FALSE; +GLboolean __GLEW_ATI_texture_compression_3dc = GL_FALSE; +GLboolean __GLEW_ATI_texture_env_combine3 = GL_FALSE; +GLboolean __GLEW_ATI_texture_float = GL_FALSE; +GLboolean __GLEW_ATI_texture_mirror_once = GL_FALSE; +GLboolean __GLEW_ATI_vertex_array_object = GL_FALSE; +GLboolean 
__GLEW_ATI_vertex_attrib_array_object = GL_FALSE; +GLboolean __GLEW_ATI_vertex_streams = GL_FALSE; +GLboolean __GLEW_EXT_422_pixels = GL_FALSE; +GLboolean __GLEW_EXT_Cg_shader = GL_FALSE; +GLboolean __GLEW_EXT_abgr = GL_FALSE; +GLboolean __GLEW_EXT_bgra = GL_FALSE; +GLboolean __GLEW_EXT_bindable_uniform = GL_FALSE; +GLboolean __GLEW_EXT_blend_color = GL_FALSE; +GLboolean __GLEW_EXT_blend_equation_separate = GL_FALSE; +GLboolean __GLEW_EXT_blend_func_separate = GL_FALSE; +GLboolean __GLEW_EXT_blend_logic_op = GL_FALSE; +GLboolean __GLEW_EXT_blend_minmax = GL_FALSE; +GLboolean __GLEW_EXT_blend_subtract = GL_FALSE; +GLboolean __GLEW_EXT_clip_volume_hint = GL_FALSE; +GLboolean __GLEW_EXT_cmyka = GL_FALSE; +GLboolean __GLEW_EXT_color_subtable = GL_FALSE; +GLboolean __GLEW_EXT_compiled_vertex_array = GL_FALSE; +GLboolean __GLEW_EXT_convolution = GL_FALSE; +GLboolean __GLEW_EXT_coordinate_frame = GL_FALSE; +GLboolean __GLEW_EXT_copy_texture = GL_FALSE; +GLboolean __GLEW_EXT_cull_vertex = GL_FALSE; +GLboolean __GLEW_EXT_depth_bounds_test = GL_FALSE; +GLboolean __GLEW_EXT_draw_buffers2 = GL_FALSE; +GLboolean __GLEW_EXT_draw_instanced = GL_FALSE; +GLboolean __GLEW_EXT_draw_range_elements = GL_FALSE; +GLboolean __GLEW_EXT_fog_coord = GL_FALSE; +GLboolean __GLEW_EXT_fragment_lighting = GL_FALSE; +GLboolean __GLEW_EXT_framebuffer_blit = GL_FALSE; +GLboolean __GLEW_EXT_framebuffer_multisample = GL_FALSE; +GLboolean __GLEW_EXT_framebuffer_object = GL_FALSE; +GLboolean __GLEW_EXT_framebuffer_sRGB = GL_FALSE; +GLboolean __GLEW_EXT_geometry_shader4 = GL_FALSE; +GLboolean __GLEW_EXT_gpu_program_parameters = GL_FALSE; +GLboolean __GLEW_EXT_gpu_shader4 = GL_FALSE; +GLboolean __GLEW_EXT_histogram = GL_FALSE; +GLboolean __GLEW_EXT_index_array_formats = GL_FALSE; +GLboolean __GLEW_EXT_index_func = GL_FALSE; +GLboolean __GLEW_EXT_index_material = GL_FALSE; +GLboolean __GLEW_EXT_index_texture = GL_FALSE; +GLboolean __GLEW_EXT_light_texture = GL_FALSE; +GLboolean __GLEW_EXT_misc_attribute = 
GL_FALSE; +GLboolean __GLEW_EXT_multi_draw_arrays = GL_FALSE; +GLboolean __GLEW_EXT_multisample = GL_FALSE; +GLboolean __GLEW_EXT_packed_depth_stencil = GL_FALSE; +GLboolean __GLEW_EXT_packed_float = GL_FALSE; +GLboolean __GLEW_EXT_packed_pixels = GL_FALSE; +GLboolean __GLEW_EXT_paletted_texture = GL_FALSE; +GLboolean __GLEW_EXT_pixel_buffer_object = GL_FALSE; +GLboolean __GLEW_EXT_pixel_transform = GL_FALSE; +GLboolean __GLEW_EXT_pixel_transform_color_table = GL_FALSE; +GLboolean __GLEW_EXT_point_parameters = GL_FALSE; +GLboolean __GLEW_EXT_polygon_offset = GL_FALSE; +GLboolean __GLEW_EXT_rescale_normal = GL_FALSE; +GLboolean __GLEW_EXT_scene_marker = GL_FALSE; +GLboolean __GLEW_EXT_secondary_color = GL_FALSE; +GLboolean __GLEW_EXT_separate_specular_color = GL_FALSE; +GLboolean __GLEW_EXT_shadow_funcs = GL_FALSE; +GLboolean __GLEW_EXT_shared_texture_palette = GL_FALSE; +GLboolean __GLEW_EXT_stencil_clear_tag = GL_FALSE; +GLboolean __GLEW_EXT_stencil_two_side = GL_FALSE; +GLboolean __GLEW_EXT_stencil_wrap = GL_FALSE; +GLboolean __GLEW_EXT_subtexture = GL_FALSE; +GLboolean __GLEW_EXT_texture = GL_FALSE; +GLboolean __GLEW_EXT_texture3D = GL_FALSE; +GLboolean __GLEW_EXT_texture_array = GL_FALSE; +GLboolean __GLEW_EXT_texture_buffer_object = GL_FALSE; +GLboolean __GLEW_EXT_texture_compression_dxt1 = GL_FALSE; +GLboolean __GLEW_EXT_texture_compression_latc = GL_FALSE; +GLboolean __GLEW_EXT_texture_compression_rgtc = GL_FALSE; +GLboolean __GLEW_EXT_texture_compression_s3tc = GL_FALSE; +GLboolean __GLEW_EXT_texture_cube_map = GL_FALSE; +GLboolean __GLEW_EXT_texture_edge_clamp = GL_FALSE; +GLboolean __GLEW_EXT_texture_env = GL_FALSE; +GLboolean __GLEW_EXT_texture_env_add = GL_FALSE; +GLboolean __GLEW_EXT_texture_env_combine = GL_FALSE; +GLboolean __GLEW_EXT_texture_env_dot3 = GL_FALSE; +GLboolean __GLEW_EXT_texture_filter_anisotropic = GL_FALSE; +GLboolean __GLEW_EXT_texture_integer = GL_FALSE; +GLboolean __GLEW_EXT_texture_lod_bias = GL_FALSE; +GLboolean 
__GLEW_EXT_texture_mirror_clamp = GL_FALSE; +GLboolean __GLEW_EXT_texture_object = GL_FALSE; +GLboolean __GLEW_EXT_texture_perturb_normal = GL_FALSE; +GLboolean __GLEW_EXT_texture_rectangle = GL_FALSE; +GLboolean __GLEW_EXT_texture_sRGB = GL_FALSE; +GLboolean __GLEW_EXT_texture_shared_exponent = GL_FALSE; +GLboolean __GLEW_EXT_timer_query = GL_FALSE; +GLboolean __GLEW_EXT_vertex_array = GL_FALSE; +GLboolean __GLEW_EXT_vertex_shader = GL_FALSE; +GLboolean __GLEW_EXT_vertex_weighting = GL_FALSE; +GLboolean __GLEW_GREMEDY_frame_terminator = GL_FALSE; +GLboolean __GLEW_GREMEDY_string_marker = GL_FALSE; +GLboolean __GLEW_HP_convolution_border_modes = GL_FALSE; +GLboolean __GLEW_HP_image_transform = GL_FALSE; +GLboolean __GLEW_HP_occlusion_test = GL_FALSE; +GLboolean __GLEW_HP_texture_lighting = GL_FALSE; +GLboolean __GLEW_IBM_cull_vertex = GL_FALSE; +GLboolean __GLEW_IBM_multimode_draw_arrays = GL_FALSE; +GLboolean __GLEW_IBM_rasterpos_clip = GL_FALSE; +GLboolean __GLEW_IBM_static_data = GL_FALSE; +GLboolean __GLEW_IBM_texture_mirrored_repeat = GL_FALSE; +GLboolean __GLEW_IBM_vertex_array_lists = GL_FALSE; +GLboolean __GLEW_INGR_color_clamp = GL_FALSE; +GLboolean __GLEW_INGR_interlace_read = GL_FALSE; +GLboolean __GLEW_INTEL_parallel_arrays = GL_FALSE; +GLboolean __GLEW_INTEL_texture_scissor = GL_FALSE; +GLboolean __GLEW_KTX_buffer_region = GL_FALSE; +GLboolean __GLEW_MESAX_texture_stack = GL_FALSE; +GLboolean __GLEW_MESA_pack_invert = GL_FALSE; +GLboolean __GLEW_MESA_resize_buffers = GL_FALSE; +GLboolean __GLEW_MESA_window_pos = GL_FALSE; +GLboolean __GLEW_MESA_ycbcr_texture = GL_FALSE; +GLboolean __GLEW_NV_blend_square = GL_FALSE; +GLboolean __GLEW_NV_copy_depth_to_color = GL_FALSE; +GLboolean __GLEW_NV_depth_buffer_float = GL_FALSE; +GLboolean __GLEW_NV_depth_clamp = GL_FALSE; +GLboolean __GLEW_NV_depth_range_unclamped = GL_FALSE; +GLboolean __GLEW_NV_evaluators = GL_FALSE; +GLboolean __GLEW_NV_fence = GL_FALSE; +GLboolean __GLEW_NV_float_buffer = GL_FALSE; 
+GLboolean __GLEW_NV_fog_distance = GL_FALSE; +GLboolean __GLEW_NV_fragment_program = GL_FALSE; +GLboolean __GLEW_NV_fragment_program2 = GL_FALSE; +GLboolean __GLEW_NV_fragment_program4 = GL_FALSE; +GLboolean __GLEW_NV_fragment_program_option = GL_FALSE; +GLboolean __GLEW_NV_framebuffer_multisample_coverage = GL_FALSE; +GLboolean __GLEW_NV_geometry_program4 = GL_FALSE; +GLboolean __GLEW_NV_geometry_shader4 = GL_FALSE; +GLboolean __GLEW_NV_gpu_program4 = GL_FALSE; +GLboolean __GLEW_NV_half_float = GL_FALSE; +GLboolean __GLEW_NV_light_max_exponent = GL_FALSE; +GLboolean __GLEW_NV_multisample_filter_hint = GL_FALSE; +GLboolean __GLEW_NV_occlusion_query = GL_FALSE; +GLboolean __GLEW_NV_packed_depth_stencil = GL_FALSE; +GLboolean __GLEW_NV_parameter_buffer_object = GL_FALSE; +GLboolean __GLEW_NV_pixel_data_range = GL_FALSE; +GLboolean __GLEW_NV_point_sprite = GL_FALSE; +GLboolean __GLEW_NV_primitive_restart = GL_FALSE; +GLboolean __GLEW_NV_register_combiners = GL_FALSE; +GLboolean __GLEW_NV_register_combiners2 = GL_FALSE; +GLboolean __GLEW_NV_texgen_emboss = GL_FALSE; +GLboolean __GLEW_NV_texgen_reflection = GL_FALSE; +GLboolean __GLEW_NV_texture_compression_vtc = GL_FALSE; +GLboolean __GLEW_NV_texture_env_combine4 = GL_FALSE; +GLboolean __GLEW_NV_texture_expand_normal = GL_FALSE; +GLboolean __GLEW_NV_texture_rectangle = GL_FALSE; +GLboolean __GLEW_NV_texture_shader = GL_FALSE; +GLboolean __GLEW_NV_texture_shader2 = GL_FALSE; +GLboolean __GLEW_NV_texture_shader3 = GL_FALSE; +GLboolean __GLEW_NV_transform_feedback = GL_FALSE; +GLboolean __GLEW_NV_vertex_array_range = GL_FALSE; +GLboolean __GLEW_NV_vertex_array_range2 = GL_FALSE; +GLboolean __GLEW_NV_vertex_program = GL_FALSE; +GLboolean __GLEW_NV_vertex_program1_1 = GL_FALSE; +GLboolean __GLEW_NV_vertex_program2 = GL_FALSE; +GLboolean __GLEW_NV_vertex_program2_option = GL_FALSE; +GLboolean __GLEW_NV_vertex_program3 = GL_FALSE; +GLboolean __GLEW_NV_vertex_program4 = GL_FALSE; +GLboolean __GLEW_OES_byte_coordinates = 
GL_FALSE; +GLboolean __GLEW_OES_compressed_paletted_texture = GL_FALSE; +GLboolean __GLEW_OES_read_format = GL_FALSE; +GLboolean __GLEW_OES_single_precision = GL_FALSE; +GLboolean __GLEW_OML_interlace = GL_FALSE; +GLboolean __GLEW_OML_resample = GL_FALSE; +GLboolean __GLEW_OML_subsample = GL_FALSE; +GLboolean __GLEW_PGI_misc_hints = GL_FALSE; +GLboolean __GLEW_PGI_vertex_hints = GL_FALSE; +GLboolean __GLEW_REND_screen_coordinates = GL_FALSE; +GLboolean __GLEW_S3_s3tc = GL_FALSE; +GLboolean __GLEW_SGIS_color_range = GL_FALSE; +GLboolean __GLEW_SGIS_detail_texture = GL_FALSE; +GLboolean __GLEW_SGIS_fog_function = GL_FALSE; +GLboolean __GLEW_SGIS_generate_mipmap = GL_FALSE; +GLboolean __GLEW_SGIS_multisample = GL_FALSE; +GLboolean __GLEW_SGIS_pixel_texture = GL_FALSE; +GLboolean __GLEW_SGIS_sharpen_texture = GL_FALSE; +GLboolean __GLEW_SGIS_texture4D = GL_FALSE; +GLboolean __GLEW_SGIS_texture_border_clamp = GL_FALSE; +GLboolean __GLEW_SGIS_texture_edge_clamp = GL_FALSE; +GLboolean __GLEW_SGIS_texture_filter4 = GL_FALSE; +GLboolean __GLEW_SGIS_texture_lod = GL_FALSE; +GLboolean __GLEW_SGIS_texture_select = GL_FALSE; +GLboolean __GLEW_SGIX_async = GL_FALSE; +GLboolean __GLEW_SGIX_async_histogram = GL_FALSE; +GLboolean __GLEW_SGIX_async_pixel = GL_FALSE; +GLboolean __GLEW_SGIX_blend_alpha_minmax = GL_FALSE; +GLboolean __GLEW_SGIX_clipmap = GL_FALSE; +GLboolean __GLEW_SGIX_depth_texture = GL_FALSE; +GLboolean __GLEW_SGIX_flush_raster = GL_FALSE; +GLboolean __GLEW_SGIX_fog_offset = GL_FALSE; +GLboolean __GLEW_SGIX_fog_texture = GL_FALSE; +GLboolean __GLEW_SGIX_fragment_specular_lighting = GL_FALSE; +GLboolean __GLEW_SGIX_framezoom = GL_FALSE; +GLboolean __GLEW_SGIX_interlace = GL_FALSE; +GLboolean __GLEW_SGIX_ir_instrument1 = GL_FALSE; +GLboolean __GLEW_SGIX_list_priority = GL_FALSE; +GLboolean __GLEW_SGIX_pixel_texture = GL_FALSE; +GLboolean __GLEW_SGIX_pixel_texture_bits = GL_FALSE; +GLboolean __GLEW_SGIX_reference_plane = GL_FALSE; +GLboolean __GLEW_SGIX_resample = 
GL_FALSE; +GLboolean __GLEW_SGIX_shadow = GL_FALSE; +GLboolean __GLEW_SGIX_shadow_ambient = GL_FALSE; +GLboolean __GLEW_SGIX_sprite = GL_FALSE; +GLboolean __GLEW_SGIX_tag_sample_buffer = GL_FALSE; +GLboolean __GLEW_SGIX_texture_add_env = GL_FALSE; +GLboolean __GLEW_SGIX_texture_coordinate_clamp = GL_FALSE; +GLboolean __GLEW_SGIX_texture_lod_bias = GL_FALSE; +GLboolean __GLEW_SGIX_texture_multi_buffer = GL_FALSE; +GLboolean __GLEW_SGIX_texture_range = GL_FALSE; +GLboolean __GLEW_SGIX_texture_scale_bias = GL_FALSE; +GLboolean __GLEW_SGIX_vertex_preclip = GL_FALSE; +GLboolean __GLEW_SGIX_vertex_preclip_hint = GL_FALSE; +GLboolean __GLEW_SGIX_ycrcb = GL_FALSE; +GLboolean __GLEW_SGI_color_matrix = GL_FALSE; +GLboolean __GLEW_SGI_color_table = GL_FALSE; +GLboolean __GLEW_SGI_texture_color_table = GL_FALSE; +GLboolean __GLEW_SUNX_constant_data = GL_FALSE; +GLboolean __GLEW_SUN_convolution_border_modes = GL_FALSE; +GLboolean __GLEW_SUN_global_alpha = GL_FALSE; +GLboolean __GLEW_SUN_mesh_array = GL_FALSE; +GLboolean __GLEW_SUN_read_video_pixels = GL_FALSE; +GLboolean __GLEW_SUN_slice_accum = GL_FALSE; +GLboolean __GLEW_SUN_triangle_list = GL_FALSE; +GLboolean __GLEW_SUN_vertex = GL_FALSE; +GLboolean __GLEW_WIN_phong_shading = GL_FALSE; +GLboolean __GLEW_WIN_specular_fog = GL_FALSE; +GLboolean __GLEW_WIN_swap_hint = GL_FALSE; + +#endif /* !GLEW_MX */ + +#ifdef GL_VERSION_1_2 + +static GLboolean _glewInit_GL_VERSION_1_2 (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glCopyTexSubImage3D = (PFNGLCOPYTEXSUBIMAGE3DPROC)glewGetProcAddress((const GLubyte*)"glCopyTexSubImage3D")) == NULL) || r; + r = ((glDrawRangeElements = (PFNGLDRAWRANGEELEMENTSPROC)glewGetProcAddress((const GLubyte*)"glDrawRangeElements")) == NULL) || r; + r = ((glTexImage3D = (PFNGLTEXIMAGE3DPROC)glewGetProcAddress((const GLubyte*)"glTexImage3D")) == NULL) || r; + r = ((glTexSubImage3D = (PFNGLTEXSUBIMAGE3DPROC)glewGetProcAddress((const GLubyte*)"glTexSubImage3D")) == NULL) || r; + + return 
r; +} + +#endif /* GL_VERSION_1_2 */ + +#ifdef GL_VERSION_1_3 + +static GLboolean _glewInit_GL_VERSION_1_3 (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glActiveTexture = (PFNGLACTIVETEXTUREPROC)glewGetProcAddress((const GLubyte*)"glActiveTexture")) == NULL) || r; + r = ((glClientActiveTexture = (PFNGLCLIENTACTIVETEXTUREPROC)glewGetProcAddress((const GLubyte*)"glClientActiveTexture")) == NULL) || r; + r = ((glCompressedTexImage1D = (PFNGLCOMPRESSEDTEXIMAGE1DPROC)glewGetProcAddress((const GLubyte*)"glCompressedTexImage1D")) == NULL) || r; + r = ((glCompressedTexImage2D = (PFNGLCOMPRESSEDTEXIMAGE2DPROC)glewGetProcAddress((const GLubyte*)"glCompressedTexImage2D")) == NULL) || r; + r = ((glCompressedTexImage3D = (PFNGLCOMPRESSEDTEXIMAGE3DPROC)glewGetProcAddress((const GLubyte*)"glCompressedTexImage3D")) == NULL) || r; + r = ((glCompressedTexSubImage1D = (PFNGLCOMPRESSEDTEXSUBIMAGE1DPROC)glewGetProcAddress((const GLubyte*)"glCompressedTexSubImage1D")) == NULL) || r; + r = ((glCompressedTexSubImage2D = (PFNGLCOMPRESSEDTEXSUBIMAGE2DPROC)glewGetProcAddress((const GLubyte*)"glCompressedTexSubImage2D")) == NULL) || r; + r = ((glCompressedTexSubImage3D = (PFNGLCOMPRESSEDTEXSUBIMAGE3DPROC)glewGetProcAddress((const GLubyte*)"glCompressedTexSubImage3D")) == NULL) || r; + r = ((glGetCompressedTexImage = (PFNGLGETCOMPRESSEDTEXIMAGEPROC)glewGetProcAddress((const GLubyte*)"glGetCompressedTexImage")) == NULL) || r; + r = ((glLoadTransposeMatrixd = (PFNGLLOADTRANSPOSEMATRIXDPROC)glewGetProcAddress((const GLubyte*)"glLoadTransposeMatrixd")) == NULL) || r; + r = ((glLoadTransposeMatrixf = (PFNGLLOADTRANSPOSEMATRIXFPROC)glewGetProcAddress((const GLubyte*)"glLoadTransposeMatrixf")) == NULL) || r; + r = ((glMultTransposeMatrixd = (PFNGLMULTTRANSPOSEMATRIXDPROC)glewGetProcAddress((const GLubyte*)"glMultTransposeMatrixd")) == NULL) || r; + r = ((glMultTransposeMatrixf = (PFNGLMULTTRANSPOSEMATRIXFPROC)glewGetProcAddress((const GLubyte*)"glMultTransposeMatrixf")) == NULL) 
|| r; + r = ((glMultiTexCoord1d = (PFNGLMULTITEXCOORD1DPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord1d")) == NULL) || r; + r = ((glMultiTexCoord1dv = (PFNGLMULTITEXCOORD1DVPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord1dv")) == NULL) || r; + r = ((glMultiTexCoord1f = (PFNGLMULTITEXCOORD1FPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord1f")) == NULL) || r; + r = ((glMultiTexCoord1fv = (PFNGLMULTITEXCOORD1FVPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord1fv")) == NULL) || r; + r = ((glMultiTexCoord1i = (PFNGLMULTITEXCOORD1IPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord1i")) == NULL) || r; + r = ((glMultiTexCoord1iv = (PFNGLMULTITEXCOORD1IVPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord1iv")) == NULL) || r; + r = ((glMultiTexCoord1s = (PFNGLMULTITEXCOORD1SPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord1s")) == NULL) || r; + r = ((glMultiTexCoord1sv = (PFNGLMULTITEXCOORD1SVPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord1sv")) == NULL) || r; + r = ((glMultiTexCoord2d = (PFNGLMULTITEXCOORD2DPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord2d")) == NULL) || r; + r = ((glMultiTexCoord2dv = (PFNGLMULTITEXCOORD2DVPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord2dv")) == NULL) || r; + r = ((glMultiTexCoord2f = (PFNGLMULTITEXCOORD2FPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord2f")) == NULL) || r; + r = ((glMultiTexCoord2fv = (PFNGLMULTITEXCOORD2FVPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord2fv")) == NULL) || r; + r = ((glMultiTexCoord2i = (PFNGLMULTITEXCOORD2IPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord2i")) == NULL) || r; + r = ((glMultiTexCoord2iv = (PFNGLMULTITEXCOORD2IVPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord2iv")) == NULL) || r; + r = ((glMultiTexCoord2s = (PFNGLMULTITEXCOORD2SPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord2s")) == NULL) || r; + r = ((glMultiTexCoord2sv = (PFNGLMULTITEXCOORD2SVPROC)glewGetProcAddress((const 
GLubyte*)"glMultiTexCoord2sv")) == NULL) || r; + r = ((glMultiTexCoord3d = (PFNGLMULTITEXCOORD3DPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord3d")) == NULL) || r; + r = ((glMultiTexCoord3dv = (PFNGLMULTITEXCOORD3DVPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord3dv")) == NULL) || r; + r = ((glMultiTexCoord3f = (PFNGLMULTITEXCOORD3FPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord3f")) == NULL) || r; + r = ((glMultiTexCoord3fv = (PFNGLMULTITEXCOORD3FVPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord3fv")) == NULL) || r; + r = ((glMultiTexCoord3i = (PFNGLMULTITEXCOORD3IPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord3i")) == NULL) || r; + r = ((glMultiTexCoord3iv = (PFNGLMULTITEXCOORD3IVPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord3iv")) == NULL) || r; + r = ((glMultiTexCoord3s = (PFNGLMULTITEXCOORD3SPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord3s")) == NULL) || r; + r = ((glMultiTexCoord3sv = (PFNGLMULTITEXCOORD3SVPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord3sv")) == NULL) || r; + r = ((glMultiTexCoord4d = (PFNGLMULTITEXCOORD4DPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord4d")) == NULL) || r; + r = ((glMultiTexCoord4dv = (PFNGLMULTITEXCOORD4DVPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord4dv")) == NULL) || r; + r = ((glMultiTexCoord4f = (PFNGLMULTITEXCOORD4FPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord4f")) == NULL) || r; + r = ((glMultiTexCoord4fv = (PFNGLMULTITEXCOORD4FVPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord4fv")) == NULL) || r; + r = ((glMultiTexCoord4i = (PFNGLMULTITEXCOORD4IPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord4i")) == NULL) || r; + r = ((glMultiTexCoord4iv = (PFNGLMULTITEXCOORD4IVPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord4iv")) == NULL) || r; + r = ((glMultiTexCoord4s = (PFNGLMULTITEXCOORD4SPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord4s")) == NULL) || r; + r = ((glMultiTexCoord4sv = 
(PFNGLMULTITEXCOORD4SVPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord4sv")) == NULL) || r; + r = ((glSampleCoverage = (PFNGLSAMPLECOVERAGEPROC)glewGetProcAddress((const GLubyte*)"glSampleCoverage")) == NULL) || r; + + return r; +} + +#endif /* GL_VERSION_1_3 */ + +#ifdef GL_VERSION_1_4 + +static GLboolean _glewInit_GL_VERSION_1_4 (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glBlendColor = (PFNGLBLENDCOLORPROC)glewGetProcAddress((const GLubyte*)"glBlendColor")) == NULL) || r; + r = ((glBlendEquation = (PFNGLBLENDEQUATIONPROC)glewGetProcAddress((const GLubyte*)"glBlendEquation")) == NULL) || r; + r = ((glBlendFuncSeparate = (PFNGLBLENDFUNCSEPARATEPROC)glewGetProcAddress((const GLubyte*)"glBlendFuncSeparate")) == NULL) || r; + r = ((glFogCoordPointer = (PFNGLFOGCOORDPOINTERPROC)glewGetProcAddress((const GLubyte*)"glFogCoordPointer")) == NULL) || r; + r = ((glFogCoordd = (PFNGLFOGCOORDDPROC)glewGetProcAddress((const GLubyte*)"glFogCoordd")) == NULL) || r; + r = ((glFogCoorddv = (PFNGLFOGCOORDDVPROC)glewGetProcAddress((const GLubyte*)"glFogCoorddv")) == NULL) || r; + r = ((glFogCoordf = (PFNGLFOGCOORDFPROC)glewGetProcAddress((const GLubyte*)"glFogCoordf")) == NULL) || r; + r = ((glFogCoordfv = (PFNGLFOGCOORDFVPROC)glewGetProcAddress((const GLubyte*)"glFogCoordfv")) == NULL) || r; + r = ((glMultiDrawArrays = (PFNGLMULTIDRAWARRAYSPROC)glewGetProcAddress((const GLubyte*)"glMultiDrawArrays")) == NULL) || r; + r = ((glMultiDrawElements = (PFNGLMULTIDRAWELEMENTSPROC)glewGetProcAddress((const GLubyte*)"glMultiDrawElements")) == NULL) || r; + r = ((glPointParameterf = (PFNGLPOINTPARAMETERFPROC)glewGetProcAddress((const GLubyte*)"glPointParameterf")) == NULL) || r; + r = ((glPointParameterfv = (PFNGLPOINTPARAMETERFVPROC)glewGetProcAddress((const GLubyte*)"glPointParameterfv")) == NULL) || r; + r = ((glPointParameteri = (PFNGLPOINTPARAMETERIPROC)glewGetProcAddress((const GLubyte*)"glPointParameteri")) == NULL) || r; + r = ((glPointParameteriv = 
(PFNGLPOINTPARAMETERIVPROC)glewGetProcAddress((const GLubyte*)"glPointParameteriv")) == NULL) || r; + r = ((glSecondaryColor3b = (PFNGLSECONDARYCOLOR3BPROC)glewGetProcAddress((const GLubyte*)"glSecondaryColor3b")) == NULL) || r; + r = ((glSecondaryColor3bv = (PFNGLSECONDARYCOLOR3BVPROC)glewGetProcAddress((const GLubyte*)"glSecondaryColor3bv")) == NULL) || r; + r = ((glSecondaryColor3d = (PFNGLSECONDARYCOLOR3DPROC)glewGetProcAddress((const GLubyte*)"glSecondaryColor3d")) == NULL) || r; + r = ((glSecondaryColor3dv = (PFNGLSECONDARYCOLOR3DVPROC)glewGetProcAddress((const GLubyte*)"glSecondaryColor3dv")) == NULL) || r; + r = ((glSecondaryColor3f = (PFNGLSECONDARYCOLOR3FPROC)glewGetProcAddress((const GLubyte*)"glSecondaryColor3f")) == NULL) || r; + r = ((glSecondaryColor3fv = (PFNGLSECONDARYCOLOR3FVPROC)glewGetProcAddress((const GLubyte*)"glSecondaryColor3fv")) == NULL) || r; + r = ((glSecondaryColor3i = (PFNGLSECONDARYCOLOR3IPROC)glewGetProcAddress((const GLubyte*)"glSecondaryColor3i")) == NULL) || r; + r = ((glSecondaryColor3iv = (PFNGLSECONDARYCOLOR3IVPROC)glewGetProcAddress((const GLubyte*)"glSecondaryColor3iv")) == NULL) || r; + r = ((glSecondaryColor3s = (PFNGLSECONDARYCOLOR3SPROC)glewGetProcAddress((const GLubyte*)"glSecondaryColor3s")) == NULL) || r; + r = ((glSecondaryColor3sv = (PFNGLSECONDARYCOLOR3SVPROC)glewGetProcAddress((const GLubyte*)"glSecondaryColor3sv")) == NULL) || r; + r = ((glSecondaryColor3ub = (PFNGLSECONDARYCOLOR3UBPROC)glewGetProcAddress((const GLubyte*)"glSecondaryColor3ub")) == NULL) || r; + r = ((glSecondaryColor3ubv = (PFNGLSECONDARYCOLOR3UBVPROC)glewGetProcAddress((const GLubyte*)"glSecondaryColor3ubv")) == NULL) || r; + r = ((glSecondaryColor3ui = (PFNGLSECONDARYCOLOR3UIPROC)glewGetProcAddress((const GLubyte*)"glSecondaryColor3ui")) == NULL) || r; + r = ((glSecondaryColor3uiv = (PFNGLSECONDARYCOLOR3UIVPROC)glewGetProcAddress((const GLubyte*)"glSecondaryColor3uiv")) == NULL) || r; + r = ((glSecondaryColor3us = 
(PFNGLSECONDARYCOLOR3USPROC)glewGetProcAddress((const GLubyte*)"glSecondaryColor3us")) == NULL) || r; + r = ((glSecondaryColor3usv = (PFNGLSECONDARYCOLOR3USVPROC)glewGetProcAddress((const GLubyte*)"glSecondaryColor3usv")) == NULL) || r; + r = ((glSecondaryColorPointer = (PFNGLSECONDARYCOLORPOINTERPROC)glewGetProcAddress((const GLubyte*)"glSecondaryColorPointer")) == NULL) || r; + r = ((glWindowPos2d = (PFNGLWINDOWPOS2DPROC)glewGetProcAddress((const GLubyte*)"glWindowPos2d")) == NULL) || r; + r = ((glWindowPos2dv = (PFNGLWINDOWPOS2DVPROC)glewGetProcAddress((const GLubyte*)"glWindowPos2dv")) == NULL) || r; + r = ((glWindowPos2f = (PFNGLWINDOWPOS2FPROC)glewGetProcAddress((const GLubyte*)"glWindowPos2f")) == NULL) || r; + r = ((glWindowPos2fv = (PFNGLWINDOWPOS2FVPROC)glewGetProcAddress((const GLubyte*)"glWindowPos2fv")) == NULL) || r; + r = ((glWindowPos2i = (PFNGLWINDOWPOS2IPROC)glewGetProcAddress((const GLubyte*)"glWindowPos2i")) == NULL) || r; + r = ((glWindowPos2iv = (PFNGLWINDOWPOS2IVPROC)glewGetProcAddress((const GLubyte*)"glWindowPos2iv")) == NULL) || r; + r = ((glWindowPos2s = (PFNGLWINDOWPOS2SPROC)glewGetProcAddress((const GLubyte*)"glWindowPos2s")) == NULL) || r; + r = ((glWindowPos2sv = (PFNGLWINDOWPOS2SVPROC)glewGetProcAddress((const GLubyte*)"glWindowPos2sv")) == NULL) || r; + r = ((glWindowPos3d = (PFNGLWINDOWPOS3DPROC)glewGetProcAddress((const GLubyte*)"glWindowPos3d")) == NULL) || r; + r = ((glWindowPos3dv = (PFNGLWINDOWPOS3DVPROC)glewGetProcAddress((const GLubyte*)"glWindowPos3dv")) == NULL) || r; + r = ((glWindowPos3f = (PFNGLWINDOWPOS3FPROC)glewGetProcAddress((const GLubyte*)"glWindowPos3f")) == NULL) || r; + r = ((glWindowPos3fv = (PFNGLWINDOWPOS3FVPROC)glewGetProcAddress((const GLubyte*)"glWindowPos3fv")) == NULL) || r; + r = ((glWindowPos3i = (PFNGLWINDOWPOS3IPROC)glewGetProcAddress((const GLubyte*)"glWindowPos3i")) == NULL) || r; + r = ((glWindowPos3iv = (PFNGLWINDOWPOS3IVPROC)glewGetProcAddress((const GLubyte*)"glWindowPos3iv")) == NULL) || r; + 
r = ((glWindowPos3s = (PFNGLWINDOWPOS3SPROC)glewGetProcAddress((const GLubyte*)"glWindowPos3s")) == NULL) || r; + r = ((glWindowPos3sv = (PFNGLWINDOWPOS3SVPROC)glewGetProcAddress((const GLubyte*)"glWindowPos3sv")) == NULL) || r; + + return r; +} + +#endif /* GL_VERSION_1_4 */ + +#ifdef GL_VERSION_1_5 + +static GLboolean _glewInit_GL_VERSION_1_5 (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glBeginQuery = (PFNGLBEGINQUERYPROC)glewGetProcAddress((const GLubyte*)"glBeginQuery")) == NULL) || r; + r = ((glBindBuffer = (PFNGLBINDBUFFERPROC)glewGetProcAddress((const GLubyte*)"glBindBuffer")) == NULL) || r; + r = ((glBufferData = (PFNGLBUFFERDATAPROC)glewGetProcAddress((const GLubyte*)"glBufferData")) == NULL) || r; + r = ((glBufferSubData = (PFNGLBUFFERSUBDATAPROC)glewGetProcAddress((const GLubyte*)"glBufferSubData")) == NULL) || r; + r = ((glDeleteBuffers = (PFNGLDELETEBUFFERSPROC)glewGetProcAddress((const GLubyte*)"glDeleteBuffers")) == NULL) || r; + r = ((glDeleteQueries = (PFNGLDELETEQUERIESPROC)glewGetProcAddress((const GLubyte*)"glDeleteQueries")) == NULL) || r; + r = ((glEndQuery = (PFNGLENDQUERYPROC)glewGetProcAddress((const GLubyte*)"glEndQuery")) == NULL) || r; + r = ((glGenBuffers = (PFNGLGENBUFFERSPROC)glewGetProcAddress((const GLubyte*)"glGenBuffers")) == NULL) || r; + r = ((glGenQueries = (PFNGLGENQUERIESPROC)glewGetProcAddress((const GLubyte*)"glGenQueries")) == NULL) || r; + r = ((glGetBufferParameteriv = (PFNGLGETBUFFERPARAMETERIVPROC)glewGetProcAddress((const GLubyte*)"glGetBufferParameteriv")) == NULL) || r; + r = ((glGetBufferPointerv = (PFNGLGETBUFFERPOINTERVPROC)glewGetProcAddress((const GLubyte*)"glGetBufferPointerv")) == NULL) || r; + r = ((glGetBufferSubData = (PFNGLGETBUFFERSUBDATAPROC)glewGetProcAddress((const GLubyte*)"glGetBufferSubData")) == NULL) || r; + r = ((glGetQueryObjectiv = (PFNGLGETQUERYOBJECTIVPROC)glewGetProcAddress((const GLubyte*)"glGetQueryObjectiv")) == NULL) || r; + r = ((glGetQueryObjectuiv = 
(PFNGLGETQUERYOBJECTUIVPROC)glewGetProcAddress((const GLubyte*)"glGetQueryObjectuiv")) == NULL) || r; + r = ((glGetQueryiv = (PFNGLGETQUERYIVPROC)glewGetProcAddress((const GLubyte*)"glGetQueryiv")) == NULL) || r; + r = ((glIsBuffer = (PFNGLISBUFFERPROC)glewGetProcAddress((const GLubyte*)"glIsBuffer")) == NULL) || r; + r = ((glIsQuery = (PFNGLISQUERYPROC)glewGetProcAddress((const GLubyte*)"glIsQuery")) == NULL) || r; + r = ((glMapBuffer = (PFNGLMAPBUFFERPROC)glewGetProcAddress((const GLubyte*)"glMapBuffer")) == NULL) || r; + r = ((glUnmapBuffer = (PFNGLUNMAPBUFFERPROC)glewGetProcAddress((const GLubyte*)"glUnmapBuffer")) == NULL) || r; + + return r; +} + +#endif /* GL_VERSION_1_5 */ + +#ifdef GL_VERSION_2_0 + +static GLboolean _glewInit_GL_VERSION_2_0 (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glAttachShader = (PFNGLATTACHSHADERPROC)glewGetProcAddress((const GLubyte*)"glAttachShader")) == NULL) || r; + r = ((glBindAttribLocation = (PFNGLBINDATTRIBLOCATIONPROC)glewGetProcAddress((const GLubyte*)"glBindAttribLocation")) == NULL) || r; + r = ((glBlendEquationSeparate = (PFNGLBLENDEQUATIONSEPARATEPROC)glewGetProcAddress((const GLubyte*)"glBlendEquationSeparate")) == NULL) || r; + r = ((glCompileShader = (PFNGLCOMPILESHADERPROC)glewGetProcAddress((const GLubyte*)"glCompileShader")) == NULL) || r; + r = ((glCreateProgram = (PFNGLCREATEPROGRAMPROC)glewGetProcAddress((const GLubyte*)"glCreateProgram")) == NULL) || r; + r = ((glCreateShader = (PFNGLCREATESHADERPROC)glewGetProcAddress((const GLubyte*)"glCreateShader")) == NULL) || r; + r = ((glDeleteProgram = (PFNGLDELETEPROGRAMPROC)glewGetProcAddress((const GLubyte*)"glDeleteProgram")) == NULL) || r; + r = ((glDeleteShader = (PFNGLDELETESHADERPROC)glewGetProcAddress((const GLubyte*)"glDeleteShader")) == NULL) || r; + r = ((glDetachShader = (PFNGLDETACHSHADERPROC)glewGetProcAddress((const GLubyte*)"glDetachShader")) == NULL) || r; + r = ((glDisableVertexAttribArray = 
(PFNGLDISABLEVERTEXATTRIBARRAYPROC)glewGetProcAddress((const GLubyte*)"glDisableVertexAttribArray")) == NULL) || r; + r = ((glDrawBuffers = (PFNGLDRAWBUFFERSPROC)glewGetProcAddress((const GLubyte*)"glDrawBuffers")) == NULL) || r; + r = ((glEnableVertexAttribArray = (PFNGLENABLEVERTEXATTRIBARRAYPROC)glewGetProcAddress((const GLubyte*)"glEnableVertexAttribArray")) == NULL) || r; + r = ((glGetActiveAttrib = (PFNGLGETACTIVEATTRIBPROC)glewGetProcAddress((const GLubyte*)"glGetActiveAttrib")) == NULL) || r; + r = ((glGetActiveUniform = (PFNGLGETACTIVEUNIFORMPROC)glewGetProcAddress((const GLubyte*)"glGetActiveUniform")) == NULL) || r; + r = ((glGetAttachedShaders = (PFNGLGETATTACHEDSHADERSPROC)glewGetProcAddress((const GLubyte*)"glGetAttachedShaders")) == NULL) || r; + r = ((glGetAttribLocation = (PFNGLGETATTRIBLOCATIONPROC)glewGetProcAddress((const GLubyte*)"glGetAttribLocation")) == NULL) || r; + r = ((glGetProgramInfoLog = (PFNGLGETPROGRAMINFOLOGPROC)glewGetProcAddress((const GLubyte*)"glGetProgramInfoLog")) == NULL) || r; + r = ((glGetProgramiv = (PFNGLGETPROGRAMIVPROC)glewGetProcAddress((const GLubyte*)"glGetProgramiv")) == NULL) || r; + r = ((glGetShaderInfoLog = (PFNGLGETSHADERINFOLOGPROC)glewGetProcAddress((const GLubyte*)"glGetShaderInfoLog")) == NULL) || r; + r = ((glGetShaderSource = (PFNGLGETSHADERSOURCEPROC)glewGetProcAddress((const GLubyte*)"glGetShaderSource")) == NULL) || r; + r = ((glGetShaderiv = (PFNGLGETSHADERIVPROC)glewGetProcAddress((const GLubyte*)"glGetShaderiv")) == NULL) || r; + r = ((glGetUniformLocation = (PFNGLGETUNIFORMLOCATIONPROC)glewGetProcAddress((const GLubyte*)"glGetUniformLocation")) == NULL) || r; + r = ((glGetUniformfv = (PFNGLGETUNIFORMFVPROC)glewGetProcAddress((const GLubyte*)"glGetUniformfv")) == NULL) || r; + r = ((glGetUniformiv = (PFNGLGETUNIFORMIVPROC)glewGetProcAddress((const GLubyte*)"glGetUniformiv")) == NULL) || r; + r = ((glGetVertexAttribPointerv = (PFNGLGETVERTEXATTRIBPOINTERVPROC)glewGetProcAddress((const 
GLubyte*)"glGetVertexAttribPointerv")) == NULL) || r; + r = ((glGetVertexAttribdv = (PFNGLGETVERTEXATTRIBDVPROC)glewGetProcAddress((const GLubyte*)"glGetVertexAttribdv")) == NULL) || r; + r = ((glGetVertexAttribfv = (PFNGLGETVERTEXATTRIBFVPROC)glewGetProcAddress((const GLubyte*)"glGetVertexAttribfv")) == NULL) || r; + r = ((glGetVertexAttribiv = (PFNGLGETVERTEXATTRIBIVPROC)glewGetProcAddress((const GLubyte*)"glGetVertexAttribiv")) == NULL) || r; + r = ((glIsProgram = (PFNGLISPROGRAMPROC)glewGetProcAddress((const GLubyte*)"glIsProgram")) == NULL) || r; + r = ((glIsShader = (PFNGLISSHADERPROC)glewGetProcAddress((const GLubyte*)"glIsShader")) == NULL) || r; + r = ((glLinkProgram = (PFNGLLINKPROGRAMPROC)glewGetProcAddress((const GLubyte*)"glLinkProgram")) == NULL) || r; + r = ((glShaderSource = (PFNGLSHADERSOURCEPROC)glewGetProcAddress((const GLubyte*)"glShaderSource")) == NULL) || r; + r = ((glStencilFuncSeparate = (PFNGLSTENCILFUNCSEPARATEPROC)glewGetProcAddress((const GLubyte*)"glStencilFuncSeparate")) == NULL) || r; + r = ((glStencilMaskSeparate = (PFNGLSTENCILMASKSEPARATEPROC)glewGetProcAddress((const GLubyte*)"glStencilMaskSeparate")) == NULL) || r; + r = ((glStencilOpSeparate = (PFNGLSTENCILOPSEPARATEPROC)glewGetProcAddress((const GLubyte*)"glStencilOpSeparate")) == NULL) || r; + r = ((glUniform1f = (PFNGLUNIFORM1FPROC)glewGetProcAddress((const GLubyte*)"glUniform1f")) == NULL) || r; + r = ((glUniform1fv = (PFNGLUNIFORM1FVPROC)glewGetProcAddress((const GLubyte*)"glUniform1fv")) == NULL) || r; + r = ((glUniform1i = (PFNGLUNIFORM1IPROC)glewGetProcAddress((const GLubyte*)"glUniform1i")) == NULL) || r; + r = ((glUniform1iv = (PFNGLUNIFORM1IVPROC)glewGetProcAddress((const GLubyte*)"glUniform1iv")) == NULL) || r; + r = ((glUniform2f = (PFNGLUNIFORM2FPROC)glewGetProcAddress((const GLubyte*)"glUniform2f")) == NULL) || r; + r = ((glUniform2fv = (PFNGLUNIFORM2FVPROC)glewGetProcAddress((const GLubyte*)"glUniform2fv")) == NULL) || r; + r = ((glUniform2i = 
(PFNGLUNIFORM2IPROC)glewGetProcAddress((const GLubyte*)"glUniform2i")) == NULL) || r; + r = ((glUniform2iv = (PFNGLUNIFORM2IVPROC)glewGetProcAddress((const GLubyte*)"glUniform2iv")) == NULL) || r; + r = ((glUniform3f = (PFNGLUNIFORM3FPROC)glewGetProcAddress((const GLubyte*)"glUniform3f")) == NULL) || r; + r = ((glUniform3fv = (PFNGLUNIFORM3FVPROC)glewGetProcAddress((const GLubyte*)"glUniform3fv")) == NULL) || r; + r = ((glUniform3i = (PFNGLUNIFORM3IPROC)glewGetProcAddress((const GLubyte*)"glUniform3i")) == NULL) || r; + r = ((glUniform3iv = (PFNGLUNIFORM3IVPROC)glewGetProcAddress((const GLubyte*)"glUniform3iv")) == NULL) || r; + r = ((glUniform4f = (PFNGLUNIFORM4FPROC)glewGetProcAddress((const GLubyte*)"glUniform4f")) == NULL) || r; + r = ((glUniform4fv = (PFNGLUNIFORM4FVPROC)glewGetProcAddress((const GLubyte*)"glUniform4fv")) == NULL) || r; + r = ((glUniform4i = (PFNGLUNIFORM4IPROC)glewGetProcAddress((const GLubyte*)"glUniform4i")) == NULL) || r; + r = ((glUniform4iv = (PFNGLUNIFORM4IVPROC)glewGetProcAddress((const GLubyte*)"glUniform4iv")) == NULL) || r; + r = ((glUniformMatrix2fv = (PFNGLUNIFORMMATRIX2FVPROC)glewGetProcAddress((const GLubyte*)"glUniformMatrix2fv")) == NULL) || r; + r = ((glUniformMatrix3fv = (PFNGLUNIFORMMATRIX3FVPROC)glewGetProcAddress((const GLubyte*)"glUniformMatrix3fv")) == NULL) || r; + r = ((glUniformMatrix4fv = (PFNGLUNIFORMMATRIX4FVPROC)glewGetProcAddress((const GLubyte*)"glUniformMatrix4fv")) == NULL) || r; + r = ((glUseProgram = (PFNGLUSEPROGRAMPROC)glewGetProcAddress((const GLubyte*)"glUseProgram")) == NULL) || r; + r = ((glValidateProgram = (PFNGLVALIDATEPROGRAMPROC)glewGetProcAddress((const GLubyte*)"glValidateProgram")) == NULL) || r; + r = ((glVertexAttrib1d = (PFNGLVERTEXATTRIB1DPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib1d")) == NULL) || r; + r = ((glVertexAttrib1dv = (PFNGLVERTEXATTRIB1DVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib1dv")) == NULL) || r; + r = ((glVertexAttrib1f = 
(PFNGLVERTEXATTRIB1FPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib1f")) == NULL) || r; + r = ((glVertexAttrib1fv = (PFNGLVERTEXATTRIB1FVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib1fv")) == NULL) || r; + r = ((glVertexAttrib1s = (PFNGLVERTEXATTRIB1SPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib1s")) == NULL) || r; + r = ((glVertexAttrib1sv = (PFNGLVERTEXATTRIB1SVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib1sv")) == NULL) || r; + r = ((glVertexAttrib2d = (PFNGLVERTEXATTRIB2DPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib2d")) == NULL) || r; + r = ((glVertexAttrib2dv = (PFNGLVERTEXATTRIB2DVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib2dv")) == NULL) || r; + r = ((glVertexAttrib2f = (PFNGLVERTEXATTRIB2FPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib2f")) == NULL) || r; + r = ((glVertexAttrib2fv = (PFNGLVERTEXATTRIB2FVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib2fv")) == NULL) || r; + r = ((glVertexAttrib2s = (PFNGLVERTEXATTRIB2SPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib2s")) == NULL) || r; + r = ((glVertexAttrib2sv = (PFNGLVERTEXATTRIB2SVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib2sv")) == NULL) || r; + r = ((glVertexAttrib3d = (PFNGLVERTEXATTRIB3DPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib3d")) == NULL) || r; + r = ((glVertexAttrib3dv = (PFNGLVERTEXATTRIB3DVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib3dv")) == NULL) || r; + r = ((glVertexAttrib3f = (PFNGLVERTEXATTRIB3FPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib3f")) == NULL) || r; + r = ((glVertexAttrib3fv = (PFNGLVERTEXATTRIB3FVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib3fv")) == NULL) || r; + r = ((glVertexAttrib3s = (PFNGLVERTEXATTRIB3SPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib3s")) == NULL) || r; + r = ((glVertexAttrib3sv = (PFNGLVERTEXATTRIB3SVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib3sv")) == NULL) || r; + r = ((glVertexAttrib4Nbv = 
(PFNGLVERTEXATTRIB4NBVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib4Nbv")) == NULL) || r; + r = ((glVertexAttrib4Niv = (PFNGLVERTEXATTRIB4NIVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib4Niv")) == NULL) || r; + r = ((glVertexAttrib4Nsv = (PFNGLVERTEXATTRIB4NSVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib4Nsv")) == NULL) || r; + r = ((glVertexAttrib4Nub = (PFNGLVERTEXATTRIB4NUBPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib4Nub")) == NULL) || r; + r = ((glVertexAttrib4Nubv = (PFNGLVERTEXATTRIB4NUBVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib4Nubv")) == NULL) || r; + r = ((glVertexAttrib4Nuiv = (PFNGLVERTEXATTRIB4NUIVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib4Nuiv")) == NULL) || r; + r = ((glVertexAttrib4Nusv = (PFNGLVERTEXATTRIB4NUSVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib4Nusv")) == NULL) || r; + r = ((glVertexAttrib4bv = (PFNGLVERTEXATTRIB4BVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib4bv")) == NULL) || r; + r = ((glVertexAttrib4d = (PFNGLVERTEXATTRIB4DPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib4d")) == NULL) || r; + r = ((glVertexAttrib4dv = (PFNGLVERTEXATTRIB4DVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib4dv")) == NULL) || r; + r = ((glVertexAttrib4f = (PFNGLVERTEXATTRIB4FPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib4f")) == NULL) || r; + r = ((glVertexAttrib4fv = (PFNGLVERTEXATTRIB4FVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib4fv")) == NULL) || r; + r = ((glVertexAttrib4iv = (PFNGLVERTEXATTRIB4IVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib4iv")) == NULL) || r; + r = ((glVertexAttrib4s = (PFNGLVERTEXATTRIB4SPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib4s")) == NULL) || r; + r = ((glVertexAttrib4sv = (PFNGLVERTEXATTRIB4SVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib4sv")) == NULL) || r; + r = ((glVertexAttrib4ubv = (PFNGLVERTEXATTRIB4UBVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib4ubv")) == 
NULL) || r; + r = ((glVertexAttrib4uiv = (PFNGLVERTEXATTRIB4UIVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib4uiv")) == NULL) || r; + r = ((glVertexAttrib4usv = (PFNGLVERTEXATTRIB4USVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib4usv")) == NULL) || r; + r = ((glVertexAttribPointer = (PFNGLVERTEXATTRIBPOINTERPROC)glewGetProcAddress((const GLubyte*)"glVertexAttribPointer")) == NULL) || r; + + return r; +} + +#endif /* GL_VERSION_2_0 */ + +#ifdef GL_VERSION_2_1 + +static GLboolean _glewInit_GL_VERSION_2_1 (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glUniformMatrix2x3fv = (PFNGLUNIFORMMATRIX2X3FVPROC)glewGetProcAddress((const GLubyte*)"glUniformMatrix2x3fv")) == NULL) || r; + r = ((glUniformMatrix2x4fv = (PFNGLUNIFORMMATRIX2X4FVPROC)glewGetProcAddress((const GLubyte*)"glUniformMatrix2x4fv")) == NULL) || r; + r = ((glUniformMatrix3x2fv = (PFNGLUNIFORMMATRIX3X2FVPROC)glewGetProcAddress((const GLubyte*)"glUniformMatrix3x2fv")) == NULL) || r; + r = ((glUniformMatrix3x4fv = (PFNGLUNIFORMMATRIX3X4FVPROC)glewGetProcAddress((const GLubyte*)"glUniformMatrix3x4fv")) == NULL) || r; + r = ((glUniformMatrix4x2fv = (PFNGLUNIFORMMATRIX4X2FVPROC)glewGetProcAddress((const GLubyte*)"glUniformMatrix4x2fv")) == NULL) || r; + r = ((glUniformMatrix4x3fv = (PFNGLUNIFORMMATRIX4X3FVPROC)glewGetProcAddress((const GLubyte*)"glUniformMatrix4x3fv")) == NULL) || r; + + return r; +} + +#endif /* GL_VERSION_2_1 */ + +#ifdef GL_3DFX_multisample + +#endif /* GL_3DFX_multisample */ + +#ifdef GL_3DFX_tbuffer + +static GLboolean _glewInit_GL_3DFX_tbuffer (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glTbufferMask3DFX = (PFNGLTBUFFERMASK3DFXPROC)glewGetProcAddress((const GLubyte*)"glTbufferMask3DFX")) == NULL) || r; + + return r; +} + +#endif /* GL_3DFX_tbuffer */ + +#ifdef GL_3DFX_texture_compression_FXT1 + +#endif /* GL_3DFX_texture_compression_FXT1 */ + +#ifdef GL_APPLE_client_storage + +#endif /* GL_APPLE_client_storage */ + +#ifdef 
GL_APPLE_element_array + +static GLboolean _glewInit_GL_APPLE_element_array (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glDrawElementArrayAPPLE = (PFNGLDRAWELEMENTARRAYAPPLEPROC)glewGetProcAddress((const GLubyte*)"glDrawElementArrayAPPLE")) == NULL) || r; + r = ((glDrawRangeElementArrayAPPLE = (PFNGLDRAWRANGEELEMENTARRAYAPPLEPROC)glewGetProcAddress((const GLubyte*)"glDrawRangeElementArrayAPPLE")) == NULL) || r; + r = ((glElementPointerAPPLE = (PFNGLELEMENTPOINTERAPPLEPROC)glewGetProcAddress((const GLubyte*)"glElementPointerAPPLE")) == NULL) || r; + r = ((glMultiDrawElementArrayAPPLE = (PFNGLMULTIDRAWELEMENTARRAYAPPLEPROC)glewGetProcAddress((const GLubyte*)"glMultiDrawElementArrayAPPLE")) == NULL) || r; + r = ((glMultiDrawRangeElementArrayAPPLE = (PFNGLMULTIDRAWRANGEELEMENTARRAYAPPLEPROC)glewGetProcAddress((const GLubyte*)"glMultiDrawRangeElementArrayAPPLE")) == NULL) || r; + + return r; +} + +#endif /* GL_APPLE_element_array */ + +#ifdef GL_APPLE_fence + +static GLboolean _glewInit_GL_APPLE_fence (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glDeleteFencesAPPLE = (PFNGLDELETEFENCESAPPLEPROC)glewGetProcAddress((const GLubyte*)"glDeleteFencesAPPLE")) == NULL) || r; + r = ((glFinishFenceAPPLE = (PFNGLFINISHFENCEAPPLEPROC)glewGetProcAddress((const GLubyte*)"glFinishFenceAPPLE")) == NULL) || r; + r = ((glFinishObjectAPPLE = (PFNGLFINISHOBJECTAPPLEPROC)glewGetProcAddress((const GLubyte*)"glFinishObjectAPPLE")) == NULL) || r; + r = ((glGenFencesAPPLE = (PFNGLGENFENCESAPPLEPROC)glewGetProcAddress((const GLubyte*)"glGenFencesAPPLE")) == NULL) || r; + r = ((glIsFenceAPPLE = (PFNGLISFENCEAPPLEPROC)glewGetProcAddress((const GLubyte*)"glIsFenceAPPLE")) == NULL) || r; + r = ((glSetFenceAPPLE = (PFNGLSETFENCEAPPLEPROC)glewGetProcAddress((const GLubyte*)"glSetFenceAPPLE")) == NULL) || r; + r = ((glTestFenceAPPLE = (PFNGLTESTFENCEAPPLEPROC)glewGetProcAddress((const GLubyte*)"glTestFenceAPPLE")) == NULL) || r; + r = ((glTestObjectAPPLE = 
(PFNGLTESTOBJECTAPPLEPROC)glewGetProcAddress((const GLubyte*)"glTestObjectAPPLE")) == NULL) || r; + + return r; +} + +#endif /* GL_APPLE_fence */ + +#ifdef GL_APPLE_float_pixels + +#endif /* GL_APPLE_float_pixels */ + +#ifdef GL_APPLE_flush_buffer_range + +static GLboolean _glewInit_GL_APPLE_flush_buffer_range (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glBufferParameteriAPPLE = (PFNGLBUFFERPARAMETERIAPPLEPROC)glewGetProcAddress((const GLubyte*)"glBufferParameteriAPPLE")) == NULL) || r; + r = ((glFlushMappedBufferRangeAPPLE = (PFNGLFLUSHMAPPEDBUFFERRANGEAPPLEPROC)glewGetProcAddress((const GLubyte*)"glFlushMappedBufferRangeAPPLE")) == NULL) || r; + + return r; +} + +#endif /* GL_APPLE_flush_buffer_range */ + +#ifdef GL_APPLE_pixel_buffer + +#endif /* GL_APPLE_pixel_buffer */ + +#ifdef GL_APPLE_specular_vector + +#endif /* GL_APPLE_specular_vector */ + +#ifdef GL_APPLE_texture_range + +static GLboolean _glewInit_GL_APPLE_texture_range (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glGetTexParameterPointervAPPLE = (PFNGLGETTEXPARAMETERPOINTERVAPPLEPROC)glewGetProcAddress((const GLubyte*)"glGetTexParameterPointervAPPLE")) == NULL) || r; + r = ((glTextureRangeAPPLE = (PFNGLTEXTURERANGEAPPLEPROC)glewGetProcAddress((const GLubyte*)"glTextureRangeAPPLE")) == NULL) || r; + + return r; +} + +#endif /* GL_APPLE_texture_range */ + +#ifdef GL_APPLE_transform_hint + +#endif /* GL_APPLE_transform_hint */ + +#ifdef GL_APPLE_vertex_array_object + +static GLboolean _glewInit_GL_APPLE_vertex_array_object (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glBindVertexArrayAPPLE = (PFNGLBINDVERTEXARRAYAPPLEPROC)glewGetProcAddress((const GLubyte*)"glBindVertexArrayAPPLE")) == NULL) || r; + r = ((glDeleteVertexArraysAPPLE = (PFNGLDELETEVERTEXARRAYSAPPLEPROC)glewGetProcAddress((const GLubyte*)"glDeleteVertexArraysAPPLE")) == NULL) || r; + r = ((glGenVertexArraysAPPLE = (PFNGLGENVERTEXARRAYSAPPLEPROC)glewGetProcAddress((const 
GLubyte*)"glGenVertexArraysAPPLE")) == NULL) || r; + r = ((glIsVertexArrayAPPLE = (PFNGLISVERTEXARRAYAPPLEPROC)glewGetProcAddress((const GLubyte*)"glIsVertexArrayAPPLE")) == NULL) || r; + + return r; +} + +#endif /* GL_APPLE_vertex_array_object */ + +#ifdef GL_APPLE_vertex_array_range + +static GLboolean _glewInit_GL_APPLE_vertex_array_range (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glFlushVertexArrayRangeAPPLE = (PFNGLFLUSHVERTEXARRAYRANGEAPPLEPROC)glewGetProcAddress((const GLubyte*)"glFlushVertexArrayRangeAPPLE")) == NULL) || r; + r = ((glVertexArrayParameteriAPPLE = (PFNGLVERTEXARRAYPARAMETERIAPPLEPROC)glewGetProcAddress((const GLubyte*)"glVertexArrayParameteriAPPLE")) == NULL) || r; + r = ((glVertexArrayRangeAPPLE = (PFNGLVERTEXARRAYRANGEAPPLEPROC)glewGetProcAddress((const GLubyte*)"glVertexArrayRangeAPPLE")) == NULL) || r; + + return r; +} + +#endif /* GL_APPLE_vertex_array_range */ + +#ifdef GL_APPLE_ycbcr_422 + +#endif /* GL_APPLE_ycbcr_422 */ + +#ifdef GL_ARB_color_buffer_float + +static GLboolean _glewInit_GL_ARB_color_buffer_float (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glClampColorARB = (PFNGLCLAMPCOLORARBPROC)glewGetProcAddress((const GLubyte*)"glClampColorARB")) == NULL) || r; + + return r; +} + +#endif /* GL_ARB_color_buffer_float */ + +#ifdef GL_ARB_depth_texture + +#endif /* GL_ARB_depth_texture */ + +#ifdef GL_ARB_draw_buffers + +static GLboolean _glewInit_GL_ARB_draw_buffers (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glDrawBuffersARB = (PFNGLDRAWBUFFERSARBPROC)glewGetProcAddress((const GLubyte*)"glDrawBuffersARB")) == NULL) || r; + + return r; +} + +#endif /* GL_ARB_draw_buffers */ + +#ifdef GL_ARB_fragment_program + +#endif /* GL_ARB_fragment_program */ + +#ifdef GL_ARB_fragment_program_shadow + +#endif /* GL_ARB_fragment_program_shadow */ + +#ifdef GL_ARB_fragment_shader + +#endif /* GL_ARB_fragment_shader */ + +#ifdef GL_ARB_half_float_pixel + +#endif /* 
GL_ARB_half_float_pixel */ + +#ifdef GL_ARB_imaging + +static GLboolean _glewInit_GL_ARB_imaging (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glBlendEquation = (PFNGLBLENDEQUATIONPROC)glewGetProcAddress((const GLubyte*)"glBlendEquation")) == NULL) || r; + r = ((glColorSubTable = (PFNGLCOLORSUBTABLEPROC)glewGetProcAddress((const GLubyte*)"glColorSubTable")) == NULL) || r; + r = ((glColorTable = (PFNGLCOLORTABLEPROC)glewGetProcAddress((const GLubyte*)"glColorTable")) == NULL) || r; + r = ((glColorTableParameterfv = (PFNGLCOLORTABLEPARAMETERFVPROC)glewGetProcAddress((const GLubyte*)"glColorTableParameterfv")) == NULL) || r; + r = ((glColorTableParameteriv = (PFNGLCOLORTABLEPARAMETERIVPROC)glewGetProcAddress((const GLubyte*)"glColorTableParameteriv")) == NULL) || r; + r = ((glConvolutionFilter1D = (PFNGLCONVOLUTIONFILTER1DPROC)glewGetProcAddress((const GLubyte*)"glConvolutionFilter1D")) == NULL) || r; + r = ((glConvolutionFilter2D = (PFNGLCONVOLUTIONFILTER2DPROC)glewGetProcAddress((const GLubyte*)"glConvolutionFilter2D")) == NULL) || r; + r = ((glConvolutionParameterf = (PFNGLCONVOLUTIONPARAMETERFPROC)glewGetProcAddress((const GLubyte*)"glConvolutionParameterf")) == NULL) || r; + r = ((glConvolutionParameterfv = (PFNGLCONVOLUTIONPARAMETERFVPROC)glewGetProcAddress((const GLubyte*)"glConvolutionParameterfv")) == NULL) || r; + r = ((glConvolutionParameteri = (PFNGLCONVOLUTIONPARAMETERIPROC)glewGetProcAddress((const GLubyte*)"glConvolutionParameteri")) == NULL) || r; + r = ((glConvolutionParameteriv = (PFNGLCONVOLUTIONPARAMETERIVPROC)glewGetProcAddress((const GLubyte*)"glConvolutionParameteriv")) == NULL) || r; + r = ((glCopyColorSubTable = (PFNGLCOPYCOLORSUBTABLEPROC)glewGetProcAddress((const GLubyte*)"glCopyColorSubTable")) == NULL) || r; + r = ((glCopyColorTable = (PFNGLCOPYCOLORTABLEPROC)glewGetProcAddress((const GLubyte*)"glCopyColorTable")) == NULL) || r; + r = ((glCopyConvolutionFilter1D = 
(PFNGLCOPYCONVOLUTIONFILTER1DPROC)glewGetProcAddress((const GLubyte*)"glCopyConvolutionFilter1D")) == NULL) || r; + r = ((glCopyConvolutionFilter2D = (PFNGLCOPYCONVOLUTIONFILTER2DPROC)glewGetProcAddress((const GLubyte*)"glCopyConvolutionFilter2D")) == NULL) || r; + r = ((glGetColorTable = (PFNGLGETCOLORTABLEPROC)glewGetProcAddress((const GLubyte*)"glGetColorTable")) == NULL) || r; + r = ((glGetColorTableParameterfv = (PFNGLGETCOLORTABLEPARAMETERFVPROC)glewGetProcAddress((const GLubyte*)"glGetColorTableParameterfv")) == NULL) || r; + r = ((glGetColorTableParameteriv = (PFNGLGETCOLORTABLEPARAMETERIVPROC)glewGetProcAddress((const GLubyte*)"glGetColorTableParameteriv")) == NULL) || r; + r = ((glGetConvolutionFilter = (PFNGLGETCONVOLUTIONFILTERPROC)glewGetProcAddress((const GLubyte*)"glGetConvolutionFilter")) == NULL) || r; + r = ((glGetConvolutionParameterfv = (PFNGLGETCONVOLUTIONPARAMETERFVPROC)glewGetProcAddress((const GLubyte*)"glGetConvolutionParameterfv")) == NULL) || r; + r = ((glGetConvolutionParameteriv = (PFNGLGETCONVOLUTIONPARAMETERIVPROC)glewGetProcAddress((const GLubyte*)"glGetConvolutionParameteriv")) == NULL) || r; + r = ((glGetHistogram = (PFNGLGETHISTOGRAMPROC)glewGetProcAddress((const GLubyte*)"glGetHistogram")) == NULL) || r; + r = ((glGetHistogramParameterfv = (PFNGLGETHISTOGRAMPARAMETERFVPROC)glewGetProcAddress((const GLubyte*)"glGetHistogramParameterfv")) == NULL) || r; + r = ((glGetHistogramParameteriv = (PFNGLGETHISTOGRAMPARAMETERIVPROC)glewGetProcAddress((const GLubyte*)"glGetHistogramParameteriv")) == NULL) || r; + r = ((glGetMinmax = (PFNGLGETMINMAXPROC)glewGetProcAddress((const GLubyte*)"glGetMinmax")) == NULL) || r; + r = ((glGetMinmaxParameterfv = (PFNGLGETMINMAXPARAMETERFVPROC)glewGetProcAddress((const GLubyte*)"glGetMinmaxParameterfv")) == NULL) || r; + r = ((glGetMinmaxParameteriv = (PFNGLGETMINMAXPARAMETERIVPROC)glewGetProcAddress((const GLubyte*)"glGetMinmaxParameteriv")) == NULL) || r; + r = ((glGetSeparableFilter = 
(PFNGLGETSEPARABLEFILTERPROC)glewGetProcAddress((const GLubyte*)"glGetSeparableFilter")) == NULL) || r; + r = ((glHistogram = (PFNGLHISTOGRAMPROC)glewGetProcAddress((const GLubyte*)"glHistogram")) == NULL) || r; + r = ((glMinmax = (PFNGLMINMAXPROC)glewGetProcAddress((const GLubyte*)"glMinmax")) == NULL) || r; + r = ((glResetHistogram = (PFNGLRESETHISTOGRAMPROC)glewGetProcAddress((const GLubyte*)"glResetHistogram")) == NULL) || r; + r = ((glResetMinmax = (PFNGLRESETMINMAXPROC)glewGetProcAddress((const GLubyte*)"glResetMinmax")) == NULL) || r; + r = ((glSeparableFilter2D = (PFNGLSEPARABLEFILTER2DPROC)glewGetProcAddress((const GLubyte*)"glSeparableFilter2D")) == NULL) || r; + + return r; +} + +#endif /* GL_ARB_imaging */ + +#ifdef GL_ARB_matrix_palette + +static GLboolean _glewInit_GL_ARB_matrix_palette (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glCurrentPaletteMatrixARB = (PFNGLCURRENTPALETTEMATRIXARBPROC)glewGetProcAddress((const GLubyte*)"glCurrentPaletteMatrixARB")) == NULL) || r; + r = ((glMatrixIndexPointerARB = (PFNGLMATRIXINDEXPOINTERARBPROC)glewGetProcAddress((const GLubyte*)"glMatrixIndexPointerARB")) == NULL) || r; + r = ((glMatrixIndexubvARB = (PFNGLMATRIXINDEXUBVARBPROC)glewGetProcAddress((const GLubyte*)"glMatrixIndexubvARB")) == NULL) || r; + r = ((glMatrixIndexuivARB = (PFNGLMATRIXINDEXUIVARBPROC)glewGetProcAddress((const GLubyte*)"glMatrixIndexuivARB")) == NULL) || r; + r = ((glMatrixIndexusvARB = (PFNGLMATRIXINDEXUSVARBPROC)glewGetProcAddress((const GLubyte*)"glMatrixIndexusvARB")) == NULL) || r; + + return r; +} + +#endif /* GL_ARB_matrix_palette */ + +#ifdef GL_ARB_multisample + +static GLboolean _glewInit_GL_ARB_multisample (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glSampleCoverageARB = (PFNGLSAMPLECOVERAGEARBPROC)glewGetProcAddress((const GLubyte*)"glSampleCoverageARB")) == NULL) || r; + + return r; +} + +#endif /* GL_ARB_multisample */ + +#ifdef GL_ARB_multitexture + +static GLboolean 
_glewInit_GL_ARB_multitexture (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glActiveTextureARB = (PFNGLACTIVETEXTUREARBPROC)glewGetProcAddress((const GLubyte*)"glActiveTextureARB")) == NULL) || r; + r = ((glClientActiveTextureARB = (PFNGLCLIENTACTIVETEXTUREARBPROC)glewGetProcAddress((const GLubyte*)"glClientActiveTextureARB")) == NULL) || r; + r = ((glMultiTexCoord1dARB = (PFNGLMULTITEXCOORD1DARBPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord1dARB")) == NULL) || r; + r = ((glMultiTexCoord1dvARB = (PFNGLMULTITEXCOORD1DVARBPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord1dvARB")) == NULL) || r; + r = ((glMultiTexCoord1fARB = (PFNGLMULTITEXCOORD1FARBPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord1fARB")) == NULL) || r; + r = ((glMultiTexCoord1fvARB = (PFNGLMULTITEXCOORD1FVARBPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord1fvARB")) == NULL) || r; + r = ((glMultiTexCoord1iARB = (PFNGLMULTITEXCOORD1IARBPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord1iARB")) == NULL) || r; + r = ((glMultiTexCoord1ivARB = (PFNGLMULTITEXCOORD1IVARBPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord1ivARB")) == NULL) || r; + r = ((glMultiTexCoord1sARB = (PFNGLMULTITEXCOORD1SARBPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord1sARB")) == NULL) || r; + r = ((glMultiTexCoord1svARB = (PFNGLMULTITEXCOORD1SVARBPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord1svARB")) == NULL) || r; + r = ((glMultiTexCoord2dARB = (PFNGLMULTITEXCOORD2DARBPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord2dARB")) == NULL) || r; + r = ((glMultiTexCoord2dvARB = (PFNGLMULTITEXCOORD2DVARBPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord2dvARB")) == NULL) || r; + r = ((glMultiTexCoord2fARB = (PFNGLMULTITEXCOORD2FARBPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord2fARB")) == NULL) || r; + r = ((glMultiTexCoord2fvARB = (PFNGLMULTITEXCOORD2FVARBPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord2fvARB")) == NULL) || 
r; + r = ((glMultiTexCoord2iARB = (PFNGLMULTITEXCOORD2IARBPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord2iARB")) == NULL) || r; + r = ((glMultiTexCoord2ivARB = (PFNGLMULTITEXCOORD2IVARBPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord2ivARB")) == NULL) || r; + r = ((glMultiTexCoord2sARB = (PFNGLMULTITEXCOORD2SARBPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord2sARB")) == NULL) || r; + r = ((glMultiTexCoord2svARB = (PFNGLMULTITEXCOORD2SVARBPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord2svARB")) == NULL) || r; + r = ((glMultiTexCoord3dARB = (PFNGLMULTITEXCOORD3DARBPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord3dARB")) == NULL) || r; + r = ((glMultiTexCoord3dvARB = (PFNGLMULTITEXCOORD3DVARBPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord3dvARB")) == NULL) || r; + r = ((glMultiTexCoord3fARB = (PFNGLMULTITEXCOORD3FARBPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord3fARB")) == NULL) || r; + r = ((glMultiTexCoord3fvARB = (PFNGLMULTITEXCOORD3FVARBPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord3fvARB")) == NULL) || r; + r = ((glMultiTexCoord3iARB = (PFNGLMULTITEXCOORD3IARBPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord3iARB")) == NULL) || r; + r = ((glMultiTexCoord3ivARB = (PFNGLMULTITEXCOORD3IVARBPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord3ivARB")) == NULL) || r; + r = ((glMultiTexCoord3sARB = (PFNGLMULTITEXCOORD3SARBPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord3sARB")) == NULL) || r; + r = ((glMultiTexCoord3svARB = (PFNGLMULTITEXCOORD3SVARBPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord3svARB")) == NULL) || r; + r = ((glMultiTexCoord4dARB = (PFNGLMULTITEXCOORD4DARBPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord4dARB")) == NULL) || r; + r = ((glMultiTexCoord4dvARB = (PFNGLMULTITEXCOORD4DVARBPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord4dvARB")) == NULL) || r; + r = ((glMultiTexCoord4fARB = (PFNGLMULTITEXCOORD4FARBPROC)glewGetProcAddress((const 
GLubyte*)"glMultiTexCoord4fARB")) == NULL) || r; + r = ((glMultiTexCoord4fvARB = (PFNGLMULTITEXCOORD4FVARBPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord4fvARB")) == NULL) || r; + r = ((glMultiTexCoord4iARB = (PFNGLMULTITEXCOORD4IARBPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord4iARB")) == NULL) || r; + r = ((glMultiTexCoord4ivARB = (PFNGLMULTITEXCOORD4IVARBPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord4ivARB")) == NULL) || r; + r = ((glMultiTexCoord4sARB = (PFNGLMULTITEXCOORD4SARBPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord4sARB")) == NULL) || r; + r = ((glMultiTexCoord4svARB = (PFNGLMULTITEXCOORD4SVARBPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord4svARB")) == NULL) || r; + + return r; +} + +#endif /* GL_ARB_multitexture */ + +#ifdef GL_ARB_occlusion_query + +static GLboolean _glewInit_GL_ARB_occlusion_query (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glBeginQueryARB = (PFNGLBEGINQUERYARBPROC)glewGetProcAddress((const GLubyte*)"glBeginQueryARB")) == NULL) || r; + r = ((glDeleteQueriesARB = (PFNGLDELETEQUERIESARBPROC)glewGetProcAddress((const GLubyte*)"glDeleteQueriesARB")) == NULL) || r; + r = ((glEndQueryARB = (PFNGLENDQUERYARBPROC)glewGetProcAddress((const GLubyte*)"glEndQueryARB")) == NULL) || r; + r = ((glGenQueriesARB = (PFNGLGENQUERIESARBPROC)glewGetProcAddress((const GLubyte*)"glGenQueriesARB")) == NULL) || r; + r = ((glGetQueryObjectivARB = (PFNGLGETQUERYOBJECTIVARBPROC)glewGetProcAddress((const GLubyte*)"glGetQueryObjectivARB")) == NULL) || r; + r = ((glGetQueryObjectuivARB = (PFNGLGETQUERYOBJECTUIVARBPROC)glewGetProcAddress((const GLubyte*)"glGetQueryObjectuivARB")) == NULL) || r; + r = ((glGetQueryivARB = (PFNGLGETQUERYIVARBPROC)glewGetProcAddress((const GLubyte*)"glGetQueryivARB")) == NULL) || r; + r = ((glIsQueryARB = (PFNGLISQUERYARBPROC)glewGetProcAddress((const GLubyte*)"glIsQueryARB")) == NULL) || r; + + return r; +} + +#endif /* GL_ARB_occlusion_query */ + +#ifdef 
GL_ARB_pixel_buffer_object + +#endif /* GL_ARB_pixel_buffer_object */ + +#ifdef GL_ARB_point_parameters + +static GLboolean _glewInit_GL_ARB_point_parameters (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glPointParameterfARB = (PFNGLPOINTPARAMETERFARBPROC)glewGetProcAddress((const GLubyte*)"glPointParameterfARB")) == NULL) || r; + r = ((glPointParameterfvARB = (PFNGLPOINTPARAMETERFVARBPROC)glewGetProcAddress((const GLubyte*)"glPointParameterfvARB")) == NULL) || r; + + return r; +} + +#endif /* GL_ARB_point_parameters */ + +#ifdef GL_ARB_point_sprite + +#endif /* GL_ARB_point_sprite */ + +#ifdef GL_ARB_shader_objects + +static GLboolean _glewInit_GL_ARB_shader_objects (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glAttachObjectARB = (PFNGLATTACHOBJECTARBPROC)glewGetProcAddress((const GLubyte*)"glAttachObjectARB")) == NULL) || r; + r = ((glCompileShaderARB = (PFNGLCOMPILESHADERARBPROC)glewGetProcAddress((const GLubyte*)"glCompileShaderARB")) == NULL) || r; + r = ((glCreateProgramObjectARB = (PFNGLCREATEPROGRAMOBJECTARBPROC)glewGetProcAddress((const GLubyte*)"glCreateProgramObjectARB")) == NULL) || r; + r = ((glCreateShaderObjectARB = (PFNGLCREATESHADEROBJECTARBPROC)glewGetProcAddress((const GLubyte*)"glCreateShaderObjectARB")) == NULL) || r; + r = ((glDeleteObjectARB = (PFNGLDELETEOBJECTARBPROC)glewGetProcAddress((const GLubyte*)"glDeleteObjectARB")) == NULL) || r; + r = ((glDetachObjectARB = (PFNGLDETACHOBJECTARBPROC)glewGetProcAddress((const GLubyte*)"glDetachObjectARB")) == NULL) || r; + r = ((glGetActiveUniformARB = (PFNGLGETACTIVEUNIFORMARBPROC)glewGetProcAddress((const GLubyte*)"glGetActiveUniformARB")) == NULL) || r; + r = ((glGetAttachedObjectsARB = (PFNGLGETATTACHEDOBJECTSARBPROC)glewGetProcAddress((const GLubyte*)"glGetAttachedObjectsARB")) == NULL) || r; + r = ((glGetHandleARB = (PFNGLGETHANDLEARBPROC)glewGetProcAddress((const GLubyte*)"glGetHandleARB")) == NULL) || r; + r = ((glGetInfoLogARB = 
(PFNGLGETINFOLOGARBPROC)glewGetProcAddress((const GLubyte*)"glGetInfoLogARB")) == NULL) || r; + r = ((glGetObjectParameterfvARB = (PFNGLGETOBJECTPARAMETERFVARBPROC)glewGetProcAddress((const GLubyte*)"glGetObjectParameterfvARB")) == NULL) || r; + r = ((glGetObjectParameterivARB = (PFNGLGETOBJECTPARAMETERIVARBPROC)glewGetProcAddress((const GLubyte*)"glGetObjectParameterivARB")) == NULL) || r; + r = ((glGetShaderSourceARB = (PFNGLGETSHADERSOURCEARBPROC)glewGetProcAddress((const GLubyte*)"glGetShaderSourceARB")) == NULL) || r; + r = ((glGetUniformLocationARB = (PFNGLGETUNIFORMLOCATIONARBPROC)glewGetProcAddress((const GLubyte*)"glGetUniformLocationARB")) == NULL) || r; + r = ((glGetUniformfvARB = (PFNGLGETUNIFORMFVARBPROC)glewGetProcAddress((const GLubyte*)"glGetUniformfvARB")) == NULL) || r; + r = ((glGetUniformivARB = (PFNGLGETUNIFORMIVARBPROC)glewGetProcAddress((const GLubyte*)"glGetUniformivARB")) == NULL) || r; + r = ((glLinkProgramARB = (PFNGLLINKPROGRAMARBPROC)glewGetProcAddress((const GLubyte*)"glLinkProgramARB")) == NULL) || r; + r = ((glShaderSourceARB = (PFNGLSHADERSOURCEARBPROC)glewGetProcAddress((const GLubyte*)"glShaderSourceARB")) == NULL) || r; + r = ((glUniform1fARB = (PFNGLUNIFORM1FARBPROC)glewGetProcAddress((const GLubyte*)"glUniform1fARB")) == NULL) || r; + r = ((glUniform1fvARB = (PFNGLUNIFORM1FVARBPROC)glewGetProcAddress((const GLubyte*)"glUniform1fvARB")) == NULL) || r; + r = ((glUniform1iARB = (PFNGLUNIFORM1IARBPROC)glewGetProcAddress((const GLubyte*)"glUniform1iARB")) == NULL) || r; + r = ((glUniform1ivARB = (PFNGLUNIFORM1IVARBPROC)glewGetProcAddress((const GLubyte*)"glUniform1ivARB")) == NULL) || r; + r = ((glUniform2fARB = (PFNGLUNIFORM2FARBPROC)glewGetProcAddress((const GLubyte*)"glUniform2fARB")) == NULL) || r; + r = ((glUniform2fvARB = (PFNGLUNIFORM2FVARBPROC)glewGetProcAddress((const GLubyte*)"glUniform2fvARB")) == NULL) || r; + r = ((glUniform2iARB = (PFNGLUNIFORM2IARBPROC)glewGetProcAddress((const GLubyte*)"glUniform2iARB")) == NULL) || 
r; + r = ((glUniform2ivARB = (PFNGLUNIFORM2IVARBPROC)glewGetProcAddress((const GLubyte*)"glUniform2ivARB")) == NULL) || r; + r = ((glUniform3fARB = (PFNGLUNIFORM3FARBPROC)glewGetProcAddress((const GLubyte*)"glUniform3fARB")) == NULL) || r; + r = ((glUniform3fvARB = (PFNGLUNIFORM3FVARBPROC)glewGetProcAddress((const GLubyte*)"glUniform3fvARB")) == NULL) || r; + r = ((glUniform3iARB = (PFNGLUNIFORM3IARBPROC)glewGetProcAddress((const GLubyte*)"glUniform3iARB")) == NULL) || r; + r = ((glUniform3ivARB = (PFNGLUNIFORM3IVARBPROC)glewGetProcAddress((const GLubyte*)"glUniform3ivARB")) == NULL) || r; + r = ((glUniform4fARB = (PFNGLUNIFORM4FARBPROC)glewGetProcAddress((const GLubyte*)"glUniform4fARB")) == NULL) || r; + r = ((glUniform4fvARB = (PFNGLUNIFORM4FVARBPROC)glewGetProcAddress((const GLubyte*)"glUniform4fvARB")) == NULL) || r; + r = ((glUniform4iARB = (PFNGLUNIFORM4IARBPROC)glewGetProcAddress((const GLubyte*)"glUniform4iARB")) == NULL) || r; + r = ((glUniform4ivARB = (PFNGLUNIFORM4IVARBPROC)glewGetProcAddress((const GLubyte*)"glUniform4ivARB")) == NULL) || r; + r = ((glUniformMatrix2fvARB = (PFNGLUNIFORMMATRIX2FVARBPROC)glewGetProcAddress((const GLubyte*)"glUniformMatrix2fvARB")) == NULL) || r; + r = ((glUniformMatrix3fvARB = (PFNGLUNIFORMMATRIX3FVARBPROC)glewGetProcAddress((const GLubyte*)"glUniformMatrix3fvARB")) == NULL) || r; + r = ((glUniformMatrix4fvARB = (PFNGLUNIFORMMATRIX4FVARBPROC)glewGetProcAddress((const GLubyte*)"glUniformMatrix4fvARB")) == NULL) || r; + r = ((glUseProgramObjectARB = (PFNGLUSEPROGRAMOBJECTARBPROC)glewGetProcAddress((const GLubyte*)"glUseProgramObjectARB")) == NULL) || r; + r = ((glValidateProgramARB = (PFNGLVALIDATEPROGRAMARBPROC)glewGetProcAddress((const GLubyte*)"glValidateProgramARB")) == NULL) || r; + + return r; +} + +#endif /* GL_ARB_shader_objects */ + +#ifdef GL_ARB_shading_language_100 + +#endif /* GL_ARB_shading_language_100 */ + +#ifdef GL_ARB_shadow + +#endif /* GL_ARB_shadow */ + +#ifdef GL_ARB_shadow_ambient + +#endif /* 
GL_ARB_shadow_ambient */ + +#ifdef GL_ARB_texture_border_clamp + +#endif /* GL_ARB_texture_border_clamp */ + +#ifdef GL_ARB_texture_compression + +static GLboolean _glewInit_GL_ARB_texture_compression (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glCompressedTexImage1DARB = (PFNGLCOMPRESSEDTEXIMAGE1DARBPROC)glewGetProcAddress((const GLubyte*)"glCompressedTexImage1DARB")) == NULL) || r; + r = ((glCompressedTexImage2DARB = (PFNGLCOMPRESSEDTEXIMAGE2DARBPROC)glewGetProcAddress((const GLubyte*)"glCompressedTexImage2DARB")) == NULL) || r; + r = ((glCompressedTexImage3DARB = (PFNGLCOMPRESSEDTEXIMAGE3DARBPROC)glewGetProcAddress((const GLubyte*)"glCompressedTexImage3DARB")) == NULL) || r; + r = ((glCompressedTexSubImage1DARB = (PFNGLCOMPRESSEDTEXSUBIMAGE1DARBPROC)glewGetProcAddress((const GLubyte*)"glCompressedTexSubImage1DARB")) == NULL) || r; + r = ((glCompressedTexSubImage2DARB = (PFNGLCOMPRESSEDTEXSUBIMAGE2DARBPROC)glewGetProcAddress((const GLubyte*)"glCompressedTexSubImage2DARB")) == NULL) || r; + r = ((glCompressedTexSubImage3DARB = (PFNGLCOMPRESSEDTEXSUBIMAGE3DARBPROC)glewGetProcAddress((const GLubyte*)"glCompressedTexSubImage3DARB")) == NULL) || r; + r = ((glGetCompressedTexImageARB = (PFNGLGETCOMPRESSEDTEXIMAGEARBPROC)glewGetProcAddress((const GLubyte*)"glGetCompressedTexImageARB")) == NULL) || r; + + return r; +} + +#endif /* GL_ARB_texture_compression */ + +#ifdef GL_ARB_texture_cube_map + +#endif /* GL_ARB_texture_cube_map */ + +#ifdef GL_ARB_texture_env_add + +#endif /* GL_ARB_texture_env_add */ + +#ifdef GL_ARB_texture_env_combine + +#endif /* GL_ARB_texture_env_combine */ + +#ifdef GL_ARB_texture_env_crossbar + +#endif /* GL_ARB_texture_env_crossbar */ + +#ifdef GL_ARB_texture_env_dot3 + +#endif /* GL_ARB_texture_env_dot3 */ + +#ifdef GL_ARB_texture_float + +#endif /* GL_ARB_texture_float */ + +#ifdef GL_ARB_texture_mirrored_repeat + +#endif /* GL_ARB_texture_mirrored_repeat */ + +#ifdef GL_ARB_texture_non_power_of_two + +#endif /* 
GL_ARB_texture_non_power_of_two */ + +#ifdef GL_ARB_texture_rectangle + +#endif /* GL_ARB_texture_rectangle */ + +#ifdef GL_ARB_transpose_matrix + +static GLboolean _glewInit_GL_ARB_transpose_matrix (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glLoadTransposeMatrixdARB = (PFNGLLOADTRANSPOSEMATRIXDARBPROC)glewGetProcAddress((const GLubyte*)"glLoadTransposeMatrixdARB")) == NULL) || r; + r = ((glLoadTransposeMatrixfARB = (PFNGLLOADTRANSPOSEMATRIXFARBPROC)glewGetProcAddress((const GLubyte*)"glLoadTransposeMatrixfARB")) == NULL) || r; + r = ((glMultTransposeMatrixdARB = (PFNGLMULTTRANSPOSEMATRIXDARBPROC)glewGetProcAddress((const GLubyte*)"glMultTransposeMatrixdARB")) == NULL) || r; + r = ((glMultTransposeMatrixfARB = (PFNGLMULTTRANSPOSEMATRIXFARBPROC)glewGetProcAddress((const GLubyte*)"glMultTransposeMatrixfARB")) == NULL) || r; + + return r; +} + +#endif /* GL_ARB_transpose_matrix */ + +#ifdef GL_ARB_vertex_blend + +static GLboolean _glewInit_GL_ARB_vertex_blend (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glVertexBlendARB = (PFNGLVERTEXBLENDARBPROC)glewGetProcAddress((const GLubyte*)"glVertexBlendARB")) == NULL) || r; + r = ((glWeightPointerARB = (PFNGLWEIGHTPOINTERARBPROC)glewGetProcAddress((const GLubyte*)"glWeightPointerARB")) == NULL) || r; + r = ((glWeightbvARB = (PFNGLWEIGHTBVARBPROC)glewGetProcAddress((const GLubyte*)"glWeightbvARB")) == NULL) || r; + r = ((glWeightdvARB = (PFNGLWEIGHTDVARBPROC)glewGetProcAddress((const GLubyte*)"glWeightdvARB")) == NULL) || r; + r = ((glWeightfvARB = (PFNGLWEIGHTFVARBPROC)glewGetProcAddress((const GLubyte*)"glWeightfvARB")) == NULL) || r; + r = ((glWeightivARB = (PFNGLWEIGHTIVARBPROC)glewGetProcAddress((const GLubyte*)"glWeightivARB")) == NULL) || r; + r = ((glWeightsvARB = (PFNGLWEIGHTSVARBPROC)glewGetProcAddress((const GLubyte*)"glWeightsvARB")) == NULL) || r; + r = ((glWeightubvARB = (PFNGLWEIGHTUBVARBPROC)glewGetProcAddress((const GLubyte*)"glWeightubvARB")) == NULL) || r; + r = 
((glWeightuivARB = (PFNGLWEIGHTUIVARBPROC)glewGetProcAddress((const GLubyte*)"glWeightuivARB")) == NULL) || r; + r = ((glWeightusvARB = (PFNGLWEIGHTUSVARBPROC)glewGetProcAddress((const GLubyte*)"glWeightusvARB")) == NULL) || r; + + return r; +} + +#endif /* GL_ARB_vertex_blend */ + +#ifdef GL_ARB_vertex_buffer_object + +static GLboolean _glewInit_GL_ARB_vertex_buffer_object (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glBindBufferARB = (PFNGLBINDBUFFERARBPROC)glewGetProcAddress((const GLubyte*)"glBindBufferARB")) == NULL) || r; + r = ((glBufferDataARB = (PFNGLBUFFERDATAARBPROC)glewGetProcAddress((const GLubyte*)"glBufferDataARB")) == NULL) || r; + r = ((glBufferSubDataARB = (PFNGLBUFFERSUBDATAARBPROC)glewGetProcAddress((const GLubyte*)"glBufferSubDataARB")) == NULL) || r; + r = ((glDeleteBuffersARB = (PFNGLDELETEBUFFERSARBPROC)glewGetProcAddress((const GLubyte*)"glDeleteBuffersARB")) == NULL) || r; + r = ((glGenBuffersARB = (PFNGLGENBUFFERSARBPROC)glewGetProcAddress((const GLubyte*)"glGenBuffersARB")) == NULL) || r; + r = ((glGetBufferParameterivARB = (PFNGLGETBUFFERPARAMETERIVARBPROC)glewGetProcAddress((const GLubyte*)"glGetBufferParameterivARB")) == NULL) || r; + r = ((glGetBufferPointervARB = (PFNGLGETBUFFERPOINTERVARBPROC)glewGetProcAddress((const GLubyte*)"glGetBufferPointervARB")) == NULL) || r; + r = ((glGetBufferSubDataARB = (PFNGLGETBUFFERSUBDATAARBPROC)glewGetProcAddress((const GLubyte*)"glGetBufferSubDataARB")) == NULL) || r; + r = ((glIsBufferARB = (PFNGLISBUFFERARBPROC)glewGetProcAddress((const GLubyte*)"glIsBufferARB")) == NULL) || r; + r = ((glMapBufferARB = (PFNGLMAPBUFFERARBPROC)glewGetProcAddress((const GLubyte*)"glMapBufferARB")) == NULL) || r; + r = ((glUnmapBufferARB = (PFNGLUNMAPBUFFERARBPROC)glewGetProcAddress((const GLubyte*)"glUnmapBufferARB")) == NULL) || r; + + return r; +} + +#endif /* GL_ARB_vertex_buffer_object */ + +#ifdef GL_ARB_vertex_program + +static GLboolean _glewInit_GL_ARB_vertex_program 
(GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glBindProgramARB = (PFNGLBINDPROGRAMARBPROC)glewGetProcAddress((const GLubyte*)"glBindProgramARB")) == NULL) || r; + r = ((glDeleteProgramsARB = (PFNGLDELETEPROGRAMSARBPROC)glewGetProcAddress((const GLubyte*)"glDeleteProgramsARB")) == NULL) || r; + r = ((glDisableVertexAttribArrayARB = (PFNGLDISABLEVERTEXATTRIBARRAYARBPROC)glewGetProcAddress((const GLubyte*)"glDisableVertexAttribArrayARB")) == NULL) || r; + r = ((glEnableVertexAttribArrayARB = (PFNGLENABLEVERTEXATTRIBARRAYARBPROC)glewGetProcAddress((const GLubyte*)"glEnableVertexAttribArrayARB")) == NULL) || r; + r = ((glGenProgramsARB = (PFNGLGENPROGRAMSARBPROC)glewGetProcAddress((const GLubyte*)"glGenProgramsARB")) == NULL) || r; + r = ((glGetProgramEnvParameterdvARB = (PFNGLGETPROGRAMENVPARAMETERDVARBPROC)glewGetProcAddress((const GLubyte*)"glGetProgramEnvParameterdvARB")) == NULL) || r; + r = ((glGetProgramEnvParameterfvARB = (PFNGLGETPROGRAMENVPARAMETERFVARBPROC)glewGetProcAddress((const GLubyte*)"glGetProgramEnvParameterfvARB")) == NULL) || r; + r = ((glGetProgramLocalParameterdvARB = (PFNGLGETPROGRAMLOCALPARAMETERDVARBPROC)glewGetProcAddress((const GLubyte*)"glGetProgramLocalParameterdvARB")) == NULL) || r; + r = ((glGetProgramLocalParameterfvARB = (PFNGLGETPROGRAMLOCALPARAMETERFVARBPROC)glewGetProcAddress((const GLubyte*)"glGetProgramLocalParameterfvARB")) == NULL) || r; + r = ((glGetProgramStringARB = (PFNGLGETPROGRAMSTRINGARBPROC)glewGetProcAddress((const GLubyte*)"glGetProgramStringARB")) == NULL) || r; + r = ((glGetProgramivARB = (PFNGLGETPROGRAMIVARBPROC)glewGetProcAddress((const GLubyte*)"glGetProgramivARB")) == NULL) || r; + r = ((glGetVertexAttribPointervARB = (PFNGLGETVERTEXATTRIBPOINTERVARBPROC)glewGetProcAddress((const GLubyte*)"glGetVertexAttribPointervARB")) == NULL) || r; + r = ((glGetVertexAttribdvARB = (PFNGLGETVERTEXATTRIBDVARBPROC)glewGetProcAddress((const GLubyte*)"glGetVertexAttribdvARB")) == NULL) || r; + r = 
((glGetVertexAttribfvARB = (PFNGLGETVERTEXATTRIBFVARBPROC)glewGetProcAddress((const GLubyte*)"glGetVertexAttribfvARB")) == NULL) || r; + r = ((glGetVertexAttribivARB = (PFNGLGETVERTEXATTRIBIVARBPROC)glewGetProcAddress((const GLubyte*)"glGetVertexAttribivARB")) == NULL) || r; + r = ((glIsProgramARB = (PFNGLISPROGRAMARBPROC)glewGetProcAddress((const GLubyte*)"glIsProgramARB")) == NULL) || r; + r = ((glProgramEnvParameter4dARB = (PFNGLPROGRAMENVPARAMETER4DARBPROC)glewGetProcAddress((const GLubyte*)"glProgramEnvParameter4dARB")) == NULL) || r; + r = ((glProgramEnvParameter4dvARB = (PFNGLPROGRAMENVPARAMETER4DVARBPROC)glewGetProcAddress((const GLubyte*)"glProgramEnvParameter4dvARB")) == NULL) || r; + r = ((glProgramEnvParameter4fARB = (PFNGLPROGRAMENVPARAMETER4FARBPROC)glewGetProcAddress((const GLubyte*)"glProgramEnvParameter4fARB")) == NULL) || r; + r = ((glProgramEnvParameter4fvARB = (PFNGLPROGRAMENVPARAMETER4FVARBPROC)glewGetProcAddress((const GLubyte*)"glProgramEnvParameter4fvARB")) == NULL) || r; + r = ((glProgramLocalParameter4dARB = (PFNGLPROGRAMLOCALPARAMETER4DARBPROC)glewGetProcAddress((const GLubyte*)"glProgramLocalParameter4dARB")) == NULL) || r; + r = ((glProgramLocalParameter4dvARB = (PFNGLPROGRAMLOCALPARAMETER4DVARBPROC)glewGetProcAddress((const GLubyte*)"glProgramLocalParameter4dvARB")) == NULL) || r; + r = ((glProgramLocalParameter4fARB = (PFNGLPROGRAMLOCALPARAMETER4FARBPROC)glewGetProcAddress((const GLubyte*)"glProgramLocalParameter4fARB")) == NULL) || r; + r = ((glProgramLocalParameter4fvARB = (PFNGLPROGRAMLOCALPARAMETER4FVARBPROC)glewGetProcAddress((const GLubyte*)"glProgramLocalParameter4fvARB")) == NULL) || r; + r = ((glProgramStringARB = (PFNGLPROGRAMSTRINGARBPROC)glewGetProcAddress((const GLubyte*)"glProgramStringARB")) == NULL) || r; + r = ((glVertexAttrib1dARB = (PFNGLVERTEXATTRIB1DARBPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib1dARB")) == NULL) || r; + r = ((glVertexAttrib1dvARB = (PFNGLVERTEXATTRIB1DVARBPROC)glewGetProcAddress((const 
GLubyte*)"glVertexAttrib1dvARB")) == NULL) || r; + r = ((glVertexAttrib1fARB = (PFNGLVERTEXATTRIB1FARBPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib1fARB")) == NULL) || r; + r = ((glVertexAttrib1fvARB = (PFNGLVERTEXATTRIB1FVARBPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib1fvARB")) == NULL) || r; + r = ((glVertexAttrib1sARB = (PFNGLVERTEXATTRIB1SARBPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib1sARB")) == NULL) || r; + r = ((glVertexAttrib1svARB = (PFNGLVERTEXATTRIB1SVARBPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib1svARB")) == NULL) || r; + r = ((glVertexAttrib2dARB = (PFNGLVERTEXATTRIB2DARBPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib2dARB")) == NULL) || r; + r = ((glVertexAttrib2dvARB = (PFNGLVERTEXATTRIB2DVARBPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib2dvARB")) == NULL) || r; + r = ((glVertexAttrib2fARB = (PFNGLVERTEXATTRIB2FARBPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib2fARB")) == NULL) || r; + r = ((glVertexAttrib2fvARB = (PFNGLVERTEXATTRIB2FVARBPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib2fvARB")) == NULL) || r; + r = ((glVertexAttrib2sARB = (PFNGLVERTEXATTRIB2SARBPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib2sARB")) == NULL) || r; + r = ((glVertexAttrib2svARB = (PFNGLVERTEXATTRIB2SVARBPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib2svARB")) == NULL) || r; + r = ((glVertexAttrib3dARB = (PFNGLVERTEXATTRIB3DARBPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib3dARB")) == NULL) || r; + r = ((glVertexAttrib3dvARB = (PFNGLVERTEXATTRIB3DVARBPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib3dvARB")) == NULL) || r; + r = ((glVertexAttrib3fARB = (PFNGLVERTEXATTRIB3FARBPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib3fARB")) == NULL) || r; + r = ((glVertexAttrib3fvARB = (PFNGLVERTEXATTRIB3FVARBPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib3fvARB")) == NULL) || r; + r = ((glVertexAttrib3sARB = 
(PFNGLVERTEXATTRIB3SARBPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib3sARB")) == NULL) || r; + r = ((glVertexAttrib3svARB = (PFNGLVERTEXATTRIB3SVARBPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib3svARB")) == NULL) || r; + r = ((glVertexAttrib4NbvARB = (PFNGLVERTEXATTRIB4NBVARBPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib4NbvARB")) == NULL) || r; + r = ((glVertexAttrib4NivARB = (PFNGLVERTEXATTRIB4NIVARBPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib4NivARB")) == NULL) || r; + r = ((glVertexAttrib4NsvARB = (PFNGLVERTEXATTRIB4NSVARBPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib4NsvARB")) == NULL) || r; + r = ((glVertexAttrib4NubARB = (PFNGLVERTEXATTRIB4NUBARBPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib4NubARB")) == NULL) || r; + r = ((glVertexAttrib4NubvARB = (PFNGLVERTEXATTRIB4NUBVARBPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib4NubvARB")) == NULL) || r; + r = ((glVertexAttrib4NuivARB = (PFNGLVERTEXATTRIB4NUIVARBPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib4NuivARB")) == NULL) || r; + r = ((glVertexAttrib4NusvARB = (PFNGLVERTEXATTRIB4NUSVARBPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib4NusvARB")) == NULL) || r; + r = ((glVertexAttrib4bvARB = (PFNGLVERTEXATTRIB4BVARBPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib4bvARB")) == NULL) || r; + r = ((glVertexAttrib4dARB = (PFNGLVERTEXATTRIB4DARBPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib4dARB")) == NULL) || r; + r = ((glVertexAttrib4dvARB = (PFNGLVERTEXATTRIB4DVARBPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib4dvARB")) == NULL) || r; + r = ((glVertexAttrib4fARB = (PFNGLVERTEXATTRIB4FARBPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib4fARB")) == NULL) || r; + r = ((glVertexAttrib4fvARB = (PFNGLVERTEXATTRIB4FVARBPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib4fvARB")) == NULL) || r; + r = ((glVertexAttrib4ivARB = (PFNGLVERTEXATTRIB4IVARBPROC)glewGetProcAddress((const 
GLubyte*)"glVertexAttrib4ivARB")) == NULL) || r; + r = ((glVertexAttrib4sARB = (PFNGLVERTEXATTRIB4SARBPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib4sARB")) == NULL) || r; + r = ((glVertexAttrib4svARB = (PFNGLVERTEXATTRIB4SVARBPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib4svARB")) == NULL) || r; + r = ((glVertexAttrib4ubvARB = (PFNGLVERTEXATTRIB4UBVARBPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib4ubvARB")) == NULL) || r; + r = ((glVertexAttrib4uivARB = (PFNGLVERTEXATTRIB4UIVARBPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib4uivARB")) == NULL) || r; + r = ((glVertexAttrib4usvARB = (PFNGLVERTEXATTRIB4USVARBPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib4usvARB")) == NULL) || r; + r = ((glVertexAttribPointerARB = (PFNGLVERTEXATTRIBPOINTERARBPROC)glewGetProcAddress((const GLubyte*)"glVertexAttribPointerARB")) == NULL) || r; + + return r; +} + +#endif /* GL_ARB_vertex_program */ + +#ifdef GL_ARB_vertex_shader + +static GLboolean _glewInit_GL_ARB_vertex_shader (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glBindAttribLocationARB = (PFNGLBINDATTRIBLOCATIONARBPROC)glewGetProcAddress((const GLubyte*)"glBindAttribLocationARB")) == NULL) || r; + r = ((glGetActiveAttribARB = (PFNGLGETACTIVEATTRIBARBPROC)glewGetProcAddress((const GLubyte*)"glGetActiveAttribARB")) == NULL) || r; + r = ((glGetAttribLocationARB = (PFNGLGETATTRIBLOCATIONARBPROC)glewGetProcAddress((const GLubyte*)"glGetAttribLocationARB")) == NULL) || r; + + return r; +} + +#endif /* GL_ARB_vertex_shader */ + +#ifdef GL_ARB_window_pos + +static GLboolean _glewInit_GL_ARB_window_pos (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glWindowPos2dARB = (PFNGLWINDOWPOS2DARBPROC)glewGetProcAddress((const GLubyte*)"glWindowPos2dARB")) == NULL) || r; + r = ((glWindowPos2dvARB = (PFNGLWINDOWPOS2DVARBPROC)glewGetProcAddress((const GLubyte*)"glWindowPos2dvARB")) == NULL) || r; + r = ((glWindowPos2fARB = 
(PFNGLWINDOWPOS2FARBPROC)glewGetProcAddress((const GLubyte*)"glWindowPos2fARB")) == NULL) || r; + r = ((glWindowPos2fvARB = (PFNGLWINDOWPOS2FVARBPROC)glewGetProcAddress((const GLubyte*)"glWindowPos2fvARB")) == NULL) || r; + r = ((glWindowPos2iARB = (PFNGLWINDOWPOS2IARBPROC)glewGetProcAddress((const GLubyte*)"glWindowPos2iARB")) == NULL) || r; + r = ((glWindowPos2ivARB = (PFNGLWINDOWPOS2IVARBPROC)glewGetProcAddress((const GLubyte*)"glWindowPos2ivARB")) == NULL) || r; + r = ((glWindowPos2sARB = (PFNGLWINDOWPOS2SARBPROC)glewGetProcAddress((const GLubyte*)"glWindowPos2sARB")) == NULL) || r; + r = ((glWindowPos2svARB = (PFNGLWINDOWPOS2SVARBPROC)glewGetProcAddress((const GLubyte*)"glWindowPos2svARB")) == NULL) || r; + r = ((glWindowPos3dARB = (PFNGLWINDOWPOS3DARBPROC)glewGetProcAddress((const GLubyte*)"glWindowPos3dARB")) == NULL) || r; + r = ((glWindowPos3dvARB = (PFNGLWINDOWPOS3DVARBPROC)glewGetProcAddress((const GLubyte*)"glWindowPos3dvARB")) == NULL) || r; + r = ((glWindowPos3fARB = (PFNGLWINDOWPOS3FARBPROC)glewGetProcAddress((const GLubyte*)"glWindowPos3fARB")) == NULL) || r; + r = ((glWindowPos3fvARB = (PFNGLWINDOWPOS3FVARBPROC)glewGetProcAddress((const GLubyte*)"glWindowPos3fvARB")) == NULL) || r; + r = ((glWindowPos3iARB = (PFNGLWINDOWPOS3IARBPROC)glewGetProcAddress((const GLubyte*)"glWindowPos3iARB")) == NULL) || r; + r = ((glWindowPos3ivARB = (PFNGLWINDOWPOS3IVARBPROC)glewGetProcAddress((const GLubyte*)"glWindowPos3ivARB")) == NULL) || r; + r = ((glWindowPos3sARB = (PFNGLWINDOWPOS3SARBPROC)glewGetProcAddress((const GLubyte*)"glWindowPos3sARB")) == NULL) || r; + r = ((glWindowPos3svARB = (PFNGLWINDOWPOS3SVARBPROC)glewGetProcAddress((const GLubyte*)"glWindowPos3svARB")) == NULL) || r; + + return r; +} + +#endif /* GL_ARB_window_pos */ + +#ifdef GL_ATIX_point_sprites + +#endif /* GL_ATIX_point_sprites */ + +#ifdef GL_ATIX_texture_env_combine3 + +#endif /* GL_ATIX_texture_env_combine3 */ + +#ifdef GL_ATIX_texture_env_route + +#endif /* GL_ATIX_texture_env_route */ 
+ +#ifdef GL_ATIX_vertex_shader_output_point_size + +#endif /* GL_ATIX_vertex_shader_output_point_size */ + +#ifdef GL_ATI_draw_buffers + +static GLboolean _glewInit_GL_ATI_draw_buffers (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glDrawBuffersATI = (PFNGLDRAWBUFFERSATIPROC)glewGetProcAddress((const GLubyte*)"glDrawBuffersATI")) == NULL) || r; + + return r; +} + +#endif /* GL_ATI_draw_buffers */ + +#ifdef GL_ATI_element_array + +static GLboolean _glewInit_GL_ATI_element_array (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glDrawElementArrayATI = (PFNGLDRAWELEMENTARRAYATIPROC)glewGetProcAddress((const GLubyte*)"glDrawElementArrayATI")) == NULL) || r; + r = ((glDrawRangeElementArrayATI = (PFNGLDRAWRANGEELEMENTARRAYATIPROC)glewGetProcAddress((const GLubyte*)"glDrawRangeElementArrayATI")) == NULL) || r; + r = ((glElementPointerATI = (PFNGLELEMENTPOINTERATIPROC)glewGetProcAddress((const GLubyte*)"glElementPointerATI")) == NULL) || r; + + return r; +} + +#endif /* GL_ATI_element_array */ + +#ifdef GL_ATI_envmap_bumpmap + +static GLboolean _glewInit_GL_ATI_envmap_bumpmap (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glGetTexBumpParameterfvATI = (PFNGLGETTEXBUMPPARAMETERFVATIPROC)glewGetProcAddress((const GLubyte*)"glGetTexBumpParameterfvATI")) == NULL) || r; + r = ((glGetTexBumpParameterivATI = (PFNGLGETTEXBUMPPARAMETERIVATIPROC)glewGetProcAddress((const GLubyte*)"glGetTexBumpParameterivATI")) == NULL) || r; + r = ((glTexBumpParameterfvATI = (PFNGLTEXBUMPPARAMETERFVATIPROC)glewGetProcAddress((const GLubyte*)"glTexBumpParameterfvATI")) == NULL) || r; + r = ((glTexBumpParameterivATI = (PFNGLTEXBUMPPARAMETERIVATIPROC)glewGetProcAddress((const GLubyte*)"glTexBumpParameterivATI")) == NULL) || r; + + return r; +} + +#endif /* GL_ATI_envmap_bumpmap */ + +#ifdef GL_ATI_fragment_shader + +static GLboolean _glewInit_GL_ATI_fragment_shader (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = 
((glAlphaFragmentOp1ATI = (PFNGLALPHAFRAGMENTOP1ATIPROC)glewGetProcAddress((const GLubyte*)"glAlphaFragmentOp1ATI")) == NULL) || r; + r = ((glAlphaFragmentOp2ATI = (PFNGLALPHAFRAGMENTOP2ATIPROC)glewGetProcAddress((const GLubyte*)"glAlphaFragmentOp2ATI")) == NULL) || r; + r = ((glAlphaFragmentOp3ATI = (PFNGLALPHAFRAGMENTOP3ATIPROC)glewGetProcAddress((const GLubyte*)"glAlphaFragmentOp3ATI")) == NULL) || r; + r = ((glBeginFragmentShaderATI = (PFNGLBEGINFRAGMENTSHADERATIPROC)glewGetProcAddress((const GLubyte*)"glBeginFragmentShaderATI")) == NULL) || r; + r = ((glBindFragmentShaderATI = (PFNGLBINDFRAGMENTSHADERATIPROC)glewGetProcAddress((const GLubyte*)"glBindFragmentShaderATI")) == NULL) || r; + r = ((glColorFragmentOp1ATI = (PFNGLCOLORFRAGMENTOP1ATIPROC)glewGetProcAddress((const GLubyte*)"glColorFragmentOp1ATI")) == NULL) || r; + r = ((glColorFragmentOp2ATI = (PFNGLCOLORFRAGMENTOP2ATIPROC)glewGetProcAddress((const GLubyte*)"glColorFragmentOp2ATI")) == NULL) || r; + r = ((glColorFragmentOp3ATI = (PFNGLCOLORFRAGMENTOP3ATIPROC)glewGetProcAddress((const GLubyte*)"glColorFragmentOp3ATI")) == NULL) || r; + r = ((glDeleteFragmentShaderATI = (PFNGLDELETEFRAGMENTSHADERATIPROC)glewGetProcAddress((const GLubyte*)"glDeleteFragmentShaderATI")) == NULL) || r; + r = ((glEndFragmentShaderATI = (PFNGLENDFRAGMENTSHADERATIPROC)glewGetProcAddress((const GLubyte*)"glEndFragmentShaderATI")) == NULL) || r; + r = ((glGenFragmentShadersATI = (PFNGLGENFRAGMENTSHADERSATIPROC)glewGetProcAddress((const GLubyte*)"glGenFragmentShadersATI")) == NULL) || r; + r = ((glPassTexCoordATI = (PFNGLPASSTEXCOORDATIPROC)glewGetProcAddress((const GLubyte*)"glPassTexCoordATI")) == NULL) || r; + r = ((glSampleMapATI = (PFNGLSAMPLEMAPATIPROC)glewGetProcAddress((const GLubyte*)"glSampleMapATI")) == NULL) || r; + r = ((glSetFragmentShaderConstantATI = (PFNGLSETFRAGMENTSHADERCONSTANTATIPROC)glewGetProcAddress((const GLubyte*)"glSetFragmentShaderConstantATI")) == NULL) || r; + + return r; +} + +#endif /* 
GL_ATI_fragment_shader */ + +#ifdef GL_ATI_map_object_buffer + +static GLboolean _glewInit_GL_ATI_map_object_buffer (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glMapObjectBufferATI = (PFNGLMAPOBJECTBUFFERATIPROC)glewGetProcAddress((const GLubyte*)"glMapObjectBufferATI")) == NULL) || r; + r = ((glUnmapObjectBufferATI = (PFNGLUNMAPOBJECTBUFFERATIPROC)glewGetProcAddress((const GLubyte*)"glUnmapObjectBufferATI")) == NULL) || r; + + return r; +} + +#endif /* GL_ATI_map_object_buffer */ + +#ifdef GL_ATI_pn_triangles + +static GLboolean _glewInit_GL_ATI_pn_triangles (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glPNTrianglesfATI = (PFNGLPNTRIANGLESFATIPROC)glewGetProcAddress((const GLubyte*)"glPNTrianglesfATI")) == NULL) || r; + r = ((glPNTrianglesiATI = (PFNGLPNTRIANGLESIATIPROC)glewGetProcAddress((const GLubyte*)"glPNTrianglesiATI")) == NULL) || r; + + return r; +} + +#endif /* GL_ATI_pn_triangles */ + +#ifdef GL_ATI_separate_stencil + +static GLboolean _glewInit_GL_ATI_separate_stencil (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glStencilFuncSeparateATI = (PFNGLSTENCILFUNCSEPARATEATIPROC)glewGetProcAddress((const GLubyte*)"glStencilFuncSeparateATI")) == NULL) || r; + r = ((glStencilOpSeparateATI = (PFNGLSTENCILOPSEPARATEATIPROC)glewGetProcAddress((const GLubyte*)"glStencilOpSeparateATI")) == NULL) || r; + + return r; +} + +#endif /* GL_ATI_separate_stencil */ + +#ifdef GL_ATI_shader_texture_lod + +#endif /* GL_ATI_shader_texture_lod */ + +#ifdef GL_ATI_text_fragment_shader + +#endif /* GL_ATI_text_fragment_shader */ + +#ifdef GL_ATI_texture_compression_3dc + +#endif /* GL_ATI_texture_compression_3dc */ + +#ifdef GL_ATI_texture_env_combine3 + +#endif /* GL_ATI_texture_env_combine3 */ + +#ifdef GL_ATI_texture_float + +#endif /* GL_ATI_texture_float */ + +#ifdef GL_ATI_texture_mirror_once + +#endif /* GL_ATI_texture_mirror_once */ + +#ifdef GL_ATI_vertex_array_object + +static GLboolean 
_glewInit_GL_ATI_vertex_array_object (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glArrayObjectATI = (PFNGLARRAYOBJECTATIPROC)glewGetProcAddress((const GLubyte*)"glArrayObjectATI")) == NULL) || r; + r = ((glFreeObjectBufferATI = (PFNGLFREEOBJECTBUFFERATIPROC)glewGetProcAddress((const GLubyte*)"glFreeObjectBufferATI")) == NULL) || r; + r = ((glGetArrayObjectfvATI = (PFNGLGETARRAYOBJECTFVATIPROC)glewGetProcAddress((const GLubyte*)"glGetArrayObjectfvATI")) == NULL) || r; + r = ((glGetArrayObjectivATI = (PFNGLGETARRAYOBJECTIVATIPROC)glewGetProcAddress((const GLubyte*)"glGetArrayObjectivATI")) == NULL) || r; + r = ((glGetObjectBufferfvATI = (PFNGLGETOBJECTBUFFERFVATIPROC)glewGetProcAddress((const GLubyte*)"glGetObjectBufferfvATI")) == NULL) || r; + r = ((glGetObjectBufferivATI = (PFNGLGETOBJECTBUFFERIVATIPROC)glewGetProcAddress((const GLubyte*)"glGetObjectBufferivATI")) == NULL) || r; + r = ((glGetVariantArrayObjectfvATI = (PFNGLGETVARIANTARRAYOBJECTFVATIPROC)glewGetProcAddress((const GLubyte*)"glGetVariantArrayObjectfvATI")) == NULL) || r; + r = ((glGetVariantArrayObjectivATI = (PFNGLGETVARIANTARRAYOBJECTIVATIPROC)glewGetProcAddress((const GLubyte*)"glGetVariantArrayObjectivATI")) == NULL) || r; + r = ((glIsObjectBufferATI = (PFNGLISOBJECTBUFFERATIPROC)glewGetProcAddress((const GLubyte*)"glIsObjectBufferATI")) == NULL) || r; + r = ((glNewObjectBufferATI = (PFNGLNEWOBJECTBUFFERATIPROC)glewGetProcAddress((const GLubyte*)"glNewObjectBufferATI")) == NULL) || r; + r = ((glUpdateObjectBufferATI = (PFNGLUPDATEOBJECTBUFFERATIPROC)glewGetProcAddress((const GLubyte*)"glUpdateObjectBufferATI")) == NULL) || r; + r = ((glVariantArrayObjectATI = (PFNGLVARIANTARRAYOBJECTATIPROC)glewGetProcAddress((const GLubyte*)"glVariantArrayObjectATI")) == NULL) || r; + + return r; +} + +#endif /* GL_ATI_vertex_array_object */ + +#ifdef GL_ATI_vertex_attrib_array_object + +static GLboolean _glewInit_GL_ATI_vertex_attrib_array_object (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean 
r = GL_FALSE; + + r = ((glGetVertexAttribArrayObjectfvATI = (PFNGLGETVERTEXATTRIBARRAYOBJECTFVATIPROC)glewGetProcAddress((const GLubyte*)"glGetVertexAttribArrayObjectfvATI")) == NULL) || r; + r = ((glGetVertexAttribArrayObjectivATI = (PFNGLGETVERTEXATTRIBARRAYOBJECTIVATIPROC)glewGetProcAddress((const GLubyte*)"glGetVertexAttribArrayObjectivATI")) == NULL) || r; + r = ((glVertexAttribArrayObjectATI = (PFNGLVERTEXATTRIBARRAYOBJECTATIPROC)glewGetProcAddress((const GLubyte*)"glVertexAttribArrayObjectATI")) == NULL) || r; + + return r; +} + +#endif /* GL_ATI_vertex_attrib_array_object */ + +#ifdef GL_ATI_vertex_streams + +static GLboolean _glewInit_GL_ATI_vertex_streams (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glClientActiveVertexStreamATI = (PFNGLCLIENTACTIVEVERTEXSTREAMATIPROC)glewGetProcAddress((const GLubyte*)"glClientActiveVertexStreamATI")) == NULL) || r; + r = ((glNormalStream3bATI = (PFNGLNORMALSTREAM3BATIPROC)glewGetProcAddress((const GLubyte*)"glNormalStream3bATI")) == NULL) || r; + r = ((glNormalStream3bvATI = (PFNGLNORMALSTREAM3BVATIPROC)glewGetProcAddress((const GLubyte*)"glNormalStream3bvATI")) == NULL) || r; + r = ((glNormalStream3dATI = (PFNGLNORMALSTREAM3DATIPROC)glewGetProcAddress((const GLubyte*)"glNormalStream3dATI")) == NULL) || r; + r = ((glNormalStream3dvATI = (PFNGLNORMALSTREAM3DVATIPROC)glewGetProcAddress((const GLubyte*)"glNormalStream3dvATI")) == NULL) || r; + r = ((glNormalStream3fATI = (PFNGLNORMALSTREAM3FATIPROC)glewGetProcAddress((const GLubyte*)"glNormalStream3fATI")) == NULL) || r; + r = ((glNormalStream3fvATI = (PFNGLNORMALSTREAM3FVATIPROC)glewGetProcAddress((const GLubyte*)"glNormalStream3fvATI")) == NULL) || r; + r = ((glNormalStream3iATI = (PFNGLNORMALSTREAM3IATIPROC)glewGetProcAddress((const GLubyte*)"glNormalStream3iATI")) == NULL) || r; + r = ((glNormalStream3ivATI = (PFNGLNORMALSTREAM3IVATIPROC)glewGetProcAddress((const GLubyte*)"glNormalStream3ivATI")) == NULL) || r; + r = ((glNormalStream3sATI = 
(PFNGLNORMALSTREAM3SATIPROC)glewGetProcAddress((const GLubyte*)"glNormalStream3sATI")) == NULL) || r; + r = ((glNormalStream3svATI = (PFNGLNORMALSTREAM3SVATIPROC)glewGetProcAddress((const GLubyte*)"glNormalStream3svATI")) == NULL) || r; + r = ((glVertexBlendEnvfATI = (PFNGLVERTEXBLENDENVFATIPROC)glewGetProcAddress((const GLubyte*)"glVertexBlendEnvfATI")) == NULL) || r; + r = ((glVertexBlendEnviATI = (PFNGLVERTEXBLENDENVIATIPROC)glewGetProcAddress((const GLubyte*)"glVertexBlendEnviATI")) == NULL) || r; + r = ((glVertexStream2dATI = (PFNGLVERTEXSTREAM2DATIPROC)glewGetProcAddress((const GLubyte*)"glVertexStream2dATI")) == NULL) || r; + r = ((glVertexStream2dvATI = (PFNGLVERTEXSTREAM2DVATIPROC)glewGetProcAddress((const GLubyte*)"glVertexStream2dvATI")) == NULL) || r; + r = ((glVertexStream2fATI = (PFNGLVERTEXSTREAM2FATIPROC)glewGetProcAddress((const GLubyte*)"glVertexStream2fATI")) == NULL) || r; + r = ((glVertexStream2fvATI = (PFNGLVERTEXSTREAM2FVATIPROC)glewGetProcAddress((const GLubyte*)"glVertexStream2fvATI")) == NULL) || r; + r = ((glVertexStream2iATI = (PFNGLVERTEXSTREAM2IATIPROC)glewGetProcAddress((const GLubyte*)"glVertexStream2iATI")) == NULL) || r; + r = ((glVertexStream2ivATI = (PFNGLVERTEXSTREAM2IVATIPROC)glewGetProcAddress((const GLubyte*)"glVertexStream2ivATI")) == NULL) || r; + r = ((glVertexStream2sATI = (PFNGLVERTEXSTREAM2SATIPROC)glewGetProcAddress((const GLubyte*)"glVertexStream2sATI")) == NULL) || r; + r = ((glVertexStream2svATI = (PFNGLVERTEXSTREAM2SVATIPROC)glewGetProcAddress((const GLubyte*)"glVertexStream2svATI")) == NULL) || r; + r = ((glVertexStream3dATI = (PFNGLVERTEXSTREAM3DATIPROC)glewGetProcAddress((const GLubyte*)"glVertexStream3dATI")) == NULL) || r; + r = ((glVertexStream3dvATI = (PFNGLVERTEXSTREAM3DVATIPROC)glewGetProcAddress((const GLubyte*)"glVertexStream3dvATI")) == NULL) || r; + r = ((glVertexStream3fATI = (PFNGLVERTEXSTREAM3FATIPROC)glewGetProcAddress((const GLubyte*)"glVertexStream3fATI")) == NULL) || r; + r = 
((glVertexStream3fvATI = (PFNGLVERTEXSTREAM3FVATIPROC)glewGetProcAddress((const GLubyte*)"glVertexStream3fvATI")) == NULL) || r; + r = ((glVertexStream3iATI = (PFNGLVERTEXSTREAM3IATIPROC)glewGetProcAddress((const GLubyte*)"glVertexStream3iATI")) == NULL) || r; + r = ((glVertexStream3ivATI = (PFNGLVERTEXSTREAM3IVATIPROC)glewGetProcAddress((const GLubyte*)"glVertexStream3ivATI")) == NULL) || r; + r = ((glVertexStream3sATI = (PFNGLVERTEXSTREAM3SATIPROC)glewGetProcAddress((const GLubyte*)"glVertexStream3sATI")) == NULL) || r; + r = ((glVertexStream3svATI = (PFNGLVERTEXSTREAM3SVATIPROC)glewGetProcAddress((const GLubyte*)"glVertexStream3svATI")) == NULL) || r; + r = ((glVertexStream4dATI = (PFNGLVERTEXSTREAM4DATIPROC)glewGetProcAddress((const GLubyte*)"glVertexStream4dATI")) == NULL) || r; + r = ((glVertexStream4dvATI = (PFNGLVERTEXSTREAM4DVATIPROC)glewGetProcAddress((const GLubyte*)"glVertexStream4dvATI")) == NULL) || r; + r = ((glVertexStream4fATI = (PFNGLVERTEXSTREAM4FATIPROC)glewGetProcAddress((const GLubyte*)"glVertexStream4fATI")) == NULL) || r; + r = ((glVertexStream4fvATI = (PFNGLVERTEXSTREAM4FVATIPROC)glewGetProcAddress((const GLubyte*)"glVertexStream4fvATI")) == NULL) || r; + r = ((glVertexStream4iATI = (PFNGLVERTEXSTREAM4IATIPROC)glewGetProcAddress((const GLubyte*)"glVertexStream4iATI")) == NULL) || r; + r = ((glVertexStream4ivATI = (PFNGLVERTEXSTREAM4IVATIPROC)glewGetProcAddress((const GLubyte*)"glVertexStream4ivATI")) == NULL) || r; + r = ((glVertexStream4sATI = (PFNGLVERTEXSTREAM4SATIPROC)glewGetProcAddress((const GLubyte*)"glVertexStream4sATI")) == NULL) || r; + r = ((glVertexStream4svATI = (PFNGLVERTEXSTREAM4SVATIPROC)glewGetProcAddress((const GLubyte*)"glVertexStream4svATI")) == NULL) || r; + + return r; +} + +#endif /* GL_ATI_vertex_streams */ + +#ifdef GL_EXT_422_pixels + +#endif /* GL_EXT_422_pixels */ + +#ifdef GL_EXT_Cg_shader + +#endif /* GL_EXT_Cg_shader */ + +#ifdef GL_EXT_abgr + +#endif /* GL_EXT_abgr */ + +#ifdef GL_EXT_bgra + +#endif /* 
GL_EXT_bgra */ + +#ifdef GL_EXT_bindable_uniform + +static GLboolean _glewInit_GL_EXT_bindable_uniform (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glGetUniformBufferSizeEXT = (PFNGLGETUNIFORMBUFFERSIZEEXTPROC)glewGetProcAddress((const GLubyte*)"glGetUniformBufferSizeEXT")) == NULL) || r; + r = ((glGetUniformOffsetEXT = (PFNGLGETUNIFORMOFFSETEXTPROC)glewGetProcAddress((const GLubyte*)"glGetUniformOffsetEXT")) == NULL) || r; + r = ((glUniformBufferEXT = (PFNGLUNIFORMBUFFEREXTPROC)glewGetProcAddress((const GLubyte*)"glUniformBufferEXT")) == NULL) || r; + + return r; +} + +#endif /* GL_EXT_bindable_uniform */ + +#ifdef GL_EXT_blend_color + +static GLboolean _glewInit_GL_EXT_blend_color (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glBlendColorEXT = (PFNGLBLENDCOLOREXTPROC)glewGetProcAddress((const GLubyte*)"glBlendColorEXT")) == NULL) || r; + + return r; +} + +#endif /* GL_EXT_blend_color */ + +#ifdef GL_EXT_blend_equation_separate + +static GLboolean _glewInit_GL_EXT_blend_equation_separate (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glBlendEquationSeparateEXT = (PFNGLBLENDEQUATIONSEPARATEEXTPROC)glewGetProcAddress((const GLubyte*)"glBlendEquationSeparateEXT")) == NULL) || r; + + return r; +} + +#endif /* GL_EXT_blend_equation_separate */ + +#ifdef GL_EXT_blend_func_separate + +static GLboolean _glewInit_GL_EXT_blend_func_separate (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glBlendFuncSeparateEXT = (PFNGLBLENDFUNCSEPARATEEXTPROC)glewGetProcAddress((const GLubyte*)"glBlendFuncSeparateEXT")) == NULL) || r; + + return r; +} + +#endif /* GL_EXT_blend_func_separate */ + +#ifdef GL_EXT_blend_logic_op + +#endif /* GL_EXT_blend_logic_op */ + +#ifdef GL_EXT_blend_minmax + +static GLboolean _glewInit_GL_EXT_blend_minmax (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glBlendEquationEXT = (PFNGLBLENDEQUATIONEXTPROC)glewGetProcAddress((const GLubyte*)"glBlendEquationEXT")) 
== NULL) || r; + + return r; +} + +#endif /* GL_EXT_blend_minmax */ + +#ifdef GL_EXT_blend_subtract + +#endif /* GL_EXT_blend_subtract */ + +#ifdef GL_EXT_clip_volume_hint + +#endif /* GL_EXT_clip_volume_hint */ + +#ifdef GL_EXT_cmyka + +#endif /* GL_EXT_cmyka */ + +#ifdef GL_EXT_color_subtable + +static GLboolean _glewInit_GL_EXT_color_subtable (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glColorSubTableEXT = (PFNGLCOLORSUBTABLEEXTPROC)glewGetProcAddress((const GLubyte*)"glColorSubTableEXT")) == NULL) || r; + r = ((glCopyColorSubTableEXT = (PFNGLCOPYCOLORSUBTABLEEXTPROC)glewGetProcAddress((const GLubyte*)"glCopyColorSubTableEXT")) == NULL) || r; + + return r; +} + +#endif /* GL_EXT_color_subtable */ + +#ifdef GL_EXT_compiled_vertex_array + +static GLboolean _glewInit_GL_EXT_compiled_vertex_array (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glLockArraysEXT = (PFNGLLOCKARRAYSEXTPROC)glewGetProcAddress((const GLubyte*)"glLockArraysEXT")) == NULL) || r; + r = ((glUnlockArraysEXT = (PFNGLUNLOCKARRAYSEXTPROC)glewGetProcAddress((const GLubyte*)"glUnlockArraysEXT")) == NULL) || r; + + return r; +} + +#endif /* GL_EXT_compiled_vertex_array */ + +#ifdef GL_EXT_convolution + +static GLboolean _glewInit_GL_EXT_convolution (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glConvolutionFilter1DEXT = (PFNGLCONVOLUTIONFILTER1DEXTPROC)glewGetProcAddress((const GLubyte*)"glConvolutionFilter1DEXT")) == NULL) || r; + r = ((glConvolutionFilter2DEXT = (PFNGLCONVOLUTIONFILTER2DEXTPROC)glewGetProcAddress((const GLubyte*)"glConvolutionFilter2DEXT")) == NULL) || r; + r = ((glConvolutionParameterfEXT = (PFNGLCONVOLUTIONPARAMETERFEXTPROC)glewGetProcAddress((const GLubyte*)"glConvolutionParameterfEXT")) == NULL) || r; + r = ((glConvolutionParameterfvEXT = (PFNGLCONVOLUTIONPARAMETERFVEXTPROC)glewGetProcAddress((const GLubyte*)"glConvolutionParameterfvEXT")) == NULL) || r; + r = ((glConvolutionParameteriEXT = 
(PFNGLCONVOLUTIONPARAMETERIEXTPROC)glewGetProcAddress((const GLubyte*)"glConvolutionParameteriEXT")) == NULL) || r; + r = ((glConvolutionParameterivEXT = (PFNGLCONVOLUTIONPARAMETERIVEXTPROC)glewGetProcAddress((const GLubyte*)"glConvolutionParameterivEXT")) == NULL) || r; + r = ((glCopyConvolutionFilter1DEXT = (PFNGLCOPYCONVOLUTIONFILTER1DEXTPROC)glewGetProcAddress((const GLubyte*)"glCopyConvolutionFilter1DEXT")) == NULL) || r; + r = ((glCopyConvolutionFilter2DEXT = (PFNGLCOPYCONVOLUTIONFILTER2DEXTPROC)glewGetProcAddress((const GLubyte*)"glCopyConvolutionFilter2DEXT")) == NULL) || r; + r = ((glGetConvolutionFilterEXT = (PFNGLGETCONVOLUTIONFILTEREXTPROC)glewGetProcAddress((const GLubyte*)"glGetConvolutionFilterEXT")) == NULL) || r; + r = ((glGetConvolutionParameterfvEXT = (PFNGLGETCONVOLUTIONPARAMETERFVEXTPROC)glewGetProcAddress((const GLubyte*)"glGetConvolutionParameterfvEXT")) == NULL) || r; + r = ((glGetConvolutionParameterivEXT = (PFNGLGETCONVOLUTIONPARAMETERIVEXTPROC)glewGetProcAddress((const GLubyte*)"glGetConvolutionParameterivEXT")) == NULL) || r; + r = ((glGetSeparableFilterEXT = (PFNGLGETSEPARABLEFILTEREXTPROC)glewGetProcAddress((const GLubyte*)"glGetSeparableFilterEXT")) == NULL) || r; + r = ((glSeparableFilter2DEXT = (PFNGLSEPARABLEFILTER2DEXTPROC)glewGetProcAddress((const GLubyte*)"glSeparableFilter2DEXT")) == NULL) || r; + + return r; +} + +#endif /* GL_EXT_convolution */ + +#ifdef GL_EXT_coordinate_frame + +static GLboolean _glewInit_GL_EXT_coordinate_frame (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glBinormalPointerEXT = (PFNGLBINORMALPOINTEREXTPROC)glewGetProcAddress((const GLubyte*)"glBinormalPointerEXT")) == NULL) || r; + r = ((glTangentPointerEXT = (PFNGLTANGENTPOINTEREXTPROC)glewGetProcAddress((const GLubyte*)"glTangentPointerEXT")) == NULL) || r; + + return r; +} + +#endif /* GL_EXT_coordinate_frame */ + +#ifdef GL_EXT_copy_texture + +static GLboolean _glewInit_GL_EXT_copy_texture (GLEW_CONTEXT_ARG_DEF_INIT) +{ + 
GLboolean r = GL_FALSE; + + r = ((glCopyTexImage1DEXT = (PFNGLCOPYTEXIMAGE1DEXTPROC)glewGetProcAddress((const GLubyte*)"glCopyTexImage1DEXT")) == NULL) || r; + r = ((glCopyTexImage2DEXT = (PFNGLCOPYTEXIMAGE2DEXTPROC)glewGetProcAddress((const GLubyte*)"glCopyTexImage2DEXT")) == NULL) || r; + r = ((glCopyTexSubImage1DEXT = (PFNGLCOPYTEXSUBIMAGE1DEXTPROC)glewGetProcAddress((const GLubyte*)"glCopyTexSubImage1DEXT")) == NULL) || r; + r = ((glCopyTexSubImage2DEXT = (PFNGLCOPYTEXSUBIMAGE2DEXTPROC)glewGetProcAddress((const GLubyte*)"glCopyTexSubImage2DEXT")) == NULL) || r; + r = ((glCopyTexSubImage3DEXT = (PFNGLCOPYTEXSUBIMAGE3DEXTPROC)glewGetProcAddress((const GLubyte*)"glCopyTexSubImage3DEXT")) == NULL) || r; + + return r; +} + +#endif /* GL_EXT_copy_texture */ + +#ifdef GL_EXT_cull_vertex + +static GLboolean _glewInit_GL_EXT_cull_vertex (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glCullParameterdvEXT = (PFNGLCULLPARAMETERDVEXTPROC)glewGetProcAddress((const GLubyte*)"glCullParameterdvEXT")) == NULL) || r; + r = ((glCullParameterfvEXT = (PFNGLCULLPARAMETERFVEXTPROC)glewGetProcAddress((const GLubyte*)"glCullParameterfvEXT")) == NULL) || r; + + return r; +} + +#endif /* GL_EXT_cull_vertex */ + +#ifdef GL_EXT_depth_bounds_test + +static GLboolean _glewInit_GL_EXT_depth_bounds_test (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glDepthBoundsEXT = (PFNGLDEPTHBOUNDSEXTPROC)glewGetProcAddress((const GLubyte*)"glDepthBoundsEXT")) == NULL) || r; + + return r; +} + +#endif /* GL_EXT_depth_bounds_test */ + +#ifdef GL_EXT_draw_buffers2 + +static GLboolean _glewInit_GL_EXT_draw_buffers2 (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glColorMaskIndexedEXT = (PFNGLCOLORMASKINDEXEDEXTPROC)glewGetProcAddress((const GLubyte*)"glColorMaskIndexedEXT")) == NULL) || r; + r = ((glDisableIndexedEXT = (PFNGLDISABLEINDEXEDEXTPROC)glewGetProcAddress((const GLubyte*)"glDisableIndexedEXT")) == NULL) || r; + r = ((glEnableIndexedEXT = 
(PFNGLENABLEINDEXEDEXTPROC)glewGetProcAddress((const GLubyte*)"glEnableIndexedEXT")) == NULL) || r; + r = ((glGetBooleanIndexedvEXT = (PFNGLGETBOOLEANINDEXEDVEXTPROC)glewGetProcAddress((const GLubyte*)"glGetBooleanIndexedvEXT")) == NULL) || r; + r = ((glGetIntegerIndexedvEXT = (PFNGLGETINTEGERINDEXEDVEXTPROC)glewGetProcAddress((const GLubyte*)"glGetIntegerIndexedvEXT")) == NULL) || r; + r = ((glIsEnabledIndexedEXT = (PFNGLISENABLEDINDEXEDEXTPROC)glewGetProcAddress((const GLubyte*)"glIsEnabledIndexedEXT")) == NULL) || r; + + return r; +} + +#endif /* GL_EXT_draw_buffers2 */ + +#ifdef GL_EXT_draw_instanced + +static GLboolean _glewInit_GL_EXT_draw_instanced (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glDrawArraysInstancedEXT = (PFNGLDRAWARRAYSINSTANCEDEXTPROC)glewGetProcAddress((const GLubyte*)"glDrawArraysInstancedEXT")) == NULL) || r; + r = ((glDrawElementsInstancedEXT = (PFNGLDRAWELEMENTSINSTANCEDEXTPROC)glewGetProcAddress((const GLubyte*)"glDrawElementsInstancedEXT")) == NULL) || r; + + return r; +} + +#endif /* GL_EXT_draw_instanced */ + +#ifdef GL_EXT_draw_range_elements + +static GLboolean _glewInit_GL_EXT_draw_range_elements (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glDrawRangeElementsEXT = (PFNGLDRAWRANGEELEMENTSEXTPROC)glewGetProcAddress((const GLubyte*)"glDrawRangeElementsEXT")) == NULL) || r; + + return r; +} + +#endif /* GL_EXT_draw_range_elements */ + +#ifdef GL_EXT_fog_coord + +static GLboolean _glewInit_GL_EXT_fog_coord (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glFogCoordPointerEXT = (PFNGLFOGCOORDPOINTEREXTPROC)glewGetProcAddress((const GLubyte*)"glFogCoordPointerEXT")) == NULL) || r; + r = ((glFogCoorddEXT = (PFNGLFOGCOORDDEXTPROC)glewGetProcAddress((const GLubyte*)"glFogCoorddEXT")) == NULL) || r; + r = ((glFogCoorddvEXT = (PFNGLFOGCOORDDVEXTPROC)glewGetProcAddress((const GLubyte*)"glFogCoorddvEXT")) == NULL) || r; + r = ((glFogCoordfEXT = 
(PFNGLFOGCOORDFEXTPROC)glewGetProcAddress((const GLubyte*)"glFogCoordfEXT")) == NULL) || r; + r = ((glFogCoordfvEXT = (PFNGLFOGCOORDFVEXTPROC)glewGetProcAddress((const GLubyte*)"glFogCoordfvEXT")) == NULL) || r; + + return r; +} + +#endif /* GL_EXT_fog_coord */ + +#ifdef GL_EXT_fragment_lighting + +static GLboolean _glewInit_GL_EXT_fragment_lighting (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glFragmentColorMaterialEXT = (PFNGLFRAGMENTCOLORMATERIALEXTPROC)glewGetProcAddress((const GLubyte*)"glFragmentColorMaterialEXT")) == NULL) || r; + r = ((glFragmentLightModelfEXT = (PFNGLFRAGMENTLIGHTMODELFEXTPROC)glewGetProcAddress((const GLubyte*)"glFragmentLightModelfEXT")) == NULL) || r; + r = ((glFragmentLightModelfvEXT = (PFNGLFRAGMENTLIGHTMODELFVEXTPROC)glewGetProcAddress((const GLubyte*)"glFragmentLightModelfvEXT")) == NULL) || r; + r = ((glFragmentLightModeliEXT = (PFNGLFRAGMENTLIGHTMODELIEXTPROC)glewGetProcAddress((const GLubyte*)"glFragmentLightModeliEXT")) == NULL) || r; + r = ((glFragmentLightModelivEXT = (PFNGLFRAGMENTLIGHTMODELIVEXTPROC)glewGetProcAddress((const GLubyte*)"glFragmentLightModelivEXT")) == NULL) || r; + r = ((glFragmentLightfEXT = (PFNGLFRAGMENTLIGHTFEXTPROC)glewGetProcAddress((const GLubyte*)"glFragmentLightfEXT")) == NULL) || r; + r = ((glFragmentLightfvEXT = (PFNGLFRAGMENTLIGHTFVEXTPROC)glewGetProcAddress((const GLubyte*)"glFragmentLightfvEXT")) == NULL) || r; + r = ((glFragmentLightiEXT = (PFNGLFRAGMENTLIGHTIEXTPROC)glewGetProcAddress((const GLubyte*)"glFragmentLightiEXT")) == NULL) || r; + r = ((glFragmentLightivEXT = (PFNGLFRAGMENTLIGHTIVEXTPROC)glewGetProcAddress((const GLubyte*)"glFragmentLightivEXT")) == NULL) || r; + r = ((glFragmentMaterialfEXT = (PFNGLFRAGMENTMATERIALFEXTPROC)glewGetProcAddress((const GLubyte*)"glFragmentMaterialfEXT")) == NULL) || r; + r = ((glFragmentMaterialfvEXT = (PFNGLFRAGMENTMATERIALFVEXTPROC)glewGetProcAddress((const GLubyte*)"glFragmentMaterialfvEXT")) == NULL) || r; + r = 
((glFragmentMaterialiEXT = (PFNGLFRAGMENTMATERIALIEXTPROC)glewGetProcAddress((const GLubyte*)"glFragmentMaterialiEXT")) == NULL) || r; + r = ((glFragmentMaterialivEXT = (PFNGLFRAGMENTMATERIALIVEXTPROC)glewGetProcAddress((const GLubyte*)"glFragmentMaterialivEXT")) == NULL) || r; + r = ((glGetFragmentLightfvEXT = (PFNGLGETFRAGMENTLIGHTFVEXTPROC)glewGetProcAddress((const GLubyte*)"glGetFragmentLightfvEXT")) == NULL) || r; + r = ((glGetFragmentLightivEXT = (PFNGLGETFRAGMENTLIGHTIVEXTPROC)glewGetProcAddress((const GLubyte*)"glGetFragmentLightivEXT")) == NULL) || r; + r = ((glGetFragmentMaterialfvEXT = (PFNGLGETFRAGMENTMATERIALFVEXTPROC)glewGetProcAddress((const GLubyte*)"glGetFragmentMaterialfvEXT")) == NULL) || r; + r = ((glGetFragmentMaterialivEXT = (PFNGLGETFRAGMENTMATERIALIVEXTPROC)glewGetProcAddress((const GLubyte*)"glGetFragmentMaterialivEXT")) == NULL) || r; + r = ((glLightEnviEXT = (PFNGLLIGHTENVIEXTPROC)glewGetProcAddress((const GLubyte*)"glLightEnviEXT")) == NULL) || r; + + return r; +} + +#endif /* GL_EXT_fragment_lighting */ + +#ifdef GL_EXT_framebuffer_blit + +static GLboolean _glewInit_GL_EXT_framebuffer_blit (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glBlitFramebufferEXT = (PFNGLBLITFRAMEBUFFEREXTPROC)glewGetProcAddress((const GLubyte*)"glBlitFramebufferEXT")) == NULL) || r; + + return r; +} + +#endif /* GL_EXT_framebuffer_blit */ + +#ifdef GL_EXT_framebuffer_multisample + +static GLboolean _glewInit_GL_EXT_framebuffer_multisample (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glRenderbufferStorageMultisampleEXT = (PFNGLRENDERBUFFERSTORAGEMULTISAMPLEEXTPROC)glewGetProcAddress((const GLubyte*)"glRenderbufferStorageMultisampleEXT")) == NULL) || r; + + return r; +} + +#endif /* GL_EXT_framebuffer_multisample */ + +#ifdef GL_EXT_framebuffer_object + +static GLboolean _glewInit_GL_EXT_framebuffer_object (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glBindFramebufferEXT = 
(PFNGLBINDFRAMEBUFFEREXTPROC)glewGetProcAddress((const GLubyte*)"glBindFramebufferEXT")) == NULL) || r; + r = ((glBindRenderbufferEXT = (PFNGLBINDRENDERBUFFEREXTPROC)glewGetProcAddress((const GLubyte*)"glBindRenderbufferEXT")) == NULL) || r; + r = ((glCheckFramebufferStatusEXT = (PFNGLCHECKFRAMEBUFFERSTATUSEXTPROC)glewGetProcAddress((const GLubyte*)"glCheckFramebufferStatusEXT")) == NULL) || r; + r = ((glDeleteFramebuffersEXT = (PFNGLDELETEFRAMEBUFFERSEXTPROC)glewGetProcAddress((const GLubyte*)"glDeleteFramebuffersEXT")) == NULL) || r; + r = ((glDeleteRenderbuffersEXT = (PFNGLDELETERENDERBUFFERSEXTPROC)glewGetProcAddress((const GLubyte*)"glDeleteRenderbuffersEXT")) == NULL) || r; + r = ((glFramebufferRenderbufferEXT = (PFNGLFRAMEBUFFERRENDERBUFFEREXTPROC)glewGetProcAddress((const GLubyte*)"glFramebufferRenderbufferEXT")) == NULL) || r; + r = ((glFramebufferTexture1DEXT = (PFNGLFRAMEBUFFERTEXTURE1DEXTPROC)glewGetProcAddress((const GLubyte*)"glFramebufferTexture1DEXT")) == NULL) || r; + r = ((glFramebufferTexture2DEXT = (PFNGLFRAMEBUFFERTEXTURE2DEXTPROC)glewGetProcAddress((const GLubyte*)"glFramebufferTexture2DEXT")) == NULL) || r; + r = ((glFramebufferTexture3DEXT = (PFNGLFRAMEBUFFERTEXTURE3DEXTPROC)glewGetProcAddress((const GLubyte*)"glFramebufferTexture3DEXT")) == NULL) || r; + r = ((glGenFramebuffersEXT = (PFNGLGENFRAMEBUFFERSEXTPROC)glewGetProcAddress((const GLubyte*)"glGenFramebuffersEXT")) == NULL) || r; + r = ((glGenRenderbuffersEXT = (PFNGLGENRENDERBUFFERSEXTPROC)glewGetProcAddress((const GLubyte*)"glGenRenderbuffersEXT")) == NULL) || r; + r = ((glGenerateMipmapEXT = (PFNGLGENERATEMIPMAPEXTPROC)glewGetProcAddress((const GLubyte*)"glGenerateMipmapEXT")) == NULL) || r; + r = ((glGetFramebufferAttachmentParameterivEXT = (PFNGLGETFRAMEBUFFERATTACHMENTPARAMETERIVEXTPROC)glewGetProcAddress((const GLubyte*)"glGetFramebufferAttachmentParameterivEXT")) == NULL) || r; + r = ((glGetRenderbufferParameterivEXT = 
(PFNGLGETRENDERBUFFERPARAMETERIVEXTPROC)glewGetProcAddress((const GLubyte*)"glGetRenderbufferParameterivEXT")) == NULL) || r; + r = ((glIsFramebufferEXT = (PFNGLISFRAMEBUFFEREXTPROC)glewGetProcAddress((const GLubyte*)"glIsFramebufferEXT")) == NULL) || r; + r = ((glIsRenderbufferEXT = (PFNGLISRENDERBUFFEREXTPROC)glewGetProcAddress((const GLubyte*)"glIsRenderbufferEXT")) == NULL) || r; + r = ((glRenderbufferStorageEXT = (PFNGLRENDERBUFFERSTORAGEEXTPROC)glewGetProcAddress((const GLubyte*)"glRenderbufferStorageEXT")) == NULL) || r; + + return r; +} + +#endif /* GL_EXT_framebuffer_object */ + +#ifdef GL_EXT_framebuffer_sRGB + +#endif /* GL_EXT_framebuffer_sRGB */ + +#ifdef GL_EXT_geometry_shader4 + +static GLboolean _glewInit_GL_EXT_geometry_shader4 (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glFramebufferTextureEXT = (PFNGLFRAMEBUFFERTEXTUREEXTPROC)glewGetProcAddress((const GLubyte*)"glFramebufferTextureEXT")) == NULL) || r; + r = ((glFramebufferTextureFaceEXT = (PFNGLFRAMEBUFFERTEXTUREFACEEXTPROC)glewGetProcAddress((const GLubyte*)"glFramebufferTextureFaceEXT")) == NULL) || r; + r = ((glFramebufferTextureLayerEXT = (PFNGLFRAMEBUFFERTEXTURELAYEREXTPROC)glewGetProcAddress((const GLubyte*)"glFramebufferTextureLayerEXT")) == NULL) || r; + r = ((glProgramParameteriEXT = (PFNGLPROGRAMPARAMETERIEXTPROC)glewGetProcAddress((const GLubyte*)"glProgramParameteriEXT")) == NULL) || r; + + return r; +} + +#endif /* GL_EXT_geometry_shader4 */ + +#ifdef GL_EXT_gpu_program_parameters + +static GLboolean _glewInit_GL_EXT_gpu_program_parameters (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glProgramEnvParameters4fvEXT = (PFNGLPROGRAMENVPARAMETERS4FVEXTPROC)glewGetProcAddress((const GLubyte*)"glProgramEnvParameters4fvEXT")) == NULL) || r; + r = ((glProgramLocalParameters4fvEXT = (PFNGLPROGRAMLOCALPARAMETERS4FVEXTPROC)glewGetProcAddress((const GLubyte*)"glProgramLocalParameters4fvEXT")) == NULL) || r; + + return r; +} + +#endif /* 
GL_EXT_gpu_program_parameters */ + +#ifdef GL_EXT_gpu_shader4 + +static GLboolean _glewInit_GL_EXT_gpu_shader4 (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glBindFragDataLocationEXT = (PFNGLBINDFRAGDATALOCATIONEXTPROC)glewGetProcAddress((const GLubyte*)"glBindFragDataLocationEXT")) == NULL) || r; + r = ((glGetFragDataLocationEXT = (PFNGLGETFRAGDATALOCATIONEXTPROC)glewGetProcAddress((const GLubyte*)"glGetFragDataLocationEXT")) == NULL) || r; + r = ((glGetUniformuivEXT = (PFNGLGETUNIFORMUIVEXTPROC)glewGetProcAddress((const GLubyte*)"glGetUniformuivEXT")) == NULL) || r; + r = ((glGetVertexAttribIivEXT = (PFNGLGETVERTEXATTRIBIIVEXTPROC)glewGetProcAddress((const GLubyte*)"glGetVertexAttribIivEXT")) == NULL) || r; + r = ((glGetVertexAttribIuivEXT = (PFNGLGETVERTEXATTRIBIUIVEXTPROC)glewGetProcAddress((const GLubyte*)"glGetVertexAttribIuivEXT")) == NULL) || r; + r = ((glUniform1uiEXT = (PFNGLUNIFORM1UIEXTPROC)glewGetProcAddress((const GLubyte*)"glUniform1uiEXT")) == NULL) || r; + r = ((glUniform1uivEXT = (PFNGLUNIFORM1UIVEXTPROC)glewGetProcAddress((const GLubyte*)"glUniform1uivEXT")) == NULL) || r; + r = ((glUniform2uiEXT = (PFNGLUNIFORM2UIEXTPROC)glewGetProcAddress((const GLubyte*)"glUniform2uiEXT")) == NULL) || r; + r = ((glUniform2uivEXT = (PFNGLUNIFORM2UIVEXTPROC)glewGetProcAddress((const GLubyte*)"glUniform2uivEXT")) == NULL) || r; + r = ((glUniform3uiEXT = (PFNGLUNIFORM3UIEXTPROC)glewGetProcAddress((const GLubyte*)"glUniform3uiEXT")) == NULL) || r; + r = ((glUniform3uivEXT = (PFNGLUNIFORM3UIVEXTPROC)glewGetProcAddress((const GLubyte*)"glUniform3uivEXT")) == NULL) || r; + r = ((glUniform4uiEXT = (PFNGLUNIFORM4UIEXTPROC)glewGetProcAddress((const GLubyte*)"glUniform4uiEXT")) == NULL) || r; + r = ((glUniform4uivEXT = (PFNGLUNIFORM4UIVEXTPROC)glewGetProcAddress((const GLubyte*)"glUniform4uivEXT")) == NULL) || r; + r = ((glVertexAttribI1iEXT = (PFNGLVERTEXATTRIBI1IEXTPROC)glewGetProcAddress((const GLubyte*)"glVertexAttribI1iEXT")) == NULL) || r; + r = 
((glVertexAttribI1ivEXT = (PFNGLVERTEXATTRIBI1IVEXTPROC)glewGetProcAddress((const GLubyte*)"glVertexAttribI1ivEXT")) == NULL) || r; + r = ((glVertexAttribI1uiEXT = (PFNGLVERTEXATTRIBI1UIEXTPROC)glewGetProcAddress((const GLubyte*)"glVertexAttribI1uiEXT")) == NULL) || r; + r = ((glVertexAttribI1uivEXT = (PFNGLVERTEXATTRIBI1UIVEXTPROC)glewGetProcAddress((const GLubyte*)"glVertexAttribI1uivEXT")) == NULL) || r; + r = ((glVertexAttribI2iEXT = (PFNGLVERTEXATTRIBI2IEXTPROC)glewGetProcAddress((const GLubyte*)"glVertexAttribI2iEXT")) == NULL) || r; + r = ((glVertexAttribI2ivEXT = (PFNGLVERTEXATTRIBI2IVEXTPROC)glewGetProcAddress((const GLubyte*)"glVertexAttribI2ivEXT")) == NULL) || r; + r = ((glVertexAttribI2uiEXT = (PFNGLVERTEXATTRIBI2UIEXTPROC)glewGetProcAddress((const GLubyte*)"glVertexAttribI2uiEXT")) == NULL) || r; + r = ((glVertexAttribI2uivEXT = (PFNGLVERTEXATTRIBI2UIVEXTPROC)glewGetProcAddress((const GLubyte*)"glVertexAttribI2uivEXT")) == NULL) || r; + r = ((glVertexAttribI3iEXT = (PFNGLVERTEXATTRIBI3IEXTPROC)glewGetProcAddress((const GLubyte*)"glVertexAttribI3iEXT")) == NULL) || r; + r = ((glVertexAttribI3ivEXT = (PFNGLVERTEXATTRIBI3IVEXTPROC)glewGetProcAddress((const GLubyte*)"glVertexAttribI3ivEXT")) == NULL) || r; + r = ((glVertexAttribI3uiEXT = (PFNGLVERTEXATTRIBI3UIEXTPROC)glewGetProcAddress((const GLubyte*)"glVertexAttribI3uiEXT")) == NULL) || r; + r = ((glVertexAttribI3uivEXT = (PFNGLVERTEXATTRIBI3UIVEXTPROC)glewGetProcAddress((const GLubyte*)"glVertexAttribI3uivEXT")) == NULL) || r; + r = ((glVertexAttribI4bvEXT = (PFNGLVERTEXATTRIBI4BVEXTPROC)glewGetProcAddress((const GLubyte*)"glVertexAttribI4bvEXT")) == NULL) || r; + r = ((glVertexAttribI4iEXT = (PFNGLVERTEXATTRIBI4IEXTPROC)glewGetProcAddress((const GLubyte*)"glVertexAttribI4iEXT")) == NULL) || r; + r = ((glVertexAttribI4ivEXT = (PFNGLVERTEXATTRIBI4IVEXTPROC)glewGetProcAddress((const GLubyte*)"glVertexAttribI4ivEXT")) == NULL) || r; + r = ((glVertexAttribI4svEXT = 
(PFNGLVERTEXATTRIBI4SVEXTPROC)glewGetProcAddress((const GLubyte*)"glVertexAttribI4svEXT")) == NULL) || r; + r = ((glVertexAttribI4ubvEXT = (PFNGLVERTEXATTRIBI4UBVEXTPROC)glewGetProcAddress((const GLubyte*)"glVertexAttribI4ubvEXT")) == NULL) || r; + r = ((glVertexAttribI4uiEXT = (PFNGLVERTEXATTRIBI4UIEXTPROC)glewGetProcAddress((const GLubyte*)"glVertexAttribI4uiEXT")) == NULL) || r; + r = ((glVertexAttribI4uivEXT = (PFNGLVERTEXATTRIBI4UIVEXTPROC)glewGetProcAddress((const GLubyte*)"glVertexAttribI4uivEXT")) == NULL) || r; + r = ((glVertexAttribI4usvEXT = (PFNGLVERTEXATTRIBI4USVEXTPROC)glewGetProcAddress((const GLubyte*)"glVertexAttribI4usvEXT")) == NULL) || r; + r = ((glVertexAttribIPointerEXT = (PFNGLVERTEXATTRIBIPOINTEREXTPROC)glewGetProcAddress((const GLubyte*)"glVertexAttribIPointerEXT")) == NULL) || r; + + return r; +} + +#endif /* GL_EXT_gpu_shader4 */ + +#ifdef GL_EXT_histogram + +static GLboolean _glewInit_GL_EXT_histogram (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glGetHistogramEXT = (PFNGLGETHISTOGRAMEXTPROC)glewGetProcAddress((const GLubyte*)"glGetHistogramEXT")) == NULL) || r; + r = ((glGetHistogramParameterfvEXT = (PFNGLGETHISTOGRAMPARAMETERFVEXTPROC)glewGetProcAddress((const GLubyte*)"glGetHistogramParameterfvEXT")) == NULL) || r; + r = ((glGetHistogramParameterivEXT = (PFNGLGETHISTOGRAMPARAMETERIVEXTPROC)glewGetProcAddress((const GLubyte*)"glGetHistogramParameterivEXT")) == NULL) || r; + r = ((glGetMinmaxEXT = (PFNGLGETMINMAXEXTPROC)glewGetProcAddress((const GLubyte*)"glGetMinmaxEXT")) == NULL) || r; + r = ((glGetMinmaxParameterfvEXT = (PFNGLGETMINMAXPARAMETERFVEXTPROC)glewGetProcAddress((const GLubyte*)"glGetMinmaxParameterfvEXT")) == NULL) || r; + r = ((glGetMinmaxParameterivEXT = (PFNGLGETMINMAXPARAMETERIVEXTPROC)glewGetProcAddress((const GLubyte*)"glGetMinmaxParameterivEXT")) == NULL) || r; + r = ((glHistogramEXT = (PFNGLHISTOGRAMEXTPROC)glewGetProcAddress((const GLubyte*)"glHistogramEXT")) == NULL) || r; + r = ((glMinmaxEXT 
= (PFNGLMINMAXEXTPROC)glewGetProcAddress((const GLubyte*)"glMinmaxEXT")) == NULL) || r; + r = ((glResetHistogramEXT = (PFNGLRESETHISTOGRAMEXTPROC)glewGetProcAddress((const GLubyte*)"glResetHistogramEXT")) == NULL) || r; + r = ((glResetMinmaxEXT = (PFNGLRESETMINMAXEXTPROC)glewGetProcAddress((const GLubyte*)"glResetMinmaxEXT")) == NULL) || r; + + return r; +} + +#endif /* GL_EXT_histogram */ + +#ifdef GL_EXT_index_array_formats + +#endif /* GL_EXT_index_array_formats */ + +#ifdef GL_EXT_index_func + +static GLboolean _glewInit_GL_EXT_index_func (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glIndexFuncEXT = (PFNGLINDEXFUNCEXTPROC)glewGetProcAddress((const GLubyte*)"glIndexFuncEXT")) == NULL) || r; + + return r; +} + +#endif /* GL_EXT_index_func */ + +#ifdef GL_EXT_index_material + +static GLboolean _glewInit_GL_EXT_index_material (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glIndexMaterialEXT = (PFNGLINDEXMATERIALEXTPROC)glewGetProcAddress((const GLubyte*)"glIndexMaterialEXT")) == NULL) || r; + + return r; +} + +#endif /* GL_EXT_index_material */ + +#ifdef GL_EXT_index_texture + +#endif /* GL_EXT_index_texture */ + +#ifdef GL_EXT_light_texture + +static GLboolean _glewInit_GL_EXT_light_texture (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glApplyTextureEXT = (PFNGLAPPLYTEXTUREEXTPROC)glewGetProcAddress((const GLubyte*)"glApplyTextureEXT")) == NULL) || r; + r = ((glTextureLightEXT = (PFNGLTEXTURELIGHTEXTPROC)glewGetProcAddress((const GLubyte*)"glTextureLightEXT")) == NULL) || r; + r = ((glTextureMaterialEXT = (PFNGLTEXTUREMATERIALEXTPROC)glewGetProcAddress((const GLubyte*)"glTextureMaterialEXT")) == NULL) || r; + + return r; +} + +#endif /* GL_EXT_light_texture */ + +#ifdef GL_EXT_misc_attribute + +#endif /* GL_EXT_misc_attribute */ + +#ifdef GL_EXT_multi_draw_arrays + +static GLboolean _glewInit_GL_EXT_multi_draw_arrays (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = 
((glMultiDrawArraysEXT = (PFNGLMULTIDRAWARRAYSEXTPROC)glewGetProcAddress((const GLubyte*)"glMultiDrawArraysEXT")) == NULL) || r; + r = ((glMultiDrawElementsEXT = (PFNGLMULTIDRAWELEMENTSEXTPROC)glewGetProcAddress((const GLubyte*)"glMultiDrawElementsEXT")) == NULL) || r; + + return r; +} + +#endif /* GL_EXT_multi_draw_arrays */ + +#ifdef GL_EXT_multisample + +static GLboolean _glewInit_GL_EXT_multisample (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glSampleMaskEXT = (PFNGLSAMPLEMASKEXTPROC)glewGetProcAddress((const GLubyte*)"glSampleMaskEXT")) == NULL) || r; + r = ((glSamplePatternEXT = (PFNGLSAMPLEPATTERNEXTPROC)glewGetProcAddress((const GLubyte*)"glSamplePatternEXT")) == NULL) || r; + + return r; +} + +#endif /* GL_EXT_multisample */ + +#ifdef GL_EXT_packed_depth_stencil + +#endif /* GL_EXT_packed_depth_stencil */ + +#ifdef GL_EXT_packed_float + +#endif /* GL_EXT_packed_float */ + +#ifdef GL_EXT_packed_pixels + +#endif /* GL_EXT_packed_pixels */ + +#ifdef GL_EXT_paletted_texture + +static GLboolean _glewInit_GL_EXT_paletted_texture (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glColorTableEXT = (PFNGLCOLORTABLEEXTPROC)glewGetProcAddress((const GLubyte*)"glColorTableEXT")) == NULL) || r; + r = ((glGetColorTableEXT = (PFNGLGETCOLORTABLEEXTPROC)glewGetProcAddress((const GLubyte*)"glGetColorTableEXT")) == NULL) || r; + r = ((glGetColorTableParameterfvEXT = (PFNGLGETCOLORTABLEPARAMETERFVEXTPROC)glewGetProcAddress((const GLubyte*)"glGetColorTableParameterfvEXT")) == NULL) || r; + r = ((glGetColorTableParameterivEXT = (PFNGLGETCOLORTABLEPARAMETERIVEXTPROC)glewGetProcAddress((const GLubyte*)"glGetColorTableParameterivEXT")) == NULL) || r; + + return r; +} + +#endif /* GL_EXT_paletted_texture */ + +#ifdef GL_EXT_pixel_buffer_object + +#endif /* GL_EXT_pixel_buffer_object */ + +#ifdef GL_EXT_pixel_transform + +static GLboolean _glewInit_GL_EXT_pixel_transform (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = 
((glGetPixelTransformParameterfvEXT = (PFNGLGETPIXELTRANSFORMPARAMETERFVEXTPROC)glewGetProcAddress((const GLubyte*)"glGetPixelTransformParameterfvEXT")) == NULL) || r; + r = ((glGetPixelTransformParameterivEXT = (PFNGLGETPIXELTRANSFORMPARAMETERIVEXTPROC)glewGetProcAddress((const GLubyte*)"glGetPixelTransformParameterivEXT")) == NULL) || r; + r = ((glPixelTransformParameterfEXT = (PFNGLPIXELTRANSFORMPARAMETERFEXTPROC)glewGetProcAddress((const GLubyte*)"glPixelTransformParameterfEXT")) == NULL) || r; + r = ((glPixelTransformParameterfvEXT = (PFNGLPIXELTRANSFORMPARAMETERFVEXTPROC)glewGetProcAddress((const GLubyte*)"glPixelTransformParameterfvEXT")) == NULL) || r; + r = ((glPixelTransformParameteriEXT = (PFNGLPIXELTRANSFORMPARAMETERIEXTPROC)glewGetProcAddress((const GLubyte*)"glPixelTransformParameteriEXT")) == NULL) || r; + r = ((glPixelTransformParameterivEXT = (PFNGLPIXELTRANSFORMPARAMETERIVEXTPROC)glewGetProcAddress((const GLubyte*)"glPixelTransformParameterivEXT")) == NULL) || r; + + return r; +} + +#endif /* GL_EXT_pixel_transform */ + +#ifdef GL_EXT_pixel_transform_color_table + +#endif /* GL_EXT_pixel_transform_color_table */ + +#ifdef GL_EXT_point_parameters + +static GLboolean _glewInit_GL_EXT_point_parameters (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glPointParameterfEXT = (PFNGLPOINTPARAMETERFEXTPROC)glewGetProcAddress((const GLubyte*)"glPointParameterfEXT")) == NULL) || r; + r = ((glPointParameterfvEXT = (PFNGLPOINTPARAMETERFVEXTPROC)glewGetProcAddress((const GLubyte*)"glPointParameterfvEXT")) == NULL) || r; + + return r; +} + +#endif /* GL_EXT_point_parameters */ + +#ifdef GL_EXT_polygon_offset + +static GLboolean _glewInit_GL_EXT_polygon_offset (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glPolygonOffsetEXT = (PFNGLPOLYGONOFFSETEXTPROC)glewGetProcAddress((const GLubyte*)"glPolygonOffsetEXT")) == NULL) || r; + + return r; +} + +#endif /* GL_EXT_polygon_offset */ + +#ifdef GL_EXT_rescale_normal + +#endif /* 
GL_EXT_rescale_normal */ + +#ifdef GL_EXT_scene_marker + +static GLboolean _glewInit_GL_EXT_scene_marker (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glBeginSceneEXT = (PFNGLBEGINSCENEEXTPROC)glewGetProcAddress((const GLubyte*)"glBeginSceneEXT")) == NULL) || r; + r = ((glEndSceneEXT = (PFNGLENDSCENEEXTPROC)glewGetProcAddress((const GLubyte*)"glEndSceneEXT")) == NULL) || r; + + return r; +} + +#endif /* GL_EXT_scene_marker */ + +#ifdef GL_EXT_secondary_color + +static GLboolean _glewInit_GL_EXT_secondary_color (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glSecondaryColor3bEXT = (PFNGLSECONDARYCOLOR3BEXTPROC)glewGetProcAddress((const GLubyte*)"glSecondaryColor3bEXT")) == NULL) || r; + r = ((glSecondaryColor3bvEXT = (PFNGLSECONDARYCOLOR3BVEXTPROC)glewGetProcAddress((const GLubyte*)"glSecondaryColor3bvEXT")) == NULL) || r; + r = ((glSecondaryColor3dEXT = (PFNGLSECONDARYCOLOR3DEXTPROC)glewGetProcAddress((const GLubyte*)"glSecondaryColor3dEXT")) == NULL) || r; + r = ((glSecondaryColor3dvEXT = (PFNGLSECONDARYCOLOR3DVEXTPROC)glewGetProcAddress((const GLubyte*)"glSecondaryColor3dvEXT")) == NULL) || r; + r = ((glSecondaryColor3fEXT = (PFNGLSECONDARYCOLOR3FEXTPROC)glewGetProcAddress((const GLubyte*)"glSecondaryColor3fEXT")) == NULL) || r; + r = ((glSecondaryColor3fvEXT = (PFNGLSECONDARYCOLOR3FVEXTPROC)glewGetProcAddress((const GLubyte*)"glSecondaryColor3fvEXT")) == NULL) || r; + r = ((glSecondaryColor3iEXT = (PFNGLSECONDARYCOLOR3IEXTPROC)glewGetProcAddress((const GLubyte*)"glSecondaryColor3iEXT")) == NULL) || r; + r = ((glSecondaryColor3ivEXT = (PFNGLSECONDARYCOLOR3IVEXTPROC)glewGetProcAddress((const GLubyte*)"glSecondaryColor3ivEXT")) == NULL) || r; + r = ((glSecondaryColor3sEXT = (PFNGLSECONDARYCOLOR3SEXTPROC)glewGetProcAddress((const GLubyte*)"glSecondaryColor3sEXT")) == NULL) || r; + r = ((glSecondaryColor3svEXT = (PFNGLSECONDARYCOLOR3SVEXTPROC)glewGetProcAddress((const GLubyte*)"glSecondaryColor3svEXT")) == NULL) || r; + r = 
((glSecondaryColor3ubEXT = (PFNGLSECONDARYCOLOR3UBEXTPROC)glewGetProcAddress((const GLubyte*)"glSecondaryColor3ubEXT")) == NULL) || r; + r = ((glSecondaryColor3ubvEXT = (PFNGLSECONDARYCOLOR3UBVEXTPROC)glewGetProcAddress((const GLubyte*)"glSecondaryColor3ubvEXT")) == NULL) || r; + r = ((glSecondaryColor3uiEXT = (PFNGLSECONDARYCOLOR3UIEXTPROC)glewGetProcAddress((const GLubyte*)"glSecondaryColor3uiEXT")) == NULL) || r; + r = ((glSecondaryColor3uivEXT = (PFNGLSECONDARYCOLOR3UIVEXTPROC)glewGetProcAddress((const GLubyte*)"glSecondaryColor3uivEXT")) == NULL) || r; + r = ((glSecondaryColor3usEXT = (PFNGLSECONDARYCOLOR3USEXTPROC)glewGetProcAddress((const GLubyte*)"glSecondaryColor3usEXT")) == NULL) || r; + r = ((glSecondaryColor3usvEXT = (PFNGLSECONDARYCOLOR3USVEXTPROC)glewGetProcAddress((const GLubyte*)"glSecondaryColor3usvEXT")) == NULL) || r; + r = ((glSecondaryColorPointerEXT = (PFNGLSECONDARYCOLORPOINTEREXTPROC)glewGetProcAddress((const GLubyte*)"glSecondaryColorPointerEXT")) == NULL) || r; + + return r; +} + +#endif /* GL_EXT_secondary_color */ + +#ifdef GL_EXT_separate_specular_color + +#endif /* GL_EXT_separate_specular_color */ + +#ifdef GL_EXT_shadow_funcs + +#endif /* GL_EXT_shadow_funcs */ + +#ifdef GL_EXT_shared_texture_palette + +#endif /* GL_EXT_shared_texture_palette */ + +#ifdef GL_EXT_stencil_clear_tag + +#endif /* GL_EXT_stencil_clear_tag */ + +#ifdef GL_EXT_stencil_two_side + +static GLboolean _glewInit_GL_EXT_stencil_two_side (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glActiveStencilFaceEXT = (PFNGLACTIVESTENCILFACEEXTPROC)glewGetProcAddress((const GLubyte*)"glActiveStencilFaceEXT")) == NULL) || r; + + return r; +} + +#endif /* GL_EXT_stencil_two_side */ + +#ifdef GL_EXT_stencil_wrap + +#endif /* GL_EXT_stencil_wrap */ + +#ifdef GL_EXT_subtexture + +static GLboolean _glewInit_GL_EXT_subtexture (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glTexSubImage1DEXT = 
(PFNGLTEXSUBIMAGE1DEXTPROC)glewGetProcAddress((const GLubyte*)"glTexSubImage1DEXT")) == NULL) || r; + r = ((glTexSubImage2DEXT = (PFNGLTEXSUBIMAGE2DEXTPROC)glewGetProcAddress((const GLubyte*)"glTexSubImage2DEXT")) == NULL) || r; + r = ((glTexSubImage3DEXT = (PFNGLTEXSUBIMAGE3DEXTPROC)glewGetProcAddress((const GLubyte*)"glTexSubImage3DEXT")) == NULL) || r; + + return r; +} + +#endif /* GL_EXT_subtexture */ + +#ifdef GL_EXT_texture + +#endif /* GL_EXT_texture */ + +#ifdef GL_EXT_texture3D + +static GLboolean _glewInit_GL_EXT_texture3D (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glTexImage3DEXT = (PFNGLTEXIMAGE3DEXTPROC)glewGetProcAddress((const GLubyte*)"glTexImage3DEXT")) == NULL) || r; + + return r; +} + +#endif /* GL_EXT_texture3D */ + +#ifdef GL_EXT_texture_array + +#endif /* GL_EXT_texture_array */ + +#ifdef GL_EXT_texture_buffer_object + +static GLboolean _glewInit_GL_EXT_texture_buffer_object (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glTexBufferEXT = (PFNGLTEXBUFFEREXTPROC)glewGetProcAddress((const GLubyte*)"glTexBufferEXT")) == NULL) || r; + + return r; +} + +#endif /* GL_EXT_texture_buffer_object */ + +#ifdef GL_EXT_texture_compression_dxt1 + +#endif /* GL_EXT_texture_compression_dxt1 */ + +#ifdef GL_EXT_texture_compression_latc + +#endif /* GL_EXT_texture_compression_latc */ + +#ifdef GL_EXT_texture_compression_rgtc + +#endif /* GL_EXT_texture_compression_rgtc */ + +#ifdef GL_EXT_texture_compression_s3tc + +#endif /* GL_EXT_texture_compression_s3tc */ + +#ifdef GL_EXT_texture_cube_map + +#endif /* GL_EXT_texture_cube_map */ + +#ifdef GL_EXT_texture_edge_clamp + +#endif /* GL_EXT_texture_edge_clamp */ + +#ifdef GL_EXT_texture_env + +#endif /* GL_EXT_texture_env */ + +#ifdef GL_EXT_texture_env_add + +#endif /* GL_EXT_texture_env_add */ + +#ifdef GL_EXT_texture_env_combine + +#endif /* GL_EXT_texture_env_combine */ + +#ifdef GL_EXT_texture_env_dot3 + +#endif /* GL_EXT_texture_env_dot3 */ + +#ifdef 
GL_EXT_texture_filter_anisotropic + +#endif /* GL_EXT_texture_filter_anisotropic */ + +#ifdef GL_EXT_texture_integer + +static GLboolean _glewInit_GL_EXT_texture_integer (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glClearColorIiEXT = (PFNGLCLEARCOLORIIEXTPROC)glewGetProcAddress((const GLubyte*)"glClearColorIiEXT")) == NULL) || r; + r = ((glClearColorIuiEXT = (PFNGLCLEARCOLORIUIEXTPROC)glewGetProcAddress((const GLubyte*)"glClearColorIuiEXT")) == NULL) || r; + r = ((glGetTexParameterIivEXT = (PFNGLGETTEXPARAMETERIIVEXTPROC)glewGetProcAddress((const GLubyte*)"glGetTexParameterIivEXT")) == NULL) || r; + r = ((glGetTexParameterIuivEXT = (PFNGLGETTEXPARAMETERIUIVEXTPROC)glewGetProcAddress((const GLubyte*)"glGetTexParameterIuivEXT")) == NULL) || r; + r = ((glTexParameterIivEXT = (PFNGLTEXPARAMETERIIVEXTPROC)glewGetProcAddress((const GLubyte*)"glTexParameterIivEXT")) == NULL) || r; + r = ((glTexParameterIuivEXT = (PFNGLTEXPARAMETERIUIVEXTPROC)glewGetProcAddress((const GLubyte*)"glTexParameterIuivEXT")) == NULL) || r; + + return r; +} + +#endif /* GL_EXT_texture_integer */ + +#ifdef GL_EXT_texture_lod_bias + +#endif /* GL_EXT_texture_lod_bias */ + +#ifdef GL_EXT_texture_mirror_clamp + +#endif /* GL_EXT_texture_mirror_clamp */ + +#ifdef GL_EXT_texture_object + +static GLboolean _glewInit_GL_EXT_texture_object (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glAreTexturesResidentEXT = (PFNGLARETEXTURESRESIDENTEXTPROC)glewGetProcAddress((const GLubyte*)"glAreTexturesResidentEXT")) == NULL) || r; + r = ((glBindTextureEXT = (PFNGLBINDTEXTUREEXTPROC)glewGetProcAddress((const GLubyte*)"glBindTextureEXT")) == NULL) || r; + r = ((glDeleteTexturesEXT = (PFNGLDELETETEXTURESEXTPROC)glewGetProcAddress((const GLubyte*)"glDeleteTexturesEXT")) == NULL) || r; + r = ((glGenTexturesEXT = (PFNGLGENTEXTURESEXTPROC)glewGetProcAddress((const GLubyte*)"glGenTexturesEXT")) == NULL) || r; + r = ((glIsTextureEXT = 
(PFNGLISTEXTUREEXTPROC)glewGetProcAddress((const GLubyte*)"glIsTextureEXT")) == NULL) || r; + r = ((glPrioritizeTexturesEXT = (PFNGLPRIORITIZETEXTURESEXTPROC)glewGetProcAddress((const GLubyte*)"glPrioritizeTexturesEXT")) == NULL) || r; + + return r; +} + +#endif /* GL_EXT_texture_object */ + +#ifdef GL_EXT_texture_perturb_normal + +static GLboolean _glewInit_GL_EXT_texture_perturb_normal (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glTextureNormalEXT = (PFNGLTEXTURENORMALEXTPROC)glewGetProcAddress((const GLubyte*)"glTextureNormalEXT")) == NULL) || r; + + return r; +} + +#endif /* GL_EXT_texture_perturb_normal */ + +#ifdef GL_EXT_texture_rectangle + +#endif /* GL_EXT_texture_rectangle */ + +#ifdef GL_EXT_texture_sRGB + +#endif /* GL_EXT_texture_sRGB */ + +#ifdef GL_EXT_texture_shared_exponent + +#endif /* GL_EXT_texture_shared_exponent */ + +#ifdef GL_EXT_timer_query + +static GLboolean _glewInit_GL_EXT_timer_query (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glGetQueryObjecti64vEXT = (PFNGLGETQUERYOBJECTI64VEXTPROC)glewGetProcAddress((const GLubyte*)"glGetQueryObjecti64vEXT")) == NULL) || r; + r = ((glGetQueryObjectui64vEXT = (PFNGLGETQUERYOBJECTUI64VEXTPROC)glewGetProcAddress((const GLubyte*)"glGetQueryObjectui64vEXT")) == NULL) || r; + + return r; +} + +#endif /* GL_EXT_timer_query */ + +#ifdef GL_EXT_vertex_array + +static GLboolean _glewInit_GL_EXT_vertex_array (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glArrayElementEXT = (PFNGLARRAYELEMENTEXTPROC)glewGetProcAddress((const GLubyte*)"glArrayElementEXT")) == NULL) || r; + r = ((glColorPointerEXT = (PFNGLCOLORPOINTEREXTPROC)glewGetProcAddress((const GLubyte*)"glColorPointerEXT")) == NULL) || r; + r = ((glDrawArraysEXT = (PFNGLDRAWARRAYSEXTPROC)glewGetProcAddress((const GLubyte*)"glDrawArraysEXT")) == NULL) || r; + r = ((glEdgeFlagPointerEXT = (PFNGLEDGEFLAGPOINTEREXTPROC)glewGetProcAddress((const GLubyte*)"glEdgeFlagPointerEXT")) == NULL) || r; + 
r = ((glGetPointervEXT = (PFNGLGETPOINTERVEXTPROC)glewGetProcAddress((const GLubyte*)"glGetPointervEXT")) == NULL) || r; + r = ((glIndexPointerEXT = (PFNGLINDEXPOINTEREXTPROC)glewGetProcAddress((const GLubyte*)"glIndexPointerEXT")) == NULL) || r; + r = ((glNormalPointerEXT = (PFNGLNORMALPOINTEREXTPROC)glewGetProcAddress((const GLubyte*)"glNormalPointerEXT")) == NULL) || r; + r = ((glTexCoordPointerEXT = (PFNGLTEXCOORDPOINTEREXTPROC)glewGetProcAddress((const GLubyte*)"glTexCoordPointerEXT")) == NULL) || r; + r = ((glVertexPointerEXT = (PFNGLVERTEXPOINTEREXTPROC)glewGetProcAddress((const GLubyte*)"glVertexPointerEXT")) == NULL) || r; + + return r; +} + +#endif /* GL_EXT_vertex_array */ + +#ifdef GL_EXT_vertex_shader + +static GLboolean _glewInit_GL_EXT_vertex_shader (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glBeginVertexShaderEXT = (PFNGLBEGINVERTEXSHADEREXTPROC)glewGetProcAddress((const GLubyte*)"glBeginVertexShaderEXT")) == NULL) || r; + r = ((glBindLightParameterEXT = (PFNGLBINDLIGHTPARAMETEREXTPROC)glewGetProcAddress((const GLubyte*)"glBindLightParameterEXT")) == NULL) || r; + r = ((glBindMaterialParameterEXT = (PFNGLBINDMATERIALPARAMETEREXTPROC)glewGetProcAddress((const GLubyte*)"glBindMaterialParameterEXT")) == NULL) || r; + r = ((glBindParameterEXT = (PFNGLBINDPARAMETEREXTPROC)glewGetProcAddress((const GLubyte*)"glBindParameterEXT")) == NULL) || r; + r = ((glBindTexGenParameterEXT = (PFNGLBINDTEXGENPARAMETEREXTPROC)glewGetProcAddress((const GLubyte*)"glBindTexGenParameterEXT")) == NULL) || r; + r = ((glBindTextureUnitParameterEXT = (PFNGLBINDTEXTUREUNITPARAMETEREXTPROC)glewGetProcAddress((const GLubyte*)"glBindTextureUnitParameterEXT")) == NULL) || r; + r = ((glBindVertexShaderEXT = (PFNGLBINDVERTEXSHADEREXTPROC)glewGetProcAddress((const GLubyte*)"glBindVertexShaderEXT")) == NULL) || r; + r = ((glDeleteVertexShaderEXT = (PFNGLDELETEVERTEXSHADEREXTPROC)glewGetProcAddress((const GLubyte*)"glDeleteVertexShaderEXT")) == NULL) || r; + r = 
((glDisableVariantClientStateEXT = (PFNGLDISABLEVARIANTCLIENTSTATEEXTPROC)glewGetProcAddress((const GLubyte*)"glDisableVariantClientStateEXT")) == NULL) || r; + r = ((glEnableVariantClientStateEXT = (PFNGLENABLEVARIANTCLIENTSTATEEXTPROC)glewGetProcAddress((const GLubyte*)"glEnableVariantClientStateEXT")) == NULL) || r; + r = ((glEndVertexShaderEXT = (PFNGLENDVERTEXSHADEREXTPROC)glewGetProcAddress((const GLubyte*)"glEndVertexShaderEXT")) == NULL) || r; + r = ((glExtractComponentEXT = (PFNGLEXTRACTCOMPONENTEXTPROC)glewGetProcAddress((const GLubyte*)"glExtractComponentEXT")) == NULL) || r; + r = ((glGenSymbolsEXT = (PFNGLGENSYMBOLSEXTPROC)glewGetProcAddress((const GLubyte*)"glGenSymbolsEXT")) == NULL) || r; + r = ((glGenVertexShadersEXT = (PFNGLGENVERTEXSHADERSEXTPROC)glewGetProcAddress((const GLubyte*)"glGenVertexShadersEXT")) == NULL) || r; + r = ((glGetInvariantBooleanvEXT = (PFNGLGETINVARIANTBOOLEANVEXTPROC)glewGetProcAddress((const GLubyte*)"glGetInvariantBooleanvEXT")) == NULL) || r; + r = ((glGetInvariantFloatvEXT = (PFNGLGETINVARIANTFLOATVEXTPROC)glewGetProcAddress((const GLubyte*)"glGetInvariantFloatvEXT")) == NULL) || r; + r = ((glGetInvariantIntegervEXT = (PFNGLGETINVARIANTINTEGERVEXTPROC)glewGetProcAddress((const GLubyte*)"glGetInvariantIntegervEXT")) == NULL) || r; + r = ((glGetLocalConstantBooleanvEXT = (PFNGLGETLOCALCONSTANTBOOLEANVEXTPROC)glewGetProcAddress((const GLubyte*)"glGetLocalConstantBooleanvEXT")) == NULL) || r; + r = ((glGetLocalConstantFloatvEXT = (PFNGLGETLOCALCONSTANTFLOATVEXTPROC)glewGetProcAddress((const GLubyte*)"glGetLocalConstantFloatvEXT")) == NULL) || r; + r = ((glGetLocalConstantIntegervEXT = (PFNGLGETLOCALCONSTANTINTEGERVEXTPROC)glewGetProcAddress((const GLubyte*)"glGetLocalConstantIntegervEXT")) == NULL) || r; + r = ((glGetVariantBooleanvEXT = (PFNGLGETVARIANTBOOLEANVEXTPROC)glewGetProcAddress((const GLubyte*)"glGetVariantBooleanvEXT")) == NULL) || r; + r = ((glGetVariantFloatvEXT = 
(PFNGLGETVARIANTFLOATVEXTPROC)glewGetProcAddress((const GLubyte*)"glGetVariantFloatvEXT")) == NULL) || r; + r = ((glGetVariantIntegervEXT = (PFNGLGETVARIANTINTEGERVEXTPROC)glewGetProcAddress((const GLubyte*)"glGetVariantIntegervEXT")) == NULL) || r; + r = ((glGetVariantPointervEXT = (PFNGLGETVARIANTPOINTERVEXTPROC)glewGetProcAddress((const GLubyte*)"glGetVariantPointervEXT")) == NULL) || r; + r = ((glInsertComponentEXT = (PFNGLINSERTCOMPONENTEXTPROC)glewGetProcAddress((const GLubyte*)"glInsertComponentEXT")) == NULL) || r; + r = ((glIsVariantEnabledEXT = (PFNGLISVARIANTENABLEDEXTPROC)glewGetProcAddress((const GLubyte*)"glIsVariantEnabledEXT")) == NULL) || r; + r = ((glSetInvariantEXT = (PFNGLSETINVARIANTEXTPROC)glewGetProcAddress((const GLubyte*)"glSetInvariantEXT")) == NULL) || r; + r = ((glSetLocalConstantEXT = (PFNGLSETLOCALCONSTANTEXTPROC)glewGetProcAddress((const GLubyte*)"glSetLocalConstantEXT")) == NULL) || r; + r = ((glShaderOp1EXT = (PFNGLSHADEROP1EXTPROC)glewGetProcAddress((const GLubyte*)"glShaderOp1EXT")) == NULL) || r; + r = ((glShaderOp2EXT = (PFNGLSHADEROP2EXTPROC)glewGetProcAddress((const GLubyte*)"glShaderOp2EXT")) == NULL) || r; + r = ((glShaderOp3EXT = (PFNGLSHADEROP3EXTPROC)glewGetProcAddress((const GLubyte*)"glShaderOp3EXT")) == NULL) || r; + r = ((glSwizzleEXT = (PFNGLSWIZZLEEXTPROC)glewGetProcAddress((const GLubyte*)"glSwizzleEXT")) == NULL) || r; + r = ((glVariantPointerEXT = (PFNGLVARIANTPOINTEREXTPROC)glewGetProcAddress((const GLubyte*)"glVariantPointerEXT")) == NULL) || r; + r = ((glVariantbvEXT = (PFNGLVARIANTBVEXTPROC)glewGetProcAddress((const GLubyte*)"glVariantbvEXT")) == NULL) || r; + r = ((glVariantdvEXT = (PFNGLVARIANTDVEXTPROC)glewGetProcAddress((const GLubyte*)"glVariantdvEXT")) == NULL) || r; + r = ((glVariantfvEXT = (PFNGLVARIANTFVEXTPROC)glewGetProcAddress((const GLubyte*)"glVariantfvEXT")) == NULL) || r; + r = ((glVariantivEXT = (PFNGLVARIANTIVEXTPROC)glewGetProcAddress((const GLubyte*)"glVariantivEXT")) == NULL) || r; + r = 
((glVariantsvEXT = (PFNGLVARIANTSVEXTPROC)glewGetProcAddress((const GLubyte*)"glVariantsvEXT")) == NULL) || r; + r = ((glVariantubvEXT = (PFNGLVARIANTUBVEXTPROC)glewGetProcAddress((const GLubyte*)"glVariantubvEXT")) == NULL) || r; + r = ((glVariantuivEXT = (PFNGLVARIANTUIVEXTPROC)glewGetProcAddress((const GLubyte*)"glVariantuivEXT")) == NULL) || r; + r = ((glVariantusvEXT = (PFNGLVARIANTUSVEXTPROC)glewGetProcAddress((const GLubyte*)"glVariantusvEXT")) == NULL) || r; + r = ((glWriteMaskEXT = (PFNGLWRITEMASKEXTPROC)glewGetProcAddress((const GLubyte*)"glWriteMaskEXT")) == NULL) || r; + + return r; +} + +#endif /* GL_EXT_vertex_shader */ + +#ifdef GL_EXT_vertex_weighting + +static GLboolean _glewInit_GL_EXT_vertex_weighting (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glVertexWeightPointerEXT = (PFNGLVERTEXWEIGHTPOINTEREXTPROC)glewGetProcAddress((const GLubyte*)"glVertexWeightPointerEXT")) == NULL) || r; + r = ((glVertexWeightfEXT = (PFNGLVERTEXWEIGHTFEXTPROC)glewGetProcAddress((const GLubyte*)"glVertexWeightfEXT")) == NULL) || r; + r = ((glVertexWeightfvEXT = (PFNGLVERTEXWEIGHTFVEXTPROC)glewGetProcAddress((const GLubyte*)"glVertexWeightfvEXT")) == NULL) || r; + + return r; +} + +#endif /* GL_EXT_vertex_weighting */ + +#ifdef GL_GREMEDY_frame_terminator + +static GLboolean _glewInit_GL_GREMEDY_frame_terminator (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glFrameTerminatorGREMEDY = (PFNGLFRAMETERMINATORGREMEDYPROC)glewGetProcAddress((const GLubyte*)"glFrameTerminatorGREMEDY")) == NULL) || r; + + return r; +} + +#endif /* GL_GREMEDY_frame_terminator */ + +#ifdef GL_GREMEDY_string_marker + +static GLboolean _glewInit_GL_GREMEDY_string_marker (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glStringMarkerGREMEDY = (PFNGLSTRINGMARKERGREMEDYPROC)glewGetProcAddress((const GLubyte*)"glStringMarkerGREMEDY")) == NULL) || r; + + return r; +} + +#endif /* GL_GREMEDY_string_marker */ + +#ifdef 
GL_HP_convolution_border_modes + +#endif /* GL_HP_convolution_border_modes */ + +#ifdef GL_HP_image_transform + +static GLboolean _glewInit_GL_HP_image_transform (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glGetImageTransformParameterfvHP = (PFNGLGETIMAGETRANSFORMPARAMETERFVHPPROC)glewGetProcAddress((const GLubyte*)"glGetImageTransformParameterfvHP")) == NULL) || r; + r = ((glGetImageTransformParameterivHP = (PFNGLGETIMAGETRANSFORMPARAMETERIVHPPROC)glewGetProcAddress((const GLubyte*)"glGetImageTransformParameterivHP")) == NULL) || r; + r = ((glImageTransformParameterfHP = (PFNGLIMAGETRANSFORMPARAMETERFHPPROC)glewGetProcAddress((const GLubyte*)"glImageTransformParameterfHP")) == NULL) || r; + r = ((glImageTransformParameterfvHP = (PFNGLIMAGETRANSFORMPARAMETERFVHPPROC)glewGetProcAddress((const GLubyte*)"glImageTransformParameterfvHP")) == NULL) || r; + r = ((glImageTransformParameteriHP = (PFNGLIMAGETRANSFORMPARAMETERIHPPROC)glewGetProcAddress((const GLubyte*)"glImageTransformParameteriHP")) == NULL) || r; + r = ((glImageTransformParameterivHP = (PFNGLIMAGETRANSFORMPARAMETERIVHPPROC)glewGetProcAddress((const GLubyte*)"glImageTransformParameterivHP")) == NULL) || r; + + return r; +} + +#endif /* GL_HP_image_transform */ + +#ifdef GL_HP_occlusion_test + +#endif /* GL_HP_occlusion_test */ + +#ifdef GL_HP_texture_lighting + +#endif /* GL_HP_texture_lighting */ + +#ifdef GL_IBM_cull_vertex + +#endif /* GL_IBM_cull_vertex */ + +#ifdef GL_IBM_multimode_draw_arrays + +static GLboolean _glewInit_GL_IBM_multimode_draw_arrays (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glMultiModeDrawArraysIBM = (PFNGLMULTIMODEDRAWARRAYSIBMPROC)glewGetProcAddress((const GLubyte*)"glMultiModeDrawArraysIBM")) == NULL) || r; + r = ((glMultiModeDrawElementsIBM = (PFNGLMULTIMODEDRAWELEMENTSIBMPROC)glewGetProcAddress((const GLubyte*)"glMultiModeDrawElementsIBM")) == NULL) || r; + + return r; +} + +#endif /* GL_IBM_multimode_draw_arrays */ + +#ifdef 
GL_IBM_rasterpos_clip + +#endif /* GL_IBM_rasterpos_clip */ + +#ifdef GL_IBM_static_data + +#endif /* GL_IBM_static_data */ + +#ifdef GL_IBM_texture_mirrored_repeat + +#endif /* GL_IBM_texture_mirrored_repeat */ + +#ifdef GL_IBM_vertex_array_lists + +static GLboolean _glewInit_GL_IBM_vertex_array_lists (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glColorPointerListIBM = (PFNGLCOLORPOINTERLISTIBMPROC)glewGetProcAddress((const GLubyte*)"glColorPointerListIBM")) == NULL) || r; + r = ((glEdgeFlagPointerListIBM = (PFNGLEDGEFLAGPOINTERLISTIBMPROC)glewGetProcAddress((const GLubyte*)"glEdgeFlagPointerListIBM")) == NULL) || r; + r = ((glFogCoordPointerListIBM = (PFNGLFOGCOORDPOINTERLISTIBMPROC)glewGetProcAddress((const GLubyte*)"glFogCoordPointerListIBM")) == NULL) || r; + r = ((glIndexPointerListIBM = (PFNGLINDEXPOINTERLISTIBMPROC)glewGetProcAddress((const GLubyte*)"glIndexPointerListIBM")) == NULL) || r; + r = ((glNormalPointerListIBM = (PFNGLNORMALPOINTERLISTIBMPROC)glewGetProcAddress((const GLubyte*)"glNormalPointerListIBM")) == NULL) || r; + r = ((glSecondaryColorPointerListIBM = (PFNGLSECONDARYCOLORPOINTERLISTIBMPROC)glewGetProcAddress((const GLubyte*)"glSecondaryColorPointerListIBM")) == NULL) || r; + r = ((glTexCoordPointerListIBM = (PFNGLTEXCOORDPOINTERLISTIBMPROC)glewGetProcAddress((const GLubyte*)"glTexCoordPointerListIBM")) == NULL) || r; + r = ((glVertexPointerListIBM = (PFNGLVERTEXPOINTERLISTIBMPROC)glewGetProcAddress((const GLubyte*)"glVertexPointerListIBM")) == NULL) || r; + + return r; +} + +#endif /* GL_IBM_vertex_array_lists */ + +#ifdef GL_INGR_color_clamp + +#endif /* GL_INGR_color_clamp */ + +#ifdef GL_INGR_interlace_read + +#endif /* GL_INGR_interlace_read */ + +#ifdef GL_INTEL_parallel_arrays + +static GLboolean _glewInit_GL_INTEL_parallel_arrays (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glColorPointervINTEL = (PFNGLCOLORPOINTERVINTELPROC)glewGetProcAddress((const GLubyte*)"glColorPointervINTEL")) == 
NULL) || r; + r = ((glNormalPointervINTEL = (PFNGLNORMALPOINTERVINTELPROC)glewGetProcAddress((const GLubyte*)"glNormalPointervINTEL")) == NULL) || r; + r = ((glTexCoordPointervINTEL = (PFNGLTEXCOORDPOINTERVINTELPROC)glewGetProcAddress((const GLubyte*)"glTexCoordPointervINTEL")) == NULL) || r; + r = ((glVertexPointervINTEL = (PFNGLVERTEXPOINTERVINTELPROC)glewGetProcAddress((const GLubyte*)"glVertexPointervINTEL")) == NULL) || r; + + return r; +} + +#endif /* GL_INTEL_parallel_arrays */ + +#ifdef GL_INTEL_texture_scissor + +static GLboolean _glewInit_GL_INTEL_texture_scissor (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glTexScissorFuncINTEL = (PFNGLTEXSCISSORFUNCINTELPROC)glewGetProcAddress((const GLubyte*)"glTexScissorFuncINTEL")) == NULL) || r; + r = ((glTexScissorINTEL = (PFNGLTEXSCISSORINTELPROC)glewGetProcAddress((const GLubyte*)"glTexScissorINTEL")) == NULL) || r; + + return r; +} + +#endif /* GL_INTEL_texture_scissor */ + +#ifdef GL_KTX_buffer_region + +static GLboolean _glewInit_GL_KTX_buffer_region (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glBufferRegionEnabledEXT = (PFNGLBUFFERREGIONENABLEDEXTPROC)glewGetProcAddress((const GLubyte*)"glBufferRegionEnabledEXT")) == NULL) || r; + r = ((glDeleteBufferRegionEXT = (PFNGLDELETEBUFFERREGIONEXTPROC)glewGetProcAddress((const GLubyte*)"glDeleteBufferRegionEXT")) == NULL) || r; + r = ((glDrawBufferRegionEXT = (PFNGLDRAWBUFFERREGIONEXTPROC)glewGetProcAddress((const GLubyte*)"glDrawBufferRegionEXT")) == NULL) || r; + r = ((glNewBufferRegionEXT = (PFNGLNEWBUFFERREGIONEXTPROC)glewGetProcAddress((const GLubyte*)"glNewBufferRegionEXT")) == NULL) || r; + r = ((glReadBufferRegionEXT = (PFNGLREADBUFFERREGIONEXTPROC)glewGetProcAddress((const GLubyte*)"glReadBufferRegionEXT")) == NULL) || r; + + return r; +} + +#endif /* GL_KTX_buffer_region */ + +#ifdef GL_MESAX_texture_stack + +#endif /* GL_MESAX_texture_stack */ + +#ifdef GL_MESA_pack_invert + +#endif /* GL_MESA_pack_invert */ + 
+#ifdef GL_MESA_resize_buffers + +static GLboolean _glewInit_GL_MESA_resize_buffers (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glResizeBuffersMESA = (PFNGLRESIZEBUFFERSMESAPROC)glewGetProcAddress((const GLubyte*)"glResizeBuffersMESA")) == NULL) || r; + + return r; +} + +#endif /* GL_MESA_resize_buffers */ + +#ifdef GL_MESA_window_pos + +static GLboolean _glewInit_GL_MESA_window_pos (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glWindowPos2dMESA = (PFNGLWINDOWPOS2DMESAPROC)glewGetProcAddress((const GLubyte*)"glWindowPos2dMESA")) == NULL) || r; + r = ((glWindowPos2dvMESA = (PFNGLWINDOWPOS2DVMESAPROC)glewGetProcAddress((const GLubyte*)"glWindowPos2dvMESA")) == NULL) || r; + r = ((glWindowPos2fMESA = (PFNGLWINDOWPOS2FMESAPROC)glewGetProcAddress((const GLubyte*)"glWindowPos2fMESA")) == NULL) || r; + r = ((glWindowPos2fvMESA = (PFNGLWINDOWPOS2FVMESAPROC)glewGetProcAddress((const GLubyte*)"glWindowPos2fvMESA")) == NULL) || r; + r = ((glWindowPos2iMESA = (PFNGLWINDOWPOS2IMESAPROC)glewGetProcAddress((const GLubyte*)"glWindowPos2iMESA")) == NULL) || r; + r = ((glWindowPos2ivMESA = (PFNGLWINDOWPOS2IVMESAPROC)glewGetProcAddress((const GLubyte*)"glWindowPos2ivMESA")) == NULL) || r; + r = ((glWindowPos2sMESA = (PFNGLWINDOWPOS2SMESAPROC)glewGetProcAddress((const GLubyte*)"glWindowPos2sMESA")) == NULL) || r; + r = ((glWindowPos2svMESA = (PFNGLWINDOWPOS2SVMESAPROC)glewGetProcAddress((const GLubyte*)"glWindowPos2svMESA")) == NULL) || r; + r = ((glWindowPos3dMESA = (PFNGLWINDOWPOS3DMESAPROC)glewGetProcAddress((const GLubyte*)"glWindowPos3dMESA")) == NULL) || r; + r = ((glWindowPos3dvMESA = (PFNGLWINDOWPOS3DVMESAPROC)glewGetProcAddress((const GLubyte*)"glWindowPos3dvMESA")) == NULL) || r; + r = ((glWindowPos3fMESA = (PFNGLWINDOWPOS3FMESAPROC)glewGetProcAddress((const GLubyte*)"glWindowPos3fMESA")) == NULL) || r; + r = ((glWindowPos3fvMESA = (PFNGLWINDOWPOS3FVMESAPROC)glewGetProcAddress((const GLubyte*)"glWindowPos3fvMESA")) == NULL) || r; + 
r = ((glWindowPos3iMESA = (PFNGLWINDOWPOS3IMESAPROC)glewGetProcAddress((const GLubyte*)"glWindowPos3iMESA")) == NULL) || r; + r = ((glWindowPos3ivMESA = (PFNGLWINDOWPOS3IVMESAPROC)glewGetProcAddress((const GLubyte*)"glWindowPos3ivMESA")) == NULL) || r; + r = ((glWindowPos3sMESA = (PFNGLWINDOWPOS3SMESAPROC)glewGetProcAddress((const GLubyte*)"glWindowPos3sMESA")) == NULL) || r; + r = ((glWindowPos3svMESA = (PFNGLWINDOWPOS3SVMESAPROC)glewGetProcAddress((const GLubyte*)"glWindowPos3svMESA")) == NULL) || r; + r = ((glWindowPos4dMESA = (PFNGLWINDOWPOS4DMESAPROC)glewGetProcAddress((const GLubyte*)"glWindowPos4dMESA")) == NULL) || r; + r = ((glWindowPos4dvMESA = (PFNGLWINDOWPOS4DVMESAPROC)glewGetProcAddress((const GLubyte*)"glWindowPos4dvMESA")) == NULL) || r; + r = ((glWindowPos4fMESA = (PFNGLWINDOWPOS4FMESAPROC)glewGetProcAddress((const GLubyte*)"glWindowPos4fMESA")) == NULL) || r; + r = ((glWindowPos4fvMESA = (PFNGLWINDOWPOS4FVMESAPROC)glewGetProcAddress((const GLubyte*)"glWindowPos4fvMESA")) == NULL) || r; + r = ((glWindowPos4iMESA = (PFNGLWINDOWPOS4IMESAPROC)glewGetProcAddress((const GLubyte*)"glWindowPos4iMESA")) == NULL) || r; + r = ((glWindowPos4ivMESA = (PFNGLWINDOWPOS4IVMESAPROC)glewGetProcAddress((const GLubyte*)"glWindowPos4ivMESA")) == NULL) || r; + r = ((glWindowPos4sMESA = (PFNGLWINDOWPOS4SMESAPROC)glewGetProcAddress((const GLubyte*)"glWindowPos4sMESA")) == NULL) || r; + r = ((glWindowPos4svMESA = (PFNGLWINDOWPOS4SVMESAPROC)glewGetProcAddress((const GLubyte*)"glWindowPos4svMESA")) == NULL) || r; + + return r; +} + +#endif /* GL_MESA_window_pos */ + +#ifdef GL_MESA_ycbcr_texture + +#endif /* GL_MESA_ycbcr_texture */ + +#ifdef GL_NV_blend_square + +#endif /* GL_NV_blend_square */ + +#ifdef GL_NV_copy_depth_to_color + +#endif /* GL_NV_copy_depth_to_color */ + +#ifdef GL_NV_depth_buffer_float + +static GLboolean _glewInit_GL_NV_depth_buffer_float (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glClearDepthdNV = 
(PFNGLCLEARDEPTHDNVPROC)glewGetProcAddress((const GLubyte*)"glClearDepthdNV")) == NULL) || r; + r = ((glDepthBoundsdNV = (PFNGLDEPTHBOUNDSDNVPROC)glewGetProcAddress((const GLubyte*)"glDepthBoundsdNV")) == NULL) || r; + r = ((glDepthRangedNV = (PFNGLDEPTHRANGEDNVPROC)glewGetProcAddress((const GLubyte*)"glDepthRangedNV")) == NULL) || r; + + return r; +} + +#endif /* GL_NV_depth_buffer_float */ + +#ifdef GL_NV_depth_clamp + +#endif /* GL_NV_depth_clamp */ + +#ifdef GL_NV_depth_range_unclamped + +#endif /* GL_NV_depth_range_unclamped */ + +#ifdef GL_NV_evaluators + +static GLboolean _glewInit_GL_NV_evaluators (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glEvalMapsNV = (PFNGLEVALMAPSNVPROC)glewGetProcAddress((const GLubyte*)"glEvalMapsNV")) == NULL) || r; + r = ((glGetMapAttribParameterfvNV = (PFNGLGETMAPATTRIBPARAMETERFVNVPROC)glewGetProcAddress((const GLubyte*)"glGetMapAttribParameterfvNV")) == NULL) || r; + r = ((glGetMapAttribParameterivNV = (PFNGLGETMAPATTRIBPARAMETERIVNVPROC)glewGetProcAddress((const GLubyte*)"glGetMapAttribParameterivNV")) == NULL) || r; + r = ((glGetMapControlPointsNV = (PFNGLGETMAPCONTROLPOINTSNVPROC)glewGetProcAddress((const GLubyte*)"glGetMapControlPointsNV")) == NULL) || r; + r = ((glGetMapParameterfvNV = (PFNGLGETMAPPARAMETERFVNVPROC)glewGetProcAddress((const GLubyte*)"glGetMapParameterfvNV")) == NULL) || r; + r = ((glGetMapParameterivNV = (PFNGLGETMAPPARAMETERIVNVPROC)glewGetProcAddress((const GLubyte*)"glGetMapParameterivNV")) == NULL) || r; + r = ((glMapControlPointsNV = (PFNGLMAPCONTROLPOINTSNVPROC)glewGetProcAddress((const GLubyte*)"glMapControlPointsNV")) == NULL) || r; + r = ((glMapParameterfvNV = (PFNGLMAPPARAMETERFVNVPROC)glewGetProcAddress((const GLubyte*)"glMapParameterfvNV")) == NULL) || r; + r = ((glMapParameterivNV = (PFNGLMAPPARAMETERIVNVPROC)glewGetProcAddress((const GLubyte*)"glMapParameterivNV")) == NULL) || r; + + return r; +} + +#endif /* GL_NV_evaluators */ + +#ifdef GL_NV_fence + +static GLboolean 
_glewInit_GL_NV_fence (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glDeleteFencesNV = (PFNGLDELETEFENCESNVPROC)glewGetProcAddress((const GLubyte*)"glDeleteFencesNV")) == NULL) || r; + r = ((glFinishFenceNV = (PFNGLFINISHFENCENVPROC)glewGetProcAddress((const GLubyte*)"glFinishFenceNV")) == NULL) || r; + r = ((glGenFencesNV = (PFNGLGENFENCESNVPROC)glewGetProcAddress((const GLubyte*)"glGenFencesNV")) == NULL) || r; + r = ((glGetFenceivNV = (PFNGLGETFENCEIVNVPROC)glewGetProcAddress((const GLubyte*)"glGetFenceivNV")) == NULL) || r; + r = ((glIsFenceNV = (PFNGLISFENCENVPROC)glewGetProcAddress((const GLubyte*)"glIsFenceNV")) == NULL) || r; + r = ((glSetFenceNV = (PFNGLSETFENCENVPROC)glewGetProcAddress((const GLubyte*)"glSetFenceNV")) == NULL) || r; + r = ((glTestFenceNV = (PFNGLTESTFENCENVPROC)glewGetProcAddress((const GLubyte*)"glTestFenceNV")) == NULL) || r; + + return r; +} + +#endif /* GL_NV_fence */ + +#ifdef GL_NV_float_buffer + +#endif /* GL_NV_float_buffer */ + +#ifdef GL_NV_fog_distance + +#endif /* GL_NV_fog_distance */ + +#ifdef GL_NV_fragment_program + +static GLboolean _glewInit_GL_NV_fragment_program (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glGetProgramNamedParameterdvNV = (PFNGLGETPROGRAMNAMEDPARAMETERDVNVPROC)glewGetProcAddress((const GLubyte*)"glGetProgramNamedParameterdvNV")) == NULL) || r; + r = ((glGetProgramNamedParameterfvNV = (PFNGLGETPROGRAMNAMEDPARAMETERFVNVPROC)glewGetProcAddress((const GLubyte*)"glGetProgramNamedParameterfvNV")) == NULL) || r; + r = ((glProgramNamedParameter4dNV = (PFNGLPROGRAMNAMEDPARAMETER4DNVPROC)glewGetProcAddress((const GLubyte*)"glProgramNamedParameter4dNV")) == NULL) || r; + r = ((glProgramNamedParameter4dvNV = (PFNGLPROGRAMNAMEDPARAMETER4DVNVPROC)glewGetProcAddress((const GLubyte*)"glProgramNamedParameter4dvNV")) == NULL) || r; + r = ((glProgramNamedParameter4fNV = (PFNGLPROGRAMNAMEDPARAMETER4FNVPROC)glewGetProcAddress((const GLubyte*)"glProgramNamedParameter4fNV")) == 
NULL) || r; + r = ((glProgramNamedParameter4fvNV = (PFNGLPROGRAMNAMEDPARAMETER4FVNVPROC)glewGetProcAddress((const GLubyte*)"glProgramNamedParameter4fvNV")) == NULL) || r; + + return r; +} + +#endif /* GL_NV_fragment_program */ + +#ifdef GL_NV_fragment_program2 + +#endif /* GL_NV_fragment_program2 */ + +#ifdef GL_NV_fragment_program4 + +#endif /* GL_NV_fragment_program4 */ + +#ifdef GL_NV_fragment_program_option + +#endif /* GL_NV_fragment_program_option */ + +#ifdef GL_NV_framebuffer_multisample_coverage + +static GLboolean _glewInit_GL_NV_framebuffer_multisample_coverage (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glRenderbufferStorageMultisampleCoverageNV = (PFNGLRENDERBUFFERSTORAGEMULTISAMPLECOVERAGENVPROC)glewGetProcAddress((const GLubyte*)"glRenderbufferStorageMultisampleCoverageNV")) == NULL) || r; + + return r; +} + +#endif /* GL_NV_framebuffer_multisample_coverage */ + +#ifdef GL_NV_geometry_program4 + +static GLboolean _glewInit_GL_NV_geometry_program4 (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glProgramVertexLimitNV = (PFNGLPROGRAMVERTEXLIMITNVPROC)glewGetProcAddress((const GLubyte*)"glProgramVertexLimitNV")) == NULL) || r; + + return r; +} + +#endif /* GL_NV_geometry_program4 */ + +#ifdef GL_NV_geometry_shader4 + +#endif /* GL_NV_geometry_shader4 */ + +#ifdef GL_NV_gpu_program4 + +static GLboolean _glewInit_GL_NV_gpu_program4 (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glProgramEnvParameterI4iNV = (PFNGLPROGRAMENVPARAMETERI4INVPROC)glewGetProcAddress((const GLubyte*)"glProgramEnvParameterI4iNV")) == NULL) || r; + r = ((glProgramEnvParameterI4ivNV = (PFNGLPROGRAMENVPARAMETERI4IVNVPROC)glewGetProcAddress((const GLubyte*)"glProgramEnvParameterI4ivNV")) == NULL) || r; + r = ((glProgramEnvParameterI4uiNV = (PFNGLPROGRAMENVPARAMETERI4UINVPROC)glewGetProcAddress((const GLubyte*)"glProgramEnvParameterI4uiNV")) == NULL) || r; + r = ((glProgramEnvParameterI4uivNV = 
(PFNGLPROGRAMENVPARAMETERI4UIVNVPROC)glewGetProcAddress((const GLubyte*)"glProgramEnvParameterI4uivNV")) == NULL) || r; + r = ((glProgramEnvParametersI4ivNV = (PFNGLPROGRAMENVPARAMETERSI4IVNVPROC)glewGetProcAddress((const GLubyte*)"glProgramEnvParametersI4ivNV")) == NULL) || r; + r = ((glProgramEnvParametersI4uivNV = (PFNGLPROGRAMENVPARAMETERSI4UIVNVPROC)glewGetProcAddress((const GLubyte*)"glProgramEnvParametersI4uivNV")) == NULL) || r; + r = ((glProgramLocalParameterI4iNV = (PFNGLPROGRAMLOCALPARAMETERI4INVPROC)glewGetProcAddress((const GLubyte*)"glProgramLocalParameterI4iNV")) == NULL) || r; + r = ((glProgramLocalParameterI4ivNV = (PFNGLPROGRAMLOCALPARAMETERI4IVNVPROC)glewGetProcAddress((const GLubyte*)"glProgramLocalParameterI4ivNV")) == NULL) || r; + r = ((glProgramLocalParameterI4uiNV = (PFNGLPROGRAMLOCALPARAMETERI4UINVPROC)glewGetProcAddress((const GLubyte*)"glProgramLocalParameterI4uiNV")) == NULL) || r; + r = ((glProgramLocalParameterI4uivNV = (PFNGLPROGRAMLOCALPARAMETERI4UIVNVPROC)glewGetProcAddress((const GLubyte*)"glProgramLocalParameterI4uivNV")) == NULL) || r; + r = ((glProgramLocalParametersI4ivNV = (PFNGLPROGRAMLOCALPARAMETERSI4IVNVPROC)glewGetProcAddress((const GLubyte*)"glProgramLocalParametersI4ivNV")) == NULL) || r; + r = ((glProgramLocalParametersI4uivNV = (PFNGLPROGRAMLOCALPARAMETERSI4UIVNVPROC)glewGetProcAddress((const GLubyte*)"glProgramLocalParametersI4uivNV")) == NULL) || r; + + return r; +} + +#endif /* GL_NV_gpu_program4 */ + +#ifdef GL_NV_half_float + +static GLboolean _glewInit_GL_NV_half_float (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glColor3hNV = (PFNGLCOLOR3HNVPROC)glewGetProcAddress((const GLubyte*)"glColor3hNV")) == NULL) || r; + r = ((glColor3hvNV = (PFNGLCOLOR3HVNVPROC)glewGetProcAddress((const GLubyte*)"glColor3hvNV")) == NULL) || r; + r = ((glColor4hNV = (PFNGLCOLOR4HNVPROC)glewGetProcAddress((const GLubyte*)"glColor4hNV")) == NULL) || r; + r = ((glColor4hvNV = 
(PFNGLCOLOR4HVNVPROC)glewGetProcAddress((const GLubyte*)"glColor4hvNV")) == NULL) || r; + r = ((glFogCoordhNV = (PFNGLFOGCOORDHNVPROC)glewGetProcAddress((const GLubyte*)"glFogCoordhNV")) == NULL) || r; + r = ((glFogCoordhvNV = (PFNGLFOGCOORDHVNVPROC)glewGetProcAddress((const GLubyte*)"glFogCoordhvNV")) == NULL) || r; + r = ((glMultiTexCoord1hNV = (PFNGLMULTITEXCOORD1HNVPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord1hNV")) == NULL) || r; + r = ((glMultiTexCoord1hvNV = (PFNGLMULTITEXCOORD1HVNVPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord1hvNV")) == NULL) || r; + r = ((glMultiTexCoord2hNV = (PFNGLMULTITEXCOORD2HNVPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord2hNV")) == NULL) || r; + r = ((glMultiTexCoord2hvNV = (PFNGLMULTITEXCOORD2HVNVPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord2hvNV")) == NULL) || r; + r = ((glMultiTexCoord3hNV = (PFNGLMULTITEXCOORD3HNVPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord3hNV")) == NULL) || r; + r = ((glMultiTexCoord3hvNV = (PFNGLMULTITEXCOORD3HVNVPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord3hvNV")) == NULL) || r; + r = ((glMultiTexCoord4hNV = (PFNGLMULTITEXCOORD4HNVPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord4hNV")) == NULL) || r; + r = ((glMultiTexCoord4hvNV = (PFNGLMULTITEXCOORD4HVNVPROC)glewGetProcAddress((const GLubyte*)"glMultiTexCoord4hvNV")) == NULL) || r; + r = ((glNormal3hNV = (PFNGLNORMAL3HNVPROC)glewGetProcAddress((const GLubyte*)"glNormal3hNV")) == NULL) || r; + r = ((glNormal3hvNV = (PFNGLNORMAL3HVNVPROC)glewGetProcAddress((const GLubyte*)"glNormal3hvNV")) == NULL) || r; + r = ((glSecondaryColor3hNV = (PFNGLSECONDARYCOLOR3HNVPROC)glewGetProcAddress((const GLubyte*)"glSecondaryColor3hNV")) == NULL) || r; + r = ((glSecondaryColor3hvNV = (PFNGLSECONDARYCOLOR3HVNVPROC)glewGetProcAddress((const GLubyte*)"glSecondaryColor3hvNV")) == NULL) || r; + r = ((glTexCoord1hNV = (PFNGLTEXCOORD1HNVPROC)glewGetProcAddress((const GLubyte*)"glTexCoord1hNV")) == NULL) || 
r; + r = ((glTexCoord1hvNV = (PFNGLTEXCOORD1HVNVPROC)glewGetProcAddress((const GLubyte*)"glTexCoord1hvNV")) == NULL) || r; + r = ((glTexCoord2hNV = (PFNGLTEXCOORD2HNVPROC)glewGetProcAddress((const GLubyte*)"glTexCoord2hNV")) == NULL) || r; + r = ((glTexCoord2hvNV = (PFNGLTEXCOORD2HVNVPROC)glewGetProcAddress((const GLubyte*)"glTexCoord2hvNV")) == NULL) || r; + r = ((glTexCoord3hNV = (PFNGLTEXCOORD3HNVPROC)glewGetProcAddress((const GLubyte*)"glTexCoord3hNV")) == NULL) || r; + r = ((glTexCoord3hvNV = (PFNGLTEXCOORD3HVNVPROC)glewGetProcAddress((const GLubyte*)"glTexCoord3hvNV")) == NULL) || r; + r = ((glTexCoord4hNV = (PFNGLTEXCOORD4HNVPROC)glewGetProcAddress((const GLubyte*)"glTexCoord4hNV")) == NULL) || r; + r = ((glTexCoord4hvNV = (PFNGLTEXCOORD4HVNVPROC)glewGetProcAddress((const GLubyte*)"glTexCoord4hvNV")) == NULL) || r; + r = ((glVertex2hNV = (PFNGLVERTEX2HNVPROC)glewGetProcAddress((const GLubyte*)"glVertex2hNV")) == NULL) || r; + r = ((glVertex2hvNV = (PFNGLVERTEX2HVNVPROC)glewGetProcAddress((const GLubyte*)"glVertex2hvNV")) == NULL) || r; + r = ((glVertex3hNV = (PFNGLVERTEX3HNVPROC)glewGetProcAddress((const GLubyte*)"glVertex3hNV")) == NULL) || r; + r = ((glVertex3hvNV = (PFNGLVERTEX3HVNVPROC)glewGetProcAddress((const GLubyte*)"glVertex3hvNV")) == NULL) || r; + r = ((glVertex4hNV = (PFNGLVERTEX4HNVPROC)glewGetProcAddress((const GLubyte*)"glVertex4hNV")) == NULL) || r; + r = ((glVertex4hvNV = (PFNGLVERTEX4HVNVPROC)glewGetProcAddress((const GLubyte*)"glVertex4hvNV")) == NULL) || r; + r = ((glVertexAttrib1hNV = (PFNGLVERTEXATTRIB1HNVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib1hNV")) == NULL) || r; + r = ((glVertexAttrib1hvNV = (PFNGLVERTEXATTRIB1HVNVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib1hvNV")) == NULL) || r; + r = ((glVertexAttrib2hNV = (PFNGLVERTEXATTRIB2HNVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib2hNV")) == NULL) || r; + r = ((glVertexAttrib2hvNV = (PFNGLVERTEXATTRIB2HVNVPROC)glewGetProcAddress((const 
GLubyte*)"glVertexAttrib2hvNV")) == NULL) || r; + r = ((glVertexAttrib3hNV = (PFNGLVERTEXATTRIB3HNVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib3hNV")) == NULL) || r; + r = ((glVertexAttrib3hvNV = (PFNGLVERTEXATTRIB3HVNVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib3hvNV")) == NULL) || r; + r = ((glVertexAttrib4hNV = (PFNGLVERTEXATTRIB4HNVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib4hNV")) == NULL) || r; + r = ((glVertexAttrib4hvNV = (PFNGLVERTEXATTRIB4HVNVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib4hvNV")) == NULL) || r; + r = ((glVertexAttribs1hvNV = (PFNGLVERTEXATTRIBS1HVNVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttribs1hvNV")) == NULL) || r; + r = ((glVertexAttribs2hvNV = (PFNGLVERTEXATTRIBS2HVNVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttribs2hvNV")) == NULL) || r; + r = ((glVertexAttribs3hvNV = (PFNGLVERTEXATTRIBS3HVNVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttribs3hvNV")) == NULL) || r; + r = ((glVertexAttribs4hvNV = (PFNGLVERTEXATTRIBS4HVNVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttribs4hvNV")) == NULL) || r; + r = ((glVertexWeighthNV = (PFNGLVERTEXWEIGHTHNVPROC)glewGetProcAddress((const GLubyte*)"glVertexWeighthNV")) == NULL) || r; + r = ((glVertexWeighthvNV = (PFNGLVERTEXWEIGHTHVNVPROC)glewGetProcAddress((const GLubyte*)"glVertexWeighthvNV")) == NULL) || r; + + return r; +} + +#endif /* GL_NV_half_float */ + +#ifdef GL_NV_light_max_exponent + +#endif /* GL_NV_light_max_exponent */ + +#ifdef GL_NV_multisample_filter_hint + +#endif /* GL_NV_multisample_filter_hint */ + +#ifdef GL_NV_occlusion_query + +static GLboolean _glewInit_GL_NV_occlusion_query (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glBeginOcclusionQueryNV = (PFNGLBEGINOCCLUSIONQUERYNVPROC)glewGetProcAddress((const GLubyte*)"glBeginOcclusionQueryNV")) == NULL) || r; + r = ((glDeleteOcclusionQueriesNV = (PFNGLDELETEOCCLUSIONQUERIESNVPROC)glewGetProcAddress((const 
GLubyte*)"glDeleteOcclusionQueriesNV")) == NULL) || r; + r = ((glEndOcclusionQueryNV = (PFNGLENDOCCLUSIONQUERYNVPROC)glewGetProcAddress((const GLubyte*)"glEndOcclusionQueryNV")) == NULL) || r; + r = ((glGenOcclusionQueriesNV = (PFNGLGENOCCLUSIONQUERIESNVPROC)glewGetProcAddress((const GLubyte*)"glGenOcclusionQueriesNV")) == NULL) || r; + r = ((glGetOcclusionQueryivNV = (PFNGLGETOCCLUSIONQUERYIVNVPROC)glewGetProcAddress((const GLubyte*)"glGetOcclusionQueryivNV")) == NULL) || r; + r = ((glGetOcclusionQueryuivNV = (PFNGLGETOCCLUSIONQUERYUIVNVPROC)glewGetProcAddress((const GLubyte*)"glGetOcclusionQueryuivNV")) == NULL) || r; + r = ((glIsOcclusionQueryNV = (PFNGLISOCCLUSIONQUERYNVPROC)glewGetProcAddress((const GLubyte*)"glIsOcclusionQueryNV")) == NULL) || r; + + return r; +} + +#endif /* GL_NV_occlusion_query */ + +#ifdef GL_NV_packed_depth_stencil + +#endif /* GL_NV_packed_depth_stencil */ + +#ifdef GL_NV_parameter_buffer_object + +static GLboolean _glewInit_GL_NV_parameter_buffer_object (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glProgramBufferParametersIivNV = (PFNGLPROGRAMBUFFERPARAMETERSIIVNVPROC)glewGetProcAddress((const GLubyte*)"glProgramBufferParametersIivNV")) == NULL) || r; + r = ((glProgramBufferParametersIuivNV = (PFNGLPROGRAMBUFFERPARAMETERSIUIVNVPROC)glewGetProcAddress((const GLubyte*)"glProgramBufferParametersIuivNV")) == NULL) || r; + r = ((glProgramBufferParametersfvNV = (PFNGLPROGRAMBUFFERPARAMETERSFVNVPROC)glewGetProcAddress((const GLubyte*)"glProgramBufferParametersfvNV")) == NULL) || r; + + return r; +} + +#endif /* GL_NV_parameter_buffer_object */ + +#ifdef GL_NV_pixel_data_range + +static GLboolean _glewInit_GL_NV_pixel_data_range (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glFlushPixelDataRangeNV = (PFNGLFLUSHPIXELDATARANGENVPROC)glewGetProcAddress((const GLubyte*)"glFlushPixelDataRangeNV")) == NULL) || r; + r = ((glPixelDataRangeNV = (PFNGLPIXELDATARANGENVPROC)glewGetProcAddress((const 
GLubyte*)"glPixelDataRangeNV")) == NULL) || r; + + return r; +} + +#endif /* GL_NV_pixel_data_range */ + +#ifdef GL_NV_point_sprite + +static GLboolean _glewInit_GL_NV_point_sprite (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glPointParameteriNV = (PFNGLPOINTPARAMETERINVPROC)glewGetProcAddress((const GLubyte*)"glPointParameteriNV")) == NULL) || r; + r = ((glPointParameterivNV = (PFNGLPOINTPARAMETERIVNVPROC)glewGetProcAddress((const GLubyte*)"glPointParameterivNV")) == NULL) || r; + + return r; +} + +#endif /* GL_NV_point_sprite */ + +#ifdef GL_NV_primitive_restart + +static GLboolean _glewInit_GL_NV_primitive_restart (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glPrimitiveRestartIndexNV = (PFNGLPRIMITIVERESTARTINDEXNVPROC)glewGetProcAddress((const GLubyte*)"glPrimitiveRestartIndexNV")) == NULL) || r; + r = ((glPrimitiveRestartNV = (PFNGLPRIMITIVERESTARTNVPROC)glewGetProcAddress((const GLubyte*)"glPrimitiveRestartNV")) == NULL) || r; + + return r; +} + +#endif /* GL_NV_primitive_restart */ + +#ifdef GL_NV_register_combiners + +static GLboolean _glewInit_GL_NV_register_combiners (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glCombinerInputNV = (PFNGLCOMBINERINPUTNVPROC)glewGetProcAddress((const GLubyte*)"glCombinerInputNV")) == NULL) || r; + r = ((glCombinerOutputNV = (PFNGLCOMBINEROUTPUTNVPROC)glewGetProcAddress((const GLubyte*)"glCombinerOutputNV")) == NULL) || r; + r = ((glCombinerParameterfNV = (PFNGLCOMBINERPARAMETERFNVPROC)glewGetProcAddress((const GLubyte*)"glCombinerParameterfNV")) == NULL) || r; + r = ((glCombinerParameterfvNV = (PFNGLCOMBINERPARAMETERFVNVPROC)glewGetProcAddress((const GLubyte*)"glCombinerParameterfvNV")) == NULL) || r; + r = ((glCombinerParameteriNV = (PFNGLCOMBINERPARAMETERINVPROC)glewGetProcAddress((const GLubyte*)"glCombinerParameteriNV")) == NULL) || r; + r = ((glCombinerParameterivNV = (PFNGLCOMBINERPARAMETERIVNVPROC)glewGetProcAddress((const 
GLubyte*)"glCombinerParameterivNV")) == NULL) || r; + r = ((glFinalCombinerInputNV = (PFNGLFINALCOMBINERINPUTNVPROC)glewGetProcAddress((const GLubyte*)"glFinalCombinerInputNV")) == NULL) || r; + r = ((glGetCombinerInputParameterfvNV = (PFNGLGETCOMBINERINPUTPARAMETERFVNVPROC)glewGetProcAddress((const GLubyte*)"glGetCombinerInputParameterfvNV")) == NULL) || r; + r = ((glGetCombinerInputParameterivNV = (PFNGLGETCOMBINERINPUTPARAMETERIVNVPROC)glewGetProcAddress((const GLubyte*)"glGetCombinerInputParameterivNV")) == NULL) || r; + r = ((glGetCombinerOutputParameterfvNV = (PFNGLGETCOMBINEROUTPUTPARAMETERFVNVPROC)glewGetProcAddress((const GLubyte*)"glGetCombinerOutputParameterfvNV")) == NULL) || r; + r = ((glGetCombinerOutputParameterivNV = (PFNGLGETCOMBINEROUTPUTPARAMETERIVNVPROC)glewGetProcAddress((const GLubyte*)"glGetCombinerOutputParameterivNV")) == NULL) || r; + r = ((glGetFinalCombinerInputParameterfvNV = (PFNGLGETFINALCOMBINERINPUTPARAMETERFVNVPROC)glewGetProcAddress((const GLubyte*)"glGetFinalCombinerInputParameterfvNV")) == NULL) || r; + r = ((glGetFinalCombinerInputParameterivNV = (PFNGLGETFINALCOMBINERINPUTPARAMETERIVNVPROC)glewGetProcAddress((const GLubyte*)"glGetFinalCombinerInputParameterivNV")) == NULL) || r; + + return r; +} + +#endif /* GL_NV_register_combiners */ + +#ifdef GL_NV_register_combiners2 + +static GLboolean _glewInit_GL_NV_register_combiners2 (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glCombinerStageParameterfvNV = (PFNGLCOMBINERSTAGEPARAMETERFVNVPROC)glewGetProcAddress((const GLubyte*)"glCombinerStageParameterfvNV")) == NULL) || r; + r = ((glGetCombinerStageParameterfvNV = (PFNGLGETCOMBINERSTAGEPARAMETERFVNVPROC)glewGetProcAddress((const GLubyte*)"glGetCombinerStageParameterfvNV")) == NULL) || r; + + return r; +} + +#endif /* GL_NV_register_combiners2 */ + +#ifdef GL_NV_texgen_emboss + +#endif /* GL_NV_texgen_emboss */ + +#ifdef GL_NV_texgen_reflection + +#endif /* GL_NV_texgen_reflection */ + +#ifdef 
GL_NV_texture_compression_vtc + +#endif /* GL_NV_texture_compression_vtc */ + +#ifdef GL_NV_texture_env_combine4 + +#endif /* GL_NV_texture_env_combine4 */ + +#ifdef GL_NV_texture_expand_normal + +#endif /* GL_NV_texture_expand_normal */ + +#ifdef GL_NV_texture_rectangle + +#endif /* GL_NV_texture_rectangle */ + +#ifdef GL_NV_texture_shader + +#endif /* GL_NV_texture_shader */ + +#ifdef GL_NV_texture_shader2 + +#endif /* GL_NV_texture_shader2 */ + +#ifdef GL_NV_texture_shader3 + +#endif /* GL_NV_texture_shader3 */ + +#ifdef GL_NV_transform_feedback + +static GLboolean _glewInit_GL_NV_transform_feedback (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glActiveVaryingNV = (PFNGLACTIVEVARYINGNVPROC)glewGetProcAddress((const GLubyte*)"glActiveVaryingNV")) == NULL) || r; + r = ((glBeginTransformFeedbackNV = (PFNGLBEGINTRANSFORMFEEDBACKNVPROC)glewGetProcAddress((const GLubyte*)"glBeginTransformFeedbackNV")) == NULL) || r; + r = ((glBindBufferBaseNV = (PFNGLBINDBUFFERBASENVPROC)glewGetProcAddress((const GLubyte*)"glBindBufferBaseNV")) == NULL) || r; + r = ((glBindBufferOffsetNV = (PFNGLBINDBUFFEROFFSETNVPROC)glewGetProcAddress((const GLubyte*)"glBindBufferOffsetNV")) == NULL) || r; + r = ((glBindBufferRangeNV = (PFNGLBINDBUFFERRANGENVPROC)glewGetProcAddress((const GLubyte*)"glBindBufferRangeNV")) == NULL) || r; + r = ((glEndTransformFeedbackNV = (PFNGLENDTRANSFORMFEEDBACKNVPROC)glewGetProcAddress((const GLubyte*)"glEndTransformFeedbackNV")) == NULL) || r; + r = ((glGetActiveVaryingNV = (PFNGLGETACTIVEVARYINGNVPROC)glewGetProcAddress((const GLubyte*)"glGetActiveVaryingNV")) == NULL) || r; + r = ((glGetTransformFeedbackVaryingNV = (PFNGLGETTRANSFORMFEEDBACKVARYINGNVPROC)glewGetProcAddress((const GLubyte*)"glGetTransformFeedbackVaryingNV")) == NULL) || r; + r = ((glGetVaryingLocationNV = (PFNGLGETVARYINGLOCATIONNVPROC)glewGetProcAddress((const GLubyte*)"glGetVaryingLocationNV")) == NULL) || r; + r = ((glTransformFeedbackAttribsNV = 
(PFNGLTRANSFORMFEEDBACKATTRIBSNVPROC)glewGetProcAddress((const GLubyte*)"glTransformFeedbackAttribsNV")) == NULL) || r; + r = ((glTransformFeedbackVaryingsNV = (PFNGLTRANSFORMFEEDBACKVARYINGSNVPROC)glewGetProcAddress((const GLubyte*)"glTransformFeedbackVaryingsNV")) == NULL) || r; + + return r; +} + +#endif /* GL_NV_transform_feedback */ + +#ifdef GL_NV_vertex_array_range + +static GLboolean _glewInit_GL_NV_vertex_array_range (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glFlushVertexArrayRangeNV = (PFNGLFLUSHVERTEXARRAYRANGENVPROC)glewGetProcAddress((const GLubyte*)"glFlushVertexArrayRangeNV")) == NULL) || r; + r = ((glVertexArrayRangeNV = (PFNGLVERTEXARRAYRANGENVPROC)glewGetProcAddress((const GLubyte*)"glVertexArrayRangeNV")) == NULL) || r; + + return r; +} + +#endif /* GL_NV_vertex_array_range */ + +#ifdef GL_NV_vertex_array_range2 + +#endif /* GL_NV_vertex_array_range2 */ + +#ifdef GL_NV_vertex_program + +static GLboolean _glewInit_GL_NV_vertex_program (GLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glAreProgramsResidentNV = (PFNGLAREPROGRAMSRESIDENTNVPROC)glewGetProcAddress((const GLubyte*)"glAreProgramsResidentNV")) == NULL) || r; + r = ((glBindProgramNV = (PFNGLBINDPROGRAMNVPROC)glewGetProcAddress((const GLubyte*)"glBindProgramNV")) == NULL) || r; + r = ((glDeleteProgramsNV = (PFNGLDELETEPROGRAMSNVPROC)glewGetProcAddress((const GLubyte*)"glDeleteProgramsNV")) == NULL) || r; + r = ((glExecuteProgramNV = (PFNGLEXECUTEPROGRAMNVPROC)glewGetProcAddress((const GLubyte*)"glExecuteProgramNV")) == NULL) || r; + r = ((glGenProgramsNV = (PFNGLGENPROGRAMSNVPROC)glewGetProcAddress((const GLubyte*)"glGenProgramsNV")) == NULL) || r; + r = ((glGetProgramParameterdvNV = (PFNGLGETPROGRAMPARAMETERDVNVPROC)glewGetProcAddress((const GLubyte*)"glGetProgramParameterdvNV")) == NULL) || r; + r = ((glGetProgramParameterfvNV = (PFNGLGETPROGRAMPARAMETERFVNVPROC)glewGetProcAddress((const GLubyte*)"glGetProgramParameterfvNV")) == NULL) || r; + r = 
((glGetProgramStringNV = (PFNGLGETPROGRAMSTRINGNVPROC)glewGetProcAddress((const GLubyte*)"glGetProgramStringNV")) == NULL) || r; + r = ((glGetProgramivNV = (PFNGLGETPROGRAMIVNVPROC)glewGetProcAddress((const GLubyte*)"glGetProgramivNV")) == NULL) || r; + r = ((glGetTrackMatrixivNV = (PFNGLGETTRACKMATRIXIVNVPROC)glewGetProcAddress((const GLubyte*)"glGetTrackMatrixivNV")) == NULL) || r; + r = ((glGetVertexAttribPointervNV = (PFNGLGETVERTEXATTRIBPOINTERVNVPROC)glewGetProcAddress((const GLubyte*)"glGetVertexAttribPointervNV")) == NULL) || r; + r = ((glGetVertexAttribdvNV = (PFNGLGETVERTEXATTRIBDVNVPROC)glewGetProcAddress((const GLubyte*)"glGetVertexAttribdvNV")) == NULL) || r; + r = ((glGetVertexAttribfvNV = (PFNGLGETVERTEXATTRIBFVNVPROC)glewGetProcAddress((const GLubyte*)"glGetVertexAttribfvNV")) == NULL) || r; + r = ((glGetVertexAttribivNV = (PFNGLGETVERTEXATTRIBIVNVPROC)glewGetProcAddress((const GLubyte*)"glGetVertexAttribivNV")) == NULL) || r; + r = ((glIsProgramNV = (PFNGLISPROGRAMNVPROC)glewGetProcAddress((const GLubyte*)"glIsProgramNV")) == NULL) || r; + r = ((glLoadProgramNV = (PFNGLLOADPROGRAMNVPROC)glewGetProcAddress((const GLubyte*)"glLoadProgramNV")) == NULL) || r; + r = ((glProgramParameter4dNV = (PFNGLPROGRAMPARAMETER4DNVPROC)glewGetProcAddress((const GLubyte*)"glProgramParameter4dNV")) == NULL) || r; + r = ((glProgramParameter4dvNV = (PFNGLPROGRAMPARAMETER4DVNVPROC)glewGetProcAddress((const GLubyte*)"glProgramParameter4dvNV")) == NULL) || r; + r = ((glProgramParameter4fNV = (PFNGLPROGRAMPARAMETER4FNVPROC)glewGetProcAddress((const GLubyte*)"glProgramParameter4fNV")) == NULL) || r; + r = ((glProgramParameter4fvNV = (PFNGLPROGRAMPARAMETER4FVNVPROC)glewGetProcAddress((const GLubyte*)"glProgramParameter4fvNV")) == NULL) || r; + r = ((glProgramParameters4dvNV = (PFNGLPROGRAMPARAMETERS4DVNVPROC)glewGetProcAddress((const GLubyte*)"glProgramParameters4dvNV")) == NULL) || r; + r = ((glProgramParameters4fvNV = 
(PFNGLPROGRAMPARAMETERS4FVNVPROC)glewGetProcAddress((const GLubyte*)"glProgramParameters4fvNV")) == NULL) || r; + r = ((glRequestResidentProgramsNV = (PFNGLREQUESTRESIDENTPROGRAMSNVPROC)glewGetProcAddress((const GLubyte*)"glRequestResidentProgramsNV")) == NULL) || r; + r = ((glTrackMatrixNV = (PFNGLTRACKMATRIXNVPROC)glewGetProcAddress((const GLubyte*)"glTrackMatrixNV")) == NULL) || r; + r = ((glVertexAttrib1dNV = (PFNGLVERTEXATTRIB1DNVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib1dNV")) == NULL) || r; + r = ((glVertexAttrib1dvNV = (PFNGLVERTEXATTRIB1DVNVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib1dvNV")) == NULL) || r; + r = ((glVertexAttrib1fNV = (PFNGLVERTEXATTRIB1FNVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib1fNV")) == NULL) || r; + r = ((glVertexAttrib1fvNV = (PFNGLVERTEXATTRIB1FVNVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib1fvNV")) == NULL) || r; + r = ((glVertexAttrib1sNV = (PFNGLVERTEXATTRIB1SNVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib1sNV")) == NULL) || r; + r = ((glVertexAttrib1svNV = (PFNGLVERTEXATTRIB1SVNVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib1svNV")) == NULL) || r; + r = ((glVertexAttrib2dNV = (PFNGLVERTEXATTRIB2DNVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib2dNV")) == NULL) || r; + r = ((glVertexAttrib2dvNV = (PFNGLVERTEXATTRIB2DVNVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib2dvNV")) == NULL) || r; + r = ((glVertexAttrib2fNV = (PFNGLVERTEXATTRIB2FNVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib2fNV")) == NULL) || r; + r = ((glVertexAttrib2fvNV = (PFNGLVERTEXATTRIB2FVNVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib2fvNV")) == NULL) || r; + r = ((glVertexAttrib2sNV = (PFNGLVERTEXATTRIB2SNVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib2sNV")) == NULL) || r; + r = ((glVertexAttrib2svNV = (PFNGLVERTEXATTRIB2SVNVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib2svNV")) == NULL) || r; + r = ((glVertexAttrib3dNV = 
(PFNGLVERTEXATTRIB3DNVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib3dNV")) == NULL) || r; + r = ((glVertexAttrib3dvNV = (PFNGLVERTEXATTRIB3DVNVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib3dvNV")) == NULL) || r; + r = ((glVertexAttrib3fNV = (PFNGLVERTEXATTRIB3FNVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib3fNV")) == NULL) || r; + r = ((glVertexAttrib3fvNV = (PFNGLVERTEXATTRIB3FVNVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib3fvNV")) == NULL) || r; + r = ((glVertexAttrib3sNV = (PFNGLVERTEXATTRIB3SNVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib3sNV")) == NULL) || r; + r = ((glVertexAttrib3svNV = (PFNGLVERTEXATTRIB3SVNVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib3svNV")) == NULL) || r; + r = ((glVertexAttrib4dNV = (PFNGLVERTEXATTRIB4DNVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib4dNV")) == NULL) || r; + r = ((glVertexAttrib4dvNV = (PFNGLVERTEXATTRIB4DVNVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib4dvNV")) == NULL) || r; + r = ((glVertexAttrib4fNV = (PFNGLVERTEXATTRIB4FNVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib4fNV")) == NULL) || r; + r = ((glVertexAttrib4fvNV = (PFNGLVERTEXATTRIB4FVNVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib4fvNV")) == NULL) || r; + r = ((glVertexAttrib4sNV = (PFNGLVERTEXATTRIB4SNVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib4sNV")) == NULL) || r; + r = ((glVertexAttrib4svNV = (PFNGLVERTEXATTRIB4SVNVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib4svNV")) == NULL) || r; + r = ((glVertexAttrib4ubNV = (PFNGLVERTEXATTRIB4UBNVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib4ubNV")) == NULL) || r; + r = ((glVertexAttrib4ubvNV = (PFNGLVERTEXATTRIB4UBVNVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttrib4ubvNV")) == NULL) || r; + r = ((glVertexAttribPointerNV = (PFNGLVERTEXATTRIBPOINTERNVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttribPointerNV")) == NULL) || r; + r = ((glVertexAttribs1dvNV = 
(PFNGLVERTEXATTRIBS1DVNVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttribs1dvNV")) == NULL) || r; + r = ((glVertexAttribs1fvNV = (PFNGLVERTEXATTRIBS1FVNVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttribs1fvNV")) == NULL) || r; + r = ((glVertexAttribs1svNV = (PFNGLVERTEXATTRIBS1SVNVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttribs1svNV")) == NULL) || r; + r = ((glVertexAttribs2dvNV = (PFNGLVERTEXATTRIBS2DVNVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttribs2dvNV")) == NULL) || r; + r = ((glVertexAttribs2fvNV = (PFNGLVERTEXATTRIBS2FVNVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttribs2fvNV")) == NULL) || r; + r = ((glVertexAttribs2svNV = (PFNGLVERTEXATTRIBS2SVNVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttribs2svNV")) == NULL) || r; + r = ((glVertexAttribs3dvNV = (PFNGLVERTEXATTRIBS3DVNVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttribs3dvNV")) == NULL) || r; + r = ((glVertexAttribs3fvNV = (PFNGLVERTEXATTRIBS3FVNVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttribs3fvNV")) == NULL) || r; + r = ((glVertexAttribs3svNV = (PFNGLVERTEXATTRIBS3SVNVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttribs3svNV")) == NULL) || r; + r = ((glVertexAttribs4dvNV = (PFNGLVERTEXATTRIBS4DVNVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttribs4dvNV")) == NULL) || r; + r = ((glVertexAttribs4fvNV = (PFNGLVERTEXATTRIBS4FVNVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttribs4fvNV")) == NULL) || r; + r = ((glVertexAttribs4svNV = (PFNGLVERTEXATTRIBS4SVNVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttribs4svNV")) == NULL) || r; + r = ((glVertexAttribs4ubvNV = (PFNGLVERTEXATTRIBS4UBVNVPROC)glewGetProcAddress((const GLubyte*)"glVertexAttribs4ubvNV")) == NULL) || r; + + return r; +} + +#endif /* GL_NV_vertex_program */ + +#ifdef GL_NV_vertex_program1_1 + +#endif /* GL_NV_vertex_program1_1 */ + +#ifdef GL_NV_vertex_program2 + +#endif /* GL_NV_vertex_program2 */ + +#ifdef GL_NV_vertex_program2_option + +#endif /* 
GL_NV_vertex_program2_option */

#ifdef GL_NV_vertex_program3

#endif /* GL_NV_vertex_program3 */

#ifdef GL_NV_vertex_program4

#endif /* GL_NV_vertex_program4 */

#ifdef GL_OES_byte_coordinates

#endif /* GL_OES_byte_coordinates */

#ifdef GL_OES_compressed_paletted_texture

#endif /* GL_OES_compressed_paletted_texture */

#ifdef GL_OES_read_format

#endif /* GL_OES_read_format */

#ifdef GL_OES_single_precision

/* NOTE(review): this section follows the uniform machine-written GLEW loader
 * pattern (presumably emitted by the GLEW code generator — confirm before
 * hand-editing).  Each _glewInit_* function resolves one extension's entry
 * points via glewGetProcAddress and returns nonzero (failure) if ANY lookup
 * came back NULL: r starts at GL_FALSE and each `(ptr == NULL) || r` folds a
 * failed lookup into it.  Extensions with no entry points have empty
 * #ifdef/#endif sections. */

/* Resolve GL_OES_single_precision entry points; nonzero on any failure. */
static GLboolean _glewInit_GL_OES_single_precision (GLEW_CONTEXT_ARG_DEF_INIT)
{
  GLboolean r = GL_FALSE;

  r = ((glClearDepthfOES = (PFNGLCLEARDEPTHFOESPROC)glewGetProcAddress((const GLubyte*)"glClearDepthfOES")) == NULL) || r;
  r = ((glClipPlanefOES = (PFNGLCLIPPLANEFOESPROC)glewGetProcAddress((const GLubyte*)"glClipPlanefOES")) == NULL) || r;
  r = ((glDepthRangefOES = (PFNGLDEPTHRANGEFOESPROC)glewGetProcAddress((const GLubyte*)"glDepthRangefOES")) == NULL) || r;
  r = ((glFrustumfOES = (PFNGLFRUSTUMFOESPROC)glewGetProcAddress((const GLubyte*)"glFrustumfOES")) == NULL) || r;
  r = ((glGetClipPlanefOES = (PFNGLGETCLIPPLANEFOESPROC)glewGetProcAddress((const GLubyte*)"glGetClipPlanefOES")) == NULL) || r;
  r = ((glOrthofOES = (PFNGLORTHOFOESPROC)glewGetProcAddress((const GLubyte*)"glOrthofOES")) == NULL) || r;

  return r;
}

#endif /* GL_OES_single_precision */

#ifdef GL_OML_interlace

#endif /* GL_OML_interlace */

#ifdef GL_OML_resample

#endif /* GL_OML_resample */

#ifdef GL_OML_subsample

#endif /* GL_OML_subsample */

#ifdef GL_PGI_misc_hints

#endif /* GL_PGI_misc_hints */

#ifdef GL_PGI_vertex_hints

#endif /* GL_PGI_vertex_hints */

#ifdef GL_REND_screen_coordinates

#endif /* GL_REND_screen_coordinates */

#ifdef GL_S3_s3tc

#endif /* GL_S3_s3tc */

#ifdef GL_SGIS_color_range

#endif /* GL_SGIS_color_range */

#ifdef GL_SGIS_detail_texture

/* Resolve GL_SGIS_detail_texture entry points; nonzero on any failure. */
static GLboolean _glewInit_GL_SGIS_detail_texture (GLEW_CONTEXT_ARG_DEF_INIT)
{
  GLboolean r = GL_FALSE;

  r = ((glDetailTexFuncSGIS = (PFNGLDETAILTEXFUNCSGISPROC)glewGetProcAddress((const GLubyte*)"glDetailTexFuncSGIS")) == NULL) || r;
  r = ((glGetDetailTexFuncSGIS = (PFNGLGETDETAILTEXFUNCSGISPROC)glewGetProcAddress((const GLubyte*)"glGetDetailTexFuncSGIS")) == NULL) || r;

  return r;
}

#endif /* GL_SGIS_detail_texture */

#ifdef GL_SGIS_fog_function

/* Resolve GL_SGIS_fog_function entry points; nonzero on any failure. */
static GLboolean _glewInit_GL_SGIS_fog_function (GLEW_CONTEXT_ARG_DEF_INIT)
{
  GLboolean r = GL_FALSE;

  r = ((glFogFuncSGIS = (PFNGLFOGFUNCSGISPROC)glewGetProcAddress((const GLubyte*)"glFogFuncSGIS")) == NULL) || r;
  r = ((glGetFogFuncSGIS = (PFNGLGETFOGFUNCSGISPROC)glewGetProcAddress((const GLubyte*)"glGetFogFuncSGIS")) == NULL) || r;

  return r;
}

#endif /* GL_SGIS_fog_function */

#ifdef GL_SGIS_generate_mipmap

#endif /* GL_SGIS_generate_mipmap */

#ifdef GL_SGIS_multisample

/* Resolve GL_SGIS_multisample entry points; nonzero on any failure. */
static GLboolean _glewInit_GL_SGIS_multisample (GLEW_CONTEXT_ARG_DEF_INIT)
{
  GLboolean r = GL_FALSE;

  r = ((glSampleMaskSGIS = (PFNGLSAMPLEMASKSGISPROC)glewGetProcAddress((const GLubyte*)"glSampleMaskSGIS")) == NULL) || r;
  r = ((glSamplePatternSGIS = (PFNGLSAMPLEPATTERNSGISPROC)glewGetProcAddress((const GLubyte*)"glSamplePatternSGIS")) == NULL) || r;

  return r;
}

#endif /* GL_SGIS_multisample */

#ifdef GL_SGIS_pixel_texture

#endif /* GL_SGIS_pixel_texture */

#ifdef GL_SGIS_sharpen_texture

/* Resolve GL_SGIS_sharpen_texture entry points; nonzero on any failure. */
static GLboolean _glewInit_GL_SGIS_sharpen_texture (GLEW_CONTEXT_ARG_DEF_INIT)
{
  GLboolean r = GL_FALSE;

  r = ((glGetSharpenTexFuncSGIS = (PFNGLGETSHARPENTEXFUNCSGISPROC)glewGetProcAddress((const GLubyte*)"glGetSharpenTexFuncSGIS")) == NULL) || r;
  r = ((glSharpenTexFuncSGIS = (PFNGLSHARPENTEXFUNCSGISPROC)glewGetProcAddress((const GLubyte*)"glSharpenTexFuncSGIS")) == NULL) || r;

  return r;
}

#endif /* GL_SGIS_sharpen_texture */

#ifdef GL_SGIS_texture4D

/* Resolve GL_SGIS_texture4D entry points; nonzero on any failure. */
static GLboolean _glewInit_GL_SGIS_texture4D (GLEW_CONTEXT_ARG_DEF_INIT)
{
  GLboolean r = GL_FALSE;

  r = ((glTexImage4DSGIS = (PFNGLTEXIMAGE4DSGISPROC)glewGetProcAddress((const GLubyte*)"glTexImage4DSGIS")) == NULL) || r;
  r = ((glTexSubImage4DSGIS = (PFNGLTEXSUBIMAGE4DSGISPROC)glewGetProcAddress((const GLubyte*)"glTexSubImage4DSGIS")) == NULL) || r;

  return r;
}

#endif /* GL_SGIS_texture4D */

#ifdef GL_SGIS_texture_border_clamp

#endif /* GL_SGIS_texture_border_clamp */

#ifdef GL_SGIS_texture_edge_clamp

#endif /* GL_SGIS_texture_edge_clamp */

#ifdef GL_SGIS_texture_filter4

/* Resolve GL_SGIS_texture_filter4 entry points; nonzero on any failure. */
static GLboolean _glewInit_GL_SGIS_texture_filter4 (GLEW_CONTEXT_ARG_DEF_INIT)
{
  GLboolean r = GL_FALSE;

  r = ((glGetTexFilterFuncSGIS = (PFNGLGETTEXFILTERFUNCSGISPROC)glewGetProcAddress((const GLubyte*)"glGetTexFilterFuncSGIS")) == NULL) || r;
  r = ((glTexFilterFuncSGIS = (PFNGLTEXFILTERFUNCSGISPROC)glewGetProcAddress((const GLubyte*)"glTexFilterFuncSGIS")) == NULL) || r;

  return r;
}

#endif /* GL_SGIS_texture_filter4 */

#ifdef GL_SGIS_texture_lod

#endif /* GL_SGIS_texture_lod */

#ifdef GL_SGIS_texture_select

#endif /* GL_SGIS_texture_select */

#ifdef GL_SGIX_async

/* Resolve GL_SGIX_async entry points; nonzero on any failure. */
static GLboolean _glewInit_GL_SGIX_async (GLEW_CONTEXT_ARG_DEF_INIT)
{
  GLboolean r = GL_FALSE;

  r = ((glAsyncMarkerSGIX = (PFNGLASYNCMARKERSGIXPROC)glewGetProcAddress((const GLubyte*)"glAsyncMarkerSGIX")) == NULL) || r;
  r = ((glDeleteAsyncMarkersSGIX = (PFNGLDELETEASYNCMARKERSSGIXPROC)glewGetProcAddress((const GLubyte*)"glDeleteAsyncMarkersSGIX")) == NULL) || r;
  r = ((glFinishAsyncSGIX = (PFNGLFINISHASYNCSGIXPROC)glewGetProcAddress((const GLubyte*)"glFinishAsyncSGIX")) == NULL) || r;
  r = ((glGenAsyncMarkersSGIX = (PFNGLGENASYNCMARKERSSGIXPROC)glewGetProcAddress((const GLubyte*)"glGenAsyncMarkersSGIX")) == NULL) || r;
  r = ((glIsAsyncMarkerSGIX = (PFNGLISASYNCMARKERSGIXPROC)glewGetProcAddress((const GLubyte*)"glIsAsyncMarkerSGIX")) == NULL) || r;
  r = ((glPollAsyncSGIX = (PFNGLPOLLASYNCSGIXPROC)glewGetProcAddress((const GLubyte*)"glPollAsyncSGIX")) == NULL) || r;

  return r;
}
#endif /* GL_SGIX_async */

#ifdef GL_SGIX_async_histogram

#endif /* GL_SGIX_async_histogram */

#ifdef GL_SGIX_async_pixel

#endif /* GL_SGIX_async_pixel */

#ifdef GL_SGIX_blend_alpha_minmax

#endif /* GL_SGIX_blend_alpha_minmax */

#ifdef GL_SGIX_clipmap

#endif /* GL_SGIX_clipmap */

#ifdef GL_SGIX_depth_texture

#endif /* GL_SGIX_depth_texture */

#ifdef GL_SGIX_flush_raster

/* Resolve GL_SGIX_flush_raster entry points via glewGetProcAddress;
 * nonzero return means at least one lookup came back NULL (failure). */
static GLboolean _glewInit_GL_SGIX_flush_raster (GLEW_CONTEXT_ARG_DEF_INIT)
{
  GLboolean r = GL_FALSE;

  r = ((glFlushRasterSGIX = (PFNGLFLUSHRASTERSGIXPROC)glewGetProcAddress((const GLubyte*)"glFlushRasterSGIX")) == NULL) || r;

  return r;
}

#endif /* GL_SGIX_flush_raster */

#ifdef GL_SGIX_fog_offset

#endif /* GL_SGIX_fog_offset */

#ifdef GL_SGIX_fog_texture

/* Resolve GL_SGIX_fog_texture entry points; nonzero on any failure. */
static GLboolean _glewInit_GL_SGIX_fog_texture (GLEW_CONTEXT_ARG_DEF_INIT)
{
  GLboolean r = GL_FALSE;

  r = ((glTextureFogSGIX = (PFNGLTEXTUREFOGSGIXPROC)glewGetProcAddress((const GLubyte*)"glTextureFogSGIX")) == NULL) || r;

  return r;
}

#endif /* GL_SGIX_fog_texture */

#ifdef GL_SGIX_fragment_specular_lighting

/* Resolve GL_SGIX_fragment_specular_lighting entry points; nonzero on any failure. */
static GLboolean _glewInit_GL_SGIX_fragment_specular_lighting (GLEW_CONTEXT_ARG_DEF_INIT)
{
  GLboolean r = GL_FALSE;

  r = ((glFragmentColorMaterialSGIX = (PFNGLFRAGMENTCOLORMATERIALSGIXPROC)glewGetProcAddress((const GLubyte*)"glFragmentColorMaterialSGIX")) == NULL) || r;
  r = ((glFragmentLightModelfSGIX = (PFNGLFRAGMENTLIGHTMODELFSGIXPROC)glewGetProcAddress((const GLubyte*)"glFragmentLightModelfSGIX")) == NULL) || r;
  r = ((glFragmentLightModelfvSGIX = (PFNGLFRAGMENTLIGHTMODELFVSGIXPROC)glewGetProcAddress((const GLubyte*)"glFragmentLightModelfvSGIX")) == NULL) || r;
  r = ((glFragmentLightModeliSGIX = (PFNGLFRAGMENTLIGHTMODELISGIXPROC)glewGetProcAddress((const GLubyte*)"glFragmentLightModeliSGIX")) == NULL) || r;
  r = ((glFragmentLightModelivSGIX = (PFNGLFRAGMENTLIGHTMODELIVSGIXPROC)glewGetProcAddress((const GLubyte*)"glFragmentLightModelivSGIX")) == NULL) || r;
  r = ((glFragmentLightfSGIX = (PFNGLFRAGMENTLIGHTFSGIXPROC)glewGetProcAddress((const GLubyte*)"glFragmentLightfSGIX")) == NULL) || r;
  r = ((glFragmentLightfvSGIX = (PFNGLFRAGMENTLIGHTFVSGIXPROC)glewGetProcAddress((const GLubyte*)"glFragmentLightfvSGIX")) == NULL) || r;
  r = ((glFragmentLightiSGIX = (PFNGLFRAGMENTLIGHTISGIXPROC)glewGetProcAddress((const GLubyte*)"glFragmentLightiSGIX")) == NULL) || r;
  r = ((glFragmentLightivSGIX = (PFNGLFRAGMENTLIGHTIVSGIXPROC)glewGetProcAddress((const GLubyte*)"glFragmentLightivSGIX")) == NULL) || r;
  r = ((glFragmentMaterialfSGIX = (PFNGLFRAGMENTMATERIALFSGIXPROC)glewGetProcAddress((const GLubyte*)"glFragmentMaterialfSGIX")) == NULL) || r;
  r = ((glFragmentMaterialfvSGIX = (PFNGLFRAGMENTMATERIALFVSGIXPROC)glewGetProcAddress((const GLubyte*)"glFragmentMaterialfvSGIX")) == NULL) || r;
  r = ((glFragmentMaterialiSGIX = (PFNGLFRAGMENTMATERIALISGIXPROC)glewGetProcAddress((const GLubyte*)"glFragmentMaterialiSGIX")) == NULL) || r;
  r = ((glFragmentMaterialivSGIX = (PFNGLFRAGMENTMATERIALIVSGIXPROC)glewGetProcAddress((const GLubyte*)"glFragmentMaterialivSGIX")) == NULL) || r;
  r = ((glGetFragmentLightfvSGIX = (PFNGLGETFRAGMENTLIGHTFVSGIXPROC)glewGetProcAddress((const GLubyte*)"glGetFragmentLightfvSGIX")) == NULL) || r;
  r = ((glGetFragmentLightivSGIX = (PFNGLGETFRAGMENTLIGHTIVSGIXPROC)glewGetProcAddress((const GLubyte*)"glGetFragmentLightivSGIX")) == NULL) || r;
  r = ((glGetFragmentMaterialfvSGIX = (PFNGLGETFRAGMENTMATERIALFVSGIXPROC)glewGetProcAddress((const GLubyte*)"glGetFragmentMaterialfvSGIX")) == NULL) || r;
  r = ((glGetFragmentMaterialivSGIX = (PFNGLGETFRAGMENTMATERIALIVSGIXPROC)glewGetProcAddress((const GLubyte*)"glGetFragmentMaterialivSGIX")) == NULL) || r;

  return r;
}

#endif /* GL_SGIX_fragment_specular_lighting */

#ifdef GL_SGIX_framezoom

/* Resolve GL_SGIX_framezoom entry points; nonzero on any failure. */
static GLboolean _glewInit_GL_SGIX_framezoom (GLEW_CONTEXT_ARG_DEF_INIT)
{
  GLboolean r = GL_FALSE;

  r = ((glFrameZoomSGIX = (PFNGLFRAMEZOOMSGIXPROC)glewGetProcAddress((const GLubyte*)"glFrameZoomSGIX")) == NULL) || r;

  return r;
}

#endif /* GL_SGIX_framezoom */

#ifdef GL_SGIX_interlace

#endif /* GL_SGIX_interlace */

#ifdef GL_SGIX_ir_instrument1

#endif /* GL_SGIX_ir_instrument1 */

#ifdef GL_SGIX_list_priority

#endif /* GL_SGIX_list_priority */

#ifdef GL_SGIX_pixel_texture

/* Resolve GL_SGIX_pixel_texture entry points; nonzero on any failure. */
static GLboolean _glewInit_GL_SGIX_pixel_texture (GLEW_CONTEXT_ARG_DEF_INIT)
{
  GLboolean r = GL_FALSE;

  r = ((glPixelTexGenSGIX = (PFNGLPIXELTEXGENSGIXPROC)glewGetProcAddress((const GLubyte*)"glPixelTexGenSGIX")) == NULL) || r;

  return r;
}

#endif /* GL_SGIX_pixel_texture */

#ifdef GL_SGIX_pixel_texture_bits

#endif /* GL_SGIX_pixel_texture_bits */

#ifdef GL_SGIX_reference_plane

/* Resolve GL_SGIX_reference_plane entry points; nonzero on any failure. */
static GLboolean _glewInit_GL_SGIX_reference_plane (GLEW_CONTEXT_ARG_DEF_INIT)
{
  GLboolean r = GL_FALSE;

  r = ((glReferencePlaneSGIX = (PFNGLREFERENCEPLANESGIXPROC)glewGetProcAddress((const GLubyte*)"glReferencePlaneSGIX")) == NULL) || r;

  return r;
}

#endif /* GL_SGIX_reference_plane */

#ifdef GL_SGIX_resample

#endif /* GL_SGIX_resample */

#ifdef GL_SGIX_shadow

#endif /* GL_SGIX_shadow */

#ifdef GL_SGIX_shadow_ambient

#endif /* GL_SGIX_shadow_ambient */

#ifdef GL_SGIX_sprite

/* Resolve GL_SGIX_sprite entry points; nonzero on any failure. */
static GLboolean _glewInit_GL_SGIX_sprite (GLEW_CONTEXT_ARG_DEF_INIT)
{
  GLboolean r = GL_FALSE;

  r = ((glSpriteParameterfSGIX = (PFNGLSPRITEPARAMETERFSGIXPROC)glewGetProcAddress((const GLubyte*)"glSpriteParameterfSGIX")) == NULL) || r;
  r = ((glSpriteParameterfvSGIX = (PFNGLSPRITEPARAMETERFVSGIXPROC)glewGetProcAddress((const GLubyte*)"glSpriteParameterfvSGIX")) == NULL) || r;
  r = ((glSpriteParameteriSGIX = (PFNGLSPRITEPARAMETERISGIXPROC)glewGetProcAddress((const GLubyte*)"glSpriteParameteriSGIX")) == NULL) || r;
  r = ((glSpriteParameterivSGIX = (PFNGLSPRITEPARAMETERIVSGIXPROC)glewGetProcAddress((const GLubyte*)"glSpriteParameterivSGIX")) == NULL) || r;

  return r;
}
#endif /* GL_SGIX_sprite */

#ifdef GL_SGIX_tag_sample_buffer

/* Resolve GL_SGIX_tag_sample_buffer entry points via glewGetProcAddress;
 * nonzero return means at least one lookup came back NULL (failure). */
static GLboolean _glewInit_GL_SGIX_tag_sample_buffer (GLEW_CONTEXT_ARG_DEF_INIT)
{
  GLboolean r = GL_FALSE;

  r = ((glTagSampleBufferSGIX = (PFNGLTAGSAMPLEBUFFERSGIXPROC)glewGetProcAddress((const GLubyte*)"glTagSampleBufferSGIX")) == NULL) || r;

  return r;
}

#endif /* GL_SGIX_tag_sample_buffer */

#ifdef GL_SGIX_texture_add_env

#endif /* GL_SGIX_texture_add_env */

#ifdef GL_SGIX_texture_coordinate_clamp

#endif /* GL_SGIX_texture_coordinate_clamp */

#ifdef GL_SGIX_texture_lod_bias

#endif /* GL_SGIX_texture_lod_bias */

#ifdef GL_SGIX_texture_multi_buffer

#endif /* GL_SGIX_texture_multi_buffer */

#ifdef GL_SGIX_texture_range

#endif /* GL_SGIX_texture_range */

#ifdef GL_SGIX_texture_scale_bias

#endif /* GL_SGIX_texture_scale_bias */

#ifdef GL_SGIX_vertex_preclip

#endif /* GL_SGIX_vertex_preclip */

#ifdef GL_SGIX_vertex_preclip_hint

#endif /* GL_SGIX_vertex_preclip_hint */

#ifdef GL_SGIX_ycrcb

#endif /* GL_SGIX_ycrcb */

#ifdef GL_SGI_color_matrix

#endif /* GL_SGI_color_matrix */

#ifdef GL_SGI_color_table

/* Resolve GL_SGI_color_table entry points; nonzero on any failure. */
static GLboolean _glewInit_GL_SGI_color_table (GLEW_CONTEXT_ARG_DEF_INIT)
{
  GLboolean r = GL_FALSE;

  r = ((glColorTableParameterfvSGI = (PFNGLCOLORTABLEPARAMETERFVSGIPROC)glewGetProcAddress((const GLubyte*)"glColorTableParameterfvSGI")) == NULL) || r;
  r = ((glColorTableParameterivSGI = (PFNGLCOLORTABLEPARAMETERIVSGIPROC)glewGetProcAddress((const GLubyte*)"glColorTableParameterivSGI")) == NULL) || r;
  r = ((glColorTableSGI = (PFNGLCOLORTABLESGIPROC)glewGetProcAddress((const GLubyte*)"glColorTableSGI")) == NULL) || r;
  r = ((glCopyColorTableSGI = (PFNGLCOPYCOLORTABLESGIPROC)glewGetProcAddress((const GLubyte*)"glCopyColorTableSGI")) == NULL) || r;
  r = ((glGetColorTableParameterfvSGI = (PFNGLGETCOLORTABLEPARAMETERFVSGIPROC)glewGetProcAddress((const GLubyte*)"glGetColorTableParameterfvSGI")) == NULL) || r;
  r = ((glGetColorTableParameterivSGI = (PFNGLGETCOLORTABLEPARAMETERIVSGIPROC)glewGetProcAddress((const GLubyte*)"glGetColorTableParameterivSGI")) == NULL) || r;
  r = ((glGetColorTableSGI = (PFNGLGETCOLORTABLESGIPROC)glewGetProcAddress((const GLubyte*)"glGetColorTableSGI")) == NULL) || r;

  return r;
}

#endif /* GL_SGI_color_table */

#ifdef GL_SGI_texture_color_table

#endif /* GL_SGI_texture_color_table */

#ifdef GL_SUNX_constant_data

/* Resolve GL_SUNX_constant_data entry points; nonzero on any failure. */
static GLboolean _glewInit_GL_SUNX_constant_data (GLEW_CONTEXT_ARG_DEF_INIT)
{
  GLboolean r = GL_FALSE;

  r = ((glFinishTextureSUNX = (PFNGLFINISHTEXTURESUNXPROC)glewGetProcAddress((const GLubyte*)"glFinishTextureSUNX")) == NULL) || r;

  return r;
}

#endif /* GL_SUNX_constant_data */

#ifdef GL_SUN_convolution_border_modes

#endif /* GL_SUN_convolution_border_modes */

#ifdef GL_SUN_global_alpha

/* Resolve GL_SUN_global_alpha entry points; nonzero on any failure. */
static GLboolean _glewInit_GL_SUN_global_alpha (GLEW_CONTEXT_ARG_DEF_INIT)
{
  GLboolean r = GL_FALSE;

  r = ((glGlobalAlphaFactorbSUN = (PFNGLGLOBALALPHAFACTORBSUNPROC)glewGetProcAddress((const GLubyte*)"glGlobalAlphaFactorbSUN")) == NULL) || r;
  r = ((glGlobalAlphaFactordSUN = (PFNGLGLOBALALPHAFACTORDSUNPROC)glewGetProcAddress((const GLubyte*)"glGlobalAlphaFactordSUN")) == NULL) || r;
  r = ((glGlobalAlphaFactorfSUN = (PFNGLGLOBALALPHAFACTORFSUNPROC)glewGetProcAddress((const GLubyte*)"glGlobalAlphaFactorfSUN")) == NULL) || r;
  r = ((glGlobalAlphaFactoriSUN = (PFNGLGLOBALALPHAFACTORISUNPROC)glewGetProcAddress((const GLubyte*)"glGlobalAlphaFactoriSUN")) == NULL) || r;
  r = ((glGlobalAlphaFactorsSUN = (PFNGLGLOBALALPHAFACTORSSUNPROC)glewGetProcAddress((const GLubyte*)"glGlobalAlphaFactorsSUN")) == NULL) || r;
  r = ((glGlobalAlphaFactorubSUN = (PFNGLGLOBALALPHAFACTORUBSUNPROC)glewGetProcAddress((const GLubyte*)"glGlobalAlphaFactorubSUN")) == NULL) || r;
  r = ((glGlobalAlphaFactoruiSUN = (PFNGLGLOBALALPHAFACTORUISUNPROC)glewGetProcAddress((const GLubyte*)"glGlobalAlphaFactoruiSUN")) == NULL) || r;
  r = ((glGlobalAlphaFactorusSUN = (PFNGLGLOBALALPHAFACTORUSSUNPROC)glewGetProcAddress((const GLubyte*)"glGlobalAlphaFactorusSUN")) == NULL) || r;

  return r;
}

#endif /* GL_SUN_global_alpha */

#ifdef GL_SUN_mesh_array

#endif /* GL_SUN_mesh_array */

#ifdef GL_SUN_read_video_pixels

/* Resolve GL_SUN_read_video_pixels entry points; nonzero on any failure. */
static GLboolean _glewInit_GL_SUN_read_video_pixels (GLEW_CONTEXT_ARG_DEF_INIT)
{
  GLboolean r = GL_FALSE;

  r = ((glReadVideoPixelsSUN = (PFNGLREADVIDEOPIXELSSUNPROC)glewGetProcAddress((const GLubyte*)"glReadVideoPixelsSUN")) == NULL) || r;

  return r;
}

#endif /* GL_SUN_read_video_pixels */

#ifdef GL_SUN_slice_accum

#endif /* GL_SUN_slice_accum */

#ifdef GL_SUN_triangle_list

/* Resolve GL_SUN_triangle_list entry points; nonzero on any failure. */
static GLboolean _glewInit_GL_SUN_triangle_list (GLEW_CONTEXT_ARG_DEF_INIT)
{
  GLboolean r = GL_FALSE;

  r = ((glReplacementCodePointerSUN = (PFNGLREPLACEMENTCODEPOINTERSUNPROC)glewGetProcAddress((const GLubyte*)"glReplacementCodePointerSUN")) == NULL) || r;
  r = ((glReplacementCodeubSUN = (PFNGLREPLACEMENTCODEUBSUNPROC)glewGetProcAddress((const GLubyte*)"glReplacementCodeubSUN")) == NULL) || r;
  r = ((glReplacementCodeubvSUN = (PFNGLREPLACEMENTCODEUBVSUNPROC)glewGetProcAddress((const GLubyte*)"glReplacementCodeubvSUN")) == NULL) || r;
  r = ((glReplacementCodeuiSUN = (PFNGLREPLACEMENTCODEUISUNPROC)glewGetProcAddress((const GLubyte*)"glReplacementCodeuiSUN")) == NULL) || r;
  r = ((glReplacementCodeuivSUN = (PFNGLREPLACEMENTCODEUIVSUNPROC)glewGetProcAddress((const GLubyte*)"glReplacementCodeuivSUN")) == NULL) || r;
  r = ((glReplacementCodeusSUN = (PFNGLREPLACEMENTCODEUSSUNPROC)glewGetProcAddress((const GLubyte*)"glReplacementCodeusSUN")) == NULL) || r;
  r = ((glReplacementCodeusvSUN = (PFNGLREPLACEMENTCODEUSVSUNPROC)glewGetProcAddress((const GLubyte*)"glReplacementCodeusvSUN")) == NULL) || r;

  return r;
}

#endif /* GL_SUN_triangle_list */

#ifdef GL_SUN_vertex

/* Resolve GL_SUN_vertex entry points; nonzero on any failure. */
static GLboolean _glewInit_GL_SUN_vertex (GLEW_CONTEXT_ARG_DEF_INIT)
{
  GLboolean r = GL_FALSE;

  r = ((glColor3fVertex3fSUN = (PFNGLCOLOR3FVERTEX3FSUNPROC)glewGetProcAddress((const GLubyte*)"glColor3fVertex3fSUN")) == NULL) || r;
  r = ((glColor3fVertex3fvSUN = (PFNGLCOLOR3FVERTEX3FVSUNPROC)glewGetProcAddress((const GLubyte*)"glColor3fVertex3fvSUN")) == NULL) || r;
  r = ((glColor4fNormal3fVertex3fSUN = (PFNGLCOLOR4FNORMAL3FVERTEX3FSUNPROC)glewGetProcAddress((const GLubyte*)"glColor4fNormal3fVertex3fSUN")) == NULL) || r;
  r = ((glColor4fNormal3fVertex3fvSUN = (PFNGLCOLOR4FNORMAL3FVERTEX3FVSUNPROC)glewGetProcAddress((const GLubyte*)"glColor4fNormal3fVertex3fvSUN")) == NULL) || r;
  r = ((glColor4ubVertex2fSUN = (PFNGLCOLOR4UBVERTEX2FSUNPROC)glewGetProcAddress((const GLubyte*)"glColor4ubVertex2fSUN")) == NULL) || r;
  r = ((glColor4ubVertex2fvSUN = (PFNGLCOLOR4UBVERTEX2FVSUNPROC)glewGetProcAddress((const GLubyte*)"glColor4ubVertex2fvSUN")) == NULL) || r;
  r = ((glColor4ubVertex3fSUN = (PFNGLCOLOR4UBVERTEX3FSUNPROC)glewGetProcAddress((const GLubyte*)"glColor4ubVertex3fSUN")) == NULL) || r;
  r = ((glColor4ubVertex3fvSUN = (PFNGLCOLOR4UBVERTEX3FVSUNPROC)glewGetProcAddress((const GLubyte*)"glColor4ubVertex3fvSUN")) == NULL) || r;
  r = ((glNormal3fVertex3fSUN = (PFNGLNORMAL3FVERTEX3FSUNPROC)glewGetProcAddress((const GLubyte*)"glNormal3fVertex3fSUN")) == NULL) || r;
  r = ((glNormal3fVertex3fvSUN = (PFNGLNORMAL3FVERTEX3FVSUNPROC)glewGetProcAddress((const GLubyte*)"glNormal3fVertex3fvSUN")) == NULL) || r;
  r = ((glReplacementCodeuiColor3fVertex3fSUN = (PFNGLREPLACEMENTCODEUICOLOR3FVERTEX3FSUNPROC)glewGetProcAddress((const GLubyte*)"glReplacementCodeuiColor3fVertex3fSUN")) == NULL) || r;
  r = ((glReplacementCodeuiColor3fVertex3fvSUN = (PFNGLREPLACEMENTCODEUICOLOR3FVERTEX3FVSUNPROC)glewGetProcAddress((const GLubyte*)"glReplacementCodeuiColor3fVertex3fvSUN")) == NULL) || r;
  r = ((glReplacementCodeuiColor4fNormal3fVertex3fSUN = (PFNGLREPLACEMENTCODEUICOLOR4FNORMAL3FVERTEX3FSUNPROC)glewGetProcAddress((const GLubyte*)"glReplacementCodeuiColor4fNormal3fVertex3fSUN")) == NULL) || r;
  r = ((glReplacementCodeuiColor4fNormal3fVertex3fvSUN = (PFNGLREPLACEMENTCODEUICOLOR4FNORMAL3FVERTEX3FVSUNPROC)glewGetProcAddress((const GLubyte*)"glReplacementCodeuiColor4fNormal3fVertex3fvSUN")) == NULL) || r;
  r = ((glReplacementCodeuiColor4ubVertex3fSUN = (PFNGLREPLACEMENTCODEUICOLOR4UBVERTEX3FSUNPROC)glewGetProcAddress((const GLubyte*)"glReplacementCodeuiColor4ubVertex3fSUN")) == NULL) || r;
  r = ((glReplacementCodeuiColor4ubVertex3fvSUN = (PFNGLREPLACEMENTCODEUICOLOR4UBVERTEX3FVSUNPROC)glewGetProcAddress((const GLubyte*)"glReplacementCodeuiColor4ubVertex3fvSUN")) == NULL) || r;
  r = ((glReplacementCodeuiNormal3fVertex3fSUN = (PFNGLREPLACEMENTCODEUINORMAL3FVERTEX3FSUNPROC)glewGetProcAddress((const GLubyte*)"glReplacementCodeuiNormal3fVertex3fSUN")) == NULL) || r;
  r = ((glReplacementCodeuiNormal3fVertex3fvSUN = (PFNGLREPLACEMENTCODEUINORMAL3FVERTEX3FVSUNPROC)glewGetProcAddress((const GLubyte*)"glReplacementCodeuiNormal3fVertex3fvSUN")) == NULL) || r;
  r = ((glReplacementCodeuiTexCoord2fColor4fNormal3fVertex3fSUN = (PFNGLREPLACEMENTCODEUITEXCOORD2FCOLOR4FNORMAL3FVERTEX3FSUNPROC)glewGetProcAddress((const GLubyte*)"glReplacementCodeuiTexCoord2fColor4fNormal3fVertex3fSUN")) == NULL) || r;
  r = ((glReplacementCodeuiTexCoord2fColor4fNormal3fVertex3fvSUN = (PFNGLREPLACEMENTCODEUITEXCOORD2FCOLOR4FNORMAL3FVERTEX3FVSUNPROC)glewGetProcAddress((const GLubyte*)"glReplacementCodeuiTexCoord2fColor4fNormal3fVertex3fvSUN")) == NULL) || r;
  r = ((glReplacementCodeuiTexCoord2fNormal3fVertex3fSUN = (PFNGLREPLACEMENTCODEUITEXCOORD2FNORMAL3FVERTEX3FSUNPROC)glewGetProcAddress((const GLubyte*)"glReplacementCodeuiTexCoord2fNormal3fVertex3fSUN")) == NULL) || r;
  r = ((glReplacementCodeuiTexCoord2fNormal3fVertex3fvSUN = (PFNGLREPLACEMENTCODEUITEXCOORD2FNORMAL3FVERTEX3FVSUNPROC)glewGetProcAddress((const GLubyte*)"glReplacementCodeuiTexCoord2fNormal3fVertex3fvSUN")) == NULL) || r;
  r = ((glReplacementCodeuiTexCoord2fVertex3fSUN = (PFNGLREPLACEMENTCODEUITEXCOORD2FVERTEX3FSUNPROC)glewGetProcAddress((const GLubyte*)"glReplacementCodeuiTexCoord2fVertex3fSUN")) == NULL) || r;
  r = ((glReplacementCodeuiTexCoord2fVertex3fvSUN = (PFNGLREPLACEMENTCODEUITEXCOORD2FVERTEX3FVSUNPROC)glewGetProcAddress((const GLubyte*)"glReplacementCodeuiTexCoord2fVertex3fvSUN")) == NULL) || r;
  r = ((glReplacementCodeuiVertex3fSUN = (PFNGLREPLACEMENTCODEUIVERTEX3FSUNPROC)glewGetProcAddress((const GLubyte*)"glReplacementCodeuiVertex3fSUN")) == NULL) || r;
  r = ((glReplacementCodeuiVertex3fvSUN = (PFNGLREPLACEMENTCODEUIVERTEX3FVSUNPROC)glewGetProcAddress((const GLubyte*)"glReplacementCodeuiVertex3fvSUN")) == NULL) || r;
  r = ((glTexCoord2fColor3fVertex3fSUN = (PFNGLTEXCOORD2FCOLOR3FVERTEX3FSUNPROC)glewGetProcAddress((const GLubyte*)"glTexCoord2fColor3fVertex3fSUN")) == NULL) || r;
  r = ((glTexCoord2fColor3fVertex3fvSUN = (PFNGLTEXCOORD2FCOLOR3FVERTEX3FVSUNPROC)glewGetProcAddress((const GLubyte*)"glTexCoord2fColor3fVertex3fvSUN")) == NULL) || r;
  r = ((glTexCoord2fColor4fNormal3fVertex3fSUN = (PFNGLTEXCOORD2FCOLOR4FNORMAL3FVERTEX3FSUNPROC)glewGetProcAddress((const GLubyte*)"glTexCoord2fColor4fNormal3fVertex3fSUN")) == NULL) || r;
  r = ((glTexCoord2fColor4fNormal3fVertex3fvSUN = (PFNGLTEXCOORD2FCOLOR4FNORMAL3FVERTEX3FVSUNPROC)glewGetProcAddress((const GLubyte*)"glTexCoord2fColor4fNormal3fVertex3fvSUN")) == NULL) || r;
  r = ((glTexCoord2fColor4ubVertex3fSUN = (PFNGLTEXCOORD2FCOLOR4UBVERTEX3FSUNPROC)glewGetProcAddress((const GLubyte*)"glTexCoord2fColor4ubVertex3fSUN")) == NULL) || r;
  r = ((glTexCoord2fColor4ubVertex3fvSUN = (PFNGLTEXCOORD2FCOLOR4UBVERTEX3FVSUNPROC)glewGetProcAddress((const GLubyte*)"glTexCoord2fColor4ubVertex3fvSUN")) == NULL) || r;
  r = ((glTexCoord2fNormal3fVertex3fSUN = (PFNGLTEXCOORD2FNORMAL3FVERTEX3FSUNPROC)glewGetProcAddress((const GLubyte*)"glTexCoord2fNormal3fVertex3fSUN")) == NULL) || r;
  r = ((glTexCoord2fNormal3fVertex3fvSUN = (PFNGLTEXCOORD2FNORMAL3FVERTEX3FVSUNPROC)glewGetProcAddress((const GLubyte*)"glTexCoord2fNormal3fVertex3fvSUN")) == NULL) || r;
  r = ((glTexCoord2fVertex3fSUN = (PFNGLTEXCOORD2FVERTEX3FSUNPROC)glewGetProcAddress((const GLubyte*)"glTexCoord2fVertex3fSUN")) == NULL) || r;
  r = ((glTexCoord2fVertex3fvSUN = (PFNGLTEXCOORD2FVERTEX3FVSUNPROC)glewGetProcAddress((const GLubyte*)"glTexCoord2fVertex3fvSUN")) == NULL) || r;
  r = ((glTexCoord4fColor4fNormal3fVertex4fSUN = (PFNGLTEXCOORD4FCOLOR4FNORMAL3FVERTEX4FSUNPROC)glewGetProcAddress((const GLubyte*)"glTexCoord4fColor4fNormal3fVertex4fSUN")) == NULL) || r;
  r = ((glTexCoord4fColor4fNormal3fVertex4fvSUN = (PFNGLTEXCOORD4FCOLOR4FNORMAL3FVERTEX4FVSUNPROC)glewGetProcAddress((const GLubyte*)"glTexCoord4fColor4fNormal3fVertex4fvSUN")) == NULL) || r;
  r = ((glTexCoord4fVertex4fSUN = (PFNGLTEXCOORD4FVERTEX4FSUNPROC)glewGetProcAddress((const GLubyte*)"glTexCoord4fVertex4fSUN")) == NULL) || r;
  r = ((glTexCoord4fVertex4fvSUN = (PFNGLTEXCOORD4FVERTEX4FVSUNPROC)glewGetProcAddress((const GLubyte*)"glTexCoord4fVertex4fvSUN")) == NULL) || r;

  return r;
}

#endif /* GL_SUN_vertex */

#ifdef GL_WIN_phong_shading

#endif /* GL_WIN_phong_shading */

#ifdef GL_WIN_specular_fog

#endif /* GL_WIN_specular_fog */

#ifdef GL_WIN_swap_hint

/* Resolve GL_WIN_swap_hint entry points; nonzero on any failure. */
static GLboolean _glewInit_GL_WIN_swap_hint (GLEW_CONTEXT_ARG_DEF_INIT)
{
  GLboolean r = GL_FALSE;

  r = ((glAddSwapHintRectWIN = (PFNGLADDSWAPHINTRECTWINPROC)glewGetProcAddress((const GLubyte*)"glAddSwapHintRectWIN")) == NULL) || r;

  return r;
}

#endif /* GL_WIN_swap_hint */

/* ------------------------------------------------------------------------- */

/*
 * Search for name in the extensions string. Use of strstr()
 * is not sufficient because extension names can be prefixes of
 * other extension names. Could use strtok() but the constant
 * string returned by glGetString might be in read-only memory.
+ */ +GLboolean glewGetExtension (const char* name) +{ + GLubyte* p; + GLubyte* end; + GLuint len = _glewStrLen((const GLubyte*)name); + p = (GLubyte*)glGetString(GL_EXTENSIONS); + if (0 == p) return GL_FALSE; + end = p + _glewStrLen(p); + while (p < end) + { + GLuint n = _glewStrCLen(p, ' '); + if (len == n && _glewStrSame((const GLubyte*)name, p, n)) return GL_TRUE; + p += n+1; + } + return GL_FALSE; +} + +/* ------------------------------------------------------------------------- */ + +#ifndef GLEW_MX +static +#endif +GLenum glewContextInit (GLEW_CONTEXT_ARG_DEF_LIST) +{ + const GLubyte* s; + GLuint dot, major, minor; + /* query opengl version */ + s = glGetString(GL_VERSION); + dot = _glewStrCLen(s, '.'); + major = dot-1; + minor = dot+1; + if (dot == 0 || s[minor] == '\0') + return GLEW_ERROR_NO_GL_VERSION; + if (s[major] == '1' && s[minor] == '0') + { + return GLEW_ERROR_GL_VERSION_10_ONLY; + } + else + { + CONST_CAST(GLEW_VERSION_1_1) = GL_TRUE; + if (s[major] >= '2') + { + CONST_CAST(GLEW_VERSION_1_2) = GL_TRUE; + CONST_CAST(GLEW_VERSION_1_3) = GL_TRUE; + CONST_CAST(GLEW_VERSION_1_4) = GL_TRUE; + CONST_CAST(GLEW_VERSION_1_5) = GL_TRUE; + CONST_CAST(GLEW_VERSION_2_0) = GL_TRUE; + if (s[minor] >= '1') + { + CONST_CAST(GLEW_VERSION_2_1) = GL_TRUE; + } + } + else + { + if (s[minor] >= '5') + { + CONST_CAST(GLEW_VERSION_1_2) = GL_TRUE; + CONST_CAST(GLEW_VERSION_1_3) = GL_TRUE; + CONST_CAST(GLEW_VERSION_1_4) = GL_TRUE; + CONST_CAST(GLEW_VERSION_1_5) = GL_TRUE; + CONST_CAST(GLEW_VERSION_2_0) = GL_FALSE; + CONST_CAST(GLEW_VERSION_2_1) = GL_FALSE; + } + if (s[minor] == '4') + { + CONST_CAST(GLEW_VERSION_1_2) = GL_TRUE; + CONST_CAST(GLEW_VERSION_1_3) = GL_TRUE; + CONST_CAST(GLEW_VERSION_1_4) = GL_TRUE; + CONST_CAST(GLEW_VERSION_1_5) = GL_FALSE; + CONST_CAST(GLEW_VERSION_2_0) = GL_FALSE; + CONST_CAST(GLEW_VERSION_2_1) = GL_FALSE; + } + if (s[minor] == '3') + { + CONST_CAST(GLEW_VERSION_1_2) = GL_TRUE; + CONST_CAST(GLEW_VERSION_1_3) = GL_TRUE; + 
CONST_CAST(GLEW_VERSION_1_4) = GL_FALSE; + CONST_CAST(GLEW_VERSION_1_5) = GL_FALSE; + CONST_CAST(GLEW_VERSION_2_0) = GL_FALSE; + CONST_CAST(GLEW_VERSION_2_1) = GL_FALSE; + } + if (s[minor] == '2') + { + CONST_CAST(GLEW_VERSION_1_2) = GL_TRUE; + CONST_CAST(GLEW_VERSION_1_3) = GL_FALSE; + CONST_CAST(GLEW_VERSION_1_4) = GL_FALSE; + CONST_CAST(GLEW_VERSION_1_5) = GL_FALSE; + CONST_CAST(GLEW_VERSION_2_0) = GL_FALSE; + CONST_CAST(GLEW_VERSION_2_1) = GL_FALSE; + } + if (s[minor] < '2') + { + CONST_CAST(GLEW_VERSION_1_2) = GL_FALSE; + CONST_CAST(GLEW_VERSION_1_3) = GL_FALSE; + CONST_CAST(GLEW_VERSION_1_4) = GL_FALSE; + CONST_CAST(GLEW_VERSION_1_5) = GL_FALSE; + CONST_CAST(GLEW_VERSION_2_0) = GL_FALSE; + CONST_CAST(GLEW_VERSION_2_1) = GL_FALSE; + } + } + } + /* initialize extensions */ +#ifdef GL_VERSION_1_2 + if (glewExperimental || GLEW_VERSION_1_2) CONST_CAST(GLEW_VERSION_1_2) = !_glewInit_GL_VERSION_1_2(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_VERSION_1_2 */ +#ifdef GL_VERSION_1_3 + if (glewExperimental || GLEW_VERSION_1_3) CONST_CAST(GLEW_VERSION_1_3) = !_glewInit_GL_VERSION_1_3(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_VERSION_1_3 */ +#ifdef GL_VERSION_1_4 + if (glewExperimental || GLEW_VERSION_1_4) CONST_CAST(GLEW_VERSION_1_4) = !_glewInit_GL_VERSION_1_4(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_VERSION_1_4 */ +#ifdef GL_VERSION_1_5 + if (glewExperimental || GLEW_VERSION_1_5) CONST_CAST(GLEW_VERSION_1_5) = !_glewInit_GL_VERSION_1_5(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_VERSION_1_5 */ +#ifdef GL_VERSION_2_0 + if (glewExperimental || GLEW_VERSION_2_0) CONST_CAST(GLEW_VERSION_2_0) = !_glewInit_GL_VERSION_2_0(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_VERSION_2_0 */ +#ifdef GL_VERSION_2_1 + if (glewExperimental || GLEW_VERSION_2_1) CONST_CAST(GLEW_VERSION_2_1) = !_glewInit_GL_VERSION_2_1(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_VERSION_2_1 */ +#ifdef GL_3DFX_multisample + CONST_CAST(GLEW_3DFX_multisample) = glewGetExtension("GL_3DFX_multisample"); +#endif /* 
GL_3DFX_multisample */ +#ifdef GL_3DFX_tbuffer + CONST_CAST(GLEW_3DFX_tbuffer) = glewGetExtension("GL_3DFX_tbuffer"); + if (glewExperimental || GLEW_3DFX_tbuffer) CONST_CAST(GLEW_3DFX_tbuffer) = !_glewInit_GL_3DFX_tbuffer(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_3DFX_tbuffer */ +#ifdef GL_3DFX_texture_compression_FXT1 + CONST_CAST(GLEW_3DFX_texture_compression_FXT1) = glewGetExtension("GL_3DFX_texture_compression_FXT1"); +#endif /* GL_3DFX_texture_compression_FXT1 */ +#ifdef GL_APPLE_client_storage + CONST_CAST(GLEW_APPLE_client_storage) = glewGetExtension("GL_APPLE_client_storage"); +#endif /* GL_APPLE_client_storage */ +#ifdef GL_APPLE_element_array + CONST_CAST(GLEW_APPLE_element_array) = glewGetExtension("GL_APPLE_element_array"); + if (glewExperimental || GLEW_APPLE_element_array) CONST_CAST(GLEW_APPLE_element_array) = !_glewInit_GL_APPLE_element_array(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_APPLE_element_array */ +#ifdef GL_APPLE_fence + CONST_CAST(GLEW_APPLE_fence) = glewGetExtension("GL_APPLE_fence"); + if (glewExperimental || GLEW_APPLE_fence) CONST_CAST(GLEW_APPLE_fence) = !_glewInit_GL_APPLE_fence(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_APPLE_fence */ +#ifdef GL_APPLE_float_pixels + CONST_CAST(GLEW_APPLE_float_pixels) = glewGetExtension("GL_APPLE_float_pixels"); +#endif /* GL_APPLE_float_pixels */ +#ifdef GL_APPLE_flush_buffer_range + CONST_CAST(GLEW_APPLE_flush_buffer_range) = glewGetExtension("GL_APPLE_flush_buffer_range"); + if (glewExperimental || GLEW_APPLE_flush_buffer_range) CONST_CAST(GLEW_APPLE_flush_buffer_range) = !_glewInit_GL_APPLE_flush_buffer_range(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_APPLE_flush_buffer_range */ +#ifdef GL_APPLE_pixel_buffer + CONST_CAST(GLEW_APPLE_pixel_buffer) = glewGetExtension("GL_APPLE_pixel_buffer"); +#endif /* GL_APPLE_pixel_buffer */ +#ifdef GL_APPLE_specular_vector + CONST_CAST(GLEW_APPLE_specular_vector) = glewGetExtension("GL_APPLE_specular_vector"); +#endif /* GL_APPLE_specular_vector */ +#ifdef 
GL_APPLE_texture_range + CONST_CAST(GLEW_APPLE_texture_range) = glewGetExtension("GL_APPLE_texture_range"); + if (glewExperimental || GLEW_APPLE_texture_range) CONST_CAST(GLEW_APPLE_texture_range) = !_glewInit_GL_APPLE_texture_range(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_APPLE_texture_range */ +#ifdef GL_APPLE_transform_hint + CONST_CAST(GLEW_APPLE_transform_hint) = glewGetExtension("GL_APPLE_transform_hint"); +#endif /* GL_APPLE_transform_hint */ +#ifdef GL_APPLE_vertex_array_object + CONST_CAST(GLEW_APPLE_vertex_array_object) = glewGetExtension("GL_APPLE_vertex_array_object"); + if (glewExperimental || GLEW_APPLE_vertex_array_object) CONST_CAST(GLEW_APPLE_vertex_array_object) = !_glewInit_GL_APPLE_vertex_array_object(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_APPLE_vertex_array_object */ +#ifdef GL_APPLE_vertex_array_range + CONST_CAST(GLEW_APPLE_vertex_array_range) = glewGetExtension("GL_APPLE_vertex_array_range"); + if (glewExperimental || GLEW_APPLE_vertex_array_range) CONST_CAST(GLEW_APPLE_vertex_array_range) = !_glewInit_GL_APPLE_vertex_array_range(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_APPLE_vertex_array_range */ +#ifdef GL_APPLE_ycbcr_422 + CONST_CAST(GLEW_APPLE_ycbcr_422) = glewGetExtension("GL_APPLE_ycbcr_422"); +#endif /* GL_APPLE_ycbcr_422 */ +#ifdef GL_ARB_color_buffer_float + CONST_CAST(GLEW_ARB_color_buffer_float) = glewGetExtension("GL_ARB_color_buffer_float"); + if (glewExperimental || GLEW_ARB_color_buffer_float) CONST_CAST(GLEW_ARB_color_buffer_float) = !_glewInit_GL_ARB_color_buffer_float(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_ARB_color_buffer_float */ +#ifdef GL_ARB_depth_texture + CONST_CAST(GLEW_ARB_depth_texture) = glewGetExtension("GL_ARB_depth_texture"); +#endif /* GL_ARB_depth_texture */ +#ifdef GL_ARB_draw_buffers + CONST_CAST(GLEW_ARB_draw_buffers) = glewGetExtension("GL_ARB_draw_buffers"); + if (glewExperimental || GLEW_ARB_draw_buffers) CONST_CAST(GLEW_ARB_draw_buffers) = 
!_glewInit_GL_ARB_draw_buffers(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_ARB_draw_buffers */ +#ifdef GL_ARB_fragment_program + CONST_CAST(GLEW_ARB_fragment_program) = glewGetExtension("GL_ARB_fragment_program"); +#endif /* GL_ARB_fragment_program */ +#ifdef GL_ARB_fragment_program_shadow + CONST_CAST(GLEW_ARB_fragment_program_shadow) = glewGetExtension("GL_ARB_fragment_program_shadow"); +#endif /* GL_ARB_fragment_program_shadow */ +#ifdef GL_ARB_fragment_shader + CONST_CAST(GLEW_ARB_fragment_shader) = glewGetExtension("GL_ARB_fragment_shader"); +#endif /* GL_ARB_fragment_shader */ +#ifdef GL_ARB_half_float_pixel + CONST_CAST(GLEW_ARB_half_float_pixel) = glewGetExtension("GL_ARB_half_float_pixel"); +#endif /* GL_ARB_half_float_pixel */ +#ifdef GL_ARB_imaging + CONST_CAST(GLEW_ARB_imaging) = glewGetExtension("GL_ARB_imaging"); + if (glewExperimental || GLEW_ARB_imaging) CONST_CAST(GLEW_ARB_imaging) = !_glewInit_GL_ARB_imaging(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_ARB_imaging */ +#ifdef GL_ARB_matrix_palette + CONST_CAST(GLEW_ARB_matrix_palette) = glewGetExtension("GL_ARB_matrix_palette"); + if (glewExperimental || GLEW_ARB_matrix_palette) CONST_CAST(GLEW_ARB_matrix_palette) = !_glewInit_GL_ARB_matrix_palette(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_ARB_matrix_palette */ +#ifdef GL_ARB_multisample + CONST_CAST(GLEW_ARB_multisample) = glewGetExtension("GL_ARB_multisample"); + if (glewExperimental || GLEW_ARB_multisample) CONST_CAST(GLEW_ARB_multisample) = !_glewInit_GL_ARB_multisample(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_ARB_multisample */ +#ifdef GL_ARB_multitexture + CONST_CAST(GLEW_ARB_multitexture) = glewGetExtension("GL_ARB_multitexture"); + if (glewExperimental || GLEW_ARB_multitexture) CONST_CAST(GLEW_ARB_multitexture) = !_glewInit_GL_ARB_multitexture(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_ARB_multitexture */ +#ifdef GL_ARB_occlusion_query + CONST_CAST(GLEW_ARB_occlusion_query) = glewGetExtension("GL_ARB_occlusion_query"); + if (glewExperimental || 
GLEW_ARB_occlusion_query) CONST_CAST(GLEW_ARB_occlusion_query) = !_glewInit_GL_ARB_occlusion_query(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_ARB_occlusion_query */ +#ifdef GL_ARB_pixel_buffer_object + CONST_CAST(GLEW_ARB_pixel_buffer_object) = glewGetExtension("GL_ARB_pixel_buffer_object"); +#endif /* GL_ARB_pixel_buffer_object */ +#ifdef GL_ARB_point_parameters + CONST_CAST(GLEW_ARB_point_parameters) = glewGetExtension("GL_ARB_point_parameters"); + if (glewExperimental || GLEW_ARB_point_parameters) CONST_CAST(GLEW_ARB_point_parameters) = !_glewInit_GL_ARB_point_parameters(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_ARB_point_parameters */ +#ifdef GL_ARB_point_sprite + CONST_CAST(GLEW_ARB_point_sprite) = glewGetExtension("GL_ARB_point_sprite"); +#endif /* GL_ARB_point_sprite */ +#ifdef GL_ARB_shader_objects + CONST_CAST(GLEW_ARB_shader_objects) = glewGetExtension("GL_ARB_shader_objects"); + if (glewExperimental || GLEW_ARB_shader_objects) CONST_CAST(GLEW_ARB_shader_objects) = !_glewInit_GL_ARB_shader_objects(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_ARB_shader_objects */ +#ifdef GL_ARB_shading_language_100 + CONST_CAST(GLEW_ARB_shading_language_100) = glewGetExtension("GL_ARB_shading_language_100"); +#endif /* GL_ARB_shading_language_100 */ +#ifdef GL_ARB_shadow + CONST_CAST(GLEW_ARB_shadow) = glewGetExtension("GL_ARB_shadow"); +#endif /* GL_ARB_shadow */ +#ifdef GL_ARB_shadow_ambient + CONST_CAST(GLEW_ARB_shadow_ambient) = glewGetExtension("GL_ARB_shadow_ambient"); +#endif /* GL_ARB_shadow_ambient */ +#ifdef GL_ARB_texture_border_clamp + CONST_CAST(GLEW_ARB_texture_border_clamp) = glewGetExtension("GL_ARB_texture_border_clamp"); +#endif /* GL_ARB_texture_border_clamp */ +#ifdef GL_ARB_texture_compression + CONST_CAST(GLEW_ARB_texture_compression) = glewGetExtension("GL_ARB_texture_compression"); + if (glewExperimental || GLEW_ARB_texture_compression) CONST_CAST(GLEW_ARB_texture_compression) = !_glewInit_GL_ARB_texture_compression(GLEW_CONTEXT_ARG_VAR_INIT); +#endif 
/* GL_ARB_texture_compression */ +#ifdef GL_ARB_texture_cube_map + CONST_CAST(GLEW_ARB_texture_cube_map) = glewGetExtension("GL_ARB_texture_cube_map"); +#endif /* GL_ARB_texture_cube_map */ +#ifdef GL_ARB_texture_env_add + CONST_CAST(GLEW_ARB_texture_env_add) = glewGetExtension("GL_ARB_texture_env_add"); +#endif /* GL_ARB_texture_env_add */ +#ifdef GL_ARB_texture_env_combine + CONST_CAST(GLEW_ARB_texture_env_combine) = glewGetExtension("GL_ARB_texture_env_combine"); +#endif /* GL_ARB_texture_env_combine */ +#ifdef GL_ARB_texture_env_crossbar + CONST_CAST(GLEW_ARB_texture_env_crossbar) = glewGetExtension("GL_ARB_texture_env_crossbar"); +#endif /* GL_ARB_texture_env_crossbar */ +#ifdef GL_ARB_texture_env_dot3 + CONST_CAST(GLEW_ARB_texture_env_dot3) = glewGetExtension("GL_ARB_texture_env_dot3"); +#endif /* GL_ARB_texture_env_dot3 */ +#ifdef GL_ARB_texture_float + CONST_CAST(GLEW_ARB_texture_float) = glewGetExtension("GL_ARB_texture_float"); +#endif /* GL_ARB_texture_float */ +#ifdef GL_ARB_texture_mirrored_repeat + CONST_CAST(GLEW_ARB_texture_mirrored_repeat) = glewGetExtension("GL_ARB_texture_mirrored_repeat"); +#endif /* GL_ARB_texture_mirrored_repeat */ +#ifdef GL_ARB_texture_non_power_of_two + CONST_CAST(GLEW_ARB_texture_non_power_of_two) = glewGetExtension("GL_ARB_texture_non_power_of_two"); +#endif /* GL_ARB_texture_non_power_of_two */ +#ifdef GL_ARB_texture_rectangle + CONST_CAST(GLEW_ARB_texture_rectangle) = glewGetExtension("GL_ARB_texture_rectangle"); +#endif /* GL_ARB_texture_rectangle */ +#ifdef GL_ARB_transpose_matrix + CONST_CAST(GLEW_ARB_transpose_matrix) = glewGetExtension("GL_ARB_transpose_matrix"); + if (glewExperimental || GLEW_ARB_transpose_matrix) CONST_CAST(GLEW_ARB_transpose_matrix) = !_glewInit_GL_ARB_transpose_matrix(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_ARB_transpose_matrix */ +#ifdef GL_ARB_vertex_blend + CONST_CAST(GLEW_ARB_vertex_blend) = glewGetExtension("GL_ARB_vertex_blend"); + if (glewExperimental || GLEW_ARB_vertex_blend) 
CONST_CAST(GLEW_ARB_vertex_blend) = !_glewInit_GL_ARB_vertex_blend(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_ARB_vertex_blend */ +#ifdef GL_ARB_vertex_buffer_object + CONST_CAST(GLEW_ARB_vertex_buffer_object) = glewGetExtension("GL_ARB_vertex_buffer_object"); + if (glewExperimental || GLEW_ARB_vertex_buffer_object) CONST_CAST(GLEW_ARB_vertex_buffer_object) = !_glewInit_GL_ARB_vertex_buffer_object(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_ARB_vertex_buffer_object */ +#ifdef GL_ARB_vertex_program + CONST_CAST(GLEW_ARB_vertex_program) = glewGetExtension("GL_ARB_vertex_program"); + if (glewExperimental || GLEW_ARB_vertex_program) CONST_CAST(GLEW_ARB_vertex_program) = !_glewInit_GL_ARB_vertex_program(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_ARB_vertex_program */ +#ifdef GL_ARB_vertex_shader + CONST_CAST(GLEW_ARB_vertex_shader) = glewGetExtension("GL_ARB_vertex_shader"); + if (glewExperimental || GLEW_ARB_vertex_shader) CONST_CAST(GLEW_ARB_vertex_shader) = !_glewInit_GL_ARB_vertex_shader(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_ARB_vertex_shader */ +#ifdef GL_ARB_window_pos + CONST_CAST(GLEW_ARB_window_pos) = glewGetExtension("GL_ARB_window_pos"); + if (glewExperimental || GLEW_ARB_window_pos) CONST_CAST(GLEW_ARB_window_pos) = !_glewInit_GL_ARB_window_pos(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_ARB_window_pos */ +#ifdef GL_ATIX_point_sprites + CONST_CAST(GLEW_ATIX_point_sprites) = glewGetExtension("GL_ATIX_point_sprites"); +#endif /* GL_ATIX_point_sprites */ +#ifdef GL_ATIX_texture_env_combine3 + CONST_CAST(GLEW_ATIX_texture_env_combine3) = glewGetExtension("GL_ATIX_texture_env_combine3"); +#endif /* GL_ATIX_texture_env_combine3 */ +#ifdef GL_ATIX_texture_env_route + CONST_CAST(GLEW_ATIX_texture_env_route) = glewGetExtension("GL_ATIX_texture_env_route"); +#endif /* GL_ATIX_texture_env_route */ +#ifdef GL_ATIX_vertex_shader_output_point_size + CONST_CAST(GLEW_ATIX_vertex_shader_output_point_size) = glewGetExtension("GL_ATIX_vertex_shader_output_point_size"); +#endif 
/* GL_ATIX_vertex_shader_output_point_size */ +#ifdef GL_ATI_draw_buffers + CONST_CAST(GLEW_ATI_draw_buffers) = glewGetExtension("GL_ATI_draw_buffers"); + if (glewExperimental || GLEW_ATI_draw_buffers) CONST_CAST(GLEW_ATI_draw_buffers) = !_glewInit_GL_ATI_draw_buffers(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_ATI_draw_buffers */ +#ifdef GL_ATI_element_array + CONST_CAST(GLEW_ATI_element_array) = glewGetExtension("GL_ATI_element_array"); + if (glewExperimental || GLEW_ATI_element_array) CONST_CAST(GLEW_ATI_element_array) = !_glewInit_GL_ATI_element_array(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_ATI_element_array */ +#ifdef GL_ATI_envmap_bumpmap + CONST_CAST(GLEW_ATI_envmap_bumpmap) = glewGetExtension("GL_ATI_envmap_bumpmap"); + if (glewExperimental || GLEW_ATI_envmap_bumpmap) CONST_CAST(GLEW_ATI_envmap_bumpmap) = !_glewInit_GL_ATI_envmap_bumpmap(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_ATI_envmap_bumpmap */ +#ifdef GL_ATI_fragment_shader + CONST_CAST(GLEW_ATI_fragment_shader) = glewGetExtension("GL_ATI_fragment_shader"); + if (glewExperimental || GLEW_ATI_fragment_shader) CONST_CAST(GLEW_ATI_fragment_shader) = !_glewInit_GL_ATI_fragment_shader(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_ATI_fragment_shader */ +#ifdef GL_ATI_map_object_buffer + CONST_CAST(GLEW_ATI_map_object_buffer) = glewGetExtension("GL_ATI_map_object_buffer"); + if (glewExperimental || GLEW_ATI_map_object_buffer) CONST_CAST(GLEW_ATI_map_object_buffer) = !_glewInit_GL_ATI_map_object_buffer(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_ATI_map_object_buffer */ +#ifdef GL_ATI_pn_triangles + CONST_CAST(GLEW_ATI_pn_triangles) = glewGetExtension("GL_ATI_pn_triangles"); + if (glewExperimental || GLEW_ATI_pn_triangles) CONST_CAST(GLEW_ATI_pn_triangles) = !_glewInit_GL_ATI_pn_triangles(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_ATI_pn_triangles */ +#ifdef GL_ATI_separate_stencil + CONST_CAST(GLEW_ATI_separate_stencil) = glewGetExtension("GL_ATI_separate_stencil"); + if (glewExperimental || 
GLEW_ATI_separate_stencil) CONST_CAST(GLEW_ATI_separate_stencil) = !_glewInit_GL_ATI_separate_stencil(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_ATI_separate_stencil */ +#ifdef GL_ATI_shader_texture_lod + CONST_CAST(GLEW_ATI_shader_texture_lod) = glewGetExtension("GL_ATI_shader_texture_lod"); +#endif /* GL_ATI_shader_texture_lod */ +#ifdef GL_ATI_text_fragment_shader + CONST_CAST(GLEW_ATI_text_fragment_shader) = glewGetExtension("GL_ATI_text_fragment_shader"); +#endif /* GL_ATI_text_fragment_shader */ +#ifdef GL_ATI_texture_compression_3dc + CONST_CAST(GLEW_ATI_texture_compression_3dc) = glewGetExtension("GL_ATI_texture_compression_3dc"); +#endif /* GL_ATI_texture_compression_3dc */ +#ifdef GL_ATI_texture_env_combine3 + CONST_CAST(GLEW_ATI_texture_env_combine3) = glewGetExtension("GL_ATI_texture_env_combine3"); +#endif /* GL_ATI_texture_env_combine3 */ +#ifdef GL_ATI_texture_float + CONST_CAST(GLEW_ATI_texture_float) = glewGetExtension("GL_ATI_texture_float"); +#endif /* GL_ATI_texture_float */ +#ifdef GL_ATI_texture_mirror_once + CONST_CAST(GLEW_ATI_texture_mirror_once) = glewGetExtension("GL_ATI_texture_mirror_once"); +#endif /* GL_ATI_texture_mirror_once */ +#ifdef GL_ATI_vertex_array_object + CONST_CAST(GLEW_ATI_vertex_array_object) = glewGetExtension("GL_ATI_vertex_array_object"); + if (glewExperimental || GLEW_ATI_vertex_array_object) CONST_CAST(GLEW_ATI_vertex_array_object) = !_glewInit_GL_ATI_vertex_array_object(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_ATI_vertex_array_object */ +#ifdef GL_ATI_vertex_attrib_array_object + CONST_CAST(GLEW_ATI_vertex_attrib_array_object) = glewGetExtension("GL_ATI_vertex_attrib_array_object"); + if (glewExperimental || GLEW_ATI_vertex_attrib_array_object) CONST_CAST(GLEW_ATI_vertex_attrib_array_object) = !_glewInit_GL_ATI_vertex_attrib_array_object(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_ATI_vertex_attrib_array_object */ +#ifdef GL_ATI_vertex_streams + CONST_CAST(GLEW_ATI_vertex_streams) = 
glewGetExtension("GL_ATI_vertex_streams"); + if (glewExperimental || GLEW_ATI_vertex_streams) CONST_CAST(GLEW_ATI_vertex_streams) = !_glewInit_GL_ATI_vertex_streams(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_ATI_vertex_streams */ +#ifdef GL_EXT_422_pixels + CONST_CAST(GLEW_EXT_422_pixels) = glewGetExtension("GL_EXT_422_pixels"); +#endif /* GL_EXT_422_pixels */ +#ifdef GL_EXT_Cg_shader + CONST_CAST(GLEW_EXT_Cg_shader) = glewGetExtension("GL_EXT_Cg_shader"); +#endif /* GL_EXT_Cg_shader */ +#ifdef GL_EXT_abgr + CONST_CAST(GLEW_EXT_abgr) = glewGetExtension("GL_EXT_abgr"); +#endif /* GL_EXT_abgr */ +#ifdef GL_EXT_bgra + CONST_CAST(GLEW_EXT_bgra) = glewGetExtension("GL_EXT_bgra"); +#endif /* GL_EXT_bgra */ +#ifdef GL_EXT_bindable_uniform + CONST_CAST(GLEW_EXT_bindable_uniform) = glewGetExtension("GL_EXT_bindable_uniform"); + if (glewExperimental || GLEW_EXT_bindable_uniform) CONST_CAST(GLEW_EXT_bindable_uniform) = !_glewInit_GL_EXT_bindable_uniform(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_EXT_bindable_uniform */ +#ifdef GL_EXT_blend_color + CONST_CAST(GLEW_EXT_blend_color) = glewGetExtension("GL_EXT_blend_color"); + if (glewExperimental || GLEW_EXT_blend_color) CONST_CAST(GLEW_EXT_blend_color) = !_glewInit_GL_EXT_blend_color(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_EXT_blend_color */ +#ifdef GL_EXT_blend_equation_separate + CONST_CAST(GLEW_EXT_blend_equation_separate) = glewGetExtension("GL_EXT_blend_equation_separate"); + if (glewExperimental || GLEW_EXT_blend_equation_separate) CONST_CAST(GLEW_EXT_blend_equation_separate) = !_glewInit_GL_EXT_blend_equation_separate(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_EXT_blend_equation_separate */ +#ifdef GL_EXT_blend_func_separate + CONST_CAST(GLEW_EXT_blend_func_separate) = glewGetExtension("GL_EXT_blend_func_separate"); + if (glewExperimental || GLEW_EXT_blend_func_separate) CONST_CAST(GLEW_EXT_blend_func_separate) = !_glewInit_GL_EXT_blend_func_separate(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_EXT_blend_func_separate */ 
+#ifdef GL_EXT_blend_logic_op + CONST_CAST(GLEW_EXT_blend_logic_op) = glewGetExtension("GL_EXT_blend_logic_op"); +#endif /* GL_EXT_blend_logic_op */ +#ifdef GL_EXT_blend_minmax + CONST_CAST(GLEW_EXT_blend_minmax) = glewGetExtension("GL_EXT_blend_minmax"); + if (glewExperimental || GLEW_EXT_blend_minmax) CONST_CAST(GLEW_EXT_blend_minmax) = !_glewInit_GL_EXT_blend_minmax(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_EXT_blend_minmax */ +#ifdef GL_EXT_blend_subtract + CONST_CAST(GLEW_EXT_blend_subtract) = glewGetExtension("GL_EXT_blend_subtract"); +#endif /* GL_EXT_blend_subtract */ +#ifdef GL_EXT_clip_volume_hint + CONST_CAST(GLEW_EXT_clip_volume_hint) = glewGetExtension("GL_EXT_clip_volume_hint"); +#endif /* GL_EXT_clip_volume_hint */ +#ifdef GL_EXT_cmyka + CONST_CAST(GLEW_EXT_cmyka) = glewGetExtension("GL_EXT_cmyka"); +#endif /* GL_EXT_cmyka */ +#ifdef GL_EXT_color_subtable + CONST_CAST(GLEW_EXT_color_subtable) = glewGetExtension("GL_EXT_color_subtable"); + if (glewExperimental || GLEW_EXT_color_subtable) CONST_CAST(GLEW_EXT_color_subtable) = !_glewInit_GL_EXT_color_subtable(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_EXT_color_subtable */ +#ifdef GL_EXT_compiled_vertex_array + CONST_CAST(GLEW_EXT_compiled_vertex_array) = glewGetExtension("GL_EXT_compiled_vertex_array"); + if (glewExperimental || GLEW_EXT_compiled_vertex_array) CONST_CAST(GLEW_EXT_compiled_vertex_array) = !_glewInit_GL_EXT_compiled_vertex_array(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_EXT_compiled_vertex_array */ +#ifdef GL_EXT_convolution + CONST_CAST(GLEW_EXT_convolution) = glewGetExtension("GL_EXT_convolution"); + if (glewExperimental || GLEW_EXT_convolution) CONST_CAST(GLEW_EXT_convolution) = !_glewInit_GL_EXT_convolution(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_EXT_convolution */ +#ifdef GL_EXT_coordinate_frame + CONST_CAST(GLEW_EXT_coordinate_frame) = glewGetExtension("GL_EXT_coordinate_frame"); + if (glewExperimental || GLEW_EXT_coordinate_frame) CONST_CAST(GLEW_EXT_coordinate_frame) = 
!_glewInit_GL_EXT_coordinate_frame(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_EXT_coordinate_frame */ +#ifdef GL_EXT_copy_texture + CONST_CAST(GLEW_EXT_copy_texture) = glewGetExtension("GL_EXT_copy_texture"); + if (glewExperimental || GLEW_EXT_copy_texture) CONST_CAST(GLEW_EXT_copy_texture) = !_glewInit_GL_EXT_copy_texture(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_EXT_copy_texture */ +#ifdef GL_EXT_cull_vertex + CONST_CAST(GLEW_EXT_cull_vertex) = glewGetExtension("GL_EXT_cull_vertex"); + if (glewExperimental || GLEW_EXT_cull_vertex) CONST_CAST(GLEW_EXT_cull_vertex) = !_glewInit_GL_EXT_cull_vertex(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_EXT_cull_vertex */ +#ifdef GL_EXT_depth_bounds_test + CONST_CAST(GLEW_EXT_depth_bounds_test) = glewGetExtension("GL_EXT_depth_bounds_test"); + if (glewExperimental || GLEW_EXT_depth_bounds_test) CONST_CAST(GLEW_EXT_depth_bounds_test) = !_glewInit_GL_EXT_depth_bounds_test(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_EXT_depth_bounds_test */ +#ifdef GL_EXT_draw_buffers2 + CONST_CAST(GLEW_EXT_draw_buffers2) = glewGetExtension("GL_EXT_draw_buffers2"); + if (glewExperimental || GLEW_EXT_draw_buffers2) CONST_CAST(GLEW_EXT_draw_buffers2) = !_glewInit_GL_EXT_draw_buffers2(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_EXT_draw_buffers2 */ +#ifdef GL_EXT_draw_instanced + CONST_CAST(GLEW_EXT_draw_instanced) = glewGetExtension("GL_EXT_draw_instanced"); + if (glewExperimental || GLEW_EXT_draw_instanced) CONST_CAST(GLEW_EXT_draw_instanced) = !_glewInit_GL_EXT_draw_instanced(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_EXT_draw_instanced */ +#ifdef GL_EXT_draw_range_elements + CONST_CAST(GLEW_EXT_draw_range_elements) = glewGetExtension("GL_EXT_draw_range_elements"); + if (glewExperimental || GLEW_EXT_draw_range_elements) CONST_CAST(GLEW_EXT_draw_range_elements) = !_glewInit_GL_EXT_draw_range_elements(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_EXT_draw_range_elements */ +#ifdef GL_EXT_fog_coord + CONST_CAST(GLEW_EXT_fog_coord) = 
glewGetExtension("GL_EXT_fog_coord"); + if (glewExperimental || GLEW_EXT_fog_coord) CONST_CAST(GLEW_EXT_fog_coord) = !_glewInit_GL_EXT_fog_coord(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_EXT_fog_coord */ +#ifdef GL_EXT_fragment_lighting + CONST_CAST(GLEW_EXT_fragment_lighting) = glewGetExtension("GL_EXT_fragment_lighting"); + if (glewExperimental || GLEW_EXT_fragment_lighting) CONST_CAST(GLEW_EXT_fragment_lighting) = !_glewInit_GL_EXT_fragment_lighting(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_EXT_fragment_lighting */ +#ifdef GL_EXT_framebuffer_blit + CONST_CAST(GLEW_EXT_framebuffer_blit) = glewGetExtension("GL_EXT_framebuffer_blit"); + if (glewExperimental || GLEW_EXT_framebuffer_blit) CONST_CAST(GLEW_EXT_framebuffer_blit) = !_glewInit_GL_EXT_framebuffer_blit(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_EXT_framebuffer_blit */ +#ifdef GL_EXT_framebuffer_multisample + CONST_CAST(GLEW_EXT_framebuffer_multisample) = glewGetExtension("GL_EXT_framebuffer_multisample"); + if (glewExperimental || GLEW_EXT_framebuffer_multisample) CONST_CAST(GLEW_EXT_framebuffer_multisample) = !_glewInit_GL_EXT_framebuffer_multisample(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_EXT_framebuffer_multisample */ +#ifdef GL_EXT_framebuffer_object + CONST_CAST(GLEW_EXT_framebuffer_object) = glewGetExtension("GL_EXT_framebuffer_object"); + if (glewExperimental || GLEW_EXT_framebuffer_object) CONST_CAST(GLEW_EXT_framebuffer_object) = !_glewInit_GL_EXT_framebuffer_object(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_EXT_framebuffer_object */ +#ifdef GL_EXT_framebuffer_sRGB + CONST_CAST(GLEW_EXT_framebuffer_sRGB) = glewGetExtension("GL_EXT_framebuffer_sRGB"); +#endif /* GL_EXT_framebuffer_sRGB */ +#ifdef GL_EXT_geometry_shader4 + CONST_CAST(GLEW_EXT_geometry_shader4) = glewGetExtension("GL_EXT_geometry_shader4"); + if (glewExperimental || GLEW_EXT_geometry_shader4) CONST_CAST(GLEW_EXT_geometry_shader4) = !_glewInit_GL_EXT_geometry_shader4(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_EXT_geometry_shader4 */ 
+#ifdef GL_EXT_gpu_program_parameters + CONST_CAST(GLEW_EXT_gpu_program_parameters) = glewGetExtension("GL_EXT_gpu_program_parameters"); + if (glewExperimental || GLEW_EXT_gpu_program_parameters) CONST_CAST(GLEW_EXT_gpu_program_parameters) = !_glewInit_GL_EXT_gpu_program_parameters(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_EXT_gpu_program_parameters */ +#ifdef GL_EXT_gpu_shader4 + CONST_CAST(GLEW_EXT_gpu_shader4) = glewGetExtension("GL_EXT_gpu_shader4"); + if (glewExperimental || GLEW_EXT_gpu_shader4) CONST_CAST(GLEW_EXT_gpu_shader4) = !_glewInit_GL_EXT_gpu_shader4(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_EXT_gpu_shader4 */ +#ifdef GL_EXT_histogram + CONST_CAST(GLEW_EXT_histogram) = glewGetExtension("GL_EXT_histogram"); + if (glewExperimental || GLEW_EXT_histogram) CONST_CAST(GLEW_EXT_histogram) = !_glewInit_GL_EXT_histogram(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_EXT_histogram */ +#ifdef GL_EXT_index_array_formats + CONST_CAST(GLEW_EXT_index_array_formats) = glewGetExtension("GL_EXT_index_array_formats"); +#endif /* GL_EXT_index_array_formats */ +#ifdef GL_EXT_index_func + CONST_CAST(GLEW_EXT_index_func) = glewGetExtension("GL_EXT_index_func"); + if (glewExperimental || GLEW_EXT_index_func) CONST_CAST(GLEW_EXT_index_func) = !_glewInit_GL_EXT_index_func(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_EXT_index_func */ +#ifdef GL_EXT_index_material + CONST_CAST(GLEW_EXT_index_material) = glewGetExtension("GL_EXT_index_material"); + if (glewExperimental || GLEW_EXT_index_material) CONST_CAST(GLEW_EXT_index_material) = !_glewInit_GL_EXT_index_material(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_EXT_index_material */ +#ifdef GL_EXT_index_texture + CONST_CAST(GLEW_EXT_index_texture) = glewGetExtension("GL_EXT_index_texture"); +#endif /* GL_EXT_index_texture */ +#ifdef GL_EXT_light_texture + CONST_CAST(GLEW_EXT_light_texture) = glewGetExtension("GL_EXT_light_texture"); + if (glewExperimental || GLEW_EXT_light_texture) CONST_CAST(GLEW_EXT_light_texture) = 
!_glewInit_GL_EXT_light_texture(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_EXT_light_texture */ +#ifdef GL_EXT_misc_attribute + CONST_CAST(GLEW_EXT_misc_attribute) = glewGetExtension("GL_EXT_misc_attribute"); +#endif /* GL_EXT_misc_attribute */ +#ifdef GL_EXT_multi_draw_arrays + CONST_CAST(GLEW_EXT_multi_draw_arrays) = glewGetExtension("GL_EXT_multi_draw_arrays"); + if (glewExperimental || GLEW_EXT_multi_draw_arrays) CONST_CAST(GLEW_EXT_multi_draw_arrays) = !_glewInit_GL_EXT_multi_draw_arrays(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_EXT_multi_draw_arrays */ +#ifdef GL_EXT_multisample + CONST_CAST(GLEW_EXT_multisample) = glewGetExtension("GL_EXT_multisample"); + if (glewExperimental || GLEW_EXT_multisample) CONST_CAST(GLEW_EXT_multisample) = !_glewInit_GL_EXT_multisample(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_EXT_multisample */ +#ifdef GL_EXT_packed_depth_stencil + CONST_CAST(GLEW_EXT_packed_depth_stencil) = glewGetExtension("GL_EXT_packed_depth_stencil"); +#endif /* GL_EXT_packed_depth_stencil */ +#ifdef GL_EXT_packed_float + CONST_CAST(GLEW_EXT_packed_float) = glewGetExtension("GL_EXT_packed_float"); +#endif /* GL_EXT_packed_float */ +#ifdef GL_EXT_packed_pixels + CONST_CAST(GLEW_EXT_packed_pixels) = glewGetExtension("GL_EXT_packed_pixels"); +#endif /* GL_EXT_packed_pixels */ +#ifdef GL_EXT_paletted_texture + CONST_CAST(GLEW_EXT_paletted_texture) = glewGetExtension("GL_EXT_paletted_texture"); + if (glewExperimental || GLEW_EXT_paletted_texture) CONST_CAST(GLEW_EXT_paletted_texture) = !_glewInit_GL_EXT_paletted_texture(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_EXT_paletted_texture */ +#ifdef GL_EXT_pixel_buffer_object + CONST_CAST(GLEW_EXT_pixel_buffer_object) = glewGetExtension("GL_EXT_pixel_buffer_object"); +#endif /* GL_EXT_pixel_buffer_object */ +#ifdef GL_EXT_pixel_transform + CONST_CAST(GLEW_EXT_pixel_transform) = glewGetExtension("GL_EXT_pixel_transform"); + if (glewExperimental || GLEW_EXT_pixel_transform) CONST_CAST(GLEW_EXT_pixel_transform) = 
!_glewInit_GL_EXT_pixel_transform(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_EXT_pixel_transform */ +#ifdef GL_EXT_pixel_transform_color_table + CONST_CAST(GLEW_EXT_pixel_transform_color_table) = glewGetExtension("GL_EXT_pixel_transform_color_table"); +#endif /* GL_EXT_pixel_transform_color_table */ +#ifdef GL_EXT_point_parameters + CONST_CAST(GLEW_EXT_point_parameters) = glewGetExtension("GL_EXT_point_parameters"); + if (glewExperimental || GLEW_EXT_point_parameters) CONST_CAST(GLEW_EXT_point_parameters) = !_glewInit_GL_EXT_point_parameters(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_EXT_point_parameters */ +#ifdef GL_EXT_polygon_offset + CONST_CAST(GLEW_EXT_polygon_offset) = glewGetExtension("GL_EXT_polygon_offset"); + if (glewExperimental || GLEW_EXT_polygon_offset) CONST_CAST(GLEW_EXT_polygon_offset) = !_glewInit_GL_EXT_polygon_offset(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_EXT_polygon_offset */ +#ifdef GL_EXT_rescale_normal + CONST_CAST(GLEW_EXT_rescale_normal) = glewGetExtension("GL_EXT_rescale_normal"); +#endif /* GL_EXT_rescale_normal */ +#ifdef GL_EXT_scene_marker + CONST_CAST(GLEW_EXT_scene_marker) = glewGetExtension("GL_EXT_scene_marker"); + if (glewExperimental || GLEW_EXT_scene_marker) CONST_CAST(GLEW_EXT_scene_marker) = !_glewInit_GL_EXT_scene_marker(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_EXT_scene_marker */ +#ifdef GL_EXT_secondary_color + CONST_CAST(GLEW_EXT_secondary_color) = glewGetExtension("GL_EXT_secondary_color"); + if (glewExperimental || GLEW_EXT_secondary_color) CONST_CAST(GLEW_EXT_secondary_color) = !_glewInit_GL_EXT_secondary_color(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_EXT_secondary_color */ +#ifdef GL_EXT_separate_specular_color + CONST_CAST(GLEW_EXT_separate_specular_color) = glewGetExtension("GL_EXT_separate_specular_color"); +#endif /* GL_EXT_separate_specular_color */ +#ifdef GL_EXT_shadow_funcs + CONST_CAST(GLEW_EXT_shadow_funcs) = glewGetExtension("GL_EXT_shadow_funcs"); +#endif /* GL_EXT_shadow_funcs */ +#ifdef 
GL_EXT_shared_texture_palette + CONST_CAST(GLEW_EXT_shared_texture_palette) = glewGetExtension("GL_EXT_shared_texture_palette"); +#endif /* GL_EXT_shared_texture_palette */ +#ifdef GL_EXT_stencil_clear_tag + CONST_CAST(GLEW_EXT_stencil_clear_tag) = glewGetExtension("GL_EXT_stencil_clear_tag"); +#endif /* GL_EXT_stencil_clear_tag */ +#ifdef GL_EXT_stencil_two_side + CONST_CAST(GLEW_EXT_stencil_two_side) = glewGetExtension("GL_EXT_stencil_two_side"); + if (glewExperimental || GLEW_EXT_stencil_two_side) CONST_CAST(GLEW_EXT_stencil_two_side) = !_glewInit_GL_EXT_stencil_two_side(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_EXT_stencil_two_side */ +#ifdef GL_EXT_stencil_wrap + CONST_CAST(GLEW_EXT_stencil_wrap) = glewGetExtension("GL_EXT_stencil_wrap"); +#endif /* GL_EXT_stencil_wrap */ +#ifdef GL_EXT_subtexture + CONST_CAST(GLEW_EXT_subtexture) = glewGetExtension("GL_EXT_subtexture"); + if (glewExperimental || GLEW_EXT_subtexture) CONST_CAST(GLEW_EXT_subtexture) = !_glewInit_GL_EXT_subtexture(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_EXT_subtexture */ +#ifdef GL_EXT_texture + CONST_CAST(GLEW_EXT_texture) = glewGetExtension("GL_EXT_texture"); +#endif /* GL_EXT_texture */ +#ifdef GL_EXT_texture3D + CONST_CAST(GLEW_EXT_texture3D) = glewGetExtension("GL_EXT_texture3D"); + if (glewExperimental || GLEW_EXT_texture3D) CONST_CAST(GLEW_EXT_texture3D) = !_glewInit_GL_EXT_texture3D(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_EXT_texture3D */ +#ifdef GL_EXT_texture_array + CONST_CAST(GLEW_EXT_texture_array) = glewGetExtension("GL_EXT_texture_array"); +#endif /* GL_EXT_texture_array */ +#ifdef GL_EXT_texture_buffer_object + CONST_CAST(GLEW_EXT_texture_buffer_object) = glewGetExtension("GL_EXT_texture_buffer_object"); + if (glewExperimental || GLEW_EXT_texture_buffer_object) CONST_CAST(GLEW_EXT_texture_buffer_object) = !_glewInit_GL_EXT_texture_buffer_object(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_EXT_texture_buffer_object */ +#ifdef GL_EXT_texture_compression_dxt1 + 
CONST_CAST(GLEW_EXT_texture_compression_dxt1) = glewGetExtension("GL_EXT_texture_compression_dxt1"); +#endif /* GL_EXT_texture_compression_dxt1 */ +#ifdef GL_EXT_texture_compression_latc + CONST_CAST(GLEW_EXT_texture_compression_latc) = glewGetExtension("GL_EXT_texture_compression_latc"); +#endif /* GL_EXT_texture_compression_latc */ +#ifdef GL_EXT_texture_compression_rgtc + CONST_CAST(GLEW_EXT_texture_compression_rgtc) = glewGetExtension("GL_EXT_texture_compression_rgtc"); +#endif /* GL_EXT_texture_compression_rgtc */ +#ifdef GL_EXT_texture_compression_s3tc + CONST_CAST(GLEW_EXT_texture_compression_s3tc) = glewGetExtension("GL_EXT_texture_compression_s3tc"); +#endif /* GL_EXT_texture_compression_s3tc */ +#ifdef GL_EXT_texture_cube_map + CONST_CAST(GLEW_EXT_texture_cube_map) = glewGetExtension("GL_EXT_texture_cube_map"); +#endif /* GL_EXT_texture_cube_map */ +#ifdef GL_EXT_texture_edge_clamp + CONST_CAST(GLEW_EXT_texture_edge_clamp) = glewGetExtension("GL_EXT_texture_edge_clamp"); +#endif /* GL_EXT_texture_edge_clamp */ +#ifdef GL_EXT_texture_env + CONST_CAST(GLEW_EXT_texture_env) = glewGetExtension("GL_EXT_texture_env"); +#endif /* GL_EXT_texture_env */ +#ifdef GL_EXT_texture_env_add + CONST_CAST(GLEW_EXT_texture_env_add) = glewGetExtension("GL_EXT_texture_env_add"); +#endif /* GL_EXT_texture_env_add */ +#ifdef GL_EXT_texture_env_combine + CONST_CAST(GLEW_EXT_texture_env_combine) = glewGetExtension("GL_EXT_texture_env_combine"); +#endif /* GL_EXT_texture_env_combine */ +#ifdef GL_EXT_texture_env_dot3 + CONST_CAST(GLEW_EXT_texture_env_dot3) = glewGetExtension("GL_EXT_texture_env_dot3"); +#endif /* GL_EXT_texture_env_dot3 */ +#ifdef GL_EXT_texture_filter_anisotropic + CONST_CAST(GLEW_EXT_texture_filter_anisotropic) = glewGetExtension("GL_EXT_texture_filter_anisotropic"); +#endif /* GL_EXT_texture_filter_anisotropic */ +#ifdef GL_EXT_texture_integer + CONST_CAST(GLEW_EXT_texture_integer) = glewGetExtension("GL_EXT_texture_integer"); + if (glewExperimental || 
GLEW_EXT_texture_integer) CONST_CAST(GLEW_EXT_texture_integer) = !_glewInit_GL_EXT_texture_integer(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_EXT_texture_integer */ +#ifdef GL_EXT_texture_lod_bias + CONST_CAST(GLEW_EXT_texture_lod_bias) = glewGetExtension("GL_EXT_texture_lod_bias"); +#endif /* GL_EXT_texture_lod_bias */ +#ifdef GL_EXT_texture_mirror_clamp + CONST_CAST(GLEW_EXT_texture_mirror_clamp) = glewGetExtension("GL_EXT_texture_mirror_clamp"); +#endif /* GL_EXT_texture_mirror_clamp */ +#ifdef GL_EXT_texture_object + CONST_CAST(GLEW_EXT_texture_object) = glewGetExtension("GL_EXT_texture_object"); + if (glewExperimental || GLEW_EXT_texture_object) CONST_CAST(GLEW_EXT_texture_object) = !_glewInit_GL_EXT_texture_object(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_EXT_texture_object */ +#ifdef GL_EXT_texture_perturb_normal + CONST_CAST(GLEW_EXT_texture_perturb_normal) = glewGetExtension("GL_EXT_texture_perturb_normal"); + if (glewExperimental || GLEW_EXT_texture_perturb_normal) CONST_CAST(GLEW_EXT_texture_perturb_normal) = !_glewInit_GL_EXT_texture_perturb_normal(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_EXT_texture_perturb_normal */ +#ifdef GL_EXT_texture_rectangle + CONST_CAST(GLEW_EXT_texture_rectangle) = glewGetExtension("GL_EXT_texture_rectangle"); +#endif /* GL_EXT_texture_rectangle */ +#ifdef GL_EXT_texture_sRGB + CONST_CAST(GLEW_EXT_texture_sRGB) = glewGetExtension("GL_EXT_texture_sRGB"); +#endif /* GL_EXT_texture_sRGB */ +#ifdef GL_EXT_texture_shared_exponent + CONST_CAST(GLEW_EXT_texture_shared_exponent) = glewGetExtension("GL_EXT_texture_shared_exponent"); +#endif /* GL_EXT_texture_shared_exponent */ +#ifdef GL_EXT_timer_query + CONST_CAST(GLEW_EXT_timer_query) = glewGetExtension("GL_EXT_timer_query"); + if (glewExperimental || GLEW_EXT_timer_query) CONST_CAST(GLEW_EXT_timer_query) = !_glewInit_GL_EXT_timer_query(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_EXT_timer_query */ +#ifdef GL_EXT_vertex_array + CONST_CAST(GLEW_EXT_vertex_array) = 
glewGetExtension("GL_EXT_vertex_array"); + if (glewExperimental || GLEW_EXT_vertex_array) CONST_CAST(GLEW_EXT_vertex_array) = !_glewInit_GL_EXT_vertex_array(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_EXT_vertex_array */ +#ifdef GL_EXT_vertex_shader + CONST_CAST(GLEW_EXT_vertex_shader) = glewGetExtension("GL_EXT_vertex_shader"); + if (glewExperimental || GLEW_EXT_vertex_shader) CONST_CAST(GLEW_EXT_vertex_shader) = !_glewInit_GL_EXT_vertex_shader(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_EXT_vertex_shader */ +#ifdef GL_EXT_vertex_weighting + CONST_CAST(GLEW_EXT_vertex_weighting) = glewGetExtension("GL_EXT_vertex_weighting"); + if (glewExperimental || GLEW_EXT_vertex_weighting) CONST_CAST(GLEW_EXT_vertex_weighting) = !_glewInit_GL_EXT_vertex_weighting(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_EXT_vertex_weighting */ +#ifdef GL_GREMEDY_frame_terminator + CONST_CAST(GLEW_GREMEDY_frame_terminator) = glewGetExtension("GL_GREMEDY_frame_terminator"); + if (glewExperimental || GLEW_GREMEDY_frame_terminator) CONST_CAST(GLEW_GREMEDY_frame_terminator) = !_glewInit_GL_GREMEDY_frame_terminator(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_GREMEDY_frame_terminator */ +#ifdef GL_GREMEDY_string_marker + CONST_CAST(GLEW_GREMEDY_string_marker) = glewGetExtension("GL_GREMEDY_string_marker"); + if (glewExperimental || GLEW_GREMEDY_string_marker) CONST_CAST(GLEW_GREMEDY_string_marker) = !_glewInit_GL_GREMEDY_string_marker(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_GREMEDY_string_marker */ +#ifdef GL_HP_convolution_border_modes + CONST_CAST(GLEW_HP_convolution_border_modes) = glewGetExtension("GL_HP_convolution_border_modes"); +#endif /* GL_HP_convolution_border_modes */ +#ifdef GL_HP_image_transform + CONST_CAST(GLEW_HP_image_transform) = glewGetExtension("GL_HP_image_transform"); + if (glewExperimental || GLEW_HP_image_transform) CONST_CAST(GLEW_HP_image_transform) = !_glewInit_GL_HP_image_transform(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_HP_image_transform */ +#ifdef GL_HP_occlusion_test + 
CONST_CAST(GLEW_HP_occlusion_test) = glewGetExtension("GL_HP_occlusion_test"); +#endif /* GL_HP_occlusion_test */ +#ifdef GL_HP_texture_lighting + CONST_CAST(GLEW_HP_texture_lighting) = glewGetExtension("GL_HP_texture_lighting"); +#endif /* GL_HP_texture_lighting */ +#ifdef GL_IBM_cull_vertex + CONST_CAST(GLEW_IBM_cull_vertex) = glewGetExtension("GL_IBM_cull_vertex"); +#endif /* GL_IBM_cull_vertex */ +#ifdef GL_IBM_multimode_draw_arrays + CONST_CAST(GLEW_IBM_multimode_draw_arrays) = glewGetExtension("GL_IBM_multimode_draw_arrays"); + if (glewExperimental || GLEW_IBM_multimode_draw_arrays) CONST_CAST(GLEW_IBM_multimode_draw_arrays) = !_glewInit_GL_IBM_multimode_draw_arrays(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_IBM_multimode_draw_arrays */ +#ifdef GL_IBM_rasterpos_clip + CONST_CAST(GLEW_IBM_rasterpos_clip) = glewGetExtension("GL_IBM_rasterpos_clip"); +#endif /* GL_IBM_rasterpos_clip */ +#ifdef GL_IBM_static_data + CONST_CAST(GLEW_IBM_static_data) = glewGetExtension("GL_IBM_static_data"); +#endif /* GL_IBM_static_data */ +#ifdef GL_IBM_texture_mirrored_repeat + CONST_CAST(GLEW_IBM_texture_mirrored_repeat) = glewGetExtension("GL_IBM_texture_mirrored_repeat"); +#endif /* GL_IBM_texture_mirrored_repeat */ +#ifdef GL_IBM_vertex_array_lists + CONST_CAST(GLEW_IBM_vertex_array_lists) = glewGetExtension("GL_IBM_vertex_array_lists"); + if (glewExperimental || GLEW_IBM_vertex_array_lists) CONST_CAST(GLEW_IBM_vertex_array_lists) = !_glewInit_GL_IBM_vertex_array_lists(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_IBM_vertex_array_lists */ +#ifdef GL_INGR_color_clamp + CONST_CAST(GLEW_INGR_color_clamp) = glewGetExtension("GL_INGR_color_clamp"); +#endif /* GL_INGR_color_clamp */ +#ifdef GL_INGR_interlace_read + CONST_CAST(GLEW_INGR_interlace_read) = glewGetExtension("GL_INGR_interlace_read"); +#endif /* GL_INGR_interlace_read */ +#ifdef GL_INTEL_parallel_arrays + CONST_CAST(GLEW_INTEL_parallel_arrays) = glewGetExtension("GL_INTEL_parallel_arrays"); + if (glewExperimental || 
GLEW_INTEL_parallel_arrays) CONST_CAST(GLEW_INTEL_parallel_arrays) = !_glewInit_GL_INTEL_parallel_arrays(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_INTEL_parallel_arrays */ +#ifdef GL_INTEL_texture_scissor + CONST_CAST(GLEW_INTEL_texture_scissor) = glewGetExtension("GL_INTEL_texture_scissor"); + if (glewExperimental || GLEW_INTEL_texture_scissor) CONST_CAST(GLEW_INTEL_texture_scissor) = !_glewInit_GL_INTEL_texture_scissor(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_INTEL_texture_scissor */ +#ifdef GL_KTX_buffer_region + CONST_CAST(GLEW_KTX_buffer_region) = glewGetExtension("GL_KTX_buffer_region"); + if (glewExperimental || GLEW_KTX_buffer_region) CONST_CAST(GLEW_KTX_buffer_region) = !_glewInit_GL_KTX_buffer_region(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_KTX_buffer_region */ +#ifdef GL_MESAX_texture_stack + CONST_CAST(GLEW_MESAX_texture_stack) = glewGetExtension("GL_MESAX_texture_stack"); +#endif /* GL_MESAX_texture_stack */ +#ifdef GL_MESA_pack_invert + CONST_CAST(GLEW_MESA_pack_invert) = glewGetExtension("GL_MESA_pack_invert"); +#endif /* GL_MESA_pack_invert */ +#ifdef GL_MESA_resize_buffers + CONST_CAST(GLEW_MESA_resize_buffers) = glewGetExtension("GL_MESA_resize_buffers"); + if (glewExperimental || GLEW_MESA_resize_buffers) CONST_CAST(GLEW_MESA_resize_buffers) = !_glewInit_GL_MESA_resize_buffers(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_MESA_resize_buffers */ +#ifdef GL_MESA_window_pos + CONST_CAST(GLEW_MESA_window_pos) = glewGetExtension("GL_MESA_window_pos"); + if (glewExperimental || GLEW_MESA_window_pos) CONST_CAST(GLEW_MESA_window_pos) = !_glewInit_GL_MESA_window_pos(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_MESA_window_pos */ +#ifdef GL_MESA_ycbcr_texture + CONST_CAST(GLEW_MESA_ycbcr_texture) = glewGetExtension("GL_MESA_ycbcr_texture"); +#endif /* GL_MESA_ycbcr_texture */ +#ifdef GL_NV_blend_square + CONST_CAST(GLEW_NV_blend_square) = glewGetExtension("GL_NV_blend_square"); +#endif /* GL_NV_blend_square */ +#ifdef GL_NV_copy_depth_to_color + 
CONST_CAST(GLEW_NV_copy_depth_to_color) = glewGetExtension("GL_NV_copy_depth_to_color"); +#endif /* GL_NV_copy_depth_to_color */ +#ifdef GL_NV_depth_buffer_float + CONST_CAST(GLEW_NV_depth_buffer_float) = glewGetExtension("GL_NV_depth_buffer_float"); + if (glewExperimental || GLEW_NV_depth_buffer_float) CONST_CAST(GLEW_NV_depth_buffer_float) = !_glewInit_GL_NV_depth_buffer_float(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_NV_depth_buffer_float */ +#ifdef GL_NV_depth_clamp + CONST_CAST(GLEW_NV_depth_clamp) = glewGetExtension("GL_NV_depth_clamp"); +#endif /* GL_NV_depth_clamp */ +#ifdef GL_NV_depth_range_unclamped + CONST_CAST(GLEW_NV_depth_range_unclamped) = glewGetExtension("GL_NV_depth_range_unclamped"); +#endif /* GL_NV_depth_range_unclamped */ +#ifdef GL_NV_evaluators + CONST_CAST(GLEW_NV_evaluators) = glewGetExtension("GL_NV_evaluators"); + if (glewExperimental || GLEW_NV_evaluators) CONST_CAST(GLEW_NV_evaluators) = !_glewInit_GL_NV_evaluators(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_NV_evaluators */ +#ifdef GL_NV_fence + CONST_CAST(GLEW_NV_fence) = glewGetExtension("GL_NV_fence"); + if (glewExperimental || GLEW_NV_fence) CONST_CAST(GLEW_NV_fence) = !_glewInit_GL_NV_fence(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_NV_fence */ +#ifdef GL_NV_float_buffer + CONST_CAST(GLEW_NV_float_buffer) = glewGetExtension("GL_NV_float_buffer"); +#endif /* GL_NV_float_buffer */ +#ifdef GL_NV_fog_distance + CONST_CAST(GLEW_NV_fog_distance) = glewGetExtension("GL_NV_fog_distance"); +#endif /* GL_NV_fog_distance */ +#ifdef GL_NV_fragment_program + CONST_CAST(GLEW_NV_fragment_program) = glewGetExtension("GL_NV_fragment_program"); + if (glewExperimental || GLEW_NV_fragment_program) CONST_CAST(GLEW_NV_fragment_program) = !_glewInit_GL_NV_fragment_program(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_NV_fragment_program */ +#ifdef GL_NV_fragment_program2 + CONST_CAST(GLEW_NV_fragment_program2) = glewGetExtension("GL_NV_fragment_program2"); +#endif /* GL_NV_fragment_program2 */ +#ifdef 
GL_NV_fragment_program4 + CONST_CAST(GLEW_NV_fragment_program4) = glewGetExtension("GL_NV_fragment_program4"); +#endif /* GL_NV_fragment_program4 */ +#ifdef GL_NV_fragment_program_option + CONST_CAST(GLEW_NV_fragment_program_option) = glewGetExtension("GL_NV_fragment_program_option"); +#endif /* GL_NV_fragment_program_option */ +#ifdef GL_NV_framebuffer_multisample_coverage + CONST_CAST(GLEW_NV_framebuffer_multisample_coverage) = glewGetExtension("GL_NV_framebuffer_multisample_coverage"); + if (glewExperimental || GLEW_NV_framebuffer_multisample_coverage) CONST_CAST(GLEW_NV_framebuffer_multisample_coverage) = !_glewInit_GL_NV_framebuffer_multisample_coverage(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_NV_framebuffer_multisample_coverage */ +#ifdef GL_NV_geometry_program4 + CONST_CAST(GLEW_NV_geometry_program4) = glewGetExtension("GL_NV_geometry_program4"); + if (glewExperimental || GLEW_NV_geometry_program4) CONST_CAST(GLEW_NV_geometry_program4) = !_glewInit_GL_NV_geometry_program4(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_NV_geometry_program4 */ +#ifdef GL_NV_geometry_shader4 + CONST_CAST(GLEW_NV_geometry_shader4) = glewGetExtension("GL_NV_geometry_shader4"); +#endif /* GL_NV_geometry_shader4 */ +#ifdef GL_NV_gpu_program4 + CONST_CAST(GLEW_NV_gpu_program4) = glewGetExtension("GL_NV_gpu_program4"); + if (glewExperimental || GLEW_NV_gpu_program4) CONST_CAST(GLEW_NV_gpu_program4) = !_glewInit_GL_NV_gpu_program4(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_NV_gpu_program4 */ +#ifdef GL_NV_half_float + CONST_CAST(GLEW_NV_half_float) = glewGetExtension("GL_NV_half_float"); + if (glewExperimental || GLEW_NV_half_float) CONST_CAST(GLEW_NV_half_float) = !_glewInit_GL_NV_half_float(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_NV_half_float */ +#ifdef GL_NV_light_max_exponent + CONST_CAST(GLEW_NV_light_max_exponent) = glewGetExtension("GL_NV_light_max_exponent"); +#endif /* GL_NV_light_max_exponent */ +#ifdef GL_NV_multisample_filter_hint + 
CONST_CAST(GLEW_NV_multisample_filter_hint) = glewGetExtension("GL_NV_multisample_filter_hint"); +#endif /* GL_NV_multisample_filter_hint */ +#ifdef GL_NV_occlusion_query + CONST_CAST(GLEW_NV_occlusion_query) = glewGetExtension("GL_NV_occlusion_query"); + if (glewExperimental || GLEW_NV_occlusion_query) CONST_CAST(GLEW_NV_occlusion_query) = !_glewInit_GL_NV_occlusion_query(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_NV_occlusion_query */ +#ifdef GL_NV_packed_depth_stencil + CONST_CAST(GLEW_NV_packed_depth_stencil) = glewGetExtension("GL_NV_packed_depth_stencil"); +#endif /* GL_NV_packed_depth_stencil */ +#ifdef GL_NV_parameter_buffer_object + CONST_CAST(GLEW_NV_parameter_buffer_object) = glewGetExtension("GL_NV_parameter_buffer_object"); + if (glewExperimental || GLEW_NV_parameter_buffer_object) CONST_CAST(GLEW_NV_parameter_buffer_object) = !_glewInit_GL_NV_parameter_buffer_object(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_NV_parameter_buffer_object */ +#ifdef GL_NV_pixel_data_range + CONST_CAST(GLEW_NV_pixel_data_range) = glewGetExtension("GL_NV_pixel_data_range"); + if (glewExperimental || GLEW_NV_pixel_data_range) CONST_CAST(GLEW_NV_pixel_data_range) = !_glewInit_GL_NV_pixel_data_range(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_NV_pixel_data_range */ +#ifdef GL_NV_point_sprite + CONST_CAST(GLEW_NV_point_sprite) = glewGetExtension("GL_NV_point_sprite"); + if (glewExperimental || GLEW_NV_point_sprite) CONST_CAST(GLEW_NV_point_sprite) = !_glewInit_GL_NV_point_sprite(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_NV_point_sprite */ +#ifdef GL_NV_primitive_restart + CONST_CAST(GLEW_NV_primitive_restart) = glewGetExtension("GL_NV_primitive_restart"); + if (glewExperimental || GLEW_NV_primitive_restart) CONST_CAST(GLEW_NV_primitive_restart) = !_glewInit_GL_NV_primitive_restart(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_NV_primitive_restart */ +#ifdef GL_NV_register_combiners + CONST_CAST(GLEW_NV_register_combiners) = glewGetExtension("GL_NV_register_combiners"); + if 
(glewExperimental || GLEW_NV_register_combiners) CONST_CAST(GLEW_NV_register_combiners) = !_glewInit_GL_NV_register_combiners(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_NV_register_combiners */ +#ifdef GL_NV_register_combiners2 + CONST_CAST(GLEW_NV_register_combiners2) = glewGetExtension("GL_NV_register_combiners2"); + if (glewExperimental || GLEW_NV_register_combiners2) CONST_CAST(GLEW_NV_register_combiners2) = !_glewInit_GL_NV_register_combiners2(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_NV_register_combiners2 */ +#ifdef GL_NV_texgen_emboss + CONST_CAST(GLEW_NV_texgen_emboss) = glewGetExtension("GL_NV_texgen_emboss"); +#endif /* GL_NV_texgen_emboss */ +#ifdef GL_NV_texgen_reflection + CONST_CAST(GLEW_NV_texgen_reflection) = glewGetExtension("GL_NV_texgen_reflection"); +#endif /* GL_NV_texgen_reflection */ +#ifdef GL_NV_texture_compression_vtc + CONST_CAST(GLEW_NV_texture_compression_vtc) = glewGetExtension("GL_NV_texture_compression_vtc"); +#endif /* GL_NV_texture_compression_vtc */ +#ifdef GL_NV_texture_env_combine4 + CONST_CAST(GLEW_NV_texture_env_combine4) = glewGetExtension("GL_NV_texture_env_combine4"); +#endif /* GL_NV_texture_env_combine4 */ +#ifdef GL_NV_texture_expand_normal + CONST_CAST(GLEW_NV_texture_expand_normal) = glewGetExtension("GL_NV_texture_expand_normal"); +#endif /* GL_NV_texture_expand_normal */ +#ifdef GL_NV_texture_rectangle + CONST_CAST(GLEW_NV_texture_rectangle) = glewGetExtension("GL_NV_texture_rectangle"); +#endif /* GL_NV_texture_rectangle */ +#ifdef GL_NV_texture_shader + CONST_CAST(GLEW_NV_texture_shader) = glewGetExtension("GL_NV_texture_shader"); +#endif /* GL_NV_texture_shader */ +#ifdef GL_NV_texture_shader2 + CONST_CAST(GLEW_NV_texture_shader2) = glewGetExtension("GL_NV_texture_shader2"); +#endif /* GL_NV_texture_shader2 */ +#ifdef GL_NV_texture_shader3 + CONST_CAST(GLEW_NV_texture_shader3) = glewGetExtension("GL_NV_texture_shader3"); +#endif /* GL_NV_texture_shader3 */ +#ifdef GL_NV_transform_feedback + 
CONST_CAST(GLEW_NV_transform_feedback) = glewGetExtension("GL_NV_transform_feedback"); + if (glewExperimental || GLEW_NV_transform_feedback) CONST_CAST(GLEW_NV_transform_feedback) = !_glewInit_GL_NV_transform_feedback(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_NV_transform_feedback */ +#ifdef GL_NV_vertex_array_range + CONST_CAST(GLEW_NV_vertex_array_range) = glewGetExtension("GL_NV_vertex_array_range"); + if (glewExperimental || GLEW_NV_vertex_array_range) CONST_CAST(GLEW_NV_vertex_array_range) = !_glewInit_GL_NV_vertex_array_range(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_NV_vertex_array_range */ +#ifdef GL_NV_vertex_array_range2 + CONST_CAST(GLEW_NV_vertex_array_range2) = glewGetExtension("GL_NV_vertex_array_range2"); +#endif /* GL_NV_vertex_array_range2 */ +#ifdef GL_NV_vertex_program + CONST_CAST(GLEW_NV_vertex_program) = glewGetExtension("GL_NV_vertex_program"); + if (glewExperimental || GLEW_NV_vertex_program) CONST_CAST(GLEW_NV_vertex_program) = !_glewInit_GL_NV_vertex_program(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_NV_vertex_program */ +#ifdef GL_NV_vertex_program1_1 + CONST_CAST(GLEW_NV_vertex_program1_1) = glewGetExtension("GL_NV_vertex_program1_1"); +#endif /* GL_NV_vertex_program1_1 */ +#ifdef GL_NV_vertex_program2 + CONST_CAST(GLEW_NV_vertex_program2) = glewGetExtension("GL_NV_vertex_program2"); +#endif /* GL_NV_vertex_program2 */ +#ifdef GL_NV_vertex_program2_option + CONST_CAST(GLEW_NV_vertex_program2_option) = glewGetExtension("GL_NV_vertex_program2_option"); +#endif /* GL_NV_vertex_program2_option */ +#ifdef GL_NV_vertex_program3 + CONST_CAST(GLEW_NV_vertex_program3) = glewGetExtension("GL_NV_vertex_program3"); +#endif /* GL_NV_vertex_program3 */ +#ifdef GL_NV_vertex_program4 + CONST_CAST(GLEW_NV_vertex_program4) = glewGetExtension("GL_NV_vertex_program4"); +#endif /* GL_NV_vertex_program4 */ +#ifdef GL_OES_byte_coordinates + CONST_CAST(GLEW_OES_byte_coordinates) = glewGetExtension("GL_OES_byte_coordinates"); +#endif /* GL_OES_byte_coordinates */ 
+#ifdef GL_OES_compressed_paletted_texture + CONST_CAST(GLEW_OES_compressed_paletted_texture) = glewGetExtension("GL_OES_compressed_paletted_texture"); +#endif /* GL_OES_compressed_paletted_texture */ +#ifdef GL_OES_read_format + CONST_CAST(GLEW_OES_read_format) = glewGetExtension("GL_OES_read_format"); +#endif /* GL_OES_read_format */ +#ifdef GL_OES_single_precision + CONST_CAST(GLEW_OES_single_precision) = glewGetExtension("GL_OES_single_precision"); + if (glewExperimental || GLEW_OES_single_precision) CONST_CAST(GLEW_OES_single_precision) = !_glewInit_GL_OES_single_precision(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_OES_single_precision */ +#ifdef GL_OML_interlace + CONST_CAST(GLEW_OML_interlace) = glewGetExtension("GL_OML_interlace"); +#endif /* GL_OML_interlace */ +#ifdef GL_OML_resample + CONST_CAST(GLEW_OML_resample) = glewGetExtension("GL_OML_resample"); +#endif /* GL_OML_resample */ +#ifdef GL_OML_subsample + CONST_CAST(GLEW_OML_subsample) = glewGetExtension("GL_OML_subsample"); +#endif /* GL_OML_subsample */ +#ifdef GL_PGI_misc_hints + CONST_CAST(GLEW_PGI_misc_hints) = glewGetExtension("GL_PGI_misc_hints"); +#endif /* GL_PGI_misc_hints */ +#ifdef GL_PGI_vertex_hints + CONST_CAST(GLEW_PGI_vertex_hints) = glewGetExtension("GL_PGI_vertex_hints"); +#endif /* GL_PGI_vertex_hints */ +#ifdef GL_REND_screen_coordinates + CONST_CAST(GLEW_REND_screen_coordinates) = glewGetExtension("GL_REND_screen_coordinates"); +#endif /* GL_REND_screen_coordinates */ +#ifdef GL_S3_s3tc + CONST_CAST(GLEW_S3_s3tc) = glewGetExtension("GL_S3_s3tc"); +#endif /* GL_S3_s3tc */ +#ifdef GL_SGIS_color_range + CONST_CAST(GLEW_SGIS_color_range) = glewGetExtension("GL_SGIS_color_range"); +#endif /* GL_SGIS_color_range */ +#ifdef GL_SGIS_detail_texture + CONST_CAST(GLEW_SGIS_detail_texture) = glewGetExtension("GL_SGIS_detail_texture"); + if (glewExperimental || GLEW_SGIS_detail_texture) CONST_CAST(GLEW_SGIS_detail_texture) = !_glewInit_GL_SGIS_detail_texture(GLEW_CONTEXT_ARG_VAR_INIT); +#endif 
/* GL_SGIS_detail_texture */ +#ifdef GL_SGIS_fog_function + CONST_CAST(GLEW_SGIS_fog_function) = glewGetExtension("GL_SGIS_fog_function"); + if (glewExperimental || GLEW_SGIS_fog_function) CONST_CAST(GLEW_SGIS_fog_function) = !_glewInit_GL_SGIS_fog_function(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_SGIS_fog_function */ +#ifdef GL_SGIS_generate_mipmap + CONST_CAST(GLEW_SGIS_generate_mipmap) = glewGetExtension("GL_SGIS_generate_mipmap"); +#endif /* GL_SGIS_generate_mipmap */ +#ifdef GL_SGIS_multisample + CONST_CAST(GLEW_SGIS_multisample) = glewGetExtension("GL_SGIS_multisample"); + if (glewExperimental || GLEW_SGIS_multisample) CONST_CAST(GLEW_SGIS_multisample) = !_glewInit_GL_SGIS_multisample(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_SGIS_multisample */ +#ifdef GL_SGIS_pixel_texture + CONST_CAST(GLEW_SGIS_pixel_texture) = glewGetExtension("GL_SGIS_pixel_texture"); +#endif /* GL_SGIS_pixel_texture */ +#ifdef GL_SGIS_sharpen_texture + CONST_CAST(GLEW_SGIS_sharpen_texture) = glewGetExtension("GL_SGIS_sharpen_texture"); + if (glewExperimental || GLEW_SGIS_sharpen_texture) CONST_CAST(GLEW_SGIS_sharpen_texture) = !_glewInit_GL_SGIS_sharpen_texture(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_SGIS_sharpen_texture */ +#ifdef GL_SGIS_texture4D + CONST_CAST(GLEW_SGIS_texture4D) = glewGetExtension("GL_SGIS_texture4D"); + if (glewExperimental || GLEW_SGIS_texture4D) CONST_CAST(GLEW_SGIS_texture4D) = !_glewInit_GL_SGIS_texture4D(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_SGIS_texture4D */ +#ifdef GL_SGIS_texture_border_clamp + CONST_CAST(GLEW_SGIS_texture_border_clamp) = glewGetExtension("GL_SGIS_texture_border_clamp"); +#endif /* GL_SGIS_texture_border_clamp */ +#ifdef GL_SGIS_texture_edge_clamp + CONST_CAST(GLEW_SGIS_texture_edge_clamp) = glewGetExtension("GL_SGIS_texture_edge_clamp"); +#endif /* GL_SGIS_texture_edge_clamp */ +#ifdef GL_SGIS_texture_filter4 + CONST_CAST(GLEW_SGIS_texture_filter4) = glewGetExtension("GL_SGIS_texture_filter4"); + if (glewExperimental || 
GLEW_SGIS_texture_filter4) CONST_CAST(GLEW_SGIS_texture_filter4) = !_glewInit_GL_SGIS_texture_filter4(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_SGIS_texture_filter4 */ +#ifdef GL_SGIS_texture_lod + CONST_CAST(GLEW_SGIS_texture_lod) = glewGetExtension("GL_SGIS_texture_lod"); +#endif /* GL_SGIS_texture_lod */ +#ifdef GL_SGIS_texture_select + CONST_CAST(GLEW_SGIS_texture_select) = glewGetExtension("GL_SGIS_texture_select"); +#endif /* GL_SGIS_texture_select */ +#ifdef GL_SGIX_async + CONST_CAST(GLEW_SGIX_async) = glewGetExtension("GL_SGIX_async"); + if (glewExperimental || GLEW_SGIX_async) CONST_CAST(GLEW_SGIX_async) = !_glewInit_GL_SGIX_async(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_SGIX_async */ +#ifdef GL_SGIX_async_histogram + CONST_CAST(GLEW_SGIX_async_histogram) = glewGetExtension("GL_SGIX_async_histogram"); +#endif /* GL_SGIX_async_histogram */ +#ifdef GL_SGIX_async_pixel + CONST_CAST(GLEW_SGIX_async_pixel) = glewGetExtension("GL_SGIX_async_pixel"); +#endif /* GL_SGIX_async_pixel */ +#ifdef GL_SGIX_blend_alpha_minmax + CONST_CAST(GLEW_SGIX_blend_alpha_minmax) = glewGetExtension("GL_SGIX_blend_alpha_minmax"); +#endif /* GL_SGIX_blend_alpha_minmax */ +#ifdef GL_SGIX_clipmap + CONST_CAST(GLEW_SGIX_clipmap) = glewGetExtension("GL_SGIX_clipmap"); +#endif /* GL_SGIX_clipmap */ +#ifdef GL_SGIX_depth_texture + CONST_CAST(GLEW_SGIX_depth_texture) = glewGetExtension("GL_SGIX_depth_texture"); +#endif /* GL_SGIX_depth_texture */ +#ifdef GL_SGIX_flush_raster + CONST_CAST(GLEW_SGIX_flush_raster) = glewGetExtension("GL_SGIX_flush_raster"); + if (glewExperimental || GLEW_SGIX_flush_raster) CONST_CAST(GLEW_SGIX_flush_raster) = !_glewInit_GL_SGIX_flush_raster(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_SGIX_flush_raster */ +#ifdef GL_SGIX_fog_offset + CONST_CAST(GLEW_SGIX_fog_offset) = glewGetExtension("GL_SGIX_fog_offset"); +#endif /* GL_SGIX_fog_offset */ +#ifdef GL_SGIX_fog_texture + CONST_CAST(GLEW_SGIX_fog_texture) = glewGetExtension("GL_SGIX_fog_texture"); + if 
(glewExperimental || GLEW_SGIX_fog_texture) CONST_CAST(GLEW_SGIX_fog_texture) = !_glewInit_GL_SGIX_fog_texture(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_SGIX_fog_texture */ +#ifdef GL_SGIX_fragment_specular_lighting + CONST_CAST(GLEW_SGIX_fragment_specular_lighting) = glewGetExtension("GL_SGIX_fragment_specular_lighting"); + if (glewExperimental || GLEW_SGIX_fragment_specular_lighting) CONST_CAST(GLEW_SGIX_fragment_specular_lighting) = !_glewInit_GL_SGIX_fragment_specular_lighting(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_SGIX_fragment_specular_lighting */ +#ifdef GL_SGIX_framezoom + CONST_CAST(GLEW_SGIX_framezoom) = glewGetExtension("GL_SGIX_framezoom"); + if (glewExperimental || GLEW_SGIX_framezoom) CONST_CAST(GLEW_SGIX_framezoom) = !_glewInit_GL_SGIX_framezoom(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_SGIX_framezoom */ +#ifdef GL_SGIX_interlace + CONST_CAST(GLEW_SGIX_interlace) = glewGetExtension("GL_SGIX_interlace"); +#endif /* GL_SGIX_interlace */ +#ifdef GL_SGIX_ir_instrument1 + CONST_CAST(GLEW_SGIX_ir_instrument1) = glewGetExtension("GL_SGIX_ir_instrument1"); +#endif /* GL_SGIX_ir_instrument1 */ +#ifdef GL_SGIX_list_priority + CONST_CAST(GLEW_SGIX_list_priority) = glewGetExtension("GL_SGIX_list_priority"); +#endif /* GL_SGIX_list_priority */ +#ifdef GL_SGIX_pixel_texture + CONST_CAST(GLEW_SGIX_pixel_texture) = glewGetExtension("GL_SGIX_pixel_texture"); + if (glewExperimental || GLEW_SGIX_pixel_texture) CONST_CAST(GLEW_SGIX_pixel_texture) = !_glewInit_GL_SGIX_pixel_texture(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_SGIX_pixel_texture */ +#ifdef GL_SGIX_pixel_texture_bits + CONST_CAST(GLEW_SGIX_pixel_texture_bits) = glewGetExtension("GL_SGIX_pixel_texture_bits"); +#endif /* GL_SGIX_pixel_texture_bits */ +#ifdef GL_SGIX_reference_plane + CONST_CAST(GLEW_SGIX_reference_plane) = glewGetExtension("GL_SGIX_reference_plane"); + if (glewExperimental || GLEW_SGIX_reference_plane) CONST_CAST(GLEW_SGIX_reference_plane) = 
!_glewInit_GL_SGIX_reference_plane(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_SGIX_reference_plane */ +#ifdef GL_SGIX_resample + CONST_CAST(GLEW_SGIX_resample) = glewGetExtension("GL_SGIX_resample"); +#endif /* GL_SGIX_resample */ +#ifdef GL_SGIX_shadow + CONST_CAST(GLEW_SGIX_shadow) = glewGetExtension("GL_SGIX_shadow"); +#endif /* GL_SGIX_shadow */ +#ifdef GL_SGIX_shadow_ambient + CONST_CAST(GLEW_SGIX_shadow_ambient) = glewGetExtension("GL_SGIX_shadow_ambient"); +#endif /* GL_SGIX_shadow_ambient */ +#ifdef GL_SGIX_sprite + CONST_CAST(GLEW_SGIX_sprite) = glewGetExtension("GL_SGIX_sprite"); + if (glewExperimental || GLEW_SGIX_sprite) CONST_CAST(GLEW_SGIX_sprite) = !_glewInit_GL_SGIX_sprite(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_SGIX_sprite */ +#ifdef GL_SGIX_tag_sample_buffer + CONST_CAST(GLEW_SGIX_tag_sample_buffer) = glewGetExtension("GL_SGIX_tag_sample_buffer"); + if (glewExperimental || GLEW_SGIX_tag_sample_buffer) CONST_CAST(GLEW_SGIX_tag_sample_buffer) = !_glewInit_GL_SGIX_tag_sample_buffer(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_SGIX_tag_sample_buffer */ +#ifdef GL_SGIX_texture_add_env + CONST_CAST(GLEW_SGIX_texture_add_env) = glewGetExtension("GL_SGIX_texture_add_env"); +#endif /* GL_SGIX_texture_add_env */ +#ifdef GL_SGIX_texture_coordinate_clamp + CONST_CAST(GLEW_SGIX_texture_coordinate_clamp) = glewGetExtension("GL_SGIX_texture_coordinate_clamp"); +#endif /* GL_SGIX_texture_coordinate_clamp */ +#ifdef GL_SGIX_texture_lod_bias + CONST_CAST(GLEW_SGIX_texture_lod_bias) = glewGetExtension("GL_SGIX_texture_lod_bias"); +#endif /* GL_SGIX_texture_lod_bias */ +#ifdef GL_SGIX_texture_multi_buffer + CONST_CAST(GLEW_SGIX_texture_multi_buffer) = glewGetExtension("GL_SGIX_texture_multi_buffer"); +#endif /* GL_SGIX_texture_multi_buffer */ +#ifdef GL_SGIX_texture_range + CONST_CAST(GLEW_SGIX_texture_range) = glewGetExtension("GL_SGIX_texture_range"); +#endif /* GL_SGIX_texture_range */ +#ifdef GL_SGIX_texture_scale_bias + CONST_CAST(GLEW_SGIX_texture_scale_bias) = 
glewGetExtension("GL_SGIX_texture_scale_bias"); +#endif /* GL_SGIX_texture_scale_bias */ +#ifdef GL_SGIX_vertex_preclip + CONST_CAST(GLEW_SGIX_vertex_preclip) = glewGetExtension("GL_SGIX_vertex_preclip"); +#endif /* GL_SGIX_vertex_preclip */ +#ifdef GL_SGIX_vertex_preclip_hint + CONST_CAST(GLEW_SGIX_vertex_preclip_hint) = glewGetExtension("GL_SGIX_vertex_preclip_hint"); +#endif /* GL_SGIX_vertex_preclip_hint */ +#ifdef GL_SGIX_ycrcb + CONST_CAST(GLEW_SGIX_ycrcb) = glewGetExtension("GL_SGIX_ycrcb"); +#endif /* GL_SGIX_ycrcb */ +#ifdef GL_SGI_color_matrix + CONST_CAST(GLEW_SGI_color_matrix) = glewGetExtension("GL_SGI_color_matrix"); +#endif /* GL_SGI_color_matrix */ +#ifdef GL_SGI_color_table + CONST_CAST(GLEW_SGI_color_table) = glewGetExtension("GL_SGI_color_table"); + if (glewExperimental || GLEW_SGI_color_table) CONST_CAST(GLEW_SGI_color_table) = !_glewInit_GL_SGI_color_table(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_SGI_color_table */ +#ifdef GL_SGI_texture_color_table + CONST_CAST(GLEW_SGI_texture_color_table) = glewGetExtension("GL_SGI_texture_color_table"); +#endif /* GL_SGI_texture_color_table */ +#ifdef GL_SUNX_constant_data + CONST_CAST(GLEW_SUNX_constant_data) = glewGetExtension("GL_SUNX_constant_data"); + if (glewExperimental || GLEW_SUNX_constant_data) CONST_CAST(GLEW_SUNX_constant_data) = !_glewInit_GL_SUNX_constant_data(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_SUNX_constant_data */ +#ifdef GL_SUN_convolution_border_modes + CONST_CAST(GLEW_SUN_convolution_border_modes) = glewGetExtension("GL_SUN_convolution_border_modes"); +#endif /* GL_SUN_convolution_border_modes */ +#ifdef GL_SUN_global_alpha + CONST_CAST(GLEW_SUN_global_alpha) = glewGetExtension("GL_SUN_global_alpha"); + if (glewExperimental || GLEW_SUN_global_alpha) CONST_CAST(GLEW_SUN_global_alpha) = !_glewInit_GL_SUN_global_alpha(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_SUN_global_alpha */ +#ifdef GL_SUN_mesh_array + CONST_CAST(GLEW_SUN_mesh_array) = glewGetExtension("GL_SUN_mesh_array"); 
+#endif /* GL_SUN_mesh_array */ +#ifdef GL_SUN_read_video_pixels + CONST_CAST(GLEW_SUN_read_video_pixels) = glewGetExtension("GL_SUN_read_video_pixels"); + if (glewExperimental || GLEW_SUN_read_video_pixels) CONST_CAST(GLEW_SUN_read_video_pixels) = !_glewInit_GL_SUN_read_video_pixels(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_SUN_read_video_pixels */ +#ifdef GL_SUN_slice_accum + CONST_CAST(GLEW_SUN_slice_accum) = glewGetExtension("GL_SUN_slice_accum"); +#endif /* GL_SUN_slice_accum */ +#ifdef GL_SUN_triangle_list + CONST_CAST(GLEW_SUN_triangle_list) = glewGetExtension("GL_SUN_triangle_list"); + if (glewExperimental || GLEW_SUN_triangle_list) CONST_CAST(GLEW_SUN_triangle_list) = !_glewInit_GL_SUN_triangle_list(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_SUN_triangle_list */ +#ifdef GL_SUN_vertex + CONST_CAST(GLEW_SUN_vertex) = glewGetExtension("GL_SUN_vertex"); + if (glewExperimental || GLEW_SUN_vertex) CONST_CAST(GLEW_SUN_vertex) = !_glewInit_GL_SUN_vertex(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_SUN_vertex */ +#ifdef GL_WIN_phong_shading + CONST_CAST(GLEW_WIN_phong_shading) = glewGetExtension("GL_WIN_phong_shading"); +#endif /* GL_WIN_phong_shading */ +#ifdef GL_WIN_specular_fog + CONST_CAST(GLEW_WIN_specular_fog) = glewGetExtension("GL_WIN_specular_fog"); +#endif /* GL_WIN_specular_fog */ +#ifdef GL_WIN_swap_hint + CONST_CAST(GLEW_WIN_swap_hint) = glewGetExtension("GL_WIN_swap_hint"); + if (glewExperimental || GLEW_WIN_swap_hint) CONST_CAST(GLEW_WIN_swap_hint) = !_glewInit_GL_WIN_swap_hint(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GL_WIN_swap_hint */ + + return GLEW_OK; +} + + +#if defined(_WIN32) + +#if !defined(GLEW_MX) + +PFNWGLSETSTEREOEMITTERSTATE3DLPROC __wglewSetStereoEmitterState3DL = NULL; + +PFNWGLCREATEBUFFERREGIONARBPROC __wglewCreateBufferRegionARB = NULL; +PFNWGLDELETEBUFFERREGIONARBPROC __wglewDeleteBufferRegionARB = NULL; +PFNWGLRESTOREBUFFERREGIONARBPROC __wglewRestoreBufferRegionARB = NULL; +PFNWGLSAVEBUFFERREGIONARBPROC __wglewSaveBufferRegionARB = 
NULL; + +PFNWGLGETEXTENSIONSSTRINGARBPROC __wglewGetExtensionsStringARB = NULL; + +PFNWGLGETCURRENTREADDCARBPROC __wglewGetCurrentReadDCARB = NULL; +PFNWGLMAKECONTEXTCURRENTARBPROC __wglewMakeContextCurrentARB = NULL; + +PFNWGLCREATEPBUFFERARBPROC __wglewCreatePbufferARB = NULL; +PFNWGLDESTROYPBUFFERARBPROC __wglewDestroyPbufferARB = NULL; +PFNWGLGETPBUFFERDCARBPROC __wglewGetPbufferDCARB = NULL; +PFNWGLQUERYPBUFFERARBPROC __wglewQueryPbufferARB = NULL; +PFNWGLRELEASEPBUFFERDCARBPROC __wglewReleasePbufferDCARB = NULL; + +PFNWGLCHOOSEPIXELFORMATARBPROC __wglewChoosePixelFormatARB = NULL; +PFNWGLGETPIXELFORMATATTRIBFVARBPROC __wglewGetPixelFormatAttribfvARB = NULL; +PFNWGLGETPIXELFORMATATTRIBIVARBPROC __wglewGetPixelFormatAttribivARB = NULL; + +PFNWGLBINDTEXIMAGEARBPROC __wglewBindTexImageARB = NULL; +PFNWGLRELEASETEXIMAGEARBPROC __wglewReleaseTexImageARB = NULL; +PFNWGLSETPBUFFERATTRIBARBPROC __wglewSetPbufferAttribARB = NULL; + +PFNWGLBINDDISPLAYCOLORTABLEEXTPROC __wglewBindDisplayColorTableEXT = NULL; +PFNWGLCREATEDISPLAYCOLORTABLEEXTPROC __wglewCreateDisplayColorTableEXT = NULL; +PFNWGLDESTROYDISPLAYCOLORTABLEEXTPROC __wglewDestroyDisplayColorTableEXT = NULL; +PFNWGLLOADDISPLAYCOLORTABLEEXTPROC __wglewLoadDisplayColorTableEXT = NULL; + +PFNWGLGETEXTENSIONSSTRINGEXTPROC __wglewGetExtensionsStringEXT = NULL; + +PFNWGLGETCURRENTREADDCEXTPROC __wglewGetCurrentReadDCEXT = NULL; +PFNWGLMAKECONTEXTCURRENTEXTPROC __wglewMakeContextCurrentEXT = NULL; + +PFNWGLCREATEPBUFFEREXTPROC __wglewCreatePbufferEXT = NULL; +PFNWGLDESTROYPBUFFEREXTPROC __wglewDestroyPbufferEXT = NULL; +PFNWGLGETPBUFFERDCEXTPROC __wglewGetPbufferDCEXT = NULL; +PFNWGLQUERYPBUFFEREXTPROC __wglewQueryPbufferEXT = NULL; +PFNWGLRELEASEPBUFFERDCEXTPROC __wglewReleasePbufferDCEXT = NULL; + +PFNWGLCHOOSEPIXELFORMATEXTPROC __wglewChoosePixelFormatEXT = NULL; +PFNWGLGETPIXELFORMATATTRIBFVEXTPROC __wglewGetPixelFormatAttribfvEXT = NULL; +PFNWGLGETPIXELFORMATATTRIBIVEXTPROC __wglewGetPixelFormatAttribivEXT = NULL; 
+ +PFNWGLGETSWAPINTERVALEXTPROC __wglewGetSwapIntervalEXT = NULL; +PFNWGLSWAPINTERVALEXTPROC __wglewSwapIntervalEXT = NULL; + +PFNWGLGETDIGITALVIDEOPARAMETERSI3DPROC __wglewGetDigitalVideoParametersI3D = NULL; +PFNWGLSETDIGITALVIDEOPARAMETERSI3DPROC __wglewSetDigitalVideoParametersI3D = NULL; + +PFNWGLGETGAMMATABLEI3DPROC __wglewGetGammaTableI3D = NULL; +PFNWGLGETGAMMATABLEPARAMETERSI3DPROC __wglewGetGammaTableParametersI3D = NULL; +PFNWGLSETGAMMATABLEI3DPROC __wglewSetGammaTableI3D = NULL; +PFNWGLSETGAMMATABLEPARAMETERSI3DPROC __wglewSetGammaTableParametersI3D = NULL; + +PFNWGLDISABLEGENLOCKI3DPROC __wglewDisableGenlockI3D = NULL; +PFNWGLENABLEGENLOCKI3DPROC __wglewEnableGenlockI3D = NULL; +PFNWGLGENLOCKSAMPLERATEI3DPROC __wglewGenlockSampleRateI3D = NULL; +PFNWGLGENLOCKSOURCEDELAYI3DPROC __wglewGenlockSourceDelayI3D = NULL; +PFNWGLGENLOCKSOURCEEDGEI3DPROC __wglewGenlockSourceEdgeI3D = NULL; +PFNWGLGENLOCKSOURCEI3DPROC __wglewGenlockSourceI3D = NULL; +PFNWGLGETGENLOCKSAMPLERATEI3DPROC __wglewGetGenlockSampleRateI3D = NULL; +PFNWGLGETGENLOCKSOURCEDELAYI3DPROC __wglewGetGenlockSourceDelayI3D = NULL; +PFNWGLGETGENLOCKSOURCEEDGEI3DPROC __wglewGetGenlockSourceEdgeI3D = NULL; +PFNWGLGETGENLOCKSOURCEI3DPROC __wglewGetGenlockSourceI3D = NULL; +PFNWGLISENABLEDGENLOCKI3DPROC __wglewIsEnabledGenlockI3D = NULL; +PFNWGLQUERYGENLOCKMAXSOURCEDELAYI3DPROC __wglewQueryGenlockMaxSourceDelayI3D = NULL; + +PFNWGLASSOCIATEIMAGEBUFFEREVENTSI3DPROC __wglewAssociateImageBufferEventsI3D = NULL; +PFNWGLCREATEIMAGEBUFFERI3DPROC __wglewCreateImageBufferI3D = NULL; +PFNWGLDESTROYIMAGEBUFFERI3DPROC __wglewDestroyImageBufferI3D = NULL; +PFNWGLRELEASEIMAGEBUFFEREVENTSI3DPROC __wglewReleaseImageBufferEventsI3D = NULL; + +PFNWGLDISABLEFRAMELOCKI3DPROC __wglewDisableFrameLockI3D = NULL; +PFNWGLENABLEFRAMELOCKI3DPROC __wglewEnableFrameLockI3D = NULL; +PFNWGLISENABLEDFRAMELOCKI3DPROC __wglewIsEnabledFrameLockI3D = NULL; +PFNWGLQUERYFRAMELOCKMASTERI3DPROC __wglewQueryFrameLockMasterI3D = NULL; + 
+PFNWGLBEGINFRAMETRACKINGI3DPROC __wglewBeginFrameTrackingI3D = NULL; +PFNWGLENDFRAMETRACKINGI3DPROC __wglewEndFrameTrackingI3D = NULL; +PFNWGLGETFRAMEUSAGEI3DPROC __wglewGetFrameUsageI3D = NULL; +PFNWGLQUERYFRAMETRACKINGI3DPROC __wglewQueryFrameTrackingI3D = NULL; + +PFNWGLCREATEAFFINITYDCNVPROC __wglewCreateAffinityDCNV = NULL; +PFNWGLDELETEDCNVPROC __wglewDeleteDCNV = NULL; +PFNWGLENUMGPUDEVICESNVPROC __wglewEnumGpuDevicesNV = NULL; +PFNWGLENUMGPUSFROMAFFINITYDCNVPROC __wglewEnumGpusFromAffinityDCNV = NULL; +PFNWGLENUMGPUSNVPROC __wglewEnumGpusNV = NULL; + +PFNWGLALLOCATEMEMORYNVPROC __wglewAllocateMemoryNV = NULL; +PFNWGLFREEMEMORYNVPROC __wglewFreeMemoryNV = NULL; + +PFNWGLGETMSCRATEOMLPROC __wglewGetMscRateOML = NULL; +PFNWGLGETSYNCVALUESOMLPROC __wglewGetSyncValuesOML = NULL; +PFNWGLSWAPBUFFERSMSCOMLPROC __wglewSwapBuffersMscOML = NULL; +PFNWGLSWAPLAYERBUFFERSMSCOMLPROC __wglewSwapLayerBuffersMscOML = NULL; +PFNWGLWAITFORMSCOMLPROC __wglewWaitForMscOML = NULL; +PFNWGLWAITFORSBCOMLPROC __wglewWaitForSbcOML = NULL; +GLboolean __WGLEW_3DFX_multisample = GL_FALSE; +GLboolean __WGLEW_3DL_stereo_control = GL_FALSE; +GLboolean __WGLEW_ARB_buffer_region = GL_FALSE; +GLboolean __WGLEW_ARB_extensions_string = GL_FALSE; +GLboolean __WGLEW_ARB_make_current_read = GL_FALSE; +GLboolean __WGLEW_ARB_multisample = GL_FALSE; +GLboolean __WGLEW_ARB_pbuffer = GL_FALSE; +GLboolean __WGLEW_ARB_pixel_format = GL_FALSE; +GLboolean __WGLEW_ARB_pixel_format_float = GL_FALSE; +GLboolean __WGLEW_ARB_render_texture = GL_FALSE; +GLboolean __WGLEW_ATI_pixel_format_float = GL_FALSE; +GLboolean __WGLEW_ATI_render_texture_rectangle = GL_FALSE; +GLboolean __WGLEW_EXT_depth_float = GL_FALSE; +GLboolean __WGLEW_EXT_display_color_table = GL_FALSE; +GLboolean __WGLEW_EXT_extensions_string = GL_FALSE; +GLboolean __WGLEW_EXT_framebuffer_sRGB = GL_FALSE; +GLboolean __WGLEW_EXT_make_current_read = GL_FALSE; +GLboolean __WGLEW_EXT_multisample = GL_FALSE; +GLboolean __WGLEW_EXT_pbuffer = GL_FALSE; 
+GLboolean __WGLEW_EXT_pixel_format = GL_FALSE; +GLboolean __WGLEW_EXT_pixel_format_packed_float = GL_FALSE; +GLboolean __WGLEW_EXT_swap_control = GL_FALSE; +GLboolean __WGLEW_I3D_digital_video_control = GL_FALSE; +GLboolean __WGLEW_I3D_gamma = GL_FALSE; +GLboolean __WGLEW_I3D_genlock = GL_FALSE; +GLboolean __WGLEW_I3D_image_buffer = GL_FALSE; +GLboolean __WGLEW_I3D_swap_frame_lock = GL_FALSE; +GLboolean __WGLEW_I3D_swap_frame_usage = GL_FALSE; +GLboolean __WGLEW_NV_float_buffer = GL_FALSE; +GLboolean __WGLEW_NV_gpu_affinity = GL_FALSE; +GLboolean __WGLEW_NV_render_depth_texture = GL_FALSE; +GLboolean __WGLEW_NV_render_texture_rectangle = GL_FALSE; +GLboolean __WGLEW_NV_vertex_array_range = GL_FALSE; +GLboolean __WGLEW_OML_sync_control = GL_FALSE; + +#endif /* !GLEW_MX */ + +#ifdef WGL_3DFX_multisample + +#endif /* WGL_3DFX_multisample */ + +#ifdef WGL_3DL_stereo_control + +static GLboolean _glewInit_WGL_3DL_stereo_control (WGLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((wglSetStereoEmitterState3DL = (PFNWGLSETSTEREOEMITTERSTATE3DLPROC)glewGetProcAddress((const GLubyte*)"wglSetStereoEmitterState3DL")) == NULL) || r; + + return r; +} + +#endif /* WGL_3DL_stereo_control */ + +#ifdef WGL_ARB_buffer_region + +static GLboolean _glewInit_WGL_ARB_buffer_region (WGLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((wglCreateBufferRegionARB = (PFNWGLCREATEBUFFERREGIONARBPROC)glewGetProcAddress((const GLubyte*)"wglCreateBufferRegionARB")) == NULL) || r; + r = ((wglDeleteBufferRegionARB = (PFNWGLDELETEBUFFERREGIONARBPROC)glewGetProcAddress((const GLubyte*)"wglDeleteBufferRegionARB")) == NULL) || r; + r = ((wglRestoreBufferRegionARB = (PFNWGLRESTOREBUFFERREGIONARBPROC)glewGetProcAddress((const GLubyte*)"wglRestoreBufferRegionARB")) == NULL) || r; + r = ((wglSaveBufferRegionARB = (PFNWGLSAVEBUFFERREGIONARBPROC)glewGetProcAddress((const GLubyte*)"wglSaveBufferRegionARB")) == NULL) || r; + + return r; +} + +#endif /* WGL_ARB_buffer_region */ + 
+#ifdef WGL_ARB_extensions_string + +static GLboolean _glewInit_WGL_ARB_extensions_string (WGLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((wglGetExtensionsStringARB = (PFNWGLGETEXTENSIONSSTRINGARBPROC)glewGetProcAddress((const GLubyte*)"wglGetExtensionsStringARB")) == NULL) || r; + + return r; +} + +#endif /* WGL_ARB_extensions_string */ + +#ifdef WGL_ARB_make_current_read + +static GLboolean _glewInit_WGL_ARB_make_current_read (WGLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((wglGetCurrentReadDCARB = (PFNWGLGETCURRENTREADDCARBPROC)glewGetProcAddress((const GLubyte*)"wglGetCurrentReadDCARB")) == NULL) || r; + r = ((wglMakeContextCurrentARB = (PFNWGLMAKECONTEXTCURRENTARBPROC)glewGetProcAddress((const GLubyte*)"wglMakeContextCurrentARB")) == NULL) || r; + + return r; +} + +#endif /* WGL_ARB_make_current_read */ + +#ifdef WGL_ARB_multisample + +#endif /* WGL_ARB_multisample */ + +#ifdef WGL_ARB_pbuffer + +static GLboolean _glewInit_WGL_ARB_pbuffer (WGLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((wglCreatePbufferARB = (PFNWGLCREATEPBUFFERARBPROC)glewGetProcAddress((const GLubyte*)"wglCreatePbufferARB")) == NULL) || r; + r = ((wglDestroyPbufferARB = (PFNWGLDESTROYPBUFFERARBPROC)glewGetProcAddress((const GLubyte*)"wglDestroyPbufferARB")) == NULL) || r; + r = ((wglGetPbufferDCARB = (PFNWGLGETPBUFFERDCARBPROC)glewGetProcAddress((const GLubyte*)"wglGetPbufferDCARB")) == NULL) || r; + r = ((wglQueryPbufferARB = (PFNWGLQUERYPBUFFERARBPROC)glewGetProcAddress((const GLubyte*)"wglQueryPbufferARB")) == NULL) || r; + r = ((wglReleasePbufferDCARB = (PFNWGLRELEASEPBUFFERDCARBPROC)glewGetProcAddress((const GLubyte*)"wglReleasePbufferDCARB")) == NULL) || r; + + return r; +} + +#endif /* WGL_ARB_pbuffer */ + +#ifdef WGL_ARB_pixel_format + +static GLboolean _glewInit_WGL_ARB_pixel_format (WGLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((wglChoosePixelFormatARB = 
(PFNWGLCHOOSEPIXELFORMATARBPROC)glewGetProcAddress((const GLubyte*)"wglChoosePixelFormatARB")) == NULL) || r; + r = ((wglGetPixelFormatAttribfvARB = (PFNWGLGETPIXELFORMATATTRIBFVARBPROC)glewGetProcAddress((const GLubyte*)"wglGetPixelFormatAttribfvARB")) == NULL) || r; + r = ((wglGetPixelFormatAttribivARB = (PFNWGLGETPIXELFORMATATTRIBIVARBPROC)glewGetProcAddress((const GLubyte*)"wglGetPixelFormatAttribivARB")) == NULL) || r; + + return r; +} + +#endif /* WGL_ARB_pixel_format */ + +#ifdef WGL_ARB_pixel_format_float + +#endif /* WGL_ARB_pixel_format_float */ + +#ifdef WGL_ARB_render_texture + +static GLboolean _glewInit_WGL_ARB_render_texture (WGLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((wglBindTexImageARB = (PFNWGLBINDTEXIMAGEARBPROC)glewGetProcAddress((const GLubyte*)"wglBindTexImageARB")) == NULL) || r; + r = ((wglReleaseTexImageARB = (PFNWGLRELEASETEXIMAGEARBPROC)glewGetProcAddress((const GLubyte*)"wglReleaseTexImageARB")) == NULL) || r; + r = ((wglSetPbufferAttribARB = (PFNWGLSETPBUFFERATTRIBARBPROC)glewGetProcAddress((const GLubyte*)"wglSetPbufferAttribARB")) == NULL) || r; + + return r; +} + +#endif /* WGL_ARB_render_texture */ + +#ifdef WGL_ATI_pixel_format_float + +#endif /* WGL_ATI_pixel_format_float */ + +#ifdef WGL_ATI_render_texture_rectangle + +#endif /* WGL_ATI_render_texture_rectangle */ + +#ifdef WGL_EXT_depth_float + +#endif /* WGL_EXT_depth_float */ + +#ifdef WGL_EXT_display_color_table + +static GLboolean _glewInit_WGL_EXT_display_color_table (WGLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((wglBindDisplayColorTableEXT = (PFNWGLBINDDISPLAYCOLORTABLEEXTPROC)glewGetProcAddress((const GLubyte*)"wglBindDisplayColorTableEXT")) == NULL) || r; + r = ((wglCreateDisplayColorTableEXT = (PFNWGLCREATEDISPLAYCOLORTABLEEXTPROC)glewGetProcAddress((const GLubyte*)"wglCreateDisplayColorTableEXT")) == NULL) || r; + r = ((wglDestroyDisplayColorTableEXT = (PFNWGLDESTROYDISPLAYCOLORTABLEEXTPROC)glewGetProcAddress((const 
GLubyte*)"wglDestroyDisplayColorTableEXT")) == NULL) || r; + r = ((wglLoadDisplayColorTableEXT = (PFNWGLLOADDISPLAYCOLORTABLEEXTPROC)glewGetProcAddress((const GLubyte*)"wglLoadDisplayColorTableEXT")) == NULL) || r; + + return r; +} + +#endif /* WGL_EXT_display_color_table */ + +#ifdef WGL_EXT_extensions_string + +static GLboolean _glewInit_WGL_EXT_extensions_string (WGLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((wglGetExtensionsStringEXT = (PFNWGLGETEXTENSIONSSTRINGEXTPROC)glewGetProcAddress((const GLubyte*)"wglGetExtensionsStringEXT")) == NULL) || r; + + return r; +} + +#endif /* WGL_EXT_extensions_string */ + +#ifdef WGL_EXT_framebuffer_sRGB + +#endif /* WGL_EXT_framebuffer_sRGB */ + +#ifdef WGL_EXT_make_current_read + +static GLboolean _glewInit_WGL_EXT_make_current_read (WGLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((wglGetCurrentReadDCEXT = (PFNWGLGETCURRENTREADDCEXTPROC)glewGetProcAddress((const GLubyte*)"wglGetCurrentReadDCEXT")) == NULL) || r; + r = ((wglMakeContextCurrentEXT = (PFNWGLMAKECONTEXTCURRENTEXTPROC)glewGetProcAddress((const GLubyte*)"wglMakeContextCurrentEXT")) == NULL) || r; + + return r; +} + +#endif /* WGL_EXT_make_current_read */ + +#ifdef WGL_EXT_multisample + +#endif /* WGL_EXT_multisample */ + +#ifdef WGL_EXT_pbuffer + +static GLboolean _glewInit_WGL_EXT_pbuffer (WGLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((wglCreatePbufferEXT = (PFNWGLCREATEPBUFFEREXTPROC)glewGetProcAddress((const GLubyte*)"wglCreatePbufferEXT")) == NULL) || r; + r = ((wglDestroyPbufferEXT = (PFNWGLDESTROYPBUFFEREXTPROC)glewGetProcAddress((const GLubyte*)"wglDestroyPbufferEXT")) == NULL) || r; + r = ((wglGetPbufferDCEXT = (PFNWGLGETPBUFFERDCEXTPROC)glewGetProcAddress((const GLubyte*)"wglGetPbufferDCEXT")) == NULL) || r; + r = ((wglQueryPbufferEXT = (PFNWGLQUERYPBUFFEREXTPROC)glewGetProcAddress((const GLubyte*)"wglQueryPbufferEXT")) == NULL) || r; + r = ((wglReleasePbufferDCEXT = 
(PFNWGLRELEASEPBUFFERDCEXTPROC)glewGetProcAddress((const GLubyte*)"wglReleasePbufferDCEXT")) == NULL) || r; + + return r; +} + +#endif /* WGL_EXT_pbuffer */ + +#ifdef WGL_EXT_pixel_format + +static GLboolean _glewInit_WGL_EXT_pixel_format (WGLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((wglChoosePixelFormatEXT = (PFNWGLCHOOSEPIXELFORMATEXTPROC)glewGetProcAddress((const GLubyte*)"wglChoosePixelFormatEXT")) == NULL) || r; + r = ((wglGetPixelFormatAttribfvEXT = (PFNWGLGETPIXELFORMATATTRIBFVEXTPROC)glewGetProcAddress((const GLubyte*)"wglGetPixelFormatAttribfvEXT")) == NULL) || r; + r = ((wglGetPixelFormatAttribivEXT = (PFNWGLGETPIXELFORMATATTRIBIVEXTPROC)glewGetProcAddress((const GLubyte*)"wglGetPixelFormatAttribivEXT")) == NULL) || r; + + return r; +} + +#endif /* WGL_EXT_pixel_format */ + +#ifdef WGL_EXT_pixel_format_packed_float + +#endif /* WGL_EXT_pixel_format_packed_float */ + +#ifdef WGL_EXT_swap_control + +static GLboolean _glewInit_WGL_EXT_swap_control (WGLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((wglGetSwapIntervalEXT = (PFNWGLGETSWAPINTERVALEXTPROC)glewGetProcAddress((const GLubyte*)"wglGetSwapIntervalEXT")) == NULL) || r; + r = ((wglSwapIntervalEXT = (PFNWGLSWAPINTERVALEXTPROC)glewGetProcAddress((const GLubyte*)"wglSwapIntervalEXT")) == NULL) || r; + + return r; +} + +#endif /* WGL_EXT_swap_control */ + +#ifdef WGL_I3D_digital_video_control + +static GLboolean _glewInit_WGL_I3D_digital_video_control (WGLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((wglGetDigitalVideoParametersI3D = (PFNWGLGETDIGITALVIDEOPARAMETERSI3DPROC)glewGetProcAddress((const GLubyte*)"wglGetDigitalVideoParametersI3D")) == NULL) || r; + r = ((wglSetDigitalVideoParametersI3D = (PFNWGLSETDIGITALVIDEOPARAMETERSI3DPROC)glewGetProcAddress((const GLubyte*)"wglSetDigitalVideoParametersI3D")) == NULL) || r; + + return r; +} + +#endif /* WGL_I3D_digital_video_control */ + +#ifdef WGL_I3D_gamma + +static GLboolean _glewInit_WGL_I3D_gamma 
(WGLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((wglGetGammaTableI3D = (PFNWGLGETGAMMATABLEI3DPROC)glewGetProcAddress((const GLubyte*)"wglGetGammaTableI3D")) == NULL) || r; + r = ((wglGetGammaTableParametersI3D = (PFNWGLGETGAMMATABLEPARAMETERSI3DPROC)glewGetProcAddress((const GLubyte*)"wglGetGammaTableParametersI3D")) == NULL) || r; + r = ((wglSetGammaTableI3D = (PFNWGLSETGAMMATABLEI3DPROC)glewGetProcAddress((const GLubyte*)"wglSetGammaTableI3D")) == NULL) || r; + r = ((wglSetGammaTableParametersI3D = (PFNWGLSETGAMMATABLEPARAMETERSI3DPROC)glewGetProcAddress((const GLubyte*)"wglSetGammaTableParametersI3D")) == NULL) || r; + + return r; +} + +#endif /* WGL_I3D_gamma */ + +#ifdef WGL_I3D_genlock + +static GLboolean _glewInit_WGL_I3D_genlock (WGLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((wglDisableGenlockI3D = (PFNWGLDISABLEGENLOCKI3DPROC)glewGetProcAddress((const GLubyte*)"wglDisableGenlockI3D")) == NULL) || r; + r = ((wglEnableGenlockI3D = (PFNWGLENABLEGENLOCKI3DPROC)glewGetProcAddress((const GLubyte*)"wglEnableGenlockI3D")) == NULL) || r; + r = ((wglGenlockSampleRateI3D = (PFNWGLGENLOCKSAMPLERATEI3DPROC)glewGetProcAddress((const GLubyte*)"wglGenlockSampleRateI3D")) == NULL) || r; + r = ((wglGenlockSourceDelayI3D = (PFNWGLGENLOCKSOURCEDELAYI3DPROC)glewGetProcAddress((const GLubyte*)"wglGenlockSourceDelayI3D")) == NULL) || r; + r = ((wglGenlockSourceEdgeI3D = (PFNWGLGENLOCKSOURCEEDGEI3DPROC)glewGetProcAddress((const GLubyte*)"wglGenlockSourceEdgeI3D")) == NULL) || r; + r = ((wglGenlockSourceI3D = (PFNWGLGENLOCKSOURCEI3DPROC)glewGetProcAddress((const GLubyte*)"wglGenlockSourceI3D")) == NULL) || r; + r = ((wglGetGenlockSampleRateI3D = (PFNWGLGETGENLOCKSAMPLERATEI3DPROC)glewGetProcAddress((const GLubyte*)"wglGetGenlockSampleRateI3D")) == NULL) || r; + r = ((wglGetGenlockSourceDelayI3D = (PFNWGLGETGENLOCKSOURCEDELAYI3DPROC)glewGetProcAddress((const GLubyte*)"wglGetGenlockSourceDelayI3D")) == NULL) || r; + r = 
((wglGetGenlockSourceEdgeI3D = (PFNWGLGETGENLOCKSOURCEEDGEI3DPROC)glewGetProcAddress((const GLubyte*)"wglGetGenlockSourceEdgeI3D")) == NULL) || r; + r = ((wglGetGenlockSourceI3D = (PFNWGLGETGENLOCKSOURCEI3DPROC)glewGetProcAddress((const GLubyte*)"wglGetGenlockSourceI3D")) == NULL) || r; + r = ((wglIsEnabledGenlockI3D = (PFNWGLISENABLEDGENLOCKI3DPROC)glewGetProcAddress((const GLubyte*)"wglIsEnabledGenlockI3D")) == NULL) || r; + r = ((wglQueryGenlockMaxSourceDelayI3D = (PFNWGLQUERYGENLOCKMAXSOURCEDELAYI3DPROC)glewGetProcAddress((const GLubyte*)"wglQueryGenlockMaxSourceDelayI3D")) == NULL) || r; + + return r; +} + +#endif /* WGL_I3D_genlock */ + +#ifdef WGL_I3D_image_buffer + +static GLboolean _glewInit_WGL_I3D_image_buffer (WGLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((wglAssociateImageBufferEventsI3D = (PFNWGLASSOCIATEIMAGEBUFFEREVENTSI3DPROC)glewGetProcAddress((const GLubyte*)"wglAssociateImageBufferEventsI3D")) == NULL) || r; + r = ((wglCreateImageBufferI3D = (PFNWGLCREATEIMAGEBUFFERI3DPROC)glewGetProcAddress((const GLubyte*)"wglCreateImageBufferI3D")) == NULL) || r; + r = ((wglDestroyImageBufferI3D = (PFNWGLDESTROYIMAGEBUFFERI3DPROC)glewGetProcAddress((const GLubyte*)"wglDestroyImageBufferI3D")) == NULL) || r; + r = ((wglReleaseImageBufferEventsI3D = (PFNWGLRELEASEIMAGEBUFFEREVENTSI3DPROC)glewGetProcAddress((const GLubyte*)"wglReleaseImageBufferEventsI3D")) == NULL) || r; + + return r; +} + +#endif /* WGL_I3D_image_buffer */ + +#ifdef WGL_I3D_swap_frame_lock + +static GLboolean _glewInit_WGL_I3D_swap_frame_lock (WGLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((wglDisableFrameLockI3D = (PFNWGLDISABLEFRAMELOCKI3DPROC)glewGetProcAddress((const GLubyte*)"wglDisableFrameLockI3D")) == NULL) || r; + r = ((wglEnableFrameLockI3D = (PFNWGLENABLEFRAMELOCKI3DPROC)glewGetProcAddress((const GLubyte*)"wglEnableFrameLockI3D")) == NULL) || r; + r = ((wglIsEnabledFrameLockI3D = (PFNWGLISENABLEDFRAMELOCKI3DPROC)glewGetProcAddress((const 
GLubyte*)"wglIsEnabledFrameLockI3D")) == NULL) || r; + r = ((wglQueryFrameLockMasterI3D = (PFNWGLQUERYFRAMELOCKMASTERI3DPROC)glewGetProcAddress((const GLubyte*)"wglQueryFrameLockMasterI3D")) == NULL) || r; + + return r; +} + +#endif /* WGL_I3D_swap_frame_lock */ + +#ifdef WGL_I3D_swap_frame_usage + +static GLboolean _glewInit_WGL_I3D_swap_frame_usage (WGLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((wglBeginFrameTrackingI3D = (PFNWGLBEGINFRAMETRACKINGI3DPROC)glewGetProcAddress((const GLubyte*)"wglBeginFrameTrackingI3D")) == NULL) || r; + r = ((wglEndFrameTrackingI3D = (PFNWGLENDFRAMETRACKINGI3DPROC)glewGetProcAddress((const GLubyte*)"wglEndFrameTrackingI3D")) == NULL) || r; + r = ((wglGetFrameUsageI3D = (PFNWGLGETFRAMEUSAGEI3DPROC)glewGetProcAddress((const GLubyte*)"wglGetFrameUsageI3D")) == NULL) || r; + r = ((wglQueryFrameTrackingI3D = (PFNWGLQUERYFRAMETRACKINGI3DPROC)glewGetProcAddress((const GLubyte*)"wglQueryFrameTrackingI3D")) == NULL) || r; + + return r; +} + +#endif /* WGL_I3D_swap_frame_usage */ + +#ifdef WGL_NV_float_buffer + +#endif /* WGL_NV_float_buffer */ + +#ifdef WGL_NV_gpu_affinity + +static GLboolean _glewInit_WGL_NV_gpu_affinity (WGLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((wglCreateAffinityDCNV = (PFNWGLCREATEAFFINITYDCNVPROC)glewGetProcAddress((const GLubyte*)"wglCreateAffinityDCNV")) == NULL) || r; + r = ((wglDeleteDCNV = (PFNWGLDELETEDCNVPROC)glewGetProcAddress((const GLubyte*)"wglDeleteDCNV")) == NULL) || r; + r = ((wglEnumGpuDevicesNV = (PFNWGLENUMGPUDEVICESNVPROC)glewGetProcAddress((const GLubyte*)"wglEnumGpuDevicesNV")) == NULL) || r; + r = ((wglEnumGpusFromAffinityDCNV = (PFNWGLENUMGPUSFROMAFFINITYDCNVPROC)glewGetProcAddress((const GLubyte*)"wglEnumGpusFromAffinityDCNV")) == NULL) || r; + r = ((wglEnumGpusNV = (PFNWGLENUMGPUSNVPROC)glewGetProcAddress((const GLubyte*)"wglEnumGpusNV")) == NULL) || r; + + return r; +} + +#endif /* WGL_NV_gpu_affinity */ + +#ifdef WGL_NV_render_depth_texture + +#endif 
/* WGL_NV_render_depth_texture */ + +#ifdef WGL_NV_render_texture_rectangle + +#endif /* WGL_NV_render_texture_rectangle */ + +#ifdef WGL_NV_vertex_array_range + +static GLboolean _glewInit_WGL_NV_vertex_array_range (WGLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((wglAllocateMemoryNV = (PFNWGLALLOCATEMEMORYNVPROC)glewGetProcAddress((const GLubyte*)"wglAllocateMemoryNV")) == NULL) || r; + r = ((wglFreeMemoryNV = (PFNWGLFREEMEMORYNVPROC)glewGetProcAddress((const GLubyte*)"wglFreeMemoryNV")) == NULL) || r; + + return r; +} + +#endif /* WGL_NV_vertex_array_range */ + +#ifdef WGL_OML_sync_control + +static GLboolean _glewInit_WGL_OML_sync_control (WGLEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((wglGetMscRateOML = (PFNWGLGETMSCRATEOMLPROC)glewGetProcAddress((const GLubyte*)"wglGetMscRateOML")) == NULL) || r; + r = ((wglGetSyncValuesOML = (PFNWGLGETSYNCVALUESOMLPROC)glewGetProcAddress((const GLubyte*)"wglGetSyncValuesOML")) == NULL) || r; + r = ((wglSwapBuffersMscOML = (PFNWGLSWAPBUFFERSMSCOMLPROC)glewGetProcAddress((const GLubyte*)"wglSwapBuffersMscOML")) == NULL) || r; + r = ((wglSwapLayerBuffersMscOML = (PFNWGLSWAPLAYERBUFFERSMSCOMLPROC)glewGetProcAddress((const GLubyte*)"wglSwapLayerBuffersMscOML")) == NULL) || r; + r = ((wglWaitForMscOML = (PFNWGLWAITFORMSCOMLPROC)glewGetProcAddress((const GLubyte*)"wglWaitForMscOML")) == NULL) || r; + r = ((wglWaitForSbcOML = (PFNWGLWAITFORSBCOMLPROC)glewGetProcAddress((const GLubyte*)"wglWaitForSbcOML")) == NULL) || r; + + return r; +} + +#endif /* WGL_OML_sync_control */ + +/* ------------------------------------------------------------------------- */ + +static PFNWGLGETEXTENSIONSSTRINGARBPROC _wglewGetExtensionsStringARB = NULL; +static PFNWGLGETEXTENSIONSSTRINGEXTPROC _wglewGetExtensionsStringEXT = NULL; + +GLboolean wglewGetExtension (const char* name) +{ + GLubyte* p; + GLubyte* end; + GLuint len = _glewStrLen((const GLubyte*)name); + if (_wglewGetExtensionsStringARB == NULL) + if 
(_wglewGetExtensionsStringEXT == NULL) + return GL_FALSE; + else + p = (GLubyte*)_wglewGetExtensionsStringEXT(); + else + p = (GLubyte*)_wglewGetExtensionsStringARB(wglGetCurrentDC()); + if (0 == p) return GL_FALSE; + end = p + _glewStrLen(p); + while (p < end) + { + GLuint n = _glewStrCLen(p, ' '); + if (len == n && _glewStrSame((const GLubyte*)name, p, n)) return GL_TRUE; + p += n+1; + } + return GL_FALSE; +} + +GLenum wglewContextInit (WGLEW_CONTEXT_ARG_DEF_LIST) +{ + GLboolean crippled; + /* find wgl extension string query functions */ + _wglewGetExtensionsStringARB = (PFNWGLGETEXTENSIONSSTRINGARBPROC)glewGetProcAddress((const GLubyte*)"wglGetExtensionsStringARB"); + _wglewGetExtensionsStringEXT = (PFNWGLGETEXTENSIONSSTRINGEXTPROC)glewGetProcAddress((const GLubyte*)"wglGetExtensionsStringEXT"); + /* initialize extensions */ + crippled = _wglewGetExtensionsStringARB == NULL && _wglewGetExtensionsStringEXT == NULL; +#ifdef WGL_3DFX_multisample + CONST_CAST(WGLEW_3DFX_multisample) = wglewGetExtension("WGL_3DFX_multisample"); +#endif /* WGL_3DFX_multisample */ +#ifdef WGL_3DL_stereo_control + CONST_CAST(WGLEW_3DL_stereo_control) = wglewGetExtension("WGL_3DL_stereo_control"); + if (glewExperimental || WGLEW_3DL_stereo_control|| crippled) CONST_CAST(WGLEW_3DL_stereo_control)= !_glewInit_WGL_3DL_stereo_control(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* WGL_3DL_stereo_control */ +#ifdef WGL_ARB_buffer_region + CONST_CAST(WGLEW_ARB_buffer_region) = wglewGetExtension("WGL_ARB_buffer_region"); + if (glewExperimental || WGLEW_ARB_buffer_region|| crippled) CONST_CAST(WGLEW_ARB_buffer_region)= !_glewInit_WGL_ARB_buffer_region(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* WGL_ARB_buffer_region */ +#ifdef WGL_ARB_extensions_string + CONST_CAST(WGLEW_ARB_extensions_string) = wglewGetExtension("WGL_ARB_extensions_string"); + if (glewExperimental || WGLEW_ARB_extensions_string|| crippled) CONST_CAST(WGLEW_ARB_extensions_string)= 
!_glewInit_WGL_ARB_extensions_string(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* WGL_ARB_extensions_string */ +#ifdef WGL_ARB_make_current_read + CONST_CAST(WGLEW_ARB_make_current_read) = wglewGetExtension("WGL_ARB_make_current_read"); + if (glewExperimental || WGLEW_ARB_make_current_read|| crippled) CONST_CAST(WGLEW_ARB_make_current_read)= !_glewInit_WGL_ARB_make_current_read(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* WGL_ARB_make_current_read */ +#ifdef WGL_ARB_multisample + CONST_CAST(WGLEW_ARB_multisample) = wglewGetExtension("WGL_ARB_multisample"); +#endif /* WGL_ARB_multisample */ +#ifdef WGL_ARB_pbuffer + CONST_CAST(WGLEW_ARB_pbuffer) = wglewGetExtension("WGL_ARB_pbuffer"); + if (glewExperimental || WGLEW_ARB_pbuffer|| crippled) CONST_CAST(WGLEW_ARB_pbuffer)= !_glewInit_WGL_ARB_pbuffer(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* WGL_ARB_pbuffer */ +#ifdef WGL_ARB_pixel_format + CONST_CAST(WGLEW_ARB_pixel_format) = wglewGetExtension("WGL_ARB_pixel_format"); + if (glewExperimental || WGLEW_ARB_pixel_format|| crippled) CONST_CAST(WGLEW_ARB_pixel_format)= !_glewInit_WGL_ARB_pixel_format(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* WGL_ARB_pixel_format */ +#ifdef WGL_ARB_pixel_format_float + CONST_CAST(WGLEW_ARB_pixel_format_float) = wglewGetExtension("WGL_ARB_pixel_format_float"); +#endif /* WGL_ARB_pixel_format_float */ +#ifdef WGL_ARB_render_texture + CONST_CAST(WGLEW_ARB_render_texture) = wglewGetExtension("WGL_ARB_render_texture"); + if (glewExperimental || WGLEW_ARB_render_texture|| crippled) CONST_CAST(WGLEW_ARB_render_texture)= !_glewInit_WGL_ARB_render_texture(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* WGL_ARB_render_texture */ +#ifdef WGL_ATI_pixel_format_float + CONST_CAST(WGLEW_ATI_pixel_format_float) = wglewGetExtension("WGL_ATI_pixel_format_float"); +#endif /* WGL_ATI_pixel_format_float */ +#ifdef WGL_ATI_render_texture_rectangle + CONST_CAST(WGLEW_ATI_render_texture_rectangle) = wglewGetExtension("WGL_ATI_render_texture_rectangle"); +#endif /* 
WGL_ATI_render_texture_rectangle */ +#ifdef WGL_EXT_depth_float + CONST_CAST(WGLEW_EXT_depth_float) = wglewGetExtension("WGL_EXT_depth_float"); +#endif /* WGL_EXT_depth_float */ +#ifdef WGL_EXT_display_color_table + CONST_CAST(WGLEW_EXT_display_color_table) = wglewGetExtension("WGL_EXT_display_color_table"); + if (glewExperimental || WGLEW_EXT_display_color_table|| crippled) CONST_CAST(WGLEW_EXT_display_color_table)= !_glewInit_WGL_EXT_display_color_table(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* WGL_EXT_display_color_table */ +#ifdef WGL_EXT_extensions_string + CONST_CAST(WGLEW_EXT_extensions_string) = wglewGetExtension("WGL_EXT_extensions_string"); + if (glewExperimental || WGLEW_EXT_extensions_string|| crippled) CONST_CAST(WGLEW_EXT_extensions_string)= !_glewInit_WGL_EXT_extensions_string(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* WGL_EXT_extensions_string */ +#ifdef WGL_EXT_framebuffer_sRGB + CONST_CAST(WGLEW_EXT_framebuffer_sRGB) = wglewGetExtension("WGL_EXT_framebuffer_sRGB"); +#endif /* WGL_EXT_framebuffer_sRGB */ +#ifdef WGL_EXT_make_current_read + CONST_CAST(WGLEW_EXT_make_current_read) = wglewGetExtension("WGL_EXT_make_current_read"); + if (glewExperimental || WGLEW_EXT_make_current_read|| crippled) CONST_CAST(WGLEW_EXT_make_current_read)= !_glewInit_WGL_EXT_make_current_read(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* WGL_EXT_make_current_read */ +#ifdef WGL_EXT_multisample + CONST_CAST(WGLEW_EXT_multisample) = wglewGetExtension("WGL_EXT_multisample"); +#endif /* WGL_EXT_multisample */ +#ifdef WGL_EXT_pbuffer + CONST_CAST(WGLEW_EXT_pbuffer) = wglewGetExtension("WGL_EXT_pbuffer"); + if (glewExperimental || WGLEW_EXT_pbuffer|| crippled) CONST_CAST(WGLEW_EXT_pbuffer)= !_glewInit_WGL_EXT_pbuffer(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* WGL_EXT_pbuffer */ +#ifdef WGL_EXT_pixel_format + CONST_CAST(WGLEW_EXT_pixel_format) = wglewGetExtension("WGL_EXT_pixel_format"); + if (glewExperimental || WGLEW_EXT_pixel_format|| crippled) CONST_CAST(WGLEW_EXT_pixel_format)= 
!_glewInit_WGL_EXT_pixel_format(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* WGL_EXT_pixel_format */ +#ifdef WGL_EXT_pixel_format_packed_float + CONST_CAST(WGLEW_EXT_pixel_format_packed_float) = wglewGetExtension("WGL_EXT_pixel_format_packed_float"); +#endif /* WGL_EXT_pixel_format_packed_float */ +#ifdef WGL_EXT_swap_control + CONST_CAST(WGLEW_EXT_swap_control) = wglewGetExtension("WGL_EXT_swap_control"); + if (glewExperimental || WGLEW_EXT_swap_control|| crippled) CONST_CAST(WGLEW_EXT_swap_control)= !_glewInit_WGL_EXT_swap_control(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* WGL_EXT_swap_control */ +#ifdef WGL_I3D_digital_video_control + CONST_CAST(WGLEW_I3D_digital_video_control) = wglewGetExtension("WGL_I3D_digital_video_control"); + if (glewExperimental || WGLEW_I3D_digital_video_control|| crippled) CONST_CAST(WGLEW_I3D_digital_video_control)= !_glewInit_WGL_I3D_digital_video_control(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* WGL_I3D_digital_video_control */ +#ifdef WGL_I3D_gamma + CONST_CAST(WGLEW_I3D_gamma) = wglewGetExtension("WGL_I3D_gamma"); + if (glewExperimental || WGLEW_I3D_gamma|| crippled) CONST_CAST(WGLEW_I3D_gamma)= !_glewInit_WGL_I3D_gamma(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* WGL_I3D_gamma */ +#ifdef WGL_I3D_genlock + CONST_CAST(WGLEW_I3D_genlock) = wglewGetExtension("WGL_I3D_genlock"); + if (glewExperimental || WGLEW_I3D_genlock|| crippled) CONST_CAST(WGLEW_I3D_genlock)= !_glewInit_WGL_I3D_genlock(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* WGL_I3D_genlock */ +#ifdef WGL_I3D_image_buffer + CONST_CAST(WGLEW_I3D_image_buffer) = wglewGetExtension("WGL_I3D_image_buffer"); + if (glewExperimental || WGLEW_I3D_image_buffer|| crippled) CONST_CAST(WGLEW_I3D_image_buffer)= !_glewInit_WGL_I3D_image_buffer(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* WGL_I3D_image_buffer */ +#ifdef WGL_I3D_swap_frame_lock + CONST_CAST(WGLEW_I3D_swap_frame_lock) = wglewGetExtension("WGL_I3D_swap_frame_lock"); + if (glewExperimental || WGLEW_I3D_swap_frame_lock|| crippled) 
CONST_CAST(WGLEW_I3D_swap_frame_lock)= !_glewInit_WGL_I3D_swap_frame_lock(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* WGL_I3D_swap_frame_lock */ +#ifdef WGL_I3D_swap_frame_usage + CONST_CAST(WGLEW_I3D_swap_frame_usage) = wglewGetExtension("WGL_I3D_swap_frame_usage"); + if (glewExperimental || WGLEW_I3D_swap_frame_usage|| crippled) CONST_CAST(WGLEW_I3D_swap_frame_usage)= !_glewInit_WGL_I3D_swap_frame_usage(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* WGL_I3D_swap_frame_usage */ +#ifdef WGL_NV_float_buffer + CONST_CAST(WGLEW_NV_float_buffer) = wglewGetExtension("WGL_NV_float_buffer"); +#endif /* WGL_NV_float_buffer */ +#ifdef WGL_NV_gpu_affinity + CONST_CAST(WGLEW_NV_gpu_affinity) = wglewGetExtension("WGL_NV_gpu_affinity"); + if (glewExperimental || WGLEW_NV_gpu_affinity|| crippled) CONST_CAST(WGLEW_NV_gpu_affinity)= !_glewInit_WGL_NV_gpu_affinity(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* WGL_NV_gpu_affinity */ +#ifdef WGL_NV_render_depth_texture + CONST_CAST(WGLEW_NV_render_depth_texture) = wglewGetExtension("WGL_NV_render_depth_texture"); +#endif /* WGL_NV_render_depth_texture */ +#ifdef WGL_NV_render_texture_rectangle + CONST_CAST(WGLEW_NV_render_texture_rectangle) = wglewGetExtension("WGL_NV_render_texture_rectangle"); +#endif /* WGL_NV_render_texture_rectangle */ +#ifdef WGL_NV_vertex_array_range + CONST_CAST(WGLEW_NV_vertex_array_range) = wglewGetExtension("WGL_NV_vertex_array_range"); + if (glewExperimental || WGLEW_NV_vertex_array_range|| crippled) CONST_CAST(WGLEW_NV_vertex_array_range)= !_glewInit_WGL_NV_vertex_array_range(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* WGL_NV_vertex_array_range */ +#ifdef WGL_OML_sync_control + CONST_CAST(WGLEW_OML_sync_control) = wglewGetExtension("WGL_OML_sync_control"); + if (glewExperimental || WGLEW_OML_sync_control|| crippled) CONST_CAST(WGLEW_OML_sync_control)= !_glewInit_WGL_OML_sync_control(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* WGL_OML_sync_control */ + + return GLEW_OK; +} + +#elif !defined(__APPLE__) || defined(GLEW_APPLE_GLX) + 
+PFNGLXGETCURRENTDISPLAYPROC __glewXGetCurrentDisplay = NULL; + +PFNGLXCHOOSEFBCONFIGPROC __glewXChooseFBConfig = NULL; +PFNGLXCREATENEWCONTEXTPROC __glewXCreateNewContext = NULL; +PFNGLXCREATEPBUFFERPROC __glewXCreatePbuffer = NULL; +PFNGLXCREATEPIXMAPPROC __glewXCreatePixmap = NULL; +PFNGLXCREATEWINDOWPROC __glewXCreateWindow = NULL; +PFNGLXDESTROYPBUFFERPROC __glewXDestroyPbuffer = NULL; +PFNGLXDESTROYPIXMAPPROC __glewXDestroyPixmap = NULL; +PFNGLXDESTROYWINDOWPROC __glewXDestroyWindow = NULL; +PFNGLXGETCURRENTREADDRAWABLEPROC __glewXGetCurrentReadDrawable = NULL; +PFNGLXGETFBCONFIGATTRIBPROC __glewXGetFBConfigAttrib = NULL; +PFNGLXGETFBCONFIGSPROC __glewXGetFBConfigs = NULL; +PFNGLXGETSELECTEDEVENTPROC __glewXGetSelectedEvent = NULL; +PFNGLXGETVISUALFROMFBCONFIGPROC __glewXGetVisualFromFBConfig = NULL; +PFNGLXMAKECONTEXTCURRENTPROC __glewXMakeContextCurrent = NULL; +PFNGLXQUERYCONTEXTPROC __glewXQueryContext = NULL; +PFNGLXQUERYDRAWABLEPROC __glewXQueryDrawable = NULL; +PFNGLXSELECTEVENTPROC __glewXSelectEvent = NULL; + +PFNGLXBINDTEXIMAGEATIPROC __glewXBindTexImageATI = NULL; +PFNGLXDRAWABLEATTRIBATIPROC __glewXDrawableAttribATI = NULL; +PFNGLXRELEASETEXIMAGEATIPROC __glewXReleaseTexImageATI = NULL; + +PFNGLXFREECONTEXTEXTPROC __glewXFreeContextEXT = NULL; +PFNGLXGETCONTEXTIDEXTPROC __glewXGetContextIDEXT = NULL; +PFNGLXIMPORTCONTEXTEXTPROC __glewXImportContextEXT = NULL; +PFNGLXQUERYCONTEXTINFOEXTPROC __glewXQueryContextInfoEXT = NULL; + +PFNGLXBINDTEXIMAGEEXTPROC __glewXBindTexImageEXT = NULL; +PFNGLXRELEASETEXIMAGEEXTPROC __glewXReleaseTexImageEXT = NULL; + +PFNGLXGETAGPOFFSETMESAPROC __glewXGetAGPOffsetMESA = NULL; + +PFNGLXCOPYSUBBUFFERMESAPROC __glewXCopySubBufferMESA = NULL; + +PFNGLXCREATEGLXPIXMAPMESAPROC __glewXCreateGLXPixmapMESA = NULL; + +PFNGLXRELEASEBUFFERSMESAPROC __glewXReleaseBuffersMESA = NULL; + +PFNGLXSET3DFXMODEMESAPROC __glewXSet3DfxModeMESA = NULL; + +PFNGLXALLOCATEMEMORYNVPROC __glewXAllocateMemoryNV = NULL; +PFNGLXFREEMEMORYNVPROC 
__glewXFreeMemoryNV = NULL; + +#ifdef GLX_OML_sync_control +PFNGLXGETMSCRATEOMLPROC __glewXGetMscRateOML = NULL; +PFNGLXGETSYNCVALUESOMLPROC __glewXGetSyncValuesOML = NULL; +PFNGLXSWAPBUFFERSMSCOMLPROC __glewXSwapBuffersMscOML = NULL; +PFNGLXWAITFORMSCOMLPROC __glewXWaitForMscOML = NULL; +PFNGLXWAITFORSBCOMLPROC __glewXWaitForSbcOML = NULL; +#endif + +PFNGLXCHOOSEFBCONFIGSGIXPROC __glewXChooseFBConfigSGIX = NULL; +PFNGLXCREATECONTEXTWITHCONFIGSGIXPROC __glewXCreateContextWithConfigSGIX = NULL; +PFNGLXCREATEGLXPIXMAPWITHCONFIGSGIXPROC __glewXCreateGLXPixmapWithConfigSGIX = NULL; +PFNGLXGETFBCONFIGATTRIBSGIXPROC __glewXGetFBConfigAttribSGIX = NULL; +PFNGLXGETFBCONFIGFROMVISUALSGIXPROC __glewXGetFBConfigFromVisualSGIX = NULL; +PFNGLXGETVISUALFROMFBCONFIGSGIXPROC __glewXGetVisualFromFBConfigSGIX = NULL; + +PFNGLXBINDHYPERPIPESGIXPROC __glewXBindHyperpipeSGIX = NULL; +PFNGLXDESTROYHYPERPIPECONFIGSGIXPROC __glewXDestroyHyperpipeConfigSGIX = NULL; +PFNGLXHYPERPIPEATTRIBSGIXPROC __glewXHyperpipeAttribSGIX = NULL; +PFNGLXHYPERPIPECONFIGSGIXPROC __glewXHyperpipeConfigSGIX = NULL; +PFNGLXQUERYHYPERPIPEATTRIBSGIXPROC __glewXQueryHyperpipeAttribSGIX = NULL; +PFNGLXQUERYHYPERPIPEBESTATTRIBSGIXPROC __glewXQueryHyperpipeBestAttribSGIX = NULL; +PFNGLXQUERYHYPERPIPECONFIGSGIXPROC __glewXQueryHyperpipeConfigSGIX = NULL; +PFNGLXQUERYHYPERPIPENETWORKSGIXPROC __glewXQueryHyperpipeNetworkSGIX = NULL; + +PFNGLXCREATEGLXPBUFFERSGIXPROC __glewXCreateGLXPbufferSGIX = NULL; +PFNGLXDESTROYGLXPBUFFERSGIXPROC __glewXDestroyGLXPbufferSGIX = NULL; +PFNGLXGETSELECTEDEVENTSGIXPROC __glewXGetSelectedEventSGIX = NULL; +PFNGLXQUERYGLXPBUFFERSGIXPROC __glewXQueryGLXPbufferSGIX = NULL; +PFNGLXSELECTEVENTSGIXPROC __glewXSelectEventSGIX = NULL; + +PFNGLXBINDSWAPBARRIERSGIXPROC __glewXBindSwapBarrierSGIX = NULL; +PFNGLXQUERYMAXSWAPBARRIERSSGIXPROC __glewXQueryMaxSwapBarriersSGIX = NULL; + +PFNGLXJOINSWAPGROUPSGIXPROC __glewXJoinSwapGroupSGIX = NULL; + +PFNGLXBINDCHANNELTOWINDOWSGIXPROC 
__glewXBindChannelToWindowSGIX = NULL; +PFNGLXCHANNELRECTSGIXPROC __glewXChannelRectSGIX = NULL; +PFNGLXCHANNELRECTSYNCSGIXPROC __glewXChannelRectSyncSGIX = NULL; +PFNGLXQUERYCHANNELDELTASSGIXPROC __glewXQueryChannelDeltasSGIX = NULL; +PFNGLXQUERYCHANNELRECTSGIXPROC __glewXQueryChannelRectSGIX = NULL; + +PFNGLXCUSHIONSGIPROC __glewXCushionSGI = NULL; + +PFNGLXGETCURRENTREADDRAWABLESGIPROC __glewXGetCurrentReadDrawableSGI = NULL; +PFNGLXMAKECURRENTREADSGIPROC __glewXMakeCurrentReadSGI = NULL; + +PFNGLXSWAPINTERVALSGIPROC __glewXSwapIntervalSGI = NULL; + +PFNGLXGETVIDEOSYNCSGIPROC __glewXGetVideoSyncSGI = NULL; +PFNGLXWAITVIDEOSYNCSGIPROC __glewXWaitVideoSyncSGI = NULL; + +PFNGLXGETTRANSPARENTINDEXSUNPROC __glewXGetTransparentIndexSUN = NULL; + +PFNGLXGETVIDEORESIZESUNPROC __glewXGetVideoResizeSUN = NULL; +PFNGLXVIDEORESIZESUNPROC __glewXVideoResizeSUN = NULL; + +#if !defined(GLEW_MX) + +GLboolean __GLXEW_VERSION_1_0 = GL_FALSE; +GLboolean __GLXEW_VERSION_1_1 = GL_FALSE; +GLboolean __GLXEW_VERSION_1_2 = GL_FALSE; +GLboolean __GLXEW_VERSION_1_3 = GL_FALSE; +GLboolean __GLXEW_VERSION_1_4 = GL_FALSE; +GLboolean __GLXEW_3DFX_multisample = GL_FALSE; +GLboolean __GLXEW_ARB_fbconfig_float = GL_FALSE; +GLboolean __GLXEW_ARB_get_proc_address = GL_FALSE; +GLboolean __GLXEW_ARB_multisample = GL_FALSE; +GLboolean __GLXEW_ATI_pixel_format_float = GL_FALSE; +GLboolean __GLXEW_ATI_render_texture = GL_FALSE; +GLboolean __GLXEW_EXT_fbconfig_packed_float = GL_FALSE; +GLboolean __GLXEW_EXT_framebuffer_sRGB = GL_FALSE; +GLboolean __GLXEW_EXT_import_context = GL_FALSE; +GLboolean __GLXEW_EXT_scene_marker = GL_FALSE; +GLboolean __GLXEW_EXT_texture_from_pixmap = GL_FALSE; +GLboolean __GLXEW_EXT_visual_info = GL_FALSE; +GLboolean __GLXEW_EXT_visual_rating = GL_FALSE; +GLboolean __GLXEW_MESA_agp_offset = GL_FALSE; +GLboolean __GLXEW_MESA_copy_sub_buffer = GL_FALSE; +GLboolean __GLXEW_MESA_pixmap_colormap = GL_FALSE; +GLboolean __GLXEW_MESA_release_buffers = GL_FALSE; +GLboolean 
__GLXEW_MESA_set_3dfx_mode = GL_FALSE; +GLboolean __GLXEW_NV_float_buffer = GL_FALSE; +GLboolean __GLXEW_NV_vertex_array_range = GL_FALSE; +GLboolean __GLXEW_OML_swap_method = GL_FALSE; +#ifdef GLX_OML_sync_control +GLboolean __GLXEW_OML_sync_control = GL_FALSE; +#endif +GLboolean __GLXEW_SGIS_blended_overlay = GL_FALSE; +GLboolean __GLXEW_SGIS_color_range = GL_FALSE; +GLboolean __GLXEW_SGIS_multisample = GL_FALSE; +GLboolean __GLXEW_SGIS_shared_multisample = GL_FALSE; +GLboolean __GLXEW_SGIX_fbconfig = GL_FALSE; +GLboolean __GLXEW_SGIX_hyperpipe = GL_FALSE; +GLboolean __GLXEW_SGIX_pbuffer = GL_FALSE; +GLboolean __GLXEW_SGIX_swap_barrier = GL_FALSE; +GLboolean __GLXEW_SGIX_swap_group = GL_FALSE; +GLboolean __GLXEW_SGIX_video_resize = GL_FALSE; +GLboolean __GLXEW_SGIX_visual_select_group = GL_FALSE; +GLboolean __GLXEW_SGI_cushion = GL_FALSE; +GLboolean __GLXEW_SGI_make_current_read = GL_FALSE; +GLboolean __GLXEW_SGI_swap_control = GL_FALSE; +GLboolean __GLXEW_SGI_video_sync = GL_FALSE; +GLboolean __GLXEW_SUN_get_transparent_index = GL_FALSE; +GLboolean __GLXEW_SUN_video_resize = GL_FALSE; + +#endif /* !GLEW_MX */ + +#ifdef GLX_VERSION_1_2 + +static GLboolean _glewInit_GLX_VERSION_1_2 (GLXEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glXGetCurrentDisplay = (PFNGLXGETCURRENTDISPLAYPROC)glewGetProcAddress((const GLubyte*)"glXGetCurrentDisplay")) == NULL) || r; + + return r; +} + +#endif /* GLX_VERSION_1_2 */ + +#ifdef GLX_VERSION_1_3 + +static GLboolean _glewInit_GLX_VERSION_1_3 (GLXEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glXChooseFBConfig = (PFNGLXCHOOSEFBCONFIGPROC)glewGetProcAddress((const GLubyte*)"glXChooseFBConfig")) == NULL) || r; + r = ((glXCreateNewContext = (PFNGLXCREATENEWCONTEXTPROC)glewGetProcAddress((const GLubyte*)"glXCreateNewContext")) == NULL) || r; + r = ((glXCreatePbuffer = (PFNGLXCREATEPBUFFERPROC)glewGetProcAddress((const GLubyte*)"glXCreatePbuffer")) == NULL) || r; + r = ((glXCreatePixmap = 
(PFNGLXCREATEPIXMAPPROC)glewGetProcAddress((const GLubyte*)"glXCreatePixmap")) == NULL) || r; + r = ((glXCreateWindow = (PFNGLXCREATEWINDOWPROC)glewGetProcAddress((const GLubyte*)"glXCreateWindow")) == NULL) || r; + r = ((glXDestroyPbuffer = (PFNGLXDESTROYPBUFFERPROC)glewGetProcAddress((const GLubyte*)"glXDestroyPbuffer")) == NULL) || r; + r = ((glXDestroyPixmap = (PFNGLXDESTROYPIXMAPPROC)glewGetProcAddress((const GLubyte*)"glXDestroyPixmap")) == NULL) || r; + r = ((glXDestroyWindow = (PFNGLXDESTROYWINDOWPROC)glewGetProcAddress((const GLubyte*)"glXDestroyWindow")) == NULL) || r; + r = ((glXGetCurrentReadDrawable = (PFNGLXGETCURRENTREADDRAWABLEPROC)glewGetProcAddress((const GLubyte*)"glXGetCurrentReadDrawable")) == NULL) || r; + r = ((glXGetFBConfigAttrib = (PFNGLXGETFBCONFIGATTRIBPROC)glewGetProcAddress((const GLubyte*)"glXGetFBConfigAttrib")) == NULL) || r; + r = ((glXGetFBConfigs = (PFNGLXGETFBCONFIGSPROC)glewGetProcAddress((const GLubyte*)"glXGetFBConfigs")) == NULL) || r; + r = ((glXGetSelectedEvent = (PFNGLXGETSELECTEDEVENTPROC)glewGetProcAddress((const GLubyte*)"glXGetSelectedEvent")) == NULL) || r; + r = ((glXGetVisualFromFBConfig = (PFNGLXGETVISUALFROMFBCONFIGPROC)glewGetProcAddress((const GLubyte*)"glXGetVisualFromFBConfig")) == NULL) || r; + r = ((glXMakeContextCurrent = (PFNGLXMAKECONTEXTCURRENTPROC)glewGetProcAddress((const GLubyte*)"glXMakeContextCurrent")) == NULL) || r; + r = ((glXQueryContext = (PFNGLXQUERYCONTEXTPROC)glewGetProcAddress((const GLubyte*)"glXQueryContext")) == NULL) || r; + r = ((glXQueryDrawable = (PFNGLXQUERYDRAWABLEPROC)glewGetProcAddress((const GLubyte*)"glXQueryDrawable")) == NULL) || r; + r = ((glXSelectEvent = (PFNGLXSELECTEVENTPROC)glewGetProcAddress((const GLubyte*)"glXSelectEvent")) == NULL) || r; + + return r; +} + +#endif /* GLX_VERSION_1_3 */ + +#ifdef GLX_VERSION_1_4 + +#endif /* GLX_VERSION_1_4 */ + +#ifdef GLX_3DFX_multisample + +#endif /* GLX_3DFX_multisample */ + +#ifdef GLX_ARB_fbconfig_float + +#endif /* 
GLX_ARB_fbconfig_float */ + +#ifdef GLX_ARB_get_proc_address + +#endif /* GLX_ARB_get_proc_address */ + +#ifdef GLX_ARB_multisample + +#endif /* GLX_ARB_multisample */ + +#ifdef GLX_ATI_pixel_format_float + +#endif /* GLX_ATI_pixel_format_float */ + +#ifdef GLX_ATI_render_texture + +static GLboolean _glewInit_GLX_ATI_render_texture (GLXEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glXBindTexImageATI = (PFNGLXBINDTEXIMAGEATIPROC)glewGetProcAddress((const GLubyte*)"glXBindTexImageATI")) == NULL) || r; + r = ((glXDrawableAttribATI = (PFNGLXDRAWABLEATTRIBATIPROC)glewGetProcAddress((const GLubyte*)"glXDrawableAttribATI")) == NULL) || r; + r = ((glXReleaseTexImageATI = (PFNGLXRELEASETEXIMAGEATIPROC)glewGetProcAddress((const GLubyte*)"glXReleaseTexImageATI")) == NULL) || r; + + return r; +} + +#endif /* GLX_ATI_render_texture */ + +#ifdef GLX_EXT_fbconfig_packed_float + +#endif /* GLX_EXT_fbconfig_packed_float */ + +#ifdef GLX_EXT_framebuffer_sRGB + +#endif /* GLX_EXT_framebuffer_sRGB */ + +#ifdef GLX_EXT_import_context + +static GLboolean _glewInit_GLX_EXT_import_context (GLXEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glXFreeContextEXT = (PFNGLXFREECONTEXTEXTPROC)glewGetProcAddress((const GLubyte*)"glXFreeContextEXT")) == NULL) || r; + r = ((glXGetContextIDEXT = (PFNGLXGETCONTEXTIDEXTPROC)glewGetProcAddress((const GLubyte*)"glXGetContextIDEXT")) == NULL) || r; + r = ((glXImportContextEXT = (PFNGLXIMPORTCONTEXTEXTPROC)glewGetProcAddress((const GLubyte*)"glXImportContextEXT")) == NULL) || r; + r = ((glXQueryContextInfoEXT = (PFNGLXQUERYCONTEXTINFOEXTPROC)glewGetProcAddress((const GLubyte*)"glXQueryContextInfoEXT")) == NULL) || r; + + return r; +} + +#endif /* GLX_EXT_import_context */ + +#ifdef GLX_EXT_scene_marker + +#endif /* GLX_EXT_scene_marker */ + +#ifdef GLX_EXT_texture_from_pixmap + +static GLboolean _glewInit_GLX_EXT_texture_from_pixmap (GLXEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glXBindTexImageEXT = 
(PFNGLXBINDTEXIMAGEEXTPROC)glewGetProcAddress((const GLubyte*)"glXBindTexImageEXT")) == NULL) || r; + r = ((glXReleaseTexImageEXT = (PFNGLXRELEASETEXIMAGEEXTPROC)glewGetProcAddress((const GLubyte*)"glXReleaseTexImageEXT")) == NULL) || r; + + return r; +} + +#endif /* GLX_EXT_texture_from_pixmap */ + +#ifdef GLX_EXT_visual_info + +#endif /* GLX_EXT_visual_info */ + +#ifdef GLX_EXT_visual_rating + +#endif /* GLX_EXT_visual_rating */ + +#ifdef GLX_MESA_agp_offset + +static GLboolean _glewInit_GLX_MESA_agp_offset (GLXEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glXGetAGPOffsetMESA = (PFNGLXGETAGPOFFSETMESAPROC)glewGetProcAddress((const GLubyte*)"glXGetAGPOffsetMESA")) == NULL) || r; + + return r; +} + +#endif /* GLX_MESA_agp_offset */ + +#ifdef GLX_MESA_copy_sub_buffer + +static GLboolean _glewInit_GLX_MESA_copy_sub_buffer (GLXEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glXCopySubBufferMESA = (PFNGLXCOPYSUBBUFFERMESAPROC)glewGetProcAddress((const GLubyte*)"glXCopySubBufferMESA")) == NULL) || r; + + return r; +} + +#endif /* GLX_MESA_copy_sub_buffer */ + +#ifdef GLX_MESA_pixmap_colormap + +static GLboolean _glewInit_GLX_MESA_pixmap_colormap (GLXEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glXCreateGLXPixmapMESA = (PFNGLXCREATEGLXPIXMAPMESAPROC)glewGetProcAddress((const GLubyte*)"glXCreateGLXPixmapMESA")) == NULL) || r; + + return r; +} + +#endif /* GLX_MESA_pixmap_colormap */ + +#ifdef GLX_MESA_release_buffers + +static GLboolean _glewInit_GLX_MESA_release_buffers (GLXEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glXReleaseBuffersMESA = (PFNGLXRELEASEBUFFERSMESAPROC)glewGetProcAddress((const GLubyte*)"glXReleaseBuffersMESA")) == NULL) || r; + + return r; +} + +#endif /* GLX_MESA_release_buffers */ + +#ifdef GLX_MESA_set_3dfx_mode + +static GLboolean _glewInit_GLX_MESA_set_3dfx_mode (GLXEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glXSet3DfxModeMESA = 
(PFNGLXSET3DFXMODEMESAPROC)glewGetProcAddress((const GLubyte*)"glXSet3DfxModeMESA")) == NULL) || r; + + return r; +} + +#endif /* GLX_MESA_set_3dfx_mode */ + +#ifdef GLX_NV_float_buffer + +#endif /* GLX_NV_float_buffer */ + +#ifdef GLX_NV_vertex_array_range + +static GLboolean _glewInit_GLX_NV_vertex_array_range (GLXEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glXAllocateMemoryNV = (PFNGLXALLOCATEMEMORYNVPROC)glewGetProcAddress((const GLubyte*)"glXAllocateMemoryNV")) == NULL) || r; + r = ((glXFreeMemoryNV = (PFNGLXFREEMEMORYNVPROC)glewGetProcAddress((const GLubyte*)"glXFreeMemoryNV")) == NULL) || r; + + return r; +} + +#endif /* GLX_NV_vertex_array_range */ + +#ifdef GLX_OML_swap_method + +#endif /* GLX_OML_swap_method */ + +#if defined(GLX_OML_sync_control) && defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) +#include <inttypes.h> + +static GLboolean _glewInit_GLX_OML_sync_control (GLXEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glXGetMscRateOML = (PFNGLXGETMSCRATEOMLPROC)glewGetProcAddress((const GLubyte*)"glXGetMscRateOML")) == NULL) || r; + r = ((glXGetSyncValuesOML = (PFNGLXGETSYNCVALUESOMLPROC)glewGetProcAddress((const GLubyte*)"glXGetSyncValuesOML")) == NULL) || r; + r = ((glXSwapBuffersMscOML = (PFNGLXSWAPBUFFERSMSCOMLPROC)glewGetProcAddress((const GLubyte*)"glXSwapBuffersMscOML")) == NULL) || r; + r = ((glXWaitForMscOML = (PFNGLXWAITFORMSCOMLPROC)glewGetProcAddress((const GLubyte*)"glXWaitForMscOML")) == NULL) || r; + r = ((glXWaitForSbcOML = (PFNGLXWAITFORSBCOMLPROC)glewGetProcAddress((const GLubyte*)"glXWaitForSbcOML")) == NULL) || r; + + return r; +} + +#endif /* GLX_OML_sync_control */ + +#ifdef GLX_SGIS_blended_overlay + +#endif /* GLX_SGIS_blended_overlay */ + +#ifdef GLX_SGIS_color_range + +#endif /* GLX_SGIS_color_range */ + +#ifdef GLX_SGIS_multisample + +#endif /* GLX_SGIS_multisample */ + +#ifdef GLX_SGIS_shared_multisample + +#endif /* GLX_SGIS_shared_multisample */ + +#ifdef GLX_SGIX_fbconfig + +static 
GLboolean _glewInit_GLX_SGIX_fbconfig (GLXEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glXChooseFBConfigSGIX = (PFNGLXCHOOSEFBCONFIGSGIXPROC)glewGetProcAddress((const GLubyte*)"glXChooseFBConfigSGIX")) == NULL) || r; + r = ((glXCreateContextWithConfigSGIX = (PFNGLXCREATECONTEXTWITHCONFIGSGIXPROC)glewGetProcAddress((const GLubyte*)"glXCreateContextWithConfigSGIX")) == NULL) || r; + r = ((glXCreateGLXPixmapWithConfigSGIX = (PFNGLXCREATEGLXPIXMAPWITHCONFIGSGIXPROC)glewGetProcAddress((const GLubyte*)"glXCreateGLXPixmapWithConfigSGIX")) == NULL) || r; + r = ((glXGetFBConfigAttribSGIX = (PFNGLXGETFBCONFIGATTRIBSGIXPROC)glewGetProcAddress((const GLubyte*)"glXGetFBConfigAttribSGIX")) == NULL) || r; + r = ((glXGetFBConfigFromVisualSGIX = (PFNGLXGETFBCONFIGFROMVISUALSGIXPROC)glewGetProcAddress((const GLubyte*)"glXGetFBConfigFromVisualSGIX")) == NULL) || r; + r = ((glXGetVisualFromFBConfigSGIX = (PFNGLXGETVISUALFROMFBCONFIGSGIXPROC)glewGetProcAddress((const GLubyte*)"glXGetVisualFromFBConfigSGIX")) == NULL) || r; + + return r; +} + +#endif /* GLX_SGIX_fbconfig */ + +#ifdef GLX_SGIX_hyperpipe + +static GLboolean _glewInit_GLX_SGIX_hyperpipe (GLXEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glXBindHyperpipeSGIX = (PFNGLXBINDHYPERPIPESGIXPROC)glewGetProcAddress((const GLubyte*)"glXBindHyperpipeSGIX")) == NULL) || r; + r = ((glXDestroyHyperpipeConfigSGIX = (PFNGLXDESTROYHYPERPIPECONFIGSGIXPROC)glewGetProcAddress((const GLubyte*)"glXDestroyHyperpipeConfigSGIX")) == NULL) || r; + r = ((glXHyperpipeAttribSGIX = (PFNGLXHYPERPIPEATTRIBSGIXPROC)glewGetProcAddress((const GLubyte*)"glXHyperpipeAttribSGIX")) == NULL) || r; + r = ((glXHyperpipeConfigSGIX = (PFNGLXHYPERPIPECONFIGSGIXPROC)glewGetProcAddress((const GLubyte*)"glXHyperpipeConfigSGIX")) == NULL) || r; + r = ((glXQueryHyperpipeAttribSGIX = (PFNGLXQUERYHYPERPIPEATTRIBSGIXPROC)glewGetProcAddress((const GLubyte*)"glXQueryHyperpipeAttribSGIX")) == NULL) || r; + r = 
((glXQueryHyperpipeBestAttribSGIX = (PFNGLXQUERYHYPERPIPEBESTATTRIBSGIXPROC)glewGetProcAddress((const GLubyte*)"glXQueryHyperpipeBestAttribSGIX")) == NULL) || r; + r = ((glXQueryHyperpipeConfigSGIX = (PFNGLXQUERYHYPERPIPECONFIGSGIXPROC)glewGetProcAddress((const GLubyte*)"glXQueryHyperpipeConfigSGIX")) == NULL) || r; + r = ((glXQueryHyperpipeNetworkSGIX = (PFNGLXQUERYHYPERPIPENETWORKSGIXPROC)glewGetProcAddress((const GLubyte*)"glXQueryHyperpipeNetworkSGIX")) == NULL) || r; + + return r; +} + +#endif /* GLX_SGIX_hyperpipe */ + +#ifdef GLX_SGIX_pbuffer + +static GLboolean _glewInit_GLX_SGIX_pbuffer (GLXEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glXCreateGLXPbufferSGIX = (PFNGLXCREATEGLXPBUFFERSGIXPROC)glewGetProcAddress((const GLubyte*)"glXCreateGLXPbufferSGIX")) == NULL) || r; + r = ((glXDestroyGLXPbufferSGIX = (PFNGLXDESTROYGLXPBUFFERSGIXPROC)glewGetProcAddress((const GLubyte*)"glXDestroyGLXPbufferSGIX")) == NULL) || r; + r = ((glXGetSelectedEventSGIX = (PFNGLXGETSELECTEDEVENTSGIXPROC)glewGetProcAddress((const GLubyte*)"glXGetSelectedEventSGIX")) == NULL) || r; + r = ((glXQueryGLXPbufferSGIX = (PFNGLXQUERYGLXPBUFFERSGIXPROC)glewGetProcAddress((const GLubyte*)"glXQueryGLXPbufferSGIX")) == NULL) || r; + r = ((glXSelectEventSGIX = (PFNGLXSELECTEVENTSGIXPROC)glewGetProcAddress((const GLubyte*)"glXSelectEventSGIX")) == NULL) || r; + + return r; +} + +#endif /* GLX_SGIX_pbuffer */ + +#ifdef GLX_SGIX_swap_barrier + +static GLboolean _glewInit_GLX_SGIX_swap_barrier (GLXEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glXBindSwapBarrierSGIX = (PFNGLXBINDSWAPBARRIERSGIXPROC)glewGetProcAddress((const GLubyte*)"glXBindSwapBarrierSGIX")) == NULL) || r; + r = ((glXQueryMaxSwapBarriersSGIX = (PFNGLXQUERYMAXSWAPBARRIERSSGIXPROC)glewGetProcAddress((const GLubyte*)"glXQueryMaxSwapBarriersSGIX")) == NULL) || r; + + return r; +} + +#endif /* GLX_SGIX_swap_barrier */ + +#ifdef GLX_SGIX_swap_group + +static GLboolean _glewInit_GLX_SGIX_swap_group 
(GLXEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glXJoinSwapGroupSGIX = (PFNGLXJOINSWAPGROUPSGIXPROC)glewGetProcAddress((const GLubyte*)"glXJoinSwapGroupSGIX")) == NULL) || r; + + return r; +} + +#endif /* GLX_SGIX_swap_group */ + +#ifdef GLX_SGIX_video_resize + +static GLboolean _glewInit_GLX_SGIX_video_resize (GLXEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glXBindChannelToWindowSGIX = (PFNGLXBINDCHANNELTOWINDOWSGIXPROC)glewGetProcAddress((const GLubyte*)"glXBindChannelToWindowSGIX")) == NULL) || r; + r = ((glXChannelRectSGIX = (PFNGLXCHANNELRECTSGIXPROC)glewGetProcAddress((const GLubyte*)"glXChannelRectSGIX")) == NULL) || r; + r = ((glXChannelRectSyncSGIX = (PFNGLXCHANNELRECTSYNCSGIXPROC)glewGetProcAddress((const GLubyte*)"glXChannelRectSyncSGIX")) == NULL) || r; + r = ((glXQueryChannelDeltasSGIX = (PFNGLXQUERYCHANNELDELTASSGIXPROC)glewGetProcAddress((const GLubyte*)"glXQueryChannelDeltasSGIX")) == NULL) || r; + r = ((glXQueryChannelRectSGIX = (PFNGLXQUERYCHANNELRECTSGIXPROC)glewGetProcAddress((const GLubyte*)"glXQueryChannelRectSGIX")) == NULL) || r; + + return r; +} + +#endif /* GLX_SGIX_video_resize */ + +#ifdef GLX_SGIX_visual_select_group + +#endif /* GLX_SGIX_visual_select_group */ + +#ifdef GLX_SGI_cushion + +static GLboolean _glewInit_GLX_SGI_cushion (GLXEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glXCushionSGI = (PFNGLXCUSHIONSGIPROC)glewGetProcAddress((const GLubyte*)"glXCushionSGI")) == NULL) || r; + + return r; +} + +#endif /* GLX_SGI_cushion */ + +#ifdef GLX_SGI_make_current_read + +static GLboolean _glewInit_GLX_SGI_make_current_read (GLXEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glXGetCurrentReadDrawableSGI = (PFNGLXGETCURRENTREADDRAWABLESGIPROC)glewGetProcAddress((const GLubyte*)"glXGetCurrentReadDrawableSGI")) == NULL) || r; + r = ((glXMakeCurrentReadSGI = (PFNGLXMAKECURRENTREADSGIPROC)glewGetProcAddress((const GLubyte*)"glXMakeCurrentReadSGI")) == NULL) || r; + + 
return r; +} + +#endif /* GLX_SGI_make_current_read */ + +#ifdef GLX_SGI_swap_control + +static GLboolean _glewInit_GLX_SGI_swap_control (GLXEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glXSwapIntervalSGI = (PFNGLXSWAPINTERVALSGIPROC)glewGetProcAddress((const GLubyte*)"glXSwapIntervalSGI")) == NULL) || r; + + return r; +} + +#endif /* GLX_SGI_swap_control */ + +#ifdef GLX_SGI_video_sync + +static GLboolean _glewInit_GLX_SGI_video_sync (GLXEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glXGetVideoSyncSGI = (PFNGLXGETVIDEOSYNCSGIPROC)glewGetProcAddress((const GLubyte*)"glXGetVideoSyncSGI")) == NULL) || r; + r = ((glXWaitVideoSyncSGI = (PFNGLXWAITVIDEOSYNCSGIPROC)glewGetProcAddress((const GLubyte*)"glXWaitVideoSyncSGI")) == NULL) || r; + + return r; +} + +#endif /* GLX_SGI_video_sync */ + +#ifdef GLX_SUN_get_transparent_index + +static GLboolean _glewInit_GLX_SUN_get_transparent_index (GLXEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glXGetTransparentIndexSUN = (PFNGLXGETTRANSPARENTINDEXSUNPROC)glewGetProcAddress((const GLubyte*)"glXGetTransparentIndexSUN")) == NULL) || r; + + return r; +} + +#endif /* GLX_SUN_get_transparent_index */ + +#ifdef GLX_SUN_video_resize + +static GLboolean _glewInit_GLX_SUN_video_resize (GLXEW_CONTEXT_ARG_DEF_INIT) +{ + GLboolean r = GL_FALSE; + + r = ((glXGetVideoResizeSUN = (PFNGLXGETVIDEORESIZESUNPROC)glewGetProcAddress((const GLubyte*)"glXGetVideoResizeSUN")) == NULL) || r; + r = ((glXVideoResizeSUN = (PFNGLXVIDEORESIZESUNPROC)glewGetProcAddress((const GLubyte*)"glXVideoResizeSUN")) == NULL) || r; + + return r; +} + +#endif /* GLX_SUN_video_resize */ + +/* ------------------------------------------------------------------------ */ + +GLboolean glxewGetExtension (const char* name) +{ + GLubyte* p; + GLubyte* end; + GLuint len = _glewStrLen((const GLubyte*)name); +/* if (glXQueryExtensionsString == NULL || glXGetCurrentDisplay == NULL) return GL_FALSE; */ +/* p = 
(GLubyte*)glXQueryExtensionsString(glXGetCurrentDisplay(), DefaultScreen(glXGetCurrentDisplay())); */ + if (glXGetClientString == NULL || glXGetCurrentDisplay == NULL) return GL_FALSE; + p = (GLubyte*)glXGetClientString(glXGetCurrentDisplay(), GLX_EXTENSIONS); + if (0 == p) return GL_FALSE; + end = p + _glewStrLen(p); + while (p < end) + { + GLuint n = _glewStrCLen(p, ' '); + if (len == n && _glewStrSame((const GLubyte*)name, p, n)) return GL_TRUE; + p += n+1; + } + return GL_FALSE; +} + +GLenum glxewContextInit (GLXEW_CONTEXT_ARG_DEF_LIST) +{ + int major, minor; + /* initialize core GLX 1.2 */ + if (_glewInit_GLX_VERSION_1_2(GLEW_CONTEXT_ARG_VAR_INIT)) return GLEW_ERROR_GLX_VERSION_11_ONLY; + /* initialize flags */ + CONST_CAST(GLXEW_VERSION_1_0) = GL_TRUE; + CONST_CAST(GLXEW_VERSION_1_1) = GL_TRUE; + CONST_CAST(GLXEW_VERSION_1_2) = GL_TRUE; + CONST_CAST(GLXEW_VERSION_1_3) = GL_TRUE; + CONST_CAST(GLXEW_VERSION_1_4) = GL_TRUE; + /* query GLX version */ + glXQueryVersion(glXGetCurrentDisplay(), &major, &minor); + if (major == 1 && minor <= 3) + { + switch (minor) + { + case 3: + CONST_CAST(GLXEW_VERSION_1_4) = GL_FALSE; + break; + case 2: + CONST_CAST(GLXEW_VERSION_1_4) = GL_FALSE; + CONST_CAST(GLXEW_VERSION_1_3) = GL_FALSE; + break; + default: + return GLEW_ERROR_GLX_VERSION_11_ONLY; + break; + } + } + /* initialize extensions */ +#ifdef GLX_VERSION_1_3 + if (glewExperimental || GLXEW_VERSION_1_3) CONST_CAST(GLXEW_VERSION_1_3) = !_glewInit_GLX_VERSION_1_3(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GLX_VERSION_1_3 */ +#ifdef GLX_3DFX_multisample + CONST_CAST(GLXEW_3DFX_multisample) = glxewGetExtension("GLX_3DFX_multisample"); +#endif /* GLX_3DFX_multisample */ +#ifdef GLX_ARB_fbconfig_float + CONST_CAST(GLXEW_ARB_fbconfig_float) = glxewGetExtension("GLX_ARB_fbconfig_float"); +#endif /* GLX_ARB_fbconfig_float */ +#ifdef GLX_ARB_get_proc_address + CONST_CAST(GLXEW_ARB_get_proc_address) = glxewGetExtension("GLX_ARB_get_proc_address"); +#endif /* GLX_ARB_get_proc_address */ 
+#ifdef GLX_ARB_multisample + CONST_CAST(GLXEW_ARB_multisample) = glxewGetExtension("GLX_ARB_multisample"); +#endif /* GLX_ARB_multisample */ +#ifdef GLX_ATI_pixel_format_float + CONST_CAST(GLXEW_ATI_pixel_format_float) = glxewGetExtension("GLX_ATI_pixel_format_float"); +#endif /* GLX_ATI_pixel_format_float */ +#ifdef GLX_ATI_render_texture + CONST_CAST(GLXEW_ATI_render_texture) = glxewGetExtension("GLX_ATI_render_texture"); + if (glewExperimental || GLXEW_ATI_render_texture) CONST_CAST(GLXEW_ATI_render_texture) = !_glewInit_GLX_ATI_render_texture(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GLX_ATI_render_texture */ +#ifdef GLX_EXT_fbconfig_packed_float + CONST_CAST(GLXEW_EXT_fbconfig_packed_float) = glxewGetExtension("GLX_EXT_fbconfig_packed_float"); +#endif /* GLX_EXT_fbconfig_packed_float */ +#ifdef GLX_EXT_framebuffer_sRGB + CONST_CAST(GLXEW_EXT_framebuffer_sRGB) = glxewGetExtension("GLX_EXT_framebuffer_sRGB"); +#endif /* GLX_EXT_framebuffer_sRGB */ +#ifdef GLX_EXT_import_context + CONST_CAST(GLXEW_EXT_import_context) = glxewGetExtension("GLX_EXT_import_context"); + if (glewExperimental || GLXEW_EXT_import_context) CONST_CAST(GLXEW_EXT_import_context) = !_glewInit_GLX_EXT_import_context(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GLX_EXT_import_context */ +#ifdef GLX_EXT_scene_marker + CONST_CAST(GLXEW_EXT_scene_marker) = glxewGetExtension("GLX_EXT_scene_marker"); +#endif /* GLX_EXT_scene_marker */ +#ifdef GLX_EXT_texture_from_pixmap + CONST_CAST(GLXEW_EXT_texture_from_pixmap) = glxewGetExtension("GLX_EXT_texture_from_pixmap"); + if (glewExperimental || GLXEW_EXT_texture_from_pixmap) CONST_CAST(GLXEW_EXT_texture_from_pixmap) = !_glewInit_GLX_EXT_texture_from_pixmap(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GLX_EXT_texture_from_pixmap */ +#ifdef GLX_EXT_visual_info + CONST_CAST(GLXEW_EXT_visual_info) = glxewGetExtension("GLX_EXT_visual_info"); +#endif /* GLX_EXT_visual_info */ +#ifdef GLX_EXT_visual_rating + CONST_CAST(GLXEW_EXT_visual_rating) = 
glxewGetExtension("GLX_EXT_visual_rating"); +#endif /* GLX_EXT_visual_rating */ +#ifdef GLX_MESA_agp_offset + CONST_CAST(GLXEW_MESA_agp_offset) = glxewGetExtension("GLX_MESA_agp_offset"); + if (glewExperimental || GLXEW_MESA_agp_offset) CONST_CAST(GLXEW_MESA_agp_offset) = !_glewInit_GLX_MESA_agp_offset(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GLX_MESA_agp_offset */ +#ifdef GLX_MESA_copy_sub_buffer + CONST_CAST(GLXEW_MESA_copy_sub_buffer) = glxewGetExtension("GLX_MESA_copy_sub_buffer"); + if (glewExperimental || GLXEW_MESA_copy_sub_buffer) CONST_CAST(GLXEW_MESA_copy_sub_buffer) = !_glewInit_GLX_MESA_copy_sub_buffer(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GLX_MESA_copy_sub_buffer */ +#ifdef GLX_MESA_pixmap_colormap + CONST_CAST(GLXEW_MESA_pixmap_colormap) = glxewGetExtension("GLX_MESA_pixmap_colormap"); + if (glewExperimental || GLXEW_MESA_pixmap_colormap) CONST_CAST(GLXEW_MESA_pixmap_colormap) = !_glewInit_GLX_MESA_pixmap_colormap(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GLX_MESA_pixmap_colormap */ +#ifdef GLX_MESA_release_buffers + CONST_CAST(GLXEW_MESA_release_buffers) = glxewGetExtension("GLX_MESA_release_buffers"); + if (glewExperimental || GLXEW_MESA_release_buffers) CONST_CAST(GLXEW_MESA_release_buffers) = !_glewInit_GLX_MESA_release_buffers(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GLX_MESA_release_buffers */ +#ifdef GLX_MESA_set_3dfx_mode + CONST_CAST(GLXEW_MESA_set_3dfx_mode) = glxewGetExtension("GLX_MESA_set_3dfx_mode"); + if (glewExperimental || GLXEW_MESA_set_3dfx_mode) CONST_CAST(GLXEW_MESA_set_3dfx_mode) = !_glewInit_GLX_MESA_set_3dfx_mode(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GLX_MESA_set_3dfx_mode */ +#ifdef GLX_NV_float_buffer + CONST_CAST(GLXEW_NV_float_buffer) = glxewGetExtension("GLX_NV_float_buffer"); +#endif /* GLX_NV_float_buffer */ +#ifdef GLX_NV_vertex_array_range + CONST_CAST(GLXEW_NV_vertex_array_range) = glxewGetExtension("GLX_NV_vertex_array_range"); + if (glewExperimental || GLXEW_NV_vertex_array_range) CONST_CAST(GLXEW_NV_vertex_array_range) 
= !_glewInit_GLX_NV_vertex_array_range(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GLX_NV_vertex_array_range */ +#ifdef GLX_OML_swap_method + CONST_CAST(GLXEW_OML_swap_method) = glxewGetExtension("GLX_OML_swap_method"); +#endif /* GLX_OML_swap_method */ +#if defined(GLX_OML_sync_control) && defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) +#include <inttypes.h> + CONST_CAST(GLXEW_OML_sync_control) = glxewGetExtension("GLX_OML_sync_control"); + if (glewExperimental || GLXEW_OML_sync_control) CONST_CAST(GLXEW_OML_sync_control) = !_glewInit_GLX_OML_sync_control(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GLX_OML_sync_control */ +#ifdef GLX_SGIS_blended_overlay + CONST_CAST(GLXEW_SGIS_blended_overlay) = glxewGetExtension("GLX_SGIS_blended_overlay"); +#endif /* GLX_SGIS_blended_overlay */ +#ifdef GLX_SGIS_color_range + CONST_CAST(GLXEW_SGIS_color_range) = glxewGetExtension("GLX_SGIS_color_range"); +#endif /* GLX_SGIS_color_range */ +#ifdef GLX_SGIS_multisample + CONST_CAST(GLXEW_SGIS_multisample) = glxewGetExtension("GLX_SGIS_multisample"); +#endif /* GLX_SGIS_multisample */ +#ifdef GLX_SGIS_shared_multisample + CONST_CAST(GLXEW_SGIS_shared_multisample) = glxewGetExtension("GLX_SGIS_shared_multisample"); +#endif /* GLX_SGIS_shared_multisample */ +#ifdef GLX_SGIX_fbconfig + CONST_CAST(GLXEW_SGIX_fbconfig) = glxewGetExtension("GLX_SGIX_fbconfig"); + if (glewExperimental || GLXEW_SGIX_fbconfig) CONST_CAST(GLXEW_SGIX_fbconfig) = !_glewInit_GLX_SGIX_fbconfig(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GLX_SGIX_fbconfig */ +#ifdef GLX_SGIX_hyperpipe + CONST_CAST(GLXEW_SGIX_hyperpipe) = glxewGetExtension("GLX_SGIX_hyperpipe"); + if (glewExperimental || GLXEW_SGIX_hyperpipe) CONST_CAST(GLXEW_SGIX_hyperpipe) = !_glewInit_GLX_SGIX_hyperpipe(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GLX_SGIX_hyperpipe */ +#ifdef GLX_SGIX_pbuffer + CONST_CAST(GLXEW_SGIX_pbuffer) = glxewGetExtension("GLX_SGIX_pbuffer"); + if (glewExperimental || GLXEW_SGIX_pbuffer) CONST_CAST(GLXEW_SGIX_pbuffer) = 
!_glewInit_GLX_SGIX_pbuffer(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GLX_SGIX_pbuffer */ +#ifdef GLX_SGIX_swap_barrier + CONST_CAST(GLXEW_SGIX_swap_barrier) = glxewGetExtension("GLX_SGIX_swap_barrier"); + if (glewExperimental || GLXEW_SGIX_swap_barrier) CONST_CAST(GLXEW_SGIX_swap_barrier) = !_glewInit_GLX_SGIX_swap_barrier(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GLX_SGIX_swap_barrier */ +#ifdef GLX_SGIX_swap_group + CONST_CAST(GLXEW_SGIX_swap_group) = glxewGetExtension("GLX_SGIX_swap_group"); + if (glewExperimental || GLXEW_SGIX_swap_group) CONST_CAST(GLXEW_SGIX_swap_group) = !_glewInit_GLX_SGIX_swap_group(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GLX_SGIX_swap_group */ +#ifdef GLX_SGIX_video_resize + CONST_CAST(GLXEW_SGIX_video_resize) = glxewGetExtension("GLX_SGIX_video_resize"); + if (glewExperimental || GLXEW_SGIX_video_resize) CONST_CAST(GLXEW_SGIX_video_resize) = !_glewInit_GLX_SGIX_video_resize(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GLX_SGIX_video_resize */ +#ifdef GLX_SGIX_visual_select_group + CONST_CAST(GLXEW_SGIX_visual_select_group) = glxewGetExtension("GLX_SGIX_visual_select_group"); +#endif /* GLX_SGIX_visual_select_group */ +#ifdef GLX_SGI_cushion + CONST_CAST(GLXEW_SGI_cushion) = glxewGetExtension("GLX_SGI_cushion"); + if (glewExperimental || GLXEW_SGI_cushion) CONST_CAST(GLXEW_SGI_cushion) = !_glewInit_GLX_SGI_cushion(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GLX_SGI_cushion */ +#ifdef GLX_SGI_make_current_read + CONST_CAST(GLXEW_SGI_make_current_read) = glxewGetExtension("GLX_SGI_make_current_read"); + if (glewExperimental || GLXEW_SGI_make_current_read) CONST_CAST(GLXEW_SGI_make_current_read) = !_glewInit_GLX_SGI_make_current_read(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GLX_SGI_make_current_read */ +#ifdef GLX_SGI_swap_control + CONST_CAST(GLXEW_SGI_swap_control) = glxewGetExtension("GLX_SGI_swap_control"); + if (glewExperimental || GLXEW_SGI_swap_control) CONST_CAST(GLXEW_SGI_swap_control) = !_glewInit_GLX_SGI_swap_control(GLEW_CONTEXT_ARG_VAR_INIT); 
+#endif /* GLX_SGI_swap_control */ +#ifdef GLX_SGI_video_sync + CONST_CAST(GLXEW_SGI_video_sync) = glxewGetExtension("GLX_SGI_video_sync"); + if (glewExperimental || GLXEW_SGI_video_sync) CONST_CAST(GLXEW_SGI_video_sync) = !_glewInit_GLX_SGI_video_sync(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GLX_SGI_video_sync */ +#ifdef GLX_SUN_get_transparent_index + CONST_CAST(GLXEW_SUN_get_transparent_index) = glxewGetExtension("GLX_SUN_get_transparent_index"); + if (glewExperimental || GLXEW_SUN_get_transparent_index) CONST_CAST(GLXEW_SUN_get_transparent_index) = !_glewInit_GLX_SUN_get_transparent_index(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GLX_SUN_get_transparent_index */ +#ifdef GLX_SUN_video_resize + CONST_CAST(GLXEW_SUN_video_resize) = glxewGetExtension("GLX_SUN_video_resize"); + if (glewExperimental || GLXEW_SUN_video_resize) CONST_CAST(GLXEW_SUN_video_resize) = !_glewInit_GLX_SUN_video_resize(GLEW_CONTEXT_ARG_VAR_INIT); +#endif /* GLX_SUN_video_resize */ + + return GLEW_OK; +} + +#endif /* !__APPLE__ || GLEW_APPLE_GLX */ + +/* ------------------------------------------------------------------------ */ + +const GLubyte* glewGetErrorString (GLenum error) +{ + static const GLubyte* _glewErrorString[] = + { + (const GLubyte*)"No error", + (const GLubyte*)"Missing GL version", + (const GLubyte*)"GL 1.1 and up are not supported", + (const GLubyte*)"GLX 1.2 and up are not supported", + (const GLubyte*)"Unknown error" + }; + const int max_error = sizeof(_glewErrorString)/sizeof(*_glewErrorString) - 1; + return _glewErrorString[(int)error > max_error ? max_error : (int)error]; +} + +const GLubyte* glewGetString (GLenum name) +{ + static const GLubyte* _glewString[] = + { + (const GLubyte*)NULL, + (const GLubyte*)"1.5.0", + (const GLubyte*)"1", + (const GLubyte*)"5", + (const GLubyte*)"0" + }; + const int max_string = sizeof(_glewString)/sizeof(*_glewString) - 1; + return _glewString[(int)name > max_string ? 
0 : (int)name]; +} + +/* ------------------------------------------------------------------------ */ + +GLboolean glewExperimental = GL_FALSE; + +#if !defined(GLEW_MX) + +#if defined(_WIN32) +extern GLenum wglewContextInit (void); +#elif !defined(__APPLE__) || defined(GLEW_APPLE_GLX) /* _UNIX */ +extern GLenum glxewContextInit (void); +#endif /* _WIN32 */ + +GLenum glewInit () +{ + GLenum r; + if ( (r = glewContextInit()) ) return r; +#if defined(_WIN32) + return wglewContextInit(); +#elif !defined(__APPLE__) || defined(GLEW_APPLE_GLX) /* _UNIX */ + return glxewContextInit(); +#else + return r; +#endif /* _WIN32 */ +} + +#endif /* !GLEW_MX */ +#ifdef GLEW_MX +GLboolean glewContextIsSupported (GLEWContext* ctx, const char* name) +#else +GLboolean glewIsSupported (const char* name) +#endif +{ + GLubyte* pos = (GLubyte*)name; + GLuint len = _glewStrLen(pos); + GLboolean ret = GL_TRUE; + while (ret && len > 0) + { + if (_glewStrSame1(&pos, &len, (const GLubyte*)"GL_", 3)) + { + if (_glewStrSame2(&pos, &len, (const GLubyte*)"VERSION_", 8)) + { +#ifdef GL_VERSION_1_2 + if (_glewStrSame3(&pos, &len, (const GLubyte*)"1_2", 3)) + { + ret = GLEW_VERSION_1_2; + continue; + } +#endif +#ifdef GL_VERSION_1_3 + if (_glewStrSame3(&pos, &len, (const GLubyte*)"1_3", 3)) + { + ret = GLEW_VERSION_1_3; + continue; + } +#endif +#ifdef GL_VERSION_1_4 + if (_glewStrSame3(&pos, &len, (const GLubyte*)"1_4", 3)) + { + ret = GLEW_VERSION_1_4; + continue; + } +#endif +#ifdef GL_VERSION_1_5 + if (_glewStrSame3(&pos, &len, (const GLubyte*)"1_5", 3)) + { + ret = GLEW_VERSION_1_5; + continue; + } +#endif +#ifdef GL_VERSION_2_0 + if (_glewStrSame3(&pos, &len, (const GLubyte*)"2_0", 3)) + { + ret = GLEW_VERSION_2_0; + continue; + } +#endif +#ifdef GL_VERSION_2_1 + if (_glewStrSame3(&pos, &len, (const GLubyte*)"2_1", 3)) + { + ret = GLEW_VERSION_2_1; + continue; + } +#endif + } + if (_glewStrSame2(&pos, &len, (const GLubyte*)"3DFX_", 5)) + { +#ifdef GL_3DFX_multisample + if (_glewStrSame3(&pos, &len, 
(const GLubyte*)"multisample", 11)) + { + ret = GLEW_3DFX_multisample; + continue; + } +#endif +#ifdef GL_3DFX_tbuffer + if (_glewStrSame3(&pos, &len, (const GLubyte*)"tbuffer", 7)) + { + ret = GLEW_3DFX_tbuffer; + continue; + } +#endif +#ifdef GL_3DFX_texture_compression_FXT1 + if (_glewStrSame3(&pos, &len, (const GLubyte*)"texture_compression_FXT1", 24)) + { + ret = GLEW_3DFX_texture_compression_FXT1; + continue; + } +#endif + } + if (_glewStrSame2(&pos, &len, (const GLubyte*)"APPLE_", 6)) + { +#ifdef GL_APPLE_client_storage + if (_glewStrSame3(&pos, &len, (const GLubyte*)"client_storage", 14)) + { + ret = GLEW_APPLE_client_storage; + continue; + } +#endif +#ifdef GL_APPLE_element_array + if (_glewStrSame3(&pos, &len, (const GLubyte*)"element_array", 13)) + { + ret = GLEW_APPLE_element_array; + continue; + } +#endif +#ifdef GL_APPLE_fence + if (_glewStrSame3(&pos, &len, (const GLubyte*)"fence", 5)) + { + ret = GLEW_APPLE_fence; + continue; + } +#endif +#ifdef GL_APPLE_float_pixels + if (_glewStrSame3(&pos, &len, (const GLubyte*)"float_pixels", 12)) + { + ret = GLEW_APPLE_float_pixels; + continue; + } +#endif +#ifdef GL_APPLE_flush_buffer_range + if (_glewStrSame3(&pos, &len, (const GLubyte*)"flush_buffer_range", 18)) + { + ret = GLEW_APPLE_flush_buffer_range; + continue; + } +#endif +#ifdef GL_APPLE_pixel_buffer + if (_glewStrSame3(&pos, &len, (const GLubyte*)"pixel_buffer", 12)) + { + ret = GLEW_APPLE_pixel_buffer; + continue; + } +#endif +#ifdef GL_APPLE_specular_vector + if (_glewStrSame3(&pos, &len, (const GLubyte*)"specular_vector", 15)) + { + ret = GLEW_APPLE_specular_vector; + continue; + } +#endif +#ifdef GL_APPLE_texture_range + if (_glewStrSame3(&pos, &len, (const GLubyte*)"texture_range", 13)) + { + ret = GLEW_APPLE_texture_range; + continue; + } +#endif +#ifdef GL_APPLE_transform_hint + if (_glewStrSame3(&pos, &len, (const GLubyte*)"transform_hint", 14)) + { + ret = GLEW_APPLE_transform_hint; + continue; + } +#endif +#ifdef 
GL_APPLE_vertex_array_object + if (_glewStrSame3(&pos, &len, (const GLubyte*)"vertex_array_object", 19)) + { + ret = GLEW_APPLE_vertex_array_object; + continue; + } +#endif +#ifdef GL_APPLE_vertex_array_range + if (_glewStrSame3(&pos, &len, (const GLubyte*)"vertex_array_range", 18)) + { + ret = GLEW_APPLE_vertex_array_range; + continue; + } +#endif +#ifdef GL_APPLE_ycbcr_422 + if (_glewStrSame3(&pos, &len, (const GLubyte*)"ycbcr_422", 9)) + { + ret = GLEW_APPLE_ycbcr_422; + continue; + } +#endif + } + if (_glewStrSame2(&pos, &len, (const GLubyte*)"ARB_", 4)) + { +#ifdef GL_ARB_color_buffer_float + if (_glewStrSame3(&pos, &len, (const GLubyte*)"color_buffer_float", 18)) + { + ret = GLEW_ARB_color_buffer_float; + continue; + } +#endif +#ifdef GL_ARB_depth_texture + if (_glewStrSame3(&pos, &len, (const GLubyte*)"depth_texture", 13)) + { + ret = GLEW_ARB_depth_texture; + continue; + } +#endif +#ifdef GL_ARB_draw_buffers + if (_glewStrSame3(&pos, &len, (const GLubyte*)"draw_buffers", 12)) + { + ret = GLEW_ARB_draw_buffers; + continue; + } +#endif +#ifdef GL_ARB_fragment_program + if (_glewStrSame3(&pos, &len, (const GLubyte*)"fragment_program", 16)) + { + ret = GLEW_ARB_fragment_program; + continue; + } +#endif +#ifdef GL_ARB_fragment_program_shadow + if (_glewStrSame3(&pos, &len, (const GLubyte*)"fragment_program_shadow", 23)) + { + ret = GLEW_ARB_fragment_program_shadow; + continue; + } +#endif +#ifdef GL_ARB_fragment_shader + if (_glewStrSame3(&pos, &len, (const GLubyte*)"fragment_shader", 15)) + { + ret = GLEW_ARB_fragment_shader; + continue; + } +#endif +#ifdef GL_ARB_half_float_pixel + if (_glewStrSame3(&pos, &len, (const GLubyte*)"half_float_pixel", 16)) + { + ret = GLEW_ARB_half_float_pixel; + continue; + } +#endif +#ifdef GL_ARB_imaging + if (_glewStrSame3(&pos, &len, (const GLubyte*)"imaging", 7)) + { + ret = GLEW_ARB_imaging; + continue; + } +#endif +#ifdef GL_ARB_matrix_palette + if (_glewStrSame3(&pos, &len, (const GLubyte*)"matrix_palette", 14)) + { + ret 
= GLEW_ARB_matrix_palette; + continue; + } +#endif +#ifdef GL_ARB_multisample + if (_glewStrSame3(&pos, &len, (const GLubyte*)"multisample", 11)) + { + ret = GLEW_ARB_multisample; + continue; + } +#endif +#ifdef GL_ARB_multitexture + if (_glewStrSame3(&pos, &len, (const GLubyte*)"multitexture", 12)) + { + ret = GLEW_ARB_multitexture; + continue; + } +#endif +#ifdef GL_ARB_occlusion_query + if (_glewStrSame3(&pos, &len, (const GLubyte*)"occlusion_query", 15)) + { + ret = GLEW_ARB_occlusion_query; + continue; + } +#endif +#ifdef GL_ARB_pixel_buffer_object + if (_glewStrSame3(&pos, &len, (const GLubyte*)"pixel_buffer_object", 19)) + { + ret = GLEW_ARB_pixel_buffer_object; + continue; + } +#endif +#ifdef GL_ARB_point_parameters + if (_glewStrSame3(&pos, &len, (const GLubyte*)"point_parameters", 16)) + { + ret = GLEW_ARB_point_parameters; + continue; + } +#endif +#ifdef GL_ARB_point_sprite + if (_glewStrSame3(&pos, &len, (const GLubyte*)"point_sprite", 12)) + { + ret = GLEW_ARB_point_sprite; + continue; + } +#endif +#ifdef GL_ARB_shader_objects + if (_glewStrSame3(&pos, &len, (const GLubyte*)"shader_objects", 14)) + { + ret = GLEW_ARB_shader_objects; + continue; + } +#endif +#ifdef GL_ARB_shading_language_100 + if (_glewStrSame3(&pos, &len, (const GLubyte*)"shading_language_100", 20)) + { + ret = GLEW_ARB_shading_language_100; + continue; + } +#endif +#ifdef GL_ARB_shadow + if (_glewStrSame3(&pos, &len, (const GLubyte*)"shadow", 6)) + { + ret = GLEW_ARB_shadow; + continue; + } +#endif +#ifdef GL_ARB_shadow_ambient + if (_glewStrSame3(&pos, &len, (const GLubyte*)"shadow_ambient", 14)) + { + ret = GLEW_ARB_shadow_ambient; + continue; + } +#endif +#ifdef GL_ARB_texture_border_clamp + if (_glewStrSame3(&pos, &len, (const GLubyte*)"texture_border_clamp", 20)) + { + ret = GLEW_ARB_texture_border_clamp; + continue; + } +#endif +#ifdef GL_ARB_texture_compression + if (_glewStrSame3(&pos, &len, (const GLubyte*)"texture_compression", 19)) + { + ret = GLEW_ARB_texture_compression; 
+ continue; + } +#endif +#ifdef GL_ARB_texture_cube_map + if (_glewStrSame3(&pos, &len, (const GLubyte*)"texture_cube_map", 16)) + { + ret = GLEW_ARB_texture_cube_map; + continue; + } +#endif +#ifdef GL_ARB_texture_env_add + if (_glewStrSame3(&pos, &len, (const GLubyte*)"texture_env_add", 15)) + { + ret = GLEW_ARB_texture_env_add; + continue; + } +#endif +#ifdef GL_ARB_texture_env_combine + if (_glewStrSame3(&pos, &len, (const GLubyte*)"texture_env_combine", 19)) + { + ret = GLEW_ARB_texture_env_combine; + continue; + } +#endif +#ifdef GL_ARB_texture_env_crossbar + if (_glewStrSame3(&pos, &len, (const GLubyte*)"texture_env_crossbar", 20)) + { + ret = GLEW_ARB_texture_env_crossbar; + continue; + } +#endif +#ifdef GL_ARB_texture_env_dot3 + if (_glewStrSame3(&pos, &len, (const GLubyte*)"texture_env_dot3", 16)) + { + ret = GLEW_ARB_texture_env_dot3; + continue; + } +#endif +#ifdef GL_ARB_texture_float + if (_glewStrSame3(&pos, &len, (const GLubyte*)"texture_float", 13)) + { + ret = GLEW_ARB_texture_float; + continue; + } +#endif +#ifdef GL_ARB_texture_mirrored_repeat + if (_glewStrSame3(&pos, &len, (const GLubyte*)"texture_mirrored_repeat", 23)) + { + ret = GLEW_ARB_texture_mirrored_repeat; + continue; + } +#endif +#ifdef GL_ARB_texture_non_power_of_two + if (_glewStrSame3(&pos, &len, (const GLubyte*)"texture_non_power_of_two", 24)) + { + ret = GLEW_ARB_texture_non_power_of_two; + continue; + } +#endif +#ifdef GL_ARB_texture_rectangle + if (_glewStrSame3(&pos, &len, (const GLubyte*)"texture_rectangle", 17)) + { + ret = GLEW_ARB_texture_rectangle; + continue; + } +#endif +#ifdef GL_ARB_transpose_matrix + if (_glewStrSame3(&pos, &len, (const GLubyte*)"transpose_matrix", 16)) + { + ret = GLEW_ARB_transpose_matrix; + continue; + } +#endif +#ifdef GL_ARB_vertex_blend + if (_glewStrSame3(&pos, &len, (const GLubyte*)"vertex_blend", 12)) + { + ret = GLEW_ARB_vertex_blend; + continue; + } +#endif +#ifdef GL_ARB_vertex_buffer_object + if (_glewStrSame3(&pos, &len, (const 
GLubyte*)"vertex_buffer_object", 20)) + { + ret = GLEW_ARB_vertex_buffer_object; + continue; + } +#endif +#ifdef GL_ARB_vertex_program + if (_glewStrSame3(&pos, &len, (const GLubyte*)"vertex_program", 14)) + { + ret = GLEW_ARB_vertex_program; + continue; + } +#endif +#ifdef GL_ARB_vertex_shader + if (_glewStrSame3(&pos, &len, (const GLubyte*)"vertex_shader", 13)) + { + ret = GLEW_ARB_vertex_shader; + continue; + } +#endif +#ifdef GL_ARB_window_pos + if (_glewStrSame3(&pos, &len, (const GLubyte*)"window_pos", 10)) + { + ret = GLEW_ARB_window_pos; + continue; + } +#endif + } + if (_glewStrSame2(&pos, &len, (const GLubyte*)"ATIX_", 5)) + { +#ifdef GL_ATIX_point_sprites + if (_glewStrSame3(&pos, &len, (const GLubyte*)"point_sprites", 13)) + { + ret = GLEW_ATIX_point_sprites; + continue; + } +#endif +#ifdef GL_ATIX_texture_env_combine3 + if (_glewStrSame3(&pos, &len, (const GLubyte*)"texture_env_combine3", 20)) + { + ret = GLEW_ATIX_texture_env_combine3; + continue; + } +#endif +#ifdef GL_ATIX_texture_env_route + if (_glewStrSame3(&pos, &len, (const GLubyte*)"texture_env_route", 17)) + { + ret = GLEW_ATIX_texture_env_route; + continue; + } +#endif +#ifdef GL_ATIX_vertex_shader_output_point_size + if (_glewStrSame3(&pos, &len, (const GLubyte*)"vertex_shader_output_point_size", 31)) + { + ret = GLEW_ATIX_vertex_shader_output_point_size; + continue; + } +#endif + } + if (_glewStrSame2(&pos, &len, (const GLubyte*)"ATI_", 4)) + { +#ifdef GL_ATI_draw_buffers + if (_glewStrSame3(&pos, &len, (const GLubyte*)"draw_buffers", 12)) + { + ret = GLEW_ATI_draw_buffers; + continue; + } +#endif +#ifdef GL_ATI_element_array + if (_glewStrSame3(&pos, &len, (const GLubyte*)"element_array", 13)) + { + ret = GLEW_ATI_element_array; + continue; + } +#endif +#ifdef GL_ATI_envmap_bumpmap + if (_glewStrSame3(&pos, &len, (const GLubyte*)"envmap_bumpmap", 14)) + { + ret = GLEW_ATI_envmap_bumpmap; + continue; + } +#endif +#ifdef GL_ATI_fragment_shader + if (_glewStrSame3(&pos, &len, (const 
GLubyte*)"fragment_shader", 15)) + { + ret = GLEW_ATI_fragment_shader; + continue; + } +#endif +#ifdef GL_ATI_map_object_buffer + if (_glewStrSame3(&pos, &len, (const GLubyte*)"map_object_buffer", 17)) + { + ret = GLEW_ATI_map_object_buffer; + continue; + } +#endif +#ifdef GL_ATI_pn_triangles + if (_glewStrSame3(&pos, &len, (const GLubyte*)"pn_triangles", 12)) + { + ret = GLEW_ATI_pn_triangles; + continue; + } +#endif +#ifdef GL_ATI_separate_stencil + if (_glewStrSame3(&pos, &len, (const GLubyte*)"separate_stencil", 16)) + { + ret = GLEW_ATI_separate_stencil; + continue; + } +#endif +#ifdef GL_ATI_shader_texture_lod + if (_glewStrSame3(&pos, &len, (const GLubyte*)"shader_texture_lod", 18)) + { + ret = GLEW_ATI_shader_texture_lod; + continue; + } +#endif +#ifdef GL_ATI_text_fragment_shader + if (_glewStrSame3(&pos, &len, (const GLubyte*)"text_fragment_shader", 20)) + { + ret = GLEW_ATI_text_fragment_shader; + continue; + } +#endif +#ifdef GL_ATI_texture_compression_3dc + if (_glewStrSame3(&pos, &len, (const GLubyte*)"texture_compression_3dc", 23)) + { + ret = GLEW_ATI_texture_compression_3dc; + continue; + } +#endif +#ifdef GL_ATI_texture_env_combine3 + if (_glewStrSame3(&pos, &len, (const GLubyte*)"texture_env_combine3", 20)) + { + ret = GLEW_ATI_texture_env_combine3; + continue; + } +#endif +#ifdef GL_ATI_texture_float + if (_glewStrSame3(&pos, &len, (const GLubyte*)"texture_float", 13)) + { + ret = GLEW_ATI_texture_float; + continue; + } +#endif +#ifdef GL_ATI_texture_mirror_once + if (_glewStrSame3(&pos, &len, (const GLubyte*)"texture_mirror_once", 19)) + { + ret = GLEW_ATI_texture_mirror_once; + continue; + } +#endif +#ifdef GL_ATI_vertex_array_object + if (_glewStrSame3(&pos, &len, (const GLubyte*)"vertex_array_object", 19)) + { + ret = GLEW_ATI_vertex_array_object; + continue; + } +#endif +#ifdef GL_ATI_vertex_attrib_array_object + if (_glewStrSame3(&pos, &len, (const GLubyte*)"vertex_attrib_array_object", 26)) + { + ret = GLEW_ATI_vertex_attrib_array_object; 
+ continue; + } +#endif +#ifdef GL_ATI_vertex_streams + if (_glewStrSame3(&pos, &len, (const GLubyte*)"vertex_streams", 14)) + { + ret = GLEW_ATI_vertex_streams; + continue; + } +#endif + } + if (_glewStrSame2(&pos, &len, (const GLubyte*)"EXT_", 4)) + { +#ifdef GL_EXT_422_pixels + if (_glewStrSame3(&pos, &len, (const GLubyte*)"422_pixels", 10)) + { + ret = GLEW_EXT_422_pixels; + continue; + } +#endif +#ifdef GL_EXT_Cg_shader + if (_glewStrSame3(&pos, &len, (const GLubyte*)"Cg_shader", 9)) + { + ret = GLEW_EXT_Cg_shader; + continue; + } +#endif +#ifdef GL_EXT_abgr + if (_glewStrSame3(&pos, &len, (const GLubyte*)"abgr", 4)) + { + ret = GLEW_EXT_abgr; + continue; + } +#endif +#ifdef GL_EXT_bgra + if (_glewStrSame3(&pos, &len, (const GLubyte*)"bgra", 4)) + { + ret = GLEW_EXT_bgra; + continue; + } +#endif +#ifdef GL_EXT_bindable_uniform + if (_glewStrSame3(&pos, &len, (const GLubyte*)"bindable_uniform", 16)) + { + ret = GLEW_EXT_bindable_uniform; + continue; + } +#endif +#ifdef GL_EXT_blend_color + if (_glewStrSame3(&pos, &len, (const GLubyte*)"blend_color", 11)) + { + ret = GLEW_EXT_blend_color; + continue; + } +#endif +#ifdef GL_EXT_blend_equation_separate + if (_glewStrSame3(&pos, &len, (const GLubyte*)"blend_equation_separate", 23)) + { + ret = GLEW_EXT_blend_equation_separate; + continue; + } +#endif +#ifdef GL_EXT_blend_func_separate + if (_glewStrSame3(&pos, &len, (const GLubyte*)"blend_func_separate", 19)) + { + ret = GLEW_EXT_blend_func_separate; + continue; + } +#endif +#ifdef GL_EXT_blend_logic_op + if (_glewStrSame3(&pos, &len, (const GLubyte*)"blend_logic_op", 14)) + { + ret = GLEW_EXT_blend_logic_op; + continue; + } +#endif +#ifdef GL_EXT_blend_minmax + if (_glewStrSame3(&pos, &len, (const GLubyte*)"blend_minmax", 12)) + { + ret = GLEW_EXT_blend_minmax; + continue; + } +#endif +#ifdef GL_EXT_blend_subtract + if (_glewStrSame3(&pos, &len, (const GLubyte*)"blend_subtract", 14)) + { + ret = GLEW_EXT_blend_subtract; + continue; + } +#endif +#ifdef 
GL_EXT_clip_volume_hint + if (_glewStrSame3(&pos, &len, (const GLubyte*)"clip_volume_hint", 16)) + { + ret = GLEW_EXT_clip_volume_hint; + continue; + } +#endif +#ifdef GL_EXT_cmyka + if (_glewStrSame3(&pos, &len, (const GLubyte*)"cmyka", 5)) + { + ret = GLEW_EXT_cmyka; + continue; + } +#endif +#ifdef GL_EXT_color_subtable + if (_glewStrSame3(&pos, &len, (const GLubyte*)"color_subtable", 14)) + { + ret = GLEW_EXT_color_subtable; + continue; + } +#endif +#ifdef GL_EXT_compiled_vertex_array + if (_glewStrSame3(&pos, &len, (const GLubyte*)"compiled_vertex_array", 21)) + { + ret = GLEW_EXT_compiled_vertex_array; + continue; + } +#endif +#ifdef GL_EXT_convolution + if (_glewStrSame3(&pos, &len, (const GLubyte*)"convolution", 11)) + { + ret = GLEW_EXT_convolution; + continue; + } +#endif +#ifdef GL_EXT_coordinate_frame + if (_glewStrSame3(&pos, &len, (const GLubyte*)"coordinate_frame", 16)) + { + ret = GLEW_EXT_coordinate_frame; + continue; + } +#endif +#ifdef GL_EXT_copy_texture + if (_glewStrSame3(&pos, &len, (const GLubyte*)"copy_texture", 12)) + { + ret = GLEW_EXT_copy_texture; + continue; + } +#endif +#ifdef GL_EXT_cull_vertex + if (_glewStrSame3(&pos, &len, (const GLubyte*)"cull_vertex", 11)) + { + ret = GLEW_EXT_cull_vertex; + continue; + } +#endif +#ifdef GL_EXT_depth_bounds_test + if (_glewStrSame3(&pos, &len, (const GLubyte*)"depth_bounds_test", 17)) + { + ret = GLEW_EXT_depth_bounds_test; + continue; + } +#endif +#ifdef GL_EXT_draw_buffers2 + if (_glewStrSame3(&pos, &len, (const GLubyte*)"draw_buffers2", 13)) + { + ret = GLEW_EXT_draw_buffers2; + continue; + } +#endif +#ifdef GL_EXT_draw_instanced + if (_glewStrSame3(&pos, &len, (const GLubyte*)"draw_instanced", 14)) + { + ret = GLEW_EXT_draw_instanced; + continue; + } +#endif +#ifdef GL_EXT_draw_range_elements + if (_glewStrSame3(&pos, &len, (const GLubyte*)"draw_range_elements", 19)) + { + ret = GLEW_EXT_draw_range_elements; + continue; + } +#endif +#ifdef GL_EXT_fog_coord + if (_glewStrSame3(&pos, &len, 
(const GLubyte*)"fog_coord", 9)) + { + ret = GLEW_EXT_fog_coord; + continue; + } +#endif +#ifdef GL_EXT_fragment_lighting + if (_glewStrSame3(&pos, &len, (const GLubyte*)"fragment_lighting", 17)) + { + ret = GLEW_EXT_fragment_lighting; + continue; + } +#endif +#ifdef GL_EXT_framebuffer_blit + if (_glewStrSame3(&pos, &len, (const GLubyte*)"framebuffer_blit", 16)) + { + ret = GLEW_EXT_framebuffer_blit; + continue; + } +#endif +#ifdef GL_EXT_framebuffer_multisample + if (_glewStrSame3(&pos, &len, (const GLubyte*)"framebuffer_multisample", 23)) + { + ret = GLEW_EXT_framebuffer_multisample; + continue; + } +#endif +#ifdef GL_EXT_framebuffer_object + if (_glewStrSame3(&pos, &len, (const GLubyte*)"framebuffer_object", 18)) + { + ret = GLEW_EXT_framebuffer_object; + continue; + } +#endif +#ifdef GL_EXT_framebuffer_sRGB + if (_glewStrSame3(&pos, &len, (const GLubyte*)"framebuffer_sRGB", 16)) + { + ret = GLEW_EXT_framebuffer_sRGB; + continue; + } +#endif +#ifdef GL_EXT_geometry_shader4 + if (_glewStrSame3(&pos, &len, (const GLubyte*)"geometry_shader4", 16)) + { + ret = GLEW_EXT_geometry_shader4; + continue; + } +#endif +#ifdef GL_EXT_gpu_program_parameters + if (_glewStrSame3(&pos, &len, (const GLubyte*)"gpu_program_parameters", 22)) + { + ret = GLEW_EXT_gpu_program_parameters; + continue; + } +#endif +#ifdef GL_EXT_gpu_shader4 + if (_glewStrSame3(&pos, &len, (const GLubyte*)"gpu_shader4", 11)) + { + ret = GLEW_EXT_gpu_shader4; + continue; + } +#endif +#ifdef GL_EXT_histogram + if (_glewStrSame3(&pos, &len, (const GLubyte*)"histogram", 9)) + { + ret = GLEW_EXT_histogram; + continue; + } +#endif +#ifdef GL_EXT_index_array_formats + if (_glewStrSame3(&pos, &len, (const GLubyte*)"index_array_formats", 19)) + { + ret = GLEW_EXT_index_array_formats; + continue; + } +#endif +#ifdef GL_EXT_index_func + if (_glewStrSame3(&pos, &len, (const GLubyte*)"index_func", 10)) + { + ret = GLEW_EXT_index_func; + continue; + } +#endif +#ifdef GL_EXT_index_material + if (_glewStrSame3(&pos, 
&len, (const GLubyte*)"index_material", 14)) + { + ret = GLEW_EXT_index_material; + continue; + } +#endif +#ifdef GL_EXT_index_texture + if (_glewStrSame3(&pos, &len, (const GLubyte*)"index_texture", 13)) + { + ret = GLEW_EXT_index_texture; + continue; + } +#endif +#ifdef GL_EXT_light_texture + if (_glewStrSame3(&pos, &len, (const GLubyte*)"light_texture", 13)) + { + ret = GLEW_EXT_light_texture; + continue; + } +#endif +#ifdef GL_EXT_misc_attribute + if (_glewStrSame3(&pos, &len, (const GLubyte*)"misc_attribute", 14)) + { + ret = GLEW_EXT_misc_attribute; + continue; + } +#endif +#ifdef GL_EXT_multi_draw_arrays + if (_glewStrSame3(&pos, &len, (const GLubyte*)"multi_draw_arrays", 17)) + { + ret = GLEW_EXT_multi_draw_arrays; + continue; + } +#endif +#ifdef GL_EXT_multisample + if (_glewStrSame3(&pos, &len, (const GLubyte*)"multisample", 11)) + { + ret = GLEW_EXT_multisample; + continue; + } +#endif +#ifdef GL_EXT_packed_depth_stencil + if (_glewStrSame3(&pos, &len, (const GLubyte*)"packed_depth_stencil", 20)) + { + ret = GLEW_EXT_packed_depth_stencil; + continue; + } +#endif +#ifdef GL_EXT_packed_float + if (_glewStrSame3(&pos, &len, (const GLubyte*)"packed_float", 12)) + { + ret = GLEW_EXT_packed_float; + continue; + } +#endif +#ifdef GL_EXT_packed_pixels + if (_glewStrSame3(&pos, &len, (const GLubyte*)"packed_pixels", 13)) + { + ret = GLEW_EXT_packed_pixels; + continue; + } +#endif +#ifdef GL_EXT_paletted_texture + if (_glewStrSame3(&pos, &len, (const GLubyte*)"paletted_texture", 16)) + { + ret = GLEW_EXT_paletted_texture; + continue; + } +#endif +#ifdef GL_EXT_pixel_buffer_object + if (_glewStrSame3(&pos, &len, (const GLubyte*)"pixel_buffer_object", 19)) + { + ret = GLEW_EXT_pixel_buffer_object; + continue; + } +#endif +#ifdef GL_EXT_pixel_transform + if (_glewStrSame3(&pos, &len, (const GLubyte*)"pixel_transform", 15)) + { + ret = GLEW_EXT_pixel_transform; + continue; + } +#endif +#ifdef GL_EXT_pixel_transform_color_table + if (_glewStrSame3(&pos, &len, (const 
GLubyte*)"pixel_transform_color_table", 27)) + { + ret = GLEW_EXT_pixel_transform_color_table; + continue; + } +#endif +#ifdef GL_EXT_point_parameters + if (_glewStrSame3(&pos, &len, (const GLubyte*)"point_parameters", 16)) + { + ret = GLEW_EXT_point_parameters; + continue; + } +#endif +#ifdef GL_EXT_polygon_offset + if (_glewStrSame3(&pos, &len, (const GLubyte*)"polygon_offset", 14)) + { + ret = GLEW_EXT_polygon_offset; + continue; + } +#endif +#ifdef GL_EXT_rescale_normal + if (_glewStrSame3(&pos, &len, (const GLubyte*)"rescale_normal", 14)) + { + ret = GLEW_EXT_rescale_normal; + continue; + } +#endif +#ifdef GL_EXT_scene_marker + if (_glewStrSame3(&pos, &len, (const GLubyte*)"scene_marker", 12)) + { + ret = GLEW_EXT_scene_marker; + continue; + } +#endif +#ifdef GL_EXT_secondary_color + if (_glewStrSame3(&pos, &len, (const GLubyte*)"secondary_color", 15)) + { + ret = GLEW_EXT_secondary_color; + continue; + } +#endif +#ifdef GL_EXT_separate_specular_color + if (_glewStrSame3(&pos, &len, (const GLubyte*)"separate_specular_color", 23)) + { + ret = GLEW_EXT_separate_specular_color; + continue; + } +#endif +#ifdef GL_EXT_shadow_funcs + if (_glewStrSame3(&pos, &len, (const GLubyte*)"shadow_funcs", 12)) + { + ret = GLEW_EXT_shadow_funcs; + continue; + } +#endif +#ifdef GL_EXT_shared_texture_palette + if (_glewStrSame3(&pos, &len, (const GLubyte*)"shared_texture_palette", 22)) + { + ret = GLEW_EXT_shared_texture_palette; + continue; + } +#endif +#ifdef GL_EXT_stencil_clear_tag + if (_glewStrSame3(&pos, &len, (const GLubyte*)"stencil_clear_tag", 17)) + { + ret = GLEW_EXT_stencil_clear_tag; + continue; + } +#endif +#ifdef GL_EXT_stencil_two_side + if (_glewStrSame3(&pos, &len, (const GLubyte*)"stencil_two_side", 16)) + { + ret = GLEW_EXT_stencil_two_side; + continue; + } +#endif +#ifdef GL_EXT_stencil_wrap + if (_glewStrSame3(&pos, &len, (const GLubyte*)"stencil_wrap", 12)) + { + ret = GLEW_EXT_stencil_wrap; + continue; + } +#endif +#ifdef GL_EXT_subtexture + if 
(_glewStrSame3(&pos, &len, (const GLubyte*)"subtexture", 10)) + { + ret = GLEW_EXT_subtexture; + continue; + } +#endif +#ifdef GL_EXT_texture + if (_glewStrSame3(&pos, &len, (const GLubyte*)"texture", 7)) + { + ret = GLEW_EXT_texture; + continue; + } +#endif +#ifdef GL_EXT_texture3D + if (_glewStrSame3(&pos, &len, (const GLubyte*)"texture3D", 9)) + { + ret = GLEW_EXT_texture3D; + continue; + } +#endif +#ifdef GL_EXT_texture_array + if (_glewStrSame3(&pos, &len, (const GLubyte*)"texture_array", 13)) + { + ret = GLEW_EXT_texture_array; + continue; + } +#endif +#ifdef GL_EXT_texture_buffer_object + if (_glewStrSame3(&pos, &len, (const GLubyte*)"texture_buffer_object", 21)) + { + ret = GLEW_EXT_texture_buffer_object; + continue; + } +#endif +#ifdef GL_EXT_texture_compression_dxt1 + if (_glewStrSame3(&pos, &len, (const GLubyte*)"texture_compression_dxt1", 24)) + { + ret = GLEW_EXT_texture_compression_dxt1; + continue; + } +#endif +#ifdef GL_EXT_texture_compression_latc + if (_glewStrSame3(&pos, &len, (const GLubyte*)"texture_compression_latc", 24)) + { + ret = GLEW_EXT_texture_compression_latc; + continue; + } +#endif +#ifdef GL_EXT_texture_compression_rgtc + if (_glewStrSame3(&pos, &len, (const GLubyte*)"texture_compression_rgtc", 24)) + { + ret = GLEW_EXT_texture_compression_rgtc; + continue; + } +#endif +#ifdef GL_EXT_texture_compression_s3tc + if (_glewStrSame3(&pos, &len, (const GLubyte*)"texture_compression_s3tc", 24)) + { + ret = GLEW_EXT_texture_compression_s3tc; + continue; + } +#endif +#ifdef GL_EXT_texture_cube_map + if (_glewStrSame3(&pos, &len, (const GLubyte*)"texture_cube_map", 16)) + { + ret = GLEW_EXT_texture_cube_map; + continue; + } +#endif +#ifdef GL_EXT_texture_edge_clamp + if (_glewStrSame3(&pos, &len, (const GLubyte*)"texture_edge_clamp", 18)) + { + ret = GLEW_EXT_texture_edge_clamp; + continue; + } +#endif +#ifdef GL_EXT_texture_env + if (_glewStrSame3(&pos, &len, (const GLubyte*)"texture_env", 11)) + { + ret = GLEW_EXT_texture_env; + continue; + 
} +#endif +#ifdef GL_EXT_texture_env_add + if (_glewStrSame3(&pos, &len, (const GLubyte*)"texture_env_add", 15)) + { + ret = GLEW_EXT_texture_env_add; + continue; + } +#endif +#ifdef GL_EXT_texture_env_combine + if (_glewStrSame3(&pos, &len, (const GLubyte*)"texture_env_combine", 19)) + { + ret = GLEW_EXT_texture_env_combine; + continue; + } +#endif +#ifdef GL_EXT_texture_env_dot3 + if (_glewStrSame3(&pos, &len, (const GLubyte*)"texture_env_dot3", 16)) + { + ret = GLEW_EXT_texture_env_dot3; + continue; + } +#endif +#ifdef GL_EXT_texture_filter_anisotropic + if (_glewStrSame3(&pos, &len, (const GLubyte*)"texture_filter_anisotropic", 26)) + { + ret = GLEW_EXT_texture_filter_anisotropic; + continue; + } +#endif +#ifdef GL_EXT_texture_integer + if (_glewStrSame3(&pos, &len, (const GLubyte*)"texture_integer", 15)) + { + ret = GLEW_EXT_texture_integer; + continue; + } +#endif +#ifdef GL_EXT_texture_lod_bias + if (_glewStrSame3(&pos, &len, (const GLubyte*)"texture_lod_bias", 16)) + { + ret = GLEW_EXT_texture_lod_bias; + continue; + } +#endif +#ifdef GL_EXT_texture_mirror_clamp + if (_glewStrSame3(&pos, &len, (const GLubyte*)"texture_mirror_clamp", 20)) + { + ret = GLEW_EXT_texture_mirror_clamp; + continue; + } +#endif +#ifdef GL_EXT_texture_object + if (_glewStrSame3(&pos, &len, (const GLubyte*)"texture_object", 14)) + { + ret = GLEW_EXT_texture_object; + continue; + } +#endif +#ifdef GL_EXT_texture_perturb_normal + if (_glewStrSame3(&pos, &len, (const GLubyte*)"texture_perturb_normal", 22)) + { + ret = GLEW_EXT_texture_perturb_normal; + continue; + } +#endif +#ifdef GL_EXT_texture_rectangle + if (_glewStrSame3(&pos, &len, (const GLubyte*)"texture_rectangle", 17)) + { + ret = GLEW_EXT_texture_rectangle; + continue; + } +#endif +#ifdef GL_EXT_texture_sRGB + if (_glewStrSame3(&pos, &len, (const GLubyte*)"texture_sRGB", 12)) + { + ret = GLEW_EXT_texture_sRGB; + continue; + } +#endif +#ifdef GL_EXT_texture_shared_exponent + if (_glewStrSame3(&pos, &len, (const 
GLubyte*)"texture_shared_exponent", 23)) + { + ret = GLEW_EXT_texture_shared_exponent; + continue; + } +#endif +#ifdef GL_EXT_timer_query + if (_glewStrSame3(&pos, &len, (const GLubyte*)"timer_query", 11)) + { + ret = GLEW_EXT_timer_query; + continue; + } +#endif +#ifdef GL_EXT_vertex_array + if (_glewStrSame3(&pos, &len, (const GLubyte*)"vertex_array", 12)) + { + ret = GLEW_EXT_vertex_array; + continue; + } +#endif +#ifdef GL_EXT_vertex_shader + if (_glewStrSame3(&pos, &len, (const GLubyte*)"vertex_shader", 13)) + { + ret = GLEW_EXT_vertex_shader; + continue; + } +#endif +#ifdef GL_EXT_vertex_weighting + if (_glewStrSame3(&pos, &len, (const GLubyte*)"vertex_weighting", 16)) + { + ret = GLEW_EXT_vertex_weighting; + continue; + } +#endif + } + if (_glewStrSame2(&pos, &len, (const GLubyte*)"GREMEDY_", 8)) + { +#ifdef GL_GREMEDY_frame_terminator + if (_glewStrSame3(&pos, &len, (const GLubyte*)"frame_terminator", 16)) + { + ret = GLEW_GREMEDY_frame_terminator; + continue; + } +#endif +#ifdef GL_GREMEDY_string_marker + if (_glewStrSame3(&pos, &len, (const GLubyte*)"string_marker", 13)) + { + ret = GLEW_GREMEDY_string_marker; + continue; + } +#endif + } + if (_glewStrSame2(&pos, &len, (const GLubyte*)"HP_", 3)) + { +#ifdef GL_HP_convolution_border_modes + if (_glewStrSame3(&pos, &len, (const GLubyte*)"convolution_border_modes", 24)) + { + ret = GLEW_HP_convolution_border_modes; + continue; + } +#endif +#ifdef GL_HP_image_transform + if (_glewStrSame3(&pos, &len, (const GLubyte*)"image_transform", 15)) + { + ret = GLEW_HP_image_transform; + continue; + } +#endif +#ifdef GL_HP_occlusion_test + if (_glewStrSame3(&pos, &len, (const GLubyte*)"occlusion_test", 14)) + { + ret = GLEW_HP_occlusion_test; + continue; + } +#endif +#ifdef GL_HP_texture_lighting + if (_glewStrSame3(&pos, &len, (const GLubyte*)"texture_lighting", 16)) + { + ret = GLEW_HP_texture_lighting; + continue; + } +#endif + } + if (_glewStrSame2(&pos, &len, (const GLubyte*)"IBM_", 4)) + { +#ifdef 
GL_IBM_cull_vertex + if (_glewStrSame3(&pos, &len, (const GLubyte*)"cull_vertex", 11)) + { + ret = GLEW_IBM_cull_vertex; + continue; + } +#endif +#ifdef GL_IBM_multimode_draw_arrays + if (_glewStrSame3(&pos, &len, (const GLubyte*)"multimode_draw_arrays", 21)) + { + ret = GLEW_IBM_multimode_draw_arrays; + continue; + } +#endif +#ifdef GL_IBM_rasterpos_clip + if (_glewStrSame3(&pos, &len, (const GLubyte*)"rasterpos_clip", 14)) + { + ret = GLEW_IBM_rasterpos_clip; + continue; + } +#endif +#ifdef GL_IBM_static_data + if (_glewStrSame3(&pos, &len, (const GLubyte*)"static_data", 11)) + { + ret = GLEW_IBM_static_data; + continue; + } +#endif +#ifdef GL_IBM_texture_mirrored_repeat + if (_glewStrSame3(&pos, &len, (const GLubyte*)"texture_mirrored_repeat", 23)) + { + ret = GLEW_IBM_texture_mirrored_repeat; + continue; + } +#endif +#ifdef GL_IBM_vertex_array_lists + if (_glewStrSame3(&pos, &len, (const GLubyte*)"vertex_array_lists", 18)) + { + ret = GLEW_IBM_vertex_array_lists; + continue; + } +#endif + } + if (_glewStrSame2(&pos, &len, (const GLubyte*)"INGR_", 5)) + { +#ifdef GL_INGR_color_clamp + if (_glewStrSame3(&pos, &len, (const GLubyte*)"color_clamp", 11)) + { + ret = GLEW_INGR_color_clamp; + continue; + } +#endif +#ifdef GL_INGR_interlace_read + if (_glewStrSame3(&pos, &len, (const GLubyte*)"interlace_read", 14)) + { + ret = GLEW_INGR_interlace_read; + continue; + } +#endif + } + if (_glewStrSame2(&pos, &len, (const GLubyte*)"INTEL_", 6)) + { +#ifdef GL_INTEL_parallel_arrays + if (_glewStrSame3(&pos, &len, (const GLubyte*)"parallel_arrays", 15)) + { + ret = GLEW_INTEL_parallel_arrays; + continue; + } +#endif +#ifdef GL_INTEL_texture_scissor + if (_glewStrSame3(&pos, &len, (const GLubyte*)"texture_scissor", 15)) + { + ret = GLEW_INTEL_texture_scissor; + continue; + } +#endif + } + if (_glewStrSame2(&pos, &len, (const GLubyte*)"KTX_", 4)) + { +#ifdef GL_KTX_buffer_region + if (_glewStrSame3(&pos, &len, (const GLubyte*)"buffer_region", 13)) + { + ret = 
GLEW_KTX_buffer_region; + continue; + } +#endif + } + if (_glewStrSame2(&pos, &len, (const GLubyte*)"MESAX_", 6)) + { +#ifdef GL_MESAX_texture_stack + if (_glewStrSame3(&pos, &len, (const GLubyte*)"texture_stack", 13)) + { + ret = GLEW_MESAX_texture_stack; + continue; + } +#endif + } + if (_glewStrSame2(&pos, &len, (const GLubyte*)"MESA_", 5)) + { +#ifdef GL_MESA_pack_invert + if (_glewStrSame3(&pos, &len, (const GLubyte*)"pack_invert", 11)) + { + ret = GLEW_MESA_pack_invert; + continue; + } +#endif +#ifdef GL_MESA_resize_buffers + if (_glewStrSame3(&pos, &len, (const GLubyte*)"resize_buffers", 14)) + { + ret = GLEW_MESA_resize_buffers; + continue; + } +#endif +#ifdef GL_MESA_window_pos + if (_glewStrSame3(&pos, &len, (const GLubyte*)"window_pos", 10)) + { + ret = GLEW_MESA_window_pos; + continue; + } +#endif +#ifdef GL_MESA_ycbcr_texture + if (_glewStrSame3(&pos, &len, (const GLubyte*)"ycbcr_texture", 13)) + { + ret = GLEW_MESA_ycbcr_texture; + continue; + } +#endif + } + if (_glewStrSame2(&pos, &len, (const GLubyte*)"NV_", 3)) + { +#ifdef GL_NV_blend_square + if (_glewStrSame3(&pos, &len, (const GLubyte*)"blend_square", 12)) + { + ret = GLEW_NV_blend_square; + continue; + } +#endif +#ifdef GL_NV_copy_depth_to_color + if (_glewStrSame3(&pos, &len, (const GLubyte*)"copy_depth_to_color", 19)) + { + ret = GLEW_NV_copy_depth_to_color; + continue; + } +#endif +#ifdef GL_NV_depth_buffer_float + if (_glewStrSame3(&pos, &len, (const GLubyte*)"depth_buffer_float", 18)) + { + ret = GLEW_NV_depth_buffer_float; + continue; + } +#endif +#ifdef GL_NV_depth_clamp + if (_glewStrSame3(&pos, &len, (const GLubyte*)"depth_clamp", 11)) + { + ret = GLEW_NV_depth_clamp; + continue; + } +#endif +#ifdef GL_NV_depth_range_unclamped + if (_glewStrSame3(&pos, &len, (const GLubyte*)"depth_range_unclamped", 21)) + { + ret = GLEW_NV_depth_range_unclamped; + continue; + } +#endif +#ifdef GL_NV_evaluators + if (_glewStrSame3(&pos, &len, (const GLubyte*)"evaluators", 10)) + { + ret = 
GLEW_NV_evaluators; + continue; + } +#endif +#ifdef GL_NV_fence + if (_glewStrSame3(&pos, &len, (const GLubyte*)"fence", 5)) + { + ret = GLEW_NV_fence; + continue; + } +#endif +#ifdef GL_NV_float_buffer + if (_glewStrSame3(&pos, &len, (const GLubyte*)"float_buffer", 12)) + { + ret = GLEW_NV_float_buffer; + continue; + } +#endif +#ifdef GL_NV_fog_distance + if (_glewStrSame3(&pos, &len, (const GLubyte*)"fog_distance", 12)) + { + ret = GLEW_NV_fog_distance; + continue; + } +#endif +#ifdef GL_NV_fragment_program + if (_glewStrSame3(&pos, &len, (const GLubyte*)"fragment_program", 16)) + { + ret = GLEW_NV_fragment_program; + continue; + } +#endif +#ifdef GL_NV_fragment_program2 + if (_glewStrSame3(&pos, &len, (const GLubyte*)"fragment_program2", 17)) + { + ret = GLEW_NV_fragment_program2; + continue; + } +#endif +#ifdef GL_NV_fragment_program4 + if (_glewStrSame3(&pos, &len, (const GLubyte*)"fragment_program4", 17)) + { + ret = GLEW_NV_fragment_program4; + continue; + } +#endif +#ifdef GL_NV_fragment_program_option + if (_glewStrSame3(&pos, &len, (const GLubyte*)"fragment_program_option", 23)) + { + ret = GLEW_NV_fragment_program_option; + continue; + } +#endif +#ifdef GL_NV_framebuffer_multisample_coverage + if (_glewStrSame3(&pos, &len, (const GLubyte*)"framebuffer_multisample_coverage", 32)) + { + ret = GLEW_NV_framebuffer_multisample_coverage; + continue; + } +#endif +#ifdef GL_NV_geometry_program4 + if (_glewStrSame3(&pos, &len, (const GLubyte*)"geometry_program4", 17)) + { + ret = GLEW_NV_geometry_program4; + continue; + } +#endif +#ifdef GL_NV_geometry_shader4 + if (_glewStrSame3(&pos, &len, (const GLubyte*)"geometry_shader4", 16)) + { + ret = GLEW_NV_geometry_shader4; + continue; + } +#endif +#ifdef GL_NV_gpu_program4 + if (_glewStrSame3(&pos, &len, (const GLubyte*)"gpu_program4", 12)) + { + ret = GLEW_NV_gpu_program4; + continue; + } +#endif +#ifdef GL_NV_half_float + if (_glewStrSame3(&pos, &len, (const GLubyte*)"half_float", 10)) + { + ret = 
GLEW_NV_half_float; + continue; + } +#endif +#ifdef GL_NV_light_max_exponent + if (_glewStrSame3(&pos, &len, (const GLubyte*)"light_max_exponent", 18)) + { + ret = GLEW_NV_light_max_exponent; + continue; + } +#endif +#ifdef GL_NV_multisample_filter_hint + if (_glewStrSame3(&pos, &len, (const GLubyte*)"multisample_filter_hint", 23)) + { + ret = GLEW_NV_multisample_filter_hint; + continue; + } +#endif +#ifdef GL_NV_occlusion_query + if (_glewStrSame3(&pos, &len, (const GLubyte*)"occlusion_query", 15)) + { + ret = GLEW_NV_occlusion_query; + continue; + } +#endif +#ifdef GL_NV_packed_depth_stencil + if (_glewStrSame3(&pos, &len, (const GLubyte*)"packed_depth_stencil", 20)) + { + ret = GLEW_NV_packed_depth_stencil; + continue; + } +#endif +#ifdef GL_NV_parameter_buffer_object + if (_glewStrSame3(&pos, &len, (const GLubyte*)"parameter_buffer_object", 23)) + { + ret = GLEW_NV_parameter_buffer_object; + continue; + } +#endif +#ifdef GL_NV_pixel_data_range + if (_glewStrSame3(&pos, &len, (const GLubyte*)"pixel_data_range", 16)) + { + ret = GLEW_NV_pixel_data_range; + continue; + } +#endif +#ifdef GL_NV_point_sprite + if (_glewStrSame3(&pos, &len, (const GLubyte*)"point_sprite", 12)) + { + ret = GLEW_NV_point_sprite; + continue; + } +#endif +#ifdef GL_NV_primitive_restart + if (_glewStrSame3(&pos, &len, (const GLubyte*)"primitive_restart", 17)) + { + ret = GLEW_NV_primitive_restart; + continue; + } +#endif +#ifdef GL_NV_register_combiners + if (_glewStrSame3(&pos, &len, (const GLubyte*)"register_combiners", 18)) + { + ret = GLEW_NV_register_combiners; + continue; + } +#endif +#ifdef GL_NV_register_combiners2 + if (_glewStrSame3(&pos, &len, (const GLubyte*)"register_combiners2", 19)) + { + ret = GLEW_NV_register_combiners2; + continue; + } +#endif +#ifdef GL_NV_texgen_emboss + if (_glewStrSame3(&pos, &len, (const GLubyte*)"texgen_emboss", 13)) + { + ret = GLEW_NV_texgen_emboss; + continue; + } +#endif +#ifdef GL_NV_texgen_reflection + if (_glewStrSame3(&pos, &len, (const 
GLubyte*)"texgen_reflection", 17)) + { + ret = GLEW_NV_texgen_reflection; + continue; + } +#endif +#ifdef GL_NV_texture_compression_vtc + if (_glewStrSame3(&pos, &len, (const GLubyte*)"texture_compression_vtc", 23)) + { + ret = GLEW_NV_texture_compression_vtc; + continue; + } +#endif +#ifdef GL_NV_texture_env_combine4 + if (_glewStrSame3(&pos, &len, (const GLubyte*)"texture_env_combine4", 20)) + { + ret = GLEW_NV_texture_env_combine4; + continue; + } +#endif +#ifdef GL_NV_texture_expand_normal + if (_glewStrSame3(&pos, &len, (const GLubyte*)"texture_expand_normal", 21)) + { + ret = GLEW_NV_texture_expand_normal; + continue; + } +#endif +#ifdef GL_NV_texture_rectangle + if (_glewStrSame3(&pos, &len, (const GLubyte*)"texture_rectangle", 17)) + { + ret = GLEW_NV_texture_rectangle; + continue; + } +#endif +#ifdef GL_NV_texture_shader + if (_glewStrSame3(&pos, &len, (const GLubyte*)"texture_shader", 14)) + { + ret = GLEW_NV_texture_shader; + continue; + } +#endif +#ifdef GL_NV_texture_shader2 + if (_glewStrSame3(&pos, &len, (const GLubyte*)"texture_shader2", 15)) + { + ret = GLEW_NV_texture_shader2; + continue; + } +#endif +#ifdef GL_NV_texture_shader3 + if (_glewStrSame3(&pos, &len, (const GLubyte*)"texture_shader3", 15)) + { + ret = GLEW_NV_texture_shader3; + continue; + } +#endif +#ifdef GL_NV_transform_feedback + if (_glewStrSame3(&pos, &len, (const GLubyte*)"transform_feedback", 18)) + { + ret = GLEW_NV_transform_feedback; + continue; + } +#endif +#ifdef GL_NV_vertex_array_range + if (_glewStrSame3(&pos, &len, (const GLubyte*)"vertex_array_range", 18)) + { + ret = GLEW_NV_vertex_array_range; + continue; + } +#endif +#ifdef GL_NV_vertex_array_range2 + if (_glewStrSame3(&pos, &len, (const GLubyte*)"vertex_array_range2", 19)) + { + ret = GLEW_NV_vertex_array_range2; + continue; + } +#endif +#ifdef GL_NV_vertex_program + if (_glewStrSame3(&pos, &len, (const GLubyte*)"vertex_program", 14)) + { + ret = GLEW_NV_vertex_program; + continue; + } +#endif +#ifdef 
GL_NV_vertex_program1_1 + if (_glewStrSame3(&pos, &len, (const GLubyte*)"vertex_program1_1", 17)) + { + ret = GLEW_NV_vertex_program1_1; + continue; + } +#endif +#ifdef GL_NV_vertex_program2 + if (_glewStrSame3(&pos, &len, (const GLubyte*)"vertex_program2", 15)) + { + ret = GLEW_NV_vertex_program2; + continue; + } +#endif +#ifdef GL_NV_vertex_program2_option + if (_glewStrSame3(&pos, &len, (const GLubyte*)"vertex_program2_option", 22)) + { + ret = GLEW_NV_vertex_program2_option; + continue; + } +#endif +#ifdef GL_NV_vertex_program3 + if (_glewStrSame3(&pos, &len, (const GLubyte*)"vertex_program3", 15)) + { + ret = GLEW_NV_vertex_program3; + continue; + } +#endif +#ifdef GL_NV_vertex_program4 + if (_glewStrSame3(&pos, &len, (const GLubyte*)"vertex_program4", 15)) + { + ret = GLEW_NV_vertex_program4; + continue; + } +#endif + } + if (_glewStrSame2(&pos, &len, (const GLubyte*)"OES_", 4)) + { +#ifdef GL_OES_byte_coordinates + if (_glewStrSame3(&pos, &len, (const GLubyte*)"byte_coordinates", 16)) + { + ret = GLEW_OES_byte_coordinates; + continue; + } +#endif +#ifdef GL_OES_compressed_paletted_texture + if (_glewStrSame3(&pos, &len, (const GLubyte*)"compressed_paletted_texture", 27)) + { + ret = GLEW_OES_compressed_paletted_texture; + continue; + } +#endif +#ifdef GL_OES_read_format + if (_glewStrSame3(&pos, &len, (const GLubyte*)"read_format", 11)) + { + ret = GLEW_OES_read_format; + continue; + } +#endif +#ifdef GL_OES_single_precision + if (_glewStrSame3(&pos, &len, (const GLubyte*)"single_precision", 16)) + { + ret = GLEW_OES_single_precision; + continue; + } +#endif + } + if (_glewStrSame2(&pos, &len, (const GLubyte*)"OML_", 4)) + { +#ifdef GL_OML_interlace + if (_glewStrSame3(&pos, &len, (const GLubyte*)"interlace", 9)) + { + ret = GLEW_OML_interlace; + continue; + } +#endif +#ifdef GL_OML_resample + if (_glewStrSame3(&pos, &len, (const GLubyte*)"resample", 8)) + { + ret = GLEW_OML_resample; + continue; + } +#endif +#ifdef GL_OML_subsample + if (_glewStrSame3(&pos, 
&len, (const GLubyte*)"subsample", 9)) + { + ret = GLEW_OML_subsample; + continue; + } +#endif + } + if (_glewStrSame2(&pos, &len, (const GLubyte*)"PGI_", 4)) + { +#ifdef GL_PGI_misc_hints + if (_glewStrSame3(&pos, &len, (const GLubyte*)"misc_hints", 10)) + { + ret = GLEW_PGI_misc_hints; + continue; + } +#endif +#ifdef GL_PGI_vertex_hints + if (_glewStrSame3(&pos, &len, (const GLubyte*)"vertex_hints", 12)) + { + ret = GLEW_PGI_vertex_hints; + continue; + } +#endif + } + if (_glewStrSame2(&pos, &len, (const GLubyte*)"REND_", 5)) + { +#ifdef GL_REND_screen_coordinates + if (_glewStrSame3(&pos, &len, (const GLubyte*)"screen_coordinates", 18)) + { + ret = GLEW_REND_screen_coordinates; + continue; + } +#endif + } + if (_glewStrSame2(&pos, &len, (const GLubyte*)"S3_", 3)) + { +#ifdef GL_S3_s3tc + if (_glewStrSame3(&pos, &len, (const GLubyte*)"s3tc", 4)) + { + ret = GLEW_S3_s3tc; + continue; + } +#endif + } + if (_glewStrSame2(&pos, &len, (const GLubyte*)"SGIS_", 5)) + { +#ifdef GL_SGIS_color_range + if (_glewStrSame3(&pos, &len, (const GLubyte*)"color_range", 11)) + { + ret = GLEW_SGIS_color_range; + continue; + } +#endif +#ifdef GL_SGIS_detail_texture + if (_glewStrSame3(&pos, &len, (const GLubyte*)"detail_texture", 14)) + { + ret = GLEW_SGIS_detail_texture; + continue; + } +#endif +#ifdef GL_SGIS_fog_function + if (_glewStrSame3(&pos, &len, (const GLubyte*)"fog_function", 12)) + { + ret = GLEW_SGIS_fog_function; + continue; + } +#endif +#ifdef GL_SGIS_generate_mipmap + if (_glewStrSame3(&pos, &len, (const GLubyte*)"generate_mipmap", 15)) + { + ret = GLEW_SGIS_generate_mipmap; + continue; + } +#endif +#ifdef GL_SGIS_multisample + if (_glewStrSame3(&pos, &len, (const GLubyte*)"multisample", 11)) + { + ret = GLEW_SGIS_multisample; + continue; + } +#endif +#ifdef GL_SGIS_pixel_texture + if (_glewStrSame3(&pos, &len, (const GLubyte*)"pixel_texture", 13)) + { + ret = GLEW_SGIS_pixel_texture; + continue; + } +#endif +#ifdef GL_SGIS_sharpen_texture + if (_glewStrSame3(&pos, 
&len, (const GLubyte*)"sharpen_texture", 15)) + { + ret = GLEW_SGIS_sharpen_texture; + continue; + } +#endif +#ifdef GL_SGIS_texture4D + if (_glewStrSame3(&pos, &len, (const GLubyte*)"texture4D", 9)) + { + ret = GLEW_SGIS_texture4D; + continue; + } +#endif +#ifdef GL_SGIS_texture_border_clamp + if (_glewStrSame3(&pos, &len, (const GLubyte*)"texture_border_clamp", 20)) + { + ret = GLEW_SGIS_texture_border_clamp; + continue; + } +#endif +#ifdef GL_SGIS_texture_edge_clamp + if (_glewStrSame3(&pos, &len, (const GLubyte*)"texture_edge_clamp", 18)) + { + ret = GLEW_SGIS_texture_edge_clamp; + continue; + } +#endif +#ifdef GL_SGIS_texture_filter4 + if (_glewStrSame3(&pos, &len, (const GLubyte*)"texture_filter4", 15)) + { + ret = GLEW_SGIS_texture_filter4; + continue; + } +#endif +#ifdef GL_SGIS_texture_lod + if (_glewStrSame3(&pos, &len, (const GLubyte*)"texture_lod", 11)) + { + ret = GLEW_SGIS_texture_lod; + continue; + } +#endif +#ifdef GL_SGIS_texture_select + if (_glewStrSame3(&pos, &len, (const GLubyte*)"texture_select", 14)) + { + ret = GLEW_SGIS_texture_select; + continue; + } +#endif + } + if (_glewStrSame2(&pos, &len, (const GLubyte*)"SGIX_", 5)) + { +#ifdef GL_SGIX_async + if (_glewStrSame3(&pos, &len, (const GLubyte*)"async", 5)) + { + ret = GLEW_SGIX_async; + continue; + } +#endif +#ifdef GL_SGIX_async_histogram + if (_glewStrSame3(&pos, &len, (const GLubyte*)"async_histogram", 15)) + { + ret = GLEW_SGIX_async_histogram; + continue; + } +#endif +#ifdef GL_SGIX_async_pixel + if (_glewStrSame3(&pos, &len, (const GLubyte*)"async_pixel", 11)) + { + ret = GLEW_SGIX_async_pixel; + continue; + } +#endif +#ifdef GL_SGIX_blend_alpha_minmax + if (_glewStrSame3(&pos, &len, (const GLubyte*)"blend_alpha_minmax", 18)) + { + ret = GLEW_SGIX_blend_alpha_minmax; + continue; + } +#endif +#ifdef GL_SGIX_clipmap + if (_glewStrSame3(&pos, &len, (const GLubyte*)"clipmap", 7)) + { + ret = GLEW_SGIX_clipmap; + continue; + } +#endif +#ifdef GL_SGIX_depth_texture + if 
(_glewStrSame3(&pos, &len, (const GLubyte*)"depth_texture", 13)) + { + ret = GLEW_SGIX_depth_texture; + continue; + } +#endif +#ifdef GL_SGIX_flush_raster + if (_glewStrSame3(&pos, &len, (const GLubyte*)"flush_raster", 12)) + { + ret = GLEW_SGIX_flush_raster; + continue; + } +#endif +#ifdef GL_SGIX_fog_offset + if (_glewStrSame3(&pos, &len, (const GLubyte*)"fog_offset", 10)) + { + ret = GLEW_SGIX_fog_offset; + continue; + } +#endif +#ifdef GL_SGIX_fog_texture + if (_glewStrSame3(&pos, &len, (const GLubyte*)"fog_texture", 11)) + { + ret = GLEW_SGIX_fog_texture; + continue; + } +#endif +#ifdef GL_SGIX_fragment_specular_lighting + if (_glewStrSame3(&pos, &len, (const GLubyte*)"fragment_specular_lighting", 26)) + { + ret = GLEW_SGIX_fragment_specular_lighting; + continue; + } +#endif +#ifdef GL_SGIX_framezoom + if (_glewStrSame3(&pos, &len, (const GLubyte*)"framezoom", 9)) + { + ret = GLEW_SGIX_framezoom; + continue; + } +#endif +#ifdef GL_SGIX_interlace + if (_glewStrSame3(&pos, &len, (const GLubyte*)"interlace", 9)) + { + ret = GLEW_SGIX_interlace; + continue; + } +#endif +#ifdef GL_SGIX_ir_instrument1 + if (_glewStrSame3(&pos, &len, (const GLubyte*)"ir_instrument1", 14)) + { + ret = GLEW_SGIX_ir_instrument1; + continue; + } +#endif +#ifdef GL_SGIX_list_priority + if (_glewStrSame3(&pos, &len, (const GLubyte*)"list_priority", 13)) + { + ret = GLEW_SGIX_list_priority; + continue; + } +#endif +#ifdef GL_SGIX_pixel_texture + if (_glewStrSame3(&pos, &len, (const GLubyte*)"pixel_texture", 13)) + { + ret = GLEW_SGIX_pixel_texture; + continue; + } +#endif +#ifdef GL_SGIX_pixel_texture_bits + if (_glewStrSame3(&pos, &len, (const GLubyte*)"pixel_texture_bits", 18)) + { + ret = GLEW_SGIX_pixel_texture_bits; + continue; + } +#endif +#ifdef GL_SGIX_reference_plane + if (_glewStrSame3(&pos, &len, (const GLubyte*)"reference_plane", 15)) + { + ret = GLEW_SGIX_reference_plane; + continue; + } +#endif +#ifdef GL_SGIX_resample + if (_glewStrSame3(&pos, &len, (const 
GLubyte*)"resample", 8)) + { + ret = GLEW_SGIX_resample; + continue; + } +#endif +#ifdef GL_SGIX_shadow + if (_glewStrSame3(&pos, &len, (const GLubyte*)"shadow", 6)) + { + ret = GLEW_SGIX_shadow; + continue; + } +#endif +#ifdef GL_SGIX_shadow_ambient + if (_glewStrSame3(&pos, &len, (const GLubyte*)"shadow_ambient", 14)) + { + ret = GLEW_SGIX_shadow_ambient; + continue; + } +#endif +#ifdef GL_SGIX_sprite + if (_glewStrSame3(&pos, &len, (const GLubyte*)"sprite", 6)) + { + ret = GLEW_SGIX_sprite; + continue; + } +#endif +#ifdef GL_SGIX_tag_sample_buffer + if (_glewStrSame3(&pos, &len, (const GLubyte*)"tag_sample_buffer", 17)) + { + ret = GLEW_SGIX_tag_sample_buffer; + continue; + } +#endif +#ifdef GL_SGIX_texture_add_env + if (_glewStrSame3(&pos, &len, (const GLubyte*)"texture_add_env", 15)) + { + ret = GLEW_SGIX_texture_add_env; + continue; + } +#endif +#ifdef GL_SGIX_texture_coordinate_clamp + if (_glewStrSame3(&pos, &len, (const GLubyte*)"texture_coordinate_clamp", 24)) + { + ret = GLEW_SGIX_texture_coordinate_clamp; + continue; + } +#endif +#ifdef GL_SGIX_texture_lod_bias + if (_glewStrSame3(&pos, &len, (const GLubyte*)"texture_lod_bias", 16)) + { + ret = GLEW_SGIX_texture_lod_bias; + continue; + } +#endif +#ifdef GL_SGIX_texture_multi_buffer + if (_glewStrSame3(&pos, &len, (const GLubyte*)"texture_multi_buffer", 20)) + { + ret = GLEW_SGIX_texture_multi_buffer; + continue; + } +#endif +#ifdef GL_SGIX_texture_range + if (_glewStrSame3(&pos, &len, (const GLubyte*)"texture_range", 13)) + { + ret = GLEW_SGIX_texture_range; + continue; + } +#endif +#ifdef GL_SGIX_texture_scale_bias + if (_glewStrSame3(&pos, &len, (const GLubyte*)"texture_scale_bias", 18)) + { + ret = GLEW_SGIX_texture_scale_bias; + continue; + } +#endif +#ifdef GL_SGIX_vertex_preclip + if (_glewStrSame3(&pos, &len, (const GLubyte*)"vertex_preclip", 14)) + { + ret = GLEW_SGIX_vertex_preclip; + continue; + } +#endif +#ifdef GL_SGIX_vertex_preclip_hint + if (_glewStrSame3(&pos, &len, (const 
GLubyte*)"vertex_preclip_hint", 19)) + { + ret = GLEW_SGIX_vertex_preclip_hint; + continue; + } +#endif +#ifdef GL_SGIX_ycrcb + if (_glewStrSame3(&pos, &len, (const GLubyte*)"ycrcb", 5)) + { + ret = GLEW_SGIX_ycrcb; + continue; + } +#endif + } + if (_glewStrSame2(&pos, &len, (const GLubyte*)"SGI_", 4)) + { +#ifdef GL_SGI_color_matrix + if (_glewStrSame3(&pos, &len, (const GLubyte*)"color_matrix", 12)) + { + ret = GLEW_SGI_color_matrix; + continue; + } +#endif +#ifdef GL_SGI_color_table + if (_glewStrSame3(&pos, &len, (const GLubyte*)"color_table", 11)) + { + ret = GLEW_SGI_color_table; + continue; + } +#endif +#ifdef GL_SGI_texture_color_table + if (_glewStrSame3(&pos, &len, (const GLubyte*)"texture_color_table", 19)) + { + ret = GLEW_SGI_texture_color_table; + continue; + } +#endif + } + if (_glewStrSame2(&pos, &len, (const GLubyte*)"SUNX_", 5)) + { +#ifdef GL_SUNX_constant_data + if (_glewStrSame3(&pos, &len, (const GLubyte*)"constant_data", 13)) + { + ret = GLEW_SUNX_constant_data; + continue; + } +#endif + } + if (_glewStrSame2(&pos, &len, (const GLubyte*)"SUN_", 4)) + { +#ifdef GL_SUN_convolution_border_modes + if (_glewStrSame3(&pos, &len, (const GLubyte*)"convolution_border_modes", 24)) + { + ret = GLEW_SUN_convolution_border_modes; + continue; + } +#endif +#ifdef GL_SUN_global_alpha + if (_glewStrSame3(&pos, &len, (const GLubyte*)"global_alpha", 12)) + { + ret = GLEW_SUN_global_alpha; + continue; + } +#endif +#ifdef GL_SUN_mesh_array + if (_glewStrSame3(&pos, &len, (const GLubyte*)"mesh_array", 10)) + { + ret = GLEW_SUN_mesh_array; + continue; + } +#endif +#ifdef GL_SUN_read_video_pixels + if (_glewStrSame3(&pos, &len, (const GLubyte*)"read_video_pixels", 17)) + { + ret = GLEW_SUN_read_video_pixels; + continue; + } +#endif +#ifdef GL_SUN_slice_accum + if (_glewStrSame3(&pos, &len, (const GLubyte*)"slice_accum", 11)) + { + ret = GLEW_SUN_slice_accum; + continue; + } +#endif +#ifdef GL_SUN_triangle_list + if (_glewStrSame3(&pos, &len, (const 
GLubyte*)"triangle_list", 13)) + { + ret = GLEW_SUN_triangle_list; + continue; + } +#endif +#ifdef GL_SUN_vertex + if (_glewStrSame3(&pos, &len, (const GLubyte*)"vertex", 6)) + { + ret = GLEW_SUN_vertex; + continue; + } +#endif + } + if (_glewStrSame2(&pos, &len, (const GLubyte*)"WIN_", 4)) + { +#ifdef GL_WIN_phong_shading + if (_glewStrSame3(&pos, &len, (const GLubyte*)"phong_shading", 13)) + { + ret = GLEW_WIN_phong_shading; + continue; + } +#endif +#ifdef GL_WIN_specular_fog + if (_glewStrSame3(&pos, &len, (const GLubyte*)"specular_fog", 12)) + { + ret = GLEW_WIN_specular_fog; + continue; + } +#endif +#ifdef GL_WIN_swap_hint + if (_glewStrSame3(&pos, &len, (const GLubyte*)"swap_hint", 9)) + { + ret = GLEW_WIN_swap_hint; + continue; + } +#endif + } + } + ret = (len == 0); + } + return ret; +} + +#if defined(_WIN32) + +#if defined(GLEW_MX) +GLboolean wglewContextIsSupported (WGLEWContext* ctx, const char* name) +#else +GLboolean wglewIsSupported (const char* name) +#endif +{ + GLubyte* pos = (GLubyte*)name; + GLuint len = _glewStrLen(pos); + GLboolean ret = GL_TRUE; + while (ret && len > 0) + { + if (_glewStrSame1(&pos, &len, (const GLubyte*)"WGL_", 4)) + { + if (_glewStrSame2(&pos, &len, (const GLubyte*)"3DFX_", 5)) + { +#ifdef WGL_3DFX_multisample + if (_glewStrSame3(&pos, &len, (const GLubyte*)"multisample", 11)) + { + ret = WGLEW_3DFX_multisample; + continue; + } +#endif + } + if (_glewStrSame2(&pos, &len, (const GLubyte*)"3DL_", 4)) + { +#ifdef WGL_3DL_stereo_control + if (_glewStrSame3(&pos, &len, (const GLubyte*)"stereo_control", 14)) + { + ret = WGLEW_3DL_stereo_control; + continue; + } +#endif + } + if (_glewStrSame2(&pos, &len, (const GLubyte*)"ARB_", 4)) + { +#ifdef WGL_ARB_buffer_region + if (_glewStrSame3(&pos, &len, (const GLubyte*)"buffer_region", 13)) + { + ret = WGLEW_ARB_buffer_region; + continue; + } +#endif +#ifdef WGL_ARB_extensions_string + if (_glewStrSame3(&pos, &len, (const GLubyte*)"extensions_string", 17)) + { + ret = 
WGLEW_ARB_extensions_string; + continue; + } +#endif +#ifdef WGL_ARB_make_current_read + if (_glewStrSame3(&pos, &len, (const GLubyte*)"make_current_read", 17)) + { + ret = WGLEW_ARB_make_current_read; + continue; + } +#endif +#ifdef WGL_ARB_multisample + if (_glewStrSame3(&pos, &len, (const GLubyte*)"multisample", 11)) + { + ret = WGLEW_ARB_multisample; + continue; + } +#endif +#ifdef WGL_ARB_pbuffer + if (_glewStrSame3(&pos, &len, (const GLubyte*)"pbuffer", 7)) + { + ret = WGLEW_ARB_pbuffer; + continue; + } +#endif +#ifdef WGL_ARB_pixel_format + if (_glewStrSame3(&pos, &len, (const GLubyte*)"pixel_format", 12)) + { + ret = WGLEW_ARB_pixel_format; + continue; + } +#endif +#ifdef WGL_ARB_pixel_format_float + if (_glewStrSame3(&pos, &len, (const GLubyte*)"pixel_format_float", 18)) + { + ret = WGLEW_ARB_pixel_format_float; + continue; + } +#endif +#ifdef WGL_ARB_render_texture + if (_glewStrSame3(&pos, &len, (const GLubyte*)"render_texture", 14)) + { + ret = WGLEW_ARB_render_texture; + continue; + } +#endif + } + if (_glewStrSame2(&pos, &len, (const GLubyte*)"ATI_", 4)) + { +#ifdef WGL_ATI_pixel_format_float + if (_glewStrSame3(&pos, &len, (const GLubyte*)"pixel_format_float", 18)) + { + ret = WGLEW_ATI_pixel_format_float; + continue; + } +#endif +#ifdef WGL_ATI_render_texture_rectangle + if (_glewStrSame3(&pos, &len, (const GLubyte*)"render_texture_rectangle", 24)) + { + ret = WGLEW_ATI_render_texture_rectangle; + continue; + } +#endif + } + if (_glewStrSame2(&pos, &len, (const GLubyte*)"EXT_", 4)) + { +#ifdef WGL_EXT_depth_float + if (_glewStrSame3(&pos, &len, (const GLubyte*)"depth_float", 11)) + { + ret = WGLEW_EXT_depth_float; + continue; + } +#endif +#ifdef WGL_EXT_display_color_table + if (_glewStrSame3(&pos, &len, (const GLubyte*)"display_color_table", 19)) + { + ret = WGLEW_EXT_display_color_table; + continue; + } +#endif +#ifdef WGL_EXT_extensions_string + if (_glewStrSame3(&pos, &len, (const GLubyte*)"extensions_string", 17)) + { + ret = 
WGLEW_EXT_extensions_string; + continue; + } +#endif +#ifdef WGL_EXT_framebuffer_sRGB + if (_glewStrSame3(&pos, &len, (const GLubyte*)"framebuffer_sRGB", 16)) + { + ret = WGLEW_EXT_framebuffer_sRGB; + continue; + } +#endif +#ifdef WGL_EXT_make_current_read + if (_glewStrSame3(&pos, &len, (const GLubyte*)"make_current_read", 17)) + { + ret = WGLEW_EXT_make_current_read; + continue; + } +#endif +#ifdef WGL_EXT_multisample + if (_glewStrSame3(&pos, &len, (const GLubyte*)"multisample", 11)) + { + ret = WGLEW_EXT_multisample; + continue; + } +#endif +#ifdef WGL_EXT_pbuffer + if (_glewStrSame3(&pos, &len, (const GLubyte*)"pbuffer", 7)) + { + ret = WGLEW_EXT_pbuffer; + continue; + } +#endif +#ifdef WGL_EXT_pixel_format + if (_glewStrSame3(&pos, &len, (const GLubyte*)"pixel_format", 12)) + { + ret = WGLEW_EXT_pixel_format; + continue; + } +#endif +#ifdef WGL_EXT_pixel_format_packed_float + if (_glewStrSame3(&pos, &len, (const GLubyte*)"pixel_format_packed_float", 25)) + { + ret = WGLEW_EXT_pixel_format_packed_float; + continue; + } +#endif +#ifdef WGL_EXT_swap_control + if (_glewStrSame3(&pos, &len, (const GLubyte*)"swap_control", 12)) + { + ret = WGLEW_EXT_swap_control; + continue; + } +#endif + } + if (_glewStrSame2(&pos, &len, (const GLubyte*)"I3D_", 4)) + { +#ifdef WGL_I3D_digital_video_control + if (_glewStrSame3(&pos, &len, (const GLubyte*)"digital_video_control", 21)) + { + ret = WGLEW_I3D_digital_video_control; + continue; + } +#endif +#ifdef WGL_I3D_gamma + if (_glewStrSame3(&pos, &len, (const GLubyte*)"gamma", 5)) + { + ret = WGLEW_I3D_gamma; + continue; + } +#endif +#ifdef WGL_I3D_genlock + if (_glewStrSame3(&pos, &len, (const GLubyte*)"genlock", 7)) + { + ret = WGLEW_I3D_genlock; + continue; + } +#endif +#ifdef WGL_I3D_image_buffer + if (_glewStrSame3(&pos, &len, (const GLubyte*)"image_buffer", 12)) + { + ret = WGLEW_I3D_image_buffer; + continue; + } +#endif +#ifdef WGL_I3D_swap_frame_lock + if (_glewStrSame3(&pos, &len, (const GLubyte*)"swap_frame_lock", 15)) 
+ { + ret = WGLEW_I3D_swap_frame_lock; + continue; + } +#endif +#ifdef WGL_I3D_swap_frame_usage + if (_glewStrSame3(&pos, &len, (const GLubyte*)"swap_frame_usage", 16)) + { + ret = WGLEW_I3D_swap_frame_usage; + continue; + } +#endif + } + if (_glewStrSame2(&pos, &len, (const GLubyte*)"NV_", 3)) + { +#ifdef WGL_NV_float_buffer + if (_glewStrSame3(&pos, &len, (const GLubyte*)"float_buffer", 12)) + { + ret = WGLEW_NV_float_buffer; + continue; + } +#endif +#ifdef WGL_NV_gpu_affinity + if (_glewStrSame3(&pos, &len, (const GLubyte*)"gpu_affinity", 12)) + { + ret = WGLEW_NV_gpu_affinity; + continue; + } +#endif +#ifdef WGL_NV_render_depth_texture + if (_glewStrSame3(&pos, &len, (const GLubyte*)"render_depth_texture", 20)) + { + ret = WGLEW_NV_render_depth_texture; + continue; + } +#endif +#ifdef WGL_NV_render_texture_rectangle + if (_glewStrSame3(&pos, &len, (const GLubyte*)"render_texture_rectangle", 24)) + { + ret = WGLEW_NV_render_texture_rectangle; + continue; + } +#endif +#ifdef WGL_NV_vertex_array_range + if (_glewStrSame3(&pos, &len, (const GLubyte*)"vertex_array_range", 18)) + { + ret = WGLEW_NV_vertex_array_range; + continue; + } +#endif + } + if (_glewStrSame2(&pos, &len, (const GLubyte*)"OML_", 4)) + { +#ifdef WGL_OML_sync_control + if (_glewStrSame3(&pos, &len, (const GLubyte*)"sync_control", 12)) + { + ret = WGLEW_OML_sync_control; + continue; + } +#endif + } + } + ret = (len == 0); + } + return ret; +} + +#elif !defined(__APPLE__) || defined(GLEW_APPLE_GLX) + +#if defined(GLEW_MX) +GLboolean glxewContextIsSupported (GLXEWContext* ctx, const char* name) +#else +GLboolean glxewIsSupported (const char* name) +#endif +{ + GLubyte* pos = (GLubyte*)name; + GLuint len = _glewStrLen(pos); + GLboolean ret = GL_TRUE; + while (ret && len > 0) + { + if(_glewStrSame1(&pos, &len, (const GLubyte*)"GLX_", 4)) + { + if (_glewStrSame2(&pos, &len, (const GLubyte*)"VERSION_", 8)) + { +#ifdef GLX_VERSION_1_2 + if (_glewStrSame3(&pos, &len, (const GLubyte*)"1_2", 3)) + { + ret = 
GLXEW_VERSION_1_2; + continue; + } +#endif +#ifdef GLX_VERSION_1_3 + if (_glewStrSame3(&pos, &len, (const GLubyte*)"1_3", 3)) + { + ret = GLXEW_VERSION_1_3; + continue; + } +#endif +#ifdef GLX_VERSION_1_4 + if (_glewStrSame3(&pos, &len, (const GLubyte*)"1_4", 3)) + { + ret = GLXEW_VERSION_1_4; + continue; + } +#endif + } + if (_glewStrSame2(&pos, &len, (const GLubyte*)"3DFX_", 5)) + { +#ifdef GLX_3DFX_multisample + if (_glewStrSame3(&pos, &len, (const GLubyte*)"multisample", 11)) + { + ret = GLXEW_3DFX_multisample; + continue; + } +#endif + } + if (_glewStrSame2(&pos, &len, (const GLubyte*)"ARB_", 4)) + { +#ifdef GLX_ARB_fbconfig_float + if (_glewStrSame3(&pos, &len, (const GLubyte*)"fbconfig_float", 14)) + { + ret = GLXEW_ARB_fbconfig_float; + continue; + } +#endif +#ifdef GLX_ARB_get_proc_address + if (_glewStrSame3(&pos, &len, (const GLubyte*)"get_proc_address", 16)) + { + ret = GLXEW_ARB_get_proc_address; + continue; + } +#endif +#ifdef GLX_ARB_multisample + if (_glewStrSame3(&pos, &len, (const GLubyte*)"multisample", 11)) + { + ret = GLXEW_ARB_multisample; + continue; + } +#endif + } + if (_glewStrSame2(&pos, &len, (const GLubyte*)"ATI_", 4)) + { +#ifdef GLX_ATI_pixel_format_float + if (_glewStrSame3(&pos, &len, (const GLubyte*)"pixel_format_float", 18)) + { + ret = GLXEW_ATI_pixel_format_float; + continue; + } +#endif +#ifdef GLX_ATI_render_texture + if (_glewStrSame3(&pos, &len, (const GLubyte*)"render_texture", 14)) + { + ret = GLXEW_ATI_render_texture; + continue; + } +#endif + } + if (_glewStrSame2(&pos, &len, (const GLubyte*)"EXT_", 4)) + { +#ifdef GLX_EXT_fbconfig_packed_float + if (_glewStrSame3(&pos, &len, (const GLubyte*)"fbconfig_packed_float", 21)) + { + ret = GLXEW_EXT_fbconfig_packed_float; + continue; + } +#endif +#ifdef GLX_EXT_framebuffer_sRGB + if (_glewStrSame3(&pos, &len, (const GLubyte*)"framebuffer_sRGB", 16)) + { + ret = GLXEW_EXT_framebuffer_sRGB; + continue; + } +#endif +#ifdef GLX_EXT_import_context + if (_glewStrSame3(&pos, &len, 
(const GLubyte*)"import_context", 14)) + { + ret = GLXEW_EXT_import_context; + continue; + } +#endif +#ifdef GLX_EXT_scene_marker + if (_glewStrSame3(&pos, &len, (const GLubyte*)"scene_marker", 12)) + { + ret = GLXEW_EXT_scene_marker; + continue; + } +#endif +#ifdef GLX_EXT_texture_from_pixmap + if (_glewStrSame3(&pos, &len, (const GLubyte*)"texture_from_pixmap", 19)) + { + ret = GLXEW_EXT_texture_from_pixmap; + continue; + } +#endif +#ifdef GLX_EXT_visual_info + if (_glewStrSame3(&pos, &len, (const GLubyte*)"visual_info", 11)) + { + ret = GLXEW_EXT_visual_info; + continue; + } +#endif +#ifdef GLX_EXT_visual_rating + if (_glewStrSame3(&pos, &len, (const GLubyte*)"visual_rating", 13)) + { + ret = GLXEW_EXT_visual_rating; + continue; + } +#endif + } + if (_glewStrSame2(&pos, &len, (const GLubyte*)"MESA_", 5)) + { +#ifdef GLX_MESA_agp_offset + if (_glewStrSame3(&pos, &len, (const GLubyte*)"agp_offset", 10)) + { + ret = GLXEW_MESA_agp_offset; + continue; + } +#endif +#ifdef GLX_MESA_copy_sub_buffer + if (_glewStrSame3(&pos, &len, (const GLubyte*)"copy_sub_buffer", 15)) + { + ret = GLXEW_MESA_copy_sub_buffer; + continue; + } +#endif +#ifdef GLX_MESA_pixmap_colormap + if (_glewStrSame3(&pos, &len, (const GLubyte*)"pixmap_colormap", 15)) + { + ret = GLXEW_MESA_pixmap_colormap; + continue; + } +#endif +#ifdef GLX_MESA_release_buffers + if (_glewStrSame3(&pos, &len, (const GLubyte*)"release_buffers", 15)) + { + ret = GLXEW_MESA_release_buffers; + continue; + } +#endif +#ifdef GLX_MESA_set_3dfx_mode + if (_glewStrSame3(&pos, &len, (const GLubyte*)"set_3dfx_mode", 13)) + { + ret = GLXEW_MESA_set_3dfx_mode; + continue; + } +#endif + } + if (_glewStrSame2(&pos, &len, (const GLubyte*)"NV_", 3)) + { +#ifdef GLX_NV_float_buffer + if (_glewStrSame3(&pos, &len, (const GLubyte*)"float_buffer", 12)) + { + ret = GLXEW_NV_float_buffer; + continue; + } +#endif +#ifdef GLX_NV_vertex_array_range + if (_glewStrSame3(&pos, &len, (const GLubyte*)"vertex_array_range", 18)) + { + ret = 
GLXEW_NV_vertex_array_range; + continue; + } +#endif + } + if (_glewStrSame2(&pos, &len, (const GLubyte*)"OML_", 4)) + { +#ifdef GLX_OML_swap_method + if (_glewStrSame3(&pos, &len, (const GLubyte*)"swap_method", 11)) + { + ret = GLXEW_OML_swap_method; + continue; + } +#endif +#if defined(GLX_OML_sync_control) && defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) +#include + if (_glewStrSame3(&pos, &len, (const GLubyte*)"sync_control", 12)) + { + ret = GLXEW_OML_sync_control; + continue; + } +#endif + } + if (_glewStrSame2(&pos, &len, (const GLubyte*)"SGIS_", 5)) + { +#ifdef GLX_SGIS_blended_overlay + if (_glewStrSame3(&pos, &len, (const GLubyte*)"blended_overlay", 15)) + { + ret = GLXEW_SGIS_blended_overlay; + continue; + } +#endif +#ifdef GLX_SGIS_color_range + if (_glewStrSame3(&pos, &len, (const GLubyte*)"color_range", 11)) + { + ret = GLXEW_SGIS_color_range; + continue; + } +#endif +#ifdef GLX_SGIS_multisample + if (_glewStrSame3(&pos, &len, (const GLubyte*)"multisample", 11)) + { + ret = GLXEW_SGIS_multisample; + continue; + } +#endif +#ifdef GLX_SGIS_shared_multisample + if (_glewStrSame3(&pos, &len, (const GLubyte*)"shared_multisample", 18)) + { + ret = GLXEW_SGIS_shared_multisample; + continue; + } +#endif + } + if (_glewStrSame2(&pos, &len, (const GLubyte*)"SGIX_", 5)) + { +#ifdef GLX_SGIX_fbconfig + if (_glewStrSame3(&pos, &len, (const GLubyte*)"fbconfig", 8)) + { + ret = GLXEW_SGIX_fbconfig; + continue; + } +#endif +#ifdef GLX_SGIX_hyperpipe + if (_glewStrSame3(&pos, &len, (const GLubyte*)"hyperpipe", 9)) + { + ret = GLXEW_SGIX_hyperpipe; + continue; + } +#endif +#ifdef GLX_SGIX_pbuffer + if (_glewStrSame3(&pos, &len, (const GLubyte*)"pbuffer", 7)) + { + ret = GLXEW_SGIX_pbuffer; + continue; + } +#endif +#ifdef GLX_SGIX_swap_barrier + if (_glewStrSame3(&pos, &len, (const GLubyte*)"swap_barrier", 12)) + { + ret = GLXEW_SGIX_swap_barrier; + continue; + } +#endif +#ifdef GLX_SGIX_swap_group + if (_glewStrSame3(&pos, &len, (const 
GLubyte*)"swap_group", 10)) + { + ret = GLXEW_SGIX_swap_group; + continue; + } +#endif +#ifdef GLX_SGIX_video_resize + if (_glewStrSame3(&pos, &len, (const GLubyte*)"video_resize", 12)) + { + ret = GLXEW_SGIX_video_resize; + continue; + } +#endif +#ifdef GLX_SGIX_visual_select_group + if (_glewStrSame3(&pos, &len, (const GLubyte*)"visual_select_group", 19)) + { + ret = GLXEW_SGIX_visual_select_group; + continue; + } +#endif + } + if (_glewStrSame2(&pos, &len, (const GLubyte*)"SGI_", 4)) + { +#ifdef GLX_SGI_cushion + if (_glewStrSame3(&pos, &len, (const GLubyte*)"cushion", 7)) + { + ret = GLXEW_SGI_cushion; + continue; + } +#endif +#ifdef GLX_SGI_make_current_read + if (_glewStrSame3(&pos, &len, (const GLubyte*)"make_current_read", 17)) + { + ret = GLXEW_SGI_make_current_read; + continue; + } +#endif +#ifdef GLX_SGI_swap_control + if (_glewStrSame3(&pos, &len, (const GLubyte*)"swap_control", 12)) + { + ret = GLXEW_SGI_swap_control; + continue; + } +#endif +#ifdef GLX_SGI_video_sync + if (_glewStrSame3(&pos, &len, (const GLubyte*)"video_sync", 10)) + { + ret = GLXEW_SGI_video_sync; + continue; + } +#endif + } + if (_glewStrSame2(&pos, &len, (const GLubyte*)"SUN_", 4)) + { +#ifdef GLX_SUN_get_transparent_index + if (_glewStrSame3(&pos, &len, (const GLubyte*)"get_transparent_index", 21)) + { + ret = GLXEW_SUN_get_transparent_index; + continue; + } +#endif +#ifdef GLX_SUN_video_resize + if (_glewStrSame3(&pos, &len, (const GLubyte*)"video_resize", 12)) + { + ret = GLXEW_SUN_video_resize; + continue; + } +#endif + } + } + ret = (len == 0); + } + return ret; +} + +#endif /* _WIN32 */ diff --git a/test_conformance/gl/Jamfile b/test_conformance/gl/Jamfile new file mode 100644 index 00000000..5064d5e5 --- /dev/null +++ b/test_conformance/gl/Jamfile @@ -0,0 +1,28 @@ +project + : requirements +# gcc:-xc++ +# msvc:"/TP" + ; + +exe test_gl + : helpers.cpp + main.cpp + test_buffers.cpp + test_images_2D.cpp + test_images_2D_info.cpp + test_images_3D.cpp + test_images_3D_info.cpp + 
test_renderbuffer.cpp + test_renderbuffer_info.cpp + : darwin:setup_osx.cpp + linux:setup_x11.cpp + windows:setup_win32.cpp + ../..//glut + ../..//glew + ; + +install dist + : test_gl + : debug:$(DIST)/debug/tests/test_conformance/gl + release:$(DIST)/release/tests/test_conformance/gl + ; diff --git a/test_conformance/gl/Makefile b/test_conformance/gl/Makefile new file mode 100644 index 00000000..521e0688 --- /dev/null +++ b/test_conformance/gl/Makefile @@ -0,0 +1,62 @@ +ifdef BUILD_WITH_ATF +ATF = -framework ATF +USE_ATF = -DUSE_ATF +endif + +SRCS = main.cpp \ + helpers.cpp \ + test_buffers.cpp \ + test_fence_sync.cpp \ + test_images_1D.cpp \ + test_images_1Darray.cpp \ + test_images_2Darray.cpp \ + test_images_2D.cpp \ + test_images_3D.cpp \ + test_images_getinfo_common.cpp \ + test_images_read_common.cpp \ + test_images_write_common.cpp \ + test_renderbuffer.cpp \ + test_renderbuffer_info.cpp \ + test_images_depth.cpp \ + test_images_multisample.cpp \ + test_image_methods.cpp \ + ../../test_common/gl/helpers.cpp \ + ../../test_common/gl/setup_osx.cpp \ + ../../test_common/harness/conversions.c \ + ../../test_common/harness/errorHelpers.c \ + ../../test_common/harness/genericThread.cpp \ + ../../test_common/harness/imageHelpers.c \ + ../../test_common/harness/kernelHelpers.c \ + ../../test_common/harness/mt19937.c \ + ../../test_common/harness/testHarness.c \ + ../../test_common/harness/threadTesting.c + +DEFINES = + +SOURCES = $(abspath $(SRCS)) +LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries +LIBPATH += -L. 
+FRAMEWORK = $(SOURCES) +HEADERS = +TARGET = test_gl +INCLUDE = +COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32 +CC = c++ +CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE) +CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE) +LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF} + +OBJECTS := ${SOURCES:.c=.o} +OBJECTS := ${OBJECTS:.cpp=.o} + +TARGETOBJECT = +all: $(TARGET) + +$(TARGET): $(OBJECTS) + $(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES) + +clean: + rm -f $(TARGET) $(OBJECTS) + +.DEFAULT: + @echo The target \"$@\" does not exist in Makefile. diff --git a/test_conformance/gl/common.h b/test_conformance/gl/common.h new file mode 100644 index 00000000..36221da1 --- /dev/null +++ b/test_conformance/gl/common.h @@ -0,0 +1,78 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef __COMMON_H__ +#define __COMMON_H__ + +#include "testBase.h" + +typedef struct { + size_t width; + size_t height; + size_t depth; +} sizevec_t; + +struct format { + GLenum internal; + GLenum formattype; + GLenum datatype; + ExplicitType type; +}; + +// These are the typically tested formats. 
+ +static struct format common_formats[] = { +#ifdef __APPLE__ + { GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8, kUChar }, + { GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, kUChar }, + { GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, kUChar }, + // { GL_RGB10, GL_BGRA, GL_UNSIGNED_INT_2_10_10_10_REV, kFloat }, +#endif + { GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, kUChar }, + { GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, kUShort }, + { GL_RGBA8I_EXT, GL_RGBA_INTEGER_EXT, GL_BYTE, kChar }, + { GL_RGBA16I_EXT, GL_RGBA_INTEGER_EXT, GL_SHORT, kShort }, + { GL_RGBA32I_EXT, GL_RGBA_INTEGER_EXT, GL_INT, kInt }, + { GL_RGBA8UI_EXT, GL_RGBA_INTEGER_EXT, GL_UNSIGNED_BYTE, kUChar }, + { GL_RGBA16UI_EXT, GL_RGBA_INTEGER_EXT, GL_UNSIGNED_SHORT, kUShort }, + { GL_RGBA32UI_EXT, GL_RGBA_INTEGER_EXT, GL_UNSIGNED_INT, kUInt }, + { GL_RGBA32F_ARB, GL_RGBA, GL_FLOAT, kFloat }, + { GL_RGBA16F_ARB, GL_RGBA, GL_HALF_FLOAT, kHalf } +}; + +#ifdef GL_VERSION_3_2 +static struct format depth_formats[] = { + { GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, kUShort }, + { GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, kFloat }, + { GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, kUInt }, + { GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV, kFloat }, +}; +#endif + +int test_images_write_common(cl_device_id device, cl_context context, + cl_command_queue queue, struct format* formats, size_t nformats, + GLenum *targets, size_t ntargets, sizevec_t* sizes, size_t nsizes ); + +int test_images_read_common( cl_device_id device, cl_context context, + cl_command_queue queue, struct format* formats, size_t nformats, + GLenum *targets, size_t ntargets, sizevec_t *sizes, size_t nsizes ); + +int test_images_get_info_common( cl_device_id device, cl_context context, + cl_command_queue queue, struct format* formats, size_t nformats, + GLenum *targets, size_t ntargets, sizevec_t *sizes, size_t nsizes ); + +int is_rgb_101010_supported( cl_context context, GLenum 
gl_target ); + +#endif // __COMMON_H__ diff --git a/test_conformance/gl/helpers.cpp b/test_conformance/gl/helpers.cpp new file mode 100644 index 00000000..864059ba --- /dev/null +++ b/test_conformance/gl/helpers.cpp @@ -0,0 +1,657 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "testBase.h" +#if defined( __APPLE__ ) + #include +#else + #include +#endif + +const char *get_kernel_suffix( cl_image_format *format ) +{ + switch( format->image_channel_data_type ) + { + case CL_UNORM_INT8: + case CL_UNORM_INT16: + case CL_UNORM_INT24: + case CL_SNORM_INT8: + case CL_SNORM_INT16: + case CL_HALF_FLOAT: + case CL_FLOAT: + case CL_UNORM_INT_101010: + return "f"; + case CL_SIGNED_INT8: + case CL_SIGNED_INT16: + case CL_SIGNED_INT32: + return "i"; + case CL_UNSIGNED_INT8: + case CL_UNSIGNED_INT16: + case CL_UNSIGNED_INT32: + return "ui"; + default: + log_error("Test error: unsupported kernel suffix for image_channel_data_type 0x%X\n",format->image_channel_data_type); + return ""; + } +} + +ExplicitType get_read_kernel_type( cl_image_format *format ) +{ + switch( format->image_channel_data_type ) + { + case CL_UNORM_INT8: + case CL_UNORM_INT16: + case CL_UNORM_INT24: + case CL_SNORM_INT8: + case CL_SNORM_INT16: + case CL_HALF_FLOAT: + case CL_FLOAT: + case CL_UNORM_INT_101010: +#ifdef GL_VERSION_3_2 + case CL_DEPTH: +#endif + return kFloat; + case CL_SIGNED_INT8: + case CL_SIGNED_INT16: + case 
CL_SIGNED_INT32: + return kInt; + case CL_UNSIGNED_INT8: + case CL_UNSIGNED_INT16: + case CL_UNSIGNED_INT32: + return kUInt; + default: + log_error("Test error: unsupported kernel suffix for image_channel_data_type 0x%X\n",format->image_channel_data_type); + return kNumExplicitTypes; + } +} + +ExplicitType get_write_kernel_type( cl_image_format *format ) +{ + switch( format->image_channel_data_type ) + { + case CL_UNORM_INT8: + return kFloat; + case CL_UNORM_INT16: + return kFloat; + case CL_UNORM_INT24: + return kFloat; + case CL_SNORM_INT8: + return kFloat; + case CL_SNORM_INT16: + return kFloat; + case CL_HALF_FLOAT: + return kHalf; + case CL_FLOAT: + return kFloat; + case CL_SIGNED_INT8: + return kChar; + case CL_SIGNED_INT16: + return kShort; + case CL_SIGNED_INT32: + return kInt; + case CL_UNSIGNED_INT8: + return kUChar; + case CL_UNSIGNED_INT16: + return kUShort; + case CL_UNSIGNED_INT32: + return kUInt; + case CL_UNORM_INT_101010: + return kFloat; +#ifdef GL_VERSION_3_2 + case CL_DEPTH: + return kFloat; +#endif + default: + return kInt; + } +} + +const char* get_write_conversion( cl_image_format *format, ExplicitType type ) +{ + switch( format->image_channel_data_type ) + { + case CL_UNORM_INT8: + case CL_UNORM_INT16: + case CL_SNORM_INT8: + case CL_SNORM_INT16: + case CL_HALF_FLOAT: + case CL_FLOAT: + case CL_UNORM_INT_101010: + case CL_UNORM_INT24: + if(type != kFloat) return "convert_float4"; + break; + case CL_SIGNED_INT8: + case CL_SIGNED_INT16: + case CL_SIGNED_INT32: + if(type != kInt) return "convert_int4"; + break; + case CL_UNSIGNED_INT8: + case CL_UNSIGNED_INT16: + case CL_UNSIGNED_INT32: + if(type != kUInt) return "convert_uint4"; + break; + default: + return ""; + } + return ""; +} + +// The only three input types to this function are kInt, kUInt and kFloat, due to the way we set up our tests +// The output types, though, are pretty much anything valid for GL to receive + +#define DOWNSCALE_INTEGER_CASE( enum, type, bitShift ) \ + case enum: \ 
+ { \ + cl_##type *dst = new cl_##type[ numPixels * 4 ]; \ + for( size_t i = 0; i < numPixels * 4; i++ ) \ + dst[ i ] = src[ i ]; \ + return (char *)dst; \ + } + +#define UPSCALE_FLOAT_CASE( enum, type, typeMax ) \ + case enum: \ + { \ + cl_##type *dst = new cl_##type[ numPixels * 4 ]; \ + for( size_t i = 0; i < numPixels * 4; i++ ) \ + dst[ i ] = (cl_##type)( src[ i ] * typeMax ); \ + return (char *)dst; \ + } + +char * convert_to_expected( void * inputBuffer, size_t numPixels, ExplicitType inType, ExplicitType outType, size_t channelNum, GLenum glDataType ) +{ +#ifdef DEBUG + log_info( "- Converting from input type '%s' to output type '%s'\n", + get_explicit_type_name( inType ), get_explicit_type_name( outType ) ); +#endif + + if( inType == outType ) + { + char *outData = new char[ numPixels * channelNum * get_explicit_type_size(outType) ] ; // sizeof( cl_int ) ]; + if (glDataType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV) { + for (size_t i = 0; i < numPixels; ++i) { + ((cl_float*)outData)[i] = ((cl_float*)inputBuffer)[2 * i]; + } + } + else { + memcpy( outData, inputBuffer, numPixels * channelNum * get_explicit_type_size(inType) ); + } + return outData; + } + else if( inType == kChar ) + { + cl_char *src = (cl_char *)inputBuffer; + + switch( outType ) + { + case kInt: + { + cl_int *outData = new cl_int[ numPixels * channelNum ]; + for( size_t i = 0; i < numPixels * channelNum; i++ ) + { + outData[ i ] = (cl_int)((src[ i ])); + } + return (char *)outData; + } + case kFloat: + { + // If we're converting to float, then CL decided that we should be normalized + cl_float *outData = new cl_float[ numPixels * channelNum ]; + for( size_t i = 0; i < numPixels * channelNum; i++ ) + { + outData[ i ] = (cl_float)src[ i ] / 127.0f; + } + return (char *)outData; + } + default: + log_error( "ERROR: Unsupported conversion from %s to %s!\n", get_explicit_type_name( inType ), get_explicit_type_name( outType ) ); + return NULL; + } + } + else if( inType == kUChar ) + { + cl_uchar *src 
= (cl_uchar *)inputBuffer; + + switch( outType ) + { + case kUInt: + { + cl_uint *outData = new cl_uint[ numPixels * channelNum ]; + for( size_t i = 0; i < numPixels * channelNum; i++ ) + { + outData[ i ] = (cl_uint)((src[ i ])); + } + return (char *)outData; + } + case kFloat: + { + // If we're converting to float, then CL decided that we should be normalized + cl_float *outData = new cl_float[ numPixels * channelNum ]; + for( size_t i = 0; i < numPixels * channelNum; i++ ) + { + outData[ i ] = (cl_float)(src[ i ]) / 256.0f; + } + return (char *)outData; + } + default: + log_error( "ERROR: Unsupported conversion from %s to %s!\n", get_explicit_type_name( inType ), get_explicit_type_name( outType ) ); + return NULL; + } + } + else if( inType == kShort ) + { + cl_short *src = (cl_short *)inputBuffer; + + switch( outType ) + { + case kInt: + { + cl_int *outData = new cl_int[ numPixels * channelNum ]; + for( size_t i = 0; i < numPixels * channelNum; i++ ) + { + outData[ i ] = (cl_int)((src[ i ])); + } + return (char *)outData; + } + case kFloat: + { + // If we're converting to float, then CL decided that we should be normalized + cl_float *outData = new cl_float[ numPixels * channelNum ]; + for( size_t i = 0; i < numPixels * channelNum; i++ ) + { + outData[ i ] = (cl_float)src[ i ] / 32768.0f; + } + return (char *)outData; + } + default: + log_error( "ERROR: Unsupported conversion from %s to %s!\n", get_explicit_type_name( inType ), get_explicit_type_name( outType ) ); + return NULL; + } + } + else if( inType == kUShort ) + { + cl_ushort *src = (cl_ushort *)inputBuffer; + + switch( outType ) + { + case kUInt: + { + cl_uint *outData = new cl_uint[ numPixels * channelNum ]; + for( size_t i = 0; i < numPixels * channelNum; i++ ) + { + outData[ i ] = (cl_uint)((src[ i ])); + } + return (char *)outData; + } + case kFloat: + { + // If we're converting to float, then CL decided that we should be normalized + cl_float *outData = new cl_float[ numPixels * channelNum ]; + for( 
size_t i = 0; i < numPixels * channelNum; i++ ) + { + outData[ i ] = (cl_float)(src[ i ]) / 65535.0f; + } + return (char *)outData; + } + default: + log_error( "ERROR: Unsupported conversion from %s to %s!\n", get_explicit_type_name( inType ), get_explicit_type_name( outType ) ); + return NULL; + } + } + else if( inType == kInt ) + { + cl_int *src = (cl_int *)inputBuffer; + + switch( outType ) + { + DOWNSCALE_INTEGER_CASE( kShort, short, 16 ) + DOWNSCALE_INTEGER_CASE( kChar, char, 24 ) + case kFloat: + { + // If we're converting to float, then CL decided that we should be normalized + cl_float *outData = new cl_float[ numPixels * channelNum ]; + for( size_t i = 0; i < numPixels * channelNum; i++ ) + { + outData[ i ] = (cl_float)fmaxf( (float)src[ i ] / 2147483647.f, -1.f ); + } + return (char *)outData; + } + default: + log_error( "ERROR: Unsupported conversion from %s to %s!\n", get_explicit_type_name( inType ), get_explicit_type_name( outType ) ); + return NULL; + } + } + else if( inType == kUInt ) + { + cl_uint *src = (cl_uint *)inputBuffer; + + switch( outType ) + { + DOWNSCALE_INTEGER_CASE( kUShort, ushort, 16 ) + DOWNSCALE_INTEGER_CASE( kUChar, uchar, 24 ) + case kFloat: + { + // If we're converting to float, then CL decided that we should be normalized + cl_float *outData = new cl_float[ numPixels * channelNum ]; + const cl_float MaxValue = (glDataType == GL_UNSIGNED_INT_24_8) ? 16777215.f : 4294967295.f; + const cl_uint ShiftBits = (glDataType == GL_UNSIGNED_INT_24_8) ? 
8 : 0; + for( size_t i = 0; i < numPixels * channelNum; i++ ) + { + outData[ i ] = (cl_float)(src[ i ] >> ShiftBits) / MaxValue; + } + return (char *)outData; + } + default: + log_error( "ERROR: Unsupported conversion from %s to %s!\n", get_explicit_type_name( inType ), get_explicit_type_name( outType ) ); + return NULL; + } + } + else if( inType == kHalf ) + { + cl_half *src = (cl_half *)inputBuffer; + + switch( outType ) + { + case kFloat: + { + cl_float *outData = new cl_float[ numPixels * channelNum ]; + for( size_t i = 0; i < numPixels * channelNum; i++ ) + { + outData[ i ] = convert_half_to_float(src[ i ]); + } + return (char *)outData; + } + default: + log_error( "ERROR: Unsupported conversion from %s to %s!\n", get_explicit_type_name( inType ), get_explicit_type_name( outType ) ); + return NULL; + } + } + else + { + cl_float *src = (cl_float *)inputBuffer; + + switch( outType ) + { + UPSCALE_FLOAT_CASE( kChar, char, 127.f ) + UPSCALE_FLOAT_CASE( kUChar, uchar, 255.f ) + UPSCALE_FLOAT_CASE( kShort, short, 32767.f ) + UPSCALE_FLOAT_CASE( kUShort, ushort, 65535.f ) + UPSCALE_FLOAT_CASE( kInt, int, 2147483647.f ) + UPSCALE_FLOAT_CASE( kUInt, uint, 4294967295.f ) + default: + log_error( "ERROR: Unsupported conversion from %s to %s!\n", get_explicit_type_name( inType ), get_explicit_type_name( outType ) ); + return NULL; + } + } + + return NULL; +} + +int validate_integer_results( void *expectedResults, void *actualResults, size_t width, size_t height, size_t sampleNum, size_t typeSize ) +{ + return validate_integer_results( expectedResults, actualResults, width, height, sampleNum, 0, typeSize ); +} + +int validate_integer_results( void *expectedResults, void *actualResults, size_t width, size_t height, size_t depth, size_t sampleNum, size_t typeSize ) +{ + char *expected = (char *)expectedResults; + char *actual = (char *)actualResults; + for ( size_t s = 0; s < sampleNum; s++ ) + { + for( size_t z = 0; z < ( ( depth == 0 ) ? 
1 : depth ); z++ ) + { + for( size_t y = 0; y < height; y++ ) + { + for( size_t x = 0; x < width; x++ ) + { + if( memcmp( expected, actual, typeSize * 4 ) != 0 ) + { + char scratch[ 1024 ]; + + if( depth == 0 ) + log_error( "ERROR: Data sample %d,%d,%d did not validate!\n", (int)x, (int)y, (int)s ); + else + log_error( "ERROR: Data sample %d,%d,%d,%d did not validate!\n", (int)x, (int)y, (int)z, (int)s ); + log_error( "\tExpected: %s\n", GetDataVectorString( expected, typeSize, 4, scratch ) ); + log_error( "\t Actual: %s\n", GetDataVectorString( actual, typeSize, 4, scratch ) ); + return -1; + } + expected += typeSize * 4; + actual += typeSize * 4; + } + } + } + } + + return 0; +} + +int validate_float_results( void *expectedResults, void *actualResults, size_t width, size_t height, size_t sampleNum, size_t channelNum ) +{ + return validate_float_results( expectedResults, actualResults, width, height, sampleNum, 0, channelNum ); +} + +int validate_float_results( void *expectedResults, void *actualResults, size_t width, size_t height, size_t depth, size_t sampleNum, size_t channelNum ) +{ + cl_float *expected = (cl_float *)expectedResults; + cl_float *actual = (cl_float *)actualResults; + for ( size_t s = 0; s < sampleNum; s++ ) + { + for( size_t z = 0; z < ( ( depth == 0 ) ? 
1 : depth ); z++ ) + { + for( size_t y = 0; y < height; y++ ) + { + for( size_t x = 0; x < width; x++ ) + { + float err = 0.f; + for( size_t i = 0; i < channelNum; i++ ) + { + float error = fabsf( expected[ i ] - actual[ i ] ); + if( error > err ) + err = error; + } + + if( err > 1.f / 127.f ) // Max expected range of error if we converted from an 8-bit integer to a normalized float + { + if( depth == 0 ) + log_error( "ERROR: Data sample %d,%d,%d did not validate!\n", (int)x, (int)y, (int)s ); + else + log_error( "ERROR: Data sample %d,%d,%d,%d did not validate!\n", (int)x, (int)y, (int)z, (int)s ); + + if (channelNum == 4) + { + log_error( "\tExpected: %f %f %f %f\n", expected[ 0 ], expected[ 1 ], expected[ 2 ], expected[ 3 ] ); + log_error( "\t : %a %a %a %a\n", expected[ 0 ], expected[ 1 ], expected[ 2 ], expected[ 3 ] ); + log_error( "\t Actual: %f %f %f %f\n", actual[ 0 ], actual[ 1 ], actual[ 2 ], actual[ 3 ] ); + log_error( "\t : %a %a %a %a\n", actual[ 0 ], actual[ 1 ], actual[ 2 ], actual[ 3 ] ); + } + else if(channelNum == 1) + { + log_error( "\tExpected: %f\n", expected[ 0 ] ); + log_error( "\t : %a\n", expected[ 0 ] ); + log_error( "\t Actual: %f\n", actual[ 0 ] ); + log_error( "\t : %a\n", actual[ 0 ] ); + } + return -1; + } + expected += channelNum; + actual += channelNum; + } + } + } + } + + return 0; +} + +int validate_float_results_rgb_101010( void *expectedResults, void *actualResults, size_t width, size_t height, size_t sampleNum ) +{ + return validate_float_results_rgb_101010( expectedResults, actualResults, width, height, sampleNum, 0 ); +} + +int validate_float_results_rgb_101010( void *expectedResults, void *actualResults, size_t width, size_t height, size_t depth, size_t sampleNum ) +{ + cl_float *expected = (cl_float *)expectedResults; + cl_float *actual = (cl_float *)actualResults; + for ( size_t s = 0; s < sampleNum; s++ ) + { + for( size_t z = 0; z < ( ( depth == 0 ) ? 
1 : depth ); z++ ) + { + for( size_t y = 0; y < height; y++ ) + { + for( size_t x = 0; x < width; x++ ) + { + float err = 0.f; + for( size_t i = 0; i < 3; i++ ) // skip the fourth channel + { + float error = fabsf( expected[ i ] - actual[ i ] ); + if( error > err ) + err = error; + } + + if( err > 1.f / 127.f ) // Max expected range of error if we converted from an 8-bit integer to a normalized float + { + if( depth == 0 ) + log_error( "ERROR: Data sample %d,%d,%d did not validate!\n", (int)x, (int)y, (int)s ); + else + log_error( "ERROR: Data sample %d,%d,%d,%d did not validate!\n", (int)x, (int)y, (int)z, (int)s ); + log_error( "\tExpected: %f %f %f\n", expected[ 0 ], expected[ 1 ], expected[ 2 ] ); + log_error( "\t : %a %a %a\n", expected[ 0 ], expected[ 1 ], expected[ 2 ] ); + log_error( "\t Actual: %f %f %f\n", actual[ 0 ], actual[ 1 ], actual[ 2 ] ); + log_error( "\t : %a %a %a\n", actual[ 0 ], actual[ 1 ], actual[ 2 ] ); + return -1; + } + expected += 4; + actual += 4; + } + } + } + } + + return 0; +} + +int CheckGLObjectInfo(cl_mem mem, cl_gl_object_type expected_cl_gl_type, GLuint expected_gl_name, + GLenum expected_cl_gl_texture_target, GLint expected_cl_gl_mipmap_level) +{ + cl_gl_object_type object_type; + GLuint object_name; + GLenum texture_target; + GLint mipmap_level; + int error; + + error = (*clGetGLObjectInfo_ptr)(mem, &object_type, &object_name); + test_error( error, "clGetGLObjectInfo failed"); + if (object_type != expected_cl_gl_type) { + log_error("clGetGLObjectInfo did not return expected object type: expected %d, got %d.\n", expected_cl_gl_type, object_type); + return -1; + } + if (object_name != expected_gl_name) { + log_error("clGetGLObjectInfo did not return expected object name: expected %d, got %d.\n", expected_gl_name, object_name); + return -1; + } + + // If we're dealing with a buffer or render buffer, we are done. 
+ + if (object_type == CL_GL_OBJECT_BUFFER || object_type == CL_GL_OBJECT_RENDERBUFFER) { + return 0; + } + + // Otherwise, it's a texture-based object and requires a bit more checking. + + error = (*clGetGLTextureInfo_ptr)(mem, CL_GL_TEXTURE_TARGET, sizeof(texture_target), &texture_target, NULL); + test_error( error, "clGetGLTextureInfo for CL_GL_TEXTURE_TARGET failed"); + + if (texture_target != expected_cl_gl_texture_target) { + log_error("clGetGLTextureInfo did not return expected texture target: expected %d, got %d.\n", expected_cl_gl_texture_target, texture_target); + return -1; + } + + error = (*clGetGLTextureInfo_ptr)(mem, CL_GL_MIPMAP_LEVEL, sizeof(mipmap_level), &mipmap_level, NULL); + test_error( error, "clGetGLTextureInfo for CL_GL_MIPMAP_LEVEL failed"); + + if (mipmap_level != expected_cl_gl_mipmap_level) { + log_error("clGetGLTextureInfo did not return expected mipmap level: expected %d, got %d.\n", expected_cl_gl_mipmap_level, mipmap_level); + return -1; + } + + return 0; +} + +bool CheckGLIntegerExtensionSupport() +{ + // Get the OpenGL version and supported extensions + const GLubyte *glVersion = glGetString(GL_VERSION); + const GLubyte *glExtensionList = glGetString(GL_EXTENSIONS); + + // Check if the OpenGL vrsion is 3.0 or grater or GL_EXT_texture_integer is supported + return (((glVersion[0] - '0') >= 3) || (strstr((const char*)glExtensionList, "GL_EXT_texture_integer"))); +} + +int is_rgb_101010_supported( cl_context context, GLenum gl_target ) +{ + cl_image_format formatList[ 128 ]; + cl_uint formatCount = 0; + unsigned int i; + int error; + + cl_mem_object_type image_type; + + switch (get_base_gl_target(gl_target)) { + case GL_TEXTURE_1D: + image_type = CL_MEM_OBJECT_IMAGE1D; + case GL_TEXTURE_BUFFER: + image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER; + break; + case GL_TEXTURE_RECTANGLE_EXT: + case GL_TEXTURE_2D: + case GL_COLOR_ATTACHMENT0: + case GL_RENDERBUFFER: + case GL_TEXTURE_CUBE_MAP: + case GL_TEXTURE_CUBE_MAP_POSITIVE_X: + case 
GL_TEXTURE_CUBE_MAP_POSITIVE_Y: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: + image_type = CL_MEM_OBJECT_IMAGE2D; + break; + case GL_TEXTURE_3D: + image_type = CL_MEM_OBJECT_IMAGE3D; + case GL_TEXTURE_1D_ARRAY: + image_type = CL_MEM_OBJECT_IMAGE1D_ARRAY; + case GL_TEXTURE_2D_ARRAY: + image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY; + break; + default: + image_type = CL_MEM_OBJECT_IMAGE2D; + } + + if ((error = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, + image_type, 128, formatList, + &formatCount ))) { + return error; + } + + // Check if the RGB 101010 format is supported + for( i = 0; i < formatCount; i++ ) + { + if( formatList[ i ].image_channel_data_type == CL_UNORM_INT_101010 ) + { + return 1; + } + } + + return 0; +} diff --git a/test_conformance/gl/main.cpp b/test_conformance/gl/main.cpp new file mode 100644 index 00000000..5b223784 --- /dev/null +++ b/test_conformance/gl/main.cpp @@ -0,0 +1,453 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/compat.h" + +#include +#include + +#if !defined (__APPLE__) +#include +#endif + +#include "procs.h" +#include "../../test_common/gl/setup.h" +#include "../../test_common/harness/testHarness.h" +#include "../../test_common/harness/parseParameters.h" + +#if !defined(_WIN32) +#include +#endif + +static cl_context sCurrentContext = NULL; + + +#define TEST_FN_REDIRECT( fn ) redirect_##fn +#define TEST_FN_REDIRECTOR( fn ) \ +int redirect_##fn(cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) \ +{ \ + int error; \ + clCommandQueueWrapper realQueue = clCreateCommandQueueWithProperties( sCurrentContext, device, 0, &error ); \ + test_error( error, "Unable to create command queue" ); \ + return fn( device, sCurrentContext, realQueue, numElements ); \ +} + +// buffers: +TEST_FN_REDIRECTOR( test_buffers ) +TEST_FN_REDIRECTOR( test_buffers_getinfo ) + +// 1D images: +TEST_FN_REDIRECTOR( test_images_read_1D ) +TEST_FN_REDIRECTOR( test_images_write_1D ) +TEST_FN_REDIRECTOR( test_images_1D_getinfo ) + +// 1D image arrays: +TEST_FN_REDIRECTOR( test_images_read_1Darray ) +TEST_FN_REDIRECTOR( test_images_write_1Darray ) +TEST_FN_REDIRECTOR( test_images_1Darray_getinfo ) + +// 2D images: +TEST_FN_REDIRECTOR( test_images_read_2D ) +TEST_FN_REDIRECTOR( test_images_read_cube ) +TEST_FN_REDIRECTOR( test_images_write ) +TEST_FN_REDIRECTOR( test_images_write_cube ) +TEST_FN_REDIRECTOR( test_images_2D_getinfo ) +TEST_FN_REDIRECTOR( test_images_cube_getinfo ) + +// 2D image arrays: +TEST_FN_REDIRECTOR( test_images_read_2Darray ) +TEST_FN_REDIRECTOR( test_images_write_2Darray ) +TEST_FN_REDIRECTOR( test_images_2Darray_getinfo ) + +// 3D images: +TEST_FN_REDIRECTOR( test_images_read_3D ) +TEST_FN_REDIRECTOR( test_images_write_3D ) +TEST_FN_REDIRECTOR( test_images_3D_getinfo ) + +#ifdef GL_VERSION_3_2 + +TEST_FN_REDIRECTOR( test_images_read_texturebuffer ) +TEST_FN_REDIRECTOR( test_images_write_texturebuffer ) 
+TEST_FN_REDIRECTOR( test_images_texturebuffer_getinfo ) + +// depth textures +TEST_FN_REDIRECTOR( test_images_read_2D_depth ) +TEST_FN_REDIRECTOR( test_images_write_2D_depth ) +TEST_FN_REDIRECTOR( test_images_read_2Darray_depth ) +TEST_FN_REDIRECTOR( test_images_write_2Darray_depth ) + +TEST_FN_REDIRECTOR( test_images_read_2D_multisample ) +TEST_FN_REDIRECTOR( test_images_read_2Darray_multisample ) +TEST_FN_REDIRECTOR( test_image_methods_depth ); +TEST_FN_REDIRECTOR( test_image_methods_multisample ); +#endif + +// Renderbuffer-backed images: +TEST_FN_REDIRECTOR( test_renderbuffer_read ) +TEST_FN_REDIRECTOR( test_renderbuffer_write ) +TEST_FN_REDIRECTOR( test_renderbuffer_getinfo ) + +TEST_FN_REDIRECTOR( test_fence_sync ) + +basefn basefn_list[] = { + TEST_FN_REDIRECT( test_buffers ), + TEST_FN_REDIRECT( test_buffers_getinfo ), + + TEST_FN_REDIRECT( test_images_read_1D ), + TEST_FN_REDIRECT( test_images_write_1D ), + TEST_FN_REDIRECT( test_images_1D_getinfo ), + + TEST_FN_REDIRECT( test_images_read_1Darray ), + TEST_FN_REDIRECT( test_images_write_1Darray ), + TEST_FN_REDIRECT( test_images_1Darray_getinfo ), + + TEST_FN_REDIRECT( test_images_read_2D ), + TEST_FN_REDIRECT( test_images_write ), + TEST_FN_REDIRECT( test_images_2D_getinfo ), + + TEST_FN_REDIRECT( test_images_read_cube ), + TEST_FN_REDIRECT( test_images_write_cube ), + TEST_FN_REDIRECT( test_images_cube_getinfo ), + + TEST_FN_REDIRECT( test_images_read_2Darray ), + TEST_FN_REDIRECT( test_images_write_2Darray), + TEST_FN_REDIRECT( test_images_2Darray_getinfo ), + + TEST_FN_REDIRECT( test_images_read_3D ), + TEST_FN_REDIRECT( test_images_write_3D ), + TEST_FN_REDIRECT( test_images_3D_getinfo ), + + TEST_FN_REDIRECT( test_renderbuffer_read ), + TEST_FN_REDIRECT( test_renderbuffer_write ), + TEST_FN_REDIRECT( test_renderbuffer_getinfo ) +}; + +basefn basefn_list32[] = { + TEST_FN_REDIRECT( test_images_read_texturebuffer ), + TEST_FN_REDIRECT( test_images_write_texturebuffer ), + TEST_FN_REDIRECT( 
test_images_texturebuffer_getinfo ), + + TEST_FN_REDIRECT( test_fence_sync ), + TEST_FN_REDIRECT( test_images_read_2D_depth ), + TEST_FN_REDIRECT( test_images_write_2D_depth ), + TEST_FN_REDIRECT( test_images_read_2Darray_depth ), + TEST_FN_REDIRECT( test_images_write_2Darray_depth ), + TEST_FN_REDIRECT( test_images_read_2D_multisample ), + TEST_FN_REDIRECT( test_images_read_2Darray_multisample ), + TEST_FN_REDIRECT( test_image_methods_depth ), + TEST_FN_REDIRECT( test_image_methods_multisample) +}; + +const char *basefn_names[] = { + "buffers", + "buffers_getinfo", + + "images_read_1D", + "images_write_1D", + "images_1D_getinfo", + + "images_read_1Darray", + "images_write_1Darray", + "images_1Darray_getinfo", + + "images_read", /* 2D */ + "images_write", + "images_2D_getinfo", + + "images_read_cube", + "images_write_cube", + "images_cube_getinfo", + + "images_read_2Darray", + "images_write_2Darray", + "images_2Darray_getinfo", + + "images_read_3D", + "images_write_3D", + "images_3D_getinfo", + + "renderbuffer_read", + "renderbuffer_write", + "renderbuffer_getinfo", +}; + +const char *basefn_names32[] = { + "images_read_texturebuffer", + "images_write_texturebuffer", + "images_texturebuffer_getinfo", + + "fence_sync", + "images_read_2D_depth", + "images_write_2D_depth", + "images_read_2D_array_depth", + "images_write_2D_array_depth", + + "images_read_2D_multisample", + "images_read_2D_array_multisample", + "image_methods_depth", + "image_methods_multisample", +}; + +ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_list) / sizeof(basefn_list[0]))); +ct_assert((sizeof(basefn_names32) / sizeof(basefn_names32[0])) == (sizeof(basefn_list32) / sizeof(basefn_list32[0]))); + +int num_fns = sizeof(basefn_names) / sizeof(char *); +int num_fns32 = sizeof(basefn_names32) / sizeof(char *); + +cl_device_type gDeviceType = CL_DEVICE_TYPE_DEFAULT; +bool gTestRounding = true; + +int main(int argc, const char *argv[]) +{ + int error = 0; + int numErrors = 
0; + + test_start(); + argc = parseCustomParam(argc, argv); + if (argc == -1) + { + test_finish (); + return -1; + } + + cl_device_type requestedDeviceType = CL_DEVICE_TYPE_DEFAULT; + checkDeviceTypeOverride(&requestedDeviceType); + + /* Do we have a CPU/GPU specification? */ + if( argc > 1 ) + { + if( strcmp( argv[ argc - 1 ], "gpu" ) == 0 || strcmp( argv[ argc - 1 ], "CL_DEVICE_TYPE_GPU" ) == 0 ) + { + requestedDeviceType = CL_DEVICE_TYPE_GPU; + argc--; + } + else if( strcmp( argv[ argc - 1 ], "cpu" ) == 0 || strcmp( argv[ argc - 1 ], "CL_DEVICE_TYPE_CPU" ) == 0 ) + { + requestedDeviceType = CL_DEVICE_TYPE_CPU; + argc--; + } + else if( strcmp( argv[ argc - 1 ], "accelerator" ) == 0 || strcmp( argv[ argc - 1 ], "CL_DEVICE_TYPE_ACCELERATOR" ) == 0 ) + { + requestedDeviceType = CL_DEVICE_TYPE_ACCELERATOR; + argc--; + } + else if( strcmp( argv[ argc - 1 ], "CL_DEVICE_TYPE_DEFAULT" ) == 0 ) + { + requestedDeviceType = CL_DEVICE_TYPE_DEFAULT; + argc--; + } + } + + if( argc > 1 && strcmp( argv[ 1 ], "-list" ) == 0 ) + { + log_info( "Available 2.x tests:\n" ); + for( int i = 0; i < num_fns; i++ ) + log_info( "\t%s\n", basefn_names[ i ] ); + + log_info( "Available 3.2 tests:\n" ); + for( int i = 0; i < num_fns32; i++ ) + log_info( "\t%s\n", basefn_names32[ i ] ); + + log_info( "Note: Any 3.2 test names must follow 2.1 test names on the command line.\n" ); + log_info( "Use environment variables to specify desired device.\n" ); + + test_finish(); + return 0; + } + + // Check to see if any 2.x or 3.2 test names were specified on the command line. 
+ unsigned first_32_testname = 0; + + for (int j=1; (jSupportsCLGLInterop( requestedDeviceType ); + if( supported == 0 ) { + log_info("Test not run because GL-CL interop is not supported for any devices of the requested type.\n"); + test_finish(); + return 0; + } else if ( supported == -1 ) { + log_error("Unable to setup the test or failed to determine if CL-GL interop is supported.\n"); + test_finish(); + return -1; + } + + // Initialize function pointers. + error = init_clgl_ext(); + if (error < 0) { + return error; + } + + // OpenGL tests for non-3.2 //////////////////////////////////////////////////////// + if ((argc == 1) || (first_32_testname != 1)) { + + // At least one device supports CL-GL interop, so init the test. + if( glEnv->Init( &argc, (char **)argv, CL_FALSE ) ) { + log_error("Failed to initialize the GL environment for this test.\n"); + test_finish(); + return -1; + } + + // Create a context to use and then grab a device (or devices) from it + sCurrentContext = glEnv->CreateCLContext(); + if( sCurrentContext == NULL ) + { + log_error( "ERROR: Unable to obtain CL context from GL\n" ); + test_finish(); + return -1; + } + + size_t numDevices = 0; + cl_device_id *deviceIDs; + + error = clGetContextInfo( sCurrentContext, CL_CONTEXT_DEVICES, 0, NULL, &numDevices); + if( error != CL_SUCCESS ) + { + print_error( error, "Unable to get device count from context" ); + test_finish(); + return -1; + } + deviceIDs = (cl_device_id *)malloc(numDevices); + if (deviceIDs == NULL) { + print_error( error, "malloc failed" ); + test_finish(); + return -1; + } + error = clGetContextInfo( sCurrentContext, CL_CONTEXT_DEVICES, numDevices, deviceIDs, NULL); + if( error != CL_SUCCESS ) { + print_error( error, "Unable to get device list from context" ); + test_finish(); + return -1; + } + + numDevices /= sizeof(cl_device_id); + + if (numDevices < 1) { + log_error("No devices found.\n"); + test_finish(); + return -1; + } + + // Execute tests. + int argc_ = (first_32_testname) ? 
first_32_testname : argc; + + for( size_t i = 0; i < numDevices; i++ ) { + log_info( "\nTesting OpenGL 2.x\n" ); + if( printDeviceHeader( deviceIDs[ i ] ) != CL_SUCCESS ) { + test_finish(); + return -1; + } + + // Note: don't use the entire harness, because we have a different way of obtaining the device (via the context) + error = parseAndCallCommandLineTests( argc_, argv, deviceIDs[ i ], num_fns, basefn_list, basefn_names, true, 0, 1024 ); + if( error != 0 ) + break; + } + + numErrors += error; + + // Clean-up. + free(deviceIDs); + clReleaseContext( sCurrentContext ); + //delete glEnv; + } + + // OpenGL 3.2 tests. //////////////////////////////////////////////////////// + if ((argc==1) || first_32_testname) { + + // At least one device supports CL-GL interop, so init the test. + if( glEnv->Init( &argc, (char **)argv, CL_TRUE ) ) { + log_error("Failed to initialize the GL environment for this test.\n"); + test_finish(); + return -1; + } + + // Create a context to use and then grab a device (or devices) from it + sCurrentContext = glEnv->CreateCLContext(); + if( sCurrentContext == NULL ) { + log_error( "ERROR: Unable to obtain CL context from GL\n" ); + test_finish(); + return -1; + } + + size_t numDevices = 0; + cl_device_id *deviceIDs; + + error = clGetContextInfo( sCurrentContext, CL_CONTEXT_DEVICES, 0, NULL, &numDevices); + if( error != CL_SUCCESS ) { + print_error( error, "Unable to get device count from context" ); + test_finish(); + return -1; + } + deviceIDs = (cl_device_id *)malloc(numDevices); + if (deviceIDs == NULL) { + print_error( error, "malloc failed" ); + test_finish(); + return -1; + } + error = clGetContextInfo( sCurrentContext, CL_CONTEXT_DEVICES, numDevices, deviceIDs, NULL); + if( error != CL_SUCCESS ) { + print_error( error, "Unable to get device list from context" ); + test_finish(); + return -1; + } + + numDevices /= sizeof(cl_device_id); + + if (numDevices < 1) { + log_error("No devices found.\n"); + test_finish(); + return -1; + } + + int 
argc_ = (first_32_testname) ? 1 + (argc - first_32_testname) : argc; + const char** argv_ = (first_32_testname) ? &argv[first_32_testname-1] : argv; + + // Execute the tests. + for( size_t i = 0; i < numDevices; i++ ) { + log_info( "\nTesting OpenGL 3.2\n" ); + if( printDeviceHeader( deviceIDs[ i ] ) != CL_SUCCESS ) { + test_finish(); + return -1; + } + + // Note: don't use the entire harness, because we have a different way of obtaining the device (via the context) + error = parseAndCallCommandLineTests( argc_, argv_, deviceIDs[ i ], num_fns32, basefn_list32, basefn_names32, true, 0, 1024 ); + if( error != 0 ) + break; + } + + numErrors += error; + + // Clean-up. + free(deviceIDs); + clReleaseContext( sCurrentContext ); + delete glEnv; + + } + + //All done. + return numErrors; +} + + diff --git a/test_conformance/gl/procs.h b/test_conformance/gl/procs.h new file mode 100644 index 00000000..57c5eefa --- /dev/null +++ b/test_conformance/gl/procs.h @@ -0,0 +1,140 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "testBase.h" +#include "../../test_common/harness/mt19937.h" + + +#pragma mark - +#pragma Misc tests + +extern int test_buffers( cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements ); + +extern int test_fence_sync( cl_device_id device, cl_context context, + cl_command_queue queue, int numElements ); + + +#pragma mark - +#pragma mark Tead tests + +extern int test_images_read_2D( cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements ); + +extern int test_images_read_1D( cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements ); + +extern int test_images_read_texturebuffer( cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements ); + +extern int test_images_read_1Darray( cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements ); + +extern int test_images_read_2Darray( cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements ); + +extern int test_images_read_cube( cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements ); + +extern int test_images_read_3D( cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements ); + +extern int test_renderbuffer_read( cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements ); + +#pragma mark - +#pragma mark Write tests + +// 2D tests are the ones with no suffix: + +extern int test_images_write( cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements ); + +extern int test_images_write_cube( cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements ); + +extern int test_renderbuffer_write( cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements ); + +// Here are the rest: + +extern int test_images_write_1D( cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements 
); + +extern int test_images_write_texturebuffer( cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements ); + +extern int test_images_write_1Darray( cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements ); + +extern int test_images_write_2Darray( cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements ); + +extern int test_images_write_3D( cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements ); + +#pragma mark - +#pragma mark Get info test entry points + +extern int test_buffers_getinfo( cl_device_id device, cl_context context, + cl_command_queue queue, int numElements ); + +extern int test_images_1D_getinfo( cl_device_id device, cl_context context, + cl_command_queue queue, int numElements ); + +extern int test_images_texturebuffer_getinfo( cl_device_id device, cl_context context, + cl_command_queue queue, int numElements ); + +extern int test_images_1Darray_getinfo( cl_device_id device, cl_context context, + cl_command_queue queue, int numElements ); + +extern int test_images_2D_getinfo( cl_device_id device, cl_context context, + cl_command_queue queue, int numElements ); + +extern int test_images_2Darray_getinfo( cl_device_id device, cl_context context, + cl_command_queue queue, int numElements ); + +extern int test_images_cube_getinfo( cl_device_id device, cl_context context, + cl_command_queue queue, int numElements ); + +extern int test_images_3D_getinfo( cl_device_id device, cl_context context, + cl_command_queue queue, int numElements ); + +extern int test_images_read_2D_depth( cl_device_id device, cl_context context, + cl_command_queue queue, int numElements ); + +extern int test_images_write_2D_depth( cl_device_id device, cl_context context, + cl_command_queue queue, int numElements ); + +extern int test_images_read_2Darray_depth( cl_device_id device, cl_context context, + cl_command_queue queue, int ); + +extern int 
test_images_write_2Darray_depth( cl_device_id device, cl_context context, + cl_command_queue queue, int numElements ); + +extern int test_images_read_2D_multisample( cl_device_id device, cl_context context, + cl_command_queue queue, int numElements ); + +extern int test_images_read_2Darray_multisample( cl_device_id device, cl_context context, + cl_command_queue queue, int ); + +extern int test_image_methods_depth( cl_device_id device, cl_context context, + cl_command_queue queue, int ); + +extern int test_image_methods_multisample( cl_device_id device, cl_context context, + cl_command_queue queue, int ); + +extern int test_renderbuffer_getinfo( cl_device_id device, cl_context context, + cl_command_queue queue, int numElements ); \ No newline at end of file diff --git a/test_conformance/gl/testBase.h b/test_conformance/gl/testBase.h new file mode 100644 index 00000000..6d445f7c --- /dev/null +++ b/test_conformance/gl/testBase.h @@ -0,0 +1,65 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef _testBase_h +#define _testBase_h + +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include + +#if !defined (__APPLE__) +#include +#include "../../test_common/gl/gl_headers.h" +#include +#else +#include "../../test_common/gl/gl_headers.h" +#endif + +#include "../../test_common/harness/imageHelpers.h" +#include "../../test_common/harness/errorHelpers.h" +#include "../../test_common/harness/kernelHelpers.h" +#include "../../test_common/harness/threadTesting.h" +#include "../../test_common/harness/typeWrappers.h" +#include "../../test_common/harness/conversions.h" +#include "../../test_common/harness/mt19937.h" + +#include "../../test_common/gl/helpers.h" + +extern const char *get_kernel_suffix( cl_image_format *format ); +extern const char *get_write_conversion( cl_image_format *format, ExplicitType type); +extern ExplicitType get_read_kernel_type( cl_image_format *format ); +extern ExplicitType get_write_kernel_type( cl_image_format *format ); + +extern char * convert_to_expected( void * inputBuffer, size_t numPixels, ExplicitType inType, ExplicitType outType, size_t channelNum, GLenum glDataType = 0); +extern int validate_integer_results( void *expectedResults, void *actualResults, size_t width, size_t height, size_t sampleNum, size_t typeSize ); +extern int validate_integer_results( void *expectedResults, void *actualResults, size_t width, size_t height, size_t depth, size_t sampleNum, size_t typeSize ); +extern int validate_float_results( void *expectedResults, void *actualResults, size_t width, size_t height, size_t sampleNum, size_t channelNum ); +extern int validate_float_results( void *expectedResults, void *actualResults, size_t width, size_t height, size_t depth, size_t sampleNum, size_t channelNum ); +extern int validate_float_results_rgb_101010( void *expectedResults, void *actualResults, size_t width, size_t height, size_t sampleNum ); +extern int validate_float_results_rgb_101010( void *expectedResults, void 
*actualResults, size_t width, size_t height, size_t depth, size_t sampleNum ); + +extern int CheckGLObjectInfo(cl_mem mem, cl_gl_object_type expected_cl_gl_type, GLuint expected_gl_name, + GLenum expected_cl_gl_texture_target, GLint expected_cl_gl_mipmap_level); + +extern bool CheckGLIntegerExtensionSupport(); + +#endif // _testBase_h + + + diff --git a/test_conformance/gl/test_buffers.cpp b/test_conformance/gl/test_buffers.cpp new file mode 100644 index 00000000..afe7e32e --- /dev/null +++ b/test_conformance/gl/test_buffers.cpp @@ -0,0 +1,357 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "testBase.h" +#include "../../test_common/harness/conversions.h" +#include "../../test_common/harness/typeWrappers.h" + +#if !defined (__APPLE__) + #include +#endif + +extern "C" { extern cl_uint gRandomSeed; }; + +static const char *bufferKernelPattern = +"__kernel void sample_test( __global %s%s *source, __global %s%s *clDest, __global %s%s *glDest )\n" +"{\n" +" int tid = get_global_id(0);\n" +" clDest[ tid ] = source[ tid ] + (%s%s)(1);\n" +" glDest[ tid ] = source[ tid ] + (%s%s)(2);\n" +"}\n"; + +#define TYPE_CASE( enum, type, range, offset ) \ + case enum: \ + { \ + cl_##type *ptr = (cl_##type *)outData; \ + for( i = 0; i < count; i++ ) \ + ptr[ i ] = (cl_##type)( ( genrand_int32(d) & range ) - offset ); \ + break; \ + } + +void gen_input_data( ExplicitType type, size_t count, MTdata d, void *outData ) +{ + size_t i; + + switch( type ) + { + case kBool: + { + bool *boolPtr = (bool *)outData; + for( i = 0; i < count; i++ ) + { + boolPtr[i] = ( genrand_int32(d) & 1 ) ? 
true : false; + } + break; + } + + TYPE_CASE( kChar, char, 250, 127 ) + TYPE_CASE( kUChar, uchar, 250, 0 ) + TYPE_CASE( kShort, short, 65530, 32767 ) + TYPE_CASE( kUShort, ushort, 65530, 0 ) + TYPE_CASE( kInt, int, 0x0fffffff, 0x70000000 ) + TYPE_CASE( kUInt, uint, 0x0fffffff, 0 ) + + case kLong: + { + cl_long *longPtr = (cl_long *)outData; + for( i = 0; i < count; i++ ) + { + longPtr[i] = (cl_long)genrand_int32(d) | ( (cl_ulong)genrand_int32(d) << 32 ); + } + break; + } + + case kULong: + { + cl_ulong *ulongPtr = (cl_ulong *)outData; + for( i = 0; i < count; i++ ) + { + ulongPtr[i] = (cl_ulong)genrand_int32(d) | ( (cl_ulong)genrand_int32(d) << 32 ); + } + break; + } + + case kFloat: + { + cl_float *floatPtr = (float *)outData; + for( i = 0; i < count; i++ ) + floatPtr[i] = get_random_float( -100000.f, 100000.f, d ); + break; + } + + default: + log_error( "ERROR: Invalid type passed in to generate_random_data!\n" ); + break; + } +} + +#define INC_CASE( enum, type ) \ + case enum: \ + { \ + cl_##type *src = (cl_##type *)inData; \ + cl_##type *dst = (cl_##type *)outData; \ + *dst = *src + 1; \ + break; \ + } + +void get_incremented_value( void *inData, void *outData, ExplicitType type ) +{ + switch( type ) + { + INC_CASE( kChar, char ) + INC_CASE( kUChar, uchar ) + INC_CASE( kShort, short ) + INC_CASE( kUShort, ushort ) + INC_CASE( kInt, int ) + INC_CASE( kUInt, uint ) + INC_CASE( kLong, long ) + INC_CASE( kULong, ulong ) + INC_CASE( kFloat, float ) + default: + break; + } +} + +int test_buffer_kernel(cl_context context, cl_command_queue queue, ExplicitType vecType, size_t vecSize, int numElements, int validate_only, MTdata d) +{ + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper streams[ 3 ]; + size_t dataSize = numElements * 16 * sizeof(cl_long); +#if !(defined(_WIN32) && defined(_MSC_VER)) + cl_long inData[numElements * 16], outDataCL[numElements * 16], outDataGL[ numElements * 16 ]; +#else + cl_long* inData = (cl_long*)_malloca(dataSize); + 
cl_long* outDataCL = (cl_long*)_malloca(dataSize); + cl_long* outDataGL = (cl_long*)_malloca(dataSize); +#endif + glBufferWrapper inGLBuffer, outGLBuffer; + int i; + size_t bufferSize; + + int error; + size_t threads[1], localThreads[1]; + char kernelSource[10240]; + char *programPtr; + char sizeName[4]; + + /* Create the source */ + if( vecSize == 1 ) + sizeName[ 0 ] = 0; + else + sprintf( sizeName, "%d", (int)vecSize ); + + sprintf( kernelSource, bufferKernelPattern, get_explicit_type_name( vecType ), sizeName, + get_explicit_type_name( vecType ), sizeName, + get_explicit_type_name( vecType ), sizeName, + get_explicit_type_name( vecType ), sizeName, + get_explicit_type_name( vecType ), sizeName ); + + /* Create kernels */ + programPtr = kernelSource; + if( create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&programPtr, "sample_test" ) ) + { + return -1; + } + + bufferSize = numElements * vecSize * get_explicit_type_size( vecType ); + + /* Generate some almost-random input data */ + gen_input_data( vecType, vecSize * numElements, d, inData ); + memset( outDataCL, 0, dataSize ); + memset( outDataGL, 0, dataSize ); + + /* Generate some GL buffers to go against */ + glGenBuffers( 1, &inGLBuffer ); + glGenBuffers( 1, &outGLBuffer ); + + glBindBuffer( GL_ARRAY_BUFFER, inGLBuffer ); + glBufferData( GL_ARRAY_BUFFER, bufferSize, inData, GL_STATIC_DRAW ); + + // Note: we need to bind the output buffer, even though we don't care about its values yet, + // because CL needs it to get the buffer size + glBindBuffer( GL_ARRAY_BUFFER, outGLBuffer ); + glBufferData( GL_ARRAY_BUFFER, bufferSize, outDataGL, GL_STATIC_DRAW ); + + glBindBuffer( GL_ARRAY_BUFFER, 0 ); + glFlush(); + + + /* Generate some streams. 
The first and last ones are GL, middle one just vanilla CL */ + streams[ 0 ] = (*clCreateFromGLBuffer_ptr)( context, CL_MEM_READ_ONLY, inGLBuffer, &error ); + test_error( error, "Unable to create input GL buffer" ); + + streams[ 1 ] = clCreateBuffer( context, CL_MEM_READ_WRITE, bufferSize, NULL, &error ); + test_error( error, "Unable to create output CL buffer" ); + + streams[ 2 ] = (*clCreateFromGLBuffer_ptr)( context, CL_MEM_WRITE_ONLY, outGLBuffer, &error ); + test_error( error, "Unable to create output GL buffer" ); + + + /* Validate the info */ + if (validate_only) { + int result = (CheckGLObjectInfo(streams[0], CL_GL_OBJECT_BUFFER, (GLuint)inGLBuffer, (GLenum)0, 0) | + CheckGLObjectInfo(streams[2], CL_GL_OBJECT_BUFFER, (GLuint)outGLBuffer, (GLenum)0, 0) ); + for(i=0;i<3;i++) + { + clReleaseMemObject(streams[i]); + streams[i] = NULL; + } + + glDeleteBuffers(1, &inGLBuffer); inGLBuffer = 0; + glDeleteBuffers(1, &outGLBuffer); outGLBuffer = 0; + + return result; + } + + /* Assign streams and execute */ + for( int i = 0; i < 3; i++ ) + { + error = clSetKernelArg( kernel, i, sizeof( streams[ i ] ), &streams[ i ] ); + test_error( error, "Unable to set kernel arguments" ); + } + error = (*clEnqueueAcquireGLObjects_ptr)( queue, 1, &streams[ 0 ], 0, NULL, NULL); + test_error( error, "Unable to acquire GL obejcts"); + error = (*clEnqueueAcquireGLObjects_ptr)( queue, 1, &streams[ 2 ], 0, NULL, NULL); + test_error( error, "Unable to acquire GL obejcts"); + + /* Run the kernel */ + threads[0] = numElements; + + error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] ); + test_error( error, "Unable to get work group size to use" ); + + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); + test_error( error, "Unable to execute test kernel" ); + + error = (*clEnqueueReleaseGLObjects_ptr)( queue, 1, &streams[ 0 ], 0, NULL, NULL ); + test_error(error, "clEnqueueReleaseGLObjects failed"); + error = 
(*clEnqueueReleaseGLObjects_ptr)( queue, 1, &streams[ 2 ], 0, NULL, NULL ); + test_error(error, "clEnqueueReleaseGLObjects failed"); + + // Get the results from both CL and GL and make sure everything looks correct + error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, bufferSize, outDataCL, 0, NULL, NULL ); + test_error( error, "Unable to read output CL array!" ); + + glBindBuffer( GL_ARRAY_BUFFER, outGLBuffer ); + void *glMem = glMapBuffer( GL_ARRAY_BUFFER, GL_READ_ONLY ); + memcpy( outDataGL, glMem, bufferSize ); + glUnmapBuffer( GL_ARRAY_BUFFER ); + + char *inP = (char *)inData, *glP = (char *)outDataGL, *clP = (char *)outDataCL; + error = 0; + for( size_t i = 0; i < numElements * vecSize; i++ ) + { + cl_long expectedCLValue, expectedGLValue; + get_incremented_value( inP, &expectedCLValue, vecType ); + get_incremented_value( &expectedCLValue, &expectedGLValue, vecType ); + + if( memcmp( clP, &expectedCLValue, get_explicit_type_size( vecType ) ) != 0 ) + { + char scratch[ 64 ]; + log_error( "ERROR: Data sample %d from the CL output did not validate!\n", (int)i ); + log_error( "\t Input: %s\n", GetDataVectorString( inP, get_explicit_type_size( vecType ), 1, scratch ) ); + log_error( "\tExpected: %s\n", GetDataVectorString( &expectedCLValue, get_explicit_type_size( vecType ), 1, scratch ) ); + log_error( "\t Actual: %s\n", GetDataVectorString( clP, get_explicit_type_size( vecType ), 1, scratch ) ); + error = -1; + } + + if( memcmp( glP, &expectedGLValue, get_explicit_type_size( vecType ) ) != 0 ) + { + char scratch[ 64 ]; + log_error( "ERROR: Data sample %d from the GL output did not validate!\n", (int)i ); + log_error( "\t Input: %s\n", GetDataVectorString( inP, get_explicit_type_size( vecType ), 1, scratch ) ); + log_error( "\tExpected: %s\n", GetDataVectorString( &expectedGLValue, get_explicit_type_size( vecType ), 1, scratch ) ); + log_error( "\t Actual: %s\n", GetDataVectorString( glP, get_explicit_type_size( vecType ), 1, scratch ) ); + error = -1; 
+ } + + if( error ) + return error; + + inP += get_explicit_type_size( vecType ); + glP += get_explicit_type_size( vecType ); + clP += get_explicit_type_size( vecType ); + } + + for(i=0;i<3;i++) + { + clReleaseMemObject(streams[i]); + streams[i] = NULL; + } + + glDeleteBuffers(1, &inGLBuffer); inGLBuffer = 0; + glDeleteBuffers(1, &outGLBuffer); outGLBuffer = 0; + + return 0; +} + +int test_buffers( cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) +{ + ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kNumExplicitTypes }; + unsigned int vecSizes[] = { 1, 2, 4, 8, 16, 0 }; + unsigned int index, typeIndex; + int retVal = 0; + RandomSeed seed(gRandomSeed); + + + for( typeIndex = 0; vecType[ typeIndex ] != kNumExplicitTypes; typeIndex++ ) + { + for( index = 0; vecSizes[ index ] != 0; index++ ) + { + // Test! + if( test_buffer_kernel( context, queue, vecType[ typeIndex ], vecSizes[ index ], numElements, 0, seed) != 0 ) + { + char sizeNames[][ 4 ] = { "", "", "2", "", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" }; + log_error( " Buffer test %s%s FAILED\n", get_explicit_type_name( vecType[ typeIndex ] ), sizeNames[ vecSizes[ index ] ] ); + retVal++; + } + } + } + + return retVal; + +} + + +int test_buffers_getinfo( cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) +{ + ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kNumExplicitTypes }; + unsigned int vecSizes[] = { 1, 2, 4, 8, 16, 0 }; + unsigned int index, typeIndex; + int retVal = 0; + RandomSeed seed( gRandomSeed ); + + + for( typeIndex = 0; vecType[ typeIndex ] != kNumExplicitTypes; typeIndex++ ) + { + for( index = 0; vecSizes[ index ] != 0; index++ ) + { + // Test! 
+ if( test_buffer_kernel( context, queue, vecType[ typeIndex ], vecSizes[ index ], numElements, 1, seed ) != 0 ) + { + char sizeNames[][ 4 ] = { "", "", "2", "", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" }; + log_error( " Buffer test %s%s FAILED\n", get_explicit_type_name( vecType[ typeIndex ] ), sizeNames[ vecSizes[ index ] ] ); + retVal++; + } + } + } + + return retVal; + +} + + + diff --git a/test_conformance/gl/test_fence_sync.cpp b/test_conformance/gl/test_fence_sync.cpp new file mode 100644 index 00000000..3a6857bd --- /dev/null +++ b/test_conformance/gl/test_fence_sync.cpp @@ -0,0 +1,730 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "testBase.h" +#include "../../test_common/gl/setup.h" +#include "../../test_common/harness/genericThread.h" + +#if defined( __APPLE__ ) +#include +#else +#include +#include +#if !defined(_WIN32) && !defined(__ANDROID__) +#include +#endif +#endif + +#ifndef GLsync +// For OpenGL before 3.2, we look for the ARB_sync extension and try to use that +#if !defined(_WIN32) +#include +#endif // !_WIN32 +typedef int64_t GLint64; +typedef uint64_t GLuint64; +typedef struct __GLsync *GLsync; + +#ifndef APIENTRY +#define APIENTRY +#endif + +typedef GLsync (APIENTRY *glFenceSyncPtr)(GLenum condition,GLbitfield flags); +glFenceSyncPtr glFenceSyncFunc; + +typedef bool (APIENTRY *glIsSyncPtr)(GLsync sync); +glIsSyncPtr glIsSyncFunc; + +typedef void (APIENTRY *glDeleteSyncPtr)(GLsync sync); +glDeleteSyncPtr glDeleteSyncFunc; + +typedef GLenum (APIENTRY *glClientWaitSyncPtr)(GLsync sync,GLbitfield flags,GLuint64 timeout); +glClientWaitSyncPtr glClientWaitSyncFunc; + +typedef void (APIENTRY *glWaitSyncPtr)(GLsync sync,GLbitfield flags,GLuint64 timeout); +glWaitSyncPtr glWaitSyncFunc; + +typedef void (APIENTRY *glGetInteger64vPtr)(GLenum pname, GLint64 *params); +glGetInteger64vPtr glGetInteger64vFunc; + +typedef void (APIENTRY *glGetSyncivPtr)(GLsync sync,GLenum pname,GLsizei bufSize,GLsizei *length, + GLint *values); +glGetSyncivPtr glGetSyncivFunc; + +#define CHK_GL_ERR() printf("%s\n", gluErrorString(glGetError())) + +static void InitSyncFns( void ) +{ + glFenceSyncFunc = (glFenceSyncPtr)glutGetProcAddress( "glFenceSync" ); + glIsSyncFunc = (glIsSyncPtr)glutGetProcAddress( "glIsSync" ); + glDeleteSyncFunc = (glDeleteSyncPtr)glutGetProcAddress( "glDeleteSync" ); + glClientWaitSyncFunc = (glClientWaitSyncPtr)glutGetProcAddress( "glClientWaitSync" ); + glWaitSyncFunc = (glWaitSyncPtr)glutGetProcAddress( "glWaitSync" ); + glGetInteger64vFunc = (glGetInteger64vPtr)glutGetProcAddress( "glGetInteger64v" ); + glGetSyncivFunc = (glGetSyncivPtr)glutGetProcAddress( "glGetSynciv" 
); +} + +#define GL_MAX_SERVER_WAIT_TIMEOUT 0x9111 + +#define GL_OBJECT_TYPE 0x9112 +#define GL_SYNC_CONDITION 0x9113 +#define GL_SYNC_STATUS 0x9114 +#define GL_SYNC_FLAGS 0x9115 + +#define GL_SYNC_FENCE 0x9116 + +#define GL_SYNC_GPU_COMMANDS_COMPLETE 0x9117 + +#define GL_UNSIGNALED 0x9118 +#define GL_SIGNALED 0x9119 + +#define GL_SYNC_FLUSH_COMMANDS_BIT 0x00000001 + +#define GL_TIMEOUT_IGNORED 0xFFFFFFFFFFFFFFFFull + +#define GL_ALREADY_SIGNALED 0x911A +#define GL_TIMEOUT_EXPIRED 0x911B +#define GL_CONDITION_SATISFIED 0x911C +#define GL_WAIT_FAILED 0x911D + +#define USING_ARB_sync 1 +#endif + +typedef cl_event (CL_API_CALL *clCreateEventFromGLsyncKHR_fn)( cl_context context, GLsync sync, cl_int *errCode_ret) ; + +clCreateEventFromGLsyncKHR_fn clCreateEventFromGLsyncKHR_ptr; + + +static const char *updateBuffersKernel[] = { + "__kernel void update( __global float4 * vertices, __global float4 *colors, int horizWrap, int rowIdx )\n" + "{\n" + " size_t tid = get_global_id(0);\n" + "\n" + " size_t xVal = ( tid & ( horizWrap - 1 ) );\n" + " vertices[ tid * 2 + 0 ] = (float4)( xVal, rowIdx*16.f, 0.0f, 1.f );\n" + " vertices[ tid * 2 + 1 ] = (float4)( xVal, rowIdx*16.f + 4.0f, 0.0f, 1.f );\n" + "\n" + " int rowV = rowIdx + 1;\n" + " colors[ tid * 2 + 0 ] = (float4)( ( rowV & 1 ) / 255.f, ( ( rowV & 2 ) >> 1 ) / 255.f, ( ( rowV & 4 ) >> 2 ) / 255.f, 1.f );\n" + " //colors[ tid * 2 + 0 ] = (float4)( (float)xVal/(float)horizWrap, 1.0f, 1.0f, 1.0f );\n" + " colors[ tid * 2 + 1 ] = colors[ tid * 2 + 0 ];\n" + "}\n" }; + +//Passthrough VertexShader +static const char *vertexshader = +"#version 150\n" +"uniform mat4 projMatrix;\n" +"in vec4 inPosition;\n" +"in vec4 inColor;\n" +"out vec4 vertColor;\n" +"void main (void) {\n" +" gl_Position = projMatrix*inPosition;\n" +" vertColor = inColor;\n" +"}\n"; + +//Passthrough FragmentShader +static const char *fragmentshader = +"#version 150\n" +"in vec4 vertColor;\n" +"out vec4 outColor;\n" +"void main (void) {\n" +" outColor = 
vertColor;\n" +"}\n"; + +GLuint createShaderProgram(GLint *posLoc, GLint *colLoc) +{ + GLint logLength, status; + GLuint program = glCreateProgram(); + GLuint vpShader; + + vpShader = glCreateShader(GL_VERTEX_SHADER); + glShaderSource(vpShader, 1, (const GLchar **)&vertexshader, NULL); + glCompileShader(vpShader); + glGetShaderiv(vpShader, GL_INFO_LOG_LENGTH, &logLength); + if (logLength > 0) { + GLchar *log = (GLchar*) malloc(logLength); + glGetShaderInfoLog(vpShader, logLength, &logLength, log); + log_info("Vtx Shader compile log:\n%s", log); + free(log); + } + + glGetShaderiv(vpShader, GL_COMPILE_STATUS, &status); + if (status == 0) + { + log_error("Failed to compile vtx shader:\n"); + return 0; + } + + glAttachShader(program, vpShader); + + GLuint fpShader; + fpShader = glCreateShader(GL_FRAGMENT_SHADER); + glShaderSource(fpShader, 1, (const GLchar **)&fragmentshader, NULL); + glCompileShader(fpShader); + + glGetShaderiv(fpShader, GL_INFO_LOG_LENGTH, &logLength); + if (logLength > 0) { + GLchar *log = (GLchar*)malloc(logLength); + glGetShaderInfoLog(fpShader, logLength, &logLength, log); + log_info("Frag Shader compile log:\n%s", log); + free(log); + } + + glAttachShader(program, fpShader); + glGetShaderiv(fpShader, GL_COMPILE_STATUS, &status); + if (status == 0) + { + log_error("Failed to compile frag shader:\n\n"); + return 0; + } + + glLinkProgram(program); + glGetProgramiv(program, GL_INFO_LOG_LENGTH, &logLength); + if (logLength > 0) { + GLchar *log = (GLchar*)malloc(logLength); + glGetProgramInfoLog(program, logLength, &logLength, log); + log_info("Program link log:\n%s", log); + free(log); + } + + glGetProgramiv(program, GL_LINK_STATUS, &status); + if (status == 0) + { + log_error("Failed to link program\n"); + return 0; + } + + *posLoc = glGetAttribLocation(program, "inPosition"); + *colLoc = glGetAttribLocation(program, "inColor"); + + return program; +} + +void destroyShaderProgram(GLuint program) +{ + GLuint shaders[2]; + GLsizei count; + 
glUseProgram(0); + glGetAttachedShaders(program, 2, &count, shaders); + int i; + for(i = 0; i < count; i++) + { + glDetachShader(program, shaders[i]); + glDeleteShader(shaders[i]); + } + glDeleteProgram(program); +} + +// This function queues up and runs the above CL kernel that writes the vertex data +cl_int run_cl_kernel( cl_kernel kernel, cl_command_queue queue, cl_mem stream0, cl_mem stream1, + cl_int rowIdx, cl_event fenceEvent, size_t numThreads ) +{ + cl_int error = clSetKernelArg( kernel, 3, sizeof( rowIdx ), &rowIdx ); + test_error( error, "Unable to set kernel arguments" ); + + clEventWrapper acqEvent1, acqEvent2, kernEvent, relEvent1, relEvent2; + int numEvents = ( fenceEvent != NULL ) ? 1 : 0; + cl_event *fence_evt = ( fenceEvent != NULL ) ? &fenceEvent : NULL; + + error = (*clEnqueueAcquireGLObjects_ptr)( queue, 1, &stream0, numEvents, fence_evt, &acqEvent1 ); + test_error( error, "Unable to acquire GL obejcts"); + error = (*clEnqueueAcquireGLObjects_ptr)( queue, 1, &stream1, numEvents, fence_evt, &acqEvent2 ); + test_error( error, "Unable to acquire GL obejcts"); + + cl_event evts[ 2 ] = { acqEvent1, acqEvent2 }; + + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, &numThreads, NULL, 2, evts, &kernEvent ); + test_error( error, "Unable to execute test kernel" ); + + error = (*clEnqueueReleaseGLObjects_ptr)( queue, 1, &stream0, 1, &kernEvent, &relEvent1 ); + test_error(error, "clEnqueueReleaseGLObjects failed"); + error = (*clEnqueueReleaseGLObjects_ptr)( queue, 1, &stream1, 1, &kernEvent, &relEvent2 ); + test_error(error, "clEnqueueReleaseGLObjects failed"); + + evts[ 0 ] = relEvent1; + evts[ 1 ] = relEvent2; + error = clWaitForEvents( 2, evts ); + test_error( error, "Unable to wait for release events" ); + + return 0; +} + +class RunThread : public genericThread +{ +public: + + cl_kernel mKernel; + cl_command_queue mQueue; + cl_mem mStream0, mStream1; + cl_int mRowIdx; + cl_event mFenceEvent; + size_t mNumThreads; + + RunThread( cl_kernel 
kernel, cl_command_queue queue, cl_mem stream0, cl_mem stream1, size_t numThreads ) + : mKernel( kernel ), mQueue( queue ), mStream0( stream0 ), mStream1( stream1 ), mNumThreads( numThreads ) + { + } + + void SetRunData( cl_int rowIdx, cl_event fenceEvent ) + { + mRowIdx = rowIdx; + mFenceEvent = fenceEvent; + } + + virtual void * IRun( void ) + { + cl_int error = run_cl_kernel( mKernel, mQueue, mStream0, mStream1, mRowIdx, mFenceEvent, mNumThreads ); + return (void *)error; + } +}; + + +int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_queue queue, bool separateThreads, GLint rend_vs, GLint read_vs, cl_device_id rend_device ) +{ + int error; + const int framebufferSize = 512; + + + if( !is_extension_available( device, "cl_khr_gl_event" ) ) + { + log_info( "NOTE: cl_khr_gl_event extension not present on this device; skipping fence sync test\n" ); + return 0; + } + + // Ask OpenCL for the platforms. Warn if more than one platform found, + // since this might not be the platform we want. By default, we simply + // use the first returned platform. + + cl_uint nplatforms; + cl_platform_id platform; + clGetPlatformIDs(0, NULL, &nplatforms); + clGetPlatformIDs(1, &platform, NULL); + + if (nplatforms > 1) { + log_info("clGetPlatformIDs returned multiple values. This is not " + "an error, but might result in obtaining incorrect function " + "pointers if you do not want the first returned platform.\n"); + + // Show them the platform name, in case it is a problem. 
+ + size_t size; + char *name; + + clGetPlatformInfo(platform, CL_PLATFORM_NAME, 0, NULL, &size); + name = (char*)malloc(size); + clGetPlatformInfo(platform, CL_PLATFORM_NAME, size, name, NULL); + + log_info("Using platform with name: %s \n", name); + free(name); + } + + clCreateEventFromGLsyncKHR_ptr = (clCreateEventFromGLsyncKHR_fn)clGetExtensionFunctionAddressForPlatform(platform, "clCreateEventFromGLsyncKHR"); + if( clCreateEventFromGLsyncKHR_ptr == NULL ) + { + log_error( "ERROR: Unable to run fence_sync test (clCreateEventFromGLsyncKHR function not discovered!)\n" ); + clCreateEventFromGLsyncKHR_ptr = (clCreateEventFromGLsyncKHR_fn)clGetExtensionFunctionAddressForPlatform(platform, "clCreateEventFromGLsyncAPPLE"); + return -1; + } + +#ifdef USING_ARB_sync + char *gl_version_str = (char*)glGetString( GL_VERSION ); + float glCoreVersion; + sscanf(gl_version_str, "%f", &glCoreVersion); + if( glCoreVersion < 3.0f ) + { + log_info( "OpenGL version %f does not support fence/sync! Skipping test.\n", glCoreVersion ); + return 0; + } + +#ifdef __APPLE__ + CGLContextObj currCtx = CGLGetCurrentContext(); + CGLPixelFormatObj pixFmt = CGLGetPixelFormat(currCtx); + GLint val, screen; + CGLGetVirtualScreen(currCtx, &screen); + CGLDescribePixelFormat(pixFmt, screen, kCGLPFAOpenGLProfile, &val); + if(val != kCGLOGLPVersion_3_2_Core) + { + log_error( "OpenGL context was not created with OpenGL version >= 3.0 profile even though platform supports it" + "OpenGL profile %f does not support fence/sync! 
Skipping test.\n", glCoreVersion ); + return -1; + } +#else +#ifdef _WIN32 + HDC hdc = wglGetCurrentDC(); + HGLRC hglrc = wglGetCurrentContext(); +#else + Display* dpy = glXGetCurrentDisplay(); + GLXDrawable drawable = glXGetCurrentDrawable(); + GLXContext ctx = glXGetCurrentContext(); +#endif +#endif + + InitSyncFns(); +#endif + +#ifdef __APPLE__ + CGLSetVirtualScreen(CGLGetCurrentContext(), rend_vs); +#else +#ifdef _WIN32 + wglMakeCurrent(hdc, hglrc); +#else + glXMakeCurrent(dpy, drawable, ctx); +#endif +#endif + + GLint posLoc, colLoc; + GLuint shaderprogram = createShaderProgram(&posLoc, &colLoc); + if(!shaderprogram) + { + log_error("Failed to create shader program\n"); + return -1; + } + + float l = 0.0f; float r = framebufferSize; + float b = 0.0f; float t = framebufferSize; + + float projMatrix[16] = { 2.0f/(r-l), 0.0f, 0.0f, 0.0f, + 0.0f, 2.0f/(t-b), 0.0f, 0.0f, + 0.0f, 0.0f, -1.0f, 0.0f, + -(r+l)/(r-l), -(t+b)/(t-b), 0.0f, 1.0f + }; + + glUseProgram(shaderprogram); + GLuint projMatLoc = glGetUniformLocation(shaderprogram, "projMatrix"); + glUniformMatrix4fv(projMatLoc, 1, 0, projMatrix); + glUseProgram(0); + + // Note: the framebuffer is just the target to verify our results against, so we don't + // really care to go through all the possible formats in this case + glFramebufferWrapper glFramebuffer; + glRenderbufferWrapper glRenderbuffer; + error = CreateGLRenderbufferRaw( framebufferSize, 128, GL_COLOR_ATTACHMENT0_EXT, + GL_RGBA, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, + &glFramebuffer, &glRenderbuffer ); + if( error != 0 ) + return error; + + GLuint vao; + glGenVertexArrays(1, &vao); + glBindVertexArray(vao); + + glBufferWrapper vtxBuffer, colorBuffer; + glGenBuffers( 1, &vtxBuffer ); + glGenBuffers( 1, &colorBuffer ); + + const int numHorizVertices = ( framebufferSize * 64 ) + 1; + + glBindBuffer( GL_ARRAY_BUFFER, vtxBuffer ); + glBufferData( GL_ARRAY_BUFFER, sizeof( GLfloat ) * numHorizVertices * 2 * 4, NULL, GL_STATIC_DRAW ); + + glBindBuffer( 
GL_ARRAY_BUFFER, colorBuffer ); + glBufferData( GL_ARRAY_BUFFER, sizeof( GLfloat ) * numHorizVertices * 2 * 4, NULL, GL_STATIC_DRAW ); + + // Now that the requisite objects are bound, we can attempt program + // validation: + + glValidateProgram(shaderprogram); + + GLint logLength, status; + glGetProgramiv(shaderprogram, GL_INFO_LOG_LENGTH, &logLength); + if (logLength > 0) { + GLchar *log = (GLchar*)malloc(logLength); + glGetProgramInfoLog(shaderprogram, logLength, &logLength, log); + log_info("Program validate log:\n%s", log); + free(log); + } + + glGetProgramiv(shaderprogram, GL_VALIDATE_STATUS, &status); + if (status == 0) + { + log_error("Failed to validate program\n"); + return 0; + } + + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper streams[ 2 ]; + + if( create_single_kernel_helper( context, &program, &kernel, 1, updateBuffersKernel, "update" ) ) + return -1; + + streams[ 0 ] = (*clCreateFromGLBuffer_ptr)( context, CL_MEM_READ_WRITE, vtxBuffer, &error ); + test_error( error, "Unable to create CL buffer from GL vertex buffer" ); + + streams[ 1 ] = (*clCreateFromGLBuffer_ptr)( context, CL_MEM_READ_WRITE, colorBuffer, &error ); + test_error( error, "Unable to create CL buffer from GL color buffer" ); + + error = clSetKernelArg( kernel, 0, sizeof( streams[ 0 ] ), &streams[ 0 ] ); + test_error( error, "Unable to set kernel arguments" ); + + error = clSetKernelArg( kernel, 1, sizeof( streams[ 1 ] ), &streams[ 1 ] ); + test_error( error, "Unable to set kernel arguments" ); + + cl_int horizWrap = (cl_int)framebufferSize; + error = clSetKernelArg( kernel, 2, sizeof( horizWrap ), &horizWrap ); + test_error( error, "Unable to set kernel arguments" ); + + glViewport( 0, 0, framebufferSize, framebufferSize ); + glClearColor( 0, 0, 0, 0 ); + glClear( GL_COLOR_BUFFER_BIT ); + glClear( GL_DEPTH_BUFFER_BIT ); + glDisable( GL_DEPTH_TEST ); + glEnable( GL_BLEND ); + glBlendFunc( GL_ONE, GL_ONE ); + + clEventWrapper fenceEvent; + GLsync glFence = 0; + + // 
Do a loop through 8 different horizontal stripes against the framebuffer + RunThread thread( kernel, queue, streams[ 0 ], streams[ 1 ], (size_t)numHorizVertices ); + + for( int i = 0; i < 8; i++ ) + { + // if current rendering device is not the compute device and + // separateThreads == false which means compute is going on same + // thread and we are using implicit synchronization (no GLSync obj used) + // then glFlush by clEnqueueAcquireGLObject is not sufficient ... we need + // to wait for rendering to finish on other device before CL can start + // writing to CL/GL shared mem objects. When separateThreads is true i.e. + // we are using GLSync obj to synchronize then we dont need to call glFinish + // here since CL should wait for rendering on other device before this + // GLSync object to finish before it starts writing to shared mem object. + // Also rend_device == compute_device no need to call glFinish + if(rend_device != device && !separateThreads) + glFinish(); + + if( separateThreads ) + { + if (fenceEvent != NULL) + { + clReleaseEvent(fenceEvent); + glDeleteSyncFunc(glFence); + } + + glFence = glFenceSyncFunc(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); + fenceEvent = clCreateEventFromGLsyncKHR_ptr(context, glFence, &error); + test_error(error, "Unable to create CL event from GL fence"); + + // in case of explicit synchronization, we just wait for the sync object to complete + // in clEnqueueAcquireGLObject but we dont flush. 
Its application's responsibility + // to flush on the context on which glSync is created + glFlush(); + + thread.SetRunData( (cl_int)i, fenceEvent ); + thread.Start(); + + error = (cl_int)(size_t)thread.Join(); + } + else + { + error = run_cl_kernel( kernel, queue, streams[ 0 ], streams[ 1 ], (cl_int)i, fenceEvent, (size_t)numHorizVertices ); + } + test_error( error, "Unable to run CL kernel" ); + + glUseProgram(shaderprogram); + glEnableVertexAttribArray(posLoc); + glEnableVertexAttribArray(colLoc); + glBindBuffer( GL_ARRAY_BUFFER, vtxBuffer ); + glVertexAttribPointer(posLoc, 4, GL_FLOAT, GL_FALSE, 4*sizeof(GLfloat), 0); + glBindBuffer( GL_ARRAY_BUFFER, colorBuffer ); + glVertexAttribPointer(colLoc, 4, GL_FLOAT, GL_FALSE, 4*sizeof(GLfloat), 0); + glBindBuffer( GL_ARRAY_BUFFER, 0 ); + + glDrawArrays( GL_TRIANGLE_STRIP, 0, numHorizVertices * 2 ); + + glDisableVertexAttribArray(posLoc); + glDisableVertexAttribArray(colLoc); + glUseProgram(0); + + if( separateThreads ) + { + // If we're on the same thread, then we're testing implicit syncing, so we + // don't need the actual fence code + if( fenceEvent != NULL ) + { + clReleaseEvent( fenceEvent ); + glDeleteSyncFunc( glFence ); + } + + glFence = glFenceSyncFunc( GL_SYNC_GPU_COMMANDS_COMPLETE, 0 ); + fenceEvent = clCreateEventFromGLsyncKHR_ptr( context, glFence, &error ); + test_error( error, "Unable to create CL event from GL fence" ); + + // in case of explicit synchronization, we just wait for the sync object to complete + // in clEnqueueAcquireGLObject but we dont flush. 
Its application's responsibility + // to flush on the context on which glSync is created + glFlush(); + } + else + glFinish(); + } + + if( glFence != 0 ) + // Don't need the final release for fenceEvent, because the wrapper will take care of that + glDeleteSyncFunc( glFence ); + +#ifdef __APPLE__ + CGLSetVirtualScreen(CGLGetCurrentContext(), read_vs); +#else +#ifdef _WIN32 + wglMakeCurrent(hdc, hglrc); +#else + glXMakeCurrent(dpy, drawable, ctx); +#endif +#endif + // Grab the contents of the final framebuffer + BufferOwningPtr resultData( ReadGLRenderbuffer( glFramebuffer, glRenderbuffer, + GL_COLOR_ATTACHMENT0_EXT, + GL_RGBA, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, kUChar, + framebufferSize, 128 ) ); + + // Check the contents now. We should end up with solid color bands 32 pixels high and the + // full width of the framebuffer, at values (128,128,128) due to the additive blending + for( int i = 0; i < 8; i++ ) + { + for( int y = 0; y < 4; y++ ) + { + // Note: coverage will be double because the 63-0 triangle overwrites again at the end of the pass + cl_uchar valA = ( ( ( i + 1 ) & 1 ) ) * numHorizVertices * 2 / framebufferSize; + cl_uchar valB = ( ( ( i + 1 ) & 2 ) >> 1 ) * numHorizVertices * 2 / framebufferSize; + cl_uchar valC = ( ( ( i + 1 ) & 4 ) >> 2 ) * numHorizVertices * 2 / framebufferSize; + + cl_uchar *row = (cl_uchar *)&resultData[ ( i * 16 + y ) * framebufferSize * 4 ]; + for( int x = 0; x < ( framebufferSize - 1 ) - 1; x++ ) + { + if( ( row[ x * 4 ] != valA ) || ( row[ x * 4 + 1 ] != valB ) || + ( row[ x * 4 + 2 ] != valC ) ) + { + log_error( "ERROR: Output framebuffer did not validate!\n" ); + DumpGLBuffer( GL_UNSIGNED_BYTE, framebufferSize, 128, resultData ); + log_error( "RUNS:\n" ); + uint32_t *p = (uint32_t *)(char *)resultData; + size_t a = 0; + for( size_t t = 1; t < framebufferSize * framebufferSize; t++ ) + { + if( p[ a ] != 0 ) + { + if( p[ t ] == 0 ) + { + log_error( "RUN: %ld to %ld (%d,%d to %d,%d) 0x%08x\n", a, t - 1, + (int)( a % 
framebufferSize ), (int)( a / framebufferSize ), + (int)( ( t - 1 ) % framebufferSize ), (int)( ( t - 1 ) / framebufferSize ), + p[ a ] ); + a = t; + } + } + else + { + if( p[ t ] != 0 ) + { + a = t; + } + } + + } + return -1; + } + } + } + } + + destroyShaderProgram(shaderprogram); + glDeleteVertexArrays(1, &vao); + return 0; +} + +int test_fence_sync( cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) +{ + GLint vs_count = 0; + cl_device_id *device_list = NULL; + + if( !is_extension_available( device, "cl_khr_gl_event" ) ) + { + log_info( "NOTE: cl_khr_gl_event extension not present on this device; skipping fence sync test\n" ); + return 0; + } +#ifdef __APPLE__ + CGLContextObj ctx = CGLGetCurrentContext(); + CGLPixelFormatObj pix = CGLGetPixelFormat(ctx); + CGLError err = CGLDescribePixelFormat(pix, 0, kCGLPFAVirtualScreenCount, &vs_count); + + device_list = (cl_device_id *) malloc(sizeof(cl_device_id)*vs_count); + clGetGLContextInfoAPPLE(context, ctx, CL_CGL_DEVICES_FOR_SUPPORTED_VIRTUAL_SCREENS_APPLE, sizeof(cl_device_id)*vs_count, device_list, NULL); +#else + // Need platform specific way of getting devices from CL context to which OpenGL can render + // If not available it can be replaced with clGetContextInfo with CL_CONTEXT_DEVICES + size_t device_cb; + cl_int err = clGetContextInfo( context, CL_CONTEXT_DEVICES, 0, NULL, &device_cb); + if( err != CL_SUCCESS ) + { + print_error( err, "Unable to get device count from context" ); + test_finish(); + return -1; + } + vs_count = (GLint)device_cb / sizeof(cl_device_id); + + if (vs_count < 1) { + log_error("No devices found.\n"); + return -1; + } + + device_list = (cl_device_id *) malloc(device_cb); + err = clGetContextInfo( context, CL_CONTEXT_DEVICES, device_cb, device_list, NULL); + if( err != CL_SUCCESS ) { + free(device_list); + print_error( err, "Unable to get device list from context" ); + return -1; + } + +#endif + + GLint rend_vs, read_vs; + int error = 0; + int any_failed 
= 0; + + // Loop through all the devices capable to OpenGL rendering + // and set them as current rendering target + for(rend_vs = 0; rend_vs < vs_count; rend_vs++) + { + // Loop through all the devices and set them as current + // compute target + for(read_vs = 0; read_vs < vs_count; read_vs++) + { + cl_device_id rend_device = device_list[rend_vs], read_device = device_list[read_vs]; + char rend_name[200], read_name[200]; + + clGetDeviceInfo(rend_device, CL_DEVICE_NAME, sizeof(rend_name), rend_name, NULL); + clGetDeviceInfo(read_device, CL_DEVICE_NAME, sizeof(read_name), read_name, NULL); + + log_info("Rendering on: %s, read back on: %s\n", rend_name, read_name); + error = test_fence_sync_single( device, context, queue, false, rend_vs, read_vs, rend_device ); + any_failed |= error; + if( error != 0 ) + log_error( "ERROR: Implicit syncing with GL sync events failed!\n\n" ); + else + log_info("Implicit syncing Passed\n"); + + error = test_fence_sync_single( device, context, queue, true, rend_vs, read_vs, rend_device ); + any_failed |= error; + if( error != 0 ) + log_error( "ERROR: Explicit syncing with GL sync events failed!\n\n" ); + else + log_info("Explicit syncing Passed\n"); + } + } + + free(device_list); + + return any_failed; +} diff --git a/test_conformance/gl/test_image_methods.cpp b/test_conformance/gl/test_image_methods.cpp new file mode 100644 index 00000000..fa0c00a6 --- /dev/null +++ b/test_conformance/gl/test_image_methods.cpp @@ -0,0 +1,405 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "common.h" + +#include + +using namespace std; + +typedef struct image_kernel_data +{ + cl_int width; + cl_int height; + cl_int depth; + cl_int arraySize; + cl_int widthDim; + cl_int heightDim; + cl_int channelType; + cl_int channelOrder; + cl_int expectedChannelType; + cl_int expectedChannelOrder; + cl_int numSamples; +}; + +static const char *methodTestKernelPattern = +"typedef struct {\n" +" int width;\n" +" int height;\n" +" int depth;\n" +" int arraySize;\n" +" int widthDim;\n" +" int heightDim;\n" +" int channelType;\n" +" int channelOrder;\n" +" int expectedChannelType;\n" +" int expectedChannelOrder;\n" +" int numSamples;\n" +" } image_kernel_data;\n" +"__kernel void sample_kernel( read_only %s input, __global image_kernel_data *outData )\n" +"{\n" +"%s%s%s%s%s%s%s%s%s%s%s" +"}\n"; + +static const char *arraySizeKernelLine = +" outData->arraySize = get_image_array_size( input );\n"; +static const char *imageWidthKernelLine = +" outData->width = get_image_width( input );\n"; +static const char *imageHeightKernelLine = +" outData->height = get_image_height( input );\n"; +static const char *imageDimKernelLine = +" int2 dim = get_image_dim( input );\n"; +static const char *imageWidthDimKernelLine = +" outData->widthDim = dim.x;\n"; +static const char *imageHeightDimKernelLine = +" outData->heightDim = dim.y;\n"; +static const char *channelTypeKernelLine = +" outData->channelType = get_image_channel_data_type( input );\n"; +static const char *channelTypeConstLine = +" outData->expectedChannelType = CLK_%s;\n"; +static const char *channelOrderKernelLine = +" outData->channelOrder = get_image_channel_order( input );\n"; +static const char *channelOrderConstLine = +" outData->expectedChannelOrder = CLK_%s;\n"; +static const char *numSamplesKernelLine = +" outData->numSamples = get_image_num_samples( input );\n"; + +static int verify(cl_int input, 
cl_int kernelOutput, const char * description) +{ + if( kernelOutput != input ) + { + log_error( "ERROR: %s did not validate (expected %d, got %d)\n", description, input, kernelOutput); + return -1; + } + return 0; +} + +extern int supportsMsaa(cl_context context, bool* supports_msaa); +extern int supportsDepth(cl_context context, bool* supports_depth); + +int test_image_format_methods( cl_device_id device, cl_context context, cl_command_queue queue, + size_t width, size_t height, size_t arraySize, size_t samples, + GLenum target, format format, MTdata d ) +{ + int error, result=0; + + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper image, outDataBuffer; + char programSrc[ 10240 ]; + + image_kernel_data outKernelData; + +#ifdef GL_VERSION_3_2 + if (get_base_gl_target(target) == GL_TEXTURE_2D_MULTISAMPLE || + get_base_gl_target(target) == GL_TEXTURE_2D_MULTISAMPLE_ARRAY) + { + bool supports_msaa; + error = supportsMsaa(context, &supports_msaa); + if( error != 0 ) return error; + if (!supports_msaa) return 0; + } + if (format.formattype == GL_DEPTH_COMPONENT || + format.formattype == GL_DEPTH_STENCIL) + { + bool supports_depth; + error = supportsDepth(context, &supports_depth); + if( error != 0 ) return error; + if (!supports_depth) return 0; + } +#endif + DetectFloatToHalfRoundingMode(queue); + + glTextureWrapper glTexture; + switch (get_base_gl_target(target)) { + case GL_TEXTURE_2D: + CreateGLTexture2D( width, height, target, + format.formattype, format.internal, format.datatype, + format.type, &glTexture, &error, false, d ); + break; + case GL_TEXTURE_2D_ARRAY: + CreateGLTexture2DArray( width, height, arraySize, target, + format.formattype, format.internal, format.datatype, + format.type, &glTexture, &error, false, d ); + break; + case GL_TEXTURE_2D_MULTISAMPLE: + CreateGLTexture2DMultisample( width, height, samples, target, + format.formattype, format.internal, format.datatype, + format.type, &glTexture, &error, false, d, false); + break; + 
case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: + CreateGLTexture2DArrayMultisample( width, height, arraySize, samples, target, + format.formattype, format.internal, format.datatype, + format.type, &glTexture, &error, false, d, false); + break; + + default: + log_error("Unsupported GL tex target (%s) passed to write test: " + "%s (%s):%d", GetGLTargetName(target), __FUNCTION__, + __FILE__, __LINE__); + } + + // Check to see if the texture could not be created for some other reason like + // GL_FRAMEBUFFER_UNSUPPORTED + if (error == GL_FRAMEBUFFER_UNSUPPORTED) { + return 0; + } + + // Construct testing source + log_info( " - Creating image %d by %d...\n", width, height ); + // Create a CL image from the supplied GL texture + image = (*clCreateFromGLTexture_ptr)( context, CL_MEM_READ_ONLY, + target, 0, glTexture, &error ); + + if ( error != CL_SUCCESS ) { + print_error( error, "Unable to create CL image from GL texture" ); + GLint fmt; + glGetTexLevelParameteriv( target, 0, GL_TEXTURE_INTERNAL_FORMAT, &fmt ); + log_error( " Supplied GL texture was base format %s and internal " + "format %s\n", GetGLBaseFormatName( fmt ), GetGLFormatName( fmt ) ); + return error; + } + + cl_image_format imageFormat; + error = clGetImageInfo (image, CL_IMAGE_FORMAT, + sizeof(imageFormat), &imageFormat, NULL); + test_error(error, "Failed to get image format"); + + const char * imageType = 0; + bool doArraySize = false; + bool doImageWidth = false; + bool doImageHeight = false; + bool doImageChannelDataType = false; + bool doImageChannelOrder = false; + bool doImageDim = false; + bool doNumSamples = false; + switch(target) { + case GL_TEXTURE_2D: + imageType = "image2d_depth_t"; + doImageWidth = true; + doImageHeight = true; + doImageChannelDataType = true; + doImageChannelOrder = true; + doImageDim = true; + break; + case GL_TEXTURE_2D_ARRAY: + imageType = "image2d_array_depth_t"; + doImageWidth = true; + doImageHeight = true; + doArraySize = true; + doImageChannelDataType = true; + 
doImageChannelOrder = true; + doImageDim = true; + doArraySize = true; + break; + case GL_TEXTURE_2D_MULTISAMPLE: + doNumSamples = true; + if(format.formattype == GL_DEPTH_COMPONENT) { + doImageWidth = true; + imageType = "image2d_msaa_depth_t"; + } else { + imageType = "image2d_msaa_t"; + } + break; + case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: + if(format.formattype == GL_DEPTH_COMPONENT) { + doImageWidth = true; + imageType = "image2d_msaa_array_depth_t"; + } else { + imageType = "image2d_array_msaa_t"; + } + break; + } + + + + char channelTypeConstKernelLine[512] = {0}; + char channelOrderConstKernelLine[512] = {0}; + const char* channelTypeName=0; + const char* channelOrderName=0; + if(doImageChannelDataType) { + channelTypeName = GetChannelTypeName( imageFormat.image_channel_data_type ); + if(channelTypeName && strlen(channelTypeName)) { + // replace CL_* with CLK_* + sprintf(channelTypeConstKernelLine, channelTypeConstLine, &channelTypeName[3]); + } + } + if(doImageChannelOrder) { + channelOrderName = GetChannelOrderName( imageFormat.image_channel_order ); + if(channelOrderName && strlen(channelOrderName)) { + // replace CL_* with CLK_* + sprintf(channelOrderConstKernelLine, channelOrderConstLine, &channelOrderName[3]); + } + } + + // Create a program to run against + sprintf( programSrc, methodTestKernelPattern, + imageType, + ( doArraySize ) ? arraySizeKernelLine : "", + ( doImageWidth ) ? imageWidthKernelLine : "", + ( doImageHeight ) ? imageHeightKernelLine : "", + ( doImageChannelDataType ) ? channelTypeKernelLine : "", + ( doImageChannelDataType ) ? channelTypeConstKernelLine : "", + ( doImageChannelOrder ) ? channelOrderKernelLine : "", + ( doImageChannelOrder ) ? channelOrderConstKernelLine : "", + ( doImageDim ) ? imageDimKernelLine : "", + ( doImageDim && doImageWidth ) ? imageWidthDimKernelLine : "", + ( doImageDim && doImageHeight ) ? imageHeightDimKernelLine : "", + ( doNumSamples ) ? 
numSamplesKernelLine : ""); + + + //log_info("-----------------------------------\n%s\n", programSrc); + error = clFinish(queue); + if (error) + print_error(error, "clFinish failed.\n"); + const char *ptr = programSrc; + error = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "sample_kernel" ); + test_error( error, "Unable to create kernel to test against" ); + + // Create an output buffer + outDataBuffer = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof( outKernelData ), NULL, &error ); + test_error( error, "Unable to create output buffer" ); + + // Set up arguments and run + error = clSetKernelArg( kernel, 0, sizeof( image ), &image ); + test_error( error, "Unable to set kernel argument" ); + error = clSetKernelArg( kernel, 1, sizeof( outDataBuffer ), &outDataBuffer ); + test_error( error, "Unable to set kernel argument" ); + + // Flush and Acquire. + glFlush(); + error = (*clEnqueueAcquireGLObjects_ptr)( queue, 1, &image, 0, NULL, NULL); + test_error( error, "Unable to acquire GL obejcts"); + + size_t threads[1] = { 1 }, localThreads[1] = { 1 }; + + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); + test_error( error, "Unable to run kernel" ); + + error = clEnqueueReadBuffer( queue, outDataBuffer, CL_TRUE, 0, sizeof( outKernelData ), &outKernelData, 0, NULL, NULL ); + test_error( error, "Unable to read data buffer" ); + + // Verify the results now + if( doImageWidth ) + result |= verify(width, outKernelData.width, "width"); + if( doImageHeight) + result |= verify(height, outKernelData.height, "height"); + if( doImageDim && doImageWidth ) + result |= verify(width, outKernelData.widthDim, "width from get_image_dim"); + if( doImageDim && doImageHeight ) + result |= verify(height, outKernelData.heightDim, "height from get_image_dim"); + if( doImageChannelDataType ) + result |= verify(outKernelData.channelType, outKernelData.expectedChannelType, channelTypeName); + if( 
doImageChannelOrder ) + result |= verify(outKernelData.channelOrder, outKernelData.expectedChannelOrder, channelOrderName); + if( doArraySize ) + result |= verify(arraySize, outKernelData.arraySize, "array size"); + if( doNumSamples ) + result |= verify(samples, outKernelData.numSamples, "samples"); + if(result) { + log_error("Test image methods failed"); + } + + clEventWrapper event; + error = (*clEnqueueReleaseGLObjects_ptr)( queue, 1, &image, 0, NULL, &event ); + test_error(error, "clEnqueueReleaseGLObjects failed"); + + error = clWaitForEvents( 1, &event ); + test_error(error, "clWaitForEvents failed"); + + return result; +} + +int test_image_methods_depth( cl_device_id device, cl_context context, cl_command_queue queue, int numElements ){ + if (!is_extension_available(device, "cl_khr_gl_depth_images")) { + log_info("Test not run because 'cl_khr_gl_depth_images' extension is not supported by the tested device\n"); + return 0; + } + + size_t pixelSize; + int result = 0; + GLenum depth_targets[] = {GL_TEXTURE_2D, GL_TEXTURE_2D_ARRAY}; + size_t ntargets = sizeof(depth_targets) / sizeof(depth_targets[0]); + size_t nformats = sizeof(depth_formats) / sizeof(depth_formats[0]); + + const size_t nsizes = 5; + sizevec_t sizes[nsizes]; + // Need to limit texture size according to GL device properties + GLint maxTextureSize = 4096, maxTextureRectangleSize = 4096, maxTextureLayers = 16, size; + glGetIntegerv(GL_MAX_TEXTURE_SIZE, &maxTextureSize); + glGetIntegerv(GL_MAX_RECTANGLE_TEXTURE_SIZE_EXT, &maxTextureRectangleSize); + glGetIntegerv(GL_MAX_ARRAY_TEXTURE_LAYERS, &maxTextureLayers); + + size = min(maxTextureSize, maxTextureRectangleSize); + + RandomSeed seed( gRandomSeed ); + + // Generate some random sizes (within reasonable ranges) + for (size_t i = 0; i < nsizes; i++) { + sizes[i].width = random_in_range( 2, min(size, 1<<(i+4)), seed ); + sizes[i].height = random_in_range( 2, min(size, 1<<(i+4)), seed ); + sizes[i].depth = random_in_range( 2, min(maxTextureLayers, 
1<<(i+4)), seed ); + } + + for (size_t i = 0; i < nsizes; i++) { + for(size_t itarget = 0; itarget < ntargets; ++itarget) { + for(size_t iformat = 0; iformat < nformats; ++iformat) + result |= test_image_format_methods(device, context, queue, sizes[i].width, sizes[i].height, (depth_targets[itarget] == GL_TEXTURE_2D_ARRAY) ? sizes[i].depth: 1, 0, + depth_targets[itarget], depth_formats[iformat], seed ); + } + } + return result; +} + +int test_image_methods_multisample( cl_device_id device, cl_context context, cl_command_queue queue, int numElements ){ + if (!is_extension_available(device, "cl_khr_gl_msaa_sharing")) { + log_info("Test not run because 'cl_khr_gl_msaa_sharing' extension is not supported by the tested device\n"); + return 0; + } + + size_t pixelSize; + int result = 0; + GLenum targets[] = {GL_TEXTURE_2D_MULTISAMPLE, GL_TEXTURE_2D_MULTISAMPLE_ARRAY}; + size_t ntargets = sizeof(targets) / sizeof(targets[0]); + size_t nformats = sizeof(common_formats) / sizeof(common_formats[0]); + + const size_t nsizes = 5; + sizevec_t sizes[nsizes]; + GLint maxTextureLayers = 16, maxTextureSize = 4096; + glGetIntegerv(GL_MAX_ARRAY_TEXTURE_LAYERS, &maxTextureLayers); + glGetIntegerv(GL_MAX_TEXTURE_SIZE, &maxTextureSize); + + RandomSeed seed( gRandomSeed ); + + // Generate some random sizes (within reasonable ranges) + for (size_t i = 0; i < nsizes; i++) { + sizes[i].width = random_in_range( 2, min(maxTextureSize, 1<<(i+4)), seed ); + sizes[i].height = random_in_range( 2, min(maxTextureSize, 1<<(i+4)), seed ); + sizes[i].depth = random_in_range( 2, min(maxTextureLayers, 1<<(i+4)), seed ); + } + + glEnable(GL_MULTISAMPLE); + + for (size_t i = 0; i < nsizes; i++) { + for(size_t itarget = 0; itarget < ntargets; ++itarget) { + for(size_t iformat = 0; iformat < nformats; ++iformat) { + GLint samples = get_gl_max_samples(targets[itarget], common_formats[iformat].internal); + result |= test_image_format_methods(device, context, queue, sizes[i].width, sizes[i].height, 
(targets[ntargets] == GL_TEXTURE_2D_MULTISAMPLE_ARRAY) ? sizes[i].depth: 1, + samples, targets[itarget], common_formats[iformat], seed ); + } + } + } + return result; +} diff --git a/test_conformance/gl/test_images_1D.cpp b/test_conformance/gl/test_images_1D.cpp new file mode 100644 index 00000000..172dd4b5 --- /dev/null +++ b/test_conformance/gl/test_images_1D.cpp @@ -0,0 +1,141 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "common.h" +#include "testBase.h" + +#if defined( __APPLE__ ) + #include +#else + #include + #include +#endif +#include + +using namespace std; + +void calc_test_size_descriptors(sizevec_t* sizes, size_t nsizes) +{ + // Need to limit array size according to GL device properties + GLint maxTextureSize = 4096, maxTextureBufferSize = 4096, size; + glGetIntegerv(GL_MAX_TEXTURE_SIZE, &maxTextureSize); + glGetIntegerv(GL_MAX_TEXTURE_BUFFER_SIZE, &maxTextureBufferSize); + + size = min(maxTextureSize, maxTextureBufferSize); + + RandomSeed seed( gRandomSeed ); + + // Generate some random sizes (within reasonable ranges) + for (size_t i = 0; i < nsizes; i++) { + sizes[i].width = random_in_range( 2, min(size, 1<<(i+4)), seed ); + sizes[i].height = 1; + sizes[i].depth = 1; + } +} + +int test_images_read_1D( cl_device_id device, cl_context context, + cl_command_queue queue, int numElements ) +{ + size_t nformats = sizeof(common_formats) / sizeof(common_formats[0]); + + GLenum targets[] = { GL_TEXTURE_1D }; + size_t ntargets = sizeof(targets) / sizeof(targets[0]); + + const size_t nsizes = 8; + sizevec_t sizes[nsizes]; + calc_test_size_descriptors(sizes, nsizes); + + return test_images_read_common(device, context, queue, common_formats, + nformats, targets, ntargets, sizes, nsizes); +} + +int test_images_write_1D( cl_device_id device, cl_context context, + cl_command_queue queue, int numElements ) +{ + GLenum targets[] = { GL_TEXTURE_1D }; + size_t ntargets = sizeof(targets) / sizeof(targets[0]); + size_t nformats = sizeof(common_formats) / sizeof(common_formats[0]); + + const size_t nsizes = 8; + sizevec_t sizes[nsizes]; + calc_test_size_descriptors(sizes, nsizes); + + return test_images_write_common( device, context, queue, common_formats, + nformats, targets, ntargets, sizes, nsizes ); +} + +int test_images_1D_getinfo( cl_device_id device, cl_context context, + cl_command_queue queue, int numElements ) +{ + size_t nformats = sizeof(common_formats) / 
sizeof(common_formats[0]); + + GLenum targets[] = { GL_TEXTURE_1D }; + size_t ntargets = sizeof(targets) / sizeof(targets[0]); + + const size_t nsizes = 8; + sizevec_t sizes[nsizes]; + calc_test_size_descriptors(sizes, nsizes); + + return test_images_get_info_common( device, context, queue, common_formats, + nformats, targets, ntargets, sizes, nsizes); +} + +int test_images_read_texturebuffer( cl_device_id device, cl_context context, + cl_command_queue queue, int numElements ) +{ + size_t nformats = sizeof(common_formats) / sizeof(common_formats[0]); + + GLenum targets[] = { GL_TEXTURE_BUFFER }; + size_t ntargets = sizeof(targets) / sizeof(targets[0]); + + const size_t nsizes = 8; + sizevec_t sizes[nsizes]; + calc_test_size_descriptors(sizes, nsizes); + + return test_images_read_common(device, context, queue, common_formats, + nformats, targets, ntargets, sizes, nsizes); +} + +int test_images_write_texturebuffer( cl_device_id device, cl_context context, + cl_command_queue queue, int numElements ) +{ + GLenum targets[] = { GL_TEXTURE_BUFFER }; + size_t ntargets = sizeof(targets) / sizeof(targets[0]); + size_t nformats = sizeof(common_formats) / sizeof(common_formats[0]); + + const size_t nsizes = 8; + sizevec_t sizes[nsizes]; + calc_test_size_descriptors(sizes, nsizes); + + return test_images_write_common( device, context, queue, common_formats, + nformats, targets, ntargets, sizes, nsizes ); +} + +int test_images_texturebuffer_getinfo( cl_device_id device, cl_context context, + cl_command_queue queue, int numElements ) +{ + size_t nformats = sizeof(common_formats) / sizeof(common_formats[0]); + + GLenum targets[] = { GL_TEXTURE_BUFFER }; + size_t ntargets = sizeof(targets) / sizeof(targets[0]); + + const size_t nsizes = 8; + sizevec_t sizes[nsizes]; + calc_test_size_descriptors(sizes, nsizes); + + return test_images_get_info_common( device, context, queue, common_formats, + nformats, targets, ntargets, sizes, nsizes); +} + diff --git 
a/test_conformance/gl/test_images_1Darray.cpp b/test_conformance/gl/test_images_1Darray.cpp new file mode 100644 index 00000000..1914a457 --- /dev/null +++ b/test_conformance/gl/test_images_1Darray.cpp @@ -0,0 +1,90 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "common.h" +#include "testBase.h" + +#if defined( __APPLE__ ) + #include +#else + #include + #include +#endif +#include + +using namespace std; +void calc_1D_array_size_descriptors(sizevec_t* sizes, size_t nsizes) +{ + // Need to limit array size according to GL device properties + GLint maxTextureLayers = 16, maxTextureSize = 4096; + glGetIntegerv(GL_MAX_ARRAY_TEXTURE_LAYERS, &maxTextureLayers); + glGetIntegerv(GL_MAX_TEXTURE_SIZE, &maxTextureSize); + + RandomSeed seed( gRandomSeed ); + + // Generate some random sizes (within reasonable ranges) + for (size_t i = 0; i < nsizes; i++) { + sizes[i].width = random_in_range( 2, min(maxTextureSize, 1<<(i+4)), seed ); + sizes[i].height = random_in_range( 2, min(maxTextureLayers, 1<<(i+4)), seed ); + sizes[i].depth = 1; + } +} + +int test_images_read_1Darray( cl_device_id device, cl_context context, + cl_command_queue queue, int ) +{ + size_t nformats = sizeof(common_formats) / sizeof(common_formats[0]); + + GLenum targets[] = { GL_TEXTURE_1D_ARRAY }; + size_t ntargets = sizeof(targets) / sizeof(targets[0]); + + const size_t nsizes = 8; + sizevec_t sizes[nsizes]; + 
calc_1D_array_size_descriptors(sizes, nsizes); + + return test_images_read_common(device, context, queue, common_formats, + nformats, targets, ntargets, sizes, nsizes); +} + +int test_images_write_1Darray( cl_device_id device, cl_context context, + cl_command_queue queue, int numElements ) +{ + GLenum targets[] = { GL_TEXTURE_1D_ARRAY }; + size_t ntargets = sizeof(targets) / sizeof(targets[0]); + size_t nformats = sizeof(common_formats) / sizeof(common_formats[0]); + + const size_t nsizes = 8; + sizevec_t sizes[nsizes]; + calc_1D_array_size_descriptors(sizes, nsizes); + + return test_images_write_common( device, context, queue, common_formats, + nformats, targets, ntargets, sizes, nsizes ); +} + +int test_images_1Darray_getinfo( cl_device_id device, cl_context context, + cl_command_queue queue, int ) +{ + size_t nformats = sizeof(common_formats) / sizeof(common_formats[0]); + + GLenum targets[] = { GL_TEXTURE_1D_ARRAY }; + size_t ntargets = sizeof(targets) / sizeof(targets[0]); + + const size_t nsizes = 8; + sizevec_t sizes[nsizes]; + calc_1D_array_size_descriptors(sizes, nsizes); + + return test_images_get_info_common(device, context, queue, common_formats, + nformats, targets, ntargets, sizes, nsizes); +} \ No newline at end of file diff --git a/test_conformance/gl/test_images_2D.cpp b/test_conformance/gl/test_images_2D.cpp new file mode 100644 index 00000000..5af48ea8 --- /dev/null +++ b/test_conformance/gl/test_images_2D.cpp @@ -0,0 +1,190 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "testBase.h" +#include "common.h" + +#if defined( __APPLE__ ) +#include +#else +#include +#include +#endif +#include + +using namespace std; + +extern "C" { extern cl_uint gRandomSeed; } + +#pragma mark - +#pragma mark _2D read tests + +void calc_2D_test_size_descriptors(sizevec_t* sizes, size_t nsizes) +{ + // Need to limit array size according to GL device properties + // Need to limit texture size according to GL device properties + GLint maxTextureSize = 4096, maxTextureRectangleSize = 4096, size; + glGetIntegerv(GL_MAX_TEXTURE_SIZE, &maxTextureSize); + glGetIntegerv(GL_MAX_RECTANGLE_TEXTURE_SIZE_EXT, &maxTextureRectangleSize); + + size = min(maxTextureSize, maxTextureRectangleSize); + + RandomSeed seed( gRandomSeed ); + + // Generate some random sizes (within reasonable ranges) + for (size_t i = 0; i < nsizes; i++) { + sizes[i].width = random_in_range( 2, min(size, 1<<(i+4)), seed ); + sizes[i].height = random_in_range( 2, min(size, 1<<(i+4)), seed ); + sizes[i].depth = 1; + } +} + +void calc_cube_test_size_descriptors(sizevec_t* sizes, size_t nsizes) +{ + // Need to limit array size according to GL device properties + // Need to limit texture size according to GL device properties + GLint maxQubeMapSize = 4096; + glGetIntegerv(GL_MAX_CUBE_MAP_TEXTURE_SIZE, &maxQubeMapSize); + + RandomSeed seed( gRandomSeed ); + + // Generate some random sizes (within reasonable ranges) + for (size_t i = 0; i < nsizes; i++) { + sizes[i].width = sizes[i].height = random_in_range( 2, min(maxQubeMapSize, 1<<(i+4)), seed ); + sizes[i].depth = 1; + } +} + +int test_images_read_2D( cl_device_id device, cl_context context, + cl_command_queue queue, int numElements ) +{ + GLenum targets[] = { GL_TEXTURE_2D, GL_TEXTURE_RECTANGLE_EXT }; + size_t ntargets = sizeof(targets) / sizeof(targets[0]); + + size_t nformats = sizeof(common_formats) / sizeof(common_formats[0]); + 
+ const size_t nsizes = 8; + sizevec_t sizes[nsizes]; + calc_2D_test_size_descriptors(sizes, nsizes); + + return test_images_read_common(device, context, queue, common_formats, + nformats, targets, ntargets, sizes, nsizes); +} + +int test_images_read_cube( cl_device_id device, cl_context context, + cl_command_queue queue, int numElements ) +{ + GLenum targets[] = { + GL_TEXTURE_CUBE_MAP_POSITIVE_X, + GL_TEXTURE_CUBE_MAP_POSITIVE_Y, + GL_TEXTURE_CUBE_MAP_POSITIVE_Z, + GL_TEXTURE_CUBE_MAP_NEGATIVE_X, + GL_TEXTURE_CUBE_MAP_NEGATIVE_Y, + GL_TEXTURE_CUBE_MAP_NEGATIVE_Z }; + + size_t ntargets = sizeof(targets) / sizeof(targets[0]); + size_t nformats = sizeof(common_formats) / sizeof(common_formats[0]); + + const size_t nsizes = 8; + sizevec_t sizes[nsizes]; + calc_cube_test_size_descriptors(sizes, nsizes); + + return test_images_read_common(device, context, queue, common_formats, + nformats, targets, ntargets, sizes, nsizes); +} + +#pragma mark - +#pragma mark _2D write tests + +#include "common.h" + +int test_images_write( cl_device_id device, cl_context context, + cl_command_queue queue, int numElements ) +{ + GLenum targets[] = { GL_TEXTURE_2D, GL_TEXTURE_RECTANGLE_EXT }; + size_t ntargets = sizeof(targets) / sizeof(targets[0]); + size_t nformats = sizeof(common_formats) / sizeof(common_formats[0]); + + const size_t nsizes = 8; + sizevec_t sizes[nsizes]; + calc_2D_test_size_descriptors(sizes, nsizes); + + return test_images_write_common( device, context, queue, common_formats, + nformats, targets, ntargets, sizes, nsizes ); +} + +int test_images_write_cube( cl_device_id device, cl_context context, + cl_command_queue queue, int numElements ) +{ + size_t nformats = sizeof(common_formats) / sizeof(common_formats[0]); + + GLenum targets[] = { + GL_TEXTURE_CUBE_MAP_POSITIVE_X, + GL_TEXTURE_CUBE_MAP_POSITIVE_Y, + GL_TEXTURE_CUBE_MAP_POSITIVE_Z, + GL_TEXTURE_CUBE_MAP_NEGATIVE_X, + GL_TEXTURE_CUBE_MAP_NEGATIVE_Y, + GL_TEXTURE_CUBE_MAP_NEGATIVE_Z + }; + size_t ntargets = 
sizeof(targets) / sizeof(targets[0]); + + const size_t nsizes = 8; + sizevec_t sizes[nsizes]; + calc_cube_test_size_descriptors(sizes, nsizes); + + return test_images_write_common( device, context, queue, common_formats, + nformats, targets, ntargets, sizes, nsizes ); +} + +#pragma mark - +#pragma mark _2D get info tests + +int test_images_2D_getinfo( cl_device_id device, cl_context context, + cl_command_queue queue, int numElements ) +{ + GLenum targets[] = { GL_TEXTURE_2D, GL_TEXTURE_RECTANGLE_EXT }; + size_t ntargets = sizeof(targets) / sizeof(targets[0]); + + size_t nformats = sizeof(common_formats) / sizeof(common_formats[0]); + + const size_t nsizes = 8; + sizevec_t sizes[nsizes]; + calc_2D_test_size_descriptors(sizes, nsizes); + + return test_images_get_info_common(device, context, queue, common_formats, + nformats, targets, ntargets, sizes, nsizes); +} + +int test_images_cube_getinfo( cl_device_id device, cl_context context, + cl_command_queue queue, int numElements ) +{ + GLenum targets[] = { + GL_TEXTURE_CUBE_MAP_POSITIVE_X, + GL_TEXTURE_CUBE_MAP_POSITIVE_Y, + GL_TEXTURE_CUBE_MAP_POSITIVE_Z, + GL_TEXTURE_CUBE_MAP_NEGATIVE_X, + GL_TEXTURE_CUBE_MAP_NEGATIVE_Y, + GL_TEXTURE_CUBE_MAP_NEGATIVE_Z + }; + size_t ntargets = sizeof(targets) / sizeof(targets[0]); + size_t nformats = sizeof(common_formats) / sizeof(common_formats[0]); + + const size_t nsizes = 8; + sizevec_t sizes[nsizes]; + calc_cube_test_size_descriptors(sizes, nsizes); + + return test_images_get_info_common(device, context, queue, common_formats, + nformats, targets, ntargets, sizes, nsizes); +} diff --git a/test_conformance/gl/test_images_2Darray.cpp b/test_conformance/gl/test_images_2Darray.cpp new file mode 100644 index 00000000..bb7095d1 --- /dev/null +++ b/test_conformance/gl/test_images_2Darray.cpp @@ -0,0 +1,93 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "common.h" +#include "testBase.h" + +#if defined( __APPLE__ ) + #include +#else + #include + #include +#endif +#include + +using namespace std; + +void calc_2D_array_size_descriptors(sizevec_t* sizes, size_t nsizes) +{ + // Need to limit array size according to GL device properties + GLint maxTextureLayers = 16, maxTextureSize = 4096; + glGetIntegerv(GL_MAX_ARRAY_TEXTURE_LAYERS, &maxTextureLayers); + glGetIntegerv(GL_MAX_TEXTURE_SIZE, &maxTextureSize); + + RandomSeed seed( gRandomSeed ); + + // Generate some random sizes (within reasonable ranges) + for (size_t i = 0; i < nsizes; i++) { + sizes[i].width = random_in_range( 2, min(maxTextureSize, 1<<(i+4)), seed ); + sizes[i].height = random_in_range( 2, min(maxTextureSize, 1<<(i+4)), seed ); + sizes[i].depth = random_in_range( 2, min(maxTextureLayers, 1<<(i+4)), seed ); + } +} + +int test_images_read_2Darray( cl_device_id device, cl_context context, + cl_command_queue queue, int ) +{ + size_t nformats = sizeof(common_formats) / sizeof(common_formats[0]); + + GLenum targets[] = { GL_TEXTURE_2D_ARRAY }; + size_t ntargets = sizeof(targets) / sizeof(targets[0]); + + const size_t nsizes = 6; + sizevec_t sizes[nsizes]; + calc_2D_array_size_descriptors(sizes, nsizes); + + return test_images_read_common(device, context, queue, common_formats, + nformats, targets, ntargets, sizes, nsizes); +} + +int test_images_write_2Darray( cl_device_id device, 
cl_context context, + cl_command_queue queue, int numElements ) +{ + // FIXME: Query for 2D image array write support. + + GLenum targets[] = { GL_TEXTURE_2D_ARRAY }; + size_t ntargets = sizeof(targets) / sizeof(targets[0]); + size_t nformats = sizeof(common_formats) / sizeof(common_formats[0]); + + const size_t nsizes = 6; + sizevec_t sizes[nsizes]; + calc_2D_array_size_descriptors(sizes, nsizes); + + return test_images_write_common( device, context, queue, common_formats, + nformats, targets, ntargets, sizes, nsizes ); +} + +int test_images_2Darray_getinfo( cl_device_id device, cl_context context, + cl_command_queue queue, int ) +{ + size_t nformats = sizeof(common_formats) / sizeof(common_formats[0]); + + GLenum targets[] = { GL_TEXTURE_2D_ARRAY }; + size_t ntargets = sizeof(targets) / sizeof(targets[0]); + + const size_t nsizes = 6; + sizevec_t sizes[nsizes]; + calc_2D_array_size_descriptors(sizes, nsizes); + + return test_images_get_info_common(device, context, queue, common_formats, + nformats, targets, ntargets, sizes, nsizes); +} \ No newline at end of file diff --git a/test_conformance/gl/test_images_3D.cpp b/test_conformance/gl/test_images_3D.cpp new file mode 100644 index 00000000..a8c80f6e --- /dev/null +++ b/test_conformance/gl/test_images_3D.cpp @@ -0,0 +1,109 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "testBase.h" +#include "common.h" + +#if defined( __APPLE__ ) + #include +#else + #include + #include +#endif +#include + +using namespace std; + +extern "C" { extern cl_uint gRandomSeed; }; + +#pragma mark - +#pragma mark _3D read test + +void calc_3D_size_descriptors(sizevec_t* sizes, size_t nsizes) +{ + // Need to limit array size according to GL device properties + GLint maxTextureSize = 2048; + glGetIntegerv(GL_MAX_3D_TEXTURE_SIZE, &maxTextureSize); + + RandomSeed seed( gRandomSeed ); + + // Generate some random sizes (within reasonable ranges) + for (size_t i = 0; i < nsizes; i++) { + sizes[i].width = random_in_range( 2, min(maxTextureSize, 1<<(i+4)), seed ); + sizes[i].height = random_in_range( 2, min(maxTextureSize, 1<<(i+4)), seed ); + sizes[i].depth = random_in_range( 2, min(maxTextureSize, 1<<(i+4)), seed ); + } +} + +int test_images_read_3D( cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) +{ + GLenum targets[] = { GL_TEXTURE_3D }; + size_t ntargets = 1; + + size_t nformats = sizeof(common_formats) / sizeof(common_formats[0]); + + const size_t nsizes = 6; + sizevec_t sizes[nsizes]; + calc_3D_size_descriptors(sizes, nsizes); + + return test_images_read_common(device, context, queue, common_formats, + nformats, targets, ntargets, sizes, nsizes); +} + +#pragma mark - +#pragma mark _3D write test + +int test_images_write_3D( cl_device_id device, cl_context context, + cl_command_queue queue, int numElements ) +{ + // TODO: Perhaps the expected behavior is to FAIL if 3D images are + // unsupported? + + if (!is_extension_available(device, "cl_khr_3d_image_writes")) { + log_info("This device does not support 3D image writes. 
Skipping test.\n"); + return 0; + } + + GLenum targets[] = { GL_TEXTURE_3D }; + size_t ntargets = sizeof(targets) / sizeof(targets[0]); + size_t nformats = sizeof(common_formats) / sizeof(common_formats[0]); + + + const size_t nsizes = 6; + sizevec_t sizes[nsizes]; + calc_3D_size_descriptors(sizes, nsizes); + + return test_images_write_common( device, context, queue, common_formats, + nformats, targets, ntargets, sizes, nsizes ); +} + +#pragma mark - +#pragma mark _3D get info test + +int test_images_3D_getinfo( cl_device_id device, cl_context context, + cl_command_queue queue, int numElements ) +{ + GLenum targets[] = { GL_TEXTURE_3D }; + size_t ntargets = 1; + + size_t nformats = sizeof(common_formats) / sizeof(common_formats[0]); + + const size_t nsizes = 6; + sizevec_t sizes[nsizes]; + calc_3D_size_descriptors(sizes, nsizes); + + return test_images_get_info_common(device, context, queue, common_formats, + nformats, targets, ntargets, sizes, nsizes); +} diff --git a/test_conformance/gl/test_images_depth.cpp b/test_conformance/gl/test_images_depth.cpp new file mode 100644 index 00000000..09152850 --- /dev/null +++ b/test_conformance/gl/test_images_depth.cpp @@ -0,0 +1,162 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "testBase.h" +#include "common.h" + +#if defined( __APPLE__ ) +#include +#else +#include +#include +#endif + +#include + +using namespace std; + +extern "C" { extern cl_uint gRandomSeed; } + +#pragma mark - +#pragma mark _2D depth read tests + +void calc_depth_size_descriptors(sizevec_t* sizes, size_t nsizes) +{ + // Need to limit texture size according to GL device properties + GLint maxTextureSize = 4096, maxTextureRectangleSize = 4096, size; + glGetIntegerv(GL_MAX_TEXTURE_SIZE, &maxTextureSize); + glGetIntegerv(GL_MAX_RECTANGLE_TEXTURE_SIZE_EXT, &maxTextureRectangleSize); + + size = min(maxTextureSize, maxTextureRectangleSize); + + RandomSeed seed( gRandomSeed ); + + // Generate some random sizes (within reasonable ranges) + for (size_t i = 0; i < nsizes; i++) { + sizes[i].width = random_in_range( 2, min(size, 1<<(i+4)), seed ); + sizes[i].height = random_in_range( 2, min(size, 1<<(i+4)), seed ); + sizes[i].depth = 1; + } +} + +void calc_depth_array_size_descriptors(sizevec_t* sizes, size_t nsizes) +{ + // Need to limit texture size according to GL device properties + GLint maxTextureSize = 4096, maxTextureRectangleSize = 4096, maxTextureLayers = 16, size; + glGetIntegerv(GL_MAX_TEXTURE_SIZE, &maxTextureSize); + glGetIntegerv(GL_MAX_RECTANGLE_TEXTURE_SIZE_EXT, &maxTextureRectangleSize); + glGetIntegerv(GL_MAX_ARRAY_TEXTURE_LAYERS, &maxTextureLayers); + + size = min(maxTextureSize, maxTextureRectangleSize); + + RandomSeed seed( gRandomSeed ); + + // Generate some random sizes (within reasonable ranges) + for (size_t i = 0; i < nsizes; i++) { + sizes[i].width = random_in_range( 2, min(size, 1<<(i+4)), seed ); + sizes[i].height = random_in_range( 2, min(size, 1<<(i+4)), seed ); + sizes[i].depth = random_in_range( 2, min(maxTextureLayers, 1<<(i+4)), seed ); + } +} + +int test_images_read_2D_depth( cl_device_id device, cl_context context, + cl_command_queue queue, int numElements ) +{ + if (!is_extension_available(device, "cl_khr_gl_depth_images")) { + 
log_info("Test not run because 'cl_khr_gl_depth_images' extension is not supported by the tested device\n"); + return 0; + } + + RandomSeed seed( gRandomSeed ); + + GLenum targets[] = { GL_TEXTURE_2D, GL_TEXTURE_RECTANGLE_EXT }; + size_t ntargets = sizeof(targets) / sizeof(targets[0]); + + size_t nformats = sizeof(depth_formats) / sizeof(depth_formats[0]); + + const size_t nsizes = 8; + sizevec_t sizes[nsizes]; + calc_depth_size_descriptors(sizes, nsizes); + + return test_images_read_common(device, context, queue, depth_formats, + nformats, targets, ntargets, sizes, nsizes); +} + +#pragma mark - +#pragma mark _2D depth write tests + + +int test_images_write_2D_depth( cl_device_id device, cl_context context, + cl_command_queue queue, int numElements ) +{ + if (!is_extension_available(device, "cl_khr_gl_depth_images")) { + log_info("Test not run because 'cl_khr_gl_depth_images' extension is not supported by the tested device\n"); + return 0; + } + + GLenum targets[] = { GL_TEXTURE_2D, GL_TEXTURE_RECTANGLE_EXT }; + size_t ntargets = sizeof(targets) / sizeof(targets[0]); + size_t nformats = sizeof(depth_formats) / sizeof(depth_formats[0]); + + const size_t nsizes = 8; + sizevec_t sizes[nsizes]; + calc_depth_size_descriptors(sizes, nsizes); + + return test_images_write_common( device, context, queue, depth_formats, + nformats, targets, ntargets, sizes, nsizes ); +} + +int test_images_read_2Darray_depth( cl_device_id device, cl_context context, + cl_command_queue queue, int ) +{ + if (!is_extension_available(device, "cl_khr_gl_depth_images")) { + log_info("Test not run because 'cl_khr_gl_depth_images' extension is not supported by the tested device\n"); + return 0; + } + + size_t nformats = sizeof(depth_formats) / sizeof(depth_formats[0]); + GLenum targets[] = { GL_TEXTURE_2D_ARRAY }; + size_t ntargets = sizeof(targets) / sizeof(targets[0]); + + const size_t nsizes = 6; + sizevec_t sizes[nsizes]; + calc_depth_array_size_descriptors(sizes, nsizes); + + return 
test_images_read_common(device, context, queue, depth_formats, + nformats, targets, ntargets, sizes, nsizes); +} + +int test_images_write_2Darray_depth( cl_device_id device, cl_context context, + cl_command_queue queue, int numElements ) +{ + if (!is_extension_available(device, "cl_khr_gl_depth_images")) { + log_info("Test not run because 'cl_khr_gl_depth_images' extension is not supported by the tested device\n"); + return 0; + } + + // FIXME: Query for 2D image array write support. + + GLenum targets[] = { GL_TEXTURE_2D_ARRAY }; + size_t ntargets = sizeof(targets) / sizeof(targets[0]); + size_t nformats = sizeof(depth_formats) / sizeof(depth_formats[0]); + + const size_t nsizes = 6; + sizevec_t sizes[nsizes]; + calc_depth_array_size_descriptors(sizes, nsizes); + + return test_images_write_common( device, context, queue, depth_formats, + nformats, targets, ntargets, sizes, nsizes ); +} + diff --git a/test_conformance/gl/test_images_getinfo_common.cpp b/test_conformance/gl/test_images_getinfo_common.cpp new file mode 100644 index 00000000..bc21496a --- /dev/null +++ b/test_conformance/gl/test_images_getinfo_common.cpp @@ -0,0 +1,262 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "testBase.h" +#include "common.h" + +#if defined( __APPLE__ ) + #include +#else + #include + #include +#endif + +extern "C" { extern cl_uint gRandomSeed; }; + +extern int supportsHalf(cl_context context, bool* supports_half); + +static int test_image_info( cl_context context, cl_command_queue queue, + GLenum glTarget, GLuint glTexture, size_t imageWidth, size_t imageHeight, + size_t imageDepth, cl_image_format *outFormat, ExplicitType *outType, + void **outResultBuffer ) +{ + clMemWrapper streams[ 2 ]; + + int error; + + // Create a CL image from the supplied GL texture + streams[ 0 ] = (*clCreateFromGLTexture_ptr)( context, CL_MEM_READ_ONLY, + glTarget, 0, glTexture, &error ); + if( error != CL_SUCCESS ) + { + print_error( error, "Unable to create CL image from GL texture" ); + GLint fmt; + glGetTexLevelParameteriv( glTarget, 0, GL_TEXTURE_INTERNAL_FORMAT, &fmt ); + log_error( " Supplied GL texture was format %s\n", GetGLFormatName( fmt ) ); + return error; + } + + // Determine data type and format that CL came up with + error = clGetImageInfo( streams[ 0 ], CL_IMAGE_FORMAT, + sizeof( cl_image_format ), outFormat, NULL ); + test_error( error, "Unable to get CL image format" ); + + cl_gl_object_type object_type; + switch (glTarget) { + case GL_TEXTURE_1D: + object_type = CL_GL_OBJECT_TEXTURE1D; + break; + case GL_TEXTURE_BUFFER: + object_type = CL_GL_OBJECT_TEXTURE_BUFFER; + break; + case GL_TEXTURE_1D_ARRAY: + object_type = CL_GL_OBJECT_TEXTURE1D_ARRAY; + break; + case GL_TEXTURE_2D: + case GL_TEXTURE_RECTANGLE_EXT: + case GL_TEXTURE_CUBE_MAP_POSITIVE_X: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: + object_type = CL_GL_OBJECT_TEXTURE2D; + break; + case GL_TEXTURE_2D_ARRAY: + object_type = CL_GL_OBJECT_TEXTURE2D_ARRAY; + break; + case GL_TEXTURE_3D: + object_type = CL_GL_OBJECT_TEXTURE3D; + break; + 
default: + log_error("Unsupported texture target."); + return 1; + } + + return CheckGLObjectInfo(streams[0], object_type, glTexture, glTarget, 0); +} + +static int test_image_format_get_info( + cl_context context, cl_command_queue queue, + size_t width, size_t height, size_t depth, + GLenum target, struct format* fmt, MTdata data) +{ + int error = 0; + + // If we're testing a half float format, then we need to determine the + // rounding mode of this machine. Punt if we fail to do so. + + if( fmt->type == kHalf ) + { + if( DetectFloatToHalfRoundingMode(queue) ) + return 0; + bool supports_half = false; + error = supportsHalf(context, &supports_half); + if( error != 0 ) + return error; + if (!supports_half) return 0; + } + + size_t w = width, h = height, d = depth; + + // Unpack the format and use it, along with the target, to create an + // appropriate GL texture. + + GLenum gl_fmt = fmt->formattype; + GLenum gl_internal_fmt = fmt->internal; + GLenum gl_type = fmt->datatype; + ExplicitType type = fmt->type; + + glTextureWrapper texture; + glBufferWrapper glbuf; + + // If we're testing a half float format, then we need to determine the + // rounding mode of this machine. Punt if we fail to do so. + + if( fmt->type == kHalf ) + if( DetectFloatToHalfRoundingMode(queue) ) + return 1; + + // Use the correct texture creation function depending on the target, and + // adjust width, height, depth as appropriate so subsequent size calculations + // succeed. 
+ + switch (target) { + case GL_TEXTURE_1D: + h = 1; d = 1; + CreateGLTexture1D( width, target, gl_fmt, + gl_internal_fmt, gl_type, type, &texture, &error, false, data ); + break; + case GL_TEXTURE_BUFFER: + h = 1; d = 1; + CreateGLTextureBuffer( width, target, gl_fmt, + gl_internal_fmt, gl_type, type, &texture, &glbuf, &error, false, data ); + break; + case GL_TEXTURE_1D_ARRAY: + d = 1; + CreateGLTexture1DArray( width, height, target, gl_fmt, + gl_internal_fmt, gl_type, type, &texture, &error, false, data ); + break; + case GL_TEXTURE_RECTANGLE_EXT: + case GL_TEXTURE_2D: + case GL_TEXTURE_CUBE_MAP_POSITIVE_X: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: + d = 1; + CreateGLTexture2D( width, height, target, gl_fmt, + gl_internal_fmt, gl_type, type, &texture, &error, false, data ); + break; + case GL_TEXTURE_2D_ARRAY: + CreateGLTexture2DArray( width, height, depth, target, gl_fmt, + gl_internal_fmt, gl_type, type, &texture, &error, false, data ); + break; + case GL_TEXTURE_3D: + // 3D textures have a real depth: keep d == depth (was `d = 1;`, which misreported depth downstream) + CreateGLTexture3D( width, height, depth, target, gl_fmt, + gl_internal_fmt, gl_type, type, &texture, &error, data, false ); + break; + default: + log_error("Unsupported texture target.\n"); + return 1; + } + + if ( error == -2 ) { + log_info("OpenGL texture couldn't be created, because a texture is too big. Skipping test.\n"); + return 0; + } + + if ( error != 0 ) { + if ((gl_fmt == GL_RGBA_INTEGER_EXT) && (!CheckGLIntegerExtensionSupport())) { + log_info("OpenGL version does not support GL_RGBA_INTEGER_EXT. 
" + "Skipping test.\n"); + return 0; + } else { + return error; + } + } + + cl_image_format clFormat; + ExplicitType actualType; + char *outBuffer; + + // Perform the info check: + return test_image_info( context, queue, target, texture, w, h, d, &clFormat, + &actualType, (void **)&outBuffer ); +} + +int test_images_get_info_common( cl_device_id device, cl_context context, + cl_command_queue queue, struct format* formats, size_t nformats, + GLenum *targets, size_t ntargets, sizevec_t *sizes, size_t nsizes ) +{ + int error = 0; + RandomSeed seed(gRandomSeed); + + // First, ensure this device supports images. + + if (checkForImageSupport(device)) { + log_info("Device does not support images. Skipping test.\n"); + return 0; + } + + size_t fidx, tidx, sidx; + + // Test each format on every target, every size. + + for ( fidx = 0; fidx < nformats; fidx++ ) { + for ( tidx = 0; tidx < ntargets; tidx++ ) { + + if ( formats[ fidx ].datatype == GL_UNSIGNED_INT_2_10_10_10_REV ) + { + // Check if the RGB 101010 format is supported + if ( is_rgb_101010_supported( context, targets[ tidx ] ) == 0 ) + break; // skip + } + + log_info( "Testing image info for GL format %s : %s : %s : %s\n", + GetGLTargetName( targets[ tidx ] ), + GetGLFormatName( formats[ fidx ].internal ), + GetGLBaseFormatName( formats[ fidx ].formattype ), + GetGLTypeName( formats[ fidx ].datatype ) ); + + for ( sidx = 0; sidx < nsizes; sidx++ ) { + + // Test this format + size: + + if ( test_image_format_get_info(context, queue, + sizes[sidx].width, sizes[sidx].height, sizes[sidx].depth, + targets[tidx], &formats[fidx], seed) ) + { + // We land here in the event of test failure. + + log_error( "ERROR: Image info test failed for %s : %s : %s : %s\n\n", + GetGLTargetName( targets[ tidx ] ), + GetGLFormatName( formats[ fidx ].internal ), + GetGLBaseFormatName( formats[ fidx ].formattype ), + GetGLTypeName( formats[ fidx ].datatype ) ); + error++; + + // Skip the other sizes for this format. 
+ + break; + } + } + } + } + + return error; +} diff --git a/test_conformance/gl/test_images_multisample.cpp b/test_conformance/gl/test_images_multisample.cpp new file mode 100644 index 00000000..99f9ff2e --- /dev/null +++ b/test_conformance/gl/test_images_multisample.cpp @@ -0,0 +1,118 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "common.h" +#include "testBase.h" + +#if defined( __APPLE__ ) + #include +#else + #include + #include +#endif + +#include + +using namespace std; + +void calc_2D_multisample_size_descriptors(sizevec_t* sizes, size_t nsizes) +{ + // Need to limit texture size according to GL device properties + GLint maxTextureSize = 4096; + glGetIntegerv(GL_MAX_TEXTURE_SIZE, &maxTextureSize); + + RandomSeed seed( gRandomSeed ); + + // Generate some random sizes (within reasonable ranges) + for (size_t i = 0; i < nsizes; i++) { + sizes[i].width = random_in_range( 2, min(maxTextureSize, 1<<(i+4)), seed ); + sizes[i].height = random_in_range( 2, min(maxTextureSize, 1<<(i+4)), seed ); + sizes[i].depth = 1; + } +} + +void calc_2D_array_multisample_size_descriptors(sizevec_t* sizes, size_t nsizes) +{ + // Need to limit array size according to GL device properties + GLint maxTextureLayers = 16, maxTextureSize = 4096; + glGetIntegerv(GL_MAX_ARRAY_TEXTURE_LAYERS, &maxTextureLayers); + glGetIntegerv(GL_MAX_TEXTURE_SIZE, &maxTextureSize); + + RandomSeed seed( gRandomSeed ); + + // Generate 
some random sizes (within reasonable ranges) + for (size_t i = 0; i < nsizes; i++) { + sizes[i].width = random_in_range( 2, min(maxTextureSize, 1<<(i+4)), seed ); + sizes[i].height = random_in_range( 2, min(maxTextureSize, 1<<(i+4)), seed ); + sizes[i].depth = random_in_range( 2, min(maxTextureLayers, 1<<(i+4)), seed ); + } +} + +int test_images_read_2D_multisample( cl_device_id device, cl_context context, + cl_command_queue queue, int numElements ) +{ + if (!is_extension_available(device, "cl_khr_gl_msaa_sharing")) { + log_info("Test not run because 'cl_khr_gl_msaa_sharing' extension is not supported by the tested device\n"); + return 0; + } + + glEnable(GL_MULTISAMPLE); + + const size_t nsizes = 8; + sizevec_t sizes[nsizes]; + calc_2D_multisample_size_descriptors(sizes, nsizes); + + size_t nformats; + + GLenum targets[] = { GL_TEXTURE_2D_MULTISAMPLE }; + size_t ntargets = sizeof(targets) / sizeof(targets[0]); + + nformats = sizeof(common_formats) / sizeof(common_formats[0]); + int ret_common = test_images_read_common(device, context, queue, common_formats, nformats, targets, ntargets, sizes, nsizes); + + nformats = sizeof(depth_formats) / sizeof(depth_formats[0]); + int ret_depth = test_images_read_common(device, context, queue, depth_formats, nformats, targets, ntargets, sizes, nsizes); + + return (ret_common) ? 
ret_common : ret_depth; +} + +int test_images_read_2Darray_multisample( cl_device_id device, cl_context context, + cl_command_queue queue, int ) +{ + if (!is_extension_available(device, "cl_khr_gl_msaa_sharing")) { + log_info("Test not run because 'cl_khr_gl_msaa_sharing' extension is not supported by the tested device\n"); + return 0; + } + + glEnable(GL_MULTISAMPLE); + + const size_t nsizes = 4; + sizevec_t sizes[nsizes]; + calc_2D_array_multisample_size_descriptors(sizes, nsizes); + + size_t nformats; + + GLenum targets[] = { GL_TEXTURE_2D_MULTISAMPLE_ARRAY }; + size_t ntargets = sizeof(targets) / sizeof(targets[0]); + + nformats = sizeof(common_formats) / sizeof(common_formats[0]); + int ret_common = test_images_read_common(device, context, queue, common_formats, nformats, targets, ntargets, sizes, nsizes); + + nformats = sizeof(depth_formats) / sizeof(depth_formats[0]); + int ret_depth = test_images_read_common(device, context, queue, depth_formats, nformats, targets, ntargets, sizes, nsizes); + + return (ret_common) ? ret_common : ret_depth; +} + diff --git a/test_conformance/gl/test_images_read_common.cpp b/test_conformance/gl/test_images_read_common.cpp new file mode 100644 index 00000000..068b9a29 --- /dev/null +++ b/test_conformance/gl/test_images_read_common.cpp @@ -0,0 +1,736 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "common.h" +#include "testBase.h" + +#if defined( __APPLE__ ) + #include +#else + #include + #include +#endif + +extern "C" { extern cl_uint gRandomSeed; }; + +extern int supportsHalf(cl_context context, bool* supports_half); +extern int supportsMsaa(cl_context context, bool* supports_msaa); +extern int supportsDepth(cl_context context, bool* supports_depth); + +static const char *kernelpattern_image_read_1d = +"__kernel void sample_test( read_only image1d_t source, sampler_t sampler, __global %s4 *results )\n" +"{\n" +" int offset = get_global_id(0);\n" +" results[ offset ] = read_image%s( source, sampler, offset );\n" +"}\n"; + +static const char *kernelpattern_image_read_1d_buffer = +"__kernel void sample_test( read_only image1d_buffer_t source, sampler_t sampler, __global %s4 *results )\n" +"{\n" +" int offset = get_global_id(0);\n" +" results[ offset ] = read_image%s( source, offset );\n" +"}\n"; + +static const char *kernelpattern_image_read_1darray = +"__kernel void sample_test( read_only image1d_array_t source, sampler_t sampler, __global %s4 *results )\n" +"{\n" +" int tidX = get_global_id(0);\n" +" int tidY = get_global_id(1);\n" +" results[ tidY * get_image_width( source ) + tidX ] = read_image%s( source, sampler, (int2)( tidX, tidY ) );\n" +"}\n"; + +static const char *kernelpattern_image_read_2d = +"__kernel void sample_test( read_only image2d_t source, sampler_t sampler, __global %s4 *results )\n" +"{\n" +" int tidX = get_global_id(0);\n" +" int tidY = get_global_id(1);\n" +" results[ tidY * get_image_width( source ) + tidX ] = read_image%s( source, sampler, (int2)( tidX, tidY ) );\n" +"}\n"; + +static const char *kernelpattern_image_read_2darray = +"__kernel void sample_test( read_only image2d_array_t source, sampler_t sampler, __global %s4 *results )\n" +"{\n" +" int tidX = get_global_id(0);\n" +" int tidY = get_global_id(1);\n" +" int tidZ = get_global_id(2);\n" +" int width = get_image_width( source );\n" +" int height = 
get_image_height( source );\n" +" int offset = tidZ * width * height + tidY * width + tidX;\n" +"\n" +" results[ offset ] = read_image%s( source, sampler, (int4)( tidX, tidY, tidZ, 0 ) );\n" +"}\n"; + +static const char *kernelpattern_image_read_3d = +"__kernel void sample_test( read_only image3d_t source, sampler_t sampler, __global %s4 *results )\n" +"{\n" +" int tidX = get_global_id(0);\n" +" int tidY = get_global_id(1);\n" +" int tidZ = get_global_id(2);\n" +" int width = get_image_width( source );\n" +" int height = get_image_height( source );\n" +" int offset = tidZ * width * height + tidY * width + tidX;\n" +"\n" +" results[ offset ] = read_image%s( source, sampler, (int4)( tidX, tidY, tidZ, 0 ) );\n" +"}\n"; + +static const char *kernelpattern_image_read_2d_depth = +"__kernel void sample_test( read_only image2d_depth_t source, sampler_t sampler, __global %s *results )\n" +"{\n" +" int tidX = get_global_id(0);\n" +" int tidY = get_global_id(1);\n" +" results[ tidY * get_image_width( source ) + tidX ] = read_image%s( source, sampler, (int2)( tidX, tidY ) );\n" +"}\n"; + +static const char *kernelpattern_image_read_2darray_depth = +"__kernel void sample_test( read_only image2d_array_depth_t source, sampler_t sampler, __global %s *results )\n" +"{\n" +" int tidX = get_global_id(0);\n" +" int tidY = get_global_id(1);\n" +" int tidZ = get_global_id(2);\n" +" int width = get_image_width( source );\n" +" int height = get_image_height( source );\n" +" int offset = tidZ * width * height + tidY * width + tidX;\n" +"\n" +" results[ offset ] = read_image%s( source, sampler, (int4)( tidX, tidY, tidZ, 0 ) );\n" +"}\n"; + +static const char *kernelpattern_image_multisample_read_2d = +"__kernel void sample_test( read_only image2d_msaa_t source, sampler_t sampler, __global %s4 *results )\n" +"{\n" +" int tidX = get_global_id(0);\n" +" int tidY = get_global_id(1);\n" +" int width = get_image_width( source );\n" +" int height = get_image_height( source );\n" +" int num_samples 
= get_image_num_samples( source );\n" +" for(size_t sample = 0; sample < num_samples; sample++ ) {\n" +" int offset = sample * width * height + tidY * width + tidX;\n" +" results[ offset ] = read_image%s( source, (int2)( tidX, tidY ), sample );\n" +" }\n" +"}\n"; + +static const char *kernelpattern_image_multisample_read_2d_depth = + "__kernel void sample_test( read_only image2d_msaa_depth_t source, sampler_t sampler, __global %s *results )\n" +"{\n" +" int tidX = get_global_id(0);\n" +" int tidY = get_global_id(1);\n" +" int width = get_image_width( source );\n" +" int height = get_image_height( source );\n" + " int num_samples = get_image_num_samples( source );\n" + " for(size_t sample = 0; sample < num_samples; sample++ ) {\n" +" int offset = sample * width * height + tidY * width + tidX;\n" +" results[ offset ] = read_image%s( source, (int2)( tidX, tidY ), sample );\n" + " }\n" +"}\n"; + +static const char *kernelpattern_image_multisample_read_2darray = +"__kernel void sample_test( read_only image2d_array_msaa_t source, sampler_t sampler, __global %s4 *results )\n" +"{\n" +" int tidX = get_global_id(0);\n" +" int tidY = get_global_id(1);\n" +" int tidZ = get_global_id(2);\n" +" int num_samples = get_image_num_samples( source );\n" +" int width = get_image_width( source );\n" +" int height = get_image_height( source );\n" +" int array_size = get_image_array_size( source );\n" +" for(size_t sample = 0; sample< num_samples; ++sample) {\n" +" int offset = (array_size * width * height) * sample + (width * height) * tidZ + tidY * width + tidX;\n" +" results[ offset ] = read_image%s( source, (int4)( tidX, tidY, tidZ, 1 ), sample );\n" +" }\n" +"}\n"; + +static const char *kernelpattern_image_multisample_read_2darray_depth = + "__kernel void sample_test( read_only image2d_array_msaa_depth_t source, sampler_t sampler, __global %s *results )\n" +"{\n" +" int tidX = get_global_id(0);\n" +" int tidY = get_global_id(1);\n" +" int tidZ = get_global_id(2);\n" +" int 
num_samples = get_image_num_samples( source );\n" +" int width = get_image_width( source );\n" +" int height = get_image_height( source );\n" + " int array_size = get_image_array_size( source );\n" + " for(size_t sample = 0; sample < num_samples; ++sample) {\n" + " int offset = (array_size * width * height) * sample + (width * height) * tidZ + tidY * width + tidX;\n" + " results[ offset ] = read_image%s( source, (int4)( tidX, tidY, tidZ, 1 ), sample );\n" + " }\n" +"}\n"; + +static const char* get_appropriate_kernel_for_target(GLenum target, cl_channel_order channel_order) { + + switch (get_base_gl_target(target)) { + case GL_TEXTURE_1D: + return kernelpattern_image_read_1d; + case GL_TEXTURE_BUFFER: + return kernelpattern_image_read_1d_buffer; + case GL_TEXTURE_1D_ARRAY: + return kernelpattern_image_read_1darray; + case GL_TEXTURE_RECTANGLE_EXT: + case GL_TEXTURE_2D: + case GL_COLOR_ATTACHMENT0: + case GL_RENDERBUFFER: + case GL_TEXTURE_CUBE_MAP: +#ifdef GL_VERSION_3_2 + if(channel_order == CL_DEPTH || channel_order == CL_DEPTH_STENCIL) + return kernelpattern_image_read_2d_depth; +#endif + return kernelpattern_image_read_2d; + case GL_TEXTURE_2D_ARRAY: +#ifdef GL_VERSION_3_2 + if(channel_order == CL_DEPTH || channel_order == CL_DEPTH_STENCIL) + return kernelpattern_image_read_2darray_depth; +#endif + return kernelpattern_image_read_2darray; + case GL_TEXTURE_3D: + return kernelpattern_image_read_3d; + case GL_TEXTURE_2D_MULTISAMPLE: +#ifdef GL_VERSION_3_2 + if(channel_order == CL_DEPTH || channel_order == CL_DEPTH_STENCIL) + return kernelpattern_image_multisample_read_2d_depth; +#endif + return kernelpattern_image_multisample_read_2d; + break; + case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: +#ifdef GL_VERSION_3_2 + if(channel_order == CL_DEPTH || channel_order == CL_DEPTH_STENCIL) + return kernelpattern_image_multisample_read_2darray_depth; +#endif + return kernelpattern_image_multisample_read_2darray; + break; + default: + log_error("Unsupported texture target (%s); 
cannot determine " + "appropriate kernel.", GetGLTargetName(target)); + return NULL; + } +} + +int test_cl_image_read( cl_context context, cl_command_queue queue, + GLenum gl_target, cl_mem image, size_t width, size_t height, size_t depth, size_t sampleNum, + cl_image_format *outFormat, ExplicitType *outType, void **outResultBuffer ) +{ + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper streams[ 2 ]; + + int error; + char kernelSource[2048]; + char *programPtr; + + // Use the image created from the GL texture. + streams[ 0 ] = image; + + // Determine data type and format that CL came up with + error = clGetImageInfo( streams[ 0 ], CL_IMAGE_FORMAT, sizeof( cl_image_format ), outFormat, NULL ); + test_error( error, "Unable to get CL image format" ); + + // Determine the number of samples + cl_uint samples = 0; + error = clGetImageInfo( streams[ 0 ], CL_IMAGE_NUM_SAMPLES, sizeof( samples ), &samples, NULL ); + test_error( error, "Unable to get CL_IMAGE_NUM_SAMPLES" ); + + // Create the source + *outType = get_read_kernel_type( outFormat ); + size_t channelSize = get_explicit_type_size( *outType ); + + const char* source = get_appropriate_kernel_for_target(gl_target, outFormat->image_channel_order); + + sprintf( kernelSource, source, get_explicit_type_name( *outType ), + get_kernel_suffix( outFormat ) ); + + programPtr = kernelSource; + if( create_single_kernel_helper( context, &program, &kernel, 1, + (const char **)&programPtr, "sample_test" ) ) + { + return -1; + } + + // Create a vanilla output buffer + cl_device_id device; + error = clGetCommandQueueInfo(queue, CL_QUEUE_DEVICE, sizeof(device), &device, NULL); + test_error( error, "Unable to get queue device" ); + + cl_ulong maxAllocSize = 0; + error = clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL ); + test_error( error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE" ); + + size_t buffer_bytes = channelSize * 
get_channel_order_channel_count(outFormat->image_channel_order) * width * height * depth * sampleNum; + if (buffer_bytes > maxAllocSize) { + log_info("Output buffer size %d is too large for device (max alloc size %d) Skipping...\n", + (int)buffer_bytes, (int)maxAllocSize); + return 1; + } + + streams[ 1 ] = clCreateBuffer( context, CL_MEM_READ_WRITE, buffer_bytes, NULL, &error ); + test_error( error, "Unable to create output buffer" ); + + /* Assign streams and execute */ + cl_sampler_properties properties[] = { + CL_SAMPLER_NORMALIZED_COORDS, CL_FALSE, + CL_SAMPLER_ADDRESSING_MODE, CL_ADDRESS_NONE, + CL_SAMPLER_FILTER_MODE, CL_FILTER_NEAREST, + 0 }; + clSamplerWrapper sampler = clCreateSamplerWithProperties( context, properties, &error ); + test_error( error, "Unable to create sampler" ); + + error = clSetKernelArg( kernel, 0, sizeof( streams[ 0 ] ), &streams[ 0 ] ); + test_error( error, "Unable to set kernel arguments" ); + error = clSetKernelArg( kernel, 1, sizeof( sampler ), &sampler ); + test_error( error, "Unable to set kernel arguments" ); + error = clSetKernelArg( kernel, 2, sizeof( streams[ 1 ] ), &streams[ 1 ] ); + test_error( error, "Unable to set kernel arguments" ); + + glFinish(); + + error = (*clEnqueueAcquireGLObjects_ptr)( queue, 1, &streams[ 0 ], 0, NULL, NULL); + test_error( error, "Unable to acquire GL obejcts"); + + // The ND range we use is a function of the dimensionality of the image. 
+ size_t global_range[3] = { width, height, depth }; + size_t *local_range = NULL; + int ndim = 1; + + switch (get_base_gl_target(gl_target)) { + case GL_TEXTURE_1D: + case GL_TEXTURE_BUFFER: + ndim = 1; + break; + case GL_TEXTURE_RECTANGLE_EXT: + case GL_TEXTURE_2D: + case GL_TEXTURE_1D_ARRAY: + case GL_COLOR_ATTACHMENT0: + case GL_RENDERBUFFER: + case GL_TEXTURE_CUBE_MAP: + ndim = 2; + break; + case GL_TEXTURE_3D: + case GL_TEXTURE_2D_ARRAY: +#ifdef GL_VERSION_3_2 + case GL_TEXTURE_2D_MULTISAMPLE: + case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: + ndim = 3; + break; +#endif + default: + log_error("Test error: Unsupported texture target.\n"); + return 1; + } + + // 2D and 3D images have a special way to set the local size (legacy). + // Otherwise, we let CL select by leaving local_range as NULL. + + if (gl_target == GL_TEXTURE_2D) { + local_range = (size_t*)malloc(sizeof(size_t) * ndim); + get_max_common_2D_work_group_size( context, kernel, global_range, local_range ); + + } else if (gl_target == GL_TEXTURE_3D) { + local_range = (size_t*)malloc(sizeof(size_t) * ndim); + get_max_common_3D_work_group_size( context, kernel, global_range, local_range ); + } + + error = clEnqueueNDRangeKernel( queue, kernel, ndim, NULL, global_range, + local_range, 0, NULL, NULL ); + test_error( error, "Unable to execute test kernel" ); + + error = (*clEnqueueReleaseGLObjects_ptr)( queue, 1, &streams[ 0 ], + 0, NULL, NULL ); + test_error(error, "clEnqueueReleaseGLObjects failed"); + + // Read results from the CL buffer + *outResultBuffer = (void *)( new char[ channelSize * get_channel_order_channel_count(outFormat->image_channel_order) * width * height * depth * sampleNum] ); + error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, + channelSize * get_channel_order_channel_count(outFormat->image_channel_order) * width * height * depth * sampleNum, *outResultBuffer, 0, NULL, NULL ); + test_error( error, "Unable to read output CL buffer!" 
); + + // free the ranges + if (local_range) free(local_range); + + return 0; +} + +static int test_image_read( cl_context context, cl_command_queue queue, + GLenum target, GLuint globj, size_t width, size_t height, size_t depth, size_t sampleNum, + cl_image_format *outFormat, ExplicitType *outType, void **outResultBuffer ) +{ + int error; + + // Create a CL image from the supplied GL texture or renderbuffer. + cl_mem image; + if (target == GL_RENDERBUFFER || target == GL_COLOR_ATTACHMENT0) { + image = (*clCreateFromGLRenderbuffer_ptr)( context, CL_MEM_READ_ONLY, globj, &error ); + } else { + image = (*clCreateFromGLTexture_ptr)( context, CL_MEM_READ_ONLY, + target, 0, globj, &error ); + } + + if( error != CL_SUCCESS ) { + if (target == GL_RENDERBUFFER || target == GL_COLOR_ATTACHMENT0) { + print_error( error, "Unable to create CL image from GL renderbuffer" ); + } else { + print_error( error, "Unable to create CL image from GL texture" ); + GLint fmt; + glGetTexLevelParameteriv( target, 0, GL_TEXTURE_INTERNAL_FORMAT, &fmt ); + log_error( " Supplied GL texture was base format %s and internal " + "format %s\n", GetGLBaseFormatName( fmt ), GetGLFormatName( fmt ) ); + } + return error; + } + + return test_cl_image_read( context, queue, target, image, + width, height, depth, sampleNum, outFormat, outType, outResultBuffer ); +} + +static int test_image_format_read( + cl_context context, cl_command_queue queue, + size_t width, size_t height, size_t depth, + GLenum target, struct format* fmt, MTdata data) +{ + int error = 0; + + // Determine the maximum number of supported samples + GLint samples = 1; + if (target == GL_TEXTURE_2D_MULTISAMPLE || target == GL_TEXTURE_2D_MULTISAMPLE_ARRAY) + samples = get_gl_max_samples(target, fmt->internal); + + // If we're testing a half float format, then we need to determine the + // rounding mode of this machine. Punt if we fail to do so. 
+ + if( fmt->type == kHalf ) + { + if( DetectFloatToHalfRoundingMode(queue) ) + return 1; + bool supports_half = false; + error = supportsHalf(context, &supports_half); + if( error != 0 ) + return error; + if (!supports_half) return 0; + } +#ifdef GL_VERSION_3_2 + if (get_base_gl_target(target) == GL_TEXTURE_2D_MULTISAMPLE || + get_base_gl_target(target) == GL_TEXTURE_2D_MULTISAMPLE_ARRAY) + { + bool supports_msaa; + error = supportsMsaa(context, &supports_msaa); + if( error != 0 ) return error; + if (!supports_msaa) return 0; + } + if (fmt->formattype == GL_DEPTH_COMPONENT || + fmt->formattype == GL_DEPTH_STENCIL) + { + bool supports_depth; + error = supportsDepth(context, &supports_depth); + if( error != 0 ) return error; + if (!supports_depth) return 0; + } +#endif + size_t w = width, h = height, d = depth; + + // Unpack the format and use it, along with the target, to create an + // appropriate GL texture. + + GLenum gl_fmt = fmt->formattype; + GLenum gl_internal_fmt = fmt->internal; + GLenum gl_type = fmt->datatype; + ExplicitType type = fmt->type; + + // Required for most of the texture-backed cases: + glTextureWrapper texture; + + // Required for the special case of TextureBuffer textures: + glBufferWrapper glbuf; + + // And these are required for the case of Renderbuffer images: + glFramebufferWrapper glFramebuffer; + glRenderbufferWrapper glRenderbuffer; + + void* buffer = NULL; + + // Use the correct texture creation function depending on the target, and + // adjust width, height, depth as appropriate so subsequent size calculations + // succeed. 
+ + switch (get_base_gl_target(target)) { + case GL_TEXTURE_1D: + h = 1; d = 1; + buffer = CreateGLTexture1D( width, target, gl_fmt, + gl_internal_fmt, gl_type, type, &texture, &error, true, data ); + break; + case GL_TEXTURE_BUFFER: + h = 1; d = 1; + buffer = CreateGLTextureBuffer(width, target, gl_fmt, gl_internal_fmt, + gl_type, type, &texture, &glbuf, &error, true, data); + break; + case GL_RENDERBUFFER: + case GL_COLOR_ATTACHMENT0: + d = 1; + buffer = CreateGLRenderbuffer(width, height, target, gl_fmt, + gl_internal_fmt, gl_type, type, &glFramebuffer, &glRenderbuffer, &error, + data, true); + break; + case GL_TEXTURE_2D: + case GL_TEXTURE_RECTANGLE_EXT: + case GL_TEXTURE_CUBE_MAP: + d = 1; + buffer = CreateGLTexture2D(width, height, target, gl_fmt, gl_internal_fmt, + gl_type, type, &texture, &error, true, data); + break; + case GL_TEXTURE_1D_ARRAY: + d = 1; + buffer = CreateGLTexture1DArray( width, height, target, gl_fmt, + gl_internal_fmt, gl_type, type, &texture, &error, true, data ); + break; + case GL_TEXTURE_2D_ARRAY: + buffer = CreateGLTexture2DArray( width, height, depth, target, gl_fmt, + gl_internal_fmt, gl_type, type, &texture, &error, true, data ); + break; + case GL_TEXTURE_3D: + buffer = CreateGLTexture3D( width, height, depth, target, gl_fmt, + gl_internal_fmt, gl_type, type, &texture, &error, data, true ); + break; +#ifdef GL_VERSION_3_2 + case GL_TEXTURE_2D_MULTISAMPLE: + d = 1; + buffer = CreateGLTexture2DMultisample( width, height, samples, target, gl_fmt, + gl_internal_fmt, gl_type, type, &texture, &error, true, data, true ); + break; + case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: + buffer = CreateGLTexture2DArrayMultisample( width, height, depth, samples, target, gl_fmt, + gl_internal_fmt, gl_type, type, &texture, &error, true, data, true ); + break; +#endif + default: + log_error("Unsupported texture target."); + return 1; + } + + if ( error == -2 ) { + log_info("OpenGL texture couldn't be created, because a texture is too big. 
Skipping test.\n"); + return 0; + } + + // Check to see if the texture could not be created for some other reason like + // GL_FRAMEBUFFER_UNSUPPORTED + if (error == GL_FRAMEBUFFER_UNSUPPORTED) { + log_info("Skipping...\n"); + return 0; + } + + if ( error != 0 ) { + if ((gl_fmt == GL_RGBA_INTEGER_EXT) && (!CheckGLIntegerExtensionSupport())){ + log_info("OpenGL version does not support GL_RGBA_INTEGER_EXT. " + "Skipping test.\n"); + return 0; + } else { + return error; + } + } + + BufferOwningPtr inputBuffer(buffer); + if( inputBuffer == NULL ) + return -1; + + cl_image_format clFormat; + ExplicitType actualType; + char *outBuffer; + + // Perform the read: + + GLuint globj = texture; + if (target == GL_RENDERBUFFER || target == GL_COLOR_ATTACHMENT0) { + globj = glRenderbuffer; + } + + error = test_image_read( context, queue, target, globj, w, h, d, samples, &clFormat, + &actualType, (void **)&outBuffer ); + + if( error != 0 ) + return error; + + BufferOwningPtr actualResults(outBuffer); + if( actualResults == NULL ) + return -1; + + log_info( "- Read [%4d x %4d x %4d x %4d] : GL Texture : %s : %s : %s => CL Image : %s : %s \n", + (int)w, (int)h, (int)d, (int)samples, GetGLFormatName( gl_fmt ), GetGLFormatName( gl_internal_fmt ), + GetGLTypeName( gl_type ), GetChannelOrderName( clFormat.image_channel_order ), + GetChannelTypeName( clFormat.image_channel_data_type )); + + BufferOwningPtr convertedInputs; + + // We have to convert our input buffer to the returned type, so we can validate. + // This is necessary because OpenCL might not actually pick an internal format + // that actually matches our input format (for example, if it picks a normalized + // format, the results will come out as floats instead of going in as ints). 
+ + if ( gl_type == GL_UNSIGNED_INT_2_10_10_10_REV ) + { + cl_uint *p = (cl_uint *)buffer; + float *inData = (float *)malloc( w * h * d * samples * sizeof(float) ); + + for( size_t i = 0; i < 4 * w * h * d * samples; i += 4 ) + { + inData[ i + 0 ] = (float)( ( p[ 0 ] >> 20 ) & 0x3ff ) / (float)1023; + inData[ i + 1 ] = (float)( ( p[ 0 ] >> 10 ) & 0x3ff ) / (float)1023; + inData[ i + 2 ] = (float)( p[ 0 ] & 0x3ff ) / (float)1023; + p++; + } + + convertedInputs.reset( inData ); + if( convertedInputs == NULL ) + return -1; + } + else if ( gl_type == GL_DEPTH24_STENCIL8 ) + { + // GL_DEPTH24_STENCIL8 is treated as CL_UNORM_INT24 + CL_DEPTH_STENCIL where + // the stencil is ignored. + cl_uint *p = (cl_uint *)buffer; + float *inData = (float *)malloc( w * h * d * samples * sizeof(float) ); + + for( size_t i = 0; i < w * h * d * samples; i++ ) + { + inData[ i ] = (float)((p[i] >> 8) & 0xffffff) / (float)0xfffffe; + } + + convertedInputs.reset( inData ); + if( convertedInputs == NULL ) + return -1; + } + else if ( gl_type == GL_FLOAT_32_UNSIGNED_INT_24_8_REV) + { + // GL_FLOAT_32_UNSIGNED_INT_24_8_REV is treated as a CL_FLOAT + + // unused 24 + CL_DEPTH_STENCIL; we check the float value and ignore the + // second word + + float *p = (float *)buffer; + float *inData = (float *)malloc( w * h * d * samples * sizeof(float) ); + + for( size_t i = 0; i < w * h * d * samples; i++ ) + { + inData[ i ] = p[i*2]; + } + + convertedInputs.reset( inData ); + if( convertedInputs == NULL ) + return -1; + } + else + { + convertedInputs.reset(convert_to_expected( inputBuffer, + w * h * d * samples, type, actualType, get_channel_order_channel_count(clFormat.image_channel_order) )); + if( convertedInputs == NULL ) + return -1; + } + + // Now we validate + if( actualType == kFloat ) + { + if ( clFormat.image_channel_data_type == CL_UNORM_INT_101010 ) + { + return validate_float_results_rgb_101010( convertedInputs, actualResults, w, h, d, samples ); + } + else + { + return 
validate_float_results( convertedInputs, actualResults, w, h, d, samples, get_channel_order_channel_count(clFormat.image_channel_order) ); + } + } + else + { + return validate_integer_results( convertedInputs, actualResults, w, h, d, samples, get_explicit_type_size( actualType ) ); + } +} + +int test_images_read_common( cl_device_id device, cl_context context, + cl_command_queue queue, struct format* formats, size_t nformats, + GLenum *targets, size_t ntargets, sizevec_t *sizes, size_t nsizes ) +{ + int error = 0; + RandomSeed seed(gRandomSeed); + + // First, ensure this device supports images. + + if (checkForImageSupport(device)) { + log_info("Device does not support images. Skipping test.\n"); + return 0; + } + + size_t fidx, tidx, sidx; + + // Test each format on every target, every size. + + for ( fidx = 0; fidx < nformats; fidx++ ) { + for ( tidx = 0; tidx < ntargets; tidx++ ) { + + // Texture buffer only takes an internal format, so the level data passed + // by the test and used for verification must match the internal format + if ((targets[tidx] == GL_TEXTURE_BUFFER) && (GetGLFormat(formats[ fidx ].internal) != formats[fidx].formattype)) + continue; + + if ( formats[ fidx ].datatype == GL_UNSIGNED_INT_2_10_10_10_REV ) + { + // Check if the RGB 101010 format is supported + if ( is_rgb_101010_supported( context, targets[ tidx ] ) == 0 ) + break; // skip + } + + if (targets[tidx] != GL_TEXTURE_BUFFER) + log_info( "Testing image read for GL format %s : %s : %s : %s\n", + GetGLTargetName( targets[ tidx ] ), + GetGLFormatName( formats[ fidx ].internal ), + GetGLBaseFormatName( formats[ fidx ].formattype ), + GetGLTypeName( formats[ fidx ].datatype ) ); + else + log_info( "Testing image read for GL format %s : %s\n", + GetGLTargetName( targets[ tidx ] ), + GetGLFormatName( formats[ fidx ].internal )); + + for ( sidx = 0; sidx < nsizes; sidx++ ) { + + // Test this format + size: + int err; + if ((err = test_image_format_read(context, queue, + sizes[sidx].width, 
sizes[sidx].height, sizes[sidx].depth, + targets[tidx], &formats[fidx], seed) )) + { + // Negative return values are errors, positive mean the test was skipped + if (err < 0) { + + // We land here in the event of test failure. + + log_error( "ERROR: Image read test failed for %s : %s : %s : %s\n\n", + GetGLTargetName( targets[ tidx ] ), + GetGLFormatName( formats[ fidx ].internal ), + GetGLBaseFormatName( formats[ fidx ].formattype ), + GetGLTypeName( formats[ fidx ].datatype ) ); + error++; + } + + // Skip the other sizes for this format. + printf("Skipping remaining sizes for this format\n"); + + break; + } + } + + // Note a successful format test, if we passed every size. + + if( sidx == sizeof (sizes) / sizeof( sizes[0] ) ) { + log_info( "passed: Image read test for GL format %s : %s : %s : %s\n\n", + GetGLTargetName( targets[ tidx ] ), + GetGLFormatName( formats[ fidx ].internal ), + GetGLBaseFormatName( formats[ fidx ].formattype ), + GetGLTypeName( formats[ fidx ].datatype ) ); + } + } + } + + return error; +} diff --git a/test_conformance/gl/test_images_write_common.cpp b/test_conformance/gl/test_images_write_common.cpp new file mode 100644 index 00000000..7b91f585 --- /dev/null +++ b/test_conformance/gl/test_images_write_common.cpp @@ -0,0 +1,868 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "testBase.h" +#include "common.h" +#include + +#if defined( __APPLE__ ) + #include +#else + #include + #include +#endif + +extern "C" { extern cl_uint gRandomSeed; }; + +#pragma mark - +#pragma mark Write test kernels + +static const char *kernelpattern_image_write_1D = +"__kernel void sample_test( __global %s4 *source, write_only image1d_t dest )\n" +"{\n" +" uint index = get_global_id(0);\n" +" %s4 value = source[index];\n" +" write_image%s( dest, index, %s(value));\n" +"}\n"; + +static const char *kernelpattern_image_write_1D_half = +"__kernel void sample_test( __global half4 *source, write_only image1d_t dest )\n" +"{\n" +" uint index = get_global_id(0);\n" +" write_imagef( dest, index, vload_half4(index, (__global half *)source));\n" +"}\n"; + +static const char *kernelpattern_image_write_1D_buffer = +"__kernel void sample_test( __global %s4 *source, write_only image1d_buffer_t dest )\n" +"{\n" +" uint index = get_global_id(0);\n" +" %s4 value = source[index];\n" +" write_image%s( dest, index, %s(value));\n" +"}\n"; + +static const char *kernelpattern_image_write_1D_buffer_half = +"__kernel void sample_test( __global half4 *source, write_only image1d_buffer_t dest )\n" +"{\n" +" uint index = get_global_id(0);\n" +" write_imagef( dest, index, vload_half4(index, (__global half *)source));\n" +"}\n"; + +static const char *kernelpattern_image_write_2D = +"__kernel void sample_test( __global %s4 *source, write_only image2d_t dest )\n" +"{\n" +" int tidX = get_global_id(0);\n" +" int tidY = get_global_id(1);\n" +" uint index = tidY * get_image_width( dest ) + tidX;\n" +" %s4 value = source[index];\n" +" write_image%s( dest, (int2)( tidX, tidY ), %s(value));\n" +"}\n"; + +static const char *kernelpattern_image_write_2D_half = +"__kernel void sample_test( __global half4 *source, write_only image2d_t dest )\n" +"{\n" +" int tidX = get_global_id(0);\n" +" int tidY = get_global_id(1);\n" +" uint index = tidY * get_image_width( dest ) + tidX;\n" +" 
write_imagef( dest, (int2)( tidX, tidY ), vload_half4(index, (__global half *)source));\n" +"}\n"; + +static const char *kernelpattern_image_write_1Darray = +"__kernel void sample_test( __global %s4 *source, write_only image1d_array_t dest )\n" +"{\n" +" int tidX = get_global_id(0);\n" +" int tidY = get_global_id(1);\n" +" uint index = tidY * get_image_width( dest ) + tidX;\n" +" %s4 value = source[index];\n" +" write_image%s( dest, (int2)( tidX, tidY ), %s(value));\n" +"}\n"; + +static const char *kernelpattern_image_write_1Darray_half = +"__kernel void sample_test( __global half4 *source, write_only image1d_array_t dest )\n" +"{\n" +" int tidX = get_global_id(0);\n" +" int tidY = get_global_id(1);\n" +" uint index = tidY * get_image_width( dest ) + tidX;\n" +" write_imagef( dest, (int2)( tidX, tidY ), vload_half4(index, (__global half *)source));\n" +"}\n"; + +static const char *kernelpattern_image_write_3D = +"__kernel void sample_test( __global %s4 *source, write_only image3d_t dest )\n" +"{\n" +" int tidX = get_global_id(0);\n" +" int tidY = get_global_id(1);\n" +" int tidZ = get_global_id(2);\n" +" int width = get_image_width( dest );\n" +" int height = get_image_height( dest );\n" +" int index = tidZ * width * height + tidY * width + tidX;\n" +" %s4 value = source[index];\n" +" write_image%s( dest, (int4)( tidX, tidY, tidZ, 0 ), %s(value));\n" +"}\n"; + +static const char *kernelpattern_image_write_3D_half = +"__kernel void sample_test( __global half4 *source, write_only image3d_t dest )\n" +"{\n" +" int tidX = get_global_id(0);\n" +" int tidY = get_global_id(1);\n" +" int tidZ = get_global_id(2);\n" +" int width = get_image_width( dest );\n" +" int height = get_image_height( dest );\n" +" int index = tidZ * width * height + tidY * width + tidX;\n" +" write_imagef( dest, (int4)( tidX, tidY, tidZ, 0 ), vload_half4(index, (__global half *)source));\n" +"}\n"; + +static const char *kernelpattern_image_write_2Darray = +"__kernel void sample_test( __global %s4 
*source, write_only image2d_array_t dest )\n" +"{\n" +" int tidX = get_global_id(0);\n" +" int tidY = get_global_id(1);\n" +" int tidZ = get_global_id(2);\n" +" int width = get_image_width( dest );\n" +" int height = get_image_height( dest );\n" +" int index = tidZ * width * height + tidY * width + tidX;\n" +" %s4 value = source[index];\n" +" write_image%s( dest, (int4)( tidX, tidY, tidZ, 0 ), %s(value));\n" +"}\n"; + +static const char *kernelpattern_image_write_2Darray_half = +"__kernel void sample_test( __global half4 *source, write_only image2d_array_t dest )\n" +"{\n" +" int tidX = get_global_id(0);\n" +" int tidY = get_global_id(1);\n" +" int tidZ = get_global_id(2);\n" +" int width = get_image_width( dest );\n" +" int height = get_image_height( dest );\n" +" int index = tidZ * width * height + tidY * width + tidX;\n" +" write_imagef( dest, (int4)( tidX, tidY, tidZ, 0 ), vload_half4(index, (__global half *)source));\n" +"}\n"; + +#ifdef GL_VERSION_3_2 + +static const char * kernelpattern_image_write_2D_depth = +"__kernel void sample_test( __global %s *source, write_only image2d_depth_t dest )\n" +"{\n" +" int tidX = get_global_id(0);\n" +" int tidY = get_global_id(1);\n" +" uint index = tidY * get_image_width( dest ) + tidX;\n" +" float value = source[index];\n" +" write_imagef( dest, (int2)( tidX, tidY ), value);\n" +"}\n"; + +static const char * kernelpattern_image_write_2D_array_depth = +"__kernel void sample_test( __global %s *source, write_only image2d_array_depth_t dest )\n" +"{\n" +" int tidX = get_global_id(0);\n" +" int tidY = get_global_id(1);\n" +" int tidZ = get_global_id(2);\n" +" int width = get_image_width( dest );\n" +" int height = get_image_height( dest );\n" +" int index = tidZ * width * height + tidY * width + tidX;\n" +" %s value = source[index];\n" +" write_image%s( dest, (int4)( tidX, tidY, tidZ, 0 ), %s(value));\n" +"}\n"; + + +#endif + +#pragma mark - +#pragma mark Utility functions + +static const char* 
get_appropriate_write_kernel(GLenum target, + ExplicitType type, cl_channel_order channel_order) +{ + switch (get_base_gl_target(target)) { + case GL_TEXTURE_1D: + + if (type == kHalf) + return kernelpattern_image_write_1D_half; + else + return kernelpattern_image_write_1D; + break; + case GL_TEXTURE_BUFFER: + if (type == kHalf) + return kernelpattern_image_write_1D_buffer_half; + else + return kernelpattern_image_write_1D_buffer; + break; + case GL_TEXTURE_1D_ARRAY: + if (type == kHalf) + return kernelpattern_image_write_1Darray_half; + else + return kernelpattern_image_write_1Darray; + break; + case GL_COLOR_ATTACHMENT0: + case GL_RENDERBUFFER: + case GL_TEXTURE_RECTANGLE_EXT: + case GL_TEXTURE_2D: + case GL_TEXTURE_CUBE_MAP: +#ifdef GL_VERSION_3_2 + if (channel_order == CL_DEPTH || channel_order == CL_DEPTH_STENCIL) + return kernelpattern_image_write_2D_depth; +#endif + if (type == kHalf) + return kernelpattern_image_write_2D_half; + else + return kernelpattern_image_write_2D; + break; + + case GL_TEXTURE_2D_ARRAY: +#ifdef GL_VERSION_3_2 + if (channel_order == CL_DEPTH || channel_order == CL_DEPTH_STENCIL) + return kernelpattern_image_write_2D_array_depth; +#endif + if (type == kHalf) + return kernelpattern_image_write_2Darray_half; + else + return kernelpattern_image_write_2Darray; + break; + + case GL_TEXTURE_3D: + if (type == kHalf) + return kernelpattern_image_write_3D_half; + else + return kernelpattern_image_write_3D; + break; + + default: + log_error("Unsupported GL tex target (%s) passed to write test: " + "%s (%s):%d", GetGLTargetName(target), __FUNCTION__, + __FILE__, __LINE__); + return NULL; + } +} + +void set_dimensions_by_target(GLenum target, size_t *dims, size_t sizes[3], + size_t width, size_t height, size_t depth) +{ + switch (get_base_gl_target(target)) { + case GL_TEXTURE_1D: + sizes[0] = width; + *dims = 1; + break; + + case GL_TEXTURE_BUFFER: + sizes[0] = width; + *dims = 1; + break; + + case GL_TEXTURE_1D_ARRAY: + sizes[0] = width; + 
sizes[1] = height; + *dims = 2; + break; + + case GL_COLOR_ATTACHMENT0: + case GL_RENDERBUFFER: + case GL_TEXTURE_RECTANGLE_EXT: + case GL_TEXTURE_2D: + case GL_TEXTURE_CUBE_MAP: + + sizes[0] = width; + sizes[1] = height; + *dims = 2; + break; + + case GL_TEXTURE_2D_ARRAY: + sizes[0] = width; + sizes[1] = height; + sizes[2] = depth; + *dims = 3; + break; + + case GL_TEXTURE_3D: + sizes[0] = width; + sizes[1] = height; + sizes[2] = depth; + *dims = 3; + break; + + default: + log_error("Unsupported GL tex target (%s) passed to write test: " + "%s (%s):%d", GetGLTargetName(target), __FUNCTION__, + __FILE__, __LINE__); + } +} + +int test_cl_image_write( cl_context context, cl_command_queue queue, + GLenum target, cl_mem clImage, size_t width, size_t height, size_t depth, + cl_image_format *outFormat, ExplicitType *outType, void **outSourceBuffer, + MTdata d, bool supports_half ) +{ + size_t global_dims, global_sizes[3]; + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper inStream; + char* programPtr; + int error; + char kernelSource[2048]; + + // What CL format did we get from the texture? + + error = clGetImageInfo(clImage, CL_IMAGE_FORMAT, sizeof(cl_image_format), + outFormat, NULL); + test_error(error, "Unable to get the CL image format"); + + // Create the kernel source. The target and the data type will influence + // which particular kernel we choose. + + *outType = get_write_kernel_type( outFormat ); + size_t channelSize = get_explicit_type_size(*outType); + + const char* appropriateKernel = get_appropriate_write_kernel(target, + *outType, outFormat->image_channel_order); + if (*outType == kHalf && !supports_half) { + log_info("cl_khr_fp16 isn't supported. 
Skip this test.\n"); + return 0; + } + + const char* suffix = get_kernel_suffix( outFormat ); + const char* convert = get_write_conversion( outFormat, *outType ); + + sprintf(kernelSource, appropriateKernel, get_explicit_type_name( *outType ), + get_explicit_type_name( *outType ), suffix, convert); + + programPtr = kernelSource; + if( create_single_kernel_helper( context, &program, &kernel, 1, + (const char **)&programPtr, "sample_test" ) ) + { + return -1; + } + + // Create an appropriately-sized output buffer. + + // Check to see if the output buffer will fit on the device + size_t bytes = channelSize * 4 * width * height * depth; + cl_ulong alloc_size = 0; + + cl_device_id device = NULL; + error = clGetCommandQueueInfo(queue, CL_QUEUE_DEVICE, sizeof(device), &device, NULL); + test_error( error, "Unable to query command queue for device" ); + + error = clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(alloc_size), &alloc_size, NULL); + test_error( error, "Unable to device for max mem alloc size" ); + + if (bytes > alloc_size) { + log_info(" Skipping: Buffer size (%lu) is greater than CL_DEVICE_MAX_MEM_ALLOC_SIZE (%lu)\n", bytes, alloc_size); + *outSourceBuffer = NULL; + return 0; + } + + *outSourceBuffer = CreateRandomData(*outType, width * height * depth * 4, d); + + inStream = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, + channelSize * 4 * width * height * depth, *outSourceBuffer, &error ); + test_error( error, "Unable to create output buffer" ); + + cl_sampler_properties properties[] = { + CL_SAMPLER_NORMALIZED_COORDS, CL_FALSE, + CL_SAMPLER_ADDRESSING_MODE, CL_ADDRESS_NONE, + CL_SAMPLER_FILTER_MODE, CL_FILTER_NEAREST, + 0 }; + clSamplerWrapper sampler = clCreateSamplerWithProperties( context, properties, &error ); + test_error( error, "Unable to create sampler" ); + + error = clSetKernelArg( kernel, 0, sizeof( inStream ), &inStream ); + test_error( error, "Unable to set kernel arguments" ); + + error = clSetKernelArg( kernel, 1, sizeof( clImage 
), &clImage ); + test_error( error, "Unable to set kernel arguments" ); + + // Flush and Acquire. + + glFinish(); + + error = (*clEnqueueAcquireGLObjects_ptr)( queue, 1, &clImage, 0, NULL, NULL); + test_error( error, "Unable to acquire GL obejcts"); + + // Execute ( letting OpenCL choose the local size ) + + // Setup the global dimensions and sizes based on the target type. + set_dimensions_by_target(target, &global_dims, global_sizes, + width, height, depth); + + error = clEnqueueNDRangeKernel( queue, kernel, global_dims, NULL, + global_sizes, NULL, 0, NULL, NULL ); + test_error( error, "Unable to execute test kernel" ); + + clEventWrapper event; + error = (*clEnqueueReleaseGLObjects_ptr)( queue, 1, &clImage, 0, NULL, &event ); + test_error(error, "clEnqueueReleaseGLObjects failed"); + + error = clWaitForEvents( 1, &event ); + test_error(error, "clWaitForEvents failed"); + + return 0; +} + +static int test_image_write( cl_context context, cl_command_queue queue, + GLenum glTarget, GLuint glTexture, size_t width, size_t height, size_t depth, + cl_image_format *outFormat, ExplicitType *outType, void **outSourceBuffer, + MTdata d, bool supports_half ) +{ + int error; + + // Create a CL image from the supplied GL texture + clMemWrapper image = (*clCreateFromGLTexture_ptr)( context, CL_MEM_WRITE_ONLY, + glTarget, 0, glTexture, &error ); + + if ( error != CL_SUCCESS ) { + print_error( error, "Unable to create CL image from GL texture" ); + GLint fmt; + glGetTexLevelParameteriv( glTarget, 0, GL_TEXTURE_INTERNAL_FORMAT, &fmt ); + log_error( " Supplied GL texture was base format %s and internal " + "format %s\n", GetGLBaseFormatName( fmt ), GetGLFormatName( fmt ) ); + return error; + } + + return test_cl_image_write( context, queue, glTarget, image, + width, height, depth, outFormat, outType, outSourceBuffer, d, supports_half ); +} + +int supportsHalf(cl_context context, bool* supports_half) +{ + int error; + size_t size; + cl_uint numDev; + + error = 
clGetContextInfo(context, CL_CONTEXT_NUM_DEVICES, sizeof(cl_uint), &numDev, NULL); + test_error(error, "clGetContextInfo for CL_CONTEXT_NUM_DEVICES failed"); + + cl_device_id* devices = new cl_device_id[numDev]; + error = clGetContextInfo(context, CL_CONTEXT_DEVICES, numDev * sizeof(cl_device_id), devices, NULL); + test_error(error, "clGetContextInfo for CL_CONTEXT_DEVICES failed"); + + // Get the extensions string for the device + error = clGetDeviceInfo(devices[0], CL_DEVICE_EXTENSIONS, 0, NULL, &size); + test_error(error, "clGetDeviceInfo for CL_DEVICE_EXTENSIONS size failed"); + + char *extensions = new char[size+1]; + if (extensions == 0) { + log_error("Failed to allocate memory for extensions string.\n"); + return -1; + } + memset( extensions, CHAR_MIN, sizeof(char)*(size+1) ); + + error = clGetDeviceInfo(devices[0], CL_DEVICE_EXTENSIONS, sizeof(char)*size, extensions, NULL); + test_error(error, "clGetDeviceInfo for CL_DEVICE_EXTENSIONS failed"); + + *supports_half = strstr(extensions, "cl_khr_fp16"); + delete [] extensions; + delete [] devices; + + return error; +} + +int supportsMsaa(cl_context context, bool* supports_msaa) +{ + int error; + size_t size; + cl_uint numDev; + + error = clGetContextInfo(context, CL_CONTEXT_NUM_DEVICES, sizeof(cl_uint), &numDev, NULL); + test_error(error, "clGetContextInfo for CL_CONTEXT_NUM_DEVICES failed"); + + cl_device_id* devices = new cl_device_id[numDev]; + error = clGetContextInfo(context, CL_CONTEXT_DEVICES, numDev * sizeof(cl_device_id), devices, NULL); + test_error(error, "clGetContextInfo for CL_CONTEXT_DEVICES failed"); + + // Get the extensions string for the device + error = clGetDeviceInfo(devices[0], CL_DEVICE_EXTENSIONS, 0, NULL, &size); + test_error(error, "clGetDeviceInfo for CL_DEVICE_EXTENSIONS size failed"); + + char *extensions = new char[size+1]; + if (extensions == 0) { + log_error("Failed to allocate memory for extensions string.\n"); + return -1; + } + memset( extensions, CHAR_MIN, 
sizeof(char)*(size+1) ); + + error = clGetDeviceInfo(devices[0], CL_DEVICE_EXTENSIONS, sizeof(char)*size, extensions, NULL); + test_error(error, "clGetDeviceInfo for CL_DEVICE_EXTENSIONS failed"); + + *supports_msaa = strstr(extensions, "cl_khr_gl_msaa_sharing"); + delete [] extensions; + delete [] devices; + + return error; +} + +int supportsDepth(cl_context context, bool* supports_depth) +{ + int error; + size_t size; + cl_uint numDev; + + error = clGetContextInfo(context, CL_CONTEXT_NUM_DEVICES, sizeof(cl_uint), &numDev, NULL); + test_error(error, "clGetContextInfo for CL_CONTEXT_NUM_DEVICES failed"); + + cl_device_id* devices = new cl_device_id[numDev]; + error = clGetContextInfo(context, CL_CONTEXT_DEVICES, numDev * sizeof(cl_device_id), devices, NULL); + test_error(error, "clGetContextInfo for CL_CONTEXT_DEVICES failed"); + + // Get the extensions string for the device + error = clGetDeviceInfo(devices[0], CL_DEVICE_EXTENSIONS, 0, NULL, &size); + test_error(error, "clGetDeviceInfo for CL_DEVICE_EXTENSIONS size failed"); + + char *extensions = new char[size+1]; + if (extensions == 0) { + log_error("Failed to allocate memory for extensions string.\n"); + return -1; + } + memset( extensions, CHAR_MIN, sizeof(char)*(size+1) ); + + error = clGetDeviceInfo(devices[0], CL_DEVICE_EXTENSIONS, sizeof(char)*size, extensions, NULL); + test_error(error, "clGetDeviceInfo for CL_DEVICE_EXTENSIONS failed"); + + *supports_depth = strstr(extensions, "cl_khr_gl_depth_images"); + delete [] extensions; + delete [] devices; + + return error; +} + +static int test_image_format_write( cl_context context, cl_command_queue queue, + size_t width, size_t height, size_t depth, GLenum target, GLenum format, + GLenum internalFormat, GLenum glType, ExplicitType type, MTdata d ) +{ + int error; + int samples = 8; + // If we're testing a half float format, then we need to determine the + // rounding mode of this machine. Punt if we fail to do so. 
+ + if( type == kHalf ) + if( DetectFloatToHalfRoundingMode(queue) ) + return 1; + + // Create an appropriate GL texture or renderbuffer, given the target. + + glTextureWrapper glTexture; + glBufferWrapper glBuf; + glFramebufferWrapper glFramebuffer; + glRenderbufferWrapper glRenderbuffer; + switch (get_base_gl_target(target)) { + case GL_TEXTURE_1D: + CreateGLTexture1D( width, target, format, internalFormat, glType, + type, &glTexture, &error, false, d ); + break; + case GL_TEXTURE_BUFFER: + CreateGLTextureBuffer( width, target, format, internalFormat, glType, + type, &glTexture, &glBuf, &error, false, d ); + break; + case GL_TEXTURE_1D_ARRAY: + CreateGLTexture1DArray( width, height, target, format, internalFormat, + glType, type, &glTexture, &error, false, d ); + break; + case GL_TEXTURE_RECTANGLE_EXT: + case GL_TEXTURE_2D: + case GL_TEXTURE_CUBE_MAP: + CreateGLTexture2D( width, height, target, format, internalFormat, glType, + type, &glTexture, &error, false, d ); + break; + case GL_COLOR_ATTACHMENT0: + case GL_RENDERBUFFER: + CreateGLRenderbuffer(width, height, target, format, internalFormat, + glType, type, &glFramebuffer, &glRenderbuffer, &error, d, false); + case GL_TEXTURE_2D_ARRAY: + CreateGLTexture2DArray( width, height, depth, target, format, + internalFormat, glType, type, &glTexture, &error, false, d ); + break; + case GL_TEXTURE_3D: + CreateGLTexture3D( width, height, depth, target, format, + internalFormat, glType, type, &glTexture, &error, d, false ); + break; + + default: + log_error("Unsupported GL tex target (%s) passed to write test: " + "%s (%s):%d", GetGLTargetName(target), __FUNCTION__, + __FILE__, __LINE__); + } + + // If there was a problem during creation, make sure it isn't a known + // cause, and then complain. + if ( error == -2 ) { + log_info("OpenGL texture couldn't be created, because a texture is too big. 
Skipping test.\n"); + return 0; + } + + if ( error != 0 ) { + if ((format == GL_RGBA_INTEGER_EXT) && (!CheckGLIntegerExtensionSupport())){ + log_info("OpenGL version does not support GL_RGBA_INTEGER_EXT. " + "Skipping test.\n"); + return 0; + } else { + return error; + } + } + + // Run and get the results + cl_image_format clFormat; + ExplicitType sourceType; + ExplicitType validationType; + void *outSourceBuffer = NULL; + + GLenum globj = glTexture; + if (target == GL_RENDERBUFFER || target == GL_COLOR_ATTACHMENT0) { + globj = glRenderbuffer; + } + + bool supports_half = false; + error = supportsHalf(context, &supports_half); + if( error != 0 ) + return error; + + error = test_image_write( context, queue, target, globj, width, height, + depth, &clFormat, &sourceType, (void **)&outSourceBuffer, d, supports_half ); + + if( error != 0 || ((sourceType == kHalf ) && !supports_half)) { + if (outSourceBuffer) + free(outSourceBuffer); + return error; + } + + if (!outSourceBuffer) + return 0; + + // If actual source type was half, convert to float for validation. + + if ( sourceType == kHalf ) + validationType = kFloat; + else + validationType = sourceType; + + BufferOwningPtr validationSource; + + if ( clFormat.image_channel_data_type == CL_UNORM_INT_101010 ) + { + validationSource.reset( outSourceBuffer ); + } + else + { + validationSource.reset( convert_to_expected( outSourceBuffer, + width * height * depth, sourceType, validationType, get_channel_order_channel_count(clFormat.image_channel_order) ) ); + free(outSourceBuffer); + } + + log_info( "- Write for %s [%4ld x %4ld x %4ld] : GL Texture : %s : %s : %s =>" + " CL Image : %s : %s \n", + GetGLTargetName(target), + width, height, depth, + GetGLFormatName( format ), + GetGLFormatName( internalFormat ), + GetGLTypeName( glType), + GetChannelOrderName( clFormat.image_channel_order ), + GetChannelTypeName( clFormat.image_channel_data_type )); + + // Read the results from the GL texture. 
+ + ExplicitType readType = type; + BufferOwningPtr glResults( ReadGLTexture( + target, glTexture, glBuf, width, format, + internalFormat, glType, readType, /* unused */ 1, 1 ) ); + if( glResults == NULL ) + return -1; + + // We have to convert our input buffer to the returned type, so we can validate. + BufferOwningPtr convertedGLResults; + if ( clFormat.image_channel_data_type != CL_UNORM_INT_101010 ) + { + convertedGLResults.reset( convert_to_expected( + glResults, width * height * depth, readType, validationType, get_channel_order_channel_count(clFormat.image_channel_order), glType )); + } + + // Validate. + + int valid = 0; + if (convertedGLResults) { + if( sourceType == kFloat || sourceType == kHalf ) + { + if ( clFormat.image_channel_data_type == CL_UNORM_INT_101010 ) + { + valid = validate_float_results_rgb_101010( validationSource, glResults, width, height, depth, 1 ); + } + else + { + valid = validate_float_results( validationSource, convertedGLResults, + width, height, depth, 1, get_channel_order_channel_count(clFormat.image_channel_order) ); + } + } + else + { + valid = validate_integer_results( validationSource, convertedGLResults, + width, height, depth, 1, get_explicit_type_size( readType ) ); + } + } + + return valid; +} + +#pragma mark - +#pragma mark Write test common entry point + +// This is the main loop for all of the write tests. It iterates over the +// given formats & targets, testing a variety of sizes against each +// combination. + +int test_images_write_common(cl_device_id device, cl_context context, + cl_command_queue queue, struct format* formats, size_t nformats, + GLenum *targets, size_t ntargets, sizevec_t* sizes, size_t nsizes ) +{ + int err = 0; + int error = 0; + RandomSeed seed(gRandomSeed); + + // First, ensure this device supports images. + + if (checkForImageSupport(device)) { + log_info("Device does not support images. 
Skipping test.\n"); + return 0; + } + + // Get the value of CL_DEVICE_MAX_MEM_ALLOC_SIZE + cl_ulong max_individual_allocation_size = 0; + err = clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, + sizeof(max_individual_allocation_size), + &max_individual_allocation_size, NULL); + if (err) { + log_error("ERROR: clGetDeviceInfo failed for CL_DEVICE_MAX_MEM_ALLOC_SIZE.\n"); + error++; + return error; + } + + size_t total_allocation_size; + size_t fidx, tidx, sidx; + + for ( fidx = 0; fidx < nformats; fidx++ ) { + for ( tidx = 0; tidx < ntargets; tidx++ ) { + + // Texture buffer only takes an internal format, so the level data passed + // by the test and used for verification must match the internal format + if ((targets[tidx] == GL_TEXTURE_BUFFER) && (GetGLFormat(formats[ fidx ].internal) != formats[fidx].formattype)) + continue; + + if ( formats[ fidx ].datatype == GL_UNSIGNED_INT_2_10_10_10_REV ) + { + // Check if the RGB 101010 format is supported + if ( is_rgb_101010_supported( context, targets[ tidx ] ) == 0 ) + continue; // skip + } + + if (formats[ fidx ].datatype == GL_UNSIGNED_INT_24_8) + { + //check if a implementation supports writing to the depth stencil formats + cl_image_format imageFormat = { CL_DEPTH_STENCIL, CL_UNORM_INT24 }; + if (!is_image_format_supported(context, CL_MEM_WRITE_ONLY, (targets[tidx] == GL_TEXTURE_2D || targets[tidx] == GL_TEXTURE_RECTANGLE) ? CL_MEM_OBJECT_IMAGE2D: CL_MEM_OBJECT_IMAGE2D_ARRAY, &imageFormat)) + continue; + } + + if (formats[ fidx ].datatype == GL_FLOAT_32_UNSIGNED_INT_24_8_REV) + { + //check if a implementation supports writing to the depth stencil formats + cl_image_format imageFormat = { CL_DEPTH_STENCIL, CL_FLOAT}; + if (!is_image_format_supported(context, CL_MEM_WRITE_ONLY, (targets[tidx] == GL_TEXTURE_2D || targets[tidx] == GL_TEXTURE_RECTANGLE) ? 
CL_MEM_OBJECT_IMAGE2D: CL_MEM_OBJECT_IMAGE2D_ARRAY, &imageFormat)) + continue; + } + + if (targets[tidx] != GL_TEXTURE_BUFFER) + log_info( "Testing image write for GL format %s : %s : %s : %s\n", + GetGLTargetName( targets[ tidx ] ), + GetGLFormatName( formats[ fidx ].internal ), + GetGLBaseFormatName( formats[ fidx ].formattype ), + GetGLTypeName( formats[ fidx ].datatype ) ); + else + log_info( "Testing image write for GL format %s : %s\n", + GetGLTargetName( targets[ tidx ] ), + GetGLFormatName( formats[ fidx ].internal )); + + + for (sidx = 0; sidx < nsizes; sidx++) { + + // All tested formats are 4-channel formats + total_allocation_size = + sizes[sidx].width * sizes[sidx].height * sizes[sidx].depth * + 4 * get_explicit_type_size( formats[ fidx ].type ); + + if (total_allocation_size > max_individual_allocation_size) { + log_info( "The requested allocation size (%gMB) is larger than the " + "maximum individual allocation size (%gMB)\n", + total_allocation_size/(1024.0*1024.0), + max_individual_allocation_size/(1024.0*1024.0)); + log_info( "Skipping write test for %s : %s : %s : %s " + " and size (%ld, %ld, %ld)\n", + GetGLTargetName( targets[ tidx ] ), + GetGLFormatName( formats[ fidx ].internal ), + GetGLBaseFormatName( formats[ fidx ].formattype ), + GetGLTypeName( formats[ fidx ].datatype ), + sizes[sidx].width, + sizes[sidx].height, + sizes[sidx].depth); + continue; + } +#ifdef GL_VERSION_3_2 + if (get_base_gl_target(targets[ tidx ]) == GL_TEXTURE_2D_MULTISAMPLE || + get_base_gl_target(targets[ tidx ]) == GL_TEXTURE_2D_MULTISAMPLE_ARRAY) + { + bool supports_msaa; + error = supportsMsaa(context, &supports_msaa); + if( error != 0 ) return error; + if (!supports_msaa) return 0; + } + if (formats[ fidx ].formattype == GL_DEPTH_COMPONENT || + formats[ fidx ].formattype == GL_DEPTH_STENCIL) + { + bool supports_depth; + error = supportsDepth(context, &supports_depth); + if( error != 0 ) return error; + if (!supports_depth) return 0; + } +#endif + + if( 
test_image_format_write( context, queue, + sizes[sidx].width, + sizes[sidx].height, + sizes[sidx].depth, + targets[ tidx ], + formats[ fidx ].formattype, + formats[ fidx ].internal, + formats[ fidx ].datatype, + formats[ fidx ].type, seed ) ) + { + log_error( "ERROR: Image write test failed for %s : %s : %s : %s " + " and size (%ld, %ld, %ld)\n\n", + GetGLTargetName( targets[ tidx ] ), + GetGLFormatName( formats[ fidx ].internal ), + GetGLBaseFormatName( formats[ fidx ].formattype ), + GetGLTypeName( formats[ fidx ].datatype ), + sizes[sidx].width, + sizes[sidx].height, + sizes[sidx].depth); + + error++; + break; // Skip other sizes for this combination + } + } + + // If we passed all sizes (check versus size loop count): + + if (sidx == nsizes) { + log_info( "passed: Image write for GL format %s : %s : %s : %s\n\n", + GetGLTargetName( targets[ tidx ] ), + GetGLFormatName( formats[ fidx ].internal ), + GetGLBaseFormatName( formats[ fidx ].formattype ), + GetGLTypeName( formats[ fidx ].datatype ) ); + } + } + } + + return error; +} diff --git a/test_conformance/gl/test_renderbuffer.cpp b/test_conformance/gl/test_renderbuffer.cpp new file mode 100644 index 00000000..c7f6e702 --- /dev/null +++ b/test_conformance/gl/test_renderbuffer.cpp @@ -0,0 +1,479 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "testBase.h" + +#if defined( __APPLE__ ) + #include +#else + #include + #include +#endif + +#if defined (__linux__) +GLboolean +gluCheckExtension(const GLubyte *extension, const GLubyte *extensions) +{ + const GLubyte *start; + GLubyte *where, *terminator; + + /* Extension names should not have spaces. */ + where = (GLubyte *) strchr((const char*)extension, ' '); + if (where || *extension == '\0') + return 0; + /* It takes a bit of care to be fool-proof about parsing the + OpenGL extensions string. Don't be fooled by sub-strings, + etc. */ + start = extensions; + for (;;) { + where = (GLubyte *) strstr((const char *) start, (const char*) extension); + if (!where) + break; + terminator = where + strlen((const char*) extension); + if (where == start || *(where - 1) == ' ') + if (*terminator == ' ' || *terminator == '\0') + return 1; + start = terminator; + } + return 0; +} +#endif + + +extern "C" { extern cl_uint gRandomSeed; }; + +// This is defined in the write common code: +extern int test_cl_image_write( cl_context context, cl_command_queue queue, + GLenum target, cl_mem clImage, size_t width, size_t height, size_t depth, + cl_image_format *outFormat, ExplicitType *outType, void **outSourceBuffer, + MTdata d, bool supports_half ); + +extern int test_cl_image_read( cl_context context, cl_command_queue queue, + GLenum gl_target, cl_mem image, size_t width, size_t height, size_t depth, size_t sampleNum, + cl_image_format *outFormat, ExplicitType *outType, void **outResultBuffer ); + +extern int supportsHalf(cl_context context, bool* supports_half); + +static int test_attach_renderbuffer_read_image( cl_context context, cl_command_queue queue, GLenum glTarget, GLuint glRenderbuffer, + size_t imageWidth, size_t imageHeight, cl_image_format *outFormat, ExplicitType *outType, void **outResultBuffer ) +{ + int error; + + // Create a CL image from the supplied GL renderbuffer + cl_mem image = (*clCreateFromGLRenderbuffer_ptr)( context, CL_MEM_READ_ONLY, 
glRenderbuffer, &error ); + if( error != CL_SUCCESS ) + { + print_error( error, "Unable to create CL image from GL renderbuffer" ); + return error; + } + + return test_cl_image_read( context, queue, glTarget, image, imageWidth, + imageHeight, 1, 1, outFormat, outType, outResultBuffer ); +} + +int test_renderbuffer_read_image( cl_context context, cl_command_queue queue, + GLsizei width, GLsizei height, GLenum attachment, + GLenum format, GLenum internalFormat, + GLenum glType, ExplicitType type, MTdata d ) +{ + int error; + + if( type == kHalf ) + if( DetectFloatToHalfRoundingMode(queue) ) + return 1; + + // Create the GL renderbuffer + glFramebufferWrapper glFramebuffer; + glRenderbufferWrapper glRenderbuffer; + void *tmp = CreateGLRenderbuffer( width, height, attachment, format, internalFormat, glType, type, &glFramebuffer, &glRenderbuffer, &error, d, true ); + BufferOwningPtr inputBuffer(tmp); + if( error != 0 ) + { + if ((format == GL_RGBA_INTEGER_EXT) && (!CheckGLIntegerExtensionSupport())) + { + log_info("OpenGL version does not support GL_RGBA_INTEGER_EXT. 
Skipping test.\n"); + return 0; + } + else + { + return error; + } + } + + // Run and get the results + cl_image_format clFormat; + ExplicitType actualType; + char *outBuffer; + error = test_attach_renderbuffer_read_image( context, queue, attachment, glRenderbuffer, width, height, &clFormat, &actualType, (void **)&outBuffer ); + if( error != 0 ) + return error; + BufferOwningPtr actualResults(outBuffer); + + log_info( "- Read [%4d x %4d] : GL renderbuffer : %s : %s : %s => CL Image : %s : %s \n", width, height, + GetGLFormatName( format ), GetGLFormatName( internalFormat ), GetGLTypeName( glType), + GetChannelOrderName( clFormat.image_channel_order ), GetChannelTypeName( clFormat.image_channel_data_type )); + +#ifdef DEBUG + log_info("- start read GL data -- \n"); + DumpGLBuffer(glType, width, height, actualResults); + log_info("- end read GL data -- \n"); +#endif + + // We have to convert our input buffer to the returned type, so we can validate. + BufferOwningPtr convertedInput(convert_to_expected( inputBuffer, width * height, type, actualType, get_channel_order_channel_count(clFormat.image_channel_order) )); + +#ifdef DEBUG + log_info("- start input data -- \n"); + DumpGLBuffer(GetGLTypeForExplicitType(actualType), width, height, convertedInput); + log_info("- end input data -- \n"); +#endif + +#ifdef DEBUG + log_info("- start converted data -- \n"); + DumpGLBuffer(GetGLTypeForExplicitType(actualType), width, height, actualResults); + log_info("- end converted data -- \n"); +#endif + + // Now we validate + int valid = 0; + if(convertedInput) { + if( actualType == kFloat ) + valid = validate_float_results( convertedInput, actualResults, width, height, 1, get_channel_order_channel_count(clFormat.image_channel_order) ); + else + valid = validate_integer_results( convertedInput, actualResults, width, height, 1, get_explicit_type_size( actualType ) ); + } + + return valid; +} + +int test_renderbuffer_read( cl_device_id device, cl_context context, cl_command_queue 
queue, int numElements ) +{ + GLenum attachments[] = { GL_COLOR_ATTACHMENT0_EXT }; + + struct { + GLenum internal; + GLenum format; + GLenum datatype; + ExplicitType type; + + } formats[] = { + { GL_RGBA, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, kUChar }, + { GL_RGBA, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, kUChar }, + { GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, kUChar }, + { GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, kUShort }, + +// Renderbuffers with integer formats do not seem to work reliably across +// platforms/implementations. Disabling this in version 1.0 of CL conformance tests. + +#ifdef TEST_INTEGER_FORMATS + + { GL_RGBA8I_EXT, GL_RGBA_INTEGER_EXT, GL_BYTE, kChar }, + { GL_RGBA16I_EXT, GL_RGBA_INTEGER_EXT, GL_SHORT, kShort }, + { GL_RGBA32I_EXT, GL_RGBA_INTEGER_EXT, GL_INT, kInt }, + { GL_RGBA8UI_EXT, GL_RGBA_INTEGER_EXT, GL_UNSIGNED_BYTE, kUChar }, + { GL_RGBA16UI_EXT, GL_RGBA_INTEGER_EXT, GL_UNSIGNED_SHORT, kUShort }, + { GL_RGBA32UI_EXT, GL_RGBA_INTEGER_EXT, GL_UNSIGNED_INT, kUInt }, +#endif + { GL_RGBA32F_ARB, GL_RGBA, GL_FLOAT, kFloat }, + { GL_RGBA16F_ARB, GL_RGBA, GL_HALF_FLOAT, kHalf } + }; + + size_t fmtIdx, attIdx; + int error = 0; +#ifdef DEBUG + size_t iter = 1; +#else + size_t iter = 6; +#endif + RandomSeed seed( gRandomSeed ); + + // Check if images are supported + if (checkForImageSupport(device)) { + log_info("Device does not support images. 
Skipping test.\n"); + return 0; + } + + if( !gluCheckExtension( (const GLubyte *)"GL_EXT_framebuffer_object", glGetString( GL_EXTENSIONS ) ) ) + { + log_info( "Renderbuffers are not supported by this OpenGL implementation; skipping test\n" ); + return 0; + } + + // Loop through a set of GL formats, testing a set of sizes against each one + for( fmtIdx = 0; fmtIdx < sizeof( formats ) / sizeof( formats[ 0 ] ); fmtIdx++ ) + { + for( attIdx = 0; attIdx < sizeof( attachments ) / sizeof( attachments[ 0 ] ); attIdx++ ) + { + size_t i; + + log_info( "Testing renderbuffer read for %s : %s : %s : %s\n", + GetGLAttachmentName( attachments[ attIdx ] ), + GetGLFormatName( formats[ fmtIdx ].internal ), + GetGLBaseFormatName( formats[ fmtIdx ].format ), + GetGLTypeName( formats[ fmtIdx ].datatype ) ); + + for( i = 0; i < iter; i++ ) + { + GLsizei width = random_in_range( 16, 512, seed ); + GLsizei height = random_in_range( 16, 512, seed ); +#ifdef DEBUG + width = height = 4; +#endif + + if( test_renderbuffer_read_image( context, queue, width, height, + attachments[ attIdx ], + formats[ fmtIdx ].format, + formats[ fmtIdx ].internal, + formats[ fmtIdx ].datatype, + formats[ fmtIdx ].type, seed ) ) + + { + log_error( "ERROR: Renderbuffer read test failed for %s : %s : %s : %s\n\n", + GetGLAttachmentName( attachments[ attIdx ] ), + GetGLFormatName( formats[ fmtIdx ].internal ), + GetGLBaseFormatName( formats[ fmtIdx ].format ), + GetGLTypeName( formats[ fmtIdx ].datatype ) ); + + error++; + break; // Skip other sizes for this combination + } + } + if( i == iter ) + { + log_info( "passed: Renderbuffer read test passed for %s : %s : %s : %s\n\n", + GetGLAttachmentName( attachments[ attIdx ] ), + GetGLFormatName( formats[ fmtIdx ].internal ), + GetGLBaseFormatName( formats[ fmtIdx ].format ), + GetGLTypeName( formats[ fmtIdx ].datatype ) ); + } + } + } + + return error; +} + + +#pragma mark -------------------- Write tests ------------------------- + +int 
test_attach_renderbuffer_write_to_image( cl_context context, cl_command_queue queue, GLenum glTarget, GLuint glRenderbuffer, + size_t imageWidth, size_t imageHeight, cl_image_format *outFormat, ExplicitType *outType, MTdata d, void **outSourceBuffer, bool supports_half ) +{ + int error; + + // Create a CL image from the supplied GL renderbuffer + clMemWrapper image = (*clCreateFromGLRenderbuffer_ptr)( context, CL_MEM_WRITE_ONLY, glRenderbuffer, &error ); + if( error != CL_SUCCESS ) + { + print_error( error, "Unable to create CL image from GL renderbuffer" ); + return error; + } + + return test_cl_image_write( context, queue, glTarget, image, imageWidth, + imageHeight, 1, outFormat, outType, outSourceBuffer, d, supports_half ); +} + +int test_renderbuffer_image_write( cl_context context, cl_command_queue queue, + GLsizei width, GLsizei height, GLenum attachment, + GLenum format, GLenum internalFormat, + GLenum glType, ExplicitType type, MTdata d ) +{ + int error; + + if( type == kHalf ) + if( DetectFloatToHalfRoundingMode(queue) ) + return 1; + + // Create the GL renderbuffer + glFramebufferWrapper glFramebuffer; + glRenderbufferWrapper glRenderbuffer; + CreateGLRenderbuffer( width, height, attachment, format, internalFormat, glType, type, &glFramebuffer, &glRenderbuffer, &error, d, false ); + if( error != 0 ) + { + if ((format == GL_RGBA_INTEGER_EXT) && (!CheckGLIntegerExtensionSupport())) + { + log_info("OpenGL version does not support GL_RGBA_INTEGER_EXT. 
Skipping test.\n"); + return 0; + } + else + { + return error; + } + } + + // Run and get the results + cl_image_format clFormat; + ExplicitType sourceType; + ExplicitType validationType; + void *outSourceBuffer; + + bool supports_half = false; + error = supportsHalf(context, &supports_half); + if( error != 0 ) + return error; + + error = test_attach_renderbuffer_write_to_image( context, queue, attachment, glRenderbuffer, width, height, &clFormat, &sourceType, d, (void **)&outSourceBuffer, supports_half ); + if( error != 0 || ((sourceType == kHalf ) && !supports_half)) + return error; + + // If actual source type was half, convert to float for validation. + if( sourceType == kHalf ) + validationType = kFloat; + else + validationType = sourceType; + + BufferOwningPtr validationSource( convert_to_expected( outSourceBuffer, width * height, sourceType, validationType, get_channel_order_channel_count(clFormat.image_channel_order) ) ); + + log_info( "- Write [%4d x %4d] : GL Renderbuffer : %s : %s : %s => CL Image : %s : %s \n", width, height, + GetGLFormatName( format ), GetGLFormatName( internalFormat ), GetGLTypeName( glType), + GetChannelOrderName( clFormat.image_channel_order ), GetChannelTypeName( clFormat.image_channel_data_type )); + + // Now read the results from the GL renderbuffer + BufferOwningPtr resultData( ReadGLRenderbuffer( glFramebuffer, glRenderbuffer, attachment, format, internalFormat, glType, type, width, height ) ); + +#ifdef DEBUG + log_info("- start result data -- \n"); + DumpGLBuffer(glType, width, height, resultData); + log_info("- end result data -- \n"); +#endif + + // We have to convert our input buffer to the returned type, so we can validate. 
+ BufferOwningPtr convertedData( convert_to_expected( resultData, width * height, type, validationType, get_channel_order_channel_count(clFormat.image_channel_order) ) ); + +#ifdef DEBUG + log_info("- start input data -- \n"); + DumpGLBuffer(GetGLTypeForExplicitType(validationType), width, height, validationSource); + log_info("- end input data -- \n"); +#endif + +#ifdef DEBUG + log_info("- start converted data -- \n"); + DumpGLBuffer(GetGLTypeForExplicitType(validationType), width, height, convertedData); + log_info("- end converted data -- \n"); +#endif + + // Now we validate + int valid = 0; + if(convertedData) { + if( sourceType == kFloat || sourceType == kHalf ) + valid = validate_float_results( validationSource, convertedData, width, height, 1, get_channel_order_channel_count(clFormat.image_channel_order) ); + else + valid = validate_integer_results( validationSource, convertedData, width, height, 1, get_explicit_type_size( type ) ); + } + + return valid; +} + +int test_renderbuffer_write( cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) +{ + GLenum attachments[] = { GL_COLOR_ATTACHMENT0_EXT }; + + struct { + GLenum internal; + GLenum format; + GLenum datatype; + ExplicitType type; + + } formats[] = { + { GL_RGBA, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, kUChar }, + { GL_RGBA, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, kUChar }, + { GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, kUChar }, + { GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, kUShort }, + +// Renderbuffers with integer formats do not seem to work reliably across +// platforms/implementations. Disabling this in version 1.0 of CL conformance tests. 
+ +#ifdef TEST_INTEGER_FORMATS + + { GL_RGBA8I_EXT, GL_RGBA_INTEGER_EXT, GL_BYTE, kChar }, + { GL_RGBA16I_EXT, GL_RGBA_INTEGER_EXT, GL_SHORT, kShort }, + { GL_RGBA32I_EXT, GL_RGBA_INTEGER_EXT, GL_INT, kInt }, + { GL_RGBA8UI_EXT, GL_RGBA_INTEGER_EXT, GL_UNSIGNED_BYTE, kUChar }, + { GL_RGBA16UI_EXT, GL_RGBA_INTEGER_EXT, GL_UNSIGNED_SHORT, kUShort }, + { GL_RGBA32UI_EXT, GL_RGBA_INTEGER_EXT, GL_UNSIGNED_INT, kUInt }, +#endif + { GL_RGBA32F_ARB, GL_RGBA, GL_FLOAT, kFloat }, + { GL_RGBA16F_ARB, GL_RGBA, GL_HALF_FLOAT, kHalf } + }; + + size_t fmtIdx, attIdx; + int error = 0; + size_t iter = 6; +#ifdef DEBUG + iter = 1; +#endif + RandomSeed seed( gRandomSeed ); + + // Check if images are supported + if (checkForImageSupport(device)) { + log_info("Device does not support images. Skipping test.\n"); + return 0; + } + + if( !gluCheckExtension( (const GLubyte *)"GL_EXT_framebuffer_object", glGetString( GL_EXTENSIONS ) ) ) + { + log_info( "Renderbuffers are not supported by this OpenGL implementation; skipping test\n" ); + return 0; + } + + // Loop through a set of GL formats, testing a set of sizes against each one + for( fmtIdx = 0; fmtIdx < sizeof( formats ) / sizeof( formats[ 0 ] ); fmtIdx++ ) + { + for( attIdx = 0; attIdx < sizeof( attachments ) / sizeof( attachments[ 0 ] ); attIdx++ ) + { + log_info( "Testing Renderbuffer write test for %s : %s : %s : %s\n", + GetGLAttachmentName( attachments[ attIdx ] ), + GetGLFormatName( formats[ fmtIdx ].internal ), + GetGLBaseFormatName( formats[ fmtIdx ].format ), + GetGLTypeName( formats[ fmtIdx ].datatype ) ); + + size_t i; + for( i = 0; i < iter; i++ ) + { + GLsizei width = random_in_range( 16, 512, seed ); + GLsizei height = random_in_range( 16, 512, seed ); +#ifdef DEBUG + width = height = 4; +#endif + + if( test_renderbuffer_image_write( context, queue, width, height, + attachments[ attIdx ], + formats[ fmtIdx ].format, + formats[ fmtIdx ].internal, + formats[ fmtIdx ].datatype, + formats[ fmtIdx ].type, seed ) ) + { + 
log_error( "ERROR: Renderbuffer write test failed for %s : %s : %s : %s\n\n", + GetGLAttachmentName( attachments[ attIdx ] ), + GetGLFormatName( formats[ fmtIdx ].internal ), + GetGLBaseFormatName( formats[ fmtIdx ].format ), + GetGLTypeName( formats[ fmtIdx ].datatype ) ); + + error++; + break; // Skip other sizes for this combination + } + } + if( i == iter ) + { + log_info( "passed: Renderbuffer write test passed for %s : %s : %s : %s\n\n", + GetGLAttachmentName( attachments[ attIdx ] ), + GetGLFormatName( formats[ fmtIdx ].internal ), + GetGLBaseFormatName( formats[ fmtIdx ].format ), + GetGLTypeName( formats[ fmtIdx ].datatype ) ); + } + } + } + + return error; +} diff --git a/test_conformance/gl/test_renderbuffer_info.cpp b/test_conformance/gl/test_renderbuffer_info.cpp new file mode 100644 index 00000000..61a8adfa --- /dev/null +++ b/test_conformance/gl/test_renderbuffer_info.cpp @@ -0,0 +1,133 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "testBase.h" +#if defined( __APPLE__ ) +#include +#else +#include +#include +#endif + +extern "C" {extern cl_uint gRandomSeed;}; + +static int test_renderbuffer_object_info( cl_context context, cl_command_queue queue, + GLsizei width, GLsizei height, GLenum attachment, + GLenum format, GLenum internalFormat, + GLenum glType, ExplicitType type, MTdata d ) +{ + int error; + + if( type == kHalf ) + if( DetectFloatToHalfRoundingMode(queue) ) + return 1; + + // Create the GL render buffer + glFramebufferWrapper glFramebuffer; + glRenderbufferWrapper glRenderbuffer; + BufferOwningPtr inputBuffer(CreateGLRenderbuffer( width, height, attachment, format, internalFormat, glType, type, &glFramebuffer, &glRenderbuffer, &error, d, true )); + if( error != 0 ) + return error; + + clMemWrapper image = (*clCreateFromGLRenderbuffer_ptr)(context, CL_MEM_READ_ONLY, glRenderbuffer, &error); + test_error(error, "clCreateFromGLRenderbuffer failed"); + + log_info( "- Given a GL format of %s, input type was %s, size was %d x %d\n", + GetGLFormatName( internalFormat ), + get_explicit_type_name( type ), (int)width, (int)height ); + + // Verify the expected information here. 
+ return CheckGLObjectInfo(image, CL_GL_OBJECT_RENDERBUFFER, (GLuint)glRenderbuffer, internalFormat, 0); +} + +int test_renderbuffer_getinfo( cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) +{ + GLenum attachments[] = { GL_COLOR_ATTACHMENT0_EXT }; + + struct { + GLenum internal; + GLenum format; + GLenum datatype; + ExplicitType type; + + } formats[] = { + { GL_RGBA, GL_RGBA, GL_UNSIGNED_BYTE, kUChar }, + { GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, kUChar }, + { GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, kUShort }, + { GL_RGBA32F_ARB, GL_RGBA, GL_FLOAT, kFloat }, + { GL_RGBA16F_ARB, GL_RGBA, GL_HALF_FLOAT, kHalf } + }; + + size_t fmtIdx, tgtIdx; + int error = 0; + size_t iter = 6; + RandomSeed seed(gRandomSeed); + + // Check if images are supported + if (checkForImageSupport(device)) { + log_info("Device does not support images. Skipping test.\n"); + return 0; + } + + if( !gluCheckExtension( (const GLubyte *)"GL_EXT_framebuffer_object", glGetString( GL_EXTENSIONS ) ) ) + { + log_info( "Renderbuffers are not supported by this OpenGL implementation; skipping test\n" ); + return 0; + } + + // Loop through a set of GL formats, testing a set of sizes against each one + for( fmtIdx = 0; fmtIdx < sizeof( formats ) / sizeof( formats[ 0 ] ); fmtIdx++ ) + { + for( tgtIdx = 0; tgtIdx < sizeof( attachments ) / sizeof( attachments[ 0 ] ); tgtIdx++ ) + { + log_info( "Testing Renderbuffer object info for %s : %s : %s\n", + GetGLFormatName( formats[ fmtIdx ].internal ), + GetGLBaseFormatName( formats[ fmtIdx ].format ), + GetGLTypeName( formats[ fmtIdx ].datatype ) ); + + size_t i; + for( i = 0; i < iter; i++ ) + { + GLsizei width = random_in_range( 16, 512, seed ); + GLsizei height = random_in_range( 16, 512, seed ); + + if( test_renderbuffer_object_info( context, queue, (int)width, (int)height, + attachments[ tgtIdx ], + formats[ fmtIdx ].format, + formats[ fmtIdx ].internal, + formats[ fmtIdx ].datatype, + formats[ fmtIdx ].type, seed ) ) + { + 
log_error( "ERROR: Renderbuffer write test failed for GL format %s : %s\n\n", + GetGLFormatName( formats[ fmtIdx ].internal ), + GetGLTypeName( formats[ fmtIdx ].datatype ) ); + + error++; + break; // Skip other sizes for this combination + } + } + if( i == iter ) + { + log_info( "passed: Renderbuffer write test passed for GL format %s : %s\n\n", + GetGLFormatName( formats[ fmtIdx ].internal ), + GetGLTypeName( formats[ fmtIdx ].datatype ) ); + + } + } + } + + return error; +} + diff --git a/test_conformance/gles/CMakeLists.txt b/test_conformance/gles/CMakeLists.txt new file mode 100644 index 00000000..e64b58e8 --- /dev/null +++ b/test_conformance/gles/CMakeLists.txt @@ -0,0 +1,34 @@ +set (MODULE_NAME GLES) + +set (${MODULE_NAME}_SOURCES + main.cpp + test_buffers.cpp + test_images_2D.cpp + test_images_3D.cpp + test_renderbuffer.cpp + test_images_2D_info.cpp + test_images_3D_info.cpp + test_renderbuffer_info.cpp + test_fence_sync.cpp + helpers.cpp + setup_egl.cpp + ../../test_common/gles/helpers.cpp + ../../test_common/harness/genericThread.cpp + ../../test_common/harness/errorHelpers.c + ../../test_common/harness/threadTesting.c + ../../test_common/harness/testHarness.c + ../../test_common/harness/kernelHelpers.c + ../../test_common/harness/mt19937.c + ../../test_common/harness/conversions.c + ../../test_common/harness/msvc9.c + ../../test_common/harness/parseParameters.cpp + ) + +if(ANDROID) + list(APPEND CLConform_LIBRARIES GLESv2) +elseif(WIN32) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DGLES3") + list(APPEND CLConform_LIBRARIES libEGL libGLESv2 ) +endif(ANDROID) + +include(../CMakeCommon.txt) diff --git a/test_conformance/gles/helpers.cpp b/test_conformance/gles/helpers.cpp new file mode 100644 index 00000000..dfff21f5 --- /dev/null +++ b/test_conformance/gles/helpers.cpp @@ -0,0 +1,481 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "testBase.h" + +#include "gl_headers.h" + +const char *get_kernel_suffix( cl_image_format *format ) +{ + switch( format->image_channel_data_type ) + { + case CL_UNORM_INT8: + case CL_UNORM_INT16: + case CL_SNORM_INT8: + case CL_SNORM_INT16: + case CL_FLOAT: + return "f"; + case CL_HALF_FLOAT: + return "h"; + case CL_SIGNED_INT8: + case CL_SIGNED_INT16: + case CL_SIGNED_INT32: + return "i"; + case CL_UNSIGNED_INT8: + case CL_UNSIGNED_INT16: + case CL_UNSIGNED_INT32: + return "ui"; + default: + return ""; + } +} + +ExplicitType get_read_kernel_type( cl_image_format *format ) +{ + switch( format->image_channel_data_type ) + { + case CL_UNORM_INT8: + case CL_UNORM_INT16: + case CL_SNORM_INT8: + case CL_SNORM_INT16: + case CL_FLOAT: + return kFloat; + case CL_HALF_FLOAT: + return kHalf; + case CL_SIGNED_INT8: + case CL_SIGNED_INT16: + case CL_SIGNED_INT32: + return kInt; + case CL_UNSIGNED_INT8: + case CL_UNSIGNED_INT16: + case CL_UNSIGNED_INT32: + return kUInt; + default: + return kInt; + } +} + +ExplicitType get_write_kernel_type( cl_image_format *format ) +{ + switch( format->image_channel_data_type ) + { + case CL_UNORM_INT8: + return kFloat; + case CL_UNORM_INT16: + return kFloat; + case CL_SNORM_INT8: + return kFloat; + case CL_SNORM_INT16: + return kFloat; + case CL_HALF_FLOAT: + return kHalf; + case CL_FLOAT: + return kFloat; + case CL_SIGNED_INT8: + return kChar; + case CL_SIGNED_INT16: + 
return kShort; + case CL_SIGNED_INT32: + return kInt; + case CL_UNSIGNED_INT8: + return kUChar; + case CL_UNSIGNED_INT16: + return kUShort; + case CL_UNSIGNED_INT32: + return kUInt; + default: + return kInt; + } +} + +const char* get_write_conversion( cl_image_format *format, ExplicitType type ) +{ + switch( format->image_channel_data_type ) + { + case CL_UNORM_INT8: + case CL_UNORM_INT16: + case CL_SNORM_INT8: + case CL_SNORM_INT16: + case CL_FLOAT: + if(type != kFloat) return "convert_float4"; + break; + case CL_HALF_FLOAT: + break; + case CL_SIGNED_INT8: + case CL_SIGNED_INT16: + case CL_SIGNED_INT32: + if(type != kInt) return "convert_int4"; + break; + case CL_UNSIGNED_INT8: + case CL_UNSIGNED_INT16: + case CL_UNSIGNED_INT32: + if(type != kUInt) return "convert_uint4"; + break; + default: + return ""; + } + return ""; +} + +// The only three input types to this function are kInt, kUInt and kFloat, due to the way we set up our tests +// The output types, though, are pretty much anything valid for GL to receive + +#define DOWNSCALE_INTEGER_CASE( enum, type, bitShift ) \ + case enum: \ + { \ + cl_##type *dst = new cl_##type[ numPixels * 4 ]; \ + for( size_t i = 0; i < numPixels * 4; i++ ) \ + dst[ i ] = src[ i ]; \ + return (char *)dst; \ + } + +#define UPSCALE_FLOAT_CASE( enum, type, typeMax ) \ + case enum: \ + { \ + cl_##type *dst = new cl_##type[ numPixels * 4 ]; \ + for( size_t i = 0; i < numPixels * 4; i++ ) \ + dst[ i ] = (cl_##type)( src[ i ] * typeMax ); \ + return (char *)dst; \ + } + +char * convert_to_expected( void * inputBuffer, size_t numPixels, ExplicitType inType, ExplicitType outType ) +{ +#ifdef GLES_DEBUG + log_info( "- Converting from input type '%s' to output type '%s'\n", + get_explicit_type_name( inType ), get_explicit_type_name( outType ) ); +#endif + + if( inType == outType ) + { + char *outData = new char[ numPixels * 4 * get_explicit_type_size(outType) ] ; // sizeof( cl_int ) ]; + memcpy( outData, inputBuffer, numPixels * 4 * 
get_explicit_type_size(inType) ); + return outData; + } + else if( inType == kChar ) + { + cl_char *src = (cl_char *)inputBuffer; + + switch( outType ) + { + case kInt: + { + cl_int *outData = new cl_int[ numPixels * 4 ]; + for( size_t i = 0; i < numPixels * 4; i++ ) + { + outData[ i ] = (cl_int)((src[ i ])); + } + return (char *)outData; + } + case kFloat: + { + // If we're converting to float, then CL decided that we should be normalized + cl_float *outData = new cl_float[ numPixels * 4 ]; + for( size_t i = 0; i < numPixels * 4; i++ ) + { + outData[ i ] = (cl_float)src[ i ] / 127.0f; + } + return (char *)outData; + } + default: + log_error( "ERROR: Unsupported conversion from %s to %s!\n", get_explicit_type_name( inType ), get_explicit_type_name( outType ) ); + return NULL; + } + } + else if( inType == kUChar ) + { + cl_uchar *src = (cl_uchar *)inputBuffer; + + switch( outType ) + { + case kUInt: + { + cl_uint *outData = new cl_uint[ numPixels * 4 ]; + for( size_t i = 0; i < numPixels * 4; i++ ) + { + outData[ i ] = (cl_uint)((src[ i ])); + } + return (char *)outData; + } + case kFloat: + { + // If we're converting to float, then CL decided that we should be normalized + cl_float *outData = new cl_float[ numPixels * 4 ]; + for( size_t i = 0; i < numPixels * 4; i++ ) + { + outData[ i ] = (cl_float)(src[ i ]) / 256.0f; + } + return (char *)outData; + } + default: + log_error( "ERROR: Unsupported conversion from %s to %s!\n", get_explicit_type_name( inType ), get_explicit_type_name( outType ) ); + return NULL; + } + } + else if( inType == kShort ) + { + cl_short *src = (cl_short *)inputBuffer; + + switch( outType ) + { + case kInt: + { + cl_int *outData = new cl_int[ numPixels * 4 ]; + for( size_t i = 0; i < numPixels * 4; i++ ) + { + outData[ i ] = (cl_int)((src[ i ])); + } + return (char *)outData; + } + case kFloat: + { + // If we're converting to float, then CL decided that we should be normalized + cl_float *outData = new cl_float[ numPixels * 4 ]; + for( 
size_t i = 0; i < numPixels * 4; i++ ) + { + outData[ i ] = (cl_float)src[ i ] / 32768.0f; + } + return (char *)outData; + } + default: + log_error( "ERROR: Unsupported conversion from %s to %s!\n", get_explicit_type_name( inType ), get_explicit_type_name( outType ) ); + return NULL; + } + } + else if( inType == kUShort ) + { + cl_ushort *src = (cl_ushort *)inputBuffer; + + switch( outType ) + { + case kUInt: + { + cl_uint *outData = new cl_uint[ numPixels * 4 ]; + for( size_t i = 0; i < numPixels * 4; i++ ) + { + outData[ i ] = (cl_uint)((src[ i ])); + } + return (char *)outData; + } + case kFloat: + { + // If we're converting to float, then CL decided that we should be normalized + cl_float *outData = new cl_float[ numPixels * 4 ]; + for( size_t i = 0; i < numPixels * 4; i++ ) + { + outData[ i ] = (cl_float)(src[ i ]) / 65535.0f; + } + return (char *)outData; + } + default: + log_error( "ERROR: Unsupported conversion from %s to %s!\n", get_explicit_type_name( inType ), get_explicit_type_name( outType ) ); + return NULL; + } + } + else if( inType == kInt ) + { + cl_int *src = (cl_int *)inputBuffer; + + switch( outType ) + { + DOWNSCALE_INTEGER_CASE( kShort, short, 16 ) + DOWNSCALE_INTEGER_CASE( kChar, char, 24 ) + case kFloat: + { + // If we're converting to float, then CL decided that we should be normalized + cl_float *outData = new cl_float[ numPixels * 4 ]; + for( size_t i = 0; i < numPixels * 4; i++ ) + { + outData[ i ] = (cl_float)fmaxf( (float)src[ i ] / 2147483647.f, -1.f ); + } + return (char *)outData; + } + default: + log_error( "ERROR: Unsupported conversion from %s to %s!\n", get_explicit_type_name( inType ), get_explicit_type_name( outType ) ); + return NULL; + } + } + else if( inType == kUInt ) + { + cl_uint *src = (cl_uint *)inputBuffer; + + switch( outType ) + { + DOWNSCALE_INTEGER_CASE( kUShort, ushort, 16 ) + DOWNSCALE_INTEGER_CASE( kUChar, uchar, 24 ) + case kFloat: + { + // If we're converting to float, then CL decided that we should be 
normalized + cl_float *outData = new cl_float[ numPixels * 4 ]; + for( size_t i = 0; i < numPixels * 4; i++ ) + { + outData[ i ] = (cl_float)src[ i ] / 4294967295.f; + } + return (char *)outData; + } + default: + log_error( "ERROR: Unsupported conversion from %s to %s!\n", get_explicit_type_name( inType ), get_explicit_type_name( outType ) ); + return NULL; + } + } + else + { + cl_float *src = (cl_float *)inputBuffer; + + switch( outType ) + { + UPSCALE_FLOAT_CASE( kChar, char, 127.f ) + UPSCALE_FLOAT_CASE( kUChar, uchar, 255.f ) + UPSCALE_FLOAT_CASE( kShort, short, 32767.f ) + UPSCALE_FLOAT_CASE( kUShort, ushort, 65535.f ) + UPSCALE_FLOAT_CASE( kInt, int, 2147483647.f ) + UPSCALE_FLOAT_CASE( kUInt, uint, 4294967295.f ) + default: + log_error( "ERROR: Unsupported conversion from %s to %s!\n", get_explicit_type_name( inType ), get_explicit_type_name( outType ) ); + return NULL; + } + } + + return NULL; +} + +int validate_integer_results( void *expectedResults, void *actualResults, size_t width, size_t height, size_t typeSize ) +{ + return validate_integer_results( expectedResults, actualResults, width, height, 0, typeSize ); +} + +int validate_integer_results( void *expectedResults, void *actualResults, size_t width, size_t height, size_t depth, size_t typeSize ) +{ + char *expected = (char *)expectedResults; + char *actual = (char *)actualResults; + for( size_t z = 0; z < ( ( depth == 0 ) ? 
1 : depth ); z++ ) + { + for( size_t y = 0; y < height; y++ ) + { + for( size_t x = 0; x < width; x++ ) + { + // A texel is 4 channels of typeSize bytes; any byte-level difference fails validation + if( memcmp( expected, actual, typeSize * 4 ) != 0 ) + { + char scratch[ 1024 ]; + + if( depth == 0 ) + log_error( "ERROR: Data sample %d,%d did not validate!\n", (int)x, (int)y ); + else + log_error( "ERROR: Data sample %d,%d,%d did not validate!\n", (int)x, (int)y, (int)z ); + log_error( "\tExpected: %s\n", GetDataVectorString( expected, typeSize, 4, scratch ) ); + log_error( "\t Actual: %s\n", GetDataVectorString( actual, typeSize, 4, scratch ) ); + return -1; + } + expected += typeSize * 4; + actual += typeSize * 4; + } + } + } + + return 0; +} + +// 2D convenience overload: forwards to the 3D variant with depth == 0 +int validate_float_results( void *expectedResults, void *actualResults, size_t width, size_t height ) +{ + return validate_float_results( expectedResults, actualResults, width, height, 0 ); +} + +// Validates 4-channel float results; a depth of 0 is treated as a single 2D slice +int validate_float_results( void *expectedResults, void *actualResults, size_t width, size_t height, size_t depth ) +{ + cl_float *expected = (cl_float *)expectedResults; + cl_float *actual = (cl_float *)actualResults; + for( size_t z = 0; z < ( ( depth == 0 ) ?
1 : depth ); z++ ) + { + for( size_t y = 0; y < height; y++ ) + { + for( size_t x = 0; x < width; x++ ) + { + float err = 0.f; + for( size_t i = 0; i < 4; i++ ) + { + float error = fabsf( expected[ i ] - actual[ i ] ); + if( error > err ) + err = error; + } + + if( err > 1.f / 127.f ) // Max expected range of error if we converted from an 8-bit integer to a normalized float + { + if( depth == 0 ) + log_error( "ERROR: Data sample %d,%d did not validate!\n", (int)x, (int)y ); + else + log_error( "ERROR: Data sample %d,%d,%d did not validate!\n", (int)x, (int)y, (int)z ); + log_error( "\tExpected: %f %f %f %f\n", expected[ 0 ], expected[ 1 ], expected[ 2 ], expected[ 3 ] ); + log_error( "\t : %a %a %a %a\n", expected[ 0 ], expected[ 1 ], expected[ 2 ], expected[ 3 ] ); + log_error( "\t Actual: %f %f %f %f\n", actual[ 0 ], actual[ 1 ], actual[ 2 ], actual[ 3 ] ); + log_error( "\t : %a %a %a %a\n", actual[ 0 ], actual[ 1 ], actual[ 2 ], actual[ 3 ] ); + return -1; + } + expected += 4; + actual += 4; + } + } + } + + return 0; +} + +int CheckGLObjectInfo(cl_mem mem, cl_gl_object_type expected_cl_gl_type, GLuint expected_gl_name, + GLenum expected_cl_gl_texture_target, GLint expected_cl_gl_mipmap_level) +{ + cl_gl_object_type object_type; + GLuint object_name; + GLenum texture_target; + GLint mipmap_level; + int error; + + error = (*clGetGLObjectInfo_ptr)(mem, &object_type, &object_name); + test_error( error, "clGetGLObjectInfo failed"); + if (object_type != expected_cl_gl_type) { + log_error("clGetGLObjectInfo did not return expected object type: expected %d, got %d.\n", expected_cl_gl_type, object_type); + return -1; + } + if (object_name != expected_gl_name) { + log_error("clGetGLObjectInfo did not return expected object name: expected %d, got %d.\n", expected_gl_name, object_name); + return -1; + } + + if (object_type == CL_GL_OBJECT_TEXTURE2D || object_type == CL_GL_OBJECT_TEXTURE3D) { + error = (*clGetGLTextureInfo_ptr)(mem, CL_GL_TEXTURE_TARGET, 
sizeof(texture_target), &texture_target, NULL); + test_error( error, "clGetGLTextureInfo for CL_GL_TEXTURE_TARGET failed"); + + if (texture_target != expected_cl_gl_texture_target) { + log_error("clGetGLTextureInfo did not return expected texture target: expected %d, got %d.\n", expected_cl_gl_texture_target, texture_target); + return -1; + } + + error = (*clGetGLTextureInfo_ptr)(mem, CL_GL_MIPMAP_LEVEL, sizeof(mipmap_level), &mipmap_level, NULL); + test_error( error, "clGetGLTextureInfo for CL_GL_MIPMAP_LEVEL failed"); + + if (mipmap_level != expected_cl_gl_mipmap_level) { + log_error("clGetGLTextureInfo did not return expected mipmap level: expected %d, got %d.\n", expected_cl_gl_mipmap_level, mipmap_level); + return -1; + } + } + return 0; +} + +// Returns true when integer texture formats are usable with this GL implementation +bool CheckGLIntegerExtensionSupport() +{ + // Get the OpenGL version and supported extensions + const GLubyte *glVersion = glGetString(GL_VERSION); + const GLubyte *glExtensionList = glGetString(GL_EXTENSIONS); + + // Check if the OpenGL version is 3.0 or greater, or GL_EXT_texture_integer is supported. + // NOTE(review): only the first character of the version string is inspected, which assumes a single-digit major version — confirm for GL >= 10 + return (((glVersion[0] - '0') >= 3) || (strstr((const char*)glExtensionList, "GL_EXT_texture_integer"))); +} diff --git a/test_conformance/gles/main.cpp b/test_conformance/gles/main.cpp new file mode 100644 index 00000000..e7aeb938 --- /dev/null +++ b/test_conformance/gles/main.cpp @@ -0,0 +1,402 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+// +#include +#include + +#if !defined(_WIN32) +#include +#endif + +#include +#include + +#if !defined (__APPLE__) +#include +#endif + +#include "procs.h" +#include "../../test_common/gles/setup.h" +#include "../../test_common/harness/testHarness.h" + +#if !defined(_WIN32) +#include +#endif + +static cl_context sCurrentContext = NULL; + + +#define TEST_FN_REDIRECT( fn ) redirect_##fn +#define TEST_FN_REDIRECTOR( fn ) \ +int redirect_##fn(cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) \ +{ \ + int error; \ + clCommandQueueWrapper realQueue = clCreateCommandQueue( sCurrentContext, device, 0, &error ); \ + test_error( error, "Unable to create command queue" ); \ + return fn( device, sCurrentContext, realQueue, numElements ); \ +} + +TEST_FN_REDIRECTOR( test_buffers ) +TEST_FN_REDIRECTOR( test_buffers_getinfo ) +TEST_FN_REDIRECTOR( test_images_read ) +TEST_FN_REDIRECTOR( test_images_2D_getinfo ) +TEST_FN_REDIRECTOR( test_images_read_cube ) +TEST_FN_REDIRECTOR( test_images_cube_getinfo ) +TEST_FN_REDIRECTOR( test_images_read_3D ) +TEST_FN_REDIRECTOR( test_images_3D_getinfo ) +TEST_FN_REDIRECTOR( test_images_write ) +TEST_FN_REDIRECTOR( test_images_write_cube ) +TEST_FN_REDIRECTOR( test_renderbuffer_read ) +TEST_FN_REDIRECTOR( test_renderbuffer_write ) +TEST_FN_REDIRECTOR( test_renderbuffer_getinfo ) + +#ifndef GL_ES_VERSION_2_0 +TEST_FN_REDIRECTOR( test_fence_sync ) +#endif + +basefn basefn_list[] = { + TEST_FN_REDIRECT( test_buffers ), + TEST_FN_REDIRECT( test_buffers_getinfo ), + TEST_FN_REDIRECT( test_images_read ), + TEST_FN_REDIRECT( test_images_2D_getinfo ), + TEST_FN_REDIRECT( test_images_read_cube ), + TEST_FN_REDIRECT( test_images_cube_getinfo ), + TEST_FN_REDIRECT( test_images_read_3D ), + TEST_FN_REDIRECT( test_images_3D_getinfo ), + TEST_FN_REDIRECT( test_images_write ), + TEST_FN_REDIRECT( test_images_write_cube ), + TEST_FN_REDIRECT( test_renderbuffer_read ), + TEST_FN_REDIRECT( test_renderbuffer_write ), + 
TEST_FN_REDIRECT( test_renderbuffer_getinfo ) +}; + +#ifndef GL_ES_VERSION_2_0 +basefn basefn_list32[] = { + TEST_FN_REDIRECT( test_fence_sync ) +}; +#endif + +const char *basefn_names[] = { + "buffers", + "buffers_getinfo", + "images_read", + "images_2D_getinfo", + "images_read_cube", + "images_cube_getinfo", + "images_read_3D", + "images_3D_getinfo", + "images_write", + "images_write_cube", + "renderbuffer_read", + "renderbuffer_write", + "renderbuffer_getinfo", + "all" +}; + +const char *basefn_names32[] = { + "fence_sync", + "all" +}; + +ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0]) - 1) == (sizeof(basefn_list) / sizeof(basefn_list[0]))); + +int num_fns = sizeof(basefn_names) / sizeof(char *); +int num_fns32 = sizeof(basefn_names32) / sizeof(char *); + + +int main(int argc, const char *argv[]) +{ + int error = 0; + cl_platform_id platform_id = NULL; + /* To keep it simple, use a static allocation of 32 argv pointers. + argc is not expected to go beyond 32 */ + const char* argv_tmp[32] = {0}; + int argc_tmp = 0; + + test_start(); + + cl_device_type requestedDeviceType = CL_DEVICE_TYPE_DEFAULT; + + for(int z = 1; z < argc; ++z) + {//for + if(strcmp( argv[ z ], "-list" ) == 0 ) + { + log_info( "Available 2.x tests:\n" ); + for( int i = 0; i < num_fns - 1; i++ ) + log_info( "\t%s\n", basefn_names[ i ] ); + + log_info( "Available 3.2 tests:\n" ); + for( int i = 0; i < num_fns32 - 1; i++ ) + log_info( "\t%s\n", basefn_names32[ i ] ); + + log_info( "Note: Any 3.2 test names must follow 2.1 test names on the command line." ); + log_info( "Use environment variables to specify desired device." 
); + + test_finish(); + return 0; + } + + /* support requested device type */ + if(!strcmp(argv[z], "CL_DEVICE_TYPE_GPU")) + { + printf("Requested device type is CL_DEVICE_TYPE_GPU\n"); + requestedDeviceType = CL_DEVICE_TYPE_GPU; + } + else + if(!strcmp(argv[z], "CL_DEVICE_TYPE_CPU")) + { + printf("Requested device type is CL_DEVICE_TYPE_CPU\n"); + log_info("Invalid CL device type. GL tests can only run on a GPU device.\n"); + test_finish(); + return 0; + } + }//for + + // Check to see if any 2.x or 3.2 test names were specified on the command line. + unsigned first_32_testname = 0; + + for (int j=1; (jSupportsCLGLInterop( requestedDeviceType ); + if( supported == 0 ) { + log_info("Test not run because GL-CL interop is not supported for any devices of the requested type.\n"); + test_finish(); + error = 0; + goto cleanup; + } else if ( supported == -1 ) { + log_error("Failed to determine if CL-GL interop is supported.\n"); + test_finish(); + error = -1; + goto cleanup; + } + + // OpenGL tests for non-3.2 //////////////////////////////////////////////////////// + if ((argc == 1) || (first_32_testname != 1)) { + + // At least one device supports CL-GL interop, so init the test. 
+ if( glEnv->Init( &argc, (char **)argv, CL_FALSE ) ) { + log_error("Failed to initialize the GL environment for this test.\n"); + test_finish(); + error = -1; + goto cleanup; + } + + // Create a context to use and then grab a device (or devices) from it + sCurrentContext = glEnv->CreateCLContext(); + if( sCurrentContext == NULL ) + { + log_error( "ERROR: Unable to obtain CL context from GL\n" ); + test_finish(); + error = -1; + goto cleanup; + } + + size_t numDevices = 0; + cl_device_id deviceIDs[ 16 ]; + + error = clGetContextInfo( sCurrentContext, CL_CONTEXT_DEVICES, 0, NULL, &numDevices); + if( error != CL_SUCCESS ) + { + print_error( error, "Unable to get device count from context" ); + test_finish(); + error = -1; + goto cleanup; + } + numDevices /= sizeof(cl_device_id); + + if (numDevices < 1) { + log_error("No devices found.\n"); + test_finish(); + error = -1; + goto cleanup; + } + + error = clGetContextInfo( sCurrentContext, CL_CONTEXT_DEVICES, sizeof( deviceIDs ), deviceIDs, NULL); + if( error != CL_SUCCESS ) { + print_error( error, "Unable to get device list from context" ); + test_finish(); + error = -1; + goto cleanup; + } + + // Execute tests. + int argc_ = (first_32_testname) ? first_32_testname : argc; + + for( size_t i = 0; i < numDevices; i++ ) { + log_info( "\nTesting OpenGL 2.x\n" ); + if( printDeviceHeader( deviceIDs[ i ] ) != CL_SUCCESS ) { + test_finish(); + error = -1; + goto cleanup; + } + + error = clGetDeviceInfo(deviceIDs[ i ], + CL_DEVICE_PLATFORM, + sizeof(platform_id), + &platform_id, + NULL); + if(error) + { + goto cleanup; + } + + error = init_clgl_ext(platform_id); + if (error < 0) + { + goto cleanup; + } + + /* parseAndCallCommandLineTests considers every command line argument + as a test name. This results in the test failing because of considering + args such as 'CL_DEVICE_TYPE_GPU' as test names unless + the actual test name happens to be the first argument. 
+ Instead of changing the behaviour of parseAndCallCommandLineTests + modify the arguments passed to it so as to not affect other tests. + */ + int w = 1; + argc_tmp= argc_; + for(int k = 1; k < argc; k++) + { + if( (strcmp(argv[k], "full") == 0) || + (strcmp(argv[k], "CL_DEVICE_TYPE_CPU") == 0) || + (strcmp(argv[k], "CL_DEVICE_TYPE_GPU") == 0)) + { + argc_tmp--; + continue; + } + else + { + argv_tmp[w++] = argv[k]; + } + } + + // Note: don't use the entire harness, because we have a different way of obtaining the device (via the context) + error = parseAndCallCommandLineTests( argc_tmp, argv_tmp, deviceIDs[ i ], num_fns, basefn_list, basefn_names, true, 0, 1024 ); + if( error != 0 ) + break; + } + + // Clean-up. + // We move this to a common cleanup step to make sure that things will be released properly before the test exit + goto cleanup; + // clReleaseContext( sCurrentContext ); + // delete glEnv; + } + + // OpenGL 3.2 tests. //////////////////////////////////////////////////////// + if ((argc==1) || first_32_testname) { + + // At least one device supports CL-GL interop, so init the test. 
+ if( glEnv->Init( &argc, (char **)argv, CL_TRUE ) ) { + log_error("Failed to initialize the GL environment for this test.\n"); + test_finish(); + error = -1; + goto cleanup; + } + + // Create a context to use and then grab a device (or devices) from it + sCurrentContext = glEnv->CreateCLContext(); + if( sCurrentContext == NULL ) { + log_error( "ERROR: Unable to obtain CL context from GL\n" ); + test_finish(); + error = -1; + goto cleanup; + } + + size_t numDevices = 0; + cl_device_id deviceIDs[ 16 ]; + + error = clGetContextInfo( sCurrentContext, CL_CONTEXT_DEVICES, 0, NULL, &numDevices); + if( error != CL_SUCCESS ) { + print_error( error, "Unable to get device count from context" ); + test_finish(); + error = -1; + goto cleanup; + } + numDevices /= sizeof(cl_device_id); + + if (numDevices < 1) { + log_error("No devices found.\n"); + test_finish(); + error = -1; + goto cleanup; + } + + error = clGetContextInfo( sCurrentContext, CL_CONTEXT_DEVICES, sizeof( deviceIDs ), deviceIDs, NULL); + if( error != CL_SUCCESS ) { + print_error( error, "Unable to get device list from context" ); + test_finish(); + error = -1; + goto cleanup; + } + + int argc_ = (first_32_testname) ? 1 + (argc - first_32_testname) : argc; + const char** argv_ = (first_32_testname) ? &argv[first_32_testname-1] : argv; + + // Execute the tests. + for( size_t i = 0; i < numDevices; i++ ) { + log_info( "\nTesting OpenGL 3.2\n" ); + if( printDeviceHeader( deviceIDs[ i ] ) != CL_SUCCESS ) { + test_finish(); + error = -1; + goto cleanup; + } +#ifdef GL_ES_VERSION_2_0 + log_info("Cannot test OpenGL 3.2! 
This test was built for OpenGL ES 2.0\n"); + test_finish(); + error = -1; + goto cleanup; +#else + // Note: don't use the entire harness, because we have a different way of obtaining the device (via the context) + error = parseAndCallCommandLineTests( argc_, argv_, deviceIDs[ i ], num_fns32, basefn_list32, basefn_names32, true, 0, 1024 ); + if( error != 0 ) + break; +#endif + } + + // Converge on a common cleanup to make sure that things will be released properly before the test exit + goto cleanup; + } + + +// cleanup CL/GL/EGL environment properly when the test exit. +// This change does not affect any functionality of the test + +// Intentional falling through +cleanup: + + // Cleanup EGL + glEnv->terminate_egl_display(); + + // Always make sure that OpenCL context is released properly when the test exit + if(sCurrentContext) + { + clReleaseContext( sCurrentContext ); + sCurrentContext = NULL; + } + delete glEnv; + + + return error; +} \ No newline at end of file diff --git a/test_conformance/gles/procs.h b/test_conformance/gles/procs.h new file mode 100644 index 00000000..9df80d6c --- /dev/null +++ b/test_conformance/gles/procs.h @@ -0,0 +1,35 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "testBase.h" +#include "../../test_common/harness/mt19937.h" + + +extern int test_buffers( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_buffers_getinfo( cl_device_id device, cl_context context, cl_command_queue queue, int numElements ); +extern int test_images_create( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_images_read( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_images_2D_getinfo( cl_device_id device, cl_context context, cl_command_queue queue, int numElements ); +extern int test_images_read_cube( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_images_cube_getinfo( cl_device_id device, cl_context context, cl_command_queue queue, int numElements ); +extern int test_images_read_3D( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_images_3D_getinfo( cl_device_id device, cl_context context, cl_command_queue queue, int numElements ); +extern int test_images_write( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_images_write_cube( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_renderbuffer_read( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_renderbuffer_write( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_renderbuffer_getinfo( cl_device_id device, cl_context context, cl_command_queue queue, int numElements ); +extern int test_fence_sync( cl_device_id device, cl_context context, cl_command_queue queue, int numElements ); + diff --git a/test_conformance/gles/setup_egl.cpp b/test_conformance/gles/setup_egl.cpp new file mode 100644 index 00000000..c72f7f27 --- 
/dev/null +++ b/test_conformance/gles/setup_egl.cpp @@ -0,0 +1,215 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "setup.h" +#include "testBase.h" +#include "../../test_common/harness/errorHelpers.h" +#include + +#include +#include + +#define EGLERR() \ + assert(eglGetError() == EGL_SUCCESS); \ + +#define MAX_DEVICES 10 + +class EGLGLEnvironment : public GLEnvironment +{ +private: + cl_platform_id _platform; + EGLDisplay _display; + EGLContext _context; + EGLSurface _surface; + +public: + EGLGLEnvironment() + :_platform(NULL) + ,_display(EGL_NO_DISPLAY) + ,_context(NULL) + ,_surface(EGL_NO_SURFACE) + { + } + + virtual int Init( int *argc, char **argv, int use_opengl_32 ) + { + EGLint ConfigAttribs[] = + { + EGL_RED_SIZE, 8, + EGL_GREEN_SIZE, 8, + EGL_BLUE_SIZE, 8, + EGL_ALPHA_SIZE, 8, + EGL_DEPTH_SIZE, 16, + EGL_SURFACE_TYPE, EGL_PBUFFER_BIT, +// EGL_BIND_TO_TEXTURE_RGBA, EGL_TRUE, + EGL_NONE + }; + + static const EGLint ContextAttribs[] = + { + EGL_CONTEXT_CLIENT_VERSION, 2, + EGL_NONE + }; + + EGLint conf_list[] = { + EGL_WIDTH, 512, + EGL_HEIGHT, 512, + EGL_TEXTURE_FORMAT, EGL_TEXTURE_RGBA, + EGL_TEXTURE_TARGET, EGL_TEXTURE_2D, + EGL_NONE}; + + EGLint majorVersion; + EGLint minorVersion; + EGLConfig config; + EGLint numConfigs; + + EGLERR(); + _display = eglGetDisplay(EGL_DEFAULT_DISPLAY); + EGLERR(); + + eglInitialize(_display, &majorVersion, &minorVersion); + EGLERR(); + + 
eglBindAPI(EGL_OPENGL_ES_API); + EGLERR(); + + eglChooseConfig(_display, ConfigAttribs, &config, 1, &numConfigs); + EGLERR(); + + _context = eglCreateContext(_display, config, NULL, ContextAttribs); + EGLERR(); + + _surface = eglCreatePbufferSurface(_display, config, conf_list); + EGLERR(); + + eglMakeCurrent(_display, _surface, _surface, _context); + EGLERR(); + + return 0; + } + + virtual cl_context CreateCLContext( void ) + { + cl_context_properties properties[] = { + CL_CONTEXT_PLATFORM, (cl_context_properties) _platform, + CL_GL_CONTEXT_KHR, (cl_context_properties) _context, + CL_EGL_DISPLAY_KHR, (cl_context_properties) _display, + 0 + }; + cl_device_id devices[MAX_DEVICES]; + size_t dev_size; + cl_int status; + + status = clGetGLContextInfoKHR(properties, + CL_DEVICES_FOR_GL_CONTEXT_KHR, + sizeof(devices), + devices, + &dev_size); + if (status != CL_SUCCESS) { + print_error(status, "clGetGLContextInfoKHR failed"); + return NULL; + } + dev_size /= sizeof(cl_device_id); + log_info("GL _context supports %d compute devices\n", dev_size); + + status = clGetGLContextInfoKHR(properties, + CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR, + sizeof(devices), + devices, + &dev_size); + if (status != CL_SUCCESS) { + print_error(status, "clGetGLContextInfoKHR failed"); + return NULL; + } + + if (!dev_size) + { + log_info("GL _context current device is not a CL device.\n"); + return NULL; + } + + return clCreateContext(properties, 1, &devices[0], NULL, NULL, &status); + } + + virtual int SupportsCLGLInterop( cl_device_type device_type ) + { + cl_device_id devices[MAX_DEVICES]; + cl_uint num_of_devices; + int interop_devices = 0; + int error; + + error = clGetPlatformIDs(1, &_platform, NULL); + if (error) { + print_error(error, "clGetPlatformIDs failed"); + return -1; + } + + error = clGetDeviceIDs(_platform, device_type, MAX_DEVICES, devices, &num_of_devices); + if (error) { + print_error(error, "clGetDeviceIDs failed"); + return -1; + } + + // Check all devices, search for one that 
supports cl_khr_gl_sharing + char extensions[8192]; + for (int i=0; i<(int)num_of_devices; i++) { + error = clGetDeviceInfo(devices[i], CL_DEVICE_EXTENSIONS, sizeof(extensions), extensions, NULL); + if (error) { + print_error(error, "clGetDeviceInfo failed"); + return -1; + } + + if (strstr(extensions, "cl_khr_gl_sharing ") == NULL) { + log_info("Device %d of %d does not support required extension cl_khr_gl_sharing.\n", i+1, num_of_devices); + } else { + log_info("Device %d of %d supports required extension cl_khr_gl_sharing.\n", i+1, num_of_devices); + interop_devices++; + } + } + return interop_devices > 0; + } + + // Change to cleanup egl environment properly when the test exit. + // This change does not affect any functionality of the test it self + virtual void terminate_egl_display() + { + if(_display != EGL_NO_DISPLAY) + { + eglMakeCurrent(_display, EGL_NO_SURFACE, EGL_NO_SURFACE, EGL_NO_CONTEXT); + EGLERR(); + + eglDestroyContext(_display, _context); + EGLERR(); + _context = EGL_NO_CONTEXT; + + eglDestroySurface(_display, _surface); + EGLERR(); + _surface = EGL_NO_SURFACE; + + eglTerminate(_display); + EGLERR(); + _display = EGL_NO_DISPLAY; + } + } + + virtual ~EGLGLEnvironment() + { + } +}; + +GLEnvironment * GLEnvironment::Instance( void ) +{ + return new EGLGLEnvironment(); +} diff --git a/test_conformance/gles/testBase.h b/test_conformance/gles/testBase.h new file mode 100644 index 00000000..2bb59a61 --- /dev/null +++ b/test_conformance/gles/testBase.h @@ -0,0 +1,71 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef _testBase_h +#define _testBase_h + +#include +#include +#include +#include + +#if !defined(_WIN32) +#include +#endif + +#include +#include + +#if !defined (__APPLE__) +#include +#include "../../test_common/gles/gl_headers.h" +#include +#else +#include "../../test_common/gl/gl_headers.h" +#endif + +#include "../../test_common/harness/errorHelpers.h" +#include "../../test_common/harness/kernelHelpers.h" +#include "../../test_common/harness/threadTesting.h" +#include "../../test_common/harness/typeWrappers.h" +#include "../../test_common/harness/conversions.h" +#include "../../test_common/harness/mt19937.h" + +#ifdef GL_ES_VERSION_2_0 +#include "../../test_common/gles/helpers.h" +#else +#include "../../test_common/gl/helpers.h" +#endif + +extern const char *get_kernel_suffix( cl_image_format *format ); +extern const char *get_write_conversion( cl_image_format *format, ExplicitType type); +extern ExplicitType get_read_kernel_type( cl_image_format *format ); +extern ExplicitType get_write_kernel_type( cl_image_format *format ); + +extern char * convert_to_expected( void * inputBuffer, size_t numPixels, ExplicitType inType, ExplicitType outType ); +extern int validate_integer_results( void *expectedResults, void *actualResults, size_t width, size_t height, size_t typeSize ); +extern int validate_integer_results( void *expectedResults, void *actualResults, size_t width, size_t height, size_t depth, size_t typeSize ); +extern int validate_float_results( void *expectedResults, void *actualResults, size_t width, size_t height ); +extern int 
validate_float_results( void *expectedResults, void *actualResults, size_t width, size_t height, size_t depth ); + +extern int CheckGLObjectInfo(cl_mem mem, cl_gl_object_type expected_cl_gl_type, GLuint expected_gl_name, + GLenum expected_cl_gl_texture_target, GLint expected_cl_gl_mipmap_level); + +extern bool CheckGLIntegerExtensionSupport(); + +#endif // _testBase_h + + + diff --git a/test_conformance/gles/test_buffers.cpp b/test_conformance/gles/test_buffers.cpp new file mode 100644 index 00000000..31d81d64 --- /dev/null +++ b/test_conformance/gles/test_buffers.cpp @@ -0,0 +1,402 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "testBase.h" +#include "testHarness.h" +#include "../../test_common/harness/conversions.h" +#include "../../test_common/harness/typeWrappers.h" +#include +#include + +#if !defined (__APPLE__) + #include +#endif + +extern "C" { extern cl_uint gRandomSeed; }; + +static const char *bufferKernelPattern = +"__kernel void sample_test( __global %s%s *source, __global %s%s *clDest, __global %s%s *glDest )\n" +"{\n" +" int tid = get_global_id(0);\n" +" clDest[ tid ] = source[ tid ] + (%s%s)(1);\n" +" glDest[ tid ] = source[ tid ] + (%s%s)(2);\n" +"}\n"; + +#define TYPE_CASE( enum, type, range, offset ) \ + case enum: \ + { \ + cl_##type *ptr = (cl_##type *)outData; \ + for( i = 0; i < count; i++ ) \ + ptr[ i ] = (cl_##type)( ( genrand_int32(d) & range ) - offset ); \ + break; \ + } + +void gen_input_data( ExplicitType type, size_t count, MTdata d, void *outData ) +{ + size_t i; + + switch( type ) + { + case kBool: + { + bool *boolPtr = (bool *)outData; + for( i = 0; i < count; i++ ) + { + boolPtr[i] = ( genrand_int32(d) & 1 ) ? 
true : false; + } + break; + } + + TYPE_CASE( kChar, char, 250, 127 ) + TYPE_CASE( kUChar, uchar, 250, 0 ) + TYPE_CASE( kShort, short, 65530, 32767 ) + TYPE_CASE( kUShort, ushort, 65530, 0 ) + TYPE_CASE( kInt, int, 0x0fffffff, 0x70000000 ) + TYPE_CASE( kUInt, uint, 0x0fffffff, 0 ) + + case kLong: + { + cl_long *longPtr = (cl_long *)outData; + for( i = 0; i < count; i++ ) + { + longPtr[i] = (cl_long)genrand_int32(d) | ( (cl_ulong)genrand_int32(d) << 32 ); + } + break; + } + + case kULong: + { + cl_ulong *ulongPtr = (cl_ulong *)outData; + for( i = 0; i < count; i++ ) + { + ulongPtr[i] = (cl_ulong)genrand_int32(d) | ( (cl_ulong)genrand_int32(d) << 32 ); + } + break; + } + + case kFloat: + { + cl_float *floatPtr = (float *)outData; + for( i = 0; i < count; i++ ) + floatPtr[i] = get_random_float( -100000.f, 100000.f, d ); + break; + } + + default: + log_error( "ERROR: Invalid type passed in to generate_random_data!\n" ); + break; + } +} + +#define INC_CASE( enum, type ) \ + case enum: \ + { \ + cl_##type *src = (cl_##type *)inData; \ + cl_##type *dst = (cl_##type *)outData; \ + *dst = *src + 1; \ + break; \ + } + +void get_incremented_value( void *inData, void *outData, ExplicitType type ) +{ + switch( type ) + { + INC_CASE( kChar, char ) + INC_CASE( kUChar, uchar ) + INC_CASE( kShort, short ) + INC_CASE( kUShort, ushort ) + INC_CASE( kInt, int ) + INC_CASE( kUInt, uint ) + INC_CASE( kLong, long ) + INC_CASE( kULong, ulong ) + INC_CASE( kFloat, float ) + default: + break; + } +} + +int test_buffer_kernel(cl_context context, cl_command_queue queue, ExplicitType vecType, size_t vecSize, int numElements, int validate_only, MTdata d) +{ + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper streams[ 3 ]; + size_t dataSize = numElements * 16 * sizeof(cl_long); +#if !(defined(_WIN32) && defined(_MSC_VER)) + cl_long inData[numElements * 16], outDataCL[numElements * 16], outDataGL[ numElements * 16 ]; +#else + cl_long* inData = (cl_long*)_malloca(dataSize); + 
cl_long* outDataCL = (cl_long*)_malloca(dataSize); + cl_long* outDataGL = (cl_long*)_malloca(dataSize); +#endif + glBufferWrapper inGLBuffer, outGLBuffer; + int i; + size_t bufferSize; + + int error; + size_t threads[1], localThreads[1]; + char kernelSource[10240]; + char *programPtr; + char sizeName[4]; + + /* Create the source */ + if( vecSize == 1 ) + sizeName[ 0 ] = 0; + else + sprintf( sizeName, "%d", (int)vecSize ); + + sprintf( kernelSource, bufferKernelPattern, get_explicit_type_name( vecType ), sizeName, + get_explicit_type_name( vecType ), sizeName, + get_explicit_type_name( vecType ), sizeName, + get_explicit_type_name( vecType ), sizeName, + get_explicit_type_name( vecType ), sizeName ); + + /* Create kernels */ + programPtr = kernelSource; + if( create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&programPtr, "sample_test" ) ) + { + return -1; + } + + bufferSize = numElements * vecSize * get_explicit_type_size( vecType ); + + /* Generate some almost-random input data */ + gen_input_data( vecType, vecSize * numElements, d, inData ); + memset( outDataCL, 0, dataSize ); + memset( outDataGL, 0, dataSize ); + + /* Generate some GL buffers to go against */ + glGenBuffers( 1, &inGLBuffer ); + glGenBuffers( 1, &outGLBuffer ); + + glBindBuffer( GL_ARRAY_BUFFER, inGLBuffer ); + glBufferData( GL_ARRAY_BUFFER, bufferSize, inData, GL_STATIC_DRAW ); + + // Note: we need to bind the output buffer, even though we don't care about its values yet, + // because CL needs it to get the buffer size + glBindBuffer( GL_ARRAY_BUFFER, outGLBuffer ); + glBufferData( GL_ARRAY_BUFFER, bufferSize, outDataGL, GL_STATIC_DRAW ); + + glBindBuffer( GL_ARRAY_BUFFER, 0 ); + glFlush(); + + + /* Generate some streams. 
The first and last ones are GL, middle one just vanilla CL */ + streams[ 0 ] = (*clCreateFromGLBuffer_ptr)( context, CL_MEM_READ_ONLY, inGLBuffer, &error ); + test_error( error, "Unable to create input GL buffer" ); + + streams[ 1 ] = clCreateBuffer( context, CL_MEM_READ_WRITE, bufferSize, NULL, &error ); + test_error( error, "Unable to create output CL buffer" ); + + streams[ 2 ] = (*clCreateFromGLBuffer_ptr)( context, CL_MEM_WRITE_ONLY, outGLBuffer, &error ); + test_error( error, "Unable to create output GL buffer" ); + + + /* Validate the info */ + if (validate_only) { + int result = (CheckGLObjectInfo(streams[0], CL_GL_OBJECT_BUFFER, (GLuint)inGLBuffer, (GLenum)0, 0) | + CheckGLObjectInfo(streams[2], CL_GL_OBJECT_BUFFER, (GLuint)outGLBuffer, (GLenum)0, 0) ); + for(i=0;i<3;i++) + { + clReleaseMemObject(streams[i]); + streams[i] = NULL; + } + + glDeleteBuffers(1, &inGLBuffer); inGLBuffer = 0; + glDeleteBuffers(1, &outGLBuffer); outGLBuffer = 0; + + return result; + } + + /* Assign streams and execute */ + for( int i = 0; i < 3; i++ ) + { + error = clSetKernelArg( kernel, i, sizeof( streams[ i ] ), &streams[ i ] ); + test_error( error, "Unable to set kernel arguments" ); + } + error = (*clEnqueueAcquireGLObjects_ptr)( queue, 1, &streams[ 0 ], 0, NULL, NULL); + test_error( error, "Unable to acquire GL obejcts"); + error = (*clEnqueueAcquireGLObjects_ptr)( queue, 1, &streams[ 2 ], 0, NULL, NULL); + test_error( error, "Unable to acquire GL obejcts"); + + /* Run the kernel */ + threads[0] = numElements; + + error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] ); + test_error( error, "Unable to get work group size to use" ); + + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); + test_error( error, "Unable to execute test kernel" ); + + error = (*clEnqueueReleaseGLObjects_ptr)( queue, 1, &streams[ 0 ], 0, NULL, NULL ); + test_error(error, "clEnqueueReleaseGLObjects failed"); + error = 
(*clEnqueueReleaseGLObjects_ptr)( queue, 1, &streams[ 2 ], 0, NULL, NULL ); + test_error(error, "clEnqueueReleaseGLObjects failed"); + + // Get the results from both CL and GL and make sure everything looks correct + error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, bufferSize, outDataCL, 0, NULL, NULL ); + test_error( error, "Unable to read output CL array!" ); + glBindBuffer( GL_ARRAY_BUFFER, outGLBuffer ); + void *glMem = glMapBufferRange(GL_ARRAY_BUFFER, 0, bufferSize, GL_MAP_READ_BIT ); + memcpy( outDataGL, glMem, bufferSize ); + glUnmapBuffer( GL_ARRAY_BUFFER ); + char *inP = (char *)inData, *glP = (char *)outDataGL, *clP = (char *)outDataCL; + error = 0; + for( size_t i = 0; i < numElements * vecSize; i++ ) + { + cl_long expectedCLValue, expectedGLValue; + get_incremented_value( inP, &expectedCLValue, vecType ); + get_incremented_value( &expectedCLValue, &expectedGLValue, vecType ); + + if( memcmp( clP, &expectedCLValue, get_explicit_type_size( vecType ) ) != 0 ) + { + char scratch[ 64 ]; + log_error( "ERROR: Data sample %d from the CL output did not validate!\n", (int)i ); + log_error( "\t Input: %s\n", GetDataVectorString( inP, get_explicit_type_size( vecType ), 1, scratch ) ); + log_error( "\tExpected: %s\n", GetDataVectorString( &expectedCLValue, get_explicit_type_size( vecType ), 1, scratch ) ); + log_error( "\t Actual: %s\n", GetDataVectorString( clP, get_explicit_type_size( vecType ), 1, scratch ) ); + error = -1; + } + + if( memcmp( glP, &expectedGLValue, get_explicit_type_size( vecType ) ) != 0 ) + { + char scratch[ 64 ]; + log_error( "ERROR: Data sample %d from the GL output did not validate!\n", (int)i ); + log_error( "\t Input: %s\n", GetDataVectorString( inP, get_explicit_type_size( vecType ), 1, scratch ) ); + log_error( "\tExpected: %s\n", GetDataVectorString( &expectedGLValue, get_explicit_type_size( vecType ), 1, scratch ) ); + log_error( "\t Actual: %s\n", GetDataVectorString( glP, get_explicit_type_size( vecType ), 1, scratch ) 
); + error = -1; + } + + if( error ) + return error; + + inP += get_explicit_type_size( vecType ); + glP += get_explicit_type_size( vecType ); + clP += get_explicit_type_size( vecType ); + } + + for(i=0;i<3;i++) + { + clReleaseMemObject(streams[i]); + streams[i] = NULL; + } + + glDeleteBuffers(1, &inGLBuffer); inGLBuffer = 0; + glDeleteBuffers(1, &outGLBuffer); outGLBuffer = 0; + +#if (defined(_WIN32) && defined(_MSC_VER)) + _freea(inData); + _freea(outDataCL); + _freea(outDataGL); +#endif + + return 0; +} + +int test_buffers( cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) +{ + ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kNumExplicitTypes }; + unsigned int vecSizes[] = { 1, 2, 4, 8, 16, 0 }; + unsigned int index, typeIndex; + int retVal = 0; + RandomSeed seed(gRandomSeed); + + /* 64-bit ints optional in embedded profile */ + int hasLong = 1; + int isEmbedded = 0; + char profile[1024] = ""; + retVal = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL); + if (retVal) + { + print_error(retVal, "clGetDeviceInfo for CL_DEVICE_PROFILE failed\n" ); + return -1; + } + isEmbedded = NULL != strstr(profile, "EMBEDDED_PROFILE"); + if(isEmbedded && !is_extension_available(device, "cles_khr_int64")) + { + hasLong = 0; + } + + for( typeIndex = 0; vecType[ typeIndex ] != kNumExplicitTypes; typeIndex++ ) + { + for( index = 0; vecSizes[ index ] != 0; index++ ) + { + /* Fix bug 7124 */ + if ( (vecType[ typeIndex ] == kLong || vecType[ typeIndex ] == kULong) && !hasLong ) + continue; + + // Test! 
+ if( test_buffer_kernel( context, queue, vecType[ typeIndex ], vecSizes[ index ], numElements, 0, seed) != 0 ) + { + char sizeNames[][ 4 ] = { "", "", "2", "", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" }; + log_error( " Buffer test %s%s FAILED\n", get_explicit_type_name( vecType[ typeIndex ] ), sizeNames[ vecSizes[ index ] ] ); + retVal++; + } + } + } + + return retVal; + +} + + +int test_buffers_getinfo( cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) +{ + ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kNumExplicitTypes }; + unsigned int vecSizes[] = { 1, 2, 4, 8, 16, 0 }; + unsigned int index, typeIndex; + int retVal = 0; + RandomSeed seed( gRandomSeed ); + + /* 64-bit ints optional in embedded profile */ + int hasLong = 1; + int isEmbedded = 0; + char profile[1024] = ""; + retVal = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL); + if (retVal) + { + print_error(retVal, "clGetDeviceInfo for CL_DEVICE_PROFILE failed\n" ); + return -1; + } + isEmbedded = NULL != strstr(profile, "EMBEDDED_PROFILE"); + if(isEmbedded && !is_extension_available(device, "cles_khr_int64")) + { + hasLong = 0; + } + + for( typeIndex = 0; vecType[ typeIndex ] != kNumExplicitTypes; typeIndex++ ) + { + for( index = 0; vecSizes[ index ] != 0; index++ ) + { + /* Fix bug 7124 */ + if ( (vecType[ typeIndex ] == kLong || vecType[ typeIndex ] == kULong) && !hasLong ) + continue; + + // Test! 
+ if( test_buffer_kernel( context, queue, vecType[ typeIndex ], vecSizes[ index ], numElements, 1, seed ) != 0 ) + { + char sizeNames[][ 4 ] = { "", "", "2", "", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" }; + log_error( " Buffer test %s%s FAILED\n", get_explicit_type_name( vecType[ typeIndex ] ), sizeNames[ vecSizes[ index ] ] ); + retVal++; + } + } + } + + return retVal; + +} + + + diff --git a/test_conformance/gles/test_fence_sync.cpp b/test_conformance/gles/test_fence_sync.cpp new file mode 100644 index 00000000..1c117943 --- /dev/null +++ b/test_conformance/gles/test_fence_sync.cpp @@ -0,0 +1,664 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "gl_headers.h" + +#include "testBase.h" +#include "setup.h" +#include "../../test_common/harness/genericThread.h" + +#ifndef GLsync +// For OpenGL before 3.2, we look for the ARB_sync extension and try to use that +#if !defined(_WIN32) +#include +#endif // !_WIN32 +typedef int64_t GLint64; +typedef uint64_t GLuint64; +typedef struct __GLsync *GLsync; + +typedef GLsync (*glFenceSyncPtr)(GLenum condition,GLbitfield flags); +glFenceSyncPtr glFenceSyncFunc; + +typedef bool (*glIsSyncPtr)(GLsync sync); +glIsSyncPtr glIsSyncFunc; + +typedef void (*glDeleteSyncPtr)(GLsync sync); +glDeleteSyncPtr glDeleteSyncFunc; + +typedef GLenum (*glClientWaitSyncPtr)(GLsync sync,GLbitfield flags,GLuint64 timeout); +glClientWaitSyncPtr glClientWaitSyncFunc; + +typedef void (*glWaitSyncPtr)(GLsync sync,GLbitfield flags,GLuint64 timeout); +glWaitSyncPtr glWaitSyncFunc; + +typedef void (*glGetInteger64vPtr)(GLenum pname, GLint64 *params); +glGetInteger64vPtr glGetInteger64vFunc; + +typedef void (*glGetSyncivPtr)(GLsync sync,GLenum pname,GLsizei bufSize,GLsizei *length, + GLint *values); +glGetSyncivPtr glGetSyncivFunc; + +#define CHK_GL_ERR() printf("%s\n", gluErrorString(glGetError())) + +static void InitSyncFns( void ) +{ + glFenceSyncFunc = (glFenceSyncPtr)glutGetProcAddress( "glFenceSync" ); + glIsSyncFunc = (glIsSyncPtr)glutGetProcAddress( "glIsSync" ); + glDeleteSyncFunc = (glDeleteSyncPtr)glutGetProcAddress( "glDeleteSync" ); + glClientWaitSyncFunc = (glClientWaitSyncPtr)glutGetProcAddress( "glClientWaitSync" ); + glWaitSyncFunc = (glWaitSyncPtr)glutGetProcAddress( "glWaitSync" ); + glGetInteger64vFunc = (glGetInteger64vPtr)glutGetProcAddress( "glGetInteger64v" ); + glGetSyncivFunc = (glGetSyncivPtr)glutGetProcAddress( "glGetSynciv" ); +} + +#define GL_MAX_SERVER_WAIT_TIMEOUT 0x9111 + +#define GL_OBJECT_TYPE 0x9112 +#define GL_SYNC_CONDITION 0x9113 +#define GL_SYNC_STATUS 0x9114 +#define GL_SYNC_FLAGS 0x9115 + +#define GL_SYNC_FENCE 0x9116 + +#define 
GL_SYNC_GPU_COMMANDS_COMPLETE 0x9117 + +#define GL_UNSIGNALED 0x9118 +#define GL_SIGNALED 0x9119 + +#define GL_SYNC_FLUSH_COMMANDS_BIT 0x00000001 + +#define GL_TIMEOUT_IGNORED 0xFFFFFFFFFFFFFFFFull + +#define GL_ALREADY_SIGNALED 0x911A +#define GL_TIMEOUT_EXPIRED 0x911B +#define GL_CONDITION_SATISFIED 0x911C +#define GL_WAIT_FAILED 0x911D + +#define USING_ARB_sync 1 +#endif + +typedef cl_event (CL_API_CALL *clCreateEventFromGLsyncKHR_fn)( cl_context context, GLsync sync, cl_int *errCode_ret) ; + +clCreateEventFromGLsyncKHR_fn clCreateEventFromGLsyncKHR_ptr; + + +static const char *updateBuffersKernel[] = { + "__kernel void update( __global float4 * vertices, __global float4 *colors, int horizWrap, int rowIdx )\n" + "{\n" + " size_t tid = get_global_id(0);\n" + "\n" + " size_t xVal = ( tid & ( horizWrap - 1 ) );\n" + " vertices[ tid * 2 + 0 ] = (float4)( xVal, rowIdx*16.f, 0.0f, 1.f );\n" + " vertices[ tid * 2 + 1 ] = (float4)( xVal, rowIdx*16.f + 4.0f, 0.0f, 1.f );\n" + "\n" + " int rowV = rowIdx + 1;\n" + " colors[ tid * 2 + 0 ] = (float4)( ( rowV & 1 ) / 255.f, ( ( rowV & 2 ) >> 1 ) / 255.f, ( ( rowV & 4 ) >> 2 ) / 255.f, 1.f );\n" + " //colors[ tid * 2 + 0 ] = (float4)( (float)xVal/(float)horizWrap, 1.0f, 1.0f, 1.0f );\n" + " colors[ tid * 2 + 1 ] = colors[ tid * 2 + 0 ];\n" + "}\n" }; + +//Passthrough VertexShader +static const char vertexshader[] = +"uniform mat4 projMatrix;\n" +"attribute vec4 inPosition;\n" +"attribute vec4 inColor;\n" +"varying vec4 outColor;\n" +"void main (void) {\n" +" gl_Position = projMatrix*inPosition;\n" +" outColor = inColor;\n" +"}\n"; + +//Passthrough FragmentShader +static const char fragmentshader[] = +"varying vec4 outColor;\n" +"void main (void) {\n" +" gl_FragColor = outColor;\n" +"}\n"; + +GLuint createShaderProgram(GLint *posLoc, GLint *colLoc) +{ + GLint logLength, status; + GLuint program = glCreateProgram(); + GLuint vpShader; + + char* vpstr = (char*)malloc(sizeof(vertexshader)); + strcpy(vpstr, vertexshader); + + 
vpShader = glCreateShader(GL_VERTEX_SHADER); + glShaderSource(vpShader, 1, (const GLchar **)&vpstr, NULL); + glCompileShader(vpShader); + glGetShaderiv(vpShader, GL_INFO_LOG_LENGTH, &logLength); + if (logLength > 0) { + GLchar *log = (GLchar*) malloc(logLength); + glGetShaderInfoLog(vpShader, logLength, &logLength, log); + log_info("Vtx Shader compile log:\n%s", log); + free(log); + } + + free(vpstr); + vpstr = NULL; + + glGetShaderiv(vpShader, GL_COMPILE_STATUS, &status); + if (status == 0) + { + log_error("Failed to compile vtx shader:\n"); + return 0; + } + + glAttachShader(program, vpShader); + + GLuint fpShader; + char* fpstr = (char*)malloc(strlen(fragmentshader)); + strcpy(fpstr, fragmentshader); + fpShader = glCreateShader(GL_FRAGMENT_SHADER); + glShaderSource(fpShader, 1, (const GLchar **)&fpstr, NULL); + glCompileShader(fpShader); + + free(fpstr); + fpstr = NULL; + + glGetShaderiv(fpShader, GL_INFO_LOG_LENGTH, &logLength); + if (logLength > 0) { + GLchar *log = (GLchar*)malloc(logLength); + glGetShaderInfoLog(fpShader, logLength, &logLength, log); + log_info("Frag Shader compile log:\n%s", log); + free(log); + } + + glAttachShader(program, fpShader); + glGetShaderiv(fpShader, GL_COMPILE_STATUS, &status); + if (status == 0) + { + log_error("Failed to compile frag shader:\n\n"); + return 0; + } + + glLinkProgram(program); + glGetProgramiv(program, GL_INFO_LOG_LENGTH, &logLength); + if (logLength > 0) { + GLchar *log = (GLchar*)malloc(logLength); + glGetProgramInfoLog(program, logLength, &logLength, log); + log_info("Program link log:\n%s", log); + free(log); + } + + glGetProgramiv(program, GL_LINK_STATUS, &status); + if (status == 0) + { + log_error("Failed to link program\n"); + return 0; + } + + glValidateProgram(program); + glGetProgramiv(program, GL_INFO_LOG_LENGTH, &logLength); + if (logLength > 0) { + GLchar *log = (GLchar*)malloc(logLength); + glGetProgramInfoLog(program, logLength, &logLength, log); + log_info("Program validate log:\n%s", log); + 
free(log); + } + + glGetProgramiv(program, GL_VALIDATE_STATUS, &status); + if (status == 0) + { + log_error("Failed to validate program\n"); + return 0; + } + + *posLoc = glGetAttribLocation(program, "inPosition"); + *colLoc = glGetAttribLocation(program, "inColor"); + + return program; +} + +void destroyShaderProgram(GLuint program) +{ + GLuint shaders[2]; + GLsizei count; + glUseProgram(0); + glGetAttachedShaders(program, 2, &count, shaders); + int i; + for(i = 0; i < count; i++) + { + glDetachShader(program, shaders[i]); + glDeleteShader(shaders[i]); + } + glDeleteProgram(program); +} + +// This function queues up and runs the above CL kernel that writes the vertex data +cl_int run_cl_kernel( cl_kernel kernel, cl_command_queue queue, cl_mem stream0, cl_mem stream1, + cl_int rowIdx, cl_event fenceEvent, size_t numThreads ) +{ + cl_int error = clSetKernelArg( kernel, 3, sizeof( rowIdx ), &rowIdx ); + test_error( error, "Unable to set kernel arguments" ); + + clEventWrapper acqEvent1, acqEvent2, kernEvent, relEvent1, relEvent2; + int numEvents = ( fenceEvent != NULL ) ? 1 : 0; + cl_event *fence_evt = ( fenceEvent != NULL ) ? 
&fenceEvent : NULL; + + error = (*clEnqueueAcquireGLObjects_ptr)( queue, 1, &stream0, numEvents, fence_evt, &acqEvent1 ); + test_error( error, "Unable to acquire GL obejcts"); + error = (*clEnqueueAcquireGLObjects_ptr)( queue, 1, &stream1, numEvents, fence_evt, &acqEvent2 ); + test_error( error, "Unable to acquire GL obejcts"); + + cl_event evts[ 2 ] = { acqEvent1, acqEvent2 }; + + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, &numThreads, NULL, 2, evts, &kernEvent ); + test_error( error, "Unable to execute test kernel" ); + + error = (*clEnqueueReleaseGLObjects_ptr)( queue, 1, &stream0, 1, &kernEvent, &relEvent1 ); + test_error(error, "clEnqueueReleaseGLObjects failed"); + error = (*clEnqueueReleaseGLObjects_ptr)( queue, 1, &stream1, 1, &kernEvent, &relEvent2 ); + test_error(error, "clEnqueueReleaseGLObjects failed"); + + evts[ 0 ] = relEvent1; + evts[ 1 ] = relEvent2; + error = clWaitForEvents( 2, evts ); + test_error( error, "Unable to wait for release events" ); + + return 0; +} + +class RunThread : public genericThread +{ +public: + + cl_kernel mKernel; + cl_command_queue mQueue; + cl_mem mStream0, mStream1; + cl_int mRowIdx; + cl_event mFenceEvent; + size_t mNumThreads; + + RunThread( cl_kernel kernel, cl_command_queue queue, cl_mem stream0, cl_mem stream1, size_t numThreads ) + : mKernel( kernel ), mQueue( queue ), mStream0( stream0 ), mStream1( stream1 ), mNumThreads( numThreads ) + { + } + + void SetRunData( cl_int rowIdx, cl_event fenceEvent ) + { + mRowIdx = rowIdx; + mFenceEvent = fenceEvent; + } + + virtual void * IRun( void ) + { + cl_int error = run_cl_kernel( mKernel, mQueue, mStream0, mStream1, mRowIdx, mFenceEvent, mNumThreads ); + return (void *)error; + } +}; + + +int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_queue queue, bool separateThreads, GLint rend_vs, GLint read_vs, cl_device_id rend_device ) +{ + int error; + const int framebufferSize = 512; + cl_platform_id platform_id = NULL; + + if( 
!is_extension_available( device, "cl_khr_gl_event" ) ) + { + log_info( "NOTE: cl_khr_gl_event extension not present on this device; skipping fence sync test\n" ); + return 0; + } + + error = clGetDeviceInfo(device, + CL_DEVICE_PLATFORM, + sizeof(platform_id), + &platform_id, + NULL); + if(error) + { + return error; + } + + clCreateEventFromGLsyncKHR_ptr = \ + (clCreateEventFromGLsyncKHR_fn)clGetExtensionFunctionAddressForPlatform(platform_id,"clCreateEventFromGLsyncKHR"); + if( clCreateEventFromGLsyncKHR_ptr == NULL ) + { + log_error( "ERROR: Unable to run fence_sync test (clCreateEventFromGLsyncKHR function not discovered!)\n" ); + clCreateEventFromGLsyncKHR_ptr = \ + (clCreateEventFromGLsyncKHR_fn)clGetExtensionFunctionAddressForPlatform(platform_id, "clCreateEventFromGLsyncAPPLE"); + return -1; + } + +#ifdef USING_ARB_sync + char *gl_version_str = (char*)glGetString( GL_VERSION ); + float glCoreVersion; + sscanf(gl_version_str, "%f", &glCoreVersion); + if( glCoreVersion < 3.0f ) + { + log_info( "OpenGL version %f does not support fence/sync! Skipping test.\n", glCoreVersion ); + return 0; + } + +#ifdef __APPLE__ + CGLContextObj currCtx = CGLGetCurrentContext(); + CGLPixelFormatObj pixFmt = CGLGetPixelFormat(currCtx); + GLint val, screen; + CGLGetVirtualScreen(currCtx, &screen); + CGLDescribePixelFormat(pixFmt, screen, kCGLPFAOpenGLProfile, &val); + if(val != kCGLOGLPVersion_3_2_Core) + { + log_error( "OpenGL context was not created with OpenGL version >= 3.0 profile even though platform supports it" + "OpenGL profile %f does not support fence/sync! 
Skipping test.\n", glCoreVersion ); + return -1; + } +#else + // Need platform specific way to query if current GL context was created with 3.x profile + log_error( "ERROR: not implemented\n\n" ); + return -1; +#endif + + InitSyncFns(); +#endif + +#ifdef __APPLE__ + CGLSetVirtualScreen(CGLGetCurrentContext(), rend_vs); +#else + // Need platform specific way to set device with id rend_vs the current + // rendering target + log_error( "ERROR: not implemented\n\n" ); + return -1; +#endif + + GLint posLoc, colLoc; + GLuint shaderprogram = createShaderProgram(&posLoc, &colLoc); + if(!shaderprogram) + { + log_error("Failed to create shader program\n"); + return -1; + } + + float l = 0.0f; float r = framebufferSize; + float b = 0.0f; float t = framebufferSize; + + float projMatrix[16] = { 2.0f/(r-l), 0.0f, 0.0f, 0.0f, + 0.0f, 2.0f/(t-b), 0.0f, 0.0f, + 0.0f, 0.0f, -1.0f, 0.0f, + -(r+l)/(r-l), -(t+b)/(t-b), 0.0f, 1.0f + }; + + glUseProgram(shaderprogram); + GLuint projMatLoc = glGetUniformLocation(shaderprogram, "projMatrix"); + glUniformMatrix4fv(projMatLoc, 1, 0, projMatrix); + glUseProgram(0); + + // Note: the framebuffer is just the target to verify our results against, so we don't + // really care to go through all the possible formats in this case + glFramebufferWrapper glFramebuffer; + glRenderbufferWrapper glRenderbuffer; + error = CreateGLRenderbufferRaw( framebufferSize, 128, GL_COLOR_ATTACHMENT0_EXT, + GL_RGBA, GL_UNSIGNED_BYTE, + &glFramebuffer, &glRenderbuffer ); + if( error != 0 ) + return error; + +// GLuint vao; +// glGenVertexArrays(1, &vao); +// glBindVertexArray(vao); + + glBufferWrapper vtxBuffer, colorBuffer; + glGenBuffers( 1, &vtxBuffer ); + glGenBuffers( 1, &colorBuffer ); + + const int numHorizVertices = ( framebufferSize * 64 ) + 1; + + glBindBuffer( GL_ARRAY_BUFFER, vtxBuffer ); + glBufferData( GL_ARRAY_BUFFER, sizeof( GLfloat ) * numHorizVertices * 2 * 4, NULL, GL_STATIC_DRAW ); + + glBindBuffer( GL_ARRAY_BUFFER, colorBuffer ); + glBufferData( 
GL_ARRAY_BUFFER, sizeof( GLfloat ) * numHorizVertices * 2 * 4, NULL, GL_STATIC_DRAW ); + + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper streams[ 2 ]; + + if( create_single_kernel_helper( context, &program, &kernel, 1, updateBuffersKernel, "update" ) ) + return -1; + + + streams[ 0 ] = (*clCreateFromGLBuffer_ptr)( context, CL_MEM_READ_WRITE, vtxBuffer, &error ); + test_error( error, "Unable to create CL buffer from GL vertex buffer" ); + + streams[ 1 ] = (*clCreateFromGLBuffer_ptr)( context, CL_MEM_READ_WRITE, colorBuffer, &error ); + test_error( error, "Unable to create CL buffer from GL color buffer" ); + + error = clSetKernelArg( kernel, 0, sizeof( streams[ 0 ] ), &streams[ 0 ] ); + test_error( error, "Unable to set kernel arguments" ); + + error = clSetKernelArg( kernel, 1, sizeof( streams[ 1 ] ), &streams[ 1 ] ); + test_error( error, "Unable to set kernel arguments" ); + + cl_int horizWrap = (cl_int)framebufferSize; + error = clSetKernelArg( kernel, 2, sizeof( horizWrap ), &horizWrap ); + test_error( error, "Unable to set kernel arguments" ); + + glViewport( 0, 0, framebufferSize, framebufferSize ); + glClearColor( 0, 0, 0, 0 ); + glClear( GL_COLOR_BUFFER_BIT ); + glClear( GL_DEPTH_BUFFER_BIT ); + glDisable( GL_DEPTH_TEST ); + glEnable( GL_BLEND ); + glBlendFunc( GL_ONE, GL_ONE ); + + clEventWrapper fenceEvent; + GLsync glFence = 0; + + // Do a loop through 8 different horizontal stripes against the framebuffer + RunThread thread( kernel, queue, streams[ 0 ], streams[ 1 ], (size_t)numHorizVertices ); + + for( int i = 0; i < 8; i++ ) + { + // if current rendering device is not the compute device and + // separateThreads == false which means compute is going on same + // thread and we are using implicit synchronization (no GLSync obj used) + // then glFlush by clEnqueueAcquireGLObject is not sufficient ... we need + // to wait for rendering to finish on other device before CL can start + // writing to CL/GL shared mem objects. 
When separateThreads is true i.e. + // we are using GLSync obj to synchronize then we dont need to call glFinish + // here since CL should wait for rendering on other device before this + // GLSync object to finish before it starts writing to shared mem object. + // Also rend_device == compute_device no need to call glFinish + if(rend_device != device && !separateThreads) + glFinish(); + + if( separateThreads ) + { + thread.SetRunData( (cl_int)i, fenceEvent ); + thread.Start(); + + error = (cl_int)(size_t)thread.Join(); + } + else + { + error = run_cl_kernel( kernel, queue, streams[ 0 ], streams[ 1 ], (cl_int)i, fenceEvent, (size_t)numHorizVertices ); + } + test_error( error, "Unable to run CL kernel" ); + + glUseProgram(shaderprogram); + glEnableVertexAttribArray(posLoc); + glEnableVertexAttribArray(colLoc); + glBindBuffer( GL_ARRAY_BUFFER, vtxBuffer ); + glVertexAttribPointer(posLoc, 4, GL_FLOAT, GL_FALSE, 4*sizeof(GLfloat), 0); + glBindBuffer( GL_ARRAY_BUFFER, colorBuffer ); + glVertexAttribPointer(colLoc, 4, GL_FLOAT, GL_FALSE, 4*sizeof(GLfloat), 0); + glBindBuffer( GL_ARRAY_BUFFER, 0 ); + + glDrawArrays( GL_TRIANGLE_STRIP, 0, numHorizVertices * 2 ); + + glDisableVertexAttribArray(posLoc); + glDisableVertexAttribArray(colLoc); + glUseProgram(0); + + if( separateThreads ) + { + // If we're on the same thread, then we're testing implicit syncing, so we + // don't need the actual fence code + if( fenceEvent != NULL ) + { + clReleaseEvent( fenceEvent ); + glDeleteSyncFunc( glFence ); + } + + glFence = glFenceSyncFunc( GL_SYNC_GPU_COMMANDS_COMPLETE, 0 ); + fenceEvent = clCreateEventFromGLsyncKHR_ptr( context, glFence, &error ); + test_error( error, "Unable to create CL event from GL fence" ); + + // in case of explicit synchronization, we just wait for the sync object to complete + // in clEnqueueAcquireGLObject but we dont flush. 
Its application's responsibility + // to flush on the context on which glSync is created + glFlush(); + } + } + + if( glFence != 0 ) + // Don't need the final release for fenceEvent, because the wrapper will take care of that + glDeleteSyncFunc( glFence ); + +#ifdef __APPLE__ + CGLSetVirtualScreen(CGLGetCurrentContext(), read_vs); +#else + // Need platform specific code to set the current rendering device (OpenGL target) + // to device with id read_vs so that next glReadPixels get submitted to that device + log_error( "ERROR: not implemented\n\n" ); + return -1; +#endif + // Grab the contents of the final framebuffer + BufferOwningPtr resultData( ReadGLRenderbuffer( glFramebuffer, glRenderbuffer, + GL_COLOR_ATTACHMENT0_EXT, + GL_RGBA8_OES, GL_UNSIGNED_BYTE, GL_RGBA, GL_UNSIGNED_BYTE, kUChar, + framebufferSize, 128 ) ); + + // Check the contents now. We should end up with solid color bands 32 pixels high and the + // full width of the framebuffer, at values (128,128,128) due to the additive blending + for( int i = 0; i < 8; i++ ) + { + for( int y = 0; y < 4; y++ ) + { + // Note: coverage will be double because the 63-0 triangle overwrites again at the end of the pass + cl_uchar valA = ( ( ( i + 1 ) & 1 ) ) * numHorizVertices * 2 / framebufferSize; + cl_uchar valB = ( ( ( i + 1 ) & 2 ) >> 1 ) * numHorizVertices * 2 / framebufferSize; + cl_uchar valC = ( ( ( i + 1 ) & 4 ) >> 2 ) * numHorizVertices * 2 / framebufferSize; + + cl_uchar *row = (cl_uchar *)&resultData[ ( i * 16 + y ) * framebufferSize * 4 ]; + for( int x = 0; x < ( framebufferSize - 1 ) - 1; x++ ) + { + if( ( row[ x * 4 ] != valA ) || ( row[ x * 4 + 1 ] != valB ) || + ( row[ x * 4 + 2 ] != valC ) ) + { + log_error( "ERROR: Output framebuffer did not validate!\n" ); + DumpGLBuffer( GL_UNSIGNED_BYTE, framebufferSize, 128, resultData ); + log_error( "RUNS:\n" ); + uint32_t *p = (uint32_t *)(char *)resultData; + size_t a = 0; + for( size_t t = 1; t < framebufferSize * framebufferSize; t++ ) + { + if( p[ a ] != 
0 ) + { + if( p[ t ] == 0 ) + { + log_error( "RUN: %ld to %ld (%d,%d to %d,%d) 0x%08x\n", a, t - 1, + (int)( a % framebufferSize ), (int)( a / framebufferSize ), + (int)( ( t - 1 ) % framebufferSize ), (int)( ( t - 1 ) / framebufferSize ), + p[ a ] ); + a = t; + } + } + else + { + if( p[ t ] != 0 ) + { + a = t; + } + } + + } + return -1; + } + } + } + } + + glDeleteBuffers( 1, &vtxBuffer ); + glDeleteBuffers( 1, &colorBuffer ); + destroyShaderProgram(shaderprogram); +// glDeleteVertexArrays(1, &vao); + return 0; +} + +int test_fence_sync( cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) +{ + GLint vs_count = 0; + cl_device_id *device_list = NULL; + + if( !is_extension_available( device, "cl_khr_gl_event" ) ) + { + log_info( "NOTE: cl_khr_gl_event extension not present on this device; skipping fence sync test\n" ); + return 0; + } +#ifdef __APPLE__ + CGLContextObj ctx = CGLGetCurrentContext(); + CGLPixelFormatObj pix = CGLGetPixelFormat(ctx); + CGLError err = CGLDescribePixelFormat(pix, 0, kCGLPFAVirtualScreenCount, &vs_count); + + device_list = (cl_device_id *) malloc(sizeof(cl_device_id)*vs_count); + clGetGLContextInfoAPPLE(context, ctx, CL_CGL_DEVICES_FOR_SUPPORTED_VIRTUAL_SCREENS_APPLE, sizeof(cl_device_id)*vs_count, device_list, NULL); +#else + // Need platform specific way of getting devices from CL context to which OpenGL can render + // If not available it can be replaced with clGetContextInfo with CL_CONTEXT_DEVICES + log_error( "ERROR: not implemented\n\n" ); + return -1; +#endif + + GLint rend_vs, read_vs; + int error = 0; + int any_failed = 0; + + // Loop through all the devices capable to OpenGL rendering + // and set them as current rendering target + for(rend_vs = 0; rend_vs < vs_count; rend_vs++) + { + // Loop through all the devices and set them as current + // compute target + for(read_vs = 0; read_vs < vs_count; read_vs++) + { + cl_device_id rend_device = device_list[rend_vs], read_device = device_list[read_vs]; + 
char rend_name[200], read_name[200]; + + clGetDeviceInfo(rend_device, CL_DEVICE_NAME, sizeof(rend_name), rend_name, NULL); + clGetDeviceInfo(read_device, CL_DEVICE_NAME, sizeof(read_name), read_name, NULL); + + log_info("Rendering on: %s, read back on: %s\n", rend_name, read_name); + error = test_fence_sync_single( device, context, queue, false, rend_vs, read_vs, rend_device ); + any_failed |= error; + if( error != 0 ) + log_error( "ERROR: Implicit syncing with GL sync events failed!\n\n" ); + else + log_info("Implicit syncing Passed\n"); + + error = test_fence_sync_single( device, context, queue, true, rend_vs, read_vs, rend_device ); + any_failed |= error; + if( error != 0 ) + log_error( "ERROR: Explicit syncing with GL sync events failed!\n\n" ); + else + log_info("Explicit syncing Passed\n"); + } + } + + free(device_list); + + return any_failed; +} diff --git a/test_conformance/gles/test_images_2D.cpp b/test_conformance/gles/test_images_2D.cpp new file mode 100644 index 00000000..f2c66fd7 --- /dev/null +++ b/test_conformance/gles/test_images_2D.cpp @@ -0,0 +1,749 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "testBase.h" + +#include "gl_headers.h" + +extern "C" { extern cl_uint gRandomSeed; } + +static const char *imageReadKernelPattern = +"#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n" /* added support for half floats */ +"__kernel void sample_test( read_only image2d_t source, sampler_t sampler, __global %s4 *results )\n" +"{\n" +" int tidX = get_global_id(0);\n" +" int tidY = get_global_id(1);\n" +" results[ tidY * get_image_width( source ) + tidX ] = read_image%s( source, sampler, (int2)( tidX, tidY ) );\n" +"}\n"; + +static const char *imageWriteKernelPattern = +"#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n" /* added support for half floats */ +"__kernel void sample_test( __global %s4 *source, write_only image2d_t dest )\n" +"{\n" +" int tidX = get_global_id(0);\n" +" int tidY = get_global_id(1);\n" +" uint index = tidY * get_image_width( dest ) + tidX;\n" +" %s4 value = source[index];\n" +" write_image%s( dest, (int2)( tidX, tidY ), %s(value));\n" +"}\n"; + +int test_cl_image_read( cl_context context, cl_command_queue queue, cl_mem clImage, + size_t imageWidth, size_t imageHeight, cl_image_format *outFormat, ExplicitType *outType, void **outResultBuffer ) +{ + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper outStream; + + int error; + size_t threads[ 2 ], localThreads[ 2 ]; + char kernelSource[10240]; + char *programPtr; + + + // Determine data type and format that CL came up with + error = clGetImageInfo( clImage, CL_IMAGE_FORMAT, sizeof( cl_image_format ), outFormat, NULL ); + test_error( error, "Unable to get CL image format" ); + + /* Create the source */ + *outType = get_read_kernel_type( outFormat ); + size_t channelSize = get_explicit_type_size( *outType ); + + sprintf( kernelSource, imageReadKernelPattern, get_explicit_type_name( *outType ), get_kernel_suffix( outFormat ) ); + +#ifdef GLES_DEBUG + log_info("-- start cl image read kernel --\n"); + log_info("%s", kernelSource); + log_info("-- end cl image read kernel 
--\n"); +#endif + + /* Create kernel */ + programPtr = kernelSource; + if( create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&programPtr, "sample_test" ) ) + { + return -1; + } + + + // Create a vanilla output buffer + outStream = clCreateBuffer( context, CL_MEM_READ_WRITE, channelSize * 4 * imageWidth * imageHeight, NULL, &error ); + test_error( error, "Unable to create output buffer" ); + + + /* Assign streams and execute */ + clSamplerWrapper sampler = clCreateSampler( context, CL_FALSE, CL_ADDRESS_NONE, CL_FILTER_NEAREST, &error ); + test_error( error, "Unable to create sampler" ); + + error = clSetKernelArg( kernel, 0, sizeof( clImage ), &clImage ); + test_error( error, "Unable to set kernel arguments" ); + error = clSetKernelArg( kernel, 1, sizeof( sampler ), &sampler ); + test_error( error, "Unable to set kernel arguments" ); + error = clSetKernelArg( kernel, 2, sizeof( outStream ), &outStream ); + test_error( error, "Unable to set kernel arguments" ); + + glFlush(); + + error = (*clEnqueueAcquireGLObjects_ptr)( queue, 1, &clImage, 0, NULL, NULL); + test_error( error, "Unable to acquire GL obejcts"); + + /* Run the kernel */ + threads[ 0 ] = imageWidth; + threads[ 1 ] = imageHeight; + + error = get_max_common_2D_work_group_size( context, kernel, threads, localThreads ); + test_error( error, "Unable to get work group size to use" ); + + error = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, localThreads, 0, NULL, NULL ); + test_error( error, "Unable to execute test kernel" ); + + + error = (*clEnqueueReleaseGLObjects_ptr)( queue, 1, &clImage, 0, NULL, NULL ); + test_error(error, "clEnqueueReleaseGLObjects failed"); + + // Read results from the CL buffer + *outResultBuffer = malloc(channelSize * 4 * imageWidth * imageHeight); + error = clEnqueueReadBuffer( queue, outStream, CL_TRUE, 0, channelSize * 4 * imageWidth * imageHeight, + *outResultBuffer, 0, NULL, NULL ); + test_error( error, "Unable to read output CL buffer!" 
); + + return 0; +} + +static int test_image_read( cl_context context, cl_command_queue queue, GLenum glTarget, GLuint glTexture, + size_t imageWidth, size_t imageHeight, cl_image_format *outFormat, ExplicitType *outType, void **outResultBuffer ) +{ + // Create a CL image from the supplied GL texture + int error; + clMemWrapper image = (*clCreateFromGLTexture_ptr)( context, CL_MEM_READ_ONLY, glTarget, 0, glTexture, &error ); + if( error != CL_SUCCESS ) + { + print_error( error, "Unable to create CL image from GL texture" ); +#ifndef GL_ES_VERSION_2_0 + GLint fmt; + glGetTexLevelParameteriv( glTarget, 0, GL_TEXTURE_INTERNAL_FORMAT, &fmt ); + log_error( " Supplied GL texture was baseformat %s and internalformat %s\n", GetGLBaseFormatName( fmt ), GetGLFormatName( fmt ) ); +#endif + return error; + } + + return test_cl_image_read( context, queue, image, imageWidth, imageHeight, outFormat, outType, outResultBuffer ); +} + +int test_image_format_read( cl_context context, cl_command_queue queue, + size_t width, size_t height, GLenum target, + GLenum format, GLenum internalFormat, + GLenum glType, ExplicitType type, MTdata d ) +{ + int error; + + + // Create the GL texture + glTextureWrapper glTexture; + void *tmp = CreateGLTexture2D( width, height, target, format, internalFormat, glType, type, &glTexture, &error, true, d ); + BufferOwningPtr inputBuffer(tmp); + if( error != 0 ) + { + return error; + } + + /* skip formats not supported by OpenGL */ + if(!tmp) + { + return 0; + } + + // Run and get the results + cl_image_format clFormat; + ExplicitType actualType; + char *outBuffer; + error = test_image_read( context, queue, target, glTexture, width, height, &clFormat, &actualType, (void **)&outBuffer ); + if( error != 0 ) + return error; + BufferOwningPtr actualResults(outBuffer); + + log_info( "- Read [%4d x %4d] : GL Texture : %s : %s : %s => CL Image : %s : %s \n", (int)width, (int)height, + GetGLFormatName( format ), GetGLFormatName( internalFormat ), GetGLTypeName( 
glType), + GetChannelOrderName( clFormat.image_channel_order ), GetChannelTypeName( clFormat.image_channel_data_type )); + + // We have to convert our input buffer to the returned type, so we can validate. + BufferOwningPtr convertedInputs(convert_to_expected( inputBuffer, width * height, type, actualType )); + + // Now we validate + int valid = 0; + if(convertedInputs) { + if( actualType == kFloat ) + valid = validate_float_results( convertedInputs, actualResults, width, height ); + else + valid = validate_integer_results( convertedInputs, actualResults, width, height, get_explicit_type_size( actualType ) ); + } + + return valid; +} + +int test_images_read( cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) +{ + GLenum targets[] = +#ifdef GL_ES_VERSION_2_0 + { GL_TEXTURE_2D }; +#else // GL_ES_VERSION_2_0 + { GL_TEXTURE_2D, GL_TEXTURE_RECTANGLE_EXT }; +#endif // GL_ES_VERSION_2_0 + + struct { + GLenum internal; + GLenum format; + GLenum datatype; + ExplicitType type; + + } formats[] = { + { GL_RGBA, GL_RGBA, GL_UNSIGNED_BYTE, kUChar }, + { GL_RGBA, GL_RGBA, GL_UNSIGNED_SHORT, kUShort }, + { GL_RGBA, GL_RGBA, GL_FLOAT, kFloat }, + }; + + size_t fmtIdx, tgtIdx; + int error = 0; + size_t iter = 6; + RandomSeed seed(gRandomSeed ); + + // Check if images are supported + if (checkForImageSupport(device)) { + log_info("Device does not support images. 
Skipping test.\n"); + return 0; + } + + // Loop through a set of GL formats, testing a set of sizes against each one + for( fmtIdx = 0; fmtIdx < sizeof( formats ) / sizeof( formats[ 0 ] ); fmtIdx++ ) + { + for( tgtIdx = 0; tgtIdx < sizeof( targets ) / sizeof( targets[ 0 ] ); tgtIdx++ ) + { + size_t i; + + log_info( "Testing image read for GL format %s : %s : %s : %s\n", + GetGLTargetName( targets[ tgtIdx ] ), + GetGLFormatName( formats[ fmtIdx ].internal ), + GetGLBaseFormatName( formats[ fmtIdx ].format ), + GetGLTypeName( formats[ fmtIdx ].datatype ) ); + + for( i = 0; i < iter; i++ ) + { + size_t width = random_in_range( 16, 512, seed ); + size_t height = random_in_range( 16, 512, seed ); + + if( test_image_format_read( context, queue, width, height, + targets[ tgtIdx ], + formats[ fmtIdx ].format, + formats[ fmtIdx ].internal, + formats[ fmtIdx ].datatype, + formats[ fmtIdx ].type, seed ) ) + { + log_error( "ERROR: Image read test failed for %s : %s : %s : %s\n\n", + GetGLTargetName( targets[ tgtIdx ] ), + GetGLFormatName( formats[ fmtIdx ].internal ), + GetGLBaseFormatName( formats[ fmtIdx ].format ), + GetGLTypeName( formats[ fmtIdx ].datatype ) ); + + error++; + break; // Skip other sizes for this combination + } + } + if( i == iter ) + { + log_info( "passed: Image read for GL format %s : %s : %s : %s\n\n", + GetGLTargetName( targets[ tgtIdx ] ), + GetGLFormatName( formats[ fmtIdx ].internal ), + GetGLBaseFormatName( formats[ fmtIdx ].format ), + GetGLTypeName( formats[ fmtIdx ].datatype ) ); + } + } + } + + return error; +} + +int test_images_read_cube( cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) +{ + GLenum targets[] = { + GL_TEXTURE_CUBE_MAP_POSITIVE_X, + GL_TEXTURE_CUBE_MAP_POSITIVE_Y, + GL_TEXTURE_CUBE_MAP_POSITIVE_Z, + GL_TEXTURE_CUBE_MAP_NEGATIVE_X, + GL_TEXTURE_CUBE_MAP_NEGATIVE_Y, + GL_TEXTURE_CUBE_MAP_NEGATIVE_Z }; + + struct { + GLenum internal; + GLenum format; + GLenum datatype; + ExplicitType type; + + } 
formats[] = { +#ifdef GL_ES_VERSION_2_0 + { GL_RGBA, GL_RGBA, GL_UNSIGNED_BYTE, kUChar }, + { GL_RGBA, GL_RGBA, GL_UNSIGNED_SHORT, kUShort }, + // XXX add others +#else // GL_ES_VERSION_2_0 + { GL_RGBA, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, kUChar }, + { GL_RGBA, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, kUChar }, + { GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, kUChar }, + { GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, kUShort }, + { GL_RGBA8I_EXT, GL_RGBA_INTEGER_EXT, GL_BYTE, kChar }, + { GL_RGBA16I_EXT, GL_RGBA_INTEGER_EXT, GL_SHORT, kShort }, + { GL_RGBA32I_EXT, GL_RGBA_INTEGER_EXT, GL_INT, kInt }, + { GL_RGBA8UI_EXT, GL_RGBA_INTEGER_EXT, GL_UNSIGNED_BYTE, kUChar }, + { GL_RGBA16UI_EXT, GL_RGBA_INTEGER_EXT, GL_UNSIGNED_SHORT, kUShort }, + { GL_RGBA32UI_EXT, GL_RGBA_INTEGER_EXT, GL_UNSIGNED_INT, kUInt }, + { GL_RGBA32F_ARB, GL_RGBA, GL_FLOAT, kFloat } +#endif + }; + + size_t sizes[] = { 16, 32, 64, 128, 256, 512, 1024, 2048, 4096 }; + + size_t fmtIdx, tgtIdx; + int error = 0; + size_t iter = 6; + RandomSeed seed(gRandomSeed); + + // Check if images are supported + if (checkForImageSupport(device)) { + log_info("Device does not support images. 
Skipping test.\n"); + return 0; + } + + // Loop through a set of GL formats, testing a set of sizes against each one + for( fmtIdx = 0; fmtIdx < sizeof( formats ) / sizeof( formats[ 0 ] ); fmtIdx++ ) + { + for( tgtIdx = 0; tgtIdx < sizeof( targets ) / sizeof( targets[ 0 ] ); tgtIdx++ ) + { + size_t i; + + log_info( "Testing image read cubemap for GL format %s : %s : %s : %s\n\n", + GetGLTargetName( targets[ tgtIdx ] ), + GetGLFormatName( formats[ fmtIdx ].internal ), + GetGLBaseFormatName( formats[ fmtIdx ].format ), + GetGLTypeName( formats[ fmtIdx ].datatype ) ); + + for( i = 0; i < iter; i++ ) + { + if( test_image_format_read( context, queue, sizes[i], sizes[i], + targets[ tgtIdx ], + formats[ fmtIdx ].format, + formats[ fmtIdx ].internal, + formats[ fmtIdx ].datatype, + formats[ fmtIdx ].type, seed ) ) + { + log_error( "ERROR: Image read cubemap test failed for %s : %s : %s : %s\n\n", + GetGLTargetName( targets[ tgtIdx ] ), + GetGLFormatName( formats[ fmtIdx ].internal ), + GetGLBaseFormatName( formats[ fmtIdx ].format ), + GetGLTypeName( formats[ fmtIdx ].datatype ) ); + + error++; + break; // Skip other sizes for this combination + } + } + if( i == iter ) + { + log_info( "passed: Image read cubemap for GL format %s : %s : %s : %s\n\n", + GetGLTargetName( targets[ tgtIdx ] ), + GetGLFormatName( formats[ fmtIdx ].internal ), + GetGLBaseFormatName( formats[ fmtIdx ].format ), + GetGLTypeName( formats[ fmtIdx ].datatype ) ); + + } + else + break; // Skip other cube map targets; they're unlikely to pass either + } + } + + return error; +} + + +#pragma mark -------------------- Write tests ------------------------- + + +int test_cl_image_write( cl_context context, cl_command_queue queue, cl_mem clImage, + size_t imageWidth, size_t imageHeight, cl_image_format *outFormat, ExplicitType *outType, void **outSourceBuffer, MTdata d ) +{ + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper inStream; + + int error; + size_t threads[ 2 ], localThreads[ 2 ]; 
+ char kernelSource[10240]; + char *programPtr; + + // Determine data type and format that CL came up with + error = clGetImageInfo( clImage, CL_IMAGE_FORMAT, sizeof( cl_image_format ), outFormat, NULL ); + test_error( error, "Unable to get CL image format" ); + + /* Create the source */ + *outType = get_write_kernel_type( outFormat ); + size_t channelSize = get_explicit_type_size( *outType ); + + const char* suffix = get_kernel_suffix( outFormat ); + const char* convert = get_write_conversion( outFormat, *outType ); + + sprintf( kernelSource, imageWriteKernelPattern, get_explicit_type_name( *outType ), get_explicit_type_name( *outType ), suffix, convert); + +#ifdef GLES_DEBUG + log_info("-- start cl image write kernel --\n"); + log_info("%s", kernelSource); + log_info("-- end cl image write kernel --\n"); +#endif + + /* Create kernel */ + programPtr = kernelSource; + if( create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&programPtr, "sample_test" ) ) + { + return -1; + } + + // Generate some source data based on the input type we need + *outSourceBuffer = CreateRandomData(*outType, imageWidth * imageHeight * 4, d); + + // Create a vanilla input buffer + inStream = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, channelSize * 4 * imageWidth * imageHeight, *outSourceBuffer, &error ); + test_error( error, "Unable to create output buffer" ); + + /* Assign streams and execute */ + clSamplerWrapper sampler = clCreateSampler( context, CL_FALSE, CL_ADDRESS_NONE, CL_FILTER_NEAREST, &error ); + test_error( error, "Unable to create sampler" ); + + error = clSetKernelArg( kernel, 0, sizeof( inStream ), &inStream ); + test_error( error, "Unable to set kernel arguments" ); + error = clSetKernelArg( kernel, 1, sizeof( clImage ), &clImage ); + test_error( error, "Unable to set kernel arguments" ); + + glFlush(); + + error = (*clEnqueueAcquireGLObjects_ptr)( queue, 1, &clImage, 0, NULL, NULL); + test_error( error, "Unable to acquire GL obejcts"); + + /* Run 
the kernel */ + threads[ 0 ] = imageWidth; + threads[ 1 ] = imageHeight; + + error = get_max_common_2D_work_group_size( context, kernel, threads, localThreads ); + test_error( error, "Unable to get work group size to use" ); + + error = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, localThreads, 0, NULL, NULL ); + test_error( error, "Unable to execute test kernel" ); + + clEventWrapper event; + error = (*clEnqueueReleaseGLObjects_ptr)( queue, 1, &clImage, 0, NULL, &event ); + test_error(error, "clEnqueueReleaseGLObjects failed"); + + error = clWaitForEvents( 1, &event ); + test_error(error, "clWaitForEvents failed"); + +#ifdef GLES_DEBUG + int i; + size_t origin[] = {0, 0, 0,}; + size_t region[] = {imageWidth, imageHeight, 1 }; + void* cldata = malloc( channelSize * 4 * imageWidth * imageHeight ); + clEnqueueReadImage( queue, clImage, 1, origin, region, 0, 0, cldata, 0, 0, 0); + log_info("- start CL Image Data -- \n"); + DumpGLBuffer(GetGLTypeForExplicitType(*outType), imageWidth, imageHeight, cldata); + log_info("- end CL Image Data -- \n"); + free(cldata); +#endif + + // All done! 
+ return 0; +} + +int test_image_write( cl_context context, cl_command_queue queue, GLenum glTarget, GLuint glTexture, + size_t imageWidth, size_t imageHeight, cl_image_format *outFormat, ExplicitType *outType, void **outSourceBuffer, MTdata d ) +{ + int error; + + // Create a CL image from the supplied GL texture + clMemWrapper image = (*clCreateFromGLTexture_ptr)( context, CL_MEM_WRITE_ONLY, glTarget, 0, glTexture, &error ); + if( error != CL_SUCCESS ) + { + print_error( error, "Unable to create CL image from GL texture" ); +#ifndef GL_ES_VERSION_2_0 + GLint fmt; + glGetTexLevelParameteriv( glTarget, 0, GL_TEXTURE_INTERNAL_FORMAT, &fmt ); + log_error( " Supplied GL texture was baseformat %s and internalformat %s\n", GetGLBaseFormatName( fmt ), GetGLFormatName( fmt ) ); +#endif + return error; + } + + return test_cl_image_write( context, queue, image, imageWidth, imageHeight, outFormat, outType, outSourceBuffer, d ); +} + + +int test_image_format_write( cl_context context, cl_command_queue queue, + size_t width, size_t height, GLenum target, + GLenum format, GLenum internalFormat, + GLenum glType, ExplicitType type, MTdata d ) +{ + int error; + + // Create the GL texture + glTextureWrapper glTexture; + void *tmp = CreateGLTexture2D( width, height, target, format, internalFormat, glType, type, &glTexture, &error, true, d ); + BufferOwningPtr inputBuffer(tmp); + if( error != 0 ) + { + return error; + } + + /* skip formats not supported by OpenGL */ + if(!tmp) + { + return 0; + } + + // Run and get the results + cl_image_format clFormat; + ExplicitType sourceType; + void *outSourceBuffer; + error = test_image_write( context, queue, target, glTexture, width, height, &clFormat, &sourceType, (void **)&outSourceBuffer, d ); + if( error != 0 ) + return error; + + BufferOwningPtr actualSource(outSourceBuffer); + + log_info( "- Write [%4d x %4d] : GL Texture : %s : %s : %s => CL Image : %s : %s \n", (int)width, (int)height, + GetGLFormatName( format ), GetGLFormatName( 
internalFormat ), GetGLTypeName( glType), + GetChannelOrderName( clFormat.image_channel_order ), GetChannelTypeName( clFormat.image_channel_data_type )); + + // Now read the results from the GL texture + ExplicitType readType = type; + BufferOwningPtr glResults( ReadGLTexture( target, glTexture, format, internalFormat, glType, readType, width, height ) ); + + // We have to convert our input buffer to the returned type, so we can validate. + BufferOwningPtr convertedGLResults( convert_to_expected( glResults, width * height, readType, sourceType ) ); + +#ifdef GLES_DEBUG + log_info("- start read GL data -- \n"); + DumpGLBuffer(glType, width, height, glResults); + log_info("- end read GL data -- \n"); + + log_info("- start converted data -- \n"); + DumpGLBuffer(glType, width, height, convertedGLResults); + log_info("- end converted data -- \n"); +#endif + + // Now we validate + int valid = 0; + if(convertedGLResults) { + if( sourceType == kFloat ) + valid = validate_float_results( actualSource, convertedGLResults, width, height ); + else + valid = validate_integer_results( actualSource, convertedGLResults, width, height, get_explicit_type_size( readType ) ); + } + + return valid; +} + +int test_images_write( cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) +{ + GLenum targets[] = +#ifdef GL_ES_VERSION_2_0 + { GL_TEXTURE_2D }; +#else // GL_ES_VERSION_2_0 + { GL_TEXTURE_2D, GL_TEXTURE_RECTANGLE_EXT }; +#endif + + struct { + GLenum internal; + GLenum format; + GLenum datatype; + ExplicitType type; + + } formats[] = { +#ifdef GL_ES_VERSION_2_0 + { GL_RGBA, GL_RGBA, GL_UNSIGNED_BYTE, kUChar }, + { GL_RGBA, GL_RGBA, GL_UNSIGNED_SHORT, kUShort }, + // XXX add others +#else // GL_ES_VERSION_2_0 + { GL_RGBA, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, kUChar }, + { GL_RGBA, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, kUChar }, + { GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, kUChar }, + { GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, kUShort }, + { GL_RGBA8I_EXT, 
GL_RGBA_INTEGER_EXT, GL_BYTE, kChar }, + { GL_RGBA16I_EXT, GL_RGBA_INTEGER_EXT, GL_SHORT, kShort }, + { GL_RGBA32I_EXT, GL_RGBA_INTEGER_EXT, GL_INT, kInt }, + { GL_RGBA8UI_EXT, GL_RGBA_INTEGER_EXT, GL_UNSIGNED_BYTE, kUChar }, + { GL_RGBA16UI_EXT, GL_RGBA_INTEGER_EXT, GL_UNSIGNED_SHORT, kUShort }, + { GL_RGBA32UI_EXT, GL_RGBA_INTEGER_EXT, GL_UNSIGNED_INT, kUInt }, + { GL_RGBA32F_ARB, GL_RGBA, GL_FLOAT, kFloat } +#endif + }; + + size_t fmtIdx, tgtIdx; + int error = 0; + size_t iter = 6; + RandomSeed seed(gRandomSeed); + + // Check if images are supported + if (checkForImageSupport(device)) { + log_info("Device does not support images. Skipping test.\n"); + return 0; + } + + // Loop through a set of GL formats, testing a set of sizes against each one + for( fmtIdx = 0; fmtIdx < sizeof( formats ) / sizeof( formats[ 0 ] ); fmtIdx++ ) + { + for( tgtIdx = 0; tgtIdx < sizeof( targets ) / sizeof( targets[ 0 ] ); tgtIdx++ ) + { + log_info( "Testing image write test for %s : %s : %s : %s\n", + GetGLTargetName( targets[ tgtIdx ] ), + GetGLFormatName( formats[ fmtIdx ].internal ), + GetGLBaseFormatName( formats[ fmtIdx ].format ), + GetGLTypeName( formats[ fmtIdx ].datatype ) ); + + size_t i; + for( i = 0; i < iter; i++ ) + { + size_t width = random_in_range( 16, 512, seed ); + size_t height = random_in_range( 16, 512, seed ); + + if( targets[ tgtIdx ] == GL_TEXTURE_2D ) + width = height; + + if( test_image_format_write( context, queue, width, height, + targets[ tgtIdx ], + formats[ fmtIdx ].format, + formats[ fmtIdx ].internal, + formats[ fmtIdx ].datatype, + formats[ fmtIdx ].type, seed ) ) + { + log_error( "ERROR: Image write test failed for %s : %s : %s : %s\n\n", + GetGLTargetName( targets[ tgtIdx ] ), + GetGLFormatName( formats[ fmtIdx ].internal ), + GetGLBaseFormatName( formats[ fmtIdx ].format ), + GetGLTypeName( formats[ fmtIdx ].datatype ) ); + + error++; + break; // Skip other sizes for this combination + } + } + if( i == 6 ) + { + log_info( "passed: Image write for 
GL format %s : %s : %s : %s\n\n", + GetGLTargetName( targets[ tgtIdx ] ), + GetGLFormatName( formats[ fmtIdx ].internal ), + GetGLBaseFormatName( formats[ fmtIdx ].format ), + GetGLTypeName( formats[ fmtIdx ].datatype ) ); + + } + } + } + + return error; +} + +int test_images_write_cube( cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) +{ + GLenum targets[] = { + GL_TEXTURE_CUBE_MAP_POSITIVE_X, + GL_TEXTURE_CUBE_MAP_POSITIVE_Y, + GL_TEXTURE_CUBE_MAP_POSITIVE_Z, + GL_TEXTURE_CUBE_MAP_NEGATIVE_X, + GL_TEXTURE_CUBE_MAP_NEGATIVE_Y, + GL_TEXTURE_CUBE_MAP_NEGATIVE_Z }; + + struct { + GLenum internal; + GLenum format; + GLenum datatype; + ExplicitType type; + + } formats[] = { +#ifdef GL_ES_VERSION_2_0 + { GL_RGBA, GL_RGBA, GL_UNSIGNED_BYTE, kUChar }, + { GL_RGBA, GL_RGBA, GL_UNSIGNED_SHORT, kUShort }, + // XXX add others +#else // GL_ES_VERSION_2_0 + { GL_RGBA, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, kUChar }, + { GL_RGBA, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, kUChar }, + { GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, kUChar }, + { GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, kUShort }, + { GL_RGBA8I_EXT, GL_RGBA_INTEGER_EXT, GL_BYTE, kChar }, + { GL_RGBA16I_EXT, GL_RGBA_INTEGER_EXT, GL_SHORT, kShort }, + { GL_RGBA32I_EXT, GL_RGBA_INTEGER_EXT, GL_INT, kInt }, + { GL_RGBA8UI_EXT, GL_RGBA_INTEGER_EXT, GL_UNSIGNED_BYTE, kUChar }, + { GL_RGBA16UI_EXT, GL_RGBA_INTEGER_EXT, GL_UNSIGNED_SHORT, kUShort }, + { GL_RGBA32UI_EXT, GL_RGBA_INTEGER_EXT, GL_UNSIGNED_INT, kUInt }, + { GL_RGBA32F_ARB, GL_RGBA, GL_FLOAT, kFloat } +#endif + }; + + size_t sizes[] = { 16, 32, 64, 128, 256, 512, 1024, 2048, 4096 }; + + size_t fmtIdx, tgtIdx; + int error = 0; + size_t iter = 6; + RandomSeed seed( gRandomSeed ); + + // Check if images are supported + if (checkForImageSupport(device)) { + log_info("Device does not support images. 
Skipping test.\n"); + return 0; + } + + // Loop through a set of GL formats, testing a set of sizes against each one + for( fmtIdx = 0; fmtIdx < sizeof( formats ) / sizeof( formats[ 0 ] ); fmtIdx++ ) + { + for( tgtIdx = 0; tgtIdx < sizeof( targets ) / sizeof( targets[ 0 ] ); tgtIdx++ ) + { + size_t i; + log_info( "Testing image write cubemap test for %s : %s : %s : %s\n", + GetGLTargetName( targets[ tgtIdx ] ), + GetGLFormatName( formats[ fmtIdx ].internal ), + GetGLBaseFormatName( formats[ fmtIdx ].format ), + GetGLTypeName( formats[ fmtIdx ].datatype ) ); + + for( i = 0; i < iter; i++ ) + { + if( test_image_format_write( context, queue, sizes[i], sizes[i], + targets[ tgtIdx ], + formats[ fmtIdx ].format, + formats[ fmtIdx ].internal, + formats[ fmtIdx ].datatype, + formats[ fmtIdx ].type, seed ) ) + { + log_error( "ERROR: Image write cubemap test failed for %s : %s : %s : %s\n\n", + GetGLTargetName( targets[ tgtIdx ] ), + GetGLFormatName( formats[ fmtIdx ].internal ), + GetGLBaseFormatName( formats[ fmtIdx ].format ), + GetGLTypeName( formats[ fmtIdx ].datatype ) ); + + + error++; + break; // Skip other sizes for this combination + } + } + if( i == iter ) + { + log_info( "passed: Image write cubemap for GL format %s : %s : %s : %s\n\n", + GetGLTargetName( targets[ tgtIdx ] ), + GetGLFormatName( formats[ fmtIdx ].internal ), + GetGLBaseFormatName( formats[ fmtIdx ].format ), + GetGLTypeName( formats[ fmtIdx ].datatype ) ); + } + else + break; // Skip other cube map targets; they're unlikely to pass either + } + } + + return error; +} diff --git a/test_conformance/gles/test_images_2D_info.cpp b/test_conformance/gles/test_images_2D_info.cpp new file mode 100644 index 00000000..ce6717e6 --- /dev/null +++ b/test_conformance/gles/test_images_2D_info.cpp @@ -0,0 +1,256 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "testBase.h" + +#include "gl_headers.h" + +extern "C" {extern cl_uint gRandomSeed;}; + +static int test_image_read( cl_context context, cl_command_queue queue, GLenum glTarget, GLuint glTexture, + size_t imageWidth, size_t imageHeight, cl_image_format *outFormat, + ExplicitType *outType, void **outResultBuffer ) +{ + // Create a CL image from the supplied GL texture + int error; + clMemWrapper image = (*clCreateFromGLTexture_ptr)( context, CL_MEM_READ_ONLY, glTarget, 0, glTexture, &error ); + if( error != CL_SUCCESS ) + { + print_error( error, "Unable to create CL image from GL texture" ); +#ifndef GL_ES_VERSION_2_0 + GLint fmt; + glGetTexLevelParameteriv( glTarget, 0, GL_TEXTURE_INTERNAL_FORMAT, &fmt ); + log_error( " Supplied GL texture was baseformat %s and internalformat %s\n", GetGLBaseFormatName( fmt ), GetGLFormatName( fmt ) ); +#endif + return error; + } + + // Determine data type and format that CL came up with + error = clGetImageInfo( image, CL_IMAGE_FORMAT, sizeof( cl_image_format ), outFormat, NULL ); + test_error( error, "Unable to get CL image format" ); + + return CheckGLObjectInfo(image, CL_GL_OBJECT_TEXTURE2D, glTexture, glTarget, 0); +} + +static int test_image_object_info( cl_context context, cl_command_queue queue, + size_t width, size_t height, GLenum target, + GLenum format, GLenum internalFormat, + GLenum glType, ExplicitType type, MTdata d ) +{ + int error; + + // Create the GL texture + glTextureWrapper glTexture; + void *tmp = CreateGLTexture2D( width, height, target, format, internalFormat, glType, type, 
&glTexture, &error, true, d ); + BufferOwningPtr inputBuffer(tmp); + if( error != 0 ) + { + // GL_RGBA_INTEGER_EXT doesn't exist in GLES2. No need to check for it. + return error; + } + + /* skip formats not supported by OpenGL */ + if(!tmp) + { + return 0; + } + + // Run and get the results + cl_image_format clFormat; + ExplicitType actualType; + char *outBuffer; + error = test_image_read( context, queue, target, glTexture, width, height, &clFormat, &actualType, (void **)&outBuffer ); + + return error; +} + +int test_images_2D_getinfo( cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) +{ + GLenum targets[] = +#ifdef GL_ES_VERSION_2_0 + { GL_TEXTURE_2D }; +#else // GL_ES_VERSION_2_0 + { GL_TEXTURE_2D, GL_TEXTURE_RECTANGLE_EXT }; +#endif // GL_ES_VERSION_2_0 + + struct { + GLenum internal; + GLenum format; + GLenum datatype; + ExplicitType type; + + } formats[] = { + { GL_RGBA, GL_RGBA, GL_UNSIGNED_BYTE, kUChar }, + { GL_RGBA, GL_RGBA, GL_UNSIGNED_SHORT, kUShort }, + { GL_RGBA, GL_RGBA, GL_HALF_FLOAT_OES, kHalf }, + { GL_RGBA, GL_RGBA, GL_FLOAT, kFloat }, + }; + + size_t sizes[] = { 16, 32, 64, 128, 256, 512, 1024, 2048, 4096 }; + + size_t fmtIdx, tgtIdx; + int error = 0; + size_t iter = 6; + RandomSeed seed( gRandomSeed ); + + // Check if images are supported + if (checkForImageSupport(device)) { + log_info("Device does not support images. 
Skipping test.\n"); + return 0; + } + + // Loop through a set of GL formats, testing a set of sizes against each one + for( fmtIdx = 0; fmtIdx < sizeof( formats ) / sizeof( formats[ 0 ] ); fmtIdx++ ) + { + for( tgtIdx = 0; tgtIdx < sizeof( targets ) / sizeof( targets[ 0 ] ); tgtIdx++ ) + { + size_t i; + log_info( "Testing image texture object info test for %s : %s : %s : %s\n", + GetGLTargetName( targets[ tgtIdx ] ), + GetGLFormatName( formats[ fmtIdx ].internal ), + GetGLBaseFormatName( formats[ fmtIdx ].format ), + GetGLTypeName( formats[ fmtIdx ].datatype ) ); + + for( i = 0; i < iter; i++ ) + { + if( test_image_object_info( context, queue, sizes[i], sizes[i], + targets[ tgtIdx ], + formats[ fmtIdx ].format, + formats[ fmtIdx ].internal, + formats[ fmtIdx ].datatype, + formats[ fmtIdx ].type, seed ) ) + { + log_error( "ERROR: Image texture object info test failed for %s : %s : %s : %s\n\n", + GetGLTargetName( targets[ tgtIdx ] ), + GetGLFormatName( formats[ fmtIdx ].internal ), + GetGLBaseFormatName( formats[ fmtIdx ].format ), + GetGLTypeName( formats[ fmtIdx ].datatype ) ); + + + error++; + break; // Skip other sizes for this combination + } + } + if( i == iter ) + { + log_info( "passed: Image texture object info test passed for %s : %s : %s : %s\n\n", + GetGLTargetName( targets[ tgtIdx ] ), + GetGLFormatName( formats[ fmtIdx ].internal ), + GetGLBaseFormatName( formats[ fmtIdx ].format ), + GetGLTypeName( formats[ fmtIdx ].datatype ) ); + } + else + break; // Skip other cube map targets; they're unlikely to pass either + } + } + + return error; +} +int test_images_cube_getinfo( cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) +{ + GLenum targets[] = { + GL_TEXTURE_CUBE_MAP_POSITIVE_X, + GL_TEXTURE_CUBE_MAP_POSITIVE_Y, + GL_TEXTURE_CUBE_MAP_POSITIVE_Z, + GL_TEXTURE_CUBE_MAP_NEGATIVE_X, + GL_TEXTURE_CUBE_MAP_NEGATIVE_Y, + GL_TEXTURE_CUBE_MAP_NEGATIVE_Z }; + + struct { + GLenum internal; + GLenum format; + GLenum datatype; + 
ExplicitType type; + + } formats[] = { +#ifdef GL_ES_VERSION_2_0 + { GL_RGBA, GL_RGBA, GL_UNSIGNED_BYTE, kUChar }, + { GL_RGBA, GL_RGBA, GL_UNSIGNED_SHORT, kUShort }, + // XXX add others +#else // GL_ES_VERSION_2_0 + { GL_RGBA, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, kUChar }, + { GL_RGBA, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, kUChar }, + { GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, kUChar }, + { GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, kUShort }, + { GL_RGBA8I_EXT, GL_RGBA_INTEGER_EXT, GL_BYTE, kChar }, + { GL_RGBA16I_EXT, GL_RGBA_INTEGER_EXT, GL_SHORT, kShort }, + { GL_RGBA32I_EXT, GL_RGBA_INTEGER_EXT, GL_INT, kInt }, + { GL_RGBA8UI_EXT, GL_RGBA_INTEGER_EXT, GL_UNSIGNED_BYTE, kUChar }, + { GL_RGBA16UI_EXT, GL_RGBA_INTEGER_EXT, GL_UNSIGNED_SHORT, kUShort }, + { GL_RGBA32UI_EXT, GL_RGBA_INTEGER_EXT, GL_UNSIGNED_INT, kUInt }, + { GL_RGBA32F_ARB, GL_RGBA, GL_FLOAT, kFloat } +#endif + }; + + size_t sizes[] = { 16, 32, 64, 128, 256, 512, 1024, 2048, 4096 }; + + size_t fmtIdx, tgtIdx; + int error = 0; + size_t iter = 6; + RandomSeed seed( gRandomSeed ); + + // Check if images are supported + if (checkForImageSupport(device)) { + log_info("Device does not support images. 
Skipping test.\n"); + return 0; + } + + // Loop through a set of GL formats, testing a set of sizes against each one + for( fmtIdx = 0; fmtIdx < sizeof( formats ) / sizeof( formats[ 0 ] ); fmtIdx++ ) + { + for( tgtIdx = 0; tgtIdx < sizeof( targets ) / sizeof( targets[ 0 ] ); tgtIdx++ ) + { + size_t i; + log_info( "Testing cube map object info test for %s : %s : %s : %s\n", + GetGLTargetName( targets[ tgtIdx ] ), + GetGLFormatName( formats[ fmtIdx ].internal ), + GetGLBaseFormatName( formats[ fmtIdx ].format ), + GetGLTypeName( formats[ fmtIdx ].datatype ) ); + + for( i = 0; i < iter; i++ ) + { + if( test_image_object_info( context, queue, sizes[i], sizes[i], + targets[ tgtIdx ], + formats[ fmtIdx ].format, + formats[ fmtIdx ].internal, + formats[ fmtIdx ].datatype, + formats[ fmtIdx ].type, seed ) ) + { + log_error( "ERROR: Cube map object info test failed for %s : %s : %s : %s\n\n", + GetGLTargetName( targets[ tgtIdx ] ), + GetGLFormatName( formats[ fmtIdx ].internal ), + GetGLBaseFormatName( formats[ fmtIdx ].format ), + GetGLTypeName( formats[ fmtIdx ].datatype ) ); + + + error++; + break; // Skip other sizes for this combination + } + } + if( i == iter ) + { + log_info( "passed: Cube map object info test passed for %s : %s : %s : %s\n\n", + GetGLTargetName( targets[ tgtIdx ] ), + GetGLFormatName( formats[ fmtIdx ].internal ), + GetGLBaseFormatName( formats[ fmtIdx ].format ), + GetGLTypeName( formats[ fmtIdx ].datatype ) ); + } + else + break; // Skip other cube map targets; they're unlikely to pass either + } + } + + return error; +} diff --git a/test_conformance/gles/test_images_3D.cpp b/test_conformance/gles/test_images_3D.cpp new file mode 100644 index 00000000..7babc91e --- /dev/null +++ b/test_conformance/gles/test_images_3D.cpp @@ -0,0 +1,268 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "testBase.h" + +#include "gl_headers.h" + +extern "C" { extern cl_uint gRandomSeed; }; + +static const char *imageReadKernelPattern = +"__kernel void sample_test( read_only image3d_t source, sampler_t sampler, __global %s4 *results )\n" +"{\n" +" int tidX = get_global_id(0);\n" +" int tidY = get_global_id(1);\n" +" int tidZ = get_global_id(2);\n" +" int width = get_image_width( source );\n" +" int height = get_image_height( source );\n" +" int offset = tidZ * width * height + tidY * width + tidX;\n" +"\n" +" results[ offset ] = read_image%s( source, sampler, (int4)( tidX, tidY, tidZ, 0 ) );\n" +"}\n"; + +static int test_image_read( cl_context context, cl_command_queue queue, GLenum glTarget, GLuint glTexture, + size_t imageWidth, size_t imageHeight, size_t imageDepth, + cl_image_format *outFormat, ExplicitType *outType, void **outResultBuffer ) +{ + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper streams[ 2 ]; + + int error; + size_t threads[ 3 ], localThreads[ 3 ]; + char kernelSource[1024]; + char *programPtr; + + + // Create a CL image from the supplied GL texture + streams[ 0 ] = (*clCreateFromGLTexture_ptr)( context, CL_MEM_READ_ONLY, glTarget, 0, glTexture, &error ); + if( error != CL_SUCCESS ) + { + print_error( error, "Unable to create CL image from GL texture" ); +#ifndef GL_ES_VERSION_2_0 + GLint fmt; + glGetTexLevelParameteriv( glTarget, 0, GL_TEXTURE_INTERNAL_FORMAT, &fmt ); + log_error( " Supplied GL texture was format %s\n", GetGLFormatName( fmt ) ); +#endif + return error; + } + + // Determine data 
type and format that CL came up with + error = clGetImageInfo( streams[ 0 ], CL_IMAGE_FORMAT, sizeof( cl_image_format ), outFormat, NULL ); + test_error( error, "Unable to get CL image format" ); + + /* Create the source */ + *outType = get_read_kernel_type( outFormat ); + size_t channelSize = get_explicit_type_size( *outType ); + + sprintf( kernelSource, imageReadKernelPattern, get_explicit_type_name( *outType ), get_kernel_suffix( outFormat ) ); + + /* Create kernel */ + programPtr = kernelSource; + if( create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&programPtr, "sample_test" ) ) + { + return -1; + } + + + // Create a vanilla output buffer + streams[ 1 ] = clCreateBuffer( context, CL_MEM_READ_WRITE, channelSize * 4 * imageWidth * imageHeight * imageDepth, NULL, &error ); + test_error( error, "Unable to create output buffer" ); + + + /* Assign streams and execute */ + clSamplerWrapper sampler = clCreateSampler( context, CL_FALSE, CL_ADDRESS_NONE, CL_FILTER_NEAREST, &error ); + test_error( error, "Unable to create sampler" ); + + error = clSetKernelArg( kernel, 0, sizeof( streams[ 0 ] ), &streams[ 0 ] ); + test_error( error, "Unable to set kernel arguments" ); + error = clSetKernelArg( kernel, 1, sizeof( sampler ), &sampler ); + test_error( error, "Unable to set kernel arguments" ); + error = clSetKernelArg( kernel, 2, sizeof( streams[ 1 ] ), &streams[ 1 ] ); + test_error( error, "Unable to set kernel arguments" ); + + glFlush(); + + error = (*clEnqueueAcquireGLObjects_ptr)( queue, 1, &streams[ 0 ], 0, NULL, NULL); + test_error( error, "Unable to acquire GL obejcts"); + + /* Run the kernel */ + threads[ 0 ] = imageWidth; + threads[ 1 ] = imageHeight; + threads[ 2 ] = imageDepth; + + error = get_max_common_3D_work_group_size( context, kernel, threads, localThreads ); + test_error( error, "Unable to get work group size to use" ); + + error = clEnqueueNDRangeKernel( queue, kernel, 3, NULL, threads, localThreads, 0, NULL, NULL ); + 
test_error( error, "Unable to execute test kernel" ); + + + error = (*clEnqueueReleaseGLObjects_ptr)( queue, 1, &streams[ 0 ], 0, NULL, NULL ); + test_error(error, "clEnqueueReleaseGLObjects failed"); + + // Read results from the CL buffer + *outResultBuffer = (void *)( new char[ channelSize * 4 * imageWidth * imageHeight * imageDepth ] ); + error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, channelSize * 4 * imageWidth * imageHeight * imageDepth, + *outResultBuffer, 0, NULL, NULL ); + test_error( error, "Unable to read output CL buffer!" ); + + return 0; +} + +int test_image_format_read( cl_context context, cl_command_queue queue, + size_t width, size_t height, size_t depth, + GLenum target, GLenum format, GLenum internalFormat, + GLenum glType, ExplicitType type, MTdata d ) +{ + int error; + + + // Create the GL texture + glTextureWrapper glTexture; + void* tmp = CreateGLTexture3D( width, height, depth, target, format, internalFormat, glType, type, &glTexture, &error, d ); + BufferOwningPtr inputBuffer(tmp); + if( error != 0 ) + { + return error; + } + + /* skip formats not supported by OpenGL */ + if(!tmp) + { + return 0; + } + + // Run and get the results + cl_image_format clFormat; + ExplicitType actualType; + char *outBuffer; + error = test_image_read( context, queue, target, glTexture, width, height, depth, &clFormat, &actualType, (void **)&outBuffer ); + if( error != 0 ) + return error; + BufferOwningPtr actualResults(outBuffer); + + log_info( "- Read [%4d x %4d x %4d] : GL Texture : %s : %s : %s => CL Image : %s : %s \n", + (int)width, (int)height, (int)depth, + GetGLFormatName( format ), GetGLFormatName( internalFormat ), GetGLTypeName( glType), + GetChannelOrderName( clFormat.image_channel_order ), GetChannelTypeName( clFormat.image_channel_data_type )); + + // We have to convert our input buffer to the returned type, so we can validate. 
+ // This is necessary because OpenCL might not actually pick an internal format that actually matches our + // input format (for example, if it picks a normalized format, the results will come out as floats instead of + // going in as ints). + + BufferOwningPtr convertedInputs(convert_to_expected( inputBuffer, width * height * depth, type, actualType )); + if( convertedInputs == NULL ) + return -1; + + // Now we validate + if( actualType == kFloat ) + return validate_float_results( convertedInputs, actualResults, width, height, depth ); + else + return validate_integer_results( convertedInputs, actualResults, width, height, depth, get_explicit_type_size( actualType ) ); +} + + +int test_images_read_3D( cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) +{ + GLenum targets[] = { GL_TEXTURE_3D }; + + struct { + GLenum internal; + GLenum format; + GLenum datatype; + ExplicitType type; + + } formats[] = { +#ifdef GL_ES_VERSION_2_0 + { GL_RGBA, GL_RGBA, GL_UNSIGNED_BYTE, kUChar }, + { GL_RGBA, GL_RGBA, GL_UNSIGNED_SHORT, kUShort }, + // XXX add others +#else // GL_ES_VERSION_2_0 + { GL_RGBA, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, kUChar }, + { GL_RGBA, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, kUChar }, + { GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, kUChar }, + { GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, kUShort }, + { GL_RGBA8I_EXT, GL_RGBA_INTEGER_EXT, GL_BYTE, kChar }, + { GL_RGBA16I_EXT, GL_RGBA_INTEGER_EXT, GL_SHORT, kShort }, + { GL_RGBA32I_EXT, GL_RGBA_INTEGER_EXT, GL_INT, kInt }, + { GL_RGBA8UI_EXT, GL_RGBA_INTEGER_EXT, GL_UNSIGNED_BYTE, kUChar }, + { GL_RGBA16UI_EXT, GL_RGBA_INTEGER_EXT, GL_UNSIGNED_SHORT, kUShort }, + { GL_RGBA32UI_EXT, GL_RGBA_INTEGER_EXT, GL_UNSIGNED_INT, kUInt }, + { GL_RGBA32F_ARB, GL_RGBA, GL_FLOAT, kFloat } +#endif + }; + + size_t sizes[] = { 2, 4, 8, 16, 32, 64, 128 }; + size_t fmtIdx, tgtIdx; + int error = 0; + RandomSeed seed(gRandomSeed); + + size_t iter = sizeof(sizes)/sizeof(sizes[0]); + + // Check if images are 
supported + if (checkForImageSupport(device)) { + log_info("Device does not support images. Skipping test.\n"); + return 0; + } + + // Loop through a set of GL formats, testing a set of sizes against each one + for( fmtIdx = 0; fmtIdx < sizeof( formats ) / sizeof( formats[ 0 ] ); fmtIdx++ ) + { + for( tgtIdx = 0; tgtIdx < sizeof( targets ) / sizeof( targets[ 0 ] ); tgtIdx++ ) + { + size_t i; + + log_info( "Testing image read for GL format %s : %s : %s : %s\n", + GetGLTargetName( targets[ tgtIdx ] ), + GetGLFormatName( formats[ fmtIdx ].internal ), + GetGLBaseFormatName( formats[ fmtIdx ].format ), + GetGLTypeName( formats[ fmtIdx ].datatype ) ); + + for( i = 0; i < iter; i++ ) + { + if( test_image_format_read( context, queue, sizes[i], sizes[i], sizes[i], + targets[ tgtIdx ], + formats[ fmtIdx ].format, + formats[ fmtIdx ].internal, + formats[ fmtIdx ].datatype, + formats[ fmtIdx ].type, seed ) ) + { + log_error( "ERROR: Image read test failed for %s : %s : %s : %s\n\n", + GetGLTargetName( targets[ tgtIdx ] ), + GetGLFormatName( formats[ fmtIdx ].internal ), + GetGLBaseFormatName( formats[ fmtIdx ].format ), + GetGLTypeName( formats[ fmtIdx ].datatype ) ); + + error++; + break; // Skip other sizes for this combination + } + } + if( i == sizeof (sizes) / sizeof( sizes[0] ) ) + { + log_info( "passed: Image read test for GL format %s : %s : %s : %s\n\n", + GetGLTargetName( targets[ tgtIdx ] ), + GetGLFormatName( formats[ fmtIdx ].internal ), + GetGLBaseFormatName( formats[ fmtIdx ].format ), + GetGLTypeName( formats[ fmtIdx ].datatype ) ); + + } + } + } + + return error; +} + diff --git a/test_conformance/gles/test_images_3D_info.cpp b/test_conformance/gles/test_images_3D_info.cpp new file mode 100644 index 00000000..f2e98c01 --- /dev/null +++ b/test_conformance/gles/test_images_3D_info.cpp @@ -0,0 +1,166 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "gl_headers.h" +#include "testBase.h" + +extern "C" {extern cl_uint gRandomSeed;}; + +static int test_image_read( cl_context context, cl_command_queue queue, GLenum glTarget, GLuint glTexture, + size_t imageWidth, size_t imageHeight, size_t imageDepth, cl_image_format *outFormat, ExplicitType *outType, void **outResultBuffer ) +{ + clMemWrapper streams[ 2 ]; + + int error; + + // Create a CL image from the supplied GL texture + streams[ 0 ] = (*clCreateFromGLTexture_ptr)( context, CL_MEM_READ_ONLY, glTarget, 0, glTexture, &error ); + if( error != CL_SUCCESS ) + { + print_error( error, "Unable to create CL image from GL texture" ); +#ifndef GL_ES_VERSION_2_0 + GLint fmt; + glGetTexLevelParameteriv( glTarget, 0, GL_TEXTURE_INTERNAL_FORMAT, &fmt ); + log_error( " Supplied GL texture was format %s\n", GetGLFormatName( fmt ) ); +#endif + return error; + } + + // Determine data type and format that CL came up with + error = clGetImageInfo( streams[ 0 ], CL_IMAGE_FORMAT, sizeof( cl_image_format ), outFormat, NULL ); + test_error( error, "Unable to get CL image format" ); + + return CheckGLObjectInfo(streams[0], CL_GL_OBJECT_TEXTURE3D, glTexture, glTarget, 0); +} + +static int test_image_format_read( cl_context context, cl_command_queue queue, + size_t width, size_t height, size_t depth, + GLenum target, GLenum format, GLenum internalFormat, + GLenum glType, ExplicitType type, MTdata d ) +{ + int error; 
+ + + // Create the GL texture + glTextureWrapper glTexture; + void* tmp = CreateGLTexture3D( width, height, depth, target, format, internalFormat, glType, type, &glTexture, &error, d, true ); + BufferOwningPtr inputBuffer(tmp); + if( error != 0 ) + { + return error; + } + + /* skip formats not supported by OpenGL */ + if(!tmp) + { + return 0; + } + + // Run and get the results + cl_image_format clFormat; + ExplicitType actualType; + char *outBuffer; + return test_image_read( context, queue, target, glTexture, width, height, depth, &clFormat, &actualType, (void **)&outBuffer ); +} + + +int test_images_3D_getinfo( cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) +{ + GLenum targets[] = { GL_TEXTURE_3D }; + + struct { + GLenum internal; + GLenum format; + GLenum datatype; + ExplicitType type; + + } formats[] = { +#ifdef GL_ES_VERSION_2_0 + { GL_RGBA, GL_RGBA, GL_UNSIGNED_BYTE, kUChar }, + { GL_RGBA, GL_RGBA, GL_UNSIGNED_SHORT, kUShort }, +// { GL_RGBA8I_EXT, GL_RGBA_INTEGER_EXT, GL_BYTE, kChar }, +// { GL_RGBA16I_EXT, GL_RGBA_INTEGER_EXT, GL_SHORT, kShort }, +// { GL_RGBA32I_EXT, GL_RGBA_INTEGER_EXT, GL_INT, kInt }, +// { GL_RGBA8UI_EXT, GL_RGBA_INTEGER_EXT, GL_UNSIGNED_BYTE, kUChar }, +// { GL_RGBA16UI_EXT, GL_RGBA_INTEGER_EXT, GL_UNSIGNED_SHORT, kUShort }, +// { GL_RGBA32UI_EXT, GL_RGBA_INTEGER_EXT, GL_UNSIGNED_INT, kUInt }, +// { GL_RGBA32F_ARB, GL_RGBA, GL_FLOAT, kFloat } +#else // GL_ES_VERSION_2_0 + { GL_RGBA, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, kUChar }, + { GL_RGBA, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, kUChar }, + { GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, kUChar }, + { GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, kUShort }, + { GL_RGBA8I_EXT, GL_RGBA_INTEGER_EXT, GL_BYTE, kChar }, + { GL_RGBA16I_EXT, GL_RGBA_INTEGER_EXT, GL_SHORT, kShort }, + { GL_RGBA32I_EXT, GL_RGBA_INTEGER_EXT, GL_INT, kInt }, + { GL_RGBA8UI_EXT, GL_RGBA_INTEGER_EXT, GL_UNSIGNED_BYTE, kUChar }, + { GL_RGBA16UI_EXT, GL_RGBA_INTEGER_EXT, GL_UNSIGNED_SHORT, kUShort 
}, + { GL_RGBA32UI_EXT, GL_RGBA_INTEGER_EXT, GL_UNSIGNED_INT, kUInt }, + { GL_RGBA32F_ARB, GL_RGBA, GL_FLOAT, kFloat } +#endif + }; + + size_t sizes[] = { 2, 4, 8, 16, 32, 64, 128 }; + + size_t fmtIdx, tgtIdx; + int error = 0; + RandomSeed seed(gRandomSeed); + + size_t iter = sizeof(sizes)/sizeof(sizes[0]); + + // Check if images are supported + if (checkForImageSupport(device)) { + log_info("Device does not support images. Skipping test.\n"); + return 0; + } + + // Loop through a set of GL formats, testing a set of sizes against each one + for( fmtIdx = 0; fmtIdx < sizeof( formats ) / sizeof( formats[ 0 ] ); fmtIdx++ ) + { + for( tgtIdx = 0; tgtIdx < sizeof( targets ) / sizeof( targets[ 0 ] ); tgtIdx++ ) + { + size_t i; + + log_info( "Testing image info for GL format %s : %s : %s : %s\n", + GetGLTargetName( targets[ tgtIdx ] ), + GetGLFormatName( formats[ fmtIdx ].internal ), + GetGLBaseFormatName( formats[ fmtIdx ].format ), + GetGLTypeName( formats[ fmtIdx ].datatype ) ); + + for( i = 0; i < iter; i++ ) + { + if( test_image_format_read( context, queue, sizes[i], sizes[i], sizes[i], + targets[ tgtIdx ], + formats[ fmtIdx ].format, + formats[ fmtIdx ].internal, + formats[ fmtIdx ].datatype, + formats[ fmtIdx ].type, seed ) ) + { + log_error( "ERROR: Image info test failed for %s : %s : %s : %s\n\n", + GetGLTargetName( targets[ tgtIdx ] ), + GetGLFormatName( formats[ fmtIdx ].internal ), + GetGLBaseFormatName( formats[ fmtIdx ].format ), + GetGLTypeName( formats[ fmtIdx ].datatype ) ); + + error++; + break; // Skip other sizes for this combination + } + } + } + } + + return error; +} + diff --git a/test_conformance/gles/test_renderbuffer.cpp b/test_conformance/gles/test_renderbuffer.cpp new file mode 100644 index 00000000..b4c6f148 --- /dev/null +++ b/test_conformance/gles/test_renderbuffer.cpp @@ -0,0 +1,373 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "testBase.h" +#include "helpers.h" + +#include "gl_headers.h" + +extern "C" { extern cl_uint gRandomSeed; }; + +extern int test_cl_image_write( cl_context context, cl_command_queue queue, cl_mem clImage, + size_t imageWidth, size_t imageHeight, cl_image_format *outFormat, + ExplicitType *outType, void **outSourceBuffer, MTdata d ); + +extern int test_cl_image_read( cl_context context, cl_command_queue queue, cl_mem clImage, + size_t imageWidth, size_t imageHeight, cl_image_format *outFormat, + ExplicitType *outType, void **outResultBuffer ); + +static int test_attach_renderbuffer_read_image( cl_context context, cl_command_queue queue, GLenum glTarget, GLuint glRenderbuffer, + size_t imageWidth, size_t imageHeight, cl_image_format *outFormat, ExplicitType *outType, void **outResultBuffer ) +{ + int error; + + // Create a CL image from the supplied GL renderbuffer + clMemWrapper image = (*clCreateFromGLRenderbuffer_ptr)( context, CL_MEM_READ_ONLY, glRenderbuffer, &error ); + if( error != CL_SUCCESS ) + { + print_error( error, "Unable to create CL image from GL renderbuffer" ); + return error; + } + + return test_cl_image_read( context, queue, image, imageWidth, imageHeight, outFormat, outType, outResultBuffer ); +} + +int test_renderbuffer_read_image( cl_context context, cl_command_queue queue, + GLsizei width, GLsizei height, GLenum attachment, + GLenum rbFormat, GLenum rbType, + GLenum texFormat, 
GLenum texType, + ExplicitType type, MTdata d ) +{ + int error; + + + // Create the GL renderbuffer + glFramebufferWrapper glFramebuffer; + glRenderbufferWrapper glRenderbuffer; + void *tmp = CreateGLRenderbuffer( width, height, attachment, rbFormat, rbType, texFormat, texType, + type, &glFramebuffer, &glRenderbuffer, &error, d, true ); + BufferOwningPtr inputBuffer(tmp); + if( error != 0 ) + { + // GL_RGBA_INTEGER_EXT doesn't exist in GLES2. No need to check for it. + return error; + } + + // Run and get the results + cl_image_format clFormat; + ExplicitType actualType; + char *outBuffer; + error = test_attach_renderbuffer_read_image( context, queue, attachment, glRenderbuffer, width, height, &clFormat, &actualType, (void **)&outBuffer ); + if( error != 0 ) + return error; + BufferOwningPtr actualResults(outBuffer); + + log_info( "- Read [%4d x %4d] : GL renderbuffer : %s : %s : %s => CL Image : %s : %s \n", width, height, + GetGLFormatName( rbFormat ), GetGLFormatName( rbFormat ), GetGLTypeName( rbType ), + GetChannelOrderName( clFormat.image_channel_order ), GetChannelTypeName( clFormat.image_channel_data_type )); + +#ifdef GLES_DEBUG + log_info("- start read GL data -- \n"); + DumpGLBuffer(glType, width, height, actualResults); + log_info("- end read GL data -- \n"); +#endif + + // We have to convert our input buffer to the returned type, so we can validate. 
+ BufferOwningPtr convertedInput(convert_to_expected( inputBuffer, width * height, type, actualType )); + +#ifdef GLES_DEBUG + log_info("- start input data -- \n"); + DumpGLBuffer(GetGLTypeForExplicitType(actualType), width, height, convertedInput); + log_info("- end input data -- \n"); +#endif + +#ifdef GLES_DEBUG + log_info("- start converted data -- \n"); + DumpGLBuffer(GetGLTypeForExplicitType(actualType), width, height, actualResults); + log_info("- end converted data -- \n"); +#endif + + // Now we validate + int valid = 0; + if(convertedInput) { + if( actualType == kFloat ) + valid = validate_float_results( convertedInput, actualResults, width, height ); + else + valid = validate_integer_results( convertedInput, actualResults, width, height, get_explicit_type_size( actualType ) ); + } + + return valid; +} + +int test_renderbuffer_read( cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) +{ + GLenum attachments[] = { GL_COLOR_ATTACHMENT0_EXT }; + + struct { + GLenum rbFormat; + GLenum rbType; + GLenum texFormat; + GLenum texType; + ExplicitType type; + + } formats[] = { + { GL_RGBA8_OES, GL_UNSIGNED_BYTE, GL_RGBA, GL_UNSIGNED_BYTE, kUChar }, + //{ GL_RGBA16F_QCOM, GL_HALF_FLOAT_OES, GL_RGBA, GL_HALF_FLOAT_OES, kHalf }, // Half-float not supported by ReadPixels + { GL_RGBA32F, GL_FLOAT, GL_RGBA, GL_FLOAT, kFloat}, + // XXX add others + }; + + size_t fmtIdx, attIdx; + int error = 0; +#ifdef GLES_DEBUG + size_t iter = 1; +#else + size_t iter = 6; +#endif + RandomSeed seed( gRandomSeed ); + + // Check if images are supported + if (checkForImageSupport(device)) { + log_info("Device does not support images. 
Skipping test.\n"); + return 0; + } + + // Loop through a set of GL formats, testing a set of sizes against each one + for( fmtIdx = 0; fmtIdx < sizeof( formats ) / sizeof( formats[ 0 ] ); fmtIdx++ ) + { + for( attIdx = 0; attIdx < sizeof( attachments ) / sizeof( attachments[ 0 ] ); attIdx++ ) + { + size_t i; + + log_info( "Testing renderbuffer read for %s : %s : %s : %s\n", + GetGLAttachmentName( attachments[ attIdx ] ), + GetGLFormatName( formats[ fmtIdx ].rbFormat ), + GetGLBaseFormatName( formats[ fmtIdx ].rbFormat ), + GetGLTypeName( formats[ fmtIdx ].rbType ) ); + + for( i = 0; i < iter; i++ ) + { + GLsizei width = random_in_range( 16, 512, seed ); + GLsizei height = random_in_range( 16, 512, seed ); +#ifdef GLES_DEBUG + width = height = 4; +#endif + + if( test_renderbuffer_read_image( context, queue, width, height, + attachments[ attIdx ], + formats[ fmtIdx ].rbFormat, + formats[ fmtIdx ].rbType, + formats[ fmtIdx ].texFormat, + formats[ fmtIdx ].texType, + formats[ fmtIdx ].type, seed ) ) + + { + log_error( "ERROR: Renderbuffer read test failed for %s : %s : %s : %s\n\n", + GetGLAttachmentName( attachments[ attIdx ] ), + GetGLFormatName( formats[ fmtIdx ].rbFormat ), + GetGLBaseFormatName( formats[ fmtIdx ].rbFormat ), + GetGLTypeName( formats[ fmtIdx ].rbType ) ); + + error++; + break; // Skip other sizes for this combination + } + } + if( i == iter ) + { + log_info( "passed: Renderbuffer read test passed for %s : %s : %s : %s\n\n", + GetGLAttachmentName( attachments[ attIdx ] ), + GetGLFormatName( formats[ fmtIdx ].rbFormat ), + GetGLBaseFormatName( formats[ fmtIdx ].rbFormat ), + GetGLTypeName( formats[ fmtIdx ].rbType ) ); + } + } + } + + return error; +} + + +#pragma mark -------------------- Write tests ------------------------- + +int test_attach_renderbuffer_write_to_image( cl_context context, cl_command_queue queue, GLenum glTarget, GLuint glRenderbuffer, + size_t imageWidth, size_t imageHeight, cl_image_format *outFormat, ExplicitType *outType, 
MTdata d, void **outSourceBuffer ) +{ + int error; + + // Create a CL image from the supplied GL renderbuffer + clMemWrapper image = (*clCreateFromGLRenderbuffer_ptr)( context, CL_MEM_WRITE_ONLY, glRenderbuffer, &error ); + if( error != CL_SUCCESS ) + { + print_error( error, "Unable to create CL image from GL renderbuffer" ); + return error; + } + + return test_cl_image_write( context, queue, image, imageWidth, imageHeight, outFormat, outType, outSourceBuffer, d ); +} + +int test_renderbuffer_image_write( cl_context context, cl_command_queue queue, + GLsizei width, GLsizei height, GLenum attachment, + GLenum rbFormat, GLenum rbType, + GLenum texFormat, GLenum texType, + ExplicitType type, MTdata d) +{ + int error; + + // Create the GL renderbuffer + glFramebufferWrapper glFramebuffer; + glRenderbufferWrapper glRenderbuffer; + CreateGLRenderbuffer( width, height, attachment, rbFormat, rbType, texFormat, texType, + type, &glFramebuffer, &glRenderbuffer, &error, d, false ); + if( error != 0 ) + { + // GL_RGBA_INTEGER_EXT doesn't exist in GLES2. No need to check for it. 
+ return error; + } + + // Run and get the results + cl_image_format clFormat; + ExplicitType sourceType; + void *outSourceBuffer; + error = test_attach_renderbuffer_write_to_image( context, queue, attachment, glRenderbuffer, width, height, &clFormat, &sourceType, d, (void **)&outSourceBuffer ); + if( error != 0 ) + return error; + + BufferOwningPtr sourceData(outSourceBuffer); + + log_info( "- Write [%4d x %4d] : GL Renderbuffer : %s : %s : %s => CL Image : %s : %s \n", width, height, + GetGLFormatName( rbFormat ), GetGLFormatName( rbFormat ), GetGLTypeName( rbType), + GetChannelOrderName( clFormat.image_channel_order ), GetChannelTypeName( clFormat.image_channel_data_type )); + + // Now read the results from the GL renderbuffer + void* tmp = ReadGLRenderbuffer( glFramebuffer, glRenderbuffer, attachment, rbFormat, rbType, + texFormat, texType, type, width, height ); + BufferOwningPtr resultData( tmp ); + +#ifdef GLES_DEBUG + log_info("- start result data -- \n"); + DumpGLBuffer(glType, width, height, resultData); + log_info("- end result data -- \n"); +#endif + + // We have to convert our input buffer to the returned type, so we can validate. 
+ BufferOwningPtr convertedData( convert_to_expected( resultData, width * height, type, sourceType ) ); + +#ifdef GLES_DEBUG + log_info("- start input data -- \n"); + DumpGLBuffer(GetGLTypeForExplicitType(sourceType), width, height, sourceData); + log_info("- end input data -- \n"); +#endif + +#ifdef GLES_DEBUG + log_info("- start converted data -- \n"); + DumpGLBuffer(GetGLTypeForExplicitType(sourceType), width, height, convertedData); + log_info("- end converted data -- \n"); +#endif + + // Now we validate + int valid = 0; + if(convertedData) { + if( sourceType == kFloat ) + valid = validate_float_results( sourceData, convertedData, width, height ); + else + valid = validate_integer_results( sourceData, convertedData, width, height, get_explicit_type_size( type ) ); + } + + return valid; +} + +int test_renderbuffer_write( cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) +{ + GLenum attachments[] = { GL_COLOR_ATTACHMENT0_EXT }; + + struct { + GLenum rbFormat; + GLenum rbType; + GLenum texFormat; + GLenum texType; + ExplicitType type; + + } formats[] = { + { GL_RGBA8_OES, GL_UNSIGNED_BYTE, GL_RGBA, GL_UNSIGNED_BYTE, kUChar }, + //{ GL_RGBA16F_QCOM, GL_UNSIGNED_SHORT, GL_RGBA, GL_UNSIGNED_SHORT, kHalf }, // Half float not supported by ReadPixels + { GL_RGBA32F, GL_FLOAT, GL_RGBA, GL_FLOAT, kFloat}, + // XXX add others + }; + + size_t fmtIdx, attIdx; + int error = 0; + size_t iter = 6; +#ifdef GLES_DEBUG + iter = 1; +#endif + RandomSeed seed( gRandomSeed ); + + // Check if images are supported + if (checkForImageSupport(device)) { + log_info("Device does not support images. 
Skipping test.\n"); + return 0; + } + + // Loop through a set of GL formats, testing a set of sizes against each one + for( fmtIdx = 0; fmtIdx < sizeof( formats ) / sizeof( formats[ 0 ] ); fmtIdx++ ) + { + for( attIdx = 0; attIdx < sizeof( attachments ) / sizeof( attachments[ 0 ] ); attIdx++ ) + { + log_info( "Testing Renderbuffer write test for %s : %s : %s : %s\n", + GetGLAttachmentName( attachments[ attIdx ] ), + GetGLFormatName( formats[ fmtIdx ].rbFormat ), + GetGLBaseFormatName( formats[ fmtIdx ].rbFormat ), + GetGLTypeName( formats[ fmtIdx ].rbType) ); + + size_t i; + for( i = 0; i < iter; i++ ) + { + GLsizei width = random_in_range( 16, 512, seed ); + GLsizei height = random_in_range( 16, 512, seed ); +#ifdef GLES_DEBUG + width = height = 4; +#endif + + if( test_renderbuffer_image_write( context, queue, width, height, + attachments[ attIdx ], + formats[ fmtIdx ].rbFormat, + formats[ fmtIdx ].rbType, + formats[ fmtIdx ].texFormat, + formats[ fmtIdx ].texType, + formats[ fmtIdx ].type, seed ) ) + { + log_error( "ERROR: Renderbuffer write test failed for %s : %s : %s : %s\n\n", + GetGLAttachmentName( attachments[ attIdx ] ), + GetGLFormatName( formats[ fmtIdx ].rbFormat ), + GetGLBaseFormatName( formats[ fmtIdx ].rbFormat ), + GetGLTypeName( formats[ fmtIdx ].rbType ) ); + + error++; + break; // Skip other sizes for this combination + } + } + if( i == iter ) + { + log_info( "passed: Renderbuffer write test passed for %s : %s : %s : %s\n\n", + GetGLAttachmentName( attachments[ attIdx ] ), + GetGLFormatName( formats[ fmtIdx ].rbFormat ), + GetGLBaseFormatName( formats[ fmtIdx ].rbFormat ), + GetGLTypeName( formats[ fmtIdx ].rbType ) ); + } + } + } + + return error; +} diff --git a/test_conformance/gles/test_renderbuffer_info.cpp b/test_conformance/gles/test_renderbuffer_info.cpp new file mode 100644 index 00000000..4724187d --- /dev/null +++ b/test_conformance/gles/test_renderbuffer_info.cpp @@ -0,0 +1,119 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "gl_headers.h" +#include "testBase.h" + +extern "C" {extern cl_uint gRandomSeed;}; + +static int test_renderbuffer_object_info( cl_context context, cl_command_queue queue, + GLsizei width, GLsizei height, GLenum attachment, + GLenum rbFormat, GLenum rbType, + GLenum texFormat, GLenum texType, + ExplicitType type, MTdata d ) +{ + int error; + + // Create the GL render buffer + glFramebufferWrapper glFramebuffer; + glRenderbufferWrapper glRenderbuffer; + void* tmp = CreateGLRenderbuffer( width, height, attachment, rbFormat, rbType, texFormat, texType, + type, &glFramebuffer, &glRenderbuffer, &error, d, true ); + BufferOwningPtr inputBuffer(tmp); + if( error != 0 ) + return error; + + clMemWrapper image = (*clCreateFromGLRenderbuffer_ptr)(context, CL_MEM_READ_ONLY, glRenderbuffer, &error); + test_error(error, "clCreateFromGLRenderbuffer failed"); + + log_info( "- Given a GL format of %s, input type was %s, size was %d x %d\n", + GetGLFormatName( rbFormat ), + get_explicit_type_name( type ), (int)width, (int)height ); + + // Verify the expected information here. 
+ return CheckGLObjectInfo(image, CL_GL_OBJECT_RENDERBUFFER, (GLuint)glRenderbuffer, rbFormat, 0); +} + +int test_renderbuffer_getinfo( cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) +{ + GLenum attachments[] = { GL_COLOR_ATTACHMENT0_EXT }; + + struct { + GLenum rbFormat; + GLenum rbType; + GLenum texFormat; + GLenum texType; + ExplicitType type; + + } formats[] = { + { GL_RGBA8_OES, GL_UNSIGNED_BYTE, GL_RGBA, GL_UNSIGNED_BYTE, kUChar }, + { GL_RGBA32F, GL_FLOAT, GL_RGBA, GL_FLOAT, kFloat } + }; + + size_t fmtIdx, tgtIdx; + int error = 0; + size_t iter = 6; + RandomSeed seed(gRandomSeed); + + // Check if images are supported + if (checkForImageSupport(device)) { + log_info("Device does not support images. Skipping test.\n"); + return 0; + } + + // Loop through a set of GL formats, testing a set of sizes against each one + for( fmtIdx = 0; fmtIdx < sizeof( formats ) / sizeof( formats[ 0 ] ); fmtIdx++ ) + { + for( tgtIdx = 0; tgtIdx < sizeof( attachments ) / sizeof( attachments[ 0 ] ); tgtIdx++ ) + { + log_info( "Testing Renderbuffer object info for %s : %s : %s\n", + GetGLFormatName( formats[ fmtIdx ].rbFormat ), + GetGLBaseFormatName( formats[ fmtIdx ].rbFormat ), + GetGLTypeName( formats[ fmtIdx ].type ) ); + + size_t i; + for( i = 0; i < iter; i++ ) + { + GLsizei width = random_in_range( 16, 512, seed ); + GLsizei height = random_in_range( 16, 512, seed ); + + if( test_renderbuffer_object_info( context, queue, (int)width, (int)height, + attachments[ tgtIdx ], + formats[ fmtIdx ].rbFormat, + formats[ fmtIdx ].rbType, + formats[ fmtIdx ].texFormat, + formats[ fmtIdx ].texType, + formats[ fmtIdx ].type, seed ) ) + { + log_error( "ERROR: Renderbuffer write test failed for GL format %s : %s\n\n", + GetGLFormatName( formats[ fmtIdx ].rbFormat ), + GetGLTypeName( formats[ fmtIdx ].rbType ) ); + + error++; + break; // Skip other sizes for this combination + } + } + if( i == iter ) + { + log_info( "passed: Renderbuffer write test passed 
for GL format %s : %s\n\n", + GetGLFormatName( formats[ fmtIdx ].rbFormat ), + GetGLTypeName( formats[ fmtIdx ].rbType ) ); + + } + } + } + + return error; +} diff --git a/test_conformance/half/CMakeLists.txt b/test_conformance/half/CMakeLists.txt new file mode 100644 index 00000000..03308d25 --- /dev/null +++ b/test_conformance/half/CMakeLists.txt @@ -0,0 +1,17 @@ +set(MODULE_NAME HALF) + +set(${MODULE_NAME}_SOURCES + cl_utils.c + Test_vLoadHalf.c + Test_roundTrip.c + Test_vStoreHalf.c main.c + ../../test_common/harness/msvc9.c + ../../test_common/harness/mingw_compat.c + ../../test_common/harness/errorHelpers.c + ../../test_common/harness/ThreadPool.c + ../../test_common/harness/parseParameters.cpp + ../../test_common/harness/kernelHelpers.c +) + +include(../CMakeCommon.txt) + diff --git a/test_conformance/half/Jamfile b/test_conformance/half/Jamfile new file mode 100644 index 00000000..22d3efac --- /dev/null +++ b/test_conformance/half/Jamfile @@ -0,0 +1,22 @@ +project + : requirements + -/harness//harness /harness//harness +# gcc:-xc++ + msvc:"/TP" + ; + +exe Test_half + : cl_utils.c + main.c + Test_roundTrip.c + Test_vLoadHalf.c + Test_vStoreHalf.c + /harness//errorHelpers.c + : windows:/harness//msvc9.c + ; + +install dist + : Test_half + : debug:$(DIST)/debug/tests/test_conformance/half + release:$(DIST)/release/tests/test_conformance/half + ; diff --git a/test_conformance/half/Makefile b/test_conformance/half/Makefile new file mode 100644 index 00000000..b9f5e71f --- /dev/null +++ b/test_conformance/half/Makefile @@ -0,0 +1,45 @@ +ifdef BUILD_WITH_ATF +ATF = -framework ATF +USE_ATF = -DUSE_ATF +endif + +SRCFILES = cl_utils.c Test_vLoadHalf.c Test_roundTrip.c \ + Test_vStoreHalf.c main.c + +CC = c++ +CFLAGS = -g -Wall -Wshorten-64-to-32 $(COMPILERFLAGS) ${RC_CFLAGS} \ + ${USE_ATF} + +LIBRARIES = -framework OpenCL ${RC_CFLAGS} ${ATF} + + +all: release + +OBJECTS := ${SRCFILES:.c=.o} + +release: + echo "Build Release" + $(CC) $(SRCFILES) -Os $(CFLAGS) -o 
Test_half $(LIBRARIES) + +debug: $(OBJECTS) + echo "Build Debug" + $(CC) $(OBJECTS) -O0 $(CFLAGS) -o Test_half_debug -D_DEBUG=1 $(LIBRARIES) + +test: release + arch -i386 ./Test_half -c > cpu.log & + arch -i386 ./Test_half -g > gpu.log & + echo "Testing 32-bit mode in progress. See cpu.log and gpu.log for results." + +test64: release + arch -x86_64 ./Test_half -c > cpu64.log & + arch -x86_64 ./Test_half -g > gpu64.log & + echo "Testing 64-bit mode in progress. See cpu64.log and gpu64.log for results." + + +clean: + rm -f ./Test_half_debug + rm -f ./Test_half + + +.DEFAULT: + @echo The target \"$@\" does not exist in Makefile. diff --git a/test_conformance/half/Test_half.xcodeproj/project.pbxproj b/test_conformance/half/Test_half.xcodeproj/project.pbxproj new file mode 100644 index 00000000..d3860565 --- /dev/null +++ b/test_conformance/half/Test_half.xcodeproj/project.pbxproj @@ -0,0 +1,292 @@ +// !$*UTF8*$! +{ + archiveVersion = 1; + classes = { + }; + objectVersion = 44; + objects = { + +/* Begin PBXBuildFile section */ + 3B1B765F0DE342BC00837A59 /* Test_vStoreHalf.c in Sources */ = {isa = PBXBuildFile; fileRef = 3B1B765E0DE342BC00837A59 /* Test_vStoreHalf.c */; }; + 3B6173C40DE2B14800384A2C /* Test_roundTrip.c in Sources */ = {isa = PBXBuildFile; fileRef = 3B6173C30DE2B14800384A2C /* Test_roundTrip.c */; }; + 3BA6BFB80DE21DB9008685CF /* Test_vLoadHalf.c in Sources */ = {isa = PBXBuildFile; fileRef = 3BA6BFB70DE21DB9008685CF /* Test_vLoadHalf.c */; }; + 3BA6BFBB0DE21EFA008685CF /* cl_utils.c in Sources */ = {isa = PBXBuildFile; fileRef = 3BA6BFBA0DE21EFA008685CF /* cl_utils.c */; }; + 3BA6BFF30DE229C5008685CF /* OpenCL.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 3BA6BFF20DE229C5008685CF /* OpenCL.framework */; }; + 8DD76FAC0486AB0100D96B5E /* main.c in Sources */ = {isa = PBXBuildFile; fileRef = 08FB7796FE84155DC02AAC07 /* main.c */; settings = {ATTRIBUTES = (); }; }; + 8DD76FB00486AB0100D96B5E /* Test_half.1 in CopyFiles */ = {isa = PBXBuildFile; 
fileRef = C6A0FF2C0290799A04C91782 /* Test_half.1 */; }; +/* End PBXBuildFile section */ + +/* Begin PBXCopyFilesBuildPhase section */ + 8DD76FAF0486AB0100D96B5E /* CopyFiles */ = { + isa = PBXCopyFilesBuildPhase; + buildActionMask = 8; + dstPath = /usr/share/man/man1/; + dstSubfolderSpec = 0; + files = ( + 8DD76FB00486AB0100D96B5E /* Test_half.1 in CopyFiles */, + ); + runOnlyForDeploymentPostprocessing = 1; + }; +/* End PBXCopyFilesBuildPhase section */ + +/* Begin PBXFileReference section */ + 08FB7796FE84155DC02AAC07 /* main.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = main.c; sourceTree = ""; }; + 3B1B765E0DE342BC00837A59 /* Test_vStoreHalf.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = Test_vStoreHalf.c; sourceTree = ""; }; + 3B1B77910DE3896E00837A59 /* builtins.cl */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; name = builtins.cl; path = ../../../compute/OpenCL/cl_headers/private/builtins.cl; sourceTree = SOURCE_ROOT; xcLanguageSpecificationIdentifier = xcode.lang.c; }; + 3B6173C30DE2B14800384A2C /* Test_roundTrip.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = Test_roundTrip.c; sourceTree = ""; }; + 3BA6BFB70DE21DB9008685CF /* Test_vLoadHalf.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = Test_vLoadHalf.c; sourceTree = ""; }; + 3BA6BFB90DE21EFA008685CF /* cl_utils.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = cl_utils.h; sourceTree = ""; }; + 3BA6BFBA0DE21EFA008685CF /* cl_utils.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = cl_utils.c; sourceTree = ""; }; + 3BA6BFF20DE229C5008685CF /* OpenCL.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = OpenCL.framework; path = /System/Library/Frameworks/OpenCL.framework; sourceTree = 
""; }; + 3BA6C00A0DE22A95008685CF /* test_config.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = test_config.h; sourceTree = ""; }; + 3BA6C0770DE24F41008685CF /* tests.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = tests.h; sourceTree = ""; }; + 8DD76FB20486AB0100D96B5E /* Test_half */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = Test_half; sourceTree = BUILT_PRODUCTS_DIR; }; + C6A0FF2C0290799A04C91782 /* Test_half.1 */ = {isa = PBXFileReference; lastKnownFileType = text.man; path = Test_half.1; sourceTree = ""; }; + FFB9F1420E5E155400F45584 /* ATF.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = ATF.framework; path = /Library/Frameworks/ATF.framework; sourceTree = ""; }; +/* End PBXFileReference section */ + +/* Begin PBXFrameworksBuildPhase section */ + 8DD76FAD0486AB0100D96B5E /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + 3BA6BFF30DE229C5008685CF /* OpenCL.framework in Frameworks */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXFrameworksBuildPhase section */ + +/* Begin PBXGroup section */ + 08FB7794FE84155DC02AAC07 /* Test_half */ = { + isa = PBXGroup; + children = ( + 3BA6C00A0DE22A95008685CF /* test_config.h */, + 3BA6C0770DE24F41008685CF /* tests.h */, + 08FB7796FE84155DC02AAC07 /* main.c */, + 08FB7795FE84155DC02AAC07 /* Source */, + 3BA6BFF80DE229CC008685CF /* Resources */, + 3B1B77910DE3896E00837A59 /* builtins.cl */, + C6A0FF2B0290797F04C91782 /* Documentation */, + 1AB674ADFE9D54B511CA2CBB /* Products */, + FFB9F1420E5E155400F45584 /* ATF.framework */, + ); + name = Test_half; + sourceTree = ""; + }; + 08FB7795FE84155DC02AAC07 /* Source */ = { + isa = PBXGroup; + children = ( + 3BA6BFB90DE21EFA008685CF /* cl_utils.h */, + 3BA6BFBA0DE21EFA008685CF /* cl_utils.c */, + 3BA6BFB70DE21DB9008685CF /* 
Test_vLoadHalf.c */, + 3B6173C30DE2B14800384A2C /* Test_roundTrip.c */, + 3B1B765E0DE342BC00837A59 /* Test_vStoreHalf.c */, + ); + name = Source; + sourceTree = ""; + }; + 1AB674ADFE9D54B511CA2CBB /* Products */ = { + isa = PBXGroup; + children = ( + 8DD76FB20486AB0100D96B5E /* Test_half */, + ); + name = Products; + sourceTree = ""; + }; + 3BA6BFF80DE229CC008685CF /* Resources */ = { + isa = PBXGroup; + children = ( + 3BA6BFF20DE229C5008685CF /* OpenCL.framework */, + ); + name = Resources; + sourceTree = ""; + }; + C6A0FF2B0290797F04C91782 /* Documentation */ = { + isa = PBXGroup; + children = ( + C6A0FF2C0290799A04C91782 /* Test_half.1 */, + ); + name = Documentation; + sourceTree = ""; + }; +/* End PBXGroup section */ + +/* Begin PBXNativeTarget section */ + 8DD76FA90486AB0100D96B5E /* Test_half */ = { + isa = PBXNativeTarget; + buildConfigurationList = 1DEB928508733DD80010E9CD /* Build configuration list for PBXNativeTarget "Test_half" */; + buildPhases = ( + 8DD76FAB0486AB0100D96B5E /* Sources */, + 8DD76FAD0486AB0100D96B5E /* Frameworks */, + 8DD76FAF0486AB0100D96B5E /* CopyFiles */, + ); + buildRules = ( + ); + dependencies = ( + ); + name = Test_half; + productInstallPath = "$(HOME)/bin"; + productName = Test_half; + productReference = 8DD76FB20486AB0100D96B5E /* Test_half */; + productType = "com.apple.product-type.tool"; + }; +/* End PBXNativeTarget section */ + +/* Begin PBXProject section */ + 08FB7793FE84155DC02AAC07 /* Project object */ = { + isa = PBXProject; + buildConfigurationList = 1DEB928908733DD80010E9CD /* Build configuration list for PBXProject "Test_half" */; + compatibilityVersion = "Xcode 3.0"; + hasScannedForEncodings = 1; + mainGroup = 08FB7794FE84155DC02AAC07 /* Test_half */; + projectDirPath = ""; + projectRoot = ""; + targets = ( + 8DD76FA90486AB0100D96B5E /* Test_half */, + ); + }; +/* End PBXProject section */ + +/* Begin PBXSourcesBuildPhase section */ + 8DD76FAB0486AB0100D96B5E /* Sources */ = { + isa = PBXSourcesBuildPhase; + 
buildActionMask = 2147483647; + files = ( + 8DD76FAC0486AB0100D96B5E /* main.c in Sources */, + 3BA6BFB80DE21DB9008685CF /* Test_vLoadHalf.c in Sources */, + 3BA6BFBB0DE21EFA008685CF /* cl_utils.c in Sources */, + 3B6173C40DE2B14800384A2C /* Test_roundTrip.c in Sources */, + 3B1B765F0DE342BC00837A59 /* Test_vStoreHalf.c in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXSourcesBuildPhase section */ + +/* Begin XCBuildConfiguration section */ + 1DEB928608733DD80010E9CD /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + COPY_PHASE_STRIP = NO; + GCC_DYNAMIC_NO_PIC = NO; + GCC_ENABLE_FIX_AND_CONTINUE = YES; + GCC_MODEL_TUNING = G5; + GCC_OPTIMIZATION_LEVEL = 0; + INSTALL_PATH = /usr/local/bin; + PRODUCT_NAME = Test_half; + ZERO_LINK = YES; + }; + name = Debug; + }; + 1DEB928708733DD80010E9CD /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; + GCC_MODEL_TUNING = G5; + INSTALL_PATH = /usr/local/bin; + PRODUCT_NAME = Test_half; + }; + name = Release; + }; + 1DEB928A08733DD80010E9CD /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ARCHS = "$(ARCHS_STANDARD_32_64_BIT_PRE_XCODE_3_1)"; + ARCHS_STANDARD_32_64_BIT_PRE_XCODE_3_1 = "x86_64 i386 ppc"; + ATF_Config_Comment = "Set ATF_DEFAULT to non-empty to link to ATF iff the BUILD_WITH_ATF env var is set"; + ATF_DEFAULT = SKIPPING; + ATF_DEFINES = "$(ATF_DEFINES_$(SKIPPING_ATF))"; + ATF_DEFINES_ = "USE_ATF=1"; + ATF_LINK = "$(ATF_LINK_$(SKIPPING_ATF))"; + ATF_LINK_ = "-framework ATF"; + GCC_C_LANGUAGE_STANDARD = c99; + GCC_OPTIMIZATION_LEVEL = 0; + GCC_PREPROCESSOR_DEFINITIONS = _DEBUG; + GCC_TREAT_IMPLICIT_FUNCTION_DECLARATIONS_AS_ERRORS = YES; + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_ABOUT_MISSING_NEWLINE = YES; + GCC_WARN_ABOUT_MISSING_PROTOTYPES = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES; + GCC_WARN_CHECK_SWITCH_STATEMENTS = YES; + GCC_WARN_INITIALIZER_NOT_FULLY_BRACKETED = YES; + 
GCC_WARN_MISSING_PARENTHESES = NO; + GCC_WARN_PEDANTIC = YES; + GCC_WARN_SHADOW = YES; + GCC_WARN_SIGN_COMPARE = YES; + GCC_WARN_TYPECHECK_CALLS_TO_PRINTF = YES; + GCC_WARN_UNINITIALIZED_AUTOS = NO; + GCC_WARN_UNKNOWN_PRAGMAS = YES; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_LABEL = YES; + GCC_WARN_UNUSED_PARAMETER = YES; + GCC_WARN_UNUSED_VALUE = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + PREBINDING = NO; + SKIPPING_ATF = SKIPPING; + VALID_ARCHS = "i386 x86_64"; + }; + name = Debug; + }; + 1DEB928B08733DD80010E9CD /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ARCHS = ( + ppc, + i386, + ); + ATF_Config_Comment = "Set ATF_DEFAULT to non-empty to link to ATF iff the BUILD_WITH_ATF env var is set"; + ATF_DEFAULT = SKIPPING; + ATF_DEFINES = "$(ATF_DEFINES_$(SKIPPING_ATF))"; + ATF_DEFINES_ = "USE_ATF=1"; + ATF_LINK = "$(ATF_LINK_$(SKIPPING_ATF))"; + ATF_LINK_ = "-framework ATF"; + GCC_C_LANGUAGE_STANDARD = c99; + GCC_TREAT_IMPLICIT_FUNCTION_DECLARATIONS_AS_ERRORS = YES; + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_ABOUT_MISSING_NEWLINE = YES; + GCC_WARN_ABOUT_MISSING_PROTOTYPES = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES; + GCC_WARN_CHECK_SWITCH_STATEMENTS = YES; + GCC_WARN_INITIALIZER_NOT_FULLY_BRACKETED = YES; + GCC_WARN_MISSING_PARENTHESES = NO; + GCC_WARN_PEDANTIC = NO; + GCC_WARN_SHADOW = YES; + GCC_WARN_SIGN_COMPARE = YES; + GCC_WARN_TYPECHECK_CALLS_TO_PRINTF = YES; + GCC_WARN_UNINITIALIZED_AUTOS = NO; + GCC_WARN_UNKNOWN_PRAGMAS = YES; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_LABEL = YES; + GCC_WARN_UNUSED_PARAMETER = YES; + GCC_WARN_UNUSED_VALUE = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + PREBINDING = NO; + SKIPPING_ATF = SKIPPING; + VALID_ARCHS = "i386 x86_64"; + }; + name = Release; + }; +/* End XCBuildConfiguration section */ + +/* Begin XCConfigurationList section */ + 1DEB928508733DD80010E9CD /* Build configuration list for PBXNativeTarget "Test_half" */ = { + isa = XCConfigurationList; + buildConfigurations = ( 
+ 1DEB928608733DD80010E9CD /* Debug */, + 1DEB928708733DD80010E9CD /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 1DEB928908733DD80010E9CD /* Build configuration list for PBXProject "Test_half" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 1DEB928A08733DD80010E9CD /* Debug */, + 1DEB928B08733DD80010E9CD /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; +/* End XCConfigurationList section */ + }; + rootObject = 08FB7793FE84155DC02AAC07 /* Project object */; +} diff --git a/test_conformance/half/Test_roundTrip.c b/test_conformance/half/Test_roundTrip.c new file mode 100644 index 00000000..18eb5f9f --- /dev/null +++ b/test_conformance/half/Test_roundTrip.c @@ -0,0 +1,398 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include +#include "cl_utils.h" +#include "tests.h" + + + +int Test_roundTrip( void ) +{ + int vectorSize, error; + uint64_t i, j; + cl_program programs[kVectorSizeCount+kStrangeVectorSizeCount] = {0}; + cl_kernel kernels[kVectorSizeCount+kStrangeVectorSizeCount] = {0}; + cl_program doublePrograms[kVectorSizeCount+kStrangeVectorSizeCount] = {0}; + cl_kernel doubleKernels[kVectorSizeCount+kStrangeVectorSizeCount] = {0}; + uint64_t time[kVectorSizeCount+kStrangeVectorSizeCount] = {0}; + uint64_t min_time[kVectorSizeCount+kStrangeVectorSizeCount] = {0}; + uint64_t doubleTime[kVectorSizeCount+kStrangeVectorSizeCount] = {0}; + uint64_t min_double_time[kVectorSizeCount+kStrangeVectorSizeCount] = {0}; + memset( min_time, -1, sizeof( min_time ) ); + memset( min_double_time, -1, sizeof( min_double_time ) ); + + vlog( "Testing roundTrip\n" ); + fflush( stdout ); + + + for( vectorSize = kMinVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++) + { + const char *source[] = { + "__kernel void test( const __global half *in, __global half *out )\n" + "{\n" + " size_t i = get_global_id(0);\n" + " vstore_half",vector_size_name_extensions[vectorSize],"( vload_half",vector_size_name_extensions[vectorSize],"(i, in), i, out);\n" + "}\n" + }; + + const char *doubleSource[] = { + "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" + "__kernel void test( const __global half *in, __global half *out )\n" + "{\n" + " size_t i = get_global_id(0);\n" + " vstore_half",vector_size_name_extensions[vectorSize],"( convert_double", vector_size_name_extensions[vectorSize], "( vload_half",vector_size_name_extensions[vectorSize],"(i, in)), i, out);\n" + "}\n" + }; + + const char *sourceV3[] = { + "__kernel void test( const __global half *in, __global half *out," + " uint extra_last_thread )\n" + "{\n" + " size_t i = get_global_id(0);\n" + " size_t last_i = get_global_size(0)-1;\n" + " size_t adjust = 0;\n" + " if(i == last_i && extra_last_thread != 0) { \n" + " adjust = 
3-extra_last_thread;\n" + " }\n" + " vstore_half3( vload_half3(i, in-adjust), i, out-adjust);\n" + "}\n" + }; + + const char *doubleSourceV3[] = { + "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" + "__kernel void test( const __global half *in, __global half *out," + " uint extra_last_thread )\n" + "{\n" + " size_t i = get_global_id(0);\n" + " size_t last_i = get_global_size(0)-1;\n" + " size_t adjust = 0;\n" + " if(i == last_i && extra_last_thread != 0) { \n" + " adjust = 3-extra_last_thread;\n" + " }\n" + " vstore_half3( vload_half3(i, in-adjust), i, out-adjust);\n" + "}\n" + }; + +/* + const char *sourceV3aligned[] = { + "__kernel void test( const __global half *in, __global half *out )\n" + "{\n" + " size_t i = get_global_id(0);\n" + " vstorea_half3( vloada_half3(i, in), i, out);\n" + " vstore_half(vload_half(4*i+3, in), 4*i+3, out);\n" + "}\n" + }; + + const char *doubleSourceV3aligned[] = { + "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" + "__kernel void test( const __global half *in, __global half *out )\n" + "{\n" + " size_t i = get_global_id(0);\n" + " vstorea_half3( vloada_half3(i, in), i, out);\n" + " vstore_half(vload_half(4*i+3, in), 4*i+3, out);\n" + "}\n" + }; +*/ + + if(g_arrVecSizes[vectorSize] == 3) { + programs[vectorSize] = MakeProgram( sourceV3, sizeof( sourceV3) / sizeof( sourceV3[0]) ); + if( NULL == programs[ vectorSize ] ) + { + gFailCount++; + + return -1; + } + } else { + programs[vectorSize] = MakeProgram( source, sizeof( source) / sizeof( source[0]) ); + if( NULL == programs[ vectorSize ] ) + { + gFailCount++; + return -1; + } + } + + kernels[ vectorSize ] = clCreateKernel( programs[ vectorSize ], "test", &error ); + if( NULL == kernels[vectorSize] ) + { + gFailCount++; + vlog_error( "\t\tFAILED -- Failed to create kernel. 
(%d)\n", error ); + return error; + } + + if( gTestDouble ) + { + if(g_arrVecSizes[vectorSize] == 3) { + doublePrograms[vectorSize] = MakeProgram( doubleSourceV3, sizeof( doubleSourceV3) / sizeof( doubleSourceV3[0]) ); + if( NULL == programs[ vectorSize ] ) + { + gFailCount++; + return -1; + } + } else { + doublePrograms[vectorSize] = MakeProgram( doubleSource, sizeof( doubleSource) / sizeof( doubleSource[0]) ); + if( NULL == programs[ vectorSize ] ) + { + gFailCount++; + return -1; + } + } + + doubleKernels[ vectorSize ] = clCreateKernel( doublePrograms[ vectorSize ], "test", &error ); + if( NULL == kernels[vectorSize] ) + { + gFailCount++; + vlog_error( "\t\tFAILED -- Failed to create kernel. (%d)\n", error ); + return error; + } + } + } + + // Figure out how many elements are in a work block + size_t elementSize = MAX( sizeof(cl_half), sizeof(cl_float)); + size_t blockCount = (size_t)getBufferSize(gDevice) / elementSize; //elementSize is a power of two + uint64_t lastCase = 1ULL << (8*sizeof(cl_half)); // number of cl_half + size_t stride = blockCount; + + error = 0; + uint64_t printMask = (lastCase >> 4) - 1; + uint32_t count; + size_t loopCount; + + for( i = 0; i < (uint64_t)lastCase; i += stride ) + { + count = (uint32_t) MIN( blockCount, lastCase - i ); + + //Init the input stream + uint16_t *p = (uint16_t *)gIn_half; + for( j = 0; j < count; j++ ) + p[j] = j + i; + + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer_half, CL_TRUE, 0, count * sizeof( cl_half ), gIn_half, 0, NULL, NULL)) ) + { + vlog_error( "Failure in clWriteArray\n" ); + gFailCount++; + goto exit; + } + + //Check the vector lengths + for( vectorSize = kMinVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++) + { // here we loop through vector sizes -- 3 is last. 
+ uint32_t pattern = 0xdeaddead; + memset_pattern4( gOut_half, &pattern, (size_t)getBufferSize(gDevice)/2); + + if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer_half, CL_TRUE, 0, count * sizeof(cl_half), gOut_half, 0, NULL, NULL)) ) + { + vlog_error( "Failure in clWriteArray\n" ); + gFailCount++; + goto exit; + } + + + // here is where "3" starts to cause problems. + error = RunKernel( kernels[vectorSize], gInBuffer_half, gOutBuffer_half, numVecs(count, vectorSize, false) , + runsOverBy(count, vectorSize, false) ); + if(error) + { + gFailCount++; + goto exit; + } + + if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer_half, CL_TRUE, 0, count * sizeof(cl_half), gOut_half, 0, NULL, NULL)) ) + { + vlog_error( "Failure in clReadArray\n" ); + gFailCount++; + goto exit; + } + + if( (memcmp( gOut_half, gIn_half, count * sizeof(cl_half))) ) + { + uint16_t *u1 = (uint16_t *)gOut_half; + uint16_t *u2 = (uint16_t *)gIn_half; + for( j = 0; j < count; j++ ) + { + if( u1[j] != u2[j] ) + { + uint16_t abs1 = u1[j] & 0x7fff; + uint16_t abs2 = u2[j] & 0x7fff; + if( abs1 > 0x7c00 && abs2 > 0x7c00 ) + continue; //any NaN is okay if NaN is input + + // if reference result is sub normal, test if the output is flushed to zero + if( IsHalfSubnormal(u2[j]) && ( (u1[j] == 0) || (u1[j] == 0x8000) ) ) + continue; + + vlog_error( "%lld) (of %lld) Failure at 0x%4.4x: 0x%4.4x vector_size = %d \n", j, (uint64_t)count, u2[j], u1[j], (g_arrVecSizes[vectorSize]) ); + gFailCount++; + goto exit; + } + } + } + + if( gTestDouble ) + { + memset_pattern4( gOut_half, &pattern, (size_t)getBufferSize(gDevice)/2); + if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer_half, CL_TRUE, 0, count * sizeof(cl_half), gOut_half, 0, NULL, NULL)) ) + { + vlog_error( "Failure in clWriteArray\n" ); + gFailCount++; + goto exit; + } + + + if( (error = RunKernel( doubleKernels[vectorSize], gInBuffer_half, gOutBuffer_half, numVecs(count, vectorSize, false) , + runsOverBy(count, vectorSize, false) ) ) ) + { + 
gFailCount++; + goto exit; + } + + if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer_half, CL_TRUE, 0, count * sizeof(cl_half), gOut_half, 0, NULL, NULL)) ) + { + vlog_error( "Failure in clReadArray\n" ); + gFailCount++; + goto exit; + } + + if( (memcmp( gOut_half, gIn_half, count * sizeof(cl_half))) ) + { + uint16_t *u1 = (uint16_t *)gOut_half; + uint16_t *u2 = (uint16_t *)gIn_half; + for( j = 0; j < count; j++ ) + { + if( u1[j] != u2[j] ) + { + uint16_t abs1 = u1[j] & 0x7fff; + uint16_t abs2 = u2[j] & 0x7fff; + if( abs1 > 0x7c00 && abs2 > 0x7c00 ) + continue; //any NaN is okay if NaN is input + + // if reference result is sub normal, test if the output is flushed to zero + if( IsHalfSubnormal(u2[j]) && ( (u1[j] == 0) || (u1[j] == 0x8000) ) ) + continue; + + vlog_error( "%lld) Failure at 0x%4.4x: 0x%4.4x vector_size = %d (double precsion)\n", j, u2[j], u1[j], (g_arrVecSizes[vectorSize]) ); + gFailCount++; + goto exit; + } + } + } + } + } + + if( ((i+blockCount) & ~printMask) == (i+blockCount) ) + { + vlog( "." 
); + fflush( stdout ); + } + } + + vlog( "\tPassed\n" ); + + loopCount = 100; + if( gReportTimes ) + { + //Run again for timing + for( vectorSize = kMinVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++) + { + uint64_t bestTime = -1ULL; + + for( j = 0; j < loopCount; j++ ) + { + uint64_t startTime = ReadTime(); + if( (error = RunKernel( kernels[vectorSize], gInBuffer_half, gOutBuffer_half,numVecs(count, vectorSize, false) , + runsOverBy(count, vectorSize, false)) ) ) + { + gFailCount++; + goto exit; + } + + if( (error = clFinish(gQueue)) ) + { + vlog_error( "Failure in clFinish\n" ); + gFailCount++; + goto exit; + } + uint64_t currentTime = ReadTime() - startTime; + if( currentTime < bestTime ) + bestTime = currentTime; + time[ vectorSize ] += currentTime; + } + if( bestTime < min_time[ vectorSize ] ) + min_time[ vectorSize ] = bestTime; + + if( gTestDouble ) + { + bestTime = -1ULL; + for( j = 0; j < loopCount; j++ ) + { + uint64_t startTime = ReadTime(); + if( (error = RunKernel( doubleKernels[vectorSize], gInBuffer_half, gOutBuffer_half, numVecs(count, vectorSize, false) , + runsOverBy(count, vectorSize, false)) ) ) + { + gFailCount++; + goto exit; + } + + if( (error = clFinish(gQueue)) ) + { + vlog_error( "Failure in clFinish\n" ); + gFailCount++; + goto exit; + } + uint64_t currentTime = ReadTime() - startTime; + if( currentTime < bestTime ) + bestTime = currentTime; + doubleTime[ vectorSize ] += currentTime; + } + if( bestTime < min_double_time[ vectorSize ] ) + min_double_time[ vectorSize ] = bestTime; + } + } + } + + if( gReportTimes ) + { + for( vectorSize = kMinVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++) + vlog_perf( SubtractTime( time[ vectorSize ], 0 ) * 1e6 * gDeviceFrequency * gComputeDevices / (double) (count * loopCount), 0, "average us/elem", "roundTrip avg. 
(vector size: %d)", (g_arrVecSizes[vectorSize]) ); + for( vectorSize = kMinVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++) + vlog_perf( SubtractTime( min_time[ vectorSize ], 0 ) * 1e6 * gDeviceFrequency * gComputeDevices / (double) count, 0, "best us/elem", "roundTrip best (vector size: %d)", (g_arrVecSizes[vectorSize]) ); + if( gTestDouble ) + { + for( vectorSize = kMinVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++) + vlog_perf( SubtractTime( doubleTime[ vectorSize ], 0 ) * 1e6 * gDeviceFrequency * gComputeDevices / (double) (count * loopCount), 0, "average us/elem (double)", "roundTrip avg. d (vector size: %d)", (g_arrVecSizes[vectorSize]) ); + for( vectorSize = kMinVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++) + vlog_perf( SubtractTime( min_double_time[ vectorSize ], 0 ) * 1e6 * gDeviceFrequency * gComputeDevices / (double) count, 0, "best us/elem (double)", "roundTrip best d (vector size: %d)", (g_arrVecSizes[vectorSize]) ); + } + } + +exit: + //clean up + for( vectorSize = kMinVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++) + { + clReleaseKernel( kernels[ vectorSize ] ); + clReleaseProgram( programs[ vectorSize ] ); + if( gTestDouble ) + { + clReleaseKernel( doubleKernels[ vectorSize ] ); + clReleaseProgram( doublePrograms[ vectorSize ] ); + } + } + + gTestCount++; + return error; +} + + diff --git a/test_conformance/half/Test_vLoadHalf.c b/test_conformance/half/Test_vLoadHalf.c new file mode 100644 index 00000000..3ca8a203 --- /dev/null +++ b/test_conformance/half/Test_vLoadHalf.c @@ -0,0 +1,628 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" + +#include +#include "cl_utils.h" +#include "tests.h" + +extern const char *addressSpaceNames[]; + +static inline float half2float( cl_ushort us ) +{ + uint32_t u = us; + uint32_t sign = (u << 16) & 0x80000000; + int32_t exponent = (u & 0x7c00) >> 10; + uint32_t mantissa = (u & 0x03ff) << 13; + union{ unsigned int u; float f;}uu; + + if( exponent == 0 ) + { + if( mantissa == 0 ) + return sign ? -0.0f : 0.0f; + + int shift = __builtin_clz( mantissa ) - 8; + exponent -= shift-1; + mantissa <<= shift; + mantissa &= 0x007fffff; + } + else + if( exponent == 31) + { + uu.u = mantissa | sign; + if( mantissa ) + uu.u |= 0x7fc00000; + else + uu.u |= 0x7f800000; + + return uu.f; + } + + exponent += 127 - 15; + exponent <<= 23; + + exponent |= mantissa; + uu.u = exponent | sign; + + return uu.f; +} + +int Test_vLoadHalf_private( bool aligned ); + +int Test_vLoadHalf_private( bool aligned ) +{ + cl_int error; + int vectorSize; + cl_program programs[kVectorSizeCount+kStrangeVectorSizeCount][4] = {{0}}; + cl_kernel kernels[kVectorSizeCount+kStrangeVectorSizeCount][4] = {{0}}; + uint64_t time[kVectorSizeCount+kStrangeVectorSizeCount] = {0}; + uint64_t min_time[kVectorSizeCount+kStrangeVectorSizeCount] = {0}; + size_t q; + + memset( min_time, -1, sizeof( min_time ) ); + + vlog( "Testing vload%s_half\n", aligned ? "a" : "" ); + fflush( stdout ); + const char *vector_size_names[] = {"1", "2", "4", "8", "16", "3"}; + + int minVectorSize = kMinVectorSize; + // There is no aligned scalar vloada_half in CL 1.1 +#if ! 
defined( CL_VERSION_1_1 ) && ! defined(__APPLE__) + vlog("Note: testing vloada_half.\n"); + if (aligned && minVectorSize == 0) + minVectorSize = 1; +#endif + + for( vectorSize = minVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++) + { + + int effectiveVectorSize = g_arrVecSizes[vectorSize]; + if(effectiveVectorSize == 3 && aligned) { + effectiveVectorSize = 4; + } + const char *source[] = { + "__kernel void test( const __global half *p, __global float", vector_size_name_extensions[vectorSize], " *f )\n" + "{\n" + " size_t i = get_global_id(0);\n" + " f[i] = vload", aligned ? "a" : "", "_half",vector_size_name_extensions[vectorSize],"( i, p );\n" + "}\n" + }; + + const char *sourceV3[] = { + "__kernel void test( const __global half *p, __global float *f,\n" + " uint extra_last_thread)\n" + "{\n" + " size_t i = get_global_id(0);\n" + " size_t last_i = get_global_size(0)-1;\n" + " if(last_i == i && extra_last_thread != 0) {\n" + " if(extra_last_thread ==2) {\n" + " f[3*i+1] = vload_half(3*i+1, p);\n" + " }\n" + " f[3*i] = vload_half(3*i, p);\n" + " } else {\n" + " vstore3(vload_half3( i, p ),i,f);\n" + " }\n" + "}\n" + }; + + const char *sourceV3aligned[] = { + "__kernel void test( const __global half *p, __global float3 *f )\n" + "{\n" + " size_t i = get_global_id(0);\n" + " f[i] = vloada_half3( i, p );\n" + " ((__global float *)f)[4*i+3] = vloada_half(4*i+3,p);\n" + "}\n" + }; + + const char *source_private1[] = { + "__kernel void test( const __global half *p, __global float *f )\n" + "{\n" + " __private ushort data[1];\n" + " __private half* hdata_p = (__private half*) data;\n" + " size_t i = get_global_id(0);\n" + " data[0] = ((__global ushort*)p)[i];\n" + " f[i] = vload", (aligned ? 
"a" : ""), "_half( 0, hdata_p );\n" + "}\n" + }; + + const char *source_private2[] = { + "__kernel void test( const __global half *p, __global float", vector_size_name_extensions[vectorSize], " *f )\n" + "{\n" + " __private ", align_types[vectorSize], " data[", vector_size_names[vectorSize], "/", align_divisors[vectorSize], "];\n" + " __private half* hdata_p = (__private half*) data;\n" + " __global ", align_types[vectorSize], "* i_p = (__global ", align_types[vectorSize], "*)p;\n" + " size_t i = get_global_id(0);\n" + " int k;\n" + " for (k=0; k<",vector_size_names[vectorSize],"/",align_divisors[vectorSize],"; k++)\n" + " data[k] = i_p[i+k];\n" + " f[i] = vload", aligned ? "a" : "", "_half",vector_size_name_extensions[vectorSize],"( 0, hdata_p );\n" + "}\n" + }; + + const char *source_privateV3[] = { + "__kernel void test( const __global half *p, __global float *f," + " uint extra_last_thread )\n" + "{\n" + " __private ushort data[3];\n" + " __private half* hdata_p = (__private half*) data;\n" + " __global ushort* i_p = (__global ushort*)p;\n" + " size_t i = get_global_id(0);\n" + " int k;\n" + // " data = vload3(i, i_p);\n" + " size_t last_i = get_global_size(0)-1;\n" + " if(last_i == i && extra_last_thread != 0) {\n" + " if(extra_last_thread ==2) {\n" + " f[3*i+1] = vload_half(3*i+1, p);\n" + " }\n" + " f[3*i] = vload_half(3*i, p);\n" + " } else {\n" + " for (k=0; k<3; k++)\n" + " data[k] = i_p[i*3+k];\n" + " vstore3(vload_half3( 0, hdata_p ), i, f);\n" + " }\n" + "}\n" + }; + + const char *source_privateV3aligned[] = { + "__kernel void test( const __global half *p, __global float3 *f )\n" + "{\n" + " ushort4 data[4];\n" // declare as vector for alignment. Make four to check to see vloada_half3 index is working. 
+ " half* hdata_p = (half*) &data;\n" + " size_t i = get_global_id(0);\n" + " global ushort* i_p = (global ushort*)p + i * 4;\n" + " int offset = i & 3;\n" + " data[offset] = (ushort4)( i_p[0], i_p[1], i_p[2], USHRT_MAX ); \n" + " data[offset^1] = USHRT_MAX; \n" + " data[offset^2] = USHRT_MAX; \n" + " data[offset^3] = USHRT_MAX; \n" + // test vloada_half3 + " f[i] = vloada_half3( offset, hdata_p );\n" + // Fill in the 4th value so we don't have to special case this code elsewhere in the test. + " mem_fence(CLK_GLOBAL_MEM_FENCE );\n" + " ((__global float *)f)[4*i+3] = vload_half(4*i+3, p);\n" + "}\n" + }; + + char local_buf_size[10]; + + sprintf(local_buf_size, "%lld", (uint64_t)((effectiveVectorSize))*gWorkGroupSize); + const char *source_local1[] = { + "__kernel void test( const __global half *p, __global float *f )\n" + "{\n" + " __local ushort data[",local_buf_size,"];\n" + " __local half* hdata_p = (__local half*) data;\n" + " size_t i = get_global_id(0);\n" + " size_t lid = get_local_id(0);\n" + " data[lid] = ((__global ushort*)p)[i];\n" + " f[i] = vload", aligned ? "a" : "", "_half( lid, hdata_p );\n" + "}\n" + }; + + const char *source_local2[] = { + "__kernel void test( const __global half *p, __global float", vector_size_name_extensions[vectorSize], " *f )\n" + "{\n" + " __local ", align_types[vectorSize], " data[", local_buf_size, "/", align_divisors[vectorSize], "];\n" + " __local half* hdata_p = (__local half*) data;\n" + " __global ", align_types[vectorSize], "* i_p = (__global ", align_types[vectorSize],"*)p;\n" + " size_t i = get_global_id(0);\n" + " size_t lid = get_local_id(0);\n" + " int k;\n" + " for (k=0; k<",vector_size_names[vectorSize],"/",align_divisors[vectorSize],"; k++)\n" + " data[lid*",vector_size_names[vectorSize],"/",align_divisors[vectorSize],"+k] = i_p[i*",vector_size_names[vectorSize],"/",align_divisors[vectorSize],"+k];\n" + " f[i] = vload", aligned ? 
"a" : "", "_half",vector_size_name_extensions[vectorSize],"( lid, hdata_p );\n" + "}\n" + }; + + const char *source_localV3[] = { + "__kernel void test( const __global half *p, __global float *f,\n" + " uint extra_last_thread)\n" + "{\n" + " __local ushort data[", local_buf_size,"];\n" + " __local half* hdata_p = (__local half*) data;\n" + " __global ushort* i_p = (__global ushort*)p;\n" + " size_t i = get_global_id(0);\n" + " size_t last_i = get_global_size(0)-1;\n" + " size_t lid = get_local_id(0);\n" + " int k;\n" + " if(last_i == i && extra_last_thread != 0) {\n" + " if(extra_last_thread ==2) {\n" + " f[3*i+1] = vload_half(3*i+1, p);\n" + " }\n" + " f[3*i] = vload_half(3*i, p);\n" + " } else {\n" + " for (k=0; k<3; k++)\n" + " data[lid*3+k] = i_p[i*3+k];\n" + " vstore3( vload_half3( lid, hdata_p ),i,f);\n" + " };\n" + "}\n" + }; + + const char *source_localV3aligned[] = { + "__kernel void test( const __global half *p, __global float3 *f )\n" + "{\n" + " __local ushort data[", local_buf_size,"];\n" + " __local half* hdata_p = (__local half*) data;\n" + " __global ushort* i_p = (__global ushort*)p;\n" + " size_t i = get_global_id(0);\n" + " size_t lid = get_local_id(0);\n" + " int k;\n" + " for (k=0; k<4; k++)\n" + " data[lid*4+k] = i_p[i*4+k];\n" + " f[i] = vloada_half3( lid, hdata_p );\n" + " ((__global float *)f)[4*i+3] = vload_half(lid*4+3, hdata_p);\n" + "}\n" + }; + + const char *source_constant[] = { + "__kernel void test( __constant half *p, __global float", vector_size_name_extensions[vectorSize], " *f )\n" + "{\n" + " size_t i = get_global_id(0);\n" + " f[i] = vload", aligned ? 
"a" : "", "_half",vector_size_name_extensions[vectorSize],"( i, p );\n" + "}\n" + }; + + const char *source_constantV3[] = { + "__kernel void test( __constant half *p, __global float *f,\n" + " uint extra_last_thread)\n" + "{\n" + " size_t i = get_global_id(0);\n" + " size_t last_i = get_global_size(0)-1;\n" + " if(last_i == i && extra_last_thread != 0) {\n" + " if(extra_last_thread ==2) {\n" + " f[3*i+1] = vload_half(3*i+1, p);\n" + " }\n" + " f[3*i] = vload_half(3*i, p);\n" + " } else {\n" + " vstore3(vload_half",vector_size_name_extensions[vectorSize],"( i, p ), i, f);\n" + " }\n" + "}\n" + }; + + const char *source_constantV3aligned[] = { + "__kernel void test( __constant half *p, __global float3 *f )\n" + "{\n" + " size_t i = get_global_id(0);\n" + " f[i] = vloada_half3( i, p );\n" + " ((__global float *)f)[4*i+3] = vload_half(4*i+3,p);\n" + "}\n" + }; + + + if(g_arrVecSizes[vectorSize] != 3) { + programs[vectorSize][0] = MakeProgram( source, sizeof( source) / sizeof( source[0]) ); + if( NULL == programs[ vectorSize ][0] ) { + gFailCount++; + vlog_error( "\t\tFAILED -- Failed to create program.\n" ); + for ( q= 0; q < sizeof( source) / sizeof( source[0]); q++) + vlog_error("%s", source[q]); + return -1; + } else { + } + } else if(aligned) { + programs[vectorSize][0] = MakeProgram( sourceV3aligned, sizeof( sourceV3aligned) / sizeof( sourceV3aligned[0]) ); + if( NULL == programs[ vectorSize ][0] ) { + gFailCount++; + vlog_error( "\t\tFAILED -- Failed to create program.\n" ); + for ( q= 0; q < sizeof( sourceV3aligned) / sizeof( sourceV3aligned[0]); q++) + vlog_error("%s", sourceV3aligned[q]); + return -1; + } else { + } + } else { + programs[vectorSize][0] = MakeProgram( sourceV3, sizeof( sourceV3) / sizeof( sourceV3[0]) ); + if( NULL == programs[ vectorSize ][0] ) { + gFailCount++; + vlog_error( "\t\tFAILED -- Failed to create program.\n" ); + for ( q= 0; q < sizeof( sourceV3) / sizeof( sourceV3[0]); q++) + vlog_error("%s", sourceV3[q]); + return -1; + } + } + + 
kernels[ vectorSize ][0] = clCreateKernel( programs[ vectorSize ][0], "test", &error ); + if( NULL == kernels[vectorSize][0] ) + { + gFailCount++; + vlog_error( "\t\tFAILED -- Failed to create kernel. (%d)\n", error ); + return -2; + } + + const char** source_ptr; + uint32_t source_size; + if (vectorSize == 0) { + source_ptr = source_private1; + source_size = sizeof( source_private1) / sizeof( source_private1[0]); + } else if(g_arrVecSizes[vectorSize] == 3) { + if(aligned) { + source_ptr = source_privateV3aligned; + source_size = sizeof( source_privateV3aligned) / sizeof( source_privateV3aligned[0]); + } else { + source_ptr = source_privateV3; + source_size = sizeof( source_privateV3) / sizeof( source_privateV3[0]); + } + } else { + source_ptr = source_private2; + source_size = sizeof( source_private2) / sizeof( source_private2[0]); + } + programs[vectorSize][1] = MakeProgram( source_ptr, source_size ); + if( NULL == programs[ vectorSize ][1] ) + { + gFailCount++; + vlog_error( "\t\tFAILED -- Failed to create private program.\n" ); + for ( q= 0; q < source_size; q++) + vlog_error("%s", source_ptr[q]); + return -1; + } + + kernels[ vectorSize ][1] = clCreateKernel( programs[ vectorSize ][1], "test", &error ); + if( NULL == kernels[vectorSize][1] ) + { + gFailCount++; + vlog_error( "\t\tFAILED -- Failed to create private kernel. 
(%d)\n", error ); + return -2; + } + + if (vectorSize == 0) { + source_ptr = source_local1; + source_size = sizeof( source_local1) / sizeof( source_local1[0]); + } else if(g_arrVecSizes[vectorSize] == 3) { + if(aligned) { + source_ptr = source_localV3aligned; + source_size = sizeof(source_localV3aligned)/sizeof(source_localV3aligned[0]); + } else { + source_ptr = source_localV3; + source_size = sizeof(source_localV3)/sizeof(source_localV3[0]); + } + } else { + source_ptr = source_local2; + source_size = sizeof( source_local2) / sizeof( source_local2[0]); + } + programs[vectorSize][2] = MakeProgram( source_ptr, source_size ); + if( NULL == programs[ vectorSize ][2] ) + { + gFailCount++; + vlog_error( "\t\tFAILED -- Failed to create local program.\n" ); + for ( q= 0; q < source_size; q++) + vlog_error("%s", source_ptr[q]); + return -1; + } + + kernels[ vectorSize ][2] = clCreateKernel( programs[ vectorSize ][2], "test", &error ); + if( NULL == kernels[vectorSize][2] ) + { + gFailCount++; + vlog_error( "\t\tFAILED -- Failed to create local kernel. 
(%d)\n", error ); + return -2; + } + + if(g_arrVecSizes[vectorSize] == 3) { + if(aligned) { + programs[vectorSize][3] = MakeProgram( source_constantV3aligned, sizeof(source_constantV3aligned) / sizeof( source_constantV3aligned[0]) ); + if( NULL == programs[ vectorSize ][3] ) + { + gFailCount++; + vlog_error( "\t\tFAILED -- Failed to create constant program.\n" ); + for ( q= 0; q < sizeof( source_constantV3aligned) / sizeof( source_constantV3aligned[0]); q++) + vlog_error("%s", source_constantV3aligned[q]); + return -1; + } + } else { + programs[vectorSize][3] = MakeProgram( source_constantV3, sizeof(source_constantV3) / sizeof( source_constantV3[0]) ); + if( NULL == programs[ vectorSize ][3] ) + { + gFailCount++; + vlog_error( "\t\tFAILED -- Failed to create constant program.\n" ); + for ( q= 0; q < sizeof( source_constantV3) / sizeof( source_constantV3[0]); q++) + vlog_error("%s", source_constantV3[q]); + return -1; + } + } + } else { + programs[vectorSize][3] = MakeProgram( source_constant, sizeof(source_constant) / sizeof( source_constant[0]) ); + if( NULL == programs[ vectorSize ][3] ) + { + gFailCount++; + vlog_error( "\t\tFAILED -- Failed to create constant program.\n" ); + for ( q= 0; q < sizeof( source_constant) / sizeof( source_constant[0]); q++) + vlog_error("%s", source_constant[q]); + return -1; + } + } + + kernels[ vectorSize ][3] = clCreateKernel( programs[ vectorSize ][3], "test", &error ); + if( NULL == kernels[vectorSize][3] ) + { + gFailCount++; + vlog_error( "\t\tFAILED -- Failed to create constant kernel. (%d)\n", error ); + return -2; + } + } + + // Figure out how many elements are in a work block + size_t elementSize = MAX( sizeof(cl_half), sizeof(cl_float)); + size_t blockCount = getBufferSize(gDevice) / elementSize; // elementSize is power of 2 + uint64_t lastCase = 1ULL << (8*sizeof(cl_half)); // number of things of size cl_half + + // we handle 64-bit types a bit differently. 
+ if( lastCase == 0 ) + lastCase = 0x100000000ULL; + + + uint64_t i, j; + uint64_t printMask = (lastCase >> 4) - 1; + uint32_t count = 0; + error = 0; + int addressSpace; + // int reported_vector_skip = 0; + + for( i = 0; i < (uint64_t)lastCase; i += blockCount ) + { + count = (uint32_t) MIN( blockCount, lastCase - i ); + + //Init the input stream + uint16_t *p = (uint16_t *)gIn_half; + for( j = 0; j < count; j++ ) + p[j] = j + i; + + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer_half, CL_TRUE, 0, count * sizeof( cl_half ), gIn_half, 0, NULL, NULL))) + { + vlog_error( "Failure in clWriteArray\n" ); + gFailCount++; + goto exit; + } + + //create the reference result + const unsigned short *s = (const unsigned short *)gIn_half; + float *d = (float *)gOut_single_reference; + for( j = 0; j < count; j++ ) + d[j] = half2float( s[j] ); + + //Check the vector lengths + for( vectorSize = minVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++) + { // here we loop through vector sizes, 3 is last + + for ( addressSpace = 0; addressSpace < 4; addressSpace++) { + uint32_t pattern = 0x7fffdead; + + /* + if (addressSpace == 3) { + vlog("Note: skipping address space %s due to small buffer size.\n", addressSpaceNames[addressSpace]); + continue; + } + */ + memset_pattern4( gOut_single, &pattern, getBufferSize(gDevice)); + if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer_single, CL_TRUE, 0, count * sizeof( float ), gOut_single, 0, NULL, NULL)) ) + { + vlog_error( "Failure in clWriteArray\n" ); + gFailCount++; + goto exit; + } + + if(g_arrVecSizes[vectorSize] == 3 && !aligned) { + // now we need to add the extra const argument for how + // many elements the last thread should take care of. 
+ } + + // okay, here is where we have to be careful + if( (error = RunKernel( kernels[vectorSize][addressSpace], gInBuffer_half, gOutBuffer_single, numVecs(count, vectorSize, aligned) , + runsOverBy(count, vectorSize, aligned) ) ) ) + { + gFailCount++; + goto exit; + } + + if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer_single, CL_TRUE, 0, count * sizeof( float ), gOut_single, 0, NULL, NULL)) ) + { + vlog_error( "Failure in clReadArray\n" ); + gFailCount++; + goto exit; + } + + if( memcmp( gOut_single, gOut_single_reference, count * sizeof( float )) ) + { + uint32_t *u1 = (uint32_t *)gOut_single; + uint32_t *u2 = (uint32_t *)gOut_single_reference; + float *f1 = (float *)gOut_single; + float *f2 = (float *)gOut_single_reference; + for( j = 0; j < count; j++ ) + { + if(isnan(f1[j]) && isnan(f2[j])) // both are nan dont compare them + continue; + if( u1[j] != u2[j]) + { + vlog_error( " %lld) (of %lld) Failure at 0x%4.4x: %a vs *%a (0x%8.8x vs *0x%8.8x) vector_size = %d (%s) address space = %s, load is %s\n", + j, (uint64_t)count, ((unsigned short*)gIn_half)[j], f1[j], f2[j], u1[j], u2[j], (g_arrVecSizes[vectorSize]), + vector_size_names[vectorSize], addressSpaceNames[addressSpace], + (aligned?"aligned":"unaligned")); + gFailCount++; + break; // goto exit; + } + } + } + + if( gReportTimes && addressSpace == 0) + { + //Run again for timing + for( j = 0; j < 100; j++ ) + { + uint64_t startTime = ReadTime(); + error = + RunKernel( kernels[vectorSize][addressSpace], gInBuffer_half, gOutBuffer_single, numVecs(count, vectorSize, aligned) , + runsOverBy(count, vectorSize, aligned)); + if(error) + { + gFailCount++; + goto exit; + } + + if( (error = clFinish(gQueue)) ) + { + vlog_error( "Failure in clFinish\n" ); + gFailCount++; + goto exit; + } + uint64_t currentTime = ReadTime() - startTime; + time[ vectorSize ] += currentTime; + if( currentTime < min_time[ vectorSize ] ) + min_time[ vectorSize ] = currentTime ; + } + } + } + } + + if( ((i+blockCount) & ~printMask) == 
(i+blockCount) ) + { + vlog( "." ); + fflush( stdout ); + } + } + + vlog( "\tPassed\n" ); + + if( gReportTimes ) + { + for( vectorSize = minVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++) + vlog_perf( SubtractTime( time[ vectorSize ], 0 ) * 1e6 * gDeviceFrequency * gComputeDevices / (double) (count * 100), 0, + "average us/elem", "vLoad%sHalf avg. (%s, vector size: %d)", ( (aligned) ? "a" : ""), addressSpaceNames[0], (g_arrVecSizes[vectorSize]) ); + for( vectorSize = minVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++) + vlog_perf( SubtractTime( min_time[ vectorSize ], 0 ) * 1e6 * gDeviceFrequency * gComputeDevices / (double) count, 0, + "best us/elem", "vLoad%sHalf best (%s vector size: %d)", ( (aligned) ? "a" : ""), addressSpaceNames[0], (g_arrVecSizes[vectorSize]) ); + } + +exit: + //clean up + for( vectorSize = minVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++) + { + for ( addressSpace = 0; addressSpace < 3; addressSpace++) { + clReleaseKernel( kernels[ vectorSize ][addressSpace] ); + clReleaseProgram( programs[ vectorSize ][addressSpace] ); + } + } + + gTestCount++; + return error; +} + +int Test_vload_half( void ) +{ + return Test_vLoadHalf_private( false ); +} + +int Test_vloada_half( void ) +{ + return Test_vLoadHalf_private( true ); +} + diff --git a/test_conformance/half/Test_vStoreHalf.c b/test_conformance/half/Test_vStoreHalf.c new file mode 100644 index 00000000..a3117bcb --- /dev/null +++ b/test_conformance/half/Test_vStoreHalf.c @@ -0,0 +1,1947 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include "../../test_common/harness/kernelHelpers.h"

// NOTE(review): the header name on the next #include was lost in the diff
// extraction (angle-bracket include; likely <stdio.h> for fflush/stdout used
// elsewhere in this file) -- restore it before compiling.
#include
#include "cl_utils.h"
#include "tests.h"

extern const char *addressSpaceNames[];

// Per-job context for ReferenceF: generates float test inputs and the
// host-side reference half results for one block of test patterns.
typedef struct ComputeReferenceInfoF_
{
    float *x;           // output: generated float inputs
    cl_ushort *r;       // output: reference half (ushort bit pattern) results
    f2h f;              // float->half reference conversion for this rounding mode
    cl_ulong i;         // starting 32-bit pattern for this block
    cl_uint lim;        // total number of elements in the block
    cl_uint count;      // elements per thread-pool job
} ComputeReferenceInfoF;

// Per-job context for ReferenceD: double-input variant of the above.
typedef struct ComputeReferenceInfoD_
{
    double *x;          // output: generated double inputs
    cl_ushort *r;       // output: reference half results
    d2h f;              // double->half reference conversion
    cl_ulong i;         // starting pattern index for this block
    cl_uint lim;        // total number of elements in the block
    cl_uint count;      // elements per thread-pool job
} ComputeReferenceInfoD;

// Per-job context for CheckF: compares device results against the reference.
typedef struct CheckResultInfoF_
{
    const float *x;     // the float inputs that were converted
    const cl_ushort *r; // reference half results
    const cl_ushort *s; // device-produced half results
    f2h f;              // reference conversion (re-used for the zero cases)
    const char *aspace; // address-space name, for failure messages only
    cl_uint lim;        // total number of elements in the block
    cl_uint count;      // elements per thread-pool job
    int vsz;            // vector size under test, for failure messages only
} CheckResultInfoF;

// Per-job context for CheckD: double-input variant of CheckResultInfoF.
typedef struct CheckResultInfoD_
{
    const double *x;    // the double inputs that were converted
    const cl_ushort *r; // reference half results
    const cl_ushort *s; // device-produced half results
    d2h f;              // reference conversion (re-used for the zero cases)
    const char *aspace; // address-space name, for failure messages only
    cl_uint lim;        // total number of elements in the block
    cl_uint count;      // elements per thread-pool job
    int vsz;            // vector size under test, for failure messages only
} CheckResultInfoD;

// Thread-pool worker: fills one job's slice of the input/reference arrays.
// Each float input is the raw 32-bit pattern (i + j) reinterpreted as float,
// so consecutive jobs sweep consecutive bit patterns exhaustively.
// jid appears to be the job index assigned by ThreadPool_Do (see callers);
// tid is unused here. Always returns 0 (success).
static cl_int
ReferenceF(cl_uint jid, cl_uint tid, void *userInfo)
{
    ComputeReferenceInfoF *cri = (ComputeReferenceInfoF *)userInfo;
    cl_uint lim = cri->lim;
    cl_uint count = cri->count;
    cl_uint off = jid * count;
    float *x = cri->x + off;
    cl_ushort *r = cri->r + off;
    f2h f = cri->f;
    cl_ulong i = cri->i + off;
    cl_uint j, rr;  // note: rr is unused

    // The last job may be short; clamp to the block limit.
    if (off + count > lim)
        count = lim - off;

    for (j = 0; j < count; ++j) {
        x[j] = as_float((cl_uint)(i + j));
        r[j] = f(x[j]);
    }

    return 0;
}

// Thread-pool worker: verifies one job's slice of device half results against
// the reference. Returns 0 on success, 1 on the first mismatch (after logging).
// Mismatches are forgiven when (a) both values are NaN encodings, (b) the
// input is a float subnormal and the device flushed it to a signed zero
// (re-derived via f(+/-0.0f)), or (c) on embedded profile, the reference is a
// half subnormal and the device returned either signed zero.
static cl_int
CheckF(cl_uint jid, cl_uint tid, void *userInfo)
{
    CheckResultInfoF *cri = (CheckResultInfoF *)userInfo;
    cl_uint lim = cri->lim;
    cl_uint count = cri->count;
    cl_uint off = jid * count;
    const float *x = cri->x + off;
    const cl_ushort *r = cri->r + off;
    const cl_ushort *s = cri->s + off;
    f2h f = cri->f;
    cl_uint j;
    cl_ushort correct2 = f( 0.0f);   // what this rounding mode yields for +0.0
    cl_ushort correct3 = f(-0.0f);   // ... and for -0.0
    cl_int ret = 0;

    // The last job may be short; clamp to the block limit.
    if (off + count > lim)
        count = lim - off;

    // Fast path: bulk compare before scanning element by element.
    if (!memcmp(r, s, count*sizeof(cl_ushort)))
        return 0;

    for (j = 0; j < count; j++) {
        if (s[j] == r[j])
            continue;

        // Pass any NaNs (any half with all-exponent-set and nonzero mantissa)
        if ((s[j] & 0x7fff) > 0x7c00 && (r[j] & 0x7fff) > 0x7c00 )
            continue;

        // retry per section 6.5.3.3: subnormal inputs may be flushed to zero
        if (IsFloatSubnormal(x[j]) && (s[j] == correct2 || s[j] == correct3))
            continue;

        // if reference result is subnormal, pass any zero
        if (gIsEmbedded && IsHalfSubnormal(r[j]) && (s[j] == 0x0000 || s[j] == 0x8000))
            continue;

        vlog_error("\nFailure at [%u] with %.6a: *0x%04x vs 0x%04x, vector_size = %d, address_space = %s\n",
                   j+off, x[j], r[j], s[j], cri->vsz, cri->aspace);

        ret = 1;
        break;
    }

    return ret;
}

// Thread-pool worker: double-input analogue of ReferenceF. Inputs are derived
// from the 32-bit pattern via DoubleFromUInt rather than raw reinterpretation.
// Always returns 0.
static cl_int
ReferenceD(cl_uint jid, cl_uint tid, void *userInfo)
{
    ComputeReferenceInfoD *cri = (ComputeReferenceInfoD *)userInfo;
    cl_uint lim = cri->lim;
    cl_uint count = cri->count;
    cl_uint off = jid * count;
    double *x = cri->x + off;
    cl_ushort *r = cri->r + off;
    d2h f = cri->f;
    cl_uint j;
    cl_ulong i = cri->i + off;

    // The last job may be short; clamp to the block limit.
    if (off + count > lim)
        count = lim - off;

    for (j = 0; j < count; ++j) {
        x[j] = as_double(DoubleFromUInt((cl_uint)(i + j)));
        r[j] = f(x[j]);
    }

    return 0;
}

// Thread-pool worker: double-input analogue of CheckF, with the same
// forgiveness rules (NaN vs NaN, subnormal input flushed to signed zero,
// embedded-profile subnormal reference vs zero result).
// Returns 0 on success, 1 on the first real mismatch (after logging).
static cl_int
CheckD(cl_uint jid, cl_uint tid, void *userInfo)
{
    CheckResultInfoD *cri = (CheckResultInfoD *)userInfo;
    cl_uint lim = cri->lim;
    cl_uint count = cri->count;
    cl_uint off = jid * count;
    const double *x = cri->x + off;
    const cl_ushort *r = cri->r + off;
    const cl_ushort *s = cri->s + off;
    d2h f = cri->f;
    cl_uint j;
    cl_ushort correct2 = f( 0.0);    // what this rounding mode yields for +0.0
    cl_ushort correct3 = f(-0.0);    // ... and for -0.0
    cl_int ret = 0;

    // The last job may be short; clamp to the block limit.
    if (off + count > lim)
        count = lim -
    // very small: rounds to the smallest half denormal (magnitude 2^-24)
    if( x < MAKE_HEX_FLOAT(0x1.8p-24f, 0x18L, -28) )
        return sign | 1;

    // half denormal: scale down so the float exponent produces the right
    // denormal mantissa bits directly, then or in the sign
    if( x < MAKE_HEX_FLOAT(0x1.0p-14f, 0x1L, -14) )
    {
        u.f = x * MAKE_HEX_FLOAT(0x1.0p-125f, 0x1L, -125);
        return sign | u.u;
    }

    // Normal range: round the 24-bit float mantissa to 11 bits using the
    // classic add/subtract-a-large-power-of-two trick, which relies on the
    // host FPU's default round-to-nearest-even mode, then rebias the exponent.
    u.f *= MAKE_HEX_FLOAT(0x1.0p13f, 0x1L, 13);
    u.u &= 0x7f800000;
    x += u.f;
    u.f = x - u.f;
    u.f *= MAKE_HEX_FLOAT(0x1.0p-112f, 0x1L, -112);

    return (u.u >> (24-11)) | sign;
}

// Reference conversion: float -> half bit pattern, round toward zero.
static cl_ushort
float2half_rtz( float f )
{
    union{ float f; cl_uint u; } u = {f};
    cl_uint sign = (u.u >> 16) & 0x8000;
    float x = fabsf(f);

    //Nan: narrow the mantissa, silence the NaN, preserve the sign
    if( x != x )
    {
        u.u >>= (24-11);
        u.u &= 0x7fff;
        u.u |= 0x0200;      //silence the NaN
        return u.u | sign;
    }

    // overflow: RTZ never rounds up to infinity, so finite values that
    // exceed half range clamp to the largest finite half (0x7bff)
    if( x >= MAKE_HEX_FLOAT(0x1.0p16f, 0x1L, 16) )
    {
        if( x == INFINITY )
            return 0x7c00 | sign;

        return 0x7bff | sign;
    }

    // underflow: anything below the smallest half denormal truncates to zero
    if( x < MAKE_HEX_FLOAT(0x1.0p-24f, 0x1L, -24) )
        return sign;    // The halfway case can return 0x0001 or 0.  0 is even.

    // half denormal: scale so the denormal mantissa lands in the integer
    // part, then truncate via the int cast (which is exactly RTZ)
    if( x < MAKE_HEX_FLOAT(0x1.0p-14f, 0x1L, -14) )
    {
        x *= MAKE_HEX_FLOAT(0x1.0p24f, 0x1L, 24);
        return (cl_ushort)((int) x | sign);
    }

    // Normal range: drop the low 13 mantissa bits (truncation) and rebias
    // the exponent from float (127) to half (15): 112 << 23 = 0x38000000.
    u.u &= 0xFFFFE000U;
    u.u -= 0x38000000U;

    return (u.u >> (24-11)) | sign;
}

// Reference conversion: float -> half bit pattern, round toward +infinity.
// Note the asymmetry: positive values round up in magnitude, negative values
// truncate toward zero.
static cl_ushort
float2half_rtp( float f )
{
    union{ float f; cl_uint u; } u = {f};
    cl_uint sign = (u.u >> 16) & 0x8000;
    float x = fabsf(f);

    //Nan: narrow the mantissa, silence the NaN, preserve the sign
    if( x != x )
    {
        u.u >>= (24-11);
        u.u &= 0x7fff;
        u.u |= 0x0200;      //silence the NaN
        return u.u | sign;
    }

    // overflow: anything above the largest finite half rounds up to +inf
    if( f > MAKE_HEX_FLOAT(0x1.ffcp15f, 0x1ffcL, 3) )
        return 0x7c00;

    // negative overflow: only -inf maps to -inf; finite values clamp to
    // the most negative finite half (0xfbff)
    if( f <= MAKE_HEX_FLOAT(-0x1.0p16f, -0x1L, 16) )
    {
        if( f == -INFINITY )
            return 0xfc00;

        return 0xfbff;
    }

    // underflow: tiny positives round up to the smallest denormal,
    // tiny negatives round up (toward zero) to -0
    if( x < MAKE_HEX_FLOAT(0x1.0p-24f, 0x1L, -24) )
    {
        if( f > 0 )
            return 1;
        return sign;
    }

    // half denormal: truncate via int cast, then bump by one ulp when the
    // truncation was inexact and the value is positive (rounding toward +inf)
    if( x < MAKE_HEX_FLOAT(0x1.0p-14f, 0x1L, -14) )
    {
        x *= MAKE_HEX_FLOAT(0x1.0p24f, 0x1L, 24);
        int r = (int) x;
        r += (float) r != x && f > 0.0f;

        return (cl_ushort)( r | sign);
    }

    // Normal range: truncate the mantissa to 11 bits; if bits were lost and
    // the value is positive (g > truncated), bump one half-ulp, then rebias.
    float g = u.f;
    u.u &= 0xFFFFE000U;
    if( g > u.f )
        u.u += 0x00002000U;
    u.u -= 0x38000000U;

    return (u.u >> (24-11)) | sign;
}


// Reference conversion: float -> half bit pattern, round toward -infinity.
// Mirror image of float2half_rtp: negatives round up in magnitude,
// positives truncate toward zero.
static cl_ushort
float2half_rtn( float f )
{
    union{ float f; cl_uint u; } u = {f};
    cl_uint sign = (u.u >> 16) & 0x8000;
    float x = fabsf(f);

    //Nan: narrow the mantissa, silence the NaN, preserve the sign
    if( x != x )
    {
        u.u >>= (24-11);
        u.u &= 0x7fff;
        u.u |= 0x0200;      //silence the NaN
        return u.u | sign;
    }

    // positive overflow: only +inf maps to +inf; finite values clamp to
    // the largest finite half (0x7bff)
    if( f >= MAKE_HEX_FLOAT(0x1.0p16f, 0x1L, 16) )
    {
        if( f == INFINITY )
            return 0x7c00;

        return 0x7bff;
    }

    // negative overflow: anything below the most negative finite half
    // rounds down to -inf
    if( f < MAKE_HEX_FLOAT(-0x1.ffcp15f, -0x1ffcL, 3) )
        return 0xfc00;

    // underflow: tiny negatives round down to the smallest negative
    // denormal, tiny positives round down (toward zero) to +0
    if( x < MAKE_HEX_FLOAT(0x1.0p-24f, 0x1L, -24) )
    {
        if( f < 0 )
            return 0x8001;
        return sign;
    }

    // half denormal: truncate via int cast, then bump by one ulp when the
    // truncation was inexact and the value is negative (rounding toward -inf)
    if( x < MAKE_HEX_FLOAT(0x1.0p-14f, 0x1L, -14) )
    {
        x *= MAKE_HEX_FLOAT(0x1.0p24f, 0x1L, 24);
        int r = (int) x;
        r += (float) r != x && f < 0.0f;

        return (cl_ushort)( r | sign);
    }

    // Normal range: truncate the mantissa; truncating a negative value moves
    // it toward zero (u.f > f), so bump one half-ulp downward, then rebias.
    u.u &= 0xFFFFE000U;
    if( u.f > f )
        u.u += 0x00002000U;
    u.u -= 0x38000000U;

    return (u.u >> (24-11)) | sign;
}

// Reference conversion: double -> half bit pattern, round to nearest even.
// Same structure as float2half_rte with double-width constants
// (48-bit sign shift, 53-bit mantissa, 42-bit rounding shift).
static cl_ushort
double2half_rte( double f )
{
    union{ double f; cl_ulong u; } u = {f};
    cl_ulong sign = (u.u >> 48) & 0x8000;
    double x = fabs(f);

    //Nan: narrow the mantissa, silence the NaN, preserve the sign
    if( x != x )
    {
        u.u >>= (53-11);
        u.u &= 0x7fff;
        u.u |= 0x0200;      //silence the NaN
        return u.u | sign;
    }

    // overflow: at or beyond the RTE rounding threshold maps to infinity
    if( x >= MAKE_HEX_DOUBLE(0x1.ffep15, 0x1ffeLL, 3) )
        return 0x7c00 | sign;

    // underflow: at or below half the smallest denormal rounds to zero
    if( x <= MAKE_HEX_DOUBLE(0x1.0p-25, 0x1LL, -25) )
        return sign;    // The halfway case can return 0x0001 or 0.  0 is even.
    // very small: rounds to the smallest half denormal (magnitude 2^-24)
    if( x < MAKE_HEX_DOUBLE(0x1.8p-24, 0x18LL, -28) )
        return sign | 1;

    // half denormal: scale down so the double exponent produces the right
    // denormal mantissa bits directly, then or in the sign
    if( x < MAKE_HEX_DOUBLE(0x1.0p-14, 0x1LL, -14) )
    {
        u.f = x * MAKE_HEX_DOUBLE(0x1.0p-1050, 0x1LL, -1050);
        return sign | u.u;
    }

    // Normal range: round the 53-bit mantissa to 11 bits via the
    // add/subtract trick (relies on the host FPU's default
    // round-to-nearest-even mode), then rebias the exponent.
    u.f *= MAKE_HEX_DOUBLE(0x1.0p42, 0x1LL, 42);
    u.u &= 0x7ff0000000000000ULL;
    x += u.f;
    u.f = x - u.f;
    u.f *= MAKE_HEX_DOUBLE(0x1.0p-1008, 0x1LL, -1008);

    return (u.u >> (53-11)) | sign;
}

// Reference conversion: double -> half bit pattern, round toward zero.
// Same structure as float2half_rtz with double-width constants.
static cl_ushort
double2half_rtz( double f )
{
    union{ double f; cl_ulong u; } u = {f};
    cl_ulong sign = (u.u >> 48) & 0x8000;
    double x = fabs(f);

    //Nan: narrow the mantissa, silence the NaN, preserve the sign
    if( x != x )
    {
        u.u >>= (53-11);
        u.u &= 0x7fff;
        u.u |= 0x0200;      //silence the NaN
        return u.u | sign;
    }

    // infinity is handled before the finite-overflow clamp below
    if( x == INFINITY )
        return 0x7c00 | sign;

    // overflow: RTZ never rounds up to infinity, so finite values that
    // exceed half range clamp to the largest finite half (0x7bff)
    if( x >= MAKE_HEX_DOUBLE(0x1.0p16, 0x1LL, 16) )
        return 0x7bff | sign;

    // underflow: anything below the smallest half denormal truncates to zero
    if( x < MAKE_HEX_DOUBLE(0x1.0p-24, 0x1LL, -24) )
        return sign;    // The halfway case can return 0x0001 or 0.  0 is even.
+ + // half denormal + if( x < MAKE_HEX_DOUBLE(0x1.0p-14, 0x1LL, -14) ) + { + x *= MAKE_HEX_FLOAT(0x1.0p24f, 0x1L, 24); + return (cl_ushort)((int) x | sign); + } + + u.u &= 0xFFFFFC0000000000ULL; + u.u -= 0x3F00000000000000ULL; + + return (u.u >> (53-11)) | sign; +} + +static cl_ushort +double2half_rtp( double f ) +{ + union{ double f; cl_ulong u; } u = {f}; + cl_ulong sign = (u.u >> 48) & 0x8000; + double x = fabs(f); + + //Nan + if( x != x ) + { + u.u >>= (53-11); + u.u &= 0x7fff; + u.u |= 0x0200; //silence the NaN + return u.u | sign; + } + + // overflow + if( f > MAKE_HEX_DOUBLE(0x1.ffcp15, 0x1ffcLL, 3) ) + return 0x7c00; + + if( f <= MAKE_HEX_DOUBLE(-0x1.0p16, -0x1LL, 16) ) + { + if( f == -INFINITY ) + return 0xfc00; + + return 0xfbff; + } + + // underflow + if( x < MAKE_HEX_DOUBLE(0x1.0p-24, 0x1LL, -24) ) + { + if( f > 0 ) + return 1; + return sign; + } + + // half denormal + if( x < MAKE_HEX_DOUBLE(0x1.0p-14, 0x1LL, -14) ) + { + x *= MAKE_HEX_FLOAT(0x1.0p24f, 0x1L, 24); + int r = (int) x; + if( 0 == sign ) + r += (double) r != x; + + return (cl_ushort)( r | sign); + } + + double g = u.f; + u.u &= 0xFFFFFC0000000000ULL; + if( g != u.f && 0 == sign) + u.u += 0x0000040000000000ULL; + u.u -= 0x3F00000000000000ULL; + + return (u.u >> (53-11)) | sign; +} + + +static cl_ushort +double2half_rtn( double f ) +{ + union{ double f; cl_ulong u; } u = {f}; + cl_ulong sign = (u.u >> 48) & 0x8000; + double x = fabs(f); + + //Nan + if( x != x ) + { + u.u >>= (53-11); + u.u &= 0x7fff; + u.u |= 0x0200; //silence the NaN + return u.u | sign; + } + + // overflow + if( f >= MAKE_HEX_DOUBLE(0x1.0p16, 0x1LL, 16) ) + { + if( f == INFINITY ) + return 0x7c00; + + return 0x7bff; + } + + if( f < MAKE_HEX_DOUBLE(-0x1.ffcp15, -0x1ffcLL, 3) ) + return 0xfc00; + + // underflow + if( x < MAKE_HEX_DOUBLE(0x1.0p-24, 0x1LL, -24) ) + { + if( f < 0 ) + return 0x8001; + return sign; + } + + // half denormal + if( x < MAKE_HEX_DOUBLE(0x1.0p-14, 0x1LL, -14) ) + { + x *= MAKE_HEX_DOUBLE(0x1.0p24, 
0x1LL, 24); + int r = (int) x; + if( sign ) + r += (double) r != x; + + return (cl_ushort)( r | sign); + } + + double g = u.f; + u.u &= 0xFFFFFC0000000000ULL; + if( g < u.f && sign) + u.u += 0x0000040000000000ULL; + u.u -= 0x3F00000000000000ULL; + + return (u.u >> (53-11)) | sign; +} + +int Test_vstore_half( void ) +{ + switch (get_default_rounding_mode(gDevice)) + { + case CL_FP_ROUND_TO_ZERO: + return Test_vStoreHalf_private(float2half_rtz, double2half_rte, ""); + case 0: + return -1; + default: + return Test_vStoreHalf_private(float2half_rte, double2half_rte, ""); + } +} + +int Test_vstore_half_rte( void ) +{ + return Test_vStoreHalf_private(float2half_rte, double2half_rte, "_rte"); +} + +int Test_vstore_half_rtz( void ) +{ + return Test_vStoreHalf_private(float2half_rtz, double2half_rtz, "_rtz"); +} + +int Test_vstore_half_rtp( void ) +{ + return Test_vStoreHalf_private(float2half_rtp, double2half_rtp, "_rtp"); +} + +int Test_vstore_half_rtn( void ) +{ + return Test_vStoreHalf_private(float2half_rtn, double2half_rtn, "_rtn"); +} + +int Test_vstorea_half( void ) +{ + switch (get_default_rounding_mode(gDevice)) + { + case CL_FP_ROUND_TO_ZERO: + return Test_vStoreaHalf_private(float2half_rtz, double2half_rte, ""); + case 0: + return -1; + default: + return Test_vStoreaHalf_private(float2half_rte, double2half_rte, ""); + } +} + +int Test_vstorea_half_rte( void ) +{ + return Test_vStoreaHalf_private(float2half_rte, double2half_rte, "_rte"); +} + +int Test_vstorea_half_rtz( void ) +{ + return Test_vStoreaHalf_private(float2half_rtz, double2half_rtz, "_rtz"); +} + +int Test_vstorea_half_rtp( void ) +{ + return Test_vStoreaHalf_private(float2half_rtp, double2half_rtp, "_rtp"); +} + +int Test_vstorea_half_rtn( void ) +{ + return Test_vStoreaHalf_private(float2half_rtn, double2half_rtn, "_rtn"); +} + +#pragma mark - + +int Test_vStoreHalf_private( f2h referenceFunc, d2h doubleReferenceFunc, const char *roundName ) +{ + int vectorSize, error; + cl_program 
programs[kVectorSizeCount+kStrangeVectorSizeCount][3]; + cl_kernel kernels[kVectorSizeCount+kStrangeVectorSizeCount][3]; + + uint64_t time[kVectorSizeCount+kStrangeVectorSizeCount] = {0}; + uint64_t min_time[kVectorSizeCount+kStrangeVectorSizeCount] = {0}; + memset( min_time, -1, sizeof( min_time ) ); + cl_program doublePrograms[kVectorSizeCount+kStrangeVectorSizeCount][3]; + cl_kernel doubleKernels[kVectorSizeCount+kStrangeVectorSizeCount][3]; + uint64_t doubleTime[kVectorSizeCount+kStrangeVectorSizeCount] = {0}; + uint64_t min_double_time[kVectorSizeCount+kStrangeVectorSizeCount] = {0}; + memset( min_double_time, -1, sizeof( min_double_time ) ); + + vlog( "Testing vstore_half%s\n", roundName ); + fflush( stdout ); + + bool aligned= false; + + for( vectorSize = kMinVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++) + { + const char *source[] = { + "__kernel void test( __global float", vector_size_name_extensions[vectorSize]," *p, __global half *f )\n" + "{\n" + " size_t i = get_global_id(0);\n" + " vstore_half",vector_size_name_extensions[vectorSize],roundName,"( p[i], i, f );\n" + "}\n" + }; + + const char *source_v3[] = { + "__kernel void test( __global float *p, __global half *f,\n" + " uint extra_last_thread)\n" + "{\n" + " size_t i = get_global_id(0);\n" + " size_t last_i = get_global_size(0)-1;\n" + " size_t adjust = 0;\n" + " if(last_i == i && extra_last_thread != 0) {\n" + " adjust = 3-extra_last_thread;\n" + " } " + " vstore_half3",roundName,"( vload3(i, p-adjust), i, f-adjust );\n" + "}\n" + }; + + const char *source_private_store[] = { + "__kernel void test( __global float", vector_size_name_extensions[vectorSize]," *p, __global half *f )\n" + "{\n" + " __private ushort data[16];\n" + " size_t i = get_global_id(0);\n" + " size_t offset = 0;\n" + " size_t vecsize = vec_step(p[i]);\n" + " vstore_half",vector_size_name_extensions[vectorSize],roundName,"( p[i], 0, (__private half *)(&data[0]) );\n" + " for(offset = 0; offset < vecsize; 
offset++)\n" + " {\n" + " vstore_half(vload_half(offset, (__private half *)data), 0, &f[vecsize*i+offset]);\n" + " }\n" + "}\n" + }; + + + const char *source_private_store_v3[] = { + "__kernel void test( __global float *p, __global half *f,\n" + " uint extra_last_thread )\n" + "{\n" + " __private ushort data[4];\n" + " size_t i = get_global_id(0);\n" + " size_t last_i = get_global_size(0)-1;\n" + " size_t adjust = 0;\n" + " size_t offset = 0;\n" + " if(last_i == i && extra_last_thread != 0) {\n" + " adjust = 3-extra_last_thread;\n" + " } " + " vstore_half3",roundName,"( vload3(i, p-adjust), 0, (__private half *)(&data[0]) );\n" + " for(offset = 0; offset < 3; offset++)\n" + " {\n" + " vstore_half(vload_half(offset, (__private half *) data), 0, &f[3*i+offset-adjust]);\n" + " }\n" + "}\n" + }; + + char local_buf_size[10]; + sprintf(local_buf_size, "%lld", (uint64_t)gWorkGroupSize); + + + const char *source_local_store[] = { + "__kernel void test( __global float", vector_size_name_extensions[vectorSize]," *p, __global half *f )\n" + "{\n" + " __local ushort data[16*", local_buf_size, "];\n" + " size_t i = get_global_id(0);\n" + " size_t lid = get_local_id(0);\n" + " size_t lsize = get_local_size(0);\n" + " size_t vecsize = vec_step(p[0]);\n" + " event_t async_event;\n" + " vstore_half",vector_size_name_extensions[vectorSize],roundName,"( p[i], lid, (__local half *)(&data[0]) );\n" + " barrier( CLK_LOCAL_MEM_FENCE ); \n" + " async_event = async_work_group_copy((__global ushort *)f+vecsize*(i-lid), (__local ushort *)(&data[0]), vecsize*lsize, 0);\n" // investigate later + " wait_group_events(1, &async_event);\n" + "}\n" + }; + + const char *source_local_store_v3[] = { + "__kernel void test( __global float *p, __global half *f,\n" + " uint extra_last_thread )\n" + "{\n" + " __local ushort data[3*(", local_buf_size, "+1)];\n" + " size_t i = get_global_id(0);\n" + " size_t lid = get_local_id(0);\n" + " size_t last_i = get_global_size(0)-1;\n" + " size_t adjust = 0;\n" + " 
size_t lsize = get_local_size(0);\n" + " event_t async_event;\n" + " if(last_i == i && extra_last_thread != 0) {\n" + " adjust = 3-extra_last_thread;\n" + " } " + " vstore_half3",roundName,"( vload3(i,p-adjust), lid, (__local half *)(&data[0]) );\n" + " barrier( CLK_LOCAL_MEM_FENCE ); \n" + " async_event = async_work_group_copy((__global ushort *)(f+3*(i-lid)), (__local ushort *)(&data[adjust]), lsize*3-adjust, 0);\n" // investigate later + " wait_group_events(1, &async_event);\n" + "}\n" + }; + + const char *double_source[] = { + "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" + "__kernel void test( __global double", vector_size_name_extensions[vectorSize]," *p, __global half *f )\n" + "{\n" + " size_t i = get_global_id(0);\n" + " vstore_half",vector_size_name_extensions[vectorSize],roundName,"( p[i], i, f );\n" + "}\n" + }; + + const char *double_source_private_store[] = { + "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" + "__kernel void test( __global double", vector_size_name_extensions[vectorSize]," *p, __global half *f )\n" + "{\n" + " __private ushort data[16];\n" + " size_t i = get_global_id(0);\n" + " size_t offset = 0;\n" + " size_t vecsize = vec_step(p[i]);\n" + " vstore_half",vector_size_name_extensions[vectorSize],roundName,"( p[i], 0, (__private half *)(&data[0]) );\n" + " for(offset = 0; offset < vecsize; offset++)\n" + " {\n" + " vstore_half(vload_half(offset, (__private half *)data), 0, &f[vecsize*i+offset]);\n" + " }\n" + "}\n" + }; + + + const char *double_source_local_store[] = { + "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" + "__kernel void test( __global double", vector_size_name_extensions[vectorSize]," *p, __global half *f )\n" + "{\n" + " __local ushort data[16*", local_buf_size, "];\n" + " size_t i = get_global_id(0);\n" + " size_t lid = get_local_id(0);\n" + " size_t vecsize = vec_step(p[0]);\n" + " size_t lsize = get_local_size(0);\n" + " event_t async_event;\n" + " 
vstore_half",vector_size_name_extensions[vectorSize],roundName,"( p[i], lid, (__local half *)(&data[0]) );\n" + " barrier( CLK_LOCAL_MEM_FENCE ); \n" + " async_event = async_work_group_copy((__global ushort *)(f+vecsize*(i-lid)), (__local ushort *)(&data[0]), vecsize*lsize, 0);\n" // investigate later + " wait_group_events(1, &async_event);\n" + "}\n" + }; + + + const char *double_source_v3[] = { + "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" + "__kernel void test( __global double *p, __global half *f ,\n" + " uint extra_last_thread)\n" + "{\n" + " size_t i = get_global_id(0);\n" + " size_t last_i = get_global_size(0)-1;\n" + " size_t adjust = 0;\n" + " if(last_i == i && extra_last_thread != 0) {\n" + " adjust = 3-extra_last_thread;\n" + " } " + " vstore_half3",roundName,"( vload3(i,p-adjust), i, f -adjust);\n" + "}\n" + }; + + const char *double_source_private_store_v3[] = { + "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" + "__kernel void test( __global double *p, __global half *f,\n" + " uint extra_last_thread )\n" + "{\n" + " __private ushort data[4];\n" + " size_t i = get_global_id(0);\n" + " size_t last_i = get_global_size(0)-1;\n" + " size_t adjust = 0;\n" + " size_t offset = 0;\n" + " if(last_i == i && extra_last_thread != 0) {\n" + " adjust = 3-extra_last_thread;\n" + " } " + " vstore_half3",roundName,"( vload3(i, p-adjust), 0, (__private half *)(&data[0]) );\n" + " for(offset = 0; offset < 3; offset++)\n" + " {\n" + " vstore_half(vload_half(offset, (__private half *)data), 0, &f[3*i+offset-adjust]);\n" + " }\n" + "}\n" + }; + + const char *double_source_local_store_v3[] = { + "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" + "__kernel void test( __global double *p, __global half *f,\n" + " uint extra_last_thread )\n" + "{\n" + " __local ushort data[3*(", local_buf_size, "+1)];\n" + " size_t i = get_global_id(0);\n" + " size_t lid = get_local_id(0);\n" + " size_t last_i = get_global_size(0)-1;\n" + " size_t adjust = 0;\n" + " size_t lsize = 
get_local_size(0);\n" + " event_t async_event;\n" + " if(last_i == i && extra_last_thread != 0) {\n" + " adjust = 3-extra_last_thread;\n" + " }\n " + " vstore_half3",roundName,"( vload3(i,p-adjust), lid, (__local half *)(&data[0]) );\n" + " barrier( CLK_LOCAL_MEM_FENCE ); \n" + " async_event = async_work_group_copy((__global ushort *)(f+3*(i-lid)), (__local ushort *)(&data[adjust]), lsize*3-adjust, 0);\n" // investigate later + " wait_group_events(1, &async_event);\n" + "}\n" + }; + + + + if(g_arrVecSizes[vectorSize] == 3) { + programs[vectorSize][0] = MakeProgram( source_v3, sizeof(source_v3) / sizeof( source_v3[0]) ); + } else { + programs[vectorSize][0] = MakeProgram( source, sizeof(source) / sizeof( source[0]) ); + } + if( NULL == programs[ vectorSize ][0] ) + { + gFailCount++; + return -1; + } + + kernels[ vectorSize ][0] = clCreateKernel( programs[ vectorSize ][0], "test", &error ); + if( NULL == kernels[vectorSize][0] ) + { + gFailCount++; + vlog_error( "\t\tFAILED -- Failed to create kernel. (%d)\n", error ); + return error; + } + + if(g_arrVecSizes[vectorSize] == 3) { + programs[vectorSize][1] = MakeProgram( source_private_store_v3, sizeof(source_private_store_v3) / sizeof( source_private_store_v3[0]) ); + } else { + programs[vectorSize][1] = MakeProgram( source_private_store, sizeof(source_private_store) / sizeof( source_private_store[0]) ); + } + if( NULL == programs[ vectorSize ][1] ) + { + gFailCount++; + return -1; + } + + kernels[ vectorSize ][1] = clCreateKernel( programs[ vectorSize ][1], "test", &error ); + if( NULL == kernels[vectorSize][1] ) + { + gFailCount++; + vlog_error( "\t\tFAILED -- Failed to create private kernel. 
(%d)\n", error ); + return error; + } + + if(g_arrVecSizes[vectorSize] == 3) { + programs[vectorSize][2] = MakeProgram( source_local_store_v3, sizeof(source_local_store_v3) / sizeof( source_local_store_v3[0]) ); + if( NULL == programs[ vectorSize ][2] ) + { + unsigned q; + for ( q= 0; q < sizeof( source_local_store_v3) / sizeof( source_local_store_v3[0]); q++) + vlog_error("%s", source_local_store_v3[q]); + + gFailCount++; + return -1; + + } + } else { + programs[vectorSize][2] = MakeProgram( source_local_store, sizeof(source_local_store) / sizeof( source_local_store[0]) ); + if( NULL == programs[ vectorSize ][2] ) + { + unsigned q; + for ( q= 0; q < sizeof( source_local_store) / sizeof( source_local_store[0]); q++) + vlog_error("%s", source_local_store[q]); + + gFailCount++; + return -1; + + } + } + + kernels[ vectorSize ][2] = clCreateKernel( programs[ vectorSize ][2], "test", &error ); + if( NULL == kernels[vectorSize][2] ) + { + gFailCount++; + vlog_error( "\t\tFAILED -- Failed to create local kernel. (%d)\n", error ); + return error; + } + + if( gTestDouble ) + { + if(g_arrVecSizes[vectorSize] == 3) { + doublePrograms[vectorSize][0] = MakeProgram( double_source_v3, sizeof(double_source_v3) / sizeof( double_source_v3[0]) ); + } else { + doublePrograms[vectorSize][0] = MakeProgram( double_source, sizeof(double_source) / sizeof( double_source[0]) ); + } + if( NULL == doublePrograms[ vectorSize ][0] ) + { + gFailCount++; + return -1; + } + + doubleKernels[ vectorSize ][0] = clCreateKernel( doublePrograms[ vectorSize ][0], "test", &error ); + if( NULL == kernels[vectorSize][0] ) + { + gFailCount++; + vlog_error( "\t\tFAILED -- Failed to create double kernel. 
(%d)\n", error ); + return error; + } + + if(g_arrVecSizes[vectorSize] == 3) + doublePrograms[vectorSize][1] = MakeProgram( double_source_private_store_v3, sizeof(double_source_private_store_v3) / sizeof( double_source_private_store_v3[0]) ); + else + doublePrograms[vectorSize][1] = MakeProgram( double_source_private_store, sizeof(double_source_private_store) / sizeof( double_source_private_store[0]) ); + + if( NULL == doublePrograms[ vectorSize ][1] ) + { + gFailCount++; + return -1; + } + + doubleKernels[ vectorSize ][1] = clCreateKernel( doublePrograms[ vectorSize ][1], "test", &error ); + if( NULL == kernels[vectorSize][1] ) + { + gFailCount++; + vlog_error( "\t\tFAILED -- Failed to create double private kernel. (%d)\n", error ); + return error; + } + + if(g_arrVecSizes[vectorSize] == 3) { + doublePrograms[vectorSize][2] = MakeProgram( double_source_local_store_v3, sizeof(double_source_local_store_v3) / sizeof( double_source_local_store_v3[0]) ); + } else { + doublePrograms[vectorSize][2] = MakeProgram( double_source_local_store, sizeof(double_source_local_store) / sizeof( double_source_local_store[0]) ); + } + if( NULL == doublePrograms[ vectorSize ][2] ) + { + gFailCount++; + return -1; + } + + doubleKernels[ vectorSize ][2] = clCreateKernel( doublePrograms[ vectorSize ][2], "test", &error ); + if( NULL == kernels[vectorSize][2] ) + { + gFailCount++; + vlog_error( "\t\tFAILED -- Failed to create double local kernel. (%d)\n", error ); + return error; + } + } + } // end for vector size + + // Figure out how many elements are in a work block + size_t elementSize = MAX( sizeof(cl_ushort), sizeof(float)); + size_t blockCount = BUFFER_SIZE / elementSize; // elementSize is power of 2 + uint64_t lastCase = 1ULL << (8*sizeof(float)); // number of floats. + size_t stride = blockCount; + + if (gWimpyMode) + stride = 0x10000000U; + + // we handle 64-bit types a bit differently. 
+ if( lastCase == 0 ) + lastCase = 0x100000000ULL; + + uint64_t i, j; + error = 0; + uint64_t printMask = (lastCase >> 4) - 1; + cl_uint count = 0; + int addressSpace; + size_t loopCount; + cl_uint threadCount = GetThreadCount(); + + ComputeReferenceInfoF fref; + fref.x = (float *)gIn_single; + fref.r = (cl_ushort *)gOut_half_reference; + fref.f = referenceFunc; + fref.lim = blockCount; + fref.count = (blockCount + threadCount - 1) / threadCount; + + CheckResultInfoF fchk; + fchk.x = (const float *)gIn_single; + fchk.r = (const cl_ushort *)gOut_half_reference; + fchk.s = (const cl_ushort *)gOut_half; + fchk.f = referenceFunc; + fchk.lim = blockCount; + fchk.count = (blockCount + threadCount - 1) / threadCount; + + ComputeReferenceInfoD dref; + dref.x = (double *)gIn_double; + dref.r = (cl_ushort *)gOut_half_reference_double; + dref.f = doubleReferenceFunc; + dref.lim = blockCount; + dref.count = (blockCount + threadCount - 1) / threadCount; + + CheckResultInfoD dchk; + dchk.x = (const double *)gIn_double; + dchk.r = (const cl_ushort *)gOut_half_reference_double; + dchk.s = (const cl_ushort *)gOut_half; + dchk.f = doubleReferenceFunc; + dchk.lim = blockCount; + dchk.count = (blockCount + threadCount - 1) / threadCount; + + for( i = 0; i < lastCase; i += stride ) + { + count = (cl_uint) MIN( blockCount, lastCase - i ); + fref.i = i; + dref.i = i; + + // Compute the input and reference + ThreadPool_Do(ReferenceF, threadCount, &fref); + + error = clEnqueueWriteBuffer(gQueue, gInBuffer_single, CL_FALSE, 0, count * sizeof(float ), gIn_single, 0, NULL, NULL); + if (error) { + vlog_error( "Failure in clWriteBuffer\n" ); + gFailCount++; + goto exit; + } + + if (gTestDouble) { + ThreadPool_Do(ReferenceD, threadCount, &dref); + + error = clEnqueueWriteBuffer(gQueue, gInBuffer_double, CL_FALSE, 0, count * sizeof(double ), gIn_double, 0, NULL, NULL); + if (error) { + vlog_error( "Failure in clWriteBuffer\n" ); + gFailCount++; + goto exit; + } + } + + for (vectorSize = 
kMinVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++) { + // Loop through vector sizes + fchk.vsz = g_arrVecSizes[vectorSize]; + dchk.vsz = g_arrVecSizes[vectorSize]; + + for ( addressSpace = 0; addressSpace < 3; addressSpace++) { + // Loop over address spaces + fchk.aspace = addressSpaceNames[addressSpace]; + dchk.aspace = addressSpaceNames[addressSpace]; + + cl_uint pattern = 0xdeaddead; + memset_pattern4( gOut_half, &pattern, BUFFER_SIZE/2); + + error = clEnqueueWriteBuffer(gQueue, gOutBuffer_half, CL_FALSE, 0, count * sizeof(cl_ushort), gOut_half, 0, NULL, NULL); + if (error) { + vlog_error( "Failure in clWriteArray\n" ); + gFailCount++; + goto exit; + } + + error = RunKernel(kernels[vectorSize][addressSpace], gInBuffer_single, gOutBuffer_half, + numVecs(count, vectorSize, aligned) , + runsOverBy(count, vectorSize, aligned)); + if (error) { + gFailCount++; + goto exit; + } + + error = clEnqueueReadBuffer(gQueue, gOutBuffer_half, CL_TRUE, 0, count * sizeof(cl_ushort), gOut_half, 0, NULL, NULL); + if (error) { + vlog_error( "Failure in clReadArray\n" ); + gFailCount++; + goto exit; + } + + error = ThreadPool_Do(CheckF, threadCount, &fchk); + if (error) { + gFailCount++; + goto exit; + } + + if (gTestDouble) { + memset_pattern4( gOut_half, &pattern, BUFFER_SIZE/2); + + error = clEnqueueWriteBuffer(gQueue, gOutBuffer_half, CL_FALSE, 0, count * sizeof(cl_ushort), gOut_half, 0, NULL, NULL); + if (error) { + vlog_error( "Failure in clWriteArray\n" ); + gFailCount++; + goto exit; + } + + error = RunKernel(doubleKernels[vectorSize][addressSpace], gInBuffer_double, gOutBuffer_half, + numVecs(count, vectorSize, aligned), + runsOverBy(count, vectorSize, aligned)); + if (error) { + gFailCount++; + goto exit; + } + + error = clEnqueueReadBuffer(gQueue, gOutBuffer_half, CL_TRUE, 0, count * sizeof(cl_ushort), gOut_half, 0, NULL, NULL); + if (error) { + vlog_error( "Failure in clReadArray\n" ); + gFailCount++; + goto exit; + } + + error = ThreadPool_Do(CheckD, 
threadCount, &dchk); + if (error) { + gFailCount++; + goto exit; + } + } + } + } + + if( ((i+blockCount) & ~printMask) == (i+blockCount) ) + { + vlog( "." ); + fflush( stdout ); + } + } // end last case + + loopCount = count == blockCount ? 1 : 100; + if( gReportTimes ) + { + //Init the input stream + cl_float *p = (cl_float *)gIn_single; + for( j = 0; j < count; j++ ) + p[j] = (float)((double) (rand() - RAND_MAX/2) / (RAND_MAX/2)); + + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer_single, CL_TRUE, 0, count * sizeof( float ), gIn_single, 0, NULL, NULL)) ) + { + vlog_error( "Failure in clWriteArray\n" ); + gFailCount++; + goto exit; + } + + if( gTestDouble ) + { + //Init the input stream + cl_double *q = (cl_double *)gIn_double; + for( j = 0; j < count; j++ ) + q[j] = ((double) (rand() - RAND_MAX/2) / (RAND_MAX/2)); + + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer_double, CL_TRUE, 0, count * sizeof( double ), gIn_double, 0, NULL, NULL)) ) + { + vlog_error( "Failure in clWriteArray\n" ); + gFailCount++; + goto exit; + } + } + + //Run again for timing + for( vectorSize = kMinVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++) + { + uint64_t bestTime = -1ULL; + for( j = 0; j < loopCount; j++ ) + { + uint64_t startTime = ReadTime(); + + + if( (error = RunKernel( kernels[vectorSize][0], gInBuffer_single, gOutBuffer_half, numVecs(count, vectorSize, aligned) , + runsOverBy(count, vectorSize, aligned)) ) ) + { + gFailCount++; + goto exit; + } + + if( (error = clFinish(gQueue)) ) + { + vlog_error( "Failure in clFinish\n" ); + gFailCount++; + goto exit; + } + uint64_t currentTime = ReadTime() - startTime; + if( currentTime < bestTime ) + bestTime = currentTime; + time[ vectorSize ] += currentTime; + } + if( bestTime < min_time[ vectorSize ] ) + min_time[ vectorSize ] = bestTime ; + + if( gTestDouble ) + { + bestTime = -1ULL; + for( j = 0; j < loopCount; j++ ) + { + uint64_t startTime = ReadTime(); + if( (error = RunKernel( doubleKernels[vectorSize][0], 
gInBuffer_double, gOutBuffer_half, numVecs(count, vectorSize, aligned) , + runsOverBy(count, vectorSize, aligned)) ) ) + { + gFailCount++; + goto exit; + } + + if( (error = clFinish(gQueue)) ) + { + vlog_error( "Failure in clFinish\n" ); + gFailCount++; + goto exit; + } + uint64_t currentTime = ReadTime() - startTime; + if( currentTime < bestTime ) + bestTime = currentTime; + doubleTime[ vectorSize ] += currentTime; + } + if( bestTime < min_double_time[ vectorSize ] ) + min_double_time[ vectorSize ] = bestTime; + } + } + } + + if( 0 == gFailCount ) + { + if( gWimpyMode ) + { + vlog( "\tfloat: Wimp Passed\n" ); + if( gTestDouble ) + vlog( "\tdouble: Wimp Passed\n" ); + } + else + { + vlog( "\tfloat Passed\n" ); + if( gTestDouble ) + vlog( "\tdouble Passed\n" ); + } + } + + if( gReportTimes ) + { + for( vectorSize = kMinVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++) + vlog_perf( SubtractTime( time[ vectorSize ], 0 ) * 1e6 * gDeviceFrequency * gComputeDevices / (double) (count * loopCount), 0, + "average us/elem", "vStoreHalf%s avg. (%s vector size: %d)", roundName, addressSpaceNames[0], (g_arrVecSizes[vectorSize]) ); + for( vectorSize = kMinVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++) + vlog_perf( SubtractTime( min_time[ vectorSize ], 0 ) * 1e6 * gDeviceFrequency * gComputeDevices / (double) count, 0, + "best us/elem", "vStoreHalf%s best (%s vector size: %d)", roundName, addressSpaceNames[0], (g_arrVecSizes[vectorSize]) ); + if( gTestDouble ) + { + for( vectorSize = kMinVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++) + vlog_perf( SubtractTime( doubleTime[ vectorSize ], 0 ) * 1e6 * gDeviceFrequency * gComputeDevices / (double) (count * loopCount), 0, + "average us/elem (double)", "vStoreHalf%s avg. 
d (%s vector size: %d)", roundName, addressSpaceNames[0], (g_arrVecSizes[vectorSize]) ); + for( vectorSize = kMinVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++) + vlog_perf( SubtractTime( min_double_time[ vectorSize ], 0 ) * 1e6 * gDeviceFrequency * gComputeDevices / (double) count, 0, + "best us/elem (double)", "vStoreHalf%s best d (%s vector size: %d)", roundName, addressSpaceNames[0], (g_arrVecSizes[vectorSize]) ); + } + } + +exit: + //clean up + for( vectorSize = kMinVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++) + { + for ( addressSpace = 0; addressSpace < 3; addressSpace++) { + clReleaseKernel( kernels[ vectorSize ][ addressSpace ] ); + clReleaseProgram( programs[ vectorSize ][ addressSpace ] ); + if( gTestDouble ) + { + clReleaseKernel( doubleKernels[ vectorSize ][addressSpace] ); + clReleaseProgram( doublePrograms[ vectorSize ][addressSpace] ); + } + } + } + + gTestCount++; + return error; +} + +int Test_vStoreaHalf_private( f2h referenceFunc, d2h doubleReferenceFunc, const char *roundName ) +{ + int vectorSize, error; + cl_program programs[kVectorSizeCount+kStrangeVectorSizeCount][3]; + cl_kernel kernels[kVectorSizeCount+kStrangeVectorSizeCount][3]; + + uint64_t time[kVectorSizeCount+kStrangeVectorSizeCount] = {0}; + uint64_t min_time[kVectorSizeCount+kStrangeVectorSizeCount] = {0}; + memset( min_time, -1, sizeof( min_time ) ); + cl_program doublePrograms[kVectorSizeCount+kStrangeVectorSizeCount][3]; + cl_kernel doubleKernels[kVectorSizeCount+kStrangeVectorSizeCount][3]; + uint64_t doubleTime[kVectorSizeCount+kStrangeVectorSizeCount] = {0}; + uint64_t min_double_time[kVectorSizeCount+kStrangeVectorSizeCount] = {0}; + memset( min_double_time, -1, sizeof( min_double_time ) ); + + bool aligned = true; + + vlog( "Testing vstorea_half%s\n", roundName ); + fflush( stdout ); + + int minVectorSize = kMinVectorSize; + // There is no aligned scalar vstorea_half + if( 0 == minVectorSize ) + minVectorSize = 1; + + //Loop over vector sizes 
+ for( vectorSize = minVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++) + { + const char *source[] = { + "__kernel void test( __global float", vector_size_name_extensions[vectorSize]," *p, __global half *f )\n" + "{\n" + " size_t i = get_global_id(0);\n" + " vstorea_half",vector_size_name_extensions[vectorSize],roundName,"( p[i], i, f );\n" + "}\n" + }; + + const char *source_v3[] = { + "__kernel void test( __global float3 *p, __global half *f )\n" + "{\n" + " size_t i = get_global_id(0);\n" + " vstorea_half3",roundName,"( p[i], i, f );\n" + " vstore_half",roundName,"( ((__global float *)p)[4*i+3], 4*i+3, f);\n" + "}\n" + }; + + const char *source_private[] = { + "__kernel void test( __global float", vector_size_name_extensions[vectorSize]," *p, __global half *f )\n" + "{\n" + " __private float", vector_size_name_extensions[vectorSize], " data;\n" + " size_t i = get_global_id(0);\n" + " data = p[i];\n" + " vstorea_half",vector_size_name_extensions[vectorSize],roundName,"( data, i, f );\n" + "}\n" + }; + + const char *source_private_v3[] = { + "__kernel void test( __global float3 *p, __global half *f )\n" + "{\n" + " __private float", vector_size_name_extensions[vectorSize], " data;\n" + " size_t i = get_global_id(0);\n" + " data = p[i];\n" + " vstorea_half3",roundName,"( data, i, f );\n" + " vstore_half",roundName,"( ((__global float *)p)[4*i+3], 4*i+3, f);\n" + "}\n" + }; + + char local_buf_size[10]; + sprintf(local_buf_size, "%lld", (uint64_t)gWorkGroupSize); + const char *source_local[] = { + "__kernel void test( __global float", vector_size_name_extensions[vectorSize]," *p, __global half *f )\n" + "{\n" + " __local float", vector_size_name_extensions[vectorSize], " data[", local_buf_size, "];\n" + " size_t i = get_global_id(0);\n" + " size_t lid = get_local_id(0);\n" + " data[lid] = p[i];\n" + " vstorea_half",vector_size_name_extensions[vectorSize],roundName,"( data[lid], i, f );\n" + "}\n" + }; + + const char *source_local_v3[] = { + "__kernel 
void test( __global float", vector_size_name_extensions[vectorSize]," *p, __global half *f )\n" + "{\n" + " __local float", vector_size_name_extensions[vectorSize], " data[", local_buf_size, "];\n" + " size_t i = get_global_id(0);\n" + " size_t lid = get_local_id(0);\n" + " data[lid] = p[i];\n" + " vstorea_half",vector_size_name_extensions[vectorSize],roundName,"( data[lid], i, f );\n" + " vstore_half",roundName,"( ((__global float *)p)[4*i+3], 4*i+3, f);\n" + "}\n" + }; + + const char *double_source[] = { + "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" + "__kernel void test( __global double", vector_size_name_extensions[vectorSize]," *p, __global half *f )\n" + "{\n" + " size_t i = get_global_id(0);\n" + " vstorea_half",vector_size_name_extensions[vectorSize],roundName,"( p[i], i, f );\n" + "}\n" + }; + + const char *double_source_v3[] = { + "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" + "__kernel void test( __global double", vector_size_name_extensions[vectorSize]," *p, __global half *f )\n" + "{\n" + " size_t i = get_global_id(0);\n" + " vstorea_half",vector_size_name_extensions[vectorSize],roundName,"( p[i], i, f );\n" + " vstore_half",roundName,"( ((__global double *)p)[4*i+3], 4*i+3, f);\n" + "}\n" + }; + + const char *double_source_private[] = { + "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" + "__kernel void test( __global double", vector_size_name_extensions[vectorSize]," *p, __global half *f )\n" + "{\n" + " __private double", vector_size_name_extensions[vectorSize], " data;\n" + " size_t i = get_global_id(0);\n" + " data = p[i];\n" + " vstorea_half",vector_size_name_extensions[vectorSize],roundName,"( data, i, f );\n" + "}\n" + }; + + const char *double_source_private_v3[] = { + "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" + "__kernel void test( __global double", vector_size_name_extensions[vectorSize]," *p, __global half *f )\n" + "{\n" + " __private double", vector_size_name_extensions[vectorSize], " data;\n" + " size_t i = 
get_global_id(0);\n" + " data = p[i];\n" + " vstorea_half",vector_size_name_extensions[vectorSize],roundName,"( data, i, f );\n" + " vstore_half",roundName,"( ((__global double *)p)[4*i+3], 4*i+3, f);\n" + "}\n" + }; + + const char *double_source_local[] = { + "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" + "__kernel void test( __global double", vector_size_name_extensions[vectorSize]," *p, __global half *f )\n" + "{\n" + " __local double", vector_size_name_extensions[vectorSize], " data[", local_buf_size, "];\n" + " size_t i = get_global_id(0);\n" + " size_t lid = get_local_id(0);\n" + " data[lid] = p[i];\n" + " vstorea_half",vector_size_name_extensions[vectorSize],roundName,"( data[lid], i, f );\n" + "}\n" + }; + + const char *double_source_local_v3[] = { + "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" + "__kernel void test( __global double", vector_size_name_extensions[vectorSize]," *p, __global half *f )\n" + "{\n" + " __local double", vector_size_name_extensions[vectorSize], " data[", local_buf_size, "];\n" + " size_t i = get_global_id(0);\n" + " size_t lid = get_local_id(0);\n" + " data[lid] = p[i];\n" + " vstorea_half",vector_size_name_extensions[vectorSize],roundName,"( data[lid], i, f );\n" + " vstore_half",roundName,"( ((__global double *)p)[4*i+3], 4*i+3, f);\n" + "}\n" + }; + + if(g_arrVecSizes[vectorSize] == 3) { + programs[vectorSize][0] = MakeProgram( source_v3, sizeof(source_v3) / sizeof( source_v3[0]) ); + if( NULL == programs[ vectorSize ][0] ) + { + gFailCount++; + return -1; + } + } else { + programs[vectorSize][0] = MakeProgram( source, sizeof(source) / sizeof( source[0]) ); + if( NULL == programs[ vectorSize ][0] ) + { + gFailCount++; + return -1; + } + } + + kernels[ vectorSize ][0] = clCreateKernel( programs[ vectorSize ][0], "test", &error ); + if( NULL == kernels[vectorSize][0] ) + { + gFailCount++; + vlog_error( "\t\tFAILED -- Failed to create kernel. 
(%d)\n", error ); + return error; + } + + if(g_arrVecSizes[vectorSize] == 3) { + programs[vectorSize][1] = MakeProgram( source_private_v3, sizeof(source_private_v3) / sizeof( source_private_v3[0]) ); + if( NULL == programs[ vectorSize ][1] ) + { + gFailCount++; + return -1; + } + } else { + programs[vectorSize][1] = MakeProgram( source_private, sizeof(source_private) / sizeof( source_private[0]) ); + if( NULL == programs[ vectorSize ][1] ) + { + gFailCount++; + return -1; + } + } + + kernels[ vectorSize ][1] = clCreateKernel( programs[ vectorSize ][1], "test", &error ); + if( NULL == kernels[vectorSize][1] ) + { + gFailCount++; + vlog_error( "\t\tFAILED -- Failed to create private kernel. (%d)\n", error ); + return error; + } + + if(g_arrVecSizes[vectorSize] == 3) { + programs[vectorSize][2] = MakeProgram( source_local_v3, sizeof(source_local_v3) / sizeof( source_local_v3[0]) ); + if( NULL == programs[ vectorSize ][2] ) + { + gFailCount++; + return -1; + } + } else { + programs[vectorSize][2] = MakeProgram( source_local, sizeof(source_local) / sizeof( source_local[0]) ); + if( NULL == programs[ vectorSize ][2] ) + { + gFailCount++; + return -1; + } + } + + kernels[ vectorSize ][2] = clCreateKernel( programs[ vectorSize ][2], "test", &error ); + if( NULL == kernels[vectorSize][2] ) + { + gFailCount++; + vlog_error( "\t\tFAILED -- Failed to create local kernel. 
(%d)\n", error ); + return error; + } + + if( gTestDouble ) + { + if(g_arrVecSizes[vectorSize] == 3) { + doublePrograms[vectorSize][0] = MakeProgram( double_source_v3, sizeof(double_source_v3) / sizeof( double_source_v3[0]) ); + if( NULL == doublePrograms[ vectorSize ][0] ) + { + gFailCount++; + return -1; + } + } else { + doublePrograms[vectorSize][0] = MakeProgram( double_source, sizeof(double_source) / sizeof( double_source[0]) ); + if( NULL == doublePrograms[ vectorSize ][0] ) + { + gFailCount++; + return -1; + } + } + + doubleKernels[ vectorSize ][0] = clCreateKernel( doublePrograms[ vectorSize ][0], "test", &error ); + if( NULL == kernels[vectorSize][0] ) + { + gFailCount++; + vlog_error( "\t\tFAILED -- Failed to create double kernel. (%d)\n", error ); + return error; + } + + if(g_arrVecSizes[vectorSize] == 3) { + doublePrograms[vectorSize][1] = MakeProgram( double_source_private_v3, sizeof(double_source_private_v3) / sizeof( double_source_private_v3[0]) ); + if( NULL == doublePrograms[ vectorSize ][1] ) + { + gFailCount++; + return -1; + } + } else { + doublePrograms[vectorSize][1] = MakeProgram( double_source_private, sizeof(double_source_private) / sizeof( double_source_private[0]) ); + if( NULL == doublePrograms[ vectorSize ][1] ) + { + gFailCount++; + return -1; + } + } + + doubleKernels[ vectorSize ][1] = clCreateKernel( doublePrograms[ vectorSize ][1], "test", &error ); + if( NULL == kernels[vectorSize][1] ) + { + gFailCount++; + vlog_error( "\t\tFAILED -- Failed to create double private kernel. 
(%d)\n", error ); + return error; + } + + if(g_arrVecSizes[vectorSize] == 3) { + doublePrograms[vectorSize][2] = MakeProgram( double_source_local_v3, sizeof(double_source_local_v3) / sizeof( double_source_local_v3[0]) ); + if( NULL == doublePrograms[ vectorSize ][2] ) + { + gFailCount++; + return -1; + } + } else { + doublePrograms[vectorSize][2] = MakeProgram( double_source_local, sizeof(double_source_local) / sizeof( double_source_local[0]) ); + if( NULL == doublePrograms[ vectorSize ][2] ) + { + gFailCount++; + return -1; + } + } + + doubleKernels[ vectorSize ][2] = clCreateKernel( doublePrograms[ vectorSize ][2], "test", &error ); + if( NULL == kernels[vectorSize][2] ) + { + gFailCount++; + vlog_error( "\t\tFAILED -- Failed to create double local kernel. (%d)\n", error ); + return error; + } + } + } + + // Figure out how many elements are in a work block + size_t elementSize = MAX( sizeof(cl_ushort), sizeof(float)); + size_t blockCount = BUFFER_SIZE / elementSize; + uint64_t lastCase = 1ULL << (8*sizeof(float)); + size_t stride = blockCount; + + if (gWimpyMode) + stride = 0x10000000U; + + // we handle 64-bit types a bit differently. 
+ if( lastCase == 0 ) + lastCase = 0x100000000ULL; + uint64_t i, j; + error = 0; + uint64_t printMask = (lastCase >> 4) - 1; + cl_uint count = 0; + int addressSpace; + size_t loopCount; + cl_uint threadCount = GetThreadCount(); + + ComputeReferenceInfoF fref; + fref.x = (float *)gIn_single; + fref.r = (cl_ushort *)gOut_half_reference; + fref.f = referenceFunc; + fref.lim = blockCount; + fref.count = (blockCount + threadCount - 1) / threadCount; + + CheckResultInfoF fchk; + fchk.x = (const float *)gIn_single; + fchk.r = (const cl_ushort *)gOut_half_reference; + fchk.s = (const cl_ushort *)gOut_half; + fchk.f = referenceFunc; + fchk.lim = blockCount; + fchk.count = (blockCount + threadCount - 1) / threadCount; + + ComputeReferenceInfoD dref; + dref.x = (double *)gIn_double; + dref.r = (cl_ushort *)gOut_half_reference_double; + dref.f = doubleReferenceFunc; + dref.lim = blockCount; + dref.count = (blockCount + threadCount - 1) / threadCount; + + CheckResultInfoD dchk; + dchk.x = (const double *)gIn_double; + dchk.r = (const cl_ushort *)gOut_half_reference_double; + dchk.s = (const cl_ushort *)gOut_half; + dchk.f = doubleReferenceFunc; + dchk.lim = blockCount; + dchk.count = (blockCount + threadCount - 1) / threadCount; + + for( i = 0; i < (uint64_t)lastCase; i += stride ) + { + count = (cl_uint) MIN( blockCount, lastCase - i ); + fref.i = i; + dref.i = i; + + // Create the input and reference + ThreadPool_Do(ReferenceF, threadCount, &fref); + + error = clEnqueueWriteBuffer(gQueue, gInBuffer_single, CL_FALSE, 0, count * sizeof(float ), gIn_single, 0, NULL, NULL); + if (error) { + vlog_error( "Failure in clWriteArray\n" ); + gFailCount++; + goto exit; + } + + if (gTestDouble) { + ThreadPool_Do(ReferenceD, threadCount, &dref); + + error = clEnqueueWriteBuffer(gQueue, gInBuffer_double, CL_FALSE, 0, count * sizeof(double ), gIn_double, 0, NULL, NULL); + if (error) { + vlog_error( "Failure in clWriteArray\n" ); + gFailCount++; + goto exit; + } + } + + for (vectorSize = 
minVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++) { + // Loop over vector legths + fchk.vsz = g_arrVecSizes[vectorSize]; + dchk.vsz = g_arrVecSizes[vectorSize]; + + for ( addressSpace = 0; addressSpace < 3; addressSpace++) { + // Loop over address spaces + fchk.aspace = addressSpaceNames[addressSpace]; + dchk.aspace = addressSpaceNames[addressSpace]; + + cl_uint pattern = 0xdeaddead; + memset_pattern4(gOut_half, &pattern, BUFFER_SIZE/2); + + error = clEnqueueWriteBuffer(gQueue, gOutBuffer_half, CL_FALSE, 0, count * sizeof(cl_ushort), gOut_half, 0, NULL, NULL); + if (error) { + vlog_error( "Failure in clWriteArray\n" ); + gFailCount++; + goto exit; + } + + error = RunKernel(kernels[vectorSize][addressSpace], gInBuffer_single, gOutBuffer_half, + numVecs(count, vectorSize, aligned), + runsOverBy(count, vectorSize, aligned)); + if (error) { + gFailCount++; + goto exit; + } + + error = clEnqueueReadBuffer(gQueue, gOutBuffer_half, CL_TRUE, 0, count * sizeof(cl_ushort), gOut_half, 0, NULL, NULL); + if (error) { + vlog_error( "Failure in clReadArray\n" ); + gFailCount++; + goto exit; + } + + error = ThreadPool_Do(CheckF, threadCount, &fchk); + if (error) { + gFailCount++; + goto exit; + } + + if (gTestDouble) { + memset_pattern4(gOut_half, &pattern, BUFFER_SIZE/2); + + error = clEnqueueWriteBuffer(gQueue, gOutBuffer_half, CL_FALSE, 0, count * sizeof(cl_ushort), gOut_half, 0, NULL, NULL); + if (error) { + vlog_error( "Failure in clWriteArray\n" ); + gFailCount++; + goto exit; + } + + error = RunKernel(doubleKernels[vectorSize][addressSpace], gInBuffer_double, gOutBuffer_half, + numVecs(count, vectorSize, aligned), + runsOverBy(count, vectorSize, aligned)); + if (error) { + gFailCount++; + goto exit; + } + + error = clEnqueueReadBuffer(gQueue, gOutBuffer_half, CL_TRUE, 0, count * sizeof(cl_ushort), gOut_half, 0, NULL, NULL); + if (error) { + vlog_error( "Failure in clReadArray\n" ); + gFailCount++; + goto exit; + } + + error = ThreadPool_Do(CheckD, 
threadCount, &dchk); + if (error) { + gFailCount++; + goto exit; + } + } + } + } // end for vector size + + if( ((i+blockCount) & ~printMask) == (i+blockCount) ) { + vlog( "." ); + fflush( stdout ); + } + } // for end lastcase + + loopCount = count == blockCount ? 1 : 100; + if( gReportTimes ) + { + //Init the input stream + cl_float *p = (cl_float *)gIn_single; + for( j = 0; j < count; j++ ) + p[j] = (float)((double) (rand() - RAND_MAX/2) / (RAND_MAX/2)); + + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer_single, CL_TRUE, 0, count * sizeof( float ), gIn_single, 0, NULL, NULL)) ) + { + vlog_error( "Failure in clWriteArray\n" ); + gFailCount++; + goto exit; + } + + if( gTestDouble ) + { + //Init the input stream + cl_double *q = (cl_double *)gIn_double; + for( j = 0; j < count; j++ ) + q[j] = ((double) (rand() - RAND_MAX/2) / (RAND_MAX/2)); + + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer_double, CL_TRUE, 0, count * sizeof( double ), gIn_double, 0, NULL, NULL)) ) + { + vlog_error( "Failure in clWriteArray\n" ); + gFailCount++; + goto exit; + } + } + + //Run again for timing + for( vectorSize = minVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++) + { + uint64_t bestTime = -1ULL; + for( j = 0; j < loopCount; j++ ) + { + uint64_t startTime = ReadTime(); + if( (error = RunKernel( kernels[vectorSize][0], gInBuffer_single, gOutBuffer_half, numVecs(count, vectorSize, aligned) , + runsOverBy(count, vectorSize, aligned)) ) ) + { + gFailCount++; + goto exit; + } + + if( (error = clFinish(gQueue)) ) + { + vlog_error( "Failure in clFinish\n" ); + gFailCount++; + goto exit; + } + uint64_t currentTime = ReadTime() - startTime; + if( currentTime < bestTime ) + bestTime = currentTime; + time[ vectorSize ] += currentTime; + } + if( bestTime < min_time[ vectorSize ] ) + min_time[ vectorSize ] = bestTime ; + + if( gTestDouble ) + { + bestTime = -1ULL; + for( j = 0; j < loopCount; j++ ) + { + uint64_t startTime = ReadTime(); + if( (error = RunKernel( 
doubleKernels[vectorSize][0], gInBuffer_double, gOutBuffer_half, numVecs(count, vectorSize, aligned) , + runsOverBy(count, vectorSize, aligned)) ) ) + { + gFailCount++; + goto exit; + } + + if( (error = clFinish(gQueue)) ) + { + vlog_error( "Failure in clFinish\n" ); + gFailCount++; + goto exit; + } + uint64_t currentTime = ReadTime() - startTime; + if( currentTime < bestTime ) + bestTime = currentTime; + doubleTime[ vectorSize ] += currentTime; + } + if( bestTime < min_double_time[ vectorSize ] ) + min_double_time[ vectorSize ] = bestTime; + } + } + } + + if( gWimpyMode ) + { + vlog( "\tfloat: Wimp Passed\n" ); + + if( gTestDouble ) + vlog( "\tdouble: Wimp Passed\n" ); + } + else + { + vlog( "\tfloat Passed\n" ); + if( gTestDouble ) + vlog( "\tdouble Passed\n" ); + } + + if( gReportTimes ) + { + for( vectorSize = minVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++) + vlog_perf( SubtractTime( time[ vectorSize ], 0 ) * 1e6 * gDeviceFrequency * gComputeDevices / (double) (count * loopCount), 0, + "average us/elem", "vStoreaHalf%s avg. (%s vector size: %d)", roundName, addressSpaceNames[0], (g_arrVecSizes[vectorSize]) ); + for( vectorSize = minVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++) + vlog_perf( SubtractTime( min_time[ vectorSize ], 0 ) * 1e6 * gDeviceFrequency * gComputeDevices / (double) count, 0, + "best us/elem", "vStoreaHalf%s best (%s vector size: %d)", roundName, addressSpaceNames[0], (g_arrVecSizes[vectorSize]) ); + if( gTestDouble ) + { + for( vectorSize = minVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++) + vlog_perf( SubtractTime( doubleTime[ vectorSize ], 0 ) * 1e6 * gDeviceFrequency * gComputeDevices / (double) (count * loopCount), 0, + "average us/elem (double)", "vStoreaHalf%s avg. 
d (%s vector size: %d)", roundName, addressSpaceNames[0], (g_arrVecSizes[vectorSize]) ); + for( vectorSize = minVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++) + vlog_perf( SubtractTime( min_double_time[ vectorSize ], 0 ) * 1e6 * gDeviceFrequency * gComputeDevices / (double) count, 0, + "best us/elem (double)", "vStoreaHalf%s best d (%s vector size: %d)", roundName, addressSpaceNames[0], (g_arrVecSizes[vectorSize]) ); + } + } + +exit: + //clean up + for( vectorSize = minVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++) + { + for ( addressSpace = 0; addressSpace < 3; addressSpace++) { + clReleaseKernel( kernels[ vectorSize ][addressSpace] ); + clReleaseProgram( programs[ vectorSize ][addressSpace] ); + if( gTestDouble ) + { + clReleaseKernel( doubleKernels[ vectorSize ][addressSpace] ); + clReleaseProgram( doublePrograms[ vectorSize ][addressSpace] ); + } + } + } + + gTestCount++; + return error; +} + diff --git a/test_conformance/half/cl_utils.c b/test_conformance/half/cl_utils.c new file mode 100644 index 00000000..939cddf3 --- /dev/null +++ b/test_conformance/half/cl_utils.c @@ -0,0 +1,493 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "cl_utils.h" +#include + +#if !defined (_WIN32) +#include +#endif + +#include "test_config.h" +#include "string.h" +#include "../../test_common/harness/kernelHelpers.h" + +#define HALF_MIN 1.0p-14 + + +const char *vector_size_name_extensions[kVectorSizeCount+kStrangeVectorSizeCount] = { "", "2", "4", "8", "16", "3" }; +const char *vector_size_strings[kVectorSizeCount+kStrangeVectorSizeCount] = { "1", "2", "4", "8", "16", "3" }; +const char *align_divisors[kVectorSizeCount+kStrangeVectorSizeCount] = { "1", "2", "4", "8", "16", "4" }; +const char *align_types[kVectorSizeCount+kStrangeVectorSizeCount] = { "half", "int", "int2", "int4", "int8", "int2" }; + + +void *gIn_half = NULL; +void *gOut_half = NULL; +void *gOut_half_reference = NULL; +void *gOut_half_reference_double = NULL; +void *gIn_single = NULL; +void *gOut_single = NULL; +void *gOut_single_reference = NULL; +void *gIn_double = NULL; +// void *gOut_double = NULL; +// void *gOut_double_reference = NULL; +cl_mem gInBuffer_half = NULL; +cl_mem gOutBuffer_half = NULL; +cl_mem gInBuffer_single = NULL; +cl_mem gOutBuffer_single = NULL; +cl_mem gInBuffer_double = NULL; +// cl_mem gOutBuffer_double = NULL; + +cl_device_type gDeviceType = CL_DEVICE_TYPE_DEFAULT; +cl_device_id gDevice = NULL; +cl_context gContext = NULL; +cl_command_queue gQueue = NULL; +uint32_t gDeviceFrequency = 0; +uint32_t gComputeDevices = 0; +size_t gMaxThreadGroupSize = 0; +size_t gWorkGroupSize = 0; +int gTestCount = 0; +int gFailCount = 0; +bool gWimpyMode = false; +int gTestDouble = 0; +uint32_t gDeviceIndex = 0; +int gIsEmbedded = 0; + +#if defined( __APPLE__ ) +int gReportTimes = 1; +#else +int gReportTimes = 0; +#endif + +#pragma mark - + +static void CL_CALLBACK notify_callback(const char *errinfo, const void *private_info, size_t cb, void *user_data) +{ + vlog( "%s\n", errinfo ); +} + +int InitCL( void ) +{ + cl_platform_id platform = NULL; + size_t configSize = sizeof( gComputeDevices ); + int error; + + if( (error = 
clGetPlatformIDs(1, &platform, NULL) ) ) + return error; + + // gDeviceType & gDeviceIndex are globals set in ParseArgs + + cl_uint ndevices; + if ( (error = clGetDeviceIDs(platform, gDeviceType, 0, NULL, &ndevices)) ) + return error; + + cl_device_id *gDeviceList = (cl_device_id *)malloc(ndevices*sizeof( cl_device_id )); + if ( gDeviceList == 0 ) + { + log_error("Unable to allocate memory for devices\n"); + return -1; + } + if( (error = clGetDeviceIDs(platform, gDeviceType, ndevices, gDeviceList, NULL )) ) + { + free( gDeviceList ); + return error; + } + + gDevice = gDeviceList[gDeviceIndex]; + free( gDeviceList ); + +#if MULTITHREAD + if( (error = clGetDeviceInfo( gDevice, CL_DEVICE_MAX_COMPUTE_UNITS, configSize, &gComputeDevices, NULL )) ) +#endif + gComputeDevices = 1; + + configSize = sizeof( gMaxThreadGroupSize ); + if( (error = clGetDeviceInfo( gDevice, CL_DEVICE_MAX_WORK_GROUP_SIZE, configSize, &gMaxThreadGroupSize, NULL )) ) + gMaxThreadGroupSize = 1; + + // Use only one-eighth the work group size + if (gMaxThreadGroupSize > 8) + gWorkGroupSize = gMaxThreadGroupSize / 8; + else + gWorkGroupSize = gMaxThreadGroupSize; + + configSize = sizeof( gDeviceFrequency ); + if( (error = clGetDeviceInfo( gDevice, CL_DEVICE_MAX_CLOCK_FREQUENCY, configSize, &gDeviceFrequency, NULL )) ) + gDeviceFrequency = 1; + + // Check extensions + size_t extSize = 0; + int hasDouble = 0; + if((error = clGetDeviceInfo( gDevice, CL_DEVICE_EXTENSIONS, 0, NULL, &extSize))) + { vlog_error( "Unable to get device extension string to see if double present. (%d) \n", error ); } + else + { + char *ext = (char *)malloc( extSize ); + if( NULL == ext ) + { vlog_error( "malloc failed at %s:%d\nUnable to determine if double present.\n", __FILE__, __LINE__ ); } + else + { + if((error = clGetDeviceInfo( gDevice, CL_DEVICE_EXTENSIONS, extSize, ext, NULL))) + { vlog_error( "Unable to get device extension string to see if double present. 
(%d) \n", error ); } + else + { + if( strstr( ext, "cl_khr_fp64" )) + hasDouble = 1; + } + free(ext); + } + } + gTestDouble ^= hasDouble; + + + + //detect whether profile of the device is embedded + char profile[64] = ""; + if( (error = clGetDeviceInfo( gDevice, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL ) ) ) + { + vlog_error( "Unable to get device CL DEVICE PROFILE string. (%d) \n", error ); + } + else if( strstr(profile, "EMBEDDED_PROFILE" ) ) + { + gIsEmbedded = 1; + } + + vlog( "%d compute devices at %f GHz\n", gComputeDevices, (double) gDeviceFrequency / 1000. ); + vlog( "Max thread group size is %lld.\n", (uint64_t) gMaxThreadGroupSize ); + + gContext = clCreateContext( NULL, 1, &gDevice, notify_callback, NULL, &error ); + if( NULL == gContext ) + { + vlog_error( "clCreateDeviceGroup failed. (%d)\n", error ); + return -1; + } + + gQueue = clCreateCommandQueueWithProperties(gContext, gDevice, 0, &error); + if( NULL == gQueue ) + { + vlog_error( "clCreateContext failed. (%d)\n", error ); + return -2; + } + +#if defined( __APPLE__ ) + // FIXME: use clProtectedArray +#endif + //Allocate buffers + gIn_half = malloc( getBufferSize(gDevice)/2 ); + gOut_half = malloc( BUFFER_SIZE/2 ); + gOut_half_reference = malloc( BUFFER_SIZE/2 ); + gOut_half_reference_double = malloc( BUFFER_SIZE/2 ); + gIn_single = malloc( BUFFER_SIZE ); + gOut_single = malloc( getBufferSize(gDevice) ); + gOut_single_reference = malloc( getBufferSize(gDevice) ); + gIn_double = malloc( 2*BUFFER_SIZE ); + // gOut_double = malloc( (2*getBufferSize(gDevice)) ); + // gOut_double_reference = malloc( (2*getBufferSize(gDevice)) ); + + if ( NULL == gIn_half || + NULL == gOut_half || + NULL == gOut_half_reference || + NULL == gOut_half_reference_double || + NULL == gIn_single || + NULL == gOut_single || + NULL == gOut_single_reference || + NULL == gIn_double // || NULL == gOut_double || NULL == gOut_double_reference + ) + return -3; + + gInBuffer_half = clCreateBuffer(gContext, CL_MEM_READ_ONLY, 
getBufferSize(gDevice) / 2, NULL, &error); + if( gInBuffer_half == NULL ) + { + vlog_error( "clCreateArray failed for input (%d)\n", error ); + return -4; + } + + gInBuffer_single = clCreateBuffer(gContext, CL_MEM_READ_ONLY, BUFFER_SIZE, NULL, &error ); + if( gInBuffer_single == NULL ) + { + vlog_error( "clCreateArray failed for input (%d)\n", error ); + return -4; + } + + gInBuffer_double = clCreateBuffer(gContext, CL_MEM_READ_ONLY, BUFFER_SIZE*2, NULL, &error ); + if( gInBuffer_double == NULL ) + { + vlog_error( "clCreateArray failed for input (%d)\n", error ); + return -4; + } + + gOutBuffer_half = clCreateBuffer(gContext, CL_MEM_WRITE_ONLY, BUFFER_SIZE/2, NULL, &error ); + if( gOutBuffer_half == NULL ) + { + vlog_error( "clCreateArray failed for output (%d)\n", error ); + return -5; + } + + gOutBuffer_single = clCreateBuffer(gContext, CL_MEM_WRITE_ONLY, getBufferSize(gDevice), NULL, &error ); + if( gOutBuffer_single == NULL ) + { + vlog_error( "clCreateArray failed for output (%d)\n", error ); + return -5; + } + +#if 0 + gOutBuffer_double = clCreateBuffer(gContext, CL_MEM_WRITE_ONLY, (size_t)(2*getBufferSize(gDevice)), NULL, &error ); + if( gOutBuffer_double == NULL ) + { + vlog_error( "clCreateArray failed for output (%d)\n", error ); + return -5; + } +#endif + + char string[16384]; + vlog( "\nCompute Device info:\n" ); + error = clGetDeviceInfo(gDevice, CL_DEVICE_NAME, sizeof(string), string, NULL); + vlog( "\tDevice Name: %s\n", string ); + error = clGetDeviceInfo(gDevice, CL_DEVICE_VENDOR, sizeof(string), string, NULL); + vlog( "\tVendor: %s\n", string ); + error = clGetDeviceInfo(gDevice, CL_DEVICE_VERSION, sizeof(string), string, NULL); + vlog( "\tDevice Version: %s\n", string ); + error = clGetDeviceInfo(gDevice, CL_DEVICE_OPENCL_C_VERSION, sizeof(string), string, NULL); + vlog( "\tOpenCL C Version: %s\n", string ); + error = clGetDeviceInfo(gDevice, CL_DRIVER_VERSION, sizeof(string), string, NULL); + vlog( "\tDriver Version: %s\n", string ); + vlog( 
"\tProcessing with %d devices\n", gComputeDevices ); + vlog( "\tDevice Frequency: %d MHz\n", gDeviceFrequency ); + vlog( "\tHas double? %s\n", hasDouble ? "YES" : "NO" ); + vlog( "\tTest double? %s\n", gTestDouble ? "YES" : "NO" ); + + return 0; +} + +cl_program MakeProgram( const char *source[], int count ) +{ + int error; + int i; + + //create the program + cl_program program; + error = create_single_kernel_helper_create_program(gContext, &program, (cl_uint)count, source); + if( NULL == program ) + { + vlog_error( "\t\tFAILED -- Failed to create program. (%d)\n", error ); + return NULL; + } + + // build it + if( (error = clBuildProgram( program, 1, &gDevice, NULL, NULL, NULL )) ) + { + size_t len; + char buffer[16384]; + + vlog_error("\t\tFAILED -- clBuildProgramExecutable() failed:\n"); + clGetProgramBuildInfo(program, gDevice, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len); + vlog_error("Log: %s\n", buffer); + vlog_error("Source :\n"); + for(i = 0; i < count; ++i) { + vlog_error("%s", source[i]); + } + vlog_error("\n"); + + clReleaseProgram( program ); + return NULL; + } + + return program; +} + +void ReleaseCL(void) +{ + clReleaseMemObject(gInBuffer_half); + clReleaseMemObject(gOutBuffer_half); + clReleaseMemObject(gInBuffer_single); + clReleaseMemObject(gOutBuffer_single); + clReleaseMemObject(gInBuffer_double); + // clReleaseMemObject(gOutBuffer_double); + clReleaseCommandQueue(gQueue); + clReleaseContext(gContext); +} + +cl_uint numVecs(cl_uint count, int vectorSizeIdx, bool aligned) { + if(aligned && g_arrVecSizes[vectorSizeIdx] == 3) { + return count/4; + } + return (count + g_arrVecSizes[vectorSizeIdx] - 1)/ + ( (g_arrVecSizes[vectorSizeIdx]) ); +} + +cl_uint runsOverBy(cl_uint count, int vectorSizeIdx, bool aligned) { + if(aligned || g_arrVecSizes[vectorSizeIdx] != 3) { return -1; } + return count% (g_arrVecSizes[vectorSizeIdx]); +} + +void printSource(const char * src[], int len) { + int i; + for(i = 0; i < len; ++i) { + vlog("%s", src[i]); + } +} 
+ +int RunKernel( cl_kernel kernel, void *inBuf, void *outBuf, uint32_t blockCount , int extraArg) +{ + size_t localCount = blockCount; + size_t wg_size; + int error; + + error = clSetKernelArg(kernel, 0, sizeof inBuf, &inBuf); + error |= clSetKernelArg(kernel, 1, sizeof outBuf, &outBuf); + + if(extraArg >= 0) { + error |= clSetKernelArg(kernel, 2, sizeof(cl_uint), &extraArg); + } + + if( error ) + { + vlog_error( "FAILED -- could not set kernel args\n" ); + return -3; + } + + error = clGetKernelWorkGroupInfo(kernel, gDevice, CL_KERNEL_WORK_GROUP_SIZE, sizeof( wg_size ), &wg_size, NULL); + if (error) + { + vlog_error( "FAILED -- could not get kernel work group info\n" ); + return -4; + } + + wg_size = (wg_size > gWorkGroupSize) ? gWorkGroupSize : wg_size; + while( localCount % wg_size ) + wg_size--; + + if( (error = clEnqueueNDRangeKernel( gQueue, kernel, 1, NULL, &localCount, &wg_size, 0, NULL, NULL )) ) + { + vlog_error( "FAILED -- could not execute kernel\n" ); + return -5; + } + + return 0; +} + +#if defined (__APPLE__ ) + +#include + +uint64_t ReadTime( void ) +{ + return mach_absolute_time(); // returns time since boot. Ticks have better than microsecond precsion. +} + +double SubtractTime( uint64_t endTime, uint64_t startTime ) +{ + static double conversion = 0.0; + + if( 0.0 == conversion ) + { + mach_timebase_info_data_t info; + kern_return_t err = mach_timebase_info( &info ); + if( 0 == err ) + conversion = 1e-9 * (double) info.numer / (double) info.denom; + } + + return (double) (endTime - startTime) * conversion; +} + +#elif defined( _WIN32 ) && defined (_MSC_VER) + +// functions are defined in compat.h + +#else + +// +// Please feel free to substitute your own timing facility here. +// + +#warning Times are meaningless. No timing facility in place for this platform. 
+uint64_t ReadTime( void ) +{ + return 0ULL; +} + +// return the difference between two times obtained from ReadTime in seconds +double SubtractTime( uint64_t endTime, uint64_t startTime ) +{ + return INFINITY; +} + +#endif + +#if !defined( __APPLE__ ) +void memset_pattern4(void *dest, const void *src_pattern, size_t bytes ) +{ + uint32_t pat = ((uint32_t*) src_pattern)[0]; + size_t count = bytes / 4; + size_t i; + uint32_t *d = (uint32_t*)dest; + + for( i = 0; i < count; i++ ) + d[i] = pat; + + d += i; + + bytes &= 3; + if( bytes ) + memcpy( d, src_pattern, bytes ); +} +#endif + +size_t getBufferSize(cl_device_id device_id) +{ + static int s_initialized = 0; + static cl_device_id s_device_id; + static cl_ulong s_result = 64*1024; + + if(s_initialized == 0 || s_device_id != device_id) + { + cl_ulong result; + cl_int err = clGetDeviceInfo (device_id, + CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, + sizeof(result), (void *)&result, + NULL); + if(err) + { + vlog_error("clGetDeviceInfo() failed\n"); + s_result = 64*1024; + goto exit; + } + result = result / 2; + log_info("Const buffer size is %llx (%llu)\n", result, result); + s_initialized = 1; + s_device_id = device_id; + s_result = result; + } + +exit: + if( s_result > SIZE_MAX ) + { + vlog_error( "ERROR: clGetDeviceInfo is reporting a CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE larger than addressable memory on the host.\n It seems highly unlikely that this is usable, due to the API design.\n" ); + fflush(stdout); + abort(); + } + + return (size_t) s_result; +} + +cl_ulong getBufferCount(cl_device_id device_id, size_t vecSize, size_t typeSize) +{ + cl_ulong tmp = getBufferSize(device_id); + if(vecSize == 3) + { + return tmp/(cl_ulong)(4*typeSize); + } + return tmp/(cl_ulong)(vecSize*typeSize); +} diff --git a/test_conformance/half/cl_utils.h b/test_conformance/half/cl_utils.h new file mode 100644 index 00000000..9a720efd --- /dev/null +++ b/test_conformance/half/cl_utils.h @@ -0,0 +1,162 @@ +// +// Copyright (c) 2017 The Khronos 
Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef CL_UTILS_H +#define CL_UTILS_H + +#include "../../test_common/harness/compat.h" + +#include + +#if !defined(_WIN32) +#include +#endif + + +#ifdef __MINGW32__ +#define __mingw_printf printf +#endif +#include "../../test_common/harness/errorHelpers.h" + +#include "../../test_common/harness/ThreadPool.h" + + + +#include "test_config.h" + +#ifdef __APPLE__ +#include +#else +#include +#endif + +extern void *gIn_half; +extern void *gOut_half; +extern void *gOut_half_reference; +extern void *gOut_half_reference_double; +extern void *gIn_single; +extern void *gOut_single; +extern void *gOut_single_reference; +extern void *gIn_double; +// extern void *gOut_double; +// extern void *gOut_double_reference; +extern cl_mem gInBuffer_half; +extern cl_mem gOutBuffer_half; +extern cl_mem gInBuffer_single; +extern cl_mem gOutBuffer_single; +extern cl_mem gInBuffer_double; +// extern cl_mem gOutBuffer_double; + +extern uint32_t gDeviceIndex; +extern cl_device_type gDeviceType; +extern cl_device_id gDevice; +extern cl_context gContext; +extern cl_command_queue gQueue; +extern uint32_t gDeviceFrequency; +extern uint32_t gComputeDevices; +extern size_t gMaxThreadGroupSize; +extern size_t gWorkGroupSize; +extern int gTestCount; +extern int gFailCount; +extern int gTestDouble; +extern int gReportTimes; +extern int gIsEmbedded; + +// gWimpyMode indicates if we run the test in wimpy mode where we limit the +// 
size of 32 bit ranges to a much smaller set. This is meant to be used +// as a smoke test +extern bool gWimpyMode; + +uint64_t ReadTime( void ); +double SubtractTime( uint64_t endTime, uint64_t startTime ); + +cl_uint numVecs(cl_uint count, int vectorSizeIdx, bool aligned); +cl_uint runsOverBy(cl_uint count, int vectorSizeIdx, bool aligned); + +void printSource(const char * src[], int len); + +extern const char *vector_size_name_extensions[kVectorSizeCount+kStrangeVectorSizeCount]; +extern const char *vector_size_strings[kVectorSizeCount+kStrangeVectorSizeCount]; +extern const char *align_divisors[kVectorSizeCount+kStrangeVectorSizeCount]; +extern const char *align_types[kVectorSizeCount+kStrangeVectorSizeCount]; + +int InitCL( void ); +void ReleaseCL( void ); +int RunKernel( cl_kernel kernel, void *inBuf, void *outBuf, uint32_t blockCount , int extraArg); +cl_program MakeProgram( const char *source[], int count ); + +#if ! defined( __APPLE__ ) + extern void memset_pattern4(void *dest, const void *src_pattern, size_t bytes ); +#endif + +#define STRING( _x ) STRINGIFY( _x ) +#define STRINGIFY(x) #x + +static inline float as_float(cl_uint u) { union { cl_uint u; float f; }v; v.u = u; return v.f; } +static inline double as_double(cl_ulong u) { union { cl_ulong u; double d; }v; v.u = u; return v.d; } + +// used to convert a bucket of bits into a search pattern through double +static inline cl_ulong DoubleFromUInt( cl_uint bits ); +static inline cl_ulong DoubleFromUInt( cl_uint bits ) +{ + // split 0x89abcdef to 0x89abcd00000000ef + cl_ulong u = ((cl_ulong)(bits & ~0xffU) << 32) | ((cl_ulong)(bits & 0xffU)); + + // sign extend the leading bit of def segment as sign bit so that the middle region consists of either all 1s or 0s + u -= (cl_ulong)((bits & 0x80U) << 1); + + return u; +} + +static inline int IsHalfSubnormal( uint16_t x ) +{ + return ((x&0x7fffU)-1U) < 0x03ffU; +} + +// prevent silent failures due to missing FLT_RADIX +#ifndef FLT_RADIX + #error FLT_RADIX is 
not defined by float.h +#endif + +static inline int IsFloatSubnormal( double x ) +{ +#if 2 == FLT_RADIX + // Do this in integer to avoid problems with FTZ behavior + union{ float d; uint32_t u;}u; + u.d = fabsf((float) x); + return (u.u-1) < 0x007fffffU; +#else + // rely on floating point hardware for non-radix2 non-IEEE-754 hardware -- will fail if you flush subnormals to zero + return fabs(x) < (double) FLT_MIN && x != 0.0; +#endif +} + +static inline int IsDoubleSubnormal( long double x ) +{ +#if 2 == FLT_RADIX + // Do this in integer to avoid problems with FTZ behavior + union{ double d; uint64_t u;}u; + u.d = fabs((double)x); + return (u.u-1) < 0x000fffffffffffffULL; +#else + // rely on floating point hardware for non-radix2 non-IEEE-754 hardware -- will fail if you flush subnormals to zero + return fabs(x) < (double) DBL_MIN && x != 0.0; +#endif +} + +#endif /* CL_UTILS_H */ + + + diff --git a/test_conformance/half/main.c b/test_conformance/half/main.c new file mode 100644 index 00000000..fe3b6f17 --- /dev/null +++ b/test_conformance/half/main.c @@ -0,0 +1,434 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include +#include +#include + +#if !defined (_WIN32) +#include +#if !defined(__ANDROID__) +#include +#endif +#include +#include +#endif + +#include "../../test_common/harness/mingw_compat.h" +#include "../../test_common/harness/parseParameters.h" +#if defined (__MINGW32__) +#include +#endif + +#include "cl_utils.h" +#include "tests.h" + +const char ** argList = NULL; +size_t argCount = 0; +char appName[64] = "ctest"; +const char *addressSpaceNames[] = {"global", "private", "local", "constant"}; + +#pragma mark - +#pragma mark Declarations + + +static int ParseArgs( int argc, const char **argv ); +static void PrintUsage( void ); +static void PrintArch(void); +static void PrintDevice(void); +static int DoTest( void); + + +int g_arrVecSizes[kVectorSizeCount+kStrangeVectorSizeCount]; +int g_arrVecAligns[kLargestVectorSize+1]; +static int arrStrangeVecSizes[kStrangeVectorSizeCount] = {3}; + +int main (int argc, const char **argv ) +{ + int error; + int i; + int alignbound; + + for(i = 0; i < kVectorSizeCount; ++i) { + g_arrVecSizes[i] = (1< 1) + vlog("PASSED %d of %d tests.\n", gTestCount, gTestCount); + else + vlog("PASSED test.\n"); + } else if (gFailCount > 0) { + if (gFailCount+gTestCount > 1) + vlog_error("FAILED %d of %d tests.\n", gFailCount, gTestCount+gFailCount); + else + vlog_error("FAILED test.\n"); + } + + if (gQueue) { + int flush_error = clFinish(gQueue); + if (flush_error) + vlog_error("clFinish failed: %d\n", flush_error); + } + + ReleaseCL(); + test_finish(); + + if (gFailCount) + return gFailCount; + + return error; +} + +#pragma mark - +#pragma mark setup + +static int ParseArgs( int argc, const char **argv ) +{ + int i; + argList = (const char **)calloc( argc - 1, sizeof( char*) ); + + argCount = 0; + + if( NULL == argList && argc > 1 ) + return -1; + +#if (defined( __APPLE__ ) || defined(__linux__) || defined(__MINGW32__)) + { // Extract the app name + char baseName[ MAXPATHLEN ]; + strncpy( baseName, argv[0], MAXPATHLEN ); + char *base = 
basename( baseName ); + if( NULL != base ) + { + strncpy( appName, base, sizeof( appName ) ); + appName[ sizeof( appName ) -1 ] = '\0'; + } + } +#elif defined (_WIN32) + { + char fname[_MAX_FNAME + _MAX_EXT + 1]; + char ext[_MAX_EXT]; + + errno_t err = _splitpath_s( argv[0], NULL, 0, NULL, 0, + fname, _MAX_FNAME, ext, _MAX_EXT ); + if (err == 0) { // no error + strcat (fname, ext); //just cat them, size of frame can keep both + strncpy (appName, fname, sizeof(appName)); + appName[ sizeof( appName ) -1 ] = '\0'; + } + } +#endif + + /* Check for environment variable to set device type */ + char *env_mode = getenv( "CL_DEVICE_TYPE" ); + if( env_mode != NULL ) + { + if( strcmp( env_mode, "gpu" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_GPU" ) == 0 ) + gDeviceType = CL_DEVICE_TYPE_GPU; + else if( strcmp( env_mode, "cpu" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_CPU" ) == 0 ) + gDeviceType = CL_DEVICE_TYPE_CPU; + else if( strcmp( env_mode, "accelerator" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_ACCELERATOR" ) == 0 ) + gDeviceType = CL_DEVICE_TYPE_ACCELERATOR; + else if( strcmp( env_mode, "default" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_DEFAULT" ) == 0 ) + gDeviceType = CL_DEVICE_TYPE_DEFAULT; + else + { + vlog_error( "Unknown CL_DEVICE_TYPE env variable setting: %s.\nAborting...\n", env_mode ); + abort(); + } + } + + unsigned int num_devices; + cl_platform_id platform = NULL; + clGetPlatformIDs(1, &platform, NULL); + clGetDeviceIDs(platform, gDeviceType, 0, NULL, &num_devices); + + const char* device_index_env = getenv("CL_DEVICE_INDEX"); + if (device_index_env) { + if (device_index_env) { + gDeviceIndex = atoi(device_index_env); + } + + if (gDeviceIndex >= num_devices) { + vlog("Specified CL_DEVICE_INDEX=%d out of range, using index 0.\n", + gDeviceIndex); + gDeviceIndex = 0; + } + } + + vlog( "\n%s", appName ); + for( i = 1; i < argc; i++ ) + { + const char *arg = argv[i]; + if( NULL == arg ) + break; + + vlog( "\t%s", arg ); + if( arg[0] == '-' ) + { + 
arg++; + while( *arg != '\0' ) + { + switch( *arg ) + { + case 'd': + gTestDouble ^= 1; + break; + + case 'h': + PrintUsage(); + return -1; + + case 't': + gReportTimes ^= 1; + break; + + case 'w': // Wimpy mode + gWimpyMode = true; + break; + + default: + vlog_error( " <-- unknown flag: %c (0x%2.2x)\n)", *arg, *arg ); + PrintUsage(); + return -1; + } + arg++; + } + } + else + { + if( 0 == strcmp( arg, "CL_DEVICE_TYPE_CPU" ) ) + gDeviceType = CL_DEVICE_TYPE_CPU; + else if( 0 == strcmp( arg, "CL_DEVICE_TYPE_GPU" ) ) + gDeviceType = CL_DEVICE_TYPE_GPU; + else if( 0 == strcmp( arg, "CL_DEVICE_TYPE_ACCELERATOR" ) ) + gDeviceType = CL_DEVICE_TYPE_ACCELERATOR; + else if( 0 == strcmp( arg, "CL_DEVICE_TYPE_DEFAULT" ) ) + gDeviceType = CL_DEVICE_TYPE_DEFAULT; + else + { + argList[ argCount ] = arg; + argCount++; + } + } + } + + if (getenv("CL_WIMPY_MODE")) { + vlog( "\n" ); + vlog( "*** Detected CL_WIMPY_MODE env ***\n" ); + gWimpyMode = 1; + } + + vlog( "Test binary built %s %s\n", __DATE__, __TIME__ ); + PrintArch(); + PrintDevice(); + if( gWimpyMode ) + { + vlog( "\n" ); + vlog( "*** WARNING: Testing in Wimpy mode! ***\n" ); + vlog( "*** Wimpy mode is not sufficient to verify correctness. ***\n" ); + vlog( "*** It gives warm fuzzy feelings and then nevers calls. 
***\n\n" ); + } + return 0; +} + +static void PrintUsage( void ) +{ + vlog( "%s [-dthw]: \n", appName ); + vlog( "\t\t-d\tToggle double precision testing (default: on if double supported)\n" ); + vlog( "\t\t-t\tToggle reporting performance data.\n" ); + vlog( "\t\t-w\tRun in wimpy mode\n" ); + vlog( "\t\t-h\tHelp\n" ); + vlog( "\n" ); +} + +static void PrintArch( void ) +{ + vlog( "sizeof( void*) = %ld\n", sizeof( void *) ); + +#if defined( __APPLE__ ) +#if defined( __ppc__ ) + vlog( "ARCH:\tppc\n" ); +#elif defined( __ppc64__ ) + vlog( "ARCH:\tppc64\n" ); +#elif defined( __i386__ ) + vlog( "ARCH:\ti386\n" ); +#elif defined( __x86_64__ ) + vlog( "ARCH:\tx86_64\n" ); +#elif defined( __arm__ ) + vlog( "ARCH:\tarm\n" ); +#else +#error unknown arch +#endif + + int type = 0; + size_t typeSize = sizeof( type ); + sysctlbyname( "hw.cputype", &type, &typeSize, NULL, 0 ); + vlog( "cpu type:\t%d\n", type ); + typeSize = sizeof( type ); + sysctlbyname( "hw.cpusubtype", &type, &typeSize, NULL, 0 ); + vlog( "cpu subtype:\t%d\n", type ); +#endif +} + +static void PrintDevice( void) +{ + switch(gDeviceType) { + case CL_DEVICE_TYPE_CPU: + vlog( "DEVICE:\tcpu\n" ); + break; + case CL_DEVICE_TYPE_GPU: + vlog( "DEVICE:\tgpu\n" ); + break; + case CL_DEVICE_TYPE_ACCELERATOR: + vlog( "DEVICE:\taccelerator\n" ); + break; + default: + vlog_error( "DEVICE:\tunknown\n" ); + break; + } +} + +static int DoTest( void ) +{ + int error = 0; + + if( 0 == argCount ) + { // test all + if( (error = Test_vload_half()) ) + return error; + + if( (error = Test_vloada_half()) ) + return error; + + if( (error = Test_vstore_half()) ) + return error; + + if( (error = Test_vstorea_half()) ) + return error; + + if( (error = Test_vstore_half_rte()) ) + return error; + + if( (error = Test_vstorea_half_rte()) ) + return error; + + if( (error = Test_vstore_half_rtz()) ) + return error; + + if( (error = Test_vstorea_half_rtz()) ) + return error; + + if( (error = Test_vstore_half_rtp()) ) + return error; + + if( 
(error = Test_vstorea_half_rtp()) ) + return error; + + if( (error = Test_vstore_half_rtn()) ) + return error; + + if( (error = Test_vstorea_half_rtn()) ) + return error; + + if( (error = Test_roundTrip()) ) + return error; + } + else + { + typedef struct{ int (*f)(void); const char *name; }TestItem; +#define ENTRY( _x ) { Test_ ## _x, STRINGIFY(_x) } + static const TestItem list[] = + { + ENTRY(vload_half), + ENTRY(vloada_half), + ENTRY(vstore_half), + ENTRY(vstorea_half), + ENTRY(vstore_half_rte), + ENTRY(vstorea_half_rte), + ENTRY(vstore_half_rtz), + ENTRY(vstorea_half_rtz), + ENTRY(vstore_half_rtp), + ENTRY(vstorea_half_rtp), + ENTRY(vstore_half_rtn), + ENTRY(vstorea_half_rtn), + ENTRY(roundTrip) + }; + static const size_t list_count = sizeof( list ) / sizeof( list[0] ); + + size_t i, j; + for( i = 0; i < argCount; i++ ) + { + const char *argp = argList[i]; + for( j = 0; j < list_count; j++ ) + { + if( 0 == strcmp(argp, list[j].name) ) + { + if( (error = list[j].f()) ) + return error; + + break; + } + } + if( j == list_count ) + { + vlog_error( "Unknown test name: %s\n. Exiting...\n", argp ); + return -5; + } + } + } + + return error; +} + + diff --git a/test_conformance/half/test_config.h b/test_conformance/half/test_config.h new file mode 100644 index 00000000..a81f429d --- /dev/null +++ b/test_conformance/half/test_config.h @@ -0,0 +1,41 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef TEST_CONFIG_H +#define TEST_CONFIG_H + +#define MULTITHREAD 1 + +#define kVectorSizeCount 5 +#define kStrangeVectorSizeCount 1 +#define kMinVectorSize 0 +#define kLargestVectorSize (1 << (kVectorSizeCount-1)) + +#define kLastVectorSizeToTest (kVectorSizeCount + kStrangeVectorSizeCount) + +#define BUFFER_SIZE ((size_t)2 * 1024 * 1024) + +extern size_t getBufferSize(cl_device_id device_id); +extern cl_ulong getBufferCount(cl_device_id device_id, size_t vecSize, size_t typeSize); +// could call +// CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE +#define kPageSize 4096 + +extern int g_arrVecSizes[kVectorSizeCount+kStrangeVectorSizeCount]; +extern int g_arrVecAligns[kLargestVectorSize+1]; + +#endif /* TEST_CONFIG_H */ + + diff --git a/test_conformance/half/tests.h b/test_conformance/half/tests.h new file mode 100644 index 00000000..bf869658 --- /dev/null +++ b/test_conformance/half/tests.h @@ -0,0 +1,41 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef TESTS_H +#define TESTS_H + + +int Test_vload_half( void ); +int Test_vloada_half( void ); +int Test_vstore_half( void ); +int Test_vstorea_half( void ); +int Test_vstore_half_rte( void ); +int Test_vstorea_half_rte( void ); +int Test_vstore_half_rtz( void ); +int Test_vstorea_half_rtz( void ); +int Test_vstore_half_rtp( void ); +int Test_vstorea_half_rtp( void ); +int Test_vstore_half_rtn( void ); +int Test_vstorea_half_rtn( void ); +int Test_roundTrip( void ); + +typedef cl_ushort (*f2h)( float ); +typedef cl_ushort (*d2h)( double ); +int Test_vStoreHalf_private( f2h referenceFunc, d2h referenceDoubleFunc, const char *roundName ); +int Test_vStoreaHalf_private( f2h referenceFunc, d2h referenceDoubleFunc, const char *roundName ); + +#endif /* TESTS_H */ + + diff --git a/test_conformance/headers/CMakeLists.txt b/test_conformance/headers/CMakeLists.txt new file mode 100644 index 00000000..eff8e286 --- /dev/null +++ b/test_conformance/headers/CMakeLists.txt @@ -0,0 +1,87 @@ +set(HEADERS_SOURCES + test_headers.c +) + +set(HEADERS_OUT ${CONFORMANCE_PREFIX}headers${CONFORMANCE_SUFFIX}) + +add_executable( + ${HEADERS_OUT} + ${HEADERS_SOURCES} +) + +set_property(TARGET ${HEADERS_OUT} PROPERTY FOLDER "CONFORMANCE${CONFORMANCE_SUFFIX}") + +TARGET_LINK_LIBRARIES(${HEADERS_OUT} ${CLConform_LIBRARIES}) + +######################################################################################## + +set(CL_H_SOURCES + test_cl.h.c +) + +set(CL_H_OUT ${CONFORMANCE_PREFIX}cl_h${CONFORMANCE_SUFFIX}) + +add_executable( + ${CL_H_OUT} + ${CL_H_SOURCES} +) + +set_property(TARGET ${CL_H_OUT} PROPERTY FOLDER "CONFORMANCE${CONFORMANCE_SUFFIX}") + +TARGET_LINK_LIBRARIES(${CL_H_OUT} ${CLConform_LIBRARIES}) + +######################################################################################## + +set(CL_PLATFORM_H_SOURCES + test_cl_platform.h.c +) + +set(CL_PLATFORM_H_OUT ${CONFORMANCE_PREFIX}cl_platform_h${CONFORMANCE_SUFFIX}) + +add_executable( + ${CL_PLATFORM_H_OUT} + 
${CL_PLATFORM_H_SOURCES} +) + +set_property(TARGET ${CL_PLATFORM_H_OUT} PROPERTY FOLDER "CONFORMANCE${CONFORMANCE_SUFFIX}") + +TARGET_LINK_LIBRARIES(${CL_PLATFORM_H_OUT} ${CLConform_LIBRARIES}) + +######################################################################################## + +set(CL_GL_H_SOURCES + test_cl_gl.h.c +) + +set(CL_GL_H_OUT ${CONFORMANCE_PREFIX}cl_gl_h${CONFORMANCE_SUFFIX}) + +add_executable( + ${CL_GL_H_OUT} + ${CL_GL_H_SOURCES} +) + +set_property(TARGET ${CL_GL_H_OUT} PROPERTY FOLDER "CONFORMANCE${CONFORMANCE_SUFFIX}") + +TARGET_LINK_LIBRARIES(${CL_GL_H_OUT} ${CLConform_LIBRARIES}) + +######################################################################################## + +set(OPENCL_H_SOURCES + test_opencl.h.c +) + +set(OPENCL_H_OUT ${CONFORMANCE_PREFIX}opencl_h${CONFORMANCE_SUFFIX}) + +add_executable( + ${OPENCL_H_OUT} + ${OPENCL_H_SOURCES} +) + +set_property(TARGET ${OPENCL_H_OUT} PROPERTY FOLDER "CONFORMANCE${CONFORMANCE_SUFFIX}") + +TARGET_LINK_LIBRARIES(${OPENCL_H_OUT} ${CLConform_LIBRARIES}) + +######################################################################################## + + +# end of file # diff --git a/test_conformance/headers/Makefile b/test_conformance/headers/Makefile new file mode 100644 index 00000000..dc649238 --- /dev/null +++ b/test_conformance/headers/Makefile @@ -0,0 +1,38 @@ +ifdef BUILD_WITH_ATF +ATF = -framework ATF +USE_ATF = -DUSE_ATF +endif + +EXECUTABLES = test_headers \ + test_cl_h \ + test_cl_platform_h \ + test_cl_gl_h \ + test_opencl_h \ + test_cl_h_c99 \ + test_cl_platform_h_c99 \ + test_cl_gl_h_c99 \ + test_opencl_h_c99 + +INCLUDE = +HCFLAGS = ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE) +CFLAGS = -Wall -pedantic ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE) +LIBRARIES = ${ATF} + +all : debug +release : debug +debug: + cc $(HCFLAGS) test_headers.c $(LIBRARIES) -o test_headers + cc $(CFLAGS) test_cl.h.c $(LIBRARIES) -o test_cl_h + cc $(CFLAGS) test_cl_platform.h.c $(LIBRARIES) -o 
test_cl_platform_h + cc $(CFLAGS) test_cl_gl.h.c $(LIBRARIES) -o test_cl_gl_h + cc $(CFLAGS) test_opencl.h.c $(LIBRARIES) -o test_opencl_h + cc $(CFLAGS) -std=c99 test_cl.h.c $(LIBRARIES) -o test_cl_h_c99 + cc $(CFLAGS) -std=c99 test_cl_platform.h.c $(LIBRARIES) -o test_cl_platform_h_c99 + cc $(CFLAGS) -std=c99 test_cl_gl.h.c $(LIBRARIES) -o test_cl_gl_h_c99 + cc $(CFLAGS) -std=c99 test_opencl.h.c $(LIBRARIES) -o test_opencl_h_c99 + +install: + + +clean: + rm -f $(EXECUTABLES) diff --git a/test_conformance/headers/README.txt b/test_conformance/headers/README.txt new file mode 100644 index 00000000..52bfea7a --- /dev/null +++ b/test_conformance/headers/README.txt @@ -0,0 +1,26 @@ +test_conformance/headers README +=============================== + +The test_headers.c test is designed to make sure that the various +cl_typen types work and conform to expectation for recent versions +of cl_platform.h. Conforming to these expectations makes use of +these types practical for developers writing portable code. + +The various tests ending in .h.c are there to verify that the various +OpenCL headers can compile stand alone. That is to ensure that they +may be used a la carte. This provides developers a lifeline in the case +that some unneeded part of OpenCL (e.g. cl/gl sharing) brings in a pile +of symbols (e.g. all of OpenGL) that collides with other headers needed +by the application. It is also poor form to require headers to be +included in a particular order, especially if multiple systems require +they be included in mutually incompatible order. So, here we require +that each header can be used standalone so that the order is irrelevant. + +In the .h.c tests, we also check to make sure that the headers don't +cause spurious warnings. These tests are intended to be compiled using +the most stringent compiler flags available for the platform, within 
All warnings should be errors and extra warnings that it is +expected developers are likely to use should be turned on. The current +Makefile includes such flags for a GCC compiler. Implementors are +expected to modify these flags and make system as necessary to conform +to the local build environment. diff --git a/test_conformance/headers/test_cl.h.c b/test_conformance/headers/test_cl.h.c new file mode 100644 index 00000000..1fabcba9 --- /dev/null +++ b/test_conformance/headers/test_cl.h.c @@ -0,0 +1,27 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#if defined( __APPLE__ ) + #include +#else + #include +#endif +#include + +int main( void ) +{ + printf("cl.h standalone test PASSED.\n"); + return 0; +} diff --git a/test_conformance/headers/test_cl_gl.h.c b/test_conformance/headers/test_cl_gl.h.c new file mode 100644 index 00000000..d20dfc07 --- /dev/null +++ b/test_conformance/headers/test_cl_gl.h.c @@ -0,0 +1,27 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#if defined( __APPLE__ ) + #include +#else + #include +#endif +#include + +int main( void ) +{ + printf("cl_gl.h standalone test PASSED.\n"); + return 0; +} diff --git a/test_conformance/headers/test_cl_platform.h.c b/test_conformance/headers/test_cl_platform.h.c new file mode 100644 index 00000000..7fdd6ff7 --- /dev/null +++ b/test_conformance/headers/test_cl_platform.h.c @@ -0,0 +1,27 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#if defined( __APPLE__ ) + #include +#else + #include +#endif +#include + +int main( void ) +{ + printf("cl_platform.h standalone test PASSED.\n"); + return 0; +} diff --git a/test_conformance/headers/test_headers.c b/test_conformance/headers/test_headers.c new file mode 100644 index 00000000..6fe0f558 --- /dev/null +++ b/test_conformance/headers/test_headers.c @@ -0,0 +1,650 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#if defined( __APPLE__ ) + #include +#else + #include +#endif +#include +#include "../../test_common/harness/errorHelpers.h" + + +void test_char( void ); +void test_uchar( void ); +void test_short( void ); +void test_ushort( void ); +void test_int( void ); +void test_uint( void ); +void test_long( void ); +void test_ulong( void ); +void test_float( void ); +void test_double( void ); + +void test_char( void ) +{ +/* char */ + /* Constructor */ + cl_char a = 0; + cl_char2 a2 = {{ 0, 1 }}; + cl_char4 a4 = {{ 0, 1, 2, 3 }}; + cl_char8 a8 = {{ 0, 1, 2, 3, 4, 5, 6, 7 }}; + cl_char16 a16 = {{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }}; + + /* assignment */ + cl_char b = a; + cl_char2 b2 = a2; + cl_char4 b4 = a4; + cl_char8 b8 = a8; + cl_char16 b16 = a16; + + log_info("\nVerifying assignment:\n" ); + log_info("b: %d\n", b ); + log_info("b2: %d %d \n", b2.s[0], b2.s[1] ); + log_info("b4: %d %d %d %d\n", b4.s[0], b4.s[1], b4.s[2], b4.s[3] ); + log_info("b8: %d %d %d %d %d %d %d %d\n", b8.s[0], b8.s[1], b8.s[2], b8.s[3], b8.s[4], b8.s[5], b8.s[6], b8.s[7] ); + log_info("b16: %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d\n", b16.s[0], b16.s[1], b16.s[2], b16.s[3], b16.s[4], b16.s[5], b16.s[6], b16.s[7], + b16.s[8], b16.s[9], b16.s[10], b16.s[11], b16.s[12], b16.s[13], b16.s[14], b16.s[15]); + + /* vector access */ + log_info("\nVerifying vector access:\n" ); +#if defined( __CL_CHAR2__ ) + __cl_char2 v2 
= b2.v2; + log_info("__cl_char2: %d %d \n", ((cl_char*)&v2)[0], ((cl_char*)&v2)[1] ); +#else + log_info( "__cl_char2 SIMD vectors not supported on this architecture.\n" ); +#endif + +#if defined( __CL_CHAR4__ ) + __cl_char4 v4 = b4.v4; + log_info("__cl_char4: %d %d %d %d \n", ((cl_char*)&v4)[0], ((cl_char*)&v4)[1], ((cl_char*)&v4)[2], ((cl_char*)&v4)[3] ); +#else + log_info( "__cl_char4 SIMD vectors not supported on this architecture.\n" ); +#endif + +#if defined( __CL_CHAR8__ ) + __cl_char8 v8 = b8.v8; + log_info("__cl_char8: %d %d %d %d %d %d %d %d \n", ((cl_char*)&v8)[0], ((cl_char*)&v8)[1], ((cl_char*)&v8)[2], ((cl_char*)&v8)[3], ((cl_char*)&v8)[4], ((cl_char*)&v8)[5], ((cl_char*)&v8)[6], ((cl_char*)&v8)[7] ); +#else + log_info( "__cl_char8 SIMD vectors not supported on this architecture.\n" ); +#endif + +#if defined( __CL_CHAR16__ ) + __cl_char16 v16 = b16.v16; + log_info("__cl_char16: %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d \n", ((cl_char*)&v16)[0], ((cl_char*)&v16)[1], ((cl_char*)&v16)[2], ((cl_char*)&v16)[3], ((cl_char*)&v16)[4], ((cl_char*)&v16)[5], ((cl_char*)&v16)[6], ((cl_char*)&v16)[7], + ((cl_char*)&v16)[8], ((cl_char*)&v16)[9], ((cl_char*)&v16)[10], ((cl_char*)&v16)[11], ((cl_char*)&v16)[12], ((cl_char*)&v16)[13], ((cl_char*)&v16)[14], ((cl_char*)&v16)[15]); +#else + log_info( "__cl_char16 SIMD vectors not supported on this architecture.\n" ); +#endif + + log_info( "\n" ); +} + +void test_uchar( void ) +{ +/* uchar */ + /* Constructor */ + cl_uchar a = 0; + cl_uchar2 a2 = {{ 0, 1 }}; + cl_uchar4 a4 = {{ 0, 1, 2, 3 }}; + cl_uchar8 a8 = {{ 0, 1, 2, 3, 4, 5, 6, 7 }}; + cl_uchar16 a16 = {{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }}; + + /* assignment */ + cl_uchar b = a; + cl_uchar2 b2 = a2; + cl_uchar4 b4 = a4; + cl_uchar8 b8 = a8; + cl_uchar16 b16 = a16; + + log_info("\nVerifying assignment:\n" ); + log_info("b: %d\n", b ); + log_info("b2: %d %d \n", b2.s[0], b2.s[1] ); + log_info("b4: %d %d %d %d\n", b4.s[0], b4.s[1], b4.s[2], 
b4.s[3] ); + log_info("b8: %d %d %d %d %d %d %d %d\n", b8.s[0], b8.s[1], b8.s[2], b8.s[3], b8.s[4], b8.s[5], b8.s[6], b8.s[7] ); + log_info("b16: %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d\n", b16.s[0], b16.s[1], b16.s[2], b16.s[3], b16.s[4], b16.s[5], b16.s[6], b16.s[7], + b16.s[8], b16.s[9], b16.s[10], b16.s[11], b16.s[12], b16.s[13], b16.s[14], b16.s[15]); + + /* vector access */ + log_info("\nVerifying vector access:\n" ); +#if defined( __CL_UCHAR2__ ) + __cl_uchar2 v2 = b2.v2; + log_info("__cl_uchar2: %d %d \n", ((uchar*)&v2)[0], ((cl_uchar*)&v2)[1] ); +#else + log_info( "__cl_uchar2 SIMD vectors not supported on this architecture.\n" ); +#endif + +#if defined( __CL_UCHAR4__ ) + __cl_uchar4 v4 = b4.v4; + log_info("__cl_uchar4: %d %d %d %d \n", ((uchar*)&v4)[0], ((cl_uchar*)&v4)[1], ((cl_uchar*)&v4)[2], ((cl_uchar*)&v4)[3] ); +#else + log_info( "__cl_uchar4 SIMD vectors not supported on this architecture.\n" ); +#endif + +#if defined( __CL_UCHAR8__ ) + __cl_uchar8 v8 = b8.v8; + log_info("__cl_uchar8: %d %d %d %d %d %d %d %d \n", ((cl_uchar*)&v8)[0], ((cl_uchar*)&v8)[1], ((cl_uchar*)&v8)[2], ((cl_uchar*)&v8)[3], ((cl_uchar*)&v8)[4], ((cl_uchar*)&v8)[5], ((cl_uchar*)&v8)[6], ((cl_uchar*)&v8)[7] ); +#else + log_info( "__cl_uchar8 SIMD vectors not supported on this architecture.\n" ); +#endif + +#if defined( __CL_UCHAR16__ ) + __cl_uchar16 v16 = b16.v16; + log_info("__cl_uchar16: %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d \n", ((cl_uchar*)&v16)[0], ((cl_uchar*)&v16)[1], ((cl_uchar*)&v16)[2], ((cl_uchar*)&v16)[3], ((cl_uchar*)&v16)[4], ((cl_uchar*)&v16)[5], ((cl_uchar*)&v16)[6], ((cl_uchar*)&v16)[7], + ((cl_uchar*)&v16)[8], ((cl_uchar*)&v16)[9], ((cl_uchar*)&v16)[10], ((cl_uchar*)&v16)[11], ((cl_uchar*)&v16)[12], ((cl_uchar*)&v16)[13], ((cl_uchar*)&v16)[14], ((cl_uchar*)&v16)[15]); +#else + log_info( "__cl_uchar16 SIMD vectors not supported on this architecture.\n" ); +#endif + + log_info( "\n" ); +} + +void test_short( void ) +{ +/* short */ + /* 
Constructor */ + cl_short a = 0; + cl_short2 a2 = {{ 0, 1 }}; + cl_short4 a4 = {{ 0, 1, 2, 3 }}; + cl_short8 a8 = {{ 0, 1, 2, 3, 4, 5, 6, 7 }}; + cl_short16 a16 = {{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }}; + + /* assignment */ + cl_short b = a; + cl_short2 b2 = a2; + cl_short4 b4 = a4; + cl_short8 b8 = a8; + cl_short16 b16 = a16; + + log_info("\nVerifying assignment:\n" ); + log_info("b: %d\n", b ); + log_info("b2: %d %d \n", b2.s[0], b2.s[1] ); + log_info("b4: %d %d %d %d\n", b4.s[0], b4.s[1], b4.s[2], b4.s[3] ); + log_info("b8: %d %d %d %d %d %d %d %d\n", b8.s[0], b8.s[1], b8.s[2], b8.s[3], b8.s[4], b8.s[5], b8.s[6], b8.s[7] ); + log_info("b16: %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d\n", b16.s[0], b16.s[1], b16.s[2], b16.s[3], b16.s[4], b16.s[5], b16.s[6], b16.s[7], + b16.s[8], b16.s[9], b16.s[10], b16.s[11], b16.s[12], b16.s[13], b16.s[14], b16.s[15]); + + /* vector access */ + log_info("\nVerifying vector access:\n" ); +#if defined( __CL_SHORT2__ ) + __cl_short2 v2 = b2.v2; + log_info("__cl_short2: %d %d \n", ((cl_short*)&v2)[0], ((cl_short*)&v2)[1] ); +#else + log_info( "__cl_short2 SIMD vectors not supported on this architecture.\n" ); +#endif + +#if defined( __CL_SHORT4__ ) + __cl_short4 v4 = b4.v4; + log_info("__cl_short4: %d %d %d %d \n", ((cl_short*)&v4)[0], ((cl_short*)&v4)[1], ((cl_short*)&v4)[2], ((cl_short*)&v4)[3] ); +#else + log_info( "__cl_short4 SIMD vectors not supported on this architecture.\n" ); +#endif + +#if defined( __CL_SHORT8__ ) + __cl_short8 v8 = b8.v8; + log_info("__cl_short8: %d %d %d %d %d %d %d %d \n", ((cl_short*)&v8)[0], ((cl_short*)&v8)[1], ((cl_short*)&v8)[2], ((cl_short*)&v8)[3], ((cl_short*)&v8)[4], ((cl_short*)&v8)[5], ((cl_short*)&v8)[6], ((cl_short*)&v8)[7] ); +#else + log_info( "__cl_short8 SIMD vectors not supported on this architecture.\n" ); +#endif + +#if defined( __CL_SHORT16__ ) + __cl_short16 v16 = b16.v16; + log_info("__cl_short16: %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d \n", 
((cl_short*)&v16)[0], ((cl_short*)&v16)[1], ((cl_short*)&v16)[2], ((cl_short*)&v16)[3], ((cl_short*)&v16)[4], ((cl_short*)&v16)[5], ((cl_short*)&v16)[6], ((cl_short*)&v16)[7], + ((cl_short*)&v16)[8], ((cl_short*)&v16)[9], ((cl_short*)&v16)[10], ((cl_short*)&v16)[11], ((cl_short*)&v16)[12], ((cl_short*)&v16)[13], ((cl_short*)&v16)[14], ((cl_short*)&v16)[15]); +#else + log_info( "__cl_short16 SIMD vectors not supported on this architecture.\n" ); +#endif + + log_info( "\n" ); +} + +void test_ushort( void ) +{ +/* ushort */ + /* Constructor */ + cl_ushort a = 0; + cl_ushort2 a2 = {{ 0, 1 }}; + cl_ushort4 a4 = {{ 0, 1, 2, 3 }}; + cl_ushort8 a8 = {{ 0, 1, 2, 3, 4, 5, 6, 7 }}; + cl_ushort16 a16 = {{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }}; + + /* assignment */ + cl_ushort b = a; + cl_ushort2 b2 = a2; + cl_ushort4 b4 = a4; + cl_ushort8 b8 = a8; + cl_ushort16 b16 = a16; + + log_info("\nVerifying assignment:\n" ); + log_info("b: %d\n", b ); + log_info("b2: %d %d \n", b2.s[0], b2.s[1] ); + log_info("b4: %d %d %d %d\n", b4.s[0], b4.s[1], b4.s[2], b4.s[3] ); + log_info("b8: %d %d %d %d %d %d %d %d\n", b8.s[0], b8.s[1], b8.s[2], b8.s[3], b8.s[4], b8.s[5], b8.s[6], b8.s[7] ); + log_info("b16: %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d\n", b16.s[0], b16.s[1], b16.s[2], b16.s[3], b16.s[4], b16.s[5], b16.s[6], b16.s[7], + b16.s[8], b16.s[9], b16.s[10], b16.s[11], b16.s[12], b16.s[13], b16.s[14], b16.s[15]); + + /* vector access */ + log_info("\nVerifying vector access:\n" ); +#if defined( __CL_USHORT2__ ) + __cl_ushort2 v2 = b2.v2; + log_info("__cl_ushort2: %d %d \n", ((unsigned short*)&v2)[0], ((unsigned short*)&v2)[1] ); +#else + log_info( "__cl_ushort2 SIMD vectors not supported on this architecture.\n" ); +#endif + +#if defined( __CL_USHORT4__ ) + __cl_ushort4 v4 = b4.v4; + log_info("__cl_ushort4: %d %d %d %d \n", ((unsigned short*)&v4)[0], ((unsigned short*)&v4)[1], ((unsigned short*)&v4)[2], ((unsigned short*)&v4)[3] ); +#else + log_info( "__cl_ushort4 
SIMD vectors not supported on this architecture.\n" ); +#endif + +#if defined( __CL_USHORT8__ ) + __cl_ushort8 v8 = b8.v8; + log_info("__cl_ushort8: %d %d %d %d %d %d %d %d \n", ((unsigned short*)&v8)[0], ((unsigned short*)&v8)[1], ((unsigned short*)&v8)[2], ((unsigned short*)&v8)[3], ((unsigned short*)&v8)[4], ((unsigned short*)&v8)[5], ((unsigned short*)&v8)[6], ((unsigned short*)&v8)[7] ); +#else + log_info( "__cl_ushort8 SIMD vectors not supported on this architecture.\n" ); +#endif + +#if defined( __CL_USHORT16__ ) + __cl_ushort16 v16 = b16.v16; + log_info("__cl_ushort16: %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d \n", ((unsigned short*)&v16)[0], ((unsigned short*)&v16)[1], ((unsigned short*)&v16)[2], ((unsigned short*)&v16)[3], ((unsigned short*)&v16)[4], ((unsigned short*)&v16)[5], ((unsigned short*)&v16)[6], ((unsigned short*)&v16)[7], + ((unsigned short*)&v16)[8], ((unsigned short*)&v16)[9], ((unsigned short*)&v16)[10], ((unsigned short*)&v16)[11], ((unsigned short*)&v16)[12], ((unsigned short*)&v16)[13], ((unsigned short*)&v16)[14], ((unsigned short*)&v16)[15]); +#else + log_info( "__cl_ushort16 SIMD vectors not supported on this architecture.\n" ); +#endif + + log_info( "\n" ); +} + +void test_int( void ) +{ +/* int */ + /* Constructor */ + cl_int a = 0; + cl_int2 a2 = {{ 0, 1 }}; + cl_int4 a4 = {{ 0, 1, 2, 3 }}; + cl_int8 a8 = {{ 0, 1, 2, 3, 4, 5, 6, 7 }}; + cl_int16 a16 = {{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }}; + + /* assignment */ + cl_int b = a; + cl_int2 b2 = a2; + cl_int4 b4 = a4; + cl_int8 b8 = a8; + cl_int16 b16 = a16; + + log_info("\nVerifying assignment:\n" ); + log_info("b: %d\n", b ); + log_info("b2: %d %d \n", b2.s[0], b2.s[1] ); + log_info("b4: %d %d %d %d\n", b4.s[0], b4.s[1], b4.s[2], b4.s[3] ); + log_info("b8: %d %d %d %d %d %d %d %d\n", b8.s[0], b8.s[1], b8.s[2], b8.s[3], b8.s[4], b8.s[5], b8.s[6], b8.s[7] ); + log_info("b16: %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d\n", b16.s[0], b16.s[1], b16.s[2], 
b16.s[3], b16.s[4], b16.s[5], b16.s[6], b16.s[7], + b16.s[8], b16.s[9], b16.s[10], b16.s[11], b16.s[12], b16.s[13], b16.s[14], b16.s[15]); + + /* vector access */ + log_info("\nVerifying vector access:\n" ); +#if defined( __CL_INT2__ ) + __cl_int2 v2 = b2.v2; + log_info("__cl_int2: %d %d \n", ((cl_int*)&v2)[0], ((cl_int*)&v2)[1] ); +#else + log_info( "__cl_int2 SIMD vectors not supported on this architecture.\n" ); +#endif + +#if defined( __CL_INT4__ ) + __cl_int4 v4 = b4.v4; + log_info("__cl_int4: %d %d %d %d \n", ((cl_int*)&v4)[0], ((cl_int*)&v4)[1], ((cl_int*)&v4)[2], ((cl_int*)&v4)[3] ); +#else + log_info( "__cl_int4 SIMD vectors not supported on this architecture.\n" ); +#endif + +#if defined( __CL_INT8__ ) + __cl_int8 v8 = b8.v8; + log_info("__cl_int8: %d %d %d %d %d %d %d %d \n", ((cl_int*)&v8)[0], ((cl_int*)&v8)[1], ((cl_int*)&v8)[2], ((cl_int*)&v8)[3], ((cl_int*)&v8)[4], ((cl_int*)&v8)[5], ((cl_int*)&v8)[6], ((cl_int*)&v8)[7] ); +#else + log_info( "__cl_int8 SIMD vectors not supported on this architecture.\n" ); +#endif + +#if defined( __CL_INT16__ ) + __cl_int16 v16 = b16.v16; + log_info("__cl_int16: %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d \n", ((cl_int*)&v16)[0], ((cl_int*)&v16)[1], ((cl_int*)&v16)[2], ((cl_int*)&v16)[3], ((cl_int*)&v16)[4], ((cl_int*)&v16)[5], ((cl_int*)&v16)[6], ((cl_int*)&v16)[7], + ((cl_int*)&v16)[8], ((cl_int*)&v16)[9], ((cl_int*)&v16)[10], ((cl_int*)&v16)[11], ((cl_int*)&v16)[12], ((cl_int*)&v16)[13], ((cl_int*)&v16)[14], ((cl_int*)&v16)[15]); +#else + log_info( "__cl_int16 SIMD vectors not supported on this architecture.\n" ); +#endif + + log_info( "\n" ); +} + +void test_uint( void ) +{ +/* uint */ + /* Constructor */ + cl_uint a = 0; + cl_uint2 a2 = {{ 0, 1 }}; + cl_uint4 a4 = {{ 0, 1, 2, 3 }}; + cl_uint8 a8 = {{ 0, 1, 2, 3, 4, 5, 6, 7 }}; + cl_uint16 a16 = {{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }}; + + /* assignment */ + cl_uint b = a; + cl_uint2 b2 = a2; + cl_uint4 b4 = a4; + cl_uint8 b8 = a8; + 
cl_uint16 b16 = a16; + + log_info("\nVerifying assignment:\n" ); + log_info("b: %d\n", b ); + log_info("b2: %d %d \n", b2.s[0], b2.s[1] ); + log_info("b4: %d %d %d %d\n", b4.s[0], b4.s[1], b4.s[2], b4.s[3] ); + log_info("b8: %d %d %d %d %d %d %d %d\n", b8.s[0], b8.s[1], b8.s[2], b8.s[3], b8.s[4], b8.s[5], b8.s[6], b8.s[7] ); + log_info("b16: %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d\n", b16.s[0], b16.s[1], b16.s[2], b16.s[3], b16.s[4], b16.s[5], b16.s[6], b16.s[7], + b16.s[8], b16.s[9], b16.s[10], b16.s[11], b16.s[12], b16.s[13], b16.s[14], b16.s[15]); + + /* vector access */ + log_info("\nVerifying vector access:\n" ); +#if defined( __CL_UINT2__ ) + __cl_uint2 v2 = b2.v2; + log_info("__cl_uint2: %d %d \n", ((cl_uint*)&v2)[0], ((cl_uint*)&v2)[1] ); +#else + log_info( "__cl_uint2 SIMD vectors not supported on this architecture.\n" ); +#endif + +#if defined( __CL_UINT4__ ) + __cl_uint4 v4 = b4.v4; + log_info("__cl_uint4: %d %d %d %d \n", ((cl_uint*)&v4)[0], ((cl_uint*)&v4)[1], ((cl_uint*)&v4)[2], ((cl_uint*)&v4)[3] ); +#else + log_info( "__cl_uint4 SIMD vectors not supported on this architecture.\n" ); +#endif + +#if defined( __CL_UINT8__ ) + __cl_uint8 v8 = b8.v8; + log_info("__cl_uint8: %d %d %d %d %d %d %d %d \n", ((cl_uint*)&v8)[0], ((cl_uint*)&v8)[1], ((cl_uint*)&v8)[2], ((cl_uint*)&v8)[3], ((cl_uint*)&v8)[4], ((cl_uint*)&v8)[5], ((cl_uint*)&v8)[6], ((cl_uint*)&v8)[7] ); +#else + log_info( "__cl_uint8 SIMD vectors not supported on this architecture.\n" ); +#endif + +#if defined( __CL_UINT16__ ) + __cl_uint16 v16 = b16.v16; + log_info("__cl_uint16: %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d \n", ((cl_uint*)&v16)[0], ((cl_uint*)&v16)[1], ((cl_uint*)&v16)[2], ((cl_uint*)&v16)[3], ((cl_uint*)&v16)[4], ((cl_uint*)&v16)[5], ((cl_uint*)&v16)[6], ((cl_uint*)&v16)[7], + ((cl_uint*)&v16)[8], ((cl_uint*)&v16)[9], ((cl_uint*)&v16)[10], ((cl_uint*)&v16)[11], ((cl_uint*)&v16)[12], ((cl_uint*)&v16)[13], ((cl_uint*)&v16)[14], ((cl_uint*)&v16)[15]); +#else + log_info( 
"__cl_uint16 SIMD vectors not supported on this architecture.\n" ); +#endif + + log_info( "\n" ); +} + +void test_long( void ) +{ +/* long */ + /* Constructor */ + cl_long a = 0; + cl_long2 a2 = {{ 0, 1 }}; + cl_long4 a4 = {{ 0, 1, 2, 3 }}; + cl_long8 a8 = {{ 0, 1, 2, 3, 4, 5, 6, 7 }}; + cl_long16 a16 = {{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }}; + + /* assignment */ + cl_long b = a; + cl_long2 b2 = a2; + cl_long4 b4 = a4; + cl_long8 b8 = a8; + cl_long16 b16 = a16; + + log_info("\nVerifying assignment:\n" ); + log_info("b: %lld\n", b ); + log_info("b2: %lld %lld \n", b2.s[0], b2.s[1] ); + log_info("b4: %lld %lld %lld %lld\n", b4.s[0], b4.s[1], b4.s[2], b4.s[3] ); + log_info("b8: %lld %lld %lld %lld %lld %lld %lld %lld\n", b8.s[0], b8.s[1], b8.s[2], b8.s[3], b8.s[4], b8.s[5], b8.s[6], b8.s[7] ); + log_info("b16: %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld\n", b16.s[0], b16.s[1], b16.s[2], b16.s[3], b16.s[4], b16.s[5], b16.s[6], b16.s[7], + b16.s[8], b16.s[9], b16.s[10], b16.s[11], b16.s[12], b16.s[13], b16.s[14], b16.s[15]); + + /* vector access */ + log_info("\nVerifying vector access:\n" ); +#if defined( __CL_LONG2__ ) + __cl_long2 v2 = b2.v2; + log_info("__cl_long2: %lld %lld \n", ((cl_long*)&v2)[0], ((cl_long*)&v2)[1] ); +#else + log_info( "__cl_long2 SIMD vectors not supported on this architecture.\n" ); +#endif + +#if defined( __CL_LONG4__ ) + __cl_long4 v4 = b4.v4; + log_info("__cl_long4: %lld %lld %lld %lld \n", ((cl_long*)&v4)[0], ((cl_long*)&v4)[1], ((cl_long*)&v4)[2], ((cl_long*)&v4)[3] ); +#else + log_info( "__cl_long4 SIMD vectors not supported on this architecture.\n" ); +#endif + +#if defined( __CL_LONG8__ ) + __cl_long8 v8 = b8.v8; + log_info("__cl_long8: %lld %lld %lld %lld %lld %lld %lld %lld \n", ((cl_long*)&v8)[0], ((cl_long*)&v8)[1], ((cl_long*)&v8)[2], ((cl_long*)&v8)[3], ((cl_long*)&v8)[4], ((cl_long*)&v8)[5], ((cl_long*)&v8)[6], ((cl_long*)&v8)[7] ); +#else + log_info( "__cl_long8 SIMD 
vectors not supported on this architecture.\n" ); +#endif + +#if defined( __CL_LONG16__ ) + __cl_long16 v16 = b16.v16; + log_info("__cl_long16: %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld \n", ((cl_long*)&v16)[0], ((cl_long*)&v16)[1], ((cl_long*)&v16)[2], ((cl_long*)&v16)[3], ((cl_long*)&v16)[4], ((cl_long*)&v16)[5], ((cl_long*)&v16)[6], ((cl_long*)&v16)[7], + ((cl_long*)&v16)[8], ((cl_long*)&v16)[9], ((cl_long*)&v16)[10], ((cl_long*)&v16)[11], ((cl_long*)&v16)[12], ((cl_long*)&v16)[13], ((cl_long*)&v16)[14], ((cl_long*)&v16)[15]); +#else + log_info( "__cl_long16 SIMD vectors not supported on this architecture.\n" ); +#endif + + log_info( "\n" ); +} + +void test_ulong( void ) +{ +/* ulong */ + /* Constructor */ + cl_ulong a = 0; + cl_ulong2 a2 = {{ 0, 1 }}; + cl_ulong4 a4 = {{ 0, 1, 2, 3 }}; + cl_ulong8 a8 = {{ 0, 1, 2, 3, 4, 5, 6, 7 }}; + cl_ulong16 a16 = {{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }}; + + /* assignment */ + cl_ulong b = a; + cl_ulong2 b2 = a2; + cl_ulong4 b4 = a4; + cl_ulong8 b8 = a8; + cl_ulong16 b16 = a16; + + log_info("\nVerifying assignment:\n" ); + log_info("b: %lld\n", b ); + log_info("b2: %lld %lld \n", b2.s[0], b2.s[1] ); + log_info("b4: %lld %lld %lld %lld\n", b4.s[0], b4.s[1], b4.s[2], b4.s[3] ); + log_info("b8: %lld %lld %lld %lld %lld %lld %lld %lld\n", b8.s[0], b8.s[1], b8.s[2], b8.s[3], b8.s[4], b8.s[5], b8.s[6], b8.s[7] ); + log_info("b16: %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld\n", b16.s[0], b16.s[1], b16.s[2], b16.s[3], b16.s[4], b16.s[5], b16.s[6], b16.s[7], + b16.s[8], b16.s[9], b16.s[10], b16.s[11], b16.s[12], b16.s[13], b16.s[14], b16.s[15]); + + /* vector access */ + log_info("\nVerifying vector access:\n" ); +#if defined( __CL_ULONG2__ ) + __cl_ulong2 v2 = b2.v2; + log_info("__cl_ulong2: %lld %lld \n", ((cl_ulong*)&v2)[0], ((cl_ulong*)&v2)[1] ); +#else + log_info( "__cl_ulong2 SIMD vectors not supported on this architecture.\n" ); 
+#endif + +#if defined( __CL_ULONG4__ ) + __cl_ulong4 v4 = b4.v4; + log_info("__cl_ulong4: %lld %lld %lld %lld \n", ((cl_ulong*)&v4)[0], ((cl_ulong*)&v4)[1], ((cl_ulong*)&v4)[2], ((cl_ulong*)&v4)[3] ); +#else + log_info( "__cl_ulong4 SIMD vectors not supported on this architecture.\n" ); +#endif + +#if defined( __CL_ULONG8__ ) + __cl_ulong8 v8 = b8.v8; + log_info("__cl_ulong8: %lld %lld %lld %lld %lld %lld %lld %lld \n", ((cl_ulong*)&v8)[0], ((cl_ulong*)&v8)[1], ((cl_ulong*)&v8)[2], ((cl_ulong*)&v8)[3], ((cl_ulong*)&v8)[4], ((cl_ulong*)&v8)[5], ((cl_ulong*)&v8)[6], ((cl_ulong*)&v8)[7] ); +#else + log_info( "__cl_ulong8 SIMD vectors not supported on this architecture.\n" ); +#endif + +#if defined( __CL_ULONG16__ ) + __cl_ulong16 v16 = b16.v16; + log_info("__cl_ulong16: %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld %lld \n", ((cl_ulong*)&v16)[0], ((cl_ulong*)&v16)[1], ((cl_ulong*)&v16)[2], ((cl_ulong*)&v16)[3], ((cl_ulong*)&v16)[4], ((cl_ulong*)&v16)[5], ((cl_ulong*)&v16)[6], ((cl_ulong*)&v16)[7], + ((cl_ulong*)&v16)[8], ((cl_ulong*)&v16)[9], ((cl_ulong*)&v16)[10], ((cl_ulong*)&v16)[11], ((cl_ulong*)&v16)[12], ((cl_ulong*)&v16)[13], ((cl_ulong*)&v16)[14], ((cl_ulong*)&v16)[15]); +#else + log_info( "__cl_ulong16 SIMD vectors not supported on this architecture.\n" ); +#endif + + log_info( "\n" ); +} + + +void test_float( void ) +{ +/* float */ + /* Constructor */ + cl_float a = 0.0f; + cl_float2 a2 = {{ 0.0f, 1.0f }}; + cl_float4 a4 = {{ 0.0f, 1.0f, 2.0f, 3.0f }}; + cl_float8 a8 = {{ 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f }}; + cl_float16 a16 = {{ 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f }}; + + /* assignment */ + cl_float b = a; + cl_float2 b2 = a2; + cl_float4 b4 = a4; + cl_float8 b8 = a8; + cl_float16 b16 = a16; + + log_info("\nVerifying assignment:\n" ); + log_info("b: %f\n", b ); + log_info("b2: %f %f \n", b2.s[0], b2.s[1] ); + log_info("b4: %f %f %f %f\n", b4.s[0], 
b4.s[1], b4.s[2], b4.s[3] ); + log_info("b8: %f %f %f %f %f %f %f %f\n", b8.s[0], b8.s[1], b8.s[2], b8.s[3], b8.s[4], b8.s[5], b8.s[6], b8.s[7] ); + log_info("b16: %f %f %f %f %f %f %f %f %f %f %f %f %f %f %f %f\n", b16.s[0], b16.s[1], b16.s[2], b16.s[3], b16.s[4], b16.s[5], b16.s[6], b16.s[7], + b16.s[8], b16.s[9], b16.s[10], b16.s[11], b16.s[12], b16.s[13], b16.s[14], b16.s[15]); + + /* vector access */ + log_info("\nVerifying vector access:\n" ); +#if defined( __CL_FLOAT2__ ) + __cl_float2 v2 = b2.v2; + log_info("__cl_float2: %f %f \n", ((cl_float*)&v2)[0], ((cl_float*)&v2)[1] ); +#else + log_info( "__cl_float2 SIMD vectors not supported on this architecture.\n" ); +#endif + +#if defined( __CL_FLOAT4__ ) + { + __cl_float4 v4 = b4.v4; + log_info("__cl_float4: %f %f %f %f \n", ((cl_float*)&v4)[0], ((cl_float*)&v4)[1], ((cl_float*)&v4)[2], ((cl_float*)&v4)[3] ); + } +#else + log_info( "__cl_float4 SIMD vectors not supported on this architecture.\n" ); +#endif + +#if defined( __CL_FLOAT8__ ) + __cl_float8 v8 = b8.v8; + log_info("__cl_float8: %f %f %f %f %f %f %f %f \n", ((cl_float*)&v8)[0], ((cl_float*)&v8)[1], ((cl_float*)&v8)[2], ((cl_float*)&v8)[3], ((cl_float*)&v8)[4], ((cl_float*)&v8)[5], ((cl_float*)&v8)[6], ((cl_float*)&v8)[7] ); +#else + log_info( "__cl_float8 SIMD vectors not supported on this architecture.\n" ); +#endif + +#if defined( __CL_FLOAT16__ ) + __cl_float16 v16 = b16.v16; + log_info("__cl_float16: %f %f %f %f %f %f %f %f %f %f %f %f %f %f %f %f \n", ((cl_float*)&v16)[0], ((cl_float*)&v16)[1], ((cl_float*)&v16)[2], ((cl_float*)&v16)[3], ((cl_float*)&v16)[4], ((cl_float*)&v16)[5], ((cl_float*)&v16)[6], ((cl_float*)&v16)[7], + ((cl_float*)&v16)[8], ((cl_float*)&v16)[9], ((cl_float*)&v16)[10], ((cl_float*)&v16)[11], ((cl_float*)&v16)[12], ((cl_float*)&v16)[13], ((cl_float*)&v16)[14], ((cl_float*)&v16)[15]); +#else + log_info( "__cl_float16 SIMD vectors not supported on this architecture.\n" ); +#endif + + log_info( "\n" ); +} + +void test_double( 
void ) +{ +/* double */ + /* Constructor */ + cl_double a = 0.0f; + cl_double2 a2 = {{ 0.0f, 1.0f }}; + cl_double4 a4 = {{ 0.0f, 1.0f, 2.0f, 3.0f }}; + cl_double8 a8 = {{ 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f }}; + cl_double16 a16 = {{ 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f }}; + + /* assignment */ + cl_double b = a; + cl_double2 b2 = a2; + cl_double4 b4 = a4; + cl_double8 b8 = a8; + cl_double16 b16 = a16; + + log_info("\nVerifying assignment:\n" ); + log_info("b: %f\n", b ); + log_info("b2: %f %f \n", b2.s[0], b2.s[1] ); + log_info("b4: %f %f %f %f\n", b4.s[0], b4.s[1], b4.s[2], b4.s[3] ); + log_info("b8: %f %f %f %f %f %f %f %f\n", b8.s[0], b8.s[1], b8.s[2], b8.s[3], b8.s[4], b8.s[5], b8.s[6], b8.s[7] ); + log_info("b16: %f %f %f %f %f %f %f %f %f %f %f %f %f %f %f %f\n", b16.s[0], b16.s[1], b16.s[2], b16.s[3], b16.s[4], b16.s[5], b16.s[6], b16.s[7], + b16.s[8], b16.s[9], b16.s[10], b16.s[11], b16.s[12], b16.s[13], b16.s[14], b16.s[15]); + + /* vector access */ + log_info("\nVerifying vector access:\n" ); +#if defined( __CL_DOUBLE2__ ) + __cl_double2 v2 = b2.v2; + log_info("__cl_double2: %f %f \n", ((cl_double*)&v2)[0], ((cl_double*)&v2)[1] ); +#else + log_info( "__cl_double2 SIMD vectors not supported on this architecture.\n" ); +#endif + +#if defined( __CL_DOUBLE4__ ) + __cl_double4 v4 = b4.v4; + log_info("__cl_double4: %f %f %f %f \n", ((cl_double*)&v4)[0], ((cl_double*)&v4)[1], ((cl_double*)&v4)[2], ((cl_double*)&v4)[3] ); +#else + log_info( "__cl_double4 SIMD vectors not supported on this architecture.\n" ); +#endif + +#if defined( __CL_DOUBLE8__ ) + __cl_double8 v8 = b8.v8; + log_info("__cl_double8: %f %f %f %f %f %f %f %f \n", ((cl_double*)&v8)[0], ((cl_double*)&v8)[1], ((cl_double*)&v8)[2], ((cl_double*)&v8)[3], ((cl_double*)&v8)[4], ((cl_double*)&v8)[5], ((cl_double*)&v8)[6], ((cl_double*)&v8)[7] ); +#else + log_info( "__cl_double8 SIMD vectors not supported on this architecture.\n" 
); +#endif + +#if defined( __CL_DOUBLE16__ ) + __cl_double16 v16 = b16.v16; + log_info("__cl_double16: %f %f %f %f %f %f %f %f %f %f %f %f %f %f %f %f \n", ((cl_double*)&v16)[0], ((cl_double*)&v16)[1], ((cl_double*)&v16)[2], ((cl_double*)&v16)[3], ((cl_double*)&v16)[4], ((cl_double*)&v16)[5], ((cl_double*)&v16)[6], ((cl_double*)&v16)[7], + ((cl_double*)&v16)[8], ((cl_double*)&v16)[9], ((cl_double*)&v16)[10], ((cl_double*)&v16)[11], ((cl_double*)&v16)[12], ((cl_double*)&v16)[13], ((cl_double*)&v16)[14], ((cl_double*)&v16)[15]); +#else + log_info( "__cl_double16 SIMD vectors not supported on this architecture.\n" ); +#endif + + log_info( "\n" ); +} + +int main( void ) +{ + test_start(); + + log_info( "\nChecking operations on cl_types.\nNumbers, where presented, should walk upward from 0, with step of 1:\n" ); + + test_char(); + test_uchar(); + test_short(); + test_ushort(); + test_int(); + test_uint(); + test_long(); + test_ulong(); + test_float(); + test_double(); + + test_finish(); + log_info("PASSED test.\n"); + + return 0; +} diff --git a/test_conformance/headers/test_opencl.h.c b/test_conformance/headers/test_opencl.h.c new file mode 100644 index 00000000..d7318387 --- /dev/null +++ b/test_conformance/headers/test_opencl.h.c @@ -0,0 +1,27 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#if defined( __APPLE__ ) + #include <OpenCL/opencl.h> +#else + #include <CL/opencl.h> +#endif +#include <stdio.h> + +int main( void ) +{ + printf("opencl.h standalone test PASSED.\n"); + return 0; +} diff --git a/test_conformance/images/CMakeLists.txt b/test_conformance/images/CMakeLists.txt new file mode 100644 index 00000000..e0eb3fe1 --- /dev/null +++ b/test_conformance/images/CMakeLists.txt @@ -0,0 +1,15 @@ +if(NOT MSVC) +# IGL: Force no optimizations +set(CMAKE_C_FLAGS_RELEASE "-O0") +set(CMAKE_CXX_FLAGS_RELEASE "-O0") +endif(NOT MSVC) + +add_subdirectory(clCopyImage) +add_subdirectory(clFillImage) +add_subdirectory(clGetInfo) +add_subdirectory(clReadWriteImage) +add_subdirectory(kernel_image_methods) +add_subdirectory(kernel_read_write) +add_subdirectory(samplerlessReads) + + diff --git a/test_conformance/images/Jamfile b/test_conformance/images/Jamfile new file mode 100644 index 00000000..081a4702 --- /dev/null +++ b/test_conformance/images/Jamfile @@ -0,0 +1,9 @@ +build-project clCopyImage ; +build-project clFillImage ; +build-project clGetInfo ; +build-project clReadWriteImage ; +build-project kernel_image_methods ; +build-project kernel_read_write ; +build-project samplerlessReads ; + +use-project /images : . 
; diff --git a/test_conformance/images/Makefile b/test_conformance/images/Makefile new file mode 100644 index 00000000..9028f6c6 --- /dev/null +++ b/test_conformance/images/Makefile @@ -0,0 +1,31 @@ + +PRODUCTS = \ + clCopyImage/ \ + clFillImage/ \ + clGetInfo/ \ + clReadWriteImage/ \ + kernel_image_methods/ \ + kernel_read_write/ \ + samplerlessReads/ + +TOP=$(shell pwd) + +all: $(PRODUCTS) + +clean: + @for testdir in $(dir $(PRODUCTS)) ; \ + do ( \ + echo "==================================================================================" ; \ + echo "Cleaning $$testdir" ; \ + echo "==================================================================================" ; \ + cd $$testdir && make clean \ + ); \ + done \ + +$(PRODUCTS): + @echo "==================================================================================" ; + @echo "(`date "+%H:%M:%S"`) Make $@" ; + @echo "==================================================================================" ; + cd $(dir $@) && make -i + +.PHONY: clean $(PRODUCTS) all diff --git a/test_conformance/images/clCopyImage/CMakeLists.txt b/test_conformance/images/clCopyImage/CMakeLists.txt new file mode 100644 index 00000000..f69ca0db --- /dev/null +++ b/test_conformance/images/clCopyImage/CMakeLists.txt @@ -0,0 +1,28 @@ +set(MODULE_NAME CL_COPY_IMAGES) + +set(${MODULE_NAME}_SOURCES + main.cpp + test_copy_1D.cpp + test_copy_1D_array.cpp + test_copy_2D.cpp + test_copy_2D_2D_array.cpp + test_copy_2D_3D.cpp + test_copy_2D_array.cpp + test_copy_3D.cpp + test_copy_3D_2D_array.cpp + test_copy_generic.cpp + test_loops.cpp + ../../../test_common/harness/testHarness.c + ../../../test_common/harness/errorHelpers.c + ../../../test_common/harness/threadTesting.c + ../../../test_common/harness/kernelHelpers.c + ../../../test_common/harness/imageHelpers.cpp + ../../../test_common/harness/mt19937.c + ../../../test_common/harness/conversions.c + ../../../test_common/harness/typeWrappers.cpp + ../../../test_common/harness/msvc9.c + 
../../../test_common/harness/parseParameters.cpp +) + +include(../../CMakeCommon.txt) + diff --git a/test_conformance/images/clCopyImage/Jamfile b/test_conformance/images/clCopyImage/Jamfile new file mode 100644 index 00000000..526e206c --- /dev/null +++ b/test_conformance/images/clCopyImage/Jamfile @@ -0,0 +1,19 @@ +project + : requirements +# gcc:-xc++ +# msvc:"/TP" + ; + +exe test_cl_copy_images + : main.cpp + test_copy_2D_3D.cpp + test_copy_2D.cpp + test_copy_3D.cpp + test_loops.cpp + ; + +install dist + : test_cl_copy_images + : debug:$(DIST)/debug/tests/test_conformance/images/clCopyImage + release:$(DIST)/release/tests/test_conformance/images/clCopyImage + ; diff --git a/test_conformance/images/clCopyImage/Makefile b/test_conformance/images/clCopyImage/Makefile new file mode 100644 index 00000000..18b93f07 --- /dev/null +++ b/test_conformance/images/clCopyImage/Makefile @@ -0,0 +1,56 @@ +ifdef BUILD_WITH_ATF +ATF = -framework ATF +USE_ATF = -DUSE_ATF +endif + +SRCS = main.cpp \ + test_copy_generic.cpp \ + test_copy_1D.cpp \ + test_copy_2D.cpp \ + test_loops.cpp \ + test_copy_3D.cpp \ + test_copy_1D_array.cpp \ + test_copy_2D_array.cpp \ + test_copy_2D_3D.cpp \ + test_copy_2D_2D_array.cpp \ + test_copy_3D_2D_array.cpp \ + ../../../test_common/harness/errorHelpers.c \ + ../../../test_common/harness/threadTesting.c \ + ../../../test_common/harness/kernelHelpers.c \ + ../../../test_common/harness/imageHelpers.cpp \ + ../../../test_common/harness/conversions.c \ + ../../../test_common/harness/testHarness.c \ + ../../../test_common/harness/typeWrappers.cpp \ + ../../../test_common/harness/mt19937.c + + +DEFINES = DONT_TEST_GARBAGE_POINTERS + +SOURCES = $(abspath $(SRCS)) +LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries +LIBPATH += -L. 
+FRAMEWORK = +HEADERS = +TARGET = test_cl_copy_images +INCLUDE = +COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32 -Os +CC = c++ +CXX = c++ +CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE) +CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE) +LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF} + +OBJECTS := ${SOURCES:.c=.o} +OBJECTS := ${OBJECTS:.cpp=.o} + +TARGETOBJECT = +all: $(TARGET) + +$(TARGET): $(OBJECTS) + $(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES) + +clean: + rm -f $(TARGET) $(OBJECTS) + +.DEFAULT: + @echo The target \"$@\" does not exist in Makefile. diff --git a/test_conformance/images/clCopyImage/main.cpp b/test_conformance/images/clCopyImage/main.cpp new file mode 100644 index 00000000..8b56b537 --- /dev/null +++ b/test_conformance/images/clCopyImage/main.cpp @@ -0,0 +1,265 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../../test_common/harness/compat.h" + +#include <stdio.h> +#include <string.h> + +#if !defined(_WIN32) +#include <unistd.h> +#include <sys/time.h> +#endif + +#include "../testBase.h" + +bool gDebugTrace = false, gTestSmallImages = false, gTestMaxImages = false, gUseRamp = false, gTestRounding = false, gEnablePitch = false, gTestMipmaps = false; +int gTypesToTest = 0; +cl_channel_type gChannelTypeToUse = (cl_channel_type)-1; +cl_channel_order gChannelOrderToUse = (cl_channel_order)-1; +cl_device_type gDeviceType = CL_DEVICE_TYPE_DEFAULT; +cl_context context; +cl_command_queue queue; + +extern int test_image_set( cl_device_id device, MethodsToTest testMethod ); + +#define MAX_ALLOWED_STD_DEVIATION_IN_MB 8.0 + +void printUsage( const char *execName ) +{ + const char *p = strrchr( execName, '/' ); + if( p != NULL ) + execName = p + 1; + + log_info( "Usage: %s [debug_trace] [small_images]\n", execName ); + log_info( "Where:\n" ); + log_info( "\t1D - Only test 1D images\n" ); + log_info( "\t2D - Only test 2D images\n" ); + log_info( "\t3D - Only test 3D images\n" ); + log_info( "\t1Darray - Only test 1D image arrays\n" ); + log_info( "\t2Darray - Only test 2D image arrays\n" ); + log_info( "\t2Dto3D - Only test 2D -> 3D images\n" ); + log_info( "\t3Dto2D - Only test 3D -> 2D images\n" ); + log_info( "\t2Darrayto2D - Only test 2D image arrays -> 2D images\n" ); + log_info( "\t2Dto2Darray - Only test 2D images -> 2D image arrays\n" ); + log_info( "\t2Darrayto3D - Only test 2D image arrays -> 3D images\n" ); + log_info( "\t3Dto2Darray - Only test 3D images -> 2D image arrays\n" ); + log_info( "\n" ); + log_info( "\ttest_mipmaps - Test with mipmapped images\n" ); + log_info( "\tdebug_trace - Enables additional debug info logging\n" ); + log_info( "\tsmall_images - Runs every format through a loop of widths 1-13 and heights 1-9, instead of random sizes\n" ); + log_info( "\tmax_images - Runs every format through a set of size combinations with the max values, max values - 1, and max values / 128\n" ); + 
log_info( "\trounding - Runs every format through a single image filled with every possible value for that image format, to verify rounding works properly\n" ); + //log_info( "\tuse_pitches - Enables row and slice pitches\n" ); + log_info( "\tuse_ramp - Instead of random data, uses images filled with ramps (and 0xff on any padding pixels) to ease debugging\n" ); +} + + +int main(int argc, const char *argv[]) +{ + cl_platform_id platform; + cl_device_id device; + cl_channel_type chanType; + cl_channel_order chanOrder; + char str[ 128 ]; + int testMethods = 0; + bool randomize = false; + + test_start(); + + checkDeviceTypeOverride( &gDeviceType ); + + // Parse arguments + for( int i = 1; i < argc; i++ ) + { + strncpy( str, argv[ i ], sizeof( str ) - 1 ); + + if( strcmp( str, "cpu" ) == 0 || strcmp( str, "CL_DEVICE_TYPE_CPU" ) == 0 ) + gDeviceType = CL_DEVICE_TYPE_CPU; + else if( strcmp( str, "gpu" ) == 0 || strcmp( str, "CL_DEVICE_TYPE_GPU" ) == 0 ) + gDeviceType = CL_DEVICE_TYPE_GPU; + else if( strcmp( str, "accelerator" ) == 0 || strcmp( str, "CL_DEVICE_TYPE_ACCELERATOR" ) == 0 ) + gDeviceType = CL_DEVICE_TYPE_ACCELERATOR; + else if( strcmp( str, "CL_DEVICE_TYPE_DEFAULT" ) == 0 ) + gDeviceType = CL_DEVICE_TYPE_DEFAULT; + + else if( strcmp( str, "test_mipmaps" ) == 0 ) + { + gTestMipmaps = true; + // Don't test pitches with mipmaps, at least currently. 
+ gEnablePitch = false; + } + else if( strcmp( str, "debug_trace" ) == 0 ) + gDebugTrace = true; + + else if( strcmp( str, "small_images" ) == 0 ) + gTestSmallImages = true; + else if( strcmp( str, "max_images" ) == 0 ) + gTestMaxImages = true; + else if( strcmp( str, "use_ramps" ) == 0 ) + gUseRamp = true; + + else if( strcmp( str, "use_pitches" ) == 0 ) + gEnablePitch = true; + + else if( strcmp( str, "randomize" ) == 0 ) + randomize = true; + + else if( strcmp( str, "1D" ) == 0 ) + testMethods |= k1D; + else if( strcmp( str, "2D" ) == 0 ) + testMethods |= k2D; + else if( strcmp( str, "3D" ) == 0 ) + testMethods |= k3D; + else if( strcmp( str, "1Darray" ) == 0 ) + testMethods |= k1DArray; + else if( strcmp( str, "2Darray" ) == 0 ) + testMethods |= k2DArray; + else if( strcmp( str, "2Dto3D" ) == 0 ) + testMethods |= k2DTo3D; + else if( strcmp( str, "3Dto2D" ) == 0 ) + testMethods |= k3DTo2D; + else if( strcmp( str, "2Darrayto2D" ) == 0 ) + testMethods |= k2DArrayTo2D; + else if( strcmp( str, "2Dto2Darray" ) == 0 ) + testMethods |= k2DTo2DArray; + else if( strcmp( str, "2Darrayto3D" ) == 0 ) + testMethods |= k2DArrayTo3D; + else if( strcmp( str, "3Dto2Darray" ) == 0 ) + testMethods |= k3DTo2DArray; + + else if( strcmp( str, "help" ) == 0 || strcmp( str, "?" ) == 0 ) + { + printUsage( argv[ 0 ] ); + return -1; + } + + else if( ( chanType = get_channel_type_from_name( str ) ) != (cl_channel_type)-1 ) + gChannelTypeToUse = chanType; + + else if( ( chanOrder = get_channel_order_from_name( str ) ) != (cl_channel_order)-1 ) + gChannelOrderToUse = chanOrder; + else + { + log_error( "ERROR: Unknown argument %d: %s. 
Exiting....\n", i, str ); + return -1; + } + + } + + if( testMethods == 0 ) + testMethods = k1D | k2D | k3D | k1DArray | k2DArray | k2DTo3D | k3DTo2D | k2DArrayTo2D | k2DTo2DArray | k2DArrayTo3D | k3DTo2DArray; + + // Seed the random # generators + if( randomize ) + { + gRandomSeed = (cl_uint) time( NULL ); + log_info( "Random seed: %u.\n", gRandomSeed ); + gReSeed = 1; + } + + int error; + // Get our platform + error = clGetPlatformIDs(1, &platform, NULL); + if( error ) + { + print_error( error, "Unable to get platform" ); + test_finish(); + return -1; + } + + // Get our device + error = clGetDeviceIDs(platform, gDeviceType, 1, &device, NULL ); + if( error ) + { + print_error( error, "Unable to get specified device" ); + test_finish(); + return -1; + } + + char deviceName[ 128 ], deviceVendor[ 128 ], deviceVersion[ 128 ]; + error = clGetDeviceInfo( device, CL_DEVICE_NAME, sizeof( deviceName ), deviceName, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_VENDOR, sizeof( deviceVendor ), deviceVendor, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_VERSION, sizeof( deviceVersion ), deviceVersion, NULL ); + if( error != CL_SUCCESS ) + { + print_error( error, "Unable to get device information" ); + test_finish(); + return -1; + } + log_info("Using compute device: Name = %s, Vendor = %s, Version = %s\n", deviceName, deviceVendor, deviceVersion ); + + // Check for image support + if(checkForImageSupport( device ) == CL_IMAGE_FORMAT_NOT_SUPPORTED) { + log_info("Device does not support images. 
Skipping test.\n"); + test_finish(); + return 0; + } + + // Create a context to test with + context = clCreateContext( NULL, 1, &device, notify_callback, NULL, &error ); + if( error != CL_SUCCESS ) + { + print_error( error, "Unable to create testing context" ); + test_finish(); + return -1; + } + + // Create a queue against the context + queue = clCreateCommandQueueWithProperties( context, device, 0, &error ); + if( error != CL_SUCCESS ) + { + print_error( error, "Unable to create testing command queue" ); + test_finish(); + return -1; + } + + if( gTestSmallImages ) + log_info( "Note: Using small test images\n" ); + + // Run the test now + int ret = 0; + for( int test = k1D; test <= k3DTo2DArray; test <<= 1 ) + { + if( testMethods & test ) + ret += test_image_set( device, (MethodsToTest)test ); + } + + error = clFinish(queue); + if (error) + print_error(error, "clFinish failed."); + + if (gTestFailure == 0) { + if (gTestCount > 1) + log_info("PASSED %d of %d tests.\n", gTestCount, gTestCount); + else + log_info("PASSED test.\n"); + } else if (gTestFailure > 0) { + if (gTestCount > 1) + log_error("FAILED %d of %d tests.\n", gTestFailure, gTestCount); + else + log_error("FAILED test.\n"); + } + + // Clean up + clReleaseCommandQueue(queue); + clReleaseContext(context); + test_finish(); + + if (gTestFailure > 0) + return gTestFailure; + + return ret; +} diff --git a/test_conformance/images/clCopyImage/test_copy_1D.cpp b/test_conformance/images/clCopyImage/test_copy_1D.cpp new file mode 100644 index 00000000..205f0ea3 --- /dev/null +++ b/test_conformance/images/clCopyImage/test_copy_1D.cpp @@ -0,0 +1,235 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../testBase.h" + +#define MAX_ERR 0.005f +#define MAX_HALF_LINEAR_ERR 0.3f + +extern bool gDebugTrace, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestRounding, gTestMipmaps; +extern cl_filter_mode gFilterModeToUse; +extern cl_addressing_mode gAddressModeToUse; +extern uint64_t gRoundingStartValue; +extern cl_command_queue queue; +extern cl_context context; + +extern int test_copy_image_generic( cl_device_id device, image_descriptor *srcImageInfo, image_descriptor *dstImageInfo, + const size_t sourcePos[], const size_t destPos[], const size_t regionSize[], MTdata d ); + +int test_copy_image_size_1D( cl_device_id device, image_descriptor *imageInfo, MTdata d ) +{ + size_t sourcePos[ 3 ], destPos[ 3 ], regionSize[ 3 ]; + int ret = 0, retCode; + size_t src_lod = 0, src_width_lod = imageInfo->width, src_row_pitch_lod; + size_t dst_lod = 0, dst_width_lod = imageInfo->width, dst_row_pitch_lod; + size_t width_lod = imageInfo->width; + size_t max_mip_level; + + if( gTestMipmaps ) + { + max_mip_level = imageInfo->num_mip_levels; + // Work at a random mip level + src_lod = (size_t)random_in_range( 0, max_mip_level ? max_mip_level - 1 : 0, d ); + dst_lod = (size_t)random_in_range( 0, max_mip_level ? max_mip_level - 1 : 0, d ); + src_width_lod = ( imageInfo->width >> src_lod )? ( imageInfo->width >> src_lod ) : 1; + dst_width_lod = ( imageInfo->width >> dst_lod )? ( imageInfo->width >> dst_lod ) : 1; + width_lod = ( src_width_lod > dst_width_lod ) ? 
dst_width_lod : src_width_lod; + src_row_pitch_lod = src_width_lod * get_pixel_size( imageInfo->format ); + dst_row_pitch_lod = dst_width_lod * get_pixel_size( imageInfo->format ); + } + + // First, try just a full covering region + sourcePos[ 0 ] = sourcePos[ 1 ] = sourcePos[ 2 ] = 0; + destPos[ 0 ] = destPos[ 1 ] = destPos[ 2 ] = 0; + regionSize[ 0 ] = imageInfo->width; + regionSize[ 1 ] = 1; + regionSize[ 2 ] = 1; + + if(gTestMipmaps) + { + sourcePos[ 1 ] = src_lod; + destPos[ 1 ] = dst_lod; + regionSize[ 0 ] = width_lod; + } + + retCode = test_copy_image_generic( device, imageInfo, imageInfo, sourcePos, destPos, regionSize, d ); + if( retCode < 0 ) + return retCode; + else + ret += retCode; + + // Now try a sampling of different random regions + for( int i = 0; i < 8; i++ ) + { + if( gTestMipmaps ) + { + // Work at a random mip level + src_lod = (size_t)random_in_range( 0, max_mip_level ? max_mip_level - 1 : 0, d ); + dst_lod = (size_t)random_in_range( 0, max_mip_level ? max_mip_level - 1 : 0, d ); + src_width_lod = ( imageInfo->width >> src_lod )? ( imageInfo->width >> src_lod ) : 1; + dst_width_lod = ( imageInfo->width >> dst_lod )? ( imageInfo->width >> dst_lod ) : 1; + width_lod = ( src_width_lod > dst_width_lod ) ? dst_width_lod : src_width_lod; + sourcePos[ 1 ] = src_lod; + destPos[ 1 ] = dst_lod; + } + // Pick a random size + regionSize[ 0 ] = ( width_lod > 8 ) ? (size_t)random_in_range( 8, (int)width_lod - 1, d ) : width_lod; + + // Now pick positions within valid ranges + sourcePos[ 0 ] = ( width_lod > regionSize[ 0 ] ) ? (size_t)random_in_range( 0, (int)( width_lod - regionSize[ 0 ] - 1 ), d ) : 0; + destPos[ 0 ] = ( width_lod > regionSize[ 0 ] ) ? (size_t)random_in_range( 0, (int)( width_lod - regionSize[ 0 ] - 1 ), d ) : 0; + + + // Go for it! 
+ retCode = test_copy_image_generic( device, imageInfo, imageInfo, sourcePos, destPos, regionSize, d ); + if( retCode < 0 ) + return retCode; + else + ret += retCode; + } + + return ret; +} + +int test_copy_image_set_1D( cl_device_id device, cl_image_format *format ) +{ + size_t maxWidth; + cl_ulong maxAllocSize, memSize; + image_descriptor imageInfo = { 0 }; + RandomSeed seed(gRandomSeed); + size_t pixelSize; + + imageInfo.format = format; + imageInfo.height = imageInfo.depth = imageInfo.arraySize = imageInfo.slicePitch = 0; + imageInfo.type = CL_MEM_OBJECT_IMAGE1D; + pixelSize = get_pixel_size( imageInfo.format ); + + int error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL ); + test_error( error, "Unable to get max image 2D size from device" ); + + if (memSize > (cl_ulong)SIZE_MAX) { + memSize = (cl_ulong)SIZE_MAX; + } + + if( gTestSmallImages ) + { + for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ ) + { + size_t rowPadding = gEnablePitch ? 
48 : 0; + imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding; + + if (gTestMipmaps) + imageInfo.num_mip_levels = (cl_uint) random_log_in_range(2, (int)compute_max_mip_levels(imageInfo.width, 0, 0), seed); + + if (gEnablePitch) + { + do { + rowPadding++; + imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding; + } while ((imageInfo.rowPitch % pixelSize) != 0); + } + + if( gDebugTrace ) + log_info( " at size %d\n", (int)imageInfo.width ); + + int ret = test_copy_image_size_1D( device, &imageInfo, seed ); + if( ret ) + return -1; + } + } + else if( gTestMaxImages ) + { + // Try a specific set of maximum sizes + size_t numbeOfSizes; + size_t sizes[100][3]; + + get_max_sizes(&numbeOfSizes, 100, sizes, maxWidth, 1, 1, 1, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE1D, imageInfo.format); + + for( size_t idx = 0; idx < numbeOfSizes; idx++ ) + { + size_t rowPadding = gEnablePitch ? 48 : 0; + imageInfo.width = sizes[ idx ][ 0 ]; + imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding; + + if (gTestMipmaps) + imageInfo.num_mip_levels = (cl_uint) random_log_in_range(2, (int)compute_max_mip_levels(imageInfo.width, 0, 0), seed); + + if (gEnablePitch) + { + do { + rowPadding++; + imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding; + } while ((imageInfo.rowPitch % pixelSize) != 0); + } + + log_info( "Testing %d\n", (int)sizes[ idx ][ 0 ] ); + if( gDebugTrace ) + log_info( " at max size %d\n", (int)sizes[ idx ][ 0 ] ); + if( test_copy_image_size_1D( device, &imageInfo, seed ) ) + return -1; + } + } + else + { + for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ ) + { + cl_ulong size; + size_t rowPadding = gEnablePitch ? 
48 : 0; + // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that + // image, the result array, plus offset arrays, will fit in the global ram space + do + { + imageInfo.width = (size_t)random_log_in_range( 16, (int)maxWidth / 32, seed ); + + if (gTestMipmaps) + { + imageInfo.num_mip_levels = (cl_uint) random_log_in_range(2, (int)compute_max_mip_levels(imageInfo.width, 0, 0), seed); + imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format ); + size = compute_mipmapped_image_size( imageInfo ); + size = size*4; + } + else + { + imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding; + + if (gEnablePitch) + { + do { + rowPadding++; + imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding; + } while ((imageInfo.rowPitch % pixelSize) != 0); + } + + size = (size_t)imageInfo.rowPitch * 4; + } + } while( size > maxAllocSize || ( size * 3 ) > memSize ); + + if( gDebugTrace ) + { + log_info( " at size %d (row pitch %d) out of %d\n", (int)imageInfo.width, (int)imageInfo.rowPitch, (int)maxWidth ); + if ( gTestMipmaps ) + log_info( " and %llu mip levels\n", (size_t) imageInfo.num_mip_levels ); + } + + int ret = test_copy_image_size_1D( device, &imageInfo, seed ); + if( ret ) + return -1; + } + } + + return 0; +} diff --git a/test_conformance/images/clCopyImage/test_copy_1D_array.cpp b/test_conformance/images/clCopyImage/test_copy_1D_array.cpp new file mode 100644 index 00000000..3d3633ec --- /dev/null +++ b/test_conformance/images/clCopyImage/test_copy_1D_array.cpp @@ -0,0 +1,248 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../testBase.h" + +#define MAX_ERR 0.005f +#define MAX_HALF_LINEAR_ERR 0.3f + +extern bool gDebugTrace, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestRounding, gTestMipmaps; +extern cl_filter_mode gFilterModeToUse; +extern cl_addressing_mode gAddressModeToUse; +extern uint64_t gRoundingStartValue; +extern cl_command_queue queue; +extern cl_context context; + +extern int test_copy_image_generic( cl_device_id device, image_descriptor *srcImageInfo, image_descriptor *dstImageInfo, + const size_t sourcePos[], const size_t destPos[], const size_t regionSize[], MTdata d ); + +int test_copy_image_size_1D_array( cl_device_id device, image_descriptor *imageInfo, MTdata d ) +{ + size_t sourcePos[ 3 ], destPos[ 3 ], regionSize[ 3 ]; + int ret = 0, retCode; + size_t src_lod = 0, src_width_lod = imageInfo->width, src_row_pitch_lod; + size_t dst_lod = 0, dst_width_lod = imageInfo->width, dst_row_pitch_lod; + size_t width_lod = imageInfo->width; + size_t max_mip_level; + + if( gTestMipmaps ) + { + max_mip_level = imageInfo->num_mip_levels; + // Work at a random mip level + src_lod = (size_t)random_in_range( 0, max_mip_level ? max_mip_level - 1 : 0, d ); + dst_lod = (size_t)random_in_range( 0, max_mip_level ? max_mip_level - 1 : 0, d ); + src_width_lod = ( imageInfo->width >> src_lod )? ( imageInfo->width >> src_lod ) : 1; + dst_width_lod = ( imageInfo->width >> dst_lod )? ( imageInfo->width >> dst_lod ) : 1; + width_lod = ( src_width_lod > dst_width_lod ) ? 
dst_width_lod : src_width_lod; + src_row_pitch_lod = src_width_lod * get_pixel_size( imageInfo->format ); + dst_row_pitch_lod = dst_width_lod * get_pixel_size( imageInfo->format ); + } + + // First, try just a full covering region + sourcePos[ 0 ] = sourcePos[ 1 ] = sourcePos[ 2 ] = 0; + destPos[ 0 ] = destPos[ 1 ] = destPos[ 2 ] = 0; + regionSize[ 0 ] = imageInfo->width; + regionSize[ 1 ] = imageInfo->arraySize; + regionSize[ 2 ] = 1; + + if(gTestMipmaps) + { + sourcePos[ 2 ] = src_lod; + destPos[ 2 ] = dst_lod; + regionSize[ 0 ] = width_lod; + } + + retCode = test_copy_image_generic( device, imageInfo, imageInfo, sourcePos, destPos, regionSize, d ); + if( retCode < 0 ) + return retCode; + else + ret += retCode; + + // Now try a sampling of different random regions + for( int i = 0; i < 8; i++ ) + { + if( gTestMipmaps ) + { + // Work at a random mip level + src_lod = (size_t) ( max_mip_level > 1 )? random_in_range( 0, max_mip_level - 1 , d ) : 0; + dst_lod = (size_t) ( max_mip_level > 1 )? random_in_range( 0, max_mip_level - 1 , d ) : 0; + src_width_lod = ( imageInfo->width >> src_lod )? ( imageInfo->width >> src_lod ) : 1; + dst_width_lod = ( imageInfo->width >> dst_lod )? ( imageInfo->width >> dst_lod ) : 1; + width_lod = ( src_width_lod > dst_width_lod ) ? dst_width_lod : src_width_lod; + sourcePos[ 2 ] = src_lod; + destPos[ 2 ] = dst_lod; + } + // Pick a random size + regionSize[ 0 ] = ( width_lod > 8 ) ? (size_t)random_in_range( 8, (int)width_lod - 1, d ) : (int)width_lod; + regionSize[ 1 ] = ( imageInfo->arraySize > 8 ) ? (size_t)random_in_range( 8, (int)imageInfo->arraySize - 1, d ) : imageInfo->arraySize; + + // Now pick positions within valid ranges + sourcePos[ 0 ] = ( width_lod > regionSize[ 0 ] ) ? (size_t)random_in_range( 0, (int)( width_lod - regionSize[ 0 ] - 1 ), d ) : 0; + sourcePos[ 1 ] = ( imageInfo->arraySize > regionSize[ 1 ] ) ? 
(size_t)random_in_range( 0, (int)( imageInfo->arraySize - regionSize[ 1 ] - 1 ), d ) : 0; + + + destPos[ 0 ] = ( width_lod > regionSize[ 0 ] ) ? (size_t)random_in_range( 0, (int)( width_lod - regionSize[ 0 ] - 1 ), d ) : 0; + destPos[ 1 ] = ( imageInfo->arraySize > regionSize[ 1 ] ) ? (size_t)random_in_range( 0, (int)( imageInfo->arraySize - regionSize[ 1 ] - 1 ), d ) : 0; + + + // Go for it! + retCode = test_copy_image_generic( device, imageInfo, imageInfo, sourcePos, destPos, regionSize, d ); + if( retCode < 0 ) + return retCode; + else + ret += retCode; + } + + return ret; +} + +int test_copy_image_set_1D_array( cl_device_id device, cl_image_format *format ) +{ + size_t maxWidth, maxArraySize; + cl_ulong maxAllocSize, memSize; + image_descriptor imageInfo = { 0 }; + RandomSeed seed(gRandomSeed); + size_t pixelSize; + + imageInfo.format = format; + imageInfo.type = CL_MEM_OBJECT_IMAGE1D_ARRAY; + pixelSize = get_pixel_size( imageInfo.format ); + + int error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, sizeof( maxArraySize ), &maxArraySize, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL ); + test_error( error, "Unable to get max image 1D array size from device" ); + + if (memSize > (cl_ulong)SIZE_MAX) { + memSize = (cl_ulong)SIZE_MAX; + } + + if( gTestSmallImages ) + { + for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ ) + { + size_t rowPadding = gEnablePitch ? 
48 : 0; + + imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding; + + if (gTestMipmaps) + imageInfo.num_mip_levels = (cl_uint) random_log_in_range(2, (int)compute_max_mip_levels(imageInfo.width, 0, 0), seed); + + if (gEnablePitch) + { + do { + rowPadding++; + imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding; + } while ((imageInfo.rowPitch % pixelSize) != 0); + } + + imageInfo.slicePitch = imageInfo.rowPitch; + for( imageInfo.arraySize = 2; imageInfo.arraySize < 9; imageInfo.arraySize++ ) + { + if( gDebugTrace ) + log_info( " at size %d,%d\n", (int)imageInfo.width, (int)imageInfo.arraySize ); + + int ret = test_copy_image_size_1D_array( device, &imageInfo, seed ); + if( ret ) + return -1; + } + } + } + else if( gTestMaxImages ) + { + // Try a specific set of maximum sizes + size_t numbeOfSizes; + size_t sizes[100][3]; + + get_max_sizes(&numbeOfSizes, 100, sizes, maxWidth, 1, 1, maxArraySize, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE1D_ARRAY, imageInfo.format); + + for( size_t idx = 0; idx < numbeOfSizes; idx++ ) + { + size_t rowPadding = gEnablePitch ? 
48 : 0; + + imageInfo.width = sizes[ idx ][ 0 ]; + imageInfo.arraySize = sizes[ idx ][ 2 ]; + imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding; + + if (gTestMipmaps) + imageInfo.num_mip_levels = (cl_uint) random_log_in_range(2, (int)compute_max_mip_levels(imageInfo.width, 0, 0), seed); + + if (gEnablePitch) + { + do { + rowPadding++; + imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding; + } while ((imageInfo.rowPitch % pixelSize) != 0); + } + + imageInfo.slicePitch = imageInfo.rowPitch; + log_info( "Testing %d x %d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 2 ] ); + if( gDebugTrace ) + log_info( " at max size %d,%d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 2 ] ); + if( test_copy_image_size_1D_array( device, &imageInfo, seed ) ) + return -1; + } + } + else + { + for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ ) + { + cl_ulong size; + size_t rowPadding = gEnablePitch ? 48 : 0; + // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that + // image, the result array, plus offset arrays, will fit in the global ram space + do + { + imageInfo.width = (size_t)random_log_in_range( 16, (int)maxWidth / 32, seed ); + imageInfo.arraySize = (size_t)random_log_in_range( 16, (int)maxArraySize / 32, seed ); + imageInfo.height = imageInfo.depth = 0; + + if (gTestMipmaps) + { + imageInfo.num_mip_levels = (cl_uint) random_log_in_range(2, (int)compute_max_mip_levels(imageInfo.width, 0, 0), seed); + imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format ); + imageInfo.slicePitch = imageInfo.rowPitch; + size = compute_mipmapped_image_size( imageInfo ); + size = size*4; + } + else + { + imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding; + + if (gEnablePitch) + { + do { + rowPadding++; + imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding; + } while ((imageInfo.rowPitch % pixelSize) != 0); + } + + imageInfo.slicePitch = imageInfo.rowPitch; + + size = (size_t)imageInfo.rowPitch * 
(size_t)imageInfo.arraySize * 4; + } + } while( size > maxAllocSize || ( size * 3 ) > memSize ); + + if( gDebugTrace ) + log_info( " at size %d,%d (row pitch %d) out of %d,%d\n", (int)imageInfo.width, (int)imageInfo.arraySize, (int)imageInfo.rowPitch, (int)maxWidth, (int)maxArraySize ); + int ret = test_copy_image_size_1D_array( device, &imageInfo, seed ); + if( ret ) + return -1; + } + } + + return 0; +} diff --git a/test_conformance/images/clCopyImage/test_copy_2D.cpp b/test_conformance/images/clCopyImage/test_copy_2D.cpp new file mode 100644 index 00000000..3a9ed478 --- /dev/null +++ b/test_conformance/images/clCopyImage/test_copy_2D.cpp @@ -0,0 +1,248 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../testBase.h" + +#define MAX_ERR 0.005f +#define MAX_HALF_LINEAR_ERR 0.3f + +extern bool gDebugTrace, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestRounding, gTestMipmaps; +extern cl_filter_mode gFilterModeToUse; +extern cl_addressing_mode gAddressModeToUse; +extern uint64_t gRoundingStartValue; +extern cl_command_queue queue; +extern cl_context context; + +extern int test_copy_image_generic( cl_device_id device, image_descriptor *srcImageInfo, image_descriptor *dstImageInfo, + const size_t sourcePos[], const size_t destPos[], const size_t regionSize[], MTdata d ); + +int test_copy_image_size_2D( cl_device_id device, image_descriptor *imageInfo, MTdata d ) +{ + size_t sourcePos[ 3 ], destPos[ 3 ], regionSize[ 3 ]; + int ret = 0, retCode; + size_t src_lod = 0, src_width_lod = imageInfo->width, src_row_pitch_lod; + size_t src_height_lod = imageInfo->height; + size_t dst_lod = 0, dst_width_lod = imageInfo->width, dst_row_pitch_lod; + size_t dst_height_lod = imageInfo->height; + size_t width_lod = imageInfo->width, height_lod = imageInfo->height; + size_t max_mip_level; + + if( gTestMipmaps ) + { + max_mip_level = imageInfo->num_mip_levels; + // Work at a random mip level + src_lod = (size_t)random_in_range( 0, max_mip_level ? max_mip_level - 1 : 0, d ); + dst_lod = (size_t)random_in_range( 0, max_mip_level ? max_mip_level - 1 : 0, d ); + src_width_lod = ( imageInfo->width >> src_lod )? ( imageInfo->width >> src_lod ) : 1; + dst_width_lod = ( imageInfo->width >> dst_lod )? ( imageInfo->width >> dst_lod ) : 1; + src_height_lod = ( imageInfo->height >> src_lod )? ( imageInfo->height >> src_lod ) : 1; + dst_height_lod = ( imageInfo->height >> dst_lod )? ( imageInfo->height >> dst_lod ) : 1; + width_lod = ( src_width_lod > dst_width_lod ) ? dst_width_lod : src_width_lod; + height_lod = ( src_height_lod > dst_height_lod ) ? 
dst_height_lod : src_height_lod; + src_row_pitch_lod = src_width_lod * get_pixel_size( imageInfo->format ); + dst_row_pitch_lod = dst_width_lod * get_pixel_size( imageInfo->format ); + } + + // First, try just a full covering region + sourcePos[ 0 ] = sourcePos[ 1 ] = sourcePos[ 2 ] = 0; + destPos[ 0 ] = destPos[ 1 ] = destPos[ 2 ] = 0; + regionSize[ 0 ] = imageInfo->width; + regionSize[ 1 ] = imageInfo->height; + regionSize[ 2 ] = 1; + + if(gTestMipmaps) + { + sourcePos[ 2 ] = src_lod; + destPos[ 2 ] = dst_lod; + regionSize[ 0 ] = width_lod; + regionSize[ 1 ] = height_lod; + } + + retCode = test_copy_image_generic( device, imageInfo, imageInfo, sourcePos, destPos, regionSize, d ); + if( retCode < 0 ) + return retCode; + else + ret += retCode; + + // Now try a sampling of different random regions + for( int i = 0; i < 8; i++ ) + { + if( gTestMipmaps ) + { + // Work at a random mip level + src_lod = (size_t)random_in_range( 0, max_mip_level ? max_mip_level - 1 : 0, d ); + dst_lod = (size_t)random_in_range( 0, max_mip_level ? max_mip_level - 1 : 0, d ); + src_width_lod = ( imageInfo->width >> src_lod )? ( imageInfo->width >> src_lod ) : 1; + dst_width_lod = ( imageInfo->width >> dst_lod )? ( imageInfo->width >> dst_lod ) : 1; + src_height_lod = ( imageInfo->height >> src_lod )? ( imageInfo->height >> src_lod ) : 1; + dst_height_lod = ( imageInfo->height >> dst_lod )? ( imageInfo->height >> dst_lod ) : 1; + width_lod = ( src_width_lod > dst_width_lod ) ? dst_width_lod : src_width_lod; + height_lod = ( src_height_lod > dst_height_lod ) ? dst_height_lod : src_height_lod; + sourcePos[ 2 ] = src_lod; + destPos[ 2 ] = dst_lod; + } + // Pick a random size + regionSize[ 0 ] = ( width_lod > 8 ) ? (size_t)random_in_range( 8, (int)width_lod - 1, d ) : width_lod; + regionSize[ 1 ] = ( height_lod > 8 ) ? (size_t)random_in_range( 8, (int)height_lod - 1, d ) : height_lod; + + // Now pick positions within valid ranges + sourcePos[ 0 ] = ( width_lod > regionSize[ 0 ] ) ? 
(size_t)random_in_range( 0, (int)( width_lod - regionSize[ 0 ] - 1 ), d ) : 0; + sourcePos[ 1 ] = ( height_lod > regionSize[ 1 ] ) ? (size_t)random_in_range( 0, (int)( height_lod - regionSize[ 1 ] - 1 ), d ) : 0; + + destPos[ 0 ] = ( width_lod > regionSize[ 0 ] ) ? (size_t)random_in_range( 0, (int)( width_lod - regionSize[ 0 ] - 1 ), d ) : 0; + destPos[ 1 ] = ( height_lod > regionSize[ 1 ] ) ? (size_t)random_in_range( 0, (int)( height_lod - regionSize[ 1 ] - 1 ), d ) : 0; + + // Go for it! + retCode = test_copy_image_generic( device, imageInfo, imageInfo, sourcePos, destPos, regionSize, d ); + if( retCode < 0 ) + return retCode; + else + ret += retCode; + } + + return ret; +} + +int test_copy_image_set_2D( cl_device_id device, cl_image_format *format ) +{ + size_t maxWidth, maxHeight; + cl_ulong maxAllocSize, memSize; + image_descriptor imageInfo = { 0 }; + RandomSeed seed(gRandomSeed); + size_t pixelSize; + + imageInfo.format = format; + imageInfo.type = CL_MEM_OBJECT_IMAGE2D; + pixelSize = get_pixel_size( imageInfo.format ); + + int error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL ); + test_error( error, "Unable to get max image 2D size from device" ); + + if (memSize > (cl_ulong)SIZE_MAX) { + memSize = (cl_ulong)SIZE_MAX; + } + + if( gTestSmallImages ) + { + for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ ) + { + size_t rowPadding = gEnablePitch ? 
48 : 0; + + imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding; + + if (gTestMipmaps) + imageInfo.num_mip_levels = (cl_uint) random_log_in_range(2, (int)compute_max_mip_levels(imageInfo.width, imageInfo.height, 0), seed); + + if (gEnablePitch) + { + do { + rowPadding++; + imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding; + } while ((imageInfo.rowPitch % pixelSize) != 0); + } + + for( imageInfo.height = 1; imageInfo.height < 9; imageInfo.height++ ) + { + if( gDebugTrace ) + log_info( " at size %d,%d\n", (int)imageInfo.width, (int)imageInfo.height ); + + int ret = test_copy_image_size_2D( device, &imageInfo, seed ); + if( ret ) + return -1; + } + } + } + else if( gTestMaxImages ) + { + // Try a specific set of maximum sizes + size_t numbeOfSizes; + size_t sizes[100][3]; + + get_max_sizes(&numbeOfSizes, 100, sizes, maxWidth, maxHeight, 1, 1, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE2D, imageInfo.format); + + for( size_t idx = 0; idx < numbeOfSizes; idx++ ) + { + size_t rowPadding = gEnablePitch ? 48 : 0; + + imageInfo.width = sizes[ idx ][ 0 ]; + imageInfo.height = sizes[ idx ][ 1 ]; + imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding; + + if (gTestMipmaps) + imageInfo.num_mip_levels = (cl_uint) random_log_in_range(2, (int)compute_max_mip_levels(imageInfo.width, imageInfo.height, 0), seed); + + if (gEnablePitch) + { + do { + rowPadding++; + imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding; + } while ((imageInfo.rowPitch % pixelSize) != 0); + } + + log_info( "Testing %d x %d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ] ); + if( gDebugTrace ) + log_info( " at max size %d,%d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ] ); + if( test_copy_image_size_2D( device, &imageInfo, seed ) ) + return -1; + } + } + else + { + for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ ) + { + cl_ulong size; + size_t rowPadding = gEnablePitch ? 
48 : 0; + // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that + // image, the result array, plus offset arrays, will fit in the global ram space + do + { + imageInfo.width = (size_t)random_log_in_range( 16, (int)maxWidth / 32, seed ); + imageInfo.height = (size_t)random_log_in_range( 16, (int)maxHeight / 32, seed ); + + if (gTestMipmaps) + { + imageInfo.num_mip_levels = (cl_uint) random_log_in_range(2, (int)compute_max_mip_levels(imageInfo.width, imageInfo.height, 0), seed); + imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format ); + size = compute_mipmapped_image_size( imageInfo ); + size = size*4; + } + else + { + imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding; + if (gEnablePitch) + { + do { + rowPadding++; + imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding; + } while ((imageInfo.rowPitch % pixelSize) != 0); + } + + size = (size_t)imageInfo.rowPitch * (size_t)imageInfo.height * 4; + } + } while( size > maxAllocSize || ( size * 3 ) > memSize ); + + if( gDebugTrace ) + log_info( " at size %d,%d (row pitch %d) out of %d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.rowPitch, (int)maxWidth, (int)maxHeight ); + int ret = test_copy_image_size_2D( device, &imageInfo, seed ); + if( ret ) + return -1; + } + } + + return 0; +} diff --git a/test_conformance/images/clCopyImage/test_copy_2D_2D_array.cpp b/test_conformance/images/clCopyImage/test_copy_2D_2D_array.cpp new file mode 100644 index 00000000..d7d63040 --- /dev/null +++ b/test_conformance/images/clCopyImage/test_copy_2D_2D_array.cpp @@ -0,0 +1,419 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../testBase.h" + +#define MAX_ERR 0.005f +#define MAX_HALF_LINEAR_ERR 0.3f + +extern bool gDebugTrace, gDisableOffsets, gTestSmallImages, gTestMaxImages, gTestRounding, gEnablePitch, gTestMipmaps; +extern cl_filter_mode gFilterModeToUse; +extern cl_addressing_mode gAddressModeToUse; +extern uint64_t gRoundingStartValue; +extern cl_command_queue queue; +extern cl_context context; + +extern int test_copy_image_generic( cl_device_id device, image_descriptor *srcImageInfo, image_descriptor *dstImageInfo, + const size_t sourcePos[], const size_t destPos[], const size_t regionSize[], MTdata d ); + + +static size_t random_in_ranges( size_t minimum, size_t rangeA, size_t rangeB, MTdata d ) +{ + if( rangeB < rangeA ) + rangeA = rangeB; + if( rangeA < minimum ) + return rangeA; + return (size_t)random_in_range( (int)minimum, (int)rangeA - 1, d ); +} + + +static void set_image_dimensions( image_descriptor *imageInfo, size_t width, size_t height, size_t arraySize, size_t rowPadding, size_t slicePadding ) +{ + size_t pixelSize = get_pixel_size( imageInfo->format ); + + imageInfo->width = width; + imageInfo->height = height; + imageInfo->arraySize = arraySize; + imageInfo->rowPitch = imageInfo->width * pixelSize + rowPadding; + + if (gEnablePitch) + { + do { + rowPadding++; + imageInfo->rowPitch = imageInfo->width * pixelSize + rowPadding; + } while ((imageInfo->rowPitch % pixelSize) != 0); + } + + if (arraySize == 0) + { + imageInfo->type = CL_MEM_OBJECT_IMAGE2D; + imageInfo->slicePitch = 0; + } + else + { + imageInfo->type = 
CL_MEM_OBJECT_IMAGE2D_ARRAY; + imageInfo->slicePitch = imageInfo->rowPitch * (imageInfo->height + slicePadding); + } +} + + +int test_copy_image_size_2D_2D_array( cl_device_id device, image_descriptor *srcImageInfo, image_descriptor *dstImageInfo, MTdata d ) +{ + size_t sourcePos[ 4 ] = { 0 }, destPos[ 4 ] = { 0 }, regionSize[ 3 ]; + int ret = 0, retCode; + + image_descriptor *threeImage, *twoImage; + + if( srcImageInfo->arraySize > 0 ) + { + threeImage = srcImageInfo; + twoImage = dstImageInfo; + } + else + { + threeImage = dstImageInfo; + twoImage = srcImageInfo; + } + + size_t twoImage_lod = 0, twoImage_width_lod = twoImage->width, twoImage_row_pitch_lod; + size_t twoImage_height_lod = twoImage->height; + size_t threeImage_lod = 0, threeImage_width_lod = threeImage->width, threeImage_row_pitch_lod, threeImage_slice_pitch_lod; + size_t threeImage_height_lod = threeImage->height; + size_t width_lod, height_lod; + size_t twoImage_max_mip_level,threeImage_max_mip_level; + + if( gTestMipmaps ) + { + twoImage_max_mip_level = twoImage->num_mip_levels; + threeImage_max_mip_level = threeImage->num_mip_levels; + // Work at random mip levels + twoImage_lod = (size_t)random_in_range( 0, twoImage_max_mip_level ? twoImage_max_mip_level - 1 : 0, d ); + threeImage_lod = (size_t)random_in_range( 0, threeImage_max_mip_level ? threeImage_max_mip_level - 1 : 0, d ); + twoImage_width_lod = ( twoImage->width >> twoImage_lod )? ( twoImage->width >> twoImage_lod ) : 1; + threeImage_width_lod = ( threeImage->width >> threeImage_lod )? ( threeImage->width >> threeImage_lod ) : 1; + twoImage_height_lod = ( twoImage->height >> twoImage_lod )? ( twoImage->height >> twoImage_lod ) : 1; + threeImage_height_lod = ( threeImage->height >> threeImage_lod )? 
( threeImage->height >> threeImage_lod ) : 1; + twoImage_row_pitch_lod = twoImage_width_lod * get_pixel_size( twoImage->format ); + threeImage_row_pitch_lod = threeImage_width_lod * get_pixel_size( threeImage->format ); + threeImage_slice_pitch_lod = threeImage_height_lod * threeImage_row_pitch_lod; + } + width_lod = ( twoImage_width_lod > threeImage_width_lod ) ? threeImage_width_lod : twoImage_width_lod; + height_lod = ( twoImage_height_lod > threeImage_height_lod ) ? threeImage_height_lod : twoImage_height_lod; + + // First, try just a full covering region + sourcePos[ 0 ] = sourcePos[ 1 ] = sourcePos[ 2 ] = sourcePos[ 3 ] = 0; + destPos[ 0 ] = destPos[ 1 ] = destPos[ 2 ] = destPos[ 3 ] = 0; + regionSize[ 0 ] = width_lod; + regionSize[ 1 ] = height_lod; + regionSize[ 2 ] = 1; + + if( srcImageInfo->arraySize == 0 ) + { + // 2D to 2D array + destPos[ 2 ] = (size_t)random_in_range( 0, (int)dstImageInfo->arraySize - 1, d ); + if(gTestMipmaps) + { + sourcePos[ 2 ] = twoImage_lod; + destPos[ 3 ] = threeImage_lod; + regionSize[ 0 ] = width_lod; + regionSize[ 1 ] = height_lod; + } + } + else + { + // 2D array to 2D + sourcePos[ 2 ] = (size_t)random_in_range( 0, (int)srcImageInfo->arraySize - 1, d ); + if(gTestMipmaps) + { + sourcePos[ 3 ] = threeImage_lod; + destPos[ 2 ] = twoImage_lod; + regionSize[ 0 ] = width_lod; + regionSize[ 1 ] = height_lod; + } + } + + retCode = test_copy_image_generic( device, srcImageInfo, dstImageInfo, sourcePos, destPos, regionSize, d ); + if( retCode < 0 ) + return retCode; + else + ret += retCode; + + // Now try a sampling of different random regions + for( int i = 0; i < 8; i++ ) + { + if( gTestMipmaps ) + { + // Work at a random mip level + twoImage_lod = (size_t)random_in_range( 0, twoImage_max_mip_level ? twoImage_max_mip_level - 1 : 0, d ); + threeImage_lod = (size_t)random_in_range( 0, threeImage_max_mip_level ? threeImage_max_mip_level - 1 : 0, d ); + twoImage_width_lod = ( twoImage->width >> twoImage_lod )? 
( twoImage->width >> twoImage_lod ) : 1; + threeImage_width_lod = ( threeImage->width >> threeImage_lod )? ( threeImage->width >> threeImage_lod ) : 1; + twoImage_height_lod = ( twoImage->height >> twoImage_lod )? ( twoImage->height >> twoImage_lod ) : 1; + threeImage_height_lod = ( threeImage->height >> threeImage_lod )? ( threeImage->height >> threeImage_lod ) : 1; + width_lod = ( twoImage_width_lod > threeImage_width_lod ) ? threeImage_width_lod : twoImage_width_lod; + height_lod = ( twoImage_height_lod > threeImage_height_lod ) ? threeImage_height_lod : twoImage_height_lod; + } + // Pick a random size + regionSize[ 0 ] = random_in_ranges( 8, srcImageInfo->width, dstImageInfo->width, d ); + regionSize[ 1 ] = random_in_ranges( 8, srcImageInfo->height, dstImageInfo->height, d ); + if( gTestMipmaps ) + { + regionSize[ 0 ] = ( width_lod > 8 ) ? random_in_range( 8, width_lod, d ) : width_lod; + regionSize[ 1 ] = ( height_lod > 8) ? random_in_range( 8, height_lod, d ): height_lod; + } + + // Now pick positions within valid ranges + sourcePos[ 0 ] = ( srcImageInfo->width > regionSize[ 0 ] ) ? (size_t)random_in_range( 0, (int)( srcImageInfo->width - regionSize[ 0 ] - 1 ), d ) : 0; + sourcePos[ 1 ] = ( srcImageInfo->height > regionSize[ 1 ] ) ? (size_t)random_in_range( 0, (int)( srcImageInfo->height - regionSize[ 1 ] - 1 ), d ) : 0; + sourcePos[ 2 ] = ( srcImageInfo->arraySize > 0 ) ? (size_t)random_in_range( 0, (int)( srcImageInfo->arraySize - 1 ), d ) : gTestMipmaps ? twoImage_lod : 0; + if ( gTestMipmaps ) + if( srcImageInfo->arraySize > 0 ) + { + sourcePos[ 0 ] = ( threeImage_width_lod > regionSize[ 0 ] ) ? (size_t)random_in_range( 0, (int)( threeImage_width_lod - regionSize[ 0 ] - 1 ), d ) : 0; + sourcePos[ 1 ] = ( threeImage_height_lod > regionSize[ 1 ] ) ? (size_t)random_in_range( 0, (int)( threeImage_height_lod - regionSize[ 1 ] - 1 ), d ) : 0; + sourcePos[ 3 ] = threeImage_lod; + } + else + { + sourcePos[ 0 ] = ( twoImage_width_lod > regionSize[ 0 ] ) ? 
(size_t)random_in_range( 0, (int)( twoImage_width_lod - regionSize[ 0 ] - 1 ), d ) : 0; + sourcePos[ 1 ] = ( twoImage_height_lod > regionSize[ 1 ] ) ? (size_t)random_in_range( 0, (int)( twoImage_height_lod - regionSize[ 1 ] - 1 ), d ) : 0; + + } + + destPos[ 0 ] = ( dstImageInfo->width > regionSize[ 0 ] ) ? (size_t)random_in_range( 0, (int)( dstImageInfo->width - regionSize[ 0 ] - 1 ), d ) : 0; + destPos[ 1 ] = ( dstImageInfo->height > regionSize[ 1 ] ) ? (size_t)random_in_range( 0, (int)( dstImageInfo->height - regionSize[ 1 ] - 1 ), d ) : 0; + destPos[ 2 ] = ( dstImageInfo->arraySize > 0 ) ? (size_t)random_in_range( 0, (int)( dstImageInfo->arraySize - 1 ), d ) : gTestMipmaps ? twoImage_lod : 0; + if ( gTestMipmaps ) + if( dstImageInfo->arraySize > 0 ) + { + destPos[ 0 ] = ( threeImage_width_lod > regionSize[ 0 ] ) ? (size_t)random_in_range( 0, (int)( threeImage_width_lod - regionSize[ 0 ] - 1 ), d ) : 0; + destPos[ 1 ] = ( threeImage_height_lod > regionSize[ 1 ] ) ? (size_t)random_in_range( 0, (int)( threeImage_height_lod - regionSize[ 1 ] - 1 ), d ) : 0; + destPos[ 3 ] = threeImage_lod; + } + else + { + destPos[ 0 ] = ( twoImage_width_lod > regionSize[ 0 ] ) ? (size_t)random_in_range( 0, (int)( twoImage_width_lod - regionSize[ 0 ] - 1 ), d ) : 0; + destPos[ 1 ] = ( twoImage_height_lod > regionSize[ 1 ] ) ? (size_t)random_in_range( 0, (int)( twoImage_height_lod - regionSize[ 1 ] - 1 ), d ) : 0; + + } + + // Go for it! 
+ retCode = test_copy_image_generic( device, srcImageInfo, dstImageInfo, sourcePos, destPos, regionSize, d ); + if( retCode < 0 ) + return retCode; + else + ret += retCode; + } + + return ret; +} + + +int test_copy_image_set_2D_2D_array( cl_device_id device, cl_image_format *format, bool reverse = false ) +{ + size_t maxWidth, maxHeight, maxArraySize; + cl_ulong maxAllocSize, memSize; + image_descriptor srcImageInfo = { 0 }; + image_descriptor dstImageInfo = { 0 }; + RandomSeed seed( gRandomSeed ); + + srcImageInfo.format = dstImageInfo.format = format; + + int error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, sizeof( maxArraySize ), &maxArraySize, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL ); + test_error( error, "Unable to get max image 2D size from device" ); + + if (memSize > (cl_ulong)SIZE_MAX) { + memSize = (cl_ulong)SIZE_MAX; + } + + if( gTestSmallImages ) + { + for( dstImageInfo.width = 4; dstImageInfo.width < 17; dstImageInfo.width++ ) + { + for( dstImageInfo.height = 4; dstImageInfo.height < 13; dstImageInfo.height++ ) + { + for( dstImageInfo.arraySize = 4; dstImageInfo.arraySize < 9; dstImageInfo.arraySize++ ) + { + size_t rowPadding = gEnablePitch ? 256 : 0; + size_t slicePadding = gEnablePitch ? 
3 : 0; + + set_image_dimensions( &dstImageInfo, dstImageInfo.width, dstImageInfo.height, dstImageInfo.arraySize, rowPadding, slicePadding ); + set_image_dimensions( &srcImageInfo, dstImageInfo.width, dstImageInfo.height, 0, rowPadding, slicePadding ); + + if (gTestMipmaps) + { + srcImageInfo.num_mip_levels = (cl_uint) random_log_in_range(2, (int)compute_max_mip_levels(srcImageInfo.width, srcImageInfo.height, 0), seed); + srcImageInfo.type = CL_MEM_OBJECT_IMAGE2D; + dstImageInfo.num_mip_levels = (cl_uint) random_log_in_range(2, (int)compute_max_mip_levels(dstImageInfo.width, dstImageInfo.height, 0), seed); + dstImageInfo.type = CL_MEM_OBJECT_IMAGE2D_ARRAY; + srcImageInfo.rowPitch = srcImageInfo.width * get_pixel_size( srcImageInfo.format ); + srcImageInfo.slicePitch = 0; + dstImageInfo.rowPitch = dstImageInfo.width * get_pixel_size( dstImageInfo.format ); + dstImageInfo.slicePitch = dstImageInfo.rowPitch * dstImageInfo.height; + } + + if( gDebugTrace ) + { + if (reverse) + log_info( " at size %d,%d,%d to %d,%d\n", (int)dstImageInfo.width, (int)dstImageInfo.height, (int)dstImageInfo.arraySize, (int)srcImageInfo.width, (int)srcImageInfo.height ); + else + log_info( " at size %d,%d to %d,%d,%d\n", (int)srcImageInfo.width, (int)srcImageInfo.height, (int)dstImageInfo.width, (int)dstImageInfo.height, (int)dstImageInfo.arraySize ); + } + int ret; + if( reverse ) + ret = test_copy_image_size_2D_2D_array( device, &dstImageInfo, &srcImageInfo, seed ); + else + ret = test_copy_image_size_2D_2D_array( device, &srcImageInfo, &dstImageInfo, seed ); + if( ret ) + return -1; + } + } + } + } + else if( gTestMaxImages ) + { + // Try a specific set of maximum sizes + size_t numberOfSizes2DArray, numberOfSizes2D; + size_t sizes2DArray[100][3], sizes2D[100][3]; + + // Try to allocate a bit smaller images because we need the 2D ones as well for the copy. 
+ get_max_sizes(&numberOfSizes2DArray, 100, sizes2DArray, maxWidth, maxHeight, 1, maxArraySize, maxAllocSize/2, memSize/2, CL_MEM_OBJECT_IMAGE2D_ARRAY, dstImageInfo.format); + get_max_sizes(&numberOfSizes2D, 100, sizes2D, maxWidth, maxHeight, 1, 1, maxAllocSize/2, memSize/2, CL_MEM_OBJECT_IMAGE2D, dstImageInfo.format); + + for( size_t i = 0; i < numberOfSizes2D; i++ ) + { + for( size_t j = 0; j < numberOfSizes2DArray; j++ ) + { + size_t rowPadding = gEnablePitch ? 256 : 0; + size_t slicePadding = gEnablePitch ? 3 : 0; + + set_image_dimensions( &dstImageInfo, sizes2DArray[ j ][ 0 ], sizes2DArray[ j ][ 1 ], sizes2DArray[ j ][ 2 ], rowPadding, slicePadding ); + set_image_dimensions( &srcImageInfo, sizes2D[ i ][ 0 ], sizes2D[ i ][ 1 ], 0, rowPadding, slicePadding ); + + cl_ulong dstSize = get_image_size(&dstImageInfo); + cl_ulong srcSize = get_image_size(&srcImageInfo); + + if (gTestMipmaps) + { + srcImageInfo.num_mip_levels = (cl_uint) random_log_in_range(2, (int)compute_max_mip_levels(srcImageInfo.width, srcImageInfo.height, 0), seed); + srcImageInfo.type = CL_MEM_OBJECT_IMAGE2D; + dstImageInfo.num_mip_levels = (cl_uint) random_log_in_range(2, (int)compute_max_mip_levels(dstImageInfo.width, dstImageInfo.height, 0), seed); + dstImageInfo.type = CL_MEM_OBJECT_IMAGE2D_ARRAY; + srcImageInfo.rowPitch = srcImageInfo.width * get_pixel_size( srcImageInfo.format ); + srcImageInfo.slicePitch = 0; + dstImageInfo.rowPitch = dstImageInfo.width * get_pixel_size( dstImageInfo.format ); + dstImageInfo.slicePitch = dstImageInfo.rowPitch * dstImageInfo.height; + dstSize = 4 * compute_mipmapped_image_size( dstImageInfo ); + srcSize = 4 * compute_mipmapped_image_size( srcImageInfo ); + } + + if( dstSize < maxAllocSize && dstSize < ( memSize / 3 ) && srcSize < maxAllocSize && srcSize < ( memSize / 3 ) ) + { + if (reverse) + log_info( "Testing %d x %d x %d to %d x %d\n", (int)dstImageInfo.width, (int)dstImageInfo.height, (int)dstImageInfo.arraySize, (int)srcImageInfo.width, 
(int)srcImageInfo.height ); + else + log_info( "Testing %d x %d to %d x %d x %d\n", (int)srcImageInfo.width, (int)srcImageInfo.height, (int)dstImageInfo.width, (int)dstImageInfo.height, (int)dstImageInfo.arraySize ); + + if( gDebugTrace ) + { + if (reverse) + log_info( " at max size %d,%d,%d to %d,%d\n", (int)dstImageInfo.width, (int)dstImageInfo.height, (int)dstImageInfo.arraySize, (int)srcImageInfo.width, (int)srcImageInfo.height ); + else + log_info( " at max size %d,%d to %d,%d,%d\n", (int)srcImageInfo.width, (int)srcImageInfo.height, (int)dstImageInfo.width, (int)dstImageInfo.height, (int)dstImageInfo.arraySize ); + } + int ret; + if( reverse ) + ret = test_copy_image_size_2D_2D_array( device, &dstImageInfo, &srcImageInfo, seed ); + else + ret = test_copy_image_size_2D_2D_array( device, &srcImageInfo, &dstImageInfo, seed ); + if( ret ) + return -1; + } + else + { + if (reverse) + log_info("Not testing max size %d x %d x %d to %d x %d due to memory constraints.\n", + (int)dstImageInfo.width, (int)dstImageInfo.height, (int)dstImageInfo.arraySize, (int)srcImageInfo.width, (int)srcImageInfo.height); + else + log_info("Not testing max size %d x %d to %d x %d x %d due to memory constraints.\n", + (int)srcImageInfo.width, (int)srcImageInfo.height, (int)dstImageInfo.width, (int)dstImageInfo.height, (int)dstImageInfo.arraySize); + } + + } + } + } + else + { + for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ ) + { + cl_ulong srcSize, dstSize; + size_t rowPadding = gEnablePitch ? 256 : 0; + size_t slicePadding = gEnablePitch ? 
3 : 0; + // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that + // image, the result array, plus offset arrays, will fit in the global ram space + do + { + dstImageInfo.width = (size_t)random_log_in_range( 16, (int)maxWidth / 32, seed ); + dstImageInfo.height = (size_t)random_log_in_range( 16, (int)maxHeight / 32, seed ); + dstImageInfo.arraySize = (size_t)random_log_in_range( 16, (int)maxArraySize / 32, seed ); + srcImageInfo.width = (size_t)random_log_in_range( 16, (int)maxWidth / 32, seed ); + srcImageInfo.height = (size_t)random_log_in_range( 16, (int)maxHeight / 32, seed ); + + if (gTestMipmaps) + { + srcImageInfo.num_mip_levels = (cl_uint) random_log_in_range(2, (int)compute_max_mip_levels(srcImageInfo.width, srcImageInfo.height, 0), seed); + srcImageInfo.type = CL_MEM_OBJECT_IMAGE2D; + dstImageInfo.num_mip_levels = (cl_uint) random_log_in_range(2, (int)compute_max_mip_levels(dstImageInfo.width, dstImageInfo.height, 0), seed); + dstImageInfo.type = CL_MEM_OBJECT_IMAGE2D_ARRAY; + srcImageInfo.rowPitch = srcImageInfo.width * get_pixel_size( srcImageInfo.format ); + srcImageInfo.slicePitch = 0; + dstImageInfo.rowPitch = dstImageInfo.width * get_pixel_size( dstImageInfo.format ); + dstImageInfo.slicePitch = dstImageInfo.rowPitch * dstImageInfo.height; + srcSize = 4 * compute_mipmapped_image_size( srcImageInfo ); + dstSize = 4 * compute_mipmapped_image_size( dstImageInfo ); + } + else + { + set_image_dimensions( &srcImageInfo, srcImageInfo.width, srcImageInfo.height, 0, rowPadding, slicePadding ); + set_image_dimensions( &dstImageInfo, dstImageInfo.width, dstImageInfo.height, dstImageInfo.arraySize, rowPadding, slicePadding ); + + srcSize = (cl_ulong)srcImageInfo.rowPitch * (cl_ulong)srcImageInfo.height * 4; + dstSize = (cl_ulong)dstImageInfo.slicePitch * (cl_ulong)dstImageInfo.arraySize * 4; + } + } while( srcSize > maxAllocSize || ( srcSize * 3 ) > memSize || dstSize > maxAllocSize || ( dstSize * 3 ) > memSize); + 
+ if( gDebugTrace ) + { + if (reverse) + log_info( " at size %d,%d,%d to %d,%d\n", (int)dstImageInfo.width, (int)dstImageInfo.height, (int)dstImageInfo.arraySize, (int)srcImageInfo.width, (int)srcImageInfo.height ); + else + log_info( " at size %d,%d to %d,%d,%d\n", (int)srcImageInfo.width, (int)srcImageInfo.height, (int)dstImageInfo.width, (int)dstImageInfo.height, (int)dstImageInfo.arraySize ); + } + int ret; + if( reverse ) + ret = test_copy_image_size_2D_2D_array( device, &dstImageInfo, &srcImageInfo, seed ); + else + ret = test_copy_image_size_2D_2D_array( device, &srcImageInfo, &dstImageInfo, seed ); + if( ret ) + return -1; + } + } + + return 0; +} diff --git a/test_conformance/images/clCopyImage/test_copy_2D_3D.cpp b/test_conformance/images/clCopyImage/test_copy_2D_3D.cpp new file mode 100644 index 00000000..8873e48b --- /dev/null +++ b/test_conformance/images/clCopyImage/test_copy_2D_3D.cpp @@ -0,0 +1,401 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../testBase.h" + +#define MAX_ERR 0.005f +#define MAX_HALF_LINEAR_ERR 0.3f + +extern bool gDebugTrace, gDisableOffsets, gTestSmallImages, gTestMaxImages, gTestRounding, gEnablePitch, gTestMipmaps; +extern cl_filter_mode gFilterModeToUse; +extern cl_addressing_mode gAddressModeToUse; +extern uint64_t gRoundingStartValue; +extern cl_command_queue queue; +extern cl_context context; + +extern int test_copy_image_generic( cl_device_id device, image_descriptor *srcImageInfo, image_descriptor *dstImageInfo, + const size_t sourcePos[], const size_t destPos[], const size_t regionSize[], MTdata d ); + + +static size_t random_in_ranges( size_t minimum, size_t rangeA, size_t rangeB, MTdata d ) +{ + if( rangeB < rangeA ) + rangeA = rangeB; + if( rangeA < minimum ) + return rangeA; + return (size_t)random_in_range( (int)minimum, (int)rangeA - 1, d ); +} + + +static void set_image_dimensions( image_descriptor *imageInfo, size_t width, size_t height, size_t depth, size_t rowPadding, size_t slicePadding ) +{ + size_t pixelSize = get_pixel_size( imageInfo->format ); + + imageInfo->width = width; + imageInfo->height = height; + imageInfo->depth = depth; + imageInfo->rowPitch = imageInfo->width * pixelSize + rowPadding; + + if (gEnablePitch) + { + do { + rowPadding++; + imageInfo->rowPitch = imageInfo->width * pixelSize + rowPadding; + } while ((imageInfo->rowPitch % pixelSize) != 0); + } + + imageInfo->slicePitch = imageInfo->rowPitch * (imageInfo->height + slicePadding); + + if (depth == 0) + imageInfo->type = CL_MEM_OBJECT_IMAGE2D; + else + imageInfo->type = CL_MEM_OBJECT_IMAGE3D; +} + + +int test_copy_image_size_2D_3D( cl_device_id device, image_descriptor *srcImageInfo, image_descriptor *dstImageInfo, MTdata d ) +{ + size_t sourcePos[ 4 ] = { 0 }, destPos[ 4 ] = { 0 }, regionSize[ 3 ]; + int ret = 0, retCode; + + image_descriptor *threeImage, *twoImage; + + if( srcImageInfo->depth > 0 ) + { + threeImage = srcImageInfo; + twoImage = dstImageInfo; + } + else + { + 
threeImage = dstImageInfo; + twoImage = srcImageInfo; + } + + size_t twoImage_lod = 0, twoImage_width_lod = twoImage->width, twoImage_row_pitch_lod; + size_t twoImage_height_lod = twoImage->height; + size_t threeImage_lod = 0, threeImage_width_lod = threeImage->width, threeImage_row_pitch_lod, threeImage_slice_pitch_lod; + size_t threeImage_height_lod = threeImage->height, depth_lod = threeImage->depth; + size_t width_lod, height_lod; + size_t twoImage_max_mip_level,threeImage_max_mip_level; + + if( gTestMipmaps ) + { + twoImage_max_mip_level = twoImage->num_mip_levels; + threeImage_max_mip_level = threeImage->num_mip_levels; + // Work at random mip levels + twoImage_lod = (size_t)random_in_range( 0, twoImage_max_mip_level ? twoImage_max_mip_level - 1 : 0, d ); + threeImage_lod = (size_t)random_in_range( 0, threeImage_max_mip_level ? threeImage_max_mip_level - 1 : 0, d ); + twoImage_width_lod = ( twoImage->width >> twoImage_lod )? ( twoImage->width >> twoImage_lod ) : 1; + threeImage_width_lod = ( threeImage->width >> threeImage_lod )? ( threeImage->width >> threeImage_lod ) : 1; + twoImage_height_lod = ( twoImage->height >> twoImage_lod )? ( twoImage->height >> twoImage_lod ) : 1; + threeImage_height_lod = ( threeImage->height >> threeImage_lod )? ( threeImage->height >> threeImage_lod ) : 1; + depth_lod = ( threeImage->depth >> threeImage_lod )? ( threeImage->depth >> threeImage_lod ) : 1; + twoImage_row_pitch_lod = twoImage_width_lod * get_pixel_size( twoImage->format ); + threeImage_row_pitch_lod = threeImage_width_lod * get_pixel_size( threeImage->format ); + threeImage_slice_pitch_lod = threeImage_height_lod * threeImage_row_pitch_lod; + } + width_lod = ( twoImage_width_lod > threeImage_width_lod ) ? threeImage_width_lod : twoImage_width_lod; + height_lod = ( twoImage_height_lod > threeImage_height_lod ) ? 
threeImage_height_lod : twoImage_height_lod; + + // First, try just a full covering region + sourcePos[ 0 ] = sourcePos[ 1 ] = sourcePos[ 2 ] = sourcePos[ 3 ] = 0; + destPos[ 0 ] = destPos[ 1 ] = destPos[ 2 ] = destPos[ 3 ] = 0; + regionSize[ 0 ] = width_lod; + regionSize[ 1 ] = height_lod; + regionSize[ 2 ] = 1; + + if( srcImageInfo->depth == 0 ) + { + // 2D to 3D + destPos[ 2 ] = (size_t)random_in_range( 0, (int)dstImageInfo->depth - 1, d ); + if(gTestMipmaps) + { + destPos[ 2 ] = (size_t)random_in_range( 0, (int)depth_lod - 1, d ); + sourcePos[ 2 ] = twoImage_lod; + destPos[ 3 ] = threeImage_lod; + regionSize[ 0 ] = width_lod; + regionSize[ 1 ] = height_lod; + } + } + else + { + // 3D to 2D + sourcePos[ 2 ] = (size_t)random_in_range( 0, (int)srcImageInfo->depth - 1, d ); + if(gTestMipmaps) + { + sourcePos[ 2 ] = (size_t)random_in_range( 0, (int)depth_lod - 1, d ); + sourcePos[ 3 ] = threeImage_lod; + destPos[ 2 ] = twoImage_lod; + regionSize[ 0 ] = width_lod; + regionSize[ 1 ] = height_lod; + } + } + + retCode = test_copy_image_generic( device, srcImageInfo, dstImageInfo, sourcePos, destPos, regionSize, d ); + if( retCode < 0 ) + return retCode; + else + ret += retCode; + + // Now try a sampling of different random regions + for( int i = 0; i < 8; i++ ) + { + if( gTestMipmaps ) + { + // Work at a random mip level + twoImage_lod = (size_t)random_in_range( 0, twoImage_max_mip_level ? twoImage_max_mip_level - 1 : 0, d ); + threeImage_lod = (size_t)random_in_range( 0, threeImage_max_mip_level ? threeImage_max_mip_level - 1 : 0, d ); + twoImage_width_lod = ( twoImage->width >> twoImage_lod )? ( twoImage->width >> twoImage_lod ) : 1; + threeImage_width_lod = ( threeImage->width >> threeImage_lod )? ( threeImage->width >> threeImage_lod ) : 1; + twoImage_height_lod = ( twoImage->height >> twoImage_lod )? ( twoImage->height >> twoImage_lod ) : 1; + threeImage_height_lod = ( threeImage->height >> threeImage_lod )? 
( threeImage->height >> threeImage_lod ) : 1; + width_lod = ( twoImage_width_lod > threeImage_width_lod ) ? threeImage_width_lod : twoImage_width_lod; + height_lod = ( twoImage_height_lod > threeImage_height_lod ) ? threeImage_height_lod : twoImage_height_lod; + depth_lod = ( threeImage->depth >> threeImage_lod )? ( threeImage->depth >> threeImage_lod ) : 1; + } + // Pick a random size + regionSize[ 0 ] = random_in_ranges( 8, srcImageInfo->width, dstImageInfo->width, d ); + regionSize[ 1 ] = random_in_ranges( 8, srcImageInfo->height, dstImageInfo->height, d ); + if( gTestMipmaps ) + { + regionSize[ 0 ] = ( width_lod > 8 ) ? random_in_range( 8, width_lod, d ) : width_lod; + regionSize[ 1 ] = ( height_lod > 8) ? random_in_range( 8, height_lod, d ): height_lod; + } + + // Now pick positions within valid ranges + sourcePos[ 0 ] = ( srcImageInfo->width > regionSize[ 0 ] ) ? (size_t)random_in_range( 0, (int)( srcImageInfo->width - regionSize[ 0 ] - 1 ), d ) : 0; + sourcePos[ 1 ] = ( srcImageInfo->height > regionSize[ 1 ] ) ? (size_t)random_in_range( 0, (int)( srcImageInfo->height - regionSize[ 1 ] - 1 ), d ) : 0; + sourcePos[ 2 ] = ( srcImageInfo->depth > 0 ) ? (size_t)random_in_range( 0, (int)( srcImageInfo->depth - 1 ), d ) : 0; + + if ( gTestMipmaps ) + if( srcImageInfo->depth > 0 ) + { + sourcePos[ 0 ] = ( threeImage_width_lod > regionSize[ 0 ] ) ? (size_t)random_in_range( 0, (int)( threeImage_width_lod - regionSize[ 0 ] - 1 ), d ) : 0; + sourcePos[ 1 ] = ( threeImage_height_lod > regionSize[ 1 ] ) ? (size_t)random_in_range( 0, (int)( threeImage_height_lod - regionSize[ 1 ] - 1 ), d ) : 0; + sourcePos[ 2 ] = (size_t)random_in_range( 0, (int)( depth_lod - 1 ), d ); + sourcePos[ 3 ] = threeImage_lod; + } + else + { + sourcePos[ 0 ] = ( twoImage_width_lod > regionSize[ 0 ] ) ? (size_t)random_in_range( 0, (int)( twoImage_width_lod - regionSize[ 0 ] - 1 ), d ) : 0; + sourcePos[ 1 ] = ( twoImage_height_lod > regionSize[ 1 ] ) ? 
(size_t)random_in_range( 0, (int)( twoImage_height_lod - regionSize[ 1 ] - 1 ), d ) : 0; + + } + + destPos[ 0 ] = ( dstImageInfo->width > regionSize[ 0 ] ) ? (size_t)random_in_range( 0, (int)( dstImageInfo->width - regionSize[ 0 ] - 1 ), d ) : 0; + destPos[ 1 ] = ( dstImageInfo->height > regionSize[ 1 ] ) ? (size_t)random_in_range( 0, (int)( dstImageInfo->height - regionSize[ 1 ] - 1 ), d ) : 0; + destPos[ 2 ] = ( dstImageInfo->depth > 0 ) ? (size_t)random_in_range( 0, (int)( dstImageInfo->depth - 1 ), d ) : 0; + + if ( gTestMipmaps ) + if( dstImageInfo->depth > 0 ) + { + destPos[ 0 ] = ( threeImage_width_lod > regionSize[ 0 ] ) ? (size_t)random_in_range( 0, (int)( threeImage_width_lod - regionSize[ 0 ] - 1 ), d ) : 0; + destPos[ 1 ] = ( threeImage_height_lod > regionSize[ 1 ] ) ? (size_t)random_in_range( 0, (int)( threeImage_height_lod - regionSize[ 1 ] - 1 ), d ) : 0; + destPos[ 2 ] = (size_t)random_in_range( 0, (int)( depth_lod - 1 ), d ); + destPos[ 3 ] = threeImage_lod; + } + else + { + destPos[ 0 ] = ( twoImage_width_lod > regionSize[ 0 ] ) ? (size_t)random_in_range( 0, (int)( twoImage_width_lod - regionSize[ 0 ] - 1 ), d ) : 0; + destPos[ 1 ] = ( twoImage_height_lod > regionSize[ 1 ] ) ? (size_t)random_in_range( 0, (int)( twoImage_height_lod - regionSize[ 1 ] - 1 ), d ) : 0; + + } + + // Go for it! 
+ retCode = test_copy_image_generic( device, srcImageInfo, dstImageInfo, sourcePos, destPos, regionSize, d ); + if( retCode < 0 ) + return retCode; + else + ret += retCode; + } + + return ret; +} + + +int test_copy_image_set_2D_3D( cl_device_id device, cl_image_format *format, bool reverse = false ) +{ + size_t maxWidth, maxHeight, max3DWidth, max3DHeight, max3DDepth; + cl_ulong maxAllocSize, memSize; + image_descriptor srcImageInfo = { 0 }; + image_descriptor dstImageInfo = { 0 }; + RandomSeed seed( gRandomSeed ); + + srcImageInfo.format = dstImageInfo.format = format; + + int error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof( max3DWidth ), &max3DWidth, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof( max3DHeight ), &max3DHeight, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof( max3DDepth ), &max3DDepth, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL ); + test_error( error, "Unable to get max image 2D or 3D size from device" ); + + if (memSize > (cl_ulong)SIZE_MAX) { + memSize = (cl_ulong)SIZE_MAX; + } + + if( gTestSmallImages ) + { + for( dstImageInfo.width = 4; dstImageInfo.width < 17; dstImageInfo.width++ ) + { + for( dstImageInfo.height = 4; dstImageInfo.height < 13; dstImageInfo.height++ ) + { + for( dstImageInfo.depth = 4; dstImageInfo.depth < 9; dstImageInfo.depth++ ) + { + size_t rowPadding = gEnablePitch ? 256 : 0; + size_t slicePadding = gEnablePitch ? 
3 : 0; + + set_image_dimensions( &dstImageInfo, dstImageInfo.width, dstImageInfo.height, dstImageInfo.depth, rowPadding, slicePadding ); + set_image_dimensions( &srcImageInfo, dstImageInfo.width, dstImageInfo.height, 0, rowPadding, slicePadding ); + + if (gTestMipmaps) + { + srcImageInfo.num_mip_levels = (cl_uint) random_log_in_range(2, (int)compute_max_mip_levels(srcImageInfo.width, srcImageInfo.height, 0), seed); + srcImageInfo.type = CL_MEM_OBJECT_IMAGE2D; + dstImageInfo.num_mip_levels = (cl_uint) random_log_in_range(2, (int)compute_max_mip_levels(dstImageInfo.width, dstImageInfo.height, dstImageInfo.depth), seed); + dstImageInfo.type = CL_MEM_OBJECT_IMAGE3D; + srcImageInfo.rowPitch = srcImageInfo.width * get_pixel_size( srcImageInfo.format ); + srcImageInfo.slicePitch = 0; + dstImageInfo.rowPitch = dstImageInfo.width * get_pixel_size( dstImageInfo.format ); + dstImageInfo.slicePitch = dstImageInfo.rowPitch * dstImageInfo.height; + } + + if( gDebugTrace ) + log_info( " at size %d,%d to %d,%d,%d\n", (int)srcImageInfo.width, (int)srcImageInfo.height, (int)dstImageInfo.width, (int)dstImageInfo.height, (int)dstImageInfo.depth ); + + int ret; + if( reverse ) + ret = test_copy_image_size_2D_3D( device, &dstImageInfo, &srcImageInfo, seed ); + else + ret = test_copy_image_size_2D_3D( device, &srcImageInfo, &dstImageInfo, seed ); + if( ret ) + return -1; + } + } + } + } + else if( gTestMaxImages ) + { + // Try a specific set of maximum sizes + size_t numberOfSizes3D, numberOfSizes2D; + size_t sizes3D[100][3], sizes2D[100][3]; + + // Try to allocate a bit smaller images because we need the 2D ones as well for the copy. 
+ get_max_sizes(&numberOfSizes3D, 100, sizes3D, max3DWidth, max3DHeight, max3DDepth, 1, maxAllocSize/2, memSize/2, CL_MEM_OBJECT_IMAGE3D, dstImageInfo.format); + get_max_sizes(&numberOfSizes2D, 100, sizes2D, maxWidth, maxHeight, 1, 1, maxAllocSize/2, memSize/2, CL_MEM_OBJECT_IMAGE2D, srcImageInfo.format); + + for( size_t i = 0; i < numberOfSizes2D; i++ ) + for( size_t j = 0; j < numberOfSizes3D; j++ ) + { + size_t rowPadding = gEnablePitch ? 256 : 0; + size_t slicePadding = gEnablePitch ? 3 : 0; + + set_image_dimensions( &dstImageInfo, sizes3D[ j ][ 0 ], sizes3D[ j ][ 1 ], sizes3D[ j ][ 2 ], rowPadding, slicePadding ); + set_image_dimensions( &srcImageInfo, sizes2D[ i ][ 0 ], sizes2D[ i ][ 1 ], 0, rowPadding, slicePadding ); + cl_ulong dstSize = get_image_size(&dstImageInfo); + cl_ulong srcSize = get_image_size(&srcImageInfo); + + if (gTestMipmaps) + { + srcImageInfo.num_mip_levels = (cl_uint) random_log_in_range(2, (int)compute_max_mip_levels(srcImageInfo.width, srcImageInfo.height, 0), seed); + srcImageInfo.type = CL_MEM_OBJECT_IMAGE2D; + dstImageInfo.num_mip_levels = (cl_uint) random_log_in_range(2, (int)compute_max_mip_levels(dstImageInfo.width, dstImageInfo.height, dstImageInfo.depth), seed); + dstImageInfo.type = CL_MEM_OBJECT_IMAGE3D; + srcImageInfo.rowPitch = srcImageInfo.width * get_pixel_size( srcImageInfo.format ); + srcImageInfo.slicePitch = 0; + dstImageInfo.rowPitch = dstImageInfo.width * get_pixel_size( dstImageInfo.format ); + dstImageInfo.slicePitch = dstImageInfo.rowPitch * dstImageInfo.height; + dstSize = 4 * compute_mipmapped_image_size( dstImageInfo ); + srcSize = 4 * compute_mipmapped_image_size( srcImageInfo ); + } + + if( dstSize < maxAllocSize && dstSize < ( memSize / 3 ) && srcSize < maxAllocSize && srcSize < ( memSize / 3 ) ) + { + log_info( "Testing %d x %d to %d x %d x %d\n", (int)srcImageInfo.width, (int)srcImageInfo.height, (int)dstImageInfo.width, (int)dstImageInfo.height, (int)dstImageInfo.depth ); + if( gDebugTrace ) + log_info( " 
at max size %d,%d to %d,%d,%d\n", (int)srcImageInfo.width, (int)srcImageInfo.height, (int)dstImageInfo.width, (int)dstImageInfo.height, (int)dstImageInfo.depth ); + int ret; + if( reverse ) + ret = test_copy_image_size_2D_3D( device, &dstImageInfo, &srcImageInfo, seed ); + else + ret = test_copy_image_size_2D_3D( device, &srcImageInfo, &dstImageInfo, seed ); + if( ret ) + return -1; + } + else + { + log_info("Not testing max size %d x %d to %d x %d x %d due to memory constraints.\n", + (int)srcImageInfo.width, (int)srcImageInfo.height, (int)dstImageInfo.width, (int)dstImageInfo.height, (int)dstImageInfo.depth); + } + + } + } + else + { + for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ ) + { + cl_ulong srcSize, dstSize; + size_t rowPadding = gEnablePitch ? 256 : 0; + size_t slicePadding = gEnablePitch ? 3 : 0; + + // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that + // image, the result array, plus offset arrays, will fit in the global ram space + do + { + dstImageInfo.width = (size_t)random_log_in_range( 16, (int)max3DWidth / 32, seed ); + dstImageInfo.height = (size_t)random_log_in_range( 16, (int)max3DHeight / 32, seed ); + dstImageInfo.depth = (size_t)random_log_in_range( 16, (int)max3DDepth / 32, seed ); + srcImageInfo.width = (size_t)random_log_in_range( 16, (int)maxWidth / 32, seed ); + srcImageInfo.height = (size_t)random_log_in_range( 16, (int)maxHeight / 32, seed ); + + if (gTestMipmaps) + { + srcImageInfo.num_mip_levels = (cl_uint) random_log_in_range(2, (int)compute_max_mip_levels(srcImageInfo.width, srcImageInfo.height, 0), seed); + srcImageInfo.type = CL_MEM_OBJECT_IMAGE2D; + dstImageInfo.num_mip_levels = (cl_uint) random_log_in_range(2, (int)compute_max_mip_levels(dstImageInfo.width, dstImageInfo.height, dstImageInfo.depth), seed); + dstImageInfo.type = CL_MEM_OBJECT_IMAGE3D; + srcImageInfo.rowPitch = srcImageInfo.width * get_pixel_size( srcImageInfo.format ); + srcImageInfo.slicePitch = 0; + 
dstImageInfo.rowPitch = dstImageInfo.width * get_pixel_size( dstImageInfo.format ); + dstImageInfo.slicePitch = dstImageInfo.rowPitch * dstImageInfo.height; + srcSize = 4 * compute_mipmapped_image_size( srcImageInfo ); + dstSize = 4 * compute_mipmapped_image_size( dstImageInfo ); + } + else + { + set_image_dimensions( &srcImageInfo, srcImageInfo.width, srcImageInfo.height, 0, rowPadding, slicePadding ); + set_image_dimensions( &dstImageInfo, dstImageInfo.width, dstImageInfo.height, dstImageInfo.depth, rowPadding, slicePadding ); + + srcSize = (cl_ulong)srcImageInfo.rowPitch * (cl_ulong)srcImageInfo.height * 4; + dstSize = (cl_ulong)dstImageInfo.slicePitch * (cl_ulong)dstImageInfo.depth * 4; + } + } while( srcSize > maxAllocSize || ( srcSize * 3 ) > memSize || dstSize > maxAllocSize || ( dstSize * 3 ) > memSize); + + if( gDebugTrace ) + log_info( " at size %d,%d to %d,%d,%d\n", (int)srcImageInfo.width, (int)srcImageInfo.height, (int)dstImageInfo.width, (int)dstImageInfo.height, (int)dstImageInfo.depth ); + int ret; + if( reverse ) + ret = test_copy_image_size_2D_3D( device, &dstImageInfo, &srcImageInfo, seed ); + else + ret = test_copy_image_size_2D_3D( device, &srcImageInfo, &dstImageInfo, seed ); + if( ret ) + return -1; + } + } + + return 0; +} diff --git a/test_conformance/images/clCopyImage/test_copy_2D_array.cpp b/test_conformance/images/clCopyImage/test_copy_2D_array.cpp new file mode 100644 index 00000000..0175a8f7 --- /dev/null +++ b/test_conformance/images/clCopyImage/test_copy_2D_array.cpp @@ -0,0 +1,205 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../testBase.h" + +#define MAX_ERR 0.005f +#define MAX_HALF_LINEAR_ERR 0.3f + +extern bool gDebugTrace, gDisableOffsets, gTestSmallImages, gTestMaxImages, gEnablePitch, gTestRounding, gTestMipmaps; +extern cl_filter_mode gFilterModeToUse; +extern cl_addressing_mode gAddressModeToUse; +extern cl_command_queue queue; +extern cl_context context; + +// Defined in test_copy_generic.cpp +extern int test_copy_image_generic( cl_device_id device, image_descriptor *srcImageInfo, image_descriptor *dstImageInfo, + const size_t sourcePos[], const size_t destPos[], const size_t regionSize[], MTdata d ); + +int test_copy_image_2D_array( cl_device_id device, image_descriptor *imageInfo, MTdata d ) +{ + size_t srcPos[] = { 0, 0, 0, 0}, dstPos[] = {0, 0, 0, 0}; + size_t region[] = { imageInfo->width, imageInfo->height, imageInfo->arraySize }; + + size_t src_lod = 0, src_width_lod = imageInfo->width, src_height_lod = imageInfo->height; + size_t dst_lod = 0, dst_width_lod = imageInfo->width, dst_height_lod = imageInfo->height; + size_t width_lod = imageInfo->width, height_lod = imageInfo->height; + size_t max_mip_level; + + if( gTestMipmaps ) + { + max_mip_level = imageInfo->num_mip_levels; + // Work at a random mip level + src_lod = (size_t)random_in_range( 0, max_mip_level ? max_mip_level - 1 : 0, d ); + dst_lod = (size_t)random_in_range( 0, max_mip_level ? max_mip_level - 1 : 0, d ); + src_width_lod = ( imageInfo->width >> src_lod )? ( imageInfo->width >> src_lod ) : 1; + dst_width_lod = ( imageInfo->width >> dst_lod )? 
( imageInfo->width >> dst_lod ) : 1; + src_height_lod = ( imageInfo->height >> src_lod )? ( imageInfo->height >> src_lod ) : 1; + dst_height_lod = ( imageInfo->height >> dst_lod )? ( imageInfo->height >> dst_lod ) : 1; + width_lod = ( src_width_lod > dst_width_lod ) ? dst_width_lod : src_width_lod; + height_lod = ( src_height_lod > dst_height_lod ) ? dst_height_lod : src_height_lod; + + region[ 0 ] = width_lod; + region[ 1 ] = height_lod; + srcPos[ 3 ] = src_lod; + dstPos[ 3 ] = dst_lod; +} + return test_copy_image_generic( device, imageInfo, imageInfo, srcPos, dstPos, region, d ); +} + +int test_copy_image_set_2D_array( cl_device_id device, cl_image_format *format ) +{ + size_t maxWidth, maxHeight, maxArraySize; + cl_ulong maxAllocSize, memSize; + image_descriptor imageInfo = { 0 }; + RandomSeed seed( gRandomSeed ); + size_t pixelSize; + + imageInfo.format = format; + imageInfo.type = CL_MEM_OBJECT_IMAGE2D_ARRAY; + pixelSize = get_pixel_size( imageInfo.format ); + + int error = clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, sizeof( maxArraySize ), &maxArraySize, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL ); + test_error( error, "Unable to get max image 2D array size from device" ); + + if (memSize > (cl_ulong)SIZE_MAX) { + memSize = (cl_ulong)SIZE_MAX; + } + + if( gTestSmallImages ) + { + for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ ) + { + size_t rowPadding = gEnablePitch ? 80 : 0; + size_t slicePadding = gEnablePitch ? 
3 : 0; + + imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding; + + if (gTestMipmaps) + imageInfo.num_mip_levels = (cl_uint) random_log_in_range(2, (int)compute_max_mip_levels(imageInfo.width, imageInfo.height, 0), seed); + + if (gEnablePitch) + { + do { + rowPadding++; + imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding; + } while ((imageInfo.rowPitch % pixelSize) != 0); + } + + for( imageInfo.height = 1; imageInfo.height < 9; imageInfo.height++ ) + { + imageInfo.slicePitch = imageInfo.rowPitch * (imageInfo.height + slicePadding); + + for( imageInfo.arraySize = 2; imageInfo.arraySize < 9; imageInfo.arraySize++ ) + { + if( gDebugTrace ) + log_info( " at size %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.arraySize ); + int ret = test_copy_image_2D_array( device, &imageInfo, seed ); + if( ret ) + return -1; + } + } + } + } + else if( gTestMaxImages ) + { + // Try a specific set of maximum sizes + size_t numbeOfSizes; + size_t sizes[100][3]; + get_max_sizes(&numbeOfSizes, 100, sizes, maxWidth, maxHeight, 1, maxArraySize, maxAllocSize, memSize, imageInfo.type, imageInfo.format); + + for( size_t idx = 0; idx < numbeOfSizes; idx++ ) + { + size_t rowPadding = gEnablePitch ? 80 : 0; + size_t slicePadding = gEnablePitch ? 
3 : 0; + + imageInfo.width = sizes[ idx ][ 0 ]; + imageInfo.height = sizes[ idx ][ 1 ]; + imageInfo.arraySize = sizes[ idx ][ 2 ]; + imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding; + + if (gTestMipmaps) + imageInfo.num_mip_levels = (cl_uint) random_log_in_range(2, (int)compute_max_mip_levels(imageInfo.width, imageInfo.height, 0), seed); + + if (gEnablePitch) + { + do { + rowPadding++; + imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding; + } while ((imageInfo.rowPitch % pixelSize) != 0); + } + + imageInfo.slicePitch = imageInfo.rowPitch * (imageInfo.height + slicePadding); + log_info( "Testing %d x %d x %d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ], (int)sizes[ idx ][ 2 ] ); + if( gDebugTrace ) + log_info( " at max size %d,%d,%d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ], (int)sizes[ idx ][ 2 ] ); + if( test_copy_image_2D_array( device, &imageInfo, seed ) ) + return -1; + } + } + else + { + for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ ) + { + cl_ulong size; + size_t rowPadding = gEnablePitch ? 80 : 0; + size_t slicePadding = gEnablePitch ? 
3 : 0; + // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that + // image, the result array, plus offset arrays, will fit in the global ram space + do + { + imageInfo.width = (size_t)random_log_in_range( 16, (int)maxWidth / 32, seed ); + imageInfo.height = (size_t)random_log_in_range( 16, (int)maxHeight / 32, seed ); + imageInfo.arraySize = (size_t)random_log_in_range( 16, (int)maxArraySize / 32, seed ); + + if (gTestMipmaps) + { + imageInfo.num_mip_levels = (cl_uint) random_log_in_range(2, (int)compute_max_mip_levels(imageInfo.width, imageInfo.height, 0), seed); + imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format ); + imageInfo.slicePitch = imageInfo.rowPitch * imageInfo.height; + size = compute_mipmapped_image_size( imageInfo ); + size = size*4; + } + else + { + imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding; + if (gEnablePitch) + { + do { + rowPadding++; + imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding; + } while ((imageInfo.rowPitch % pixelSize) != 0); + } + + imageInfo.slicePitch = imageInfo.rowPitch * (imageInfo.height + slicePadding); + + size = (cl_ulong)imageInfo.slicePitch * (cl_ulong)imageInfo.arraySize * 4 * 4; + } + } while( size > maxAllocSize || ( size * 3 ) > memSize ); + + if( gDebugTrace ) + log_info( " at size %d,%d,%d (pitch %d,%d) out of %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.arraySize, (int)imageInfo.rowPitch, (int)imageInfo.slicePitch, (int)maxWidth, (int)maxHeight, (int)maxArraySize ); + int ret = test_copy_image_2D_array( device, &imageInfo,seed ); + if( ret ) + return -1; + } + } + + return 0; +} diff --git a/test_conformance/images/clCopyImage/test_copy_3D.cpp b/test_conformance/images/clCopyImage/test_copy_3D.cpp new file mode 100644 index 00000000..d479589f --- /dev/null +++ b/test_conformance/images/clCopyImage/test_copy_3D.cpp @@ -0,0 +1,192 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../testBase.h" + +#define MAX_ERR 0.005f +#define MAX_HALF_LINEAR_ERR 0.3f + +extern bool gDebugTrace, gDisableOffsets, gTestSmallImages, gTestMaxImages, gEnablePitch, gTestRounding, gTestMipmaps; +extern cl_filter_mode gFilterModeToUse; +extern cl_addressing_mode gAddressModeToUse; +extern cl_command_queue queue; +extern cl_context context; + +// Defined in test_copy_generic.cpp +extern int test_copy_image_generic( cl_device_id device, image_descriptor *srcImageInfo, image_descriptor *dstImageInfo, + const size_t sourcePos[], const size_t destPos[], const size_t regionSize[], MTdata d ); + +int test_copy_image_3D( cl_device_id device, image_descriptor *imageInfo, MTdata d ) +{ + size_t origin[] = { 0, 0, 0, 0}; + size_t region[] = { imageInfo->width, imageInfo->height, imageInfo->depth }; + + if( gTestMipmaps ) + { + size_t lod = (imageInfo->num_mip_levels > 1 )? (size_t)random_in_range( 0, imageInfo->num_mip_levels - 1, d ) : 0 ; + origin[ 3 ] = lod; + region[ 0 ] = ( imageInfo->width >> lod ) ? ( imageInfo->width >> lod ) : 1; + region[ 1 ] = ( imageInfo->height >> lod ) ? ( imageInfo->height >> lod ) : 1; + region[ 2 ] = ( imageInfo->depth >> lod ) ? 
( imageInfo->depth >> lod ) : 1; + } + + return test_copy_image_generic( device, imageInfo, imageInfo, origin, origin, region, d ); +} + +int test_copy_image_set_3D( cl_device_id device, cl_image_format *format ) +{ + size_t maxWidth, maxHeight, maxDepth; + cl_ulong maxAllocSize, memSize; + image_descriptor imageInfo = { 0 }; + RandomSeed seed( gRandomSeed ); + size_t pixelSize; + + imageInfo.format = format; + imageInfo.type = CL_MEM_OBJECT_IMAGE3D; + pixelSize = get_pixel_size( imageInfo.format ); + + int error = clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof( maxDepth ), &maxDepth, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL ); + test_error( error, "Unable to get max image 3D size from device" ); + + if (memSize > (cl_ulong)SIZE_MAX) { + memSize = (cl_ulong)SIZE_MAX; + } + + if( gTestSmallImages ) + { + for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ ) + { + size_t rowPadding = gEnablePitch ? 80 : 0; + size_t slicePadding = gEnablePitch ? 
3 : 0; + + imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding; + + if (gTestMipmaps) + imageInfo.num_mip_levels = (cl_uint) random_log_in_range(2, (int)compute_max_mip_levels(imageInfo.width, imageInfo.height, imageInfo.depth), seed); + + if (gEnablePitch) + { + do { + rowPadding++; + imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding; + } while ((imageInfo.rowPitch % pixelSize) != 0); + } + + for( imageInfo.height = 1; imageInfo.height < 9; imageInfo.height++ ) + { + imageInfo.slicePitch = imageInfo.rowPitch * (imageInfo.height + slicePadding); + for( imageInfo.depth = 2; imageInfo.depth < 9; imageInfo.depth++ ) + { + if( gDebugTrace ) + log_info( " at size %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.depth ); + int ret = test_copy_image_3D( device, &imageInfo, seed ); + if( ret ) + return -1; + } + } + } + } + else if( gTestMaxImages ) + { + // Try a specific set of maximum sizes + size_t numbeOfSizes; + size_t sizes[100][3]; + get_max_sizes(&numbeOfSizes, 100, sizes, maxWidth, maxHeight, maxDepth, 1, maxAllocSize, memSize, imageInfo.type, imageInfo.format); + + for( size_t idx = 0; idx < numbeOfSizes; idx++ ) + { + size_t rowPadding = gEnablePitch ? 80 : 0; + size_t slicePadding = gEnablePitch ? 
3 : 0; + + imageInfo.width = sizes[ idx ][ 0 ]; + imageInfo.height = sizes[ idx ][ 1 ]; + imageInfo.depth = sizes[ idx ][ 2 ]; + imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding; + + if (gTestMipmaps) + imageInfo.num_mip_levels = (cl_uint) random_log_in_range(2, (int)compute_max_mip_levels(imageInfo.width, imageInfo.height, imageInfo.depth), seed); + + if (gEnablePitch) + { + do { + rowPadding++; + imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding; + } while ((imageInfo.rowPitch % pixelSize) != 0); + } + + imageInfo.slicePitch = imageInfo.rowPitch * (imageInfo.height + slicePadding); + log_info( "Testing %d x %d x %d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ], (int)sizes[ idx ][ 2 ] ); + if( gDebugTrace ) + log_info( " at max size %d,%d,%d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ], (int)sizes[ idx ][ 2 ] ); + if( test_copy_image_3D( device, &imageInfo, seed ) ) + return -1; + } + } + else + { + for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ ) + { + cl_ulong size; + size_t rowPadding = gEnablePitch ? 80 : 0; + size_t slicePadding = gEnablePitch ? 
3 : 0; + + // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that + // image, the result array, plus offset arrays, will fit in the global ram space + do + { + imageInfo.width = (size_t)random_log_in_range( 16, (int)maxWidth / 32, seed ); + imageInfo.height = (size_t)random_log_in_range( 16, (int)maxHeight / 32, seed ); + imageInfo.depth = (size_t)random_log_in_range( 16, (int)maxDepth / 32,seed ); + + if (gTestMipmaps) + { + imageInfo.num_mip_levels = (cl_uint) random_log_in_range(2, (int)compute_max_mip_levels(imageInfo.width, imageInfo.height, imageInfo.depth), seed); + imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format ); + imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch; + size = compute_mipmapped_image_size( imageInfo ); + size = size*4; + } + else + { + imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding; + + if (gEnablePitch) + { + do { + rowPadding++; + imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding; + } while ((imageInfo.rowPitch % pixelSize) != 0); + } + + imageInfo.slicePitch = imageInfo.rowPitch * (imageInfo.height + slicePadding); + + size = (cl_ulong)imageInfo.slicePitch * (cl_ulong)imageInfo.depth * 4 * 4; + } + } while( size > maxAllocSize || ( size * 3 ) > memSize ); + + if( gDebugTrace ) + log_info( " at size %d,%d,%d (pitch %d,%d) out of %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.depth, (int)imageInfo.rowPitch, (int)imageInfo.slicePitch, (int)maxWidth, (int)maxHeight, (int)maxDepth ); + int ret = test_copy_image_3D( device, &imageInfo,seed ); + if( ret ) + return -1; + } + } + + return 0; +} diff --git a/test_conformance/images/clCopyImage/test_copy_3D_2D_array.cpp b/test_conformance/images/clCopyImage/test_copy_3D_2D_array.cpp new file mode 100644 index 00000000..b0b02932 --- /dev/null +++ b/test_conformance/images/clCopyImage/test_copy_3D_2D_array.cpp @@ -0,0 +1,442 @@ +// +// Copyright (c) 2017 The Khronos 
Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../testBase.h" + +#define MAX_ERR 0.005f +#define MAX_HALF_LINEAR_ERR 0.3f + +extern bool gDebugTrace, gDisableOffsets, gTestSmallImages, gTestMaxImages, gTestRounding, gEnablePitch, gTestMipmaps; +extern cl_filter_mode gFilterModeToUse; +extern cl_addressing_mode gAddressModeToUse; +extern uint64_t gRoundingStartValue; +extern cl_command_queue queue; +extern cl_context context; + +extern int test_copy_image_generic( cl_device_id device, image_descriptor *srcImageInfo, image_descriptor *dstImageInfo, + const size_t sourcePos[], const size_t destPos[], const size_t regionSize[], MTdata d ); + + +static size_t random_in_ranges( size_t minimum, size_t rangeA, size_t rangeB, MTdata d ) +{ + if( rangeB < rangeA ) + rangeA = rangeB; + if( rangeA < minimum ) + return rangeA; + return (size_t)random_in_range( (int)minimum, (int)rangeA - 1, d ); +} + + +static void set_image_dimensions( image_descriptor *imageInfo, size_t width, size_t height, size_t depth, size_t arraySize, size_t rowPadding, size_t slicePadding ) +{ + size_t pixelSize = get_pixel_size( imageInfo->format ); + + imageInfo->width = width; + imageInfo->height = height; + imageInfo->depth = depth; + imageInfo->arraySize = arraySize; + imageInfo->rowPitch = imageInfo->width * pixelSize + rowPadding; + + if (gEnablePitch) + { + do { + rowPadding++; + imageInfo->rowPitch = imageInfo->width * pixelSize + rowPadding; + } while 
((imageInfo->rowPitch % pixelSize) != 0); + } + + imageInfo->slicePitch = imageInfo->rowPitch * (imageInfo->height + slicePadding); + + if (arraySize == 0) + imageInfo->type = CL_MEM_OBJECT_IMAGE3D; + else + imageInfo->type = CL_MEM_OBJECT_IMAGE2D_ARRAY; +} + + +int test_copy_image_size_3D_2D_array( cl_device_id device, image_descriptor *srcImageInfo, image_descriptor *dstImageInfo, MTdata d ) +{ + size_t sourcePos[ 4 ], destPos[ 4 ], regionSize[ 3 ]; + int ret = 0, retCode; + + image_descriptor *threeImage, *twoImage; + + if( srcImageInfo->arraySize == 0 ) + { + threeImage = srcImageInfo; + twoImage = dstImageInfo; + } + else + { + threeImage = dstImageInfo; + twoImage = srcImageInfo; + } + + size_t twoImage_width_lod = twoImage->width, twoImage_height_lod = twoImage->height; + size_t threeImage_width_lod = threeImage->width, threeImage_height_lod = threeImage->height; + size_t twoImage_lod = 0, threeImage_lod = 0; + size_t width_lod = 0, height_lod = 0, depth_lod = 0; + size_t twoImage_max_mip_level,threeImage_max_mip_level; + + if( gTestMipmaps ) + { + twoImage_max_mip_level = twoImage->num_mip_levels; + threeImage_max_mip_level = threeImage->num_mip_levels; + // Work at random mip levels + twoImage_lod = (size_t)random_in_range( 0, twoImage_max_mip_level ? twoImage_max_mip_level - 1 : 0, d ); + threeImage_lod = (size_t)random_in_range( 0, threeImage_max_mip_level ? threeImage_max_mip_level - 1 : 0, d ); + twoImage_width_lod = ( twoImage->width >> twoImage_lod )? ( twoImage->width >> twoImage_lod ) : 1; + threeImage_width_lod = ( threeImage->width >> threeImage_lod )? ( threeImage->width >> threeImage_lod ) : 1; + twoImage_height_lod = ( twoImage->height >> twoImage_lod )? ( twoImage->height >> twoImage_lod ) : 1; + threeImage_height_lod = ( threeImage->height >> threeImage_lod )? ( threeImage->height >> threeImage_lod ) : 1; + depth_lod = ( threeImage->depth >> threeImage_lod )? 
( threeImage->depth >> threeImage_lod ) : 1; + } + width_lod = ( twoImage_width_lod > threeImage_width_lod ) ? threeImage_width_lod : twoImage_width_lod; + height_lod = ( twoImage_height_lod > threeImage_height_lod ) ? threeImage_height_lod : twoImage_height_lod; + depth_lod = ( depth_lod > twoImage->arraySize ) ? twoImage->arraySize : depth_lod; + + // First, try just a full covering region + sourcePos[ 0 ] = sourcePos[ 1 ] = sourcePos[ 2 ] = 0; + destPos[ 0 ] = destPos[ 1 ] = destPos[ 2 ] = 0; + regionSize[ 0 ] = ( threeImage->width < twoImage->width ) ? threeImage->width : twoImage->width; + regionSize[ 1 ] = ( threeImage->height < twoImage->height ) ? threeImage->height : twoImage->height; + regionSize[ 2 ] = 1; + + if( srcImageInfo->type == CL_MEM_OBJECT_IMAGE3D ) + { + // 3D to 2D array + sourcePos[ 2 ] = (size_t)random_in_range( 0, (int)srcImageInfo->depth - 1, d ); + destPos[ 2 ] = (size_t)random_in_range( 0, (int)dstImageInfo->arraySize - 1, d ); + if(gTestMipmaps) + { + sourcePos[ 2 ] = 0/*(size_t)random_in_range( 0, (int)depth_lod - 1, d )*/; + destPos[ 2 ] = ( twoImage->arraySize > depth_lod ) ? (size_t)random_in_range( 0, twoImage->arraySize - depth_lod, d) : 0; + sourcePos[ 3 ] = threeImage_lod; + destPos[ 3 ] = twoImage_lod; + regionSize[ 0 ] = width_lod; + regionSize[ 1 ] = height_lod; + regionSize[ 2 ] = depth_lod; + } + } + else + { + // 2D array to 3D + sourcePos[ 2 ] = (size_t)random_in_range( 0, (int)srcImageInfo->arraySize - 1, d ); + destPos[ 2 ] = (size_t)random_in_range( 0, (int)dstImageInfo->depth - 1, d ); + if(gTestMipmaps) + { + destPos[ 2 ] = 0 /*(size_t)random_in_range( 0, (int)depth_lod - 1, d )*/; + sourcePos[ 2 ] = ( twoImage->arraySize > depth_lod ) ? 
(size_t)random_in_range( 0, twoImage->arraySize - depth_lod, d) : 0; + sourcePos[ 3 ] = twoImage_lod; + destPos[ 3 ] = threeImage_lod; + regionSize[ 0 ] = width_lod; + regionSize[ 1 ] = height_lod; + regionSize[ 2 ] = depth_lod; + } + } + + retCode = test_copy_image_generic( device, srcImageInfo, dstImageInfo, sourcePos, destPos, regionSize, d ); + if( retCode < 0 ) + return retCode; + else + ret += retCode; + + // Now try a sampling of different random regions + for( int i = 0; i < 8; i++ ) + { + if( gTestMipmaps ) + { + twoImage_max_mip_level = twoImage->num_mip_levels; + threeImage_max_mip_level = threeImage->num_mip_levels; + // Work at random mip levels + twoImage_lod = (size_t)random_in_range( 0, twoImage_max_mip_level ? twoImage_max_mip_level - 1 : 0, d ); + threeImage_lod = (size_t)random_in_range( 0, threeImage_max_mip_level ? threeImage_max_mip_level - 1 : 0, d ); + twoImage_width_lod = ( twoImage->width >> twoImage_lod )? ( twoImage->width >> twoImage_lod ) : 1; + threeImage_width_lod = ( threeImage->width >> threeImage_lod )? ( threeImage->width >> threeImage_lod ) : 1; + twoImage_height_lod = ( twoImage->height >> twoImage_lod )? ( twoImage->height >> twoImage_lod ) : 1; + threeImage_height_lod = ( threeImage->height >> threeImage_lod )? ( threeImage->height >> threeImage_lod ) : 1; + depth_lod = ( threeImage->depth >> threeImage_lod )? ( threeImage->depth >> threeImage_lod ) : 1; + width_lod = ( twoImage_width_lod > threeImage_width_lod ) ? threeImage_width_lod : twoImage_width_lod; + height_lod = ( twoImage_height_lod > threeImage_height_lod ) ? threeImage_height_lod : twoImage_height_lod; + depth_lod = ( twoImage->arraySize > depth_lod ) ? 
depth_lod : twoImage->arraySize; + } + // Pick a random size + regionSize[ 0 ] = random_in_ranges( 8, srcImageInfo->width, dstImageInfo->width, d ); + regionSize[ 1 ] = random_in_ranges( 8, srcImageInfo->height, dstImageInfo->height, d ); + if( gTestMipmaps ) + { + regionSize[ 0 ] = random_in_range( 1, width_lod, d ); + regionSize[ 1 ] = random_in_range( 1, height_lod, d ); + regionSize[ 2 ] = depth_lod/*random_in_range( 0, depth_lod, d )*/; + } + + // Now pick positions within valid ranges + sourcePos[ 0 ] = ( srcImageInfo->width > regionSize[ 0 ] ) ? (size_t)random_in_range( 0, (int)( srcImageInfo->width - regionSize[ 0 ] - 1 ), d ) : 0; + sourcePos[ 1 ] = ( srcImageInfo->height > regionSize[ 1 ] ) ? (size_t)random_in_range( 0, (int)( srcImageInfo->height - regionSize[ 1 ] - 1 ), d ) : 0; + + if (srcImageInfo->type == CL_MEM_OBJECT_IMAGE3D) + { + sourcePos[ 2 ] = (size_t)random_in_range( 0, (int)( srcImageInfo->depth - 1 ), d ); + if(gTestMipmaps) + { + sourcePos[ 0 ] = ( threeImage_width_lod > regionSize[ 0 ] ) ? (size_t)random_in_range( 0, (int)( threeImage_width_lod - regionSize[ 0 ] - 1 ), d ) : 0; + sourcePos[ 1 ] = ( threeImage_height_lod > regionSize[ 1 ] ) ? (size_t)random_in_range( 0, (int)( threeImage_height_lod - regionSize[ 1 ] - 1 ), d ) : 0; + sourcePos[ 2 ] = 0 /*( depth_lod > regionSize[ 2 ] ) ? (size_t)random_in_range( 0, (int)( depth_lod - regionSize[ 2 ] - 1 ), d ) : 0*/; + sourcePos[ 3 ] = threeImage_lod; + } + } + else + { + sourcePos[ 2 ] = (size_t)random_in_range( 0, (int)( srcImageInfo->arraySize - 1 ), d ); + if(gTestMipmaps) + { + sourcePos[ 0 ] = ( twoImage_width_lod > regionSize[ 0 ] ) ? (size_t)random_in_range( 0, (int)( twoImage_width_lod - regionSize[ 0 ] - 1 ), d ) : 0; + sourcePos[ 1 ] = ( twoImage_height_lod > regionSize[ 1 ] ) ? (size_t)random_in_range( 0, (int)( twoImage_height_lod - regionSize[ 1 ] - 1 ), d ) : 0; + sourcePos[ 2 ] = ( twoImage->arraySize > regionSize[ 2 ] ) ? 
(size_t)random_in_range( 0, (int)( twoImage->arraySize - regionSize[ 2 ] - 1 ), d ) : 0; + sourcePos[ 3 ] = twoImage_lod; + } + } + + destPos[ 0 ] = ( dstImageInfo->width > regionSize[ 0 ] ) ? (size_t)random_in_range( 0, (int)( dstImageInfo->width - regionSize[ 0 ] - 1 ), d ) : 0; + destPos[ 1 ] = ( dstImageInfo->height > regionSize[ 1 ] ) ? (size_t)random_in_range( 0, (int)( dstImageInfo->height - regionSize[ 1 ] - 1 ), d ) : 0; + if (dstImageInfo->type == CL_MEM_OBJECT_IMAGE3D) + { + destPos[ 2 ] = (size_t)random_in_range( 0, (int)( dstImageInfo->depth - 1 ), d ); + if(gTestMipmaps) + { + destPos[ 0 ] = ( threeImage_width_lod > regionSize[ 0 ] ) ? (size_t)random_in_range( 0, (int)( threeImage_width_lod - regionSize[ 0 ] - 1 ), d ) : 0; + destPos[ 1 ] = ( threeImage_height_lod > regionSize[ 1 ] ) ? (size_t)random_in_range( 0, (int)( threeImage_height_lod - regionSize[ 1 ] - 1 ), d ) : 0; + destPos[ 2 ] = 0/*( depth_lod > regionSize[ 2 ] ) ? (size_t)random_in_range( 0, (int)( depth_lod - regionSize[ 2 ] - 1 ), d ) : 0*/; + destPos[ 3 ] = threeImage_lod; + } + } + else + { + destPos[ 2 ] = (size_t)random_in_range( 0, (int)( dstImageInfo->arraySize - 1 ), d ); + if(gTestMipmaps) + { + destPos[ 0 ] = ( twoImage_width_lod > regionSize[ 0 ] ) ? (size_t)random_in_range( 0, (int)( twoImage_width_lod - regionSize[ 0 ] - 1 ), d ) : 0; + destPos[ 1 ] = ( twoImage_height_lod > regionSize[ 1 ] ) ? (size_t)random_in_range( 0, (int)( twoImage_height_lod - regionSize[ 1 ] - 1 ), d ) : 0; + destPos[ 2 ] = ( twoImage->arraySize > regionSize[ 2 ] ) ? (size_t)random_in_range( 0, (int)( twoImage->arraySize - regionSize[ 2 ] - 1 ), d ) : 0; + destPos[ 3 ] = twoImage_lod; + } + } + + + // Go for it! 
+        retCode = test_copy_image_generic( device, srcImageInfo, dstImageInfo, sourcePos, destPos, regionSize, d );
+        if( retCode < 0 )
+            return retCode;
+        else
+            ret += retCode;
+    }
+
+    return ret;
+}
+
+
+int test_copy_image_set_3D_2D_array( cl_device_id device, cl_image_format *format, bool reverse = false ) // drives clEnqueueCopyImage tests between 3D images and 2D image arrays; reverse=true copies 2D array -> 3D
+{
+    size_t maxWidth, maxHeight, max3DWidth, max3DHeight, maxDepth, maxArraySize;
+    cl_ulong maxAllocSize, memSize;
+    image_descriptor srcImageInfo = { 0 };
+    image_descriptor dstImageInfo = { 0 };
+    RandomSeed seed( gRandomSeed );
+    size_t rowPadding = gEnablePitch ? 256 : 0;
+    size_t slicePadding = gEnablePitch ? 3 : 0;
+
+    srcImageInfo.format = dstImageInfo.format = format;
+
+    int error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL );
+    error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL );
+    error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, sizeof( maxArraySize ), &maxArraySize, NULL );
+    error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof( max3DWidth ), &max3DWidth, NULL );
+    error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof( max3DHeight ), &max3DHeight, NULL );
+    error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof( maxDepth ), &maxDepth, NULL );
+    error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL );
+    error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
+    test_error( error, "Unable to get max image 2D image array or 3D size from device" );
+
+    if (memSize > (cl_ulong)SIZE_MAX) {
+        memSize = (cl_ulong)SIZE_MAX;
+    }
+
+    if( gTestSmallImages )
+    {
+        for( dstImageInfo.width = 4; dstImageInfo.width < 17; dstImageInfo.width++ )
+        {
+            for( dstImageInfo.height = 4; dstImageInfo.height < 13; dstImageInfo.height++ )
+            {
+                for( dstImageInfo.arraySize = 4; dstImageInfo.arraySize < 9; dstImageInfo.arraySize++ )
+                {
+                    set_image_dimensions( &dstImageInfo, dstImageInfo.width, dstImageInfo.height, 0, dstImageInfo.arraySize, rowPadding, slicePadding );
+                    set_image_dimensions( &srcImageInfo, dstImageInfo.width, dstImageInfo.height, dstImageInfo.arraySize, 0, rowPadding, slicePadding );
+
+                    if (gTestMipmaps)
+                    {
+                        dstImageInfo.type = CL_MEM_OBJECT_IMAGE2D_ARRAY;
+                        dstImageInfo.num_mip_levels = (cl_uint) random_log_in_range(2, (int)compute_max_mip_levels(dstImageInfo.width, dstImageInfo.height, 0), seed);
+                        srcImageInfo.type = CL_MEM_OBJECT_IMAGE3D;
+                        srcImageInfo.num_mip_levels = (cl_uint) random_log_in_range(2, (int)compute_max_mip_levels(srcImageInfo.width, srcImageInfo.height, srcImageInfo.depth), seed);
+                        srcImageInfo.rowPitch = srcImageInfo.width * get_pixel_size( srcImageInfo.format );
+                        srcImageInfo.slicePitch = srcImageInfo.rowPitch * srcImageInfo.height;
+                        dstImageInfo.rowPitch = dstImageInfo.width * get_pixel_size( dstImageInfo.format );
+                        dstImageInfo.slicePitch = dstImageInfo.rowPitch * dstImageInfo.height;
+                    }
+
+                    if( gDebugTrace )
+                    {
+                        if (reverse)
+                            log_info( " at size %d,%d,%d to %d,%d,%d\n", (int)dstImageInfo.width, (int)dstImageInfo.height, (int)dstImageInfo.arraySize, (int)srcImageInfo.width, (int)srcImageInfo.height, (int)srcImageInfo.depth );
+                        else
+                            log_info( " at size %d,%d,%d to %d,%d,%d\n", (int)srcImageInfo.width, (int)srcImageInfo.height, (int)srcImageInfo.depth, (int)dstImageInfo.width, (int)dstImageInfo.height, (int)dstImageInfo.arraySize );
+                    }
+                    int ret;
+                    if( reverse )
+                        ret = test_copy_image_size_3D_2D_array( device, &dstImageInfo, &srcImageInfo, seed );
+                    else
+                        ret = test_copy_image_size_3D_2D_array( device, &srcImageInfo, &dstImageInfo, seed );
+                    if( ret )
+                        return -1;
+                }
+            }
+        }
+    }
+    else if( gTestMaxImages )
+    {
+        // Try a specific set of maximum sizes
+        size_t numbeOfSizes; // NOTE(review): typo preserved — "numberOfSizes"
+        size_t sizes3D[100][3];
+        size_t sizes2Darray[100][3];
+
+        // Try to allocate a bit smaller images because we need the 3D ones as well for the copy.
+        get_max_sizes(&numbeOfSizes, 100, sizes2Darray, maxWidth, maxHeight, maxDepth, maxArraySize, maxAllocSize/2, memSize/2, CL_MEM_OBJECT_IMAGE3D, srcImageInfo.format); // NOTE(review): 2D-array sizes requested with CL_MEM_OBJECT_IMAGE3D and the *source* (3D) format — looks swapped with the call below; confirm
+        get_max_sizes(&numbeOfSizes, 100, sizes3D, max3DWidth, max3DHeight, maxDepth, maxArraySize, maxAllocSize/2, memSize/2, CL_MEM_OBJECT_IMAGE3D, dstImageInfo.format); // NOTE(review): 3D sizes paired with the *destination* (2D-array) format — confirm intended
+
+        for( size_t idx = 0; idx < numbeOfSizes; idx++ )
+        {
+            set_image_dimensions( &srcImageInfo, sizes3D[ idx ][ 0 ], sizes3D[ idx ][ 1 ], sizes3D[ idx ][ 2 ], 0, rowPadding, slicePadding );
+            set_image_dimensions( &dstImageInfo, sizes2Darray[ idx ][ 0 ], sizes2Darray[ idx ][ 1 ], 0, sizes2Darray[ idx ][ 2 ], rowPadding, slicePadding );
+
+            cl_ulong dstSize = (cl_ulong)dstImageInfo.slicePitch * (cl_ulong)dstImageInfo.arraySize;
+            cl_ulong srcSize = (cl_ulong)srcImageInfo.slicePitch * (cl_ulong)srcImageInfo.depth;
+
+            if (gTestMipmaps)
+            {
+                dstImageInfo.type = CL_MEM_OBJECT_IMAGE2D_ARRAY;
+                dstImageInfo.num_mip_levels = (cl_uint) random_log_in_range(2, (int)compute_max_mip_levels(dstImageInfo.width, dstImageInfo.height, 0), seed);
+                srcImageInfo.type = CL_MEM_OBJECT_IMAGE3D;
+                srcImageInfo.num_mip_levels = (cl_uint) random_log_in_range(2, (int)compute_max_mip_levels(srcImageInfo.width, srcImageInfo.height, srcImageInfo.depth), seed);
+                srcImageInfo.rowPitch = srcImageInfo.width * get_pixel_size( srcImageInfo.format );
+                srcImageInfo.slicePitch = srcImageInfo.rowPitch * srcImageInfo.height;
+                dstImageInfo.rowPitch = dstImageInfo.width * get_pixel_size( dstImageInfo.format );
+                dstImageInfo.slicePitch = dstImageInfo.rowPitch * dstImageInfo.height;
+                srcSize = 4 * compute_mipmapped_image_size( srcImageInfo );
+                dstSize = 4 * compute_mipmapped_image_size( dstImageInfo );
+            }
+
+            if ( ( dstSize < maxAllocSize && dstSize < ( memSize / 3 ) ) &&
+                ( srcSize < maxAllocSize && srcSize < ( memSize / 3 ) ) )
+            {
+                if (reverse)
+                    log_info( "Testing %d x %d x %d to %d x %d x %d\n", (int)dstImageInfo.width, (int)dstImageInfo.height, (int)dstImageInfo.arraySize, (int)srcImageInfo.width, (int)srcImageInfo.height, (int)srcImageInfo.depth );
+                else
+                    log_info( "Testing %d x %d x %d to %d x %d x %d\n", (int)srcImageInfo.width, (int)srcImageInfo.height, (int)srcImageInfo.depth, (int)dstImageInfo.width, (int)dstImageInfo.height, (int)dstImageInfo.arraySize );
+
+                if( gDebugTrace )
+                {
+                    if (reverse)
+                        log_info( " at max size %d,%d,%d to %d,%d,%d\n", (int)dstImageInfo.width, (int)dstImageInfo.height, (int)dstImageInfo.arraySize, (int)srcImageInfo.width, (int)srcImageInfo.height, (int)srcImageInfo.depth );
+                    else
+                        log_info( " at max size %d,%d,%d to %d,%d,%d\n", (int)srcImageInfo.width, (int)srcImageInfo.height, (int)srcImageInfo.depth, (int)dstImageInfo.width, (int)dstImageInfo.height, (int)dstImageInfo.arraySize );
+                }
+                int ret;
+                if( reverse )
+                    ret = test_copy_image_size_3D_2D_array( device, &dstImageInfo, &srcImageInfo, seed );
+                else
+                    ret = test_copy_image_size_3D_2D_array( device, &srcImageInfo, &dstImageInfo, seed );
+                if( ret )
+                    return -1;
+            }
+            else
+            {
+                if (reverse)
+                    log_info("Not testing max size %d x %d x %d x %d to %d x %d due to memory constraints.\n", // NOTE(review): format groups dims 4+2 but the six args are 3+3 (dst w/h/arraySize, src w/h/depth) — layout looks wrong; confirm
+                        (int)dstImageInfo.width, (int)dstImageInfo.height, (int)dstImageInfo.arraySize, (int)srcImageInfo.width, (int)srcImageInfo.height, (int)srcImageInfo.depth);
+                else
+                    log_info("Not testing max size %d x %d x %d to %d x %d x %d due to memory constraints.\n",
+                        (int)srcImageInfo.width, (int)srcImageInfo.height, (int)srcImageInfo.depth, (int)dstImageInfo.width, (int)dstImageInfo.height, (int)dstImageInfo.arraySize);
+            }
+
+        }
+    }
+    else
+    {
+        for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ )
+        {
+            cl_ulong srcSize, dstSize;
+            // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that
+            // image, the result array, plus offset arrays, will fit in the global ram space
+            do
+            {
+                dstImageInfo.width = (size_t)random_log_in_range( 16, (int)maxWidth / 32, seed );
+                dstImageInfo.height = (size_t)random_log_in_range( 16, (int)maxHeight / 32, seed );
+                dstImageInfo.arraySize = (size_t)random_log_in_range( 16, (int)maxArraySize / 32, seed );
+                srcImageInfo.width = (size_t)random_log_in_range( 16, (int)max3DWidth / 32, seed );
+                srcImageInfo.height = (size_t)random_log_in_range( 16, (int)max3DHeight / 32, seed );
+                srcImageInfo.depth = (size_t)random_log_in_range( 16, (int)maxDepth / 32, seed );
+
+                if (gTestMipmaps)
+                {
+                    dstImageInfo.type = CL_MEM_OBJECT_IMAGE2D_ARRAY;
+                    dstImageInfo.num_mip_levels = (cl_uint) random_log_in_range(2, (int)compute_max_mip_levels(dstImageInfo.width, dstImageInfo.height, 0), seed);
+                    srcImageInfo.type = CL_MEM_OBJECT_IMAGE3D;
+                    srcImageInfo.num_mip_levels = (cl_uint) random_log_in_range(2, (int)compute_max_mip_levels(srcImageInfo.width, srcImageInfo.height, srcImageInfo.depth), seed);
+                    srcImageInfo.rowPitch = srcImageInfo.width * get_pixel_size( srcImageInfo.format );
+                    srcImageInfo.slicePitch = srcImageInfo.rowPitch * srcImageInfo.height;
+                    dstImageInfo.rowPitch = dstImageInfo.width * get_pixel_size( dstImageInfo.format );
+                    dstImageInfo.slicePitch = dstImageInfo.rowPitch * dstImageInfo.height;
+                    srcSize = 4 * compute_mipmapped_image_size( srcImageInfo );
+                    dstSize = 4 * compute_mipmapped_image_size( dstImageInfo );
+                }
+                else
+                {
+                    set_image_dimensions( &srcImageInfo, srcImageInfo.width, srcImageInfo.height, srcImageInfo.depth, 0, rowPadding, slicePadding );
+                    set_image_dimensions( &dstImageInfo, dstImageInfo.width, dstImageInfo.height, 0, dstImageInfo.arraySize, rowPadding, slicePadding );
+
+                    srcSize = (cl_ulong)srcImageInfo.slicePitch * (cl_ulong)srcImageInfo.depth * 4;
+                    dstSize = (cl_ulong)dstImageInfo.slicePitch * (cl_ulong)dstImageInfo.arraySize * 4;
+                }
+            } while( srcSize > maxAllocSize || ( srcSize * 3 ) > memSize || dstSize > maxAllocSize || ( dstSize * 3 ) > memSize);
+
+            if( gDebugTrace )
+            {
+                if (reverse)
+                    log_info( " at size %d,%d,%d to %d,%d,%d\n", (int)dstImageInfo.width, (int)dstImageInfo.height, (int)dstImageInfo.arraySize, (int)srcImageInfo.width, (int)srcImageInfo.height, (int)srcImageInfo.depth );
+                else
+                    log_info( " at size %d,%d,%d to %d,%d,%d\n", (int)srcImageInfo.width, (int)srcImageInfo.height, (int)srcImageInfo.depth, (int)dstImageInfo.width, (int)dstImageInfo.height, (int)dstImageInfo.arraySize );
+            }
+            int ret;
+            if( reverse )
+                ret = test_copy_image_size_3D_2D_array( device, &dstImageInfo, &srcImageInfo, seed );
+            else
+                ret = test_copy_image_size_3D_2D_array( device, &srcImageInfo, &dstImageInfo, seed );
+            if( ret )
+                return -1;
+        }
+    }
+
+    return 0;
+}
diff --git a/test_conformance/images/clCopyImage/test_copy_generic.cpp b/test_conformance/images/clCopyImage/test_copy_generic.cpp
new file mode 100644
index 00000000..ed111bcd
--- /dev/null
+++ b/test_conformance/images/clCopyImage/test_copy_generic.cpp
@@ -0,0 +1,763 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "../testBase.h"
+
+#define MAX_ERR 0.005f
+#define MAX_HALF_LINEAR_ERR 0.3f
+
+extern bool gDebugTrace, gDisableOffsets, gTestSmallImages, gTestMaxImages, gTestRounding, gEnablePitch, gTestMipmaps;
+extern cl_filter_mode gFilterModeToUse;
+extern cl_addressing_mode gAddressModeToUse;
+extern uint64_t gRoundingStartValue;
+extern cl_command_queue queue;
+extern cl_context context;
+
+size_t random_in_ranges( size_t minimum, size_t rangeA, size_t rangeB, MTdata d ) // random value in [minimum, min(rangeA,rangeB)-1]; returns the clamped range itself when it is below minimum
+{
+    if( rangeB < rangeA )
+        rangeA = rangeB;
+    if( rangeA < minimum )
+        return rangeA;
+    return (size_t)random_in_range( (int)minimum, (int)rangeA - 1, d );
+}
+
+static void CL_CALLBACK free_pitch_buffer( cl_mem image, void *buf ) // destructor callback: frees the host backing store attached to a pitched image
+{
+    free( buf );
+}
+
+cl_mem create_image( cl_context context, BufferOwningPtr& data, image_descriptor *imageInfo, int *error ) // creates an image per imageInfo and uploads 'data' via map/unmap; returns NULL and sets *error on failure. NOTE(review): BufferOwningPtr template argument appears lost in transcription — confirm
+{
+    cl_mem img;
+    cl_image_desc imageDesc;
+    cl_mem_flags mem_flags = CL_MEM_READ_ONLY;
+    void *host_ptr = NULL;
+
+    memset(&imageDesc, 0x0, sizeof(cl_image_desc));
+    imageDesc.image_type = imageInfo->type;
+    imageDesc.image_width = imageInfo->width;
+    imageDesc.image_height = imageInfo->height;
+    imageDesc.image_depth = imageInfo->depth;
+    imageDesc.image_array_size = imageInfo->arraySize;
+    imageDesc.image_row_pitch = gEnablePitch ? imageInfo->rowPitch : 0;
+    imageDesc.image_slice_pitch = gEnablePitch ? imageInfo->slicePitch : 0;
+    imageDesc.num_mip_levels = gTestMipmaps ? imageInfo->num_mip_levels : 0;
+
+    switch (imageInfo->type) // when pitch is enabled, malloc a host backing store sized for the image type (used with CL_MEM_USE_HOST_PTR below)
+    {
+        case CL_MEM_OBJECT_IMAGE1D:
+            if ( gDebugTrace )
+                log_info( " - Creating 1D image %d ...\n", (int)imageInfo->width );
+            if ( gEnablePitch )
+                host_ptr = malloc( imageInfo->rowPitch );
+            break;
+        case CL_MEM_OBJECT_IMAGE2D:
+            if ( gDebugTrace )
+                log_info( " - Creating 2D image %d by %d ...\n", (int)imageInfo->width, (int)imageInfo->height );
+            if ( gEnablePitch )
+                host_ptr = malloc( imageInfo->height * imageInfo->rowPitch );
+            break;
+        case CL_MEM_OBJECT_IMAGE3D:
+            if ( gDebugTrace )
+                log_info( " - Creating 3D image %d by %d by %d...\n", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->depth );
+            if ( gEnablePitch )
+                host_ptr = malloc( imageInfo->depth * imageInfo->slicePitch );
+            break;
+        case CL_MEM_OBJECT_IMAGE1D_ARRAY:
+            if ( gDebugTrace )
+                log_info( " - Creating 1D image array %d by %d...\n", (int)imageInfo->width, (int)imageInfo->arraySize );
+            if ( gEnablePitch )
+                host_ptr = malloc( imageInfo->arraySize * imageInfo->slicePitch );
+            break;
+        case CL_MEM_OBJECT_IMAGE2D_ARRAY:
+            if ( gDebugTrace )
+                log_info( " - Creating 2D image array %d by %d by %d...\n", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->arraySize );
+            if ( gEnablePitch )
+                host_ptr = malloc( imageInfo->arraySize * imageInfo->slicePitch );
+            break;
+    }
+
+    if ( gDebugTrace && gTestMipmaps )
+        log_info(" - with %llu mip levels\n", (unsigned long long) imageInfo->num_mip_levels);
+
+    if (gEnablePitch)
+    {
+        if ( NULL == host_ptr )
+        {
+            log_error( "ERROR: Unable to create backing store for pitched 3D image. %ld bytes\n", imageInfo->depth * imageInfo->slicePitch ); // NOTE(review): message and byte count assume 3D, but this path runs for every pitched image type
+            return NULL;
+        }
+        mem_flags = CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR;
+    }
+
+    img = clCreateImage(context, mem_flags, imageInfo->format, &imageDesc, host_ptr, error);
+
+    if (gEnablePitch)
+    {
+        if ( *error == CL_SUCCESS )
+        {
+            // Hand ownership of host_ptr to the image: freed by the destructor callback on release.
+            int callbackError = clSetMemObjectDestructorCallback( img, free_pitch_buffer, host_ptr );
+            if ( CL_SUCCESS != callbackError )
+            {
+                free( host_ptr );
+                log_error( "ERROR: Unable to attach destructor callback to pitched 3D image. Err: %d\n", callbackError );
+                clReleaseMemObject( img );
+                return NULL;
+            }
+        }
+        else
+            free(host_ptr);
+    }
+
+    if ( *error != CL_SUCCESS )
+    {
+        switch (imageInfo->type)
+        {
+            case CL_MEM_OBJECT_IMAGE1D:
+                log_error( "ERROR: Unable to create 1D image of size %d (%s)", (int)imageInfo->width, IGetErrorString( *error ) );
+                break;
+            case CL_MEM_OBJECT_IMAGE2D:
+                log_error( "ERROR: Unable to create 2D image of size %d x %d (%s)", (int)imageInfo->width, (int)imageInfo->height, IGetErrorString( *error ) );
+                break;
+            case CL_MEM_OBJECT_IMAGE3D:
+                log_error( "ERROR: Unable to create 3D image of size %d x %d x %d (%s)", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->depth, IGetErrorString( *error ) );
+                break;
+            case CL_MEM_OBJECT_IMAGE1D_ARRAY:
+                log_error( "ERROR: Unable to create 1D image array of size %d x %d (%s)", (int)imageInfo->width, (int)imageInfo->arraySize, IGetErrorString( *error ) );
+                break;
+                break; // NOTE(review): duplicated break — unreachable dead code
+            case CL_MEM_OBJECT_IMAGE2D_ARRAY:
+                log_error( "ERROR: Unable to create 2D image array of size %d x %d x %d (%s)", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->arraySize, IGetErrorString( *error ) );
+                break;
+        }
+        log_error("ERROR: and %llu mip levels\n", (unsigned long long) imageInfo->num_mip_levels);
+        return NULL;
+    }
+
+    // Copy the specified data to the image via a Map operation.
+    size_t mappedRow, mappedSlice;
+    size_t width = imageInfo->width;
+    size_t height = 1;
+    size_t depth = 1;
+    size_t row_pitch_lod, slice_pitch_lod;
+    row_pitch_lod = imageInfo->rowPitch;
+    slice_pitch_lod = imageInfo->slicePitch;
+
+    switch (imageInfo->type) // normalize the three map-region dimensions per image type (arrays map their slices through height/depth)
+    {
+        case CL_MEM_OBJECT_IMAGE1D_ARRAY:
+            height = imageInfo->arraySize;
+            depth = 1;
+            break;
+        case CL_MEM_OBJECT_IMAGE1D:
+            height = depth = 1;
+            break;
+        case CL_MEM_OBJECT_IMAGE2D:
+            height = imageInfo->height;
+            depth = 1;
+            break;
+        case CL_MEM_OBJECT_IMAGE2D_ARRAY:
+            height = imageInfo->height;
+            depth = imageInfo->arraySize;
+            break;
+        case CL_MEM_OBJECT_IMAGE3D:
+            height = imageInfo->height;
+            depth = imageInfo->depth;
+            break;
+    }
+
+    size_t origin[ 4 ] = { 0, 0, 0, 0 };
+    size_t region[ 3 ] = { imageInfo->width, height, depth };
+
+    // One iteration per mip level when mipmaps are on; exactly one iteration (lod 0) otherwise.
+    for ( size_t lod = 0; (gTestMipmaps && (lod < imageInfo->num_mip_levels)) || (!gTestMipmaps && (lod < 1)); lod++)
+    {
+        // Map the appropriate miplevel to copy the specified data.
+        if(gTestMipmaps)
+        {
+            switch (imageInfo->type) // the mip level is passed in the first unused origin coordinate for the type
+            {
+                case CL_MEM_OBJECT_IMAGE3D:
+                case CL_MEM_OBJECT_IMAGE2D_ARRAY:
+                    origin[ 3 ] = lod;
+                    break;
+                case CL_MEM_OBJECT_IMAGE2D:
+                case CL_MEM_OBJECT_IMAGE1D_ARRAY:
+                    origin[ 2 ] = lod;
+                    break;
+                case CL_MEM_OBJECT_IMAGE1D:
+                    origin[ 1 ] = lod;
+                    break;
+            }
+
+            //Adjust image dimensions as per miplevel
+            switch (imageInfo->type) // deliberate fall-through: each case also shrinks the smaller dimensions below it
+            {
+                case CL_MEM_OBJECT_IMAGE3D:
+                    depth = ( imageInfo->depth >> lod ) ? (imageInfo->depth >> lod) : 1;
+                case CL_MEM_OBJECT_IMAGE2D_ARRAY:
+                case CL_MEM_OBJECT_IMAGE2D:
+                    height = ( imageInfo->height >> lod ) ? (imageInfo->height >> lod) : 1;
+                case CL_MEM_OBJECT_IMAGE1D_ARRAY:
+                case CL_MEM_OBJECT_IMAGE1D:
+                    width = ( imageInfo->width >> lod ) ? (imageInfo->width >> lod) : 1;
+            }
+            row_pitch_lod = width * get_pixel_size(imageInfo->format);
+            slice_pitch_lod = row_pitch_lod * height;
+            region[0] = width;
+            region[1] = height;
+            region[2] = depth;
+        }
+
+        void* mapped = (char*)clEnqueueMapImage(queue, img, CL_TRUE, CL_MAP_WRITE, origin, region, &mappedRow, &mappedSlice, 0, NULL, NULL, error);
+        if (*error != CL_SUCCESS)
+        {
+            log_error( "ERROR: Unable to map image for writing: %s\n", IGetErrorString( *error ) );
+            return NULL;
+        }
+        size_t mappedSlicePad = mappedSlice - (mappedRow * height);
+
+        // Copy the image.
+        size_t scanlineSize = row_pitch_lod;
+        size_t sliceSize = slice_pitch_lod - scanlineSize * height;
+        size_t imageSize = scanlineSize * height * depth;
+        size_t data_lod_offset = 0;
+        if( gTestMipmaps )
+            data_lod_offset = compute_mip_level_offset(imageInfo, lod);
+
+        char* src = (char*)data + data_lod_offset;
+        char* dst = (char*)mapped;
+
+        if ((mappedRow == scanlineSize) && (mappedSlicePad==0 || (imageInfo->depth==0 && imageInfo->arraySize==0))) {
+            // Copy the whole image.
+            memcpy( dst, src, imageSize );
+        }
+        else {
+            // Else copy one scan line at a time.
+            size_t dstPitch2D = 0;
+            switch (imageInfo->type)
+            {
+                case CL_MEM_OBJECT_IMAGE3D:
+                case CL_MEM_OBJECT_IMAGE2D_ARRAY:
+                case CL_MEM_OBJECT_IMAGE2D:
+                    dstPitch2D = mappedRow;
+                    break;
+                case CL_MEM_OBJECT_IMAGE1D_ARRAY:
+                case CL_MEM_OBJECT_IMAGE1D:
+                    dstPitch2D = mappedSlice;
+                    break;
+            }
+            for ( size_t z = 0; z < depth; z++ )
+            {
+                for ( size_t y = 0; y < height; y++ )
+                {
+                    memcpy( dst, src, scanlineSize );
+                    dst += dstPitch2D;
+                    src += scanlineSize;
+                }
+
+                // mappedSlicePad is incorrect for 2D images here, but we will exit the z loop before this is a problem.
+                dst += mappedSlicePad;
+                src += sliceSize;
+            }
+        }
+
+        // Unmap the image.
+        *error = clEnqueueUnmapMemObject(queue, img, mapped, 0, NULL, NULL);
+        if (*error != CL_SUCCESS)
+        {
+            log_error( "ERROR: Unable to unmap image after writing: %s\n", IGetErrorString( *error ) );
+            return NULL;
+        }
+    }
+    return img;
+}
+
+
+// WARNING -- not thread safe
+BufferOwningPtr srcData; // NOTE(review): BufferOwningPtr template arguments appear lost in transcription here and below — confirm against original
+BufferOwningPtr dstData;
+BufferOwningPtr srcHost;
+BufferOwningPtr dstHost;
+
+int test_copy_image_generic( cl_device_id device, image_descriptor *srcImageInfo, image_descriptor *dstImageInfo, // copies a region src->dst with clEnqueueCopyImage and verifies the mapped dst against a host-side reference copy
+                            const size_t sourcePos[], const size_t destPos[], const size_t regionSize[], MTdata d )
+{
+    int error;
+
+    clMemWrapper srcImage, dstImage;
+
+    if( gDebugTrace )
+        log_info( " ++ Entering inner test loop...\n" );
+
+    // Generate some data to test against
+    size_t srcBytes = 0;
+    if( gTestMipmaps )
+    {
+        srcBytes = (size_t)compute_mipmapped_image_size( *srcImageInfo );
+    }
+    else
+    {
+        switch (srcImageInfo->type)
+        {
+            case CL_MEM_OBJECT_IMAGE1D:
+                srcBytes = srcImageInfo->rowPitch;
+                break;
+            case CL_MEM_OBJECT_IMAGE2D:
+                srcBytes = srcImageInfo->height * srcImageInfo->rowPitch;
+                break;
+            case CL_MEM_OBJECT_IMAGE3D:
+                srcBytes = srcImageInfo->depth * srcImageInfo->slicePitch;
+                break;
+            case CL_MEM_OBJECT_IMAGE1D_ARRAY:
+                srcBytes = srcImageInfo->arraySize * srcImageInfo->slicePitch;
+                break;
+            case CL_MEM_OBJECT_IMAGE2D_ARRAY:
+                srcBytes = srcImageInfo->arraySize * srcImageInfo->slicePitch;
+                break;
+        }
+    }
+
+    if (srcBytes > srcData.getSize()) // cached buffers are only regrown, never shrunk, across calls
+    {
+        if( gDebugTrace )
+            log_info( " - Resizing random image data...\n" );
+
+        generate_random_image_data( srcImageInfo, srcData, d );
+
+        // Update the host verification copy of the data.
+        srcHost.reset(malloc(srcBytes),NULL,0,srcBytes);
+        if (srcHost == NULL) {
+            log_error( "ERROR: Unable to malloc %lu bytes for srcHost\n", srcBytes );
+            return -1;
+        }
+        memcpy(srcHost,srcData,srcBytes);
+    }
+
+    // Construct testing sources
+    if( gDebugTrace )
+        log_info( " - Writing source image...\n" );
+
+    srcImage = create_image( context, srcData, srcImageInfo, &error );
+    if( srcImage == NULL )
+        return error;
+
+
+    // Initialize the destination to empty
+    size_t destImageSize = 0;
+    if( gTestMipmaps )
+    {
+        destImageSize = (size_t)compute_mipmapped_image_size( *dstImageInfo );
+    }
+    else
+    {
+        switch (dstImageInfo->type)
+        {
+            case CL_MEM_OBJECT_IMAGE1D:
+                destImageSize = dstImageInfo->rowPitch;
+                break;
+            case CL_MEM_OBJECT_IMAGE2D:
+                destImageSize = dstImageInfo->height * dstImageInfo->rowPitch;
+                break;
+            case CL_MEM_OBJECT_IMAGE3D:
+                destImageSize = dstImageInfo->depth * dstImageInfo->slicePitch;
+                break;
+            case CL_MEM_OBJECT_IMAGE1D_ARRAY:
+                destImageSize = dstImageInfo->arraySize * dstImageInfo->slicePitch;
+                break;
+            case CL_MEM_OBJECT_IMAGE2D_ARRAY:
+                destImageSize = dstImageInfo->arraySize * dstImageInfo->slicePitch;
+                break;
+        }
+    }
+
+    if (destImageSize > dstData.getSize())
+    {
+        if( gDebugTrace )
+            log_info( " - Resizing destination buffer...\n" );
+        dstData.reset(malloc(destImageSize),NULL,0,destImageSize);
+        if (dstData == NULL) {
+            log_error( "ERROR: Unable to malloc %lu bytes for dstData\n", destImageSize );
+            return -1;
+        }
+
+        dstHost.reset(malloc(destImageSize),NULL,0,destImageSize);
+        if (dstHost == NULL) {
+            dstData.reset(NULL);
+            log_error( "ERROR: Unable to malloc %lu bytes for dstHost\n", destImageSize );
+            return -1;
+        }
+    }
+    // 0xff fill: device dst and host reference start identical, so untouched bytes still compare equal.
+    memset( dstData, 0xff, destImageSize );
+    memset( dstHost, 0xff, destImageSize );
+
+    if( gDebugTrace )
+        log_info( " - Writing destination image...\n" );
+
+    dstImage = create_image( context, dstData, dstImageInfo, &error );
+    if( dstImage == NULL )
+        return error;
+
+    size_t dstRegion[ 3 ] = { dstImageInfo->width, 1, 1};
+    size_t dst_lod = 0;
+    size_t origin[ 4 ] = { 0, 0, 0, 0 };
+
+    if(gTestMipmaps)
+    {
+        switch(dstImageInfo->type) // dest mip level travels in the first unused destPos coordinate for the type
+        {
+            case CL_MEM_OBJECT_IMAGE1D:
+                dst_lod = destPos[1];
+                break;
+            case CL_MEM_OBJECT_IMAGE1D_ARRAY:
+            case CL_MEM_OBJECT_IMAGE2D:
+                dst_lod = destPos[2];
+                break;
+            case CL_MEM_OBJECT_IMAGE2D_ARRAY:
+            case CL_MEM_OBJECT_IMAGE3D:
+                dst_lod = destPos[3];
+                break;
+        }
+
+        dstRegion[ 0 ] = (dstImageInfo->width >> dst_lod)?(dstImageInfo->width >> dst_lod) : 1;
+    }
+    switch (dstImageInfo->type) // size the verification map region to the whole destination image (at dst_lod when mipmapped)
+    {
+        case CL_MEM_OBJECT_IMAGE1D:
+            if( gTestMipmaps )
+                origin[ 1 ] = dst_lod;
+            break;
+        case CL_MEM_OBJECT_IMAGE2D:
+            dstRegion[ 1 ] = dstImageInfo->height;
+            if( gTestMipmaps )
+            {
+                dstRegion[ 1 ] = (dstImageInfo->height >> dst_lod) ?(dstImageInfo->height >> dst_lod): 1;
+                origin[ 2 ] = dst_lod;
+            }
+            break;
+        case CL_MEM_OBJECT_IMAGE3D:
+            dstRegion[ 1 ] = dstImageInfo->height;
+            dstRegion[ 2 ] = dstImageInfo->depth;
+            if( gTestMipmaps )
+            {
+                dstRegion[ 1 ] = (dstImageInfo->height >> dst_lod) ?(dstImageInfo->height >> dst_lod): 1;
+                dstRegion[ 2 ] = (dstImageInfo->depth >> dst_lod) ?(dstImageInfo->depth >> dst_lod): 1;
+                origin[ 3 ] = dst_lod;
+            }
+            break;
+        case CL_MEM_OBJECT_IMAGE1D_ARRAY:
+            dstRegion[ 1 ] = dstImageInfo->arraySize;
+            if( gTestMipmaps )
+                origin[ 2 ] = dst_lod;
+            break;
+        case CL_MEM_OBJECT_IMAGE2D_ARRAY:
+            dstRegion[ 1 ] = dstImageInfo->height;
+            dstRegion[ 2 ] = dstImageInfo->arraySize;
+            if( gTestMipmaps )
+            {
+                dstRegion[ 1 ] = (dstImageInfo->height >> dst_lod) ?(dstImageInfo->height >> dst_lod): 1;
+                origin[ 3 ] = dst_lod;
+            }
+            break;
+    }
+
+    size_t region[ 3 ] = { dstRegion[ 0 ], dstRegion[ 1 ], dstRegion[ 2 ] };
+
+    // Now copy a subset to the destination image. This is the meat of what we're testing
+    if( gDebugTrace )
+    {
+        if( gTestMipmaps )
+        {
+            log_info( " - Copying from %d,%d,%d,%d to %d,%d,%d,%d size %d,%d,%d\n", (int)sourcePos[ 0 ], (int)sourcePos[ 1 ], (int)sourcePos[ 2 ],(int)sourcePos[ 3 ],
+                     (int)destPos[ 0 ], (int)destPos[ 1 ], (int)destPos[ 2 ],(int)destPos[ 3 ],
+                     (int)regionSize[ 0 ], (int)regionSize[ 1 ], (int)regionSize[ 2 ] );
+        }
+        else
+        {
+            log_info( " - Copying from %d,%d,%d to %d,%d,%d size %d,%d,%d\n", (int)sourcePos[ 0 ], (int)sourcePos[ 1 ], (int)sourcePos[ 2 ],
+                     (int)destPos[ 0 ], (int)destPos[ 1 ], (int)destPos[ 2 ],
+                     (int)regionSize[ 0 ], (int)regionSize[ 1 ], (int)regionSize[ 2 ] );
+        }
+    }
+
+    error = clEnqueueCopyImage( queue, srcImage, dstImage, sourcePos, destPos, regionSize, 0, NULL, NULL );
+    if( error != CL_SUCCESS )
+    {
+        log_error( "ERROR: Unable to copy image from pos %d,%d,%d to %d,%d,%d size %d,%d,%d! (%s)\n",
+                  (int)sourcePos[ 0 ], (int)sourcePos[ 1 ], (int)sourcePos[ 2 ], (int)destPos[ 0 ], (int)destPos[ 1 ], (int)destPos[ 2 ],
+                  (int)regionSize[ 0 ], (int)regionSize[ 1 ], (int)regionSize[ 2 ], IGetErrorString( error ) );
+        return error;
+    }
+
+    // Construct the final dest image values to test against
+    if( gDebugTrace )
+        log_info( " - Host verification copy...\n" );
+
+    copy_image_data( srcImageInfo, dstImageInfo, srcHost, dstHost, sourcePos, destPos, regionSize );
+
+    // Map the destination image to verify the results with the host
+    // copy. The contents of the entire buffer are compared.
+    if( gDebugTrace )
+        log_info( " - Mapping results...\n" );
+
+    size_t mappedRow, mappedSlice;
+    void* mapped = (char*)clEnqueueMapImage(queue, dstImage, CL_TRUE, CL_MAP_READ, origin, region, &mappedRow, &mappedSlice, 0, NULL, NULL, &error);
+    if (error != CL_SUCCESS)
+    {
+        log_error( "ERROR: Unable to map image for verification: %s\n", IGetErrorString( error ) );
+        return error;
+    }
+
+    // Verify scanline by scanline, since the pitches are different
+    char *sourcePtr = dstHost;
+    size_t cur_lod_offset = 0;
+    char *destPtr = (char*)mapped;
+
+    if( gTestMipmaps )
+    {
+        cur_lod_offset = compute_mip_level_offset(dstImageInfo, dst_lod);
+        sourcePtr += cur_lod_offset;
+    }
+
+    size_t scanlineSize = dstImageInfo->width * get_pixel_size( dstImageInfo->format );
+    size_t rowPitch = dstImageInfo->rowPitch;
+    size_t slicePitch = dstImageInfo->slicePitch;
+    size_t dst_height_lod = dstImageInfo->height;
+    if(gTestMipmaps)
+    {
+        size_t dst_width_lod = (dstImageInfo->width >> dst_lod)?(dstImageInfo->width >> dst_lod) : 1;
+        dst_height_lod = (dstImageInfo->height >> dst_lod)?(dstImageInfo->height >> dst_lod) : 1;
+        scanlineSize = dst_width_lod * get_pixel_size(dstImageInfo->format);
+        rowPitch = scanlineSize;
+        slicePitch = rowPitch * dst_height_lod;
+    }
+
+    if( gDebugTrace )
+        log_info( " - Scanline verification...\n" );
+
+    size_t thirdDim;
+    size_t secondDim;
+    if (dstImageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
+    {
+        secondDim = dstImageInfo->arraySize;
+        thirdDim = 1;
+    }
+    else if (dstImageInfo->type == CL_MEM_OBJECT_IMAGE2D_ARRAY)
+    {
+        secondDim = dstImageInfo->height;
+        if( gTestMipmaps )
+            secondDim = (dstImageInfo->height >> dst_lod) ? (dstImageInfo->height >> dst_lod):1;
+        thirdDim = dstImageInfo->arraySize;
+    }
+    else
+    {
+        secondDim = dstImageInfo->height;
+        thirdDim = dstImageInfo->depth;
+        if( gTestMipmaps )
+        {
+            secondDim = (dstImageInfo->height >> dst_lod) ? (dstImageInfo->height >> dst_lod):1;
+            if(dstImageInfo->type == CL_MEM_OBJECT_IMAGE3D)
+                thirdDim = (dstImageInfo->depth >> dst_lod) ? (dstImageInfo->depth >> dst_lod):1;
+        }
+    }
+
+    for( size_t z = 0; z < thirdDim; z++ )
+    {
+        for( size_t y = 0; y < secondDim; y++ )
+        {
+            if( memcmp( sourcePtr, destPtr, scanlineSize ) != 0 )
+            {
+                log_error( "ERROR: Scanline %d did not verify for image size %d,%d,%d pitch %d (extra %d bytes)\n", (int)y, (int)dstImageInfo->width, (int)dstImageInfo->height, (int)dstImageInfo->depth, (int)dstImageInfo->rowPitch, (int)dstImageInfo->rowPitch - (int)dstImageInfo->width * (int)get_pixel_size( dstImageInfo->format ) );
+
+                // Find the first missing pixel
+                size_t pixel_size = get_pixel_size( dstImageInfo->format );
+                size_t where = 0;
+                for( where = 0; where < dstImageInfo->width; where++ )
+                    if( memcmp( sourcePtr + pixel_size * where, destPtr + pixel_size * where, pixel_size) )
+                        break;
+                log_error( "Failed at column: %ld ", where );
+                switch( pixel_size ) // dump the mismatching pixel, formatted by its byte width
+                {
+                    case 1:
+                        log_error( "*0x%2.2x vs. 0x%2.2x\n", ((cl_uchar*)(sourcePtr + pixel_size * where))[0], ((cl_uchar*)(destPtr + pixel_size * where))[0] );
+                        break;
+                    case 2:
+                        log_error( "*0x%4.4x vs. 0x%4.4x\n", ((cl_ushort*)(sourcePtr + pixel_size * where))[0], ((cl_ushort*)(destPtr + pixel_size * where))[0] );
+                        break;
+                    case 3:
+                        log_error( "*{0x%2.2x, 0x%2.2x, 0x%2.2x} vs. {0x%2.2x, 0x%2.2x, 0x%2.2x}\n",
+                                  ((cl_uchar*)(sourcePtr + pixel_size * where))[0], ((cl_uchar*)(sourcePtr + pixel_size * where))[1], ((cl_uchar*)(sourcePtr + pixel_size * where))[2],
+                                  ((cl_uchar*)(destPtr + pixel_size * where))[0], ((cl_uchar*)(destPtr + pixel_size * where))[1], ((cl_uchar*)(destPtr + pixel_size * where))[2]
+                                  );
+                        break;
+                    case 4:
+                        log_error( "*0x%8.8x vs. 0x%8.8x\n", ((cl_uint*)(sourcePtr + pixel_size * where))[0], ((cl_uint*)(destPtr + pixel_size * where))[0] );
+                        break;
+                    case 6:
+                        log_error( "*{0x%4.4x, 0x%4.4x, 0x%4.4x} vs. {0x%4.4x, 0x%4.4x, 0x%4.4x}\n",
+                                  ((cl_ushort*)(sourcePtr + pixel_size * where))[0], ((cl_ushort*)(sourcePtr + pixel_size * where))[1], ((cl_ushort*)(sourcePtr + pixel_size * where))[2],
+                                  ((cl_ushort*)(destPtr + pixel_size * where))[0], ((cl_ushort*)(destPtr + pixel_size * where))[1], ((cl_ushort*)(destPtr + pixel_size * where))[2]
+                                  );
+                        break;
+                    case 8:
+                        log_error( "*0x%16.16llx vs. 0x%16.16llx\n", ((cl_ulong*)(sourcePtr + pixel_size * where))[0], ((cl_ulong*)(destPtr + pixel_size * where))[0] );
+                        break;
+                    case 12:
+                        log_error( "*{0x%8.8x, 0x%8.8x, 0x%8.8x} vs. {0x%8.8x, 0x%8.8x, 0x%8.8x}\n",
+                                  ((cl_uint*)(sourcePtr + pixel_size * where))[0], ((cl_uint*)(sourcePtr + pixel_size * where))[1], ((cl_uint*)(sourcePtr + pixel_size * where))[2],
+                                  ((cl_uint*)(destPtr + pixel_size * where))[0], ((cl_uint*)(destPtr + pixel_size * where))[1], ((cl_uint*)(destPtr + pixel_size * where))[2]
+                                  );
+                        break;
+                    case 16:
+                        log_error( "*{0x%8.8x, 0x%8.8x, 0x%8.8x, 0x%8.8x} vs. {0x%8.8x, 0x%8.8x, 0x%8.8x, 0x%8.8x}\n",
+                                  ((cl_uint*)(sourcePtr + pixel_size * where))[0], ((cl_uint*)(sourcePtr + pixel_size * where))[1], ((cl_uint*)(sourcePtr + pixel_size * where))[2], ((cl_uint*)(sourcePtr + pixel_size * where))[3],
+                                  ((cl_uint*)(destPtr + pixel_size * where))[0], ((cl_uint*)(destPtr + pixel_size * where))[1], ((cl_uint*)(destPtr + pixel_size * where))[2], ((cl_uint*)(destPtr + pixel_size * where))[3]
+                                  );
+                        break;
+                    default:
+                        log_error( "Don't know how to print pixel size of %ld\n", pixel_size );
+                        break;
+                }
+
+                return -1;
+            }
+            sourcePtr += rowPitch;
+            if((dstImageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY || dstImageInfo->type == CL_MEM_OBJECT_IMAGE1D))
+                destPtr += mappedSlice;
+            else
+                destPtr += mappedRow;
+        }
+        sourcePtr += slicePitch - rowPitch * dst_height_lod;
+        destPtr += mappedSlice - mappedRow * dst_height_lod;
+    }
+
+    // Unmap the image.
+    error = clEnqueueUnmapMemObject(queue, dstImage, mapped, 0, NULL, NULL);
+    if (error != CL_SUCCESS)
+    {
+        log_error( "ERROR: Unable to unmap image after verify: %s\n", IGetErrorString( error ) );
+        return error;
+    }
+
+    return 0;
+}
+
+int test_copy_image_size_generic( cl_device_id device, image_descriptor *srcImageInfo, image_descriptor *dstImageInfo, MTdata d )
+{
+    size_t sourcePos[ 3 ], destPos[ 3 ], regionSize[ 3 ]; // NOTE(review): test_copy_image_generic reads pos[3] when gTestMipmaps is set; these 3-element arrays would overflow — confirm this helper never runs with mipmaps enabled
+    int ret = 0, retCode;
+
+    for (int i = 0; i < 8; i++)
+    {
+        switch (srcImageInfo->type) // pick a random source origin, keeping at least 4 texels of headroom per used axis
+        {
+            case CL_MEM_OBJECT_IMAGE1D:
+                sourcePos[ 0 ] = random_in_range( 0, (int)(srcImageInfo->width - 4), d );
+                sourcePos[ 1 ] = 1;
+                sourcePos[ 2 ] = 1;
+                break;
+            case CL_MEM_OBJECT_IMAGE2D:
+                sourcePos[ 0 ] = random_in_range( 0, (int)(srcImageInfo->width - 4), d );
+                sourcePos[ 1 ] = random_in_range( 0, (int)(srcImageInfo->height - 4), d );
+                sourcePos[ 2 ] = 1;
+                break;
+            case CL_MEM_OBJECT_IMAGE3D:
+                sourcePos[ 0 ] = random_in_range( 0, (int)(srcImageInfo->width - 4), d );
+                sourcePos[ 1 ] = random_in_range( 0, (int)(srcImageInfo->height - 4), d );
+                sourcePos[ 2 ] = random_in_range( 0, (int)(srcImageInfo->depth - 4), d );
+                break;
+            case CL_MEM_OBJECT_IMAGE1D_ARRAY:
+                sourcePos[ 0 ] = random_in_range( 0, (int)(srcImageInfo->width - 4), d );
+                sourcePos[ 1 ] = random_in_range( 0, (int)(srcImageInfo->arraySize - 4), d );
+                sourcePos[ 2 ] = 1;
+                break;
+            case CL_MEM_OBJECT_IMAGE2D_ARRAY:
+                sourcePos[ 0 ] = random_in_range( 0, (int)(srcImageInfo->width - 4), d );
+                sourcePos[ 1 ] = random_in_range( 0, (int)(srcImageInfo->height - 4), d );
+                sourcePos[ 2 ] = random_in_range( 0, (int)(srcImageInfo->arraySize - 4), d );
+                break;
+        }
+
+        switch (dstImageInfo->type) // likewise for the destination origin
+        {
+            case CL_MEM_OBJECT_IMAGE1D:
+                destPos[ 0 ] = random_in_range( 0, (int)(dstImageInfo->width - 4), d );
+                destPos[ 1 ] = 1;
+                destPos[ 2 ] = 1;
+                break;
+            case CL_MEM_OBJECT_IMAGE2D:
+                destPos[ 0 ] = random_in_range( 0, (int)(dstImageInfo->width - 4), d );
+                destPos[ 1 ] = random_in_range( 0,
(int)(dstImageInfo->height - 4), d ); + destPos[ 2 ] = 1; + break; + case CL_MEM_OBJECT_IMAGE3D: + destPos[ 0 ] = random_in_range( 0, (int)(dstImageInfo->width - 4), d ); + destPos[ 1 ] = random_in_range( 0, (int)(dstImageInfo->height - 4), d ); + destPos[ 2 ] = random_in_range( 0, (int)(dstImageInfo->depth - 4), d ); + break; + case CL_MEM_OBJECT_IMAGE1D_ARRAY: + destPos[ 0 ] = random_in_range( 0, (int)(dstImageInfo->width - 4), d ); + destPos[ 1 ] = random_in_range( 0, (int)(dstImageInfo->arraySize - 4), d ); + destPos[ 2 ] = 1; + break; + case CL_MEM_OBJECT_IMAGE2D_ARRAY: + destPos[ 0 ] = random_in_range( 0, (int)(dstImageInfo->width - 4), d ); + destPos[ 1 ] = random_in_range( 0, (int)(dstImageInfo->height - 4), d ); + destPos[ 2 ] = random_in_range( 0, (int)(dstImageInfo->arraySize - 4), d ); + break; + } + + if ( (dstImageInfo->width - destPos[0]) < (srcImageInfo->width - sourcePos[0]) ) + regionSize[0] = random_in_range(1, (dstImageInfo->width - destPos[0]), d); + else + regionSize[0] = random_in_range(1, (srcImageInfo->width - sourcePos[0]), d); + + if (srcImageInfo->type == CL_MEM_OBJECT_IMAGE1D || dstImageInfo->type == CL_MEM_OBJECT_IMAGE1D) + regionSize[1] = 0; + else + { + if ( (dstImageInfo->height - destPos[1]) < (srcImageInfo->height - sourcePos[1]) ) + regionSize[1] = random_in_range(1, (dstImageInfo->height - destPos[1]), d); + else + regionSize[1] = random_in_range(1, (srcImageInfo->height - sourcePos[1]), d); + } + + regionSize[2] = 0; + if (dstImageInfo->type == CL_MEM_OBJECT_IMAGE3D && srcImageInfo->type == CL_MEM_OBJECT_IMAGE3D) + { + if ( (dstImageInfo->depth - destPos[2]) < (srcImageInfo->depth - sourcePos[2]) ) + regionSize[2] = random_in_range(1, (dstImageInfo->depth - destPos[2]), d); + else + regionSize[2] = random_in_range(1, (srcImageInfo->depth - sourcePos[2]), d); + } + else if ( (dstImageInfo->type == CL_MEM_OBJECT_IMAGE2D_ARRAY && srcImageInfo->type == CL_MEM_OBJECT_IMAGE2D_ARRAY) ) + { + if ( (dstImageInfo->arraySize - destPos[2]) 
< (srcImageInfo->arraySize - sourcePos[2]) ) + regionSize[2] = random_in_range(1, (dstImageInfo->arraySize - destPos[2]), d); + else + regionSize[2] = random_in_range(1, (srcImageInfo->arraySize - sourcePos[2]), d); + } + + // Go for it! + retCode = test_copy_image_generic( device, srcImageInfo, dstImageInfo, sourcePos, destPos, regionSize, d ); + if( retCode < 0 ) + return retCode; + else + ret += retCode; + } + + return ret; +} + diff --git a/test_conformance/images/clCopyImage/test_loops.cpp b/test_conformance/images/clCopyImage/test_loops.cpp new file mode 100644 index 00000000..ba8f588e --- /dev/null +++ b/test_conformance/images/clCopyImage/test_loops.cpp @@ -0,0 +1,274 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../testBase.h" + +extern cl_filter_mode gFilterModeToUse; +extern cl_addressing_mode gAddressModeToUse; +extern int gTypesToTest; +extern int gNormalizedModeToUse; +extern bool gTestMipmaps; +extern cl_channel_type gChannelTypeToUse; +extern cl_command_queue queue; +extern cl_context context; +extern cl_channel_type gChannelTypeToUse; +extern cl_channel_order gChannelOrderToUse; + + +extern bool gDebugTrace; + +extern int test_copy_image_set_1D( cl_device_id device, cl_image_format *format ); +extern int test_copy_image_set_2D( cl_device_id device, cl_image_format *format ); +extern int test_copy_image_set_3D( cl_device_id device, cl_image_format *format ); +extern int test_copy_image_set_1D_array( cl_device_id device, cl_image_format *format ); +extern int test_copy_image_set_2D_array( cl_device_id device, cl_image_format *format ); +extern int test_copy_image_set_2D_3D( cl_device_id device, cl_image_format *format, bool reverse ); +extern int test_copy_image_set_2D_2D_array( cl_device_id device, cl_image_format *format, bool reverse ); +extern int test_copy_image_set_3D_2D_array( cl_device_id device, cl_image_format *format, bool reverse ); + + +int filter_formats( cl_image_format *formatList, bool *filterFlags, unsigned int formatCount, cl_channel_type *channelDataTypesToFilter ) +{ + int numSupported = 0; + for( unsigned int j = 0; j < formatCount; j++ ) + { + // If this format has been previously filtered, remove the filter + if( filterFlags[ j ] ) + filterFlags[ j ] = false; + + // Have we already discarded this via the command line? + if( gChannelTypeToUse != (cl_channel_type)-1 && gChannelTypeToUse != formatList[ j ].image_channel_data_type ) + { + filterFlags[ j ] = true; + continue; + } + + // Have we already discarded the channel order via the command line? 
+ if( gChannelOrderToUse != (cl_channel_order)-1 && gChannelOrderToUse != formatList[ j ].image_channel_order ) + { + filterFlags[ j ] = true; + continue; + } + + // Is given format standard channel order and type given by spec. We don't want to test it if this is vendor extension + if( !IsChannelOrderSupported( formatList[ j ].image_channel_order ) || !IsChannelTypeSupported( formatList[ j ].image_channel_data_type ) ) + { + filterFlags[ j ] = true; + continue; + } + + // We don't filter by channel type + if( !channelDataTypesToFilter ) + { + numSupported++; + continue; + } + + // Is the format supported? + int i; + for( i = 0; channelDataTypesToFilter[ i ] != (cl_channel_type)-1; i++ ) + { + if( formatList[ j ].image_channel_data_type == channelDataTypesToFilter[ i ] ) + { + numSupported++; + break; + } + } + if( channelDataTypesToFilter[ i ] == (cl_channel_type)-1 ) + { + // Format is NOT supported, so mark it as such + filterFlags[ j ] = true; + } + } + return numSupported; +} + +int get_format_list( cl_device_id device, cl_mem_object_type imageType, cl_image_format * &outFormatList, unsigned int &outFormatCount, cl_mem_flags flags ) +{ + int error; + + cl_image_format tempList[ 128 ]; + error = clGetSupportedImageFormats( context, (cl_mem_flags)flags, + imageType, 128, tempList, &outFormatCount ); + test_error( error, "Unable to get count of supported image formats" ); + + outFormatList = new cl_image_format[ outFormatCount ]; + error = clGetSupportedImageFormats( context, (cl_mem_flags)flags, + imageType, outFormatCount, outFormatList, NULL ); + test_error( error, "Unable to get list of supported image formats" ); + return 0; +} + +int test_image_type( cl_device_id device, MethodsToTest testMethod, cl_mem_flags flags ) +{ + const char *name; + cl_mem_object_type imageType; + + if ( gTestMipmaps ) + { + if ( 0 == is_extension_available( device, "cl_khr_mipmap_image" )) + { + log_info( "-----------------------------------------------------\n" ); + log_info( 
"This device does not support cl_khr_mipmap_image.\nSkipping mipmapped image test. \n" ); + log_info( "-----------------------------------------------------\n\n" ); + return 0; + } + } + + if( testMethod == k1D ) + { + name = "1D -> 1D"; + imageType = CL_MEM_OBJECT_IMAGE1D; + } + else if( testMethod == k2D ) + { + name = "2D -> 2D"; + imageType = CL_MEM_OBJECT_IMAGE2D; + } + else if( testMethod == k3D ) + { + name = "3D -> 3D"; + imageType = CL_MEM_OBJECT_IMAGE3D; + } + else if( testMethod == k1DArray ) + { + name = "1D array -> 1D array"; + imageType = CL_MEM_OBJECT_IMAGE1D_ARRAY; + } + else if( testMethod == k2DArray ) + { + name = "2D array -> 2D array"; + imageType = CL_MEM_OBJECT_IMAGE2D_ARRAY; + } + else if( testMethod == k2DTo3D ) + { + name = "2D -> 3D"; + imageType = CL_MEM_OBJECT_IMAGE3D; + } + else if( testMethod == k3DTo2D ) + { + name = "3D -> 2D"; + imageType = CL_MEM_OBJECT_IMAGE3D; + } + else if( testMethod == k2DArrayTo2D ) + { + name = "2D array -> 2D"; + imageType = CL_MEM_OBJECT_IMAGE2D_ARRAY; + } + else if( testMethod == k2DTo2DArray ) + { + name = "2D -> 2D array"; + imageType = CL_MEM_OBJECT_IMAGE2D_ARRAY; + } + else if( testMethod == k2DArrayTo3D ) + { + name = "2D array -> 3D"; + imageType = CL_MEM_OBJECT_IMAGE3D; + } + else if( testMethod == k3DTo2DArray ) + { + name = "3D -> 2D array"; + imageType = CL_MEM_OBJECT_IMAGE3D; + } + + if(gTestMipmaps) + log_info( "Running mipmapped %s tests...\n", name ); + else + log_info( "Running %s tests...\n", name ); + + int ret = 0; + + // Grab the list of supported image formats for integer reads + cl_image_format *formatList; + bool *filterFlags; + unsigned int numFormats; + + if( get_format_list( device, imageType, formatList, numFormats, flags ) ) + return -1; + + filterFlags = new bool[ numFormats ]; + if( filterFlags == NULL ) + { + log_error( "ERROR: Out of memory allocating filter flags list!\n" ); + return -1; + } + memset( filterFlags, 0, sizeof( bool ) * numFormats ); + + 
filter_formats(formatList, filterFlags, numFormats, NULL); + + // Run the format list + for( unsigned int i = 0; i < numFormats; i++ ) + { + int test_return = 0; + if( filterFlags[i] ) + { + continue; + } + + print_header( &formatList[ i ], false ); + + gTestCount++; + + if( testMethod == k1D ) + test_return = test_copy_image_set_1D( device, &formatList[ i ] ); + else if( testMethod == k2D ) + test_return = test_copy_image_set_2D( device, &formatList[ i ] ); + else if( testMethod == k3D ) + test_return = test_copy_image_set_3D( device, &formatList[ i ] ); + else if( testMethod == k1DArray ) + test_return = test_copy_image_set_1D_array( device, &formatList[ i ] ); + else if( testMethod == k2DArray ) + test_return = test_copy_image_set_2D_array( device, &formatList[ i ] ); + else if( testMethod == k2DTo3D ) + test_return = test_copy_image_set_2D_3D( device, &formatList[ i ], false ); + else if( testMethod == k3DTo2D ) + test_return = test_copy_image_set_2D_3D( device, &formatList[ i ], true ); + else if( testMethod == k2DArrayTo2D) + test_return = test_copy_image_set_2D_2D_array( device, &formatList[ i ], true); + else if( testMethod == k2DTo2DArray) + test_return = test_copy_image_set_2D_2D_array( device, &formatList[ i ], false); + else if( testMethod == k2DArrayTo3D) + test_return = test_copy_image_set_3D_2D_array( device, &formatList[ i ], true); + else if( testMethod == k3DTo2DArray) + test_return = test_copy_image_set_3D_2D_array( device, &formatList[ i ], false); + + if (test_return) { + gTestFailure++; + log_error( "FAILED: " ); + print_header( &formatList[ i ], true ); + log_info( "\n" ); + } + + ret += test_return; + } + + delete filterFlags; + delete formatList; + + return ret; +} + +int test_image_set( cl_device_id device, MethodsToTest testMethod ) +{ + int ret = 0; + + ret += test_image_type( device, testMethod, CL_MEM_READ_ONLY ); + + return ret; +} + + + + diff --git a/test_conformance/images/clFillImage/CMakeLists.txt 
b/test_conformance/images/clFillImage/CMakeLists.txt new file mode 100644 index 00000000..7ee70944 --- /dev/null +++ b/test_conformance/images/clFillImage/CMakeLists.txt @@ -0,0 +1,27 @@ + +set(MODULE_NAME CL_FILL_IMAGES) + +set(${MODULE_NAME}_SOURCES + main.cpp + test_fill_1D.cpp + test_fill_1D_array.cpp + test_fill_2D.cpp + test_fill_2D_array.cpp + test_fill_generic.cpp + test_loops.cpp + test_fill_3D.cpp +# test_fill_2D_3D.cpp + ../../../test_common/harness/testHarness.c + ../../../test_common/harness/errorHelpers.c + ../../../test_common/harness/threadTesting.c + ../../../test_common/harness/kernelHelpers.c + ../../../test_common/harness/imageHelpers.cpp + ../../../test_common/harness/mt19937.c + ../../../test_common/harness/conversions.c + ../../../test_common/harness/typeWrappers.cpp + ../../../test_common/harness/msvc9.c + ../../../test_common/harness/parseParameters.cpp +) + + +include(../../CMakeCommon.txt) diff --git a/test_conformance/images/clFillImage/Jamfile b/test_conformance/images/clFillImage/Jamfile new file mode 100644 index 00000000..14812f04 --- /dev/null +++ b/test_conformance/images/clFillImage/Jamfile @@ -0,0 +1,19 @@ +project + : requirements +# gcc:-xc++ +# msvc:"/TP" + ; + +exe test_cl_fill_images + : main.cpp + test_fill_2D_3D.cpp + test_fill_2D.cpp + test_fill_3D.cpp + test_loops.cpp + ; + +install dist + : test_cl_fill_images + : debug:$(DIST)/debug/tests/test_conformance/images/clFillImage + release:$(DIST)/release/tests/test_conformance/images/clFillImage + ; diff --git a/test_conformance/images/clFillImage/Makefile b/test_conformance/images/clFillImage/Makefile new file mode 100644 index 00000000..58f12012 --- /dev/null +++ b/test_conformance/images/clFillImage/Makefile @@ -0,0 +1,53 @@ +ifdef BUILD_WITH_ATF +ATF = -framework ATF +USE_ATF = -DUSE_ATF +endif + +SRCS = main.cpp \ + test_fill_generic.cpp \ + test_fill_1D.cpp \ + test_fill_2D.cpp \ + test_fill_3D.cpp \ + test_fill_1D_array.cpp \ + test_fill_2D_array.cpp \ + 
test_loops.cpp \ + ../../../test_common/harness/errorHelpers.c \ + ../../../test_common/harness/threadTesting.c \ + ../../../test_common/harness/kernelHelpers.c \ + ../../../test_common/harness/imageHelpers.cpp \ + ../../../test_common/harness/conversions.c \ + ../../../test_common/harness/testHarness.cpp \ + ../../../test_common/harness/typeWrappers.cpp \ + ../../../test_common/harness/mt19937.c + + +DEFINES = DONT_TEST_GARBAGE_POINTERS + +SOURCES = $(abspath $(SRCS)) +LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries +LIBPATH += -L. +FRAMEWORK = +HEADERS = +TARGET = test_cl_fill_images +INCLUDE = +COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32 -Os +CC = c++ +CXX = c++ +CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE) +CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE) +LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF} + +OBJECTS := ${SOURCES:.c=.o} +OBJECTS := ${OBJECTS:.cpp=.o} + +TARGETOBJECT = +all: $(TARGET) + +$(TARGET): $(OBJECTS) + $(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES) + +clean: + rm -f $(TARGET) $(OBJECTS) + +.DEFAULT: + @echo The target \"$@\" does not exist in Makefile. diff --git a/test_conformance/images/clFillImage/main.cpp b/test_conformance/images/clFillImage/main.cpp new file mode 100644 index 00000000..9c1f7904 --- /dev/null +++ b/test_conformance/images/clFillImage/main.cpp @@ -0,0 +1,252 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../../test_common/harness/compat.h" + +#include +#include + +#if !defined(_WIN32) +#include +#include +#endif + +#include "../testBase.h" + +bool gDebugTrace = false, gTestSmallImages = false, gTestMaxImages = false, gTestRounding = false, gEnablePitch = false; +int gTypesToTest = 0; +cl_channel_type gChannelTypeToUse = (cl_channel_type)-1; +cl_channel_order gChannelOrderToUse = (cl_channel_order)-1; +cl_device_type gDeviceType = CL_DEVICE_TYPE_DEFAULT; +cl_context context; +cl_command_queue queue; + +extern int test_image_set( cl_device_id device, MethodsToTest testMethod ); + +#define MAX_ALLOWED_STD_DEVIATION_IN_MB 8.0 + +void printUsage( const char *execName ) +{ + const char *p = strrchr( execName, '/' ); + if ( p != NULL ) + execName = p + 1; + + log_info( "Usage: %s [debug_trace] [small_images]\n", execName ); + log_info( "Where:\n" ); + log_info( "\t1D - Only test 1D images\n" ); + log_info( "\t2D - Only test 2D images\n" ); + log_info( "\t3D - Only test 3D images\n" ); + log_info( "\t1Darray - Only test 1D image arrays\n" ); + log_info( "\t2Darray - Only test 2D image arrays\n" ); + log_info( "\n" ); + log_info( "\tThe following flags specify the types to test. 
They can be combined; if none are specified, all are tested:\n" ); + log_info( "\t\tint - Test integer fill\n" ); + log_info( "\t\tuint - Test unsigned integer fill\n" ); + log_info( "\t\tfloat - Test float fill\n" ); + log_info( "\n" ); + log_info( "\tdebug_trace - Enables additional debug info logging\n" ); + log_info( "\tsmall_images - Runs every format through a loop of widths 1-13 and heights 1-9, instead of random sizes\n" ); + log_info( "\tmax_images - Runs every format through a set of size combinations with the max values, max values - 1, and max values / 128\n" ); + log_info( "\tuse_pitches - Enables row and slice pitches\n" ); + log_info( "You may also use appropriate CL_ channel type and ordering constants.\n" ); +} + + +int main(int argc, const char *argv[]) +{ + cl_platform_id platform; + cl_device_id device; + cl_channel_type chanType; + cl_channel_order chanOrder; + char str[ 128 ]; + int testMethods = 0; + bool randomize = false; + + test_start(); + + checkDeviceTypeOverride( &gDeviceType ); + + // Parse arguments + for ( int i = 1; i < argc; i++ ) + { + strncpy( str, argv[ i ], sizeof( str ) - 1 ); + + if ( strcmp( str, "cpu" ) == 0 || strcmp( str, "CL_DEVICE_TYPE_CPU" ) == 0 ) + gDeviceType = CL_DEVICE_TYPE_CPU; + else if ( strcmp( str, "gpu" ) == 0 || strcmp( str, "CL_DEVICE_TYPE_GPU" ) == 0 ) + gDeviceType = CL_DEVICE_TYPE_GPU; + else if ( strcmp( str, "accelerator" ) == 0 || strcmp( str, "CL_DEVICE_TYPE_ACCELERATOR" ) == 0 ) + gDeviceType = CL_DEVICE_TYPE_ACCELERATOR; + else if ( strcmp( str, "CL_DEVICE_TYPE_DEFAULT" ) == 0 ) + gDeviceType = CL_DEVICE_TYPE_DEFAULT; + + else if ( strcmp( str, "debug_trace" ) == 0 ) + gDebugTrace = true; + + else if ( strcmp( str, "small_images" ) == 0 ) + gTestSmallImages = true; + else if ( strcmp( str, "max_images" ) == 0 ) + gTestMaxImages = true; + + else if ( strcmp( str, "use_pitches" ) == 0 ) + gEnablePitch = true; + + else if ( strcmp( str, "randomize" ) == 0 ) + randomize = true; + + else if ( strcmp( 
str, "1D" ) == 0 ) + testMethods |= k1D; + else if ( strcmp( str, "2D" ) == 0 ) + testMethods |= k2D; + else if ( strcmp( str, "3D" ) == 0 ) + testMethods |= k3D; + else if( strcmp( str, "1Darray" ) == 0 ) + testMethods |= k1DArray; + else if( strcmp( str, "2Darray" ) == 0 ) + testMethods |= k2DArray; + + else if( strcmp( str, "int" ) == 0 ) + gTypesToTest |= kTestInt; + else if( strcmp( str, "uint" ) == 0 ) + gTypesToTest |= kTestUInt; + else if( strcmp( str, "float" ) == 0 ) + gTypesToTest |= kTestFloat; + + else if ( strcmp( str, "help" ) == 0 || strcmp( str, "?" ) == 0 ) + { + printUsage( argv[ 0 ] ); + return -1; + } + + else if ( ( chanType = get_channel_type_from_name( str ) ) != (cl_channel_type)-1 ) + gChannelTypeToUse = chanType; + + else if ( ( chanOrder = get_channel_order_from_name( str ) ) != (cl_channel_order)-1 ) + gChannelOrderToUse = chanOrder; + else + { + log_error( "ERROR: Unknown argument %d: %s. Exiting....\n", i, str ); + return -1; + } + + } + + if (testMethods == 0) + testMethods = k1D | k2D | k3D | k1DArray | k2DArray; + if ( gTypesToTest == 0 ) + gTypesToTest = kTestAllTypes; + + // Seed the random # generators + if ( randomize ) + { + gRandomSeed = (cl_uint) time( NULL ); + log_info( "Random seed: %u.\n", gRandomSeed ); + gReSeed = 1; + } + + int error; + // Get our platform + error = clGetPlatformIDs(1, &platform, NULL); + if ( error ) + { + print_error( error, "Unable to get platform" ); + test_finish(); + return -1; + } + + // Get our device + error = clGetDeviceIDs(platform, gDeviceType, 1, &device, NULL ); + if ( error ) + { + print_error( error, "Unable to get specified device" ); + test_finish(); + return -1; + } + + char deviceName[ 128 ], deviceVendor[ 128 ], deviceVersion[ 128 ]; + error = clGetDeviceInfo( device, CL_DEVICE_NAME, sizeof( deviceName ), deviceName, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_VENDOR, sizeof( deviceVendor ), deviceVendor, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_VERSION, 
sizeof( deviceVersion ), deviceVersion, NULL ); + if ( error != CL_SUCCESS ) + { + print_error( error, "Unable to get device information" ); + test_finish(); + return -1; + } + log_info("Using compute device: Name = %s, Vendor = %s, Version = %s\n", deviceName, deviceVendor, deviceVersion ); + + // Check for image support + if (checkForImageSupport( device ) == CL_IMAGE_FORMAT_NOT_SUPPORTED) { + log_info("Device does not support images. Skipping test.\n"); + test_finish(); + return 0; + } + + // Create a context to test with + context = clCreateContext( NULL, 1, &device, notify_callback, NULL, &error ); + if ( error != CL_SUCCESS ) + { + print_error( error, "Unable to create testing context" ); + test_finish(); + return -1; + } + + // Create a queue against the context + queue = clCreateCommandQueueWithProperties( context, device, 0, &error ); + if ( error != CL_SUCCESS ) + { + print_error( error, "Unable to create testing command queue" ); + test_finish(); + return -1; + } + + if ( gTestSmallImages ) + log_info( "Note: Using small test images\n" ); + + // Run the test now + int ret = 0; + for ( int test = k1D; test <= k3D; test <<= 1 ) + { + if ( testMethods & test ) + ret += test_image_set( device, (MethodsToTest)test ); + } + + error = clFinish(queue); + if (error) + print_error(error, "clFinish failed."); + + if (gTestFailure == 0) { + if (gTestCount > 1) + log_info("PASSED %d of %d tests.\n", gTestCount, gTestCount); + else + log_info("PASSED test.\n"); + } + else if (gTestFailure > 0) { + if (gTestCount > 1) + log_error("FAILED %d of %d tests.\n", gTestFailure, gTestCount); + else + log_error("FAILED test.\n"); + } + + // Clean up + clReleaseCommandQueue(queue); + clReleaseContext(context); + test_finish(); + + if (gTestFailure > 0) + return gTestFailure; + + return ret; +} diff --git a/test_conformance/images/clFillImage/test_fill_1D.cpp b/test_conformance/images/clFillImage/test_fill_1D.cpp new file mode 100644 index 00000000..bd3bcaec --- /dev/null +++ 
b/test_conformance/images/clFillImage/test_fill_1D.cpp @@ -0,0 +1,181 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../testBase.h" + +#define MAX_ERR 0.005f +#define MAX_HALF_LINEAR_ERR 0.3f + +extern bool gDebugTrace, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestRounding; +extern cl_filter_mode gFilterModeToUse; +extern cl_addressing_mode gAddressModeToUse; +extern uint64_t gRoundingStartValue; +extern cl_command_queue queue; +extern cl_context context; + +// Defined in test_fill_2D_3D.cpp +extern int test_fill_image_generic( cl_device_id device, image_descriptor *imageInfo, + const size_t origin[], const size_t region[], ExplicitType outputType, MTdata d ); + + +int test_fill_image_size_1D( cl_device_id device, image_descriptor *imageInfo, ExplicitType outputType, MTdata d ) +{ + size_t origin[ 3 ], region[ 3 ]; + int ret = 0, retCode; + + // First, try just a full covering region fill + origin[ 0 ] = origin[ 1 ] = origin[ 2 ] = 0; + region[ 0 ] = imageInfo->width; + region[ 1 ] = 1; + region[ 2 ] = 1; + + retCode = test_fill_image_generic( device, imageInfo, origin, region, outputType, d ); + if ( retCode < 0 ) + return retCode; + else + ret += retCode; + + // Now try a sampling of different random regions + for ( int i = 0; i < 8; i++ ) + { + // Pick a random size + region[ 0 ] = ( imageInfo->width > 8 ) ? 
(size_t)random_in_range( 8, (int)imageInfo->width - 1, d ) : imageInfo->width; + + // Now pick positions within valid ranges + origin[ 0 ] = ( imageInfo->width > region[ 0 ] ) ? (size_t)random_in_range( 0, (int)( imageInfo->width - region[ 0 ] - 1 ), d ) : 0; + + // Go for it! + retCode = test_fill_image_generic( device, imageInfo, origin, region, outputType, d ); + if ( retCode < 0 ) + return retCode; + else + ret += retCode; + } + + return ret; +} + + +int test_fill_image_set_1D( cl_device_id device, cl_image_format *format, ExplicitType outputType ) +{ + size_t maxWidth; + cl_ulong maxAllocSize, memSize; + image_descriptor imageInfo = { 0 }; + RandomSeed seed(gRandomSeed); + const size_t rowPadding_default = 48; + size_t rowPadding = gEnablePitch ? rowPadding_default : 0; + size_t pixelSize; + + memset(&imageInfo, 0x0, sizeof(image_descriptor)); + imageInfo.type = CL_MEM_OBJECT_IMAGE1D; + imageInfo.format = format; + pixelSize = get_pixel_size( imageInfo.format ); + + int error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL ); + test_error( error, "Unable to get max image 2D size from device" ); + + if (memSize > (cl_ulong)SIZE_MAX) { + memSize = (cl_ulong)SIZE_MAX; + } + + if ( gTestSmallImages ) + { + for ( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ ) + { + imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding; + + if (gEnablePitch) + { + rowPadding = rowPadding_default; + do { + rowPadding++; + imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding; + } while ((imageInfo.rowPitch % pixelSize) != 0); + } + + if ( gDebugTrace ) + log_info( " at size %d,%d\n", (int)imageInfo.width, (int)imageInfo.height ); + + int ret = test_fill_image_size_1D( device, &imageInfo, outputType, 
seed ); + if ( ret ) + return -1; + } + } + else if ( gTestMaxImages ) + { + // Try a specific set of maximum sizes + size_t numbeOfSizes; + size_t sizes[100][3]; + + get_max_sizes(&numbeOfSizes, 100, sizes, maxWidth, 1, 1, 1, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE1D, imageInfo.format); + + for ( size_t idx = 0; idx < numbeOfSizes; idx++ ) + { + imageInfo.width = sizes[ idx ][ 0 ]; + imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding; + + if (gEnablePitch) + { + rowPadding = rowPadding_default; + do { + rowPadding++; + imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding; + } while ((imageInfo.rowPitch % pixelSize) != 0); + } + + log_info( "Testing %d\n", (int)sizes[ idx ][ 0 ] ); + if ( gDebugTrace ) + log_info( " at max size %d\n", (int)sizes[ idx ][ 0 ] ); + if ( test_fill_image_size_1D( device, &imageInfo, outputType, seed ) ) + return -1; + } + } + else + { + for ( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ ) + { + cl_ulong size; + // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that + // image, the result array, plus offset arrays, will fit in the global ram space + do + { + imageInfo.width = (size_t)random_log_in_range( 16, (int)maxWidth / 32, seed ); + + imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding; + + if (gEnablePitch) + { + rowPadding = rowPadding_default; + do { + rowPadding++; + imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding; + } while ((imageInfo.rowPitch % pixelSize) != 0); + } + + size = (size_t)imageInfo.rowPitch * 4; + } while ( size > maxAllocSize || ( size * 3 ) > memSize ); + + if ( gDebugTrace ) + log_info( " at size %d (row pitch %d) out of %d\n", (int)imageInfo.width, (int)imageInfo.rowPitch, (int)maxWidth ); + int ret = test_fill_image_size_1D( device, &imageInfo, outputType, seed ); + if ( ret ) + return -1; + } + } + + return 0; +} diff --git a/test_conformance/images/clFillImage/test_fill_1D_array.cpp 
b/test_conformance/images/clFillImage/test_fill_1D_array.cpp new file mode 100644 index 00000000..45caa24f --- /dev/null +++ b/test_conformance/images/clFillImage/test_fill_1D_array.cpp @@ -0,0 +1,193 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../testBase.h" + +#define MAX_ERR 0.005f +#define MAX_HALF_LINEAR_ERR 0.3f + +extern bool gDebugTrace, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestRounding; +extern cl_filter_mode gFilterModeToUse; +extern cl_addressing_mode gAddressModeToUse; +extern uint64_t gRoundingStartValue; +extern cl_command_queue queue; +extern cl_context context; + +// Defined in test_fill_2D_3D.cpp +extern int test_fill_image_generic( cl_device_id device, image_descriptor *imageInfo, + const size_t origin[], const size_t region[], ExplicitType outputType, MTdata d ); + + +int test_fill_image_size_1D_array( cl_device_id device, image_descriptor *imageInfo, ExplicitType outputType, MTdata d ) +{ + size_t origin[ 3 ], region[ 3 ]; + int ret = 0, retCode; + + // First, try just a full covering region fill + origin[ 0 ] = origin[ 1 ] = origin[ 2 ] = 0; + region[ 0 ] = imageInfo->width; + region[ 1 ] = imageInfo->arraySize; + region[ 2 ] = 1; + + retCode = test_fill_image_generic( device, imageInfo, origin, region, outputType, d ); + if ( retCode < 0 ) + return retCode; + else + ret += retCode; + + // Now try a sampling of different random regions + 
for ( int i = 0; i < 8; i++ ) + { + // Pick a random size + region[ 0 ] = ( imageInfo->width > 8 ) ? (size_t)random_in_range( 8, (int)imageInfo->width - 1, d ) : imageInfo->width; + region[ 1 ] = ( imageInfo->arraySize > 8 ) ? (size_t)random_in_range( 8, (int)imageInfo->arraySize - 1, d ) : imageInfo->arraySize; + + // Now pick positions within valid ranges + origin[ 0 ] = ( imageInfo->width > region[ 0 ] ) ? (size_t)random_in_range( 0, (int)( imageInfo->width - region[ 0 ] - 1 ), d ) : 0; + origin[ 1 ] = ( imageInfo->arraySize > region[ 1 ] ) ? (size_t)random_in_range( 0, (int)( imageInfo->arraySize - region[ 1 ] - 1 ), d ) : 0; + + // Go for it! + retCode = test_fill_image_generic( device, imageInfo, origin, region, outputType, d ); + if ( retCode < 0 ) + return retCode; + else + ret += retCode; + } + + return ret; +} + + +int test_fill_image_set_1D_array( cl_device_id device, cl_image_format *format, ExplicitType outputType ) +{ + size_t maxWidth, maxArraySize; + cl_ulong maxAllocSize, memSize; + image_descriptor imageInfo = { 0 }; + RandomSeed seed(gRandomSeed); + const size_t rowPadding_default = 48; + size_t rowPadding = gEnablePitch ? 
rowPadding_default : 0; + size_t pixelSize; + + memset(&imageInfo, 0x0, sizeof(image_descriptor)); + imageInfo.type = CL_MEM_OBJECT_IMAGE1D_ARRAY; + imageInfo.format = format; + pixelSize = get_pixel_size( imageInfo.format ); + + int error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, sizeof( maxArraySize ), &maxArraySize, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL ); + test_error( error, "Unable to get max image 1D array size from device" ); + + if (memSize > (cl_ulong)SIZE_MAX) { + memSize = (cl_ulong)SIZE_MAX; + } + + if ( gTestSmallImages ) + { + for ( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ ) + { + imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding; + + if (gEnablePitch) + { + rowPadding = rowPadding_default; + do { + rowPadding++; + imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding; + } while ((imageInfo.rowPitch % pixelSize) != 0); + } + + imageInfo.slicePitch = imageInfo.rowPitch; + for ( imageInfo.arraySize = 2; imageInfo.arraySize < 9; imageInfo.arraySize++ ) + { + if ( gDebugTrace ) + log_info( " at size %d,%d\n", (int)imageInfo.width, (int)imageInfo.arraySize ); + + int ret = test_fill_image_size_1D_array( device, &imageInfo, outputType, seed ); + if ( ret ) + return -1; + } + } + } + else if ( gTestMaxImages ) + { + // Try a specific set of maximum sizes + size_t numbeOfSizes; + size_t sizes[100][3]; + + get_max_sizes(&numbeOfSizes, 100, sizes, maxWidth, 1, 1, maxArraySize, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE1D_ARRAY, imageInfo.format); + + for ( size_t idx = 0; idx < numbeOfSizes; idx++ ) + { + imageInfo.width = sizes[ idx ][ 0 ]; + imageInfo.arraySize = sizes[ idx ][ 2 ]; + imageInfo.rowPitch = imageInfo.width * 
pixelSize + rowPadding; + + if (gEnablePitch) + { + rowPadding = rowPadding_default; + do { + rowPadding++; + imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding; + } while ((imageInfo.rowPitch % pixelSize) != 0); + } + + imageInfo.slicePitch = imageInfo.rowPitch; + log_info( "Testing %d x %d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 2 ] ); + if ( gDebugTrace ) + log_info( " at max size %d,%d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 2 ] ); + if ( test_fill_image_size_1D_array( device, &imageInfo, outputType, seed ) ) + return -1; + } + } + else + { + for ( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ ) + { + cl_ulong size; + // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that + // image, the result array, plus offset arrays, will fit in the global ram space + do + { + imageInfo.width = (size_t)random_log_in_range( 16, (int)maxWidth / 32, seed ); + imageInfo.arraySize = (size_t)random_log_in_range( 16, (int)maxArraySize / 32, seed ); + + imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding; + + if (gEnablePitch) + { + rowPadding = rowPadding_default; + do { + rowPadding++; + imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding; + } while ((imageInfo.rowPitch % pixelSize) != 0); + } + + imageInfo.slicePitch = imageInfo.rowPitch; + + size = (size_t)imageInfo.rowPitch * (size_t)imageInfo.arraySize * 4; + } while ( size > maxAllocSize || ( size * 3 ) > memSize ); + + if ( gDebugTrace ) + log_info( " at size %d,%d (row pitch %d) out of %d,%d\n", (int)imageInfo.width, (int)imageInfo.arraySize, (int)imageInfo.rowPitch, (int)maxWidth, (int)maxArraySize ); + int ret = test_fill_image_size_1D_array( device, &imageInfo, outputType, seed ); + if ( ret ) + return -1; + } + } + + return 0; +} diff --git a/test_conformance/images/clFillImage/test_fill_2D.cpp b/test_conformance/images/clFillImage/test_fill_2D.cpp new file mode 100644 index 00000000..01b80ffb --- /dev/null +++ 
b/test_conformance/images/clFillImage/test_fill_2D.cpp @@ -0,0 +1,189 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../testBase.h" + +#define MAX_ERR 0.005f +#define MAX_HALF_LINEAR_ERR 0.3f + +extern bool gDebugTrace, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestRounding; +extern cl_filter_mode gFilterModeToUse; +extern cl_addressing_mode gAddressModeToUse; +extern uint64_t gRoundingStartValue; +extern cl_command_queue queue; +extern cl_context context; + +// Defined in test_fill_2D_3D.cpp +extern int test_fill_image_generic( cl_device_id device, image_descriptor *imageInfo, + const size_t origin[], const size_t region[], ExplicitType outputType, MTdata d ); + + +int test_fill_image_size_2D( cl_device_id device, image_descriptor *imageInfo, ExplicitType outputType, MTdata d ) +{ + size_t origin[ 3 ], region[ 3 ]; + int ret = 0, retCode; + + // First, try just a full covering region fill + origin[ 0 ] = origin[ 1 ] = origin[ 2 ] = 0; + region[ 0 ] = imageInfo->width; + region[ 1 ] = imageInfo->height; + region[ 2 ] = 1; + + retCode = test_fill_image_generic( device, imageInfo, origin, region, outputType, d ); + if ( retCode < 0 ) + return retCode; + else + ret += retCode; + + // Now try a sampling of different random regions + for ( int i = 0; i < 8; i++ ) + { + // Pick a random size + region[ 0 ] = ( imageInfo->width > 8 ) ? 
(size_t)random_in_range( 8, (int)imageInfo->width - 1, d ) : imageInfo->width; + region[ 1 ] = ( imageInfo->height > 8 ) ? (size_t)random_in_range( 8, (int)imageInfo->height - 1, d ) : imageInfo->height; + + // Now pick positions within valid ranges + origin[ 0 ] = ( imageInfo->width > region[ 0 ] ) ? (size_t)random_in_range( 0, (int)( imageInfo->width - region[ 0 ] - 1 ), d ) : 0; + origin[ 1 ] = ( imageInfo->height > region[ 1 ] ) ? (size_t)random_in_range( 0, (int)( imageInfo->height - region[ 1 ] - 1 ), d ) : 0; + + // Go for it! + retCode = test_fill_image_generic( device, imageInfo, origin, region, outputType, d ); + if ( retCode < 0 ) + return retCode; + else + ret += retCode; + } + + return ret; +} + + +int test_fill_image_set_2D( cl_device_id device, cl_image_format *format, ExplicitType outputType ) +{ + size_t maxWidth, maxHeight; + cl_ulong maxAllocSize, memSize; + image_descriptor imageInfo = { 0 }; + RandomSeed seed(gRandomSeed); + const size_t rowPadding_default = 48; + size_t rowPadding = gEnablePitch ? 
rowPadding_default : 0; + size_t pixelSize; + + memset(&imageInfo, 0x0, sizeof(image_descriptor)); + imageInfo.type = CL_MEM_OBJECT_IMAGE2D; + imageInfo.format = format; + pixelSize = get_pixel_size( imageInfo.format ); + + int error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL ); + test_error( error, "Unable to get max image 2D size from device" ); + + if (memSize > (cl_ulong)SIZE_MAX) { + memSize = (cl_ulong)SIZE_MAX; + } + + if ( gTestSmallImages ) + { + for ( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ ) + { + imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding; + + if (gEnablePitch) + { + rowPadding = rowPadding_default; + do { + rowPadding++; + imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding; + } while ((imageInfo.rowPitch % pixelSize) != 0); + } + + for ( imageInfo.height = 1; imageInfo.height < 9; imageInfo.height++ ) + { + if ( gDebugTrace ) + log_info( " at size %d,%d\n", (int)imageInfo.width, (int)imageInfo.height ); + + int ret = test_fill_image_size_2D( device, &imageInfo, outputType, seed ); + if ( ret ) + return -1; + } + } + } + else if ( gTestMaxImages ) + { + // Try a specific set of maximum sizes + size_t numbeOfSizes; + size_t sizes[100][3]; + + get_max_sizes(&numbeOfSizes, 100, sizes, maxWidth, maxHeight, 1, 1, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE2D, imageInfo.format); + + for ( size_t idx = 0; idx < numbeOfSizes; idx++ ) + { + imageInfo.width = sizes[ idx ][ 0 ]; + imageInfo.height = sizes[ idx ][ 1 ]; + imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding; + + if (gEnablePitch) + { + rowPadding = rowPadding_default; + do { + 
rowPadding++; + imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding; + } while ((imageInfo.rowPitch % pixelSize) != 0); + } + + log_info( "Testing %d x %d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ] ); + if ( gDebugTrace ) + log_info( " at max size %d,%d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ] ); + if ( test_fill_image_size_2D( device, &imageInfo, outputType, seed ) ) + return -1; + } + } + else + { + for ( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ ) + { + cl_ulong size; + // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that + // image, the result array, plus offset arrays, will fit in the global ram space + do + { + imageInfo.width = (size_t)random_log_in_range( 16, (int)maxWidth / 32, seed ); + imageInfo.height = (size_t)random_log_in_range( 16, (int)maxHeight / 32, seed ); + + imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding; + + if (gEnablePitch) + { + rowPadding = rowPadding_default; + do { + rowPadding++; + imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding; + } while ((imageInfo.rowPitch % pixelSize) != 0); + } + + size = (size_t)imageInfo.rowPitch * (size_t)imageInfo.height * 4; + } while ( size > maxAllocSize || ( size * 3 ) > memSize ); + + if ( gDebugTrace ) + log_info( " at size %d,%d (row pitch %d) out of %d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.rowPitch, (int)maxWidth, (int)maxHeight ); + int ret = test_fill_image_size_2D( device, &imageInfo, outputType, seed ); + if ( ret ) + return -1; + } + } + + return 0; +} diff --git a/test_conformance/images/clFillImage/test_fill_2D_array.cpp b/test_conformance/images/clFillImage/test_fill_2D_array.cpp new file mode 100644 index 00000000..2ec35f6b --- /dev/null +++ b/test_conformance/images/clFillImage/test_fill_2D_array.cpp @@ -0,0 +1,200 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../testBase.h" + +#define MAX_ERR 0.005f +#define MAX_HALF_LINEAR_ERR 0.3f + +extern bool gDebugTrace, gDisableOffsets, gTestSmallImages, gTestMaxImages, gEnablePitch, gTestRounding; +extern cl_filter_mode gFilterModeToUse; +extern cl_addressing_mode gAddressModeToUse; +extern cl_command_queue queue; +extern cl_context context; + +// Defined in test_fill_2D_3D.cpp +extern int test_fill_image_generic( cl_device_id device, image_descriptor *imageInfo, + const size_t origin[], const size_t region[], ExplicitType outputType, MTdata d ); + + +static int test_fill_image_2D_array( cl_device_id device, image_descriptor *imageInfo, ExplicitType outputType, MTdata d ) +{ + size_t origin[ 3 ], region[ 3 ]; + int ret = 0, retCode; + + // First, try just a full covering region + origin[ 0 ] = origin[ 1 ] = origin[ 2 ] = 0; + region[ 0 ] = imageInfo->width; + region[ 1 ] = imageInfo->height; + region[ 2 ] = imageInfo->arraySize; + + retCode = test_fill_image_generic( device, imageInfo, origin, region, outputType, d ); + if ( retCode < 0 ) + return retCode; + else + ret += retCode; + + // Now try a sampling of different random regions + for ( int i = 0; i < 8; i++ ) + { + // Pick a random size + region[ 0 ] = ( imageInfo->width > 8 ) ? (size_t)random_in_range( 8, (int)imageInfo->width - 1, d ) : imageInfo->width; + region[ 1 ] = ( imageInfo->height > 8 ) ? 
(size_t)random_in_range( 8, (int)imageInfo->height - 1, d ) : imageInfo->height; + region[ 2 ] = ( imageInfo->arraySize > 8 ) ? (size_t)random_in_range( 8, (int)imageInfo->arraySize - 1, d ) : imageInfo->arraySize; + + // Now pick positions within valid ranges + origin[ 0 ] = ( imageInfo->width > region[ 0 ] ) ? (size_t)random_in_range( 0, (int)( imageInfo->width - region[ 0 ] - 1 ), d ) : 0; + origin[ 1 ] = ( imageInfo->height > region[ 1 ] ) ? (size_t)random_in_range( 0, (int)( imageInfo->height - region[ 1 ] - 1 ), d ) : 0; + origin[ 2 ] = ( imageInfo->arraySize > region[ 2 ] ) ? (size_t)random_in_range( 0, (int)( imageInfo->arraySize - region[ 2 ] - 1 ), d ) : 0; + + // Go for it! + retCode = test_fill_image_generic( device, imageInfo, origin, region, outputType, d ); + if ( retCode < 0 ) + return retCode; + else + ret += retCode; + } + + return ret; +} + + +int test_fill_image_set_2D_array( cl_device_id device, cl_image_format *format, ExplicitType outputType ) +{ + size_t maxWidth, maxHeight, maxArraySize; + cl_ulong maxAllocSize, memSize; + image_descriptor imageInfo = { 0 }; + RandomSeed seed( gRandomSeed ); + const size_t rowPadding_default = 80; + size_t rowPadding = gEnablePitch ? rowPadding_default : 0; + size_t slicePadding = gEnablePitch ? 
3 : 0; + size_t pixelSize; + + memset(&imageInfo, 0x0, sizeof(image_descriptor)); + imageInfo.type = CL_MEM_OBJECT_IMAGE2D_ARRAY; + imageInfo.format = format; + pixelSize = get_pixel_size( imageInfo.format ); + + int error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, sizeof( maxArraySize ), &maxArraySize, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL ); + test_error( error, "Unable to get max image 2D array size from device" ); + + if (memSize > (cl_ulong)SIZE_MAX) { + memSize = (cl_ulong)SIZE_MAX; + } + + if ( gTestSmallImages ) + { + for ( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ ) + { + imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding; + + if (gEnablePitch) + { + rowPadding = rowPadding_default; + do { + rowPadding++; + imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding; + } while ((imageInfo.rowPitch % pixelSize) != 0); + } + + for ( imageInfo.height = 1; imageInfo.height < 9; imageInfo.height++ ) + { + imageInfo.slicePitch = imageInfo.rowPitch * (imageInfo.height + slicePadding); + for ( imageInfo.arraySize = 2; imageInfo.arraySize < 9; imageInfo.arraySize++ ) + { + if ( gDebugTrace ) + log_info( " at size %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.arraySize ); + int ret = test_fill_image_2D_array( device, &imageInfo, outputType, seed ); + if ( ret ) + return -1; + } + } + } + } + else if ( gTestMaxImages ) + { + // Try a specific set of maximum sizes + size_t numbeOfSizes; + size_t sizes[100][3]; + get_max_sizes(&numbeOfSizes, 100, sizes, maxWidth, maxHeight, 1, maxArraySize, maxAllocSize, 
memSize, CL_MEM_OBJECT_IMAGE2D_ARRAY, imageInfo.format); + + for ( size_t idx = 0; idx < numbeOfSizes; idx++ ) + { + imageInfo.width = sizes[ idx ][ 0 ]; + imageInfo.height = sizes[ idx ][ 1 ]; + imageInfo.arraySize = sizes[ idx ][ 2 ]; + imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding; + + if (gEnablePitch) + { + rowPadding = rowPadding_default; + do { + rowPadding++; + imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding; + } while ((imageInfo.rowPitch % pixelSize) != 0); + } + + imageInfo.slicePitch = imageInfo.rowPitch * (imageInfo.height + slicePadding); + + log_info( "Testing %d x %d x %d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ], (int)sizes[ idx ][ 2 ] ); + if ( gDebugTrace ) + log_info( " at max size %d,%d,%d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ], (int)sizes[ idx ][ 2 ] ); + if ( test_fill_image_2D_array( device, &imageInfo, outputType, seed ) ) + return -1; + } + } + else + { + for ( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ ) + { + cl_ulong size; + // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that + // image, the result array, plus offset arrays, will fit in the global ram space + do + { + imageInfo.width = (size_t)random_log_in_range( 16, (int)maxWidth / 64, seed ); + imageInfo.height = (size_t)random_log_in_range( 16, (int)maxHeight / 64, seed ); + imageInfo.arraySize = (size_t)random_log_in_range( 16, (int)maxArraySize / 32,seed ); + + imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding; + + if (gEnablePitch) + { + rowPadding = rowPadding_default; + do { + rowPadding++; + imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding; + } while ((imageInfo.rowPitch % pixelSize) != 0); + } + + imageInfo.slicePitch = imageInfo.rowPitch * (imageInfo.height + slicePadding); + + size = (cl_ulong)imageInfo.slicePitch * (cl_ulong)imageInfo.arraySize * 4 * 4; + } while ( size > maxAllocSize || ( size * 3 ) > memSize ); + + if ( gDebugTrace ) + log_info( " at 
size %d,%d,%d (pitch %d,%d) out of %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.arraySize, (int)imageInfo.rowPitch, (int)imageInfo.slicePitch, (int)maxWidth, (int)maxHeight, (int)maxArraySize ); + int ret = test_fill_image_2D_array( device, &imageInfo, outputType, seed ); + if ( ret ) + return -1; + } + } + + return 0; +} diff --git a/test_conformance/images/clFillImage/test_fill_3D.cpp b/test_conformance/images/clFillImage/test_fill_3D.cpp new file mode 100644 index 00000000..0c4c2079 --- /dev/null +++ b/test_conformance/images/clFillImage/test_fill_3D.cpp @@ -0,0 +1,200 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../testBase.h" + +#define MAX_ERR 0.005f +#define MAX_HALF_LINEAR_ERR 0.3f + +extern bool gDebugTrace, gDisableOffsets, gTestSmallImages, gTestMaxImages, gEnablePitch, gTestRounding; +extern cl_filter_mode gFilterModeToUse; +extern cl_addressing_mode gAddressModeToUse; +extern cl_command_queue queue; +extern cl_context context; + +// Defined in test_fill_2D_3D.cpp +extern int test_fill_image_generic( cl_device_id device, image_descriptor *imageInfo, + const size_t origin[], const size_t region[], ExplicitType outputType, MTdata d ); + + +int test_fill_image_3D( cl_device_id device, image_descriptor *imageInfo, ExplicitType outputType, MTdata d ) +{ + size_t origin[ 3 ], region[ 3 ]; + int ret = 0, retCode; + + // First, try just a full covering region + origin[ 0 ] = origin[ 1 ] = origin[ 2 ] = 0; + region[ 0 ] = imageInfo->width; + region[ 1 ] = imageInfo->height; + region[ 2 ] = imageInfo->depth; + + retCode = test_fill_image_generic( device, imageInfo, origin, region, outputType, d ); + if ( retCode < 0 ) + return retCode; + else + ret += retCode; + + // Now try a sampling of different random regions + for ( int i = 0; i < 8; i++ ) + { + // Pick a random size + region[ 0 ] = ( imageInfo->width > 8 ) ? (size_t)random_in_range( 8, (int)imageInfo->width - 1, d ) : imageInfo->width; + region[ 1 ] = ( imageInfo->height > 8 ) ? (size_t)random_in_range( 8, (int)imageInfo->height - 1, d ) : imageInfo->height; + region[ 2 ] = ( imageInfo->depth > 8 ) ? (size_t)random_in_range( 8, (int)imageInfo->depth - 1, d ) : imageInfo->depth; + + // Now pick positions within valid ranges + origin[ 0 ] = ( imageInfo->width > region[ 0 ] ) ? (size_t)random_in_range( 0, (int)( imageInfo->width - region[ 0 ] - 1 ), d ) : 0; + origin[ 1 ] = ( imageInfo->height > region[ 1 ] ) ? (size_t)random_in_range( 0, (int)( imageInfo->height - region[ 1 ] - 1 ), d ) : 0; + origin[ 2 ] = ( imageInfo->depth > region[ 2 ] ) ? 
(size_t)random_in_range( 0, (int)( imageInfo->depth - region[ 2 ] - 1 ), d ) : 0; + + // Go for it! + retCode = test_fill_image_generic( device, imageInfo, origin, region, outputType, d ); + if ( retCode < 0 ) + return retCode; + else + ret += retCode; + } + + return ret; +} + + +int test_fill_image_set_3D( cl_device_id device, cl_image_format *format, ExplicitType outputType ) +{ + size_t maxWidth, maxHeight, maxDepth; + cl_ulong maxAllocSize, memSize; + image_descriptor imageInfo = { 0 }; + RandomSeed seed( gRandomSeed ); + const size_t rowPadding_default = 80; + size_t rowPadding = gEnablePitch ? rowPadding_default : 0; + size_t slicePadding = gEnablePitch ? 3 : 0; + size_t pixelSize; + + memset(&imageInfo, 0x0, sizeof(image_descriptor)); + imageInfo.type = CL_MEM_OBJECT_IMAGE3D; + imageInfo.format = format; + pixelSize = get_pixel_size( imageInfo.format ); + + int error = clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof( maxDepth ), &maxDepth, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL ); + test_error( error, "Unable to get max image 3D size from device" ); + + if (memSize > (cl_ulong)SIZE_MAX) { + memSize = (cl_ulong)SIZE_MAX; + } + + if ( gTestSmallImages ) + { + for ( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ ) + { + imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding; + + if (gEnablePitch) + { + rowPadding = rowPadding_default; + do { + rowPadding++; + imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding; + } while ((imageInfo.rowPitch % pixelSize) != 0); + } + + for ( imageInfo.height = 1; imageInfo.height < 9; 
imageInfo.height++ ) + { + imageInfo.slicePitch = imageInfo.rowPitch * (imageInfo.height + slicePadding); + for ( imageInfo.depth = 2; imageInfo.depth < 9; imageInfo.depth++ ) + { + if ( gDebugTrace ) + log_info( " at size %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.depth ); + int ret = test_fill_image_3D( device, &imageInfo, outputType, seed ); + if ( ret ) + return -1; + } + } + } + } + else if ( gTestMaxImages ) + { + // Try a specific set of maximum sizes + size_t numbeOfSizes; + size_t sizes[100][3]; + get_max_sizes(&numbeOfSizes, 100, sizes, maxWidth, maxHeight, maxDepth, 1, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE3D, imageInfo.format); + + for ( size_t idx = 0; idx < numbeOfSizes; idx++ ) + { + imageInfo.width = sizes[ idx ][ 0 ]; + imageInfo.height = sizes[ idx ][ 1 ]; + imageInfo.depth = sizes[ idx ][ 2 ]; + + imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding; + + if (gEnablePitch) + { + rowPadding = rowPadding_default; + do { + rowPadding++; + imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding; + } while ((imageInfo.rowPitch % pixelSize) != 0); + } + + imageInfo.slicePitch = imageInfo.rowPitch * (imageInfo.height + slicePadding); + log_info( "Testing %d x %d x %d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ], (int)sizes[ idx ][ 2 ] ); + if ( gDebugTrace ) + log_info( " at max size %d,%d,%d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ], (int)sizes[ idx ][ 2 ] ); + if ( test_fill_image_3D( device, &imageInfo, outputType, seed ) ) + return -1; + } + } + else + { + for ( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ ) + { + cl_ulong size; + // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that + // image, the result array, plus offset arrays, will fit in the global ram space + do + { + imageInfo.width = (size_t)random_log_in_range( 16, (int)maxWidth / 32, seed ); + imageInfo.height = (size_t)random_log_in_range( 16, (int)maxHeight / 32, seed ); + 
imageInfo.depth = (size_t)random_log_in_range( 16, (int)maxDepth / 32, seed ); + + imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding; + + if (gEnablePitch) + { + rowPadding = rowPadding_default; + do { + rowPadding++; + imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding; + } while ((imageInfo.rowPitch % pixelSize) != 0); + } + + imageInfo.slicePitch = imageInfo.rowPitch * (imageInfo.height + slicePadding); + + size = (cl_ulong)imageInfo.slicePitch * (cl_ulong)imageInfo.depth * 4 * 4; + } while ( size > maxAllocSize || ( size * 3 ) > memSize ); + + if ( gDebugTrace ) + log_info( " at size %d,%d,%d (pitch %d,%d) out of %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.depth, (int)imageInfo.rowPitch, (int)imageInfo.slicePitch, (int)maxWidth, (int)maxHeight, (int)maxDepth ); + int ret = test_fill_image_3D( device, &imageInfo, outputType, seed ); + if ( ret ) + return -1; + } + } + + return 0; +} diff --git a/test_conformance/images/clFillImage/test_fill_generic.cpp b/test_conformance/images/clFillImage/test_fill_generic.cpp new file mode 100644 index 00000000..eb4cac98 --- /dev/null +++ b/test_conformance/images/clFillImage/test_fill_generic.cpp @@ -0,0 +1,556 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../testBase.h" + +#define MAX_ERR 0.005f +#define MAX_HALF_LINEAR_ERR 0.3f + +extern bool gDebugTrace, gDisableOffsets, gTestSmallImages, gTestMaxImages, gTestRounding, gEnablePitch; +extern cl_filter_mode gFilterModeToUse; +extern cl_addressing_mode gAddressModeToUse; +extern uint64_t gRoundingStartValue; +extern cl_command_queue queue; +extern cl_context context; + +extern void read_image_pixel_float( void *imageData, image_descriptor *imageInfo, int x, int y, int z, float *outData ); + + +static void CL_CALLBACK free_pitch_buffer( cl_mem image, void *buf ) +{ + free( buf ); +} + + +cl_mem create_image( cl_context context, BufferOwningPtr& data, image_descriptor *imageInfo, int *error ) +{ + cl_mem img; + cl_image_desc imageDesc; + cl_mem_flags mem_flags = CL_MEM_READ_ONLY; + void *host_ptr = NULL; + + memset(&imageDesc, 0x0, sizeof(cl_image_desc)); + imageDesc.image_type = imageInfo->type; + imageDesc.image_width = imageInfo->width; + imageDesc.image_height = imageInfo->height; + imageDesc.image_depth = imageInfo->depth; + imageDesc.image_array_size = imageInfo->arraySize; + imageDesc.image_row_pitch = gEnablePitch ? imageInfo->rowPitch : 0; + imageDesc.image_slice_pitch = gEnablePitch ? 
imageInfo->slicePitch : 0; + + switch (imageInfo->type) + { + case CL_MEM_OBJECT_IMAGE1D: + if ( gDebugTrace ) + log_info( " - Creating 1D image %d ...\n", (int)imageInfo->width ); + if ( gEnablePitch ) + host_ptr = malloc( imageInfo->rowPitch ); + break; + case CL_MEM_OBJECT_IMAGE2D: + if ( gDebugTrace ) + log_info( " - Creating 2D image %d by %d ...\n", (int)imageInfo->width, (int)imageInfo->height ); + if ( gEnablePitch ) + host_ptr = malloc( imageInfo->height * imageInfo->rowPitch ); + break; + case CL_MEM_OBJECT_IMAGE3D: + if ( gDebugTrace ) + log_info( " - Creating 3D image %d by %d by %d...\n", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->depth ); + if ( gEnablePitch ) + host_ptr = malloc( imageInfo->depth * imageInfo->slicePitch ); + break; + case CL_MEM_OBJECT_IMAGE1D_ARRAY: + if ( gDebugTrace ) + log_info( " - Creating 1D image array %d by %d...\n", (int)imageInfo->width, (int)imageInfo->arraySize ); + if ( gEnablePitch ) + host_ptr = malloc( imageInfo->arraySize * imageInfo->slicePitch ); + break; + case CL_MEM_OBJECT_IMAGE2D_ARRAY: + if ( gDebugTrace ) + log_info( " - Creating 2D image array %d by %d by %d...\n", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->arraySize ); + if ( gEnablePitch ) + host_ptr = malloc( imageInfo->arraySize * imageInfo->slicePitch ); + break; + } + + if (gEnablePitch) + { + if ( NULL == host_ptr ) + { + log_error( "ERROR: Unable to create backing store for pitched 3D image. %ld bytes\n", imageInfo->depth * imageInfo->slicePitch ); + return NULL; + } + mem_flags = CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR; + } + + img = clCreateImage(context, mem_flags, imageInfo->format, &imageDesc, host_ptr, error); + + if (gEnablePitch) + { + if ( *error == CL_SUCCESS ) + { + int callbackError = clSetMemObjectDestructorCallback( img, free_pitch_buffer, host_ptr ); + if ( CL_SUCCESS != callbackError ) + { + free( host_ptr ); + log_error( "ERROR: Unable to attach destructor callback to pitched 3D image. 
Err: %d\n", callbackError ); + clReleaseMemObject( img ); + return NULL; + } + } + else + free(host_ptr); + } + + if ( *error != CL_SUCCESS ) + { + long long unsigned imageSize = get_image_size_mb( imageInfo ); + switch (imageInfo->type) + { + case CL_MEM_OBJECT_IMAGE1D: + log_error( "ERROR: Unable to create 1D image of size %d (%llu MB): %s\n", (int)imageInfo->width, imageSize, IGetErrorString( *error ) ); + break; + case CL_MEM_OBJECT_IMAGE2D: + log_error( "ERROR: Unable to create 2D image of size %d x %d (%llu MB): %s\n", (int)imageInfo->width, (int)imageInfo->height, imageSize, IGetErrorString( *error ) ); + break; + case CL_MEM_OBJECT_IMAGE3D: + log_error( "ERROR: Unable to create 3D image of size %d x %d x %d (%llu MB): %s\n", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->depth, imageSize, IGetErrorString( *error ) ); + break; + case CL_MEM_OBJECT_IMAGE1D_ARRAY: + log_error( "ERROR: Unable to create 1D image array of size %d x %d (%llu MB): %s\n", (int)imageInfo->width, (int)imageInfo->arraySize, imageSize, IGetErrorString( *error ) ); + break; + break; + case CL_MEM_OBJECT_IMAGE2D_ARRAY: + log_error( "ERROR: Unable to create 2D image array of size %d x %d x %d (%llu MB): %s\n", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->arraySize, imageSize, IGetErrorString( *error ) ); + break; + } + return NULL; + } + + // Copy the specified data to the image via a Map operation. 
+ size_t mappedRow, mappedSlice; + size_t height; + size_t depth; + size_t imageSize = 0; + + switch (imageInfo->type) + { + case CL_MEM_OBJECT_IMAGE1D_ARRAY: + height = imageInfo->arraySize; + depth = 1; + imageSize = imageInfo->rowPitch * imageInfo->arraySize; + break; + case CL_MEM_OBJECT_IMAGE1D: + height = depth = 1; + imageSize = imageInfo->rowPitch; + break; + case CL_MEM_OBJECT_IMAGE2D: + height = imageInfo->height; + depth = 1; + imageSize = imageInfo->rowPitch * imageInfo->height; + break; + case CL_MEM_OBJECT_IMAGE2D_ARRAY: + height = imageInfo->height; + depth = imageInfo->arraySize; + imageSize = imageInfo->slicePitch * imageInfo->arraySize; + break; + case CL_MEM_OBJECT_IMAGE3D: + height = imageInfo->height; + depth = imageInfo->depth; + imageSize = imageInfo->slicePitch * imageInfo->depth; + break; + } + + size_t origin[ 3 ] = { 0, 0, 0 }; + size_t region[ 3 ] = { imageInfo->width, height, depth }; + + void* mapped = (char*)clEnqueueMapImage(queue, img, CL_TRUE, CL_MAP_WRITE, origin, region, &mappedRow, &mappedSlice, 0, NULL, NULL, error); + if (*error != CL_SUCCESS || !mapped) + { + log_error( "ERROR: Unable to map image for writing: %s\n", IGetErrorString( *error ) ); + return NULL; + } + size_t mappedSlicePad = mappedSlice - (mappedRow * height); + + // Copy the image. + size_t scanlineSize = imageInfo->rowPitch; + size_t sliceSize = imageInfo->slicePitch - scanlineSize * height; + + char* src = (char*)data; + char* dst = (char*)mapped; + + if ((mappedRow == scanlineSize) && ((mappedSlice == imageInfo->slicePitch) || (imageInfo->depth==0 && imageInfo->arraySize==0))) { + // Copy the whole image. + memcpy( dst, src, imageSize ); + } + else { + // Else copy one scan line at a time. 
+ for ( size_t z = 0; z < depth; z++ ) + { + for ( size_t y = 0; y < height; y++ ) + { + memcpy( dst, src, imageInfo->width * get_pixel_size(imageInfo->format) ); + dst += mappedRow; + src += scanlineSize; + } + + // mappedSlicePad is incorrect for 2D images here, but we will exit the z loop before this is a problem. + dst += mappedSlicePad; + src += sliceSize; + } + } + + // Unmap the image. + *error = clEnqueueUnmapMemObject(queue, img, mapped, 0, NULL, NULL); + if (*error != CL_SUCCESS) + { + log_error( "ERROR: Unable to unmap image after writing: %s\n", IGetErrorString( *error ) ); + return NULL; + } + + return img; +} + +static void fill_region_with_value( image_descriptor *imageInfo, void *imageValues, + void *value, const size_t origin[], const size_t region[] ) +{ + size_t pixelSize = get_pixel_size( imageInfo->format ); + + // Get initial pointer + char *destPtr = (char *)imageValues + origin[ 2 ] * imageInfo->slicePitch + + origin[ 1 ] * imageInfo->rowPitch + pixelSize * origin[ 0 ]; + + char *fillColor = (char *)malloc(pixelSize); + memcpy(fillColor, value, pixelSize); + + // Use pixel at origin to fill region. + for( size_t z = 0; z < ( region[ 2 ] > 0 ? 
region[ 2 ] : 1 ); z++ ) { + char *rowDestPtr = destPtr; + for( size_t y = 0; y < region[ 1 ]; y++ ) { + char *pixelDestPtr = rowDestPtr; + + for( size_t x = 0; x < region[ 0 ]; x++ ) { + memcpy( pixelDestPtr, fillColor, pixelSize ); + pixelDestPtr += pixelSize; + } + rowDestPtr += imageInfo->rowPitch; + } + destPtr += imageInfo->slicePitch; + } + + free(fillColor); +} + +int test_fill_image_generic( cl_device_id device, image_descriptor *imageInfo, + const size_t origin[], const size_t region[], ExplicitType outputType, MTdata d ) +{ + BufferOwningPtr imgData; + BufferOwningPtr imgHost; + + int error; + clMemWrapper image; + + if ( gDebugTrace ) + log_info( " ++ Entering inner test loop...\n" ); + + // Generate some data to test against + size_t dataBytes = 0; + + switch (imageInfo->type) + { + case CL_MEM_OBJECT_IMAGE1D: + dataBytes = imageInfo->rowPitch; + break; + case CL_MEM_OBJECT_IMAGE2D: + dataBytes = imageInfo->height * imageInfo->rowPitch; + break; + case CL_MEM_OBJECT_IMAGE3D: + dataBytes = imageInfo->depth * imageInfo->slicePitch; + break; + case CL_MEM_OBJECT_IMAGE1D_ARRAY: + dataBytes = imageInfo->arraySize * imageInfo->slicePitch; + break; + case CL_MEM_OBJECT_IMAGE2D_ARRAY: + dataBytes = imageInfo->arraySize * imageInfo->slicePitch; + break; + } + + if (dataBytes > imgData.getSize()) + { + if ( gDebugTrace ) + log_info( " - Resizing random image data...\n" ); + + generate_random_image_data( imageInfo, imgData, d ); + + imgHost.reset( NULL ); // Free previously allocated memory first. + imgHost.reset(malloc(dataBytes),NULL,0,dataBytes); + if (imgHost == NULL) + { + log_error( "ERROR: Unable to malloc %lu bytes for imgHost\n", dataBytes ); + return -1; + } + } + + // Reset the host verification copy of the data. 
+ memcpy(imgHost, imgData, dataBytes); + + // Construct testing sources + if ( gDebugTrace ) + log_info( " - Creating image...\n" ); + + image = create_image( context, imgData, imageInfo, &error ); + if ( image == NULL ) + return error; + + // Now fill the region defined by origin, region with the pixel value found at origin. + if ( gDebugTrace ) + log_info( " - Filling at %d,%d,%d size %d,%d,%d\n", (int)origin[ 0 ], (int)origin[ 1 ], (int)origin[ 2 ], + (int)region[ 0 ], (int)region[ 1 ], (int)region[ 2 ] ); + + // We need to know the rounding mode, in the case of half to allow the + // pixel pack that generates the verification value to succeed. + if (imageInfo->format->image_channel_data_type == CL_HALF_FLOAT) + DetectFloatToHalfRoundingMode(queue); + + if( outputType == kFloat ) + { + cl_float fillColor[ 4 ]; + read_image_pixel_float( imgHost, imageInfo, origin[ 0 ], origin[ 1 ], origin[ 2 ], fillColor ); + if ( gDebugTrace ) + log_info( " - with value %g, %g, %g, %g\n", fillColor[ 0 ], fillColor[ 1 ], fillColor[ 2 ], fillColor[ 3 ] ); + error = clEnqueueFillImage ( queue, image, fillColor, origin, region, 0, NULL, NULL ); + if ( error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to fill image at %d,%d,%d size %d,%d,%d! (%s)\n", + (int)origin[ 0 ], (int)origin[ 1 ], (int)origin[ 2 ], + (int)region[ 0 ], (int)region[ 1 ], (int)region[ 2 ], IGetErrorString( error ) ); + return error; + } + + // Write the approriate verification value to the correct region. 
+ void* verificationValue = malloc(get_pixel_size(imageInfo->format)); + pack_image_pixel(fillColor, imageInfo->format, verificationValue); + fill_region_with_value( imageInfo, imgHost, verificationValue, origin, region ); + free(verificationValue); + } + else if( outputType == kInt ) + { + cl_int fillColor[ 4 ]; + read_image_pixel( imgHost, imageInfo, origin[ 0 ], origin[ 1 ], origin[ 2 ], fillColor ); + if ( gDebugTrace ) + log_info( " - with value %d, %d, %d, %d\n", fillColor[ 0 ], fillColor[ 1 ], fillColor[ 2 ], fillColor[ 3 ] ); + error = clEnqueueFillImage ( queue, image, fillColor, origin, region, 0, NULL, NULL ); + if ( error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to fill image at %d,%d,%d size %d,%d,%d! (%s)\n", + (int)origin[ 0 ], (int)origin[ 1 ], (int)origin[ 2 ], + (int)region[ 0 ], (int)region[ 1 ], (int)region[ 2 ], IGetErrorString( error ) ); + return error; + } + + // Write the approriate verification value to the correct region. + void* verificationValue = malloc(get_pixel_size(imageInfo->format)); + pack_image_pixel(fillColor, imageInfo->format, verificationValue); + fill_region_with_value( imageInfo, imgHost, verificationValue, origin, region ); + free(verificationValue); + } + else // if( outputType == kUInt ) + { + cl_uint fillColor[ 4 ]; + read_image_pixel( imgHost, imageInfo, origin[ 0 ], origin[ 1 ], origin[ 2 ], fillColor ); + if ( gDebugTrace ) + log_info( " - with value %u, %u, %u, %u\n", fillColor[ 0 ], fillColor[ 1 ], fillColor[ 2 ], fillColor[ 3 ] ); + error = clEnqueueFillImage ( queue, image, fillColor, origin, region, 0, NULL, NULL ); + if ( error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to fill image at %d,%d,%d size %d,%d,%d! (%s)\n", + (int)origin[ 0 ], (int)origin[ 1 ], (int)origin[ 2 ], + (int)region[ 0 ], (int)region[ 1 ], (int)region[ 2 ], IGetErrorString( error ) ); + return error; + } + + // Write the approriate verification value to the correct region. 
+ void* verificationValue = malloc(get_pixel_size(imageInfo->format)); + pack_image_pixel(fillColor, imageInfo->format, verificationValue); + fill_region_with_value( imageInfo, imgHost, verificationValue, origin, region ); + free(verificationValue); + } + + // Map the destination image to verify the results with the host + // copy. The contents of the entire buffer are compared. + if ( gDebugTrace ) + log_info( " - Mapping results...\n" ); + + size_t imageOrigin[ 3 ] = { 0, 0, 0 }; + size_t imageRegion[ 3 ] = { imageInfo->width, 1, 1 }; + switch (imageInfo->type) + { + case CL_MEM_OBJECT_IMAGE1D: + break; + case CL_MEM_OBJECT_IMAGE2D: + imageRegion[ 1 ] = imageInfo->height; + break; + case CL_MEM_OBJECT_IMAGE3D: + imageRegion[ 1 ] = imageInfo->height; + imageRegion[ 2 ] = imageInfo->depth; + break; + case CL_MEM_OBJECT_IMAGE1D_ARRAY: + imageRegion[ 1 ] = imageInfo->arraySize; + break; + case CL_MEM_OBJECT_IMAGE2D_ARRAY: + imageRegion[ 1 ] = imageInfo->height; + imageRegion[ 2 ] = imageInfo->arraySize; + break; + } + + size_t mappedRow, mappedSlice; + void* mapped = (char*)clEnqueueMapImage(queue, image, CL_TRUE, CL_MAP_READ, imageOrigin, imageRegion, &mappedRow, &mappedSlice, 0, NULL, NULL, &error); + if (error != CL_SUCCESS) + { + log_error( "ERROR: Unable to map image for verification: %s\n", IGetErrorString( error ) ); + return NULL; + } + + // Verify scanline by scanline, since the pitches are different + char *sourcePtr = imgHost; + char *destPtr = (char*)mapped; + + size_t scanlineSize = imageInfo->width * get_pixel_size( imageInfo->format ); + + if ( gDebugTrace ) + log_info( " - Scanline verification...\n" ); + + size_t thirdDim = 1; + size_t secondDim = 1; + + switch (imageInfo->type) { + case CL_MEM_OBJECT_IMAGE1D: + secondDim = 1; + thirdDim = 1; + break; + case CL_MEM_OBJECT_IMAGE2D: + secondDim = imageInfo->height; + thirdDim = 1; + break; + case CL_MEM_OBJECT_IMAGE3D: + secondDim = imageInfo->height; + thirdDim = imageInfo->depth; + break; + case 
CL_MEM_OBJECT_IMAGE1D_ARRAY: + secondDim = imageInfo->arraySize; + thirdDim = 1; + break; + case CL_MEM_OBJECT_IMAGE2D_ARRAY: + secondDim = imageInfo->height; + thirdDim = imageInfo->arraySize; + break; + default: + log_error("Test error: unhandled image type at %s:%d\n",__FILE__,__LINE__); + }; + + // Count the number of bytes successfully matched + size_t total_matched = 0; + + for ( size_t z = 0; z < thirdDim; z++ ) + { + for ( size_t y = 0; y < secondDim; y++ ) + { + // If the data type is 101010 ignore bits 31 and 32 when comparing the row + if (imageInfo->format->image_channel_data_type == CL_UNORM_INT_101010) { + for (size_t w=0;w!=scanlineSize/4;++w) { + ((cl_uint*)sourcePtr)[w] &= 0x3FFFFFFF; + ((cl_uint*)destPtr)[w] &= 0x3FFFFFFF; + } + } + + if (memcmp( sourcePtr, destPtr, scanlineSize ) != 0) + { + log_error( "ERROR: Scanline %d did not verify for image size %d,%d,%d pitch %d (extra %d bytes)\n", (int)y, (int)imageInfo->width, (int)imageInfo->height, (int)thirdDim, (int)imageInfo->rowPitch, (int)imageInfo->rowPitch - (int)imageInfo->width * (int)get_pixel_size( imageInfo->format ) ); + + // Find the first missing pixel + size_t pixel_size = get_pixel_size( imageInfo->format ); + size_t where = 0; + for ( where = 0; where < imageInfo->width; where++ ) + if ( memcmp( sourcePtr + pixel_size * where, destPtr + pixel_size * where, pixel_size) ) + break; + log_error( "Failed at column: %ld ", where ); + switch ( pixel_size ) + { + case 1: + log_error( "*0x%2.2x vs. 0x%2.2x\n", ((cl_uchar*)(sourcePtr + pixel_size * where))[0], ((cl_uchar*)(destPtr + pixel_size * where))[0] ); + break; + case 2: + log_error( "*0x%4.4x vs. 0x%4.4x\n", ((cl_ushort*)(sourcePtr + pixel_size * where))[0], ((cl_ushort*)(destPtr + pixel_size * where))[0] ); + break; + case 3: + log_error( "*{0x%2.2x, 0x%2.2x, 0x%2.2x} vs. 
{0x%2.2x, 0x%2.2x, 0x%2.2x}\n", + ((cl_uchar*)(sourcePtr + pixel_size * where))[0], ((cl_uchar*)(sourcePtr + pixel_size * where))[1], ((cl_uchar*)(sourcePtr + pixel_size * where))[2], + ((cl_uchar*)(destPtr + pixel_size * where))[0], ((cl_uchar*)(destPtr + pixel_size * where))[1], ((cl_uchar*)(destPtr + pixel_size * where))[2] + ); + break; + case 4: + log_error( "*0x%8.8x vs. 0x%8.8x\n", ((cl_uint*)(sourcePtr + pixel_size * where))[0], ((cl_uint*)(destPtr + pixel_size * where))[0] ); + break; + case 6: + log_error( "*{0x%4.4x, 0x%4.4x, 0x%4.4x} vs. {0x%4.4x, 0x%4.4x, 0x%4.4x}\n", + ((cl_ushort*)(sourcePtr + pixel_size * where))[0], ((cl_ushort*)(sourcePtr + pixel_size * where))[1], ((cl_ushort*)(sourcePtr + pixel_size * where))[2], + ((cl_ushort*)(destPtr + pixel_size * where))[0], ((cl_ushort*)(destPtr + pixel_size * where))[1], ((cl_ushort*)(destPtr + pixel_size * where))[2] + ); + break; + case 8: + log_error( "*0x%16.16llx vs. 0x%16.16llx\n", ((cl_ulong*)(sourcePtr + pixel_size * where))[0], ((cl_ulong*)(destPtr + pixel_size * where))[0] ); + break; + case 12: + log_error( "*{0x%8.8x, 0x%8.8x, 0x%8.8x} vs. {0x%8.8x, 0x%8.8x, 0x%8.8x}\n", + ((cl_uint*)(sourcePtr + pixel_size * where))[0], ((cl_uint*)(sourcePtr + pixel_size * where))[1], ((cl_uint*)(sourcePtr + pixel_size * where))[2], + ((cl_uint*)(destPtr + pixel_size * where))[0], ((cl_uint*)(destPtr + pixel_size * where))[1], ((cl_uint*)(destPtr + pixel_size * where))[2] + ); + break; + case 16: + log_error( "*{0x%8.8x, 0x%8.8x, 0x%8.8x, 0x%8.8x} vs. 
{0x%8.8x, 0x%8.8x, 0x%8.8x, 0x%8.8x}\n", + ((cl_uint*)(sourcePtr + pixel_size * where))[0], ((cl_uint*)(sourcePtr + pixel_size * where))[1], ((cl_uint*)(sourcePtr + pixel_size * where))[2], ((cl_uint*)(sourcePtr + pixel_size * where))[3], + ((cl_uint*)(destPtr + pixel_size * where))[0], ((cl_uint*)(destPtr + pixel_size * where))[1], ((cl_uint*)(destPtr + pixel_size * where))[2], ((cl_uint*)(destPtr + pixel_size * where))[3] + ); + break; + default: + log_error( "Don't know how to print pixel size of %ld\n", pixel_size ); + break; + } + + return -1; + } + + total_matched += scanlineSize; + sourcePtr += imageInfo->rowPitch; + destPtr += mappedRow; + } + + sourcePtr += imageInfo->slicePitch - ( imageInfo->rowPitch * (imageInfo->height > 0 ? imageInfo->height : 1) ); + destPtr += mappedSlice - ( mappedRow * (imageInfo->height > 0 ? imageInfo->height : 1) ); + } + + // Unmap the image. + error = clEnqueueUnmapMemObject(queue, image, mapped, 0, NULL, NULL); + if (error != CL_SUCCESS) + { + log_error( "ERROR: Unable to unmap image after verify: %s\n", IGetErrorString( error ) ); + return NULL; + } + + imgHost.reset(0x0); + imgData.reset(0x0); + + size_t expected_bytes = scanlineSize * imageRegion[1] * imageRegion[2]; + return (total_matched == expected_bytes) ? 0 : -1; +} diff --git a/test_conformance/images/clFillImage/test_loops.cpp b/test_conformance/images/clFillImage/test_loops.cpp new file mode 100644 index 00000000..c5cbc4a4 --- /dev/null +++ b/test_conformance/images/clFillImage/test_loops.cpp @@ -0,0 +1,327 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../testBase.h" + +extern bool gDebugTrace; +extern cl_filter_mode gFilterModeToUse; +extern cl_addressing_mode gAddressModeToUse; +extern int gTypesToTest; +extern int gNormalizedModeToUse; +extern cl_channel_type gChannelTypeToUse; +extern cl_channel_order gChannelOrderToUse; +extern cl_command_queue queue; +extern cl_context context; + + +extern int test_fill_image_set_1D( cl_device_id device, cl_image_format *format, ExplicitType outputType ); +extern int test_fill_image_set_2D( cl_device_id device, cl_image_format *format, ExplicitType outputType ); +extern int test_fill_image_set_3D( cl_device_id device, cl_image_format *format, ExplicitType outputType ); +extern int test_fill_image_set_1D_array( cl_device_id device, cl_image_format *format, ExplicitType outputType ); +extern int test_fill_image_set_2D_array( cl_device_id device, cl_image_format *format, ExplicitType outputType ); + + +int filter_formats( cl_image_format *formatList, bool *filterFlags, unsigned int formatCount, cl_channel_type *channelDataTypesToFilter ) +{ + int numSupported = 0; + for ( unsigned int j = 0; j < formatCount; j++ ) + { + // If this format has been previously filtered, remove the filter + if ( filterFlags[ j ] ) + filterFlags[ j ] = false; + + // Have we already discarded this via the command line? + if ( gChannelTypeToUse != (cl_channel_type)-1 && gChannelTypeToUse != formatList[ j ].image_channel_data_type ) + { + filterFlags[ j ] = true; + continue; + } + + // Have we already discarded the channel order via the command line? 
+ if ( gChannelOrderToUse != (cl_channel_order)-1 && gChannelOrderToUse != formatList[ j ].image_channel_order ) + { + filterFlags[ j ] = true; + continue; + } + + // Is given format standard channel order and type given by spec. We don't want to test it if this is vendor extension + if( !IsChannelOrderSupported( formatList[ j ].image_channel_order ) || !IsChannelTypeSupported( formatList[ j ].image_channel_data_type ) ) + { + filterFlags[ j ] = true; + continue; + } + + // We don't filter by channel type + if( !channelDataTypesToFilter ) + { + numSupported++; + continue; + } + + // Is the format supported? + int i; + for ( i = 0; channelDataTypesToFilter[ i ] != (cl_channel_type)-1; i++ ) + { + if ( formatList[ j ].image_channel_data_type == channelDataTypesToFilter[ i ] ) + { + numSupported++; + break; + } + } + if ( channelDataTypesToFilter[ i ] == (cl_channel_type)-1 ) + { + // Format is NOT supported, so mark it as such + filterFlags[ j ] = true; + } + + + } + return numSupported; +} + + +int get_format_list( cl_device_id device, cl_mem_object_type image_type, cl_image_format * &outFormatList, + unsigned int &outFormatCount, cl_mem_flags flags ) +{ + int error; + + cl_image_format tempList[ 128 ]; + error = clGetSupportedImageFormats( context, (cl_mem_flags)flags, + image_type, 128, tempList, &outFormatCount ); + test_error( error, "Unable to get count of supported image formats" ); + + outFormatList = new cl_image_format[ outFormatCount ]; + error = clGetSupportedImageFormats( context, (cl_mem_flags)flags, + image_type, outFormatCount, outFormatList, NULL ); + test_error( error, "Unable to get list of supported image formats" ); + return 0; +} + + +int test_image_type( cl_device_id device, MethodsToTest testMethod, cl_mem_flags flags ) +{ + const char *name; + cl_mem_object_type imageType; + + if ( testMethod == k1D ) + { + name = "1D Image Fill"; + imageType = CL_MEM_OBJECT_IMAGE1D; + } + else if ( testMethod == k2D ) + { + name = "2D Image Fill"; + 
imageType = CL_MEM_OBJECT_IMAGE2D; + } + else if ( testMethod == k1DArray ) + { + name = "1D Image Array Fill"; + imageType = CL_MEM_OBJECT_IMAGE1D_ARRAY; + } + else if ( testMethod == k2DArray ) + { + name = "2D Image Array Fill"; + imageType = CL_MEM_OBJECT_IMAGE2D_ARRAY; + } + else if ( testMethod == k3D ) + { + name = "3D Image Fill"; + imageType = CL_MEM_OBJECT_IMAGE3D; + } + + log_info( "Running %s tests...\n", name ); + + int ret = 0; + + // Grab the list of supported image formats + cl_image_format *formatList; + bool *filterFlags; + unsigned int numFormats; + + if ( get_format_list( device, imageType, formatList, numFormats, flags ) ) + return -1; + + filterFlags = new bool[ numFormats ]; + if ( filterFlags == NULL ) + { + log_error( "ERROR: Out of memory allocating filter flags list!\n" ); + return -1; + } + memset( filterFlags, 0, sizeof( bool ) * numFormats ); + + /////// float tests /////// + + if( gTypesToTest & kTestFloat ) + { + cl_channel_type floatFormats[] = { CL_UNORM_SHORT_565, CL_UNORM_SHORT_555, CL_UNORM_INT_101010, +#ifdef OBSOLETE_FORAMT + CL_UNORM_SHORT_565_REV, CL_UNORM_SHORT_555_REV, CL_UNORM_INT_8888, CL_UNORM_INT_8888_REV, CL_UNORM_INT_101010_REV, +#endif +#ifdef CL_SFIXED14_APPLE + CL_SFIXED14_APPLE, +#endif + CL_UNORM_INT8, CL_SNORM_INT8, + CL_UNORM_INT16, CL_SNORM_INT16, CL_FLOAT, CL_HALF_FLOAT, (cl_channel_type)-1 }; + if( filter_formats( formatList, filterFlags, numFormats, floatFormats ) == 0 ) + { + log_info( "No formats supported for float type\n" ); + } + else + { + // Run the format list + for ( unsigned int i = 0; i < numFormats; i++ ) + { + int test_return = 0; + if ( filterFlags[i] ) + { + continue; + } + + print_header( &formatList[ i ], false ); + + gTestCount++; + + if ( testMethod == k1D ) + test_return = test_fill_image_set_1D( device, &formatList[ i ], kFloat ); + else if ( testMethod == k2D ) + test_return = test_fill_image_set_2D( device, &formatList[ i ], kFloat ); + else if ( testMethod == k1DArray ) + 
test_return = test_fill_image_set_1D_array( device, &formatList[ i ], kFloat ); + else if ( testMethod == k2DArray ) + test_return = test_fill_image_set_2D_array( device, &formatList[ i ], kFloat ); + else if ( testMethod == k3D ) + test_return = test_fill_image_set_3D( device, &formatList[ i ], kFloat ); + + if (test_return) + { + gTestFailure++; + log_error( "FAILED: " ); + print_header( &formatList[ i ], true ); + log_info( "\n" ); + } + + ret += test_return; + } + } + } + + /////// int tests /////// + if( gTypesToTest & kTestInt ) + { + cl_channel_type intFormats[] = { CL_SIGNED_INT8, CL_SIGNED_INT16, CL_SIGNED_INT32, (cl_channel_type)-1 }; + if( filter_formats( formatList, filterFlags, numFormats, intFormats ) == 0 ) + { + log_info( "No formats supported for integer type\n" ); + } + else + { + // Run the format list + for ( unsigned int i = 0; i < numFormats; i++ ) + { + int test_return = 0; + if ( filterFlags[i] ) + { + continue; + } + + print_header( &formatList[ i ], false ); + + gTestCount++; + + if ( testMethod == k1D ) + test_return = test_fill_image_set_1D( device, &formatList[ i ], kInt ); + else if ( testMethod == k2D ) + test_return = test_fill_image_set_2D( device, &formatList[ i ], kInt ); + else if ( testMethod == k1DArray ) + test_return = test_fill_image_set_1D_array( device, &formatList[ i ], kInt ); + else if ( testMethod == k2DArray ) + test_return = test_fill_image_set_2D_array( device, &formatList[ i ], kInt ); + else if ( testMethod == k3D ) + test_return = test_fill_image_set_3D( device, &formatList[ i ], kInt ); + + if (test_return) { + gTestFailure++; + log_error( "FAILED: " ); + print_header( &formatList[ i ], true ); + log_info( "\n" ); + } + + ret += test_return; + } + } + } + + /////// uint tests /////// + + if( gTypesToTest & kTestUInt ) + { + cl_channel_type uintFormats[] = { CL_UNSIGNED_INT8, CL_UNSIGNED_INT16, CL_UNSIGNED_INT32, (cl_channel_type)-1 }; + if( filter_formats( formatList, filterFlags, numFormats, uintFormats ) == 0 
) + { + log_info( "No formats supported for unsigned int type\n" ); + } + else + { + // Run the format list + for ( unsigned int i = 0; i < numFormats; i++ ) + { + int test_return = 0; + if ( filterFlags[i] ) + { + continue; + } + + print_header( &formatList[ i ], false ); + + gTestCount++; + + if ( testMethod == k1D ) + test_return = test_fill_image_set_1D( device, &formatList[ i ], kUInt ); + else if ( testMethod == k2D ) + test_return = test_fill_image_set_2D( device, &formatList[ i ], kUInt ); + else if ( testMethod == k1DArray ) + test_return = test_fill_image_set_1D_array( device, &formatList[ i ], kUInt ); + else if ( testMethod == k2DArray ) + test_return = test_fill_image_set_2D_array( device, &formatList[ i ], kUInt ); + else if ( testMethod == k3D ) + test_return = test_fill_image_set_3D( device, &formatList[ i ], kUInt ); + + if (test_return) { + gTestFailure++; + log_error( "FAILED: " ); + print_header( &formatList[ i ], true ); + log_info( "\n" ); + } + + ret += test_return; + } + } + } + + delete filterFlags; + delete formatList; + + return ret; +} + + +int test_image_set( cl_device_id device, MethodsToTest testMethod ) +{ + int ret = 0; + + ret += test_image_type( device, testMethod, CL_MEM_READ_ONLY ); + + return ret; +} diff --git a/test_conformance/images/clGetInfo/CMakeLists.txt b/test_conformance/images/clGetInfo/CMakeLists.txt new file mode 100644 index 00000000..df5f781a --- /dev/null +++ b/test_conformance/images/clGetInfo/CMakeLists.txt @@ -0,0 +1,22 @@ +set(MODULE_NAME CL_GET_INFO) + +set(${MODULE_NAME}_SOURCES + main.cpp + test_1D_2D_array.cpp + test_1D.cpp + test_2D.cpp + test_loops.cpp + test_3D.cpp + ../../../test_common/harness/errorHelpers.c + ../../../test_common/harness/threadTesting.c + ../../../test_common/harness/kernelHelpers.c + ../../../test_common/harness/imageHelpers.cpp + ../../../test_common/harness/mt19937.c + ../../../test_common/harness/conversions.c + ../../../test_common/harness/testHarness.c + 
../../../test_common/harness/typeWrappers.cpp + ../../../test_common/harness/msvc9.c + ../../../test_common/harness/parseParameters.cpp +) + +include(../../CMakeCommon.txt) diff --git a/test_conformance/images/clGetInfo/Jamfile b/test_conformance/images/clGetInfo/Jamfile new file mode 100644 index 00000000..9e8ac8d3 --- /dev/null +++ b/test_conformance/images/clGetInfo/Jamfile @@ -0,0 +1,18 @@ +project + : requirements +# gcc:-xc++ +# msvc:"/TP" + ; + +exe test_cl_get_info + : main.cpp + test_2D.cpp + test_3D.cpp + test_loops.cpp + ; + +install dist + : test_cl_get_info + : debug:$(DIST)/debug/tests/test_conformance/images/clGetInfo + release:$(DIST)/release/tests/test_conformance/images/clGetInfo + ; diff --git a/test_conformance/images/clGetInfo/Makefile b/test_conformance/images/clGetInfo/Makefile new file mode 100644 index 00000000..eb8b3cd0 --- /dev/null +++ b/test_conformance/images/clGetInfo/Makefile @@ -0,0 +1,50 @@ +ifdef BUILD_WITH_ATF +ATF = -framework ATF +USE_ATF = -DUSE_ATF +endif + +SRCS = main.cpp \ + test_1D.cpp \ + test_2D.cpp \ + test_1D_2D_array.cpp \ + test_loops.cpp \ + test_3D.cpp \ + ../../../test_common/harness/errorHelpers.c \ + ../../../test_common/harness/threadTesting.c \ + ../../../test_common/harness/kernelHelpers.c \ + ../../../test_common/harness/imageHelpers.cpp \ + ../../../test_common/harness/conversions.c \ + ../../../test_common/harness/testHarness.c \ + ../../../test_common/harness/mt19937.c \ + ../../../test_common/harness/typeWrappers.cpp + +DEFINES = DONT_TEST_GARBAGE_POINTERS + +SOURCES = $(abspath $(SRCS)) +LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries +LIBPATH += -L. 
+FRAMEWORK = +HEADERS = +TARGET = test_cl_get_info +INCLUDE = +COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32 -Os +CC = c++ +CXX = c++ +CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE) +CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE) +LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF} + +OBJECTS := ${SOURCES:.c=.o} +OBJECTS := ${OBJECTS:.cpp=.o} + +TARGETOBJECT = +all: $(TARGET) + +$(TARGET): $(OBJECTS) + $(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES) + +clean: + rm -f $(TARGET) $(OBJECTS) + +.DEFAULT: + @echo The target \"$@\" does not exist in Makefile. diff --git a/test_conformance/images/clGetInfo/main.cpp b/test_conformance/images/clGetInfo/main.cpp new file mode 100644 index 00000000..d61766f1 --- /dev/null +++ b/test_conformance/images/clGetInfo/main.cpp @@ -0,0 +1,287 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../../test_common/harness/compat.h" + +#include +#include + +#if !defined(_WIN32) +#include +#include +#endif + +#include "../testBase.h" + +bool gDebugTrace = false, gTestSmallImages = false, gTestMaxImages = false, gTestRounding = false; +int gTypesToTest = 0; +cl_channel_type gChannelTypeToUse = (cl_channel_type)-1; +cl_device_type gDeviceType = CL_DEVICE_TYPE_DEFAULT; +cl_command_queue queue; +cl_context context; + +extern int test_image_set( cl_device_id device, cl_mem_object_type image_type ); + +#define MAX_ALLOWED_STD_DEVIATION_IN_MB 8.0 + +void printUsage( const char *execName ) +{ + const char *p = strrchr( execName, '/' ); + if( p != NULL ) + execName = p + 1; + + log_info( "Usage: %s [debug_trace] [1D|2D|3D|1Darray|2Darray] [small_images|max_images] [randomize]\n", execName ); + log_info( "Where:\n" ); + log_info( "\t1D - Only test 1D images\n" ); + log_info( "\t2D - Only test 2D images\n" ); + log_info( "\t3D - Only test 3D images\n" ); + log_info( "\t1Darray - Only test 1D image arrays\n" ); + log_info( "\t2Darray - Only test 2D image arrays\n" ); + log_info( "\n" ); + log_info( "\tdebug_trace - Enables additional debug info logging (default no debug info)\n" ); + log_info( "\n" ); + log_info( "\tsmall_images - Runs every format through a loop of widths 1-13 and heights 1-9, instead of random sizes (default test random sizes)\n" ); + log_info( "\tmax_images - Runs every format through a set of size combinations with the max values, max values - 1, and max values / 128 (default test random sizes)\n" ); + log_info( "\n" ); + log_info( "\trandomize - Seed random number generator (default do not seed random number generator)\n" ); +} + + +int main(int argc, const char *argv[]) +{ + cl_platform_id platform; + cl_device_id device; + cl_channel_type chanType; + char str[ 128 ]; + bool test3DImages = true; + bool randomize = false; + int testMethods = 0; + + test_start(); + + checkDeviceTypeOverride( &gDeviceType ); + + // Parse arguments + 
for( int i = 1; i < argc; i++ ) + { + strncpy( str, argv[ i ], sizeof( str ) - 1 ); + + if( strcmp( str, "cpu" ) == 0 || strcmp( str, "CL_DEVICE_TYPE_CPU" ) == 0 ) + gDeviceType = CL_DEVICE_TYPE_CPU; + else if( strcmp( str, "gpu" ) == 0 || strcmp( str, "CL_DEVICE_TYPE_GPU" ) == 0 ) + gDeviceType = CL_DEVICE_TYPE_GPU; + else if( strcmp( str, "accelerator" ) == 0 || strcmp( str, "CL_DEVICE_TYPE_ACCELERATOR" ) == 0 ) + gDeviceType = CL_DEVICE_TYPE_ACCELERATOR; + else if( strcmp( str, "CL_DEVICE_TYPE_DEFAULT" ) == 0 ) + gDeviceType = CL_DEVICE_TYPE_DEFAULT; + + else if( strcmp( str, "debug_trace" ) == 0 ) + gDebugTrace = true; + + else if( strcmp( str, "small_images" ) == 0 ) + gTestSmallImages = true; + else if( strcmp( str, "max_images" ) == 0 ) + gTestMaxImages = true; + + else if( strcmp( str, "randomize" ) == 0 ) + randomize = true; + + else if( strcmp( str, "1D" ) == 0 ) + testMethods |= k1D; + else if( strcmp( str, "2D" ) == 0 ) + testMethods |= k2D; + else if( strcmp( str, "3D" ) == 0 ) + testMethods |= k3D; + else if( strcmp( str, "1Darray" ) == 0 ) + testMethods |= k1DArray; + else if( strcmp( str, "2Darray" ) == 0 ) + testMethods |= k2DArray; + + else if( strcmp( str, "help" ) == 0 || strcmp( str, "?" ) == 0 ) + { + printUsage( argv[ 0 ] ); + return -1; + } + else if( ( chanType = get_channel_type_from_name( str ) ) != (cl_channel_type)-1 ) + gChannelTypeToUse = chanType; + else + { + log_error( "ERROR: Unknown argument %d: %s. 
Exiting....\n", i, str ); + return -1; + } + + } + + if ( testMethods == 0 ) + testMethods = k1D | k2D | k3D | k1DArray | k2DArray; + + // Seed the random # generators + if( randomize ) + { + gRandomSeed = (cl_uint) time( NULL ); + log_info( "Random seed: %u.\n", gRandomSeed ); + gReSeed = 1; + } + + int error; + // Get our platform + error = clGetPlatformIDs(1, &platform, NULL); + if( error ) + { + print_error( error, "Unable to get platform" ); + test_finish(); + return -1; + } + + // Get our device + unsigned int num_devices; + error = clGetDeviceIDs(platform, gDeviceType, 0, NULL, &num_devices); + if( error ) + { + print_error( error, "Unable to get number of devices" ); + test_finish(); + return -1; + } + + uint32_t gDeviceIndex = 0; + const char* device_index_env = getenv("CL_DEVICE_INDEX"); + if (device_index_env) { + if (device_index_env) { + gDeviceIndex = atoi(device_index_env); + } + + if (gDeviceIndex >= num_devices) { + vlog("Specified CL_DEVICE_INDEX=%d out of range, using index 0.\n", gDeviceIndex); + gDeviceIndex = 0; + } + } + + cl_device_id *gDeviceList = (cl_device_id *)malloc( num_devices * sizeof( cl_device_id ) ); + error = clGetDeviceIDs(platform, gDeviceType, num_devices, gDeviceList, NULL); + if( error ) + { + print_error( error, "Unable to get devices" ); + free( gDeviceList ); + test_finish(); + return -1; + } + + device = gDeviceList[gDeviceIndex]; + free( gDeviceList ); + + log_info( "Using " ); + if( printDeviceHeader( device ) != CL_SUCCESS ) + { + test_finish(); + return -1; + } + + // Check for image support + if(checkForImageSupport( device ) == CL_IMAGE_FORMAT_NOT_SUPPORTED) { + log_info("Device does not support images. 
Skipping test.\n"); + test_finish(); + return 0; + } + + // Check for 3D image support + { + size_t max_height, max_depth, max_width; + max_height = max_depth = max_width = -1L; + + if( (error = clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof( max_width ), &max_width, NULL ) )) + { print_error( error, "FAILURE: Unable to get CL_DEVICE_IMAGE3D_MAX_WIDTH" ); test_finish(); return -1; } + if( (error = clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof( max_height ), &max_height, NULL ) )) + { print_error( error, "FAILURE: Unable to get CL_DEVICE_IMAGE3D_MAX_HEIGHT"); test_finish(); return -1; } + if( (error = clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof( max_depth ), &max_depth, NULL ) )) + { print_error( error, "FAILURE: Unable to get CL_DEVICE_IMAGE3D_MAX_DEPTH" ); test_finish(); return -1; } + + if( 0 == (max_height | max_depth | max_width) ) + { + char deviceProfile[128]; + error = clGetDeviceInfo( device, CL_DEVICE_PROFILE, sizeof( deviceProfile ), deviceProfile, NULL ); + if( error ) + { + print_error( error, "Unable to get device profile" ); + test_finish(); + return -1; + } + + if( strcmp( deviceProfile, "EMBEDDED_PROFILE" ) ) + { + log_error( "FAILURE: non-Embedded device with image support does not support 3D images." 
); + test_finish(); + return -1; + } + + test3DImages = false; + } + } + + // Create a context to test with + context = clCreateContext( NULL, 1, &device, notify_callback, NULL, &error ); + if( error != CL_SUCCESS ) + { + print_error( error, "Unable to create testing context" ); + test_finish(); + return -1; + } + + // Create a queue against the context + queue = clCreateCommandQueueWithProperties( context, device, 0, &error ); + if( error != CL_SUCCESS ) + { + print_error( error, "Unable to create testing command queue" ); + test_finish(); + return -1; + } + + if( gTestSmallImages ) + log_info( "Note: Using small test images\n" ); + + // Run the test now + int ret = 0; + if (testMethods & k1D) + ret += test_image_set( device, CL_MEM_OBJECT_IMAGE1D ); + if (testMethods & k2D) + ret += test_image_set( device, CL_MEM_OBJECT_IMAGE2D ); + if (test3DImages && (testMethods & k3D)) + ret += test_image_set( device, CL_MEM_OBJECT_IMAGE3D ); + if (testMethods & k1DArray) + ret += test_image_set( device, CL_MEM_OBJECT_IMAGE1D_ARRAY ); + if (testMethods & k2DArray) + ret += test_image_set( device, CL_MEM_OBJECT_IMAGE2D_ARRAY ); + + if (gTestFailure == 0) { + if (gTestCount > 1) + log_info("PASSED %d of %d tests.\n", gTestCount, gTestCount); + else + log_info("PASSED test.\n"); + } else if (gTestFailure > 0) { + if (gTestCount > 1) + log_error("FAILED %d of %d tests.\n", gTestFailure, gTestCount); + else + log_error("FAILED test.\n"); + } + + // Clean up + clReleaseCommandQueue(queue); + clReleaseContext(context); + test_finish(); + + if (gTestFailure > 0) + return gTestFailure; + + return ret; +} diff --git a/test_conformance/images/clGetInfo/test_1D.cpp b/test_conformance/images/clGetInfo/test_1D.cpp new file mode 100644 index 00000000..8a484804 --- /dev/null +++ b/test_conformance/images/clGetInfo/test_1D.cpp @@ -0,0 +1,138 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../testBase.h" + +#define MAX_ERR 0.005f +#define MAX_HALF_LINEAR_ERR 0.3f + +extern bool gDebugTrace, gTestSmallImages, gTestMaxImages; + +extern int test_get_image_info_single( cl_device_id device, image_descriptor *imageInfo, MTdata d, cl_mem_flags flags, size_t row_pitch, size_t slice_pitch ); + + +int test_get_image_info_1D( cl_device_id device, cl_image_format *format, cl_mem_flags flags ) +{ + size_t maxWidth; + cl_ulong maxAllocSize, memSize; + image_descriptor imageInfo = { 0 }; + RandomSeed seed( gRandomSeed ); + size_t pixelSize; + + cl_mem_flags all_host_ptr_flags[5] = { + flags, + CL_MEM_ALLOC_HOST_PTR | flags, + CL_MEM_COPY_HOST_PTR | flags, + CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR | flags, + CL_MEM_USE_HOST_PTR | flags + }; + + memset(&imageInfo, 0x0, sizeof(image_descriptor)); + imageInfo.type = CL_MEM_OBJECT_IMAGE1D; + imageInfo.format = format; + pixelSize = get_pixel_size( imageInfo.format ); + + int error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL ); + test_error( error, "Unable to get max image 1D size from device" ); + + if (memSize > (cl_ulong)SIZE_MAX) { + memSize = (cl_ulong)SIZE_MAX; + } + + if( 
gTestSmallImages ) + { + for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ ) + { + imageInfo.rowPitch = imageInfo.width * pixelSize; + for (unsigned int j=0; j < sizeof(all_host_ptr_flags)/sizeof(cl_mem_flags); j++) + { + if( gDebugTrace ) + log_info( " at size %d (flags[%u] 0x%x pitch %d)\n", (int)imageInfo.width, j, (unsigned int) all_host_ptr_flags[j], (int)imageInfo.rowPitch ); + if ( test_get_image_info_single( device, &imageInfo, seed, all_host_ptr_flags[j], 0, 0 ) ) + return -1; + if (all_host_ptr_flags[j] & (CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR)) { // skip test when host_ptr is NULL + if ( test_get_image_info_single( device, &imageInfo, seed, all_host_ptr_flags[j], imageInfo.rowPitch, 0 ) ) + return -1; + } + } + } + } + else if( gTestMaxImages ) + { + // Try a specific set of maximum sizes + size_t numbeOfSizes; + size_t sizes[100][3]; + + get_max_sizes(&numbeOfSizes, 100, sizes, maxWidth, 1, 1, 1, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE1D, imageInfo.format); + + for( size_t idx = 0; idx < numbeOfSizes; idx++ ) + { + imageInfo.width = sizes[ idx ][ 0 ]; + imageInfo.rowPitch = imageInfo.width * pixelSize; + log_info( "Testing %d x 1\n", (int)sizes[ idx ][ 0 ]); + for (unsigned int j=0; j < sizeof(all_host_ptr_flags)/sizeof(cl_mem_flags); j++) + { + if( gDebugTrace ) + log_info( " at max size %d (flags[%u] 0x%x pitch %d)\n", (int)imageInfo.width, j, (unsigned int) all_host_ptr_flags[j], (int)imageInfo.rowPitch ); + if( test_get_image_info_single( device, &imageInfo, seed, all_host_ptr_flags[j], 0, 0 ) ) + return -1; + if (all_host_ptr_flags[j] & (CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR)) { // skip test when host_ptr is NULL + if( test_get_image_info_single( device, &imageInfo, seed, all_host_ptr_flags[j], imageInfo.rowPitch, 0 ) ) + return -1; + } + } + } + } + else + { + for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ ) + { + cl_ulong size; + // Loop until we get a size that a) will fit in the max alloc size and b) that an 
allocation of that + // image, the result array, plus offset arrays, will fit in the global ram space + do + { + imageInfo.width = (size_t)random_log_in_range( 16, (int)maxWidth / 32, seed ); + + imageInfo.rowPitch = imageInfo.width * pixelSize; + size_t extraWidth = (int)random_log_in_range( 0, 64, seed ); + imageInfo.rowPitch += extraWidth; + + do { + extraWidth++; + imageInfo.rowPitch += extraWidth; + } while ((imageInfo.rowPitch % pixelSize) != 0); + + size = (cl_ulong)imageInfo.rowPitch * 4; + } while( size > maxAllocSize || ( size * 3 ) > memSize ); + + for (unsigned int j=0; j < sizeof(all_host_ptr_flags)/sizeof(cl_mem_flags); j++) + { + if( gDebugTrace ) + log_info( " at size %d (flags[%u] 0x%x pitch %d) out of %d\n", (int)imageInfo.width, j, (unsigned int) all_host_ptr_flags[j], (int)imageInfo.rowPitch, (int)maxWidth ); + if ( test_get_image_info_single( device, &imageInfo, seed, all_host_ptr_flags[j], 0, 0 ) ) + return -1; + if (all_host_ptr_flags[j] & (CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR)) { // skip test when host_ptr is NULL + if ( test_get_image_info_single( device, &imageInfo, seed, all_host_ptr_flags[j], imageInfo.rowPitch, 0 ) ) + return -1; + } + } + } + } + + return 0; +} diff --git a/test_conformance/images/clGetInfo/test_1D_2D_array.cpp b/test_conformance/images/clGetInfo/test_1D_2D_array.cpp new file mode 100644 index 00000000..132a9c32 --- /dev/null +++ b/test_conformance/images/clGetInfo/test_1D_2D_array.cpp @@ -0,0 +1,279 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../testBase.h" + +#define MAX_ERR 0.005f +#define MAX_HALF_LINEAR_ERR 0.3f + +extern bool gDebugTrace, gTestSmallImages, gTestMaxImages; + +extern int test_get_image_info_single( cl_device_id device, image_descriptor *imageInfo, MTdata d, cl_mem_flags flags, size_t row_pitch, size_t slice_pitch ); + +int test_get_image_info_1D_array( cl_device_id device, cl_image_format *format, cl_mem_flags flags ) +{ + size_t maxWidth, maxArraySize; + cl_ulong maxAllocSize, memSize; + image_descriptor imageInfo = { 0 }; + RandomSeed seed( gRandomSeed ); + + cl_mem_flags all_host_ptr_flags[5] = { + flags, + CL_MEM_ALLOC_HOST_PTR | flags, + CL_MEM_COPY_HOST_PTR | flags, + CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR | flags, + CL_MEM_USE_HOST_PTR | flags + }; + + memset(&imageInfo, 0x0, sizeof(image_descriptor)); + imageInfo.format = format; + imageInfo.type = CL_MEM_OBJECT_IMAGE1D_ARRAY; + + int error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, sizeof( maxArraySize ), &maxArraySize, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL ); + test_error( error, "Unable to get max image 1D array size from device" ); + + if (memSize > (cl_ulong)SIZE_MAX) { + memSize = (cl_ulong)SIZE_MAX; + } + + if( gTestSmallImages ) + { + for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ ) + { + imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format ); + imageInfo.slicePitch = imageInfo.rowPitch; + for( imageInfo.arraySize = 2; imageInfo.arraySize < 9; imageInfo.arraySize++ ) + { + for (unsigned int j=0; j < sizeof(all_host_ptr_flags)/sizeof(cl_mem_flags); j++) + { + 
if( gDebugTrace ) + log_info( " at size %d,%d (flags[%u] 0x%x pitch %d)\n", (int)imageInfo.width, (int)imageInfo.arraySize, j, (unsigned int) all_host_ptr_flags[j], (int)imageInfo.rowPitch ); + if ( test_get_image_info_single( device, &imageInfo, seed, all_host_ptr_flags[j], 0, 0 ) ) + return -1; + if (all_host_ptr_flags[j] & (CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR)) { // skip test when host_ptr is NULL + if ( test_get_image_info_single( device, &imageInfo, seed, all_host_ptr_flags[j], imageInfo.rowPitch, 0 ) ) + return -1; + } + } + } + } + } + else if( gTestMaxImages ) + { + // Try a specific set of maximum sizes + size_t numbeOfSizes; + size_t sizes[100][3]; + + get_max_sizes(&numbeOfSizes, 100, sizes, maxWidth, 1, 1, maxArraySize, maxAllocSize, memSize, + CL_MEM_OBJECT_IMAGE1D_ARRAY, imageInfo.format); + + for( size_t idx = 0; idx < numbeOfSizes; idx++ ) + { + imageInfo.width = sizes[ idx ][ 0 ]; + imageInfo.arraySize = sizes [ idx] [ 2 ]; + imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format ); + imageInfo.slicePitch = imageInfo.rowPitch; + + log_info( "Testing %d x %d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 2 ] ); + for (unsigned int j=0; j < sizeof(all_host_ptr_flags)/sizeof(cl_mem_flags); j++) + { + if( gDebugTrace ) + log_info( " at max size %d,%d (flags[%u] 0x%x pitch %d)\n", (int)imageInfo.width, (int)imageInfo.arraySize, j, (unsigned int) all_host_ptr_flags[j], (int)imageInfo.rowPitch ); + if( test_get_image_info_single( device, &imageInfo, seed, all_host_ptr_flags[j], 0, 0 ) ) + return -1; + if (all_host_ptr_flags[j] & (CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR)) { // skip test when host_ptr is NULL + if( test_get_image_info_single( device, &imageInfo, seed, all_host_ptr_flags[j], imageInfo.rowPitch, 0 ) ) + return -1; + } + } + } + } + else + { + for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ ) + { + cl_ulong size; + // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that + // 
image, the result array, plus offset arrays, will fit in the global ram space + do + { + imageInfo.width = (size_t)random_log_in_range( 16, (int)maxWidth / 32, seed ); + imageInfo.arraySize = (size_t)random_log_in_range( 16, (int)maxArraySize / 32, seed ); + + imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format ); + + size_t extraWidth = (int)random_log_in_range( 0, 64, seed ); + imageInfo.rowPitch += extraWidth; + + imageInfo.slicePitch = imageInfo.rowPitch; + + size = (cl_ulong)imageInfo.slicePitch * (cl_ulong)imageInfo.arraySize * 4 * 4; + } while( size > maxAllocSize || ( size * 3 ) > memSize ); + + for (unsigned int j=0; j < sizeof(all_host_ptr_flags)/sizeof(cl_mem_flags); j++) + { + if( gDebugTrace ) + log_info( " at size %d,%d (flags[%u] 0x%x pitch %d) out of %d,%d\n", (int)imageInfo.width, (int)imageInfo.arraySize, j, (unsigned int) all_host_ptr_flags[j], (int)imageInfo.rowPitch, (int)maxWidth, (int)maxArraySize ); + if ( test_get_image_info_single( device, &imageInfo, seed, all_host_ptr_flags[j], 0, 0 ) ) + return -1; + if (all_host_ptr_flags[j] & (CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR)) { // skip test when host_ptr is NULL + if ( test_get_image_info_single( device, &imageInfo, seed, all_host_ptr_flags[j], imageInfo.rowPitch, 0 ) ) + return -1; + } + } + } + } + + return 0; +} + + +int test_get_image_info_2D_array( cl_device_id device, cl_image_format *format, cl_mem_flags flags ) +{ + size_t maxWidth, maxHeight, maxArraySize; + cl_ulong maxAllocSize, memSize; + image_descriptor imageInfo = { 0 }; + RandomSeed seed( gRandomSeed ); + size_t pixelSize; + + cl_mem_flags all_host_ptr_flags[5] = { + flags, + CL_MEM_ALLOC_HOST_PTR | flags, + CL_MEM_COPY_HOST_PTR | flags, + CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR | flags, + CL_MEM_USE_HOST_PTR | flags + }; + + memset(&imageInfo, 0x0, sizeof(image_descriptor)); + imageInfo.format = format; + imageInfo.type = CL_MEM_OBJECT_IMAGE2D_ARRAY; + pixelSize = get_pixel_size( imageInfo.format 
); + + int error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, sizeof( maxArraySize ), &maxArraySize, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL ); + test_error( error, "Unable to get max image 1D array size from device" ); + + if (memSize > (cl_ulong)SIZE_MAX) { + memSize = (cl_ulong)SIZE_MAX; + } + + if( gTestSmallImages ) + { + for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ ) + { + imageInfo.rowPitch = imageInfo.width * pixelSize; + + for( imageInfo.height = 1; imageInfo.height < 9; imageInfo.height++ ) + { + imageInfo.slicePitch = imageInfo.rowPitch * imageInfo.height; + for( imageInfo.arraySize = 2; imageInfo.arraySize < 9; imageInfo.arraySize++ ) + { + for (unsigned int j=0; j < sizeof(all_host_ptr_flags)/sizeof(cl_mem_flags); j++) + { + if( gDebugTrace ) + log_info( " at size %d,%d,%d (flags[%u] 0x%x pitch %d)\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.arraySize, j, (unsigned int) all_host_ptr_flags[j], (int)imageInfo.rowPitch ); + if ( test_get_image_info_single( device, &imageInfo, seed, all_host_ptr_flags[j], 0, 0 ) ) + return -1; + if (all_host_ptr_flags[j] & (CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR)) { // skip test when host_ptr is NULL + if ( test_get_image_info_single( device, &imageInfo, seed, all_host_ptr_flags[j], imageInfo.rowPitch, 0 ) ) + return -1; + } + } + } + } + } + } + else if( gTestMaxImages ) + { + // Try a specific set of maximum sizes + size_t numbeOfSizes; + size_t sizes[100][3]; + + get_max_sizes(&numbeOfSizes, 100, sizes, maxWidth, maxHeight, 1, maxArraySize, maxAllocSize, memSize, 
CL_MEM_OBJECT_IMAGE2D_ARRAY, imageInfo.format); + + for( size_t idx = 0; idx < numbeOfSizes; idx++ ) + { + imageInfo.width = sizes[ idx ][ 0 ]; + imageInfo.height = sizes[ idx ][ 1 ]; + imageInfo.arraySize = sizes[ idx ][ 2 ]; + imageInfo.rowPitch = imageInfo.width * pixelSize; + imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch; + + log_info( "Testing %d x %d x %d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ], (int)sizes[ idx ][ 2 ] ); + for (unsigned int j=0; j < sizeof(all_host_ptr_flags)/sizeof(cl_mem_flags); j++) + { + if( gDebugTrace ) + log_info( " at max size %d,%d,%d (flags[%u] 0x%x pitch %d)\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.arraySize, j, (unsigned int) all_host_ptr_flags[j], (int)imageInfo.rowPitch ); + if ( test_get_image_info_single( device, &imageInfo, seed, all_host_ptr_flags[j], 0, 0 ) ) + return -1; + if (all_host_ptr_flags[j] & (CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR)) { // skip test when host_ptr is NULL + if ( test_get_image_info_single( device, &imageInfo, seed, all_host_ptr_flags[j], imageInfo.rowPitch, 0 ) ) + return -1; + } + } + } + } + else + { + for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ ) + { + cl_ulong size; + // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that + // image, the result array, plus offset arrays, will fit in the global ram space + do + { + imageInfo.width = (size_t)random_log_in_range( 16, (int)maxWidth / 32, seed ); + imageInfo.height = (size_t)random_log_in_range( 16, (int)maxHeight / 32, seed ); + imageInfo.arraySize = (size_t)random_log_in_range( 16, (int)maxArraySize / 32, seed ); + + imageInfo.rowPitch = imageInfo.width * pixelSize; + imageInfo.slicePitch = imageInfo.rowPitch * imageInfo.height; + + size_t extraWidth = (int)random_log_in_range( 0, 64, seed ); + imageInfo.rowPitch += extraWidth; + + do { + extraWidth++; + imageInfo.rowPitch += extraWidth; + } while ((imageInfo.rowPitch % pixelSize) != 0); + + size_t 
extraHeight = (int)random_log_in_range( 0, 8, seed ); + imageInfo.slicePitch = imageInfo.rowPitch * (imageInfo.height + extraHeight); + + size = (cl_ulong)imageInfo.slicePitch * (cl_ulong)imageInfo.arraySize * 4 * 4; + } while( size > maxAllocSize || ( size * 3 ) > memSize ); + + for (unsigned int j=0; j < sizeof(all_host_ptr_flags)/sizeof(cl_mem_flags); j++) + { + if( gDebugTrace ) + log_info( " at size %d,%d,%d (flags[%u] 0x%x pitch %d) out of %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.arraySize, j, (unsigned int) all_host_ptr_flags[j], (int)imageInfo.rowPitch, (int)maxWidth, (int)maxHeight, (int)maxArraySize ); + if ( test_get_image_info_single( device, &imageInfo, seed, all_host_ptr_flags[j], 0, 0 ) ) + return -1; + if (all_host_ptr_flags[j] & (CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR)) { // skip test when host_ptr is NULL + if ( test_get_image_info_single( device, &imageInfo, seed, all_host_ptr_flags[j], imageInfo.rowPitch, 0 ) ) + return -1; + } + } + } + } + + return 0; +} diff --git a/test_conformance/images/clGetInfo/test_2D.cpp b/test_conformance/images/clGetInfo/test_2D.cpp new file mode 100644 index 00000000..20933581 --- /dev/null +++ b/test_conformance/images/clGetInfo/test_2D.cpp @@ -0,0 +1,385 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../testBase.h" + +#define MAX_ERR 0.005f +#define MAX_HALF_LINEAR_ERR 0.3f + +extern bool gDebugTrace, gTestSmallImages, gTestMaxImages; +extern cl_command_queue queue; +extern cl_context context; + + +int test_get_image_info_single( cl_device_id device, image_descriptor *imageInfo, MTdata d, cl_mem_flags flags, size_t row_pitch, size_t slice_pitch ) +{ + int error; + clMemWrapper image; + cl_image_desc imageDesc; + void *host_ptr = NULL; + + // Generate some data to test against + BufferOwningPtr imageValues; + generate_random_image_data( imageInfo, imageValues, d ); + + if (flags & (CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR)) { + host_ptr = (char *)imageValues; + } + + memset(&imageDesc, 0x0, sizeof(cl_image_desc)); + imageDesc.image_type = imageInfo->type; + imageDesc.image_width = imageInfo->width; + imageDesc.image_height = imageInfo->height; + imageDesc.image_depth = imageInfo->depth; + imageDesc.image_array_size = imageInfo->arraySize; + imageDesc.image_row_pitch = row_pitch; + imageDesc.image_slice_pitch = slice_pitch; + + // Construct testing source + // Note: for now, just reset the pitches, since they only can actually be different + // if we use CL_MEM_USE_HOST_PTR or CL_MEM_COPY_HOST_PTR + imageInfo->rowPitch = imageInfo->width * get_pixel_size( imageInfo->format ); + imageInfo->slicePitch = 0; + switch (imageInfo->type) + { + case CL_MEM_OBJECT_IMAGE1D: + if ( gDebugTrace ) + log_info( " - Creating 1D image %d with flags=0x%lx row_pitch=%d slice_pitch=%d host_ptr=%p...\n", (int)imageInfo->width, (unsigned long)flags, (int)row_pitch, (int)slice_pitch, host_ptr ); + break; + case CL_MEM_OBJECT_IMAGE2D: + if ( gDebugTrace ) + log_info( " - Creating 2D image %d by %d with flags=0x%lx row_pitch=%d slice_pitch=%d host_ptr=%p...\n", (int)imageInfo->width, (int)imageInfo->height, (unsigned long)flags, (int)row_pitch, (int)slice_pitch, host_ptr ); + break; + case CL_MEM_OBJECT_IMAGE3D: + imageInfo->slicePitch = imageInfo->rowPitch * 
imageInfo->height; + if ( gDebugTrace ) + log_info( " - Creating 3D image %d by %d by %d with flags=0x%lx row_pitch=%d slice_pitch=%d host_ptr=%p...\n", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->depth, (unsigned long)flags, (int)row_pitch, (int)slice_pitch, host_ptr ); + break; + case CL_MEM_OBJECT_IMAGE1D_ARRAY: + imageInfo->slicePitch = imageInfo->rowPitch; + if ( gDebugTrace ) + log_info( " - Creating 1D image array %d by %d with flags=0x%lx row_pitch=%d slice_pitch=%d host_ptr=%p...\n", (int)imageInfo->width, (int)imageInfo->arraySize, (unsigned long)flags, (int)row_pitch, (int)slice_pitch, host_ptr ); + break; + case CL_MEM_OBJECT_IMAGE2D_ARRAY: + imageInfo->slicePitch = imageInfo->rowPitch * imageInfo->height; + if ( gDebugTrace ) + log_info( " - Creating 2D image array %d by %d by %d with flags=0x%lx row_pitch=%d slice_pitch=%d host_ptr=%p...\n", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->arraySize, (unsigned long)flags, (int)row_pitch, (int)slice_pitch, host_ptr ); + break; + } + + image = clCreateImage(context, flags, imageInfo->format, &imageDesc, host_ptr, &error); + if( image == NULL ) + { + switch (imageInfo->type) + { + case CL_MEM_OBJECT_IMAGE1D: + log_error( "ERROR: Unable to create 1D image of size %d (%s)", (int)imageInfo->width, IGetErrorString( error ) ); + break; + case CL_MEM_OBJECT_IMAGE2D: + log_error( "ERROR: Unable to create 2D image of size %d x %d (%s)", (int)imageInfo->width, (int)imageInfo->height, IGetErrorString( error ) ); + break; + case CL_MEM_OBJECT_IMAGE3D: + log_error( "ERROR: Unable to create 3D image of size %d x %d x %d (%s)", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->depth, IGetErrorString( error ) ); + break; + case CL_MEM_OBJECT_IMAGE1D_ARRAY: + log_error( "ERROR: Unable to create 1D image array of size %d x %d (%s)", (int)imageInfo->width, (int)imageInfo->arraySize, IGetErrorString( error ) ); + break; + break; + case CL_MEM_OBJECT_IMAGE2D_ARRAY: + log_error( 
"ERROR: Unable to create 2D image array of size %d x %d x %d (%s)", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->arraySize, IGetErrorString( error ) ); + break; + } + return -1; + } + + // Get info of the image and verify each item is correct + cl_image_format outFormat; + error = clGetImageInfo( image, CL_IMAGE_FORMAT, sizeof( outFormat ), &outFormat, NULL ); + test_error( error, "Unable to get image info (format)" ); + if( outFormat.image_channel_order != imageInfo->format->image_channel_order || + outFormat.image_channel_data_type != imageInfo->format->image_channel_data_type ) + { + log_error( "ERROR: image format returned is invalid! (expected %s:%s, got %s:%s (%d:%d))\n", + GetChannelOrderName( imageInfo->format->image_channel_order ), GetChannelTypeName( imageInfo->format->image_channel_data_type ), + GetChannelOrderName( outFormat.image_channel_order ), GetChannelTypeName( outFormat.image_channel_data_type ), + (int)outFormat.image_channel_order, (int)outFormat.image_channel_data_type ); + return 1; + } + + size_t outElementSize; + error = clGetImageInfo( image, CL_IMAGE_ELEMENT_SIZE, sizeof( outElementSize ), &outElementSize, NULL ); + test_error( error, "Unable to get image info (element size)" ); + if( outElementSize != get_pixel_size( imageInfo->format ) ) + { + log_error( "ERROR: image element size returned is invalid! (expected %d, got %d)\n", + (int)get_pixel_size( imageInfo->format ), (int)outElementSize ); + return 1; + } + + size_t outRowPitch; + error = clGetImageInfo( image, CL_IMAGE_ROW_PITCH, sizeof( outRowPitch ), &outRowPitch, NULL ); + test_error( error, "Unable to get image info (row pitch)" ); + + size_t outSlicePitch; + error = clGetImageInfo( image, CL_IMAGE_SLICE_PITCH, sizeof( outSlicePitch ), &outSlicePitch, NULL ); + test_error( error, "Unable to get image info (row pitch)" ); + if( imageInfo->type == CL_MEM_OBJECT_IMAGE1D && outSlicePitch != 0 ) + { + log_error( "ERROR: slice pitch returned is invalid! 
(expected %d, got %d)\n", + (int)0, (int)outSlicePitch ); + return 1; + } + + size_t outWidth; + error = clGetImageInfo( image, CL_IMAGE_WIDTH, sizeof( outWidth ), &outWidth, NULL ); + test_error( error, "Unable to get image info (width)" ); + if( outWidth != imageInfo->width ) + { + log_error( "ERROR: image width returned is invalid! (expected %d, got %d)\n", + (int)imageInfo->width, (int)outWidth ); + return 1; + } + + size_t required_height; + switch (imageInfo->type) + { + case CL_MEM_OBJECT_IMAGE1D: + case CL_MEM_OBJECT_IMAGE1D_ARRAY: + required_height = 0; + break; + case CL_MEM_OBJECT_IMAGE2D: + case CL_MEM_OBJECT_IMAGE2D_ARRAY: + case CL_MEM_OBJECT_IMAGE3D: + required_height = imageInfo->height; + break; + } + + size_t outHeight; + error = clGetImageInfo( image, CL_IMAGE_HEIGHT, sizeof( outHeight ), &outHeight, NULL ); + test_error( error, "Unable to get image info (height)" ); + if( outHeight != required_height ) + { + log_error( "ERROR: image height returned is invalid! (expected %d, got %d)\n", + (int)required_height, (int)outHeight ); + return 1; + } + + size_t required_depth; + switch (imageInfo->type) + { + case CL_MEM_OBJECT_IMAGE1D: + case CL_MEM_OBJECT_IMAGE2D: + case CL_MEM_OBJECT_IMAGE1D_ARRAY: + case CL_MEM_OBJECT_IMAGE2D_ARRAY: + required_depth = 0; + break; + case CL_MEM_OBJECT_IMAGE3D: + required_depth = imageInfo->depth; + break; + } + + size_t outDepth; + error = clGetImageInfo( image, CL_IMAGE_DEPTH, sizeof( outDepth ), &outDepth, NULL ); + test_error( error, "Unable to get image info (depth)" ); + if( outDepth != required_depth ) + { + log_error( "ERROR: image depth returned is invalid! 
(expected %d, got %d)\n", + (int)required_depth, (int)outDepth ); + return 1; + } + + size_t required_array_size; + switch (imageInfo->type) + { + case CL_MEM_OBJECT_IMAGE1D: + case CL_MEM_OBJECT_IMAGE2D: + case CL_MEM_OBJECT_IMAGE3D: + required_array_size = 0; + break; + case CL_MEM_OBJECT_IMAGE1D_ARRAY: + case CL_MEM_OBJECT_IMAGE2D_ARRAY: + required_array_size = imageInfo->arraySize; + break; + } + + size_t outArraySize; + error = clGetImageInfo( image, CL_IMAGE_ARRAY_SIZE, sizeof( outArraySize ), &outArraySize, NULL ); + test_error( error, "Unable to get image info (array size)" ); + if( outArraySize != required_array_size ) + { + log_error( "ERROR: image array size returned is invalid! (expected %d, got %d)\n", + (int)required_array_size, (int)outArraySize ); + return 1; + } + + cl_mem outBuffer; + error = clGetImageInfo( image, CL_IMAGE_BUFFER, sizeof( outBuffer ), &outBuffer, NULL ); + test_error( error, "Unable to get image info (buffer)" ); + if (imageInfo->type == CL_MEM_OBJECT_IMAGE1D_BUFFER) { + if (outBuffer != imageInfo->buffer) { + log_error( "ERROR: cl_mem returned is invalid! (expected %p, got %p)\n", + imageInfo->buffer, outBuffer ); + return 1; + } + } else { + if (outBuffer != (cl_mem)NULL) { + log_error( "ERROR: cl_mem returned is invalid! (expected %p, got %p)\n", + (cl_mem)NULL, outBuffer ); + return 1; + } + } + + cl_uint numMipLevels; + error = clGetImageInfo( image, CL_IMAGE_NUM_MIP_LEVELS, sizeof( numMipLevels ), &numMipLevels, NULL ); + test_error( error, "Unable to get image info (num mip levels)" ); + if( numMipLevels != 0 ) + { + log_error( "ERROR: image num_mip_levels returned is invalid! (expected %d, got %d)\n", + (int)0, (int)numMipLevels ); + return 1; + } + + cl_uint numSamples; + error = clGetImageInfo( image, CL_IMAGE_NUM_SAMPLES, sizeof( numSamples ), &numSamples, NULL ); + test_error( error, "Unable to get image info (num samples)" ); + if( numSamples != 0 ) + { + log_error( "ERROR: image num_samples returned is invalid! 
(expected %d, got %d)\n", + (int)0, (int)numSamples ); + return 1; + } + + return 0; +} + +int test_get_image_info_2D( cl_device_id device, cl_image_format *format, cl_mem_flags flags ) +{ + size_t maxWidth, maxHeight; + cl_ulong maxAllocSize, memSize; + image_descriptor imageInfo = { 0 }; + RandomSeed seed( gRandomSeed ); + size_t pixelSize; + + cl_mem_flags all_host_ptr_flags[5] = { + flags, + CL_MEM_ALLOC_HOST_PTR | flags, + CL_MEM_COPY_HOST_PTR | flags, + CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR | flags, + CL_MEM_USE_HOST_PTR | flags + }; + + memset(&imageInfo, 0x0, sizeof(image_descriptor)); + imageInfo.format = format; + imageInfo.type = CL_MEM_OBJECT_IMAGE2D; + pixelSize = get_pixel_size( imageInfo.format ); + + int error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL ); + test_error( error, "Unable to get max image 2D width or max image 3D height or max memory allocation size or global memory size from device" ); + + if (memSize > (cl_ulong)SIZE_MAX) { + memSize = (cl_ulong)SIZE_MAX; + } + + if( gTestSmallImages ) + { + for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ ) + { + imageInfo.rowPitch = imageInfo.width * pixelSize; + for( imageInfo.height = 1; imageInfo.height < 9; imageInfo.height++ ) + { + for (unsigned int j=0; j < sizeof(all_host_ptr_flags)/sizeof(cl_mem_flags); j++) + { + if( gDebugTrace ) + log_info( " at size %d,%d (flags[%u] 0x%x pitch %d)\n", (int)imageInfo.width, (int)imageInfo.height, j, (unsigned int) all_host_ptr_flags[j], (int)imageInfo.rowPitch ); + if ( test_get_image_info_single( device, &imageInfo, seed, all_host_ptr_flags[j], 0, 0 ) ) + return -1; + 
if (all_host_ptr_flags[j] & (CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR)) { // skip test when host_ptr is NULL + if ( test_get_image_info_single( device, &imageInfo, seed, all_host_ptr_flags[j], imageInfo.rowPitch, 0 ) ) + return -1; + } + } + } + } + } + else if( gTestMaxImages ) + { + // Try a specific set of maximum sizes + size_t numbeOfSizes; + size_t sizes[100][3]; + + get_max_sizes(&numbeOfSizes, 100, sizes, maxWidth, maxHeight, 1, 1, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE2D, imageInfo.format); + + for( size_t idx = 0; idx < numbeOfSizes; idx++ ) + { + imageInfo.width = sizes[ idx ][ 0 ]; + imageInfo.height = sizes[ idx ][ 1 ]; + imageInfo.rowPitch = imageInfo.width * pixelSize; + log_info( "Testing %d x %d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ] ); + for (unsigned int j=0; j < sizeof(all_host_ptr_flags)/sizeof(cl_mem_flags); j++) + { + if( gDebugTrace ) + log_info( " at max size %d,%d (flags[%u] 0x%x pitch %d)\n", (int)imageInfo.width, (int)imageInfo.height, j, (unsigned int) all_host_ptr_flags[j], (int)imageInfo.rowPitch ); + if( test_get_image_info_single( device, &imageInfo, seed, all_host_ptr_flags[j], 0, 0 ) ) + return -1; + if (all_host_ptr_flags[j] & (CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR)) { // skip test when host_ptr is NULL + if( test_get_image_info_single( device, &imageInfo, seed, all_host_ptr_flags[j], imageInfo.rowPitch, 0 ) ) + return -1; + } + } + } + } + else + { + for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ ) + { + cl_ulong size; + // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that + // image, the result array, plus offset arrays, will fit in the global ram space + do + { + imageInfo.width = (size_t)random_log_in_range( 16, (int)maxWidth / 32, seed ); + imageInfo.height = (size_t)random_log_in_range( 16, (int)maxHeight / 32, seed ); + + imageInfo.rowPitch = imageInfo.width * pixelSize; + size_t extraWidth = (int)random_log_in_range( 0, 64, seed ); + imageInfo.rowPitch 
+= extraWidth; + + do { + extraWidth++; + imageInfo.rowPitch += extraWidth; + } while ((imageInfo.rowPitch % pixelSize) != 0); + + size = (cl_ulong)imageInfo.rowPitch * (cl_ulong)imageInfo.height * 4; + } while( size > maxAllocSize || ( size * 3 ) > memSize ); + + for (unsigned int j=0; j < sizeof(all_host_ptr_flags)/sizeof(cl_mem_flags); j++) + { + if( gDebugTrace ) + log_info( " at size %d,%d (flags[%u] 0x%x pitch %d) out of %d,%d\n", (int)imageInfo.width, (int)imageInfo.height, j, (unsigned int) all_host_ptr_flags[j], (int)imageInfo.rowPitch, (int)maxWidth, (int)maxHeight ); + if ( test_get_image_info_single( device, &imageInfo, seed, all_host_ptr_flags[j], 0, 0 ) ) + return -1; + if (all_host_ptr_flags[j] & (CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR)) { // skip test when host_ptr is NULL + if ( test_get_image_info_single( device, &imageInfo, seed, all_host_ptr_flags[j], imageInfo.rowPitch, 0 ) ) + return -1; + } + } + } + } + + return 0; +} diff --git a/test_conformance/images/clGetInfo/test_3D.cpp b/test_conformance/images/clGetInfo/test_3D.cpp new file mode 100644 index 00000000..24f7021d --- /dev/null +++ b/test_conformance/images/clGetInfo/test_3D.cpp @@ -0,0 +1,158 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../testBase.h" + +#define MAX_ERR 0.005f +#define MAX_HALF_LINEAR_ERR 0.3f + +extern bool gDebugTrace, gTestSmallImages, gTestMaxImages; + +extern int test_get_image_info_single( cl_device_id device, image_descriptor *imageInfo, MTdata d, cl_mem_flags flags, size_t row_pitch, size_t slice_pitch ); + +int test_get_image_info_3D( cl_device_id device, cl_image_format *format, cl_mem_flags flags ) +{ + size_t maxWidth, maxHeight, maxDepth; + cl_ulong maxAllocSize, memSize; + image_descriptor imageInfo = { 0 }; + RandomSeed seed( gRandomSeed ); + size_t pixelSize; + + cl_mem_flags all_host_ptr_flags[] = { + flags, + CL_MEM_ALLOC_HOST_PTR | flags, + CL_MEM_COPY_HOST_PTR | flags, + CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR | flags, + CL_MEM_USE_HOST_PTR | flags + }; + + memset(&imageInfo, 0x0, sizeof(image_descriptor)); + imageInfo.format = format; + imageInfo.type = CL_MEM_OBJECT_IMAGE3D; + pixelSize = get_pixel_size( imageInfo.format ); + + int error = clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof( maxDepth ), &maxDepth, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL ); + test_error( error, "Unable to get max image 3D size from device" ); + + if (memSize > (cl_ulong)SIZE_MAX) { + memSize = (cl_ulong)SIZE_MAX; + } + + if( gTestSmallImages ) + { + for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ ) + { + imageInfo.rowPitch = imageInfo.width * pixelSize; + + for( imageInfo.height = 1; imageInfo.height < 9; imageInfo.height++ ) + { + imageInfo.slicePitch = imageInfo.rowPitch * imageInfo.height; + for( imageInfo.depth = 2; imageInfo.depth < 9; 
imageInfo.depth++ ) + { + for (unsigned int j=0; j < sizeof(all_host_ptr_flags)/sizeof(cl_mem_flags); j++) + { + if( gDebugTrace ) + log_info( " at size %d,%d,%d (flags[%u] 0x%lx pitch %d,%d)\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.depth, j, (unsigned long)all_host_ptr_flags[j], (int)imageInfo.rowPitch, (int)imageInfo.slicePitch ); + if ( test_get_image_info_single( device, &imageInfo, seed, all_host_ptr_flags[j], 0, 0 ) ) + return -1; + if (all_host_ptr_flags[j] & (CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR)) { // skip test when host_ptr is NULL + if ( test_get_image_info_single( device, &imageInfo, seed, all_host_ptr_flags[j], imageInfo.rowPitch, imageInfo.slicePitch ) ) + return -1; + } + } + } + } + } + } + else if( gTestMaxImages ) + { + // Try a specific set of maximum sizes + size_t numbeOfSizes; + size_t sizes[100][3]; + + get_max_sizes(&numbeOfSizes, 100, sizes, maxWidth, maxHeight, maxDepth, 1, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE3D, imageInfo.format); + + for( size_t idx = 0; idx < numbeOfSizes; idx++ ) + { + imageInfo.width = sizes[ idx ][ 0 ]; + imageInfo.height = sizes[ idx ][ 1 ]; + imageInfo.depth = sizes[ idx ][ 2 ]; + imageInfo.rowPitch = imageInfo.width * pixelSize; + imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch; + + log_info( "Testing %d x %d x %d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ], (int)sizes[ idx ][ 2 ] ); + for (unsigned int j=0; j < sizeof(all_host_ptr_flags)/sizeof(cl_mem_flags); j++) + { + if( gDebugTrace ) + log_info( " at max size %d,%d,%d (flags[%u] 0x%lx pitch %d,%d)\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ], (int)sizes[ idx ][ 2 ], j, (unsigned long)all_host_ptr_flags[j], (int)imageInfo.rowPitch, (int)imageInfo.slicePitch ); + if( test_get_image_info_single( device, &imageInfo, seed, all_host_ptr_flags[j], 0, 0 ) ) + return -1; + if (all_host_ptr_flags[j] & (CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR)) { // skip test when host_ptr is NULL + if( 
test_get_image_info_single( device, &imageInfo, seed, all_host_ptr_flags[j], imageInfo.rowPitch, imageInfo.slicePitch ) ) + return -1; + } + } + } + } + else + { + for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ ) + { + cl_ulong size; + // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that + // image, the result array, plus offset arrays, will fit in the global ram space + do + { + imageInfo.width = (size_t)random_log_in_range( 16, (int)maxWidth / 32, seed ); + imageInfo.height = (size_t)random_log_in_range( 16, (int)maxHeight / 32, seed ); + imageInfo.depth = (size_t)random_log_in_range( 16, (int)maxDepth / 32, seed ); + + imageInfo.rowPitch = imageInfo.width * pixelSize; + imageInfo.slicePitch = imageInfo.rowPitch * imageInfo.height; + + size_t extraWidth = (int)random_log_in_range( 0, 64, seed ); + imageInfo.rowPitch += extraWidth; + + do { + extraWidth++; + imageInfo.rowPitch += extraWidth; + } while ((imageInfo.rowPitch % pixelSize) != 0); + + size_t extraHeight = (int)random_log_in_range( 0, 8, seed ); + imageInfo.slicePitch = imageInfo.rowPitch * (imageInfo.height + extraHeight); + + size = (cl_ulong)imageInfo.slicePitch * (cl_ulong)imageInfo.depth * 4 * 4; + } while( size > maxAllocSize || ( size * 3 ) > memSize ); + + for (unsigned int j=0; j < sizeof(all_host_ptr_flags)/sizeof(cl_mem_flags); j++) + { + if( gDebugTrace ) + log_info( " at size %d,%d,%d (flags[%u] 0x%lx pitch %d,%d) out of %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.depth, j, (unsigned long) all_host_ptr_flags[j], (int)imageInfo.rowPitch, (int)imageInfo.slicePitch, (int)maxWidth, (int)maxHeight, (int)maxDepth ); + if ( test_get_image_info_single( device, &imageInfo, seed, all_host_ptr_flags[j], 0, 0 ) ) + return -1; + if (all_host_ptr_flags[j] & (CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR)) { // skip test when host_ptr is NULL + if ( test_get_image_info_single( device, &imageInfo, seed, all_host_ptr_flags[j], 
imageInfo.rowPitch, imageInfo.slicePitch ) ) + return -1; + } + } + } + } + + return 0; +} diff --git a/test_conformance/images/clGetInfo/test_loops.cpp b/test_conformance/images/clGetInfo/test_loops.cpp new file mode 100644 index 00000000..2c9e400f --- /dev/null +++ b/test_conformance/images/clGetInfo/test_loops.cpp @@ -0,0 +1,228 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../testBase.h" + +extern cl_filter_mode gFilterModeToUse; +extern cl_addressing_mode gAddressModeToUse; +extern int gTypesToTest; +extern int gNormalizedModeToUse; +extern cl_channel_type gChannelTypeToUse; +extern cl_command_queue queue; +extern cl_context context; + + +extern bool gDebugTrace; + +extern int test_get_image_info_1D( cl_device_id device, cl_image_format *format, cl_mem_flags flags ); +extern int test_get_image_info_2D( cl_device_id device, cl_image_format *format, cl_mem_flags flags ); +extern int test_get_image_info_3D( cl_device_id device, cl_image_format *format, cl_mem_flags flags ); +extern int test_get_image_info_1D_array( cl_device_id device, cl_image_format *format, cl_mem_flags flags ); +extern int test_get_image_info_2D_array( cl_device_id device, cl_image_format *format, cl_mem_flags flags ); + +static const char *str_1d_image = "1D"; +static const char *str_2d_image = "2D"; +static const char *str_3d_image = "3D"; +static const char *str_1d_image_array = "1D array"; +static const char 
*str_2d_image_array = "2D array"; + +static const char *convert_image_type_to_string(cl_mem_object_type image_type) +{ + const char *p; + switch (image_type) + { + case CL_MEM_OBJECT_IMAGE1D: + p = str_1d_image; + break; + case CL_MEM_OBJECT_IMAGE2D: + p = str_2d_image; + break; + case CL_MEM_OBJECT_IMAGE3D: + p = str_3d_image; + break; + case CL_MEM_OBJECT_IMAGE1D_ARRAY: + p = str_1d_image_array; + break; + case CL_MEM_OBJECT_IMAGE2D_ARRAY: + p = str_2d_image_array; + break; + } + return p; +} + + +int filter_formats( cl_image_format *formatList, bool *filterFlags, unsigned int formatCount, cl_channel_type *channelDataTypesToFilter ) +{ + int numSupported = 0; + for( unsigned int j = 0; j < formatCount; j++ ) + { + // If this format has been previously filtered, remove the filter + if( filterFlags[ j ] ) + filterFlags[ j ] = false; + + // Have we already discarded this via the command line? + if( gChannelTypeToUse != (cl_channel_type)-1 && gChannelTypeToUse != formatList[ j ].image_channel_data_type ) + { + filterFlags[ j ] = true; + continue; + } + + // Is given format standard channel order and type given by spec. We don't want to test it if this is vendor extension + if( !IsChannelOrderSupported( formatList[ j ].image_channel_order ) || !IsChannelTypeSupported( formatList[ j ].image_channel_data_type ) ) + { + filterFlags[ j ] = true; + continue; + } + + // We don't filter by channel type + if( !channelDataTypesToFilter ) + { + numSupported++; + continue; + } + + // Is the format supported? 
+ int i; + for( i = 0; channelDataTypesToFilter[ i ] != (cl_channel_type)-1; i++ ) + { + if( formatList[ j ].image_channel_data_type == channelDataTypesToFilter[ i ] ) + { + numSupported++; + break; + } + } + if( channelDataTypesToFilter[ i ] == (cl_channel_type)-1 ) + { + // Format is NOT supported, so mark it as such + filterFlags[ j ] = true; + } + } + return numSupported; +} + +int get_format_list( cl_device_id device, cl_mem_object_type image_type, cl_image_format * &outFormatList, + unsigned int &outFormatCount, cl_mem_flags flags ) +{ + int error; + + cl_image_format tempList[ 128 ]; + error = clGetSupportedImageFormats( context, (cl_mem_flags)flags, + image_type, 128, tempList, &outFormatCount ); + test_error( error, "Unable to get count of supported image formats" ); + + outFormatList = new cl_image_format[ outFormatCount ]; + error = clGetSupportedImageFormats( context, (cl_mem_flags)flags, + image_type, outFormatCount, outFormatList, NULL ); + test_error( error, "Unable to get list of supported image formats" ); + return 0; +} + +int test_image_type( cl_device_id device, cl_mem_object_type image_type, cl_mem_flags flags ) +{ + log_info( "Running %s %s-only tests...\n", convert_image_type_to_string(image_type), flags == CL_MEM_READ_ONLY ? "read" : "write" ); + + int ret = 0; + + // Grab the list of supported image formats for integer reads + cl_image_format *formatList; + bool *filterFlags; + unsigned int numFormats; + + if ( get_format_list( device, image_type, formatList, numFormats, flags ) ) + return -1; + + BufferOwningPtr formatListBuf(formatList); + + if ((image_type == CL_MEM_OBJECT_IMAGE3D) && (flags != CL_MEM_READ_ONLY)) { + log_info("No requirement for 3D write in OpenCL 1.2. 
Not checking formats.\n"); + } else { + log_info("Checking for required OpenCL 1.2 formats.\n"); + if (check_minimum_supported( formatList, numFormats, flags ) == false) { + ret++; + } else { + log_info("All required formats present.\n"); + } + } + + filterFlags = new bool[ numFormats ]; + BufferOwningPtr filterFlagsBuf(filterFlags); + + if( filterFlags == NULL ) + { + log_error( "ERROR: Out of memory allocating filter flags list!\n" ); + return -1; + } + memset( filterFlags, 0, sizeof( bool ) * numFormats ); + filter_formats( formatList, filterFlags, numFormats, 0 ); + + // Run the format list + for( unsigned int i = 0; i < numFormats; i++ ) + { + int test_return = 0; + if( filterFlags[i] ) + { + log_info( "NOT RUNNING: " ); + print_header( &formatList[ i ], false ); + continue; + } + + print_header( &formatList[ i ], false ); + + gTestCount++; + + switch (image_type) { + case CL_MEM_OBJECT_IMAGE1D: + test_return = test_get_image_info_1D( device, &formatList[ i ], flags ); + break; + case CL_MEM_OBJECT_IMAGE2D: + test_return = test_get_image_info_2D( device, &formatList[ i ], flags ); + break; + case CL_MEM_OBJECT_IMAGE3D: + test_return = test_get_image_info_3D( device, &formatList[ i ], flags ); + break; + case CL_MEM_OBJECT_IMAGE1D_ARRAY: + test_return = test_get_image_info_1D_array( device, &formatList[ i ], flags ); + break; + case CL_MEM_OBJECT_IMAGE2D_ARRAY: + test_return = test_get_image_info_2D_array( device, &formatList[ i ], flags ); + break; + } + + if (test_return) { + gTestFailure++; + log_error( "FAILED: " ); + print_header( &formatList[ i ], true ); + log_info( "\n" ); + } + + ret += test_return; + } + + return ret; +} + +int test_image_set( cl_device_id device, cl_mem_object_type image_type ) +{ + int ret = 0; + + ret += test_image_type( device, image_type, CL_MEM_READ_ONLY ); + ret += test_image_type( device, image_type, CL_MEM_WRITE_ONLY ); + + return ret; +} + + + + diff --git a/test_conformance/images/clReadWriteImage/CMakeLists.txt 
b/test_conformance/images/clReadWriteImage/CMakeLists.txt new file mode 100644 index 00000000..b56b1e34 --- /dev/null +++ b/test_conformance/images/clReadWriteImage/CMakeLists.txt @@ -0,0 +1,24 @@ +set(MODULE_NAME CL_READ_WRITE_IMAGES) + +set(${MODULE_NAME}_SOURCES + main.cpp + test_read_1D.cpp + test_read_1D_array.cpp + test_read_2D.cpp + test_read_2D_array.cpp + test_loops.cpp + test_read_3D.cpp + ../../../test_common/harness/errorHelpers.c + ../../../test_common/harness/threadTesting.c + ../../../test_common/harness/kernelHelpers.c + ../../../test_common/harness/imageHelpers.cpp + ../../../test_common/harness/mt19937.c + ../../../test_common/harness/conversions.c + ../../../test_common/harness/testHarness.c + ../../../test_common/harness/typeWrappers.cpp + ../../../test_common/harness/msvc9.c + ../../../test_common/harness/parseParameters.cpp +) + +include(../../CMakeCommon.txt) + diff --git a/test_conformance/images/clReadWriteImage/Jamfile b/test_conformance/images/clReadWriteImage/Jamfile new file mode 100644 index 00000000..70fa42f1 --- /dev/null +++ b/test_conformance/images/clReadWriteImage/Jamfile @@ -0,0 +1,18 @@ +project + : requirements +# gcc:-xc++ +# msvc:"/TP" + ; + +exe test_cl_read_write_images + : main.cpp + test_read_2D.cpp + test_read_3D.cpp + test_loops.cpp + ; + +install dist + : test_cl_read_write_images + : debug:$(DIST)/debug/tests/test_conformance/images/clReadWriteImage + release:$(DIST)/release/tests/test_conformance/images/clReadWriteImage + ; diff --git a/test_conformance/images/clReadWriteImage/Makefile b/test_conformance/images/clReadWriteImage/Makefile new file mode 100644 index 00000000..458dcef1 --- /dev/null +++ b/test_conformance/images/clReadWriteImage/Makefile @@ -0,0 +1,51 @@ +ifdef BUILD_WITH_ATF +ATF = -framework ATF +USE_ATF = -DUSE_ATF +endif + +SRCS = main.cpp \ + test_read_1D.cpp \ + test_read_1D_array.cpp \ + test_read_2D.cpp \ + test_read_2D_array.cpp \ + test_loops.cpp \ + test_read_3D.cpp \ + 
../../../test_common/harness/errorHelpers.c \ + ../../../test_common/harness/threadTesting.c \ + ../../../test_common/harness/kernelHelpers.c \ + ../../../test_common/harness/imageHelpers.cpp \ + ../../../test_common/harness/conversions.c \ + ../../../test_common/harness/testHarness.c \ + ../../../test_common/harness/mt19937.c \ + ../../../test_common/harness/typeWrappers.cpp + +DEFINES = DONT_TEST_GARBAGE_POINTERS + +SOURCES = $(abspath $(SRCS)) +LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries +LIBPATH += -L. +FRAMEWORK = +HEADERS = +TARGET = test_cl_read_write_images +INCLUDE = +COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32 -Os +CC = c++ +CXX = c++ +CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE) +CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE) +LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF} + +OBJECTS := ${SOURCES:.c=.o} +OBJECTS := ${OBJECTS:.cpp=.o} + +TARGETOBJECT = +all: $(TARGET) + +$(TARGET): $(OBJECTS) + $(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES) + +clean: + rm -f $(TARGET) $(OBJECTS) + +.DEFAULT: + @echo The target \"$@\" does not exist in Makefile. diff --git a/test_conformance/images/clReadWriteImage/main.cpp b/test_conformance/images/clReadWriteImage/main.cpp new file mode 100644 index 00000000..afee9c4c --- /dev/null +++ b/test_conformance/images/clReadWriteImage/main.cpp @@ -0,0 +1,265 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../../test_common/harness/compat.h" + +#include +#include + +#if !defined(_WIN32) +#include +#include +#endif + +#include "../testBase.h" + +bool gDebugTrace = false, gTestSmallImages = false, gTestMaxImages = false, gUseRamp = false, gTestRounding = false, gTestMipmaps = false; +int gTypesToTest = 0; +cl_channel_type gChannelTypeToUse = (cl_channel_type)-1; +bool gEnablePitch = false; +cl_device_type gDeviceType = CL_DEVICE_TYPE_DEFAULT; +cl_command_queue queue; +cl_context context; + +#define MAX_ALLOWED_STD_DEVIATION_IN_MB 8.0 + +void printUsage( const char *execName ) +{ + const char *p = strrchr( execName, '/' ); + if( p != NULL ) + execName = p + 1; + + log_info( "Usage: %s [debug_trace] [small_images]\n", execName ); + log_info( "Where:\n" ); + log_info( "\t1D - Only test 1D images\n" ); + log_info( "\t2D - Only test 2D images\n" ); + log_info( "\t3D - Only test 3D images\n" ); + log_info( "\t1Darray - Only test 1D image arrays\n" ); + log_info( "\t2Darray - Only test 2D image arrays\n" ); + log_info( "\n" ); + log_info( "\tdebug_trace - Enables additional debug info logging\n" ); + log_info( "\tsmall_images - Runs every format through a loop of widths 1-13 and heights 1-9, instead of random sizes\n" ); + log_info( "\tmax_images - Runs every format through a set of size combinations with the max values, max values - 1, and max values / 128\n" ); + log_info( "\trounding - Runs every format through a single image filled with every possible value for that image format, to verify rounding works properly\n" ); + log_info( "\tuse_pitches - Enables row and slice pitches\n" ); + log_info( "\tuse_ramp - Instead of random data, uses images filled with ramps (and 0xff on any padding pixels) to ease debugging\n" ); + log_info( "\ttest_mipmaps - Test mipmapped images\n" ); +} + + +extern int test_image_set( cl_device_id device, cl_mem_object_type 
image_type ); + +int main(int argc, const char *argv[]) +{ + cl_platform_id platform; + cl_device_id device; + cl_channel_type chanType; + char str[ 128 ]; + bool randomize = false; + int testMethods = 0; + + test_start(); + + checkDeviceTypeOverride( &gDeviceType ); + + // Parse arguments + for( int i = 1; i < argc; i++ ) + { + strncpy( str, argv[ i ], sizeof( str ) - 1 ); + + if( strcmp( str, "cpu" ) == 0 || strcmp( str, "CL_DEVICE_TYPE_CPU" ) == 0 ) + gDeviceType = CL_DEVICE_TYPE_CPU; + else if( strcmp( str, "gpu" ) == 0 || strcmp( str, "CL_DEVICE_TYPE_GPU" ) == 0 ) + gDeviceType = CL_DEVICE_TYPE_GPU; + else if( strcmp( str, "accelerator" ) == 0 || strcmp( str, "CL_DEVICE_TYPE_ACCELERATOR" ) == 0 ) + gDeviceType = CL_DEVICE_TYPE_ACCELERATOR; + else if( strcmp( str, "CL_DEVICE_TYPE_DEFAULT" ) == 0 ) + gDeviceType = CL_DEVICE_TYPE_DEFAULT; + + else if( strcmp( str, "debug_trace" ) == 0 ) + gDebugTrace = true; + + else if( strcmp( str, "small_images" ) == 0 ) + gTestSmallImages = true; + else if( strcmp( str, "max_images" ) == 0 ) + gTestMaxImages = true; + else if( strcmp( str, "use_pitches" ) == 0 ) + gEnablePitch = true; + else if( strcmp( str, "use_ramps" ) == 0 ) + gUseRamp = true; + else if( strcmp(str, "test_mipmaps") == 0 ) { + gTestMipmaps = true; + // Don't test pitches with mipmaps right now. + gEnablePitch = false; + } + else if( strcmp( str, "randomize" ) == 0 ) + randomize = true; + + else if( strcmp( str, "1D" ) == 0 ) + testMethods |= k1D; + else if( strcmp( str, "2D" ) == 0 ) + testMethods |= k2D; + else if( strcmp( str, "3D" ) == 0 ) + testMethods |= k3D; + else if( strcmp( str, "1Darray" ) == 0 ) + testMethods |= k1DArray; + else if( strcmp( str, "2Darray" ) == 0 ) + testMethods |= k2DArray; + + else if( strcmp( str, "help" ) == 0 || strcmp( str, "?" 
) == 0 ) + { + printUsage( argv[ 0 ] ); + return -1; + } + else if( ( chanType = get_channel_type_from_name( str ) ) != (cl_channel_type)-1 ) + gChannelTypeToUse = chanType; + else + { + log_error( "ERROR: Unknown argument %d: %s. Exiting....\n", i, str ); + return -1; + } + } + + if (testMethods == 0) + testMethods = k1D | k2D | k3D | k1DArray | k2DArray; + + // Seed the random # generators + if( randomize ) + { + gRandomSeed = (cl_uint) time( NULL ); + log_info( "Random seed: %u.\n", gRandomSeed ); + gReSeed = 1; + } + + int error; + // Get our platform + error = clGetPlatformIDs(1, &platform, NULL); + if( error ) + { + print_error( error, "Unable to get platform" ); + test_finish(); + return -1; + } + + // Get our device + unsigned int num_devices; + error = clGetDeviceIDs(platform, gDeviceType, 0, NULL, &num_devices); + if( error ) + { + print_error( error, "Unable to get number of devices" ); + test_finish(); + return -1; + } + + uint32_t gDeviceIndex = 0; + const char* device_index_env = getenv("CL_DEVICE_INDEX"); + if (device_index_env) { + if (device_index_env) { + gDeviceIndex = atoi(device_index_env); + } + + if (gDeviceIndex >= num_devices) { + vlog("Specified CL_DEVICE_INDEX=%d out of range, using index 0.\n", gDeviceIndex); + gDeviceIndex = 0; + } + } + + cl_device_id *gDeviceList = (cl_device_id *)malloc( num_devices * sizeof( cl_device_id ) ); + error = clGetDeviceIDs(platform, gDeviceType, num_devices, gDeviceList, NULL); + if( error ) + { + print_error( error, "Unable to get devices" ); + free( gDeviceList ); + test_finish(); + return -1; + } + + device = gDeviceList[gDeviceIndex]; + free( gDeviceList ); + + log_info( "Using " ); + if( printDeviceHeader( device ) != CL_SUCCESS ) + { + test_finish(); + return -1; + } + + // Check for image support + if(checkForImageSupport( device ) == CL_IMAGE_FORMAT_NOT_SUPPORTED) { + log_info("Device does not support images. 
Skipping test.\n"); + test_finish(); + return 0; + } + + // Create a context to test with + context = clCreateContext( NULL, 1, &device, notify_callback, NULL, &error ); + if( error != CL_SUCCESS ) + { + print_error( error, "Unable to create testing context" ); + test_finish(); + return -1; + } + + // Create a queue against the context + queue = clCreateCommandQueueWithProperties( context, device, 0, &error ); + if( error != CL_SUCCESS ) + { + print_error( error, "Unable to create testing command queue" ); + test_finish(); + return -1; + } + + if( gTestSmallImages ) + log_info( "Note: Using small test images\n" ); + + // Run the test now + int ret = 0; + if (testMethods & k1D) + ret += test_image_set( device, CL_MEM_OBJECT_IMAGE1D ); + if (testMethods & k2D) + ret += test_image_set( device, CL_MEM_OBJECT_IMAGE2D ); + if (testMethods & k3D) + ret += test_image_set( device, CL_MEM_OBJECT_IMAGE3D ); + if (testMethods & k1DArray) + ret += test_image_set( device, CL_MEM_OBJECT_IMAGE1D_ARRAY ); + if (testMethods & k2DArray) + ret += test_image_set( device, CL_MEM_OBJECT_IMAGE2D_ARRAY ); + + error = clFinish(queue); + if (error) + print_error(error, "clFinish failed."); + + if (gTestFailure == 0) { + if (gTestCount > 1) + log_info("PASSED %d of %d tests.\n", gTestCount, gTestCount); + else + log_info("PASSED test.\n"); + } else if (gTestFailure > 0) { + if (gTestCount > 1) + log_error("FAILED %d of %d tests.\n", gTestFailure, gTestCount); + else + log_error("FAILED test.\n"); + } + + // Clean up + clReleaseCommandQueue(queue); + clReleaseContext(context); + test_finish(); + + if (gTestFailure > 0) + return gTestFailure; + + return ret; +} diff --git a/test_conformance/images/clReadWriteImage/test_loops.cpp b/test_conformance/images/clReadWriteImage/test_loops.cpp new file mode 100644 index 00000000..ab2cc5e3 --- /dev/null +++ b/test_conformance/images/clReadWriteImage/test_loops.cpp @@ -0,0 +1,227 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../testBase.h" + +extern cl_filter_mode gFilterModeToUse; +extern cl_addressing_mode gAddressModeToUse; +extern int gTypesToTest; +extern int gNormalizedModeToUse; +extern cl_channel_type gChannelTypeToUse; +extern cl_command_queue queue; +extern cl_context context; + + +extern bool gDebugTrace; +extern bool gTestMipmaps; + +extern int test_read_image_set_1D( cl_device_id device, cl_image_format *format ); +extern int test_read_image_set_2D( cl_device_id device, cl_image_format *format ); +extern int test_read_image_set_3D( cl_device_id device, cl_image_format *format ); +extern int test_read_image_set_1D_array( cl_device_id device, cl_image_format *format ); +extern int test_read_image_set_2D_array( cl_device_id device, cl_image_format *format ); + +static const char *str_1d_image = "1D"; +static const char *str_2d_image = "2D"; +static const char *str_3d_image = "3D"; +static const char *str_1d_image_array = "1D array"; +static const char *str_2d_image_array = "2D array"; + +static const char *convert_image_type_to_string(cl_mem_object_type imageType) +{ + const char *p; + switch (imageType) + { + case CL_MEM_OBJECT_IMAGE1D: + p = str_1d_image; + break; + case CL_MEM_OBJECT_IMAGE2D: + p = str_2d_image; + break; + case CL_MEM_OBJECT_IMAGE3D: + p = str_3d_image; + break; + case CL_MEM_OBJECT_IMAGE1D_ARRAY: + p = str_1d_image_array; + break; + case CL_MEM_OBJECT_IMAGE2D_ARRAY: + p = 
str_2d_image_array; + break; + } + return p; +} + + +int filter_formats( cl_image_format *formatList, bool *filterFlags, unsigned int formatCount, cl_channel_type *channelDataTypesToFilter ) +{ + int numSupported = 0; + for( unsigned int j = 0; j < formatCount; j++ ) + { + // If this format has been previously filtered, remove the filter + if( filterFlags[ j ] ) + filterFlags[ j ] = false; + + // Have we already discarded this via the command line? + if( gChannelTypeToUse != (cl_channel_type)-1 && gChannelTypeToUse != formatList[ j ].image_channel_data_type ) + { + filterFlags[ j ] = true; + continue; + } + + // Is given format standard channel order and type given by spec. We don't want to test it if this is vendor extension + if( !IsChannelOrderSupported( formatList[ j ].image_channel_order ) || !IsChannelTypeSupported( formatList[ j ].image_channel_data_type ) ) + { + filterFlags[ j ] = true; + continue; + } + + // We don't filter by channel type + if( !channelDataTypesToFilter ) + { + numSupported++; + continue; + } + + // Is the format supported? 
+ int i; + for( i = 0; channelDataTypesToFilter[ i ] != (cl_channel_type)-1; i++ ) + { + if( formatList[ j ].image_channel_data_type == channelDataTypesToFilter[ i ] ) + { + numSupported++; + break; + } + } + if( channelDataTypesToFilter[ i ] == (cl_channel_type)-1 ) + { + // Format is NOT supported, so mark it as such + filterFlags[ j ] = true; + } + } + return numSupported; +} + +int get_format_list( cl_device_id device, cl_mem_object_type imageType, cl_image_format * &outFormatList, unsigned int &outFormatCount, cl_mem_flags flags ) +{ + int error; + + cl_image_format tempList[ 128 ]; + error = clGetSupportedImageFormats( context, (cl_mem_flags)flags, + imageType, 128, tempList, &outFormatCount ); + test_error( error, "Unable to get count of supported image formats" ); + + outFormatList = new cl_image_format[ outFormatCount ]; + error = clGetSupportedImageFormats( context, (cl_mem_flags)flags, + imageType, outFormatCount, outFormatList, NULL ); + test_error( error, "Unable to get list of supported image formats" ); + return 0; +} + +int test_image_type( cl_device_id device, cl_mem_object_type imageType, cl_mem_flags flags ) +{ + log_info( "Running %s %s %s-only tests...\n", gTestMipmaps?"mipmapped":"",convert_image_type_to_string(imageType), flags == CL_MEM_READ_ONLY ? "read" : "write" ); + + int ret = 0; + + // Grab the list of supported image formats for integer reads + cl_image_format *formatList; + bool *filterFlags; + unsigned int numFormats; + + if ( gTestMipmaps ) + { + if ( 0 == is_extension_available( device, "cl_khr_mipmap_image" )) + { + log_info( "-----------------------------------------------------\n" ); + log_info( "This device does not support cl_khr_mipmap_image.\nSkipping mipmapped image test. 
\n" ); + log_info( "-----------------------------------------------------\n\n" ); + return 0; + } + } + + if( get_format_list( device, imageType, formatList, numFormats, flags ) ) + return -1; + + filterFlags = new bool[ numFormats ]; + if( filterFlags == NULL ) + { + log_error( "ERROR: Out of memory allocating filter flags list!\n" ); + return -1; + } + memset( filterFlags, 0, sizeof( bool ) * numFormats ); + filter_formats( formatList, filterFlags, numFormats, 0 ); + + // Run the format list + for( unsigned int i = 0; i < numFormats; i++ ) + { + int test_return = 0; + if( filterFlags[i] ) + { + log_info( "NOT RUNNING: " ); + print_header( &formatList[ i ], false ); + continue; + } + + print_header( &formatList[ i ], false ); + + gTestCount++; + + switch (imageType) { + case CL_MEM_OBJECT_IMAGE1D: + test_return = test_read_image_set_1D( device, &formatList[ i ] ); + break; + case CL_MEM_OBJECT_IMAGE2D: + test_return = test_read_image_set_2D( device, &formatList[ i ] ); + break; + case CL_MEM_OBJECT_IMAGE3D: + test_return = test_read_image_set_3D( device, &formatList[ i ] ); + break; + case CL_MEM_OBJECT_IMAGE1D_ARRAY: + test_return = test_read_image_set_1D_array( device, &formatList[ i ] ); + break; + case CL_MEM_OBJECT_IMAGE2D_ARRAY: + test_return = test_read_image_set_2D_array( device, &formatList[ i ] ); + break; + } + + if (test_return) { + gTestFailure++; + log_error( "FAILED: " ); + print_header( &formatList[ i ], true ); + log_info( "\n" ); + } + + ret += test_return; + } + + delete [] filterFlags; + delete [] formatList; + + return ret; +} + +int test_image_set( cl_device_id device, cl_mem_object_type imageType ) +{ + int ret = 0; + + ret += test_image_type( device, imageType, CL_MEM_READ_ONLY ); + ret += test_image_type( device, imageType, CL_MEM_WRITE_ONLY ); + + return ret; +} + + + + diff --git a/test_conformance/images/clReadWriteImage/test_read_1D.cpp b/test_conformance/images/clReadWriteImage/test_read_1D.cpp new file mode 100644 index 00000000..407f27fe 
--- /dev/null +++ b/test_conformance/images/clReadWriteImage/test_read_1D.cpp @@ -0,0 +1,273 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../testBase.h" + +#define MAX_ERR 0.005f +#define MAX_HALF_LINEAR_ERR 0.3f + +extern bool gDebugTrace, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestRounding, gTestMipmaps; +extern cl_filter_mode gFilterModeToUse; +extern cl_addressing_mode gAddressModeToUse; +extern uint64_t gRoundingStartValue; +extern cl_command_queue queue; +extern cl_context context; + + +int test_read_image_1D( cl_device_id device, image_descriptor *imageInfo, MTdata d ) +{ + int error; + + clMemWrapper image; + + // Generate some data to test against + BufferOwningPtr imageValues; + generate_random_image_data( imageInfo, imageValues, d ); + + if( gDebugTrace ) + { + log_info( " - Creating %s 1D image %d...\n", gTestMipmaps?"mipmapped":"", (int)imageInfo->width ); + log_info( " with %llu mip levels\n", (unsigned long long) imageInfo->num_mip_levels ); + } + + // Construct testing sources + if(!gTestMipmaps) + { + image = create_image_1d( context, (cl_mem_flags)(CL_MEM_READ_ONLY), imageInfo->format, imageInfo->width, 0, NULL, NULL, &error ); + if( image == NULL ) + { + log_error( "ERROR: Unable to create 1D image of size %d (%s)", (int)imageInfo->width, IGetErrorString( error ) ); + return -1; + } + } + else + { + cl_image_desc image_desc = {0}; + 
image_desc.image_type = CL_MEM_OBJECT_IMAGE1D; + image_desc.image_width = imageInfo->width; + image_desc.num_mip_levels = imageInfo->num_mip_levels; + + image = clCreateImage( context, CL_MEM_READ_ONLY, imageInfo->format, &image_desc, NULL, &error); + if( error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create %d level mipmapped 1D image of size %d x %d (pitch %d ) (%s)",(int)imageInfo->num_mip_levels, (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->rowPitch, IGetErrorString( error ) ); + return error; + } + } + + if( gDebugTrace ) + log_info( " - Writing image...\n" ); + + size_t origin[ 3 ] = { 0, 0, 0 }; + size_t region[ 3 ] = { 0, 1, 1 }; + size_t fullImageSize; + if( gTestMipmaps ) + { + fullImageSize = (size_t)compute_mipmapped_image_size( *imageInfo ); + } + else + { + fullImageSize = imageInfo->rowPitch; + } + + BufferOwningPtr resultValues(malloc(fullImageSize)); + size_t imgValMipLevelOffset = 0; + + for( size_t lod = 0; (gTestMipmaps && lod < imageInfo->num_mip_levels) || (!gTestMipmaps && lod < 1); lod++) + { + float lod_float = (float) lod; + origin[1] = lod; + size_t width_lod, row_pitch_lod; + + width_lod = (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1; + row_pitch_lod = gTestMipmaps ? (width_lod * get_pixel_size( imageInfo->format )): imageInfo->rowPitch; + + region[0] = width_lod; + + if ( gDebugTrace ) + if ( gTestMipmaps) { + log_info(" - Working at mipLevel :%llu\n", (unsigned long long)lod); + } + error = clEnqueueWriteImage(queue, image, CL_FALSE, + origin, region, ( gEnablePitch ? 
row_pitch_lod : 0 ), 0, + (char*)imageValues + imgValMipLevelOffset, 0, NULL, NULL); + if (error != CL_SUCCESS) { + log_error( "ERROR: Unable to write to 1D image of size %d \n", (int)width_lod ); + return -1; + } + + // To verify, we just read the results right back and see whether they match the input + if( gDebugTrace ) + { + log_info( " - Initing result array...\n" ); + } + + // Note: we read back without any pitch, to verify pitch actually WORKED + size_t scanlineSize = width_lod * get_pixel_size( imageInfo->format ); + size_t imageSize = scanlineSize; + memset( resultValues, 0xff, imageSize ); + + if( gDebugTrace ) + log_info( " - Reading results...\n" ); + + error = clEnqueueReadImage( queue, image, CL_TRUE, origin, region, 0, 0, resultValues, 0, NULL, NULL ); + test_error( error, "Unable to read image values" ); + + // Verify scanline by scanline, since the pitches are different + char *sourcePtr = (char*)imageValues + imgValMipLevelOffset; + char *destPtr = resultValues; + + if( memcmp( sourcePtr, destPtr, scanlineSize ) != 0 ) + { + log_error( "ERROR: Scanline did not verify for image size %d pitch %d (extra %d bytes)\n", (int)width_lod, (int)row_pitch_lod, (int)row_pitch_lod - (int)width_lod * (int)get_pixel_size( imageInfo->format ) ); + + log_error( "First few values: \n" ); + log_error( " Input: " ); + uint32_t *s = (uint32_t *)sourcePtr; + uint32_t *d = (uint32_t *)destPtr; + for( int q = 0; q < 12; q++ ) + log_error( "%08x ", s[ q ] ); + log_error( "\nOutput: " ); + for( int q = 0; q < 12; q++ ) + log_error( "%08x ", d[ q ] ); + log_error( "\n" ); + + int outX; + int offset = (int)get_pixel_size( imageInfo->format ) * (int)( width_lod - 16 ); + if( offset < 0 ) + offset = 0; + int foundCount = debug_find_vector_in_image( (char*)imageValues + imgValMipLevelOffset, imageInfo, destPtr + offset, get_pixel_size( imageInfo->format ), &outX, NULL, NULL ); + if( foundCount > 0 ) + { + int returnedOffset = ( offset / (int)get_pixel_size( imageInfo->format ) 
) - outX; + + if( memcmp( sourcePtr + returnedOffset * get_pixel_size( imageInfo->format ), destPtr, get_pixel_size( imageInfo->format ) * 8 ) == 0 ) + log_error( " Values appear to be offsetted by %d\n", returnedOffset ); + else + log_error( " Calculated offset is %d but unable to verify\n", returnedOffset ); + } + else + { + log_error( " Unable to determine offset\n" ); + } + return -1; + } + imgValMipLevelOffset += width_lod * get_pixel_size( imageInfo->format ); + } + return 0; +} + +int test_read_image_set_1D( cl_device_id device, cl_image_format *format ) +{ + size_t maxWidth; + cl_ulong maxAllocSize, memSize; + image_descriptor imageInfo = { 0 }; + RandomSeed seed( gRandomSeed ); + size_t pixelSize; + + imageInfo.type = CL_MEM_OBJECT_IMAGE1D; + imageInfo.format = format; + imageInfo.height = imageInfo.depth = imageInfo.slicePitch = 0; + pixelSize = get_pixel_size( imageInfo.format ); + + int error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL ); + test_error( error, "Unable to get max image 2D size from device" ); + + if (memSize > (cl_ulong)SIZE_MAX) { + memSize = (cl_ulong)SIZE_MAX; + } + + if( gTestSmallImages ) + { + for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ ) + { + imageInfo.rowPitch = imageInfo.width * pixelSize; + + if (gTestMipmaps) + imageInfo.num_mip_levels = (cl_uint) random_log_in_range(2, (int)compute_max_mip_levels(imageInfo.width, 0, 0), seed); + + if( gDebugTrace ) + log_info( " at size %d\n", (int)imageInfo.width ); + + int ret = test_read_image_1D( device, &imageInfo, seed ); + if( ret ) + return -1; + } + } + else if( gTestMaxImages ) + { + // Try a specific set of maximum sizes + size_t numbeOfSizes; + size_t sizes[100][3]; + + get_max_sizes(&numbeOfSizes, 100, 
sizes, maxWidth, 1, 1, 1, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE1D, imageInfo.format); + + for( size_t idx = 0; idx < numbeOfSizes; idx++ ) + { + imageInfo.width = sizes[idx][0]; + imageInfo.rowPitch = imageInfo.width * pixelSize; + + if (gTestMipmaps) + imageInfo.num_mip_levels = (cl_uint) random_log_in_range(2, (int)compute_max_mip_levels(imageInfo.width, 0, 0), seed); + + log_info("Testing %d\n", (int)imageInfo.width); + if( gDebugTrace ) + log_info( " at max size %d\n", (int)maxWidth ); + if( test_read_image_1D( device, &imageInfo, seed ) ) + return -1; + } + } + else + { + for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ ) + { + cl_ulong size; + // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that + // image, the result array, plus offset arrays, will fit in the global ram space + do + { + imageInfo.width = (size_t)random_log_in_range( 16, (int)maxWidth / 32, seed ); + + if (gTestMipmaps) + { + imageInfo.num_mip_levels = (cl_uint) random_log_in_range(2, (int)compute_max_mip_levels(imageInfo.width, 0, 0), seed); + imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format ); + size = compute_mipmapped_image_size( imageInfo ); + } + else + { + imageInfo.rowPitch = imageInfo.width * pixelSize; + if( gEnablePitch ) + { + size_t extraWidth = (int)random_log_in_range( 0, 64, seed ); + imageInfo.rowPitch += extraWidth * pixelSize; + } + + size = (size_t)imageInfo.rowPitch * 4; + } + } while( size > maxAllocSize || ( size / 3 ) > memSize ); + + if( gDebugTrace ) + log_info( " at size %d (row pitch %d) out of %d\n", (int)imageInfo.width, (int)imageInfo.rowPitch, (int)maxWidth ); + int ret = test_read_image_1D( device, &imageInfo, seed ); + if( ret ) + return -1; + } + } + + return 0; +} diff --git a/test_conformance/images/clReadWriteImage/test_read_1D_array.cpp b/test_conformance/images/clReadWriteImage/test_read_1D_array.cpp new file mode 100644 index 00000000..b6e8ce01 --- /dev/null +++ 
b/test_conformance/images/clReadWriteImage/test_read_1D_array.cpp @@ -0,0 +1,287 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../testBase.h" + +#define MAX_ERR 0.005f +#define MAX_HALF_LINEAR_ERR 0.3f + +extern bool gDebugTrace, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestRounding, gTestMipmaps; +extern cl_filter_mode gFilterModeToUse; +extern cl_addressing_mode gAddressModeToUse; +extern uint64_t gRoundingStartValue; +extern cl_command_queue queue; +extern cl_context context; + + +int test_read_image_1D_array( cl_device_id device, image_descriptor *imageInfo, MTdata d ) +{ + int error; + + clMemWrapper image; + + // Generate some data to test against + BufferOwningPtr imageValues; + generate_random_image_data( imageInfo, imageValues, d ); + + if( gDebugTrace ) + { + log_info( " - Creating %s image array of size %d by %d...\n", gTestMipmaps?"mipmapped":"", (int)imageInfo->width, (int)imageInfo->arraySize ); + if( gTestMipmaps ) + log_info( " with %llu mip levels\n", (unsigned long long) imageInfo->num_mip_levels ); + } + + // Construct testing sources + if(!gTestMipmaps) + { + image = create_image_1d_array( context, (cl_mem_flags)(CL_MEM_READ_ONLY), imageInfo->format, imageInfo->width, imageInfo->arraySize, 0, 0, NULL, &error ); + if( image == NULL ) + { + log_error( "ERROR: Unable to create 1D image array of size %d x %d (%s)", (int)imageInfo->width, 
(int)imageInfo->arraySize, IGetErrorString( error ) ); + return -1; + } + } + else + { + cl_image_desc image_desc = {0}; + image_desc.image_type = CL_MEM_OBJECT_IMAGE1D_ARRAY; + image_desc.image_width = imageInfo->width; + image_desc.image_array_size = imageInfo->arraySize; + image_desc.num_mip_levels = imageInfo->num_mip_levels; + + image = clCreateImage( context, CL_MEM_READ_ONLY, imageInfo->format, &image_desc, NULL, &error); + if( error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create %d level mipmapped 1D image of width %d and array size %d (pitch %d ) (%s)",(int)imageInfo->num_mip_levels, (int)imageInfo->width, (int)imageInfo->arraySize, (int)imageInfo->rowPitch, IGetErrorString( error ) ); + return error; + } + } + if( gDebugTrace ) + log_info( " - Writing image...\n" ); + + size_t origin[ 3 ] = { 0, 0, 0 }; + size_t region[ 3 ] = { 0, 0, 1 }; + size_t fullImageSize; + if( gTestMipmaps ) + { + fullImageSize = (size_t)compute_mipmapped_image_size( *imageInfo ); + } + else + { + fullImageSize = imageInfo->arraySize * imageInfo->slicePitch; + } + + size_t imgValMipLevelOffset = 0; + BufferOwningPtr resultValues(malloc(fullImageSize)); + + for( size_t lod = 0; (gTestMipmaps && lod < imageInfo->num_mip_levels) || (!gTestMipmaps && lod < 1); lod++) + { + float lod_float = (float) lod; + size_t width_lod, row_pitch_lod, slice_pitch_lod; + if( gTestMipmaps ) + origin[2] = lod; + + width_lod = (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1; + row_pitch_lod = gTestMipmaps ? (width_lod * get_pixel_size( imageInfo->format )): imageInfo->rowPitch; + slice_pitch_lod = row_pitch_lod; + + region[0] = width_lod; + region[1] = imageInfo->arraySize; + + if ( gDebugTrace && gTestMipmaps ) + log_info("Working at mip level %llu\n", (unsigned long long) lod); + + error = clEnqueueWriteImage(queue, image, CL_FALSE, + origin, region, ( gEnablePitch ? 
row_pitch_lod : 0 ), 0, + (char*)imageValues + imgValMipLevelOffset, 0, NULL, NULL); + if (error != CL_SUCCESS) { + log_error( "ERROR: Unable to write to 1D image array of width %d and size %d\n", (int)width_lod, (int)imageInfo->arraySize ); + return -1; + } + + // To verify, we just read the results right back and see whether they match the input + if( gDebugTrace ) + log_info( " - Initing result array...\n" ); + + // Note: we read back without any pitch, to verify pitch actually WORKED + size_t scanlineSize = width_lod * get_pixel_size( imageInfo->format ); + size_t imageSize = scanlineSize * imageInfo->arraySize; + memset( resultValues, 0xff, imageSize ); + + if( gDebugTrace ) + log_info( " - Reading results...\n" ); + + error = clEnqueueReadImage( queue, image, CL_TRUE, origin, region, 0, 0, resultValues, 0, NULL, NULL ); + test_error( error, "Unable to read image values" ); + + // Verify scanline by scanline, since the pitches are different + char *sourcePtr = (char *)imageValues + imgValMipLevelOffset; + char *destPtr = resultValues; + + for( size_t y = 0; y < imageInfo->arraySize; y++ ) + { + if( memcmp( sourcePtr, destPtr, scanlineSize ) != 0 ) + { + log_error( "ERROR: Image array index %d did not verify for image size %d,%d pitch %d (extra %d bytes)\n", (int)y, (int)width_lod, (int)imageInfo->arraySize, (int)row_pitch_lod, (int)row_pitch_lod - (int)width_lod * (int)get_pixel_size( imageInfo->format ) ); + + log_error( "First few values: \n" ); + log_error( " Input: " ); + uint32_t *s = (uint32_t *)sourcePtr; + uint32_t *d = (uint32_t *)destPtr; + for( int q = 0; q < 12; q++ ) + log_error( "%08x ", s[ q ] ); + log_error( "\nOutput: " ); + for( int q = 0; q < 12; q++ ) + log_error( "%08x ", d[ q ] ); + log_error( "\n" ); + + int outX, outY; + int offset = (int)get_pixel_size( imageInfo->format ) * (int)( width_lod - 16 ); + if( offset < 0 ) + offset = 0; + int foundCount = debug_find_vector_in_image( imageValues + imgValMipLevelOffset, imageInfo, destPtr + 
offset, get_pixel_size( imageInfo->format ), &outX, &outY, NULL ); + if( foundCount > 0 ) + { + int returnedOffset = ( (int)y * (int)width_lod + offset / (int)get_pixel_size( imageInfo->format ) ) - ( outY * (int)width_lod + outX ); + + if( memcmp( sourcePtr + returnedOffset * get_pixel_size( imageInfo->format ), destPtr, get_pixel_size( imageInfo->format ) * 8 ) == 0 ) + log_error( " Values appear to be offsetted by %d\n", returnedOffset ); + else + log_error( " Calculated offset is %d but unable to verify\n", returnedOffset ); + } + else + { + log_error( " Unable to determine offset\n" ); + } + return -1; + } + sourcePtr += row_pitch_lod; + destPtr += scanlineSize; + } + imgValMipLevelOffset += width_lod * imageInfo->arraySize * get_pixel_size( imageInfo->format ); + } + return 0; +} + +int test_read_image_set_1D_array( cl_device_id device, cl_image_format *format ) +{ + size_t maxWidth, maxArraySize; + cl_ulong maxAllocSize, memSize; + image_descriptor imageInfo = { 0 }; + RandomSeed seed( gRandomSeed ); + size_t pixelSize; + + imageInfo.type = CL_MEM_OBJECT_IMAGE1D_ARRAY; + imageInfo.format = format; + imageInfo.height = imageInfo.depth = 0; + pixelSize = get_pixel_size( imageInfo.format ); + + int error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, sizeof( maxArraySize ), &maxArraySize, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL ); + test_error( error, "Unable to get max image 2D size from device" ); + + if (memSize > (cl_ulong)SIZE_MAX) { + memSize = (cl_ulong)SIZE_MAX; + } + + if( gTestSmallImages ) + { + for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ ) + { + imageInfo.rowPitch = imageInfo.width * pixelSize; + imageInfo.slicePitch = imageInfo.rowPitch; 
+ for( imageInfo.arraySize = 2; imageInfo.arraySize < 9; imageInfo.arraySize++ ) + { + if (gTestMipmaps) + imageInfo.num_mip_levels = (cl_uint) random_log_in_range(2, (int)compute_max_mip_levels(imageInfo.width, 0, 0), seed); + + if( gDebugTrace ) + log_info( " at size %d,%d\n", (int)imageInfo.width, (int)imageInfo.arraySize ); + + int ret = test_read_image_1D_array( device, &imageInfo, seed ); + if( ret ) + return -1; + } + } + } + else if( gTestMaxImages ) + { + // Try a specific set of maximum sizes + size_t numbeOfSizes; + size_t sizes[100][3]; + + get_max_sizes(&numbeOfSizes, 100, sizes, maxWidth, 1, 1, maxArraySize, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE1D_ARRAY, imageInfo.format); + + for( size_t idx = 0; idx < numbeOfSizes; idx++ ) + { + imageInfo.width = sizes[idx][0]; + imageInfo.arraySize = sizes[idx][2]; + imageInfo.rowPitch = imageInfo.width * pixelSize; + imageInfo.slicePitch = imageInfo.rowPitch; + + if (gTestMipmaps) + imageInfo.num_mip_levels = (cl_uint) random_log_in_range(2, (int)compute_max_mip_levels(imageInfo.width, 0, 0), seed); + + log_info("Testing %d x %d\n", (int)imageInfo.width, (int)imageInfo.arraySize); + if( gDebugTrace ) + log_info( " at max size %d,%d\n", (int)maxWidth, (int)maxArraySize ); + if( test_read_image_1D_array( device, &imageInfo, seed ) ) + return -1; + } + } + else + { + for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ ) + { + cl_ulong size; + // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that + // image, the result array, plus offset arrays, will fit in the global ram space + do + { + imageInfo.width = (size_t)random_log_in_range( 16, (int)maxWidth / 32, seed ); + imageInfo.arraySize = (size_t)random_log_in_range( 16, (int)maxArraySize / 32, seed ); + if (gTestMipmaps) + { + imageInfo.num_mip_levels = (cl_uint) random_log_in_range(2, (int)compute_max_mip_levels(imageInfo.width, 0, 0), seed); + imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format ); + 
imageInfo.slicePitch = imageInfo.rowPitch; + size = compute_mipmapped_image_size( imageInfo ); + } + else + { + imageInfo.rowPitch = imageInfo.width * pixelSize; + if( gEnablePitch ) + { + size_t extraWidth = (int)random_log_in_range( 0, 64, seed ); + imageInfo.rowPitch += extraWidth * pixelSize; + } + imageInfo.slicePitch = imageInfo.rowPitch; + + size = (size_t)imageInfo.rowPitch * (size_t)imageInfo.arraySize * 4; + } + } while( size > maxAllocSize || ( size * 3 ) > memSize ); + + if( gDebugTrace ) + log_info( " at size %d,%d (row pitch %d) out of %d,%d\n", (int)imageInfo.width, (int)imageInfo.arraySize, (int)imageInfo.rowPitch, (int)maxWidth, (int)maxArraySize ); + int ret = test_read_image_1D_array( device, &imageInfo, seed ); + if( ret ) + return -1; + } + } + + return 0; +} diff --git a/test_conformance/images/clReadWriteImage/test_read_2D.cpp b/test_conformance/images/clReadWriteImage/test_read_2D.cpp new file mode 100644 index 00000000..22f5ea1b --- /dev/null +++ b/test_conformance/images/clReadWriteImage/test_read_2D.cpp @@ -0,0 +1,286 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../testBase.h" + +#define MAX_ERR 0.005f +#define MAX_HALF_LINEAR_ERR 0.3f + +extern bool gDebugTrace, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestRounding, gTestMipmaps; +extern cl_filter_mode gFilterModeToUse; +extern cl_addressing_mode gAddressModeToUse; +extern uint64_t gRoundingStartValue; +extern cl_command_queue queue; +extern cl_context context; + + +int test_read_image_2D( cl_device_id device, image_descriptor *imageInfo, MTdata d ) +{ + int error; + + clMemWrapper image; + + // Generate some data to test against + BufferOwningPtr imageValues; + generate_random_image_data( imageInfo, imageValues, d ); + + if( gDebugTrace ) + { + log_info( " - Creating %s image %d by %d...\n", gTestMipmaps?"mipmapped":"", (int)imageInfo->width, (int)imageInfo->height ); + if( gTestMipmaps ) + log_info( " with %llu mip levels\n", (unsigned long long) imageInfo->num_mip_levels ); + } + + // Construct testing sources + if(!gTestMipmaps) + { + image = create_image_2d( context, (cl_mem_flags)(CL_MEM_READ_ONLY), imageInfo->format, imageInfo->width, imageInfo->height, 0, NULL, &error ); + if( image == NULL ) + { + log_error( "ERROR: Unable to create 2D image of size %d x %d (%s)", (int)imageInfo->width, (int)imageInfo->height, IGetErrorString( error ) ); + return -1; + } + } + else + { + cl_image_desc image_desc = {0}; + image_desc.image_type = CL_MEM_OBJECT_IMAGE2D; + image_desc.image_width = imageInfo->width; + image_desc.image_height = imageInfo->height; + image_desc.num_mip_levels = imageInfo->num_mip_levels; + + image = clCreateImage( context, CL_MEM_READ_ONLY, imageInfo->format, &image_desc, NULL, &error); + if( error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create %d level mipmapped 2D image of size %d x %d (pitch %d ) (%s)",(int)imageInfo->num_mip_levels, (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->rowPitch, IGetErrorString( error ) ); + return error; + } + } + if( gDebugTrace ) + log_info( " - Writing 
image...\n" ); + + size_t origin[ 3 ] = { 0, 0, 0 }; + size_t region[ 3 ] = { 0, 0, 1 }; + size_t fullImageSize; + if( gTestMipmaps ) + { + fullImageSize = (size_t)compute_mipmapped_image_size( *imageInfo ); + } + else + { + fullImageSize = imageInfo->height * imageInfo->rowPitch; + } + BufferOwningPtr resultValues(malloc(fullImageSize)); + size_t imgValMipLevelOffset = 0; + + for( size_t lod = 0; (gTestMipmaps && lod < imageInfo->num_mip_levels) || (!gTestMipmaps && lod < 1); lod++) + { + float lod_float = (float) lod; + origin[2] = lod; + size_t width_lod, height_lod, row_pitch_lod; + + width_lod = (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1; + height_lod = (imageInfo->height >> lod) ? (imageInfo->height >> lod) : 1; + row_pitch_lod = gTestMipmaps ? (width_lod * get_pixel_size( imageInfo->format )): imageInfo->rowPitch; + + region[0] = width_lod; + region[1] = height_lod; + + if ( gDebugTrace && gTestMipmaps) { + log_info(" - Working at mipLevel :%llu\n", (unsigned long long)lod); + } + error = clEnqueueWriteImage(queue, image, CL_FALSE, + origin, region, ( gEnablePitch ? 
row_pitch_lod : 0 ), 0, + (char*)imageValues + imgValMipLevelOffset, 0, NULL, NULL); + if (error != CL_SUCCESS) { + log_error( "ERROR: Unable to write to 2D image of size %d x %d \n", (int)width_lod, (int)height_lod ); + return -1; + } + + // To verify, we just read the results right back and see whether they match the input + if( gDebugTrace ) { + log_info( " - Initing result array...\n" ); + } + + // Note: we read back without any pitch, to verify pitch actually WORKED + size_t scanlineSize = width_lod * get_pixel_size( imageInfo->format ); + size_t imageSize = scanlineSize * height_lod; + memset( resultValues, 0xff, imageSize ); + + if( gDebugTrace ) + log_info( " - Reading results...\n" ); + + error = clEnqueueReadImage( queue, image, CL_TRUE, origin, region, 0, 0, resultValues, 0, NULL, NULL ); + test_error( error, "Unable to read image values" ); + + // Verify scanline by scanline, since the pitches are different + char *sourcePtr = (char *)imageValues + imgValMipLevelOffset; + char *destPtr = resultValues; + + for( size_t y = 0; y < height_lod; y++ ) + { + if( memcmp( sourcePtr, destPtr, scanlineSize ) != 0 ) + { + if(gTestMipmaps) + { + log_error("At mip level %llu\n",(unsigned long long) lod); + } + log_error( "ERROR: Scanline %d did not verify for image size %d,%d pitch %d (extra %d bytes)\n", (int)y, (int)width_lod, (int)height_lod, (int)row_pitch_lod, (int)row_pitch_lod - (int)width_lod * (int)get_pixel_size( imageInfo->format ) ); + + log_error( "First few values: \n" ); + log_error( " Input: " ); + uint32_t *s = (uint32_t *)sourcePtr; + uint32_t *d = (uint32_t *)destPtr; + for( int q = 0; q < 12; q++ ) + log_error( "%08x ", s[ q ] ); + log_error( "\nOutput: " ); + for( int q = 0; q < 12; q++ ) + log_error( "%08x ", d[ q ] ); + log_error( "\n" ); + + int outX, outY; + int offset = (int)get_pixel_size( imageInfo->format ) * (int)( width_lod - 16 ); + if( offset < 0 ) + offset = 0; + int foundCount = debug_find_vector_in_image( (char*)imageValues + 
imgValMipLevelOffset, imageInfo, destPtr + offset, get_pixel_size( imageInfo->format ), &outX, &outY, NULL ); + if( foundCount > 0 ) + { + int returnedOffset = ( (int)y * (int)width_lod + offset / (int)get_pixel_size( imageInfo->format ) ) - ( outY * (int)width_lod + outX ); + + if( memcmp( sourcePtr + returnedOffset * get_pixel_size( imageInfo->format ), destPtr, get_pixel_size( imageInfo->format ) * 8 ) == 0 ) + log_error( " Values appear to be offsetted by %d\n", returnedOffset ); + else + log_error( " Calculated offset is %d but unable to verify\n", returnedOffset ); + } + else + { + log_error( " Unable to determine offset\n" ); + } + return -1; + } + sourcePtr += row_pitch_lod; + destPtr += scanlineSize; + } + imgValMipLevelOffset += width_lod * height_lod * get_pixel_size( imageInfo->format ); + } + return 0; +} + +int test_read_image_set_2D( cl_device_id device, cl_image_format *format ) +{ + size_t maxWidth, maxHeight; + cl_ulong maxAllocSize, memSize; + image_descriptor imageInfo = { 0 }; + RandomSeed seed( gRandomSeed ); + size_t pixelSize; + + imageInfo.type = CL_MEM_OBJECT_IMAGE2D; + imageInfo.format = format; + imageInfo.depth = imageInfo.slicePitch = 0; + pixelSize = get_pixel_size( imageInfo.format ); + + int error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL ); + test_error( error, "Unable to get max image 2D size from device" ); + + if (memSize > (cl_ulong)SIZE_MAX) { + memSize = (cl_ulong)SIZE_MAX; + } + + if( gTestSmallImages ) + { + for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ ) + { + imageInfo.rowPitch = imageInfo.width * pixelSize; + for( imageInfo.height = 1; 
imageInfo.height < 9; imageInfo.height++ ) + { + if (gTestMipmaps) + imageInfo.num_mip_levels = (cl_uint) random_log_in_range(2, (int)compute_max_mip_levels(imageInfo.width, imageInfo.height, 0), seed); + + if( gDebugTrace ) + log_info( " at size %d,%d\n", (int)imageInfo.width, (int)imageInfo.height ); + + int ret = test_read_image_2D( device, &imageInfo, seed ); + if( ret ) + return -1; + } + } + } + else if( gTestMaxImages ) + { + // Try a specific set of maximum sizes + size_t numbeOfSizes; + size_t sizes[100][3]; + + get_max_sizes(&numbeOfSizes, 100, sizes, maxWidth, maxHeight, 1, 1, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE2D, imageInfo.format); + + for( size_t idx = 0; idx < numbeOfSizes; idx++ ) + { + imageInfo.width = sizes[idx][0]; + imageInfo.height = sizes[idx][1]; + imageInfo.rowPitch = imageInfo.width * pixelSize; + + if (gTestMipmaps) + imageInfo.num_mip_levels = (cl_uint) random_log_in_range(2, (int)compute_max_mip_levels(imageInfo.width, imageInfo.height, 0), seed); + + log_info("Testing %d x %d\n", (int)imageInfo.width, (int)imageInfo.height); + if( gDebugTrace ) + log_info( " at max size %d,%d\n", (int)maxWidth, (int)maxHeight ); + if( test_read_image_2D( device, &imageInfo, seed ) ) + return -1; + } + } + else + { + for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ ) + { + cl_ulong size; + // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that + // image, the result array, plus offset arrays, will fit in the global ram space + do + { + imageInfo.width = (size_t)random_log_in_range( 16, (int)maxWidth / 32, seed ); + imageInfo.height = (size_t)random_log_in_range( 16, (int)maxHeight / 32, seed ); + if (gTestMipmaps) + { + imageInfo.num_mip_levels = (cl_uint) random_log_in_range(2, (int)compute_max_mip_levels(imageInfo.width, imageInfo.height, 0), seed); + imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format ); + size = compute_mipmapped_image_size( imageInfo ); + } + else + { + 
imageInfo.rowPitch = imageInfo.width * pixelSize; + if( gEnablePitch ) + { + size_t extraWidth = (int)random_log_in_range( 0, 64, seed ); + imageInfo.rowPitch += extraWidth * pixelSize; + } + + size = (size_t)imageInfo.rowPitch * (size_t)imageInfo.height * 4; + } + } while( size > maxAllocSize || ( size / 3 ) > memSize ); + + if( gDebugTrace ) + log_info( " at size %d,%d (row pitch %d) out of %d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.rowPitch, (int)maxWidth, (int)maxHeight ); + int ret = test_read_image_2D( device, &imageInfo, seed ); + if( ret ) + return -1; + } + } + + return 0; +} diff --git a/test_conformance/images/clReadWriteImage/test_read_2D_array.cpp b/test_conformance/images/clReadWriteImage/test_read_2D_array.cpp new file mode 100644 index 00000000..b82a8bac --- /dev/null +++ b/test_conformance/images/clReadWriteImage/test_read_2D_array.cpp @@ -0,0 +1,272 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../testBase.h" + +#define MAX_ERR 0.005f +#define MAX_HALF_LINEAR_ERR 0.3f + +extern bool gDebugTrace, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestRounding, gTestMipmaps; +extern cl_filter_mode gFilterModeToUse; +extern cl_addressing_mode gAddressModeToUse; +extern cl_command_queue queue; +extern cl_context context; + +int test_read_image_2D_array( cl_device_id device, image_descriptor *imageInfo, MTdata d ) +{ + int error; + + clMemWrapper image; + + // Create some data to test against + BufferOwningPtr imageValues; + generate_random_image_data( imageInfo, imageValues, d ); + + if( gDebugTrace ) + { + log_info( " - Creating %s image %d by %d by %d...\n", gTestMipmaps?"mipmapped":"", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->arraySize ); + if( gTestMipmaps ) + log_info( " with %llu mip levels\n", (unsigned long long) imageInfo->num_mip_levels ); + } + + // Construct testing sources + if(!gTestMipmaps) + { + image = create_image_2d_array( context, (cl_mem_flags)(CL_MEM_READ_ONLY), imageInfo->format, imageInfo->width, imageInfo->height, imageInfo->arraySize, 0, 0, NULL, &error ); + if( image == NULL ) + { + log_error( "ERROR: Unable to create 2D image array of size %d x %d x %d (%s)", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->arraySize, IGetErrorString( error ) ); + return -1; + } + } + else + { + cl_image_desc image_desc = {0}; + image_desc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY; + image_desc.image_width = imageInfo->width; + image_desc.image_height = imageInfo->height; + image_desc.image_array_size = imageInfo->arraySize; + image_desc.num_mip_levels = imageInfo->num_mip_levels; + + image = clCreateImage( context, CL_MEM_READ_ONLY, imageInfo->format, &image_desc, NULL, &error); + if( error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create %d level mipmapped 3D image of size %d x %d x %d (pitch %d, %d ) (%s)",(int)imageInfo->num_mip_levels, (int)imageInfo->width, 
(int)imageInfo->height, (int)imageInfo->depth, (int)imageInfo->rowPitch, (int)imageInfo->slicePitch, IGetErrorString( error ) ); + return error; + } + } + + if( gDebugTrace ) + log_info( " - Writing image...\n" ); + + size_t origin[ 4 ] = { 0, 0, 0, 0 }; + size_t region[ 3 ] = { 0, 0, 0 }; + size_t fullImageSize; + if( gTestMipmaps ) + { + fullImageSize = (size_t)compute_mipmapped_image_size( *imageInfo ); + } + else + { + fullImageSize = imageInfo->arraySize * imageInfo->slicePitch; + } + BufferOwningPtr resultValues(malloc(fullImageSize)); + size_t imgValMipLevelOffset = 0; + + for(size_t lod = 0; (gTestMipmaps && lod < imageInfo->num_mip_levels) || (!gTestMipmaps && lod < 1); lod++) + { + float lod_float = (float) lod; + origin[3] = lod; + size_t width_lod, height_lod, depth_lod, row_pitch_lod, slice_pitch_lod; + + width_lod = (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1; + height_lod = (imageInfo->height >> lod) ? (imageInfo->height >> lod) : 1; + row_pitch_lod = gTestMipmaps ? (width_lod * get_pixel_size( imageInfo->format )): imageInfo->rowPitch; + slice_pitch_lod = gTestMipmaps ? (row_pitch_lod * height_lod): imageInfo->slicePitch; + region[0] = width_lod; + region[1] = height_lod; + region[2] = imageInfo->arraySize; + + if ( gDebugTrace && gTestMipmaps) { + log_info(" - Working at mipLevel :%llu\n", (unsigned long long)lod); + } + + error = clEnqueueWriteImage(queue, image, CL_FALSE, + origin, region, ( gEnablePitch ? row_pitch_lod : 0 ), ( gEnablePitch ? 
slice_pitch_lod : 0 ), + (char*)imageValues + imgValMipLevelOffset, 0, NULL, NULL); + if (error != CL_SUCCESS) { + log_error( "ERROR: Unable to write to 2D image array of size %d x %d x %d\n", (int)width_lod, (int)height_lod, (int)imageInfo->arraySize ); + return -1; + } + + // To verify, we just read the results right back and see whether they match the input + if( gDebugTrace ) + log_info( " - Initing result array...\n" ); + + // Note: we read back without any pitch, to verify pitch actually WORKED + size_t scanlineSize = width_lod * get_pixel_size( imageInfo->format ); + size_t pageSize = scanlineSize * height_lod; + size_t imageSize = pageSize * imageInfo->arraySize; + memset( resultValues, 0xff, imageSize ); + + if( gDebugTrace ) + log_info( " - Reading results...\n" ); + + error = clEnqueueReadImage( queue, image, CL_TRUE, origin, region, 0, 0, resultValues, 0, NULL, NULL ); + test_error( error, "Unable to read image values" ); + + // Verify scanline by scanline, since the pitches are different + char *sourcePtr = (char *)imageValues + imgValMipLevelOffset; + char *destPtr = resultValues; + + for( size_t z = 0; z < imageInfo->arraySize; z++ ) + { + for( size_t y = 0; y < height_lod; y++ ) + { + if( memcmp( sourcePtr, destPtr, scanlineSize ) != 0 ) + { + log_error( "ERROR: Scanline %d,%d did not verify for image size %d,%d,%d pitch %d,%d\n", (int)y, (int)z, (int)width_lod, (int)height_lod, (int)imageInfo->arraySize, (int)row_pitch_lod, (int)slice_pitch_lod ); + return -1; + } + sourcePtr += row_pitch_lod; + destPtr += scanlineSize; + } + sourcePtr += slice_pitch_lod - ( row_pitch_lod * height_lod ); + destPtr += pageSize - scanlineSize * height_lod; + } + imgValMipLevelOffset += width_lod * height_lod * imageInfo->arraySize * get_pixel_size( imageInfo->format ); + } + return 0; +} + +int test_read_image_set_2D_array( cl_device_id device, cl_image_format *format ) +{ + size_t maxWidth, maxHeight, maxArraySize; + cl_ulong maxAllocSize, memSize; + 
image_descriptor imageInfo = { 0 }; + RandomSeed seed( gRandomSeed ); + size_t pixelSize; + + imageInfo.type = CL_MEM_OBJECT_IMAGE2D_ARRAY; + imageInfo.format = format; + pixelSize = get_pixel_size( imageInfo.format ); + + int error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, sizeof( maxArraySize ), &maxArraySize, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL ); + test_error( error, "Unable to get max image 3D size from device" ); + + if (memSize > (cl_ulong)SIZE_MAX) { + memSize = (cl_ulong)SIZE_MAX; + } + + if( gTestSmallImages ) + { + for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ ) + { + imageInfo.rowPitch = imageInfo.width * pixelSize; + + for( imageInfo.height = 1; imageInfo.height < 9; imageInfo.height++ ) + { + imageInfo.slicePitch = imageInfo.rowPitch * imageInfo.height; + for( imageInfo.arraySize = 2; imageInfo.arraySize < 9; imageInfo.arraySize++ ) + { + if (gTestMipmaps) + imageInfo.num_mip_levels = (cl_uint) random_log_in_range(2, (int)compute_max_mip_levels(imageInfo.width, imageInfo.height, 0), seed); + + if( gDebugTrace ) + log_info( " at size %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.arraySize ); + int ret = test_read_image_2D_array( device, &imageInfo, seed ); + if( ret ) + return -1; + } + } + } + } + else if( gTestMaxImages ) + { + // Try a specific set of maximum sizes + size_t numbeOfSizes; + size_t sizes[100][3]; + + get_max_sizes(&numbeOfSizes, 100, sizes, maxWidth, maxHeight, 1, maxArraySize, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE2D_ARRAY, imageInfo.format); + + for( size_t idx = 0; idx < 
numbeOfSizes; idx++ ) + { + // Try a specific set of maximum sizes + imageInfo.width = sizes[idx][0]; + imageInfo.height = sizes[idx][1]; + imageInfo.arraySize = sizes[idx][2]; + imageInfo.rowPitch = imageInfo.width * pixelSize; + imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch; + + if (gTestMipmaps) + imageInfo.num_mip_levels = (cl_uint) random_log_in_range(2, (int)compute_max_mip_levels(imageInfo.width, imageInfo.height, 0), seed); + + log_info("Testing %d x %d x %d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.arraySize); + if( test_read_image_2D_array( device, &imageInfo, seed ) ) + return -1; + } + } + else + { + for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ ) + { + cl_ulong size; + // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that + // image, the result array, plus offset arrays, will fit in the global ram space + do + { + imageInfo.width = (size_t)random_log_in_range( 16, (int)maxWidth / 32, seed ); + imageInfo.height = (size_t)random_log_in_range( 16, (int)maxHeight / 32, seed ); + imageInfo.arraySize = (size_t)random_log_in_range( 16, (int)maxArraySize / 32, seed ); + + if (gTestMipmaps) + { + imageInfo.num_mip_levels = (cl_uint) random_log_in_range(2, (int)compute_max_mip_levels(imageInfo.width, imageInfo.height, 0), seed); + imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format ); + imageInfo.slicePitch = imageInfo.rowPitch * imageInfo.height; + size = compute_mipmapped_image_size( imageInfo ) * 4; + } + else + { + imageInfo.rowPitch = imageInfo.width * pixelSize; + imageInfo.slicePitch = imageInfo.rowPitch * imageInfo.height; + + if( gEnablePitch ) + { + size_t extraWidth = (int)random_log_in_range( 0, 64, seed ); + imageInfo.rowPitch += extraWidth * pixelSize; + + size_t extraHeight = (int)random_log_in_range( 0, 8, seed ); + imageInfo.slicePitch = imageInfo.rowPitch * (imageInfo.height + extraHeight); + } + + size = (cl_ulong)imageInfo.slicePitch * 
(cl_ulong)imageInfo.arraySize * 4 * 4; + } + } while( size > maxAllocSize || ( size * 3 ) > memSize ); + + if( gDebugTrace ) + log_info( " at size %d,%d,%d (pitch %d,%d) out of %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.arraySize, (int)imageInfo.rowPitch, (int)imageInfo.slicePitch, (int)maxWidth, (int)maxHeight, (int)maxArraySize ); + int ret = test_read_image_2D_array( device, &imageInfo, seed ); + if( ret ) + return -1; + } + } + + return 0; +} diff --git a/test_conformance/images/clReadWriteImage/test_read_3D.cpp b/test_conformance/images/clReadWriteImage/test_read_3D.cpp new file mode 100644 index 00000000..b8ac3fec --- /dev/null +++ b/test_conformance/images/clReadWriteImage/test_read_3D.cpp @@ -0,0 +1,276 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../testBase.h" + +#define MAX_ERR 0.005f +#define MAX_HALF_LINEAR_ERR 0.3f + +extern bool gDebugTrace, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestRounding, gTestMipmaps; +extern cl_filter_mode gFilterModeToUse; +extern cl_addressing_mode gAddressModeToUse; +extern cl_command_queue queue; +extern cl_context context; + +int test_read_image_3D( cl_device_id device, image_descriptor *imageInfo, MTdata d ) +{ + int error; + + clMemWrapper image; + + // Create some data to test against + BufferOwningPtr imageValues; + generate_random_image_data( imageInfo, imageValues, d ); + + if( gDebugTrace ) + { + log_info( " - Creating %s image %d by %d by %d...\n", gTestMipmaps?"mipmapped":"", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->depth ); + if( gTestMipmaps ) + log_info( " with %llu mip levels\n", (unsigned long long) imageInfo->num_mip_levels ); + } + // Construct testing sources + if(!gTestMipmaps) + { + image = create_image_3d( context, (cl_mem_flags)(CL_MEM_READ_ONLY), imageInfo->format, imageInfo->width, imageInfo->height, imageInfo->depth, 0, 0, NULL, &error ); + if( image == NULL ) + { + log_error( "ERROR: Unable to create 3D image of size %d x %d x %d (%s)", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->depth, IGetErrorString( error ) ); + return -1; + } + } + else + { + cl_image_desc image_desc = {0}; + image_desc.image_type = CL_MEM_OBJECT_IMAGE3D; + image_desc.image_width = imageInfo->width; + image_desc.image_height = imageInfo->height; + image_desc.image_depth = imageInfo->depth; + image_desc.num_mip_levels = imageInfo->num_mip_levels; + + image = clCreateImage( context, CL_MEM_READ_ONLY, imageInfo->format, &image_desc, NULL, &error); + if( error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create %d level mipmapped 3D image of size %d x %d x %d (pitch %d, %d ) (%s)",(int)imageInfo->num_mip_levels, (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->depth, 
(int)imageInfo->rowPitch, (int)imageInfo->slicePitch, IGetErrorString( error ) ); + return error; + } + } + + if( gDebugTrace ) + log_info( " - Writing image...\n" ); + + size_t origin[ 4 ] = { 0, 0, 0, 0 }; + size_t region[ 3 ] = { 0, 0, 0 }; + size_t fullImageSize; + if( gTestMipmaps ) + { + fullImageSize = (size_t)compute_mipmapped_image_size( *imageInfo ); + } + else + { + fullImageSize = imageInfo->depth * imageInfo->slicePitch; + } + + BufferOwningPtr resultValues(malloc(fullImageSize)); + size_t imgValMipLevelOffset = 0; + + for(size_t lod = 0; (gTestMipmaps && lod < imageInfo->num_mip_levels) || (!gTestMipmaps && lod < 1); lod++) + { + float lod_float = (float) lod; + origin[3] = lod; + size_t width_lod, height_lod, depth_lod, row_pitch_lod, slice_pitch_lod; + + width_lod = (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1; + height_lod = (imageInfo->height >> lod) ? (imageInfo->height >> lod) : 1; + depth_lod = (imageInfo->depth >> lod) ? (imageInfo->depth >> lod) : 1; + row_pitch_lod = gTestMipmaps ? (width_lod * get_pixel_size( imageInfo->format )): imageInfo->rowPitch; + slice_pitch_lod = gTestMipmaps ? (row_pitch_lod * height_lod): imageInfo->slicePitch; + region[0] = width_lod; + region[1] = height_lod; + region[2] = depth_lod; + + if ( gDebugTrace && gTestMipmaps) { + log_info(" - Working at mipLevel :%llu\n", (unsigned long long)lod); + } + error = clEnqueueWriteImage(queue, image, CL_FALSE, + origin, region, ( gEnablePitch ? row_pitch_lod : 0 ), ( gEnablePitch ? 
slice_pitch_lod : 0 ), + (char*)imageValues + imgValMipLevelOffset, 0, NULL, NULL); + if (error != CL_SUCCESS) { + log_error( "ERROR: Unable to write to %s 3D image of size %d x %d x %d\n", gTestMipmaps?"mipmapped":"", (int)width_lod, (int)height_lod, (int)depth_lod ); + return -1; + } + + // To verify, we just read the results right back and see whether they match the input + if( gDebugTrace ) { + log_info( " - Initing result array...\n" ); + } + + // Note: we read back without any pitch, to verify pitch actually WORKED + size_t scanlineSize = width_lod * get_pixel_size( imageInfo->format ); + size_t pageSize = scanlineSize * height_lod; + size_t imageSize = pageSize * depth_lod; + memset( resultValues, 0xff, imageSize ); + + if( gDebugTrace ) + log_info( " - Reading results...\n" ); + + error = clEnqueueReadImage( queue, image, CL_TRUE, origin, region, 0, 0, resultValues, 0, NULL, NULL ); + test_error( error, "Unable to read image values" ); + + // Verify scanline by scanline, since the pitches are different + char *sourcePtr = (char *)imageValues + imgValMipLevelOffset; + char *destPtr = resultValues; + + for( size_t z = 0; z < depth_lod; z++ ) + { + for( size_t y = 0; y < height_lod; y++ ) + { + if( memcmp( sourcePtr, destPtr, scanlineSize ) != 0 ) + { + if(gTestMipmaps) + { + log_error("At mip level %llu\n",(unsigned long long) lod); + } + log_error( "ERROR: Scanline %d,%d did not verify for image size %d,%d,%d pitch %d,%d\n", (int)y, (int)z, (int)width_lod, (int)height_lod, (int)depth_lod, (int)row_pitch_lod, (int)slice_pitch_lod ); + return -1; + } + sourcePtr += row_pitch_lod; + destPtr += scanlineSize; + } + sourcePtr += slice_pitch_lod - ( row_pitch_lod * height_lod ); + destPtr += pageSize - scanlineSize * height_lod; + } + imgValMipLevelOffset += width_lod * height_lod * depth_lod * get_pixel_size( imageInfo->format ); + } + return 0; +} + +int test_read_image_set_3D( cl_device_id device, cl_image_format *format ) +{ + size_t maxWidth, maxHeight, 
maxDepth; + cl_ulong maxAllocSize, memSize; + image_descriptor imageInfo = { 0 }; + RandomSeed seed( gRandomSeed ); + size_t pixelSize; + + imageInfo.type = CL_MEM_OBJECT_IMAGE3D; + imageInfo.format = format; + pixelSize = get_pixel_size( imageInfo.format ); + + int error = clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof( maxDepth ), &maxDepth, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL ); + test_error( error, "Unable to get max image 3D size from device" ); + + if (memSize > (cl_ulong)SIZE_MAX) { + memSize = (cl_ulong)SIZE_MAX; + } + + if( gTestSmallImages ) + { + for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ ) + { + imageInfo.rowPitch = imageInfo.width * pixelSize; + + for( imageInfo.height = 1; imageInfo.height < 9; imageInfo.height++ ) + { + imageInfo.slicePitch = imageInfo.rowPitch * imageInfo.height; + for( imageInfo.depth = 2; imageInfo.depth < 9; imageInfo.depth++ ) + { + if (gTestMipmaps) + imageInfo.num_mip_levels = (cl_uint) random_log_in_range(2, (int)compute_max_mip_levels(imageInfo.width, imageInfo.height, imageInfo.depth), seed); + + if( gDebugTrace ) + log_info( " at size %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.depth ); + int ret = test_read_image_3D( device, &imageInfo, seed ); + if( ret ) + return -1; + } + } + } + } + else if( gTestMaxImages ) + { + // Try a specific set of maximum sizes + size_t numbeOfSizes; + size_t sizes[100][3]; + + get_max_sizes(&numbeOfSizes, 100, sizes, maxWidth, maxHeight, maxDepth, 1, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE3D, imageInfo.format); + + for( size_t idx = 0; idx 
< numbeOfSizes; idx++ ) + { + // Try a specific set of maximum sizes + imageInfo.width = sizes[idx][0]; + imageInfo.height = sizes[idx][1]; + imageInfo.depth = sizes[idx][2]; + imageInfo.rowPitch = imageInfo.width * pixelSize; + imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch; + + if (gTestMipmaps) + imageInfo.num_mip_levels = (cl_uint) random_log_in_range(2, (int)compute_max_mip_levels(imageInfo.width, imageInfo.height, imageInfo.depth), seed); + + log_info("Testing %d x %d x %d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.depth); + if( test_read_image_3D( device, &imageInfo, seed ) ) + return -1; + } + } + else + { + for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ ) + { + cl_ulong size; + // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that + // image, the result array, plus offset arrays, will fit in the global ram space + do + { + imageInfo.width = (size_t)random_log_in_range( 16, (int)maxWidth / 32, seed ); + imageInfo.height = (size_t)random_log_in_range( 16, (int)maxHeight / 32, seed ); + imageInfo.depth = (size_t)random_log_in_range( 16, (int)maxDepth / 32, seed ); + if (gTestMipmaps) + { + imageInfo.num_mip_levels = (cl_uint) random_log_in_range(2, (int)compute_max_mip_levels(imageInfo.width, imageInfo.height, imageInfo.depth), seed); + imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format ); + imageInfo.slicePitch = imageInfo.rowPitch * imageInfo.height; + size = compute_mipmapped_image_size( imageInfo ); + } + else + { + imageInfo.rowPitch = imageInfo.width * pixelSize; + imageInfo.slicePitch = imageInfo.rowPitch * imageInfo.height; + + if( gEnablePitch ) + { + size_t extraWidth = (int)random_log_in_range( 0, 64, seed ); + imageInfo.rowPitch += extraWidth * pixelSize; + + size_t extraHeight = (int)random_log_in_range( 0, 8, seed ); + imageInfo.slicePitch = imageInfo.rowPitch * (imageInfo.height + extraHeight); + } + + size = (cl_ulong)imageInfo.slicePitch * 
(cl_ulong)imageInfo.depth * 4 * 4; + } + } while( size > maxAllocSize || ( size * 3 ) > memSize ); + + if( gDebugTrace ) + log_info( " at size %d,%d,%d (pitch %d,%d) out of %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.depth, (int)imageInfo.rowPitch, (int)imageInfo.slicePitch, (int)maxWidth, (int)maxHeight, (int)maxDepth ); + int ret = test_read_image_3D( device, &imageInfo, seed ); + if( ret ) + return -1; + } + } + + return 0; +} diff --git a/test_conformance/images/kernel_image_methods/CMakeLists.txt b/test_conformance/images/kernel_image_methods/CMakeLists.txt new file mode 100644 index 00000000..bc2663fd --- /dev/null +++ b/test_conformance/images/kernel_image_methods/CMakeLists.txt @@ -0,0 +1,23 @@ +set(MODULE_NAME KERNEL_IMAGE_METHODS) + +set(${MODULE_NAME}_SOURCES + main.cpp + test_1D.cpp + test_1D_array.cpp + test_2D.cpp + test_2D_array.cpp + test_loops.cpp + test_3D.cpp + ../../../test_common/harness/errorHelpers.c + ../../../test_common/harness/threadTesting.c + ../../../test_common/harness/kernelHelpers.c + ../../../test_common/harness/imageHelpers.cpp + ../../../test_common/harness/mt19937.c + ../../../test_common/harness/conversions.c + ../../../test_common/harness/testHarness.c + ../../../test_common/harness/typeWrappers.cpp + ../../../test_common/harness/msvc9.c + ../../../test_common/harness/parseParameters.cpp +) + +include(../../CMakeCommon.txt) diff --git a/test_conformance/images/kernel_image_methods/Jamfile b/test_conformance/images/kernel_image_methods/Jamfile new file mode 100644 index 00000000..1d5e227a --- /dev/null +++ b/test_conformance/images/kernel_image_methods/Jamfile @@ -0,0 +1,18 @@ +project + : requirements +# gcc:-xc++ +# msvc:"/TP" + ; + +exe test_kernel_image_methods + : main.cpp + test_2D.cpp + test_3D.cpp + test_loops.cpp + ; + +install dist + : test_kernel_image_methods + : debug:$(DIST)/debug/tests/test_conformance/images/kernel_image_methods + 
release:$(DIST)/release/tests/test_conformance/images/kernel_image_methods + ; diff --git a/test_conformance/images/kernel_image_methods/Makefile b/test_conformance/images/kernel_image_methods/Makefile new file mode 100644 index 00000000..8079f5fb --- /dev/null +++ b/test_conformance/images/kernel_image_methods/Makefile @@ -0,0 +1,51 @@ +ifdef BUILD_WITH_ATF +ATF = -framework ATF +USE_ATF = -DUSE_ATF +endif + +SRCS = main.cpp \ + test_1D.cpp \ + test_1D_array.cpp \ + test_2D.cpp \ + test_2D_array.cpp \ + test_loops.cpp \ + test_3D.cpp \ + ../../../test_common/harness/errorHelpers.c \ + ../../../test_common/harness/threadTesting.c \ + ../../../test_common/harness/kernelHelpers.c \ + ../../../test_common/harness/imageHelpers.cpp \ + ../../../test_common/harness/conversions.c \ + ../../../test_common/harness/testHarness.c \ + ../../../test_common/harness/mt19937.c \ + ../../../test_common/harness/typeWrappers.cpp + +DEFINES = DONT_TEST_GARBAGE_POINTERS + +SOURCES = $(abspath $(SRCS)) +LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries +LIBPATH += -L. +FRAMEWORK = +HEADERS = +TARGET = test_kernel_image_methods +INCLUDE = +COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32 -Os +CC = c++ +CXX = c++ +CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) +CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) +LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF} + +OBJECTS := ${SOURCES:.c=.o} +OBJECTS := ${OBJECTS:.cpp=.o} + +TARGETOBJECT = +all: $(TARGET) + +$(TARGET): $(OBJECTS) + $(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES) + +clean: + rm -f $(TARGET) $(OBJECTS) + +.DEFAULT: + @echo The target \"$@\" does not exist in Makefile. 
diff --git a/test_conformance/images/kernel_image_methods/main.cpp b/test_conformance/images/kernel_image_methods/main.cpp new file mode 100644 index 00000000..f3dd9c55 --- /dev/null +++ b/test_conformance/images/kernel_image_methods/main.cpp @@ -0,0 +1,262 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../../test_common/harness/compat.h" +#include "../../../test_common/harness/parseParameters.h" + +#include +#include + +#if !defined(_WIN32) +#include +#include +#endif + +#include "../testBase.h" + +bool gDebugTrace = false, gTestSmallImages = false, gTestMaxImages = false, gTestRounding = false; +int gTypesToTest = 0; +cl_channel_type gChannelTypeToUse = (cl_channel_type)-1; +cl_device_type gDeviceType = CL_DEVICE_TYPE_DEFAULT; + +extern int test_image_set( cl_device_id device, cl_mem_object_type imageType ); + +#define MAX_ALLOWED_STD_DEVIATION_IN_MB 8.0 + +clCommandQueueWrapper queue; +clContextWrapper context; + +void printUsage( const char *execName ) +{ + const char *p = strrchr( execName, '/' ); + if( p != NULL ) + execName = p + 1; + + log_info( "Usage: %s [debug_trace] [small_images]\n", execName ); + log_info( "Where:\n" ); + log_info( "\t1D - Only test 1D images\n" ); + log_info( "\t2D - Only test 2D images\n" ); + log_info( "\t3D - Only test 3D images\n" ); + log_info( "\t1Darray - Only test 1D image arrays\n" ); + log_info( "\t2Darray - Only test 2D image arrays\n" ); + 
log_info( "\n" ); + log_info( "\tdebug_trace - Enables additional debug info logging\n" ); + log_info( "\tsmall_images - Runs every format through a loop of widths 1-13 and heights 1-9, instead of random sizes\n" ); + log_info( "\tmax_images - Runs every format through a set of size combinations with the max values, max values - 1, and max values / 128\n" ); +} + + +int main(int argc, const char *argv[]) +{ + cl_platform_id platform; + cl_device_id device; + cl_channel_type chanType; + char str[ 128 ]; + int testMethods = 0; + bool randomize = false; + + test_start(); + + argc = parseCustomParam(argc, argv); + if (argc == -1) + { + test_finish(); + return -1; + } + + checkDeviceTypeOverride( &gDeviceType ); + + // Parse arguments + for( int i = 1; i < argc; i++ ) + { + strncpy( str, argv[ i ], sizeof( str ) - 1 ); + + if( strcmp( str, "cpu" ) == 0 || strcmp( str, "CL_DEVICE_TYPE_CPU" ) == 0 ) + gDeviceType = CL_DEVICE_TYPE_CPU; + else if( strcmp( str, "gpu" ) == 0 || strcmp( str, "CL_DEVICE_TYPE_GPU" ) == 0 ) + gDeviceType = CL_DEVICE_TYPE_GPU; + else if( strcmp( str, "accelerator" ) == 0 || strcmp( str, "CL_DEVICE_TYPE_ACCELERATOR" ) == 0 ) + gDeviceType = CL_DEVICE_TYPE_ACCELERATOR; + else if( strcmp( str, "CL_DEVICE_TYPE_DEFAULT" ) == 0 ) + gDeviceType = CL_DEVICE_TYPE_DEFAULT; + + else if( strcmp( str, "debug_trace" ) == 0 ) + gDebugTrace = true; + + else if( strcmp( str, "small_images" ) == 0 ) + gTestSmallImages = true; + else if( strcmp( str, "max_images" ) == 0 ) + gTestMaxImages = true; + + else if( strcmp( str, "randomize" ) == 0 ) + randomize = true; + + else if ( strcmp( str, "1D" ) == 0 ) + testMethods |= k1D; + else if( strcmp( str, "2D" ) == 0 ) + testMethods |= k2D; + else if( strcmp( str, "3D" ) == 0 ) + testMethods |= k3D; + else if( strcmp( str, "1Darray" ) == 0 ) + testMethods |= k1DArray; + else if( strcmp( str, "2Darray" ) == 0 ) + testMethods |= k2DArray; + + else if( strcmp( str, "help" ) == 0 || strcmp( str, "?" 
) == 0 ) + { + printUsage( argv[ 0 ] ); + return -1; + } + else if( ( chanType = get_channel_type_from_name( str ) ) != (cl_channel_type)-1 ) + gChannelTypeToUse = chanType; + else + { + log_error( "ERROR: Unknown argument %d: %s. Exiting....\n", i, str ); + return -1; + } + } + + if (testMethods == 0) + testMethods = k1D | k2D | k3D | k1DArray | k2DArray; + + // Seed the random # generators + if( randomize ) + { + gRandomSeed = (cl_uint) time( NULL ); + log_info( "Random seed: %u.\n", gRandomSeed ); + gReSeed = 1; + } + + // Get our device + int error; + + // Get our platform + error = clGetPlatformIDs(1, &platform, NULL); + if( error ) + { + print_error( error, "Unable to get platform" ); + test_finish(); + return -1; + } + + // Get our device + unsigned int num_devices; + error = clGetDeviceIDs(platform, gDeviceType, 0, NULL, &num_devices); + if( error ) + { + print_error( error, "Unable to get number of devices" ); + test_finish(); + return -1; + } + + uint32_t gDeviceIndex = 0; + const char* device_index_env = getenv("CL_DEVICE_INDEX"); + if (device_index_env) { + if (device_index_env) { + gDeviceIndex = atoi(device_index_env); + } + + if (gDeviceIndex >= num_devices) { + vlog("Specified CL_DEVICE_INDEX=%d out of range, using index 0.\n", gDeviceIndex); + gDeviceIndex = 0; + } + } + + cl_device_id *gDeviceList = (cl_device_id *)malloc( num_devices * sizeof( cl_device_id ) ); + error = clGetDeviceIDs(platform, gDeviceType, num_devices, gDeviceList, NULL); + if( error ) + { + print_error( error, "Unable to get devices" ); + free( gDeviceList ); + test_finish(); + return -1; + } + + device = gDeviceList[gDeviceIndex]; + free( gDeviceList ); + + log_info( "Using " ); + if( printDeviceHeader( device ) != CL_SUCCESS ) + { + test_finish(); + return -1; + } + + // Check for image support + if (checkForImageSupport( device ) == CL_IMAGE_FORMAT_NOT_SUPPORTED) + { + log_info("Device does not support images. 
Skipping test.\n"); + test_finish(); + return 0; + } + + // Create a context to test with + context = clCreateContext( NULL, 1, &device, notify_callback, NULL, &error ); + if( error != CL_SUCCESS ) + { + print_error( error, "Unable to create testing context" ); + test_finish(); + return -1; + } + + // Create a queue against the context + queue = clCreateCommandQueueWithProperties( context, device, 0, &error ); + if ( error != CL_SUCCESS ) + { + print_error( error, "Unable to create testing command queue" ); + test_finish(); + return -1; + } + + if( gTestSmallImages ) + log_info( "Note: Using small test images\n" ); + + // Run the test now + int ret = 0; + if (testMethods & k1D) + ret += test_image_set( device, CL_MEM_OBJECT_IMAGE1D ); + if (testMethods & k2D) + ret += test_image_set( device, CL_MEM_OBJECT_IMAGE2D ); + if (testMethods & k3D) + ret += test_image_set( device, CL_MEM_OBJECT_IMAGE3D ); + if (testMethods & k1DArray) + ret += test_image_set( device, CL_MEM_OBJECT_IMAGE1D_ARRAY ); + if (testMethods & k2DArray) + ret += test_image_set( device, CL_MEM_OBJECT_IMAGE2D_ARRAY ); + + // Clean up + error = clFinish(queue); + if (error) + print_error(error, "clFinish failed."); + + if (gTestFailure == 0) { + if (gTestCount > 1) + log_info("PASSED %d of %d tests.\n", gTestCount, gTestCount); + else + log_info("PASSED test.\n"); + } else if (gTestFailure > 0) { + if (gTestCount > 1) + log_error("FAILED %d of %d tests.\n", gTestFailure, gTestCount); + else + log_error("FAILED test.\n"); + } + + test_finish(); + + if (gTestFailure > 0) + return gTestFailure; + + return ret; +} diff --git a/test_conformance/images/kernel_image_methods/test_1D.cpp b/test_conformance/images/kernel_image_methods/test_1D.cpp new file mode 100644 index 00000000..f1b92bee --- /dev/null +++ b/test_conformance/images/kernel_image_methods/test_1D.cpp @@ -0,0 +1,237 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../testBase.h" + +#define MAX_ERR 0.005f +#define MAX_HALF_LINEAR_ERR 0.3f + +extern bool gDebugTrace, gTestSmallImages, gTestMaxImages; + +extern clCommandQueueWrapper queue; +extern clContextWrapper context; + +typedef struct image_kernel_data +{ + cl_int width; + cl_int channelType; + cl_int channelOrder; + cl_int expectedChannelType; + cl_int expectedChannelOrder; +}; + +static const char *methodTest1DImageKernelPattern = +"typedef struct {\n" +" int width;\n" +" int channelType;\n" +" int channelOrder;\n" +" int expectedChannelType;\n" +" int expectedChannelOrder;\n" +" } image_kernel_data;\n" +"__kernel void sample_kernel( read_only image1d_t input, __global image_kernel_data *outData )\n" +"{\n" +" outData->width = get_image_width( input );\n" +" outData->channelType = get_image_channel_data_type( input );\n" +" outData->channelOrder = get_image_channel_order( input );\n" +"\n" +" outData->expectedChannelType = %s;\n" +" outData->expectedChannelOrder = %s;\n" +"}"; + +static int test_get_1Dimage_info_single( cl_device_id device, image_descriptor *imageInfo, MTdata d ) +{ + int error = 0; + + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper image, outDataBuffer; + char programSrc[ 10240 ]; + + image_kernel_data outKernelData; + + // Generate some data to test against + BufferOwningPtr imageValues; + generate_random_image_data( imageInfo, imageValues, d ); + + // Construct 
testing source + if( gDebugTrace ) + log_info( " - Creating 1D image %d ...\n", (int)imageInfo->width ); + + image = create_image_1d( context, (cl_mem_flags)(CL_MEM_READ_ONLY), imageInfo->format, imageInfo->width, 0, NULL, NULL, &error ); + if( image == NULL ) + { + log_error( "ERROR: Unable to create 1D image of size %d (%s)", (int)imageInfo->width, IGetErrorString( error ) ); + return -1; + } + + char channelTypeConstantString[256] = {0}; + char channelOrderConstantString[256] = {0}; + + const char* channelTypeName = GetChannelTypeName( imageInfo->format->image_channel_data_type ); + const char* channelOrderName = GetChannelOrderName( imageInfo->format->image_channel_order ); + + if(channelTypeName && strlen(channelTypeName)) + sprintf(channelTypeConstantString, "CLK_%s", &channelTypeName[3]); // replace CL_* with CLK_* + + if(channelOrderName && strlen(channelOrderName)) + sprintf(channelOrderConstantString, "CLK_%s", &channelOrderName[3]); // replace CL_* with CLK_* + + // Create a program to run against + sprintf( programSrc, methodTest1DImageKernelPattern, + channelTypeConstantString, channelOrderConstantString); + + //log_info("-----------------------------------\n%s\n", programSrc); + error = clFinish(queue); + if (error) + print_error(error, "clFinish failed.\n"); + const char *ptr = programSrc; + error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", "-cl-std=CL2.0" ); + test_error( error, "Unable to create kernel to test against" ); + + // Create an output buffer + outDataBuffer = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof( outKernelData ), NULL, &error ); + test_error( error, "Unable to create output buffer" ); + + // Set up arguments and run + error = clSetKernelArg( kernel, 0, sizeof( image ), &image ); + test_error( error, "Unable to set kernel argument" ); + error = clSetKernelArg( kernel, 1, sizeof( outDataBuffer ), &outDataBuffer ); + test_error( error, "Unable to set 
kernel argument" ); + + size_t threads[1] = { 1 }, localThreads[1] = { 1 }; + + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); + test_error( error, "Unable to run kernel" ); + + error = clEnqueueReadBuffer( queue, outDataBuffer, CL_TRUE, 0, sizeof( outKernelData ), &outKernelData, 0, NULL, NULL ); + test_error( error, "Unable to read data buffer" ); + + + // Verify the results now + if( outKernelData.width != (cl_int)imageInfo->width ) + { + log_error( "ERROR: Returned width did not validate (expected %d, got %d)\n", (int)imageInfo->width, (int)outKernelData.width ); + error = -1; + } + if( outKernelData.channelType != (cl_int)outKernelData.expectedChannelType ) + { + log_error( "ERROR: Returned channel type did not validate (expected %s (%d), got %d)\n", GetChannelTypeName( imageInfo->format->image_channel_data_type ), + (int)outKernelData.expectedChannelType, (int)outKernelData.channelType ); + error = -1; + } + if( outKernelData.channelOrder != (cl_int)outKernelData.expectedChannelOrder ) + { + log_error( "ERROR: Returned channel order did not validate (expected %s (%d), got %d)\n", GetChannelOrderName( imageInfo->format->image_channel_order ), + (int)outKernelData.expectedChannelOrder, (int)outKernelData.channelOrder ); + error = -1; + } + + if( clFinish(queue) != CL_SUCCESS ) + { + log_error( "ERROR: CL Finished failed in %s \n", __FUNCTION__); + error = -1; + } + + return error; +} + +int test_get_image_info_1D( cl_device_id device, cl_image_format *format ) +{ + size_t maxWidth; + cl_ulong maxAllocSize, memSize; + image_descriptor imageInfo = { 0 }; + RandomSeed seed( gRandomSeed ); + size_t pixelSize; + + imageInfo.type = CL_MEM_OBJECT_IMAGE1D; + imageInfo.format = format; + imageInfo.height = imageInfo.depth = imageInfo.slicePitch = 0; + pixelSize = get_pixel_size( imageInfo.format ); + + int error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL ); + error |= 
clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL ); + test_error( error, "Unable to get max image 1D size from device" ); + + if (memSize > (cl_ulong)SIZE_MAX) { + memSize = (cl_ulong)SIZE_MAX; + } + + if( gTestSmallImages ) + { + for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ ) + { + imageInfo.rowPitch = imageInfo.width * pixelSize; + if( gDebugTrace ) + log_info( " at size %d\n", (int)imageInfo.width ); + + int ret = test_get_1Dimage_info_single( device, &imageInfo, seed ); + if( ret ) + return -1; + } + } + else if( gTestMaxImages ) + { + // Try a specific set of maximum sizes + size_t numbeOfSizes; + size_t sizes[100][3]; + + get_max_sizes(&numbeOfSizes, 100, sizes, maxWidth, 1, 1, 1, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE1D, imageInfo.format); + + for( size_t idx = 0; idx < numbeOfSizes; idx++ ) + { + imageInfo.width = sizes[ idx ][ 0 ]; + imageInfo.rowPitch = imageInfo.width * pixelSize; + + log_info( "Testing %d\n", (int)sizes[ idx ][ 0 ]); + if( gDebugTrace ) + log_info( " at max size %d\n", (int)sizes[ idx ][ 0 ] ); + if( test_get_1Dimage_info_single( device, &imageInfo, seed ) ) + return -1; + } + } + else + { + for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ ) + { + cl_ulong size; + // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that + // image, the result array, plus offset arrays, will fit in the global ram space + do + { + imageInfo.width = (size_t)random_log_in_range( 16, (int)maxWidth / 32, seed ); + + imageInfo.rowPitch = imageInfo.width * pixelSize; + size_t extraWidth = (int)random_log_in_range( 0, 64, seed ); + imageInfo.rowPitch += extraWidth; + + do { + extraWidth++; + imageInfo.rowPitch += extraWidth; + } while ((imageInfo.rowPitch % pixelSize) != 0); + + size = (cl_ulong)imageInfo.rowPitch * (cl_ulong)imageInfo.height * 4; + 
} while( size > maxAllocSize || ( size * 3 ) > memSize ); + + if( gDebugTrace ) + log_info( " at size %d (row pitch %d) out of %d\n", (int)imageInfo.width, (int)imageInfo.rowPitch, (int)maxWidth ); + int ret = test_get_1Dimage_info_single( device, &imageInfo, seed ); + if( ret ) + return -1; + } + } + + return 0; +} diff --git a/test_conformance/images/kernel_image_methods/test_1D_array.cpp b/test_conformance/images/kernel_image_methods/test_1D_array.cpp new file mode 100644 index 00000000..c99b3f8e --- /dev/null +++ b/test_conformance/images/kernel_image_methods/test_1D_array.cpp @@ -0,0 +1,255 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../testBase.h" + +#define MAX_ERR 0.005f +#define MAX_HALF_LINEAR_ERR 0.3f + +extern bool gDebugTrace, gTestSmallImages, gTestMaxImages; + +extern clCommandQueueWrapper queue; +extern clContextWrapper context; + +typedef struct image_kernel_data +{ + cl_int width; + cl_int arraySize; + cl_int channelType; + cl_int channelOrder; + cl_int expectedChannelType; + cl_int expectedChannelOrder; +}; + +static const char *methodTestKernelPattern = +"typedef struct {\n" +" int width;\n" +" int arraySize;\n" +" int channelType;\n" +" int channelOrder;\n" +" int expectedChannelType;\n" +" int expectedChannelOrder;\n" +" } image_kernel_data;\n" +"__kernel void sample_kernel( read_only image1d_array_t input, __global image_kernel_data *outData )\n" +"{\n" +" outData->width = get_image_width( input );\n" +" outData->arraySize = get_image_array_size( input );\n" +" outData->channelType = get_image_channel_data_type( input );\n" +" outData->channelOrder = get_image_channel_order( input );\n" +"\n" +" outData->expectedChannelType = %s;\n" +" outData->expectedChannelOrder = %s;\n" +"}"; + +int test_get_1Dimage_array_info_single( cl_device_id device, image_descriptor *imageInfo, MTdata d ) +{ + int error = 0; + + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper image, outDataBuffer; + char programSrc[ 10240 ]; + + image_kernel_data outKernelData; + + // Generate some data to test against + BufferOwningPtr imageValues; + generate_random_image_data( imageInfo, imageValues, d ); + + // Construct testing source + if( gDebugTrace ) + log_info( " - Creating 1D image array %d by %d...\n", (int)imageInfo->width, (int)imageInfo->arraySize ); + + image = create_image_1d_array( context, (cl_mem_flags)(CL_MEM_READ_ONLY), imageInfo->format, imageInfo->width, imageInfo->arraySize, 0, 0, NULL, &error ); + if( image == NULL ) + { + log_error( "ERROR: Unable to create 1D image array of size %d x %d (%s)", (int)imageInfo->width, (int)imageInfo->arraySize, 
IGetErrorString( error ) ); + return -1; + } + + char channelTypeConstantString[256] = {0}; + char channelOrderConstantString[256] = {0}; + + const char* channelTypeName = GetChannelTypeName( imageInfo->format->image_channel_data_type ); + const char* channelOrderName = GetChannelOrderName( imageInfo->format->image_channel_order ); + + if(channelTypeName && strlen(channelTypeName)) + sprintf(channelTypeConstantString, "CLK_%s", &channelTypeName[3]); // replace CL_* with CLK_* + + if(channelOrderName && strlen(channelOrderName)) + sprintf(channelOrderConstantString, "CLK_%s", &channelOrderName[3]); // replace CL_* with CLK_* + + // Create a program to run against + sprintf( programSrc, methodTestKernelPattern, + channelTypeConstantString, channelOrderConstantString); + + //log_info("-----------------------------------\n%s\n", programSrc); + error = clFinish(queue); + if (error) + print_error(error, "clFinish failed.\n"); + const char *ptr = programSrc; + error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", "-cl-std=CL2.0" ); + test_error( error, "Unable to create kernel to test against" ); + + // Create an output buffer + outDataBuffer = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof( outKernelData ), NULL, &error ); + test_error( error, "Unable to create output buffer" ); + + // Set up arguments and run + error = clSetKernelArg( kernel, 0, sizeof( image ), &image ); + test_error( error, "Unable to set kernel argument" ); + error = clSetKernelArg( kernel, 1, sizeof( outDataBuffer ), &outDataBuffer ); + test_error( error, "Unable to set kernel argument" ); + + size_t threads[1] = { 1 }, localThreads[1] = { 1 }; + + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); + test_error( error, "Unable to run kernel" ); + + error = clEnqueueReadBuffer( queue, outDataBuffer, CL_TRUE, 0, sizeof( outKernelData ), &outKernelData, 0, NULL, NULL ); + test_error( 
error, "Unable to read data buffer" ); + + + // Verify the results now + if( outKernelData.width != (cl_int)imageInfo->width ) + { + log_error( "ERROR: Returned width did not validate (expected %d, got %d)\n", (int)imageInfo->width, (int)outKernelData.width ); + error = -1; + } + if( outKernelData.arraySize != (cl_int)imageInfo->arraySize ) + { + log_error( "ERROR: Returned array size did not validate (expected %d, got %d)\n", (int)imageInfo->arraySize, (int)outKernelData.arraySize ); + error = -1; + } + if( outKernelData.channelType != (cl_int)outKernelData.expectedChannelType ) + { + log_error( "ERROR: Returned channel type did not validate (expected %s (%d), got %d)\n", GetChannelTypeName( imageInfo->format->image_channel_data_type ), + (int)outKernelData.expectedChannelType, (int)outKernelData.channelType ); + error = -1; + } + if( outKernelData.channelOrder != (cl_int)outKernelData.expectedChannelOrder ) + { + log_error( "ERROR: Returned channel order did not validate (expected %s (%d), got %d)\n", GetChannelOrderName( imageInfo->format->image_channel_order ), + (int)outKernelData.expectedChannelOrder, (int)outKernelData.channelOrder ); + error = -1; + } + + if( clFinish(queue) != CL_SUCCESS ) + { + log_error( "ERROR: CL Finished failed in %s \n", __FUNCTION__); + error = -1; + } + + return error; +} + +int test_get_image_info_1D_array( cl_device_id device, cl_image_format *format ) +{ + size_t maxWidth, maxArraySize; + cl_ulong maxAllocSize, memSize; + image_descriptor imageInfo = { 0 }; + RandomSeed seed( gRandomSeed ); + size_t pixelSize; + + imageInfo.type = CL_MEM_OBJECT_IMAGE1D_ARRAY; + imageInfo.format = format; + imageInfo.height = imageInfo.depth = imageInfo.slicePitch = 0; + pixelSize = get_pixel_size( imageInfo.format ); + + int error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, sizeof( maxArraySize ), &maxArraySize, NULL ); + error 
|= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL ); + test_error( error, "Unable to get max image 2D size from device" ); + + if (memSize > (cl_ulong)SIZE_MAX) { + memSize = (cl_ulong)SIZE_MAX; + } + + if( gTestSmallImages ) + { + for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ ) + { + imageInfo.rowPitch = imageInfo.width * pixelSize; + imageInfo.slicePitch = imageInfo.rowPitch; + for( imageInfo.arraySize = 1; imageInfo.arraySize < 9; imageInfo.arraySize++ ) + { + if( gDebugTrace ) + log_info( " at size %d,%d\n", (int)imageInfo.width, (int)imageInfo.arraySize ); + + int ret = test_get_1Dimage_array_info_single( device, &imageInfo, seed ); + if( ret ) + return -1; + } + } + } + else if( gTestMaxImages ) + { + // Try a specific set of maximum sizes + size_t numbeOfSizes; + size_t sizes[100][3]; + + get_max_sizes(&numbeOfSizes, 100, sizes, maxWidth, 1, 1, maxArraySize, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE1D_ARRAY, imageInfo.format); + + for( size_t idx = 0; idx < numbeOfSizes; idx++ ) + { + imageInfo.width = sizes[ idx ][ 0 ]; + imageInfo.arraySize = sizes[ idx ][ 2 ]; + imageInfo.rowPitch = imageInfo.width * pixelSize; + imageInfo.slicePitch = imageInfo.rowPitch; + + log_info( "Testing %d x %d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 2 ]); + if( gDebugTrace ) + log_info( " at max size %d,%d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 2 ] ); + if( test_get_1Dimage_array_info_single( device, &imageInfo, seed ) ) + return -1; + } + } + else + { + for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ ) + { + cl_ulong size; + // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that + // image, the result array, plus offset arrays, will fit in the global ram space + do + { + imageInfo.width = (size_t)random_log_in_range( 16, (int)maxWidth / 32, seed ); + 
imageInfo.arraySize = (size_t)random_log_in_range( 16, (int)maxArraySize / 32, seed ); + + imageInfo.rowPitch = imageInfo.width * pixelSize; + size_t extraWidth = (int)random_log_in_range( 0, 64, seed ); + imageInfo.rowPitch += extraWidth; + + do { + extraWidth++; + imageInfo.rowPitch += extraWidth; + } while ((imageInfo.rowPitch % pixelSize) != 0); + + imageInfo.slicePitch = imageInfo.rowPitch; + + size = (cl_ulong)imageInfo.rowPitch * (cl_ulong)imageInfo.arraySize * 4; + } while( size > maxAllocSize || ( size * 3 ) > memSize ); + + if( gDebugTrace ) + log_info( " at size %d,%d (row pitch %d) out of %d,%d\n", (int)imageInfo.width, (int)imageInfo.arraySize, (int)imageInfo.rowPitch, (int)maxWidth, (int)maxArraySize ); + int ret = test_get_1Dimage_array_info_single( device, &imageInfo, seed ); + if( ret ) + return -1; + } + } + + return 0; +} diff --git a/test_conformance/images/kernel_image_methods/test_2D.cpp b/test_conformance/images/kernel_image_methods/test_2D.cpp new file mode 100644 index 00000000..6eddbebb --- /dev/null +++ b/test_conformance/images/kernel_image_methods/test_2D.cpp @@ -0,0 +1,295 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../testBase.h" + +#define MAX_ERR 0.005f +#define MAX_HALF_LINEAR_ERR 0.3f + +extern bool gDebugTrace, gTestSmallImages, gTestMaxImages; + +extern clCommandQueueWrapper queue; +extern clContextWrapper context; + +typedef struct image_kernel_data +{ + cl_int width; + cl_int height; + cl_int depth; + cl_int widthDim; + cl_int heightDim; + cl_int depthDim; + cl_int channelType; + cl_int channelOrder; + cl_int expectedChannelType; + cl_int expectedChannelOrder; +}; + +static const char *methodTestKernelPattern = +"typedef struct {\n" +" int width;\n" +" int height;\n" +" int depth;\n" +" int widthDim;\n" +" int heightDim;\n" +" int depthDim;\n" +" int channelType;\n" +" int channelOrder;\n" +" int expectedChannelType;\n" +" int expectedChannelOrder;\n" +" } image_kernel_data;\n" +"__kernel void sample_kernel( read_only image%dd%s_t input, __global image_kernel_data *outData )\n" +"{\n" +" outData->width = get_image_width( input );\n" +" outData->height = get_image_height( input );\n" +"%s\n" +" int%d dim = get_image_dim( input );\n" +" outData->widthDim = dim.x;\n" +" outData->heightDim = dim.y;\n" +"%s\n" +" outData->channelType = get_image_channel_data_type( input );\n" +" outData->channelOrder = get_image_channel_order( input );\n" +"\n" +" outData->expectedChannelType = %s;\n" +" outData->expectedChannelOrder = %s;\n" +"}"; + +static const char *depthKernelLine = " outData->depth = get_image_depth( input );\n"; +static const char *depthDimKernelLine = " outData->depthDim = dim.z;\n"; + +int test_get_image_info_single( cl_device_id device, image_descriptor *imageInfo, MTdata d ) +{ + int error = 0; + + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper image, outDataBuffer; + char programSrc[ 10240 ]; + + image_kernel_data outKernelData; + + // Generate some data to test against + BufferOwningPtr imageValues; + generate_random_image_data( imageInfo, imageValues, d ); + + // Construct testing source + if( gDebugTrace ) + log_info( " - 
Creating image %d by %d...\n", (int)imageInfo->width, (int)imageInfo->height ); + + if( imageInfo->depth != 0 ) + image = create_image_3d( context, (cl_mem_flags)(CL_MEM_READ_ONLY), imageInfo->format, imageInfo->width, imageInfo->height, imageInfo->depth, 0, 0, NULL, &error ); + else + image = create_image_2d( context, (cl_mem_flags)(CL_MEM_READ_ONLY), imageInfo->format, imageInfo->width, imageInfo->height, 0, NULL, &error ); + if( image == NULL ) + { + log_error( "ERROR: Unable to create image of size %d x %d x %d (%s)", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->depth, IGetErrorString( error ) ); + return -1; + } + + char channelTypeConstantString[256] = {0}; + char channelOrderConstantString[256] = {0}; + + const char* channelTypeName = GetChannelTypeName( imageInfo->format->image_channel_data_type ); + const char* channelOrderName = GetChannelOrderName( imageInfo->format->image_channel_order ); + + if(channelTypeName && strlen(channelTypeName)) + sprintf(channelTypeConstantString, "CLK_%s", &channelTypeName[3]); // replace CL_* with CLK_* + + if(channelOrderName && strlen(channelOrderName)) + sprintf(channelOrderConstantString, "CLK_%s", &channelOrderName[3]); // replace CL_* with CLK_* + + // Create a program to run against + sprintf( programSrc, methodTestKernelPattern, + ( imageInfo->depth != 0 ) ? 3 : 2, + (imageInfo->format->image_channel_order == CL_DEPTH) ? "_depth" : "", + ( imageInfo->depth != 0 ) ? depthKernelLine : "", + ( imageInfo->depth != 0 ) ? 4 : 2, + ( imageInfo->depth != 0 ) ? 
depthDimKernelLine : "", + channelTypeConstantString, channelOrderConstantString); + + //log_info("-----------------------------------\n%s\n", programSrc); + error = clFinish(queue); + if (error) + print_error(error, "clFinish failed.\n"); + const char *ptr = programSrc; + error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", "-cl-std=CL2.0" ); + test_error( error, "Unable to create kernel to test against" ); + + // Create an output buffer + outDataBuffer = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof( outKernelData ), NULL, &error ); + test_error( error, "Unable to create output buffer" ); + + // Set up arguments and run + error = clSetKernelArg( kernel, 0, sizeof( image ), &image ); + test_error( error, "Unable to set kernel argument" ); + error = clSetKernelArg( kernel, 1, sizeof( outDataBuffer ), &outDataBuffer ); + test_error( error, "Unable to set kernel argument" ); + + size_t threads[1] = { 1 }, localThreads[1] = { 1 }; + + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); + test_error( error, "Unable to run kernel" ); + + error = clEnqueueReadBuffer( queue, outDataBuffer, CL_TRUE, 0, sizeof( outKernelData ), &outKernelData, 0, NULL, NULL ); + test_error( error, "Unable to read data buffer" ); + + + // Verify the results now + if( outKernelData.width != (cl_int)imageInfo->width ) + { + log_error( "ERROR: Returned width did not validate (expected %d, got %d)\n", (int)imageInfo->width, (int)outKernelData.width ); + error = -1; + } + if( outKernelData.height != (cl_int)imageInfo->height ) + { + log_error( "ERROR: Returned height did not validate (expected %d, got %d)\n", (int)imageInfo->height, (int)outKernelData.height ); + error = -1; + } + if( ( imageInfo->depth != 0 ) && ( outKernelData.depth != (cl_int)imageInfo->depth ) ) + { + log_error( "ERROR: Returned depth did not validate (expected %d, got %d)\n", (int)imageInfo->depth, 
(int)outKernelData.depth ); + error = -1; + } + if( outKernelData.widthDim != (cl_int)imageInfo->width ) + { + log_error( "ERROR: Returned width from get_image_dim did not validate (expected %d, got %d)\n", (int)imageInfo->width, (int)outKernelData.widthDim ); + error = -1; + } + if( outKernelData.heightDim != (cl_int)imageInfo->height ) + { + log_error( "ERROR: Returned height from get_image_dim did not validate (expected %d, got %d)\n", (int)imageInfo->height, (int)outKernelData.heightDim ); + error = -1; + } + if( ( imageInfo->depth != 0 ) && ( outKernelData.depthDim != (cl_int)imageInfo->depth ) ) + { + log_error( "ERROR: Returned depth from get_image_dim did not validate (expected %d, got %d)\n", (int)imageInfo->depth, (int)outKernelData.depthDim ); + error = -1; + } + if( outKernelData.channelType != (cl_int)outKernelData.expectedChannelType ) + { + log_error( "ERROR: Returned channel type did not validate (expected %s (%d), got %d)\n", GetChannelTypeName( imageInfo->format->image_channel_data_type ), + (int)outKernelData.expectedChannelType, (int)outKernelData.channelType ); + error = -1; + } + if( outKernelData.channelOrder != (cl_int)outKernelData.expectedChannelOrder ) + { + log_error( "ERROR: Returned channel order did not validate (expected %s (%d), got %d)\n", GetChannelOrderName( imageInfo->format->image_channel_order ), + (int)outKernelData.expectedChannelOrder, (int)outKernelData.channelOrder ); + error = -1; + } + + if( clFinish(queue) != CL_SUCCESS ) + { + log_error( "ERROR: CL Finished failed in %s \n", __FUNCTION__); + error = -1; + } + + return error; +} + +int test_get_image_info_2D( cl_device_id device, cl_image_format *format ) +{ + size_t maxWidth, maxHeight; + cl_ulong maxAllocSize, memSize; + image_descriptor imageInfo = { 0 }; + RandomSeed seed( gRandomSeed ); + size_t pixelSize; + + imageInfo.type = CL_MEM_OBJECT_IMAGE2D; + imageInfo.format = format; + imageInfo.depth = imageInfo.slicePitch = 0; + pixelSize = get_pixel_size( 
imageInfo.format ); + + int error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL ); + test_error( error, "Unable to get max image 2D size from device" ); + + if (memSize > (cl_ulong)SIZE_MAX) { + memSize = (cl_ulong)SIZE_MAX; + } + + if( gTestSmallImages ) + { + for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ ) + { + imageInfo.rowPitch = imageInfo.width * pixelSize; + for( imageInfo.height = 1; imageInfo.height < 9; imageInfo.height++ ) + { + if( gDebugTrace ) + log_info( " at size %d,%d\n", (int)imageInfo.width, (int)imageInfo.height ); + + int ret = test_get_image_info_single( device, &imageInfo, seed ); + if( ret ) + return -1; + } + } + } + else if( gTestMaxImages ) + { + // Try a specific set of maximum sizes + size_t numbeOfSizes; + size_t sizes[100][3]; + + get_max_sizes(&numbeOfSizes, 100, sizes, maxWidth, maxHeight, 1, 1, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE2D, imageInfo.format); + + for( size_t idx = 0; idx < numbeOfSizes; idx++ ) + { + imageInfo.width = sizes[ idx ][ 0 ]; + imageInfo.height = sizes[ idx ][ 1 ]; + imageInfo.rowPitch = imageInfo.width * pixelSize; + + log_info( "Testing %d x %d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ]); + if( gDebugTrace ) + log_info( " at max size %d,%d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ] ); + if( test_get_image_info_single( device, &imageInfo, seed ) ) + return -1; + } + } + else + { + for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ ) + { + cl_ulong size; + // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that + // image, the result array, plus offset arrays, will fit in 
the global ram space + do + { + imageInfo.width = (size_t)random_log_in_range( 16, (int)maxWidth / 32, seed ); + imageInfo.height = (size_t)random_log_in_range( 16, (int)maxHeight / 32, seed ); + + imageInfo.rowPitch = imageInfo.width * pixelSize; + size_t extraWidth = (int)random_log_in_range( 0, 64, seed ); + imageInfo.rowPitch += extraWidth; + + do { + extraWidth++; + imageInfo.rowPitch += extraWidth; + } while ((imageInfo.rowPitch % pixelSize) != 0); + + size = (cl_ulong)imageInfo.rowPitch * (cl_ulong)imageInfo.height * 4; + } while( size > maxAllocSize || ( size * 3 ) > memSize ); + + if( gDebugTrace ) + log_info( " at size %d,%d (row pitch %d) out of %d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.rowPitch, (int)maxWidth, (int)maxHeight ); + int ret = test_get_image_info_single( device, &imageInfo, seed ); + if( ret ) + return -1; + } + } + + return 0; +} diff --git a/test_conformance/images/kernel_image_methods/test_2D_array.cpp b/test_conformance/images/kernel_image_methods/test_2D_array.cpp new file mode 100644 index 00000000..3542077e --- /dev/null +++ b/test_conformance/images/kernel_image_methods/test_2D_array.cpp @@ -0,0 +1,272 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../testBase.h" + +#define MAX_ERR 0.005f +#define MAX_HALF_LINEAR_ERR 0.3f + +extern bool gDebugTrace, gTestSmallImages, gTestMaxImages; + +extern clCommandQueueWrapper queue; +extern clContextWrapper context; + +typedef struct image_kernel_data +{ + cl_int width; + cl_int height; + cl_int arraySize; + cl_int channelType; + cl_int channelOrder; + cl_int expectedChannelType; + cl_int expectedChannelOrder; +}; + +static const char *methodTestKernelPattern = +"typedef struct {\n" +" int width;\n" +" int height;\n" +" int arraySize;\n" +" int channelType;\n" +" int channelOrder;\n" +" int expectedChannelType;\n" +" int expectedChannelOrder;\n" +" } image_kernel_data;\n" +"__kernel void sample_kernel( read_only %s input, __global image_kernel_data *outData )\n" +"{\n" +" outData->width = get_image_width( input );\n" +" outData->height = get_image_height( input );\n" +" outData->arraySize = get_image_array_size( input );\n" +" outData->channelType = get_image_channel_data_type( input );\n" +" outData->channelOrder = get_image_channel_order( input );\n" +"\n" +" outData->expectedChannelType = %s;\n" +" outData->expectedChannelOrder = %s;\n" +"}"; + +int test_get_2Dimage_array_info_single( cl_device_id device, image_descriptor *imageInfo, MTdata d ) +{ + int error = 0; + + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper image, outDataBuffer; + char programSrc[ 10240 ]; + + image_kernel_data outKernelData; + + // Generate some data to test against + BufferOwningPtr imageValues; + generate_random_image_data( imageInfo, imageValues, d ); + + // Construct testing source + if( gDebugTrace ) + log_info( " - Creating 2D image array %d by %d by %d...\n", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->arraySize ); + + image = create_image_2d_array( context, (cl_mem_flags)(CL_MEM_READ_ONLY), imageInfo->format, imageInfo->width, imageInfo->height, imageInfo->arraySize, 0, 0, NULL, &error ); + if( image == NULL ) + { + log_error( 
"ERROR: Unable to create 2D image array of size %d x %d x %d (%s)", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->arraySize, IGetErrorString( error ) ); + return -1; + } + + char channelTypeConstantString[256] = {0}; + char channelOrderConstantString[256] = {0}; + + const char* channelTypeName = GetChannelTypeName( imageInfo->format->image_channel_data_type ); + const char* channelOrderName = GetChannelOrderName( imageInfo->format->image_channel_order ); + + if(channelTypeName && strlen(channelTypeName)) + sprintf(channelTypeConstantString, "CLK_%s", &channelTypeName[3]); // replace CL_* with CLK_* + + if(channelOrderName && strlen(channelOrderName)) + sprintf(channelOrderConstantString, "CLK_%s", &channelOrderName[3]); // replace CL_* with CLK_* + + // Create a program to run against + sprintf( programSrc, methodTestKernelPattern, + (imageInfo->format->image_channel_order == CL_DEPTH) ? "image2d_array_depth_t" : "image2d_array_t" , + channelTypeConstantString, channelOrderConstantString); + + //log_info("-----------------------------------\n%s\n", programSrc); + error = clFinish(queue); + if (error) + print_error(error, "clFinish failed.\n"); + const char *ptr = programSrc; + error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", "-cl-std=CL2.0" ); + test_error( error, "Unable to create kernel to test against" ); + + // Create an output buffer + outDataBuffer = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof( outKernelData ), NULL, &error ); + test_error( error, "Unable to create output buffer" ); + + // Set up arguments and run + error = clSetKernelArg( kernel, 0, sizeof( image ), &image ); + test_error( error, "Unable to set kernel argument" ); + error = clSetKernelArg( kernel, 1, sizeof( outDataBuffer ), &outDataBuffer ); + test_error( error, "Unable to set kernel argument" ); + + size_t threads[1] = { 1 }, localThreads[1] = { 1 }; + + error = clEnqueueNDRangeKernel( 
queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); + test_error( error, "Unable to run kernel" ); + + error = clEnqueueReadBuffer( queue, outDataBuffer, CL_TRUE, 0, sizeof( outKernelData ), &outKernelData, 0, NULL, NULL ); + test_error( error, "Unable to read data buffer" ); + + + // Verify the results now + if( outKernelData.width != (cl_int)imageInfo->width ) + { + log_error( "ERROR: Returned width did not validate (expected %d, got %d)\n", (int)imageInfo->width, (int)outKernelData.width ); + error = -1; + } + if( outKernelData.height != (cl_int)imageInfo->height ) + { + log_error( "ERROR: Returned height did not validate (expected %d, got %d)\n", (int)imageInfo->height, (int)outKernelData.height ); + error = -1; + } + if( outKernelData.arraySize != (cl_int)imageInfo->arraySize ) + { + log_error( "ERROR: Returned array size did not validate (expected %d, got %d)\n", (int)imageInfo->arraySize, (int)outKernelData.arraySize ); + error = -1; + } + if( outKernelData.channelType != (cl_int)outKernelData.expectedChannelType ) + { + log_error( "ERROR: Returned channel type did not validate (expected %s (%d), got %d)\n", GetChannelTypeName( imageInfo->format->image_channel_data_type ), + (int)outKernelData.expectedChannelType, (int)outKernelData.channelType ); + error = -1; + } + if( outKernelData.channelOrder != (cl_int)outKernelData.expectedChannelOrder ) + { + log_error( "ERROR: Returned channel order did not validate (expected %s (%d), got %d)\n", GetChannelOrderName( imageInfo->format->image_channel_order ), + (int)outKernelData.expectedChannelOrder, (int)outKernelData.channelOrder ); + error = -1; + } + + if( clFinish(queue) != CL_SUCCESS ) + { + log_error( "ERROR: CL Finished failed in %s \n", __FUNCTION__); + error = -1; + } + + return error; +} + +int test_get_image_info_2D_array( cl_device_id device, cl_image_format *format ) +{ + size_t maxWidth, maxHeight, maxArraySize; + cl_ulong maxAllocSize, memSize; + image_descriptor imageInfo = { 0 }; + 
RandomSeed seed( gRandomSeed ); + size_t pixelSize; + + imageInfo.type = CL_MEM_OBJECT_IMAGE2D_ARRAY; + imageInfo.format = format; + pixelSize = get_pixel_size( imageInfo.format ); + + int error = clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, sizeof( maxArraySize ), &maxArraySize, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL ); + test_error( error, "Unable to get max image 3D size from device" ); + + if (memSize > (cl_ulong)SIZE_MAX) { + memSize = (cl_ulong)SIZE_MAX; + } + + if( gTestSmallImages ) + { + for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ ) + { + imageInfo.rowPitch = imageInfo.width * pixelSize; + + for( imageInfo.height = 1; imageInfo.height < 9; imageInfo.height++ ) + { + imageInfo.slicePitch = imageInfo.rowPitch * imageInfo.height; + for( imageInfo.arraySize = 2; imageInfo.arraySize < 9; imageInfo.arraySize++ ) + { + if( gDebugTrace ) + log_info( " at size %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.arraySize ); + int ret = test_get_2Dimage_array_info_single( device, &imageInfo, seed ); + if( ret ) + return -1; + } + } + } + } + else if( gTestMaxImages ) + { + // Try a specific set of maximum sizes + size_t numbeOfSizes; + size_t sizes[100][3]; + + get_max_sizes(&numbeOfSizes, 100, sizes, maxWidth, maxHeight, 1, maxArraySize, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE2D_ARRAY, imageInfo.format); + + for( size_t idx = 0; idx < numbeOfSizes; idx++ ) + { + imageInfo.width = sizes[ idx ][ 0 ]; + imageInfo.height = sizes[ idx ][ 1 ]; + imageInfo.arraySize = sizes[ idx ][ 2 ]; + imageInfo.rowPitch = imageInfo.width * 
pixelSize; + imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch; + + log_info( "Testing %d x %d x %d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ], (int)sizes[ idx ][ 2 ] ); + if( gDebugTrace ) + log_info( " at max size %d,%d,%d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ], (int)sizes[ idx ][ 2 ] ); + if( test_get_2Dimage_array_info_single( device, &imageInfo, seed ) ) + return -1; + } + } + else + { + for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ ) + { + cl_ulong size; + // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that + // image, the result array, plus offset arrays, will fit in the global ram space + do + { + imageInfo.width = (size_t)random_log_in_range( 16, (int)maxWidth / 32, seed ); + imageInfo.height = (size_t)random_log_in_range( 16, (int)maxHeight / 32, seed ); + imageInfo.arraySize = (size_t)random_log_in_range( 16, (int)maxArraySize / 32, seed ); + + imageInfo.rowPitch = imageInfo.width * pixelSize; + imageInfo.slicePitch = imageInfo.rowPitch * imageInfo.height; + + size_t extraWidth = (int)random_log_in_range( 0, 64, seed ); + imageInfo.rowPitch += extraWidth; + + do { + extraWidth++; + imageInfo.rowPitch += extraWidth; + } while ((imageInfo.rowPitch % pixelSize) != 0); + + size_t extraHeight = (int)random_log_in_range( 0, 8, seed ); + imageInfo.slicePitch = imageInfo.rowPitch * (imageInfo.height + extraHeight); + + size = (cl_ulong)imageInfo.slicePitch * (cl_ulong)imageInfo.arraySize * 4 * 4; + } while( size > maxAllocSize || ( size * 3 ) > memSize ); + + if( gDebugTrace ) + log_info( " at size %d,%d,%d (pitch %d,%d) out of %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.arraySize, (int)imageInfo.rowPitch, (int)imageInfo.slicePitch, (int)maxWidth, (int)maxHeight, (int)maxArraySize ); + int ret = test_get_2Dimage_array_info_single( device, &imageInfo, seed ); + if( ret ) + return -1; + } + } + + return 0; +} diff --git 
a/test_conformance/images/kernel_image_methods/test_3D.cpp b/test_conformance/images/kernel_image_methods/test_3D.cpp new file mode 100644 index 00000000..1e8237be --- /dev/null +++ b/test_conformance/images/kernel_image_methods/test_3D.cpp @@ -0,0 +1,131 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../testBase.h" + +#define MAX_ERR 0.005f +#define MAX_HALF_LINEAR_ERR 0.3f + +extern bool gDebugTrace, gTestSmallImages, gTestMaxImages; + +extern int test_get_image_info_single( cl_device_id device, image_descriptor *imageInfo, MTdata d ); + +int test_get_image_info_3D( cl_device_id device, cl_image_format *format ) +{ + size_t maxWidth, maxHeight, maxDepth; + cl_ulong maxAllocSize, memSize; + image_descriptor imageInfo = { 0 }; + RandomSeed seed( gRandomSeed ); + size_t pixelSize; + + imageInfo.type = CL_MEM_OBJECT_IMAGE3D; + imageInfo.format = format; + pixelSize = get_pixel_size( imageInfo.format ); + + int error = clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof( maxDepth ), &maxDepth, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, 
sizeof( memSize ), &memSize, NULL ); + test_error( error, "Unable to get max image 3D size from device" ); + + if (memSize > (cl_ulong)SIZE_MAX) { + memSize = (cl_ulong)SIZE_MAX; + } + + if( gTestSmallImages ) + { + for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ ) + { + imageInfo.rowPitch = imageInfo.width * pixelSize; + + for( imageInfo.height = 1; imageInfo.height < 9; imageInfo.height++ ) + { + imageInfo.slicePitch = imageInfo.rowPitch * imageInfo.height; + for( imageInfo.depth = 2; imageInfo.depth < 9; imageInfo.depth++ ) + { + if( gDebugTrace ) + log_info( " at size %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.depth ); + int ret = test_get_image_info_single( device, &imageInfo, seed ); + if( ret ) + return -1; + } + } + } + } + else if( gTestMaxImages ) + { + // Try a specific set of maximum sizes + size_t numbeOfSizes; + size_t sizes[100][3]; + + get_max_sizes(&numbeOfSizes, 100, sizes, maxWidth, maxHeight, maxDepth, 1, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE3D, imageInfo.format); + + for( size_t idx = 0; idx < numbeOfSizes; idx++ ) + { + imageInfo.width = sizes[ idx ][ 0 ]; + imageInfo.height = sizes[ idx ][ 1 ]; + imageInfo.depth = sizes[ idx ][ 2 ]; + imageInfo.rowPitch = imageInfo.width * pixelSize; + imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch; + + log_info( "Testing %d x %d x %d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ], (int)sizes[ idx ][ 2 ] ); + if( gDebugTrace ) + log_info( " at max size %d,%d,%d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ], (int)sizes[ idx ][ 2 ] ); + if( test_get_image_info_single( device, &imageInfo, seed ) ) + return -1; + } + } + else + { + for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ ) + { + cl_ulong size; + // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that + // image, the result array, plus offset arrays, will fit in the global ram space + do + { + imageInfo.width = (size_t)random_log_in_range( 
16, (int)maxWidth / 32, seed ); + imageInfo.height = (size_t)random_log_in_range( 16, (int)maxHeight / 32, seed ); + imageInfo.depth = (size_t)random_log_in_range( 16, (int)maxDepth / 32, seed ); + + imageInfo.rowPitch = imageInfo.width * pixelSize; + imageInfo.slicePitch = imageInfo.rowPitch * imageInfo.height; + + size_t extraWidth = (int)random_log_in_range( 0, 64, seed ); + imageInfo.rowPitch += extraWidth; + + do { + extraWidth++; + imageInfo.rowPitch += extraWidth; + } while ((imageInfo.rowPitch % pixelSize) != 0); + + size_t extraHeight = (int)random_log_in_range( 0, 8, seed ); + imageInfo.slicePitch = imageInfo.rowPitch * (imageInfo.height + extraHeight); + + size = (cl_ulong)imageInfo.slicePitch * (cl_ulong)imageInfo.depth * 4 * 4; + } while( size > maxAllocSize || ( size * 3 ) > memSize ); + + if( gDebugTrace ) + log_info( " at size %d,%d,%d (pitch %d,%d) out of %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.depth, (int)imageInfo.rowPitch, (int)imageInfo.slicePitch, (int)maxWidth, (int)maxHeight, (int)maxDepth ); + int ret = test_get_image_info_single( device, &imageInfo, seed ); + if( ret ) + return -1; + } + } + + return 0; +} + diff --git a/test_conformance/images/kernel_image_methods/test_loops.cpp b/test_conformance/images/kernel_image_methods/test_loops.cpp new file mode 100644 index 00000000..0c7a2b13 --- /dev/null +++ b/test_conformance/images/kernel_image_methods/test_loops.cpp @@ -0,0 +1,219 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../testBase.h" + +extern cl_filter_mode gFilterModeToUse; +extern cl_addressing_mode gAddressModeToUse; +extern int gTypesToTest; +extern int gNormalizedModeToUse; +extern cl_channel_type gChannelTypeToUse; + + +extern bool gDebugTrace; + +extern int test_get_image_info_1D( cl_device_id device, cl_image_format *format ); +extern int test_get_image_info_2D( cl_device_id device, cl_image_format *format ); +extern int test_get_image_info_3D( cl_device_id device, cl_image_format *format ); +extern int test_get_image_info_1D_array( cl_device_id device, cl_image_format *format ); +extern int test_get_image_info_2D_array( cl_device_id device, cl_image_format *format ); + +static const char *str_1d_image = "1D"; +static const char *str_2d_image = "2D"; +static const char *str_3d_image = "3D"; +static const char *str_1d_image_array = "1D array"; +static const char *str_2d_image_array = "2D array"; + +static const char *convert_image_type_to_string(cl_mem_object_type imageType) +{ + const char *p; + switch (imageType) + { + case CL_MEM_OBJECT_IMAGE1D: + p = str_1d_image; + break; + case CL_MEM_OBJECT_IMAGE2D: + p = str_2d_image; + break; + case CL_MEM_OBJECT_IMAGE3D: + p = str_3d_image; + break; + case CL_MEM_OBJECT_IMAGE1D_ARRAY: + p = str_1d_image_array; + break; + case CL_MEM_OBJECT_IMAGE2D_ARRAY: + p = str_2d_image_array; + break; + } + return p; +} + + +int filter_formats( cl_image_format *formatList, bool *filterFlags, unsigned int formatCount, cl_channel_type *channelDataTypesToFilter ) +{ + int numSupported = 0; + for( unsigned int j = 0; j < formatCount; j++ ) + { + // If this format has been previously filtered, remove the filter + if( filterFlags[ j ] ) + filterFlags[ j ] = false; + + // Have we already discarded this via the command line? 
+ if( gChannelTypeToUse != (cl_channel_type)-1 && gChannelTypeToUse != formatList[ j ].image_channel_data_type ) + { + filterFlags[ j ] = true; + continue; + } + + // Is given format standard channel order and type given by spec. We don't want to test it if this is vendor extension + if( !IsChannelOrderSupported( formatList[ j ].image_channel_order ) || !IsChannelTypeSupported( formatList[ j ].image_channel_data_type ) ) + { + filterFlags[ j ] = true; + continue; + } + + // We don't filter by channel type + if( !channelDataTypesToFilter ) + { + numSupported++; + continue; + } + + // Is the format supported? + int i; + for( i = 0; channelDataTypesToFilter[ i ] != (cl_channel_type)-1; i++ ) + { + if( formatList[ j ].image_channel_data_type == channelDataTypesToFilter[ i ] ) + { + numSupported++; + break; + } + } + if( channelDataTypesToFilter[ i ] == (cl_channel_type)-1 ) + { + // Format is NOT supported, so mark it as such + filterFlags[ j ] = true; + } + } + return numSupported; +} + +int get_format_list( cl_device_id device, cl_mem_object_type imageType, cl_image_format * &outFormatList, unsigned int &outFormatCount, cl_mem_flags flags ) +{ + extern clContextWrapper context; + int error = clGetSupportedImageFormats( context, (cl_mem_flags)flags, + imageType, 0, NULL, &outFormatCount ); + test_error( error, "Unable to get count of supported image formats" ); + + outFormatList = new cl_image_format[ outFormatCount ]; + error = clGetSupportedImageFormats( context, (cl_mem_flags)flags, + imageType, outFormatCount, outFormatList, NULL ); + test_error( error, "Unable to get list of supported image formats" ); + return 0; +} + + +int test_image_type( cl_device_id device, cl_mem_object_type imageType, cl_mem_flags flags ) +{ + log_info( "Running %s %s-only tests...\n", convert_image_type_to_string(imageType), flags == CL_MEM_READ_ONLY ? 
"read" : "write" ); + + int ret = 0; + + // Grab the list of supported image formats for integer reads + cl_image_format *formatList; + bool *filterFlags; + unsigned int numFormats; + + if( get_format_list( device, imageType, formatList, numFormats, flags ) ) + return -1; + + filterFlags = new bool[ numFormats ]; + if( filterFlags == NULL ) + { + log_error( "ERROR: Out of memory allocating filter flags list!\n" ); + return -1; + } + memset( filterFlags, 0, sizeof( bool ) * numFormats ); + filter_formats( formatList, filterFlags, numFormats, 0 ); + + // Run the format list + for( unsigned int i = 0; i < numFormats; i++ ) + { + int test_return = 0; + if( filterFlags[i] ) + { + log_info( "NOT RUNNING: " ); + print_header( &formatList[ i ], false ); + continue; + } + + print_header( &formatList[ i ], false ); + + gTestCount++; + + switch (imageType) { + case CL_MEM_OBJECT_IMAGE1D: + test_return = test_get_image_info_1D( device, &formatList[ i ] ); + break; + case CL_MEM_OBJECT_IMAGE2D: + test_return = test_get_image_info_2D( device, &formatList[ i ] ); + break; + case CL_MEM_OBJECT_IMAGE3D: + test_return = test_get_image_info_3D( device, &formatList[ i ] ); + break; + case CL_MEM_OBJECT_IMAGE1D_ARRAY: + test_return = test_get_image_info_1D_array( device, &formatList[ i ] ); + break; + case CL_MEM_OBJECT_IMAGE2D_ARRAY: + test_return = test_get_image_info_2D_array( device, &formatList[ i ] ); + break; + } + + if (test_return) { + gTestFailure++; + log_error( "FAILED: " ); + print_header( &formatList[ i ], true ); + log_info( "\n" ); + } + + ret += test_return; + } + + delete filterFlags; + delete formatList; + + return ret; +} + +int test_image_set( cl_device_id device, cl_mem_object_type imageType ) +{ + int version_check; + if ((version_check = check_opencl_version(device,1,2))) { + switch (imageType) { + case CL_MEM_OBJECT_IMAGE1D: + test_missing_feature(version_check, "image_1D"); + case CL_MEM_OBJECT_IMAGE1D_ARRAY: + test_missing_feature(version_check, 
"image_1D_array"); + case CL_MEM_OBJECT_IMAGE2D_ARRAY: + test_missing_feature(version_check, "image_2D_array"); + } + } + + int ret = 0; + ret += test_image_type( device, imageType, CL_MEM_READ_ONLY ); + ret += test_image_type( device, imageType, CL_MEM_WRITE_ONLY ); + + return ret; +} diff --git a/test_conformance/images/kernel_read_write/CMakeLists.txt b/test_conformance/images/kernel_read_write/CMakeLists.txt new file mode 100644 index 00000000..92109df2 --- /dev/null +++ b/test_conformance/images/kernel_read_write/CMakeLists.txt @@ -0,0 +1,29 @@ +set(MODULE_NAME IMAGE_STREAMS) + +set(${MODULE_NAME}_SOURCES + main.cpp + test_iterations.cpp + test_loops.cpp + test_read_1D.cpp + test_read_1D_array.cpp + test_read_2D_array.cpp + test_read_3D.cpp + test_write_image.cpp + test_write_1D.cpp + test_write_1D_array.cpp + test_write_2D_array.cpp + test_write_3D.cpp + ../../../test_common/harness/errorHelpers.c + ../../../test_common/harness/threadTesting.c + ../../../test_common/harness/kernelHelpers.c + ../../../test_common/harness/imageHelpers.cpp + ../../../test_common/harness/mt19937.c + ../../../test_common/harness/conversions.c + ../../../test_common/harness/testHarness.c + ../../../test_common/harness/typeWrappers.cpp + ../../../test_common/harness/msvc9.c + ../../../test_common/harness/parseParameters.cpp +) + +include(../../CMakeCommon.txt) + diff --git a/test_conformance/images/kernel_read_write/Jamfile b/test_conformance/images/kernel_read_write/Jamfile new file mode 100644 index 00000000..db1d2455 --- /dev/null +++ b/test_conformance/images/kernel_read_write/Jamfile @@ -0,0 +1,19 @@ +project + : requirements +# gcc:-xc++ +# msvc:"/TP" + ; + +exe test_image_streams + : main.cpp + test_iterations.cpp + test_loops.cpp + test_read_3D.cpp + test_write_image.cpp + ; + +install dist + : test_image_streams + : debug:$(DIST)/debug/tests/test_conformance/images/kernel_read_write + release:$(DIST)/release/tests/test_conformance/images/kernel_read_write + ; diff --git 
a/test_conformance/images/kernel_read_write/Makefile b/test_conformance/images/kernel_read_write/Makefile new file mode 100644 index 00000000..a9af0878 --- /dev/null +++ b/test_conformance/images/kernel_read_write/Makefile @@ -0,0 +1,56 @@ +ifdef BUILD_WITH_ATF +ATF = -framework ATF +USE_ATF = -DUSE_ATF +endif + +SRCS = main.cpp \ + test_iterations.cpp \ + test_loops.cpp \ + test_write_image.cpp \ + test_read_1D.cpp \ + test_read_3D.cpp \ + test_read_1D_array.cpp \ + test_read_2D_array.cpp \ + test_write_1D.cpp \ + test_write_3D.cpp \ + test_write_1D_array.cpp \ + test_write_2D_array.cpp \ + ../../../test_common/harness/errorHelpers.c \ + ../../../test_common/harness/threadTesting.c \ + ../../../test_common/harness/kernelHelpers.c \ + ../../../test_common/harness/imageHelpers.cpp \ + ../../../test_common/harness/conversions.c \ + ../../../test_common/harness/testHarness.c \ + ../../../test_common/harness/mt19937.c \ + ../../../test_common/harness/typeWrappers.cpp + +DEFINES = DONT_TEST_GARBAGE_POINTERS + +SOURCES = $(abspath $(SRCS)) +LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries +LIBPATH += -L. +FRAMEWORK = +HEADERS = +TARGET = test_image_streams +INCLUDE = -I../../test_common/harness +COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32 -Os +CC = c++ +CXX = c++ +CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE) +CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE) +LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF} + +OBJECTS := ${SOURCES:.c=.o} +OBJECTS := ${OBJECTS:.cpp=.o} + +TARGETOBJECT = +all: $(TARGET) + +$(TARGET): $(OBJECTS) + $(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES) + +clean: + rm -f $(TARGET) $(OBJECTS) + +.DEFAULT: + @echo The target \"$@\" does not exist in Makefile. 
diff --git a/test_conformance/images/kernel_read_write/main.cpp b/test_conformance/images/kernel_read_write/main.cpp new file mode 100644 index 00000000..3a7f1047 --- /dev/null +++ b/test_conformance/images/kernel_read_write/main.cpp @@ -0,0 +1,659 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../../test_common/harness/compat.h" + +#include +#include + +#if !defined(_WIN32) +#include +#include +#endif + +#include "../testBase.h" +#include "../../../test_common/harness/fpcontrol.h" +#include "../../../test_common/harness/parseParameters.h" + +#include + +#if defined(__PPC__) +// Global varaiable used to hold the FPU control register state. The FPSCR register can not +// be used because not all Power implementations retain or observed the NI (non-IEEE +// mode) bit. 
+__thread fpu_control_t fpu_control = 0; +#endif + +bool gDebugTrace = false, gExtraValidateInfo = false, gDisableOffsets = false, gTestSmallImages = false, gTestMaxImages = false, gTestRounding = false, gTestImage2DFromBuffer = 0, gTestMipmaps = false; +cl_filter_mode gFilterModeToUse = (cl_filter_mode)-1; +// Default is CL_MEM_USE_HOST_PTR for the test +cl_mem_flags gMemFlagsToUse = CL_MEM_USE_HOST_PTR; +bool gUseKernelSamplers = false; +int gTypesToTest = 0; +cl_addressing_mode gAddressModeToUse = (cl_addressing_mode)-1; +int gNormalizedModeToUse = 7; +cl_channel_type gChannelTypeToUse = (cl_channel_type)-1; +cl_channel_order gChannelOrderToUse = (cl_channel_order)-1; +bool gEnablePitch = false; +cl_device_type gDeviceType = CL_DEVICE_TYPE_DEFAULT; + +int gtestTypesToRun = 0; +cl_command_queue queue; +cl_context context; + +#define MAX_ALLOWED_STD_DEVIATION_IN_MB 8.0 + +void printUsage( const char *execName ) +{ + const char *p = strrchr( execName, '/' ); + if( p != NULL ) + execName = p + 1; + + log_info( "Usage: %s [read] [write] [CL_FILTER_LINEAR|CL_FILTER_NEAREST] [no_offsets] [debug_trace] [small_images]\n", execName ); + log_info( "Where:\n" ); + log_info( "\n" ); + log_info( "\tThe following flags specify what kinds of operations to test. They can be combined; if none are specified, all are tested:\n" ); + log_info( "\t\tread - Tests reading from an image\n" ); + log_info( "\t\twrite - Tests writing to an image (can be specified with read to run both; default is both)\n" ); + log_info( "\n" ); + log_info( "\tThe following flags specify the types to test. 
They can be combined; if none are specified, all are tested:\n" ); + log_info( "\t\tint - Test integer I/O (read_imagei, write_imagei)\n" ); + log_info( "\t\tuint - Test unsigned integer I/O (read_imageui, write_imageui)\n" ); + log_info( "\t\tfloat - Test float I/O (read_imagef, write_imagef)\n" ); + log_info( "\n" ); + log_info( "\tCL_FILTER_LINEAR - Only tests formats with CL_FILTER_LINEAR filtering\n" ); + log_info( "\tCL_FILTER_NEAREST - Only tests formats with CL_FILTER_NEAREST filtering\n" ); + log_info( "\n" ); + log_info( "\tNORMALIZED - Only tests formats with NORMALIZED coordinates\n" ); + log_info( "\tUNNORMALIZED - Only tests formats with UNNORMALIZED coordinates\n" ); + log_info( "\n" ); + log_info( "\tCL_ADDRESS_CLAMP - Only tests formats with CL_ADDRESS_CLAMP addressing\n" ); + log_info( "\tCL_ADDRESS_CLAMP_TO_EDGE - Only tests formats with CL_ADDRESS_CLAMP_TO_EDGE addressing\n" ); + log_info( "\tCL_ADDRESS_REPEAT - Only tests formats with CL_ADDRESS_REPEAT addressing\n" ); + log_info( "\tCL_ADDRESS_MIRRORED_REPEAT - Only tests formats with CL_ADDRESS_MIRRORED_REPEAT addressing\n" ); + log_info( "\n" ); + log_info( "You may also use appropriate CL_ channel type and ordering constants.\n" ); + log_info( "\n" ); + log_info( "\t1D - Only test 1D images\n" ); + log_info( "\t2D - Only test 2D images\n" ); + log_info( "\t3D - Only test 3D images\n" ); + log_info( "\t1Darray - Only test 1D image arrays\n" ); + log_info( "\t2Darray - Only test 2D image arrays\n" ); + log_info( "\n" ); + log_info( "\tlocal_samplers - Use samplers declared in the kernel functions instead of passed in as arguments\n" ); + log_info( "\n" ); + log_info( "\tThe following specify to use the specific flag to allocate images to use in the tests:\n" ); + log_info( "\t\tCL_MEM_COPY_HOST_PTR\n" ); + log_info( "\t\tCL_MEM_USE_HOST_PTR (default)\n" ); + log_info( "\t\tCL_MEM_ALLOC_HOST_PTR\n" ); + log_info( "\t\tNO_HOST_PTR - Specifies to use none of the above flags\n" ); + log_info( 
"\n" ); + log_info( "\tThe following modify the types of images tested:\n" ); + log_info( "\t\tsmall_images - Runs every format through a loop of widths 1-13 and heights 1-9, instead of random sizes\n" ); + log_info( "\t\tmax_images - Runs every format through a set of size combinations with the max values, max values - 1, and max values / 128\n" ); + log_info( "\t\trounding - Runs every format through a single image filled with every possible value for that image format, to verify rounding works properly\n" ); + log_info( "\n" ); + log_info( "\tno_offsets - Disables offsets when testing reads (can be good for diagnosing address repeating/clamping problems)\n" ); + log_info( "\tdebug_trace - Enables additional debug info logging\n" ); + log_info( "\textra_validate - Enables additional validation failure debug information\n" ); + log_info( "\tuse_pitches - Enables row and slice pitches\n" ); + log_info( "\ttest_mipmaps - Enables mipmapped images\n"); +} + +extern int test_image_set( cl_device_id device, test_format_set_fn formatTestFn, cl_mem_object_type imageType ); + +/** read_write images only support sampler-less read buildt-ins which require special settings + * for some global parameters. This pair of functions temporarily overwrite those global parameters + * and then recover them after completing a read_write test. + */ +static void overwrite_global_params_for_read_write_test( bool *tTestMipmaps, + bool *tDisableOffsets, + bool *tNormalizedModeToUse, + cl_filter_mode *tFilterModeToUse) +{ + log_info("Overwrite global settings for read_write image tests. The overwritten values:\n"); + log_info("gTestMipmaps = false, gDisableOffsets = true, gNormalizedModeToUse = false, gFilterModeToUse = CL_FILTER_NEAREST\n" ); + // mipmap images only support sampler read built-in while read_write images only support + // sampler-less read built-in. Hence we cannot test mipmap for read_write image. 
+ *tTestMipmaps = gTestMipmaps; + gTestMipmaps = false; + + // Read_write images are read by sampler-less read which does not handle out-of-bound read + // It's application responsibility to make sure that the read happens in-bound + // Therefore we should not enable offset in testing read_write images because it will cause out-of-bound + *tDisableOffsets = gDisableOffsets; + gDisableOffsets = true; + + // The sampler-less read image functions behave exactly as the corresponding read image functions + + + *tNormalizedModeToUse = gNormalizedModeToUse; + gNormalizedModeToUse = false; + *tFilterModeToUse = gFilterModeToUse; + gFilterModeToUse = CL_FILTER_NEAREST; +} + +/** Recover the global settings overwritten for read_write tests. This is necessary because + * there may be other tests (i.e. read or write) are called together with read_write test. + */ +static void recover_global_params_from_read_write_test(bool tTestMipmaps, + bool tDisableOffsets, + bool tNormalizedModeToUse, + cl_filter_mode tFilterModeToUse) +{ + gTestMipmaps = tTestMipmaps; + gDisableOffsets = tDisableOffsets; + gNormalizedModeToUse = tNormalizedModeToUse; + gFilterModeToUse = tFilterModeToUse; +} + +int main(int argc, const char *argv[]) +{ + cl_platform_id platform; + cl_device_id device; + cl_channel_type chanType; + cl_channel_order chanOrder; + char str[ 128 ]; + int testTypesToRun = 0; + int testMethods = 0; + bool randomize = false; + bool tTestMipMaps = false; + bool tDisableOffsets = false; + bool tNormalizedModeToUse = false; + cl_filter_mode tFilterModeToUse = (cl_filter_mode)-1; + + test_start(); + + argc = parseCustomParam(argc, argv); + if (argc == -1) + { + test_finish(); + return -1; + } + + //Check CL_DEVICE_TYPE environment variable + checkDeviceTypeOverride( &gDeviceType ); + + // Parse arguments + for( int i = 1; i < argc; i++ ) + { + strncpy( str, argv[ i ], sizeof( str ) - 1 ); + + if( strcmp( str, "cpu" ) == 0 || strcmp( str, "CL_DEVICE_TYPE_CPU" ) == 0 ) + gDeviceType = 
CL_DEVICE_TYPE_CPU; + else if( strcmp( str, "gpu" ) == 0 || strcmp( str, "CL_DEVICE_TYPE_GPU" ) == 0 ) + gDeviceType = CL_DEVICE_TYPE_GPU; + else if( strcmp( str, "accelerator" ) == 0 || strcmp( str, "CL_DEVICE_TYPE_ACCELERATOR" ) == 0 ) + gDeviceType = CL_DEVICE_TYPE_ACCELERATOR; + else if( strcmp( str, "CL_DEVICE_TYPE_DEFAULT" ) == 0 ) + gDeviceType = CL_DEVICE_TYPE_DEFAULT; + + else if( strcmp( str, "debug_trace" ) == 0 ) + gDebugTrace = true; + + else if( strcmp( str, "CL_FILTER_NEAREST" ) == 0 || strcmp( str, "NEAREST" ) == 0 ) + gFilterModeToUse = CL_FILTER_NEAREST; + else if( strcmp( str, "CL_FILTER_LINEAR" ) == 0 || strcmp( str, "LINEAR" ) == 0 ) + gFilterModeToUse = CL_FILTER_LINEAR; + + else if( strcmp( str, "CL_ADDRESS_NONE" ) == 0 ) + gAddressModeToUse = CL_ADDRESS_NONE; + else if( strcmp( str, "CL_ADDRESS_CLAMP" ) == 0 ) + gAddressModeToUse = CL_ADDRESS_CLAMP; + else if( strcmp( str, "CL_ADDRESS_CLAMP_TO_EDGE" ) == 0 ) + gAddressModeToUse = CL_ADDRESS_CLAMP_TO_EDGE; + else if( strcmp( str, "CL_ADDRESS_REPEAT" ) == 0 ) + gAddressModeToUse = CL_ADDRESS_REPEAT; + else if( strcmp( str, "CL_ADDRESS_MIRRORED_REPEAT" ) == 0 ) + gAddressModeToUse = CL_ADDRESS_MIRRORED_REPEAT; + + else if( strcmp( str, "NORMALIZED" ) == 0 ) + gNormalizedModeToUse = true; + else if( strcmp( str, "UNNORMALIZED" ) == 0 ) + gNormalizedModeToUse = false; + + + else if( strcmp( str, "no_offsets" ) == 0 ) + gDisableOffsets = true; + else if( strcmp( str, "small_images" ) == 0 ) + gTestSmallImages = true; + else if( strcmp( str, "max_images" ) == 0 ) + gTestMaxImages = true; + else if( strcmp( str, "use_pitches" ) == 0 ) + gEnablePitch = true; + else if( strcmp( str, "rounding" ) == 0 ) + gTestRounding = true; + else if( strcmp( str, "extra_validate" ) == 0 ) + gExtraValidateInfo = true; + else if( strcmp( str, "test_mipmaps" ) == 0 ) { + // 2.0 Spec does not allow using mem flags, unnormalized coordinates with mipmapped images + gTestMipmaps = true; + gMemFlagsToUse = 0; + 
gNormalizedModeToUse = true; + } + + else if( strcmp( str, "read" ) == 0 ) + testTypesToRun |= kReadTests; + else if( strcmp( str, "write" ) == 0 ) + testTypesToRun |= kWriteTests; + else if( strcmp( str, "read_write" ) == 0 ) + { + testTypesToRun |= kReadWriteTests; + } + + else if( strcmp( str, "local_samplers" ) == 0 ) + gUseKernelSamplers = true; + + else if( strcmp( str, "int" ) == 0 ) + gTypesToTest |= kTestInt; + else if( strcmp( str, "uint" ) == 0 ) + gTypesToTest |= kTestUInt; + else if( strcmp( str, "float" ) == 0 ) + gTypesToTest |= kTestFloat; + + else if( strcmp( str, "randomize" ) == 0 ) + randomize = true; + + else if ( strcmp( str, "1D" ) == 0 ) + testMethods |= k1D; + else if( strcmp( str, "2D" ) == 0 ) + testMethods |= k2D; + else if( strcmp( str, "3D" ) == 0 ) + testMethods |= k3D; + else if( strcmp( str, "1Darray" ) == 0 ) + testMethods |= k1DArray; + else if( strcmp( str, "2Darray" ) == 0 ) + testMethods |= k2DArray; + + else if( strcmp( str, "CL_MEM_COPY_HOST_PTR" ) == 0 || strcmp( str, "COPY_HOST_PTR" ) == 0 ) + gMemFlagsToUse = CL_MEM_COPY_HOST_PTR; + else if( strcmp( str, "CL_MEM_USE_HOST_PTR" ) == 0 || strcmp( str, "USE_HOST_PTR" ) == 0 ) + gMemFlagsToUse = CL_MEM_USE_HOST_PTR; + else if( strcmp( str, "CL_MEM_ALLOC_HOST_PTR" ) == 0 || strcmp( str, "ALLOC_HOST_PTR" ) == 0 ) + gMemFlagsToUse = CL_MEM_ALLOC_HOST_PTR; + else if( strcmp( str, "NO_HOST_PTR" ) == 0 ) + gMemFlagsToUse = 0; + + else if( strcmp( str, "help" ) == 0 || strcmp( str, "?" ) == 0 ) + { + printUsage( argv[ 0 ] ); + return -1; + } + + else if( ( chanType = get_channel_type_from_name( str ) ) != (cl_channel_type)-1 ) + gChannelTypeToUse = chanType; + + else if( ( chanOrder = get_channel_order_from_name( str ) ) != (cl_channel_order)-1 ) + gChannelOrderToUse = chanOrder; + else + { + log_error( "ERROR: Unknown argument %d: %s. 
Exiting....\n", i, str ); + return -1; + } + + } + + if (testMethods == 0) + testMethods = k1D | k2D | k3D | k1DArray | k2DArray; + if( testTypesToRun == 0 ) + testTypesToRun = kAllTests; + if( gTypesToTest == 0 ) + gTypesToTest = kTestAllTypes; + +#if defined( __APPLE__ ) +#if defined( __i386__ ) || defined( __x86_64__ ) +#define kHasSSE3 0x00000008 +#define kHasSupplementalSSE3 0x00000100 +#define kHasSSE4_1 0x00000400 +#define kHasSSE4_2 0x00000800 + /* check our environment for a hint to disable SSE variants */ + { + const char *env = getenv( "CL_MAX_SSE" ); + if( env ) + { + extern int _cpu_capabilities; + int mask = 0; + if( 0 == strcmp( env, "SSE4.1" ) ) + mask = kHasSSE4_2; + else if( 0 == strcmp( env, "SSSE3" ) ) + mask = kHasSSE4_2 | kHasSSE4_1; + else if( 0 == strcmp( env, "SSE3" ) ) + mask = kHasSSE4_2 | kHasSSE4_1 | kHasSupplementalSSE3; + else if( 0 == strcmp( env, "SSE2" ) ) + mask = kHasSSE4_2 | kHasSSE4_1 | kHasSupplementalSSE3 | kHasSSE3; + + log_info( "*** Environment: CL_MAX_SSE = %s ***\n", env ); + _cpu_capabilities &= ~mask; + } + } +#endif +#endif + + // Seed the random # generators + if( randomize ) + { + gRandomSeed = (cl_uint) time( NULL ); + gReSeed = 1; + log_info( "Random seed: %u\n", gRandomSeed ); + } + + int error; + // Get our platform + error = clGetPlatformIDs(1, &platform, NULL); + if( error ) + { + print_error( error, "Unable to get platform" ); + test_finish(); + return -1; + } + + // Get our device + cl_uint num_devices = 0; + error = clGetDeviceIDs(platform, gDeviceType, 0, NULL, &num_devices ); + if( error ) + { + print_error( error, "Unable to get the number of devices" ); + test_finish(); + return -1; + } + + std::vector devices(num_devices); + error = clGetDeviceIDs(platform, gDeviceType, num_devices, &devices[0], NULL ); + if( error ) + { + print_error( error, "Unable to get specified device type" ); + test_finish(); + return -1; + } + + int device_index = 0; + char* device_index_str = getenv("CL_DEVICE_INDEX"); + if 
(device_index_str && ((device_index = atoi(device_index_str))) >= num_devices) { + log_error("CL_DEVICE_INDEX=%d is greater than the number of devices %d\n",device_index,num_devices); + test_finish(); + return -1; + } + + device = devices[device_index]; + + // Get the device type so we know if it is a GPU even if default is passed in. + error = clGetDeviceInfo(device, CL_DEVICE_TYPE, sizeof(gDeviceType), &gDeviceType, NULL); + if( error ) + { + print_error( error, "Unable to get device type" ); + test_finish(); + return -1; + } + + if( printDeviceHeader( device ) != CL_SUCCESS ) + { + test_finish(); + return -1; + } + + // Check for image support + if(checkForImageSupport( device ) == CL_IMAGE_FORMAT_NOT_SUPPORTED) { + log_info("Device does not support images. Skipping test.\n"); + test_finish(); + return 0; + } + + // Create a context to test with + context = clCreateContext( NULL, 1, &device, notify_callback, NULL, &error ); + if( error != CL_SUCCESS ) + { + print_error( error, "Unable to create testing context" ); + test_finish(); + return -1; + } + + // Create a queue against the context + queue = clCreateCommandQueueWithProperties( context, device, 0, &error ); + if( error != CL_SUCCESS ) + { + print_error( error, "Unable to create testing command queue" ); + test_finish(); + return -1; + } + + if( gTestSmallImages ) + log_info( "Note: Using small test images\n" ); + + // On most platforms which support denorm, default is FTZ off. However, + // on some hardware where the reference is computed, default might be flush denorms to zero e.g. arm. + // This creates issues in result verification. Since spec allows the implementation to either flush or + // not flush denorms to zero, an implementation may choose not to flush i.e. return denorm result whereas + // reference result may be zero (flushed denorm). Hence we need to disable denorm flushing on host side + // where reference is being computed to make sure we get non-flushed reference result. 
If implementation + // returns flushed result, we correctly take care of that in verification code. + + FPU_mode_type oldMode; + DisableFTZ(&oldMode); + + // Run the test now + int ret = 0; + if (testMethods & k1D) + { + if (testTypesToRun & kReadTests) + { + gtestTypesToRun = kReadTests; + ret += test_image_set( device, test_read_image_formats, CL_MEM_OBJECT_IMAGE1D ); + } + + if (testTypesToRun & kWriteTests) + { + gtestTypesToRun = kWriteTests; + ret += test_image_set( device, test_write_image_formats, CL_MEM_OBJECT_IMAGE1D ); + } + + if ((testTypesToRun & kReadWriteTests) && !gTestMipmaps) + { + gtestTypesToRun = kReadWriteTests; + overwrite_global_params_for_read_write_test(&tTestMipMaps, &tDisableOffsets, &tNormalizedModeToUse, &tFilterModeToUse); + ret += test_image_set( device, test_read_image_formats, CL_MEM_OBJECT_IMAGE1D ); + ret += test_image_set( device, test_write_image_formats, CL_MEM_OBJECT_IMAGE1D ); + recover_global_params_from_read_write_test(tTestMipMaps, tDisableOffsets, tNormalizedModeToUse, tFilterModeToUse); + } + } + if (testMethods & k2D) + { + if (testTypesToRun & kReadTests) + { + gtestTypesToRun = kReadTests; + ret += test_image_set( device, test_read_image_formats, CL_MEM_OBJECT_IMAGE2D ); + if (is_extension_available(device, "cl_khr_image2d_from_buffer")) + { + log_info("Testing read_image{f | i | ui} for 2D image from buffer\n"); + // NOTE: for 2D image from buffer test, gTestSmallImages, gTestMaxImages, gTestRounding and gTestMipmaps must be false + if (gTestSmallImages == false && gTestMaxImages == false && gTestRounding == false && gTestMipmaps == false) + { + cl_mem_flags saved_gMemFlagsToUse = gMemFlagsToUse; + gTestImage2DFromBuffer = true; + // disable CL_MEM_USE_HOST_PTR for 1.2 extension but enable this for 2.0 + gMemFlagsToUse = CL_MEM_COPY_HOST_PTR; + ret += test_image_set( device, test_read_image_formats, CL_MEM_OBJECT_IMAGE2D ); + gTestImage2DFromBuffer = false; + gMemFlagsToUse = saved_gMemFlagsToUse; + } + } + } + + if 
(testTypesToRun & kWriteTests) + { + gtestTypesToRun = kWriteTests; + ret += test_image_set( device, test_write_image_formats, CL_MEM_OBJECT_IMAGE2D ); + if (is_extension_available(device, "cl_khr_image2d_from_buffer")) + { + log_info("Testing write_image{f | i | ui} for 2D image from buffer\n"); + // NOTE: for 2D image from buffer test, gTestSmallImages, gTestMaxImages,gTestRounding and gTestMipmaps must be false + if (gTestSmallImages == false && gTestMaxImages == false && gTestRounding == false && gTestMipmaps == false) + { + bool saved_gEnablePitch = gEnablePitch; + cl_mem_flags saved_gMemFlagsToUse = gMemFlagsToUse; + gEnablePitch = true; + // disable CL_MEM_USE_HOST_PTR for 1.2 extension but enable this for 2.0 + gMemFlagsToUse = CL_MEM_COPY_HOST_PTR; + gTestImage2DFromBuffer = true; + ret += test_image_set( device, test_write_image_formats, CL_MEM_OBJECT_IMAGE2D ); + gTestImage2DFromBuffer = false; + gMemFlagsToUse = saved_gMemFlagsToUse; + gEnablePitch = saved_gEnablePitch; + } + } + } + + if ((testTypesToRun & kReadWriteTests) && !gTestMipmaps) + { + gtestTypesToRun = kReadWriteTests; + overwrite_global_params_for_read_write_test(&tTestMipMaps, &tDisableOffsets, &tNormalizedModeToUse, &tFilterModeToUse); + ret += test_image_set( device, test_read_image_formats, CL_MEM_OBJECT_IMAGE2D ); + if (is_extension_available(device, "cl_khr_image2d_from_buffer")) + { + log_info("Testing read_image{f | i | ui} for 2D image from buffer\n"); + // NOTE: for 2D image from buffer test, gTestSmallImages, gTestMaxImages, gTestRounding and gTestMipmaps must be false + if (gTestSmallImages == false && gTestMaxImages == false && gTestRounding == false && gTestMipmaps == false) + { + cl_mem_flags saved_gMemFlagsToUse = gMemFlagsToUse; + gTestImage2DFromBuffer = true; + // disable CL_MEM_USE_HOST_PTR for 1.2 extension but enable this for 2.0 + gMemFlagsToUse = CL_MEM_COPY_HOST_PTR; + ret += test_image_set( device, test_read_image_formats, CL_MEM_OBJECT_IMAGE2D ); + 
gTestImage2DFromBuffer = false; + gMemFlagsToUse = saved_gMemFlagsToUse; + } + } + + ret += test_image_set( device, test_write_image_formats, CL_MEM_OBJECT_IMAGE2D ); + if (is_extension_available(device, "cl_khr_image2d_from_buffer")) + { + log_info("Testing write_image{f | i | ui} for 2D image from buffer\n"); + // NOTE: for 2D image from buffer test, gTestSmallImages, gTestMaxImages,gTestRounding and gTestMipmaps must be false + if (gTestSmallImages == false && gTestMaxImages == false && gTestRounding == false && gTestMipmaps == false) + { + bool saved_gEnablePitch = gEnablePitch; + cl_mem_flags saved_gMemFlagsToUse = gMemFlagsToUse; + gEnablePitch = true; + // disable CL_MEM_USE_HOST_PTR for 1.2 extension but enable this for 2.0 + gMemFlagsToUse = CL_MEM_COPY_HOST_PTR; + gTestImage2DFromBuffer = true; + ret += test_image_set( device, test_write_image_formats, CL_MEM_OBJECT_IMAGE2D ); + gTestImage2DFromBuffer = false; + gMemFlagsToUse = saved_gMemFlagsToUse; + gEnablePitch = saved_gEnablePitch; + } + } + recover_global_params_from_read_write_test(tTestMipMaps, tDisableOffsets, tNormalizedModeToUse, tFilterModeToUse); + } + } + if (testMethods & k3D) + { + if (testTypesToRun & kReadTests) + { + gtestTypesToRun = kReadTests; + ret += test_image_set( device, test_read_image_formats, CL_MEM_OBJECT_IMAGE3D ); + } + + if (testTypesToRun & kWriteTests) + { + gtestTypesToRun = kWriteTests; + ret += test_image_set( device, test_write_image_formats, CL_MEM_OBJECT_IMAGE3D ); + } + + if ((testTypesToRun & kReadWriteTests) && !gTestMipmaps) + { + gtestTypesToRun = kReadWriteTests; + overwrite_global_params_for_read_write_test(&tTestMipMaps, &tDisableOffsets, &tNormalizedModeToUse, &tFilterModeToUse); + ret += test_image_set( device, test_read_image_formats, CL_MEM_OBJECT_IMAGE3D ); + ret += test_image_set( device, test_write_image_formats, CL_MEM_OBJECT_IMAGE3D ); + recover_global_params_from_read_write_test(tTestMipMaps, tDisableOffsets, tNormalizedModeToUse, 
tFilterModeToUse); + } + } + if (testMethods & k1DArray) + { + if (testTypesToRun & kReadTests) + { + gtestTypesToRun = kReadTests; + ret += test_image_set( device, test_read_image_formats, CL_MEM_OBJECT_IMAGE1D_ARRAY ); + } + + if (testTypesToRun & kWriteTests) + { + gtestTypesToRun = kWriteTests; + ret += test_image_set( device, test_write_image_formats, CL_MEM_OBJECT_IMAGE1D_ARRAY ); + } + + if ((testTypesToRun & kReadWriteTests) && !gTestMipmaps) + { + gtestTypesToRun = kReadWriteTests; + overwrite_global_params_for_read_write_test(&tTestMipMaps, &tDisableOffsets, &tNormalizedModeToUse, &tFilterModeToUse); + ret += test_image_set( device, test_read_image_formats, CL_MEM_OBJECT_IMAGE1D_ARRAY ); + ret += test_image_set( device, test_write_image_formats, CL_MEM_OBJECT_IMAGE1D_ARRAY ); + recover_global_params_from_read_write_test(tTestMipMaps, tDisableOffsets, tNormalizedModeToUse, tFilterModeToUse); + } + } + if (testMethods & k2DArray) + { + if (testTypesToRun & kReadTests) + { + gtestTypesToRun = kReadTests; + ret += test_image_set( device, test_read_image_formats, CL_MEM_OBJECT_IMAGE2D_ARRAY ); + } + + if (testTypesToRun & kWriteTests) + { + gtestTypesToRun = kWriteTests; + ret += test_image_set( device, test_write_image_formats, CL_MEM_OBJECT_IMAGE2D_ARRAY ); + } + + if ((testTypesToRun & kReadWriteTests) && !gTestMipmaps) + { + gtestTypesToRun = kReadWriteTests; + overwrite_global_params_for_read_write_test(&tTestMipMaps, &tDisableOffsets, &tNormalizedModeToUse, &tFilterModeToUse); + ret += test_image_set( device, test_read_image_formats, CL_MEM_OBJECT_IMAGE2D_ARRAY ); + ret += test_image_set( device, test_write_image_formats, CL_MEM_OBJECT_IMAGE2D_ARRAY ); + recover_global_params_from_read_write_test(tTestMipMaps, tDisableOffsets, tNormalizedModeToUse, tFilterModeToUse); + } + } + + // Restore FP state before leaving + RestoreFPState(&oldMode); + + error = clFinish(queue); + if (error) + print_error(error, "clFinish failed."); + + clReleaseContext(context); 
+ clReleaseCommandQueue(queue); + + if (gTestFailure == 0) { + if (gTestCount > 1) + log_info("PASSED %d of %d tests.\n", gTestCount, gTestCount); + else + log_info("PASSED test.\n"); + } else if (gTestFailure > 0) { + if (gTestCount > 1) + log_error("FAILED %d of %d tests.\n", gTestFailure, gTestCount); + else + log_error("FAILED test.\n"); + } + + // Clean up + test_finish(); + + if (gTestFailure > 0) + return gTestFailure; + + return ret; +} diff --git a/test_conformance/images/kernel_read_write/test_iterations.cpp b/test_conformance/images/kernel_read_write/test_iterations.cpp new file mode 100644 index 00000000..854ccc8b --- /dev/null +++ b/test_conformance/images/kernel_read_write/test_iterations.cpp @@ -0,0 +1,1765 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../testBase.h" +#include + +#if defined( __APPLE__ ) + #include + #include + #include +#endif + +#define MAX_ERR 0.005f +#define MAX_HALF_LINEAR_ERR 0.3f + +extern cl_command_queue queue; +extern cl_context context; +extern bool gDebugTrace, gExtraValidateInfo, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestRounding, gTestImage2DFromBuffer, gTestMipmaps; +extern cl_device_type gDeviceType; +extern bool gUseKernelSamplers; +extern cl_filter_mode gFilterModeToUse; +extern cl_addressing_mode gAddressModeToUse; +extern uint64_t gRoundingStartValue; +extern cl_mem_flags gMemFlagsToUse; +extern int gtestTypesToRun; + +#define MAX_TRIES 1 +#define MAX_CLAMPED 1 + +// Utility function to clamp down image sizes for certain tests to avoid +// using too much memory. +static size_t reduceImageSizeRange(size_t maxDimSize) { + size_t DimSize = maxDimSize/32; + if (DimSize < (size_t) 16) + return 16; + else if (DimSize > (size_t) 256) + return 256; + else + return DimSize; +} + +const char *read2DKernelSourcePattern = +"__kernel void sample_kernel( read_only %s input,%s __global float *xOffsets, __global float *yOffsets, __global %s%s *results %s)\n" +"{\n" +"%s" +" int tidX = get_global_id(0), tidY = get_global_id(1);\n" +"%s" +"%s" +" results[offset] = read_image%s( input, imageSampler, coords %s);\n" +"}"; + +const char *read_write2DKernelSourcePattern = +"__kernel void sample_kernel( read_write %s input,%s __global float *xOffsets, __global float *yOffsets, __global %s%s *results %s)\n" +"{\n" +"%s" +" int tidX = get_global_id(0), tidY = get_global_id(1);\n" +"%s" +"%s" +" results[offset] = read_image%s( input, coords %s);\n" +"}"; + +const char *intCoordKernelSource = +" int2 coords = (int2)( xOffsets[offset], yOffsets[offset]);\n"; + +const char *floatKernelSource = +" float2 coords = (float2)( (float)( xOffsets[offset] ), (float)( yOffsets[offset] ) );\n"; + +static const char *samplerKernelArg = " sampler_t imageSampler,"; + +static 
const char *lodOffsetSource = +" unsigned int lod_int = (unsigned int) lod;\n" +" int width_lod = (get_image_width(input) >> lod_int) ?(get_image_width(input) >> lod_int):1 ;\n" +" int offset = tidY*width_lod + tidX;\n"; + +static const char *offsetSource = +" int offset = tidY*get_image_width(input) + tidX;\n"; + +#define ABS_ERROR( result, expected ) ( fabsf( (float)expected - (float)result ) ) + +extern void read_image_pixel_float( void *imageData, image_descriptor *imageInfo, + int x, int y, int z, float *outData ); +template int determine_validation_error( void *imagePtr, image_descriptor *imageInfo, image_sampler_data *imageSampler, + T *resultPtr, T * expected, float error, + float x, float y, float xAddressOffset, float yAddressOffset, size_t j, int &numTries, int &numClamped, bool printAsFloat, int lod = 0 ) +{ + int actualX, actualY; + int found = debug_find_pixel_in_image( imagePtr, imageInfo, resultPtr, &actualX, &actualY, NULL, lod ); + bool clampingErr = false, clamped = false, otherClampingBug = false; + int clampedX, clampedY, ignoreMe; + + clamped = get_integer_coords_offset( x, y, 0.f, xAddressOffset, yAddressOffset, 0.0f, imageInfo->width, imageInfo->height, 0, imageSampler, imageInfo, clampedX, clampedY, ignoreMe ); + + if( found ) + { + // Is it a clamping bug? 
+ if( clamped && clampedX == actualX && clampedY == actualY ) + { + if( (--numClamped) == 0 ) + { + log_error( "ERROR: TEST FAILED: Read is erroneously clamping coordinates for image size %ld x %ld!\n", imageInfo->width, imageInfo->height ); + if (imageInfo->format->image_channel_order == CL_DEPTH) + { + if( printAsFloat ) + { + log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%g),\n\tgot (%g),\n\terror of %g\n", + (int)j, x, x, y, y, (float)expected[ 0 ], (float)resultPtr[ 0 ], error ); + } + else + { + log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%x),\n\tgot (%x)\n", + (int)j, x, x, y, y, (int)expected[ 0 ], (int)resultPtr[ 0 ] ); + } + } + else + { + if( printAsFloat ) + { + log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%g,%g,%g,%g),\n\tgot (%g,%g,%g,%g),\n\terror of %g\n", + (int)j, x, x, y, y, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ], + (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error ); + } + else + { + log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%x,%x,%x,%x),\n\tgot (%x,%x,%x,%x)\n", + (int)j, x, x, y, y, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ], + (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] ); + } + } + return 1; + } + clampingErr = true; + otherClampingBug = true; + } + } + if( clamped && !otherClampingBug ) + { + // If we are in clamp-to-edge mode and we're getting zeroes, it's possible we're getting border erroneously + if( resultPtr[ 0 ] == 0 && resultPtr[ 1 ] == 0 && resultPtr[ 2 ] == 0 && resultPtr[ 3 ] == 0 ) + { + if( (--numClamped) == 0 ) + { + log_error( "ERROR: TEST FAILED: Clamping is erroneously returning border color for image size %ld x %ld!\n", imageInfo->width, imageInfo->height ); + if (imageInfo->format->image_channel_order == 
CL_DEPTH) + { + if( printAsFloat ) + { + log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%g),\n\tgot (%g),\n\terror of %g\n", + (int)j, x, x, y, y, (float)expected[ 0 ], (float)resultPtr[ 0 ], error ); + } + else + { + log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%x),\n\tgot (%x)\n", + (int)j, x, x, y, y, (int)expected[ 0 ], (int)resultPtr[ 0 ] ); + } + } + else + { + if( printAsFloat ) + { + log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%g,%g,%g,%g),\n\tgot (%g,%g,%g,%g),\n\terror of %g\n", + (int)j, x, x, y, y, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ], + (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error ); + } + else + { + log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%x,%x,%x,%x),\n\tgot (%x,%x,%x,%x)\n", + (int)j, x, x, y, y, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ], + (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] ); + } + } + return 1; + } + clampingErr = true; + } + } + if( !clampingErr ) + { + if (imageInfo->format->image_channel_order == CL_DEPTH) + { + if( printAsFloat ) + { + log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%g),\n\tgot (%g),\n\terror of %g\n", + (int)j, x, x, y, y, (float)expected[ 0 ], (float)resultPtr[ 0 ], error ); + } + else + { + log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%x),\n\tgot (%x)\n", + (int)j, x, x, y, y, (int)expected[ 0 ], (int)resultPtr[ 0 ] ); + } + } + else + { + if( printAsFloat ) + { + log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%g,%g,%g,%g),\n\tgot (%g,%g,%g,%g), error of %g\n", + (int)j, x, x, y, y, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ], + (float)resultPtr[ 0 
], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error ); + } + else + { + log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%x,%x,%x,%x),\n\tgot (%x,%x,%x,%x)\n", + (int)j, x, x, y, y, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ], + (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] ); + } + } + log_error( "img size %ld,%ld (pitch %ld)", imageInfo->width, imageInfo->height, imageInfo->rowPitch ); + if( clamped ) + { + log_error( " which would clamp to %d,%d\n", clampedX, clampedY ); + } + if( printAsFloat && gExtraValidateInfo) + { + log_error( "Nearby values:\n" ); + log_error( "\t%d\t%d\t%d\t%d\n", clampedX - 2, clampedX - 1, clampedX, clampedX + 1 ); + for( int yOff = -2; yOff <= 1; yOff++ ) + { + float top[ 4 ], real[ 4 ], bot[ 4 ], bot2[ 4 ]; + read_image_pixel_float( imagePtr, imageInfo, clampedX - 2 , clampedY + yOff, 0, top ); + read_image_pixel_float( imagePtr, imageInfo, clampedX - 1 ,clampedY + yOff, 0, real ); + read_image_pixel_float( imagePtr, imageInfo, clampedX, clampedY + yOff, 0, bot ); + read_image_pixel_float( imagePtr, imageInfo, clampedX + 1, clampedY + yOff, 0, bot2 ); + if (imageInfo->format->image_channel_order == CL_DEPTH) + { + log_error( "%d\t(%g)",clampedY + yOff, top[0] ); + log_error( " (%g)", real[0] ); + log_error( " (%g)",bot[0] ); + log_error( " (%g)\n",bot2[0] ); + } + else + { + log_error( "%d\t(%g,%g,%g,%g)",clampedY + yOff, top[0], top[1], top[2], top[3] ); + log_error( " (%g,%g,%g,%g)", real[0], real[1], real[2], real[3] ); + log_error( " (%g,%g,%g,%g)",bot[0], bot[1], bot[2], bot[3] ); + log_error( " (%g,%g,%g,%g)\n",bot2[0], bot2[1], bot2[2], bot2[3] ); + } + } + + if( clampedY < 1 ) + { + log_error( "Nearby values:\n" ); + log_error( "\t%d\t%d\t%d\t%d\n", clampedX - 2, clampedX - 1, clampedX, clampedX + 1 ); + for( int yOff = (int)imageInfo->height - 2; yOff <= (int)imageInfo->height + 1; yOff++ ) + { + 
float top[ 4 ], real[ 4 ], bot[ 4 ], bot2[ 4 ]; + read_image_pixel_float( imagePtr, imageInfo, clampedX - 2 , clampedY + yOff, 0, top ); + read_image_pixel_float( imagePtr, imageInfo, clampedX - 1 ,clampedY + yOff, 0, real ); + read_image_pixel_float( imagePtr, imageInfo, clampedX, clampedY + yOff, 0, bot ); + read_image_pixel_float( imagePtr, imageInfo, clampedX + 1, clampedY + yOff, 0, bot2 ); + if (imageInfo->format->image_channel_order == CL_DEPTH) + { + log_error( "%d\t(%g)",clampedY + yOff, top[0] ); + log_error( " (%g)", real[0] ); + log_error( " (%g)",bot[0] ); + log_error( " (%g)\n",bot2[0] ); + } + else + { + log_error( "%d\t(%g,%g,%g,%g)",clampedY + yOff, top[0], top[1], top[2], top[3] ); + log_error( " (%g,%g,%g,%g)", real[0], real[1], real[2], real[3] ); + log_error( " (%g,%g,%g,%g)",bot[0], bot[1], bot[2], bot[3] ); + log_error( " (%g,%g,%g,%g)\n",bot2[0], bot2[1], bot2[2], bot2[3] ); + } + } + } + } + + if( imageSampler->filter_mode != CL_FILTER_LINEAR ) + { + if( found ) + log_error( "\tValue really found in image at %d,%d (%s)\n", actualX, actualY, ( found > 1 ) ? "NOT unique!!" : "unique" ); + else + log_error( "\tValue not actually found in image\n" ); + } + log_error( "\n" ); + + numClamped = -1; // We force the clamped counter to never work + if( ( --numTries ) == 0 ) + { + return 1; + } + } + return 0; +} + +#define CLAMP( _val, _min, _max ) ((_val) < (_min) ? (_min) : (_val) > (_max) ? 
(_max) : (_val)) + +static void InitFloatCoords( image_descriptor *imageInfo, image_sampler_data *imageSampler, float *xOffsets, float *yOffsets, float xfract, float yfract, int normalized_coords, MTdata d ) +{ + size_t i = 0; + if( gDisableOffsets ) + { + for( size_t y = 0; y < imageInfo->height; y++ ) + { + for( size_t x = 0; x < imageInfo->width; x++, i++ ) + { + xOffsets[ i ] = (float) (xfract + (double) x); + yOffsets[ i ] = (float) (yfract + (double) y); + } + } + } + else + { + for( size_t y = 0; y < imageInfo->height; y++ ) + { + for( size_t x = 0; x < imageInfo->width; x++, i++ ) + { + xOffsets[ i ] = (float) (xfract + (double) ((int) x + random_in_range( -10, 10, d ))); + yOffsets[ i ] = (float) (yfract + (double) ((int) y + random_in_range( -10, 10, d ))); + } + } + } + + if( imageSampler->addressing_mode == CL_ADDRESS_NONE ) + { + i = 0; + for( size_t y = 0; y < imageInfo->height; y++ ) + { + for( size_t x = 0; x < imageInfo->width; x++, i++ ) + { + xOffsets[ i ] = (float) CLAMP( (double) xOffsets[ i ], 0.0, (double) imageInfo->width - 1.0); + yOffsets[ i ] = (float) CLAMP( (double) yOffsets[ i ], 0.0, (double)imageInfo->height - 1.0); + } + } + } + + if( normalized_coords ) + { + i = 0; + for( size_t y = 0; y < imageInfo->height; y++ ) + { + for( size_t x = 0; x < imageInfo->width; x++, i++ ) + { + xOffsets[ i ] = (float) ((double) xOffsets[ i ] / (double) imageInfo->width); + yOffsets[ i ] = (float) ((double) yOffsets[ i ] / (double) imageInfo->height); + } + } + } +} + +static void InitFloatCoords( image_descriptor *imageInfo, image_sampler_data *imageSampler, float *xOffsets, float *yOffsets, float xfract, float yfract, int normalized_coords, MTdata d, size_t lod) +{ + size_t i = 0; + size_t width_lod = imageInfo->width, height_lod = imageInfo->height; + + if( gTestMipmaps ) + { + width_lod = (imageInfo->width >> lod)?(imageInfo->width >> lod):1; + height_lod = (imageInfo->height >> lod)?(imageInfo->height >> lod):1; + } + if( gDisableOffsets ) + { 
+ for( size_t y = 0; y < height_lod; y++ ) + { + for( size_t x = 0; x < width_lod; x++, i++ ) + { + xOffsets[ i ] = (float) (xfract + (float) x); + yOffsets[ i ] = (float) (yfract + (float) y); + } + } + } + else + { + for( size_t y = 0; y < height_lod; y++ ) + { + for( size_t x = 0; x < width_lod; x++, i++ ) + { + xOffsets[ i ] = (float) (xfract + (double) ((int) x + random_in_range( -10, 10, d ))); + yOffsets[ i ] = (float) (yfract + (double) ((int) y + random_in_range( -10, 10, d ))); + } + } + } + + if( imageSampler->addressing_mode == CL_ADDRESS_NONE ) + { + i = 0; + for( size_t y = 0; y < height_lod; y++ ) + { + for( size_t x = 0; x < width_lod; x++, i++ ) + { + xOffsets[ i ] = (float) CLAMP( (double) xOffsets[ i ], 0.0, (double) width_lod - 1.0); + yOffsets[ i ] = (float) CLAMP( (double) yOffsets[ i ], 0.0, (double)height_lod - 1.0); + } + } + } + + if( normalized_coords ) + { + i = 0; + for( size_t y = 0; y < height_lod; y++ ) + { + for( size_t x = 0; x < width_lod; x++, i++ ) + { + xOffsets[ i ] = (float) ((float) xOffsets[ i ] / (float) width_lod); + yOffsets[ i ] = (float) ((float) yOffsets[ i ] / (float) height_lod); + } + } + } +} +#ifndef MAX + #define MAX( _a, _b ) ((_a) > (_b) ? 
(_a) : (_b)) +#endif + +int validate_image_2D_depth_results(void *imageValues, void *resultValues, double formatAbsoluteError, float *xOffsetValues, float *yOffsetValues, + ExplicitType outputType, int &numTries, int &numClamped, image_sampler_data *imageSampler, image_descriptor *imageInfo, size_t lod, char *imagePtr) +{ + // Validate results element by element + size_t width_lod = (imageInfo->width >> lod ) ?(imageInfo->width >> lod ) : 1; + size_t height_lod = (imageInfo->height >> lod ) ?(imageInfo->height >> lod ) : 1; + /* + * FLOAT output type + */ + if( outputType == kFloat ) + { + // Validate float results + float *resultPtr = (float *)(char *)resultValues; + float expected[4], error=0.0f; + float maxErr = get_max_relative_error( imageInfo->format, imageSampler, 0 /*not 3D*/, CL_FILTER_LINEAR == imageSampler->filter_mode ); + for( size_t y = 0, j = 0; y < height_lod; y++ ) + { + for( size_t x = 0; x < width_lod; x++, j++ ) + { + // Step 1: go through and see if the results verify for the pixel + // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the + // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. + int checkOnlyOnePixel = 0; + int found_pixel = 0; + float offset = NORM_OFFSET; + if (!imageSampler->normalized_coords || imageSampler->filter_mode != CL_FILTER_NEAREST || NORM_OFFSET == 0 +#if defined( __APPLE__ ) + // Apple requires its CPU implementation to do correctly rounded address arithmetic in all modes + || gDeviceType != CL_DEVICE_TYPE_GPU +#endif + ) + offset = 0.0f; // Loop only once + + for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel; norm_offset_x += NORM_OFFSET) { + for (float norm_offset_y = -offset; norm_offset_y <= offset && !found_pixel; norm_offset_y += NORM_OFFSET) { + + // Try sampling the pixel, without flushing denormals. 
+ int containsDenormals = 0; + FloatPixel maxPixel; + maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], 0.0f, norm_offset_x, norm_offset_y, 0.0f, + imageSampler, expected, 0, &containsDenormals ); + + float err1 = fabsf( resultPtr[0] - expected[0] ); + // Clamp to the minimum absolute error for the format + if (err1 > 0 && err1 < formatAbsoluteError) { err1 = 0.0f; } + float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN ); + + // Check if the result matches. + if( ! (err1 <= maxErr1) ) + { + //try flushing the denormals, if there is a failure. + if( containsDenormals ) + { + // If implementation decide to flush subnormals to zero, + // max error needs to be adjusted + maxErr1 += 4 * FLT_MIN; + + maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f, + imageSampler, expected, 0, NULL ); + + err1 = fabsf( resultPtr[0] - expected[0] ); + } + } + + // If the final result DOES match, then we've found a valid result and we're done with this pixel. + found_pixel = (err1 <= maxErr1); + }//norm_offset_x + }//norm_offset_y + + + // Step 2: If we did not find a match, then print out debugging info. + if (!found_pixel) { + // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the + // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. + checkOnlyOnePixel = 0; + int shouldReturn = 0; + for (float norm_offset_x = -offset; norm_offset_x <= offset && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) { + for (float norm_offset_y = -offset; norm_offset_y <= offset && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) { + + // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) + // E.g., test one pixel. 
+ if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + norm_offset_x = 0.0f; + norm_offset_y = 0.0f; + checkOnlyOnePixel = 1; + } + + int containsDenormals = 0; + FloatPixel maxPixel; + maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f, + imageSampler, expected, 0, &containsDenormals ); + + float err1 = fabsf( resultPtr[0] - expected[0] ); + float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN ); + + + if( ! (err1 <= maxErr1) ) + { + //try flushing the denormals, if there is a failure. + if( containsDenormals ) + { + maxErr1 += 4 * FLT_MIN; + + maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f, + imageSampler, expected, 0, NULL ); + + err1 = fabsf( resultPtr[0] - expected[0] ); + } + } + if( ! (err1 <= maxErr1) ) + { + log_error("FAILED norm_offsets: %g , %g:\n", norm_offset_x, norm_offset_y); + + float tempOut[4]; + shouldReturn |= determine_validation_error( imagePtr, imageInfo, imageSampler, resultPtr, + expected, error, xOffsetValues[ j ], yOffsetValues[ j ], norm_offset_x, norm_offset_y, j, numTries, numClamped, true, lod ); + + log_error( "Step by step:\n" ); + FloatPixel temp; + temp = sample_image_pixel_float_offset( imageValues, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f, + imageSampler, tempOut, 1 /* verbose */, &containsDenormals /*dont flush while error reporting*/ ); + log_error( "\tulps: %2.2f (max allowed: %2.2f)\n\n", + Ulp_Error( resultPtr[0], expected[0] ), + Ulp_Error( MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) + maxErr, MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) ) ); + + } else { + log_error("Test error: we should have detected this passing above.\n"); + } + + }//norm_offset_x + }//norm_offset_y + if( shouldReturn ) + return 1; + } // if 
(!found_pixel) + + resultPtr += 1; + } + } + } + else + { + log_error("Test error: Not supported format.\n"); + return 1; + } + return 0; +} + +int validate_image_2D_results(void *imageValues, void *resultValues, double formatAbsoluteError, float *xOffsetValues, float *yOffsetValues, + ExplicitType outputType, int &numTries, int &numClamped, image_sampler_data *imageSampler, image_descriptor *imageInfo, size_t lod, char *imagePtr) +{ + // Validate results element by element + size_t width_lod = (imageInfo->width >> lod ) ?(imageInfo->width >> lod ) : 1; + size_t height_lod = (imageInfo->height >> lod ) ?(imageInfo->height >> lod ) : 1; + /* + * FLOAT output type + */ + if( outputType == kFloat ) + { + // Validate float results + float *resultPtr = (float *)(char *)resultValues; + float expected[4], error=0.0f; + float maxErr = get_max_relative_error( imageInfo->format, imageSampler, 0 /*not 3D*/, CL_FILTER_LINEAR == imageSampler->filter_mode ); + for( size_t y = 0, j = 0; y < height_lod; y++ ) + { + for( size_t x = 0; x < width_lod; x++, j++ ) + { + // Step 1: go through and see if the results verify for the pixel + // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the + // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. 
+ int checkOnlyOnePixel = 0; + int found_pixel = 0; + float offset = NORM_OFFSET; + if (!imageSampler->normalized_coords || imageSampler->filter_mode != CL_FILTER_NEAREST || NORM_OFFSET == 0 +#if defined( __APPLE__ ) + // Apple requires its CPU implementation to do correctly rounded address arithmetic in all modes + || gDeviceType != CL_DEVICE_TYPE_GPU +#endif + ) + offset = 0.0f; // Loop only once + + for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel; norm_offset_x += NORM_OFFSET) { + for (float norm_offset_y = -offset; norm_offset_y <= offset && !found_pixel; norm_offset_y += NORM_OFFSET) { + + + // Try sampling the pixel, without flushing denormals. + int containsDenormals = 0; + FloatPixel maxPixel; + if ( gTestMipmaps ) + maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], 0.0f, norm_offset_x, norm_offset_y, 0.0f, + imageSampler, expected, 0, &containsDenormals, lod ); + else + maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], 0.0f, norm_offset_x, norm_offset_y, 0.0f, + imageSampler, expected, 0, &containsDenormals ); + + float err1 = fabsf( resultPtr[0] - expected[0] ); + float err2 = fabsf( resultPtr[1] - expected[1] ); + float err3 = fabsf( resultPtr[2] - expected[2] ); + float err4 = fabsf( resultPtr[3] - expected[3] ); + // Clamp to the minimum absolute error for the format + if (err1 > 0 && err1 < formatAbsoluteError) { err1 = 0.0f; } + if (err2 > 0 && err2 < formatAbsoluteError) { err2 = 0.0f; } + if (err3 > 0 && err3 < formatAbsoluteError) { err3 = 0.0f; } + if (err4 > 0 && err4 < formatAbsoluteError) { err4 = 0.0f; } + float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN ); + float maxErr2 = MAX( maxErr * maxPixel.p[1], FLT_MIN ); + float maxErr3 = MAX( maxErr * maxPixel.p[2], FLT_MIN ); + float maxErr4 = MAX( maxErr * maxPixel.p[3], FLT_MIN ); + + // Check if the result matches. + if( ! (err1 <= maxErr1) || ! 
(err2 <= maxErr2) || + ! (err3 <= maxErr3) || ! (err4 <= maxErr4) ) + { + //try flushing the denormals, if there is a failure. + if( containsDenormals ) + { + // If implementation decide to flush subnormals to zero, + // max error needs to be adjusted + maxErr1 += 4 * FLT_MIN; + maxErr2 += 4 * FLT_MIN; + maxErr3 += 4 * FLT_MIN; + maxErr4 += 4 * FLT_MIN; + + if(gTestMipmaps) + maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f, + imageSampler, expected, 0, NULL,lod ); + else + maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f, + imageSampler, expected, 0, NULL ); + + err1 = fabsf( resultPtr[0] - expected[0] ); + err2 = fabsf( resultPtr[1] - expected[1] ); + err3 = fabsf( resultPtr[2] - expected[2] ); + err4 = fabsf( resultPtr[3] - expected[3] ); + } + } + + // If the final result DOES match, then we've found a valid result and we're done with this pixel. + found_pixel = (err1 <= maxErr1) && (err2 <= maxErr2) && (err3 <= maxErr3) && (err4 <= maxErr4); + }//norm_offset_x + }//norm_offset_y + + + // Step 2: If we did not find a match, then print out debugging info. + if (!found_pixel) { + // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the + // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. + checkOnlyOnePixel = 0; + int shouldReturn = 0; + for (float norm_offset_x = -offset; norm_offset_x <= offset && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) { + for (float norm_offset_y = -offset; norm_offset_y <= offset && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) { + + // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) + // E.g., test one pixel. 
+ if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + norm_offset_x = 0.0f; + norm_offset_y = 0.0f; + checkOnlyOnePixel = 1; + } + + int containsDenormals = 0; + FloatPixel maxPixel; + if(gTestMipmaps) + maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f, + imageSampler, expected, 0, &containsDenormals, lod ); + else + maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f, + imageSampler, expected, 0, &containsDenormals ); + + float err1 = fabsf( resultPtr[0] - expected[0] ); + float err2 = fabsf( resultPtr[1] - expected[1] ); + float err3 = fabsf( resultPtr[2] - expected[2] ); + float err4 = fabsf( resultPtr[3] - expected[3] ); + float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN ); + float maxErr2 = MAX( maxErr * maxPixel.p[1], FLT_MIN ); + float maxErr3 = MAX( maxErr * maxPixel.p[2], FLT_MIN ); + float maxErr4 = MAX( maxErr * maxPixel.p[3], FLT_MIN ); + + + if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2) || + ! (err3 <= maxErr3) || ! (err4 <= maxErr4) ) + { + //try flushing the denormals, if there is a failure. 
+ if( containsDenormals ) + { + maxErr1 += 4 * FLT_MIN; + maxErr2 += 4 * FLT_MIN; + maxErr3 += 4 * FLT_MIN; + maxErr4 += 4 * FLT_MIN; + + if(gTestMipmaps) + maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f, + imageSampler, expected, 0, NULL, lod ); + else + maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f, + imageSampler, expected, 0, NULL ); + + err1 = fabsf( resultPtr[0] - expected[0] ); + err2 = fabsf( resultPtr[1] - expected[1] ); + err3 = fabsf( resultPtr[2] - expected[2] ); + err4 = fabsf( resultPtr[3] - expected[3] ); + } + } + if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2) || + ! (err3 <= maxErr3) || ! (err4 <= maxErr4) ) + { + log_error("FAILED norm_offsets: %g , %g:\n", norm_offset_x, norm_offset_y); + + float tempOut[4]; + shouldReturn |= determine_validation_error( imagePtr, imageInfo, imageSampler, resultPtr, + expected, error, xOffsetValues[ j ], yOffsetValues[ j ], norm_offset_x, norm_offset_y, j, numTries, numClamped, true, lod ); + + log_error( "Step by step:\n" ); + FloatPixel temp; + if( gTestMipmaps ) + temp = sample_image_pixel_float_offset( imagePtr, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f, + imageSampler, tempOut, 1 /* verbose */, &containsDenormals /*dont flush while error reporting*/, lod ); + else + temp = sample_image_pixel_float_offset( imageValues, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f, + imageSampler, tempOut, 1 /* verbose */, &containsDenormals /*dont flush while error reporting*/ ); + log_error( "\tulps: %2.2f, %2.2f, %2.2f, %2.2f (max allowed: %2.2f)\n\n", + Ulp_Error( resultPtr[0], expected[0] ), + Ulp_Error( resultPtr[1], expected[1] ), + Ulp_Error( resultPtr[2], expected[2] ), + Ulp_Error( resultPtr[3], expected[3] ), + 
Ulp_Error( MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) + maxErr, MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) ) ); + + } else { + log_error("Test error: we should have detected this passing above.\n"); + } + + }//norm_offset_x + }//norm_offset_y + if( shouldReturn ) + return 1; + } // if (!found_pixel) + + resultPtr += 4; + } + } + } + /* + * UINT output type + */ + else if( outputType == kUInt ) + { + // Validate unsigned integer results + unsigned int *resultPtr = (unsigned int *)(char *)resultValues; + unsigned int expected[4]; + float error; + for( size_t y = 0, j = 0; y < height_lod ; y++ ) + { + for( size_t x = 0; x < width_lod ; x++, j++ ) + { + // Step 1: go through and see if the results verify for the pixel + // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the + // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. + int checkOnlyOnePixel = 0; + int found_pixel = 0; + for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) { + for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) { + + // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) + // E.g., test one pixel. 
+ if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + norm_offset_x = 0.0f; + norm_offset_y = 0.0f; + checkOnlyOnePixel = 1; + } + + if ( gTestMipmaps ) + sample_image_pixel_offset( (char*)imagePtr, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f, + imageSampler, expected, lod ); + else + sample_image_pixel_offset( imagePtr, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f, + imageSampler, expected); + + + error = errMax( errMax( abs_diff_uint(expected[ 0 ], resultPtr[ 0 ]), abs_diff_uint(expected[ 1 ], resultPtr[ 1 ]) ), + errMax( abs_diff_uint(expected[ 2 ], resultPtr[ 2 ]), abs_diff_uint(expected[ 3 ], resultPtr[ 3 ]) ) ); + + if (error <= MAX_ERR) + found_pixel = 1; + }//norm_offset_x + }//norm_offset_y + + // Step 2: If we did not find a match, then print out debugging info. + if (!found_pixel) { + // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the + // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. + checkOnlyOnePixel = 0; + int shouldReturn = 0; + for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) { + for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) { + + // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) + // E.g., test one pixel. 
+ if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + norm_offset_x = 0.0f; + norm_offset_y = 0.0f; + checkOnlyOnePixel = 1; + } + + if( gTestMipmaps ) + sample_image_pixel_offset( imagePtr , imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f, + imageSampler, expected, lod ); + else + sample_image_pixel_offset( imagePtr , imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f, + imageSampler, expected); + + + error = errMax( errMax( abs_diff_uint(expected[ 0 ], resultPtr[ 0 ]), abs_diff_uint(expected[ 1 ], resultPtr[ 1 ]) ), + errMax( abs_diff_uint(expected[ 2 ], resultPtr[ 2 ]), abs_diff_uint(expected[ 3 ], resultPtr[ 3 ]) ) ); + + if( error > MAX_ERR ) + { + log_error("FAILED norm_offsets: %g , %g:\n", norm_offset_x, norm_offset_y); + + shouldReturn |= determine_validation_error( imagePtr, imageInfo, imageSampler, resultPtr, + expected, error, xOffsetValues[j], yOffsetValues[j], norm_offset_x, norm_offset_y, j, numTries, numClamped, false, lod ); + } else { + log_error("Test error: we should have detected this passing above.\n"); + } + }//norm_offset_x + }//norm_offset_y + if( shouldReturn ) + return 1; + } // if (!found_pixel) + + resultPtr += 4; + } + } + } + /* + * INT output type + */ + else + { + // Validate integer results + int *resultPtr = (int *)(char *)resultValues; + int expected[4]; + float error; + for( size_t y = 0, j = 0; y < height_lod ; y++ ) + { + for( size_t x = 0; x < width_lod; x++, j++ ) + { + // Step 1: go through and see if the results verify for the pixel + // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the + // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. 
+ int checkOnlyOnePixel = 0; + int found_pixel = 0; + for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) { + for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) { + + // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) + // E.g., test one pixel. + if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + norm_offset_x = 0.0f; + norm_offset_y = 0.0f; + checkOnlyOnePixel = 1; + } + + if ( gTestMipmaps ) + sample_image_pixel_offset( imagePtr, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f, + imageSampler, expected , lod); + else + sample_image_pixel_offset( imageValues, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f, + imageSampler, expected ); + + + error = errMax( errMax( abs_diff_int(expected[ 0 ], resultPtr[ 0 ]), abs_diff_int(expected[ 1 ], resultPtr[ 1 ]) ), + errMax( abs_diff_int(expected[ 2 ], resultPtr[ 2 ]), abs_diff_int(expected[ 3 ], resultPtr[ 3 ]) ) ); + + if (error <= MAX_ERR) + found_pixel = 1; + }//norm_offset_x + }//norm_offset_y + + // Step 2: If we did not find a match, then print out debugging info. + if (!found_pixel) { + // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the + // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. 
+ checkOnlyOnePixel = 0; + int shouldReturn = 0; + for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) { + for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) { + + // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) + // E.g., test one pixel. + if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + norm_offset_x = 0.0f; + norm_offset_y = 0.0f; + checkOnlyOnePixel = 1; + } + + if ( gTestMipmaps ) + sample_image_pixel_offset( imageValues, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f, + imageSampler, expected, lod ); + else + sample_image_pixel_offset( imageValues, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f, + imageSampler, expected ); + + + error = errMax( errMax( abs_diff_int(expected[ 0 ], resultPtr[ 0 ]), abs_diff_int(expected[ 1 ], resultPtr[ 1 ]) ), + errMax( abs_diff_int(expected[ 2 ], resultPtr[ 2 ]), abs_diff_int(expected[ 3 ], resultPtr[ 3 ]) ) ); + + if( error > MAX_ERR ) + { + log_error("FAILED norm_offsets: %g , %g:\n", norm_offset_x, norm_offset_y); + + shouldReturn |= determine_validation_error( imagePtr, imageInfo, imageSampler, resultPtr, + expected, error, xOffsetValues[j], yOffsetValues[j], norm_offset_x, norm_offset_y, j, numTries, numClamped, false, lod ); + } else { + log_error("Test error: we should have detected this passing above.\n"); + } + }//norm_offset_x + }//norm_offset_y + if( shouldReturn ) + return 1; + } // if (!found_pixel) + + resultPtr += 4; + } + } + } + return 0; +} + +int validate_image_2D_sRGB_results(void *imageValues, void *resultValues, double formatAbsoluteError, float *xOffsetValues, float *yOffsetValues, + ExplicitType outputType, int &numTries, int &numClamped, image_sampler_data *imageSampler, 
image_descriptor *imageInfo, size_t lod, char *imagePtr) +{ + // Validate results element by element + size_t width_lod = (imageInfo->width >> lod ) ?(imageInfo->width >> lod ) : 1; + size_t height_lod = (imageInfo->height >> lod ) ?(imageInfo->height >> lod ) : 1; + /* + * FLOAT output type + */ + if( outputType == kFloat ) + { + // Validate float results + float *resultPtr = (float *)(char *)resultValues; + float expected[4], error=0.0f; + float maxErr = get_max_relative_error( imageInfo->format, imageSampler, 0 /*not 3D*/, CL_FILTER_LINEAR == imageSampler->filter_mode ); + for( size_t y = 0, j = 0; y < height_lod; y++ ) + { + for( size_t x = 0; x < width_lod; x++, j++ ) + { + // Step 1: go through and see if the results verify for the pixel + // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the + // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. + int checkOnlyOnePixel = 0; + int found_pixel = 0; + float offset = NORM_OFFSET; + if (!imageSampler->normalized_coords || imageSampler->filter_mode != CL_FILTER_NEAREST || NORM_OFFSET == 0 +#if defined( __APPLE__ ) + // Apple requires its CPU implementation to do correctly rounded address arithmetic in all modes + || gDeviceType != CL_DEVICE_TYPE_GPU +#endif + ) + offset = 0.0f; // Loop only once + + for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel; norm_offset_x += NORM_OFFSET) { + for (float norm_offset_y = -offset; norm_offset_y <= offset && !found_pixel; norm_offset_y += NORM_OFFSET) { + + + // Try sampling the pixel, without flushing denormals. 
+ int containsDenormals = 0; + FloatPixel maxPixel; + if ( gTestMipmaps ) + maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], 0.0f, norm_offset_x, norm_offset_y, 0.0f, + imageSampler, expected, 0, &containsDenormals, lod ); + else + maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], 0.0f, norm_offset_x, norm_offset_y, 0.0f, + imageSampler, expected, 0, &containsDenormals ); + float err1 = fabsf( sRGBmap( resultPtr[0] ) - sRGBmap( expected[0] ) ); + float err2 = fabsf( sRGBmap( resultPtr[1] ) - sRGBmap( expected[1] ) ); + float err3 = fabsf( sRGBmap( resultPtr[2] ) - sRGBmap( expected[2] ) ); + float err4 = fabsf( resultPtr[3] - expected[3] ); + float maxErr = 0.5; + + // Check if the result matches. + if( ! (err1 <= maxErr) || ! (err2 <= maxErr) || + ! (err3 <= maxErr) || ! (err4 <= maxErr) ) + { + //try flushing the denormals, if there is a failure. + if( containsDenormals ) + { + // If implementation decide to flush subnormals to zero, + // max error needs to be adjusted + maxErr += 4 * FLT_MIN; + + if(gTestMipmaps) + maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f, + imageSampler, expected, 0, NULL,lod ); + else + maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f, + imageSampler, expected, 0, NULL ); + + err1 = fabsf( sRGBmap( resultPtr[0] ) - sRGBmap( expected[0] ) ); + err2 = fabsf( sRGBmap( resultPtr[1] ) - sRGBmap( expected[1] ) ); + err3 = fabsf( sRGBmap( resultPtr[2] ) - sRGBmap( expected[2] ) ); + err4 = fabsf( resultPtr[3] - expected[3] ); + } + } + + // If the final result DOES match, then we've found a valid result and we're done with this pixel. 
+ found_pixel = (err1 <= maxErr) && (err2 <= maxErr) && (err3 <= maxErr) && (err4 <= maxErr); + }//norm_offset_x + }//norm_offset_y + + + // Step 2: If we did not find a match, then print out debugging info. + if (!found_pixel) { + // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the + // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. + checkOnlyOnePixel = 0; + int shouldReturn = 0; + for (float norm_offset_x = -offset; norm_offset_x <= offset && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) { + for (float norm_offset_y = -offset; norm_offset_y <= offset && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) { + + // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) + // E.g., test one pixel. + if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + norm_offset_x = 0.0f; + norm_offset_y = 0.0f; + checkOnlyOnePixel = 1; + } + + int containsDenormals = 0; + FloatPixel maxPixel; + if(gTestMipmaps) + maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f, + imageSampler, expected, 0, &containsDenormals, lod ); + else + maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f, + imageSampler, expected, 0, &containsDenormals ); + + float err1 = fabsf( sRGBmap( resultPtr[0] ) - sRGBmap( expected[0] ) ); + float err2 = fabsf( sRGBmap( resultPtr[1] ) - sRGBmap( expected[1] ) ); + float err3 = fabsf( sRGBmap( resultPtr[2] ) - sRGBmap( expected[2] ) ); + float err4 = fabsf( resultPtr[3] - expected[3] ); + float maxErr = 0.6; + + if( ! (err1 <= maxErr) || ! (err2 <= maxErr) || + ! (err3 <= maxErr) || ! (err4 <= maxErr) ) + { + //try flushing the denormals, if there is a failure. 
+ if( containsDenormals ) + { + // If implementation decide to flush subnormals to zero, + // max error needs to be adjusted + maxErr += 4 * FLT_MIN; + if(gTestMipmaps) + maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f, + imageSampler, expected, 0, NULL, lod ); + else + maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f, + imageSampler, expected, 0, NULL ); + + err1 = fabsf( sRGBmap( resultPtr[0] ) - sRGBmap( expected[0] ) ); + err2 = fabsf( sRGBmap( resultPtr[1] ) - sRGBmap( expected[1] ) ); + err3 = fabsf( sRGBmap( resultPtr[2] ) - sRGBmap( expected[2] ) ); + err4 = fabsf( resultPtr[3] - expected[3] ); + } + } + if( ! (err1 <= maxErr) || ! (err2 <= maxErr) || + ! (err3 <= maxErr) || ! (err4 <= maxErr) ) + { + log_error("FAILED norm_offsets: %g , %g:\n", norm_offset_x, norm_offset_y); + + float tempOut[4]; + shouldReturn |= determine_validation_error( imagePtr, imageInfo, imageSampler, resultPtr, + expected, error, xOffsetValues[ j ], yOffsetValues[ j ], norm_offset_x, norm_offset_y, j, numTries, numClamped, true, lod ); + + log_error( "Step by step:\n" ); + FloatPixel temp; + if( gTestMipmaps ) + temp = sample_image_pixel_float_offset( imagePtr, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f, + imageSampler, tempOut, 1 /* verbose */, &containsDenormals /*dont flush while error reporting*/, lod ); + else + temp = sample_image_pixel_float_offset( imageValues, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f, + imageSampler, tempOut, 1 /* verbose */, &containsDenormals /*dont flush while error reporting*/ ); + log_error( "\tulps: %2.2f, %2.2f, %2.2f, %2.2f (max allowed: %2.2f)\n\n", + Ulp_Error( resultPtr[0], expected[0] ), + Ulp_Error( resultPtr[1], expected[1] ), + Ulp_Error( 
resultPtr[2], expected[2] ), + Ulp_Error( resultPtr[3], expected[3] ), + Ulp_Error( MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) + maxErr, MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) ) ); + + } else { + log_error("Test error: we should have detected this passing above.\n"); + } + + }//norm_offset_x + }//norm_offset_y + if( shouldReturn ) + return 1; + } // if (!found_pixel) + + resultPtr += 4; + } + } + } + else { + log_error("Test error: NOT SUPPORTED.\n"); + } + return 0; +} + +int test_read_image_2D( cl_device_id device, cl_context context, cl_command_queue queue, cl_kernel kernel, + image_descriptor *imageInfo, image_sampler_data *imageSampler, + bool useFloatCoords, ExplicitType outputType, MTdata d ) +{ + int error; + static int initHalf = 0; + cl_mem imageBuffer; + cl_mem_flags image_read_write_flags = CL_MEM_READ_ONLY; + size_t threads[2]; + + clMemWrapper xOffsets, yOffsets, results; + clSamplerWrapper actualSampler; + BufferOwningPtr maxImageUseHostPtrBackingStore; + + // The DataBuffer template class really does use delete[], not free -- IRO + BufferOwningPtr xOffsetValues(malloc(sizeof(cl_float) * imageInfo->width * imageInfo->height)); + BufferOwningPtr yOffsetValues(malloc(sizeof(cl_float) * imageInfo->width * imageInfo->height)); + + if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT ) + if( DetectFloatToHalfRoundingMode(queue) ) + return 1; + + // generate_random_image_data allocates with malloc, so we use a MallocDataBuffer here + BufferOwningPtr imageValues; + generate_random_image_data( imageInfo, imageValues, d ); + + if( gDebugTrace ) + { + log_info( " - Creating image %d by %d...\n", (int)imageInfo->width, (int)imageInfo->height ); + if( gTestMipmaps ) + { + log_info( " - with %d mip levels", (int) imageInfo->num_mip_levels ); + } + } + + // Construct testing sources + clProtectedImage protImage; + clMemWrapper unprotImage; + cl_mem image; + + if(gtestTypesToRun & kReadTests) + { + image_read_write_flags = CL_MEM_READ_ONLY; + 
} + else + { + image_read_write_flags = CL_MEM_READ_WRITE; + } + + if( gMemFlagsToUse == CL_MEM_USE_HOST_PTR ) + { + if (gTestImage2DFromBuffer) + { + generate_random_image_data( imageInfo, maxImageUseHostPtrBackingStore, d ); + imageBuffer = clCreateBuffer( context, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, + imageInfo->rowPitch * imageInfo->height, maxImageUseHostPtrBackingStore, &error); + test_error( error, "Unable to create buffer" ); + unprotImage = create_image_2d_buffer( context, + image_read_write_flags, + imageInfo->format, + imageInfo->width, imageInfo->height, imageInfo->rowPitch, + imageBuffer, &error ); + + } + else + { + // clProtectedImage uses USE_HOST_PTR, so just rely on that for the testing (via Ian) + // Do not use protected images for max image size test since it rounds the row size to a page size + if (gTestMaxImages) { + generate_random_image_data( imageInfo, maxImageUseHostPtrBackingStore, d ); + unprotImage = create_image_2d( context, + image_read_write_flags | CL_MEM_USE_HOST_PTR, + imageInfo->format, + imageInfo->width, imageInfo->height, ( gEnablePitch ? 
imageInfo->rowPitch : 0 ), + maxImageUseHostPtrBackingStore, &error ); + } + else + { + error = protImage.Create( context, + image_read_write_flags, + imageInfo->format, imageInfo->width, imageInfo->height ); + } + } + + if( error != CL_SUCCESS ) + { + if (gTestImage2DFromBuffer) { + clReleaseMemObject(imageBuffer); + if (error == CL_INVALID_IMAGE_FORMAT_DESCRIPTOR) { + log_info( "Format not supported for cl_khr_image2d_from_buffer skipping...\n" ); + return 0; + } + } + + log_error( "ERROR: Unable to create 2D image of size %d x %d pitch %d (%s)\n", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->rowPitch, IGetErrorString( error ) ); + return error; + } + + if (gTestMaxImages || gTestImage2DFromBuffer) + image = (cl_mem)unprotImage; + else + image = (cl_mem)protImage; + } + else if( gMemFlagsToUse == CL_MEM_COPY_HOST_PTR ) + { + if (gTestImage2DFromBuffer) + { + imageBuffer = clCreateBuffer( context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, + imageInfo->rowPitch * imageInfo->height, imageValues, &error); + test_error( error, "Unable to create buffer" ); + unprotImage = create_image_2d_buffer( context, + image_read_write_flags, + imageInfo->format, + imageInfo->width, imageInfo->height, imageInfo->rowPitch, + imageBuffer, &error ); + + } + else + { + // Don't use clEnqueueWriteImage; just use copy host ptr to get the data in + unprotImage = create_image_2d( context, + image_read_write_flags | CL_MEM_COPY_HOST_PTR, + imageInfo->format, + imageInfo->width, imageInfo->height, ( gEnablePitch ? 
imageInfo->rowPitch : 0 ), + imageValues, &error ); + } + if( error != CL_SUCCESS ) + { + if (gTestImage2DFromBuffer) { + clReleaseMemObject(imageBuffer); + if (error == CL_INVALID_IMAGE_FORMAT_DESCRIPTOR) { + log_info( "Format not supported for cl_khr_image2d_from_buffer skipping...\n" ); + return 0; + } + } + + log_error( "ERROR: Unable to create 2D image of size %d x %d pitch %d (%s)\n", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->rowPitch, IGetErrorString( error ) ); + return error; + } + image = unprotImage; + } + else // Either CL_MEM_ALLOC_HOST_PTR or none + { + if( gTestMipmaps ) + { + cl_image_desc image_desc = {0}; + image_desc.image_type = CL_MEM_OBJECT_IMAGE2D; + image_desc.image_width = imageInfo->width; + image_desc.image_height = imageInfo->height; + image_desc.num_mip_levels = imageInfo->num_mip_levels; + unprotImage = clCreateImage( context, CL_MEM_READ_ONLY, imageInfo->format, &image_desc, NULL, &error); + } + else if (gTestImage2DFromBuffer) + { + imageBuffer = clCreateBuffer( context, CL_MEM_READ_WRITE | gMemFlagsToUse, + imageInfo->rowPitch * imageInfo->height, imageValues, &error); + test_error( error, "Unable to create buffer" ); + unprotImage = create_image_2d_buffer( context, + image_read_write_flags, + imageInfo->format, + imageInfo->width, imageInfo->height, imageInfo->rowPitch, + imageBuffer, &error ); + + } + else + { + // Note: if ALLOC_HOST_PTR is used, the driver allocates memory that can be accessed by the host, but otherwise + // it works just as if no flag is specified, so we just do the same thing either way + unprotImage = create_image_2d( context, + image_read_write_flags | gMemFlagsToUse, + imageInfo->format, + imageInfo->width, imageInfo->height, ( gEnablePitch ? 
imageInfo->rowPitch : 0 ), + imageValues, &error ); + } + if( error != CL_SUCCESS ) + { + if (gTestImage2DFromBuffer) { + clReleaseMemObject(imageBuffer); + if (error == CL_INVALID_IMAGE_FORMAT_DESCRIPTOR) { + log_info( "Format not supported for cl_khr_image2d_from_buffer skipping...\n" ); + return 0; + } + } + + log_error( "ERROR: Unable to create 2D image of size %d x %d pitch %d (%s)\n", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->rowPitch, IGetErrorString( error ) ); + return error; + } + image = unprotImage; + } + + if( gMemFlagsToUse != CL_MEM_COPY_HOST_PTR ) + { + if( gDebugTrace ) + log_info( " - Writing image...\n" ); + + size_t origin[ 3 ] = { 0, 0, 0 }; + size_t region[ 3 ] = { imageInfo->width, imageInfo->height, 1 }; + + if(!gTestMipmaps) + { + error = clEnqueueWriteImage(queue, image, CL_TRUE, + origin, region, ( gEnablePitch ? imageInfo->rowPitch : 0 ), 0, + imageValues, 0, NULL, NULL); + if (error != CL_SUCCESS) + { + log_error( "ERROR: Unable to write to 2D image of size %d x %d\n", (int)imageInfo->width, (int)imageInfo->height ); + return error; + } + } + else + { + size_t tmpNextLevelOffset = 0; + for(size_t level = 0; level < imageInfo->num_mip_levels; level++) + { + origin[2] = level; + error = clEnqueueWriteImage(queue, image, CL_TRUE, + origin, region, (( gEnablePitch || gTestImage2DFromBuffer) ? imageInfo->rowPitch : 0 ), 0, + (char*)imageValues + tmpNextLevelOffset, 0, NULL, NULL); + tmpNextLevelOffset += region[0]*region[1]*get_pixel_size(imageInfo->format); + region[0] = (region[0] >> 1) ? (region[0] >> 1) : 1; + region[1] = (region[1] >> 1) ? 
(region[1] >> 1) : 1; + } + } + } + + if( gDebugTrace ) + log_info( " - Creating kernel arguments...\n" ); + + xOffsets = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), sizeof( cl_float ) * imageInfo->width * imageInfo->height, xOffsetValues, &error ); + test_error( error, "Unable to create x offset buffer" ); + yOffsets = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), sizeof( cl_float ) * imageInfo->width * imageInfo->height, yOffsetValues, &error ); + test_error( error, "Unable to create y offset buffer" ); + results = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), get_explicit_type_size( outputType ) * 4 * imageInfo->width * imageInfo->height, NULL, &error ); + test_error( error, "Unable to create result buffer" ); + + // Create sampler to use + cl_sampler_properties properties[] = { + CL_SAMPLER_NORMALIZED_COORDS, imageSampler->normalized_coords, + CL_SAMPLER_ADDRESSING_MODE, imageSampler->addressing_mode, + CL_SAMPLER_FILTER_MODE, imageSampler->filter_mode, + 0, 0, 0 }; + if (gTestMipmaps) { + properties[6] = CL_SAMPLER_MIP_FILTER_MODE; + properties[7] = imageSampler->filter_mode; + } + actualSampler = clCreateSamplerWithProperties(context, properties, &error ); + test_error( error, "Unable to create image sampler" ); + + // Set arguments + int idx = 0; + error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &image ); + test_error( error, "Unable to set kernel arguments" ); + if( !gUseKernelSamplers ) + { + error = clSetKernelArg( kernel, idx++, sizeof( cl_sampler ), &actualSampler ); + test_error( error, "Unable to set kernel arguments" ); + } + error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &xOffsets ); + test_error( error, "Unable to set kernel arguments" ); + error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &yOffsets ); + test_error( error, "Unable to set kernel arguments" ); + error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &results ); + test_error( error, "Unable to set kernel 
arguments" ); + + // A cast of troublesome offsets. The first one has to be zero. + const float float_offsets[] = { 0.0f, MAKE_HEX_FLOAT(0x1.0p-30f, 0x1L, -30), 0.25f, 0.3f, 0.5f - FLT_EPSILON/4.0f, 0.5f, 0.9f, 1.0f - FLT_EPSILON/2 }; + int float_offset_count = sizeof( float_offsets) / sizeof( float_offsets[0] ); + int numTries = MAX_TRIES, numClamped = MAX_CLAMPED; + int loopCount = 2 * float_offset_count; + if( ! useFloatCoords ) + loopCount = 1; + if (gTestMaxImages) { + loopCount = 1; + log_info("Testing each size only once with pixel offsets of %g for max sized images.\n", float_offsets[0]); + } + + if(gtestTypesToRun & kReadWriteTests) + { + loopCount = 1; + } + + // Get the maximum absolute error for this format + double formatAbsoluteError = get_max_absolute_error(imageInfo->format, imageSampler); + if (gDebugTrace) log_info("\tformatAbsoluteError is %e\n", formatAbsoluteError); + + if (0 == initHalf && imageInfo->format->image_channel_data_type == CL_HALF_FLOAT ) { + initHalf = CL_SUCCESS == DetectFloatToHalfRoundingMode( queue ); + if (initHalf) { + log_info("Half rounding mode successfully detected.\n"); + } + } + + size_t nextLevelOffset = 0; + size_t width_lod = imageInfo->width, height_lod = imageInfo->height; + for( size_t lod = 0; (gTestMipmaps && (lod < imageInfo->num_mip_levels))|| (!gTestMipmaps && lod < 1); lod ++) + { + size_t resultValuesSize = width_lod * height_lod * get_explicit_type_size( outputType ) * 4; + BufferOwningPtr resultValues(malloc(resultValuesSize)); + float lod_float = (float)lod; + char *imagePtr = (char *)imageValues + nextLevelOffset; + if( gTestMipmaps ) + { + if(gDebugTrace) + log_info("\t- Working at mip level %d\n", lod); + error = clSetKernelArg( kernel, idx, sizeof(float), &lod_float); + } + + // Validate results element by element + for( int q = 0; q < loopCount; q++ ) + { + float offset = float_offsets[ q % float_offset_count ]; + + // Init the coordinates + InitFloatCoords( imageInfo, imageSampler, xOffsetValues, 
yOffsetValues, + q>=float_offset_count ? -offset: offset, + q>=float_offset_count ? offset: -offset, imageSampler->normalized_coords, d, lod ); + + error = clEnqueueWriteBuffer( queue, xOffsets, CL_TRUE, 0, sizeof(cl_float) * imageInfo->height * imageInfo->width, xOffsetValues, 0, NULL, NULL ); + test_error( error, "Unable to write x offsets" ); + error = clEnqueueWriteBuffer( queue, yOffsets, CL_TRUE, 0, sizeof(cl_float) * imageInfo->height * imageInfo->width, yOffsetValues, 0, NULL, NULL ); + test_error( error, "Unable to write y offsets" ); + + // Get results + memset( resultValues, 0xff, resultValuesSize ); + clEnqueueWriteBuffer( queue, results, CL_TRUE, 0, resultValuesSize, resultValues, 0, NULL, NULL ); + + // Run the kernel + threads[0] = (size_t)width_lod; + threads[1] = (size_t)height_lod; + error = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, NULL ); + test_error( error, "Unable to run kernel" ); + + if( gDebugTrace ) + log_info( " reading results, %ld kbytes\n", (unsigned long)( width_lod * height_lod * get_explicit_type_size( outputType ) * 4 / 1024 ) ); + + error = clEnqueueReadBuffer( queue, results, CL_TRUE, 0, width_lod * height_lod * get_explicit_type_size( outputType ) * 4, resultValues, 0, NULL, NULL ); //XXX check + test_error( error, "Unable to read results from kernel" ); + if( gDebugTrace ) + log_info( " results read\n" ); + + int retCode; + switch (imageInfo->format->image_channel_order) { + case CL_DEPTH: + retCode = validate_image_2D_depth_results((char*)imageValues + nextLevelOffset, resultValues, formatAbsoluteError, xOffsetValues, yOffsetValues, outputType, numTries, numClamped, imageSampler, imageInfo, lod, imagePtr); + break; + case CL_sRGB: + case CL_sRGBx: + case CL_sRGBA: + case CL_sBGRA: + retCode = validate_image_2D_sRGB_results((char*)imageValues + nextLevelOffset, resultValues, formatAbsoluteError, xOffsetValues, yOffsetValues, outputType, numTries, numClamped, imageSampler, imageInfo, lod, 
imagePtr); + break; + default: + retCode = validate_image_2D_results((char*)imageValues + nextLevelOffset, resultValues, formatAbsoluteError, xOffsetValues, yOffsetValues, outputType, numTries, numClamped, imageSampler, imageInfo, lod, imagePtr); + } + if (retCode) + return retCode; + } + end: + if ( gTestMipmaps ) + { + nextLevelOffset += width_lod * height_lod * get_pixel_size( imageInfo->format ); + width_lod = (width_lod >> 1) ? (width_lod >> 1) : 1; + height_lod = (height_lod >> 1) ? (height_lod >> 1) : 1; + } + } + + if (gTestImage2DFromBuffer) clReleaseMemObject(imageBuffer); + + return numTries != MAX_TRIES || numClamped != MAX_CLAMPED; +} + +int test_read_image_set_2D( cl_device_id device, cl_image_format *format, image_sampler_data *imageSampler, + bool floatCoords, ExplicitType outputType ) +{ + char programSrc[10240]; + const char *ptr; + const char *readFormat; + clProgramWrapper program; + clKernelWrapper kernel; + const char *KernelSourcePattern = NULL; + + if (gTestImage2DFromBuffer) + { + if (format->image_channel_order == CL_RGB || format->image_channel_order == CL_RGBx) + { + switch (format->image_channel_data_type) + { + case CL_UNORM_INT8: + case CL_UNORM_INT16: + case CL_SNORM_INT8: + case CL_SNORM_INT16: + case CL_HALF_FLOAT: + case CL_FLOAT: + case CL_SIGNED_INT8: + case CL_SIGNED_INT16: + case CL_SIGNED_INT32: + case CL_UNSIGNED_INT8: + case CL_UNSIGNED_INT16: + case CL_UNSIGNED_INT32: + log_info( "Skipping image format: %s %s\n", GetChannelOrderName( format->image_channel_order ), + GetChannelTypeName( format->image_channel_data_type )); + return 0; + default: + break; + } + } + } + + + RandomSeed seed( gRandomSeed ); + int error; + + // Get our operating params + size_t maxWidth, maxHeight; + cl_ulong maxAllocSize, memSize; + image_descriptor imageInfo = { 0x0 }; + size_t pixelSize; + + imageInfo.format = format; + imageInfo.depth = imageInfo.arraySize = imageInfo.slicePitch = 0; + imageInfo.type = CL_MEM_OBJECT_IMAGE2D; + pixelSize = 
get_pixel_size( imageInfo.format ); + + error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL ); + test_error( error, "Unable to get max image 2D size from device" ); + + if (memSize > (cl_ulong)SIZE_MAX) { + memSize = (cl_ulong)SIZE_MAX; + } + + // Determine types + if( outputType == kInt ) + readFormat = "i"; + else if( outputType == kUInt ) + readFormat = "ui"; + else // kFloat + readFormat = "f"; + + // Construct the source + const char *samplerArg = samplerKernelArg; + char samplerVar[ 1024 ] = ""; + if( gUseKernelSamplers ) + { + get_sampler_kernel_code( imageSampler, samplerVar ); + samplerArg = ""; + } + + if(gtestTypesToRun & kReadTests) + { + KernelSourcePattern = read2DKernelSourcePattern; + } + else + { + KernelSourcePattern = read_write2DKernelSourcePattern; + } + + + sprintf( programSrc, KernelSourcePattern, + (format->image_channel_order == CL_DEPTH) ? "image2d_depth_t" : "image2d_t", + samplerArg, get_explicit_type_name( outputType ), + (format->image_channel_order == CL_DEPTH) ? "" : "4", + gTestMipmaps?", float lod":" ", + samplerVar, + gTestMipmaps? lodOffsetSource : offsetSource, + floatCoords ? 
floatKernelSource : intCoordKernelSource, + readFormat, + gTestMipmaps?", lod":" "); + + ptr = programSrc; + error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", "-cl-std=CL2.0" ); + test_error( error, "Unable to create testing kernel" ); + + if( gTestSmallImages ) + { + for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ ) + { + imageInfo.rowPitch = imageInfo.width * pixelSize; + for( imageInfo.height = 1; imageInfo.height < 9; imageInfo.height++ ) + { + if( gTestMipmaps ) + imageInfo.num_mip_levels = (size_t) random_in_range(2, compute_max_mip_levels(imageInfo.width, imageInfo.height, 0)-1, seed); + + if( gDebugTrace ) + log_info( " at size %d,%d\n", (int)imageInfo.width, (int)imageInfo.height ); + + int retCode = test_read_image_2D( device, context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed ); + if( retCode ) + return retCode; + } + } + } + else if( gTestMaxImages ) + { + // Try a specific set of maximum sizes + size_t numbeOfSizes; + size_t sizes[100][3]; + + get_max_sizes(&numbeOfSizes, 100, sizes, maxWidth, maxHeight, 1, 1, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE2D, imageInfo.format, CL_TRUE); + + for( size_t idx = 0; idx < numbeOfSizes; idx++ ) + { + imageInfo.width = sizes[ idx ][ 0 ]; + imageInfo.height = sizes[ idx ][ 1 ]; + imageInfo.rowPitch = imageInfo.width * pixelSize; + log_info("Testing %d x %d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ]); + + if( gTestMipmaps ) + imageInfo.num_mip_levels = (size_t) random_in_range(2, compute_max_mip_levels(imageInfo.width, imageInfo.height, 0)-1, seed); + + if( gDebugTrace ) + log_info( " at max size %d,%d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ] ); + int retCode = test_read_image_2D( device, context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed ); + if( retCode ) + return retCode; + } + } + else if( gTestRounding ) + { + uint64_t typeRange = 1LL << ( 
get_format_type_size( imageInfo.format ) * 8 ); + typeRange /= pixelSize / get_format_type_size( imageInfo.format ); + imageInfo.height = (size_t)( ( typeRange + 255LL ) / 256LL ); + imageInfo.width = (size_t)( typeRange / (cl_ulong)imageInfo.height ); + while( imageInfo.height >= maxHeight / 2 ) + { + imageInfo.width <<= 1; + imageInfo.height >>= 1; + } + + while( imageInfo.width >= maxWidth / 2 ) + imageInfo.width >>= 1; + imageInfo.rowPitch = imageInfo.width * pixelSize; + + gRoundingStartValue = 0; + do + { + if( gDebugTrace ) + log_info( " at size %d,%d, starting round ramp at %llu for range %llu\n", (int)imageInfo.width, (int)imageInfo.height, gRoundingStartValue, typeRange ); + int retCode = test_read_image_2D( device, context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed ); + if( retCode ) + return retCode; + + gRoundingStartValue += imageInfo.width * imageInfo.height * pixelSize / get_format_type_size( imageInfo.format ); + + } while( gRoundingStartValue < typeRange ); + } + else + { + cl_uint imagePitchAlign = 0; + if (gTestImage2DFromBuffer) + { +#if defined(CL_DEVICE_IMAGE_PITCH_ALIGNMENT) + error = clGetDeviceInfo( device, CL_DEVICE_IMAGE_PITCH_ALIGNMENT, sizeof( cl_uint ), &imagePitchAlign, NULL ); +#endif + if (!imagePitchAlign || error) { + test_error( error, "Unable to get CL_DEVICE_IMAGE_PITCH_ALIGNMENT from device" ); + imagePitchAlign = 1; + } + } + + int maxWidthRange = (int) reduceImageSizeRange(maxWidth); + int maxHeightRange = (int) reduceImageSizeRange(maxHeight); + + for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ ) + { + cl_ulong size; + // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that + // image, the result array, plus offset arrays, will fit in the global ram space + do + { + imageInfo.width = (size_t)random_log_in_range( 16, maxWidthRange, seed ); + imageInfo.height = (size_t)random_log_in_range( 16, maxHeightRange, seed ); + + imageInfo.rowPitch = 
imageInfo.width * pixelSize; + if( gTestMipmaps ) + { + imageInfo.num_mip_levels = (size_t) random_in_range(2, compute_max_mip_levels(imageInfo.width, imageInfo.height, 0)-1, seed); + size = 4 * compute_mipmapped_image_size(imageInfo); + } + else + { + if( gEnablePitch ) + { + size_t extraWidth = (int)random_log_in_range( 0, 64, seed ); + imageInfo.rowPitch += extraWidth * pixelSize; + } + + // if we are creating a 2D image from a buffer, make sure that the rowpitch is aligned to CL_DEVICE_IMAGE_PITCH_ALIGNMENT_APPLE + if (gTestImage2DFromBuffer) + { + size_t pitch = imagePitchAlign * pixelSize; + imageInfo.rowPitch = ((imageInfo.rowPitch + pitch - 1) / pitch ) * pitch; + } + + size = (size_t)imageInfo.rowPitch * (size_t)imageInfo.height * 4; + } + } while( size > maxAllocSize || ( size * 3 ) > memSize ); + + if( gDebugTrace ) + log_info( " at size %d,%d (row pitch %d) out of %d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.rowPitch, (int)maxWidth, (int)maxHeight ); + int retCode = test_read_image_2D( device, context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed ); + if( retCode ) + return retCode; + } + } + + return 0; +} diff --git a/test_conformance/images/kernel_read_write/test_loops.cpp b/test_conformance/images/kernel_read_write/test_loops.cpp new file mode 100644 index 00000000..dbaa9c33 --- /dev/null +++ b/test_conformance/images/kernel_read_write/test_loops.cpp @@ -0,0 +1,466 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../testBase.h" + +extern cl_context context; +extern cl_filter_mode gFilterModeToUse; +extern cl_addressing_mode gAddressModeToUse; +extern int gTypesToTest; +extern int gNormalizedModeToUse; +extern cl_channel_type gChannelTypeToUse; +extern cl_channel_order gChannelOrderToUse; + +extern bool gDebugTrace; +extern bool gTestMipmaps; + +extern int gtestTypesToRun; + +extern int test_read_image_set_1D( cl_device_id device, cl_image_format *format, image_sampler_data *imageSampler, + bool floatCoords, ExplicitType outputType ); +extern int test_read_image_set_2D( cl_device_id device, cl_image_format *format, image_sampler_data *imageSampler, + bool floatCoords, ExplicitType outputType ); +extern int test_read_image_set_3D( cl_device_id device, cl_image_format *format, image_sampler_data *imageSampler, + bool floatCoords, ExplicitType outputType ); +extern int test_read_image_set_1D_array( cl_device_id device, cl_image_format *format, image_sampler_data *imageSampler, + bool floatCoords, ExplicitType outputType ); +extern int test_read_image_set_2D_array( cl_device_id device, cl_image_format *format, image_sampler_data *imageSampler, + bool floatCoords, ExplicitType outputType ); + +static const char *str_1d_image = "1D"; +static const char *str_2d_image = "2D"; +static const char *str_3d_image = "3D"; +static const char *str_1d_image_array = "1D array"; +static const char *str_2d_image_array = "2D array"; + +static const char *convert_image_type_to_string(cl_mem_object_type imageType) +{ + const char *p; + switch (imageType) + { + case CL_MEM_OBJECT_IMAGE1D: + p = str_1d_image; + break; + case CL_MEM_OBJECT_IMAGE2D: + p = str_2d_image; + break; + case CL_MEM_OBJECT_IMAGE3D: + p = str_3d_image; + break; + case CL_MEM_OBJECT_IMAGE1D_ARRAY: + p = str_1d_image_array; + break; + case CL_MEM_OBJECT_IMAGE2D_ARRAY: + p = str_2d_image_array; + break; + } + 
return p; +} + +int filter_formats( cl_image_format *formatList, bool *filterFlags, unsigned int formatCount, cl_channel_type *channelDataTypesToFilter ) +{ + int numSupported = 0; + for( unsigned int j = 0; j < formatCount; j++ ) + { + // If this format has been previously filtered, remove the filter + if( filterFlags[ j ] ) + filterFlags[ j ] = false; + + // skip mipmap tests for CL_DEPTH formats (re# Khronos Bug 13762) + if(gTestMipmaps && (formatList[ j ].image_channel_order == CL_DEPTH)) + { + log_info("Skip mipmap tests for CL_DEPTH format\n"); + filterFlags[ j ] = true; + continue; + } + + // Have we already discarded the channel type via the command line? + if( gChannelTypeToUse != (cl_channel_type)-1 && gChannelTypeToUse != formatList[ j ].image_channel_data_type ) + { + filterFlags[ j ] = true; + continue; + } + + // Have we already discarded the channel order via the command line? + if( gChannelOrderToUse != (cl_channel_order)-1 && gChannelOrderToUse != formatList[ j ].image_channel_order ) + { + filterFlags[ j ] = true; + continue; + } + + // Is given format standard channel order and type given by spec. We don't want to test it if this is vendor extension + if( !IsChannelOrderSupported( formatList[ j ].image_channel_order ) || !IsChannelTypeSupported( formatList[ j ].image_channel_data_type ) ) + { + filterFlags[ j ] = true; + continue; + } + + if ( !channelDataTypesToFilter ) + { + numSupported++; + continue; + } + + // Is the format supported? 
+ int i; + for( i = 0; channelDataTypesToFilter[ i ] != (cl_channel_type)-1; i++ ) + { + if( formatList[ j ].image_channel_data_type == channelDataTypesToFilter[ i ] ) + { + numSupported++; + break; + } + } + if( channelDataTypesToFilter[ i ] == (cl_channel_type)-1 ) + { + // Format is NOT supported, so mark it as such + filterFlags[ j ] = true; + } + } + return numSupported; +} + +int get_format_list( cl_device_id device, cl_mem_object_type imageType, cl_image_format * &outFormatList, unsigned int &outFormatCount, cl_mem_flags flags ) +{ + int error; + + cl_image_format tempList[ 128 ]; + error = clGetSupportedImageFormats( context, flags, + imageType, 128, tempList, &outFormatCount ); + test_error( error, "Unable to get count of supported image formats" ); + + outFormatList = new cl_image_format[ outFormatCount ]; + error = clGetSupportedImageFormats( context, flags, + imageType, outFormatCount, outFormatList, NULL ); + test_error( error, "Unable to get list of supported image formats" ); + + return 0; +} + +int test_read_image_type( cl_device_id device, cl_image_format *format, bool floatCoords, + image_sampler_data *imageSampler, ExplicitType outputType, cl_mem_object_type imageType ) +{ + int ret = 0; + cl_addressing_mode *addressModes = NULL; + + // The sampler-less read image functions behave exactly as the corresponding read image functions + // described in section 6.13.14.2 that take integer coordinates and a sampler with filter mode set to + // CLK_FILTER_NEAREST, normalized coordinates set to CLK_NORMALIZED_COORDS_FALSE and addressing mode to CLK_ADDRESS_NONE + cl_addressing_mode addressModes_rw[] = { CL_ADDRESS_NONE, (cl_addressing_mode)-1 }; + cl_addressing_mode addressModes_ro[] = { /* CL_ADDRESS_CLAMP_NONE,*/ CL_ADDRESS_CLAMP_TO_EDGE, CL_ADDRESS_CLAMP, CL_ADDRESS_REPEAT, CL_ADDRESS_MIRRORED_REPEAT, (cl_addressing_mode)-1 }; + + if(gtestTypesToRun & kReadWriteTests) + { + addressModes = addressModes_rw; + } + else + { + addressModes = 
addressModes_ro; + } + +#if defined( __APPLE__ ) + // According to the OpenCL specification, we do not guarantee the precision + // of operations for linear filtering on the GPU. We do not test linear + // filtering for the CL_RGB CL_UNORM_INT_101010 image format; however, we + // test it internally for a set of other image formats. + if ((gDeviceType == CL_DEVICE_TYPE_GPU) && + (imageSampler->filter_mode == CL_FILTER_LINEAR) && + (format->image_channel_order == CL_RGB) && + (format->image_channel_data_type == CL_UNORM_INT_101010)) + { + log_info("--- Skipping CL_RGB CL_UNORM_INT_101010 format with CL_FILTER_LINEAR on GPU.\n"); + return 0; + } +#endif + + for( int adMode = 0; addressModes[ adMode ] != (cl_addressing_mode)-1; adMode++ ) + { + imageSampler->addressing_mode = addressModes[ adMode ]; + + if( (addressModes[ adMode ] == CL_ADDRESS_REPEAT || addressModes[ adMode ] == CL_ADDRESS_MIRRORED_REPEAT) && !( imageSampler->normalized_coords ) ) + continue; // Repeat doesn't make sense for non-normalized coords + + // Use this run if we were told to only run a certain filter mode + if( gAddressModeToUse != (cl_addressing_mode)-1 && imageSampler->addressing_mode != gAddressModeToUse ) + continue; + + /* + Remove redundant check to see if workaround still necessary + // Check added in because this case was leaking through causing a crash on CPU + if( ! 
imageSampler->normalized_coords && imageSampler->addressing_mode == CL_ADDRESS_REPEAT ) + continue; //repeat mode requires normalized coordinates + */ + print_read_header( format, imageSampler, false ); + + gTestCount++; + + int retCode = 0; + switch (imageType) + { + case CL_MEM_OBJECT_IMAGE1D: + retCode = test_read_image_set_1D( device, format, imageSampler, floatCoords, outputType ); + break; + case CL_MEM_OBJECT_IMAGE1D_ARRAY: + retCode = test_read_image_set_1D_array( device, format, imageSampler, floatCoords, outputType ); + break; + case CL_MEM_OBJECT_IMAGE2D: + retCode = test_read_image_set_2D( device, format, imageSampler, floatCoords, outputType ); + break; + case CL_MEM_OBJECT_IMAGE2D_ARRAY: + retCode = test_read_image_set_2D_array( device, format, imageSampler, floatCoords, outputType ); + break; + case CL_MEM_OBJECT_IMAGE3D: + retCode = test_read_image_set_3D( device, format, imageSampler, floatCoords, outputType ); + break; + } + if( retCode != 0 ) + { + gTestFailure++; + log_error( "FAILED: " ); + print_read_header( format, imageSampler, true ); + log_info( "\n" ); + } + ret |= retCode; + } + + return ret; +} + +int test_read_image_formats( cl_device_id device, cl_image_format *formatList, bool *filterFlags, unsigned int numFormats, + image_sampler_data *imageSampler, ExplicitType outputType, cl_mem_object_type imageType ) +{ + int ret = 0; + bool flipFlop[2] = { false, true }; + int normalizedIdx, floatCoordIdx; + + + // Use this run if we were told to only run a certain filter mode + if( gFilterModeToUse != (cl_filter_mode)-1 && imageSampler->filter_mode != gFilterModeToUse ) + return 0; + + // Test normalized/non-normalized + for( normalizedIdx = 0; normalizedIdx < 2; normalizedIdx++ ) + { + imageSampler->normalized_coords = flipFlop[ normalizedIdx ]; + if( gNormalizedModeToUse != 7 && gNormalizedModeToUse != (int)imageSampler->normalized_coords ) + continue; + + for( floatCoordIdx = 0; floatCoordIdx < 2; floatCoordIdx++ ) + { + // Checks added in 
because this case was leaking through causing a crash on CPU + if( !flipFlop[ floatCoordIdx ] ) + if( imageSampler->filter_mode != CL_FILTER_NEAREST || // integer coords can only be used with nearest + flipFlop[ normalizedIdx ]) // Normalized integer coords makes no sense (they'd all be zero) + continue; + + if( flipFlop[ floatCoordIdx ] && (gtestTypesToRun & kReadWriteTests)) + // sampler-less read in read_write tests run only integer coord + continue; + + + log_info( "read_image (%s coords, %s results) *****************************\n", + flipFlop[ floatCoordIdx ] ? ( imageSampler->normalized_coords ? "normalized float" : "unnormalized float" ) : "integer", + get_explicit_type_name( outputType ) ); + + for( unsigned int i = 0; i < numFormats; i++ ) + { + if( filterFlags[i] ) + continue; + + cl_image_format &imageFormat = formatList[ i ]; + + ret |= test_read_image_type( device, &imageFormat, flipFlop[ floatCoordIdx ], imageSampler, outputType, imageType ); + } + } + } + return ret; +} + + +int test_image_set( cl_device_id device, test_format_set_fn formatTestFn, cl_mem_object_type imageType ) +{ + int ret = 0; + static int printedFormatList = -1; + + + if ( ( 0 == is_extension_available( device, "cl_khr_3d_image_writes" )) && (imageType == CL_MEM_OBJECT_IMAGE3D) && (formatTestFn == test_write_image_formats) ) + { + gTestFailure++; + log_error( "-----------------------------------------------------\n" ); + log_error( "FAILED: test writing CL_MEM_OBJECT_IMAGE3D images\n" ); + log_error( "This device does not support the mandated extension cl_khr_3d_image_writes.\n"); + log_error( "-----------------------------------------------------\n\n" ); + return -1; + } + + if ( gTestMipmaps ) + { + if ( 0 == is_extension_available( device, "cl_khr_mipmap_image" )) + { + log_info( "-----------------------------------------------------\n" ); + log_info( "This device does not support cl_khr_mipmap_image.\nSkipping mipmapped image test. 
\n" ); + log_info( "-----------------------------------------------------\n\n" ); + return 0; + } + if ( ( 0 == is_extension_available( device, "cl_khr_mipmap_image_writes" )) && (formatTestFn == test_write_image_formats)) + { + log_info( "-----------------------------------------------------\n" ); + log_info( "This device does not support cl_khr_mipmap_image_writes.\nSkipping mipmapped image write test. \n" ); + log_info( "-----------------------------------------------------\n\n" ); + return 0; + } + } + + int version_check = check_opencl_version(device,1,2); + if (version_check != 0) { + switch (imageType) { + case CL_MEM_OBJECT_IMAGE1D: + test_missing_feature(version_check, "image_1D"); + case CL_MEM_OBJECT_IMAGE1D_ARRAY: + test_missing_feature(version_check, "image_1D_array"); + case CL_MEM_OBJECT_IMAGE2D_ARRAY: + test_missing_feature(version_check, "image_2D_array"); + } + } + + // Grab the list of supported image formats for integer reads + cl_image_format *formatList; + bool *filterFlags; + unsigned int numFormats; + + // This flag is only for querying the list of supported formats + // The flag for creating image will be set explicitly in test functions + cl_mem_flags flags; + const char *flagNames; + if( formatTestFn == test_read_image_formats ) + { + if(gtestTypesToRun & kReadTests) + { + flags = CL_MEM_READ_ONLY; + flagNames = "read"; + } + else + { + flags = CL_MEM_KERNEL_READ_AND_WRITE; + flagNames = "read_write"; + } + } + else + { + if(gtestTypesToRun & kWriteTests) + { + flags = CL_MEM_WRITE_ONLY; + flagNames = "write"; + } + else + { + flags = CL_MEM_KERNEL_READ_AND_WRITE; + flagNames = "read_write"; + } + } + + if( get_format_list( device, imageType, formatList, numFormats, flags ) ) + return -1; + BufferOwningPtr formatListBuf(formatList); + + + filterFlags = new bool[ numFormats ]; + if( filterFlags == NULL ) + { + log_error( "ERROR: Out of memory allocating filter flags list!\n" ); + return -1; + } + BufferOwningPtr 
filterFlagsBuf(filterFlags); + memset( filterFlags, 0, sizeof( bool ) * numFormats ); + + // First time through, we'll go ahead and print the formats supported, regardless of type + int test = imageType | (formatTestFn == test_read_image_formats ? (1 << 16) : (1 << 17)); + if( printedFormatList != test ) + { + log_info( "---- Supported %s %s formats for this device ---- \n", convert_image_type_to_string(imageType), flagNames ); + for( unsigned int f = 0; f < numFormats; f++ ) + { + if ( IsChannelOrderSupported( formatList[ f ].image_channel_order ) && IsChannelTypeSupported( formatList[ f ].image_channel_data_type ) ) + log_info( " %-7s %-24s %d\n", GetChannelOrderName( formatList[ f ].image_channel_order ), + GetChannelTypeName( formatList[ f ].image_channel_data_type ), + (int)get_format_channel_count( &formatList[ f ] ) ); + } + log_info( "------------------------------------------- \n" ); + printedFormatList = test; + } + + image_sampler_data imageSampler; + + /////// float tests /////// + + if( gTypesToTest & kTestFloat ) + { + cl_channel_type floatFormats[] = { CL_UNORM_SHORT_565, CL_UNORM_SHORT_555, CL_UNORM_INT_101010, +#ifdef OBSOLETE_FORAMT + CL_UNORM_SHORT_565_REV, CL_UNORM_SHORT_555_REV, CL_UNORM_INT_8888, CL_UNORM_INT_8888_REV, CL_UNORM_INT_101010_REV, +#endif +#ifdef CL_SFIXED14_APPLE + CL_SFIXED14_APPLE, +#endif + CL_UNORM_INT8, CL_SNORM_INT8, + CL_UNORM_INT16, CL_SNORM_INT16, CL_FLOAT, CL_HALF_FLOAT, (cl_channel_type)-1 }; + if( filter_formats( formatList, filterFlags, numFormats, floatFormats ) == 0 ) + { + log_info( "No formats supported for float type\n" ); + } + else + { + imageSampler.filter_mode = CL_FILTER_NEAREST; + ret += formatTestFn( device, formatList, filterFlags, numFormats, &imageSampler, kFloat, imageType ); + + imageSampler.filter_mode = CL_FILTER_LINEAR; + ret += formatTestFn( device, formatList, filterFlags, numFormats, &imageSampler, kFloat, imageType ); + } + } + + /////// int tests /////// + if( gTypesToTest & kTestInt ) + { + 
cl_channel_type intFormats[] = { CL_SIGNED_INT8, CL_SIGNED_INT16, CL_SIGNED_INT32, (cl_channel_type)-1 }; + if( filter_formats( formatList, filterFlags, numFormats, intFormats ) == 0 ) + { + log_info( "No formats supported for integer type\n" ); + } + else + { + // Only filter mode we support on int is nearest + imageSampler.filter_mode = CL_FILTER_NEAREST; + ret += formatTestFn( device, formatList, filterFlags, numFormats, &imageSampler, kInt, imageType ); + } + } + + /////// uint tests /////// + + if( gTypesToTest & kTestUInt ) + { + cl_channel_type uintFormats[] = { CL_UNSIGNED_INT8, CL_UNSIGNED_INT16, CL_UNSIGNED_INT32, (cl_channel_type)-1 }; + if( filter_formats( formatList, filterFlags, numFormats, uintFormats ) == 0 ) + { + log_info( "No formats supported for unsigned int type\n" ); + } + else + { + // Only filter mode we support on uint is nearest + imageSampler.filter_mode = CL_FILTER_NEAREST; + ret += formatTestFn( device, formatList, filterFlags, numFormats, &imageSampler, kUInt, imageType ); + } + } + return ret; +} diff --git a/test_conformance/images/kernel_read_write/test_read_1D.cpp b/test_conformance/images/kernel_read_write/test_read_1D.cpp new file mode 100644 index 00000000..18158294 --- /dev/null +++ b/test_conformance/images/kernel_read_write/test_read_1D.cpp @@ -0,0 +1,1169 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../testBase.h" +#include + +#if defined( __APPLE__ ) + #include + #include + #include +#endif + +#define MAX_ERR 0.005f +#define MAX_HALF_LINEAR_ERR 0.3f + +extern cl_command_queue queue; +extern cl_context context; +extern bool gDebugTrace, gExtraValidateInfo, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestRounding, gTestMipmaps; +extern cl_device_type gDeviceType; +extern bool gUseKernelSamplers; +extern cl_filter_mode gFilterModeToUse; +extern cl_addressing_mode gAddressModeToUse; +extern uint64_t gRoundingStartValue; +extern cl_mem_flags gMemFlagsToUse; +extern int gtestTypesToRun; + +#define MAX_TRIES 1 +#define MAX_CLAMPED 1 + +const char *read1DKernelSourcePattern = +"__kernel void sample_kernel( read_only image1d_t input,%s __global float *xOffsets, __global %s4 *results %s)\n" +"{\n" +"%s" +" int tidX = get_global_id(0);\n" +" int offset = tidX;\n" +"%s" +" results[offset] = read_image%s( input, imageSampler, coord %s);\n" +"}"; + +const char *read_write1DKernelSourcePattern = +"__kernel void sample_kernel( read_write image1d_t input,%s __global float *xOffsets, __global %s4 *results %s)\n" +"{\n" +"%s" +" int tidX = get_global_id(0);\n" +" int offset = tidX;\n" +"%s" +" results[offset] = read_image%s( input, coord %s);\n" +"}"; + +const char *int1DCoordKernelSource = +" int coord = xOffsets[offset];\n"; + +const char *float1DKernelSource = +" float coord = (float)xOffsets[offset];\n"; + +static const char *samplerKernelArg = " sampler_t imageSampler,"; + +#define ABS_ERROR( result, expected ) ( fabsf( (float)expected - (float)result ) ) + +extern void read_image_pixel_float( void *imageData, image_descriptor *imageInfo, + int x, int y, int z, float *outData ); +template int determine_validation_error_1D( void *imagePtr, image_descriptor *imageInfo, image_sampler_data *imageSampler, + T *resultPtr, T * expected, float error, + float x, float xAddressOffset, size_t j, int &numTries, int &numClamped, bool printAsFloat, 
int lod ) +{ + int actualX, actualY; + int found = debug_find_pixel_in_image( imagePtr, imageInfo, resultPtr, &actualX, &actualY, NULL, lod ); + bool clampingErr = false, clamped = false, otherClampingBug = false; + int clampedX, ignoreMe; + + clamped = get_integer_coords_offset( x, 0.0f, 0.0f, xAddressOffset, 0.0f, 0.0f, imageInfo->width, 0, 0, imageSampler, imageInfo, clampedX, ignoreMe, ignoreMe ); + + if( found ) + { + // Is it a clamping bug? + if( clamped && clampedX == actualX ) + { + if( (--numClamped) == 0 ) + { + log_error( "ERROR: TEST FAILED: Read is erroneously clamping coordinates for image size %ld!\n", imageInfo->width ); + if( printAsFloat ) + { + log_error( "Sample %d: coord {%f(%a)} did not validate!\n\tExpected (%g,%g,%g,%g),\n\tgot (%g,%g,%g,%g),\n\terror of %g\n", + (int)j, x, x, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ], + (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error ); + } + else + { + log_error( "Sample %d: coord {%f(%a)} did not validate!\n\tExpected (%x,%x,%x,%x),\n\tgot (%x,%x,%x,%x)\n", + (int)j, x, x, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ], + (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] ); + } + return 1; + } + clampingErr = true; + otherClampingBug = true; + } + } + if( clamped && !otherClampingBug ) + { + // If we are in clamp-to-edge mode and we're getting zeroes, it's possible we're getting border erroneously + if( resultPtr[ 0 ] == 0 && resultPtr[ 1 ] == 0 && resultPtr[ 2 ] == 0 && resultPtr[ 3 ] == 0 ) + { + if( (--numClamped) == 0 ) + { + log_error( "ERROR: TEST FAILED: Clamping is erroneously returning border color for image size %ld!\n", imageInfo->width ); + if( printAsFloat ) + { + log_error( "Sample %d: coord {%f(%a)} did not validate!\n\tExpected (%g,%g,%g,%g),\n\tgot (%g,%g,%g,%g),\n\terror of %g\n", + (int)j, x, x, (float)expected[ 0 ], 
(float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ], + (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error ); + } + else + { + log_error( "Sample %d: coord {%f(%a)} did not validate!\n\tExpected (%x,%x,%x,%x),\n\tgot (%x,%x,%x,%x)\n", + (int)j, x, x, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ], + (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] ); + } + return 1; + } + clampingErr = true; + } + } + if( !clampingErr ) + { + if( printAsFloat ) + { + log_error( "Sample %d: coord {%f(%a)} did not validate!\n\tExpected (%g,%g,%g,%g),\n\tgot (%g,%g,%g,%g), error of %g\n", + (int)j, x, x, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ], + (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error ); + } + else + { + log_error( "Sample %d: coord {%f(%a)} did not validate!\n\tExpected (%x,%x,%x,%x),\n\tgot (%x,%x,%x,%x)\n", + (int)j, x, x, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ], + (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] ); + } + log_error( "img size %ld (pitch %ld)", imageInfo->width, imageInfo->rowPitch ); + if( clamped ) + { + log_error( " which would clamp to %d\n", clampedX ); + } + if( printAsFloat && gExtraValidateInfo) + { + log_error( "Nearby values:\n" ); + log_error( "\t%d\t%d\t%d\t%d\n", clampedX - 2, clampedX - 1, clampedX, clampedX + 1 ); + { + float top[ 4 ], real[ 4 ], bot[ 4 ], bot2[ 4 ]; + read_image_pixel_float( imagePtr, imageInfo, clampedX - 2, 0, 0, top ); + read_image_pixel_float( imagePtr, imageInfo, clampedX - 1, 0, 0, real ); + read_image_pixel_float( imagePtr, imageInfo, clampedX, 0, 0, bot ); + read_image_pixel_float( imagePtr, imageInfo, clampedX + 1, 0, 0, bot2 ); + log_error( "\t(%g,%g,%g,%g)",top[0], top[1], top[2], top[3] ); + log_error( " (%g,%g,%g,%g)", 
real[0], real[1], real[2], real[3] ); + log_error( " (%g,%g,%g,%g)",bot[0], bot[1], bot[2], bot[3] ); + log_error( " (%g,%g,%g,%g)\n",bot2[0], bot2[1], bot2[2], bot2[3] ); + } + } + + if( imageSampler->filter_mode != CL_FILTER_LINEAR ) + { + if( found ) + log_error( "\tValue really found in image at %d (%s)\n", actualX, ( found > 1 ) ? "NOT unique!!" : "unique" ); + else + log_error( "\tValue not actually found in image\n" ); + } + log_error( "\n" ); + + numClamped = -1; // We force the clamped counter to never work + if( ( --numTries ) == 0 ) + { + return 1; + } + } + return 0; +} + +#define CLAMP( _val, _min, _max ) ((_val) < (_min) ? (_min) : (_val) > (_max) ? (_max) : (_val)) + +static void InitFloatCoords( image_descriptor *imageInfo, image_sampler_data *imageSampler, float *xOffsets, float xfract, int normalized_coords, MTdata d, int lod) +{ + size_t i = 0; + size_t width_lod = imageInfo->width; + + if(gTestMipmaps) + width_lod = (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1; + + if( gDisableOffsets ) + { + for( size_t x = 0; x < width_lod; x++, i++ ) + { + xOffsets[ i ] = (float) (xfract + (double) x); + } + } + else + { + for( size_t x = 0; x < width_lod; x++, i++ ) + { + xOffsets[ i ] = (float) (xfract + (double) ((int) x + random_in_range( -10, 10, d ))); + } + } + + if( imageSampler->addressing_mode == CL_ADDRESS_NONE ) + { + i = 0; + for( size_t x = 0; x < width_lod; x++, i++ ) + { + xOffsets[ i ] = (float) CLAMP( (double) xOffsets[ i ], 0.0, (double) width_lod - 1.0); + } + } + + if( normalized_coords ) + { + i = 0; + for( size_t x = 0; x < width_lod; x++, i++ ) + { + xOffsets[ i ] = (float) ((double) xOffsets[ i ] / (double) width_lod); + } + } +} + +#ifndef MAX + #define MAX( _a, _b ) ((_a) > (_b) ? 
(_a) : (_b)) +#endif + + +int test_read_image_1D( cl_device_id device, cl_context context, cl_command_queue queue, cl_kernel kernel, + image_descriptor *imageInfo, image_sampler_data *imageSampler, + bool useFloatCoords, ExplicitType outputType, MTdata d ) +{ + int error; + static int initHalf = 0; + + size_t threads[2]; + cl_mem_flags image_read_write_flags = CL_MEM_READ_ONLY; + clMemWrapper xOffsets, results; + clSamplerWrapper actualSampler; + BufferOwningPtr maxImageUseHostPtrBackingStore; + + // The DataBuffer template class really does use delete[], not free -- IRO + BufferOwningPtr xOffsetValues(malloc(sizeof(cl_float) * imageInfo->width)); + + // generate_random_image_data allocates with malloc, so we use a MallocDataBuffer here + BufferOwningPtr imageValues; + generate_random_image_data( imageInfo, imageValues, d ); + + if( gDebugTrace ) + { + log_info( " - Creating 1D image %d ...\n", (int)imageInfo->width ); + if(gTestMipmaps) + log_info(" - and %d mip levels\n", (int)imageInfo->num_mip_levels); + } + // Construct testing sources + clProtectedImage protImage; + clMemWrapper unprotImage; + cl_mem image; + + if(gtestTypesToRun & kReadTests) + { + image_read_write_flags = CL_MEM_READ_ONLY; + } + else + { + image_read_write_flags = CL_MEM_READ_WRITE; + } + + if( gMemFlagsToUse == CL_MEM_USE_HOST_PTR ) + { + // clProtectedImage uses USE_HOST_PTR, so just rely on that for the testing (via Ian) + // Do not use protected images for max image size test since it rounds the row size to a page size + if (gTestMaxImages) { + generate_random_image_data( imageInfo, maxImageUseHostPtrBackingStore, d ); + unprotImage = create_image_1d( context, + image_read_write_flags | CL_MEM_USE_HOST_PTR, + imageInfo->format, + imageInfo->width, ( gEnablePitch ? 
imageInfo->rowPitch : 0 ), + maxImageUseHostPtrBackingStore, NULL, &error ); + } else { + error = protImage.Create( context, + image_read_write_flags, + imageInfo->format, imageInfo->width ); + } + if( error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create 1D image of size %d pitch %d (%s)\n", (int)imageInfo->width, (int)imageInfo->rowPitch, IGetErrorString( error ) ); + return error; + } + + if (gTestMaxImages) + image = (cl_mem)unprotImage; + else + image = (cl_mem)protImage; + } + else if( gMemFlagsToUse == CL_MEM_COPY_HOST_PTR ) + { + // Don't use clEnqueueWriteImage; just use copy host ptr to get the data in + unprotImage = create_image_1d( context, + image_read_write_flags | CL_MEM_COPY_HOST_PTR, + imageInfo->format, + imageInfo->width, ( gEnablePitch ? imageInfo->rowPitch : 0 ), + imageValues, NULL, &error ); + if( error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create 1D image of size %d pitch %d (%s)\n", (int)imageInfo->width, (int)imageInfo->rowPitch, IGetErrorString( error ) ); + return error; + } + image = unprotImage; + } + else // Either CL_MEM_ALLOC_HOST_PTR or none + { + // Note: if ALLOC_HOST_PTR is used, the driver allocates memory that can be accessed by the host, but otherwise + // it works just as if no flag is specified, so we just do the same thing either way + if(gTestMipmaps) + { + cl_image_desc image_desc = {0}; + image_desc.image_type = CL_MEM_OBJECT_IMAGE1D; + image_desc.image_width = imageInfo->width; + image_desc.num_mip_levels = imageInfo->num_mip_levels; + + unprotImage = clCreateImage( context, + image_read_write_flags, + imageInfo->format, &image_desc, NULL, &error); + if( error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create %d level mipmapped 1D image of size %d (pitch %d, %d ) (%s)",(int)imageInfo->num_mip_levels, (int)imageInfo->width, (int)imageInfo->rowPitch, (int)imageInfo->slicePitch, IGetErrorString( error ) ); + return error; + } + } + else + { + unprotImage = create_image_1d( context, + 
image_read_write_flags | gMemFlagsToUse, + imageInfo->format, + imageInfo->width, ( gEnablePitch ? imageInfo->rowPitch : 0 ), + imageValues, NULL, &error ); + if( error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create 1D image of size %d pitch %d (%s)\n", (int)imageInfo->width, (int)imageInfo->rowPitch, IGetErrorString( error ) ); + return error; + } + } + image = unprotImage; + } + + if( gMemFlagsToUse != CL_MEM_COPY_HOST_PTR ) + { + if( gDebugTrace ) + log_info( " - Writing image...\n" ); + + size_t origin[ 3 ] = { 0, 0, 0 }; + size_t region[ 3 ] = { imageInfo->width, 1, 1 }; + + if(gTestMipmaps) + { + int nextLevelOffset = 0; + + for (int i =0; i < imageInfo->num_mip_levels; i++) + { origin[1] = i; + error = clEnqueueWriteImage(queue, image, CL_TRUE, + origin, region, /*gEnablePitch ? imageInfo->rowPitch :*/ 0, /*gEnablePitch ? imageInfo->slicePitch :*/ 0, + ((char*)imageValues + nextLevelOffset), 0, NULL, NULL); + if (error != CL_SUCCESS) + { + log_error( "ERROR: Unable to write to %d level mipmapped 3D image of size %d x %d x %d\n", (int)imageInfo->num_mip_levels,(int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->depth ); + return error; + } + nextLevelOffset += region[0]*get_pixel_size(imageInfo->format); + //Subsequent mip level dimensions keep halving + region[0] = region[0] >> 1 ? region[0] >> 1 : 1; + } + } + else + { + error = clEnqueueWriteImage(queue, image, CL_TRUE, + origin, region, ( gEnablePitch ? 
imageInfo->rowPitch : 0 ), 0, + imageValues, 0, NULL, NULL); + if (error != CL_SUCCESS) + { + log_error( "ERROR: Unable to write to 1D image of size %d\n", (int)imageInfo->width ); + return error; + } + } + } + + if( gDebugTrace ) + log_info( " - Creating kernel arguments...\n" ); + + xOffsets = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), sizeof( cl_float ) * imageInfo->width, xOffsetValues, &error ); + test_error( error, "Unable to create x offset buffer" ); + results = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), get_explicit_type_size( outputType ) * 4 * imageInfo->width, NULL, &error ); + test_error( error, "Unable to create result buffer" ); + + // Create sampler to use + cl_sampler_properties properties[] = { + CL_SAMPLER_NORMALIZED_COORDS, imageSampler->normalized_coords, + CL_SAMPLER_ADDRESSING_MODE, imageSampler->addressing_mode, + CL_SAMPLER_FILTER_MODE, imageSampler->filter_mode, + 0, 0, 0 }; + if (gTestMipmaps) { + properties[6] = CL_SAMPLER_MIP_FILTER_MODE; + properties[7] = imageSampler->filter_mode; + } + actualSampler = clCreateSamplerWithProperties(context, properties, &error ); + test_error( error, "Unable to create image sampler" ); + + // Set arguments + int idx = 0; + error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &image ); + test_error( error, "Unable to set kernel arguments" ); + if( !gUseKernelSamplers ) + { + error = clSetKernelArg( kernel, idx++, sizeof( cl_sampler ), &actualSampler ); + test_error( error, "Unable to set kernel arguments" ); + } + error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &xOffsets ); + test_error( error, "Unable to set kernel arguments" ); + error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &results ); + test_error( error, "Unable to set kernel arguments" ); + + // A cast of troublesome offsets. The first one has to be zero. 
+ const float float_offsets[] = { 0.0f, MAKE_HEX_FLOAT(0x1.0p-30f, 0x1L, -30), 0.25f, 0.3f, 0.5f - FLT_EPSILON/4.0f, 0.5f, 0.9f, 1.0f - FLT_EPSILON/2 }; + int float_offset_count = sizeof( float_offsets) / sizeof( float_offsets[0] ); + int numTries = MAX_TRIES, numClamped = MAX_CLAMPED; + int loopCount = 2 * float_offset_count; + if( ! useFloatCoords ) + loopCount = 1; + if (gTestMaxImages) { + loopCount = 1; + log_info("Testing each size only once with pixel offsets of %g for max sized images.\n", float_offsets[0]); + } + + // Get the maximum absolute error for this format + double formatAbsoluteError = get_max_absolute_error(imageInfo->format, imageSampler); + if (gDebugTrace) log_info("\tformatAbsoluteError is %e\n", formatAbsoluteError); + + if (0 == initHalf && imageInfo->format->image_channel_data_type == CL_HALF_FLOAT ) { + initHalf = CL_SUCCESS == DetectFloatToHalfRoundingMode( queue ); + if (initHalf) { + log_info("Half rounding mode successfully detected.\n"); + } + } + + size_t width_lod = imageInfo->width; + size_t nextLevelOffset = 0; + for(int lod = 0; (gTestMipmaps && lod < imageInfo->num_mip_levels) || (!gTestMipmaps && lod < 1); lod++) + { + float lod_float = (float)lod; + size_t resultValuesSize = width_lod * get_explicit_type_size( outputType ) * 4; + BufferOwningPtr resultValues(malloc(resultValuesSize)); + char *imagePtr = (char*)imageValues + nextLevelOffset; + if (gTestMipmaps) { + //Set the lod kernel arg + if(gDebugTrace) + log_info(" - Working at mip level %d\n", lod); + error = clSetKernelArg( kernel, idx, sizeof( float ), &lod_float); + test_error( error, "Unable to set kernel arguments" ); + } + for( int q = 0; q < loopCount; q++ ) + { + float offset = float_offsets[ q % float_offset_count ]; + + // Init the coordinates + InitFloatCoords( imageInfo, imageSampler, xOffsetValues, + q>=float_offset_count ? 
-offset: offset, + imageSampler->normalized_coords, d , lod); + + error = clEnqueueWriteBuffer( queue, xOffsets, CL_TRUE, 0, sizeof(cl_float) * width_lod, xOffsetValues, 0, NULL, NULL ); + test_error( error, "Unable to write x offsets" ); + + // Get results + memset( resultValues, 0xff, resultValuesSize ); + clEnqueueWriteBuffer( queue, results, CL_TRUE, 0, resultValuesSize, resultValues, 0, NULL, NULL ); + + // Run the kernel + threads[0] = (size_t)width_lod; + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL ); + test_error( error, "Unable to run kernel" ); + + if( gDebugTrace ) + log_info( " reading results, %ld kbytes\n", (unsigned long)( width_lod * get_explicit_type_size( outputType ) * 4 / 1024 ) ); + + error = clEnqueueReadBuffer( queue, results, CL_TRUE, 0, width_lod * get_explicit_type_size( outputType ) * 4, resultValues, 0, NULL, NULL ); + test_error( error, "Unable to read results from kernel" ); + if( gDebugTrace ) + log_info( " results read\n" ); + + // Validate results element by element + char *imagePtr = imageValues + nextLevelOffset; + /* + * FLOAT output type + */ + if(is_sRGBA_order(imageInfo->format->image_channel_order) && ( outputType == kFloat )) + { + // Validate float results + float *resultPtr = (float *)(char *)resultValues; + float expected[4], error=0.0f; + float maxErr = get_max_relative_error( imageInfo->format, imageSampler, 0 /*not 3D*/, CL_FILTER_LINEAR == imageSampler->filter_mode ); + { + for( size_t x = 0, j = 0; x < width_lod; x++, j++ ) + { + // Step 1: go through and see if the results verify for the pixel + // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the + // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. 
+ int checkOnlyOnePixel = 0; + int found_pixel = 0; + float offset = NORM_OFFSET; + if (!imageSampler->normalized_coords || imageSampler->filter_mode != CL_FILTER_NEAREST || NORM_OFFSET == 0 +#if defined( __APPLE__ ) + // Apple requires its CPU implementation to do correctly rounded address arithmetic in all modes + || gDeviceType != CL_DEVICE_TYPE_GPU +#endif + ) + offset = 0.0f; // Loop only once + + for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel; norm_offset_x += NORM_OFFSET) { + + // Try sampling the pixel, without flushing denormals. + int containsDenormals = 0; + FloatPixel maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, + xOffsetValues[ j ], 0.0f, 0.0f, norm_offset_x, 0.0f, 0.0f, + imageSampler, expected, 0, &containsDenormals, lod ); + + float err1 = fabsf( sRGBmap( resultPtr[0] ) - sRGBmap( expected[0] ) ); + float err2 = fabsf( sRGBmap( resultPtr[1] ) - sRGBmap( expected[1] ) ); + float err3 = fabsf( sRGBmap( resultPtr[2] ) - sRGBmap( expected[2] ) ); + float err4 = fabsf( resultPtr[3] - expected[3] ); + + float maxErr = 0.5; + + // Check if the result matches. + if( ! (err1 <= maxErr) || ! (err2 <= maxErr) || + ! (err3 <= maxErr) || ! (err4 <= maxErr) ) + { + //try flushing the denormals, if there is a failure. + if( containsDenormals ) + { + // If implementation decide to flush subnormals to zero, + // max error needs to be adjusted + maxErr += 4 * FLT_MIN; + + maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, + xOffsetValues[ j ], 0.0f, 0.0f, norm_offset_x, 0.0f, 0.0f, + imageSampler, expected, 0, NULL, lod ); + + err1 = fabsf( sRGBmap( resultPtr[0] ) - sRGBmap( expected[0] ) ); + err2 = fabsf( sRGBmap( resultPtr[1] ) - sRGBmap( expected[1] ) ); + err3 = fabsf( sRGBmap( resultPtr[2] ) - sRGBmap( expected[2] ) ); + err4 = fabsf( resultPtr[3] - expected[3] ); + } + } + + // If the final result DOES match, then we've found a valid result and we're done with this pixel. 
+ found_pixel = (err1 <= maxErr) && (err2 <= maxErr) && (err3 <= maxErr) && (err4 <= maxErr); + }//norm_offset_x + + + // Step 2: If we did not find a match, then print out debugging info. + if (!found_pixel) { + // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the + // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. + checkOnlyOnePixel = 0; + int shouldReturn = 0; + for (float norm_offset_x = -offset; norm_offset_x <= offset && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) { + + // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) + // E.g., test one pixel. + if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + norm_offset_x = 0.0f; + checkOnlyOnePixel = 1; + } + + int containsDenormals = 0; + FloatPixel maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, + xOffsetValues[ j ], 0.0f, 0.0f, norm_offset_x, 0.0f, 0.0f, + imageSampler, expected, 0, &containsDenormals, lod ); + + float err1 = fabsf( sRGBmap( resultPtr[0] ) - sRGBmap( expected[0] ) ); + float err2 = fabsf( sRGBmap( resultPtr[1] ) - sRGBmap( expected[1] ) ); + float err3 = fabsf( sRGBmap( resultPtr[2] ) - sRGBmap( expected[2] ) ); + float err4 = fabsf( resultPtr[3] - expected[3] ); + + float maxErr = 0.6; + + if( ! (err1 <= maxErr) || ! (err2 <= maxErr) || + ! (err3 <= maxErr) || ! (err4 <= maxErr) ) + { + //try flushing the denormals, if there is a failure. 
+ if( containsDenormals ) + { + // If implementation decide to flush subnormals to zero, + // max error needs to be adjusted + maxErr += 4 * FLT_MIN; + + maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, + xOffsetValues[ j ], 0.0f, 0.0f, norm_offset_x, 0.0f, 0.0f, + imageSampler, expected, 0, NULL, lod ); + + err1 = fabsf( sRGBmap( resultPtr[0] ) - sRGBmap( expected[0] ) ); + err2 = fabsf( sRGBmap( resultPtr[1] ) - sRGBmap( expected[1] ) ); + err3 = fabsf( sRGBmap( resultPtr[2] ) - sRGBmap( expected[2] ) ); + err4 = fabsf( resultPtr[3] - expected[3] ); + } + } + if( ! (err1 <= maxErr) || ! (err2 <= maxErr) || + ! (err3 <= maxErr) || ! (err4 <= maxErr) ) + { + log_error("FAILED norm_offsets: %g:\n", norm_offset_x); + + float tempOut[4]; + shouldReturn |= determine_validation_error_1D( imagePtr, imageInfo, imageSampler, resultPtr, + expected, error, xOffsetValues[ j ], norm_offset_x, j, numTries, numClamped, true, lod ); + + log_error( "Step by step:\n" ); + FloatPixel temp = sample_image_pixel_float_offset( imagePtr, imageInfo, + xOffsetValues[ j ], 0.0f, 0.0f, norm_offset_x, 0.0f, 0.0f, + imageSampler, tempOut, 1 /* verbose */, &containsDenormals /*dont flush while error reporting*/, lod ); + log_error( "\tulps: %2.2f, %2.2f, %2.2f, %2.2f (max allowed: %2.2f)\n\n", + Ulp_Error( resultPtr[0], expected[0] ), + Ulp_Error( resultPtr[1], expected[1] ), + Ulp_Error( resultPtr[2], expected[2] ), + Ulp_Error( resultPtr[3], expected[3] ), + Ulp_Error( MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) + maxErr, MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) ) ); + + } else { + log_error("Test error: we should have detected this passing above.\n"); + } + + }//norm_offset_x + if( shouldReturn ) + return 1; + } // if (!found_pixel) + + resultPtr += 4; + } + } + } + else if( outputType == kFloat ) + { + // Validate float results + float *resultPtr = (float *)(char *)resultValues; + float expected[4], error=0.0f; + float maxErr = get_max_relative_error( 
imageInfo->format, imageSampler, 0 /*not 3D*/, CL_FILTER_LINEAR == imageSampler->filter_mode ); + { + for( size_t x = 0, j = 0; x < width_lod; x++, j++ ) + { + // Step 1: go through and see if the results verify for the pixel + // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the + // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. + int checkOnlyOnePixel = 0; + int found_pixel = 0; + float offset = NORM_OFFSET; + if (!imageSampler->normalized_coords || imageSampler->filter_mode != CL_FILTER_NEAREST || NORM_OFFSET == 0 +#if defined( __APPLE__ ) + // Apple requires its CPU implementation to do correctly rounded address arithmetic in all modes + || gDeviceType != CL_DEVICE_TYPE_GPU +#endif + ) + offset = 0.0f; // Loop only once + + for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel; norm_offset_x += NORM_OFFSET) { + + // Try sampling the pixel, without flushing denormals. + int containsDenormals = 0; + FloatPixel maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, + xOffsetValues[ j ], 0.0f, 0.0f, norm_offset_x, 0.0f, 0.0f, + imageSampler, expected, 0, &containsDenormals, lod ); + + float err1 = fabsf( resultPtr[0] - expected[0] ); + float err2 = fabsf( resultPtr[1] - expected[1] ); + float err3 = fabsf( resultPtr[2] - expected[2] ); + float err4 = fabsf( resultPtr[3] - expected[3] ); + // Clamp to the minimum absolute error for the format + if (err1 > 0 && err1 < formatAbsoluteError) { err1 = 0.0f; } + if (err2 > 0 && err2 < formatAbsoluteError) { err2 = 0.0f; } + if (err3 > 0 && err3 < formatAbsoluteError) { err3 = 0.0f; } + if (err4 > 0 && err4 < formatAbsoluteError) { err4 = 0.0f; } + float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN ); + float maxErr2 = MAX( maxErr * maxPixel.p[1], FLT_MIN ); + float maxErr3 = MAX( maxErr * maxPixel.p[2], FLT_MIN ); + float maxErr4 = MAX( maxErr * maxPixel.p[3], FLT_MIN ); + + // Check if the result 
matches. + if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2) || + ! (err3 <= maxErr3) || ! (err4 <= maxErr4) ) + { + //try flushing the denormals, if there is a failure. + if( containsDenormals ) + { + // If implementation decide to flush subnormals to zero, + // max error needs to be adjusted + maxErr1 += 4 * FLT_MIN; + maxErr2 += 4 * FLT_MIN; + maxErr3 += 4 * FLT_MIN; + maxErr4 += 4 * FLT_MIN; + + maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, + xOffsetValues[ j ], 0.0f, 0.0f, norm_offset_x, 0.0f, 0.0f, + imageSampler, expected, 0, NULL, lod ); + + err1 = fabsf( resultPtr[0] - expected[0] ); + err2 = fabsf( resultPtr[1] - expected[1] ); + err3 = fabsf( resultPtr[2] - expected[2] ); + err4 = fabsf( resultPtr[3] - expected[3] ); + } + } + + // If the final result DOES match, then we've found a valid result and we're done with this pixel. + found_pixel = (err1 <= maxErr1) && (err2 <= maxErr2) && (err3 <= maxErr3) && (err4 <= maxErr4); + }//norm_offset_x + + + // Step 2: If we did not find a match, then print out debugging info. + if (!found_pixel) { + // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the + // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. + checkOnlyOnePixel = 0; + int shouldReturn = 0; + for (float norm_offset_x = -offset; norm_offset_x <= offset && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) { + + // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) + // E.g., test one pixel. 
+ if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + norm_offset_x = 0.0f; + checkOnlyOnePixel = 1; + } + + int containsDenormals = 0; + FloatPixel maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, + xOffsetValues[ j ], 0.0f, 0.0f, norm_offset_x, 0.0f, 0.0f, + imageSampler, expected, 0, &containsDenormals, lod ); + + float err1 = fabsf( resultPtr[0] - expected[0] ); + float err2 = fabsf( resultPtr[1] - expected[1] ); + float err3 = fabsf( resultPtr[2] - expected[2] ); + float err4 = fabsf( resultPtr[3] - expected[3] ); + float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN ); + float maxErr2 = MAX( maxErr * maxPixel.p[1], FLT_MIN ); + float maxErr3 = MAX( maxErr * maxPixel.p[2], FLT_MIN ); + float maxErr4 = MAX( maxErr * maxPixel.p[3], FLT_MIN ); + + + if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2) || + ! (err3 <= maxErr3) || ! (err4 <= maxErr4) ) + { + //try flushing the denormals, if there is a failure. + if( containsDenormals ) + { + maxErr1 += 4 * FLT_MIN; + maxErr2 += 4 * FLT_MIN; + maxErr3 += 4 * FLT_MIN; + maxErr4 += 4 * FLT_MIN; + + maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, + xOffsetValues[ j ], 0.0f, 0.0f, norm_offset_x, 0.0f, 0.0f, + imageSampler, expected, 0, NULL, lod ); + + err1 = fabsf( resultPtr[0] - expected[0] ); + err2 = fabsf( resultPtr[1] - expected[1] ); + err3 = fabsf( resultPtr[2] - expected[2] ); + err4 = fabsf( resultPtr[3] - expected[3] ); + } + } + if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2) || + ! (err3 <= maxErr3) || ! 
(err4 <= maxErr4) ) + { + log_error("FAILED norm_offsets: %g:\n", norm_offset_x); + + float tempOut[4]; + shouldReturn |= determine_validation_error_1D( imagePtr, imageInfo, imageSampler, resultPtr, + expected, error, xOffsetValues[ j ], norm_offset_x, j, numTries, numClamped, true, lod ); + + log_error( "Step by step:\n" ); + FloatPixel temp = sample_image_pixel_float_offset( imagePtr, imageInfo, + xOffsetValues[ j ], 0.0f, 0.0f, norm_offset_x, 0.0f, 0.0f, + imageSampler, tempOut, 1 /* verbose */, &containsDenormals /*dont flush while error reporting*/, lod ); + log_error( "\tulps: %2.2f, %2.2f, %2.2f, %2.2f (max allowed: %2.2f)\n\n", + Ulp_Error( resultPtr[0], expected[0] ), + Ulp_Error( resultPtr[1], expected[1] ), + Ulp_Error( resultPtr[2], expected[2] ), + Ulp_Error( resultPtr[3], expected[3] ), + Ulp_Error( MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) + maxErr, MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) ) ); + + } else { + log_error("Test error: we should have detected this passing above.\n"); + } + + }//norm_offset_x + if( shouldReturn ) + return 1; + } // if (!found_pixel) + + resultPtr += 4; + } + } + } + /* + * UINT output type + */ + else if( outputType == kUInt ) + { + // Validate unsigned integer results + unsigned int *resultPtr = (unsigned int *)(char *)resultValues; + unsigned int expected[4]; + float error; + for( size_t x = 0, j = 0; x < width_lod; x++, j++ ) + { + // Step 1: go through and see if the results verify for the pixel + // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the + // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. + int checkOnlyOnePixel = 0; + int found_pixel = 0; + for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) { + + // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) + // E.g., test one pixel. 
+ if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + norm_offset_x = 0.0f; + checkOnlyOnePixel = 1; + } + + if ( gTestMipmaps ) + sample_image_pixel_offset( imagePtr, imageInfo, + xOffsetValues[ j ], 0.0f, 0.0f, norm_offset_x, 0.0f, 0.0f, + imageSampler, expected, lod ); + else + sample_image_pixel_offset( imagePtr, imageInfo, + xOffsetValues[ j ], 0.0f, 0.0f, norm_offset_x, 0.0f, 0.0f, + imageSampler, expected ); + + error = errMax( errMax( abs_diff_uint(expected[ 0 ], resultPtr[ 0 ]), abs_diff_uint(expected[ 1 ], resultPtr[ 1 ]) ), + errMax( abs_diff_uint(expected[ 2 ], resultPtr[ 2 ]), abs_diff_uint(expected[ 3 ], resultPtr[ 3 ]) ) ); + + if (error <= MAX_ERR) + found_pixel = 1; + }//norm_offset_x + + // Step 2: If we did not find a match, then print out debugging info. + if (!found_pixel) { + // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the + // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. + checkOnlyOnePixel = 0; + int shouldReturn = 0; + for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) { + + // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) + // E.g., test one pixel. 
+ if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + norm_offset_x = 0.0f; + checkOnlyOnePixel = 1; + } + + if ( gTestMipmaps ) + sample_image_pixel_offset( imagePtr, imageInfo, + xOffsetValues[ j ], 0.0f, 0.0f, norm_offset_x, 0.0f, 0.0f, + imageSampler, expected, lod ); + else + sample_image_pixel_offset( imagePtr, imageInfo, + xOffsetValues[ j ], 0.0f, 0.0f, norm_offset_x, 0.0f, 0.0f, + imageSampler, expected ); + + + + error = errMax( errMax( abs_diff_uint(expected[ 0 ], resultPtr[ 0 ]), abs_diff_uint(expected[ 1 ], resultPtr[ 1 ]) ), + errMax( abs_diff_uint(expected[ 2 ], resultPtr[ 2 ]), abs_diff_uint(expected[ 3 ], resultPtr[ 3 ]) ) ); + + if( error > MAX_ERR ) + { + log_error("FAILED norm_offsets: %g:\n", norm_offset_x); + + shouldReturn |= determine_validation_error_1D( imagePtr, imageInfo, imageSampler, resultPtr, + expected, error, xOffsetValues[j], norm_offset_x, j, numTries, numClamped, false, lod ); + } else { + log_error("Test error: we should have detected this passing above.\n"); + } + }//norm_offset_x + if( shouldReturn ) + return 1; + } // if (!found_pixel) + + resultPtr += 4; + } + } + /* + * INT output type + */ + else + { + // Validate integer results + int *resultPtr = (int *)(char *)resultValues; + int expected[4]; + float error; + for( size_t x = 0, j = 0; x < width_lod; x++, j++ ) + { + // Step 1: go through and see if the results verify for the pixel + // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the + // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. + int checkOnlyOnePixel = 0; + int found_pixel = 0; + for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) { + + // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) + // E.g., test one pixel. 
+ if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + norm_offset_x = 0.0f; + checkOnlyOnePixel = 1; + } + + if ( gTestMipmaps ) + sample_image_pixel_offset( imagePtr, imageInfo, + xOffsetValues[ j ], 0.0f, 0.0f, norm_offset_x, 0.0f, 0.0f, + imageSampler, expected, lod); + else + sample_image_pixel_offset( imagePtr, imageInfo, + xOffsetValues[ j ], 0.0f, 0.0f, norm_offset_x, 0.0f, 0.0f, + imageSampler, expected ); + + error = errMax( errMax( abs_diff_int(expected[ 0 ], resultPtr[ 0 ]), abs_diff_int(expected[ 1 ], resultPtr[ 1 ]) ), + errMax( abs_diff_int(expected[ 2 ], resultPtr[ 2 ]), abs_diff_int(expected[ 3 ], resultPtr[ 3 ]) ) ); + + if (error <= MAX_ERR) + found_pixel = 1; + }//norm_offset_x + + // Step 2: If we did not find a match, then print out debugging info. + if (!found_pixel) { + // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the + // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. + checkOnlyOnePixel = 0; + int shouldReturn = 0; + for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) { + + // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) + // E.g., test one pixel. 
+ if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + norm_offset_x = 0.0f; + checkOnlyOnePixel = 1; + } + + if ( gTestMipmaps ) + sample_image_pixel_offset( imagePtr, imageInfo, + xOffsetValues[ j ], 0.0f, 0.0f, norm_offset_x, 0.0f, 0.0f, + imageSampler, expected, lod); + else + sample_image_pixel_offset( imagePtr, imageInfo, + xOffsetValues[ j ], 0.0f, 0.0f, norm_offset_x, 0.0f, 0.0f, + imageSampler, expected ); + + + error = errMax( errMax( abs_diff_int(expected[ 0 ], resultPtr[ 0 ]), abs_diff_int(expected[ 1 ], resultPtr[ 1 ]) ), + errMax( abs_diff_int(expected[ 2 ], resultPtr[ 2 ]), abs_diff_int(expected[ 3 ], resultPtr[ 3 ]) ) ); + + if( error > MAX_ERR ) + { + log_error("FAILED norm_offsets: %g:\n", norm_offset_x); + + shouldReturn |= determine_validation_error_1D( imagePtr, imageInfo, imageSampler, resultPtr, + expected, error, xOffsetValues[j], norm_offset_x, j, numTries, numClamped, false, lod ); + } else { + log_error("Test error: we should have detected this passing above.\n"); + } + }//norm_offset_x + if( shouldReturn ) + return 1; + } // if (!found_pixel) + + resultPtr += 4; + } + } + } + { + nextLevelOffset += width_lod * get_pixel_size(imageInfo->format); + width_lod = (width_lod >> 1) ? 
(width_lod >> 1) : 1; + } + } + + return numTries != MAX_TRIES || numClamped != MAX_CLAMPED; +} + +int test_read_image_set_1D( cl_device_id device, cl_image_format *format, image_sampler_data *imageSampler, + bool floatCoords, ExplicitType outputType ) +{ + char programSrc[10240]; + const char *ptr; + const char *readFormat; + clProgramWrapper program; + clKernelWrapper kernel; + RandomSeed seed( gRandomSeed ); + int error; + + // Get our operating params + size_t maxWidth; + cl_ulong maxAllocSize, memSize; + image_descriptor imageInfo = { 0x0 }; + size_t pixelSize; + + const char *KernelSourcePattern = NULL; + + imageInfo.format = format; + imageInfo.height = 1; + imageInfo.depth = imageInfo.arraySize = imageInfo.slicePitch = 0; + imageInfo.type = CL_MEM_OBJECT_IMAGE1D; + pixelSize = get_pixel_size( imageInfo.format ); + + error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL ); + test_error( error, "Unable to get max image 2D size from device" ); + + if (memSize > (cl_ulong)SIZE_MAX) { + memSize = (cl_ulong)SIZE_MAX; + } + + // Determine types + if( outputType == kInt ) + readFormat = "i"; + else if( outputType == kUInt ) + readFormat = "ui"; + else // kFloat + readFormat = "f"; + + // Construct the source + const char *samplerArg = samplerKernelArg; + char samplerVar[ 1024 ] = ""; + if( gUseKernelSamplers ) + { + get_sampler_kernel_code( imageSampler, samplerVar ); + samplerArg = ""; + } + + if(gtestTypesToRun & kReadWriteTests) + { + KernelSourcePattern = read_write1DKernelSourcePattern; + } + else + { + KernelSourcePattern = read1DKernelSourcePattern; + } + sprintf( programSrc, + KernelSourcePattern, + samplerArg, get_explicit_type_name( outputType ), + gTestMipmaps ? 
", float lod" : "", + samplerVar, + floatCoords ? float1DKernelSource : int1DCoordKernelSource, + readFormat, + gTestMipmaps ? ", lod" : "" ); + + ptr = programSrc; + + error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", "-cl-std=CL2.0" ); + if(error) + { + exit(1); + } + test_error( error, "Unable to create testing kernel" ); + + + if( gTestSmallImages ) + { + for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ ) + { + imageInfo.rowPitch = imageInfo.width * pixelSize; + + if(gTestMipmaps) + imageInfo.num_mip_levels = (size_t)random_in_range(2, (compute_max_mip_levels(imageInfo.width, 0, 0)-1), seed); + + if( gDebugTrace ) + log_info( " at size %d\n", (int)imageInfo.width ); + + int retCode = test_read_image_1D( device, context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed ); + if( retCode ) + return retCode; + } + } + else if( gTestMaxImages ) + { + // Try a specific set of maximum sizes + size_t numbeOfSizes; + size_t sizes[100][3]; + + get_max_sizes(&numbeOfSizes, 100, sizes, maxWidth, 1, 1, 1, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE1D, imageInfo.format, CL_TRUE); + + for( size_t idx = 0; idx < numbeOfSizes; idx++ ) + { + imageInfo.width = sizes[ idx ][ 0 ]; + imageInfo.rowPitch = imageInfo.width * pixelSize; + log_info("Testing %d\n", (int)sizes[ idx ][ 0 ]); + if(gTestMipmaps) + imageInfo.num_mip_levels = (size_t)random_in_range(2, (compute_max_mip_levels(imageInfo.width, 0, 0)-1), seed); + if( gDebugTrace ) + log_info( " at max size %d\n", (int)sizes[ idx ][ 0 ] ); + int retCode = test_read_image_1D( device, context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed ); + if( retCode ) + return retCode; + } + } + else if( gTestRounding ) + { + uint64_t typeRange = 1LL << ( get_format_type_size( imageInfo.format ) * 8 ); + typeRange /= get_pixel_size( imageInfo.format ) / get_format_type_size( imageInfo.format ); + imageInfo.width = 
(size_t)( ( typeRange + 255LL ) / 256LL ); + + while( imageInfo.width >= maxWidth / 2 ) + imageInfo.width >>= 1; + imageInfo.rowPitch = imageInfo.width * pixelSize; + + gRoundingStartValue = 0; + do + { + if( gDebugTrace ) + log_info( " at size %d, starting round ramp at %llu for range %llu\n", (int)imageInfo.width, gRoundingStartValue, typeRange ); + int retCode = test_read_image_1D( device, context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed ); + if( retCode ) + return retCode; + + gRoundingStartValue += imageInfo.width * pixelSize / get_format_type_size( imageInfo.format ); + + } while( gRoundingStartValue < typeRange ); + } + else + { + for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ ) + { + cl_ulong size; + // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that + // image, the result array, plus offset arrays, will fit in the global ram space + do + { + imageInfo.width = (size_t)random_log_in_range( 16, (int)maxWidth / 32, seed ); + + imageInfo.rowPitch = imageInfo.width * pixelSize; + if(gTestMipmaps) + { + imageInfo.num_mip_levels = (size_t)random_in_range(2, (compute_max_mip_levels(imageInfo.width, 0, 0)-1), seed); + size = (cl_ulong) compute_mipmapped_image_size(imageInfo) * 4; + } + else + { + if( gEnablePitch ) + { + size_t extraWidth = (int)random_log_in_range( 0, 64, seed ); + imageInfo.rowPitch += extraWidth * pixelSize; + } + + size = (size_t)imageInfo.rowPitch * 4; + } + } while( size > maxAllocSize || ( size * 3 ) > memSize ); + + if( gDebugTrace ) + log_info( " at size %d (row pitch %d) out of %d\n", (int)imageInfo.width, (int)imageInfo.rowPitch, (int)maxWidth ); + int retCode = test_read_image_1D( device, context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed ); + if( retCode ) + return retCode; + } + } + + return 0; +} diff --git a/test_conformance/images/kernel_read_write/test_read_1D_array.cpp 
b/test_conformance/images/kernel_read_write/test_read_1D_array.cpp new file mode 100644 index 00000000..2cd7f8a6 --- /dev/null +++ b/test_conformance/images/kernel_read_write/test_read_1D_array.cpp @@ -0,0 +1,1284 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../testBase.h" +#include + +#if defined( __APPLE__ ) +#include +#include +#include +#endif + +#define MAX_ERR 0.005f +#define MAX_HALF_LINEAR_ERR 0.3f + +extern cl_command_queue queue; +extern cl_context context; +extern bool gDebugTrace, gExtraValidateInfo, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestRounding, gTestMipmaps; +extern cl_device_type gDeviceType; +extern bool gUseKernelSamplers; +extern cl_filter_mode gFilterModeToUse; +extern cl_addressing_mode gAddressModeToUse; +extern uint64_t gRoundingStartValue; +extern cl_mem_flags gMemFlagsToUse; +extern int gtestTypesToRun; + +#define MAX_TRIES 1 +#define MAX_CLAMPED 1 + +const char *read1DArrayKernelSourcePattern = +"__kernel void sample_kernel( read_only image1d_array_t input,%s __global float *xOffsets, __global float *yOffsets, __global %s4 *results %s)\n" +"{\n" +"%s" +" int tidX = get_global_id(0), tidY = get_global_id(1);\n" +"%s" +"%s" +" results[offset] = read_image%s( input, imageSampler, coords %s);\n" +"}"; + +const char *read_write1DArrayKernelSourcePattern = +"__kernel void sample_kernel( read_write image1d_array_t input,%s __global float 
*xOffsets, __global float *yOffsets, __global %s4 *results %s )\n" +"{\n" +"%s" +" int tidX = get_global_id(0), tidY = get_global_id(1);\n" +"%s" +"%s" +" results[offset] = read_image%s( input, coords %s);\n" +"}"; + +const char *offset1DArrayKernelSource = +" int offset = tidY*get_image_width(input) + tidX;\n"; + +const char *offset1DArrayLodKernelSource = +" int lod_int = (int)lod;\n" +" int width_lod = (get_image_width(input) >> lod_int) ? (get_image_width(input) >> lod_int): 1;\n" +" int offset = tidY*width_lod + tidX;\n"; + +const char *intCoordKernelSource1DArray = +" int2 coords = (int2)( xOffsets[offset], yOffsets[offset]);\n"; + +const char *floatKernelSource1DArray = +" float2 coords = (float2)( (float)( xOffsets[offset] ), (float)( yOffsets[offset] ) );\n"; + +static const char *samplerKernelArg = " sampler_t imageSampler,"; + +#define ABS_ERROR( result, expected ) ( fabsf( (float)expected - (float)result ) ) + +extern void read_image_pixel_float( void *imageData, image_descriptor *imageInfo, + int x, int y, int z, float *outData ); + +extern void read_image_pixel_float( void *imageData, image_descriptor *imageInfo, + int x, int y, int z, float *outData , int lod); + +template int determine_validation_error_1D_arr( void *imagePtr, image_descriptor *imageInfo, image_sampler_data *imageSampler, + T *resultPtr, T * expected, float error, + float x, float y, float xAddressOffset, float yAddressOffset, size_t j, int &numTries, int &numClamped, bool printAsFloat, int lod ) +{ + int actualX, actualY; + int found = debug_find_pixel_in_image( imagePtr, imageInfo, resultPtr, &actualX, &actualY, NULL, lod ); + bool clampingErr = false, clamped = false, otherClampingBug = false; + int clampedX, clampedY, ignoreMe; + + // FIXME: I do not believe this is correct for 1D or 2D image arrays; + // it will report spurious validation failure reasons since + // the clamping for such image objects is different than 1D-3D + // image objects. 
+ clamped = get_integer_coords_offset( x, y, 0.0f, xAddressOffset, yAddressOffset, 0.0f, imageInfo->width, imageInfo->arraySize, 0, imageSampler, imageInfo, clampedX, clampedY, ignoreMe ); + + if( found ) + { + // Is it a clamping bug? + if( clamped && clampedX == actualX && clampedY == actualY ) + { + if( (--numClamped) == 0 ) + { + log_error( "ERROR: TEST FAILED: Read is erroneously clamping coordinates for image size %ld x %ld!\n", imageInfo->width, imageInfo->arraySize ); + if( printAsFloat ) + { + log_error( "Sample %d: coord {%f(%a), %f(%a)} did not validate!\n\tExpected (%g,%g,%g,%g),\n\tgot (%g,%g,%g,%g),\n\terror of %g\n", + (int)j, x, x, y, y, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ], + (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error ); + } + else + { + log_error( "Sample %d: coord {%f(%a), %f(%a)} did not validate!\n\tExpected (%x,%x,%x,%x),\n\tgot (%x,%x,%x,%x)\n", + (int)j, x, x, y, y, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ], + (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] ); + } + return 1; + } + clampingErr = true; + otherClampingBug = true; + } + } + if( clamped && !otherClampingBug ) + { + // If we are in clamp-to-edge mode and we're getting zeroes, it's possible we're getting border erroneously + if( resultPtr[ 0 ] == 0 && resultPtr[ 1 ] == 0 && resultPtr[ 2 ] == 0 && resultPtr[ 3 ] == 0 ) + { + if( (--numClamped) == 0 ) + { + log_error( "ERROR: TEST FAILED: Clamping is erroneously returning border color for image size %ld x %ld!\n", imageInfo->width, imageInfo->arraySize ); + if( printAsFloat ) + { + log_error( "Sample %d: coord {%f(%a), %f(%a)} did not validate!\n\tExpected (%g,%g,%g,%g),\n\tgot (%g,%g,%g,%g),\n\terror of %g\n", + (int)j, x, x, y, y, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ], + (float)resultPtr[ 0 ], 
(float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error ); + } + else + { + log_error( "Sample %d: coord {%f(%a), %f(%a)} did not validate!\n\tExpected (%x,%x,%x,%x),\n\tgot (%x,%x,%x,%x)\n", + (int)j, x, x, y, y, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ], + (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] ); + } + return 1; + } + clampingErr = true; + } + } + if( !clampingErr ) + { + if( printAsFloat ) + { + log_error( "Sample %d: coord {%f(%a), %f(%a)} did not validate!\n\tExpected (%g,%g,%g,%g),\n\tgot (%g,%g,%g,%g), error of %g\n", + (int)j, x, x, y, y, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ], + (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error ); + } + else + { + log_error( "Sample %d: coord {%f(%a), %f(%a)} did not validate!\n\tExpected (%x,%x,%x,%x),\n\tgot (%x,%x,%x,%x)\n", + (int)j, x, x, y, y, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ], + (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] ); + } + log_error( "img size %ld,%ld (pitch %ld)", imageInfo->width, imageInfo->arraySize, imageInfo->rowPitch ); + if( clamped ) + { + log_error( " which would clamp to %d,%d\n", clampedX, clampedY ); + } + if( printAsFloat && gExtraValidateInfo) + { + log_error( "Nearby values:\n" ); + log_error( "\t%d\t%d\t%d\t%d\n", clampedX - 2, clampedX - 1, clampedX, clampedX + 1 ); + for( int yOff = -2; yOff <= 1; yOff++ ) + { + float top[ 4 ], real[ 4 ], bot[ 4 ], bot2[ 4 ]; + read_image_pixel_float( imagePtr, imageInfo, clampedX - 2 , clampedY + yOff, 0, top ); + read_image_pixel_float( imagePtr, imageInfo, clampedX - 1 ,clampedY + yOff, 0, real ); + read_image_pixel_float( imagePtr, imageInfo, clampedX, clampedY + yOff, 0, bot ); + read_image_pixel_float( imagePtr, imageInfo, clampedX + 1, clampedY + yOff, 0, bot2 ); + 
log_error( "%d\t(%g,%g,%g,%g)",clampedY + yOff, top[0], top[1], top[2], top[3] ); + log_error( " (%g,%g,%g,%g)", real[0], real[1], real[2], real[3] ); + log_error( " (%g,%g,%g,%g)",bot[0], bot[1], bot[2], bot[3] ); + log_error( " (%g,%g,%g,%g)\n",bot2[0], bot2[1], bot2[2], bot2[3] ); + } + + if( clampedY < 1 ) + { + log_error( "Nearby values:\n" ); + log_error( "\t%d\t%d\t%d\t%d\n", clampedX - 2, clampedX - 1, clampedX, clampedX + 1 ); + for( int yOff = (int)imageInfo->arraySize - 2; yOff <= (int)imageInfo->arraySize + 1; yOff++ ) + { + float top[ 4 ], real[ 4 ], bot[ 4 ], bot2[ 4 ]; + read_image_pixel_float( imagePtr, imageInfo, clampedX - 2 , clampedY + yOff, 0, top ); + read_image_pixel_float( imagePtr, imageInfo, clampedX - 1 ,clampedY + yOff, 0, real ); + read_image_pixel_float( imagePtr, imageInfo, clampedX, clampedY + yOff, 0, bot ); + read_image_pixel_float( imagePtr, imageInfo, clampedX + 1, clampedY + yOff, 0, bot2 ); + log_error( "%d\t(%g,%g,%g,%g)",clampedY + yOff, top[0], top[1], top[2], top[3] ); + log_error( " (%g,%g,%g,%g)", real[0], real[1], real[2], real[3] ); + log_error( " (%g,%g,%g,%g)",bot[0], bot[1], bot[2], bot[3] ); + log_error( " (%g,%g,%g,%g)\n",bot2[0], bot2[1], bot2[2], bot2[3] ); + } + } + } + + if( imageSampler->filter_mode != CL_FILTER_LINEAR ) + { + if( found ) + log_error( "\tValue really found in image at %d,%d (%s)\n", actualX, actualY, ( found > 1 ) ? "NOT unique!!" : "unique" ); + else + log_error( "\tValue not actually found in image\n" ); + } + log_error( "\n" ); + + numClamped = -1; // We force the clamped counter to never work + if( ( --numTries ) == 0 ) + { + return 1; + } + } + return 0; +} + +#define CLAMP( _val, _min, _max ) ((_val) < (_min) ? (_min) : (_val) > (_max) ? 
(_max) : (_val)) + +static void InitFloatCoords( image_descriptor *imageInfo, image_sampler_data *imageSampler, float *xOffsets, float *yOffsets, float xfract, float yfract, int normalized_coords, MTdata d , int lod) +{ + size_t i = 0; + size_t width_lod = imageInfo->width; + + if(gTestMipmaps) + width_lod = (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1; + + if( gDisableOffsets ) + { + for( size_t y = 0; y < imageInfo->arraySize; y++ ) + { + for( size_t x = 0; x < width_lod; x++, i++ ) + { + xOffsets[ i ] = (float) (xfract + (double) x); + yOffsets[ i ] = (float) (yfract + (double) y); + } + } + } + else + { + for( size_t y = 0; y < imageInfo->arraySize; y++ ) + { + for( size_t x = 0; x < width_lod; x++, i++ ) + { + xOffsets[ i ] = (float) (xfract + (double) ((int) x + random_in_range( -10, 10, d ))); + yOffsets[ i ] = (float) (yfract + (double) ((int) y + random_in_range( -10, 10, d ))); + } + } + } + + if( imageSampler->addressing_mode == CL_ADDRESS_NONE ) + { + i = 0; + for( size_t y = 0; y < imageInfo->arraySize; y++ ) + { + for( size_t x = 0; x < width_lod; x++, i++ ) + { + xOffsets[ i ] = (float) CLAMP( (double) xOffsets[ i ], 0.0, (double)width_lod - 1.0); + yOffsets[ i ] = (float) CLAMP( (double) yOffsets[ i ], 0.0, (double)imageInfo->arraySize - 1.0); + } + } + } + + if( normalized_coords ) + { + i = 0; + for( size_t y = 0; y < imageInfo->arraySize; y++ ) + { + for( size_t x = 0; x < width_lod; x++, i++ ) + { + xOffsets[ i ] = (float) ((double) xOffsets[ i ] / (double) width_lod); + } + } + } +} + +#ifndef MAX +#define MAX( _a, _b ) ((_a) > (_b) ? 
(_a) : (_b)) +#endif + + +int test_read_image_1D_array( cl_device_id device, cl_context context, cl_command_queue queue, cl_kernel kernel, + image_descriptor *imageInfo, image_sampler_data *imageSampler, + bool useFloatCoords, ExplicitType outputType, MTdata d ) +{ + int error; + static int initHalf = 0; + + size_t threads[2]; + cl_mem_flags image_read_write_flags = CL_MEM_READ_ONLY; + clMemWrapper xOffsets, yOffsets, results; + clSamplerWrapper actualSampler; + BufferOwningPtr maxImageUseHostPtrBackingStore; + + // The DataBuffer template class really does use delete[], not free -- IRO + BufferOwningPtr xOffsetValues(malloc(sizeof(cl_float) * imageInfo->width * imageInfo->arraySize)); + BufferOwningPtr yOffsetValues(malloc(sizeof(cl_float) * imageInfo->width * imageInfo->arraySize)); + + if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT ) + if( DetectFloatToHalfRoundingMode(queue) ) + return 1; + + // generate_random_image_data allocates with malloc, so we use a MallocDataBuffer here + BufferOwningPtr imageValues; + generate_random_image_data( imageInfo, imageValues, d ); + + if( gDebugTrace ) + { + log_info( " - Creating 1D image array %d by %d...\n", (int)imageInfo->width, (int)imageInfo->arraySize ); + if(gTestMipmaps) + log_info(" - and %d mip levels\n", (int)imageInfo->num_mip_levels); + } + + // Construct testing sources + clProtectedImage protImage; + clMemWrapper unprotImage; + cl_mem image; + + if(gtestTypesToRun & kReadTests) + { + image_read_write_flags = CL_MEM_READ_ONLY; + } + else + { + image_read_write_flags = CL_MEM_READ_WRITE; + } + + if( gMemFlagsToUse == CL_MEM_USE_HOST_PTR ) + { + // clProtectedImage uses USE_HOST_PTR, so just rely on that for the testing (via Ian) + // Do not use protected images for max image size test since it rounds the row size to a page size + if (gTestMaxImages) { + generate_random_image_data( imageInfo, maxImageUseHostPtrBackingStore, d ); + + unprotImage = create_image_1d_array(context, + 
image_read_write_flags | CL_MEM_USE_HOST_PTR, + imageInfo->format, + imageInfo->width, imageInfo->arraySize, + ( gEnablePitch ? imageInfo->rowPitch : 0 ), + ( gEnablePitch ? imageInfo->slicePitch : 0), + maxImageUseHostPtrBackingStore, &error); + } else { + error = protImage.Create( context, CL_MEM_OBJECT_IMAGE1D_ARRAY, + image_read_write_flags, + imageInfo->format, + imageInfo->width, 1, 1, imageInfo->arraySize ); + } + if( error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create 1D image array of size %d x %d pitch %d (%s)\n", + (int)imageInfo->width, (int)imageInfo->arraySize, + (int)imageInfo->rowPitch, IGetErrorString( error ) ); + return error; + } + + if (gTestMaxImages) + image = (cl_mem)unprotImage; + else + image = (cl_mem)protImage; + } + else if( gMemFlagsToUse == CL_MEM_COPY_HOST_PTR ) + { + // Don't use clEnqueueWriteImage; just use copy host ptr to get the data in + unprotImage = create_image_1d_array(context, + image_read_write_flags | CL_MEM_COPY_HOST_PTR, + imageInfo->format, + imageInfo->width, imageInfo->arraySize, + ( gEnablePitch ? imageInfo->rowPitch : 0 ), + ( gEnablePitch ? 
imageInfo->slicePitch : 0), + imageValues, &error); + + if( error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create 1D image array of size %d x %d pitch %d (%s)\n", + (int)imageInfo->width, (int)imageInfo->arraySize, + (int)imageInfo->rowPitch, IGetErrorString( error ) ); + return error; + } + image = unprotImage; + } + else // Either CL_MEM_ALLOC_HOST_PTR or none + { + // Note: if ALLOC_HOST_PTR is used, the driver allocates memory that can be accessed by the host, but otherwise + // it works just as if no flag is specified, so we just do the same thing either way + if(gTestMipmaps) + { + cl_image_desc image_desc = {0}; + image_desc.image_type = CL_MEM_OBJECT_IMAGE1D_ARRAY; + image_desc.image_width = imageInfo->width; + image_desc.image_array_size = imageInfo->arraySize; + image_desc.num_mip_levels = imageInfo->num_mip_levels; + + unprotImage = clCreateImage( context, + image_read_write_flags, + imageInfo->format, &image_desc, NULL, &error); + if( error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create %d level mipmapped 1D image array of size %d x %d (pitch %d, %d ) (%s)",(int)imageInfo->num_mip_levels, (int)imageInfo->width, (int)imageInfo->arraySize, (int)imageInfo->rowPitch, (int)imageInfo->slicePitch, IGetErrorString( error ) ); + return error; + } + } + else + { + unprotImage = create_image_1d_array(context, + image_read_write_flags | gMemFlagsToUse, + imageInfo->format, + imageInfo->width, imageInfo->arraySize, + ( gEnablePitch ? imageInfo->rowPitch : 0 ), + ( gEnablePitch ? 
imageInfo->slicePitch : 0), + imageValues, &error); + + if( error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create 1D image array of size %d x %d pitch %d (%s)\n", + (int)imageInfo->width, (int)imageInfo->arraySize, + (int)imageInfo->rowPitch, IGetErrorString( error ) ); + return error; + } + } + image = unprotImage; + } + + if( gMemFlagsToUse != CL_MEM_COPY_HOST_PTR ) + { + if( gDebugTrace ) + log_info( " - Writing image...\n" ); + + size_t origin[ 3 ] = { 0, 0, 0 }; + size_t region[ 3 ] = { imageInfo->width, imageInfo->arraySize, 1 }; + + if(gTestMipmaps) + { + int nextLevelOffset = 0; + + for (int i =0; i < imageInfo->num_mip_levels; i++) + { origin[2] = i; + error = clEnqueueWriteImage(queue, image, CL_TRUE, + origin, region, /*gEnablePitch ? imageInfo->rowPitch :*/ 0, /*gEnablePitch ? imageInfo->slicePitch :*/ 0, + ((char*)imageValues + nextLevelOffset), 0, NULL, NULL); + if (error != CL_SUCCESS) + { + log_error( "ERROR: Unable to write to %d level mipmapped 3D image of size %d x %d x %d\n", (int)imageInfo->num_mip_levels,(int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->depth ); + return error; + } + nextLevelOffset += region[0]*region[1]*get_pixel_size(imageInfo->format); + //Subsequent mip level dimensions keep halving + region[0] = region[0] >> 1 ? region[0] >> 1 : 1; + } + } + else + { + error = clEnqueueWriteImage(queue, image, CL_TRUE, + origin, region, ( gEnablePitch ? 
imageInfo->rowPitch : 0 ), 0, + imageValues, 0, NULL, NULL); + if (error != CL_SUCCESS) + { + log_error( "ERROR: Unable to write to %d level 1D image array of size %d x %d\n", + (int)imageInfo->num_mip_levels, + (int)imageInfo->width, (int)imageInfo->arraySize ); + return error; + } + } + } + + if( gDebugTrace ) + log_info( " - Creating kernel arguments...\n" ); + + xOffsets = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), + sizeof( cl_float ) * imageInfo->width * imageInfo->arraySize, xOffsetValues, &error ); + test_error( error, "Unable to create x offset buffer" ); + + yOffsets = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), + sizeof( cl_float ) * imageInfo->width * imageInfo->arraySize, yOffsetValues, &error ); + test_error( error, "Unable to create y offset buffer" ); + + results = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), + get_explicit_type_size( outputType ) * 4 * imageInfo->width * imageInfo->arraySize, NULL, &error ); + test_error( error, "Unable to create result buffer" ); + + // Create sampler to use + cl_sampler_properties properties[] = { + CL_SAMPLER_NORMALIZED_COORDS, imageSampler->normalized_coords, + CL_SAMPLER_ADDRESSING_MODE, imageSampler->addressing_mode, + CL_SAMPLER_FILTER_MODE, imageSampler->filter_mode, + 0, 0, 0 }; + if (gTestMipmaps) { + properties[6] = CL_SAMPLER_MIP_FILTER_MODE; + properties[7] = imageSampler->filter_mode; + } + actualSampler = clCreateSamplerWithProperties(context, properties, &error ); + test_error( error, "Unable to create image sampler" ); + + // Set arguments + int idx = 0; + error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &image ); + test_error( error, "Unable to set kernel arguments" ); + if( !gUseKernelSamplers ) + { + error = clSetKernelArg( kernel, idx++, sizeof( cl_sampler ), &actualSampler ); + test_error( error, "Unable to set kernel arguments" ); + } + error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &xOffsets ); + test_error( error, 
"Unable to set kernel arguments" ); + error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &yOffsets ); + test_error( error, "Unable to set kernel arguments" ); + error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &results ); + test_error( error, "Unable to set kernel arguments" ); + + // A cast of troublesome offsets. The first one has to be zero. + const float float_offsets[] = { 0.0f, MAKE_HEX_FLOAT(0x1.0p-30f, 0x1L, -30), 0.25f, 0.3f, 0.5f - FLT_EPSILON/4.0f, 0.5f, 0.9f, 1.0f - FLT_EPSILON/2 }; + int float_offset_count = sizeof( float_offsets) / sizeof( float_offsets[0] ); + int numTries = MAX_TRIES, numClamped = MAX_CLAMPED; + int loopCount = 2 * float_offset_count; + if( ! useFloatCoords ) + loopCount = 1; + if (gTestMaxImages) { + loopCount = 1; + log_info("Testing each size only once with pixel offsets of %g for max sized images.\n", float_offsets[0]); + } + + // Get the maximum absolute error for this format + if(gtestTypesToRun & kReadWriteTests) + { + loopCount = 1; + } + + // Get the maximum absolute error for this format + double formatAbsoluteError = get_max_absolute_error(imageInfo->format, imageSampler); + if (gDebugTrace) log_info("\tformatAbsoluteError is %e\n", formatAbsoluteError); + + if (0 == initHalf && imageInfo->format->image_channel_data_type == CL_HALF_FLOAT ) { + initHalf = CL_SUCCESS == DetectFloatToHalfRoundingMode( queue ); + if (initHalf) { + log_info("Half rounding mode successfully detected.\n"); + } + } + + size_t width_lod = imageInfo->width; + size_t nextLevelOffset = 0; + char * imagePtr; + for(int lod = 0; (gTestMipmaps && lod < imageInfo->num_mip_levels) || (!gTestMipmaps && lod < 1); lod++) + { + size_t resultValuesSize = width_lod * imageInfo->arraySize * get_explicit_type_size( outputType ) * 4; + BufferOwningPtr resultValues(malloc(resultValuesSize)); + float lod_float = (float)lod; + if (gTestMipmaps) { + //Set the lod kernel arg + if(gDebugTrace) + log_info(" - Working at mip level %d\n", lod); + error = 
clSetKernelArg( kernel, idx, sizeof( float ), &lod_float); + test_error( error, "Unable to set kernel arguments" ); + } + + for( int q = 0; q < loopCount; q++ ) + { + float offset = float_offsets[ q % float_offset_count ]; + + // Init the coordinates + InitFloatCoords(imageInfo, imageSampler, xOffsetValues, yOffsetValues, + q>=float_offset_count ? -offset: offset, + q>=float_offset_count ? offset: -offset, imageSampler->normalized_coords, d, lod ); + + error = clEnqueueWriteBuffer( queue, xOffsets, CL_TRUE, 0, sizeof(cl_float) * imageInfo->arraySize * imageInfo->width, xOffsetValues, 0, NULL, NULL ); + test_error( error, "Unable to write x offsets" ); + error = clEnqueueWriteBuffer( queue, yOffsets, CL_TRUE, 0, sizeof(cl_float) * imageInfo->arraySize * imageInfo->width, yOffsetValues, 0, NULL, NULL ); + test_error( error, "Unable to write y offsets" ); + + // Get results + memset( resultValues, 0xff, resultValuesSize ); + clEnqueueWriteBuffer( queue, results, CL_TRUE, 0, resultValuesSize, resultValues, 0, NULL, NULL ); + + // Run the kernel + threads[0] = (size_t)width_lod; + threads[1] = (size_t)imageInfo->arraySize; + error = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, NULL ); + test_error( error, "Unable to run kernel" ); + + if( gDebugTrace ) + log_info( " reading results, %ld kbytes\n", (unsigned long)( width_lod * imageInfo->arraySize * get_explicit_type_size( outputType ) * 4 / 1024 ) ); + + error = clEnqueueReadBuffer( queue, results, CL_TRUE, 0, width_lod * imageInfo->arraySize * get_explicit_type_size( outputType ) * 4, resultValues, 0, NULL, NULL ); + test_error( error, "Unable to read results from kernel" ); + if( gDebugTrace ) + log_info( " results read\n" ); + + // Validate results element by element + imagePtr = (char*)imageValues + nextLevelOffset; + /* + * FLOAT output type, order= sRGB + */ + if(is_sRGBA_order(imageInfo->format->image_channel_order) && ( outputType == kFloat )) + { + // Validate float results + float 
*resultPtr = (float *)(char *)resultValues; + float expected[4], error=0.0f; + float maxErr = get_max_relative_error( imageInfo->format, imageSampler, 0 /*not 3D*/, CL_FILTER_LINEAR == imageSampler->filter_mode ); + for( size_t y = 0, j = 0; y < imageInfo->arraySize; y++ ) + { + for( size_t x = 0; x < width_lod; x++, j++ ) + { + // Step 1: go through and see if the results verify for the pixel + // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the + // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. + int checkOnlyOnePixel = 0; + int found_pixel = 0; + float offset = NORM_OFFSET; + if (!imageSampler->normalized_coords || imageSampler->filter_mode != CL_FILTER_NEAREST || NORM_OFFSET == 0 +#if defined( __APPLE__ ) + // Apple requires its CPU implementation to do correctly rounded address arithmetic in all modes + || gDeviceType != CL_DEVICE_TYPE_GPU +#endif + ) + offset = 0.0f; // Loop only once + + for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel; norm_offset_x += NORM_OFFSET) { + for (float norm_offset_y = -offset; norm_offset_y <= offset && !found_pixel; norm_offset_y += NORM_OFFSET) { + + + // Try sampling the pixel, without flushing denormals. + int containsDenormals = 0; + FloatPixel maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f, + imageSampler, expected, 0, &containsDenormals, lod ); + + float err1 = fabsf( sRGBmap( resultPtr[0] ) - sRGBmap( expected[0] ) ); + float err2 = fabsf( sRGBmap( resultPtr[1] ) - sRGBmap( expected[1] ) ); + float err3 = fabsf( sRGBmap( resultPtr[2] ) - sRGBmap( expected[2] ) ); + float err4 = fabsf( resultPtr[3] - expected[3] ); + float maxErr = 0.5; + + // Check if the result matches. + if( ! (err1 <= maxErr) || ! (err2 <= maxErr) || + ! (err3 <= maxErr) || ! 
(err4 <= maxErr) ) + { + //try flushing the denormals, if there is a failure. + if( containsDenormals ) + { + // If implementation decide to flush subnormals to zero, + // max error needs to be adjusted + maxErr += 4 * FLT_MIN; + + maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f, + imageSampler, expected, 0, NULL, lod ); + + err1 = fabsf( sRGBmap( resultPtr[0] ) - sRGBmap( expected[0] ) ); + err2 = fabsf( sRGBmap( resultPtr[1] ) - sRGBmap( expected[1] ) ); + err3 = fabsf( sRGBmap( resultPtr[2] ) - sRGBmap( expected[2] ) ); + err4 = fabsf( resultPtr[3] - expected[3] ); + } + } + + // If the final result DOES match, then we've found a valid result and we're done with this pixel. + found_pixel = (err1 <= maxErr) && (err2 <= maxErr) && (err3 <= maxErr) && (err4 <= maxErr); + }//norm_offset_x + }//norm_offset_y + + + // Step 2: If we did not find a match, then print out debugging info. + if (!found_pixel) { + // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the + // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. + checkOnlyOnePixel = 0; + int shouldReturn = 0; + for (float norm_offset_x = -offset; norm_offset_x <= offset && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) { + for (float norm_offset_y = -offset; norm_offset_y <= offset && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) { + + // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) + // E.g., test one pixel. 
+ if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + norm_offset_x = 0.0f; + norm_offset_y = 0.0f; + checkOnlyOnePixel = 1; + } + + int containsDenormals = 0; + FloatPixel maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f, + imageSampler, expected, 0, &containsDenormals, lod ); + + float err1 = fabsf( sRGBmap( resultPtr[0] ) - sRGBmap( expected[0] ) ); + float err2 = fabsf( sRGBmap( resultPtr[1] ) - sRGBmap( expected[1] ) ); + float err3 = fabsf( sRGBmap( resultPtr[2] ) - sRGBmap( expected[2] ) ); + float err4 = fabsf( resultPtr[3] - expected[3] ); + + float maxErr = 0.6; + + if( ! (err1 <= maxErr) || ! (err2 <= maxErr) || + ! (err3 <= maxErr) || ! (err4 <= maxErr) ) + { + //try flushing the denormals, if there is a failure. + if( containsDenormals ) + { + // If implementation decide to flush subnormals to zero, + // max error needs to be adjusted + maxErr += 4 * FLT_MIN; + + maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f, + imageSampler, expected, 0, NULL, lod ); + + err1 = fabsf( sRGBmap( resultPtr[0] ) - sRGBmap( expected[0] ) ); + err2 = fabsf( sRGBmap( resultPtr[1] ) - sRGBmap( expected[1] ) ); + err3 = fabsf( sRGBmap( resultPtr[2] ) - sRGBmap( expected[2] ) ); + err4 = fabsf( resultPtr[3] - expected[3] ); + } + } + if( ! (err1 <= maxErr) || ! (err2 <= maxErr) || + ! (err3 <= maxErr) || ! 
(err4 <= maxErr) ) + { + log_error("FAILED norm_offsets: %g , %g:\n", norm_offset_x, norm_offset_y); + + float tempOut[4]; + shouldReturn |= determine_validation_error_1D_arr( imagePtr, imageInfo, imageSampler, resultPtr, + expected, error, xOffsetValues[ j ], yOffsetValues[ j ], norm_offset_x, norm_offset_y, j, numTries, numClamped, true, lod ); + + log_error( "Step by step:\n" ); + FloatPixel temp = sample_image_pixel_float_offset( imagePtr, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f, + imageSampler, tempOut, 1 /* verbose */, &containsDenormals /*dont flush while error reporting*/, lod ); + log_error( "\tulps: %2.2f, %2.2f, %2.2f, %2.2f (max allowed: %2.2f)\n\n", + Ulp_Error( resultPtr[0], expected[0] ), + Ulp_Error( resultPtr[1], expected[1] ), + Ulp_Error( resultPtr[2], expected[2] ), + Ulp_Error( resultPtr[3], expected[3] ), + Ulp_Error( MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) + maxErr, MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) ) ); + + } else { + log_error("Test error: we should have detected this passing above.\n"); + } + + }//norm_offset_x + }//norm_offset_y + if( shouldReturn ) + return 1; + } // if (!found_pixel) + + resultPtr += 4; + } + } + } + /* + * FLOAT output type + */ + else if( outputType == kFloat ) + { + // Validate float results + float *resultPtr = (float *)(char *)resultValues; + float expected[4], error=0.0f; + float maxErr = get_max_relative_error( imageInfo->format, imageSampler, 0 /*not 3D*/, CL_FILTER_LINEAR == imageSampler->filter_mode ); + for( size_t y = 0, j = 0; y < imageInfo->arraySize; y++ ) + { + for( size_t x = 0; x < width_lod; x++, j++ ) + { + // Step 1: go through and see if the results verify for the pixel + // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the + // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. 
+ int checkOnlyOnePixel = 0; + int found_pixel = 0; + float offset = NORM_OFFSET; + if (!imageSampler->normalized_coords || imageSampler->filter_mode != CL_FILTER_NEAREST || NORM_OFFSET == 0 +#if defined( __APPLE__ ) + // Apple requires its CPU implementation to do correctly rounded address arithmetic in all modes + || gDeviceType != CL_DEVICE_TYPE_GPU +#endif + ) + offset = 0.0f; // Loop only once + + for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel; norm_offset_x += NORM_OFFSET) { + for (float norm_offset_y = -offset; norm_offset_y <= offset && !found_pixel; norm_offset_y += NORM_OFFSET) { + + + // Try sampling the pixel, without flushing denormals. + int containsDenormals = 0; + FloatPixel maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f, + imageSampler, expected, 0, &containsDenormals, lod ); + + float err1 = fabsf( resultPtr[0] - expected[0] ); + float err2 = fabsf( resultPtr[1] - expected[1] ); + float err3 = fabsf( resultPtr[2] - expected[2] ); + float err4 = fabsf( resultPtr[3] - expected[3] ); + // Clamp to the minimum absolute error for the format + if (err1 > 0 && err1 < formatAbsoluteError) { err1 = 0.0f; } + if (err2 > 0 && err2 < formatAbsoluteError) { err2 = 0.0f; } + if (err3 > 0 && err3 < formatAbsoluteError) { err3 = 0.0f; } + if (err4 > 0 && err4 < formatAbsoluteError) { err4 = 0.0f; } + float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN ); + float maxErr2 = MAX( maxErr * maxPixel.p[1], FLT_MIN ); + float maxErr3 = MAX( maxErr * maxPixel.p[2], FLT_MIN ); + float maxErr4 = MAX( maxErr * maxPixel.p[3], FLT_MIN ); + + // Check if the result matches. + if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2) || + ! (err3 <= maxErr3) || ! (err4 <= maxErr4) ) + { + //try flushing the denormals, if there is a failure. 
+ if( containsDenormals ) + { + // If implementation decide to flush subnormals to zero, + // max error needs to be adjusted + maxErr1 += 4 * FLT_MIN; + maxErr2 += 4 * FLT_MIN; + maxErr3 += 4 * FLT_MIN; + maxErr4 += 4 * FLT_MIN; + + maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f, + imageSampler, expected, 0, NULL, lod ); + + err1 = fabsf( resultPtr[0] - expected[0] ); + err2 = fabsf( resultPtr[1] - expected[1] ); + err3 = fabsf( resultPtr[2] - expected[2] ); + err4 = fabsf( resultPtr[3] - expected[3] ); + } + } + + // If the final result DOES match, then we've found a valid result and we're done with this pixel. + found_pixel = (err1 <= maxErr1) && (err2 <= maxErr2) && (err3 <= maxErr3) && (err4 <= maxErr4); + }//norm_offset_x + }//norm_offset_y + + + // Step 2: If we did not find a match, then print out debugging info. + if (!found_pixel) { + // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the + // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. + checkOnlyOnePixel = 0; + int shouldReturn = 0; + for (float norm_offset_x = -offset; norm_offset_x <= offset && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) { + for (float norm_offset_y = -offset; norm_offset_y <= offset && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) { + + // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) + // E.g., test one pixel. 
+ if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + norm_offset_x = 0.0f; + norm_offset_y = 0.0f; + checkOnlyOnePixel = 1; + } + + int containsDenormals = 0; + FloatPixel maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f, + imageSampler, expected, 0, &containsDenormals, lod ); + + float err1 = fabsf( resultPtr[0] - expected[0] ); + float err2 = fabsf( resultPtr[1] - expected[1] ); + float err3 = fabsf( resultPtr[2] - expected[2] ); + float err4 = fabsf( resultPtr[3] - expected[3] ); + float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN ); + float maxErr2 = MAX( maxErr * maxPixel.p[1], FLT_MIN ); + float maxErr3 = MAX( maxErr * maxPixel.p[2], FLT_MIN ); + float maxErr4 = MAX( maxErr * maxPixel.p[3], FLT_MIN ); + + + if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2) || + ! (err3 <= maxErr3) || ! (err4 <= maxErr4) ) + { + //try flushing the denormals, if there is a failure. + if( containsDenormals ) + { + maxErr1 += 4 * FLT_MIN; + maxErr2 += 4 * FLT_MIN; + maxErr3 += 4 * FLT_MIN; + maxErr4 += 4 * FLT_MIN; + + maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f, + imageSampler, expected, 0, NULL, lod ); + + err1 = fabsf( resultPtr[0] - expected[0] ); + err2 = fabsf( resultPtr[1] - expected[1] ); + err3 = fabsf( resultPtr[2] - expected[2] ); + err4 = fabsf( resultPtr[3] - expected[3] ); + } + } + if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2) || + ! (err3 <= maxErr3) || ! 
(err4 <= maxErr4) ) + { + log_error("FAILED norm_offsets: %g , %g:\n", norm_offset_x, norm_offset_y); + + float tempOut[4]; + shouldReturn |= determine_validation_error_1D_arr( imagePtr, imageInfo, imageSampler, resultPtr, + expected, error, xOffsetValues[ j ], yOffsetValues[ j ], norm_offset_x, norm_offset_y, j, numTries, numClamped, true, lod ); + + log_error( "Step by step:\n" ); + FloatPixel temp = sample_image_pixel_float_offset( imagePtr, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f, + imageSampler, tempOut, 1 /* verbose */, &containsDenormals /*dont flush while error reporting*/, lod ); + log_error( "\tulps: %2.2f, %2.2f, %2.2f, %2.2f (max allowed: %2.2f)\n\n", + Ulp_Error( resultPtr[0], expected[0] ), + Ulp_Error( resultPtr[1], expected[1] ), + Ulp_Error( resultPtr[2], expected[2] ), + Ulp_Error( resultPtr[3], expected[3] ), + Ulp_Error( MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) + maxErr, MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) ) ); + + } else { + log_error("Test error: we should have detected this passing above.\n"); + } + + }//norm_offset_x + }//norm_offset_y + if( shouldReturn ) + return 1; + } // if (!found_pixel) + + resultPtr += 4; + } + } + } + /* + * UINT output type + */ + else if( outputType == kUInt ) + { + // Validate unsigned integer results + unsigned int *resultPtr = (unsigned int *)(char *)resultValues; + unsigned int expected[4]; + float error; + for( size_t y = 0, j = 0; y < imageInfo->arraySize; y++ ) + { + for( size_t x = 0; x < width_lod; x++, j++ ) + { + // Step 1: go through and see if the results verify for the pixel + // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the + // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. 
+ int checkOnlyOnePixel = 0; + int found_pixel = 0; + for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) { + for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) { + + // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) + // E.g., test one pixel. + if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + norm_offset_x = 0.0f; + norm_offset_y = 0.0f; + checkOnlyOnePixel = 1; + } + + sample_image_pixel_offset( imagePtr, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f, + imageSampler, expected, lod ); + + + error = errMax( errMax( abs_diff_uint(expected[ 0 ], resultPtr[ 0 ]), abs_diff_uint(expected[ 1 ], resultPtr[ 1 ]) ), + errMax( abs_diff_uint(expected[ 2 ], resultPtr[ 2 ]), abs_diff_uint(expected[ 3 ], resultPtr[ 3 ]) ) ); + + if (error <= MAX_ERR) + found_pixel = 1; + }//norm_offset_x + }//norm_offset_y + + // Step 2: If we did not find a match, then print out debugging info. + if (!found_pixel) { + // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the + // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. + checkOnlyOnePixel = 0; + int shouldReturn = 0; + for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) { + for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) { + + // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) + // E.g., test one pixel. 
+ if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + norm_offset_x = 0.0f; + norm_offset_y = 0.0f; + checkOnlyOnePixel = 1; + } + + sample_image_pixel_offset( imagePtr, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f, + imageSampler, expected, lod ); + + + error = errMax( errMax( abs_diff_uint(expected[ 0 ], resultPtr[ 0 ]), abs_diff_uint(expected[ 1 ], resultPtr[ 1 ]) ), + errMax( abs_diff_uint(expected[ 2 ], resultPtr[ 2 ]), abs_diff_uint(expected[ 3 ], resultPtr[ 3 ]) ) ); + + if( error > MAX_ERR ) + { + log_error("FAILED norm_offsets: %g , %g:\n", norm_offset_x, norm_offset_y); + + shouldReturn |= determine_validation_error_1D_arr( imagePtr, imageInfo, imageSampler, resultPtr, + expected, error, xOffsetValues[j], yOffsetValues[j], norm_offset_x, norm_offset_y, j, numTries, numClamped, false, lod ); + } else { + log_error("Test error: we should have detected this passing above.\n"); + } + }//norm_offset_x + }//norm_offset_y + if( shouldReturn ) + return 1; + } // if (!found_pixel) + + resultPtr += 4; + } + } + } + /* + * INT output type + */ + else + { + // Validate integer results + int *resultPtr = (int *)(char *)resultValues; + int expected[4]; + float error; + for( size_t y = 0, j = 0; y < imageInfo->arraySize; y++ ) + { + for( size_t x = 0; x < width_lod; x++, j++ ) + { + // Step 1: go through and see if the results verify for the pixel + // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the + // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. 
+ int checkOnlyOnePixel = 0; + int found_pixel = 0; + for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) { + for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) { + + // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) + // E.g., test one pixel. + if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + norm_offset_x = 0.0f; + norm_offset_y = 0.0f; + checkOnlyOnePixel = 1; + } + + sample_image_pixel_offset( imagePtr, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f, + imageSampler, expected, lod ); + + + error = errMax( errMax( abs_diff_int(expected[ 0 ], resultPtr[ 0 ]), abs_diff_int(expected[ 1 ], resultPtr[ 1 ]) ), + errMax( abs_diff_int(expected[ 2 ], resultPtr[ 2 ]), abs_diff_int(expected[ 3 ], resultPtr[ 3 ]) ) ); + + if (error <= MAX_ERR) + found_pixel = 1; + }//norm_offset_x + }//norm_offset_y + + // Step 2: If we did not find a match, then print out debugging info. + if (!found_pixel) { + // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the + // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. + checkOnlyOnePixel = 0; + int shouldReturn = 0; + for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) { + for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) { + + // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) + // E.g., test one pixel. 
+ if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + norm_offset_x = 0.0f; + norm_offset_y = 0.0f; + checkOnlyOnePixel = 1; + } + + sample_image_pixel_offset( imagePtr, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f, + imageSampler, expected, lod ); + + + error = errMax( errMax( abs_diff_int(expected[ 0 ], resultPtr[ 0 ]), abs_diff_int(expected[ 1 ], resultPtr[ 1 ]) ), + errMax( abs_diff_int(expected[ 2 ], resultPtr[ 2 ]), abs_diff_int(expected[ 3 ], resultPtr[ 3 ]) ) ); + + if( error > MAX_ERR ) + { + log_error("FAILED norm_offsets: %g , %g:\n", norm_offset_x, norm_offset_y); + + shouldReturn |= determine_validation_error_1D_arr( imagePtr, imageInfo, imageSampler, resultPtr, + expected, error, xOffsetValues[j], yOffsetValues[j], norm_offset_x, norm_offset_y, j, numTries, numClamped, false, lod ); + } else { + log_error("Test error: we should have detected this passing above.\n"); + } + }//norm_offset_x + }//norm_offset_y + if( shouldReturn ) + return 1; + } // if (!found_pixel) + + resultPtr += 4; + } + } + } + } + { + nextLevelOffset += width_lod * imageInfo->arraySize * get_pixel_size(imageInfo->format); + width_lod = (width_lod >> 1) ? 
(width_lod >> 1) : 1; + } + } + + return numTries != MAX_TRIES || numClamped != MAX_CLAMPED; +} + +int test_read_image_set_1D_array( cl_device_id device, cl_image_format *format, image_sampler_data *imageSampler, + bool floatCoords, ExplicitType outputType ) +{ + char programSrc[10240]; + const char *ptr; + const char *readFormat; + clProgramWrapper program; + clKernelWrapper kernel; + RandomSeed seed( gRandomSeed ); + int error; + const char *KernelSourcePattern = NULL; + + // Get our operating params + size_t maxWidth, maxArraySize; + cl_ulong maxAllocSize, memSize; + image_descriptor imageInfo = { 0x0 }; + size_t pixelSize; + + imageInfo.format = format; + imageInfo.depth = imageInfo.height = 0; + imageInfo.type = CL_MEM_OBJECT_IMAGE1D_ARRAY; + pixelSize = get_pixel_size( imageInfo.format ); + + error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, sizeof( maxArraySize ), &maxArraySize, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL ); + test_error( error, "Unable to get max image 2D array size from device" ); + + if (memSize > (cl_ulong)SIZE_MAX) { + memSize = (cl_ulong)SIZE_MAX; + } + + // Determine types + if( outputType == kInt ) + readFormat = "i"; + else if( outputType == kUInt ) + readFormat = "ui"; + else // kFloat + readFormat = "f"; + + // Construct the source + const char *samplerArg = samplerKernelArg; + char samplerVar[ 1024 ] = ""; + if( gUseKernelSamplers ) + { + get_sampler_kernel_code( imageSampler, samplerVar ); + samplerArg = ""; + } + + if(gtestTypesToRun & kReadTests) + { + KernelSourcePattern = read1DArrayKernelSourcePattern; + } + else + { + KernelSourcePattern = read_write1DArrayKernelSourcePattern; + } + + sprintf( programSrc, + KernelSourcePattern, + 
samplerArg, get_explicit_type_name( outputType ), + gTestMipmaps ? ", float lod" : "", + samplerVar, + gTestMipmaps ? offset1DArrayLodKernelSource : offset1DArrayKernelSource, + floatCoords ? floatKernelSource1DArray : intCoordKernelSource1DArray, + readFormat, + gTestMipmaps ? ", lod" : "" ); + + ptr = programSrc; + error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", "-cl-std=CL2.0" ); + test_error( error, "Unable to create testing kernel" ); + + if( gTestSmallImages ) + { + for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ ) + { + imageInfo.rowPitch = imageInfo.slicePitch = imageInfo.width * pixelSize; + for( imageInfo.arraySize = 2; imageInfo.arraySize < 9; imageInfo.arraySize++ ) + { + if(gTestMipmaps) + imageInfo.num_mip_levels = (size_t)random_in_range(2, (compute_max_mip_levels(imageInfo.width, 0, 0)-1), seed); + + if( gDebugTrace ) + log_info( " at size %d,%d\n", (int)imageInfo.width, (int)imageInfo.arraySize ); + + int retCode = test_read_image_1D_array( device, context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed ); + if( retCode ) + return retCode; + } + } + } + else if( gTestMaxImages ) + { + // Try a specific set of maximum sizes + size_t numbeOfSizes; + size_t sizes[100][3]; + + get_max_sizes(&numbeOfSizes, 100, sizes, maxWidth, 1, 1, maxArraySize, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE1D_ARRAY, imageInfo.format, CL_TRUE); + + for( size_t idx = 0; idx < numbeOfSizes; idx++ ) + { + imageInfo.width = sizes[ idx ][ 0 ]; + imageInfo.arraySize = sizes[ idx ][ 2 ]; // 3rd dimension in get_max_sizes + imageInfo.rowPitch = imageInfo.slicePitch = imageInfo.width * pixelSize; + log_info("Testing %d x %d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ]); + if(gTestMipmaps) + imageInfo.num_mip_levels = (size_t)random_in_range(2, (compute_max_mip_levels(imageInfo.width, 0, 0)-1), seed); + if( gDebugTrace ) + log_info( " at max size %d,%d\n", (int)sizes[ idx 
][ 0 ], (int)sizes[ idx ][ 1 ] ); + int retCode = test_read_image_1D_array( device, context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed ); + if( retCode ) + return retCode; + } + } + else if( gTestRounding ) + { + uint64_t typeRange = 1LL << ( get_format_type_size( imageInfo.format ) * 8 ); + typeRange /= pixelSize / get_format_type_size( imageInfo.format ); + imageInfo.arraySize = (size_t)( ( typeRange + 255LL ) / 256LL ); + imageInfo.width = (size_t)( typeRange / (cl_ulong)imageInfo.arraySize ); + while( imageInfo.arraySize >= maxArraySize / 2 ) + { + imageInfo.width <<= 1; + imageInfo.arraySize >>= 1; + } + + while( imageInfo.width >= maxWidth / 2 ) + imageInfo.width >>= 1; + imageInfo.rowPitch = imageInfo.slicePitch = imageInfo.width * pixelSize; + + gRoundingStartValue = 0; + do + { + if( gDebugTrace ) + log_info( " at size %d,%d, starting round ramp at %llu for range %llu\n", (int)imageInfo.width, (int)imageInfo.arraySize, gRoundingStartValue, typeRange ); + int retCode = test_read_image_1D_array( device, context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed ); + if( retCode ) + return retCode; + + gRoundingStartValue += imageInfo.width * imageInfo.arraySize * pixelSize / get_format_type_size( imageInfo.format ); + + } while( gRoundingStartValue < typeRange ); + } + else + { + for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ ) + { + cl_ulong size; + // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that + // image, the result array, plus offset arrays, will fit in the global ram space + do + { + imageInfo.width = (size_t)random_log_in_range( 16, (int)maxWidth / 32, seed ); + imageInfo.arraySize = (size_t)random_log_in_range( 16, (int)maxArraySize / 32, seed ); + + imageInfo.rowPitch = imageInfo.width * pixelSize; + if(gTestMipmaps) + { + imageInfo.num_mip_levels = (size_t)random_in_range(2, (compute_max_mip_levels(imageInfo.width, 0, 0)-1), seed); + size = 
(cl_ulong) compute_mipmapped_image_size(imageInfo) * 4; + } + else + { + if( gEnablePitch ) + { + size_t extraWidth = (int)random_log_in_range( 0, 64, seed ); + imageInfo.rowPitch += extraWidth * pixelSize; + } + imageInfo.slicePitch = imageInfo.rowPitch; + + size = (size_t)imageInfo.rowPitch * (size_t)imageInfo.arraySize * 4; + } + } while( size > maxAllocSize || ( size * 3 ) > memSize ); + + if( gDebugTrace ) + log_info( " at size %d,%d (row pitch %d) out of %d,%d\n", (int)imageInfo.width, (int)imageInfo.arraySize, (int)imageInfo.rowPitch, (int)maxWidth, (int)maxArraySize ); + int retCode = test_read_image_1D_array( device, context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed ); + if( retCode ) + return retCode; + } + } + + return 0; +} diff --git a/test_conformance/images/kernel_read_write/test_read_2D_array.cpp b/test_conformance/images/kernel_read_write/test_read_2D_array.cpp new file mode 100644 index 00000000..61af6925 --- /dev/null +++ b/test_conformance/images/kernel_read_write/test_read_2D_array.cpp @@ -0,0 +1,1502 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../testBase.h" +#include + +#define MAX_ERR 0.005f +#define MAX_HALF_LINEAR_ERR 0.3f + +extern cl_command_queue queue; +extern cl_context context; +extern bool gDebugTrace, gExtraValidateInfo, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestRounding, gTestMipmaps; +extern cl_device_type gDeviceType; +extern bool gUseKernelSamplers; +extern cl_filter_mode gFilterModeToUse; +extern cl_addressing_mode gAddressModeToUse; +extern cl_mem_flags gMemFlagsToUse; +extern int gtestTypesToRun; +#define MAX_TRIES 1 +#define MAX_CLAMPED 1 + +// Utility function to clamp down image sizes for certain tests to avoid +// using too much memory. +static size_t reduceImageSizeRange(size_t maxDimSize) { + size_t DimSize = maxDimSize/128; + if (DimSize < (size_t) 16) + return 16; + else if (DimSize > (size_t) 64) + return 64; + else + return DimSize; +} + +static size_t reduceImageDepth(size_t maxDepth) { + size_t Depth = maxDepth/32; + if (Depth < (size_t) 8) + return 8; + else if (Depth > (size_t) 32) + return 32; + else + return Depth; +} + +const char *read2DArrayKernelSourcePattern = +"__kernel void sample_kernel( read_only %s input,%s __global float *xOffsets, __global float *yOffsets, __global float *zOffsets, __global %s%s *results %s )\n" +"{\n" +"%s" +" int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = get_global_id(2);\n" +"%s" +"%s" +" results[offset] = read_image%s( input, imageSampler, coords %s);\n" +"}"; + +const char *read_write2DArrayKernelSourcePattern = +"__kernel void sample_kernel( read_write %s input,%s __global float *xOffsets, __global float *yOffsets, __global float *zOffsets, __global %s%s *results %s)\n" +"{\n" +"%s" +" int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = get_global_id(2);\n" +"%s" +"%s" +" results[offset] = read_image%s( input, coords %s);\n" +"}"; + +const char* offset2DarraySource =" int offset = tidZ*get_image_width(input)*get_image_height(input) + tidY*get_image_width(input) + 
tidX;\n"; +const char* offset2DarraySourceLod = + " int lod_int = (int)lod;\n" + " int width_lod, height_lod;\n" + " width_lod = (get_image_width(input) >> lod_int ) ? (get_image_width(input) >> lod_int ) : 1;\n" + " height_lod = (get_image_height(input) >> lod_int ) ? (get_image_height(input) >> lod_int ) : 1;\n" + " int offset = tidZ*width_lod*height_lod + tidY*width_lod + tidX;\n"; + +const char *int2DArrayCoordKernelSource = +" int4 coords = (int4)( (int) xOffsets[offset], (int) yOffsets[offset], (int) zOffsets[offset], 0 );\n"; + +const char *float2DArrayUnnormalizedCoordKernelSource = +" float4 coords = (float4)( xOffsets[offset], yOffsets[offset], zOffsets[offset], 0.0f );\n"; + + +static const char *samplerKernelArg = " sampler_t imageSampler,"; + +#define ABS_ERROR( result, expected ) ( fabsf( (float)expected - (float)result ) ) + +extern void read_image_pixel_float( void *imageData, image_descriptor *imageInfo, int x, int y, int z, float *outData ); +template int determine_validation_error_offset_2D_array( void *imagePtr, image_descriptor *imageInfo, image_sampler_data *imageSampler, + T *resultPtr, T * expected, float error, + float x, float y, float z, float xAddressOffset, float yAddressOffset, float zAddressOffset, size_t j, int &numTries, int &numClamped, bool printAsFloat, int lod ) +{ + int actualX, actualY, actualZ; + int found = debug_find_pixel_in_image( imagePtr, imageInfo, resultPtr, &actualX, &actualY, &actualZ, lod ); + bool clampingErr = false, clamped = false, otherClampingBug = false; + int clampedX, clampedY, clampedZ; + + size_t imageWidth = imageInfo->width, imageHeight = imageInfo->height, imageDepth = imageInfo->arraySize; + + clamped = get_integer_coords_offset( x, y, z, xAddressOffset, yAddressOffset, zAddressOffset, imageWidth, imageHeight, imageDepth, imageSampler, imageInfo, clampedX, clampedY, clampedZ ); + + if( found ) + { + // Is it a clamping bug? 
+ if( clamped && clampedX == actualX && clampedY == actualY && clampedZ == actualZ ) + { + if( (--numClamped) == 0 ) + { + log_error( "\nERROR: TEST FAILED! Read is erroneously clamping coordinates!\n" ); + if( printAsFloat ) + { + log_error( "Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not validate!\n" + " Expected (%g,%g,%g,%g)\n" + " Observed (%g,%g,%g,%g)\n" + " error of %g\n", + j, x, x, y, y, z, z, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ], + (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error ); + } + else + { + log_error( "Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not validate!\n" + " Expected (%x,%x,%x,%x)\n" + " Observed (%x,%x,%x,%x)\n", + j, x, x, y, y, z, z, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ], + (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] ); + } + + if( imageSampler->filter_mode != CL_FILTER_LINEAR ) + { + if( found ) + log_error( "\tValue really found in image at %d,%d,%d (%s)\n", actualX, actualY, actualZ, ( found > 1 ) ? "NOT unique!!" 
: "unique" ); + else + log_error( "\tValue not actually found in image\n" ); + } + log_error( "\n" ); + + return -1; + } + clampingErr = true; + otherClampingBug = true; + } + } + if( clamped && !otherClampingBug ) + { + // If we are in clamp-to-edge mode and we're getting zeroes, it's possible we're getting border erroneously + if( resultPtr[ 0 ] == 0 && resultPtr[ 1 ] == 0 && resultPtr[ 2 ] == 0 && resultPtr[ 3 ] == 0 ) + { + if( (--numClamped) == 0 ) + { + log_error( "\nERROR: TEST FAILED: Clamping is erroneously returning border color!\n" ); + if( printAsFloat ) + { + log_error( "Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not validate!\n" + " Expected (%g,%g,%g,%g)\n" + " Observed (%g,%g,%g,%g)\n" + " error of %g\n", + j, x, x, y, y, z, z, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ], + (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error ); + } + else + { + log_error( "Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not validate!\n" + " Expected (%x,%x,%x,%x)\n" + " Observed (%x,%x,%x,%x)\n", + j, x, x, y, y, z, z, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ], + (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] ); + } + return -1; + } + clampingErr = true; + } + } + if( !clampingErr ) + { + if( true ) // gExtraValidateInfo ) + { + if( printAsFloat ) + { + log_error( "Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not validate!\n" + " Expected (%g,%g,%g,%g)\n" + " Observed (%g,%g,%g,%g)\n" + " error of %g\n", + j, x, x, y, y, z, z, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ], + (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error ); + } + else + { + log_error( "Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not validate!\n" + " Expected (%x,%x,%x,%x)\n" + " Observed (%x,%x,%x,%x)\n", + j, x, x, y, y, z, z, (int)expected[ 0 ], 
(int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ], + (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] ); + } + log_error( "Integer coords resolve to %d,%d,%d, image size = %d,%d,%d\n", clampedX, clampedY, clampedZ, (int)imageWidth, (int)imageHeight, (int)imageDepth ); + + if( printAsFloat && gExtraValidateInfo ) + { + log_error( "\nNearby values:\n" ); + for( int zOff = -1; zOff <= 1; zOff++ ) + { + for( int yOff = -1; yOff <= 1; yOff++ ) + { + float top[ 4 ], real[ 4 ], bot[ 4 ]; + read_image_pixel_float( imagePtr, imageInfo, clampedX - 1 , clampedY + yOff, clampedZ + zOff, top ); + read_image_pixel_float( imagePtr, imageInfo, clampedX ,clampedY + yOff, clampedZ + zOff, real ); + read_image_pixel_float( imagePtr, imageInfo, clampedX + 1, clampedY + yOff, clampedZ + zOff, bot ); + log_error( "\t(%g,%g,%g,%g)",top[0], top[1], top[2], top[3] ); + log_error( " (%g,%g,%g,%g)", real[0], real[1], real[2], real[3] ); + log_error( " (%g,%g,%g,%g)\n",bot[0], bot[1], bot[2], bot[3] ); + } + } + } + if( imageSampler->filter_mode != CL_FILTER_LINEAR ) + { + if( found ) + log_error( "Value really found in image at %d,%d,%d (%s)\n", actualX, actualY, actualZ, ( found > 1 ) ? "NOT unique!!" : "unique" ); + else + log_error( "Value not actually found in image\n" ); + } + log_error( "\n" ); + } + + numClamped = -1; // We force the clamped counter to never work + if( ( --numTries ) == 0 ) + return -1; + } + return 0; +} + +#define CLAMP( _val, _min, _max ) ((_val) < (_min) ? (_min) : (_val) > (_max) ? 
(_max) : (_val)) + +static void InitFloatCoords( image_descriptor *imageInfo, image_sampler_data *imageSampler, float *xOffsets, float *yOffsets, float *zOffsets, float xfract, float yfract, float zfract, int normalized_coords, MTdata d , int lod) +{ + size_t i = 0; + size_t width_lod = imageInfo->width; + size_t height_lod = imageInfo->height; + if(gTestMipmaps) + { + width_lod = ( imageInfo->width >> lod) ?( imageInfo->width >> lod) : 1; + height_lod = ( imageInfo->height >> lod) ?( imageInfo->height >> lod) : 1; + + } + if( gDisableOffsets ) + { + for( size_t z = 0; z < imageInfo->arraySize; z++ ) + { + for( size_t y = 0; y < height_lod; y++ ) + { + for( size_t x = 0; x < width_lod; x++, i++ ) + { + xOffsets[ i ] = (float) (xfract + (double) x); + yOffsets[ i ] = (float) (yfract + (double) y); + zOffsets[ i ] = (float) (zfract + (double) z); + } + } + } + } + else + { + for( size_t z = 0; z < imageInfo->arraySize; z++ ) + { + for( size_t y = 0; y < height_lod; y++ ) + { + for( size_t x = 0; x < width_lod; x++, i++ ) + { + xOffsets[ i ] = (float) (xfract + (double) ((int) x + random_in_range( -10, 10, d ))); + yOffsets[ i ] = (float) (yfract + (double) ((int) y + random_in_range( -10, 10, d ))); + zOffsets[ i ] = (float) (zfract + (double) ((int) z + random_in_range( -10, 10, d ))); + } + } + } + } + + if( imageSampler->addressing_mode == CL_ADDRESS_NONE ) + { + i = 0; + for( size_t z = 0; z < imageInfo->arraySize; z++ ) + { + for( size_t y = 0; y < height_lod; y++ ) + { + for( size_t x = 0; x < width_lod; x++, i++ ) + { + xOffsets[ i ] = (float) CLAMP( (double) xOffsets[ i ], 0.0, (double) width_lod - 1.0); + yOffsets[ i ] = (float) CLAMP( (double) yOffsets[ i ], 0.0, (double) height_lod - 1.0); + zOffsets[ i ] = (float) CLAMP( (double) zOffsets[ i ], 0.0, (double) imageInfo->arraySize - 1.0); + } + } + } + } + + if( normalized_coords ) + { + i = 0; + for( size_t z = 0; z < imageInfo->arraySize; z++ ) + { + for( size_t y = 0; y < height_lod; y++ ) + { + for( 
size_t x = 0; x < width_lod; x++, i++ ) + { + xOffsets[ i ] = (float) ((double) xOffsets[ i ] / (double) width_lod); + yOffsets[ i ] = (float) ((double) yOffsets[ i ] / (double) height_lod); + } + } + } + } +} + +#ifndef MAX +#define MAX(_a, _b) ((_a) > (_b) ? (_a) : (_b)) +#endif + +int test_read_image_2D_array( cl_device_id device, cl_context context, cl_command_queue queue, cl_kernel kernel, + image_descriptor *imageInfo, image_sampler_data *imageSampler, + bool useFloatCoords, ExplicitType outputType, MTdata d ) +{ + int error; + size_t threads[3]; + static int initHalf = 0; + cl_mem_flags image_read_write_flags = CL_MEM_READ_ONLY; + + clMemWrapper xOffsets, yOffsets, zOffsets, results; + clSamplerWrapper actualSampler; + BufferOwningPtr maxImageUseHostPtrBackingStore; + + // Create offset data + BufferOwningPtr xOffsetValues(malloc(sizeof(cl_float) *imageInfo->width * imageInfo->height * imageInfo->arraySize)); + BufferOwningPtr yOffsetValues(malloc(sizeof(cl_float) *imageInfo->width * imageInfo->height * imageInfo->arraySize)); + BufferOwningPtr zOffsetValues(malloc(sizeof(cl_float) *imageInfo->width * imageInfo->height * imageInfo->arraySize)); + + BufferOwningPtr imageValues; + generate_random_image_data( imageInfo, imageValues, d ); + + // Construct testing sources + clProtectedImage protImage; + clMemWrapper unprotImage; + cl_mem image; + + if(gtestTypesToRun & kReadTests) + { + image_read_write_flags = CL_MEM_READ_ONLY; + } + else + { + image_read_write_flags = CL_MEM_READ_WRITE; + } + + if( gMemFlagsToUse == CL_MEM_USE_HOST_PTR ) + { + // clProtectedImage uses USE_HOST_PTR, so just rely on that for the testing (via Ian) + // Do not use protected images for max image size test since it rounds the row size to a page size + if (gTestMaxImages) { + generate_random_image_data( imageInfo, maxImageUseHostPtrBackingStore, d ); + unprotImage = create_image_2d_array( context, + image_read_write_flags | CL_MEM_USE_HOST_PTR, + imageInfo->format, + imageInfo->width, 
imageInfo->height, + imageInfo->arraySize, + ( gEnablePitch ? imageInfo->rowPitch : 0 ), + ( gEnablePitch ? imageInfo->slicePitch : 0 ), + maxImageUseHostPtrBackingStore, &error ); + } else { + error = protImage.Create( context, CL_MEM_OBJECT_IMAGE2D_ARRAY, + image_read_write_flags, + imageInfo->format, imageInfo->width, imageInfo->height, 1, imageInfo->arraySize ); + } + if( error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create 2D image array of size %d x %d x %d (pitch %d, %d ) (%s)", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->arraySize, (int)imageInfo->rowPitch, (int)imageInfo->slicePitch, IGetErrorString( error ) ); + return error; + } + if (gTestMaxImages) + image = (cl_mem)unprotImage; + else + image = (cl_mem)protImage; + } + else if( gMemFlagsToUse == CL_MEM_COPY_HOST_PTR ) + { + // Don't use clEnqueueWriteImage; just use copy host ptr to get the data in + unprotImage = create_image_2d_array( context, + image_read_write_flags | CL_MEM_COPY_HOST_PTR, + imageInfo->format, + imageInfo->width, + imageInfo->height, + imageInfo->arraySize, + ( gEnablePitch ? imageInfo->rowPitch : 0 ), + ( gEnablePitch ? 
imageInfo->slicePitch : 0 ), + imageValues, &error ); + if( error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create 2D image array of size %d x %d x %d (pitch %d, %d ) (%s)", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->arraySize, (int)imageInfo->rowPitch, (int)imageInfo->slicePitch, IGetErrorString( error ) ); + return error; + } + image = unprotImage; + } + else // Either CL_MEM_ALLOC_HOST_PTR or none + { + if ( gTestMipmaps ) + { + cl_image_desc image_desc = {0}; + image_desc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY; + image_desc.image_width = imageInfo->width; + image_desc.image_height = imageInfo->height; + image_desc.image_array_size = imageInfo->arraySize; + //image_desc.image_rowPitch = imageInfo->rowPitch; + //image_desc.image_slicePitch = imageInfo->slicePitch; + image_desc.num_mip_levels = imageInfo->num_mip_levels; + + unprotImage = clCreateImage( context, + image_read_write_flags, + imageInfo->format, &image_desc, NULL, &error); + if( error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create %d level mipmapped 2D image array of size %d x %d x %d (pitch %d, %d ) (%s)", (int)imageInfo->num_mip_levels, (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->arraySize, (int)imageInfo->rowPitch, (int)imageInfo->slicePitch, IGetErrorString( error ) ); + return error; + } + } + else + { + // Note: if ALLOC_HOST_PTR is used, the driver allocates memory that can be accessed by the host, but otherwise + // it works just as if no flag is specified, so we just do the same thing either way + unprotImage = create_image_2d_array( context, + image_read_write_flags | gMemFlagsToUse, + imageInfo->format, + imageInfo->width, imageInfo->height, imageInfo->arraySize, + ( gEnablePitch ? imageInfo->rowPitch : 0 ), + ( gEnablePitch ? 
imageInfo->slicePitch : 0 ), + imageValues, &error ); + if( error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create 2D image array of size %d x %d x %d (pitch %d, %d ) (%s)", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->arraySize, (int)imageInfo->rowPitch, (int)imageInfo->slicePitch, IGetErrorString( error ) ); + return error; + } + } + image = unprotImage; + } + + if( gMemFlagsToUse != CL_MEM_COPY_HOST_PTR ) + { + if( gDebugTrace ) + log_info( " - Writing image...\n" ); + + size_t origin[ 4 ] = { 0, 0, 0, 0 }; + size_t region[ 3 ] = { imageInfo->width, imageInfo->height, imageInfo->arraySize }; + size_t tmpNextLevelOffset = 0; + + if( gTestMipmaps ) + { + for(int level = 0; level < imageInfo->num_mip_levels; level++) + { + origin[3] = level; + error = clEnqueueWriteImage(queue, image, CL_TRUE, + origin, region, /*gEnablePitch ? imageInfo->rowPitch :*/ 0, /*gEnablePitch ? imageInfo->slicePitch :*/ 0, + imageValues + tmpNextLevelOffset, 0, NULL, NULL); + if (error != CL_SUCCESS) + { + log_error( "ERROR: Unable to write to level %d of 2D image array of size %d x %d x %d\n", (int)imageInfo->num_mip_levels, (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->arraySize ); + return error; + } + + tmpNextLevelOffset += region[0]*region[1]*region[2]*get_pixel_size(imageInfo->format); + region[0] = ( region[0] >> 1 ) ? ( region[0] >> 1 ) : 1; + region[1] = ( region[1] >> 1 ) ? ( region[1] >> 1 ) : 1; + } + } + else + { + error = clEnqueueWriteImage(queue, image, CL_TRUE, + origin, region, gEnablePitch ? imageInfo->rowPitch : 0, gEnablePitch ? 
imageInfo->slicePitch : 0, + imageValues, 0, NULL, NULL); + if (error != CL_SUCCESS) + { + log_error( "ERROR: Unable to write to 2D image array of size %d x %d x %d\n", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->arraySize ); + return error; + } + } + } + + xOffsets = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), sizeof( cl_float ) * imageInfo->width * imageInfo->height * imageInfo->arraySize, xOffsetValues, &error ); + test_error( error, "Unable to create x offset buffer" ); + yOffsets = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), sizeof( cl_float ) * imageInfo->width * imageInfo->height * imageInfo->arraySize, yOffsetValues, &error ); + test_error( error, "Unable to create y offset buffer" ); + zOffsets = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), sizeof( cl_float ) * imageInfo->width * imageInfo->height * imageInfo->arraySize, zOffsetValues, &error ); + test_error( error, "Unable to create z offset buffer" ); + results = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), get_explicit_type_size( outputType ) * 4 * imageInfo->width * imageInfo->height * imageInfo->arraySize, NULL, &error ); + test_error( error, "Unable to create result buffer" ); + + // Create sampler to use + cl_sampler_properties properties[] = { + CL_SAMPLER_NORMALIZED_COORDS, imageSampler->normalized_coords, + CL_SAMPLER_ADDRESSING_MODE, imageSampler->addressing_mode, + CL_SAMPLER_FILTER_MODE, imageSampler->filter_mode, + 0, 0, 0 }; + if (gTestMipmaps) { + properties[6] = CL_SAMPLER_MIP_FILTER_MODE; + properties[7] = imageSampler->filter_mode; + } + actualSampler = clCreateSamplerWithProperties(context, properties, &error ); + test_error( error, "Unable to create image sampler" ); + + // Set arguments + int idx = 0; + error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &image ); + test_error( error, "Unable to set kernel arguments" ); + if( !gUseKernelSamplers ) + { + error = clSetKernelArg( kernel, 
idx++, sizeof( cl_sampler ), &actualSampler ); + test_error( error, "Unable to set kernel arguments" ); + } + error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &xOffsets ); + test_error( error, "Unable to set kernel arguments" ); + error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &yOffsets ); + test_error( error, "Unable to set kernel arguments" ); + error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &zOffsets ); + test_error( error, "Unable to set kernel arguments" ); + error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &results ); + test_error( error, "Unable to set kernel arguments" ); + + const float float_offsets[] = { 0.0f, MAKE_HEX_FLOAT(0x1.0p-30f, 0x1L, -30), 0.25f, 0.3f, 0.5f - FLT_EPSILON/4.0f, 0.5f, 0.9f, 1.0f - FLT_EPSILON/2 }; + int float_offset_count = sizeof( float_offsets) / sizeof( float_offsets[0] ); + int numTries = MAX_TRIES, numClamped = MAX_CLAMPED; + int loopCount = 2 * float_offset_count; + if( ! useFloatCoords ) + loopCount = 1; + if (gTestMaxImages) { + loopCount = 1; + log_info("Testing each size only once with pixel offsets of %g for max sized images.\n", float_offsets[0]); + } + + // Get the maximum absolute error for this format + double formatAbsoluteError = get_max_absolute_error(imageInfo->format, imageSampler); + if (gDebugTrace) log_info("\tformatAbsoluteError is %e\n", formatAbsoluteError); + + if (0 == initHalf && imageInfo->format->image_channel_data_type == CL_HALF_FLOAT ) { + initHalf = CL_SUCCESS == DetectFloatToHalfRoundingMode( queue ); + if (initHalf) { + log_info("Half rounding mode successfully detected.\n"); + } + } + size_t nextLevelOffset = 0; + size_t width_lod = imageInfo->width, height_lod = imageInfo->height; + for( size_t lod = 0; (gTestMipmaps && (lod < imageInfo->num_mip_levels))|| (!gTestMipmaps && lod < 1); lod ++) + { + size_t resultValuesSize = width_lod * height_lod * imageInfo->arraySize * get_explicit_type_size( outputType ) * 4; + BufferOwningPtr resultValues(malloc( 
resultValuesSize )); + float lod_float = (float)lod; + if( gTestMipmaps ) + { + if(gDebugTrace) + log_info(" - Working at mip level %d\n", lod); + error = clSetKernelArg( kernel, idx, sizeof(float), &lod_float); + } + for( int q = 0; q < loopCount; q++ ) + { + float offset = float_offsets[ q % float_offset_count ]; + + // Init the coordinates + InitFloatCoords( imageInfo, imageSampler, xOffsetValues, yOffsetValues, zOffsetValues, + q>=float_offset_count ? -offset: offset, + q>=float_offset_count ? offset: -offset, + q>=float_offset_count ? -offset: offset, + imageSampler->normalized_coords, d, lod ); + + error = clEnqueueWriteBuffer( queue, xOffsets, CL_TRUE, 0, sizeof(cl_float) * imageInfo->height * imageInfo->width * imageInfo->arraySize, xOffsetValues, 0, NULL, NULL ); + test_error( error, "Unable to write x offsets" ); + error = clEnqueueWriteBuffer( queue, yOffsets, CL_TRUE, 0, sizeof(cl_float) * imageInfo->height * imageInfo->width * imageInfo->arraySize, yOffsetValues, 0, NULL, NULL ); + test_error( error, "Unable to write y offsets" ); + error = clEnqueueWriteBuffer( queue, zOffsets, CL_TRUE, 0, sizeof(cl_float) * imageInfo->height * imageInfo->width * imageInfo->arraySize, zOffsetValues, 0, NULL, NULL ); + test_error( error, "Unable to write z offsets" ); + + + memset( resultValues, 0xff, resultValuesSize ); + clEnqueueWriteBuffer( queue, results, CL_TRUE, 0, resultValuesSize, resultValues, 0, NULL, NULL ); + + // Figure out thread dimensions + threads[0] = (size_t)width_lod; + threads[1] = (size_t)height_lod; + threads[2] = (size_t)imageInfo->arraySize; + + // Run the kernel + error = clEnqueueNDRangeKernel( queue, kernel, 3, NULL, threads, NULL, 0, NULL, NULL ); + test_error( error, "Unable to run kernel" ); + + // Get results + error = clEnqueueReadBuffer( queue, results, CL_TRUE, 0, width_lod * height_lod * imageInfo->arraySize * get_explicit_type_size( outputType ) * 4, resultValues, 0, NULL, NULL ); + test_error( error, "Unable to read results from 
kernel" ); + if( gDebugTrace ) + log_info( " results read\n" ); + + // Validate results element by element + char *imagePtr = (char *)imageValues + nextLevelOffset; + + if((imageInfo->format->image_channel_order == CL_DEPTH) && (outputType == kFloat) ) + { + // Validate float results + float *resultPtr = (float *)(char *)resultValues; + float expected[4], error=0.0f; + float maxErr = get_max_relative_error( imageInfo->format, imageSampler, 1 /*3D*/, CL_FILTER_LINEAR == imageSampler->filter_mode ); + + for( size_t z = 0, j = 0; z < imageInfo->arraySize; z++ ) + { + for( size_t y = 0; y < height_lod; y++ ) + { + for( size_t x = 0; x < width_lod; x++, j++ ) + { + // Step 1: go through and see if the results verify for the pixel + // For the normalized case on a GPU we put in offsets to the X, Y and Z to see if we land on the + // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. + int checkOnlyOnePixel = 0; + int found_pixel = 0; + float offset = NORM_OFFSET; + if (!imageSampler->normalized_coords || imageSampler->filter_mode != CL_FILTER_NEAREST || NORM_OFFSET == 0 +#if defined( __APPLE__ ) + // Apple requires its CPU implementation to do correctly rounded address arithmetic in all modes + || gDeviceType != CL_DEVICE_TYPE_GPU +#endif + ) + offset = 0.0f; // Loop only once + + for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel ; norm_offset_x += NORM_OFFSET) { + for (float norm_offset_y = -offset; norm_offset_y <= offset && !found_pixel ; norm_offset_y += NORM_OFFSET) { + for (float norm_offset_z = -offset; norm_offset_z <= NORM_OFFSET && !found_pixel; norm_offset_z += NORM_OFFSET) { + + int hasDenormals = 0; + FloatPixel maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], + norm_offset_x, norm_offset_y, norm_offset_z, + imageSampler, expected, 0, &hasDenormals, lod ); + + float err1 = fabsf( resultPtr[0] - expected[0] ); + // 
Clamp to the minimum absolute error for the format + if (err1 > 0 && err1 < formatAbsoluteError) { err1 = 0.0f; } + float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN ); + + if( ! (err1 <= maxErr1) ) + { + // Try flushing the denormals + if( hasDenormals ) + { + // If implementation decide to flush subnormals to zero, + // max error needs to be adjusted + maxErr1 += 4 * FLT_MIN; + + maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], + norm_offset_x, norm_offset_y, norm_offset_z, + imageSampler, expected, 0, NULL, lod ); + + err1 = fabsf( resultPtr[0] - expected[0] ); + } + } + + found_pixel = (err1 <= maxErr1); + }//norm_offset_z + }//norm_offset_y + }//norm_offset_x + + // Step 2: If we did not find a match, then print out debugging info. + if (!found_pixel) { + // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the + // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. + checkOnlyOnePixel = 0; + int shouldReturn = 0; + for (float norm_offset_x = -offset; norm_offset_x <= offset && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) { + for (float norm_offset_y = -offset; norm_offset_y <= offset && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) { + for (float norm_offset_z = -offset; norm_offset_z <= offset && !checkOnlyOnePixel; norm_offset_z += NORM_OFFSET) { + + int hasDenormals = 0; + FloatPixel maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], + norm_offset_x, norm_offset_y, norm_offset_z, + imageSampler, expected, 0, &hasDenormals, lod ); + + float err1 = fabsf( resultPtr[0] - expected[0] ); + float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN ); + + + if( ! 
(err1 <= maxErr1) ) + { + // Try flushing the denormals + if( hasDenormals ) + { + maxErr1 += 4 * FLT_MIN; + + maxPixel = sample_image_pixel_float( imagePtr, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], + imageSampler, expected, 0, NULL, lod ); + + err1 = fabsf( resultPtr[0] - expected[0] ); + } + } + + if( ! (err1 <= maxErr1) ) + { + log_error("FAILED norm_offsets: %g , %g , %g:\n", norm_offset_x, norm_offset_y, norm_offset_z); + + float tempOut[4]; + shouldReturn |= determine_validation_error_offset_2D_array( imagePtr, imageInfo, imageSampler, resultPtr, + expected, error, xOffsetValues[j], yOffsetValues[j], zOffsetValues[j], + norm_offset_x, norm_offset_y, norm_offset_z, j, + numTries, numClamped, true, lod ); + log_error( "Step by step:\n" ); + FloatPixel temp = sample_image_pixel_float_offset( imagePtr, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], + norm_offset_x, norm_offset_y, norm_offset_z, + imageSampler, tempOut, 1 /*verbose*/, &hasDenormals, lod); + log_error( "\tulps: %2.2f (max allowed: %2.2f)\n\n", + Ulp_Error( resultPtr[0], expected[0] ), + Ulp_Error( MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) + maxErr, MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) ) ); + } else { + log_error("Test error: we should have detected this passing above.\n"); + } + }//norm_offset_z + }//norm_offset_y + }//norm_offset_x + if( shouldReturn ) + return 1; + } // if (!found_pixel) + + resultPtr += 1; + } + } + } + } + /* + * FLOAT output type, order=CL_sRGBA, CL_sRGB, CL_sRGBx, CL_BGRA + */ + else if(is_sRGBA_order(imageInfo->format->image_channel_order) && (outputType == kFloat) ) + { + // Validate float results + float *resultPtr = (float *)(char *)resultValues; + float expected[4], error=0.0f; + float maxErr = get_max_relative_error( imageInfo->format, imageSampler, 1 /*3D*/, CL_FILTER_LINEAR == imageSampler->filter_mode ); + + for( size_t z = 0, j = 0; z < imageInfo->arraySize; z++ ) + { + for( size_t y = 0; y < 
height_lod; y++ ) + { + for( size_t x = 0; x < width_lod; x++, j++ ) + { + // Step 1: go through and see if the results verify for the pixel + // For the normalized case on a GPU we put in offsets to the X, Y and Z to see if we land on the + // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. + int checkOnlyOnePixel = 0; + int found_pixel = 0; + float offset = NORM_OFFSET; + if (!imageSampler->normalized_coords || imageSampler->filter_mode != CL_FILTER_NEAREST || NORM_OFFSET == 0 +#if defined( __APPLE__ ) + // Apple requires its CPU implementation to do correctly rounded address arithmetic in all modes + || gDeviceType != CL_DEVICE_TYPE_GPU +#endif + ) + offset = 0.0f; // Loop only once + + for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel ; norm_offset_x += NORM_OFFSET) { + for (float norm_offset_y = -offset; norm_offset_y <= offset && !found_pixel ; norm_offset_y += NORM_OFFSET) { + for (float norm_offset_z = -offset; norm_offset_z <= NORM_OFFSET && !found_pixel; norm_offset_z += NORM_OFFSET) { + + int hasDenormals = 0; + FloatPixel maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], + norm_offset_x, norm_offset_y, norm_offset_z, + imageSampler, expected, 0, &hasDenormals, lod ); + + float err1 = fabsf( sRGBmap( resultPtr[0] ) - sRGBmap( expected[0] ) ); + float err2 = fabsf( sRGBmap( resultPtr[1] ) - sRGBmap( expected[1] ) ); + float err3 = fabsf( sRGBmap( resultPtr[2] ) - sRGBmap( expected[2] ) ); + float err4 = fabsf( resultPtr[3] - expected[3] ); + float maxErr = 0.5; + + if( ! (err1 <= maxErr) || ! (err2 <= maxErr) || ! (err3 <= maxErr) || ! 
(err4 <= maxErr) ) + { + // Try flushing the denormals + if( hasDenormals ) + { + // If implementation decide to flush subnormals to zero, + // max error needs to be adjusted + maxErr += 4 * FLT_MIN; + + maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], + norm_offset_x, norm_offset_y, norm_offset_z, + imageSampler, expected, 0, NULL, lod ); + + err1 = fabsf( sRGBmap( resultPtr[0] ) - sRGBmap( expected[0] ) ); + err2 = fabsf( sRGBmap( resultPtr[1] ) - sRGBmap( expected[1] ) ); + err3 = fabsf( sRGBmap( resultPtr[2] ) - sRGBmap( expected[2] ) ); + err4 = fabsf( resultPtr[3] - expected[3] ); + } + } + + found_pixel = (err1 <= maxErr) && (err2 <= maxErr) && (err3 <= maxErr) && (err4 <= maxErr); + }//norm_offset_z + }//norm_offset_y + }//norm_offset_x + + // Step 2: If we did not find a match, then print out debugging info. + if (!found_pixel) { + // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the + // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. 
+ checkOnlyOnePixel = 0; + int shouldReturn = 0; + for (float norm_offset_x = -offset; norm_offset_x <= offset && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) { + for (float norm_offset_y = -offset; norm_offset_y <= offset && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) { + for (float norm_offset_z = -offset; norm_offset_z <= offset && !checkOnlyOnePixel; norm_offset_z += NORM_OFFSET) { + + int hasDenormals = 0; + FloatPixel maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], + norm_offset_x, norm_offset_y, norm_offset_z, + imageSampler, expected, 0, &hasDenormals, lod ); + + float err1 = fabsf( sRGBmap( resultPtr[0] ) - sRGBmap( expected[0] ) ); + float err2 = fabsf( sRGBmap( resultPtr[1] ) - sRGBmap( expected[1] ) ); + float err3 = fabsf( sRGBmap( resultPtr[2] ) - sRGBmap( expected[2] ) ); + float err4 = fabsf( resultPtr[3] - expected[3] ); + float maxErr = 0.6; + + if( ! (err1 <= maxErr) || ! (err2 <= maxErr) || ! (err3 <= maxErr) || ! (err4 <= maxErr) ) + { + // Try flushing the denormals + if( hasDenormals ) + { + // If implementation decide to flush subnormals to zero, + // max error needs to be adjusted + maxErr += 4 * FLT_MIN; + + maxPixel = sample_image_pixel_float( imagePtr, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], + imageSampler, expected, 0, NULL, lod ); + + err1 = fabsf( sRGBmap( resultPtr[0] ) - sRGBmap( expected[0] ) ); + err2 = fabsf( sRGBmap( resultPtr[1] ) - sRGBmap( expected[1] ) ); + err3 = fabsf( sRGBmap( resultPtr[2] ) - sRGBmap( expected[2] ) ); + err4 = fabsf( resultPtr[3] - expected[3] ); + } + } + + if( ! (err1 <= maxErr) || ! (err2 <= maxErr) || ! (err3 <= maxErr) || ! 
(err4 <= maxErr) ) + { + log_error("FAILED norm_offsets: %g , %g , %g:\n", norm_offset_x, norm_offset_y, norm_offset_z); + + float tempOut[4]; + shouldReturn |= determine_validation_error_offset_2D_array( imagePtr, imageInfo, imageSampler, resultPtr, + expected, error, xOffsetValues[j], yOffsetValues[j], zOffsetValues[j], + norm_offset_x, norm_offset_y, norm_offset_z, j, + numTries, numClamped, true, lod ); + log_error( "Step by step:\n" ); + FloatPixel temp = sample_image_pixel_float_offset( imagePtr, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], + norm_offset_x, norm_offset_y, norm_offset_z, + imageSampler, tempOut, 1 /*verbose*/, &hasDenormals, lod); + log_error( "\tulps: %2.2f, %2.2f, %2.2f, %2.2f (max allowed: %2.2f)\n\n", + Ulp_Error( resultPtr[0], expected[0] ), + Ulp_Error( resultPtr[1], expected[1] ), + Ulp_Error( resultPtr[2], expected[2] ), + Ulp_Error( resultPtr[3], expected[3] ), + Ulp_Error( MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) + maxErr, MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) ) ); + } else { + log_error("Test error: we should have detected this passing above.\n"); + } + }//norm_offset_z + }//norm_offset_y + }//norm_offset_x + if( shouldReturn ) + return 1; + } // if (!found_pixel) + + resultPtr += 4; + } + } + } + } + /* + * FLOAT output type + */ + else if( outputType == kFloat ) + { + // Validate float results + float *resultPtr = (float *)(char *)resultValues; + float expected[4], error=0.0f; + float maxErr = get_max_relative_error( imageInfo->format, imageSampler, 1 /*3D*/, CL_FILTER_LINEAR == imageSampler->filter_mode ); + + for( size_t z = 0, j = 0; z < imageInfo->arraySize; z++ ) + { + for( size_t y = 0; y < height_lod; y++ ) + { + for( size_t x = 0; x < width_lod; x++, j++ ) + { + // Step 1: go through and see if the results verify for the pixel + // For the normalized case on a GPU we put in offsets to the X, Y and Z to see if we land on the + // right pixel. 
This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. + int checkOnlyOnePixel = 0; + int found_pixel = 0; + float offset = NORM_OFFSET; + if (!imageSampler->normalized_coords || imageSampler->filter_mode != CL_FILTER_NEAREST || NORM_OFFSET == 0 +#if defined( __APPLE__ ) + // Apple requires its CPU implementation to do correctly rounded address arithmetic in all modes + || gDeviceType != CL_DEVICE_TYPE_GPU +#endif + ) + offset = 0.0f; // Loop only once + + for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel ; norm_offset_x += NORM_OFFSET) { + for (float norm_offset_y = -offset; norm_offset_y <= offset && !found_pixel ; norm_offset_y += NORM_OFFSET) { + for (float norm_offset_z = -offset; norm_offset_z <= NORM_OFFSET && !found_pixel; norm_offset_z += NORM_OFFSET) { + + int hasDenormals = 0; + FloatPixel maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], + norm_offset_x, norm_offset_y, norm_offset_z, + imageSampler, expected, 0, &hasDenormals, lod ); + + float err1 = fabsf( resultPtr[0] - expected[0] ); + float err2 = fabsf( resultPtr[1] - expected[1] ); + float err3 = fabsf( resultPtr[2] - expected[2] ); + float err4 = fabsf( resultPtr[3] - expected[3] ); + // Clamp to the minimum absolute error for the format + if (err1 > 0 && err1 < formatAbsoluteError) { err1 = 0.0f; } + if (err2 > 0 && err2 < formatAbsoluteError) { err2 = 0.0f; } + if (err3 > 0 && err3 < formatAbsoluteError) { err3 = 0.0f; } + if (err4 > 0 && err4 < formatAbsoluteError) { err4 = 0.0f; } + float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN ); + float maxErr2 = MAX( maxErr * maxPixel.p[1], FLT_MIN ); + float maxErr3 = MAX( maxErr * maxPixel.p[2], FLT_MIN ); + float maxErr4 = MAX( maxErr * maxPixel.p[3], FLT_MIN ); + + if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2) || ! (err3 <= maxErr3) || ! 
(err4 <= maxErr4) ) + { + // Try flushing the denormals + if( hasDenormals ) + { + // If implementation decide to flush subnormals to zero, + // max error needs to be adjusted + maxErr1 += 4 * FLT_MIN; + maxErr2 += 4 * FLT_MIN; + maxErr3 += 4 * FLT_MIN; + maxErr4 += 4 * FLT_MIN; + + maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], + norm_offset_x, norm_offset_y, norm_offset_z, + imageSampler, expected, 0, NULL, lod ); + + err1 = fabsf( resultPtr[0] - expected[0] ); + err2 = fabsf( resultPtr[1] - expected[1] ); + err3 = fabsf( resultPtr[2] - expected[2] ); + err4 = fabsf( resultPtr[3] - expected[3] ); + } + } + + found_pixel = (err1 <= maxErr1) && (err2 <= maxErr2) && (err3 <= maxErr3) && (err4 <= maxErr4); + }//norm_offset_z + }//norm_offset_y + }//norm_offset_x + + // Step 2: If we did not find a match, then print out debugging info. + if (!found_pixel) { + // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the + // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. 
+ checkOnlyOnePixel = 0; + int shouldReturn = 0; + for (float norm_offset_x = -offset; norm_offset_x <= offset && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) { + for (float norm_offset_y = -offset; norm_offset_y <= offset && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) { + for (float norm_offset_z = -offset; norm_offset_z <= offset && !checkOnlyOnePixel; norm_offset_z += NORM_OFFSET) { + + int hasDenormals = 0; + FloatPixel maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], + norm_offset_x, norm_offset_y, norm_offset_z, + imageSampler, expected, 0, &hasDenormals, lod ); + + float err1 = fabsf( resultPtr[0] - expected[0] ); + float err2 = fabsf( resultPtr[1] - expected[1] ); + float err3 = fabsf( resultPtr[2] - expected[2] ); + float err4 = fabsf( resultPtr[3] - expected[3] ); + float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN ); + float maxErr2 = MAX( maxErr * maxPixel.p[1], FLT_MIN ); + float maxErr3 = MAX( maxErr * maxPixel.p[2], FLT_MIN ); + float maxErr4 = MAX( maxErr * maxPixel.p[3], FLT_MIN ); + + + if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2) || ! (err3 <= maxErr3) || ! (err4 <= maxErr4) ) + { + // Try flushing the denormals + if( hasDenormals ) + { + maxErr1 += 4 * FLT_MIN; + maxErr2 += 4 * FLT_MIN; + maxErr3 += 4 * FLT_MIN; + maxErr4 += 4 * FLT_MIN; + + maxPixel = sample_image_pixel_float( imagePtr, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], + imageSampler, expected, 0, NULL, lod ); + + err1 = fabsf( resultPtr[0] - expected[0] ); + err2 = fabsf( resultPtr[1] - expected[1] ); + err3 = fabsf( resultPtr[2] - expected[2] ); + err4 = fabsf( resultPtr[3] - expected[3] ); + } + } + + if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2) || ! (err3 <= maxErr3) || ! 
(err4 <= maxErr4) ) + { + log_error("FAILED norm_offsets: %g , %g , %g:\n", norm_offset_x, norm_offset_y, norm_offset_z); + + float tempOut[4]; + shouldReturn |= determine_validation_error_offset_2D_array( imagePtr, imageInfo, imageSampler, resultPtr, + expected, error, xOffsetValues[j], yOffsetValues[j], zOffsetValues[j], + norm_offset_x, norm_offset_y, norm_offset_z, j, + numTries, numClamped, true, lod ); + log_error( "Step by step:\n" ); + FloatPixel temp = sample_image_pixel_float_offset( imagePtr, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], + norm_offset_x, norm_offset_y, norm_offset_z, + imageSampler, tempOut, 1 /*verbose*/, &hasDenormals, lod); + log_error( "\tulps: %2.2f, %2.2f, %2.2f, %2.2f (max allowed: %2.2f)\n\n", + Ulp_Error( resultPtr[0], expected[0] ), + Ulp_Error( resultPtr[1], expected[1] ), + Ulp_Error( resultPtr[2], expected[2] ), + Ulp_Error( resultPtr[3], expected[3] ), + Ulp_Error( MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) + maxErr, MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) ) ); + } else { + log_error("Test error: we should have detected this passing above.\n"); + } + }//norm_offset_z + }//norm_offset_y + }//norm_offset_x + if( shouldReturn ) + return 1; + } // if (!found_pixel) + + resultPtr += 4; + } + } + } + } + /* + * UINT output type + */ + else if( outputType == kUInt ) + { + // Validate unsigned integer results + unsigned int *resultPtr = (unsigned int *)(char *)resultValues; + unsigned int expected[4]; + float error; + for( size_t z = 0, j = 0; z < imageInfo->arraySize; z++ ) + { + for( size_t y = 0; y < height_lod; y++ ) + { + for( size_t x = 0; x < width_lod; x++, j++ ) + { + // Step 1: go through and see if the results verify for the pixel + // For the normalized case on a GPU we put in offsets to the X, Y and Z to see if we land on the + // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. 
+ int checkOnlyOnePixel = 0; + int found_pixel = 0; + for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) { + for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) { + for (float norm_offset_z = -NORM_OFFSET; norm_offset_z <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_z += NORM_OFFSET) { + + // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) + // E.g., test one pixel. + if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + norm_offset_x = 0.0f; + norm_offset_y = 0.0f; + norm_offset_z = 0.0f; + checkOnlyOnePixel = 1; + } + + if(gTestMipmaps) + sample_image_pixel_offset( imagePtr, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], + norm_offset_x, norm_offset_y, norm_offset_z, + imageSampler, expected, lod ); + else + sample_image_pixel_offset( imageValues, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], + norm_offset_x, norm_offset_y, norm_offset_z, + imageSampler, expected ); + + error = errMax( errMax( abs_diff_uint(expected[ 0 ], resultPtr[ 0 ]), abs_diff_uint(expected[ 1 ], resultPtr[ 1 ]) ), + errMax( abs_diff_uint(expected[ 2 ], resultPtr[ 2 ]), abs_diff_uint(expected[ 3 ], resultPtr[ 3 ]) ) ); + + if (error < MAX_ERR) + found_pixel = 1; + }//norm_offset_z + }//norm_offset_y + }//norm_offset_x + + // Step 2: If we did not find a match, then print out debugging info. + if (!found_pixel) { + // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the + // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. 
+ checkOnlyOnePixel = 0; + int shouldReturn = 0; + for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) { + for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) { + for (float norm_offset_z = -NORM_OFFSET; norm_offset_z <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_z += NORM_OFFSET) { + + // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) + // E.g., test one pixel. + if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + norm_offset_x = 0.0f; + norm_offset_y = 0.0f; + norm_offset_z = 0.0f; + checkOnlyOnePixel = 1; + } + + if(gTestMipmaps) + sample_image_pixel_offset( imagePtr, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], + norm_offset_x, norm_offset_y, norm_offset_z, + imageSampler, expected, lod ); + else + sample_image_pixel_offset( imageValues, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], + norm_offset_x, norm_offset_y, norm_offset_z, + imageSampler, expected ); + + error = errMax( errMax( abs_diff_uint(expected[ 0 ], resultPtr[ 0 ]), abs_diff_uint(expected[ 1 ], resultPtr[ 1 ]) ), + errMax( abs_diff_uint(expected[ 2 ], resultPtr[ 2 ]), abs_diff_uint(expected[ 3 ], resultPtr[ 3 ]) ) ); + + if( error > MAX_ERR ) + { + log_error("FAILED norm_offsets: %g , %g , %g:\n", norm_offset_x, norm_offset_y, norm_offset_z); + shouldReturn |= determine_validation_error_offset_2D_array( imagePtr, imageInfo, imageSampler, resultPtr, + expected, error, xOffsetValues[j], yOffsetValues[j], zOffsetValues[j], + norm_offset_x, norm_offset_y, norm_offset_z, + j, numTries, numClamped, false, lod ); + } else { + log_error("Test error: we should have detected this passing above.\n"); + } + }//norm_offset_z + }//norm_offset_y + }//norm_offset_x + if( shouldReturn ) + return 1; + } // if (!found_pixel) + + 
resultPtr += 4; + } + } + } + } + else + /* + * INT output type + */ + { + // Validate integer results + int *resultPtr = (int *)(char *)resultValues; + int expected[4]; + float error; + for( size_t z = 0, j = 0; z < imageInfo->arraySize; z++ ) + { + for( size_t y = 0; y < height_lod; y++ ) + { + for( size_t x = 0; x < width_lod; x++, j++ ) + { + // Step 1: go through and see if the results verify for the pixel + // For the normalized case on a GPU we put in offsets to the X, Y and Z to see if we land on the + // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. + int checkOnlyOnePixel = 0; + int found_pixel = 0; + for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) { + for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) { + for (float norm_offset_z = -NORM_OFFSET; norm_offset_z <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_z += NORM_OFFSET) { + + // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) + // E.g., test one pixel. 
+ if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + norm_offset_x = 0.0f; + norm_offset_y = 0.0f; + norm_offset_z = 0.0f; + checkOnlyOnePixel = 1; + } + + if(gTestMipmaps) + sample_image_pixel_offset( imagePtr, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], + norm_offset_x, norm_offset_y, norm_offset_z, + imageSampler, expected, lod ); + else + sample_image_pixel_offset( imageValues, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], + norm_offset_x, norm_offset_y, norm_offset_z, + imageSampler, expected ); + + error = errMax( errMax( abs_diff_int(expected[ 0 ], resultPtr[ 0 ]), abs_diff_int(expected[ 1 ], resultPtr[ 1 ]) ), + errMax( abs_diff_int(expected[ 2 ], resultPtr[ 2 ]), abs_diff_int(expected[ 3 ], resultPtr[ 3 ]) ) ); + + if (error < MAX_ERR) + found_pixel = 1; + }//norm_offset_z + }//norm_offset_y + }//norm_offset_x + + // Step 2: If we did not find a match, then print out debugging info. + if (!found_pixel) { + // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the + // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. + checkOnlyOnePixel = 0; + int shouldReturn = 0; + for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) { + for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) { + for (float norm_offset_z = -NORM_OFFSET; norm_offset_z <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_z += NORM_OFFSET) { + + // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) + // E.g., test one pixel. 
+ if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + norm_offset_x = 0.0f; + norm_offset_y = 0.0f; + norm_offset_z = 0.0f; + checkOnlyOnePixel = 1; + } + + if(gTestMipmaps) + sample_image_pixel_offset( imagePtr, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], + norm_offset_x, norm_offset_y, norm_offset_z, + imageSampler, expected, lod ); + else + sample_image_pixel_offset( imageValues, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], + norm_offset_x, norm_offset_y, norm_offset_z, + imageSampler, expected, 0 ); + + error = errMax( errMax( abs_diff_int(expected[ 0 ], resultPtr[ 0 ]), abs_diff_int(expected[ 1 ], resultPtr[ 1 ]) ), + errMax( abs_diff_int(expected[ 2 ], resultPtr[ 2 ]), abs_diff_int(expected[ 3 ], resultPtr[ 3 ]) ) ); + + if( error > MAX_ERR ) + { + log_error("FAILED norm_offsets: %g , %g , %g:\n", norm_offset_x, norm_offset_y, norm_offset_z); + shouldReturn |= determine_validation_error_offset_2D_array( imagePtr, imageInfo, imageSampler, resultPtr, + expected, error, xOffsetValues[j], yOffsetValues[j], zOffsetValues[j], + norm_offset_x, norm_offset_y, norm_offset_z, + j, numTries, numClamped, false, lod ); + } else { + log_error("Test error: we should have detected this passing above.\n"); + } + }//norm_offset_z + }//norm_offset_y + }//norm_offset_x + if( shouldReturn ) + return 1; + } // if (!found_pixel) + + resultPtr += 4; + } + } + } + } + } + { + nextLevelOffset += width_lod * height_lod * imageInfo->arraySize * get_pixel_size(imageInfo->format); + width_lod = ( width_lod >> 1 ) ? ( width_lod >> 1 ) : 1; + height_lod = ( height_lod >> 1 ) ? 
( height_lod >> 1 ) : 1; + } + } + + return numTries != MAX_TRIES || numClamped != MAX_CLAMPED; +} + +int test_read_image_set_2D_array( cl_device_id device, cl_image_format *format, image_sampler_data *imageSampler, + bool floatCoords, ExplicitType outputType ) +{ + char programSrc[10240]; + const char *ptr; + const char *readFormat; + RandomSeed seed( gRandomSeed ); + + const char *KernelSourcePattern = NULL; + + int error; + + clProgramWrapper program; + clKernelWrapper kernel; + + // Get operating parameters + size_t maxWidth, maxHeight, maxArraySize; + cl_ulong maxAllocSize, memSize; + image_descriptor imageInfo = { 0x0 }; + size_t pixelSize; + + imageInfo.format = format; + imageInfo.type = CL_MEM_OBJECT_IMAGE2D_ARRAY; + pixelSize = get_pixel_size( imageInfo.format ); + + error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, sizeof( maxArraySize ), &maxArraySize, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL ); + test_error( error, "Unable to get max image 3D size from device" ); + + if (memSize > (cl_ulong)SIZE_MAX) { + memSize = (cl_ulong)SIZE_MAX; + } + + // Determine types + if( outputType == kInt ) + readFormat = "i"; + else if( outputType == kUInt ) + readFormat = "ui"; + else // kFloat + readFormat = "f"; + + // Construct the source + const char *samplerArg = samplerKernelArg; + char samplerVar[ 1024 ] = ""; + if( gUseKernelSamplers ) + { + get_sampler_kernel_code( imageSampler, samplerVar ); + samplerArg = ""; + } + + const char *imageType; + const char *imageElement; + if (format->image_channel_order == CL_DEPTH) + { + imageType = "image2d_array_depth_t"; + 
imageElement = ""; + } + else + { + imageType = "image2d_array_t"; + imageElement = "4"; + } + + // Construct the source + if(gtestTypesToRun & kReadTests) + { + KernelSourcePattern = read2DArrayKernelSourcePattern; + } + else + { + KernelSourcePattern = read_write2DArrayKernelSourcePattern; + } + + // Construct the source + sprintf( programSrc, + KernelSourcePattern, + imageType, + samplerArg, get_explicit_type_name( outputType ), + imageElement, + gTestMipmaps ? ", float lod" : " ", + samplerVar, + gTestMipmaps ? offset2DarraySourceLod : offset2DarraySource, + floatCoords ? float2DArrayUnnormalizedCoordKernelSource : int2DArrayCoordKernelSource, + readFormat, + gTestMipmaps ? ", lod" : " " ); + + ptr = programSrc; + error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", "-cl-std=CL2.0" ); + test_error( error, "Unable to create testing kernel" ); + + // Run tests + + if( gTestSmallImages ) + { + for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ ) + { + imageInfo.rowPitch = imageInfo.width * pixelSize; + + for( imageInfo.height = 1; imageInfo.height < 9; imageInfo.height++ ) + { + imageInfo.slicePitch = imageInfo.rowPitch * imageInfo.height; + for( imageInfo.arraySize = 2; imageInfo.arraySize < 9; imageInfo.arraySize++ ) + { + if( gTestMipmaps ) + imageInfo.num_mip_levels = (size_t) random_in_range(2, compute_max_mip_levels(imageInfo.width, imageInfo.height, 0)-1, seed); + + if( gDebugTrace ) + log_info( " at size %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.arraySize ); + int retCode = test_read_image_2D_array( device, context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed ); + if( retCode ) + return retCode; + } + } + } + } + else if( gTestMaxImages ) + { + // Try a specific set of maximum sizes + size_t numbeOfSizes; + size_t sizes[100][3]; + + get_max_sizes(&numbeOfSizes, 100, sizes, maxWidth, maxHeight, 1, maxArraySize, maxAllocSize, 
memSize, CL_MEM_OBJECT_IMAGE2D_ARRAY, imageInfo.format, CL_TRUE); + + for( size_t idx = 0; idx < numbeOfSizes; idx++ ) + { + imageInfo.width = sizes[ idx ][ 0 ]; + imageInfo.height = sizes[ idx ][ 1 ]; + imageInfo.arraySize = sizes[ idx ][ 2 ]; + imageInfo.rowPitch = imageInfo.width * pixelSize; + imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch; + if( gTestMipmaps ) + imageInfo.num_mip_levels = (size_t) random_in_range(2, compute_max_mip_levels(imageInfo.width, imageInfo.height, 0)-1, seed); + cl_ulong size = (cl_ulong)imageInfo.slicePitch * (cl_ulong)imageInfo.arraySize * 4 * 4; + // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that + // image, the result array, plus offset arrays, will fit in the global ram space + while( size > maxAllocSize || ( size * 3 ) > memSize ) + { + if(imageInfo.arraySize == 1) + { + // ArraySize cannot be 0. + break; + } + imageInfo.arraySize--; + size = (cl_ulong)imageInfo.slicePitch * (cl_ulong)imageInfo.arraySize * 4 * 4; + } + + while( size > maxAllocSize || ( size * 3 ) > memSize ) + { + imageInfo.height--; + imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch; + size = (cl_ulong)imageInfo.slicePitch * (cl_ulong)imageInfo.arraySize * 4 * 4; + } + log_info("Testing %d x %d x %d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ], (int)sizes[ idx ][ 2 ]); + if( gDebugTrace ) + log_info( " at max size %d,%d,%d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ], (int)sizes[ idx ][ 2 ] ); + int retCode = test_read_image_2D_array( device, context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed ); + if( retCode ) + return retCode; + } + } + else if( gTestRounding ) + { + size_t typeRange = 1 << ( get_format_type_size( imageInfo.format ) * 8 ); + imageInfo.height = typeRange / 256; + imageInfo.width = (size_t)( typeRange / (cl_ulong)imageInfo.height ); + imageInfo.arraySize = 2; + + imageInfo.rowPitch = imageInfo.width * pixelSize; + 
imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch; + int retCode = test_read_image_2D_array( device, context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed ); + if( retCode ) + return retCode; + } + else + { + int maxWidthRange = (int) reduceImageSizeRange(maxWidth); + int maxHeighthRange = (int) reduceImageSizeRange(maxHeight); + int maxArraySizeRange = (int) reduceImageDepth(maxArraySize); + + for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ ) + { + cl_ulong size; + // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that + // image, the result array, plus offset arrays, will fit in the global ram space + do + { + imageInfo.width = (size_t)random_log_in_range( 16, maxWidthRange, seed ); + imageInfo.height = (size_t)random_log_in_range( 16, maxHeighthRange, seed ); + imageInfo.arraySize = (size_t)random_log_in_range( 8, maxArraySizeRange, seed ); + + imageInfo.rowPitch = imageInfo.width * pixelSize; + imageInfo.slicePitch = imageInfo.rowPitch * imageInfo.height; + + if( gTestMipmaps ) + { + imageInfo.num_mip_levels = random_in_range(2,compute_max_mip_levels(imageInfo.width, imageInfo.height, 0) - 1, seed); + //Need to take into account the output buffer size, otherwise we will end up with input buffer that is exceeding MaxAlloc + size = (cl_ulong) 4*compute_mipmapped_image_size( imageInfo ) * get_explicit_type_size( outputType ); + } + else + { + if( gEnablePitch ) + { + size_t extraWidth = (int)random_log_in_range( 0, 64, seed ); + imageInfo.rowPitch += extraWidth * pixelSize; + + size_t extraHeight = (int)random_log_in_range( 0, 64, seed ); + imageInfo.slicePitch = imageInfo.rowPitch * (imageInfo.height + extraHeight); + } + + size = (cl_ulong)imageInfo.slicePitch * (cl_ulong)imageInfo.arraySize * 4 * 4; + } + } while( size > maxAllocSize || ( size * 3 ) > memSize ); + + if( gDebugTrace ) + { + log_info( " at size %d,%d,%d (pitch %d,%d) out of %d,%d,%d\n", (int)imageInfo.width, 
(int)imageInfo.height, (int)imageInfo.arraySize, (int)imageInfo.rowPitch, (int)imageInfo.slicePitch, (int)maxWidth, (int)maxHeight, (int)maxArraySize ); + if ( gTestMipmaps ) + log_info(" and %d mip levels\n", (int) imageInfo.num_mip_levels); + } + int retCode = test_read_image_2D_array( device, context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed ); + if( retCode ) + return retCode; + } + } + + return 0; +} diff --git a/test_conformance/images/kernel_read_write/test_read_3D.cpp b/test_conformance/images/kernel_read_write/test_read_3D.cpp new file mode 100644 index 00000000..7cf2a4f5 --- /dev/null +++ b/test_conformance/images/kernel_read_write/test_read_3D.cpp @@ -0,0 +1,1328 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../testBase.h" +#include <float.h> + +#define MAX_ERR 0.005f +#define MAX_HALF_LINEAR_ERR 0.3f + +extern cl_command_queue queue; +extern cl_context context; +extern bool gDebugTrace, gExtraValidateInfo, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestRounding, gTestMipmaps; +extern cl_device_type gDeviceType; +extern bool gUseKernelSamplers; +extern cl_filter_mode gFilterModeToUse; +extern cl_addressing_mode gAddressModeToUse; +extern cl_mem_flags gMemFlagsToUse; + +extern int gtestTypesToRun; + +#define MAX_TRIES 1 +#define MAX_CLAMPED 1 + +// Utility function to clamp down image sizes for certain tests to avoid +// using too much memory. +static size_t reduceImageSizeRange(size_t maxDimSize, RandomSeed& seed) { + size_t DimSize = random_log_in_range(16, (int) maxDimSize/32, seed); + if (DimSize > (size_t) 128) + return 128; + else + return DimSize; +} + +static size_t reduceImageDepth(size_t maxDimSize, RandomSeed& seed) { + size_t DimSize = random_log_in_range(8, (int) maxDimSize/32, seed); + if (DimSize > (size_t) 32) + return 32; + else + return DimSize; +} + + +const char *read3DKernelSourcePattern = +"__kernel void sample_kernel( read_only image3d_t input,%s __global float *xOffsets, __global float *yOffsets, __global float *zOffsets, __global %s4 *results %s)\n" +"{\n" +"%s" +" int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = get_global_id(2);\n" +"%s" +"%s" +" results[offset] = read_image%s( input, imageSampler, coords %s);\n" +"}"; + +const char *read_write3DKernelSourcePattern = +"__kernel void sample_kernel( read_write image3d_t input,%s __global float *xOffsets, __global float *yOffsets, __global float *zOffsets, __global %s4 *results %s)\n" +"{\n" +"%s" +" int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = get_global_id(2);\n" +"%s" +"%s" +" results[offset] = read_image%s( input, coords %s);\n" +"}"; + +const char *offset3DKernelSource = +" int offset = 
tidZ*get_image_width(input)*get_image_height(input) + tidY*get_image_width(input) + tidX;\n"; + +const char *offset3DLodKernelSource = +" int lod_int = (int)lod;\n" +" int width_lod = (get_image_width(input) >> lod_int) ?(get_image_width(input) >> lod_int): 1;\n" +" int height_lod = (get_image_height(input) >> lod_int) ?(get_image_height(input) >> lod_int): 1;\n" +" int offset = tidZ*width_lod*height_lod + tidY*width_lod + tidX;\n"; + +const char *int3DCoordKernelSource = +" int4 coords = (int4)( (int) xOffsets[offset], (int) yOffsets[offset], (int) zOffsets[offset], 0 );\n"; + +const char *float3DUnnormalizedCoordKernelSource = +" float4 coords = (float4)( xOffsets[offset], yOffsets[offset], zOffsets[offset], 0.0f );\n"; + + +static const char *samplerKernelArg = " sampler_t imageSampler,"; + +#define ABS_ERROR( result, expected ) ( fabsf( (float)expected - (float)result ) ) + +extern void read_image_pixel_float( void *imageData, image_descriptor *imageInfo, int x, int y, int z, float *outData ); +template <class T> int determine_validation_error_offset( void *imagePtr, image_descriptor *imageInfo, image_sampler_data *imageSampler, + T *resultPtr, T * expected, float error, + float x, float y, float z, float xAddressOffset, float yAddressOffset, float zAddressOffset, size_t j, int &numTries, int &numClamped, bool printAsFloat, int lod ) +{ + int actualX, actualY, actualZ; + int found = debug_find_pixel_in_image( imagePtr, imageInfo, resultPtr, &actualX, &actualY, &actualZ, lod ); + bool clampingErr = false, clamped = false, otherClampingBug = false; + int clampedX, clampedY, clampedZ; + + size_t imageWidth = imageInfo->width, imageHeight = imageInfo->height, imageDepth = imageInfo->depth; + + clamped = get_integer_coords_offset( x, y, z, xAddressOffset, yAddressOffset, zAddressOffset, imageWidth, imageHeight, imageDepth, imageSampler, imageInfo, clampedX, clampedY, clampedZ ); + + if( found ) + { + // Is it a clamping bug? 
+ if( clamped && clampedX == actualX && clampedY == actualY && clampedZ == actualZ ) + { + if( (--numClamped) == 0 ) + { + if( printAsFloat ) + { + log_error( "Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not validate! Expected (%g,%g,%g,%g), got (%g,%g,%g,%g), error of %g\n", + j, x, x, y, y, z, z, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ], + (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error ); + } + else + { + log_error( "Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not validate! Expected (%x,%x,%x,%x), got (%x,%x,%x,%x)\n", + j, x, x, y, y, z, z, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ], + (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] ); + } + log_error( "ERROR: TEST FAILED: Read is erroneously clamping coordinates!\n" ); + return -1; + } + clampingErr = true; + otherClampingBug = true; + } + } + if( clamped && !otherClampingBug ) + { + // If we are in clamp-to-edge mode and we're getting zeroes, it's possible we're getting border erroneously + if( resultPtr[ 0 ] == 0 && resultPtr[ 1 ] == 0 && resultPtr[ 2 ] == 0 && resultPtr[ 3 ] == 0 ) + { + if( (--numClamped) == 0 ) + { + if( printAsFloat ) + { + log_error( "Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not validate! Expected (%g,%g,%g,%g), got (%g,%g,%g,%g), error of %g\n", + j, x, x, y, y, z, z, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ], + (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error ); + } + else + { + log_error( "Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not validate! 
Expected (%x,%x,%x,%x), got (%x,%x,%x,%x)\n", + j, x, x, y, y, z, z, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ], + (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] ); + } + log_error( "ERROR: TEST FAILED: Clamping is erroneously returning border color!\n" ); + return -1; + } + clampingErr = true; + } + } + if( !clampingErr ) + { + /* if( clamped && ( (int)x + (int)xOffsetValues[ j ] < 0 || (int)y + (int)yOffsetValues[ j ] < 0 ) ) + { + log_error( "NEGATIVE COORDINATE ERROR\n" ); + return -1; + } + */ + if( true ) // gExtraValidateInfo ) + { + if( printAsFloat ) + { + log_error( "Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not validate!\n\tExpected (%g,%g,%g,%g),\n\t got (%g,%g,%g,%g), error of %g\n", + j, x, x, y, y, z, z, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ], + (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error ); + } + else + { + log_error( "Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not validate!\n\tExpected (%x,%x,%x,%x),\n\t got (%x,%x,%x,%x)\n", + j, x, x, y, y, z, z, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ], + (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] ); + } + log_error( "Integer coords resolve to %d,%d,%d with img size %d,%d,%d\n", clampedX, clampedY, clampedZ, (int)imageWidth, (int)imageHeight, (int)imageDepth ); + + if( printAsFloat && gExtraValidateInfo ) + { + log_error( "\nNearby values:\n" ); + for( int zOff = -1; zOff <= 1; zOff++ ) + { + for( int yOff = -1; yOff <= 1; yOff++ ) + { + float top[ 4 ], real[ 4 ], bot[ 4 ]; + read_image_pixel_float( imagePtr, imageInfo, clampedX - 1 , clampedY + yOff, clampedZ + zOff, top ); + read_image_pixel_float( imagePtr, imageInfo, clampedX ,clampedY + yOff, clampedZ + zOff, real ); + read_image_pixel_float( imagePtr, imageInfo, clampedX + 1, clampedY + yOff, clampedZ + 
zOff, bot ); + log_error( "\t(%g,%g,%g,%g)",top[0], top[1], top[2], top[3] ); + log_error( " (%g,%g,%g,%g)", real[0], real[1], real[2], real[3] ); + log_error( " (%g,%g,%g,%g)\n",bot[0], bot[1], bot[2], bot[3] ); + } + } + } + // } + // else + // log_error( "\n" ); + if( imageSampler->filter_mode != CL_FILTER_LINEAR ) + { + if( found ) + log_error( "\tValue really found in image at %d,%d,%d (%s)\n", actualX, actualY, actualZ, ( found > 1 ) ? "NOT unique!!" : "unique" ); + else + log_error( "\tValue not actually found in image\n" ); + } + log_error( "\n" ); + } + + numClamped = -1; // We force the clamped counter to never work + if( ( --numTries ) == 0 ) + return -1; + } + return 0; +} + +#define CLAMP( _val, _min, _max ) ((_val) < (_min) ? (_min) : (_val) > (_max) ? (_max) : (_val)) + +static void InitFloatCoords( image_descriptor *imageInfo, image_sampler_data *imageSampler, float *xOffsets, float *yOffsets, float *zOffsets, float xfract, float yfract, float zfract, int normalized_coords, MTdata d , int lod) +{ + size_t i = 0; + if( gDisableOffsets ) + { + for( size_t z = 0; z < imageInfo->depth; z++ ) + { + for( size_t y = 0; y < imageInfo->height; y++ ) + { + for( size_t x = 0; x < imageInfo->width; x++, i++ ) + { + xOffsets[ i ] = (float) (xfract + (double) x); + yOffsets[ i ] = (float) (yfract + (double) y); + zOffsets[ i ] = (float) (zfract + (double) z); + } + } + } + } + else + { + for( size_t z = 0; z < imageInfo->depth; z++ ) + { + for( size_t y = 0; y < imageInfo->height; y++ ) + { + for( size_t x = 0; x < imageInfo->width; x++, i++ ) + { + xOffsets[ i ] = (float) (xfract + (double) ((int) x + random_in_range( -10, 10, d ))); + yOffsets[ i ] = (float) (yfract + (double) ((int) y + random_in_range( -10, 10, d ))); + zOffsets[ i ] = (float) (zfract + (double) ((int) z + random_in_range( -10, 10, d ))); + } + } + } + } + + if( imageSampler->addressing_mode == CL_ADDRESS_NONE ) + { + i = 0; + for( size_t z = 0; z < imageInfo->depth; z++ ) + { + for( size_t y 
= 0; y < imageInfo->height; y++ ) + { + for( size_t x = 0; x < imageInfo->width; x++, i++ ) + { + xOffsets[ i ] = (float) CLAMP( (double) xOffsets[ i ], 0.0, (double) imageInfo->width - 1.0); + yOffsets[ i ] = (float) CLAMP( (double) yOffsets[ i ], 0.0, (double) imageInfo->height - 1.0); + zOffsets[ i ] = (float) CLAMP( (double) zOffsets[ i ], 0.0, (double) imageInfo->depth - 1.0); + } + } + } + } + + if( normalized_coords || gTestMipmaps) + { + i = 0; + if (lod == 0) + { + for( size_t z = 0; z < imageInfo->depth; z++ ) + { + for( size_t y = 0; y < imageInfo->height; y++ ) + { + for( size_t x = 0; x < imageInfo->width; x++, i++ ) + { + xOffsets[ i ] = (float) ((double) xOffsets[ i ] / (double) imageInfo->width); + yOffsets[ i ] = (float) ((double) yOffsets[ i ] / (double) imageInfo->height); + zOffsets[ i ] = (float) ((double) zOffsets[ i ] / (double) imageInfo->depth); + } + } + } + } + else if (gTestMipmaps) + { + size_t width_lod, height_lod, depth_lod; + + width_lod = (imageInfo->width >> lod)?(imageInfo->width >> lod):1; + height_lod = (imageInfo->height >> lod)?(imageInfo->height >> lod):1; + depth_lod = (imageInfo->depth >> lod)?(imageInfo->depth >> lod):1; + + for( size_t z = 0; z < depth_lod; z++ ) + { + for( size_t y = 0; y < height_lod; y++ ) + { + for( size_t x = 0; x < width_lod; x++, i++ ) + { + xOffsets[ i ] = (float) ((double) xOffsets[ i ] / (double) width_lod); + yOffsets[ i ] = (float) ((double) yOffsets[ i ] / (double) height_lod); + zOffsets[ i ] = (float) ((double) zOffsets[ i ] / (double) depth_lod); + } + } + } + } + } +} + +#ifndef MAX +#define MAX(_a, _b) ((_a) > (_b) ? 
(_a) : (_b)) +#endif + +int test_read_image_3D( cl_device_id device, cl_context context, cl_command_queue queue, cl_kernel kernel, + image_descriptor *imageInfo, image_sampler_data *imageSampler, + bool useFloatCoords, ExplicitType outputType, MTdata d ) +{ + int error; + size_t threads[3]; + static int initHalf = 0; + + cl_mem_flags image_read_write_flags = CL_MEM_READ_ONLY; + + clMemWrapper xOffsets, yOffsets, zOffsets, results; + clSamplerWrapper actualSampler; + BufferOwningPtr<char> maxImageUseHostPtrBackingStore; + + // Create offset data + BufferOwningPtr<cl_float> xOffsetValues(malloc(sizeof(cl_float) *imageInfo->width * imageInfo->height * imageInfo->depth)); + BufferOwningPtr<cl_float> yOffsetValues(malloc(sizeof(cl_float) *imageInfo->width * imageInfo->height * imageInfo->depth)); + BufferOwningPtr<cl_float> zOffsetValues(malloc(sizeof(cl_float) *imageInfo->width * imageInfo->height * imageInfo->depth)); + + if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT ) + if( DetectFloatToHalfRoundingMode(queue) ) + return 1; + + BufferOwningPtr<char> imageValues; + generate_random_image_data( imageInfo, imageValues, d ); + + // Construct testing sources + clProtectedImage protImage; + clMemWrapper unprotImage; + cl_mem image; + + if(gtestTypesToRun & kReadTests) + { + image_read_write_flags = CL_MEM_READ_ONLY; + } + else + { + image_read_write_flags = CL_MEM_READ_WRITE; + } + + if( gMemFlagsToUse == CL_MEM_USE_HOST_PTR ) + { + // clProtectedImage uses USE_HOST_PTR, so just rely on that for the testing (via Ian) + // Do not use protected images for max image size test since it rounds the row size to a page size + if (gTestMaxImages) { + generate_random_image_data( imageInfo, maxImageUseHostPtrBackingStore, d ); + unprotImage = create_image_3d( context, + image_read_write_flags | CL_MEM_USE_HOST_PTR, + imageInfo->format, + imageInfo->width, + imageInfo->height, + imageInfo->depth, + ( gEnablePitch ? imageInfo->rowPitch : 0 ), + ( gEnablePitch ? 
imageInfo->slicePitch : 0 ), maxImageUseHostPtrBackingStore, &error ); + } else { + error = protImage.Create( context, + (cl_mem_flags)(image_read_write_flags), + imageInfo->format, imageInfo->width, imageInfo->height, imageInfo->depth ); + } + if( error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create 3D image of size %d x %d x %d (pitch %d, %d ) (%s)", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->depth, (int)imageInfo->rowPitch, (int)imageInfo->slicePitch, IGetErrorString( error ) ); + return error; + } + if (gTestMaxImages) + image = (cl_mem)unprotImage; + else + image = (cl_mem)protImage; + } + else if( gMemFlagsToUse == CL_MEM_COPY_HOST_PTR ) + { + // Don't use clEnqueueWriteImage; just use copy host ptr to get the data in + unprotImage = create_image_3d( context, + image_read_write_flags | CL_MEM_COPY_HOST_PTR, + imageInfo->format, + imageInfo->width, + imageInfo->height, + imageInfo->depth, + ( gEnablePitch ? imageInfo->rowPitch : 0 ), + ( gEnablePitch ? imageInfo->slicePitch : 0 ), + imageValues, &error ); + if( error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create 3D image of size %d x %d x %d (pitch %d, %d ) (%s)", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->depth, (int)imageInfo->rowPitch, (int)imageInfo->slicePitch, IGetErrorString( error ) ); + return error; + } + image = unprotImage; + } + else // Either CL_MEM_ALLOC_HOST_PTR or none + { + // Note: if ALLOC_HOST_PTR is used, the driver allocates memory that can be accessed by the host, but otherwise + // it works just as if no flag is specified, so we just do the same thing either way + if ( !gTestMipmaps ) + { + unprotImage = create_image_3d( context, + image_read_write_flags | gMemFlagsToUse, + imageInfo->format, + imageInfo->width, imageInfo->height, imageInfo->depth, + ( gEnablePitch ? imageInfo->rowPitch : 0 ), + ( gEnablePitch ? 
imageInfo->slicePitch : 0 ), + imageValues, &error ); + if( error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create 3D image of size %d x %d x %d (pitch %d, %d ) (%s)", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->depth, (int)imageInfo->rowPitch, (int)imageInfo->slicePitch, IGetErrorString( error ) ); + return error; + } + image = unprotImage; + } + else + { + cl_image_desc image_desc = {0}; + image_desc.image_type = CL_MEM_OBJECT_IMAGE3D; + image_desc.image_width = imageInfo->width; + image_desc.image_height = imageInfo->height; + image_desc.image_depth = imageInfo->depth; + image_desc.num_mip_levels = imageInfo->num_mip_levels; + + + unprotImage = clCreateImage( context, + image_read_write_flags, + imageInfo->format, &image_desc, NULL, &error); + if( error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create %d level mipmapped 3D image of size %d x %d x %d (pitch %d, %d ) (%s)",(int)imageInfo->num_mip_levels, (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->depth, (int)imageInfo->rowPitch, (int)imageInfo->slicePitch, IGetErrorString( error ) ); + return error; + } + image = unprotImage; + } + } + + if( gMemFlagsToUse != CL_MEM_COPY_HOST_PTR ) + { + size_t origin[ 4 ] = { 0, 0, 0, 0}; + size_t region[ 3 ] = { imageInfo->width, imageInfo->height, imageInfo->depth }; + + if( gDebugTrace ) + log_info( " - Writing image...\n" ); + + if ( !gTestMipmaps ) + { + + error = clEnqueueWriteImage(queue, image, CL_TRUE, + origin, region, gEnablePitch ? imageInfo->rowPitch : 0, gEnablePitch ? 
imageInfo->slicePitch : 0, + imageValues , 0, NULL, NULL); + + if (error != CL_SUCCESS) + { + log_error( "ERROR: Unable to write to 3D image of size %d x %d x %d \n", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->depth ); + return error; + } + } + else + { + int nextLevelOffset = 0; + + for (int i =0; i < imageInfo->num_mip_levels; i++) + { origin[3] = i; + error = clEnqueueWriteImage(queue, image, CL_TRUE, + origin, region, /*gEnablePitch ? imageInfo->rowPitch :*/ 0, /*gEnablePitch ? imageInfo->slicePitch :*/ 0, + ((char*)imageValues + nextLevelOffset), 0, NULL, NULL); + if (error != CL_SUCCESS) + { + log_error( "ERROR: Unable to write to %d level mipmapped 3D image of size %d x %d x %d\n", (int)imageInfo->num_mip_levels,(int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->depth ); + return error; + } + nextLevelOffset += region[0]*region[1]*region[2]*get_pixel_size(imageInfo->format); + //Subsequent mip level dimensions keep halving + region[0] = region[0] >> 1 ? region[0] >> 1 : 1; + region[1] = region[1] >> 1 ? region[1] >> 1 : 1; + region[2] = region[2] >> 1 ? 
region[2] >> 1 : 1; + } + } + } + + xOffsets = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), sizeof( cl_float ) * imageInfo->width * imageInfo->height * imageInfo->depth, xOffsetValues, &error ); + test_error( error, "Unable to create x offset buffer" ); + yOffsets = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), sizeof( cl_float ) * imageInfo->width * imageInfo->height * imageInfo->depth, yOffsetValues, &error ); + test_error( error, "Unable to create y offset buffer" ); + zOffsets = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), sizeof( cl_float ) * imageInfo->width * imageInfo->height * imageInfo->depth, zOffsetValues, &error ); + test_error( error, "Unable to create y offset buffer" ); + results = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), get_explicit_type_size( outputType ) * 4 * imageInfo->width * imageInfo->height * imageInfo->depth, NULL, &error ); + test_error( error, "Unable to create result buffer" ); + + // Create sampler to use + cl_sampler_properties properties[] = { + CL_SAMPLER_NORMALIZED_COORDS, imageSampler->normalized_coords, + CL_SAMPLER_ADDRESSING_MODE, imageSampler->addressing_mode, + CL_SAMPLER_FILTER_MODE, imageSampler->filter_mode, + 0, 0, 0 }; + if (gTestMipmaps) { + properties[6] = CL_SAMPLER_MIP_FILTER_MODE; + properties[7] = imageSampler->filter_mode; + } + actualSampler = clCreateSamplerWithProperties(context, properties, &error ); + test_error( error, "Unable to create image sampler" ); + + // Set arguments + int idx = 0; + error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &image ); + test_error( error, "Unable to set kernel arguments" ); + if( !gUseKernelSamplers ) + { + error = clSetKernelArg( kernel, idx++, sizeof( cl_sampler ), &actualSampler ); + test_error( error, "Unable to set kernel arguments" ); + } + error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &xOffsets ); + test_error( error, "Unable to set kernel arguments" ); + error = 
clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &yOffsets ); + test_error( error, "Unable to set kernel arguments" ); + error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &zOffsets ); + test_error( error, "Unable to set kernel arguments" ); + error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &results ); + test_error( error, "Unable to set kernel arguments" ); + + const float float_offsets[] = { 0.0f, MAKE_HEX_FLOAT(0x1.0p-30f, 0x1L, -30), 0.25f, 0.3f, 0.5f - FLT_EPSILON/4.0f, 0.5f, 0.9f, 1.0f - FLT_EPSILON/2 }; + int float_offset_count = sizeof( float_offsets) / sizeof( float_offsets[0] ); + int numTries = MAX_TRIES, numClamped = MAX_CLAMPED; + int loopCount = 2 * float_offset_count; + if( ! useFloatCoords ) + loopCount = 1; + if (gTestMaxImages) { + loopCount = 1; + log_info("Testing each size only once with pixel offsets of %g for max sized images.\n", float_offsets[0]); + } + + // Get the maximum absolute error for this format + double formatAbsoluteError = get_max_absolute_error(imageInfo->format, imageSampler); + if (gDebugTrace) log_info("\tformatAbsoluteError is %e\n", formatAbsoluteError); + + if (0 == initHalf && imageInfo->format->image_channel_data_type == CL_HALF_FLOAT ) { + initHalf = CL_SUCCESS == DetectFloatToHalfRoundingMode( queue ); + if (initHalf) { + log_info("Half rounding mode successfully detected.\n"); + } + } + + int nextLevelOffset = 0; + size_t width_lod = imageInfo->width, height_lod = imageInfo->height, depth_lod = imageInfo->depth; + + //Loop over all mipmap levels, if we are testing mipmapped images. 
+ for(int lod = 0; (gTestMipmaps && lod < imageInfo->num_mip_levels) || (!gTestMipmaps && lod < 1); lod++) + { + size_t resultValuesSize = width_lod * height_lod * depth_lod * get_explicit_type_size( outputType ) * 4; + BufferOwningPtr resultValues(malloc( resultValuesSize )); + float lod_float = (float)lod; + if (gTestMipmaps) { + //Set the lod kernel arg + if(gDebugTrace) + log_info(" - Working at mip level %d\n", lod); + error = clSetKernelArg( kernel, idx, sizeof( float ), &lod_float); + test_error( error, "Unable to set kernel arguments" ); + } + + for( int q = 0; q < loopCount; q++ ) + { + float offset = float_offsets[ q % float_offset_count ]; + + // Init the coordinates + InitFloatCoords( imageInfo, imageSampler, xOffsetValues, yOffsetValues, zOffsetValues, + q>=float_offset_count ? -offset: offset, + q>=float_offset_count ? offset: -offset, + q>=float_offset_count ? -offset: offset, + imageSampler->normalized_coords, d, lod ); + + error = clEnqueueWriteBuffer( queue, xOffsets, CL_TRUE, 0, sizeof(cl_float) * imageInfo->height * imageInfo->width * imageInfo->depth, xOffsetValues, 0, NULL, NULL ); + test_error( error, "Unable to write x offsets" ); + error = clEnqueueWriteBuffer( queue, yOffsets, CL_TRUE, 0, sizeof(cl_float) * imageInfo->height * imageInfo->width * imageInfo->depth, yOffsetValues, 0, NULL, NULL ); + test_error( error, "Unable to write y offsets" ); + error = clEnqueueWriteBuffer( queue, zOffsets, CL_TRUE, 0, sizeof(cl_float) * imageInfo->height * imageInfo->width * imageInfo->depth, zOffsetValues, 0, NULL, NULL ); + test_error( error, "Unable to write z offsets" ); + + + memset( resultValues, 0xff, resultValuesSize ); + clEnqueueWriteBuffer( queue, results, CL_TRUE, 0, resultValuesSize, resultValues, 0, NULL, NULL ); + + // Figure out thread dimensions + threads[0] = (size_t)width_lod; + threads[1] = (size_t)height_lod; + threads[2] = (size_t)depth_lod; + + // Run the kernel + error = clEnqueueNDRangeKernel( queue, kernel, 3, NULL, threads, 
NULL, 0, NULL, NULL ); + test_error( error, "Unable to run kernel" ); + + // Get results + error = clEnqueueReadBuffer( queue, results, CL_TRUE, 0, width_lod * height_lod * depth_lod * get_explicit_type_size( outputType ) * 4, resultValues, 0, NULL, NULL ); + test_error( error, "Unable to read results from kernel" ); + if( gDebugTrace ) + log_info( " results read\n" ); + + // Validate results element by element + char *imagePtr = (char*)imageValues + nextLevelOffset; + /* + * FLOAT output type + */ + if(is_sRGBA_order(imageInfo->format->image_channel_order) && (outputType == kFloat) ) + { + // Validate float results + float *resultPtr = (float *)(char *)resultValues; + float expected[4], error=0.0f; + float maxErr = get_max_relative_error( imageInfo->format, imageSampler, 1 /*3D*/, CL_FILTER_LINEAR == imageSampler->filter_mode ); + + for( size_t z = 0, j = 0; z < depth_lod; z++ ) + { + for( size_t y = 0; y < height_lod; y++ ) + { + for( size_t x = 0; x < width_lod; x++, j++ ) + { + // Step 1: go through and see if the results verify for the pixel + // For the normalized case on a GPU we put in offsets to the X, Y and Z to see if we land on the + // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. 
+ int checkOnlyOnePixel = 0; + int found_pixel = 0; + float offset = NORM_OFFSET; + if (!imageSampler->normalized_coords || imageSampler->filter_mode != CL_FILTER_NEAREST || NORM_OFFSET == 0 +#if defined( __APPLE__ ) + // Apple requires its CPU implementation to do correctly rounded address arithmetic in all modes + || gDeviceType != CL_DEVICE_TYPE_GPU +#endif + ) + offset = 0.0f; // Loop only once + + for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel ; norm_offset_x += NORM_OFFSET) { + for (float norm_offset_y = -offset; norm_offset_y <= offset && !found_pixel ; norm_offset_y += NORM_OFFSET) { + for (float norm_offset_z = -offset; norm_offset_z <= NORM_OFFSET && !found_pixel; norm_offset_z += NORM_OFFSET) { + + int hasDenormals = 0; + FloatPixel maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], + norm_offset_x, norm_offset_y, norm_offset_z, + imageSampler, expected, 0, &hasDenormals, lod ); + + float err1 = fabsf( sRGBmap( resultPtr[0] ) - sRGBmap( expected[0] ) ); + float err2 = fabsf( sRGBmap( resultPtr[1] ) - sRGBmap( expected[1] ) ); + float err3 = fabsf( sRGBmap( resultPtr[2] ) - sRGBmap( expected[2] ) ); + float err4 = fabsf( resultPtr[3] - expected[3] ); + // Clamp to the minimum absolute error for the format + if (err1 > 0 && err1 < formatAbsoluteError) { err1 = 0.0f; } + if (err2 > 0 && err2 < formatAbsoluteError) { err2 = 0.0f; } + if (err3 > 0 && err3 < formatAbsoluteError) { err3 = 0.0f; } + if (err4 > 0 && err4 < formatAbsoluteError) { err4 = 0.0f; } + float maxErr = 0.5; + + if( ! (err1 <= maxErr) || ! (err2 <= maxErr) || ! (err3 <= maxErr) || ! 
(err4 <= maxErr) ) + { + // Try flushing the denormals + if( hasDenormals ) + { + // If implementation decide to flush subnormals to zero, + // max error needs to be adjusted + maxErr += 4 * FLT_MIN; + + maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], + norm_offset_x, norm_offset_y, norm_offset_z, + imageSampler, expected, 0, NULL, lod ); + + err1 = fabsf( sRGBmap( resultPtr[0] ) - sRGBmap( expected[0] ) ); + err2 = fabsf( sRGBmap( resultPtr[1] ) - sRGBmap( expected[1] ) ); + err3 = fabsf( sRGBmap( resultPtr[2] ) - sRGBmap( expected[2] ) ); + err4 = fabsf( resultPtr[3] - expected[3] ); + } + } + + found_pixel = (err1 <= maxErr) && (err2 <= maxErr) && (err3 <= maxErr) && (err4 <= maxErr); + }//norm_offset_z + }//norm_offset_y + }//norm_offset_x + + // Step 2: If we did not find a match, then print out debugging info. + if (!found_pixel) { + // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the + // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. 
+ checkOnlyOnePixel = 0; + int shouldReturn = 0; + for (float norm_offset_x = -offset; norm_offset_x <= offset && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) { + for (float norm_offset_y = -offset; norm_offset_y <= offset && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) { + for (float norm_offset_z = -offset; norm_offset_z <= offset && !checkOnlyOnePixel; norm_offset_z += NORM_OFFSET) { + + int hasDenormals = 0; + FloatPixel maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], + norm_offset_x, norm_offset_y, norm_offset_z, + imageSampler, expected, 0, &hasDenormals, lod ); + + float err1 = fabsf( sRGBmap( resultPtr[0] ) - sRGBmap( expected[0] ) ); + float err2 = fabsf( sRGBmap( resultPtr[1] ) - sRGBmap( expected[1] ) ); + float err3 = fabsf( sRGBmap( resultPtr[2] ) - sRGBmap( expected[2] ) ); + float err4 = fabsf( resultPtr[3] - expected[3] ); + float maxErr = 0.6; + + if( ! (err1 <= maxErr) || ! (err2 <= maxErr) || ! (err3 <= maxErr) || ! (err4 <= maxErr) ) + { + // Try flushing the denormals + if( hasDenormals ) + { + // If implementation decide to flush subnormals to zero, + // max error needs to be adjusted + maxErr += 4 * FLT_MIN; + + maxPixel = sample_image_pixel_float( imagePtr, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], + imageSampler, expected, 0, NULL, lod ); + + err1 = fabsf( sRGBmap( resultPtr[0] ) - sRGBmap( expected[0] ) ); + err2 = fabsf( sRGBmap( resultPtr[1] ) - sRGBmap( expected[1] ) ); + err3 = fabsf( sRGBmap( resultPtr[2] ) - sRGBmap( expected[2] ) ); + err4 = fabsf( resultPtr[3] - expected[3] ); + } + } + + if( ! (err1 <= maxErr) || ! (err2 <= maxErr) || ! (err3 <= maxErr) || ! 
(err4 <= maxErr) ) + { + log_error("FAILED norm_offsets: %g , %g , %g:\n", norm_offset_x, norm_offset_y, norm_offset_z); + + float tempOut[4]; + shouldReturn |= determine_validation_error_offset( imagePtr, imageInfo, imageSampler, resultPtr, + expected, error, xOffsetValues[j], yOffsetValues[j], zOffsetValues[j], + norm_offset_x, norm_offset_y, norm_offset_z, j, + numTries, numClamped, true, lod ); + log_error( "Step by step:\n" ); + FloatPixel temp = sample_image_pixel_float_offset( imagePtr, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], + norm_offset_x, norm_offset_y, norm_offset_z, + imageSampler, tempOut, 1 /*verbose*/, &hasDenormals, lod); + log_error( "\tulps: %2.2f, %2.2f, %2.2f, %2.2f (max allowed: %2.2f)\n\n", + Ulp_Error( resultPtr[0], expected[0] ), + Ulp_Error( resultPtr[1], expected[1] ), + Ulp_Error( resultPtr[2], expected[2] ), + Ulp_Error( resultPtr[3], expected[3] ), + Ulp_Error( MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) + maxErr, MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) ) ); + } else { + log_error("Test error: we should have detected this passing above.\n"); + } + }//norm_offset_z + }//norm_offset_y + }//norm_offset_x + if( shouldReturn ) + return 1; + } // if (!found_pixel) + + resultPtr += 4; + } + } + } + } + /* + * FLOAT output type + */ + else if( outputType == kFloat ) + { + // Validate float results + float *resultPtr = (float *)(char *)resultValues; + float expected[4], error=0.0f; + float maxErr = get_max_relative_error( imageInfo->format, imageSampler, 1 /*3D*/, CL_FILTER_LINEAR == imageSampler->filter_mode ); + + for( size_t z = 0, j = 0; z < depth_lod; z++ ) + { + for( size_t y = 0; y < height_lod; y++ ) + { + for( size_t x = 0; x < width_lod; x++, j++ ) + { + // Step 1: go through and see if the results verify for the pixel + // For the normalized case on a GPU we put in offsets to the X, Y and Z to see if we land on the + // right pixel. 
This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. + int checkOnlyOnePixel = 0; + int found_pixel = 0; + float offset = NORM_OFFSET; + if (!imageSampler->normalized_coords || imageSampler->filter_mode != CL_FILTER_NEAREST || NORM_OFFSET == 0 +#if defined( __APPLE__ ) + // Apple requires its CPU implementation to do correctly rounded address arithmetic in all modes + || gDeviceType != CL_DEVICE_TYPE_GPU +#endif + ) + offset = 0.0f; // Loop only once + + for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel ; norm_offset_x += NORM_OFFSET) { + for (float norm_offset_y = -offset; norm_offset_y <= offset && !found_pixel ; norm_offset_y += NORM_OFFSET) { + for (float norm_offset_z = -offset; norm_offset_z <= NORM_OFFSET && !found_pixel; norm_offset_z += NORM_OFFSET) { + + int hasDenormals = 0; + FloatPixel maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], + norm_offset_x, norm_offset_y, norm_offset_z, + imageSampler, expected, 0, &hasDenormals, lod ); + + float err1 = fabsf( resultPtr[0] - expected[0] ); + float err2 = fabsf( resultPtr[1] - expected[1] ); + float err3 = fabsf( resultPtr[2] - expected[2] ); + float err4 = fabsf( resultPtr[3] - expected[3] ); + // Clamp to the minimum absolute error for the format + if (err1 > 0 && err1 < formatAbsoluteError) { err1 = 0.0f; } + if (err2 > 0 && err2 < formatAbsoluteError) { err2 = 0.0f; } + if (err3 > 0 && err3 < formatAbsoluteError) { err3 = 0.0f; } + if (err4 > 0 && err4 < formatAbsoluteError) { err4 = 0.0f; } + float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN ); + float maxErr2 = MAX( maxErr * maxPixel.p[1], FLT_MIN ); + float maxErr3 = MAX( maxErr * maxPixel.p[2], FLT_MIN ); + float maxErr4 = MAX( maxErr * maxPixel.p[3], FLT_MIN ); + + if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2) || ! (err3 <= maxErr3) || ! 
(err4 <= maxErr4) ) + { + // Try flushing the denormals + if( hasDenormals ) + { + // If implementation decide to flush subnormals to zero, + // max error needs to be adjusted + maxErr1 += 4 * FLT_MIN; + maxErr2 += 4 * FLT_MIN; + maxErr3 += 4 * FLT_MIN; + maxErr4 += 4 * FLT_MIN; + + maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], + norm_offset_x, norm_offset_y, norm_offset_z, + imageSampler, expected, 0, NULL, lod ); + + err1 = fabsf( resultPtr[0] - expected[0] ); + err2 = fabsf( resultPtr[1] - expected[1] ); + err3 = fabsf( resultPtr[2] - expected[2] ); + err4 = fabsf( resultPtr[3] - expected[3] ); + } + } + + found_pixel = (err1 <= maxErr1) && (err2 <= maxErr2) && (err3 <= maxErr3) && (err4 <= maxErr4); + }//norm_offset_z + }//norm_offset_y + }//norm_offset_x + + // Step 2: If we did not find a match, then print out debugging info. + if (!found_pixel) { + // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the + // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. 
+ checkOnlyOnePixel = 0; + int shouldReturn = 0; + for (float norm_offset_x = -offset; norm_offset_x <= offset && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) { + for (float norm_offset_y = -offset; norm_offset_y <= offset && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) { + for (float norm_offset_z = -offset; norm_offset_z <= offset && !checkOnlyOnePixel; norm_offset_z += NORM_OFFSET) { + + int hasDenormals = 0; + FloatPixel maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], + norm_offset_x, norm_offset_y, norm_offset_z, + imageSampler, expected, 0, &hasDenormals, lod ); + + float err1 = fabsf( resultPtr[0] - expected[0] ); + float err2 = fabsf( resultPtr[1] - expected[1] ); + float err3 = fabsf( resultPtr[2] - expected[2] ); + float err4 = fabsf( resultPtr[3] - expected[3] ); + float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN ); + float maxErr2 = MAX( maxErr * maxPixel.p[1], FLT_MIN ); + float maxErr3 = MAX( maxErr * maxPixel.p[2], FLT_MIN ); + float maxErr4 = MAX( maxErr * maxPixel.p[3], FLT_MIN ); + + + if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2) || ! (err3 <= maxErr3) || ! (err4 <= maxErr4) ) + { + // Try flushing the denormals + if( hasDenormals ) + { + maxErr1 += 4 * FLT_MIN; + maxErr2 += 4 * FLT_MIN; + maxErr3 += 4 * FLT_MIN; + maxErr4 += 4 * FLT_MIN; + + maxPixel = sample_image_pixel_float( imagePtr, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], + imageSampler, expected, 0, NULL, lod ); + + err1 = fabsf( resultPtr[0] - expected[0] ); + err2 = fabsf( resultPtr[1] - expected[1] ); + err3 = fabsf( resultPtr[2] - expected[2] ); + err4 = fabsf( resultPtr[3] - expected[3] ); + } + } + + if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2) || ! (err3 <= maxErr3) || ! 
(err4 <= maxErr4) ) + { + log_error("FAILED norm_offsets: %g , %g , %g:\n", norm_offset_x, norm_offset_y, norm_offset_z); + + float tempOut[4]; + shouldReturn |= determine_validation_error_offset( imagePtr, imageInfo, imageSampler, resultPtr, + expected, error, xOffsetValues[j], yOffsetValues[j], zOffsetValues[j], + norm_offset_x, norm_offset_y, norm_offset_z, j, + numTries, numClamped, true, lod ); + log_error( "Step by step:\n" ); + FloatPixel temp = sample_image_pixel_float_offset( imagePtr, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], + norm_offset_x, norm_offset_y, norm_offset_z, + imageSampler, tempOut, 1 /*verbose*/, &hasDenormals, lod); + log_error( "\tulps: %2.2f, %2.2f, %2.2f, %2.2f (max allowed: %2.2f)\n\n", + Ulp_Error( resultPtr[0], expected[0] ), + Ulp_Error( resultPtr[1], expected[1] ), + Ulp_Error( resultPtr[2], expected[2] ), + Ulp_Error( resultPtr[3], expected[3] ), + Ulp_Error( MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) + maxErr, MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) ) ); + } else { + log_error("Test error: we should have detected this passing above.\n"); + } + }//norm_offset_z + }//norm_offset_y + }//norm_offset_x + if( shouldReturn ) + return 1; + } // if (!found_pixel) + + resultPtr += 4; + } + } + } + } + /* + * UINT output type + */ + else if( outputType == kUInt ) + { + // Validate unsigned integer results + unsigned int *resultPtr = (unsigned int *)(char *)resultValues; + unsigned int expected[4]; + float error; + for( size_t z = 0, j = 0; z < depth_lod; z++ ) + { + for( size_t y = 0; y < height_lod; y++ ) + { + for( size_t x = 0; x < width_lod; x++, j++ ) + { + // Step 1: go through and see if the results verify for the pixel + // For the normalized case on a GPU we put in offsets to the X, Y and Z to see if we land on the + // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. 
+ int checkOnlyOnePixel = 0; + int found_pixel = 0; + for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) { + for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) { + for (float norm_offset_z = -NORM_OFFSET; norm_offset_z <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_z += NORM_OFFSET) { + + // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) + // E.g., test one pixel. + if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + norm_offset_x = 0.0f; + norm_offset_y = 0.0f; + norm_offset_z = 0.0f; + checkOnlyOnePixel = 1; + } + + sample_image_pixel_offset( imagePtr, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], + norm_offset_x, norm_offset_y, norm_offset_z, + imageSampler, expected, lod ); + + error = errMax( errMax( abs_diff_uint(expected[ 0 ], resultPtr[ 0 ]), abs_diff_uint(expected[ 1 ], resultPtr[ 1 ]) ), + errMax( abs_diff_uint(expected[ 2 ], resultPtr[ 2 ]), abs_diff_uint(expected[ 3 ], resultPtr[ 3 ]) ) ); + + if (error < MAX_ERR) + found_pixel = 1; + }//norm_offset_z + }//norm_offset_y + }//norm_offset_x + + // Step 2: If we did not find a match, then print out debugging info. + if (!found_pixel) { + // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the + // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. 
+ checkOnlyOnePixel = 0; + int shouldReturn = 0; + for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) { + for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) { + for (float norm_offset_z = -NORM_OFFSET; norm_offset_z <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_z += NORM_OFFSET) { + + // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) + // E.g., test one pixel. + if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + norm_offset_x = 0.0f; + norm_offset_y = 0.0f; + norm_offset_z = 0.0f; + checkOnlyOnePixel = 1; + } + + sample_image_pixel_offset( imagePtr, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], + norm_offset_x, norm_offset_y, norm_offset_z, + imageSampler, expected, lod ); + + error = errMax( errMax( abs_diff_uint(expected[ 0 ], resultPtr[ 0 ]), abs_diff_uint(expected[ 1 ], resultPtr[ 1 ]) ), + errMax( abs_diff_uint(expected[ 2 ], resultPtr[ 2 ]), abs_diff_uint(expected[ 3 ], resultPtr[ 3 ]) ) ); + + if( error > MAX_ERR ) + { + log_error("FAILED norm_offsets: %g , %g , %g:\n", norm_offset_x, norm_offset_y, norm_offset_z); + shouldReturn |= determine_validation_error_offset( imagePtr, imageInfo, imageSampler, resultPtr, + expected, error, xOffsetValues[j], yOffsetValues[j], zOffsetValues[j], + norm_offset_x, norm_offset_y, norm_offset_z, + j, numTries, numClamped, false, lod ); + } else { + log_error("Test error: we should have detected this passing above.\n"); + } + }//norm_offset_z + }//norm_offset_y + }//norm_offset_x + if( shouldReturn ) + return 1; + } // if (!found_pixel) + + resultPtr += 4; + } + } + } + } + else + /* + * INT output type + */ + { + // Validate integer results + int *resultPtr = (int *)(char *)resultValues; + int expected[4]; + float error; + for( size_t z = 0, j = 0; z < depth_lod; 
z++ ) + { + for( size_t y = 0; y < height_lod; y++ ) + { + for( size_t x = 0; x < width_lod; x++, j++ ) + { + // Step 1: go through and see if the results verify for the pixel + // For the normalized case on a GPU we put in offsets to the X, Y and Z to see if we land on the + // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. + int checkOnlyOnePixel = 0; + int found_pixel = 0; + for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) { + for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) { + for (float norm_offset_z = -NORM_OFFSET; norm_offset_z <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_z += NORM_OFFSET) { + + // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) + // E.g., test one pixel. + if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + norm_offset_x = 0.0f; + norm_offset_y = 0.0f; + norm_offset_z = 0.0f; + checkOnlyOnePixel = 1; + } + + sample_image_pixel_offset( imagePtr, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], + norm_offset_x, norm_offset_y, norm_offset_z, + imageSampler, expected, lod ); + + error = errMax( errMax( abs_diff_int(expected[ 0 ], resultPtr[ 0 ]), abs_diff_int(expected[ 1 ], resultPtr[ 1 ]) ), + errMax( abs_diff_int(expected[ 2 ], resultPtr[ 2 ]), abs_diff_int(expected[ 3 ], resultPtr[ 3 ]) ) ); + + if (error < MAX_ERR) + found_pixel = 1; + }//norm_offset_z + }//norm_offset_y + }//norm_offset_x + + // Step 2: If we did not find a match, then print out debugging info. + if (!found_pixel) { + // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the + // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0. 
+ checkOnlyOnePixel = 0; + int shouldReturn = 0; + for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) { + for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) { + for (float norm_offset_z = -NORM_OFFSET; norm_offset_z <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_z += NORM_OFFSET) { + + // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) + // E.g., test one pixel. + if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0 || NORM_OFFSET == 0 || NORM_OFFSET == 0) { + norm_offset_x = 0.0f; + norm_offset_y = 0.0f; + norm_offset_z = 0.0f; + checkOnlyOnePixel = 1; + } + + sample_image_pixel_offset( imagePtr, imageInfo, + xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ], + norm_offset_x, norm_offset_y, norm_offset_z, + imageSampler, expected, lod ); + + error = errMax( errMax( abs_diff_int(expected[ 0 ], resultPtr[ 0 ]), abs_diff_int(expected[ 1 ], resultPtr[ 1 ]) ), + errMax( abs_diff_int(expected[ 2 ], resultPtr[ 2 ]), abs_diff_int(expected[ 3 ], resultPtr[ 3 ]) ) ); + + if( error > MAX_ERR ) + { + log_error("FAILED norm_offsets: %g , %g , %g:\n", norm_offset_x, norm_offset_y, norm_offset_z); + shouldReturn |= determine_validation_error_offset( imagePtr, imageInfo, imageSampler, resultPtr, + expected, error, xOffsetValues[j], yOffsetValues[j], zOffsetValues[j], + norm_offset_x, norm_offset_y, norm_offset_z, + j, numTries, numClamped, false, lod ); + } else { + log_error("Test error: we should have detected this passing above.\n"); + } + }//norm_offset_z + }//norm_offset_y + }//norm_offset_x + if( shouldReturn ) + return 1; + } // if (!found_pixel) + + resultPtr += 4; + } + } + } + } + } + { + nextLevelOffset += width_lod * height_lod * depth_lod * get_pixel_size(imageInfo->format); + width_lod = ( width_lod >> 1) ?( width_lod >> 1) : 1; + 
height_lod = ( height_lod >> 1) ?( height_lod >> 1) : 1; + depth_lod = ( depth_lod >> 1) ?( depth_lod >> 1) : 1; + } + } + + return numTries != MAX_TRIES || numClamped != MAX_CLAMPED; +} + +int test_read_image_set_3D( cl_device_id device, cl_image_format *format, image_sampler_data *imageSampler, + bool floatCoords, ExplicitType outputType ) +{ + char programSrc[10240]; + const char *ptr; + const char *readFormat; + RandomSeed seed( gRandomSeed ); + + int error; + + clProgramWrapper program; + clKernelWrapper kernel; + const char *KernelSourcePattern = NULL; + + // Get operating parameters + size_t maxWidth, maxHeight, maxDepth; + cl_ulong maxAllocSize, memSize; + image_descriptor imageInfo = { 0x0 }; + + imageInfo.format = format; + imageInfo.type = CL_MEM_OBJECT_IMAGE3D; + + error = clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof( maxDepth ), &maxDepth, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL ); + test_error( error, "Unable to get max image 3D size from device" ); + + if (memSize > (cl_ulong)SIZE_MAX) { + memSize = (cl_ulong)SIZE_MAX; + } + + // Determine types + if( outputType == kInt ) + readFormat = "i"; + else if( outputType == kUInt ) + readFormat = "ui"; + else // kFloat + readFormat = "f"; + + // Construct the source + const char *samplerArg = samplerKernelArg; + char samplerVar[ 1024 ] = ""; + if( gUseKernelSamplers ) + { + get_sampler_kernel_code( imageSampler, samplerVar ); + samplerArg = ""; + } + + // Construct the source + if(gtestTypesToRun & kReadTests) + { + KernelSourcePattern = read3DKernelSourcePattern; + } + else + { + KernelSourcePattern = 
read_write3DKernelSourcePattern; + } + + sprintf( programSrc, + KernelSourcePattern, + samplerArg, get_explicit_type_name( outputType ), + gTestMipmaps? ", float lod": " ", + samplerVar, + gTestMipmaps? offset3DLodKernelSource: offset3DKernelSource, + floatCoords ? float3DUnnormalizedCoordKernelSource : int3DCoordKernelSource, + readFormat, + gTestMipmaps? ",lod":" "); + + ptr = programSrc; + error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", "-cl-std=CL2.0" ); + test_error( error, "Unable to create testing kernel" ); + + // Run tests + if( gTestSmallImages ) + { + for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ ) + { + imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format ); + + for( imageInfo.height = 1; imageInfo.height < 9; imageInfo.height++ ) + { + imageInfo.slicePitch = imageInfo.rowPitch * imageInfo.height; + for( imageInfo.depth = 2; imageInfo.depth < 9; imageInfo.depth++ ) + { + if (gTestMipmaps) + imageInfo.num_mip_levels = (cl_uint) (2+rand()%(compute_max_mip_levels(imageInfo.width,imageInfo.height,imageInfo.depth) - 1)); + + if( gDebugTrace ) + log_info( " at size %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.depth ); + int retCode = test_read_image_3D( device, context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed ); + if( retCode ) + return retCode; + } + } + } + } + else if( gTestMaxImages ) + { + // Try a specific set of maximum sizes + size_t numbeOfSizes; + size_t sizes[100][3]; + + get_max_sizes(&numbeOfSizes, 100, sizes, maxWidth, maxHeight, maxDepth, 1, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE3D, imageInfo.format, CL_TRUE); + + for( size_t idx = 0; idx < numbeOfSizes; idx++ ) + { + imageInfo.width = sizes[ idx ][ 0 ]; + imageInfo.height = sizes[ idx ][ 1 ]; + imageInfo.depth = sizes[ idx ][ 2 ]; + imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format ); + imageInfo.slicePitch = 
imageInfo.height * imageInfo.rowPitch; + if (gTestMipmaps) + imageInfo.num_mip_levels = (cl_uint) (2+rand()%(compute_max_mip_levels(imageInfo.width,imageInfo.height,imageInfo.depth) - 1)); + log_info("Testing %d x %d x %d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ], (int)sizes[ idx ][ 2 ]); + if( gDebugTrace ) + log_info( " at max size %d,%d,%d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ], (int)sizes[ idx ][ 2 ] ); + int retCode = test_read_image_3D( device, context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed ); + if( retCode ) + return retCode; + } + } + else if( gTestRounding ) + { + size_t typeRange = 1 << ( get_format_type_size( imageInfo.format ) * 8 ); + imageInfo.height = typeRange / 256; + imageInfo.width = (size_t)( typeRange / (cl_ulong)imageInfo.height ); + imageInfo.depth = 2; + + imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format ); + imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch; + int retCode = test_read_image_3D( device, context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed ); + if( retCode ) + return retCode; + } + else + { + for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ ) + { + cl_ulong size; + // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that + // image, the result array, plus offset arrays, will fit in the global ram space + do + { + imageInfo.width = reduceImageSizeRange(maxWidth, seed ); + imageInfo.height = reduceImageSizeRange(maxHeight, seed ); + imageInfo.depth = reduceImageDepth(maxDepth, seed ); + + if (gTestMipmaps) + { + //imageInfo.num_mip_levels = (cl_uint) random_log_in_range(2, (int)compute_max_mip_levels(imageInfo.width, imageInfo.depth, imageInfo.depth), seed); + imageInfo.num_mip_levels = (cl_uint) (2+rand()%(compute_max_mip_levels(imageInfo.width,imageInfo.height,imageInfo.depth) - 1)); + //Need to take into account the output buffer size, otherwise we will end up with input 
buffer that is exceeding MaxAlloc + size = compute_mipmapped_image_size( imageInfo )*4 * get_explicit_type_size( outputType ); + imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format ); + imageInfo.slicePitch = imageInfo.rowPitch * imageInfo.height; + } + else + { + imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format ); + imageInfo.slicePitch = imageInfo.rowPitch * imageInfo.height; + + if( gEnablePitch ) + { + size_t extraWidth = (int)random_log_in_range( 0, 64, seed ); + imageInfo.rowPitch += extraWidth * get_pixel_size( imageInfo.format ); + + size_t extraHeight = (int)random_log_in_range( 0, 64, seed ); + imageInfo.slicePitch = imageInfo.rowPitch * (imageInfo.height + extraHeight); + } + + size = (cl_ulong)imageInfo.slicePitch * (cl_ulong)imageInfo.depth * 4 * 4; + } + } while( size > maxAllocSize || ( size * 3 ) > memSize ); + + if( gDebugTrace ) + { + log_info( " at size %d,%d,%d (pitch %d,%d) out of %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.depth, (int)imageInfo.rowPitch, (int)imageInfo.slicePitch, (int)maxWidth, (int)maxHeight, (int)maxDepth ); + if ( gTestMipmaps ) + log_info( " and number of mip levels :%d\n", (int)imageInfo.num_mip_levels ); + } + int retCode = test_read_image_3D( device, context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed ); + if( retCode ) + return retCode; + } + } + + return 0; +} + + + + diff --git a/test_conformance/images/kernel_read_write/test_write_1D.cpp b/test_conformance/images/kernel_read_write/test_write_1D.cpp new file mode 100644 index 00000000..85756222 --- /dev/null +++ b/test_conformance/images/kernel_read_write/test_write_1D.cpp @@ -0,0 +1,696 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../testBase.h" + +#if !defined(_WIN32) +#include +#endif + +#define MAX_ERR 0.005f + +extern cl_command_queue queue; +extern cl_context context; +extern bool gDebugTrace, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestRounding, gTestMipmaps; +extern cl_filter_mode gFilterModeToSkip; +extern cl_mem_flags gMemFlagsToUse; + +extern int gtestTypesToRun; + +const char *readwrite1DKernelSourcePattern = +"__kernel void sample_kernel( __global %s4 *input, read_write image1d_t output %s)\n" +"{\n" +" int tidX = get_global_id(0);\n" +" int offset = tidX;\n" +" write_image%s( output, tidX %s, input[ offset ]);\n" +"}"; + +const char *write1DKernelSourcePattern = +"__kernel void sample_kernel( __global %s4 *input, write_only image1d_t output %s)\n" +"{\n" +" int tidX = get_global_id(0);\n" +" int offset = tidX;\n" +" write_image%s( output, tidX %s, input[ offset ]);\n" +"}"; + +int test_write_image_1D( cl_device_id device, cl_context context, cl_command_queue queue, cl_kernel kernel, + image_descriptor *imageInfo, ExplicitType inputType, MTdata d ) +{ + int totalErrors = 0; + size_t num_flags = 0; + const cl_mem_flags *mem_flag_types = NULL; + const char * *mem_flag_names = NULL; + const cl_mem_flags write_only_mem_flag_types[2] = { CL_MEM_WRITE_ONLY, CL_MEM_READ_WRITE }; + const char * write_only_mem_flag_names[2] = { "CL_MEM_WRITE_ONLY", "CL_MEM_READ_WRITE" }; + const cl_mem_flags read_write_mem_flag_types[1] = { CL_MEM_READ_WRITE}; + const char * read_write_mem_flag_names[1] = { "CL_MEM_READ_WRITE"}; + + 
if(gtestTypesToRun & kWriteTests)
+ {
+ mem_flag_types = write_only_mem_flag_types;
+ mem_flag_names = write_only_mem_flag_names;
+ num_flags = sizeof( write_only_mem_flag_types ) / sizeof( write_only_mem_flag_types[0] );
+ }
+ else
+ {
+ mem_flag_types = read_write_mem_flag_types;
+ mem_flag_names = read_write_mem_flag_names;
+ num_flags = sizeof( read_write_mem_flag_types ) / sizeof( read_write_mem_flag_types[0] );
+ }
+ for( size_t mem_flag_index = 0; mem_flag_index < num_flags; mem_flag_index++ )
+ {
+ int error;
+ size_t threads[2];
+ bool verifyRounding = false;
+ /* fix: removed `int totalErrors = 0;` here — it shadowed the function-scope counter, so `return totalErrors;` at the end of the function always returned 0 */
+ int forceCorrectlyRoundedWrites = 0;
+
+#if defined( __APPLE__ )
+ // Require Apple's CPU implementation to be correctly rounded, not just within 0.6
+ cl_device_type type = 0;
+ if( (error = clGetDeviceInfo( device, CL_DEVICE_TYPE, sizeof( type), &type, NULL )))
+ {
+ log_error("Error: Could not get device type for Apple device! (%d) \n", error );
+ return 1;
+ }
+ if( type == CL_DEVICE_TYPE_CPU )
+ forceCorrectlyRoundedWrites = 1;
+#endif
+
+ if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT )
+ if( DetectFloatToHalfRoundingMode(queue) )
+ return 1;
+
+ BufferOwningPtr<char> maxImageUseHostPtrBackingStore, imageValues;
+
+ create_random_image_data( inputType, imageInfo, imageValues, d );
+
+ if(!gTestMipmaps)
+ {
+ if( inputType == kFloat && imageInfo->format->image_channel_data_type != CL_FLOAT && imageInfo->format->image_channel_data_type != CL_HALF_FLOAT )
+ {
+ /* Pilot data for sRGB images */
+ if(is_sRGBA_order(imageInfo->format->image_channel_order))
+ {
+ // We want to generate ints (mostly) in range of the target format which should be [0,255]
+ // However the range chosen here is [-test_range_ext, 255 + test_range_ext] so that
+ // it can test some out-of-range data points
+ const unsigned int test_range_ext = 16;
+ int formatMin = 0 - test_range_ext;
+ int formatMax = 255 + test_range_ext;
+ int pixel_value = 0;
+ float *inputValues = NULL;
+
+ // First, 
fill with arbitrary floats + { + inputValues = (float *)(char*)imageValues; + for( size_t i = 0; i < imageInfo->width * 4; i++ ) + { + pixel_value = random_in_range( formatMin, (int)formatMax, d ); + inputValues[ i ] = (float)(pixel_value/255.0f); + } + } + + // Throw a few extra test values in there + inputValues = (float *)(char*)imageValues; + size_t i = 0; + + // Piloting some debug inputs. + inputValues[ i++ ] = -0.5f; + inputValues[ i++ ] = 0.5f; + inputValues[ i++ ] = 2.f; + inputValues[ i++ ] = 0.5f; + + // Also fill in the first few vectors with some deliberate tests to determine the rounding mode + // is correct + if( imageInfo->width > 12 ) + { + float formatMax = (float)get_format_max_int( imageInfo->format ); + inputValues[ i++ ] = 4.0f / formatMax; + inputValues[ i++ ] = 4.3f / formatMax; + inputValues[ i++ ] = 4.5f / formatMax; + inputValues[ i++ ] = 4.7f / formatMax; + inputValues[ i++ ] = 5.0f / formatMax; + inputValues[ i++ ] = 5.3f / formatMax; + inputValues[ i++ ] = 5.5f / formatMax; + inputValues[ i++ ] = 5.7f / formatMax; + } + } + else + { + // First, fill with arbitrary floats + { + float *inputValues = (float *)(char*)imageValues; + for( size_t i = 0; i < imageInfo->width * 4; i++ ) + inputValues[ i ] = get_random_float( -0.1f, 1.1f, d ); + } + + // Throw a few extra test values in there + float *inputValues = (float *)(char*)imageValues; + size_t i = 0; + inputValues[ i++ ] = -0.0000000000009f; + inputValues[ i++ ] = 1.f; + inputValues[ i++ ] = -1.f; + inputValues[ i++ ] = 2.f; + + // Also fill in the first few vectors with some deliberate tests to determine the rounding mode + // is correct + if( imageInfo->width > 12 ) + { + float formatMax = (float)get_format_max_int( imageInfo->format ); + inputValues[ i++ ] = 4.0f / formatMax; + inputValues[ i++ ] = 4.3f / formatMax; + inputValues[ i++ ] = 4.5f / formatMax; + inputValues[ i++ ] = 4.7f / formatMax; + inputValues[ i++ ] = 5.0f / formatMax; + inputValues[ i++ ] = 5.3f / formatMax; + 
inputValues[ i++ ] = 5.5f / formatMax; + inputValues[ i++ ] = 5.7f / formatMax; + verifyRounding = true; + } + } + } + else if( inputType == kUInt ) + { + unsigned int *inputValues = (unsigned int*)(char*)imageValues; + size_t i = 0; + inputValues[ i++ ] = 0; + inputValues[ i++ ] = 65535; + inputValues[ i++ ] = 7271820; + inputValues[ i++ ] = 0; + } + } + + // Construct testing sources + clProtectedImage protImage; + clMemWrapper unprotImage; + cl_mem image; + + if( gMemFlagsToUse == CL_MEM_USE_HOST_PTR ) + { + // clProtectedImage uses USE_HOST_PTR, so just rely on that for the testing (via Ian) + // Do not use protected images for max image size test since it rounds the row size to a page size + if (gTestMaxImages) { + create_random_image_data( inputType, imageInfo, maxImageUseHostPtrBackingStore, d ); + + unprotImage = create_image_1d( context, mem_flag_types[mem_flag_index] | CL_MEM_USE_HOST_PTR, imageInfo->format, + imageInfo->width, 0, + maxImageUseHostPtrBackingStore, NULL, &error ); + } else { + error = protImage.Create( context, mem_flag_types[mem_flag_index], imageInfo->format, imageInfo->width ); + } + if( error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create 1D image of size %ld pitch %ld (%s, %s)\n", imageInfo->width, + imageInfo->rowPitch, IGetErrorString( error ), mem_flag_names[mem_flag_index] ); + return error; + } + + if (gTestMaxImages) + image = (cl_mem)unprotImage; + else + image = (cl_mem)protImage; + } + else // Either CL_MEM_ALLOC_HOST_PTR, CL_MEM_COPY_HOST_PTR or none + { + // Note: if ALLOC_HOST_PTR is used, the driver allocates memory that can be accessed by the host, but otherwise + // it works just as if no flag is specified, so we just do the same thing either way + // Note: if the flags is really CL_MEM_COPY_HOST_PTR, we want to remove it, because we don't want to copy any incoming data + if( gTestMipmaps ) + { + cl_image_desc image_desc = {0}; + image_desc.image_type = imageInfo->type; + image_desc.num_mip_levels = 
imageInfo->num_mip_levels; + image_desc.image_width = imageInfo->width; + image_desc.image_array_size = imageInfo->arraySize; + + unprotImage = clCreateImage( context, mem_flag_types[mem_flag_index] | ( gMemFlagsToUse & ~(CL_MEM_COPY_HOST_PTR) ), + imageInfo->format, &image_desc, NULL, &error); + if( error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create %d level 1D image of size %ld (%s, %s)\n", imageInfo->num_mip_levels, imageInfo->width, + IGetErrorString( error ), mem_flag_names[mem_flag_index] ); + return error; + } + } + else + { + unprotImage = create_image_1d( context, mem_flag_types[mem_flag_index] | ( gMemFlagsToUse & ~(CL_MEM_COPY_HOST_PTR) ), imageInfo->format, + imageInfo->width, 0, + imageValues, NULL, &error ); + if( error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create 1D image of size %ld pitch %ld (%s, %s)\n", imageInfo->width, + imageInfo->rowPitch, IGetErrorString( error ), mem_flag_names[mem_flag_index] ); + return error; + } + } + image = unprotImage; + } + + error = clSetKernelArg( kernel, 1, sizeof( cl_mem ), &image ); + test_error( error, "Unable to set kernel arguments" ); + + size_t width_lod = imageInfo->width, nextLevelOffset = 0; + size_t origin[ 3 ] = { 0, 0, 0 }; + size_t region[ 3 ] = { imageInfo->width, 1, 1 }; + size_t resultSize; + + for( int lod = 0; (gTestMipmaps && lod < imageInfo->num_mip_levels) || (!gTestMipmaps && lod < 1); lod++) + { + if(gTestMipmaps) + { + error = clSetKernelArg( kernel, 2, sizeof( int ), &lod ); + } + + clMemWrapper inputStream; + + char *imagePtrOffset = imageValues + nextLevelOffset; + inputStream = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), + get_explicit_type_size( inputType ) * 4 * width_lod, imagePtrOffset, &error ); + test_error( error, "Unable to create input buffer" ); + + // Set arguments + error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &inputStream ); + test_error( error, "Unable to set kernel arguments" ); + + // Run the kernel + threads[0] = 
(size_t)width_lod; + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL ); + test_error( error, "Unable to run kernel" ); + + // Get results + if( gTestMipmaps ) + resultSize = width_lod * get_pixel_size( imageInfo->format ); + else + resultSize = imageInfo->rowPitch; + clProtectedArray PA(resultSize); + char *resultValues = (char *)((void *)PA); + + if( gDebugTrace ) + log_info( " reading results, %ld kbytes\n", (unsigned long)( resultSize / 1024 ) ); + + origin[ 1 ] = lod; + region[ 0 ] = width_lod; + + error = clEnqueueReadImage( queue, image, CL_TRUE, origin, region, gEnablePitch ? imageInfo->rowPitch : 0, 0, resultValues, 0, NULL, NULL ); + test_error( error, "Unable to read results from kernel" ); + if( gDebugTrace ) + log_info( " results read\n" ); + + // Validate results element by element + char *imagePtr = imageValues + nextLevelOffset; + int numTries = 5; + { + char *resultPtr = (char *)resultValues; + for( size_t x = 0, i = 0; x < width_lod; x++, i++ ) + { + char resultBuffer[ 16 ]; // Largest format would be 4 channels * 4 bytes (32 bits) each + + // Convert this pixel + if( inputType == kFloat ) + pack_image_pixel( (float *)imagePtr, imageInfo->format, resultBuffer ); + else if( inputType == kInt ) + pack_image_pixel( (int *)imagePtr, imageInfo->format, resultBuffer ); + else // if( inputType == kUInt ) + pack_image_pixel( (unsigned int *)imagePtr, imageInfo->format, resultBuffer ); + + // Compare against the results + if(is_sRGBA_order(imageInfo->format->image_channel_order)) + { + // Compare sRGB-mapped values + cl_float expected[4] = {0}; + cl_float* input_values = (float*)imagePtr; + cl_uchar *actual = (cl_uchar*)resultPtr; + float max_err = MAX_lRGB_TO_sRGB_CONVERSION_ERROR; + float err[4] = {0.0f}; + + for( unsigned int j = 0; j < get_format_channel_count( imageInfo->format ); j++ ) + { + if(j < 3) + { + expected[j] = sRGBmap(input_values[j]); + } + else // there is no sRGB conversion for alpha component if it 
exists + { + expected[j] = NORMALIZE(input_values[j], 255.0f); + } + + err[j] = fabsf( expected[ j ] - actual[ j ] ); + } + + if ((err[0] > max_err) || + (err[1] > max_err) || + (err[2] > max_err) || + (err[3] > 0)) // there is no conversion for alpha so the error should be zero + { + log_error( " Error: %g %g %g %g\n", err[0], err[1], err[2], err[3]); + log_error( " Input: %g %g %g %g\n", *((float *)imagePtr), *((float *)imagePtr + 1), *((float *)imagePtr + 2), *((float *)imagePtr + 3)); + log_error( " Expected: %g %g %g %g\n", expected[ 0 ], expected[ 1 ], expected[ 2 ], expected[ 3 ] ); + log_error( " Actual: %d %d %d %d\n", actual[ 0 ], actual[ 1 ], actual[ 2 ], actual[ 3 ] ); + return 1; + } + } + else if( imageInfo->format->image_channel_data_type == CL_FLOAT ) + { + // Compare floats + float *expected = (float *)resultBuffer; + float *actual = (float *)resultPtr; + float err = 0.f; + for( unsigned int j = 0; j < get_format_channel_count( imageInfo->format ); j++ ) + err += ( expected[ j ] != 0 ) ? fabsf( ( expected[ j ] - actual[ j ] ) / expected[ j ] ) : fabsf( expected[ j ] - actual[ j ] ); + + err /= (float)get_format_channel_count( imageInfo->format ); + if( err > MAX_ERR ) + { + unsigned int *e = (unsigned int *)expected; + unsigned int *a = (unsigned int *)actual; + log_error( "ERROR: Sample %ld (%ld) did not validate! 
(%s)\n", i, x, mem_flag_names[mem_flag_index] ); + log_error( " Error: %g\n", err ); + log_error( " Expected: %a %a %a %a\n", expected[ 0 ], expected[ 1 ], expected[ 2 ], expected[ 3 ] ); + log_error( " Expected: %08x %08x %08x %08x\n", e[ 0 ], e[ 1 ], e[ 2 ], e[ 3 ] ); + log_error( " Actual: %a %a %a %a\n", actual[ 0 ], actual[ 1 ], actual[ 2 ], actual[ 3 ] ); + log_error( " Actual: %08x %08x %08x %08x\n", a[ 0 ], a[ 1 ], a[ 2 ], a[ 3 ] ); + totalErrors++; + if( ( --numTries ) == 0 ) + return 1; + } + } + else if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT ) + { + // Compare half floats + if( memcmp( resultBuffer, resultPtr, 2 * get_format_channel_count( imageInfo->format ) ) != 0 ) + { + cl_ushort *e = (cl_ushort *)resultBuffer; + cl_ushort *a = (cl_ushort *)resultPtr; + int err_cnt = 0; + + //Fix up cases where we have NaNs + for( size_t j = 0; j < get_format_channel_count( imageInfo->format ); j++ ) + { + if( is_half_nan( e[j] ) && is_half_nan(a[j]) ) + continue; + if( e[j] != a[j] ) + err_cnt++; + } + + if( err_cnt ) + { + totalErrors++; + log_error( "ERROR: Sample %ld (%ld) did not validate! (%s)\n", i, x, mem_flag_names[mem_flag_index] ); + log_error( " Expected: 0x%04x 0x%04x 0x%04x 0x%04x\n", e[0], e[1], e[2], e[3] ); + log_error( " Actual: 0x%04x 0x%04x 0x%04x 0x%04x\n", a[0], a[1], a[2], a[3] ); + if( inputType == kFloat ) + { + float *p = (float *)(char *)imagePtr; + log_error( " Source: %a %a %a %a\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] ); + log_error( " : %12.24f %12.24f %12.24f %12.24f\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] ); + } + if( ( --numTries ) == 0 ) + return 1; + } + } + } + else + { + // Exact result passes every time + if( memcmp( resultBuffer, resultPtr, get_pixel_size( imageInfo->format ) ) != 0 ) + { + // result is inexact. 
Calculate error + int failure = 1; + float errors[4] = {NAN, NAN, NAN, NAN}; + pack_image_pixel_error( (float *)imagePtr, imageInfo->format, resultBuffer, errors ); + + // We are allowed 0.6 absolute error vs. infinitely precise for some normalized formats + if( 0 == forceCorrectlyRoundedWrites && + ( + imageInfo->format->image_channel_data_type == CL_UNORM_INT8 || + imageInfo->format->image_channel_data_type == CL_UNORM_INT_101010 || + imageInfo->format->image_channel_data_type == CL_UNORM_INT16 || + imageInfo->format->image_channel_data_type == CL_SNORM_INT8 || + imageInfo->format->image_channel_data_type == CL_SNORM_INT16 + )) + { + if( ! (fabsf( errors[0] ) > 0.6f) && ! (fabsf( errors[1] ) > 0.6f) && + ! (fabsf( errors[2] ) > 0.6f) && ! (fabsf( errors[3] ) > 0.6f) ) + failure = 0; + } + + + if( failure ) + { + totalErrors++; + // Is it our special rounding test? + if( verifyRounding && i >= 1 && i <= 2 ) + { + // Try to guess what the rounding mode of the device really is based on what it returned + const char *deviceRounding = "unknown"; + unsigned int deviceResults[8]; + read_image_pixel( resultPtr, imageInfo, 0, 0, 0, deviceResults, lod); + read_image_pixel( resultPtr, imageInfo, 1, 0, 0, &deviceResults[ 4 ], lod ); + + if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 4 && deviceResults[ 3 ] == 4 && + deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 5 && deviceResults[ 7 ] == 5 ) + deviceRounding = "truncate"; + else if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 5 && deviceResults[ 3 ] == 5 && + deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 6 && deviceResults[ 7 ] == 6 ) + deviceRounding = "round to nearest"; + else if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 4 && deviceResults[ 3 ] == 5 && + deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 6 && deviceResults[ 7 ] == 6 ) + 
deviceRounding = "round to even"; + + log_error( "ERROR: Rounding mode sample (%ld) did not validate, probably due to the device's rounding mode being wrong (%s)\n", i, mem_flag_names[mem_flag_index] ); + log_error( " Actual values rounded by device: %x %x %x %x %x %x %x %x\n", deviceResults[ 0 ], deviceResults[ 1 ], deviceResults[ 2 ], deviceResults[ 3 ], + deviceResults[ 4 ], deviceResults[ 5 ], deviceResults[ 6 ], deviceResults[ 7 ] ); + log_error( " Rounding mode of device appears to be %s\n", deviceRounding ); + return 1; + } + log_error( "ERROR: Sample %d (%d) did not validate!\n", (int)i, (int)x ); + switch(imageInfo->format->image_channel_data_type) + { + case CL_UNORM_INT8: + case CL_SNORM_INT8: + case CL_UNSIGNED_INT8: + case CL_SIGNED_INT8: + log_error( " Expected: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultBuffer)[0], ((cl_uchar*)resultBuffer)[1], ((cl_uchar*)resultBuffer)[2], ((cl_uchar*)resultBuffer)[3] ); + log_error( " Actual: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultPtr)[0], ((cl_uchar*)resultPtr)[1], ((cl_uchar*)resultPtr)[2], ((cl_uchar*)resultPtr)[3] ); + log_error( " Error: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] ); + break; + case CL_UNORM_INT16: + case CL_SNORM_INT16: + case CL_UNSIGNED_INT16: + case CL_SIGNED_INT16: +#ifdef CL_SFIXED14_APPLE + case CL_SFIXED14_APPLE: +#endif + log_error( " Expected: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultBuffer)[0], ((cl_ushort*)resultBuffer)[1], ((cl_ushort*)resultBuffer)[2], ((cl_ushort*)resultBuffer)[3] ); + log_error( " Actual: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultPtr)[0], ((cl_ushort*)resultPtr)[1], ((cl_ushort*)resultPtr)[2], ((cl_ushort*)resultPtr)[3] ); + log_error( " Error: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] ); + break; + case CL_HALF_FLOAT: + log_error( " Expected: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultBuffer)[0], ((cl_ushort*)resultBuffer)[1], ((cl_ushort*)resultBuffer)[2], 
((cl_ushort*)resultBuffer)[3] ); + log_error( " Actual: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultPtr)[0], ((cl_ushort*)resultPtr)[1], ((cl_ushort*)resultPtr)[2], ((cl_ushort*)resultPtr)[3] ); + log_error( " Ulps: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] ); + break; + case CL_UNSIGNED_INT32: + case CL_SIGNED_INT32: + log_error( " Expected: 0x%8.8x 0x%8.8x 0x%8.8x 0x%8.8x\n", ((cl_uint*)resultBuffer)[0], ((cl_uint*)resultBuffer)[1], ((cl_uint*)resultBuffer)[2], ((cl_uint*)resultBuffer)[3] ); + log_error( " Actual: 0x%8.8x 0x%8.8x 0x%8.8x 0x%8.8x\n", ((cl_uint*)resultPtr)[0], ((cl_uint*)resultPtr)[1], ((cl_uint*)resultPtr)[2], ((cl_uint*)resultPtr)[3] ); + break; + case CL_FLOAT: + log_error( " Expected: %a %a %a %a\n", ((cl_float*)resultBuffer)[0], ((cl_float*)resultBuffer)[1], ((cl_float*)resultBuffer)[2], ((cl_float*)resultBuffer)[3] ); + log_error( " Actual: %a %a %a %a\n", ((cl_float*)resultPtr)[0], ((cl_float*)resultPtr)[1], ((cl_float*)resultPtr)[2], ((cl_float*)resultPtr)[3] ); + log_error( " Ulps: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] ); + break; + } + + float *v = (float *)(char *)imagePtr; + log_error( " src: %g %g %g %g\n", v[ 0 ], v[ 1], v[ 2 ], v[ 3 ] ); + log_error( " : %a %a %a %a\n", v[ 0 ], v[ 1], v[ 2 ], v[ 3 ] ); + log_error( " src: %12.24f %12.24f %12.24f %12.24f\n", v[0 ], v[ 1], v[ 2 ], v[ 3 ] ); + + if( ( --numTries ) == 0 ) + return 1; + } + } + } + imagePtr += get_explicit_type_size( inputType ) * 4; + resultPtr += get_pixel_size( imageInfo->format ); + } + } + { + nextLevelOffset += width_lod * get_pixel_size( imageInfo->format ); + width_lod = (width_lod >> 1) ? (width_lod >> 1) : 1; + } + } + } + + // All done! 
+ return totalErrors; +} + + +int test_write_image_1D_set( cl_device_id device, cl_image_format *format, ExplicitType inputType, MTdata d ) +{ + char programSrc[10240]; + const char *ptr; + const char *readFormat; + clProgramWrapper program; + clKernelWrapper kernel; + const char *KernelSourcePattern = NULL; + int error; + + // Get our operating parameters + size_t maxWidth; + cl_ulong maxAllocSize, memSize; + size_t pixelSize; + + image_descriptor imageInfo = { 0x0 }; + + imageInfo.format = format; + imageInfo.slicePitch = imageInfo.arraySize = 0; + imageInfo.height = imageInfo.depth = 1; + imageInfo.type = CL_MEM_OBJECT_IMAGE1D; + pixelSize = get_pixel_size( imageInfo.format ); + + error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL ); + test_error( error, "Unable to get max image 2D size from device" ); + + if (memSize > (cl_ulong)SIZE_MAX) { + memSize = (cl_ulong)SIZE_MAX; + } + + // Determine types + if( inputType == kInt ) + readFormat = "i"; + else if( inputType == kUInt ) + readFormat = "ui"; + else // kFloat + readFormat = "f"; + + // Construct the source + if(gtestTypesToRun & kWriteTests) + { + KernelSourcePattern = write1DKernelSourcePattern; + } + else + { + KernelSourcePattern = readwrite1DKernelSourcePattern; + } + + sprintf( programSrc, + KernelSourcePattern, + get_explicit_type_name( inputType ), + gTestMipmaps ? ", int lod" : "", + readFormat, + gTestMipmaps ? 
", lod" :"" ); + + ptr = programSrc; + error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", "-cl-std=CL2.0" ); + test_error( error, "Unable to create testing kernel" ); + + // Run tests + if( gTestSmallImages ) + { + for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ ) + { + imageInfo.rowPitch = imageInfo.width * pixelSize; + + if(gTestMipmaps) + imageInfo.num_mip_levels = (size_t)random_in_range(2, (compute_max_mip_levels(imageInfo.width, 0, 0)-1), d); + + if( gDebugTrace ) + log_info( " at size %d\n", (int)imageInfo.width ); + int retCode = test_write_image_1D( device, context, queue, kernel, &imageInfo, inputType, d ); + if( retCode ) + return retCode; + } + } + else if( gTestMaxImages ) + { + // Try a specific set of maximum sizes + size_t numbeOfSizes; + size_t sizes[100][3]; + + get_max_sizes(&numbeOfSizes, 100, sizes, maxWidth, 1, 1, 1, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE1D, imageInfo.format, CL_TRUE); + + for( size_t idx = 0; idx < numbeOfSizes; idx++ ) + { + imageInfo.width = sizes[ idx ][ 0 ]; + imageInfo.rowPitch = imageInfo.width * pixelSize; + if(gTestMipmaps) + imageInfo.num_mip_levels = (size_t)random_in_range(2, (compute_max_mip_levels(imageInfo.width, 0, 0)-1), d); + log_info("Testing %d\n", (int)imageInfo.width); + int retCode = test_write_image_1D( device, context, queue, kernel, &imageInfo, inputType, d ); + if( retCode ) + return retCode; + } + } + else if( gTestRounding ) + { + size_t typeRange = 1 << ( get_format_type_size( imageInfo.format ) * 8 ); + imageInfo.width = typeRange / 256; + + imageInfo.rowPitch = imageInfo.width * pixelSize; + int retCode = test_write_image_1D( device, context, queue, kernel, &imageInfo, inputType, d ); + if( retCode ) + return retCode; + } + else + { + for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ ) + { + cl_ulong size; + // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that + // image, 
the result array, plus offset arrays, will fit in the global ram space + do + { + imageInfo.width = (size_t)random_log_in_range( 16, (int)maxWidth / 32, d ); + + if( gTestMipmaps) + { + imageInfo.num_mip_levels = (size_t)random_in_range(2, (compute_max_mip_levels(imageInfo.width, 0, 0)-1), d); + size = (cl_ulong) compute_mipmapped_image_size(imageInfo) * 4; + } + else + { + imageInfo.rowPitch = imageInfo.width * pixelSize; + if( gEnablePitch ) + { + size_t extraWidth = (int)random_log_in_range( 0, 64, d ); + imageInfo.rowPitch += extraWidth * pixelSize; + } + + size = (size_t)imageInfo.rowPitch * 4; + } + } while( size > maxAllocSize || ( size * 3 ) > memSize ); + + if( gDebugTrace ) + { + log_info( " at size %d (pitch %d) out of %d\n", (int)imageInfo.width, (int)imageInfo.rowPitch, (int)maxWidth ); + if( gTestMipmaps ) + log_info( " and %d mip levels\n", (int)imageInfo.num_mip_levels ); + } + + int retCode = test_write_image_1D( device, context, queue, kernel, &imageInfo, inputType, d ); + if( retCode ) + return retCode; + } + } + + return 0; +} diff --git a/test_conformance/images/kernel_read_write/test_write_1D_array.cpp b/test_conformance/images/kernel_read_write/test_write_1D_array.cpp new file mode 100644 index 00000000..aa9aab1f --- /dev/null +++ b/test_conformance/images/kernel_read_write/test_write_1D_array.cpp @@ -0,0 +1,723 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../testBase.h" + +#if !defined(_WIN32) +#include +#endif + +#define MAX_ERR 0.005f + +extern cl_command_queue queue; +extern cl_context context; +extern bool gDebugTrace, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestRounding, gTestMipmaps; +extern cl_filter_mode gFilterModeToSkip; +extern cl_mem_flags gMemFlagsToUse; +extern int gtestTypesToRun; + +const char *readwrite1DArrayKernelSourcePattern = +"__kernel void sample_kernel( __global %s4 *input, read_write image1d_array_t output %s)\n" +"{\n" +" int tidX = get_global_id(0), tidY = get_global_id(1);\n" +"%s" +" write_image%s( output, (int2)( tidX, tidY )%s, input[ offset ]);\n" +"}"; + +const char *write1DArrayKernelSourcePattern = +"__kernel void sample_kernel( __global %s4 *input, write_only image1d_array_t output %s)\n" +"{\n" +" int tidX = get_global_id(0), tidY = get_global_id(1);\n" +"%s" +" write_image%s( output, (int2)( tidX, tidY ) %s, input[ offset ]);\n" +"}"; + +const char *offset1DArraySource = +" int offset = tidY*get_image_width(output) + tidX;\n"; + +const char *offset1DArrayLodSource = +" int width_lod = ( get_image_width(output) >> lod ) ? 
( get_image_width(output) >> lod ) : 1;\n" +" int offset = tidY*width_lod + tidX;\n"; + +int test_write_image_1D_array( cl_device_id device, cl_context context, cl_command_queue queue, cl_kernel kernel, + image_descriptor *imageInfo, ExplicitType inputType, MTdata d ) +{ + int totalErrors = 0; + size_t num_flags = 0; + const cl_mem_flags *mem_flag_types = NULL; + const char * *mem_flag_names = NULL; + const cl_mem_flags write_only_mem_flag_types[2] = { CL_MEM_WRITE_ONLY, CL_MEM_READ_WRITE }; + const char * write_only_mem_flag_names[2] = { "CL_MEM_WRITE_ONLY", "CL_MEM_READ_WRITE" }; + const cl_mem_flags read_write_mem_flag_types[1] = { CL_MEM_READ_WRITE}; + const char * read_write_mem_flag_names[1] = { "CL_MEM_READ_WRITE"}; + + if(gtestTypesToRun & kWriteTests) + { + mem_flag_types = write_only_mem_flag_types; + mem_flag_names = write_only_mem_flag_names; + num_flags = sizeof( write_only_mem_flag_types ) / sizeof( write_only_mem_flag_types[0] ); + } + else + { + mem_flag_types = read_write_mem_flag_types; + mem_flag_names = read_write_mem_flag_names; + num_flags = sizeof( read_write_mem_flag_types ) / sizeof( read_write_mem_flag_types[0] ); + } + + size_t pixelSize = get_pixel_size( imageInfo->format ); + + for( size_t mem_flag_index = 0; mem_flag_index < num_flags; mem_flag_index++ ) + { + int error; + size_t threads[2]; + bool verifyRounding = false; + int totalErrors = 0; + int forceCorrectlyRoundedWrites = 0; + +#if defined( __APPLE__ ) + // Require Apple's CPU implementation to be correctly rounded, not just within 0.6 + cl_device_type type = 0; + if( (error = clGetDeviceInfo( device, CL_DEVICE_TYPE, sizeof( type), &type, NULL ))) + { + log_error("Error: Could not get device type for Apple device! 
(%d) \n", error ); + return 1; + } + if( type == CL_DEVICE_TYPE_CPU ) + forceCorrectlyRoundedWrites = 1; +#endif + + if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT ) + if( DetectFloatToHalfRoundingMode(queue) ) + return 1; + + BufferOwningPtr maxImageUseHostPtrBackingStore, imageValues; + + create_random_image_data( inputType, imageInfo, imageValues, d ); + + if(!gTestMipmaps) + { + if( inputType == kFloat && imageInfo->format->image_channel_data_type != CL_FLOAT && imageInfo->format->image_channel_data_type != CL_HALF_FLOAT ) + { + /* Pilot data for sRGB images */ + if(is_sRGBA_order(imageInfo->format->image_channel_order)) + { + // We want to generate ints (mostly) in range of the target format which should be [0,255] + // However the range chosen here is [-test_range_ext, 255 + test_range_ext] so that + // it can test some out-of-range data points + const unsigned int test_range_ext = 16; + int formatMin = 0 - test_range_ext; + int formatMax = 255 + test_range_ext; + int pixel_value = 0; + + // First, fill with arbitrary floats + for( size_t y = 0; y < imageInfo->arraySize; y++ ) + { + float *inputValues = (float *)(char*)imageValues + y * imageInfo->width * 4; + for( size_t i = 0; i < imageInfo->width * 4; i++ ) + { + pixel_value = random_in_range( formatMin, (int)formatMax, d ); + inputValues[ i ] = (float)(pixel_value/255.0f); + } + } + + // Throw a few extra test values in there + float *inputValues = (float *)(char*)imageValues; + size_t i = 0; + + // Piloting some debug inputs. 
+ inputValues[ i++ ] = -0.5f; + inputValues[ i++ ] = 0.5f; + inputValues[ i++ ] = 2.f; + inputValues[ i++ ] = 0.5f; + + // Also fill in the first few vectors with some deliberate tests to determine the rounding mode + // is correct + if( imageInfo->width > 12 ) + { + float formatMax = (float)get_format_max_int( imageInfo->format ); + inputValues[ i++ ] = 4.0f / formatMax; + inputValues[ i++ ] = 4.3f / formatMax; + inputValues[ i++ ] = 4.5f / formatMax; + inputValues[ i++ ] = 4.7f / formatMax; + inputValues[ i++ ] = 5.0f / formatMax; + inputValues[ i++ ] = 5.3f / formatMax; + inputValues[ i++ ] = 5.5f / formatMax; + inputValues[ i++ ] = 5.7f / formatMax; + } + } + else + { + // First, fill with arbitrary floats + for( size_t y = 0; y < imageInfo->arraySize; y++ ) + { + float *inputValues = (float *)(char*)imageValues + y * imageInfo->width * 4; + for( size_t i = 0; i < imageInfo->width * 4; i++ ) + inputValues[ i ] = get_random_float( -0.1f, 1.1f, d ); + } + + // Throw a few extra test values in there + float *inputValues = (float *)(char*)imageValues; + size_t i = 0; + inputValues[ i++ ] = -0.0000000000009f; + inputValues[ i++ ] = 1.f; + inputValues[ i++ ] = -1.f; + inputValues[ i++ ] = 2.f; + + // Also fill in the first few vectors with some deliberate tests to determine the rounding mode + // is correct + if( imageInfo->width > 12 ) + { + float formatMax = (float)get_format_max_int( imageInfo->format ); + inputValues[ i++ ] = 4.0f / formatMax; + inputValues[ i++ ] = 4.3f / formatMax; + inputValues[ i++ ] = 4.5f / formatMax; + inputValues[ i++ ] = 4.7f / formatMax; + inputValues[ i++ ] = 5.0f / formatMax; + inputValues[ i++ ] = 5.3f / formatMax; + inputValues[ i++ ] = 5.5f / formatMax; + inputValues[ i++ ] = 5.7f / formatMax; + verifyRounding = true; + } + } + } + else if( inputType == kUInt ) + { + unsigned int *inputValues = (unsigned int*)(char*)imageValues; + size_t i = 0; + inputValues[ i++ ] = 0; + inputValues[ i++ ] = 65535; + inputValues[ i++ ] = 7271820; 
+ inputValues[ i++ ] = 0; + } + } + + // Construct testing sources + clProtectedImage protImage; + clMemWrapper unprotImage; + cl_mem image; + + if( gMemFlagsToUse == CL_MEM_USE_HOST_PTR ) + { + // clProtectedImage uses USE_HOST_PTR, so just rely on that for the testing (via Ian) + // Do not use protected images for max image size test since it rounds the row size to a page size + if (gTestMaxImages) { + create_random_image_data( inputType, imageInfo, maxImageUseHostPtrBackingStore, d ); + + unprotImage = create_image_1d_array( context, mem_flag_types[mem_flag_index] | CL_MEM_USE_HOST_PTR, imageInfo->format, + imageInfo->width, imageInfo->arraySize, 0, 0, + maxImageUseHostPtrBackingStore, &error ); + } else { + error = protImage.Create( context, (cl_mem_object_type)CL_MEM_OBJECT_IMAGE1D_ARRAY, mem_flag_types[mem_flag_index], imageInfo->format, imageInfo->width, 1, 1, imageInfo->arraySize ); + } + if( error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create 1D image array of size %ld x %ld pitch %ld (%s, %s)\n", imageInfo->width, imageInfo->arraySize, + imageInfo->rowPitch, IGetErrorString( error ), mem_flag_names[mem_flag_index] ); + return error; + } + + if (gTestMaxImages) + image = (cl_mem)unprotImage; + else + image = (cl_mem)protImage; + } + else // Either CL_MEM_ALLOC_HOST_PTR, CL_MEM_COPY_HOST_PTR or none + { + // Note: if ALLOC_HOST_PTR is used, the driver allocates memory that can be accessed by the host, but otherwise + // it works just as if no flag is specified, so we just do the same thing either way + // Note: if the flags is really CL_MEM_COPY_HOST_PTR, we want to remove it, because we don't want to copy any incoming data + if( gTestMipmaps ) + { + cl_image_desc image_desc = {0}; + image_desc.image_type = imageInfo->type; + image_desc.num_mip_levels = imageInfo->num_mip_levels; + image_desc.image_width = imageInfo->width; + image_desc.image_array_size = imageInfo->arraySize; + + unprotImage = clCreateImage( context, 
mem_flag_types[mem_flag_index] | ( gMemFlagsToUse & ~(CL_MEM_COPY_HOST_PTR) ), + imageInfo->format, &image_desc, NULL, &error); + if( error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create %d level 1D image array of size %ld x %ld (%s, %s)\n", imageInfo->num_mip_levels, imageInfo->width, imageInfo->arraySize, + IGetErrorString( error ), mem_flag_names[mem_flag_index] ); + return error; + } + } + else + { + unprotImage = create_image_1d_array( context, mem_flag_types[mem_flag_index] | ( gMemFlagsToUse & ~(CL_MEM_COPY_HOST_PTR) ), imageInfo->format, + imageInfo->width, imageInfo->arraySize, 0, 0, + imageValues, &error ); + if( error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create 1D image array of size %ld x %ld pitch %ld (%s, %s)\n", imageInfo->width, imageInfo->arraySize, + imageInfo->rowPitch, IGetErrorString( error ), mem_flag_names[mem_flag_index] ); + return error; + } + } + image = unprotImage; + } + + error = clSetKernelArg( kernel, 1, sizeof( cl_mem ), &image ); + test_error( error, "Unable to set kernel arguments" ); + + size_t width_lod = imageInfo->width, nextLevelOffset = 0; + size_t origin[ 3 ] = { 0, 0, 0 }; + size_t region[ 3 ] = { imageInfo->width, imageInfo->arraySize, 1 }; + size_t resultSize; + + for( int lod = 0; (gTestMipmaps && lod < imageInfo->num_mip_levels) || (!gTestMipmaps && lod < 1); lod++) + { + if(gTestMipmaps) + { + error = clSetKernelArg( kernel, 2, sizeof( int ), &lod ); + + } + // Run the kernel + threads[0] = (size_t)width_lod; + threads[1] = (size_t)imageInfo->arraySize; + + clMemWrapper inputStream; + + char *imagePtrOffset = imageValues + nextLevelOffset; + inputStream = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), + get_explicit_type_size( inputType ) * 4 * width_lod * imageInfo->arraySize, imagePtrOffset, &error ); + test_error( error, "Unable to create input buffer" ); + + // Set arguments + error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &inputStream ); + test_error( error, 
"Unable to set kernel arguments" ); + + error = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, NULL ); + test_error( error, "Unable to run kernel" ); + + // Get results + if( gTestMipmaps ) + resultSize = width_lod * get_pixel_size(imageInfo->format) * imageInfo->arraySize; + else + resultSize = imageInfo->rowPitch * imageInfo->arraySize; + + clProtectedArray PA(resultSize); + char *resultValues = (char *)((void *)PA); + + if( gDebugTrace ) + log_info( " reading results, %ld kbytes\n", (unsigned long)( resultSize / 1024 ) ); + + + origin[2] = lod; + region[0] = width_lod; + error = clEnqueueReadImage( queue, image, CL_TRUE, origin, region, + gEnablePitch ? imageInfo->rowPitch : 0, gEnablePitch ? imageInfo->slicePitch : 0, resultValues, 0, NULL, NULL ); + test_error( error, "Unable to read results from kernel" ); + if( gDebugTrace ) + log_info( " results read\n" ); + + // Validate results element by element + char *imagePtr = imageValues + nextLevelOffset; + int numTries = 5; + for( size_t y = 0, i = 0; y < imageInfo->arraySize; y++ ) + { + char *resultPtr; + if( gTestMipmaps ) + resultPtr = (char *)resultValues + y * width_lod * pixelSize; + else + resultPtr = (char*)resultValues + y * imageInfo->rowPitch; + for( size_t x = 0; x < width_lod; x++, i++ ) + { + char resultBuffer[ 16 ]; // Largest format would be 4 channels * 4 bytes (32 bits) each + + // Convert this pixel + if( inputType == kFloat ) + pack_image_pixel( (float *)imagePtr, imageInfo->format, resultBuffer ); + else if( inputType == kInt ) + pack_image_pixel( (int *)imagePtr, imageInfo->format, resultBuffer ); + else // if( inputType == kUInt ) + pack_image_pixel( (unsigned int *)imagePtr, imageInfo->format, resultBuffer ); + + // Compare against the results + if(is_sRGBA_order(imageInfo->format->image_channel_order)) + { + // Compare sRGB-mapped values + cl_float expected[4] = {0}; + cl_float* input_values = (float*)imagePtr; + cl_uchar *actual = (cl_uchar*)resultPtr; + float 
max_err = MAX_lRGB_TO_sRGB_CONVERSION_ERROR; + float err[4] = {0.0f}; + + for( unsigned int j = 0; j < get_format_channel_count( imageInfo->format ); j++ ) + { + if(j < 3) + { + expected[j] = sRGBmap(input_values[j]); + } + else // there is no sRGB conversion for alpha component if it exists + { + expected[j] = NORMALIZE(input_values[j], 255.0f); + } + + err[j] = fabsf( expected[ j ] - actual[ j ] ); + } + + if ((err[0] > max_err) || + (err[1] > max_err) || + (err[2] > max_err) || + (err[3] > 0)) // there is no conversion for alpha so the error should be zero + { + log_error( " Error: %g %g %g %g\n", err[0], err[1], err[2], err[3]); + log_error( " Input: %g %g %g %g\n", *((float *)imagePtr), *((float *)imagePtr + 1), *((float *)imagePtr + 2), *((float *)imagePtr + 3)); + log_error( " Expected: %g %g %g %g\n", expected[ 0 ], expected[ 1 ], expected[ 2 ], expected[ 3 ] ); + log_error( " Actual: %d %d %d %d\n", actual[ 0 ], actual[ 1 ], actual[ 2 ], actual[ 3 ] ); + return 1; + } + } + else if( imageInfo->format->image_channel_data_type == CL_FLOAT ) + { + // Compare floats + float *expected = (float *)resultBuffer; + float *actual = (float *)resultPtr; + float err = 0.f; + for( unsigned int j = 0; j < get_format_channel_count( imageInfo->format ); j++ ) + err += ( expected[ j ] != 0 ) ? fabsf( ( expected[ j ] - actual[ j ] ) / expected[ j ] ) : fabsf( expected[ j ] - actual[ j ] ); + + err /= (float)get_format_channel_count( imageInfo->format ); + if( err > MAX_ERR ) + { + unsigned int *e = (unsigned int *)expected; + unsigned int *a = (unsigned int *)actual; + log_error( "ERROR: Sample %ld (%ld,%ld) did not validate! 
(%s)\n", i, x, y, mem_flag_names[mem_flag_index] ); + log_error( " Error: %g\n", err ); + log_error( " Expected: %a %a %a %a\n", expected[ 0 ], expected[ 1 ], expected[ 2 ], expected[ 3 ] ); + log_error( " Expected: %08x %08x %08x %08x\n", e[ 0 ], e[ 1 ], e[ 2 ], e[ 3 ] ); + log_error( " Actual: %a %a %a %a\n", actual[ 0 ], actual[ 1 ], actual[ 2 ], actual[ 3 ] ); + log_error( " Actual: %08x %08x %08x %08x\n", a[ 0 ], a[ 1 ], a[ 2 ], a[ 3 ] ); + totalErrors++; + if( ( --numTries ) == 0 ) + return 1; + } + } + else if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT ) + { + + // Compare half floats + if( memcmp( resultBuffer, resultPtr, 2 * get_format_channel_count( imageInfo->format ) ) != 0 ) + { + cl_ushort *e = (cl_ushort *)resultBuffer; + cl_ushort *a = (cl_ushort *)resultPtr; + int err_cnt = 0; + + //Fix up cases where we have NaNs + for( size_t j = 0; j < get_format_channel_count( imageInfo->format ); j++ ) + { + if( is_half_nan( e[j] ) && is_half_nan(a[j]) ) + continue; + if( e[j] != a[j] ) + err_cnt++; + } + + if( err_cnt ) + { + totalErrors++; + log_error( "ERROR: Sample %ld (%ld,%ld) did not validate! (%s)\n", i, x, y, mem_flag_names[mem_flag_index] ); + log_error( " Expected: 0x%04x 0x%04x 0x%04x 0x%04x\n", e[0], e[1], e[2], e[3] ); + log_error( " Actual: 0x%04x 0x%04x 0x%04x 0x%04x\n", a[0], a[1], a[2], a[3] ); + if( inputType == kFloat ) + { + float *p = (float *)(char *)imagePtr; + log_error( " Source: %a %a %a %a\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] ); + log_error( " : %12.24f %12.24f %12.24f %12.24f\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] ); + } + if( ( --numTries ) == 0 ) + return 1; + } + } + } + else + { + // Exact result passes every time + if( memcmp( resultBuffer, resultPtr, pixelSize ) != 0 ) + { + // result is inexact. Calculate error + int failure = 1; + float errors[4] = {NAN, NAN, NAN, NAN}; + pack_image_pixel_error( (float *)imagePtr, imageInfo->format, resultBuffer, errors ); + + // We are allowed 0.6 absolute error vs. 
infinitely precise for some normalized formats + if( 0 == forceCorrectlyRoundedWrites && + ( + imageInfo->format->image_channel_data_type == CL_UNORM_INT8 || + imageInfo->format->image_channel_data_type == CL_UNORM_INT_101010 || + imageInfo->format->image_channel_data_type == CL_UNORM_INT16 || + imageInfo->format->image_channel_data_type == CL_SNORM_INT8 || + imageInfo->format->image_channel_data_type == CL_SNORM_INT16 + )) + { + if( ! (fabsf( errors[0] ) > 0.6f) && ! (fabsf( errors[1] ) > 0.6f) && + ! (fabsf( errors[2] ) > 0.6f) && ! (fabsf( errors[3] ) > 0.6f) ) + failure = 0; + } + + + if( failure ) + { + totalErrors++; + // Is it our special rounding test? + if( verifyRounding && i >= 1 && i <= 2 ) + { + // Try to guess what the rounding mode of the device really is based on what it returned + const char *deviceRounding = "unknown"; + unsigned int deviceResults[8]; + read_image_pixel( resultPtr, imageInfo, 0, 0, 0, deviceResults, lod ); + read_image_pixel( resultPtr, imageInfo, 1, 0, 0, &deviceResults[ 4 ], lod ); + + if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 4 && deviceResults[ 3 ] == 4 && + deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 5 && deviceResults[ 7 ] == 5 ) + deviceRounding = "truncate"; + else if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 5 && deviceResults[ 3 ] == 5 && + deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 6 && deviceResults[ 7 ] == 6 ) + deviceRounding = "round to nearest"; + else if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 4 && deviceResults[ 3 ] == 5 && + deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 6 && deviceResults[ 7 ] == 6 ) + deviceRounding = "round to even"; + + log_error( "ERROR: Rounding mode sample (%ld) did not validate, probably due to the device's rounding mode being wrong (%s)\n", i, mem_flag_names[mem_flag_index] ); + 
log_error( " Actual values rounded by device: %x %x %x %x %x %x %x %x\n", deviceResults[ 0 ], deviceResults[ 1 ], deviceResults[ 2 ], deviceResults[ 3 ], + deviceResults[ 4 ], deviceResults[ 5 ], deviceResults[ 6 ], deviceResults[ 7 ] ); + log_error( " Rounding mode of device appears to be %s\n", deviceRounding ); + return 1; + } + log_error( "ERROR: Sample %d (%d,%d) did not validate!\n", (int)i, (int)x, (int)y ); + switch(imageInfo->format->image_channel_data_type) + { + case CL_UNORM_INT8: + case CL_SNORM_INT8: + case CL_UNSIGNED_INT8: + case CL_SIGNED_INT8: + log_error( " Expected: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultBuffer)[0], ((cl_uchar*)resultBuffer)[1], ((cl_uchar*)resultBuffer)[2], ((cl_uchar*)resultBuffer)[3] ); + log_error( " Actual: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultPtr)[0], ((cl_uchar*)resultPtr)[1], ((cl_uchar*)resultPtr)[2], ((cl_uchar*)resultPtr)[3] ); + log_error( " Error: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] ); + break; + case CL_UNORM_INT16: + case CL_SNORM_INT16: + case CL_UNSIGNED_INT16: + case CL_SIGNED_INT16: +#ifdef CL_SFIXED14_APPLE + case CL_SFIXED14_APPLE: +#endif + log_error( " Expected: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultBuffer)[0], ((cl_ushort*)resultBuffer)[1], ((cl_ushort*)resultBuffer)[2], ((cl_ushort*)resultBuffer)[3] ); + log_error( " Actual: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultPtr)[0], ((cl_ushort*)resultPtr)[1], ((cl_ushort*)resultPtr)[2], ((cl_ushort*)resultPtr)[3] ); + log_error( " Error: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] ); + break; + case CL_HALF_FLOAT: + log_error( " Expected: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultBuffer)[0], ((cl_ushort*)resultBuffer)[1], ((cl_ushort*)resultBuffer)[2], ((cl_ushort*)resultBuffer)[3] ); + log_error( " Actual: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultPtr)[0], ((cl_ushort*)resultPtr)[1], ((cl_ushort*)resultPtr)[2], 
((cl_ushort*)resultPtr)[3] ); + log_error( " Ulps: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] ); + break; + case CL_UNSIGNED_INT32: + case CL_SIGNED_INT32: + log_error( " Expected: 0x%8.8x 0x%8.8x 0x%8.8x 0x%8.8x\n", ((cl_uint*)resultBuffer)[0], ((cl_uint*)resultBuffer)[1], ((cl_uint*)resultBuffer)[2], ((cl_uint*)resultBuffer)[3] ); + log_error( " Actual: 0x%8.8x 0x%8.8x 0x%8.8x 0x%8.8x\n", ((cl_uint*)resultPtr)[0], ((cl_uint*)resultPtr)[1], ((cl_uint*)resultPtr)[2], ((cl_uint*)resultPtr)[3] ); + break; + case CL_FLOAT: + log_error( " Expected: %a %a %a %a\n", ((cl_float*)resultBuffer)[0], ((cl_float*)resultBuffer)[1], ((cl_float*)resultBuffer)[2], ((cl_float*)resultBuffer)[3] ); + log_error( " Actual: %a %a %a %a\n", ((cl_float*)resultPtr)[0], ((cl_float*)resultPtr)[1], ((cl_float*)resultPtr)[2], ((cl_float*)resultPtr)[3] ); + log_error( " Ulps: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] ); + break; + } + + float *v = (float *)(char *)imagePtr; + log_error( " src: %g %g %g %g\n", v[ 0 ], v[ 1], v[ 2 ], v[ 3 ] ); + log_error( " : %a %a %a %a\n", v[ 0 ], v[ 1], v[ 2 ], v[ 3 ] ); + log_error( " src: %12.24f %12.24f %12.24f %12.24f\n", v[0 ], v[ 1], v[ 2 ], v[ 3 ] ); + + if( ( --numTries ) == 0 ) + return 1; + } + } + } + imagePtr += get_explicit_type_size( inputType ) * 4; + resultPtr += pixelSize; + } + } + { + nextLevelOffset += width_lod * imageInfo->arraySize * get_pixel_size(imageInfo->format); + width_lod = (width_lod >> 1) ? (width_lod >> 1) : 1; + } + } + } + + // All done! 
+ return totalErrors; +} + + +int test_write_image_1D_array_set( cl_device_id device, cl_image_format *format, ExplicitType inputType, MTdata d ) +{ + char programSrc[10240]; + const char *ptr; + const char *readFormat; + clProgramWrapper program; + clKernelWrapper kernel; + const char *KernelSourcePattern = NULL; + int error; + + // Get our operating parameters + size_t maxWidth, maxArraySize; + cl_ulong maxAllocSize, memSize; + size_t pixelSize; + + image_descriptor imageInfo = { 0x0 }; + + imageInfo.format = format; + imageInfo.slicePitch = 0; + imageInfo.height = imageInfo.depth = 1; + imageInfo.type = CL_MEM_OBJECT_IMAGE1D_ARRAY; + pixelSize = get_pixel_size( imageInfo.format ); + + error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, sizeof( maxArraySize ), &maxArraySize, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL ); + test_error( error, "Unable to get max image 2D size from device" ); + + if (memSize > (cl_ulong)SIZE_MAX) { + memSize = (cl_ulong)SIZE_MAX; + } + + // Determine types + if( inputType == kInt ) + readFormat = "i"; + else if( inputType == kUInt ) + readFormat = "ui"; + else // kFloat + readFormat = "f"; + + if(gtestTypesToRun & kWriteTests) + { + KernelSourcePattern = write1DArrayKernelSourcePattern; + } + else + { + KernelSourcePattern = readwrite1DArrayKernelSourcePattern; + } + // Construct the source + // Construct the source + sprintf( programSrc, + KernelSourcePattern, + get_explicit_type_name( inputType ), + gTestMipmaps ? ", int lod" : "", + gTestMipmaps ? offset1DArrayLodSource : offset1DArraySource, + readFormat, + gTestMipmaps ? 
", lod" :"" ); + + ptr = programSrc; + error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", "-cl-std=CL2.0" ); + test_error( error, "Unable to create testing kernel" ); + + // Run tests + if( gTestSmallImages ) + { + for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ ) + { + imageInfo.rowPitch = imageInfo.width * pixelSize; + imageInfo.slicePitch = imageInfo.rowPitch; + for( imageInfo.arraySize = 2; imageInfo.arraySize < 9; imageInfo.arraySize++ ) + { + if(gTestMipmaps) + imageInfo.num_mip_levels = (size_t)random_in_range(2, (compute_max_mip_levels(imageInfo.width, 0, 0)-1), d); + + if( gDebugTrace ) + log_info( " at size %d,%d\n", (int)imageInfo.width, (int)imageInfo.arraySize ); + int retCode = test_write_image_1D_array( device, context, queue, kernel, &imageInfo, inputType, d ); + if( retCode ) + return retCode; + } + } + } + else if( gTestMaxImages ) + { + // Try a specific set of maximum sizes + size_t numbeOfSizes; + size_t sizes[100][3]; + + get_max_sizes(&numbeOfSizes, 100, sizes, maxWidth, 1, 1, maxArraySize, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE1D_ARRAY, imageInfo.format, CL_TRUE); + + for( size_t idx = 0; idx < numbeOfSizes; idx++ ) + { + imageInfo.width = sizes[ idx ][ 0 ]; + imageInfo.arraySize = sizes[ idx ][ 2 ]; + imageInfo.rowPitch = imageInfo.width * pixelSize; + imageInfo.slicePitch = imageInfo.rowPitch; + if(gTestMipmaps) + imageInfo.num_mip_levels = (size_t)random_in_range(2, (compute_max_mip_levels(imageInfo.width, 0, 0)-1), d); + log_info("Testing %d x %d\n", (int)imageInfo.width, (int)imageInfo.arraySize); + int retCode = test_write_image_1D_array( device, context, queue, kernel, &imageInfo, inputType, d ); + if( retCode ) + return retCode; + } + } + else if( gTestRounding ) + { + size_t typeRange = 1 << ( get_format_type_size( imageInfo.format ) * 8 ); + imageInfo.arraySize = typeRange / 256; + imageInfo.width = (size_t)( typeRange / (cl_ulong)imageInfo.arraySize 
); + + imageInfo.rowPitch = imageInfo.width * pixelSize; + imageInfo.slicePitch = imageInfo.rowPitch; + int retCode = test_write_image_1D_array( device, context, queue, kernel, &imageInfo, inputType, d ); + if( retCode ) + return retCode; + } + else + { + for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ ) + { + cl_ulong size; + // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that + // image, the result array, plus offset arrays, will fit in the global ram space + do + { + imageInfo.width = (size_t)random_log_in_range( 16, (int)maxWidth / 32, d ); + imageInfo.arraySize = (size_t)random_log_in_range( 16, (int)maxArraySize / 32, d ); + + if( gTestMipmaps) + { + imageInfo.num_mip_levels = (size_t)random_in_range(2, (compute_max_mip_levels(imageInfo.width, 0, 0)-1), d); + size = (cl_ulong) compute_mipmapped_image_size(imageInfo) * 4; + } + else + { + imageInfo.rowPitch = imageInfo.width * pixelSize; + if( gEnablePitch ) + { + size_t extraWidth = (int)random_log_in_range( 0, 64, d ); + imageInfo.rowPitch += extraWidth * pixelSize; + } + imageInfo.slicePitch = imageInfo.rowPitch; + + size = (size_t)imageInfo.rowPitch * (size_t)imageInfo.arraySize * 4; + } + } while( size > maxAllocSize || ( size * 3 ) > memSize ); + + if( gDebugTrace ) + log_info( " at size %d,%d (pitch %d) out of %d,%d\n", (int)imageInfo.width, (int)imageInfo.arraySize, (int)imageInfo.rowPitch, (int)maxWidth, (int)maxArraySize ); + + int retCode = test_write_image_1D_array( device, context, queue, kernel, &imageInfo, inputType, d ); + if( retCode ) + return retCode; + } + } + + return 0; +} diff --git a/test_conformance/images/kernel_read_write/test_write_2D_array.cpp b/test_conformance/images/kernel_read_write/test_write_2D_array.cpp new file mode 100644 index 00000000..d46d93b8 --- /dev/null +++ b/test_conformance/images/kernel_read_write/test_write_2D_array.cpp @@ -0,0 +1,771 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../testBase.h" + +#if !defined(_WIN32) +#include +#endif + +#define MAX_ERR 0.005f + +extern cl_command_queue queue; +extern cl_context context; +extern bool gDebugTrace, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestRounding, gTestMipmaps; +extern cl_filter_mode gFilterModeToSkip; +extern cl_mem_flags gMemFlagsToUse; + +extern int gtestTypesToRun; + +extern int verify_write_results( size_t &i, int &numTries, int &totalErrors, char *&imagePtr, void *resultValues, size_t y, size_t z, + ExplicitType inputType, image_descriptor *imageInfo, bool verifyRounding ); + +// Utility function to clamp down image sizes for certain tests to avoid +// using too much memory. 
+static size_t reduceImageSizeRange(size_t maxDimSize) { + size_t DimSize = maxDimSize/32; + if (DimSize < (size_t) 16) + return 16; + else if (DimSize > (size_t) 128) + return 128; + else + return DimSize; +} + +static size_t reduceImageDepth(size_t maxDepth) { + size_t Depth = maxDepth/32; + if (Depth < (size_t) 8) + return 8; + else if (Depth > (size_t) 32) + return 32; + else + return Depth; +} + +const char *write2DArrayKernelSourcePattern = +"__kernel void sample_kernel( __global %s%s *input, write_only %s output %s)\n" +"{\n" +" int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = get_global_id(2);\n" +"%s" +" write_image%s( output, (int4)( tidX, tidY, tidZ, 0 ) %s, input[ offset ]);\n" +"}"; + +const char *readwrite2DArrayKernelSourcePattern = +"__kernel void sample_kernel( __global %s%s *input, read_write %s output %s)\n" +"{\n" +" int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = get_global_id(2);\n" +"%s" +" write_image%s( output, (int4)( tidX, tidY, tidZ, 0 ) %s, input[ offset ] );\n" +"}"; + +const char *offset2DArrayKernelSource = +" int offset = tidZ*get_image_width(output)*get_image_height(output) + tidY*get_image_width(output) + tidX;\n"; + +const char *offset2DArrayLodKernelSource = +" int width_lod = ( get_image_width(output) >> lod ) ? ( get_image_width(output) >> lod ) : 1;\n" +" int height_lod = ( get_image_height(output) >> lod ) ? 
( get_image_height(output) >> lod ) : 1;\n" +" int offset = tidZ*width_lod*height_lod + tidY*width_lod + tidX;\n"; + +int test_write_image_2D_array( cl_device_id device, cl_context context, cl_command_queue queue, cl_kernel kernel, + image_descriptor *imageInfo, ExplicitType inputType, MTdata d ) +{ + int totalErrors = 0; + + size_t num_flags = 0; + const cl_mem_flags *mem_flag_types = NULL; + const char * *mem_flag_names = NULL; + const cl_mem_flags write_only_mem_flag_types[2] = { CL_MEM_WRITE_ONLY, CL_MEM_READ_WRITE }; + const char * write_only_mem_flag_names[2] = { "CL_MEM_WRITE_ONLY", "CL_MEM_READ_WRITE" }; + const cl_mem_flags read_write_mem_flag_types[1] = { CL_MEM_READ_WRITE}; + const char * read_write_mem_flag_names[1] = { "CL_MEM_READ_WRITE"}; + if(gtestTypesToRun & kWriteTests) + { + mem_flag_types = write_only_mem_flag_types; + mem_flag_names = write_only_mem_flag_names; + num_flags = sizeof( write_only_mem_flag_types ) / sizeof( write_only_mem_flag_types[0] ); + } + else + { + mem_flag_types = read_write_mem_flag_types; + mem_flag_names = read_write_mem_flag_names; + num_flags = sizeof( read_write_mem_flag_types ) / sizeof( read_write_mem_flag_types[0] ); + } + + size_t pixelSize = get_pixel_size( imageInfo->format ); + + for( size_t mem_flag_index = 0; mem_flag_index < num_flags; mem_flag_index++ ) + { + int error; + size_t threads[3]; + bool verifyRounding = false; + int totalErrors = 0; + int forceCorrectlyRoundedWrites = 0; + +#if defined( __APPLE__ ) + // Require Apple's CPU implementation to be correctly rounded, not just within 0.6 + cl_device_type type = 0; + if( (error = clGetDeviceInfo( device, CL_DEVICE_TYPE, sizeof( type), &type, NULL ))) + { + log_error("Error: Could not get device type for Apple device! 
(%d) \n", error ); + return 1; + } + if( type == CL_DEVICE_TYPE_CPU ) + forceCorrectlyRoundedWrites = 1; +#endif + + if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT ) + if( DetectFloatToHalfRoundingMode(queue) ) + return 1; + + BufferOwningPtr maxImageUseHostPtrBackingStore, imageValues; + + create_random_image_data( inputType, imageInfo, imageValues, d ); + + if(!gTestMipmaps) + { + if( inputType == kFloat && imageInfo->format->image_channel_data_type != CL_FLOAT ) + { + /* Pilot data for sRGB images */ + if(is_sRGBA_order(imageInfo->format->image_channel_order)) + { + // We want to generate ints (mostly) in range of the target format which should be [0,255] + // However the range chosen here is [-test_range_ext, 255 + test_range_ext] so that + // it can test some out-of-range data points + const unsigned int test_range_ext = 16; + int formatMin = 0 - test_range_ext; + int formatMax = 255 + test_range_ext; + int pixel_value = 0; + + // First, fill with arbitrary floats + for( size_t z = 0; z < imageInfo->arraySize; z++ ) + { + for( size_t y = 0; y < imageInfo->height; y++ ) + { + float *inputValues = (float *)(char*)imageValues + imageInfo->width * y * 4 + imageInfo->height * imageInfo->width * z * 4; + for( size_t i = 0; i < imageInfo->width * 4; i++ ) + { + pixel_value = random_in_range( formatMin, (int)formatMax, d ); + inputValues[ i ] = (float)(pixel_value/255.0f); + } + } + } + + // Throw a few extra test values in there + float *inputValues = (float *)(char*)imageValues; + size_t i = 0; + // Piloting some debug inputs. 
+ inputValues[ i++ ] = -0.5f; + inputValues[ i++ ] = 0.5f; + inputValues[ i++ ] = 2.f; + inputValues[ i++ ] = 0.5f; + + // Also fill in the first few vectors with some deliberate tests to determine the rounding mode + // is correct + if( imageInfo->width > 12 ) + { + float formatMax = (float)get_format_max_int( imageInfo->format ); + inputValues[ i++ ] = 4.0f / formatMax; + inputValues[ i++ ] = 4.3f / formatMax; + inputValues[ i++ ] = 4.5f / formatMax; + inputValues[ i++ ] = 4.7f / formatMax; + inputValues[ i++ ] = 5.0f / formatMax; + inputValues[ i++ ] = 5.3f / formatMax; + inputValues[ i++ ] = 5.5f / formatMax; + inputValues[ i++ ] = 5.7f / formatMax; + } + } + else + { + // First, fill with arbitrary floats + for( size_t z = 0; z < imageInfo->arraySize; z++ ) + { + for( size_t y = 0; y < imageInfo->height; y++ ) + { + float *inputValues = (float *)(char*)imageValues + imageInfo->width * y * 4 + imageInfo->height * imageInfo->width * z * 4; + for( size_t i = 0; i < imageInfo->width * 4; i++ ) + inputValues[ i ] = get_random_float( -0.1f, 1.1f, d ); + } + } + + // Throw a few extra test values in there + float *inputValues = (float *)(char*)imageValues; + size_t i = 0; + inputValues[ i++ ] = -0.0000000000009f; + inputValues[ i++ ] = 1.f; + inputValues[ i++ ] = -1.f; + inputValues[ i++ ] = 2.f; + + // Also fill in the first few vectors with some deliberate tests to determine the rounding mode + // is correct + if( imageInfo->width > 12 ) + { + float formatMax = (float)get_format_max_int( imageInfo->format ); + inputValues[ i++ ] = 4.0f / formatMax; + inputValues[ i++ ] = 4.3f / formatMax; + inputValues[ i++ ] = 4.5f / formatMax; + inputValues[ i++ ] = 4.7f / formatMax; + inputValues[ i++ ] = 5.0f / formatMax; + inputValues[ i++ ] = 5.3f / formatMax; + inputValues[ i++ ] = 5.5f / formatMax; + inputValues[ i++ ] = 5.7f / formatMax; + verifyRounding = true; + } + } + } + else if( inputType == kUInt ) + { + unsigned int *inputValues = (unsigned int*)(char*)imageValues; 
+ size_t i = 0; + inputValues[ i++ ] = 0; + inputValues[ i++ ] = 65535; + inputValues[ i++ ] = 7271820; + inputValues[ i++ ] = 0; + } + } + + // Construct testing sources + clProtectedImage protImage; + clMemWrapper unprotImage; + cl_mem image; + + if( gMemFlagsToUse == CL_MEM_USE_HOST_PTR ) + { + create_random_image_data( inputType, imageInfo, maxImageUseHostPtrBackingStore, d ); + + unprotImage = create_image_2d_array( context, mem_flag_types[mem_flag_index] | CL_MEM_USE_HOST_PTR, imageInfo->format, + imageInfo->width, imageInfo->height, imageInfo->arraySize, 0, 0, + maxImageUseHostPtrBackingStore, &error ); + + if( error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create 2D image array of size %ld x %ld x %ld pitch %ld (%s)\n", imageInfo->width, imageInfo->height, imageInfo->arraySize, imageInfo->rowPitch, IGetErrorString( error ) ); + return error; + } + + image = (cl_mem)unprotImage; + } + else // Either CL_MEM_ALLOC_HOST_PTR, CL_MEM_COPY_HOST_PTR or none + { + // Note: if ALLOC_HOST_PTR is used, the driver allocates memory that can be accessed by the host, but otherwise + // it works just as if no flag is specified, so we just do the same thing either way + // Note: if the flags is really CL_MEM_COPY_HOST_PTR, we want to remove it, because we don't want to copy any incoming data + if( gTestMipmaps ) + { + cl_image_desc image_desc = {0}; + image_desc.image_type = imageInfo->type; + image_desc.num_mip_levels = imageInfo->num_mip_levels; + image_desc.image_width = imageInfo->width; + image_desc.image_height = imageInfo->height; + image_desc.image_array_size = imageInfo->arraySize; + + unprotImage = clCreateImage( context, mem_flag_types[mem_flag_index] | ( gMemFlagsToUse & ~(CL_MEM_COPY_HOST_PTR) ), + imageInfo->format, &image_desc, NULL, &error); + if( error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create %d level 2D image array of size %ld x %ld x %ld (%s, %s)\n", imageInfo->num_mip_levels, imageInfo->width, imageInfo->height, 
imageInfo->arraySize, + IGetErrorString( error ), mem_flag_names[mem_flag_index] ); + return error; + } + } + else + { + unprotImage = create_image_2d_array( context, mem_flag_types[mem_flag_index] | ( gMemFlagsToUse & ~(CL_MEM_COPY_HOST_PTR) ), imageInfo->format, + imageInfo->width, imageInfo->height, imageInfo->arraySize, 0, 0, imageValues, &error ); + if( error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create 2D image array of size %ld x %ld x %ld pitch %ld (%s)\n", imageInfo->width, imageInfo->height, imageInfo->arraySize, imageInfo->rowPitch, IGetErrorString( error ) ); + return error; + } + } + image = unprotImage; + } + + error = clSetKernelArg( kernel, 1, sizeof( cl_mem ), &image ); + test_error( error, "Unable to set kernel arguments" ); + + size_t width_lod = imageInfo->width, height_lod = imageInfo->height, nextLevelOffset = 0; + size_t origin[ 4 ] = { 0, 0, 0, 0 }; + size_t region[ 3 ] = { imageInfo->width, imageInfo->height, imageInfo->arraySize }; + size_t resultSize; + + int num_lod_loops = (gTestMipmaps)? 
imageInfo->num_mip_levels : 1; + for( int lod = 0; lod < num_lod_loops; lod++) + { + if(gTestMipmaps) + { + error = clSetKernelArg( kernel, 2, sizeof( int ), &lod ); + } + // Run the kernel + threads[0] = (size_t)width_lod; + threads[1] = (size_t)height_lod; + threads[2] = (size_t)imageInfo->arraySize; + + clMemWrapper inputStream; + + char *imagePtrOffset = imageValues + nextLevelOffset; + inputStream = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), + get_explicit_type_size( inputType ) * 4 * width_lod * height_lod * imageInfo->arraySize, imagePtrOffset, &error ); + test_error( error, "Unable to create input buffer" ); + + // Set arguments + error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &inputStream ); + test_error( error, "Unable to set kernel arguments" ); + + error = clEnqueueNDRangeKernel( queue, kernel, 3, NULL, threads, NULL, 0, NULL, NULL ); + test_error( error, "Unable to run kernel" ); + + // Get results + if( gTestMipmaps ) + resultSize = width_lod * height_lod *imageInfo->arraySize * pixelSize; + else + resultSize = imageInfo->slicePitch *imageInfo->arraySize; + clProtectedArray PA(resultSize); + char *resultValues = (char *)((void *)PA); + + if( gDebugTrace ) + log_info( " reading results, %ld kbytes\n", (unsigned long)( resultSize / 1024 ) ); + + origin[3] = lod; + region[0] = width_lod; + region[1] = height_lod; + + error = clEnqueueReadImage( queue, image, CL_TRUE, origin, region, gEnablePitch ? imageInfo->rowPitch : 0, gEnablePitch ? 
imageInfo->slicePitch : 0, resultValues, 0, NULL, NULL ); + test_error( error, "Unable to read results from kernel" ); + if( gDebugTrace ) + log_info( " results read\n" ); + + // Validate results element by element + char *imagePtr = imageValues + nextLevelOffset; + int numTries = 5; + for( size_t z = 0, i = 0; z < imageInfo->arraySize; z++ ) + { + for( size_t y = 0; y < height_lod; y++ ) + { + char *resultPtr; + if( gTestMipmaps ) + resultPtr = (char *)resultValues + y * width_lod * pixelSize + z * width_lod * height_lod * pixelSize; + else + resultPtr = (char*)resultValues + y * imageInfo->rowPitch + z * imageInfo->slicePitch; + for( size_t x = 0; x < width_lod; x++, i++ ) + { + char resultBuffer[ 16 ]; // Largest format would be 4 channels * 4 bytes (32 bits) each + + // Convert this pixel + if( inputType == kFloat ) + pack_image_pixel( (float *)imagePtr, imageInfo->format, resultBuffer ); + else if( inputType == kInt ) + pack_image_pixel( (int *)imagePtr, imageInfo->format, resultBuffer ); + else // if( inputType == kUInt ) + pack_image_pixel( (unsigned int *)imagePtr, imageInfo->format, resultBuffer ); + + // Compare against the results + if(is_sRGBA_order(imageInfo->format->image_channel_order)) + { + // Compare sRGB-mapped values + cl_float expected[4] = {0}; + cl_float* input_values = (float*)imagePtr; + cl_uchar *actual = (cl_uchar*)resultPtr; + float max_err = MAX_lRGB_TO_sRGB_CONVERSION_ERROR; + float err[4] = {0.0f}; + + for( unsigned int j = 0; j < get_format_channel_count( imageInfo->format ); j++ ) + { + if(j < 3) + { + expected[j] = sRGBmap(input_values[j]); + } + else // there is no sRGB conversion for alpha component if it exists + { + expected[j] = NORMALIZE(input_values[j], 255.0f); + } + + err[j] = fabsf( expected[ j ] - actual[ j ] ); + } + + if ((err[0] > max_err) || + (err[1] > max_err) || + (err[2] > max_err) || + (err[3] > 0)) // there is no conversion for alpha so the error should be zero + { + log_error( " Error: %g %g %g %g\n", err[0], 
err[1], err[2], err[3]); + log_error( " Input: %g %g %g %g\n", *((float *)imagePtr), *((float *)imagePtr + 1), *((float *)imagePtr + 2), *((float *)imagePtr + 3)); + log_error( " Expected: %g %g %g %g\n", expected[ 0 ], expected[ 1 ], expected[ 2 ], expected[ 3 ] ); + log_error( " Actual: %d %d %d %d\n", actual[ 0 ], actual[ 1 ], actual[ 2 ], actual[ 3 ] ); + return 1; + } + } + else if( imageInfo->format->image_channel_data_type == CL_FLOAT ) + { + // Compare floats + float *expected = (float *)resultBuffer; + float *actual = (float *)resultPtr; + float err = 0.f; + for( unsigned int j = 0; j < get_format_channel_count( imageInfo->format ); j++ ) + err += ( expected[ j ] != 0 ) ? fabsf( ( expected[ j ] - actual[ j ] ) / expected[ j ] ) : fabsf( expected[ j ] - actual[ j ] ); + + err /= (float)get_format_channel_count( imageInfo->format ); + if( err > MAX_ERR ) + { + unsigned int *e = (unsigned int *)expected; + unsigned int *a = (unsigned int *)actual; + log_error( "ERROR: Sample %ld (%ld,%ld) did not validate! 
(%s)\n", i, x, y, mem_flag_names[mem_flag_index] ); + log_error( " Error: %g\n", err ); + log_error( " Expected: %a %a %a %a\n", expected[ 0 ], expected[ 1 ], expected[ 2 ], expected[ 3 ] ); + log_error( " Expected: %08x %08x %08x %08x\n", e[ 0 ], e[ 1 ], e[ 2 ], e[ 3 ] ); + log_error( " Actual: %a %a %a %a\n", actual[ 0 ], actual[ 1 ], actual[ 2 ], actual[ 3 ] ); + log_error( " Actual: %08x %08x %08x %08x\n", a[ 0 ], a[ 1 ], a[ 2 ], a[ 3 ] ); + totalErrors++; + if( ( --numTries ) == 0 ) + return 1; + } + } + else if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT ) + { + // Compare half floats + if( memcmp( resultBuffer, resultPtr, 2 * get_format_channel_count( imageInfo->format ) ) != 0 ) + { + cl_ushort *e = (cl_ushort *)resultBuffer; + cl_ushort *a = (cl_ushort *)resultPtr; + int err_cnt = 0; + + //Fix up cases where we have NaNs + for( size_t j = 0; j < get_format_channel_count( imageInfo->format ); j++ ) + { + if( is_half_nan( e[j] ) && is_half_nan(a[j]) ) + continue; + if( e[j] != a[j] ) + err_cnt++; + } + + if( err_cnt ) + { + totalErrors++; + log_error( "ERROR: Sample %ld (%ld,%ld) did not validate! (%s)\n", i, x, y, mem_flag_names[mem_flag_index] ); + unsigned short *e = (unsigned short *)resultBuffer; + unsigned short *a = (unsigned short *)resultPtr; + log_error( " Expected: 0x%04x 0x%04x 0x%04x 0x%04x\n", e[0], e[1], e[2], e[3] ); + log_error( " Actual: 0x%04x 0x%04x 0x%04x 0x%04x\n", a[0], a[1], a[2], a[3] ); + if( inputType == kFloat ) + { + float *p = (float *)(char *)imagePtr; + log_error( " Source: %a %a %a %a\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] ); + log_error( " : %12.24f %12.24f %12.24f %12.24f\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] ); + } + if( ( --numTries ) == 0 ) + return 1; + } + } + } + else + { + // Exact result passes every time + if( memcmp( resultBuffer, resultPtr, get_pixel_size( imageInfo->format ) ) != 0 ) + { + // result is inexact. 
Calculate error + int failure = 1; + float errors[4] = {NAN, NAN, NAN, NAN}; + pack_image_pixel_error( (float *)imagePtr, imageInfo->format, resultBuffer, errors ); + + // We are allowed 0.6 absolute error vs. infinitely precise for some normalized formats + if( 0 == forceCorrectlyRoundedWrites && + ( + imageInfo->format->image_channel_data_type == CL_UNORM_INT8 || + imageInfo->format->image_channel_data_type == CL_UNORM_INT_101010 || + imageInfo->format->image_channel_data_type == CL_UNORM_INT16 || + imageInfo->format->image_channel_data_type == CL_SNORM_INT8 || + imageInfo->format->image_channel_data_type == CL_SNORM_INT16 + )) + { + if( ! (fabsf( errors[0] ) > 0.6f) && ! (fabsf( errors[1] ) > 0.6f) && + ! (fabsf( errors[2] ) > 0.6f) && ! (fabsf( errors[3] ) > 0.6f) ) + failure = 0; + } + + + if( failure ) + { + totalErrors++; + // Is it our special rounding test? + if( verifyRounding && i >= 1 && i <= 2 ) + { + // Try to guess what the rounding mode of the device really is based on what it returned + const char *deviceRounding = "unknown"; + unsigned int deviceResults[8]; + read_image_pixel( resultPtr, imageInfo, 0, 0, 0, deviceResults, lod); + read_image_pixel( resultPtr, imageInfo, 1, 0, 0, &deviceResults[ 4 ], lod ); + + if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 4 && deviceResults[ 3 ] == 4 && + deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 5 && deviceResults[ 7 ] == 5 ) + deviceRounding = "truncate"; + else if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 5 && deviceResults[ 3 ] == 5 && + deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 6 && deviceResults[ 7 ] == 6 ) + deviceRounding = "round to nearest"; + else if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 4 && deviceResults[ 3 ] == 5 && + deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 6 && deviceResults[ 7 ] == 6 ) + 
deviceRounding = "round to even"; + + log_error( "ERROR: Rounding mode sample (%ld) did not validate, probably due to the device's rounding mode being wrong (%s)\n", i, mem_flag_names[mem_flag_index] ); + log_error( " Actual values rounded by device: %d %d %d %d %d %d %d %d\n", deviceResults[ 0 ], deviceResults[ 1 ], deviceResults[ 2 ], deviceResults[ 3 ], + deviceResults[ 4 ], deviceResults[ 5 ], deviceResults[ 6 ], deviceResults[ 7 ] ); + log_error( " Rounding mode of device appears to be %s\n", deviceRounding ); + return 1; + } + log_error( "ERROR: Sample %d (%d,%d) did not validate!\n", (int)i, (int)x, (int)y ); + switch(imageInfo->format->image_channel_data_type) + { + case CL_UNORM_INT8: + case CL_SNORM_INT8: + case CL_UNSIGNED_INT8: + case CL_SIGNED_INT8: + log_error( " Expected: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultBuffer)[0], ((cl_uchar*)resultBuffer)[1], ((cl_uchar*)resultBuffer)[2], ((cl_uchar*)resultBuffer)[3] ); + log_error( " Actual: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultPtr)[0], ((cl_uchar*)resultPtr)[1], ((cl_uchar*)resultPtr)[2], ((cl_uchar*)resultPtr)[3] ); + log_error( " Error: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] ); + break; + case CL_UNORM_INT16: + case CL_SNORM_INT16: + case CL_UNSIGNED_INT16: + case CL_SIGNED_INT16: +#ifdef CL_SFIXED14_APPLE + case CL_SFIXED14_APPLE: +#endif + log_error( " Expected: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultBuffer)[0], ((cl_ushort*)resultBuffer)[1], ((cl_ushort*)resultBuffer)[2], ((cl_ushort*)resultBuffer)[3] ); + log_error( " Actual: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultPtr)[0], ((cl_ushort*)resultPtr)[1], ((cl_ushort*)resultPtr)[2], ((cl_ushort*)resultPtr)[3] ); + log_error( " Error: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] ); + break; + case CL_HALF_FLOAT: + log_error( " Expected: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultBuffer)[0], ((cl_ushort*)resultBuffer)[1], 
((cl_ushort*)resultBuffer)[2], ((cl_ushort*)resultBuffer)[3] ); + log_error( " Actual: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultPtr)[0], ((cl_ushort*)resultPtr)[1], ((cl_ushort*)resultPtr)[2], ((cl_ushort*)resultPtr)[3] ); + log_error( " Ulps: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] ); + break; + case CL_UNSIGNED_INT32: + case CL_SIGNED_INT32: + log_error( " Expected: 0x%8.8x 0x%8.8x 0x%8.8x 0x%8.8x\n", ((cl_uint*)resultBuffer)[0], ((cl_uint*)resultBuffer)[1], ((cl_uint*)resultBuffer)[2], ((cl_uint*)resultBuffer)[3] ); + log_error( " Actual: 0x%8.8x 0x%8.8x 0x%8.8x 0x%8.8x\n", ((cl_uint*)resultPtr)[0], ((cl_uint*)resultPtr)[1], ((cl_uint*)resultPtr)[2], ((cl_uint*)resultPtr)[3] ); + break; + case CL_FLOAT: + log_error( " Expected: %a %a %a %a\n", ((cl_float*)resultBuffer)[0], ((cl_float*)resultBuffer)[1], ((cl_float*)resultBuffer)[2], ((cl_float*)resultBuffer)[3] ); + log_error( " Actual: %a %a %a %a\n", ((cl_float*)resultPtr)[0], ((cl_float*)resultPtr)[1], ((cl_float*)resultPtr)[2], ((cl_float*)resultPtr)[3] ); + log_error( " Ulps: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] ); + break; + } + + float *v = (float *)(char *)imagePtr; + log_error( " src: %g %g %g %g\n", v[ 0 ], v[ 1], v[ 2 ], v[ 3 ] ); + log_error( " : %a %a %a %a\n", v[ 0 ], v[ 1], v[ 2 ], v[ 3 ] ); + log_error( " src: %12.24f %12.24f %12.24f %12.24f\n", v[0 ], v[ 1], v[ 2 ], v[ 3 ] ); + + if( ( --numTries ) == 0 ) + return 1; + } + } + } + imagePtr += get_explicit_type_size( inputType ) * (( imageInfo->format->image_channel_order == CL_DEPTH ) ? 1 : 4); + resultPtr += get_pixel_size( imageInfo->format ); + } + } + } + { + nextLevelOffset += width_lod*height_lod*imageInfo->arraySize*pixelSize; + width_lod = (width_lod >> 1) ? (width_lod >> 1) : 1; + height_lod = (height_lod >> 1) ? (height_lod >> 1) : 1; + } + } + } + // All done! 
+ return totalErrors; +} + + +int test_write_image_2D_array_set( cl_device_id device, cl_image_format *format, ExplicitType inputType, MTdata d ) +{ + char programSrc[10240]; + const char *ptr; + const char *readFormat; + clProgramWrapper program; + clKernelWrapper kernel; + const char *KernelSourcePattern = NULL; + int error; + + // Get our operating parameters + size_t maxWidth, maxHeight, maxArraySize; + cl_ulong maxAllocSize, memSize; + + image_descriptor imageInfo = { 0x0 }; + + imageInfo.format = format; + imageInfo.type = CL_MEM_OBJECT_IMAGE2D_ARRAY; + imageInfo.depth = 1; + imageInfo.slicePitch = 0; + + error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, sizeof( maxArraySize ), &maxArraySize, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL ); + test_error( error, "Unable to get max image 3D size from device" ); + + if (memSize > (cl_ulong)SIZE_MAX) { + memSize = (cl_ulong)SIZE_MAX; + } + + // Determine types + if( inputType == kInt ) + readFormat = "i"; + else if( inputType == kUInt ) + readFormat = "ui"; + else // kFloat + readFormat = "f"; + + if(gtestTypesToRun & kWriteTests) + { + KernelSourcePattern = write2DArrayKernelSourcePattern; + } + else + { + KernelSourcePattern = readwrite2DArrayKernelSourcePattern; + } + // Construct the source + // Construct the source + sprintf( programSrc, + KernelSourcePattern, + get_explicit_type_name( inputType ), + (format->image_channel_order == CL_DEPTH) ? "" : "4", + (format->image_channel_order == CL_DEPTH) ? "image2d_array_depth_t" : "image2d_array_t", + gTestMipmaps ? " , int lod" : "", + gTestMipmaps ? 
offset2DArrayLodKernelSource : offset2DArrayKernelSource, + readFormat, + gTestMipmaps ? ", lod" : "" ); + + ptr = programSrc; + error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", "-cl-std=CL2.0" ); + test_error( error, "Unable to create testing kernel" ); + + // Run tests + if( gTestSmallImages ) + { + for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ ) + { + imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format ); + for( imageInfo.height = 1; imageInfo.height < 9; imageInfo.height++ ) + { + imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch; + for( imageInfo.arraySize = 2; imageInfo.arraySize < 7; imageInfo.arraySize++ ) + { + if( gTestMipmaps ) + imageInfo.num_mip_levels = (size_t) random_in_range(2, compute_max_mip_levels(imageInfo.width, imageInfo.height, 0)-1, d); + + if( gDebugTrace ) + log_info( " at size %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.arraySize ); + int retCode = test_write_image_2D_array( device, context, queue, kernel, &imageInfo, inputType, d ); + if( retCode ) + return retCode; + } + } + } + } + else if( gTestMaxImages ) + { + // Try a specific set of maximum sizes + size_t numbeOfSizes; + size_t sizes[100][3]; + + get_max_sizes(&numbeOfSizes, 100, sizes, maxWidth, maxHeight, 1, maxArraySize, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE2D_ARRAY, imageInfo.format, CL_TRUE); + + for( size_t idx = 0; idx < numbeOfSizes; idx++ ) + { + imageInfo.width = sizes[ idx ][ 0 ]; + imageInfo.height = sizes[ idx ][ 1 ]; + imageInfo.arraySize = sizes[ idx ][ 2 ]; + imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format ); + imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch; + if( gTestMipmaps ) + imageInfo.num_mip_levels = (size_t) random_in_range(2, compute_max_mip_levels(imageInfo.width, imageInfo.height, 0)-1, d); + log_info("Testing %d x %d x %d\n", (int)imageInfo.width, (int)imageInfo.height, 
(int)imageInfo.arraySize); + int retCode = test_write_image_2D_array( device, context, queue, kernel, &imageInfo, inputType, d ); + if( retCode ) + return retCode; + } + } + else if( gTestRounding ) + { + size_t typeRange = 1 << ( get_format_type_size( imageInfo.format ) * 8 ); + imageInfo.height = typeRange / 256; + imageInfo.width = (size_t)( typeRange / (cl_ulong)imageInfo.height ); + imageInfo.arraySize = 2; + + imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format ); + imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch; + int retCode = test_write_image_2D_array( device, context, queue, kernel, &imageInfo, inputType, d ); + if( retCode ) + return retCode; + } + else + { + for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ ) + { + int maxWidthRange = (int) reduceImageSizeRange(maxWidth); + int maxHeighthRange = (int) reduceImageSizeRange(maxHeight); + int maxArraySizeRange = (int) reduceImageDepth(maxArraySize); + + cl_ulong size, buffSize; + // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that + // image, the result array, plus offset arrays, will fit in the global ram space + do + { + imageInfo.width = (size_t)random_log_in_range( 16, maxWidthRange, d ); + imageInfo.height = (size_t)random_log_in_range( 16, maxHeighthRange, d ); + imageInfo.arraySize = (size_t)random_log_in_range( 8, maxArraySizeRange, d ); + + if(gTestMipmaps) + { + imageInfo.num_mip_levels = (size_t) random_in_range(2,(compute_max_mip_levels(imageInfo.width, imageInfo.height, 0) - 1), d); + //Need to take into account the input buffer size, otherwise we will end up with input buffer that is exceeding MaxAlloc + size = 4 * compute_mipmapped_image_size(imageInfo); + buffSize = size * get_explicit_type_size( inputType ); + } + else + { + imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format ); + imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch; + if( gEnablePitch ) + { + size_t extraWidth = 
(int)random_log_in_range( 0, 64, d ); + imageInfo.rowPitch += extraWidth * get_pixel_size( imageInfo.format ); + + imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch; + extraWidth = (int)random_log_in_range( 0, 64, d ); + imageInfo.slicePitch += extraWidth * imageInfo.rowPitch; + } + + + // Image size and buffer size may differ due to different pixel size. + // See creation of buffer at line ~153. + size = (cl_ulong)imageInfo.slicePitch * (cl_ulong)imageInfo.arraySize * 4; + buffSize = (cl_ulong)imageInfo.width * (cl_ulong)imageInfo.height * imageInfo.arraySize * get_explicit_type_size(inputType) * 4; + } + } while( size > maxAllocSize || buffSize > maxAllocSize || ( size * 3 ) > memSize ); + + if( gDebugTrace ) + log_info( " at size %ld,%ld,%ld (pitch %ld, slice %ld) out of %ld,%ld,%ld\n", imageInfo.width, imageInfo.height, imageInfo.arraySize, + imageInfo.rowPitch, imageInfo.slicePitch, maxWidth, maxHeight, maxArraySize ); + + int retCode = test_write_image_2D_array( device, context, queue, kernel, &imageInfo, inputType, d ); + if( retCode ) + return retCode; + } + } + + return 0; +} diff --git a/test_conformance/images/kernel_read_write/test_write_3D.cpp b/test_conformance/images/kernel_read_write/test_write_3D.cpp new file mode 100644 index 00000000..e228af55 --- /dev/null +++ b/test_conformance/images/kernel_read_write/test_write_3D.cpp @@ -0,0 +1,768 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../testBase.h" + +#if !defined(_WIN32) +#include +#endif + +#define MAX_ERR 0.005f + +extern cl_command_queue queue; +extern cl_context context; +extern bool gDebugTrace, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestRounding, gTestMipmaps; +extern cl_filter_mode gFilterModeToSkip; +extern cl_mem_flags gMemFlagsToUse; + +extern int gtestTypesToRun; + +extern int verify_write_results( size_t &i, int &numTries, int &totalErrors, char *&imagePtr, void *resultValues, size_t y, size_t z, + ExplicitType inputType, image_descriptor *imageInfo, bool verifyRounding ); + +// Utility function to clamp down image sizes for certain tests to avoid +// using too much memory. +static size_t reduceImageSizeRange(size_t maxDimSize, MTdata& seed) { + size_t DimSize = random_log_in_range(8, (int) maxDimSize/32, seed); + if (DimSize > (size_t) 128) + return 128; + else + return DimSize; +} + +static size_t reduceImageDepth(size_t maxDimSize, MTdata& seed) { + size_t DimSize = random_log_in_range(8, (int) maxDimSize/32, seed); + if (DimSize > (size_t) 32) + return 32; + else + return DimSize; +} + + +const char *write3DKernelSourcePattern = +"%s" +"__kernel void sample_kernel( __global %s4 *input, write_only image3d_t output %s )\n" +"{\n" +" int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = get_global_id(2);\n" +"%s" +" write_image%s( output, (int4)( tidX, tidY, tidZ, 0 ) %s, input[ offset ]);\n" +"}"; + +const char *readwrite3DKernelSourcePattern = +"%s" +"__kernel void sample_kernel( __global %s4 *input, read_write image3d_t output %s )\n" +"{\n" +" int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = get_global_id(2);\n" +"%s" +" write_image%s( output, (int4)( tidX, tidY, tidZ, 0 ) %s, input[ offset ]);\n" +"}"; + +const char *khr3DWritesPragma = +"#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable\n"; + +const 
char *offset3DSource= +" int offset = tidZ*get_image_width(output)*get_image_height(output) + tidY*get_image_width(output) + tidX;\n"; + +const char *offset3DLodSource = +" int width_lod = ( get_image_width(output) >> lod ) ? ( get_image_width(output) >> lod ) : 1;\n" +" int height_lod = ( get_image_height(output) >> lod ) ? ( get_image_height(output) >> lod ) : 1;\n" +" int offset = tidZ*width_lod*height_lod + tidY*width_lod + tidX;\n"; + +int test_write_image_3D( cl_device_id device, cl_context context, cl_command_queue queue, cl_kernel kernel, + image_descriptor *imageInfo, ExplicitType inputType, MTdata d ) +{ + int totalErrors = 0; + + size_t num_flags = 0; + const cl_mem_flags *mem_flag_types = NULL; + const char * *mem_flag_names = NULL; + const cl_mem_flags write_only_mem_flag_types[2] = { CL_MEM_WRITE_ONLY, CL_MEM_READ_WRITE }; + const char * write_only_mem_flag_names[2] = { "CL_MEM_WRITE_ONLY", "CL_MEM_READ_WRITE" }; + const cl_mem_flags read_write_mem_flag_types[1] = { CL_MEM_READ_WRITE}; + const char * read_write_mem_flag_names[1] = { "CL_MEM_READ_WRITE"}; + + if(gtestTypesToRun & kWriteTests) + { + mem_flag_types = write_only_mem_flag_types; + mem_flag_names = write_only_mem_flag_names; + num_flags = sizeof( write_only_mem_flag_types ) / sizeof( write_only_mem_flag_types[0] ); + } + else + { + mem_flag_types = read_write_mem_flag_types; + mem_flag_names = read_write_mem_flag_names; + num_flags = sizeof( read_write_mem_flag_types ) / sizeof( read_write_mem_flag_types[0] ); + } + + size_t pixelSize = get_pixel_size( imageInfo->format ); + + for( size_t mem_flag_index = 0; mem_flag_index < num_flags; mem_flag_index++ ) + { + int error; + size_t threads[3]; + bool verifyRounding = false; + int totalErrors = 0; + int forceCorrectlyRoundedWrites = 0; + +#if defined( __APPLE__ ) + // Require Apple's CPU implementation to be correctly rounded, not just within 0.6 + cl_device_type type = 0; + if( (error = clGetDeviceInfo( device, CL_DEVICE_TYPE, sizeof( type), 
&type, NULL ))) + { + log_error("Error: Could not get device type for Apple device! (%d) \n", error ); + return 1; + } + if( type == CL_DEVICE_TYPE_CPU ) + forceCorrectlyRoundedWrites = 1; +#endif + + if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT ) + if( DetectFloatToHalfRoundingMode(queue) ) + return 1; + + BufferOwningPtr maxImageUseHostPtrBackingStore, imageValues; + + create_random_image_data( inputType, imageInfo, imageValues, d ); + + if(!gTestMipmaps) + { + if( inputType == kFloat && imageInfo->format->image_channel_data_type != CL_FLOAT ) + { + /* Pilot data for sRGB images */ + if(is_sRGBA_order(imageInfo->format->image_channel_order)) + { + // We want to generate ints (mostly) in range of the target format which should be [0,255] + // However the range chosen here is [-test_range_ext, 255 + test_range_ext] so that + // it can test some out-of-range data points + const unsigned int test_range_ext = 16; + int formatMin = 0 - test_range_ext; + int formatMax = 255 + test_range_ext; + int pixel_value = 0; + + // First, fill with arbitrary floats + for( size_t z = 0; z < imageInfo->depth; z++ ) + { + for( size_t y = 0; y < imageInfo->height; y++ ) + { + float *inputValues = (float *)(char*)imageValues + imageInfo->width * y * 4 + imageInfo->height * imageInfo->width * z * 4; + for( size_t i = 0; i < imageInfo->width * 4; i++ ) + { + pixel_value = random_in_range( formatMin, (int)formatMax, d ); + inputValues[ i ] = (float)(pixel_value/255.0f); + } + } + } + + // Throw a few extra test values in there + float *inputValues = (float *)(char*)imageValues; + size_t i = 0; + + // Piloting some debug inputs. 
+ inputValues[ i++ ] = -0.5f; + inputValues[ i++ ] = 0.5f; + inputValues[ i++ ] = 2.f; + inputValues[ i++ ] = 0.5f; + + // Also fill in the first few vectors with some deliberate tests to determine the rounding mode + // is correct + if( imageInfo->width > 12 ) + { + float formatMax = (float)get_format_max_int( imageInfo->format ); + inputValues[ i++ ] = 4.0f / formatMax; + inputValues[ i++ ] = 4.3f / formatMax; + inputValues[ i++ ] = 4.5f / formatMax; + inputValues[ i++ ] = 4.7f / formatMax; + inputValues[ i++ ] = 5.0f / formatMax; + inputValues[ i++ ] = 5.3f / formatMax; + inputValues[ i++ ] = 5.5f / formatMax; + inputValues[ i++ ] = 5.7f / formatMax; + } + } + else + { + // First, fill with arbitrary floats + for( size_t z = 0; z < imageInfo->depth; z++ ) + { + for( size_t y = 0; y < imageInfo->height; y++ ) + { + float *inputValues = (float *)(char*)imageValues + imageInfo->width * y * 4 + imageInfo->height * imageInfo->width * z * 4; + for( size_t i = 0; i < imageInfo->width * 4; i++ ) + inputValues[ i ] = get_random_float( -0.1f, 1.1f, d ); + } + } + + // Throw a few extra test values in there + float *inputValues = (float *)(char*)imageValues; + size_t i = 0; + inputValues[ i++ ] = -0.0000000000009f; + inputValues[ i++ ] = 1.f; + inputValues[ i++ ] = -1.f; + inputValues[ i++ ] = 2.f; + + // Also fill in the first few vectors with some deliberate tests to determine the rounding mode + // is correct + if( imageInfo->width > 12 ) + { + float formatMax = (float)get_format_max_int( imageInfo->format ); + inputValues[ i++ ] = 4.0f / formatMax; + inputValues[ i++ ] = 4.3f / formatMax; + inputValues[ i++ ] = 4.5f / formatMax; + inputValues[ i++ ] = 4.7f / formatMax; + inputValues[ i++ ] = 5.0f / formatMax; + inputValues[ i++ ] = 5.3f / formatMax; + inputValues[ i++ ] = 5.5f / formatMax; + inputValues[ i++ ] = 5.7f / formatMax; + verifyRounding = true; + } + } + } + else if( inputType == kUInt ) + { + unsigned int *inputValues = (unsigned int*)(char*)imageValues; + 
size_t i = 0; + inputValues[ i++ ] = 0; + inputValues[ i++ ] = 65535; + inputValues[ i++ ] = 7271820; + inputValues[ i++ ] = 0; + } + } + + // Construct testing sources + clProtectedImage protImage; + clMemWrapper unprotImage; + cl_mem image; + + if( gMemFlagsToUse == CL_MEM_USE_HOST_PTR ) + { + create_random_image_data( inputType, imageInfo, maxImageUseHostPtrBackingStore, d ); + + unprotImage = create_image_3d( context, mem_flag_types[mem_flag_index] | CL_MEM_USE_HOST_PTR, imageInfo->format, + imageInfo->width, imageInfo->height, imageInfo->depth, 0, 0, + maxImageUseHostPtrBackingStore, &error ); + + if( error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create 3D image of size %ld x %ld x %ld pitch %ld (%s)\n", imageInfo->width, imageInfo->height, imageInfo->depth, imageInfo->rowPitch, IGetErrorString( error ) ); + return error; + } + + image = (cl_mem)unprotImage; + } + else // Either CL_MEM_ALLOC_HOST_PTR, CL_MEM_COPY_HOST_PTR or none + { + // Note: if ALLOC_HOST_PTR is used, the driver allocates memory that can be accessed by the host, but otherwise + // it works just as if no flag is specified, so we just do the same thing either way + // Note: if the flags is really CL_MEM_COPY_HOST_PTR, we want to remove it, because we don't want to copy any incoming data + if(gTestMipmaps) + { + cl_image_desc image_desc = {0}; + image_desc.image_type = imageInfo->type; + image_desc.num_mip_levels = imageInfo->num_mip_levels; + image_desc.image_width = imageInfo->width; + image_desc.image_height = imageInfo->height; + image_desc.image_depth = imageInfo->depth; + + unprotImage = clCreateImage( context, mem_flag_types[mem_flag_index] | ( gMemFlagsToUse & ~(CL_MEM_COPY_HOST_PTR) ), + imageInfo->format, &image_desc, NULL, &error); + if( error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create %d level mipmapped 3D image of size %ld x %ld *%ld (%s, %s)\n", imageInfo->num_mip_levels, imageInfo->width, imageInfo->height, imageInfo->depth, + IGetErrorString( error ), 
mem_flag_names[mem_flag_index] ); + return error; + } + } + else + { + unprotImage = create_image_3d( context, mem_flag_types[mem_flag_index] | ( gMemFlagsToUse & ~(CL_MEM_COPY_HOST_PTR) ), imageInfo->format, + imageInfo->width, imageInfo->height, imageInfo->depth, 0, 0, imageValues, &error ); + if( error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create 3D image of size %ld x %ld x %ld pitch %ld (%s)\n", imageInfo->width, imageInfo->height, imageInfo->depth, imageInfo->rowPitch, IGetErrorString( error ) ); + return error; + } + } + image = unprotImage; + } + + error = clSetKernelArg( kernel, 1, sizeof( cl_mem ), &image ); + test_error( error, "Unable to set kernel arguments" ); + + size_t width_lod = imageInfo->width; + size_t height_lod = imageInfo->height; + size_t depth_lod = imageInfo->depth; + size_t nextLevelOffset = 0; + size_t origin[ 4 ] = { 0, 0, 0, 0 }; + size_t region[ 3 ] = { imageInfo->width, imageInfo->height, imageInfo->depth }; + + int num_lod_loops = (gTestMipmaps)? 
imageInfo->num_mip_levels : 1; + for( int lod = 0; lod < num_lod_loops; lod++) + { + if(gTestMipmaps) + { + error = clSetKernelArg( kernel, 2, sizeof( int ), &lod ); + } + // Run the kernel + threads[0] = (size_t)width_lod; + threads[1] = (size_t)height_lod; + threads[2] = (size_t)depth_lod; + + clMemWrapper inputStream; + + char *imagePtrOffset = imageValues + nextLevelOffset; + inputStream = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), + get_explicit_type_size( inputType ) * 4 * width_lod * height_lod * depth_lod, imagePtrOffset, &error ); + test_error( error, "Unable to create input buffer" ); + + // Set arguments + error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &inputStream ); + test_error( error, "Unable to set kernel arguments" ); + + error = clEnqueueNDRangeKernel( queue, kernel, 3, NULL, threads, NULL, 0, NULL, NULL ); + test_error( error, "Unable to run kernel" ); + + // Get results + size_t resultSize; + if(gTestMipmaps) + resultSize = width_lod * height_lod * depth_lod * pixelSize; + else + resultSize = imageInfo->slicePitch *imageInfo->depth; + clProtectedArray PA(resultSize); + char *resultValues = (char *)((void *)PA); + + if( gDebugTrace ) + log_info( " reading results, %ld kbytes\n", (unsigned long)( resultSize / 1024 ) ); + + origin[3] = lod; + region[0] = width_lod; + region[1] = height_lod; + region[2] = depth_lod; + error = clEnqueueReadImage( queue, image, CL_TRUE, origin, region, gEnablePitch ? imageInfo->rowPitch : 0, gEnablePitch ? 
imageInfo->slicePitch : 0, resultValues, 0, NULL, NULL ); + test_error( error, "Unable to read results from kernel" ); + if( gDebugTrace ) + log_info( " results read\n" ); + + // Validate results element by element + char *imagePtr = (char*)imageValues + nextLevelOffset; + int numTries = 5; + for( size_t z = 0, i = 0; z < depth_lod; z++ ) + { + for( size_t y = 0; y < height_lod; y++ ) + { + char *resultPtr; + if( gTestMipmaps ) + resultPtr = (char *)resultValues + y * width_lod * pixelSize + z * width_lod * height_lod * pixelSize; + else + resultPtr = (char *)resultValues + y * imageInfo->rowPitch + z * imageInfo->slicePitch; + for( size_t x = 0; x < width_lod; x++, i++ ) + { + char resultBuffer[ 16 ]; // Largest format would be 4 channels * 4 bytes (32 bits) each + + // Convert this pixel + if( inputType == kFloat ) + pack_image_pixel( (float *)imagePtr, imageInfo->format, resultBuffer ); + else if( inputType == kInt ) + pack_image_pixel( (int *)imagePtr, imageInfo->format, resultBuffer ); + else // if( inputType == kUInt ) + pack_image_pixel( (unsigned int *)imagePtr, imageInfo->format, resultBuffer ); + + // Compare against the results + if(is_sRGBA_order(imageInfo->format->image_channel_order)) + { + // Compare sRGB-mapped values + cl_float expected[4] = {0}; + cl_float* input_values = (float*)imagePtr; + cl_uchar *actual = (cl_uchar*)resultPtr; + float max_err = MAX_lRGB_TO_sRGB_CONVERSION_ERROR; + float err[4] = {0.0f}; + + for( unsigned int j = 0; j < get_format_channel_count( imageInfo->format ); j++ ) + { + if(j < 3) + { + expected[j] = sRGBmap(input_values[j]); + } + else // there is no sRGB conversion for alpha component if it exists + { + expected[j] = NORMALIZE(input_values[j], 255.0f); + } + + err[j] = fabsf( expected[ j ] - actual[ j ] ); + } + + if ((err[0] > max_err) || + (err[1] > max_err) || + (err[2] > max_err) || + (err[3] > FLT_EPSILON)) // there is no conversion for alpha + { + log_error( " Error: %g %g %g %g\n", err[0], err[1], err[2], 
err[3]); + log_error( " Input: %g %g %g %g\n", *((float *)imagePtr), *((float *)imagePtr + 1), *((float *)imagePtr + 2), *((float *)imagePtr + 3)); + log_error( " Expected: %g %g %g %g\n", expected[ 0 ], expected[ 1 ], expected[ 2 ], expected[ 3 ] ); + log_error( " Actual: %d %d %d %d\n", actual[ 0 ], actual[ 1 ], actual[ 2 ], actual[ 3 ] ); + return 1; + } + } + else if( imageInfo->format->image_channel_data_type == CL_FLOAT ) + { + // Compare floats + float *expected = (float *)resultBuffer; + float *actual = (float *)resultPtr; + float err = 0.f; + for( unsigned int j = 0; j < get_format_channel_count( imageInfo->format ); j++ ) + err += ( expected[ j ] != 0 ) ? fabsf( ( expected[ j ] - actual[ j ] ) / expected[ j ] ) : fabsf( expected[ j ] - actual[ j ] ); + + err /= (float)get_format_channel_count( imageInfo->format ); + if( err > MAX_ERR ) + { + unsigned int *e = (unsigned int *)expected; + unsigned int *a = (unsigned int *)actual; + log_error( "ERROR: Sample %ld (%ld,%ld) did not validate! 
(%s)\n", i, x, y, mem_flag_names[mem_flag_index] ); + log_error( " Error: %g\n", err ); + log_error( " Expected: %a %a %a %a\n", expected[ 0 ], expected[ 1 ], expected[ 2 ], expected[ 3 ] ); + log_error( " Expected: %08x %08x %08x %08x\n", e[ 0 ], e[ 1 ], e[ 2 ], e[ 3 ] ); + log_error( " Actual: %a %a %a %a\n", actual[ 0 ], actual[ 1 ], actual[ 2 ], actual[ 3 ] ); + log_error( " Actual: %08x %08x %08x %08x\n", a[ 0 ], a[ 1 ], a[ 2 ], a[ 3 ] ); + totalErrors++; + if( ( --numTries ) == 0 ) + return 1; + } + } + else if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT ) + { + // Compare half floats + if( memcmp( resultBuffer, resultPtr, 2 * get_format_channel_count( imageInfo->format ) ) != 0 ) + { + cl_ushort *e = (cl_ushort *)resultBuffer; + cl_ushort *a = (cl_ushort *)resultPtr; + int err_cnt = 0; + + //Fix up cases where we have NaNs + for( size_t j = 0; j < get_format_channel_count( imageInfo->format ); j++ ) + { + if( is_half_nan( e[j] ) && is_half_nan(a[j]) ) + continue; + if( e[j] != a[j] ) + err_cnt++; + } + + if( err_cnt ) + { + totalErrors++; + log_error( "ERROR: Sample %ld (%ld,%ld) did not validate! (%s)\n", i, x, y, mem_flag_names[mem_flag_index] ); + unsigned short *e = (unsigned short *)resultBuffer; + unsigned short *a = (unsigned short *)resultPtr; + log_error( " Expected: 0x%04x 0x%04x 0x%04x 0x%04x\n", e[0], e[1], e[2], e[3] ); + log_error( " Actual: 0x%04x 0x%04x 0x%04x 0x%04x\n", a[0], a[1], a[2], a[3] ); + if( inputType == kFloat ) + { + float *p = (float *)(char *)imagePtr; + log_error( " Source: %a %a %a %a\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] ); + log_error( " : %12.24f %12.24f %12.24f %12.24f\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] ); + } + if( ( --numTries ) == 0 ) + return 1; + } + } + } + else + { + // Exact result passes every time + if( memcmp( resultBuffer, resultPtr, get_pixel_size( imageInfo->format ) ) != 0 ) + { + // result is inexact. 
Calculate error + int failure = 1; + float errors[4] = {NAN, NAN, NAN, NAN}; + pack_image_pixel_error( (float *)imagePtr, imageInfo->format, resultBuffer, errors ); + + // We are allowed 0.6 absolute error vs. infinitely precise for some normalized formats + if( 0 == forceCorrectlyRoundedWrites && + ( + imageInfo->format->image_channel_data_type == CL_UNORM_INT8 || + imageInfo->format->image_channel_data_type == CL_UNORM_INT_101010 || + imageInfo->format->image_channel_data_type == CL_UNORM_INT16 || + imageInfo->format->image_channel_data_type == CL_SNORM_INT8 || + imageInfo->format->image_channel_data_type == CL_SNORM_INT16 + )) + { + if( ! (fabsf( errors[0] ) > 0.6f) && ! (fabsf( errors[1] ) > 0.6f) && + ! (fabsf( errors[2] ) > 0.6f) && ! (fabsf( errors[3] ) > 0.6f) ) + failure = 0; + } + + + if( failure ) + { + totalErrors++; + // Is it our special rounding test? + if( verifyRounding && i >= 1 && i <= 2 ) + { + // Try to guess what the rounding mode of the device really is based on what it returned + const char *deviceRounding = "unknown"; + unsigned int deviceResults[8]; + read_image_pixel( resultPtr, imageInfo, 0, 0, 0, deviceResults, lod ); + read_image_pixel( resultPtr, imageInfo, 1, 0, 0, &deviceResults[ 4 ], lod ); + + if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 4 && deviceResults[ 3 ] == 4 && + deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 5 && deviceResults[ 7 ] == 5 ) + deviceRounding = "truncate"; + else if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 5 && deviceResults[ 3 ] == 5 && + deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 6 && deviceResults[ 7 ] == 6 ) + deviceRounding = "round to nearest"; + else if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 4 && deviceResults[ 3 ] == 5 && + deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 6 && deviceResults[ 7 ] == 6 ) + 
deviceRounding = "round to even"; + + log_error( "ERROR: Rounding mode sample (%ld) did not validate, probably due to the device's rounding mode being wrong (%s)\n", i, mem_flag_names[mem_flag_index] ); + log_error( " Actual values rounded by device: %d %d %d %d %d %d %d %d\n", deviceResults[ 0 ], deviceResults[ 1 ], deviceResults[ 2 ], deviceResults[ 3 ], + deviceResults[ 4 ], deviceResults[ 5 ], deviceResults[ 6 ], deviceResults[ 7 ] ); + log_error( " Rounding mode of device appears to be %s\n", deviceRounding ); + return 1; + } + log_error( "ERROR: Sample %d (%d,%d) did not validate!\n", (int)i, (int)x, (int)y ); + switch(imageInfo->format->image_channel_data_type) + { + case CL_UNORM_INT8: + case CL_SNORM_INT8: + case CL_UNSIGNED_INT8: + case CL_SIGNED_INT8: + log_error( " Expected: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultBuffer)[0], ((cl_uchar*)resultBuffer)[1], ((cl_uchar*)resultBuffer)[2], ((cl_uchar*)resultBuffer)[3] ); + log_error( " Actual: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultPtr)[0], ((cl_uchar*)resultPtr)[1], ((cl_uchar*)resultPtr)[2], ((cl_uchar*)resultPtr)[3] ); + log_error( " Error: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] ); + break; + case CL_UNORM_INT16: + case CL_SNORM_INT16: + case CL_UNSIGNED_INT16: + case CL_SIGNED_INT16: +#ifdef CL_SFIXED14_APPLE + case CL_SFIXED14_APPLE: +#endif + log_error( " Expected: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultBuffer)[0], ((cl_ushort*)resultBuffer)[1], ((cl_ushort*)resultBuffer)[2], ((cl_ushort*)resultBuffer)[3] ); + log_error( " Actual: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultPtr)[0], ((cl_ushort*)resultPtr)[1], ((cl_ushort*)resultPtr)[2], ((cl_ushort*)resultPtr)[3] ); + log_error( " Error: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] ); + break; + case CL_HALF_FLOAT: + log_error( " Expected: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultBuffer)[0], ((cl_ushort*)resultBuffer)[1], 
((cl_ushort*)resultBuffer)[2], ((cl_ushort*)resultBuffer)[3] ); + log_error( " Actual: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultPtr)[0], ((cl_ushort*)resultPtr)[1], ((cl_ushort*)resultPtr)[2], ((cl_ushort*)resultPtr)[3] ); + log_error( " Ulps: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] ); + break; + case CL_UNSIGNED_INT32: + case CL_SIGNED_INT32: + log_error( " Expected: 0x%8.8x 0x%8.8x 0x%8.8x 0x%8.8x\n", ((cl_uint*)resultBuffer)[0], ((cl_uint*)resultBuffer)[1], ((cl_uint*)resultBuffer)[2], ((cl_uint*)resultBuffer)[3] ); + log_error( " Actual: 0x%8.8x 0x%8.8x 0x%8.8x 0x%8.8x\n", ((cl_uint*)resultPtr)[0], ((cl_uint*)resultPtr)[1], ((cl_uint*)resultPtr)[2], ((cl_uint*)resultPtr)[3] ); + break; + case CL_FLOAT: + log_error( " Expected: %a %a %a %a\n", ((cl_float*)resultBuffer)[0], ((cl_float*)resultBuffer)[1], ((cl_float*)resultBuffer)[2], ((cl_float*)resultBuffer)[3] ); + log_error( " Actual: %a %a %a %a\n", ((cl_float*)resultPtr)[0], ((cl_float*)resultPtr)[1], ((cl_float*)resultPtr)[2], ((cl_float*)resultPtr)[3] ); + log_error( " Ulps: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] ); + break; + } + + float *v = (float *)(char *)imagePtr; + log_error( " src: %g %g %g %g\n", v[ 0 ], v[ 1], v[ 2 ], v[ 3 ] ); + log_error( " : %a %a %a %a\n", v[ 0 ], v[ 1], v[ 2 ], v[ 3 ] ); + log_error( " src: %12.24f %12.24f %12.24f %12.24f\n", v[0 ], v[ 1], v[ 2 ], v[ 3 ] ); + + if( ( --numTries ) == 0 ) + return 1; + } + } + } + imagePtr += get_explicit_type_size( inputType ) * 4; + resultPtr += get_pixel_size( imageInfo->format ); + } + } + } + { + nextLevelOffset += width_lod * height_lod * depth_lod * pixelSize; + width_lod = ( width_lod >> 1 ) ? ( width_lod >> 1 ) : 1; + height_lod = ( height_lod >> 1 ) ? ( height_lod >> 1 ) : 1; + depth_lod = ( depth_lod >> 1 ) ? ( depth_lod >> 1 ) : 1; + } + } + } + // All done! 
+ return totalErrors; +} + + +int test_write_image_3D_set( cl_device_id device, cl_image_format *format, ExplicitType inputType, MTdata d ) +{ + char programSrc[10240]; + const char *ptr; + const char *readFormat; + clProgramWrapper program; + clKernelWrapper kernel; + const char *KernelSourcePattern = NULL; + + int error; + + // Get our operating parameters + size_t maxWidth, maxHeight, maxDepth; + cl_ulong maxAllocSize, memSize; + + image_descriptor imageInfo = { 0x0 }; + + imageInfo.format = format; + imageInfo.type = CL_MEM_OBJECT_IMAGE3D; + + error = clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof( maxDepth ), &maxDepth, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL ); + test_error( error, "Unable to get max image 3D size from device" ); + + if (memSize > (cl_ulong)SIZE_MAX) { + memSize = (cl_ulong)SIZE_MAX; + } + + // Determine types + if( inputType == kInt ) + readFormat = "i"; + else if( inputType == kUInt ) + readFormat = "ui"; + else // kFloat + readFormat = "f"; + + if(gtestTypesToRun & kWriteTests) + { + KernelSourcePattern = write3DKernelSourcePattern; + } + else + { + KernelSourcePattern = readwrite3DKernelSourcePattern; + } + + // Construct the source + sprintf( programSrc, + KernelSourcePattern, + gTestMipmaps ? "" : khr3DWritesPragma, + get_explicit_type_name( inputType ), + gTestMipmaps ? ", int lod" : "", + gTestMipmaps ? offset3DLodSource : offset3DSource, + readFormat, + gTestMipmaps ? 
", lod" : "" ); + + ptr = programSrc; + error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", "-cl-std=CL2.0" ); + test_error( error, "Unable to create testing kernel" ); + + // Run tests + if( gTestSmallImages ) + { + for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ ) + { + imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format ); + for( imageInfo.height = 1; imageInfo.height < 9; imageInfo.height++ ) + { + imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch; + for( imageInfo.depth = 2; imageInfo.depth < 7; imageInfo.depth++ ) + { + if (gTestMipmaps) + imageInfo.num_mip_levels = (size_t) random_in_range(2,(compute_max_mip_levels(imageInfo.width, imageInfo.height, imageInfo.depth) - 1), d); + + if( gDebugTrace ) + log_info( " at size %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.depth ); + int retCode = test_write_image_3D( device, context, queue, kernel, &imageInfo, inputType, d ); + if( retCode ) + return retCode; + } + } + } + } + else if( gTestMaxImages ) + { + // Try a specific set of maximum sizes + size_t numbeOfSizes; + size_t sizes[100][3]; + + get_max_sizes(&numbeOfSizes, 100, sizes, maxWidth, maxHeight, maxDepth, 1, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE3D, imageInfo.format, CL_TRUE); + + for( size_t idx = 0; idx < numbeOfSizes; idx++ ) + { + imageInfo.width = sizes[ idx ][ 0 ]; + imageInfo.height = sizes[ idx ][ 1 ]; + imageInfo.depth = sizes[ idx ][ 2 ]; + imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format ); + imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch; + if (gTestMipmaps) + imageInfo.num_mip_levels = (size_t) random_in_range(2,(compute_max_mip_levels(imageInfo.width, imageInfo.height, imageInfo.depth) - 1), d); + log_info("Testing %d x %d x %d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.depth); + int retCode = test_write_image_3D( device, context, queue, kernel, 
&imageInfo, inputType, d ); + if( retCode ) + return retCode; + } + } + else if( gTestRounding ) + { + size_t typeRange = 1 << ( get_format_type_size( imageInfo.format ) * 8 ); + imageInfo.height = typeRange / 256; + imageInfo.width = (size_t)( typeRange / (cl_ulong)imageInfo.height ); + imageInfo.depth = 1; + + imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format ); + imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch; + int retCode = test_write_image_3D( device, context, queue, kernel, &imageInfo, inputType, d ); + if( retCode ) + return retCode; + } + else + { + for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ ) + { + cl_ulong size; + // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that + // image, the result array, plus offset arrays, will fit in the global ram space + do + { + imageInfo.width = reduceImageSizeRange(maxWidth, d ); + imageInfo.height = reduceImageSizeRange(maxHeight, d ); + imageInfo.depth = reduceImageDepth(maxDepth, d ); + + if(gTestMipmaps) + { + imageInfo.num_mip_levels = (size_t) random_in_range(2,(compute_max_mip_levels(imageInfo.width, imageInfo.height, imageInfo.depth) - 1), d); + //Need to take into account the input buffer size, otherwise we will end up with input buffer that is exceeding MaxAlloc + size = 4 * compute_mipmapped_image_size(imageInfo) * get_explicit_type_size( inputType ); + } + else + { + imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format ); + imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch; + if( gEnablePitch ) + { + size_t extraWidth = (int)random_log_in_range( 0, 64, d ); + imageInfo.rowPitch += extraWidth * get_pixel_size( imageInfo.format ); + + imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch; + extraWidth = (int)random_log_in_range( 0, 64, d ); + imageInfo.slicePitch += extraWidth * imageInfo.rowPitch; + } + + size = (size_t)imageInfo.slicePitch * (size_t)imageInfo.depth * 4; + } + } while( size 
> maxAllocSize || ( size * 3 ) > memSize ); + + if( gDebugTrace ) + log_info( " at size %ld,%ld,%ld (pitch %ld, slice %ld) out of %ld,%ld,%ld\n", imageInfo.width, imageInfo.height, imageInfo.depth, + imageInfo.rowPitch, imageInfo.slicePitch, maxWidth, maxHeight, maxDepth ); + + int retCode = test_write_image_3D( device, context, queue, kernel, &imageInfo, inputType, d ); + if( retCode ) + return retCode; + } + } + + return 0; +} diff --git a/test_conformance/images/kernel_read_write/test_write_image.cpp b/test_conformance/images/kernel_read_write/test_write_image.cpp new file mode 100644 index 00000000..455098bb --- /dev/null +++ b/test_conformance/images/kernel_read_write/test_write_image.cpp @@ -0,0 +1,887 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../testBase.h" + +#if !defined(_WIN32) +#include +#endif + +#define MAX_ERR 0.005f + +extern cl_command_queue queue; +extern cl_context context; +extern bool gDebugTrace, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestRounding, gTestImage2DFromBuffer, gTestMipmaps; +extern cl_filter_mode gFilterModeToSkip; +extern cl_mem_flags gMemFlagsToUse; +extern int gtestTypesToRun; + +extern int test_write_image_1D_set( cl_device_id device, cl_image_format *format, ExplicitType inputType, MTdata d ); +extern int test_write_image_3D_set( cl_device_id device, cl_image_format *format, ExplicitType inputType, MTdata d ); +extern int test_write_image_1D_array_set( cl_device_id device, cl_image_format *format, ExplicitType inputType, MTdata d ); +extern int test_write_image_2D_array_set( cl_device_id device, cl_image_format *format, ExplicitType inputType, MTdata d ); + + +const char *writeKernelSourcePattern = +"__kernel void sample_kernel( __global %s%s *input, write_only %s output %s)\n" +"{\n" +" int tidX = get_global_id(0), tidY = get_global_id(1);\n" +"%s" +" write_image%s( output, (int2)( tidX, tidY ) %s, input[ offset ]);\n" +"}"; + +const char *read_writeKernelSourcePattern = +"__kernel void sample_kernel( __global %s%s *input, read_write %s output %s)\n" +"{\n" +" int tidX = get_global_id(0), tidY = get_global_id(1);\n" +"%s" +" write_image%s( output, (int2)( tidX, tidY )%s, input[ offset ] );\n" +"}"; + +const char *offset2DKernelSource = +" int offset = tidY*get_image_width(output) + tidX;\n"; + +const char *offset2DLodKernelSource = +" int width_lod = ( get_image_width(output) >> lod ) ? 
( get_image_width(output) >> lod ) : 1;\n" +" int offset = tidY * width_lod + tidX;\n"; + +int test_write_image( cl_device_id device, cl_context context, cl_command_queue queue, cl_kernel kernel, + image_descriptor *imageInfo, ExplicitType inputType, MTdata d ) +{ + int totalErrors = 0; + size_t num_flags = 0; + const cl_mem_flags *mem_flag_types = NULL; + const char * *mem_flag_names = NULL; + const cl_mem_flags write_only_mem_flag_types[2] = { CL_MEM_WRITE_ONLY, CL_MEM_READ_WRITE }; + const char * write_only_mem_flag_names[2] = { "CL_MEM_WRITE_ONLY", "CL_MEM_READ_WRITE" }; + const cl_mem_flags read_write_mem_flag_types[1] = { CL_MEM_READ_WRITE}; + const char * read_write_mem_flag_names[1] = { "CL_MEM_READ_WRITE"}; + + if(gtestTypesToRun & kWriteTests) + { + mem_flag_types = write_only_mem_flag_types; + mem_flag_names = write_only_mem_flag_names; + num_flags = sizeof( write_only_mem_flag_types ) / sizeof( write_only_mem_flag_types[0] ); + } + else + { + mem_flag_types = read_write_mem_flag_types; + mem_flag_names = read_write_mem_flag_names; + num_flags = sizeof( read_write_mem_flag_types ) / sizeof( read_write_mem_flag_types[0] ); + } + + size_t pixelSize = get_pixel_size( imageInfo->format ); + int channel_scale = (imageInfo->format->image_channel_order == CL_DEPTH) ? 1 : 4; + + for( size_t mem_flag_index = 0; mem_flag_index < num_flags; mem_flag_index++ ) + { + int error; + size_t threads[2]; + bool verifyRounding = false; + int totalErrors = 0; + int forceCorrectlyRoundedWrites = 0; + +#if defined( __APPLE__ ) + // Require Apple's CPU implementation to be correctly rounded, not just within 0.6 + cl_device_type type = 0; + if( (error = clGetDeviceInfo( device, CL_DEVICE_TYPE, sizeof( type), &type, NULL ))) + { + log_error("Error: Could not get device type for Apple device! 
(%d) \n", error ); + return 1; + } + if( type == CL_DEVICE_TYPE_CPU ) + forceCorrectlyRoundedWrites = 1; +#endif + + if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT ) + if( DetectFloatToHalfRoundingMode(queue) ) + return 1; + + BufferOwningPtr maxImageUseHostPtrBackingStore, imageValues, imageBufferValues; + + create_random_image_data( inputType, imageInfo, imageValues, d, gTestImage2DFromBuffer ); + + if(!gTestMipmaps) + { + if( inputType == kFloat && imageInfo->format->image_channel_data_type != CL_FLOAT && imageInfo->format->image_channel_data_type != CL_HALF_FLOAT ) + { + /* Pilot data for sRGB images */ + if(is_sRGBA_order(imageInfo->format->image_channel_order)) + { + // We want to generate ints (mostly) in range of the target format which should be [0,255] + // However the range chosen here is [-test_range_ext, 255 + test_range_ext] so that + // it can test some out-of-range data points + const unsigned int test_range_ext = 16; + int formatMin = 0 - test_range_ext; + int formatMax = 255 + test_range_ext; + int pixel_value = 0; + + // First, fill with arbitrary floats + for( size_t y = 0; y < imageInfo->height; y++ ) + { + float *inputValues = (float *)(char*)imageValues + imageInfo->width * y * 4; + for( size_t i = 0; i < imageInfo->width * 4; i++ ) + { + pixel_value = random_in_range( formatMin, (int)formatMax, d ); + inputValues[ i ] = (float)(pixel_value/255.0f); + } + } + + // Throw a few extra test values in there + float *inputValues = (float *)(char*)imageValues; + size_t i = 0; + + // Piloting some debug inputs. 
+ inputValues[ i++ ] = -0.5f; + inputValues[ i++ ] = 0.5f; + inputValues[ i++ ] = 2.0f; + inputValues[ i++ ] = 0.5f; + + // Also fill in the first few vectors with some deliberate tests to determine the rounding mode + // is correct + if( imageInfo->width > 12 ) + { + float formatMax = (float)get_format_max_int( imageInfo->format ); + inputValues[ i++ ] = 4.0f / formatMax; + inputValues[ i++ ] = 4.3f / formatMax; + inputValues[ i++ ] = 4.5f / formatMax; + inputValues[ i++ ] = 4.7f / formatMax; + inputValues[ i++ ] = 5.0f / formatMax; + inputValues[ i++ ] = 5.3f / formatMax; + inputValues[ i++ ] = 5.5f / formatMax; + inputValues[ i++ ] = 5.7f / formatMax; + } + } + else + { + // First, fill with arbitrary floats + for( size_t y = 0; y < imageInfo->height; y++ ) + { + float *inputValues = (float *)(char*)imageValues + imageInfo->width * y * channel_scale; + for( size_t i = 0; i < imageInfo->width * channel_scale; i++ ) + inputValues[ i ] = get_random_float( -0.1f, 1.1f, d ); + } + + // Throw a few extra test values in there + float *inputValues = (float *)(char*)imageValues; + size_t i = 0; + inputValues[ i++ ] = -0.0000000000009f; + inputValues[ i++ ] = 1.f; + inputValues[ i++ ] = -1.f; + inputValues[ i++ ] = 2.f; + + // Also fill in the first few vectors with some deliberate tests to determine the rounding mode + // is correct + if( imageInfo->width > 12 ) + { + float formatMax = (float)get_format_max_int( imageInfo->format ); + inputValues[ i++ ] = 4.0f / formatMax; + inputValues[ i++ ] = 4.3f / formatMax; + inputValues[ i++ ] = 4.5f / formatMax; + inputValues[ i++ ] = 4.7f / formatMax; + inputValues[ i++ ] = 5.0f / formatMax; + inputValues[ i++ ] = 5.3f / formatMax; + inputValues[ i++ ] = 5.5f / formatMax; + inputValues[ i++ ] = 5.7f / formatMax; + verifyRounding = true; + } + } + } + else if( inputType == kUInt ) + { + unsigned int *inputValues = (unsigned int*)(char*)imageValues; + size_t i = 0; + inputValues[ i++ ] = 0; + inputValues[ i++ ] = 65535; + 
inputValues[ i++ ] = 7271820; + inputValues[ i++ ] = 0; + } + } + + // Construct testing sources + clProtectedImage protImage; + clMemWrapper unprotImage; + cl_mem image; + cl_mem imageBuffer; + + if( gMemFlagsToUse == CL_MEM_USE_HOST_PTR ) + { + if (gTestImage2DFromBuffer) + { + imageBuffer = clCreateBuffer( context, mem_flag_types[mem_flag_index] | CL_MEM_USE_HOST_PTR, + imageInfo->rowPitch * imageInfo->height, maxImageUseHostPtrBackingStore, &error); + test_error( error, "Unable to create buffer" ); + unprotImage = create_image_2d_buffer( context, mem_flag_types[mem_flag_index], imageInfo->format, + imageInfo->width, imageInfo->height, imageInfo->rowPitch, + imageBuffer, &error ); + + } + else + { + // clProtectedImage uses USE_HOST_PTR, so just rely on that for the testing (via Ian) + // Do not use protected images for max image size test since it rounds the row size to a page size + if (gTestMaxImages) { + create_random_image_data( inputType, imageInfo, maxImageUseHostPtrBackingStore, d ); + + unprotImage = create_image_2d( context, mem_flag_types[mem_flag_index] | CL_MEM_USE_HOST_PTR, imageInfo->format, + imageInfo->width, imageInfo->height, 0, + maxImageUseHostPtrBackingStore, &error ); + } else { + error = protImage.Create( context, mem_flag_types[mem_flag_index], imageInfo->format, imageInfo->width, imageInfo->height ); + } + } + if( error != CL_SUCCESS ) + { + if (gTestImage2DFromBuffer) { + clReleaseMemObject(imageBuffer); + if (error == CL_INVALID_IMAGE_FORMAT_DESCRIPTOR) { + log_info( "Format not supported for cl_khr_image2d_from_buffer skipping...\n" ); + return 0; + } + } + + log_error( "ERROR: Unable to create 2D image of size %ld x %ld pitch %ld (%s, %s)\n", imageInfo->width, imageInfo->height, + imageInfo->rowPitch, IGetErrorString( error ), mem_flag_names[mem_flag_index] ); + return error; + } + + if (gTestMaxImages || gTestImage2DFromBuffer) + image = (cl_mem)unprotImage; + else + image = (cl_mem)protImage; + } + else // Either 
CL_MEM_ALLOC_HOST_PTR, CL_MEM_COPY_HOST_PTR or none + { + if( gTestMipmaps ) + { + cl_image_desc image_desc = {0}; + image_desc.image_type = imageInfo->type; + image_desc.num_mip_levels = imageInfo->num_mip_levels; + image_desc.image_width = imageInfo->width; + image_desc.image_height = imageInfo->height; + + unprotImage = clCreateImage( context, mem_flag_types[mem_flag_index] | ( gMemFlagsToUse & ~(CL_MEM_COPY_HOST_PTR) ), + imageInfo->format, &image_desc, NULL, &error); + if( error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create %d level 2D image of size %ld x %ld (%s, %s)\n", imageInfo->num_mip_levels, imageInfo->width, imageInfo->height, + IGetErrorString( error ), mem_flag_names[mem_flag_index] ); + return error; + } + } + else if (gTestImage2DFromBuffer) + { + generate_random_image_data( imageInfo, imageBufferValues, d ); + imageBuffer = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, + imageInfo->rowPitch * imageInfo->height, imageBufferValues, &error); + test_error( error, "Unable to create buffer" ); + unprotImage = create_image_2d_buffer( context, mem_flag_types[mem_flag_index], imageInfo->format, + imageInfo->width, imageInfo->height, imageInfo->rowPitch, + imageBuffer, &error ); + + } + else + { + // Note: if ALLOC_HOST_PTR is used, the driver allocates memory that can be accessed by the host, but otherwise + // it works just as if no flag is specified, so we just do the same thing either way + // Note: if the flags is really CL_MEM_COPY_HOST_PTR, we want to remove it, because we don't want to copy any incoming data + unprotImage = create_image_2d( context, mem_flag_types[mem_flag_index] | ( gMemFlagsToUse & ~(CL_MEM_COPY_HOST_PTR) ), imageInfo->format, + imageInfo->width, imageInfo->height, 0, + imageValues, &error ); + } + if( error != CL_SUCCESS ) + { + if (gTestImage2DFromBuffer) { + clReleaseMemObject(imageBuffer); + if (error == CL_INVALID_IMAGE_FORMAT_DESCRIPTOR) { + log_info( "Format not supported for cl_khr_image2d_from_buffer 
skipping...\n" ); + return 0; + } + } + + log_error( "ERROR: Unable to create 2D image of size %ld x %ld pitch %ld (%s, %s)\n", imageInfo->width, imageInfo->height, + imageInfo->rowPitch, IGetErrorString( error ), mem_flag_names[mem_flag_index] ); + return error; + } + image = unprotImage; + } + + error = clSetKernelArg( kernel, 1, sizeof( cl_mem ), &image ); + test_error( error, "Unable to set kernel arguments" ); + + size_t width_lod = imageInfo->width, height_lod = imageInfo->height, nextLevelOffset = 0; + size_t origin[ 3 ] = { 0, 0, 0 }; + size_t region[ 3 ] = { imageInfo->width, imageInfo->height, 1 }; + size_t resultSize; + + int num_lod_loops = (gTestMipmaps)? imageInfo->num_mip_levels : 1; + for( int lod = 0; lod < num_lod_loops; lod++) + { + if(gTestMipmaps) + { + error = clSetKernelArg( kernel, 2, sizeof( int ), &lod ); + } + // Run the kernel + threads[0] = (size_t)width_lod; + threads[1] = (size_t)height_lod; + + clMemWrapper inputStream; + + char *imagePtrOffset = imageValues + nextLevelOffset; + + inputStream = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), + get_explicit_type_size( inputType ) * channel_scale * width_lod * height_lod, imagePtrOffset, &error ); + test_error( error, "Unable to create input buffer" ); + + // Set arguments + error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &inputStream ); + test_error( error, "Unable to set kernel arguments" ); + + error = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, NULL ); + test_error( error, "Unable to run kernel" ); + + // Get results + if( gTestMipmaps ) + resultSize = width_lod * height_lod * get_pixel_size(imageInfo->format); + else + resultSize = imageInfo->rowPitch * imageInfo->height; + clProtectedArray PA(resultSize); + char *resultValues = (char *)((void *)PA); + + if( gDebugTrace ) + log_info( " reading results, %ld kbytes\n", (unsigned long)( resultSize / 1024 ) ); + + origin[2] = lod; + region[0] = width_lod; + region[1] = height_lod; + 
error = clEnqueueReadImage( queue, image, CL_TRUE, origin, region, gEnablePitch ? imageInfo->rowPitch : 0, 0, resultValues, 0, NULL, NULL ); + test_error( error, "Unable to read results from kernel" ); + if( gDebugTrace ) + log_info( " results read\n" ); + + // Validate results element by element + char *imagePtr = (char*)imageValues + nextLevelOffset; + int numTries = 5; + for( size_t y = 0, i = 0; y < height_lod; y++ ) + { + char *resultPtr; + if( gTestMipmaps ) + resultPtr = (char *)resultValues + y * width_lod * pixelSize; + else + resultPtr = (char*)resultValues + y * imageInfo->rowPitch; + for( size_t x = 0; x < width_lod; x++, i++ ) + { + char resultBuffer[ 16 ]; // Largest format would be 4 channels * 4 bytes (32 bits) each + + // Convert this pixel + if( inputType == kFloat ) + pack_image_pixel( (float *)imagePtr, imageInfo->format, resultBuffer ); + else if( inputType == kInt ) + pack_image_pixel( (int *)imagePtr, imageInfo->format, resultBuffer ); + else // if( inputType == kUInt ) + pack_image_pixel( (unsigned int *)imagePtr, imageInfo->format, resultBuffer ); + + // Compare against the results + if(is_sRGBA_order(imageInfo->format->image_channel_order)) + { + // Compare sRGB-mapped values + cl_float expected[4] = {0}; + cl_float* input_values = (float*)imagePtr; + cl_uchar *actual = (cl_uchar*)resultPtr; + float max_err = MAX_lRGB_TO_sRGB_CONVERSION_ERROR; + float err[4] = {0.0f}; + + for( unsigned int j = 0; j < get_format_channel_count( imageInfo->format ); j++ ) + { + if(j < 3) + { + expected[j] = sRGBmap(input_values[j]); + } + else // there is no sRGB conversion for alpha component if it exists + { + expected[j] = NORMALIZE(input_values[j], 255.0f); + } + + err[j] = fabsf( expected[ j ] - actual[ j ] ); + } + + if ((err[0] > max_err) || + (err[1] > max_err) || + (err[2] > max_err) || + (err[3] > 0)) // there is no conversion for alpha so the error should be zero + { + log_error( " Error: %g %g %g %g\n", err[0], err[1], err[2], err[3]); + 
log_error( " Input: %g %g %g %g\n", *((float *)imagePtr), *((float *)imagePtr + 1), *((float *)imagePtr + 2), *((float *)imagePtr + 3)); + log_error( " Expected: %g %g %g %g\n", expected[ 0 ], expected[ 1 ], expected[ 2 ], expected[ 3 ] ); + log_error( " Actual: %d %d %d %d\n", actual[ 0 ], actual[ 1 ], actual[ 2 ], actual[ 3 ] ); + return 1; + } + } + else if( imageInfo->format->image_channel_data_type == CL_FLOAT ) + { + // Compare floats + float *expected = (float *)resultBuffer; + float *actual = (float *)resultPtr; + float err = 0.f; + for( unsigned int j = 0; j < get_format_channel_count( imageInfo->format ); j++ ) + err += ( expected[ j ] != 0 ) ? fabsf( ( expected[ j ] - actual[ j ] ) / expected[ j ] ) : fabsf( expected[ j ] - actual[ j ] ); + + err /= (float)get_format_channel_count( imageInfo->format ); + if( err > MAX_ERR ) + { + unsigned int *e = (unsigned int *)expected; + unsigned int *a = (unsigned int *)actual; + log_error( "ERROR: Sample %ld (%ld,%ld) did not validate! (%s)\n", i, x, y, mem_flag_names[mem_flag_index] ); + log_error( " Error: %g\n", err ); + log_error( " Expected: %a %a %a %a\n", expected[ 0 ], expected[ 1 ], expected[ 2 ], expected[ 3 ] ); + log_error( " Expected: %08x %08x %08x %08x\n", e[ 0 ], e[ 1 ], e[ 2 ], e[ 3 ] ); + log_error( " Actual: %a %a %a %a\n", actual[ 0 ], actual[ 1 ], actual[ 2 ], actual[ 3 ] ); + log_error( " Actual: %08x %08x %08x %08x\n", a[ 0 ], a[ 1 ], a[ 2 ], a[ 3 ] ); + totalErrors++; + if( ( --numTries ) == 0 ) + return 1; + } + } + else if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT ) + { + + // Compare half floats + if( memcmp( resultBuffer, resultPtr, 2 * get_format_channel_count( imageInfo->format ) ) != 0 ) + { + cl_ushort *e = (cl_ushort *)resultBuffer; + cl_ushort *a = (cl_ushort *)resultPtr; + int err_cnt = 0; + + //Fix up cases where we have NaNs + for( size_t j = 0; j < get_format_channel_count( imageInfo->format ); j++ ) + { + if( is_half_nan( e[j] ) && is_half_nan(a[j]) ) + 
continue; + if( e[j] != a[j] ) + err_cnt++; + } + + if( err_cnt ) + { + totalErrors++; + log_error( "ERROR: Sample %ld (%ld,%ld) did not validate! (%s)\n", i, x, y, mem_flag_names[mem_flag_index] ); + log_error( " Expected: 0x%04x 0x%04x 0x%04x 0x%04x\n", e[0], e[1], e[2], e[3] ); + log_error( " Actual: 0x%04x 0x%04x 0x%04x 0x%04x\n", a[0], a[1], a[2], a[3] ); + if( inputType == kFloat ) + { + float *p = (float *)(char *)imagePtr; + log_error( " Source: %a %a %a %a\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] ); + log_error( " : %12.24f %12.24f %12.24f %12.24f\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] ); + } + if( ( --numTries ) == 0 ) + return 1; + } + } + } + else + { + // Exact result passes every time + if( memcmp( resultBuffer, resultPtr, get_pixel_size( imageInfo->format ) ) != 0 ) + { + // result is inexact. Calculate error + int failure = 1; + float errors[4] = {NAN, NAN, NAN, NAN}; + pack_image_pixel_error( (float *)imagePtr, imageInfo->format, resultBuffer, errors ); + + // We are allowed 0.6 absolute error vs. infinitely precise for some normalized formats + if( 0 == forceCorrectlyRoundedWrites && + ( + imageInfo->format->image_channel_data_type == CL_UNORM_INT8 || + imageInfo->format->image_channel_data_type == CL_UNORM_INT_101010 || + imageInfo->format->image_channel_data_type == CL_UNORM_INT16 || + imageInfo->format->image_channel_data_type == CL_SNORM_INT8 || + imageInfo->format->image_channel_data_type == CL_SNORM_INT16 + )) + { + if( ! (fabsf( errors[0] ) > 0.6f) && ! (fabsf( errors[1] ) > 0.6f) && + ! (fabsf( errors[2] ) > 0.6f) && ! (fabsf( errors[3] ) > 0.6f) ) + failure = 0; + } + + + if( failure ) + { + totalErrors++; + // Is it our special rounding test? 
+ if( verifyRounding && i >= 1 && i <= 2 ) + { + // Try to guess what the rounding mode of the device really is based on what it returned + const char *deviceRounding = "unknown"; + unsigned int deviceResults[8]; + read_image_pixel( resultPtr, imageInfo, 0, 0, 0, deviceResults, lod ); + read_image_pixel( resultPtr, imageInfo, 1, 0, 0, &deviceResults[ 4 ], lod ); + + if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 4 && deviceResults[ 3 ] == 4 && + deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 5 && deviceResults[ 7 ] == 5 ) + deviceRounding = "truncate"; + else if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 5 && deviceResults[ 3 ] == 5 && + deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 6 && deviceResults[ 7 ] == 6 ) + deviceRounding = "round to nearest"; + else if( deviceResults[ 0 ] == 4 && deviceResults[ 1 ] == 4 && deviceResults[ 2 ] == 4 && deviceResults[ 3 ] == 5 && + deviceResults[ 4 ] == 5 && deviceResults[ 5 ] == 5 && deviceResults[ 6 ] == 6 && deviceResults[ 7 ] == 6 ) + deviceRounding = "round to even"; + + log_error( "ERROR: Rounding mode sample (%ld) did not validate, probably due to the device's rounding mode being wrong (%s)\n", i, mem_flag_names[mem_flag_index] ); + log_error( " Actual values rounded by device: %x %x %x %x %x %x %x %x\n", deviceResults[ 0 ], deviceResults[ 1 ], deviceResults[ 2 ], deviceResults[ 3 ], + deviceResults[ 4 ], deviceResults[ 5 ], deviceResults[ 6 ], deviceResults[ 7 ] ); + log_error( " Rounding mode of device appears to be %s\n", deviceRounding ); + return 1; + } + log_error( "ERROR: Sample %d (%d,%d) did not validate!\n", (int)i, (int)x, (int)y ); + switch(imageInfo->format->image_channel_data_type) + { + case CL_UNORM_INT8: + case CL_SNORM_INT8: + case CL_UNSIGNED_INT8: + case CL_SIGNED_INT8: + case CL_UNORM_INT_101010: + log_error( " Expected: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", 
((cl_uchar*)resultBuffer)[0], ((cl_uchar*)resultBuffer)[1], ((cl_uchar*)resultBuffer)[2], ((cl_uchar*)resultBuffer)[3] ); + log_error( " Actual: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultPtr)[0], ((cl_uchar*)resultPtr)[1], ((cl_uchar*)resultPtr)[2], ((cl_uchar*)resultPtr)[3] ); + log_error( " Error: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] ); + break; + case CL_UNORM_INT16: + case CL_SNORM_INT16: + case CL_UNSIGNED_INT16: + case CL_SIGNED_INT16: +#ifdef CL_SFIXED14_APPLE + case CL_SFIXED14_APPLE: +#endif + log_error( " Expected: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultBuffer)[0], ((cl_ushort*)resultBuffer)[1], ((cl_ushort*)resultBuffer)[2], ((cl_ushort*)resultBuffer)[3] ); + log_error( " Actual: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultPtr)[0], ((cl_ushort*)resultPtr)[1], ((cl_ushort*)resultPtr)[2], ((cl_ushort*)resultPtr)[3] ); + log_error( " Error: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] ); + break; + case CL_HALF_FLOAT: + log_error( " Expected: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultBuffer)[0], ((cl_ushort*)resultBuffer)[1], ((cl_ushort*)resultBuffer)[2], ((cl_ushort*)resultBuffer)[3] ); + log_error( " Actual: 0x%4.4x 0x%4.4x 0x%4.4x 0x%4.4x\n", ((cl_ushort*)resultPtr)[0], ((cl_ushort*)resultPtr)[1], ((cl_ushort*)resultPtr)[2], ((cl_ushort*)resultPtr)[3] ); + log_error( " Ulps: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] ); + break; + case CL_UNSIGNED_INT32: + case CL_SIGNED_INT32: + log_error( " Expected: 0x%8.8x 0x%8.8x 0x%8.8x 0x%8.8x\n", ((cl_uint*)resultBuffer)[0], ((cl_uint*)resultBuffer)[1], ((cl_uint*)resultBuffer)[2], ((cl_uint*)resultBuffer)[3] ); + log_error( " Actual: 0x%8.8x 0x%8.8x 0x%8.8x 0x%8.8x\n", ((cl_uint*)resultPtr)[0], ((cl_uint*)resultPtr)[1], ((cl_uint*)resultPtr)[2], ((cl_uint*)resultPtr)[3] ); + break; + case CL_FLOAT: + log_error( " Expected: %a %a %a %a\n", ((cl_float*)resultBuffer)[0], ((cl_float*)resultBuffer)[1], 
((cl_float*)resultBuffer)[2], ((cl_float*)resultBuffer)[3] ); + log_error( " Actual: %a %a %a %a\n", ((cl_float*)resultPtr)[0], ((cl_float*)resultPtr)[1], ((cl_float*)resultPtr)[2], ((cl_float*)resultPtr)[3] ); + log_error( " Ulps: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] ); + break; + } + + float *v = (float *)(char *)imagePtr; + log_error( " src: %g %g %g %g\n", v[ 0 ], v[ 1], v[ 2 ], v[ 3 ] ); + log_error( " : %a %a %a %a\n", v[ 0 ], v[ 1], v[ 2 ], v[ 3 ] ); + log_error( " src: %12.24f %12.24f %12.24f %12.24f\n", v[0 ], v[ 1], v[ 2 ], v[ 3 ] ); + + if( ( --numTries ) == 0 ) + return 1; + } + } + } + imagePtr += get_explicit_type_size( inputType ) * channel_scale; + resultPtr += get_pixel_size( imageInfo->format ); + } + } + { + nextLevelOffset += width_lod * height_lod * get_pixel_size( imageInfo->format); + width_lod = (width_lod >> 1) ?(width_lod >> 1) : 1; + height_lod = (height_lod >> 1) ?(height_lod >> 1) : 1; + } + } + + if (gTestImage2DFromBuffer) clReleaseMemObject(imageBuffer); + } + + + // All done! 
+ return totalErrors; +} + + +int test_write_image_set( cl_device_id device, cl_image_format *format, ExplicitType inputType, MTdata d ) +{ + char programSrc[10240]; + const char *ptr; + const char *readFormat; + clProgramWrapper program; + clKernelWrapper kernel; + const char *KernelSourcePattern = NULL; + int error; + + if (gTestImage2DFromBuffer) + { + if (format->image_channel_order == CL_RGB || format->image_channel_order == CL_RGBx) + { + switch (format->image_channel_data_type) + { + case CL_UNORM_INT8: + case CL_UNORM_INT16: + case CL_SNORM_INT8: + case CL_SNORM_INT16: + case CL_HALF_FLOAT: + case CL_FLOAT: + case CL_SIGNED_INT8: + case CL_SIGNED_INT16: + case CL_SIGNED_INT32: + case CL_UNSIGNED_INT8: + case CL_UNSIGNED_INT16: + case CL_UNSIGNED_INT32: + log_info( "Skipping image format: %s %s\n", GetChannelOrderName( format->image_channel_order ), + GetChannelTypeName( format->image_channel_data_type )); + return 0; + default: + break; + } + } + } + + // Get our operating parameters + size_t maxWidth, maxHeight; + cl_ulong maxAllocSize, memSize; + + image_descriptor imageInfo = { 0x0 }; + + imageInfo.format = format; + imageInfo.slicePitch = imageInfo.arraySize = imageInfo.depth = 0; + imageInfo.type = CL_MEM_OBJECT_IMAGE2D; + + error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL ); + test_error( error, "Unable to get max image 2D size from device" ); + + if (memSize > (cl_ulong)SIZE_MAX) { + memSize = (cl_ulong)SIZE_MAX; + } + + // Determine types + if( inputType == kInt ) + readFormat = "i"; + else if( inputType == kUInt ) + readFormat = "ui"; + else // kFloat + readFormat = "f"; + + 
if(gtestTypesToRun & kWriteTests) + { + KernelSourcePattern = writeKernelSourcePattern; + } + else + { + KernelSourcePattern = read_writeKernelSourcePattern; + } + + // Construct the source + sprintf( programSrc, + KernelSourcePattern, + get_explicit_type_name( inputType ), + (format->image_channel_order == CL_DEPTH) ? "" : "4", + (format->image_channel_order == CL_DEPTH) ? "image2d_depth_t" : "image2d_t", + gTestMipmaps ? ", int lod" : "", + gTestMipmaps ? offset2DLodKernelSource : offset2DKernelSource, + readFormat, + gTestMipmaps ? ", lod" : "" ); + + ptr = programSrc; + error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", "-cl-std=CL2.0" ); + test_error( error, "Unable to create testing kernel" ); + + // Run tests + if( gTestSmallImages ) + { + for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ ) + { + imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format ); + for( imageInfo.height = 1; imageInfo.height < 9; imageInfo.height++ ) + { + if( gTestMipmaps ) + imageInfo.num_mip_levels = (size_t) random_in_range(1, compute_max_mip_levels(imageInfo.width, imageInfo.height, 0)-1, d); + + if( gDebugTrace ) + log_info( " at size %d,%d\n", (int)imageInfo.width, (int)imageInfo.height ); + int retCode = test_write_image( device, context, queue, kernel, &imageInfo, inputType, d ); + if( retCode ) + return retCode; + } + } + } + else if( gTestMaxImages ) + { + // Try a specific set of maximum sizes + size_t numbeOfSizes; + size_t sizes[100][3]; + + get_max_sizes(&numbeOfSizes, 100, sizes, maxWidth, maxHeight, 1, 1, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE2D, imageInfo.format, CL_TRUE); + + for( size_t idx = 0; idx < numbeOfSizes; idx++ ) + { + imageInfo.width = sizes[ idx ][ 0 ]; + imageInfo.height = sizes[ idx ][ 1 ]; + imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format ); + if( gTestMipmaps ) + imageInfo.num_mip_levels = (size_t) random_in_range(1, 
compute_max_mip_levels(imageInfo.width, imageInfo.height, 0)-1, d); + log_info("Testing %d x %d\n", (int)imageInfo.width, (int)imageInfo.height); + int retCode = test_write_image( device, context, queue, kernel, &imageInfo, inputType, d ); + if( retCode ) + return retCode; + } + } + else if( gTestRounding ) + { + size_t typeRange = 1 << ( get_format_type_size( imageInfo.format ) * 8 ); + imageInfo.height = typeRange / 256; + imageInfo.width = (size_t)( typeRange / (cl_ulong)imageInfo.height ); + + imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format ); + int retCode = test_write_image( device, context, queue, kernel, &imageInfo, inputType, d ); + if( retCode ) + return retCode; + } + else + { + + cl_uint imagePitchAlign = 0; + if (gTestImage2DFromBuffer) + { +#if defined(CL_DEVICE_IMAGE_PITCH_ALIGNMENT) + error = clGetDeviceInfo( device, CL_DEVICE_IMAGE_PITCH_ALIGNMENT, sizeof( cl_uint ), &imagePitchAlign, NULL ); + if (!imagePitchAlign) + imagePitchAlign = 1; +#endif + test_error( error, "Unable to get CL_DEVICE_IMAGE_PITCH_ALIGNMENT from device" ); + } + + for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ ) + { + cl_ulong size; + // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that + // image, the result array, plus offset arrays, will fit in the global ram space + do + { + imageInfo.width = (size_t)random_log_in_range( 16, (int)maxWidth / 32, d ); + imageInfo.height = (size_t)random_log_in_range( 16, (int)maxHeight / 32, d ); + + if(gTestMipmaps) + { + imageInfo.num_mip_levels = (size_t) random_in_range(1, compute_max_mip_levels(imageInfo.width, imageInfo.height, 0) - 1, d); + size = 4 * compute_mipmapped_image_size(imageInfo); + } + else + { + imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format ); + if( gEnablePitch ) + { + size_t extraWidth = (int)random_log_in_range( 0, 64, d ); + imageInfo.rowPitch += extraWidth * get_pixel_size( imageInfo.format ); + } + + // if we are 
creating a 2D image from a buffer, make sure that the rowpitch is aligned to CL_DEVICE_IMAGE_PITCH_ALIGNMENT_APPLE + if (gTestImage2DFromBuffer) + { + size_t pitch = imagePitchAlign * get_pixel_size( imageInfo.format ); + imageInfo.rowPitch = ((imageInfo.rowPitch + pitch - 1) / pitch ) * pitch; + } + + size = (size_t)imageInfo.rowPitch * (size_t)imageInfo.height * 4; + } + } while( size > maxAllocSize || ( size * 3 ) > memSize ); + + if( gDebugTrace ) + log_info( " at size %d,%d (pitch %d) out of %d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.rowPitch, (int)maxWidth, (int)maxHeight ); + + int retCode = test_write_image( device, context, queue, kernel, &imageInfo, inputType, d ); + if( retCode ) + return retCode; + } + } + + return 0; +} + +int test_write_image_formats( cl_device_id device, cl_image_format *formatList, bool *filterFlags, unsigned int numFormats, + image_sampler_data *imageSampler, ExplicitType inputType, cl_mem_object_type imageType ) +{ + if( imageSampler->filter_mode == CL_FILTER_LINEAR ) + // No need to run for linear filters + return 0; + + int ret = 0; + + log_info( "write_image (%s input) *****************************\n", get_explicit_type_name( inputType ) ); + + + RandomSeed seed( gRandomSeed ); + + for( unsigned int i = 0; i < numFormats; i++ ) + { + cl_image_format &imageFormat = formatList[ i ]; + + if( filterFlags[ i ] ) + continue; + + if (is_sRGBA_order(imageFormat.image_channel_order)) + { + if( !is_extension_available( device, "cl_khr_srgb_image_writes" )) + { + log_missing_feature( "-----------------------------------------------------\n" ); + log_missing_feature( "WARNING!!! 
sRGB formats are shown in the supported write-format list.\n"); + log_missing_feature( "However the extension cl_khr_srgb_image_writes is not available.\n"); + log_missing_feature( "Please make sure the extension is officially supported by the device .\n"); + log_missing_feature( "-----------------------------------------------------\n\n" ); + continue; + } + } + + gTestCount++; + + print_write_header( &imageFormat, false ); + int retCode; + switch (imageType) + { + case CL_MEM_OBJECT_IMAGE1D: + retCode = test_write_image_1D_set( device, &imageFormat, inputType, seed ); + break; + case CL_MEM_OBJECT_IMAGE2D: + retCode = test_write_image_set( device, &imageFormat, inputType, seed ); + break; + case CL_MEM_OBJECT_IMAGE3D: + retCode = test_write_image_3D_set( device, &imageFormat, inputType, seed ); + break; + case CL_MEM_OBJECT_IMAGE1D_ARRAY: + retCode = test_write_image_1D_array_set( device, &imageFormat, inputType, seed ); + break; + case CL_MEM_OBJECT_IMAGE2D_ARRAY: + retCode = test_write_image_2D_array_set( device, &imageFormat, inputType, seed ); + break; + } + + if( retCode != 0 ) + { + gTestFailure++; + log_error( "FAILED: " ); + print_write_header( &imageFormat, true ); + log_info( "\n" ); + } + ret += retCode; + } + return ret; +} + + diff --git a/test_conformance/images/samplerlessReads/CMakeLists.txt b/test_conformance/images/samplerlessReads/CMakeLists.txt new file mode 100644 index 00000000..0f98f4ef --- /dev/null +++ b/test_conformance/images/samplerlessReads/CMakeLists.txt @@ -0,0 +1,24 @@ +set(MODULE_NAME SAMPLERLESS_READS) + +set(${MODULE_NAME}_SOURCES + main.cpp + test_iterations.cpp + test_loops.cpp + test_read_1D.cpp + test_read_3D.cpp + test_read_1D_buffer.cpp + test_read_1D_array.cpp + test_read_2D_array.cpp + ../../../test_common/harness/errorHelpers.c + ../../../test_common/harness/threadTesting.c + ../../../test_common/harness/kernelHelpers.c + ../../../test_common/harness/imageHelpers.cpp + ../../../test_common/harness/mt19937.c + 
../../../test_common/harness/conversions.c + ../../../test_common/harness/testHarness.c + ../../../test_common/harness/typeWrappers.cpp + ../../../test_common/harness/msvc9.c + ../../../test_common/harness/parseParameters.cpp +) + +include(../../CMakeCommon.txt) diff --git a/test_conformance/images/samplerlessReads/Jamfile b/test_conformance/images/samplerlessReads/Jamfile new file mode 100644 index 00000000..cc495b35 --- /dev/null +++ b/test_conformance/images/samplerlessReads/Jamfile @@ -0,0 +1,19 @@ +project + : requirements +# gcc:-xc++ +# msvc:"/TP" + ; + +exe test_samplerless_reads + : main.cpp + test_iterations.cpp + test_loops.cpp + test_read_3D.cpp + ; + +install dist + : test_samplerless_reads + : debug:$(DIST)/debug/tests/test_conformance/images/samplerlessReads + release:$(DIST)/release/tests/test_conformance/images/samplerlessReads + ; + diff --git a/test_conformance/images/samplerlessReads/Makefile b/test_conformance/images/samplerlessReads/Makefile new file mode 100644 index 00000000..c257a11a --- /dev/null +++ b/test_conformance/images/samplerlessReads/Makefile @@ -0,0 +1,52 @@ +ifdef BUILD_WITH_ATF +ATF = -framework ATF +USE_ATF = -DUSE_ATF +endif + +SRCS = main.cpp \ + test_iterations.cpp \ + test_loops.cpp \ + test_read_1D.cpp \ + test_read_1D_buffer.cpp \ + test_read_1D_array.cpp \ + test_read_2D_array.cpp \ + test_read_3D.cpp \ + ../../../test_common/harness/errorHelpers.c \ + ../../../test_common/harness/threadTesting.c \ + ../../../test_common/harness/kernelHelpers.c \ + ../../../test_common/harness/imageHelpers.cpp \ + ../../../test_common/harness/conversions.c \ + ../../../test_common/harness/testHarness.c \ + ../../../test_common/harness/mt19937.c \ + ../../../test_common/harness/typeWrappers.cpp + +DEFINES = DONT_TEST_GARBAGE_POINTERS + +SOURCES = $(abspath $(SRCS)) +LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries +LIBPATH += -L. 
+FRAMEWORK = +HEADERS = +TARGET = test_samplerless_reads +INCLUDE = -I../../test_common/harness +COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32 -Os +CC = c++ +CXX = c++ +CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE) +CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE) +LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF} + +OBJECTS := ${SOURCES:.c=.o} +OBJECTS := ${OBJECTS:.cpp=.o} + +TARGETOBJECT = +all: $(TARGET) + +$(TARGET): $(OBJECTS) + $(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES) + +clean: + rm -f $(TARGET) $(OBJECTS) + +.DEFAULT: + @echo The target \"$@\" does not exist in Makefile. diff --git a/test_conformance/images/samplerlessReads/main.cpp b/test_conformance/images/samplerlessReads/main.cpp new file mode 100644 index 00000000..cb95dc92 --- /dev/null +++ b/test_conformance/images/samplerlessReads/main.cpp @@ -0,0 +1,303 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../../test_common/harness/compat.h" + +#include +#include + +#if !defined(_WIN32) +#include +#include +#endif + +#include "../testBase.h" +#include "../../../test_common/harness/fpcontrol.h" +#include "../../../test_common/harness/parseParameters.h" + +#if defined(__PPC__) +// Global varaiable used to hold the FPU control register state. 
The FPSCR register can not +// be used because not all Power implementations retain or observed the NI (non-IEEE +// mode) bit. +__thread fpu_control_t fpu_control = 0; +#endif + +bool gTestReadWrite = false; +bool gDebugTrace = false; +bool gTestMaxImages = false, gTestSmallImages = false, gTestRounding = false; +int gTypesToTest = 0; +cl_channel_type gChannelTypeToUse = (cl_channel_type)-1; +cl_channel_order gChannelOrderToUse = (cl_channel_order)-1; +bool gEnablePitch = false; +cl_device_type gDeviceType = CL_DEVICE_TYPE_DEFAULT; + +cl_command_queue queue; +cl_context context; + +#define MAX_ALLOWED_STD_DEVIATION_IN_MB 8.0 + +void printUsage( const char *execName ) +{ + const char *p = strrchr( execName, '/' ); + if ( p != NULL ) + execName = p + 1; + + log_info( "Usage: %s [options]\n", execName ); + log_info( "Where:\n" ); + log_info( "\n" ); + log_info( "\tThe following flags specify the types to test. They can be combined; if none are specified, all are tested:\n" ); + log_info( "\t\tint - Test integer I/O (read_imagei)\n" ); + log_info( "\t\tuint - Test unsigned integer I/O (read_imageui)\n" ); + log_info( "\t\tfloat - Test float I/O (read_imagef)\n" ); + log_info( "\n" ); + log_info( "You may also use appropriate CL_ channel type and ordering constants.\n" ); + log_info( "\n" ); + log_info( "\t1D - Only test 1D images\n" ); + log_info( "\t2D - Only test 2D images\n" ); + log_info( "\t3D - Only test 3D images\n" ); + log_info( "\t1Darray - Only test 1D image arrays\n" ); + log_info( "\t2Darray - Only test 2D image arrays\n" ); + log_info( "\n" ); + log_info( "\tThe following modify the types of images tested:\n" ); + log_info( "\t\read_write - Runs the tests with read_write images which allow a kernel do both read and write to the same image \n" ); + log_info( "\t\tsmall_images - Runs every format through a loop of widths 1-13 and heights 1-9, instead of random sizes\n" ); + log_info( "\t\tmax_images - Runs every format through a set of size combinations 
with the max values, max values - 1, and max values / 128\n" ); + log_info( "\n" ); + log_info( "\tdebug_trace - Enables additional debug info logging\n" ); + log_info( "\tuse_pitches - Enables row and slice pitches\n" ); +} + + +extern int test_image_set( cl_device_id device, cl_mem_object_type imageType ); + +int main(int argc, const char *argv[]) +{ + cl_platform_id platform; + cl_device_id device; + cl_channel_type chanType; + cl_channel_order chanOrder; + char str[ 128 ]; + int testMethods = 0; + bool randomize = false; + + test_start(); + + argc = parseCustomParam(argc, argv); + if (argc == -1) + { + test_finish(); + return -1; + } + + //Check CL_DEVICE_TYPE environment variable + checkDeviceTypeOverride( &gDeviceType ); + + // Parse arguments + for ( int i = 1; i < argc; i++ ) + { + strncpy( str, argv[ i ], sizeof( str ) - 1 ); + + if ( strcmp( str, "cpu" ) == 0 || strcmp( str, "CL_DEVICE_TYPE_CPU" ) == 0 ) + gDeviceType = CL_DEVICE_TYPE_CPU; + else if ( strcmp( str, "gpu" ) == 0 || strcmp( str, "CL_DEVICE_TYPE_GPU" ) == 0 ) + gDeviceType = CL_DEVICE_TYPE_GPU; + else if ( strcmp( str, "accelerator" ) == 0 || strcmp( str, "CL_DEVICE_TYPE_ACCELERATOR" ) == 0 ) + gDeviceType = CL_DEVICE_TYPE_ACCELERATOR; + else if ( strcmp( str, "CL_DEVICE_TYPE_DEFAULT" ) == 0 ) + gDeviceType = CL_DEVICE_TYPE_DEFAULT; + + else if ( strcmp( str, "debug_trace" ) == 0 ) + gDebugTrace = true; + else if ( strcmp( str, "read_write" ) == 0 ) + gTestReadWrite = true; + else if ( strcmp( str, "small_images" ) == 0 ) + gTestSmallImages = true; + else if ( strcmp( str, "max_images" ) == 0 ) + gTestMaxImages = true; + else if ( strcmp( str, "use_pitches" ) == 0 ) + gEnablePitch = true; + + else if ( strcmp( str, "int" ) == 0 ) + gTypesToTest |= kTestInt; + else if ( strcmp( str, "uint" ) == 0 ) + gTypesToTest |= kTestUInt; + else if ( strcmp( str, "float" ) == 0 ) + gTypesToTest |= kTestFloat; + + else if ( strcmp( str, "randomize" ) == 0 ) + randomize = true; + + else if ( strcmp( str, 
"1D" ) == 0 ) + testMethods |= k1D; + else if( strcmp( str, "2D" ) == 0 ) + testMethods |= k2D; + else if( strcmp( str, "3D" ) == 0 ) + testMethods |= k3D; + else if( strcmp( str, "1Darray" ) == 0 ) + testMethods |= k1DArray; + else if( strcmp( str, "2Darray" ) == 0 ) + testMethods |= k2DArray; + + else if ( strcmp( str, "help" ) == 0 || strcmp( str, "?" ) == 0 ) + { + printUsage( argv[ 0 ] ); + return -1; + } + + else if ( ( chanType = get_channel_type_from_name( str ) ) != (cl_channel_type)-1 ) + gChannelTypeToUse = chanType; + + else if ( ( chanOrder = get_channel_order_from_name( str ) ) != (cl_channel_order)-1 ) + gChannelOrderToUse = chanOrder; + else + { + log_error( "ERROR: Unknown argument %d: %s. Exiting....\n", i, str ); + return -1; + } + } + + if (testMethods == 0) + testMethods = k1D | k2D | k3D | k1DArray | k2DArray; + if ( gTypesToTest == 0 ) + gTypesToTest = kTestAllTypes; + + // Seed the random # generators + if ( randomize ) + { + gRandomSeed = (cl_uint) time( NULL ); + gReSeed = 1; + log_info( "Random seed: %u.\n", gRandomSeed ); + } + + int error; + // Get our platform + error = clGetPlatformIDs(1, &platform, NULL); + if ( error ) + { + print_error( error, "Unable to get platform" ); + test_finish(); + return -1; + } + + // Get our device + error = clGetDeviceIDs(platform, gDeviceType, 1, &device, NULL ); + if ( error ) + { + print_error( error, "Unable to get specified device" ); + test_finish(); + return -1; + } + + // Get the device type so we know if it is a GPU even if default is passed in. + error = clGetDeviceInfo(device, CL_DEVICE_TYPE, sizeof(gDeviceType), &gDeviceType, NULL); + if ( error ) + { + print_error( error, "Unable to get device type" ); + test_finish(); + return -1; + } + + + if ( printDeviceHeader( device ) != CL_SUCCESS ) + { + test_finish(); + return -1; + } + + // Check for image support + if (checkForImageSupport( device ) == CL_IMAGE_FORMAT_NOT_SUPPORTED) { + log_info("Device does not support images. 
Skipping test.\n"); + test_finish(); + return 0; + } + + // Create a context to test with + context = clCreateContext( NULL, 1, &device, notify_callback, NULL, &error ); + if ( error != CL_SUCCESS ) + { + print_error( error, "Unable to create testing context" ); + test_finish(); + return -1; + } + + // Create a queue against the context + queue = clCreateCommandQueueWithProperties( context, device, 0, &error ); + if ( error != CL_SUCCESS ) + { + print_error( error, "Unable to create testing command queue" ); + test_finish(); + return -1; + } + + if ( gTestSmallImages ) + log_info( "Note: Using small test images\n" ); + + // On most platforms which support denorm, default is FTZ off. However, + // on some hardware where the reference is computed, default might be flush denorms to zero e.g. arm. + // This creates issues in result verification. Since spec allows the implementation to either flush or + // not flush denorms to zero, an implementation may choose not to flush i.e. return denorm result whereas + // reference result may be zero (flushed denorm). Hence we need to disable denorm flushing on host side + // where reference is being computed to make sure we get non-flushed reference result. If implementation + // returns flushed result, we correctly take care of that in verification code. 
+ + FPU_mode_type oldMode; + DisableFTZ(&oldMode); + + // Run the test now + int ret = 0; + if (testMethods & k1D) { + ret += test_image_set( device, CL_MEM_OBJECT_IMAGE1D ); + ret += test_image_set( device, CL_MEM_OBJECT_IMAGE1D_BUFFER ); + } + if (testMethods & k2D) + ret += test_image_set( device, CL_MEM_OBJECT_IMAGE2D ); + if (testMethods & k3D) + ret += test_image_set( device, CL_MEM_OBJECT_IMAGE3D ); + if (testMethods & k1DArray) + ret += test_image_set( device, CL_MEM_OBJECT_IMAGE1D_ARRAY ); + if (testMethods & k2DArray) + ret += test_image_set( device, CL_MEM_OBJECT_IMAGE2D_ARRAY ); + + // Restore FP state before leaving + RestoreFPState(&oldMode); + + error = clFinish(queue); + if (error) + print_error(error, "clFinish failed."); + + clReleaseContext(context); + clReleaseCommandQueue(queue); + + if (gTestFailure == 0) { + if (gTestCount > 1) + log_info("PASSED %d of %d tests.\n", gTestCount, gTestCount); + else + log_info("PASSED test.\n"); + } + else if (gTestFailure > 0) { + if (gTestCount > 1) + log_error("FAILED %d of %d tests.\n", gTestFailure, gTestCount); + else + log_error("FAILED test.\n"); + } + + // Clean up + test_finish(); + + if (gTestFailure > 0) + return gTestFailure; + + return ret; +} diff --git a/test_conformance/images/samplerlessReads/test_iterations.cpp b/test_conformance/images/samplerlessReads/test_iterations.cpp new file mode 100644 index 00000000..185023de --- /dev/null +++ b/test_conformance/images/samplerlessReads/test_iterations.cpp @@ -0,0 +1,337 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../testBase.h" +#include + +#if defined( __APPLE__ ) + #include + #include + #include +#endif + +#define MAX_ERR 0.005f +#define MAX_HALF_LINEAR_ERR 0.3f + +extern cl_command_queue queue; +extern cl_context context; +extern bool gDebugTrace, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestRounding; +extern cl_device_type gDeviceType; +extern bool gTestReadWrite; + +#define MAX_TRIES 1 +#define MAX_CLAMPED 1 + +const char *read2DKernelSourcePattern = +"__kernel void sample_kernel( read_only %s input, sampler_t sampler, __global int *results )\n" +"{\n" +" int tidX = get_global_id(0), tidY = get_global_id(1);\n" +" int offset = tidY*get_image_width(input) + tidX;\n" +" int2 coords = (int2)(tidX, tidY);\n" +" %s clr = read_image%s( input, coords );\n" +" int4 test = (clr != read_image%s( input, sampler, coords ));\n" +" if ( test.x || test.y || test.z || test.w )\n" +" results[offset] = -1;\n" +" else\n" +" results[offset] = 0;\n" +"}"; + + +const char *read_write2DKernelSourcePattern = +"__kernel void sample_kernel( read_only %s read_only_image, read_write %s read_write_image, sampler_t sampler, __global int *results )\n" +"{\n" +" int tidX = get_global_id(0), tidY = get_global_id(1);\n" +" int offset = tidY*get_image_width(read_only_image) + tidX;\n" +" int2 coords = (int2)(tidX, tidY);\n" +" %s clr = read_image%s( read_only_image, sampler, coords );\n" +" write_image%s(read_write_image, coords, clr);\n" +" atomic_work_item_fence(CLK_IMAGE_MEM_FENCE, memory_order_acq_rel, memory_scope_work_item);\n" +" int4 test = (clr != 
read_image%s( read_write_image, coords ));\n" +" if ( test.x || test.y || test.z || test.w )\n" +" results[offset] = -1;\n" +" else\n" +" results[offset] = 0;\n" +"}"; +int test_read_image_2D( cl_device_id device, cl_context context, cl_command_queue queue, cl_kernel kernel, + image_descriptor *imageInfo, image_sampler_data *imageSampler, + ExplicitType outputType, MTdata d ) +{ + int error; + size_t threads[2]; + cl_sampler actualSampler; + + // generate_random_image_data allocates with malloc, so we use a MallocDataBuffer here + BufferOwningPtr imageValues; + generate_random_image_data( imageInfo, imageValues, d ); + + if ( gDebugTrace ) + log_info( " - Creating image %d by %d...\n", (int)imageInfo->width, (int)imageInfo->height ); + + // Construct testing sources + cl_mem read_only_image, read_write_image; + cl_image_desc image_desc; + + memset(&image_desc, 0x0, sizeof(cl_image_desc)); + image_desc.image_type = CL_MEM_OBJECT_IMAGE2D; + image_desc.image_width = imageInfo->width; + image_desc.image_height = imageInfo->height; + image_desc.image_row_pitch = ( gEnablePitch ? 
imageInfo->rowPitch : 0 ); + image_desc.num_mip_levels = 0; + read_only_image = clCreateImage( context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, imageInfo->format, + &image_desc, imageValues, &error ); + if ( error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create 2D read_only image of size %d x %d pitch %d (%s)\n", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->rowPitch, IGetErrorString( error ) ); + return error; + } + + if(gTestReadWrite) + { + read_write_image = clCreateImage(context, + CL_MEM_READ_WRITE, + imageInfo->format, + &image_desc, + NULL, + &error ); + if ( error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create 2D read_write image of size %d x %d pitch %d (%s)\n", + (int)imageInfo->width, + (int)imageInfo->height, + (int)imageInfo->rowPitch, + IGetErrorString( error ) ); + return error; + } + } + + if ( gDebugTrace ) + log_info( " - Creating kernel arguments...\n" ); + + // Create sampler to use + cl_sampler_properties properties[] = { + CL_SAMPLER_NORMALIZED_COORDS, CL_FALSE, + CL_SAMPLER_ADDRESSING_MODE, CL_ADDRESS_NONE, + CL_SAMPLER_FILTER_MODE, CL_FILTER_NEAREST, + 0 }; + actualSampler = clCreateSamplerWithProperties( context, properties, &error ); + test_error( error, "Unable to create image sampler" ); + + // Create results buffer + cl_mem results = clCreateBuffer( context, 0, imageInfo->width * imageInfo->height * sizeof(cl_int), NULL, &error); + test_error( error, "Unable to create results buffer" ); + + size_t resultValuesSize = imageInfo->width * imageInfo->height * sizeof(cl_int); + BufferOwningPtr resultValues(malloc( resultValuesSize )); + memset( resultValues, 0xff, resultValuesSize ); + clEnqueueWriteBuffer( queue, results, CL_TRUE, 0, resultValuesSize, resultValues, 0, NULL, NULL ); + + // Set arguments + int idx = 0; + error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &read_only_image ); + test_error( error, "Unable to set kernel arguments" ); + if(gTestReadWrite) + { + error = clSetKernelArg( 
kernel, idx++, sizeof( cl_mem ), &read_write_image ); + test_error( error, "Unable to set kernel arguments" ); + } + error = clSetKernelArg( kernel, idx++, sizeof( cl_sampler ), &actualSampler ); + test_error( error, "Unable to set kernel arguments" ); + error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &results ); + test_error( error, "Unable to set kernel arguments" ); + + // Run the kernel + threads[0] = (size_t)imageInfo->width; + threads[1] = (size_t)imageInfo->height; + error = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, NULL ); + test_error( error, "Unable to run kernel" ); + + if ( gDebugTrace ) + log_info( " reading results, %ld kbytes\n", (unsigned long)( imageInfo->width * imageInfo->height * sizeof(cl_int) / 1024 ) ); + + error = clEnqueueReadBuffer( queue, results, CL_TRUE, 0, resultValuesSize, resultValues, 0, NULL, NULL ); + test_error( error, "Unable to read results from kernel" ); + if ( gDebugTrace ) + log_info( " results read\n" ); + + // Check for non-zero comps + bool allZeroes = true; + for ( size_t ic = 0; ic < imageInfo->width * imageInfo->height; ++ic ) + { + if ( resultValues[ic] ) { + allZeroes = false; + break; + } + } + if ( !allZeroes ) + { + log_error( " Sampler-less reads differ from reads with sampler.\n" ); + return -1; + } + + clReleaseSampler(actualSampler); + clReleaseMemObject(results); + clReleaseMemObject(read_only_image); + if(gTestReadWrite) + { + clReleaseMemObject(read_write_image); + } + + return 0; +} + +int test_read_image_set_2D( cl_device_id device, cl_image_format *format, image_sampler_data *imageSampler, + ExplicitType outputType ) +{ + char programSrc[10240]; + const char *ptr; + const char *readFormat; + const char *dataType; + clProgramWrapper program; + clKernelWrapper kernel; + RandomSeed seed( gRandomSeed ); + int error; + + // Get our operating params + size_t maxWidth, maxHeight; + cl_ulong maxAllocSize, memSize; + image_descriptor imageInfo = { 0 }; + size_t pixelSize; + 
+ imageInfo.format = format; + imageInfo.depth = imageInfo.arraySize = imageInfo.slicePitch = 0; + imageInfo.type = CL_MEM_OBJECT_IMAGE2D; + pixelSize = get_pixel_size( imageInfo.format ); + + error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL ); + test_error( error, "Unable to get max image 2D size from device" ); + + if (memSize > (cl_ulong)SIZE_MAX) { + memSize = (cl_ulong)SIZE_MAX; + } + + // Determine types + if ( outputType == kInt ) + { + readFormat = "i"; + dataType = "int4"; + } + else if ( outputType == kUInt ) + { + readFormat = "ui"; + dataType = "uint4"; + } + else // kFloat + { + readFormat = "f"; + dataType = (format->image_channel_order == CL_DEPTH) ? "float" : "float4"; + } + + if(gTestReadWrite) + { + sprintf(programSrc, + read_write2DKernelSourcePattern, + (format->image_channel_order == CL_DEPTH) ? "image2d_depth_t" : "image2d_t", + (format->image_channel_order == CL_DEPTH) ? "image2d_depth_t" : "image2d_t", + dataType, + readFormat, + readFormat, + readFormat); + } + else + { + sprintf(programSrc, + read2DKernelSourcePattern, + (format->image_channel_order == CL_DEPTH) ? 
"image2d_depth_t" : "image2d_t", + dataType, + readFormat, + readFormat ); + } + + ptr = programSrc; + error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", "-cl-std=CL2.0" ); + test_error( error, "Unable to create testing kernel" ); + + if ( gTestSmallImages ) + { + for ( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ ) + { + imageInfo.rowPitch = imageInfo.width * pixelSize; + for ( imageInfo.height = 1; imageInfo.height < 9; imageInfo.height++ ) + { + if ( gDebugTrace ) + log_info( " at size %d,%d\n", (int)imageInfo.width, (int)imageInfo.height ); + + int retCode = test_read_image_2D( device, context, queue, kernel, &imageInfo, imageSampler, outputType, seed ); + if ( retCode ) + return retCode; + } + } + } + else if ( gTestMaxImages ) + { + // Try a specific set of maximum sizes + size_t numbeOfSizes; + size_t sizes[100][3]; + + get_max_sizes(&numbeOfSizes, 100, sizes, maxWidth, maxHeight, 1, 1, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE2D, imageInfo.format); + + for ( size_t idx = 0; idx < numbeOfSizes; idx++ ) + { + imageInfo.width = sizes[ idx ][ 0 ]; + imageInfo.height = sizes[ idx ][ 1 ]; + imageInfo.rowPitch = imageInfo.width * pixelSize; + log_info("Testing %d x %d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ]); + if ( gDebugTrace ) + log_info( " at max size %d,%d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ] ); + int retCode = test_read_image_2D( device, context, queue, kernel, &imageInfo, imageSampler, outputType, seed ); + if ( retCode ) + return retCode; + } + } + else + { + for ( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ ) + { + cl_ulong size; + // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that + // image, the result array, plus offset arrays, will fit in the global ram space + do + { + imageInfo.width = (size_t)random_log_in_range( 16, (int)maxWidth / 32, seed ); + imageInfo.height = (size_t)random_log_in_range( 16, 
(int)maxHeight / 32, seed ); + + imageInfo.rowPitch = imageInfo.width * pixelSize; + if ( gEnablePitch ) + { + size_t extraWidth = (int)random_log_in_range( 0, 64, seed ); + imageInfo.rowPitch += extraWidth * pixelSize; + } + + size = (size_t)imageInfo.rowPitch * (size_t)imageInfo.height * 4; + } while ( size > maxAllocSize || ( size * 3 ) > memSize ); + + if ( gDebugTrace ) + log_info( " at size %d,%d (row pitch %d) out of %d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.rowPitch, (int)maxWidth, (int)maxHeight ); + int retCode = test_read_image_2D( device, context, queue, kernel, &imageInfo, imageSampler, outputType, seed ); + if ( retCode ) + return retCode; + } + } + + return 0; +} diff --git a/test_conformance/images/samplerlessReads/test_loops.cpp b/test_conformance/images/samplerlessReads/test_loops.cpp new file mode 100644 index 00000000..7d1b8fc1 --- /dev/null +++ b/test_conformance/images/samplerlessReads/test_loops.cpp @@ -0,0 +1,305 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../testBase.h" + +extern cl_context context; +extern int gTypesToTest; +extern cl_channel_type gChannelTypeToUse; +extern cl_channel_order gChannelOrderToUse; + +extern bool gDebugTrace; + +extern bool gTestReadWrite; + +extern int test_read_image_set_1D( cl_device_id device, cl_image_format *format, image_sampler_data *imageSampler, ExplicitType outputType ); +extern int test_read_image_set_1D_buffer( cl_device_id device, cl_image_format *format, image_sampler_data *imageSampler, ExplicitType outputType ); +extern int test_read_image_set_2D( cl_device_id device, cl_image_format *format, image_sampler_data *imageSampler, ExplicitType outputType ); +extern int test_read_image_set_3D( cl_device_id device, cl_image_format *format, image_sampler_data *imageSampler, ExplicitType outputType ); +extern int test_read_image_set_1D_array( cl_device_id device, cl_image_format *format, image_sampler_data *imageSampler, ExplicitType outputType ); +extern int test_read_image_set_2D_array( cl_device_id device, cl_image_format *format, image_sampler_data *imageSampler, ExplicitType outputType ); + +static const char *str_1d_image = "1D"; +static const char *str_2d_image = "2D"; +static const char *str_3d_image = "3D"; +static const char *str_1d_image_array = "1D array"; +static const char *str_2d_image_array = "2D array"; +static const char *str_1d_image_buffer = "1D image buffer"; + +static const char *convert_image_type_to_string(cl_mem_object_type imageType) +{ + const char *p; + switch (imageType) + { + case CL_MEM_OBJECT_IMAGE1D: + p = str_1d_image; + break; + case CL_MEM_OBJECT_IMAGE2D: + p = str_2d_image; + break; + case CL_MEM_OBJECT_IMAGE3D: + p = str_3d_image; + break; + case CL_MEM_OBJECT_IMAGE1D_ARRAY: + p = str_1d_image_array; + break; + case CL_MEM_OBJECT_IMAGE2D_ARRAY: + p = str_2d_image_array; + break; + case CL_MEM_OBJECT_IMAGE1D_BUFFER: + p = str_1d_image_buffer; + } + return p; +} + + +int filter_formats( cl_image_format *formatList, bool 
*filterFlags, unsigned int formatCount, cl_channel_type *channelDataTypesToFilter ) +{ + int numSupported = 0; + for ( unsigned int j = 0; j < formatCount; j++ ) + { + // If this format has been previously filtered, remove the filter + if ( filterFlags[ j ] ) + filterFlags[ j ] = false; + + // Have we already discarded the channel type via the command line? + if ( gChannelTypeToUse != (cl_channel_type)-1 && gChannelTypeToUse != formatList[ j ].image_channel_data_type ) + { + filterFlags[ j ] = true; + continue; + } + + // Have we already discarded the channel order via the command line? + if ( gChannelOrderToUse != (cl_channel_order)-1 && gChannelOrderToUse != formatList[ j ].image_channel_order ) + { + filterFlags[ j ] = true; + continue; + } + + // Is given format standard channel order and type given by spec. We don't want to test it if this is vendor extension + if( !IsChannelOrderSupported( formatList[ j ].image_channel_order ) || !IsChannelTypeSupported( formatList[ j ].image_channel_data_type ) ) + { + filterFlags[ j ] = true; + continue; + } + + // We don't filter by channel type + if( !channelDataTypesToFilter ) + { + numSupported++; + continue; + } + + // Is the format supported? 
+ int i; + for ( i = 0; channelDataTypesToFilter[ i ] != (cl_channel_type)-1; i++ ) + { + if ( formatList[ j ].image_channel_data_type == channelDataTypesToFilter[ i ] ) + { + numSupported++; + break; + } + } + if ( channelDataTypesToFilter[ i ] == (cl_channel_type)-1 ) + { + // Format is NOT supported, so mark it as such + filterFlags[ j ] = true; + } + } + return numSupported; +} + +int get_format_list( cl_device_id device, cl_mem_object_type imageType, cl_image_format * &outFormatList, unsigned int &outFormatCount, cl_mem_flags flags ) +{ + int error; + + cl_image_format tempList[ 128 ]; + error = clGetSupportedImageFormats( context, flags, + imageType, 128, tempList, &outFormatCount ); + test_error( error, "Unable to get count of supported image formats" ); + + outFormatList = new cl_image_format[ outFormatCount ]; + error = clGetSupportedImageFormats( context, flags, + imageType, outFormatCount, outFormatList, NULL ); + test_error( error, "Unable to get list of supported image formats" ); + + return 0; +} + +int test_read_image_type( cl_device_id device, cl_image_format *format, + image_sampler_data *imageSampler, ExplicitType outputType, cl_mem_object_type imageType ) +{ + int ret = 0; + imageSampler->addressing_mode = CL_ADDRESS_NONE; + + print_read_header( format, imageSampler, false ); + + gTestCount++; + + switch (imageType) + { + case CL_MEM_OBJECT_IMAGE1D: + ret = test_read_image_set_1D( device, format, imageSampler, outputType ); + break; + case CL_MEM_OBJECT_IMAGE1D_BUFFER: + ret += test_read_image_set_1D_buffer( device, format, imageSampler, outputType ); + break; + case CL_MEM_OBJECT_IMAGE2D: + ret = test_read_image_set_2D( device, format, imageSampler, outputType ); + break; + case CL_MEM_OBJECT_IMAGE3D: + ret = test_read_image_set_3D( device, format, imageSampler, outputType ); + break; + case CL_MEM_OBJECT_IMAGE1D_ARRAY: + ret = test_read_image_set_1D_array( device, format, imageSampler, outputType ); + break; + case CL_MEM_OBJECT_IMAGE2D_ARRAY: 
+ ret = test_read_image_set_2D_array( device, format, imageSampler, outputType ); + break; + } + + if ( ret != 0 ) + { + gTestFailure++; + log_error( "FAILED: " ); + print_read_header( format, imageSampler, true ); + log_info( "\n" ); + } + return ret; +} + +int test_read_image_formats( cl_device_id device, cl_image_format *formatList, bool *filterFlags, unsigned int numFormats, + image_sampler_data *imageSampler, ExplicitType outputType, cl_mem_object_type imageType ) +{ + int ret = 0; + imageSampler->normalized_coords = false; + log_info( "read_image (%s coords, %s results) *****************************\n", + "integer", get_explicit_type_name( outputType ) ); + + for ( unsigned int i = 0; i < numFormats; i++ ) + { + if ( filterFlags[i] ) + continue; + + cl_image_format &imageFormat = formatList[ i ]; + + ret |= test_read_image_type( device, &imageFormat, imageSampler, outputType, imageType ); + } + return ret; +} + + +int test_image_set( cl_device_id device, cl_mem_object_type imageType ) +{ + int ret = 0; + static int printedFormatList = -1; + + // Grab the list of supported image formats + cl_image_format *formatList; + bool *filterFlags; + unsigned int numFormats; + + // This flag is only for querying the list of supported formats + // The flag for creating image will be set explicitly in test functions + cl_mem_flags flags = (gTestReadWrite)? 
CL_MEM_KERNEL_READ_AND_WRITE : CL_MEM_READ_ONLY; + + if ( get_format_list( device, imageType, formatList, numFormats, flags ) ) + return -1; + + filterFlags = new bool[ numFormats ]; + if ( filterFlags == NULL ) + { + log_error( "ERROR: Out of memory allocating filter flags list!\n" ); + return -1; + } + memset( filterFlags, 0, sizeof( bool ) * numFormats ); + + // First time through, we'll go ahead and print the formats supported, regardless of type + if ( printedFormatList != (int)imageType ) + { + log_info( "---- Supported %s read formats for this device ---- \n", convert_image_type_to_string(imageType) ); + for ( unsigned int f = 0; f < numFormats; f++ ) + log_info( " %-7s %-24s %d\n", GetChannelOrderName( formatList[ f ].image_channel_order ), + GetChannelTypeName( formatList[ f ].image_channel_data_type ), + (int)get_format_channel_count( &formatList[ f ] ) ); + log_info( "------------------------------------------- \n" ); + printedFormatList = imageType; + } + + image_sampler_data imageSampler; + + /////// float tests /////// + + if ( gTypesToTest & kTestFloat ) + { + cl_channel_type floatFormats[] = { CL_UNORM_SHORT_565, CL_UNORM_SHORT_555, CL_UNORM_INT_101010, +#ifdef OBSOLETE_FORAMT + CL_UNORM_SHORT_565_REV, CL_UNORM_SHORT_555_REV, CL_UNORM_INT_8888, CL_UNORM_INT_8888_REV, CL_UNORM_INT_101010_REV, +#endif +#ifdef CL_SFIXED14_APPLE + CL_SFIXED14_APPLE, +#endif + CL_UNORM_INT8, CL_SNORM_INT8, + CL_UNORM_INT16, CL_SNORM_INT16, CL_FLOAT, CL_HALF_FLOAT, (cl_channel_type)-1 }; + if ( filter_formats( formatList, filterFlags, numFormats, floatFormats ) == 0 ) + { + log_info( "No formats supported for float type\n" ); + } + else + { + imageSampler.filter_mode = CL_FILTER_NEAREST; + ret += test_read_image_formats( device, formatList, filterFlags, numFormats, &imageSampler, kFloat, imageType ); + } + } + + /////// int tests /////// + if ( gTypesToTest & kTestInt ) + { + cl_channel_type intFormats[] = { CL_SIGNED_INT8, CL_SIGNED_INT16, CL_SIGNED_INT32, 
(cl_channel_type)-1 }; + if ( filter_formats( formatList, filterFlags, numFormats, intFormats ) == 0 ) + { + log_info( "No formats supported for integer type\n" ); + } + else + { + // Only filter mode we support on int is nearest + imageSampler.filter_mode = CL_FILTER_NEAREST; + ret += test_read_image_formats( device, formatList, filterFlags, numFormats, &imageSampler, kInt, imageType ); + } + } + + /////// uint tests /////// + + if ( gTypesToTest & kTestUInt ) + { + cl_channel_type uintFormats[] = { CL_UNSIGNED_INT8, CL_UNSIGNED_INT16, CL_UNSIGNED_INT32, (cl_channel_type)-1 }; + if ( filter_formats( formatList, filterFlags, numFormats, uintFormats ) == 0 ) + { + log_info( "No formats supported for unsigned int type\n" ); + } + else + { + // Only filter mode we support on uint is nearest + imageSampler.filter_mode = CL_FILTER_NEAREST; + ret += test_read_image_formats( device, formatList, filterFlags, numFormats, &imageSampler, kUInt, imageType ); + } + } + + + delete[] filterFlags; + delete[] formatList; + + return ret; +} diff --git a/test_conformance/images/samplerlessReads/test_read_1D.cpp b/test_conformance/images/samplerlessReads/test_read_1D.cpp new file mode 100644 index 00000000..a0e8e623 --- /dev/null +++ b/test_conformance/images/samplerlessReads/test_read_1D.cpp @@ -0,0 +1,332 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../testBase.h" +#include + +#if defined( __APPLE__ ) + #include + #include + #include +#endif + +#define MAX_ERR 0.005f +#define MAX_HALF_LINEAR_ERR 0.3f + +extern cl_command_queue queue; +extern cl_context context; +extern bool gDebugTrace, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestRounding; +extern cl_device_type gDeviceType; +extern bool gTestReadWrite; + +#define MAX_TRIES 1 +#define MAX_CLAMPED 1 + +const char *read1DKernelSourcePattern = +"__kernel void sample_kernel( read_only image1d_t input, sampler_t sampler, __global int *results )\n" +"{\n" +" int tidX = get_global_id(0);\n" +" int offset = tidX;\n" +" %s clr = read_image%s( input, tidX );\n" +" int4 test = (clr != read_image%s( input, sampler, tidX ));\n" +" if ( test.x || test.y || test.z || test.w )\n" +" results[offset] = -1;\n" +" else\n" +" results[offset] = 0;\n" +"}"; + +const char *read_write1DKernelSourcePattern = +"__kernel void sample_kernel( read_only image1d_t read_only_image, read_write image1d_t read_write_image, sampler_t sampler, __global int *results )\n" +"{\n" +" int tidX = get_global_id(0);\n" +" int offset = tidX;\n" +" %s clr = read_image%s( read_only_image, sampler, tidX );\n" +" write_image%s(read_write_image, tidX, clr);\n" +" atomic_work_item_fence(CLK_IMAGE_MEM_FENCE, memory_order_acq_rel, memory_scope_work_item);\n" +" int4 test = (clr != read_image%s(read_write_image, tidX));\n" +" if ( test.x || test.y || test.z || test.w )\n" +" results[offset] = -1;\n" +" else\n" +" results[offset] = 0;\n" +"}"; + +int test_read_image_1D( cl_device_id device, cl_context context, cl_command_queue queue, cl_kernel kernel, + image_descriptor *imageInfo, image_sampler_data *imageSampler, + ExplicitType outputType, MTdata d ) +{ + int error; + size_t threads[2]; + cl_sampler actualSampler; + + // generate_random_image_data allocates with malloc, so we use a MallocDataBuffer here + BufferOwningPtr imageValues; + generate_random_image_data( imageInfo, imageValues, d 
); + + if ( gDebugTrace ) + log_info( " - Creating image %d by %d...\n", (int)imageInfo->width, (int)imageInfo->height ); + + // Construct testing sources + cl_mem read_only_image, read_write_image; + cl_image_desc image_desc; + + memset(&image_desc, 0x0, sizeof(cl_image_desc)); + image_desc.image_type = CL_MEM_OBJECT_IMAGE1D; + image_desc.image_width = imageInfo->width; + image_desc.image_row_pitch = ( gEnablePitch ? imageInfo->rowPitch : 0 ); + image_desc.num_mip_levels = 0; + + read_only_image = clCreateImage(context, + CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, + imageInfo->format, + &image_desc, + imageValues, + &error ); + if ( error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create read_only 1D image of size %d pitch %d (%s)\n", (int)imageInfo->width, (int)imageInfo->rowPitch, IGetErrorString( error ) ); + return error; + } + + if(gTestReadWrite) + { + read_write_image = clCreateImage(context, + CL_MEM_READ_WRITE, + imageInfo->format, + &image_desc, + NULL, + &error ); + if ( error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create read_write 1D image of size %d pitch %d (%s)\n", + (int)imageInfo->width, + (int)imageInfo->rowPitch, + IGetErrorString( error ) ); + return error; + } + } + + + if ( gDebugTrace ) + log_info( " - Creating kernel arguments...\n" ); + + // Create sampler to use + cl_sampler_properties properties[] = { + CL_SAMPLER_NORMALIZED_COORDS, CL_FALSE, + CL_SAMPLER_ADDRESSING_MODE, CL_ADDRESS_NONE, + CL_SAMPLER_FILTER_MODE, CL_FILTER_NEAREST, + 0 }; + actualSampler = clCreateSamplerWithProperties( context, properties, &error ); + test_error( error, "Unable to create image sampler" ); + + // Create results buffer + cl_mem results = clCreateBuffer( context, 0, imageInfo->width * sizeof(cl_int), NULL, &error); + test_error( error, "Unable to create results buffer" ); + + size_t resultValuesSize = imageInfo->width * sizeof(cl_int); + BufferOwningPtr resultValues(malloc( resultValuesSize )); + memset( resultValues, 0xff, 
resultValuesSize ); + clEnqueueWriteBuffer( queue, results, CL_TRUE, 0, resultValuesSize, resultValues, 0, NULL, NULL ); + + // Set arguments + int idx = 0; + error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &read_only_image ); + test_error( error, "Unable to set kernel arguments" ); + if(gTestReadWrite) + { + error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &read_write_image ); + test_error( error, "Unable to set kernel arguments" ); + } + error = clSetKernelArg( kernel, idx++, sizeof( cl_sampler ), &actualSampler ); + test_error( error, "Unable to set kernel arguments" ); + error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &results ); + test_error( error, "Unable to set kernel arguments" ); + + // Run the kernel + threads[0] = (size_t)imageInfo->width; + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL ); + test_error( error, "Unable to run kernel" ); + + if ( gDebugTrace ) + log_info( " reading results, %ld kbytes\n", (unsigned long)( imageInfo->width * sizeof(cl_int) / 1024 ) ); + + error = clEnqueueReadBuffer( queue, results, CL_TRUE, 0, resultValuesSize, resultValues, 0, NULL, NULL ); + test_error( error, "Unable to read results from kernel" ); + if ( gDebugTrace ) + log_info( " results read\n" ); + + // Check for non-zero comps + bool allZeroes = true; + for ( size_t ic = 0; ic < imageInfo->width; ++ic ) + { + if ( resultValues[ic] ) { + allZeroes = false; + break; + } + } + if ( !allZeroes ) + { + log_error( " Sampler-less reads differ from reads with sampler.\n" ); + return -1; + } + + clReleaseSampler(actualSampler); + clReleaseMemObject(results); + clReleaseMemObject(read_only_image); + if(gTestReadWrite) + { + clReleaseMemObject(read_write_image); + } + + return 0; +} + +int test_read_image_set_1D( cl_device_id device, cl_image_format *format, image_sampler_data *imageSampler, + ExplicitType outputType ) +{ + char programSrc[10240]; + const char *ptr; + const char *readFormat; + const char 
*dataType; + clProgramWrapper program; + clKernelWrapper kernel; + RandomSeed seed( gRandomSeed ); + int error; + + // Get our operating params + size_t maxWidth; + cl_ulong maxAllocSize, memSize; + image_descriptor imageInfo = { 0 }; + size_t pixelSize; + + imageInfo.format = format; + imageInfo.height = imageInfo.depth = imageInfo.arraySize = imageInfo.slicePitch = 0; + imageInfo.type = CL_MEM_OBJECT_IMAGE1D; + pixelSize = get_pixel_size( imageInfo.format ); + + error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL ); + test_error( error, "Unable to get max image 1D size from device" ); + + if (memSize > (cl_ulong)SIZE_MAX) { + memSize = (cl_ulong)SIZE_MAX; + } + + // Determine types + if ( outputType == kInt ) + { + readFormat = "i"; + dataType = "int4"; + } + else if ( outputType == kUInt ) + { + readFormat = "ui"; + dataType = "uint4"; + } + else // kFloat + { + readFormat = "f"; + dataType = "float4"; + } + + if(gTestReadWrite) + { + sprintf( programSrc, + read_write1DKernelSourcePattern, + dataType, + readFormat, + readFormat, + readFormat); + } + else + { + sprintf( programSrc, + read1DKernelSourcePattern, + dataType, + readFormat, + readFormat ); + } + + + ptr = programSrc; + error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", "-cl-std=CL2.0" ); + test_error( error, "Unable to create testing kernel" ); + + if ( gTestSmallImages ) + { + for ( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ ) + { + imageInfo.rowPitch = imageInfo.width * pixelSize; + { + if ( gDebugTrace ) + log_info( " at size %d\n", (int)imageInfo.width ); + + int retCode = test_read_image_1D( device, context, queue, kernel, &imageInfo, imageSampler, outputType, seed 
); + if ( retCode ) + return retCode; + } + } + } + else if ( gTestMaxImages ) + { + // Try a specific set of maximum sizes + size_t numbeOfSizes; + size_t sizes[100][3]; + + get_max_sizes(&numbeOfSizes, 100, sizes, maxWidth, 1, 1, 1, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE1D, imageInfo.format); + + for ( size_t idx = 0; idx < numbeOfSizes; idx++ ) + { + imageInfo.width = sizes[ idx ][ 0 ]; + imageInfo.rowPitch = imageInfo.width * pixelSize; + log_info("Testing %d\n", (int)sizes[ idx ][ 0 ]); + if ( gDebugTrace ) + log_info( " at max size %d\n", (int)sizes[ idx ][ 0 ] ); + int retCode = test_read_image_1D( device, context, queue, kernel, &imageInfo, imageSampler, outputType, seed ); + if ( retCode ) + return retCode; + } + } + else + { + for ( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ ) + { + cl_ulong size; + // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that + // image, the result array, plus offset arrays, will fit in the global ram space + do + { + imageInfo.width = (size_t)random_log_in_range( 16, (int)maxWidth / 32, seed ); + + imageInfo.rowPitch = imageInfo.width * pixelSize; + if ( gEnablePitch ) + { + size_t extraWidth = (int)random_log_in_range( 0, 64, seed ); + imageInfo.rowPitch += extraWidth * pixelSize; + } + + size = (size_t)imageInfo.rowPitch * 4; + } while ( size > maxAllocSize || ( size * 3 ) > memSize ); + + if ( gDebugTrace ) + log_info( " at size %d (row pitch %d) out of %d\n", (int)imageInfo.width, (int)imageInfo.rowPitch, (int)maxWidth ); + int retCode = test_read_image_1D( device, context, queue, kernel, &imageInfo, imageSampler, outputType, seed ); + if ( retCode ) + return retCode; + } + } + + return 0; +} diff --git a/test_conformance/images/samplerlessReads/test_read_1D_array.cpp b/test_conformance/images/samplerlessReads/test_read_1D_array.cpp new file mode 100644 index 00000000..72614378 --- /dev/null +++ b/test_conformance/images/samplerlessReads/test_read_1D_array.cpp @@ -0,0 +1,336 
@@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../testBase.h" +#include + +#if defined( __APPLE__ ) + #include + #include + #include +#endif + +#define MAX_ERR 0.005f +#define MAX_HALF_LINEAR_ERR 0.3f + +extern cl_command_queue queue; +extern cl_context context; +extern bool gDebugTrace, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestRounding; +extern cl_device_type gDeviceType; +extern bool gTestReadWrite; + +#define MAX_TRIES 1 +#define MAX_CLAMPED 1 + +const char *read1DArrayKernelSourcePattern = +"__kernel void sample_kernel( read_only image1d_array_t input, sampler_t sampler, __global int *results )\n" +"{\n" +" int tidX = get_global_id(0), tidY = get_global_id(1);\n" +" int offset = tidY*get_image_width(input) + tidX;\n" +" int2 coords = (int2)(tidX, tidY);\n" +" %s clr = read_image%s( input, coords );\n" +" int4 test = (clr != read_image%s( input, sampler, coords ));\n" +" if ( test.x || test.y || test.z || test.w )\n" +" results[offset] = -1;\n" +" else\n" +" results[offset] = 0;\n" +"}"; + +const char *read_write1DArrayKernelSourcePattern = +"__kernel void sample_kernel( read_only image1d_array_t read_only_image, read_write image1d_array_t read_write_image, sampler_t sampler, __global int *results )\n" +"{\n" +" int tidX = get_global_id(0), tidY = get_global_id(1);\n" +" int offset = tidY*get_image_width(read_only_image) + tidX;\n" +" int2 coords = (int2)(tidX, tidY);\n" +" 
%s clr = read_image%s( read_only_image, sampler, coords );\n" +" write_image%s(read_write_image, coords, clr);\n" +" atomic_work_item_fence(CLK_IMAGE_MEM_FENCE, memory_order_acq_rel, memory_scope_work_item);\n" +" int4 test = (clr != read_image%s( read_write_image, coords ));\n" +" if ( test.x || test.y || test.z || test.w )\n" +" results[offset] = -1;\n" +" else\n" +" results[offset] = 0;\n" +"}"; + +int test_read_image_1D_array( cl_device_id device, cl_context context, cl_command_queue queue, cl_kernel kernel, + image_descriptor *imageInfo, image_sampler_data *imageSampler, + ExplicitType outputType, MTdata d ) +{ + int error; + size_t threads[2]; + cl_sampler actualSampler; + + // generate_random_image_data allocates with malloc, so we use a MallocDataBuffer here + BufferOwningPtr imageValues; + generate_random_image_data( imageInfo, imageValues, d ); + + if ( gDebugTrace ) + log_info( " - Creating image %d by %d...\n", (int)imageInfo->width, (int)imageInfo->arraySize ); + + // Construct testing sources + cl_mem read_only_image, read_write_image; + cl_image_desc image_desc; + + memset(&image_desc, 0x0, sizeof(cl_image_desc)); + image_desc.image_type = CL_MEM_OBJECT_IMAGE1D_ARRAY; + image_desc.image_width = imageInfo->width; + image_desc.image_height = imageInfo->height; + image_desc.image_array_size = imageInfo->arraySize; + image_desc.image_row_pitch = ( gEnablePitch ? 
imageInfo->rowPitch : 0 ); + image_desc.image_slice_pitch = 0; + image_desc.num_mip_levels = 0; + read_only_image = clCreateImage( context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, imageInfo->format, + &image_desc, imageValues, &error ); + if ( error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create read_only 1D image array of size %d x %d pitch %d (%s)\n", (int)imageInfo->width, (int)imageInfo->arraySize, (int)imageInfo->rowPitch, IGetErrorString( error ) ); + return error; + } + + if(gTestReadWrite) + { + read_write_image = clCreateImage(context, + CL_MEM_READ_WRITE, + imageInfo->format, + &image_desc, + NULL, + &error ); + if ( error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create read_write 1D image array of size %d x %d pitch %d (%s)\n", (int)imageInfo->width, (int)imageInfo->arraySize, (int)imageInfo->rowPitch, IGetErrorString( error ) ); + return error; + } + } + if ( gDebugTrace ) + log_info( " - Creating kernel arguments...\n" ); + + // Create sampler to use + cl_sampler_properties properties[] = { + CL_SAMPLER_NORMALIZED_COORDS, CL_FALSE, + CL_SAMPLER_ADDRESSING_MODE, CL_ADDRESS_NONE, + CL_SAMPLER_FILTER_MODE, CL_FILTER_NEAREST, + 0 }; + actualSampler = clCreateSamplerWithProperties( context, properties, &error ); + test_error( error, "Unable to create image sampler" ); + + // Create results buffer + cl_mem results = clCreateBuffer( context, 0, imageInfo->width * imageInfo->arraySize * sizeof(cl_int), NULL, &error); + test_error( error, "Unable to create results buffer" ); + + size_t resultValuesSize = imageInfo->width * imageInfo->arraySize * sizeof(cl_int); + BufferOwningPtr resultValues(malloc( resultValuesSize )); + memset( resultValues, 0xff, resultValuesSize ); + clEnqueueWriteBuffer( queue, results, CL_TRUE, 0, resultValuesSize, resultValues, 0, NULL, NULL ); + + // Set arguments + int idx = 0; + error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &read_only_image ); + test_error( error, "Unable to set kernel arguments" ); + 
if(gTestReadWrite) + { + error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &read_write_image ); + test_error( error, "Unable to set kernel arguments" ); + } + error = clSetKernelArg( kernel, idx++, sizeof( cl_sampler ), &actualSampler ); + test_error( error, "Unable to set kernel arguments" ); + error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &results ); + test_error( error, "Unable to set kernel arguments" ); + + // Run the kernel + threads[0] = (size_t)imageInfo->width; + threads[1] = (size_t)imageInfo->arraySize; + + error = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, NULL ); + test_error( error, "Unable to run kernel" ); + + if ( gDebugTrace ) + log_info( " reading results, %ld kbytes\n", (unsigned long)( imageInfo->width * imageInfo->arraySize * sizeof(cl_int) / 1024 ) ); + + error = clEnqueueReadBuffer( queue, results, CL_TRUE, 0, resultValuesSize, resultValues, 0, NULL, NULL ); + test_error( error, "Unable to read results from kernel" ); + if ( gDebugTrace ) + log_info( " results read\n" ); + + // Check for non-zero comps + bool allZeroes = true; + size_t ic; + for ( ic = 0; ic < imageInfo->width * imageInfo->arraySize; ++ic ) + { + if ( resultValues[ic] ) { + allZeroes = false; + break; + } + } + if ( !allZeroes ) + { + log_error( " Sampler-less reads differ from reads with sampler at index %lu.\n", ic ); + return -1; + } + + clReleaseSampler(actualSampler); + clReleaseMemObject(results); + if(gTestReadWrite) + { + clReleaseMemObject(read_write_image); + } + return 0; +} + +int test_read_image_set_1D_array( cl_device_id device, cl_image_format *format, image_sampler_data *imageSampler, + ExplicitType outputType ) +{ + char programSrc[10240]; + const char *ptr; + const char *readFormat; + const char *dataType; + clProgramWrapper program; + clKernelWrapper kernel; + RandomSeed seed( gRandomSeed ); + int error; + + // Get our operating params + size_t maxWidth, maxArraySize; + cl_ulong maxAllocSize, memSize; + 
image_descriptor imageInfo = { 0 }; + size_t pixelSize; + + imageInfo.format = format; + imageInfo.height = imageInfo.depth = 0; + imageInfo.type = CL_MEM_OBJECT_IMAGE1D_ARRAY; + pixelSize = get_pixel_size( imageInfo.format ); + + error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, sizeof( maxArraySize ), &maxArraySize, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL ); + test_error( error, "Unable to get max image 2D size from device" ); + + if (memSize > (cl_ulong)SIZE_MAX) { + memSize = (cl_ulong)SIZE_MAX; + } + + // Determine types + if ( outputType == kInt ) + { + readFormat = "i"; + dataType = "int4"; + } + else if ( outputType == kUInt ) + { + readFormat = "ui"; + dataType = "uint4"; + } + else // kFloat + { + readFormat = "f"; + dataType = "float4"; + } + + if(gTestReadWrite) + { + sprintf( programSrc, + read_write1DArrayKernelSourcePattern, + dataType, + readFormat, + readFormat, + readFormat); + } + else + { + sprintf( programSrc, + read1DArrayKernelSourcePattern, + dataType, + readFormat, + readFormat ); + } + + + ptr = programSrc; + error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", "-cl-std=CL2.0" ); + test_error( error, "Unable to create testing kernel" ); + + if ( gTestSmallImages ) + { + for ( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ ) + { + imageInfo.rowPitch = imageInfo.width * pixelSize; + imageInfo.slicePitch = imageInfo.rowPitch; + for ( imageInfo.arraySize = 2; imageInfo.arraySize < 9; imageInfo.arraySize++ ) + { + if ( gDebugTrace ) + log_info( " at size %d,%d\n", (int)imageInfo.width, (int)imageInfo.arraySize ); + + int retCode = test_read_image_1D_array( device, context, queue, 
kernel, &imageInfo, imageSampler, outputType, seed ); + if ( retCode ) + return retCode; + } + } + } + else if ( gTestMaxImages ) + { + // Try a specific set of maximum sizes + size_t numbeOfSizes; + size_t sizes[100][3]; + + get_max_sizes(&numbeOfSizes, 100, sizes, maxWidth, 1, 1, maxArraySize, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE1D_ARRAY, imageInfo.format); + + for ( size_t idx = 0; idx < numbeOfSizes; idx++ ) + { + imageInfo.width = sizes[ idx ][ 0 ]; + imageInfo.arraySize = sizes[ idx ][ 2 ]; + imageInfo.rowPitch = imageInfo.width * pixelSize; + imageInfo.slicePitch = imageInfo.rowPitch; + log_info("Testing %d x %d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 2 ]); + if ( gDebugTrace ) + log_info( " at max size %d,%d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 2 ] ); + int retCode = test_read_image_1D_array( device, context, queue, kernel, &imageInfo, imageSampler, outputType, seed ); + if ( retCode ) + return retCode; + } + } + else + { + for ( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ ) + { + cl_ulong size; + // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that + // image, the result array, plus offset arrays, will fit in the global ram space + do + { + imageInfo.width = (size_t)random_log_in_range( 16, (int)maxWidth / 32, seed ); + imageInfo.arraySize = (size_t)random_log_in_range( 16, (int)maxArraySize / 32, seed ); + + imageInfo.rowPitch = imageInfo.width * pixelSize; + if ( gEnablePitch ) + { + size_t extraWidth = (int)random_log_in_range( 0, 64, seed ); + imageInfo.rowPitch += extraWidth * pixelSize; + } + + imageInfo.slicePitch = imageInfo.rowPitch; + + size = (size_t)imageInfo.rowPitch * (size_t)imageInfo.arraySize * 4; + } while ( size > maxAllocSize || ( size * 3 ) > memSize ); + + if ( gDebugTrace ) + log_info( " at size %d,%d (row pitch %d) out of %d,%d\n", (int)imageInfo.width, (int)imageInfo.arraySize, (int)imageInfo.rowPitch, (int)maxWidth, (int)maxArraySize ); + int retCode = 
test_read_image_1D_array( device, context, queue, kernel, &imageInfo, imageSampler, outputType, seed ); + if ( retCode ) + return retCode; + } + } + + return 0; +} diff --git a/test_conformance/images/samplerlessReads/test_read_1D_buffer.cpp b/test_conformance/images/samplerlessReads/test_read_1D_buffer.cpp new file mode 100644 index 00000000..5342d068 --- /dev/null +++ b/test_conformance/images/samplerlessReads/test_read_1D_buffer.cpp @@ -0,0 +1,324 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../testBase.h" +#include + +#if defined( __APPLE__ ) + #include + #include + #include +#endif + +#define MAX_ERR 0.005f +#define MAX_HALF_LINEAR_ERR 0.3f + +extern cl_command_queue queue; +extern cl_context context; +extern bool gDebugTrace, gTestSmallImages, gTestMaxImages, gTestRounding; +extern cl_device_type gDeviceType; + +#define MAX_TRIES 1 +#define MAX_CLAMPED 1 + +const char *read1DBufferKernelSourcePattern = +"__kernel void sample_kernel( read_only image1d_buffer_t inputA, read_only image1d_t inputB, sampler_t sampler, __global int *results )\n" +"{\n" +" int tidX = get_global_id(0);\n" +" int offset = tidX;\n" +" %s clr = read_image%s( inputA, tidX );\n" +" int4 test = (clr != read_image%s( inputB, sampler, tidX ));\n" +" if ( test.x || test.y || test.z || test.w )\n" +" results[offset] = -1;\n" +" else\n" +" results[offset] = 0;\n" +"}"; + + +int test_read_image_1D_buffer( cl_device_id device, cl_context context, cl_command_queue queue, cl_kernel kernel, + image_descriptor *imageInfo, image_sampler_data *imageSampler, + ExplicitType outputType, MTdata d ) +{ + int error; + size_t threads[2]; + cl_sampler actualSampler; + + BufferOwningPtr imageValues; + generate_random_image_data( imageInfo, imageValues, d ); + + if ( gDebugTrace ) + log_info( " - Creating 1D image from buffer %d ...\n", (int)imageInfo->width ); + + // Construct testing sources + cl_mem image[2]; + cl_image_desc image_desc; + + cl_mem imageBuffer = clCreateBuffer( context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, imageInfo->rowPitch, imageValues, &error); + if ( error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create buffer of size %d bytes (%s)\n", (int)imageInfo->rowPitch, IGetErrorString( error ) ); + return error; + } + + memset(&image_desc, 0x0, sizeof(cl_image_desc)); + image_desc.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER; + image_desc.image_width = imageInfo->width; + image_desc.mem_object = imageBuffer; + image[0] = clCreateImage( context, 
CL_MEM_READ_ONLY, imageInfo->format, + &image_desc, NULL, &error ); + if ( error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create IMAGE1D_BUFFER of size %d pitch %d (%s)\n", (int)imageInfo->width, (int)imageInfo->rowPitch, IGetErrorString( error ) ); + return error; + } + + cl_mem ret = NULL; + error = clGetMemObjectInfo(image[0], CL_MEM_ASSOCIATED_MEMOBJECT, sizeof(ret), &ret, NULL); + if ( error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to query CL_MEM_ASSOCIATED_MEMOBJECT\n", IGetErrorString( error ) ); + return error; + } + + if (ret != imageBuffer) { + log_error("ERROR: clGetImageInfo for CL_IMAGE_BUFFER returned wrong value\n"); + return -1; + } + + memset(&image_desc, 0x0, sizeof(cl_image_desc)); + image_desc.image_type = CL_MEM_OBJECT_IMAGE1D; + image_desc.image_width = imageInfo->width; + image[1] = clCreateImage( context, CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR, imageInfo->format, &image_desc, imageValues, &error ); + if ( error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create IMAGE1D of size %d pitch %d (%s)\n", (int)imageInfo->width, (int)imageInfo->rowPitch, IGetErrorString( error ) ); + return error; + } + + if ( gDebugTrace ) + log_info( " - Creating kernel arguments...\n" ); + + // Create sampler to use + cl_sampler_properties properties[] = { + CL_SAMPLER_NORMALIZED_COORDS, CL_FALSE, + CL_SAMPLER_ADDRESSING_MODE, CL_ADDRESS_NONE, + CL_SAMPLER_FILTER_MODE, CL_FILTER_NEAREST, + 0 }; + actualSampler = clCreateSamplerWithProperties( context, properties, &error ); + test_error( error, "Unable to create image sampler" ); + + // Create results buffer + cl_mem results = clCreateBuffer( context, 0, imageInfo->width * sizeof(cl_int), NULL, &error); + test_error( error, "Unable to create results buffer" ); + + size_t resultValuesSize = imageInfo->width * sizeof(cl_int); + BufferOwningPtr resultValues(malloc( resultValuesSize )); + memset( resultValues, 0xff, resultValuesSize ); + clEnqueueWriteBuffer( queue, results, CL_TRUE, 0, 
resultValuesSize, resultValues, 0, NULL, NULL ); + + // Set arguments + int idx = 0; + error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &image[0] ); + test_error( error, "Unable to set kernel arguments" ); + error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &image[1] ); + test_error( error, "Unable to set kernel arguments" ); + error = clSetKernelArg( kernel, idx++, sizeof( cl_sampler ), &actualSampler ); + test_error( error, "Unable to set kernel arguments" ); + error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &results ); + test_error( error, "Unable to set kernel arguments" ); + + // Run the kernel + threads[0] = (size_t)imageInfo->width; + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL ); + test_error( error, "Unable to run kernel" ); + + if ( gDebugTrace ) + log_info( " reading results, %ld kbytes\n", (unsigned long)( imageInfo->width * sizeof(cl_int) / 1024 ) ); + + error = clEnqueueReadBuffer( queue, results, CL_TRUE, 0, resultValuesSize, resultValues, 0, NULL, NULL ); + test_error( error, "Unable to read results from kernel" ); + if ( gDebugTrace ) + log_info( " results read\n" ); + + // Check for non-zero comps + bool allZeroes = true; + for ( size_t ic = 0; ic < imageInfo->width; ++ic ) + { + if ( resultValues[ic] ) { + allZeroes = false; + break; + } + } + if ( !allZeroes ) + { + log_error( " Sampler-less reads differ from reads with sampler.\n" ); + return -1; + } + + clReleaseSampler(actualSampler); + clReleaseMemObject(results); + clReleaseMemObject(image[0]); + clReleaseMemObject(image[1]); + clReleaseMemObject(imageBuffer); + return 0; +} + +int test_read_image_set_1D_buffer( cl_device_id device, cl_image_format *format, image_sampler_data *imageSampler, + ExplicitType outputType ) +{ + char programSrc[10240]; + const char *ptr; + const char *readFormat; + const char *dataType; + clProgramWrapper program; + clKernelWrapper kernel; + RandomSeed seed( gRandomSeed ); + int error; + + // Get 
our operating params + size_t maxWidth, maxWidth1D; + cl_ulong maxAllocSize, memSize; + image_descriptor imageInfo = { 0 }; + size_t pixelSize; + + if (format->image_channel_order == CL_RGB || format->image_channel_order == CL_RGBx) + { + switch (format->image_channel_data_type) + { + case CL_UNORM_INT8: + case CL_UNORM_INT16: + case CL_SNORM_INT8: + case CL_SNORM_INT16: + case CL_HALF_FLOAT: + case CL_FLOAT: + case CL_SIGNED_INT8: + case CL_SIGNED_INT16: + case CL_SIGNED_INT32: + case CL_UNSIGNED_INT8: + case CL_UNSIGNED_INT16: + case CL_UNSIGNED_INT32: + case CL_UNORM_INT_101010: + log_info( "Skipping image format: %s %s\n", GetChannelOrderName( format->image_channel_order ), + GetChannelTypeName( format->image_channel_data_type )); + return 0; + default: + break; + } + } + + imageInfo.format = format; + imageInfo.height = imageInfo.depth = imageInfo.arraySize = imageInfo.slicePitch = 0; + imageInfo.type = CL_MEM_OBJECT_IMAGE1D; + pixelSize = get_pixel_size( imageInfo.format ); + + error = clGetDeviceInfo( device, CL_DEVICE_IMAGE_MAX_BUFFER_SIZE, sizeof( maxWidth ), &maxWidth, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth1D, NULL ); + test_error( error, "Unable to get max image 1D buffer size from device" ); + + if (memSize > (cl_ulong)SIZE_MAX) { + memSize = (cl_ulong)SIZE_MAX; + } + + // note: image_buffer test uses image1D for results validation. + // So the test can't use the biggest possible size for image_buffer if it's bigger than the max image1D size + maxWidth = (maxWidth > maxWidth1D) ? 
maxWidth1D : maxWidth; + // Determine types + if ( outputType == kInt ) + { + readFormat = "i"; + dataType = "int4"; + } + else if ( outputType == kUInt ) + { + readFormat = "ui"; + dataType = "uint4"; + } + else // kFloat + { + readFormat = "f"; + dataType = "float4"; + } + + sprintf( programSrc, read1DBufferKernelSourcePattern, dataType, + readFormat, + readFormat ); + + ptr = programSrc; + error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", "-cl-std=CL2.0" ); + test_error( error, "Unable to create testing kernel" ); + + if ( gTestSmallImages ) + { + for ( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ ) + { + imageInfo.rowPitch = imageInfo.width * pixelSize; + { + if ( gDebugTrace ) + log_info( " at size %d\n", (int)imageInfo.width ); + + int retCode = test_read_image_1D_buffer( device, context, queue, kernel, &imageInfo, imageSampler, outputType, seed ); + if ( retCode ) + return retCode; + } + } + } + else if ( gTestMaxImages ) + { + // Try a specific set of maximum sizes + size_t numbeOfSizes; + size_t sizes[100][3]; + + get_max_sizes(&numbeOfSizes, 100, sizes, maxWidth, 1, 1, 1, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE1D, imageInfo.format); + + for ( size_t idx = 0; idx < numbeOfSizes; idx++ ) + { + imageInfo.width = sizes[ idx ][ 0 ]; + imageInfo.rowPitch = imageInfo.width * pixelSize; + log_info("Testing %d\n", (int)sizes[ idx ][ 0 ]); + if ( gDebugTrace ) + log_info( " at max size %d\n", (int)sizes[ idx ][ 0 ] ); + int retCode = test_read_image_1D_buffer( device, context, queue, kernel, &imageInfo, imageSampler, outputType, seed ); + if ( retCode ) + return retCode; + } + } + else + { + for ( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ ) + { + cl_ulong size; + // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that + // image, the result array, plus offset arrays, will fit in the global ram space + do + { + imageInfo.width = 
(size_t)random_log_in_range( 16, (int)maxWidth / 32, seed ); + imageInfo.rowPitch = imageInfo.width * pixelSize; + size = (size_t)imageInfo.rowPitch * 4; + } while ( size > maxAllocSize || ( size * 3 ) > memSize ); + + if ( gDebugTrace ) + log_info( " at size %d (row pitch %d) out of %d\n", (int)imageInfo.width, (int)imageInfo.rowPitch, (int)maxWidth ); + int retCode = test_read_image_1D_buffer( device, context, queue, kernel, &imageInfo, imageSampler, outputType, seed ); + if ( retCode ) + return retCode; + } + } + + return 0; +} + + diff --git a/test_conformance/images/samplerlessReads/test_read_2D_array.cpp b/test_conformance/images/samplerlessReads/test_read_2D_array.cpp new file mode 100644 index 00000000..2d6c889d --- /dev/null +++ b/test_conformance/images/samplerlessReads/test_read_2D_array.cpp @@ -0,0 +1,334 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../testBase.h" +#include + +#define MAX_ERR 0.005f +#define MAX_HALF_LINEAR_ERR 0.3f + +extern cl_command_queue queue; +extern cl_context context; +extern bool gDebugTrace, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestRounding; +extern cl_device_type gDeviceType; +extern bool gTestReadWrite; + +const char *read2DArrayKernelSourcePattern = +"__kernel void sample_kernel( read_only %s input, sampler_t sampler, __global int *results )\n" +"{\n" +" int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = get_global_id(2);\n" +" int offset = tidZ*get_image_width(input)*get_image_height(input) + tidY*get_image_width(input) + tidX;\n" +" int4 coords = (int4)( tidX, tidY, tidZ, 0 );\n" +" %s clr = read_image%s( input, coords );\n" +" int4 test = (clr != read_image%s( input, sampler, coords ));\n" +" if ( test.x || test.y || test.z || test.w )\n" +" results[offset] = -1;\n" +" else\n" +" results[offset] = 0;\n" +"}"; + +const char *read_write2DArrayKernelSourcePattern = +"__kernel void sample_kernel( read_only %s read_only_image, read_write %s read_write_image, sampler_t sampler, __global int *results )\n" +"{\n" +" int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = get_global_id(2);\n" +" int offset = tidZ*get_image_width(read_only_image)*get_image_height(read_only_image) + tidY*get_image_width(read_only_image) + tidX;\n" +" int4 coords = (int4)( tidX, tidY, tidZ, 0 );\n" +" %s clr = read_image%s( read_only_image, sampler, coords );\n" +" write_image%s(read_write_image, coords, clr);\n" +" atomic_work_item_fence(CLK_IMAGE_MEM_FENCE, memory_order_acq_rel, memory_scope_work_item);\n" +" int4 test = (clr != read_image%s( read_write_image, coords ));\n" +" if ( test.x || test.y || test.z || test.w )\n" +" results[offset] = -1;\n" +" else\n" +" results[offset] = 0;\n" +"}"; + +int test_read_image_2D_array( cl_device_id device, cl_context context, cl_command_queue queue, cl_kernel kernel, + image_descriptor *imageInfo, image_sampler_data 
*imageSampler, + ExplicitType outputType, MTdata d ) +{ + int error; + size_t threads[3]; + cl_sampler actualSampler; + + BufferOwningPtr imageValues; + generate_random_image_data( imageInfo, imageValues, d ); + // Don't use clEnqueueWriteImage; just use copy host ptr to get the data in + cl_image_desc image_desc; + cl_mem read_only_image, read_write_image; + + memset(&image_desc, 0x0, sizeof(cl_image_desc)); + image_desc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY; + image_desc.image_width = imageInfo->width; + image_desc.image_height = imageInfo->height; + image_desc.image_array_size = imageInfo->arraySize; + image_desc.image_row_pitch = ( gEnablePitch ? imageInfo->rowPitch : 0 ); + image_desc.image_slice_pitch = ( gEnablePitch ? imageInfo->slicePitch : 0 ); + image_desc.num_mip_levels = 0; + read_only_image = clCreateImage( context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, imageInfo->format, + &image_desc, imageValues, &error ); + if ( error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create read_only 2D image array of size %d x %d x %d (pitch %d, %d ) (%s)", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->arraySize, (int)imageInfo->rowPitch, (int)imageInfo->slicePitch, IGetErrorString( error ) ); + return error; + } + + if(gTestReadWrite) + { + read_write_image = clCreateImage(context, + CL_MEM_READ_WRITE, + imageInfo->format, + &image_desc, + NULL, + &error ); + if ( error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create read_write 2D image array of size %d x %d x %d (pitch %d, %d ) (%s)", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->arraySize, (int)imageInfo->rowPitch, (int)imageInfo->slicePitch, IGetErrorString( error ) ); + return error; + } + } + + // Create sampler to use + cl_sampler_properties properties[] = { + CL_SAMPLER_NORMALIZED_COORDS, CL_FALSE, + CL_SAMPLER_ADDRESSING_MODE, CL_ADDRESS_NONE, + CL_SAMPLER_FILTER_MODE, CL_FILTER_NEAREST, + 0 }; + actualSampler = clCreateSamplerWithProperties( context, 
properties, &error ); + test_error( error, "Unable to create image sampler" ); + + // Create results buffer + cl_mem results = clCreateBuffer( context, 0, imageInfo->width * imageInfo->height * imageInfo->arraySize * sizeof(cl_int), NULL, &error); + test_error( error, "Unable to create results buffer" ); + + size_t resultValuesSize = imageInfo->width * imageInfo->height * imageInfo->arraySize * sizeof(cl_int); + BufferOwningPtr resultValues(malloc( resultValuesSize )); + memset( resultValues, 0xff, resultValuesSize ); + clEnqueueWriteBuffer( queue, results, CL_TRUE, 0, resultValuesSize, resultValues, 0, NULL, NULL ); + + // Set arguments + int idx = 0; + error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &read_only_image ); + test_error( error, "Unable to set kernel arguments" ); + if(gTestReadWrite) + { + error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &read_write_image ); + test_error( error, "Unable to set kernel arguments" ); + } + error = clSetKernelArg( kernel, idx++, sizeof( cl_sampler ), &actualSampler ); + test_error( error, "Unable to set kernel arguments" ); + error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &results ); + test_error( error, "Unable to set kernel arguments" ); + + // Figure out thread dimensions + threads[0] = (size_t)imageInfo->width; + threads[1] = (size_t)imageInfo->height; + threads[2] = (size_t)imageInfo->arraySize; + + // Run the kernel + error = clEnqueueNDRangeKernel( queue, kernel, 3, NULL, threads, NULL, 0, NULL, NULL ); + test_error( error, "Unable to run kernel" ); + + // Get results + error = clEnqueueReadBuffer( queue, results, CL_TRUE, 0, resultValuesSize, resultValues, 0, NULL, NULL ); + test_error( error, "Unable to read results from kernel" ); + if ( gDebugTrace ) + log_info( " results read\n" ); + + // Check for non-zero comps + bool allZeroes = true; + for ( size_t ic = 0; ic < imageInfo->width * imageInfo->height * imageInfo->arraySize; ++ic ) + { + if ( resultValues[ic] ) { + allZeroes = false; 
+ break; + } + } + if ( !allZeroes ) + { + log_error( " Sampler-less reads differ from reads with sampler.\n" ); + return -1; + } + + clReleaseSampler(actualSampler); + clReleaseMemObject(results); + clReleaseMemObject(read_only_image); + if(gTestReadWrite) + { + clReleaseMemObject(read_write_image); + } + + return 0; +} + +int test_read_image_set_2D_array( cl_device_id device, cl_image_format *format, image_sampler_data *imageSampler, ExplicitType outputType ) +{ + char programSrc[10240]; + const char *ptr; + const char *readFormat; + const char *dataType; + RandomSeed seed( gRandomSeed ); + + int error; + + clProgramWrapper program; + clKernelWrapper kernel; + + // Get operating parameters + size_t maxWidth, maxHeight, maxArraySize; + cl_ulong maxAllocSize, memSize; + image_descriptor imageInfo = { 0 }; + size_t pixelSize; + + imageInfo.format = format; + imageInfo.type = CL_MEM_OBJECT_IMAGE2D_ARRAY; + pixelSize = get_pixel_size( imageInfo.format ); + + error = clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, sizeof( maxArraySize ), &maxArraySize, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL ); + test_error( error, "Unable to get max image 2D array size from device" ); + + if (memSize > (cl_ulong)SIZE_MAX) { + memSize = (cl_ulong)SIZE_MAX; + } + + // Determine types + if ( outputType == kInt ) + { + readFormat = "i"; + dataType = "int4"; + } + else if ( outputType == kUInt ) + { + readFormat = "ui"; + dataType = "uint4"; + } + else // kFloat + { + readFormat = "f"; + dataType = (format->image_channel_order == CL_DEPTH) ? 
"float" : "float4"; + } + + // Construct the source + if(gTestReadWrite) + { + sprintf( programSrc, read_write2DArrayKernelSourcePattern, + (format->image_channel_order == CL_DEPTH) ? "image2d_array_depth_t" : "image2d_array_t", + (format->image_channel_order == CL_DEPTH) ? "image2d_array_depth_t" : "image2d_array_t", + dataType, + readFormat, + readFormat, + readFormat); + } + else + { + sprintf( programSrc, read2DArrayKernelSourcePattern, + (format->image_channel_order == CL_DEPTH) ? "image2d_array_depth_t" : "image2d_array_t", + dataType, + readFormat, + readFormat ); + } + + + ptr = programSrc; + error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", "-cl-std=CL2.0" ); + test_error( error, "Unable to create testing kernel" ); + + + // Run tests + if ( gTestSmallImages ) + { + for ( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ ) + { + imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format ); + + for ( imageInfo.height = 1; imageInfo.height < 9; imageInfo.height++ ) + { + imageInfo.slicePitch = imageInfo.rowPitch * imageInfo.height; + for ( imageInfo.arraySize = 2; imageInfo.arraySize < 9; imageInfo.arraySize++ ) + { + if ( gDebugTrace ) + log_info( " at size %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.arraySize ); + int retCode = test_read_image_2D_array( device, context, queue, kernel, &imageInfo, imageSampler, outputType, seed ); + if ( retCode ) + return retCode; + } + } + } + } + else if ( gTestMaxImages ) + { + // Try a specific set of maximum sizes + size_t numbeOfSizes; + size_t sizes[100][3]; + + get_max_sizes(&numbeOfSizes, 100, sizes, maxWidth, maxHeight, 1, maxArraySize, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE2D_ARRAY, imageInfo.format); + + for ( size_t idx = 0; idx < numbeOfSizes; idx++ ) + { + imageInfo.width = sizes[ idx ][ 0 ]; + imageInfo.height = sizes[ idx ][ 1 ]; + imageInfo.arraySize = sizes[ idx ][ 2 ]; + 
imageInfo.rowPitch = imageInfo.width * pixelSize; + imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch; + log_info("Testing %d x %d x %d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ], (int)sizes[ idx ][ 2 ]); + if ( gDebugTrace ) + log_info( " at max size %d,%d,%d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ], (int)sizes[ idx ][ 2 ] ); + int retCode = test_read_image_2D_array( device, context, queue, kernel, &imageInfo, imageSampler, outputType, seed ); + if ( retCode ) + return retCode; + } + } + else + { + for ( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ ) + { + cl_ulong size; + // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that + // image, the result array, plus offset arrays, will fit in the global ram space + do + { + imageInfo.width = (size_t)random_log_in_range( 16, (int)maxWidth / 32, seed ); + imageInfo.height = (size_t)random_log_in_range( 16, (int)maxHeight / 32, seed ); + imageInfo.arraySize = (size_t)random_log_in_range( 16, (int)maxArraySize / 32, seed ); + + imageInfo.rowPitch = imageInfo.width * pixelSize; + imageInfo.slicePitch = imageInfo.rowPitch * imageInfo.height; + + if ( gEnablePitch ) + { + size_t extraWidth = (int)random_log_in_range( 0, 64, seed ); + imageInfo.rowPitch += extraWidth * pixelSize; + + size_t extraHeight = (int)random_log_in_range( 0, 64, seed ); + imageInfo.slicePitch = imageInfo.rowPitch * (imageInfo.height + extraHeight); + } + + size = (cl_ulong)imageInfo.slicePitch * (cl_ulong)imageInfo.arraySize * 4 * 4; + } while ( size > maxAllocSize || ( size * 3 ) > memSize ); + + if ( gDebugTrace ) + log_info( " at size %d,%d,%d (pitch %d,%d) out of %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.arraySize, (int)imageInfo.rowPitch, (int)imageInfo.slicePitch, (int)maxWidth, (int)maxHeight, (int)maxArraySize ); + int retCode = test_read_image_2D_array( device, context, queue, kernel, &imageInfo, imageSampler, outputType, seed ); + if ( 
retCode ) + return retCode; + } + } + + return 0; +} diff --git a/test_conformance/images/samplerlessReads/test_read_3D.cpp b/test_conformance/images/samplerlessReads/test_read_3D.cpp new file mode 100644 index 00000000..5913afed --- /dev/null +++ b/test_conformance/images/samplerlessReads/test_read_3D.cpp @@ -0,0 +1,337 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../testBase.h" +#include <float.h> + +#define MAX_ERR 0.005f +#define MAX_HALF_LINEAR_ERR 0.3f + +extern cl_command_queue queue; +extern cl_context context; +extern bool gDebugTrace, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestRounding; +extern cl_device_type gDeviceType; +extern bool gTestReadWrite; + +const char *read3DKernelSourcePattern = +"__kernel void sample_kernel( read_only image3d_t input, sampler_t sampler, __global int *results )\n" +"{\n" +" int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = get_global_id(2);\n" +" int offset = tidZ*get_image_width(input)*get_image_height(input) + tidY*get_image_width(input) + tidX;\n" +" int4 coords = (int4)( tidX, tidY, tidZ, 0 );\n" +" %s clr = read_image%s( input, coords );\n" +" int4 test = (clr != read_image%s( input, sampler, coords ));\n" +" if ( test.x || test.y || test.z || test.w )\n" +" results[offset] = -1;\n" +" else\n" +" results[offset] = 0;\n" +"}"; + +const char *read_write3DKernelSourcePattern = +"__kernel void sample_kernel( read_only image3d_t
read_only_image, read_write image3d_t read_write_image, sampler_t sampler, __global int *results )\n" +"{\n" +" int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = get_global_id(2);\n" +" int offset = tidZ*get_image_width(read_only_image)*get_image_height(read_only_image) + tidY*get_image_width(read_only_image) + tidX;\n" +" int4 coords = (int4)( tidX, tidY, tidZ, 0 );\n" +" %s clr = read_image%s( read_only_image, sampler, coords );\n" +" write_image%s(read_write_image, coords, clr);\n" +" atomic_work_item_fence(CLK_IMAGE_MEM_FENCE, memory_order_acq_rel, memory_scope_work_item);\n" +" int4 test = (clr != read_image%s( read_write_image, coords ));\n" +" if ( test.x || test.y || test.z || test.w )\n" +" results[offset] = -1;\n" +" else\n" +" results[offset] = 0;\n" +"}"; +int test_read_image_3D( cl_device_id device, cl_context context, cl_command_queue queue, cl_kernel kernel, + image_descriptor *imageInfo, image_sampler_data *imageSampler, + ExplicitType outputType, MTdata d ) +{ + int error; + size_t threads[3]; + cl_sampler actualSampler; + + BufferOwningPtr imageValues; + generate_random_image_data( imageInfo, imageValues, d ); + // Don't use clEnqueueWriteImage; just use copy host ptr to get the data in + cl_image_desc image_desc; + cl_mem read_only_image, read_write_image; + + memset(&image_desc, 0x0, sizeof(cl_image_desc)); + image_desc.image_type = CL_MEM_OBJECT_IMAGE3D; + image_desc.image_width = imageInfo->width; + image_desc.image_height = imageInfo->height; + image_desc.image_depth = imageInfo->depth; + image_desc.image_row_pitch = ( gEnablePitch ? imageInfo->rowPitch : 0 ); + image_desc.image_slice_pitch = ( gEnablePitch ? 
imageInfo->slicePitch : 0 ); + image_desc.num_mip_levels = 0; + read_only_image = clCreateImage( context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, imageInfo->format, + &image_desc, imageValues, &error ); + if ( error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create read_only 3D image of size %d x %d x %d (pitch %d, %d ) (%s)", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->depth, (int)imageInfo->rowPitch, (int)imageInfo->slicePitch, IGetErrorString( error ) ); + return error; + } + + if(gTestReadWrite) + { + read_write_image = clCreateImage(context, + CL_MEM_READ_WRITE, + imageInfo->format, + &image_desc, + NULL, + &error); + if ( error != CL_SUCCESS ) + { + log_error( "ERROR: Unable to create read_write 3D image of size %d x %d x %d (pitch %d, %d ) (%s)", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->depth, (int)imageInfo->rowPitch, (int)imageInfo->slicePitch, IGetErrorString( error ) ); + return error; + } + } + + // Create sampler to use + cl_sampler_properties properties[] = { + CL_SAMPLER_NORMALIZED_COORDS, CL_FALSE, + CL_SAMPLER_ADDRESSING_MODE, CL_ADDRESS_NONE, + CL_SAMPLER_FILTER_MODE, CL_FILTER_NEAREST, + 0 }; + actualSampler = clCreateSamplerWithProperties( context, properties, &error ); + test_error( error, "Unable to create image sampler" ); + + // Create results buffer + cl_mem results = clCreateBuffer( context, 0, imageInfo->width * imageInfo->height * imageInfo->depth * sizeof(cl_int), NULL, &error); + test_error( error, "Unable to create results buffer" ); + + size_t resultValuesSize = imageInfo->width * imageInfo->height * imageInfo->depth * sizeof(cl_int); + BufferOwningPtr resultValues(malloc( resultValuesSize )); + memset( resultValues, 0xff, resultValuesSize ); + clEnqueueWriteBuffer( queue, results, CL_TRUE, 0, resultValuesSize, resultValues, 0, NULL, NULL ); + + // Set arguments + int idx = 0; + error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &read_only_image ); + test_error( error, "Unable 
to set kernel arguments" ); + if(gTestReadWrite) + { + error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &read_write_image ); + test_error( error, "Unable to set kernel arguments" ); + } + error = clSetKernelArg( kernel, idx++, sizeof( cl_sampler ), &actualSampler ); + test_error( error, "Unable to set kernel arguments" ); + error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &results ); + test_error( error, "Unable to set kernel arguments" ); + + // Figure out thread dimensions + threads[0] = (size_t)imageInfo->width; + threads[1] = (size_t)imageInfo->height; + threads[2] = (size_t)imageInfo->depth; + + // Run the kernel + error = clEnqueueNDRangeKernel( queue, kernel, 3, NULL, threads, NULL, 0, NULL, NULL ); + test_error( error, "Unable to run kernel" ); + + if ( gDebugTrace ) + log_info( " reading results, %ld kbytes\n", (unsigned long)( imageInfo->width * imageInfo->height * imageInfo->depth * sizeof(cl_int) / 1024 ) ); + + // Get results + error = clEnqueueReadBuffer( queue, results, CL_TRUE, 0, resultValuesSize, resultValues, 0, NULL, NULL ); + test_error( error, "Unable to read results from kernel" ); + if ( gDebugTrace ) + log_info( " results read\n" ); + + // Check for non-zero comps + bool allZeroes = true; + size_t ic; + for ( ic = 0; ic < imageInfo->width * imageInfo->height * imageInfo->depth; ++ic ) + { + if ( resultValues[ic] ) { + allZeroes = false; + break; + } + } + if ( !allZeroes ) + { + log_error( " Sampler-less reads differ from reads with sampler at index %lu.\n", ic ); + return -1; + } + + clReleaseSampler(actualSampler); + clReleaseMemObject(results); + clReleaseMemObject(read_only_image); + if(gTestReadWrite) + { + clReleaseMemObject(read_write_image); + } + + return 0; +} + +int test_read_image_set_3D( cl_device_id device, cl_image_format *format, image_sampler_data *imageSampler, ExplicitType outputType ) +{ + char programSrc[10240]; + const char *ptr; + const char *readFormat; + const char *dataType; + RandomSeed seed( 
gRandomSeed ); + + int error; + + clProgramWrapper program; + clKernelWrapper kernel; + + // Get operating parameters + size_t maxWidth, maxHeight, maxDepth; + cl_ulong maxAllocSize, memSize; + image_descriptor imageInfo = { 0 }; + size_t pixelSize; + + imageInfo.format = format; + imageInfo.arraySize = 0; + imageInfo.type = CL_MEM_OBJECT_IMAGE3D; + pixelSize = get_pixel_size( imageInfo.format ); + + error = clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof( maxDepth ), &maxDepth, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL ); + error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL ); + test_error( error, "Unable to get max image 3D size from device" ); + + if (memSize > (cl_ulong)SIZE_MAX) { + memSize = (cl_ulong)SIZE_MAX; + } + + // Determine types + if ( outputType == kInt ) + { + readFormat = "i"; + dataType = "int4"; + } + else if ( outputType == kUInt ) + { + readFormat = "ui"; + dataType = "uint4"; + } + else // kFloat + { + readFormat = "f"; + dataType = "float4"; + } + + // Construct the source + if(gTestReadWrite) + { + sprintf( programSrc, + read_write3DKernelSourcePattern, + dataType, + readFormat, + readFormat, + readFormat); + } + else + { + sprintf( programSrc, + read3DKernelSourcePattern, + dataType, + readFormat, + readFormat ); + } + + + ptr = programSrc; + error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", "-cl-std=CL2.0" ); + test_error( error, "Unable to create testing kernel" ); + + + // Run tests + if ( gTestSmallImages ) + { + for ( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ ) + { + imageInfo.rowPitch = imageInfo.width * pixelSize; + + 
for ( imageInfo.height = 1; imageInfo.height < 9; imageInfo.height++ ) + { + imageInfo.slicePitch = imageInfo.rowPitch * imageInfo.height; + for ( imageInfo.depth = 2; imageInfo.depth < 9; imageInfo.depth++ ) + { + if ( gDebugTrace ) + log_info( " at size %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.depth ); + int retCode = test_read_image_3D( device, context, queue, kernel, &imageInfo, imageSampler, outputType, seed ); + if ( retCode ) + return retCode; + } + } + } + } + else if ( gTestMaxImages ) + { + // Try a specific set of maximum sizes + size_t numbeOfSizes; + size_t sizes[100][3]; + + get_max_sizes(&numbeOfSizes, 100, sizes, maxWidth, maxHeight, maxDepth, 1, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE3D, imageInfo.format); + + for ( size_t idx = 0; idx < numbeOfSizes; idx++ ) + { + imageInfo.width = sizes[ idx ][ 0 ]; + imageInfo.height = sizes[ idx ][ 1 ]; + imageInfo.depth = sizes[ idx ][ 2 ]; + imageInfo.rowPitch = imageInfo.width * pixelSize; + imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch; + log_info("Testing %d x %d x %d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ], (int)sizes[ idx ][ 2 ]); + if ( gDebugTrace ) + log_info( " at max size %d,%d,%d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ], (int)sizes[ idx ][ 2 ] ); + int retCode = test_read_image_3D( device, context, queue, kernel, &imageInfo, imageSampler, outputType, seed ); + if ( retCode ) + return retCode; + } + } + else + { + for ( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ ) + { + cl_ulong size; + // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that + // image, the result array, plus offset arrays, will fit in the global ram space + do + { + imageInfo.width = (size_t)random_log_in_range( 16, (int)maxWidth / 32, seed ); + imageInfo.height = (size_t)random_log_in_range( 16, (int)maxHeight / 32, seed ); + imageInfo.depth = (size_t)random_log_in_range( 16, (int)maxDepth / 32, seed ); + + 
imageInfo.rowPitch = imageInfo.width * pixelSize; + imageInfo.slicePitch = imageInfo.rowPitch * imageInfo.height; + + if ( gEnablePitch ) + { + size_t extraWidth = (int)random_log_in_range( 0, 64, seed ); + imageInfo.rowPitch += extraWidth * pixelSize; + + size_t extraHeight = (int)random_log_in_range( 0, 64, seed ); + imageInfo.slicePitch = imageInfo.rowPitch * (imageInfo.height + extraHeight); + } + + size = (cl_ulong)imageInfo.slicePitch * (cl_ulong)imageInfo.depth * 4 * 4; + } while ( size > maxAllocSize || ( size * 3 ) > memSize ); + + if ( gDebugTrace ) + log_info( " at size %d,%d,%d (pitch %d,%d) out of %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.depth, (int)imageInfo.rowPitch, (int)imageInfo.slicePitch, (int)maxWidth, (int)maxHeight, (int)maxDepth ); + int retCode = test_read_image_3D( device, context, queue, kernel, &imageInfo, imageSampler, outputType, seed ); + if ( retCode ) + return retCode; + } + } + + return 0; +} diff --git a/test_conformance/images/testBase.h b/test_conformance/images/testBase.h new file mode 100644 index 00000000..69845865 --- /dev/null +++ b/test_conformance/images/testBase.h @@ -0,0 +1,78 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef _testBase_h +#define _testBase_h + +#include "../../test_common/harness/compat.h" +#include "../../test_common/harness/testHarness.h" +#include "../../test_common/harness/kernelHelpers.h" +#include "../../test_common/harness/clImageHelper.h" +#include "../../test_common/harness/imageHelpers.h" + +// Amount to offset pixels for checking normalized reads +#define NORM_OFFSET 0.1f + +enum TypesToTest +{ + kTestInt = ( 1 << 0 ), + kTestUInt = ( 1 << 1 ), + kTestFloat = ( 1 << 2 ), + kTestAllTypes = kTestInt | kTestUInt | kTestFloat +}; + +// For the clCopyImage test +enum MethodsToTest +{ + k1D = ( 1 << 0 ), + k2D = ( 1 << 1 ), + k1DArray = ( 1 << 2 ), + k2DArray = ( 1 << 3 ), + k3D = ( 1 << 4 ), + k2DTo3D = ( 1 << 5 ), + k3DTo2D = ( 1 << 6 ), + k2DArrayTo2D = ( 1 << 7 ), + k2DTo2DArray = ( 1 << 8 ), + k2DArrayTo3D = ( 1 << 9 ), + k3DTo2DArray = ( 1 << 10 ), +}; + + +enum TestTypes +{ + kReadTests = 1 << 0 , + kWriteTests = 1 << 1, + kReadWriteTests = 1 << 2, + kAllTests = ( kReadTests | kWriteTests | kReadWriteTests ) +}; + +typedef int (*test_format_set_fn)( cl_device_id device, + cl_image_format *formatList, bool *filterFlags, unsigned int numFormats, + image_sampler_data *imageSampler, ExplicitType outputType, + cl_mem_object_type imageType ); + +extern int test_read_image_formats( cl_device_id device, + cl_image_format *formatList, bool *filterFlags, unsigned int numFormats, + image_sampler_data *imageSampler, ExplicitType outputType, + cl_mem_object_type imageType ); +extern int test_write_image_formats( cl_device_id device, + cl_image_format *formatList, bool *filterFlags, unsigned int numFormats, + image_sampler_data *imageSampler, ExplicitType outputType, + cl_mem_object_type imageType ); + +#endif // _testBase_h + + + diff --git a/test_conformance/integer_ops/CMakeLists.txt b/test_conformance/integer_ops/CMakeLists.txt new file mode 100644 index 00000000..c5c41276 --- /dev/null +++ b/test_conformance/integer_ops/CMakeLists.txt @@ -0,0 +1,26 @@ 
+set(MODULE_NAME INTEGER_OPS) + +set(${MODULE_NAME}_SOURCES + main.c + test_int_basic_ops.c + test_integers.cpp + test_upsample.cpp + test_intmul24.c test_intmad24.c + test_sub_sat.c test_add_sat.c + test_abs.c test_absdiff.c + test_unary_ops.cpp + verification_and_generation_functions.c + test_popcount.c + ../../test_common/harness/ThreadPool.c + ../../test_common/harness/mt19937.c + ../../test_common/harness/conversions.c + ../../test_common/harness/errorHelpers.c + ../../test_common/harness/threadTesting.c + ../../test_common/harness/testHarness.c + ../../test_common/harness/kernelHelpers.c + ../../test_common/harness/msvc9.c + ../../test_common/harness/parseParameters.cpp +) + +include(../CMakeCommon.txt) + diff --git a/test_conformance/integer_ops/Jamfile b/test_conformance/integer_ops/Jamfile new file mode 100644 index 00000000..961dc955 --- /dev/null +++ b/test_conformance/integer_ops/Jamfile @@ -0,0 +1,28 @@ +project + : requirements + gcc:-xc++ + msvc:"/TP" + ; + +exe test_integer_ops + : main.c + test_abs.c + test_absdiff.c + test_add_sat.c + test_int.c + test_integers.cpp + test_intmad24.c + test_intmul24.c + test_long.c + test_sub_sat.c + test_uint.c + test_ulong.c + test_upsample.cpp + ; + +install dist + : test_integer_ops + : debug:$(DIST)/debug/tests/test_conformance/integer_ops + release:$(DIST)/release/tests/test_conformance/integer_ops + ; + diff --git a/test_conformance/integer_ops/Makefile b/test_conformance/integer_ops/Makefile new file mode 100644 index 00000000..9848e76e --- /dev/null +++ b/test_conformance/integer_ops/Makefile @@ -0,0 +1,52 @@ +ifdef BUILD_WITH_ATF +ATF = -framework ATF +USE_ATF = -DUSE_ATF +endif + +SRCS = main.c \ + test_popcount.c \ + test_int_basic_ops.c \ + test_integers.cpp \ + test_upsample.cpp \ + test_intmul24.c test_intmad24.c \ + test_sub_sat.c test_add_sat.c \ + test_abs.c test_absdiff.c \ + test_unary_ops.cpp \ + verification_and_generation_functions.c \ + ../../test_common/harness/conversions.c \ + 
../../test_common/harness/errorHelpers.c \ + ../../test_common/harness/threadTesting.c \ + ../../test_common/harness/testHarness.c \ + ../../test_common/harness/mt19937.c \ + ../../test_common/harness/ThreadPool.c \ + ../../test_common/harness/kernelHelpers.c + +DEFINES = + +SOURCES = $(abspath $(SRCS)) +LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries +LIBPATH += -L. +FRAMEWORK = $(SOURCES) +HEADERS = +TARGET = test_integer_ops +INCLUDE = +COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32 -Os +CC = c++ +CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE) +CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE) +LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF} + +OBJECTS := ${SOURCES:.c=.o} +OBJECTS := ${OBJECTS:.cpp=.o} + +TARGETOBJECT = +all: $(TARGET) + +$(TARGET): $(OBJECTS) + $(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES) + +clean: + rm -f $(TARGET) $(OBJECTS) + +.DEFAULT: + @echo The target \"$@\" does not exist in Makefile. diff --git a/test_conformance/integer_ops/main.c b/test_conformance/integer_ops/main.c new file mode 100644 index 00000000..c10848d6 --- /dev/null +++ b/test_conformance/integer_ops/main.c @@ -0,0 +1,347 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/compat.h" + +#include +#include +#include "procs.h" +#include "../../test_common/harness/testHarness.h" + +#if !defined(_WIN32) +#include +#endif + +basefn basefn_list[] = { + test_integer_clz, + test_integer_ctz, + test_integer_hadd, + test_integer_rhadd, + test_integer_mul_hi, + test_integer_rotate, + test_integer_clamp, + test_integer_mad_sat, + test_integer_mad_hi, + test_integer_min, + test_integer_max, + test_integer_upsample, + + test_abs, + test_absdiff, + test_add_sat, + test_sub_sat, + + test_integer_addAssign, + test_integer_subtractAssign, + test_integer_multiplyAssign, + test_integer_divideAssign, + test_integer_moduloAssign, + test_integer_andAssign, + test_integer_orAssign, + test_integer_exclusiveOrAssign, + + test_unary_ops_increment, + test_unary_ops_decrement, + test_unary_ops_full, + + test_intmul24, + test_intmad24, + + test_long_math, + test_long_logic, + test_long_shift, + test_long_compare, + + test_ulong_math, + test_ulong_logic, + test_ulong_shift, + test_ulong_compare, + + test_int_math, + test_int_logic, + test_int_shift, + test_int_compare, + + test_uint_math, + test_uint_logic, + test_uint_shift, + test_uint_compare, + + test_short_math, + test_short_logic, + test_short_shift, + test_short_compare, + + test_ushort_math, + test_ushort_logic, + test_ushort_shift, + test_ushort_compare, + + test_char_math, + test_char_logic, + test_char_shift, + test_char_compare, + + test_uchar_math, + test_uchar_logic, + test_uchar_shift, + test_uchar_compare, + + test_popcount, + + + // Quick + test_quick_long_math, + test_quick_long_logic, + test_quick_long_shift, + test_quick_long_compare, + + test_quick_ulong_math, + test_quick_ulong_logic, + test_quick_ulong_shift, + test_quick_ulong_compare, + + test_quick_int_math, + test_quick_int_logic, + test_quick_int_shift, + test_quick_int_compare, + + test_quick_uint_math, + test_quick_uint_logic, + test_quick_uint_shift, + test_quick_uint_compare, + + 
test_quick_short_math, + test_quick_short_logic, + test_quick_short_shift, + test_quick_short_compare, + + test_quick_ushort_math, + test_quick_ushort_logic, + test_quick_ushort_shift, + test_quick_ushort_compare, + + test_quick_char_math, + test_quick_char_logic, + test_quick_char_shift, + test_quick_char_compare, + + test_quick_uchar_math, + test_quick_uchar_logic, + test_quick_uchar_shift, + test_quick_uchar_compare, + + test_vector_scalar_ops, +}; + + +const char *basefn_names[] = { + "integer_clz", + "integer_ctz", + "integer_hadd", + "integer_rhadd", + "integer_mul_hi", + "integer_rotate", + "integer_clamp", + "integer_mad_sat", + "integer_mad_hi", + "integer_min", + "integer_max", + "integer_upsample", + + "integer_abs", + "integer_abs_diff", + "integer_add_sat", + "integer_sub_sat", + + "integer_addAssign", + "integer_subtractAssign", + "integer_multiplyAssign", + "integer_divideAssign", + "integer_moduloAssign", + "integer_andAssign", + "integer_orAssign", + "integer_exclusiveOrAssign", + + "unary_ops_increment", + "unary_ops_decrement", + "unary_ops_full", + + "integer_mul24", + "integer_mad24", + + "long_math", + "long_logic", + "long_shift", + "long_compare", + + "ulong_math", + "ulong_logic", + "ulong_shift", + "ulong_compare", + + "int_math", + "int_logic", + "int_shift", + "int_compare", + + "uint_math", + "uint_logic", + "uint_shift", + "uint_compare", + + "short_math", + "short_logic", + "short_shift", + "short_compare", + + "ushort_math", + "ushort_logic", + "ushort_shift", + "ushort_compare", + + "char_math", + "char_logic", + "char_shift", + "char_compare", + + "uchar_math", + "uchar_logic", + "uchar_shift", + "uchar_compare", + + "popcount", + + // Quick + "quick_long_math", + "quick_long_logic", + "quick_long_shift", + "quick_long_compare", + + "quick_ulong_math", + "quick_ulong_logic", + "quick_ulong_shift", + "quick_ulong_compare", + + "quick_int_math", + "quick_int_logic", + "quick_int_shift", + "quick_int_compare", + + "quick_uint_math", + 
"quick_uint_logic", + "quick_uint_shift", + "quick_uint_compare", + + "quick_short_math", + "quick_short_logic", + "quick_short_shift", + "quick_short_compare", + + "quick_ushort_math", + "quick_ushort_logic", + "quick_ushort_shift", + "quick_ushort_compare", + + "quick_char_math", + "quick_char_logic", + "quick_char_shift", + "quick_char_compare", + + "quick_uchar_math", + "quick_uchar_logic", + "quick_uchar_shift", + "quick_uchar_compare", + + "vector_scalar", +}; + +ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_list) / sizeof(basefn_list[0]))); + +int num_fns = sizeof(basefn_names) / sizeof(char *); + +void fill_test_values( cl_long *outBufferA, cl_long *outBufferB, size_t numElements, MTdata d ) +{ + static const cl_long sUniqueValues[] = { 0x3333333333333333LL, 0x5555555555555555LL, 0x9999999999999999LL, 0xaaaaaaaaaaaaaaaaLL, 0xccccccccccccccccLL, + 0x3030303030303030LL, 0x5050505050505050LL, 0x9090909090909090LL, 0xa0a0a0a0a0a0a0a0LL, 0xc0c0c0c0c0c0c0c0LL, 0xf0f0f0f0f0f0f0f0LL, + 0x0303030303030303LL, 0x0505050505050505LL, 0x0909090909090909LL, 0x0a0a0a0a0a0a0a0aLL, 0x0c0c0c0c0c0c0c0cLL, 0x0f0f0f0f0f0f0f0fLL, + 0x3300330033003300LL, 0x5500550055005500LL, 0x9900990099009900LL, 0xaa00aa00aa00aa00LL, 0xcc00cc00cc00cc00LL, 0xff00ff00ff00ff00LL, + 0x0033003300330033LL, 0x0055005500550055LL, 0x0099009900990099LL, 0x00aa00aa00aa00aaLL, 0x00cc00cc00cc00ccLL, 0x00ff00ff00ff00ffLL, + 0x3333333300000000LL, 0x5555555500000000LL, 0x9999999900000000LL, 0xaaaaaaaa00000000LL, 0xcccccccc00000000LL, 0xffffffff00000000LL, + 0x0000000033333333LL, 0x0000000055555555LL, 0x0000000099999999LL, 0x00000000aaaaaaaaLL, 0x00000000ccccccccLL, 0x00000000ffffffffLL, + 0x3333000000003333LL, 0x5555000000005555LL, 0x9999000000009999LL, 0xaaaa00000000aaaaLL, 0xcccc00000000ccccLL, 0xffff00000000ffffLL}; + static cl_long sSpecialValues[ 128 + 128 + 128 + ( sizeof( sUniqueValues ) / sizeof( sUniqueValues[ 0 ] ) ) ] = { 0 }; + + if( sSpecialValues[ 0 ] == 0 ) + { + // 
Init the power-of-two special values + for( size_t i = 0; i < 64; i++ ) + { + sSpecialValues[ i ] = 1LL << i; + sSpecialValues[ i + 64 ] = -1LL << i; + sSpecialValues[ i + 128 ] = sSpecialValues[ i ] - 1; + sSpecialValues[ i + 128 + 64 ] = sSpecialValues[ i ] - 1; + sSpecialValues[ i + 256 ] = sSpecialValues[ i ] + 1; + sSpecialValues[ i + 256 + 64 ] = sSpecialValues[ i ] + 1; + } + memcpy( &sSpecialValues[ 128 + 128 + 128 ], sUniqueValues, sizeof( sUniqueValues ) ); + } + + size_t i, aIdx = 0, bIdx = 0; + size_t numSpecials = sizeof( sSpecialValues ) / sizeof( sSpecialValues[ 0 ] ); + + for( i = 0; i < numElements; i++ ) + { + outBufferA[ i ] = sSpecialValues[ aIdx ]; + outBufferB[ i ] = sSpecialValues[ bIdx ]; + bIdx++; + if( bIdx == numSpecials ) + { + bIdx = 0; + aIdx++; + if( aIdx == numSpecials ) + break; + } + } + if( i < numElements ) + { + // Fill remainder with random values + for( ; i < numElements; i++ ) + { + int a = (int)genrand_int32(d); + int b = (int)genrand_int32(d); + outBufferA[ i ] = ((cl_long)a <<33 | (cl_long)b) ^ ((cl_long)b << 16); + + a = (int)genrand_int32(d); + b = (int)genrand_int32(d); + outBufferB[ i ] = ((cl_long)a <<33 | (cl_long)b) ^ ((cl_long)b << 16); + } + } + else if( aIdx < numSpecials ) + { + log_info( "WARNING: Not enough space to fill all special values for long test! (need %d additional elements)\n", (int)( ( numSpecials - aIdx ) * numSpecials ) ); + } +} + + + +int main(int argc, const char *argv[]) +{ + return runTestHarness( argc, argv, num_fns, basefn_list, basefn_names, false /* image support required */, false /* force no context creation */, 0 ); +} + + + + diff --git a/test_conformance/integer_ops/procs.h b/test_conformance/integer_ops/procs.h new file mode 100644 index 00000000..90255db7 --- /dev/null +++ b/test_conformance/integer_ops/procs.h @@ -0,0 +1,143 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/errorHelpers.h" +#include "../../test_common/harness/kernelHelpers.h" +#include "../../test_common/harness/threadTesting.h" +#include "../../test_common/harness/typeWrappers.h" +#include "../../test_common/harness/testHarness.h" +#include "../../test_common/harness/mt19937.h" + + +// The number of errors to print out for each test +#define MAX_ERRORS_TO_PRINT 10 + +extern const size_t vector_aligns[]; + +extern int create_program_and_kernel(const char *source, const char *kernel_name, cl_program *program_ret, cl_kernel *kernel_ret); +extern void fill_test_values( cl_long *outBufferA, cl_long *outBufferB, size_t numElements, MTdata d ); + + +extern int test_popcount(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); +extern int test_integer_clz(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_integer_ctz(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_integer_hadd(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_integer_rhadd(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_integer_mul_hi(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_integer_rotate(cl_device_id 
deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_integer_clamp(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_integer_mad_sat(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_integer_mad_hi(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_integer_min(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_integer_max(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_integer_upsample(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_integer_addAssign(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_integer_subtractAssign(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_integer_multiplyAssign(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_integer_divideAssign(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_integer_moduloAssign(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_integer_andAssign(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_integer_orAssign(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_integer_exclusiveOrAssign(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_abs(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_absdiff(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern 
int test_add_sat(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_sub_sat(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_intmul24(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_intmad24(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + + +extern int test_long_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_long_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_long_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_long_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_ulong_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_ulong_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_ulong_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_ulong_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_int_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_int_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_int_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_int_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_uint_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_uint_logic(cl_device_id deviceID, cl_context 
context, cl_command_queue queue, int num_elements); +extern int test_uint_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_uint_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_short_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_short_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_short_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_short_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_ushort_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_ushort_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_ushort_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_ushort_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_char_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_char_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_char_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_char_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_uchar_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_uchar_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_uchar_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int 
num_elements); +extern int test_uchar_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + + +extern int test_quick_long_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_quick_long_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_quick_long_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_quick_long_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_quick_ulong_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_quick_ulong_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_quick_ulong_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_quick_ulong_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_quick_int_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_quick_int_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_quick_int_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_quick_int_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_quick_uint_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_quick_uint_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_quick_uint_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_quick_uint_compare(cl_device_id deviceID, 
cl_context context, cl_command_queue queue, int num_elements); + +extern int test_quick_short_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_quick_short_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_quick_short_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_quick_short_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_quick_ushort_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_quick_ushort_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_quick_ushort_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_quick_ushort_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_quick_char_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_quick_char_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_quick_char_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_quick_char_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_quick_uchar_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_quick_uchar_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_quick_uchar_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_quick_uchar_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int 
num_elements); + +extern int test_unary_ops_full(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_unary_ops_increment(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_unary_ops_decrement(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_vector_scalar_ops(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + diff --git a/test_conformance/integer_ops/testBase.h b/test_conformance/integer_ops/testBase.h new file mode 100644 index 00000000..5073b21f --- /dev/null +++ b/test_conformance/integer_ops/testBase.h @@ -0,0 +1,31 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef _testBase_h +#define _testBase_h + +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include + +#include "procs.h" + +#endif // _testBase_h + + + diff --git a/test_conformance/integer_ops/test_abs.c b/test_conformance/integer_ops/test_abs.c new file mode 100644 index 00000000..e8479c53 --- /dev/null +++ b/test_conformance/integer_ops/test_abs.c @@ -0,0 +1,335 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include +#include + +#include "procs.h" + + +static int verify_abs_char( const void *p, const void *q, size_t n, const char *sizeName, size_t vecSize ) +{ + const cl_char *inA = (const cl_char*) p; + const cl_uchar *outptr = (const cl_uchar*) q; + size_t i; + for( i = 0; i < n; i++ ) + { + cl_uchar r = inA[i]; + if( inA[i] < 0 ) + r = -inA[i]; + if( r != outptr[i] ) + { log_info( "%ld) Failure for abs( (char%s) 0x%2.2x) = *0x%2.2x vs 0x%2.2x\n", i, sizeName, inA[i],r, outptr[i] ); return -1; } + } + return 0; +} + + +static int verify_abs_short( const void *p, const void *q, size_t n, const char *sizeName, size_t vecSize ) +{ + const cl_short *inA = (const cl_short*) p; + const cl_ushort *outptr = (const cl_ushort*) q; + size_t i; + for( i = 0; i < n; i++ ) + { + cl_ushort r = inA[i]; + if( inA[i] < 0 ) + r = -inA[i]; + if( r != outptr[i] ) + { log_info( "%ld) Failure for abs( (short%s) 0x%4.4x) = *0x%4.4x vs 0x%4.4x\n", i, sizeName, inA[i],r, outptr[i] ); return -1; } + } + return 0; +} + +static int verify_abs_int( const void *p, const void *q, size_t n, const char *sizeName , size_t vecSize) +{ + const cl_int *inA = (const cl_int*) p; + const cl_uint *outptr = (const cl_uint*) q; + size_t i; + for( i = 0; i < n; i++ ) + { + cl_uint r = inA[i]; + if( inA[i] < 0 ) + r = -inA[i]; + if( r != outptr[i] ) + { log_info( "%ld) Failure for abs( (int%s) 0x%2.2x) = *0x%8.8x vs 0x%8.8x\n", i, sizeName, inA[i],r, outptr[i] ); return -1; } + } + return 0; +} + +static int 
verify_abs_long( const void *p, const void *q, size_t n, const char *sizeName, size_t vecSize ) +{ + const cl_long *inA = (const cl_long*) p; + const cl_ulong *outptr = (const cl_ulong*) q; + size_t i; + for( i = 0; i < n; i++ ) + { + cl_ulong r = inA[i]; + if( inA[i] < 0 ) + r = -inA[i]; + if( r != outptr[i] ) + { log_info( "%ld) Failure for abs( (long%s) 0x%16.16llx) = *0x%16.16llx vs 0x%16.16llx\n", i, sizeName, inA[i],r, outptr[i] ); return -1; } + } + return 0; +} + + + +static int verify_abs_uchar( const void *p, const void *q, size_t n, const char *sizeName, size_t vecSize ) +{ + const cl_uchar *inA = (const cl_uchar*) p; + const cl_uchar *outptr = (const cl_uchar*) q; + size_t i; + for( i = 0; i < n; i++ ) + { + cl_uchar r = inA[i]; + if( r != outptr[i] ) + { log_info( "%ld) Failure for abs( (uchar%s) 0x%2.2x) = *0x%2.2x vs 0x%2.2x\n", i, sizeName, inA[i],r, outptr[i] ); return -1; } + } + return 0; +} + + +static int verify_abs_ushort( const void *p, const void *q, size_t n, const char *sizeName, size_t vecSize ) +{ + const cl_ushort *inA = (const cl_ushort*) p; + const cl_ushort *outptr = (const cl_ushort*) q; + size_t i; + for( i = 0; i < n; i++ ) + { + cl_ushort r = inA[i]; + if( r != outptr[i] ) + { log_info( "%ld) Failure for abs( (short%s) 0x%4.4x) = *0x%4.4x vs 0x%4.4x\n", i, sizeName, inA[i],r, outptr[i] ); return -1; } + } + return 0; +} + +static int verify_abs_uint( const void *p, const void *q, size_t n, const char *sizeName , size_t vecSize) +{ + const cl_uint *inA = (const cl_uint*) p; + const cl_uint *outptr = (const cl_uint*) q; + size_t i; + for( i = 0; i < n; i++ ) + { + cl_uint r = inA[i]; + if( r != outptr[i] ) + { log_info( "%ld) Failure for abs( (int%s) 0x%2.2x) = *0x%8.8x vs 0x%8.8x\n", i, sizeName, inA[i],r, outptr[i] ); return -1; } + } + return 0; +} + +static int verify_abs_ulong( const void *p, const void *q, size_t n, const char *sizeName, size_t vecSize ) +{ + const cl_ulong *inA = (const cl_ulong*) p; + const cl_ulong *outptr 
= (const cl_ulong*) q; + size_t i; + for( i = 0; i < n; i++ ) + { + cl_ulong r = inA[i]; + if( r != outptr[i] ) + { log_info( "%ld) Failure for abs( (long%s) 0x%16.16llx) = *0x%16.16llx vs 0x%16.16llx\n", i, sizeName, inA[i],r, outptr[i] ); return -1; } + } + return 0; +} + + +typedef int (*verifyFunc)( const void *, const void *, size_t n, const char *sizeName, size_t vecSize ); +static const verifyFunc verify[] = { + verify_abs_char, verify_abs_short, verify_abs_int, verify_abs_long, + verify_abs_uchar, verify_abs_ushort, verify_abs_uint, verify_abs_ulong +}; + +static const char *test_str_names[] = { "char", "short", "int", "long" , + "uchar", "ushort", "uint", "ulong"}; +static const char *test_ustr_names[] = { "uchar", "ushort", "uint", "ulong" , + "uchar", "ushort", "uint", "ulong"}; +static const int vector_sizes[] = {1, 2, 3, 4, 8, 16}; +static const char *vector_size_names[] = { "", "2", "3", "4", "8", "16" }; +static const char *vector_size_names_io_types[] = { "", "2", "", "4", "8", "16" }; +static const size_t kSizes[9] = { 1, 2, 4, 8, 1, 2, 4, 8 }; + +static const char * source_loads[] = { + "srcA[tid]", + "vload3(tid, srcA)" +}; + +static const char * dest_stores[] = { + " dst[tid] = tmp;\n", + " vstore3(tmp, tid, dst);\n" +}; + +int test_abs(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + cl_int *input_ptr, *output_ptr, *p; + int err; + int i; + cl_uint vectorSizeIdx; + cl_uint type; + MTdata d; + int fail_count = 0; + + size_t length = sizeof(cl_int) * 4 * n_elems; + + input_ptr = (cl_int*)malloc(length); + output_ptr = (cl_int*)malloc(length); + + p = input_ptr; + d = init_genrand( gRandomSeed ); + for (i=0; i +#include +#include +#include + +#include "procs.h" + +static int verify_absdiff_char( const void *p, const void *q, const void *r, size_t n, const char *sizeName, size_t vecSize ) +{ + const cl_char *inA = (const cl_char *)p; + const cl_char *inB = (const cl_char *)q; + const cl_uchar *outptr = (const 
cl_uchar *)r; + size_t i; + for( i = 0; i < n; i++ ) + { + cl_uchar r = inA[i] - inB[i]; + if( inB[i] > inA[i] ) + r = inB[i] - inA[i]; + if( r != outptr[i] ) + { log_info( "%ld) Failure for absdiff( (char%s) 0x%2.2x, (char%s) 0x%2.2x) = *0x%2.2x vs 0x%2.2x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; } + } + return 0; +} + +static int verify_absdiff_uchar( const void *p, const void *q, const void *r, size_t n, const char *sizeName, size_t vecSize ) +{ + const cl_uchar *inA = (const cl_uchar *)p; + const cl_uchar *inB = (const cl_uchar *)q; + const cl_uchar *outptr = (const cl_uchar *)r; + size_t i; + for( i = 0; i < n; i++ ) + { + cl_uchar r = inA[i] - inB[i]; + if( inB[i] > inA[i] ) + r = inB[i] - inA[i]; + if( r != outptr[i] ) + { log_info( "%ld) Failure for absdiff( (uchar%s) 0x%2.2x, (uchar%s) 0x%2.2x) = *0x%2.2x vs 0x%2.2x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; } + } + return 0; +} + +static int verify_absdiff_short( const void *p, const void *q, const void *r, size_t n, const char *sizeName, size_t vecSize ) +{ + const cl_short *inA = (const cl_short *)p; + const cl_short *inB = (const cl_short *)q; + const cl_ushort *outptr = (const cl_ushort *)r; + size_t i; + for( i = 0; i < n; i++ ) + { + cl_ushort r = inA[i] - inB[i]; + if( inB[i] > inA[i] ) + r = inB[i] - inA[i]; + if( r != outptr[i] ) + { log_info( "%ld) Failure for absdiff( (short%s) 0x%4.4x, (short%s) 0x%4.4x) = *0x%4.4x vs 0x%4.4x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; } + } + return 0; +} + +static int verify_absdiff_ushort( const void *p, const void *q, const void *r, size_t n, const char *sizeName, size_t vecSize ) +{ + const cl_ushort *inA = (const cl_ushort *)p; + const cl_ushort *inB = (const cl_ushort *)q; + const cl_ushort *outptr = (const cl_ushort *)r; + size_t i; + for( i = 0; i < n; i++ ) + { + cl_ushort r = inA[i] - inB[i]; + if( inB[i] > inA[i] ) + r = inB[i] - inA[i]; + if( r != outptr[i] ) + { 
log_info( "%ld) Failure for absdiff( (ushort%s) 0x%4.4x, (ushort%s) 0x%4.4x) = *0x%4.4x vs 0x%4.4x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; } + } + return 0; +} + +static int verify_absdiff_int( const void *p, const void *q, const void *r, size_t n, const char *sizeName, size_t vecSize ) +{ + const cl_int *inA = (const cl_int *)p; + const cl_int *inB = (const cl_int *)q; + const cl_uint *outptr = (const cl_uint *)r; + size_t i; + for( i = 0; i < n; i++ ) + { + cl_uint r = inA[i] - inB[i]; + if( inB[i] > inA[i] ) + r = inB[i] - inA[i]; + if( r != outptr[i] ) + { + log_info( "%ld) Failure for absdiff( (int%s) 0x%8.8x, (int%s) 0x%8.8x) = *0x%8.8x vs 0x%8.8x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); + return -1; + } + } + return 0; +} + +static int verify_absdiff_uint( const void *p, const void *q, const void *r, size_t n, const char *sizeName, size_t vecSize ) +{ + const cl_uint *inA = (const cl_uint *)p; + const cl_uint *inB = (const cl_uint *)q; + const cl_uint *outptr = (const cl_uint *)r; + size_t i; + for( i = 0; i < n; i++ ) + { + cl_uint r = inA[i] - inB[i]; + if( inB[i] > inA[i] ) + r = inB[i] - inA[i]; + if( r != outptr[i] ) + { log_info( "%ld) Failure for absdiff( (uint%s) 0x%8.8x, (uint%s) 0x%8.8x) = *0x%8.8x vs 0x%8.8x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; } + } + return 0; +} + +static int verify_absdiff_long( const void *p, const void *q, const void *r, size_t n, const char *sizeName, size_t vecSize ) +{ + const cl_long *inA = (const cl_long *)p; + const cl_long *inB = (const cl_long *)q; + const cl_ulong *outptr = (const cl_ulong *)r; + size_t i; + for( i = 0; i < n; i++ ) + { + cl_ulong r = inA[i] - inB[i]; + if( inB[i] > inA[i] ) + r = inB[i] - inA[i]; + if( r != outptr[i] ) + { log_info( "%ld) Failure for absdiff( (long%s) 0x%16.16llx, (long%s) 0x%16.16llx) = *0x%16.16llx vs 0x%16.16llx\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; } + } + return 0; 
+} + +static int verify_absdiff_ulong( const void *p, const void *q, const void *r, size_t n, const char *sizeName, size_t vecSize ) +{ + const cl_ulong *inA = (const cl_ulong *)p; + const cl_ulong *inB = (const cl_ulong *)q; + const cl_ulong *outptr = (const cl_ulong *)r; + size_t i; + for( i = 0; i < n; i++ ) + { + cl_ulong r = inA[i] - inB[i]; + if( inB[i] > inA[i] ) + r = inB[i] - inA[i]; + if( r != outptr[i] ) + { log_info( "%ld) Failure for absdiff( (ulong%s) 0x%16.16llx, (ulong%s) 0x%16.16llx) = *0x%16.16llx vs 0x%16.16llx\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; } + } + return 0; +} + +typedef int (*verifyFunc)( const void *, const void *, const void *, size_t n, const char *sizeName, size_t vecSize); +static const verifyFunc verify[] = { verify_absdiff_char, verify_absdiff_uchar, + verify_absdiff_short, verify_absdiff_ushort, + verify_absdiff_int, verify_absdiff_uint, + verify_absdiff_long, verify_absdiff_ulong }; + +//FIXME: enable long and ulong when GPU path is working +static const char *test_str_names[] = { "char", "uchar", "short", "ushort", "int", "uint", "long", "ulong" }; + +//FIXME: enable "16" when support for > 64 byte vectors go into LLVM +static const int vector_sizes[] = {1, 2, 3, 4, 8, 16}; +static const char *vector_size_names[] = { "", "2", "3", "4", "8", "16" }; +static const char *vector_param_size_names[] = { "", "2", "", "4", "8", "16" }; +static const size_t kSizes[8] = { 1, 1, 2, 2, 4, 4, 8, 8 }; + +static void printSrc(const char *src[], int nSrcStrings) { + int i; + for(i = 0; i < nSrcStrings; ++i) { + log_info("%s", src[i]); + } +} + +int test_absdiff(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + cl_int *input_ptr[2], *output_ptr, *p; + int err; + int i; + cl_uint vectorSize; + cl_uint type; + MTdata d; + int fail_count = 0; + + size_t length = sizeof(cl_int) * 4 * n_elems; + + input_ptr[0] = (cl_int*)malloc(length); + input_ptr[1] = (cl_int*)malloc(length); + 
output_ptr = (cl_int*)malloc(length); + + d = init_genrand( gRandomSeed ); + p = input_ptr[0]; + for (i=0; i<4 * n_elems; i++) + p[i] = genrand_int32(d); + p = input_ptr[1]; + for (i=0; i<4 * n_elems; i++) + p[i] = genrand_int32(d); + free_mtdata(d); d = NULL; + + for( type = 0; type < sizeof( test_str_names ) / sizeof( test_str_names[0] ); type++ ) + { + //embedded devices don't support long/ulong so skip over + if (! gHasLong && strstr(test_str_names[type],"long")) + { + log_info( "WARNING: 64 bit integers are not supported on this device. Skipping %s\n", test_str_names[type] ); + continue; + } + + verifyFunc f = verify[ type ]; + // Note: restrict the element count here so we don't end up overrunning the output buffer if we're compensating for 32-bit writes + size_t elementCount = length / kSizes[type]; + cl_mem streams[3]; + + log_info( "%s", test_str_names[type] ); + fflush( stdout ); + + // Set up data streams for the type + streams[0] = clCreateBuffer(context, 0, length, NULL, NULL); + if (!streams[0]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + streams[1] = clCreateBuffer(context, 0, length, NULL, NULL); + if (!streams[1]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + streams[2] = clCreateBuffer(context, 0, length, NULL, NULL); + if (!streams[2]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + + err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL); + if (err != CL_SUCCESS) + { + log_error("clEnqueueWriteBuffer failed\n"); + return -1; + } + err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL); + if (err != CL_SUCCESS) + { + log_error("clEnqueueWriteBuffer failed\n"); + return -1; + } + + for( vectorSize = 0; vectorSize < sizeof( vector_size_names ) / sizeof( vector_size_names[0] ); vectorSize++ ) + { + cl_program program = NULL; + cl_kernel kernel = NULL; + + const char *source[] = { + "__kernel void test_absdiff_", 
test_str_names[type], vector_size_names[vectorSize], + "(__global ", test_str_names[type], vector_param_size_names[vectorSize], + " *srcA, __global ", test_str_names[type], vector_param_size_names[vectorSize], + " *srcB, __global u", test_str_names[type & -2], vector_param_size_names[vectorSize], + " *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " ", test_str_names[type], vector_size_names[vectorSize], " sA, sB;\n", + " sA = ", ( vector_sizes[ vectorSize ] == 3 ) ? "vload3( tid, srcA )" : "srcA[tid]", ";\n", + " sB = ", ( vector_sizes[ vectorSize ] == 3 ) ? "vload3( tid, srcB )" : "srcB[tid]", ";\n", + " u", test_str_names[type & -2], vector_size_names[vectorSize], " dstVal = abs_diff(sA, sB);\n" + " ", ( vector_sizes[ vectorSize ] == 3 ) ? "vstore3( dstVal, tid, dst )" : "dst[ tid ] = dstVal", ";\n", + "}\n" }; + + + char kernelName[128]; + snprintf( kernelName, sizeof( kernelName ), "test_absdiff_%s%s", test_str_names[type], vector_size_names[vectorSize] ); + + err = create_single_kernel_helper(context, &program, &kernel, sizeof( source ) / sizeof( source[0] ), source, kernelName ); + + if (err) { + return -1; + } + +#if 0 + log_info("About to run\n"); + log_info("=====\n"); + printSrc(source, sizeof(source)/sizeof(source[0])); + log_info("=====\n"); +#endif + + err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]); + err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]); + err |= clSetKernelArg(kernel, 2, sizeof streams[2], &streams[2]); + if (err != CL_SUCCESS) + { + log_error("clSetKernelArgs failed\n"); + return -1; + } + + //Wipe the output buffer clean + uint32_t pattern = 0xdeadbeef; + memset_pattern4( output_ptr, &pattern, length ); + err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + log_error("clEnqueueWriteBuffer failed\n"); + return -1; + } + + size_t size = elementCount / (vector_sizes[vectorSize]); + err = clEnqueueNDRangeKernel(queue, 
kernel, 1, NULL, &size, NULL, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + log_error("clEnqueueNDRangeKernel failed\n"); + return -1; + } + + err = clEnqueueReadBuffer(queue, streams[2], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + log_error("clEnqueueReadBuffer failed\n"); + return -1; + } + + char *inP = (char *)input_ptr[0]; + char *inP2 = (char *)input_ptr[1]; + char *outP = (char *)output_ptr; + + for( size_t e = 0; e < size; e++ ) + { + if( f( inP, inP2, outP, (vector_sizes[vectorSize]), vector_size_names[vectorSize], vector_sizes[vectorSize] ) ) { + printSrc(source, sizeof(source)/sizeof(source[0])); + ++fail_count; break; // return -1; + } + inP += kSizes[type] * ( (vector_sizes[vectorSize]) ); + inP2 += kSizes[type] * ( (vector_sizes[vectorSize]) ); + outP += kSizes[type] * ( (vector_sizes[vectorSize]) ); + } + + clReleaseKernel( kernel ); + clReleaseProgram( program ); + log_info( "." ); + fflush( stdout ); + } + + clReleaseMemObject( streams[0] ); + clReleaseMemObject( streams[1] ); + clReleaseMemObject( streams[2] ); + log_info( "done\n" ); + } + + + if(fail_count) { + log_info("Failed on %d types\n", fail_count); + return -1; + } + log_info("ABS_DIFF test passed\n"); + + free(input_ptr[0]); + free(input_ptr[1]); + free(output_ptr); + + return err; +} + + diff --git a/test_conformance/integer_ops/test_add_sat.c b/test_conformance/integer_ops/test_add_sat.c new file mode 100644 index 00000000..702b735b --- /dev/null +++ b/test_conformance/integer_ops/test_add_sat.c @@ -0,0 +1,378 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include +#include + +#include "procs.h" + +#define UCHAR_MIN 0 +#define USHRT_MIN 0 +#define UINT_MIN 0 + +#ifndef MAX +#define MAX( _a, _b ) ( (_a) > (_b) ? (_a) : (_b) ) +#endif +#ifndef MIN +#define MIN( _a, _b ) ( (_a) < (_b) ? (_a) : (_b) ) +#endif + +static int verify_addsat_char( const cl_char *inA, const cl_char *inB, const cl_char *outptr, int n, const char *sizeName, int vecSize ) +{ + int i; + for( i = 0; i < n; i++ ) + { + cl_int r = (cl_int) inA[i] + (cl_int) inB[i]; + r = MAX( r, CL_CHAR_MIN ); + r = MIN( r, CL_CHAR_MAX ); + + if( r != outptr[i] ) + { log_info( "\n%d) Failure for add_sat( (char%s) 0x%2.2x, (char%s) 0x%2.2x) = *0x%2.2x vs 0x%2.2x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; } + } + return 0; +} + +static int verify_addsat_uchar( const cl_uchar *inA, const cl_uchar *inB, const cl_uchar *outptr, int n, const char *sizeName, int vecSize ) +{ + int i; + for( i = 0; i < n; i++ ) + { + cl_int r = (int) inA[i] + (int) inB[i]; + r = MAX( r, 0 ); + r = MIN( r, CL_UCHAR_MAX ); + if( r != outptr[i] ) + { log_info( "\n%d) Failure for add_sat( (uchar%s) 0x%2.2x, (uchar%s) 0x%2.2x) = *0x%2.2x vs 0x%2.2x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; } + } + return 0; +} + +static int verify_addsat_short( const cl_short *inA, const cl_short *inB, const cl_short *outptr, int n, const char *sizeName , int vecSize) +{ + int i; + for( i = 0; i < n; i++ ) + { + cl_int r = (cl_int) inA[i] + (cl_int) inB[i]; + 
r = MAX( r, CL_SHRT_MIN ); + r = MIN( r, CL_SHRT_MAX ); + + if( r != outptr[i] ) + { log_info( "\n%d) Failure for add_sat( (short%s) 0x%4.4x, (short%s) 0x%4.4x) = *0x%4.4x vs 0x%4.4x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; } + } + return 0; +} + +static int verify_addsat_ushort( const cl_ushort *inA, const cl_ushort *inB, const cl_ushort *outptr, int n, const char *sizeName , int vecSize) +{ + int i; + for( i = 0; i < n; i++ ) + { + cl_int r = (cl_int) inA[i] + (cl_int) inB[i]; + r = MAX( r, 0 ); + r = MIN( r, CL_USHRT_MAX ); + + if( r != outptr[i] ) + { log_info( "\n%d) Failure for add_sat( (ushort%s) 0x%4.4x, (ushort%s) 0x%4.4x) = *0x%4.4x vs 0x%4.4x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; } + } + return 0; +} + +static int verify_addsat_int( const cl_int *inA, const cl_int *inB, const cl_int *outptr, int n, const char *sizeName , int vecSize) +{ + int i; + for( i = 0; i < n; i++ ) + { + cl_int r = (cl_int) ((cl_uint) inA[i] + (cl_uint)inB[i]); + if( inB[i] > 0 ) + { + if( r < inA[i] ) + r = CL_INT_MAX; + } + else + { + if( r > inA[i] ) + r = CL_INT_MIN; + } + + + if( r != outptr[i] ) + { log_info( "\n%d) Failure for add_sat( (int%s) 0x%8.8x, (int%s) 0x%8.8x) = *0x%8.8x vs 0x%8.8x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; } + } + return 0; +} + +static int verify_addsat_uint( const cl_uint *inA, const cl_uint *inB, const cl_uint *outptr, int n, const char *sizeName , int vecSize) +{ + int i; + for( i = 0; i < n; i++ ) + { + cl_uint r = inA[i] + inB[i]; + if( r < inA[i] ) + r = CL_UINT_MAX; + + if( r != outptr[i] ) + { log_info( "\n%d) Failure for add_sat( (uint%s) 0x%8.8x, (uint%s) 0x%8.8x) = *0x%8.8x vs 0x%8.8x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; } + } + return 0; +} + +static int verify_addsat_long( const cl_long *inA, const cl_long *inB, const cl_long *outptr, int n, const char *sizeName , int vecSize) +{ + int i; + for( i = 0; i < n; i++ ) 
+ { + cl_long r = (cl_long)((cl_ulong)inA[i] + (cl_ulong)inB[i]); + if( inB[i] > 0 ) + { + if( r < inA[i] ) + r = CL_LONG_MAX; + } + else + { + if( r > inA[i] ) + r = CL_LONG_MIN; + } + if( r != outptr[i] ) + { log_info( "%d) Failure for add_sat( (long%s) 0x%16.16llx, (long%s) 0x%16.16llx) = *0x%16.16llx vs 0x%16.16llx\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; } + } + return 0; +} + +static int verify_addsat_ulong( const cl_ulong *inA, const cl_ulong *inB, const cl_ulong *outptr, int n, const char *sizeName , int vecSize) +{ + int i; + for( i = 0; i < n; i++ ) + { + cl_ulong r = inA[i] + inB[i]; + if( r < inA[i] ) + r = CL_ULONG_MAX; + if( r != outptr[i] ) + { log_info( "%d) Failure for add_sat( (ulong%s) 0x%16.16llx, (ulong%s) 0x%16.16llx) = *0x%16.16llx vs 0x%16.16llx\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; } + } + return 0; +} + +typedef int (*verifyFunc)( const void *, const void *, const void *, int n, const char *sizeName, int ); +static const verifyFunc verify[] = { (verifyFunc) verify_addsat_char, (verifyFunc) verify_addsat_uchar, + (verifyFunc) verify_addsat_short, (verifyFunc) verify_addsat_ushort, + (verifyFunc) verify_addsat_int, (verifyFunc) verify_addsat_uint, + (verifyFunc) verify_addsat_long, (verifyFunc) verify_addsat_ulong }; +//FIXME: enable long and ulong when GPU path is working +static const char *test_str_names[] = { "char", "uchar", "short", "ushort", "int", "uint", "long", "ulong" }; + +//FIXME: enable "16" when support for > 64 byte vectors go into LLVM +static const int vector_sizes[] = {1, 2, 3, 4, 8, 16}; +static const char *vector_size_names[] = { "", "2", "3", "4", "8", "16" }; +static const size_t kSizes[8] = { 1, 1, 2, 2, 4, 4, 8, 8 }; + +int test_add_sat(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + cl_int *input_ptr[2], *output_ptr, *p; + int err; + int i; + cl_uint vectorSize; + cl_uint type; + MTdata d; + int fail_count = 0; + + size_t 
length = sizeof(cl_int) * 4 * n_elems; + + input_ptr[0] = (cl_int*)malloc(length); + input_ptr[1] = (cl_int*)malloc(length); + output_ptr = (cl_int*)malloc(length); + + d = init_genrand( gRandomSeed ); + p = input_ptr[0]; + for (i=0; i<4 * n_elems; i++) + p[i] = genrand_int32(d); + p = input_ptr[1]; + for (i=0; i<4 * n_elems; i++) + p[i] = genrand_int32(d); + free_mtdata(d); d = NULL; + + for( type = 0; type < sizeof( test_str_names ) / sizeof( test_str_names[0] ); type++ ) + { + + //embedded devices don't support long/ulong so skip over + if (! gHasLong && strstr(test_str_names[type],"long")) + { + log_info( "WARNING: 64 bit integers are not supported on this device. Skipping %s\n", test_str_names[type] ); + continue; + } + + verifyFunc f = verify[ type ]; + // Note: restrict the element count here so we don't end up overrunning the output buffer if we're compensating for 32-bit writes + size_t elementCount = length / kSizes[type]; + cl_mem streams[3]; + + log_info( "%s", test_str_names[type] ); + fflush( stdout ); + + // Set up data streams for the type + streams[0] = clCreateBuffer(context, 0, length, NULL, NULL); + if (!streams[0]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + streams[1] = clCreateBuffer(context, 0, length, NULL, NULL); + if (!streams[1]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + streams[2] = clCreateBuffer(context, 0, length, NULL, NULL); + if (!streams[2]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + + err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL); + if (err != CL_SUCCESS) + { + log_error("clEnqueueWriteBuffer failed\n"); + return -1; + } + err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL); + if (err != CL_SUCCESS) + { + log_error("clEnqueueWriteBuffer failed\n"); + return -1; + } + + for( vectorSize = 0; vectorSize < sizeof( vector_size_names ) / sizeof( vector_size_names[0] ); vectorSize++ ) + 
{ + cl_program program = NULL; + cl_kernel kernel = NULL; + + const char *source[] = { + "__kernel void test_add_sat_", test_str_names[type], vector_size_names[vectorSize], + "(__global ", test_str_names[type], vector_size_names[vectorSize], + " *srcA, __global ", test_str_names[type], vector_size_names[vectorSize], + " *srcB, __global ", test_str_names[type], vector_size_names[vectorSize], + " *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " ", test_str_names[type], vector_size_names[vectorSize], " tmp = add_sat(srcA[tid], srcB[tid]);\n" + " dst[tid] = tmp;\n" + "}\n" }; + + + const char *sourceV3[] = { + "__kernel void test_add_sat_", test_str_names[type], vector_size_names[vectorSize], + "(__global ", test_str_names[type], + " *srcA, __global ", test_str_names[type], + " *srcB, __global ", test_str_names[type], + " *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " ", test_str_names[type], vector_size_names[vectorSize], " tmp = add_sat(vload3(tid, srcA), vload3(tid, srcB));\n" + " vstore3(tmp, tid, dst);\n" + "}\n" }; + + char kernelName[128]; + snprintf( kernelName, sizeof( kernelName ), "test_add_sat_%s%s", test_str_names[type], vector_size_names[vectorSize] ); + if(vector_sizes[vectorSize] != 3) + { + err = create_single_kernel_helper(context, &program, &kernel, sizeof( source ) / sizeof( source[0] ), source, kernelName ); + } + else + { + err = create_single_kernel_helper(context, &program, &kernel, sizeof( sourceV3 ) / sizeof( sourceV3[0] ), sourceV3, kernelName ); + } + if (err) + return -1; + + err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]); + err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]); + err |= clSetKernelArg(kernel, 2, sizeof streams[2], &streams[2]); + if (err != CL_SUCCESS) + { + log_error("clSetKernelArgs failed\n"); + return -1; + } + + //Wipe the output buffer clean + uint32_t pattern = 0xdeadbeef; + memset_pattern4( output_ptr, &pattern, length ); + err = clEnqueueWriteBuffer(queue, 
streams[2], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + log_error("clWriteArray failed\n"); + return -1; + } + + size_t size = elementCount / (vector_sizes[vectorSize]); + err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &size, NULL, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + log_error("clExecuteKernel failed\n"); + return -1; + } + + err = clEnqueueReadBuffer(queue, streams[2], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + log_error("clReadArray failed\n"); + return -1; + } + + char *inP = (char *)input_ptr[0]; + char *inP2 = (char *)input_ptr[1]; + char *outP = (char *)output_ptr; + + for( size_t e = 0; e < size; e++ ) + { + if( f( inP, inP2, outP, (vector_sizes[vectorSize]), vector_size_names[vectorSize], vector_sizes[vectorSize] ) ) { + ++fail_count; break; // return -1; + } + inP += kSizes[type] * vector_sizes[vectorSize]; + inP2 += kSizes[type] * vector_sizes[vectorSize]; + outP += kSizes[type] * vector_sizes[vectorSize]; + } + + clReleaseKernel( kernel ); + clReleaseProgram( program ); + log_info( "." ); + fflush( stdout ); + } + + clReleaseMemObject( streams[0] ); + clReleaseMemObject( streams[1] ); + clReleaseMemObject( streams[2] ); + log_info( "done\n" ); + } + if(fail_count) { + log_info("Failed on %d types\n", fail_count); + return -1; + } + + log_info("ADD_SAT test passed\n"); + + free(input_ptr[0]); + free(input_ptr[1]); + free(output_ptr); + + return err; +} + + diff --git a/test_conformance/integer_ops/test_int_basic_ops.c b/test_conformance/integer_ops/test_int_basic_ops.c new file mode 100644 index 00000000..64040251 --- /dev/null +++ b/test_conformance/integer_ops/test_int_basic_ops.c @@ -0,0 +1,1551 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include + +#include "procs.h" +#include "../../test_common/harness/conversions.h" +#include "../../test_common/harness/ThreadPool.h" + +#define NUM_TESTS 23 + +#define LONG_MATH_SHIFT_SIZE 26 +#define QUICK_MATH_SHIFT_SIZE 16 + +static const char *kernel_code = +"__kernel void test(__global %s%s *srcA, __global %s%s *srcB, __global %s%s *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] %s srcB[tid];\n" +"}\n"; + +static const char *kernel_code_V3 = +"__kernel void test(__global %s /*%s*/ *srcA, __global %s/*%s*/ *srcB, __global %s/*%s*/ *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" vstore3( vload3( tid, srcA ) %s vload3( tid, srcB), tid, dst );\n" +"}\n"; + +static const char *kernel_code_V3_scalar_vector = +"__kernel void test(__global %s /*%s*/ *srcA, __global %s/*%s*/ *srcB, __global %s/*%s*/ *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" vstore3( srcA[tid] %s vload3( tid, srcB), tid, dst );\n" +"}\n"; + +static const char *kernel_code_V3_vector_scalar = +"__kernel void test(__global %s /*%s*/ *srcA, __global %s/*%s*/ *srcB, __global %s/*%s*/ *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" vstore3( vload3( tid, srcA ) %s srcB[tid], tid, dst );\n" +"}\n"; + + +// Separate kernel here because it does not fit the pattern +static const char *not_kernel_code = +"__kernel void test(__global %s%s *srcA, __global %s%s *srcB, __global %s%s *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] 
= %ssrcA[tid];\n" +"}\n"; + +static const char *not_kernel_code_V3 = +"__kernel void test(__global %s /*%s*/ *srcA, __global %s/*%s*/ *srcB, __global %s/*%s*/ *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" vstore3( %s vload3( tid, srcA ), tid, dst );\n" +"}\n"; + +static const char *kernel_code_scalar_shift = +"__kernel void test(__global %s%s *srcA, __global %s%s *srcB, __global %s%s *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = srcA[tid] %s srcB[tid]%s;\n" +"}\n"; + +static const char *kernel_code_scalar_shift_V3 = +"__kernel void test(__global %s/*%s*/ *srcA, __global %s/*%s*/ *srcB, __global %s/*%s*/ *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" vstore3( vload3( tid, srcA) %s vload3( tid, srcB )%s, tid, dst );\n" +"}\n"; + +static const char *kernel_code_question_colon = +"__kernel void test(__global %s%s *srcA, __global %s%s *srcB, __global %s%s *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = (srcA[tid]%s < srcB[tid]%s) ? srcA[tid] : srcB[tid];\n" +"}\n"; + +static const char *kernel_code_question_colon_V3 = +"__kernel void test(__global %s/*%s*/ *srcA, __global %s/*%s*/ *srcB, __global %s/*%s*/ *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" vstore3( (vload3( tid, srcA)%s < vload3(tid, srcB)%s) ? 
vload3( tid, srcA) : vload3( tid, srcB), tid, dst );\n" +"}\n"; + + + + +// External verification and data generation functions +extern const char *tests[]; +extern const char *test_names[]; +extern int verify_long(int test, size_t vector_size, cl_long *inptrA, cl_long *inptrB, cl_long *outptr, size_t n); +extern void init_long_data(uint64_t indx, int num_elements, cl_long *input_ptr[], MTdata d) ; +extern int verify_ulong(int test, size_t vector_size, cl_ulong *inptrA, cl_ulong *inptrB, cl_ulong *outptr, size_t n); +extern void init_ulong_data(uint64_t indx, int num_elements, cl_ulong *input_ptr[], MTdata d) ; +extern int verify_int(int test, size_t vector_size, cl_int *inptrA, cl_int *inptrB, cl_int *outptr, size_t n); +extern void init_int_data(uint64_t indx, int num_elements, cl_int *input_ptr[], MTdata d) ; +extern int verify_uint(int test, size_t vector_size, cl_uint *inptrA, cl_uint *inptrB, cl_uint *outptr, size_t n); +extern void init_uint_data(uint64_t indx, int num_elements, cl_uint *input_ptr[], MTdata d) ; +extern int verify_short(int test, size_t vector_size, cl_short *inptrA, cl_short *inptrB, cl_short *outptr, size_t n); +extern void init_short_data(uint64_t indx, int num_elements, cl_short *input_ptr[], MTdata d) ; +extern int verify_ushort(int test, size_t vector_size, cl_ushort *inptrA, cl_ushort *inptrB, cl_ushort *outptr, size_t n); +extern void init_ushort_data(uint64_t indx, int num_elements, cl_ushort *input_ptr[], MTdata d) ; +extern int verify_char(int test, size_t vector_size, cl_char *inptrA, cl_char *inptrB, cl_char *outptr, size_t n); +extern void init_char_data(uint64_t indx, int num_elements, cl_char *input_ptr[], MTdata d) ; +extern int verify_uchar(int test, size_t vector_size, cl_uchar *inptrA, cl_uchar *inptrB, cl_uchar *outptr, size_t n); +extern void init_uchar_data(uint64_t indx, int num_elements, cl_uchar *input_ptr[], MTdata d) ; + +// Supported type list +const ExplicitType types[] = { + kChar, + kUChar, + kShort, + 
kUShort, + kInt, + kUInt, + kLong, + kULong, +}; + +enum TestStyle +{ + kDontCare=0, + kBothVectors, + kInputAScalar, + kInputBScalar, + kVectorScalarScalar, // for the ?: operator only; indicates vector ? scalar : scalar. + kInputCAlsoScalar = 0x80 // Or'ed flag to indicate that the selector for the ?: operator is also scalar +}; + +typedef struct _perThreadData +{ + cl_mem m_streams[3]; + cl_int *m_input_ptr[2], *m_output_ptr; + size_t m_type_size; + cl_program m_program[NUM_TESTS]; + cl_kernel m_kernel[NUM_TESTS]; +} perThreadData; + + +perThreadData * perThreadDataNew() +{ + perThreadData * pThis = (perThreadData *)malloc(sizeof(perThreadData)); + + + memset(pThis->m_program, 0, sizeof(cl_program)*NUM_TESTS); + memset(pThis->m_kernel, 0, sizeof(cl_kernel)*NUM_TESTS); + + pThis->m_input_ptr[0] = pThis->m_input_ptr[1] = NULL; + pThis->m_output_ptr = NULL; + + return pThis; +} + + +void perThreadDataDestroy(perThreadData * pThis) +{ + int i; + // cleanup + clReleaseMemObject(pThis->m_streams[0]); + clReleaseMemObject(pThis->m_streams[1]); + clReleaseMemObject(pThis->m_streams[2]); + for (i=0; im_kernel[i] != NULL) clReleaseKernel(pThis->m_kernel[i]); + if (pThis->m_program[i] != NULL) clReleaseProgram(pThis->m_program[i]); + } + free(pThis->m_input_ptr[0]); + free(pThis->m_input_ptr[1]); + free(pThis->m_output_ptr); + + free(pThis); +} + + +cl_int perThreadDataInit(perThreadData * pThis, ExplicitType type, + int num_elements, int vectorSize, + int inputAVecSize, int inputBVecSize, + cl_context context, int start_test_ID, + int end_test_ID, int testID) +{ + int i; + const char * sizeNames[] = { "", "", "2", "3", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" }; + + const char *type_name = get_explicit_type_name(type); + pThis->m_type_size = get_explicit_type_size(type); + int err; + // Used for the && and || tests where the vector case returns a signed value + const char *signed_type_name; + switch (type) { + case kChar: + case kUChar: + signed_type_name = 
get_explicit_type_name(kChar); + break; + case kShort: + case kUShort: + signed_type_name = get_explicit_type_name(kShort); + break; + case kInt: + case kUInt: + signed_type_name = get_explicit_type_name(kInt); + break; + case kLong: + case kULong: + signed_type_name = get_explicit_type_name(kLong); + break; + default: + log_error("Invalid type.\n"); + return -1; + break; + } + + pThis->m_input_ptr[0] = + (cl_int*)malloc(pThis->m_type_size * num_elements * vectorSize); + pThis->m_input_ptr[1] = + (cl_int*)malloc(pThis->m_type_size * num_elements * vectorSize); + pThis->m_output_ptr = + (cl_int*)malloc(pThis->m_type_size * num_elements * vectorSize); + pThis->m_streams[0] = + clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), pThis->m_type_size * num_elements * inputAVecSize, NULL, &err); + + test_error(err, "clCreateBuffer failed"); + + pThis->m_streams[1] = + clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), pThis->m_type_size * num_elements * inputBVecSize, NULL, &err ); + + test_error(err, "clCreateBuffer failed"); + + pThis->m_streams[2] = + clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), pThis->m_type_size * num_elements * vectorSize, NULL, &err ); + + test_error(err, "clCreateBuffer failed"); + + const char *vectorString = sizeNames[ vectorSize ]; + const char *inputAVectorString = sizeNames[ inputAVecSize ]; + const char *inputBVectorString = sizeNames[ inputBVecSize ]; + + if (testID == -1) + { + log_info("\tTesting %s%s (%d bytes)...\n", type_name, vectorString, (int)(pThis->m_type_size*vectorSize)); + } + + char programString[4096]; + const char *ptr; + + + const char * kernel_code_base = ( vectorSize != 3 ) ? kernel_code : ( inputAVecSize == 1 ) ? kernel_code_V3_scalar_vector : ( inputBVecSize == 1 ) ? kernel_code_V3_vector_scalar : kernel_code_V3; + + for (i=start_test_ID; i 1 + sprintf(programString, kernel_code_base, type_name, inputAVectorString, type_name, inputBVectorString, + ((vectorSize == 1) ? 
type_name : signed_type_name), vectorString, tests[i]); + break; + case 22: + // Need an unsigned result here for vector sizes > 1 + sprintf(programString, vectorSize == 3 ? not_kernel_code_V3 : not_kernel_code, type_name, inputAVectorString, type_name, inputBVectorString, + ((vectorSize == 1) ? type_name : signed_type_name), vectorString, tests[i]); + break; + default: + sprintf(programString, kernel_code_base, type_name, inputAVectorString, type_name, inputBVectorString, + type_name, vectorString, tests[i]); + break; + } + + //printf("kernel: %s\n", programString); + ptr = programString; + err = create_single_kernel_helper( context, + &(pThis->m_program[ i ]), + &(pThis->m_kernel[ i ]), 1, + &ptr, "test" ); + test_error( err, "Unable to create test kernel" ); + err = clSetKernelArg(pThis->m_kernel[i], 0, + sizeof pThis->m_streams[0], + &(pThis->m_streams[0]) ); + err |= clSetKernelArg(pThis->m_kernel[i], 1, + sizeof pThis->m_streams[1], + &(pThis->m_streams[1]) ); + err |= clSetKernelArg(pThis->m_kernel[i], 2, + sizeof pThis->m_streams[2], + &(pThis->m_streams[2]) ); + test_error(err, "clSetKernelArgs failed"); + } + + return CL_SUCCESS; +} + +typedef struct _globalThreadData +{ + cl_device_id m_deviceID; + cl_context m_context; + // cl_command_queue m_queue; + int m_num_elements; + int m_threadcount; + int m_vectorSize; + int m_num_runs_shift; + TestStyle m_style; + ExplicitType m_type; + MTdata * m_pRandData; + uint64_t m_offset; + int m_testID; + perThreadData **m_arrPerThreadData; +} globalThreadData; + + + +globalThreadData * globalThreadDataNew(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements, + int vectorSize, TestStyle style, int num_runs_shift, + ExplicitType type, int testID, + int threadcount) +{ + int i; + globalThreadData * pThis = (globalThreadData *)malloc(sizeof(globalThreadData)); + pThis->m_deviceID = deviceID; + pThis->m_context = context; + // pThis->m_queue = queue; + pThis->m_num_elements = num_elements; 
+ pThis->m_num_runs_shift = num_runs_shift; + pThis->m_vectorSize = vectorSize; + pThis->m_style = style; + pThis->m_type = type; + pThis->m_offset = (uint64_t)0; + pThis->m_testID = testID; + pThis->m_arrPerThreadData = NULL; + pThis->m_threadcount = threadcount; + + pThis->m_pRandData = (MTdata *)malloc(threadcount*sizeof(MTdata)); + pThis->m_arrPerThreadData = (perThreadData **) + malloc(threadcount*sizeof(perThreadData *)); + for(i=0; i < threadcount; ++i) + { + pThis->m_pRandData[i] = init_genrand(i+1); + pThis->m_arrPerThreadData[i] = NULL; + } + + return pThis; +} + +void globalThreadDataDestroy(globalThreadData * pThis) +{ + int i; + + for(i=0; i < pThis->m_threadcount; ++i) + { + free_mtdata(pThis->m_pRandData[i]); + if(pThis->m_arrPerThreadData[i] != NULL) + { + perThreadDataDestroy(pThis->m_arrPerThreadData[i]); + } + } + free(pThis->m_arrPerThreadData); + free(pThis->m_pRandData); + free(pThis); +} + +int +test_integer_ops(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, int vectorSize, TestStyle style, int num_runs_shift, ExplicitType type, int testID, MTdata randIn, uint64_t startIndx, uint64_t endIndx, + perThreadData ** ppThreadData); + + +cl_int test_integer_ops_do_thread( cl_uint job_id, cl_uint thread_id, void *userInfo ) +{ + cl_int error; cl_int result; + globalThreadData * threadInfoGlobal = (globalThreadData *)userInfo; + cl_command_queue queue; + +#if THREAD_DEBUG + log_error("Thread %x (job %x) about to create command queue\n", + thread_id, job_id); +#endif + + queue = clCreateCommandQueue (threadInfoGlobal->m_context, + threadInfoGlobal->m_deviceID,0, + &error); + + if(error != CL_SUCCESS) + { + log_error("Thread %x (job %x) could not create command queue\n", + thread_id, job_id); + return error; // should we clean up the queue too? 
+ } + +#if THREAD_DEBUG + log_error("Thread %x (job %x) created command queue\n", + thread_id, job_id); +#endif + + result = test_integer_ops( threadInfoGlobal->m_deviceID, + threadInfoGlobal->m_context, + queue, + threadInfoGlobal->m_num_elements, + threadInfoGlobal->m_vectorSize, threadInfoGlobal->m_style, + threadInfoGlobal->m_num_runs_shift, + threadInfoGlobal->m_type, threadInfoGlobal->m_testID, + threadInfoGlobal->m_pRandData[thread_id], + threadInfoGlobal->m_offset + threadInfoGlobal->m_num_elements*job_id, + threadInfoGlobal->m_offset + threadInfoGlobal->m_num_elements*(job_id+1), + &(threadInfoGlobal->m_arrPerThreadData[thread_id]) + ); + + if(result != 0) + { + log_error("Thread %x (job %x) failed test_integer_ops with result %x\n", + thread_id, job_id, result); + // return error; + } + + + error = clReleaseCommandQueue(queue); + if(error != CL_SUCCESS) + { + log_error("Thread %x (job %x) could not release command queue\n", + thread_id, job_id); + return error; + } + return result; +} + +int +test_integer_ops_threaded(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, int vectorSize, TestStyle style, int num_runs_shift, ExplicitType type, int testID) +{ + globalThreadData * pThreadInfo = NULL; + cl_int result=0; + cl_uint threadcount = GetThreadCount(); + + // Check to see if we are using single threaded mode on other than a 1.0 device + if (getenv( "CL_TEST_SINGLE_THREADED" )) { + + char device_version[1024] = { 0 }; + result = clGetDeviceInfo( deviceID, CL_DEVICE_VERSION, sizeof(device_version), device_version, NULL ); + if(result != CL_SUCCESS) + { + log_error("clGetDeviceInfo(CL_DEVICE_GLOBAL_MEM_SIZE) failed: %d\n", result); + return result; + } + + if (strcmp("OpenCL 1.0 ",device_version)) { + log_error("ERROR: CL_TEST_SINGLE_THREADED is set in the environment. 
Running single threaded.\n"); + } + } + + // This test will run threadcount threads concurrently; each thread will execute test_integer_ops() + // which will allocate 2 OpenCL buffers on the device; each buffer has size num_elements * type_size * vectorSize. + // We need to make sure that the total device memory allocated by all threads does not exceed the maximum + // memory on the device. If it does, we decrease num_elements until all threads combined will not + // over-subscribe device memory. + cl_ulong maxDeviceGlobalMem; + result = clGetDeviceInfo(deviceID, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(maxDeviceGlobalMem), &maxDeviceGlobalMem, NULL); + if(result != CL_SUCCESS) + { + log_error("clGetDeviceInfo(CL_DEVICE_GLOBAL_MEM_SIZE) failed: %d\n", result); + return result; + } + + if (maxDeviceGlobalMem > (cl_ulong)SIZE_MAX) { + maxDeviceGlobalMem = (cl_ulong)SIZE_MAX; + } + + // Let's not take all device memory - reduce by 75% + maxDeviceGlobalMem = (maxDeviceGlobalMem * 3) >> 2; + // Now reduce num_elements so that the total device memory usage does not exceed 75% of global device memory. + size_t type_size = get_explicit_type_size(type); + while ((cl_ulong)threadcount * 4 * num_elements * type_size * vectorSize > maxDeviceGlobalMem) + { + num_elements >>= 1; + } + + uint64_t startIndx = (uint64_t)0; + uint64_t endIndx = (1ULL<m_offset = startIndx; + +#if THREAD_DEBUG + log_error("Launching %llx jobs\n", + jobcount); +#endif + + result = ThreadPool_Do(test_integer_ops_do_thread, (cl_uint)jobcount, (void *)pThreadInfo); + + if(result != 0) + { + // cleanup ?? 
+ log_error("ThreadPool_Do return non-success value %d\n", result); + + } + globalThreadDataDestroy(pThreadInfo); + return result; +} + + + +int +test_integer_ops(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements, + int vectorSize, TestStyle style, int num_runs_shift, + ExplicitType type, int testID, MTdata randDataIn, + uint64_t startIndx, uint64_t endIndx, + perThreadData ** ppThreadData) +{ + size_t threads[1]; + int err; + int i; + int inputAVecSize, inputBVecSize; + + + + inputAVecSize = inputBVecSize = vectorSize; + if( style == kInputAScalar ) + inputAVecSize = 1; + else if( style == kInputBScalar ) + inputBVecSize = 1; + + /* + if( inputAVecSize != inputBVecSize ) + log_info("Testing \"%s\" on %s%d (%s-%s inputs) (range %llx - %llx of 0-%llx)\n", + test_names[testID], + get_explicit_type_name(type), vectorSize, + ( inputAVecSize == 1 ) ? "scalar" : "vector", + ( inputBVecSize == 1 ) ? "scalar" : "vector", + startIndx, endIndx, (1ULL< NUM_TESTS) { + log_error("Invalid test ID: %d\n", testID); + return -1; + } + + if(*ppThreadData == NULL) + { + *ppThreadData = perThreadDataNew(); + err = perThreadDataInit(*ppThreadData, + type, num_elements, vectorSize, + inputAVecSize, inputBVecSize, + context, start_test_ID, + end_test_ID, testID); + test_error(err, "failed to init per thread data\n"); + } + + perThreadData * pThreadData = *ppThreadData; + + + + threads[0] = (size_t)num_elements; + int error_count = 0; + for (i=start_test_ID; i= endIndx) + { + startIndx = (uint64_t)0; + endIndx = (1ULL<m_input_ptr), randDataIn); + break; + case kUChar: + init_uchar_data(indx, num_elements * vectorSize, (cl_uchar**)(pThreadData->m_input_ptr), randDataIn); + break; + case kShort: + init_short_data(indx, num_elements * vectorSize, (cl_short**)(pThreadData->m_input_ptr), randDataIn); + break; + case kUShort: + init_ushort_data(indx, num_elements * vectorSize, (cl_ushort**)(pThreadData->m_input_ptr), randDataIn); + break; + case kInt: + 
init_int_data(indx, num_elements * vectorSize, (cl_int**)(pThreadData->m_input_ptr), randDataIn); + break; + case kUInt: + init_uint_data(indx, num_elements * vectorSize, (cl_uint**)(pThreadData->m_input_ptr), randDataIn); + break; + case kLong: + init_long_data(indx, num_elements * vectorSize, (cl_long**)(pThreadData->m_input_ptr), randDataIn); + break; + case kULong: + init_ulong_data(indx, num_elements * vectorSize, (cl_ulong**)(pThreadData->m_input_ptr), randDataIn); + break; + default: + err = 1; + log_error("Invalid type.\n"); + break; + } + + + err = clEnqueueWriteBuffer(queue, pThreadData->m_streams[0], CL_FALSE, 0, pThreadData->m_type_size*num_elements * inputAVecSize, (void *)pThreadData->m_input_ptr[0], 0, NULL, NULL); + test_error(err, "clEnqueueWriteBuffer failed"); + err = clEnqueueWriteBuffer( queue, pThreadData->m_streams[1], CL_FALSE, 0, pThreadData->m_type_size*num_elements * inputBVecSize, (void *)pThreadData->m_input_ptr[1], 0, NULL, NULL ); + test_error(err, "clEnqueueWriteBuffer failed"); + + err = clEnqueueNDRangeKernel( queue, pThreadData->m_kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL ); + test_error(err, "clEnqueueNDRangeKernel failed"); + + err = clEnqueueReadBuffer( queue, pThreadData->m_streams[2], CL_TRUE, 0, pThreadData->m_type_size*num_elements * vectorSize, (void *)pThreadData->m_output_ptr, 0, NULL, NULL ); + test_error(err, "clEnqueueReadBuffer failed"); + + // log_info("Performing verification\n"); + + // If one of the inputs are scalar, we need to extend the input values to vectors + // to accommodate the verify functions + if( vectorSize > 1 ) + { + char * p = NULL; + if( style == kInputAScalar ) + p = (char *)pThreadData->m_input_ptr[ 0 ]; + else if( style == kInputBScalar ) + p = (char *)pThreadData->m_input_ptr[ 1 ]; + if( p != NULL ) + { + for( int element = num_elements - 1; element >= 0; element-- ) + { + for( int vec = ( element == 0 ) ? 
1 : 0; vec < vectorSize; vec++ ) + memcpy( p + ( element * vectorSize + vec ) * pThreadData->m_type_size, p + element * pThreadData->m_type_size, pThreadData->m_type_size ); + } + } + } + + switch (type) { + case kChar: + err = verify_char(i, vectorSize, (cl_char*)pThreadData->m_input_ptr[0], (cl_char*)pThreadData->m_input_ptr[1], (cl_char*)pThreadData->m_output_ptr, num_elements * vectorSize); + break; + case kUChar: + err = verify_uchar(i, vectorSize, (cl_uchar*)pThreadData->m_input_ptr[0], (cl_uchar*)pThreadData->m_input_ptr[1], (cl_uchar*)pThreadData->m_output_ptr, num_elements * vectorSize); + break; + case kShort: + err = verify_short(i, vectorSize, (cl_short*)pThreadData->m_input_ptr[0], (cl_short*)pThreadData->m_input_ptr[1], (cl_short*)pThreadData->m_output_ptr, num_elements * vectorSize); + break; + case kUShort: + err = verify_ushort(i, vectorSize, (cl_ushort*)pThreadData->m_input_ptr[0], (cl_ushort*)pThreadData->m_input_ptr[1], (cl_ushort*)pThreadData->m_output_ptr, num_elements * vectorSize); + break; + case kInt: + err = verify_int(i, vectorSize, (cl_int*)pThreadData->m_input_ptr[0], (cl_int*)pThreadData->m_input_ptr[1], (cl_int*)pThreadData->m_output_ptr, num_elements * vectorSize); + break; + case kUInt: + err = verify_uint(i, vectorSize, (cl_uint*)pThreadData->m_input_ptr[0], (cl_uint*)pThreadData->m_input_ptr[1], (cl_uint*)pThreadData->m_output_ptr, num_elements * vectorSize); + break; + case kLong: + err = verify_long(i, vectorSize, (cl_long*)pThreadData->m_input_ptr[0], (cl_long*)pThreadData->m_input_ptr[1], (cl_long*)pThreadData->m_output_ptr, num_elements * vectorSize); + break; + case kULong: + err = verify_ulong(i, vectorSize, (cl_ulong*)pThreadData->m_input_ptr[0], (cl_ulong*)pThreadData->m_input_ptr[1], (cl_ulong*)pThreadData->m_output_ptr, num_elements * vectorSize); + break; + default: + err = 1; + log_error("Invalid type.\n"); + break; + } + + if (err) { +#if 0 + log_error( "* inASize: %d inBSize: %d numElem: %d\n", inputAVecSize, 
inputBVecSize, num_elements ); + cl_char *inP = (cl_char *)pThreadData->m_input_ptr[0]; + log_error( "from 18:\n" ); + for( int q = 18; q < 64; q++ ) + { + log_error( "%02x ", inP[ q ] ); + } + log_error( "\n" ); + inP = (cl_char *)pThreadData->m_input_ptr[1]; + for( int q = 18; q < 64; q++ ) + { + log_error( "%02x ", inP[ q ] ); + } + log_error( "\n" ); + inP = (cl_char *)pThreadData->m_output_ptr; + for( int q = 18; q < 64; q++ ) + { + log_error( "%02x ", inP[ q ] ); + } + log_error( "\n" ); + log_error( "from 36:\n" ); + inP = (cl_char *)pThreadData->m_input_ptr[0]; + for( int q = 36; q < 64; q++ ) + { + log_error( "%02x ", inP[ q ] ); + } + log_error( "\n" ); + inP = (cl_char *)pThreadData->m_input_ptr[1]; + for( int q = 36; q < 64; q++ ) + { + log_error( "%02x ", inP[ q ] ); + } + log_error( "\n" ); + inP = (cl_char *)pThreadData->m_output_ptr; + for( int q = 36; q < 64; q++ ) + { + log_error( "%02x ", inP[ q ] ); + } + log_error( "\n" ); +#endif + error_count++; + break; + } + } + + /* + + const char * sizeNames[] = { "", "", "2", "3", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" }; + + if (err) { + log_error("\t\t%s%s test %s failed (range %llx - %llx of 0-%llx)\n", + get_explicit_type_name(type), sizeNames[vectorSize], + test_names[i], + startIndx, endIndx, + (1ULL<> scalar case tested by tests 10 and 11 + // so they get skipped entirely + + int testsToRun[] = { 0, 1, 2, 3, 4, 5, 6, 7, + 13, 14, 15, 16, 17, 18, 19, 20, 21 }; + for (i=0; i< sizeof(testsToRun)/sizeof(testsToRun[0]); i++) + { + errors += run_test_sizes(deviceID, context, queue, 2048, type, num, testsToRun[i]); + } + return errors; +} + +int test_vector_scalar_ops(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + int errors = 0; + int numTypes = sizeof( types ) / sizeof( types[ 0 ] ); + + for( int t = 0; t < numTypes; t++ ) + { + if ((types[ t ] == kLong || types[ t ] == kULong) && !gHasLong) + continue; + + errors += run_vector_scalar_tests( 
deviceID, context, queue, num_elements, types[ t ], 1 ); + break; + } + + return errors; +} + +void generate_random_bool_data( size_t count, MTdata d, cl_char *outData, size_t outDataSize ) +{ + cl_uint bits = genrand_int32(d); + cl_uint bitsLeft = 32; + + memset( outData, 0, outDataSize * count ); + + for( size_t i = 0; i < count; i++ ) + { + if( 0 == bitsLeft) + { + bits = genrand_int32(d); + bitsLeft = 32; + } + + // Note: we will be setting just any bit non-zero for the type, so we can easily skip past + // and just write bytes (assuming the entire output buffer is already zeroed, which we did) + *outData = ( bits & 1 ) ? 0xff : 0; + + bits >>= 1; bitsLeft -= 1; + + outData += outDataSize; + } +} + +static const char *kernel_question_colon_full = +"__kernel void test(__global %s%s *srcA, __global %s%s *srcB, __global %s%s *srcC, __global %s%s *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" %s%s valA = %ssrcA%s" +" %s%s valB = %ssrcB%s" +" %s%s valC = %ssrcC%s" +" %s%s destVal = valC ? 
valA : valB;\n" +" %s" +"}\n"; + +static const char *kernel_qc_load_plain_prefix = ""; +static const char *kernel_qc_load_plain_suffix = "[ tid ];\n"; + +static const char *kernel_qc_load_vec3_prefix = "vload3( tid, "; +static const char *kernel_qc_load_vec3_suffix = ");\n"; + +static const char *kernel_qc_store_plain = "dst[ tid ] = destVal;\n"; +static const char *kernel_qc_store_vec3 = "vstore3( destVal, tid, dst );\n"; + +int test_question_colon_op(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements, + int vectorSize, TestStyle style, ExplicitType type ) +{ + cl_mem streams[4]; + cl_int *input_ptr[3], *output_ptr; + cl_program program; + cl_kernel kernel; + size_t threads[1]; + int err; + int inputAVecSize, inputBVecSize, inputCVecSize; + const char * sizeNames[] = { "", "", "2", "3", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" }; + // Identical to sizeNames but with a blank for 3, since we use vload/store there + const char * paramSizeNames[] = { "", "", "2", "", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" }; + MTdata s_randStates; + + inputAVecSize = inputBVecSize = inputCVecSize = vectorSize; + if( style & kInputCAlsoScalar ) + { + style = (TestStyle)( style & ~kInputCAlsoScalar ); + inputCVecSize = 1; + } + if( style == kInputAScalar ) + inputAVecSize = 1; + else if( style == kInputBScalar ) + inputBVecSize = 1; + else if( style == kVectorScalarScalar ) + inputAVecSize = inputBVecSize = 1; + + log_info("Testing \"?:\" on %s%d (%s?%s:%s inputs)\n", + get_explicit_type_name(type), vectorSize, ( inputCVecSize == 1 ) ? "scalar" : "vector", + ( inputAVecSize == 1 ) ? "scalar" : "vector", + ( inputBVecSize == 1 ) ? 
"scalar" : "vector" ); + + + const char *type_name = get_explicit_type_name(type); + size_t type_size = get_explicit_type_size(type); + + // Create and initialize I/O buffers + + input_ptr[0] = (cl_int*)malloc(type_size * num_elements * vectorSize); + input_ptr[1] = (cl_int*)malloc(type_size * num_elements * vectorSize); + input_ptr[2] = (cl_int*)malloc(type_size * num_elements * vectorSize); + output_ptr = (cl_int*)malloc(type_size * num_elements * vectorSize); + + s_randStates = init_genrand( gRandomSeed ); + + generate_random_data( type, num_elements * inputAVecSize, s_randStates, input_ptr[ 0 ] ); + generate_random_data( type, num_elements * inputBVecSize, s_randStates, input_ptr[ 1 ] ); + generate_random_bool_data( num_elements * inputCVecSize, s_randStates, (cl_char *)input_ptr[ 2 ], type_size ); + + streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR), type_size * num_elements * inputAVecSize, input_ptr[0], &err); + test_error(err, "clCreateBuffer failed"); + streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR), type_size * num_elements * inputBVecSize, input_ptr[1], &err ); + test_error(err, "clCreateBuffer failed"); + streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR), type_size * num_elements * inputCVecSize, input_ptr[2], &err ); + test_error(err, "clCreateBuffer failed"); + streams[3] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_WRITE_ONLY), type_size * num_elements * vectorSize, NULL, &err ); + test_error(err, "clCreateBuffer failed"); + + const char *vectorString = sizeNames[ vectorSize ]; + const char *inputAVectorString = sizeNames[ inputAVecSize ]; + const char *inputBVectorString = sizeNames[ inputBVecSize ]; + const char *inputCVectorString = sizeNames[ inputCVecSize ]; + + char programString[4096]; + const char *ptr; + + sprintf( programString, kernel_question_colon_full, type_name, paramSizeNames[ inputAVecSize ], + 
type_name, paramSizeNames[ inputBVecSize ], + type_name, paramSizeNames[ inputCVecSize ], + type_name, paramSizeNames[ vectorSize ], + // Loads + type_name, inputAVectorString, ( inputAVecSize == 3 ) ? kernel_qc_load_vec3_prefix : kernel_qc_load_plain_prefix, ( inputAVecSize == 3 ) ? kernel_qc_load_vec3_suffix : kernel_qc_load_plain_suffix, + type_name, inputBVectorString, ( inputBVecSize == 3 ) ? kernel_qc_load_vec3_prefix : kernel_qc_load_plain_prefix, ( inputBVecSize == 3 ) ? kernel_qc_load_vec3_suffix : kernel_qc_load_plain_suffix, + type_name, inputCVectorString, ( inputCVecSize == 3 ) ? kernel_qc_load_vec3_prefix : kernel_qc_load_plain_prefix, ( inputCVecSize == 3 ) ? kernel_qc_load_vec3_suffix : kernel_qc_load_plain_suffix, + // Dest type + type_name, vectorString, + // Store + ( vectorSize == 3 ) ? kernel_qc_store_vec3 : kernel_qc_store_plain ); + + ptr = programString; + err = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "test" ); + test_error( err, "Unable to create test kernel" ); + + err = clSetKernelArg( kernel, 0, sizeof streams[0], &streams[0] ); + err |= clSetKernelArg( kernel, 1, sizeof streams[1], &streams[1] ); + err |= clSetKernelArg( kernel, 2, sizeof streams[2], &streams[2] ); + err |= clSetKernelArg( kernel, 3, sizeof streams[3], &streams[3] ); + test_error(err, "clSetKernelArgs failed"); + + // Run + threads[0] = (size_t)num_elements; + + err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL ); + test_error(err, "clEnqueueNDRangeKernel failed"); + + // Read and verify results + err = clEnqueueReadBuffer( queue, streams[3], CL_TRUE, 0, type_size*num_elements * vectorSize, (void *)output_ptr, 0, NULL, NULL ); + test_error(err, "clEnqueueReadBuffer failed"); + + // log_info("Performing verification\n"); + int error_count = 0; + + char *inputAPtr = (char *)input_ptr[ 0 ]; + char *inputBPtr = (char *)input_ptr[ 1 ]; + cl_char *inputCPtr = (cl_char *)input_ptr[ 2 ]; + char *actualPtr = (char 
*)output_ptr; + + for( int i = 0; i < num_elements; i++ ) + { + for( int j = 0; j < vectorSize; j++ ) + { + char *expectedPtr = ( *inputCPtr ) ? inputAPtr : inputBPtr; + if( memcmp( expectedPtr, actualPtr, type_size ) != 0 ) + { +#if 0 + char expectedStr[ 128 ], actualStr[ 128 ], inputAStr[ 128 ], inputBStr[ 128 ]; + print_type_to_string( type, inputAPtr, inputAStr ); + print_type_to_string( type, inputBPtr, inputBStr ); + print_type_to_string( type, expectedPtr, expectedStr ); + print_type_to_string( type, actualPtr, actualStr ); + log_error( "cl_%s verification failed at element %d:%d (expected %s, got %s, inputs: %s, %s, %s)\n", + type_name, i, j, expectedStr, actualStr, inputAStr, inputBStr, ( *inputCPtr ) ? "true" : "false" ); +#endif + error_count++; + } + // Advance for each element member. Note if any of the vec sizes are 1, they don't advance here + inputAPtr += ( inputAVecSize == 1 ) ? 0 : type_size; + inputBPtr += ( inputBVecSize == 1 ) ? 0 : type_size; + inputCPtr += ( inputCVecSize == 1 ) ? 0 : type_size; + actualPtr += ( vectorSize == 1 ) ? 0 : type_size; + } + // Reverse for the member advance. If the vec sizes are 1, we need to advance, but otherwise they're already correct + inputAPtr += ( inputAVecSize == 1 ) ? type_size : 0; + inputBPtr += ( inputBVecSize == 1 ) ? type_size : 0; + inputCPtr += ( inputCVecSize == 1 ) ? type_size : 0; + actualPtr += ( vectorSize == 1 ) ? 
type_size : 0; + } + + // cleanup + clReleaseMemObject(streams[0]); + clReleaseMemObject(streams[1]); + clReleaseMemObject(streams[2]); + clReleaseMemObject(streams[3]); + clReleaseKernel(kernel); + clReleaseProgram(program); + free(input_ptr[0]); + free(input_ptr[1]); + free(input_ptr[2]); + free(output_ptr); + free_mtdata( s_randStates ); + + return error_count; +} diff --git a/test_conformance/integer_ops/test_integers.cpp b/test_conformance/integer_ops/test_integers.cpp new file mode 100644 index 00000000..fa7133cf --- /dev/null +++ b/test_conformance/integer_ops/test_integers.cpp @@ -0,0 +1,1889 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "testBase.h" +#include "../../test_common/harness/conversions.h" + +#define TEST_SIZE 512 + +#ifndef MIN + #define MIN( _a, _b ) ((_a) < (_b) ? (_a) : (_b)) +#endif +#ifndef MAX + #define MAX( _a, _b ) ((_a) > (_b) ? 
(_a) : (_b)) +#endif + +const char *singleParamIntegerKernelSourcePattern = +"__kernel void sample_test(__global %s *sourceA, __global %s *destValues)\n" +"{\n" +" int tid = get_global_id(0);\n" +" %s%s tmp = vload%s( tid, destValues );\n" +" tmp %s= %s( vload%s( tid, sourceA ) );\n" +" vstore%s( tmp, tid, destValues );\n" +"\n" +"}\n"; + +const char *singleParamSingleSizeIntegerKernelSourcePattern = +"__kernel void sample_test(__global %s *sourceA, __global %s *destValues)\n" +"{\n" +" int tid = get_global_id(0);\n" +" destValues[tid] %s= %s( sourceA[tid] );\n" +"}\n"; + +typedef bool (*singleParamIntegerVerifyFn)( void *source, void *destination, ExplicitType vecType ); +static void patchup_divide_results( void *outData, const void *inDataA, const void *inDataB, size_t count, ExplicitType vecType ); +bool verify_integer_divideAssign( void *source, void *destination, ExplicitType vecType ); +bool verify_integer_moduloAssign( void *source, void *destination, ExplicitType vecType ); + +int test_single_param_integer_kernel(cl_command_queue queue, cl_context context, const char *fnName, + ExplicitType vecType, size_t vecSize, singleParamIntegerVerifyFn verifyFn, + MTdata d, bool useOpKernel = false ) +{ + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper streams[2]; + cl_long inDataA[TEST_SIZE * 16], outData[TEST_SIZE * 16], inDataB[TEST_SIZE * 16], expected; + int error, i; + size_t threads[1], localThreads[1]; + char kernelSource[10240]; + char *programPtr; + char sizeName[4]; + + if (! gHasLong && strstr(get_explicit_type_name(vecType),"long")) + { + log_info( "WARNING: 64 bit integers are not supported on this device. 
Skipping %s\n", get_explicit_type_name(vecType) ); + return CL_SUCCESS; + } + + /* Create the source */ + if( vecSize == 1 ) + sizeName[ 0 ] = 0; + else + sprintf( sizeName, "%d", (int)vecSize ); + + if( vecSize == 1 ) + sprintf( kernelSource, singleParamSingleSizeIntegerKernelSourcePattern, + get_explicit_type_name( vecType ), get_explicit_type_name( vecType ), + useOpKernel ? fnName : "", useOpKernel ? "" : fnName ); + else + sprintf( kernelSource, singleParamIntegerKernelSourcePattern, + get_explicit_type_name( vecType ), get_explicit_type_name( vecType ), + get_explicit_type_name( vecType ), sizeName, sizeName, + useOpKernel ? fnName : "", useOpKernel ? "" : fnName, sizeName, + sizeName ); + + bool isOpenCL20Function = (strcmp(fnName,"ctz") == 0)? true: false; + + /* Create kernels */ + programPtr = kernelSource; + if( create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, (const char **)&programPtr, "sample_test", isOpenCL20Function ? "-cl-std=CL2.0": "" ) ) + { + log_error("The program we attempted to compile was: \n%s\n", kernelSource); + return -1; + } + + /* Generate some streams */ + generate_random_data( vecType, vecSize * TEST_SIZE, d, inDataA ); + + streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), + get_explicit_type_size( vecType ) * vecSize * TEST_SIZE, + inDataA, NULL); + if( streams[0] == NULL ) + { + log_error("ERROR: Creating input array A failed!\n"); + return -1; + } + + if( useOpKernel ) + { + // Op kernels use an r/w buffer for the second param, so we need to init it with data + generate_random_data( vecType, vecSize * TEST_SIZE, d, inDataB ); + } + streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE | ( useOpKernel ? CL_MEM_COPY_HOST_PTR : 0 )), + get_explicit_type_size( vecType ) * vecSize * TEST_SIZE, + ( useOpKernel ) ? 
&inDataB : NULL, NULL ); + if( streams[1] == NULL ) + { + log_error("ERROR: Creating output array failed!\n"); + return -1; + } + + /* Assign streams and execute */ + error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] ); + test_error( error, "Unable to set indexed kernel arguments" ); + error = clSetKernelArg( kernel, 1, sizeof( streams[1] ), &streams[1] ); + test_error( error, "Unable to set indexed kernel arguments" ); + + /* Run the kernel */ + threads[0] = TEST_SIZE; + + error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] ); + test_error( error, "Unable to get work group size to use" ); + + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); + test_error( error, "Unable to execute test kernel" ); + + memset(outData, 0xFF, get_explicit_type_size( vecType ) * TEST_SIZE * vecSize ); + + /* Now get the results */ + error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, + get_explicit_type_size( vecType ) * TEST_SIZE * vecSize, + outData, 0, NULL, NULL ); + test_error( error, "Unable to read output array!" ); + + // deal with division by 0 -- any answer is allowed here + if( verifyFn == verify_integer_divideAssign || verifyFn == verify_integer_moduloAssign ) + patchup_divide_results( outData, inDataA, inDataB, TEST_SIZE * vecSize, vecType ); + + /* And verify! */ + char *p = (char *)outData; + char *in = (char *)inDataA; + char *in2 = (char *)inDataB; + for( i = 0; i < (int)TEST_SIZE; i++ ) + { + for( size_t j = 0; j < vecSize; j++ ) + { + if( useOpKernel ) + memcpy( &expected, in2, get_explicit_type_size( vecType ) ); + + verifyFn( in, &expected, vecType ); + if( memcmp( &expected, p, get_explicit_type_size( vecType ) ) != 0 ) + { + switch( get_explicit_type_size( vecType )) + { + case 1: + if( useOpKernel ) + log_error( "ERROR: Data sample %d:%d does not validate! 
Expected (0x%2.2x), got (0x%2.2x), sources (0x%2.2x, 0x%2.2x)\n", + (int)i, (int)j, + ((cl_uchar*)&expected)[0], + *( (cl_uchar *)p ), + *( (cl_uchar *)in ), + *( (cl_uchar *)in2 ) ); + else + log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%2.2x), got (0x%2.2x), sources (0x%2.2x)\n", + (int)i, (int)j, + ((cl_uchar*)&expected)[0], + *( (cl_uchar *)p ), + *( (cl_uchar *)in ) ); + break; + + case 2: + if( useOpKernel ) + log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%4.4x), got (0x%4.4x), sources (0x%4.4x, 0x%4.4x)\n", + (int)i, (int)j, ((cl_ushort*)&expected)[0], *( (cl_ushort *)p ), + *( (cl_ushort *)in ), *( (cl_ushort *)in2 ) ); + else + log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%4.4x), got (0x%4.4x), sources (0x%4.4x)\n", + (int)i, (int)j, ((cl_ushort*)&expected)[0], *( (cl_ushort *)p ), + *( (cl_ushort *)in ) ); + break; + + case 4: + if( useOpKernel ) + log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%8.8x), got (0x%8.8x), sources (0x%8.8x, 0x%8.8x)\n", + (int)i, (int)j, ((cl_uint*)&expected)[0], *( (cl_uint *)p ), + *( (cl_uint *)in ), *( (cl_uint *)in2 ) ); + else + log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%8.8x), got (0x%8.8x), sources (0x%8.8x)\n", + (int)i, (int)j, ((cl_uint*)&expected)[0], *( (cl_uint *)p ), + *( (cl_uint *)in ) ); + break; + + case 8: + if( useOpKernel ) + log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%16.16llx), got (0x%16.16llx), sources (0x%16.16llx, 0x%16.16llx)\n", + (int)i, (int)j, ((cl_ulong*)&expected)[0], *( (cl_ulong *)p ), + *( (cl_ulong *)in ), *( (cl_ulong *)in2 ) ); + else + log_error( "ERROR: Data sample %d:%d does not validate! 
Expected (0x%16.16llx), got (0x%16.16llx), sources (0x%16.16llx)\n", + (int)i, (int)j, ((cl_ulong*)&expected)[0], *( (cl_ulong *)p ), + *( (cl_ulong *)in ) ); + break; + } + return -1; + } + p += get_explicit_type_size( vecType ); + in += get_explicit_type_size( vecType ); + in2 += get_explicit_type_size( vecType ); + } + } + + return 0; +} + +int test_single_param_integer_fn( cl_command_queue queue, cl_context context, const char *fnName, singleParamIntegerVerifyFn verifyFn, bool useOpKernel = false ) +{ + ExplicitType types[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kNumExplicitTypes }; + unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 }; // TODO 3 not tested + unsigned int index, typeIndex; + int retVal = 0; + RandomSeed seed(gRandomSeed ); + + for( typeIndex = 0; types[ typeIndex ] != kNumExplicitTypes; typeIndex++ ) + { + if ((types[ typeIndex ] == kLong || types[ typeIndex ] == kULong) && !gHasLong) + continue; + + for( index = 0; vecSizes[ index ] != 0; index++ ) + { + if( test_single_param_integer_kernel(queue, context, fnName, types[ typeIndex ], vecSizes[ index ], verifyFn, seed, useOpKernel ) != 0 ) + { + log_error( " Vector %s%d FAILED\n", get_explicit_type_name( types[ typeIndex ] ), vecSizes[ index ] ); + retVal = -1; + } + } + } + + return retVal; +} + +bool verify_integer_clz( void *source, void *destination, ExplicitType vecType ) +{ + cl_long testValue; + int count; + int typeBits; + + switch( vecType ) + { + case kChar: + testValue = *( (cl_char *)source ); + typeBits = 8 * sizeof( cl_char ); + break; + case kUChar: + testValue = *( (cl_uchar *)source ); + typeBits = 8 * sizeof( cl_uchar ); + break; + case kShort: + testValue = *( (cl_short *)source ); + typeBits = 8 * sizeof( cl_short ); + break; + case kUShort: + testValue = *( (cl_ushort *)source ); + typeBits = 8 * sizeof( cl_ushort ); + break; + case kInt: + testValue = *( (cl_int *)source ); + typeBits = 8 * sizeof( cl_int ); + break; + case kUInt: + testValue = *( 
(cl_uint *)source ); + typeBits = 8 * sizeof( cl_uint ); + break; + case kLong: + testValue = *( (cl_long *)source ); + typeBits = 8 * sizeof( cl_long ); + break; + case kULong: + // Hack for now: just treat it as a signed cl_long, since it won't matter for bitcounting + testValue = *( (cl_ulong *)source ); + typeBits = 8 * sizeof( cl_ulong ); + break; + default: + // Should never happen + return false; + } + + count = typeBits; + if( testValue ) + { + testValue <<= 8 * sizeof( testValue ) - typeBits; + for( count = 0; 0 == (testValue & CL_LONG_MIN); count++ ) + testValue <<= 1; + } + + switch( vecType ) + { + case kChar: + *( (cl_char *)destination ) = count; + break; + case kUChar: + *( (cl_uchar *)destination ) = count; + break; + case kShort: + *( (cl_short *)destination ) = count; + break; + case kUShort: + *( (cl_ushort *)destination ) = count; + break; + case kInt: + *( (cl_int *)destination ) = count; + break; + case kUInt: + *( (cl_uint *)destination ) = count; + break; + case kLong: + *( (cl_long *)destination ) = count; + break; + case kULong: + *( (cl_ulong *)destination ) = count; + break; + default: + // Should never happen + return false; + } + return true; +} + +int test_integer_clz(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return test_single_param_integer_fn( queue, context, "clz", verify_integer_clz ); +} + + +bool verify_integer_ctz( void *source, void *destination, ExplicitType vecType ) +{ + cl_long testValue; + int count; + int typeBits; + + switch( vecType ) + { + case kChar: + testValue = *( (cl_char *)source ); + typeBits = 8 * sizeof( cl_char ); + break; + case kUChar: + testValue = *( (cl_uchar *)source ); + typeBits = 8 * sizeof( cl_uchar ); + break; + case kShort: + testValue = *( (cl_short *)source ); + typeBits = 8 * sizeof( cl_short ); + break; + case kUShort: + testValue = *( (cl_ushort *)source ); + typeBits = 8 * sizeof( cl_ushort ); + break; + case kInt: + testValue = *( (cl_int 
*)source ); + typeBits = 8 * sizeof( cl_int ); + break; + case kUInt: + testValue = *( (cl_uint *)source ); + typeBits = 8 * sizeof( cl_uint ); + break; + case kLong: + testValue = *( (cl_long *)source ); + typeBits = 8 * sizeof( cl_long ); + break; + case kULong: + // Hack for now: just treat it as a signed cl_long, since it won't matter for bitcounting + testValue = *( (cl_ulong *)source ); + typeBits = 8 * sizeof( cl_ulong ); + break; + default: + // Should never happen + return false; + } + + if ( testValue == 0 ) + count = typeBits; + else + { + for( count = 0; (0 == (testValue & 0x1)); count++ ) + testValue >>= 1; + } + + switch( vecType ) + { + case kChar: + *( (cl_char *)destination ) = count; + break; + case kUChar: + *( (cl_uchar *)destination ) = count; + break; + case kShort: + *( (cl_short *)destination ) = count; + break; + case kUShort: + *( (cl_ushort *)destination ) = count; + break; + case kInt: + *( (cl_int *)destination ) = count; + break; + case kUInt: + *( (cl_uint *)destination ) = count; + break; + case kLong: + *( (cl_long *)destination ) = count; + break; + case kULong: + *( (cl_ulong *)destination ) = count; + break; + default: + // Should never happen + return false; + } + return true; +} + + +int test_integer_ctz(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return test_single_param_integer_fn( queue, context, "ctz", verify_integer_ctz ); +} + +#define OP_CASE( op, sizeName, size ) \ + case sizeName: \ + { \ + cl_##size *d = (cl_##size *)destination; \ + *d op##= *( (cl_##size *)source ); \ + break; \ + } + +#define OP_CASES( op ) \ + switch( vecType ) \ + { \ + OP_CASE( op, kChar, char ) \ + OP_CASE( op, kUChar, uchar ) \ + OP_CASE( op, kShort, short ) \ + OP_CASE( op, kUShort, ushort ) \ + OP_CASE( op, kInt, int ) \ + OP_CASE( op, kUInt, uint ) \ + OP_CASE( op, kLong, long ) \ + OP_CASE( op, kULong, ulong ) \ + default: \ + break; \ + } + +#define OP_TEST( op, opName ) \ + bool 
verify_integer_##opName##Assign( void *source, void *destination, ExplicitType vecType ) \ + { \ + OP_CASES( op ) \ + return true; \ + } \ + int test_integer_##opName##Assign(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) \ + { \ + return test_single_param_integer_fn( queue, context, #op, verify_integer_##opName##Assign, true ); \ + } + +OP_TEST( +, add ) +OP_TEST( -, subtract ) +OP_TEST( *, multiply ) +OP_TEST( ^, exclusiveOr ) +OP_TEST( |, or ) +OP_TEST( &, and ) + +#define OP_CASE_GUARD( op, sizeName, size ) \ + case sizeName: \ + { \ + cl_##size *d = (cl_##size *)destination; \ + cl_##size *s = (cl_##size *)source; \ + if( *s == 0 ) \ + *d = -1; \ + else \ + *d op##= *s; \ + break; \ + } + +#define OP_CASE_GUARD_SIGNED( op, sizeName, size, MIN_VAL ) \ + case sizeName: \ + { \ + cl_##size *d = (cl_##size *)destination; \ + cl_##size *s = (cl_##size *)source; \ + if( *s == 0 || (*d == MIN_VAL && *s == -1)) \ + *d = -1 - MIN_VAL; \ + else \ + *d op##= *s; \ + break; \ + } + +#define OP_CASES_GUARD( op ) \ + switch( vecType ) \ + { \ + OP_CASE_GUARD_SIGNED( op, kChar, char, CL_CHAR_MIN ) \ + OP_CASE_GUARD( op, kUChar, uchar ) \ + OP_CASE_GUARD_SIGNED( op, kShort, short, CL_SHRT_MIN ) \ + OP_CASE_GUARD( op, kUShort, ushort ) \ + OP_CASE_GUARD_SIGNED( op, kInt, int, CL_INT_MIN ) \ + OP_CASE_GUARD( op, kUInt, uint ) \ + OP_CASE_GUARD_SIGNED( op, kLong, long, CL_LONG_MIN ) \ + OP_CASE_GUARD( op, kULong, ulong ) \ + default: \ + break; \ + } + +#define OP_TEST_GUARD( op, opName ) \ + bool verify_integer_##opName##Assign( void *source, void *destination, ExplicitType vecType ) \ + { \ + OP_CASES_GUARD( op ) \ + return true; \ + } \ + int test_integer_##opName##Assign(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) \ + { \ + return test_single_param_integer_fn( queue, context, #op, verify_integer_##opName##Assign, true ); \ + } + +OP_TEST_GUARD( /, divide ) +OP_TEST_GUARD( %, modulo ) + +#define 
PATCH_CASE( _out, _src, _dest, _count, _cl_type ) \ + { \ + const _cl_type *denom = (const _cl_type* ) _src; \ + _cl_type *result = (_cl_type* ) _out; \ + for( size_t i = 0; i < _count; i++ ) \ + if( denom[i] == 0 ) \ + result[i] = (_cl_type) -1; \ + } + +#define PATCH_CASE_SIGNED( _out, _src, _dest, _count, _cl_type, _MIN_VAL ) \ + { \ + const _cl_type *num = (const _cl_type* ) _dest; \ + const _cl_type *denom = (const _cl_type* ) _src; \ + _cl_type *result = (_cl_type* ) _out; \ + for( size_t i = 0; i < _count; i++ ) \ + if( denom[i] == 0 || ( num[i] == _MIN_VAL && denom[i] == -1)) \ + result[i] = -1 - _MIN_VAL; \ + } + +static void patchup_divide_results( void *outData, const void *inDataA, const void *inDataB, size_t count, ExplicitType vecType ) +{ + switch( vecType ) + { + case kChar: + PATCH_CASE_SIGNED( outData, inDataA, inDataB, count, cl_char, CL_CHAR_MIN ) + break; + case kUChar: + PATCH_CASE( outData, inDataA, inDataB, count, cl_uchar ) + break; + case kShort: + PATCH_CASE_SIGNED( outData, inDataA, inDataB, count, cl_short, CL_SHRT_MIN ) + break; + case kUShort: + PATCH_CASE( outData, inDataA, inDataB, count, cl_ushort ) + break; + case kInt: + PATCH_CASE_SIGNED( outData, inDataA, inDataB, count, cl_int, CL_INT_MIN ) + break; + case kUInt: + PATCH_CASE( outData, inDataA, inDataB, count, cl_uint ) + break; + case kLong: + PATCH_CASE_SIGNED( outData, inDataA, inDataB, count, cl_long, CL_LONG_MIN ) + break; + case kULong: + PATCH_CASE( outData, inDataA, inDataB, count, cl_ulong ) + break; + default: + log_error( "ERROR: internal test error -- unknown data type %d\n", vecType ); + break; + } +} + +const char *twoParamIntegerKernelSourcePattern = +"__kernel void sample_test(__global %s%s *sourceA, __global %s%s *sourceB, __global %s%s *destValues)\n" +"{\n" +" int tid = get_global_id(0);\n" +" %s%s sA = %s;\n" +" %s%s sB = %s;\n" +" %s%s dst = %s( sA, sB );\n" +" %s;\n" +"\n" +"}\n"; + +typedef bool (*twoParamIntegerVerifyFn)( void *sourceA, void *sourceB, 
void *destination, ExplicitType vecType ); + +static char * build_load_statement( char *outString, size_t vecSize, const char *name ) +{ + if( vecSize != 3 ) + sprintf( outString, "%s[ tid ]", name ); + else + sprintf( outString, "vload3( tid, %s )", name ); + return outString; +} + +static char * build_store_statement( char *outString, size_t vecSize, const char *name, const char *srcName ) +{ + if( vecSize != 3 ) + sprintf( outString, "%s[ tid ] = %s", name, srcName ); + else + sprintf( outString, "vstore3( %s, tid, %s )", srcName, name ); + return outString; +} + +int test_two_param_integer_kernel(cl_command_queue queue, cl_context context, const char *fnName, + ExplicitType vecAType, ExplicitType vecBType, unsigned int vecSize, twoParamIntegerVerifyFn verifyFn, MTdata d ) +{ + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper streams[3]; + cl_long inDataA[TEST_SIZE * 16], inDataB[TEST_SIZE * 16], outData[TEST_SIZE * 16], expected; + int error, i; + size_t threads[1], localThreads[1]; + char kernelSource[10240]; + char *programPtr; + char sizeName[4], paramSizeName[4]; + + // embedded profiles don't support long/ulong datatypes + if (! gHasLong && strstr(get_explicit_type_name(vecAType),"long")) + { + log_info( "WARNING: 64 bit integers are not supported on this device. 
Skipping %s\n", get_explicit_type_name(vecAType) ); + return CL_SUCCESS; + } + + /* Create the source */ + if( vecSize == 1 ) + sizeName[ 0 ] = 0; + else + sprintf( sizeName, "%d", vecSize ); + if( ( vecSize == 1 ) || ( vecSize == 3 ) ) + paramSizeName[ 0 ] = 0; + else + sprintf( paramSizeName, "%d", vecSize ); + + char sourceALoad[ 128 ], sourceBLoad[ 128 ], destStore[ 128 ]; + + sprintf( kernelSource, twoParamIntegerKernelSourcePattern, + get_explicit_type_name( vecAType ), paramSizeName, + get_explicit_type_name( vecBType ), paramSizeName, + get_explicit_type_name( vecAType ), paramSizeName, + get_explicit_type_name( vecAType ), sizeName, build_load_statement( sourceALoad, (size_t)vecSize, "sourceA" ), + get_explicit_type_name( vecBType ), sizeName, build_load_statement( sourceBLoad, (size_t)vecSize, "sourceB" ), + get_explicit_type_name( vecAType ), sizeName, + fnName, + build_store_statement( destStore, (size_t)vecSize, "destValues", "dst" ) + ); + + /* Create kernels */ + programPtr = kernelSource; + if( create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&programPtr, "sample_test" ) ) + { + log_error("The program we attempted to compile was: \n%s\n", kernelSource); + return -1; + } + + /* Generate some streams */ + generate_random_data( vecAType, vecSize * TEST_SIZE, d, inDataA ); + generate_random_data( vecBType, vecSize * TEST_SIZE, d, inDataB ); + + streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), + get_explicit_type_size( vecAType ) * vecSize * TEST_SIZE, + &inDataA, NULL); + if( streams[0] == NULL ) + { + log_error("ERROR: Creating input array A failed!\n"); + return -1; + } + streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), + get_explicit_type_size( vecBType ) * vecSize * TEST_SIZE, + &inDataB, NULL); + if( streams[1] == NULL ) + { + log_error("ERROR: Creating input array B failed!\n"); + return -1; + } + streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), + 
get_explicit_type_size( vecAType ) * vecSize * TEST_SIZE, + NULL, NULL ); + if( streams[2] == NULL ) + { + log_error("ERROR: Creating output array failed!\n"); + return -1; + } + + /* Assign streams and execute */ + error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] ); + test_error( error, "Unable to set indexed kernel arguments" ); + error = clSetKernelArg( kernel, 1, sizeof( streams[1] ), &streams[1] ); + test_error( error, "Unable to set indexed kernel arguments" ); + error = clSetKernelArg( kernel, 2, sizeof( streams[2] ), &streams[2] ); + test_error( error, "Unable to set indexed kernel arguments" ); + + /* Run the kernel */ + threads[0] = TEST_SIZE; + + error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] ); + test_error( error, "Unable to get work group size to use" ); + + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); + test_error( error, "Unable to execute test kernel" ); + + memset(outData, 0xFF, get_explicit_type_size( vecAType ) * TEST_SIZE * vecSize); + + /* Now get the results */ + error = clEnqueueReadBuffer( queue, streams[2], CL_TRUE, 0, + get_explicit_type_size( vecAType ) * TEST_SIZE * vecSize, outData, 0, + NULL, NULL ); + test_error( error, "Unable to read output array!" ); + + /* And verify! */ + char *inA = (char *)inDataA; + char *inB = (char *)inDataB; + char *out = (char *)outData; + for( i = 0; i < (int)TEST_SIZE; i++ ) + { + for( size_t j = 0; j < vecSize; j++ ) + { + bool test = verifyFn( inA, inB, &expected, vecAType ); + if( test && ( memcmp( &expected, out, get_explicit_type_size( vecAType ) ) != 0 ) ) + { + switch( get_explicit_type_size( vecAType )) + { + case 1: + log_error( "ERROR: Data sample %d:%d does not validate! 
Expected (0x%2.2x), got (0x%2.2x), sources (0x%2.2x, 0x%2.2x), TEST_SIZE %d\n", + (int)i, (int)j, ((cl_uchar*)&expected)[ 0 ], *( (cl_uchar *)out ), + *( (cl_uchar *)inA ), + *( (cl_uchar *)inB ) , + TEST_SIZE); + break; + + case 2: + log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%4.4x), got (0x%4.4x), sources (0x%4.4x, 0x%4.4x), TEST_SIZE %d\n", + (int)i, (int)j, ((cl_ushort*)&expected)[ 0 ], *( (cl_ushort *)out ), + *( (cl_ushort *)inA ), + *( (cl_ushort *)inB ), + TEST_SIZE); + break; + + case 4: + log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%8.8x), got (0x%8.8x), sources (0x%8.8x, 0x%8.8x)\n", + (int)i, (int)j, ((cl_uint*)&expected)[ 0 ], *( (cl_uint *)out ), + *( (cl_uint *)inA ), + *( (cl_uint *)inB ) ); + break; + + case 8: + log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%16.16llx), got (0x%16.16llx), sources (0x%16.16llx, 0x%16.16llx)\n", + (int)i, (int)j, ((cl_ulong*)&expected)[ 0 ], *( (cl_ulong *)out ), + *( (cl_ulong *)inA ), + *( (cl_ulong *)inB ) ); + break; + } + return -1; + } + inA += get_explicit_type_size( vecAType ); + inB += get_explicit_type_size( vecBType ); + out += get_explicit_type_size( vecAType ); + } + } + + return 0; +} + +int test_two_param_integer_fn(cl_command_queue queue, cl_context context, const char *fnName, twoParamIntegerVerifyFn verifyFn) +{ + ExplicitType types[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kNumExplicitTypes }; + unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 }; // TODO : 3 not tested + unsigned int index, typeIndex; + int retVal = 0; + RandomSeed seed(gRandomSeed ); + + for( typeIndex = 0; types[ typeIndex ] != kNumExplicitTypes; typeIndex++ ) + { + if (( types[ typeIndex ] == kLong || types[ typeIndex ] == kULong) && !gHasLong) + continue; + + for( index = 0; vecSizes[ index ] != 0; index++ ) + { + if( test_two_param_integer_kernel(queue, context, fnName, types[ typeIndex ], types[ typeIndex ], vecSizes[ index ], 
verifyFn, seed ) != 0 ) + { + log_error( " Vector %s%d FAILED\n", get_explicit_type_name( types[ typeIndex ] ), vecSizes[ index ] ); + retVal = -1; + } + } + } + + return retVal; +} + +int test_two_param_unmatched_integer_fn(cl_command_queue queue, cl_context context, const char *fnName, twoParamIntegerVerifyFn verifyFn) +{ + ExplicitType types[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kNumExplicitTypes }; + unsigned int vecSizes[] = { 1, 2, 4, 8, 16, 0 }; + unsigned int index, typeAIndex, typeBIndex; + int retVal = 0; + RandomSeed seed( gRandomSeed ); + + for( typeAIndex = 0; types[ typeAIndex ] != kNumExplicitTypes; typeAIndex++ ) + { + if (( types[ typeAIndex ] == kLong || types[ typeAIndex ] == kULong) && !gHasLong) + continue; + + for( typeBIndex = 0; types[ typeBIndex ] != kNumExplicitTypes; typeBIndex++ ) + { + if (( types[ typeBIndex ] == kLong || types[ typeBIndex ] == kULong) && !gHasLong) + continue; + + for( index = 0; vecSizes[ index ] != 0; index++ ) + { + if( test_two_param_integer_kernel( queue, context, fnName, types[ typeAIndex ], types[ typeBIndex ], vecSizes[ index ], verifyFn, seed ) != 0 ) + { + log_error( " Vector %s%d / %s%d FAILED\n", get_explicit_type_name( types[ typeAIndex ] ), vecSizes[ index ], get_explicit_type_name( types[ typeBIndex ] ), vecSizes[ index ] ); + retVal = -1; + } + } + } + } + + return retVal; +} + +bool verify_integer_hadd( void *sourceA, void *sourceB, void *destination, ExplicitType vecType ) +{ + cl_long testValueA, testValueB, overflow; + cl_ulong uValueA, uValueB, uOverflow; + + switch( vecType ) + { + case kChar: + testValueA = *( (cl_char *)sourceA ); + testValueB = *( (cl_char *)sourceB ); + *( (cl_char *)destination ) = (cl_char)( ( testValueA + testValueB ) >> 1 ); + break; + case kUChar: + testValueA = *( (cl_uchar *)sourceA ); + testValueB = *( (cl_uchar *)sourceB ); + *( (cl_uchar *)destination ) = (cl_uchar)( ( testValueA + testValueB ) >> 1 ); + break; + case kShort: + testValueA 
= *( (cl_short *)sourceA ); + testValueB = *( (cl_short *)sourceB ); + *( (cl_short *)destination ) = (cl_short)( ( testValueA + testValueB ) >> 1 ); + break; + case kUShort: + testValueA = *( (cl_ushort *)sourceA ); + testValueB = *( (cl_ushort *)sourceB ); + *( (cl_ushort *)destination ) = (cl_ushort)( ( testValueA + testValueB ) >> 1 ); + break; + case kInt: + testValueA = *( (cl_int *)sourceA ); + testValueB = *( (cl_int *)sourceB ); + *( (cl_int *)destination ) = (cl_int)( ( testValueA + testValueB ) >> 1 ); + break; + case kUInt: + testValueA = *( (cl_uint *)sourceA ); + testValueB = *( (cl_uint *)sourceB ); + *( (cl_uint *)destination ) = (cl_uint)( ( testValueA + testValueB ) >> 1 ); + break; + case kLong: + // The long way to avoid dropping bits + testValueA = *( (cl_long *)sourceA ); + testValueB = *( (cl_long *)sourceB ); + overflow = ( testValueA & 0x1 ) + ( testValueB & 0x1 ); + *( (cl_long *)destination ) = ( ( testValueA >> 1 ) + ( testValueB >> 1 ) ) + ( overflow >> 1 ); + break; + case kULong: + // The long way to avoid dropping bits + uValueA = *( (cl_ulong *)sourceA ); + uValueB = *( (cl_ulong *)sourceB ); + uOverflow = ( uValueA & 0x1 ) + ( uValueB & 0x1 ); + *( (cl_ulong *)destination ) = ( ( uValueA >> 1 ) + ( uValueB >> 1 ) ) + ( uOverflow >> 1 ); + break; + default: + // Should never happen + return false; + } + return true; +} + +int test_integer_hadd(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return test_two_param_integer_fn( queue, context, "hadd", verify_integer_hadd ); +} + +bool verify_integer_rhadd( void *sourceA, void *sourceB, void *destination, ExplicitType vecType ) +{ + cl_long testValueA, testValueB, overflow; + cl_ulong uValueA, uValueB, uOverflow; + + switch( vecType ) + { + case kChar: + testValueA = *( (cl_char *)sourceA ); + testValueB = *( (cl_char *)sourceB ); + *( (cl_char *)destination ) = (cl_char)( ( testValueA + testValueB + 1 ) >> 1 ); + break; + case kUChar: + 
testValueA = *( (cl_uchar *)sourceA ); + testValueB = *( (cl_uchar *)sourceB ); + *( (cl_uchar *)destination ) = (cl_uchar)( ( testValueA + testValueB + 1 ) >> 1 ); + break; + case kShort: + testValueA = *( (cl_short *)sourceA ); + testValueB = *( (cl_short *)sourceB ); + *( (cl_short *)destination ) = (cl_short)( ( testValueA + testValueB + 1 ) >> 1 ); + break; + case kUShort: + testValueA = *( (cl_ushort *)sourceA ); + testValueB = *( (cl_ushort *)sourceB ); + *( (cl_ushort *)destination ) = (cl_ushort)( ( testValueA + testValueB + 1 ) >> 1 ); + break; + case kInt: + testValueA = *( (cl_int *)sourceA ); + testValueB = *( (cl_int *)sourceB ); + *( (cl_int *)destination ) = (cl_int)( ( testValueA + testValueB + 1 ) >> 1 ); + break; + case kUInt: + testValueA = *( (cl_uint *)sourceA ); + testValueB = *( (cl_uint *)sourceB ); + *( (cl_uint *)destination ) = (cl_uint)( ( testValueA + testValueB + 1 ) >> 1 ); + break; + case kLong: + // The long way to avoid dropping bits + testValueA = *( (cl_long *)sourceA ); + testValueB = *( (cl_long *)sourceB ); + overflow = ( testValueA | testValueB ) & 0x1; + *( (cl_long *)destination ) = ( ( testValueA >> 1 ) + ( testValueB >> 1 ) ) + overflow; + break; + case kULong: + // The long way to avoid dropping bits + uValueA = *( (cl_ulong *)sourceA ); + uValueB = *( (cl_ulong *)sourceB ); + uOverflow = ( uValueA | uValueB ) & 0x1; + *( (cl_ulong *)destination ) = ( ( uValueA >> 1 ) + ( uValueB >> 1 ) ) + uOverflow; + break; + default: + // Should never happen + return false; + } + return true; +} + +int test_integer_rhadd(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return test_two_param_integer_fn( queue, context, "rhadd", verify_integer_rhadd ); +} + +#define MIN_CASE( type, const ) \ + case const : \ + { \ + cl_##type valueA = *( (cl_##type *)sourceA ); \ + cl_##type valueB = *( (cl_##type *)sourceB ); \ + *( (cl_##type *)destination ) = (cl_##type)( valueB < valueA ? 
valueB : valueA ); \ + break; \ + } + +bool verify_integer_min( void *sourceA, void *sourceB, void *destination, ExplicitType vecType ) +{ + switch( vecType ) + { + MIN_CASE( char, kChar ) + MIN_CASE( uchar, kUChar ) + MIN_CASE( short, kShort ) + MIN_CASE( ushort, kUShort ) + MIN_CASE( int, kInt ) + MIN_CASE( uint, kUInt ) + MIN_CASE( long, kLong ) + MIN_CASE( ulong, kULong ) + default: + // Should never happen + return false; + } + return true; +} + +int test_integer_min(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return test_two_param_integer_fn( queue, context, "min", verify_integer_min); +} + +#define MAX_CASE( type, const ) \ + case const : \ + { \ + cl_##type valueA = *( (cl_##type *)sourceA ); \ + cl_##type valueB = *( (cl_##type *)sourceB ); \ + *( (cl_##type *)destination ) = (cl_##type)( valueA < valueB ? valueB : valueA ); \ + break; \ + } + +bool verify_integer_max( void *sourceA, void *sourceB, void *destination, ExplicitType vecType ) +{ + switch( vecType ) + { + MAX_CASE( char, kChar ) + MAX_CASE( uchar, kUChar ) + MAX_CASE( short, kShort ) + MAX_CASE( ushort, kUShort ) + MAX_CASE( int, kInt ) + MAX_CASE( uint, kUInt ) + MAX_CASE( long, kLong ) + MAX_CASE( ulong, kULong ) + default: + // Should never happen + return false; + } + return true; +} + +int test_integer_max(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return test_two_param_integer_fn( queue, context, "max", verify_integer_max ); +} + + +void multiply_unsigned_64_by_64( cl_ulong sourceA, cl_ulong sourceB, cl_ulong &destLow, cl_ulong &destHi ) +{ + cl_ulong lowA, lowB; + cl_ulong highA, highB; + + // Split up the values + lowA = sourceA & 0xffffffff; + highA = sourceA >> 32; + lowB = sourceB & 0xffffffff; + highB = sourceB >> 32; + + // Note that, with this split, our multiplication becomes: + // ( a * b ) + // = ( ( aHI << 32 + aLO ) * ( bHI << 32 + bLO ) ) >> 64 + // = ( ( aHI << 32 * bHI << 32 ) + ( 
aHI << 32 * bLO ) + ( aLO * bHI << 32 ) + ( aLO * bLO ) ) >> 64 + // = ( ( aHI * bHI << 64 ) + ( aHI * bLO << 32 ) + ( aLO * bHI << 32 ) + ( aLO * bLO ) ) >> 64 + // = ( aHI * bHI ) + ( aHI * bLO >> 32 ) + ( aLO * bHI >> 32 ) + ( aLO * bLO >> 64 ) + + // Now, since each value is 32 bits, the max size of any multiplication is: + // ( 2 ^ 32 - 1 ) * ( 2 ^ 32 - 1 ) = 2^64 - 4^32 + 1 = 2^64 - 2^33 + 1, which fits within 64 bits + // Which means we can do each component within a 64-bit integer as necessary (each component above marked as AB1 - AB4) + cl_ulong aHibHi = highA * highB; + cl_ulong aHibLo = highA * lowB; + cl_ulong aLobHi = lowA * highB; + cl_ulong aLobLo = lowA * lowB; + + // Assemble terms. + // We note that in certain cases, sums of products cannot overflow: + // + // The maximum product of two N-bit unsigned numbers is + // + // (2**N-1)^2 = 2**2N - 2**(N+1) + 1 + // + // We note that we can add the maximum N-bit number to the 2N-bit product twice without overflow: + // + // (2**N-1)^2 + 2*(2**N-1) = 2**2N - 2**(N+1) + 1 + 2**(N+1) - 2 = 2**2N - 1 + // + // If we breakdown the product of two numbers a,b into high and low halves of partial products as follows: + // + // a.hi a.lo + // x b.hi b.lo + //=============================================================================== + // (b.hi*a.hi).hi (b.hi*a.hi).lo + // (b.lo*a.hi).hi (b.lo*a.hi).lo + // (b.hi*a.lo).hi (b.hi*a.lo).lo + // + (b.lo*a.lo).hi (b.lo*a.lo).lo + //=============================================================================== + // + // The (b.lo*a.lo).lo term cannot cause a carry, so we can ignore them for now. We also know from above, that we can add (b.lo*a.lo).hi + // and (b.hi*a.lo).lo to the 2N bit term [(b.lo*a.hi).hi + (b.lo*a.hi).lo] without overflow. That takes care of all of the terms + // on the right half that might carry. Do that now. 
+ // + cl_ulong aLobLoHi = aLobLo >> 32; + cl_ulong aLobHiLo = aLobHi & 0xFFFFFFFFULL; + aHibLo += aLobLoHi + aLobHiLo; + + // That leaves us with these terms: + // + // a.hi a.lo + // x b.hi b.lo + //=============================================================================== + // (b.hi*a.hi).hi (b.hi*a.hi).lo + // (b.hi*a.lo).hi + // [ (b.lo*a.hi).hi + (b.lo*a.hi).lo + other ] + // + (b.lo*a.lo).lo + //=============================================================================== + + // All of the overflow potential from the right half has now been accumulated into the [ (b.lo*a.hi).hi + (b.lo*a.hi).lo ] 2N bit term. + // We can safely separate into high and low parts. Per our rule above, we know we can accumulate the high part of that and (b.hi*a.lo).hi + // into the 2N bit term (b.lo*a.hi) without carry. The low part can be pieced together with (b.lo*a.lo).lo, to give the final low result + + destHi = aHibHi + (aHibLo >> 32 ) + (aLobHi >> 32); // Cant overflow + destLow = (aHibLo << 32) | ( aLobLo & 0xFFFFFFFFULL ); +} + +void multiply_signed_64_by_64( cl_long sourceA, cl_long sourceB, cl_ulong &destLow, cl_long &destHi ) +{ + // Find sign of result + cl_long aSign = sourceA >> 63; + cl_long bSign = sourceB >> 63; + cl_long resultSign = aSign ^ bSign; + + // take absolute values of the argument + sourceA = (sourceA ^ aSign) - aSign; + sourceB = (sourceB ^ bSign) - bSign; + + cl_ulong hi; + multiply_unsigned_64_by_64( (cl_ulong) sourceA, (cl_ulong) sourceB, destLow, hi ); + + // Fix the sign + if( resultSign ) + { + destLow ^= resultSign; + hi ^= resultSign; + destLow -= resultSign; + + //carry if necessary + if( 0 == destLow ) + hi -= resultSign; + } + + destHi = (cl_long) hi; +} + +bool verify_integer_mul_hi( void *sourceA, void *sourceB, void *destination, ExplicitType vecType ) +{ + cl_long testValueA, testValueB, highSigned; + cl_ulong highUnsigned, lowHalf; + + switch( vecType ) + { + case kChar: + testValueA = *( (cl_char *)sourceA ); + testValueB = 
*( (cl_char *)sourceB ); + *( (cl_char *)destination ) = (cl_char)( ( testValueA * testValueB ) >> 8 ); + break; + case kUChar: + testValueA = *( (cl_uchar *)sourceA ); + testValueB = *( (cl_uchar *)sourceB ); + *( (cl_uchar *)destination ) = (cl_uchar)( ( testValueA * testValueB ) >> 8 ); + break; + case kShort: + testValueA = *( (cl_short *)sourceA ); + testValueB = *( (cl_short *)sourceB ); + *( (cl_short *)destination ) = (cl_short)( ( testValueA * testValueB ) >> 16 ); + break; + case kUShort: + testValueA = *( (cl_ushort *)sourceA ); + testValueB = *( (cl_ushort *)sourceB ); + *( (cl_ushort *)destination ) = (cl_ushort)( ( testValueA * testValueB ) >> 16 ); + break; + case kInt: + testValueA = *( (cl_int *)sourceA ); + testValueB = *( (cl_int *)sourceB ); + *( (cl_int *)destination ) = (cl_int)( ( testValueA * testValueB ) >> 32 ); + break; + case kUInt: + testValueA = *( (cl_uint *)sourceA ); + testValueB = *( (cl_uint *)sourceB ); + *( (cl_uint *)destination ) = (cl_uint)( ( testValueA * testValueB ) >> 32 ); + break; + case kLong: + testValueA = *( (cl_long *)sourceA ); + testValueB = *( (cl_long *)sourceB ); + + multiply_signed_64_by_64( testValueA, testValueB, lowHalf, highSigned ); + *( (cl_long *)destination ) = highSigned; + break; + case kULong: + testValueA = *( (cl_ulong *)sourceA ); + testValueB = *( (cl_ulong *)sourceB ); + + multiply_unsigned_64_by_64( testValueA, testValueB, lowHalf, highUnsigned ); + *( (cl_ulong *)destination ) = highUnsigned; + break; + default: + // Should never happen + return false; + } + return true; +} + +int test_integer_mul_hi(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return test_two_param_integer_fn( queue, context, "mul_hi", verify_integer_mul_hi ); +} + +bool verify_integer_rotate( void *sourceA, void *sourceB, void *destination, ExplicitType vecType ) +{ + cl_ulong testValueA; + char numBits; + + switch( vecType ) + { + case kChar: + case kUChar: + testValueA = *( 
(cl_uchar *)sourceA ); + numBits = *( (cl_uchar *)sourceB ); + numBits &= 7; + if ( numBits == 0 ) + *( (cl_uchar *)destination ) = (cl_uchar)testValueA; + else + *( (cl_uchar *)destination ) = (cl_uchar)( ( testValueA << numBits ) | ( testValueA >> ( 8 - numBits ) ) ); + break; + case kShort: + case kUShort: + testValueA = *( (cl_ushort *)sourceA ); + numBits = *( (cl_ushort *)sourceB ); + numBits &= 15; + if ( numBits == 0 ) + *( (cl_ushort *)destination ) = (cl_ushort)testValueA; + else + *( (cl_ushort *)destination ) = (cl_ushort)( ( testValueA << numBits ) | ( testValueA >> ( 16 - numBits ) ) ); + break; + case kInt: + case kUInt: + testValueA = *( (cl_uint *)sourceA ); + numBits = *( (cl_uint *)sourceB ); + numBits &= 31; + if ( numBits == 0 ) + *( (cl_uint *)destination ) = (cl_uint) testValueA; + else + *( (cl_uint *)destination ) = (cl_uint)( ( testValueA << numBits ) | ( testValueA >> ( 32 - numBits ) ) ); + break; + case kLong: + case kULong: + testValueA = *( (cl_ulong *)sourceA ); + numBits = *( (cl_ulong *)sourceB ); + numBits &= 63; + if ( numBits == 0 ) + *( (cl_ulong *)destination ) = (cl_ulong)testValueA; + else + *( (cl_ulong *)destination ) = (cl_ulong)( ( testValueA << numBits ) | ( testValueA >> ( 64 - numBits ) ) ); + break; + default: + // Should never happen + log_error( "Unknown type encountered in verify_integer_rotate. Test failed. 
Aborting...\n" ); + abort(); + return false; + } + return true; +} + +int test_integer_rotate(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return test_two_param_integer_fn( queue, context, "rotate", verify_integer_rotate ); +} + +const char *threeParamIntegerKernelSourcePattern = +"__kernel void sample_test(__global %s%s *sourceA, __global %s%s *sourceB, __global %s%s *sourceC, __global %s%s *destValues)\n" +"{\n" +" int tid = get_global_id(0);\n" +" %s%s sA = %s;\n" +" %s%s sB = %s;\n" +" %s%s sC = %s;\n" +" %s%s dst = %s( sA, sB, sC );\n" +" %s;\n" +"\n" +"}\n"; + +typedef bool (*threeParamIntegerVerifyFn)( void *sourceA, void *sourceB, void *sourceC, void *destination, + ExplicitType vecAType, ExplicitType vecBType, ExplicitType vecCType, ExplicitType destType ); + +int test_three_param_integer_kernel(cl_command_queue queue, cl_context context, const char *fnName, + ExplicitType vecAType, ExplicitType vecBType, ExplicitType vecCType, ExplicitType destType, + unsigned int vecSize, threeParamIntegerVerifyFn verifyFn, MTdata d ) +{ + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper streams[4]; + cl_long inDataA[TEST_SIZE * 16], inDataB[TEST_SIZE * 16], inDataC[TEST_SIZE * 16], outData[TEST_SIZE * 16], expected; + int error, i; + size_t threads[1], localThreads[1]; + char kernelSource[10240]; + char *programPtr; + char sizeName[4], paramSizeName[4]; + + if (! gHasLong && strstr(get_explicit_type_name(vecAType),"long")) + { + log_info( "WARNING: 64 bit integers are not supported on this device. 
Skipping %s\n", get_explicit_type_name(vecAType) ); + return CL_SUCCESS; + } + + + /* Create the source */ + if( vecSize == 1 ) + sizeName[ 0 ] = 0; + else + sprintf( sizeName, "%d", vecSize ); + if( ( vecSize == 1 ) || ( vecSize == 3 ) ) + paramSizeName[ 0 ] = 0; + else + sprintf( paramSizeName, "%d", vecSize ); + + char sourceALoad[ 128 ], sourceBLoad[ 128 ], sourceCLoad[ 128 ], destStore[ 128 ]; + + sprintf( kernelSource, threeParamIntegerKernelSourcePattern, + get_explicit_type_name( vecAType ), paramSizeName, + get_explicit_type_name( vecBType ), paramSizeName, + get_explicit_type_name( vecCType ), paramSizeName, + get_explicit_type_name( destType ), paramSizeName, + get_explicit_type_name( vecAType ), sizeName, build_load_statement( sourceALoad, (size_t)vecSize, "sourceA" ), + get_explicit_type_name( vecBType ), sizeName, build_load_statement( sourceBLoad, (size_t)vecSize, "sourceB" ), + get_explicit_type_name( vecCType ), sizeName, build_load_statement( sourceCLoad, (size_t)vecSize, "sourceC" ), + get_explicit_type_name( destType ), sizeName, + fnName, + build_store_statement( destStore, (size_t)vecSize, "destValues", "dst" ) + ); + + /* Create kernels */ + programPtr = kernelSource; + if( create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&programPtr, "sample_test" ) ) + { + log_error("The program we attempted to compile was: \n%s\n", kernelSource); + return -1; + } + + /* Generate some streams */ + generate_random_data( vecAType, vecSize * TEST_SIZE, d, inDataA ); + generate_random_data( vecBType, vecSize * TEST_SIZE, d, inDataB ); + generate_random_data( vecCType, vecSize * TEST_SIZE, d, inDataC ); + + streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), get_explicit_type_size( vecAType ) * vecSize * TEST_SIZE, &inDataA, NULL); + if( streams[0] == NULL ) + { + log_error("ERROR: Creating input array A failed!\n"); + return -1; + } + streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), 
get_explicit_type_size( vecBType ) * vecSize * TEST_SIZE, &inDataB, NULL); + if( streams[1] == NULL ) + { + log_error("ERROR: Creating input array B failed!\n"); + return -1; + } + streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), get_explicit_type_size( vecCType ) * vecSize * TEST_SIZE, &inDataC, NULL); + if( streams[2] == NULL ) + { + log_error("ERROR: Creating input array C failed!\n"); + return -1; + } + streams[3] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), get_explicit_type_size( destType ) * vecSize * TEST_SIZE, NULL, NULL ); + if( streams[3] == NULL ) + { + log_error("ERROR: Creating output array failed!\n"); + return -1; + } + + /* Assign streams and execute */ + error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] ); + test_error( error, "Unable to set indexed kernel arguments" ); + error = clSetKernelArg( kernel, 1, sizeof( streams[1] ), &streams[1] ); + test_error( error, "Unable to set indexed kernel arguments" ); + error = clSetKernelArg( kernel, 2, sizeof( streams[2] ), &streams[2] ); + test_error( error, "Unable to set indexed kernel arguments" ); + error = clSetKernelArg( kernel, 3, sizeof( streams[3] ), &streams[3] ); + test_error( error, "Unable to set indexed kernel arguments" ); + + /* Run the kernel */ + threads[0] = TEST_SIZE; + + error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] ); + test_error( error, "Unable to get work group size to use" ); + + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); + test_error( error, "Unable to execute test kernel" ); + + memset(outData, 0xFF, get_explicit_type_size( destType ) * TEST_SIZE * vecSize); + + /* Now get the results */ + error = clEnqueueReadBuffer( queue, streams[3], CL_TRUE, 0, get_explicit_type_size( destType ) * TEST_SIZE * vecSize, outData, 0, NULL, NULL ); + test_error( error, "Unable to read output array!" ); + + /* And verify! 
*/ + char *inA = (char *)inDataA; + char *inB = (char *)inDataB; + char *inC = (char *)inDataC; + char *out = (char *)outData; + for( i = 0; i < (int)TEST_SIZE; i++ ) + { + for( size_t j = 0; j < vecSize; j++ ) + { + bool test = verifyFn( inA, inB, inC, &expected, vecAType, vecBType, vecCType, destType ); + if( test && ( memcmp( &expected, out, get_explicit_type_size( destType ) ) != 0 ) ) + { + switch( get_explicit_type_size( vecAType )) + { + case 1: + log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%2.2x), got (0x%2.2x), sources (0x%2.2x, 0x%2.2x, 0x%2.2x)\n", + (int)i, (int)j, ((cl_uchar*)&expected)[ 0 ], *( (cl_uchar *)out ), + *( (cl_uchar *)inA ), + *( (cl_uchar *)inB ), + *( (cl_uchar *)inC ) ); + break; + + case 2: + log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%4.4x), got (0x%4.4x), sources (0x%4.4x, 0x%4.4x, 0x%4.4x)\n", + (int)i, (int)j, ((cl_ushort*)&expected)[ 0 ], *( (cl_ushort *)out ), + *( (cl_ushort *)inA ), + *( (cl_ushort *)inB ), + *( (cl_ushort *)inC ) ); + break; + + case 4: + log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%8.8x), got (0x%8.8x), sources (0x%8.8x, 0x%8.8x, 0x%8.8x)\n", + (int)i, (int)j, ((cl_uint*)&expected)[ 0 ], *( (cl_uint *)out ), + *( (cl_uint *)inA ), + *( (cl_uint *)inB ), + *( (cl_uint *)inC ) ); + break; + + case 8: + log_error( "ERROR: Data sample %d:%d does not validate! 
Expected (0x%16.16llx), got (0x%16.16llx), sources (0x%16.16llx, 0x%16.16llx, 0x%16.16llx)\n", + (int)i, (int)j, ((cl_ulong*)&expected)[ 0 ], *( (cl_ulong *)out ), + *( (cl_ulong *)inA ), + *( (cl_ulong *)inB ), + *( (cl_ulong *)inC ) ); + break; + } + return -1; + } + inA += get_explicit_type_size( vecAType ); + inB += get_explicit_type_size( vecBType ); + inC += get_explicit_type_size( vecCType ); + out += get_explicit_type_size( destType ); + } + } + + return 0; +} + +int test_three_param_integer_fn(cl_command_queue queue, cl_context context, const char *fnName, threeParamIntegerVerifyFn verifyFn) +{ + ExplicitType types[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kNumExplicitTypes }; + unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 }; + unsigned int index, typeAIndex; + int retVal = 0; + RandomSeed seed(gRandomSeed); + + for( typeAIndex = 0; types[ typeAIndex ] != kNumExplicitTypes; typeAIndex++ ) + { + if ((types[ typeAIndex ] == kLong || types[ typeAIndex] == kULong) && !gHasLong) + continue; + + for( index = 0; vecSizes[ index ] != 0; index++ ) + { + if( test_three_param_integer_kernel(queue, context, fnName, types[ typeAIndex ], types[ typeAIndex ], types[ typeAIndex ], types[ typeAIndex ], vecSizes[ index ], verifyFn, seed ) != 0 ) + { + log_error( " Vector %s%d,%s%d,%s%d FAILED\n", get_explicit_type_name( types[ typeAIndex ] ), vecSizes[ index ], + get_explicit_type_name( types[ typeAIndex ] ), vecSizes[ index ] , + get_explicit_type_name( types[ typeAIndex ] ), vecSizes[ index ] ); + retVal = -1; + } + } + } + + return retVal; +} + +bool verify_integer_clamp( void *sourceA, void *sourceB, void *sourceC, void *destination, + ExplicitType vecAType, ExplicitType vecBType, ExplicitType vecCType, ExplicitType destType ) +{ + if( vecAType == kULong || vecAType == kUInt || vecAType == kUShort || vecAType == kUChar ) + { + cl_ulong valueA, valueB, valueC; + + switch( vecAType ) + { + case kULong: + valueA = ((cl_ulong*) sourceA)[0]; + 
valueB = ((cl_ulong*) sourceB)[0]; + valueC = ((cl_ulong*) sourceC)[0]; + break; + case kUInt: + valueA = ((cl_uint*) sourceA)[0]; + valueB = ((cl_uint*) sourceB)[0]; + valueC = ((cl_uint*) sourceC)[0]; + break; + case kUShort: + valueA = ((cl_ushort*) sourceA)[0]; + valueB = ((cl_ushort*) sourceB)[0]; + valueC = ((cl_ushort*) sourceC)[0]; + break; + case kUChar: + valueA = ((cl_uchar*) sourceA)[0]; + valueB = ((cl_uchar*) sourceB)[0]; + valueC = ((cl_uchar*) sourceC)[0]; + break; + default: + //error -- should never get here + abort(); + break; + } + + + if(valueB > valueC) { + return false; // results are undefined : let expected alone. + } + + switch( vecAType ) + { + case kULong: + ((cl_ulong*) destination)[0] = MAX(MIN(valueA, valueC), valueB); + break; + case kUInt: + ((cl_uint*) destination)[0] = (cl_uint) + (MAX(MIN(valueA, valueC), valueB)); + break; + case kUShort: + ((cl_ushort*) destination)[0] = (cl_ushort) + (MAX(MIN(valueA, valueC), valueB)); + break; + case kUChar: + ((cl_uchar*) destination)[0] = (cl_uchar) + (MAX(MIN(valueA, valueC), valueB)); + break; + default: + //error -- should never get here + abort(); + break; + } + + + + + } + else + { + cl_long valueA, valueB, valueC; + + + switch( vecAType ) + { + case kLong: + valueA = ((cl_long*) sourceA)[0]; + valueB = ((cl_long*) sourceB)[0]; + valueC = ((cl_long*) sourceC)[0]; + break; + case kInt: + valueA = ((cl_int*) sourceA)[0]; + valueB = ((cl_int*) sourceB)[0]; + valueC = ((cl_int*) sourceC)[0]; + break; + case kShort: + valueA = ((cl_short*) sourceA)[0]; + valueB = ((cl_short*) sourceB)[0]; + valueC = ((cl_short*) sourceC)[0]; + break; + case kChar: + valueA = ((cl_char*) sourceA)[0]; + valueB = ((cl_char*) sourceB)[0]; + valueC = ((cl_char*) sourceC)[0]; + break; + default: + //error -- should never get here + abort(); + break; + } + + if(valueB > valueC) { + return false; // undefined behavior : leave "expected" alone + } + + switch( vecAType ) + { + case kLong: + ((cl_long*) 
destination)[0] = MAX(MIN(valueA, valueC), valueB); + break; + case kInt: + ((cl_int*) destination)[0] = (cl_int) + (MAX(MIN(valueA, valueC), valueB)); + break; + case kShort: + ((cl_short*) destination)[0] = (cl_short) + (MAX(MIN(valueA, valueC), valueB)); + break; + case kChar: + ((cl_char*) destination)[0] = (cl_char) + (MAX(MIN(valueA, valueC), valueB)); + break; + default: + //error -- should never get here + abort(); + break; + } + + } + return true; +} + +int test_integer_clamp(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return test_three_param_integer_fn( queue, context, "clamp", verify_integer_clamp ); +} + +bool verify_integer_mad_sat( void *sourceA, void *sourceB, void *sourceC, void *destination, + ExplicitType vecAType, ExplicitType vecBType, ExplicitType vecCType, ExplicitType destType ) +{ + if( vecAType == kULong || vecAType == kUInt || vecAType == kUShort || vecAType == kUChar ) + { + cl_ulong valueA, valueB, valueC; + + switch( vecAType ) + { + case kULong: + valueA = ((cl_ulong*) sourceA)[0]; + valueB = ((cl_ulong*) sourceB)[0]; + valueC = ((cl_ulong*) sourceC)[0]; + break; + case kUInt: + valueA = ((cl_uint*) sourceA)[0]; + valueB = ((cl_uint*) sourceB)[0]; + valueC = ((cl_uint*) sourceC)[0]; + break; + case kUShort: + valueA = ((cl_ushort*) sourceA)[0]; + valueB = ((cl_ushort*) sourceB)[0]; + valueC = ((cl_ushort*) sourceC)[0]; + break; + case kUChar: + valueA = ((cl_uchar*) sourceA)[0]; + valueB = ((cl_uchar*) sourceB)[0]; + valueC = ((cl_uchar*) sourceC)[0]; + break; + default: + //error -- should never get here + abort(); + break; + } + + cl_ulong multHi, multLo; + multiply_unsigned_64_by_64( valueA, valueB, multLo, multHi ); + + multLo += valueC; + multHi += multLo < valueC; // carry if overflow + if( multHi ) + multLo = 0xFFFFFFFFFFFFFFFFULL; + + switch( vecAType ) + { + case kULong: + ((cl_ulong*) destination)[0] = multLo; + break; + case kUInt: + ((cl_uint*) destination)[0] = (cl_uint) MIN( 
multLo, (cl_ulong) CL_UINT_MAX ); + break; + case kUShort: + ((cl_ushort*) destination)[0] = (cl_ushort) MIN( multLo, (cl_ulong) CL_USHRT_MAX ); + break; + case kUChar: + ((cl_uchar*) destination)[0] = (cl_uchar) MIN( multLo, (cl_ulong) CL_UCHAR_MAX ); + break; + default: + //error -- should never get here + abort(); + break; + } + } + else + { + cl_long valueA, valueB, valueC; + + switch( vecAType ) + { + case kLong: + valueA = ((cl_long*) sourceA)[0]; + valueB = ((cl_long*) sourceB)[0]; + valueC = ((cl_long*) sourceC)[0]; + break; + case kInt: + valueA = ((cl_int*) sourceA)[0]; + valueB = ((cl_int*) sourceB)[0]; + valueC = ((cl_int*) sourceC)[0]; + break; + case kShort: + valueA = ((cl_short*) sourceA)[0]; + valueB = ((cl_short*) sourceB)[0]; + valueC = ((cl_short*) sourceC)[0]; + break; + case kChar: + valueA = ((cl_char*) sourceA)[0]; + valueB = ((cl_char*) sourceB)[0]; + valueC = ((cl_char*) sourceC)[0]; + break; + default: + //error -- should never get here + abort(); + break; + } + + cl_long multHi; + cl_ulong multLo; + multiply_signed_64_by_64( valueA, valueB, multLo, multHi ); + + cl_ulong sum = multLo + valueC; + // carry if overflow + if( valueC >= 0 ) + { + if( multLo > sum ) + { + multHi++; + if( CL_LONG_MIN == multHi ) + { + multHi = CL_LONG_MAX; + sum = CL_ULONG_MAX; + } + } + } + else + { + if( multLo < sum ) + { + multHi--; + if( CL_LONG_MAX == multHi ) + { + multHi = CL_LONG_MIN; + sum = 0; + } + } + } + + // saturate + if( multHi > 0 ) + sum = CL_LONG_MAX; + else if( multHi < -1 ) + sum = CL_LONG_MIN; + cl_long result = (cl_long) sum; + + switch( vecAType ) + { + case kLong: + ((cl_long*) destination)[0] = result; + break; + case kInt: + result = MIN( result, (cl_long) CL_INT_MAX ); + result = MAX( result, (cl_long) CL_INT_MIN ); + ((cl_int*) destination)[0] = (cl_int) result; + break; + case kShort: + result = MIN( result, (cl_long) CL_SHRT_MAX ); + result = MAX( result, (cl_long) CL_SHRT_MIN ); + ((cl_short*) destination)[0] = (cl_short) 
result; + break; + case kChar: + result = MIN( result, (cl_long) CL_CHAR_MAX ); + result = MAX( result, (cl_long) CL_CHAR_MIN ); + ((cl_char*) destination)[0] = (cl_char) result; + break; + default: + //error -- should never get here + abort(); + break; + } + } + return true; +} + +int test_integer_mad_sat(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return test_three_param_integer_fn( queue, context, "mad_sat", verify_integer_mad_sat ); +} + +bool verify_integer_mad_hi( void *sourceA, void *sourceB, void *sourceC, void *destination, + ExplicitType vecAType, ExplicitType vecBType, ExplicitType vecCType, ExplicitType destType ) +{ + if( vecAType == kULong || vecAType == kUInt || vecAType == kUShort || vecAType == kUChar ) + { + cl_ulong valueA, valueB, valueC; + + switch( vecAType ) + { + case kULong: + valueA = ((cl_ulong*) sourceA)[0]; + valueB = ((cl_ulong*) sourceB)[0]; + valueC = ((cl_ulong*) sourceC)[0]; + break; + case kUInt: + valueA = ((cl_uint*) sourceA)[0]; + valueB = ((cl_uint*) sourceB)[0]; + valueC = ((cl_uint*) sourceC)[0]; + break; + case kUShort: + valueA = ((cl_ushort*) sourceA)[0]; + valueB = ((cl_ushort*) sourceB)[0]; + valueC = ((cl_ushort*) sourceC)[0]; + break; + case kUChar: + valueA = ((cl_uchar*) sourceA)[0]; + valueB = ((cl_uchar*) sourceB)[0]; + valueC = ((cl_uchar*) sourceC)[0]; + break; + default: + //error -- should never get here + abort(); + break; + } + + cl_ulong multHi, multLo; + multiply_unsigned_64_by_64( valueA, valueB, multLo, multHi ); + + switch( vecAType ) + { + case kULong: + ((cl_ulong*) destination)[0] = multHi + valueC; + break; + case kUInt: + ((cl_uint*) destination)[0] = (cl_uint) (( multLo >> 32) + valueC ); + break; + case kUShort: + ((cl_ushort*) destination)[0] = (cl_ushort) (( multLo >> 16) + valueC ); + break; + case kUChar: + ((cl_uchar*) destination)[0] = (cl_uchar) (( multLo >> 8) + valueC ); + break; + default: + //error -- should never get here + abort(); + 
break; + } + } + else + { + cl_long valueA, valueB, valueC; + + switch( vecAType ) + { + case kLong: + valueA = ((cl_long*) sourceA)[0]; + valueB = ((cl_long*) sourceB)[0]; + valueC = ((cl_long*) sourceC)[0]; + break; + case kInt: + valueA = ((cl_int*) sourceA)[0]; + valueB = ((cl_int*) sourceB)[0]; + valueC = ((cl_int*) sourceC)[0]; + break; + case kShort: + valueA = ((cl_short*) sourceA)[0]; + valueB = ((cl_short*) sourceB)[0]; + valueC = ((cl_short*) sourceC)[0]; + break; + case kChar: + valueA = ((cl_char*) sourceA)[0]; + valueB = ((cl_char*) sourceB)[0]; + valueC = ((cl_char*) sourceC)[0]; + break; + default: + //error -- should never get here + abort(); + break; + } + + cl_long multHi; + cl_ulong multLo; + multiply_signed_64_by_64( valueA, valueB, multLo, multHi ); + + switch( vecAType ) + { + case kLong: + ((cl_long*) destination)[0] = multHi + valueC; + break; + case kInt: + ((cl_int*) destination)[0] = (cl_int) ((multLo >> 32) + valueC); + break; + case kShort: + ((cl_short*) destination)[0] = (cl_int) ((multLo >> 16) + valueC); + break; + case kChar: + ((cl_char*) destination)[0] = (cl_char) (cl_int) ((multLo >> 8) + valueC); + break; + default: + //error -- should never get here + abort(); + break; + } + } + return true; +} + +int test_integer_mad_hi( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return test_three_param_integer_fn( queue, context, "mad_hi", verify_integer_mad_hi ); +} + + diff --git a/test_conformance/integer_ops/test_intmad24.c b/test_conformance/integer_ops/test_intmad24.c new file mode 100644 index 00000000..d513442c --- /dev/null +++ b/test_conformance/integer_ops/test_intmad24.c @@ -0,0 +1,365 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include + +#include "procs.h" + +#define NUM_PROGRAMS 6 + +static const int vector_sizes[] = {1, 2, 3, 4, 8, 16}; + + +const char *int_mad24_kernel_code = +"__kernel void test_int_mad24(__global int *srcA, __global int *srcB, __global int *srcC, __global int *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = mad24(srcA[tid], srcB[tid], srcC[tid]);\n" +"}\n"; + +const char *int2_mad24_kernel_code = +"__kernel void test_int2_mad24(__global int2 *srcA, __global int2 *srcB, __global int2 *srcC, __global int2 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = mad24(srcA[tid], srcB[tid], srcC[tid]);\n" +"}\n"; + +const char *int3_mad24_kernel_code = +"__kernel void test_int3_mad24(__global int *srcA, __global int *srcB, __global int *srcC, __global int *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +" int3 tmp = mad24(vload3(tid, srcA), vload3(tid, srcB), vload3(tid, srcC));\n" +" vstore3(tmp, tid, dst);\n" +"}\n"; + +const char *int4_mad24_kernel_code = +"__kernel void test_int4_mad24(__global int4 *srcA, __global int4 *srcB, __global int4 *srcC, __global int4 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = mad24(srcA[tid], srcB[tid], srcC[tid]);\n" +"}\n"; + +const char *int8_mad24_kernel_code = +"__kernel void test_int8_mad24(__global int8 *srcA, __global int8 *srcB, __global int8 *srcC, __global int8 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = mad24(srcA[tid], srcB[tid], 
srcC[tid]);\n" +"}\n"; + +const char *int16_mad24_kernel_code = +"__kernel void test_int16_mad24(__global int16 *srcA, __global int16 *srcB, __global int16 *srcC, __global int16 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = mad24(srcA[tid], srcB[tid], srcC[tid]);\n" +"}\n"; + + +const char *uint_mad24_kernel_code = +"__kernel void test_uint_mad24(__global uint *srcA, __global uint *srcB, __global uint *srcC, __global uint *dst)\n" +"{\n" +" uint tid = get_global_id(0);\n" +"\n" +" dst[tid] = mad24(srcA[tid], srcB[tid], srcC[tid]);\n" +"}\n"; + +const char *uint2_mad24_kernel_code = +"__kernel void test_uint2_mad24(__global uint2 *srcA, __global uint2 *srcB, __global uint2 *srcC, __global uint2 *dst)\n" +"{\n" +" uint tid = get_global_id(0);\n" +"\n" +" dst[tid] = mad24(srcA[tid], srcB[tid], srcC[tid]);\n" +"}\n"; + +const char *uint3_mad24_kernel_code = +"__kernel void test_uint3_mad24(__global uint *srcA, __global uint *srcB, __global uint *srcC, __global uint *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +" uint3 tmp = mad24(vload3(tid, srcA), vload3(tid, srcB), vload3(tid, srcC));\n" +" vstore3(tmp, tid, dst);\n" +"}\n"; + + +const char *uint4_mad24_kernel_code = +"__kernel void test_uint4_mad24(__global uint4 *srcA, __global uint4 *srcB, __global uint4 *srcC, __global uint4 *dst)\n" +"{\n" +" uint tid = get_global_id(0);\n" +"\n" +" dst[tid] = mad24(srcA[tid], srcB[tid], srcC[tid]);\n" +"}\n"; + +const char *uint8_mad24_kernel_code = +"__kernel void test_uint8_mad24(__global uint8 *srcA, __global uint8 *srcB, __global uint8 *srcC, __global uint8 *dst)\n" +"{\n" +" uint tid = get_global_id(0);\n" +"\n" +" dst[tid] = mad24(srcA[tid], srcB[tid], srcC[tid]);\n" +"}\n"; + +const char *uint16_mad24_kernel_code = +"__kernel void test_uint16_mad24(__global uint16 *srcA, __global uint16 *srcB, __global uint16 *srcC, __global uint16 *dst)\n" +"{\n" +" uint tid = get_global_id(0);\n" +"\n" +" dst[tid] = mad24(srcA[tid], srcB[tid], 
srcC[tid]);\n" +"}\n"; + + +int +verify_int_mad24(int *inptrA, int *inptrB, int *inptrC, int *outptr, size_t n, size_t vecSize) +{ + int r; + size_t i; + + for (i=0; i> 8; +} + +static inline int random_int32( MTdata d ) +{ + return genrand_int32(d); +} + + +int +test_intmad24(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + cl_mem streams[4]; + cl_int *input_ptr[3], *output_ptr, *p; + + cl_program program[2*NUM_PROGRAMS]; + cl_kernel kernel[2*NUM_PROGRAMS]; + size_t threads[1]; + + int num_elements; + int err; + int i; + MTdata d; + + size_t length = sizeof(cl_int) * 16 * n_elems; + num_elements = n_elems * 16; + + input_ptr[0] = (cl_int*)malloc(length); + input_ptr[1] = (cl_int*)malloc(length); + input_ptr[2] = (cl_int*)malloc(length); + output_ptr = (cl_int*)malloc(length); + + streams[0] = clCreateBuffer(context, 0, length, NULL, &err); + test_error(err, "clCreateBuffer failed"); + streams[1] = clCreateBuffer(context, 0, length, NULL, &err); + test_error(err, "clCreateBuffer failed"); + streams[2] = clCreateBuffer(context, 0, length, NULL, &err); + test_error(err, "clCreateBuffer failed"); + streams[3] = clCreateBuffer(context, 0, length, NULL, &err); + test_error(err, "clCreateBuffer failed"); + + d = init_genrand( gRandomSeed ); + p = input_ptr[0]; + for (i=0; i +#include +#include +#include + +#include "procs.h" + +#define NUM_PROGRAMS 6 + +static const int vector_sizes[] = {1, 2, 3, 4, 8, 16}; + + +const char *int_mul24_kernel_code = +"__kernel void test_int_mul24(__global int *srcA, __global int *srcB, __global int *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = mul24(srcA[tid], srcB[tid]);\n" +"}\n"; + +const char *int2_mul24_kernel_code = +"__kernel void test_int2_mul24(__global int2 *srcA, __global int2 *srcB, __global int2 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = mul24(srcA[tid], srcB[tid]);\n" +"}\n"; + +const char *int3_mul24_kernel_code = +"__kernel void 
test_int3_mul24(__global int *srcA, __global int *srcB, __global int *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +" int3 tmp = mul24(vload3(tid, srcA), vload3(tid, srcB));\n" +" vstore3(tmp, tid, dst);\n" +"}\n"; + +const char *int4_mul24_kernel_code = +"__kernel void test_int4_mul24(__global int4 *srcA, __global int4 *srcB, __global int4 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = mul24(srcA[tid], srcB[tid]);\n" +"}\n"; + +const char *int8_mul24_kernel_code = +"__kernel void test_int8_mul24(__global int8 *srcA, __global int8 *srcB, __global int8 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = mul24(srcA[tid], srcB[tid]);\n" +"}\n"; + +const char *int16_mul24_kernel_code = +"__kernel void test_int16_mul24(__global int16 *srcA, __global int16 *srcB, __global int16 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = mul24(srcA[tid], srcB[tid]);\n" +"}\n"; + +const char *uint_mul24_kernel_code = +"__kernel void test_int_mul24(__global uint *srcA, __global uint *srcB, __global uint *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = mul24(srcA[tid], srcB[tid]);\n" +"}\n"; + +const char *uint2_mul24_kernel_code = +"__kernel void test_int2_mul24(__global uint2 *srcA, __global uint2 *srcB, __global uint2 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = mul24(srcA[tid], srcB[tid]);\n" +"}\n"; + +const char *uint3_mul24_kernel_code = +"__kernel void test_int3_mul24(__global uint *srcA, __global uint *srcB, __global uint *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +" uint3 tmp = mul24(vload3(tid, srcA), vload3(tid, srcB));\n" +" vstore3(tmp, tid, dst);\n" +"}\n"; + +const char *uint4_mul24_kernel_code = +"__kernel void test_int4_mul24(__global uint4 *srcA, __global uint4 *srcB, __global uint4 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = mul24(srcA[tid], srcB[tid]);\n" +"}\n"; + +const char *uint8_mul24_kernel_code = +"__kernel void 
test_int8_mul24(__global uint8 *srcA, __global uint8 *srcB, __global uint8 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = mul24(srcA[tid], srcB[tid]);\n" +"}\n"; + +const char *uint16_mul24_kernel_code = +"__kernel void test_int16_mul24(__global uint16 *srcA, __global uint16 *srcB, __global uint16 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = mul24(srcA[tid], srcB[tid]);\n" +"}\n"; + + +int +verify_int_mul24(int *inptrA, int *inptrB, int *outptr, size_t n, size_t vecSize) +{ + int r; + size_t i; + + for (i=0; i> 8; + int b = (inptrB[i] << 8 ) >> 8; + r = a * b; + if (r != outptr[i]) + return -1; + } + + return 0; +} + +int +verify_uint_mul24(cl_uint *inptrA, cl_uint *inptrB, cl_uint *outptr, size_t n, size_t vecSize) +{ + cl_uint r; + size_t i; + + for (i=0; i> 8; +} + + +static const char *test_str_names[] = { "int", "int2", "int3", "int4", "int8", "int16", "uint", "uint2", "uint3", "uint4", "uint8", "uint16" }; + +int +test_intmul24(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + cl_mem streams[3]; + cl_int *input_ptr[2], *output_ptr, *p; + + cl_program program[NUM_PROGRAMS*2]; + cl_kernel kernel[NUM_PROGRAMS*2]; + size_t threads[1]; + + int num_elements; + int err; + int i; + MTdata d; + + size_t length = sizeof(cl_int) * 16 * n_elems; + num_elements = n_elems * 16; + + input_ptr[0] = (cl_int*)malloc(length); + input_ptr[1] = (cl_int*)malloc(length); + output_ptr = (cl_int*)malloc(length); + + streams[0] = clCreateBuffer(context, 0, length, NULL, NULL); + if (!streams[0]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + streams[1] = clCreateBuffer(context, 0, length, NULL, NULL); + if (!streams[1]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + streams[2] = clCreateBuffer(context, 0, length, NULL, NULL); + if (!streams[2]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + + d = init_genrand( gRandomSeed ); + p = input_ptr[0]; + for (i=0; i 
+#include +#include +#include + +#include "procs.h" + +#define str(s) #s + +#define __popcnt(x, __T, __n, __r) \ + { \ + __T y = x; \ + __r = 0; \ + int k; \ + for(k = 0; k < __n; k++) \ + { \ + if(y & (__T)0x1) __r++; \ + y >>= (__T)1; \ + } \ + } + +#define __verify_popcount_func(__T) \ + static int verify_popcount_##__T( const void *p, const void *r, size_t n, const char *sizeName, size_t vecSize ) \ + { \ + const __T *inA = (const __T *) p; \ + const __T *outptr = (const __T *) r; \ + size_t i; \ + int _n = sizeof(__T)*8; \ + __T ref; \ + for(i = 0; i < n; i++) \ + { \ + __T x = inA[i]; \ + __T res = outptr[i]; \ + __popcnt(x, __T, _n, ref); \ + if(res != ref) \ + { \ + log_info( "%ld) Failure for popcount( (%s%s) 0x%x ) = *%d vs %d\n", i, str(__T), sizeName, x, (int)ref, (int)res ); \ + return -1; \ + }\ + } \ + return 0; \ + } + +__verify_popcount_func(cl_char); +__verify_popcount_func(cl_uchar); +__verify_popcount_func(cl_short); +__verify_popcount_func(cl_ushort); +__verify_popcount_func(cl_int); +__verify_popcount_func(cl_uint); +__verify_popcount_func(cl_long); +__verify_popcount_func(cl_ulong); + +typedef int (*verifyFunc)( const void *, const void *, size_t n, const char *sizeName, size_t vecSize); +static const verifyFunc verify[] = { verify_popcount_cl_char, verify_popcount_cl_uchar, + verify_popcount_cl_short, verify_popcount_cl_ushort, + verify_popcount_cl_int, verify_popcount_cl_uint, + verify_popcount_cl_long, verify_popcount_cl_ulong }; + +static const char *test_str_names[] = { "char", "uchar", "short", "ushort", "int", "uint", "long", "ulong" }; + +static const int vector_sizes[] = {1, 2, 3, 4, 8, 16}; +static const char *vector_size_names[] = { "", "2", "3", "4", "8", "16" }; +static const char *vector_param_size_names[] = { "", "2", "", "4", "8", "16" }; +static const size_t kSizes[8] = { 1, 1, 2, 2, 4, 4, 8, 8 }; + +static void printSrc(const char *src[], int nSrcStrings) { + int i; + for(i = 0; i < nSrcStrings; ++i) { + log_info("%s", 
src[i]); + } +} + +int test_popcount(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + cl_int *input_ptr[1], *output_ptr, *p; + int err; + int i; + cl_uint vectorSize; + cl_uint type; + MTdata d; + int fail_count = 0; + + size_t length = sizeof(cl_int) * 8 * n_elems; + + input_ptr[0] = (cl_int*)malloc(length); + output_ptr = (cl_int*)malloc(length); + + d = init_genrand( gRandomSeed ); + p = input_ptr[0]; + for (i=0; i<8 * n_elems; i++) + p[i] = genrand_int32(d); + free_mtdata(d); d = NULL; + + for( type = 0; type < sizeof( test_str_names ) / sizeof( test_str_names[0] ); type++ ) + { + //embedded devices don't support long/ulong so skip over + if (! gHasLong && strstr(test_str_names[type],"long")) + { + log_info( "WARNING: 64 bit integers are not supported on this device. Skipping %s\n", test_str_names[type] ); + continue; + } + + verifyFunc f = verify[ type ]; + // Note: restrict the element count here so we don't end up overrunning the output buffer if we're compensating for 32-bit writes + size_t elementCount = length / kSizes[type]; + cl_mem streams[2]; + + log_info( "%s", test_str_names[type] ); + fflush( stdout ); + + // Set up data streams for the type + streams[0] = clCreateBuffer(context, 0, length, NULL, NULL); + if (!streams[0]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + streams[1] = clCreateBuffer(context, 0, length, NULL, NULL); + if (!streams[1]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + + err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL); + if (err != CL_SUCCESS) + { + log_error("clEnqueueWriteBuffer failed\n"); + return -1; + } + + for( vectorSize = 0; vectorSize < sizeof( vector_size_names ) / sizeof( vector_size_names[0] ); vectorSize++ ) + { + cl_program program = NULL; + cl_kernel kernel = NULL; + + const char *source[] = { + "__kernel void test_popcount_", test_str_names[type], vector_size_names[vectorSize], + "(__global ", 
test_str_names[type], vector_param_size_names[vectorSize], + " *srcA, __global ", test_str_names[type], vector_param_size_names[vectorSize], + " *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " ", test_str_names[type], vector_size_names[vectorSize], " sA;\n", + " sA = ", ( vector_sizes[ vectorSize ] == 3 ) ? "vload3( tid, srcA )" : "srcA[tid]", ";\n", + " ", test_str_names[type], vector_size_names[vectorSize], " dstVal = popcount(sA);\n" + " ", ( vector_sizes[ vectorSize ] == 3 ) ? "vstore3( dstVal, tid, dst )" : "dst[ tid ] = dstVal", ";\n", + "}\n" }; + + + char kernelName[128]; + snprintf( kernelName, sizeof( kernelName ), "test_popcount_%s%s", test_str_names[type], vector_size_names[vectorSize] ); + + err = create_single_kernel_helper(context, &program, &kernel, sizeof( source ) / sizeof( source[0] ), source, kernelName ); + + if (err) { + return -1; + } + + err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]); + err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]); + if (err != CL_SUCCESS) + { + log_error("clSetKernelArgs failed\n"); + return -1; + } + + //Wipe the output buffer clean + uint32_t pattern = 0xdeadbeef; + memset_pattern4( output_ptr, &pattern, length ); + err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + log_error("clEnqueueWriteBuffer failed\n"); + return -1; + } + + size_t size = elementCount / (vector_sizes[vectorSize]); + err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &size, NULL, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + log_error("clEnqueueNDRangeKernel failed\n"); + return -1; + } + + err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + log_error("clEnqueueReadBuffer failed\n"); + return -1; + } + + char *inP = (char *)input_ptr[0]; + char *outP = (char *)output_ptr; + + for( size_t e = 0; e < size; e++ ) + { + if( f( inP, outP, 
(vector_sizes[vectorSize]), vector_size_names[vectorSize], vector_sizes[vectorSize] ) ) { + printSrc(source, sizeof(source)/sizeof(source[0])); + ++fail_count; break; // return -1; + } + inP += kSizes[type] * ( (vector_sizes[vectorSize]) ); + outP += kSizes[type] * ( (vector_sizes[vectorSize]) ); + } + + clReleaseKernel( kernel ); + clReleaseProgram( program ); + log_info( "." ); + fflush( stdout ); + } + + clReleaseMemObject( streams[0] ); + clReleaseMemObject( streams[1] ); + log_info( "done\n" ); + } + + + if(fail_count) { + log_info("Failed on %d types\n", fail_count); + return -1; + } + log_info("popcount test passed\n"); + + free(input_ptr[0]); + free(output_ptr); + + return err; +} + + diff --git a/test_conformance/integer_ops/test_sub_sat.c b/test_conformance/integer_ops/test_sub_sat.c new file mode 100644 index 00000000..cc8a902d --- /dev/null +++ b/test_conformance/integer_ops/test_sub_sat.c @@ -0,0 +1,376 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include +#include + +#include "procs.h" + +#define UCHAR_MIN 0 +#define USHRT_MIN 0 +#define UINT_MIN 0 + +#ifndef MAX +#define MAX( _a, _b ) ( (_a) > (_b) ? (_a) : (_b) ) +#endif +#ifndef MIN +#define MIN( _a, _b ) ( (_a) < (_b) ? 
(_a) : (_b) ) +#endif + + +static int verify_subsat_char( const cl_char *inA, const cl_char *inB, const cl_char *outptr, int n, const char *sizeName, int vecSize ) +{ + int i; + for( i = 0; i < n; i++ ) + { + cl_int r = (cl_int) inA[i] - (cl_int) inB[i]; + r = MAX( r, CL_CHAR_MIN ); + r = MIN( r, CL_CHAR_MAX ); + + if( r != outptr[i] ) + { log_info( "\n%d) Failure for sub_sat( (char%s) 0x%2.2x, (char%s) 0x%2.2x) = *0x%2.2x vs 0x%2.2x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; } + } + return 0; +} + +static int verify_subsat_uchar( const cl_uchar *inA, const cl_uchar *inB, const cl_uchar *outptr, int n, const char *sizeName, int vecSize ) +{ + int i; + for( i = 0; i < n; i++ ) + { + cl_int r = (cl_int) inA[i] - (cl_int) inB[i]; + r = MAX( r, 0 ); + r = MIN( r, CL_UCHAR_MAX ); + if( r != outptr[i] ) + { log_info( "\n%d) Failure for sub_sat( (uchar%s) 0x%2.2x, (uchar%s) 0x%2.2x) = *0x%2.2x vs 0x%2.2x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; } + } + return 0; +} + +static int verify_subsat_short( const cl_short *inA, const cl_short *inB, const cl_short *outptr, int n, const char *sizeName, int vecSize ) +{ + int i; + for( i = 0; i < n; i++ ) + { + cl_int r = (cl_int) inA[i] - (cl_int) inB[i]; + r = MAX( r, CL_SHRT_MIN ); + r = MIN( r, CL_SHRT_MAX ); + + if( r != outptr[i] ) + { log_info( "\n%d) Failure for sub_sat( (short%s) 0x%4.4x, (short%s) 0x%4.4x) = *0x%4.4x vs 0x%4.4x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; } + } + return 0; +} + +static int verify_subsat_ushort( const cl_ushort *inA, const cl_ushort *inB, const cl_ushort *outptr, int n, const char *sizeName , int vecSize) +{ + int i; + for( i = 0; i < n; i++ ) + { + cl_int r = (cl_int) inA[i] - (cl_int) inB[i]; + r = MAX( r, 0 ); + r = MIN( r, CL_USHRT_MAX ); + + if( r != outptr[i] ) + { log_info( "\n%d) Failure for sub_sat( (ushort%s) 0x%4.4x, (ushort%s) 0x%4.4x) = *0x%4.4x vs 0x%4.4x\n", i, sizeName, inA[i], sizeName, inB[i], 
r, outptr[i] ); return -1; } + } + return 0; +} + +static int verify_subsat_int( const cl_int *inA, const cl_int *inB, const cl_int *outptr, int n, const char *sizeName , int vecSize) +{ + int i; + for( i = 0; i < n; i++ ) + { + cl_int r = (cl_int) ((cl_uint)inA[i] - (cl_uint)inB[i]); + if( inB[i] < 0 ) + { + if( r < inA[i] ) + r = CL_INT_MAX; + } + else + { + if( r > inA[i] ) + r = CL_INT_MIN; + } + + + if( r != outptr[i] ) + { log_info( "\n%d) Failure for sub_sat( (int%s) 0x%8.8x, (int%s) 0x%8.8x) = *0x%8.8x vs 0x%8.8x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; } + } + return 0; +} + +static int verify_subsat_uint( const cl_uint *inA, const cl_uint *inB, const cl_uint *outptr, int n, const char *sizeName , int vecSize) +{ + int i; + for( i = 0; i < n; i++ ) + { + cl_uint r = inA[i] - inB[i]; + if( inA[i] < inB[i] ) + r = 0; + + if( r != outptr[i] ) + { log_info( "\n%d) Failure for sub_sat( (uint%s) 0x%8.8x, (uint%s) 0x%8.8x) = *0x%8.8x vs 0x%8.8x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; } + } + return 0; +} + +static int verify_subsat_long( const cl_long *inA, const cl_long *inB, const cl_long *outptr, int n, const char *sizeName , int vecSize) +{ + int i; + for( i = 0; i < n; i++ ) + { + cl_long r = (cl_long)((cl_ulong)inA[i] - (cl_ulong)inB[i]); + if( inB[i] < 0 ) + { + if( r < inA[i] ) + r = CL_LONG_MAX; + } + else + { + if( r > inA[i] ) + r = CL_LONG_MIN; + } + if( r != outptr[i] ) + { log_info( "%d) Failure for sub_sat( (long%s) 0x%16.16llx, (long%s) 0x%16.16llx) = *0x%16.16llx vs 0x%16.16llx\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; } + } + return 0; +} + +static int verify_subsat_ulong( const cl_ulong *inA, const cl_ulong *inB, const cl_ulong *outptr, int n, const char *sizeName , int vecSize) +{ + int i; + for( i = 0; i < n; i++ ) + { + cl_ulong r = inA[i] - inB[i]; + if( inA[i] < inB[i] ) + r = 0; + if( r != outptr[i] ) + { log_info( "%d) Failure for sub_sat( (ulong%s) 
0x%16.16llx, (ulong%s) 0x%16.16llx) = *0x%16.16llx vs 0x%16.16llx\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; } + } + return 0; +} + +typedef int (*verifyFunc)( const void *, const void *, const void *, int n, const char *sizeName, int ); +static const verifyFunc verify[] = { (verifyFunc) verify_subsat_char, (verifyFunc) verify_subsat_uchar, + (verifyFunc) verify_subsat_short, (verifyFunc) verify_subsat_ushort, + (verifyFunc) verify_subsat_int, (verifyFunc) verify_subsat_uint, + (verifyFunc) verify_subsat_long, (verifyFunc) verify_subsat_ulong }; + +static const char *test_str_names[] = { "char", "uchar", "short", "ushort", "int", "uint", "long", "ulong" }; +static const int vector_sizes[] = {1, 2, 3, 4, 8, 16}; +static const char *vector_size_names[] = { "", "2", "3", "4", "8", "16" }; + +static const size_t kSizes[8] = { 1, 1, 2, 2, 4, 4, 8, 8 }; + +int test_sub_sat(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + int *input_ptr[2], *output_ptr, *p; + int err; + cl_uint i; + cl_uint vectorSize; + cl_uint type; + MTdata d; + int fail_count = 0; + + size_t length = sizeof(int) * 4 * n_elems; + + input_ptr[0] = (int*)malloc(length); + input_ptr[1] = (int*)malloc(length); + output_ptr = (int*)malloc(length); + + d = init_genrand( gRandomSeed ); + p = input_ptr[0]; + for (i=0; i<4 * (cl_uint) n_elems; i++) + p[i] = genrand_int32(d); + p = input_ptr[1]; + for (i=0; i<4 * (cl_uint) n_elems; i++) + p[i] = genrand_int32(d); + free_mtdata(d); d = NULL; + + for( type = 0; type < sizeof( test_str_names ) / sizeof( test_str_names[0] ); type++ ) + { + + //embedded devices don't support long/ulong so skip over + if (! gHasLong && strstr(test_str_names[type],"long")) + { + log_info( "WARNING: device does not support 64-bit integers. 
Skipping %s\n", test_str_names[type] ); + continue; + } + + verifyFunc f = verify[ type ]; + // Note: restrict the element count here so we don't end up overrunning the output buffer if we're compensating for 32-bit writes + size_t elementCount = length / kSizes[type]; + cl_mem streams[3]; + + log_info( "%s", test_str_names[type] ); + fflush( stdout ); + + // Set up data streams for the type + streams[0] = clCreateBuffer(context, 0, length, NULL, NULL); + if (!streams[0]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + streams[1] = clCreateBuffer(context, 0, length, NULL, NULL); + if (!streams[1]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + streams[2] = clCreateBuffer(context, 0, length, NULL, NULL); + if (!streams[2]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + + err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL); + if (err != CL_SUCCESS) + { + log_error("clEnqueueWriteBuffer failed\n"); + return -1; + } + err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL); + if (err != CL_SUCCESS) + { + log_error("clEnqueueWriteBuffer failed\n"); + return -1; + } + + for( vectorSize = 0; vectorSize < sizeof( vector_size_names ) / sizeof( vector_size_names[0] ); vectorSize++ ) + { + cl_program program = NULL; + cl_kernel kernel = NULL; + + const char *source[] = { + "__kernel void test_sub_sat_", test_str_names[type], vector_size_names[vectorSize], + "(__global ", test_str_names[type], vector_size_names[vectorSize], + " *srcA, __global ", test_str_names[type], vector_size_names[vectorSize], + " *srcB, __global ", test_str_names[type], vector_size_names[vectorSize], + " *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " ", test_str_names[type], vector_size_names[vectorSize], " tmp = sub_sat(srcA[tid], srcB[tid]);\n" + " dst[tid] = tmp;\n" + "}\n" + }; + + const char *sourceV3[] = { + "__kernel void test_sub_sat_", 
test_str_names[type], vector_size_names[vectorSize], + "(__global ", test_str_names[type], + " *srcA, __global ", test_str_names[type], + " *srcB, __global ", test_str_names[type], + " *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " ", test_str_names[type], vector_size_names[vectorSize], " tmp = sub_sat(vload3(tid, srcA), vload3(tid, srcB));\n" + " vstore3(tmp, tid, dst);\n" + "}\n" + }; + + char kernelName[128]; + snprintf( kernelName, sizeof( kernelName ), "test_sub_sat_%s%s", test_str_names[type], vector_size_names[vectorSize] ); + if(vector_sizes[vectorSize] != 3) + { + err = create_single_kernel_helper(context, &program, &kernel, sizeof( source ) / sizeof( source[0] ), source, kernelName ); + } else { + err = create_single_kernel_helper(context, &program, &kernel, sizeof( sourceV3 ) / sizeof( sourceV3[0] ), sourceV3, kernelName ); + } + if (err) + return -1; + + err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]); + err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]); + err |= clSetKernelArg(kernel, 2, sizeof streams[2], &streams[2]); + if (err != CL_SUCCESS) + { + log_error("clSetKernelArgs failed\n"); + return -1; + } + + //Wipe the output buffer clean + uint32_t pattern = 0xdeadbeef; + memset_pattern4( output_ptr, &pattern, length ); + err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + log_error("clEnqueueWriteBuffer failed\n"); + return -1; + } + + size_t size = elementCount / vector_sizes[vectorSize]; + err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &size, NULL, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + log_error("clEnqueueNDRangeKernel failed\n"); + return -1; + } + + err = clEnqueueReadBuffer(queue, streams[2], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + log_error("clEnqueueReadBuffer failed\n"); + return -1; + } + + char *inP = (char *)input_ptr[0]; + char *inP2 = (char *)input_ptr[1]; + char 
*outP = (char *)output_ptr; + + for( size_t e = 0; e < size; e++ ) + { + if( f( inP, inP2, outP, vector_sizes[vectorSize], vector_size_names[vectorSize], vector_sizes[vectorSize] ) ) { + ++fail_count; break; // return -1; + } + inP += kSizes[type] * vector_sizes[vectorSize]; + inP2 += kSizes[type] * vector_sizes[vectorSize]; + outP += kSizes[type] * vector_sizes[vectorSize]; + } + + clReleaseKernel( kernel ); + clReleaseProgram( program ); + log_info( "." ); + fflush( stdout ); + } + + clReleaseMemObject( streams[0] ); + clReleaseMemObject( streams[1] ); + clReleaseMemObject( streams[2] ); + log_info( "done\n" ); + } + if(fail_count) { + log_info("Failed on %d types\n", fail_count); + return -1; + } + log_info("SUB_SAT test passed\n"); + + free(input_ptr[0]); + free(input_ptr[1]); + free(output_ptr); + + return err; +} + + diff --git a/test_conformance/integer_ops/test_unary_ops.cpp b/test_conformance/integer_ops/test_unary_ops.cpp new file mode 100644 index 00000000..06c6cf06 --- /dev/null +++ b/test_conformance/integer_ops/test_unary_ops.cpp @@ -0,0 +1,211 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "testBase.h" +#include "../../test_common/harness/conversions.h" + +#define TEST_SIZE 512 + +enum OpKonstants +{ + kIncrement = 0, + kDecrement, + kBoth +}; + +const char *testKernel = +"__kernel void test( __global %s *inOut, __global char * control )\n" +"{\n" +" size_t tid = get_global_id(0);\n" +"\n" +" %s%s inOutVal = %s;\n" +"\n" +" if( control[tid] == 0 )\n" +" inOutVal++;\n" +" else if( control[tid] == 1 )\n" +" ++inOutVal;\n" +" else if( control[tid] == 2 )\n" +" inOutVal--;\n" +" else // if( control[tid] == 3 )\n" +" --inOutVal;\n" +"\n" +" %s;\n" +"}\n"; + +typedef int (*OpVerifyFn)( void * actualPtr, void * inputPtr, size_t vecSize, size_t numVecs, cl_char * controls ); + +int test_unary_op( cl_command_queue queue, cl_context context, OpKonstants whichOp, + ExplicitType vecType, size_t vecSize, + MTdata d, OpVerifyFn verifyFn ) +{ + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper streams[2]; + cl_long inData[TEST_SIZE * 16], outData[TEST_SIZE * 16]; + cl_char controlData[TEST_SIZE]; + int error; + size_t i; + size_t threads[1], localThreads[1]; + char kernelSource[10240]; + char *programPtr; + + + // Create the source + char loadLine[ 1024 ], storeLine[ 1024 ]; + if( vecSize == 1 ) + { + sprintf( loadLine, "inOut[tid]" ); + sprintf( storeLine, "inOut[tid] = inOutVal" ); + } + else + { + sprintf( loadLine, "vload%ld( tid, inOut )", vecSize ); + sprintf( storeLine, "vstore%ld( inOutVal, tid, inOut )", vecSize ); + } + + char sizeNames[][4] = { "", "", "2", "3", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" }; + sprintf( kernelSource, testKernel, get_explicit_type_name( vecType ), /*sizeNames[ vecSize ],*/ + get_explicit_type_name( vecType ), sizeNames[ vecSize ], + loadLine, storeLine ); + + // Create the kernel + programPtr = kernelSource; + if( create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&programPtr, "test" ) ) + { + log_error( "ERROR: Unable to create test program!\n" ); + return 
-1; + } + + // Generate two streams. The first is our random data to test against, the second is our control stream + generate_random_data( vecType, vecSize * TEST_SIZE, d, inData ); + streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), + get_explicit_type_size( vecType ) * vecSize * TEST_SIZE, + inData, &error ); + test_error( error, "Creating input data array failed" ); + + cl_uint bits; + for( i = 0; i < TEST_SIZE; i++ ) + { + size_t which = i & 7; + if( which == 0 ) + bits = genrand_int32(d); + + controlData[ i ] = ( bits >> ( which << 1 ) ) & 0x03; + if( whichOp == kDecrement ) + // For sub ops, the min control value is 2. Otherwise, it's 0 + controlData[ i ] |= 0x02; + else if( whichOp == kIncrement ) + // For addition ops, the MAX control value is 1. Otherwise, it's 3 + controlData[ i ] &= ~0x02; + } + streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), + sizeof( controlData ), controlData, &error ); + test_error( error, "Unable to create control stream" ); + + // Assign streams and execute + error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] ); + test_error( error, "Unable to set indexed kernel arguments" ); + error = clSetKernelArg( kernel, 1, sizeof( streams[1] ), &streams[1] ); + test_error( error, "Unable to set indexed kernel arguments" ); + + + // Run the kernel + threads[0] = TEST_SIZE; + + error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] ); + test_error( error, "Unable to get work group size to use" ); + + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); + test_error( error, "Unable to execute test kernel" ); + + + // Read the results + error = clEnqueueReadBuffer( queue, streams[0], CL_TRUE, 0, + get_explicit_type_size( vecType ) * TEST_SIZE * vecSize, + outData, 0, NULL, NULL ); + test_error( error, "Unable to read output array!" 
); + + // Now verify the results + return verifyFn( outData, inData, vecSize, TEST_SIZE, controlData ); +} + +template int VerifyFn( void * actualPtr, void * inputPtr, size_t vecSize, size_t numVecs, cl_char * controls ) +{ + T * actualData = (T *)actualPtr; + T * inputData = (T *)inputPtr; + + size_t index = 0; + for( size_t i = 0; i < numVecs; i++ ) + { + for( size_t j = 0; j < vecSize; j++, index++ ) + { + T nextVal = inputData[ index ]; + if( controls[ i ] & 0x02 ) + nextVal--; + else + nextVal++; + + if( actualData[ index ] != nextVal ) + { + log_error( "ERROR: Validation failed on vector %ld:%ld (expected %lld, got %lld)", i, j, + (cl_long)nextVal, (cl_long)actualData[ index ] ); + return -1; + } + } + } + return 0; +} + +int test_unary_op_set( cl_command_queue queue, cl_context context, OpKonstants whichOp ) +{ + ExplicitType types[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kNumExplicitTypes }; + OpVerifyFn verifys[] = { VerifyFn, VerifyFn, VerifyFn, VerifyFn, VerifyFn, VerifyFn, VerifyFn, VerifyFn, NULL }; + unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 }; + unsigned int index, typeIndex; + int retVal = 0; + RandomSeed seed(gRandomSeed ); + + for( typeIndex = 0; types[ typeIndex ] != kNumExplicitTypes; typeIndex++ ) + { + if ((types[ typeIndex ] == kLong || types[ typeIndex ] == kULong) && !gHasLong) + continue; + + for( index = 0; vecSizes[ index ] != 0; index++ ) + { + if( test_unary_op( queue, context, whichOp, types[ typeIndex ], vecSizes[ index ], seed, verifys[ typeIndex ] ) != 0 ) + { + log_error( " Vector %s%d FAILED\n", get_explicit_type_name( types[ typeIndex ] ), vecSizes[ index ] ); + retVal = -1; + } + } + } + + return retVal; +} + +int test_unary_ops_full(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return test_unary_op_set( queue, context, kBoth ); +} + +int test_unary_ops_increment(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + 
return test_unary_op_set( queue, context, kIncrement ); +} + +int test_unary_ops_decrement(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return test_unary_op_set( queue, context, kDecrement ); +} diff --git a/test_conformance/integer_ops/test_upsample.cpp b/test_conformance/integer_ops/test_upsample.cpp new file mode 100644 index 00000000..6e823398 --- /dev/null +++ b/test_conformance/integer_ops/test_upsample.cpp @@ -0,0 +1,263 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "testBase.h" +#include "../../test_common/harness/conversions.h" + +static const int vector_sizes[] = {1, 2, 3, 4, 8, 16}; +#define NUM_VECTOR_SIZES 6 + +const char *permute_2_param_kernel_pattern = +"__kernel void test_upsample(__global %s *sourceA, __global %s *sourceB, __global %s *destValues)\n" +"{\n" +" int tid = get_global_id(0);\n" +" destValues[tid] = %s( sourceA[tid], sourceB[tid] );\n" +"\n" +"}\n"; + + +const char *permute_2_param_kernel_pattern_v3srcdst = +"__kernel void test_upsample(__global %s *sourceA, __global %s *sourceB, __global %s *destValues)\n" +"{\n" +" int tid = get_global_id(0);\n" +" vstore3( %s( vload3(tid,sourceA), vload3(tid, sourceB) ), tid, destValues);\n" +"\n" +"}\n"; + +int test_upsample_2_param_fn(cl_command_queue queue, cl_context context, const char *fnName, ExplicitType sourceAType, ExplicitType sourceBType, ExplicitType outType, + size_t sourceAVecSize, size_t sourceBVecSize, size_t outVecSize, size_t count, + void *sourceA, void *sourceB, void *expectedResults ) +{ + cl_program program; + cl_kernel kernel; + int error, retCode = 0; + cl_mem streams[3]; + void *outData; + size_t threadSize, groupSize, i; + unsigned char *expectedPtr, *outPtr; + size_t sourceATypeSize, sourceBTypeSize, outTypeSize, outStride; + char programSource[ 10240 ], aType[ 64 ], bType[ 64 ], tType[ 64 ]; + const char *progPtr; + + + sourceATypeSize = get_explicit_type_size( sourceAType ); + sourceBTypeSize = get_explicit_type_size( sourceBType ); + outTypeSize = get_explicit_type_size( outType ); + + outStride = outTypeSize * outVecSize; + outData = malloc( outStride * count ); + + /* Construct the program */ + strcpy( aType, get_explicit_type_name( sourceAType ) ); + strcpy( bType, get_explicit_type_name( sourceBType ) ); + strcpy( tType, get_explicit_type_name( outType ) ); + if( sourceAVecSize > 1 && sourceAVecSize != 3) + sprintf( aType + strlen( aType ), "%d", (int)sourceAVecSize ); + if( sourceBVecSize > 1 && sourceBVecSize != 3) + 
sprintf( bType + strlen( bType ), "%d", (int)sourceBVecSize ); + if( outVecSize > 1 && outVecSize != 3) + sprintf( tType + strlen( tType ), "%d", (int)outVecSize ); + + if(sourceAVecSize == 3 && sourceBVecSize == 3 && outVecSize == 3) + { + // permute_2_param_kernel_pattern_v3srcdst + sprintf( programSource, permute_2_param_kernel_pattern_v3srcdst, aType, bType, tType, fnName ); + } + else if(sourceAVecSize != 3 && sourceBVecSize != 3 && outVecSize != 3) + { + sprintf( programSource, permute_2_param_kernel_pattern, aType, bType, tType, fnName ); + } else { + vlog_error("Not implemented for %d,%d -> %d\n", + (int)sourceAVecSize, (int)sourceBVecSize, (int)outVecSize); + return -1; + } + + progPtr = (const char *)programSource; + if( create_single_kernel_helper( context, &program, &kernel, 1, &progPtr, "test_upsample" ) ) + { + free( outData ); + return -1; + } + + /* Set up parameters */ + streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sourceATypeSize * sourceAVecSize * count, sourceA, NULL ); + if (!streams[0]) + { + log_error("ERROR: Creating input array A failed!\n"); + return -1; + } + streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sourceBTypeSize * sourceBVecSize * count, sourceB, NULL ); + if (!streams[1]) + { + log_error("ERROR: Creating input array B failed!\n"); + return -1; + } + streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), outStride * count, NULL, NULL ); + if (!streams[2]) + { + log_error("ERROR: Creating output array failed!\n"); + return -1; + } + + /* Set the arguments */ + error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0] ); + test_error( error, "Unable to set kernel arguments" ); + error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1] ); + test_error( error, "Unable to set kernel arguments" ); + error = clSetKernelArg(kernel, 2, sizeof( streams[2] ), &streams[2] ); + test_error( error, "Unable to set kernel arguments" ); + + /* Run 
the kernel */ + threadSize = count; + + error = get_max_common_work_group_size( context, kernel, threadSize, &groupSize ); + test_error( error, "Unable to get work group size to use" ); + + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, &threadSize, &groupSize, 0, NULL, NULL ); + test_error( error, "Unable to execute test kernel" ); + + /* Now verify the results. Each value should have been duplicated four times, and we should be able to just + do a memcpy instead of relying on the actual type of data */ + error = clEnqueueReadBuffer( queue, streams[2], CL_TRUE, 0, outStride * count, outData, 0, NULL, NULL ); + test_error( error, "Unable to read output values!" ); + + expectedPtr = (unsigned char *)expectedResults; + outPtr = (unsigned char *)outData; + + for( i = 0; i < count; i++ ) + { + if( memcmp( outPtr, expectedPtr, outTypeSize * outVecSize ) != 0 ) + { + log_error( "ERROR: Output value %d does not validate!\n", (int)i ); + retCode = -1; + break; + } + expectedPtr += outTypeSize * outVecSize; + outPtr += outStride; + } + + clReleaseMemObject( streams[0] ); + clReleaseMemObject( streams[1] ); + clReleaseMemObject( streams[2] ); + clReleaseKernel( kernel ); + clReleaseProgram( program ); + free( outData ); + + return retCode; +} + +void * create_upsample_data( ExplicitType type, void *sourceA, void *sourceB, size_t count ) +{ + void *outData; + size_t i, tSize; + + tSize = get_explicit_type_size( type ); + outData = malloc( tSize * count * 2 ); + + switch( tSize ) + { + case 1: + { + const cl_uchar *aPtr = (const cl_uchar *) sourceA; + const cl_uchar *bPtr = (const cl_uchar *) sourceB; + cl_ushort *dPtr = (cl_ushort*) outData; + for( i = 0; i < count; i++ ) + { + cl_ushort u = *bPtr++; + u |= ((cl_ushort) *aPtr++) << 8; + *dPtr++ = u; + } + } + break; + case 2: + { + const cl_ushort *aPtr = (const cl_ushort *) sourceA; + const cl_ushort *bPtr = (const cl_ushort *) sourceB; + cl_uint *dPtr = (cl_uint*) outData; + for( i = 0; i < count; i++ ) + { + 
cl_uint u = *bPtr++; + u |= ((cl_uint) *aPtr++) << 16; + *dPtr++ = u; + } + } + break; + case 4: + { + const cl_uint *aPtr = (const cl_uint *) sourceA; + const cl_uint *bPtr = (const cl_uint *) sourceB; + cl_ulong *dPtr = (cl_ulong*) outData; + for( i = 0; i < count; i++ ) + { + cl_ulong u = *bPtr++; + u |= ((cl_ulong) *aPtr++) << 32; + *dPtr++ = u; + } + } + break; + default: + log_error( "ERROR: unknown type size: %ld\n", tSize ); + return NULL; + } + + return outData; +} + +int test_integer_upsample(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + ExplicitType typesToTest[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kNumExplicitTypes }; + ExplicitType baseTypes[] = { kUChar, kUChar, kUShort, kUShort, kUInt, kUInt, kNumExplicitTypes }; + ExplicitType outTypes[] = { kShort, kUShort, kInt, kUInt, kLong, kULong, kNumExplicitTypes }; + int i, err = 0; + int sizeIndex; + size_t size; + void *sourceA, *sourceB, *expected; + RandomSeed seed(gRandomSeed ); + + for( i = 0; typesToTest[ i ] != kNumExplicitTypes; i++ ) + { + if ((outTypes[i] == kLong || outTypes[i] == kULong) && !gHasLong) + { + log_info( "Longs unsupported on this device. 
Skipping...\n"); + continue; + } + + for( sizeIndex = 0; sizeIndex < NUM_VECTOR_SIZES; sizeIndex++) + { + size = (size_t)vector_sizes[sizeIndex]; + log_info("running upsample test for %s %s vector size %d\n", get_explicit_type_name(typesToTest[i]), get_explicit_type_name(baseTypes[i]), (int)size); + sourceA = create_random_data( typesToTest[ i ], seed, 256 ); + sourceB = create_random_data( baseTypes[ i ], seed, 256 ); + expected = create_upsample_data( typesToTest[ i ], sourceA, sourceB, 256 ); + + if( test_upsample_2_param_fn( queue, context, "upsample", + typesToTest[ i ], baseTypes[ i ], + outTypes[ i ], + size, size, size, + 256 / size, + sourceA, sourceB, expected ) != 0 ) + { + log_error( "TEST FAILED: %s for %s%d\n", "upsample", get_explicit_type_name( typesToTest[ i ] ), (int)size ); + err = -1; + } + free( sourceA ); + free( sourceB ); + free( expected ); + } + } + return err; +} + + diff --git a/test_conformance/integer_ops/verification_and_generation_functions.c b/test_conformance/integer_ops/verification_and_generation_functions.c new file mode 100644 index 00000000..f36a1bf4 --- /dev/null +++ b/test_conformance/integer_ops/verification_and_generation_functions.c @@ -0,0 +1,1675 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include + +#include "procs.h" +#include "../../test_common/harness/conversions.h" + +extern MTdata d; + +// The tests we are running +const char *tests[] = { + "+", + "-", + "*", + "/", + "%", + "&", + "|", + "^", + ">>", + "<<", + ">>", + "<<", + "~", + "?:", + "&&", + "||", + "<", + ">", + "<=", + ">=", + "==", + "!=", + "!", +}; + +// The names of the tests +const char *test_names[] = { + "+", // 0 + "-", // 1 + "*", // 2 + "/", // 3 + "%", // 4 + "&", // 5 + "|", // 6 + "^", // 7 + ">> by vector", // 8 + "<< by vector", // 9 + ">> by scalar", // 10 + "<< by scalar", // 11 + "~", // 12 + "?:", // 13 + "&&", // 14 + "||", // 15 + "<", // 16 + ">", // 17 + "<=", // 18 + ">=", // 19 + "==", // 20 + "!=", // 21 + "!", // 22 +}; + +const size_t vector_aligns[] = {0, 1, 2, 4, 4, + 8, 8, 8, 8, + 16, 16, 16, 16, + 16, 16, 16, 16}; + +// ======================================= +// long +// ======================================= +int +verify_long(int test, size_t vector_size, cl_long *inptrA, cl_long *inptrB, cl_long *outptr, size_t n) +{ + cl_long r, shift_mask = (sizeof(cl_long)*8)-1; + size_t i, j; + int count=0; + + for (j=0; j> (inptrB[i] & shift_mask); + break; + case 9: + r = inptrA[i] << (inptrB[i] & shift_mask); + break; + case 10: + r = inptrA[i] >> (inptrB[j] & shift_mask); + break; + case 11: + r = inptrA[i] << (inptrB[j] & shift_mask); + break; + case 12: + r = ~inptrA[i]; + break; + case 13: + r = (inptrA[j] < inptrB[j]) ? 
inptrA[i] : inptrB[i]; + break; + case 14: + // Scalars are set to 1/0 + r = inptrA[i] && inptrB[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + case 15: + // Scalars are set to 1/0 + r = inptrA[i] || inptrB[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + case 16: + // Scalars are set to 1/0 + r = inptrA[i] < inptrB[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + case 17: + // Scalars are set to 1/0 + r = inptrA[i] > inptrB[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + case 18: + // Scalars are set to 1/0 + r = inptrA[i] <= inptrB[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + case 19: + // Scalars are set to 1/0 + r = inptrA[i] >= inptrB[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + case 20: + // Scalars are set to 1/0 + r = inptrA[i] == inptrB[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + case 21: + // Scalars are set to 1/0 + r = inptrA[i] != inptrB[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + case 22: + // Scalars are set to 1/0 + r = !inptrA[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + default: + log_error("Invalid test: %d\n", test); + return -1; + break; + } + if (r != outptr[i]) { + // Shift is tricky + if (test == 8 || test == 9) { + log_error("cl_long Verification failed at element %ld of %ld : 0x%llx %s 0x%llx = 0x%llx, got 0x%llx\n", i, n, inptrA[i], tests[test], inptrB[i], r, outptr[i]); + log_error("\t1) Vector shift failure at element %ld: original is 0x%llx %s %d (0x%llx)\n", i, inptrA[i], tests[test], (int)inptrB[i], inptrB[i]); + log_error("\t2) Take the %d LSBs of the shift to get the final shift amount %lld (0x%llx).\n", (int)log2(sizeof(cl_long)*8), 
inptrB[i]&shift_mask, inptrB[i]&shift_mask); + } + else if (test == 10 || test == 11) { + + log_error("cl_long Verification failed at element %ld of %ld (%ld): 0x%llx %s 0x%llx = 0x%llx, got 0x%llx\n", i, n, j, inptrA[i], tests[test], inptrB[j], r, outptr[i]); + log_error("\t1) Scalar shift failure at element %ld: original is 0x%llx %s %d (0x%llx)\n", i, inptrA[i], tests[test], (int)inptrB[j], inptrB[j]); + log_error("\t2) Take the %d LSBs of the shift to get the final shift amount %lld (0x%llx).\n", (int)log2(sizeof(cl_long)*8), inptrB[j]&shift_mask, inptrB[j]&shift_mask); + } else if (test == 13) { + log_error("cl_int Verification failed at element %ld (%ld): (0x%llx < 0x%llx) ? 0x%llx : 0x%llx = 0x%llx, got 0x%llx\n", i, j, inptrA[j], inptrB[j], + inptrA[i], inptrB[i], r, outptr[i]); + } else { + log_error("cl_long Verification failed at element %ld of %ld: 0x%llx %s 0x%llx = 0x%llx, got 0x%llx\n", i, n, inptrA[i], tests[test], inptrB[i], r, outptr[i]); + } + count++; + if (count >= MAX_ERRORS_TO_PRINT) { + log_error("Further errors ignored.\n"); + return -1; + } + } + } + } + + if (count) return -1; else return 0; +} + +void +init_long_data(uint64_t indx, int num_elements, cl_long *input_ptr[], MTdata d) +{ + cl_ulong *p = (cl_ulong *)input_ptr[0]; + int j; + + if (indx == 0) { + // Do the tricky values the first time around + fill_test_values( input_ptr[ 0 ], input_ptr[ 1 ], (size_t)num_elements, d ); + } else { + // Then just test lots of random ones. + for (j=0; j> (inptrB[i] & shift_mask); + break; + case 9: + r = inptrA[i] << (inptrB[i] & shift_mask); + break; + case 10: + r = inptrA[i] >> (inptrB[j] & shift_mask); + break; + case 11: + r = inptrA[i] << (inptrB[j] & shift_mask); + break; + case 12: + r = ~inptrA[i]; + break; + case 13: + r = (inptrA[j] < inptrB[j]) ? 
inptrA[i] : inptrB[i]; + break; + case 14: + // Scalars are set to 1/0 + r = inptrA[i] && inptrB[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + case 15: + // Scalars are set to 1/0 + r = inptrA[i] || inptrB[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + case 16: + // Scalars are set to 1/0 + r = inptrA[i] < inptrB[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + case 17: + // Scalars are set to 1/0 + r = inptrA[i] > inptrB[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + case 18: + // Scalars are set to 1/0 + r = inptrA[i] <= inptrB[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + case 19: + // Scalars are set to 1/0 + r = inptrA[i] >= inptrB[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + case 20: + // Scalars are set to 1/0 + r = inptrA[i] == inptrB[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + case 21: + // Scalars are set to 1/0 + r = inptrA[i] != inptrB[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + case 22: + // Scalars are set to 1/0 + r = !inptrA[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + default: + log_error("Invalid test: %d\n", test); + return -1; + break; + } + if (r != outptr[i]) { + // Shift is tricky + if (test == 8 || test == 9) { + log_error("cl_ulong Verification failed at element %ld of %ld: 0x%llx %s 0x%llx = 0x%llx, got 0x%llx\n", i, n, inptrA[i], tests[test], inptrB[i], r, outptr[i]); + log_error("\t1) Shift failure at element %ld: original is 0x%llx %s %d (0x%llx)\n", i, inptrA[i], tests[test], (int)inptrB[i], inptrB[i]); + log_error("\t2) Take the %d LSBs of the shift to get the final shift amount %llu (0x%llx).\n", (int)log2(sizeof(cl_ulong)*8), inptrB[i]&shift_mask, 
inptrB[i]&shift_mask); + } + else if (test == 10 || test == 11) { + log_error("cl_ulong Verification failed at element %ld of %ld (%ld): 0x%llx %s 0x%llx = 0x%llx, got 0x%llx\n", i, n, j, inptrA[i], tests[test], inptrB[j], r, outptr[i]); + log_error("\t1) Scalar shift failure at element %ld: original is 0x%llx %s %d (0x%llx)\n", i, inptrA[i], tests[test], (int)inptrB[j], inptrB[j]); + log_error("\t2) Take the %d LSBs of the shift to get the final shift amount %lld (0x%llx).\n", (int)log2(sizeof(cl_long)*8), inptrB[j]&shift_mask, inptrB[j]&shift_mask); + } else if (test == 13) { + log_error("cl_int Verification failed at element %ld of %ld (%ld): (0x%llx < 0x%llx) ? 0x%llx : 0x%llx = 0x%llx, got 0x%llx\n", i, n, j, inptrA[j], inptrB[j], + inptrA[i], inptrB[i], r, outptr[i]); + } else { + log_error("cl_ulong Verification failed at element %ld of %ld: 0x%llx %s 0x%llx = 0x%llx, got 0x%llx\n", i, n, inptrA[i], tests[test], inptrB[i], r, outptr[i]); + } + count++; + if (count >= MAX_ERRORS_TO_PRINT) { + log_error("Further errors ignored.\n"); + return -1; + } + } + } + } + if (count) return -1; else return 0; +} + +void +init_ulong_data(uint64_t indx, int num_elements, cl_ulong *input_ptr[], MTdata d) +{ + cl_ulong *p = (cl_ulong *)input_ptr[0]; + int j; + + if (indx == 0) + { + // Do the tricky values the first time around + fill_test_values( (cl_long*)input_ptr[ 0 ], (cl_long*)input_ptr[ 1 ], (size_t)num_elements, d ); + } + else + { + // Then just test lots of random ones. + for (j=0; j> (inptrB[i] & shift_mask); + break; + case 9: + r = inptrA[i] << (inptrB[i] & shift_mask); + break; + case 10: + r = inptrA[i] >> (inptrB[j] & shift_mask); + break; + case 11: + r = inptrA[i] << (inptrB[j] & shift_mask); + break; + case 12: + r = ~inptrA[i]; + break; + case 13: + r = (inptrA[j] < inptrB[j]) ? 
inptrA[i] : inptrB[i]; + break; + case 14: + // Scalars are set to 1/0 + r = inptrA[i] && inptrB[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + case 15: + // Scalars are set to 1/0 + r = inptrA[i] || inptrB[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + case 16: + // Scalars are set to 1/0 + r = inptrA[i] < inptrB[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + case 17: + // Scalars are set to 1/0 + r = inptrA[i] > inptrB[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + case 18: + // Scalars are set to 1/0 + r = inptrA[i] <= inptrB[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + case 19: + // Scalars are set to 1/0 + r = inptrA[i] >= inptrB[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + case 20: + // Scalars are set to 1/0 + r = inptrA[i] == inptrB[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + case 21: + // Scalars are set to 1/0 + r = inptrA[i] != inptrB[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + case 22: + // Scalars are set to 1/0 + r = !inptrA[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + default: + log_error("Invalid test: %d\n", test); + return -1; + break; + } + if (r != outptr[i]) { + // Shift is tricky + if (test == 8 || test == 9) { + log_error("cl_int Verification failed at element %ld: 0x%x %s 0x%x = 0x%x, got 0x%x\n", i, inptrA[i], tests[test], inptrB[i], r, outptr[i]); + log_error("\t1) Shift failure at element %ld: original is 0x%x %s %d (0x%x)\n", i, inptrA[i], tests[test], (int)inptrB[i], inptrB[i]); + log_error("\t2) Take the %d LSBs of the shift to get the final shift amount %d (0x%x).\n", (int)log2(sizeof(cl_int)*8), inptrB[i]&shift_mask, inptrB[i]&shift_mask); + } + else 
if (test == 10 || test == 11) { + log_error("cl_int Verification failed at element %ld (%ld): 0x%x %s 0x%x = 0x%x, got 0x%x\n", i, j, inptrA[i], tests[test], inptrB[j], r, outptr[i]); + log_error("\t1) Scalar shift failure at element %ld: original is 0x%x %s %d (0x%x)\n", i, inptrA[i], tests[test], (int)inptrB[j], inptrB[j]); + log_error("\t2) Take the %d LSBs of the shift to get the final shift amount %d (0x%x).\n", (int)log2(sizeof(cl_int)*8), inptrB[j]&shift_mask, inptrB[j]&shift_mask); + } else if (test == 13) { + log_error("cl_int Verification failed at element %ld (%ld): (0x%x < 0x%x) ? 0x%x : 0x%x = 0x%x, got 0x%x\n", i, j, inptrA[j], inptrB[j], + inptrA[i], inptrB[i], r, outptr[i]); + } else { + log_error("cl_int Verification failed at element %ld: 0x%x %s 0x%x = 0x%x, got 0x%x\n", i, inptrA[i], tests[test], inptrB[i], r, outptr[i]); + } + count++; + if (count >= MAX_ERRORS_TO_PRINT) { + log_error("Further errors ignored.\n"); + return -1; + } + } + } + } + + if (count) return -1; else return 0; +} + +void +init_int_data(uint64_t indx, int num_elements, cl_int *input_ptr[], MTdata d) +{ + static const cl_int specialCaseList[] = { 0, -1, 1, CL_INT_MIN, CL_INT_MIN + 1, CL_INT_MAX }; + int j; + + // Set the inputs to a random number + for (j=0; j> (inptrB[i] & shift_mask); + break; + case 9: + r = inptrA[i] << (inptrB[i] & shift_mask); + break; + case 10: + r = inptrA[i] >> (inptrB[j] & shift_mask); + break; + case 11: + r = inptrA[i] << (inptrB[j] & shift_mask); + break; + case 12: + r = ~inptrA[i]; + break; + case 13: + r = (inptrA[j] < inptrB[j]) ? 
inptrA[i] : inptrB[i]; + break; + case 14: + // Scalars are set to 1/0 + r = inptrA[i] && inptrB[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + case 15: + // Scalars are set to 1/0 + r = inptrA[i] || inptrB[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + case 16: + // Scalars are set to 1/0 + r = inptrA[i] < inptrB[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + case 17: + // Scalars are set to 1/0 + r = inptrA[i] > inptrB[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + case 18: + // Scalars are set to 1/0 + r = inptrA[i] <= inptrB[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + case 19: + // Scalars are set to 1/0 + r = inptrA[i] >= inptrB[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + case 20: + // Scalars are set to 1/0 + r = inptrA[i] == inptrB[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + case 21: + // Scalars are set to 1/0 + r = inptrA[i] != inptrB[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + case 22: + // Scalars are set to 1/0 + r = !inptrA[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + default: + log_error("Invalid test: %d\n", test); + return -1; + break; + } + if (r != outptr[i]) { + // Shift is tricky + if (test == 8 || test == 9) { + log_error("cl_uint Verification failed at element %ld: 0x%x %s 0x%x = 0x%x, got 0x%x\n", i, inptrA[i], tests[test], inptrB[i], r, outptr[i]); + log_error("\t1) Shift failure at element %ld: original is 0x%x %s %d (0x%x)\n", i, inptrA[i], tests[test], (int)inptrB[i], inptrB[i]); + log_error("\t2) Take the %d LSBs of the shift to get the final shift amount %d (0x%x).\n", (int)log2(sizeof(cl_uint)*8), inptrB[i]&shift_mask, inptrB[i]&shift_mask); + } + 
else if (test == 10 || test == 11) { + log_error("cl_uint Verification failed at element %ld (%ld): 0x%x %s 0x%x = 0x%x, got 0x%x\n", i, j, inptrA[i], tests[test], inptrB[j], r, outptr[i]); + log_error("\t1) Scalar shift failure at element %ld: original is 0x%x %s %d (0x%x)\n", i, inptrA[i], tests[test], (int)inptrB[j], inptrB[j]); + log_error("\t2) Take the %d LSBs of the shift to get the final shift amount %d (0x%x).\n", (int)log2(sizeof(cl_uint)*8), inptrB[j]&shift_mask, inptrB[j]&shift_mask); + } else if (test == 13) { + log_error("cl_int Verification failed at element %ld (%ld): (0x%x < 0x%x) ? 0x%x : 0x%x = 0x%x, got 0x%x\n", i, j, inptrA[j], inptrB[j], + inptrA[i], inptrB[i], r, outptr[i]); + } else { + log_error("cl_uint Verification failed at element %ld: 0x%x %s 0x%x = 0x%x, got 0x%x\n", i, inptrA[i], tests[test], inptrB[i], r, outptr[i]); + } + count++; + if (count >= MAX_ERRORS_TO_PRINT) { + log_error("Further errors ignored.\n"); + return -1; + } + } + } + } + if (count) return -1; else return 0; +} + +void +init_uint_data(uint64_t indx, int num_elements, cl_uint *input_ptr[], MTdata d) +{ + static cl_uint specialCaseList[] = { 0, (cl_uint) CL_INT_MAX, (cl_uint) CL_INT_MAX + 1, CL_UINT_MAX-1, CL_UINT_MAX }; + int j; + + // Set the first input to an incrementing number + // Set the second input to a random number + for (j=0; j> (inptrB[i] & shift_mask); + break; + case 9: + r = inptrA[i] << (inptrB[i] & shift_mask); + break; + case 10: + r = inptrA[i] >> (inptrB[j] & shift_mask); + break; + case 11: + r = inptrA[i] << (inptrB[j] & shift_mask); + break; + case 12: + r = ~inptrA[i]; + break; + case 13: + r = (inptrA[j] < inptrB[j]) ? 
inptrA[i] : inptrB[i]; + break; + case 14: + // Scalars are set to 1/0 + r = inptrA[i] && inptrB[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + case 15: + // Scalars are set to 1/0 + r = inptrA[i] || inptrB[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + case 16: + // Scalars are set to 1/0 + r = inptrA[i] < inptrB[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + case 17: + // Scalars are set to 1/0 + r = inptrA[i] > inptrB[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + case 18: + // Scalars are set to 1/0 + r = inptrA[i] <= inptrB[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + case 19: + // Scalars are set to 1/0 + r = inptrA[i] >= inptrB[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + case 20: + // Scalars are set to 1/0 + r = inptrA[i] == inptrB[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + case 21: + // Scalars are set to 1/0 + r = inptrA[i] != inptrB[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + case 22: + // Scalars are set to 1/0 + r = !inptrA[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + default: + log_error("Invalid test: %d\n", test); + return -1; + break; + } + if (r != outptr[i]) { + // Shift is tricky + if (test == 8 || test == 9) { + log_error("cl_short Verification failed at element %ld: 0x%x %s 0x%x = 0x%x, got 0x%x\n", i, inptrA[i], tests[test], inptrB[i], r, outptr[i]); + log_error("\t1) Shift failure at element %ld: original is 0x%x %s %d (0x%x)\n", i, inptrA[i], tests[test], (int)inptrB[i], inptrB[i]); + log_error("\t2) Take the %d LSBs of the shift to get the final shift amount %d (0x%x).\n", (int)log2(sizeof(cl_short)*8), inptrB[i]&shift_mask, inptrB[i]&shift_mask); + } + 
else if (test == 10 || test == 11) { + log_error("cl_short Verification failed at element %ld (%ld): 0x%x %s 0x%x = 0x%x, got 0x%x\n", i, j, inptrA[i], tests[test], inptrB[j], r, outptr[i]); + log_error("\t1) Scalar shift failure at element %ld: original is 0x%x %s %d (0x%x)\n", i, inptrA[i], tests[test], (int)inptrB[j], inptrB[j]); + log_error("\t2) Take the %d LSBs of the shift to get the final shift amount %d (0x%x).\n", (int)log2(sizeof(cl_short)*8), inptrB[j]&shift_mask, inptrB[j]&shift_mask); + } else if (test == 13) { + log_error("cl_int Verification failed at element %ld (%ld): (0x%x < 0x%x) ? 0x%x : 0x%x = 0x%x, got 0x%x\n", i, j, inptrA[j], inptrB[j], + inptrA[i], inptrB[i], r, outptr[i]); + } else { + log_error("cl_short Verification failed at element %ld: 0x%x %s 0x%x = 0x%x, got 0x%x\n", i, inptrA[i], tests[test], inptrB[i], r, outptr[i]); + } + count++; + if (count >= MAX_ERRORS_TO_PRINT) { + log_error("Further errors ignored.\n"); + return -1; + } + } + } + } + + if (count) return -1; else return 0; +} + +void +init_short_data(uint64_t indx, int num_elements, cl_short *input_ptr[], MTdata d) +{ + static const cl_short specialCaseList[] = { 0, -1, 1, CL_SHRT_MIN, CL_SHRT_MIN + 1, CL_SHRT_MAX }; + int j; + + // Set the inputs to a random number + for (j=0; j> 16); + } + + // Init the first few values to test special cases + { + size_t x, y, index = 0; + for( x = 0; x < sizeof( specialCaseList ) / sizeof( specialCaseList[0] ); x++ ) + for( y = 0; y < sizeof( specialCaseList ) / sizeof( specialCaseList[0] ); y++ ) + { + ((cl_short *)input_ptr[0])[index] = specialCaseList[x]; + ((cl_short *)input_ptr[1])[index++] = specialCaseList[y]; + } + } +} + + +// ======================================= +// ushort +// ======================================= +int +verify_ushort(int test, size_t vector_size, cl_ushort *inptrA, cl_ushort *inptrB, cl_ushort *outptr, size_t n) +{ + cl_ushort r; + cl_uint shift_mask = vector_size == 1 ? 
(cl_uint)(sizeof(cl_uint)*8)-1 + : (cl_uint)(sizeof(cl_ushort)*8)-1; + size_t i, j; + int count=0; + + for (j=0; j> (inptrB[i] & shift_mask); + break; + case 9: + r = inptrA[i] << (inptrB[i] & shift_mask); + break; + case 10: + r = inptrA[i] >> (inptrB[j] & shift_mask); + break; + case 11: + r = inptrA[i] << (inptrB[j] & shift_mask); + break; + case 12: + r = ~inptrA[i]; + break; + case 13: + r = (inptrA[j] < inptrB[j]) ? inptrA[i] : inptrB[i]; + break; + case 14: + // Scalars are set to 1/0 + r = inptrA[i] && inptrB[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + case 15: + // Scalars are set to 1/0 + r = inptrA[i] || inptrB[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + case 16: + // Scalars are set to 1/0 + r = inptrA[i] < inptrB[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + case 17: + // Scalars are set to 1/0 + r = inptrA[i] > inptrB[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + case 18: + // Scalars are set to 1/0 + r = inptrA[i] <= inptrB[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + case 19: + // Scalars are set to 1/0 + r = inptrA[i] >= inptrB[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + case 20: + // Scalars are set to 1/0 + r = inptrA[i] == inptrB[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + case 21: + // Scalars are set to 1/0 + r = inptrA[i] != inptrB[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + case 22: + // Scalars are set to 1/0 + r = !inptrA[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + default: + log_error("Invalid test: %d\n", test); + return -1; + break; + } + if (r != outptr[i]) { + // Shift is tricky + if (test == 8 || test == 9) { + log_error("cl_ushort 
Verification failed at element %ld: 0x%x %s 0x%x = 0x%x, got 0x%x\n", i, inptrA[i], tests[test], inptrB[i], r, outptr[i]); + log_error("\t1) Shift failure at element %ld: original is 0x%x %s %d (0x%x)\n", i, inptrA[i], tests[test], (int)inptrB[i], inptrB[i]); + log_error("\t2) Take the %d LSBs of the shift to get the final shift amount %d (0x%x).\n", (int)log2(sizeof(cl_ushort)*8), inptrB[i]&shift_mask, inptrB[i]&shift_mask); + } + else if (test == 10 || test == 11) { + log_error("cl_ushort Verification failed at element %ld (%ld): 0x%x %s 0x%x = 0x%x, got 0x%x\n", i, j, inptrA[i], tests[test], inptrB[j], r, outptr[i]); + log_error("\t1) Scalar shift failure at element %ld: original is 0x%x %s %d (0x%x)\n", i, inptrA[i], tests[test], (int)inptrB[j], inptrB[j]); + log_error("\t2) Take the %d LSBs of the shift to get the final shift amount %d (0x%x).\n", (int)log2(sizeof(cl_ushort)*8), inptrB[j]&shift_mask, inptrB[j]&shift_mask); + } else if (test == 13) { + log_error("cl_int Verification failed at element %ld (%ld): (0x%x < 0x%x) ? 
0x%x : 0x%x = 0x%x, got 0x%x\n", i, j, inptrA[j], inptrB[j], + inptrA[i], inptrB[i], r, outptr[i]); + } else { + log_error("cl_ushort Verification failed at element %ld: 0x%x %s 0x%x = 0x%x, got 0x%x\n", i, inptrA[i], tests[test], inptrB[i], r, outptr[i]); + } + count++; + if (count >= MAX_ERRORS_TO_PRINT) { + log_error("Further errors ignored.\n"); + return -1; + } + } + } + } + + if (count) return -1; else return 0; +} + +void +init_ushort_data(uint64_t indx, int num_elements, cl_ushort *input_ptr[], MTdata d) +{ + static const cl_ushort specialCaseList[] = { 0, -1, 1, CL_SHRT_MAX, CL_SHRT_MAX + 1, CL_USHRT_MAX }; + int j; + + // Set the inputs to a random number + for (j=0; j> 16); + } + + // Init the first few values to test special cases + { + size_t x, y, index = 0; + for( x = 0; x < sizeof( specialCaseList ) / sizeof( specialCaseList[0] ); x++ ) + for( y = 0; y < sizeof( specialCaseList ) / sizeof( specialCaseList[0] ); y++ ) + { + ((cl_ushort *)input_ptr[0])[index] = specialCaseList[x]; + ((cl_ushort *)input_ptr[1])[index++] = specialCaseList[y]; + } + } +} + + + +// ======================================= +// char +// ======================================= +int +verify_char(int test, size_t vector_size, cl_char *inptrA, cl_char *inptrB, cl_char *outptr, size_t n) +{ + cl_char r; + cl_int shift_mask = vector_size == 1 ? (cl_int)(sizeof(cl_int)*8)-1 + : (cl_int)(sizeof(cl_char)*8)-1; + size_t i, j; + int count=0; + + for (j=0; j> (inptrB[i] & shift_mask); + break; + case 9: + r = inptrA[i] << (inptrB[i] & shift_mask); + break; + case 10: + r = inptrA[i] >> (inptrB[j] & shift_mask); + break; + case 11: + r = inptrA[i] << (inptrB[j] & shift_mask); + break; + case 12: + r = ~inptrA[i]; + break; + case 13: + r = (inptrA[j] < inptrB[j]) ? 
inptrA[i] : inptrB[i]; + break; + case 14: + // Scalars are set to 1/0 + r = inptrA[i] && inptrB[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + case 15: + // Scalars are set to 1/0 + r = inptrA[i] || inptrB[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + case 16: + // Scalars are set to 1/0 + r = inptrA[i] < inptrB[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + case 17: + // Scalars are set to 1/0 + r = inptrA[i] > inptrB[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + case 18: + // Scalars are set to 1/0 + r = inptrA[i] <= inptrB[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + case 19: + // Scalars are set to 1/0 + r = inptrA[i] >= inptrB[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + case 20: + // Scalars are set to 1/0 + r = inptrA[i] == inptrB[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + case 21: + // Scalars are set to 1/0 + r = inptrA[i] != inptrB[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + case 22: + // Scalars are set to 1/0 + r = !inptrA[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + default: + log_error("Invalid test: %d\n", test); + return -1; + break; + } + if (r != outptr[i]) { + // Shift is tricky + if (test == 8 || test == 9) { + log_error("cl_char Verification failed at element %ld: 0x%x %s 0x%x = 0x%x, got 0x%x\n", i, inptrA[i], tests[test], inptrB[i], r, outptr[i]); + log_error("\t1) Shift failure at element %ld: original is 0x%x %s %d (0x%x)\n", i, inptrA[i], tests[test], (int)inptrB[i], inptrB[i]); + log_error("\t2) Take the %d LSBs of the shift to get the final shift amount %d (0x%x).\n", (int)log2(sizeof(cl_char)*8), inptrB[i]&shift_mask, inptrB[i]&shift_mask); + } + 
else if (test == 10 || test == 11) { + log_error("cl_char Verification failed at element %ld (%ld): 0x%x %s 0x%x = 0x%x, got 0x%x\n", i, j, inptrA[i], tests[test], inptrB[j], r, outptr[i]); + log_error("\t1) Scalar shift failure at element %ld: original is 0x%x %s %d (0x%x)\n", i, inptrA[i], tests[test], (int)inptrB[j], inptrB[j]); + log_error("\t2) Take the %d LSBs of the shift to get the final shift amount %d (0x%x).\n", (int)log2(sizeof(cl_long)*8), inptrB[j]&shift_mask, inptrB[j]&shift_mask); + } else if (test == 13) { + log_error("cl_int Verification failed at element %ld (%ld): (0x%x < 0x%x) ? 0x%x : 0x%x = 0x%x, got 0x%x\n", i, j, inptrA[j], inptrB[j], + inptrA[i], inptrB[i], r, outptr[i]); + } else { + log_error("cl_char Verification failed at element %ld: 0x%x %s 0x%x = 0x%x, got 0x%x\n", i, inptrA[i], tests[test], inptrB[i], r, outptr[i]); + } + count++; + if (count >= MAX_ERRORS_TO_PRINT) { + log_error("Further errors ignored.\n"); + return -1; + } + } + } + } + if (count) return -1; else return 0; +} + +void +init_char_data(uint64_t indx, int num_elements, cl_char *input_ptr[], MTdata d) +{ + static const cl_char specialCaseList[] = { 0, -1, 1, CL_CHAR_MIN, CL_CHAR_MIN + 1, CL_CHAR_MAX }; + int j; + + // FIXME comment below might not be appropriate for + // vector data. Yes, checking every scalar char against every + // scalar char is only 2^16 ~ 64000 tests, but once we get to vec3, + // vec4, vec8... 
+ + // in the meantime, this means I can use [] to access vec3 instead of + // vload3 / vstore3 :D + + // FIXME: we really should just check every char against every char here + // Set the inputs to a random number + for (j=0; j> 16); + } + + // Init the first few values to test special cases + { + size_t x, y, index = 0; + for( x = 0; x < sizeof( specialCaseList ) / sizeof( specialCaseList[0] ); x++ ) + for( y = 0; y < sizeof( specialCaseList ) / sizeof( specialCaseList[0] ); y++ ) + { + ((cl_char *)input_ptr[0])[index] = specialCaseList[x]; + ((cl_char *)input_ptr[1])[index++] = specialCaseList[y]; + } + } +} + + +// ======================================= +// uchar +// ======================================= +int +verify_uchar(int test, size_t vector_size, cl_uchar *inptrA, cl_uchar *inptrB, cl_uchar *outptr, size_t n) +{ + cl_uchar r; + cl_uint shift_mask = vector_size == 1 ? (cl_uint)(sizeof(cl_uint)*8)-1 + : (cl_uint)(sizeof(cl_uchar)*8)-1;; + size_t i, j; + int count=0; + + for (j=0; j> (inptrB[i] & shift_mask); + break; + case 9: + r = inptrA[i] << (inptrB[i] & shift_mask); + break; + case 10: + r = inptrA[i] >> (inptrB[j] & shift_mask); + break; + case 11: + r = inptrA[i] << (inptrB[j] & shift_mask); + break; + case 12: + r = ~inptrA[i]; + break; + case 13: + r = (inptrA[j] < inptrB[j]) ? 
inptrA[i] : inptrB[i]; + break; + case 14: + // Scalars are set to 1/0 + r = inptrA[i] && inptrB[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + case 15: + // Scalars are set to 1/0 + r = inptrA[i] || inptrB[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + case 16: + // Scalars are set to 1/0 + r = inptrA[i] < inptrB[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + case 17: + // Scalars are set to 1/0 + r = inptrA[i] > inptrB[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + case 18: + // Scalars are set to 1/0 + r = inptrA[i] <= inptrB[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + case 19: + // Scalars are set to 1/0 + r = inptrA[i] >= inptrB[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + case 20: + // Scalars are set to 1/0 + r = inptrA[i] == inptrB[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + case 21: + // Scalars are set to 1/0 + r = inptrA[i] != inptrB[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + case 22: + // Scalars are set to 1/0 + r = !inptrA[i]; + // Vectors are set to -1/0 + if (vector_size != 1 && r) { + r = -1; + } + break; + default: + log_error("Invalid test: %d\n", test); + return -1; + break; + } + if (r != outptr[i]) { + // Shift is tricky + if (test == 8 || test == 9) { + log_error("cl_uchar Verification failed at element %ld: 0x%x %s 0x%x = 0x%x, got 0x%x\n", i, inptrA[i], tests[test], inptrB[i], r, outptr[i]); + log_error("\t1) Shift failure at element %ld: original is 0x%x %s %d (0x%x)\n", i, inptrA[i], tests[test], (int)inptrB[i], inptrB[i]); + log_error("\t2) Take the %d LSBs of the shift to get the final shift amount %d (0x%x).\n", (int)log2(sizeof(cl_uchar)*8), inptrB[i]&shift_mask, inptrB[i]&shift_mask); + } + 
else if (test == 10 || test == 11) { + log_error("cl_uchar Verification failed at element %ld (%ld): 0x%x %s 0x%x = 0x%x, got 0x%x\n", i, j, inptrA[i], tests[test], inptrB[j], r, outptr[i]); + log_error("\t1) Scalar shift failure at element %ld: original is 0x%x %s %d (0x%x)\n", i, inptrA[i], tests[test], (int)inptrB[j], inptrB[j]); + log_error("\t2) Take the %d LSBs of the shift to get the final shift amount %d (0x%x).\n", (int)log2(sizeof(cl_uchar)*8), inptrB[j]&shift_mask, inptrB[j]&shift_mask); + } else if (test == 13) { + log_error("cl_int Verification failed at element %ld (%ld): (0x%x < 0x%x) ? 0x%x : 0x%x = 0x%x, got 0x%x\n", i, j, inptrA[j], inptrB[j], + inptrA[i], inptrB[i], r, outptr[i]); + } else { + log_error("cl_uchar Verification failed at element %ld: 0x%x %s 0x%x = 0x%x, got 0x%x\n", i, inptrA[i], tests[test], inptrB[i], r, outptr[i]); + } + count++; + if (count >= MAX_ERRORS_TO_PRINT) { + log_error("Further errors ignored.\n"); + return -1; + } + } + } + } + + if (count) return -1; else return 0; +} + +void +init_uchar_data(uint64_t indx, int num_elements, cl_uchar *input_ptr[], MTdata d) +{ + static const cl_uchar specialCaseList[] = { 0, -1, 1, CL_CHAR_MAX, CL_CHAR_MAX + 1, CL_UCHAR_MAX }; + int j; + + // FIXME: we really should just check every char against every char here + + // Set the inputs to a random number + for (j=0; j> 16); + } + + // Init the first few values to test special cases + { + size_t x, y, index = 0; + for( x = 0; x < sizeof( specialCaseList ) / sizeof( specialCaseList[0] ); x++ ) + for( y = 0; y < sizeof( specialCaseList ) / sizeof( specialCaseList[0] ); y++ ) + { + ((cl_uchar *)input_ptr[0])[index] = specialCaseList[x]; + ((cl_uchar *)input_ptr[1])[index++] = specialCaseList[y]; + } + } +} + diff --git a/test_conformance/math_brute_force/CMakeLists.txt b/test_conformance/math_brute_force/CMakeLists.txt new file mode 100644 index 00000000..176b3f66 --- /dev/null +++ b/test_conformance/math_brute_force/CMakeLists.txt @@ -0,0 
+1,86 @@ +set(MODULE_NAME BRUTEFORCE) + +set(${MODULE_NAME}_SOURCES + FunctionList.c + Sleep.c + binary.c + binaryOperator.c + Utility.c + binary_i.c + binary_two_results_i.c + i_unary.c + macro_binary.c + macro_unary.c + mad.c + main.c + reference_math.c + ternary.c + unary.c + unary_two_results.c + unary_two_results_i.c + unary_u.c + ../../test_common/harness/rounding_mode.c + ../../test_common/harness/ThreadPool.c + ../../test_common/harness/mt19937.c + ../../test_common/harness/msvc9.c + ../../test_common/harness/parseParameters.cpp + ../../test_common/harness/kernelHelpers.c + ../../test_common/harness/errorHelpers.c +) + + +if (NOT ANDROID) +set_source_files_properties( + FunctionList.c + Sleep.c + binary.c + binaryOperator.c + Utility.c + binary_i.c + binary_two_results_i.c + i_unary.c + macro_binary.c + macro_unary.c + mad.c + main.c reference_math.c + ternary.c unary.c unary_two_results.c + unary_two_results_i.c unary_u.c + COMPILE_FLAGS -msse2 ) +endif(NOT ANDROID) + +set_source_files_properties( + FunctionList.c + Sleep.c + binary.c + binaryOperator.c + Utility.c + binary_i.c + binary_two_results_i.c + i_unary.c + macro_binary.c + macro_unary.c + mad.c + main.c + reference_math.c + ternary.c + unary.c + unary_two_results.c + unary_two_results_i.c unary_u.c + ../../test_common/harness/rounding_mode.c + ../../test_common/harness/ThreadPool.c + ../../test_common/harness/msvc9.c + ../../test_common/harness/parseParameters.cpp + PROPERTIES LANGUAGE CXX) + +if (NOT CMAKE_CL_64 AND NOT MSVC AND NOT ANDROID) +set_source_files_properties( +${MODULE_NAME}_SOURCES + COMPILE_FLAGS -march=i686) +endif (NOT CMAKE_CL_64 AND NOT MSVC AND NOT ANDROID) + +if(CMAKE_COMPILER_IS_GNUCC) +set_source_files_properties( + COMPILE_FLAGS -O0) +endif(CMAKE_COMPILER_IS_GNUCC) + +include(../CMakeCommon.txt) diff --git a/test_conformance/math_brute_force/FunctionList.c b/test_conformance/math_brute_force/FunctionList.c new file mode 100644 index 00000000..027672fb --- /dev/null +++ 
b/test_conformance/math_brute_force/FunctionList.c @@ -0,0 +1,180 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "FunctionList.h" +#include "reference_math.h" + +#define FTZ_ON 1 +#define FTZ_OFF 0 +#define EXACT 0.0f +#define RELAXED_ON 1 +#define RELAXED_OFF 0 + +#define STRINGIFY( _s) #_s + +#define ENTRY( _name, _ulp, _embedded_ulp, _rmode, _type ) { STRINGIFY(_name), STRINGIFY(_name), {(void*)reference_##_name}, {(void*)reference_##_name##l}, {(void*)reference_##_name}, _ulp, _ulp, _embedded_ulp, INFINITY, _rmode, RELAXED_OFF, _type } +#define ENTRY_EXT( _name, _ulp, _embedded_ulp, _relaxed_ulp, _rmode, _type ) { STRINGIFY(_name), STRINGIFY(_name), {(void*)reference_##_name}, {(void*)reference_##_name##l}, {(void*)reference_##relaxed_##_name}, _ulp, _ulp, _embedded_ulp, _relaxed_ulp, _rmode, RELAXED_ON, _type } +#define HALF_ENTRY( _name, _ulp, _embedded_ulp, _rmode, _type ) { "half_" STRINGIFY(_name), "half_" STRINGIFY(_name), {(void*)reference_##_name}, {NULL}, {NULL}, _ulp, _ulp, _embedded_ulp, INFINITY, _rmode, RELAXED_OFF, _type } +#define OPERATOR_ENTRY(_name, _operator, _ulp, _embedded_ulp, _rmode, _type) { STRINGIFY(_name), _operator, {(void*)reference_##_name}, {(void*)reference_##_name##l}, {NULL}, _ulp, _ulp, _embedded_ulp, INFINITY, _rmode, RELAXED_OFF, _type } + +#if defined( __cplusplus ) + extern "C" { +#endif +extern const vtbl _unary; // float foo( float ) +extern 
const vtbl _unary_u; // float foo( uint ), double foo( ulong ) +extern const vtbl _i_unary; // int foo( float ) +extern const vtbl _macro_unary; // int foo( float ), returns {0,1} for scalar, { 0, -1 } for vector +extern const vtbl _binary; // float foo( float, float ) +extern const vtbl _binary_nextafter; // float foo( float, float ), special handling for nextafter +extern const vtbl _binary_operator; // float .op. float +extern const vtbl _macro_binary; // int foo( float, float ), returns {0,1} for scalar, { 0, -1 } for vector +extern const vtbl _binary_i; // float foo( float, int ) +extern const vtbl _ternary; // float foo( float, float, float ) +extern const vtbl _unary_two_results; // float foo( float, float * ) +extern const vtbl _unary_two_results_i; // float foo( float, int * ) +extern const vtbl _binary_two_results_i; // float foo( float, float, int * ) +extern const vtbl _mad_tbl; // float mad( float, float, float ) +#if defined( __cplusplus) + } +#endif + +#define unaryF &_unary +#define i_unaryF &_i_unary +#define unaryF_u &_unary_u +#define macro_unaryF &_macro_unary +#define binaryF &_binary +#define binaryF_nextafter &_binary_nextafter +#define binaryOperatorF &_binary_operator +#define binaryF_i &_binary_i +#define macro_binaryF &_macro_binary +#define ternaryF &_ternary +#define unaryF_two_results &_unary_two_results +#define unaryF_two_results_i &_unary_two_results_i +#define binaryF_two_results_i &_binary_two_results_i +#define mad_function &_mad_tbl + + +const Func functionList[] = { + ENTRY( acos, 4.0f, 4.0f, FTZ_OFF, unaryF), + ENTRY( acosh, 4.0f, 4.0f, FTZ_OFF, unaryF), + ENTRY( acospi, 5.0f, 5.0f, FTZ_OFF, unaryF), + ENTRY( asin, 4.0f, 4.0f, FTZ_OFF, unaryF), + ENTRY( asinh, 4.0f, 4.0f, FTZ_OFF, unaryF), + ENTRY( asinpi, 5.0f, 5.0f, FTZ_OFF, unaryF), + ENTRY( atan, 5.0f, 5.0f, FTZ_OFF, unaryF), + ENTRY( atanh, 5.0f, 5.0f, FTZ_OFF, unaryF), + ENTRY( atanpi, 5.0f, 5.0f, FTZ_OFF, unaryF), + ENTRY( atan2, 6.0f, 6.0f, FTZ_OFF, binaryF), + ENTRY( 
atan2pi, 6.0f, 6.0f, FTZ_OFF, binaryF), + ENTRY( cbrt, 2.0f, 4.0f, FTZ_OFF, unaryF), + ENTRY( ceil, 0.0f, 0.0f, FTZ_OFF, unaryF), + ENTRY( copysign, 0.0f, 0.0f, FTZ_OFF, binaryF), + ENTRY_EXT( cos, 4.0f, 4.0f, 0.00048828125f, FTZ_OFF, unaryF), //relaxed ulp 2^-11 + ENTRY( cosh, 4.0f, 4.0f, FTZ_OFF, unaryF), + ENTRY( cospi, 4.0f, 4.0f, FTZ_OFF, unaryF), +// ENTRY( erfc, 16.0f, 16.0f, FTZ_OFF, unaryF), //disabled for 1.0 due to lack of reference implementation +// ENTRY( erf, 16.0f, 16.0f, FTZ_OFF, unaryF), //disabled for 1.0 due to lack of reference implementation + ENTRY_EXT( exp, 3.0f, 4.0f, 3.0f, FTZ_OFF, unaryF), //relaxed error is actually overwritten in unary.c as it is 3+floor(fabs(2*x)) + ENTRY_EXT( exp2, 3.0f, 4.0f, 3.0f, FTZ_OFF, unaryF), //relaxed error is actually overwritten in unary.c as it is 3+floor(fabs(2*x)) + ENTRY_EXT( exp10, 3.0f, 4.0f, 8192.0f, FTZ_OFF, unaryF), //relaxed error is actually overwritten in unary.c as it is 3+floor(fabs(2*x)) in derived mode, + // in non-derived mode it uses the ulp error for half_exp10. 
+ ENTRY( expm1, 3.0f, 4.0f, FTZ_OFF, unaryF), + ENTRY( fabs, 0.0f, 0.0f, FTZ_OFF, unaryF), + ENTRY( fdim, 0.0f, 0.0f, FTZ_OFF, binaryF), + ENTRY( floor, 0.0f, 0.0f, FTZ_OFF, unaryF), + ENTRY( fma, 0.0f, 0.0f, FTZ_OFF, ternaryF), + ENTRY( fmax, 0.0f, 0.0f, FTZ_OFF, binaryF), + ENTRY( fmin, 0.0f, 0.0f, FTZ_OFF, binaryF), + ENTRY( fmod, 0.0f, 0.0f, FTZ_OFF, binaryF ), + ENTRY( fract, 0.0f, 0.0f, FTZ_OFF, unaryF_two_results), + ENTRY( frexp, 0.0f, 0.0f, FTZ_OFF, unaryF_two_results_i), + ENTRY( hypot, 4.0f, 4.0f, FTZ_OFF, binaryF), + ENTRY( ilogb, 0.0f, 0.0f, FTZ_OFF, i_unaryF), + ENTRY( isequal, 0.0f, 0.0f, FTZ_OFF, macro_binaryF), + ENTRY( isfinite, 0.0f, 0.0f, FTZ_OFF, macro_unaryF), + ENTRY( isgreater, 0.0f, 0.0f, FTZ_OFF, macro_binaryF), + ENTRY( isgreaterequal, 0.0f, 0.0f, FTZ_OFF, macro_binaryF), + ENTRY( isinf, 0.0f, 0.0f, FTZ_OFF, macro_unaryF), + ENTRY( isless, 0.0f, 0.0f, FTZ_OFF, macro_binaryF), + ENTRY( islessequal, 0.0f, 0.0f, FTZ_OFF, macro_binaryF), + ENTRY( islessgreater, 0.0f, 0.0f, FTZ_OFF, macro_binaryF), + ENTRY( isnan, 0.0f, 0.0f, FTZ_OFF, macro_unaryF), + ENTRY( isnormal, 0.0f, 0.0f, FTZ_OFF, macro_unaryF), + ENTRY( isnotequal, 0.0f, 0.0f, FTZ_OFF, macro_binaryF), + ENTRY( isordered, 0.0f, 0.0f, FTZ_OFF, macro_binaryF), + ENTRY( isunordered, 0.0f, 0.0f, FTZ_OFF, macro_binaryF), + ENTRY( ldexp, 0.0f, 0.0f, FTZ_OFF, binaryF_i), + ENTRY( lgamma, INFINITY, INFINITY, FTZ_OFF, unaryF), + ENTRY( lgamma_r, INFINITY, INFINITY, FTZ_OFF, unaryF_two_results_i), + ENTRY_EXT( log, 3.0f, 4.0f, 4.76837158203125e-7f, FTZ_OFF, unaryF), //relaxed ulp 2^-21 + ENTRY_EXT( log2, 3.0f, 4.0f, 4.76837158203125e-7f, FTZ_OFF, unaryF), //relaxed ulp 2^-21 + ENTRY( log10, 3.0f, 4.0f, FTZ_OFF, unaryF), + ENTRY( log1p, 2.0f, 4.0f, FTZ_OFF, unaryF), + ENTRY( logb, 0.0f, 0.0f, FTZ_OFF, unaryF), + ENTRY_EXT( mad, INFINITY, INFINITY, INFINITY, FTZ_OFF, mad_function), //in fast-relaxed-math mode it has to be either exactly rounded fma or exactly rounded a*b+c + ENTRY( maxmag, 0.0f, 
0.0f, FTZ_OFF, binaryF ), + ENTRY( minmag, 0.0f, 0.0f, FTZ_OFF, binaryF ), + ENTRY( modf, 0.0f, 0.0f, FTZ_OFF, unaryF_two_results ), + ENTRY( nan, 0.0f, 0.0f, FTZ_OFF, unaryF_u), + ENTRY( nextafter, 0.0f, 0.0f, FTZ_OFF, binaryF_nextafter), + ENTRY_EXT( pow, 16.0f, 16.0f, 8192.0f, FTZ_OFF, binaryF), //in derived mode the ulp error is calculated as exp2(y*log2(x)) and in non-derived it is the same as half_pow + ENTRY( pown, 16.0f, 16.0f, FTZ_OFF, binaryF_i), + ENTRY( powr, 16.0f, 16.0f, FTZ_OFF, binaryF), +// ENTRY( reciprocal, 1.0f, 1.0f, FTZ_OFF, unaryF), + ENTRY( remainder, 0.0f, 0.0f, FTZ_OFF, binaryF), + ENTRY( remquo, 0.0f, 0.0f, FTZ_OFF, binaryF_two_results_i), + ENTRY( rint, 0.0f, 0.0f, FTZ_OFF, unaryF), + ENTRY( rootn, 16.0f, 16.0f, FTZ_OFF, binaryF_i), + ENTRY( round, 0.0f, 0.0f, FTZ_OFF, unaryF), + ENTRY( rsqrt, 2.0f, 4.0f, FTZ_OFF, unaryF), + ENTRY( signbit, 0.0f, 0.0f, FTZ_OFF, macro_unaryF), + ENTRY_EXT( sin, 4.0f, 4.0f, 0.00048828125f, FTZ_OFF, unaryF), //relaxed ulp 2^-11 + ENTRY_EXT( sincos, 4.0f, 4.0f, 0.00048828125f, FTZ_OFF, unaryF_two_results), //relaxed ulp 2^-11 + ENTRY( sinh, 4.0f, 4.0f, FTZ_OFF, unaryF), + ENTRY( sinpi, 4.0f, 4.0f, FTZ_OFF, unaryF), + { "sqrt", "sqrt", {(void*)reference_sqrt}, {(void*)reference_sqrtl}, {NULL}, 3.0f, 0.0f, 4.0f, INFINITY, FTZ_OFF, RELAXED_OFF, unaryF }, + { "sqrt_cr", "sqrt", {(void*)reference_sqrt}, {(void*)reference_sqrtl}, {NULL}, 0.0f, 0.0f, 0.0f, INFINITY, FTZ_OFF, RELAXED_OFF, unaryF }, + ENTRY_EXT( tan, 5.0f, 5.0f, 8192.0f, FTZ_OFF, unaryF), //in derived mode it the ulp error is calculated as sin/cos and in non-derived mode it is the same as half_tan. 
+ ENTRY( tanh, 5.0f, 5.0f, FTZ_OFF, unaryF), + ENTRY( tanpi, 6.0f, 6.0f, FTZ_OFF, unaryF), +// ENTRY( tgamma, 16.0f, 16.0f, FTZ_OFF, unaryF), // Commented this out until we can be sure this requirement is realistic + ENTRY( trunc, 0.0f, 0.0f, FTZ_OFF, unaryF), + + HALF_ENTRY( cos, 8192.0f, 8192.0f, FTZ_ON, unaryF), + HALF_ENTRY( divide, 8192.0f, 8192.0f, FTZ_ON, binaryF), + HALF_ENTRY( exp, 8192.0f, 8192.0f, FTZ_ON, unaryF), + HALF_ENTRY( exp2, 8192.0f, 8192.0f, FTZ_ON, unaryF), + HALF_ENTRY( exp10, 8192.0f, 8192.0f, FTZ_ON, unaryF), + HALF_ENTRY( log, 8192.0f, 8192.0f, FTZ_ON, unaryF), + HALF_ENTRY( log2, 8192.0f, 8192.0f, FTZ_ON, unaryF), + HALF_ENTRY( log10, 8192.0f, 8192.0f, FTZ_ON, unaryF), + HALF_ENTRY( powr, 8192.0f, 8192.0f, FTZ_ON, binaryF), + HALF_ENTRY( recip, 8192.0f, 8192.0f, FTZ_ON, unaryF), + HALF_ENTRY( rsqrt, 8192.0f, 8192.0f, FTZ_ON, unaryF), + HALF_ENTRY( sin, 8192.0f, 8192.0f, FTZ_ON, unaryF), + HALF_ENTRY( sqrt, 8192.0f, 8192.0f, FTZ_ON, unaryF), + HALF_ENTRY( tan, 8192.0f, 8192.0f, FTZ_ON, unaryF), + + // basic operations + OPERATOR_ENTRY( add, "+", 0.0f, 0.0f, FTZ_OFF, binaryOperatorF), + OPERATOR_ENTRY( subtract, "-", 0.0f, 0.0f, FTZ_OFF, binaryOperatorF), + { "divide", "/", {(void*)reference_divide}, {(void*)reference_dividel}, {(void*)reference_relaxed_divide}, 2.5f, 0.0f, 3.0f, 2.5f, FTZ_OFF, RELAXED_ON, binaryOperatorF }, + { "divide_cr", "/", {(void*)reference_divide}, {(void*)reference_dividel}, {(void*)reference_relaxed_divide}, 0.0f, 0.0f, 0.0f, 0.f, FTZ_OFF, RELAXED_OFF, binaryOperatorF }, + OPERATOR_ENTRY( multiply, "*", 0.0f, 0.0f, FTZ_OFF, binaryOperatorF), + OPERATOR_ENTRY( assignment, "", 0.0f, 0.0f, FTZ_OFF, unaryF), // A simple copy operation + OPERATOR_ENTRY( not, "!", 0.0f, 0.0f, FTZ_OFF, macro_unaryF), + }; + +const size_t functionListCount = sizeof( functionList ) / sizeof( functionList[0] ); diff --git a/test_conformance/math_brute_force/FunctionList.h b/test_conformance/math_brute_force/FunctionList.h new file mode 
100644 index 00000000..346654fb --- /dev/null +++ b/test_conformance/math_brute_force/FunctionList.h @@ -0,0 +1,99 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef FUNCTIONLIST_H +#define FUNCTIONLIST_H + +#include "../../test_common/harness/compat.h" + +#ifndef WIN32 +#include +#endif + +#if defined( __APPLE__ ) + #include +#else + #include +#endif + +#include "../../test_common/harness/mt19937.h" + +typedef union fptr +{ + void *p; + double (*f_f)(double); + double (*f_u)(cl_uint); + int (*i_f)(double); + int (*i_f_f)(float); + float (*f_ff_f)(float, float); + double (*f_ff)(double, double); + int (*i_ff)(double, double); + double (*f_fi)(double, int); + double (*f_fpf)(double, double*); + double (*f_fpI)(double, int*); + double (*f_ffpI)(double, double, int*); + double (*f_fff)(double, double, double ); + float (*f_fma)(float, float, float, int); +}fptr; + +typedef union dptr +{ + void *p; + long double (*f_f)(long double); + long double (*f_u)(cl_ulong); + int (*i_f)(long double); + long double (*f_ff)(long double, long double); + int (*i_ff)(long double, long double); + long double (*f_fi)(long double, int); + long double (*f_fpf)(long double, long double*); + long double (*f_fpI)(long double, int*); + long double (*f_ffpI)(long double, long double, int*); + long double (*f_fff)(long double, long double, long double); +}dptr; + +struct Func; + +typedef struct vtbl +{ + const char 
*type_name; + int (*TestFunc)( const struct Func *, MTdata ); + int (*DoubleTestFunc)( const struct Func *, MTdata); // may be NULL if function is single precision only +}vtbl; + +typedef struct Func +{ + const char *name; // common name, to be used as an argument in the shell + const char *nameInCode; // name as it appears in the __kernel, usually the same as name, but different for multiplication + fptr func; + dptr dfunc; + fptr rfunc; + float float_ulps; + float double_ulps; + float float_embedded_ulps; + float relaxed_error; + int ftz; + int relaxed; + const ::vtbl *vtbl; +}Func; + + +extern const Func functionList[]; + +extern const size_t functionListCount; + + +#endif + + diff --git a/test_conformance/math_brute_force/Jamfile b/test_conformance/math_brute_force/Jamfile new file mode 100644 index 00000000..39ee8bc8 --- /dev/null +++ b/test_conformance/math_brute_force/Jamfile @@ -0,0 +1,36 @@ +project + : requirements + -/harness//harness /harness//harness + /Runtime//OpenCL.lib +# gcc:-xc++ + msvc:"/TP" + ; + +exe bruteforce + : binary.c + binary_i.c + binaryOperator.c + binary_two_results_i.c + FunctionList.c + i_unary.c + macro_binary.c + macro_unary.c + mad.c + main.c + reference_math.c + Sleep.c + ternary.c + unary.c + unary_two_results.c + unary_two_results_i.c + unary_u.c + Utility.c + /harness//mt19937.c + : windows:/harness//msvc9.c + ; + +install dist + : bruteforce + : debug:$(DIST)/debug/tests/test_conformance/math_brute_force + release:$(DIST)/release/tests/test_conformance/math_brute_force + ; diff --git a/test_conformance/math_brute_force/Makefile b/test_conformance/math_brute_force/Makefile new file mode 100644 index 00000000..3c69e3e2 --- /dev/null +++ b/test_conformance/math_brute_force/Makefile @@ -0,0 +1,33 @@ +ifdef BUILD_WITH_ATF +ATF = -framework ATF +USE_ATF = -DUSE_ATF +endif + +CC = cc +CFLAGS = -g -Wall -Wshorten-64-to-32 $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} +LIBRARIES = -framework OpenCL -framework ApplicationServices 
-framework IOKit -I/System/Library/Frameworks/OpenCL.framework/Headers ${RC_CFLAGS} ${ATF} + +release: + echo "Build Release" + $(CC) *.c ../../test_common/harness/mt19937.c ../../test_common/harness/rounding_mode.c ../../test_common/harness/ThreadPool.c -Os $(CFLAGS) -o bruteforce $(LIBRARIES) + +debug: + echo "Build Debug" + $(CC) *.c ../../test_common/harness/mt19937.c ../../test_common/harness/rounding_mode.c ../../test_common/harness/ThreadPool.c -O0 $(CFLAGS) -D_DEBUG=1 -o bruteforce_debug $(LIBRARIES) + +test: release + arch -i386 ./bruteforce -c > cpu.log & + arch -i386 ./bruteforce -g > gpu.log & + echo "Testing 32-bit mode in progress. This may take up to 1 day to complete. See cpu.log and gpu.log for results." + +test64: release + arch -x86_64 ./bruteforce -c > cpu64.log & + arch -x86_64 ./bruteforce -g > gpu64.log & + echo "Testing 64-bit mode in progress. This may take up to 1 day to complete. See cpu64.log and gpu64.log for results." + + +clean: + rm -f ./bruteforce_debug + rm -f ./bruteforce + +all: release diff --git a/test_conformance/math_brute_force/README.txt b/test_conformance/math_brute_force/README.txt new file mode 100644 index 00000000..5b289868 --- /dev/null +++ b/test_conformance/math_brute_force/README.txt @@ -0,0 +1,150 @@ +Copyright: (c) 2009-2013 by Apple Inc. All Rights Reserved. + +math_brute_force test Feb 24, 2009 +===================== + +Usage: + + Please run the executable with --help for usage information. + + + +System Requirements: + + This test requires support for correctly rounded single and double precision arithmetic. +The current version also requires a reasonably accurate operating system math library to +be present. The OpenCL implementation must be able to compile kernels online. The test assumes +that the host system stores its floating point data according to the IEEE-754 binary single and +double precision floating point formats. + + +Test Completion Time: + + This test takes a while. 
Modern desktop systems can usually finish it in 1-3 +days. Engineers doing OpenCL math library software development may find wimpy mode (-w) +a useful screen to quickly look for problems in a new implementation, before committing +to a lengthy test run. Likewise, it is possible to run just a range of tests, or specific +tests. See Usage above. + + +Test Design: + + This test is designed to do a somewhat exhaustive examination of the single +and double precision math library functions in OpenCL, for all vector lengths. Math +library functions are compared against results from a higher precision reference +function to determine correctness. All possible inputs are examined for unary +single precision functions. Other functions are tested against a table of difficult +values, followed by a few billion random values. If an error is found in a function, +the test for that function terminates early, reports an error, and moves on to the +next test, if any. + +The test currently doesn't support half precision math functions covered in section +9 of the OpenCL 1.0 specification, but does cover the half_func functions covered in +section six. It also doesn't test the native_ functions, for which any result +is conformant. + +For the OpenCL 1.0 time frame, the reference library shall be the operating system +math library, as modified by the test itself to conform to the OpenCL specification. +That will help ensure that all devices on a particular operating system are returning +similar results. Going forward to future OpenCL releases, it is planned to gradually +introduce a reference math library directly into the test, so as to reduce inter- +platform variance between OpenCL implementations. 
+ +Generally speaking, this test will consider a result correct if it is one of the following: + + 1) bitwise identical to the output of the reference function, + rounded to the appropriate precision + + 2) within the allowed ulp error tolerance of the infinitely precise + result (as estimated by the reference function) + + 3) If the reference result is a NaN, then any NaN is deemed correct. + + 4) if the device is running in FTZ mode, then the result is also correct + if the infinitely precise result (as estimated by the reference + function) is subnormal, and the returned result is a zero + + 5) if the device is running in FTZ mode, then we also calculate the + estimate of the infinitely precise result with the reference function + with subnormal inputs flushed to +- zero. If any of those results + are within the error tolerance of the returned result, then it is + deemed correct + + 6) half_func functions may flush per 4&5 above, even if the device is not + in FTZ mode. + + 7) Functions are allowed to prematurely overflow to infinity, so long as + the estimated infinitely precise result is within the stated ulp + error limit of the maximum finite representable value of appropriate + sign + + 8) Functions are allowed to prematurely underflow (and if in FTZ mode, + have behavior covered by 4&5 above), so long as the estimated + infinitely precise result is within the stated ulp error limit + of the minimum normal representable value of appropriate sign + + 9) Some functions have limited range. Results of inputs outside that range + are considered correct, so long as a result is returned. + + 10) Some functions have infinite error bounds. Results of these function + are considered correct, so long as a result is returned. + + 11) The test currently does not discriminate based on the sign of zero + We anticipate a later test will. + + 12) The test currently does not check to make sure that edge cases called + out in the standard (e.g. 
pow(1.0, any) = 1.0) are exactly correct. + We anticipate a later test will. + + 13) The test doesn't check IEEE flags or exceptions. See section 7.3 of the + OpenCL standard. + + + +Performance Measurement: + + There is also some optional timing code available, currently turned off by default. +These may be useful for tracking internal performance regressions, but is not required to +be part of the conformance submission. + + +If the test is believed to be in error: + +The above correctness heuristics shall not be construed to be an alternative to the correctness +criteria established by the OpenCL standard. An implementation shall be judged correct +or not on appeal based on whether it is within prescribed error bounds of the infinitely +precise result. (The ulp is defined in section 7.4 of the spec.) If the input value corresponds +to an edge case listed in OpenCL specification sections covering edge case behavior, or +similar sections in the C99 TC2 standard (section F.9 and G.6), then the function shall return +exactly that result, and the sign of a zero result shall be correct. In the event that the test +is found to be faulty, resulting in a spurious failure result, the committee shall make a reasonable +attempt to fix the test. If no practical and timely remedy can be found, then the implementation +shall be granted a waiver. + + +Guidelines for reference function error tolerances: + + Errors are measured in ulps, and stored in a single precision representation. So as +to avoid introducing error into the error measurement due to error in the reference function +itself, the reference function should attempt to deliver 24 bits more precision than the test +function return type. (All functions are currently either required to be correctly rounded or +may have >= 1 ulp of error. This places the 1's bit at the LSB of the result, with 23 bits of +sub-ulp accuracy. One more bit is required to avoid accrual of extra error due to round-to- +nearest behavior. 
If we start to require sub-ulp precision, then the accuracy requirements +for reference functions increase.) Therefore reference functions for single precision should +have 24+24=48 bits of accuracy, and reference functions for double precision should ideally +have 53+24 = 77 bits of accuracy. + +A double precision system math library function should be sufficient to safely verify a single +precision OpenCL math library function. A long double precision math library function may or +may not be sufficient to verify a double precision OpenCL math library function, depending on +the precision of the long double type. A later version of these tests is expected to replace +long double with a head+tail double double representation that can represent sufficient precision, +on all platforms that support double. + + +Revision history: + + Feb 24, 2009 IRO Created README + Added some reference functions so the test will run on Windows. + diff --git a/test_conformance/math_brute_force/Sleep.c b/test_conformance/math_brute_force/Sleep.c new file mode 100644 index 00000000..4d3b2c64 --- /dev/null +++ b/test_conformance/math_brute_force/Sleep.c @@ -0,0 +1,118 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "Sleep.h" +#include "Utility.h" + +#if defined( __APPLE__ ) + #include + #include + + struct + { + io_connect_t connection; + IONotificationPortRef port; + io_object_t iterator; + }sleepInfo; + + void sleepCallback( void * refcon, + io_service_t service, + natural_t messageType, + void * messageArgument ); + + void sleepCallback( void * refcon UNUSED, + io_service_t service UNUSED, + natural_t messageType, + void * messageArgument ) + { + + IOReturn result; + /* + service -- The IOService whose state has changed. + messageType -- A messageType enum, defined by IOKit/IOMessage.h or by the IOService's family. + messageArgument -- An argument for the message, dependent on the messageType. + */ + switch ( messageType ) + { + case kIOMessageSystemWillSleep: + // Handle demand sleep (such as sleep caused by running out of + // batteries, closing the lid of a laptop, or selecting + // sleep from the Apple menu. + IOAllowPowerChange(sleepInfo.connection,(long)messageArgument); + vlog( "Hard sleep occurred.\n" ); + break; + case kIOMessageCanSystemSleep: + // In this case, the computer has been idle for several minutes + // and will sleep soon so you must either allow or cancel + // this notification. Important: if you don’t respond, there will + // be a 30-second timeout before the computer sleeps. + // IOCancelPowerChange(root_port,(long)messageArgument); + result = IOCancelPowerChange(sleepInfo.connection,(long)messageArgument); + if( kIOReturnSuccess != result ) + vlog( "sleep prevention failed. (%d)\n", result); + break; + case kIOMessageSystemHasPoweredOn: + // Handle wakeup. + break; + } + } +#endif + + + + + +void PreventSleep( void ) +{ +#if defined( __APPLE__ ) + vlog( "Disabling sleep... 
" ); + sleepInfo.iterator = (io_object_t) 0; + sleepInfo.port = NULL; + sleepInfo.connection = IORegisterForSystemPower + ( + &sleepInfo, //void * refcon, + &sleepInfo.port, //IONotificationPortRef * thePortRef, + sleepCallback, //IOServiceInterestCallback callback, + &sleepInfo.iterator //io_object_t * notifier + ); + + if( (io_connect_t) 0 == sleepInfo.connection ) + vlog( "failed.\n" ); + else + vlog( "done.\n" ); + + CFRunLoopAddSource(CFRunLoopGetCurrent(), + IONotificationPortGetRunLoopSource(sleepInfo.port), + kCFRunLoopDefaultMode); +#else + vlog( "*** PreventSleep() is not implemented on this platform.\n" ); +#endif +} + +void ResumeSleep( void ) +{ +#if defined( __APPLE__ ) + IOReturn result = IODeregisterForSystemPower ( &sleepInfo.iterator ); + if( 0 != result ) + vlog( "Got error %d restoring sleep \n", result ); + else + vlog( "Sleep restored.\n" ); +#else + vlog( "*** ResumeSleep() is not implemented on this platform.\n" ); +#endif +} + + + diff --git a/test_conformance/math_brute_force/Sleep.h b/test_conformance/math_brute_force/Sleep.h new file mode 100644 index 00000000..f983a32f --- /dev/null +++ b/test_conformance/math_brute_force/Sleep.h @@ -0,0 +1,24 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef SLEEP_H +#define SLEEP_H + +void PreventSleep( void ); +void ResumeSleep( void ); + +#endif /* SLEEP_H */ + + diff --git a/test_conformance/math_brute_force/Utility.c b/test_conformance/math_brute_force/Utility.c new file mode 100644 index 00000000..3c706fab --- /dev/null +++ b/test_conformance/math_brute_force/Utility.c @@ -0,0 +1,169 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "Utility.h" + +#if defined(__PPC__) +// Global varaiable used to hold the FPU control register state. The FPSCR register can not +// be used because not all Power implementations retain or observed the NI (non-IEEE +// mode) bit. 
+__thread fpu_control_t fpu_control = 0; +#endif + +void MulD(double *rhi, double *rlo, double u, double v) +{ + const double c = 134217729.0; // 1+2^27 + double up, u1, u2, vp, v1, v2; + + up = u*c; + u1 = (u - up) + up; + u2 = u - u1; + + vp = v*c; + v1 = (v - vp) + vp; + v2 = v - v1; + + double rh = u*v; + double rl = (((u1*v1 - rh) + (u1*v2)) + (u2*v1)) + (u2*v2); + + *rhi = rh; + *rlo = rl; +} + +void AddD(double *rhi, double *rlo, double a, double b) +{ + double zhi, zlo; + zhi = a + b; + if(fabs(a) > fabs(b)) { + zlo = zhi - a; + zlo = b - zlo; + } + else { + zlo = zhi - b; + zlo = a - zlo; + } + + *rhi = zhi; + *rlo = zlo; +} + +void MulDD(double *rhi, double *rlo, double xh, double xl, double yh, double yl) +{ + double mh, ml; + double c = 134217729.0; + double up, u1, u2, vp, v1, v2; + + up = xh*c; + u1 = (xh - up) + up; + u2 = xh - u1; + + vp = yh*c; + v1 = (yh - vp) + vp; + v2 = yh - v1; + + mh = xh*yh; + ml = (((u1*v1 - mh) + (u1*v2)) + (u2*v1)) + (u2*v2); + ml += xh*yl + xl*yh; + + *rhi = mh + ml; + *rlo = (mh - (*rhi)) + ml; +} + +void AddDD(double *rhi, double *rlo, double xh, double xl, double yh, double yl) +{ + double r, s; + r = xh + yh; + s = (fabs(xh) > fabs(yh)) ? (xh - r + yh + yl + xl) : (yh - r + xh + xl + yl); + *rhi = r + s; + *rlo = (r - (*rhi)) + s; +} + +void DivideDD(double *chi, double *clo, double a, double b) +{ + *chi = a / b; + double rhi, rlo; + MulD(&rhi, &rlo, *chi, b); + AddDD(&rhi, &rlo, -rhi, -rlo, a, 0.0); + *clo = rhi / b; +} + +// These functions comapre two floats/doubles. Since some platforms may choose to +// flush denormals to zeros before comparison, comparison like a < b may give wrong +// result in "certain cases" where we do need correct compasion result when operands +// are denormals .... these functions comapre floats/doubles using signed integer/long int +// rep. In other cases, when flushing to zeros is fine, these should not be used. 
+// Also these doesn't check for nans and assume nans are handled separately as special edge case +// by the caller which calls these functions +// return 0 if both are equal, 1 if x > y and -1 if x < y. + +inline +int compareFloats(float x, float y) +{ + int32f_t a, b; + + a.f = x; + b.f = y; + + if( a.i & 0x80000000 ) + a.i = 0x80000000 - a.i; + if( b.i & 0x80000000 ) + b.i = 0x80000000 - b.i; + + if( a.i == b.i ) + return 0; + + return a.i < b.i ? -1 : 1; +} + +inline +int compareDoubles(double x, double y) +{ + int64d_t a, b; + + a.d = x; + b.d = y; + + if( a.l & 0x8000000000000000LL ) + a.l = 0x8000000000000000LL - a.l; + if( b.l & 0x8000000000000000LL ) + b.l = 0x8000000000000000LL - b.l; + + if( a.l == b.l ) + return 0; + + return a.l < b.l ? -1 : 1; +} + +void logFunctionInfo(const char *fname, unsigned int float_size, unsigned int isFastRelaxed) +{ + char const *fpSizeStr = NULL; + char const *fpFastRelaxedStr = ""; + switch (float_size) { + case sizeof(cl_double): + fpSizeStr = "fp64"; + break; + case sizeof(cl_float): + fpSizeStr = "fp32"; + break; + case sizeof(cl_half): + fpSizeStr = "fp16"; + break; + } + if (isFastRelaxed) { + fpFastRelaxedStr = "rlx"; + } + vlog("%15s %4s %4s",fname, fpSizeStr, fpFastRelaxedStr); +} + diff --git a/test_conformance/math_brute_force/Utility.h b/test_conformance/math_brute_force/Utility.h new file mode 100644 index 00000000..13651037 --- /dev/null +++ b/test_conformance/math_brute_force/Utility.h @@ -0,0 +1,280 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef UTILITY_H +#define UTILITY_H + +#include "../../test_common/harness/compat.h" + +#ifdef __APPLE__ +#include +#else +#include +#endif +#include +#include "../../test_common/harness/rounding_mode.h" +#include "../../test_common/harness/fpcontrol.h" + +#if defined( _WIN32) && defined (_MSC_VER) +#include "../../test_common/harness/testHarness.h" +#endif + +#include "../../test_common/harness/ThreadPool.h" +#define BUFFER_SIZE (1024*1024*2) + +#if defined( __GNUC__ ) + #define UNUSED __attribute__ ((unused)) +#else + #define UNUSED +#endif + +extern int gWimpyBufferSize; +extern int gWimpyReductionFactor; + +#define VECTOR_SIZE_COUNT 6 +extern const char *sizeNames[VECTOR_SIZE_COUNT]; +extern const int sizeValues[VECTOR_SIZE_COUNT]; + +extern cl_device_type gDeviceType; +extern cl_device_id gDevice; +extern cl_context gContext; +extern cl_command_queue gQueue; +extern void *gIn; +extern void *gIn2; +extern void *gIn3; +extern void *gOut_Ref; +extern void *gOut_Ref2; +extern void *gOut[VECTOR_SIZE_COUNT]; +extern void *gOut2[VECTOR_SIZE_COUNT]; +extern cl_mem gInBuffer; +extern cl_mem gInBuffer2; +extern cl_mem gInBuffer3; +extern cl_mem gOutBuffer[VECTOR_SIZE_COUNT]; +extern cl_mem gOutBuffer2[VECTOR_SIZE_COUNT]; +extern uint32_t gComputeDevices; +extern uint32_t gSimdSize; +extern int gSkipCorrectnessTesting; +extern int gMeasureTimes; +extern int gReportAverageTimes; +extern int gForceFTZ; +extern volatile int gTestFastRelaxed; +extern int gFastRelaxedDerived; +extern int gWimpyMode; +extern int gHasDouble; +extern int gIsInRTZMode; +extern int gInfNanSupport; +extern int gIsEmbedded; +extern int gVerboseBruteForce; +extern uint32_t gMaxVectorSizeIndex; +extern uint32_t gMinVectorSizeIndex; +extern uint32_t gDeviceFrequency; +extern cl_device_fp_config gFloatCapabilities; +extern cl_device_fp_config gDoubleCapabilities; + +#define LOWER_IS_BETTER 0 
+#define HIGHER_IS_BETTER 1 + +#if USE_ATF + + #include + #define test_start() ATFTestStart() + #define test_finish() ATFTestFinish() + #define vlog( ... ) ATFLogInfo(__VA_ARGS__) + #define vlog_error( ... ) ATFLogError(__VA_ARGS__) + #define vlog_perf( _number, _higherIsBetter, _units, _nameFmt, ... ) ATFLogPerformanceNumber(_number, _higherIsBetter, _units, _nameFmt, __VA_ARGS__ ) + +#else + #include "../../test_common/harness/errorHelpers.h" +#endif + +#if defined (_MSC_VER ) + //Deal with missing scalbn on windows + #define scalbnf( _a, _i ) ldexpf( _a, _i ) + #define scalbn( _a, _i ) ldexp( _a, _i ) + #define scalbnl( _a, _i ) ldexpl( _a, _i ) +#endif + +#ifdef __cplusplus +extern "C" { +#endif +float Abs_Error( float test, double reference ); +float Ulp_Error( float test, double reference ); +//float Ulp_Error_Half( float test, double reference ); +float Ulp_Error_Double( double test, long double reference ); +#ifdef __cplusplus +} //extern "C" +#endif + +uint64_t GetTime( void ); +double SubtractTime( uint64_t endTime, uint64_t startTime ); +int MakeKernel( const char **c, cl_uint count, const char *name, cl_kernel *k, cl_program *p ); +int MakeKernels( const char **c, cl_uint count, const char *name, cl_uint kernel_count, cl_kernel *k, cl_program *p ); + +// used to convert a bucket of bits into a search pattern through double +static inline double DoubleFromUInt32( uint32_t bits ); +static inline double DoubleFromUInt32( uint32_t bits ) +{ + union{ uint64_t u; double d;} u; + + // split 0x89abcdef to 0x89abc00000000def + u.u = bits & 0xfffU; + u.u |= (uint64_t) (bits & ~0xfffU) << 32; + + // sign extend the leading bit of def segment as sign bit so that the middle region consists of either all 1s or 0s + u.u -= (bits & 0x800U) << 1; + + // return result + return u.d; +} + +void _LogBuildError( cl_program p, int line, const char *file ); +#define LogBuildError( program ) _LogBuildError( program, __LINE__, __FILE__ ) + +#define PERF_LOOP_COUNT 100 + +// 
Note: though this takes a double, this is for use with single precision tests +static inline int IsFloatSubnormal( double x ) +{ +#if 2 == FLT_RADIX + // Do this in integer to avoid problems with FTZ behavior + union{ float d; uint32_t u;}u; + u.d = fabsf((float)x); + return (u.u-1) < 0x007fffffU; +#else + // rely on floating point hardware for non-radix2 non-IEEE-754 hardware -- will fail if you flush subnormals to zero + return fabs(x) < (double) FLT_MIN && x != 0.0; +#endif +} + + +static inline int IsDoubleSubnormal( long double x ) +{ +#if 2 == FLT_RADIX + // Do this in integer to avoid problems with FTZ behavior + union{ double d; uint64_t u;}u; + u.d = fabs((double) x); + return (u.u-1) < 0x000fffffffffffffULL; +#else + // rely on floating point hardware for non-radix2 non-IEEE-754 hardware -- will fail if you flush subnormals to zero + return fabs(x) < (double) DBL_MIN && x != 0.0; +#endif +} + +//The spec is fairly clear that we may enforce a hard cutoff to prevent premature flushing to zero. +// However, to avoid conflict for 1.0, we are letting results at TYPE_MIN + ulp_limit to be flushed to zero. 
+static inline int IsFloatResultSubnormal( double x, float ulps ) +{ + x = fabs(x) - MAKE_HEX_DOUBLE( 0x1.0p-149, 0x1, -149) * (double) ulps; + return x < MAKE_HEX_DOUBLE( 0x1.0p-126, 0x1, -126 ); +} + +static inline int IsFloatResultSubnormalAbsError( double x , float abs_err) +{ + x = x - abs_err; + return x < MAKE_HEX_DOUBLE( 0x1.0p-126, 0x1, -126 ); +} + +static inline int IsDoubleResultSubnormal( long double x, float ulps ) +{ + x = fabsl(x) - MAKE_HEX_LONG( 0x1.0p-1074, 0x1, -1074) * (long double) ulps; + return x < MAKE_HEX_LONG( 0x1.0p-1022, 0x1, -1022 ); +} + +static inline int IsFloatInfinity(double x) +{ + union { cl_float d; cl_uint u; } u; + u.d = (cl_float) x; + return ((u.u & 0x7fffffffU) == 0x7F800000U); +} + +static inline int IsFloatMaxFloat(double x) +{ + union { cl_float d; cl_uint u; } u; + u.d = (cl_float) x; + return ((u.u & 0x7fffffffU) == 0x7F7FFFFFU); +} + +static inline int IsFloatNaN(double x) +{ + union { cl_float d; cl_uint u; } u; + u.d = (cl_float) x; + return ((u.u & 0x7fffffffU) > 0x7F800000U); +} + +extern cl_uint RoundUpToNextPowerOfTwo( cl_uint x ); + +// Windows (since long double got deprecated) sets the x87 to 53-bit precision +// (that's x87 default state). This causes problems with the tests that +// convert long and ulong to float and double or otherwise deal with values +// that need more precision than 53-bit. So, set the x87 to 64-bit precision. +static inline void Force64BitFPUPrecision(void) +{ +#if __MINGW32__ + // The usual method is to use _controlfp as follows: + // #include + // _controlfp(_PC_64, _MCW_PC); + // + // _controlfp is available on MinGW32 but not on MinGW64. Instead of having + // divergent code just use inline assembly which works for both. 
+ unsigned short int orig_cw = 0; + unsigned short int new_cw = 0; + __asm__ __volatile__ ("fstcw %0":"=m" (orig_cw)); + new_cw = orig_cw | 0x0300; // set precision to 64-bit + __asm__ __volatile__ ("fldcw %0"::"m" (new_cw)); +#elif defined( _WIN32 ) && defined( __INTEL_COMPILER ) + // Unfortunately, usual method (`_controlfp( _PC_64, _MCW_PC );') does *not* work on win.x64: + // > On the x64 architecture, changing the floating point precision is not supported. + // (Taken from http://msdn.microsoft.com/en-us/library/e9b52ceh%28v=vs.100%29.aspx) + int cw; + __asm { fnstcw cw }; // Get current value of FPU control word. + cw = cw & 0xfffffcff | ( 3 << 8 ); // Set Precision Control to Double Extended Precision. + __asm { fldcw cw }; // Set new value of FPU control word. +#else + /* Implement for other platforms if needed */ +#endif +} + +#ifdef __cplusplus +extern "C" +#else +extern +#endif +void memset_pattern4(void *dest, const void *src_pattern, size_t bytes ); + +typedef union +{ + int32_t i; + float f; +}int32f_t; + +typedef union +{ + int64_t l; + double d; +}int64d_t; + +void MulD(double *rhi, double *rlo, double u, double v); +void AddD(double *rhi, double *rlo, double a, double b); +void MulDD(double *rhi, double *rlo, double xh, double xl, double yh, double yl); +void AddDD(double *rhi, double *rlo, double xh, double xl, double yh, double yl); +void DivideDD(double *chi, double *clo, double a, double b); +int compareFloats(float x, float y); +int compareDoubles(double x, double y); + +void logFunctionInfo(const char *fname, unsigned int float_size, unsigned int isFastRelaxed); + +#endif /* UTILITY_H */ + + diff --git a/test_conformance/math_brute_force/binary.c b/test_conformance/math_brute_force/binary.c new file mode 100644 index 00000000..5800aaf5 --- /dev/null +++ b/test_conformance/math_brute_force/binary.c @@ -0,0 +1,1537 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "Utility.h" + +#include +#include "FunctionList.h" + +int TestFunc_Float_Float_Float(const Func *f, MTdata); +int TestFunc_Double_Double_Double(const Func *f, MTdata); +int TestFunc_Float_Float_Float_nextafter(const Func *f, MTdata); +int TestFunc_Double_Double_Double_nextafter(const Func *f, MTdata); +int TestFunc_Float_Float_Float_common(const Func *f, MTdata, int isNextafter); +int TestFunc_Double_Double_Double_common(const Func *f, MTdata, int isNextafter); + +const float twoToMinus126 = MAKE_HEX_FLOAT(0x1p-126f, 1, -126); +const double twoToMinus1022 = MAKE_HEX_DOUBLE(0x1p-1022, 1, -1022); + +#if defined( __cplusplus ) + extern "C" +#endif +const vtbl _binary = { "binary", TestFunc_Float_Float_Float, TestFunc_Double_Double_Double }; + +#if defined( __cplusplus ) + extern "C" +#endif +const vtbl _binary_nextafter = { "binary_nextafter", TestFunc_Float_Float_Float_nextafter, TestFunc_Double_Double_Double_nextafter }; + +static int BuildKernel( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p ); + +static int BuildKernel( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p ) +{ + const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global float", sizeNames[vectorSize], "* out, __global float", sizeNames[vectorSize], "* in1, __global float", sizeNames[vectorSize], "* in2 )\n" + "{\n" + " int i = 
get_global_id(0);\n" + " out[i] = ", name, "( in1[i], in2[i] );\n" + "}\n" + }; + + const char *c3[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global float* out, __global float* in, __global float* in2)\n" + "{\n" + " size_t i = get_global_id(0);\n" + " if( i + 1 < get_global_size(0) )\n" + " {\n" + " float3 f0 = vload3( 0, in + 3 * i );\n" + " float3 f1 = vload3( 0, in2 + 3 * i );\n" + " f0 = ", name, "( f0, f1 );\n" + " vstore3( f0, 0, out + 3*i );\n" + " }\n" + " else\n" + " {\n" + " size_t parity = i & 1; // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n" + " float3 f0, f1;\n" + " switch( parity )\n" + " {\n" + " case 1:\n" + " f0 = (float3)( in[3*i], NAN, NAN ); \n" + " f1 = (float3)( in2[3*i], NAN, NAN ); \n" + " break;\n" + " case 0:\n" + " f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n" + " f1 = (float3)( in2[3*i], in2[3*i+1], NAN ); \n" + " break;\n" + " }\n" + " f0 = ", name, "( f0, f1 );\n" + " switch( parity )\n" + " {\n" + " case 0:\n" + " out[3*i+1] = f0.y; \n" + " // fall through\n" + " case 1:\n" + " out[3*i] = f0.x; \n" + " break;\n" + " }\n" + " }\n" + "}\n" + }; + + const char **kern = c; + size_t kernSize = sizeof(c)/sizeof(c[0]); + + if( sizeValues[vectorSize] == 3 ) + { + kern = c3; + kernSize = sizeof(c3)/sizeof(c3[0]); + } + + char testName[32]; + snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); + + return MakeKernels(kern, (cl_uint) kernSize, testName, kernel_count, k, p); +} + +static int BuildKernelDouble( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p ) +{ + const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", + "__kernel void math_kernel", sizeNames[vectorSize], "( __global double", sizeNames[vectorSize], "* out, __global double", sizeNames[vectorSize], "* in1, __global double", sizeNames[vectorSize], "* in2 )\n" + "{\n" + " int i = get_global_id(0);\n" + " 
out[i] = ", name, "( in1[i], in2[i] );\n" + "}\n" + }; + + const char *c3[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", + "__kernel void math_kernel", sizeNames[vectorSize], "( __global double* out, __global double* in, __global double* in2)\n" + "{\n" + " size_t i = get_global_id(0);\n" + " if( i + 1 < get_global_size(0) )\n" + " {\n" + " double3 d0 = vload3( 0, in + 3 * i );\n" + " double3 d1 = vload3( 0, in2 + 3 * i );\n" + " d0 = ", name, "( d0, d1 );\n" + " vstore3( d0, 0, out + 3*i );\n" + " }\n" + " else\n" + " {\n" + " size_t parity = i & 1; // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n" + " double3 d0, d1;\n" + " switch( parity )\n" + " {\n" + " case 1:\n" + " d0 = (double3)( in[3*i], NAN, NAN ); \n" + " d1 = (double3)( in2[3*i], NAN, NAN ); \n" + " break;\n" + " case 0:\n" + " d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n" + " d1 = (double3)( in2[3*i], in2[3*i+1], NAN ); \n" + " break;\n" + " }\n" + " d0 = ", name, "( d0, d1 );\n" + " switch( parity )\n" + " {\n" + " case 0:\n" + " out[3*i+1] = d0.y; \n" + " // fall through\n" + " case 1:\n" + " out[3*i] = d0.x; \n" + " break;\n" + " }\n" + " }\n" + "}\n" + }; + + const char **kern = c; + size_t kernSize = sizeof(c)/sizeof(c[0]); + + if( sizeValues[vectorSize] == 3 ) + { + kern = c3; + kernSize = sizeof(c3)/sizeof(c3[0]); + } + + char testName[32]; + snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); + + return MakeKernels(kern, (cl_uint) kernSize, testName, kernel_count, k, p); +} + +// A table of more difficult cases to get right +static const float specialValuesFloat[] = { + -NAN, -INFINITY, -FLT_MAX, MAKE_HEX_FLOAT(-0x1.000002p64f, -0x1000002L, 40), MAKE_HEX_FLOAT(-0x1.0p64f, -0x1L, 64), MAKE_HEX_FLOAT(-0x1.fffffep63f, -0x1fffffeL, 39), MAKE_HEX_FLOAT(-0x1.000002p63f, -0x1000002L, 39), MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(-0x1.fffffep62f, -0x1fffffeL, 38), + 
MAKE_HEX_FLOAT(-0x1.000002p32f, -0x1000002L, 8), MAKE_HEX_FLOAT(-0x1.0p32f, -0x1L, 32), MAKE_HEX_FLOAT(-0x1.fffffep31f, -0x1fffffeL, 7), MAKE_HEX_FLOAT(-0x1.000002p31f, -0x1000002L, 7), MAKE_HEX_FLOAT(-0x1.0p31f, -0x1L, 31), MAKE_HEX_FLOAT(-0x1.fffffep30f, -0x1fffffeL, 6), -1000.f, -100.f, -4.0f, -3.5f, + -3.0f, MAKE_HEX_FLOAT(-0x1.800002p1f, -0x1800002L, -23), -2.5f, MAKE_HEX_FLOAT(-0x1.7ffffep1f, -0x17ffffeL, -23), -2.0f, MAKE_HEX_FLOAT(-0x1.800002p0f, -0x1800002L, -24), -1.5f, MAKE_HEX_FLOAT(-0x1.7ffffep0f, -0x17ffffeL, -24),MAKE_HEX_FLOAT(-0x1.000002p0f, -0x1000002L, -24), -1.0f, MAKE_HEX_FLOAT(-0x1.fffffep-1f, -0x1fffffeL, -25), + MAKE_HEX_FLOAT(-0x1.000002p-1f, -0x1000002L, -25), -0.5f, MAKE_HEX_FLOAT(-0x1.fffffep-2f, -0x1fffffeL, -26), MAKE_HEX_FLOAT(-0x1.000002p-2f, -0x1000002L, -26), -0.25f, MAKE_HEX_FLOAT(-0x1.fffffep-3f, -0x1fffffeL, -27), + MAKE_HEX_FLOAT(-0x1.000002p-126f, -0x1000002L, -150), -FLT_MIN, MAKE_HEX_FLOAT(-0x0.fffffep-126f, -0x0fffffeL, -150), MAKE_HEX_FLOAT(-0x0.000ffep-126f, -0x0000ffeL, -150), MAKE_HEX_FLOAT(-0x0.0000fep-126f, -0x00000feL, -150), MAKE_HEX_FLOAT(-0x0.00000ep-126f, -0x000000eL, -150), MAKE_HEX_FLOAT(-0x0.00000cp-126f, -0x000000cL, -150), MAKE_HEX_FLOAT(-0x0.00000ap-126f, -0x000000aL, -150), + MAKE_HEX_FLOAT(-0x0.000008p-126f, -0x0000008L, -150), MAKE_HEX_FLOAT(-0x0.000006p-126f, -0x0000006L, -150), MAKE_HEX_FLOAT(-0x0.000004p-126f, -0x0000004L, -150), MAKE_HEX_FLOAT(-0x0.000002p-126f, -0x0000002L, -150), -0.0f, + + +NAN, +INFINITY, +FLT_MAX, MAKE_HEX_FLOAT(+0x1.000002p64f, +0x1000002L, 40), MAKE_HEX_FLOAT(+0x1.0p64f, +0x1L, 64), MAKE_HEX_FLOAT(+0x1.fffffep63f, +0x1fffffeL, 39), MAKE_HEX_FLOAT(+0x1.000002p63f, +0x1000002L, 39), MAKE_HEX_FLOAT(+0x1.0p63f, +0x1L, 63), MAKE_HEX_FLOAT(+0x1.fffffep62f, +0x1fffffeL, 38), + MAKE_HEX_FLOAT(+0x1.000002p32f, +0x1000002L, 8), MAKE_HEX_FLOAT(+0x1.0p32f, +0x1L, 32), MAKE_HEX_FLOAT(+0x1.fffffep31f, +0x1fffffeL, 7), MAKE_HEX_FLOAT(+0x1.000002p31f, +0x1000002L, 7), 
MAKE_HEX_FLOAT(+0x1.0p31f, +0x1L, 31), MAKE_HEX_FLOAT(+0x1.fffffep30f, +0x1fffffeL, 6), +1000.f, +100.f, +4.0f, +3.5f, + +3.0f, MAKE_HEX_FLOAT(+0x1.800002p1f, +0x1800002L, -23), 2.5f, MAKE_HEX_FLOAT(+0x1.7ffffep1f, +0x17ffffeL, -23),+2.0f, MAKE_HEX_FLOAT(+0x1.800002p0f, +0x1800002L, -24), 1.5f, MAKE_HEX_FLOAT(+0x1.7ffffep0f, +0x17ffffeL, -24), MAKE_HEX_FLOAT(+0x1.000002p0f, +0x1000002L, -24), +1.0f, MAKE_HEX_FLOAT(+0x1.fffffep-1f, +0x1fffffeL, -25), + MAKE_HEX_FLOAT(+0x1.000002p-1f, +0x1000002L, -25), +0.5f, MAKE_HEX_FLOAT(+0x1.fffffep-2f, +0x1fffffeL, -26), MAKE_HEX_FLOAT(+0x1.000002p-2f, +0x1000002L, -26), +0.25f, MAKE_HEX_FLOAT(+0x1.fffffep-3f, +0x1fffffeL, -27), + MAKE_HEX_FLOAT(0x1.000002p-126f, 0x1000002L, -150), +FLT_MIN, MAKE_HEX_FLOAT(+0x0.fffffep-126f, +0x0fffffeL, -150), MAKE_HEX_FLOAT(+0x0.000ffep-126f, +0x0000ffeL, -150), MAKE_HEX_FLOAT(+0x0.0000fep-126f, +0x00000feL, -150), MAKE_HEX_FLOAT(+0x0.00000ep-126f, +0x000000eL, -150), MAKE_HEX_FLOAT(+0x0.00000cp-126f, +0x000000cL, -150), MAKE_HEX_FLOAT(+0x0.00000ap-126f, +0x000000aL, -150), + MAKE_HEX_FLOAT(+0x0.000008p-126f, +0x0000008L, -150), MAKE_HEX_FLOAT(+0x0.000006p-126f, +0x0000006L, -150), MAKE_HEX_FLOAT(+0x0.000004p-126f, +0x0000004L, -150), MAKE_HEX_FLOAT(+0x0.000002p-126f, +0x0000002L, -150), +0.0f +}; + +static size_t specialValuesFloatCount = sizeof( specialValuesFloat ) / sizeof( specialValuesFloat[0] ); + +typedef struct BuildKernelInfo +{ + cl_uint offset; // the first vector size to build + cl_uint kernel_count; + cl_kernel **kernels; + cl_program *programs; + const char *nameInCode; +}BuildKernelInfo; + +static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); +static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ) +{ + BuildKernelInfo *info = (BuildKernelInfo*) p; + cl_uint i = info->offset + job_id; + return BuildKernel( info->nameInCode, i, info->kernel_count, info->kernels[i], info->programs + i ); +} + +static cl_int 
BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); +static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ) +{ + BuildKernelInfo *info = (BuildKernelInfo*) p; + cl_uint i = info->offset + job_id; + return BuildKernelDouble( info->nameInCode, i, info->kernel_count, info->kernels[i], info->programs + i ); +} + +//Thread specific data for a worker thread +typedef struct ThreadInfo +{ + cl_mem inBuf; // input buffer for the thread + cl_mem inBuf2; // input buffer for the thread + cl_mem outBuf[ VECTOR_SIZE_COUNT ]; // output buffers for the thread + float maxError; // max error value. Init to 0. + double maxErrorValue; // position of the max error value (param 1). Init to 0. + double maxErrorValue2; // position of the max error value (param 2). Init to 0. + MTdata d; + cl_command_queue tQueue; // per thread command queue to improve performance +}ThreadInfo; + +typedef struct TestInfo +{ + size_t subBufferSize; // Size of the sub-buffer in elements + const Func *f; // A pointer to the function info + cl_program programs[ VECTOR_SIZE_COUNT ]; // programs for various vector sizes + cl_kernel *k[VECTOR_SIZE_COUNT ]; // arrays of thread-specific kernels for each worker thread: k[vector_size][thread_id] + ThreadInfo *tinfo; // An array of thread specific information for each worker thread + cl_uint threadCount; // Number of worker threads + cl_uint step; // step between each chunk and the next. 
+ cl_uint scale; // stride between individual test values + float ulps; // max_allowed ulps + int ftz; // non-zero if running in flush to zero mode + + int isFDim; + int skipNanInf; + int isNextafter; +}TestInfo; + +static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *p ); + +int TestFunc_Float_Float_Float_common(const Func *f, MTdata d, int isNextafter) +{ + TestInfo test_info; + cl_int error; + size_t i, j; + float maxError = 0.0f; + double maxErrorVal = 0.0; + double maxErrorVal2 = 0.0; + int skipTestingRelaxed = 0; + + logFunctionInfo(f->name,sizeof(cl_float),gTestFastRelaxed); + + // Init test_info + memset( &test_info, 0, sizeof( test_info ) ); + test_info.threadCount = GetThreadCount(); + test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); + test_info.scale = 1; + + if (gWimpyMode){ + test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); + test_info.scale = (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor; + } + test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale; + test_info.f = f; + test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps; + test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); + + test_info.isFDim = 0 == strcmp( "fdim", f->nameInCode ); + test_info.skipNanInf = test_info.isFDim && ! 
gInfNanSupport; + test_info.isNextafter = isNextafter; + // cl_kernels aren't thread safe, so we make one for each vector size for every thread + for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) + { + size_t array_size = test_info.threadCount * sizeof( cl_kernel ); + test_info.k[i] = (cl_kernel*)malloc( array_size ); + if( NULL == test_info.k[i] ) + { + vlog_error( "Error: Unable to allocate storage for kernels!\n" ); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset( test_info.k[i], 0, array_size ); + } + test_info.tinfo = (ThreadInfo*)malloc( test_info.threadCount * sizeof(*test_info.tinfo) ); + if( NULL == test_info.tinfo ) + { + vlog_error( "Error: Unable to allocate storage for thread specific data.\n" ); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset( test_info.tinfo, 0, test_info.threadCount * sizeof(*test_info.tinfo) ); + for( i = 0; i < test_info.threadCount; i++ ) + { + cl_buffer_region region = { i * test_info.subBufferSize * sizeof( cl_float), test_info.subBufferSize * sizeof( cl_float) }; + test_info.tinfo[i].inBuf = clCreateSubBuffer( gInBuffer, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if( error || NULL == test_info.tinfo[i].inBuf) + { + vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size ); + goto exit; + } + test_info.tinfo[i].inBuf2 = clCreateSubBuffer( gInBuffer2, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if( error || NULL == test_info.tinfo[i].inBuf2 ) + { + vlog_error( "Error: Unable to create sub-buffer of gInBuffer2 for region {%zd, %zd}\n", region.origin, region.size ); + goto exit; + } + + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if( error || NULL == test_info.tinfo[i].outBuf[j] ) + { + vlog_error( "Error: Unable to create sub-buffer of 
gOutBuffer[%d] for region {%zd, %zd}\n", (int) j, region.origin, region.size ); + goto exit; + } + } + test_info.tinfo[i].tQueue = clCreateCommandQueueWithProperties(gContext, gDevice, 0, &error); + if( NULL == test_info.tinfo[i].tQueue || error ) + { + vlog_error( "clCreateCommandQueue failed. (%d)\n", error ); + goto exit; + } + + test_info.tinfo[i].d = init_genrand(genrand_int32(d)); + } + + // Init the kernels + { + BuildKernelInfo build_info = { gMinVectorSizeIndex, test_info.threadCount, test_info.k, test_info.programs, f->nameInCode }; + if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) )) + goto exit; + } + + // Run the kernels + if( !gSkipCorrectnessTesting ) + { + error = ThreadPool_Do( TestFloat, (cl_uint) ((1ULL<<32) / test_info.step), &test_info ); + + // Accumulate the arithmetic errors + for( i = 0; i < test_info.threadCount; i++ ) + { + if( test_info.tinfo[i].maxError > maxError ) + { + maxError = test_info.tinfo[i].maxError; + maxErrorVal = test_info.tinfo[i].maxErrorValue; + maxErrorVal2 = test_info.tinfo[i].maxErrorValue2; + } + } + + if( error ) + goto exit; + + if( gWimpyMode ) + vlog( "Wimp pass" ); + else + vlog( "passed" ); + } + + + if( gMeasureTimes ) + { + //Init input arrays + uint32_t *p = (uint32_t *)gIn; + uint32_t *p2 = (uint32_t *)gIn2; + for( j = 0; j < BUFFER_SIZE / sizeof( float ); j++ ) + { + p[j] = (genrand_int32(d) & ~0x40000000) | 0x20000000; + p2[j] = 0x3fc00000; + } + + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, BUFFER_SIZE, gIn, 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); + return error; + } + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, BUFFER_SIZE, gIn2, 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error ); + return error; + } + + + // Run the kernels + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + size_t 
vectorSize = sizeof( cl_float ) * sizeValues[j]; + size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize; // BUFFER_SIZE / vectorSize rounded up + if( ( error = clSetKernelArg( test_info.k[j][0], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(test_info.programs[j]); goto exit; } + if( ( error = clSetKernelArg( test_info.k[j][0], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(test_info.programs[j]); goto exit; } + if( ( error = clSetKernelArg( test_info.k[j][0], 2, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(test_info.programs[j]); goto exit; } + + double sum = 0.0; + double bestTime = INFINITY; + for( i = 0; i < PERF_LOOP_COUNT; i++ ) + { + uint64_t startTime = GetTime(); + if( (error = clEnqueueNDRangeKernel(gQueue, test_info.k[j][0], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) + { + vlog_error( "FAILED -- could not execute kernel\n" ); + goto exit; + } + + // Make sure OpenCL is done + if( (error = clFinish(gQueue) ) ) + { + vlog_error( "Error %d at clFinish\n", error ); + goto exit; + } + + uint64_t endTime = GetTime(); + double time = SubtractTime( endTime, startTime ); + sum += time; + if( time < bestTime ) + bestTime = time; + } + + if( gReportAverageTimes ) + bestTime = sum / PERF_LOOP_COUNT; + double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (BUFFER_SIZE / sizeof( float ) ); + vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s", f->name, sizeNames[j] ); + } + } + + if( ! 
gSkipCorrectnessTesting ) + vlog( "\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2 ); + vlog( "\n" ); + + +exit: + for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) + { + clReleaseProgram(test_info.programs[i]); + if( test_info.k[i] ) + { + for( j = 0; j < test_info.threadCount; j++ ) + clReleaseKernel(test_info.k[i][j]); + + free( test_info.k[i] ); + } + } + if( test_info.tinfo ) + { + for( i = 0; i < test_info.threadCount; i++ ) + { + free_mtdata( test_info.tinfo[i].d ); + clReleaseMemObject(test_info.tinfo[i].inBuf); + clReleaseMemObject(test_info.tinfo[i].inBuf2); + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + clReleaseMemObject(test_info.tinfo[i].outBuf[j]); + clReleaseCommandQueue(test_info.tinfo[i].tQueue); + } + + free( test_info.tinfo ); + } + + return error; +} + +static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data ) +{ + const TestInfo *job = (const TestInfo *) data; + size_t buffer_elements = job->subBufferSize; + size_t buffer_size = buffer_elements * sizeof( cl_float ); + cl_uint base = job_id * (cl_uint) job->step; + ThreadInfo *tinfo = job->tinfo + thread_id; + float ulps = job->ulps; + fptr func = job->f->func; + int ftz = job->ftz; + MTdata d = tinfo->d; + cl_uint j, k; + cl_int error; + cl_uchar *overflow = (cl_uchar*)malloc(buffer_size); + const char *name = job->f->name; + int isFDim = job->isFDim; + int skipNanInf = job->skipNanInf; + int isNextafter = job->isNextafter; + cl_uint *t = 0; + float *r=0,*s=0,*s2=0; + cl_int copysign_test = 0; + RoundingMode oldRoundMode; + int skipVerification = 0; + + if(gTestFastRelaxed) + { + if (strcmp(name,"pow")==0 && gFastRelaxedDerived) + { + func = job->f->rfunc; + ulps = INFINITY; + skipVerification = 1; + }else + { + func = job->f->rfunc; + ulps = job->f->relaxed_error; + } + } + + // start the map of the output arrays + cl_event e[ VECTOR_SIZE_COUNT ]; + cl_uint *out[ VECTOR_SIZE_COUNT ]; + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; 
j++ ) + { + out[j] = (cl_uint*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0, buffer_size, 0, NULL, e + j, &error); + if( error || NULL == out[j]) + { + vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error ); + return error; + } + } + + // Get that moving + if( (error = clFlush(tinfo->tQueue) )) + vlog( "clFlush failed\n" ); + + //Init input array + cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements; + cl_uint *p2 = (cl_uint *)gIn2 + thread_id * buffer_elements; + j = 0; + + int totalSpecialValueCount = specialValuesFloatCount * specialValuesFloatCount; + int indx = (totalSpecialValueCount - 1) / buffer_elements; + + if (job_id <= (cl_uint)indx) + { // test edge cases + float *fp = (float *)p; + float *fp2 = (float *)p2; + uint32_t x, y; + + x = (job_id * buffer_elements) % specialValuesFloatCount; + y = (job_id * buffer_elements) / specialValuesFloatCount; + + for( ; j < buffer_elements; j++ ) + { + fp[j] = specialValuesFloat[x]; + fp2[j] = specialValuesFloat[y]; + if( ++x >= specialValuesFloatCount ) + { + x = 0; + y++; + if( y >= specialValuesFloatCount ) + break; + } + } + } + + //Init any remaining values. + for( ; j < buffer_elements; j++ ) + { + p[j] = genrand_int32(d); + p2[j] = genrand_int32(d); + } + + if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, buffer_size, p, 0, NULL, NULL) )) + { + vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error ); + goto exit; + } + + if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0, buffer_size, p2, 0, NULL, NULL) )) + { + vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error ); + goto exit; + } + + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + //Wait for the map to finish + if( (error = clWaitForEvents(1, e + j) )) + { + vlog_error( "Error: clWaitForEvents failed! 
err: %d\n", error ); + goto exit; + } + if( (error = clReleaseEvent( e[j] ) )) + { + vlog_error( "Error: clReleaseEvent failed! err: %d\n", error ); + goto exit; + } + + // Fill the result buffer with garbage, so that old results don't carry over + uint32_t pattern = 0xffffdead; + memset_pattern4(out[j], &pattern, buffer_size); + if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL) )) + { + vlog_error( "Error: clEnqueueMapBuffer failed! err: %d\n", error ); + goto exit; + } + + // run the kernel + size_t vectorCount = (buffer_elements + sizeValues[j] - 1) / sizeValues[j]; + cl_kernel kernel = job->k[j][thread_id]; //each worker thread has its own copy of the cl_kernel + cl_program program = job->programs[j]; + + if( ( error = clSetKernelArg( kernel, 0, sizeof( tinfo->outBuf[j] ), &tinfo->outBuf[j] ))){ LogBuildError(program); return error; } + if( ( error = clSetKernelArg( kernel, 1, sizeof( tinfo->inBuf ), &tinfo->inBuf ) )) { LogBuildError(program); return error; } + if( ( error = clSetKernelArg( kernel, 2, sizeof( tinfo->inBuf2 ), &tinfo->inBuf2 ) )) { LogBuildError(program); return error; } + + if( (error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL, &vectorCount, NULL, 0, NULL, NULL))) + { + vlog_error( "FAILED -- could not execute kernel\n" ); + goto exit; + } + } + + // Get that moving + if( (error = clFlush(tinfo->tQueue) )) + vlog( "clFlush 2 failed\n" ); + + if( gSkipCorrectnessTesting ) + { + if( (error = clFinish(tinfo->tQueue)) ) + { + vlog_error( "Error: clFinish failed! 
err: %d\n", error ); + goto exit; + } + free(overflow); + return CL_SUCCESS; + } + + FPU_mode_type oldMode; + oldRoundMode = kRoundToNearestEven; + if( isFDim ) + { + //Calculate the correctly rounded reference result + memset( &oldMode, 0, sizeof( oldMode ) ); + if( ftz ) + ForceFTZ( &oldMode ); + + // Set the rounding mode to match the device + if (gIsInRTZMode) + oldRoundMode = set_round(kRoundTowardZero, kfloat); + } + + if(!strcmp(name, "copysign")) + copysign_test = 1; + +#define ref_func(s, s2) (copysign_test ? func.f_ff_f( s, s2 ) : func.f_ff( s, s2 )) + + //Calculate the correctly rounded reference result + r = (float *)gOut_Ref + thread_id * buffer_elements; + s = (float *)gIn + thread_id * buffer_elements; + s2 = (float *)gIn2 + thread_id * buffer_elements; + if( skipNanInf ) + { + for( j = 0; j < buffer_elements; j++ ) + { + feclearexcept(FE_OVERFLOW); + r[j] = (float) ref_func( s[j], s2[j] ); + overflow[j] = FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW)); + } + } + else + { + for( j = 0; j < buffer_elements; j++ ) + r[j] = (float) ref_func( s[j], s2[j] ); + } + + if( isFDim && ftz ) + RestoreFPState( &oldMode ); + + // Read the data back -- no need to wait for the first N-1 buffers. This is an in order queue. + for( j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++ ) + { + out[j] = (cl_uint*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error); + if( error || NULL == out[j] ) + { + vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error ); + goto exit; + } + } + + // Wait for the last buffer + out[j] = (cl_uint*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_TRUE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error); + if( error || NULL == out[j] ) + { + vlog_error( "Error: clEnqueueMapBuffer %d failed! 
err: %d\n", j, error ); + goto exit; + } + + if (!skipVerification) { + //Verify data + t = (cl_uint *)r; + for( j = 0; j < buffer_elements; j++ ) + { + for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) + { + cl_uint *q = out[k]; + + // If we aren't getting the correctly rounded result + if( t[j] != q[j] ) + { + float test = ((float*) q)[j]; + double correct = ref_func( s[j], s2[j] ); + + // Per section 10 paragraph 6, accept any result if an input or output is a infinity or NaN or overflow + // As per OpenCL 2.0 spec, section 5.8.4.3, enabling fast-relaxed-math mode also enables + // -cl-finite-math-only optimization. This optimization allows to assume that arguments and + // results are not NaNs or +/-INFs. Hence, accept any result if inputs or results are NaNs or INFs. + if ( gTestFastRelaxed || skipNanInf) + { + if( skipNanInf && overflow[j]) + continue; + // Note: no double rounding here. Reference functions calculate in single precision. + if( IsFloatInfinity(correct) || IsFloatNaN(correct) || + IsFloatInfinity(s2[j]) || IsFloatNaN(s2[j]) || + IsFloatInfinity(s[j]) || IsFloatNaN(s[j]) ) + continue; + } + + float err = Ulp_Error( test, correct ); + int fail = ! (fabsf(err) <= ulps); + + if( fail && ftz ) + { + // retry per section 6.5.3.2 + if( IsFloatResultSubnormal(correct, ulps ) ) + { + fail = fail && ( test != 0.0f ); + if( ! fail ) + err = 0.0f; + } + + // nextafter on FTZ platforms may return the smallest + // normal float (2^-126) given a denormal or a zero + // as the first argument. The rationale here is that + // nextafter flushes the argument to zero and then + // returns the next representable number in the + // direction of the second argument, and since + // denorms are considered as zero, the smallest + // normal number is the next representable number. + // In which case, it should have the same sign as the + // second argument. 
+ if (isNextafter ) + { + if(IsFloatSubnormal(s[j]) || s[j] == 0.0f) + { + float value = copysignf(twoToMinus126, s2[j]); + fail = fail && (test != value); + if (!fail) + err = 0.0f; + } + } + else + { + // retry per section 6.5.3.3 + if( IsFloatSubnormal( s[j] ) ) + { + double correct2, correct3; + float err2, err3; + + if( skipNanInf ) + feclearexcept(FE_OVERFLOW); + + correct2 = ref_func( 0.0, s2[j] ); + correct3 = ref_func( -0.0, s2[j] ); + + // Per section 10 paragraph 6, accept any result if an input or output is a infinity or NaN or overflow + // As per OpenCL 2.0 spec, section 5.8.4.3, enabling fast-relaxed-math mode also enables + // -cl-finite-math-only optimization. This optimization allows to assume that arguments and + // results are not NaNs or +/-INFs. Hence, accept any result if inputs or results are NaNs or INFs. + if( gTestFastRelaxed || skipNanInf ) + { + if( fetestexcept(FE_OVERFLOW) && skipNanInf ) + continue; + + // Note: no double rounding here. Reference functions calculate in single precision. + if( IsFloatInfinity(correct2) || IsFloatNaN(correct2) || + IsFloatInfinity(correct3) || IsFloatNaN(correct3) ) + continue; + } + + err2 = Ulp_Error( test, correct2 ); + err3 = Ulp_Error( test, correct3 ); + fail = fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps))); + if( fabsf( err2 ) < fabsf(err ) ) + err = err2; + if( fabsf( err3 ) < fabsf(err ) ) + err = err3; + + // retry per section 6.5.3.4 + if( IsFloatResultSubnormal( correct2, ulps ) || IsFloatResultSubnormal( correct3, ulps ) ) + { + fail = fail && ( test != 0.0f); + if( ! 
fail ) + err = 0.0f; + } + + //try with both args as zero + if( IsFloatSubnormal( s2[j] ) ) + { + double correct4, correct5; + float err4, err5; + + if( skipNanInf ) + feclearexcept(FE_OVERFLOW); + + correct2 = ref_func( 0.0, 0.0 ); + correct3 = ref_func( -0.0, 0.0 ); + correct4 = ref_func( 0.0, -0.0 ); + correct5 = ref_func( -0.0, -0.0 ); + + // Per section 10 paragraph 6, accept any result if an input or output is a infinity or NaN or overflow + // As per OpenCL 2.0 spec, section 5.8.4.3, enabling fast-relaxed-math mode also enables + // -cl-finite-math-only optimization. This optimization allows to assume that arguments and + // results are not NaNs or +/-INFs. Hence, accept any result if inputs or results are NaNs or INFs. + if( gTestFastRelaxed || skipNanInf ) + { + if( fetestexcept(FE_OVERFLOW) && skipNanInf ) + continue; + + // Note: no double rounding here. Reference functions calculate in single precision. + if( IsFloatInfinity(correct2) || IsFloatNaN(correct2) || + IsFloatInfinity(correct3) || IsFloatNaN(correct3) || + IsFloatInfinity(correct4) || IsFloatNaN(correct4) || + IsFloatInfinity(correct5) || IsFloatNaN(correct5) ) + continue; + } + + err2 = Ulp_Error( test, correct2 ); + err3 = Ulp_Error( test, correct3 ); + err4 = Ulp_Error( test, correct4 ); + err5 = Ulp_Error( test, correct5 ); + fail = fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)) && + (!(fabsf(err4) <= ulps)) && (!(fabsf(err5) <= ulps))); + if( fabsf( err2 ) < fabsf(err ) ) + err = err2; + if( fabsf( err3 ) < fabsf(err ) ) + err = err3; + if( fabsf( err4 ) < fabsf(err ) ) + err = err4; + if( fabsf( err5 ) < fabsf(err ) ) + err = err5; + + // retry per section 6.5.3.4 + if( IsFloatResultSubnormal( correct2, ulps ) || IsFloatResultSubnormal( correct3, ulps ) || + IsFloatResultSubnormal( correct4, ulps ) || IsFloatResultSubnormal( correct5, ulps ) ) + { + fail = fail && ( test != 0.0f); + if( ! 
fail ) + err = 0.0f; + } + } + } + else if(IsFloatSubnormal(s2[j]) ) + { + double correct2, correct3; + float err2, err3; + + if( skipNanInf ) + feclearexcept(FE_OVERFLOW); + + correct2 = ref_func( s[j], 0.0 ); + correct3 = ref_func( s[j], -0.0 ); + + // Per section 10 paragraph 6, accept any result if an input or output is a infinity or NaN or overflow + // As per OpenCL 2.0 spec, section 5.8.4.3, enabling fast-relaxed-math mode also enables + // -cl-finite-math-only optimization. This optimization allows to assume that arguments and + // results are not NaNs or +/-INFs. Hence, accept any result if inputs or results are NaNs or INFs. + if ( gTestFastRelaxed || skipNanInf ) + { + // Note: no double rounding here. Reference functions calculate in single precision. + if( overflow[j] && skipNanInf) + continue; + + if( IsFloatInfinity(correct2) || IsFloatNaN(correct2) || + IsFloatInfinity(correct3) || IsFloatNaN(correct3) ) + continue; + } + + err2 = Ulp_Error( test, correct2 ); + err3 = Ulp_Error( test, correct3 ); + fail = fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps))); + if( fabsf( err2 ) < fabsf(err ) ) + err = err2; + if( fabsf( err3 ) < fabsf(err ) ) + err = err3; + + // retry per section 6.5.3.4 + if( IsFloatResultSubnormal( correct2, ulps ) || IsFloatResultSubnormal( correct3, ulps ) ) + { + fail = fail && ( test != 0.0f); + if( ! fail ) + err = 0.0f; + } + } + } + } + + if( fabsf(err ) > tinfo->maxError ) + { + tinfo->maxError = fabsf(err); + tinfo->maxErrorValue = s[j]; + tinfo->maxErrorValue2 = s2[j]; + } + if( fail ) + { + vlog_error( "\nERROR: %s%s: %f ulp error at {%a (0x%x), %a (0x%x)}: *%a vs. 
%a (0x%8.8x) at index: %d\n", name, sizeNames[k], err, s[j], ((cl_uint*)s)[j], s2[j], ((cl_uint*)s2)[j], r[j], test, ((cl_uint*)&test)[0], j ); + error = -1; + goto exit; + } + } + } + } + } + + if (isFDim && gIsInRTZMode) + (void)set_round(oldRoundMode, kfloat); + + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)) ) + { + vlog_error( "Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n", j, error ); + return error; + } + } + + if( (error = clFlush(tinfo->tQueue) )) + vlog( "clFlush 3 failed\n" ); + + + if( 0 == ( base & 0x0fffffff) ) + { + if (gVerboseBruteForce) + { + vlog("base:%14u step:%10u scale:%10zu buf_elements:%10u ulps:%5.3f ThreadCount:%2u\n", base, job->step, job->scale, buffer_elements, job->ulps, job->threadCount); + } else + { + vlog("." ); + } + fflush(stdout); + } + + +exit: + if( overflow ) + free( overflow ); + return error; + +} + + +// A table of more difficult cases to get right +static const double specialValuesDouble[] = { + -NAN, -INFINITY, -DBL_MAX, MAKE_HEX_DOUBLE(-0x1.0000000000001p64, -0x10000000000001LL, 12), MAKE_HEX_DOUBLE(-0x1.0p64, -0x1LL, 64), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp63, -0x1fffffffffffffLL, 11), MAKE_HEX_DOUBLE(-0x1.0000000000001p63, -0x10000000000001LL, 11), MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10), + MAKE_HEX_DOUBLE(-0x1.000002p32, -0x1000002LL, 8), MAKE_HEX_DOUBLE(-0x1.0p32, -0x1LL, 32), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp31, -0x1fffffffffffffLL, -21), MAKE_HEX_DOUBLE(-0x1.0000000000001p31, -0x10000000000001LL, -21), MAKE_HEX_DOUBLE(-0x1.0p31, -0x1LL, 31), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp30, -0x1fffffffffffffLL, -22), -1000., -100., -4.0, -3.5, + -3.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p1, -0x18000000000001LL, -51), -2.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp1, -0x17ffffffffffffLL, -51), -2.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p0, 
-0x18000000000001LL, -52), -1.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp0, -0x17ffffffffffffLL, -52), MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52), -1.0, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-1, -0x1fffffffffffffLL, -53),
    MAKE_HEX_DOUBLE(-0x1.0000000000001p-1, -0x10000000000001LL, -53), -0.5, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-2, -0x1fffffffffffffLL, -54), MAKE_HEX_DOUBLE(-0x1.0000000000001p-2, -0x10000000000001LL, -54), -0.25, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-3, -0x1fffffffffffffLL, -55),
    MAKE_HEX_DOUBLE(-0x1.0000000000001p-1022, -0x10000000000001LL, -1074), -DBL_MIN, MAKE_HEX_DOUBLE(-0x0.fffffffffffffp-1022, -0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000fffp-1022, -0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(-0x0.00000000000fep-1022, -0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ep-1022, -0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000cp-1022, -0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ap-1022, -0x0000000000000aLL, -1074),
    MAKE_HEX_DOUBLE(-0x0.0000000000008p-1022, -0x00000000000008LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000007p-1022, -0x00000000000007LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000006p-1022, -0x00000000000006LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000005p-1022, -0x00000000000005LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000004p-1022, -0x00000000000004LL, -1074),
    MAKE_HEX_DOUBLE(-0x0.0000000000003p-1022, -0x00000000000003LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000002p-1022, -0x00000000000002LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000001p-1022, -0x00000000000001LL, -1074), -0.0,

    /* Positive half mirrors the negative half above. */
    +NAN, +INFINITY, +DBL_MAX, MAKE_HEX_DOUBLE(+0x1.0000000000001p64, +0x10000000000001LL, 12), MAKE_HEX_DOUBLE(+0x1.0p64, +0x1LL, 64), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp63, +0x1fffffffffffffLL, 11), MAKE_HEX_DOUBLE(+0x1.0000000000001p63, +0x10000000000001LL, 11), MAKE_HEX_DOUBLE(+0x1.0p63, +0x1LL, 63), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10),
    MAKE_HEX_DOUBLE(+0x1.000002p32, +0x1000002LL, 8), MAKE_HEX_DOUBLE(+0x1.0p32, +0x1LL, 32), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp31, +0x1fffffffffffffLL, -21), MAKE_HEX_DOUBLE(+0x1.0000000000001p31, +0x10000000000001LL, -21), MAKE_HEX_DOUBLE(+0x1.0p31, +0x1LL, 31), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp30, +0x1fffffffffffffLL, -22), +1000., +100., +4.0, +3.5,
    /* BUGFIX: the entry just above +1.0 was a copy-paste duplicate of the negative-half value
       (-0x1.0000000000001p0), so +0x1.0000000000001p0 was never exercised. The float table's
       positive half uses +0x1.000002p0f at the corresponding position. */
    +3.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p1, +0x18000000000001LL, -51), +2.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp1, +0x17ffffffffffffLL, -51), +2.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p0, +0x18000000000001LL, -52), +1.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp0, +0x17ffffffffffffLL, -52), MAKE_HEX_DOUBLE(+0x1.0000000000001p0, +0x10000000000001LL, -52), +1.0, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-1, +0x1fffffffffffffLL, -53),
    MAKE_HEX_DOUBLE(+0x1.0000000000001p-1, +0x10000000000001LL, -53), +0.5, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-2, +0x1fffffffffffffLL, -54), MAKE_HEX_DOUBLE(+0x1.0000000000001p-2, +0x10000000000001LL, -54), +0.25, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-3, +0x1fffffffffffffLL, -55),
    MAKE_HEX_DOUBLE(+0x1.0000000000001p-1022, +0x10000000000001LL, -1074), +DBL_MIN, MAKE_HEX_DOUBLE(+0x0.fffffffffffffp-1022, +0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000fffp-1022, +0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(+0x0.00000000000fep-1022, +0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ep-1022, +0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000cp-1022, +0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ap-1022, +0x0000000000000aLL, -1074),
    MAKE_HEX_DOUBLE(+0x0.0000000000008p-1022, +0x00000000000008LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000007p-1022, +0x00000000000007LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000006p-1022, +0x00000000000006LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000005p-1022, +0x00000000000005LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000004p-1022, +0x00000000000004LL, -1074),
    MAKE_HEX_DOUBLE(+0x0.0000000000003p-1022, +0x00000000000003LL, -1074),
MAKE_HEX_DOUBLE(+0x0.0000000000002p-1022, +0x00000000000002LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000001p-1022, +0x00000000000001LL, -1074), +0.0, +}; + +static size_t specialValuesDoubleCount = sizeof( specialValuesDouble ) / sizeof( specialValuesDouble[0] ); + +static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *p ); + +int TestFunc_Double_Double_Double_common(const Func *f, MTdata d, int isNextafter) +{ + TestInfo test_info; + cl_int error; + size_t i, j; + float maxError = 0.0f; + double maxErrorVal = 0.0; + double maxErrorVal2 = 0.0; + + logFunctionInfo(f->name,sizeof(cl_double),gTestFastRelaxed); + + // Init test_info + memset( &test_info, 0, sizeof( test_info ) ); + test_info.threadCount = GetThreadCount(); + test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); + test_info.scale = 1; + + + if (gWimpyMode){ + test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); + test_info.scale = (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor; + } + test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale; + test_info.f = f; + test_info.ulps = f->double_ulps; + test_info.ftz = f->ftz || gForceFTZ; + + test_info.isFDim = 0 == strcmp( "fdim", f->nameInCode ); + test_info.skipNanInf = 0; + test_info.isNextafter = isNextafter; + // cl_kernels aren't thread safe, so we make one for each vector size for every thread + for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) + { + size_t array_size = test_info.threadCount * sizeof( cl_kernel ); + test_info.k[i] = (cl_kernel*)malloc( array_size ); + if( NULL == test_info.k[i] ) + { + vlog_error( "Error: Unable to allocate storage for kernels!\n" ); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset( test_info.k[i], 0, array_size ); + } + test_info.tinfo = (ThreadInfo*)malloc( test_info.threadCount * sizeof(*test_info.tinfo) ); + if( NULL == test_info.tinfo ) + { + 
vlog_error( "Error: Unable to allocate storage for thread specific data.\n" ); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset( test_info.tinfo, 0, test_info.threadCount * sizeof(*test_info.tinfo) ); + for( i = 0; i < test_info.threadCount; i++ ) + { + cl_buffer_region region = { i * test_info.subBufferSize * sizeof( cl_double), test_info.subBufferSize * sizeof( cl_double) }; + test_info.tinfo[i].inBuf = clCreateSubBuffer( gInBuffer, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if( error || NULL == test_info.tinfo[i].inBuf) + { + vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size ); + goto exit; + } + test_info.tinfo[i].inBuf2 = clCreateSubBuffer( gInBuffer2, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if( error || NULL == test_info.tinfo[i].inBuf) + { + vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size ); + goto exit; + } + + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if( error || NULL == test_info.tinfo[i].outBuf[j] ) + { + vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size ); + goto exit; + } + } + test_info.tinfo[i].tQueue = clCreateCommandQueueWithProperties(gContext, gDevice, 0, &error); + if( NULL == test_info.tinfo[i].tQueue || error ) + { + vlog_error( "clCreateCommandQueue failed. 
(%d)\n", error ); + goto exit; + } + test_info.tinfo[i].d = init_genrand(genrand_int32(d)); + } + + + // Init the kernels + { + BuildKernelInfo build_info = { gMinVectorSizeIndex, test_info.threadCount, test_info.k, test_info.programs, f->nameInCode }; + if( (error = ThreadPool_Do( BuildKernel_DoubleFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) )) + goto exit; + } + + if( !gSkipCorrectnessTesting ) + { + error = ThreadPool_Do( TestDouble, (cl_uint) ((1ULL<<32) / test_info.step), &test_info ); + + // Accumulate the arithmetic errors + for( i = 0; i < test_info.threadCount; i++ ) + { + if( test_info.tinfo[i].maxError > maxError ) + { + maxError = test_info.tinfo[i].maxError; + maxErrorVal = test_info.tinfo[i].maxErrorValue; + maxErrorVal2 = test_info.tinfo[i].maxErrorValue2; + } + } + + if( error ) + goto exit; + + if( gWimpyMode ) + vlog( "Wimp pass" ); + else + vlog( "passed" ); + } + + if( gMeasureTimes ) + { + //Init input arrays + double *p = (double *)gIn; + double *p2 = (double *)gIn2; + for( j = 0; j < BUFFER_SIZE / sizeof( cl_double ); j++ ) + { + p[j] = DoubleFromUInt32(genrand_int32(d)); + p2[j] = DoubleFromUInt32(genrand_int32(d)); + } + + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, BUFFER_SIZE, gIn, 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); + return error; + } + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, BUFFER_SIZE, gIn2, 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error ); + return error; + } + + + // Run the kernels + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + size_t vectorSize = sizeof( cl_double ) * sizeValues[j]; + size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize; // BUFFER_SIZE / vectorSize rounded up + if( ( error = clSetKernelArg( test_info.k[j][0], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(test_info.programs[j]); goto exit; } + if( ( 
error = clSetKernelArg( test_info.k[j][0], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(test_info.programs[j]); goto exit; } + if( ( error = clSetKernelArg( test_info.k[j][0], 2, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(test_info.programs[j]); goto exit; } + + double sum = 0.0; + double bestTime = INFINITY; + for( i = 0; i < PERF_LOOP_COUNT; i++ ) + { + uint64_t startTime = GetTime(); + if( (error = clEnqueueNDRangeKernel(gQueue, test_info.k[j][0], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) + { + vlog_error( "FAILED -- could not execute kernel\n" ); + goto exit; + } + + // Make sure OpenCL is done + if( (error = clFinish(gQueue) ) ) + { + vlog_error( "Error %d at clFinish\n", error ); + goto exit; + } + + uint64_t endTime = GetTime(); + double time = SubtractTime( endTime, startTime ); + sum += time; + if( time < bestTime ) + bestTime = time; + } + + if( gReportAverageTimes ) + bestTime = sum / PERF_LOOP_COUNT; + double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (BUFFER_SIZE / sizeof( double ) ); + vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s", f->name, sizeNames[j] ); + } + for( ; j < gMaxVectorSizeIndex; j++ ) + vlog( "\t -- " ); + } + + if( ! 
gSkipCorrectnessTesting ) + vlog( "\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2 ); + vlog( "\n" ); + + +exit: + // Release + for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) + { + clReleaseProgram(test_info.programs[i]); + if( test_info.k[i] ) + { + for( j = 0; j < test_info.threadCount; j++ ) + clReleaseKernel(test_info.k[i][j]); + + free( test_info.k[i] ); + } + } + if( test_info.tinfo ) + { + for( i = 0; i < test_info.threadCount; i++ ) + { + free_mtdata( test_info.tinfo[i].d ); + clReleaseMemObject(test_info.tinfo[i].inBuf); + clReleaseMemObject(test_info.tinfo[i].inBuf2); + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + clReleaseMemObject(test_info.tinfo[i].outBuf[j]); + clReleaseCommandQueue(test_info.tinfo[i].tQueue); + } + + free( test_info.tinfo ); + } + + return error; +} + +static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data ) +{ + const TestInfo *job = (const TestInfo *) data; + size_t buffer_elements = job->subBufferSize; + size_t buffer_size = buffer_elements * sizeof( cl_double ); + cl_uint base = job_id * (cl_uint) job->step; + ThreadInfo *tinfo = job->tinfo + thread_id; + float ulps = job->ulps; + dptr func = job->f->dfunc; + int ftz = job->ftz; + MTdata d = tinfo->d; + cl_uint j, k; + cl_int error; + const char *name = job->f->name; + + int isNextafter = job->isNextafter; + cl_ulong *t; + cl_double *r,*s,*s2; + + Force64BitFPUPrecision(); + + // start the map of the output arrays + cl_event e[ VECTOR_SIZE_COUNT ]; + cl_ulong *out[ VECTOR_SIZE_COUNT ]; + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + out[j] = (cl_ulong*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0, buffer_size, 0, NULL, e + j, &error); + if( error || NULL == out[j]) + { + vlog_error( "Error: clEnqueueMapBuffer %d failed! 
err: %d\n", j, error ); + return error; + } + } + + // Get that moving + if( (error = clFlush(tinfo->tQueue) )) + vlog( "clFlush failed\n" ); + + //Init input array + cl_ulong *p = (cl_ulong *)gIn + thread_id * buffer_elements; + cl_ulong *p2 = (cl_ulong *)gIn2 + thread_id * buffer_elements; + j = 0; + int totalSpecialValueCount = specialValuesDoubleCount * specialValuesDoubleCount; + int indx = (totalSpecialValueCount - 1) / buffer_elements; + + if( job_id <= (cl_uint)indx ) + { // test edge cases + cl_double *fp = (cl_double *)p; + cl_double *fp2 = (cl_double *)p2; + uint32_t x, y; + + x = (job_id * buffer_elements) % specialValuesDoubleCount; + y = (job_id * buffer_elements) / specialValuesDoubleCount; + + for( ; j < buffer_elements; j++ ) + { + fp[j] = specialValuesDouble[x]; + fp2[j] = specialValuesDouble[y]; + if( ++x >= specialValuesDoubleCount ) + { + x = 0; + y++; + if( y >= specialValuesDoubleCount ) + break; + } + } + } + + //Init any remaining values. + for( ; j < buffer_elements; j++ ) + { + p[j] = genrand_int64(d); + p2[j] = genrand_int64(d); + } + + if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, buffer_size, p, 0, NULL, NULL) )) + { + vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error ); + goto exit; + } + + if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0, buffer_size, p2, 0, NULL, NULL) )) + { + vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error ); + goto exit; + } + + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + //Wait for the map to finish + if( (error = clWaitForEvents(1, e + j) )) + { + vlog_error( "Error: clWaitForEvents failed! err: %d\n", error ); + goto exit; + } + if( (error = clReleaseEvent( e[j] ) )) + { + vlog_error( "Error: clReleaseEvent failed! 
err: %d\n", error ); + goto exit; + } + + // Fill the result buffer with garbage, so that old results don't carry over + uint32_t pattern = 0xffffdead; + memset_pattern4(out[j], &pattern, buffer_size); + if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL) )) + { + vlog_error( "Error: clEnqueueMapBuffer failed! err: %d\n", error ); + goto exit; + } + + // run the kernel + size_t vectorCount = (buffer_elements + sizeValues[j] - 1) / sizeValues[j]; + cl_kernel kernel = job->k[j][thread_id]; //each worker thread has its own copy of the cl_kernel + cl_program program = job->programs[j]; + + if( ( error = clSetKernelArg( kernel, 0, sizeof( tinfo->outBuf[j] ), &tinfo->outBuf[j] ))){ LogBuildError(program); return error; } + if( ( error = clSetKernelArg( kernel, 1, sizeof( tinfo->inBuf ), &tinfo->inBuf ) )) { LogBuildError(program); return error; } + if( ( error = clSetKernelArg( kernel, 2, sizeof( tinfo->inBuf2 ), &tinfo->inBuf2 ) )) { LogBuildError(program); return error; } + + if( (error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL, &vectorCount, NULL, 0, NULL, NULL))) + { + vlog_error( "FAILED -- could not execute kernel\n" ); + goto exit; + } + } + + // Get that moving + if( (error = clFlush(tinfo->tQueue) )) + vlog( "clFlush 2 failed\n" ); + + if( gSkipCorrectnessTesting ) + return CL_SUCCESS; + + //Calculate the correctly rounded reference result + r = (cl_double *)gOut_Ref + thread_id * buffer_elements; + s = (cl_double *)gIn + thread_id * buffer_elements; + s2 = (cl_double *)gIn2 + thread_id * buffer_elements; + for( j = 0; j < buffer_elements; j++ ) + r[j] = (cl_double) func.f_ff( s[j], s2[j] ); + + // Read the data back -- no need to wait for the first N-1 buffers. This is an in order queue. 
+ for( j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++ ) + { + out[j] = (cl_ulong*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error); + if( error || NULL == out[j] ) + { + vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error ); + goto exit; + } + } + + // Wait for the last buffer + out[j] = (cl_ulong*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_TRUE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error); + if( error || NULL == out[j] ) + { + vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error ); + goto exit; + } + + //Verify data + t = (cl_ulong *)r; + for( j = 0; j < buffer_elements; j++ ) + { + for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) + { + cl_ulong *q = out[k]; + + // If we aren't getting the correctly rounded result + if( t[j] != q[j] ) + { + cl_double test = ((cl_double*) q)[j]; + long double correct = func.f_ff( s[j], s2[j] ); + float err = Ulp_Error_Double( test, correct ); + int fail = ! (fabsf(err) <= ulps); + + if( fail && ftz ) + { + // retry per section 6.5.3.2 + if( IsDoubleResultSubnormal(correct, ulps ) ) + { + fail = fail && ( test != 0.0f ); + if( ! fail ) + err = 0.0f; + } + + // nextafter on FTZ platforms may return the smallest + // normal float (2^-126) given a denormal or a zero + // as the first argument. The rationale here is that + // nextafter flushes the argument to zero and then + // returns the next representable number in the + // direction of the second argument, and since + // denorms are considered as zero, the smallest + // normal number is the next representable number. + // In which case, it should have the same sign as the + // second argument. 
+ if (isNextafter ) + { + if(IsDoubleSubnormal(s[j]) || s[j] == 0.0f) + { + cl_double value = copysign(twoToMinus1022, s2[j]); + fail = fail && (test != value); + if (!fail) + err = 0.0f; + } + } + else + { + // retry per section 6.5.3.3 + if( IsDoubleSubnormal( s[j] ) ) + { + long double correct2 = func.f_ff( 0.0, s2[j] ); + long double correct3 = func.f_ff( -0.0, s2[j] ); + float err2 = Ulp_Error_Double( test, correct2 ); + float err3 = Ulp_Error_Double( test, correct3 ); + fail = fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps))); + if( fabsf( err2 ) < fabsf(err ) ) + err = err2; + if( fabsf( err3 ) < fabsf(err ) ) + err = err3; + + // retry per section 6.5.3.4 + if( IsDoubleResultSubnormal( correct2, ulps ) || IsDoubleResultSubnormal( correct3, ulps ) ) + { + fail = fail && ( test != 0.0f); + if( ! fail ) + err = 0.0f; + } + + //try with both args as zero + if( IsDoubleSubnormal( s2[j] ) ) + { + correct2 = func.f_ff( 0.0, 0.0 ); + correct3 = func.f_ff( -0.0, 0.0 ); + long double correct4 = func.f_ff( 0.0, -0.0 ); + long double correct5 = func.f_ff( -0.0, -0.0 ); + err2 = Ulp_Error_Double( test, correct2 ); + err3 = Ulp_Error_Double( test, correct3 ); + float err4 = Ulp_Error_Double( test, correct4 ); + float err5 = Ulp_Error_Double( test, correct5 ); + fail = fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)) && + (!(fabsf(err4) <= ulps)) && (!(fabsf(err5) <= ulps))); + if( fabsf( err2 ) < fabsf(err ) ) + err = err2; + if( fabsf( err3 ) < fabsf(err ) ) + err = err3; + if( fabsf( err4 ) < fabsf(err ) ) + err = err4; + if( fabsf( err5 ) < fabsf(err ) ) + err = err5; + + // retry per section 6.5.3.4 + if( IsDoubleResultSubnormal( correct2, ulps ) || IsDoubleResultSubnormal( correct3, ulps ) || + IsDoubleResultSubnormal( correct4, ulps ) || IsDoubleResultSubnormal( correct5, ulps ) ) + { + fail = fail && ( test != 0.0f); + if( ! 
fail ) + err = 0.0f; + } + } + } + else if(IsDoubleSubnormal(s2[j]) ) + { + long double correct2 = func.f_ff( s[j], 0.0 ); + long double correct3 = func.f_ff( s[j], -0.0 ); + float err2 = Ulp_Error_Double( test, correct2 ); + float err3 = Ulp_Error_Double( test, correct3 ); + fail = fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps))); + if( fabsf( err2 ) < fabsf(err ) ) + err = err2; + if( fabsf( err3 ) < fabsf(err ) ) + err = err3; + + // retry per section 6.5.3.4 + if( IsDoubleResultSubnormal( correct2, ulps ) || IsDoubleResultSubnormal( correct3, ulps ) ) + { + fail = fail && ( test != 0.0f); + if( ! fail ) + err = 0.0f; + } + } + } + } + + if( fabsf(err ) > tinfo->maxError ) + { + tinfo->maxError = fabsf(err); + tinfo->maxErrorValue = s[j]; + tinfo->maxErrorValue2 = s2[j]; + } + if( fail ) + { + vlog_error( "\nERROR: %s%s: %f ulp error at {%.13la, %.13la}: *%.13la vs. %.13la\n", name, sizeNames[k], err, s[j], s2[j], r[j], test ); + error = -1; + goto exit; + } + } + } + } + + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)) ) + { + vlog_error( "Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n", j, error ); + return error; + } + } + + if( (error = clFlush(tinfo->tQueue) )) + vlog( "clFlush 3 failed\n" ); + + + if( 0 == ( base & 0x0fffffff) ) + { + if (gVerboseBruteForce) + { + vlog("base:%14u step:%10u scale:%10zu buf_elements:%10u ulps:%5.3f ThreadCount:%2u\n", base, job->step, job->scale, buffer_elements, job->ulps, job->threadCount); + } else + { + vlog("." 
); + } + fflush(stdout); + } +exit: + return error; + +} + +int TestFunc_Float_Float_Float(const Func *f, MTdata d) +{ + return TestFunc_Float_Float_Float_common(f, d, 0); +} + +int TestFunc_Double_Double_Double(const Func *f, MTdata d) +{ + return TestFunc_Double_Double_Double_common(f, d, 0); +} + +int TestFunc_Float_Float_Float_nextafter(const Func *f, MTdata d) +{ + return TestFunc_Float_Float_Float_common(f, d, 1); +} + +int TestFunc_Double_Double_Double_nextafter(const Func *f, MTdata d) +{ + return TestFunc_Double_Double_Double_common(f, d, 1); +} + diff --git a/test_conformance/math_brute_force/binaryOperator.c b/test_conformance/math_brute_force/binaryOperator.c new file mode 100644 index 00000000..718afd3d --- /dev/null +++ b/test_conformance/math_brute_force/binaryOperator.c @@ -0,0 +1,1455 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "Utility.h" + +#include +#include "FunctionList.h" + +int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata); +int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata); + +#if defined(__cplusplus) + extern "C" +#endif + +const vtbl _binary_operator = { "binaryOperator", TestFunc_Float_Float_Float_Operator, TestFunc_Double_Double_Double_Operator }; + +static int BuildKernel( const char *name, const char *operator_symbol, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p ); +static int BuildKernelDouble( const char *name, const char *operator_symbol, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p ); + +static int BuildKernel( const char *name, const char *operator_symbol, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p ) +{ + const char *c[] = { + "__kernel void ", name, "_kernel", sizeNames[vectorSize], "( __global float", sizeNames[vectorSize], "* out, __global float", sizeNames[vectorSize], "* in1, __global float", sizeNames[vectorSize], "* in2 )\n" + "{\n" + " size_t i = get_global_id(0);\n" + " out[i] = in1[i] ", operator_symbol, " in2[i];\n" + "}\n" + }; + const char *c3[] = { "__kernel void ", name, "_kernel", sizeNames[vectorSize], "( __global float* out, __global float* in, __global float* in2)\n" + "{\n" + " size_t i = get_global_id(0);\n" + " if( i + 1 < get_global_size(0) )\n" + " {\n" + " float3 f0 = vload3( 0, in + 3 * i );\n" + " float3 f1 = vload3( 0, in2 + 3 * i );\n" + " f0 = f0 ", operator_symbol, " f1;\n" + " vstore3( f0, 0, out + 3*i );\n" + " }\n" + " else\n" + " {\n" + " size_t parity = i & 1; // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). 
Assume power of two buffer size \n" + " float3 f0, f1;\n" + " switch( parity )\n" + " {\n" + " case 1:\n" + " f0 = (float3)( in[3*i], NAN, NAN ); \n" + " f1 = (float3)( in2[3*i], NAN, NAN ); \n" + " break;\n" + " case 0:\n" + " f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n" + " f1 = (float3)( in2[3*i], in2[3*i+1], NAN ); \n" + " break;\n" + " }\n" + " f0 = f0 ", operator_symbol, " f1;\n" + " switch( parity )\n" + " {\n" + " case 0:\n" + " out[3*i+1] = f0.y; \n" + " // fall through\n" + " case 1:\n" + " out[3*i] = f0.x; \n" + " break;\n" + " }\n" + " }\n" + "}\n" + }; + + const char **kern = c; + size_t kernSize = sizeof(c)/sizeof(c[0]); + + if( sizeValues[vectorSize] == 3 ) + { + kern = c3; + kernSize = sizeof(c3)/sizeof(c3[0]); + } + + char testName[32]; + snprintf( testName, sizeof( testName ) -1, "%s_kernel%s", name, sizeNames[vectorSize] ); + + return MakeKernels(kern, (cl_uint) kernSize, testName, kernel_count, k, p); + +} + +static int BuildKernelDouble( const char *name, const char *operator_symbol, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p ) +{ + const char *c[] = { + "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", + "__kernel void ", name, "_kernel", sizeNames[vectorSize], "( __global double", sizeNames[vectorSize], "* out, __global double", sizeNames[vectorSize], "* in1, __global double", sizeNames[vectorSize], "* in2 )\n" + "{\n" + " size_t i = get_global_id(0);\n" + " out[i] = in1[i] ", operator_symbol, " in2[i];\n" + "}\n" + }; + const char *c3[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" + "__kernel void ", name, "_kernel", sizeNames[vectorSize], "( __global double* out, __global double* in, __global double* in2)\n" + "{\n" + " size_t i = get_global_id(0);\n" + " if( i + 1 < get_global_size(0) )\n" + " {\n" + " double3 d0 = vload3( 0, in + 3 * i );\n" + " double3 d1 = vload3( 0, in2 + 3 * i );\n" + " d0 = d0 ", operator_symbol, " d1;\n" + " vstore3( d0, 0, out + 3*i );\n" + " }\n" + " else\n" + " {\n" + " size_t 
parity = i & 1; // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n" + " double3 d0, d1;\n" + " switch( parity )\n" + " {\n" + " case 1:\n" + " d0 = (double3)( in[3*i], NAN, NAN ); \n" + " d1 = (double3)( in2[3*i], NAN, NAN ); \n" + " break;\n" + " case 0:\n" + " d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n" + " d1 = (double3)( in2[3*i], in2[3*i+1], NAN ); \n" + " break;\n" + " }\n" + " d0 = d0 ", operator_symbol, " d1;\n" + " switch( parity )\n" + " {\n" + " case 0:\n" + " out[3*i+1] = d0.y; \n" + " // fall through\n" + " case 1:\n" + " out[3*i] = d0.x; \n" + " break;\n" + " }\n" + " }\n" + "}\n" + }; + + const char **kern = c; + size_t kernSize = sizeof(c)/sizeof(c[0]); + + if( sizeValues[vectorSize] == 3 ) + { + kern = c3; + kernSize = sizeof(c3)/sizeof(c3[0]); + } + + char testName[32]; + snprintf( testName, sizeof( testName ) -1, "%s_kernel%s", name, sizeNames[vectorSize] ); + + return MakeKernels(kern, (cl_uint) kernSize, testName, kernel_count, k, p); + +} + +typedef struct BuildKernelInfo +{ + cl_uint offset; // the first vector size to build + cl_uint kernel_count; + cl_kernel **kernels; + cl_program *programs; + const char *name; + const char *operator_symbol; +}BuildKernelInfo; + +static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); +static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ) +{ + BuildKernelInfo *info = (BuildKernelInfo*) p; + cl_uint i = info->offset + job_id; + return BuildKernel( info->name, info->operator_symbol, i, info->kernel_count, info->kernels[i], info->programs + i ); +} + +static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); +static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ) +{ + BuildKernelInfo *info = (BuildKernelInfo*) p; + cl_uint i = info->offset + job_id; + return BuildKernelDouble( info->name, info->operator_symbol, 
i, info->kernel_count, info->kernels[i], info->programs + i ); +} + +//Thread specific data for a worker thread +typedef struct ThreadInfo +{ + cl_mem inBuf; // input buffer for the thread + cl_mem inBuf2; // input buffer for the thread + cl_mem outBuf[ VECTOR_SIZE_COUNT ]; // output buffers for the thread + float maxError; // max error value. Init to 0. + double maxErrorValue; // position of the max error value (param 1). Init to 0. + double maxErrorValue2; // position of the max error value (param 2). Init to 0. + MTdata d; + cl_command_queue tQueue; // per thread command queue to improve performance +}ThreadInfo; + +typedef struct TestInfo +{ + size_t subBufferSize; // Size of the sub-buffer in elements + const Func *f; // A pointer to the function info + cl_program programs[ VECTOR_SIZE_COUNT ]; // programs for various vector sizes + cl_kernel *k[VECTOR_SIZE_COUNT ]; // arrays of thread-specific kernels for each worker thread: k[vector_size][thread_id] + ThreadInfo *tinfo; // An array of thread specific information for each worker thread + cl_uint threadCount; // Number of worker threads + cl_uint step; // step between each chunk and the next. 
+ cl_uint scale; // stride between individual test values + float ulps; // max_allowed ulps + int ftz; // non-zero if running in flush to zero mode + + // no special fields +}TestInfo; + +static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *p ); + +// A table of more difficult cases to get right +static const float specialValuesFloat[] = { + -NAN, -INFINITY, -FLT_MAX, MAKE_HEX_FLOAT(-0x1.000002p64f, -0x1000002L, 40), MAKE_HEX_FLOAT(-0x1.0p64f, -0x1L, 64), MAKE_HEX_FLOAT(-0x1.fffffep63f, -0x1fffffeL, 39), MAKE_HEX_FLOAT(-0x1.000002p63f, -0x1000002L, 39), MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(-0x1.fffffep62f, -0x1fffffeL, 38), + MAKE_HEX_FLOAT(-0x1.000002p32f, -0x1000002L, 8), MAKE_HEX_FLOAT(-0x1.0p32f, -0x1L, 32), MAKE_HEX_FLOAT(-0x1.fffffep31f, -0x1fffffeL, 7), MAKE_HEX_FLOAT(-0x1.000002p31f, -0x1000002L, 7), MAKE_HEX_FLOAT(-0x1.0p31f, -0x1L, 31), MAKE_HEX_FLOAT(-0x1.fffffep30f, -0x1fffffeL, 6), -1000.f, -100.f, -4.0f, -3.5f, + -3.0f, MAKE_HEX_FLOAT(-0x1.800002p1f, -0x1800002L, -23), -2.5f, MAKE_HEX_FLOAT(-0x1.7ffffep1f, -0x17ffffeL, -23), -2.0f, MAKE_HEX_FLOAT(-0x1.800002p0f, -0x1800002L, -24), -1.5f, MAKE_HEX_FLOAT(-0x1.7ffffep0f, -0x17ffffeL, -24),MAKE_HEX_FLOAT(-0x1.000002p0f, -0x1000002L, -24), -1.0f, MAKE_HEX_FLOAT(-0x1.fffffep-1f, -0x1fffffeL, -25), + MAKE_HEX_FLOAT(-0x1.000002p-1f, -0x1000002L, -25), -0.5f, MAKE_HEX_FLOAT(-0x1.fffffep-2f, -0x1fffffeL, -26), MAKE_HEX_FLOAT(-0x1.000002p-2f, -0x1000002L, -26), -0.25f, MAKE_HEX_FLOAT(-0x1.fffffep-3f, -0x1fffffeL, -27), + MAKE_HEX_FLOAT(-0x1.000002p-126f, -0x1000002L, -150), -FLT_MIN, MAKE_HEX_FLOAT(-0x0.fffffep-126f, -0x0fffffeL, -150), MAKE_HEX_FLOAT(-0x0.000ffep-126f, -0x0000ffeL, -150), MAKE_HEX_FLOAT(-0x0.0000fep-126f, -0x00000feL, -150), MAKE_HEX_FLOAT(-0x0.00000ep-126f, -0x000000eL, -150), MAKE_HEX_FLOAT(-0x0.00000cp-126f, -0x000000cL, -150), MAKE_HEX_FLOAT(-0x0.00000ap-126f, -0x000000aL, -150), + MAKE_HEX_FLOAT(-0x0.000008p-126f, -0x0000008L, -150), 
MAKE_HEX_FLOAT(-0x0.000006p-126f, -0x0000006L, -150), MAKE_HEX_FLOAT(-0x0.000004p-126f, -0x0000004L, -150), MAKE_HEX_FLOAT(-0x0.000002p-126f, -0x0000002L, -150), -0.0f, + + +NAN, +INFINITY, +FLT_MAX, MAKE_HEX_FLOAT(+0x1.000002p64f, +0x1000002L, 40), MAKE_HEX_FLOAT(+0x1.0p64f, +0x1L, 64), MAKE_HEX_FLOAT(+0x1.fffffep63f, +0x1fffffeL, 39), MAKE_HEX_FLOAT(+0x1.000002p63f, +0x1000002L, 39), MAKE_HEX_FLOAT(+0x1.0p63f, +0x1L, 63), MAKE_HEX_FLOAT(+0x1.fffffep62f, +0x1fffffeL, 38), + MAKE_HEX_FLOAT(+0x1.000002p32f, +0x1000002L, 8), MAKE_HEX_FLOAT(+0x1.0p32f, +0x1L, 32), MAKE_HEX_FLOAT(+0x1.fffffep31f, +0x1fffffeL, 7), MAKE_HEX_FLOAT(+0x1.000002p31f, +0x1000002L, 7), MAKE_HEX_FLOAT(+0x1.0p31f, +0x1L, 31), MAKE_HEX_FLOAT(+0x1.fffffep30f, +0x1fffffeL, 6), +1000.f, +100.f, +4.0f, +3.5f, + +3.0f, MAKE_HEX_FLOAT(+0x1.800002p1f, +0x1800002L, -23), 2.5f, MAKE_HEX_FLOAT(+0x1.7ffffep1f, +0x17ffffeL, -23),+2.0f, MAKE_HEX_FLOAT(+0x1.800002p0f, +0x1800002L, -24), 1.5f, MAKE_HEX_FLOAT(+0x1.7ffffep0f, +0x17ffffeL, -24), MAKE_HEX_FLOAT(+0x1.000002p0f, +0x1000002L, -24), +1.0f, MAKE_HEX_FLOAT(+0x1.fffffep-1f, +0x1fffffeL, -25), + MAKE_HEX_FLOAT(+0x1.000002p-1f, +0x1000002L, -25), +0.5f, MAKE_HEX_FLOAT(+0x1.fffffep-2f, +0x1fffffeL, -26), MAKE_HEX_FLOAT(+0x1.000002p-2f, +0x1000002L, -26), +0.25f, MAKE_HEX_FLOAT(+0x1.fffffep-3f, +0x1fffffeL, -27), + MAKE_HEX_FLOAT(0x1.000002p-126f, 0x1000002L, -150), +FLT_MIN, MAKE_HEX_FLOAT(+0x0.fffffep-126f, +0x0fffffeL, -150), MAKE_HEX_FLOAT(+0x0.000ffep-126f, +0x0000ffeL, -150), MAKE_HEX_FLOAT(+0x0.0000fep-126f, +0x00000feL, -150), MAKE_HEX_FLOAT(+0x0.00000ep-126f, +0x000000eL, -150), MAKE_HEX_FLOAT(+0x0.00000cp-126f, +0x000000cL, -150), MAKE_HEX_FLOAT(+0x0.00000ap-126f, +0x000000aL, -150), + MAKE_HEX_FLOAT(+0x0.000008p-126f, +0x0000008L, -150), MAKE_HEX_FLOAT(+0x0.000006p-126f, +0x0000006L, -150), MAKE_HEX_FLOAT(+0x0.000004p-126f, +0x0000004L, -150), MAKE_HEX_FLOAT(+0x0.000002p-126f, +0x0000002L, -150), +0.0f +}; + +static size_t specialValuesFloatCount = 
sizeof( specialValuesFloat ) / sizeof( specialValuesFloat[0] ); + +static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *p ); + +int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d) +{ + TestInfo test_info; + cl_int error; + size_t i, j; + float maxError = 0.0f; + double maxErrorVal = 0.0; + double maxErrorVal2 = 0.0; + + logFunctionInfo(f->name,sizeof(cl_float),gTestFastRelaxed); + + // Init test_info + memset( &test_info, 0, sizeof( test_info ) ); + test_info.threadCount = GetThreadCount(); + test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); + test_info.scale = 1; + if (gWimpyMode) { + test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); + test_info.scale = (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor; + } + + test_info.step = test_info.subBufferSize * test_info.scale; + test_info.f = f; + test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps; + test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); + + // cl_kernels aren't thread safe, so we make one for each vector size for every thread + for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) + { + size_t array_size = test_info.threadCount * sizeof( cl_kernel ); + test_info.k[i] = (cl_kernel*)malloc( array_size ); + if( NULL == test_info.k[i] ) + { + vlog_error( "Error: Unable to allocate storage for kernels!\n" ); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset( test_info.k[i], 0, array_size ); + } + test_info.tinfo = (ThreadInfo*)malloc( test_info.threadCount * sizeof(*test_info.tinfo) ); + if( NULL == test_info.tinfo ) + { + vlog_error( "Error: Unable to allocate storage for thread specific data.\n" ); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset( test_info.tinfo, 0, test_info.threadCount * sizeof(*test_info.tinfo) ); + for( i = 0; i < test_info.threadCount; i++ ) + { + 
cl_buffer_region region = { i * test_info.subBufferSize * sizeof( cl_float), test_info.subBufferSize * sizeof( cl_float) }; + test_info.tinfo[i].inBuf = clCreateSubBuffer( gInBuffer, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if( error || NULL == test_info.tinfo[i].inBuf) + { + vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size ); + goto exit; + } + test_info.tinfo[i].inBuf2 = clCreateSubBuffer( gInBuffer2, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if( error || NULL == test_info.tinfo[i].inBuf) + { + vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size ); + goto exit; + } + + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( gOutBuffer[j], CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if( error || NULL == test_info.tinfo[i].outBuf[j] ) + { + vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size ); + goto exit; + } + } + test_info.tinfo[i].tQueue = clCreateCommandQueueWithProperties(gContext, gDevice, 0, &error); + if( NULL == test_info.tinfo[i].tQueue || error ) + { + vlog_error( "clCreateCommandQueue failed. 
(%d)\n", error ); + goto exit; + } + + test_info.tinfo[i].d = init_genrand(genrand_int32(d)); + } + + // Init the kernels + { + BuildKernelInfo build_info = { gMinVectorSizeIndex, test_info.threadCount, test_info.k, test_info.programs, f->name, f->nameInCode }; + if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) )) + goto exit; + } + + if( !gSkipCorrectnessTesting ) + { + error = ThreadPool_Do( TestFloat, (cl_uint) ((1ULL<<32) / test_info.step), &test_info ); + + // Accumulate the arithmetic errors + for( i = 0; i < test_info.threadCount; i++ ) + { + if( test_info.tinfo[i].maxError > maxError ) + { + maxError = test_info.tinfo[i].maxError; + maxErrorVal = test_info.tinfo[i].maxErrorValue; + maxErrorVal2 = test_info.tinfo[i].maxErrorValue2; + } + } + + if( error ) + goto exit; + + if( gWimpyMode ) + vlog( "Wimp pass" ); + else + vlog( "passed" ); + } + + + if( gMeasureTimes ) + { + //Init input arrays + uint32_t *p = (uint32_t *)gIn; + uint32_t *p2 = (uint32_t *)gIn2; + for( j = 0; j < BUFFER_SIZE / sizeof( float ); j++ ) + { + p[j] = (genrand_int32(d) & ~0x40000000) | 0x20000000; + p2[j] = 0x3fc00000; + } + + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, BUFFER_SIZE, gIn, 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); + return error; + } + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, BUFFER_SIZE, gIn2, 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error ); + return error; + } + + + // Run the kernels + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + size_t vectorSize = sizeof( cl_float ) * sizeValues[j]; + size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize; // BUFFER_SIZE / vectorSize rounded up + if( ( error = clSetKernelArg( test_info.k[j][0], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(test_info.programs[j]); goto exit; } + if( ( 
error = clSetKernelArg( test_info.k[j][0], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(test_info.programs[j]); goto exit; } + if( ( error = clSetKernelArg( test_info.k[j][0], 2, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(test_info.programs[j]); goto exit; } + + double sum = 0.0; + double bestTime = INFINITY; + for( i = 0; i < PERF_LOOP_COUNT; i++ ) + { + uint64_t startTime = GetTime(); + if( (error = clEnqueueNDRangeKernel(gQueue, test_info.k[j][0], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) + { + vlog_error( "FAILED -- could not execute kernel\n" ); + goto exit; + } + + // Make sure OpenCL is done + if( (error = clFinish(gQueue) ) ) + { + vlog_error( "Error %d at clFinish\n", error ); + goto exit; + } + + uint64_t endTime = GetTime(); + double time = SubtractTime( endTime, startTime ); + sum += time; + if( time < bestTime ) + bestTime = time; + } + + if( gReportAverageTimes ) + bestTime = sum / PERF_LOOP_COUNT; + double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (BUFFER_SIZE / sizeof( float ) ); + vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s", f->name, sizeNames[j] ); + } + } + + if( ! 
gSkipCorrectnessTesting ) + vlog( "\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2 ); + vlog( "\n" ); + + +exit: + for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) + { + clReleaseProgram(test_info.programs[i]); + if( test_info.k[i] ) + { + for( j = 0; j < test_info.threadCount; j++ ) + clReleaseKernel(test_info.k[i][j]); + + free( test_info.k[i] ); + } + } + if( test_info.tinfo ) + { + for( i = 0; i < test_info.threadCount; i++ ) + { + free_mtdata(test_info.tinfo[i].d); + clReleaseMemObject(test_info.tinfo[i].inBuf); + clReleaseMemObject(test_info.tinfo[i].inBuf2); + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + clReleaseMemObject(test_info.tinfo[i].outBuf[j]); + clReleaseCommandQueue(test_info.tinfo[i].tQueue); + } + + free( test_info.tinfo ); + } + + return error; +} + +static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data ) +{ + const TestInfo *job = (const TestInfo *) data; + size_t buffer_elements = job->subBufferSize; + size_t buffer_size = buffer_elements * sizeof( cl_float ); + cl_uint base = job_id * (cl_uint) job->step; + ThreadInfo *tinfo = job->tinfo + thread_id; + float ulps = job->ulps; + fptr func = job->f->func; + if ( gTestFastRelaxed ) + { + func = job->f->rfunc; + } + + + int ftz = job->ftz; + MTdata d = tinfo->d; + cl_uint j, k; + cl_int error; + cl_uchar *overflow = (cl_uchar*)malloc(buffer_size); + const char *name = job->f->name; + cl_uint *t; + cl_float *r,*s,*s2; + RoundingMode oldRoundMode; + + // start the map of the output arrays + cl_event e[ VECTOR_SIZE_COUNT ]; + cl_uint *out[ VECTOR_SIZE_COUNT ]; + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + out[j] = (uint32_t*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0, buffer_size, 0, NULL, e + j, &error); + if( error || NULL == out[j]) + { + vlog_error( "Error: clEnqueueMapBuffer %d failed! 
err: %d\n", j, error ); + return error; + } + } + + // Get that moving + if( (error = clFlush(tinfo->tQueue) )) + vlog( "clFlush failed\n" ); + + //Init input array + cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements; + cl_uint *p2 = (cl_uint *)gIn2 + thread_id * buffer_elements; + j = 0; + + int totalSpecialValueCount = specialValuesFloatCount * specialValuesFloatCount; + int indx = (totalSpecialValueCount - 1) / buffer_elements; + + if( job_id <= (cl_uint)indx ) + { // test edge cases + float *fp = (float *)p; + float *fp2 = (float *)p2; + uint32_t x, y; + + x = (job_id * buffer_elements) % specialValuesFloatCount; + y = (job_id * buffer_elements) / specialValuesFloatCount; + + for( ; j < buffer_elements; j++ ) + { + fp[j] = specialValuesFloat[x]; + fp2[j] = specialValuesFloat[y]; + if( ++x >= specialValuesFloatCount ) + { + x = 0; + y++; + if( y >= specialValuesFloatCount ) + break; + } + if(gTestFastRelaxed && strcmp(name,"divide") == 0 ) + { + float fpj = *(float*)&fp[j]; + float fpj2 = *(float*)&fp2[j]; + if(fabs(fpj) > 0x5E800000 ) //[2^-62,2^62] + { + fp[j] = NAN; + } + if( fabs(fpj2) > 0x5E800000 ) //[2^-62,2^62] + { + fp2[j] = NAN; + } + } + } + } + + //Init any remaining values. + for( ; j < buffer_elements; j++ ) + { + p[j] = genrand_int32(d); + p2[j] = genrand_int32(d); + + if(gTestFastRelaxed) + { + if( strcmp(name,"divide")==0){ + float pj = *(float*)&p[j]; + float pj2 = *(float*)&p2[j]; + if(fabs(pj) > 0x5E800000 ) //[2^-62,2^62] + { + p[j] = NAN; + } + if( fabs(pj2) > 0x5E800000 ) //[2^-62,2^62] + { + p2[j] = NAN; + } + } + } + } + + if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, buffer_size, p, 0, NULL, NULL) )) + { + vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error ); + goto exit; + } + + if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0, buffer_size, p2, 0, NULL, NULL) )) + { + vlog_error( "Error: clEnqueueWriteBuffer failed! 
err: %d\n", error ); + goto exit; + } + + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + //Wait for the map to finish + if( (error = clWaitForEvents(1, e + j) )) + { + vlog_error( "Error: clWaitForEvents failed! err: %d\n", error ); + goto exit; + } + if( (error = clReleaseEvent( e[j] ) )) + { + vlog_error( "Error: clReleaseEvent failed! err: %d\n", error ); + goto exit; + } + + // Fill the result buffer with garbage, so that old results don't carry over + uint32_t pattern = 0xffffdead; + memset_pattern4(out[j], &pattern, buffer_size); + if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL) )) + { + vlog_error( "Error: clEnqueueMapBuffer failed! err: %d\n", error ); + goto exit; + } + + // run the kernel + size_t vectorCount = (buffer_elements + sizeValues[j] - 1) / sizeValues[j]; + cl_kernel kernel = job->k[j][thread_id]; //each worker thread has its own copy of the cl_kernel + cl_program program = job->programs[j]; + + if( ( error = clSetKernelArg( kernel, 0, sizeof( tinfo->outBuf[j] ), &tinfo->outBuf[j] ))){ LogBuildError(program); return error; } + if( ( error = clSetKernelArg( kernel, 1, sizeof( tinfo->inBuf ), &tinfo->inBuf ) )) { LogBuildError(program); return error; } + if( ( error = clSetKernelArg( kernel, 2, sizeof( tinfo->inBuf2 ), &tinfo->inBuf2 ) )) { LogBuildError(program); return error; } + + if( (error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL, &vectorCount, NULL, 0, NULL, NULL))) + { + vlog_error( "FAILED -- could not execute kernel\n" ); + goto exit; + } + } + + // Get that moving + if( (error = clFlush(tinfo->tQueue) )) + vlog( "clFlush 2 failed\n" ); + + if( gSkipCorrectnessTesting ) + { + free( overflow ); + return CL_SUCCESS; + } + + //Calculate the correctly rounded reference result + FPU_mode_type oldMode; + memset( &oldMode, 0, sizeof( oldMode ) ); + if( ftz ) + ForceFTZ( &oldMode ); + + // Set the rounding mode to match the device + oldRoundMode = 
kRoundToNearestEven; + if (gIsInRTZMode) + oldRoundMode = set_round(kRoundTowardZero, kfloat); + + //Calculate the correctly rounded reference result + r = (float *)gOut_Ref + thread_id * buffer_elements; + s = (float *)gIn + thread_id * buffer_elements; + s2 = (float *)gIn2 + thread_id * buffer_elements; + if( gInfNanSupport ) + { + for( j = 0; j < buffer_elements; j++ ) + r[j] = (float) func.f_ff( s[j], s2[j] ); + } + else + { + for( j = 0; j < buffer_elements; j++ ) + { + feclearexcept(FE_OVERFLOW); + r[j] = (float) func.f_ff( s[j], s2[j] ); + overflow[j] = FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW)); + } + } + + if (gIsInRTZMode) + (void)set_round(oldRoundMode, kfloat); + + if( ftz ) + RestoreFPState( &oldMode ); + + // Read the data back -- no need to wait for the first N-1 buffers. This is an in order queue. + for( j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++ ) + { + out[j] = (uint32_t*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error); + if( error || NULL == out[j] ) + { + vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error ); + goto exit; + } + } + + // Wait for the last buffer + out[j] = (uint32_t*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_TRUE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error); + if( error || NULL == out[j] ) + { + vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error ); + goto exit; + } + + //Verify data + t = (cl_uint *)r; + for( j = 0; j < buffer_elements; j++ ) + { + for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) + { + cl_uint *q = out[k]; + + // If we aren't getting the correctly rounded result + if( t[j] != q[j] ) + { + float test = ((float*) q)[j]; + double correct = func.f_ff( s[j], s2[j] ); + + // Per section 10 paragraph 6, accept any result if an input or output is a infinity or NaN or overflow + if ( !gInfNanSupport) + { + // Note: no double rounding here. 
Reference functions calculate in single precision. + if( overflow[j] || + IsFloatInfinity(correct) || IsFloatNaN(correct) || + IsFloatInfinity(s2[j]) || IsFloatNaN(s2[j]) || + IsFloatInfinity(s[j]) || IsFloatNaN(s[j]) ) + continue; + } + + // Per section 10 paragraph 6, accept embedded devices always returning positive 0.0. + if (gIsEmbedded && (t[j] == 0x80000000) && (q[j] == 0x00000000)) continue; + + float err = Ulp_Error( test, correct ); + float errB = Ulp_Error( test, (float) correct ); + + if( gTestFastRelaxed ) + ulps = job->f->relaxed_error; + + int fail = ((!(fabsf(err) <= ulps)) && (!(fabsf(errB) <= ulps))); + if( fabsf( errB ) < fabsf(err ) ) + err = errB; + + if( fail && ftz ) + { + // retry per section 6.5.3.2 + if( IsFloatResultSubnormal(correct, ulps ) ) + { + fail = fail && ( test != 0.0f ); + if( ! fail ) + err = 0.0f; + } + + // retry per section 6.5.3.3 + if( IsFloatSubnormal( s[j] ) ) + { + double correct2, correct3; + float err2, err3; + + if( !gInfNanSupport ) + feclearexcept(FE_OVERFLOW); + + correct2 = func.f_ff( 0.0, s2[j] ); + correct3 = func.f_ff( -0.0, s2[j] ); + + // Per section 10 paragraph 6, accept any result if an input or output is a infinity or NaN or overflow + if( !gInfNanSupport ) + { + if( fetestexcept(FE_OVERFLOW) ) + continue; + + // Note: no double rounding here. Reference functions calculate in single precision. + if( IsFloatInfinity(correct2) || IsFloatNaN(correct2) || + IsFloatInfinity(correct3) || IsFloatNaN(correct3) ) + continue; + } + + err2 = Ulp_Error( test, correct2 ); + err3 = Ulp_Error( test, correct3 ); + fail = fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps))); + if( fabsf( err2 ) < fabsf(err ) ) + err = err2; + if( fabsf( err3 ) < fabsf(err ) ) + err = err3; + + // retry per section 6.5.3.4 + if( IsFloatResultSubnormal( correct2, ulps ) || IsFloatResultSubnormal( correct3, ulps ) ) + { + fail = fail && ( test != 0.0f); + if( ! 
fail ) + err = 0.0f; + } + + //try with both args as zero + if( IsFloatSubnormal( s2[j] ) ) + { + double correct4, correct5; + float err4, err5; + + if( !gInfNanSupport ) + feclearexcept(FE_OVERFLOW); + + correct2 = func.f_ff( 0.0, 0.0 ); + correct3 = func.f_ff( -0.0, 0.0 ); + correct4 = func.f_ff( 0.0, -0.0 ); + correct5 = func.f_ff( -0.0, -0.0 ); + + // Per section 10 paragraph 6, accept any result if an input or output is a infinity or NaN or overflow + if( !gInfNanSupport ) + { + if( fetestexcept(FE_OVERFLOW) ) + continue; + + // Note: no double rounding here. Reference functions calculate in single precision. + if( IsFloatInfinity(correct2) || IsFloatNaN(correct2) || + IsFloatInfinity(correct3) || IsFloatNaN(correct3) || + IsFloatInfinity(correct4) || IsFloatNaN(correct4) || + IsFloatInfinity(correct5) || IsFloatNaN(correct5) ) + continue; + } + + err2 = Ulp_Error( test, correct2 ); + err3 = Ulp_Error( test, correct3 ); + err4 = Ulp_Error( test, correct4 ); + err5 = Ulp_Error( test, correct5 ); + fail = fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)) && + (!(fabsf(err4) <= ulps)) && (!(fabsf(err5) <= ulps))); + if( fabsf( err2 ) < fabsf(err ) ) + err = err2; + if( fabsf( err3 ) < fabsf(err ) ) + err = err3; + if( fabsf( err4 ) < fabsf(err ) ) + err = err4; + if( fabsf( err5 ) < fabsf(err ) ) + err = err5; + + // retry per section 6.5.3.4 + if( IsFloatResultSubnormal( correct2, ulps ) || IsFloatResultSubnormal( correct3, ulps ) || + IsFloatResultSubnormal( correct4, ulps ) || IsFloatResultSubnormal( correct5, ulps ) ) + { + fail = fail && ( test != 0.0f); + if( ! 
fail ) + err = 0.0f; + } + } + } + else if(IsFloatSubnormal(s2[j]) ) + { + double correct2, correct3; + float err2, err3; + + if( !gInfNanSupport ) + feclearexcept(FE_OVERFLOW); + + correct2 = func.f_ff( s[j], 0.0 ); + correct3 = func.f_ff( s[j], -0.0 ); + + // Per section 10 paragraph 6, accept any result if an input or output is a infinity or NaN or overflow + if ( !gInfNanSupport) + { + // Note: no double rounding here. Reference functions calculate in single precision. + if( overflow[j] || + IsFloatInfinity(correct) || IsFloatNaN(correct) || + IsFloatInfinity(correct2)|| IsFloatNaN(correct2) ) + continue; + } + + err2 = Ulp_Error( test, correct2 ); + err3 = Ulp_Error( test, correct3 ); + fail = fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps))); + if( fabsf( err2 ) < fabsf(err ) ) + err = err2; + if( fabsf( err3 ) < fabsf(err ) ) + err = err3; + + // retry per section 6.5.3.4 + if( IsFloatResultSubnormal( correct2, ulps ) || IsFloatResultSubnormal( correct3, ulps ) ) + { + fail = fail && ( test != 0.0f); + if( ! fail ) + err = 0.0f; + } + } + } + + + if( fabsf(err ) > tinfo->maxError ) + { + tinfo->maxError = fabsf(err); + tinfo->maxErrorValue = s[j]; + tinfo->maxErrorValue2 = s2[j]; + } + if( fail ) + { + vlog_error( "\nERROR: %s%s: %f ulp error at {%a, %a}: *%a vs. %a (0x%8.8x) at index: %d\n", name, sizeNames[k], err, s[j], s2[j], r[j], test, ((cl_uint*)&test)[0], j ); + error = -1; + goto exit; + } + } + } + } + + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)) ) + { + vlog_error( "Error: clEnqueueUnmapMemObject %d failed 2! 
err: %d\n", j, error ); + return error; + } + } + + if( (error = clFlush(tinfo->tQueue) )) + vlog( "clFlush 3 failed\n" ); + + + if( 0 == ( base & 0x0fffffff) ) + { + if (gVerboseBruteForce) + { + vlog("base:%14u step:%10u scale:%10zu buf_elements:%10u ulps:%5.3f ThreadCount:%2u\n", base, job->step, job->scale, buffer_elements, job->ulps, job->threadCount); + } else + { + vlog("." ); + } + fflush(stdout); + } +exit: + if( overflow ) + free( overflow ); + return error; + +} + + +// A table of more difficult cases to get right +static const double specialValuesDouble[] = { + -NAN, -INFINITY, -DBL_MAX, MAKE_HEX_DOUBLE(-0x1.0000000000001p64, -0x10000000000001LL, 12), MAKE_HEX_DOUBLE(-0x1.0p64, -0x1LL, 64), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp63, -0x1fffffffffffffLL, 11), MAKE_HEX_DOUBLE(-0x1.0000000000001p63, -0x10000000000001LL, 11), MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10), + MAKE_HEX_DOUBLE(-0x1.000002p32, -0x1000002LL, 8), MAKE_HEX_DOUBLE(-0x1.0p32, -0x1LL, 32), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp31, -0x1fffffffffffffLL, -21), MAKE_HEX_DOUBLE(-0x1.0000000000001p31, -0x10000000000001LL, -21), MAKE_HEX_DOUBLE(-0x1.0p31, -0x1LL, 31), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp30, -0x1fffffffffffffLL, -22), -1000., -100., -4.0, -3.5, + -3.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p1, -0x18000000000001LL, -51), -2.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp1, -0x17ffffffffffffLL, -51), -2.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p0, -0x18000000000001LL, -52), -1.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp0, -0x17ffffffffffffLL, -52),MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52), -1.0, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-1, -0x1fffffffffffffLL, -53), + MAKE_HEX_DOUBLE(-0x1.0000000000001p-1, -0x10000000000001LL, -53), -0.5, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-2, -0x1fffffffffffffLL, -54), MAKE_HEX_DOUBLE(-0x1.0000000000001p-2, -0x10000000000001LL, -54), -0.25, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-3, -0x1fffffffffffffLL, 
-55), + MAKE_HEX_DOUBLE(-0x1.0000000000001p-1022, -0x10000000000001LL, -1074), -DBL_MIN, MAKE_HEX_DOUBLE(-0x0.fffffffffffffp-1022, -0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000fffp-1022, -0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(-0x0.00000000000fep-1022, -0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ep-1022, -0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000cp-1022, -0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ap-1022, -0x0000000000000aLL, -1074), + MAKE_HEX_DOUBLE(-0x0.0000000000008p-1022, -0x00000000000008LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000007p-1022, -0x00000000000007LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000006p-1022, -0x00000000000006LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000005p-1022, -0x00000000000005LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000004p-1022, -0x00000000000004LL, -1074), + MAKE_HEX_DOUBLE(-0x0.0000000000003p-1022, -0x00000000000003LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000002p-1022, -0x00000000000002LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000001p-1022, -0x00000000000001LL, -1074), -0.0, + + +NAN, +INFINITY, +DBL_MAX, MAKE_HEX_DOUBLE(+0x1.0000000000001p64, +0x10000000000001LL, 12), MAKE_HEX_DOUBLE(+0x1.0p64, +0x1LL, 64), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp63, +0x1fffffffffffffLL, 11), MAKE_HEX_DOUBLE(+0x1.0000000000001p63, +0x10000000000001LL, 11), MAKE_HEX_DOUBLE(+0x1.0p63, +0x1LL, 63), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10), + MAKE_HEX_DOUBLE(+0x1.000002p32, +0x1000002LL, 8), MAKE_HEX_DOUBLE(+0x1.0p32, +0x1LL, 32), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp31, +0x1fffffffffffffLL, -21), MAKE_HEX_DOUBLE(+0x1.0000000000001p31, +0x10000000000001LL, -21), MAKE_HEX_DOUBLE(+0x1.0p31, +0x1LL, 31), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp30, +0x1fffffffffffffLL, -22), +1000., +100., +4.0, +3.5, + +3.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p1, +0x18000000000001LL, -51), +2.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp1, +0x17ffffffffffffLL, -51), +2.0, 
MAKE_HEX_DOUBLE(+0x1.8000000000001p0, +0x18000000000001LL, -52), +1.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp0, +0x17ffffffffffffLL, -52),MAKE_HEX_DOUBLE(+0x1.0000000000001p0, +0x10000000000001LL, -52), +1.0, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-1, +0x1fffffffffffffLL, -53),
+    // BUGFIX above: the entry next-after +1.0 was negated (-0x1.0000000000001p0) in the
+    // positive half of the table; the float table uses the positive value here.
+    MAKE_HEX_DOUBLE(+0x1.0000000000001p-1, +0x10000000000001LL, -53), +0.5, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-2, +0x1fffffffffffffLL, -54), MAKE_HEX_DOUBLE(+0x1.0000000000001p-2, +0x10000000000001LL, -54), +0.25, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-3, +0x1fffffffffffffLL, -55),
+    MAKE_HEX_DOUBLE(+0x1.0000000000001p-1022, +0x10000000000001LL, -1074), +DBL_MIN, MAKE_HEX_DOUBLE(+0x0.fffffffffffffp-1022, +0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000fffp-1022, +0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(+0x0.00000000000fep-1022, +0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ep-1022, +0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000cp-1022, +0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ap-1022, +0x0000000000000aLL, -1074),
+    MAKE_HEX_DOUBLE(+0x0.0000000000008p-1022, +0x00000000000008LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000007p-1022, +0x00000000000007LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000006p-1022, +0x00000000000006LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000005p-1022, +0x00000000000005LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000004p-1022, +0x00000000000004LL, -1074),
+    MAKE_HEX_DOUBLE(+0x0.0000000000003p-1022, +0x00000000000003LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000002p-1022, +0x00000000000002LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000001p-1022, +0x00000000000001LL, -1074), +0.0,
+};
+
+static size_t specialValuesDoubleCount = sizeof( specialValuesDouble ) / sizeof( specialValuesDouble[0] );
+
+static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *p );
+
+int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d)
+{
+    TestInfo test_info;
+    cl_int error;
+    size_t i, j;
+    float maxError = 0.0f;
+    double maxErrorVal 
= 0.0; + double maxErrorVal2 = 0.0; + logFunctionInfo(f->name,sizeof(cl_double),gTestFastRelaxed); + + // Init test_info + memset( &test_info, 0, sizeof( test_info ) ); + test_info.threadCount = GetThreadCount(); + test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); + test_info.scale = 1; + if (gWimpyMode) + { + test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); + test_info.scale = (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor; + } + + test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale; + test_info.f = f; + test_info.ulps = f->double_ulps; + test_info.ftz = f->ftz || gForceFTZ; + + // cl_kernels aren't thread safe, so we make one for each vector size for every thread + for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) + { + size_t array_size = test_info.threadCount * sizeof( cl_kernel ); + test_info.k[i] = (cl_kernel*)malloc( array_size ); + if( NULL == test_info.k[i] ) + { + vlog_error( "Error: Unable to allocate storage for kernels!\n" ); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset( test_info.k[i], 0, array_size ); + } + test_info.tinfo = (ThreadInfo*)malloc( test_info.threadCount * sizeof(*test_info.tinfo) ); + if( NULL == test_info.tinfo ) + { + vlog_error( "Error: Unable to allocate storage for thread specific data.\n" ); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset( test_info.tinfo, 0, test_info.threadCount * sizeof(*test_info.tinfo) ); + for( i = 0; i < test_info.threadCount; i++ ) + { + cl_buffer_region region = { i * test_info.subBufferSize * sizeof( cl_double), test_info.subBufferSize * sizeof( cl_double) }; + test_info.tinfo[i].inBuf = clCreateSubBuffer( gInBuffer, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if( error || NULL == test_info.tinfo[i].inBuf) + { + vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", 
region.origin, region.size ); + goto exit; + } + test_info.tinfo[i].inBuf2 = clCreateSubBuffer( gInBuffer2, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if( error || NULL == test_info.tinfo[i].inBuf) + { + vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size ); + goto exit; + } + + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if( error || NULL == test_info.tinfo[i].outBuf[j] ) + { + vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size ); + goto exit; + } + } + test_info.tinfo[i].tQueue = clCreateCommandQueueWithProperties(gContext, gDevice, 0, &error); + if( NULL == test_info.tinfo[i].tQueue || error ) + { + vlog_error( "clCreateCommandQueue failed. (%d)\n", error ); + goto exit; + } + + test_info.tinfo[i].d = init_genrand(genrand_int32(d)); + } + + + // Init the kernels + { + BuildKernelInfo build_info = { gMinVectorSizeIndex, test_info.threadCount, test_info.k, test_info.programs, f->name, f->nameInCode }; + if( (error = ThreadPool_Do( BuildKernel_DoubleFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) )) + goto exit; + } + + if( !gSkipCorrectnessTesting ) + { + error = ThreadPool_Do( TestDouble, (cl_uint) ((1ULL<<32) / test_info.step), &test_info ); + + // Accumulate the arithmetic errors + for( i = 0; i < test_info.threadCount; i++ ) + { + if( test_info.tinfo[i].maxError > maxError ) + { + maxError = test_info.tinfo[i].maxError; + maxErrorVal = test_info.tinfo[i].maxErrorValue; + maxErrorVal2 = test_info.tinfo[i].maxErrorValue2; + } + } + + if( error ) + goto exit; + + if( gWimpyMode ) + vlog( "Wimp pass" ); + else + vlog( "passed" ); + } + + + if( gMeasureTimes ) + { + //Init input arrays + double *p = (double *)gIn; + double *p2 = (double *)gIn2; + for( j = 
0; j < BUFFER_SIZE / sizeof( cl_double ); j++ ) + { + p[j] = DoubleFromUInt32(genrand_int32(d)); + p2[j] = DoubleFromUInt32(genrand_int32(d)); + } + + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, BUFFER_SIZE, gIn, 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); + return error; + } + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, BUFFER_SIZE, gIn2, 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error ); + return error; + } + + + // Run the kernels + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + size_t vectorSize = sizeof( cl_double ) * sizeValues[j]; + size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize; // BUFFER_SIZE / vectorSize rounded up + if( ( error = clSetKernelArg( test_info.k[j][0], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(test_info.programs[j]); goto exit; } + if( ( error = clSetKernelArg( test_info.k[j][0], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(test_info.programs[j]); goto exit; } + if( ( error = clSetKernelArg( test_info.k[j][0], 2, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(test_info.programs[j]); goto exit; } + + double sum = 0.0; + double bestTime = INFINITY; + for( i = 0; i < PERF_LOOP_COUNT; i++ ) + { + uint64_t startTime = GetTime(); + if( (error = clEnqueueNDRangeKernel(gQueue, test_info.k[j][0], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) + { + vlog_error( "FAILED -- could not execute kernel\n" ); + goto exit; + } + + // Make sure OpenCL is done + if( (error = clFinish(gQueue) ) ) + { + vlog_error( "Error %d at clFinish\n", error ); + goto exit; + } + + uint64_t endTime = GetTime(); + double time = SubtractTime( endTime, startTime ); + sum += time; + if( time < bestTime ) + bestTime = time; + } + + if( gReportAverageTimes ) + bestTime = sum / PERF_LOOP_COUNT; + double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * 
gSimdSize * 1e6 / (BUFFER_SIZE / sizeof( double ) ); + vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s", f->name, sizeNames[j] ); + } + for( ; j < gMaxVectorSizeIndex; j++ ) + vlog( "\t -- " ); + } + + if( ! gSkipCorrectnessTesting ) + vlog( "\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2 ); + vlog( "\n" ); + + +exit: + // Release + for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) + { + clReleaseProgram(test_info.programs[i]); + if( test_info.k[i] ) + { + for( j = 0; j < test_info.threadCount; j++ ) + clReleaseKernel(test_info.k[i][j]); + + free( test_info.k[i] ); + } + } + if( test_info.tinfo ) + { + for( i = 0; i < test_info.threadCount; i++ ) + { + free_mtdata(test_info.tinfo[i].d); + clReleaseMemObject(test_info.tinfo[i].inBuf); + clReleaseMemObject(test_info.tinfo[i].inBuf2); + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + clReleaseMemObject(test_info.tinfo[i].outBuf[j]); + clReleaseCommandQueue(test_info.tinfo[i].tQueue); + } + + free( test_info.tinfo ); + } + + return error; +} + +static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data ) +{ + const TestInfo *job = (const TestInfo *) data; + size_t buffer_elements = job->subBufferSize; + size_t buffer_size = buffer_elements * sizeof( cl_double ); + cl_uint base = job_id * (cl_uint) job->step; + ThreadInfo *tinfo = job->tinfo + thread_id; + float ulps = job->ulps; + dptr func = job->f->dfunc; + int ftz = job->ftz; + MTdata d = tinfo->d; + cl_uint j, k; + cl_int error; + const char *name = job->f->name; + cl_ulong *t; + cl_double *r,*s,*s2; + + Force64BitFPUPrecision(); + + // start the map of the output arrays + cl_event e[ VECTOR_SIZE_COUNT ]; + cl_ulong *out[ VECTOR_SIZE_COUNT ]; + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + out[j] = (cl_ulong*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0, buffer_size, 0, NULL, e + j, &error); + if( error || NULL == out[j]) + { + vlog_error( 
"Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error ); + return error; + } + } + + // Get that moving + if( (error = clFlush(tinfo->tQueue) )) + vlog( "clFlush failed\n" ); + + //Init input array + cl_ulong *p = (cl_ulong *)gIn + thread_id * buffer_elements; + cl_ulong *p2 = (cl_ulong *)gIn2 + thread_id * buffer_elements; + j = 0; + int totalSpecialValueCount = specialValuesDoubleCount * specialValuesDoubleCount; + int indx = (totalSpecialValueCount - 1) / buffer_elements; + + if( job_id <= (cl_uint)indx ) + { // test edge cases + cl_double *fp = (cl_double *)p; + cl_double *fp2 = (cl_double *)p2; + uint32_t x, y; + + x = (job_id * buffer_elements) % specialValuesDoubleCount; + y = (job_id * buffer_elements) / specialValuesDoubleCount; + + for( ; j < buffer_elements; j++ ) + { + fp[j] = specialValuesDouble[x]; + fp2[j] = specialValuesDouble[y]; + if( ++x >= specialValuesDoubleCount ) + { + x = 0; + y++; + if( y >= specialValuesDoubleCount ) + break; + } + } + } + + //Init any remaining values. + for( ; j < buffer_elements; j++ ) + { + p[j] = genrand_int64(d); + p2[j] = genrand_int64(d); + } + + if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, buffer_size, p, 0, NULL, NULL) )) + { + vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error ); + goto exit; + } + + if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0, buffer_size, p2, 0, NULL, NULL) )) + { + vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error ); + goto exit; + } + + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + //Wait for the map to finish + if( (error = clWaitForEvents(1, e + j) )) + { + vlog_error( "Error: clWaitForEvents failed! err: %d\n", error ); + goto exit; + } + if( (error = clReleaseEvent( e[j] ) )) + { + vlog_error( "Error: clReleaseEvent failed! 
err: %d\n", error ); + goto exit; + } + + // Fill the result buffer with garbage, so that old results don't carry over + uint32_t pattern = 0xffffdead; + memset_pattern4(out[j], &pattern, buffer_size); + if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL) )) + { + vlog_error( "Error: clEnqueueMapBuffer failed! err: %d\n", error ); + goto exit; + } + + // run the kernel + size_t vectorCount = (buffer_elements + sizeValues[j] - 1) / sizeValues[j]; + cl_kernel kernel = job->k[j][thread_id]; //each worker thread has its own copy of the cl_kernel + cl_program program = job->programs[j]; + + if( ( error = clSetKernelArg( kernel, 0, sizeof( tinfo->outBuf[j] ), &tinfo->outBuf[j] ))){ LogBuildError(program); return error; } + if( ( error = clSetKernelArg( kernel, 1, sizeof( tinfo->inBuf ), &tinfo->inBuf ) )) { LogBuildError(program); return error; } + if( ( error = clSetKernelArg( kernel, 2, sizeof( tinfo->inBuf2 ), &tinfo->inBuf2 ) )) { LogBuildError(program); return error; } + + if( (error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL, &vectorCount, NULL, 0, NULL, NULL))) + { + vlog_error( "FAILED -- could not execute kernel\n" ); + goto exit; + } + } + + // Get that moving + if( (error = clFlush(tinfo->tQueue) )) + vlog( "clFlush 2 failed\n" ); + + if( gSkipCorrectnessTesting ) + return CL_SUCCESS; + + //Calculate the correctly rounded reference result + r = (cl_double *)gOut_Ref + thread_id * buffer_elements; + s = (cl_double *)gIn + thread_id * buffer_elements; + s2 = (cl_double *)gIn2 + thread_id * buffer_elements; + for( j = 0; j < buffer_elements; j++ ) + r[j] = (cl_double) func.f_ff( s[j], s2[j] ); + + // Read the data back -- no need to wait for the first N-1 buffers. This is an in order queue. 
+ for( j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++ ) + { + out[j] = (cl_ulong*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error); + if( error || NULL == out[j] ) + { + vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error ); + goto exit; + } + } + + // Wait for the last buffer + out[j] = (cl_ulong*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_TRUE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error); + if( error || NULL == out[j] ) + { + vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error ); + goto exit; + } + + //Verify data + t = (cl_ulong *)r; + for( j = 0; j < buffer_elements; j++ ) + { + for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) + { + cl_ulong *q = out[k]; + + // If we aren't getting the correctly rounded result + if( t[j] != q[j] ) + { + cl_double test = ((cl_double*) q)[j]; + long double correct = func.f_ff( s[j], s2[j] ); + float err = Ulp_Error_Double( test, correct ); + int fail = ! (fabsf(err) <= ulps); + + if( fail && ftz ) + { + // retry per section 6.5.3.2 + if( IsDoubleResultSubnormal(correct, ulps ) ) + { + fail = fail && ( test != 0.0f ); + if( ! fail ) + err = 0.0f; + } + + + // retry per section 6.5.3.3 + if( IsDoubleSubnormal( s[j] ) ) + { + long double correct2 = func.f_ff( 0.0, s2[j] ); + long double correct3 = func.f_ff( -0.0, s2[j] ); + float err2 = Ulp_Error_Double( test, correct2 ); + float err3 = Ulp_Error_Double( test, correct3 ); + fail = fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps))); + if( fabsf( err2 ) < fabsf(err ) ) + err = err2; + if( fabsf( err3 ) < fabsf(err ) ) + err = err3; + + // retry per section 6.5.3.4 + if( IsDoubleResultSubnormal( correct2, ulps ) || IsDoubleResultSubnormal( correct3, ulps ) ) + { + fail = fail && ( test != 0.0f); + if( ! 
fail ) + err = 0.0f; + } + + //try with both args as zero + if( IsDoubleSubnormal( s2[j] ) ) + { + correct2 = func.f_ff( 0.0, 0.0 ); + correct3 = func.f_ff( -0.0, 0.0 ); + long double correct4 = func.f_ff( 0.0, -0.0 ); + long double correct5 = func.f_ff( -0.0, -0.0 ); + err2 = Ulp_Error_Double( test, correct2 ); + err3 = Ulp_Error_Double( test, correct3 ); + float err4 = Ulp_Error_Double( test, correct4 ); + float err5 = Ulp_Error_Double( test, correct5 ); + fail = fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)) && + (!(fabsf(err4) <= ulps)) && (!(fabsf(err5) <= ulps))); + if( fabsf( err2 ) < fabsf(err ) ) + err = err2; + if( fabsf( err3 ) < fabsf(err ) ) + err = err3; + if( fabsf( err4 ) < fabsf(err ) ) + err = err4; + if( fabsf( err5 ) < fabsf(err ) ) + err = err5; + + // retry per section 6.5.3.4 + if( IsDoubleResultSubnormal( correct2, ulps ) || IsDoubleResultSubnormal( correct3, ulps ) || + IsDoubleResultSubnormal( correct4, ulps ) || IsDoubleResultSubnormal( correct5, ulps ) ) + { + fail = fail && ( test != 0.0f); + if( ! fail ) + err = 0.0f; + } + } + } + else if(IsDoubleSubnormal(s2[j]) ) + { + long double correct2 = func.f_ff( s[j], 0.0 ); + long double correct3 = func.f_ff( s[j], -0.0 ); + float err2 = Ulp_Error_Double( test, correct2 ); + float err3 = Ulp_Error_Double( test, correct3 ); + fail = fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps))); + if( fabsf( err2 ) < fabsf(err ) ) + err = err2; + if( fabsf( err3 ) < fabsf(err ) ) + err = err3; + + // retry per section 6.5.3.4 + if( IsDoubleResultSubnormal( correct2, ulps ) || IsDoubleResultSubnormal( correct3, ulps ) ) + { + fail = fail && ( test != 0.0f); + if( ! fail ) + err = 0.0f; + } + } + } + + if( fabsf(err ) > tinfo->maxError ) + { + tinfo->maxError = fabsf(err); + tinfo->maxErrorValue = s[j]; + tinfo->maxErrorValue2 = s2[j]; + } + if( fail ) + { + vlog_error( "\nERROR: %s%s: %f ulp error at {%a, %a}: *%a vs. 
%a\n", name, sizeNames[k], err, s[j], s2[j], r[j], test ); + error = -1; + goto exit; + } + } + } + } + + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)) ) + { + vlog_error( "Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n", j, error ); + return error; + } + } + + if( (error = clFlush(tinfo->tQueue) )) + vlog( "clFlush 3 failed\n" ); + + + if( 0 == ( base & 0x0fffffff) ) + { + if (gVerboseBruteForce) + { + vlog("base:%14u step:%10u scale:%10zu buf_elements:%10u ulps:%5.3f ThreadCount:%2u\n", base, job->step, job->scale, buffer_elements, job->ulps, job->threadCount); + } else + { + vlog("." ); + } + fflush(stdout); + } + +exit: + return error; + +} + + + + diff --git a/test_conformance/math_brute_force/binary_i.c b/test_conformance/math_brute_force/binary_i.c new file mode 100644 index 00000000..b72d117f --- /dev/null +++ b/test_conformance/math_brute_force/binary_i.c @@ -0,0 +1,1214 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "Utility.h" + +#include +#include +#include "FunctionList.h" + +int TestFunc_Float_Float_Int(const Func *f, MTdata); +int TestFunc_Double_Double_Int(const Func *f, MTdata); + +#if defined( __cplusplus) + extern "C" +#endif +const vtbl _binary_i = { "binary_i", TestFunc_Float_Float_Int, TestFunc_Double_Double_Int }; + +static int BuildKernel( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p ); +static int BuildKernelDouble( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p ); + +static int BuildKernel( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p ) +{ + const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global float", sizeNames[vectorSize], "* out, __global float", sizeNames[vectorSize], "* in1, __global int", sizeNames[vectorSize], "* in2 )\n" + "{\n" + " int i = get_global_id(0);\n" + " out[i] = ", name, "( in1[i], in2[i] );\n" + "}\n" + }; + + const char *c3[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global float* out, __global float* in, __global int* in2)\n" + "{\n" + " size_t i = get_global_id(0);\n" + " if( i + 1 < get_global_size(0) )\n" + " {\n" + " float3 f0 = vload3( 0, in + 3 * i );\n" + " int3 i0 = vload3( 0, in2 + 3 * i );\n" + " f0 = ", name, "( f0, i0 );\n" + " vstore3( f0, 0, out + 3*i );\n" + " }\n" + " else\n" + " {\n" + " size_t parity = i & 1; // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). 
Assume power of two buffer size \n" + " float3 f0;\n" + " int3 i0;\n" + " switch( parity )\n" + " {\n" + " case 1:\n" + " f0 = (float3)( in[3*i], NAN, NAN ); \n" + " i0 = (int3)( in2[3*i], 0xdead, 0xdead ); \n" + " break;\n" + " case 0:\n" + " f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n" + " i0 = (int3)( in2[3*i], in2[3*i+1], 0xdead ); \n" + " break;\n" + " }\n" + " f0 = ", name, "( f0, i0 );\n" + " switch( parity )\n" + " {\n" + " case 0:\n" + " out[3*i+1] = f0.y; \n" + " // fall through\n" + " case 1:\n" + " out[3*i] = f0.x; \n" + " break;\n" + " }\n" + " }\n" + "}\n" + }; + + const char **kern = c; + size_t kernSize = sizeof(c)/sizeof(c[0]); + + if( sizeValues[vectorSize] == 3 ) + { + kern = c3; + kernSize = sizeof(c3)/sizeof(c3[0]); + } + + char testName[32]; + snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); + + return MakeKernels(kern, (cl_uint) kernSize, testName, kernel_count, k, p); +} + +static int BuildKernelDouble( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p ) +{ + const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", + "__kernel void math_kernel", sizeNames[vectorSize], "( __global double", sizeNames[vectorSize], "* out, __global double", sizeNames[vectorSize], "* in1, __global int", sizeNames[vectorSize], "* in2 )\n" + "{\n" + " int i = get_global_id(0);\n" + " out[i] = ", name, "( in1[i], in2[i] );\n" + "}\n" + }; + + const char *c3[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", + "__kernel void math_kernel", sizeNames[vectorSize], "( __global double* out, __global double* in, __global int* in2)\n" + "{\n" + " size_t i = get_global_id(0);\n" + " if( i + 1 < get_global_size(0) )\n" + " {\n" + " double3 d0 = vload3( 0, in + 3 * i );\n" + " int3 i0 = vload3( 0, in2 + 3 * i );\n" + " d0 = ", name, "( d0, i0 );\n" + " vstore3( d0, 0, out + 3*i );\n" + " }\n" + " else\n" + " {\n" + " size_t parity = i & 1; // Figure out how many elements are left over 
after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n" + " double3 d0;\n" + " int3 i0;\n" + " switch( parity )\n" + " {\n" + " case 1:\n" + " d0 = (double3)( in[3*i], NAN, NAN ); \n" + " i0 = (int3)( in2[3*i], 0xdead, 0xdead ); \n" + " break;\n" + " case 0:\n" + " d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n" + " i0 = (int3)( in2[3*i], in2[3*i+1], 0xdead ); \n" + " break;\n" + " }\n" + " d0 = ", name, "( d0, i0 );\n" + " switch( parity )\n" + " {\n" + " case 0:\n" + " out[3*i+1] = d0.y; \n" + " // fall through\n" + " case 1:\n" + " out[3*i] = d0.x; \n" + " break;\n" + " }\n" + " }\n" + "}\n" + }; + + const char **kern = c; + size_t kernSize = sizeof(c)/sizeof(c[0]); + + if( sizeValues[vectorSize] == 3 ) + { + kern = c3; + kernSize = sizeof(c3)/sizeof(c3[0]); + } + + char testName[32]; + snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); + + return MakeKernels(kern, (cl_uint) kernSize, testName, kernel_count, k, p); +} + +typedef struct BuildKernelInfo +{ + cl_uint offset; // the first vector size to build + cl_uint kernel_count; + cl_kernel **kernels; + cl_program *programs; + const char *nameInCode; +}BuildKernelInfo; + +static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); +static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ) +{ + BuildKernelInfo *info = (BuildKernelInfo*) p; + cl_uint i = info->offset + job_id; + return BuildKernel( info->nameInCode, i, info->kernel_count, info->kernels[i], info->programs + i ); +} + +static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); +static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ) +{ + BuildKernelInfo *info = (BuildKernelInfo*) p; + cl_uint i = info->offset + job_id; + return BuildKernelDouble( info->nameInCode, i, info->kernel_count, info->kernels[i], info->programs + i ); +} + + +// A table of more difficult cases to get right 
+static const float specialValuesFloat[] = { + -NAN, -INFINITY, -FLT_MAX, MAKE_HEX_FLOAT(-0x1.000002p64f, -0x1000002L, 40), MAKE_HEX_FLOAT(-0x1.0p64f, -0x1L, 64), MAKE_HEX_FLOAT(-0x1.fffffep63f, -0x1fffffeL, 39), MAKE_HEX_FLOAT(-0x1.000002p63f, -0x1000002L, 39), MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(-0x1.fffffep62f, -0x1fffffeL, 38), + MAKE_HEX_FLOAT(-0x1.000002p32f, -0x1000002L, 8), MAKE_HEX_FLOAT(-0x1.0p32f, -0x1L, 32), MAKE_HEX_FLOAT(-0x1.fffffep31f, -0x1fffffeL, 7), MAKE_HEX_FLOAT(-0x1.000002p31f, -0x1000002L, 7), MAKE_HEX_FLOAT(-0x1.0p31f, -0x1L, 31), MAKE_HEX_FLOAT(-0x1.fffffep30f, -0x1fffffeL, 6), -1000.f, -100.f, -4.0f, -3.5f, + -3.0f, MAKE_HEX_FLOAT(-0x1.800002p1f, -0x1800002L, -23), -2.5f, MAKE_HEX_FLOAT(-0x1.7ffffep1f, -0x17ffffeL, -23), -2.0f, MAKE_HEX_FLOAT(-0x1.800002p0f, -0x1800002L, -24), -1.5f, MAKE_HEX_FLOAT(-0x1.7ffffep0f, -0x17ffffeL, -24),MAKE_HEX_FLOAT(-0x1.000002p0f, -0x1000002L, -24), -1.0f, MAKE_HEX_FLOAT(-0x1.fffffep-1f, -0x1fffffeL, -25), + MAKE_HEX_FLOAT(-0x1.000002p-1f, -0x1000002L, -25), -0.5f, MAKE_HEX_FLOAT(-0x1.fffffep-2f, -0x1fffffeL, -26), MAKE_HEX_FLOAT(-0x1.000002p-2f, -0x1000002L, -26), -0.25f, MAKE_HEX_FLOAT(-0x1.fffffep-3f, -0x1fffffeL, -27), + MAKE_HEX_FLOAT(-0x1.000002p-126f, -0x1000002L, -150), -FLT_MIN, MAKE_HEX_FLOAT(-0x0.fffffep-126f, -0x0fffffeL, -150), MAKE_HEX_FLOAT(-0x0.000ffep-126f, -0x0000ffeL, -150), MAKE_HEX_FLOAT(-0x0.0000fep-126f, -0x00000feL, -150), MAKE_HEX_FLOAT(-0x0.00000ep-126f, -0x000000eL, -150), MAKE_HEX_FLOAT(-0x0.00000cp-126f, -0x000000cL, -150), MAKE_HEX_FLOAT(-0x0.00000ap-126f, -0x000000aL, -150), + MAKE_HEX_FLOAT(-0x0.000008p-126f, -0x0000008L, -150), MAKE_HEX_FLOAT(-0x0.000006p-126f, -0x0000006L, -150), MAKE_HEX_FLOAT(-0x0.000004p-126f, -0x0000004L, -150), MAKE_HEX_FLOAT(-0x0.000002p-126f, -0x0000002L, -150), -0.0f, + + +NAN, +INFINITY, +FLT_MAX, MAKE_HEX_FLOAT(+0x1.000002p64f, +0x1000002L, 40), MAKE_HEX_FLOAT(+0x1.0p64f, +0x1L, 64), MAKE_HEX_FLOAT(+0x1.fffffep63f, +0x1fffffeL, 
39), MAKE_HEX_FLOAT(+0x1.000002p63f, +0x1000002L, 39), MAKE_HEX_FLOAT(+0x1.0p63f, +0x1L, 63), MAKE_HEX_FLOAT(+0x1.fffffep62f, +0x1fffffeL, 38), + MAKE_HEX_FLOAT(+0x1.000002p32f, +0x1000002L, 8), MAKE_HEX_FLOAT(+0x1.0p32f, +0x1L, 32), MAKE_HEX_FLOAT(+0x1.fffffep31f, +0x1fffffeL, 7), MAKE_HEX_FLOAT(+0x1.000002p31f, +0x1000002L, 7), MAKE_HEX_FLOAT(+0x1.0p31f, +0x1L, 31), MAKE_HEX_FLOAT(+0x1.fffffep30f, +0x1fffffeL, 6), +1000.f, +100.f, +4.0f, +3.5f, + +3.0f, MAKE_HEX_FLOAT(+0x1.800002p1f, +0x1800002L, -23), 2.5f, MAKE_HEX_FLOAT(+0x1.7ffffep1f, +0x17ffffeL, -23),+2.0f, MAKE_HEX_FLOAT(+0x1.800002p0f, +0x1800002L, -24), 1.5f, MAKE_HEX_FLOAT(+0x1.7ffffep0f, +0x17ffffeL, -24), MAKE_HEX_FLOAT(+0x1.000002p0f, +0x1000002L, -24), +1.0f, MAKE_HEX_FLOAT(+0x1.fffffep-1f, +0x1fffffeL, -25), + MAKE_HEX_FLOAT(+0x1.000002p-1f, +0x1000002L, -25), +0.5f, MAKE_HEX_FLOAT(+0x1.fffffep-2f, +0x1fffffeL, -26), MAKE_HEX_FLOAT(+0x1.000002p-2f, +0x1000002L, -26), +0.25f, MAKE_HEX_FLOAT(+0x1.fffffep-3f, +0x1fffffeL, -27), + MAKE_HEX_FLOAT(0x1.000002p-126f, 0x1000002L, -150), +FLT_MIN, MAKE_HEX_FLOAT(+0x0.fffffep-126f, +0x0fffffeL, -150), MAKE_HEX_FLOAT(+0x0.000ffep-126f, +0x0000ffeL, -150), MAKE_HEX_FLOAT(+0x0.0000fep-126f, +0x00000feL, -150), MAKE_HEX_FLOAT(+0x0.00000ep-126f, +0x000000eL, -150), MAKE_HEX_FLOAT(+0x0.00000cp-126f, +0x000000cL, -150), MAKE_HEX_FLOAT(+0x0.00000ap-126f, +0x000000aL, -150), + MAKE_HEX_FLOAT(+0x0.000008p-126f, +0x0000008L, -150), MAKE_HEX_FLOAT(+0x0.000006p-126f, +0x0000006L, -150), MAKE_HEX_FLOAT(+0x0.000004p-126f, +0x0000004L, -150), MAKE_HEX_FLOAT(+0x0.000002p-126f, +0x0000002L, -150), +0.0f +}; +static size_t specialValuesFloatCount = sizeof( specialValuesFloat ) / sizeof( specialValuesFloat[0] ); + + +static const int specialValuesInt[] = { 0, 1, 2, 3, 126, 127, 128, 0x02000001, 0x04000001, 1465264071, 1488522147, + -1, -2, -3, -126, -127, -128, -0x02000001, -0x04000001, -1465264071, -1488522147 }; +static size_t specialValuesIntCount = sizeof( specialValuesInt ) 
/ sizeof( specialValuesInt[0] ); + +//Thread specific data for a worker thread +typedef struct ThreadInfo +{ + cl_mem inBuf; // input buffer for the thread + cl_mem inBuf2; // input buffer for the thread + cl_mem outBuf[ VECTOR_SIZE_COUNT ]; // output buffers for the thread + float maxError; // max error value. Init to 0. + double maxErrorValue; // position of the max error value (param 1). Init to 0. + cl_int maxErrorValue2; // position of the max error value (param 2). Init to 0. + MTdata d; + cl_command_queue tQueue; // per thread command queue to improve performance +}ThreadInfo; + +typedef struct TestInfo +{ + size_t subBufferSize; // Size of the sub-buffer in elements + const Func *f; // A pointer to the function info + cl_program programs[ VECTOR_SIZE_COUNT ]; // programs for various vector sizes + cl_kernel *k[VECTOR_SIZE_COUNT ]; // arrays of thread-specific kernels for each worker thread: k[vector_size][thread_id] + ThreadInfo *tinfo; // An array of thread specific information for each worker thread + cl_uint threadCount; // Number of worker threads + cl_uint step; // step between each chunk and the next. 
+ cl_uint scale; // stride between individual test values + float ulps; // max_allowed ulps + int ftz; // non-zero if running in flush to zero mode + + // no special values +}TestInfo; + +static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *p ); + +int TestFunc_Float_Float_Int(const Func *f, MTdata d) +{ + TestInfo test_info; + cl_int error; + size_t i, j; + float maxError = 0.0f; + double maxErrorVal = 0.0; + cl_int maxErrorVal2 = 0; + + logFunctionInfo(f->name,sizeof(cl_float),gTestFastRelaxed); + + // Init test_info + memset( &test_info, 0, sizeof( test_info ) ); + test_info.threadCount = GetThreadCount(); + test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); + test_info.scale = 1; + if (gWimpyMode) + { + test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); + test_info.scale = (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor; + } + test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale; + test_info.f = f; + test_info.ulps = gIsEmbedded ? 
f->float_embedded_ulps : f->float_ulps; + test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); + + // cl_kernels aren't thread safe, so we make one for each vector size for every thread + for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) + { + size_t array_size = test_info.threadCount * sizeof( cl_kernel ); + test_info.k[i] = (cl_kernel*)malloc( array_size ); + if( NULL == test_info.k[i] ) + { + vlog_error( "Error: Unable to allocate storage for kernels!\n" ); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset( test_info.k[i], 0, array_size ); + } + test_info.tinfo = (ThreadInfo*)malloc( test_info.threadCount * sizeof(*test_info.tinfo) ); + if( NULL == test_info.tinfo ) + { + vlog_error( "Error: Unable to allocate storage for thread specific data.\n" ); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset( test_info.tinfo, 0, test_info.threadCount * sizeof(*test_info.tinfo) ); + for( i = 0; i < test_info.threadCount; i++ ) + { + cl_buffer_region region = { i * test_info.subBufferSize * sizeof( cl_float), test_info.subBufferSize * sizeof( cl_float) }; + test_info.tinfo[i].inBuf = clCreateSubBuffer( gInBuffer, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if( error || NULL == test_info.tinfo[i].inBuf) + { + vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size ); + goto exit; + } + cl_buffer_region region2 = { i * test_info.subBufferSize * sizeof( cl_int), test_info.subBufferSize * sizeof( cl_int) }; + test_info.tinfo[i].inBuf2 = clCreateSubBuffer( gInBuffer2, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, ®ion2, &error); + if( error || NULL == test_info.tinfo[i].inBuf) + { + vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size ); + goto exit; + } + + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( 
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if( error || NULL == test_info.tinfo[i].outBuf[j] ) + { + vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size ); + goto exit; + } + } + test_info.tinfo[i].tQueue = clCreateCommandQueueWithProperties(gContext, gDevice, 0, &error); + if( NULL == test_info.tinfo[i].tQueue || error ) + { + vlog_error( "clCreateCommandQueue failed. (%d)\n", error ); + goto exit; + } + test_info.tinfo[i].d = init_genrand(genrand_int32(d)); + } + + // Init the kernels + { + BuildKernelInfo build_info = { gMinVectorSizeIndex, test_info.threadCount, test_info.k, test_info.programs, f->nameInCode }; + if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) )) + goto exit; + } + + // Run the kernels + error = ThreadPool_Do( TestFloat, (cl_uint) ((1ULL<<32) / test_info.step), &test_info ); + + + // Accumulate the arithmetic errors + for( i = 0; i < test_info.threadCount; i++ ) + { + if( test_info.tinfo[i].maxError > maxError ) + { + maxError = test_info.tinfo[i].maxError; + maxErrorVal = test_info.tinfo[i].maxErrorValue; + maxErrorVal2 = test_info.tinfo[i].maxErrorValue2; + } + } + + if( error ) + goto exit; + + if( ! 
gSkipCorrectnessTesting ) + { + if( gWimpyMode ) + vlog( "Wimp pass" ); + else + vlog( "passed" ); + } + + + if( gMeasureTimes ) + { + //Init input arrays + uint32_t *p = (uint32_t *)gIn; + uint32_t *p2 = (uint32_t *)gIn2; + for( j = 0; j < BUFFER_SIZE / sizeof( float ); j++ ) + { + p[j] = (genrand_int32(d) & ~0x40000000) | 0x38000000; + p2[j] = 3; + } + + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, BUFFER_SIZE, gIn, 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); + return error; + } + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, BUFFER_SIZE, gIn2, 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error ); + return error; + } + + + // Run the kernels + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + size_t vectorSize = sizeof( cl_float ) * sizeValues[j]; + size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize; // BUFFER_SIZE / vectorSize rounded up + if( ( error = clSetKernelArg( test_info.k[j][0], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(test_info.programs[j]); goto exit; } + if( ( error = clSetKernelArg( test_info.k[j][0], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(test_info.programs[j]); goto exit; } + if( ( error = clSetKernelArg( test_info.k[j][0], 2, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(test_info.programs[j]); goto exit; } + + double sum = 0.0; + double bestTime = INFINITY; + for( i = 0; i < PERF_LOOP_COUNT; i++ ) + { + uint64_t startTime = GetTime(); + if( (error = clEnqueueNDRangeKernel(gQueue, test_info.k[j][0], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) + { + vlog_error( "FAILED -- could not execute kernel\n" ); + goto exit; + } + + // Make sure OpenCL is done + if( (error = clFinish(gQueue) ) ) + { + vlog_error( "Error %d at clFinish\n", error ); + goto exit; + } + + uint64_t endTime = GetTime(); + double time = SubtractTime( endTime, startTime ); 
+ sum += time; + if( time < bestTime ) + bestTime = time; + } + + if( gReportAverageTimes ) + bestTime = sum / PERF_LOOP_COUNT; + double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (BUFFER_SIZE / sizeof( float ) ); + vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s", f->name, sizeNames[j] ); + } + } + + if( ! gSkipCorrectnessTesting ) + vlog( "\t%8.2f @ {%a, %d}", maxError, maxErrorVal, maxErrorVal2 ); + vlog( "\n" ); + + +exit: + for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) + { + clReleaseProgram(test_info.programs[i]); + if( test_info.k[i] ) + { + for( j = 0; j < test_info.threadCount; j++ ) + clReleaseKernel(test_info.k[i][j]); + + free( test_info.k[i] ); + } + } + if( test_info.tinfo ) + { + for( i = 0; i < test_info.threadCount; i++ ) + { + free_mtdata(test_info.tinfo[i].d); + clReleaseMemObject(test_info.tinfo[i].inBuf); + clReleaseMemObject(test_info.tinfo[i].inBuf2); + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + clReleaseMemObject(test_info.tinfo[i].outBuf[j]); + clReleaseCommandQueue(test_info.tinfo[i].tQueue); + } + + free( test_info.tinfo ); + } + + return error; +} + + +static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data ) +{ + const TestInfo *job = (const TestInfo *) data; + size_t buffer_elements = job->subBufferSize; + size_t buffer_size = buffer_elements * sizeof( cl_float ); + cl_uint base = job_id * (cl_uint) job->step; + ThreadInfo *tinfo = job->tinfo + thread_id; + float ulps = job->ulps; + fptr func = job->f->func; + int ftz = job->ftz; + MTdata d = tinfo->d; + cl_uint j, k; + cl_int error; + const char *name = job->f->name; + cl_uint *t; + cl_float *r,*s; + cl_int *s2; + + // start the map of the output arrays + cl_event e[ VECTOR_SIZE_COUNT ]; + cl_uint *out[ VECTOR_SIZE_COUNT ]; + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + out[j] = (uint32_t*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], 
CL_FALSE, CL_MAP_WRITE, 0, buffer_size, 0, NULL, e + j, &error); + if( error || NULL == out[j]) + { + vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error ); + return error; + } + } + + // Get that moving + if( (error = clFlush(tinfo->tQueue) )) + vlog( "clFlush failed\n" ); + + //Init input array + cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements; + cl_uint *p2 = (cl_uint *)gIn2 + thread_id * buffer_elements; + j = 0; + int totalSpecialValueCount = specialValuesFloatCount * specialValuesIntCount; + int indx = (totalSpecialValueCount - 1) / buffer_elements; + if( job_id <= (cl_uint)indx ) + { // test edge cases + float *fp = (float *)p; + cl_int *ip2 = (cl_int *)p2; + uint32_t x, y; + + x = (job_id * buffer_elements) % specialValuesFloatCount; + y = (job_id * buffer_elements) / specialValuesFloatCount; + + for( ; j < buffer_elements; j++ ) + { + fp[j] = specialValuesFloat[x]; + ip2[j] = specialValuesInt[y]; + if( ++x >= specialValuesFloatCount ) + { + x = 0; + y++; + if( y >= specialValuesIntCount ) + break; + } + } + } + + //Init any remaining values. + for( ; j < buffer_elements; j++ ) + { + p[j] = genrand_int32(d); + p2[j] = genrand_int32(d); + } + + if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, buffer_size, p, 0, NULL, NULL) )) + { + vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error ); + goto exit; + } + + if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0, buffer_size, p2, 0, NULL, NULL) )) + { + vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error ); + goto exit; + } + + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + //Wait for the map to finish + if( (error = clWaitForEvents(1, e + j) )) + { + vlog_error( "Error: clWaitForEvents failed! err: %d\n", error ); + goto exit; + } + if( (error = clReleaseEvent( e[j] ) )) + { + vlog_error( "Error: clReleaseEvent failed! 
err: %d\n", error ); + goto exit; + } + + // Fill the result buffer with garbage, so that old results don't carry over + uint32_t pattern = 0xffffdead; + memset_pattern4(out[j], &pattern, buffer_size); + if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL) )) + { + vlog_error( "Error: clEnqueueMapBuffer failed! err: %d\n", error ); + goto exit; + } + + // run the kernel + size_t vectorCount = (buffer_elements + sizeValues[j] - 1) / sizeValues[j]; + cl_kernel kernel = job->k[j][thread_id]; //each worker thread has its own copy of the cl_kernel + cl_program program = job->programs[j]; + + if( ( error = clSetKernelArg( kernel, 0, sizeof( tinfo->outBuf[j] ), &tinfo->outBuf[j] ))){ LogBuildError(program); return error; } + if( ( error = clSetKernelArg( kernel, 1, sizeof( tinfo->inBuf ), &tinfo->inBuf ) )) { LogBuildError(program); return error; } + if( ( error = clSetKernelArg( kernel, 2, sizeof( tinfo->inBuf2 ), &tinfo->inBuf2 ) )) { LogBuildError(program); return error; } + + if( (error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL, &vectorCount, NULL, 0, NULL, NULL))) + { + vlog_error( "FAILED -- could not execute kernel\n" ); + goto exit; + } + } + + // Get that moving + if( (error = clFlush(tinfo->tQueue) )) + vlog( "clFlush 2 failed\n" ); + + if( gSkipCorrectnessTesting ) + return CL_SUCCESS; + + //Calculate the correctly rounded reference result + r = (float *)gOut_Ref + thread_id * buffer_elements; + s = (float *)gIn + thread_id * buffer_elements; + s2 = (cl_int *)gIn2 + thread_id * buffer_elements; + for( j = 0; j < buffer_elements; j++ ) + r[j] = (float) func.f_fi( s[j], s2[j] ); + + // Read the data back -- no need to wait for the first N-1 buffers. This is an in order queue. 
+ for( j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++ ) + { + out[j] = (uint32_t*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error); + if( error || NULL == out[j] ) + { + vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error ); + goto exit; + } + } + + // Wait for the last buffer + out[j] = (uint32_t*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_TRUE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error); + if( error || NULL == out[j] ) + { + vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error ); + goto exit; + } + + //Verify data + t = (cl_uint *)r; + for( j = 0; j < buffer_elements; j++ ) + { + for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) + { + cl_uint *q = out[k]; + + // If we aren't getting the correctly rounded result + if( t[j] != q[j] ) + { + float test = ((float*) q)[j]; + double correct = func.f_fi( s[j], s2[j] ); + float err = Ulp_Error( test, correct ); + int fail = ! (fabsf(err) <= ulps); + + if( fail && ftz ) + { + // retry per section 6.5.3.2 + if( IsFloatResultSubnormal(correct, ulps ) ) + { + fail = fail && ( test != 0.0f ); + if( ! fail ) + err = 0.0f; + } + + // retry per section 6.5.3.3 + if( IsFloatSubnormal( s[j] ) ) + { + double correct2, correct3; + float err2, err3; + correct2 = func.f_fi( 0.0, s2[j] ); + correct3 = func.f_fi( -0.0, s2[j] ); + err2 = Ulp_Error( test, correct2 ); + err3 = Ulp_Error( test, correct3 ); + fail = fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps))); + if( fabsf( err2 ) < fabsf(err ) ) + err = err2; + if( fabsf( err3 ) < fabsf(err ) ) + err = err3; + + // retry per section 6.5.3.4 + if( IsFloatResultSubnormal( correct2, ulps ) || IsFloatResultSubnormal( correct3, ulps ) ) + { + fail = fail && ( test != 0.0f); + if( ! 
fail ) + err = 0.0f; + } + } + } + + if( fabsf(err ) > tinfo->maxError ) + { + tinfo->maxError = fabsf(err); + tinfo->maxErrorValue = s[j]; + tinfo->maxErrorValue2 = s2[j]; + } + if( fail ) + { + vlog_error( "\nERROR: %s%s: %f ulp error at {%a, %d}: *%a vs. %a (0x%8.8x) at index: %d\n", name, sizeNames[k], err, s[j], s2[j], r[j], test, ((cl_uint*)&test)[0], j ); + error = -1; + goto exit; + } + } + } + } + + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)) ) + { + vlog_error( "Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n", j, error ); + return error; + } + } + + if( (error = clFlush(tinfo->tQueue) )) + vlog( "clFlush 3 failed\n" ); + + + if( 0 == ( base & 0x0fffffff) ) + { + if (gVerboseBruteForce) + { + vlog("base:%14u step:%10u scale:%10zu buf_elements:%10u ulps:%5.3f ThreadCount:%2u\n", base, job->step, job->scale, buffer_elements, job->ulps, job->threadCount); + } else + { + vlog("." 
); + } + fflush(stdout); + } + +exit: + return error; + +} + + + +// A table of more difficult cases to get right +static const double specialValuesDouble[] = { + -NAN, -INFINITY, -DBL_MAX, MAKE_HEX_DOUBLE(-0x1.0000000000001p64, -0x10000000000001LL, 12), MAKE_HEX_DOUBLE(-0x1.0p64, -0x1LL, 64), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp63, -0x1fffffffffffffLL, 11), MAKE_HEX_DOUBLE(-0x1.0000000000001p63, -0x10000000000001LL, 11), MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10), + MAKE_HEX_DOUBLE(-0x1.000002p32, -0x1000002LL, 8), MAKE_HEX_DOUBLE(-0x1.0p32, -0x1LL, 32), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp31, -0x1fffffffffffffLL, -21), MAKE_HEX_DOUBLE(-0x1.0000000000001p31, -0x10000000000001LL, -21), MAKE_HEX_DOUBLE(-0x1.0p31, -0x1LL, 31), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp30, -0x1fffffffffffffLL, -22), -1000., -100., -4.0, -3.5, + -3.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p1, -0x18000000000001LL, -51), -2.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp1, -0x17ffffffffffffLL, -51), -2.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p0, -0x18000000000001LL, -52), -1.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp0, -0x17ffffffffffffLL, -52),MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52), -1.0, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-1, -0x1fffffffffffffLL, -53), + MAKE_HEX_DOUBLE(-0x1.0000000000001p-1, -0x10000000000001LL, -53), -0.5, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-2, -0x1fffffffffffffLL, -54), MAKE_HEX_DOUBLE(-0x1.0000000000001p-2, -0x10000000000001LL, -54), -0.25, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-3, -0x1fffffffffffffLL, -55), + MAKE_HEX_DOUBLE(-0x1.0000000000001p-1022, -0x10000000000001LL, -1074), -DBL_MIN, MAKE_HEX_DOUBLE(-0x0.fffffffffffffp-1022, -0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000fffp-1022, -0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(-0x0.00000000000fep-1022, -0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ep-1022, -0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000cp-1022, 
-0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ap-1022, -0x0000000000000aLL, -1074), + MAKE_HEX_DOUBLE(-0x0.0000000000008p-1022, -0x00000000000008LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000007p-1022, -0x00000000000007LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000006p-1022, -0x00000000000006LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000005p-1022, -0x00000000000005LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000004p-1022, -0x00000000000004LL, -1074), + MAKE_HEX_DOUBLE(-0x0.0000000000003p-1022, -0x00000000000003LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000002p-1022, -0x00000000000002LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000001p-1022, -0x00000000000001LL, -1074), -0.0, + + +NAN, +INFINITY, +DBL_MAX, MAKE_HEX_DOUBLE(+0x1.0000000000001p64, +0x10000000000001LL, 12), MAKE_HEX_DOUBLE(+0x1.0p64, +0x1LL, 64), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp63, +0x1fffffffffffffLL, 11), MAKE_HEX_DOUBLE(+0x1.0000000000001p63, +0x10000000000001LL, 11), MAKE_HEX_DOUBLE(+0x1.0p63, +0x1LL, 63), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10), + MAKE_HEX_DOUBLE(+0x1.000002p32, +0x1000002LL, 8), MAKE_HEX_DOUBLE(+0x1.0p32, +0x1LL, 32), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp31, +0x1fffffffffffffLL, -21), MAKE_HEX_DOUBLE(+0x1.0000000000001p31, +0x10000000000001LL, -21), MAKE_HEX_DOUBLE(+0x1.0p31, +0x1LL, 31), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp30, +0x1fffffffffffffLL, -22), +1000., +100., +4.0, +3.5, + +3.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p1, +0x18000000000001LL, -51), +2.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp1, +0x17ffffffffffffLL, -51), +2.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p0, +0x18000000000001LL, -52), +1.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp0, +0x17ffffffffffffLL, -52),MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52), +1.0, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-1, +0x1fffffffffffffLL, -53), + MAKE_HEX_DOUBLE(+0x1.0000000000001p-1, +0x10000000000001LL, -53), +0.5, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-2, +0x1fffffffffffffLL, -54), 
MAKE_HEX_DOUBLE(+0x1.0000000000001p-2, +0x10000000000001LL, -54), +0.25, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-3, +0x1fffffffffffffLL, -55), + MAKE_HEX_DOUBLE(+0x1.0000000000001p-1022, +0x10000000000001LL, -1074), +DBL_MIN, MAKE_HEX_DOUBLE(+0x0.fffffffffffffp-1022, +0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000fffp-1022, +0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(+0x0.00000000000fep-1022, +0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ep-1022, +0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000cp-1022, +0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ap-1022, +0x0000000000000aLL, -1074), + MAKE_HEX_DOUBLE(+0x0.0000000000008p-1022, +0x00000000000008LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000007p-1022, +0x00000000000007LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000006p-1022, +0x00000000000006LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000005p-1022, +0x00000000000005LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000004p-1022, +0x00000000000004LL, -1074), + MAKE_HEX_DOUBLE(+0x0.0000000000003p-1022, +0x00000000000003LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000002p-1022, +0x00000000000002LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000001p-1022, +0x00000000000001LL, -1074), +0.0, +}; +static size_t specialValuesDoubleCount = sizeof( specialValuesDouble ) / sizeof( specialValuesDouble[0] ); + +static const int specialValuesInt2[] = { 0, 1, 2, 3, 1022, 1023, 1024, INT_MIN, INT_MAX, + -1, -2, -3, -1022, -1023, -11024, -INT_MAX }; +static size_t specialValuesInt2Count = sizeof( specialValuesInt ) / sizeof( specialValuesInt[0] ); + +static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *p ); + +int TestFunc_Double_Double_Int(const Func *f, MTdata d) +{ + TestInfo test_info; + cl_int error; + size_t i, j; + float maxError = 0.0f; + double maxErrorVal = 0.0; + cl_int maxErrorVal2 = 0; + + logFunctionInfo(f->name,sizeof(cl_double),gTestFastRelaxed); + + // Init test_info + memset( &test_info, 0, sizeof( test_info ) ); + 
test_info.threadCount = GetThreadCount(); + test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); + test_info.scale = 1; + if (gWimpyMode) + { + test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); + test_info.scale = (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor; + } + test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale; + test_info.f = f; + test_info.ulps = f->double_ulps; + test_info.ftz = f->ftz || gForceFTZ; + + // cl_kernels aren't thread safe, so we make one for each vector size for every thread + for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) + { + size_t array_size = test_info.threadCount * sizeof( cl_kernel ); + test_info.k[i] = (cl_kernel*)malloc( array_size ); + if( NULL == test_info.k[i] ) + { + vlog_error( "Error: Unable to allocate storage for kernels!\n" ); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset( test_info.k[i], 0, array_size ); + } + test_info.tinfo = (ThreadInfo*)malloc( test_info.threadCount * sizeof(*test_info.tinfo) ); + if( NULL == test_info.tinfo ) + { + vlog_error( "Error: Unable to allocate storage for thread specific data.\n" ); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset( test_info.tinfo, 0, test_info.threadCount * sizeof(*test_info.tinfo) ); + for( i = 0; i < test_info.threadCount; i++ ) + { + cl_buffer_region region = { i * test_info.subBufferSize * sizeof( cl_double), test_info.subBufferSize * sizeof( cl_double) }; + test_info.tinfo[i].inBuf = clCreateSubBuffer( gInBuffer, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if( error || NULL == test_info.tinfo[i].inBuf) + { + vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size ); + goto exit; + } + cl_buffer_region region2 = { i * test_info.subBufferSize * sizeof( cl_int), test_info.subBufferSize * sizeof( cl_int) }; + 
test_info.tinfo[i].inBuf2 = clCreateSubBuffer( gInBuffer2, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, ®ion2, &error); + if( error || NULL == test_info.tinfo[i].inBuf) + { + vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size ); + goto exit; + } + + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + /* Qualcomm fix: 9461 read-write flags must be compatible with parent buffer */ + test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + /* Qualcomm fix: end */ + if( error || NULL == test_info.tinfo[i].outBuf[j] ) + { + vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size ); + goto exit; + } + } + test_info.tinfo[i].tQueue = clCreateCommandQueueWithProperties(gContext, gDevice, 0, &error); + if( NULL == test_info.tinfo[i].tQueue || error ) + { + vlog_error( "clCreateCommandQueue failed. (%d)\n", error ); + goto exit; + } + + test_info.tinfo[i].d = init_genrand(genrand_int32(d)); + } + + + // Init the kernels + { + BuildKernelInfo build_info = { gMinVectorSizeIndex, test_info.threadCount, test_info.k, test_info.programs, f->nameInCode }; + if( (error = ThreadPool_Do( BuildKernel_DoubleFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) )) + goto exit; + } + + // Run the kernels + if( !gSkipCorrectnessTesting ) + error = ThreadPool_Do( TestDouble, (cl_uint) ((1ULL<<32) / test_info.step), &test_info ); + + + // Accumulate the arithmetic errors + for( i = 0; i < test_info.threadCount; i++ ) + { + if( test_info.tinfo[i].maxError > maxError ) + { + maxError = test_info.tinfo[i].maxError; + maxErrorVal = test_info.tinfo[i].maxErrorValue; + maxErrorVal2 = test_info.tinfo[i].maxErrorValue2; + } + } + + if( error ) + goto exit; + + if( ! 
gSkipCorrectnessTesting ) + { + if( gWimpyMode ) + vlog( "Wimp pass" ); + else + vlog( "passed" ); + } + + if( gMeasureTimes ) + { + //Init input arrays + double *p = (double *)gIn; + cl_int *p2 = (cl_int *)gIn2; + for( j = 0; j < BUFFER_SIZE / sizeof( cl_double ); j++ ) + { + p[j] = DoubleFromUInt32(genrand_int32(d)); + p2[j] = 3; + } + + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, BUFFER_SIZE, gIn, 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); + return error; + } + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, BUFFER_SIZE/2, gIn2, 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error ); + return error; + } + + + // Run the kernels + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + size_t vectorSize = sizeof( cl_double ) * sizeValues[j]; + size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize; // BUFFER_SIZE / vectorSize rounded up + if( ( error = clSetKernelArg( test_info.k[j][0], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(test_info.programs[j]); goto exit; } + if( ( error = clSetKernelArg( test_info.k[j][0], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(test_info.programs[j]); goto exit; } + if( ( error = clSetKernelArg( test_info.k[j][0], 2, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(test_info.programs[j]); goto exit; } + + double sum = 0.0; + double bestTime = INFINITY; + for( i = 0; i < PERF_LOOP_COUNT; i++ ) + { + uint64_t startTime = GetTime(); + if( (error = clEnqueueNDRangeKernel(gQueue, test_info.k[j][0], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) + { + vlog_error( "FAILED -- could not execute kernel\n" ); + goto exit; + } + + // Make sure OpenCL is done + if( (error = clFinish(gQueue) ) ) + { + vlog_error( "Error %d at clFinish\n", error ); + goto exit; + } + + uint64_t endTime = GetTime(); + double time = SubtractTime( endTime, startTime ); + sum += time; 
+ if( time < bestTime ) + bestTime = time; + } + + if( gReportAverageTimes ) + bestTime = sum / PERF_LOOP_COUNT; + double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (BUFFER_SIZE / sizeof( double ) ); + vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s", f->name, sizeNames[j] ); + } + for( ; j < gMaxVectorSizeIndex; j++ ) + vlog( "\t -- " ); + } + + if( ! gSkipCorrectnessTesting ) + vlog( "\t%8.2f @ {%a, %d}", maxError, maxErrorVal, maxErrorVal2 ); + vlog( "\n" ); + + +exit: + // Release + for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) + { + clReleaseProgram(test_info.programs[i]); + if( test_info.k[i] ) + { + for( j = 0; j < test_info.threadCount; j++ ) + clReleaseKernel(test_info.k[i][j]); + + free( test_info.k[i] ); + } + } + if( test_info.tinfo ) + { + for( i = 0; i < test_info.threadCount; i++ ) + { + free_mtdata(test_info.tinfo[i].d); + clReleaseMemObject(test_info.tinfo[i].inBuf); + clReleaseMemObject(test_info.tinfo[i].inBuf2); + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + clReleaseMemObject(test_info.tinfo[i].outBuf[j]); + clReleaseCommandQueue(test_info.tinfo[i].tQueue); + } + + free( test_info.tinfo ); + } + + return error; +} + +static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data ) +{ + const TestInfo *job = (const TestInfo *) data; + size_t buffer_elements = job->subBufferSize; + size_t buffer_size = buffer_elements * sizeof( cl_double ); + cl_uint base = job_id * (cl_uint) job->step; + ThreadInfo *tinfo = job->tinfo + thread_id; + float ulps = job->ulps; + dptr func = job->f->dfunc; + int ftz = job->ftz; + MTdata d = tinfo->d; + cl_uint j, k; + cl_int error; + const char *name = job->f->name; + cl_ulong *t; + cl_double *r,*s; + cl_int *s2; + + Force64BitFPUPrecision(); + + // start the map of the output arrays + cl_event e[ VECTOR_SIZE_COUNT ]; + cl_ulong *out[ VECTOR_SIZE_COUNT ]; + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; 
j++ ) + { + out[j] = (cl_ulong*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0, buffer_size, 0, NULL, e + j, &error); + if( error || NULL == out[j]) + { + vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error ); + return error; + } + } + + // Get that moving + if( (error = clFlush(tinfo->tQueue) )) + vlog( "clFlush failed\n" ); + + //Init input array + cl_ulong *p = (cl_ulong *)gIn + thread_id * buffer_elements; + cl_int *p2 = (cl_int *)gIn2 + thread_id * buffer_elements; + j = 0; + int totalSpecialValueCount = specialValuesDoubleCount * specialValuesInt2Count; + int indx = (totalSpecialValueCount - 1) / buffer_elements; + if( job_id <= (cl_uint)indx ) + { // test edge cases + cl_double *fp = (cl_double *)p; + cl_int *ip2 = (cl_int *)p2; + uint32_t x, y; + + x = (job_id * buffer_elements) % specialValuesDoubleCount; + y = (job_id * buffer_elements) / specialValuesDoubleCount; + + for( ; j < buffer_elements; j++ ) + { + fp[j] = specialValuesDouble[x]; + ip2[j] = specialValuesInt2[y]; + if( ++x >= specialValuesDoubleCount ) + { + x = 0; + y++; + if( y >= specialValuesInt2Count ) + break; + } + } + } + + //Init any remaining values. + for( ; j < buffer_elements; j++ ) + { + p[j] = DoubleFromUInt32(genrand_int32(d)); + p2[j] = genrand_int32(d); + } + + if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, buffer_size, p, 0, NULL, NULL) )) + { + vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error ); + goto exit; + } + + if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0, buffer_size/2, p2, 0, NULL, NULL) )) + { + vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error ); + goto exit; + } + + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + //Wait for the map to finish + if( (error = clWaitForEvents(1, e + j) )) + { + vlog_error( "Error: clWaitForEvents failed! 
err: %d\n", error ); + goto exit; + } + if( (error = clReleaseEvent( e[j] ) )) + { + vlog_error( "Error: clReleaseEvent failed! err: %d\n", error ); + goto exit; + } + + // Fill the result buffer with garbage, so that old results don't carry over + uint32_t pattern = 0xffffdead; + memset_pattern4(out[j], &pattern, buffer_size); + if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL) )) + { + vlog_error( "Error: clEnqueueMapBuffer failed! err: %d\n", error ); + goto exit; + } + + // run the kernel + size_t vectorCount = (buffer_elements + sizeValues[j] - 1) / sizeValues[j]; + cl_kernel kernel = job->k[j][thread_id]; //each worker thread has its own copy of the cl_kernel + cl_program program = job->programs[j]; + + if( ( error = clSetKernelArg( kernel, 0, sizeof( tinfo->outBuf[j] ), &tinfo->outBuf[j] ))){ LogBuildError(program); return error; } + if( ( error = clSetKernelArg( kernel, 1, sizeof( tinfo->inBuf ), &tinfo->inBuf ) )) { LogBuildError(program); return error; } + if( ( error = clSetKernelArg( kernel, 2, sizeof( tinfo->inBuf2 ), &tinfo->inBuf2 ) )) { LogBuildError(program); return error; } + + if( (error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL, &vectorCount, NULL, 0, NULL, NULL))) + { + vlog_error( "FAILED -- could not execute kernel\n" ); + goto exit; + } + } + + // Get that moving + if( (error = clFlush(tinfo->tQueue) )) + vlog( "clFlush 2 failed\n" ); + + if( gSkipCorrectnessTesting ) + return CL_SUCCESS; + + //Calculate the correctly rounded reference result + r = (cl_double *)gOut_Ref + thread_id * buffer_elements; + s = (cl_double *)gIn + thread_id * buffer_elements; + s2 = (cl_int *)gIn2 + thread_id * buffer_elements; + for( j = 0; j < buffer_elements; j++ ) + r[j] = (cl_double) func.f_fi( s[j], s2[j] ); + + // Read the data back -- no need to wait for the first N-1 buffers. This is an in order queue. 
+ for( j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++ ) + { + out[j] = (cl_ulong*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error); + if( error || NULL == out[j] ) + { + vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error ); + goto exit; + } + } + + // Wait for the last buffer + out[j] = (cl_ulong*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_TRUE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error); + if( error || NULL == out[j] ) + { + vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error ); + goto exit; + } + + //Verify data + t = (cl_ulong *)r; + for( j = 0; j < buffer_elements; j++ ) + { + for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) + { + cl_ulong *q = out[k]; + + // If we aren't getting the correctly rounded result + if( t[j] != q[j] ) + { + cl_double test = ((cl_double*) q)[j]; + long double correct = func.f_fi( s[j], s2[j] ); + float err = Ulp_Error_Double( test, correct ); + int fail = ! (fabsf(err) <= ulps); + + if( fail && ftz ) + { + // retry per section 6.5.3.2 + if( IsDoubleResultSubnormal(correct, ulps ) ) + { + fail = fail && ( test != 0.0f ); + if( ! fail ) + err = 0.0f; + } + + // retry per section 6.5.3.3 + if( IsDoubleSubnormal( s[j] ) ) + { + long double correct2 = func.f_fi( 0.0, s2[j] ); + long double correct3 = func.f_fi( -0.0, s2[j] ); + float err2 = Ulp_Error_Double( test, correct2 ); + float err3 = Ulp_Error_Double( test, correct3 ); + fail = fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps))); + if( fabsf( err2 ) < fabsf(err ) ) + err = err2; + if( fabsf( err3 ) < fabsf(err ) ) + err = err3; + + // retry per section 6.5.3.4 + if( IsDoubleResultSubnormal( correct2, ulps ) || IsDoubleResultSubnormal( correct3, ulps ) ) + { + fail = fail && ( test != 0.0f); + if( ! 
fail ) + err = 0.0f; + } + } + } + + if( fabsf(err ) > tinfo->maxError ) + { + tinfo->maxError = fabsf(err); + tinfo->maxErrorValue = s[j]; + tinfo->maxErrorValue2 = s2[j]; + } + if( fail ) + { + vlog_error( "\nERROR: %s%s: %f ulp error at {%.13la, %d}: *%.13la vs. %.13la\n", name, sizeNames[k], err, s[j], s2[j], r[j], test ); + error = -1; + goto exit; + } + } + } + } + + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)) ) + { + vlog_error( "Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n", j, error ); + return error; + } + } + + if( (error = clFlush(tinfo->tQueue) )) + vlog( "clFlush 3 failed\n" ); + + + if( 0 == ( base & 0x0fffffff) ) + { + if (gVerboseBruteForce) + { + vlog("base:%14u step:%10u scale:%10u buf_elements:%10zd ulps:%5.3f ThreadCount:%2u\n", base, job->step, job->scale, buffer_elements, job->ulps, job->threadCount); + } else + { + vlog("." ); + } + fflush(stdout); + } + +exit: + return error; + +} + + + diff --git a/test_conformance/math_brute_force/binary_two_results_i.c b/test_conformance/math_brute_force/binary_two_results_i.c new file mode 100644 index 00000000..ab06c450 --- /dev/null +++ b/test_conformance/math_brute_force/binary_two_results_i.c @@ -0,0 +1,1133 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "Utility.h" + +#include +#include +#include "FunctionList.h" + +#define PARALLEL_REFERENCE + +int TestFunc_FloatI_Float_Float(const Func *f, MTdata); +int TestFunc_DoubleI_Double_Double(const Func *f, MTdata); + +#if defined( __cplusplus ) + extern "C" +#endif +const vtbl _binary_two_results_i = { "binary_two_results_i", TestFunc_FloatI_Float_Float, TestFunc_DoubleI_Double_Double }; + +static int BuildKernel( const char *name, int vectorSize, cl_kernel *k, cl_program *p ); +static int BuildKernelDouble( const char *name, int vectorSize, cl_kernel *k, cl_program *p ); + +static int BuildKernel( const char *name, int vectorSize, cl_kernel *k, cl_program *p ) +{ + const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global float", sizeNames[vectorSize], "* out, __global int", sizeNames[vectorSize], "* out2, __global float", sizeNames[vectorSize], "* in1, __global float", sizeNames[vectorSize], "* in2)\n" + "{\n" + " int i = get_global_id(0);\n" + " out[i] = ", name, "( in1[i], in2[i], out2 + i );\n" + "}\n" + }; + + const char *c3[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global float* out, __global int* out2, __global float* in, __global float* in2)\n" + "{\n" + " size_t i = get_global_id(0);\n" + " if( i + 1 < get_global_size(0) )\n" + " {\n" + " float3 f0 = vload3( 0, in + 3 * i );\n" + " float3 f1 = vload3( 0, in2 + 3 * i );\n" + " int3 i0 = 0xdeaddead;\n" + " f0 = ", name, "( f0, f1, &i0 );\n" + " vstore3( f0, 0, out + 3*i );\n" + " vstore3( i0, 0, out2 + 3*i );\n" + " }\n" + " else\n" + " {\n" + " size_t parity = i & 1; // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). 
Assume power of two buffer size \n" + " float3 f0, f1;\n" + " switch( parity )\n" + " {\n" + " case 1:\n" + " f0 = (float3)( in[3*i], NAN, NAN ); \n" + " f1 = (float3)( in2[3*i], NAN, NAN ); \n" + " break;\n" + " case 0:\n" + " f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n" + " f1 = (float3)( in2[3*i], in2[3*i+1], NAN ); \n" + " break;\n" + " }\n" + " int3 i0 = 0xdeaddead;\n" + " f0 = ", name, "( f0, f1, &i0 );\n" + " switch( parity )\n" + " {\n" + " case 0:\n" + " out[3*i+1] = f0.y; \n" + " out2[3*i+1] = i0.y; \n" + " // fall through\n" + " case 1:\n" + " out[3*i] = f0.x; \n" + " out2[3*i] = i0.x; \n" + " break;\n" + " }\n" + " }\n" + "}\n" + }; + + const char **kern = c; + size_t kernSize = sizeof(c)/sizeof(c[0]); + + if( sizeValues[vectorSize] == 3 ) + { + kern = c3; + kernSize = sizeof(c3)/sizeof(c3[0]); + } + + char testName[32]; + snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); + + return MakeKernel(kern, (cl_uint) kernSize, testName, k, p); +} + +static int BuildKernelDouble( const char *name, int vectorSize, cl_kernel *k, cl_program *p ) +{ + const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", + "__kernel void math_kernel", sizeNames[vectorSize], "( __global double", sizeNames[vectorSize], "* out, __global int", sizeNames[vectorSize], "* out2, __global double", sizeNames[vectorSize], "* in1, __global double", sizeNames[vectorSize], "* in2)\n" + "{\n" + " int i = get_global_id(0);\n" + " out[i] = ", name, "( in1[i], in2[i], out2 + i );\n" + "}\n" + }; + + const char *c3[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", + "__kernel void math_kernel", sizeNames[vectorSize], "( __global double* out, __global int* out2, __global double* in, __global double* in2)\n" + "{\n" + " size_t i = get_global_id(0);\n" + " if( i + 1 < get_global_size(0) )\n" + " {\n" + " double3 d0 = vload3( 0, in + 3 * i );\n" + " double3 d1 = vload3( 0, in2 + 3 * i );\n" + " int3 i0 = 0xdeaddead;\n" + " d0 = ", name, "( d0, 
d1, &i0 );\n" + " vstore3( d0, 0, out + 3*i );\n" + " vstore3( i0, 0, out2 + 3*i );\n" + " }\n" + " else\n" + " {\n" + " size_t parity = i & 1; // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n" + " double3 d0, d1;\n" + " switch( parity )\n" + " {\n" + " case 1:\n" + " d0 = (double3)( in[3*i], NAN, NAN ); \n" + " d1 = (double3)( in2[3*i], NAN, NAN ); \n" + " break;\n" + " case 0:\n" + " d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n" + " d1 = (double3)( in2[3*i], in2[3*i+1], NAN ); \n" + " break;\n" + " }\n" + " int3 i0 = 0xdeaddead;\n" + " d0 = ", name, "( d0, d1, &i0 );\n" + " switch( parity )\n" + " {\n" + " case 0:\n" + " out[3*i+1] = d0.y; \n" + " out2[3*i+1] = i0.y; \n" + " // fall through\n" + " case 1:\n" + " out[3*i] = d0.x; \n" + " out2[3*i] = i0.x; \n" + " break;\n" + " }\n" + " }\n" + "}\n" + }; + + const char **kern = c; + size_t kernSize = sizeof(c)/sizeof(c[0]); + + if( sizeValues[vectorSize] == 3 ) + { + kern = c3; + kernSize = sizeof(c3)/sizeof(c3[0]); + } + + char testName[32]; + snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); + + return MakeKernel(kern, (cl_uint) kernSize, testName, k, p); +} + +typedef struct BuildKernelInfo +{ + cl_uint offset; // the first vector size to build + cl_kernel *kernels; + cl_program *programs; + const char *nameInCode; +}BuildKernelInfo; + +static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); +static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ) +{ + BuildKernelInfo *info = (BuildKernelInfo*) p; + cl_uint i = info->offset + job_id; + return BuildKernel( info->nameInCode, i, info->kernels + i, info->programs + i ); +} + +static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); +static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ) +{ + BuildKernelInfo *info = (BuildKernelInfo*) 
p; + cl_uint i = info->offset + job_id; + return BuildKernelDouble( info->nameInCode, i, info->kernels + i, info->programs + i ); +} + +#if defined PARALLEL_REFERENCE +typedef struct ComputeReferenceInfoF_ +{ + const float *x; + const float *y; + float *r; + int *i; + double (*f_ffpI)(double, double, int*); + cl_uint lim; + cl_uint count; +} ComputeReferenceInfoF; + +typedef struct ComputeReferenceInfoD_ +{ + const double *x; + const double *y; + double *r; + int *i; + long double (*f_ffpI)(long double, long double, int*); + cl_uint lim; + cl_uint count; +} ComputeReferenceInfoD; + +static cl_int +ReferenceF(cl_uint jid, cl_uint tid, void *userInfo) +{ + ComputeReferenceInfoF *cri = (ComputeReferenceInfoF *)userInfo; + cl_uint lim = cri->lim; + cl_uint count = cri->count; + cl_uint off = jid * count; + const float *x = cri->x + off; + const float *y = cri->y + off; + float *r = cri->r + off; + int *i = cri->i + off; + double (*f)(double, double, int *) = cri->f_ffpI; + cl_uint j; + + if (off + count > lim) + count = lim - off; + + for (j = 0; j < count; ++j) + r[j] = (float)f((double)x[j], (double)y[j], i + j); + + return CL_SUCCESS; +} + +static cl_int +ReferenceD(cl_uint jid, cl_uint tid, void *userInfo) +{ + ComputeReferenceInfoD *cri = (ComputeReferenceInfoD *)userInfo; + cl_uint lim = cri->lim; + cl_uint count = cri->count; + cl_uint off = jid * count; + const double *x = cri->x + off; + const double *y = cri->y + off; + double *r = cri->r + off; + int *i = cri->i + off; + long double (*f)(long double, long double, int *) = cri->f_ffpI; + cl_uint j; + + if (off + count > lim) + count = lim - off; + + Force64BitFPUPrecision(); + + for (j = 0; j < count; ++j) + r[j] = (double)f((long double)x[j], (long double)y[j], i + j); + + return CL_SUCCESS; +} + +#endif + +int TestFunc_FloatI_Float_Float(const Func *f, MTdata d) +{ + uint64_t i; + uint32_t j, k; + int error; + cl_program programs[ VECTOR_SIZE_COUNT ]; + cl_kernel kernels[ VECTOR_SIZE_COUNT ]; + float 
maxError = 0.0f; + float float_ulps; + int64_t maxError2 = 0; + int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); + float maxErrorVal = 0.0f; + float maxErrorVal2 = 0.0f; + size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE; + uint64_t step = bufferSize / sizeof( float ); + +#if defined PARALLEL_REFERENCE + cl_uint threadCount = GetThreadCount(); +#endif + logFunctionInfo(f->name,sizeof(cl_float),gTestFastRelaxed); + + if(gWimpyMode ){ + step = (1ULL<<32) * gWimpyReductionFactor / (512); + } + + if( gIsEmbedded ) + float_ulps = f->float_embedded_ulps; + else + float_ulps = f->float_ulps; + + int testingRemquo = !strcmp(f->name, "remquo"); + + // Init the kernels + { + BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, f->nameInCode }; + if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) )) + return error; + } +/* + for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) + if( (error = BuildKernel( f->nameInCode, (int) i, kernels + i, programs + i) ) ) + return error; +*/ + + for( i = 0; i < (1ULL<<32); i += step ) + { + //Init input array + cl_uint *p = (cl_uint *)gIn; + cl_uint *p2 = (cl_uint *)gIn2; + for( j = 0; j < bufferSize / sizeof( float ); j++ ) + { + p[j] = genrand_int32(d); + p2[j] = genrand_int32(d); + } + + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); + return error; + } + + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_TRUE, 0, bufferSize, gIn2, 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error ); + return error; + } + + // write garbage into output arrays + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + uint32_t pattern = 0xffffdead; + memset_pattern4(gOut[j], &pattern, bufferSize); + if( (error = clEnqueueWriteBuffer(gQueue, 
gOutBuffer[j], CL_FALSE, 0, bufferSize, gOut[j], 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", error, j ); + goto exit; + } + + memset_pattern4(gOut2[j], &pattern, bufferSize); + if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j], CL_FALSE, 0, bufferSize, gOut2[j], 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n", error, j ); + goto exit; + } + } + + // Run the kernels + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + size_t vectorSize = sizeof( cl_float ) * sizeValues[j]; + size_t localCount = (bufferSize + vectorSize - 1) / vectorSize; // bufferSize / vectorSize rounded up + if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; } + if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gOutBuffer2[j] ), &gOutBuffer2[j] ) )) { LogBuildError(programs[j]); goto exit; } + if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; } + if( ( error = clSetKernelArg( kernels[j], 3, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(programs[j]); goto exit; } + + if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) + { + vlog_error( "FAILED -- could not execute kernel\n" ); + goto exit; + } + } + + // Get that moving + if( (error = clFlush(gQueue) )) + vlog( "clFlush failed\n" ); + + // Calculate the correctly rounded reference result + float *s = (float *)gIn; + float *s2 = (float *)gIn2; + +#if defined PARALLEL_REFERENCE + if (threadCount > 1) { + ComputeReferenceInfoF cri; + cri.x = s; + cri.y = s2; + cri.r = (float *)gOut_Ref; + cri.i = (int *)gOut_Ref2; + cri.f_ffpI = f->func.f_ffpI; + cri.lim = bufferSize / sizeof( float ); + cri.count = (cri.lim + threadCount - 1) / threadCount; + ThreadPool_Do(ReferenceF, threadCount, &cri); + } else { +#endif + float *r = (float 
*)gOut_Ref; + int *r2 = (int *)gOut_Ref2; + for( j = 0; j < bufferSize / sizeof( float ); j++ ) + r[j] = (float) f->func.f_ffpI( s[j], s2[j], r2+j ); +#if defined PARALLEL_REFERENCE + } +#endif + + // Read the data back + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL)) ) + { + vlog_error( "ReadArray failed %d\n", error ); + goto exit; + } + if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer2[j], CL_TRUE, 0, bufferSize, gOut2[j], 0, NULL, NULL)) ) + { + vlog_error( "ReadArray2 failed %d\n", error ); + goto exit; + } + } + + if( gSkipCorrectnessTesting ) + break; + + //Verify data + uint32_t *t = (uint32_t *)gOut_Ref; + int32_t *t2 = (int32_t *)gOut_Ref2; + for( j = 0; j < bufferSize / sizeof( float ); j++ ) + { + for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) + { + uint32_t *q = (uint32_t *)gOut[k]; + int32_t *q2 = (int32_t *)gOut2[k]; + + // Check for exact match to correctly rounded result + if (t[j] == q[j] && t2[j] == q2[j]) + continue; + + // Check for paired NaNs + if ((t[j] & 0x7fffffff) > 0x7f800000 && (q[j] & 0x7fffffff) > 0x7f800000 && t2[j] == q2[j]) + continue; + + // if( t[j] != q[j] || t2[j] != q2[j] ) + { + float test = ((float*) q)[j]; + int correct2 = INT_MIN; + double correct = f->func.f_ffpI( s[j], s2[j], &correct2 ); + float err = Ulp_Error( test, correct ); + int64_t iErr; + + // in case of remquo, we only care about the sign and last seven bits of + // integer as per the spec. + if(testingRemquo) + iErr = (long long) (q2[j] & 0x0000007f) - (long long) (correct2 & 0x0000007f); + else + iErr = (long long) q2[j] - (long long) correct2; + + //For remquo, if y = 0, x is infinite, or either is NaN then the standard either neglects + //to say what is returned in iptr or leaves it undefined or implementation defined. 
+ int iptrUndefined = fabs(((float*) gIn)[j]) == INFINITY || + ((float*) gIn2)[j] == 0.0f || + isnan(((float*) gIn2)[j]) || + isnan(((float*) gIn)[j]); + if(iptrUndefined) + iErr = 0; + + int fail = ! (fabsf(err) <= float_ulps && iErr == 0 ); + if( ftz && fail ) + { + // retry per section 6.5.3.2 + if( IsFloatResultSubnormal(correct, float_ulps ) ) + { + fail = fail && ! ( test == 0.0f && iErr == 0 ); + if( ! fail ) + err = 0.0f; + } + + // retry per section 6.5.3.3 + if( IsFloatSubnormal( s[j] ) ) + { + int correct3i, correct4i; + double correct3 = f->func.f_ffpI( 0.0, s2[j], &correct3i ); + double correct4 = f->func.f_ffpI( -0.0, s2[j], &correct4i ); + float err2 = Ulp_Error( test, correct3 ); + float err3 = Ulp_Error( test, correct4 ); + int64_t iErr3 = (long long) q2[j] - (long long) correct3i; + int64_t iErr4 = (long long) q2[j] - (long long) correct4i; + fail = fail && ((!(fabsf(err2) <= float_ulps && iErr3 == 0)) && (!(fabsf(err3) <= float_ulps && iErr4 == 0))); + if( fabsf( err2 ) < fabsf(err ) ) + err = err2; + if( fabsf( err3 ) < fabsf(err ) ) + err = err3; + if( llabs(iErr3) < llabs( iErr ) ) + iErr = iErr3; + if( llabs(iErr4) < llabs( iErr ) ) + iErr = iErr4; + + // retry per section 6.5.3.4 -- check the retried reference results (correct2 is the int quotient, not a float result) + if( IsFloatResultSubnormal(correct3, float_ulps ) || IsFloatResultSubnormal(correct4, float_ulps ) ) + { + fail = fail && ! ( test == 0.0f && (iErr3 == 0 || iErr4 == 0) ); + if( ! 
fail ) + err = 0.0f; + } + + //try with both args as zero + if( IsFloatSubnormal( s2[j] ) ) + { + int correct7i, correct8i; + correct3 = f->func.f_ffpI( 0.0, 0.0, &correct3i ); + correct4 = f->func.f_ffpI( -0.0, 0.0, &correct4i ); + double correct7 = f->func.f_ffpI( 0.0, -0.0, &correct7i ); + double correct8 = f->func.f_ffpI( -0.0, -0.0, &correct8i ); + err2 = Ulp_Error( test, correct3 ); + err3 = Ulp_Error( test, correct4 ); + float err4 = Ulp_Error( test, correct7 ); + float err5 = Ulp_Error( test, correct8 ); + iErr3 = (long long) q2[j] - (long long) correct3i; + iErr4 = (long long) q2[j] - (long long) correct4i; + int64_t iErr7 = (long long) q2[j] - (long long) correct7i; + int64_t iErr8 = (long long) q2[j] - (long long) correct8i; + fail = fail && ((!(fabsf(err2) <= float_ulps && iErr3 == 0)) && (!(fabsf(err3) <= float_ulps && iErr4 == 0)) && + (!(fabsf(err4) <= float_ulps && iErr7 == 0)) && (!(fabsf(err5) <= float_ulps && iErr8 == 0))); + if( fabsf( err2 ) < fabsf(err ) ) + err = err2; + if( fabsf( err3 ) < fabsf(err ) ) + err = err3; + if( fabsf( err4 ) < fabsf(err ) ) + err = err4; + if( fabsf( err5 ) < fabsf(err ) ) + err = err5; + if( llabs(iErr3) < llabs( iErr ) ) + iErr = iErr3; + if( llabs(iErr4) < llabs( iErr ) ) + iErr = iErr4; + if( llabs(iErr7) < llabs( iErr ) ) + iErr = iErr7; + if( llabs(iErr8) < llabs( iErr ) ) + iErr = iErr8; + + // retry per section 6.5.3.4 + if( IsFloatResultSubnormal(correct3, float_ulps ) || IsFloatResultSubnormal(correct4, float_ulps ) || + IsFloatResultSubnormal(correct7, float_ulps ) || IsFloatResultSubnormal(correct8, float_ulps ) ) + { + fail = fail && ! ( test == 0.0f && (iErr3 == 0 || iErr4 == 0 || iErr7 == 0 || iErr8 == 0)); + if( ! 
fail ) + err = 0.0f; + } + } + } + else if( IsFloatSubnormal( s2[j] ) ) + { + int correct3i, correct4i; + double correct3 = f->func.f_ffpI( s[j], 0.0, &correct3i ); + double correct4 = f->func.f_ffpI( s[j], -0.0, &correct4i ); + float err2 = Ulp_Error( test, correct3 ); + float err3 = Ulp_Error( test, correct4 ); + int64_t iErr3 = (long long) q2[j] - (long long) correct3i; + int64_t iErr4 = (long long) q2[j] - (long long) correct4i; + fail = fail && ((!(fabsf(err2) <= float_ulps && iErr3 == 0)) && (!(fabsf(err3) <= float_ulps && iErr4 == 0))); + if( fabsf( err2 ) < fabsf(err ) ) + err = err2; + if( fabsf( err3 ) < fabsf(err ) ) + err = err3; + if( llabs(iErr3) < llabs( iErr ) ) + iErr = iErr3; + if( llabs(iErr4) < llabs( iErr ) ) + iErr = iErr4; + + // retry per section 6.5.3.4 + if( IsFloatResultSubnormal(correct2, float_ulps ) || IsFloatResultSubnormal(correct3, float_ulps ) ) + { + fail = fail && ! ( test == 0.0f && (iErr3 == 0 || iErr4 == 0) ); + if( ! fail ) + err = 0.0f; + } + } + } + if( fabsf(err ) > maxError ) + { + maxError = fabsf(err); + maxErrorVal = s[j]; + } + if( llabs(iErr) > maxError2 ) + { + maxError2 = llabs(iErr ); + maxErrorVal2 = s[j]; + } + + if( fail ) + { + vlog_error( "\nERROR: %s%s: {%f, %lld} ulp error at {%a, %a} ({0x%8.8x, 0x%8.8x}): *{%a, %d} ({0x%8.8x, 0x%8.8x}) vs. {%a, %d} ({0x%8.8x, 0x%8.8x})\n", + f->name, sizeNames[k], err, iErr, + ((float*) gIn)[j], ((float*) gIn2)[j], + ((cl_uint*) gIn)[j], ((cl_uint*) gIn2)[j], + ((float*) gOut_Ref)[j], ((int*) gOut_Ref2)[j], + ((cl_uint*) gOut_Ref)[j], ((cl_uint*) gOut_Ref2)[j], + test, q2[j], + ((cl_uint*)&test)[0], ((cl_uint*) q2)[j] ); + error = -1; + goto exit; + } + } + } + } + + if( 0 == (i & 0x0fffffff) ) + { + if (gVerboseBruteForce) + { + vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step, bufferSize); + } else + { + vlog("." ); + } + fflush(stdout); + + } + } + + if( ! 
gSkipCorrectnessTesting ) + { + if( gWimpyMode ) + vlog( "Wimp pass" ); + else + vlog( "passed" ); + } + + if( gMeasureTimes ) + { + //Init input array + uint32_t *p = (uint32_t *)gIn; + for( j = 0; j < bufferSize / sizeof( float ); j++ ) + p[j] = genrand_int32(d); + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); + return error; + } + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, bufferSize, gIn2, 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); + return error; + } + + + // Run the kernels + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + size_t vectorSize = sizeof( cl_float ) * sizeValues[j]; + size_t localCount = (bufferSize + vectorSize - 1) / vectorSize; // bufferSize / vectorSize rounded up + if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; } + if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gOutBuffer2[j] ), &gOutBuffer2[j] ) )) { LogBuildError(programs[j]); goto exit; } + if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; } + if( ( error = clSetKernelArg( kernels[j], 3, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(programs[j]); goto exit; } + + double sum = 0.0; + double bestTime = INFINITY; + for( k = 0; k < PERF_LOOP_COUNT; k++ ) + { + uint64_t startTime = GetTime(); + if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) + { + vlog_error( "FAILED -- could not execute kernel\n" ); + goto exit; + } + + // Make sure OpenCL is done + if( (error = clFinish(gQueue) ) ) + { + vlog_error( "Error %d at clFinish\n", error ); + goto exit; + } + + uint64_t endTime = GetTime(); + double time = SubtractTime( endTime, startTime ); + sum += time; + 
if( time < bestTime ) + bestTime = time; + } + + if( gReportAverageTimes ) + bestTime = sum / PERF_LOOP_COUNT; + double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (bufferSize / sizeof( float ) ); + vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s", f->name, sizeNames[j] ); + } + } + + if( ! gSkipCorrectnessTesting ) + vlog( "\t{%8.2f, %lld} @ %a", maxError, maxError2, maxErrorVal ); + vlog( "\n" ); + +exit: + // Release + for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) + { + clReleaseKernel(kernels[k]); + clReleaseProgram(programs[k]); + } + + return error; +} + +int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d) +{ + uint64_t i; + uint32_t j, k; + int error; + cl_program programs[ VECTOR_SIZE_COUNT ]; + cl_kernel kernels[ VECTOR_SIZE_COUNT ]; + float maxError = 0.0f; + int64_t maxError2 = 0; + int ftz = f->ftz || gForceFTZ; + double maxErrorVal = 0.0f; + double maxErrorVal2 = 0.0f; + size_t bufferSize = (gWimpyMode)? 
gWimpyBufferSize: BUFFER_SIZE; + uint64_t step = bufferSize / sizeof( double ); + + logFunctionInfo(f->name,sizeof(cl_double),gTestFastRelaxed); + if(gWimpyMode ){ + step = (1ULL<<32) * gWimpyReductionFactor / (512); + } + +#if defined PARALLEL_REFERENCE + cl_uint threadCount = GetThreadCount(); +#endif + + Force64BitFPUPrecision(); + + int testingRemquo = !strcmp(f->name, "remquo"); + + // Init the kernels + { + BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, f->nameInCode }; + if( (error = ThreadPool_Do( BuildKernel_DoubleFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info ) )) + { + return error; + } + } +/* + for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) + if( (error = BuildKernelDouble( f->nameInCode, (int) i, kernels + i, programs + i) ) ) + return error; +*/ + + for( i = 0; i < (1ULL<<32); i += step ) + { + //Init input array + double *p = (double *)gIn; + double *p2 = (double *)gIn2; + for( j = 0; j < bufferSize / sizeof( double ); j++ ) + { + p[j] = DoubleFromUInt32(genrand_int32(d)); + p2[j] = DoubleFromUInt32(genrand_int32(d)); + } + + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_TRUE, 0, bufferSize, gIn, 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); + return error; + } + + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_TRUE, 0, bufferSize, gIn2, 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error ); + return error; + } + + // write garbage into output arrays + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + uint32_t pattern = 0xffffdead; + memset_pattern4(gOut[j], &pattern, bufferSize); + if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", error, j ); + goto exit; + } + + memset_pattern4(gOut2[j], &pattern, bufferSize); + if( (error = 
clEnqueueWriteBuffer(gQueue, gOutBuffer2[j], CL_TRUE, 0, bufferSize, gOut2[j], 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n", error, j ); + goto exit; + } + } + + // Run the kernels + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + size_t vectorSize = sizeof( cl_double ) * sizeValues[j]; + size_t localCount = (bufferSize + vectorSize - 1) / vectorSize; // bufferSize / vectorSize rounded up + if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; } + if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gOutBuffer2[j] ), &gOutBuffer2[j] ) )) { LogBuildError(programs[j]); goto exit; } + if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; } + if( ( error = clSetKernelArg( kernels[j], 3, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(programs[j]); goto exit; } + + if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) + { + vlog_error( "FAILED -- could not execute kernel\n" ); + goto exit; + } + } + + // Get that moving + if( (error = clFlush(gQueue) )) + vlog( "clFlush failed\n" ); + + //Calculate the correctly rounded reference result + double *s = (double *)gIn; + double *s2 = (double *)gIn2; + +#if defined PARALLEL_REFERENCE + if (threadCount > 1) { + ComputeReferenceInfoD cri; + cri.x = s; + cri.y = s2; + cri.r = (double *)gOut_Ref; + cri.i = (int *)gOut_Ref2; + cri.f_ffpI = f->dfunc.f_ffpI; + cri.lim = bufferSize / sizeof( double ); + cri.count = (cri.lim + threadCount - 1) / threadCount; + ThreadPool_Do(ReferenceD, threadCount, &cri); + } else { +#endif + double *r = (double *)gOut_Ref; + int *r2 = (int *)gOut_Ref2; + for( j = 0; j < bufferSize / sizeof( double ); j++ ) + r[j] = (double) f->dfunc.f_ffpI( s[j], s2[j], r2+j ); +#if defined PARALLEL_REFERENCE + } +#endif + + // Read the data back + for( j = 
gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL)) ) + { + vlog_error( "ReadArray failed %d\n", error ); + goto exit; + } + if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer2[j], CL_TRUE, 0, bufferSize, gOut2[j], 0, NULL, NULL)) ) + { + vlog_error( "ReadArray2 failed %d\n", error ); + goto exit; + } + } + + if( gSkipCorrectnessTesting ) + break; + + //Verify data + uint64_t *t = (uint64_t *)gOut_Ref; + int32_t *t2 = (int32_t *)gOut_Ref2; + for( j = 0; j < bufferSize / sizeof( double ); j++ ) + { + for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) + { + uint64_t *q = (uint64_t *)gOut[k]; + int32_t *q2 = (int32_t *)gOut2[k]; + + // Check for exact match to correctly rounded result + if (t[j] == q[j] && t2[j] == q2[j]) + continue; + + // Check for paired NaNs + if ((t[j] & 0x7fffffffffffffffUL) > 0x7ff0000000000000UL && + (q[j] & 0x7fffffffffffffffUL) > 0x7ff0000000000000UL && + t2[j] == q2[j]) + continue; + + // if( t[j] != q[j] || t2[j] != q2[j] ) + { + double test = ((double*) q)[j]; + int correct2 = INT_MIN; + long double correct = f->dfunc.f_ffpI( s[j], s2[j], &correct2 ); + float err = Ulp_Error_Double( test, correct ); + int64_t iErr; + + // in case of remquo, we only care about the sign and last seven bits of + // integer as per the spec. + if(testingRemquo) + iErr = (long long) (q2[j] & 0x0000007f) - (long long) (correct2 & 0x0000007f); + else + iErr = (long long) q2[j] - (long long) correct2; + + //For remquo, if y = 0, x is infinite, or either is NaN then the standard either neglects + //to say what is returned in iptr or leaves it undefined or implementation defined. + int iptrUndefined = fabs(((double*) gIn)[j]) == INFINITY || + ((double*) gIn2)[j] == 0.0 || + isnan(((double*) gIn2)[j]) || + isnan(((double*) gIn)[j]); + if(iptrUndefined) + iErr = 0; + + int fail = ! 
(fabsf(err) <= f->double_ulps && iErr == 0 ); + if( ftz && fail ) + { + // retry per section 6.5.3.2 + if( IsDoubleResultSubnormal(correct, f->double_ulps ) ) + { + fail = fail && ! ( test == 0.0f && iErr == 0 ); + if( ! fail ) + err = 0.0f; + } + + // retry per section 6.5.3.3 + if( IsDoubleSubnormal( s[j] ) ) + { + int correct3i, correct4i; + long double correct3 = f->dfunc.f_ffpI( 0.0, s2[j], &correct3i ); + long double correct4 = f->dfunc.f_ffpI( -0.0, s2[j], &correct4i ); + float err2 = Ulp_Error_Double( test, correct3 ); + float err3 = Ulp_Error_Double( test, correct4 ); + int64_t iErr3 = (long long) q2[j] - (long long) correct3i; + int64_t iErr4 = (long long) q2[j] - (long long) correct4i; + fail = fail && ((!(fabsf(err2) <= f->double_ulps && iErr3 == 0)) && (!(fabsf(err3) <= f->double_ulps && iErr4 == 0))); + if( fabsf( err2 ) < fabsf(err ) ) + err = err2; + if( fabsf( err3 ) < fabsf(err ) ) + err = err3; + if( llabs(iErr3) < llabs( iErr ) ) + iErr = iErr3; + if( llabs(iErr4) < llabs( iErr ) ) + iErr = iErr4; + + // retry per section 6.5.3.4 -- check the retried reference results (correct2 is the int quotient, not a double result) + if( IsDoubleResultSubnormal( correct3, f->double_ulps ) || IsDoubleResultSubnormal( correct4, f->double_ulps ) ) + { + fail = fail && ! ( test == 0.0f && (iErr3 == 0 || iErr4 == 0) ); + if( ! 
fail ) + err = 0.0f; + } + + //try with both args as zero + if( IsDoubleSubnormal( s2[j] ) ) + { + int correct7i, correct8i; + correct3 = f->dfunc.f_ffpI( 0.0, 0.0, &correct3i ); + correct4 = f->dfunc.f_ffpI( -0.0, 0.0, &correct4i ); + long double correct7 = f->dfunc.f_ffpI( 0.0, -0.0, &correct7i ); + long double correct8 = f->dfunc.f_ffpI( -0.0, -0.0, &correct8i ); + err2 = Ulp_Error_Double( test, correct3 ); + err3 = Ulp_Error_Double( test, correct4 ); + float err4 = Ulp_Error_Double( test, correct7 ); + float err5 = Ulp_Error_Double( test, correct8 ); + iErr3 = (long long) q2[j] - (long long) correct3i; + iErr4 = (long long) q2[j] - (long long) correct4i; + int64_t iErr7 = (long long) q2[j] - (long long) correct7i; + int64_t iErr8 = (long long) q2[j] - (long long) correct8i; + fail = fail && ((!(fabsf(err2) <= f->double_ulps && iErr3 == 0)) && (!(fabsf(err3) <= f->double_ulps && iErr4 == 0)) && + (!(fabsf(err4) <= f->double_ulps && iErr7 == 0)) && (!(fabsf(err5) <= f->double_ulps && iErr8 == 0))); + if( fabsf( err2 ) < fabsf(err ) ) + err = err2; + if( fabsf( err3 ) < fabsf(err ) ) + err = err3; + if( fabsf( err4 ) < fabsf(err ) ) + err = err4; + if( fabsf( err5 ) < fabsf(err ) ) + err = err5; + if( llabs(iErr3) < llabs( iErr ) ) + iErr = iErr3; + if( llabs(iErr4) < llabs( iErr ) ) + iErr = iErr4; + if( llabs(iErr7) < llabs( iErr ) ) + iErr = iErr7; + if( llabs(iErr8) < llabs( iErr ) ) + iErr = iErr8; + + // retry per section 6.5.3.4 + if( IsDoubleResultSubnormal( correct3, f->double_ulps ) || IsDoubleResultSubnormal( correct4, f->double_ulps ) || + IsDoubleResultSubnormal( correct7, f->double_ulps ) || IsDoubleResultSubnormal( correct8, f->double_ulps ) ) + { + fail = fail && ! ( test == 0.0f && (iErr3 == 0 || iErr4 == 0 || iErr7 == 0 || iErr8 == 0)); + if( ! 
fail ) + err = 0.0f; + } + } + } + else if( IsDoubleSubnormal( s2[j] ) ) + { + int correct3i, correct4i; + long double correct3 = f->dfunc.f_ffpI( s[j], 0.0, &correct3i ); + long double correct4 = f->dfunc.f_ffpI( s[j], -0.0, &correct4i ); + float err2 = Ulp_Error_Double( test, correct3 ); + float err3 = Ulp_Error_Double( test, correct4 ); + int64_t iErr3 = (long long) q2[j] - (long long) correct3i; + int64_t iErr4 = (long long) q2[j] - (long long) correct4i; + fail = fail && ((!(fabsf(err2) <= f->double_ulps && iErr3 == 0)) && (!(fabsf(err3) <= f->double_ulps && iErr4 == 0))); + if( fabsf( err2 ) < fabsf(err ) ) + err = err2; + if( fabsf( err3 ) < fabsf(err ) ) + err = err3; + if( llabs(iErr3) < llabs( iErr ) ) + iErr = iErr3; + if( llabs(iErr4) < llabs( iErr ) ) + iErr = iErr4; + + // retry per section 6.5.3.4 + if( IsDoubleResultSubnormal( correct2, f->double_ulps ) || IsDoubleResultSubnormal( correct3, f->double_ulps ) ) + { + fail = fail && ! ( test == 0.0f && (iErr3 == 0 || iErr4 == 0) ); + if( ! fail ) + err = 0.0f; + } + } + } + if( fabsf(err ) > maxError ) + { + maxError = fabsf(err); + maxErrorVal = s[j]; + } + if( llabs(iErr) > maxError2 ) + { + maxError2 = llabs(iErr ); + maxErrorVal2 = s[j]; + } + + if( fail ) + { + vlog_error( "\nERROR: %sD%s: {%f, %lld} ulp error at {%.13la, %.13la} ({ 0x%16.16llx, 0x%16.16llx}): *{%.13la, %d} ({ 0x%16.16llx, 0x%8.8x}) vs. {%.13la, %d} ({ 0x%16.16llx, 0x%8.8x})\n", + f->name, sizeNames[k], err, iErr, + ((double*) gIn)[j], ((double*) gIn2)[j], + ((cl_ulong*) gIn)[j], ((cl_ulong*) gIn2)[j], + ((double*) gOut_Ref)[j], ((int*) gOut_Ref2)[j], + ((cl_ulong*) gOut_Ref)[j], ((cl_uint*) gOut_Ref2)[j], + test, q2[j], + ((cl_ulong*) q)[j], ((cl_uint*) q2)[j]); + error = -1; + goto exit; + } + } + } + } + + if( 0 == (i & 0x0fffffff) ) + { + if (gVerboseBruteForce) + { + vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step, bufferSize); + } else + { + vlog("." ); + } + + fflush(stdout); + } + } + + if( ! 
gSkipCorrectnessTesting ) + { + if( gWimpyMode ) + vlog( "Wimp pass" ); + else + vlog( "passed" ); + } + + if( gMeasureTimes ) + { + //Init input array + double *p = (double *)gIn; + for( j = 0; j < bufferSize / sizeof( double ); j++ ) + p[j] = DoubleFromUInt32( genrand_int32(d) ); + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_TRUE, 0, bufferSize, gIn, 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); + return error; + } + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_TRUE, 0, bufferSize, gIn2, 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); + return error; + } + + + // Run the kernels + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + size_t vectorSize = sizeof( cl_double ) * sizeValues[j]; + size_t localCount = (bufferSize + vectorSize - 1) / vectorSize; // bufferSize / vectorSize rounded up + if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; } + if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gOutBuffer2[j] ), &gOutBuffer2[j] ) )) { LogBuildError(programs[j]); goto exit; } + if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; } + if( ( error = clSetKernelArg( kernels[j], 3, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(programs[j]); goto exit; } + + double sum = 0.0; + double bestTime = INFINITY; + for( k = 0; k < PERF_LOOP_COUNT; k++ ) + { + uint64_t startTime = GetTime(); + if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) + { + vlog_error( "FAILED -- could not execute kernel\n" ); + goto exit; + } + + // Make sure OpenCL is done + if( (error = clFinish(gQueue) ) ) + { + vlog_error( "Error %d at clFinish\n", error ); + goto exit; + } + + uint64_t endTime = GetTime(); + double time = SubtractTime( endTime, startTime ); + 
sum += time; + if( time < bestTime ) + bestTime = time; + } + + if( gReportAverageTimes ) + bestTime = sum / PERF_LOOP_COUNT; + double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (bufferSize / sizeof( double ) ); + vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s", f->name, sizeNames[j] ); + } + for( ; j < gMaxVectorSizeIndex; j++ ) + vlog( "\t -- " ); + } + + if( ! gSkipCorrectnessTesting ) + vlog( "\t{%8.2f, %lld} @ %a", maxError, maxError2, maxErrorVal ); + vlog( "\n" ); + +exit: + // Release + for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) + { + clReleaseKernel(kernels[k]); + clReleaseProgram(programs[k]); + } + + return error; +} + + + diff --git a/test_conformance/math_brute_force/i_unary.c b/test_conformance/math_brute_force/i_unary.c new file mode 100644 index 00000000..e41d5d82 --- /dev/null +++ b/test_conformance/math_brute_force/i_unary.c @@ -0,0 +1,629 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "Utility.h" + +#include <string.h> /* NOTE(review): header name was garbled in the patch (angle brackets stripped); string.h matches sibling sources -- confirm */ +#include "FunctionList.h" + +int TestFunc_Int_Float(const Func *f, MTdata); +int TestFunc_Int_Double(const Func *f, MTdata); + +#if defined( __cplusplus) + extern "C" +#endif +const vtbl _i_unary = { "i_unary", TestFunc_Int_Float, TestFunc_Int_Double }; + + +static int BuildKernel( const char *name, int vectorSize, cl_kernel *k, cl_program *p ); +static int BuildKernelDouble( const char *name, int vectorSize, cl_kernel *k, cl_program *p ); + +static int BuildKernel( const char *name, int vectorSize, cl_kernel *k, cl_program *p ) +{ + const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global int", sizeNames[vectorSize], "* out, __global float", sizeNames[vectorSize], "* in)\n" + "{\n" + " int i = get_global_id(0);\n" + " out[i] = ", name, "( in[i] );\n" + "}\n" + }; + const char *c3[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global int* out, __global float* in)\n" + "{\n" + " size_t i = get_global_id(0);\n" + " if( i + 1 < get_global_size(0) )\n" + " {\n" + " float3 f0 = vload3( 0, in + 3 * i );\n" + " int3 i0 = ", name, "( f0 );\n" + " vstore3( i0, 0, out + 3*i );\n" + " }\n" + " else\n" + " {\n" + " size_t parity = i & 1; // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). 
Assume power of two buffer size \n" + " float3 f0;\n" + " switch( parity )\n" + " {\n" + " case 1:\n" + " f0 = (float3)( in[3*i], NAN, NAN ); \n" + " break;\n" + " case 0:\n" + " f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n" + " break;\n" + " }\n" + " int3 i0 = ", name, "( f0 );\n" + " switch( parity )\n" + " {\n" + " case 0:\n" + " out[3*i+1] = i0.y; \n" + " // fall through\n" + " case 1:\n" + " out[3*i] = i0.x; \n" + " break;\n" + " }\n" + " }\n" + "}\n" + }; + + + const char **kern = c; + size_t kernSize = sizeof(c)/sizeof(c[0]); + + if( sizeValues[vectorSize] == 3 ) + { + kern = c3; + kernSize = sizeof(c3)/sizeof(c3[0]); + } + + char testName[32]; + snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); + + return MakeKernel(kern, (cl_uint) kernSize, testName, k, p); +} + +static int BuildKernelDouble( const char *name, int vectorSize, cl_kernel *k, cl_program *p ) +{ + const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", + "__kernel void math_kernel", sizeNames[vectorSize], "( __global int", sizeNames[vectorSize], "* out, __global double", sizeNames[vectorSize], "* in)\n" + "{\n" + " int i = get_global_id(0);\n" + " out[i] = ", name, "( in[i] );\n" + "}\n" + }; + + const char *c3[] = {"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", + "__kernel void math_kernel", sizeNames[vectorSize], "( __global int* out, __global double* in)\n" + "{\n" + " size_t i = get_global_id(0);\n" + " if( i + 1 < get_global_size(0) )\n" + " {\n" + " double3 f0 = vload3( 0, in + 3 * i );\n" + " int3 i0 = ", name, "( f0 );\n" + " vstore3( i0, 0, out + 3*i );\n" + " }\n" + " else\n" + " {\n" + " size_t parity = i & 1; // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). 
Assume power of two buffer size \n" + " double3 f0;\n" + " switch( parity )\n" + " {\n" + " case 1:\n" + " f0 = (double3)( in[3*i], NAN, NAN ); \n" + " break;\n" + " case 0:\n" + " f0 = (double3)( in[3*i], in[3*i+1], NAN ); \n" + " break;\n" + " }\n" + " int3 i0 = ", name, "( f0 );\n" + " switch( parity )\n" + " {\n" + " case 0:\n" + " out[3*i+1] = i0.y; \n" + " // fall through\n" + " case 1:\n" + " out[3*i] = i0.x; \n" + " break;\n" + " }\n" + " }\n" + "}\n" + }; + + const char **kern = c; + size_t kernSize = sizeof(c)/sizeof(c[0]); + + if( sizeValues[vectorSize] == 3 ) + { + kern = c3; + kernSize = sizeof(c3)/sizeof(c3[0]); + } + + + char testName[32]; + snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); + + return MakeKernel(kern, (cl_uint) kernSize, testName, k, p); +} + +typedef struct BuildKernelInfo +{ + cl_uint offset; // the first vector size to build + cl_kernel *kernels; + cl_program *programs; + const char *nameInCode; +}BuildKernelInfo; + +static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); +static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ) +{ + BuildKernelInfo *info = (BuildKernelInfo*) p; + cl_uint i = info->offset + job_id; + return BuildKernel( info->nameInCode, i, info->kernels + i, info->programs + i ); +} + +static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); +static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ) +{ + BuildKernelInfo *info = (BuildKernelInfo*) p; + cl_uint i = info->offset + job_id; + return BuildKernelDouble( info->nameInCode, i, info->kernels + i, info->programs + i ); +} + +int TestFunc_Int_Float(const Func *f, MTdata d) +{ + uint64_t i; + uint32_t j, k; + int error; + cl_program programs[ VECTOR_SIZE_COUNT ]; + cl_kernel kernels[ VECTOR_SIZE_COUNT ]; + int ftz = f->ftz || 0 == (gFloatCapabilities & CL_FP_DENORM) || gForceFTZ; + size_t bufferSize = 
(gWimpyMode)?gWimpyBufferSize:BUFFER_SIZE; + uint64_t step = bufferSize / sizeof( float ); + int scale = (int)((1ULL<<32) / (16 * bufferSize / sizeof( float )) + 1); + + logFunctionInfo(f->name,sizeof(cl_float),gTestFastRelaxed); + if( gWimpyMode ) + { + step = (1ULL<<32) * gWimpyReductionFactor / (512); + } + + // This test is not using ThreadPool so we need to disable FTZ here + // for reference computations + FPU_mode_type oldMode; + DisableFTZ(&oldMode); + + Force64BitFPUPrecision(); + + // Init the kernels + BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, f->nameInCode }; + if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) )) + return error; +/* + for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) + if( (error = BuildKernel( f->nameInCode, (int) i, kernels + i, programs + i) ) ) + return error; +*/ + + for( i = 0; i < (1ULL<<32); i += step ) + { + //Init input array + uint32_t *p = (uint32_t *)gIn; + if( gWimpyMode ) + { + for( j = 0; j < bufferSize / sizeof( float ); j++ ) + p[j] = (uint32_t) i + j * scale; + } + else + { + for( j = 0; j < bufferSize / sizeof( float ); j++ ) + p[j] = (uint32_t) i + j; + } + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); + return error; + } + + // write garbage into output arrays + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + uint32_t pattern = 0xffffdead; + memset_pattern4(gOut[j], &pattern, bufferSize); + if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, bufferSize, gOut[j], 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", error, j ); + goto exit; + } + } + + // Run the kernels + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + size_t vectorSize = sizeValues[j] * sizeof(cl_float); + size_t localCount = 
(bufferSize + vectorSize - 1) / vectorSize; + if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; } + if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; } + + if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) + { + vlog_error( "FAILED -- could not execute kernel\n" ); + goto exit; + } + } + + // Get that moving + if( (error = clFlush(gQueue) )) + vlog( "clFlush failed\n" ); + + //Calculate the correctly rounded reference result + int *r = (int *)gOut_Ref; + float *s = (float *)gIn; + for( j = 0; j < bufferSize / sizeof( float ); j++ ) + r[j] = f->func.i_f( s[j] ); + + // Read the data back + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL)) ) + { + vlog_error( "ReadArray failed %d\n", error ); + goto exit; + } + } + + if( gSkipCorrectnessTesting ) + break; + + //Verify data + uint32_t *t = (uint32_t *)gOut_Ref; + for( j = 0; j < bufferSize / sizeof( float ); j++ ) + { + for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) + { + uint32_t *q = (uint32_t *)(gOut[k]); + // If we aren't getting the correctly rounded result + if( t[j] != q[j] ) + { + if( ftz && IsFloatSubnormal(s[j])) + { + unsigned int correct0 = f->func.i_f( 0.0 ); + unsigned int correct1 = f->func.i_f( -0.0 ); + if( q[j] == correct0 || q[j] == correct1 ) + continue; + } + + uint32_t err = t[j] - q[j]; + if( q[j] > t[j] ) + err = q[j] - t[j]; + vlog_error( "\nERROR: %s%s: %d ulp error at %a (0x%8.8x): *%d vs. 
%d\n", f->name, sizeNames[k], err, ((float*) gIn)[j], ((cl_uint*) gIn)[j], t[j], q[j] ); + error = -1; + goto exit; + } + } + } + + if( 0 == (i & 0x0fffffff) ) + { + if (gVerboseBruteForce) + { + vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step, bufferSize); + } else + { + vlog("." ); + } + fflush(stdout); + } + } + + if( ! gSkipCorrectnessTesting ) + { + if( gWimpyMode ) + vlog( "Wimp pass" ); + else + vlog( "passed" ); + } + + if( gMeasureTimes ) + { + //Init input array + uint32_t *p = (uint32_t *)gIn; + for( j = 0; j < bufferSize / sizeof( float ); j++ ) + p[j] = genrand_int32(d); + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); + return error; + } + + + // Run the kernels + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + size_t vectorSize = sizeValues[j] * sizeof(cl_float); + size_t localCount = (bufferSize + vectorSize - 1) / vectorSize; + if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; } + if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; } + + double sum = 0.0; + double bestTime = INFINITY; + for( k = 0; k < PERF_LOOP_COUNT; k++ ) + { + uint64_t startTime = GetTime(); + if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) + { + vlog_error( "FAILED -- could not execute kernel\n" ); + goto exit; + } + + // Make sure OpenCL is done + if( (error = clFinish(gQueue) ) ) + { + vlog_error( "Error %d at clFinish\n", error ); + goto exit; + } + + uint64_t endTime = GetTime(); + double time = SubtractTime( endTime, startTime ); + sum += time; + if( time < bestTime ) + bestTime = time; + } + + if( gReportAverageTimes ) + bestTime = sum / PERF_LOOP_COUNT; + double clocksPerOp = bestTime * (double) gDeviceFrequency * 
gComputeDevices * gSimdSize * 1e6 / (bufferSize / sizeof( float ) ); + vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s", f->name, sizeNames[j] ); + } + } + + vlog( "\n" ); +exit: + RestoreFPState(&oldMode); + // Release + for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) + { + clReleaseKernel(kernels[k]); + clReleaseProgram(programs[k]); + } + + return error; +} + +int TestFunc_Int_Double(const Func *f, MTdata d) +{ + uint64_t i; + uint32_t j, k; + int error; + cl_program programs[ VECTOR_SIZE_COUNT ]; + cl_kernel kernels[ VECTOR_SIZE_COUNT ]; + int ftz = f->ftz || gForceFTZ; + size_t bufferSize = (gWimpyMode)?gWimpyBufferSize:BUFFER_SIZE; + uint64_t step = bufferSize / sizeof( cl_double ); + int scale = (int)((1ULL<<32) / (16 * bufferSize / sizeof( cl_double )) + 1); + + logFunctionInfo(f->name,sizeof(cl_double),gTestFastRelaxed); + if( gWimpyMode ) + { + step = (1ULL<<32) * gWimpyReductionFactor / (512); + } + // This test is not using ThreadPool so we need to disable FTZ here + // for reference computations + FPU_mode_type oldMode; + DisableFTZ(&oldMode); + + Force64BitFPUPrecision(); + + // Init the kernels + BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, f->nameInCode }; + if( (error = ThreadPool_Do( BuildKernel_DoubleFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info ) )) + { + return error; + } +/* + for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) + if( (error = BuildKernelDouble( f->nameInCode, (int) i, kernels + i, programs + i) ) ) + return error; +*/ + + for( i = 0; i < (1ULL<<32); i += step ) + { + //Init input array + double *p = (double *)gIn; + if( gWimpyMode ) + { + for( j = 0; j < bufferSize / sizeof( cl_double ); j++ ) + p[j] = DoubleFromUInt32( (uint32_t) i + j * scale ); + } + else + { + for( j = 0; j < bufferSize / sizeof( cl_double ); j++ ) + p[j] = DoubleFromUInt32( (uint32_t) i + j ); + } + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, 
bufferSize, gIn, 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); + return error; + } + + // write garbage into output arrays + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + uint32_t pattern = 0xffffdead; + memset_pattern4(gOut[j], &pattern, bufferSize); + if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, bufferSize, gOut[j], 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", error, j ); + goto exit; + } + } + + // Run the kernels + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + size_t vectorSize = sizeValues[j] * sizeof(cl_double); + size_t localCount = (bufferSize + vectorSize - 1) / vectorSize; + if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; } + if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; } + + if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) + { + vlog_error( "FAILED -- could not execute kernel\n" ); + goto exit; + } + } + + // Get that moving + if( (error = clFlush(gQueue) )) + vlog( "clFlush failed\n" ); + + //Calculate the correctly rounded reference result + int *r = (int *)gOut_Ref; + double *s = (double *)gIn; + for( j = 0; j < bufferSize / sizeof( cl_double ); j++ ) + r[j] = f->dfunc.i_f( s[j] ); + + // Read the data back + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL)) ) + { + vlog_error( "ReadArray failed %d\n", error ); + goto exit; + } + } + + if( gSkipCorrectnessTesting ) + break; + + //Verify data + uint32_t *t = (uint32_t *)gOut_Ref; + for( j = 0; j < bufferSize / sizeof( cl_double ); j++ ) + { + for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) + { + uint32_t 
*q = (uint32_t *)(gOut[k]); + // If we aren't getting the correctly rounded result + if( t[j] != q[j] ) + { + if( ftz && IsDoubleSubnormal(s[j])) + { + unsigned int correct0 = f->dfunc.i_f( 0.0 ); + unsigned int correct1 = f->dfunc.i_f( -0.0 ); + if( q[j] == correct0 || q[j] == correct1 ) + continue; + } + + uint32_t err = t[j] - q[j]; + if( q[j] > t[j] ) + err = q[j] - t[j]; + vlog_error( "\nERROR: %sD%s: %d ulp error at %.13la: *%d vs. %d\n", f->name, sizeNames[k], err, ((double*) gIn)[j], t[j], q[j] ); + error = -1; + goto exit; + } + } + } + + if( 0 == (i & 0x0fffffff) ) + { + if (gVerboseBruteForce) + { + vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step, bufferSize); + } else + { + vlog("." ); + } + fflush(stdout); + + } + } + + if( ! gSkipCorrectnessTesting ) + { + if( gWimpyMode ) + vlog( "Wimp pass" ); + else + vlog( "passed" ); + } + + if( gMeasureTimes ) + { + //Init input array + double *p = (double *)gIn; + for( j = 0; j < bufferSize / sizeof( cl_double ); j++ ) + p[j] = DoubleFromUInt32( genrand_int32(d) ); + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); + return error; + } + + + // Run the kernels + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + size_t vectorSize = sizeValues[j] * sizeof(cl_double); + size_t localCount = (bufferSize + vectorSize - 1) / vectorSize; + if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; } + if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; } + + double sum = 0.0; + double bestTime = INFINITY; + for( k = 0; k < PERF_LOOP_COUNT; k++ ) + { + uint64_t startTime = GetTime(); + if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) + { + vlog_error( "FAILED -- could not execute 
kernel\n" ); + goto exit; + } + + // Make sure OpenCL is done + if( (error = clFinish(gQueue) ) ) + { + vlog_error( "Error %d at clFinish\n", error ); + goto exit; + } + + uint64_t endTime = GetTime(); + double time = SubtractTime( endTime, startTime ); + sum += time; + if( time < bestTime ) + bestTime = time; + } + + if( gReportAverageTimes ) + bestTime = sum / PERF_LOOP_COUNT; + double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (bufferSize / sizeof( double ) ); + vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s", f->name, sizeNames[j] ); + } + for( ; j < gMaxVectorSizeIndex; j++ ) + vlog( "\t -- " ); + } + + vlog( "\n" ); + + +exit: + RestoreFPState(&oldMode); + // Release + for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) + { + clReleaseKernel(kernels[k]); + clReleaseProgram(programs[k]); + } + + return error; +} + + + diff --git a/test_conformance/math_brute_force/macro_binary.c b/test_conformance/math_brute_force/macro_binary.c new file mode 100644 index 00000000..e489c598 --- /dev/null +++ b/test_conformance/math_brute_force/macro_binary.c @@ -0,0 +1,1215 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "Utility.h" + +#include +#include "FunctionList.h" + +int TestMacro_Int_Float_Float(const Func *f, MTdata); +int TestMacro_Int_Double_Double(const Func *f, MTdata); + +#if defined( __cplusplus) +extern "C" +#endif +const vtbl _macro_binary = { "macro_binary", TestMacro_Int_Float_Float, TestMacro_Int_Double_Double }; + +static int BuildKernel( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p ); +static int BuildKernelDouble( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p ); + +static int BuildKernel( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p ) +{ + const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global int", sizeNames[vectorSize], "* out, __global float", sizeNames[vectorSize], "* in1, __global float", sizeNames[vectorSize], "* in2 )\n" + "{\n" + " int i = get_global_id(0);\n" + " out[i] = ", name, "( in1[i], in2[i] );\n" + "}\n" + }; + + const char *c3[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global int* out, __global float* in, __global float* in2)\n" + "{\n" + " size_t i = get_global_id(0);\n" + " if( i + 1 < get_global_size(0) )\n" + " {\n" + " float3 f0 = vload3( 0, in + 3 * i );\n" + " float3 f1 = vload3( 0, in2 + 3 * i );\n" + " int3 i0 = ", name, "( f0, f1 );\n" + " vstore3( i0, 0, out + 3*i );\n" + " }\n" + " else\n" + " {\n" + " size_t parity = i & 1; // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). 
Assume power of two buffer size \n" + " float3 f0, f1;\n" + " switch( parity )\n" + " {\n" + " case 1:\n" + " f0 = (float3)( in[3*i], NAN, NAN ); \n" + " f1 = (float3)( in2[3*i], NAN, NAN ); \n" + " break;\n" + " case 0:\n" + " f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n" + " f1 = (float3)( in2[3*i], in2[3*i+1], NAN ); \n" + " break;\n" + " }\n" + " int3 i0 = ", name, "( f0, f1 );\n" + " switch( parity )\n" + " {\n" + " case 0:\n" + " out[3*i+1] = i0.y; \n" + " // fall through\n" + " case 1:\n" + " out[3*i] = i0.x; \n" + " break;\n" + " }\n" + " }\n" + "}\n" + }; + + + const char **kern = c; + size_t kernSize = sizeof(c)/sizeof(c[0]); + + if( sizeValues[vectorSize] == 3 ) + { + kern = c3; + kernSize = sizeof(c3)/sizeof(c3[0]); + } + + char testName[32]; + snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); + + return MakeKernels(kern, (cl_uint) kernSize, testName, kernel_count, k, p); } + + +static int BuildKernelDouble( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p ) +{ + const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", + "__kernel void math_kernel", sizeNames[vectorSize], "( __global long", sizeNames[vectorSize], "* out, __global double", sizeNames[vectorSize], "* in1, __global double", sizeNames[vectorSize], "* in2 )\n" + "{\n" + " int i = get_global_id(0);\n" + " out[i] = ", name, "( in1[i], in2[i] );\n" + "}\n" + }; + + const char *c3[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", + "__kernel void math_kernel", sizeNames[vectorSize], "( __global long* out, __global double* in, __global double* in2)\n" + "{\n" + " size_t i = get_global_id(0);\n" + " if( i + 1 < get_global_size(0) )\n" + " {\n" + " double3 f0 = vload3( 0, in + 3 * i );\n" + " double3 f1 = vload3( 0, in2 + 3 * i );\n" + " long3 l0 = ", name, "( f0, f1 );\n" + " vstore3( l0, 0, out + 3*i );\n" + " }\n" + " else\n" + " {\n" + " size_t parity = i & 1; // Figure out how many elements are left over 
after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n" + " double3 f0, f1;\n" + " switch( parity )\n" + " {\n" + " case 1:\n" + " f0 = (double3)( in[3*i], NAN, NAN ); \n" + " f1 = (double3)( in2[3*i], NAN, NAN ); \n" + " break;\n" + " case 0:\n" + " f0 = (double3)( in[3*i], in[3*i+1], NAN ); \n" + " f1 = (double3)( in2[3*i], in2[3*i+1], NAN ); \n" + " break;\n" + " }\n" + " long3 l0 = ", name, "( f0, f1 );\n" + " switch( parity )\n" + " {\n" + " case 0:\n" + " out[3*i+1] = l0.y; \n" + " // fall through\n" + " case 1:\n" + " out[3*i] = l0.x; \n" + " break;\n" + " }\n" + " }\n" + "}\n" + }; + + const char **kern = c; + size_t kernSize = sizeof(c)/sizeof(c[0]); + + if( sizeValues[vectorSize] == 3 ) + { + kern = c3; + kernSize = sizeof(c3)/sizeof(c3[0]); + } + + + char testName[32]; + snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); + + return MakeKernels(kern, (cl_uint) kernSize, testName, kernel_count, k, p); +} + +typedef struct BuildKernelInfo +{ + cl_uint offset; // the first vector size to build + cl_uint kernel_count; + cl_kernel **kernels; + cl_program *programs; + const char *nameInCode; +}BuildKernelInfo; + +static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); +static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ) +{ + BuildKernelInfo *info = (BuildKernelInfo*) p; + cl_uint i = info->offset + job_id; + return BuildKernel( info->nameInCode, i, info->kernel_count, info->kernels[i], info->programs + i ); +} + +static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); +static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ) +{ + BuildKernelInfo *info = (BuildKernelInfo*) p; + cl_uint i = info->offset + job_id; + return BuildKernelDouble( info->nameInCode, i, info->kernel_count, info->kernels[i], info->programs + i ); +} + + +// A table of more difficult cases to get right +static 
const float specialValuesFloat[] = { + -NAN, -INFINITY, -FLT_MAX, MAKE_HEX_FLOAT(-0x1.000002p64f, -0x1000002L, 40), MAKE_HEX_FLOAT(-0x1.0p64f, -0x1L, 64), MAKE_HEX_FLOAT(-0x1.fffffep63f, -0x1fffffeL, 39), MAKE_HEX_FLOAT(-0x1.000002p63f, -0x1000002L, 39), MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(-0x1.fffffep62f, -0x1fffffeL, 38), + MAKE_HEX_FLOAT(-0x1.000002p32f, -0x1000002L, 8), MAKE_HEX_FLOAT(-0x1.0p32f, -0x1L, 32), MAKE_HEX_FLOAT(-0x1.fffffep31f, -0x1fffffeL, 7), MAKE_HEX_FLOAT(-0x1.000002p31f, -0x1000002L, 7), MAKE_HEX_FLOAT(-0x1.0p31f, -0x1L, 31), MAKE_HEX_FLOAT(-0x1.fffffep30f, -0x1fffffeL, 6), -1000.f, -100.f, -4.0f, -3.5f, + -3.0f, MAKE_HEX_FLOAT(-0x1.800002p1f, -0x1800002L, -23), -2.5f, MAKE_HEX_FLOAT(-0x1.7ffffep1f, -0x17ffffeL, -23), -2.0f, MAKE_HEX_FLOAT(-0x1.800002p0f, -0x1800002L, -24), -1.5f, MAKE_HEX_FLOAT(-0x1.7ffffep0f, -0x17ffffeL, -24),MAKE_HEX_FLOAT(-0x1.000002p0f, -0x1000002L, -24), -1.0f, MAKE_HEX_FLOAT(-0x1.fffffep-1f, -0x1fffffeL, -25), + MAKE_HEX_FLOAT(-0x1.000002p-1f, -0x1000002L, -25), -0.5f, MAKE_HEX_FLOAT(-0x1.fffffep-2f, -0x1fffffeL, -26), MAKE_HEX_FLOAT(-0x1.000002p-2f, -0x1000002L, -26), -0.25f, MAKE_HEX_FLOAT(-0x1.fffffep-3f, -0x1fffffeL, -27), + MAKE_HEX_FLOAT(-0x1.000002p-126f, -0x1000002L, -150), -FLT_MIN, MAKE_HEX_FLOAT(-0x0.fffffep-126f, -0x0fffffeL, -150), MAKE_HEX_FLOAT(-0x0.000ffep-126f, -0x0000ffeL, -150), MAKE_HEX_FLOAT(-0x0.0000fep-126f, -0x00000feL, -150), MAKE_HEX_FLOAT(-0x0.00000ep-126f, -0x000000eL, -150), MAKE_HEX_FLOAT(-0x0.00000cp-126f, -0x000000cL, -150), MAKE_HEX_FLOAT(-0x0.00000ap-126f, -0x000000aL, -150), + MAKE_HEX_FLOAT(-0x0.000008p-126f, -0x0000008L, -150), MAKE_HEX_FLOAT(-0x0.000006p-126f, -0x0000006L, -150), MAKE_HEX_FLOAT(-0x0.000004p-126f, -0x0000004L, -150), MAKE_HEX_FLOAT(-0x0.000002p-126f, -0x0000002L, -150), -0.0f, + + +NAN, +INFINITY, +FLT_MAX, MAKE_HEX_FLOAT(+0x1.000002p64f, +0x1000002L, 40), MAKE_HEX_FLOAT(+0x1.0p64f, +0x1L, 64), MAKE_HEX_FLOAT(+0x1.fffffep63f, +0x1fffffeL, 39), 
MAKE_HEX_FLOAT(+0x1.000002p63f, +0x1000002L, 39), MAKE_HEX_FLOAT(+0x1.0p63f, +0x1L, 63), MAKE_HEX_FLOAT(+0x1.fffffep62f, +0x1fffffeL, 38), + MAKE_HEX_FLOAT(+0x1.000002p32f, +0x1000002L, 8), MAKE_HEX_FLOAT(+0x1.0p32f, +0x1L, 32), MAKE_HEX_FLOAT(+0x1.fffffep31f, +0x1fffffeL, 7), MAKE_HEX_FLOAT(+0x1.000002p31f, +0x1000002L, 7), MAKE_HEX_FLOAT(+0x1.0p31f, +0x1L, 31), MAKE_HEX_FLOAT(+0x1.fffffep30f, +0x1fffffeL, 6), +1000.f, +100.f, +4.0f, +3.5f, + +3.0f, MAKE_HEX_FLOAT(+0x1.800002p1f, +0x1800002L, -23), 2.5f, MAKE_HEX_FLOAT(+0x1.7ffffep1f, +0x17ffffeL, -23),+2.0f, MAKE_HEX_FLOAT(+0x1.800002p0f, +0x1800002L, -24), 1.5f, MAKE_HEX_FLOAT(+0x1.7ffffep0f, +0x17ffffeL, -24), MAKE_HEX_FLOAT(+0x1.000002p0f, +0x1000002L, -24), +1.0f, MAKE_HEX_FLOAT(+0x1.fffffep-1f, +0x1fffffeL, -25), + MAKE_HEX_FLOAT(+0x1.000002p-1f, +0x1000002L, -25), +0.5f, MAKE_HEX_FLOAT(+0x1.fffffep-2f, +0x1fffffeL, -26), MAKE_HEX_FLOAT(+0x1.000002p-2f, +0x1000002L, -26), +0.25f, MAKE_HEX_FLOAT(+0x1.fffffep-3f, +0x1fffffeL, -27), + MAKE_HEX_FLOAT(0x1.000002p-126f, 0x1000002L, -150), +FLT_MIN, MAKE_HEX_FLOAT(+0x0.fffffep-126f, +0x0fffffeL, -150), MAKE_HEX_FLOAT(+0x0.000ffep-126f, +0x0000ffeL, -150), MAKE_HEX_FLOAT(+0x0.0000fep-126f, +0x00000feL, -150), MAKE_HEX_FLOAT(+0x0.00000ep-126f, +0x000000eL, -150), MAKE_HEX_FLOAT(+0x0.00000cp-126f, +0x000000cL, -150), MAKE_HEX_FLOAT(+0x0.00000ap-126f, +0x000000aL, -150), + MAKE_HEX_FLOAT(+0x0.000008p-126f, +0x0000008L, -150), MAKE_HEX_FLOAT(+0x0.000006p-126f, +0x0000006L, -150), MAKE_HEX_FLOAT(+0x0.000004p-126f, +0x0000004L, -150), MAKE_HEX_FLOAT(+0x0.000002p-126f, +0x0000002L, -150), +0.0f +}; + +static const size_t specialValuesFloatCount = sizeof(specialValuesFloat) / sizeof(specialValuesFloat[0]); + +//Thread specific data for a worker thread +typedef struct ThreadInfo +{ + cl_mem inBuf; // input buffer for the thread + cl_mem inBuf2; // input buffer for the thread + cl_mem outBuf[ VECTOR_SIZE_COUNT ]; // output buffers for the thread + MTdata d; + cl_command_queue 
tQueue; // per thread command queue to improve performance +}ThreadInfo; + +typedef struct TestInfo +{ + size_t subBufferSize; // Size of the sub-buffer in elements + const Func *f; // A pointer to the function info + cl_program programs[ VECTOR_SIZE_COUNT ]; // programs for various vector sizes + cl_kernel *k[VECTOR_SIZE_COUNT ]; // arrays of thread-specific kernels for each worker thread: k[vector_size][thread_id] + ThreadInfo *tinfo; // An array of thread specific information for each worker thread + cl_uint threadCount; // Number of worker threads + cl_uint step; // step between each chunk and the next. + cl_uint scale; // stride between individual test values + int ftz; // non-zero if running in flush to zero mode + +}TestInfo; + +static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *p ); + +int TestMacro_Int_Float_Float(const Func *f, MTdata d) +{ + TestInfo test_info; + cl_int error; + size_t i, j; + + logFunctionInfo(f->name,sizeof(cl_float),gTestFastRelaxed); + + // Init test_info + memset( &test_info, 0, sizeof( test_info ) ); + test_info.threadCount = GetThreadCount(); + test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); + test_info.scale = 1; + if (gWimpyMode) + { + test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); + test_info.scale = (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor; + } + test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale; + test_info.f = f; + test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); + + // cl_kernels aren't thread safe, so we make one for each vector size for every thread + for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) + { + size_t array_size = test_info.threadCount * sizeof( cl_kernel ); + test_info.k[i] = (cl_kernel*)malloc( array_size ); + if( NULL == test_info.k[i] ) + { + vlog_error( "Error: Unable to allocate storage 
for kernels!\n" ); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset( test_info.k[i], 0, array_size ); + } + test_info.tinfo = (ThreadInfo*)malloc( test_info.threadCount * sizeof(*test_info.tinfo) ); + if( NULL == test_info.tinfo ) + { + vlog_error( "Error: Unable to allocate storage for thread specific data.\n" ); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset( test_info.tinfo, 0, test_info.threadCount * sizeof(*test_info.tinfo) ); + for( i = 0; i < test_info.threadCount; i++ ) + { + cl_buffer_region region = { i * test_info.subBufferSize * sizeof( cl_float), test_info.subBufferSize * sizeof( cl_float) }; + test_info.tinfo[i].inBuf = clCreateSubBuffer( gInBuffer, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if( error || NULL == test_info.tinfo[i].inBuf) + { + vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size ); + goto exit; + } + test_info.tinfo[i].inBuf2 = clCreateSubBuffer( gInBuffer2, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if( error || NULL == test_info.tinfo[i].inBuf) + { + vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size ); + goto exit; + } + + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if( error || NULL == test_info.tinfo[i].outBuf[j] ) + { + vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size ); + goto exit; + } + } + test_info.tinfo[i].tQueue = clCreateCommandQueueWithProperties(gContext, gDevice, 0, &error); + if( NULL == test_info.tinfo[i].tQueue || error ) + { + vlog_error( "clCreateCommandQueue failed. 
(%d)\n", error ); + goto exit; + } + + test_info.tinfo[i].d = init_genrand(genrand_int32(d)); + } + + // Init the kernels + { + BuildKernelInfo build_info = { gMinVectorSizeIndex, test_info.threadCount, test_info.k, test_info.programs, f->nameInCode }; + if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) )) + goto exit; + } + + + // Run the kernels + if( !gSkipCorrectnessTesting ) + { + error = ThreadPool_Do( TestFloat, (cl_uint) ((1ULL<<32) / test_info.step), &test_info ); + + if( error ) + goto exit; + + if( gWimpyMode ) + vlog( "Wimp pass" ); + else + vlog( "passed" ); + } + + if( gMeasureTimes ) + { + //Init input arrays + uint32_t *p = (uint32_t *)gIn; + uint32_t *p2 = (uint32_t *)gIn2; + for( j = 0; j < BUFFER_SIZE / sizeof( float ); j++ ) + { + p[j] = genrand_int32(d); + p2[j] = genrand_int32(d); + } + + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, BUFFER_SIZE, gIn, 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); + return error; + } + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, BUFFER_SIZE, gIn2, 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error ); + return error; + } + + + // Run the kernels + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + size_t vectorSize = sizeValues[j] * sizeof(cl_float); + size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize; + if( ( error = clSetKernelArg( test_info.k[j][0], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(test_info.programs[j]); goto exit; } + if( ( error = clSetKernelArg( test_info.k[j][0], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(test_info.programs[j]); goto exit; } + if( ( error = clSetKernelArg( test_info.k[j][0], 2, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(test_info.programs[j]); goto exit; } + + double sum = 0.0; + double bestTime = INFINITY; + for( i = 0; 
i < PERF_LOOP_COUNT; i++ ) + { + uint64_t startTime = GetTime(); + if( (error = clEnqueueNDRangeKernel(gQueue, test_info.k[j][0], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) + { + vlog_error( "FAILED -- could not execute kernel\n" ); + goto exit; + } + + // Make sure OpenCL is done + if( (error = clFinish(gQueue) ) ) + { + vlog_error( "Error %d at clFinish\n", error ); + goto exit; + } + + uint64_t endTime = GetTime(); + double time = SubtractTime( endTime, startTime ); + sum += time; + if( time < bestTime ) + bestTime = time; + } + + if( gReportAverageTimes ) + bestTime = sum / PERF_LOOP_COUNT; + double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (BUFFER_SIZE / sizeof( float ) ); + vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s", f->name, sizeNames[j] ); + } + } + vlog( "\n" ); + +exit: + for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) + { + clReleaseProgram(test_info.programs[i]); + if( test_info.k[i] ) + { + for( j = 0; j < test_info.threadCount; j++ ) + clReleaseKernel(test_info.k[i][j]); + + free( test_info.k[i] ); + } + } + if( test_info.tinfo ) + { + for( i = 0; i < test_info.threadCount; i++ ) + { + free_mtdata(test_info.tinfo[i].d); + clReleaseMemObject(test_info.tinfo[i].inBuf); + clReleaseMemObject(test_info.tinfo[i].inBuf2); + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + clReleaseMemObject(test_info.tinfo[i].outBuf[j]); + clReleaseCommandQueue(test_info.tinfo[i].tQueue); + } + + free( test_info.tinfo ); + } + + return error; +} + +static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data ) +{ + const TestInfo *job = (const TestInfo *) data; + size_t buffer_elements = job->subBufferSize; + size_t buffer_size = buffer_elements * sizeof( cl_float ); + cl_uint base = job_id * (cl_uint) job->step; + ThreadInfo *tinfo = job->tinfo + thread_id; + fptr func = job->f->func; + int ftz = job->ftz; + MTdata d = tinfo->d; + cl_uint j, k; + cl_int error; + 
const char *name = job->f->name; + cl_int *t,*r; + cl_float *s,*s2; + + // start the map of the output arrays + cl_event e[ VECTOR_SIZE_COUNT ]; + cl_int *out[ VECTOR_SIZE_COUNT ]; + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + out[j] = (cl_int*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0, buffer_size, 0, NULL, e + j, &error); + if( error || NULL == out[j]) + { + vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error ); + return error; + } + } + + // Get that moving + if( (error = clFlush(tinfo->tQueue) )) + vlog( "clFlush failed\n" ); + + //Init input array + cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements; + cl_uint *p2 = (cl_uint *)gIn2 + thread_id * buffer_elements; + j = 0; + int totalSpecialValueCount = specialValuesFloatCount * specialValuesFloatCount; + int indx = (totalSpecialValueCount - 1) / buffer_elements; + + if( job_id <= (cl_uint)indx ) + { // test edge cases + float *fp = (float *)p; + float *fp2 = (float *)p2; + uint32_t x, y; + + x = (job_id * buffer_elements) % specialValuesFloatCount; + y = (job_id * buffer_elements) / specialValuesFloatCount; + + for( ; j < buffer_elements; j++ ) + { + fp[j] = specialValuesFloat[x]; + fp2[j] = specialValuesFloat[y]; + if( ++x >= specialValuesFloatCount ) + { + x = 0; + y++; + if( y >= specialValuesFloatCount ) + break; + } + } + } + + //Init any remaining values. + for( ; j < buffer_elements; j++ ) + { + p[j] = genrand_int32(d); + p2[j] = genrand_int32(d); + } + + + if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, buffer_size, p, 0, NULL, NULL) )) + { + vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error ); + goto exit; + } + + if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0, buffer_size, p2, 0, NULL, NULL) )) + { + vlog_error( "Error: clEnqueueWriteBuffer failed! 
err: %d\n", error ); + goto exit; + } + + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + //Wait for the map to finish + if( (error = clWaitForEvents(1, e + j) )) + { + vlog_error( "Error: clWaitForEvents failed! err: %d\n", error ); + goto exit; + } + if( (error = clReleaseEvent( e[j] ) )) + { + vlog_error( "Error: clReleaseEvent failed! err: %d\n", error ); + goto exit; + } + + // Fill the result buffer with garbage, so that old results don't carry over + uint32_t pattern = 0xffffdead; + memset_pattern4(out[j], &pattern, buffer_size); + if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL) )) + { + vlog_error( "Error: clEnqueueMapBuffer failed! err: %d\n", error ); + goto exit; + } + + // run the kernel + size_t vectorCount = (buffer_elements + sizeValues[j] - 1) / sizeValues[j]; + cl_kernel kernel = job->k[j][thread_id]; //each worker thread has its own copy of the cl_kernel + cl_program program = job->programs[j]; + + if( ( error = clSetKernelArg( kernel, 0, sizeof( tinfo->outBuf[j] ), &tinfo->outBuf[j] ))){ LogBuildError(program); return error; } + if( ( error = clSetKernelArg( kernel, 1, sizeof( tinfo->inBuf ), &tinfo->inBuf ) )) { LogBuildError(program); return error; } + if( ( error = clSetKernelArg( kernel, 2, sizeof( tinfo->inBuf2 ), &tinfo->inBuf2 ) )) { LogBuildError(program); return error; } + + if( (error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL, &vectorCount, NULL, 0, NULL, NULL))) + { + vlog_error( "FAILED -- could not execute kernel\n" ); + goto exit; + } + } + + // Get that moving + if( (error = clFlush(tinfo->tQueue) )) + vlog( "clFlush 2 failed\n" ); + + if( gSkipCorrectnessTesting ) + return CL_SUCCESS; + + //Calculate the correctly rounded reference result + r = (cl_int *)gOut_Ref + thread_id * buffer_elements; + s = (float *)gIn + thread_id * buffer_elements; + s2 = (float *)gIn2 + thread_id * buffer_elements; + for( j = 0; j < buffer_elements; j++ ) + r[j] = func.i_ff( 
s[j], s2[j] ); + + + // Read the data back -- no need to wait for the first N-1 buffers. This is an in order queue. + for( j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++ ) + { + out[j] = (cl_int*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error); + if( error || NULL == out[j] ) + { + vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error ); + goto exit; + } + } + + // Wait for the last buffer + out[j] = (cl_int*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_TRUE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error); + if( error || NULL == out[j] ) + { + vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error ); + goto exit; + } + + //Verify data + t = (cl_int *)r; + for( j = 0; j < buffer_elements; j++ ) + { + cl_int *q = out[0]; + + if( gMinVectorSizeIndex == 0 && t[j] != q[j] ) + { + if( ftz ) + { + if( IsFloatSubnormal( s[j]) ) + { + if( IsFloatSubnormal( s2[j] ) ) + { + int correct = func.i_ff( 0.0f, 0.0f ); + int correct2 = func.i_ff( 0.0f, -0.0f ); + int correct3 = func.i_ff( -0.0f, 0.0f ); + int correct4 = func.i_ff( -0.0f, -0.0f ); + + if( correct == q[j] || correct2 == q[j] || correct3 == q[j] || correct4 == q[j] ) + continue; + } + else + { + int correct = func.i_ff( 0.0f, s2[j] ); + int correct2 = func.i_ff( -0.0f, s2[j] ); + if( correct == q[j] || correct2 == q[j] ) + continue; + } + } + else if( IsFloatSubnormal( s2[j] ) ) + { + int correct = func.i_ff( s[j], 0.0f ); + int correct2 = func.i_ff( s[j], -0.0f ); + if( correct == q[j] || correct2 == q[j] ) + continue; + } + + } + + uint32_t err = t[j] - q[j]; + if( q[j] > t[j] ) + err = q[j] - t[j]; + vlog_error( "\nERROR: %s: %d ulp error at {%a, %a}: *0x%8.8x vs. 
0x%8.8x (index: %d)\n", name, err, ((float*) s)[j], ((float*) s2)[j], t[j], q[j], j ); + error = -1; + goto exit; + } + + for( k = MAX(1, gMinVectorSizeIndex); k < gMaxVectorSizeIndex; k++ ) + { + q = out[k]; + // If we aren't getting the correctly rounded result + if( -t[j] != q[j] ) + { + if( ftz ) + { + if( IsFloatSubnormal( s[j]) ) + { + if( IsFloatSubnormal( s2[j] ) ) + { + int correct = -func.i_ff( 0.0f, 0.0f ); + int correct2 = -func.i_ff( 0.0f, -0.0f ); + int correct3 = -func.i_ff( -0.0f, 0.0f ); + int correct4 = -func.i_ff( -0.0f, -0.0f ); + + if( correct == q[j] || correct2 == q[j] || correct3 == q[j] || correct4 == q[j] ) + continue; + } + else + { + int correct = -func.i_ff( 0.0f, s2[j] ); + int correct2 = -func.i_ff( -0.0f, s2[j] ); + if( correct == q[j] || correct2 == q[j] ) + continue; + } + } + else if( IsFloatSubnormal( s2[j] ) ) + { + int correct = -func.i_ff( s[j], 0.0f ); + int correct2 = -func.i_ff( s[j], -0.0f ); + if( correct == q[j] || correct2 == q[j] ) + continue; + } + + } + cl_uint err = -t[j] - q[j]; + if( q[j] > -t[j] ) + err = q[j] + t[j]; + vlog_error( "\nERROR: %s%s: %d ulp error at {%a, %a}: *0x%8.8x vs. 0x%8.8x (index: %d)\n", name, sizeNames[k], err, ((float*) s)[j], ((float*) s2)[j], -t[j], q[j], j ); + error = -1; + goto exit; + } + } + } + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)) ) + { + vlog_error( "Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n", j, error ); + return error; + } + } + + if( (error = clFlush(tinfo->tQueue) )) + vlog( "clFlush 3 failed\n" ); + + + if( 0 == ( base & 0x0fffffff) ) + { + if (gVerboseBruteForce) + { + vlog("base:%14u step:%10u scale:%10u buf_elements:%10zd ThreadCount:%2u\n", base, job->step, job->scale, buffer_elements, job->threadCount); + } else + { + vlog("." 
); + } + fflush(stdout); + } + +exit: + return error; +} + + +// A table of more difficult cases to get right +static const double specialValuesDouble[] = { + -NAN, -INFINITY, -DBL_MAX, MAKE_HEX_DOUBLE(-0x1.0000000000001p64, -0x10000000000001LL, 12), MAKE_HEX_DOUBLE(-0x1.0p64, -0x1LL, 64), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp63, -0x1fffffffffffffLL, 11), MAKE_HEX_DOUBLE(-0x1.0000000000001p63, -0x10000000000001LL, 11), MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10), + MAKE_HEX_DOUBLE(-0x1.000002p32, -0x1000002LL, 8), MAKE_HEX_DOUBLE(-0x1.0p32, -0x1LL, 32), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp31, -0x1fffffffffffffLL, -21), MAKE_HEX_DOUBLE(-0x1.0000000000001p31, -0x10000000000001LL, -21), MAKE_HEX_DOUBLE(-0x1.0p31, -0x1LL, 31), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp30, -0x1fffffffffffffLL, -22), -1000., -100., -4.0, -3.5, + -3.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p1, -0x18000000000001LL, -51), -2.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp1, -0x17ffffffffffffLL, -51), -2.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p0, -0x18000000000001LL, -52), -1.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp0, -0x17ffffffffffffLL, -52),MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52), -1.0, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-1, -0x1fffffffffffffLL, -53), + MAKE_HEX_DOUBLE(-0x1.0000000000001p-1, -0x10000000000001LL, -53), -0.5, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-2, -0x1fffffffffffffLL, -54), MAKE_HEX_DOUBLE(-0x1.0000000000001p-2, -0x10000000000001LL, -54), -0.25, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-3, -0x1fffffffffffffLL, -55), + MAKE_HEX_DOUBLE(-0x1.0000000000001p-1022, -0x10000000000001LL, -1074), -DBL_MIN, MAKE_HEX_DOUBLE(-0x0.fffffffffffffp-1022, -0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000fffp-1022, -0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(-0x0.00000000000fep-1022, -0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ep-1022, -0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000cp-1022, 
-0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ap-1022, -0x0000000000000aLL, -1074), + MAKE_HEX_DOUBLE(-0x0.0000000000008p-1022, -0x00000000000008LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000007p-1022, -0x00000000000007LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000006p-1022, -0x00000000000006LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000005p-1022, -0x00000000000005LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000004p-1022, -0x00000000000004LL, -1074), + MAKE_HEX_DOUBLE(-0x0.0000000000003p-1022, -0x00000000000003LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000002p-1022, -0x00000000000002LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000001p-1022, -0x00000000000001LL, -1074), -0.0, + + +NAN, +INFINITY, +DBL_MAX, MAKE_HEX_DOUBLE(+0x1.0000000000001p64, +0x10000000000001LL, 12), MAKE_HEX_DOUBLE(+0x1.0p64, +0x1LL, 64), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp63, +0x1fffffffffffffLL, 11), MAKE_HEX_DOUBLE(+0x1.0000000000001p63, +0x10000000000001LL, 11), MAKE_HEX_DOUBLE(+0x1.0p63, +0x1LL, 63), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10), + MAKE_HEX_DOUBLE(+0x1.000002p32, +0x1000002LL, 8), MAKE_HEX_DOUBLE(+0x1.0p32, +0x1LL, 32), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp31, +0x1fffffffffffffLL, -21), MAKE_HEX_DOUBLE(+0x1.0000000000001p31, +0x10000000000001LL, -21), MAKE_HEX_DOUBLE(+0x1.0p31, +0x1LL, 31), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp30, +0x1fffffffffffffLL, -22), +1000., +100., +4.0, +3.5, + +3.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p1, +0x18000000000001LL, -51), +2.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp1, +0x17ffffffffffffLL, -51), +2.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p0, +0x18000000000001LL, -52), +1.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp0, +0x17ffffffffffffLL, -52),MAKE_HEX_DOUBLE(+0x1.0000000000001p0, +0x10000000000001LL, -52), +1.0, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-1, +0x1fffffffffffffLL, -53), + MAKE_HEX_DOUBLE(+0x1.0000000000001p-1, +0x10000000000001LL, -53), +0.5, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-2, +0x1fffffffffffffLL, -54), 
MAKE_HEX_DOUBLE(+0x1.0000000000001p-2, +0x10000000000001LL, -54), +0.25, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-3, +0x1fffffffffffffLL, -55), + MAKE_HEX_DOUBLE(+0x1.0000000000001p-1022, +0x10000000000001LL, -1074), +DBL_MIN, MAKE_HEX_DOUBLE(+0x0.fffffffffffffp-1022, +0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000fffp-1022, +0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(+0x0.00000000000fep-1022, +0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ep-1022, +0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000cp-1022, +0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ap-1022, +0x0000000000000aLL, -1074), + MAKE_HEX_DOUBLE(+0x0.0000000000008p-1022, +0x00000000000008LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000007p-1022, +0x00000000000007LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000006p-1022, +0x00000000000006LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000005p-1022, +0x00000000000005LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000004p-1022, +0x00000000000004LL, -1074), + MAKE_HEX_DOUBLE(+0x0.0000000000003p-1022, +0x00000000000003LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000002p-1022, +0x00000000000002LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000001p-1022, +0x00000000000001LL, -1074), +0.0, +}; + +static size_t specialValuesDoubleCount = sizeof( specialValuesDouble ) / sizeof( specialValuesDouble[0] ); + + +static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *p ); + +int TestMacro_Int_Double_Double(const Func *f, MTdata d) +{ + TestInfo test_info; + cl_int error; + size_t i, j; + + logFunctionInfo(f->name,sizeof(cl_double),gTestFastRelaxed); + + // Init test_info + memset( &test_info, 0, sizeof( test_info ) ); + test_info.threadCount = GetThreadCount(); + test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); + test_info.scale = 1; + if (gWimpyMode) + { + test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); + 
test_info.scale = (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor; + } + + test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale; + test_info.f = f; + test_info.ftz = f->ftz || gForceFTZ; + + // cl_kernels aren't thread safe, so we make one for each vector size for every thread + for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) + { + size_t array_size = test_info.threadCount * sizeof( cl_kernel ); + test_info.k[i] = (cl_kernel*)malloc( array_size ); + if( NULL == test_info.k[i] ) + { + vlog_error( "Error: Unable to allocate storage for kernels!\n" ); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset( test_info.k[i], 0, array_size ); + } + test_info.tinfo = (ThreadInfo*)malloc( test_info.threadCount * sizeof(*test_info.tinfo) ); + if( NULL == test_info.tinfo ) + { + vlog_error( "Error: Unable to allocate storage for thread specific data.\n" ); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset( test_info.tinfo, 0, test_info.threadCount * sizeof(*test_info.tinfo) ); + for( i = 0; i < test_info.threadCount; i++ ) + { + cl_buffer_region region = { i * test_info.subBufferSize * sizeof( cl_double), test_info.subBufferSize * sizeof( cl_double) }; + test_info.tinfo[i].inBuf = clCreateSubBuffer( gInBuffer, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if( error || NULL == test_info.tinfo[i].inBuf) + { + vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size ); + goto exit; + } + test_info.tinfo[i].inBuf2 = clCreateSubBuffer( gInBuffer2, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if( error || NULL == test_info.tinfo[i].inBuf) + { + vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size ); + goto exit; + } + + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + /* Qualcomm fix: 9461 read-write flags must be compatible with parent buffer */ + 
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + /* Qualcomm fix: end */ + if( error || NULL == test_info.tinfo[i].outBuf[j] ) + { + vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size ); + goto exit; + } + } + test_info.tinfo[i].tQueue = clCreateCommandQueueWithProperties(gContext, gDevice, 0, &error); + if( NULL == test_info.tinfo[i].tQueue || error ) + { + vlog_error( "clCreateCommandQueue failed. (%d)\n", error ); + goto exit; + } + + test_info.tinfo[i].d = init_genrand(genrand_int32(d)); + } + + + // Init the kernels + { + BuildKernelInfo build_info = { gMinVectorSizeIndex, test_info.threadCount, test_info.k, test_info.programs, f->nameInCode }; + if( (error = ThreadPool_Do( BuildKernel_DoubleFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) )) + goto exit; + } + + if( !gSkipCorrectnessTesting ) + { + error = ThreadPool_Do( TestDouble, (cl_uint) ((1ULL<<32) / test_info.step), &test_info ); + + if( error ) + goto exit; + + if( gWimpyMode ) + vlog( "Wimp pass" ); + else + vlog( "passed" ); + } + + if( gMeasureTimes ) + { + //Init input arrays + uint64_t *p = (uint64_t *)gIn; + uint64_t *p2 = (uint64_t *)gIn2; + for( j = 0; j < BUFFER_SIZE / sizeof( double ); j++ ) + { + p[j] = (cl_ulong) genrand_int32(d) | ((cl_ulong) genrand_int32(d) << 32); + p2[j] = (cl_ulong) genrand_int32(d) | ((cl_ulong) genrand_int32(d) << 32); + } + + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, BUFFER_SIZE, gIn, 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); + return error; + } + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, BUFFER_SIZE, gIn2, 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error ); + return error; + } + + + // Run the kernels + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + 
size_t vectorSize = sizeValues[j] * sizeof(cl_double); + size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize; + if( ( error = clSetKernelArg( test_info.k[j][0], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(test_info.programs[j]); goto exit; } + if( ( error = clSetKernelArg( test_info.k[j][0], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(test_info.programs[j]); goto exit; } + if( ( error = clSetKernelArg( test_info.k[j][0], 2, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(test_info.programs[j]); goto exit; } + + double sum = 0.0; + double bestTime = INFINITY; + for( i = 0; i < PERF_LOOP_COUNT; i++ ) + { + uint64_t startTime = GetTime(); + if( (error = clEnqueueNDRangeKernel(gQueue, test_info.k[j][0], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) + { + vlog_error( "FAILED -- could not execute kernel\n" ); + goto exit; + } + + // Make sure OpenCL is done + if( (error = clFinish(gQueue) ) ) + { + vlog_error( "Error %d at clFinish\n", error ); + goto exit; + } + + uint64_t endTime = GetTime(); + double time = SubtractTime( endTime, startTime ); + sum += time; + if( time < bestTime ) + bestTime = time; + } + + if( gReportAverageTimes ) + bestTime = sum / PERF_LOOP_COUNT; + double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (BUFFER_SIZE / sizeof( double ) ); + vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s", f->name, sizeNames[j] ); + } + for( ; j < gMaxVectorSizeIndex; j++ ) + vlog( "\t -- " ); + } + + vlog( "\n" ); + +exit: + // Release + for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) + { + clReleaseProgram(test_info.programs[i]); + if( test_info.k[i] ) + { + for( j = 0; j < test_info.threadCount; j++ ) + clReleaseKernel(test_info.k[i][j]); + + free( test_info.k[i] ); + } + } + if( test_info.tinfo ) + { + for( i = 0; i < test_info.threadCount; i++ ) + { + free_mtdata(test_info.tinfo[i].d); + clReleaseMemObject(test_info.tinfo[i].inBuf); + 
clReleaseMemObject(test_info.tinfo[i].inBuf2); + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + clReleaseMemObject(test_info.tinfo[i].outBuf[j]); + clReleaseCommandQueue(test_info.tinfo[i].tQueue); + } + + free( test_info.tinfo ); + } + + return error; +} + +static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data ) +{ + const TestInfo *job = (const TestInfo *) data; + size_t buffer_elements = job->subBufferSize; + size_t buffer_size = buffer_elements * sizeof( cl_double ); + cl_uint base = job_id * (cl_uint) job->step; + ThreadInfo *tinfo = job->tinfo + thread_id; + dptr dfunc = job->f->dfunc; + int ftz = job->ftz; + MTdata d = tinfo->d; + cl_uint j, k; + cl_int error; + const char *name = job->f->name; + cl_long *t,*r; + cl_double *s,*s2; + + Force64BitFPUPrecision(); + + // start the map of the output arrays + cl_event e[ VECTOR_SIZE_COUNT ]; + cl_long *out[ VECTOR_SIZE_COUNT ]; + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + out[j] = (cl_long*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0, buffer_size, 0, NULL, e + j, &error); + if( error || NULL == out[j]) + { + vlog_error( "Error: clEnqueueMapBuffer %d failed! 
err: %d\n", j, error ); + return error; + } + } + + // Get that moving + if( (error = clFlush(tinfo->tQueue) )) + vlog( "clFlush failed\n" ); + + //Init input array + double *p = (double *)gIn + thread_id * buffer_elements; + double *p2 = (double *)gIn2 + thread_id * buffer_elements; + j = 0; + int totalSpecialValueCount = specialValuesDoubleCount * specialValuesDoubleCount; + int indx = (totalSpecialValueCount - 1) / buffer_elements; + + if( job_id <= (cl_uint)indx ) + { // test edge cases + uint32_t x, y; + + x = (job_id * buffer_elements) % specialValuesDoubleCount; + y = (job_id * buffer_elements) / specialValuesDoubleCount; + + for( ; j < buffer_elements; j++ ) + { + p[j] = specialValuesDouble[x]; + p2[j] = specialValuesDouble[y]; + if( ++x >= specialValuesDoubleCount ) + { + x = 0; + y++; + if( y >= specialValuesDoubleCount ) + break; + } + } + } + + //Init any remaining values. + for( ; j < buffer_elements; j++ ) + { + ((cl_ulong*)p)[j] = genrand_int64(d); + ((cl_ulong*)p2)[j] = genrand_int64(d); + } + + + if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, buffer_size, p, 0, NULL, NULL) )) + { + vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error ); + goto exit; + } + + if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0, buffer_size, p2, 0, NULL, NULL) )) + { + vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error ); + goto exit; + } + + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + //Wait for the map to finish + if( (error = clWaitForEvents(1, e + j) )) + { + vlog_error( "Error: clWaitForEvents failed! err: %d\n", error ); + goto exit; + } + if( (error = clReleaseEvent( e[j] ) )) + { + vlog_error( "Error: clReleaseEvent failed! 
err: %d\n", error ); + goto exit; + } + + // Fill the result buffer with garbage, so that old results don't carry over + uint32_t pattern = 0xffffdead; + memset_pattern4(out[j], &pattern, buffer_size); + if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL) )) + { + vlog_error( "Error: clEnqueueMapBuffer failed! err: %d\n", error ); + goto exit; + } + + // run the kernel + size_t vectorCount = (buffer_elements + sizeValues[j] - 1) / sizeValues[j]; + cl_kernel kernel = job->k[j][thread_id]; //each worker thread has its own copy of the cl_kernel + cl_program program = job->programs[j]; + + if( ( error = clSetKernelArg( kernel, 0, sizeof( tinfo->outBuf[j] ), &tinfo->outBuf[j] ))){ LogBuildError(program); return error; } + if( ( error = clSetKernelArg( kernel, 1, sizeof( tinfo->inBuf ), &tinfo->inBuf ) )) { LogBuildError(program); return error; } + if( ( error = clSetKernelArg( kernel, 2, sizeof( tinfo->inBuf2 ), &tinfo->inBuf2 ) )) { LogBuildError(program); return error; } + + if( (error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL, &vectorCount, NULL, 0, NULL, NULL))) + { + vlog_error( "FAILED -- could not execute kernel\n" ); + goto exit; + } + } + + // Get that moving + if( (error = clFlush(tinfo->tQueue) )) + vlog( "clFlush 2 failed\n" ); + + if( gSkipCorrectnessTesting ) + return CL_SUCCESS; + + //Calculate the correctly rounded reference result + r = (cl_long *)gOut_Ref + thread_id * buffer_elements; + s = (cl_double *)gIn + thread_id * buffer_elements; + s2 = (cl_double *)gIn2 + thread_id * buffer_elements; + for( j = 0; j < buffer_elements; j++ ) + r[j] = dfunc.i_ff( s[j], s2[j] ); + + + // Read the data back -- no need to wait for the first N-1 buffers. This is an in order queue. 
+ for( j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++ ) + { + out[j] = (cl_long*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error); + if( error || NULL == out[j] ) + { + vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error ); + goto exit; + } + } + + // Wait for the last buffer + out[j] = (cl_long*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_TRUE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error); + if( error || NULL == out[j] ) + { + vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error ); + goto exit; + } + + //Verify data + t = (cl_long *)r; + for( j = 0; j < buffer_elements; j++ ) + { + cl_long *q = (cl_long *) out[0]; + + // If we aren't getting the correctly rounded result + if( gMinVectorSizeIndex == 0 && t[j] != q[j] ) + { + if( ftz ) + { + if( IsDoubleSubnormal( s[j]) ) + { + if( IsDoubleSubnormal( s2[j] ) ) + { + int64_t correct = dfunc.i_ff( 0.0f, 0.0f ); + int64_t correct2 = dfunc.i_ff( 0.0f, -0.0f ); + int64_t correct3 = dfunc.i_ff( -0.0f, 0.0f ); + int64_t correct4 = dfunc.i_ff( -0.0f, -0.0f ); + + if( correct == q[j] || correct2 == q[j] || correct3 == q[j] || correct4 == q[j] ) + continue; + } + else + { + int64_t correct = dfunc.i_ff( 0.0f, s2[j] ); + int64_t correct2 = dfunc.i_ff( -0.0f, s2[j] ); + if( correct == q[j] || correct2 == q[j] ) + continue; + } + } + else if( IsDoubleSubnormal( s2[j] ) ) + { + int64_t correct = dfunc.i_ff( s[j], 0.0f ); + int64_t correct2 = dfunc.i_ff( s[j], -0.0f ); + if( correct == q[j] || correct2 == q[j] ) + continue; + } + + } + + uint64_t err = t[j] - q[j]; + if( q[j] > t[j] ) + err = q[j] - t[j]; + vlog_error( "\nERROR: %s: %lld ulp error at {%.13la, %.13la}: *%lld vs. 
%lld (index: %d)\n", name, err, ((double*) s)[j], ((double*) s2)[j], t[j], q[j], j ); + error = -1; + goto exit; + } + + + for( k = MAX(1, gMinVectorSizeIndex); k < gMaxVectorSizeIndex; k++ ) + { + q = (cl_long*) out[k]; + // If we aren't getting the correctly rounded result + if( -t[j] != q[j] ) + { + if( ftz ) + { + if( IsDoubleSubnormal( s[j]) ) + { + if( IsDoubleSubnormal( s2[j] ) ) + { + int64_t correct = -dfunc.i_ff( 0.0f, 0.0f ); + int64_t correct2 = -dfunc.i_ff( 0.0f, -0.0f ); + int64_t correct3 = -dfunc.i_ff( -0.0f, 0.0f ); + int64_t correct4 = -dfunc.i_ff( -0.0f, -0.0f ); + + if( correct == q[j] || correct2 == q[j] || correct3 == q[j] || correct4 == q[j] ) + continue; + } + else + { + int64_t correct = -dfunc.i_ff( 0.0f, s2[j] ); + int64_t correct2 = -dfunc.i_ff( -0.0f, s2[j] ); + if( correct == q[j] || correct2 == q[j] ) + continue; + } + } + else if( IsDoubleSubnormal( s2[j] ) ) + { + int64_t correct = -dfunc.i_ff( s[j], 0.0f ); + int64_t correct2 = -dfunc.i_ff( s[j], -0.0f ); + if( correct == q[j] || correct2 == q[j] ) + continue; + } + + } + + uint64_t err = -t[j] - q[j]; + if( q[j] > -t[j] ) + err = q[j] + t[j]; + vlog_error( "\nERROR: %sD%s: %lld ulp error at {%.13la, %.13la}: *%lld vs. %lld (index: %d)\n", name, sizeNames[k], err, ((double*) s)[j], ((double*) s2)[j], -t[j], q[j], j ); + error = -1; + goto exit; + } + } + } + + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)) ) + { + vlog_error( "Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n", j, error ); + return error; + } + } + + if( (error = clFlush(tinfo->tQueue) )) + vlog( "clFlush 3 failed\n" ); + + + if( 0 == ( base & 0x0fffffff) ) + { + if (gVerboseBruteForce) + { + vlog("base:%14u step:%10u scale:%10u buf_elements:%10zd ThreadCount:%2u\n", base, job->step, job->scale, buffer_elements, job->threadCount); + } else + { + vlog("." 
); + } + fflush(stdout); + } + +exit: + return error; +} + diff --git a/test_conformance/math_brute_force/macro_unary.c b/test_conformance/math_brute_force/macro_unary.c new file mode 100644 index 00000000..74e1dece --- /dev/null +++ b/test_conformance/math_brute_force/macro_unary.c @@ -0,0 +1,965 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "Utility.h" + +#include +#include "FunctionList.h" + +int TestMacro_Int_Float(const Func *f, MTdata); +int TestMacro_Int_Double(const Func *f, MTdata); + +#if defined( __cplusplus) + extern "C" +#endif +const vtbl _macro_unary = { "macro_unary", TestMacro_Int_Float, TestMacro_Int_Double }; + +static int BuildKernel( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p ); +static int BuildKernelDouble( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p ); + +static int BuildKernel( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p ) +{ + const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global int", sizeNames[vectorSize], "* out, __global float", sizeNames[vectorSize], "* in)\n" + "{\n" + " int i = get_global_id(0);\n" + " out[i] = ", name, "( in[i] );\n" + "}\n" + }; + const char *c3[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global int* out, __global float* in)\n" + "{\n" + " size_t i = get_global_id(0);\n" 
+ " if( i + 1 < get_global_size(0) )\n" + " {\n" + " float3 f0 = vload3( 0, in + 3 * i );\n" + " int3 i0 = ", name, "( f0 );\n" + " vstore3( i0, 0, out + 3*i );\n" + " }\n" + " else\n" + " {\n" + " size_t parity = i & 1; // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n" + " int3 i0;\n" + " float3 f0;\n" + " switch( parity )\n" + " {\n" + " case 1:\n" + " f0 = (float3)( in[3*i], 0xdead, 0xdead ); \n" + " break;\n" + " case 0:\n" + " f0 = (float3)( in[3*i], in[3*i+1], 0xdead ); \n" + " break;\n" + " }\n" + " i0 = ", name, "( f0 );\n" + " switch( parity )\n" + " {\n" + " case 0:\n" + " out[3*i+1] = i0.y; \n" + " // fall through\n" + " case 1:\n" + " out[3*i] = i0.x; \n" + " break;\n" + " }\n" + " }\n" + "}\n" + }; + + const char **kern = c; + size_t kernSize = sizeof(c)/sizeof(c[0]); + + if( sizeValues[vectorSize] == 3 ) + { + kern = c3; + kernSize = sizeof(c3)/sizeof(c3[0]); + } + + char testName[32]; + snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); + + return MakeKernels(kern, (cl_uint) kernSize, testName, kernel_count, k, p); +} + +static int BuildKernelDouble( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p ) +{ + const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", + "__kernel void math_kernel", sizeNames[vectorSize], "( __global long", sizeNames[vectorSize], "* out, __global double", sizeNames[vectorSize], "* in)\n" + "{\n" + " int i = get_global_id(0);\n" + " out[i] = ", name, "( in[i] );\n" + "}\n" + }; + + const char *c3[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", + "__kernel void math_kernel", sizeNames[vectorSize], "( __global long* out, __global double* in)\n" + "{\n" + " size_t i = get_global_id(0);\n" + " if( i + 1 < get_global_size(0) )\n" + " {\n" + " double3 d0 = vload3( 0, in + 3 * i );\n" + " long3 l0 = ", name, "( d0 );\n" + " vstore3( l0, 0, out + 3*i );\n" + " }\n" + " 
else\n" + " {\n" + " size_t parity = i & 1; // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n" + " double3 d0;\n" + " switch( parity )\n" + " {\n" + " case 1:\n" + " d0 = (double3)( in[3*i], NAN, NAN ); \n" + " break;\n" + " case 0:\n" + " d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n" + " break;\n" + " }\n" + " long3 l0 = ", name, "( d0 );\n" + " switch( parity )\n" + " {\n" + " case 0:\n" + " out[3*i+1] = l0.y; \n" + " // fall through\n" + " case 1:\n" + " out[3*i] = l0.x; \n" + " break;\n" + " }\n" + " }\n" + "}\n" + }; + + const char **kern = c; + size_t kernSize = sizeof(c)/sizeof(c[0]); + + if( sizeValues[vectorSize] == 3 ) + { + kern = c3; + kernSize = sizeof(c3)/sizeof(c3[0]); + } + + + char testName[32]; + snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); + + return MakeKernels(kern, (cl_uint) kernSize, testName, kernel_count, k, p); +} + +typedef struct BuildKernelInfo +{ + cl_uint offset; // the first vector size to build + cl_uint kernel_count; + cl_kernel **kernels; + cl_program *programs; + const char *nameInCode; +}BuildKernelInfo; + +static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); +static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ) +{ + BuildKernelInfo *info = (BuildKernelInfo*) p; + cl_uint i = info->offset + job_id; + return BuildKernel( info->nameInCode, i, info->kernel_count, info->kernels[i], info->programs + i ); +} + +static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); +static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ) +{ + BuildKernelInfo *info = (BuildKernelInfo*) p; + cl_uint i = info->offset + job_id; + return BuildKernelDouble( info->nameInCode, i, info->kernel_count, info->kernels[i], info->programs + i ); +} + +//Thread specific data for a worker thread +typedef struct ThreadInfo +{ + 
cl_mem inBuf; // input buffer for the thread + cl_mem outBuf[ VECTOR_SIZE_COUNT ]; // output buffers for the thread + cl_command_queue tQueue; // per thread command queue to improve performance +}ThreadInfo; + +typedef struct TestInfo +{ + size_t subBufferSize; // Size of the sub-buffer in elements + const Func *f; // A pointer to the function info + cl_program programs[ VECTOR_SIZE_COUNT ]; // programs for various vector sizes + cl_kernel *k[VECTOR_SIZE_COUNT ]; // arrays of thread-specific kernels for each worker thread: k[vector_size][thread_id] + ThreadInfo *tinfo; // An array of thread specific information for each worker thread + cl_uint threadCount; // Number of worker threads + cl_uint step; // step between each chunk and the next. + cl_uint scale; // stride between individual test values + int ftz; // non-zero if running in flush to zero mode + +}TestInfo; + +static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *p ); + +int TestMacro_Int_Float(const Func *f, MTdata d) +{ + TestInfo test_info; + cl_int error; + size_t i, j; + + logFunctionInfo(f->name,sizeof(cl_float),gTestFastRelaxed); + + // Init test_info + memset( &test_info, 0, sizeof( test_info ) ); + test_info.threadCount = GetThreadCount(); + test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); + test_info.scale = 1; + if (gWimpyMode ) + { + test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); + test_info.scale = (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor; + } + test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale; + test_info.f = f; + test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); + // cl_kernels aren't thread safe, so we make one for each vector size for every thread + for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) + { + size_t array_size = test_info.threadCount * sizeof( cl_kernel ); + 
test_info.k[i] = (cl_kernel*)malloc( array_size ); + if( NULL == test_info.k[i] ) + { + vlog_error( "Error: Unable to allocate storage for kernels!\n" ); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset( test_info.k[i], 0, array_size ); + } + test_info.tinfo = (ThreadInfo*)malloc( test_info.threadCount * sizeof(*test_info.tinfo) ); + if( NULL == test_info.tinfo ) + { + vlog_error( "Error: Unable to allocate storage for thread specific data.\n" ); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset( test_info.tinfo, 0, test_info.threadCount * sizeof(*test_info.tinfo) ); + for( i = 0; i < test_info.threadCount; i++ ) + { + cl_buffer_region region = { i * test_info.subBufferSize * sizeof( cl_float), test_info.subBufferSize * sizeof( cl_float) }; + test_info.tinfo[i].inBuf = clCreateSubBuffer( gInBuffer, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if( error || NULL == test_info.tinfo[i].inBuf) + { + vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size ); + goto exit; + } + + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if( error || NULL == test_info.tinfo[i].outBuf[j] ) + { + vlog_error( "Error: Unable to create sub-buffer of gOutBuffer for region {%zd, %zd}\n", region.origin, region.size ); + goto exit; + } + } + test_info.tinfo[i].tQueue = clCreateCommandQueueWithProperties(gContext, gDevice, 0, &error); + if( NULL == test_info.tinfo[i].tQueue || error ) + { + vlog_error( "clCreateCommandQueue failed. 
(%d)\n", error ); + goto exit; + } + } + + // Init the kernels + { + BuildKernelInfo build_info = { gMinVectorSizeIndex, test_info.threadCount, test_info.k, test_info.programs, f->nameInCode }; + if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) )) + goto exit; + } + + if( !gSkipCorrectnessTesting ) + { + error = ThreadPool_Do( TestFloat, (cl_uint) ((1ULL<<32) / test_info.step), &test_info ); + + if( error ) + goto exit; + + if( gWimpyMode ) + vlog( "Wimp pass" ); + else + vlog( "passed" ); + } + + if( gMeasureTimes ) + { + //Init input array + cl_uint *p = (cl_uint *)gIn; + for( j = 0; j < BUFFER_SIZE / sizeof( float ); j++ ) + p[j] = genrand_int32(d); + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, BUFFER_SIZE, gIn, 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); + return error; + } + + + // Run the kernels + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + size_t vectorSize = sizeof( cl_float ) * sizeValues[j]; + size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize; // BUFFER_SIZE / vectorSize rounded up + if( ( error = clSetKernelArg( test_info.k[j][0], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(test_info.programs[j]); goto exit; } + if( ( error = clSetKernelArg( test_info.k[j][0], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(test_info.programs[j]); goto exit; } + + double sum = 0.0; + double bestTime = INFINITY; + for( i = 0; i < PERF_LOOP_COUNT; i++ ) + { + uint64_t startTime = GetTime(); + if( (error = clEnqueueNDRangeKernel(gQueue, test_info.k[j][0], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) + { + vlog_error( "FAILED -- could not execute kernel\n" ); + goto exit; + } + + // Make sure OpenCL is done + if( (error = clFinish(gQueue) ) ) + { + vlog_error( "Error %d at clFinish\n", error ); + goto exit; + } + + uint64_t endTime = GetTime(); + double time = SubtractTime( endTime, 
startTime ); + sum += time; + if( time < bestTime ) + bestTime = time; + } + + if( gReportAverageTimes ) + bestTime = sum / PERF_LOOP_COUNT; + double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (BUFFER_SIZE / sizeof( float ) ); + vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s", f->name, sizeNames[j] ); + } + } + + vlog( "\n" ); + +exit: + for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) + { + clReleaseProgram(test_info.programs[i]); + if( test_info.k[i] ) + { + for( j = 0; j < test_info.threadCount; j++ ) + clReleaseKernel(test_info.k[i][j]); + + free( test_info.k[i] ); + } + } + if( test_info.tinfo ) + { + for( i = 0; i < test_info.threadCount; i++ ) + { + clReleaseMemObject(test_info.tinfo[i].inBuf); + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + clReleaseMemObject(test_info.tinfo[i].outBuf[j]); + clReleaseCommandQueue(test_info.tinfo[i].tQueue); + } + + free( test_info.tinfo ); + } + + return error; +} + +static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data ) +{ + const TestInfo *job = (const TestInfo *) data; + size_t buffer_elements = job->subBufferSize; + size_t buffer_size = buffer_elements * sizeof( cl_float ); + cl_uint scale = job->scale; + cl_uint base = job_id * (cl_uint) job->step; + ThreadInfo *tinfo = job->tinfo + thread_id; + fptr func = job->f->func; + int ftz = job->ftz; + cl_uint j, k; + cl_int error = CL_SUCCESS; + const char *name = job->f->name; + + int signbit_test = 0; + if(!strcmp(name, "signbit")) + signbit_test = 1; + + #define ref_func(s) ( signbit_test ? 
func.i_f_f( s ) : func.i_f( s ) ) + + // start the map of the output arrays + cl_event e[ VECTOR_SIZE_COUNT ]; + cl_int *out[ VECTOR_SIZE_COUNT ]; + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + out[j] = (cl_int*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0, buffer_size, 0, NULL, e + j, &error); + if( error || NULL == out[j]) + { + vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error ); + return error; + } + } + + + // Get that moving + if( (error = clFlush(tinfo->tQueue) )) + vlog( "clFlush failed\n" ); + + // Write the new values to the input array + cl_uint *p = (cl_uint*) gIn + thread_id * buffer_elements; + for( j = 0; j < buffer_elements; j++ ) + p[j] = base + j * scale; + + if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, buffer_size, p, 0, NULL, NULL) )) + { + vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error ); + return error; + } + + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + //Wait for the map to finish + if( (error = clWaitForEvents(1, e + j) )) + { + vlog_error( "Error: clWaitForEvents failed! err: %d\n", error ); + return error; + } + if( (error = clReleaseEvent( e[j] ) )) + { + vlog_error( "Error: clReleaseEvent failed! err: %d\n", error ); + return error; + } + + // Fill the result buffer with garbage, so that old results don't carry over + uint32_t pattern = 0xffffdead; + memset_pattern4(out[j], &pattern, buffer_size); + if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL) )) + { + vlog_error( "Error: clEnqueueMapBuffer failed! 
err: %d\n", error ); + return error; + } + + // run the kernel + size_t vectorCount = (buffer_elements + sizeValues[j] - 1) / sizeValues[j]; + cl_kernel kernel = job->k[j][thread_id]; //each worker thread has its own copy of the cl_kernel + cl_program program = job->programs[j]; + + if( ( error = clSetKernelArg( kernel, 0, sizeof( tinfo->outBuf[j] ), &tinfo->outBuf[j] ))){ LogBuildError(program); return error; } + if( ( error = clSetKernelArg( kernel, 1, sizeof( tinfo->inBuf ), &tinfo->inBuf ) )) { LogBuildError(program); return error; } + + if( (error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL, &vectorCount, NULL, 0, NULL, NULL))) + { + vlog_error( "FAILED -- could not execute kernel\n" ); + return error; + } + } + + + // Get that moving + if( (error = clFlush(tinfo->tQueue) )) + vlog( "clFlush 2 failed\n" ); + + if( gSkipCorrectnessTesting ) + return CL_SUCCESS; + + //Calculate the correctly rounded reference result + cl_int *r = (cl_int *)gOut_Ref + thread_id * buffer_elements; + float *s = (float *)p; + for( j = 0; j < buffer_elements; j++ ) + r[j] = ref_func( s[j] ); + + // Read the data back -- no need to wait for the first N-1 buffers. This is an in order queue. + for( j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++ ) + { + out[j] = (cl_int*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error); + if( error || NULL == out[j] ) + { + vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error ); + return error; + } + } + // Wait for the last buffer + out[j] = (cl_int*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_TRUE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error); + if( error || NULL == out[j] ) + { + vlog_error( "Error: clEnqueueMapBuffer %d failed! 
err: %d\n", j, error ); + return error; + } + + //Verify data + cl_int *t = (cl_int *)r; + for( j = 0; j < buffer_elements; j++ ) + { + for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) + { + cl_int *q = out[0]; + + // If we aren't getting the correctly rounded result + if( gMinVectorSizeIndex == 0 && t[j] != q[j]) + { + // If we aren't getting the correctly rounded result + if( ftz ) + { + if( IsFloatSubnormal( s[j]) ) + { + int correct = ref_func( +0.0f ); + int correct2 = ref_func( -0.0f ); + if( correct == q[j] || correct2 == q[j] ) + continue; + } + } + + uint32_t err = t[j] - q[j]; + if( q[j] > t[j] ) + err = q[j] - t[j]; + vlog_error( "\nERROR: %s: %d ulp error at %a: *%d vs. %d\n", name, err, ((float*) s)[j], t[j], q[j] ); + error = -1; + goto exit; + } + + + for( k = MAX(1, gMinVectorSizeIndex); k < gMaxVectorSizeIndex; k++ ) + { + q = out[k]; + // If we aren't getting the correctly rounded result + if( -t[j] != q[j] ) + { + if( ftz ) + { + if( IsFloatSubnormal( s[j])) + { + int correct = -ref_func( +0.0f ); + int correct2 = -ref_func( -0.0f ); + if( correct == q[j] || correct2 == q[j] ) + continue; + } + } + + uint32_t err = -t[j] - q[j]; + if( q[j] > -t[j] ) + err = q[j] + t[j]; + vlog_error( "\nERROR: %s%s: %d ulp error at %a: *%d vs. %d\n", name, sizeNames[k], err, ((float*) s)[j], -t[j], q[j] ); + error = -1; + goto exit; + } + } + } + } + +exit: + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)) ) + { + vlog_error( "Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n", j, error ); + return error; + } + } + + if( (error = clFlush(tinfo->tQueue) )) + vlog( "clFlush 3 failed\n" ); + + + if( 0 == ( base & 0x0fffffff) ) + { + if (gVerboseBruteForce) + { + vlog("base:%14u step:%10u scale:%10u buf_elements:%10zd ThreadCount:%2u\n", base, job->step, job->scale, buffer_elements, job->threadCount); + } else + { + vlog("." 
); + } + fflush(stdout); + } + + return error; +} + +static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data ); + +int TestMacro_Int_Double(const Func *f, MTdata d) +{ + TestInfo test_info; + cl_int error; + size_t i, j; + + logFunctionInfo(f->name,sizeof(cl_double),gTestFastRelaxed); + // Init test_info + memset( &test_info, 0, sizeof( test_info ) ); + test_info.threadCount = GetThreadCount(); + test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); + test_info.scale = 1; + if (gWimpyMode ) + { + test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); + test_info.scale = (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor; + } + + test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale; + test_info.f = f; + test_info.ftz = f->ftz || gForceFTZ; + + // cl_kernels aren't thread safe, so we make one for each vector size for every thread + for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) + { + size_t array_size = test_info.threadCount * sizeof( cl_kernel ); + test_info.k[i] = (cl_kernel*)malloc( array_size ); + if( NULL == test_info.k[i] ) + { + vlog_error( "Error: Unable to allocate storage for kernels!\n" ); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset( test_info.k[i], 0, array_size ); + } + test_info.tinfo = (ThreadInfo*)malloc( test_info.threadCount * sizeof(*test_info.tinfo) ); + if( NULL == test_info.tinfo ) + { + vlog_error( "Error: Unable to allocate storage for thread specific data.\n" ); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset( test_info.tinfo, 0, test_info.threadCount * sizeof(*test_info.tinfo) ); + for( i = 0; i < test_info.threadCount; i++ ) + { + cl_buffer_region region = { i * test_info.subBufferSize * sizeof( cl_double), test_info.subBufferSize * sizeof( cl_double) }; + test_info.tinfo[i].inBuf = clCreateSubBuffer( gInBuffer, CL_MEM_READ_ONLY, 
CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if( error || NULL == test_info.tinfo[i].inBuf) + { + vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size ); + goto exit; + } + + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + /* Qualcomm fix: 9461 read-write flags must be compatible with parent buffer */ + test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + /* Qualcomm fix: end */ + if( error || NULL == test_info.tinfo[i].outBuf[j] ) + { + vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size ); + goto exit; + } + } + test_info.tinfo[i].tQueue = clCreateCommandQueueWithProperties(gContext, gDevice, 0, &error); + if( NULL == test_info.tinfo[i].tQueue || error ) + { + vlog_error( "clCreateCommandQueue failed. (%d)\n", error ); + goto exit; + } + } + + // Init the kernels + { + BuildKernelInfo build_info = { gMinVectorSizeIndex, test_info.threadCount, test_info.k, test_info.programs, f->nameInCode }; + if( (error = ThreadPool_Do( BuildKernel_DoubleFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) )) + goto exit; + } + + if( !gSkipCorrectnessTesting ) + { + error = ThreadPool_Do( TestDouble, (cl_uint) ((1ULL<<32) / test_info.step), &test_info ); + + if( error ) + goto exit; + + if( gWimpyMode ) + vlog( "Wimp pass" ); + else + vlog( "passed" ); + } + + if( gMeasureTimes ) + { + //Init input array + cl_ulong *p = (cl_ulong *)gIn; + for( j = 0; j < BUFFER_SIZE / sizeof( cl_double ); j++ ) + p[j] = DoubleFromUInt32(genrand_int32(d)); + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, BUFFER_SIZE, gIn, 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); + return error; + } + + + // Run the kernels + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + size_t vectorSize = 
sizeValues[j] * sizeof(cl_double); + size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize; + if( ( error = clSetKernelArg( test_info.k[j][0], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(test_info.programs[j]); goto exit; } + if( ( error = clSetKernelArg( test_info.k[j][0], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(test_info.programs[j]); goto exit; } + + double sum = 0.0; + double bestTime = INFINITY; + for( i = 0; i < PERF_LOOP_COUNT; i++ ) + { + uint64_t startTime = GetTime(); + if( (error = clEnqueueNDRangeKernel(gQueue, test_info.k[j][0], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) + { + vlog_error( "FAILED -- could not execute kernel\n" ); + goto exit; + } + + // Make sure OpenCL is done + if( (error = clFinish(gQueue) ) ) + { + vlog_error( "Error %d at clFinish\n", error ); + goto exit; + } + + uint64_t endTime = GetTime(); + double time = SubtractTime( endTime, startTime ); + sum += time; + if( time < bestTime ) + bestTime = time; + } + + if( gReportAverageTimes ) + bestTime = sum / PERF_LOOP_COUNT; + double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (BUFFER_SIZE / sizeof( double ) ); + vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s", f->name, sizeNames[j] ); + } + for( ; j < gMaxVectorSizeIndex; j++ ) + vlog( "\t -- " ); + } + + vlog( "\n" ); + +exit: + for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) + { + clReleaseProgram(test_info.programs[i]); + if( test_info.k[i] ) + { + for( j = 0; j < test_info.threadCount; j++ ) + clReleaseKernel(test_info.k[i][j]); + + free( test_info.k[i] ); + } + } + if( test_info.tinfo ) + { + for( i = 0; i < test_info.threadCount; i++ ) + { + clReleaseMemObject(test_info.tinfo[i].inBuf); + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + clReleaseMemObject(test_info.tinfo[i].outBuf[j]); + clReleaseCommandQueue(test_info.tinfo[i].tQueue); + } + + free( test_info.tinfo ); + } + + return 
error; +} + +static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data ) +{ + const TestInfo *job = (const TestInfo *) data; + size_t buffer_elements = job->subBufferSize; + size_t buffer_size = buffer_elements * sizeof( cl_double ); + cl_uint scale = job->scale; + cl_uint base = job_id * (cl_uint) job->step; + ThreadInfo *tinfo = job->tinfo + thread_id; + dptr dfunc = job->f->dfunc; + cl_uint j, k; + cl_int error; + int ftz = job->ftz; + const char *name = job->f->name; + + Force64BitFPUPrecision(); + + // start the map of the output arrays + cl_event e[ VECTOR_SIZE_COUNT ]; + cl_long *out[ VECTOR_SIZE_COUNT ]; + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + out[j] = (cl_long*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0, buffer_size, 0, NULL, e + j, &error); + if( error || NULL == out[j]) + { + vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error ); + return error; + } + } + + // Get that moving + if( (error = clFlush(tinfo->tQueue) )) + vlog( "clFlush failed\n" ); + + // Write the new values to the input array + cl_double *p = (cl_double*) gIn + thread_id * buffer_elements; + for( j = 0; j < buffer_elements; j++ ) + p[j] = DoubleFromUInt32( base + j * scale); + + if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, buffer_size, p, 0, NULL, NULL) )) + { + vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error ); + return error; + } + + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + //Wait for the map to finish + if( (error = clWaitForEvents(1, e + j) )) + { + vlog_error( "Error: clWaitForEvents failed! err: %d\n", error ); + return error; + } + if( (error = clReleaseEvent( e[j] ) )) + { + vlog_error( "Error: clReleaseEvent failed! 
err: %d\n", error ); + return error; + } + + // Fill the result buffer with garbage, so that old results don't carry over + uint32_t pattern = 0xffffdead; + memset_pattern4(out[j], &pattern, buffer_size); + if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL) )) + { + vlog_error( "Error: clEnqueueMapBuffer failed! err: %d\n", error ); + return error; + } + + // run the kernel + size_t vectorCount = (buffer_elements + sizeValues[j] - 1) / sizeValues[j]; + cl_kernel kernel = job->k[j][thread_id]; //each worker thread has its own copy of the cl_kernel + cl_program program = job->programs[j]; + + if( ( error = clSetKernelArg( kernel, 0, sizeof( tinfo->outBuf[j] ), &tinfo->outBuf[j] ))){ LogBuildError(program); return error; } + if( ( error = clSetKernelArg( kernel, 1, sizeof( tinfo->inBuf ), &tinfo->inBuf ) )) { LogBuildError(program); return error; } + + if( (error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL, &vectorCount, NULL, 0, NULL, NULL))) + { + vlog_error( "FAILED -- could not execute kernel\n" ); + return error; + } + } + + + // Get that moving + if( (error = clFlush(tinfo->tQueue) )) + vlog( "clFlush 2 failed\n" ); + + if( gSkipCorrectnessTesting ) + return CL_SUCCESS; + + //Calculate the correctly rounded reference result + cl_long *r = (cl_long *)gOut_Ref + thread_id * buffer_elements; + cl_double *s = (cl_double *)p; + for( j = 0; j < buffer_elements; j++ ) + r[j] = dfunc.i_f( s[j] ); + + // Read the data back -- no need to wait for the first N-1 buffers. This is an in order queue. + for( j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++ ) + { + out[j] = (cl_long*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error); + if( error || NULL == out[j] ) + { + vlog_error( "Error: clEnqueueMapBuffer %d failed! 
err: %d\n", j, error ); + return error; + } + } + // Wait for the last buffer + out[j] = (cl_long*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_TRUE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error); + if( error || NULL == out[j] ) + { + vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error ); + return error; + } + + + //Verify data + cl_long *t = (cl_long *)r; + for( j = 0; j < buffer_elements; j++ ) + { + cl_long *q = out[0]; + + + // If we aren't getting the correctly rounded result + if( gMinVectorSizeIndex == 0 && t[j] != q[j]) + { + // If we aren't getting the correctly rounded result + if( ftz ) + { + if( IsDoubleSubnormal( s[j]) ) + { + cl_long correct = dfunc.i_f( +0.0f ); + cl_long correct2 = dfunc.i_f( -0.0f ); + if( correct == q[j] || correct2 == q[j] ) + continue; + } + } + + cl_ulong err = t[j] - q[j]; + if( q[j] > t[j] ) + err = q[j] - t[j]; + vlog_error( "\nERROR: %sD: %zd ulp error at %.13la: *%zd vs. %zd\n", name, err, ((double*) gIn)[j], t[j], q[j] ); + return -1; + } + + + for( k = MAX(1, gMinVectorSizeIndex); k < gMaxVectorSizeIndex; k++ ) + { + q = out[k]; + // If we aren't getting the correctly rounded result + if( -t[j] != q[j] ) + { + if( ftz ) + { + if( IsDoubleSubnormal( s[j])) + { + int64_t correct = -dfunc.i_f( +0.0f ); + int64_t correct2 = -dfunc.i_f( -0.0f ); + if( correct == q[j] || correct2 == q[j] ) + continue; + } + } + + cl_ulong err = -t[j] - q[j]; + if( q[j] > -t[j] ) + err = q[j] + t[j]; + vlog_error( "\nERROR: %sD%s: %zd ulp error at %.13la: *%zd vs. %zd\n", name, sizeNames[k], err, ((double*) gIn)[j], -t[j], q[j] ); + return -1; + } + } + + } + + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)) ) + { + vlog_error( "Error: clEnqueueUnmapMemObject %d failed 2! 
err: %d\n", j, error ); + return error; + } + } + + if( (error = clFlush(tinfo->tQueue) )) + vlog( "clFlush 3 failed\n" ); + + + if( 0 == ( base & 0x0fffffff) ) + { + if (gVerboseBruteForce) + { + vlog("base:%14u step:%10u scale:%10u buf_elements:%10zd ThreadCount:%2u\n", base, job->step, job->scale, buffer_elements, job->threadCount); + } else + { + vlog("." ); + } + fflush(stdout); + } + + return CL_SUCCESS; +} + + + + diff --git a/test_conformance/math_brute_force/mad.c b/test_conformance/math_brute_force/mad.c new file mode 100644 index 00000000..844da097 --- /dev/null +++ b/test_conformance/math_brute_force/mad.c @@ -0,0 +1,1131 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "Utility.h" + +#include +#include "FunctionList.h" + +int TestFunc_mad(const Func *f, MTdata); +int TestFunc_mad_Double(const Func *f, MTdata); + +#if defined( __cplusplus) + extern "C" +#endif +const vtbl _mad_tbl = { "ternary", TestFunc_mad, TestFunc_mad_Double }; + +static int BuildKernel( const char *name, int vectorSize, cl_kernel *k, cl_program *p ); +static int BuildKernelDouble( const char *name, int vectorSize, cl_kernel *k, cl_program *p ); + +static int BuildKernel( const char *name, int vectorSize, cl_kernel *k, cl_program *p ) +{ + const char *c[] = { + "__kernel void math_kernel", sizeNames[vectorSize], "( __global float", sizeNames[vectorSize], "* out, __global float", sizeNames[vectorSize], "* in1, __global float", sizeNames[vectorSize], "* in2, __global float", sizeNames[vectorSize], "* in3 )\n" + "{\n" + " int i = get_global_id(0);\n" + " out[i] = ", name, "( in1[i], in2[i], in3[i] );\n" + "}\n" + }; + const char *c3[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global float* out, __global float* in, __global float* in2, __global float* in3)\n" + "{\n" + " size_t i = get_global_id(0);\n" + " if( i + 1 < get_global_size(0) )\n" + " {\n" + " float3 f0 = vload3( 0, in + 3 * i );\n" + " float3 f1 = vload3( 0, in2 + 3 * i );\n" + " float3 f2 = vload3( 0, in3 + 3 * i );\n" + " f0 = ", name, "( f0, f1, f2 );\n" + " vstore3( f0, 0, out + 3*i );\n" + " }\n" + " else\n" + " {\n" + " size_t parity = i & 1; // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). 
Assume power of two buffer size \n" + " float3 f0, f1, f2;\n" + " switch( parity )\n" + " {\n" + " case 1:\n" + " f0 = (float3)( in[3*i], NAN, NAN ); \n" + " f1 = (float3)( in2[3*i], NAN, NAN ); \n" + " f2 = (float3)( in3[3*i], NAN, NAN ); \n" + " break;\n" + " case 0:\n" + " f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n" + " f1 = (float3)( in2[3*i], in2[3*i+1], NAN ); \n" + " f2 = (float3)( in3[3*i], in3[3*i+1], NAN ); \n" + " break;\n" + " }\n" + " f0 = ", name, "( f0, f1, f2 );\n" + " switch( parity )\n" + " {\n" + " case 0:\n" + " out[3*i+1] = f0.y; \n" + " // fall through\n" + " case 1:\n" + " out[3*i] = f0.x; \n" + " break;\n" + " }\n" + " }\n" + "}\n" + }; + + const char **kern = c; + size_t kernSize = sizeof(c)/sizeof(c[0]); + + if( sizeValues[vectorSize] == 3 ) + { + kern = c3; + kernSize = sizeof(c3)/sizeof(c3[0]); + } + + char testName[32]; + snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); + + return MakeKernel(kern, (cl_uint) kernSize, testName, k, p); +} + +static int BuildKernelDouble( const char *name, int vectorSize, cl_kernel *k, cl_program *p ) +{ + const char *c[] = { + "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", + "__kernel void math_kernel", sizeNames[vectorSize], "( __global double", sizeNames[vectorSize], "* out, __global double", sizeNames[vectorSize], "* in1, __global double", sizeNames[vectorSize], "* in2, __global double", sizeNames[vectorSize], "* in3 )\n" + "{\n" + " int i = get_global_id(0);\n" + " out[i] = ", name, "( in1[i], in2[i], in3[i] );\n" + "}\n" + }; + const char *c3[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", + "__kernel void math_kernel", sizeNames[vectorSize], "( __global double* out, __global double* in, __global double* in2, __global double* in3)\n" + "{\n" + " size_t i = get_global_id(0);\n" + " if( i + 1 < get_global_size(0) )\n" + " {\n" + " double3 d0 = vload3( 0, in + 3 * i );\n" + " double3 d1 = vload3( 0, in2 + 3 * i );\n" + " double3 d2 = vload3( 0, in3 + 3 
* i );\n" + " d0 = ", name, "( d0, d1, d2 );\n" + " vstore3( d0, 0, out + 3*i );\n" + " }\n" + " else\n" + " {\n" + " size_t parity = i & 1; // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n" + " double3 d0, d1, d2;\n" + " switch( parity )\n" + " {\n" + " case 1:\n" + " d0 = (double3)( in[3*i], NAN, NAN ); \n" + " d1 = (double3)( in2[3*i], NAN, NAN ); \n" + " d2 = (double3)( in3[3*i], NAN, NAN ); \n" + " break;\n" + " case 0:\n" + " d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n" + " d1 = (double3)( in2[3*i], in2[3*i+1], NAN ); \n" + " d2 = (double3)( in3[3*i], in3[3*i+1], NAN ); \n" + " break;\n" + " }\n" + " d0 = ", name, "( d0, d1, d2 );\n" + " switch( parity )\n" + " {\n" + " case 0:\n" + " out[3*i+1] = d0.y; \n" + " // fall through\n" + " case 1:\n" + " out[3*i] = d0.x; \n" + " break;\n" + " }\n" + " }\n" + "}\n" + }; + + const char **kern = c; + size_t kernSize = sizeof(c)/sizeof(c[0]); + + if( sizeValues[vectorSize] == 3 ) + { + kern = c3; + kernSize = sizeof(c3)/sizeof(c3[0]); + } + + char testName[32]; + snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); + + return MakeKernel(kern, (cl_uint) kernSize, testName, k, p); +} + +typedef struct BuildKernelInfo +{ + cl_uint offset; // the first vector size to build + cl_kernel *kernels; + cl_program *programs; + const char *nameInCode; +}BuildKernelInfo; + +static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); +static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ) +{ + BuildKernelInfo *info = (BuildKernelInfo*) p; + cl_uint i = info->offset + job_id; + return BuildKernel( info->nameInCode, i, info->kernels + i, info->programs + i ); +} + +static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); +static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ) +{ + BuildKernelInfo *info = 
(BuildKernelInfo*) p; + cl_uint i = info->offset + job_id; + return BuildKernelDouble( info->nameInCode, i, info->kernels + i, info->programs + i ); +} + +int TestFunc_mad(const Func *f, MTdata d) +{ + uint64_t i; + uint32_t j, k; + int error; + + logFunctionInfo(f->name,sizeof(cl_float),gTestFastRelaxed); + + cl_program programs[ VECTOR_SIZE_COUNT ]; + cl_kernel kernels[ VECTOR_SIZE_COUNT ]; + float maxError = 0.0f; +// int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); + float maxErrorVal = 0.0f; + float maxErrorVal2 = 0.0f; + float maxErrorVal3 = 0.0f; + size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE; + uint64_t step = bufferSize / sizeof( float ); + + if( gWimpyMode ) + { + step = (1ULL<<32) * gWimpyReductionFactor / (512); + } + // Init the kernels + BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, f->nameInCode }; + if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) )) + return error; +/* + for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) + if( (error = BuildKernel( f->nameInCode, (int) i, kernels + i, programs + i) ) ) + return error; +*/ + + for( i = 0; i < (1ULL<<32); i += step ) + { + //Init input array + uint32_t *p = (uint32_t *)gIn; + uint32_t *p2 = (uint32_t *)gIn2; + uint32_t *p3 = (uint32_t *)gIn3; + for( j = 0; j < bufferSize / sizeof( float ); j++ ) + { + p[j] = genrand_int32(d); + p2[j] = genrand_int32(d); + p3[j] = genrand_int32(d); + } + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); + return error; + } + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, bufferSize, gIn2, 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error ); + return error; + } + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0, bufferSize, gIn3, 0, 
NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer3 ***\n", error ); + return error; + } + + // write garbage into output arrays + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + uint32_t pattern = 0xffffdead; + memset_pattern4(gOut[j], &pattern, bufferSize); + if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, bufferSize, gOut[j], 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", error, j ); + goto exit; + } + } + + // Run the kernels + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + size_t vectorSize = sizeof( cl_float ) * sizeValues[j]; + size_t localCount = (bufferSize + vectorSize - 1) / vectorSize; // bufferSize / vectorSize rounded up + if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; } + if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; } + if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(programs[j]); goto exit; } + if( ( error = clSetKernelArg( kernels[j], 3, sizeof( gInBuffer3 ), &gInBuffer3 ) )) { LogBuildError(programs[j]); goto exit; } + + if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) + { + vlog_error( "FAILED -- could not execute kernel\n" ); + goto exit; + } + } + + // Get that moving + if( (error = clFlush(gQueue) )) + vlog( "clFlush failed\n" ); + + //Calculate the correctly rounded reference result + float *r = (float *)gOut_Ref; + float *s = (float *)gIn; + float *s2 = (float *)gIn2; + float *s3 = (float *)gIn3; + for( j = 0; j < bufferSize / sizeof( float ); j++ ) + r[j] = (float) f->func.f_fff( s[j], s2[j], s3[j] ); + + // Read the data back + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], 
CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL)) ) + { + vlog_error( "ReadArray failed %d\n", error ); + goto exit; + } + } + + if( gSkipCorrectnessTesting ) + break; + + //Verify data -- Commented out on purpose. no verification possible. MAD is a random number generator. +/* + uint32_t *t = gOut_Ref; + for( j = 0; j < bufferSize / sizeof( float ); j++ ) + { + for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) + { + uint32_t *q = gOut[k]; + + // If we aren't getting the correctly rounded result + if( t[j] != q[j] ) + { + float test = ((float*) q)[j]; + double correct = f->func.f_fff( s[j], s2[j], s3[j] ); + float err = Ulp_Error( test, correct ); + int fail = ! (fabsf(err) <= f->float_ulps); + + if( fail && ftz ) + { + // retry per section 6.5.3.2 + if( IsFloatSubnormal(correct) ) + { // look at me, + fail = fail && ( test != 0.0f ); + if( ! fail ) + err = 0.0f; + } + + // retry per section 6.5.3.3 + if( fail && IsFloatSubnormal( s[j] ) ) + { // look at me, + double correct2 = f->func.f_fff( 0.0, s2[j], s3[j] ); + double correct3 = f->func.f_fff( -0.0, s2[j], s3[j] ); + float err2 = Ulp_Error( test, correct2 ); + float err3 = Ulp_Error( test, correct3 ); + fail = fail && ((!(fabsf(err2) <= f->float_ulps)) && (!(fabsf(err3) <= f->float_ulps))); + if( fabsf( err2 ) < fabsf(err ) ) + err = err2; + if( fabsf( err3 ) < fabsf(err ) ) + err = err3; + + // retry per section 6.5.3.4 + if( IsFloatResultSubnormal(correct2, f->float_ulps ) || IsFloatResultSubnormal(correct3, f->float_ulps ) ) + { // look at me now, + fail = fail && ( test != 0.0f); + if( ! 
fail ) + err = 0.0f; + } + + //try with first two args as zero + if( IsFloatSubnormal( s2[j] ) ) + { // its fun to have fun, + correct2 = f->func.f_fff( 0.0, 0.0, s3[j] ); + correct3 = f->func.f_fff( -0.0, 0.0, s3[j] ); + double correct4 = f->func.f_fff( 0.0, -0.0, s3[j] ); + double correct5 = f->func.f_fff( -0.0, -0.0, s3[j] ); + err2 = Ulp_Error( test, correct2 ); + err3 = Ulp_Error( test, correct3 ); + float err4 = Ulp_Error( test, correct4 ); + float err5 = Ulp_Error( test, correct5 ); + fail = fail && ((!(fabsf(err2) <= f->float_ulps)) && (!(fabsf(err3) <= f->float_ulps)) && + (!(fabsf(err4) <= f->float_ulps)) && (!(fabsf(err5) <= f->float_ulps))); + if( fabsf( err2 ) < fabsf(err ) ) + err = err2; + if( fabsf( err3 ) < fabsf(err ) ) + err = err3; + if( fabsf( err4 ) < fabsf(err ) ) + err = err4; + if( fabsf( err5 ) < fabsf(err ) ) + err = err5; + + // retry per section 6.5.3.4 + if( IsFloatResultSubnormal(correct2, f->float_ulps ) || IsFloatResultSubnormal(correct3, f->float_ulps ) || + IsFloatResultSubnormal(correct4, f->float_ulps ) || IsFloatResultSubnormal(correct5, f->float_ulps ) ) + { + fail = fail && ( test != 0.0f); + if( ! fail ) + err = 0.0f; + } + + if( IsFloatSubnormal( s3[j] ) ) + { // but you have to know how! 
+ correct2 = f->func.f_fff( 0.0, 0.0, 0.0f ); + correct3 = f->func.f_fff( -0.0, 0.0, 0.0f ); + correct4 = f->func.f_fff( 0.0, -0.0, 0.0f ); + correct5 = f->func.f_fff( -0.0, -0.0, 0.0f ); + double correct6 = f->func.f_fff( 0.0, 0.0, -0.0f ); + double correct7 = f->func.f_fff( -0.0, 0.0, -0.0f ); + double correct8 = f->func.f_fff( 0.0, -0.0, -0.0f ); + double correct9 = f->func.f_fff( -0.0, -0.0, -0.0f ); + err2 = Ulp_Error( test, correct2 ); + err3 = Ulp_Error( test, correct3 ); + err4 = Ulp_Error( test, correct4 ); + err5 = Ulp_Error( test, correct5 ); + float err6 = Ulp_Error( test, correct6 ); + float err7 = Ulp_Error( test, correct7 ); + float err8 = Ulp_Error( test, correct8 ); + float err9 = Ulp_Error( test, correct9 ); + fail = fail && ((!(fabsf(err2) <= f->float_ulps)) && (!(fabsf(err3) <= f->float_ulps)) && + (!(fabsf(err4) <= f->float_ulps)) && (!(fabsf(err5) <= f->float_ulps)) && + (!(fabsf(err5) <= f->float_ulps)) && (!(fabsf(err6) <= f->float_ulps)) && + (!(fabsf(err7) <= f->float_ulps)) && (!(fabsf(err8) <= f->float_ulps))); + if( fabsf( err2 ) < fabsf(err ) ) + err = err2; + if( fabsf( err3 ) < fabsf(err ) ) + err = err3; + if( fabsf( err4 ) < fabsf(err ) ) + err = err4; + if( fabsf( err5 ) < fabsf(err ) ) + err = err5; + if( fabsf( err6 ) < fabsf(err ) ) + err = err6; + if( fabsf( err7 ) < fabsf(err ) ) + err = err7; + if( fabsf( err8 ) < fabsf(err ) ) + err = err8; + if( fabsf( err9 ) < fabsf(err ) ) + err = err9; + + // retry per section 6.5.3.4 + if( IsFloatResultSubnormal(correct2, f->float_ulps ) || IsFloatResultSubnormal(correct3, f->float_ulps ) || + IsFloatResultSubnormal(correct4, f->float_ulps ) || IsFloatResultSubnormal(correct5, f->float_ulps ) || + IsFloatResultSubnormal( correct6, f->float_ulps ) || IsFloatResultSubnormal(correct7, f->float_ulps ) || + IsFloatResultSubnormal(correct8, f->float_ulps ) || IsFloatResultSubnormal( correct9, f->float_ulps ) ) + { + fail = fail && ( test != 0.0f); + if( ! 
fail ) + err = 0.0f; + } + } + } + else if( IsFloatSubnormal( s3[j] ) ) + { + correct2 = f->func.f_fff( 0.0, s2[j], 0.0 ); + correct3 = f->func.f_fff( -0.0, s2[j], 0.0 ); + double correct4 = f->func.f_fff( 0.0, s2[j], -0.0 ); + double correct5 = f->func.f_fff( -0.0, s2[j], -0.0 ); + err2 = Ulp_Error( test, correct2 ); + err3 = Ulp_Error( test, correct3 ); + float err4 = Ulp_Error( test, correct4 ); + float err5 = Ulp_Error( test, correct5 ); + fail = fail && ((!(fabsf(err2) <= f->float_ulps)) && (!(fabsf(err3) <= f->float_ulps)) && + (!(fabsf(err4) <= f->float_ulps)) && (!(fabsf(err5) <= f->float_ulps))); + if( fabsf( err2 ) < fabsf(err ) ) + err = err2; + if( fabsf( err3 ) < fabsf(err ) ) + err = err3; + if( fabsf( err4 ) < fabsf(err ) ) + err = err4; + if( fabsf( err5 ) < fabsf(err ) ) + err = err5; + + // retry per section 6.5.3.4 + if( IsFloatResultSubnormal(correct2, f->float_ulps ) || IsFloatResultSubnormal(correct3, f->float_ulps ) || + IsFloatResultSubnormal(correct4, f->float_ulps ) || IsFloatResultSubnormal(correct5, f->float_ulps ) ) + { + fail = fail && ( test != 0.0f); + if( ! fail ) + err = 0.0f; + } + } + } + else if( fail && IsFloatSubnormal( s2[j] ) ) + { + double correct2 = f->func.f_fff( s[j], 0.0, s3[j] ); + double correct3 = f->func.f_fff( s[j], -0.0, s3[j] ); + float err2 = Ulp_Error( test, correct2 ); + float err3 = Ulp_Error( test, correct3 ); + fail = fail && ((!(fabsf(err2) <= f->float_ulps)) && (!(fabsf(err3) <= f->float_ulps))); + if( fabsf( err2 ) < fabsf(err ) ) + err = err2; + if( fabsf( err3 ) < fabsf(err ) ) + err = err3; + + // retry per section 6.5.3.4 + if( IsFloatResultSubnormal(correct2, f->float_ulps ) || IsFloatResultSubnormal(correct3, f->float_ulps ) ) + { + fail = fail && ( test != 0.0f); + if( ! 
fail ) + err = 0.0f; + } + + //try with second two args as zero + if( IsFloatSubnormal( s3[j] ) ) + { + correct2 = f->func.f_fff( s[j], 0.0, 0.0 ); + correct3 = f->func.f_fff( s[j], -0.0, 0.0 ); + double correct4 = f->func.f_fff( s[j], 0.0, -0.0 ); + double correct5 = f->func.f_fff( s[j], -0.0, -0.0 ); + err2 = Ulp_Error( test, correct2 ); + err3 = Ulp_Error( test, correct3 ); + float err4 = Ulp_Error( test, correct4 ); + float err5 = Ulp_Error( test, correct5 ); + fail = fail && ((!(fabsf(err2) <= f->float_ulps)) && (!(fabsf(err3) <= f->float_ulps)) && + (!(fabsf(err4) <= f->float_ulps)) && (!(fabsf(err5) <= f->float_ulps))); + if( fabsf( err2 ) < fabsf(err ) ) + err = err2; + if( fabsf( err3 ) < fabsf(err ) ) + err = err3; + if( fabsf( err4 ) < fabsf(err ) ) + err = err4; + if( fabsf( err5 ) < fabsf(err ) ) + err = err5; + + // retry per section 6.5.3.4 + if( IsFloatResultSubnormal(correct2, f->float_ulps ) || IsFloatResultSubnormal(correct3, f->float_ulps ) || + IsFloatResultSubnormal(correct4, f->float_ulps ) || IsFloatResultSubnormal(correct5, f->float_ulps ) ) + { + fail = fail && ( test != 0.0f); + if( ! fail ) + err = 0.0f; + } + } + } + else if( fail && IsFloatSubnormal(s3[j]) ) + { + double correct2 = f->func.f_fff( s[j], s2[j], 0.0 ); + double correct3 = f->func.f_fff( s[j], s2[j], -0.0 ); + float err2 = Ulp_Error( test, correct2 ); + float err3 = Ulp_Error( test, correct3 ); + fail = fail && ((!(fabsf(err2) <= f->float_ulps)) && (!(fabsf(err3) <= f->float_ulps))); + if( fabsf( err2 ) < fabsf(err ) ) + err = err2; + if( fabsf( err3 ) < fabsf(err ) ) + err = err3; + + // retry per section 6.5.3.4 + if( IsFloatResultSubnormal(correct2, f->float_ulps ) || IsFloatResultSubnormal(correct3, f->float_ulps ) ) + { + fail = fail && ( test != 0.0f); + if( ! 
fail ) + err = 0.0f; + } + } + } + + if( fabsf(err ) > maxError ) + { + maxError = fabsf(err); + maxErrorVal = s[j]; + maxErrorVal2 = s2[j]; + maxErrorVal3 = s3[j]; + } + + if( fail ) + { + vlog_error( "\nERROR: %s%s: %f ulp error at {%a, %a, %a}: *%a vs. %a\n", f->name, sizeNames[k], err, s[j], s2[j], s3[j], ((float*) gOut_Ref)[j], test ); + error = -1; + goto exit; + } + } + } + } +*/ + if( 0 == (i & 0x0fffffff) ) + { + vlog("." ); + fflush(stdout); + } + } + + if( ! gSkipCorrectnessTesting ) + { + if( gWimpyMode ) + vlog( "Wimp pass" ); + else + vlog( "pass" ); + } + + if( gMeasureTimes ) + { + //Init input array + uint32_t *p = (uint32_t *)gIn; + uint32_t *p2 = (uint32_t *)gIn2; + uint32_t *p3 = (uint32_t *)gIn3; + for( j = 0; j < bufferSize / sizeof( float ); j++ ) + { + p[j] = genrand_int32(d); + p2[j] = genrand_int32(d); + p3[j] = genrand_int32(d); + } + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); + return error; + } + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, bufferSize, gIn2, 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error ); + return error; + } + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0, bufferSize, gIn3, 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer3 ***\n", error ); + return error; + } + + + // Run the kernels + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + size_t vectorSize = sizeof( cl_float ) * sizeValues[j]; + size_t localCount = (bufferSize + vectorSize - 1) / vectorSize; // bufferSize / vectorSize rounded up + if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; } + if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; } + if( ( 
error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(programs[j]); goto exit; } + if( ( error = clSetKernelArg( kernels[j], 3, sizeof( gInBuffer3 ), &gInBuffer3 ) )) { LogBuildError(programs[j]); goto exit; } + + double sum = 0.0; + double bestTime = INFINITY; + for( k = 0; k < PERF_LOOP_COUNT; k++ ) + { + uint64_t startTime = GetTime(); + if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) + { + vlog_error( "FAILED -- could not execute kernel\n" ); + goto exit; + } + + // Make sure OpenCL is done + if( (error = clFinish(gQueue) ) ) + { + vlog_error( "Error %d at clFinish\n", error ); + goto exit; + } + + uint64_t endTime = GetTime(); + double time = SubtractTime( endTime, startTime ); + sum += time; + if( time < bestTime ) + bestTime = time; + } + + if( gReportAverageTimes ) + bestTime = sum / PERF_LOOP_COUNT; + double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (bufferSize / sizeof( float ) ); + vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s", f->name, sizeNames[j] ); + } + } + + if( ! gSkipCorrectnessTesting ) + vlog( "\t%8.2f @ {%a, %a, %a}", maxError, maxErrorVal, maxErrorVal2, maxErrorVal3 ); + vlog( "\n" ); + +exit: + // Release + for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) + { + clReleaseKernel(kernels[k]); + clReleaseProgram(programs[k]); + } + + return error; +} + +int TestFunc_mad_Double(const Func *f, MTdata d) +{ + uint64_t i; + uint32_t j, k; + int error; + cl_program programs[ VECTOR_SIZE_COUNT ]; + cl_kernel kernels[ VECTOR_SIZE_COUNT ]; + float maxError = 0.0f; +// int ftz = f->ftz || gForceFTZ; + double maxErrorVal = 0.0f; + double maxErrorVal2 = 0.0f; + double maxErrorVal3 = 0.0f; + size_t bufferSize = (gWimpyMode)? 
gWimpyBufferSize: BUFFER_SIZE; + + logFunctionInfo(f->name,sizeof(cl_double),gTestFastRelaxed); + uint64_t step = bufferSize / sizeof( double ); + if( gWimpyMode ) + { + step = (1ULL<<32) * gWimpyReductionFactor / (512); + } + // Init the kernels + BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, f->nameInCode }; + if( (error = ThreadPool_Do( BuildKernel_DoubleFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info ) )) + { + return error; + } +/* + for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) + if( (error = BuildKernelDouble( f->nameInCode, (int) i, kernels + i, programs + i) ) ) + return error; +*/ + + for( i = 0; i < (1ULL<<32); i += step ) + { + //Init input array + double *p = (double *)gIn; + double *p2 = (double *)gIn2; + double *p3 = (double *)gIn3; + for( j = 0; j < bufferSize / sizeof( double ); j++ ) + { + p[j] = DoubleFromUInt32(genrand_int32(d)); + p2[j] = DoubleFromUInt32(genrand_int32(d)); + p3[j] = DoubleFromUInt32(genrand_int32(d)); + } + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); + return error; + } + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, bufferSize, gIn2, 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error ); + return error; + } + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0, bufferSize, gIn3, 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer3 ***\n", error ); + return error; + } + + // write garbage into output arrays + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + uint32_t pattern = 0xffffdead; + memset_pattern4(gOut[j], &pattern, bufferSize); + if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, bufferSize, gOut[j], 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", error, j ); + 
goto exit; + } + } + + // Run the kernels + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + size_t vectorSize = sizeof( cl_double ) * sizeValues[j]; + size_t localCount = (bufferSize + vectorSize - 1) / vectorSize; // bufferSize / vectorSize rounded up + if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; } + if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; } + if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(programs[j]); goto exit; } + if( ( error = clSetKernelArg( kernels[j], 3, sizeof( gInBuffer3 ), &gInBuffer3 ) )) { LogBuildError(programs[j]); goto exit; } + + if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) + { + vlog_error( "FAILED -- could not execute kernel\n" ); + goto exit; + } + } + + // Get that moving + if( (error = clFlush(gQueue) )) + vlog( "clFlush failed\n" ); + + //Calculate the correctly rounded reference result + double *r = (double *)gOut_Ref; + double *s = (double *)gIn; + double *s2 = (double *)gIn2; + double *s3 = (double *)gIn3; + for( j = 0; j < bufferSize / sizeof( double ); j++ ) + r[j] = (double) f->dfunc.f_fff( s[j], s2[j], s3[j] ); + + // Read the data back + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL)) ) + { + vlog_error( "ReadArray failed %d\n", error ); + goto exit; + } + } + + if( gSkipCorrectnessTesting ) + break; + + //Verify data -- Commented out on purpose. no verification possible. MAD is a random number generator. 
+/* + uint64_t *t = gOut_Ref; + for( j = 0; j < bufferSize / sizeof( double ); j++ ) + { + for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) + { + uint64_t *q = gOut[k]; + + // If we aren't getting the correctly rounded result + if( t[j] != q[j] ) + { + double test = ((double*) q)[j]; + long double correct = f->dfunc.f_fff( s[j], s2[j], s3[j] ); + float err = Ulp_Error_Double( test, correct ); + int fail = ! (fabsf(err) <= f->double_ulps); + + if( fail && ftz ) + { + // retry per section 6.5.3.2 + if( IsDoubleResultSubnormal(correct, f->double_ulps) ) + { // look at me, + fail = fail && ( test != 0.0f ); + if( ! fail ) + err = 0.0f; + } + + // retry per section 6.5.3.3 + if( fail && IsDoubleSubnormal( s[j] ) ) + { // look at me, + long double correct2 = f->dfunc.f_fff( 0.0, s2[j], s3[j] ); + long double correct3 = f->dfunc.f_fff( -0.0, s2[j], s3[j] ); + float err2 = Ulp_Error_Double( test, correct2 ); + float err3 = Ulp_Error_Double( test, correct3 ); + fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps))); + if( fabsf( err2 ) < fabsf(err ) ) + err = err2; + if( fabsf( err3 ) < fabsf(err ) ) + err = err3; + + // retry per section 6.5.3.4 + if( IsDoubleResultSubnormal( correct2, f->double_ulps ) || IsDoubleResultSubnormal( correct3, f->double_ulps ) ) + { // look at me now, + fail = fail && ( test != 0.0f); + if( ! 
fail ) + err = 0.0f; + } + + //try with first two args as zero + if( IsDoubleSubnormal( s2[j] ) ) + { // its fun to have fun, + correct2 = f->dfunc.f_fff( 0.0, 0.0, s3[j] ); + correct3 = f->dfunc.f_fff( -0.0, 0.0, s3[j] ); + long double correct4 = f->dfunc.f_fff( 0.0, -0.0, s3[j] ); + long double correct5 = f->dfunc.f_fff( -0.0, -0.0, s3[j] ); + err2 = Ulp_Error_Double( test, correct2 ); + err3 = Ulp_Error_Double( test, correct3 ); + float err4 = Ulp_Error_Double( test, correct4 ); + float err5 = Ulp_Error_Double( test, correct5 ); + fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) && + (!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps))); + if( fabsf( err2 ) < fabsf(err ) ) + err = err2; + if( fabsf( err3 ) < fabsf(err ) ) + err = err3; + if( fabsf( err4 ) < fabsf(err ) ) + err = err4; + if( fabsf( err5 ) < fabsf(err ) ) + err = err5; + + // retry per section 6.5.3.4 + if( IsDoubleResultSubnormal( correct2, f->double_ulps ) || IsDoubleResultSubnormal( correct3, f->double_ulps ) || + IsDoubleResultSubnormal( correct4, f->double_ulps ) || IsDoubleResultSubnormal( correct5, f->double_ulps ) ) + { + fail = fail && ( test != 0.0f); + if( ! fail ) + err = 0.0f; + } + + if( IsDoubleSubnormal( s3[j] ) ) + { // but you have to know how! 
+ correct2 = f->dfunc.f_fff( 0.0, 0.0, 0.0f ); + correct3 = f->dfunc.f_fff( -0.0, 0.0, 0.0f ); + correct4 = f->dfunc.f_fff( 0.0, -0.0, 0.0f ); + correct5 = f->dfunc.f_fff( -0.0, -0.0, 0.0f ); + long double correct6 = f->dfunc.f_fff( 0.0, 0.0, -0.0f ); + long double correct7 = f->dfunc.f_fff( -0.0, 0.0, -0.0f ); + long double correct8 = f->dfunc.f_fff( 0.0, -0.0, -0.0f ); + long double correct9 = f->dfunc.f_fff( -0.0, -0.0, -0.0f ); + err2 = Ulp_Error_Double( test, correct2 ); + err3 = Ulp_Error_Double( test, correct3 ); + err4 = Ulp_Error_Double( test, correct4 ); + err5 = Ulp_Error_Double( test, correct5 ); + float err6 = Ulp_Error_Double( test, correct6 ); + float err7 = Ulp_Error_Double( test, correct7 ); + float err8 = Ulp_Error_Double( test, correct8 ); + float err9 = Ulp_Error_Double( test, correct9 ); + fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) && + (!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps)) && + (!(fabsf(err5) <= f->double_ulps)) && (!(fabsf(err6) <= f->double_ulps)) && + (!(fabsf(err7) <= f->double_ulps)) && (!(fabsf(err8) <= f->double_ulps))); + if( fabsf( err2 ) < fabsf(err ) ) + err = err2; + if( fabsf( err3 ) < fabsf(err ) ) + err = err3; + if( fabsf( err4 ) < fabsf(err ) ) + err = err4; + if( fabsf( err5 ) < fabsf(err ) ) + err = err5; + if( fabsf( err6 ) < fabsf(err ) ) + err = err6; + if( fabsf( err7 ) < fabsf(err ) ) + err = err7; + if( fabsf( err8 ) < fabsf(err ) ) + err = err8; + if( fabsf( err9 ) < fabsf(err ) ) + err = err9; + + // retry per section 6.5.3.4 + if( IsDoubleResultSubnormal( correct2, f->double_ulps ) || IsDoubleResultSubnormal( correct3, f->double_ulps ) || + IsDoubleResultSubnormal( correct4, f->double_ulps ) || IsDoubleResultSubnormal( correct5, f->double_ulps ) || + IsDoubleResultSubnormal( correct6, f->double_ulps ) || IsDoubleResultSubnormal( correct7, f->double_ulps ) || + IsDoubleResultSubnormal( correct8, f->double_ulps ) || 
IsDoubleResultSubnormal( correct9, f->double_ulps ) ) + { + fail = fail && ( test != 0.0f); + if( ! fail ) + err = 0.0f; + } + } + } + else if( IsDoubleSubnormal( s3[j] ) ) + { + correct2 = f->dfunc.f_fff( 0.0, s2[j], 0.0 ); + correct3 = f->dfunc.f_fff( -0.0, s2[j], 0.0 ); + long double correct4 = f->dfunc.f_fff( 0.0, s2[j], -0.0 ); + long double correct5 = f->dfunc.f_fff( -0.0, s2[j], -0.0 ); + err2 = Ulp_Error_Double( test, correct2 ); + err3 = Ulp_Error_Double( test, correct3 ); + float err4 = Ulp_Error_Double( test, correct4 ); + float err5 = Ulp_Error_Double( test, correct5 ); + fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) && + (!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps))); + if( fabsf( err2 ) < fabsf(err ) ) + err = err2; + if( fabsf( err3 ) < fabsf(err ) ) + err = err3; + if( fabsf( err4 ) < fabsf(err ) ) + err = err4; + if( fabsf( err5 ) < fabsf(err ) ) + err = err5; + + // retry per section 6.5.3.4 + if( IsDoubleResultSubnormal( correct2, f->double_ulps ) || IsDoubleResultSubnormal( correct3, f->double_ulps ) || + IsDoubleResultSubnormal( correct4, f->double_ulps ) || IsDoubleResultSubnormal( correct5, f->double_ulps ) ) + { + fail = fail && ( test != 0.0f); + if( ! fail ) + err = 0.0f; + } + } + } + else if( fail && IsDoubleSubnormal( s2[j] ) ) + { + long double correct2 = f->dfunc.f_fff( s[j], 0.0, s3[j] ); + long double correct3 = f->dfunc.f_fff( s[j], -0.0, s3[j] ); + float err2 = Ulp_Error_Double( test, correct2 ); + float err3 = Ulp_Error_Double( test, correct3 ); + fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps))); + if( fabsf( err2 ) < fabsf(err ) ) + err = err2; + if( fabsf( err3 ) < fabsf(err ) ) + err = err3; + + // retry per section 6.5.3.4 + if( IsDoubleResultSubnormal( correct2, f->double_ulps ) || IsDoubleResultSubnormal( correct3, f->double_ulps ) ) + { + fail = fail && ( test != 0.0f); + if( ! 
fail ) + err = 0.0f; + } + + //try with second two args as zero + if( IsDoubleSubnormal( s3[j] ) ) + { + correct2 = f->dfunc.f_fff( s[j], 0.0, 0.0 ); + correct3 = f->dfunc.f_fff( s[j], -0.0, 0.0 ); + long double correct4 = f->dfunc.f_fff( s[j], 0.0, -0.0 ); + long double correct5 = f->dfunc.f_fff( s[j], -0.0, -0.0 ); + err2 = Ulp_Error_Double( test, correct2 ); + err3 = Ulp_Error_Double( test, correct3 ); + float err4 = Ulp_Error_Double( test, correct4 ); + float err5 = Ulp_Error_Double( test, correct5 ); + fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) && + (!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps))); + if( fabsf( err2 ) < fabsf(err ) ) + err = err2; + if( fabsf( err3 ) < fabsf(err ) ) + err = err3; + if( fabsf( err4 ) < fabsf(err ) ) + err = err4; + if( fabsf( err5 ) < fabsf(err ) ) + err = err5; + + // retry per section 6.5.3.4 + if( IsDoubleResultSubnormal( correct2, f->double_ulps ) || IsDoubleResultSubnormal( correct3, f->double_ulps ) || + IsDoubleResultSubnormal( correct4, f->double_ulps ) || IsDoubleResultSubnormal( correct5, f->double_ulps ) ) + { + fail = fail && ( test != 0.0f); + if( ! fail ) + err = 0.0f; + } + } + } + else if( fail && IsDoubleSubnormal(s3[j]) ) + { + long double correct2 = f->dfunc.f_fff( s[j], s2[j], 0.0 ); + long double correct3 = f->dfunc.f_fff( s[j], s2[j], -0.0 ); + float err2 = Ulp_Error_Double( test, correct2 ); + float err3 = Ulp_Error_Double( test, correct3 ); + fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps))); + if( fabsf( err2 ) < fabsf(err ) ) + err = err2; + if( fabsf( err3 ) < fabsf(err ) ) + err = err3; + + // retry per section 6.5.3.4 + if( IsDoubleResultSubnormal( correct2, f->double_ulps ) || IsDoubleResultSubnormal( correct3, f->double_ulps ) ) + { + fail = fail && ( test != 0.0f); + if( ! 
fail ) + err = 0.0f; + } + } + } + + if( fabsf(err ) > maxError ) + { + maxError = fabsf(err); + maxErrorVal = s[j]; + maxErrorVal2 = s2[j]; + maxErrorVal3 = s3[j]; + } + + if( fail ) + { + vlog_error( "\nERROR: %sD%s: %f ulp error at {%a, %a, %a}: *%a vs. %a\n", f->name, sizeNames[k], err, s[j], s2[j], s3[j], ((double*) gOut_Ref)[j], test ); + error = -1; + goto exit; + } + } + } + } +*/ + if( 0 == (i & 0x0fffffff) ) + { + vlog("." ); + fflush(stdout); + } + } + + if( ! gSkipCorrectnessTesting ) + { + if( gWimpyMode ) + vlog( "Wimp pass" ); + else + vlog( "pass" ); + } + + if( gMeasureTimes ) + { + //Init input array + double *p = (double *)gIn; + double *p2 = (double *)gIn2; + double *p3 = (double *)gIn3; + for( j = 0; j < bufferSize / sizeof( double ); j++ ) + { + p[j] = DoubleFromUInt32(genrand_int32(d)); + p2[j] = DoubleFromUInt32(genrand_int32(d)); + p3[j] = DoubleFromUInt32(genrand_int32(d)); + } + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); + return error; + } + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, bufferSize, gIn2, 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error ); + return error; + } + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0, bufferSize, gIn3, 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer3 ***\n", error ); + return error; + } + + + // Run the kernels + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + size_t vectorSize = sizeof( cl_double ) * sizeValues[j]; + size_t localCount = (bufferSize + vectorSize - 1) / vectorSize; // bufferSize / vectorSize rounded up + if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; } + if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { 
LogBuildError(programs[j]); goto exit; } + if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(programs[j]); goto exit; } + if( ( error = clSetKernelArg( kernels[j], 3, sizeof( gInBuffer3 ), &gInBuffer3 ) )) { LogBuildError(programs[j]); goto exit; } + + double sum = 0.0; + double bestTime = INFINITY; + for( k = 0; k < PERF_LOOP_COUNT; k++ ) + { + uint64_t startTime = GetTime(); + if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) + { + vlog_error( "FAILED -- could not execute kernel\n" ); + goto exit; + } + + // Make sure OpenCL is done + if( (error = clFinish(gQueue) ) ) + { + vlog_error( "Error %d at clFinish\n", error ); + goto exit; + } + + uint64_t endTime = GetTime(); + double time = SubtractTime( endTime, startTime ); + sum += time; + if( time < bestTime ) + bestTime = time; + } + + if( gReportAverageTimes ) + bestTime = sum / PERF_LOOP_COUNT; + double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (bufferSize / sizeof( double ) ); + vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s", f->name, sizeNames[j] ); + } + for( ; j < gMaxVectorSizeIndex; j++ ) + vlog( "\t -- " ); + } + + if( ! gSkipCorrectnessTesting ) + vlog( "\t%8.2f @ {%a, %a, %a}", maxError, maxErrorVal, maxErrorVal2, maxErrorVal3 ); + vlog( "\n" ); + +exit: + // Release + for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) + { + clReleaseKernel(kernels[k]); + clReleaseProgram(programs[k]); + } + + return error; +} + + + diff --git a/test_conformance/math_brute_force/main.c b/test_conformance/math_brute_force/main.c new file mode 100644 index 00000000..147e3af2 --- /dev/null +++ b/test_conformance/math_brute_force/main.c @@ -0,0 +1,1571 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "Utility.h" + +#include +#include + +#include +#include +#include "FunctionList.h" +#include "Sleep.h" +#include "../../test_common/harness/errorHelpers.h" +#include "../../test_common/harness/kernelHelpers.h" +#include "../../test_common/harness/parseParameters.h" + +#if defined( __APPLE__ ) + #include + #include + #include + #include +#elif defined( __linux__ ) + #include + #include + #include + #include +#endif + +#if defined (__linux__) || (defined WIN32 && defined __MINGW32__) +#include +#endif + +#define kPageSize 4096 +#define DOUBLE_REQUIRED_FEATURES ( CL_FP_FMA | CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO | CL_FP_ROUND_TO_INF | CL_FP_INF_NAN | CL_FP_DENORM ) + +const char **gTestNames = NULL; +unsigned int gTestNameCount = 0; +char appName[ MAXPATHLEN ] = ""; +cl_device_type gDeviceType = CL_DEVICE_TYPE_DEFAULT; +cl_device_id gDevice = NULL; +cl_context gContext = NULL; +cl_command_queue gQueue = NULL; +int gTestCount = 0; +int gFailCount = 0; +int32_t gStartTestNumber = -1; +int32_t gEndTestNumber = -1; +int gSkipCorrectnessTesting = 0; +int gStopOnError = 0; +#if defined( __APPLE__ ) +int gMeasureTimes = 1; +#else +int gMeasureTimes = 0; +#endif +int gReportAverageTimes = 0; +int gForceFTZ = 0; +int gWimpyMode = 0; +int gHasDouble = 0; +int gTestFloat = 1; +//This flag should be 'ON' by default and it can be changed through the command line arguments. +volatile int gTestFastRelaxed = 1; +/*This flag corresponds to defining if the implementation has Derived Fast Relaxed functions. 
+ The spec does not specify ULP for derived function. The derived functions are composed of base functions which are tested for ULP, thus when this flag is enabled, + Derived functions will not be tested for ULP, as per table 7.1 of OpenCL 2.0 spec. + Since there is no way of quering the device whether it is a derived or non-derived implementation according to OpenCL 2.0 spec then it has to be changed through a command line argument. +*/ +int gFastRelaxedDerived = 1; +int gToggleCorrectlyRoundedDivideSqrt = 0; +int gDeviceILogb0 = 1; +int gDeviceILogbNaN = 1; +int gCheckTininessBeforeRounding = 1; +int gIsInRTZMode = 0; +int gInfNanSupport = 1; +int gIsEmbedded = 0; +uint32_t gMaxVectorSizeIndex = VECTOR_SIZE_COUNT; +uint32_t gMinVectorSizeIndex = 0; +const char *method[] = { "Best", "Average" }; +void *gIn = NULL; +void *gIn2 = NULL; +void *gIn3 = NULL; +void *gOut_Ref = NULL; +void *gOut[VECTOR_SIZE_COUNT] = {NULL, NULL, NULL, NULL, NULL, NULL }; +void *gOut_Ref2 = NULL; +void *gOut2[VECTOR_SIZE_COUNT] = {NULL, NULL, NULL, NULL, NULL, NULL }; +cl_mem gInBuffer = NULL; +cl_mem gInBuffer2 = NULL; +cl_mem gInBuffer3 = NULL; +cl_mem gOutBuffer[VECTOR_SIZE_COUNT]= {NULL, NULL, NULL, NULL, NULL, NULL }; +cl_mem gOutBuffer2[VECTOR_SIZE_COUNT]= {NULL, NULL, NULL, NULL, NULL, NULL }; +uint32_t gComputeDevices = 0; +uint32_t gSimdSize = 1; +uint32_t gDeviceFrequency = 0; +cl_uint chosen_device_index = 0; +cl_uint chosen_platform_index = 0; +cl_uint gRandomSeed = 0; +cl_device_fp_config gFloatCapabilities = 0; +cl_device_fp_config gDoubleCapabilities = 0; +int gWimpyReductionFactor = 32; +int gWimpyBufferSize = BUFFER_SIZE; +int gVerboseBruteForce = 0; +#if defined( __APPLE__ ) +int gHasBasicDouble = 0; +char* gBasicDoubleFuncs[] = { + "add", + "assignment", + "divide", + "isequal", + "isgreater", + "isgreaterequal", + "isless", + "islessequal", + "isnotequal", + "multiply", + "sqrt", + "subtract" }; +size_t gNumBasicDoubleFuncs = sizeof(gBasicDoubleFuncs)/sizeof(char*); 
+#endif + + +static int ParseArgs( int argc, const char **argv ); +static void PrintArch( void ); +static void PrintUsage( void ); +static void PrintFunctions( void ); +static int InitCL( void ); +static void ReleaseCL( void ); +static int InitILogbConstants( void ); +static int IsTininessDetectedBeforeRounding( void ); +static int IsInRTZMode( void ); //expensive. Please check gIsInRTZMode global instead. +static void TestFinishAtExit(void); + +#pragma mark - + +int main (int argc, const char * argv[]) +{ + unsigned int i, j, error = 0; + + test_start(); + argc = parseCustomParam(argc, argv); + if (argc == -1) + { + test_finish (); + return -1; + } + atexit(TestFinishAtExit); + +#if defined( __APPLE__ ) + struct timeval startTime; + gettimeofday( &startTime, NULL ); +#endif + + error = ParseArgs( argc, argv ); + if( error ) + return error; + + // Init OpenCL + error = InitCL(); + if( error ) + return error; + + // This takes a while, so prevent the machine from going to sleep. + PreventSleep(); + atexit( ResumeSleep ); + + if( gSkipCorrectnessTesting ) + vlog( "*** Skipping correctness testing! ***\n\n" ); + else if( gStopOnError ) + vlog( "Stopping at first error.\n" ); + + if( gMeasureTimes ) + { + vlog( "%s times are reported at right (cycles per element):\n", method[gReportAverageTimes] ); + vlog( "\n" ); + if( gSkipCorrectnessTesting ) + vlog( " \t "); + else + vlog( " \t "); + if( gWimpyMode ) + vlog( " " ); + for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) + vlog( "\t float%s", sizeNames[i] ); + } + else + { + vlog( " \t "); + if( gWimpyMode ) + vlog( " " ); + } + if( ! 
gSkipCorrectnessTesting ) + vlog( "\t max_ulps" ); + + vlog( "\n-----------------------------------------------------------------------------------------------------------\n" ); + + uint32_t start = 0; + if( gStartTestNumber > (int) start ) + { + vlog( "Skipping to test %d...\n", gStartTestNumber ); + start = gStartTestNumber; + } + + uint32_t stop = (uint32_t) functionListCount; + MTdata d = init_genrand( gRandomSeed ); + if( gStartTestNumber <= gEndTestNumber && -1 != gEndTestNumber && (int) functionListCount > gEndTestNumber + 1) + stop = gEndTestNumber + 1; + + FPU_mode_type oldMode; + DisableFTZ( &oldMode ); + + for( i = start; i < stop; i++ ) + { + const Func *f = functionList + i; + + // If the user passed a list of functions to run, make sure we are in that list + if( gTestNameCount ) + { + for( j = 0; j < gTestNameCount; j++ ) + if( 0 == strcmp(gTestNames[j], f->name ) ) + break; + + // If this function doesn't match any on the list skip to the next function + if( j == gTestNameCount ) + continue; + } + + // if correctly rounded divide & sqrt are supported by the implementation + // then test it; otherwise skip the test + if (!strcmp(f->name, "sqrt_cr") || !strcmp(f->name, "divide_cr")) + { + if(( gFloatCapabilities & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT ) == 0 ) + continue; + + } + + + { + extern int my_ilogb(double); + if( 0 == strcmp( "ilogb", f->name) ) + InitILogbConstants(); + + if ( gTestFastRelaxed ) + { + if( f->relaxed ) + { + gTestCount++; + vlog( "%3d: ", gTestCount ); + if( f->vtbl->TestFunc( f, d ) ) + { + gFailCount++; + error++; + if( gStopOnError ) + break; + } + } + } + + if( gTestFloat ) + { + int testFastRelaxedTmp = gTestFastRelaxed; + gTestFastRelaxed = 0; + gTestCount++; + vlog( "%3d: ", gTestCount ); + if( f->vtbl->TestFunc( f, d ) ) + { + gFailCount++; + error++; + if( gStopOnError ) + { + gTestFastRelaxed = testFastRelaxedTmp; + break; + } + } + gTestFastRelaxed = testFastRelaxedTmp; + } + + if( gHasDouble && NULL != 
f->vtbl->DoubleTestFunc && NULL != f->dfunc.p ) + { + //Disable fast-relaxed-math for double precision floating-point + int testFastRelaxedTmp = gTestFastRelaxed; + gTestFastRelaxed = 0; + + gTestCount++; + vlog( "%3d: ", gTestCount ); + if( f->vtbl->DoubleTestFunc( f, d ) ) + { + gFailCount++; + error++; + if( gStopOnError ) + break; + } + + //Re-enable testing fast-relaxed-math mode + gTestFastRelaxed = testFastRelaxedTmp; + } + +#if defined( __APPLE__ ) + { + if( gHasBasicDouble && NULL != f->vtbl->DoubleTestFunc && NULL != f->dfunc.p) + { + //Disable fast-relaxed-math for double precision floating-point + int testFastRelaxedTmp = gTestFastRelaxed; + gTestFastRelaxed = 0; + + int isBasicTest = 0; + for( j = 0; j < gNumBasicDoubleFuncs; j++ ) { + if( 0 == strcmp(gBasicDoubleFuncs[j], f->name ) ) { + isBasicTest = 1; + break; + } + } + if (isBasicTest) { + gTestCount++; + if( gTestFloat ) + vlog( " " ); + if( f->vtbl->DoubleTestFunc( f, d ) ) + { + gFailCount++; + error++; + if( gStopOnError ) + break; + } + } + + //Re-enable testing fast-relaxed-math mode + gTestFastRelaxed = testFastRelaxedTmp; + } + } +#endif + } + } + + RestoreFPState( &oldMode ); + + free_mtdata(d); d = NULL; + vlog( "\ndone.\n" ); + + int error_code = clFinish(gQueue); + if (error_code) + vlog_error("clFinish failed:%d\n", error_code); + + if (gFailCount == 0) + { + if (gTestCount > 1) + vlog("PASSED %d of %d tests.\n", gTestCount, gTestCount); + else + vlog("PASSED test.\n"); + } + else if (gFailCount > 0) + { + if (gTestCount > 1) + vlog_error("FAILED %d of %d tests.\n", gFailCount, gTestCount); + else + vlog_error("FAILED test.\n"); + } + + ReleaseCL(); + +#if defined( __APPLE__ ) + struct timeval endTime; + gettimeofday( &endTime, NULL ); + double time = (double) endTime.tv_sec - (double) startTime.tv_sec; + time += 1e-6 * ((double) endTime.tv_usec - (double) startTime.tv_usec); + vlog( "time: %f s\n", time ); +#endif + + + if (gFailCount > 0) + return -1; + return error; +} + +static 
int ParseArgs( int argc, const char **argv ) +{ + int i; + gTestNames = (const char**) calloc( argc - 1, sizeof( char*) ); + gTestNameCount = 0; + int singleThreaded = 0; + + // Parse arg list + if( NULL == gTestNames && argc > 1 ) + return -1; + + { // Extract the app name + strncpy( appName, argv[0], MAXPATHLEN ); + +#if defined( __APPLE__ ) + char baseName[MAXPATHLEN]; + char *base = NULL; + strncpy( baseName, argv[0], MAXPATHLEN ); + base = basename( baseName ); + if( NULL != base ) + { + strncpy( appName, base, sizeof( appName ) ); + appName[ sizeof( appName ) -1 ] = '\0'; + } +#endif + } + + /* Check for environment variable to set device type */ + char *env_mode = getenv( "CL_DEVICE_TYPE" ); + if( env_mode != NULL ) + { + if( strcmp( env_mode, "gpu" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_GPU" ) == 0 ) + gDeviceType = CL_DEVICE_TYPE_GPU; + else if( strcmp( env_mode, "cpu" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_CPU" ) == 0 ) + gDeviceType = CL_DEVICE_TYPE_CPU; + else if( strcmp( env_mode, "accelerator" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_ACCELERATOR" ) == 0 ) + gDeviceType = CL_DEVICE_TYPE_ACCELERATOR; + else if( strcmp( env_mode, "default" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_DEFAULT" ) == 0 ) + gDeviceType = CL_DEVICE_TYPE_DEFAULT; + else + { + vlog_error( "Unknown CL_DEVICE_TYPE env variable setting: %s.\nAborting...\n", env_mode ); + abort(); + } + } + + + vlog( "\n%s\t", appName ); + for( i = 1; i < argc; i++ ) + { + const char *arg = argv[i]; + if( NULL == arg ) + break; + + vlog( "\t%s", arg ); + int optionFound = 0; + if( arg[0] == '-' ) + { + while( arg[1] != '\0' ) + { + arg++; + optionFound = 1; + switch( *arg ) + { + case 'a': + gReportAverageTimes ^= 1; + break; + + case 'c': + gToggleCorrectlyRoundedDivideSqrt ^= 1; + break; + + case 'd': + gHasDouble ^= 1; + break; + + case 'e': + gFastRelaxedDerived ^= 1; + break; + + case 'f': + gTestFloat ^= 1; + break; + + case 'h': + PrintUsage(); + return -1; + + case 'p': + 
PrintFunctions(); + return -1; + + case 'l': + gSkipCorrectnessTesting ^= 1; + break; + + case 'm': + singleThreaded ^= 1; + break; + + case 'r': + gTestFastRelaxed ^= 1; + break; + + case 's': + gStopOnError ^= 1; + break; + + case 't': + gMeasureTimes ^= 1; + break; + + case 'v': + gVerboseBruteForce ^= 1; + break; + + case 'w': // wimpy mode + gWimpyMode ^= 1; + break; + + case '[': + // wimpy reduction factor can be set with the option -[2^n] + // Default factor is 32, and n practically can be from 1 to 10 + { + const char *arg_temp = strchr(&arg[1], ']'); + if( arg_temp != 0) + { + int new_factor = atoi(&arg[1]); + arg=arg_temp; // Advance until ']' + if(new_factor && !(new_factor & (new_factor - 1))) + { + vlog( " WimpyReduction factor changed from %d to %d \n",gWimpyReductionFactor, new_factor); + gWimpyReductionFactor = new_factor; + }else + { + vlog( " Error in WimpyReduction factor %d, must be power of 2 \n",gWimpyReductionFactor); + } + } + } + break; + + case 'z': + gForceFTZ ^= 1; + break; + + case '1': + if( arg[1] == '6' ) + { + gMinVectorSizeIndex = 5; + gMaxVectorSizeIndex = gMinVectorSizeIndex + 1; + arg++; + } + else + { + gMinVectorSizeIndex = 0; + gMaxVectorSizeIndex = gMinVectorSizeIndex + 1; + } + break; + case '2': + gMinVectorSizeIndex = 1; + gMaxVectorSizeIndex = gMinVectorSizeIndex + 1; + break; + case '3': + gMinVectorSizeIndex = 2; + gMaxVectorSizeIndex = gMinVectorSizeIndex + 1; + break; + case '4': + gMinVectorSizeIndex = 3; + gMaxVectorSizeIndex = gMinVectorSizeIndex + 1; + break; + case '8': + gMinVectorSizeIndex = 4; + gMaxVectorSizeIndex = gMinVectorSizeIndex + 1; + break; + break; + + default: + vlog( " <-- unknown flag: %c (0x%2.2x)\n)", *arg, *arg ); + PrintUsage(); + return -1; + } + } + } + + // Check if a particular device id was requested + if (strlen(argv[i]) >= 3 && argv[i][0] == 'i' && argv[i][1] =='d') + { + chosen_device_index = atoi(&(argv[i][2])); + optionFound = 1; + } + + // Check if a particular platform was 
requested + if (strlen(argv[i]) >= 3 && argv[i][0] == 'p' && argv[i][1] =='l') + { + chosen_platform_index = atoi(&(argv[i][2])); + optionFound = 1; + } + + if( ! optionFound ) + { + char *t = NULL; + long number = strtol( arg, &t, 0 ); + if( t != arg ) + { + if( -1 == gStartTestNumber ) + gStartTestNumber = (int32_t) number; + else + gEndTestNumber = gStartTestNumber + (int32_t) number; + } + else + { + // Make sure this is a valid name + unsigned int k; + for (k=0; kname) == 0) + { + gTestNames[ gTestNameCount ] = arg; + gTestNameCount++; + break; + } + } + // If we didn't find it in the list of test names + if (k >= functionListCount) + { + //It may be a device type or rundomize parameter + if( 0 == strcmp(arg, "CL_DEVICE_TYPE_CPU")) { + gDeviceType = CL_DEVICE_TYPE_CPU; + } else if( 0 == strcmp(arg, "CL_DEVICE_TYPE_GPU")) { + gDeviceType = CL_DEVICE_TYPE_GPU; + } else if( 0 == strcmp(arg, "CL_DEVICE_TYPE_ACCELERATOR")) { + gDeviceType = CL_DEVICE_TYPE_ACCELERATOR; + } else if( 0 == strcmp(arg, "randomize")) { + gRandomSeed = (cl_uint) time( NULL ); + vlog( "\nRandom seed: %u.\n", gRandomSeed ); + } else { + vlog_error("\nInvalid function name: %s\n", arg); + test_finish(); + exit(-1); + } + } + } + } + } + + // Check for the wimpy mode environment variable + if (getenv("CL_WIMPY_MODE")) { + vlog( "\n" ); + vlog( "*** Detected CL_WIMPY_MODE env ***\n" ); + gWimpyMode = 1; + } + +#if defined( __APPLE__ ) + #if defined( __i386__ ) || defined( __x86_64__ ) + #define kHasSSE3 0x00000008 + #define kHasSupplementalSSE3 0x00000100 + #define kHasSSE4_1 0x00000400 + #define kHasSSE4_2 0x00000800 + /* check our environment for a hint to disable SSE variants */ + { + const char *env = getenv( "CL_MAX_SSE" ); + if( env ) + { + extern int _cpu_capabilities; + int mask = 0; + if( 0 == strcasecmp( env, "SSE4.1" ) ) + mask = kHasSSE4_2; + else if( 0 == strcasecmp( env, "SSSE3" ) ) + mask = kHasSSE4_2 | kHasSSE4_1; + else if( 0 == strcasecmp( env, "SSE3" ) ) + mask = kHasSSE4_2 
| kHasSSE4_1 | kHasSupplementalSSE3; + else if( 0 == strcasecmp( env, "SSE2" ) ) + mask = kHasSSE4_2 | kHasSSE4_1 | kHasSupplementalSSE3 | kHasSSE3; + else + { + vlog_error( "Error: Unknown CL_MAX_SSE setting: %s\n", env ); + return -2; + } + + vlog( "*** Environment: CL_MAX_SSE = %s ***\n", env ); + _cpu_capabilities &= ~mask; + } + } + #endif +#endif /* __APPLE__ */ + + vlog( "\nTest binary built %s %s\n", __DATE__, __TIME__ ); + + PrintArch(); + + if( gWimpyMode ) + { + vlog( "\n" ); + vlog( "*** WARNING: Testing in Wimpy mode! ***\n" ); + vlog( "*** Wimpy mode is not sufficient to verify correctness. ***\n" ); + vlog( "*** Wimpy Reduction Factor: %-27u ***\n\n", gWimpyReductionFactor ); + } + + if( singleThreaded ) + SetThreadCount(1); + + return 0; +} + +static void PrintArch( void ) +{ + vlog( "\nHost info:\n" ); + vlog( "\tsizeof( void*) = %zd\n", sizeof( void *) ); + #if defined( __ppc__ ) + vlog( "\tARCH:\tppc\n" ); + #elif defined( __ppc64__ ) + vlog( "\tARCH:\tppc64\n" ); + #elif defined( __PPC__ ) + vlog( "ARCH:\tppc\n" ); + #elif defined( __i386__ ) + vlog( "\tARCH:\ti386\n" ); + #elif defined( __x86_64__ ) + vlog( "\tARCH:\tx86_64\n" ); + #elif defined( __arm__ ) + vlog( "\tARCH:\tarm\n" ); + #else + vlog( "\tARCH:\tunknown\n" ); + #endif + +#if defined( __APPLE__ ) + int type = 0; + size_t typeSize = sizeof( type ); + sysctlbyname( "hw.cputype", &type, &typeSize, NULL, 0 ); + vlog( "\tcpu type:\t%d\n", type ); + typeSize = sizeof( type ); + sysctlbyname( "hw.cpusubtype", &type, &typeSize, NULL, 0 ); + vlog( "\tcpu subtype:\t%d\n", type ); + +#elif defined( __linux__ ) && !defined(__aarch64__) + int _sysctl(struct __sysctl_args *args ); + #define OSNAMESZ 100 + + struct __sysctl_args args; + char osname[OSNAMESZ]; + size_t osnamelth; + int name[] = { CTL_KERN, KERN_OSTYPE }; + memset(&args, 0, sizeof(struct __sysctl_args)); + args.name = name; + args.nlen = sizeof(name)/sizeof(name[0]); + args.oldval = osname; + args.oldlenp = &osnamelth; + + 
osnamelth = sizeof(osname); + + if (syscall(SYS__sysctl, &args) == -1) { + vlog( "_sysctl error\n" ); + } + else { + vlog("this machine is running %*s\n", osnamelth, osname); + } + + +#endif +} + +static void PrintFunctions ( void ) +{ + vlog( "\nMath function names:\n" ); + for( int i = 0; i < functionListCount; i++ ) + { + vlog( "\t%s\n", functionList[ i ].name ); + } +} + +static void PrintUsage( void ) +{ + vlog( "%s [-acglstz]: \n", appName ); + vlog( "\toptions:\n" ); + vlog( "\t\t-a\tReport average times instead of best times\n" ); + vlog( "\t\t-c\tToggle test fp correctly rounded divide and sqrt (Default: off)\n"); + vlog( "\t\t-d\tToggle double precision testing. (Default: on iff khr_fp_64 on)\n" ); + vlog( "\t\t-f\tToggle float precision testing. (Default: on)\n" ); + vlog( "\t\t-r\tToggle fast relaxed math precision testing. (Default: on)\n" ); + vlog( "\t\t-e\tToggle test as derived implementations for fast relaxed math precision. (Default: on)\n" ); + vlog( "\t\t-h\tPrint this message and quit\n" ); + vlog( "\t\t-p\tPrint all math function names and quit\n" ); + vlog( "\t\t-l\tlink check only (make sure functions are present, skip accuracy checks.)\n" ); + vlog( "\t\t-m\tToggle run multi-threaded. (Default: on) )\n" ); + vlog( "\t\t-s\tStop on error\n" ); + vlog( "\t\t-t\tToggle timing (on by default)\n" ); + vlog( "\t\t-w\tToggle Wimpy Mode, * Not a valid test * \n"); + vlog( "\t\t-[2^n]\tSet wimpy reduction factor, recommended range of n is 1-10, default factor(%u)\n",gWimpyReductionFactor ); + vlog( "\t\t-z\tToggle FTZ mode (Section 6.5.3) for all functions. (Set by device capabilities by default.)\n" ); + vlog( "\t\t-v\tToggle Verbosity (Default: off)\n "); + vlog( "\t\t-#\tTest only vector sizes #, e.g. \"-1\" tests scalar only, \"-16\" tests 16-wide vectors only.\n" ); + vlog( "\n\tYou may also pass a number instead of a function name.\n" ); + vlog( "\tThis causes the first N tests to be skipped. 
The tests are numbered.\n" ); + vlog( "\tIf you pass a second number, that is the number tests to run after the first one.\n" ); + vlog( "\tA name list may be used in conjunction with a number range. In that case,\n" ); + vlog( "\tonly the named cases in the number range will run.\n" ); + vlog( "\tYou may also choose to pass no arguments, in which case all tests will be run.\n" ); + vlog( "\tYou may pass CL_DEVICE_TYPE_CPU/GPU/ACCELERATOR to select the device.\n" ); + vlog( "\n" ); +} + +static void CL_CALLBACK notify_callback(const char *errinfo, const void *private_info, size_t cb, void *user_data) +{ + vlog( "%s (%p, %zd, %p)\n", errinfo, private_info, cb, user_data ); +} + +static int InitCL( void ) +{ + int error; + uint32_t i; + int isEmbedded = 0; + size_t configSize = sizeof( gComputeDevices ); + + cl_uint num_devices = 0; + cl_platform_id platform = NULL; + cl_device_id *devices = NULL; + + /* Get the platform */ + cl_uint num_entries = 0; + error = clGetPlatformIDs(0, NULL, &num_entries); + if (error) { + vlog_error( "clGetPlatformIDs failed: %d\n", error ); + return error; + } + + cl_platform_id* pPlatforms = (cl_platform_id*) alloca(num_entries * sizeof(cl_platform_id)); + + /* Get the platform */ + error = clGetPlatformIDs(num_entries, pPlatforms, NULL); + if (error) { + vlog_error( "clGetPlatformIDs failed: %d\n", error ); + return error; + } + + //Choose platform + platform = pPlatforms[chosen_platform_index]; + + /* Get the number of requested devices */ + error = clGetDeviceIDs(platform, gDeviceType, 0, NULL, &num_devices ); + if (error) { + vlog_error( "clGetDeviceIDs failed: %d\n", error ); + return error; + } + + devices = (cl_device_id *) malloc( num_devices * sizeof( cl_device_id ) ); + if (!devices || chosen_device_index >= num_devices) { + vlog_error( "device index out of range -- chosen_device_index (%d) >= num_devices (%d)\n", chosen_device_index, num_devices ); + return -1; + } + + /* Get the requested device */ + error = 
clGetDeviceIDs(platform, gDeviceType, num_devices, devices, NULL ); + if (error) { + vlog_error( "clGetDeviceIDs failed: %d\n", error ); + return error; + } + + gDevice = devices[chosen_device_index]; + free(devices); + devices = NULL; + + if( (error = clGetDeviceInfo( gDevice, CL_DEVICE_MAX_COMPUTE_UNITS, configSize, &gComputeDevices, NULL )) ) + gComputeDevices = 1; + + // Check extensions + size_t extSize = 0; + if((error = clGetDeviceInfo( gDevice, CL_DEVICE_EXTENSIONS, 0, NULL, &extSize))) + { vlog_error( "Unable to get device extension string to see if double present. (%d) \n", error ); } + else + { + char *ext = (char*) malloc( extSize ); + if( NULL == ext ) + { vlog_error( "malloc failed at %s:%d\nUnable to determine if double present.\n", __FILE__, __LINE__ ); } + else + { + if((error = clGetDeviceInfo( gDevice, CL_DEVICE_EXTENSIONS, extSize, ext, NULL))) + { vlog_error( "Unable to get device extension string to see if double present. (%d) \n", error ); } + else + { + if( strstr( ext, "cl_khr_fp64" )) + { + gHasDouble ^= 1; + +#if defined( CL_DEVICE_DOUBLE_FP_CONFIG ) + if( (error = clGetDeviceInfo(gDevice, CL_DEVICE_DOUBLE_FP_CONFIG, sizeof(gDoubleCapabilities), &gDoubleCapabilities, NULL))) + { + vlog_error( "ERROR: Unable to get device CL_DEVICE_DOUBLE_FP_CONFIG. 
(%d)\n", error ); + return -1; + } + + if( DOUBLE_REQUIRED_FEATURES != (gDoubleCapabilities & DOUBLE_REQUIRED_FEATURES) ) + { + char list[300] = ""; + if( 0 == (gDoubleCapabilities & CL_FP_FMA) ) + strncat( list, "CL_FP_FMA, ", sizeof( list )-1 ); + if( 0 == (gDoubleCapabilities & CL_FP_ROUND_TO_NEAREST) ) + strncat( list, "CL_FP_ROUND_TO_NEAREST, ", sizeof( list )-1 ); + if( 0 == (gDoubleCapabilities & CL_FP_ROUND_TO_ZERO) ) + strncat( list, "CL_FP_ROUND_TO_ZERO, ", sizeof( list )-1 ); + if( 0 == (gDoubleCapabilities & CL_FP_ROUND_TO_INF) ) + strncat( list, "CL_FP_ROUND_TO_INF, ", sizeof( list )-1 ); + if( 0 == (gDoubleCapabilities & CL_FP_INF_NAN) ) + strncat( list, "CL_FP_INF_NAN, ", sizeof( list )-1 ); + if( 0 == (gDoubleCapabilities & CL_FP_DENORM) ) + strncat( list, "CL_FP_DENORM, ", sizeof( list )-1 ); + vlog_error( "ERROR: required double features are missing: %s\n", list ); + + free(ext); + return -1; + } +#else + vlog_error( "FAIL: device says it supports cl_khr_fp64 but CL_DEVICE_DOUBLE_FP_CONFIG is not in the headers!\n" ); + return -1; +#endif + } +#if defined( __APPLE__ ) + else if( strstr( ext, "cl_APPLE_fp64_basic_ops" )) + { + gHasBasicDouble ^= 1; + +#if defined( CL_DEVICE_DOUBLE_FP_CONFIG ) + if( (error = clGetDeviceInfo(gDevice, CL_DEVICE_DOUBLE_FP_CONFIG, sizeof(gDoubleCapabilities), &gDoubleCapabilities, NULL))) + { + vlog_error( "ERROR: Unable to get device CL_DEVICE_DOUBLE_FP_CONFIG. 
(%d)\n", error ); + return -1; + } + + if( DOUBLE_REQUIRED_FEATURES != (gDoubleCapabilities & DOUBLE_REQUIRED_FEATURES) ) + { + char list[300] = ""; + if( 0 == (gDoubleCapabilities & CL_FP_FMA) ) + strncat( list, "CL_FP_FMA, ", sizeof( list ) ); + if( 0 == (gDoubleCapabilities & CL_FP_ROUND_TO_NEAREST) ) + strncat( list, "CL_FP_ROUND_TO_NEAREST, ", sizeof( list ) ); + if( 0 == (gDoubleCapabilities & CL_FP_ROUND_TO_ZERO) ) + strncat( list, "CL_FP_ROUND_TO_ZERO, ", sizeof( list ) ); + if( 0 == (gDoubleCapabilities & CL_FP_ROUND_TO_INF) ) + strncat( list, "CL_FP_ROUND_TO_INF, ", sizeof( list ) ); + if( 0 == (gDoubleCapabilities & CL_FP_INF_NAN) ) + strncat( list, "CL_FP_INF_NAN, ", sizeof( list ) ); + if( 0 == (gDoubleCapabilities & CL_FP_DENORM) ) + strncat( list, "CL_FP_DENORM, ", sizeof( list ) ); + vlog_error( "ERROR: required double features are missing: %s\n", list ); + + free(ext); + return -1; + } +#else + vlog_error( "FAIL: device says it supports cl_khr_fp64 but CL_DEVICE_DOUBLE_FP_CONFIG is not in the headers!\n" ); + return -1; +#endif + } +#endif /* __APPLE__ */ + } + free(ext); + } + } + + configSize = sizeof( gDeviceFrequency ); + if( (error = clGetDeviceInfo( gDevice, CL_DEVICE_MAX_CLOCK_FREQUENCY, configSize, &gDeviceFrequency, NULL )) ) + gDeviceFrequency = 0; + + if( (error = clGetDeviceInfo(gDevice, CL_DEVICE_SINGLE_FP_CONFIG, sizeof(gFloatCapabilities), &gFloatCapabilities, NULL))) + { + vlog_error( "ERROR: Unable to get device CL_DEVICE_SINGLE_FP_CONFIG. 
(%d)\n", error ); + return -1; + } + + char profile[1024] = ""; + if( (error = clGetDeviceInfo(gDevice, CL_DEVICE_PROFILE, sizeof( profile), profile, NULL))) + { vlog_error( "FAILED -- Unable to read device profile\n" ); abort(); } + else + isEmbedded = NULL != strstr(profile, "EMBEDDED_PROFILE"); // we will verify this with a kernel below + + gContext = clCreateContext( NULL, 1, &gDevice, notify_callback, NULL, &error ); + if( NULL == gContext || error ) + { + vlog_error( "clCreateContext failed. (%d) \n", error ); + return -1; + } + + gQueue = clCreateCommandQueueWithProperties(gContext, gDevice, 0, &error); + if( NULL == gQueue || error ) + { + vlog_error( "clCreateCommandQueue failed. (%d)\n", error ); + return -2; + } + +#if defined( __APPLE__ ) + // FIXME: use clProtectedArray +#endif + //Allocate buffers + cl_uint min_alignment = 0; + error = clGetDeviceInfo (gDevice, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof(cl_uint), (void*)&min_alignment, NULL); + if (CL_SUCCESS != error) + { + vlog_error( "clGetDeviceInfo failed. 
(%d)\n", error ); + return -2; + } + min_alignment >>= 3; // convert bits to bytes + + gIn = align_malloc( BUFFER_SIZE, min_alignment ); + if( NULL == gIn ) + return -3; + gIn2 = align_malloc( BUFFER_SIZE, min_alignment ); + if( NULL == gIn2 ) + return -3; + gIn3 = align_malloc( BUFFER_SIZE, min_alignment ); + if( NULL == gIn3 ) + return -3; + gOut_Ref = align_malloc( BUFFER_SIZE, min_alignment ); + if( NULL == gOut_Ref ) + return -3; + gOut_Ref2 = align_malloc( BUFFER_SIZE, min_alignment ); + if( NULL == gOut_Ref2 ) + return -3; + + for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) + { + gOut[i] = align_malloc( BUFFER_SIZE, min_alignment ); + if( NULL == gOut[i] ) + return -7 + i; + gOut2[i] = align_malloc( BUFFER_SIZE, min_alignment ); + if( NULL == gOut2[i] ) + return -7 + i; + } + + cl_mem_flags device_flags = CL_MEM_READ_ONLY; + // save a copy on the host device to make this go faster + if( CL_DEVICE_TYPE_CPU == gDeviceType ) + device_flags |= CL_MEM_USE_HOST_PTR; + else + device_flags |= CL_MEM_COPY_HOST_PTR; + + // setup input buffers + gInBuffer = clCreateBuffer(gContext, device_flags, BUFFER_SIZE, gIn, &error); + if( gInBuffer == NULL || error ) + { + vlog_error( "clCreateBuffer1 failed for input (%d)\n", error ); + return -4; + } + + gInBuffer2 = clCreateBuffer( gContext, device_flags, BUFFER_SIZE, gIn2, &error ); + if( gInBuffer2 == NULL || error ) + { + vlog_error( "clCreateArray2 failed for input (%d)\n" , error ); + return -4; + } + + gInBuffer3 = clCreateBuffer( gContext, device_flags, BUFFER_SIZE, gIn3, &error ); + if( gInBuffer3 == NULL || error) + { + vlog_error( "clCreateArray3 failed for input (%d)\n", error ); + return -4; + } + + + // setup output buffers + device_flags = CL_MEM_READ_WRITE; + // save a copy on the host device to make this go faster + if( CL_DEVICE_TYPE_CPU == gDeviceType ) + device_flags |= CL_MEM_USE_HOST_PTR; + else + device_flags |= CL_MEM_COPY_HOST_PTR; + for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; 
i++ ) + { + gOutBuffer[i] = clCreateBuffer( gContext, device_flags, BUFFER_SIZE, gOut[i], &error ); + if( gOutBuffer[i] == NULL || error ) + { + vlog_error( "clCreateArray failed for output (%d)\n", error ); + return -5; + } + gOutBuffer2[i] = clCreateBuffer( gContext, device_flags, BUFFER_SIZE, gOut2[i], &error ); + if( gOutBuffer2[i] == NULL || error) + { + vlog_error( "clCreateArray2 failed for output (%d)\n", error ); + return -5; + } + } + + // we are embedded, check current rounding mode + if( isEmbedded ) + { + gIsInRTZMode = IsInRTZMode(); + if (0 == (gFloatCapabilities & CL_FP_INF_NAN) ) + gInfNanSupport = 0; + + // ensures embedded single precision ulp values are used + gIsEmbedded = 1; + } + + //Check tininess detection + IsTininessDetectedBeforeRounding(); + + + char c[1024]; + static const char *no_yes[] = { "NO", "YES" }; + vlog( "\nCompute Device info:\n" ); + clGetPlatformInfo(platform, CL_PLATFORM_VERSION, sizeof(c), &c, NULL); + vlog( "\tPlatform Version: %s\n", c ); + clGetDeviceInfo(gDevice, CL_DEVICE_NAME, sizeof(c), &c, NULL); + vlog( "\tDevice Name: %s\n", c ); + clGetDeviceInfo(gDevice, CL_DEVICE_VENDOR, sizeof(c), &c, NULL); + vlog( "\tVendor: %s\n", c ); + clGetDeviceInfo(gDevice, CL_DEVICE_VERSION, sizeof(c), &c, NULL); + vlog( "\tDevice Version: %s\n", c ); + clGetDeviceInfo(gDevice, CL_DEVICE_OPENCL_C_VERSION, sizeof(c), &c, NULL); + vlog( "\tCL C Version: %s\n", c ); + clGetDeviceInfo(gDevice, CL_DRIVER_VERSION, sizeof(c), &c, NULL); + vlog( "\tDriver Version: %s\n", c ); + vlog( "\tDevice Frequency: %d MHz\n", gDeviceFrequency ); + vlog( "\tSubnormal values supported for floats? %s\n", no_yes[0 != (CL_FP_DENORM & gFloatCapabilities)] ); + vlog( "\tCorrectly rounded divide and sqrt supported for floats? 
%s\n", no_yes[0 != (CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT & gFloatCapabilities)] ); + if( gToggleCorrectlyRoundedDivideSqrt ) + { + gFloatCapabilities ^= CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT; + } + vlog( "\tTesting with correctly rounded float divide and sqrt? %s\n", no_yes[0 != (CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT & gFloatCapabilities)] ); + vlog( "\tTesting with FTZ mode ON for floats? %s\n", no_yes[0 != gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities)] ); + vlog( "\tTesting single precision? %s\n", no_yes[0 != gTestFloat] ); + vlog( "\tTesting fast relaxed math? %s\n", no_yes[0 != gTestFastRelaxed] ); + if(gTestFastRelaxed) + { + vlog( "\tFast relaxed math has derived implementations? %s\n", no_yes[0 != gFastRelaxedDerived] ); + } + vlog( "\tTesting double precision? %s\n", no_yes[0 != gHasDouble] ); + if( sizeof( long double) == sizeof( double ) && gHasDouble ) + { + vlog( "\n\t\tWARNING: Host system long double does not have better precision than double!\n" ); + vlog( "\t\t All double results that do not match the reference result have their reported\n" ); + vlog( "\t\t error inflated by 0.5 ulps to account for the fact that this system\n" ); + vlog( "\t\t can not accurately represent the right result to an accuracy closer\n" ); + vlog( "\t\t than half an ulp. See comments in Ulp_Error_Double() for more details.\n\n" ); + } +#if defined( __APPLE__ ) + vlog( "\tTesting basic double precision? %s\n", no_yes[0 != gHasBasicDouble] ); +#endif + + vlog( "\tIs Embedded? %s\n", no_yes[0 != isEmbedded] ); + if( isEmbedded ) + vlog( "\tRunning in RTZ mode? %s\n", no_yes[0 != gIsInRTZMode] ); + vlog( "\tTininess is detected before rounding? %s\n", no_yes[0 != gCheckTininessBeforeRounding] ); + vlog( "\tWorker threads: %d\n", GetThreadCount() ); + vlog( "\tTesting vector sizes:" ); + for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) + vlog( "\t%d", sizeValues[i] ); + + vlog("\n"); + vlog("\tVerbose? 
%s\n", no_yes[0 != gVerboseBruteForce]); + vlog( "\n\n" ); + + // Check to see if we are using single threaded mode on other than a 1.0 device + if (getenv( "CL_TEST_SINGLE_THREADED" )) { + + char device_version[1024] = { 0 }; + clGetDeviceInfo( gDevice, CL_DEVICE_VERSION, sizeof(device_version), device_version, NULL ); + + if (strcmp("OpenCL 1.0 ",device_version)) { + vlog("ERROR: CL_TEST_SINGLE_THREADED is set in the environment. Running single threaded.\n"); + } + } + + return 0; +} + +static void ReleaseCL( void ) +{ + uint32_t i; + clReleaseMemObject(gInBuffer); + clReleaseMemObject(gInBuffer2); + clReleaseMemObject(gInBuffer3); + for ( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { + clReleaseMemObject(gOutBuffer[i]); + clReleaseMemObject(gOutBuffer2[i]); + } + clReleaseCommandQueue(gQueue); + clReleaseContext(gContext); + + align_free(gIn); + align_free(gIn2); + align_free(gIn3); + align_free(gOut_Ref); + align_free(gOut_Ref2); + + for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) + { + align_free(gOut[i]); + align_free(gOut2[i]); + } +} + +void _LogBuildError( cl_program p, int line, const char *file ) +{ + char the_log[2048] = ""; + + vlog_error( "%s:%d: Build Log:\n", file, line ); + if( 0 == clGetProgramBuildInfo(p, gDevice, CL_PROGRAM_BUILD_LOG, sizeof(the_log), the_log, NULL) ) + vlog_error( "%s", the_log ); + else + vlog_error( "*** Error getting build log for program %p\n", p ); +} + +int InitILogbConstants( void ) +{ + int error; + const char *kernel = + "__kernel void GetILogBConstants( __global int *out )\n" + "{\n" + " out[0] = FP_ILOGB0;\n" + " out[1] = FP_ILOGBNAN;\n" + "}\n"; + + cl_program query; + error = create_single_kernel_helper(gContext, &query, NULL, 1, &kernel, NULL); + if (error != CL_SUCCESS) + { + log_error("Error: Unable to build program to get FP_ILOGB0 and FP_ILOGBNAN for the device.\n"); + return -1; + } + + cl_kernel k = clCreateKernel( query, "GetILogBConstants", &error ); + if( NULL == k || error) + { + 
vlog_error( "Error: Unable to create kernel to get FP_ILOGB0 and FP_ILOGBNAN for the device. Err = %d", error ); + return error; + } + + if((error = clSetKernelArg(k, 0, sizeof( gOutBuffer[gMinVectorSizeIndex]), &gOutBuffer[gMinVectorSizeIndex]))) + { + vlog_error( "Error: Unable to set kernel arg to get FP_ILOGB0 and FP_ILOGBNAN for the device. Err = %d", error ); + return error; + } + + size_t dim = 1; + if((error = clEnqueueNDRangeKernel(gQueue, k, 1, NULL, &dim, NULL, 0, NULL, NULL) )) + { + vlog_error( "Error: Unable to execute kernel to get FP_ILOGB0 and FP_ILOGBNAN for the device. Err = %d", error ); + return error; + } + + struct{ cl_int ilogb0, ilogbnan; }data; + if(( error = clEnqueueReadBuffer( gQueue, gOutBuffer[gMinVectorSizeIndex], CL_TRUE, 0, sizeof( data ), &data, 0, NULL, NULL))) + { + vlog_error( "Error: unable to read FP_ILOGB0 and FP_ILOGBNAN from the device. Err = %d", error ); + return error; + } + + gDeviceILogb0 = data.ilogb0; + gDeviceILogbNaN = data.ilogbnan; + + clReleaseKernel(k); + clReleaseProgram(query); + + return 0; +} + +int IsTininessDetectedBeforeRounding( void ) +{ + int error; + const char *kernel = + "__kernel void IsTininessDetectedBeforeRounding( __global float *out )\n" + "{\n" + " volatile float a = 0x1.000002p-126f;\n" + " volatile float b = 0x1.fffffcp-1f;\n" // product is 0x1.fffffffffff8p-127 + " out[0] = a * b;\n" + "}\n"; + + cl_program query; + + error = create_single_kernel_helper(gContext, &query, NULL, 1, &kernel, NULL); + if (error != CL_SUCCESS) + { + log_error("Error: Unable to build program to detect how tininess is detected for the device.\n"); + return -1; + } + + cl_kernel k = clCreateKernel( query, "IsTininessDetectedBeforeRounding", &error ); + if( NULL == k || error) + { + vlog_error( "Error: Unable to create kernel to detect how tininess is detected for the device. 
Err = %d", error ); + return error; + } + + if((error = clSetKernelArg(k, 0, sizeof( gOutBuffer[gMinVectorSizeIndex]), &gOutBuffer[gMinVectorSizeIndex]))) + { + vlog_error( "Error: Unable to set kernel arg to detect how tininess is detected for the device. Err = %d", error ); + return error; + } + + size_t dim = 1; + if((error = clEnqueueNDRangeKernel(gQueue, k, 1, NULL, &dim, NULL, 0, NULL, NULL) )) + { + vlog_error( "Error: Unable to execute kernel to detect how tininess is detected for the device. Err = %d", error ); + return error; + } + + struct{ cl_uint f; }data; + if(( error = clEnqueueReadBuffer( gQueue, gOutBuffer[gMinVectorSizeIndex], CL_TRUE, 0, sizeof( data ), &data, 0, NULL, NULL))) + { + vlog_error( "Error: unable to read result from tininess test from the device. Err = %d", error ); + return error; + } + + gCheckTininessBeforeRounding = 0 == (data.f & 0x7fffffff); + + clReleaseKernel(k); + clReleaseProgram(query); + + return 0; +} + + +int MakeKernel( const char **c, cl_uint count, const char *name, cl_kernel *k, cl_program *p ) +{ + int error = 0; + char options[200]; + + strcpy(options, "-cl-std=CL2.0"); + + if( gForceFTZ ) + { + strcat(options," -cl-denorms-are-zero"); + } + + if( gTestFastRelaxed ) + { + strcat(options, " -cl-fast-relaxed-math"); + } + + error = create_single_kernel_helper(gContext, p, NULL, count, c, NULL, options); + if (error != CL_SUCCESS) + { + log_error("create_single_kernel_helper failed\n"); + return -1; + } + + *k = clCreateKernel( *p, name, &error ); + if( NULL == *k || error ) + { + char buffer[2048] = ""; + + vlog_error("\t\tFAILED -- clCreateKernel() failed: (%d)\n", error); + clGetProgramBuildInfo(*p, gDevice, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, NULL); + vlog_error("Log: %s\n", buffer); + clReleaseProgram( *p ); + return error; + } + + return error; +} + +int MakeKernels( const char **c, cl_uint count, const char *name, cl_uint kernel_count, cl_kernel *k, cl_program *p ) +{ + int error = 0; + cl_uint i; + 
char options[200]; + + strcpy(options, "-cl-std=CL2.0"); + + if (gForceFTZ) + { + strcat(options," -cl-denorms-are-zero "); + } + + if( gFloatCapabilities & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT ) + { + strcat(options," -cl-fp32-correctly-rounded-divide-sqrt "); + } + + if( gTestFastRelaxed ) + { + strcat(options, " -cl-fast-relaxed-math"); + } + + error = create_single_kernel_helper(gContext, p, NULL, count, c, NULL, options); + if (error != CL_SUCCESS) + { + log_error("create_single_kernel_helper failed\n"); + return -1; + } + + memset( k, 0, kernel_count * sizeof( *k) ); + for( i = 0; i< kernel_count; i++ ) + { + k[i] = clCreateKernel( *p, name, &error ); + if( NULL == k[i]|| error ) + { + char buffer[2048] = ""; + + vlog_error("\t\tFAILED -- clCreateKernel() failed: (%d)\n", error); + clGetProgramBuildInfo(*p, gDevice, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, NULL); + vlog_error("Log: %s\n", buffer); + clReleaseProgram( *p ); + return error; + } + } + + return 0; +} + + +static int IsInRTZMode( void ) +{ + int error; + const char *kernel = + "__kernel void GetRoundingMode( __global int *out )\n" + "{\n" + " volatile float a = 0x1.0p23f;\n" + " volatile float b = -0x1.0p23f;\n" + " out[0] = (a + 0x1.fffffep-1f == a) && (b - 0x1.fffffep-1f == b);\n" + "}\n"; + + cl_program query; + + error = create_single_kernel_helper(gContext, &query, NULL, 1, &kernel, NULL); + if (error != CL_SUCCESS) + { + log_error("Error: Unable to build program to detect RTZ mode for the device.\n"); + return -1; + } + + cl_kernel k = clCreateKernel( query, "GetRoundingMode", &error ); + if( NULL == k || error) + { + vlog_error( "Error: Unable to create kernel to gdetect RTZ mode for the device. Err = %d", error ); + return error; + } + + if((error = clSetKernelArg(k, 0, sizeof( gOutBuffer[gMinVectorSizeIndex]), &gOutBuffer[gMinVectorSizeIndex]))) + { + vlog_error( "Error: Unable to set kernel arg to detect RTZ mode for the device. 
Err = %d", error ); + return error; + } + + size_t dim = 1; + if((error = clEnqueueNDRangeKernel(gQueue, k, 1, NULL, &dim, NULL, 0, NULL, NULL) )) + { + vlog_error( "Error: Unable to execute kernel to detect RTZ mode for the device. Err = %d", error ); + return error; + } + + struct{ cl_int isRTZ; }data; + if(( error = clEnqueueReadBuffer( gQueue, gOutBuffer[gMinVectorSizeIndex], CL_TRUE, 0, sizeof( data ), &data, 0, NULL, NULL))) + { + vlog_error( "Error: unable to read RTZ mode data from the device. Err = %d", error ); + return error; + } + + clReleaseKernel(k); + clReleaseProgram(query); + + return data.isRTZ; +} + +#pragma mark - + +const char *sizeNames[ VECTOR_SIZE_COUNT] = { "", "2", "3", "4", "8", "16" }; +const int sizeValues[ VECTOR_SIZE_COUNT] = { 1, 2, 3, 4, 8, 16 }; + +float Abs_Error( float test, double reference ) +{ + if( isnan(test) && isnan(reference) ) + return 0.0f; + return fabs((float)(reference-(double)test)); +} + +/* +#define HALF_MIN_EXP -13 +#define HALF_MANT_DIG 11 +float Ulp_Error_Half( float test, double reference ) +{ + union{ double d; uint64_t u; }u; u.d = reference; + + // Note: This function presumes that someone has already tested whether the result is correctly, + // rounded before calling this function. That test: + // + // if( (float) reference == test ) + // return 0.0f; + // + // would ensure that cases like fabs(reference) > FLT_MAX are weeded out before we get here. + // Otherwise, we'll return inf ulp error here, for what are otherwise correctly rounded + // results. 
+ + double testVal = test; + if( u.u & 0x000fffffffffffffULL ) + { // Non-power of two and NaN + if( isnan( reference ) && isnan( test ) ) + return 0.0f; // if we are expecting a NaN, any NaN is fine + + // The unbiased exponent of the ulp unit place + int ulp_exp = HALF_MANT_DIG - 1 - MAX( ilogb( reference), HALF_MIN_EXP-1 ); + + // Scale the exponent of the error + return (float) scalbn( testVal - reference, ulp_exp ); + } + + if( isinf( reference ) ) + { + if( (double) test == reference ) + return 0.0f; + + return (float) (testVal - reference ); + } + + // reference is a normal power of two or a zero + int ulp_exp = HALF_MANT_DIG - 1 - MAX( ilogb( reference) - 1, HALF_MIN_EXP-1 ); + + // Scale the exponent of the error + return (float) scalbn( testVal - reference, ulp_exp ); +} +*/ + + +#if defined( __APPLE__ ) + #include +#endif + +uint64_t GetTime( void ) +{ +#if defined( __APPLE__ ) + return mach_absolute_time(); +#elif defined(_WIN32) && defined(_MSC_VER) + return ReadTime(); +#else + //mach_absolute_time is a high precision timer with precision < 1 microsecond. + #warning need accurate clock here. Times are invalid. + return 0; +#endif +} + + +#if defined(_WIN32) && defined (_MSC_VER) +/* function is defined in "compat.h" */ +#else +double SubtractTime( uint64_t endTime, uint64_t startTime ) +{ + uint64_t diff = endTime - startTime; + static double conversion = 0.0; + + if( 0.0 == conversion ) + { +#if defined( __APPLE__ ) + mach_timebase_info_data_t info = {0,0}; + kern_return_t err = mach_timebase_info( &info ); + if( 0 == err ) + conversion = 1e-9 * (double) info.numer / (double) info.denom; +#else + // This function consumes output from GetTime() above, and converts the time to secionds. + #warning need accurate ticks to seconds conversion factor here. Times are invalid. 
+#endif + } + + // strictly speaking we should also be subtracting out timer latency here + return conversion * (double) diff; +} +#endif + +cl_uint RoundUpToNextPowerOfTwo( cl_uint x ) +{ + if( 0 == (x & (x-1))) + return x; + + while( x & (x-1) ) + x &= x-1; + + return x+x; +} + +#if !defined( __APPLE__ ) +void memset_pattern4(void *dest, const void *src_pattern, size_t bytes ) +{ + uint32_t pat = ((uint32_t*) src_pattern)[0]; + size_t count = bytes / 4; + size_t i; + uint32_t *d = (uint32_t*)dest; + + for( i = 0; i < count; i++ ) + d[i] = pat; + + d += i; + + bytes &= 3; + if( bytes ) + memcpy( d, src_pattern, bytes ); +} +#endif + +void TestFinishAtExit(void) { + test_finish(); +} + diff --git a/test_conformance/math_brute_force/reference_math.c b/test_conformance/math_brute_force/reference_math.c new file mode 100644 index 00000000..791375bc --- /dev/null +++ b/test_conformance/math_brute_force/reference_math.c @@ -0,0 +1,5499 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/compat.h" +#include "reference_math.h" +#include + +#if !defined(_WIN32) +#include +#endif + +#include "Utility.h" + +#if defined( __SSE__ ) || (defined( _MSC_VER ) && (defined(_M_IX86) || defined(_M_X64))) + #include +#endif +#if defined( __SSE2__ ) || (defined( _MSC_VER ) && (defined(_M_IX86) || defined(_M_X64))) + #include +#endif + +#ifndef M_PI_4 + #define M_PI_4 (M_PI/4) +#endif + +#define EVALUATE( x ) x +#define CONCATENATE(x, y) x ## EVALUATE(y) + + +// Declare Classification macros for non-C99 platforms +#ifndef isinf + #define isinf(x) ( sizeof (x) == sizeof(float ) ? fabsf(x) == INFINITY \ + : sizeof (x) == sizeof(double) ? fabs(x) == INFINITY \ + : fabsl(x) == INFINITY) +#endif + +#ifndef isfinite + #define isfinite(x) ( sizeof (x) == sizeof(float ) ? fabsf(x) < INFINITY \ + : sizeof (x) == sizeof(double) ? fabs(x) < INFINITY \ + : fabsl(x) < INFINITY) +#endif + +#ifndef isnan + #define isnan(_a) ( (_a) != (_a) ) +#endif + +#ifdef __MINGW32__ + #undef isnormal +#endif + +#ifndef isnormal + #define isnormal(x) ( sizeof (x) == sizeof(float ) ? (fabsf(x) < INFINITY && fabsf(x) >= FLT_MIN) \ + : sizeof (x) == sizeof(double) ? (fabs(x) < INFINITY && fabs(x) >= DBL_MIN) \ + : (fabsl(x) < INFINITY && fabsl(x) >= LDBL_MIN) ) +#endif + +#ifndef islessgreater + // Note: Non-C99 conformant. This will trigger floating point exceptions. We don't care about that here. 
+ #define islessgreater( _x, _y ) ( (_x) < (_y) || (_x) > (_y) ) +#endif + +#pragma STDC FP_CONTRACT OFF +static void __log2_ep(double *hi, double *lo, double x); + +typedef union +{ + uint64_t i; + double d; +}uint64d_t; + +static const uint64d_t _CL_NAN = { 0x7ff8000000000000ULL }; + +#define cl_make_nan() _CL_NAN.d + +static double reduce1( double x ); +static double reduce1( double x ) +{ + if( fabs(x) >= HEX_DBL( +, 1, 0, +, 53 ) ) + { + if( fabs(x) == INFINITY ) + return cl_make_nan(); + + return 0.0; //we patch up the sign for sinPi and cosPi later, since they need different signs + } + + // Find the nearest multiple of 2 + const double r = copysign( HEX_DBL( +, 1, 0, +, 53 ), x ); + double z = x + r; + z -= r; + + // subtract it from x. Value is now in the range -1 <= x <= 1 + return x - z; +} + +/* +static double reduceHalf( double x ); +static double reduceHalf( double x ) +{ + if( fabs(x) >= HEX_DBL( +, 1, 0, +, 52 ) ) + { + if( fabs(x) == INFINITY ) + return cl_make_nan(); + + return 0.0; //we patch up the sign for sinPi and cosPi later, since they need different signs + } + + // Find the nearest multiple of 1 + const double r = copysign( HEX_DBL( +, 1, 0, +, 52 ), x ); + double z = x + r; + z -= r; + + // subtract it from x. Value is now in the range -0.5 <= x <= 0.5 + return x - z; +} +*/ + +double reference_acospi( double x) { return reference_acos( x ) / M_PI; } +double reference_asinpi( double x) { return reference_asin( x ) / M_PI; } +double reference_atanpi( double x) { return reference_atan( x ) / M_PI; } +double reference_atan2pi( double y, double x ) { return reference_atan2( y, x) / M_PI; } +double reference_cospi( double x) +{ + if( reference_fabs(x) >= HEX_DBL( +, 1, 0, +, 52 ) ) + { + if( reference_fabs(x) == INFINITY ) + return cl_make_nan(); + + //Note this probably fails for odd values between 0x1.0p52 and 0x1.0p53. + //However, when starting with single precision inputs, there will be no odd values. 
+ + return 1.0; + } + + x = reduce1(x+0.5); + + // reduce to [-0.5, 0.5] + if( x < -0.5 ) + x = -1 - x; + else if ( x > 0.5 ) + x = 1 - x; + + // cosPi zeros are all +0 + if( x == 0.0 ) + return 0.0; + + return reference_sin( x * M_PI ); +} + +double reference_relaxed_divide( double x, double y ) { return (float)(((float) x ) / ( (float) y )); } + +double reference_divide( double x, double y ) { return x / y; } + +// Add a + b. If the result modulo overflowed, write 1 to *carry, otherwise 0 +static inline cl_ulong add_carry( cl_ulong a, cl_ulong b, cl_ulong *carry ) +{ + cl_ulong result = a + b; + *carry = result < a; + return result; +} + +// Subtract a - b. If the result modulo overflowed, write 1 to *carry, otherwise 0 +static inline cl_ulong sub_carry( cl_ulong a, cl_ulong b, cl_ulong *carry ) +{ + cl_ulong result = a - b; + *carry = result > a; + return result; +} + +static float fallback_frexpf( float x, int *iptr ) +{ + cl_uint u, v; + float fu, fv; + + memcpy( &u, &x, sizeof(u)); + + cl_uint exponent = u & 0x7f800000U; + cl_uint mantissa = u & ~0x7f800000U; + + // add 1 to the exponent + exponent += 0x00800000U; + + if( (cl_int) exponent < (cl_int) 0x01000000 ) + { // subnormal, NaN, Inf + mantissa |= 0x3f000000U; + + v = mantissa & 0xff800000U; + u = mantissa; + memcpy( &fv, &v, sizeof(v)); + memcpy( &fu, &u, sizeof(u)); + + fu -= fv; + + memcpy( &v, &fv, sizeof(v)); + memcpy( &u, &fu, sizeof(u)); + + exponent = u & 0x7f800000U; + mantissa = u & ~0x7f800000U; + + *iptr = (exponent >> 23) + (-126 + 1 -126); + u = mantissa | 0x3f000000U; + memcpy( &fu, &u, sizeof(u)); + return fu; + } + + *iptr = (exponent >> 23) - 127; + u = mantissa | 0x3f000000U; + memcpy( &fu, &u, sizeof(u)); + return fu; +} + +static inline int extractf( float, cl_uint * ); +static inline int extractf( float x, cl_uint *mant ) +{ + static float (*frexppf)(float, int*) = NULL; + int e; + + // verify that frexp works properly + if( NULL == frexppf ) + { + if( 0.5f == frexpf( HEX_FLT( +, 
1, 0, -, 130 ), &e ) && e == -129 ) + frexppf = frexpf; + else + frexppf = fallback_frexpf; + } + + *mant = (cl_uint) (HEX_FLT( +, 1, 0, +, 32 ) * fabsf( frexppf( x, &e ))); + return e - 1; +} + +// Shift right by shift bits. Any bits lost on the right side are bitwise OR'd together and ORd into the LSB of the result +static inline void shift_right_sticky_64( cl_ulong *p, int shift ); +static inline void shift_right_sticky_64( cl_ulong *p, int shift ) +{ + cl_ulong sticky = 0; + cl_ulong r = *p; + + // C doesn't handle shifts greater than the size of the variable dependably + if( shift >= 64 ) + { + sticky |= (0 != r); + r = 0; + } + else + { + sticky |= (0 != (r << (64-shift))); + r >>= shift; + } + + *p = r | sticky; +} + +// Add two 64 bit mantissas. Bits that are below the LSB of the result are OR'd into the LSB of the result +static inline void add64( cl_ulong *p, cl_ulong c, int *exponent ); +static inline void add64( cl_ulong *p, cl_ulong c, int *exponent ) +{ + cl_ulong carry; + c = add_carry(c, *p, &carry); + if( carry ) + { + carry = c & 1; // set aside sticky bit + c >>= 1; // right shift to deal with overflow + c |= carry | 0x8000000000000000ULL; // or in carry bit, and sticky bit. 
The latter is to prevent rounding from believing we are exact half way case + *exponent = *exponent + 1; // adjust exponent + } + + *p = c; +} + +// IEEE-754 round to nearest, ties to even rounding +static float round_to_nearest_even_float( cl_ulong p, int exponent ); +static float round_to_nearest_even_float( cl_ulong p, int exponent ) +{ + union{ cl_uint u; cl_float d;} u; + + // If mantissa is zero, return 0.0f + if (p == 0) return 0.0f; + + // edges + if( exponent > 127 ) + { + volatile float r = exponent * CL_FLT_MAX; // signal overflow + + // attempt to fool the compiler into not optimizing the above line away + if( r > CL_FLT_MAX ) + return INFINITY; + + return r; + } + if( exponent == -150 && p > 0x8000000000000000ULL) + return HEX_FLT( +, 1, 0, -, 149 ); + if( exponent <= -150 ) return 0.0f; + + //Figure out which bits go where + int shift = 8 + 32; + if( exponent < -126 ) + { + shift -= 126 + exponent; // subnormal: shift is not 52 + exponent = -127; // set exponent to 0 + } + else + p &= 0x7fffffffffffffffULL; // normal: leading bit is implicit. Remove it. 
+ + // Assemble the double (round toward zero) + u.u = (cl_uint)(p >> shift) | ((cl_uint) (exponent + 127) << 23); + + // put a representation of the residual bits into hi + p <<= (64-shift); + + //round to nearest, ties to even based on the unused portion of p + if( p < 0x8000000000000000ULL ) return u.d; + if( p == 0x8000000000000000ULL ) u.u += u.u & 1U; + else u.u++; + + return u.d; +} + +static float round_to_nearest_even_float_ftz( cl_ulong p, int exponent ); +static float round_to_nearest_even_float_ftz( cl_ulong p, int exponent ) +{ + extern int gCheckTininessBeforeRounding; + + union{ cl_uint u; cl_float d;} u; + int shift = 8 + 32; + + // If mantissa is zero, return 0.0f + if (p == 0) return 0.0f; + + // edges + if( exponent > 127 ) + { + volatile float r = exponent * CL_FLT_MAX; // signal overflow + + // attempt to fool the compiler into not optimizing the above line away + if( r > CL_FLT_MAX ) + return INFINITY; + + return r; + } + + // Deal with FTZ for gCheckTininessBeforeRounding + if( exponent < (gCheckTininessBeforeRounding - 127) ) + return 0.0f; + + if( exponent == -127 ) // only happens for machines that check tininess after rounding + p = (p&1) | (p>>1); + else + p &= 0x7fffffffffffffffULL; // normal: leading bit is implicit. Remove it. + + cl_ulong q = p; + + + // Assemble the double (round toward zero) + u.u = (cl_uint)(q >> shift) | ((cl_uint) (exponent + 127) << 23); + + // put a representation of the residual bits into hi + q <<= (64-shift); + + //round to nearest, ties to even based on the unused portion of p + if( q > 0x8000000000000000ULL ) + u.u++; + else if( q == 0x8000000000000000ULL ) + u.u += u.u & 1U; + + // Deal with FTZ for ! gCheckTininessBeforeRounding + if( 0 == (u.u & 0x7f800000U ) ) + return 0.0f; + + return u.d; +} + + +// IEEE-754 round toward zero. 
+static float round_toward_zero_float( cl_ulong p, int exponent ); +static float round_toward_zero_float( cl_ulong p, int exponent ) +{ + union{ cl_uint u; cl_float d;} u; + + // If mantissa is zero, return 0.0f + if (p == 0) return 0.0f; + + // edges + if( exponent > 127 ) + { + volatile float r = exponent * CL_FLT_MAX; // signal overflow + + // attempt to fool the compiler into not optimizing the above line away + if( r > CL_FLT_MAX ) + return CL_FLT_MAX; + + return r; + } + + if( exponent <= -149 ) + return 0.0f; + + //Figure out which bits go where + int shift = 8 + 32; + if( exponent < -126 ) + { + shift -= 126 + exponent; // subnormal: shift is not 52 + exponent = -127; // set exponent to 0 + } + else + p &= 0x7fffffffffffffffULL; // normal: leading bit is implicit. Remove it. + + // Assemble the double (round toward zero) + u.u = (cl_uint)(p >> shift) | ((cl_uint) (exponent + 127) << 23); + + return u.d; +} + +static float round_toward_zero_float_ftz( cl_ulong p, int exponent ); +static float round_toward_zero_float_ftz( cl_ulong p, int exponent ) +{ + extern int gCheckTininessBeforeRounding; + + union{ cl_uint u; cl_float d;} u; + int shift = 8 + 32; + + // If mantissa is zero, return 0.0f + if (p == 0) return 0.0f; + + // edges + if( exponent > 127 ) + { + volatile float r = exponent * CL_FLT_MAX; // signal overflow + + // attempt to fool the compiler into not optimizing the above line away + if( r > CL_FLT_MAX ) + return CL_FLT_MAX; + + return r; + } + + // Deal with FTZ for gCheckTininessBeforeRounding + if( exponent < -126 ) + return 0.0f; + + cl_ulong q = p &= 0x7fffffffffffffffULL; // normal: leading bit is implicit. Remove it. + + // Assemble the double (round toward zero) + u.u = (cl_uint)(q >> shift) | ((cl_uint) (exponent + 127) << 23); + + // put a representation of the residual bits into hi + q <<= (64-shift); + + return u.d; +} + +// Subtract two significands. 
+static inline void sub64( cl_ulong *c, cl_ulong p, cl_uint *signC, int *expC ); +static inline void sub64( cl_ulong *c, cl_ulong p, cl_uint *signC, int *expC ) +{ + cl_ulong carry; + p = sub_carry( *c, p, &carry ); + + if( carry ) + { + *signC ^= 0x80000000U; + p = -p; + } + + // normalize + if( p ) + { + int shift = 32; + cl_ulong test = 1ULL << 32; + while( 0 == (p & 0x8000000000000000ULL)) + { + if( p < test ) + { + p <<= shift; + *expC = *expC - shift; + } + shift >>= 1; + test <<= shift; + } + } + else + { + // zero result. + *expC = -200; + *signC = 0; // IEEE rules say a - a = +0 for all rounding modes except -inf + } + + *c = p; +} + + +float reference_fma( float a, float b, float c, int shouldFlush ) +{ + static const cl_uint kMSB = 0x80000000U; + + // Make bits accessible + union{ cl_uint u; cl_float d; } ua; ua.d = a; + union{ cl_uint u; cl_float d; } ub; ub.d = b; + union{ cl_uint u; cl_float d; } uc; uc.d = c; + + // deal with Nans, infinities and zeros + if( isnan( a ) || isnan( b ) || isnan(c) || + isinf( a ) || isinf( b ) || isinf(c) || + 0 == ( ua.u & ~kMSB) || // a == 0, defeat host FTZ behavior + 0 == ( ub.u & ~kMSB) || // b == 0, defeat host FTZ behavior + 0 == ( uc.u & ~kMSB) ) // c == 0, defeat host FTZ behavior + { + FPU_mode_type oldMode; + RoundingMode oldRoundMode = kRoundToNearestEven; + if( isinf( c ) && !isinf(a) && !isinf(b) ) + return (c + a) + b; + + if (gIsInRTZMode) + oldRoundMode = set_round(kRoundTowardZero, kfloat); + + memset( &oldMode, 0, sizeof( oldMode ) ); + if( shouldFlush ) + ForceFTZ( &oldMode ); + + a = (float) reference_multiply( a, b ); // some risk that the compiler will insert a non-compliant fma here on some platforms. + a = (float) reference_add( a, c ); // We use STDC FP_CONTRACT OFF above to attempt to defeat that. 
+ + if( shouldFlush ) + RestoreFPState( &oldMode ); + + if( gIsInRTZMode ) + set_round(oldRoundMode, kfloat); + return a; + } + + // extract exponent and mantissa + // exponent is a standard unbiased signed integer + // mantissa is a cl_uint, with leading non-zero bit positioned at the MSB + cl_uint mantA, mantB, mantC; + int expA = extractf( a, &mantA ); + int expB = extractf( b, &mantB ); + int expC = extractf( c, &mantC ); + cl_uint signC = uc.u & kMSB; // We'll need the sign bit of C later to decide if we are adding or subtracting + +// exact product of A and B + int exponent = expA + expB; + cl_uint sign = (ua.u ^ ub.u) & kMSB; + cl_ulong product = (cl_ulong) mantA * (cl_ulong) mantB; + + // renormalize -- 1.m * 1.n yields a number between 1.0 and 3.99999.. + // The MSB might not be set. If so, fix that. Otherwise, reflect the fact that we got another power of two from the multiplication + if( 0 == (0x8000000000000000ULL & product) ) + product <<= 1; + else + exponent++; // 2**31 * 2**31 gives 2**62. If the MSB was set, then our exponent increased. + +//infinite precision add + cl_ulong addend = (cl_ulong) mantC << 32; + if( exponent >= expC ) + { + // Shift C relative to the product so that their exponents match + if( exponent > expC ) + shift_right_sticky_64( &addend, exponent - expC ); + + // Add + if( sign ^ signC ) + sub64( &product, addend, &sign, &exponent ); + else + add64( &product, addend, &exponent ); + } + else + { + // Shift the product relative to C so that their exponents match + shift_right_sticky_64( &product, expC - exponent ); + + // add + if( sign ^ signC ) + sub64( &addend, product, &signC, &expC ); + else + add64( &addend, product, &expC ); + + product = addend; + exponent = expC; + sign = signC; + } + + // round to IEEE result -- we do not do flushing to zero here. That part is handled manually in ternary.c. 
+ if (gIsInRTZMode) + { + if( shouldFlush ) + ua.d = round_toward_zero_float_ftz( product, exponent); + else + ua.d = round_toward_zero_float( product, exponent); + } + else + { + if( shouldFlush ) + ua.d = round_to_nearest_even_float_ftz( product, exponent); + else + ua.d = round_to_nearest_even_float( product, exponent); + } + + // Set the sign + ua.u |= sign; + + return ua.d; +} + +double reference_relaxed_exp10( double x) +{ + return reference_exp10(x); +} + +double reference_exp10( double x) { return reference_exp2( x * HEX_DBL( +, 1, a934f0979a371, +, 1 ) ); } + + +int reference_ilogb( double x ) +{ + extern int gDeviceILogb0, gDeviceILogbNaN; + union { cl_double f; cl_ulong u;} u; + + u.f = (float) x; + cl_int exponent = (cl_int) (u.u >> 52) & 0x7ff; + if( exponent == 0x7ff ) + { + if( u.u & 0x000fffffffffffffULL ) + return gDeviceILogbNaN; + + return CL_INT_MAX; + } + + if( exponent == 0 ) + { // deal with denormals + u.f = x * HEX_DBL( +, 1, 0, +, 64 ); + exponent = (cl_int) (u.u >> 52) & 0x7ff; + if( exponent == 0 ) + return gDeviceILogb0; + + return exponent - (1023 + 64); + } + + return exponent - 1023; +} + +double reference_nan( cl_uint x ) +{ + union{ cl_uint u; cl_float f; }u; + u.u = x | 0x7fc00000U; + return (double) u.f; +} + +double reference_maxmag( double x, double y ) +{ + double fabsx = fabs(x); + double fabsy = fabs(y); + + if( fabsx < fabsy ) + return y; + + if( fabsy < fabsx ) + return x; + + return reference_fmax( x, y ); +} + +double reference_minmag( double x, double y ) +{ + double fabsx = fabs(x); + double fabsy = fabs(y); + + if( fabsx > fabsy ) + return y; + + if( fabsy > fabsx ) + return x; + + return reference_fmin( x, y ); +} + +//double my_nextafter( double x, double y ){ return (double) nextafterf( (float) x, (float) y ); } + +double reference_relaxed_mad( double a, double b, double c) +{ + return ((float) a )* ((float) b) + (float) c; +} + +double reference_mad( double a, double b, double c ) +{ + return a * b + c; +} + 
+double reference_recip( double x) { return 1.0 / x; } +double reference_rootn( double x, int i ) +{ + + //rootn ( x, 0 ) returns a NaN. + if( 0 == i ) + return cl_make_nan(); + + //rootn ( x, n ) returns a NaN for x < 0 and n is even. + if( x < 0 && 0 == (i&1) ) + return cl_make_nan(); + + if( x == 0.0 ) + { + switch( i & 0x80000001 ) + { + //rootn ( +-0, n ) is +0 for even n > 0. + case 0: + return 0.0f; + + //rootn ( +-0, n ) is +-0 for odd n > 0. + case 1: + return x; + + //rootn ( +-0, n ) is +inf for even n < 0. + case 0x80000000: + return INFINITY; + + //rootn ( +-0, n ) is +-inf for odd n < 0. + case 0x80000001: + return copysign(INFINITY, x); + } + } + + double sign = x; + x = reference_fabs(x); + x = reference_exp2( reference_log2(x) / (double) i ); + return reference_copysignd( x, sign ); +} + +double reference_rsqrt( double x) { return 1.0 / reference_sqrt(x); } +//double reference_sincos( double x, double *c ){ *c = cos(x); return sin(x); } +double reference_sinpi( double x) +{ + double r = reduce1(x); + + // reduce to [-0.5, 0.5] + if( r < -0.5 ) + r = -1 - r; + else if ( r > 0.5 ) + r = 1 - r; + + // sinPi zeros have the same sign as x + if( r == 0.0 ) + return reference_copysignd(0.0, x); + + return reference_sin( r * M_PI ); +} + +double reference_tanpi( double x) +{ + // set aside the sign (allows us to preserve sign of -0) + double sign = reference_copysignd( 1.0, x); + double z = reference_fabs(x); + + // if big and even -- caution: only works if x only has single precision + if( z >= HEX_DBL( +, 1, 0, +, 24 ) ) + { + if( z == INFINITY ) + return x - x; // nan + + return reference_copysignd( 0.0, x); // tanpi ( n ) is copysign( 0.0, n) for even integers n. 
+ } + + // reduce to the range [ -0.5, 0.5 ] + double nearest = reference_rint( z ); // round to nearest even places n + 0.5 values in the right place for us + int i = (int) nearest; // test above against 0x1.0p24 avoids overflow here + z -= nearest; + + //correction for odd integer x for the right sign of zero + if( (i&1) && z == 0.0 ) + sign = -sign; + + // track changes to the sign + sign *= reference_copysignd(1.0, z); // really should just be an xor + z = reference_fabs(z); // remove the sign again + + // reduce once more + // If we don't do this, rounding error in z * M_PI will cause us not to return infinities properly + if( z > 0.25 ) + { + z = 0.5 - z; + return sign / reference_tan( z * M_PI ); // use system tan to get the right result + } + + // + return sign * reference_tan( z * M_PI ); // use system tan to get the right result +} + +double reference_pown( double x, int i) { return reference_pow( x, (double) i ); } +double reference_powr( double x, double y ) +{ + //powr ( x, y ) returns NaN for x < 0. + if( x < 0.0 ) + return cl_make_nan(); + + //powr ( x, NaN ) returns the NaN for x >= 0. + //powr ( NaN, y ) returns the NaN. + if( isnan(x) || isnan(y) ) + return x + y; // Note: behavior different here than for pow(1,NaN), pow(NaN, 0) + + if( x == 1.0 ) + { + //powr ( +1, +-inf ) returns NaN. + if( reference_fabs(y) == INFINITY ) + return cl_make_nan(); + + //powr ( +1, y ) is 1 for finite y. (NaN handled above) + return 1.0; + } + + if( y == 0.0 ) + { + //powr ( +inf, +-0 ) returns NaN. + //powr ( +-0, +-0 ) returns NaN. + if( x == 0.0 || x == INFINITY ) + return cl_make_nan(); + + //powr ( x, +-0 ) is 1 for finite x > 0. (x <= 0, NaN, INF already handled above) + return 1.0; + } + + if( x == 0.0 ) + { + //powr ( +-0, -inf) is +inf. + //powr ( +-0, y ) is +inf for finite y < 0. + if( y < 0.0 ) + return INFINITY; + + //powr ( +-0, y ) is +0 for y > 0. 
(NaN, y==0 handled above) + return 0.0; + } + + // x = +inf + if( isinf(x) ) + { + if( y < 0 ) + return 0; + return INFINITY; + } + + double fabsx = reference_fabs(x); + double fabsy = reference_fabs(y); + + //y = +-inf cases + if( isinf(fabsy) ) + { + if( y < 0 ) + { + if( fabsx < 1 ) + return INFINITY; + return 0; + } + if( fabsx < 1 ) + return 0; + return INFINITY; + } + + double hi, lo; + __log2_ep(&hi, &lo, x); + double prod = y * hi; + double result = reference_exp2(prod); + + return result; +} + +double reference_fract( double x, double *ip ) +{ + if(isnan(x)) { + *ip = cl_make_nan(); + return cl_make_nan(); + } + + float i; + float f = modff((float) x, &i ); + if( f < 0.0 ) + { + f = 1.0f + f; + i -= 1.0f; + if( f == 1.0f ) + f = HEX_FLT( +, 1, fffffe, -, 1 ); + } + *ip = i; + return f; +} + + +//double my_fdim( double x, double y){ return fdimf( (float) x, (float) y ); } +double reference_add( double x, double y ) +{ + volatile float a = (float) x; + volatile float b = (float) y; + +#if defined( __SSE__ ) || (defined( _MSC_VER ) && (defined(_M_IX86) || defined(_M_X64))) + // defeat x87 + __m128 va = _mm_set_ss( (float) a ); + __m128 vb = _mm_set_ss( (float) b ); + va = _mm_add_ss( va, vb ); + _mm_store_ss( (float*) &a, va ); +#elif defined(__PPC__) + // Most Power host CPUs do not support the non-IEEE mode (NI) which flushes denorm's to zero. + // As such, the reference add with FTZ must be emulated in sw. 
+ if (fpu_control & _FPU_MASK_NI) { + union{ cl_uint u; cl_float d; } ua; ua.d = a; + union{ cl_uint u; cl_float d; } ub; ub.d = b; + cl_uint mantA, mantB; + cl_ulong addendA, addendB, sum; + int expA = extractf( a, &mantA ); + int expB = extractf( b, &mantB ); + cl_uint signA = ua.u & 0x80000000U; + cl_uint signB = ub.u & 0x80000000U; + + // Force matching exponents if an operand is 0 + if (a == 0.0f) { + expA = expB; + } else if (b == 0.0f) { + expB = expA; + } + + addendA = (cl_ulong)mantA << 32; + addendB = (cl_ulong)mantB << 32; + + if (expA >= expB) { + // Shift B relative to the A so that their exponents match + if( expA > expB ) + shift_right_sticky_64( &addendB, expA - expB ); + + // add + if( signA ^ signB ) + sub64( &addendA, addendB, &signA, &expA ); + else + add64( &addendA, addendB, &expA ); + } else { + // Shift the A relative to B so that their exponents match + shift_right_sticky_64( &addendA, expB - expA ); + + // add + if( signA ^ signB ) + sub64( &addendB, addendA, &signB, &expB ); + else + add64( &addendB, addendA, &expB ); + + addendA = addendB; + expA = expB; + signA = signB; + } + + // round to IEEE result + if (gIsInRTZMode) { + ua.d = round_toward_zero_float_ftz( addendA, expA ); + } else { + ua.d = round_to_nearest_even_float_ftz( addendA, expA ); + } + // Set the sign + ua.u |= signA; + a = ua.d; + } else { + a += b; + } +#else + a += b; +#endif + return (double) a; + } + + +double reference_subtract( double x, double y ) +{ + volatile float a = (float) x; + volatile float b = (float) y; +#if defined( __SSE__ ) || (defined( _MSC_VER ) && (defined(_M_IX86) || defined(_M_X64))) + // defeat x87 + __m128 va = _mm_set_ss( (float) a ); + __m128 vb = _mm_set_ss( (float) b ); + va = _mm_sub_ss( va, vb ); + _mm_store_ss( (float*) &a, va ); +#else + a -= b; +#endif + return a; +} + +//double reference_divide( double x, double y ){ return (float) x / (float) y; } +double reference_multiply( double x, double y) +{ + volatile float a = (float) x; + 
volatile float b = (float) y; +#if defined( __SSE__ ) || (defined( _MSC_VER ) && (defined(_M_IX86) || defined(_M_X64))) + // defeat x87 + __m128 va = _mm_set_ss( (float) a ); + __m128 vb = _mm_set_ss( (float) b ); + va = _mm_mul_ss( va, vb ); + _mm_store_ss( (float*) &a, va ); +#elif defined(__PPC__) + // Most Power host CPUs do not support the non-IEEE mode (NI) which flushes denorm's to zero. + // As such, the reference multiply with FTZ must be emulated in sw. + if (fpu_control & _FPU_MASK_NI) { + // extract exponent and mantissa + // exponent is a standard unbiased signed integer + // mantissa is a cl_uint, with leading non-zero bit positioned at the MSB + union{ cl_uint u; cl_float d; } ua; ua.d = a; + union{ cl_uint u; cl_float d; } ub; ub.d = b; + cl_uint mantA, mantB; + int expA = extractf( a, &mantA ); + int expB = extractf( b, &mantB ); + + // exact product of A and B + int exponent = expA + expB; + cl_uint sign = (ua.u ^ ub.u) & 0x80000000U; + cl_ulong product = (cl_ulong) mantA * (cl_ulong) mantB; + + // renormalize -- 1.m * 1.n yields a number between 1.0 and 3.99999.. + // The MSB might not be set. If so, fix that. Otherwise, reflect the fact that we got another power of two from the multiplication + if( 0 == (0x8000000000000000ULL & product) ) + product <<= 1; + else + exponent++; // 2**31 * 2**31 gives 2**62. If the MSB was set, then our exponent increased. + + // round to IEEE result -- we do not do flushing to zero here. That part is handled manually in ternary.c. 
+ if (gIsInRTZMode) { + ua.d = round_toward_zero_float_ftz( product, exponent); + } else { + ua.d = round_to_nearest_even_float_ftz( product, exponent); + } + // Set the sign + ua.u |= sign; + a = ua.d; + } else { + a *= b; + } +#else + a *= b; +#endif + return a; +} + +/*double my_remquo( double x, double y, int *iptr ) +{ + if( isnan(x) || isnan(y) || + fabs(x) == INFINITY || + y == 0.0 ) + { + *iptr = 0; + return NAN; + } + + return (double) remquof( (float) x, (float) y, iptr ); +}*/ +double reference_lgamma_r( double x, int *signp ) +{ + // This is not currently tested + *signp = 0; + return x; +} + + +int reference_isequal( double x, double y ){ return x == y; } +int reference_isfinite( double x ){ return 0 != isfinite(x); } +int reference_isgreater( double x, double y ){ return x > y; } +int reference_isgreaterequal( double x, double y ){ return x >= y; } +int reference_isinf( double x ){ return 0 != isinf(x); } +int reference_isless( double x, double y ){ return x < y; } +int reference_islessequal( double x, double y ){ return x <= y; } +int reference_islessgreater( double x, double y ){ return 0 != islessgreater( x, y ); } +int reference_isnan( double x ){ return 0 != isnan( x ); } +int reference_isnormal( double x ){ return 0 != isnormal( (float) x ); } +int reference_isnotequal( double x, double y ){ return x != y; } +int reference_isordered( double x, double y){ return x == x && y == y; } +int reference_isunordered( double x, double y ){ return isnan(x) || isnan( y ); } +int reference_signbit( float x ){ return 0 != signbit( x ); } + +#if 1 // defined( _MSC_VER ) + +//Missing functions for win32 + + +float reference_copysign( float x, float y ) +{ + union { float f; cl_uint u;} ux, uy; + ux.f = x; uy.f = y; + ux.u &= 0x7fffffffU; + ux.u |= uy.u & 0x80000000U; + return ux.f; +} + + +double reference_copysignd( double x, double y ) +{ + union { double f; cl_ulong u;} ux, uy; + ux.f = x; uy.f = y; + ux.u &= 0x7fffffffffffffffULL; + ux.u |= uy.u & 
0x8000000000000000ULL; + return ux.f; +} + + +double reference_round( double x ) +{ + double absx = reference_fabs(x); + if( absx < 0.5 ) + return reference_copysignd( 0.0, x ); + + if( absx < HEX_DBL( +, 1, 0, +, 53 ) ) + x = reference_trunc( x + reference_copysignd( 0.5, x ) ); + + return x; +} + +double reference_trunc( double x ) +{ + if( fabs(x) < HEX_DBL( +, 1, 0, +, 53 ) ) + { + cl_long l = (cl_long) x; + + return reference_copysignd( (double) l, x ); + } + + return x; +} + +#ifndef FP_ILOGB0 + #define FP_ILOGB0 INT_MIN +#endif + +#ifndef FP_ILOGBNAN + #define FP_ILOGBNAN INT_MAX +#endif + + + +double reference_cbrt(double x){ return reference_copysignd( reference_pow( reference_fabs(x), 1.0/3.0 ), x ); } + +/* +double reference_scalbn(double x, int i) +{ // suitable for checking single precision scalbnf only + + if( i > 300 ) + return copysign( INFINITY, x); + if( i < -300 ) + return copysign( 0.0, x); + + union{ cl_ulong u; double d;} u; + u.u = ((cl_ulong) i + 1023) << 52; + + return x * u.d; +} +*/ + +double reference_rint( double x ) +{ + if( reference_fabs(x) < HEX_DBL( +, 1, 0, +, 52 ) ) + { + double magic = reference_copysignd( HEX_DBL( +, 1, 0, +, 52 ), x ); + double rounded = (x + magic) - magic; + x = reference_copysignd( rounded, x ); + } + + return x; +} + +double reference_acosh( double x ) +{ // not full precision. Sufficient precision to cover float + if( isnan(x) ) + return x + x; + + if( x < 1.0 ) + return cl_make_nan(); + + return reference_log( x + reference_sqrt(x + 1) * reference_sqrt(x-1) ); +} + +double reference_asinh( double x ) +{ +/* + * ==================================================== + * This function is from fdlibm: http://www.netlib.org + * It is Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. 
+ * ==================================================== + */ + if( isnan(x) || isinf(x) ) + return x + x; + + double absx = reference_fabs(x); + if( absx < HEX_DBL( +, 1, 0, -, 28 ) ) + return x; + + double sign = reference_copysignd(1.0, x); + + if( absx > HEX_DBL( +, 1, 0, +, 28 ) ) + return sign * (reference_log( absx ) + 0.693147180559945309417232121458176568); // log(2) + + if( absx > 2.0 ) + return sign * reference_log( 2.0 * absx + 1.0 / (reference_sqrt( x * x + 1.0 ) + absx)); + + return sign * reference_log1p( absx + x*x / (1.0 + reference_sqrt(1.0 + x*x))); +} + + +double reference_atanh( double x ) +{ +/* + * ==================================================== + * This function is from fdlibm: http://www.netlib.org + * It is Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + if( isnan(x) ) + return x + x; + + double signed_half = reference_copysignd( 0.5, x ); + x = reference_fabs(x); + if( x > 1.0 ) + return cl_make_nan(); + + if( x < 0.5 ) + return signed_half * reference_log1p( 2.0 * ( x + x*x / (1-x) ) ); + + return signed_half * reference_log1p(2.0 * x / (1-x)); +} + +double reference_relaxed_exp2( double x ) +{ + return reference_exp2(x); +} + +double reference_exp2( double x ) +{ // Note: only suitable for verifying single precision. Doesn't have range of a full double exp2 implementation. 
+ if( x == 0.0 ) + return 1.0; + + // separate x into fractional and integer parts + double i = reference_rint( x ); // round to nearest integer + + if( i < -150 ) + return 0.0; + + if( i > 129 ) + return INFINITY; + + double f = x - i; // -0.5 <= f <= 0.5 + + // find exp2(f) + // calculate as p(f) = (exp2(f)-1)/f + // exp2(f) = f * p(f) + 1 + // p(f) is a minimax polynomial with error within 0x1.c1fd80f0d1ab7p-50 + + double p = 0.693147180560184539289 + + (0.240226506955902863183 + + (0.055504108656833424373 + + (0.009618129212846484796 + + (0.001333355902958566035 + + (0.000154034191902497930 + + (0.000015252317761038105 + + (0.000001326283129417092 + 0.000000102593187638680 * f)*f)*f)*f)*f)*f)*f)*f; + f *= p; + f += 1.0; + + // scale by 2 ** i + union{ cl_ulong u; double d; } u; + int exponent = (int) i + 1023; + u.u = (cl_ulong) exponent << 52; + + return f * u.d; +} + + +double reference_expm1( double x ) +{ // Note: only suitable for verifying single precision. Doesn't have range of a full double expm1 implementation. It is only accurate to 47 bits or less. + + // early out for small numbers and NaNs + if( ! (reference_fabs(x) > HEX_DBL( +, 1, 0, -, 24 )) ) + return x; + + // early out for large negative numbers + if( x < -130.0 ) + return -1.0; + + // early out for large positive numbers + if( x > 100.0 ) + return INFINITY; + + // separate x into fractional and integer parts + double i = reference_rint( x ); // round to nearest integer + double f = x - i; // -0.5 <= f <= 0.5 + + // reduce f to the range -0.0625 .. f.. 
0.0625 + int index = (int) (f * 16.0) + 8; // 0...16 + + static const double reduction[17] = { -0.5, -0.4375, -0.375, -0.3125, -0.25, -0.1875, -0.125, -0.0625, + 0.0, + +0.0625, +0.125, +0.1875, +0.25, +0.3125, +0.375, +0.4375, +0.5 }; + + + // exponentials[i] = expm1(reduction[i]) + static const double exponentials[17] = { HEX_DBL( -, 1, 92e9a0720d3ec, -, 2 ), HEX_DBL( -, 1, 6adb1cd9205ee, -, 2 ), + HEX_DBL( -, 1, 40373d42ce2e3, -, 2 ), HEX_DBL( -, 1, 12d35a41ba104, -, 2 ), + HEX_DBL( -, 1, c5041854df7d4, -, 3 ), HEX_DBL( -, 1, 5e25fb4fde211, -, 3 ), + HEX_DBL( -, 1, e14aed893eef4, -, 4 ), HEX_DBL( -, 1, f0540438fd5c3, -, 5 ), + HEX_DBL( +, 0, 0, +, 0 ), + HEX_DBL( +, 1, 082b577d34ed8, -, 4 ), HEX_DBL( +, 1, 10b022db7ae68, -, 3 ), + HEX_DBL( +, 1, a65c0b85ac1a9, -, 3 ), HEX_DBL( +, 1, 22d78f0fa061a, -, 2 ), + HEX_DBL( +, 1, 77a45d8117fd5, -, 2 ), HEX_DBL( +, 1, d1e944f6fbdaa, -, 2 ), + HEX_DBL( +, 1, 190048ef6002, -, 1 ), HEX_DBL( +, 1, 4c2531c3c0d38, -, 1 ), + }; + + + f -= reduction[index]; + + // find expm1(f) + // calculate as p(f) = (exp(f)-1)/f + // expm1(f) = f * p(f) + // p(f) is a minimax polynomial with error within 0x1.1d7693618d001p-48 over the range +- 0.0625 + double p = 0.999999999999998001599 + + (0.499999999999839628284 + + (0.166666666672817459505 + + (0.041666666612283048687 + + (0.008333330214567431435 + + (0.001389005319303770070 + 0.000198833381525156667 * f)*f)*f)*f)*f)*f; + f *= p; // expm1( reduced f ) + + // expm1(f) = (exmp1( reduced_f) + 1.0) * ( exponentials[index] + 1 ) - 1 + // = exmp1( reduced_f) * exponentials[index] + exmp1( reduced_f) + exponentials[index] + 1 -1 + // = exmp1( reduced_f) * exponentials[index] + exmp1( reduced_f) + exponentials[index] + f += exponentials[index] + f * exponentials[index]; + + // scale by e ** i + int exponent = (int) i; + if( 0 == exponent ) + return f; // precise answer for x near 1 + + // table of e**(i-150) + static const double exp_table[128+150+1] = + { + HEX_DBL( +, 1, 82e16284f5ec5, -, 217 
), HEX_DBL( +, 1, 06e9996332ba1, -, 215 ), + HEX_DBL( +, 1, 6555cb289e44b, -, 214 ), HEX_DBL( +, 1, e5ab364643354, -, 213 ), + HEX_DBL( +, 1, 4a0bd18e64df7, -, 211 ), HEX_DBL( +, 1, c094499cc578e, -, 210 ), + HEX_DBL( +, 1, 30d759323998c, -, 208 ), HEX_DBL( +, 1, 9e5278ab1d4cf, -, 207 ), + HEX_DBL( +, 1, 198fa3f30be25, -, 205 ), HEX_DBL( +, 1, 7eae636d6144e, -, 204 ), + HEX_DBL( +, 1, 040f1036f4863, -, 202 ), HEX_DBL( +, 1, 6174e477a895f, -, 201 ), + HEX_DBL( +, 1, e065b82dd95a, -, 200 ), HEX_DBL( +, 1, 4676be491d129, -, 198 ), + HEX_DBL( +, 1, bbb5da5f7c823, -, 197 ), HEX_DBL( +, 1, 2d884eef5fdcb, -, 195 ), + HEX_DBL( +, 1, 99d3397ab8371, -, 194 ), HEX_DBL( +, 1, 1681497ed15b3, -, 192 ), + HEX_DBL( +, 1, 7a870f597fdbd, -, 191 ), HEX_DBL( +, 1, 013c74edba307, -, 189 ), + HEX_DBL( +, 1, 5d9ec4ada7938, -, 188 ), HEX_DBL( +, 1, db2edfd20fa7c, -, 187 ), + HEX_DBL( +, 1, 42eb9f39afb0b, -, 185 ), HEX_DBL( +, 1, b6e4f282b43f4, -, 184 ), + HEX_DBL( +, 1, 2a42764857b19, -, 182 ), HEX_DBL( +, 1, 9560792d19314, -, 181 ), + HEX_DBL( +, 1, 137b6ce8e052c, -, 179 ), HEX_DBL( +, 1, 766b45dd84f18, -, 178 ), + HEX_DBL( +, 1, fce362fe6e7d, -, 177 ), HEX_DBL( +, 1, 59d34dd8a5473, -, 175 ), + HEX_DBL( +, 1, d606847fc727a, -, 174 ), HEX_DBL( +, 1, 3f6a58b795de3, -, 172 ), + HEX_DBL( +, 1, b2216c6efdac1, -, 171 ), HEX_DBL( +, 1, 2705b5b153fb8, -, 169 ), + HEX_DBL( +, 1, 90fa1509bd50d, -, 168 ), HEX_DBL( +, 1, 107df698da211, -, 166 ), + HEX_DBL( +, 1, 725ae6e7b9d35, -, 165 ), HEX_DBL( +, 1, f75d6040aeff6, -, 164 ), + HEX_DBL( +, 1, 56126259e093c, -, 162 ), HEX_DBL( +, 1, d0ec7df4f7bd4, -, 161 ), + HEX_DBL( +, 1, 3bf2cf6722e46, -, 159 ), HEX_DBL( +, 1, ad6b22f55db42, -, 158 ), + HEX_DBL( +, 1, 23d1f3e5834a, -, 156 ), HEX_DBL( +, 1, 8c9feab89b876, -, 155 ), + HEX_DBL( +, 1, 0d88cf37f00dd, -, 153 ), HEX_DBL( +, 1, 6e55d2bf838a7, -, 152 ), + HEX_DBL( +, 1, f1e6b68529e33, -, 151 ), HEX_DBL( +, 1, 525be4e4e601d, -, 149 ), + HEX_DBL( +, 1, cbe0a45f75eb1, -, 148 ), HEX_DBL( +, 1, 3884e838aea68, 
-, 146 ), + HEX_DBL( +, 1, a8c1f14e2af5d, -, 145 ), HEX_DBL( +, 1, 20a717e64a9bd, -, 143 ), + HEX_DBL( +, 1, 8851d84118908, -, 142 ), HEX_DBL( +, 1, 0a9bdfb02d24, -, 140 ), + HEX_DBL( +, 1, 6a5bea046b42e, -, 139 ), HEX_DBL( +, 1, ec7f3b269efa8, -, 138 ), + HEX_DBL( +, 1, 4eafb87eab0f2, -, 136 ), HEX_DBL( +, 1, c6e2d05bbc, -, 135 ), + HEX_DBL( +, 1, 35208867c2683, -, 133 ), HEX_DBL( +, 1, a425b317eeacd, -, 132 ), + HEX_DBL( +, 1, 1d8508fa8246a, -, 130 ), HEX_DBL( +, 1, 840fbc08fdc8a, -, 129 ), + HEX_DBL( +, 1, 07b7112bc1ffe, -, 127 ), HEX_DBL( +, 1, 666d0dad2961d, -, 126 ), + HEX_DBL( +, 1, e726c3f64d0fe, -, 125 ), HEX_DBL( +, 1, 4b0dc07cabf98, -, 123 ), + HEX_DBL( +, 1, c1f2daf3b6a46, -, 122 ), HEX_DBL( +, 1, 31c5957a47de2, -, 120 ), + HEX_DBL( +, 1, 9f96445648b9f, -, 119 ), HEX_DBL( +, 1, 1a6baeadb4fd1, -, 117 ), + HEX_DBL( +, 1, 7fd974d372e45, -, 116 ), HEX_DBL( +, 1, 04da4d1452919, -, 114 ), + HEX_DBL( +, 1, 62891f06b345, -, 113 ), HEX_DBL( +, 1, e1dd273aa8a4a, -, 112 ), + HEX_DBL( +, 1, 4775e0840bfdd, -, 110 ), HEX_DBL( +, 1, bd109d9d94bda, -, 109 ), + HEX_DBL( +, 1, 2e73f53fba844, -, 107 ), HEX_DBL( +, 1, 9b138170d6bfe, -, 106 ), + HEX_DBL( +, 1, 175af0cf60ec5, -, 104 ), HEX_DBL( +, 1, 7baee1bffa80b, -, 103 ), + HEX_DBL( +, 1, 02057d1245ceb, -, 101 ), HEX_DBL( +, 1, 5eafffb34ba31, -, 100 ), + HEX_DBL( +, 1, dca23bae16424, -, 99 ), HEX_DBL( +, 1, 43e7fc88b8056, -, 97 ), + HEX_DBL( +, 1, b83bf23a9a9eb, -, 96 ), HEX_DBL( +, 1, 2b2b8dd05b318, -, 94 ), + HEX_DBL( +, 1, 969d47321e4cc, -, 93 ), HEX_DBL( +, 1, 1452b7723aed2, -, 91 ), + HEX_DBL( +, 1, 778fe2497184c, -, 90 ), HEX_DBL( +, 1, fe7116182e9cc, -, 89 ), + HEX_DBL( +, 1, 5ae191a99585a, -, 87 ), HEX_DBL( +, 1, d775d87da854d, -, 86 ), + HEX_DBL( +, 1, 4063f8cc8bb98, -, 84 ), HEX_DBL( +, 1, b374b315f87c1, -, 83 ), + HEX_DBL( +, 1, 27ec458c65e3c, -, 81 ), HEX_DBL( +, 1, 923372c67a074, -, 80 ), + HEX_DBL( +, 1, 1152eaeb73c08, -, 78 ), HEX_DBL( +, 1, 737c5645114b5, -, 77 ), + HEX_DBL( +, 1, f8e6c24b5592e, -, 76 ), 
HEX_DBL( +, 1, 571db733a9d61, -, 74 ), + HEX_DBL( +, 1, d257d547e083f, -, 73 ), HEX_DBL( +, 1, 3ce9b9de78f85, -, 71 ), + HEX_DBL( +, 1, aebabae3a41b5, -, 70 ), HEX_DBL( +, 1, 24b6031b49bda, -, 68 ), + HEX_DBL( +, 1, 8dd5e1bb09d7e, -, 67 ), HEX_DBL( +, 1, 0e5b73d1ff53d, -, 65 ), + HEX_DBL( +, 1, 6f741de1748ec, -, 64 ), HEX_DBL( +, 1, f36bd37f42f3e, -, 63 ), + HEX_DBL( +, 1, 536452ee2f75c, -, 61 ), HEX_DBL( +, 1, cd480a1b7482, -, 60 ), + HEX_DBL( +, 1, 39792499b1a24, -, 58 ), HEX_DBL( +, 1, aa0de4bf35b38, -, 57 ), + HEX_DBL( +, 1, 2188ad6ae3303, -, 55 ), HEX_DBL( +, 1, 898471fca6055, -, 54 ), + HEX_DBL( +, 1, 0b6c3afdde064, -, 52 ), HEX_DBL( +, 1, 6b7719a59f0e, -, 51 ), + HEX_DBL( +, 1, ee001eed62aa, -, 50 ), HEX_DBL( +, 1, 4fb547c775da8, -, 48 ), + HEX_DBL( +, 1, c8464f7616468, -, 47 ), HEX_DBL( +, 1, 36121e24d3bba, -, 45 ), + HEX_DBL( +, 1, a56e0c2ac7f75, -, 44 ), HEX_DBL( +, 1, 1e642baeb84a, -, 42 ), + HEX_DBL( +, 1, 853f01d6d53ba, -, 41 ), HEX_DBL( +, 1, 0885298767e9a, -, 39 ), + HEX_DBL( +, 1, 67852a7007e42, -, 38 ), HEX_DBL( +, 1, e8a37a45fc32e, -, 37 ), + HEX_DBL( +, 1, 4c1078fe9228a, -, 35 ), HEX_DBL( +, 1, c3527e433fab1, -, 34 ), + HEX_DBL( +, 1, 32b48bf117da2, -, 32 ), HEX_DBL( +, 1, a0db0d0ddb3ec, -, 31 ), + HEX_DBL( +, 1, 1b48655f37267, -, 29 ), HEX_DBL( +, 1, 81056ff2c5772, -, 28 ), + HEX_DBL( +, 1, 05a628c699fa1, -, 26 ), HEX_DBL( +, 1, 639e3175a689d, -, 25 ), + HEX_DBL( +, 1, e355bbaee85cb, -, 24 ), HEX_DBL( +, 1, 4875ca227ec38, -, 22 ), + HEX_DBL( +, 1, be6c6fdb01612, -, 21 ), HEX_DBL( +, 1, 2f6053b981d98, -, 19 ), + HEX_DBL( +, 1, 9c54c3b43bc8b, -, 18 ), HEX_DBL( +, 1, 18354238f6764, -, 16 ), + HEX_DBL( +, 1, 7cd79b5647c9b, -, 15 ), HEX_DBL( +, 1, 02cf22526545a, -, 13 ), + HEX_DBL( +, 1, 5fc21041027ad, -, 12 ), HEX_DBL( +, 1, de16b9c24a98f, -, 11 ), + HEX_DBL( +, 1, 44e51f113d4d6, -, 9 ), HEX_DBL( +, 1, b993fe00d5376, -, 8 ), + HEX_DBL( +, 1, 2c155b8213cf4, -, 6 ), HEX_DBL( +, 1, 97db0ccceb0af, -, 5 ), + HEX_DBL( +, 1, 152aaa3bf81cc, -, 3 ), HEX_DBL( 
+, 1, 78b56362cef38, -, 2 ), + HEX_DBL( +, 1, 0, +, 0 ), HEX_DBL( +, 1, 5bf0a8b145769, +, 1 ), + HEX_DBL( +, 1, d8e64b8d4ddae, +, 2 ), HEX_DBL( +, 1, 415e5bf6fb106, +, 4 ), + HEX_DBL( +, 1, b4c902e273a58, +, 5 ), HEX_DBL( +, 1, 28d389970338f, +, 7 ), + HEX_DBL( +, 1, 936dc5690c08f, +, 8 ), HEX_DBL( +, 1, 122885aaeddaa, +, 10 ), + HEX_DBL( +, 1, 749ea7d470c6e, +, 11 ), HEX_DBL( +, 1, fa7157c470f82, +, 12 ), + HEX_DBL( +, 1, 5829dcf95056, +, 14 ), HEX_DBL( +, 1, d3c4488ee4f7f, +, 15 ), + HEX_DBL( +, 1, 3de1654d37c9a, +, 17 ), HEX_DBL( +, 1, b00b5916ac955, +, 18 ), + HEX_DBL( +, 1, 259ac48bf05d7, +, 20 ), HEX_DBL( +, 1, 8f0ccafad2a87, +, 21 ), + HEX_DBL( +, 1, 0f2ebd0a8002, +, 23 ), HEX_DBL( +, 1, 709348c0ea4f9, +, 24 ), + HEX_DBL( +, 1, f4f22091940bd, +, 25 ), HEX_DBL( +, 1, 546d8f9ed26e1, +, 27 ), + HEX_DBL( +, 1, ceb088b68e804, +, 28 ), HEX_DBL( +, 1, 3a6e1fd9eecfd, +, 30 ), + HEX_DBL( +, 1, ab5adb9c436, +, 31 ), HEX_DBL( +, 1, 226af33b1fdc1, +, 33 ), + HEX_DBL( +, 1, 8ab7fb5475fb7, +, 34 ), HEX_DBL( +, 1, 0c3d3920962c9, +, 36 ), + HEX_DBL( +, 1, 6c932696a6b5d, +, 37 ), HEX_DBL( +, 1, ef822f7f6731d, +, 38 ), + HEX_DBL( +, 1, 50bba3796379a, +, 40 ), HEX_DBL( +, 1, c9aae4631c056, +, 41 ), + HEX_DBL( +, 1, 370470aec28ed, +, 43 ), HEX_DBL( +, 1, a6b765d8cdf6d, +, 44 ), + HEX_DBL( +, 1, 1f43fcc4b662c, +, 46 ), HEX_DBL( +, 1, 866f34a725782, +, 47 ), + HEX_DBL( +, 1, 0953e2f3a1ef7, +, 49 ), HEX_DBL( +, 1, 689e221bc8d5b, +, 50 ), + HEX_DBL( +, 1, ea215a1d20d76, +, 51 ), HEX_DBL( +, 1, 4d13fbb1a001a, +, 53 ), + HEX_DBL( +, 1, c4b334617cc67, +, 54 ), HEX_DBL( +, 1, 33a43d282a519, +, 56 ), + HEX_DBL( +, 1, a220d397972eb, +, 57 ), HEX_DBL( +, 1, 1c25c88df6862, +, 59 ), + HEX_DBL( +, 1, 8232558201159, +, 60 ), HEX_DBL( +, 1, 0672a3c9eb871, +, 62 ), + HEX_DBL( +, 1, 64b41c6d37832, +, 63 ), HEX_DBL( +, 1, e4cf766fe49be, +, 64 ), + HEX_DBL( +, 1, 49767bc0483e3, +, 66 ), HEX_DBL( +, 1, bfc951eb8bb76, +, 67 ), + HEX_DBL( +, 1, 304d6aeca254b, +, 69 ), HEX_DBL( +, 1, 9d97010884251, +, 
70 ), + HEX_DBL( +, 1, 19103e4080b45, +, 72 ), HEX_DBL( +, 1, 7e013cd114461, +, 73 ), + HEX_DBL( +, 1, 03996528e074c, +, 75 ), HEX_DBL( +, 1, 60d4f6fdac731, +, 76 ), + HEX_DBL( +, 1, df8c5af17ba3b, +, 77 ), HEX_DBL( +, 1, 45e3076d61699, +, 79 ), + HEX_DBL( +, 1, baed16a6e0da7, +, 80 ), HEX_DBL( +, 1, 2cffdfebde1a1, +, 82 ), + HEX_DBL( +, 1, 9919cabefcb69, +, 83 ), HEX_DBL( +, 1, 160345c9953e3, +, 85 ), + HEX_DBL( +, 1, 79dbc9dc53c66, +, 86 ), HEX_DBL( +, 1, 00c810d464097, +, 88 ), + HEX_DBL( +, 1, 5d009394c5c27, +, 89 ), HEX_DBL( +, 1, da57de8f107a8, +, 90 ), + HEX_DBL( +, 1, 425982cf597cd, +, 92 ), HEX_DBL( +, 1, b61e5ca3a5e31, +, 93 ), + HEX_DBL( +, 1, 29bb825dfcf87, +, 95 ), HEX_DBL( +, 1, 94a90db0d6fe2, +, 96 ), + HEX_DBL( +, 1, 12fec759586fd, +, 98 ), HEX_DBL( +, 1, 75c1dc469e3af, +, 99 ), + HEX_DBL( +, 1, fbfd219c43b04, +, 100 ), HEX_DBL( +, 1, 5936d44e1a146, +, 102 ), + HEX_DBL( +, 1, d531d8a7ee79c, +, 103 ), HEX_DBL( +, 1, 3ed9d24a2d51b, +, 105 ), + HEX_DBL( +, 1, b15cfe5b6e17b, +, 106 ), HEX_DBL( +, 1, 268038c2c0e, +, 108 ), + HEX_DBL( +, 1, 9044a73545d48, +, 109 ), HEX_DBL( +, 1, 1002ab6218b38, +, 111 ), + HEX_DBL( +, 1, 71b3540cbf921, +, 112 ), HEX_DBL( +, 1, f6799ea9c414a, +, 113 ), + HEX_DBL( +, 1, 55779b984f3eb, +, 115 ), HEX_DBL( +, 1, d01a210c44aa4, +, 116 ), + HEX_DBL( +, 1, 3b63da8e9121, +, 118 ), HEX_DBL( +, 1, aca8d6b0116b8, +, 119 ), + HEX_DBL( +, 1, 234de9e0c74e9, +, 121 ), HEX_DBL( +, 1, 8bec7503ca477, +, 122 ), + HEX_DBL( +, 1, 0d0eda9796b9, +, 124 ), HEX_DBL( +, 1, 6db0118477245, +, 125 ), + HEX_DBL( +, 1, f1056dc7bf22d, +, 126 ), HEX_DBL( +, 1, 51c2cc3433801, +, 128 ), + HEX_DBL( +, 1, cb108ffbec164, +, 129 ), HEX_DBL( +, 1, 37f780991b584, +, 131 ), + HEX_DBL( +, 1, a801c0ea8ac4d, +, 132 ), HEX_DBL( +, 1, 20247cc4c46c1, +, 134 ), + HEX_DBL( +, 1, 87a0553328015, +, 135 ), HEX_DBL( +, 1, 0a233dee4f9bb, +, 137 ), + HEX_DBL( +, 1, 69b7f55b808ba, +, 138 ), HEX_DBL( +, 1, eba064644060a, +, 139 ), + HEX_DBL( +, 1, 4e184933d9364, +, 141 ), 
HEX_DBL( +, 1, c614fe2531841, +, 142 ), + HEX_DBL( +, 1, 3494a9b171bf5, +, 144 ), HEX_DBL( +, 1, a36798b9d969b, +, 145 ), + HEX_DBL( +, 1, 1d03d8c0c04af, +, 147 ), HEX_DBL( +, 1, 836026385c974, +, 148 ), + HEX_DBL( +, 1, 073fbe9ac901d, +, 150 ), HEX_DBL( +, 1, 65cae0969f286, +, 151 ), + HEX_DBL( +, 1, e64a58639cae8, +, 152 ), HEX_DBL( +, 1, 4a77f5f9b50f9, +, 154 ), + HEX_DBL( +, 1, c12744a3a28e3, +, 155 ), HEX_DBL( +, 1, 313b3b6978e85, +, 157 ), + HEX_DBL( +, 1, 9eda3a31e587e, +, 158 ), HEX_DBL( +, 1, 19ebe56b56453, +, 160 ), + HEX_DBL( +, 1, 7f2bc6e599b7e, +, 161 ), HEX_DBL( +, 1, 04644610df2ff, +, 163 ), + HEX_DBL( +, 1, 61e8b490ac4e6, +, 164 ), HEX_DBL( +, 1, e103201f299b3, +, 165 ), + HEX_DBL( +, 1, 46e1b637beaf5, +, 167 ), HEX_DBL( +, 1, bc473cfede104, +, 168 ), + HEX_DBL( +, 1, 2deb1b9c85e2d, +, 170 ), HEX_DBL( +, 1, 9a5981ca67d1, +, 171 ), + HEX_DBL( +, 1, 16dc8a9ef670b, +, 173 ), HEX_DBL( +, 1, 7b03166942309, +, 174 ), + HEX_DBL( +, 1, 0190be03150a7, +, 176 ), HEX_DBL( +, 1, 5e1152f9a8119, +, 177 ), + HEX_DBL( +, 1, dbca9263f8487, +, 178 ), HEX_DBL( +, 1, 43556dee93bee, +, 180 ), + HEX_DBL( +, 1, b774c12967dfa, +, 181 ), HEX_DBL( +, 1, 2aa4306e922c2, +, 183 ), + HEX_DBL( +, 1, 95e54c5dd4217, +, 184 ) }; + + // scale by e**i -- (expm1(f) + 1)*e**i - 1 = expm1(f) * e**i + e**i - 1 = e**i + return exp_table[exponent+150] + (f * exp_table[exponent+150] - 1.0); +} + + +double reference_fmax( double x, double y ) +{ + if( isnan(y) ) + return x; + + return x >= y ? x : y; +} + +double reference_fmin( double x, double y ) +{ + if( isnan(y) ) + return x; + + return x <= y ? 
x : y; +} + +double reference_hypot( double x, double y ) +{ + // Since the inputs are actually floats, we don't have to worry about range here + if( isinf(x) || isinf(y) ) + return INFINITY; + + return sqrt( x * x + y * y ); +} + +int reference_ilogbl( long double x) +{ + extern int gDeviceILogb0, gDeviceILogbNaN; + + // Since we are just using this to verify double precision, we can + // use the double precision ilogb here + union { double f; cl_ulong u;} u; + u.f = (double) x; + + int exponent = (int)(u.u >> 52) & 0x7ff; + if( exponent == 0x7ff ) + { + if( u.u & 0x000fffffffffffffULL ) + return gDeviceILogbNaN; + + return CL_INT_MAX; + } + + if( exponent == 0 ) + { // deal with denormals + u.f = x * HEX_DBL( +, 1, 0, +, 64 ); + exponent = (cl_uint)(u.u >> 52) & 0x7ff; + if( exponent == 0 ) + return gDeviceILogb0; + + exponent -= 1023 + 64; + return exponent; + } + + return exponent - 1023; +} + +//double reference_log2( double x ) +//{ +// return log( x ) * 1.44269504088896340735992468100189214; +//} + + +double reference_relaxed_log2( double x ) +{ + return reference_log2(x); +} + +double reference_log2( double x ) +{ + if( isnan(x) || x < 0.0 || x == -INFINITY) + return cl_make_nan(); + + if( x == 0.0f) + return -INFINITY; + + if( x == INFINITY ) + return INFINITY; + + double hi, lo; + __log2_ep( &hi, &lo, x ); + return hi; +} + +double reference_log1p( double x ) +{ // This function is suitable only for verifying log1pf(). It produces several double precision ulps of error. + + // Handle small and NaN + if( ! ( reference_fabs(x) > HEX_DBL( +, 1, 0, -, 53 ) ) ) + return x; + + // deal with special values + if( x <= -1.0 ) + { + if( x < -1.0 ) + return cl_make_nan(); + return -INFINITY; + } + + // infinity + if( x == INFINITY ) + return INFINITY; + + // High precision result for when near 0, to avoid problems with the reference result falling in the wrong binade. 
+ if( reference_fabs(x) < HEX_DBL( +, 1, 0, -, 28 ) ) + return (1.0 - 0.5 * x) * x; + + // Our polynomial is only good in the region +-2**-4. + // If we aren't in that range then we need to reduce to be in that range + double correctionLo = -0.0; // correction down stream to compensate for the reduction, if any + double correctionHi = -0.0; // correction down stream to compensate for the exponent, if any + if( reference_fabs(x) > HEX_DBL( +, 1, 0, -, 4 ) ) + { + x += 1.0; // double should cover any loss of precision here + + // separate x into (1+f) * 2**i + union{ double d; cl_ulong u;} u; u.d = x; + int i = (int) ((u.u >> 52) & 0x7ff) - 1023; + u.u &= 0x000fffffffffffffULL; + int index = (int) (u.u >> 48 ); + u.u |= 0x3ff0000000000000ULL; + double f = u.d; + + // further reduce f to be within 1/16 of 1.0 + static const double scale_table[16] = { 1.0, HEX_DBL( +, 1, d2d2d2d6e3f79, -, 1 ), HEX_DBL( +, 1, b8e38e42737a1, -, 1 ), HEX_DBL( +, 1, a1af28711adf3, -, 1 ), + HEX_DBL( +, 1, 8cccccd88dd65, -, 1 ), HEX_DBL( +, 1, 79e79e810ec8f, -, 1 ), HEX_DBL( +, 1, 68ba2e94df404, -, 1 ), HEX_DBL( +, 1, 590b216defb29, -, 1 ), + HEX_DBL( +, 1, 4aaaaab1500ed, -, 1 ), HEX_DBL( +, 1, 3d70a3e0d6f73, -, 1 ), HEX_DBL( +, 1, 313b13bb39f4f, -, 1 ), HEX_DBL( +, 1, 25ed09823f1cc, -, 1 ), + HEX_DBL( +, 1, 1b6db6e77457b, -, 1 ), HEX_DBL( +, 1, 11a7b96a3a34f, -, 1 ), HEX_DBL( +, 1, 0888888e46fea, -, 1 ), HEX_DBL( +, 1, 00000038e9862, -, 1 ) }; + + // correction_table[i] = -log( scale_table[i] ) + // All entries have >= 64 bits of precision (rather than the expected 53) + static const double correction_table[16] = { -0.0, HEX_DBL( +, 1, 7a5c722c16058, -, 4 ), HEX_DBL( +, 1, 323db16c89ab1, -, 3 ), HEX_DBL( +, 1, a0f87d180629, -, 3 ), + HEX_DBL( +, 1, 050279324e17c, -, 2 ), HEX_DBL( +, 1, 36f885bb270b0, -, 2 ), HEX_DBL( +, 1, 669b771b5cc69, -, 2 ), HEX_DBL( +, 1, 94203a6292a05, -, 2 ), + HEX_DBL( +, 1, bfb4f9cb333a4, -, 2 ), HEX_DBL( +, 1, e982376ddb80e, -, 2 ), HEX_DBL( +, 1, 08d5d8769b2b2, 
-, 1 ), HEX_DBL( +, 1, 1c288bc00e0cf, -, 1 ), + HEX_DBL( +, 1, 2ec7535b31ecb, -, 1 ), HEX_DBL( +, 1, 40bed0adc63fb, -, 1 ), HEX_DBL( +, 1, 521a5c0330615, -, 1 ), HEX_DBL( +, 1, 62e42f7dd092c, -, 1 ) }; + + f *= scale_table[index]; + correctionLo = correction_table[index]; + + // log( 2**(i) ) = i * log(2) + correctionHi = (double)i * 0.693147180559945309417232121458176568; + + x = f - 1.0; + } + + + // minmax polynomial for p(x) = (log(x+1) - x)/x valid over the range x = [-1/16, 1/16] + // max error HEX_DBL( +, 1, 048f61f9a5eca, -, 52 ) + double p = HEX_DBL( -, 1, cc33de97a9d7b, -, 46 ) + + (HEX_DBL( -, 1, fffffffff3eb7, -, 2 ) + + (HEX_DBL( +, 1, 5555555633ef7, -, 2 ) + + (HEX_DBL( -, 1, 00000062c78, -, 2 ) + + (HEX_DBL( +, 1, 9999958a3321, -, 3 ) + + (HEX_DBL( -, 1, 55534ce65c347, -, 3 ) + + (HEX_DBL( +, 1, 24957208391a5, -, 3 ) + + (HEX_DBL( -, 1, 02287b9a5b4a1, -, 3 ) + + HEX_DBL( +, 1, c757d922180ed, -, 4 ) * x)*x)*x)*x)*x)*x)*x)*x; + + // log(x+1) = x * p(x) + x + x += x * p; + + return correctionHi + (correctionLo + x); +} + +double reference_logb( double x ) +{ + union { float f; cl_uint u;} u; + u.f = (float) x; + + cl_int exponent = (u.u >> 23) & 0xff; + if( exponent == 0xff ) + return x * x; + + if( exponent == 0 ) + { // deal with denormals + u.u = (u.u & 0x007fffff) | 0x3f800000; + u.f -= 1.0f; + exponent = (u.u >> 23) & 0xff; + if( exponent == 0 ) + return -INFINITY; + + return exponent - (127 + 126); + } + + return exponent - 127; +} + +double reference_relaxed_reciprocal(double x) +{ + return 1.0f / ((float) x); +} + +double reference_reciprocal( double x ) +{ + return 1.0 / x; +} + +double reference_remainder( double x, double y ) +{ + int i; + return reference_remquo( x, y, &i ); +} + +double reference_lgamma( double x) +{ +/* + * ==================================================== + * This function is from fdlibm. http://www.netlib.org + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. 
+ * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + * + */ + +static const double //two52 = 4.50359962737049600000e+15, /* 0x43300000, 0x00000000 */ + half= 5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */ + one = 1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */ + pi = 3.14159265358979311600e+00, /* 0x400921FB, 0x54442D18 */ + a0 = 7.72156649015328655494e-02, /* 0x3FB3C467, 0xE37DB0C8 */ + a1 = 3.22467033424113591611e-01, /* 0x3FD4A34C, 0xC4A60FAD */ + a2 = 6.73523010531292681824e-02, /* 0x3FB13E00, 0x1A5562A7 */ + a3 = 2.05808084325167332806e-02, /* 0x3F951322, 0xAC92547B */ + a4 = 7.38555086081402883957e-03, /* 0x3F7E404F, 0xB68FEFE8 */ + a5 = 2.89051383673415629091e-03, /* 0x3F67ADD8, 0xCCB7926B */ + a6 = 1.19270763183362067845e-03, /* 0x3F538A94, 0x116F3F5D */ + a7 = 5.10069792153511336608e-04, /* 0x3F40B6C6, 0x89B99C00 */ + a8 = 2.20862790713908385557e-04, /* 0x3F2CF2EC, 0xED10E54D */ + a9 = 1.08011567247583939954e-04, /* 0x3F1C5088, 0x987DFB07 */ + a10 = 2.52144565451257326939e-05, /* 0x3EFA7074, 0x428CFA52 */ + a11 = 4.48640949618915160150e-05, /* 0x3F07858E, 0x90A45837 */ + tc = 1.46163214496836224576e+00, /* 0x3FF762D8, 0x6356BE3F */ + tf = -1.21486290535849611461e-01, /* 0xBFBF19B9, 0xBCC38A42 */ + /* tt = -(tail of tf) */ + tt = -3.63867699703950536541e-18, /* 0xBC50C7CA, 0xA48A971F */ + t0 = 4.83836122723810047042e-01, /* 0x3FDEF72B, 0xC8EE38A2 */ + t1 = -1.47587722994593911752e-01, /* 0xBFC2E427, 0x8DC6C509 */ + t2 = 6.46249402391333854778e-02, /* 0x3FB08B42, 0x94D5419B */ + t3 = -3.27885410759859649565e-02, /* 0xBFA0C9A8, 0xDF35B713 */ + t4 = 1.79706750811820387126e-02, /* 0x3F9266E7, 0x970AF9EC */ + t5 = -1.03142241298341437450e-02, /* 0xBF851F9F, 0xBA91EC6A */ + t6 = 6.10053870246291332635e-03, /* 0x3F78FCE0, 0xE370E344 */ + t7 = 
-3.68452016781138256760e-03, /* 0xBF6E2EFF, 0xB3E914D7 */ + t8 = 2.25964780900612472250e-03, /* 0x3F6282D3, 0x2E15C915 */ + t9 = -1.40346469989232843813e-03, /* 0xBF56FE8E, 0xBF2D1AF1 */ + t10 = 8.81081882437654011382e-04, /* 0x3F4CDF0C, 0xEF61A8E9 */ + t11 = -5.38595305356740546715e-04, /* 0xBF41A610, 0x9C73E0EC */ + t12 = 3.15632070903625950361e-04, /* 0x3F34AF6D, 0x6C0EBBF7 */ + t13 = -3.12754168375120860518e-04, /* 0xBF347F24, 0xECC38C38 */ + t14 = 3.35529192635519073543e-04, /* 0x3F35FD3E, 0xE8C2D3F4 */ + u0 = -7.72156649015328655494e-02, /* 0xBFB3C467, 0xE37DB0C8 */ + u1 = 6.32827064025093366517e-01, /* 0x3FE4401E, 0x8B005DFF */ + u2 = 1.45492250137234768737e+00, /* 0x3FF7475C, 0xD119BD6F */ + u3 = 9.77717527963372745603e-01, /* 0x3FEF4976, 0x44EA8450 */ + u4 = 2.28963728064692451092e-01, /* 0x3FCD4EAE, 0xF6010924 */ + u5 = 1.33810918536787660377e-02, /* 0x3F8B678B, 0xBF2BAB09 */ + v1 = 2.45597793713041134822e+00, /* 0x4003A5D7, 0xC2BD619C */ + v2 = 2.12848976379893395361e+00, /* 0x40010725, 0xA42B18F5 */ + v3 = 7.69285150456672783825e-01, /* 0x3FE89DFB, 0xE45050AF */ + v4 = 1.04222645593369134254e-01, /* 0x3FBAAE55, 0xD6537C88 */ + v5 = 3.21709242282423911810e-03, /* 0x3F6A5ABB, 0x57D0CF61 */ + s0 = -7.72156649015328655494e-02, /* 0xBFB3C467, 0xE37DB0C8 */ + s1 = 2.14982415960608852501e-01, /* 0x3FCB848B, 0x36E20878 */ + s2 = 3.25778796408930981787e-01, /* 0x3FD4D98F, 0x4F139F59 */ + s3 = 1.46350472652464452805e-01, /* 0x3FC2BB9C, 0xBEE5F2F7 */ + s4 = 2.66422703033638609560e-02, /* 0x3F9B481C, 0x7E939961 */ + s5 = 1.84028451407337715652e-03, /* 0x3F5E26B6, 0x7368F239 */ + s6 = 3.19475326584100867617e-05, /* 0x3F00BFEC, 0xDD17E945 */ + r1 = 1.39200533467621045958e+00, /* 0x3FF645A7, 0x62C4AB74 */ + r2 = 7.21935547567138069525e-01, /* 0x3FE71A18, 0x93D3DCDC */ + r3 = 1.71933865632803078993e-01, /* 0x3FC601ED, 0xCCFBDF27 */ + r4 = 1.86459191715652901344e-02, /* 0x3F9317EA, 0x742ED475 */ + r5 = 7.77942496381893596434e-04, /* 0x3F497DDA, 0xCA41A95B */ + r6 = 
7.32668430744625636189e-06, /* 0x3EDEBAF7, 0xA5B38140 */ + w0 = 4.18938533204672725052e-01, /* 0x3FDACFE3, 0x90C97D69 */ + w1 = 8.33333333333329678849e-02, /* 0x3FB55555, 0x5555553B */ + w2 = -2.77777777728775536470e-03, /* 0xBF66C16C, 0x16B02E5C */ + w3 = 7.93650558643019558500e-04, /* 0x3F4A019F, 0x98CF38B6 */ + w4 = -5.95187557450339963135e-04, /* 0xBF4380CB, 0x8C0FE741 */ + w5 = 8.36339918996282139126e-04, /* 0x3F4B67BA, 0x4CDAD5D1 */ + w6 = -1.63092934096575273989e-03; /* 0xBF5AB89D, 0x0B9E43E4 */ + + static const double zero= 0.00000000000000000000e+00; + double t,y,z,nadj,p,p1,p2,p3,q,r,w; + cl_int i,hx,lx,ix; + + union{ double d; cl_ulong u;}u; u.d = x; + + hx = (cl_int) (u.u >> 32); + lx = (cl_int) (u.u & 0xffffffffULL); + + /* purge off +-inf, NaN, +-0, and negative arguments */ +// *signgamp = 1; + ix = hx&0x7fffffff; + if(ix>=0x7ff00000) return x*x; + if((ix|lx)==0) return (double)(one)/(double)(zero); + if(ix<0x3b900000) { /* |x|<2**-70, return -log(|x|) */ + if(hx<0) { +// *signgamp = -1; + return -reference_log(-x); + } else return -reference_log(x); + } + if(hx<0) { + if(ix>=0x43300000) /* |x|>=2**52, must be -integer */ + return (double)(one)/(double)(zero); + t = reference_sinpi(x); + if(t==zero) (double)(one)/(double)(zero); /* -integer */ + nadj = reference_log(pi/reference_fabs(t*x)); +// if(t=0x3FE76944) {y = 1.0-x; i= 0;} + else if(ix>=0x3FCDA661) {y= x-(tc-one); i=1;} + else {y = x; i=2;} + } else { + r = zero; + if(ix>=0x3FFBB4C3) {y=2.0-x;i=0;} /* [1.7316,2] */ + else if(ix>=0x3FF3B4C4) {y=x-tc;i=1;} /* [1.23,1.73] */ + else {y=x-one;i=2;} + } + switch(i) { + case 0: + z = y*y; + p1 = a0+z*(a2+z*(a4+z*(a6+z*(a8+z*a10)))); + p2 = z*(a1+z*(a3+z*(a5+z*(a7+z*(a9+z*a11))))); + p = y*p1+p2; + r += (p-0.5*y); break; + case 1: + z = y*y; + w = z*y; + p1 = t0+w*(t3+w*(t6+w*(t9 +w*t12))); /* parallel comp */ + p2 = t1+w*(t4+w*(t7+w*(t10+w*t13))); + p3 = t2+w*(t5+w*(t8+w*(t11+w*t14))); + p = z*p1-(tt-w*(p2+y*p3)); + r += (tf + p); break; + case 2: + 
p1 = y*(u0+y*(u1+y*(u2+y*(u3+y*(u4+y*u5))))); + p2 = one+y*(v1+y*(v2+y*(v3+y*(v4+y*v5)))); + r += (-0.5*y + p1/p2); + } + } + else if(ix<0x40200000) { /* x < 8.0 */ + i = (int)x; + t = zero; + y = x-(double)i; + p = y*(s0+y*(s1+y*(s2+y*(s3+y*(s4+y*(s5+y*s6)))))); + q = one+y*(r1+y*(r2+y*(r3+y*(r4+y*(r5+y*r6))))); + r = half*y+p/q; + z = one; /* lgamma(1+s) = log(s) + lgamma(s) */ + switch(i) { + case 7: z *= (y+6.0); /* FALLTHRU */ + case 6: z *= (y+5.0); /* FALLTHRU */ + case 5: z *= (y+4.0); /* FALLTHRU */ + case 4: z *= (y+3.0); /* FALLTHRU */ + case 3: z *= (y+2.0); /* FALLTHRU */ + r += reference_log(z); break; + } + /* 8.0 <= x < 2**58 */ + } else if (ix < 0x43900000) { + t = reference_log(x); + z = one/x; + y = z*z; + w = w0+z*(w1+y*(w2+y*(w3+y*(w4+y*(w5+y*w6))))); + r = (x-half)*(t-one)+w; + } else + /* 2**58 <= x <= inf */ + r = x*(reference_log(x)-one); + if(hx<0) r = nadj - r; + return r; + +} + +#endif // _MSC_VER + +double reference_assignment( double x ){ return x; } + +int reference_not( double x ) +{ + int r = !x; + return r; +} + +#pragma mark - +#pragma mark Double testing + +#ifndef M_PIL + #define M_PIL 3.14159265358979323846264338327950288419716939937510582097494459230781640628620899L +#endif + +static long double reduce1l( long double x ); + +#ifdef __PPC__ +// Since long double on PPC is really extended precision double arithmetic +// consisting of two doubles (a high and low). This form of long double has +// the potential of representing a number with more than LDBL_MANT_DIG digits +// such that reduction algorithm used for other architectures will not work. +// Instead and alternate reduction method is used. + +static long double reduce1l( long double x ) +{ + union { + long double ld; + double d[2]; + } u; + + // Reduce the high and low halfs separately. 
+ u.ld = x; + return ((long double)reduce1(u.d[0]) + reduce1(u.d[1])); +} + +#else // !__PPC__ + +static long double reduce1l( long double x ) +{ + static long double unit_exp = 0; + if( 0.0L == unit_exp ) + unit_exp = scalbnl( 1.0L, LDBL_MANT_DIG); + + if( reference_fabsl(x) >= unit_exp ) + { + if( reference_fabsl(x) == INFINITY ) + return cl_make_nan(); + + return 0.0L; //we patch up the sign for sinPi and cosPi later, since they need different signs + } + + // Find the nearest multiple of 2 + const long double r = reference_copysignl( unit_exp, x ); + long double z = x + r; + z -= r; + + // subtract it from x. Value is now in the range -1 <= x <= 1 + return x - z; +} +#endif // __PPC__ + +long double reference_acospil( long double x){ return reference_acosl( x ) / M_PIL; } +long double reference_asinpil( long double x){ return reference_asinl( x ) / M_PIL; } +long double reference_atanpil( long double x){ return reference_atanl( x ) / M_PIL; } +long double reference_atan2pil( long double y, long double x){ return reference_atan2l( y, x) / M_PIL; } +long double reference_cospil( long double x) +{ + if( reference_fabsl(x) >= HEX_LDBL( +, 1, 0, +, 54 ) ) + { + if( reference_fabsl(x) == INFINITY ) + return cl_make_nan(); + + //Note this probably fails for odd values between 0x1.0p52 and 0x1.0p53. + //However, when starting with single precision inputs, there will be no odd values. 
+ + return 1.0L; + } + + x = reduce1l(x); + +#if DBL_MANT_DIG >= LDBL_MANT_DIG + + // phase adjust + double xhi = 0.0; + double xlo = 0.0; + xhi = (double) x + 0.5; + + if(reference_fabsl(x) > 0.5L) + { + xlo = xhi - x; + xlo = 0.5 - xlo; + } + else + { + xlo = xhi - 0.5; + xlo = x - xlo; + } + + // reduce to [-0.5, 0.5] + if( xhi < -0.5 ) + { + xhi = -1.0 - xhi; + xlo = -xlo; + } + else if ( xhi > 0.5 ) + { + xhi = 1.0 - xhi; + xlo = -xlo; + } + + // cosPi zeros are all +0 + if( xhi == 0.0 && xlo == 0.0 ) + return 0.0; + + xhi *= M_PI; + xlo *= M_PI; + + xhi += xlo; + + return reference_sinl( xhi ); + +#else + // phase adjust + x += 0.5L; + + // reduce to [-0.5, 0.5] + if( x < -0.5L ) + x = -1.0L - x; + else if ( x > 0.5L ) + x = 1.0L - x; + + // cosPi zeros are all +0 + if( x == 0.0L ) + return 0.0L; + + return reference_sinl( x * M_PIL ); +#endif +} + +long double reference_dividel( long double x, long double y) +{ + double dx = x; + double dy = y; + return dx/dy; +} + +typedef struct{ double hi, lo; } double_double; + +// Split doubles_double into a series of consecutive 26-bit precise doubles and a remainder. +// Note for later -- for multiplication, it might be better to split each double into a power of two and two 26 bit portions +// multiplication of a double double by a known power of two is cheap. The current approach causes some inexact arithmetic in mul_dd. 
+static inline void split_dd( double_double x, double_double *hi, double_double *lo ) +{ + union{ double d; cl_ulong u;}u; + u.d = x.hi; + u.u &= 0xFFFFFFFFF8000000ULL; + hi->hi = u.d; + x.hi -= u.d; + + u.d = x.hi; + u.u &= 0xFFFFFFFFF8000000ULL; + hi->lo = u.d; + x.hi -= u.d; + + double temp = x.hi; + x.hi += x.lo; + x.lo -= x.hi - temp; + u.d = x.hi; + u.u &= 0xFFFFFFFFF8000000ULL; + lo->hi = u.d; + x.hi -= u.d; + + lo->lo = x.hi + x.lo; +} + +static inline double_double accum_d( double_double a, double b ) +{ + double temp; + if( fabs(b) > fabs(a.hi) ) + { + temp = a.hi; + a.hi += b; + a.lo += temp - (a.hi - b); + } + else + { + temp = a.hi; + a.hi += b; + a.lo += b - (a.hi - temp); + } + + if( isnan( a.lo ) ) + a.lo = 0.0; + + return a; +} + +static inline double_double add_dd( double_double a, double_double b ) +{ + double_double r = {-0.0 -0.0 }; + + if( isinf(a.hi) || isinf( b.hi ) || + isnan(a.hi) || isnan( b.hi ) || + 0.0 == a.hi || 0.0 == b.hi ) + { + r.hi = a.hi + b.hi; + r.lo = a.lo + b.lo; + if( isnan( r.lo ) ) + r.lo = 0.0; + return r; + } + + //merge sort terms by magnitude -- here we assume that |a.hi| > |a.lo|, |b.hi| > |b.lo|, so we don't have to do the first merge pass + double terms[4] = { a.hi, b.hi, a.lo, b.lo }; + double temp; + + //Sort hi terms + if( fabs(terms[0]) < fabs(terms[1]) ) + { + temp = terms[0]; + terms[0] = terms[1]; + terms[1] = temp; + } + //sort lo terms + if( fabs(terms[2]) < fabs(terms[3]) ) + { + temp = terms[2]; + terms[2] = terms[3]; + terms[3] = temp; + } + // Fix case where small high term is less than large low term + if( fabs(terms[1]) < fabs(terms[2]) ) + { + temp = terms[1]; + terms[1] = terms[2]; + terms[2] = temp; + } + + // accumulate the results + r.hi = terms[2] + terms[3]; + r.lo = terms[3] - (r.hi - terms[2]); + + temp = r.hi; + r.hi += terms[1]; + r.lo += temp - (r.hi - terms[1]); + + temp = r.hi; + r.hi += terms[0]; + r.lo += temp - (r.hi - terms[0]); + + // canonicalize the result + temp = r.hi; + r.hi 
+= r.lo; + r.lo = r.lo - (r.hi - temp); + if( isnan( r.lo ) ) + r.lo = 0.0; + + return r; +} + +static inline double_double mul_dd( double_double a, double_double b ) +{ + double_double result = {-0.0,-0.0}; + + // Inf, nan and 0 + if( isnan( a.hi ) || isnan( b.hi ) || + isinf( a.hi ) || isinf( b.hi ) || + 0.0 == a.hi || 0.0 == b.hi ) + { + result.hi = a.hi * b.hi; + return result; + } + + double_double ah, al, bh, bl; + split_dd( a, &ah, &al ); + split_dd( b, &bh, &bl ); + + double p0 = ah.hi * bh.hi; // exact (52 bits in product) 0 + double p1 = ah.hi * bh.lo; // exact (52 bits in product) 26 + double p2 = ah.lo * bh.hi; // exact (52 bits in product) 26 + double p3 = ah.lo * bh.lo; // exact (52 bits in product) 52 + double p4 = al.hi * bh.hi; // exact (52 bits in product) 52 + double p5 = al.hi * bh.lo; // exact (52 bits in product) 78 + double p6 = al.lo * bh.hi; // inexact (54 bits in product) 78 + double p7 = al.lo * bh.lo; // inexact (54 bits in product) 104 + double p8 = ah.hi * bl.hi; // exact (52 bits in product) 52 + double p9 = ah.hi * bl.lo; // inexact (54 bits in product) 78 + double pA = ah.lo * bl.hi; // exact (52 bits in product) 78 + double pB = ah.lo * bl.lo; // inexact (54 bits in product) 104 + double pC = al.hi * bl.hi; // exact (52 bits in product) 104 + // the last 3 terms are two low to appear in the result + + + // accumulate from bottom up +#if 0 + // works but slow + result.hi = pC; + result = accum_d( result, pB ); + result = accum_d( result, p7 ); + result = accum_d( result, pA ); + result = accum_d( result, p9 ); + result = accum_d( result, p6 ); + result = accum_d( result, p5 ); + result = accum_d( result, p8 ); + result = accum_d( result, p4 ); + result = accum_d( result, p3 ); + result = accum_d( result, p2 ); + result = accum_d( result, p1 ); + result = accum_d( result, p0 ); + + // canonicalize the result + double temp = result.hi; + result.hi += result.lo; + result.lo -= (result.hi - temp); + if( isnan( result.lo ) ) + result.lo 
= 0.0; + + return result; +#else + // take advantage of the known relative magnitudes of the partial products to avoid some sorting + // Combine 2**-78 and 2**-104 terms. Here we are a bit sloppy about canonicalizing the double_doubles + double_double t0 = { pA, pC }; + double_double t1 = { p9, pB }; + double_double t2 = { p6, p7 }; + double temp0, temp1, temp2; + + t0 = accum_d( t0, p5 ); // there is an extra 2**-78 term to deal with + + // Add in 2**-52 terms. Here we are a bit sloppy about canonicalizing the double_doubles + temp0 = t0.hi; temp1 = t1.hi; temp2 = t2.hi; + t0.hi += p3; t1.hi += p4; t2.hi += p8; + temp0 -= t0.hi-p3; temp1 -= t1.hi-p4; temp2 -= t2.hi - p8; + t0.lo += temp0; t1.lo += temp1; t2.lo += temp2; + + // Add in 2**-26 terms. Here we are a bit sloppy about canonicalizing the double_doubles + temp1 = t1.hi; temp2 = t2.hi; + t1.hi += p1; t2.hi += p2; + temp1 -= t1.hi-p1; temp2 -= t2.hi - p2; + t1.lo += temp1; t2.lo += temp2; + + // Combine accumulators to get the low bits of result + t1 = add_dd( t1, add_dd( t2, t0 ) ); + + // Add in MSB's, and round to precision + return accum_d( t1, p0 ); // canonicalizes +#endif + +} + + +long double reference_exp10l( long double z ) +{ + const double_double log2_10 = { HEX_DBL( +, 1, a934f0979a371, +, 1 ), HEX_DBL( +, 1, 7f2495fb7fa6d, -, 53 ) }; + double_double x; + int j; + + // Handle NaNs + if( isnan(z) ) + return z; + + // init x + x.hi = z; + x.lo = z - x.hi; + + + // 10**x = exp2( x * log2(10) ) + + x = mul_dd( x, log2_10); // x * log2(10) + + //Deal with overflow and underflow for exp2(x) stage next + if( x.hi >= 1025 ) + return INFINITY; + + if( x.hi < -1075-24 ) + return +0.0; + + // find nearest integer to x + int i = (int) rint(x.hi); + + // x now holds fractional part. The result would be then 2**i * exp2( x ) + x.hi -= i; + + // We could attempt to find a minimax polynomial for exp2(x) over the range x = [-0.5, 0.5]. 
+ // However, this would converge very slowly near the extrema, where 0.5**n is not a lot different + // from 0.5**(n+1), thereby requiring something like a 20th order polynomial to get 53 + 24 bits + // of precision. Instead we further reduce the range to [-1/32, 1/32] by observing that + // + // 2**(a+b) = 2**a * 2**b + // + // We can thus build a table of 2**a values for a = n/16, n = [-8, 8], and reduce the range + // of x to [-1/32, 1/32] by subtracting away the nearest value of n/16 from x. + const double_double corrections[17] = + { + { HEX_DBL( +, 1, 6a09e667f3bcd, -, 1 ), HEX_DBL( -, 1, bdd3413b26456, -, 55 ) }, + { HEX_DBL( +, 1, 7a11473eb0187, -, 1 ), HEX_DBL( -, 1, 41577ee04992f, -, 56 ) }, + { HEX_DBL( +, 1, 8ace5422aa0db, -, 1 ), HEX_DBL( +, 1, 6e9f156864b27, -, 55 ) }, + { HEX_DBL( +, 1, 9c49182a3f09, -, 1 ), HEX_DBL( +, 1, c7c46b071f2be, -, 57 ) }, + { HEX_DBL( +, 1, ae89f995ad3ad, -, 1 ), HEX_DBL( +, 1, 7a1cd345dcc81, -, 55 ) }, + { HEX_DBL( +, 1, c199bdd85529c, -, 1 ), HEX_DBL( +, 1, 11065895048dd, -, 56 ) }, + { HEX_DBL( +, 1, d5818dcfba487, -, 1 ), HEX_DBL( +, 1, 2ed02d75b3707, -, 56 ) }, + { HEX_DBL( +, 1, ea4afa2a490da, -, 1 ), HEX_DBL( -, 1, e9c23179c2893, -, 55 ) }, + { HEX_DBL( +, 1, 0, +, 0 ), HEX_DBL( +, 0, 0, +, 0 ) }, + { HEX_DBL( +, 1, 0b5586cf9890f, +, 0 ), HEX_DBL( +, 1, 8a62e4adc610b, -, 54 ) }, + { HEX_DBL( +, 1, 172b83c7d517b, +, 0 ), HEX_DBL( -, 1, 19041b9d78a76, -, 55 ) }, + { HEX_DBL( +, 1, 2387a6e756238, +, 0 ), HEX_DBL( +, 1, 9b07eb6c70573, -, 54 ) }, + { HEX_DBL( +, 1, 306fe0a31b715, +, 0 ), HEX_DBL( +, 1, 6f46ad23182e4, -, 55 ) }, + { HEX_DBL( +, 1, 3dea64c123422, +, 0 ), HEX_DBL( +, 1, ada0911f09ebc, -, 55 ) }, + { HEX_DBL( +, 1, 4bfdad5362a27, +, 0 ), HEX_DBL( +, 1, d4397afec42e2, -, 56 ) }, + { HEX_DBL( +, 1, 5ab07dd485429, +, 0 ), HEX_DBL( +, 1, 6324c054647ad, -, 54 ) }, + { HEX_DBL( +, 1, 6a09e667f3bcd, +, 0 ), HEX_DBL( -, 1, bdd3413b26456, -, 54 ) } + + }; + int index = (int) rint( x.hi * 16.0 ); + x.hi -= (double) 
index * 0.0625; + + // canonicalize x + double temp = x.hi; + x.hi += x.lo; + x.lo -= x.hi - temp; + + // Minimax polynomial for (exp2(x)-1)/x, over the range [-1/32, 1/32]. Max Error: 2 * 0x1.e112p-87 + const double_double c[] = { + {HEX_DBL( +, 1, 62e42fefa39ef, -, 1 ), HEX_DBL( +, 1, abc9e3ac1d244, -, 56 )}, + {HEX_DBL( +, 1, ebfbdff82c58f, -, 3 ), HEX_DBL( -, 1, 5e4987a631846, -, 57 )}, + {HEX_DBL( +, 1, c6b08d704a0c, -, 5 ), HEX_DBL( -, 1, d323200a05713, -, 59 )}, + {HEX_DBL( +, 1, 3b2ab6fba4e7a, -, 7 ), HEX_DBL( +, 1, c5ee8f8b9f0c1, -, 63 )}, + {HEX_DBL( +, 1, 5d87fe78a672a, -, 10 ), HEX_DBL( +, 1, 884e5e5cc7ecc, -, 64 )}, + {HEX_DBL( +, 1, 430912f7e8373, -, 13 ), HEX_DBL( +, 1, 4f1b59514a326, -, 67 )}, + {HEX_DBL( +, 1, ffcbfc5985e71, -, 17 ), HEX_DBL( -, 1, db7d6a0953b78, -, 71 )}, + {HEX_DBL( +, 1, 62c150eb16465, -, 20 ), HEX_DBL( +, 1, e0767c2d7abf5, -, 80 )}, + {HEX_DBL( +, 1, b52502b5e953, -, 24 ), HEX_DBL( +, 1, 6797523f944bc, -, 78 )} + }; + size_t count = sizeof( c ) / sizeof( c[0] ); + + // Do polynomial + double_double r = c[count-1]; + for( j = (int) count-2; j >= 0; j-- ) + r = add_dd( c[j], mul_dd( r, x ) ); + + // unwind approximation + r = mul_dd( r, x ); // before: r =(exp2(x)-1)/x; after: r = exp2(x) - 1 + + // correct for [-0.5, 0.5] -> [-1/32, 1/32] reduction above + // exp2(x) = (r + 1) * correction = r * correction + correction + r = mul_dd( r, corrections[index+8] ); + r = add_dd( r, corrections[index+8] ); + +// Format result for output: + + // Get mantissa + long double m = ((long double) r.hi + (long double) r.lo ); + + // Handle a pesky overflow cases when long double = double + if( i > 512 ) + { + m *= HEX_DBL( +, 1, 0, +, 512 ); + i -= 512; + } + else if( i < -512 ) + { + m *= HEX_DBL( +, 1, 0, -, 512 ); + i += 512; + } + + return m * ldexpl( 1.0L, i ); +} + + +static double fallback_frexp( double x, int *iptr ) +{ + cl_ulong u, v; + double fu, fv; + + memcpy( &u, &x, sizeof(u)); + + cl_ulong exponent = u & 0x7ff0000000000000ULL; 
+ cl_ulong mantissa = u & ~0x7ff0000000000000ULL; + + // add 1 to the exponent + exponent += 0x0010000000000000ULL; + + if( (cl_long) exponent < (cl_long) 0x0020000000000000LL ) + { // subnormal, NaN, Inf + mantissa |= 0x3fe0000000000000ULL; + + v = mantissa & 0xfff0000000000000ULL; + u = mantissa; + memcpy( &fv, &v, sizeof(v)); + memcpy( &fu, &u, sizeof(u)); + + fu -= fv; + + memcpy( &v, &fv, sizeof(v)); + memcpy( &u, &fu, sizeof(u)); + + exponent = u & 0x7ff0000000000000ULL; + mantissa = u & ~0x7ff0000000000000ULL; + + *iptr = (exponent >> 52) + (-1022 + 1 -1022); + u = mantissa | 0x3fe0000000000000ULL; + memcpy( &fu, &u, sizeof(u)); + return fu; + } + + *iptr = (exponent >> 52) - 1023; + u = mantissa | 0x3fe0000000000000ULL; + memcpy( &fu, &u, sizeof(u)); + return fu; +} + +// Assumes zeros, infinities and NaNs handed elsewhere +static inline int extract( double x, cl_ulong *mant ); +static inline int extract( double x, cl_ulong *mant ) +{ + static double (*frexpp)(double, int*) = NULL; + int e; + + // verify that frexp works properly + if( NULL == frexpp ) + { + if( 0.5 == frexp( HEX_DBL( +, 1, 0, -, 1030 ), &e ) && e == -1029 ) + frexpp = frexp; + else + frexpp = fallback_frexp; + } + + *mant = (cl_ulong) (HEX_DBL( +, 1, 0, +, 64 ) * fabs( frexpp( x, &e ))); + return e - 1; +} + +// Return 128-bit product of a*b as (hi << 64) + lo +static inline void mul128( cl_ulong a, cl_ulong b, cl_ulong *hi, cl_ulong *lo ); +static inline void mul128( cl_ulong a, cl_ulong b, cl_ulong *hi, cl_ulong *lo ) +{ + cl_ulong alo = a & 0xffffffffULL; + cl_ulong ahi = a >> 32; + cl_ulong blo = b & 0xffffffffULL; + cl_ulong bhi = b >> 32; + cl_ulong aloblo = alo * blo; + cl_ulong alobhi = alo * bhi; + cl_ulong ahiblo = ahi * blo; + cl_ulong ahibhi = ahi * bhi; + + alobhi += (aloblo >> 32) + (ahiblo & 0xffffffffULL); // cannot overflow: (2^32-1)^2 + 2 * (2^32-1) = (2^64 - 2^33 + 1) + (2^33 - 2) = 2^64 - 1 + *hi = ahibhi + (alobhi >> 32) + (ahiblo >> 32); // cannot overflow: (2^32-1)^2 
+ 2 * (2^32-1) = (2^64 - 2^33 + 1) + (2^33 - 2) = 2^64 - 1 + *lo = (aloblo & 0xffffffffULL) | (alobhi << 32); +} + +// Move the most significant non-zero bit to the MSB +// Note: not general. Only works if the most significant non-zero bit is at MSB-1 +static inline void renormalize( cl_ulong *hi, cl_ulong *lo, int *exponent ) +{ + if( 0 == (0x8000000000000000ULL & *hi )) + { + *hi <<= 1; + *hi |= *lo >> 63; + *lo <<= 1; + *exponent -= 1; + } +} + +static double round_to_nearest_even_double( cl_ulong hi, cl_ulong lo, int exponent ); +static double round_to_nearest_even_double( cl_ulong hi, cl_ulong lo, int exponent ) +{ + union{ cl_ulong u; cl_double d;} u; + + // edges + if( exponent > 1023 ) return INFINITY; + if( exponent == -1075 && (hi | (lo!=0)) > 0x8000000000000000ULL ) + return HEX_DBL( +, 1, 0, -, 1074 ); + if( exponent <= -1075 ) return 0.0; + + //Figure out which bits go where + int shift = 11; + if( exponent < -1022 ) + { + shift -= 1022 + exponent; // subnormal: shift is not 52 + exponent = -1023; // set exponent to 0 + } + else + hi &= 0x7fffffffffffffffULL; // normal: leading bit is implicit. Remove it. + + // Assemble the double (round toward zero) + u.u = (hi >> shift) | ((cl_ulong) (exponent + 1023) << 52); + + // put a representation of the residual bits into hi + hi <<= (64-shift); + hi |= lo >> shift; + lo <<= (64-shift ); + hi |= lo != 0; + + //round to nearest, ties to even + if( hi < 0x8000000000000000ULL ) return u.d; + if( hi == 0x8000000000000000ULL ) u.u += u.u & 1ULL; + else u.u++; + + return u.d; +} + +// Shift right. 
Bits lost on the right will be OR'd together and OR'd with the LSB +static inline void shift_right_sticky_128( cl_ulong *hi, cl_ulong *lo, int shift ); +static inline void shift_right_sticky_128( cl_ulong *hi, cl_ulong *lo, int shift ) +{ + cl_ulong sticky = 0; + cl_ulong h = *hi; + cl_ulong l = *lo; + + if( shift >= 64 ) + { + shift -= 64; + sticky = 0 != lo; + l = h; + h = 0; + if( shift >= 64 ) + { + sticky |= (0 != l); + l = 0; + } + else + { + sticky |= (0 != (l << (64-shift))); + l >>= shift; + } + } + else + { + sticky |= (0 != (l << (64-shift))); + l >>= shift; + l |= h << (64-shift); + h >>= shift; + } + + *lo = l | sticky; + *hi = h; +} + +// 128-bit add of ((*hi << 64) + *lo) + ((chi << 64) + clo) +// If the 129 bit result doesn't fit, bits lost off the right end will be OR'd with the LSB +static inline void add128( cl_ulong *hi, cl_ulong *lo, cl_ulong chi, cl_ulong clo, int *exp ); +static inline void add128( cl_ulong *hi, cl_ulong *lo, cl_ulong chi, cl_ulong clo, int *exponent ) +{ + cl_ulong carry, carry2; + // extended precision add + clo = add_carry(*lo, clo, &carry); + chi = add_carry(*hi, chi, &carry2); + chi = add_carry(chi, carry, &carry); + + //If we overflowed the 128 bit result + if( carry || carry2 ) + { + carry = clo & 1; // set aside low bit + clo >>= 1; // right shift low 1 + clo |= carry; // or back in the low bit, so we don't come to believe this is an exact half way case for rounding + clo |= chi << 63; // move lowest high bit into highest bit of lo + chi >>= 1; // right shift hi + chi |= 0x8000000000000000ULL; // move the carry bit into hi. 
+ *exponent = *exponent + 1; + } + + *hi = chi; + *lo = clo; +} + +// 128-bit subtract of ((chi << 64) + clo) - ((*hi << 64) + *lo) +static inline void sub128( cl_ulong *chi, cl_ulong *clo, cl_ulong hi, cl_ulong lo, cl_ulong *signC, int *expC ); +static inline void sub128( cl_ulong *chi, cl_ulong *clo, cl_ulong hi, cl_ulong lo, cl_ulong *signC, int *expC ) +{ + cl_ulong rHi = *chi; + cl_ulong rLo = *clo; + cl_ulong carry, carry2; + + //extended precision subtract + rLo = sub_carry(rLo, lo, &carry); + rHi = sub_carry(rHi, hi, &carry2); + rHi = sub_carry(rHi, carry, &carry); + + // Check for sign flip + if( carry || carry2 ) + { + *signC ^= 0x8000000000000000ULL; + + //negate rLo, rHi: -x = (x ^ -1) + 1 + rLo ^= -1ULL; + rHi ^= -1ULL; + rLo++; + rHi += 0 == rLo; + } + + // normalize -- move the most significant non-zero bit to the MSB, and adjust exponent accordingly + if( rHi == 0 ) + { + rHi = rLo; + *expC = *expC - 64; + rLo = 0; + } + + if( rHi ) + { + int shift = 32; + cl_ulong test = 1ULL << 32; + while( 0 == (rHi & 0x8000000000000000ULL)) + { + if( rHi < test ) + { + rHi <<= shift; + rHi |= rLo >> (64-shift); + rLo <<= shift; + *expC = *expC - shift; + } + shift >>= 1; + test <<= shift; + } + } + else + { + //zero + *expC = INT_MIN; + *signC = 0; + } + + + *chi = rHi; + *clo = rLo; +} + +long double reference_fmal( long double x, long double y, long double z) +{ + static const cl_ulong kMSB = 0x8000000000000000ULL; + + // cast values back to double. 
This is an exact function, so + double a = x; + double b = y; + double c = z; + + // Make bits accessible + union{ cl_ulong u; cl_double d; } ua; ua.d = a; + union{ cl_ulong u; cl_double d; } ub; ub.d = b; + union{ cl_ulong u; cl_double d; } uc; uc.d = c; + + // deal with Nans, infinities and zeros + if( isnan( a ) || isnan( b ) || isnan(c) || + isinf( a ) || isinf( b ) || isinf(c) || + 0 == ( ua.u & ~kMSB) || // a == 0, defeat host FTZ behavior + 0 == ( ub.u & ~kMSB) || // b == 0, defeat host FTZ behavior + 0 == ( uc.u & ~kMSB) ) // c == 0, defeat host FTZ behavior + { + if( isinf( c ) && !isinf(a) && !isinf(b) ) + return (c + a) + b; + + a = (double) reference_multiplyl( a, b ); // some risk that the compiler will insert a non-compliant fma here on some platforms. + return reference_addl(a, c); // We use STDC FP_CONTRACT OFF above to attempt to defeat that. + } + + // extract exponent and mantissa + // exponent is a standard unbiased signed integer + // mantissa is a cl_uint, with leading non-zero bit positioned at the MSB + cl_ulong mantA, mantB, mantC; + int expA = extract( a, &mantA ); + int expB = extract( b, &mantB ); + int expC = extract( c, &mantC ); + cl_ulong signC = uc.u & kMSB; // We'll need the sign bit of C later to decide if we are adding or subtracting + +// exact product of A and B + int exponent = expA + expB; + cl_ulong sign = (ua.u ^ ub.u) & kMSB; + cl_ulong hi, lo; + mul128( mantA, mantB, &hi, &lo ); + + // renormalize + if( 0 == (kMSB & hi) ) + { + hi <<= 1; + hi |= lo >> 63; + lo <<= 1; + } + else + exponent++; // 2**63 * 2**63 gives 2**126. If the MSB was set, then our exponent increased. 
+ +//infinite precision add + cl_ulong chi = mantC; + cl_ulong clo = 0; + + if( exponent >= expC ) + { + // Normalize C relative to the product + if( exponent > expC ) + shift_right_sticky_128( &chi, &clo, exponent - expC ); + + // Add + if( sign ^ signC ) + sub128( &hi, &lo, chi, clo, &sign, &exponent ); + else + add128( &hi, &lo, chi, clo, &exponent ); + } + else + { + // Shift the product relative to C so that their exponents match + shift_right_sticky_128( &hi, &lo, expC - exponent ); + + // add + if( sign ^ signC ) + sub128( &chi, &clo, hi, lo, &signC, &expC ); + else + add128( &chi, &clo, hi, lo, &expC ); + + hi = chi; + lo = clo; + exponent = expC; + sign = signC; + } + + // round + ua.d = round_to_nearest_even_double(hi, lo, exponent); + + // Set the sign + ua.u |= sign; + + return ua.d; +} + + + + +long double reference_madl( long double a, long double b, long double c) { return a * b + c; } + +//long double my_nextafterl(long double x, long double y){ return (long double) nextafter( (double) x, (double) y ); } + +long double reference_recipl( long double x){ return 1.0L / x; } + +long double reference_rootnl( long double x, int i) +{ + double hi, lo; + long double l; + //rootn ( x, 0 ) returns a NaN. + if( 0 == i ) + return cl_make_nan(); + + //rootn ( x, n ) returns a NaN for x < 0 and n is even. + if( x < 0.0L && 0 == (i&1) ) + return cl_make_nan(); + + if( isinf(x) ) + { + if( i < 0 ) + return reference_copysignl(0.0L, x); + + return x; + } + + if( x == 0.0 ) + { + switch( i & 0x80000001 ) + { + //rootn ( +-0, n ) is +0 for even n > 0. + case 0: + return 0.0L; + + //rootn ( +-0, n ) is +-0 for odd n > 0. + case 1: + return x; + + //rootn ( +-0, n ) is +inf for even n < 0. + case 0x80000000: + return INFINITY; + + //rootn ( +-0, n ) is +-inf for odd n < 0. 
+ case 0x80000001: + return copysign(INFINITY, x); + } + } + + if( i == 1 ) + return x; + + if( i == -1 ) + return 1.0 / x; + + long double sign = x; + x = reference_fabsl(x); + double iHi, iLo; + DivideDD(&iHi, &iLo, 1.0, i); + x = reference_powl(x, iHi) * reference_powl(x, iLo); + + return reference_copysignl( x, sign ); + +} + +long double reference_rsqrtl( long double x){ return 1.0L / sqrtl(x); } +//long double reference_sincosl( long double x, long double *c ){ *c = reference_cosl(x); return reference_sinl(x); } +long double reference_sinpil( long double x) +{ + double r = reduce1l(x); + + // reduce to [-0.5, 0.5] + if( r < -0.5L ) + r = -1.0L - r; + else if ( r > 0.5L ) + r = 1.0L - r; + + // sinPi zeros have the same sign as x + if( r == 0.0L ) + return reference_copysignl(0.0L, x); + + return reference_sinl( r * M_PIL ); +} + +long double reference_tanpil( long double x) +{ + // set aside the sign (allows us to preserve sign of -0) + long double sign = reference_copysignl( 1.0L, x); + long double z = reference_fabsl(x); + + // if big and even -- caution: only works if x only has single precision + if( z >= HEX_LDBL( +, 1, 0, +, 53 ) ) + { + if( z == INFINITY ) + return x - x; // nan + + return reference_copysignl( 0.0L, x); // tanpi ( n ) is copysign( 0.0, n) for even integers n. 
+ } + + // reduce to the range [ -0.5, 0.5 ] + long double nearest = reference_rintl( z ); // round to nearest even places n + 0.5 values in the right place for us + int64_t i = (int64_t) nearest; // test above against 0x1.0p53 avoids overflow here + z -= nearest; + + //correction for odd integer x for the right sign of zero + if( (i&1) && z == 0.0L ) + sign = -sign; + + // track changes to the sign + sign *= reference_copysignl(1.0L, z); // really should just be an xor + z = reference_fabsl(z); // remove the sign again + + // reduce once more + // If we don't do this, rounding error in z * M_PI will cause us not to return infinities properly + if( z > 0.25L ) + { + z = 0.5L - z; + return sign / reference_tanl( z * M_PIL ); // use system tan to get the right result + } + + // + return sign * reference_tanl( z * M_PIL ); // use system tan to get the right result +} + +long double reference_pownl( long double x, int i ){ return reference_powl( x, (long double) i ); } + +long double reference_powrl( long double x, long double y ) +{ + //powr ( x, y ) returns NaN for x < 0. + if( x < 0.0L ) + return cl_make_nan(); + + //powr ( x, NaN ) returns the NaN for x >= 0. + //powr ( NaN, y ) returns the NaN. + if( isnan(x) || isnan(y) ) + return x + y; // Note: behavior different here than for pow(1,NaN), pow(NaN, 0) + + if( x == 1.0L ) + { + //powr ( +1, +-inf ) returns NaN. + if( reference_fabsl(y) == INFINITY ) + return cl_make_nan(); + + //powr ( +1, y ) is 1 for finite y. (NaN handled above) + return 1.0L; + } + + if( y == 0.0L ) + { + //powr ( +inf, +-0 ) returns NaN. + //powr ( +-0, +-0 ) returns NaN. + if( x == 0.0L || x == INFINITY ) + return cl_make_nan(); + + //powr ( x, +-0 ) is 1 for finite x > 0. (x <= 0, NaN, INF already handled above) + return 1.0L; + } + + if( x == 0.0L ) + { + //powr ( +-0, -inf) is +inf. + //powr ( +-0, y ) is +inf for finite y < 0. + if( y < 0.0L ) + return INFINITY; + + //powr ( +-0, y ) is +0 for y > 0. 
(NaN, y==0 handled above) + return 0.0L; + } + + return reference_powl( x, y ); +} + +//long double my_fdiml( long double x, long double y){ return fdim( (double) x, (double) y ); } +long double reference_addl( long double x, long double y) +{ + volatile double a = (double) x; + volatile double b = (double) y; + +#if defined( __SSE2__ ) + // defeat x87 + __m128d va = _mm_set_sd( (double) a ); + __m128d vb = _mm_set_sd( (double) b ); + va = _mm_add_sd( va, vb ); + _mm_store_sd( (double*) &a, va ); +#else + a += b; +#endif + return (long double) a; +} + +long double reference_subtractl( long double x, long double y) +{ + volatile double a = (double) x; + volatile double b = (double) y; + +#if defined( __SSE2__ ) + // defeat x87 + __m128d va = _mm_set_sd( (double) a ); + __m128d vb = _mm_set_sd( (double) b ); + va = _mm_sub_sd( va, vb ); + _mm_store_sd( (double*) &a, va ); +#else + a -= b; +#endif + return (long double) a; +} + +long double reference_multiplyl( long double x, long double y) +{ + volatile double a = (double) x; + volatile double b = (double) y; + +#if defined( __SSE2__ ) + // defeat x87 + __m128d va = _mm_set_sd( (double) a ); + __m128d vb = _mm_set_sd( (double) b ); + va = _mm_mul_sd( va, vb ); + _mm_store_sd( (double*) &a, va ); +#else + a *= b; +#endif + return (long double) a; +} + +/*long double my_remquol( long double x, long double y, int *iptr ) +{ + if( isnan(x) || isnan(y) || + fabs(x) == INFINITY || + y == 0.0 ) + { + *iptr = 0; + return NAN; + } + + return remquo( (double) x, (double) y, iptr ); +}*/ +long double reference_lgamma_rl( long double x, int *signp ) +{ +// long double lgamma_val = (long double)reference_lgamma( (double)x ); +// *signp = signgam; + *signp = 0; + return x; +} + + +int reference_isequall( long double x, long double y){ return x == y; } +int reference_isfinitel( long double x){ return 0 != isfinite(x); } +int reference_isgreaterl( long double x, long double y){ return x > y; } +int reference_isgreaterequall( long 
double x, long double y){ return x >= y; }
int reference_isinfl( long double x){ return 0 != isinf(x); }
int reference_islessl( long double x, long double y){ return x < y; }
int reference_islessequall( long double x, long double y){ return x <= y; }
int reference_islessgreaterl( long double x, long double y){ return 0 != islessgreater( x, y ); }
int reference_isnanl( long double x){ return 0 != isnan( x ); }
int reference_isnormall( long double x){ return 0 != isnormal( (double) x ); }
int reference_isnotequall( long double x, long double y){ return x != y; }
// ordered: neither operand is NaN (x==x fails only for NaN); unordered: at least one NaN
int reference_isorderedl( long double x, long double y){ return x == x && y == y; }
int reference_isunorderedl( long double x, long double y){ return isnan(x) || isnan( y ); }
int reference_signbitl( long double x){ return 0 != signbit( x ); }

// forward declarations
long double reference_copysignl( long double x, long double y);
long double reference_roundl( long double x );
long double reference_cbrtl(long double x);

// Return |x| carrying the sign of y, including for zero and NaN y.
long double reference_copysignl( long double x, long double y )
{
    // We hope that the long double to double conversion proceeds with sign fidelity,
    // even for zeros and NaNs
    union{ double d; cl_ulong u;}u; u.d = (double) y;

    x = reference_fabsl(x);
    if( u.u >> 63 )     // sign bit of y
        x = -x;

    return x;
}

// Round to nearest integer, halfway cases away from zero.
long double reference_roundl( long double x )
{
    // Since we are just using this to verify double precision, we can
    // use the double precision copysign here

#if defined(__MINGW32__) && defined(__x86_64__)
    // MinGW-w64 special case: return a correctly signed zero for |x| < 0.5
    // (presumably works around a round() defect there — TODO confirm)
    long double absx = reference_fabsl(x);
    if (absx < 0.5L)
        return reference_copysignl(0.0L, x);
#endif
    return round( (double) x );
}

// Round toward zero.
long double reference_truncl( long double x )
{
    // Since we are just using this to verify double precision, we can
    // use the double precision copysign here
    return trunc( (double) x );
}

static long double reference_scalblnl(long double x, long n);

// Cube root: cbrt(|x|) = 2^( log2(|x|) / 3 ), evaluated in extended
// (double-double) precision, with the sign of x restored at the end
// (cbrt is an odd function).
long double reference_cbrtl(long double x)
{
    // yhi + ylo together approximate 1/3 to roughly double-double precision
    double yhi = HEX_DBL( +, 1,
5555555555555, -, 2 );
    double ylo = HEX_DBL( +, 1, 558, -, 56 );

    double fabsx = reference_fabs( x );

    // NaN, +-0, +-1 and +-inf are all fixed points of cbrt
    if( isnan(x) || fabsx == 1.0 || fabsx == 0.0 || isinf(x) )
        return x;

    double iy = 0.0;    // NOTE(review): unused below; kept for fidelity with the original
    double log2x_hi, log2x_lo;

    // extended precision log .... accurate to at least 64-bits + couple of guard bits
    __log2_ep(&log2x_hi, &log2x_lo, fabsx);

    double ylog2x_hi, ylog2x_lo;

    double y_hi = yhi;
    double y_lo = ylo;

    // compute product of y*log2(x)
    MulDD(&ylog2x_hi, &ylog2x_lo, log2x_hi, log2x_lo, y_hi, y_lo);

    long double powxy;
    // |y*log2(x)| > 2200 guarantees the final 2^(y*log2 x) overflows or underflows
    if(isinf(ylog2x_hi) || (reference_fabs(ylog2x_hi) > 2200)) {
        powxy = reference_signbit(ylog2x_hi) ? HEX_DBL( +, 0, 0, +, 0 ) : INFINITY;
    } else {
        // separate integer + fractional part
        long int m = lrint(ylog2x_hi);
        AddDD(&ylog2x_hi, &ylog2x_lo, ylog2x_hi, ylog2x_lo, -m, 0.0);

        // revert to long double arithmetic
        long double ylog2x = (long double) ylog2x_hi + (long double) ylog2x_lo;
        powxy = reference_exp2l( ylog2x );
        powxy = reference_scalblnl(powxy, m);   // scale back in the integer part
    }

    // restore the sign of x
    return reference_copysignl( powxy, x );
}

/*
long double scalbnl( long double x, int i )
{
    //suitable for checking double precision scalbn only

    if( i > 3000 )
        return copysignl( INFINITY, x);
    if( i < -3000 )
        return copysignl( 0.0L, x);

    if( i > 0 )
    {
        while( i >= 1000 )
        {
            x *= HEX_LDBL( +, 1, 0, +, 1000 );
            i -= 1000;
        }

        union{ cl_ulong u; double d;}u;
        u.u = (cl_ulong)( i + 1023 ) << 52;
        x *= (long double) u.d;
    }
    else if( i < 0 )
    {
        while( i <= -1000 )
        {
            x *= HEX_LDBL( +, 1, 0, -, 1000 );
            i += 1000;
        }

        union{ cl_ulong u; double d;}u;
        u.u = (cl_ulong)( i + 1023 ) << 52;
        x *= (long double) u.d;
    }

    return x;
}
*/

// Round to nearest integer, ties to even (rint in the default rounding mode).
long double reference_rintl( long double x )
{
#if defined(__PPC__)
    // On PPC, long doubles are maintained as 2 doubles. Therefore, the combined
    // mantissa can represent more than LDBL_MANT_DIG binary digits.
    x = rintl(x);
#else
    // magic[0] = 2^(LDBL_MANT_DIG - 1): adding then subtracting it discards
    // all fraction bits under round-to-nearest-even, which is exactly rint()
    static long double magic[2] = { 0.0L, 0.0L};

    if( 0.0L == magic[0] )
    {
        magic[0] = scalbnl(0.5L, LDBL_MANT_DIG);
        magic[1] = scalbnl(-0.5L, LDBL_MANT_DIG);
    }

    // |x| >= 2^(LDBL_MANT_DIG-1) is already integral; x == 0 is skipped so
    // the sign of zero is preserved
    if( reference_fabsl(x) < magic[0] && x != 0.0L )
    {
        long double m = magic[ x < 0 ];     // pick the magic value matching x's sign
        x += m;
        x -= m;
    }
#endif // __PPC__
    return x;
}

// extended precision sqrt using newton iteration on 1/sqrt(x).
// Final result is computed as x * 1/sqrt(x)
static void __sqrt_ep(double *rhi, double *rlo, double xhi, double xlo)
{
    // approximate reciprocal sqrt
    double thi = 1.0 / sqrt( xhi );
    double tlo = 0.0;

    // One newton iteration in double-double: t' = 0.5 * t * (3 - x*t*t)
    double yhi, ylo;
    MulDD(&yhi, &ylo, thi, tlo, thi, tlo);
    MulDD(&yhi, &ylo, yhi, ylo, xhi, xlo);
    AddDD(&yhi, &ylo, -yhi, -ylo, 3.0, 0.0);
    MulDD(&yhi, &ylo, yhi, ylo, thi, tlo);
    MulDD(&yhi, &ylo, yhi, ylo, 0.5, 0.0);

    // sqrt(x) = x * (1/sqrt(x))
    MulDD(rhi, rlo, yhi, ylo, xhi, xlo);
}

// Inverse hyperbolic cosine: acosh(x) = ln( x + sqrt(x*x - 1) ), x >= 1.
long double reference_acoshl( long double x )
{
/*
 * ====================================================
 * This function derived from fdlibm http://www.netlib.org
 * It is Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunSoft, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 *
 */
    // NaN propagates; acosh(+inf) = +inf; acosh(-inf) -> NaN via x + |x|
    if( isnan(x) || isinf(x))
        return x + fabsl(x);

    // domain error for x < 1
    if( x < 1.0L )
        return cl_make_nan();

    if( x == 1.0L )
        return 0.0L;

    // huge x: sqrt(x*x - 1) ~= x, so acosh(x) ~= ln(2x) = ln(x) + ln(2)
    if( x > HEX_LDBL( +, 1, 0, +, 60 ) )
        return reference_logl(x) + 0.693147180559945309417232121458176568L;

    // medium range rearrangement of ln( x + sqrt(x*x - 1) )
    if( x > 2.0L )
        return reference_logl(2.0L * x - 1.0L / (x + sqrtl(x*x - 1.0L)));

    // near 1: evaluate s = x + sqrt(x*x - 1) in double-double, then
    // ln(s_hi + s_lo) ~= log2(s_hi) * ln(2) + s_lo/s_hi  (the correction term)
    double hi, lo;
    MulD(&hi, &lo, x, x);
    AddDD(&hi, &lo, hi, lo, -1.0, 0.0);
    __sqrt_ep(&hi, &lo, hi, lo);
    AddDD(&hi, &lo, hi, lo, x, 0.0);
    double correction = lo / hi;
    __log2_ep(&hi, &lo, hi);
    // ln(2) split into high and low double parts
    double log2Hi = HEX_DBL( +, 1, 62e42fefa39ef, -, 1 );
    double log2Lo = HEX_DBL( +, 1, abc9e3b39803f, -, 56 );
    MulDD(&hi, &lo, hi, lo, log2Hi, log2Lo);
    AddDD(&hi, &lo, hi, lo, correction, 0.0);

    return hi + lo;
}

// Inverse hyperbolic sine: asinh(x) = ln( x + sqrt(x*x + 1) ); odd function.
long double reference_asinhl( long double x )
{
    long double cutoff = 0.0L;
    const long double ln2 = HEX_LDBL( +, b, 17217f7d1cf79ab, -, 4 );

    // below 2^-LDBL_MANT_DIG, asinh(x) rounds to x
    if( cutoff == 0.0L )
        cutoff = reference_ldexpl(1.0L, -LDBL_MANT_DIG);

    if( isnan(x) || isinf(x) )
        return x + x;

    long double absx = reference_fabsl(x);
    if( absx < cutoff )
        return x;

    long double sign = reference_copysignl(1.0L, x);

    if( absx <= 4.0/3.0 ) {
        // small: log1p form avoids cancellation near zero
        return sign * reference_log1pl( absx + x*x / (1.0 + sqrtl(1.0 + x*x)));
    }
    else if( absx <= HEX_LDBL( +, 1, 0, +, 27 ) ) {
        // medium: asinh(x) = ln( 2|x| + 1/(sqrt(x*x + 1) + |x|) )
        return sign * reference_logl( 2.0L * absx + 1.0L / (sqrtl( x * x + 1.0 ) + absx));
    }
    else {
        // large: sqrt(x*x + 1) ~= |x|, so asinh(x) ~= ln(2|x|) = ln|x| + ln 2
        return sign * ( reference_logl( absx ) + ln2 );
    }
}

// Inverse hyperbolic tangent, domain (-1, 1).
long double reference_atanhl( long double x )
{
/*
 * ====================================================
 * This function is from fdlibm: http://www.netlib.org
 * It is Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunSoft, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 */
    if( isnan(x) )
        return x + x;

    long double signed_half = reference_copysignl( 0.5L, x );
    x = reference_fabsl(x);
    // domain error outside [-1, 1]
    if( x > 1.0L )
        return cl_make_nan();

    // atanh(x) = 0.5 * ln( (1+x)/(1-x) ) = 0.5 * log1p( 2x/(1-x) );
    // the x < 0.5 form rearranges the argument to reduce rounding error
    if( x < 0.5L )
        return signed_half * reference_log1pl( 2.0L * ( x + x*x / (1-x) ) );

    return signed_half * reference_log1pl(2.0L * x / (1-x));
}

// Reference exp2 for long double, evaluated in double-double arithmetic.
long double reference_exp2l( long double z)
{
    double_double x;
    int j;

    // Handle NaNs
    if( isnan(z) )
        return z;

    // init x as a double-double holding z
    x.hi = z;
    x.lo = z - x.hi;

    //Deal with overflow and underflow for exp2(x) stage next
    if( x.hi >= 1025 )
        return INFINITY;

    // below even the least denormal's exponent: underflow to +0
    if( x.hi < -1075-24 )
        return +0.0;

    // find nearest integer to x
    int i = (int) rint(x.hi);

    // x now holds fractional part. The result would be then 2**i * exp2( x )
    x.hi -= i;

    // We could attempt to find a minimax polynomial for exp2(x) over the range x = [-0.5, 0.5].
    // However, this would converge very slowly near the extrema, where 0.5**n is not a lot different
    // from 0.5**(n+1), thereby requiring something like a 20th order polynomial to get 53 + 24 bits
    // of precision. Instead we further reduce the range to [-1/32, 1/32] by observing that
    //
    //  2**(a+b) = 2**a * 2**b
    //
    // We can thus build a table of 2**a values for a = n/16, n = [-8, 8], and reduce the range
    // of x to [-1/32, 1/32] by subtracting away the nearest value of n/16 from x.
+ const double_double corrections[17] = + { + { HEX_DBL( +, 1, 6a09e667f3bcd, -, 1 ), HEX_DBL( -, 1, bdd3413b26456, -, 55 ) }, + { HEX_DBL( +, 1, 7a11473eb0187, -, 1 ), HEX_DBL( -, 1, 41577ee04992f, -, 56 ) }, + { HEX_DBL( +, 1, 8ace5422aa0db, -, 1 ), HEX_DBL( +, 1, 6e9f156864b27, -, 55 ) }, + { HEX_DBL( +, 1, 9c49182a3f09, -, 1 ), HEX_DBL( +, 1, c7c46b071f2be, -, 57 ) }, + { HEX_DBL( +, 1, ae89f995ad3ad, -, 1 ), HEX_DBL( +, 1, 7a1cd345dcc81, -, 55 ) }, + { HEX_DBL( +, 1, c199bdd85529c, -, 1 ), HEX_DBL( +, 1, 11065895048dd, -, 56 ) }, + { HEX_DBL( +, 1, d5818dcfba487, -, 1 ), HEX_DBL( +, 1, 2ed02d75b3707, -, 56 ) }, + { HEX_DBL( +, 1, ea4afa2a490da, -, 1 ), HEX_DBL( -, 1, e9c23179c2893, -, 55 ) }, + { HEX_DBL( +, 1, 0, +, 0 ), HEX_DBL( +, 0, 0, +, 0 ) }, + { HEX_DBL( +, 1, 0b5586cf9890f, +, 0 ), HEX_DBL( +, 1, 8a62e4adc610b, -, 54 ) }, + { HEX_DBL( +, 1, 172b83c7d517b, +, 0 ), HEX_DBL( -, 1, 19041b9d78a76, -, 55 ) }, + { HEX_DBL( +, 1, 2387a6e756238, +, 0 ), HEX_DBL( +, 1, 9b07eb6c70573, -, 54 ) }, + { HEX_DBL( +, 1, 306fe0a31b715, +, 0 ), HEX_DBL( +, 1, 6f46ad23182e4, -, 55 ) }, + { HEX_DBL( +, 1, 3dea64c123422, +, 0 ), HEX_DBL( +, 1, ada0911f09ebc, -, 55 ) }, + { HEX_DBL( +, 1, 4bfdad5362a27, +, 0 ), HEX_DBL( +, 1, d4397afec42e2, -, 56 ) }, + { HEX_DBL( +, 1, 5ab07dd485429, +, 0 ), HEX_DBL( +, 1, 6324c054647ad, -, 54 ) }, + { HEX_DBL( +, 1, 6a09e667f3bcd, +, 0 ), HEX_DBL( -, 1, bdd3413b26456, -, 54 ) } + }; + int index = (int) rint( x.hi * 16.0 ); + x.hi -= (double) index * 0.0625; + + // canonicalize x + double temp = x.hi; + x.hi += x.lo; + x.lo -= x.hi - temp; + + // Minimax polynomial for (exp2(x)-1)/x, over the range [-1/32, 1/32]. 
Max Error: 2 * 0x1.e112p-87 + const double_double c[] = { + {HEX_DBL( +, 1, 62e42fefa39ef, -, 1 ), HEX_DBL( +, 1, abc9e3ac1d244, -, 56 )}, + {HEX_DBL( +, 1, ebfbdff82c58f, -, 3 ), HEX_DBL( -, 1, 5e4987a631846, -, 57 )}, + {HEX_DBL( +, 1, c6b08d704a0c, -, 5 ), HEX_DBL( -, 1, d323200a05713, -, 59 )}, + {HEX_DBL( +, 1, 3b2ab6fba4e7a, -, 7 ), HEX_DBL( +, 1, c5ee8f8b9f0c1, -, 63 )}, + {HEX_DBL( +, 1, 5d87fe78a672a, -, 10 ), HEX_DBL( +, 1, 884e5e5cc7ecc, -, 64 )}, + {HEX_DBL( +, 1, 430912f7e8373, -, 13 ), HEX_DBL( +, 1, 4f1b59514a326, -, 67 )}, + {HEX_DBL( +, 1, ffcbfc5985e71, -, 17 ), HEX_DBL( -, 1, db7d6a0953b78, -, 71 )}, + {HEX_DBL( +, 1, 62c150eb16465, -, 20 ), HEX_DBL( +, 1, e0767c2d7abf5, -, 80 )}, + {HEX_DBL( +, 1, b52502b5e953, -, 24 ), HEX_DBL( +, 1, 6797523f944bc, -, 78 )} + }; + size_t count = sizeof( c ) / sizeof( c[0] ); + + // Do polynomial + double_double r = c[count-1]; + for( j = (int) count-2; j >= 0; j-- ) + r = add_dd( c[j], mul_dd( r, x ) ); + + // unwind approximation + r = mul_dd( r, x ); // before: r =(exp2(x)-1)/x; after: r = exp2(x) - 1 + + // correct for [-0.5, 0.5] -> [-1/32, 1/32] reduction above + // exp2(x) = (r + 1) * correction = r * correction + correction + r = mul_dd( r, corrections[index+8] ); + r = add_dd( r, corrections[index+8] ); + +// Format result for output: + + // Get mantissa + long double m = ((long double) r.hi + (long double) r.lo ); + + // Handle a pesky overflow cases when long double = double + if( i > 512 ) + { + m *= HEX_DBL( +, 1, 0, +, 512 ); + i -= 512; + } + else if( i < -512 ) + { + m *= HEX_DBL( +, 1, 0, -, 512 ); + i += 512; + } + + return m * ldexpl( 1.0L, i ); +} + +long double reference_expm1l( long double x) +{ +#if defined( _MSC_VER ) && ! 
defined( __INTEL_COMPILER )
    //unimplemented
    return x;
#else
    union { double f; cl_ulong u;} u;
    u.f = (double) x;   // NOTE(review): u is not read below; kept for fidelity

    if (reference_isnanl(x))
        return x;

    // 710 > ln(DBL_MAX) ~= 709.78, so the result certainly overflows
    if ( x > 710 )
        return INFINITY;

    long double y = expm1l(x);

    // Range of expm1l is -1.0L to +inf. Negative inf
    // on a few Linux platforms is clearly the wrong sign.
    if (reference_isinfl(y))
        y = INFINITY;

    return y;
#endif
}

// fmax that ignores a NaN in y; a NaN in x fails the >= comparison and
// therefore returns y, so a single NaN argument yields the non-NaN one.
long double reference_fmaxl( long double x, long double y )
{
    if( isnan(y) )
        return x;

    return x >= y ? x : y;
}

// fmin with the same NaN handling; mirror image of reference_fmaxl above.
long double reference_fminl( long double x, long double y )
{
    if( isnan(y) )
        return x;

    return x <= y ? x : y;
}

// hypot(x, y) = sqrt(x*x + y*y) without undue overflow or underflow:
// very large or very tiny operands are pre-scaled by a power of two and
// the result is rescaled by the matching inverse power.
long double reference_hypotl( long double x, long double y )
{
    static const double tobig = HEX_DBL( +, 1, 0, +, 511 );     // scale-down threshold
    static const double big = HEX_DBL( +, 1, 0, +, 513 );
    static const double rbig = HEX_DBL( +, 1, 0, -, 513 );
    static const double tosmall = HEX_DBL( +, 1, 0, -, 511 );   // scale-up threshold
    static const double smalll = HEX_DBL( +, 1, 0, -, 607 );
    static const double rsmall = HEX_DBL( +, 1, 0, +, 607 );

    long double max, min;

    // hypot is +inf if either input is infinite, even if the other is NaN
    if( isinf(x) || isinf(y) )
        return INFINITY;

    if( isnan(x) || isnan(y) )
        return x + y;

    x = reference_fabsl(x);
    y = reference_fabsl(y);

    max = reference_fmaxl( x, y );
    min = reference_fminl( x, y );

    if( max > tobig )
    {
        max *= rbig;
        min *= rbig;
        return big * sqrtl( max * max + min * min );
    }

    if( max < tosmall )
    {
        max *= rsmall;
        min *= rsmall;
        return smalll * sqrtl( max * max + min * min );
    }
    return sqrtl( x * x + y * y );
}

//long double reference_log2l( long double x )
//{
//    return log( x ) * 1.44269504088896340735992468100189214L;
//}

// Reference log2 via the extended-precision __log2_ep helper.
long double reference_log2l( long double x )
{
    // log2 of a negative number (or NaN) is NaN
    if( isnan(x) || x < 0.0 || x == -INFINITY)
        return NAN;

    // log2(+-0) = -inf
    if( x == 0.0f)
        return -INFINITY;

    if( x == INFINITY )
        return INFINITY;

    double hi, lo;
    __log2_ep( &hi, &lo, x);

    return (long double) hi + (long double) lo;
}

long
double reference_log1pl( long double x)
{
#if defined( _MSC_VER ) && ! defined( __INTEL_COMPILER )
    //unimplemented
    return x;
#elif defined(__PPC__)
    // log1pl on PPC inadvertently returns NaN for very large values. Work
    // around this limitation by returning logl for large values.
    return ((x > (long double)(0x1.0p+1022)) ? logl(x) : log1pl(x));
#else
    return log1pl(x);
#endif
}

// Extract the unbiased exponent of x as a floating-point value.
// logb(+-inf) = +inf and logb(NaN) = NaN (both via x*x); logb(+-0) = -inf.
long double reference_logbl( long double x )
{
    // Since we are just using this to verify double precision, we can
    // use the double precision copysign here
    union { double f; cl_ulong u;} u;
    u.f = (double) x;

    cl_int exponent = (cl_uint)(u.u >> 52) & 0x7ff;
    if( exponent == 0x7ff )     // inf or NaN
        return x * x;

    if( exponent == 0 )
    { // deal with denormals
        u.f = x * HEX_DBL( +, 1, 0, +, 64 );    // scale up by 2^64 to normalize
        exponent = (cl_int)(u.u >> 52) & 0x7ff;
        if( exponent == 0 )     // still zero: x was +-0
            return -INFINITY;

        return exponent - (1023 + 64);  // undo the bias and the 2^64 scaling
    }

    return exponent - 1023;
}

// Return the argument with the greater magnitude; on equal magnitudes or
// NaN, fall through to fmax semantics.
long double reference_maxmagl( long double x, long double y )
{
    long double fabsx = fabsl(x);
    long double fabsy = fabsl(y);

    if( fabsx < fabsy )
        return y;

    if( fabsy < fabsx )
        return x;

    return reference_fmaxl(x, y);
}

// Return the argument with the lesser magnitude; on equal magnitudes or
// NaN, fall through to fmin semantics.
long double reference_minmagl( long double x, long double y )
{
    long double fabsx = fabsl(x);
    long double fabsy = fabsl(y);

    if( fabsx > fabsy )
        return y;

    if( fabsy > fabsx )
        return x;

    return reference_fminl(x, y);
}

// Construct a quiet NaN whose payload bits come from x.
long double reference_nanl( cl_ulong x )
{
    union{ cl_ulong u; cl_double f; }u;
    u.u = x | 0x7ff8000000000000ULL;    // force the quiet-NaN exponent and quiet bit
    return (long double) u.f;
}


long double reference_reciprocall( long double x )
{
    return 1.0L / x;
}

long double reference_remainderl( long double x, long double y );
// IEEE remainder: remquo with the quotient discarded.
long double reference_remainderl( long double x, long double y )
{
    int i;
    return reference_remquol( x, y, &i );
}

long double reference_lgammal( long double x);
long double reference_lgammal( long double x)
{
    // lgamma is currently not tested
return reference_lgamma( x ); +} + +static uint32_t two_over_pi[] = { 0x0, 0x28be60db, 0x24e44152, 0x27f09d5f, 0x11f534dd, 0x3036d8a5, 0x1993c439, 0x107f945, 0x23abdebb, 0x31586dc9, +0x6e3a424, 0x374b8019, 0x92eea09, 0x3464873f, 0x21deb1cb, 0x4a69cfb, 0x288235f5, 0xbaed121, 0xe99c702, 0x1ad17df9, +0x13991d6, 0xe60d4ce, 0x1f49c845, 0x3e2ef7e4, 0x283b1ff8, 0x25fff781, 0x1980fef2, 0x3c462d68, 0xa6d1f6d, 0xd9fb3c9, +0x3cb09b74, 0x3d18fd9a, 0x1e5fea2d, 0x1d49eeb1, 0x3ebe5f17, 0x2cf41ce7, 0x378a5292, 0x3a9afed7, 0x3b11f8d5, 0x3421580c, +0x3046fc7b, 0x1aeafc33, 0x3bc209af, 0x10d876a7, 0x2391615e, 0x3986c219, 0x199855f1, 0x1281a102, 0xdffd880, 0x135cc9cc, +0x10606155 +}; + +static uint32_t pi_over_two[] = { 0x1, 0x2487ed51, 0x42d1846, 0x26263314, 0x1701b839, 0x28948127 }; + +typedef union + { + uint64_t u; + double d; + }d_ui64_t; + +// radix or base of representation +#define RADIX (30) +#define DIGITS 6 + +d_ui64_t two_pow_pradix = { (uint64_t) (1023 + RADIX) << 52 }; +d_ui64_t two_pow_mradix = { (uint64_t) (1023 - RADIX) << 52 }; +d_ui64_t two_pow_two_mradix = { (uint64_t) (1023-2*RADIX) << 52 }; + +#define tp_pradix two_pow_pradix.d +#define tp_mradix two_pow_mradix.d + +// extended fixed point representation of double precision +// floating point number. +// x = sign * [ sum_{i = 0 to 2} ( X[i] * 2^(index - i)*RADIX ) ] +typedef struct + { + uint32_t X[3]; // three 32 bit integers are sufficient to represnt double in base_30 + int index; // exponent bias + int sign; // sign of double + }eprep_t; + +static eprep_t double_to_eprep(double x); + +static eprep_t double_to_eprep(double x) +{ + eprep_t result; + + result.sign = (signbit( x ) == 0) ? 
1 : -1; + x = fabs( x ); + + int index = 0; + while( x > tp_pradix ) { + index++; + x *= tp_mradix; + } + while( x < 1 ) { + index--; + x *= tp_pradix; + } + + result.index = index; + int i = 0; + result.X[0] = result.X[1] = result.X[2] = 0; + while( x != 0.0 ) { + result.X[i] = (uint32_t) x; + x = (x - (double) result.X[i]) * tp_pradix; + i++; + } + return result; +} + +/* + double eprep_to_double( uint32_t *R, int digits, int index, int sgn ) + { + d_ui64_t nb, rndcorr; + uint64_t lowpart, roundbits, t1; + int expo, expofinal, shift; + double res; + + nb.d = (double) R[0]; + + t1 = R[1]; + lowpart = (t1 << RADIX) + R[2]; + expo = ((nb.u & 0x7ff0000000000000ULL) >> 52) - 1023; + + expofinal = expo + RADIX*index; + + if (expofinal > 1023) { + d_ui64_t inf = { 0x7ff0000000000000ULL }; + res = inf.d; + } + + else if (expofinal >= -1022){ + shift = expo + 2*RADIX - 53; + roundbits = lowpart << (64-shift); + lowpart = lowpart >> shift; + if (lowpart & 0x0000000000000001ULL) { + if(roundbits == 0) { + int i; + for (i=3; i < digits; i++) + roundbits = roundbits | R[i]; + } + if(roundbits == 0) { + if (lowpart & 0x0000000000000002ULL) + rndcorr.u = (uint64_t) (expo - 52 + 1023) << 52; + else + rndcorr.d = 0.0; + } + else + rndcorr.u = (uint64_t) (expo - 52 + 1023) << 52; + } + else{ + rndcorr.d = 0.0; + } + + lowpart = lowpart >> 1; + nb.u = nb.u | lowpart; + res = nb.d + rndcorr.d; + + if(index*RADIX + 1023 > 0) { + nb.u = 0; + nb.u = (uint64_t) (index*RADIX + 1023) << 52; + res *= nb.d; + } + else { + nb.u = 0; + nb.u = (uint64_t) (index*RADIX + 1023 + 2*RADIX) << 52; + res *= two_pow_two_mradix.d; + res *= nb.d; + } + } + else { + if (expofinal < -1022 - 53 ) { + res = 0.0; + } + else { + lowpart = lowpart >> (expo + (2*RADIX) - 52); + nb.u = nb.u | lowpart; + nb.u = (nb.u & 0x000FFFFFFFFFFFFFULL) | 0x0010000000000000ULL; + nb.u = nb.u >> (-1023 - expofinal); + if(nb.u & 0x0000000000000001ULL) + rndcorr.u = 1; + else + rndcorr.d = 0.0; + res = 0.5*(nb.d + rndcorr.d); + 
} + } + + return sgn*res; + } + */ +static double eprep_to_double( eprep_t epx ); + +static double eprep_to_double( eprep_t epx ) +{ + double res = 0.0; + + res += ldexp((double) epx.X[0], (epx.index - 0)*RADIX); + res += ldexp((double) epx.X[1], (epx.index - 1)*RADIX); + res += ldexp((double) epx.X[2], (epx.index - 2)*RADIX); + + return copysign(res, epx.sign); +} + +static int payne_hanek( double *y, int *exception ); + +static int payne_hanek( double *y, int *exception ) +{ + double x = *y; + + // exception cases .. no reduction required + if( isnan( x ) || isinf( x ) || (fabs( x ) <= M_PI_4) ) { + *exception = 1; + return 0; + } + + *exception = 0; + + // After computation result[0] contains integer part while result[1]....result[DIGITS-1] + // contain fractional part. So we are doing computation with (DIGITS-1)*RADIX precision. + // Default DIGITS=6 and RADIX=30 so default precision is 150 bits. Kahan-McDonald algorithm + // shows that a double precision x, closest to pi/2 is 6381956970095103 x 2^797 which can + // cause 61 digits of cancellation in computation of f = x*2/pi - floor(x*2/pi) ... thus we need + // at least 114 bits (61 leading zeros + 53 bits of mentissa of f) of precision to accurately compute + // f in double precision. Since we are using 150 bits (still an overkill), we should be safe. Extra + // bits can act as guard bits for correct rounding. + uint64_t result[DIGITS+2]; + + // compute extended precision representation of x + eprep_t epx = double_to_eprep( x ); + int index = epx.index; + int i, j; + // extended precision multiplication of 2/pi*x .... we will loose at max two RADIX=30 bit digits in + // the worst case + for(i = 0; i < (DIGITS+2); i++) { + result[i] = 0; + result[i] += ((index + i - 0) >= 0) ? ((uint64_t) two_over_pi[index + i - 0] * (uint64_t) epx.X[0]) : 0; + result[i] += ((index + i - 1) >= 0) ? ((uint64_t) two_over_pi[index + i - 1] * (uint64_t) epx.X[1]) : 0; + result[i] += ((index + i - 2) >= 0) ? 
((uint64_t) two_over_pi[index + i - 2] * (uint64_t) epx.X[2]) : 0; + } + + // Carry propagation. + uint64_t tmp; + for(i = DIGITS+2-1; i > 0; i--) { + tmp = result[i] >> RADIX; + result[i - 1] += tmp; + result[i] -= (tmp << RADIX); + } + + // we dont ned to normalize the integer part since only last two bits of this will be used + // subsequently algorithm which remain unaltered by this normalization. + // tmp = result[0] >> RADIX; + // result[0] -= (tmp << RADIX); + unsigned int N = (unsigned int) result[0]; + + // if the result is > pi/4, bring it to (-pi/4, pi/4] range. Note that testing if the final + // x_star = pi/2*(x*2/pi - k) > pi/4 is equivalent to testing, at this stage, if r[1] (the first fractional + // digit) is greater than (2^RADIX)/2 and substracting pi/4 from x_star to bring it to mentioned + // range is equivalent to substracting fractional part at this stage from one and changing the sign. + int sign = 1; + if(result[1] > (uint64_t)(1 << (RADIX - 1))) { + for(i = 1; i < (DIGITS + 2); i++) + result[i] = (~((unsigned int)result[i]) & 0x3fffffff); + N += 1; + sign = -1; + } + + // Again as per Kahan-McDonald algorithim there may be 61 leading zeros in the worst case + // (when x is multiple of 2/pi very close to an integer) so we need to get rid of these zeros + // and adjust the index of final result. So in the worst case, precision of comupted result is + // 90 bits (150 bits original bits - 60 lost in cancellation). 
    int ind = 1;
    // skip leading zero digits of the fraction, tracking how many were dropped
    for(i = 1; i < (DIGITS+2); i++) {
        if(result[i] != 0)
            break;
        else
            ind++;
    }

    // multiply the normalized fraction by pi/2 in fixed point (base 2^RADIX)
    uint64_t r[DIGITS-1];
    for(i = 0; i < (DIGITS-1); i++) {
        r[i] = 0;
        for(j = 0; j <= i; j++) {
            r[i] += (result[ind+i-j] * (uint64_t) pi_over_two[j]);
        }
    }
    // carry propagation
    for(i = (DIGITS-2); i > 0; i--) {
        tmp = r[i] >> RADIX;
        r[i - 1] += tmp;
        r[i] -= (tmp << RADIX);
    }
    tmp = r[0] >> RADIX;
    r[0] -= (tmp << RADIX);

    // pack the three most significant digits into extended-precision form
    eprep_t epr;
    epr.sign = epx.sign*sign;
    if(tmp != 0) {
        // the product carried past the radix point: shift the digit window up
        epr.index = -ind + 1;
        epr.X[0] = (uint32_t) tmp;
        epr.X[1] = (uint32_t) r[0];
        epr.X[2] = (uint32_t) r[1];
    }
    else {
        epr.index = -ind;
        epr.X[0] = (uint32_t) r[0];
        epr.X[1] = (uint32_t) r[1];
        epr.X[2] = (uint32_t) r[2];
    }

    *y = eprep_to_double( epr );
    return epx.sign*N;  // quadrant count, carrying the sign of the input
}

// "Relaxed" cos: the single-precision result is an acceptable answer.
double reference_relaxed_cos(double x)
{
    if(isnan(x))
        return NAN;
    return (float)cos((float)x);
}

// Reference cos with Payne-Hanek reduction: x is reduced modulo pi/2 with
// quadrant count N, then cos(reduced + N*pi/2) is formed from the system
// cos/sin of the reduced argument.
double reference_cos(double x)
{
    int exception;
    int N = payne_hanek( &x, &exception );
    if( exception )
        return cos( x );    // no reduction needed (NaN, inf, or |x| <= pi/4)
    unsigned int c = N & 3;
    switch ( c ) {
        case 0:
            return cos( x );
        case 1:
            return -sin( x );
        case 2:
            return -cos( x );
        case 3:
            return sin( x );
    }
    return 0.0;     // unreachable; all quadrants handled above
}

// "Relaxed" sin: the single-precision result is an acceptable answer.
double reference_relaxed_sin(double x){
    return (float)sin((float)x);
}

// Reference sin with Payne-Hanek reduction; same scheme as reference_cos.
double reference_sin(double x)
{
    int exception;
    int N = payne_hanek( &x, &exception );
    if( exception )
        return sin( x );
    int c = N & 3;
    switch ( c ) {
        case 0:
            return sin( x );
        case 1:
            return cos( x );
        case 2:
            return -sin( x );
        case 3:
            return -cos( x );
    }
    return 0.0;
}

// Relaxed sincos: cos result through *y, sin result returned.
double reference_relaxed_sincos(double x, double * y){
    *y = reference_relaxed_cos(x);
    return reference_relaxed_sin(x);
}

// Reference sincos: *y receives cos(x), sin(x) is returned; both are
// derived from one shared reduction.
double reference_sincos(double x, double *y)
{
    int exception;
    int N = payne_hanek( &x, &exception );
    if( exception ) {
        *y = cos( x );
        return sin( x );
    }
    int c = N & 3;
    switch ( c ) {
        case 0:
            *y = cos( x );
            return sin( x );
        case 1:
            *y = -sin( x );
            return cos( x );
        case 2:
            *y = -cos( x );
            return -sin( x );
        case 3:
            *y = sin( x );
            return -cos( x );
    }
    return 0.0;
}

// Relaxed tan computed as relaxed sin / relaxed cos in single precision.
double reference_relaxed_tan(double x){
    return ((float) reference_relaxed_sin((float)x))/((float) reference_relaxed_cos((float)x));
}

// Reference tan with Payne-Hanek reduction. tan has period pi, so even
// quadrants give tan(r) and odd quadrants give tan(r + pi/2) = -1/tan(r).
double reference_tan(double x)
{
    int exception;
    int N = payne_hanek( &x, &exception );
    if( exception )
        return tan( x );
    int c = N & 3;
    switch ( c ) {
        case 0:
            return tan( x );
        case 1:
            return -1.0 / tan( x );
        case 2:
            return tan( x );
        case 3:
            return -1.0 / tan( x );
    }
    return 0.0;
}

// Long double cos: reduce the (double-rounded) argument with Payne-Hanek,
// then evaluate with the system long double routines.
long double reference_cosl(long double xx)
{
    double x = (double) xx;
    int exception;
    int N = payne_hanek( &x, &exception );
    if( exception )
        return cosl( x );
    unsigned int c = N & 3;
    switch ( c ) {
        case 0:
            return cosl( x );
        case 1:
            return -sinl( x );
        case 2:
            return -cosl( x );
        case 3:
            return sinl( x );
    }
    return 0.0;
}

long double reference_sinl(long double xx)
{
    // we use the system sinl after reduction, which
    // can flush denorm input to zero, so
    // take care of it here: sin(x) rounds to x for tiny x
    if(reference_fabsl(xx) < HEX_DBL( +, 1, 0, -, 1022 ))
        return xx;

    double x = (double) xx;
    int exception;
    int N = payne_hanek( &x, &exception );
    if( exception )
        return sinl( x );
    int c = N & 3;
    switch ( c ) {
        case 0:
            return sinl( x );
        case 1:
            return cosl( x );
        case 2:
            return -sinl( x );
        case 3:
            return -cosl( x );
    }
    return 0.0;
}

long double reference_sincosl(long double xx, long double *y)
{
    // we use the system sinl/cosl after reduction, which
    // can flush denorm input to zero, so
    // take care of it here
+ if(reference_fabsl(xx) < HEX_DBL( +, 1, 0, -, 1022 )) + { + *y = cosl(xx); + return xx; + } + + double x = (double) xx; + int exception; + int N = payne_hanek( &x, &exception ); + if( exception ) { + *y = cosl( x ); + return sinl( x ); + } + int c = N & 3; + switch ( c ) { + case 0: + *y = cosl( x ); + return sinl( x ); + case 1: + *y = -sinl( x ); + return cosl( x ); + case 2: + *y = -cosl( x ); + return -sinl( x ); + case 3: + *y = sinl( x ); + return -cosl( x ); + } + return 0.0; +} + +long double reference_tanl(long double xx) +{ + // we use system tanl after reduction which + // can flush denorm input to zero so + //take care of it here. + if(reference_fabsl(xx) < HEX_DBL( +, 1, 0, -, 1022 )) + return xx; + + double x = (double) xx; + int exception; + int N = payne_hanek( &x, &exception ); + if( exception ) + return tanl( x ); + int c = N & 3; + switch ( c ) { + case 0: + return tanl( x ); + case 1: + return -1.0 / tanl( x ); + case 2: + return tanl( x ); + case 3: + return -1.0 / tanl( x ); + } + return 0.0; +} + +static double __loglTable1[64][3] = { +{HEX_DBL( +, 1, 5390948f40fea, +, 0 ), HEX_DBL( -, 1, a152f142a, -, 2 ), HEX_DBL( +, 1, f93e27b43bd2c, -, 40 )}, +{HEX_DBL( +, 1, 5015015015015, +, 0 ), HEX_DBL( -, 1, 921800925, -, 2 ), HEX_DBL( +, 1, 162432a1b8df7, -, 41 )}, +{HEX_DBL( +, 1, 4cab88725af6e, +, 0 ), HEX_DBL( -, 1, 8304d90c18, -, 2 ), HEX_DBL( +, 1, 80bb749056fe7, -, 40 )}, +{HEX_DBL( +, 1, 49539e3b2d066, +, 0 ), HEX_DBL( -, 1, 7418acebc, -, 2 ), HEX_DBL( +, 1, ceac7f0607711, -, 43 )}, +{HEX_DBL( +, 1, 460cbc7f5cf9a, +, 0 ), HEX_DBL( -, 1, 6552b49988, -, 2 ), HEX_DBL( +, 1, d8913d0e89fa, -, 42 )}, +{HEX_DBL( +, 1, 42d6625d51f86, +, 0 ), HEX_DBL( -, 1, 56b22e6b58, -, 2 ), HEX_DBL( +, 1, c7eaf515033a1, -, 44 )}, +{HEX_DBL( +, 1, 3fb013fb013fb, +, 0 ), HEX_DBL( -, 1, 48365e696, -, 2 ), HEX_DBL( +, 1, 434adcde7edc7, -, 41 )}, +{HEX_DBL( +, 1, 3c995a47babe7, +, 0 ), HEX_DBL( -, 1, 39de8e156, -, 2 ), HEX_DBL( +, 1, 8246f8e527754, -, 40 )}, 
+{HEX_DBL( +, 1, 3991c2c187f63, +, 0 ), HEX_DBL( -, 1, 2baa0c34c, -, 2 ), HEX_DBL( +, 1, e1513c28e180d, -, 42 )}, +{HEX_DBL( +, 1, 3698df3de0747, +, 0 ), HEX_DBL( -, 1, 1d982c9d58, -, 2 ), HEX_DBL( +, 1, 63ea3fed4b8a2, -, 40 )}, +{HEX_DBL( +, 1, 33ae45b57bcb1, +, 0 ), HEX_DBL( -, 1, 0fa848045, -, 2 ), HEX_DBL( +, 1, 32ccbacf1779b, -, 40 )}, +{HEX_DBL( +, 1, 30d190130d19, +, 0 ), HEX_DBL( -, 1, 01d9bbcfa8, -, 2 ), HEX_DBL( +, 1, e2bfeb2b884aa, -, 42 )}, +{HEX_DBL( +, 1, 2e025c04b8097, +, 0 ), HEX_DBL( -, 1, e857d3d37, -, 3 ), HEX_DBL( +, 1, d9309b4d2ea85, -, 40 )}, +{HEX_DBL( +, 1, 2b404ad012b4, +, 0 ), HEX_DBL( -, 1, cd3c712d4, -, 3 ), HEX_DBL( +, 1, ddf360962d7ab, -, 40 )}, +{HEX_DBL( +, 1, 288b01288b012, +, 0 ), HEX_DBL( -, 1, b2602497e, -, 3 ), HEX_DBL( +, 1, 597f8a121640f, -, 40 )}, +{HEX_DBL( +, 1, 25e22708092f1, +, 0 ), HEX_DBL( -, 1, 97c1cb13d, -, 3 ), HEX_DBL( +, 1, 02807d15580dc, -, 40 )}, +{HEX_DBL( +, 1, 23456789abcdf, +, 0 ), HEX_DBL( -, 1, 7d60496d, -, 3 ), HEX_DBL( +, 1, 12ce913d7a827, -, 41 )}, +{HEX_DBL( +, 1, 20b470c67c0d8, +, 0 ), HEX_DBL( -, 1, 633a8bf44, -, 3 ), HEX_DBL( +, 1, 0648bca9c96bd, -, 40 )}, +{HEX_DBL( +, 1, 1e2ef3b3fb874, +, 0 ), HEX_DBL( -, 1, 494f863b9, -, 3 ), HEX_DBL( +, 1, 066fceb89b0eb, -, 42 )}, +{HEX_DBL( +, 1, 1bb4a4046ed29, +, 0 ), HEX_DBL( -, 1, 2f9e32d5c, -, 3 ), HEX_DBL( +, 1, 17b8b6c4f846b, -, 46 )}, +{HEX_DBL( +, 1, 19453808ca29c, +, 0 ), HEX_DBL( -, 1, 162593187, -, 3 ), HEX_DBL( +, 1, 2c83506452154, -, 42 )}, +{HEX_DBL( +, 1, 16e0689427378, +, 0 ), HEX_DBL( -, 1, f9c95dc1e, -, 4 ), HEX_DBL( +, 1, dd5d2183150f3, -, 41 )}, +{HEX_DBL( +, 1, 1485f0e0acd3b, +, 0 ), HEX_DBL( -, 1, c7b528b72, -, 4 ), HEX_DBL( +, 1, 0e43c4f4e619d, -, 40 )}, +{HEX_DBL( +, 1, 12358e75d3033, +, 0 ), HEX_DBL( -, 1, 960caf9ac, -, 4 ), HEX_DBL( +, 1, 20fbfd5902a1e, -, 42 )}, +{HEX_DBL( +, 1, 0fef010fef01, +, 0 ), HEX_DBL( -, 1, 64ce26c08, -, 4 ), HEX_DBL( +, 1, 8ebeefb4ac467, -, 40 )}, +{HEX_DBL( +, 1, 0db20a88f4695, +, 0 ), HEX_DBL( -, 1, 
33f7cde16, -, 4 ), HEX_DBL( +, 1, 30b3312da7a7d, -, 40 )}, +{HEX_DBL( +, 1, 0b7e6ec259dc7, +, 0 ), HEX_DBL( -, 1, 0387efbcc, -, 4 ), HEX_DBL( +, 1, 796f1632949c3, -, 40 )}, +{HEX_DBL( +, 1, 0953f39010953, +, 0 ), HEX_DBL( -, 1, a6f9c378, -, 5 ), HEX_DBL( +, 1, 1687e151172cc, -, 40 )}, +{HEX_DBL( +, 1, 073260a47f7c6, +, 0 ), HEX_DBL( -, 1, 47aa07358, -, 5 ), HEX_DBL( +, 1, 1f87e4a9cc778, -, 42 )}, +{HEX_DBL( +, 1, 05197f7d73404, +, 0 ), HEX_DBL( -, 1, d23afc498, -, 6 ), HEX_DBL( +, 1, b183a6b628487, -, 40 )}, +{HEX_DBL( +, 1, 03091b51f5e1a, +, 0 ), HEX_DBL( -, 1, 16a21e21, -, 6 ), HEX_DBL( +, 1, 7d75c58973ce5, -, 40 )}, +{HEX_DBL( +, 1, 0, +, 0 ), HEX_DBL( +, 0, 0, +, 0 ), HEX_DBL( +, 0, 0, +, 0 )}, +{HEX_DBL( +, 1, 0, +, 0 ), HEX_DBL( +, 0, 0, +, 0 ), HEX_DBL( +, 0, 0, +, 0 )}, +{HEX_DBL( +, 1, f44659e4a4271, -, 1 ), HEX_DBL( +, 1, 11cd1d51, -, 5 ), HEX_DBL( +, 1, 9a0d857e2f4b2, -, 40 )}, +{HEX_DBL( +, 1, ecc07b301ecc, -, 1 ), HEX_DBL( +, 1, c4dfab908, -, 5 ), HEX_DBL( +, 1, 55b53fce557fd, -, 40 )}, +{HEX_DBL( +, 1, e573ac901e573, -, 1 ), HEX_DBL( +, 1, 3aa2fdd26, -, 4 ), HEX_DBL( +, 1, f1cb0c9532089, -, 40 )}, +{HEX_DBL( +, 1, de5d6e3f8868a, -, 1 ), HEX_DBL( +, 1, 918a16e46, -, 4 ), HEX_DBL( +, 1, 9af0dcd65a6e1, -, 43 )}, +{HEX_DBL( +, 1, d77b654b82c33, -, 1 ), HEX_DBL( +, 1, e72ec117e, -, 4 ), HEX_DBL( +, 1, a5b93c4ebe124, -, 40 )}, +{HEX_DBL( +, 1, d0cb58f6ec074, -, 1 ), HEX_DBL( +, 1, 1dcd19755, -, 3 ), HEX_DBL( +, 1, 5be50e71ddc6c, -, 42 )}, +{HEX_DBL( +, 1, ca4b3055ee191, -, 1 ), HEX_DBL( +, 1, 476a9f983, -, 3 ), HEX_DBL( +, 1, ee9a798719e7f, -, 40 )}, +{HEX_DBL( +, 1, c3f8f01c3f8f, -, 1 ), HEX_DBL( +, 1, 70742d4ef, -, 3 ), HEX_DBL( +, 1, 3ff1352c1219c, -, 46 )}, +{HEX_DBL( +, 1, bdd2b899406f7, -, 1 ), HEX_DBL( +, 1, 98edd077e, -, 3 ), HEX_DBL( +, 1, c383cd11362f4, -, 41 )}, +{HEX_DBL( +, 1, b7d6c3dda338b, -, 1 ), HEX_DBL( +, 1, c0db6cdd9, -, 3 ), HEX_DBL( +, 1, 37bd85b1a824e, -, 41 )}, +{HEX_DBL( +, 1, b2036406c80d9, -, 1 ), HEX_DBL( +, 1, e840be74e, -, 3 ), 
HEX_DBL( +, 1, a9334d525e1ec, -, 41 )}, +{HEX_DBL( +, 1, ac5701ac5701a, -, 1 ), HEX_DBL( +, 1, 0790adbb, -, 2 ), HEX_DBL( +, 1, 8060bfb6a491, -, 41 )}, +{HEX_DBL( +, 1, a6d01a6d01a6d, -, 1 ), HEX_DBL( +, 1, 1ac05b2918, -, 2 ), HEX_DBL( +, 1, c1c161471580a, -, 40 )}, +{HEX_DBL( +, 1, a16d3f97a4b01, -, 1 ), HEX_DBL( +, 1, 2db10fc4d8, -, 2 ), HEX_DBL( +, 1, ab1aa62214581, -, 42 )}, +{HEX_DBL( +, 1, 9c2d14ee4a101, -, 1 ), HEX_DBL( +, 1, 406463b1b, -, 2 ), HEX_DBL( +, 1, 12e95dbda6611, -, 44 )}, +{HEX_DBL( +, 1, 970e4f80cb872, -, 1 ), HEX_DBL( +, 1, 52dbdfc4c8, -, 2 ), HEX_DBL( +, 1, 6b53fee511af, -, 42 )}, +{HEX_DBL( +, 1, 920fb49d0e228, -, 1 ), HEX_DBL( +, 1, 6518fe467, -, 2 ), HEX_DBL( +, 1, eea7d7d7d1764, -, 40 )}, +{HEX_DBL( +, 1, 8d3018d3018d3, -, 1 ), HEX_DBL( +, 1, 771d2ba7e8, -, 2 ), HEX_DBL( +, 1, ecefa8d4fab97, -, 40 )}, +{HEX_DBL( +, 1, 886e5f0abb049, -, 1 ), HEX_DBL( +, 1, 88e9c72e08, -, 2 ), HEX_DBL( +, 1, 913ea3d33fd14, -, 41 )}, +{HEX_DBL( +, 1, 83c977ab2bedd, -, 1 ), HEX_DBL( +, 1, 9a802391e, -, 2 ), HEX_DBL( +, 1, 197e845877c94, -, 41 )}, +{HEX_DBL( +, 1, 7f405fd017f4, -, 1 ), HEX_DBL( +, 1, abe18797f, -, 2 ), HEX_DBL( +, 1, f4a52f8e8a81, -, 42 )}, +{HEX_DBL( +, 1, 7ad2208e0ecc3, -, 1 ), HEX_DBL( +, 1, bd0f2e9e78, -, 2 ), HEX_DBL( +, 1, 031f4336644cc, -, 42 )}, +{HEX_DBL( +, 1, 767dce434a9b1, -, 1 ), HEX_DBL( +, 1, ce0a4923a, -, 2 ), HEX_DBL( +, 1, 61f33c897020c, -, 40 )}, +{HEX_DBL( +, 1, 724287f46debc, -, 1 ), HEX_DBL( +, 1, ded3fd442, -, 2 ), HEX_DBL( +, 1, b2632e830632, -, 41 )}, +{HEX_DBL( +, 1, 6e1f76b4337c6, -, 1 ), HEX_DBL( +, 1, ef6d673288, -, 2 ), HEX_DBL( +, 1, 888ec245a0bf, -, 40 )}, +{HEX_DBL( +, 1, 6a13cd153729, -, 1 ), HEX_DBL( +, 1, ffd799a838, -, 2 ), HEX_DBL( +, 1, fe6f3b2f5fc8e, -, 40 )}, +{HEX_DBL( +, 1, 661ec6a5122f9, -, 1 ), HEX_DBL( +, 1, 0809cf27f4, -, 1 ), HEX_DBL( +, 1, 81eaa9ef284dd, -, 40 )}, +{HEX_DBL( +, 1, 623fa7701623f, -, 1 ), HEX_DBL( +, 1, 10113b153c, -, 1 ), HEX_DBL( +, 1, 1d7b07d6b1143, -, 42 )}, +{HEX_DBL( +, 1, 
5e75bb8d015e7, -, 1 ), HEX_DBL( +, 1, 18028cf728, -, 1 ), HEX_DBL( +, 1, 76b100b1f6c6, -, 41 )}, +{HEX_DBL( +, 1, 5ac056b015ac, -, 1 ), HEX_DBL( +, 1, 1fde3d30e8, -, 1 ), HEX_DBL( +, 1, 26faeb9870945, -, 45 )}, +{HEX_DBL( +, 1, 571ed3c506b39, -, 1 ), HEX_DBL( +, 1, 27a4c0585c, -, 1 ), HEX_DBL( +, 1, 7f2c5344d762b, -, 42 )} +}; + +static double __loglTable2[64][3] = { +{HEX_DBL( +, 1, 01fbe7f0a1be6, +, 0 ), HEX_DBL( -, 1, 6cf6ddd26112a, -, 7 ), HEX_DBL( +, 1, 0725e5755e314, -, 60 )}, +{HEX_DBL( +, 1, 01eba93a97b12, +, 0 ), HEX_DBL( -, 1, 6155b1d99f603, -, 7 ), HEX_DBL( +, 1, 4bcea073117f4, -, 60 )}, +{HEX_DBL( +, 1, 01db6c9029cd1, +, 0 ), HEX_DBL( -, 1, 55b54153137ff, -, 7 ), HEX_DBL( +, 1, 21e8faccad0ec, -, 61 )}, +{HEX_DBL( +, 1, 01cb31f0f534c, +, 0 ), HEX_DBL( -, 1, 4a158c27245bd, -, 7 ), HEX_DBL( +, 1, 1a5b7bfbf35d3, -, 60 )}, +{HEX_DBL( +, 1, 01baf95c9723c, +, 0 ), HEX_DBL( -, 1, 3e76923e3d678, -, 7 ), HEX_DBL( +, 1, eee400eb5fe34, -, 62 )}, +{HEX_DBL( +, 1, 01aac2d2acee6, +, 0 ), HEX_DBL( -, 1, 32d85380ce776, -, 7 ), HEX_DBL( +, 1, cbf7a513937bd, -, 61 )}, +{HEX_DBL( +, 1, 019a8e52d401e, +, 0 ), HEX_DBL( -, 1, 273acfd74be72, -, 7 ), HEX_DBL( +, 1, 5c64599efa5e6, -, 60 )}, +{HEX_DBL( +, 1, 018a5bdca9e42, +, 0 ), HEX_DBL( -, 1, 1b9e072a2e65, -, 7 ), HEX_DBL( +, 1, 364180e0a5d37, -, 60 )}, +{HEX_DBL( +, 1, 017a2b6fcc33e, +, 0 ), HEX_DBL( -, 1, 1001f961f3243, -, 7 ), HEX_DBL( +, 1, 63d795746f216, -, 60 )}, +{HEX_DBL( +, 1, 0169fd0bd8a8a, +, 0 ), HEX_DBL( -, 1, 0466a6671bca4, -, 7 ), HEX_DBL( +, 1, 4c99ff1907435, -, 60 )}, +{HEX_DBL( +, 1, 0159d0b06d129, +, 0 ), HEX_DBL( -, 1, f1981c445cd05, -, 8 ), HEX_DBL( +, 1, 4bfff6366b723, -, 62 )}, +{HEX_DBL( +, 1, 0149a65d275a6, +, 0 ), HEX_DBL( -, 1, da6460f76ab8c, -, 8 ), HEX_DBL( +, 1, 9c5404f47589c, -, 61 )}, +{HEX_DBL( +, 1, 01397e11a581b, +, 0 ), HEX_DBL( -, 1, c3321ab87f4ef, -, 8 ), HEX_DBL( +, 1, c0da537429cea, -, 61 )}, +{HEX_DBL( +, 1, 012957cd85a28, +, 0 ), HEX_DBL( -, 1, ac014958c112c, -, 8 ), HEX_DBL( +, 1, 
000c2a1b595e3, -, 64 )}, +{HEX_DBL( +, 1, 0119339065ef7, +, 0 ), HEX_DBL( -, 1, 94d1eca95f67a, -, 8 ), HEX_DBL( +, 1, d8d20b0564d5, -, 61 )}, +{HEX_DBL( +, 1, 01091159e4b3d, +, 0 ), HEX_DBL( -, 1, 7da4047b92b3e, -, 8 ), HEX_DBL( +, 1, 6194a5d68cf2, -, 66 )}, +{HEX_DBL( +, 1, 00f8f129a0535, +, 0 ), HEX_DBL( -, 1, 667790a09bf77, -, 8 ), HEX_DBL( +, 1, ca230e0bea645, -, 61 )}, +{HEX_DBL( +, 1, 00e8d2ff374a1, +, 0 ), HEX_DBL( -, 1, 4f4c90e9c4ead, -, 8 ), HEX_DBL( +, 1, 1de3e7f350c1, -, 61 )}, +{HEX_DBL( +, 1, 00d8b6da482ce, +, 0 ), HEX_DBL( -, 1, 3823052860649, -, 8 ), HEX_DBL( +, 1, 5789b4c5891b8, -, 64 )}, +{HEX_DBL( +, 1, 00c89cba71a8c, +, 0 ), HEX_DBL( -, 1, 20faed2dc9a9e, -, 8 ), HEX_DBL( +, 1, 9e7c40f9839fd, -, 62 )}, +{HEX_DBL( +, 1, 00b8849f52834, +, 0 ), HEX_DBL( -, 1, 09d448cb65014, -, 8 ), HEX_DBL( +, 1, 387e3e9b6d02, -, 62 )}, +{HEX_DBL( +, 1, 00a86e88899a4, +, 0 ), HEX_DBL( -, 1, e55e2fa53ebf1, -, 9 ), HEX_DBL( +, 1, cdaa71fddfddf, -, 62 )}, +{HEX_DBL( +, 1, 00985a75b5e3f, +, 0 ), HEX_DBL( -, 1, b716b429dce0f, -, 9 ), HEX_DBL( +, 1, 2f2af081367bf, -, 63 )}, +{HEX_DBL( +, 1, 00884866766ee, +, 0 ), HEX_DBL( -, 1, 88d21ec7a16d7, -, 9 ), HEX_DBL( +, 1, fb95c228d6f16, -, 62 )}, +{HEX_DBL( +, 1, 0078385a6a61d, +, 0 ), HEX_DBL( -, 1, 5a906f219a9e8, -, 9 ), HEX_DBL( +, 1, 18aff10a89f29, -, 64 )}, +{HEX_DBL( +, 1, 00682a5130fbe, +, 0 ), HEX_DBL( -, 1, 2c51a4dae87f1, -, 9 ), HEX_DBL( +, 1, bcc7e33ddde3, -, 63 )}, +{HEX_DBL( +, 1, 00581e4a69944, +, 0 ), HEX_DBL( -, 1, fc2b7f2d782b1, -, 10 ), HEX_DBL( +, 1, fe3ef3300a9fa, -, 64 )}, +{HEX_DBL( +, 1, 00481445b39a8, +, 0 ), HEX_DBL( -, 1, 9fb97df0b0b83, -, 10 ), HEX_DBL( +, 1, 0d9a601f2f324, -, 65 )}, +{HEX_DBL( +, 1, 00380c42ae963, +, 0 ), HEX_DBL( -, 1, 434d4546227ae, -, 10 ), HEX_DBL( +, 1, 0b9b6a5868f33, -, 63 )}, +{HEX_DBL( +, 1, 00280640fa271, +, 0 ), HEX_DBL( -, 1, cdcda8e930c19, -, 11 ), HEX_DBL( +, 1, 3d424ab39f789, -, 64 )}, +{HEX_DBL( +, 1, 0018024036051, +, 0 ), HEX_DBL( -, 1, 150c558601261, -, 11 ), HEX_DBL( 
+, 1, 285bb90327a0f, -, 64 )}, +{HEX_DBL( +, 1, 0, +, 0 ), HEX_DBL( +, 0, 0, +, 0 ), HEX_DBL( +, 0, 0, +, 0 )}, +{HEX_DBL( +, 1, 0, +, 0 ), HEX_DBL( +, 0, 0, +, 0 ), HEX_DBL( +, 0, 0, +, 0 )}, +{HEX_DBL( +, 1, ffa011fca0a1e, -, 1 ), HEX_DBL( +, 1, 14e5640c4197b, -, 10 ), HEX_DBL( +, 1, 95728136ae401, -, 63 )}, +{HEX_DBL( +, 1, ff6031f064e07, -, 1 ), HEX_DBL( +, 1, cd61806bf532d, -, 10 ), HEX_DBL( +, 1, 568a4f35d8538, -, 63 )}, +{HEX_DBL( +, 1, ff2061d532b9c, -, 1 ), HEX_DBL( +, 1, 42e34af550eda, -, 9 ), HEX_DBL( +, 1, 8f69cee55fec, -, 62 )}, +{HEX_DBL( +, 1, fee0a1a513253, -, 1 ), HEX_DBL( +, 1, 9f0a5523902ea, -, 9 ), HEX_DBL( +, 1, daec734b11615, -, 63 )}, +{HEX_DBL( +, 1, fea0f15a12139, -, 1 ), HEX_DBL( +, 1, fb25e19f11b26, -, 9 ), HEX_DBL( +, 1, 8bafca62941da, -, 62 )}, +{HEX_DBL( +, 1, fe6150ee3e6d4, -, 1 ), HEX_DBL( +, 1, 2b9af9a28e282, -, 8 ), HEX_DBL( +, 1, 0fd3674e1dc5b, -, 61 )}, +{HEX_DBL( +, 1, fe21c05baa109, -, 1 ), HEX_DBL( +, 1, 599d4678f24b9, -, 8 ), HEX_DBL( +, 1, dafce1f09937b, -, 61 )}, +{HEX_DBL( +, 1, fde23f9c69cf9, -, 1 ), HEX_DBL( +, 1, 8799d8c046eb, -, 8 ), HEX_DBL( +, 1, ffa0ce0bdd217, -, 65 )}, +{HEX_DBL( +, 1, fda2ceaa956e8, -, 1 ), HEX_DBL( +, 1, b590b1e5951ee, -, 8 ), HEX_DBL( +, 1, 645a769232446, -, 62 )}, +{HEX_DBL( +, 1, fd636d8047a1f, -, 1 ), HEX_DBL( +, 1, e381d3555dbcf, -, 8 ), HEX_DBL( +, 1, 882320d368331, -, 61 )}, +{HEX_DBL( +, 1, fd241c179e0cc, -, 1 ), HEX_DBL( +, 1, 08b69f3dccde, -, 7 ), HEX_DBL( +, 1, 01ad5065aba9e, -, 61 )}, +{HEX_DBL( +, 1, fce4da6ab93e8, -, 1 ), HEX_DBL( +, 1, 1fa97a61dd298, -, 7 ), HEX_DBL( +, 1, 84cd1f931ae34, -, 60 )}, +{HEX_DBL( +, 1, fca5a873bcb19, -, 1 ), HEX_DBL( +, 1, 36997bcc54a3f, -, 7 ), HEX_DBL( +, 1, 1485e97eaee03, -, 60 )}, +{HEX_DBL( +, 1, fc66862ccec93, -, 1 ), HEX_DBL( +, 1, 4d86a43264a4f, -, 7 ), HEX_DBL( +, 1, c75e63370988b, -, 61 )}, +{HEX_DBL( +, 1, fc27739018cfe, -, 1 ), HEX_DBL( +, 1, 6470f448fb09d, -, 7 ), HEX_DBL( +, 1, d7361eeaed0a1, -, 65 )}, +{HEX_DBL( +, 1, fbe87097c6f5a, -, 1 
), HEX_DBL( +, 1, 7b586cc4c2523, -, 7 ), HEX_DBL( +, 1, b3df952cc473c, -, 61 )}, +{HEX_DBL( +, 1, fba97d3e084dd, -, 1 ), HEX_DBL( +, 1, 923d0e5a21e06, -, 7 ), HEX_DBL( +, 1, cf56c7b64ae5d, -, 62 )}, +{HEX_DBL( +, 1, fb6a997d0ecdc, -, 1 ), HEX_DBL( +, 1, a91ed9bd3df9a, -, 7 ), HEX_DBL( +, 1, b957bdcd89e43, -, 61 )}, +{HEX_DBL( +, 1, fb2bc54f0f4ab, -, 1 ), HEX_DBL( +, 1, bffdcfa1f7fbb, -, 7 ), HEX_DBL( +, 1, ea8cad9a21771, -, 62 )}, +{HEX_DBL( +, 1, faed00ae41783, -, 1 ), HEX_DBL( +, 1, d6d9f0bbee6f6, -, 7 ), HEX_DBL( +, 1, 5762a9af89c82, -, 60 )}, +{HEX_DBL( +, 1, faae4b94dfe64, -, 1 ), HEX_DBL( +, 1, edb33dbe7d335, -, 7 ), HEX_DBL( +, 1, 21e24fc245697, -, 62 )}, +{HEX_DBL( +, 1, fa6fa5fd27ff8, -, 1 ), HEX_DBL( +, 1, 0244dbae5ed05, -, 6 ), HEX_DBL( +, 1, 12ef51b967102, -, 60 )}, +{HEX_DBL( +, 1, fa310fe15a078, -, 1 ), HEX_DBL( +, 1, 0daeaf24c3529, -, 6 ), HEX_DBL( +, 1, 10d3cfca60b45, -, 59 )}, +{HEX_DBL( +, 1, f9f2893bb9192, -, 1 ), HEX_DBL( +, 1, 1917199bb66bc, -, 6 ), HEX_DBL( +, 1, 6cf6034c32e19, -, 60 )}, +{HEX_DBL( +, 1, f9b412068b247, -, 1 ), HEX_DBL( +, 1, 247e1b6c615d5, -, 6 ), HEX_DBL( +, 1, 42f0fffa229f7, -, 61 )}, +{HEX_DBL( +, 1, f975aa3c18ed6, -, 1 ), HEX_DBL( +, 1, 2fe3b4efcc5ad, -, 6 ), HEX_DBL( +, 1, 70106136a8919, -, 60 )}, +{HEX_DBL( +, 1, f93751d6ae09b, -, 1 ), HEX_DBL( +, 1, 3b47e67edea93, -, 6 ), HEX_DBL( +, 1, 38dd5a4f6959a, -, 59 )}, +{HEX_DBL( +, 1, f8f908d098df6, -, 1 ), HEX_DBL( +, 1, 46aab0725ea6c, -, 6 ), HEX_DBL( +, 1, 821fc1e799e01, -, 60 )}, +{HEX_DBL( +, 1, f8bacf242aa2c, -, 1 ), HEX_DBL( +, 1, 520c1322f1e4e, -, 6 ), HEX_DBL( +, 1, 129dcda3ad563, -, 60 )}, +{HEX_DBL( +, 1, f87ca4cbb755, -, 1 ), HEX_DBL( +, 1, 5d6c0ee91d2ab, -, 6 ), HEX_DBL( +, 1, c5b190c04606e, -, 62 )}, +{HEX_DBL( +, 1, f83e89c195c25, -, 1 ), HEX_DBL( +, 1, 68caa41d448c3, -, 6 ), HEX_DBL( +, 1, 4723441195ac9, -, 59 )} +}; + +static double __loglTable3[8][3] = { +{HEX_DBL( +, 1, 000e00c40ab89, +, 0 ), HEX_DBL( -, 1, 4332be0032168, -, 12 ), HEX_DBL( +, 1, 
a1003588d217a, -, 65 )}, +{HEX_DBL( +, 1, 000a006403e82, +, 0 ), HEX_DBL( -, 1, cdb2987366fcc, -, 13 ), HEX_DBL( +, 1, 5c86001294bbc, -, 67 )}, +{HEX_DBL( +, 1, 0006002400d8, +, 0 ), HEX_DBL( -, 1, 150297c90fa6f, -, 13 ), HEX_DBL( +, 1, 01fb4865fae32, -, 66 )}, +{HEX_DBL( +, 1, 0, +, 0 ), HEX_DBL( +, 0, 0, +, 0 ), HEX_DBL( +, 0, 0, +, 0 )}, +{HEX_DBL( +, 1, 0, +, 0 ), HEX_DBL( +, 0, 0, +, 0 ), HEX_DBL( +, 0, 0, +, 0 )}, +{HEX_DBL( +, 1, ffe8011ff280a, -, 1 ), HEX_DBL( +, 1, 14f8daf5e3d3b, -, 12 ), HEX_DBL( +, 1, 3c933b4b6b914, -, 68 )}, +{HEX_DBL( +, 1, ffd8031fc184e, -, 1 ), HEX_DBL( +, 1, cd978c38042bb, -, 12 ), HEX_DBL( +, 1, 10f8e642e66fd, -, 65 )}, +{HEX_DBL( +, 1, ffc8061f5492b, -, 1 ), HEX_DBL( +, 1, 43183c878274e, -, 11 ), HEX_DBL( +, 1, 5885dd1eb6582, -, 65 )} +}; + +static void __log2_ep(double *hi, double *lo, double x) +{ + union { uint64_t i; double d; } uu; + + int m; + double f = reference_frexp(x, &m); + + // bring f in [0.75, 1.5) + if( f < 0.75 ) { + f *= 2.0; + m -= 1; + } + + // index first table .... brings down to [1-2^-7, 1+2^6) + uu.d = f; + int index = (int) (((uu.i + ((uint64_t) 1 << 51)) & 0x000fc00000000000ULL) >> 46); + double r1 = __loglTable1[index][0]; + double logr1hi = __loglTable1[index][1]; + double logr1lo = __loglTable1[index][2]; + // since log1rhi has 39 bits of precision, we have 14 bit in hand ... since |m| <= 1023 + // which needs 10bits at max, we can directly add m to log1hi without spilling + logr1hi += m; + + // argument reduction needs to be in double-double since reduced argument will form the + // leading term of polynomial approximation which sets the precision we eventually achieve + double zhi, zlo; + MulD(&zhi, &zlo, r1, uu.d); + + // second index table .... 
brings down to [1-2^-12, 1+2^-11) + uu.d = zhi; + index = (int) (((uu.i + ((uint64_t) 1 << 46)) & 0x00007e0000000000ULL) >> 41); + double r2 = __loglTable2[index][0]; + double logr2hi = __loglTable2[index][1]; + double logr2lo = __loglTable2[index][2]; + + // reduce argument + MulDD(&zhi, &zlo, zhi, zlo, r2, 0.0); + + // third index table .... brings down to [1-2^-14, 1+2^-13) + // Actually reduction to 2^-11 would have been sufficient to calculate + // second order term in polynomial in double rather than double-double, I + // reduced it a bit more to make sure other systematic arithmetic errors + // are guarded against .... also this allows lower order product of leading polynomial + // term i.e. Ao_hi*z_lo + Ao_lo*z_hi to be done in double rather than double-double ... + // hence only term that needs to be done in double-double is Ao_hi*z_hi + uu.d = zhi; + index = (int) (((uu.i + ((uint64_t) 1 << 41)) & 0x0000038000000000ULL) >> 39); + double r3 = __loglTable3[index][0]; + double logr3hi = __loglTable3[index][1]; + double logr3lo = __loglTable3[index][2]; + + // log2(x) = m + log2(r1) + log2(r2) + log2(r3) + log2(1 + (zhi + zlo)) + // calculate sum of first three terms ... note that m has already + // been added to log2(r1)_hi + double log2hi, log2lo; + AddDD(&log2hi, &log2lo, logr1hi, logr1lo, logr2hi, logr2lo); + AddDD(&log2hi, &log2lo, logr3hi, logr3lo, log2hi, log2lo); + + // final argument reduction .... zhi will be in [1-2^-14, 1+2^-13) after this + MulDD(&zhi, &zlo, zhi, zlo, r3, 0.0); + // we don't need to do full double-double subtract here. subtracting 1.0 for higher + // term is exact + zhi = zhi - 1.0; + // normalize + AddD(&zhi, &zlo, zhi, zlo); + + // polynomial fitting to compute log2(1 + z) ... 
fourth order polynomial fit + // to log2(1 + z)/z gives minimax absolute error of O(2^-76) with z in [-2^-14, 2^-13] + // log2(1 + z)/z = Ao + A1*z + A2*z^2 + A3*z^3 + A4*z^4 + // => log2(1 + z) = Ao*z + A1*z^2 + A2*z^3 + A3*z^4 + A4*z^5 + // => log2(1 + z) = (Aohi + Aolo)*(zhi + zlo) + z^2*(A1 + A2*z + A3*z^2 + A4*z^3) + // since we are looking for at least 64 bits of precision and z in [-2^-14, 2^-13], final term + // can be done in double .... also Aolo*zhi + Aohi*zlo can be done in double .... + // Aohi*zhi needs to be done in double-double + + double Aohi = HEX_DBL( +, 1, 71547652b82fe, +, 0 ); + double Aolo = HEX_DBL( +, 1, 777c9cbb675c, -, 56 ); + double y; + y = HEX_DBL( +, 1, 276d2736fade7, -, 2 ); + y = HEX_DBL( -, 1, 7154765782df1, -, 2 ) + y*zhi; + y = HEX_DBL( +, 1, ec709dc3a0f67, -, 2 ) + y*zhi; + y = HEX_DBL( -, 1, 71547652b82fe, -, 1 ) + y*zhi; + double zhisq = zhi*zhi; + y = y*zhisq; + y = y + zhi*Aolo; + y = y + zlo*Aohi; + + MulD(&zhi, &zlo, Aohi, zhi); + AddDD(&zhi, &zlo, zhi, zlo, y, 0.0); + AddDD(&zhi, &zlo, zhi, zlo, log2hi, log2lo); + + *hi = zhi; + *lo = zlo; +} + +long double reference_powl( long double x, long double y ) +{ + + + // this will be used for testing doubles i.e. arguments will + // be doubles so cast the input back to double ... returned + // result will be long double though .... > 53 bits of precision + // if platform allows. + // =========== + // New finding. + // =========== + // this function is getting used for computing reference cube root (cbrt) + // as follows __powl( x, 1.0L/3.0L ) so if the y are assumed to + // be double and is converted from long double to double, truncation + // causes errors. So we need to treat y as long double and convert it + // to hi, lo doubles when performing y*log2(x). 
+ +// double x = (double) xx; +// double y = (double) yy; + + static const double neg_epsilon = HEX_DBL( +, 1, 0, +, 53 ); + + //if x = 1, return x for any y, even NaN + if( x == 1.0 ) + return x; + + //if y == 0, return 1 for any x, even NaN + if( y == 0.0 ) + return 1.0L; + + //get NaNs out of the way + if( x != x || y != y ) + return x + y; + + //do the work required to sort out edge cases + double fabsy = reference_fabs( y ); + double fabsx = reference_fabs( x ); + double iy = reference_rint( fabsy ); //we do round to nearest here so that |fy| <= 0.5 + if( iy > fabsy )//convert nearbyint to floor + iy -= 1.0; + int isOddInt = 0; + if( fabsy == iy && !reference_isinf(fabsy) && iy < neg_epsilon ) + isOddInt = (int) (iy - 2.0 * rint( 0.5 * iy )); //might be 0, -1, or 1 + + ///test a few more edge cases + //deal with x == 0 cases + if( x == 0.0 ) + { + if( ! isOddInt ) + x = 0.0; + + if( y < 0 ) + x = 1.0/ x; + + return x; + } + + //x == +-Inf cases + if( isinf(fabsx) ) + { + if( x < 0 ) + { + if( isOddInt ) + { + if( y < 0 ) + return -0.0; + else + return -INFINITY; + } + else + { + if( y < 0 ) + return 0.0; + else + return INFINITY; + } + } + + if( y < 0 ) + return 0; + return INFINITY; + } + + //y = +-inf cases + if( isinf(fabsy) ) + { + if( x == -1 ) + return 1; + + if( y < 0 ) + { + if( fabsx < 1 ) + return INFINITY; + return 0; + } + if( fabsx < 1 ) + return 0; + return INFINITY; + } + + // x < 0 and y non integer case + if( x < 0 && iy != fabsy ) + { + //return nan; + return cl_make_nan(); + } + + //speedy resolution of sqrt and reciprocal sqrt + if( fabsy == 0.5 ) + { + long double xl = sqrtl( x ); + if( y < 0 ) + xl = 1.0/ xl; + return xl; + } + + double log2x_hi, log2x_lo; + + // extended precision log .... 
accurate to at least 64-bits + couple of guard bits + __log2_ep(&log2x_hi, &log2x_lo, fabsx); + + double ylog2x_hi, ylog2x_lo; + + double y_hi = (double) y; + double y_lo = (double) ( y - (long double) y_hi); + + // compute product of y*log2(x) + // scale to avoid overflow in double-double multiplication + if( reference_fabs( y ) > HEX_DBL( +, 1, 0, +, 970 ) ) { + y_hi = reference_ldexp(y_hi, -53); + y_lo = reference_ldexp(y_lo, -53); + } + MulDD(&ylog2x_hi, &ylog2x_lo, log2x_hi, log2x_lo, y_hi, y_lo); + if( fabs( y ) > HEX_DBL( +, 1, 0, +, 970 ) ) { + ylog2x_hi = reference_ldexp(ylog2x_hi, 53); + ylog2x_lo = reference_ldexp(ylog2x_lo, 53); + } + + long double powxy; + if(isinf(ylog2x_hi) || (reference_fabs(ylog2x_hi) > 2200)) { + powxy = reference_signbit(ylog2x_hi) ? HEX_DBL( +, 0, 0, +, 0 ) : INFINITY; + } else { + // separate integer + fractional part + long int m = lrint(ylog2x_hi); + AddDD(&ylog2x_hi, &ylog2x_lo, ylog2x_hi, ylog2x_lo, -m, 0.0); + + // revert to long double arithmetic + long double ylog2x = (long double) ylog2x_hi + (long double) ylog2x_lo; + long double tmp = reference_exp2l( ylog2x ); + powxy = reference_scalblnl(tmp, m); + } + + // if y is odd integer and x is negative, reverse sign + if( isOddInt & reference_signbit(x)) + powxy = -powxy; + return powxy; +} + +double reference_nextafter(double xx, double yy) +{ + float x = (float) xx; + float y = (float) yy; + + // take care of nans + if( x != x ) + return x; + + if( y != y ) + return y; + + if( x == y ) + return y; + + int32f_t a, b; + + a.f = x; + b.f = y; + + if( a.i & 0x80000000 ) + a.i = 0x80000000 - a.i; + if(b.i & 0x80000000 ) + b.i = 0x80000000 - b.i; + + a.i += (a.i < b.i) ? 1 : -1; + a.i = (a.i < 0) ? 
(cl_int) 0x80000000 - a.i : a.i; + + return a.f; +} + + +long double reference_nextafterl(long double xx, long double yy) +{ + double x = (double) xx; + double y = (double) yy; + + // take care of nans + if( x != x ) + return x; + + if( y != y ) + return y; + + int64d_t a, b; + + a.d = x; + b.d = y; + + int64_t tmp = 0x8000000000000000LL; + + if( a.l & tmp ) + a.l = tmp - a.l; + if(b.l & tmp ) + b.l = tmp - b.l; + + // edge case. if (x == y) or (x = 0.0f and y = -0.0f) or (x = -0.0f and y = 0.0f) + // test needs to be done using integer rep because + // subnormals may be flushed to zero on some platforms + if( a.l == b.l ) + return y; + + a.l += (a.l < b.l) ? 1 : -1; + a.l = (a.l < 0) ? tmp - a.l : a.l; + + return a.d; +} + +double reference_fdim(double xx, double yy) +{ + float x = (float) xx; + float y = (float) yy; + + if( x != x ) + return x; + + if( y != y ) + return y; + + float r = ( x > y ) ? (float) reference_subtract( x, y) : 0.0f; + return r; + +} + + +long double reference_fdiml(long double xx, long double yy) +{ + double x = (double) xx; + double y = (double) yy; + + if( x != x ) + return x; + + if( y != y ) + return y; + + double r = ( x > y ) ? (double) reference_subtractl(x, y) : 0.0; + return r; +} + +double reference_remquo(double xd, double yd, int *n) +{ + float xx = (float) xd; + float yy = (float) yd; + + if( isnan(xx) || isnan(yy) || + fabsf(xx) == INFINITY || + yy == 0.0 ) + { + *n = 0; + return cl_make_nan(); + } + + if( fabsf(yy) == INFINITY || xx == 0.0f ) { + *n = 0; + return xd; + } + + if( fabsf(xx) == fabsf(yy) ) { + *n = (xx == yy) ? 1 : -1; + return reference_signbit( xx ) ? -0.0 : 0.0; + } + + int signx = reference_signbit( xx ) ? -1 : 1; + int signy = reference_signbit( yy ) ? -1 : 1; + int signn = (signx == signy) ? 
1 : -1; + float x = fabsf(xx); + float y = fabsf(yy); + + int ex, ey; + ex = reference_ilogb( x ); + ey = reference_ilogb( y ); + float xr = x; + float yr = y; + uint32_t q = 0; + + if(ex-ey >= -1) { + + yr = (float) reference_ldexp( y, -ey ); + xr = (float) reference_ldexp( x, -ex ); + + if(ex-ey >= 0) { + + + int i; + for(i = ex-ey; i > 0; i--) { + q <<= 1; + if(xr >= yr) { + xr -= yr; + q += 1; + } + xr += xr; + } + q <<= 1; + if( xr > yr ) { + xr -= yr; + q += 1; + } + } + else //ex-ey = -1 + xr = reference_ldexp(xr, ex-ey); + } + + if( (yr < 2.0f*xr) || ( (yr == 2.0f*xr) && (q & 0x00000001) ) ) { + xr -= yr; + q += 1; + } + + if(ex-ey >= -1) + xr = reference_ldexp(xr, ey); + + int qout = q & 0x0000007f; + if( signn < 0) + qout = -qout; + if( xx < 0.0 ) + xr = -xr; + + *n = qout; + + return xr; +} + +long double reference_remquol(long double xd, long double yd, int *n) +{ + + double xx = (double) xd; + double yy = (double) yd; + + if( isnan(xx) || isnan(yy) || + fabs(xx) == INFINITY || + yy == 0.0 ) + { + *n = 0; + return cl_make_nan(); + } + + if( reference_fabs(yy) == INFINITY || xx == 0.0 ) { + *n = 0; + return xd; + } + + if( reference_fabs(xx) == reference_fabs(yy) ) { + *n = (xx == yy) ? 1 : -1; + return reference_signbit( xx ) ? -0.0 : 0.0; + } + + int signx = reference_signbit( xx ) ? -1 : 1; + int signy = reference_signbit( yy ) ? -1 : 1; + int signn = (signx == signy) ? 
1 : -1; + double x = reference_fabs(xx); + double y = reference_fabs(yy); + + int ex, ey; + ex = reference_ilogbl( x ); + ey = reference_ilogbl( y ); + double xr = x; + double yr = y; + uint32_t q = 0; + + if(ex-ey >= -1) { + + yr = reference_ldexp( y, -ey ); + xr = reference_ldexp( x, -ex ); + int i; + + if(ex-ey >= 0) { + + for(i = ex-ey; i > 0; i--) { + q <<= 1; + if(xr >= yr) { + xr -= yr; + q += 1; + } + xr += xr; + } + q <<= 1; + if( xr > yr ) { + xr -= yr; + q += 1; + } + } + else + xr = reference_ldexp(xr, ex-ey); + } + + if( (yr < 2.0*xr) || ( (yr == 2.0*xr) && (q & 0x00000001) ) ) { + xr -= yr; + q += 1; + } + + if(ex-ey >= -1) + xr = reference_ldexp(xr, ey); + + int qout = q & 0x0000007f; + if( signn < 0) + qout = -qout; + if( xx < 0.0 ) + xr = -xr; + + *n = qout; + return xr; +} + +static double reference_scalbn(double x, int n) +{ + if(reference_isinf(x) || reference_isnan(x) || x == 0.0) + return x; + + int bias = 1023; + union { double d; cl_long l; } u; + u.d = (double) x; + int e = (int)((u.l & 0x7ff0000000000000LL) >> 52); + if(e == 0) + { + u.l |= ((cl_long)1023 << 52); + u.d -= 1.0; + e = (int)((u.l & 0x7ff0000000000000LL) >> 52) - 1022; + } + e += n; + if(e >= 2047 || n >= 2098 ) + return reference_copysign(INFINITY, x); + if(e < -51 || n <-2097 ) + return reference_copysign(0.0, x); + if(e <= 0) + { + bias += (e-1); + e = 1; + } + u.l &= 0x800fffffffffffffLL; + u.l |= ((cl_long)e << 52); + x = u.d; + u.l = ((cl_long)bias << 52); + return x * u.d; +} + +static long double reference_scalblnl(long double x, long n) +{ +#if defined(__i386__) || defined(__x86_64__) // INTEL + union + { + long double d; + struct{ cl_ulong m; cl_ushort sexp;}u; + }u; + u.u.m = CL_LONG_MIN; + + if ( reference_isinf(x) ) + return x; + + if( x == 0.0L || n < -2200) + return reference_copysignl( 0.0L, x ); + + if( n > 2200 ) + return reference_copysignl( INFINITY, x ); + + if( n < 0 ) + { + u.u.sexp = 0x3fff - 1022; + while( n <= -1022 ) + { + x *= u.d; + n += 1022; + } 
+ u.u.sexp = 0x3fff + n; + x *= u.d; + return x; + } + + if( n > 0 ) + { + u.u.sexp = 0x3fff + 1023; + while( n >= 1023 ) + { + x *= u.d; + n -= 1023; + } + u.u.sexp = 0x3fff + n; + x *= u.d; + return x; + } + + return x; + +#elif defined(__arm__) // ARM .. sizeof(long double) == sizeof(double) + +#if __DBL_MAX_EXP__ >= __LDBL_MAX_EXP__ + if(reference_isinfl(x) || reference_isnanl(x)) + return x; + + int bias = 1023; + union { double d; cl_long l; } u; + u.d = (double) x; + int e = (int)((u.l & 0x7ff0000000000000LL) >> 52); + if(e == 0) + { + u.l |= ((cl_long)1023 << 52); + u.d -= 1.0; + e = (int)((u.l & 0x7ff0000000000000LL) >> 52) - 1022; + } + e += n; + if(e >= 2047) + return reference_copysignl(INFINITY, x); + if(e < -51) + return reference_copysignl(0.0, x); + if(e <= 0) + { + bias += (e-1); + e = 1; + } + u.l &= 0x800fffffffffffffLL; + u.l |= ((cl_long)e << 52); + x = u.d; + u.l = ((cl_long)bias << 52); + return x * u.d; +#endif + +#else // PPC + return scalblnl(x, n); +#endif +} + +double reference_relaxed_exp( double x ) +{ + return reference_exp(x); +} + +double reference_exp(double x) +{ + return reference_exp2( x * HEX_DBL( +, 1, 71547652b82fe, +, 0 ) ); +} + +long double reference_expl(long double x) +{ +#if defined(__PPC__) + long double scale, bias; + + // The PPC double long version of expl fails to produce denorm results + // and instead generates a 0.0. 
Compensate for this limitation by + // computing expl as: + // expl(x + 40) * expl(-40) + // Likewise, overflows can prematurely produce an infinity, so we + // compute expl as: + // expl(x - 40) * expl(40) + scale = 1.0L; + bias = 0.0L; + if (x < -708.0L) { + bias = 40.0; + scale = expl(-40.0L); + } else if (x > 708.0L) { + bias = -40.0L; + scale = expl(40.0L); + } + return expl(x + bias) * scale; +#else + return expl( x ); +#endif +} + +double reference_sinh(double x) +{ + return sinh(x); +} + +long double reference_sinhl(long double x) +{ + return sinhl(x); +} + +double reference_fmod(double x, double y) +{ + if( x == 0.0 && fabs(y) > 0.0 ) + return x; + + if( fabs(x) == INFINITY || y == 0 ) + return cl_make_nan(); + + if( fabs(y) == INFINITY ) // we know x is finite from above + return x; +#if defined(_MSC_VER) && defined(_M_X64) + return fmod( x, y ); +#else + return fmodf( (float) x, (float) y ); +#endif +} + +long double reference_fmodl(long double x, long double y) +{ + if( x == 0.0L && fabsl(y) > 0.0L ) + return x; + + if( fabsl(x) == INFINITY || y == 0.0L ) + return cl_make_nan(); + + if( fabsl(y) == INFINITY ) // we know x is finite from above + return x; + + return fmod( (double) x, (double) y ); +} + +double reference_modf(double x, double *n) +{ + if(isnan(x)) { + *n = cl_make_nan(); + return cl_make_nan(); + } + float nr; + float yr = modff((float) x, &nr); + *n = nr; + return yr; +} + +long double reference_modfl(long double x, long double *n) +{ + if(isnan(x)) { + *n = cl_make_nan(); + return cl_make_nan(); + } + double nr; + double yr = modf((double) x, &nr); + *n = nr; + return yr; +} + +long double reference_fractl(long double x, long double *ip ) +{ + if(isnan(x)) { + *ip = cl_make_nan(); + return cl_make_nan(); + } + + double i; + double f = modf((double) x, &i ); + if( f < 0.0 ) + { + f = 1.0 + f; + i -= 1.0; + if( f == 1.0 ) + f = HEX_DBL( +, 1, fffffffffffff, -, 1 ); + } + *ip = i; + return f; +} + +long double reference_fabsl(long double 
x) +{ + return fabsl( x ); +} + +double reference_relaxed_log( double x ) +{ + return (float)reference_log((float)x); +} + +double reference_log(double x) +{ + if( x == 0.0 ) + return -INFINITY; + + if( x < 0.0 ) + return cl_make_nan(); + + if( isinf(x) ) + return INFINITY; + + double log2Hi = HEX_DBL( +, 1, 62e42fefa39ef, -, 1 ); + double logxHi, logxLo; + __log2_ep(&logxHi, &logxLo, x); + return logxHi*log2Hi; +} + +long double reference_logl(long double x) +{ + if( x == 0.0 ) + return -INFINITY; + + if( x < 0.0 ) + return cl_make_nan(); + + if( isinf(x) ) + return INFINITY; + + double log2Hi = HEX_DBL( +, 1, 62e42fefa39ef, -, 1 ); + double log2Lo = HEX_DBL( +, 1, abc9e3b39803f, -, 56 ); + double logxHi, logxLo; + __log2_ep(&logxHi, &logxLo, x); + + //double rhi, rlo; + //MulDD(&rhi, &rlo, logxHi, logxLo, log2Hi, log2Lo); + //return (long double) rhi + (long double) rlo; + + long double lg2 = (long double) log2Hi + (long double) log2Lo; + long double logx = (long double) logxHi + (long double) logxLo; + return logx*lg2; +} + +double reference_relaxed_pow( double x, double y) { + return (float)reference_exp2( ((float)y) * (float)reference_log2((float)x)); +} + +double reference_pow( double x, double y ) +{ + static const double neg_epsilon = HEX_DBL( +, 1, 0, +, 53 ); + + //if x = 1, return x for any y, even NaN + if( x == 1.0 ) + return x; + + //if y == 0, return 1 for any x, even NaN + if( y == 0.0 ) + return 1.0; + + //get NaNs out of the way + if( x != x || y != y ) + return x + y; + + //do the work required to sort out edge cases + double fabsy = reference_fabs( y ); + double fabsx = reference_fabs( x ); + double iy = reference_rint( fabsy ); //we do round to nearest here so that |fy| <= 0.5 + if( iy > fabsy )//convert nearbyint to floor + iy -= 1.0; + int isOddInt = 0; + if( fabsy == iy && !reference_isinf(fabsy) && iy < neg_epsilon ) + isOddInt = (int) (iy - 2.0 * rint( 0.5 * iy )); //might be 0, -1, or 1 + + ///test a few more edge cases + //deal with x == 
0 cases + if( x == 0.0 ) + { + if( ! isOddInt ) + x = 0.0; + + if( y < 0 ) + x = 1.0/ x; + + return x; + } + + //x == +-Inf cases + if( isinf(fabsx) ) + { + if( x < 0 ) + { + if( isOddInt ) + { + if( y < 0 ) + return -0.0; + else + return -INFINITY; + } + else + { + if( y < 0 ) + return 0.0; + else + return INFINITY; + } + } + + if( y < 0 ) + return 0; + return INFINITY; + } + + //y = +-inf cases + if( isinf(fabsy) ) + { + if( x == -1 ) + return 1; + + if( y < 0 ) + { + if( fabsx < 1 ) + return INFINITY; + return 0; + } + if( fabsx < 1 ) + return 0; + return INFINITY; + } + + // x < 0 and y non integer case + if( x < 0 && iy != fabsy ) + { + //return nan; + return cl_make_nan(); + } + + //speedy resolution of sqrt and reciprocal sqrt + if( fabsy == 0.5 ) + { + long double xl = reference_sqrt( x ); + if( y < 0 ) + xl = 1.0/ xl; + return xl; + } + + double hi, lo; + __log2_ep(&hi, &lo, fabsx); + double prod = y * hi; + double result = reference_exp2(prod); + return isOddInt ? reference_copysignd(result, x) : result; +} + +double reference_sqrt(double x) +{ + return sqrt(x); +} + +double reference_floor(double x) +{ + return floorf((float) x); +} + +double reference_ldexp(double value, int exponent) +{ +#ifdef __MINGW32__ +/* + * ==================================================== + * This function is from fdlibm: http://www.netlib.org + * It is Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. 
+ * ==================================================== + */ + if(!finite(value)||value==0.0) return value; + return scalbn(value,exponent); +#else + return reference_scalbn(value, exponent); +#endif +} + +long double reference_ldexpl(long double x, int n) +{ + return ldexpl( x, n); +} + +long double reference_coshl(long double x) +{ + return coshl(x); +} + +double reference_ceil(double x) +{ + return ceilf((float) x); +} + +long double reference_ceill(long double x) +{ + if( x == 0.0 || reference_isinfl(x) || reference_isnanl(x) ) + return x; + + long double absx = reference_fabsl(x); + if( absx >= HEX_LDBL( +, 1, 0, +, 52 ) ) + return x; + + if( absx < 1.0 ) + { + if( x < 0.0 ) + return 0.0; + else + return 1.0; + } + + long double r = (long double) ((cl_long) x); + + if( x > 0.0 && r < x ) + r += 1.0; + + return r; +} + + +long double reference_acosl(long double x) +{ + long double x2 = x * x; + int i; + + //Prepare a head + tail representation of PI in long double. A good compiler should get rid of all of this work. + static const cl_ulong pi_bits[2] = { 0x3243F6A8885A308DULL, 0x313198A2E0370734ULL}; // first 126 bits of pi http://www.super-computing.org/pi-hexa_current.html + long double head, tail, temp; +#if __LDBL_MANT_DIG__ >= 64 + // long double has 64-bits of precision or greater + temp = (long double) pi_bits[0] * 0x1.0p64L; + head = temp + (long double) pi_bits[1]; + temp -= head; // rounding err rounding pi_bits[1] into head + tail = (long double) pi_bits[1] + temp; + head *= HEX_LDBL( +, 1, 0, -, 125 ); + tail *= HEX_LDBL( +, 1, 0, -, 125 ); +#else + head = (long double) pi_bits[0]; + tail = (long double) ((cl_long) pi_bits[0] - (cl_long) head ); // residual part of pi_bits[0] after rounding + tail = tail * HEX_LDBL( +, 1, 0, +, 64 ) + (long double) pi_bits[1]; + head *= HEX_LDBL( +, 1, 0, -, 61 ); + tail *= HEX_LDBL( +, 1, 0, -, 125 ); +#endif + + // oversize values and NaNs go to NaN + if( ! 
(x2 <= 1.0) ) + return sqrtl(1.0L - x2 ); + + // + // deal with large |x|: + // sqrt( 1 - x**2) + // acos(|x| > sqrt(0.5)) = 2 * atan( z ); z = -------------------- ; z in [0, sqrt(0.5)/(1+sqrt(0.5) = .4142135...] + // 1 + x + if( x2 > 0.5 ) + { + // we handle the x < 0 case as pi - acos(|x|) + + long double sign = reference_copysignl( 1.0L, x ); + long double fabsx = reference_fabsl( x ); + head -= head * sign; // x > 0 ? 0 : pi.hi + tail -= tail * sign; // x > 0 ? 0 : pi.low + + // z = sqrt( 1-x**2 ) / (1+x) = sqrt( (1-x)(1+x) / (1+x)**2 ) = sqrt( (1-x)/(1+x) ) + long double z2 = (1.0L - fabsx) / (1.0L + fabsx); // z**2 + long double z = sign * sqrtl(z2); + + // atan(sqrt(q)) + // Minimax fit p(x) = ---------------- - 1 + // sqrt(q) + // + // Define q = r*r, and solve for atan(r): + // + // atan(r) = (p(r) + 1) * r = rp(r) + r + static long double atan_coeffs[] = { HEX_LDBL( -, b, 3f52e0c278293b3, -, 67 ), HEX_LDBL( -, a, aaaaaaaaaaa95b8, -, 5 ), + HEX_LDBL( +, c, ccccccccc992407, -, 6 ), HEX_LDBL( -, 9, 24924923024398, -, 6 ), + HEX_LDBL( +, e, 38e38d6f92c98f3, -, 7 ), HEX_LDBL( -, b, a2e89bfb8393ec6, -, 7 ), + HEX_LDBL( +, 9, d89a9f574d412cb, -, 7 ), HEX_LDBL( -, 8, 88580517884c547, -, 7 ), + HEX_LDBL( +, f, 0ab6756abdad408, -, 8 ), HEX_LDBL( -, d, 56a5b07a2f15b49, -, 8 ), + HEX_LDBL( +, b, 72ab587e46d80b2, -, 8 ), HEX_LDBL( -, 8, 62ea24bb5b2e636, -, 8 ), + HEX_LDBL( +, e, d67c16582123937, -, 10 ) }; // minimax fit over [ 0x1.0p-52, 0.18] Max error: 0x1.67ea5c184e5d9p-64 + + // Calculate y = p(r) + const size_t atan_coeff_count = sizeof( atan_coeffs ) / sizeof( atan_coeffs[0] ); + long double y = atan_coeffs[ atan_coeff_count - 1]; + for( i = (int)atan_coeff_count - 2; i >= 0; i-- ) + y = atan_coeffs[i] + y * z2; + + z *= 2.0L; // fold in 2.0 for 2.0 * atan(z) + y *= z; // rp(r) + + return head + ((y + tail) + z); + } + + // do |x| <= sqrt(0.5) here + // acos( sqrt(z) ) - PI/2 + // Piecewise minimax polynomial fits for p(z) = 1 + ------------------------; + // 
sqrt(z) + // + // Define z = x*x, and solve for acos(x) over x in x >= 0: + // + // acos( sqrt(z) ) = acos(x) = x*(p(z)-1) + PI/2 = xp(x**2) - x + PI/2 + // + const long double coeffs[4][14] = { + { HEX_LDBL( -, a, fa7382e1f347974, -, 10 ), HEX_LDBL( -, b, 4d5a992de1ac4da, -, 6 ), + HEX_LDBL( -, a, c526184bd558c17, -, 7 ), HEX_LDBL( -, d, 9ed9b0346ec092a, -, 8 ), + HEX_LDBL( -, 9, dca410c1f04b1f, -, 8 ), HEX_LDBL( -, f, 76e411ba9581ee5, -, 9 ), + HEX_LDBL( -, c, c71b00479541d8e, -, 9 ), HEX_LDBL( -, a, f527a3f9745c9de, -, 9 ), + HEX_LDBL( -, 9, a93060051f48d14, -, 9 ), HEX_LDBL( -, 8, b3d39ad70e06021, -, 9 ), + HEX_LDBL( -, f, f2ab95ab84f79c, -, 10 ), HEX_LDBL( -, e, d1af5f5301ccfe4, -, 10 ), + HEX_LDBL( -, e, 1b53ba562f0f74a, -, 10 ), HEX_LDBL( -, d, 6a3851330e15526, -, 10 ) }, // x - 0.0625 in [ -0x1.fffffffffp-5, 0x1.0p-4 ] Error: 0x1.97839bf07024p-76 + + { HEX_LDBL( -, 8, c2f1d638e4c1b48, -, 8 ), HEX_LDBL( -, c, d47ac903c311c2c, -, 6 ), + HEX_LDBL( -, d, e020b2dabd5606a, -, 7 ), HEX_LDBL( -, a, 086fafac220f16b, -, 7 ), + HEX_LDBL( -, 8, 55b5efaf6b86c3e, -, 7 ), HEX_LDBL( -, f, 05c9774fed2f571, -, 8 ), + HEX_LDBL( -, e, 484a93f7f0fc772, -, 8 ), HEX_LDBL( -, e, 1a32baef01626e4, -, 8 ), + HEX_LDBL( -, e, 528e525b5c9c73d, -, 8 ), HEX_LDBL( -, e, ddd5d27ad49b2c8, -, 8 ), + HEX_LDBL( -, f, b3259e7ae10c6f, -, 8 ), HEX_LDBL( -, 8, 68998170d5b19b7, -, 7 ), + HEX_LDBL( -, 9, 4468907f007727, -, 7 ), HEX_LDBL( -, a, 2ad5e4906a8e7b3, -, 7 ) },// x - 0.1875 in [ -0x1.0p-4, 0x1.0p-4 ] Error: 0x1.647af70073457p-73 + + { HEX_LDBL( -, f, a76585ad399e7ac, -, 8 ), HEX_LDBL( -, e, d665b7dd504ca7c, -, 6 ), + HEX_LDBL( -, 9, 4c7c2402bd4bc33, -, 6 ), HEX_LDBL( -, f, ba76b69074ff71c, -, 7 ), + HEX_LDBL( -, f, 58117784bdb6d5f, -, 7 ), HEX_LDBL( -, 8, 22ddd8eef53227d, -, 6 ), + HEX_LDBL( -, 9, 1d1d3b57a63cdb4, -, 6 ), HEX_LDBL( -, a, 9c4bdc40cca848, -, 6 ), + HEX_LDBL( -, c, b673b12794edb24, -, 6 ), HEX_LDBL( -, f, 9290a06e31575bf, -, 6 ), + HEX_LDBL( -, 9, b4929c16aeb3d1f, -, 5 ), 
HEX_LDBL( -, c, 461e725765a7581, -, 5 ), + HEX_LDBL( -, 8, 0a59654c98d9207, -, 4 ), HEX_LDBL( -, a, 6de6cbd96c80562, -, 4 ) }, // x - 0.3125 in [ -0x1.0p-4, 0x1.0p-4 ] Error: 0x1.b0246c304ce1ap-70 + + { HEX_LDBL( -, b, dca8b0359f96342, -, 7 ), HEX_LDBL( -, 8, cd2522fcde9823, -, 5 ), + HEX_LDBL( -, d, 2af9397b27ff74d, -, 6 ), HEX_LDBL( -, d, 723f2c2c2409811, -, 6 ), + HEX_LDBL( -, f, ea8f8481ecc3cd1, -, 6 ), HEX_LDBL( -, a, 43fd8a7a646b0b2, -, 5 ), + HEX_LDBL( -, e, 01b0bf63a4e8d76, -, 5 ), HEX_LDBL( -, 9, f0b7096a2a7b4d, -, 4 ), + HEX_LDBL( -, e, 872e7c5a627ab4c, -, 4 ), HEX_LDBL( -, a, dbd760a1882da48, -, 3 ), + HEX_LDBL( -, 8, 424e4dea31dd273, -, 2 ), HEX_LDBL( -, c, c05d7730963e793, -, 2 ), + HEX_LDBL( -, a, 523d97197cd124a, -, 1 ), HEX_LDBL( -, 8, 307ba943978aaee, +, 0 ) } // x - 0.4375 in [ -0x1.0p-4, 0x1.0p-4 ] Error: 0x1.9ecff73da69c9p-66 + }; + + const long double offsets[4] = { 0.0625, 0.1875, 0.3125, 0.4375 }; + const size_t coeff_count = sizeof( coeffs[0] ) / sizeof( coeffs[0][0] ); + + // reduce the incoming values a bit so that they are in the range [-0x1.0p-4, 0x1.0p-4] + const long double *c; + i = x2 * 8.0L; + c = coeffs[i]; + x2 -= offsets[i]; // exact + + // calcualte p(x2) + long double y = c[ coeff_count - 1]; + for( i = (int)coeff_count - 2; i >= 0; i-- ) + y = c[i] + y * x2; + + // xp(x2) + y *= x; + + // return xp(x2) - x + PI/2 + return head + ((y + tail) - x); +} + +double reference_log10(double x) +{ + if( x == 0.0 ) + return -INFINITY; + + if( x < 0.0 ) + return cl_make_nan(); + + if( isinf(x) ) + return INFINITY; + + double log2Hi = HEX_DBL( +, 1, 34413509f79fe, -, 2 ); + double logxHi, logxLo; + __log2_ep(&logxHi, &logxLo, x); + return logxHi*log2Hi; +} + +long double reference_log10l(long double x) +{ + if( x == 0.0 ) + return -INFINITY; + + if( x < 0.0 ) + return cl_make_nan(); + + if( isinf(x) ) + return INFINITY; + + double log2Hi = HEX_DBL( +, 1, 34413509f79fe, -, 2 ); + double log2Lo = HEX_DBL( +, 1, e623e2566b02d, -, 55 ); + 
double logxHi, logxLo; + __log2_ep(&logxHi, &logxLo, x); + + //double rhi, rlo; + //MulDD(&rhi, &rlo, logxHi, logxLo, log2Hi, log2Lo); + //return (long double) rhi + (long double) rlo; + + long double lg2 = (long double) log2Hi + (long double) log2Lo; + long double logx = (long double) logxHi + (long double) logxLo; + return logx*lg2; +} + +double reference_acos(double x) +{ + return acos( x ); +} + +double reference_atan2(double x, double y) +{ +#if defined(_WIN32) + // fix edge cases for Windows + if (isinf(x) && isinf(y)) { + double retval = (y > 0) ? M_PI_4 : 3.f * M_PI_4; + return (x > 0) ? retval : -retval; + } +#endif // _WIN32 + return atan2(x, y); +} + +long double reference_atan2l(long double x, long double y) +{ +#if defined(_WIN32) + // fix edge cases for Windows + if (isinf(x) && isinf(y)) { + long double retval = (y > 0) ? M_PI_4 : 3.f * M_PI_4; + return (x > 0) ? retval : -retval; + } +#endif // _WIN32 + return atan2l(x, y); +} + +double reference_frexp(double a, int *exp) +{ + if(isnan(a) || isinf(a) || a == 0.0) + { + *exp = 0; + return a; + } + + union { + cl_double d; + cl_ulong l; + } u; + + u.d = a; + + // separate out sign + cl_ulong s = u.l & 0x8000000000000000ULL; + u.l &= 0x7fffffffffffffffULL; + int bias = -1022; + + if((u.l & 0x7ff0000000000000ULL) == 0) + { + double d = u.l; + u.d = d; + bias -= 1074; + } + + int e = (int)((u.l & 0x7ff0000000000000ULL) >> 52); + u.l &= 0x000fffffffffffffULL; + e += bias; + u.l |= ((cl_ulong)1022 << 52); + u.l |= s; + + *exp = e; + return u.d; +} + +long double reference_frexpl(long double a, int *exp) +{ + if(isnan(a) || isinf(a) || a == 0.0) + { + *exp = 0; + return a; + } + + if(sizeof(long double) == sizeof(double)) + { + return reference_frexp(a, exp); + } + else + { + return frexpl(a, exp); + } +} + + +double reference_atan(double x) +{ + return atan( x ); +} + +long double reference_atanl(long double x) +{ + return atanl( x ); +} + +long double reference_asinl(long double x) +{ + return asinl( x ); 
+} + +double reference_asin(double x) +{ + return asin( x ); +} + +double reference_fabs(double x) +{ + return fabs( x); +} + +double reference_cosh(double x) +{ + return cosh( x ); +} + +long double reference_sqrtl(long double x) +{ +#if defined( __SSE2__ ) || (defined( _MSC_VER ) && (defined(_M_IX86) || defined(_M_X64))) + __m128d result128 = _mm_set_sd((double) x); + result128 = _mm_sqrt_sd(result128, result128); + return _mm_cvtsd_f64(result128); +#else + volatile double dx = x; + return sqrt( dx ); +#endif +} + +long double reference_tanhl(long double x) +{ + return tanhl( x ); +} + +long double reference_floorl(long double x) +{ + if( x == 0.0 || reference_isinfl(x) || reference_isnanl(x) ) + return x; + + long double absx = reference_fabsl(x); + if( absx >= HEX_LDBL( +, 1, 0, +, 52 ) ) + return x; + + if( absx < 1.0 ) + { + if( x < 0.0 ) + return -1.0; + else + return 0.0; + } + + long double r = (long double) ((cl_long) x); + + if( x < 0.0 && r > x ) + r -= 1.0; + + return r; +} + + +double reference_tanh(double x) +{ + return tanh( x ); +} + +long double reference_assignmentl( long double x ){ return x; } + +int reference_notl( long double x ) +{ + int r = !x; + return r; +} + + diff --git a/test_conformance/math_brute_force/reference_math.h b/test_conformance/math_brute_force/reference_math.h new file mode 100644 index 00000000..bcd0df8e --- /dev/null +++ b/test_conformance/math_brute_force/reference_math.h @@ -0,0 +1,232 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef REFERENCE_MATH_H +#define REFERENCE_MATH_H + +#if defined( __APPLE__ ) + #include +#else + #include +#endif + +// -- for testing float -- +double reference_sinh( double x ); +double reference_sqrt( double x ); +double reference_tanh( double x ); +double reference_acos( double ); +double reference_asin( double ); +double reference_atan( double ); +double reference_atan2( double, double ); +double reference_ceil( double ); +double reference_cosh( double ); +double reference_exp( double ); +double reference_fabs( double ); +double reference_acospi( double ); +double reference_asinpi( double ); +double reference_atanpi( double ); +double reference_atan2pi( double, double ); +double reference_cospi( double ); +double reference_divide( double, double ); +double reference_fract( double, double * ); +float reference_fma( float, float, float, int ); +double reference_mad( double, double, double ); +double reference_nextafter(double, double ); +double reference_recip( double ); +double reference_rootn( double, int ); +double reference_rsqrt( double ); +double reference_sincos( double, double * ); +double reference_sinpi( double ); +double reference_tanpi( double ); +double reference_pow(double x, double y); +double reference_pown( double, int ); +double reference_powr( double, double ); +double reference_cos( double ); +double reference_sin( double ); +double reference_tan( double ); +double reference_log( double ); +double reference_log10( double ); +double reference_modf( double, double *n ); + +double reference_fdim( double, double ); +double reference_add( double, double ); +double reference_subtract( double, double ); +double reference_divide( double, double ); +double reference_multiply( double, double ); +double reference_remquo( double, double, int* ); +double reference_lgamma_r( double, int* ); + +int reference_isequal( double, double ); +int 
reference_isfinite( double ); +int reference_isgreater( double, double ); +int reference_isgreaterequal( double, double ); +int reference_isinf( double ); +int reference_isless( double, double ); +int reference_islessequal( double, double ); +int reference_islessgreater( double, double ); +int reference_isnan( double ); +int reference_isnormal( double ); +int reference_isnotequal( double, double ); +int reference_isordered( double, double ); +int reference_isunordered( double, double ); +int reference_signbit( float ); + +double reference_acosh( double x ); +double reference_asinh( double x ); +double reference_atanh( double x ); +double reference_cbrt(double x); +float reference_copysign( float x, float y); +double reference_copysignd( double x, double y); +double reference_exp10( double ); +double reference_exp2( double x ); +double reference_expm1( double x ); +double reference_fmax( double x, double y ); +double reference_fmin( double x, double y ); +double reference_hypot( double x, double y ); +double reference_lgamma( double x); +int reference_ilogb( double ); +double reference_log2( double x ); +double reference_log1p( double x ); +double reference_logb( double x ); +double reference_maxmag( double x, double y ); +double reference_minmag( double x, double y ); +double reference_nan( cl_uint x ); +double reference_reciprocal( double x ); +double reference_remainder( double x, double y ); +double reference_rint( double x ); +double reference_round( double x ); +double reference_trunc( double x ); +double reference_floor( double x ); +double reference_fmod( double x, double y ); +double reference_frexp( double x, int *n ); +double reference_ldexp( double x, int n ); + +double reference_assignment( double x ); +int reference_not( double x ); +// -- for testing fast-relaxed + +double reference_relaxed_mad( double, double, double ); +double reference_relaxed_divide( double x, double y ); +double reference_relaxed_sin( double x ); +double reference_relaxed_cos( 
double x ); +double reference_relaxed_sincos( double x, double * y); +double reference_relaxed_tan( double x ); +double reference_relaxed_exp( double x ); +double reference_relaxed_exp2( double x ); +double reference_relaxed_exp10( double x ); +double reference_relaxed_log( double x ); +double reference_relaxed_log2( double x ); +double reference_relaxed_pow( double x, double y); +double reference_relaxed_reciprocal( double x ); + +// -- for testing double -- + +long double reference_sinhl( long double x ); +long double reference_sqrtl( long double x ); +long double reference_tanhl( long double x ); +long double reference_acosl( long double ); +long double reference_asinl( long double ); +long double reference_atanl( long double ); +long double reference_atan2l( long double, long double ); +long double reference_ceill( long double ); +long double reference_coshl( long double ); +long double reference_expl( long double ); +long double reference_fabsl( long double ); +long double reference_acospil( long double ); +long double reference_asinpil( long double ); +long double reference_atanpil( long double ); +long double reference_atan2pil( long double, long double ); +long double reference_cospil( long double ); +long double reference_dividel( long double, long double ); +long double reference_fractl( long double, long double * ); +long double reference_fmal( long double, long double, long double ); +long double reference_madl( long double, long double, long double ); +long double reference_nextafterl(long double, long double ); +long double reference_recipl( long double ); +long double reference_rootnl( long double, int ); +long double reference_rsqrtl( long double ); +long double reference_sincosl( long double, long double * ); +long double reference_sinpil( long double ); +long double reference_tanpil( long double ); +long double reference_powl(long double x, long double y); +long double reference_pownl( long double, int ); +long double reference_powrl( long double, 
long double ); +long double reference_cosl( long double ); +long double reference_sinl(long double ); +long double reference_tanl( long double ); +long double reference_logl( long double ); +long double reference_log10l( long double ); +long double reference_modfl( long double, long double *n ); + + +long double reference_fdiml( long double, long double ); +long double reference_addl( long double, long double ); +long double reference_subtractl( long double, long double ); +long double reference_dividel( long double, long double ); +long double reference_multiplyl( long double, long double ); +long double reference_remquol( long double, long double, int* ); +long double reference_lgamma_rl( long double, int* ); + + +int reference_isequall( long double, long double ); +int reference_isfinitel( long double ); +int reference_isgreaterl( long double, long double ); +int reference_isgreaterequall( long double, long double ); +int reference_isinfl( long double ); +int reference_islessl( long double, long double ); +int reference_islessequall( long double, long double ); +int reference_islessgreaterl( long double, long double ); +int reference_isnanl( long double ); +int reference_isnormall( long double ); +int reference_isnotequall( long double, long double ); +int reference_isorderedl( long double, long double ); +int reference_isunorderedl( long double, long double ); +int reference_signbitl( long double ); + +long double reference_acoshl( long double x ); +long double reference_asinhl( long double x ); +long double reference_atanhl( long double x ); +long double reference_cbrtl(long double x); +long double reference_copysignl( long double x, long double y); +long double reference_exp10l( long double ); +long double reference_exp2l( long double x ); +long double reference_expm1l( long double x ); +long double reference_fmaxl( long double x, long double y ); +long double reference_fminl( long double x, long double y ); +long double reference_hypotl( long double x, long 
double y ); +long double reference_lgammal( long double x); +int reference_ilogbl( long double ); +long double reference_log2l( long double x ); +long double reference_log1pl( long double x ); +long double reference_logbl( long double x ); +long double reference_maxmagl( long double x, long double y ); +long double reference_minmagl( long double x, long double y ); +long double reference_nanl( cl_ulong x ); +long double reference_reciprocall( long double x ); +long double reference_remainderl( long double x, long double y ); +long double reference_rintl( long double x ); +long double reference_roundl( long double x ); +long double reference_truncl( long double x ); +long double reference_floorl( long double x ); +long double reference_fmodl( long double x, long double y ); +long double reference_frexpl( long double x, int *n ); +long double reference_ldexpl( long double x, int n ); + +long double reference_assignmentl( long double x ); +int reference_notl( long double x ); + +#endif + + diff --git a/test_conformance/math_brute_force/run_math_brute_force_in_parallel.py b/test_conformance/math_brute_force/run_math_brute_force_in_parallel.py new file mode 100644 index 00000000..51c49ad2 --- /dev/null +++ b/test_conformance/math_brute_force/run_math_brute_force_in_parallel.py @@ -0,0 +1,110 @@ +#! /usr/bin/python + +# // OpenCL Conformance Tests +# // +# // Copyright: (c) 2009-2013 by Apple Inc. All Rights Reserved. +# // + +import os, re, sys, subprocess, time + +# A script to run the entierty of math_brute_force, but to run each separate job in parallel. 
+ +def DEBUG(text, level=1): + if (DEBUG_LEVEL >= level): print(text) + +def write_info(text): + print text, + if (ATF): + ATF_log.write(""+text+"\n") + ATF_log.flush() + +def write_error(text): + print "ERROR:" + text, + if (ATF): + ATF_log.write(""+text+"\n") + ATF_log.flush() + +def start_atf(): + global ATF, ATF_log + DEBUG("start_atf()") + if (os.environ.get("ATF_RESULTSDIRECTORY") == None): + ATF = False + DEBUG("\tATF not defined",0) + return + ATF = True + ATF_output_file_name = "TestLog.xml" + output_path = os.environ.get("ATF_RESULTSDIRECTORY") + try: + ATF_log = open(output_path+ATF_output_file_name, "w") + except IOError: + DEBUG("Could not open ATF file " + ATF_output_file_name, 0) + ATF = False + return + DEBUG("ATF Enabled") + # Generate the XML header + ATF_log.write("\n") + ATF_log.write("\n") + DEBUG("Done start_atf()") + +def stop_atf(): + DEBUG("stop_atf()") + if (ATF): + ATF.write("\n") + ATF.write("\n") + ATF.close() + +def get_time() : + return time.strftime("%A %H:%M:%S", time.localtime()) + +def start_test(id): + DEBUG("start_test("+str(id) + ")") + command = test + " " + str(id) + " " + str(id) + try: + write_info(get_time() + " Executing " + command + "...") + p = subprocess.Popen(command, stderr=subprocess.PIPE, stdout=subprocess.PIPE, shell=True) + except OSError: + write_error("Failed to execute " + command) + return + running_tests[id] = p + DEBUG("start_test("+str(id) + ") added: " + str(running_tests[id]) + \ + ", now " + str(len(running_tests.keys())) + " tests running") + + + + +DEBUG_LEVEL = 2 +test = "./bruteforce -w" +instances = 4 +max_test_ID = 12 +running_tests = {} +ATF_log = None +ATF = False + +# Start the ATF log +start_atf() +next_test = 0 +next_test_to_finish = 0 + +while ( (next_test <= max_test_ID) | (next_test_to_finish <= max_test_ID)): + # If we want to run more tests, start them + while ((len(running_tests.keys()) < instances) & (next_test <= max_test_ID)): + start_test(next_test) + next_test = next_test + 1 + 
time.sleep(1) + # Check if the oldest test has finished + p = running_tests[next_test_to_finish] + if (p.poll() != None): + write_info(get_time() + " Test " + str(next_test_to_finish) +" finished.") + del running_tests[next_test_to_finish] + next_test_to_finish = next_test_to_finish + 1 + # Write the results from the test out + for line in p.stdout.readlines(): + write_info(line) + for line in p.stderr.readlines(): + write_error(line) + + time.sleep(1) + + +# Stop the ATF log +stop_atf() diff --git a/test_conformance/math_brute_force/ternary.c b/test_conformance/math_brute_force/ternary.c new file mode 100644 index 00000000..a90271d9 --- /dev/null +++ b/test_conformance/math_brute_force/ternary.c @@ -0,0 +1,1361 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "Utility.h" + +#include +#include "FunctionList.h" + +#define CORRECTLY_ROUNDED 0 +#define FLUSHED 1 + +int TestFunc_Float_Float_Float_Float(const Func *f, MTdata); +int TestFunc_Double_Double_Double_Double(const Func *f, MTdata); + +#if defined( __cplusplus) +extern "C" +#endif +const vtbl _ternary = { "ternary", TestFunc_Float_Float_Float_Float, TestFunc_Double_Double_Double_Double }; + +static int BuildKernel( const char *name, int vectorSize, cl_kernel *k, cl_program *p ); +static int BuildKernelDouble( const char *name, int vectorSize, cl_kernel *k, cl_program *p ); +static int BuildKernel( const char *name, int vectorSize, cl_kernel *k, cl_program *p ) +{ + const char *c[] = { + "__kernel void math_kernel", sizeNames[vectorSize], "( __global float", sizeNames[vectorSize], "* out, __global float", sizeNames[vectorSize], "* in1, __global float", sizeNames[vectorSize], "* in2, __global float", sizeNames[vectorSize], "* in3 )\n" + "{\n" + " int i = get_global_id(0);\n" + " out[i] = ", name, "( in1[i], in2[i], in3[i] );\n" + "}\n" + }; + + const char *c3[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global float* out, __global float* in, __global float* in2 , __global float* in3)\n" + "{\n" + " size_t i = get_global_id(0);\n" + " if( i + 1 < get_global_size(0) )\n" + " {\n" + " float3 f0 = vload3( 0, in + 3 * i );\n" + " float3 f1 = vload3( 0, in2 + 3 * i );\n" + " float3 f2 = vload3( 0, in3 + 3 * i );\n" + " f0 = ", name, "( f0, f1, f2 );\n" + " vstore3( f0, 0, out + 3*i );\n" + " }\n" + " else\n" + " {\n" + " size_t parity = i & 1; // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). 
Assume power of two buffer size \n" + " float3 f0, f1, f2;\n" + " switch( parity )\n" + " {\n" + " case 1:\n" + " f0 = (float3)( in[3*i], NAN, NAN ); \n" + " f1 = (float3)( in2[3*i], NAN, NAN ); \n" + " f2 = (float3)( in3[3*i], NAN, NAN ); \n" + " break;\n" + " case 0:\n" + " f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n" + " f1 = (float3)( in2[3*i], in2[3*i+1], NAN ); \n" + " f2 = (float3)( in3[3*i], in3[3*i+1], NAN ); \n" + " break;\n" + " }\n" + " f0 = ", name, "( f0, f1, f2 );\n" + " switch( parity )\n" + " {\n" + " case 0:\n" + " out[3*i+1] = f0.y; \n" + " // fall through\n" + " case 1:\n" + " out[3*i] = f0.x; \n" + " break;\n" + " }\n" + " }\n" + "}\n" + }; + + const char **kern = c; + size_t kernSize = sizeof(c)/sizeof(c[0]); + + if( sizeValues[vectorSize] == 3 ) + { + kern = c3; + kernSize = sizeof(c3)/sizeof(c3[0]); + } + + char testName[32]; + snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); + + return MakeKernel(kern, (cl_uint) kernSize, testName, k, p); + +} + +static int BuildKernelDouble( const char *name, int vectorSize, cl_kernel *k, cl_program *p ) +{ + const char *c[] = { + "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", + "__kernel void math_kernel", sizeNames[vectorSize], "( __global double", sizeNames[vectorSize], "* out, __global double", sizeNames[vectorSize], "* in1, __global double", sizeNames[vectorSize], "* in2, __global double", sizeNames[vectorSize], "* in3 )\n" + "{\n" + " int i = get_global_id(0);\n" + " out[i] = ", name, "( in1[i], in2[i], in3[i] );\n" + "}\n" + }; + + const char *c3[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", + "__kernel void math_kernel", sizeNames[vectorSize], "( __global double* out, __global double* in, __global double* in2 , __global double* in3)\n" + "{\n" + " size_t i = get_global_id(0);\n" + " if( i + 1 < get_global_size(0) )\n" + " {\n" + " double3 d0 = vload3( 0, in + 3 * i );\n" + " double3 d1 = vload3( 0, in2 + 3 * i );\n" + " double3 d2 = vload3( 0, in3 
+ 3 * i );\n" + " d0 = ", name, "( d0, d1, d2 );\n" + " vstore3( d0, 0, out + 3*i );\n" + " }\n" + " else\n" + " {\n" + " size_t parity = i & 1; // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n" + " double3 d0, d1, d2;\n" + " switch( parity )\n" + " {\n" + " case 1:\n" + " d0 = (double3)( in[3*i], NAN, NAN ); \n" + " d1 = (double3)( in2[3*i], NAN, NAN ); \n" + " d2 = (double3)( in3[3*i], NAN, NAN ); \n" + " break;\n" + " case 0:\n" + " d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n" + " d1 = (double3)( in2[3*i], in2[3*i+1], NAN ); \n" + " d2 = (double3)( in3[3*i], in3[3*i+1], NAN ); \n" + " break;\n" + " }\n" + " d0 = ", name, "( d0, d1, d2 );\n" + " switch( parity )\n" + " {\n" + " case 0:\n" + " out[3*i+1] = d0.y; \n" + " // fall through\n" + " case 1:\n" + " out[3*i] = d0.x; \n" + " break;\n" + " }\n" + " }\n" + "}\n" + }; + + const char **kern = c; + size_t kernSize = sizeof(c)/sizeof(c[0]); + + if( sizeValues[vectorSize] == 3 ) + { + kern = c3; + kernSize = sizeof(c3)/sizeof(c3[0]); + } + + char testName[32]; + snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); + + return MakeKernel(kern, (cl_uint) kernSize, testName, k, p); + +} + +typedef struct BuildKernelInfo +{ + cl_uint offset; // the first vector size to build + cl_kernel *kernels; + cl_program *programs; + const char *nameInCode; +}BuildKernelInfo; + +static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); +static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ) +{ + BuildKernelInfo *info = (BuildKernelInfo*) p; + cl_uint i = info->offset + job_id; + return BuildKernel( info->nameInCode, i, info->kernels + i, info->programs + i ); +} + +static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); +static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ) +{ + BuildKernelInfo *info 
= (BuildKernelInfo*) p; + cl_uint i = info->offset + job_id; + return BuildKernelDouble( info->nameInCode, i, info->kernels + i, info->programs + i ); +} + + +// A table of more difficult cases to get right +static const float specialValuesFloat[] = { + -NAN, -INFINITY, -FLT_MAX, MAKE_HEX_FLOAT(-0x1.000002p64f, -0x1000002L, 40), MAKE_HEX_FLOAT(-0x1.0p64f, -0x1L, 64), MAKE_HEX_FLOAT(-0x1.fffffep63f, -0x1fffffeL, 39), MAKE_HEX_FLOAT(-0x1.000002p63f, -0x1000002L, 39), MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(-0x1.fffffep62f, -0x1fffffeL, 38), + -3.0f, MAKE_HEX_FLOAT(-0x1.800002p1f, -0x1800002L, -23), -2.5f, MAKE_HEX_FLOAT(-0x1.7ffffep1f, -0x17ffffeL, -23), -2.0f, MAKE_HEX_FLOAT(-0x1.800002p0f, -0x1800002L, -24), -1.75f, -1.5f, -1.25f, MAKE_HEX_FLOAT(-0x1.7ffffep0f, -0x17ffffeL, -24), MAKE_HEX_FLOAT(-0x1.000002p0f, -0x1000002L, -24), MAKE_HEX_FLOAT(-0x1.003p0f, -0x1003000L, -24), -MAKE_HEX_FLOAT(0x1.001p0f, 0x1001000L, -24), -1.0f, MAKE_HEX_FLOAT(-0x1.fffffep-1f, -0x1fffffeL, -25), + MAKE_HEX_FLOAT(-0x1.000002p-126f, -0x1000002L, -150), -FLT_MIN, MAKE_HEX_FLOAT(-0x0.fffffep-126f, -0x0fffffeL, -150), MAKE_HEX_FLOAT(-0x0.000ffep-126f, -0x0000ffeL, -150), MAKE_HEX_FLOAT(-0x0.0000fep-126f, -0x00000feL, -150), MAKE_HEX_FLOAT(-0x0.00000ep-126f, -0x000000eL, -150), MAKE_HEX_FLOAT(-0x0.00000cp-126f, -0x000000cL, -150), MAKE_HEX_FLOAT(-0x0.00000ap-126f, -0x000000aL, -150), + MAKE_HEX_FLOAT(-0x0.000008p-126f, -0x0000008L, -150), MAKE_HEX_FLOAT(-0x0.000006p-126f, -0x0000006L, -150), MAKE_HEX_FLOAT(-0x0.000004p-126f, -0x0000004L, -150), MAKE_HEX_FLOAT(-0x0.000002p-126f, -0x0000002L, -150), -0.0f, + + +NAN, +INFINITY, +FLT_MAX, MAKE_HEX_FLOAT(+0x1.000002p64f, +0x1000002L, 40), MAKE_HEX_FLOAT(+0x1.0p64f, +0x1L, 64), MAKE_HEX_FLOAT(+0x1.fffffep63f, +0x1fffffeL, 39), MAKE_HEX_FLOAT(+0x1.000002p63f, +0x1000002L, 39), MAKE_HEX_FLOAT(+0x1.0p63f, +0x1L, 63), MAKE_HEX_FLOAT(+0x1.fffffep62f, +0x1fffffeL, 38), + +3.0f, MAKE_HEX_FLOAT(+0x1.800002p1f, +0x1800002L, -23), 2.5f, 
MAKE_HEX_FLOAT(+0x1.7ffffep1f, +0x17ffffeL, -23),+2.0f, MAKE_HEX_FLOAT(+0x1.800002p0f, +0x1800002L, -24), 1.75f, 1.5f, 1.25f, MAKE_HEX_FLOAT(+0x1.7ffffep0f, +0x17ffffeL, -24), MAKE_HEX_FLOAT(+0x1.000002p0f, +0x1000002L, -24), MAKE_HEX_FLOAT(0x1.003p0f, 0x1003000L, -24), +MAKE_HEX_FLOAT(0x1.001p0f, 0x1001000L, -24), +1.0f, MAKE_HEX_FLOAT(+0x1.fffffep-1f, +0x1fffffeL, -25), + MAKE_HEX_FLOAT(0x1.000002p-126f, 0x1000002L, -150), +FLT_MIN, MAKE_HEX_FLOAT(+0x0.fffffep-126f, +0x0fffffeL, -150), MAKE_HEX_FLOAT(+0x0.000ffep-126f, +0x0000ffeL, -150), MAKE_HEX_FLOAT(+0x0.0000fep-126f, +0x00000feL, -150), MAKE_HEX_FLOAT(+0x0.00000ep-126f, +0x000000eL, -150), MAKE_HEX_FLOAT(+0x0.00000cp-126f, +0x000000cL, -150), MAKE_HEX_FLOAT(+0x0.00000ap-126f, +0x000000aL, -150), + MAKE_HEX_FLOAT(+0x0.000008p-126f, +0x0000008L, -150), MAKE_HEX_FLOAT(+0x0.000006p-126f, +0x0000006L, -150), MAKE_HEX_FLOAT(+0x0.000004p-126f, +0x0000004L, -150), MAKE_HEX_FLOAT(+0x0.000002p-126f, +0x0000002L, -150), +0.0f +}; + +static size_t specialValuesFloatCount = sizeof( specialValuesFloat ) / sizeof( specialValuesFloat[0] ); + + +int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d) +{ + uint64_t i; + uint32_t j, k; + int error; + cl_program programs[ VECTOR_SIZE_COUNT ]; + cl_kernel kernels[ VECTOR_SIZE_COUNT ]; + float maxError = 0.0f; + int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); + float maxErrorVal = 0.0f; + float maxErrorVal2 = 0.0f; + float maxErrorVal3 = 0.0f; + size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE; + + uint64_t step = bufferSize / sizeof( float ); + int skipNanInf = (0 == strcmp( "fma", f->nameInCode )) && ! 
gInfNanSupport; + cl_uchar overflow[BUFFER_SIZE / sizeof( float )]; + float float_ulps; + + logFunctionInfo(f->name,sizeof(cl_float),gTestFastRelaxed); + if( gWimpyMode ) + { + step = (1ULL<<32) * gWimpyReductionFactor / (512); + } + + if( gIsEmbedded ) + float_ulps = f->float_embedded_ulps; + else + float_ulps = f->float_ulps; + + // Init the kernels + BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, f->nameInCode }; + if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) )) + return error; + /* + for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) + if( (error = BuildKernel( f->nameInCode, (int) i, kernels + i, programs + i) ) ) + return error; + */ + + for( i = 0; i < (1ULL<<32); i += step ) + { + //Init input array + uint32_t *p = (uint32_t *)gIn; + uint32_t *p2 = (uint32_t *)gIn2; + uint32_t *p3 = (uint32_t *)gIn3; + j = 0; + if( i == 0 ) + { // test edge cases + float *fp = (float *)gIn; + float *fp2 = (float *)gIn2; + float *fp3 = (float *)gIn3; + uint32_t x, y, z; x = y = z = 0; + for( ; j < bufferSize / sizeof( float ); j++ ) + { + fp[j] = specialValuesFloat[x]; + fp2[j] = specialValuesFloat[y]; + fp3[j] = specialValuesFloat[z]; + + if( ++x >= specialValuesFloatCount ) + { + x = 0; + if( ++y >= specialValuesFloatCount ) + { + y = 0; + if( ++z >= specialValuesFloatCount ) + break; + } + } + } + if( j == bufferSize / sizeof( float ) ) + vlog_error( "Test Error: not all special cases tested!\n" ); + } + + for( ; j < bufferSize / sizeof( float ); j++ ) + { + p[j] = genrand_int32(d); + p2[j] = genrand_int32(d); + p3[j] = genrand_int32(d); + } + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); + return error; + } + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, bufferSize, gIn2, 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in 
clEnqueueWriteBuffer2 ***\n", error ); + return error; + } + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0, bufferSize, gIn3, 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer3 ***\n", error ); + return error; + } + + // write garbage into output arrays + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + uint32_t pattern = 0xffffdead; + memset_pattern4(gOut[j], &pattern, bufferSize); + if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, bufferSize, gOut[j], 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", error, j ); + goto exit; + } + } + + // Run the kernels + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + size_t vectorSize = sizeof( cl_float ) * sizeValues[j]; + size_t localCount = (bufferSize + vectorSize - 1) / vectorSize; // bufferSize / vectorSize rounded up + if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; } + if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; } + if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(programs[j]); goto exit; } + if( ( error = clSetKernelArg( kernels[j], 3, sizeof( gInBuffer3 ), &gInBuffer3 ) )) { LogBuildError(programs[j]); goto exit; } + + if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) + { + vlog_error( "FAILED -- could not execute kernel\n" ); + goto exit; + } + } + + // Get that moving + if( (error = clFlush(gQueue) )) + vlog( "clFlush failed\n" ); + + //Calculate the correctly rounded reference result + float *r = (float *)gOut_Ref; + float *s = (float *)gIn; + float *s2 = (float *)gIn2; + float *s3 = (float *)gIn3; + if( skipNanInf ) + { + for( j = 0; j < bufferSize / sizeof( float ); j++ ) + { + feclearexcept(FE_OVERFLOW); + r[j] 
= (float) f->func.f_fma( s[j], s2[j], s3[j], CORRECTLY_ROUNDED ); + overflow[j] = FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW)); + } + } + else + { + for( j = 0; j < bufferSize / sizeof( float ); j++ ) + r[j] = (float) f->func.f_fma( s[j], s2[j], s3[j], CORRECTLY_ROUNDED ); + } + + + // Read the data back + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL)) ) + { + vlog_error( "ReadArray failed %d\n", error ); + goto exit; + } + } + + if( gSkipCorrectnessTesting ) + break; + + //Verify data + uint32_t *t = (uint32_t *)gOut_Ref; + for( j = 0; j < bufferSize / sizeof( float ); j++ ) + { + for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) + { + uint32_t *q = (uint32_t *)(gOut[k]); + + // If we aren't getting the correctly rounded result + if( t[j] != q[j] ) + { + float err; + int fail; + float test = ((float*) q)[j]; + float correct = f->func.f_fma( s[j], s2[j], s3[j], CORRECTLY_ROUNDED ); + + // Per section 10 paragraph 6, accept any result if an input or output is a infinity or NaN or overflow + if( skipNanInf ) + { + if( overflow[j] || + IsFloatInfinity(correct) || IsFloatNaN(correct) || + IsFloatInfinity(s[j]) || IsFloatNaN(s[j]) || + IsFloatInfinity(s2[j]) || IsFloatNaN(s2[j]) || + IsFloatInfinity(s3[j]) || IsFloatNaN(s3[j]) ) + continue; + } + + + err = Ulp_Error( test, correct ); + fail = ! 
(fabsf(err) <= float_ulps); + + if( fail && ftz ) + { + float correct2, err2; + + // retry per section 6.5.3.2 with flushing on + if( 0.0f == test && 0.0f == f->func.f_fma( s[j], s2[j], s3[j], FLUSHED ) ) + { + fail = 0; + err = 0.0f; + } + + // retry per section 6.5.3.3 + if( fail && IsFloatSubnormal( s[j] ) ) + { // look at me, + float err3, correct3; + + if( skipNanInf ) + feclearexcept( FE_OVERFLOW ); + + correct2 = f->func.f_fma( 0.0f, s2[j], s3[j], CORRECTLY_ROUNDED ); + correct3 = f->func.f_fma( -0.0f, s2[j], s3[j], CORRECTLY_ROUNDED ); + + if( skipNanInf ) + { + if( fetestexcept( FE_OVERFLOW ) ) + continue; + + // Note: no double rounding here. Reference functions calculate in single precision. + if( IsFloatInfinity(correct2) || IsFloatNaN(correct2) || + IsFloatInfinity(correct3) || IsFloatNaN(correct3) ) + continue; + } + + err2 = Ulp_Error( test, correct2 ); + err3 = Ulp_Error( test, correct3 ); + fail = fail && ((!(fabsf(err2) <= float_ulps)) && (!(fabsf(err3) <= float_ulps))); + if( fabsf( err2 ) < fabsf(err ) ) + err = err2; + if( fabsf( err3 ) < fabsf(err ) ) + err = err3; + + // retry per section 6.5.3.4 + if( 0.0f == test && + ( 0.0f == f->func.f_fma( 0.0f, s2[j], s3[j], FLUSHED ) || + 0.0f == f->func.f_fma( -0.0f, s2[j], s3[j], FLUSHED ) ) + ) + { + fail = 0; + err = 0.0f; + } + + //try with first two args as zero + if( IsFloatSubnormal( s2[j] ) ) + { // its fun to have fun, + double correct4, correct5; + float err4, err5; + + if( skipNanInf ) + feclearexcept( FE_OVERFLOW ); + + correct2 = f->func.f_fma( 0.0f, 0.0f, s3[j], CORRECTLY_ROUNDED ); + correct3 = f->func.f_fma( -0.0f, 0.0f, s3[j], CORRECTLY_ROUNDED ); + correct4 = f->func.f_fma( 0.0f, -0.0f, s3[j], CORRECTLY_ROUNDED ); + correct5 = f->func.f_fma( -0.0f, -0.0f, s3[j], CORRECTLY_ROUNDED ); + + // Per section 10 paragraph 6, accept any result if an input or output is a infinity or NaN or overflow + if( !gInfNanSupport ) + { + if( fetestexcept(FE_OVERFLOW) ) + continue; + + // Note: no double 
rounding here. Reference functions calculate in single precision. + if( IsFloatInfinity(correct2) || IsFloatNaN(correct2) || + IsFloatInfinity(correct3) || IsFloatNaN(correct3) || + IsFloatInfinity(correct4) || IsFloatNaN(correct4) || + IsFloatInfinity(correct5) || IsFloatNaN(correct5) ) + continue; + } + + err2 = Ulp_Error( test, correct2 ); + err3 = Ulp_Error( test, correct3 ); + err4 = Ulp_Error( test, correct4 ); + err5 = Ulp_Error( test, correct5 ); + fail = fail && ((!(fabsf(err2) <= float_ulps)) && (!(fabsf(err3) <= float_ulps)) && + (!(fabsf(err4) <= float_ulps)) && (!(fabsf(err5) <= float_ulps))); + if( fabsf( err2 ) < fabsf(err ) ) + err = err2; + if( fabsf( err3 ) < fabsf(err ) ) + err = err3; + if( fabsf( err4 ) < fabsf(err ) ) + err = err4; + if( fabsf( err5 ) < fabsf(err ) ) + err = err5; + + // retry per section 6.5.3.4 + if( 0.0f == test && + ( 0.0f == f->func.f_fma( 0.0f, 0.0f, s3[j], FLUSHED ) || + 0.0f == f->func.f_fma( -0.0f, 0.0f, s3[j], FLUSHED ) || + 0.0f == f->func.f_fma( 0.0f, -0.0f, s3[j], FLUSHED ) || + 0.0f == f->func.f_fma( -0.0f, -0.0f, s3[j], FLUSHED ) ) + ) + { + fail = 0; + err = 0.0f; + } + + if( IsFloatSubnormal( s3[j] ) ) + { + if( test == 0.0f ) // 0*0+0 is 0 + { + fail = 0; + err = 0.0f; + } + } + } + else if( IsFloatSubnormal( s3[j] ) ) + { + double correct4, correct5; + float err4, err5; + + if( skipNanInf ) + feclearexcept( FE_OVERFLOW ); + + correct2 = f->func.f_fma( 0.0f, s2[j], 0.0f, CORRECTLY_ROUNDED ); + correct3 = f->func.f_fma( -0.0f, s2[j], 0.0f, CORRECTLY_ROUNDED ); + correct4 = f->func.f_fma( 0.0f, s2[j], -0.0f, CORRECTLY_ROUNDED ); + correct5 = f->func.f_fma( -0.0f, s2[j], -0.0f, CORRECTLY_ROUNDED ); + + // Per section 10 paragraph 6, accept any result if an input or output is a infinity or NaN or overflow + if( !gInfNanSupport ) + { + if( fetestexcept(FE_OVERFLOW) ) + continue; + + // Note: no double rounding here. Reference functions calculate in single precision. 
+ if( IsFloatInfinity(correct2) || IsFloatNaN(correct2) || + IsFloatInfinity(correct3) || IsFloatNaN(correct3) || + IsFloatInfinity(correct4) || IsFloatNaN(correct4) || + IsFloatInfinity(correct5) || IsFloatNaN(correct5) ) + continue; + } + + err2 = Ulp_Error( test, correct2 ); + err3 = Ulp_Error( test, correct3 ); + err4 = Ulp_Error( test, correct4 ); + err5 = Ulp_Error( test, correct5 ); + fail = fail && ((!(fabsf(err2) <= float_ulps)) && (!(fabsf(err3) <= float_ulps)) && + (!(fabsf(err4) <= float_ulps)) && (!(fabsf(err5) <= float_ulps))); + if( fabsf( err2 ) < fabsf(err ) ) + err = err2; + if( fabsf( err3 ) < fabsf(err ) ) + err = err3; + if( fabsf( err4 ) < fabsf(err ) ) + err = err4; + if( fabsf( err5 ) < fabsf(err ) ) + err = err5; + + // retry per section 6.5.3.4 + if( 0.0f == test && + ( 0.0f == f->func.f_fma( 0.0f, s2[j], 0.0f, FLUSHED ) || + 0.0f == f->func.f_fma(-0.0f, s2[j], 0.0f, FLUSHED ) || + 0.0f == f->func.f_fma( 0.0f, s2[j],-0.0f, FLUSHED ) || + 0.0f == f->func.f_fma(-0.0f, s2[j],-0.0f, FLUSHED ) ) + ) + { + fail = 0; + err = 0.0f; + } + } + } + else if( fail && IsFloatSubnormal( s2[j] ) ) + { + double correct2, correct3; + float err2, err3; + + if( skipNanInf ) + feclearexcept( FE_OVERFLOW ); + + correct2 = f->func.f_fma( s[j], 0.0f, s3[j], CORRECTLY_ROUNDED ); + correct3 = f->func.f_fma( s[j], -0.0f, s3[j], CORRECTLY_ROUNDED ); + + if( skipNanInf ) + { + if( fetestexcept( FE_OVERFLOW ) ) + continue; + + // Note: no double rounding here. Reference functions calculate in single precision. 
+ if( IsFloatInfinity(correct2) || IsFloatNaN(correct2) || + IsFloatInfinity(correct3) || IsFloatNaN(correct3) ) + continue; + } + + err2 = Ulp_Error( test, correct2 ); + err3 = Ulp_Error( test, correct3 ); + fail = fail && ((!(fabsf(err2) <= float_ulps)) && (!(fabsf(err3) <= float_ulps))); + if( fabsf( err2 ) < fabsf(err ) ) + err = err2; + if( fabsf( err3 ) < fabsf(err ) ) + err = err3; + + // retry per section 6.5.3.4 + if( 0.0f == test && + ( 0.0f == f->func.f_fma( s[j], 0.0f, s3[j], FLUSHED ) || + 0.0f == f->func.f_fma( s[j], -0.0f, s3[j], FLUSHED ) ) + ) + { + fail = 0; + err = 0.0f; + } + + //try with second two args as zero + if( IsFloatSubnormal( s3[j] ) ) + { + double correct4, correct5; + float err4, err5; + + if( skipNanInf ) + feclearexcept( FE_OVERFLOW ); + + correct2 = f->func.f_fma( s[j], 0.0f, 0.0f, CORRECTLY_ROUNDED ); + correct3 = f->func.f_fma( s[j], -0.0f, 0.0f, CORRECTLY_ROUNDED ); + correct4 = f->func.f_fma( s[j], 0.0f, -0.0f, CORRECTLY_ROUNDED ); + correct5 = f->func.f_fma( s[j], -0.0f, -0.0f, CORRECTLY_ROUNDED ); + + // Per section 10 paragraph 6, accept any result if an input or output is a infinity or NaN or overflow + if( !gInfNanSupport ) + { + if( fetestexcept(FE_OVERFLOW) ) + continue; + + // Note: no double rounding here. Reference functions calculate in single precision. 
+ if( IsFloatInfinity(correct2) || IsFloatNaN(correct2) || + IsFloatInfinity(correct3) || IsFloatNaN(correct3) || + IsFloatInfinity(correct4) || IsFloatNaN(correct4) || + IsFloatInfinity(correct5) || IsFloatNaN(correct5) ) + continue; + } + + err2 = Ulp_Error( test, correct2 ); + err3 = Ulp_Error( test, correct3 ); + err4 = Ulp_Error( test, correct4 ); + err5 = Ulp_Error( test, correct5 ); + fail = fail && ((!(fabsf(err2) <= float_ulps)) && (!(fabsf(err3) <= float_ulps)) && + (!(fabsf(err4) <= float_ulps)) && (!(fabsf(err5) <= float_ulps))); + if( fabsf( err2 ) < fabsf(err ) ) + err = err2; + if( fabsf( err3 ) < fabsf(err ) ) + err = err3; + if( fabsf( err4 ) < fabsf(err ) ) + err = err4; + if( fabsf( err5 ) < fabsf(err ) ) + err = err5; + + // retry per section 6.5.3.4 + if( 0.0f == test && + ( 0.0f == f->func.f_fma( s[j], 0.0f, 0.0f, FLUSHED ) || + 0.0f == f->func.f_fma( s[j],-0.0f, 0.0f, FLUSHED ) || + 0.0f == f->func.f_fma( s[j], 0.0f,-0.0f, FLUSHED ) || + 0.0f == f->func.f_fma( s[j],-0.0f,-0.0f, FLUSHED ) ) + ) + { + fail = 0; + err = 0.0f; + } + } + } + else if( fail && IsFloatSubnormal(s3[j]) ) + { + double correct2, correct3; + float err2, err3; + + if( skipNanInf ) + feclearexcept( FE_OVERFLOW ); + + correct2 = f->func.f_fma( s[j], s2[j], 0.0f, CORRECTLY_ROUNDED ); + correct3 = f->func.f_fma( s[j], s2[j], -0.0f, CORRECTLY_ROUNDED ); + + if( skipNanInf ) + { + if( fetestexcept( FE_OVERFLOW ) ) + continue; + + // Note: no double rounding here. Reference functions calculate in single precision. 
+ if( IsFloatInfinity(correct2) || IsFloatNaN(correct2) || + IsFloatInfinity(correct3) || IsFloatNaN(correct3) ) + continue; + } + + err2 = Ulp_Error( test, correct2 ); + err3 = Ulp_Error( test, correct3 ); + fail = fail && ((!(fabsf(err2) <= float_ulps)) && (!(fabsf(err3) <= float_ulps))); + if( fabsf( err2 ) < fabsf(err ) ) + err = err2; + if( fabsf( err3 ) < fabsf(err ) ) + err = err3; + + // retry per section 6.5.3.4 + if( 0.0f == test && + ( 0.0f == f->func.f_fma( s[j], s2[j], 0.0f, FLUSHED ) || + 0.0f == f->func.f_fma( s[j], s2[j],-0.0f, FLUSHED ) ) + ) + { + fail = 0; + err = 0.0f; + } + } + } + + if( fabsf(err ) > maxError ) + { + maxError = fabsf(err); + maxErrorVal = s[j]; + maxErrorVal2 = s2[j]; + maxErrorVal3 = s3[j]; + } + + if( fail ) + { + vlog_error( "\nERROR: %s%s: %f ulp error at {%a, %a, %a} ({0x%8.8x, 0x%8.8x, 0x%8.8x}): *%a vs. %a\n", f->name, sizeNames[k], err, s[j], s2[j], s3[j], ((cl_uint*)s)[j], ((cl_uint*)s2)[j], ((cl_uint*)s3)[j], ((float*) gOut_Ref)[j], test ); + error = -1; + goto exit; + } + } + } + } + + if( 0 == (i & 0x0fffffff) ) + { + if (gVerboseBruteForce) + { + vlog("base:%14u step:%10u bufferSize:%10zd \n", i, step, bufferSize); + } else + { + vlog("." ); + } + fflush(stdout); + } + } + + if( ! 
gSkipCorrectnessTesting ) + { + if( gWimpyMode ) + vlog( "Wimp pass" ); + else + vlog( "passed" ); + } + + if( gMeasureTimes ) + { + //Init input array + uint32_t *p = (uint32_t *)gIn; + uint32_t *p2 = (uint32_t *)gIn2; + uint32_t *p3 = (uint32_t *)gIn3; + for( j = 0; j < bufferSize / sizeof( float ); j++ ) + { + p[j] = genrand_int32(d); + p2[j] = genrand_int32(d); + p3[j] = genrand_int32(d); + } + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); + return error; + } + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, bufferSize, gIn2, 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error ); + return error; + } + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0, bufferSize, gIn3, 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer3 ***\n", error ); + return error; + } + + + // Run the kernels + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + size_t vectorSize = sizeof( cl_float ) * sizeValues[j]; + size_t localCount = (bufferSize + vectorSize - 1) / vectorSize; // bufferSize / vectorSize rounded up + if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; } + if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; } + if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(programs[j]); goto exit; } + if( ( error = clSetKernelArg( kernels[j], 3, sizeof( gInBuffer3 ), &gInBuffer3 ) )) { LogBuildError(programs[j]); goto exit; } + + double sum = 0.0; + double bestTime = INFINITY; + for( k = 0; k < PERF_LOOP_COUNT; k++ ) + { + uint64_t startTime = GetTime(); + if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, 
NULL)) ) + { + vlog_error( "FAILED -- could not execute kernel\n" ); + goto exit; + } + + // Make sure OpenCL is done + if( (error = clFinish(gQueue) ) ) + { + vlog_error( "Error %d at clFinish\n", error ); + goto exit; + } + + uint64_t endTime = GetTime(); + double time = SubtractTime( endTime, startTime ); + sum += time; + if( time < bestTime ) + bestTime = time; + } + + if( gReportAverageTimes ) + bestTime = sum / PERF_LOOP_COUNT; + double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (bufferSize / sizeof( float ) ); + vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s", f->name, sizeNames[j] ); + } + } + + if( ! gSkipCorrectnessTesting ) + vlog( "\t%8.2f @ {%a, %a, %a}", maxError, maxErrorVal, maxErrorVal2, maxErrorVal3 ); + vlog( "\n" ); + +exit: + // Release + for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) + { + clReleaseKernel(kernels[k]); + clReleaseProgram(programs[k]); + } + + return error; +} + +// A table of more difficult cases to get right +static const double specialValuesDouble[] = { + -NAN, -INFINITY, -DBL_MAX, MAKE_HEX_DOUBLE(-0x1.0000000000001p64, -0x10000000000001LL, 12), MAKE_HEX_DOUBLE(-0x1.0p64, -0x1LL, 64), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp63, -0x1fffffffffffffLL, 11), MAKE_HEX_DOUBLE(-0x1.0000000000001p63, -0x10000000000001LL, 11), MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10), + -3.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p1, -0x18000000000001LL, -51), -2.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp1, -0x17ffffffffffffLL, -51), -2.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p0, -0x18000000000001LL, -52), -1.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp0, -0x17ffffffffffffLL, -52),MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52), -1.0, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-1, -0x1fffffffffffffLL, -53), + MAKE_HEX_DOUBLE(-0x1.0000000000001p-1022, -0x10000000000001LL, -1074), -DBL_MIN, 
MAKE_HEX_DOUBLE(-0x0.fffffffffffffp-1022, -0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000fffp-1022, -0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(-0x0.00000000000fep-1022, -0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ep-1022, -0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000cp-1022, -0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ap-1022, -0x0000000000000aLL, -1074), + MAKE_HEX_DOUBLE(-0x0.0000000000003p-1022, -0x00000000000003LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000002p-1022, -0x00000000000002LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000001p-1022, -0x00000000000001LL, -1074), -0.0, + + +NAN, +INFINITY, +DBL_MAX, MAKE_HEX_DOUBLE(+0x1.0000000000001p64, +0x10000000000001LL, 12), MAKE_HEX_DOUBLE(+0x1.0p64, +0x1LL, 64), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp63, +0x1fffffffffffffLL, 11), MAKE_HEX_DOUBLE(+0x1.0000000000001p63, +0x10000000000001LL, 11), MAKE_HEX_DOUBLE(+0x1.0p63, +0x1LL, 63), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10), + +3.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p1, +0x18000000000001LL, -51), +2.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp1, +0x17ffffffffffffLL, -51), +2.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p0, +0x18000000000001LL, -52), +1.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp0, +0x17ffffffffffffLL, -52),MAKE_HEX_DOUBLE(+0x1.0000000000001p0, +0x10000000000001LL, -52) /* fixed sign: positive section entry, mirrors -0x1.0000000000001p0 above and the float table's +0x1.000002p0f */, +1.0, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-1, +0x1fffffffffffffLL, -53), + MAKE_HEX_DOUBLE(+0x1.0000000000001p-1022, +0x10000000000001LL, -1074), +DBL_MIN, MAKE_HEX_DOUBLE(+0x0.fffffffffffffp-1022, +0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000fffp-1022, +0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(+0x0.00000000000fep-1022, +0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ep-1022, +0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000cp-1022, +0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ap-1022, +0x0000000000000aLL, -1074), + MAKE_HEX_DOUBLE(+0x0.0000000000003p-1022,
+0x00000000000003LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000002p-1022, +0x00000000000002LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000001p-1022, +0x00000000000001LL, -1074), +0.0, +}; + +static const size_t specialValuesDoubleCount = sizeof( specialValuesDouble ) / sizeof( specialValuesDouble[0] ); + + +int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d) +{ + uint64_t i; + uint32_t j, k; + int error; + cl_program programs[ VECTOR_SIZE_COUNT ]; + cl_kernel kernels[ VECTOR_SIZE_COUNT ]; + float maxError = 0.0f; + int ftz = f->ftz || gForceFTZ; + double maxErrorVal = 0.0f; + double maxErrorVal2 = 0.0f; + double maxErrorVal3 = 0.0f; + logFunctionInfo(f->name,sizeof(cl_double),gTestFastRelaxed); + + size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE; + uint64_t step = bufferSize / sizeof( double ); + if( gWimpyMode ) + { + step = (1ULL<<32) * gWimpyReductionFactor / (512); + } + + Force64BitFPUPrecision(); + + // Init the kernels + BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, f->nameInCode }; + if( (error = ThreadPool_Do( BuildKernel_DoubleFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info ) )) + { + return error; + } + /* + for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) + if( (error = BuildKernelDouble( f->nameInCode, (int) i, kernels + i, programs + i) ) ) + return error; + */ + + for( i = 0; i < (1ULL<<32); i += step ) + { + //Init input array + double *p = (double *)gIn; + double *p2 = (double *)gIn2; + double *p3 = (double *)gIn3; + j = 0; + if( i == 0 ) + { // test edge cases + uint32_t x, y, z; x = y = z = 0; + for( ; j < bufferSize / sizeof( double ); j++ ) + { + p[j] = specialValuesDouble[x]; + p2[j] = specialValuesDouble[y]; + p3[j] = specialValuesDouble[z]; + if( ++x >= specialValuesDoubleCount ) + { + x = 0; + if( ++y >= specialValuesDoubleCount ) + { + y = 0; + if( ++z >= specialValuesDoubleCount ) + break; + } + } + } + if( j == bufferSize / sizeof( double ) ) + vlog_error( "Test 
Error: not all special cases tested!\n" ); + } + + for( ; j < bufferSize / sizeof( double ); j++ ) + { + p[j] = DoubleFromUInt32(genrand_int32(d)); + p2[j] = DoubleFromUInt32(genrand_int32(d)); + p3[j] = DoubleFromUInt32(genrand_int32(d)); + } + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); + return error; + } + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, bufferSize, gIn2, 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error ); + return error; + } + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0, bufferSize, gIn3, 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer3 ***\n", error ); + return error; + } + + // write garbage into output arrays + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + uint32_t pattern = 0xffffdead; + memset_pattern4(gOut[j], &pattern, bufferSize); + if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, bufferSize, gOut[j], 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", error, j ); + goto exit; + } + } + + // Run the kernels + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + size_t vectorSize = sizeof( cl_double ) * sizeValues[j]; + size_t localCount = (bufferSize + vectorSize - 1) / vectorSize; // bufferSize / vectorSize rounded up + if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; } + if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; } + if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(programs[j]); goto exit; } + if( ( error = clSetKernelArg( kernels[j], 3, sizeof( gInBuffer3 ), &gInBuffer3 ) )) { 
LogBuildError(programs[j]); goto exit; } + + if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) + { + vlog_error( "FAILED -- could not execute kernel\n" ); + goto exit; + } + } + + + // Get that moving + if( (error = clFlush(gQueue) )) + vlog( "clFlush failed\n" ); + + //Calculate the correctly rounded reference result + double *r = (double *)gOut_Ref; + double *s = (double *)gIn; + double *s2 = (double *)gIn2; + double *s3 = (double *)gIn3; + for( j = 0; j < bufferSize / sizeof( double ); j++ ) + r[j] = (double) f->dfunc.f_fff( s[j], s2[j], s3[j] ); + + // Read the data back + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL)) ) + { + vlog_error( "ReadArray failed %d\n", error ); + goto exit; + } + } + + if( gSkipCorrectnessTesting ) + break; + + //Verify data + uint64_t *t = (uint64_t *)gOut_Ref; + for( j = 0; j < bufferSize / sizeof( double ); j++ ) + { + for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) + { + uint64_t *q = (uint64_t *)(gOut[k]); + + // If we aren't getting the correctly rounded result + if( t[j] != q[j] ) + { + double test = ((double*) q)[j]; + long double correct = f->dfunc.f_fff( s[j], s2[j], s3[j] ); + float err = Ulp_Error_Double( test, correct ); + int fail = ! (fabsf(err) <= f->double_ulps); + + if( fail && ftz ) + { + // retry per section 6.5.3.2 + if( IsDoubleSubnormal(correct) ) + { // look at me, + fail = fail && ( test != 0.0f ); + if( ! 
fail ) + err = 0.0f; + } + + // retry per section 6.5.3.3 + if( fail && IsDoubleSubnormal( s[j] ) ) + { // look at me, + long double correct2 = f->dfunc.f_fff( 0.0, s2[j], s3[j] ); + long double correct3 = f->dfunc.f_fff( -0.0, s2[j], s3[j] ); + float err2 = Ulp_Error_Double( test, correct2 ); + float err3 = Ulp_Error_Double( test, correct3 ); + fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps))); + if( fabsf( err2 ) < fabsf(err ) ) + err = err2; + if( fabsf( err3 ) < fabsf(err ) ) + err = err3; + + // retry per section 6.5.3.4 + if( IsDoubleResultSubnormal( correct2, f->double_ulps ) || IsDoubleResultSubnormal( correct3, f->double_ulps ) ) + { // look at me now, + fail = fail && ( test != 0.0f); + if( ! fail ) + err = 0.0f; + } + + //try with first two args as zero + if( IsDoubleSubnormal( s2[j] ) ) + { // its fun to have fun, + correct2 = f->dfunc.f_fff( 0.0, 0.0, s3[j] ); + correct3 = f->dfunc.f_fff( -0.0, 0.0, s3[j] ); + long double correct4 = f->dfunc.f_fff( 0.0, -0.0, s3[j] ); + long double correct5 = f->dfunc.f_fff( -0.0, -0.0, s3[j] ); + err2 = Ulp_Error_Double( test, correct2 ); + err3 = Ulp_Error_Double( test, correct3 ); + float err4 = Ulp_Error_Double( test, correct4 ); + float err5 = Ulp_Error_Double( test, correct5 ); + fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) && + (!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps))); + if( fabsf( err2 ) < fabsf(err ) ) + err = err2; + if( fabsf( err3 ) < fabsf(err ) ) + err = err3; + if( fabsf( err4 ) < fabsf(err ) ) + err = err4; + if( fabsf( err5 ) < fabsf(err ) ) + err = err5; + + // retry per section 6.5.3.4 + if( IsDoubleResultSubnormal( correct2, f->double_ulps ) || IsDoubleResultSubnormal( correct3, f->double_ulps ) || + IsDoubleResultSubnormal( correct4, f->double_ulps ) || IsDoubleResultSubnormal( correct5, f->double_ulps ) ) + { + fail = fail && ( test != 0.0f); + if( ! 
fail ) + err = 0.0f; + } + + if( IsDoubleSubnormal( s3[j] ) ) + { // but you have to know how! + correct2 = f->dfunc.f_fff( 0.0, 0.0, 0.0f ); + correct3 = f->dfunc.f_fff( -0.0, 0.0, 0.0f ); + correct4 = f->dfunc.f_fff( 0.0, -0.0, 0.0f ); + correct5 = f->dfunc.f_fff( -0.0, -0.0, 0.0f ); + long double correct6 = f->dfunc.f_fff( 0.0, 0.0, -0.0f ); + long double correct7 = f->dfunc.f_fff( -0.0, 0.0, -0.0f ); + long double correct8 = f->dfunc.f_fff( 0.0, -0.0, -0.0f ); + long double correct9 = f->dfunc.f_fff( -0.0, -0.0, -0.0f ); + err2 = Ulp_Error_Double( test, correct2 ); + err3 = Ulp_Error_Double( test, correct3 ); + err4 = Ulp_Error_Double( test, correct4 ); + err5 = Ulp_Error_Double( test, correct5 ); + float err6 = Ulp_Error_Double( test, correct6 ); + float err7 = Ulp_Error_Double( test, correct7 ); + float err8 = Ulp_Error_Double( test, correct8 ); + float err9 = Ulp_Error_Double( test, correct9 ); + /* fixed: test all eight zero-sign variants err2..err9 (err5 was duplicated and err9 unused) */ + fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) && + (!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps)) && + (!(fabsf(err6) <= f->double_ulps)) && (!(fabsf(err7) <= f->double_ulps)) && + (!(fabsf(err8) <= f->double_ulps)) && (!(fabsf(err9) <= f->double_ulps))); + if( fabsf( err2 ) < fabsf(err ) ) + err = err2; + if( fabsf( err3 ) < fabsf(err ) ) + err = err3; + if( fabsf( err4 ) < fabsf(err ) ) + err = err4; + if( fabsf( err5 ) < fabsf(err ) ) + err = err5; + if( fabsf( err6 ) < fabsf(err ) ) + err = err6; + if( fabsf( err7 ) < fabsf(err ) ) + err = err7; + if( fabsf( err8 ) < fabsf(err ) ) + err = err8; + if( fabsf( err9 ) < fabsf(err ) ) + err = err9; + + // retry per section 6.5.3.4 + if( IsDoubleResultSubnormal( correct2, f->double_ulps ) || IsDoubleResultSubnormal( correct3, f->double_ulps ) || + IsDoubleResultSubnormal( correct4, f->double_ulps ) || IsDoubleResultSubnormal( correct5, f->double_ulps ) || + IsDoubleResultSubnormal( correct6, f->double_ulps ) || IsDoubleResultSubnormal( correct7,
f->double_ulps ) || + IsDoubleResultSubnormal( correct8, f->double_ulps ) || IsDoubleResultSubnormal( correct9, f->double_ulps ) ) + { + fail = fail && ( test != 0.0f); + if( ! fail ) + err = 0.0f; + } + } + } + else if( IsDoubleSubnormal( s3[j] ) ) + { + correct2 = f->dfunc.f_fff( 0.0, s2[j], 0.0 ); + correct3 = f->dfunc.f_fff( -0.0, s2[j], 0.0 ); + long double correct4 = f->dfunc.f_fff( 0.0, s2[j], -0.0 ); + long double correct5 = f->dfunc.f_fff( -0.0, s2[j], -0.0 ); + err2 = Ulp_Error_Double( test, correct2 ); + err3 = Ulp_Error_Double( test, correct3 ); + float err4 = Ulp_Error_Double( test, correct4 ); + float err5 = Ulp_Error_Double( test, correct5 ); + fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) && + (!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps))); + if( fabsf( err2 ) < fabsf(err ) ) + err = err2; + if( fabsf( err3 ) < fabsf(err ) ) + err = err3; + if( fabsf( err4 ) < fabsf(err ) ) + err = err4; + if( fabsf( err5 ) < fabsf(err ) ) + err = err5; + + // retry per section 6.5.3.4 + if( IsDoubleResultSubnormal( correct2, f->double_ulps ) || IsDoubleResultSubnormal( correct3, f->double_ulps ) || + IsDoubleResultSubnormal( correct4, f->double_ulps ) || IsDoubleResultSubnormal( correct5, f->double_ulps ) ) + { + fail = fail && ( test != 0.0f); + if( ! 
fail ) + err = 0.0f; + } + } + } + else if( fail && IsDoubleSubnormal( s2[j] ) ) + { + long double correct2 = f->dfunc.f_fff( s[j], 0.0, s3[j] ); + long double correct3 = f->dfunc.f_fff( s[j], -0.0, s3[j] ); + float err2 = Ulp_Error_Double( test, correct2 ); + float err3 = Ulp_Error_Double( test, correct3 ); + fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps))); + if( fabsf( err2 ) < fabsf(err ) ) + err = err2; + if( fabsf( err3 ) < fabsf(err ) ) + err = err3; + + // retry per section 6.5.3.4 + if( IsDoubleResultSubnormal( correct2, f->double_ulps ) || IsDoubleResultSubnormal( correct3, f->double_ulps ) ) + { + fail = fail && ( test != 0.0f); + if( ! fail ) + err = 0.0f; + } + + //try with second two args as zero + if( IsDoubleSubnormal( s3[j] ) ) + { + correct2 = f->dfunc.f_fff( s[j], 0.0, 0.0 ); + correct3 = f->dfunc.f_fff( s[j], -0.0, 0.0 ); + long double correct4 = f->dfunc.f_fff( s[j], 0.0, -0.0 ); + long double correct5 = f->dfunc.f_fff( s[j], -0.0, -0.0 ); + err2 = Ulp_Error_Double( test, correct2 ); + err3 = Ulp_Error_Double( test, correct3 ); + float err4 = Ulp_Error_Double( test, correct4 ); + float err5 = Ulp_Error_Double( test, correct5 ); + fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps)) && + (!(fabsf(err4) <= f->double_ulps)) && (!(fabsf(err5) <= f->double_ulps))); + if( fabsf( err2 ) < fabsf(err ) ) + err = err2; + if( fabsf( err3 ) < fabsf(err ) ) + err = err3; + if( fabsf( err4 ) < fabsf(err ) ) + err = err4; + if( fabsf( err5 ) < fabsf(err ) ) + err = err5; + + // retry per section 6.5.3.4 + if( IsDoubleResultSubnormal( correct2, f->double_ulps ) || IsDoubleResultSubnormal( correct3, f->double_ulps ) || + IsDoubleResultSubnormal( correct4, f->double_ulps ) || IsDoubleResultSubnormal( correct5, f->double_ulps ) ) + { + fail = fail && ( test != 0.0f); + if( ! 
fail ) + err = 0.0f; + } + } + } + else if( fail && IsDoubleSubnormal(s3[j]) ) + { + long double correct2 = f->dfunc.f_fff( s[j], s2[j], 0.0 ); + long double correct3 = f->dfunc.f_fff( s[j], s2[j], -0.0 ); + float err2 = Ulp_Error_Double( test, correct2 ); + float err3 = Ulp_Error_Double( test, correct3 ); + fail = fail && ((!(fabsf(err2) <= f->double_ulps)) && (!(fabsf(err3) <= f->double_ulps))); + if( fabsf( err2 ) < fabsf(err ) ) + err = err2; + if( fabsf( err3 ) < fabsf(err ) ) + err = err3; + + // retry per section 6.5.3.4 + if( IsDoubleResultSubnormal( correct2, f->double_ulps ) || IsDoubleResultSubnormal( correct3, f->double_ulps ) ) + { + fail = fail && ( test != 0.0f); + if( ! fail ) + err = 0.0f; + } + } + } + + if( fabsf(err ) > maxError ) + { + maxError = fabsf(err); + maxErrorVal = s[j]; + maxErrorVal2 = s2[j]; + maxErrorVal3 = s3[j]; + } + + if( fail ) + { + vlog_error( "\nERROR: %sD%s: %f ulp error at {%.13la, %.13la, %.13la}: *%.13la vs. %.13la\n", f->name, sizeNames[k], err, s[j], s2[j], s3[j], ((double*) gOut_Ref)[j], test ); + error = -1; + goto exit; + } + } + } + } + + if( 0 == (i & 0x0fffffff) ) + { + if (gVerboseBruteForce) + { + vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step, bufferSize); + } else + { + vlog("." ); + } + fflush(stdout); + } + } + + if( ! 
gSkipCorrectnessTesting ) + { + if( gWimpyMode ) + vlog( "Wimp pass" ); + else + vlog( "passed" ); + } + + if( gMeasureTimes ) + { + //Init input array + double *p = (double *)gIn; + double *p2 = (double *)gIn2; + double *p3 = (double *)gIn3; + for( j = 0; j < bufferSize / sizeof( double ); j++ ) + { + p[j] = DoubleFromUInt32(genrand_int32(d)); + p2[j] = DoubleFromUInt32(genrand_int32(d)); + p3[j] = DoubleFromUInt32(genrand_int32(d)); + } + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); + return error; + } + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0, bufferSize, gIn2, 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error ); + return error; + } + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0, bufferSize, gIn3, 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer3 ***\n", error ); + return error; + } + + + // Run the kernels + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + size_t vectorSize = sizeof( cl_double ) * sizeValues[j]; + size_t localCount = (bufferSize + vectorSize - 1) / vectorSize; // bufferSize / vectorSize rounded up + if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; } + if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; } + if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(programs[j]); goto exit; } + if( ( error = clSetKernelArg( kernels[j], 3, sizeof( gInBuffer3 ), &gInBuffer3 ) )) { LogBuildError(programs[j]); goto exit; } + + double sum = 0.0; + double bestTime = INFINITY; + for( k = 0; k < PERF_LOOP_COUNT; k++ ) + { + uint64_t startTime = GetTime(); + if( (error = clEnqueueNDRangeKernel(gQueue, 
kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) + { + vlog_error( "FAILED -- could not execute kernel\n" ); + goto exit; + } + + // Make sure OpenCL is done + if( (error = clFinish(gQueue) ) ) + { + vlog_error( "Error %d at clFinish\n", error ); + goto exit; + } + + uint64_t endTime = GetTime(); + double time = SubtractTime( endTime, startTime ); + sum += time; + if( time < bestTime ) + bestTime = time; + } + + if( gReportAverageTimes ) + bestTime = sum / PERF_LOOP_COUNT; + double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (bufferSize / sizeof( double ) ); + vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s", f->name, sizeNames[j] ); + } + for( ; j < gMaxVectorSizeIndex; j++ ) + vlog( "\t -- " ); + } + + if( ! gSkipCorrectnessTesting ) + vlog( "\t%8.2f @ {%a, %a, %a}", maxError, maxErrorVal, maxErrorVal2, maxErrorVal3 ); + vlog( "\n" ); + +exit: + // Release + for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) + { + clReleaseKernel(kernels[k]); + clReleaseProgram(programs[k]); + } + + return error; +} + + diff --git a/test_conformance/math_brute_force/unary.c b/test_conformance/math_brute_force/unary.c new file mode 100644 index 00000000..d8b3dbae --- /dev/null +++ b/test_conformance/math_brute_force/unary.c @@ -0,0 +1,1191 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
//
// Brute-force conformance tests for unary math builtins (float and double):
// builds per-vector-size kernels computing out[i] = f(in[i]), sweeps all 2^32
// input bit patterns across a thread pool, and verifies results against a
// correctly rounded host reference within the function's ULP budget.
#include "Utility.h"

#include <string.h>             // NOTE(review): header name was garbled in the patch text; <string.h> matches the strcmp/memset/strstr usage below — confirm against upstream
#include "FunctionList.h"

#if defined( __APPLE__ )
    #include <sys/time.h>       // NOTE(review): garbled include restored; gettimeofday() is used under __APPLE__ below — confirm against upstream
#endif

// Test entry points for the "unary" class, one per precision.
int TestFunc_Float_Float(const Func *f, MTdata);
int TestFunc_Double_Double(const Func *f, MTdata);

#if defined( __cplusplus)
    extern "C"
#endif
// Dispatch table consumed by the test driver: class name + the two entry points.
const vtbl _unary = { "unary", TestFunc_Float_Float, TestFunc_Double_Double };

static int BuildKernel( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p );
static int BuildKernelDouble( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p );

// Build one program and kernel_count kernels computing out[i] = name( in[i] )
// for float data of the given vector size index. Vector size 3 gets a special
// kernel using vload3/vstore3, padding the ragged tail elements with NAN.
static int BuildKernel( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p )
{
    // Scalar / vec2,4,8,16 kernel: straight elementwise apply.
    const char *c[] = {
        "__kernel void math_kernel", sizeNames[vectorSize], "( __global float", sizeNames[vectorSize], "* out, __global float", sizeNames[vectorSize], "* in)\n"
        "{\n"
        "   int i = get_global_id(0);\n"
        "   out[i] = ", name, "( in[i] );\n"
        "}\n"
    };
    // float3 kernel: last work-item handles the 1- or 2-element remainder.
    const char *c3[] = {
        "__kernel void math_kernel", sizeNames[vectorSize], "( __global float* out, __global float* in)\n"
        "{\n"
        "   size_t i = get_global_id(0);\n"
        "   if( i + 1 < get_global_size(0) )\n"
        "   {\n"
        "       float3 f0 = vload3( 0, in + 3 * i );\n"
        "       f0 = ", name, "( f0 );\n"
        "       vstore3( f0, 0, out + 3*i );\n"
        "   }\n"
        "   else\n"
        "   {\n"
        "       size_t parity = i & 1;   // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n"
        "       float3 f0;\n"
        "       switch( parity )\n"
        "       {\n"
        "           case 1:\n"
        "               f0 = (float3)( in[3*i], NAN, NAN ); \n"
        "               break;\n"
        "           case 0:\n"
        "               f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
        "               break;\n"
        "       }\n"
        "       f0 = ", name, "( f0 );\n"
        "       switch( parity )\n"
        "       {\n"
        "           case 0:\n"
        "               out[3*i+1] = f0.y; \n"
        "               // fall through\n"
        "           case 1:\n"
        "               out[3*i] = f0.x; \n"
        "               break;\n"
        "       }\n"
        "   }\n"
        "}\n"
    };

    const char **kern = c;
    size_t kernSize = sizeof(c)/sizeof(c[0]);

    if( sizeValues[vectorSize] == 3 )
    {
        kern = c3;
        kernSize = sizeof(c3)/sizeof(c3[0]);
    }

    char testName[32];
    snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] );

    return MakeKernels(kern, (cl_uint) kernSize, testName, kernel_count, k, p);
}

// Double-precision variant of BuildKernel: same kernel shapes, with the
// cl_khr_fp64 pragma prepended so double types compile.
static int BuildKernelDouble( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p )
{
    const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
        "__kernel void math_kernel", sizeNames[vectorSize], "( __global double", sizeNames[vectorSize], "* out, __global double", sizeNames[vectorSize], "* in)\n"
        "{\n"
        "   int i = get_global_id(0);\n"
        "   out[i] = ", name, "( in[i] );\n"
        "}\n"
    };

    const char *c3[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
        "__kernel void math_kernel", sizeNames[vectorSize], "( __global double* out, __global double* in)\n"
        "{\n"
        "   size_t i = get_global_id(0);\n"
        "   if( i + 1 < get_global_size(0) )\n"
        "   {\n"
        "       double3 f0 = vload3( 0, in + 3 * i );\n"
        "       f0 = ", name, "( f0 );\n"
        "       vstore3( f0, 0, out + 3*i );\n"
        "   }\n"
        "   else\n"
        "   {\n"
        "       size_t parity = i & 1;   // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two buffer size \n"
        "       double3 f0;\n"
        "       switch( parity )\n"
        "       {\n"
        "           case 1:\n"
        "               f0 = (double3)( in[3*i], NAN, NAN ); \n"
        "               break;\n"
        "           case 0:\n"
        "               f0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
        "               break;\n"
        "       }\n"
        "       f0 = ", name, "( f0 );\n"
        "       switch( parity )\n"
        "       {\n"
        "           case 0:\n"
        "               out[3*i+1] = f0.y; \n"
        "               // fall through\n"
        "           case 1:\n"
        "               out[3*i] = f0.x; \n"
        "               break;\n"
        "       }\n"
        "   }\n"
        "}\n"
    };

    const char **kern = c;
    size_t kernSize = sizeof(c)/sizeof(c[0]);

    if( sizeValues[vectorSize] == 3 )
    {
        kern = c3;
        kernSize = sizeof(c3)/sizeof(c3[0]);
    }


    char testName[32];
    snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] );

    return MakeKernels(kern, (cl_uint) kernSize, testName, kernel_count, k, p);
}

// Job descriptor handed to ThreadPool_Do for parallel kernel builds:
// one job per vector size, starting at 'offset'.
typedef struct BuildKernelInfo
{
    cl_uint     offset;             // the first vector size to build
    cl_uint     kernel_count;
    cl_kernel   **kernels;
    cl_program  *programs;
    const char  *nameInCode;
}BuildKernelInfo;

// Thread-pool trampoline: builds the float kernels for vector size offset+job_id.
static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p );
static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p )
{
    BuildKernelInfo *info = (BuildKernelInfo*) p;
    cl_uint i = info->offset + job_id;
    return BuildKernel( info->nameInCode, i, info->kernel_count, info->kernels[i], info->programs + i );
}

// Thread-pool trampoline: builds the double kernels for vector size offset+job_id.
static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p );
static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p )
{
    BuildKernelInfo *info = (BuildKernelInfo*) p;
    cl_uint i = info->offset + job_id;
    return BuildKernelDouble( info->nameInCode, i, info->kernel_count, info->kernels[i], info->programs + i );
}

//Thread specific data for a worker thread
typedef struct ThreadInfo
{
    cl_mem      inBuf;                              // input buffer for the thread
    cl_mem      outBuf[ VECTOR_SIZE_COUNT ];        // output buffers for the thread
    float       maxError;                           // max error value. Init to 0.
    double      maxErrorValue;                      // position of the max error value. Init to 0.
    cl_command_queue tQueue;                        // per thread command queue to improve performance
}ThreadInfo;

// Shared, read-only (after setup) description of one test run, passed to every
// worker job together with the per-thread ThreadInfo array.
typedef struct TestInfo
{
    size_t      subBufferSize;                      // Size of the sub-buffer in elements
    const Func  *f;                                 // A pointer to the function info
    cl_program  programs[ VECTOR_SIZE_COUNT ];      // programs for various vector sizes
    cl_kernel   *k[VECTOR_SIZE_COUNT ];             // arrays of thread-specific kernels for each worker thread:  k[vector_size][thread_id]
    ThreadInfo  *tinfo;                             // An array of thread specific information for each worker thread
    cl_uint     threadCount;                        // Number of worker threads
    cl_uint     step;                               // step between each chunk and the next.
    cl_uint     scale;                              // stride between individual test values
    float       ulps;                               // max_allowed ulps
    int         ftz;                                // non-zero if running in flush to zero mode

    int         isRangeLimited;                     // 1 if the function is only to be evaluated over a range
    float       half_sin_cos_tan_limit;
}TestInfo;

static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *p );

// Driver for the float->float test: sets up per-thread sub-buffers, queues and
// kernels, sweeps the whole 32-bit input space via TestFloat on the thread
// pool, then (optionally) runs the timing loop. Returns 0 on pass.
int TestFunc_Float_Float(const Func *f, MTdata d)
{
    TestInfo    test_info;
    cl_int      error;
    size_t      i, j;
    float       maxError = 0.0f;
    double      maxErrorVal = 0.0;
    int         skipTestingRelaxed = ( gTestFastRelaxed && strcmp(f->name,"tan") == 0 );

    logFunctionInfo(f->name,sizeof(cl_float),gTestFastRelaxed);

    // Init test_info
    memset( &test_info, 0, sizeof( test_info ) );
    test_info.threadCount = GetThreadCount();

    test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
    test_info.scale = 1;
    if (gWimpyMode)
    {
        test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
        test_info.scale = (cl_uint) sizeof(cl_float) * 2 * gWimpyReductionFactor;
    }
    test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
    test_info.f = f;
    test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps;
    test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
    // cl_kernels aren't thread safe, so we make one for each vector size for every thread
    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
    {
        size_t array_size = test_info.threadCount * sizeof( cl_kernel );
        test_info.k[i] = (cl_kernel*)malloc( array_size );
        if( NULL == test_info.k[i] )
        {
            vlog_error( "Error: Unable to allocate storage for kernels!\n" );
            error = CL_OUT_OF_HOST_MEMORY;
            goto exit;
        }
        memset( test_info.k[i], 0, array_size );
    }
    test_info.tinfo = (ThreadInfo*)malloc( test_info.threadCount * sizeof(*test_info.tinfo) );
    if( NULL == test_info.tinfo )
    {
        vlog_error( "Error: Unable to allocate storage for thread specific data.\n" );
        error = CL_OUT_OF_HOST_MEMORY;
        goto exit;
    }
    memset( test_info.tinfo, 0, test_info.threadCount * sizeof(*test_info.tinfo) );
    for( i = 0; i < test_info.threadCount; i++ )
    {
        // Each worker owns a disjoint region of the shared gIn/gOut buffers.
        cl_buffer_region region = { i * test_info.subBufferSize * sizeof( cl_float), test_info.subBufferSize * sizeof( cl_float) };
        test_info.tinfo[i].inBuf = clCreateSubBuffer( gInBuffer, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
        if( error || NULL == test_info.tinfo[i].inBuf)
        {
            vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size );
            goto exit;
        }

        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
        {
            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
            if( error || NULL == test_info.tinfo[i].outBuf[j] )
            {
                vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size );
                goto exit;
            }
        }
        test_info.tinfo[i].tQueue = clCreateCommandQueueWithProperties(gContext, gDevice, 0, &error);
        if( NULL == test_info.tinfo[i].tQueue || error )
        {
            vlog_error( "clCreateCommandQueue failed. (%d)\n", error );
            goto exit;
        }

    }

    // Check for special cases for unary float
    test_info.isRangeLimited = 0;
    test_info.half_sin_cos_tan_limit = 0;
    if( 0 == strcmp( f->name, "half_sin") || 0 == strcmp( f->name, "half_cos") )
    {
        test_info.isRangeLimited = 1;
        test_info.half_sin_cos_tan_limit = 1.0f + test_info.ulps * (FLT_EPSILON/2.0f);          // out of range results from finite inputs must be in [-1,1]
    }
    else if( 0 == strcmp( f->name, "half_tan"))
    {
        test_info.isRangeLimited = 1;
        test_info.half_sin_cos_tan_limit = INFINITY;            // out of range resut from finite inputs must be numeric
    }

    // Init the kernels
    {
        BuildKernelInfo build_info = { gMinVectorSizeIndex, test_info.threadCount, test_info.k, test_info.programs, f->nameInCode };
        if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) ))
            goto exit;
    }

    // NOTE(review): this also enters the correctness path when
    // gSkipCorrectnessTesting is set but skipTestingRelaxed is true — verify intent.
    if( !gSkipCorrectnessTesting || skipTestingRelaxed)
    {
        error = ThreadPool_Do( TestFloat, (cl_uint) ((1ULL<<32) / test_info.step), &test_info );

        // Accumulate the arithmetic errors
        for( i = 0; i < test_info.threadCount; i++ )
        {
            if( test_info.tinfo[i].maxError > maxError )
            {
                maxError = test_info.tinfo[i].maxError;
                maxErrorVal = test_info.tinfo[i].maxErrorValue;
            }
        }

        if( error )
            goto exit;

        if( gWimpyMode )
            vlog( "Wimp pass" );
        else
            vlog( "passed" );

        if( skipTestingRelaxed )
        {
            vlog(" (rlx skip correctness testing)\n");
            goto exit;
        }
    }

    if( gMeasureTimes )
    {
        //Init input array
        uint32_t *p = (uint32_t *)gIn;
        if( strstr( f->name, "exp" ) || strstr( f->name, "sin" ) || strstr( f->name, "cos" ) || strstr( f->name, "tan" ) )
            for( j = 0; j < BUFFER_SIZE / sizeof( float ); j++ )
                ((float*)p)[j] = (float) genrand_real1(d);
        else if( strstr( f->name, "log" ) )
            for( j = 0; j < BUFFER_SIZE / sizeof( float ); j++ )
                p[j] = genrand_int32(d) & 0x7fffffff;   // clear the sign bit: log-family inputs kept non-negative
        else
            for( j = 0; j < BUFFER_SIZE / sizeof( float ); j++ )
                p[j] = genrand_int32(d);
        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, BUFFER_SIZE, gIn, 0, NULL, NULL) ))
        {
            vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
            return error;
        }


        // Run the kernels
        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
        {
            size_t vectorSize = sizeValues[j] * sizeof(cl_float);
            size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
            if( ( error = clSetKernelArg( test_info.k[j][0], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError( test_info.programs[j]); goto exit; }
            if( ( error = clSetKernelArg( test_info.k[j][0], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(test_info.programs[j]); goto exit; }

            double sum = 0.0;
            double bestTime = INFINITY;
            for( i = 0; i < PERF_LOOP_COUNT; i++ )
            {
                uint64_t startTime = GetTime();
                if( (error = clEnqueueNDRangeKernel(gQueue, test_info.k[j][0], 1, NULL, &localCount, NULL, 0, NULL, NULL)) )
                {
                    vlog_error( "FAILED -- could not execute kernel\n" );
                    goto exit;
                }

                // Make sure OpenCL is done
                if( (error = clFinish(gQueue) ) )
                {
                    vlog_error( "Error %d at clFinish\n", error );
                    goto exit;
                }

                uint64_t endTime = GetTime();
                double current_time = SubtractTime( endTime, startTime );
                sum += current_time;
                if( current_time < bestTime )
                    bestTime = current_time;
            }

            if( gReportAverageTimes )
                bestTime = sum / PERF_LOOP_COUNT;
            double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (BUFFER_SIZE / sizeof( float ) );
            vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s", f->name, sizeNames[j] );
        }
    }

    if( ! gSkipCorrectnessTesting )
        vlog( "\t%8.2f @ %a", maxError, maxErrorVal );
    vlog( "\n" );

exit:
    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
    {
        clReleaseProgram(test_info.programs[i]);
        if( test_info.k[i] )
        {
            for( j = 0; j < test_info.threadCount; j++ )
                clReleaseKernel(test_info.k[i][j]);

            free( test_info.k[i] );
        }
    }
    if( test_info.tinfo )
    {
        for( i = 0; i < test_info.threadCount; i++ )
        {
            clReleaseMemObject(test_info.tinfo[i].inBuf);
            for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
        }

        free( test_info.tinfo );
    }

    return error;
}

// Per-chunk worker (float). job_id selects the 32-bit bit-pattern range
// [base, base + elements*scale); thread_id selects this worker's sub-buffers,
// kernels and queue. Enqueues inputs, runs every vector-size kernel, computes
// the host reference, and verifies each result within 'ulps' with the
// flush-to-zero / subnormal retries of spec sections 6.5.3.2-6.5.3.4.
// Returns CL_SUCCESS, a CL error, or -1 on a correctness failure.
static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
{
    const TestInfo *job = (const TestInfo *) data;
    size_t  buffer_elements = job->subBufferSize;
    size_t  buffer_size = buffer_elements * sizeof( cl_float );
    cl_uint scale = job->scale;
    cl_uint base = job_id * (cl_uint) job->step;
    ThreadInfo *tinfo = job->tinfo + thread_id;
    float   ulps = job->ulps;
    fptr    func = job->f->func;
    const char * fname = job->f->name;
    if ( gTestFastRelaxed )
    {
        // Relaxed mode: looser error bound and the relaxed reference function.
        ulps = job->f->relaxed_error;
        func = job->f->rfunc;
    }

    cl_uint j, k;
    cl_int error;

    int isRangeLimited = job->isRangeLimited;
    float half_sin_cos_tan_limit = job->half_sin_cos_tan_limit;
    int ftz = job->ftz;

    // start the map of the output arrays
    cl_event e[ VECTOR_SIZE_COUNT ];
    cl_uint  *out[ VECTOR_SIZE_COUNT ];
    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
    {
        out[j] = (uint32_t*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0, buffer_size, 0, NULL, e + j, &error);
        if( error || NULL == out[j])
        {
            vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
            return error;
        }
    }

    // Get that moving
    if( (error = clFlush(tinfo->tQueue) ))
        vlog( "clFlush failed\n" );

    // Write the new values to the input array: consecutive bit patterns base + j*scale.
    cl_uint *p = (cl_uint*) gIn + thread_id * buffer_elements;
    for( j = 0; j < buffer_elements; j++ )
    {
        p[j] = base + j * scale;
        if( gTestFastRelaxed )
        {
            float p_j = *(float *) &p[j];
            if ( strcmp(fname,"sin")==0 || strcmp(fname,"cos")==0 )     //the domain of the function is [-pi,pi]
            {
                if( fabs(p_j) > M_PI )
                    p[j] = NAN;
            }

            if ( strcmp( fname, "reciprocal" ) == 0 )
            {
                if( fabs(p_j) > 0x7E800000 )    //the domain of the function is [2^-126,2^126]
                    p[j] = NAN;
            }
        }
    }

    if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, buffer_size, p, 0, NULL, NULL) ))
    {
        vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error );
        return error;
    }

    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
    {
        //Wait for the map to finish
        if( (error = clWaitForEvents(1, e + j) ))
        {
            vlog_error( "Error: clWaitForEvents failed! err: %d\n", error );
            return error;
        }
        if( (error = clReleaseEvent( e[j] ) ))
        {
            vlog_error( "Error: clReleaseEvent failed! err: %d\n", error );
            return error;
        }

        // Fill the result buffer with garbage, so that old results don't carry over
        uint32_t pattern = 0xffffdead;
        memset_pattern4(out[j], &pattern, buffer_size);
        if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL) ))
        {
            vlog_error( "Error: clEnqueueMapBuffer failed! err: %d\n", error );
            return error;
        }

        // run the kernel
        size_t vectorCount = (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
        cl_kernel kernel = job->k[j][thread_id];  //each worker thread has its own copy of the cl_kernel
        cl_program program = job->programs[j];

        if( ( error = clSetKernelArg( kernel, 0, sizeof( tinfo->outBuf[j] ), &tinfo->outBuf[j] ))){ LogBuildError(program); return error; }
        if( ( error = clSetKernelArg( kernel, 1, sizeof( tinfo->inBuf ), &tinfo->inBuf ) )) { LogBuildError(program); return error; }

        if( (error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL, &vectorCount, NULL, 0, NULL, NULL)))
        {
            vlog_error( "FAILED -- could not execute kernel\n" );
            return error;
        }
    }


    // Get that moving
    if( (error = clFlush(tinfo->tQueue) ))
        vlog( "clFlush 2 failed\n" );

    if( gSkipCorrectnessTesting )
        return CL_SUCCESS;

    //Calculate the correctly rounded reference result
    float *r = (float *)gOut_Ref + thread_id * buffer_elements;
    float *s = (float *)p;
    for( j = 0; j < buffer_elements; j++ )
        r[j] = (float) func.f_f( s[j] );

    // Read the data back -- no need to wait for the first N-1 buffers. This is an in order queue.
    for( j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++ )
    {
        out[j] = (uint32_t*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error);
        if( error || NULL == out[j] )
        {
            vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
            return error;
        }
    }
    // Wait for the last buffer
    out[j] = (uint32_t*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_TRUE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error);
    if( error || NULL == out[j] )
    {
        vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
        return error;
    }

    //Verify data: first compare bit patterns; only compute ULP error on mismatch.
    uint32_t *t = (uint32_t *)r;
    for( j = 0; j < buffer_elements; j++ )
    {
        for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
        {
            uint32_t *q = out[k];

            // If we aren't getting the correctly rounded result
            if( t[j] != q[j] )
            {
                float test = ((float*) q)[j];
                double correct = func.f_f( s[j] );
                float err = Ulp_Error( test, correct );
                float abs_error = Abs_Error( test, correct );
                int fail = 0;
                int use_abs_error = 0;

                // it is possible for the output to not match the reference result but for Ulp_Error
                // to be zero, for example -1.#QNAN vs. 1.#QNAN. In such cases there is no failure
                if (err == 0.0f)
                {
                    fail = 0;
                }
                else if( gTestFastRelaxed )
                {
                    // Per-function relaxed-precision acceptance rules.
                    if ( strcmp(fname,"sin")==0 || strcmp(fname,"cos")==0 )
                    {
                        fail = ! (fabsf(abs_error) <= ulps);
                        use_abs_error = 1;
                    }

                    if ( strcmp(fname, "reciprocal") == 0 )
                    {
                        fail = ! (fabsf(err) <= ulps);
                    }

                    if ( strcmp(fname, "exp") == 0 || strcmp(fname, "exp2") == 0 )
                    {

                        float exp_error = 3+floor(fabs(2*s[j]));    // input-dependent bound: 3 + floor(|2x|) ulps
                        fail = ! (fabsf(err) <= exp_error);
                        ulps = exp_error;
                    }
                    if (strcmp(fname, "tan") == 0) {

                        if( !gFastRelaxedDerived )
                        {
                            fail = ! (fabsf(err) <= ulps);
                        }
                        // Else fast math derived implementation does not require ULP verification
                    }
                    if (strcmp(fname, "exp10") == 0)
                    {
                        if( !gFastRelaxedDerived )
                        {
                            fail = ! (fabsf(err) <= ulps);
                        }
                        // Else fast math derived implementation does not require ULP verification
                    }
                    if ( strcmp(fname,"log") == 0 || strcmp(fname,"log2") == 0 )
                    {
                        if( s[j] >= 0.5 && s[j] <= 2 )
                        {
                            fail = ! (fabsf(abs_error) <= ulps );
                        }
                        else
                        {
                            ulps = gIsEmbedded ? job->f->float_embedded_ulps : job->f->float_ulps;
                            fail = ! (fabsf(err) <= ulps);
                        }

                    }


                    // fast-relaxed implies finite-only
                    if( IsFloatInfinity(correct) || IsFloatNaN(correct) ||
                        IsFloatInfinity(s[j])    || IsFloatNaN(s[j]) ) {
                        fail = 0;
                        err = 0;
                    }
                }
                else
                {
                    fail = ! (fabsf(err) <= ulps);
                }

                // half_sin/cos/tan are only valid between +-2**16, Inf, NaN
                if( isRangeLimited && fabsf(s[j]) > MAKE_HEX_FLOAT(0x1.0p16f, 0x1L, 16) && fabsf(s[j]) < INFINITY )
                {
                    if( fabsf( test ) <= half_sin_cos_tan_limit )
                    {
                        err = 0;
                        fail = 0;
                    }
                }

                if( fail )
                {
                    if( ftz )
                    {
                        typedef int (*CheckForSubnormal) (double,float);    // If we are in fast relaxed math, we have a different calculation for the subnormal threshold.
                        CheckForSubnormal isFloatResultSubnormalPtr;

                        if ( gTestFastRelaxed )
                        {
                            isFloatResultSubnormalPtr = &IsFloatResultSubnormalAbsError;
                        }
                        else
                        {
                            isFloatResultSubnormalPtr = &IsFloatResultSubnormal;
                        }
                        // retry per section 6.5.3.2
                        if( (*isFloatResultSubnormalPtr)(correct, ulps) )
                        {
                            fail = fail && ( test != 0.0f );
                            if( ! fail )
                                err = 0.0f;
                        }

                        // retry per section 6.5.3.3
                        if( IsFloatSubnormal( s[j] ) )
                        {
                            // Subnormal input may legally be flushed: accept f(+0) or f(-0).
                            double correct2 = func.f_f( 0.0 );
                            double correct3 = func.f_f( -0.0 );
                            float err2;
                            float err3;
                            if( use_abs_error )
                            {
                                err2 = Abs_Error( test, correct2 );
                                err3 = Abs_Error( test, correct3 );
                            }
                            else
                            {
                                err2 = Ulp_Error( test, correct2 );
                                err3 = Ulp_Error( test, correct3 );
                            }
                            fail = fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)));
                            if( fabsf( err2 ) < fabsf(err ) )
                                err = err2;
                            if( fabsf( err3 ) < fabsf(err ) )
                                err = err3;

                            // retry per section 6.5.3.4
                            if( (*isFloatResultSubnormalPtr)(correct2, ulps ) || (*isFloatResultSubnormalPtr)(correct3, ulps ) )
                            {
                                fail = fail && ( test != 0.0f);
                                if( ! fail )
                                    err = 0.0f;
                            }
                        }
                    }
                }
                if( fabsf(err ) > tinfo->maxError )
                {
                    tinfo->maxError = fabsf(err);
                    tinfo->maxErrorValue = s[j];
                }
                if( fail )
                {
                    vlog_error( "\nERROR: %s%s: %f ulp error at %a (0x%8.8x): *%a vs. %a\n", job->f->name, sizeNames[k], err, ((float*) s)[j], ((uint32_t*) s)[j], ((float*) t)[j], test);
                    return -1;
                }
            }
        }
    }

    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
    {
        if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)) )
        {
            vlog_error( "Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n", j, error );
            return error;
        }
    }

    if( (error = clFlush(tinfo->tQueue) ))
        vlog( "clFlush 3 failed\n" );


    if( 0 == ( base & 0x0fffffff) )
    {
        if (gVerboseBruteForce)
        {
            vlog("base:%14u step:%10u scale:%10u buf_elements:%10zd ulps:%5.3f ThreadCount:%2u\n", base, job->step, job->scale, buffer_elements, job->ulps, job->threadCount);
        } else
        {
            vlog("." );
        }
        fflush(stdout);
    }

    return CL_SUCCESS;
}



// Per-chunk worker (double). Mirrors TestFloat: inputs are doubles built from
// 32-bit patterns via DoubleFromUInt32, the reference is computed in long
// double, and FTZ retries follow spec sections 6.5.3.2-6.5.3.4. No relaxed mode.
static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
{
    const TestInfo *job = (const TestInfo *) data;
    size_t  buffer_elements = job->subBufferSize;
    size_t  buffer_size = buffer_elements * sizeof( cl_double );
    cl_uint scale = job->scale;
    cl_uint base = job_id * (cl_uint) job->step;
    ThreadInfo *tinfo = job->tinfo + thread_id;
    float   ulps = job->ulps;
    dptr    func = job->f->dfunc;
    cl_uint j, k;
    cl_int error;
    int ftz = job->ftz;

    // x87 hosts: make sure the FPU computes the long double reference at 64-bit precision.
    Force64BitFPUPrecision();

    // start the map of the output arrays
    cl_event e[ VECTOR_SIZE_COUNT ];
    cl_ulong *out[ VECTOR_SIZE_COUNT ];
    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
    {
        out[j] = (cl_ulong*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0, buffer_size, 0, NULL, e + j, &error);
        if( error || NULL == out[j])
        {
            vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
            return error;
        }
    }

    // Get that moving
    if( (error = clFlush(tinfo->tQueue) ))
        vlog( "clFlush failed\n" );

    // Write the new values to the input array
    cl_double *p = (cl_double*) gIn + thread_id * buffer_elements;
    for( j = 0; j < buffer_elements; j++ )
        p[j] = DoubleFromUInt32( base + j * scale);

    if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, buffer_size, p, 0, NULL, NULL) ))
    {
        vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error );
        return error;
    }

    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
    {
        //Wait for the map to finish
        if( (error = clWaitForEvents(1, e + j) ))
        {
            vlog_error( "Error: clWaitForEvents failed! err: %d\n", error );
            return error;
        }
        if( (error = clReleaseEvent( e[j] ) ))
        {
            vlog_error( "Error: clReleaseEvent failed! err: %d\n", error );
            return error;
        }

        // Fill the result buffer with garbage, so that old results don't carry over
        uint32_t pattern = 0xffffdead;
        memset_pattern4(out[j], &pattern, buffer_size);
        if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL) ))
        {
            vlog_error( "Error: clEnqueueMapBuffer failed! err: %d\n", error );
            return error;
        }

        // run the kernel
        size_t vectorCount = (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
        cl_kernel kernel = job->k[j][thread_id];  //each worker thread has its own copy of the cl_kernel
        cl_program program = job->programs[j];

        if( ( error = clSetKernelArg( kernel, 0, sizeof( tinfo->outBuf[j] ), &tinfo->outBuf[j] ))){ LogBuildError(program); return error; }
        if( ( error = clSetKernelArg( kernel, 1, sizeof( tinfo->inBuf ), &tinfo->inBuf ) )) { LogBuildError(program); return error; }

        if( (error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL, &vectorCount, NULL, 0, NULL, NULL)))
        {
            vlog_error( "FAILED -- could not execute kernel\n" );
            return error;
        }
    }


    // Get that moving
    if( (error = clFlush(tinfo->tQueue) ))
        vlog( "clFlush 2 failed\n" );

    if( gSkipCorrectnessTesting )
        return CL_SUCCESS;

    //Calculate the correctly rounded reference result
    cl_double *r = (cl_double *)gOut_Ref + thread_id * buffer_elements;
    cl_double *s = (cl_double *)p;
    for( j = 0; j < buffer_elements; j++ )
        r[j] = (cl_double) func.f_f( s[j] );

    // Read the data back -- no need to wait for the first N-1 buffers. This is an in order queue.
    for( j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++ )
    {
        out[j] = (cl_ulong*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error);
        if( error || NULL == out[j] )
        {
            vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
            return error;
        }
    }
    // Wait for the last buffer
    out[j] = (cl_ulong*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_TRUE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error);
    if( error || NULL == out[j] )
    {
        vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
        return error;
    }


    //Verify data
    cl_ulong *t = (cl_ulong *)r;
    for( j = 0; j < buffer_elements; j++ )
    {
        for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
        {
            cl_ulong *q = out[k];

            // If we aren't getting the correctly rounded result
            if( t[j] != q[j] )
            {
                cl_double test = ((cl_double*) q)[j];
                long double correct = func.f_f( s[j] );
                float err = Ulp_Error_Double( test, correct );
                int fail = ! (fabsf(err) <= ulps);

                if( fail )
                {
                    if( ftz )
                    {
                        // retry per section 6.5.3.2
                        if( IsDoubleResultSubnormal(correct, ulps) )
                        {
                            fail = fail && ( test != 0.0f );
                            if( ! fail )
                                err = 0.0f;
                        }

                        // retry per section 6.5.3.3
                        if( IsDoubleSubnormal( s[j] ) )
                        {
                            long double correct2 = func.f_f( 0.0L );
                            long double correct3 = func.f_f( -0.0L );
                            float err2 = Ulp_Error_Double( test, correct2 );
                            float err3 = Ulp_Error_Double( test, correct3 );
                            fail = fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)));
                            if( fabsf( err2 ) < fabsf(err ) )
                                err = err2;
                            if( fabsf( err3 ) < fabsf(err ) )
                                err = err3;

                            // retry per section 6.5.3.4
                            if( IsDoubleResultSubnormal(correct2, ulps ) || IsDoubleResultSubnormal(correct3, ulps ) )
                            {
                                fail = fail && ( test != 0.0f);
                                if( ! fail )
                                    err = 0.0f;
                            }
                        }
                    }
                }
                if( fabsf(err ) > tinfo->maxError )
                {
                    tinfo->maxError = fabsf(err);
                    tinfo->maxErrorValue = s[j];
                }
                if( fail )
                {
                    // NOTE(review): this indexes the GLOBAL gIn/gOut_Ref with the chunk-local j,
                    // unlike TestFloat which uses the thread-offset s/t pointers — the reported
                    // input/reference values look wrong for thread_id != 0; verify against upstream.
                    vlog_error( "\nERROR: %s%s: %f ulp error at %.13la (0x%16.16llx): *%.13la vs. %.13la\n", job->f->name, sizeNames[k], err, ((cl_double*) gIn)[j], ((cl_ulong*) gIn)[j], ((cl_double*) gOut_Ref)[j], test );
                    return -1;
                }
            }
        }
    }

    for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
    {
        if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)) )
        {
            vlog_error( "Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n", j, error );
            return error;
        }
    }

    if( (error = clFlush(tinfo->tQueue) ))
        vlog( "clFlush 3 failed\n" );


    if( 0 == ( base & 0x0fffffff) )
    {
        if (gVerboseBruteForce)
        {
            // NOTE(review): argument order mismatches the format labels — buffer_elements
            // (size_t) is printed under "scale:%10zd" and job->scale (cl_uint) under
            // "buf_elements:%10u"; TestFloat's version passes job->scale, buffer_elements.
            vlog("base:%14u step:%10u scale:%10zd buf_elements:%10u ulps:%5.3f ThreadCount:%2u\n", base, job->step, buffer_elements, job->scale, job->ulps, job->threadCount);
        } else
        {
            vlog("." );
        }
        fflush(stdout);
    }

    return CL_SUCCESS;
}

// Driver for the double->double test. Same structure as TestFunc_Float_Float.
// (Definition continues beyond this chunk of the file.)
int TestFunc_Double_Double(const Func *f, MTdata d)
{
    TestInfo    test_info;
    cl_int      error;
    size_t      i, j;
    float       maxError = 0.0f;
    double      maxErrorVal = 0.0;
#if defined( __APPLE__ )
    struct timeval time_val;
    gettimeofday( &time_val, NULL );
    double start_time = time_val.tv_sec + 1e-6 * time_val.tv_usec;
    double end_time;
#endif

    logFunctionInfo(f->name,sizeof(cl_double),gTestFastRelaxed);
    // Init test_info
    memset( &test_info, 0, sizeof( test_info ) );
    test_info.threadCount = GetThreadCount();
    test_info.subBufferSize = BUFFER_SIZE / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
    test_info.scale = 1;
    if (gWimpyMode)
    {
        test_info.subBufferSize = gWimpyBufferSize / (sizeof( cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
        test_info.scale = (cl_uint) sizeof(cl_double) * 2 * gWimpyReductionFactor;
    }
    test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;

    test_info.f = f;
    test_info.ulps = f->double_ulps;
    test_info.ftz = f->ftz || gForceFTZ;

    // cl_kernels aren't thread safe, so we make one for each vector size for every thread
    for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
    {
        size_t array_size = test_info.threadCount * sizeof( cl_kernel );
        test_info.k[i] = (cl_kernel*)malloc( array_size );
        if( NULL == test_info.k[i] )
        {
            vlog_error( "Error: Unable to allocate storage for kernels!\n" );
            error = CL_OUT_OF_HOST_MEMORY;
            goto exit;
        }
        memset( test_info.k[i], 0, array_size );
    }
    test_info.tinfo = (ThreadInfo*)malloc( test_info.threadCount * sizeof(*test_info.tinfo) );
    if( NULL == test_info.tinfo )
    {
        vlog_error( "Error: Unable to allocate storage for thread specific data.\n" );
        error = CL_OUT_OF_HOST_MEMORY;
        goto exit;
    }
    memset( test_info.tinfo, 0, test_info.threadCount * sizeof(*test_info.tinfo) );
    for( i = 0; i < test_info.threadCount; i++ )
    {
        cl_buffer_region region = { i * test_info.subBufferSize * sizeof( cl_double), test_info.subBufferSize * sizeof( cl_double) };
        test_info.tinfo[i].inBuf = clCreateSubBuffer( gInBuffer, CL_MEM_READ_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
        if( error || NULL == test_info.tinfo[i].inBuf)
        {
            vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size );
            goto exit;
        }

        for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
        {
            /* Qualcomm fix: 9461 read-write flags must be compatible with parent buffer */
            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
            /* Qualcomm fix: end */
            if( error || NULL == test_info.tinfo[i].outBuf[j] )
            {
                vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%zd, %zd}\n", region.origin, region.size );
                goto exit;
            }
        }
        test_info.tinfo[i].tQueue = clCreateCommandQueueWithProperties(gContext, gDevice, 0, &error);
        if( NULL == test_info.tinfo[i].tQueue || error )
        {
            vlog_error( "clCreateCommandQueue failed. 
(%d)\n", error ); + goto exit; + } + } + + // Init the kernels + { + BuildKernelInfo build_info = { gMinVectorSizeIndex, test_info.threadCount, test_info.k, test_info.programs, f->nameInCode }; + if( (error = ThreadPool_Do( BuildKernel_DoubleFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) )) + goto exit; + } + + if( !gSkipCorrectnessTesting ) + { + error = ThreadPool_Do( TestDouble, (cl_uint) ((1ULL<<32) / test_info.step), &test_info ); + + // Accumulate the arithmetic errors + for( i = 0; i < test_info.threadCount; i++ ) + { + if( test_info.tinfo[i].maxError > maxError ) + { + maxError = test_info.tinfo[i].maxError; + maxErrorVal = test_info.tinfo[i].maxErrorValue; + } + } + + if( error ) + goto exit; + + if( gWimpyMode ) + vlog( "Wimp pass" ); + else + vlog( "passed" ); + } + + +#if defined( __APPLE__ ) + gettimeofday( &time_val, NULL); + end_time = time_val.tv_sec + 1e-6 * time_val.tv_usec; +#endif + + if( gMeasureTimes ) + { + //Init input array + double *p = (double *)gIn; + + if( strstr( f->name, "exp" ) ) + for( j = 0; j < BUFFER_SIZE / sizeof( double ); j++ ) + p[j] = (double)genrand_real1(d); + else if( strstr( f->name, "log" ) ) + for( j = 0; j < BUFFER_SIZE / sizeof( double ); j++ ) + p[j] = fabs(DoubleFromUInt32( genrand_int32(d))); + else + for( j = 0; j < BUFFER_SIZE / sizeof( double ); j++ ) + p[j] = DoubleFromUInt32( genrand_int32(d) ); + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, BUFFER_SIZE, gIn, 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); + return error; + } + + + // Run the kernels + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + size_t vectorSize = sizeValues[j] * sizeof(cl_double); + size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize; + if( ( error = clSetKernelArg( test_info.k[j][0], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(test_info.programs[j]); goto exit; } + if( ( error = clSetKernelArg( 
test_info.k[j][0], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(test_info.programs[j]); goto exit; } + + double sum = 0.0; + double bestTime = INFINITY; + for( i = 0; i < PERF_LOOP_COUNT; i++ ) + { + uint64_t startTime = GetTime(); + if( (error = clEnqueueNDRangeKernel(gQueue, test_info.k[j][0], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) + { + vlog_error( "FAILED -- could not execute kernel\n" ); + goto exit; + } + + // Make sure OpenCL is done + if( (error = clFinish(gQueue) ) ) + { + vlog_error( "Error %d at clFinish\n", error ); + goto exit; + } + + uint64_t endTime = GetTime(); + double current_time = SubtractTime( endTime, startTime ); + sum += current_time; + if( current_time < bestTime ) + bestTime = current_time; + } + + if( gReportAverageTimes ) + bestTime = sum / PERF_LOOP_COUNT; + double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (BUFFER_SIZE / sizeof( double ) ); + vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s", f->name, sizeNames[j] ); + } + for( ; j < gMaxVectorSizeIndex; j++ ) + vlog( "\t -- " ); + } + + if( ! 
gSkipCorrectnessTesting ) + vlog( "\t%8.2f @ %a", maxError, maxErrorVal ); + +#if defined( __APPLE__ ) + vlog( "\t(%2.2f seconds)", end_time - start_time ); +#endif + vlog( "\n" ); + +exit: + for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) + { + clReleaseProgram(test_info.programs[i]); + if( test_info.k[i] ) + { + for( j = 0; j < test_info.threadCount; j++ ) + clReleaseKernel(test_info.k[i][j]); + + free( test_info.k[i] ); + } + } + if( test_info.tinfo ) + { + for( i = 0; i < test_info.threadCount; i++ ) + { + clReleaseMemObject(test_info.tinfo[i].inBuf); + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + clReleaseMemObject(test_info.tinfo[i].outBuf[j]); + clReleaseCommandQueue(test_info.tinfo[i].tQueue); + } + + free( test_info.tinfo ); + } + + return error; +} + + diff --git a/test_conformance/math_brute_force/unary_two_results.c b/test_conformance/math_brute_force/unary_two_results.c new file mode 100644 index 00000000..a3fb307d --- /dev/null +++ b/test_conformance/math_brute_force/unary_two_results.c @@ -0,0 +1,993 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "Utility.h" + +#include +#include "FunctionList.h" + +int TestFunc_Float2_Float(const Func *f, MTdata); +int TestFunc_Double2_Double(const Func *f, MTdata); + +#if defined(__cplusplus) + extern "C" +#endif +const vtbl _unary_two_results = { "unary_two_results", TestFunc_Float2_Float, TestFunc_Double2_Double }; + +static int BuildKernel( const char *name, int vectorSize, cl_kernel *k, cl_program *p ); +static int BuildKernelDouble( const char *name, int vectorSize, cl_kernel *k, cl_program *p ); +static int BuildKernel( const char *name, int vectorSize, cl_kernel *k, cl_program *p ) +{ + const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global float", sizeNames[vectorSize], "* out, __global float", sizeNames[vectorSize], "* out2, __global float", sizeNames[vectorSize], "* in)\n" + "{\n" + " int i = get_global_id(0);\n" + " out[i] = ", name, "( in[i], out2 + i );\n" + "}\n" + }; + + const char *c3[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global float* out, __global float* out2, __global float* in)\n" + "{\n" + " size_t i = get_global_id(0);\n" + " if( i + 1 < get_global_size(0) )\n" + " {\n" + " float3 f0 = vload3( 0, in + 3 * i );\n" + " float3 iout = NAN;\n" + " f0 = ", name, "( f0, &iout );\n" + " vstore3( f0, 0, out + 3*i );\n" + " vstore3( iout, 0, out2 + 3*i );\n" + " }\n" + " else\n" + " {\n" + " size_t parity = i & 1; // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). 
Assume power of two buffer size \n" + " float3 iout = NAN;\n" + " float3 f0;\n" + " switch( parity )\n" + " {\n" + " case 1:\n" + " f0 = (float3)( in[3*i], NAN, NAN ); \n" + " break;\n" + " case 0:\n" + " f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n" + " break;\n" + " }\n" + " f0 = ", name, "( f0, &iout );\n" + " switch( parity )\n" + " {\n" + " case 0:\n" + " out[3*i+1] = f0.y; \n" + " out2[3*i+1] = iout.y; \n" + " // fall through\n" + " case 1:\n" + " out[3*i] = f0.x; \n" + " out2[3*i] = iout.x; \n" + " break;\n" + " }\n" + " }\n" + "}\n" + }; + const char **kern = c; + size_t kernSize = sizeof(c)/sizeof(c[0]); + + if( sizeValues[vectorSize] == 3 ) + { + kern = c3; + kernSize = sizeof(c3)/sizeof(c3[0]); + } + + char testName[32]; + snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); + + return MakeKernel(kern, (cl_uint) kernSize, testName, k, p); + +} + +static int BuildKernelDouble( const char *name, int vectorSize, cl_kernel *k, cl_program *p ) +{ + const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", + "__kernel void math_kernel", sizeNames[vectorSize], "( __global double", sizeNames[vectorSize], "* out, __global double", sizeNames[vectorSize], "* out2, __global double", sizeNames[vectorSize], "* in)\n" + "{\n" + " int i = get_global_id(0);\n" + " out[i] = ", name, "( in[i], out2 + i );\n" + "}\n" + }; + + const char *c3[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", + "__kernel void math_kernel", sizeNames[vectorSize], "( __global double* out, __global double* out2, __global double* in)\n" + "{\n" + " size_t i = get_global_id(0);\n" + " if( i + 1 < get_global_size(0) )\n" + " {\n" + " double3 f0 = vload3( 0, in + 3 * i );\n" + " double3 iout = NAN;\n" + " f0 = ", name, "( f0, &iout );\n" + " vstore3( f0, 0, out + 3*i );\n" + " vstore3( iout, 0, out2 + 3*i );\n" + " }\n" + " else\n" + " {\n" + " size_t parity = i & 1; // Figure out how many elements are left over after BUFFER_SIZE % 
(3*sizeof(float)). Assume power of two buffer size \n" + " double3 iout = NAN;\n" + " double3 f0;\n" + " switch( parity )\n" + " {\n" + " case 1:\n" + " f0 = (double3)( in[3*i], NAN, NAN ); \n" + " break;\n" + " case 0:\n" + " f0 = (double3)( in[3*i], in[3*i+1], NAN ); \n" + " break;\n" + " }\n" + " f0 = ", name, "( f0, &iout );\n" + " switch( parity )\n" + " {\n" + " case 0:\n" + " out[3*i+1] = f0.y; \n" + " out2[3*i+1] = iout.y; \n" + " // fall through\n" + " case 1:\n" + " out[3*i] = f0.x; \n" + " out2[3*i] = iout.x; \n" + " break;\n" + " }\n" + " }\n" + "}\n" + }; + const char **kern = c; + size_t kernSize = sizeof(c)/sizeof(c[0]); + + if( sizeValues[vectorSize] == 3 ) + { + kern = c3; + kernSize = sizeof(c3)/sizeof(c3[0]); + } + + char testName[32]; + snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); + + return MakeKernel(kern, (cl_uint) kernSize, testName, k, p); + +} + +typedef struct BuildKernelInfo +{ + cl_uint offset; // the first vector size to build + cl_kernel *kernels; + cl_program *programs; + const char *nameInCode; +}BuildKernelInfo; + +static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); +static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ) +{ + BuildKernelInfo *info = (BuildKernelInfo*) p; + cl_uint i = info->offset + job_id; + return BuildKernel( info->nameInCode, i, info->kernels + i, info->programs + i ); +} + +static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); +static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ) +{ + BuildKernelInfo *info = (BuildKernelInfo*) p; + cl_uint i = info->offset + job_id; + return BuildKernelDouble( info->nameInCode, i, info->kernels + i, info->programs + i ); +} + +int TestFunc_Float2_Float(const Func *f, MTdata d) +{ + uint64_t i; + uint32_t j, k; + uint32_t l; + int error; + char const * testing_mode; + cl_program programs[ 
VECTOR_SIZE_COUNT ]; + cl_kernel kernels[ VECTOR_SIZE_COUNT ]; + float maxError0 = 0.0f; + float maxError1 = 0.0f; + int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); + float maxErrorVal0 = 0.0f; + float maxErrorVal1 = 0.0f; + size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE; + uint64_t step = bufferSize / sizeof( float ); + int scale = (int)((1ULL<<32) / (16 * bufferSize / sizeof( float )) + 1); + cl_uchar overflow[BUFFER_SIZE / sizeof( float )]; + int isFract = 0 == strcmp( "fract", f->nameInCode ); + int skipNanInf = isFract && ! gInfNanSupport; + float float_ulps; + + logFunctionInfo(f->name,sizeof(cl_float),gTestFastRelaxed); + if( gWimpyMode ) + { + step = (1ULL<<32) * gWimpyReductionFactor / (512); + } + if( gIsEmbedded ) + float_ulps = f->float_embedded_ulps; + else + float_ulps = f->float_ulps; + + if (gTestFastRelaxed) + float_ulps = f->relaxed_error; + + // Init the kernels + BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, f->nameInCode }; + if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) )) + return error; +/* + for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) + if( (error = BuildKernel( f->nameInCode, (int) i, kernels + i, programs + i) ) ) + return error; +*/ + + for( i = 0; i < (1ULL<<32); i += step ) + { + //Init input array + uint32_t *p = (uint32_t *)gIn; + if( gWimpyMode ) + { + for( j = 0; j < bufferSize / sizeof( float ); j++ ) + { + p[j] = (uint32_t) i + j * scale; + if ( gTestFastRelaxed && strcmp(f->name,"sincos") == 0 ) + { + float pj = *(float *)&p[j]; + if(fabs(pj) > M_PI) + p[j] = NAN; + } + } + } + else + { + for( j = 0; j < bufferSize / sizeof( float ); j++ ) + { + p[j] = (uint32_t) i + j; + if ( gTestFastRelaxed && strcmp(f->name,"sincos") == 0 ) + { + float pj = *(float *)&p[j]; + if(fabs(pj) > M_PI) + p[j] = NAN; + } + } + } + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, 
bufferSize, gIn, 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); + return error; + } + + // write garbage into output arrays + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + uint32_t pattern = 0xffffdead; + memset_pattern4(gOut[j], &pattern, bufferSize); + if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, bufferSize, gOut[j], 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", error, j ); + goto exit; + } + + memset_pattern4(gOut2[j], &pattern, bufferSize); + if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j], CL_FALSE, 0, bufferSize, gOut2[j], 0, NULL, NULL))) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n", error, j ); + goto exit; + } + } + + // Run the kernels + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + size_t vectorSize = sizeValues[j] * sizeof(cl_float); + size_t localCount = (bufferSize + vectorSize - 1) / vectorSize; + if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; } + if( ( error = clSetKernelArg(kernels[j], 1, sizeof( gOutBuffer2[j] ), &gOutBuffer2[j] ) )) { LogBuildError(programs[j]); goto exit; } + if( ( error = clSetKernelArg(kernels[j], 2, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; } + + if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) + { + vlog_error( "FAILED -- could not execute kernel\n" ); + goto exit; + } + } + + // Get that moving + if( (error = clFlush(gQueue) )) + vlog( "clFlush failed\n" ); + + FPU_mode_type oldMode; + RoundingMode oldRoundMode = kRoundToNearestEven; + if( isFract ) + { + //Calculate the correctly rounded reference result + memset( &oldMode, 0, sizeof( oldMode ) ); + if( ftz ) + ForceFTZ( &oldMode ); + + // Set the rounding mode to match the device + if (gIsInRTZMode) + oldRoundMode = 
set_round(kRoundTowardZero, kfloat); + } + + //Calculate the correctly rounded reference result + float *r = (float *)gOut_Ref; + float *r2 = (float *)gOut_Ref2; + float *s = (float *)gIn; + + if( skipNanInf ) + { + for( j = 0; j < bufferSize / sizeof( float ); j++ ) + { + double dd; + feclearexcept(FE_OVERFLOW); + + if( gTestFastRelaxed ) + r[j] = (float) f->rfunc.f_fpf( s[j], &dd ); + else + r[j] = (float) f->func.f_fpf( s[j], &dd ); + + r2[j] = (float) dd; + overflow[j] = FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW)); + } + } + else + { + for( j = 0; j < bufferSize / sizeof( float ); j++ ) + { + double dd; + if( gTestFastRelaxed ) + r[j] = (float) f->rfunc.f_fpf( s[j], &dd ); + else + r[j] = (float) f->func.f_fpf( s[j], &dd ); + + r2[j] = (float) dd; + } + } + + if( isFract && ftz ) + RestoreFPState( &oldMode ); + + // Read the data back + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL)) ) + { + vlog_error( "ReadArray failed %d\n", error ); + goto exit; + } + if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer2[j], CL_TRUE, 0, bufferSize, gOut2[j], 0, NULL, NULL)) ) + { + vlog_error( "ReadArray2 failed %d\n", error ); + goto exit; + } + } + + if( gSkipCorrectnessTesting ) + { + if (isFract && gIsInRTZMode) + (void)set_round(oldRoundMode, kfloat); + break; + } + + //Verify data + uint32_t *t = (uint32_t *)gOut_Ref; + uint32_t *t2 = (uint32_t *)gOut_Ref2; + for( j = 0; j < bufferSize / sizeof( float ); j++ ) + { + for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) + { + uint32_t *q = (uint32_t *)gOut[k]; + uint32_t *q2 = (uint32_t *)gOut2[k]; + + // If we aren't getting the correctly rounded result + if( t[j] != q[j] || t2[j] != q2[j] ) + { + double correct, correct2; + float err, err2; + float test = ((float*) q)[j]; + float test2 = ((float*) q2)[j]; + + if( gTestFastRelaxed ) + correct = f->rfunc.f_fpf( s[j], &correct2 
); + else + correct = f->func.f_fpf( s[j], &correct2 ); + + // Per section 10 paragraph 6, accept any result if an input or output is a infinity or NaN or overflow + if (gTestFastRelaxed || skipNanInf) + { + if (skipNanInf && overflow[j]) + continue; + + // Note: no double rounding here. Reference functions calculate in single precision. + if( IsFloatInfinity(correct) || IsFloatNaN(correct) || + IsFloatInfinity(correct2)|| IsFloatNaN(correct2) || + IsFloatInfinity(s[j]) || IsFloatNaN(s[j]) ) + continue; + } + + typedef int (*CheckForSubnormal) (double,float); // If we are in fast relaxed math, we have a different calculation for the subnormal threshold. + CheckForSubnormal isFloatResultSubnormalPtr; + if( gTestFastRelaxed ) + { + err = Abs_Error( test, correct); + err2 = Abs_Error( test2, correct2); + isFloatResultSubnormalPtr = &IsFloatResultSubnormalAbsError; + } + else + { + err = Ulp_Error( test, correct ); + err2 = Ulp_Error( test2, correct2 ); + isFloatResultSubnormalPtr = &IsFloatResultSubnormal; + } + int fail = ! (fabsf(err) <= float_ulps && fabsf(err2) <= float_ulps); + + if( ftz ) + { + // retry per section 6.5.3.2 + if( (*isFloatResultSubnormalPtr)(correct, float_ulps) ) + { + if( (*isFloatResultSubnormalPtr) (correct2, float_ulps )) + { + fail = fail && ! ( test == 0.0f && test2 == 0.0f ); + if( ! fail ) + { + err = 0.0f; + err2 = 0.0f; + } + } + else + { + fail = fail && ! ( test == 0.0f && fabsf(err2) <= float_ulps); + if( ! fail ) + err = 0.0f; + } + } + else if( (*isFloatResultSubnormalPtr)(correct2, float_ulps ) ) + { + fail = fail && ! ( test2 == 0.0f && fabsf(err) <= float_ulps); + if( ! 
fail ) + err2 = 0.0f; + } + + + // retry per section 6.5.3.3 + if( IsFloatSubnormal( s[j] ) ) + { + double correctp, correctn; + double correct2p, correct2n; + float errp, err2p, errn, err2n; + + if( skipNanInf ) + feclearexcept(FE_OVERFLOW); + if ( gTestFastRelaxed ) + { + correctp = f->rfunc.f_fpf( 0.0, &correct2p ); + correctn = f->rfunc.f_fpf( -0.0, &correct2n ); + } + else + { + correctp = f->func.f_fpf( 0.0, &correct2p ); + correctn = f->func.f_fpf( -0.0, &correct2n ); + } + + // Per section 10 paragraph 6, accept any result if an input or output is a infinity or NaN or overflow + if( skipNanInf ) + { + if( fetestexcept(FE_OVERFLOW) ) + continue; + + // Note: no double rounding here. Reference functions calculate in single precision. + if( IsFloatInfinity(correctp) || IsFloatNaN(correctp) || + IsFloatInfinity(correctn) || IsFloatNaN(correctn) || + IsFloatInfinity(correct2p) || IsFloatNaN(correct2p) || + IsFloatInfinity(correct2n) || IsFloatNaN(correct2n) ) + continue; + } + + if ( gTestFastRelaxed ) + { + errp = Abs_Error( test, correctp ); + err2p = Abs_Error( test, correct2p ); + errn = Abs_Error( test, correctn ); + err2n = Abs_Error( test, correct2n ); + } + else + { + errp = Ulp_Error( test, correctp ); + err2p = Ulp_Error( test, correct2p ); + errn = Ulp_Error( test, correctn ); + err2n = Ulp_Error( test, correct2n ); + } + + fail = fail && ((!(fabsf(errp) <= float_ulps)) && (!(fabsf(err2p) <= float_ulps)) && + ((!(fabsf(errn) <= float_ulps)) && (!(fabsf(err2n) <= float_ulps))) ); + if( fabsf( errp ) < fabsf(err ) ) + err = errp; + if( fabsf( errn ) < fabsf(err ) ) + err = errn; + if( fabsf( err2p ) < fabsf(err2 ) ) + err2 = err2p; + if( fabsf( err2n ) < fabsf(err2 ) ) + err2 = err2n; + + // retry per section 6.5.3.4 + if( (*isFloatResultSubnormalPtr)( correctp, float_ulps ) || (*isFloatResultSubnormalPtr)( correctn, float_ulps ) ) + { + if( (*isFloatResultSubnormalPtr)( correct2p, float_ulps ) || (*isFloatResultSubnormalPtr)( correct2n, float_ulps ) ) 
+ { + fail = fail && !( test == 0.0f && test2 == 0.0f); + if( ! fail ) + err = err2 = 0.0f; + } + else + { + fail = fail && ! (test == 0.0f && fabsf(err2) <= float_ulps); + if( ! fail ) + err = 0.0f; + } + } + else if( (*isFloatResultSubnormalPtr)( correct2p, float_ulps ) || (*isFloatResultSubnormalPtr)( correct2n, float_ulps ) ) + { + fail = fail && ! (test2 == 0.0f && (fabsf(err) <= float_ulps)); + if( ! fail ) + err2 = 0.0f; + } + } + } + if( fabsf(err ) > maxError0 ) + { + maxError0 = fabsf(err); + maxErrorVal0 = s[j]; + } + if( fabsf(err2 ) > maxError1 ) + { + maxError1 = fabsf(err2); + maxErrorVal1 = s[j]; + } + if( fail ) + { + vlog_error( "\nERROR: %s%s: {%f, %f} ulp error at %a: *{%a, %a} vs. {%a, %a}\n", f->name, sizeNames[k], err, err2, ((float*) gIn)[j], ((float*) gOut_Ref)[j], ((float*) gOut_Ref2)[j], test, test2 ); + error = -1; + goto exit; + } + } + } + } + + if (isFract && gIsInRTZMode) + (void)set_round(oldRoundMode, kfloat); + + if( 0 == (i & 0x0fffffff) ) + { + if (gVerboseBruteForce) + { + vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step, bufferSize); + } else + { + vlog("."); + } + fflush(stdout); + } + } + + if( ! 
gSkipCorrectnessTesting ) + { + if( gWimpyMode ) + vlog( "Wimp pass" ); + else + vlog( "passed" ); + } + + if( gMeasureTimes ) + { + //Init input array + uint32_t *p = (uint32_t *)gIn; + for( j = 0; j < bufferSize / sizeof( float ); j++ ) + p[j] = genrand_int32(d); + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); + return error; + } + + + // Run the kernels + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + size_t vectorSize = sizeValues[j] * sizeof(cl_float); + size_t localCount = (bufferSize + vectorSize - 1) / vectorSize; + if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; } + if( ( error = clSetKernelArg(kernels[j], 1, sizeof( gOutBuffer2[j] ), &gOutBuffer2[j]) )) { LogBuildError(programs[j]); goto exit; } + if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; } + + double sum = 0.0; + double bestTime = INFINITY; + for( k = 0; k < PERF_LOOP_COUNT; k++ ) + { + + uint64_t startTime = GetTime(); + if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) + { + vlog_error( "FAILED -- could not execute kernel\n" ); + goto exit; + } + + // Make sure OpenCL is done + if( (error = clFinish(gQueue) ) ) + { + vlog_error( "Error %d at clFinish\n", error ); + goto exit; + } + + uint64_t endTime = GetTime(); + double time = SubtractTime( endTime, startTime ); + sum += time; + if( time < bestTime ) + bestTime = time; + } + + if( gReportAverageTimes ) + bestTime = sum / PERF_LOOP_COUNT; + double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (bufferSize / sizeof( float ) ); + vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s", f->name, sizeNames[j] ); + } + } + + if( ! 
gSkipCorrectnessTesting ) + vlog( "\t{%8.2f, %8.2f} @ {%a, %a}", maxError0, maxError1, maxErrorVal0, maxErrorVal1 ); + vlog( "\n" ); + +exit: + // Release + for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) + { + clReleaseKernel(kernels[k]); + clReleaseProgram(programs[k]); + } + + return error; +} + +int TestFunc_Double2_Double(const Func *f, MTdata d) +{ + uint64_t i; + uint32_t j, k; + int error; + cl_program programs[ VECTOR_SIZE_COUNT ]; + cl_kernel kernels[ VECTOR_SIZE_COUNT ]; + float maxError0 = 0.0f; + float maxError1 = 0.0f; + int ftz = f->ftz || gForceFTZ; + double maxErrorVal0 = 0.0f; + double maxErrorVal1 = 0.0f; + size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE; + uint64_t step = bufferSize / sizeof( cl_double ); + int scale = (int)((1ULL<<32) / (16 * bufferSize / sizeof( cl_double )) + 1); + + logFunctionInfo(f->name,sizeof(cl_double),gTestFastRelaxed); + if( gWimpyMode ) + { + step = (1ULL<<32) * gWimpyReductionFactor / (512); + } + + Force64BitFPUPrecision(); + + // Init the kernels + BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, f->nameInCode }; + if( (error = ThreadPool_Do( BuildKernel_DoubleFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info ) )) + { + return error; + } +/* + for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) + if( (error = BuildKernelDouble( f->nameInCode, (int) i, kernels + i, programs + i) ) ) + return error; +*/ + + for( i = 0; i < (1ULL<<32); i += step ) + { + //Init input array + double *p = (double *)gIn; + if( gWimpyMode ) + { + for( j = 0; j < bufferSize / sizeof( cl_double ); j++ ) + p[j] = DoubleFromUInt32((uint32_t) i + j * scale); + } + else + { + for( j = 0; j < bufferSize / sizeof( cl_double ); j++ ) + p[j] = DoubleFromUInt32((uint32_t) i + j); + } + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); + return error; + } 
+ + // write garbage into output arrays + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + uint32_t pattern = 0xffffdead; + memset_pattern4(gOut[j], &pattern, bufferSize); + if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, bufferSize, gOut[j], 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", error, j ); + goto exit; + } + + memset_pattern4(gOut2[j], &pattern, bufferSize); + if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j], CL_FALSE, 0, bufferSize, gOut2[j], 0, NULL, NULL))) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n", error, j ); + goto exit; + } + } + + // Run the kernels + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + size_t vectorSize = sizeValues[j] * sizeof(cl_double); + size_t localCount = (bufferSize + vectorSize - 1) / vectorSize; + if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; } + if( ( error = clSetKernelArg(kernels[j], 1, sizeof( gOutBuffer2[j] ), &gOutBuffer2[j] ) )) { LogBuildError(programs[j]); goto exit; } + if( ( error = clSetKernelArg(kernels[j], 2, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; } + + if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) + { + vlog_error( "FAILED -- could not execute kernel\n" ); + goto exit; + } + } + + // Get that moving + if( (error = clFlush(gQueue) )) + vlog( "clFlush failed\n" ); + + //Calculate the correctly rounded reference result + double *r = (double *)gOut_Ref; + double *r2 = (double *)gOut_Ref2; + double *s = (double *)gIn; + for( j = 0; j < bufferSize / sizeof( cl_double ); j++ ) + { + long double dd; + r[j] = (double) f->dfunc.f_fpf( s[j], &dd ); + r2[j] = (double) dd; + } + + // Read the data back + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + if( (error = clEnqueueReadBuffer(gQueue, 
gOutBuffer[j], CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL)) ) + { + vlog_error( "ReadArray failed %d\n", error ); + goto exit; + } + if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer2[j], CL_TRUE, 0, bufferSize, gOut2[j], 0, NULL, NULL)) ) + { + vlog_error( "ReadArray2 failed %d\n", error ); + goto exit; + } + } + + if( gSkipCorrectnessTesting ) + break; + + //Verify data + uint64_t *t = (uint64_t *)gOut_Ref; + uint64_t *t2 = (uint64_t *)gOut_Ref2; + for( j = 0; j < bufferSize / sizeof( double ); j++ ) + { + for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) + { + uint64_t *q = (uint64_t *)(gOut[k]); + uint64_t *q2 = (uint64_t *)(gOut2[k]); + + // If we aren't getting the correctly rounded result + if( t[j] != q[j] || t2[j] != q2[j] ) + { + double test = ((double*) q)[j]; + double test2 = ((double*) q2)[j]; + long double correct2; + long double correct = f->dfunc.f_fpf( s[j], &correct2 ); + float err = Ulp_Error_Double( test, correct ); + float err2 = Ulp_Error_Double( test2, correct2 ); + int fail = ! (fabsf(err) <= f->double_ulps && fabsf(err2) <= f->double_ulps); + if( ftz ) + { + // retry per section 6.5.3.2 + if( IsDoubleResultSubnormal(correct, f->double_ulps ) ) + { + if( IsDoubleResultSubnormal( correct2, f->double_ulps ) ) + { + fail = fail && ! ( test == 0.0f && test2 == 0.0f ); + if( ! fail ) + { + err = 0.0f; + err2 = 0.0f; + } + } + else + { + fail = fail && ! ( test == 0.0f && fabsf(err2) <= f->double_ulps); + if( ! fail ) + err = 0.0f; + } + } + else if( IsDoubleResultSubnormal( correct2, f->double_ulps ) ) + { + fail = fail && ! ( test2 == 0.0f && fabsf(err) <= f->double_ulps); + if( ! 
fail ) + err2 = 0.0f; + } + + // retry per section 6.5.3.3 + if( IsDoubleSubnormal( s[j] ) ) + { + long double correct2p, correct2n; + long double correctp = f->dfunc.f_fpf( 0.0, &correct2p ); + long double correctn = f->dfunc.f_fpf( -0.0, &correct2n ); + float errp = Ulp_Error_Double( test, correctp ); + float err2p = Ulp_Error_Double( test, correct2p ); + float errn = Ulp_Error_Double( test, correctn ); + float err2n = Ulp_Error_Double( test, correct2n ); + fail = fail && ((!(fabsf(errp) <= f->double_ulps)) && (!(fabsf(err2p) <= f->double_ulps)) && + ((!(fabsf(errn) <= f->double_ulps)) && (!(fabsf(err2n) <= f->double_ulps))) ); + if( fabsf( errp ) < fabsf(err ) ) + err = errp; + if( fabsf( errn ) < fabsf(err ) ) + err = errn; + if( fabsf( err2p ) < fabsf(err2 ) ) + err2 = err2p; + if( fabsf( err2n ) < fabsf(err2 ) ) + err2 = err2n; + + // retry per section 6.5.3.4 + if( IsDoubleResultSubnormal( correctp, f->double_ulps ) || IsDoubleResultSubnormal( correctn, f->double_ulps ) ) + { + if( IsDoubleResultSubnormal( correct2p, f->double_ulps ) || IsDoubleResultSubnormal( correct2n, f->double_ulps ) ) + { + fail = fail && !( test == 0.0f && test2 == 0.0f); + if( ! fail ) + err = err2 = 0.0f; + } + else + { + fail = fail && ! (test == 0.0f && fabsf(err2) <= f->double_ulps); + if( ! fail ) + err = 0.0f; + } + } + else if( IsDoubleResultSubnormal( correct2p, f->double_ulps ) || IsDoubleResultSubnormal( correct2n, f->double_ulps ) ) + { + fail = fail && ! (test2 == 0.0f && (fabsf(err) <= f->double_ulps)); + if( ! fail ) + err2 = 0.0f; + } + } + } + if( fabsf(err ) > maxError0 ) + { + maxError0 = fabsf(err); + maxErrorVal0 = s[j]; + } + if( fabsf(err2 ) > maxError1 ) + { + maxError1 = fabsf(err2); + maxErrorVal1 = s[j]; + } + if( fail ) + { + vlog_error( "\nERROR: %sD%s: {%f, %f} ulp error at %.13la: *{%.13la, %.13la} vs. 
{%.13la, %.13la}\n", f->name, sizeNames[k], err, err2, ((double*) gIn)[j], ((double*) gOut_Ref)[j], ((double*) gOut_Ref2)[j], test, test2 ); + error = -1; + goto exit; + } + } + } + } + + if( 0 == (i & 0x0fffffff) ) + { + if (gVerboseBruteForce) + { + vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step, bufferSize); + } else + { + vlog("." ); + } + fflush(stdout); + } + } + + if( ! gSkipCorrectnessTesting ) + { + if( gWimpyMode ) + vlog( "Wimp pass" ); + else + vlog( "passed" ); + } + + if( gMeasureTimes ) + { + //Init input array + double *p = (double*) gIn; + for( j = 0; j < bufferSize / sizeof( double ); j++ ) + p[j] = DoubleFromUInt32(genrand_int32(d) ); + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); + return error; + } + + + // Run the kernels + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + size_t vectorSize = sizeValues[j] * sizeof(cl_double); + size_t localCount = (bufferSize + vectorSize - 1) / vectorSize; + if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; } + if( ( error = clSetKernelArg(kernels[j], 1, sizeof( gOutBuffer2[j] ), &gOutBuffer2[j]) )) { LogBuildError(programs[j]); goto exit; } + if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; } + + double sum = 0.0; + double bestTime = INFINITY; + for( k = 0; k < PERF_LOOP_COUNT; k++ ) + { + + uint64_t startTime = GetTime(); + if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) + { + vlog_error( "FAILED -- could not execute kernel\n" ); + goto exit; + } + + // Make sure OpenCL is done + if( (error = clFinish(gQueue) ) ) + { + vlog_error( "Error %d at clFinish\n", error ); + goto exit; + } + + uint64_t endTime = GetTime(); + double time = SubtractTime( 
endTime, startTime ); + sum += time; + if( time < bestTime ) + bestTime = time; + } + + if( gReportAverageTimes ) + bestTime = sum / PERF_LOOP_COUNT; + double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (bufferSize / sizeof( double ) ); + vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s", f->name, sizeNames[j] ); + } + for( ; j < gMaxVectorSizeIndex; j++ ) + vlog( "\t -- " ); + } + + if( ! gSkipCorrectnessTesting ) + vlog( "\t{%8.2f, %8.2f} @ {%a, %a}", maxError0, maxError1, maxErrorVal0, maxErrorVal1 ); + vlog( "\n" ); + +exit: + // Release + for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) + { + clReleaseKernel(kernels[k]); + clReleaseProgram(programs[k]); + } + + return error; +} + + + diff --git a/test_conformance/math_brute_force/unary_two_results_i.c b/test_conformance/math_brute_force/unary_two_results_i.c new file mode 100644 index 00000000..c4eea8f1 --- /dev/null +++ b/test_conformance/math_brute_force/unary_two_results_i.c @@ -0,0 +1,802 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "Utility.h" + +#include +#include +#include "FunctionList.h" + +int TestFunc_FloatI_Float(const Func *f, MTdata); +int TestFunc_DoubleI_Double(const Func *f, MTdata); + +#if defined(__cplusplus) + extern "C" +#endif +const vtbl _unary_two_results_i = { "unary_two_results_i", TestFunc_FloatI_Float, TestFunc_DoubleI_Double }; + +static int BuildKernel( const char *name, int vectorSize, cl_kernel *k, cl_program *p ); +static int BuildKernelDouble( const char *name, int vectorSize, cl_kernel *k, cl_program *p ); + +static int BuildKernel( const char *name, int vectorSize, cl_kernel *k, cl_program *p ) +{ + const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global float", sizeNames[vectorSize], "* out, __global int", sizeNames[vectorSize], "* out2, __global float", sizeNames[vectorSize], "* in)\n" + "{\n" + " int i = get_global_id(0);\n" + " out[i] = ", name, "( in[i], out2 + i );\n" + "}\n" + }; + const char *c3[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global float* out, __global int* out2, __global float* in)\n" + "{\n" + " size_t i = get_global_id(0);\n" + " if( i + 1 < get_global_size(0) )\n" + " {\n" + " float3 f0 = vload3( 0, in + 3 * i );\n" + " int3 iout = INT_MIN;\n" + " f0 = ", name, "( f0, &iout );\n" + " vstore3( f0, 0, out + 3*i );\n" + " vstore3( iout, 0, out2 + 3*i );\n" + " }\n" + " else\n" + " {\n" + " size_t parity = i & 1; // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). 
Assume power of two buffer size \n" + " int3 iout = INT_MIN;\n" + " float3 f0;\n" + " switch( parity )\n" + " {\n" + " case 1:\n" + " f0 = (float3)( in[3*i], NAN, NAN ); \n" + " break;\n" + " case 0:\n" + " f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n" + " break;\n" + " }\n" + " f0 = ", name, "( f0, &iout );\n" + " switch( parity )\n" + " {\n" + " case 0:\n" + " out[3*i+1] = f0.y; \n" + " out2[3*i+1] = iout.y; \n" + " // fall through\n" + " case 1:\n" + " out[3*i] = f0.x; \n" + " out2[3*i] = iout.x; \n" + " break;\n" + " }\n" + " }\n" + "}\n" + }; + const char **kern = c; + size_t kernSize = sizeof(c)/sizeof(c[0]); + + if( sizeValues[vectorSize] == 3 ) + { + kern = c3; + kernSize = sizeof(c3)/sizeof(c3[0]); + } + + char testName[32]; + snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); + + return MakeKernel(kern, (cl_uint) kernSize, testName, k, p); + +} + +static int BuildKernelDouble( const char *name, int vectorSize, cl_kernel *k, cl_program *p ) +{ + const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", + "__kernel void math_kernel", sizeNames[vectorSize], "( __global double", sizeNames[vectorSize], "* out, __global int", sizeNames[vectorSize], "* out2, __global double", sizeNames[vectorSize], "* in)\n" + "{\n" + " int i = get_global_id(0);\n" + " out[i] = ", name, "( in[i], out2 + i );\n" + "}\n" + }; + const char *c3[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", + "__kernel void math_kernel", sizeNames[vectorSize], "( __global double* out, __global int* out2, __global double* in)\n" + "{\n" + " size_t i = get_global_id(0);\n" + " if( i + 1 < get_global_size(0) )\n" + " {\n" + " double3 f0 = vload3( 0, in + 3 * i );\n" + " int3 iout = INT_MIN;\n" + " f0 = ", name, "( f0, &iout );\n" + " vstore3( f0, 0, out + 3*i );\n" + " vstore3( iout, 0, out2 + 3*i );\n" + " }\n" + " else\n" + " {\n" + " size_t parity = i & 1; // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). 
Assume power of two buffer size \n" + " int3 iout = INT_MIN;\n" + " double3 f0;\n" + " switch( parity )\n" + " {\n" + " case 1:\n" + " f0 = (double3)( in[3*i], NAN, NAN ); \n" + " break;\n" + " case 0:\n" + " f0 = (double3)( in[3*i], in[3*i+1], NAN ); \n" + " break;\n" + " }\n" + " f0 = ", name, "( f0, &iout );\n" + " switch( parity )\n" + " {\n" + " case 0:\n" + " out[3*i+1] = f0.y; \n" + " out2[3*i+1] = iout.y; \n" + " // fall through\n" + " case 1:\n" + " out[3*i] = f0.x; \n" + " out2[3*i] = iout.x; \n" + " break;\n" + " }\n" + " }\n" + "}\n" + }; + const char **kern = c; + size_t kernSize = sizeof(c)/sizeof(c[0]); + + if( sizeValues[vectorSize] == 3 ) + { + kern = c3; + kernSize = sizeof(c3)/sizeof(c3[0]); + } + + char testName[32]; + snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); + + return MakeKernel(kern, (cl_uint) kernSize, testName, k, p); + +} + +typedef struct BuildKernelInfo +{ + cl_uint offset; // the first vector size to build + cl_kernel *kernels; + cl_program *programs; + const char *nameInCode; +}BuildKernelInfo; + +static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); +static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ) +{ + BuildKernelInfo *info = (BuildKernelInfo*) p; + cl_uint i = info->offset + job_id; + return BuildKernel( info->nameInCode, i, info->kernels + i, info->programs + i ); +} + +static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); +static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ) +{ + BuildKernelInfo *info = (BuildKernelInfo*) p; + cl_uint i = info->offset + job_id; + return BuildKernelDouble( info->nameInCode, i, info->kernels + i, info->programs + i ); +} + +cl_ulong abs_cl_long( cl_long i ); +cl_ulong abs_cl_long( cl_long i ) +{ + cl_long mask = i >> 63; + return (i ^ mask) - mask; +} + +int TestFunc_FloatI_Float(const Func *f, MTdata d) +{ + 
uint64_t i; + uint32_t j, k; + int error; + cl_program programs[ VECTOR_SIZE_COUNT ]; + cl_kernel kernels[ VECTOR_SIZE_COUNT ]; + float maxError = 0.0f; + int64_t maxError2 = 0; + int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); + float maxErrorVal = 0.0f; + float maxErrorVal2 = 0.0f; + size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE; + float float_ulps; + uint64_t step = bufferSize / sizeof( float ); + int scale = (int)((1ULL<<32) / (16 * bufferSize / sizeof( float )) + 1); + cl_ulong maxiError; + + logFunctionInfo(f->name,sizeof(cl_float),gTestFastRelaxed); + if( gWimpyMode ) + { + step = (1ULL<<32) * gWimpyReductionFactor / (512); + } + if( gIsEmbedded ) + float_ulps = f->float_embedded_ulps; + else + float_ulps = f->float_ulps; + + maxiError = float_ulps == INFINITY ? CL_ULONG_MAX : 0; + + // Init the kernels + BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, f->nameInCode }; + if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) )) + return error; +/* + for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) + if( (error = BuildKernel( f->nameInCode, (int) i, kernels + i, programs + i) ) ) + return error; +*/ + + for( i = 0; i < (1ULL<<32); i += step ) + { + //Init input array + uint32_t *p = (uint32_t *)gIn; + if( gWimpyMode ) + { + for( j = 0; j < bufferSize / sizeof( float ); j++ ) + p[j] = (uint32_t) i + j * scale; + } + else + { + for( j = 0; j < bufferSize / sizeof( float ); j++ ) + p[j] = (uint32_t) i + j; + } + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); + return error; + } + + // write garbage into output arrays + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + uint32_t pattern = 0xffffdead; + memset_pattern4(gOut[j], &pattern, bufferSize); + if( (error = clEnqueueWriteBuffer(gQueue, 
gOutBuffer[j], CL_FALSE, 0, bufferSize, gOut[j], 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", error, j ); + goto exit; + } + + memset_pattern4(gOut2[j], &pattern, bufferSize); + if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j], CL_FALSE, 0, bufferSize, gOut2[j], 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n", error, j ); + goto exit; + } + } + + // Run the kernels + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + size_t vectorSize = sizeValues[j] * sizeof(cl_float); + size_t localCount = (bufferSize + vectorSize - 1) / vectorSize; + if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; } + if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gOutBuffer2[j] ), &gOutBuffer2[j] ) )) { LogBuildError(programs[j]); goto exit; } + if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; } + + if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) + { + vlog_error( "FAILED -- could not execute kernel\n" ); + goto exit; + } + } + + // Get that moving + if( (error = clFlush(gQueue) )) + vlog( "clFlush failed\n" ); + + //Calculate the correctly rounded reference result + float *r = (float *)gOut_Ref; + int *r2 = (int *)gOut_Ref2; + float *s = (float *)gIn; + for( j = 0; j < bufferSize / sizeof( float ); j++ ) + r[j] = (float) f->func.f_fpI( s[j], r2+j ); + + // Read the data back + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL)) ) + { + vlog_error( "ReadArray failed %d\n", error ); + goto exit; + } + if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer2[j], CL_TRUE, 0, bufferSize, gOut2[j], 0, NULL, NULL)) ) + { + vlog_error( "ReadArray2 failed %d\n", error 
); + goto exit; + } + } + + if( gSkipCorrectnessTesting ) + break; + + //Verify data + uint32_t *t = (uint32_t *)gOut_Ref; + int32_t *t2 = (int32_t *)gOut_Ref2; + for( j = 0; j < bufferSize / sizeof( float ); j++ ) + { + for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) + { + uint32_t *q = (uint32_t *)(gOut[k]); + int32_t *q2 = (int32_t *)(gOut2[k]); + + // If we aren't getting the correctly rounded result + if( t[j] != q[j] || t2[j] != q2[j] ) + { + float test = ((float*) q)[j]; + int correct2 = INT_MIN; + double correct = f->func.f_fpI( s[j], &correct2 ); + float err = Ulp_Error( test, correct ); + cl_long iErr = (int64_t) q2[j] - (int64_t) correct2; + int fail = ! (fabsf(err) <= float_ulps && abs_cl_long( iErr ) <= maxiError ); + if( ftz ) + { + // retry per section 6.5.3.2 + if( IsFloatResultSubnormal(correct, float_ulps ) ) + { + fail = fail && ! ( test == 0.0f && iErr == 0 ); + if( ! fail ) + err = 0.0f; + } + + // retry per section 6.5.3.3 + if( IsFloatSubnormal( s[j] ) ) + { + int correct5, correct6; + double correct3 = f->func.f_fpI( 0.0, &correct5 ); + double correct4 = f->func.f_fpI( -0.0, &correct6 ); + float err2 = Ulp_Error( test, correct3 ); + float err3 = Ulp_Error( test, correct4 ); + cl_long iErr2 = (long long) q2[j] - (long long) correct5; + cl_long iErr3 = (long long) q2[j] - (long long) correct6; + + // Did +0 work? + if( fabsf(err2) <= float_ulps && abs_cl_long( iErr2 ) <= maxiError ) + { + err = err2; + iErr = iErr2; + fail = 0; + } + // Did -0 work? + else if(fabsf(err3) <= float_ulps && abs_cl_long( iErr3 ) <= maxiError) + { + err = err3; + iErr = iErr3; + fail = 0; + } + + // retry per section 6.5.3.4 + if( fail && (IsFloatResultSubnormal(correct2, float_ulps ) || IsFloatResultSubnormal(correct3, float_ulps )) ) + { + fail = fail && ! ( test == 0.0f && (abs_cl_long( iErr2 ) <= maxiError || abs_cl_long( iErr3 ) <= maxiError) ); + if( ! 
fail ) + { + err = 0.0f; + iErr = 0; + } + } + } + } + if( fabsf(err ) > maxError ) + { + maxError = fabsf(err); + maxErrorVal = s[j]; + } + if( llabs(iErr) > maxError2 ) + { + maxError2 = llabs(iErr ); + maxErrorVal2 = s[j]; + } + + if( fail ) + { + vlog_error( "\nERROR: %s%s: {%f, %d} ulp error at %a: *{%a, %d} vs. {%a, %d}\n", f->name, sizeNames[k], err, (int) iErr, ((float*) gIn)[j], ((float*) gOut_Ref)[j], ((int*) gOut_Ref2)[j], test, q2[j] ); + error = -1; + goto exit; + } + } + } + } + + if( 0 == (i & 0x0fffffff) ) + { + if (gVerboseBruteForce) + { + vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step, bufferSize); + } else + { + vlog("." ); + } + fflush(stdout); + } + } + + if( ! gSkipCorrectnessTesting ) + { + if( gWimpyMode ) + vlog( "Wimp pass" ); + else + vlog( "passed" ); + } + + if( gMeasureTimes ) + { + //Init input array + uint32_t *p = (uint32_t *)gIn; + for( j = 0; j < bufferSize / sizeof( float ); j++ ) + p[j] = genrand_int32(d); + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); + return error; + } + + + // Run the kernels + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + size_t vectorSize = sizeValues[j] * sizeof(cl_float); + size_t localCount = (bufferSize + vectorSize - 1) / vectorSize; + if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; } + if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gOutBuffer2[j] ), &gOutBuffer2[j] ) )) { LogBuildError(programs[j]); goto exit; } + if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; } + + double sum = 0.0; + double bestTime = INFINITY; + for( k = 0; k < PERF_LOOP_COUNT; k++ ) + { + uint64_t startTime = GetTime(); + if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, 
NULL, NULL)) ) + { + vlog_error( "FAILED -- could not execute kernel\n" ); + goto exit; + } + + // Make sure OpenCL is done + if( (error = clFinish(gQueue) ) ) + { + vlog_error( "Error %d at clFinish\n", error ); + goto exit; + } + + uint64_t endTime = GetTime(); + double time = SubtractTime( endTime, startTime ); + sum += time; + if( time < bestTime ) + bestTime = time; + } + + if( gReportAverageTimes ) + bestTime = sum / PERF_LOOP_COUNT; + double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (bufferSize / sizeof( float ) ); + vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s", f->name, sizeNames[j] ); + } + } + + if( ! gSkipCorrectnessTesting ) + vlog( "\t{%8.2f, %lld} @ %a", maxError, maxError2, maxErrorVal ); + vlog( "\n" ); + +exit: + // Release + for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) + { + clReleaseKernel(kernels[k]); + clReleaseProgram(programs[k]); + } + + return error; +} + +int TestFunc_DoubleI_Double(const Func *f, MTdata d) +{ + uint64_t i; + uint32_t j, k; + int error; + cl_program programs[ VECTOR_SIZE_COUNT ]; + cl_kernel kernels[ VECTOR_SIZE_COUNT ]; + float maxError = 0.0f; + int64_t maxError2 = 0; + int ftz = f->ftz || gForceFTZ; + double maxErrorVal = 0.0f; + double maxErrorVal2 = 0.0f; + cl_ulong maxiError = f->double_ulps == INFINITY ? CL_ULONG_MAX : 0; + size_t bufferSize = (gWimpyMode)? 
gWimpyBufferSize: BUFFER_SIZE; + + uint64_t step = bufferSize / sizeof( double ); + int scale = (int)((1ULL<<32) / (16 * bufferSize / sizeof( double )) + 1); + + logFunctionInfo(f->name,sizeof(cl_double),gTestFastRelaxed); + if( gWimpyMode ) + { + step = (1ULL<<32) * gWimpyReductionFactor / (512); + } + + Force64BitFPUPrecision(); + + // Init the kernels + BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, f->nameInCode }; + if( (error = ThreadPool_Do( BuildKernel_DoubleFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info ) )) + { + return error; + } +/* + for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) + if( (error = BuildKernelDouble( f->nameInCode, (int) i, kernels + i, programs + i) ) ) + return error; +*/ + + for( i = 0; i < (1ULL<<32); i += step ) + { + //Init input array + double *p = (double *)gIn; + if( gWimpyMode ) + { + for( j = 0; j < bufferSize / sizeof( double ); j++ ) + p[j] = DoubleFromUInt32((uint32_t) i + j * scale); + } + else + { + for( j = 0; j < bufferSize / sizeof( double ); j++ ) + p[j] = DoubleFromUInt32((uint32_t) i + j); + } + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); + return error; + } + + // write garbage into output arrays + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + uint32_t pattern = 0xffffdead; + memset_pattern4(gOut[j], &pattern, bufferSize); + if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, bufferSize, gOut[j], 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", error, j ); + goto exit; + } + + memset_pattern4(gOut2[j], &pattern, bufferSize); + if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j], CL_FALSE, 0, bufferSize, gOut2[j], 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n", error, j ); + goto exit; + } + } + + // Run the 
kernels + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + size_t vectorSize = sizeValues[j] * sizeof(cl_double); + size_t localCount = (bufferSize + vectorSize - 1) / vectorSize; + if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; } + if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gOutBuffer2[j] ), &gOutBuffer2[j] ) )) { LogBuildError(programs[j]); goto exit; } + if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; } + + if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) + { + vlog_error( "FAILED -- could not execute kernel\n" ); + goto exit; + } + } + + // Get that moving + if( (error = clFlush(gQueue) )) + vlog( "clFlush failed\n" ); + + //Calculate the correctly rounded reference result + double *r = (double *)gOut_Ref; + int *r2 = (int *)gOut_Ref2; + double *s = (double *)gIn; + for( j = 0; j < bufferSize / sizeof( double ); j++ ) + r[j] = (double) f->dfunc.f_fpI( s[j], r2+j ); + + // Read the data back + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL)) ) + { + vlog_error( "ReadArray failed %d\n", error ); + goto exit; + } + if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer2[j], CL_TRUE, 0, bufferSize, gOut2[j], 0, NULL, NULL)) ) + { + vlog_error( "ReadArray2 failed %d\n", error ); + goto exit; + } + } + + if( gSkipCorrectnessTesting ) + break; + + //Verify data + uint64_t *t = (uint64_t *)gOut_Ref; + int32_t *t2 = (int32_t *)gOut_Ref2; + for( j = 0; j < bufferSize / sizeof( double ); j++ ) + { + for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) + { + uint64_t *q = (uint64_t *)(gOut[k]); + int32_t *q2 = (int32_t *)(gOut2[k]); + + // If we aren't getting the correctly rounded result + if( t[j] != q[j] || 
t2[j] != q2[j] ) + { + double test = ((double*) q)[j]; + int correct2 = INT_MIN; + long double correct = f->dfunc.f_fpI( s[j], &correct2 ); + float err = Ulp_Error_Double( test, correct ); + cl_long iErr = (long long) q2[j] - (long long) correct2; + int fail = ! (fabsf(err) <= f->double_ulps && abs_cl_long( iErr ) <= maxiError ); + if( ftz ) + { + // retry per section 6.5.3.2 + if( IsDoubleResultSubnormal(correct, f->double_ulps ) ) + { + fail = fail && ! ( test == 0.0f && iErr == 0 ); + if( ! fail ) + err = 0.0f; + } + + // retry per section 6.5.3.3 + if( IsDoubleSubnormal( s[j] ) ) + { + int correct5, correct6; + long double correct3 = f->dfunc.f_fpI( 0.0, &correct5 ); + long double correct4 = f->dfunc.f_fpI( -0.0, &correct6 ); + float err2 = Ulp_Error_Double( test, correct3 ); + float err3 = Ulp_Error_Double( test, correct4 ); + cl_long iErr2 = (long long) q2[j] - (long long) correct5; + cl_long iErr3 = (long long) q2[j] - (long long) correct6; + + // Did +0 work? + if( fabsf(err2) <= f->double_ulps && abs_cl_long( iErr2 ) <= maxiError ) + { + err = err2; + iErr = iErr2; + fail = 0; + } + // Did -0 work? + else if(fabsf(err3) <= f->double_ulps && abs_cl_long( iErr3 ) <= maxiError) + { + err = err3; + iErr = iErr3; + fail = 0; + } + + // retry per section 6.5.3.4 + if( fail && (IsDoubleResultSubnormal( correct2, f->double_ulps ) || IsDoubleResultSubnormal( correct3, f->double_ulps )) ) + { + fail = fail && ! ( test == 0.0f && (abs_cl_long( iErr2 ) <= maxiError || abs_cl_long( iErr3 ) <= maxiError) ); + if( ! fail ) + { + err = 0.0f; + iErr = 0; + } + } + } + } + if( fabsf(err ) > maxError ) + { + maxError = fabsf(err); + maxErrorVal = s[j]; + } + if( llabs(iErr) > maxError2 ) + { + maxError2 = llabs(iErr ); + maxErrorVal2 = s[j]; + } + + if( fail ) + { + vlog_error( "\nERROR: %sD%s: {%f, %d} ulp error at %.13la: *{%.13la, %d} vs. 
{%.13la, %d}\n", f->name, sizeNames[k], err, (int) iErr, ((double*) gIn)[j], ((double*) gOut_Ref)[j], ((int*) gOut_Ref2)[j], test, q2[j] ); + error = -1; + goto exit; + } + } + } + } + + if( 0 == (i & 0x0fffffff) ) + { + if (gVerboseBruteForce) + { + vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step, bufferSize); + } else + { + vlog("." ); + } + fflush(stdout); + } + } + + if( ! gSkipCorrectnessTesting ) + { + if( gWimpyMode ) + vlog( "Wimp pass" ); + else + vlog( "passed" ); + } + + if( gMeasureTimes ) + { + //Init input array + double *p = (double *)gIn; + + for( j = 0; j < bufferSize / sizeof( double ); j++ ) + p[j] = DoubleFromUInt32(genrand_int32(d)); + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); + return error; + } + + + // Run the kernels + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + size_t vectorSize = sizeValues[j] * sizeof(cl_double); + size_t localCount = (bufferSize + vectorSize - 1) / vectorSize; + if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; } + if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gOutBuffer2[j] ), &gOutBuffer2[j] ) )) { LogBuildError(programs[j]); goto exit; } + if( ( error = clSetKernelArg( kernels[j], 2, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; } + + double sum = 0.0; + double bestTime = INFINITY; + for( k = 0; k < PERF_LOOP_COUNT; k++ ) + { + uint64_t startTime = GetTime(); + if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) + { + vlog_error( "FAILED -- could not execute kernel\n" ); + goto exit; + } + + // Make sure OpenCL is done + if( (error = clFinish(gQueue) ) ) + { + vlog_error( "Error %d at clFinish\n", error ); + goto exit; + } + + uint64_t endTime = GetTime(); + double time = SubtractTime( 
endTime, startTime ); + sum += time; + if( time < bestTime ) + bestTime = time; + } + + if( gReportAverageTimes ) + bestTime = sum / PERF_LOOP_COUNT; + double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (bufferSize / sizeof( double ) ); + vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sd%s", f->name, sizeNames[j] ); + } + for( ; j < gMaxVectorSizeIndex; j++ ) + vlog( "\t -- " ); + } + + if( ! gSkipCorrectnessTesting ) + vlog( "\t{%8.2f, %lld} @ %a", maxError, maxError2, maxErrorVal ); + vlog( "\n" ); + +exit: + // Release + for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) + { + clReleaseKernel(kernels[k]); + clReleaseProgram(programs[k]); + } + + return error; +} + + + diff --git a/test_conformance/math_brute_force/unary_u.c b/test_conformance/math_brute_force/unary_u.c new file mode 100644 index 00000000..131d1e7e --- /dev/null +++ b/test_conformance/math_brute_force/unary_u.c @@ -0,0 +1,694 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "Utility.h" + +#include +#include "FunctionList.h" + +int TestFunc_Float_UInt(const Func *f, MTdata); +int TestFunc_Double_ULong(const Func *f, MTdata); + +#if defined( __cplusplus) + extern "C" +#endif +const vtbl _unary_u = { "unary_u", TestFunc_Float_UInt, TestFunc_Double_ULong }; + + +static int BuildKernel( const char *name, int vectorSize, cl_kernel *k, cl_program *p ); +static int BuildKernelDouble( const char *name, int vectorSize, cl_kernel *k, cl_program *p ); + +static int BuildKernel( const char *name, int vectorSize, cl_kernel *k, cl_program *p ) +{ + const char *c[] = { + "__kernel void math_kernel", sizeNames[vectorSize], "( __global float", sizeNames[vectorSize], "* out, __global uint", sizeNames[vectorSize], "* in)\n" + "{\n" + " int i = get_global_id(0);\n" + " out[i] = ", name, "( in[i] );\n" + "}\n" + }; + const char *c3[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global float* out, __global uint* in)\n" + "{\n" + " size_t i = get_global_id(0);\n" + " if( i + 1 < get_global_size(0) )\n" + " {\n" + " uint3 u0 = vload3( 0, in + 3 * i );\n" + " float3 f0 = ", name, "( u0 );\n" + " vstore3( f0, 0, out + 3*i );\n" + " }\n" + " else\n" + " {\n" + " size_t parity = i & 1; // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). 
Assume power of two buffer size \n" + " uint3 u0;\n" + " float3 f0;\n" + " switch( parity )\n" + " {\n" + " case 1:\n" + " u0 = (uint3)( in[3*i], 0xdead, 0xdead ); \n" + " break;\n" + " case 0:\n" + " u0 = (uint3)( in[3*i], in[3*i+1], 0xdead ); \n" + " break;\n" + " }\n" + " f0 = ", name, "( u0 );\n" + " switch( parity )\n" + " {\n" + " case 0:\n" + " out[3*i+1] = f0.y; \n" + " // fall through\n" + " case 1:\n" + " out[3*i] = f0.x; \n" + " break;\n" + " }\n" + " }\n" + "}\n" + }; + + const char **kern = c; + size_t kernSize = sizeof(c)/sizeof(c[0]); + + if( sizeValues[vectorSize] == 3 ) + { + kern = c3; + kernSize = sizeof(c3)/sizeof(c3[0]); + } + + char testName[32]; + snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); + + return MakeKernel(kern, (cl_uint) kernSize, testName, k, p); +} + +static int BuildKernelDouble( const char *name, int vectorSize, cl_kernel *k, cl_program *p ) +{ + const char *c[] = { + "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", + "__kernel void math_kernel", sizeNames[vectorSize], "( __global double", sizeNames[vectorSize], "* out, __global ulong", sizeNames[vectorSize], "* in)\n" + "{\n" + " int i = get_global_id(0);\n" + " out[i] = ", name, "( in[i] );\n" + "}\n" + }; + + const char *c3[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", + "__kernel void math_kernel", sizeNames[vectorSize], "( __global double* out, __global ulong* in)\n" + "{\n" + " size_t i = get_global_id(0);\n" + " if( i + 1 < get_global_size(0) )\n" + " {\n" + " ulong3 u0 = vload3( 0, in + 3 * i );\n" + " double3 f0 = ", name, "( u0 );\n" + " vstore3( f0, 0, out + 3*i );\n" + " }\n" + " else\n" + " {\n" + " size_t parity = i & 1; // Figure out how many elements are left over after BUFFER_SIZE % (3*sizeof(float)). 
Assume power of two buffer size \n" + " ulong3 u0;\n" + " switch( parity )\n" + " {\n" + " case 1:\n" + " u0 = (ulong3)( in[3*i], 0xdeaddeaddeaddeadUL, 0xdeaddeaddeaddeadUL ); \n" + " break;\n" + " case 0:\n" + " u0 = (ulong3)( in[3*i], in[3*i+1], 0xdeaddeaddeaddeadUL ); \n" + " break;\n" + " }\n" + " double3 f0 = ", name, "( u0 );\n" + " switch( parity )\n" + " {\n" + " case 0:\n" + " out[3*i+1] = f0.y; \n" + " // fall through\n" + " case 1:\n" + " out[3*i] = f0.x; \n" + " break;\n" + " }\n" + " }\n" + "}\n" + }; + + const char **kern = c; + size_t kernSize = sizeof(c)/sizeof(c[0]); + + if( sizeValues[vectorSize] == 3 ) + { + kern = c3; + kernSize = sizeof(c3)/sizeof(c3[0]); + } + + + char testName[32]; + snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] ); + + return MakeKernel(kern, (cl_uint) kernSize, testName, k, p); +} + +typedef struct BuildKernelInfo +{ + cl_uint offset; // the first vector size to build + cl_kernel *kernels; + cl_program *programs; + const char *nameInCode; +}BuildKernelInfo; + +static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); +static cl_int BuildKernel_FloatFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ) +{ + BuildKernelInfo *info = (BuildKernelInfo*) p; + cl_uint i = info->offset + job_id; + return BuildKernel( info->nameInCode, i, info->kernels + i, info->programs + i ); +} + +static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ); +static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, void *p ) +{ + BuildKernelInfo *info = (BuildKernelInfo*) p; + cl_uint i = info->offset + job_id; + return BuildKernelDouble( info->nameInCode, i, info->kernels + i, info->programs + i ); +} + +int TestFunc_Float_UInt(const Func *f, MTdata d) +{ + uint64_t i; + uint32_t j, k; + int error; + cl_program programs[ VECTOR_SIZE_COUNT ]; + cl_kernel kernels[ VECTOR_SIZE_COUNT ]; + float maxError = 0.0f; + int ftz = f->ftz || 
gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); + float maxErrorVal = 0.0f; + size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE; + + uint64_t step = bufferSize / sizeof( float ); + int scale = (int)((1ULL<<32) / (16 * bufferSize / sizeof( double )) + 1); + int isRangeLimited = 0; + float float_ulps; + float half_sin_cos_tan_limit = 0; + + logFunctionInfo(f->name,sizeof(cl_float),gTestFastRelaxed); + if( gWimpyMode ) + { + step = (1ULL<<32) * gWimpyReductionFactor / (512); + } + if( gIsEmbedded) + float_ulps = f->float_embedded_ulps; + else + float_ulps = f->float_ulps; + + // Init the kernels + BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, f->nameInCode }; + if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) )) + return error; +/* + for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) + if( (error = BuildKernel( f->nameInCode, (int) i, kernels + i, programs + i) ) ) + return error; +*/ + + if( 0 == strcmp( f->name, "half_sin") || 0 == strcmp( f->name, "half_cos") ) + { + isRangeLimited = 1; + half_sin_cos_tan_limit = 1.0f + float_ulps * (FLT_EPSILON/2.0f); // out of range results from finite inputs must be in [-1,1] + } + else if( 0 == strcmp( f->name, "half_tan")) + { + isRangeLimited = 1; + half_sin_cos_tan_limit = INFINITY; // out of range resut from finite inputs must be numeric + } + + + for( i = 0; i < (1ULL<<32); i += step ) + { + //Init input array + uint32_t *p = (uint32_t *)gIn; + if( gWimpyMode ) + { + for( j = 0; j < bufferSize / sizeof( float ); j++ ) + p[j] = (uint32_t) i + j * scale; + } + else + { + for( j = 0; j < bufferSize / sizeof( float ); j++ ) + p[j] = (uint32_t) i + j; + } + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL))) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); + return error; + } + + // write garbage into output arrays + for( j = 
gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + uint32_t pattern = 0xffffdead; + memset_pattern4(gOut[j], &pattern, bufferSize); + if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, bufferSize, gOut[j], 0, NULL, NULL))) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", error, j ); + goto exit; + } + } + + // Run the kernels + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + size_t vectorSize = sizeValues[j] * sizeof(cl_float); + size_t localCount = (bufferSize + vectorSize - 1) / vectorSize; + if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ))){ LogBuildError(programs[j]); goto exit; } + if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; } + + if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL))) + { + vlog_error( "FAILURE -- could not execute kernel\n" ); + goto exit; + } + } + + // Get that moving + if( (error = clFlush(gQueue) )) + vlog( "clFlush failed\n" ); + + //Calculate the correctly rounded reference result + float *r = (float*) gOut_Ref; + cl_uint *s = (cl_uint*) gIn; + for( j = 0; j < bufferSize / sizeof( float ); j++ ) + r[j] = (float) f->func.f_u( s[j] ); + + // Read the data back + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL))) + { + vlog_error( "ReadArray failed %d\n", error ); + goto exit; + } + } + + if( gSkipCorrectnessTesting ) + break; + + + //Verify data + uint32_t *t = (uint32_t*) gOut_Ref; + for( j = 0; j < bufferSize / sizeof( float ); j++ ) + { + for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) + { + uint32_t *q = (uint32_t*)(gOut[k]); + + // If we aren't getting the correctly rounded result + if( t[j] != q[j] ) + { + float test = ((float*) q)[j]; + double correct = f->func.f_u( s[j] ); 
+ float err = Ulp_Error( test, correct ); + int fail = ! (fabsf(err) <= float_ulps); + + // half_sin/cos/tan are only valid between +-2**16, Inf, NaN + if( isRangeLimited && fabsf(s[j]) > MAKE_HEX_FLOAT(0x1.0p16f, 0x1L, 16) && fabsf(s[j]) < INFINITY ) + { + if( fabsf( test ) <= half_sin_cos_tan_limit ) + { + err = 0; + fail = 0; + } + } + + if( fail ) + { + if( ftz ) + { + // retry per section 6.5.3.2 + if( IsFloatResultSubnormal(correct, float_ulps) ) + { + fail = fail && ( test != 0.0f ); + if( ! fail ) + err = 0.0f; + } + } + } + if( fabsf(err ) > maxError ) + { + maxError = fabsf(err); + maxErrorVal = s[j]; + } + if( fail ) + { + vlog_error( "\n%s%s: %f ulp error at 0x%8.8x: *%a vs. %a\n", f->name, sizeNames[k], err, ((uint32_t*) gIn)[j], ((float*) gOut_Ref)[j], test ); + error = -1; + goto exit; + } + } + } + } + + if( 0 == (i & 0x0fffffff) ) + { + if (gVerboseBruteForce) + { + vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step, bufferSize); + } else + { + vlog("." ); + } + fflush(stdout); + } + } + + + if( ! 
gSkipCorrectnessTesting ) + { + if( gWimpyMode ) + vlog( "Wimp pass" ); + else + vlog( "passed" ); + } + + if( gMeasureTimes ) + { + //Init input array + uint32_t *p = (uint32_t*)gIn; + if( strstr( f->name, "exp" ) || strstr( f->name, "sin" ) || strstr( f->name, "cos" ) || strstr( f->name, "tan" ) ) + for( j = 0; j < bufferSize / sizeof( float ); j++ ) + ((float*)p)[j] = (float) genrand_real1(d); + else if( strstr( f->name, "log" ) ) + for( j = 0; j < bufferSize / sizeof( float ); j++ ) + p[j] = genrand_int32(d) & 0x7fffffff; + else + for( j = 0; j < bufferSize / sizeof( float ); j++ ) + p[j] = genrand_int32(d); + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); + return error; + } + + + // Run the kernels + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + size_t vectorSize = sizeValues[j] * sizeof(cl_float); + size_t localCount = (bufferSize + vectorSize - 1) / vectorSize; + if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; } + if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; } + + double sum = 0.0; + double bestTime = INFINITY; + for( k = 0; k < PERF_LOOP_COUNT; k++ ) + { + uint64_t startTime = GetTime(); + if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) + { + vlog_error( "FAILURE -- could not execute kernel\n" ); + goto exit; + } + + // Make sure OpenCL is done + if( (error = clFinish(gQueue) ) ) + { + vlog_error( "Error %d at clFinish\n", error ); + goto exit; + } + + uint64_t endTime = GetTime(); + double time = SubtractTime( endTime, startTime ); + sum += time; + if( time < bestTime ) + bestTime = time; + } + + if( gReportAverageTimes ) + bestTime = sum / PERF_LOOP_COUNT; + double clocksPerOp = bestTime * 
(double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (bufferSize / sizeof( float ) ); + vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s", f->name, sizeNames[j] ); + } + } + + if( ! gSkipCorrectnessTesting ) + vlog( "\t%8.2f @ %a", maxError, maxErrorVal ); + vlog( "\n" ); + +exit: + // Release + for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) + { + clReleaseKernel(kernels[k]); + clReleaseProgram(programs[k]); + } + + return error; +} + +static cl_ulong random64( MTdata d ) +{ + return (cl_ulong) genrand_int32(d) | ((cl_ulong) genrand_int32(d) << 32); +} + +int TestFunc_Double_ULong(const Func *f, MTdata d) +{ + uint64_t i; + uint32_t j, k; + int error; + cl_program programs[ VECTOR_SIZE_COUNT ]; + cl_kernel kernels[ VECTOR_SIZE_COUNT ]; + float maxError = 0.0f; + int ftz = f->ftz || gForceFTZ; + double maxErrorVal = 0.0f; + size_t bufferSize = (gWimpyMode)? gWimpyBufferSize: BUFFER_SIZE; + uint64_t step = bufferSize / sizeof( cl_double ); + + logFunctionInfo(f->name,sizeof(cl_double),gTestFastRelaxed); + if( gWimpyMode ) + { + step = (1ULL<<32) * gWimpyReductionFactor / (512); + } + Force64BitFPUPrecision(); + + // Init the kernels + BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, f->nameInCode }; + if( (error = ThreadPool_Do( BuildKernel_DoubleFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info ) )) + { + return error; + } +/* + for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ ) + if( (error = BuildKernelDouble( f->nameInCode, (int) i, kernels + i, programs + i) ) ) + return error; +*/ + + for( i = 0; i < (1ULL<<32); i += step ) + { + //Init input array + cl_ulong *p = (cl_ulong *)gIn; + for( j = 0; j < bufferSize / sizeof( cl_ulong ); j++ ) + p[j] = random64(d); + + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL))) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); + return error; + } + + // write garbage 
into output arrays + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + uint32_t pattern = 0xffffdead; + memset_pattern4(gOut[j], &pattern, bufferSize); + if( (error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0, bufferSize, gOut[j], 0, NULL, NULL))) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n", error, j ); + goto exit; + } + } + + // Run the kernels + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + size_t vectorSize = sizeValues[j] * sizeof(cl_double); + size_t localCount = (bufferSize + vectorSize - 1) / vectorSize; + if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ))){ LogBuildError(programs[j]); goto exit; } + if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; } + + if( (error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL))) + { + vlog_error( "FAILURE -- could not execute kernel\n" ); + goto exit; + } + } + + // Get that moving + if( (error = clFlush(gQueue) )) + vlog( "clFlush failed\n" ); + + //Calculate the correctly rounded reference result + double *r = (double*) gOut_Ref; + cl_ulong *s = (cl_ulong*) gIn; + for( j = 0; j < bufferSize / sizeof( cl_double ); j++ ) + r[j] = (double) f->dfunc.f_u( s[j] ); + + // Read the data back + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + if( (error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0, bufferSize, gOut[j], 0, NULL, NULL))) + { + vlog_error( "ReadArray failed %d\n", error ); + goto exit; + } + } + + if( gSkipCorrectnessTesting ) + break; + + + //Verify data + uint64_t *t = (uint64_t*) gOut_Ref; + for( j = 0; j < bufferSize / sizeof( cl_double ); j++ ) + { + for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) + { + uint64_t *q = (uint64_t*)(gOut[k]); + + // If we aren't getting the correctly rounded result + if( t[j] != q[j] ) + { + double test = ((double*) 
q)[j]; + long double correct = f->dfunc.f_u( s[j] ); + float err = Ulp_Error_Double(test, correct); + int fail = ! (fabsf(err) <= f->double_ulps); + + // half_sin/cos/tan are only valid between +-2**16, Inf, NaN + if( fail ) + { + if( ftz ) + { + // retry per section 6.5.3.2 + if( IsDoubleResultSubnormal(correct, f->double_ulps) ) + { + fail = fail && ( test != 0.0 ); + if( ! fail ) + err = 0.0f; + } + } + } + if( fabsf(err ) > maxError ) + { + maxError = fabsf(err); + maxErrorVal = s[j]; + } + if( fail ) + { + vlog_error( "\n%s%sD: %f ulp error at 0x%16.16llx: *%.13la vs. %.13la\n", f->name, sizeNames[k], err, ((uint64_t*) gIn)[j], ((double*) gOut_Ref)[j], test ); + error = -1; + goto exit; + } + } + } + } + + if( 0 == (i & 0x0fffffff) ) + { + if (gVerboseBruteForce) + { + vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step, bufferSize); + } else + { + vlog("." ); + } + fflush(stdout); + } + } + + if( ! gSkipCorrectnessTesting ) + { + if( gWimpyMode ) + vlog( "Wimp pass" ); + else + vlog( "passed" ); + } + + if( gMeasureTimes ) + { + //Init input array + double *p = (double*) gIn; + + for( j = 0; j < bufferSize / sizeof( double ); j++ ) + p[j] = random64(d); + if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, bufferSize, gIn, 0, NULL, NULL) )) + { + vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error ); + return error; + } + + + // Run the kernels + for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ ) + { + size_t vectorSize = sizeValues[j] * sizeof(cl_double); + size_t localCount = (bufferSize + vectorSize - 1) / vectorSize; + if( ( error = clSetKernelArg(kernels[j], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(programs[j]); goto exit; } + if( ( error = clSetKernelArg( kernels[j], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(programs[j]); goto exit; } + + double sum = 0.0; + double bestTime = INFINITY; + for( k = 0; k < PERF_LOOP_COUNT; k++ ) + { + uint64_t startTime = GetTime(); + if( 
(error = clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL, &localCount, NULL, 0, NULL, NULL)) ) + { + vlog_error( "FAILURE -- could not execute kernel\n" ); + goto exit; + } + + // Make sure OpenCL is done + if( (error = clFinish(gQueue) ) ) + { + vlog_error( "Error %d at clFinish\n", error ); + goto exit; + } + + uint64_t endTime = GetTime(); + double time = SubtractTime( endTime, startTime ); + sum += time; + if( time < bestTime ) + bestTime = time; + } + + if( gReportAverageTimes ) + bestTime = sum / PERF_LOOP_COUNT; + double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (bufferSize / sizeof( double ) ); + vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sD%s", f->name, sizeNames[j] ); + } + for( ; j < gMaxVectorSizeIndex; j++ ) + vlog( "\t -- " ); + } + + if( ! gSkipCorrectnessTesting ) + vlog( "\t%8.2f @ %a", maxError, maxErrorVal ); + vlog( "\n" ); + +exit: + // Release + for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ ) + { + clReleaseKernel(kernels[k]); + clReleaseProgram(programs[k]); + } + + return error; +} + + diff --git a/test_conformance/mem_host_flags/CMakeLists.txt b/test_conformance/mem_host_flags/CMakeLists.txt new file mode 100644 index 00000000..7325b34e --- /dev/null +++ b/test_conformance/mem_host_flags/CMakeLists.txt @@ -0,0 +1,20 @@ +set(MODULE_NAME MEM_HOST_FLAGS) + +set(${MODULE_NAME}_SOURCES + main.cpp + mem_host_buffer.cpp + mem_host_image.cpp + ../../test_common/harness/errorHelpers.c + ../../test_common/harness/threadTesting.c + ../../test_common/harness/testHarness.c + ../../test_common/harness/genericThread.cpp + ../../test_common/harness/kernelHelpers.c + ../../test_common/harness/typeWrappers.cpp + ../../test_common/harness/mt19937.c + ../../test_common/harness/conversions.c + ../../test_common/harness/ThreadPool.c + ../../test_common/harness/msvc9.c + ../../test_common/harness/parseParameters.cpp +) + +include(../CMakeCommon.txt) diff --git 
a/test_conformance/mem_host_flags/C_host_memory_block.h b/test_conformance/mem_host_flags/C_host_memory_block.h new file mode 100644 index 00000000..90e0b6b3 --- /dev/null +++ b/test_conformance/mem_host_flags/C_host_memory_block.h @@ -0,0 +1,252 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef test_conformance_cHost_MemoryBlock_h +#define test_conformance_cHost_MemoryBlock_h + +#include "../../test_common/harness/compat.h" + +#include +#include +#include + +template < class T> class C_host_memory_block +{ +public: + int num_elements; + int element_size; + T *pData; + + C_host_memory_block(); + ~C_host_memory_block(); + void Init(int num_elem, T &value); + void Init(int num_elem); + void Set_to(T & val); + void Set_to_zero(); + bool Equal_to(T &val); + size_t Count(T &val); + bool Equal(C_host_memory_block < T > & another); + bool Equal_rect(C_host_memory_block < T > & another, + size_t * host_origin, + size_t * region, + size_t host_row_pitch, + size_t host_slice_pitch); + bool Equal(T *pData, int num_elements); + + bool Equal_rect_from_orig(C_host_memory_block < T > & another, + size_t * soffset, + size_t * region, + size_t host_row_pitch, + size_t host_slice_pitch); + + bool Equal_rect_from_orig(T* another_pdata, + size_t * soffset, + size_t * region, + size_t host_row_pitch, + size_t host_slice_pitch); +}; + +template < class T > +C_host_memory_block::C_host_memory_block() +{ + pData = 
NULL;
+    element_size = sizeof (T);
+    num_elements = 0;
+}
+
+// NOTE(review): this span is extraction-damaged — template headers and the bodies of
+// every 'for (int i=0; i' loop below were truncated where '<' began; reconstruct from
+// the original header before relying on the code as shown here.
+template < class T>
+C_host_memory_block::~C_host_memory_block()
+{
+    // NOTE(review): pData is allocated with 'new T[...]' in Init(); this (and the
+    // deletes in Init()) should be 'delete [] pData' — mismatched delete is UB.
+    if (pData!=NULL) delete pData;
+    num_elements = 0;
+}
+
+template < class T >
+void C_host_memory_block::Init(int num_elem, T & value)
+{
+    if (pData!=NULL) delete pData;
+    pData= new T [num_elem];
+    for (int i=0; i
+void C_host_memory_block::Init(int num_elem)
+{
+    if (pData!=NULL) delete pData;
+    pData = new T [num_elem];
+    for (int i=0; i
+void C_host_memory_block::Set_to_zero()
+{
+    T v = 0;
+    Set_to(v);
+}
+
+template < class T >
+void C_host_memory_block::Set_to(T &val)
+{
+    for (int i=0; i
+bool C_host_memory_block::Equal_to(T &val)
+{
+    int count = 0;
+
+    for (int i=0; i
+bool C_host_memory_block::Equal(C_host_memory_block < T > & another)
+{
+    int count = 0;
+
+    for (int i=0; i
+bool C_host_memory_block::Equal(T *pIn_Data, int Innum_elements)
+{
+    if (this->num_elements!= Innum_elements)
+        return false;
+
+    int count = 0;
+
+    for (int i=0; i
+size_t C_host_memory_block::Count(T &val)
+{
+    size_t count = 0;
+    for (int i=0; i
+bool C_host_memory_block::Equal_rect(C_host_memory_block < T > & another,
+    size_t * soffset,
+    size_t * region,
+    size_t host_row_pitch,
+    size_t host_slice_pitch)
+{
+    size_t row_pitch = host_row_pitch ? host_row_pitch : region[0];
+    // NOTE(review): fallback operand looks wrong — 'host_slice_pitch ? host_slice_pitch : ...'
+    // was presumably intended (host_row_pitch is tested variable's sibling). The same
+    // pattern recurs in both Equal_rect_from_orig overloads below. TODO confirm.
+    size_t slice_pitch = host_slice_pitch ? host_row_pitch : region[1];
+
+    size_t count = 0;
+
+    size_t total = region[0] * region[1] * region[2];
+
+    size_t x, y, z;
+    size_t orig = (size_t)(soffset[0] + row_pitch*soffset[1] + slice_pitch * soffset[2]);
+    for (z=0; z
+bool C_host_memory_block::Equal_rect_from_orig(C_host_memory_block < T > & another,
+    size_t * soffset,
+    size_t * region,
+    size_t host_row_pitch,
+    size_t host_slice_pitch)
+{
+    size_t row_pitch = host_row_pitch ? host_row_pitch : region[0];
+    size_t slice_pitch = host_slice_pitch ?
host_row_pitch : region[1]; + + size_t count = 0; + + size_t total = region[0] * region[1] * region[2]; + + size_t x, y, z; + size_t orig = soffset[0] + row_pitch * soffset[1] + slice_pitch * soffset[2]; + for (z=0; z +bool C_host_memory_block::Equal_rect_from_orig(T* another_pdata, + size_t * soffset, + size_t * region, + size_t host_row_pitch, + size_t host_slice_pitch) +{ + size_t row_pitch = host_row_pitch ? host_row_pitch : region[0]; + size_t slice_pitch = host_slice_pitch ? host_row_pitch : region[1]; + + size_t count = 0; + + size_t total = region[0] * region[1] * region[2]; + + size_t x, y, z; + size_t orig = soffset[0] + row_pitch*soffset[1] + slice_pitch * soffset[2]; + for (z=0; z +#include + +#include "procs.h" +#include "C_host_memory_block.h" + +#define TEST_VALUE 5 +typedef cl_char TEST_ELEMENT_TYPE; + +enum {SUCCESS, FAILURE=-1000}; + +extern const char *buffer_write_kernel_code[]; + +enum BUFFER_TYPE {_BUFFER, _Sub_BUFFER}; + +template < class T > class cBuffer_checker +{ +public: + cBuffer_checker(cl_device_id deviceID, cl_context context, + cl_command_queue queue); + ~cBuffer_checker(); + + cl_device_id m_deviceID; + cl_context m_context; + cl_command_queue m_queue; + + clMemWrapper m_buffer, m_buffer_parent; + enum BUFFER_TYPE m_buffer_type; + + cl_buffer_region m_sub_buffer_region; + + cl_int err; + cl_bool m_blocking; + cl_mem_flags buffer_mem_flag; + + C_host_memory_block host_m_0, host_m_1, host_m_2; + int m_nNumber_elements; + + void *pData, *pData2; + + void * pHost_ptr; // the host ptr at creation + + size_t buffer_origin[3]; + size_t host_origin[3]; + size_t region[3]; + size_t buffer_row_pitch; + size_t buffer_slice_pitch; + size_t host_row_pitch; + size_t host_slice_pitch; + + size_t buffer_origin_bytes[3]; + size_t host_origin_bytes[3]; + size_t region_bytes[3]; + size_t buffer_row_pitch_bytes; + size_t buffer_slice_pitch_bytes; + size_t host_row_pitch_bytes; + size_t host_slice_pitch_bytes; + + cl_int CreateBuffer(cl_mem_flags 
buffer_mem_flag, void * pdata); + int get_block_size_bytes() { return (int)(m_nNumber_elements * sizeof(T)); }; + virtual cl_int SetupBuffer() = 0; + + virtual cl_int Setup_Test_Environment(); + + virtual cl_int SetupASSubBuffer(cl_mem_flags parent_buffer_flag); + + virtual cl_int verify(cl_int err, cl_event & event); + + virtual cl_int Check_GetMemObjectInfo(cl_mem_flags buffer_mem_flag); + + void Init_rect(int bufforg[3], int host_org[3], int region[3], + int buffer_pitch[2], int host_pitch[2]); + + void Init_rect(); + + virtual cl_int verify_RW_Buffer() = 0; + virtual cl_int verify_RW_Buffer_rect() = 0; + virtual cl_int verify_RW_Buffer_mapping() = 0; +}; + +template < class T > +cBuffer_checker::cBuffer_checker(cl_device_id deviceID, cl_context context, + cl_command_queue queue) +{ + m_nNumber_elements = 0; + + m_deviceID = deviceID; + m_context = context; + m_queue = queue; + + m_blocking = false; + + buffer_mem_flag = CL_MEM_READ_WRITE; + pData = pData2 = NULL; + + buffer_origin[0] = buffer_origin[1] = buffer_origin[2] = 0; + host_origin[0] = host_origin[1] = host_origin[2] = 0; + region[0] = region[1] = region[2] = 0; + buffer_row_pitch = buffer_slice_pitch = host_row_pitch = host_slice_pitch = 0; + + buffer_origin_bytes[0] = buffer_origin_bytes[1] = buffer_origin_bytes[2] = 0; + host_origin_bytes[0] = host_origin_bytes[1] = host_origin_bytes[2] = 0; + region_bytes[0] = region_bytes[1] = region_bytes[2] = 0; + buffer_row_pitch_bytes = buffer_slice_pitch_bytes = 0; + host_row_pitch_bytes = host_slice_pitch_bytes = 0; + + pHost_ptr = NULL; +} + +template < class T > +cBuffer_checker::~cBuffer_checker() +{ +} + +template < class T > +cl_int cBuffer_checker::SetupBuffer() +{ + m_buffer_type = _BUFFER; + return CL_SUCCESS; +} + +template < class T > +cl_int cBuffer_checker::Setup_Test_Environment() +{ + return CL_SUCCESS; +} + +template < class T > +cl_int cBuffer_checker::SetupASSubBuffer(cl_mem_flags parent_buffer_flag) +{ + m_buffer_type = _Sub_BUFFER; + + int 
supersize = 8000;
+    this-> m_nNumber_elements = 1000;
+    T vv1= TEST_VALUE;
+
+    int block_size_in_byte = (int)(supersize * sizeof(T));
+
+    this->host_m_0.Init(supersize);
+
+    m_buffer_parent = clCreateBuffer(this->m_context, parent_buffer_flag,
+        block_size_in_byte, this->host_m_0.pData, &err);
+    test_error(err, "clCreateBuffer error");
+
+    int size = this->m_nNumber_elements; // the size of subbuffer in elements
+
+    cl_uint base_addr_align_bits;
+    err = clGetDeviceInfo(m_deviceID, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof base_addr_align_bits, &base_addr_align_bits, NULL);
+    test_error(err,"clGetDeviceInfo for CL_DEVICE_MEM_BASE_ADDR_ALIGN");
+
+    int base_addr_align_bytes = base_addr_align_bits/8;
+
+    int buffer_origin[3] = {base_addr_align_bytes, 0, 0};
+    int host_origin[3] = {0, 0, 0};
+    int region[3] = {size, 1, 1};
+    int buffer_pitch[2] = {0, 0};
+    int host_pitch[2] = {0, 0};
+    this->Init_rect(buffer_origin, host_origin, region, buffer_pitch, host_pitch);
+
+    this->m_nNumber_elements = size; // the size of subbuffer in elements
+    this->host_m_1.Init(this->m_nNumber_elements, vv1);
+
+    this->m_sub_buffer_region.origin = this->buffer_origin_bytes[0]; // in bytes
+    this->m_sub_buffer_region.size = this->region_bytes[0];
+
+    cl_event event;
+    // NOTE(review): this local 'err' shadows the member 'err' that clCreateBuffer
+    // above wrote into — confirm the shadowing is intentional.
+    cl_int err = CL_SUCCESS;
+    err = clEnqueueReadBufferRect(this->m_queue, m_buffer_parent, CL_TRUE,
+        this->buffer_origin_bytes,
+        this->host_origin_bytes,
+        this->region_bytes,
+        this->buffer_row_pitch_bytes,
+        this->buffer_slice_pitch_bytes,
+        this->host_row_pitch_bytes,
+        this->host_slice_pitch_bytes,
+        this->host_m_1.pData,
+        0, NULL, &event); // update the mem_1
+
+    if (err == CL_SUCCESS && (parent_buffer_flag & (CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS))) {
+        log_error("Calling clEnqueueReadBufferRect on a memory object created with the CL_MEM_HOST_WRITE_ONLY flag or the CL_MEM_HOST_NO_ACCESS flag should not return CL_SUCCESS\n");
+        err = FAILURE;
+        return err;
+
+        // NOTE(review): unreachable — the 'return err' above exits first; this wait was
+        // presumably meant to run before the return, or outside this error branch. TODO confirm.
+        if (this->m_blocking) {
+            err = clWaitForEvents(1,
&event);
+            test_error(err, "clWaitForEvents error");
+        }
+    } else {
+        err = CL_SUCCESS;
+    }
+
+    // NOTE(review): 'f' stays uninitialized when parent_buffer_flag carries none of
+    // the three host-access flags; clCreateSubBuffer then receives indeterminate
+    // flags — consider defaulting f to 0. TODO confirm callers always pass one.
+    cl_mem_flags f;
+    if (parent_buffer_flag & CL_MEM_HOST_READ_ONLY)
+        f = CL_MEM_HOST_READ_ONLY;
+    else if (parent_buffer_flag & CL_MEM_HOST_WRITE_ONLY)
+        f = CL_MEM_HOST_WRITE_ONLY;
+    else if (parent_buffer_flag & CL_MEM_HOST_NO_ACCESS)
+        f = CL_MEM_HOST_NO_ACCESS;
+
+    m_buffer = clCreateSubBuffer(m_buffer_parent, f, CL_BUFFER_CREATE_TYPE_REGION,
+        &(this->m_sub_buffer_region), &err);
+    test_error(err, "clCreateSubBuffer error");
+
+    // NOTE(review): '|' makes this condition always true; bitwise '&' was presumably
+    // intended to test for CL_MEM_USE_HOST_PTR. TODO confirm.
+    if (parent_buffer_flag | CL_MEM_USE_HOST_PTR)
+    {
+        this->pHost_ptr = (this->host_m_0.pData + this->m_sub_buffer_region.origin/sizeof(T));
+    }
+
+    T vv2 = 0;
+    this->host_m_2.Init(this->m_nNumber_elements, vv2);
+
+    return err;
+}
+
+template < class T >
+cl_int cBuffer_checker::verify(cl_int err, cl_event & event)
+{
+    return CL_SUCCESS;
+}
+
+template < class T >
+cl_int cBuffer_checker::CreateBuffer(cl_mem_flags buffer_mem_flag, void *pdata)
+{
+    cl_int err = CL_SUCCESS;
+    int block_size_in_byte= m_nNumber_elements* sizeof(T);
+
+    m_buffer = clCreateBuffer(m_context, buffer_mem_flag, block_size_in_byte, pdata, &err);
+
+    return err;
+};
+
+template < class T >
+cl_int cBuffer_checker::Check_GetMemObjectInfo(cl_mem_flags buffer_mem_flag)
+{
+    cl_int err = CL_SUCCESS;
+    cl_mem_flags buffer_mem_flag_Check;
+    err = clGetMemObjectInfo(this->m_buffer, CL_MEM_FLAGS, sizeof(cl_mem_flags),
+        &buffer_mem_flag_Check, NULL);
+
+    // NOTE(review): on mismatch this returns 'err' — CL_SUCCESS from the query above —
+    // so the failure is logged but never propagated to the caller; confirm intent.
+    if (buffer_mem_flag_Check != buffer_mem_flag) {
+        log_error("clGetMemObjectInfo result differs from the specified result\n");
+        return err;
+    }
+
+    cl_uint count = 0;
+    err = clGetMemObjectInfo(this->m_buffer, CL_MEM_REFERENCE_COUNT,
+        sizeof(cl_uint), &count, NULL);
+
+    if (count > 1)
+        log_info("========= buffer count %d\n", count);
+
+    test_error(err, "clGetMemObjectInfo failed");
+
+    return err;
+}
+
+template < class T >
+void cBuffer_checker::Init_rect ()
+{
+    int buffer_origin[3] = {10, 0, 0};
+    int host_origin[3] =
{10, 0, 0}; + int region[3] = {8, 1, 1}; + int buffer_pitch[2] = {0, 0}; + int host_pitch[2] = {0, 0}; + + this->Init_rect(buffer_origin, host_origin, region, buffer_pitch, host_pitch); +} + +template < class T > +void cBuffer_checker::Init_rect(int bufforg[3], int host_org[3], + int region_in[3], int buffer_pitch[2], int host_pitch[2]) +{ + buffer_origin[0] = bufforg[0]; + buffer_origin[1] = bufforg[1]; + buffer_origin[2] = bufforg[2]; + + host_origin[0] = host_org[0]; + host_origin[1] = host_org[1]; + host_origin[2] = host_org[2]; + + region[0] = region_in[0]; + region[1] = region_in[1]; + region[2] = region_in[2]; + + buffer_row_pitch = buffer_pitch[0]; + buffer_slice_pitch = buffer_pitch[1]; + host_row_pitch = host_pitch[0]; + host_slice_pitch = host_pitch[1]; + + int sizeof_element = sizeof(T); + for (int k=0; k<3; k++) + { + buffer_origin_bytes[k] = buffer_origin[k] * sizeof_element; + host_origin_bytes [k] = host_origin[k] * sizeof_element; + } + + region_bytes[0] = region[0] * sizeof_element; + region_bytes[1] = region[1]; + region_bytes[2] = region[2]; + buffer_row_pitch_bytes = buffer_row_pitch* sizeof_element; + buffer_slice_pitch_bytes = buffer_slice_pitch* sizeof_element; + host_row_pitch_bytes = host_row_pitch* sizeof_element; + host_slice_pitch_bytes = host_slice_pitch* sizeof_element; +} + +#endif diff --git a/test_conformance/mem_host_flags/checker_image_mem_host_no_access.hpp b/test_conformance/mem_host_flags/checker_image_mem_host_no_access.hpp new file mode 100644 index 00000000..b1960ba4 --- /dev/null +++ b/test_conformance/mem_host_flags/checker_image_mem_host_no_access.hpp @@ -0,0 +1,151 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef test_conformance_checker_Image_MEM_HOST_NO_ACCESS_h
+#define test_conformance_checker_Image_MEM_HOST_NO_ACCESS_h
+
+#include "checker_image_mem_host_write_only.hpp"
+
+// Checker for images created with CL_MEM_HOST_NO_ACCESS: every host-side
+// read, write and mapping of the image is expected to fail, while
+// device-side access (fill / copy) still works.
+// Fix: the base class of a class template must be written with its template
+// argument (cImage_check_mem_host_write_only< T >).
+template < class T>
+class cImage_check_mem_host_no_access : public cImage_check_mem_host_write_only< T >
+{
+public:
+    cImage_check_mem_host_no_access (cl_device_id deviceID, cl_context context, cl_command_queue queue)
+        : cImage_check_mem_host_write_only< T > (deviceID, context, queue)
+    {
+    }
+
+    ~cImage_check_mem_host_no_access() {};
+
+    cl_int verify_RW_Image();
+    cl_int verify_RW_Image_Mapping();
+};
+
+// Fill the image on the device, read it back through the host-readable copy
+// (update_host_mem_2), then check that clEnqueueWriteImage and
+// clEnqueueReadImage both refuse to touch the CL_MEM_HOST_NO_ACCESS image.
+template < class T>
+cl_int cImage_check_mem_host_no_access< T >::verify_RW_Image()
+{
+    this->Init_rect();
+
+    cl_event event;
+    size_t img_orig[3] = {0, 0, 0};
+    size_t img_region[3] = {0, 0, 0};
+    img_region[0] = this->m_cl_Image_desc.image_width;
+    img_region[1] = this->m_cl_Image_desc.image_height;
+    img_region[2] = this->m_cl_Image_desc.image_depth;
+
+    int color[4] = {0xFF, 0xFF, 0xFF, 0xFF};
+    cl_int err = CL_SUCCESS;
+    err = clEnqueueFillImage(this->m_queue, this->m_Image,
+                             &color,
+                             img_orig, img_region,
+                             0, NULL, &event);
+    // Fix: the fill result was previously unchecked.
+    test_error(err, "clEnqueueFillImage error");
+
+    if (!this->m_blocking) {
+        err = clWaitForEvents(1, &event);
+        test_error(err, "clWaitForEvents error");
+    }
+
+    this->update_host_mem_2();
+
+    int total = (int)(this->region[0] * this->region[1] * this->region[2]);
+
+    T v = 0xFFFFFFFF;
+    int tot = (int)(this->host_m_2.Count(v));
+    if (tot != total) {
+        log_error("Buffer data content difference found\n");
+        return FAILURE;
+    }
+
+    err = clEnqueueWriteImage(this->m_queue, this->m_Image, this->m_blocking,
+                              this->buffer_origin, this->region,
+                              this->buffer_row_pitch_bytes, this->buffer_slice_pitch_bytes,
+                              this->host_m_1.pData, 0, NULL, &event);
+
+    if (err == CL_SUCCESS) {
+        log_error("Calling clEnqueueWriteImage on a memory object created with the CL_MEM_HOST_NO_ACCESS flag should not return CL_SUCCESS\n");
+        err = FAILURE;
+        return err;
+
+    } else {
+        log_info("Test succeeded\n\n");
+        err = CL_SUCCESS;
+    }
+
+    v = 0;
+    this->host_m_2.Set_to(v);
+    err = clEnqueueReadImage(this->m_queue, this->m_Image, this->m_blocking,
+                             this->buffer_origin, this->region,
+                             this->buffer_row_pitch_bytes, this->buffer_slice_pitch_bytes,
+                             this->host_m_2.pData, 0, NULL, &event);
+
+    if (err == CL_SUCCESS) {
+        log_error("Calling clEnqueueReadImage on a memory object created with the CL_MEM_HOST_NO_ACCESS flag should not return CL_SUCCESS\n");
+        err = FAILURE;
+        return err;
+
+    } else {
+        log_info("Test succeeded\n\n");
+        err = CL_SUCCESS;
+    }
+
+    return err;
+}
+
+// Both CL_MAP_WRITE and CL_MAP_READ mappings must fail on a
+// CL_MEM_HOST_NO_ACCESS image.
+template < class T>
+cl_int cImage_check_mem_host_no_access< T >::verify_RW_Image_Mapping()
+{
+    this->Init_rect();
+
+    cl_event event;
+    cl_int err = CL_SUCCESS;
+
+    T *dataPtr = (T *) clEnqueueMapImage(this->m_queue, this->m_Image, this->m_blocking,
+                                         CL_MAP_WRITE,
+                                         this->buffer_origin, this->region,
+                                         &(this->buffer_row_pitch_bytes),
+                                         &(this->buffer_slice_pitch_bytes),
+                                         0, NULL, &event, &err);
+
+    if (err == CL_SUCCESS) {
+        log_error("Calling clEnqueueMapImage (CL_MAP_WRITE) on a memory object created with the CL_MEM_HOST_NO_ACCESS flag should not return CL_SUCCESS\n");
+        err = FAILURE;
+        return err;
+
+    } else {
+        log_info("Test succeeded\n\n");
+        err = CL_SUCCESS;
+    }
+
+    dataPtr = (T *) clEnqueueMapImage(this->m_queue, this->m_Image, this->m_blocking,
+                                      CL_MAP_READ,
+                                      this->buffer_origin, this->region,
+                                      &(this->buffer_row_pitch_bytes),
+                                      &(this->buffer_slice_pitch_bytes),
+                                      0, NULL, &event, &err);
+
+    if (err == CL_SUCCESS) {
+        log_error("Calling clEnqueueMapImage (CL_MAP_READ) on a memory object created with the CL_MEM_HOST_NO_ACCESS flag should not return CL_SUCCESS\n");
+        err = FAILURE;
+        return err;
+
+    } else {
+        log_info("Test succeeded\n\n");
+        err = CL_SUCCESS;
+    }
+
+    return err;
+}
+
+#endif
diff --git a/test_conformance/mem_host_flags/checker_image_mem_host_read_only.hpp b/test_conformance/mem_host_flags/checker_image_mem_host_read_only.hpp
new file mode 100644
index 00000000..925d8546
--- /dev/null
+++ b/test_conformance/mem_host_flags/checker_image_mem_host_read_only.hpp
@@ -0,0 +1,281 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef test_conformance_checker_Image_MEM_HOST_READ_ONLY_h
+#define test_conformance_checker_Image_MEM_HOST_READ_ONLY_h
+
+#include "checker.h"
+
+// Checker for images created with CL_MEM_HOST_READ_ONLY: host reads and
+// CL_MAP_READ mappings must succeed; host writes and CL_MAP_WRITE must fail.
+// Fix: the base class of this class template must be written with its
+// template argument (cBuffer_checker< T >), here and in the constructor.
+template < class T> class cImage_check_mem_host_read_only : public cBuffer_checker< T >
+{
+public:
+    cImage_check_mem_host_read_only(cl_device_id deviceID, cl_context context, cl_command_queue queue)
+        : cBuffer_checker< T > (deviceID, context, queue)
+    {
+        m_cl_image_format.image_channel_order = CL_RGBA;
+        m_cl_image_format.image_channel_data_type = CL_UNSIGNED_INT8;
+
+        m_cl_Image_desc.image_type = CL_MEM_OBJECT_IMAGE1D;
+        m_cl_Image_desc.image_width = 0;
+        m_cl_Image_desc.image_height = 0;
+        m_cl_Image_desc.image_depth = 0;
+        m_cl_Image_desc.image_array_size = 0;
+        m_cl_Image_desc.image_row_pitch = 0;
+        m_cl_Image_desc.image_slice_pitch = 0;
+        m_cl_Image_desc.num_mip_levels = 0;
+        m_cl_Image_desc.num_samples = 0;
+        m_cl_Image_desc.mem_object = NULL;
+
+        m_Image = NULL;
+    };
+
+    ~cImage_check_mem_host_read_only()
+    {
+    };
+
+    cl_int get_image_elements();
+
+    cl_image_format m_cl_image_format;  // RGBA / CL_UNSIGNED_INT8 by default
+    cl_image_desc m_cl_Image_desc;      // geometry filled in by the test driver
+    clMemWrapper m_Image;               // the image under test
+
+    virtual cl_int SetupImage();
+    virtual cl_int SetupBuffer();
+    virtual cl_int verify_RW_Image();
+
+    virtual cl_int verify_RW_Image_Mapping();
+    virtual cl_int verify_data(T *pdtaIn);
+    virtual cl_int verify_data_with_offset(T *pdtaIn, size_t *offset);
+
+    cl_int get_image_content_size();
+    cl_int get_image_data_size();
+
+    // Buffer-oriented checks are meaningless for images; they are no-ops.
+    virtual cl_int verify_RW_Buffer();
+    virtual cl_int verify_RW_Buffer_rect();
+    virtual cl_int verify_RW_Buffer_mapping();
+    cl_int verify_mapping_ptr(T *ptr);
+};
+
+// For CL_MEM_USE_HOST_PTR images, check that the mapped pointer (rebased to
+// the image origin) is the host pointer the image was created with.
+template < class T >
+cl_int cImage_check_mem_host_read_only< T >::verify_mapping_ptr(T *dataPtr)
+{
+    int offset_pixel = (int)(this->buffer_origin[0] + this->buffer_origin[1] *
+                             this->buffer_row_pitch_bytes / sizeof(T) + this->buffer_origin[2] *
+                             this->buffer_slice_pitch_bytes / sizeof(T));
+
+    dataPtr = dataPtr - offset_pixel;
+
+    cl_int err = CL_SUCCESS;
+
+    if (this->buffer_mem_flag & CL_MEM_USE_HOST_PTR)
+    {
+        if (this->pHost_ptr != this->host_m_1.pData)
+        {
+            log_error("Host memory pointer difference found\n");
+            return FAILURE;
+        }
+
+        if (dataPtr != this->host_m_1.pData)
+        {
+            log_error("Mapped host pointer difference found\n");
+            return FAILURE;
+        }
+    }
+
+    return err;
+}
+
+template < class T >
+cl_int cImage_check_mem_host_read_only< T >::verify_RW_Buffer() { return CL_SUCCESS; };
+
+template < class T >
+cl_int cImage_check_mem_host_read_only< T >::verify_RW_Buffer_rect() { return CL_SUCCESS; };
+
+template < class T >
+cl_int cImage_check_mem_host_read_only< T >::verify_RW_Buffer_mapping() { return CL_SUCCESS; };
+
+template < class T >
+cl_int cImage_check_mem_host_read_only< T >::SetupBuffer()
+{
+    return cBuffer_checker< T >::SetupBuffer();
+}
+
+// Number of elements covered by the image geometry.
+template < class T >
+cl_int cImage_check_mem_host_read_only< T >::get_image_content_size()
+{
+    return ((cl_int)(m_cl_Image_desc.image_width * m_cl_Image_desc.image_height *
+                     m_cl_Image_desc.image_depth * m_cl_Image_desc.image_array_size));
+}
+
+template < class T >
+cl_int cImage_check_mem_host_read_only< T >::get_image_data_size()
+{
+    size_t slice_pitch = m_cl_Image_desc.image_slice_pitch ? m_cl_Image_desc.image_slice_pitch :
+        (m_cl_Image_desc.image_height * m_cl_Image_desc.image_width);
+    return (slice_pitch * m_cl_Image_desc.image_depth * m_cl_Image_desc.image_array_size);
+}
+
+template < class T >
+cl_int cImage_check_mem_host_read_only< T >::get_image_elements()
+{
+    return ((cl_int)(m_cl_Image_desc.image_width * m_cl_Image_desc.image_height *
+                     m_cl_Image_desc.image_depth * m_cl_Image_desc.image_array_size));
+}
+
+// Create the image under test from host_m_1 (every element = TEST_VALUE).
+template < class T >
+cl_int cImage_check_mem_host_read_only< T >::SetupImage()
+{
+    int all = (int)(m_cl_Image_desc.image_width * m_cl_Image_desc.image_height *
+                    m_cl_Image_desc.image_depth * m_cl_Image_desc.image_array_size);
+
+    T v = TEST_VALUE;
+    this->host_m_1.Init(all, v);
+
+    cl_int err = CL_SUCCESS;
+    this->m_Image = clCreateImage(this->m_context, this->buffer_mem_flag,
+                                  &(this->m_cl_image_format), &(this->m_cl_Image_desc),
+                                  this->host_m_1.pData, &err);
+    test_error(err, "clCreateImage error");
+
+    this->pHost_ptr = (void *)(this->host_m_1.pData);
+
+    return err;
+}
+
+template < class T >
+cl_int cImage_check_mem_host_read_only< T >::verify_data(T *pDataIN)
+{
+    cl_int err = CL_SUCCESS;
+    if (!this->host_m_1.Equal_rect_from_orig(pDataIN, this->buffer_origin,
+                                             this->region, this->host_row_pitch,
+                                             this->host_slice_pitch)) {
+        log_error("Buffer data difference found\n");
+        return FAILURE;
+    }
+
+    return err;
+}
+
+template < class T >
+cl_int cImage_check_mem_host_read_only< T >::verify_data_with_offset(T *pDataIN,
+                                                                     size_t *offset)
+{
+    cl_int err = CL_SUCCESS;
+    if (!this->host_m_2.Equal_rect_from_orig(pDataIN, offset, this->region,
+                                             this->host_row_pitch,
+                                             this->host_slice_pitch)) {
+        log_error("Buffer data difference found\n");
+        return FAILURE;
+    }
+
+    return err;
+}
+
+// Reading the image must succeed and match host_m_1; writing must fail.
+template < class T >
+cl_int cImage_check_mem_host_read_only< T >::verify_RW_Image()
+{
+    this->Init_rect();
+
+    int imge_content_size = this->get_image_content_size();
+    T v = 0;
+    this->host_m_2.Init(imge_content_size, v);
+
+    cl_event event;
+    cl_int err = CL_SUCCESS;
+    err = clEnqueueReadImage(this->m_queue, this->m_Image, this->m_blocking,
+                             this->buffer_origin, this->region,
+                             this->buffer_row_pitch_bytes, this->buffer_slice_pitch_bytes,
+                             this->host_m_2.pData, 0, NULL, &event);
+
+    test_error(err, "clEnqueueReadImage error");
+
+    if (!this->m_blocking) {
+        err = clWaitForEvents(1, &event);
+        test_error(err, "clWaitForEvents error");
+    }
+
+    err = this->verify_data(this->host_m_2.pData);
+    test_error(err, "verify_data error");
+
+    err = clEnqueueWriteImage(this->m_queue, this->m_Image, this->m_blocking,
+                              this->buffer_origin, this->region,
+                              this->buffer_row_pitch_bytes, this->buffer_slice_pitch_bytes,
+                              this->host_m_2.pData, 0, NULL, &event);
+
+    if (err == CL_SUCCESS) {
+        log_error("Calling clEnqueueWriteImage on a memory object created with the CL_MEM_HOST_READ_ONLY flag should not return CL_SUCCESS\n");
+        err = FAILURE;
+        return FAILURE;
+
+    } else {
+        log_info("Test succeeded\n\n");
+        err = CL_SUCCESS;
+    }
+
+    return err;
+}
+
+// CL_MAP_READ must succeed and expose the image data; CL_MAP_WRITE must fail.
+template < class T >
+cl_int cImage_check_mem_host_read_only< T >::verify_RW_Image_Mapping()
+{
+    cl_event event;
+    cl_int err = CL_SUCCESS;
+
+    T *dataPtr = (T *) clEnqueueMapImage(this->m_queue, this->m_Image, this->m_blocking,
+                                         CL_MAP_READ,
+                                         this->buffer_origin, this->region,
+                                         &(this->buffer_row_pitch_bytes),
+                                         &(this->buffer_slice_pitch_bytes),
+                                         0, NULL, &event, &err);
+
+    if (!this->m_blocking) {
+        err = clWaitForEvents(1, &event);
+        test_error(err, "clWaitForEvents error");
+    }
+
+    err = this->verify_mapping_ptr(dataPtr);
+    test_error(err, "clEnqueueMapImage error");
+
+    err = this->verify_data(dataPtr);
+    test_error(err, "verify_data error");
+
+    err = clEnqueueUnmapMemObject(this->m_queue, this->m_Image, dataPtr, 0, NULL, &event);
+    test_error(err, "clEnqueueUnmapMemObject error");
+
+    err = clWaitForEvents(1, &event);
+    test_error(err, "clWaitForEvents error");
+
+    dataPtr = (T *) clEnqueueMapImage(this->m_queue, this->m_Image, this->m_blocking,
+                                      CL_MAP_WRITE,
+                                      this->buffer_origin,
+                                      this->region,
+                                      &(this->buffer_row_pitch_bytes),
+                                      &(this->buffer_slice_pitch_bytes),
+                                      0, NULL, &event, &err);
+
+    if (err == CL_SUCCESS) {
+        log_error("Calling clEnqueueMapImage (CL_MAP_WRITE) on a memory object created with the CL_MEM_HOST_READ_ONLY flag should not return CL_SUCCESS\n");
+        err = FAILURE;
+        return FAILURE;
+
+    } else {
+        log_info("Test succeeded\n\n");
+        err = CL_SUCCESS;
+    }
+
+    return err;
+}
+
+#endif
diff --git a/test_conformance/mem_host_flags/checker_image_mem_host_write_only.hpp b/test_conformance/mem_host_flags/checker_image_mem_host_write_only.hpp
new file mode 100644
index 00000000..94c446b9
--- /dev/null
+++ b/test_conformance/mem_host_flags/checker_image_mem_host_write_only.hpp
@@ -0,0 +1,293 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef test_conformance_checker_Image_MEM_HOST_WRITE_ONLY_h
+#define test_conformance_checker_Image_MEM_HOST_WRITE_ONLY_h
+
+#include "checker_image_mem_host_read_only.hpp"
+
+// Checker for images created with CL_MEM_HOST_WRITE_ONLY: host writes and
+// CL_MAP_WRITE mappings must succeed; host reads and CL_MAP_READ must fail.
+// Contents are inspected through m_Image_2, a host-readable device-side copy.
+// Fix: base class written with its template argument
+// (cImage_check_mem_host_read_only< T >), and the out-of-class member
+// definitions below are qualified with < T > as standard C++ requires.
+template < class T>
+class cImage_check_mem_host_write_only : public cImage_check_mem_host_read_only< T >
+{
+
+public:
+    cImage_check_mem_host_write_only(cl_device_id deviceID, cl_context context, cl_command_queue queue)
+        : cImage_check_mem_host_read_only< T > (deviceID, context, queue)
+    {
+    }
+
+    ~cImage_check_mem_host_write_only() {};
+
+    clMemWrapper m_Image_2;  // host-readable mirror of the write-only image
+
+    cl_int verify_RW_Image();
+    cl_int verify_RW_Image_Mapping();
+
+    cl_int Setup_Test_Environment();
+    cl_int update_host_mem_2();
+
+    cl_int verify_data();
+};
+
+// Create the second (host-readable) image used to read back the contents of
+// the write-only image, and initialise the host reference blocks.
+template < class T >
+cl_int cImage_check_mem_host_write_only< T >::Setup_Test_Environment()
+{
+    int all = this->get_image_elements();
+
+    T vv2 = 0;
+    this->host_m_2.Init(all, vv2);
+    vv2 = TEST_VALUE;
+    this->host_m_0.Init(all, vv2);
+
+    cl_int err = CL_SUCCESS;
+    this->m_Image_2 = clCreateImage(this->m_context,
+                                    CL_MEM_READ_WRITE | CL_MEM_HOST_READ_ONLY | CL_MEM_COPY_HOST_PTR,
+                                    &(this->m_cl_image_format), &(this->m_cl_Image_desc),
+                                    this->host_m_2.pData, &err);
+    test_error(err, "clCreateImage error");
+
+    return err;
+}
+
+// Copy image data from a write_only image to a read_write image and read the
+// contents.
+template < class T >
+cl_int cImage_check_mem_host_write_only< T >::update_host_mem_2()
+{
+    size_t orig[3] = {0, 0, 0};
+    size_t img_region[3] = {0, 0, 0};
+    img_region[0] = this->m_cl_Image_desc.image_width;
+    img_region[1] = this->m_cl_Image_desc.image_height;
+    img_region[2] = this->m_cl_Image_desc.image_depth;
+
+    cl_event event;
+    cl_int err = CL_SUCCESS;
+    err = clEnqueueCopyImage(this->m_queue,
+                             this->m_Image,
+                             this->m_Image_2,
+                             orig,
+                             orig,
+                             img_region,
+                             0, NULL, &event);
+    test_error(err, "clEnqueueCopyImage error");
+
+    if (!this->m_blocking) {
+        err = clWaitForEvents(1, &event);
+        test_error(err, "clWaitForEvents error");
+    }
+
+    this->host_m_2.Set_to_zero();
+
+    err = clEnqueueReadImage(this->m_queue, this->m_Image_2, this->m_blocking,
+                             this->buffer_origin, this->region,
+                             this->buffer_row_pitch_bytes, this->buffer_slice_pitch_bytes,
+                             this->host_m_2.pData, 0, NULL, &event);
+    test_error(err, "clEnqueueReadImage error");
+
+    if (!this->m_blocking) {
+        err = clWaitForEvents(1, &event);
+        test_error(err, "clWaitForEvents error");
+    }
+
+    return err;
+}
+
+// Compare the read-back region (host_m_2) against the written reference
+// (host_m_1) and check every element in the region carries TEST_VALUE.
+template < class T >
+cl_int cImage_check_mem_host_write_only< T >::verify_data()
+{
+    cl_int err = CL_SUCCESS;
+    if (!this->host_m_1.Equal_rect_from_orig(this->host_m_2, this->buffer_origin,
+                                             this->region, this->host_row_pitch,
+                                             this->host_slice_pitch)) {
+        log_error("Image and host data difference found\n");
+        return FAILURE;
+    }
+
+    int total = (int)(this->region[0] * this->region[1] * this->region[2]);
+    T v = TEST_VALUE;
+    int tot = (int)(this->host_m_2.Count(v));
+    if (tot != total) {
+        log_error("Image data content difference found\n");
+        return FAILURE;
+    }
+
+    return err;
+}
+
+// Writing must succeed (verified via the mirror image); reading must fail.
+template < class T >
+cl_int cImage_check_mem_host_write_only< T >::verify_RW_Image()
+{
+    cl_int err = CL_SUCCESS;
+
+    this->Init_rect();
+
+    cl_event event;
+    size_t img_orig[3] = {0, 0, 0};
+    size_t img_region[3] = {0, 0, 0};
+    img_region[0] = this->m_cl_Image_desc.image_width;
+    img_region[1] = this->m_cl_Image_desc.image_height;
+    img_region[2] = this->m_cl_Image_desc.image_depth;
+
+    int color[4] = {0xFF, 0xFF, 0xFF, 0xFF};
+    err = clEnqueueFillImage(this->m_queue,
+                             this->m_Image,
+                             &color,
+                             img_orig, img_region,
+                             0, NULL, &event); // Fill the buffer with data
+
+    if (!this->m_blocking) {
+        err = clWaitForEvents(1, &event);
+        test_error(err, "clWaitForEvents error");
+    }
+    test_error(err, "clEnqueueFillImage error");
+
+    T v = TEST_VALUE;
+
+    err = clEnqueueWriteImage(this->m_queue, this->m_Image, this->m_blocking,
+                              this->buffer_origin, this->region,
+                              this->buffer_row_pitch_bytes, this->buffer_slice_pitch_bytes,
+                              this->host_m_0.pData, 0, NULL, &event);
+    test_error(err, "clEnqueueWriteImage error"); // Test writing to buffer
+
+    if (!this->m_blocking) {
+        err = clWaitForEvents(1, &event);
+        test_error(err, "clWaitForEvents error");
+    }
+
+    update_host_mem_2(); // Read buffer contents into mem_2
+
+    err = this->verify_data(); // Compare the contents of mem_2 and mem_1,
+                               // mem_1 is same as mem_0 in setup test environment
+    test_error(err, "verify_data error");
+
+    v = 0;
+    this->host_m_2.Set_to(v);
+    err = clEnqueueReadImage(this->m_queue, this->m_Image, this->m_blocking,
+                             this->buffer_origin, this->region,
+                             this->buffer_row_pitch_bytes, this->buffer_slice_pitch_bytes,
+                             this->host_m_1.pData, 0, NULL, &event);
+
+    if (err == CL_SUCCESS) {
+        log_error("Calling clEnqueueReadImage on a memory object created with the CL_MEM_HOST_WRITE_ONLY flag should not return CL_SUCCESS\n");
+        err = FAILURE;
+        return FAILURE;
+
+    } else {
+        log_info("Test succeeded\n\n");
+        err = CL_SUCCESS;
+    }
+
+    /* Qualcomm fix: 12506 Do not wait on invalid event/ no need for synchronization calls after clEnqueueReadImage fails
+     *
+     * The call to clEnqueueReadImage fails as expected and returns an invalid event on
+     * which clWaitForEvents cannot be called. (It will rightly fail with a CL_INVALID_EVENT error)
+     * Further, we don't need to do any additional flushes or finishes here since we were in sync
+     * before the (failing) call to clEnqueueReadImage
+
+    if (!this->m_blocking) {
+        err = clWaitForEvents(1, &event);
+        test_error(err, " clWaitForEvents error")
+    }
+    Qualcomm fix: end*/
+
+    return err;
+}
+
+// CL_MAP_WRITE must succeed; the mapped data is checked against the mirror
+// image; CL_MAP_READ must fail.
+template < class T >
+cl_int cImage_check_mem_host_write_only< T >::verify_RW_Image_Mapping()
+{
+    this->Init_rect();
+
+    cl_event event;
+    size_t img_orig[3] = {0, 0, 0};
+    size_t img_region[3] = {0, 0, 0};
+    img_region[0] = this->m_cl_Image_desc.image_width;
+    img_region[1] = this->m_cl_Image_desc.image_height;
+    img_region[2] = this->m_cl_Image_desc.image_depth;
+
+    int color[4] = {0xFF, 0xFF, 0xFF, 0xFF};
+    cl_int err = CL_SUCCESS;
+
+
+    // Fill image with pattern
+    err = clEnqueueFillImage(this->m_queue, this->m_Image,
+                             &color, img_orig, img_region,
+                             0, NULL, &event);
+
+    if (!this->m_blocking) {
+        err = clWaitForEvents(1, &event);
+        test_error(err, "clWaitForEvents error");
+    }
+
+    // Map image for writing
+    T *dataPtr = (T *) clEnqueueMapImage(this->m_queue, this->m_Image,
+                                         this->m_blocking, CL_MAP_WRITE,
+                                         this->buffer_origin, this->region,
+                                         &(this->buffer_row_pitch_bytes),
+                                         &(this->buffer_slice_pitch_bytes),
+                                         0, NULL, &event, &err);
+    test_error(err, "clEnqueueMapImage CL_MAP_WRITE pointer error");
+
+    if (!this->m_blocking) {
+        err = clWaitForEvents(1, &event);
+        test_error(err, "clWaitForEvents error");
+    }
+
+    // Verify map pointer
+    err = this->verify_mapping_ptr(dataPtr);
+    test_error(err, "clEnqueueMapImage CL_MAP_WRITE pointer error");
+
+    // Verify mapped data
+
+    // The verify_data_with_offset method below compares dataPtr against
+    // this->host_m_2.pData. The comparison should start at origin {0, 0, 0}.
+    update_host_mem_2();
+
+    // Check the content of mem and host_ptr
+    size_t offset[3] = {0, 0, 0};
+    // Fix: the qualified base-class call must name the template arguments
+    // (cImage_check_mem_host_read_only< T >::).
+    err = cImage_check_mem_host_read_only< T >::verify_data_with_offset(dataPtr,
+                                                                        offset);
+    test_error(err, "verify_data error");
+
+    // Unmap memory object
+    err = clEnqueueUnmapMemObject(this->m_queue, this->m_Image, dataPtr,
+                                  0, NULL, &event);
+    test_error(err, "clEnqueueUnmapMemObject error");
+
+    if (!this->m_blocking) {
+        err = clWaitForEvents(1, &event);
+        test_error(err, "clWaitForEvents error");
+    }
+
+    dataPtr = (T *) clEnqueueMapImage(this->m_queue, this->m_Image, this->m_blocking,
+                                      CL_MAP_READ,
+                                      this->buffer_origin, this->region,
+                                      &(this->buffer_row_pitch_bytes),
+                                      &(this->buffer_slice_pitch_bytes),
+                                      0, NULL, &event, &err);
+
+    if (err == CL_SUCCESS) {
+        log_error("Calling clEnqueueMapImage (CL_MAP_READ) on a memory object created with the CL_MEM_HOST_WRITE_ONLY flag should not return CL_SUCCESS\n");
+        err = FAILURE;
+        return FAILURE;
+
+    } else {
+        log_info("Test succeeded\n\n");
+        err = CL_SUCCESS;
+    }
+
+    return err;
+}
+
+#endif
diff --git a/test_conformance/mem_host_flags/checker_mem_host_no_access.hpp b/test_conformance/mem_host_flags/checker_mem_host_no_access.hpp
new file mode 100644
index 00000000..d1f96f25
--- /dev/null
+++ b/test_conformance/mem_host_flags/checker_mem_host_no_access.hpp
@@ -0,0 +1,202 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef test_conformance_check_mem_host_no_access_h
+#define test_conformance_check_mem_host_no_access_h
+
+
+#include "checker_mem_host_write_only.hpp"
+
+// Checker for buffers created with CL_MEM_HOST_NO_ACCESS: every host-side
+// read, write, rectangular transfer and mapping must fail.
+template < class T> class cBuffer_check_mem_host_no_access : public cBuffer_check_mem_host_write_only< T >
+{
+public:
+    cBuffer_check_mem_host_no_access(cl_device_id deviceID, cl_context context, cl_command_queue queue)
+        : cBuffer_check_mem_host_write_only < T > (deviceID, context, queue)
+    {
+    };
+
+    // NOTE(review): this extra default constructor cannot initialise the
+    // base class; confirm it is actually needed / compiles against the base.
+    cBuffer_check_mem_host_no_access()
+    {
+    };
+
+    virtual cl_int SetupBuffer();
+    virtual cl_int SetupASSubBuffer(cl_mem_flags parent_buffer_flag);
+    virtual cl_int Setup_Test_Environment();
+
+    cl_int verify_RW_Buffer();
+    cl_int verify_RW_Buffer_rect();
+    cl_int verify_RW_Buffer_mapping();
+};
+
+// Create the buffer under test and, for CL_MEM_USE_HOST_PTR buffers only,
+// remember the host pointer used at creation.
+template < class T >
+cl_int cBuffer_check_mem_host_no_access< T >::SetupBuffer()
+{
+    this->m_nNumber_elements = 1000;
+    T vv1 = TEST_VALUE;
+    this->host_m_1.Init(this->m_nNumber_elements, vv1);
+
+    T vv2 = 0;
+    this->host_m_2.Init(this->m_nNumber_elements, vv2);
+
+    cl_int err;
+    int block_size_in_byte = this->get_block_size_bytes();
+    this->m_buffer = clCreateBuffer(this->m_context, this->buffer_mem_flag,
+                                    block_size_in_byte, this->host_m_1.pData, &err);
+    test_error(err, "clCreateBuffer error");
+    err = this->Check_GetMemObjectInfo(this->buffer_mem_flag);
+
+    // Fix: was bitwise `|`, which is always non-zero; the intent is to test
+    // the CL_MEM_USE_HOST_PTR bit.
+    if (this->buffer_mem_flag & CL_MEM_USE_HOST_PTR)
+    {
+        this->pHost_ptr = (void *)this->host_m_1.pData;
+    }
+
+    return err;
+}
+
+template < class T >
+cl_int cBuffer_check_mem_host_no_access< T >::SetupASSubBuffer(cl_mem_flags parent_buffer_flag)
+{
+    // Fix: qualified base-class calls must name the template arguments.
+    return cBuffer_checker< T >::SetupASSubBuffer(parent_buffer_flag);
+}
+
+template < class T >
+cl_int cBuffer_check_mem_host_no_access< T >::Setup_Test_Environment()
+{
+    cBuffer_check_mem_host_write_only< T >::Setup_Test_Environment();
+
+    return CL_SUCCESS;
+}
+
+// Both clEnqueueReadBuffer and clEnqueueWriteBuffer are expected to fail.
+template < class T>
+cl_int cBuffer_check_mem_host_no_access< T >::verify_RW_Buffer()
+{
+    cl_event event;
+    cl_int err = clEnqueueReadBuffer(this->m_queue, this->m_buffer, this->m_blocking, 0,
+                                     this->get_block_size_bytes(), this->host_m_1.pData,
+                                     0, NULL, &event);
+
+    if (err == CL_SUCCESS) {
+        // Fix: this branch tests clEnqueueReadBuffer; the message said
+        // clEnqueueWriteBuffer.
+        log_error("Calling clEnqueueReadBuffer on a memory object created with the CL_MEM_HOST_NO_ACCESS flag should not return CL_SUCCESS\n");
+        err = FAILURE;
+        return FAILURE;
+
+    } else {
+        log_info("Test succeeded\n\n");
+        err = CL_SUCCESS;
+    }
+
+    err = clEnqueueWriteBuffer(this->m_queue, this->m_buffer, this->m_blocking, 0,
+                               this->get_block_size_bytes(), this->host_m_1.pData,
+                               0, NULL, &event);
+
+    if (err == CL_SUCCESS) {
+        log_error("Calling clEnqueueWriteBuffer on a memory object created with the CL_MEM_HOST_NO_ACCESS flag should not return CL_SUCCESS\n");
+        err = FAILURE;
+        return FAILURE;
+
+    } else {
+        log_info("Test succeeded\n\n");
+        err = CL_SUCCESS;
+    }
+
+    return err;
+}
+
+// Rectangular reads and writes must fail as well.
+template < class T >
+cl_int cBuffer_check_mem_host_no_access< T >::verify_RW_Buffer_rect()
+{
+    this->Init_rect();
+    cl_event event;
+    cl_int err = CL_SUCCESS;
+    err = clEnqueueReadBufferRect(this->m_queue, this->m_buffer, this->m_blocking,
+                                  this->buffer_origin_bytes,
+                                  this->host_origin_bytes,
+                                  this->region_bytes,
+                                  this->buffer_row_pitch_bytes,
+                                  this->buffer_slice_pitch_bytes,
+                                  this->host_row_pitch_bytes,
+                                  this->host_slice_pitch_bytes,
+                                  this->host_m_2.pData,
+                                  0, NULL, &event);
+
+    if (err == CL_SUCCESS) {
+        log_error("Calling clEnqueueReadBufferRect on a memory object created with the CL_MEM_HOST_NO_ACCESS flag should not return CL_SUCCESS\n");
+        err = FAILURE;
+        return FAILURE;
+
+    } else {
+        log_info("Test succeeded\n\n");
+        err = CL_SUCCESS;
+    }
+
+    err = clEnqueueWriteBufferRect(this->m_queue, this->m_buffer, this->m_blocking,
+                                   this->buffer_origin_bytes,
+                                   this->host_origin_bytes,
+                                   this->region_bytes,
+                                   this->buffer_row_pitch_bytes,
+                                   this->buffer_slice_pitch_bytes,
+                                   this->host_row_pitch_bytes,
+                                   this->host_slice_pitch_bytes,
+                                   this->host_m_2.pData,
+                                   0, NULL, &event);
+
+    if (err == CL_SUCCESS) {
+        log_error("Calling clEnqueueWriteBufferRect on a memory object created with the CL_MEM_HOST_NO_ACCESS flag should not return CL_SUCCESS\n");
+        err = FAILURE;
+        return FAILURE;
+
+    } else {
+        log_info("Test succeeded\n\n");
+        err = CL_SUCCESS;
+    }
+
+    return err;
+}
+
+// Mapping for reading or for writing must fail.
+template < class T >
+cl_int cBuffer_check_mem_host_no_access< T >::verify_RW_Buffer_mapping()
+{
+    cl_event event;
+    cl_int err;
+
+    void *dataPtr;
+    dataPtr = clEnqueueMapBuffer(this->m_queue, this->m_buffer, this->m_blocking, CL_MAP_READ,
+                                 0, this->get_block_size_bytes(), 0, NULL, &event, &err);
+    if (err == CL_SUCCESS) {
+        log_error("Calling clEnqueueMapBuffer (CL_MAP_READ) on a memory object created with the CL_MEM_HOST_NO_ACCESS flag should not return CL_SUCCESS\n");
+        err = FAILURE;
+        return FAILURE;
+
+    } else {
+        log_info("Test succeeded\n\n");
+        err = CL_SUCCESS;
+    }
+
+    dataPtr = clEnqueueMapBuffer(this->m_queue, this->m_buffer, this->m_blocking, CL_MAP_WRITE,
+                                 0, this->get_block_size_bytes(), 0, NULL, &event, &err);
+    if (err == CL_SUCCESS) {
+        log_error("Calling clEnqueueMapBuffer (CL_MAP_WRITE) on a memory object created with the CL_MEM_HOST_NO_ACCESS flag should not return CL_SUCCESS\n");
+        err = FAILURE;
+        return FAILURE;
+
+    } else {
+        log_info("Test succeeded\n\n");
+        err = CL_SUCCESS;
+    }
+
+    return err;
+}
+
+#endif
diff --git a/test_conformance/mem_host_flags/checker_mem_host_read_only.hpp b/test_conformance/mem_host_flags/checker_mem_host_read_only.hpp
new file mode 100644
index 00000000..669f3601
--- /dev/null
+++ b/test_conformance/mem_host_flags/checker_mem_host_read_only.hpp
@@ -0,0 +1,264 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef test_conformance_check_mem_host_read_only_h
+#define test_conformance_check_mem_host_read_only_h
+
+#include "checker.h"
+
+// Checker for buffers created with CL_MEM_HOST_READ_ONLY: host reads and
+// CL_MAP_READ mappings must succeed; host writes and CL_MAP_WRITE must fail.
+// Fix: the base class must be written with its template argument
+// (cBuffer_checker< T >), here, in the constructor and in qualified calls.
+template < class T> class cBuffer_check_mem_host_read_only : public cBuffer_checker< T >
+{
+public:
+    cBuffer_check_mem_host_read_only(cl_device_id deviceID, cl_context context, cl_command_queue queue)
+        : cBuffer_checker< T > (deviceID, context, queue)
+    {
+    };
+
+    ~cBuffer_check_mem_host_read_only()
+    {
+    };
+
+    virtual cl_int Check_GetMemObjectInfo(cl_mem_flags buffer_mem_flag);
+    virtual cl_int SetupBuffer();
+    virtual cl_int SetupASSubBuffer(cl_mem_flags flag_p);
+    virtual cl_int Setup_Test_Environment();
+
+    cl_int verifyData(cl_int err, cl_event &event);
+    cl_int verify_RW_Buffer();
+    cl_int verify_RW_Buffer_rect();
+    cl_int verify_RW_Buffer_mapping();
+};
+
+// Create the buffer under test from host_m_1 (every element = TEST_VALUE).
+template < class T >
+cl_int cBuffer_check_mem_host_read_only< T >::SetupBuffer()
+{
+    this->m_buffer_type = _BUFFER;
+
+    this->m_nNumber_elements = 888;
+    T vv1 = TEST_VALUE;
+    this->host_m_1.Init(this->m_nNumber_elements, vv1);
+    this->host_m_0.Init(this->m_nNumber_elements, vv1);
+
+    cl_int err = CL_SUCCESS;
+    int block_size_in_byte = (int)(this->m_nNumber_elements * sizeof(T));
+    this->m_buffer = clCreateBuffer(this->m_context, this->buffer_mem_flag,
+                                    block_size_in_byte, this->host_m_1.pData, &err);
+    test_error(err, "clCreateBuffer error");
+
+    // Fix: was bitwise `|`, which is always non-zero; the intent is to test
+    // the CL_MEM_USE_HOST_PTR bit.
+    if (this->buffer_mem_flag & CL_MEM_USE_HOST_PTR)
+    {
+        this->pHost_ptr = (void *)this->host_m_1.pData;
+    }
+
+    return err;
+}
+
+template < class T >
+cl_int cBuffer_check_mem_host_read_only< T >::SetupASSubBuffer(cl_mem_flags flag_p)
+{
+    return cBuffer_checker< T >::SetupASSubBuffer(flag_p);
+}
+
+template < class T>
+cl_int cBuffer_check_mem_host_read_only< T >::Setup_Test_Environment()
+{
+    cBuffer_checker< T >::Setup_Test_Environment();
+    T vv2 = 0;
+    this->host_m_2.Init(this->m_nNumber_elements, vv2);
+
+    return CL_SUCCESS;
+}
+
+template < class T >
+cl_int cBuffer_check_mem_host_read_only< T >::Check_GetMemObjectInfo(cl_mem_flags buffer_mem_flag)
+{
+    cl_int err = CL_SUCCESS;
+    cBuffer_checker< T >::Check_GetMemObjectInfo(buffer_mem_flag);
+
+    if (buffer_mem_flag & CL_MEM_ALLOC_HOST_PTR)
+    {
+        size_t size = 0;
+        err = clGetMemObjectInfo(this->m_buffer, CL_MEM_SIZE, sizeof(size), &size, NULL);
+        void *pp = NULL;
+        err = clGetMemObjectInfo(this->m_buffer, CL_MEM_HOST_PTR, sizeof(pp), &pp, NULL);
+
+        // NOTE(review): this compares this->pData, not the queried `pp`;
+        // it looks like the CL_MEM_HOST_PTR result was meant -- confirm.
+        if (!this->host_m_1.Equal((T *)(this->pData), this->m_nNumber_elements)) {
+            log_error("Buffer data difference found\n");
+            return FAILURE;
+        }
+    }
+
+    return err;
+}
+
+// Helper: fail if the read itself failed or the read-back block differs from
+// the reference block.
+template < class T >
+cl_int cBuffer_check_mem_host_read_only< T >::verifyData(cl_int err, cl_event &event)
+{
+    if (err != CL_SUCCESS) {
+        err = this->m_nERROR_RETURN_CODE;
+        test_error(err, "clEnqueueReadBuffer error");
+    }
+
+    if (!this->host_m_1.Equal(this->host_m_2)) {
+        err = this->m_nERROR_RETURN_CODE;
+        test_error(err, "clEnqueueReadBuffer data difference found");
+    }
+
+    return err;
+}
+
+// Reading must succeed and match the reference; writing must fail.
+template < class T >
+cl_int cBuffer_check_mem_host_read_only< T >::verify_RW_Buffer()
+{
+    cl_event event;
+    cl_int err = CL_SUCCESS;
+
+    err = clEnqueueReadBuffer(this->m_queue, this->m_buffer, this->m_blocking,
+                              0, this->get_block_size_bytes(), this->host_m_2.pData,
+                              0, NULL, &event);
+    test_error(err, "clEnqueueReadBuffer error");
+
+    if (!this->m_blocking) {
+        err = clWaitForEvents(1, &event);
+        test_error(err, "clWaitForEvents error");
+    }
+
+    if (!this->host_m_1.Equal(this->host_m_2)) {
+        log_error("Buffer data difference found\n");
+        return FAILURE;
+    }
+
+    // test write
+    err = clEnqueueWriteBuffer(this->m_queue, this->m_buffer, this->m_blocking,
+                               0, this->get_block_size_bytes(), this->host_m_2.pData,
+                               0, NULL, &event);
+
+    if (err == CL_SUCCESS) {
+        log_error("Calling clEnqueueWriteBuffer on a memory object created with the CL_MEM_HOST_READ_ONLY flag should not return CL_SUCCESS\n");
+        err = FAILURE;
+        return FAILURE;
+
+    } else {
+        log_info("Test succeeded\n\n");
+        err = CL_SUCCESS;
+    }
+
+    return err;
+}
+
+// Rectangular read must succeed and match the reference; rectangular write
+// must fail.
+template < class T >
+cl_int cBuffer_check_mem_host_read_only< T >::verify_RW_Buffer_rect()
+{
+    this->Init_rect();
+
+    T vv2 = 0;
+    this->host_m_2.Set_to(vv2);
+    cl_event event;
+    cl_int err = CL_SUCCESS;
+
+    err = clEnqueueReadBufferRect(this->m_queue, this->m_buffer, this->m_blocking,
+                                  this->buffer_origin_bytes,
+                                  this->host_origin_bytes,
+                                  this->region_bytes,
+                                  this->buffer_row_pitch_bytes,
+                                  this->buffer_slice_pitch_bytes,
+                                  this->host_row_pitch_bytes,
+                                  this->host_slice_pitch_bytes,
+                                  this->host_m_2.pData,
+                                  0, NULL, &event);
+    test_error(err, "clEnqueueReadBufferRect error");
+
+    if (!this->m_blocking) {
+        err = clWaitForEvents(1, &event);
+        test_error(err, "clWaitForEvents error");
+    }
+
+    if (!this->host_m_1.Equal_rect(this->host_m_2, this->host_origin, this->region,
+                                   this->host_row_pitch, this->host_slice_pitch)) {
+        // Fix: message typo "diffeence".
+        log_error("Buffer data difference found\n");
+        return FAILURE;
+    }
+
+    // test blocking write rect
+    err = clEnqueueWriteBufferRect(this->m_queue, this->m_buffer, this->m_blocking,
+                                   this->buffer_origin_bytes,
+                                   this->host_origin_bytes,
+                                   this->region_bytes,
+                                   this->buffer_row_pitch_bytes,
+                                   this->buffer_slice_pitch_bytes,
+                                   this->host_row_pitch_bytes,
+                                   this->host_slice_pitch_bytes,
+                                   this->host_m_2.pData,
+                                   0, NULL, &event);
+
+    if (err == CL_SUCCESS) {
+        log_error("Calling clEnqueueWriteBufferRect on a memory object created with the CL_MEM_HOST_READ_ONLY flag should not return CL_SUCCESS\n");
+        err = FAILURE;
+        return FAILURE;
+
+    } else {
+        log_info("Test succeeded\n\n");
+        err = CL_SUCCESS;
+    }
+
+    return err;
+}
+
+// CL_MAP_READ must succeed and expose the buffer data; CL_MAP_WRITE must fail.
+template < class T >
+cl_int cBuffer_check_mem_host_read_only< T >::verify_RW_Buffer_mapping()
+{
+    cl_int err = CL_SUCCESS;
+    cl_event event;
+    void *dataPtr;
+    dataPtr = clEnqueueMapBuffer(this->m_queue, this->m_buffer, this->m_blocking,
+                                 CL_MAP_READ,
+                                 0, this->get_block_size_bytes(),
+                                 0, NULL, &event, &err);
+    test_error(err, "clEnqueueMapBuffer error");
+
+    if (!this->m_blocking) {
+        err = clWaitForEvents(1, &event);
+        test_error(err, "clWaitForEvents error");
+    }
+
+    // NOTE(review): logs but does not return FAILURE on pointer mismatch,
+    // unlike the image checker -- confirm whether that is intended.
+    if ((this->buffer_mem_flag & CL_MEM_USE_HOST_PTR) && dataPtr != this->pHost_ptr) {
+        log_error("Mapped host pointer difference found\n");
+    }
+
+    if (!this->host_m_1.Equal((T *)dataPtr, this->m_nNumber_elements)) {
+        log_error("Buffer content difference found\n");
+        return FAILURE;
+    }
+
+    // test map for writing (expected to fail)
+    clEnqueueMapBuffer(this->m_queue, this->m_buffer, this->m_blocking,
+                       CL_MAP_WRITE,
+                       0, this->get_block_size_bytes(),
+                       0, NULL, &event, &err);
+
+    if (err == CL_SUCCESS) {
+        log_error("Calling clEnqueueMapBuffer (CL_MAP_WRITE) on a memory object created with the CL_MEM_HOST_READ_ONLY flag should not return CL_SUCCESS\n");
+        err = FAILURE;
+ return FAILURE; + + } else { + log_info("Test succeeded\n\n"); + err = CL_SUCCESS; + } + + return err; +} + +#endif diff --git a/test_conformance/mem_host_flags/checker_mem_host_write_only.hpp b/test_conformance/mem_host_flags/checker_mem_host_write_only.hpp new file mode 100644 index 00000000..04c82a8e --- /dev/null +++ b/test_conformance/mem_host_flags/checker_mem_host_write_only.hpp @@ -0,0 +1,333 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef test_conformance_check_mem_host_write_only__h +#define test_conformance_check_mem_host_write_only__h + +#include "checker.h" + +template < class T> class cBuffer_check_mem_host_write_only : public cBuffer_checker +{ +public: + cBuffer_check_mem_host_write_only(cl_device_id deviceID, cl_context context, cl_command_queue queue) + : cBuffer_checker < T > (deviceID, context, queue) + { + this->m_nNumber_elements = 1000; + }; + + ~cBuffer_check_mem_host_write_only() + { + }; + + cl_program program; + cl_kernel kernel; + + clMemWrapper m_buffer2; + + cl_int Setup_Test_Environment(); + + cl_int SetupBuffer(); + cl_int SetupASSubBuffer(cl_mem_flags flag_p); + + cl_int verifyData(cl_int err, cl_event &event ); + cl_int update_host_mem_2(); + + cl_int verify_RW_Buffer(); + cl_int verify_RW_Buffer_rect(); + cl_int verify_RW_Buffer_mapping(); + + C_host_memory_block tmp_host_m; + + virtual cl_int verify_Buffer_initialization(); +}; + +template < class T > +cl_int cBuffer_check_mem_host_write_only< T >::SetupBuffer() +{ + T vv1 = 0; + this->host_m_1.Init( this->m_nNumber_elements, vv1); // zero out buffer + + // init buffer to 0 + cl_int err; + int block_size_in_byte = this->get_block_size_bytes(); + + this->m_buffer = clCreateBuffer(this->m_context, this->buffer_mem_flag, + block_size_in_byte, this->host_m_1.pData, &err); + test_error(err, "clCreateBuffer error"); + + err = this->Check_GetMemObjectInfo(this->buffer_mem_flag); + + if (this->buffer_mem_flag | CL_MEM_USE_HOST_PTR) + { + this->pHost_ptr = (void *)this->host_m_1.pData; + } + + return err; +} + +template < class T > +cl_int cBuffer_check_mem_host_write_only::SetupASSubBuffer(cl_mem_flags flag_p) +{ + return cBuffer_checker::SetupASSubBuffer(flag_p); +} + +template < class T > +cl_int cBuffer_check_mem_host_write_only< T >::Setup_Test_Environment() +{ + cl_int err; + T vv2 = 0; + this->host_m_2.Init(this->m_nNumber_elements, vv2); + + // init buffer2 to 0 + cl_mem_flags buffer_mem_flag2 = 
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR | CL_MEM_HOST_READ_ONLY; + this->m_buffer2 = clCreateBuffer(this->m_context, buffer_mem_flag2, + this->get_block_size_bytes(), this->host_m_2.pData, &err); + test_error(err, "clCreateBuffer error\n"); + + return err; +} + +template < class T > +cl_int cBuffer_check_mem_host_write_only< T >::verify_Buffer_initialization() +{ + cl_int err = CL_SUCCESS; + + if (this->host_m_1.pData == NULL || this->host_m_2.pData == NULL) { + log_error("Data not ready\n"); + return FAILURE; + } + + update_host_mem_2(); + + if (!this->host_m_1.Equal(this->host_m_2)){ + log_error("Buffer content difference found\n"); + return FAILURE; + } + + return err; +} + +template < class T > +cl_int cBuffer_check_mem_host_write_only< T >::verify_RW_Buffer() +{ + T vv1 = TEST_VALUE; + T vv2 = 0; + this->host_m_2.Set_to(vv2); + + tmp_host_m.Init(this->host_m_1.num_elements, vv1) ; + + cl_event event; + cl_int err = CL_SUCCESS; + err = clEnqueueWriteBuffer(this->m_queue, this->m_buffer, this->m_blocking, 0, + this->get_block_size_bytes(), tmp_host_m.pData, + 0, NULL, &event); + if (err != CL_SUCCESS ) { + test_error(err, "clEnqueueWriteBuffer error"); + } + + if (!this->m_blocking){ + err = clWaitForEvents(1, &event); + test_error(err, "clWaitForEvents error") + } + + if (tmp_host_m.Equal(this->host_m_2)){ + log_error("Test data should be different\n"); + return FAILURE; + } + + update_host_mem_2(); + + if (!tmp_host_m.Equal(this->host_m_2)){ + log_error("Buffer content difference found\n"); + return FAILURE; + } + + err = clEnqueueReadBuffer(this->m_queue, this->m_buffer, CL_TRUE, 0, + this->get_block_size_bytes(), this->host_m_2.pData, + 0, NULL, &event); + + if ( err == CL_SUCCESS ) { + log_error("Calling clEnqueueReadBuffer on a memory object created with the CL_MEM_HOST_WRITE_ONLY flag should not return CL_SUCCESS\n"); + err = FAILURE; + return FAILURE; + + } else { + log_info("Test succeeded\n\n"); + err = CL_SUCCESS; + } + + return err; +} + +template < 
class T > +cl_int cBuffer_check_mem_host_write_only< T >::verify_RW_Buffer_rect() +{ + this->Init_rect(); + + T vv1= TEST_VALUE; + this->host_m_1.Set_to(vv1); + + T vv2 = 0; + this->host_m_2.Set_to(vv2); + + cl_event event, event_1; + + cl_int err = CL_SUCCESS; + + vv1 = 0; + C_host_memory_block< T > tmp_host_m; + tmp_host_m.Init(this->host_m_1.num_elements, vv1); // zero out the buffer + err = clEnqueueWriteBuffer(this->m_queue, this->m_buffer, CL_TRUE, 0, + this->get_block_size_bytes(), tmp_host_m.pData, + 0, NULL, &event_1); + + vv1 = TEST_VALUE; + tmp_host_m.Set_to(vv1); + err = clEnqueueWriteBufferRect(this->m_queue, this->m_buffer, this->m_blocking, + this->buffer_origin_bytes, + this->host_origin_bytes, + this->region_bytes, + this->buffer_row_pitch_bytes, + this->buffer_slice_pitch_bytes, + this->host_row_pitch_bytes, + this->host_slice_pitch_bytes, + tmp_host_m.pData, + 1, &event_1, &event); + test_error(err, "clEnqueueWriteBuffer error"); + + if (!this->m_blocking) { + err = clWaitForEvents(1, &event); + test_error(err, "clWaitForEvents error") + } + + if (tmp_host_m.Equal(this->host_m_2)) { + log_error("Test data should be different\n"); + return FAILURE; + } + + update_host_mem_2(); + + size_t tot_in_reg = this->region[0] * this->region[1] * this->region[2]; + if (!tmp_host_m.Equal_rect(this->host_m_2, this->host_origin, this->region, + this->host_row_pitch, this->host_slice_pitch)) { + log_error("Buffer rect content difference found\n"); + return FAILURE; + } + + if (this->host_m_2.Count(vv1) != tot_in_reg) + { + log_error("Buffer rect content difference found\n"); + return FAILURE; + } + + err = clEnqueueReadBufferRect(this->m_queue, this->m_buffer, this->m_blocking, + this->buffer_origin_bytes, + this->host_origin_bytes, + this->region_bytes, + this->buffer_row_pitch_bytes, + this->buffer_slice_pitch_bytes, + this->host_row_pitch_bytes, + this->host_slice_pitch_bytes, + this->host_m_2.pData, + 0, NULL, &event); + + if (err == CL_SUCCESS) { + 
log_error("Calling clEnqueueReadBufferRect on a memory object created with the CL_MEM_HOST_WRITE_ONLY flag should not return CL_SUCCESS\n"); + err = FAILURE; + return FAILURE; + + } else { + log_info("Test succeeded\n\n"); + err = CL_SUCCESS; + } + + return err; +} + +template < class T > +cl_int cBuffer_check_mem_host_write_only< T >::update_host_mem_2() +{ + size_t global_work_size[3] = {0, 1, 1}; + global_work_size[0] = this->get_block_size_bytes(); + + cl_event event, event_2; + cl_int err = clEnqueueCopyBuffer(this->m_queue, this->m_buffer, this->m_buffer2, 0, 0, + this->m_nNumber_elements* sizeof (T), 0, NULL, &event); + + this->host_m_2.Set_to_zero(); + err = clEnqueueReadBuffer(this->m_queue, this->m_buffer2, CL_TRUE, 0, + this->get_block_size_bytes(), this->host_m_2.pData, + 1, &event, &event_2); + test_error(err, "clEnqueueReadBuffer error"); + + clWaitForEvents(1, &event_2); + test_error(err, "clWaitForEvents error"); + + return err; +} + +template < class T > +cl_int cBuffer_check_mem_host_write_only< T >::verify_RW_Buffer_mapping() +{ + T vv2 = 0; + this->host_m_2.Set_to(vv2); + + cl_event event; + cl_int err = CL_SUCCESS; + + void *dataPtr; + int size = this->get_block_size_bytes(); + dataPtr = clEnqueueMapBuffer(this->m_queue, this->m_buffer, this->m_blocking, + CL_MAP_WRITE, + 0, size, + 0, NULL, &event, &err); + test_error(err, "clEnqueueMapBuffer error"); + + if (!this->m_blocking) { + err = clWaitForEvents(1, &event); + test_error(err, "clWaitForEvents error"); + } + + update_host_mem_2(); + + if ((this->buffer_mem_flag & CL_MEM_USE_HOST_PTR) && dataPtr != this->pHost_ptr){ + log_error("Mapped host pointer difference found\n"); + return FAILURE; + } + + if(!this->host_m_2.Equal((T*)dataPtr, this->m_nNumber_elements)) { + log_error("Buffer content difference found\n"); + return FAILURE; + } + + // test map read + clEnqueueMapBuffer(this->m_queue, this->m_buffer, this->m_blocking, + CL_MAP_READ, + 0, this->get_block_size_bytes(), + 0, NULL, &event, 
&err);
+
+ if (err == CL_SUCCESS) {
+ log_error("Calling clEnqueueMapBuffer (CL_MAP_READ) on a memory object created with the CL_MEM_HOST_WRITE_ONLY flag should not return CL_SUCCESS\n");
+ err = FAILURE;
+
+ } else {
+ log_info("Test succeeded\n\n");
+ err = CL_SUCCESS;
+ }
+
+ return err;
+}
+
+#endif diff --git a/test_conformance/mem_host_flags/main.cpp b/test_conformance/mem_host_flags/main.cpp new file mode 100644 index 00000000..527468f2 --- /dev/null +++ b/test_conformance/mem_host_flags/main.cpp @@ -0,0 +1,68 @@ +//
+// Copyright (c) 2017 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License. 
+// +#include "../../test_common/harness/compat.h" + +#include +#include + +#if !defined (__APPLE__) +#include +#endif + +#include "procs.h" +#include "../../test_common/harness/testHarness.h" + +#if !defined(_WIN32) +#include +#endif + +basefn clfn_list[] = {test_mem_host_read_only_buffer, + test_mem_host_read_only_subbuffer, + test_mem_host_write_only_buffer, + test_mem_host_write_only_subbuffer, + test_mem_host_no_access_buffer, + test_mem_host_no_access_subbuffer, + test_mem_host_read_only_image, + test_mem_host_write_only_image, + test_mem_host_no_access_image}; + +const char *clfn_names[] = {"test_mem_host_read_only_buffer", + "test_mem_host_read_only_subbuffer", + "test_mem_host_write_only_buffer", + "test_mem_host_write_only_subbuffer", + "test_mem_host_no_access_buffer", + "test_mem_host_no_access_subbuffer", + "test_mem_host_read_only_image", + "test_mem_host_write_only_image", + "test_mem_host_no_access_image", +}; + +ct_assert((sizeof(clfn_names) / sizeof(clfn_names[0])) == (sizeof(clfn_list) / sizeof(clfn_list[0]))); + +int num_fns = sizeof(clfn_names) / sizeof(char *); + +cl_device_type gDeviceType = CL_DEVICE_TYPE_DEFAULT; +bool gTestRounding = true; + +int main(int argc, const char *argv[]) +{ + int error = 0; + test_start();// in fact no code + log_info("1st part, non gl-sharing objects...\n"); + error = runTestHarness(argc, argv, num_fns, clfn_list, clfn_names, false, false, 0); + + return error; +} diff --git a/test_conformance/mem_host_flags/mem_host_buffer.cpp b/test_conformance/mem_host_flags/mem_host_buffer.cpp new file mode 100644 index 00000000..5416ec7f --- /dev/null +++ b/test_conformance/mem_host_flags/mem_host_buffer.cpp @@ -0,0 +1,487 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include + +#include "procs.h" + +#include "checker_mem_host_read_only.hpp" +#include "checker_mem_host_write_only.hpp" +#include "checker_mem_host_no_access.hpp" + +static int test_mem_host_read_only_buffer_RW(cl_device_id deviceID, cl_context context, + cl_command_queue queue, cl_bool blocking, + cl_mem_flags buffer_mem_flag, + cl_mem_flags parent_buffer_flag, + enum BUFFER_TYPE buffer_type) +{ + log_info("%s\n", __FUNCTION__); + cBuffer_check_mem_host_read_only< TEST_ELEMENT_TYPE > checker(deviceID, context, queue); + checker.m_blocking = blocking; + checker.buffer_mem_flag = buffer_mem_flag; + cl_int err; + switch (buffer_type) { + case _BUFFER: + err = checker.SetupBuffer(); + break; + case _Sub_BUFFER: + err = checker.SetupASSubBuffer(parent_buffer_flag); + break; + } + + test_error(err, __FUNCTION__); + checker.Setup_Test_Environment(); + err= checker.verify_RW_Buffer(); + test_error(err, __FUNCTION__); + clFinish(queue); + + return err; +} + +static int test_mem_host_read_only_buffer_RW_Rect(cl_device_id deviceID, cl_context context, + cl_command_queue queue, cl_bool blocking, + cl_mem_flags buffer_mem_flag, + cl_mem_flags parent_buffer_flag, + enum BUFFER_TYPE buffer_type) +{ + log_info("%s\n", __FUNCTION__); + + cBuffer_check_mem_host_read_only< TEST_ELEMENT_TYPE > checker(deviceID, context, queue); + checker.m_blocking = blocking; + checker.buffer_mem_flag = buffer_mem_flag; + cl_int err; + switch (buffer_type) { + case _BUFFER: + err= 
checker.SetupBuffer(); + break; + case _Sub_BUFFER: + err= checker.SetupASSubBuffer(parent_buffer_flag); + break; + } + + test_error(err, __FUNCTION__); + checker.Setup_Test_Environment(); + err = checker.verify_RW_Buffer_rect(); + test_error(err, __FUNCTION__); + clFinish(queue); + + return err; +} + +static int test_mem_host_read_only_buffer_RW_Mapping(cl_device_id deviceID, cl_context context, + cl_command_queue queue, cl_bool blocking, + cl_mem_flags buffer_mem_flag, + cl_mem_flags parent_buffer_flag, + enum BUFFER_TYPE buffer_type) +{ + log_info("%s\n", __FUNCTION__); + + cBuffer_check_mem_host_read_only< TEST_ELEMENT_TYPE > checker(deviceID, context, queue); + checker.m_blocking = blocking; + checker.buffer_mem_flag = buffer_mem_flag; + cl_int err; + switch (buffer_type) { + case _BUFFER: + err= checker.SetupBuffer(); + break; + case _Sub_BUFFER: + err= checker.SetupASSubBuffer(parent_buffer_flag); + break; + } + + test_error(err, __FUNCTION__); + checker.Setup_Test_Environment(); + err = checker.verify_RW_Buffer_mapping(); + test_error(err, __FUNCTION__); + clFinish(queue); + + return err; +} + +int test_mem_host_read_only_buffer(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + cl_mem_flags buffer_mem_flags[2] = {CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR | CL_MEM_HOST_READ_ONLY, + CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR | CL_MEM_HOST_READ_ONLY}; + + cl_int err = CL_SUCCESS; + + cl_bool blocking[2] = {CL_TRUE, CL_FALSE}; + for (int k=0; k<2; k++) + for (int i=0; i< 2; i++) + { + err = test_mem_host_read_only_buffer_RW(deviceID, context, queue, blocking[i], + buffer_mem_flags[k], 0, _BUFFER); + test_error(err, __FUNCTION__); + + err = test_mem_host_read_only_buffer_RW_Rect(deviceID, context, queue, blocking[i], + buffer_mem_flags[k],0, _BUFFER); + test_error(err, __FUNCTION__); + + err = test_mem_host_read_only_buffer_RW_Mapping(deviceID, context, queue, blocking[i], + buffer_mem_flags[k],0, _BUFFER); + test_error(err, 
__FUNCTION__); + } + + return err; +} + +int test_mem_host_read_only_subbuffer(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + cl_mem_flags parent_buffer_mem_flags[1] = {CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR | CL_MEM_HOST_READ_ONLY}; + + cl_mem_flags buffer_mem_flags[4] = {0, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, + CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, + CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR}; + + cl_int err = CL_SUCCESS; + + cl_bool blocking[2] = {CL_TRUE, CL_FALSE}; + + for (int p=0; p<1; p++) { + for (int k=0; k<4; k++) + for (int i=0; i<2; i++) + { + err = test_mem_host_read_only_buffer_RW(deviceID, context, queue, blocking[i], + buffer_mem_flags[k], parent_buffer_mem_flags[p], _Sub_BUFFER); + test_error(err, __FUNCTION__); + + err = test_mem_host_read_only_buffer_RW_Rect(deviceID, context, queue, blocking[i], + buffer_mem_flags[k], parent_buffer_mem_flags[p], _Sub_BUFFER); + test_error(err, __FUNCTION__); + + err = test_mem_host_read_only_buffer_RW_Mapping(deviceID, context, queue, blocking[i], + buffer_mem_flags[k], parent_buffer_mem_flags[p], _Sub_BUFFER); + test_error(err, __FUNCTION__); + } + } + + return err; +} + +//=============================== Write only + +static cl_int test_mem_host_write_only_buffer_RW(cl_device_id deviceID, cl_context context, + cl_command_queue queue, cl_bool blocking, + cl_mem_flags buffer_mem_flag, + cl_mem_flags parent_buffer_flag, + enum BUFFER_TYPE buffer_type) +{ + log_info("%s\n", __FUNCTION__); + + cBuffer_check_mem_host_write_only< TEST_ELEMENT_TYPE > checker(deviceID, context, queue); + + checker.m_blocking = blocking; + checker.buffer_mem_flag = buffer_mem_flag; + cl_int err; + switch (buffer_type) { + case _BUFFER: + err = checker.SetupBuffer(); + break; + case _Sub_BUFFER: + err = checker.SetupASSubBuffer( parent_buffer_flag ); + break; + } + + test_error(err, __FUNCTION__); + checker.Setup_Test_Environment(); + err= checker.verify_RW_Buffer(); + 
test_error(err, __FUNCTION__); + clFinish(queue); + + return err; +} + +static cl_int test_mem_host_write_only_buffer_RW_Rect(cl_device_id deviceID, cl_context context, + cl_command_queue queue, cl_bool blocking, + cl_mem_flags buffer_mem_flag, + cl_mem_flags parent_buffer_flag, + enum BUFFER_TYPE buffer_type) +{ + log_info("%s\n", __FUNCTION__); + + cBuffer_check_mem_host_write_only< TEST_ELEMENT_TYPE > checker(deviceID, context, queue); + checker.m_blocking = blocking; + checker.buffer_mem_flag = buffer_mem_flag; + cl_int err; + switch (buffer_type) { + case _BUFFER: + err= checker.SetupBuffer(); + break; + case _Sub_BUFFER: + err= checker.SetupASSubBuffer(parent_buffer_flag); + break; + } + + test_error(err, __FUNCTION__); + checker.Setup_Test_Environment(); + err= checker.verify_RW_Buffer_rect(); + test_error(err, __FUNCTION__); + clFinish(queue); + + return err; +} + +static cl_int test_mem_host_write_only_buffer_RW_Mapping(cl_device_id deviceID, cl_context context, + cl_command_queue queue, cl_bool blocking, + cl_mem_flags buffer_mem_flag, + cl_mem_flags parent_buffer_flag, + enum BUFFER_TYPE buffer_type) +{ + log_info("%s\n", __FUNCTION__); + + cBuffer_check_mem_host_write_only< TEST_ELEMENT_TYPE > checker(deviceID, context, queue); + checker.m_blocking = blocking; + checker.buffer_mem_flag = buffer_mem_flag; + cl_int err; + switch (buffer_type) { + case _BUFFER: + err= checker.SetupBuffer(); + break; + case _Sub_BUFFER: + err= checker.SetupASSubBuffer(parent_buffer_flag); + break; + } + + test_error(err, __FUNCTION__); + checker.Setup_Test_Environment(); + err= checker.verify_RW_Buffer_mapping(); + test_error(err, __FUNCTION__); + clFinish(queue); + + return err; +} + +int test_mem_host_write_only_buffer(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + cl_mem_flags buffer_mem_flags[2] = {CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR | CL_MEM_HOST_WRITE_ONLY, + CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR | 
CL_MEM_HOST_WRITE_ONLY}; + + cl_int err = CL_SUCCESS; + + cl_bool blocking[2] = {CL_TRUE, CL_FALSE}; + for (int k=0; k<2; k++) + for (int i=0; i<2; i++) + { + err = test_mem_host_write_only_buffer_RW(deviceID, context, queue, blocking[i], + buffer_mem_flags[k], 0, _BUFFER); + test_error(err, __FUNCTION__); + + err = test_mem_host_write_only_buffer_RW_Rect(deviceID, context, queue, blocking[i], + buffer_mem_flags[k], 0, _BUFFER); + test_error(err, __FUNCTION__); + + err = test_mem_host_write_only_buffer_RW_Mapping(deviceID, context, queue, blocking[i], + buffer_mem_flags[k], 0, _BUFFER); + test_error(err, __FUNCTION__); + } + + return err; +} + +int test_mem_host_write_only_subbuffer(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + cl_mem_flags parent_buffer_mem_flags[1] = {CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR | CL_MEM_HOST_WRITE_ONLY}; + + cl_mem_flags buffer_mem_flags[4] = {0, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, + CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, + CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR}; + + cl_int err = CL_SUCCESS; + + cl_bool blocking[2] = {CL_TRUE, CL_FALSE}; + + for (int p=0; p<1; p++) { + for (int m=0; m<4; m++) { + for (int i=0; i< 2; i++) + { + err = test_mem_host_write_only_buffer_RW(deviceID, context, queue, blocking[i], + buffer_mem_flags[m], parent_buffer_mem_flags[p], _Sub_BUFFER); + test_error(err, __FUNCTION__); + + err = test_mem_host_write_only_buffer_RW_Rect(deviceID, context, queue, blocking[i], + buffer_mem_flags[m], parent_buffer_mem_flags[p], _Sub_BUFFER); + test_error(err, __FUNCTION__); + + err = test_mem_host_write_only_buffer_RW_Mapping(deviceID, context, queue, blocking[i], + buffer_mem_flags[m] , parent_buffer_mem_flags[p], _Sub_BUFFER); + test_error(err, __FUNCTION__); + } + } + } + + return err; +} + +//===================== NO ACCESS + +static cl_int test_mem_host_no_access_buffer_RW(cl_device_id deviceID, cl_context context, + cl_command_queue queue, cl_bool blocking, + 
cl_mem_flags buffer_mem_flag, + cl_mem_flags parent_buffer_flag, + enum BUFFER_TYPE buffer_type) +{ + log_info("%s\n", __FUNCTION__); + + cBuffer_check_mem_host_no_access< TEST_ELEMENT_TYPE > checker(deviceID, context, queue); + checker.m_blocking = blocking; + checker.buffer_mem_flag = buffer_mem_flag; + + cl_int err = CL_SUCCESS; + switch (buffer_type) { + case _BUFFER: + err= checker.SetupBuffer(); + break; + case _Sub_BUFFER: + err= checker.SetupASSubBuffer(parent_buffer_flag); + break; + } + + test_error(err, __FUNCTION__); + checker.Setup_Test_Environment(); + err= checker.verify_RW_Buffer_mapping(); + test_error(err, __FUNCTION__); + clFinish(queue); + + return err; +} + +static cl_int test_mem_host_no_access_buffer_RW_Rect(cl_device_id deviceID, cl_context context, + cl_command_queue queue, cl_bool blocking, + cl_mem_flags buffer_mem_flag, + cl_mem_flags parent_buffer_flag, + enum BUFFER_TYPE buffer_type) +{ + log_info( "%s\n", __FUNCTION__); + + cBuffer_check_mem_host_no_access< TEST_ELEMENT_TYPE > checker(deviceID, context, queue); + checker.m_blocking = blocking; + checker.buffer_mem_flag = buffer_mem_flag; + cl_int err; + switch (buffer_type) { + case _BUFFER: + err= checker.SetupBuffer(); + break; + case _Sub_BUFFER: + err= checker.SetupASSubBuffer(parent_buffer_flag); + break; + } + + test_error(err, __FUNCTION__); + checker.Setup_Test_Environment(); + err= checker.verify_RW_Buffer_mapping(); + test_error(err, __FUNCTION__); + clFinish(queue); + + return err; +} + +static cl_int test_mem_host_no_access_buffer_RW_Mapping(cl_device_id deviceID, cl_context context, + cl_command_queue queue, cl_bool blocking, + cl_mem_flags buffer_mem_flag, + cl_mem_flags parent_buffer_flag, + enum BUFFER_TYPE buffer_type) +{ + log_info("%s\n", __FUNCTION__); + + cBuffer_check_mem_host_no_access< TEST_ELEMENT_TYPE > checker(deviceID, context, queue); + + checker.m_blocking = blocking; + checker.buffer_mem_flag = buffer_mem_flag; + cl_int err; + switch (buffer_type) { + 
case _BUFFER: + err= checker.SetupBuffer(); + break; + case _Sub_BUFFER: + err= checker.SetupASSubBuffer(parent_buffer_flag); + break; + } + + test_error(err, __FUNCTION__); + checker.Setup_Test_Environment(); + err= checker.verify_RW_Buffer_mapping(); + test_error(err, __FUNCTION__); + clFinish(queue); + + return err; +} + +int test_mem_host_no_access_buffer(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + cl_mem_flags buffer_mem_flag[2] = {CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR | CL_MEM_HOST_NO_ACCESS, + CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR | CL_MEM_HOST_NO_ACCESS}; + + cl_int err = CL_SUCCESS; + + cl_bool blocking[2] = {CL_TRUE, CL_FALSE}; + for (int k=0; k<2; k++) + for (int i=0; i<2; i++) { + err = test_mem_host_no_access_buffer_RW(deviceID, context, queue, blocking[i], + buffer_mem_flag[k], 0, _BUFFER); + test_error(err, __FUNCTION__); + + err = test_mem_host_no_access_buffer_RW_Rect(deviceID, context, queue, blocking[i], + buffer_mem_flag[k], 0, _BUFFER); + test_error(err, __FUNCTION__); + + err = test_mem_host_no_access_buffer_RW_Mapping(deviceID, context, queue, blocking[i], + buffer_mem_flag[k], 0, _BUFFER); + test_error(err, __FUNCTION__); + } + + return err; +} + +int test_mem_host_no_access_subbuffer(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + cl_mem_flags parent_buffer_mem_flags[3] = { CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR | CL_MEM_HOST_NO_ACCESS, + CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR | CL_MEM_HOST_NO_ACCESS, + CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR | CL_MEM_HOST_NO_ACCESS}; + + cl_mem_flags buffer_mem_flags[4] = {0, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, + CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, + CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR}; + + cl_int err = CL_SUCCESS; + + cl_bool blocking[2] = {CL_TRUE, CL_FALSE}; + for (int p=0; p<3; p++) { + for (int k=0; k<4; k++) { + for (int i=0; i<2; i++) { + err += 
test_mem_host_no_access_buffer_RW(deviceID, context, queue, blocking[i], + buffer_mem_flags[k], parent_buffer_mem_flags[p], _Sub_BUFFER); + + err += test_mem_host_no_access_buffer_RW_Rect(deviceID, context, queue, blocking[i], + buffer_mem_flags[k], parent_buffer_mem_flags[p], _Sub_BUFFER); + + err += test_mem_host_no_access_buffer_RW_Mapping( deviceID, context, queue, blocking[i], + buffer_mem_flags[k], parent_buffer_mem_flags[p], _Sub_BUFFER); + } + } + } + + return err; +} diff --git a/test_conformance/mem_host_flags/mem_host_image.cpp b/test_conformance/mem_host_flags/mem_host_image.cpp new file mode 100644 index 00000000..b7eeb003 --- /dev/null +++ b/test_conformance/mem_host_flags/mem_host_image.cpp @@ -0,0 +1,364 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include + +#include "procs.h" + +#include "checker_image_mem_host_read_only.hpp" +#include "checker_image_mem_host_no_access.hpp" +#include "checker_image_mem_host_write_only.hpp" + +//====================================== +static cl_int test_mem_host_read_only_RW_Image(cl_device_id deviceID, cl_context context, + cl_command_queue queue, cl_bool blocking, + cl_mem_flags buffer_mem_flag, + cl_mem_object_type image_type_in, + size_t array_size, size_t *img_dim) +{ + log_info("%s ... 
\n ", __FUNCTION__); + cl_int err = CL_SUCCESS; + + cImage_check_mem_host_read_only< int > checker(deviceID, context, queue); + checker.m_blocking = blocking; + checker.buffer_mem_flag = buffer_mem_flag; + + checker.m_cl_Image_desc.image_type = image_type_in; + checker.m_cl_Image_desc.image_width = img_dim[0]; + checker.m_cl_Image_desc.image_height = img_dim[1]; + checker.m_cl_Image_desc.image_depth = img_dim[2]; + checker.m_cl_Image_desc.image_array_size = array_size; + checker.m_cl_Image_desc.image_row_pitch = 0; + checker.m_cl_Image_desc.image_slice_pitch = 0; + checker.m_cl_Image_desc.num_mip_levels = 0; + checker.m_cl_Image_desc.num_samples = 0; + + checker.SetupImage(); + checker.Init_rect(); + err = checker.verify_RW_Image(); + test_error(err, __FUNCTION__); + clFinish(queue); + return err; +} + +static cl_int test_mem_host_read_only_RW_Image_Mapping(cl_device_id deviceID, cl_context context, + cl_command_queue queue, cl_bool blocking, + cl_mem_flags buffer_mem_flag, + cl_mem_object_type image_type_in, + size_t array_size, size_t *img_dim) +{ + log_info("%s ... 
\n ", __FUNCTION__); + cl_int err = CL_SUCCESS; + + cImage_check_mem_host_read_only< int > checker(deviceID, context, queue); + checker.m_blocking = blocking; + checker.buffer_mem_flag = buffer_mem_flag; + + checker.m_cl_Image_desc.image_type = image_type_in; + checker.m_cl_Image_desc.image_width = img_dim[0]; + checker.m_cl_Image_desc.image_height = img_dim[1]; + checker.m_cl_Image_desc.image_depth = img_dim[2]; + checker.m_cl_Image_desc.image_array_size = array_size; + checker.m_cl_Image_desc.image_row_pitch = 0; + checker.m_cl_Image_desc.image_slice_pitch = 0; + checker.m_cl_Image_desc.num_mip_levels = 0; + checker.m_cl_Image_desc.num_samples = 0; + + checker.SetupImage(); + checker.Init_rect(); + err = checker.verify_RW_Image_Mapping(); + test_error(err, __FUNCTION__); + clFinish(queue); + return err; +} + +int test_mem_host_read_only_image(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + cl_mem_flags buffer_mem_flags[2] = { CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR | CL_MEM_HOST_READ_ONLY, + CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR | CL_MEM_HOST_READ_ONLY }; + + cl_int err = CL_SUCCESS; + + cl_bool image_support; + err = clGetDeviceInfo(deviceID, CL_DEVICE_IMAGE_SUPPORT, sizeof image_support, &image_support, NULL); + if (err) { + test_error(err, __FUNCTION__); + return err; + } + if (!image_support) { + log_info("Images are not supported by the device, skipping test...\n"); + return 0; + } + + + cl_mem_object_type img_type[5] = {CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE2D, CL_MEM_OBJECT_IMAGE3D,CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE2D_ARRAY}; + + size_t img_dims[5][3] = {{200, 1, 1}, {200, 80, 1}, {200, 80, 5}, {200, 1, 1}, {200, 80, 10}}; // in elements + + size_t array_size[5] = {1, 10, 1, 10, 1}; + + cl_bool blocking[2] = {CL_TRUE, CL_FALSE}; + for (int flag=0; flag<2; flag++) + for (int i=0; i<2; i++) // blocking + { + for(int p=0; p<3; p++) + { + err = test_mem_host_read_only_RW_Image(deviceID, context, 
queue, blocking[i], + buffer_mem_flags[flag], img_type[p], + array_size[p], img_dims[p]); + + test_error(err, __FUNCTION__); + + err = test_mem_host_read_only_RW_Image_Mapping(deviceID, context, queue, blocking[i], + buffer_mem_flags[flag], img_type[p], + array_size[p], img_dims[p]); + + test_error(err, __FUNCTION__); + } + } + + return err; +} + +//---------------------------- +static cl_int test_MEM_HOST_WRIE_ONLY_Image_RW (cl_device_id deviceID, cl_context context, + cl_command_queue queue, cl_bool blocking, + cl_mem_flags buffer_mem_flag, + cl_mem_object_type image_type_in, + size_t array_size, size_t *img_dim) +{ + log_info(" %s ... \n ", __FUNCTION__); + cl_int err = CL_SUCCESS; + + cImage_check_mem_host_write_only< int > checker(deviceID, context, queue); + checker.m_blocking = blocking; + checker.buffer_mem_flag = buffer_mem_flag; + + checker.m_cl_Image_desc.image_type = image_type_in; + checker.m_cl_Image_desc.image_width = img_dim[0]; + checker.m_cl_Image_desc.image_height = img_dim[1]; + checker.m_cl_Image_desc.image_depth = img_dim[2]; + checker.m_cl_Image_desc.image_array_size = array_size; + checker.m_cl_Image_desc.image_row_pitch = 0; + checker.m_cl_Image_desc.image_slice_pitch = 0; + checker.m_cl_Image_desc.num_mip_levels = 0; + checker.m_cl_Image_desc.num_samples = 0; + + checker.SetupImage(); + checker.Init_rect(); + checker.Setup_Test_Environment(); + + err = checker.verify_RW_Image(); + clFinish(queue); + test_error(err, __FUNCTION__); + + return err; +} + +static cl_int test_MEM_HOST_WRITE_ONLY_Image_RW_Mapping(cl_device_id deviceID, cl_context context, + cl_command_queue queue, cl_bool blocking, + cl_mem_flags buffer_mem_flag, + cl_mem_object_type image_type_in, + size_t array_size, size_t *img_dim) +{ + log_info("%s ... 
\n ", __FUNCTION__); + cl_int err = CL_SUCCESS; + + cImage_check_mem_host_write_only< int > checker(deviceID, context, queue); + checker.m_blocking = blocking; + checker.buffer_mem_flag = buffer_mem_flag; + + checker.m_cl_Image_desc.image_type = image_type_in; + checker.m_cl_Image_desc.image_width = img_dim[0]; + checker.m_cl_Image_desc.image_height = img_dim[1]; + checker.m_cl_Image_desc.image_depth = img_dim[2]; + checker.m_cl_Image_desc.image_array_size = array_size; + checker.m_cl_Image_desc.image_row_pitch = 0; + checker.m_cl_Image_desc.image_slice_pitch = 0; + checker.m_cl_Image_desc.num_mip_levels = 0; + checker.m_cl_Image_desc.num_samples = 0; + + checker.SetupImage(); + checker.Init_rect(); + checker.Setup_Test_Environment(); + + err = checker.verify_RW_Image_Mapping(); + clFinish(queue); + test_error(err, __FUNCTION__); + + return err; +} + +int test_mem_host_write_only_image(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + cl_mem_flags buffer_mem_flags[2] = { CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR | CL_MEM_HOST_WRITE_ONLY, + CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR | CL_MEM_HOST_WRITE_ONLY }; + + cl_int err = CL_SUCCESS; + + cl_bool image_support; + err = clGetDeviceInfo(deviceID, CL_DEVICE_IMAGE_SUPPORT, sizeof image_support, &image_support, NULL); + if (err) { + test_error(err, __FUNCTION__); + return err; + } + if (!image_support) { + log_info("Images are not supported by the device, skipping test...\n"); + return 0; + } + + cl_mem_object_type img_type[5]= {CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE2D, CL_MEM_OBJECT_IMAGE3D, + CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE2D_ARRAY}; + + size_t img_dims[5][3]= {{200, 1, 1}, {200, 80, 1}, {200, 80, 5}, {200, 1, 1}, {200, 80, 1} }; // in elements + + size_t array_size[5] = {1, 10, 1, 10, 1}; + + cl_bool blocking[2] = {CL_TRUE, CL_FALSE}; + for (int k=0; k<2; k++) + for (int i=0; i<2; i++) // blocking + { + for (int p=0; p<3; p++) + { + err = 
test_MEM_HOST_WRIE_ONLY_Image_RW(deviceID, context, queue, blocking[i], + buffer_mem_flags[k], img_type[p], array_size[p], img_dims[p]); + test_error(err, __FUNCTION__); + + err = test_MEM_HOST_WRITE_ONLY_Image_RW_Mapping(deviceID, context, queue, blocking[i], + buffer_mem_flags[k], img_type[p], array_size[p], img_dims[p]); + test_error(err, __FUNCTION__); + } + } + + return err; +} + +//-------- + +static cl_int test_mem_host_no_access_Image_RW(cl_device_id deviceID, cl_context context, + cl_command_queue queue, cl_bool blocking, + cl_mem_flags buffer_mem_flag, + cl_mem_object_type image_type_in, + size_t array_size, size_t *img_dim) +{ + log_info("%s ... \n", __FUNCTION__); + cl_int err = CL_SUCCESS; + + cImage_check_mem_host_no_access< int > checker(deviceID, context, queue); + + checker.m_blocking = blocking; + checker.buffer_mem_flag = buffer_mem_flag; + + checker.m_cl_Image_desc.image_type = image_type_in; + checker.m_cl_Image_desc.image_width = img_dim[0]; + checker.m_cl_Image_desc.image_height = img_dim[1]; + checker.m_cl_Image_desc.image_depth = img_dim[2]; + checker.m_cl_Image_desc.image_array_size = array_size; + checker.m_cl_Image_desc.image_row_pitch = 0; + checker.m_cl_Image_desc.image_slice_pitch = 0; + checker.m_cl_Image_desc.num_mip_levels = 0; + checker.m_cl_Image_desc.num_samples = 0; + + checker.SetupImage(); + checker.Init_rect(); + checker.Setup_Test_Environment(); + err = checker.verify_RW_Image(); + test_error(err, __FUNCTION__); + clFinish(queue); + return err; +} + +static cl_int test_mem_host_no_access_Image_RW_Mapping(cl_device_id deviceID, cl_context context, + cl_command_queue queue, cl_bool blocking, + cl_mem_flags buffer_mem_flag, + cl_mem_object_type image_type_in, + size_t array_size, size_t *img_dim) +{ + log_info("%s ... 
\n ", __FUNCTION__); + cl_int err =CL_SUCCESS; + + cImage_check_mem_host_no_access< int > checker(deviceID, context, queue); + + checker.m_blocking = blocking; + checker.buffer_mem_flag = buffer_mem_flag; + + checker.m_cl_Image_desc.image_type = image_type_in; + checker.m_cl_Image_desc.image_width = img_dim[0]; + checker.m_cl_Image_desc.image_height = img_dim[1]; + checker.m_cl_Image_desc.image_depth = img_dim[2]; + checker.m_cl_Image_desc.image_array_size = array_size; + checker.m_cl_Image_desc.image_row_pitch = 0; + checker.m_cl_Image_desc.image_slice_pitch = 0; + checker.m_cl_Image_desc.num_mip_levels = 0; + checker.m_cl_Image_desc.num_samples = 0; + + checker.SetupImage(); + checker.Init_rect(); + checker.Setup_Test_Environment(); + err = checker.verify_RW_Image_Mapping(); + test_error(err, __FUNCTION__); + clFinish(queue); + return err; +} + +int test_mem_host_no_access_image(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + cl_mem_flags buffer_mem_flags[2] = {CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR | CL_MEM_HOST_NO_ACCESS, + CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR | CL_MEM_HOST_NO_ACCESS }; + + cl_int err = CL_SUCCESS; + + cl_bool image_support; + err = clGetDeviceInfo(deviceID, CL_DEVICE_IMAGE_SUPPORT, sizeof image_support, &image_support, NULL); + if (err) { + test_error(err, __FUNCTION__); + return err; + } + if (!image_support) { + log_info("Images are not supported by the device, skipping test...\n"); + return 0; + } + + cl_mem_object_type img_type[5] = {CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE2D, CL_MEM_OBJECT_IMAGE3D, + CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE2D_ARRAY}; + + size_t img_dims[5][3]= {{200, 1, 1}, {200, 80, 1}, {100, 80, 5}, {200, 1, 1}, {200, 80, 1}}; // in elements + + size_t array_size [5] = {1, 1, 1, 10, 10}; + + cl_bool blocking[2] = { CL_TRUE, CL_FALSE}; + for (int k=0; k<2; k++) + for (int i=0; i<2; i++) // blocking + { + for (int p =0; p<3; p++) + { + err += 
test_mem_host_no_access_Image_RW (deviceID, context, queue, blocking[i], + buffer_mem_flags[k], img_type[p], array_size[p], img_dims[p]); + + err += test_mem_host_no_access_Image_RW_Mapping(deviceID, context, queue, blocking[i], + buffer_mem_flags[k], img_type[p], array_size[p], img_dims[p]); + } + } + + return err; +} diff --git a/test_conformance/mem_host_flags/procs.h b/test_conformance/mem_host_flags/procs.h new file mode 100644 index 00000000..98f81491 --- /dev/null +++ b/test_conformance/mem_host_flags/procs.h @@ -0,0 +1,45 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef __PROCS_H__ +#define __PROCS_H__ + +#include "testBase.h" + +#define NUM_FLAGS 4 + +extern int test_mem_host_read_only_buffer(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_mem_host_read_only_subbuffer(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); + +extern int test_mem_host_write_only_buffer(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_mem_host_write_only_subbuffer(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); + +extern int test_mem_host_no_access_buffer(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_mem_host_no_access_subbuffer(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); + +extern int test_mem_host_read_only_image(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_mem_host_write_only_image(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_mem_host_no_access_image(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); + +#endif // #ifndef __PROCS_H__ diff --git a/test_conformance/mem_host_flags/testBase.h b/test_conformance/mem_host_flags/testBase.h new file mode 100644 index 00000000..afcdf60e --- /dev/null +++ b/test_conformance/mem_host_flags/testBase.h @@ -0,0 +1,40 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef _testBase_h +#define _testBase_h + +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include + +#if !defined (__APPLE__) +#include +#else +//#include +#endif + +#include "../../test_common/harness/imageHelpers.h" +#include "../../test_common/harness/errorHelpers.h" +#include "../../test_common/harness/kernelHelpers.h" +#include "../../test_common/harness/threadTesting.h" +#include "../../test_common/harness/typeWrappers.h" +#include "../../test_common/harness/conversions.h" +#include "../../test_common/harness/mt19937.h" + +#endif // _testBase_h diff --git a/test_conformance/multiple_device_context/CMakeLists.txt b/test_conformance/multiple_device_context/CMakeLists.txt new file mode 100644 index 00000000..e69c884b --- /dev/null +++ b/test_conformance/multiple_device_context/CMakeLists.txt @@ -0,0 +1,35 @@ +set(MODULE_NAME multiples) + +set(${MODULE_NAME}_SOURCES + main.c + test_multiple_contexts.c + test_multiple_devices.cpp + ../../test_common/harness/errorHelpers.c + ../../test_common/harness/threadTesting.c + ../../test_common/harness/testHarness.c + ../../test_common/harness/kernelHelpers.c + ../../test_common/harness/genericThread.cpp + ../../test_common/harness/typeWrappers.cpp + ../../test_common/harness/conversions.c + ../../test_common/harness/mt19937.c + ../../test_common/harness/msvc9.c + ../../test_common/harness/parseParameters.cpp +) + + +set_source_files_properties( + COMPILE_FLAGS -msse2) + +set_source_files_properties( + main.c + test_multiple_contexts.c + 
../../test_common/harness/errorHelpers.c + ../../test_common/harness/threadTesting.c + ../../test_common/harness/testHarness.c + ../../test_common/harness/kernelHelpers.c + ../../test_common/harness/conversions.c + ../../test_common/harness/msvc9.c + ../../test_common/harness/parseParameters.cpp + PROPERTIES LANGUAGE CXX) + +include(../CMakeCommon.txt) diff --git a/test_conformance/multiple_device_context/Jamfile b/test_conformance/multiple_device_context/Jamfile new file mode 100644 index 00000000..715c2f11 --- /dev/null +++ b/test_conformance/multiple_device_context/Jamfile @@ -0,0 +1,17 @@ +project + : requirements + gcc:-xc++ + msvc:"/TP" + ; + +exe test_multiple_device_context + : main.c + test_multiple_contexts.c + test_multiple_devices.cpp + ; + +install dist + : test_multiple_device_context + : debug:$(DIST)/debug/tests/test_conformance/multiple_device_context + release:$(DIST)/release/tests/test_conformance/multiple_device_context + ; diff --git a/test_conformance/multiple_device_context/Makefile b/test_conformance/multiple_device_context/Makefile new file mode 100644 index 00000000..4054a55a --- /dev/null +++ b/test_conformance/multiple_device_context/Makefile @@ -0,0 +1,46 @@ +ifdef BUILD_WITH_ATF +ATF = -framework ATF +USE_ATF = -DUSE_ATF +endif + +SRCS = main.c \ + test_multiple_contexts.c \ + test_multiple_devices.cpp \ + ../../test_common/harness/errorHelpers.c \ + ../../test_common/harness/threadTesting.c \ + ../../test_common/harness/testHarness.c \ + ../../test_common/harness/kernelHelpers.c \ + ../../test_common/harness/genericThread.cpp \ + ../../test_common/harness/mt19937.c \ + ../../test_common/harness/conversions.c \ + ../../test_common/harness/typeWrappers.cpp + +DEFINES = DONT_TEST_GARBAGE_POINTERS + +SOURCES = $(abspath $(SRCS)) +LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries +LIBPATH += -L. 
+FRAMEWORK = $(SOURCES) +HEADERS = +TARGET = test_multiples +INCLUDE = +COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32 +CC = c++ +CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE) +CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE) +LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF} + +OBJECTS := ${SOURCES:.c=.o} +OBJECTS := ${OBJECTS:.cpp=.o} + +TARGETOBJECT = +all: $(TARGET) + +$(TARGET): $(OBJECTS) + $(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES) + +clean: + rm -f $(TARGET) $(OBJECTS) + +.DEFAULT: + @echo The target \"$@\" does not exist in Makefile. diff --git a/test_conformance/multiple_device_context/main.c b/test_conformance/multiple_device_context/main.c new file mode 100644 index 00000000..3982fde3 --- /dev/null +++ b/test_conformance/multiple_device_context/main.c @@ -0,0 +1,62 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/compat.h" + +#include +#include +#include "procs.h" +#include "../../test_common/harness/testHarness.h" +#include "../../test_common/harness/mt19937.h" + +#if !defined(_WIN32) +#include +#endif + +basefn basefn_list[] = { + test_multiple_contexts_same_device, + test_two_contexts_same_device, + test_three_contexts_same_device, + test_four_contexts_same_device, + + test_two_devices, + test_max_devices, + + test_hundred_queues +}; + + +const char *basefn_names[] = { + "context_multiple_contexts_same_device", + "context_two_contexts_same_device", + "context_three_contexts_same_device", + "context_four_contexts_same_device", + + "two_devices", + "max_devices", + + "hundred_queues", +}; + +ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_list) / sizeof(basefn_list[0]))); + +int num_fns = sizeof(basefn_names) / sizeof(char *); + +int main(int argc, const char *argv[]) +{ + return runTestHarness( argc, argv, num_fns, basefn_list, basefn_names, false, true, 0 ); +} + + diff --git a/test_conformance/multiple_device_context/procs.h b/test_conformance/multiple_device_context/procs.h new file mode 100644 index 00000000..d179ccbb --- /dev/null +++ b/test_conformance/multiple_device_context/procs.h @@ -0,0 +1,31 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/errorHelpers.h" +#include "../../test_common/harness/kernelHelpers.h" +#include "../../test_common/harness/typeWrappers.h" +#include "../../test_common/harness/mt19937.h" + +extern int test_multiple_contexts_same_device(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_two_contexts_same_device(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_three_contexts_same_device(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_four_contexts_same_device(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_two_devices(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_max_devices(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_hundred_queues(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); + + diff --git a/test_conformance/multiple_device_context/testBase.h b/test_conformance/multiple_device_context/testBase.h new file mode 100644 index 00000000..5073b21f --- /dev/null +++ b/test_conformance/multiple_device_context/testBase.h @@ -0,0 +1,31 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef _testBase_h +#define _testBase_h + +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include + +#include "procs.h" + +#endif // _testBase_h + + + diff --git a/test_conformance/multiple_device_context/test_multiple_contexts.c b/test_conformance/multiple_device_context/test_multiple_contexts.c new file mode 100644 index 00000000..3f24b3af --- /dev/null +++ b/test_conformance/multiple_device_context/test_multiple_contexts.c @@ -0,0 +1,529 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "testBase.h" +#include "../../test_common/harness/testHarness.h" + +const char *context_test_kernels[] = { + "__kernel void sample_test_1(__global int *src, __global int *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + " dst[tid] = src[tid];\n" + "\n" + "}\n" + + "__kernel void sample_test_2(__global int *src, __global int *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + " dst[tid] = src[tid] * 2;\n" + "\n" + "}\n" + + "__kernel void sample_test_3(__global int *src, __global int *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + " dst[tid] = src[tid] / 2;\n" + "\n" + "}\n" + + "__kernel void sample_test_4(__global int *src, __global int *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + " dst[tid] = src[tid] /3;\n" + "\n" + "}\n" +}; + +int sampleAction1( int source ) { return source; } +int sampleAction2( int source ) { return source * 2; } +int sampleAction3( int source ) { return source / 2; } +int sampleAction4( int source ) { return source / 3; } + + +typedef int (*sampleActionFn)( int source ); + +sampleActionFn sampleActions[4] = { sampleAction1, sampleAction2, sampleAction3, sampleAction4 }; + +#define BUFFER_COUNT 2 +#define TEST_SIZE 512 + +typedef struct TestItem +{ + struct TestItem *next; + cl_context c; + cl_command_queue q; + cl_program p; + cl_kernel k[4]; + cl_mem m[BUFFER_COUNT]; + MTdata d; +}TestItem; + +static void DestroyTestItem( TestItem *item ); + +// Attempt to create a context and associated objects +TestItem *CreateTestItem( cl_device_id deviceID, cl_int *err ) +{ + cl_int error = 0; + size_t i; + + // Allocate the TestItem struct + TestItem *item = (TestItem *) malloc( sizeof(TestItem ) ); + if( NULL == item ) + { + if( err ) + { + log_error( "FAILURE: Failed to allocate TestItem -- out of host memory!\n" ); + *err = CL_OUT_OF_HOST_MEMORY; + } + return NULL; + } + //zero so we know which fields we have initialized + memset( item, 0, sizeof( *item ) ); + + item->d = init_genrand( gRandomSeed ); + if( NULL == 
item->d ) + { + if( err ) + { + log_error( "FAILURE: Failed to allocate mtdata om CreateTestItem -- out of host memory!\n" ); + *err = CL_OUT_OF_HOST_MEMORY; + } + DestroyTestItem( item ); + return NULL; + } + + + // Create a context + item->c = clCreateContext(NULL, 1, &deviceID, notify_callback, NULL, &error ); + if( item->c == NULL || error != CL_SUCCESS) + { + if (err) { + log_error( "FAILURE: clCreateContext failed in CreateTestItem: %d\n", error); + *err = error; + } + DestroyTestItem( item ); + return NULL; + } + + // Create a queue + item->q = clCreateCommandQueueWithProperties( item->c, deviceID, 0, &error); + if( item->q == NULL || error != CL_SUCCESS) + { + if (err) { + log_error( "FAILURE: clCreateCommandQueue failed in CreateTestItem: %d\n", error ); + *err = error; + } + DestroyTestItem( item ); + return NULL; + } + + // Create a program + error = create_single_kernel_helper_create_program(item->c, &item->p, 1, context_test_kernels); + if( NULL == item->p || CL_SUCCESS != error ) + { + if( err ) + { + log_error( "FAILURE: clCreateProgram failed in CreateTestItem: %d\n", error ); + *err = error; + } + DestroyTestItem( item ); + return NULL; + } + + error = clBuildProgram( item->p, 1, &deviceID, "", NULL, NULL ); + if( error ) + { + if( err ) + { + log_error( "FAILURE: clBuildProgram failed in CreateTestItem: %d\n", error ); + *err = error; + } + DestroyTestItem( item ); + return NULL; + } + + // create some kernels + for( i = 0; i < sizeof( item->k ) / sizeof( item->k[0] ); i++ ) + { + static const char *kernelNames[] = { "sample_test_1", "sample_test_2", "sample_test_3", "sample_test_4" }; + item->k[i] = clCreateKernel( item->p, kernelNames[i], &error ); + if( NULL == item->k[i] || CL_SUCCESS != error ) + { + if( err ) + { + log_error( "FAILURE: clCreateKernel( \"%s\" ) failed in CreateTestItem: %d\n", kernelNames[i], error ); + *err = error; + } + DestroyTestItem( item ); + return NULL; + } + } + + // create some mem objects + for( i = 0; i < 
BUFFER_COUNT; i++ ) + { + item->m[i] = clCreateBuffer( item->c, CL_MEM_READ_WRITE, TEST_SIZE * sizeof(cl_int), NULL, &error ); + if( NULL == item->m[i] || CL_SUCCESS != error ) + { + if( err ) + { + log_error( "FAILURE: clCreateBuffer( %ld bytes ) failed in CreateTestItem: %d\n", TEST_SIZE * sizeof(cl_int), error ); + *err = error; + } + DestroyTestItem( item ); + return NULL; + } + } + + + return item; +} + +// Destroy a context and associate objects +static void DestroyTestItem( TestItem *item ) +{ + size_t i; + + if( NULL == item ) + return; + + if( item->d ) + free_mtdata( item->d ); + if( item->c) + clReleaseContext( item->c ); + if( item->q) + clReleaseCommandQueue( item->q ); + if( item->p) + clReleaseProgram( item->p ); + for( i = 0; i < sizeof( item->k ) / sizeof( item->k[0] ); i++ ) + { + if( item->k[i]) + clReleaseKernel( item->k[i] ); + } + for( i = 0; i < BUFFER_COUNT; i++ ) + { + if( item->m[i]) + clReleaseMemObject( item->m[i] ); + } + free(item ); +} + + +cl_int UseTestItem( const TestItem *item, cl_int *err ) +{ + size_t i, j; + cl_int error = CL_SUCCESS; + + // Fill buffer 0 with random numbers + cl_int *mapped = (cl_int*) clEnqueueMapBuffer( item->q, item->m[0], CL_TRUE, CL_MAP_WRITE, 0, TEST_SIZE * sizeof( cl_int ), 0, NULL, NULL, &error ); + if( NULL == mapped || CL_SUCCESS != error ) + { + if( err ) + { + log_error( "FAILURE: Failed to map buffer 0 for writing: %d\n", error ); + *err = error; + } + return error; + } + + for( j = 0; j < TEST_SIZE; j++ ) + mapped[j] = genrand_int32(item->d); + + error = clEnqueueUnmapMemObject( item->q, item->m[0], mapped, 0, NULL, NULL ); + if( CL_SUCCESS != error ) + { + if( err ) + { + log_error( "FAILURE: failure to unmap buffer 0 for writing: %d\n", error ); + *err = error; + } + return error; + } + + // try each kernel in turn. 
+ for( j = 0; j < sizeof(item->k) / sizeof( item->k[0] ); j++ ) + { + // Fill buffer 1 with 0xdeaddead + mapped = (cl_int*) clEnqueueMapBuffer( item->q, item->m[1], CL_TRUE, CL_MAP_WRITE, 0, TEST_SIZE * sizeof( cl_int ), 0, NULL, NULL, &error ); + if( NULL == mapped || CL_SUCCESS != error ) + { + if( err ) + { + log_error( "Failed to map buffer 1 for writing: %d\n", error ); + *err = error; + } + return error; + } + + for( i = 0; i < TEST_SIZE; i++ ) + mapped[i] = 0xdeaddead; + + error = clEnqueueUnmapMemObject( item->q, item->m[1], mapped, 0, NULL, NULL ); + if( CL_SUCCESS != error ) + { + if( err ) + { + log_error( "Failed to unmap buffer 1 for writing: %d\n", error ); + *err = error; + } + return error; + } + + // Run the kernel + error = clSetKernelArg( item->k[j], 0, sizeof( cl_mem), &item->m[0] ); + if( error ) + { + if( err ) + { + log_error( "FAILURE to set arg 0 for kernel # %ld : %d\n", j, error ); + *err = error; + } + return error; + } + + error = clSetKernelArg( item->k[j], 1, sizeof( cl_mem), &item->m[1] ); + if( error ) + { + if( err ) + { + log_error( "FAILURE: Unable to set arg 1 for kernel # %ld : %d\n", j, error ); + *err = error; + } + return error; + } + + size_t work_size = TEST_SIZE; + size_t global_offset = 0; + error = clEnqueueNDRangeKernel( item->q, item->k[j], 1, &global_offset, &work_size, NULL, 0, NULL, NULL ); + if( CL_SUCCESS != error ) + { + if( err ) + { + log_error( "FAILURE: Unable to enqueue kernel %ld: %d\n", j, error ); + *err = error; + } + return error; + } + + // Get the results back + mapped = (cl_int*) clEnqueueMapBuffer( item->q, item->m[1], CL_TRUE, CL_MAP_READ, 0, TEST_SIZE * sizeof( cl_int ), 0, NULL, NULL, &error ); + if( NULL == mapped || CL_SUCCESS != error ) + { + if( err ) + { + log_error( "Failed to map buffer 1 for reading: %d\n", error ); + *err = error; + } + return error; + } + + // Get our input data so we can check against it + cl_int *inputData = (cl_int*) clEnqueueMapBuffer( item->q, item->m[0], CL_TRUE, 
CL_MAP_READ, 0, TEST_SIZE * sizeof( cl_int ), 0, NULL, NULL, &error ); + if( NULL == mapped || CL_SUCCESS != error ) + { + if( err ) + { + log_error( "Failed to map buffer 0 for reading: %d\n", error ); + *err = error; + } + return error; + } + + + //Verify the results + for( i = 0; i < TEST_SIZE; i++ ) + { + int expected = sampleActions[j]( inputData[i] ); + int result = mapped[i]; + if( expected != result ) + { + log_error( "FAILURE: Sample data at position %ld does not match expected result: *0x%8.8x vs. 0x%8.8x\n", i, expected, result ); + if( err ) + *err = -1; + return -1; + } + } + + //Clean up + error = clEnqueueUnmapMemObject( item->q, item->m[0], inputData, 0, NULL, NULL ); + if( CL_SUCCESS != error ) + { + if( err ) + { + log_error( "Failed to unmap buffer 0 for reading: %d\n", error ); + *err = error; + } + return error; + } + + error = clEnqueueUnmapMemObject( item->q, item->m[1], mapped, 0, NULL, NULL ); + if( CL_SUCCESS != error ) + { + if( err ) + { + log_error( "Failed to unmap buffer 1 for reading: %d\n", error ); + *err = error; + } + return error; + } + + } + + // Make sure that the last set of unmap calls get run + error = clFinish( item->q ); + if( CL_SUCCESS != error ) + { + if( err ) + { + log_error( "Failed to clFinish: %d\n", error ); + *err = error; + } + return error; + } + + return CL_SUCCESS; +} + + + +int test_multiple_contexts_same_device(cl_device_id deviceID, size_t maxCount, size_t minCount ) +{ + size_t i, j; + cl_int err = CL_SUCCESS; + + //Figure out how many of these we can make before the first failure + TestItem *list = NULL; + + for( i = 0; i < maxCount; i++ ) + { + // create a context and accompanying objects + TestItem *current = CreateTestItem( deviceID, NULL /*no error reporting*/ ); + if( NULL == current ) + break; + + // Attempt to use it + cl_int failed = UseTestItem( current, NULL ); + + if( failed ) + { + DestroyTestItem( current ); + break; + } + + // Add the successful test item to the list + current->next = 
list; + list = current; + } + + // Check to make sure we made the minimum amount + if( i < minCount ) + { + log_error( "FAILURE: only could make %ld of %ld contexts!\n", i, minCount ); + err = -1; + goto exit; + } + + // Report how many contexts we made + if( i == maxCount ) + log_info( "Successfully created all %lu contexts.\n", i ); + else + log_info( "Successfully created %lu contexts out of %lu\n", i, maxCount ); + + // Set the count to be the number we succesfully made + maxCount = i; + + // Make sure we can do it again a few times + log_info( "Tring to do it 5 more times" ); + fflush( stdout); + for( j = 0; j < 5; j++ ) + { + //free all the contexts we already made + while( list ) + { + TestItem *current = list; + list = list->next; + current->next = NULL; + DestroyTestItem( current ); + } + + // Attempt to make them again + for( i = 0; i < maxCount; i++ ) + { + // create a context and accompanying objects + TestItem *current = CreateTestItem( deviceID, &err ); + if( err ) + { + log_error( "\nTest Failed with error at CreateTestItem: %d\n", err ); + goto exit; + } + + // Attempt to use it + cl_int failed = UseTestItem( current, &err ); + + if( failed || err ) + { + DestroyTestItem( current ); + log_error( "\nTest Failed with error at UseTestItem: %d\n", err ); + goto exit; + } + + // Add the successful test item to the list + current->next = list; + list = current; + } + log_info( "." ); + fflush( stdout ); + } + + log_info( "Done.\n" ); + +exit: + //free all the contexts we already made + while( list ) + { + TestItem *current = list; + list = list->next; + current->next = NULL; + + DestroyTestItem( current ); + } + + return err; +} + +// This test tests to make sure that your implementation isn't super leaky. We make a bunch of contexts (up to some +// sane limit, currently 200), attempting to use each along the way. We keep track of how many we could make before +// a failure occurred. We then free everything and attempt to go do it again a few times. 
If you are able to make +// that many contexts 5 times over, then you pass. +int test_multiple_contexts_same_device(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return test_multiple_contexts_same_device( deviceID, 200, 1 ); +} + +int test_two_contexts_same_device(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return test_multiple_contexts_same_device( deviceID, 2, 2 ); +} + +int test_three_contexts_same_device(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return test_multiple_contexts_same_device( deviceID, 3, 3 ); +} + +int test_four_contexts_same_device(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return test_multiple_contexts_same_device( deviceID, 4, 4 ); +} + diff --git a/test_conformance/multiple_device_context/test_multiple_devices.cpp b/test_conformance/multiple_device_context/test_multiple_devices.cpp new file mode 100644 index 00000000..1766e669 --- /dev/null +++ b/test_conformance/multiple_device_context/test_multiple_devices.cpp @@ -0,0 +1,236 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "testBase.h" +#include "../../test_common/harness/typeWrappers.h" +#include "../../test_common/harness/testHarness.h" +#include "../../test_common/harness/conversions.h" + +const char *test_kernels[] = { +"__kernel void kernelA(__global int *dst)\n" +"{\n" +"\n" +" dst[get_global_id(0)]*=3;\n" +"\n" +"}\n" +"__kernel void kernelB(__global int *dst)\n" +"{\n" +"\n" +" dst[get_global_id(0)]++;\n" +"\n" +"}\n" +}; + +#define TEST_SIZE 512 +#define MAX_DEVICES 8 +#define MAX_QUEUES 1000 + +int test_device_set(size_t deviceCount, size_t queueCount, cl_device_id *devices, int num_elements) +{ + int error; + clContextWrapper context; + clProgramWrapper program; + clKernelWrapper kernels[2]; + clMemWrapper stream; + clCommandQueueWrapper queues[MAX_QUEUES]; + size_t threads[1], localThreads[1]; + int data[TEST_SIZE]; + int outputData[TEST_SIZE]; + int expectedResults[TEST_SIZE]; + int expectedResultsOneDevice[MAX_DEVICES][TEST_SIZE]; + size_t i; + + memset(queues, 0, sizeof(queues)); + + RandomSeed seed( gRandomSeed ); + + if (deviceCount > MAX_DEVICES) { + log_error("Number of devices in set (%ld) is greater than the number for which the test was written (%d).", deviceCount, MAX_DEVICES); + return -1; + } + + if (queueCount > MAX_QUEUES) { + log_error("Number of queues (%ld) is greater than the number for which the test was written (%d).", queueCount, MAX_QUEUES); + return -1; + } + + log_info("Testing with %ld queues on %ld devices, %ld kernel executions.\n", queueCount, deviceCount, queueCount*num_elements/TEST_SIZE); + + for (i=0; i 2) + { + log_info("Note: got %d devices, using just the first two.\n", (int)numDevices); + } + + /* Run test */ + return test_device_set( 2, 2, devices, num_elements ); +} + +int test_max_devices(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + cl_platform_id platform; + cl_device_id devices[MAX_DEVICES]; + cl_uint deviceCount; + int err; + + err = clGetPlatformIDs(1, &platform, NULL); 
+ test_error( err, "Unable to get platform" ); + + /* Get some devices */ + err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, MAX_DEVICES, devices, &deviceCount ); + test_error( err, "Unable to get multiple devices" ); + + log_info("Testing with %d devices.", deviceCount); + + /* Run test */ + return test_device_set( deviceCount, deviceCount, devices, num_elements ); +} + +int test_hundred_queues(cl_device_id device, cl_context contextIgnore, cl_command_queue queueIgnore, int num_elements) +{ + return test_device_set( 1, 100, &device, num_elements ); +} + diff --git a/test_conformance/non_uniform_work_group/CMakeLists.txt b/test_conformance/non_uniform_work_group/CMakeLists.txt new file mode 100644 index 00000000..69dc4adb --- /dev/null +++ b/test_conformance/non_uniform_work_group/CMakeLists.txt @@ -0,0 +1,20 @@ +set(MODULE_NAME NON_UNIFORM_WORK_GROUP) + +set(${MODULE_NAME}_SOURCES + main.cpp + test_advanced_2d.cpp + test_advanced_3d.cpp + test_advanced_other.cpp + test_basic.cpp + TestNonUniformWorkGroup.cpp + tools.cpp + ../../test_common/harness/testHarness.c + ../../test_common/harness/errorHelpers.c + ../../test_common/harness/kernelHelpers.c + ../../test_common/harness/msvc9.c + ../../test_common/harness/parseParameters.cpp +) + +include(../CMakeCommon.txt) + +# end of file # diff --git a/test_conformance/non_uniform_work_group/Makefile b/test_conformance/non_uniform_work_group/Makefile new file mode 100644 index 00000000..3051c618 --- /dev/null +++ b/test_conformance/non_uniform_work_group/Makefile @@ -0,0 +1,46 @@ +ifdef BUILD_WITH_ATF +ATF = -framework ATF +USE_ATF = -DUSE_ATF +endif + +SRCS = main.c \ + test_basic.cpp \ + test_advanced_2d.cpp \ + test_advanced_3d.cpp \ + test_advanced_other.cpp \ + TestNonUniformWorkGroup.cpp \ + tools.cpp \ + ../../test_common/harness/errorHelpers.c \ + ../../test_common/harness/threadTesting.c \ + ../../test_common/harness/testHarness.c \ + ../../test_common/harness/kernelHelpers.c \ + 
../../test_common/harness/typeWrappers.cpp \ + +DEFINES = DONT_TEST_GARBAGE_POINTERS + +SOURCES = $(abspath $(SRCS)) +LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries +LIBPATH += -L. +HEADERS = +TARGET = test_non_uniform_workgroup +INCLUDE = +COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32 +CC = c++ +CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE) +CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE) +LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF} + +OBJECTS := ${SOURCES:.c=.o} +OBJECTS := ${OBJECTS:.cpp=.o} + +TARGETOBJECT = +all: $(TARGET) + +$(TARGET): $(OBJECTS) + $(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES) + +clean: + rm -f $(TARGET) $(OBJECTS) + +.DEFAULT: + @echo The target \"$@\" does not exist in Makefile. diff --git a/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.cpp b/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.cpp new file mode 100644 index 00000000..381d29bc --- /dev/null +++ b/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.cpp @@ -0,0 +1,751 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "TestNonUniformWorkGroup.h" +#include +#include +#define NL "\n" + +size_t TestNonUniformWorkGroup::_maxLocalWorkgroupSize = 0; +bool TestNonUniformWorkGroup::_strictMode = false; + +// Main Kernel source code +static const char *KERNEL_FUNCTION = + NL "#define MAX_DIMS 3" + NL "typedef struct _DataContainerAttrib" + NL "{" + NL " unsigned long get_global_size[MAX_DIMS];" + NL " unsigned long get_global_offset[MAX_DIMS];" + NL " unsigned long get_local_size[MAX_DIMS];" + NL " unsigned long get_enqueued_local_size[MAX_DIMS];" + NL " unsigned long get_global_id[MAX_DIMS];" + NL " unsigned long get_local_id[MAX_DIMS];" + NL " unsigned long get_group_id[MAX_DIMS];" + NL " unsigned long get_num_groups[MAX_DIMS];" + NL " unsigned long get_work_dim;" + NL " unsigned short test_local_barrier_result_bool;" + NL " unsigned short test_global_barrier_result_bool;" + NL " unsigned short test_local_atomic_result_value;" + NL "}DataContainerAttrib;" + + NL "enum Error{" + NL " ERR_GLOBAL_SIZE=0," + NL " ERR_GLOBAL_WORK_OFFSET," + NL " ERR_LOCAL_SIZE," + NL " ERR_GLOBAL_ID," + NL " ERR_LOCAL_ID," + NL " ERR_ENQUEUED_LOCAL_SIZE," + NL " ERR_NUM_GROUPS," + NL " ERR_GROUP_ID," + NL " ERR_WORK_DIM," + NL " ERR_GLOBAL_BARRIER," + NL " ERR_LOCAL_BARRIER," + NL " ERR_GLOBAL_ATOMIC," + NL " ERR_LOCAL_ATOMIC," + NL " ERR_STRICT_MODE," + NL " ERR_BUILD_STATUS," + NL " ERR_UNKNOWN," + NL " ERR_DIFFERENT," + NL " _LAST_ELEM" + NL "};" + + NL "uint getGlobalIndex (uint gid2, uint gid1, uint gid0) {" + NL " return gid2*get_global_size(0)*get_global_size(1) + gid1*get_global_size(0) + gid0;" + NL "}" + + NL "int getRegionIndex () {" + NL " uint gid0 = get_global_id(0) - get_global_offset(0);" + NL " uint gid1 = get_global_id(1) - get_global_offset(1);" + NL " uint gid2 = get_global_id(2) - get_global_offset(2);" + NL " if (gid0 == 0 && gid1 == 0 && gid2 == 0) {" + NL " return 0;" + NL " } else if (gid0 == get_global_size(0) - 1 && gid1 == 0 && gid2 == 0) {" + NL " return 1;" + NL " 
} else if (gid0 == 0 && gid1 == get_global_size(1) - 1 && gid2 == 0) {" + NL " return 2;" + NL " } else if (gid0 == get_global_size(0) - 1 && gid1 == get_global_size(1) - 1 && gid2 == 0) {" + NL " return 3;" + NL " } else if (gid0 == 0 && gid1 == 0 && gid2 == get_global_size(2) - 1) {" + NL " return 4;" + NL " } else if (gid0 == get_global_size(0) - 1 && gid1 == 0 && gid2 == get_global_size(2) - 1) {" + NL " return 5;" + NL " } else if (gid0 == 0 && gid1 == get_global_size(1) - 1 && gid2 == get_global_size(2) - 1) {" + NL " return 6;" + NL " } else if (gid0 == get_global_size(0) - 1 && gid1 == get_global_size(1) - 1 && gid2 == get_global_size(2) - 1) {" + NL " return 7;" + NL " }" + NL " return -1;" + NL "}" + + NL "void getLocalSize(__global DataContainerAttrib *results) {" + NL " for (unsigned short i = 0; i < MAX_DIMS; i++) {" + NL " results->get_local_size[i] = get_local_size(i);" + NL " }" + NL "}" + + NL "#ifdef TESTBASIC" + // values set by this function will be checked on the host side + NL "void testBasicHost(__global DataContainerAttrib *results) {" + NL " for (unsigned short i = 0; i < MAX_DIMS; i++) {" + NL " results->get_global_size[i] = get_global_size(i);" + NL " results->get_global_offset[i] = get_global_offset(i);" + NL " results->get_enqueued_local_size[i] = get_enqueued_local_size(i);" + NL " results->get_global_id[i] = get_global_id(i);" + NL " results->get_local_id[i] = get_local_id(i);" + NL " results->get_group_id[i] = get_group_id(i);" + NL " results->get_num_groups[i] = get_num_groups(i);" + NL " }" + NL " results->get_work_dim = get_work_dim();" + NL "}" + // values set by this function are checked on the kernel side + NL "void testBasicKernel(__global unsigned int *errorCounterBuffer, __local DataContainerAttrib *resultsForThread0) {" + NL " uint lid0 = get_local_id(0);" + NL " uint lid1 = get_local_id(1);" + NL " uint lid2 = get_local_id(2);" + NL " if (lid0 == 0 && lid1 == 0 && lid2 == 0) {" + NL " for (unsigned short i = 0; i < 
MAX_DIMS; i++) {" + NL " resultsForThread0->get_global_size[i] = get_global_size(i);" + NL " resultsForThread0->get_global_offset[i] = get_global_offset(i);" + NL " resultsForThread0->get_enqueued_local_size[i] = get_enqueued_local_size(i);" + NL " resultsForThread0->get_group_id[i] = get_group_id(i);" + NL " resultsForThread0->get_num_groups[i] = get_num_groups(i);" + NL " }" + NL " resultsForThread0->get_work_dim = get_work_dim();" + NL " }" + NL " barrier(CLK_LOCAL_MEM_FENCE);" + // verifies built in functions on the kernel side + NL " if (lid0 != 0 || lid1 != 0 || lid2 != 0) {" + NL " for (unsigned short i = 0; i < MAX_DIMS; i++) {" + NL " if (resultsForThread0->get_global_size[i] != get_global_size(i)) {" + NL " atomic_inc(&errorCounterBuffer[ERR_GLOBAL_SIZE]);" + NL " }" + NL " if (resultsForThread0->get_global_offset[i] != get_global_offset(i)) {" + NL " atomic_inc(&errorCounterBuffer[ERR_GLOBAL_WORK_OFFSET]);" + NL " }" + NL " if (resultsForThread0->get_enqueued_local_size[i] != get_enqueued_local_size(i)) {" + NL " atomic_inc(&errorCounterBuffer[ERR_ENQUEUED_LOCAL_SIZE]);" + NL " }" + NL " if (resultsForThread0->get_group_id[i] != get_group_id(i)) {" + NL " atomic_inc(&errorCounterBuffer[ERR_GROUP_ID]);" + NL " }" + NL " if (resultsForThread0->get_num_groups[i] != get_num_groups(i)) {" + NL " atomic_inc(&errorCounterBuffer[ERR_NUM_GROUPS]);" + NL " }" + NL " }" + NL " if (resultsForThread0->get_work_dim != get_work_dim()) {" + NL " atomic_inc(&errorCounterBuffer[ERR_WORK_DIM]);" + NL " }" + NL " }" + NL "}" + NL "#endif" + + NL "#ifdef TESTBARRIERS" + NL "void testBarriers(__global unsigned int *errorCounterBuffer, __local unsigned int *testLocalBuffer, __global unsigned int *testGlobalBuffer) {" + NL " uint gid0 = get_global_id(0);" + NL " uint gid1 = get_global_id(1);" + NL " uint gid2 = get_global_id(2);" + NL " uint lid0 = get_local_id(0);" + NL " uint lid1 = get_local_id(1);" + NL " uint lid2 = get_local_id(2);" + NL + NL " uint globalIndex = 
getGlobalIndex(gid2-get_global_offset(2), gid1-get_global_offset(1), gid0-get_global_offset(0));" + NL " uint localIndex = lid2*get_local_size(0)*get_local_size(1) + lid1*get_local_size(0) + lid0;" + NL " testLocalBuffer[localIndex] = 0;" + NL " testGlobalBuffer[globalIndex] = 0;" + NL " uint maxLocalIndex = get_local_size(0)*get_local_size(1)*get_local_size(2)-1;" + NL " uint nextLocalIndex = (localIndex>=maxLocalIndex)?0:(localIndex+1);" + NL " uint next_lid0 = (lid0+1>=get_local_size(0))?0:lid0+1;" + NL " uint next_lid1 = (lid1+1>=get_local_size(1))?0:lid1+1;" + NL " uint next_lid2 = (lid2+1>=get_local_size(2))?0:lid2+1;" + NL " uint nextGlobalIndexInLocalWorkGroup = getGlobalIndex (get_group_id(2)*get_enqueued_local_size(2)+next_lid2, get_group_id(1)*get_enqueued_local_size(1)+next_lid1, get_group_id(0)*get_enqueued_local_size(0)+next_lid0);" + // testing local barriers + NL " testLocalBuffer[localIndex] = localIndex;" + NL " barrier(CLK_LOCAL_MEM_FENCE);" + NL " uint temp = testLocalBuffer[nextLocalIndex];" + NL " if (temp != nextLocalIndex) {" + NL " atomic_inc(&errorCounterBuffer[ERR_LOCAL_BARRIER]);" + NL " }" + // testing global barriers + NL " testGlobalBuffer[globalIndex] = globalIndex;" + NL " barrier(CLK_GLOBAL_MEM_FENCE);" + NL " uint temp2 = testGlobalBuffer[nextGlobalIndexInLocalWorkGroup];" + NL " if (temp2 != nextGlobalIndexInLocalWorkGroup) {" + NL " atomic_inc(&errorCounterBuffer[ERR_GLOBAL_BARRIER]);" + NL " }" + NL "}" + NL "#endif" + + NL "#ifdef TESTATOMICS" + NL "void testAtomics(__global unsigned int *globalAtomicTestVariable, __local unsigned int *localAtomicTestVariable) {" + NL " uint gid0 = get_global_id(0);" + NL " uint gid1 = get_global_id(1);" + NL " uint gid2 = get_global_id(2);" + NL + NL " uint globalIndex = getGlobalIndex(gid2-get_global_offset(2), gid1-get_global_offset(1), gid0-get_global_offset(0));" + // testing atomic function on local memory + NL " atomic_inc(localAtomicTestVariable);" + NL " barrier(CLK_LOCAL_MEM_FENCE);" 
+ // testing atomic function on global memory + NL " atomic_inc(globalAtomicTestVariable);" + NL "}" + NL "#endif" + + NL "#ifdef RWGSX" + NL "#ifdef RWGSY" + NL "#ifdef RWGSZ" + NL "__attribute__((reqd_work_group_size(RWGSX, RWGSY, RWGSZ)))" + NL "#endif" + NL "#endif" + NL "#endif" + NL "__kernel void testKernel(__global DataContainerAttrib *results, __local unsigned int *testLocalBuffer," + NL " __global unsigned int *testGlobalBuffer, __global unsigned int *globalAtomicTestVariable, __global unsigned int *errorCounterBuffer) {" + NL " uint gid0 = get_global_id(0);" + NL " uint gid1 = get_global_id(1);" + NL " uint gid2 = get_global_id(2);" + NL + NL " uint globalIndex = getGlobalIndex(gid2-get_global_offset(2), gid1-get_global_offset(1), gid0-get_global_offset(0));" + NL " int regionIndex = getRegionIndex();" + NL " if (regionIndex >= 0) {" + NL " getLocalSize(&results[regionIndex]);" + NL " }" + NL "#ifdef TESTBASIC" + NL " if (regionIndex >= 0) {" + NL " testBasicHost(&results[regionIndex]);" + NL " }" + NL " __local DataContainerAttrib resultsForThread0;" + NL " testBasicKernel(errorCounterBuffer, &resultsForThread0);" + NL "#endif" + NL "#ifdef TESTBARRIERS" + NL " testBarriers(errorCounterBuffer, testLocalBuffer, testGlobalBuffer);" + NL "#endif" + NL "#ifdef TESTATOMICS" + NL " __local unsigned int localAtomicTestVariable;" + NL " localAtomicTestVariable = 0;" + NL " barrier(CLK_LOCAL_MEM_FENCE);" + NL " testAtomics(globalAtomicTestVariable, &localAtomicTestVariable);" + NL " barrier(CLK_LOCAL_MEM_FENCE);" + NL " if (localAtomicTestVariable != get_local_size(0) * get_local_size(1) * get_local_size(2)) {" + NL " atomic_inc(&errorCounterBuffer[ERR_LOCAL_ATOMIC]);" + NL " }" + NL "#endif" + NL "}" + NL ; + +TestNonUniformWorkGroup::TestNonUniformWorkGroup (const cl_device_id &device, const cl_context &context, + const cl_command_queue &queue, const cl_uint dims, const size_t *globalSize, const size_t *localSize, const size_t *buffersSize, + const size_t 
*globalWorkOffset, const size_t *reqdWorkGroupSize) + : _device(device), _context(context), _queue(queue), _dims (dims) { + + if (globalSize == NULL || dims < 1 || dims > 3) { + //throw std::invalid_argument("globalSize is NULL value."); + // This is method of informing that parameters are wrong. + // It would be checked by prepareDevice() function. + // This is used because of lack of exception support. + _globalSize[0] = 0; + return; + } + + cl_uint i; + _globalWorkOffset_IsNull = true; + _localSize_IsNull = true; + + setGlobalWorkgroupSize(globalSize); + setLocalWorkgroupSize(globalSize,localSize); + for (i = _dims; i < MAX_DIMS; i++) { + _globalSize[i] = 1; + } + + for (i = 0; i < MAX_DIMS; i++) { + _globalWorkOffset[i] = 0; + } + + if (globalWorkOffset) { + _globalWorkOffset_IsNull = false; + for (i = 0; i < _dims; i++) { + _globalWorkOffset[i] = globalWorkOffset[i]; + } + } + + for (i = 0; i < MAX_DIMS; i++) { + _enqueuedLocalSize[i] = 1; + } + + if (localSize) { + _localSize_IsNull = false; + for (i = 0; i < _dims; i++) { + _enqueuedLocalSize[i] = _localSize[i]; + } + } + + if (reqdWorkGroupSize) { + for (i = 0; i < _dims; i++) { + _reqdWorkGroupSize[i] = reqdWorkGroupSize[i]; + } + for (i = _dims; i < MAX_DIMS; i++) { + _reqdWorkGroupSize[i] = 1; + } + } else { + _reqdWorkGroupSize[0] = 0; + _reqdWorkGroupSize[1] = 0; + _reqdWorkGroupSize[2] = 0; + } + + _testRange = Range::ALL; + + _numOfGlobalWorkItems = _globalSize[0]*_globalSize[1]*_globalSize[2]; + + DataContainerAttrib temp = {{0, 0, 0}}; + + // array with results from each region + _resultsRegionArray.resize(NUMBER_OF_REGIONS, temp); + _referenceRegionArray.resize(NUMBER_OF_REGIONS, temp); + +} + +TestNonUniformWorkGroup::~TestNonUniformWorkGroup () { + if (_err.checkError()) { + _err.showStats(); + } +} + +void TestNonUniformWorkGroup::setLocalWorkgroupSize (const size_t *globalSize, const size_t *localSize) +{ + cl_uint i; + // Enforce localSize should not exceed globalSize + if (localSize) { + for 
(i = 0; i < _dims; i++) { + if ((globalSize[i] < localSize[i])) { + _localSize[i] = globalSize[i]; + }else{ + _localSize[i] = localSize[i]; + } + } + } +} + +void TestNonUniformWorkGroup::setGlobalWorkgroupSize (const size_t *globalSize) +{ + cl_uint i; + for (i = 0; i < _dims; i++) { + _globalSize[i] = globalSize[i]; + } +} + +void TestNonUniformWorkGroup::verifyData (DataContainerAttrib * reference, DataContainerAttrib * results, short regionNumber) { + + std::ostringstream tmp; + std::string errorLocation; + + if (_testRange & Range::BASIC) { + for (unsigned short i = 0; i < MAX_DIMS; i++) { + tmp.str(""); + tmp.clear(); + tmp << "region number: " << regionNumber << " for dim: " << i; + errorLocation = tmp.str(); + + if (results->get_global_size[i] != reference->get_global_size[i]) { + _err.show(Error::ERR_GLOBAL_SIZE, errorLocation, results->get_global_size[i], reference->get_global_size[i]); + } + + if (results->get_global_offset[i] != reference->get_global_offset[i]) { + _err.show(Error::ERR_GLOBAL_WORK_OFFSET, errorLocation, results->get_global_offset[i], reference->get_global_offset[i]); + } + + if (results->get_local_size[i] != reference->get_local_size[i] || results->get_local_size[i] > _maxWorkItemSizes[i]) { + _err.show(Error::ERR_LOCAL_SIZE, errorLocation, results->get_local_size[i], reference->get_local_size[i]); + } + + if (results->get_enqueued_local_size[i] != reference->get_enqueued_local_size[i] || results->get_enqueued_local_size[i] > _maxWorkItemSizes[i]) { + _err.show(Error::ERR_ENQUEUED_LOCAL_SIZE, errorLocation, results->get_enqueued_local_size[i], reference->get_enqueued_local_size[i]); + } + + if (results->get_num_groups[i] != reference->get_num_groups[i]) { + _err.show(Error::ERR_NUM_GROUPS, errorLocation, results->get_num_groups[i], reference->get_num_groups[i]); + } + } + } + + tmp.str(""); + tmp.clear(); + tmp << "region number: " << regionNumber; + errorLocation = tmp.str(); + if (_testRange & Range::BASIC) { + if 
(results->get_work_dim != reference->get_work_dim) { + _err.show(Error::ERR_WORK_DIM, errorLocation, results->get_work_dim, reference->get_work_dim); + } + } +} + +void TestNonUniformWorkGroup::calculateExpectedValues () { + size_t nonRemainderGlobalSize[MAX_DIMS]; + size_t numberOfPossibleRegions[MAX_DIMS]; + + nonRemainderGlobalSize[0] = _globalSize[0] - (_globalSize[0] % _enqueuedLocalSize[0]); + nonRemainderGlobalSize[1] = _globalSize[1] - (_globalSize[1] % _enqueuedLocalSize[1]); + nonRemainderGlobalSize[2] = _globalSize[2] - (_globalSize[2] % _enqueuedLocalSize[2]); + + numberOfPossibleRegions[0] = (_globalSize[0]>1)?2:1; + numberOfPossibleRegions[1] = (_globalSize[1]>1)?2:1; + numberOfPossibleRegions[2] = (_globalSize[2]>1)?2:1; + + for (cl_ushort i = 0; i < NUMBER_OF_REGIONS; ++i) { + + if (i & 0x01 && numberOfPossibleRegions[0] == 1) { + continue; + } + + if (i & 0x02 && numberOfPossibleRegions[1] == 1) { + continue; + } + + if (i & 0x04 && numberOfPossibleRegions[2] == 1) { + continue; + } + + for (cl_ushort dim = 0; dim < MAX_DIMS; ++dim) { + _referenceRegionArray[i].get_global_size[dim] = static_cast(_globalSize[dim]); + _referenceRegionArray[i].get_global_offset[dim] = static_cast(_globalWorkOffset[dim]); + _referenceRegionArray[i].get_enqueued_local_size[dim] = static_cast(_enqueuedLocalSize[dim]); + _referenceRegionArray[i].get_local_size[dim] = static_cast(_enqueuedLocalSize[dim]); + _referenceRegionArray[i].get_num_groups[dim] = static_cast(ceil(static_cast(_globalSize[dim]) / _enqueuedLocalSize[dim])); + } + _referenceRegionArray[i].get_work_dim = _dims; + + if (i & 0x01) { + _referenceRegionArray[i].get_local_size[0] = static_cast((_globalSize[0] - 1) % _enqueuedLocalSize[0] + 1); + } + + if (i & 0x02) { + _referenceRegionArray[i].get_local_size[1] = static_cast((_globalSize[1] - 1) % _enqueuedLocalSize[1] + 1); + } + + if (i & 0x04) { + _referenceRegionArray[i].get_local_size[2] = static_cast((_globalSize[2] - 1) % _enqueuedLocalSize[2] + 1); + 
} + } +} + +size_t TestNonUniformWorkGroup::getMaxLocalWorkgroupSize (const cl_device_id &device) { + int err; + + if (TestNonUniformWorkGroup::_maxLocalWorkgroupSize == 0) { + err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, + sizeof(TestNonUniformWorkGroup::_maxLocalWorkgroupSize), &TestNonUniformWorkGroup::_maxLocalWorkgroupSize, NULL); + } + + return TestNonUniformWorkGroup::_maxLocalWorkgroupSize; +} + +void TestNonUniformWorkGroup::enableStrictMode(bool state) { + TestNonUniformWorkGroup::_strictMode = state; +} + +int TestNonUniformWorkGroup::prepareDevice () { + int err; + cl_uint device_max_dimensions; + + if (_globalSize[0] == 0) + { + log_error("Some arguments passed into constructor were wrong.\n"); + return -1; + } + + if(_localSize_IsNull == false) + calculateExpectedValues(); + + err = clGetDeviceInfo(_device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, + sizeof(device_max_dimensions), &device_max_dimensions, NULL); + test_error(err, "clGetDeviceInfo failed"); + + err = clGetDeviceInfo(_device, CL_DEVICE_MAX_WORK_ITEM_SIZES, + sizeof(_maxWorkItemSizes), _maxWorkItemSizes, NULL); + + test_error(err, "clGetDeviceInfo failed"); + + std::string buildOptions = BUILD_CL_STD_2_0; + if(_reqdWorkGroupSize[0] != 0 && _reqdWorkGroupSize[1] != 0 && _reqdWorkGroupSize[2] != 0) { + std::ostringstream tmp(" "); + tmp << " -D RWGSX=" << _reqdWorkGroupSize[0] + << " -D RWGSY=" << _reqdWorkGroupSize[1] + << " -D RWGSZ=" << _reqdWorkGroupSize[2] << " "; + buildOptions += tmp.str(); + } + + if (_testRange & Range::BASIC) + buildOptions += " -D TESTBASIC"; + if (_testRange & Range::ATOMICS) + buildOptions += " -D TESTATOMICS"; + if (_testRange & Range::BARRIERS) + buildOptions += " -D TESTBARRIERS"; + + err = create_single_kernel_helper_with_build_options (_context, &_program, &_testKernel, 1, + &KERNEL_FUNCTION, "testKernel", buildOptions.c_str()); + if (err) + { + log_error("Error %d in line: %d of file %s\n", err, __LINE__, __FILE__); + return -1; + } + + return 
0; +} + +int TestNonUniformWorkGroup::verifyResults () { + if (_localSize_IsNull) { + // for global work groups where local work group size is not defined (set to NULL in clEnqueueNDRangeKernel) + // we need to check what optimal size was chosen by device + // we assumed that local size value for work item 0 is right for the rest work items + _enqueuedLocalSize[0] = static_cast(_resultsRegionArray[0].get_local_size[0]); + _enqueuedLocalSize[1] = static_cast(_resultsRegionArray[0].get_local_size[1]); + _enqueuedLocalSize[2] = static_cast(_resultsRegionArray[0].get_local_size[2]); + calculateExpectedValues(); + + // strict mode verification + if(_strictMode) { + size_t localWorkGroupSize = _enqueuedLocalSize[0]*_enqueuedLocalSize[1]*_enqueuedLocalSize[2]; + if (localWorkGroupSize != TestNonUniformWorkGroup::getMaxLocalWorkgroupSize(_device)) + _err.show(Error::ERR_STRICT_MODE, "",localWorkGroupSize, TestNonUniformWorkGroup::getMaxLocalWorkgroupSize(_device)); + } + + log_info ("Local work group size calculated by driver: %s\n", showArray(_enqueuedLocalSize, _dims).c_str()); + } + + for (cl_ushort i = 0; i < NUMBER_OF_REGIONS; ++i) { + verifyData(&_referenceRegionArray[i], &_resultsRegionArray[i], i); + } + + if (_testRange & Range::ATOMICS) { + if (_globalAtomicTestValue != _numOfGlobalWorkItems) { + _err.show(Error::ERR_GLOBAL_ATOMIC); + } + } + + if (_err.checkError()) + return -1; + + return 0; +} + +std::string showArray (const size_t *arr, cl_uint dims) { + std::ostringstream tmpStringStream (""); + + tmpStringStream << "{"; + for (cl_uint i=0; i < dims; i++) { + tmpStringStream << arr[i]; + if (i+1 < dims) + tmpStringStream << ", "; + } + tmpStringStream << "}"; + + return tmpStringStream.str(); +} + +void TestNonUniformWorkGroup::showTestInfo () { + std::string tmpString; + log_info ("T E S T P A R A M E T E R S :\n"); + log_info ("\tNumber of dimensions:\t%d\n", _dims); + + tmpString = showArray(_globalSize, _dims); + + log_info("\tGlobal work group 
size:\t%s\n", tmpString.c_str()); + + if (!_localSize_IsNull) { + tmpString = showArray(_enqueuedLocalSize, _dims); + } else { + tmpString = "NULL"; + } + log_info("\tLocal work group size:\t%s\n", tmpString.c_str()); + + if (!_globalWorkOffset_IsNull) { + tmpString = showArray(_globalWorkOffset, _dims); + } else { + tmpString = "NULL"; + } + log_info("\tGlobal work group offset:\t%s\n", tmpString.c_str()); + + if (_reqdWorkGroupSize[0] != 0 && _reqdWorkGroupSize[1] != 0 && _reqdWorkGroupSize[2] != 0) { + tmpString = showArray(_reqdWorkGroupSize, _dims); + } else { + tmpString = "attribute disabled"; + } + log_info ("\treqd_work_group_size attribute:\t%s\n", tmpString.c_str()); + + tmpString = ""; + if(_testRange & Range::BASIC) + tmpString += "basic"; + if(_testRange & Range::ATOMICS) { + if(tmpString != "") tmpString += ", "; + tmpString += "atomics"; + } + if(_testRange & Range::BARRIERS) { + if(tmpString != "") tmpString += ", "; + tmpString += "barriers"; + } + log_info ("\tTest range:\t%s\n", tmpString.c_str()); + if(_strictMode) { + log_info ("\tStrict mode:\tON\n"); + if (!_localSize_IsNull) { + log_info ("\tATTENTION: strict mode applies only NULL local work group size\n"); + } else { + log_info ("\t\tExpected value of local work group size is %ld.\n", + TestNonUniformWorkGroup::getMaxLocalWorkgroupSize(_device)); + } + + } +} + +int TestNonUniformWorkGroup::runKernel () { + int err; + + // TEST INFO + showTestInfo(); + + size_t localArraySize = (_localSize_IsNull)?TestNonUniformWorkGroup::getMaxLocalWorkgroupSize(_device):(_enqueuedLocalSize[0]*_enqueuedLocalSize[1]*_enqueuedLocalSize[2]); + clMemWrapper resultsRegionArray = clCreateBuffer(_context, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, _resultsRegionArray.size() * sizeof(DataContainerAttrib), &_resultsRegionArray.front(), &err); + test_error(err, "clCreateBuffer failed"); + + size_t *localSizePtr = (_localSize_IsNull)?NULL:_enqueuedLocalSize; + size_t *globalWorkOffsetPtr = 
(_globalWorkOffset_IsNull)?NULL:_globalWorkOffset; + + err = clSetKernelArg(_testKernel, 0, sizeof(resultsRegionArray), &resultsRegionArray); + test_error(err, "clSetKernelArg failed"); + + //creating local buffer + err = clSetKernelArg(_testKernel, 1, localArraySize*sizeof(unsigned int), NULL); + test_error(err, "clSetKernelArg failed"); + + clMemWrapper testGlobalArray = clCreateBuffer(_context, CL_MEM_READ_WRITE, _numOfGlobalWorkItems*sizeof(cl_uint), NULL, &err); + test_error(err, "clCreateBuffer failed"); + + err = clSetKernelArg(_testKernel, 2, sizeof(testGlobalArray), &testGlobalArray); + test_error(err, "clSetKernelArg failed"); + + _globalAtomicTestValue = 0; + clMemWrapper globalAtomicTestVariable = clCreateBuffer(_context, (CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR), sizeof(_globalAtomicTestValue), &_globalAtomicTestValue, &err); + test_error(err, "clCreateBuffer failed"); + + err = clSetKernelArg(_testKernel, 3, sizeof(globalAtomicTestVariable), &globalAtomicTestVariable); + test_error(err, "clSetKernelArg failed"); + + clMemWrapper errorArray = clCreateBuffer(_context, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, _err.errorArrayCounterSize(), _err.errorArrayCounter(), &err); + test_error(err, "clCreateBuffer failed"); + + err = clSetKernelArg(_testKernel, 4, sizeof(errorArray), &errorArray); + test_error(err, "clSetKernelArg failed"); + + err = clEnqueueNDRangeKernel(_queue, _testKernel, _dims, globalWorkOffsetPtr, _globalSize, + localSizePtr, 0, NULL, NULL); + test_error(err, "clEnqueueNDRangeKernel failed"); + + + err = clFinish(_queue); + test_error(err, "clFinish failed"); + + err = clEnqueueReadBuffer(_queue, globalAtomicTestVariable, CL_TRUE, 0, sizeof(unsigned int), &_globalAtomicTestValue, 0, NULL, NULL); + test_error(err, "clEnqueueReadBuffer failed"); + + if (_err.checkError()) { + return -1; + } + + // synchronization of main buffer + err = clEnqueueReadBuffer(_queue, resultsRegionArray, CL_TRUE, 0, _resultsRegionArray.size() * 
sizeof(DataContainerAttrib), &_resultsRegionArray.front(), 0, NULL, NULL); + test_error(err, "clEnqueueReadBuffer failed"); + + err = clEnqueueReadBuffer(_queue, errorArray, CL_TRUE, 0, _err.errorArrayCounterSize(), _err.errorArrayCounter(), 0, NULL, NULL); + test_error(err, "clEnqueueReadBuffer failed"); + // Synchronization of errors occurred in kernel into general error stats + _err.synchronizeStatsMap(); + + return 0; +} + +void SubTestExecutor::runTestNonUniformWorkGroup(const cl_uint dims, const size_t *globalSize, + const size_t *localSize, int range) { + runTestNonUniformWorkGroup (dims, globalSize, localSize, NULL, NULL, range); +} + +void SubTestExecutor::runTestNonUniformWorkGroup(const cl_uint dims, const size_t *globalSize, + const size_t *localSize, const size_t *globalWorkOffset, + const size_t *reqdWorkGroupSize, int range) { + + + int err; + ++_overallCounter; + TestNonUniformWorkGroup test (_device, _context, _queue, dims, globalSize, localSize, + NULL, globalWorkOffset, reqdWorkGroupSize); + + test.setTestRange(range); + err = test.prepareDevice(); + if (err) { + log_error ("Error: prepare device\n"); + ++_failCounter; + return; + } + + err = test.runKernel(); + if (err) { + log_error ("Error: run kernel\n"); + ++_failCounter; + return; + } + + err = test.verifyResults(); + if (err) { + log_error ("Error: verify results\n"); + ++_failCounter; + return; + } +} + +int SubTestExecutor::calculateWorkGroupSize(size_t &maxWgSize, int testRange) { + int err; + + clProgramWrapper program; + clKernelWrapper testKernel; + std::string buildOptions = BUILD_CL_STD_2_0; + + if (testRange & Range::BASIC) + buildOptions += " -D TESTBASIC"; + if (testRange & Range::ATOMICS) + buildOptions += " -D TESTATOMICS"; + if (testRange & Range::BARRIERS) + buildOptions += " -D TESTBARRIERS"; + + err = create_single_kernel_helper_with_build_options (_context, &program, &testKernel, 1, + &KERNEL_FUNCTION, "testKernel", buildOptions.c_str()); + if (err) + { + log_error("Error 
%d in line: %d of file %s\n", err, __LINE__, __FILE__); + return err; + } + + err = clGetKernelWorkGroupInfo (testKernel, _device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(maxWgSize), &maxWgSize, NULL); + test_error(err, "clGetKernelWorkGroupInfo failed"); + + TestNonUniformWorkGroup::setMaxLocalWorkgroupSize(maxWgSize); + + return 0; +} + +int SubTestExecutor::status() { + + if (_failCounter>0) { + log_error ("%d subtest(s) (of %d) failed\n", _failCounter, _overallCounter); + return -1; + } else { + log_info ("All %d subtest(s) passed\n", _overallCounter); + return 0; + } +} diff --git a/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.h b/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.h new file mode 100644 index 00000000..e7f261b5 --- /dev/null +++ b/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.h @@ -0,0 +1,146 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef _TESTNONUNIFORMWORKGROUP_H +#define _TESTNONUNIFORMWORKGROUP_H + +#include "procs.h" +#include +#include "tools.h" +#include + +#define MAX_SIZE_OF_ALLOCATED_MEMORY (400*1024*1024) + +#define NUMBER_OF_REGIONS 8 + +#define BUILD_CL_STD_2_0 "-cl-std=CL2.0" + +#define MAX_DIMS 3 + +// This structure reflects data received from kernel. 
+typedef struct _DataContainerAttrib +{ + cl_ulong get_global_size[MAX_DIMS]; + cl_ulong get_global_offset[MAX_DIMS]; + cl_ulong get_local_size[MAX_DIMS]; + cl_ulong get_enqueued_local_size[MAX_DIMS]; + cl_ulong get_global_id[MAX_DIMS]; + cl_ulong get_local_id[MAX_DIMS]; + cl_ulong get_group_id[MAX_DIMS]; + cl_ulong get_num_groups[MAX_DIMS]; + cl_ulong get_work_dim; + cl_ushort test_local_barrier_result_bool; + cl_ushort test_global_barrier_result_bool; + cl_ushort test_local_atomic_result_value; +}DataContainerAttrib; + +// Describes range of testing. +namespace Range { + enum RangeEnum { + BASIC = (1 << 0), + BARRIERS = (1 << 1), + ATOMICS = (1 << 2), + + ALL = Range::BASIC | Range::BARRIERS | Range::ATOMICS + }; +} + +std::string showArray (const size_t *arr, cl_uint dims); + +// Main class responsible for testing +class TestNonUniformWorkGroup { +public: + + TestNonUniformWorkGroup (const cl_device_id &device, const cl_context &context, + const cl_command_queue &queue, const cl_uint dims, const size_t *globalSize, + const size_t *localSize, const size_t *buffersSize, const size_t *globalWorkOffset, + const size_t *reqdWorkGroupSize=NULL); + + ~TestNonUniformWorkGroup (); + + static size_t getMaxLocalWorkgroupSize (const cl_device_id &device); + static void setMaxLocalWorkgroupSize (size_t workGroupSize) { + TestNonUniformWorkGroup::_maxLocalWorkgroupSize = workGroupSize; + } + static void enableStrictMode (bool state); + + void setTestRange (int range) {_testRange = range;} + int prepareDevice (); + int verifyResults (); + int runKernel (); + +private: + size_t _globalSize[MAX_DIMS]; + size_t _localSize[MAX_DIMS]; + size_t _globalWorkOffset[MAX_DIMS]; + bool _globalWorkOffset_IsNull; + size_t _enqueuedLocalSize[MAX_DIMS]; + bool _localSize_IsNull; + size_t _reqdWorkGroupSize[MAX_DIMS]; + static size_t _maxLocalWorkgroupSize; + size_t _maxWorkItemSizes[MAX_DIMS]; + size_t _numOfGlobalWorkItems; // in global work group + const cl_device_id _device; + const 
cl_context _context; + const cl_command_queue _queue; + const cl_uint _dims; + + int _testRange; + + std::vector<DataContainerAttrib> _resultsRegionArray; + std::vector<DataContainerAttrib> _referenceRegionArray; + cl_uint _globalAtomicTestValue; + + clProgramWrapper _program; + clKernelWrapper _testKernel; + + Error::ErrorClass _err; + + TestNonUniformWorkGroup (); + + static bool _strictMode; + void setLocalWorkgroupSize (const size_t *globalSize, const size_t *localSize); + void setGlobalWorkgroupSize (const size_t *globalSize); + void verifyData (DataContainerAttrib * reference, DataContainerAttrib * results, short regionNumber); + void calculateExpectedValues (); + void showTestInfo (); +}; + +// Class responsible for running subtest scenarios in test function +class SubTestExecutor { +public: + SubTestExecutor(const cl_device_id &device, const cl_context &context, const cl_command_queue &queue) + : _device (device), _context (context), _queue (queue), _failCounter (0), _overallCounter (0) {} + + void runTestNonUniformWorkGroup (const cl_uint dims, const size_t *globalSize, + const size_t *localSize, int range); + + void runTestNonUniformWorkGroup (const cl_uint dims, const size_t *globalSize, + const size_t *localSize, const size_t *globalWorkOffset, + const size_t *reqdWorkGroupSize, int range); + + int calculateWorkGroupSize(size_t &maxWgSize, int testRange); + int status(); + +private: + SubTestExecutor(); + const cl_device_id _device; + const cl_context _context; + const cl_command_queue _queue; + unsigned int _failCounter; + unsigned int _overallCounter; +}; + +#endif // _TESTNONUNIFORMWORKGROUP_H diff --git a/test_conformance/non_uniform_work_group/main.cpp b/test_conformance/non_uniform_work_group/main.cpp new file mode 100644 index 00000000..66218ef1 --- /dev/null +++ b/test_conformance/non_uniform_work_group/main.cpp @@ -0,0 +1,88 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "procs.h" +#include "tools.h" +#include "../../test_common/harness/testHarness.h" +#include "TestNonUniformWorkGroup.h" + +basefn basefn_list[] = { + test_non_uniform_1d_basic, + test_non_uniform_1d_atomics, + test_non_uniform_1d_barriers, + + test_non_uniform_2d_basic, + test_non_uniform_2d_atomics, + test_non_uniform_2d_barriers, + + test_non_uniform_3d_basic, + test_non_uniform_3d_atomics, + test_non_uniform_3d_barriers, + + test_non_uniform_other_basic, + test_non_uniform_other_atomics, + test_non_uniform_other_barriers +}; + +const char *basefn_names[] = { + "non_uniform_1d_basic", + "non_uniform_1d_atomics", + "non_uniform_1d_barriers", + + "non_uniform_2d_basic", + "non_uniform_2d_atomics", + "non_uniform_2d_barriers", + + "non_uniform_3d_basic", + "non_uniform_3d_atomics", + "non_uniform_3d_barriers", + + "non_uniform_other_basic", + "non_uniform_other_atomics", + "non_uniform_other_barriers", +}; + +ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_list) / sizeof(basefn_list[0]))); + +int num_fns = sizeof(basefn_names) / sizeof(char *); + +int main(int argc, const char *argv[]) +{ + typedef std::vector<const char *> ArgsVector; + ArgsVector programArgs; + programArgs.assign(argv, argv+argc); + + int numFns = num_fns; + basefn *baseFnList = basefn_list; + const char **baseFnNames = basefn_names; + + for (ArgsVector::iterator it = programArgs.begin(); it!=programArgs.end();) { 
+ + if(*it == std::string("-strict")) { + TestNonUniformWorkGroup::enableStrictMode(true); + it=programArgs.erase(it); + } else { + ++it; + } + } + + PrimeNumbers::generatePrimeNumbers(100000); + + return runTestHarness(static_cast<int>(programArgs.size()), &programArgs.front(), numFns, baseFnList, baseFnNames, false /* image support required */, false /* force no context creation */, 0 ); +} + + + + diff --git a/test_conformance/non_uniform_work_group/procs.h b/test_conformance/non_uniform_work_group/procs.h new file mode 100644 index 00000000..199bb836 --- /dev/null +++ b/test_conformance/non_uniform_work_group/procs.h @@ -0,0 +1,32 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/typeWrappers.h" + +extern int test_non_uniform_1d_basic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_non_uniform_1d_atomics(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_non_uniform_1d_barriers(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_non_uniform_2d_basic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_non_uniform_2d_atomics(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_non_uniform_2d_barriers(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_non_uniform_3d_basic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_non_uniform_3d_atomics(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_non_uniform_3d_barriers(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_non_uniform_other_basic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_non_uniform_other_atomics(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_non_uniform_other_barriers(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); diff --git a/test_conformance/non_uniform_work_group/test_advanced_2d.cpp b/test_conformance/non_uniform_work_group/test_advanced_2d.cpp new file mode 100644 index 00000000..3ed1082d --- /dev/null +++ b/test_conformance/non_uniform_work_group/test_advanced_2d.cpp @@ -0,0 +1,383 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "procs.h" +#include "tools.h" + +#include "TestNonUniformWorkGroup.h" + +int + test_non_uniform_2d_basic(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + SubTestExecutor exec(device, context, queue); + + size_t maxWgSize; + int err; + err = exec.calculateWorkGroupSize(maxWgSize, Range::BASIC); + if (err) { + log_error ("Cannot calculate work group size."); + return -1; + } + + // non_uniform_2d_max_wg_size_plus_1_basic + { + size_t globalSize[] = {maxWgSize+1, maxWgSize}; + size_t localSize[] = {maxWgSize, 1}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BASIC); + } + + // non_uniform_2d_prime_number_basic + { + int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); + if (primeNumber < 1) { + log_error ("Cannot find proper prime number."); + return -1; + } + size_t globalSize[] = {primeNumber, maxWgSize}; + size_t localSize[] = {maxWgSize/2, 2}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BASIC); + } + + // non_uniform_2d_two_prime_numbers_basic + { + int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); + if (primeNumber < 1) { + log_error ("Cannot find proper prime number."); + return -1; + } + int primeNumber2 = 1759; + size_t globalSize[] = {primeNumber2, primeNumber}; + size_t 
localSize[] = {16, maxWgSize/16}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BASIC); + } + + // non_uniform_2d_prime_number_basic_2 + { + int primeNumber = 1327; + size_t globalSize[] = {primeNumber, primeNumber}; + size_t localSize[] = {maxWgSize/32, 32}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BASIC); + } + + // non_uniform_2d_combination_of_max_wg_size_basic + { + size_t globalSize[] = {maxWgSize + 2, maxWgSize + 4}; + size_t localSize[] = {maxWgSize/32, 32}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BASIC); + } + + // non_uniform_2d_two_prime_numbers_and_ls_null_basic + { + int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); + if (primeNumber < 1) { + log_error ("Cannot find proper prime number."); + return -1; + } + unsigned int primeNumber2 = 1669; + size_t globalSize[] = {primeNumber, primeNumber2}; + size_t *localSize = NULL; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BASIC); + } + + // non_uniform_2d_prime_number_and_ls_null_basic + { + unsigned int primeNumber = 1249; + size_t globalSize[] = {primeNumber, primeNumber}; + size_t *localSize = NULL; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BASIC); + } + + // non_uniform_2d_four_prime_numbers_basic + { + unsigned int primeNumber = 1951; + unsigned int primeNumber2 = 911; + unsigned int primeNumber3 = 13; + unsigned int primeNumber4 = 17; + + PrimeNumbers::Result2d fit2dResult; + fit2dResult = PrimeNumbers::fitMaxPrime2d(primeNumber3, primeNumber4, maxWgSize); + + size_t globalSize[] = {primeNumber, primeNumber2}; + size_t localSize[] = {fit2dResult.Val1, fit2dResult.Val2}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), 
globalSize, localSize, Range::BASIC); + } + + // non_uniform_2d_three_prime_numbers_basic + { + int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize/2, maxWgSize); + if (primeNumber < 1) { + log_error ("Cannot find proper prime number."); + return -1; + } + unsigned int primeNumber2 = 42967; + unsigned int primeNumber3 = 13; + size_t globalSize[] = {primeNumber2, primeNumber3}; + size_t localSize[] = {primeNumber, 1}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BASIC); + } + + return exec.status(); +} + +int + test_non_uniform_2d_atomics(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + SubTestExecutor exec(device, context, queue); + + size_t maxWgSize; + int err; + err = exec.calculateWorkGroupSize(maxWgSize, Range::ATOMICS); + if (err) { + log_error ("Cannot calculate work group size."); + return -1; + } + + // non_uniform_2d_max_wg_size_plus_1_atomics + { + size_t globalSize[] = {maxWgSize+1, maxWgSize}; + size_t localSize[] = {maxWgSize, 1}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::ATOMICS); + } + + // non_uniform_2d_prime_number_atomics + { + int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); + if (primeNumber < 1) { + log_error ("Cannot find proper prime number."); + return -1; + } + size_t globalSize[] = {primeNumber, maxWgSize}; + size_t localSize[] = {maxWgSize/2, 2}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::ATOMICS); + } + + // non_uniform_2d_two_prime_numbers_atomics + { + int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); + if (primeNumber < 1) { + log_error ("Cannot find proper prime number."); + return -1; + } + int primeNumber2 = 1759; + size_t globalSize[] = {primeNumber2, primeNumber}; + size_t localSize[] = {16, maxWgSize/16}; + + 
exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::ATOMICS); + } + + // non_uniform_2d_prime_number_atomics_2 + { + int primeNumber = 1327; + size_t globalSize[] = {primeNumber, primeNumber}; + size_t localSize[] = {maxWgSize/32, 32}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::ATOMICS); + } + + // non_uniform_2d_combination_of_max_wg_size_atomics + { + size_t globalSize[] = {maxWgSize + 2, maxWgSize + 4}; + size_t localSize[] = {maxWgSize/32, 32}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::ATOMICS); + } + + // non_uniform_2d_two_prime_numbers_and_ls_null_atomics + { + int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); + if (primeNumber < 1) { + log_error ("Cannot find proper prime number."); + return -1; + } + unsigned int primeNumber2 = 1669; + size_t globalSize[] = {primeNumber, primeNumber2}; + size_t *localSize = NULL; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::ATOMICS); + } + + // non_uniform_2d_prime_number_and_ls_null_atomics + { + unsigned int primeNumber = 1249; + size_t globalSize[] = {primeNumber, primeNumber}; + size_t *localSize = NULL; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::ATOMICS); + } + + // non_uniform_2d_four_prime_numbers_atomics + { + unsigned int primeNumber = 1951; + unsigned int primeNumber2 = 911; + unsigned int primeNumber3 = 13; + unsigned int primeNumber4 = 17; + + PrimeNumbers::Result2d fit2dResult; + fit2dResult = PrimeNumbers::fitMaxPrime2d(primeNumber3, primeNumber4, maxWgSize); + + size_t globalSize[] = {primeNumber, primeNumber2}; + size_t localSize[] = {fit2dResult.Val1, fit2dResult.Val2}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, 
Range::ATOMICS); + } + + // non_uniform_2d_three_prime_numbers_atomics + { + int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize/2, maxWgSize); + if (primeNumber < 1) { + log_error ("Cannot find proper prime number."); + return -1; + } + unsigned int primeNumber2 = 42967; + unsigned int primeNumber3 = 13; + size_t globalSize[] = {primeNumber2, primeNumber3}; + size_t localSize[] = {primeNumber, 1}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::ATOMICS); + } + + return exec.status(); +} + +int + test_non_uniform_2d_barriers(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + SubTestExecutor exec(device, context, queue); + + size_t maxWgSize; + int err; + err = exec.calculateWorkGroupSize(maxWgSize, Range::BARRIERS); + if (err) { + log_error ("Cannot calculate work group size."); + return -1; + } + + // non_uniform_2d_max_wg_size_plus_1_barriers + { + size_t globalSize[] = {maxWgSize+1, maxWgSize}; + size_t localSize[] = {maxWgSize, 1}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BARRIERS); + } + + // non_uniform_2d_prime_number_barriers + { + int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); + if (primeNumber < 1) { + log_error ("Cannot find proper prime number."); + return -1; + } + size_t globalSize[] = {primeNumber, maxWgSize}; + size_t localSize[] = {maxWgSize/2, 2}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BARRIERS); + } + + // non_uniform_2d_two_prime_numbers_barriers + { + int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); + if (primeNumber < 1) { + log_error ("Cannot find proper prime number."); + return -1; + } + int primeNumber2 = 1759; + size_t globalSize[] = {primeNumber2, primeNumber}; + size_t localSize[] = {16, maxWgSize/16}; + + 
exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BARRIERS); + } + + // non_uniform_2d_prime_number_barriers_2 + { + int primeNumber = 1327; + size_t globalSize[] = {primeNumber, primeNumber}; + size_t localSize[] = {maxWgSize/32, 32}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BARRIERS); + } + + // non_uniform_2d_combination_of_max_wg_size_barriers + { + size_t globalSize[] = {maxWgSize + 2, maxWgSize + 4}; + size_t localSize[] = {maxWgSize/32, 32}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BARRIERS); + } + + // non_uniform_2d_two_prime_numbers_and_ls_null_barriers + { + int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); + if (primeNumber < 1) { + log_error ("Cannot find proper prime number."); + return -1; + } + unsigned int primeNumber2 = 1669; + size_t globalSize[] = {primeNumber, primeNumber2}; + size_t *localSize = NULL; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BARRIERS); + } + + // non_uniform_2d_prime_number_and_ls_null_barriers + { + unsigned int primeNumber = 1249; + size_t globalSize[] = {primeNumber, primeNumber}; + size_t *localSize = NULL; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BARRIERS); + } + + // non_uniform_2d_four_prime_numbers_barriers + { + unsigned int primeNumber = 1951; + unsigned int primeNumber2 = 911; + unsigned int primeNumber3 = 13; + unsigned int primeNumber4 = 17; + PrimeNumbers::Result2d fit2dResult; + fit2dResult = PrimeNumbers::fitMaxPrime2d(primeNumber3, primeNumber4, maxWgSize); + size_t globalSize[] = {primeNumber, primeNumber2}; + size_t localSize[] = {fit2dResult.Val1, fit2dResult.Val2}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, 
localSize, Range::BARRIERS); + } + + // non_uniform_2d_three_prime_numbers_barriers + { + int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize/2, maxWgSize); + if (primeNumber < 1) { + log_error ("Cannot find proper prime number."); + return -1; + } + unsigned int primeNumber2 = 42967; + unsigned int primeNumber3 = 13; + size_t globalSize[] = {primeNumber2, primeNumber3}; + size_t localSize[] = {primeNumber, 1}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BARRIERS); + } + + return exec.status(); +} diff --git a/test_conformance/non_uniform_work_group/test_advanced_3d.cpp b/test_conformance/non_uniform_work_group/test_advanced_3d.cpp new file mode 100644 index 00000000..1e205525 --- /dev/null +++ b/test_conformance/non_uniform_work_group/test_advanced_3d.cpp @@ -0,0 +1,410 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "procs.h" +#include "tools.h" + +#include "TestNonUniformWorkGroup.h" + +int + test_non_uniform_3d_basic(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + SubTestExecutor exec(device, context, queue); + + size_t maxWgSize; + int err; + err = exec.calculateWorkGroupSize(maxWgSize, Range::BASIC); + if (err) { + log_error ("Cannot calculate work group size."); + return -1; + } + + // non_uniform_3d_max_wg_size_plus_1_basic + { + size_t globalSize[] = {maxWgSize+1, maxWgSize/24, maxWgSize/24}; + size_t localSize[] = {maxWgSize, 1, 1}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BASIC); + } + + // non_uniform_3d_prime_number_basic + { + int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); + if (primeNumber < 1) { + log_error ("Cannot find proper prime number."); + return -1; + } + size_t globalSize[] = {maxWgSize/25, primeNumber, maxWgSize/25}; + size_t localSize[] = {2, std::max(maxWgSize/4,1), 2}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BASIC); + } + + // non_uniform_3d_two_prime_numbers_basic + { + int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); + if (primeNumber < 1) { + log_error ("Cannot find proper prime number."); + return -1; + } + int primeNumber2 = 13; + size_t globalSize[] = {primeNumber2, maxWgSize/8, primeNumber}; + size_t localSize[] = {8, 4, std::max(maxWgSize/32,1)}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BASIC); + } + + // non_uniform_3d_prime_number_basic_2 + { + int primeNumber = 113; + size_t globalSize[] = {primeNumber, primeNumber, primeNumber}; + size_t localSize[] = {8, std::max(maxWgSize/32,1), 4}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BASIC); + } + + // 
non_uniform_3d_two_prime_numbers_and_ls_null_basic + { + int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); + if (primeNumber < 1) { + log_error ("Cannot find proper prime number."); + return -1; + } + unsigned int primeNumber2 = 23; + size_t globalSize[] = {primeNumber, primeNumber2, maxWgSize/16}; + size_t *localSize = NULL; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BASIC); + } + + // non_uniform_3d_prime_number_and_ls_null_basic + { + unsigned int primeNumber = 113; + size_t globalSize[] = {primeNumber, primeNumber, primeNumber}; + size_t *localSize = NULL; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BASIC); + } + + // non_uniform_3d_three_prime_numbers_basic + { + int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize/2, maxWgSize); + if (primeNumber < 1) { + log_error ("Cannot find proper prime number."); + return -1; + } + unsigned int primeNumber2 = 10711; + unsigned int primeNumber3 = 13; + size_t globalSize[] = {primeNumber2, primeNumber3, primeNumber3}; + size_t localSize[] = {primeNumber, 1, 1}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BASIC); + } + + // non_uniform_3d_four_prime_numbers_basic + { + unsigned int primeNumber = 541; + unsigned int primeNumber2 = 251; + unsigned int primeNumber3 = 13; + unsigned int primeNumber4 = 17; + PrimeNumbers::Result2d fit2dResult; + fit2dResult = PrimeNumbers::fitMaxPrime2d(primeNumber3, primeNumber4, maxWgSize); + + size_t globalSize[] = {primeNumber, primeNumber2, primeNumber3}; + size_t localSize[] = {fit2dResult.Val1, fit2dResult.Val2, 1}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BASIC); + } + + // non_uniform_3d_six_prime_numbers_basic + { + unsigned int primeNumber = 373; + unsigned int primeNumber2 = 13; + 
unsigned int primeNumber3 = 279; + unsigned int primeNumber4 = 3; + unsigned int primeNumber5 = 5; + unsigned int primeNumber6 = 7; + PrimeNumbers::Result3d fit3dResult; + fit3dResult = PrimeNumbers::fitMaxPrime3d(primeNumber4,primeNumber5,primeNumber6,maxWgSize ); + + size_t globalSize[] = {primeNumber, primeNumber2, primeNumber3}; + size_t localSize[] = {fit3dResult.Val1, fit3dResult.Val2, fit3dResult.Val3}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BASIC); + } + + return exec.status(); +} + +int + test_non_uniform_3d_atomics(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + SubTestExecutor exec(device, context, queue); + + size_t maxWgSize; + int err; + err = exec.calculateWorkGroupSize(maxWgSize, Range::ATOMICS); + if (err) { + log_error ("Cannot calculate work group size."); + return -1; + } + + // non_uniform_3d_max_wg_size_plus_1_atomics + { + size_t globalSize[] = {maxWgSize+1, maxWgSize/24, maxWgSize/24}; + size_t localSize[] = {maxWgSize, 1, 1}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::ATOMICS); + } + + // non_uniform_3d_prime_number_atomics + { + int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); + if (primeNumber < 1) { + log_error ("Cannot find proper prime number."); + return -1; + } + size_t globalSize[] = {maxWgSize/25, primeNumber, maxWgSize/25}; + size_t localSize[] = {2, std::max(maxWgSize/4,1), 2}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::ATOMICS); + } + + // non_uniform_3d_two_prime_numbers_atomics + { + int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); + if (primeNumber < 1) { + log_error ("Cannot find proper prime number."); + return -1; + } + int primeNumber2 = 13; + size_t globalSize[] = {primeNumber2, maxWgSize/8, primeNumber}; + size_t localSize[] = 
{8, 4, std::max(maxWgSize/32,1)}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::ATOMICS); + } + + // non_uniform_3d_prime_number_atomics_2 + { + int primeNumber = 113; + size_t globalSize[] = {primeNumber, primeNumber, primeNumber}; + size_t localSize[] = {8, std::max(maxWgSize/32,1), 4}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::ATOMICS); + } + + // non_uniform_3d_two_prime_numbers_and_ls_null_atomics + { + int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); + if (primeNumber < 1) { + log_error ("Cannot find proper prime number."); + return -1; + } + unsigned int primeNumber2 = 23; + size_t globalSize[] = {primeNumber, primeNumber2, maxWgSize/16}; + size_t *localSize = NULL; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::ATOMICS); + } + + // non_uniform_3d_prime_number_and_ls_null_atomics + { + unsigned int primeNumber = 113; + size_t globalSize[] = {primeNumber, primeNumber, primeNumber}; + size_t *localSize = NULL; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::ATOMICS); + } + + // non_uniform_3d_three_prime_numbers_atomics + { + int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize/2, maxWgSize); + if (primeNumber < 1) { + log_error ("Cannot find proper prime number."); + return -1; + } + unsigned int primeNumber2 = 10711; + unsigned int primeNumber3 = 13; + size_t globalSize[] = {primeNumber2, primeNumber3, primeNumber3}; + size_t localSize[] = {primeNumber, 1, 1}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::ATOMICS); + } + + // non_uniform_3d_four_prime_numbers_atomics + { + unsigned int primeNumber = 541; + unsigned int primeNumber2 = 251; + unsigned int primeNumber3 = 13; + unsigned int primeNumber4 = 17; + 
PrimeNumbers::Result2d fit2dResult; + fit2dResult = PrimeNumbers::fitMaxPrime2d(primeNumber3, primeNumber4, maxWgSize); + + size_t globalSize[] = {primeNumber, primeNumber2, primeNumber3}; + size_t localSize[] = {fit2dResult.Val1, fit2dResult.Val2, 1}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::ATOMICS); + } + + // non_uniform_3d_six_prime_numbers_atomics + { + unsigned int primeNumber = 373; + unsigned int primeNumber2 = 13; + unsigned int primeNumber3 = 279; + unsigned int primeNumber4 = 3; + unsigned int primeNumber5 = 5; + unsigned int primeNumber6 = 7; + PrimeNumbers::Result3d fit3dResult; + fit3dResult = PrimeNumbers::fitMaxPrime3d(primeNumber4, primeNumber5, primeNumber6, maxWgSize); + + size_t globalSize[] = {primeNumber, primeNumber2, primeNumber3}; + size_t localSize[] = {fit3dResult.Val1, fit3dResult.Val2, fit3dResult.Val3}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::ATOMICS); + } + + return exec.status(); +} + +int + test_non_uniform_3d_barriers(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + SubTestExecutor exec(device, context, queue); + + size_t maxWgSize; + int err; + err = exec.calculateWorkGroupSize(maxWgSize, Range::BARRIERS); + if (err) { + log_error ("Cannot calculate work group size."); + return -1; + } + + // non_uniform_3d_max_wg_size_plus_1_barriers + { + size_t globalSize[] = {maxWgSize+1, maxWgSize/24, maxWgSize/24}; + size_t localSize[] = {maxWgSize, 1, 1}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BARRIERS); + } + + // non_uniform_3d_prime_number_barriers + { + int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); + if (primeNumber < 1) { + log_error ("Cannot find proper prime number."); + return -1; + } + size_t globalSize[] = {maxWgSize/25, primeNumber, maxWgSize/25}; + size_t 
localSize[] = {2, std::max(maxWgSize/4,1), 2}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BARRIERS); + } + + // non_uniform_3d_two_prime_numbers_barriers + { + int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); + if (primeNumber < 1) { + log_error ("Cannot find proper prime number."); + return -1; + } + int primeNumber2 = 13; + size_t globalSize[] = {primeNumber2, maxWgSize/8, primeNumber}; + size_t localSize[] = {8, 4, std::max(maxWgSize/32,1)}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BARRIERS); + } + + // non_uniform_3d_prime_number_barriers_2 + { + int primeNumber = 113; + size_t globalSize[] = {primeNumber, primeNumber, primeNumber}; + size_t localSize[] = {8, std::max(maxWgSize/32,1), 4}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BARRIERS); + } + + // non_uniform_3d_two_prime_numbers_and_ls_null_barriers + { + int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); + if (primeNumber < 1) { + log_error ("Cannot find proper prime number."); + return -1; + } + unsigned int primeNumber2 = 23; + size_t globalSize[] = {primeNumber, primeNumber2, maxWgSize/16}; + size_t *localSize = NULL; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BARRIERS); + } + + // non_uniform_3d_prime_number_and_ls_null_barriers + { + unsigned int primeNumber = 113; + size_t globalSize[] = {primeNumber, primeNumber, primeNumber}; + size_t *localSize = NULL; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BARRIERS); + } + + // non_uniform_3d_three_prime_numbers_barriers + { + int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize/2, maxWgSize); + if (primeNumber < 1) { + log_error ("Cannot find proper prime number."); + 
return -1; + } + unsigned int primeNumber2 = 10711; + unsigned int primeNumber3 = 13; + size_t globalSize[] = {primeNumber2, primeNumber3, primeNumber3}; + size_t localSize[] = {primeNumber, 1, 1}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BARRIERS); + } + + // non_uniform_3d_four_prime_numbers_barriers + { + unsigned int primeNumber = 541; + unsigned int primeNumber2 = 251; + unsigned int primeNumber3 = 13; + unsigned int primeNumber4 = 17; + PrimeNumbers::Result2d fit2dResult; + fit2dResult = PrimeNumbers::fitMaxPrime2d(primeNumber3, primeNumber4, maxWgSize); + + size_t globalSize[] = {primeNumber, primeNumber2, primeNumber3}; + size_t localSize[] = {fit2dResult.Val1, fit2dResult.Val2, 1}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BARRIERS); + } + + + // non_uniform_3d_six_prime_numbers_barriers + { + unsigned int primeNumber = 373; + unsigned int primeNumber2 = 13; + unsigned int primeNumber3 = 279; + unsigned int primeNumber4 = 3; + unsigned int primeNumber5 = 5; + unsigned int primeNumber6 = 7; + PrimeNumbers::Result3d fit3dResult; + fit3dResult = PrimeNumbers::fitMaxPrime3d(primeNumber4,primeNumber5,primeNumber6,maxWgSize ); + + size_t globalSize[] = {primeNumber, primeNumber2, primeNumber3}; + size_t localSize[] = {fit3dResult.Val1, fit3dResult.Val2, fit3dResult.Val3}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BARRIERS); + } + + return exec.status(); +} diff --git a/test_conformance/non_uniform_work_group/test_advanced_other.cpp b/test_conformance/non_uniform_work_group/test_advanced_other.cpp new file mode 100644 index 00000000..6aed795c --- /dev/null +++ b/test_conformance/non_uniform_work_group/test_advanced_other.cpp @@ -0,0 +1,279 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "procs.h" +#include "tools.h" + +#include "TestNonUniformWorkGroup.h" + +int + test_non_uniform_other_basic(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + SubTestExecutor exec(device, context, queue); + + size_t maxWgSize; + int err; + err = exec.calculateWorkGroupSize(maxWgSize, Range::BASIC); + if (err) { + log_error ("Cannot calculate work group size."); + return -1; + } + + // non_uniform_1d_two_prime_numbers_offset_basic + { + unsigned int primeNumber = 42967; + unsigned int primeNumber2 = 113; + PrimeNumbers::Result1d fit1dResult; + + fit1dResult = PrimeNumbers::fitMaxPrime1d(primeNumber2, maxWgSize ); + + size_t globalSize[] = {primeNumber}; + size_t localSize[] = {fit1dResult.Val1}; + size_t offset[] = {23}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, offset, NULL, Range::BASIC); + } + + // non_uniform_2d_three_prime_numbers_offset_basic + { + int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize/2, maxWgSize); + if (primeNumber < 1) { + log_error ("Cannot find proper prime number."); + return -1; + } + unsigned int primeNumber2 = 42967; + unsigned int primeNumber3 = 13; + size_t globalSize[] = {primeNumber2, primeNumber3}; + size_t localSize[] = {primeNumber, 1}; + size_t offset[] = {23, 17}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, 
localSize, offset, NULL, Range::BASIC); + } + + // non_uniform_3d_six_prime_numbers_offset_basic + { + unsigned int primeNumber = 373; + unsigned int primeNumber2 = 13; + unsigned int primeNumber3 = 279; + unsigned int primeNumber4 = 3; + unsigned int primeNumber5 = 5; + unsigned int primeNumber6 = 7; + + PrimeNumbers::Result3d fit3dResult; + + size_t globalSize[] = {primeNumber, primeNumber2, primeNumber3}; + + fit3dResult = PrimeNumbers::fitMaxPrime3d(primeNumber4, primeNumber5, primeNumber6, maxWgSize ); + + size_t localSize[] = {fit3dResult.Val1, fit3dResult.Val2, fit3dResult.Val3}; + size_t offset[] = {11, 23, 17}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, offset, NULL, Range::BASIC); + } + + // non_uniform_3d_six_prime_numbers_rwgs_basic + { + unsigned int primeNumber = 373; + unsigned int primeNumber2 = 13; + unsigned int primeNumber3 = 279; + unsigned int primeNumber4 = 3; + unsigned int primeNumber5 = 5; + unsigned int primeNumber6 = 7; + PrimeNumbers::Result3d fit3dResult; + + fit3dResult = PrimeNumbers::fitMaxPrime3d(primeNumber4, primeNumber5, primeNumber6, maxWgSize ); + + size_t globalSize[] = {primeNumber, primeNumber2, primeNumber3}; + size_t localSize[] = {fit3dResult.Val1, fit3dResult.Val2, fit3dResult.Val3}; + size_t reqdWorkGroupSize[] = {fit3dResult.Val1, fit3dResult.Val2, fit3dResult.Val3}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, NULL, reqdWorkGroupSize, Range::BASIC); + } + + return exec.status(); +} + +int + test_non_uniform_other_atomics(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + SubTestExecutor exec(device, context, queue); + + size_t maxWgSize; + int err; + err = exec.calculateWorkGroupSize(maxWgSize, Range::ATOMICS); + if (err) { + log_error ("Cannot calculate work group size."); + return -1; + } + + // non_uniform_1d_two_prime_numbers_offset_atomics + { + unsigned int 
primeNumber = 42967; + unsigned int primeNumber2 = 113; + PrimeNumbers::Result1d fit1dResult; + + fit1dResult = PrimeNumbers::fitMaxPrime1d(primeNumber2, maxWgSize ); + + size_t globalSize[] = {primeNumber}; + size_t localSize[] = {fit1dResult.Val1}; + size_t offset[] = {23}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, offset, NULL, Range::ATOMICS); + } + + // non_uniform_2d_three_prime_numbers_offset_atomics + { + int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize/2, maxWgSize); + if (primeNumber < 1) { + log_error ("Cannot find proper prime number."); + return -1; + } + unsigned int primeNumber2 = 42967; + unsigned int primeNumber3 = 13; + size_t globalSize[] = {primeNumber2, primeNumber3}; + size_t localSize[] = {primeNumber, 1}; + size_t offset[] = {23, 17}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, offset, NULL, Range::ATOMICS); + } + + // non_uniform_3d_six_prime_numbers_offset_atomics + { + unsigned int primeNumber = 373; + unsigned int primeNumber2 = 13; + unsigned int primeNumber3 = 279; + unsigned int primeNumber4 = 3; + unsigned int primeNumber5 = 5; + unsigned int primeNumber6 = 7; + PrimeNumbers::Result3d fit3dResult; + + fit3dResult = PrimeNumbers::fitMaxPrime3d(primeNumber4, primeNumber5, primeNumber6, maxWgSize ); + + size_t globalSize[] = {primeNumber, primeNumber2, primeNumber3}; + size_t localSize[] = {fit3dResult.Val1, fit3dResult.Val2, fit3dResult.Val3}; + size_t offset[] = {11, 23, 17}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, offset, NULL, Range::ATOMICS); + } + + // non_uniform_3d_six_prime_numbers_rwgs_atomics + { + unsigned int primeNumber = 373; + unsigned int primeNumber2 = 13; + unsigned int primeNumber3 = 279; + unsigned int primeNumber4 = 3; + unsigned int primeNumber5 = 5; + unsigned int primeNumber6 = 7; + PrimeNumbers::Result3d fit3dResult; + + 
fit3dResult = PrimeNumbers::fitMaxPrime3d(primeNumber4, primeNumber5, primeNumber6, maxWgSize ); + + size_t globalSize[] = {primeNumber, primeNumber2, primeNumber3}; + size_t localSize[] = {fit3dResult.Val1, fit3dResult.Val2, fit3dResult.Val3}; + size_t reqdWorkGroupSize[] = {fit3dResult.Val1, fit3dResult.Val2, fit3dResult.Val3}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, NULL, reqdWorkGroupSize, Range::ATOMICS); + } + + return exec.status(); +} + +int + test_non_uniform_other_barriers(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + SubTestExecutor exec(device, context, queue); + + size_t maxWgSize; + int err; + err = exec.calculateWorkGroupSize(maxWgSize, Range::BARRIERS); + if (err) { + log_error ("Cannot calculate work group size."); + return -1; + } + + // non_uniform_1d_two_prime_numbers_offset_barriers + { + unsigned int primeNumber = 42967; + unsigned int primeNumber2 = 113; + PrimeNumbers::Result1d fit1dResult; + + fit1dResult = PrimeNumbers::fitMaxPrime1d(primeNumber2, maxWgSize ); + + size_t globalSize[] = {primeNumber}; + + size_t localSize[] = {fit1dResult.Val1}; + size_t offset[] = {23}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, offset, NULL, Range::BARRIERS); + } + + // non_uniform_2d_three_prime_numbers_offset_barriers + { + int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize/2, maxWgSize); + if (primeNumber < 1) { + log_error ("Cannot find proper prime number."); + return -1; + } + unsigned int primeNumber2 = 42967; + unsigned int primeNumber3 = 13; + size_t globalSize[] = {primeNumber2, primeNumber3}; + size_t localSize[] = {primeNumber, 1}; + size_t offset[] = {23, 17}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, offset, NULL, Range::BARRIERS); + } + + // non_uniform_3d_six_prime_numbers_offset_barriers + { + unsigned int 
primeNumber = 373; + unsigned int primeNumber2 = 13; + unsigned int primeNumber3 = 279; + unsigned int primeNumber4 = 3; + unsigned int primeNumber5 = 5; + unsigned int primeNumber6 = 7; + PrimeNumbers::Result3d fit3dResult; + + fit3dResult = PrimeNumbers::fitMaxPrime3d(primeNumber4, primeNumber5, primeNumber6, maxWgSize ); + + size_t globalSize[] = {primeNumber, primeNumber2, primeNumber3}; + + size_t localSize[] = {fit3dResult.Val1, fit3dResult.Val2, fit3dResult.Val3}; + size_t offset[] = {11, 23, 17}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, offset, NULL, Range::BARRIERS); + } + + // non_uniform_3d_six_prime_numbers_rwgs_barriers + { + unsigned int primeNumber = 373; + unsigned int primeNumber2 = 13; + unsigned int primeNumber3 = 279; + unsigned int primeNumber4 = 3; + unsigned int primeNumber5 = 5; + unsigned int primeNumber6 = 7; + PrimeNumbers::Result3d fit3dResult; + + fit3dResult = PrimeNumbers::fitMaxPrime3d(primeNumber4, primeNumber5, primeNumber6, maxWgSize ); + + size_t globalSize[] = {primeNumber, primeNumber2, primeNumber3}; + + size_t localSize[] = {fit3dResult.Val1, fit3dResult.Val2, fit3dResult.Val3}; + size_t reqdWorkGroupSize[] = {fit3dResult.Val1, fit3dResult.Val2, fit3dResult.Val3}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, NULL, reqdWorkGroupSize, Range::BARRIERS); + } + + return exec.status(); +} diff --git a/test_conformance/non_uniform_work_group/test_basic.cpp b/test_conformance/non_uniform_work_group/test_basic.cpp new file mode 100644 index 00000000..2bf410bc --- /dev/null +++ b/test_conformance/non_uniform_work_group/test_basic.cpp @@ -0,0 +1,398 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "procs.h" +#include "tools.h" + +#include "TestNonUniformWorkGroup.h" + +int + test_non_uniform_1d_basic(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + SubTestExecutor exec(device, context, queue); + + size_t maxWgSize; + int err; + err = exec.calculateWorkGroupSize(maxWgSize, Range::BASIC); + if (err) { + log_error ("Cannot calculate work group size."); + return -1; + } + + // non_uniform_1d_max_wg_size_plus_1_basic + { + size_t globalSize[] = {maxWgSize+1}; + size_t localSize[] = {maxWgSize}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BASIC); + } + + // non_uniform_1d_prime_number_basic + { + int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); + if (primeNumber < 1) { + log_error ("Cannot find proper prime number."); + return -1; + } + size_t globalSize[] = {primeNumber}; + size_t localSize[] = {maxWgSize}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BASIC); + } + + // non_uniform_1d_max_wg_size_plus_prime_number_basic + { + int primeNumber = 11; + size_t globalSize[] = {maxWgSize+primeNumber}; + size_t localSize[] = {maxWgSize}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BASIC); + } + + // non_uniform_1d_max_wg_size_plus_prime_number_basic_2 + { + int primeNumber = 53; + size_t globalSize[] = {maxWgSize+primeNumber}; + size_t localSize[] = {maxWgSize}; + + 
exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BASIC); + } + + // non_uniform_1d_2max_wg_size_minus_1_basic + { + size_t globalSize[] = {2*maxWgSize - 1}; + size_t localSize[] = {maxWgSize}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BASIC); + } + + // non_uniform_1d_prime_number_basic_2 + { + unsigned int primeNumber = 20101; + size_t globalSize[] = {primeNumber}; + size_t localSize[] = {maxWgSize}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BASIC); + } + + // non_uniform_1d_prime_number_basic_3 + { + unsigned int primeNumber = 42967; + size_t globalSize[] = {primeNumber}; + size_t localSize[] = {maxWgSize}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BASIC); + } + + // non_uniform_1d_prime_number_basic_4 + { + unsigned int primeNumber = 65521; + size_t globalSize[] = {primeNumber}; + size_t localSize[] = {maxWgSize}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BASIC); + } + + // non_uniform_1d_prime_number_and_ls_null_basic_2 + { + int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); + if (primeNumber < 1) { + log_error ("Cannot find proper prime number."); + return -1; + } + size_t globalSize[] = {primeNumber}; + size_t *localSize = NULL; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BASIC); + } + + // non_uniform_1d_prime_number_and_ls_null_basic_3 + { + unsigned int primeNumber = 65521; + size_t globalSize[] = {primeNumber}; + size_t *localSize = NULL; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BASIC); + } + + // non_uniform_1d_two_prime_numbers_basic + { + unsigned int primeNumber = 42967; + unsigned 
int primeNumber2 = 113; + PrimeNumbers::Result1d fit1dResult; + + fit1dResult = PrimeNumbers::fitMaxPrime1d(primeNumber2, maxWgSize ); + + size_t globalSize[] = {primeNumber}; + size_t localSize[] = {fit1dResult.Val1}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BASIC); + } + + return exec.status(); +} + +int + test_non_uniform_1d_atomics(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + SubTestExecutor exec(device, context, queue); + + size_t maxWgSize; + int err; + err = exec.calculateWorkGroupSize(maxWgSize, Range::ATOMICS); + if (err) { + log_error ("Cannot calculate work group size."); + return -1; + } + + // non_uniform_1d_max_wg_size_plus_1_atomics + { + size_t globalSize[] = {maxWgSize+1}; + size_t localSize[] = {maxWgSize}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::ATOMICS); + } + + // non_uniform_1d_prime_number_atomics + { + int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); + if (primeNumber < 1) { + log_error ("Cannot find proper prime number."); + return -1; + } + size_t globalSize[] = {primeNumber}; + size_t localSize[] = {maxWgSize}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::ATOMICS); + } + + // non_uniform_1d_max_wg_size_plus_prime_number_atomics + { + int primeNumber = 11; + size_t globalSize[] = {maxWgSize+primeNumber}; + size_t localSize[] = {maxWgSize}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::ATOMICS); + } + + // non_uniform_1d_max_wg_size_plus_prime_number_atomics_2 + { + int primeNumber = 53; + size_t globalSize[] = {maxWgSize+primeNumber}; + size_t localSize[] = {maxWgSize}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::ATOMICS); + } + + // 
non_uniform_1d_2max_wg_size_minus_1_atomics + { + size_t globalSize[] = {2*maxWgSize - 1}; + size_t localSize[] = {maxWgSize}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::ATOMICS); + } + + // non_uniform_1d_prime_number_atomics_2 + { + unsigned int primeNumber = 20101; + size_t globalSize[] = {primeNumber}; + size_t localSize[] = {maxWgSize}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::ATOMICS); + } + + // non_uniform_1d_prime_number_atomics_3 + { + unsigned int primeNumber = 42967; + size_t globalSize[] = {primeNumber}; + size_t localSize[] = {maxWgSize}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::ATOMICS); + } + + // non_uniform_1d_prime_number_atomics_4 + { + unsigned int primeNumber = 65521; + size_t globalSize[] = {primeNumber}; + size_t localSize[] = {maxWgSize}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::ATOMICS); + } + + // non_uniform_1d_prime_number_and_ls_null_atomics_2 + { + int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); + if (primeNumber < 1) { + log_error ("Cannot find proper prime number."); + return -1; + } + size_t globalSize[] = {primeNumber}; + size_t *localSize = NULL; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::ATOMICS); + } + + // non_uniform_1d_prime_number_and_ls_null_atomics_3 + { + unsigned int primeNumber = 65521; + size_t globalSize[] = {primeNumber}; + size_t *localSize = NULL; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::ATOMICS); + } + + // non_uniform_1d_two_prime_numbers_atomics + { + unsigned int primeNumber = 42967; + unsigned int primeNumber2 = 113; + PrimeNumbers::Result1d fit1dResult; + + fit1dResult = 
PrimeNumbers::fitMaxPrime1d(primeNumber2, maxWgSize ); + + size_t globalSize[] = {primeNumber}; + size_t localSize[] = {fit1dResult.Val1}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::ATOMICS); + } + + return exec.status(); +} + +int + test_non_uniform_1d_barriers(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + SubTestExecutor exec(device, context, queue); + + size_t maxWgSize; + int err; + err = exec.calculateWorkGroupSize(maxWgSize, Range::BARRIERS); + if (err) { + log_error ("Cannot calculate work group size."); + return -1; + } + + // non_uniform_1d_max_wg_size_plus_1_barriers + { + size_t globalSize[] = {maxWgSize+1}; + size_t localSize[] = {maxWgSize}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BARRIERS); + } + + // non_uniform_1d_prime_number_barriers + { + int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); + if (primeNumber < 1) { + log_error ("Cannot find proper prime number."); + return -1; + } + size_t globalSize[] = {primeNumber}; + size_t localSize[] = {maxWgSize}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BARRIERS); + } + + // non_uniform_1d_max_wg_size_plus_prime_number_barriers + { + int primeNumber = 11; + size_t globalSize[] = {maxWgSize+primeNumber}; + size_t localSize[] = {maxWgSize}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BARRIERS); + } + + // non_uniform_1d_max_wg_size_plus_prime_number_barriers_2 + { + int primeNumber = 53; + size_t globalSize[] = {maxWgSize+primeNumber}; + size_t localSize[] = {maxWgSize}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BARRIERS); + } + + // non_uniform_1d_2max_wg_size_minus_1_barriers + { + size_t globalSize[] = 
{2*maxWgSize - 1}; + size_t localSize[] = {maxWgSize}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BARRIERS); + } + + // non_uniform_1d_prime_number_barriers_2 + { + unsigned int primeNumber = 20101; + size_t globalSize[] = {primeNumber}; + size_t localSize[] = {maxWgSize}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BARRIERS); + } + + // non_uniform_1d_prime_number_barriers_3 + { + unsigned int primeNumber = 42967; + size_t globalSize[] = {primeNumber}; + size_t localSize[] = {maxWgSize}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BARRIERS); + } + + // non_uniform_1d_prime_number_barriers_4 + { + unsigned int primeNumber = 65521; + size_t globalSize[] = {primeNumber}; + size_t localSize[] = {maxWgSize}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BARRIERS); + } + + // non_uniform_1d_prime_number_and_ls_null_barriers_2 + { + int primeNumber = PrimeNumbers::getPrimeNumberInRange(maxWgSize, 2*maxWgSize); + if (primeNumber < 1) { + log_error ("Cannot find proper prime number."); + return -1; + } + size_t globalSize[] = {primeNumber}; + size_t *localSize = NULL; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BARRIERS); + } + + // non_uniform_1d_prime_number_and_ls_null_barriers_3 + { + unsigned int primeNumber = 65521; + size_t globalSize[] = {primeNumber}; + size_t *localSize = NULL; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BARRIERS); + } + + // non_uniform_1d_two_prime_numbers_barriers + { + unsigned int primeNumber = 42967; + unsigned int primeNumber2 = 113; + + PrimeNumbers::Result1d fit1dResult; + + fit1dResult = PrimeNumbers::fitMaxPrime1d(primeNumber2, maxWgSize ); + + size_t 
globalSize[] = {primeNumber}; + size_t localSize[] = {fit1dResult.Val1}; + + exec.runTestNonUniformWorkGroup(sizeof(globalSize)/sizeof(globalSize[0]), globalSize, localSize, Range::BARRIERS); + } + + return exec.status(); +} diff --git a/test_conformance/non_uniform_work_group/tools.cpp b/test_conformance/non_uniform_work_group/tools.cpp new file mode 100644 index 00000000..ef732d9d --- /dev/null +++ b/test_conformance/non_uniform_work_group/tools.cpp @@ -0,0 +1,274 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "tools.h" +#include +#include "../../test_common/harness/errorHelpers.h" + +PrimeNumbersCollection PrimeNumbers::primeNumbers; +// Method generates prime numbers using Sieve of Eratosthenes algorithm +void PrimeNumbers::generatePrimeNumbers (unsigned int maxValue) { + + primeNumbers.clear(); + + for (unsigned int i=2; i < maxValue; i++) + primeNumbers.push_back(i); + + PrimeNumbersCollection::iterator it, it2; + it = primeNumbers.begin(); + it2 = primeNumbers.begin(); + + unsigned int maxValueSqrt = (unsigned int)sqrt((double)maxValue); + + for (; it != primeNumbers.end(); it++) { + it2 = it; + ++it2; + if(*it>maxValueSqrt) + break; + for (;it2 != primeNumbers.end();) + if (*it2 % *it == 0) + it2 = primeNumbers.erase(it2); + else + ++it2; + } +} + +// Returns prime number for specified range +int PrimeNumbers::getPrimeNumberInRange (size_t lowerValue, size_t higherValue) { + if(lowerValue >= higherValue) + return -1; + + if(primeNumbers.back() < lowerValue) + return -2; + + PrimeNumbersCollection::iterator it = primeNumbers.begin(); + + for (; it != primeNumbers.end(); ++it) { + if (lowerValue<*it) { + if(higherValue>*it) + return *it; + else + return -3; + } + } + return -1; +} + + +int PrimeNumbers::getNextLowerPrimeNumber(size_t upperValue) { + size_t retVal = 1; + + PrimeNumbersCollection::iterator it = primeNumbers.begin(); + + for (; it != primeNumbers.end(); ++it) { + if (upperValue > *it) { + retVal = *it; + } else { + break; + } + } + return retVal; +} + +PrimeNumbers::Result1d PrimeNumbers::fitMaxPrime1d(size_t val1, size_t maxVal){ + + PrimeNumbers::Result1d result; + + if (maxVal == 1) { + result.Val1 = 1; + return result; + } + + while(val1 > maxVal) + { + val1 = PrimeNumbers::getNextLowerPrimeNumber(val1); + } + + result.Val1 = val1; + return result; +} + +PrimeNumbers::Result2d PrimeNumbers::fitMaxPrime2d(size_t val1, size_t val2, size_t productMax) { + + PrimeNumbers::Result2d result; + + if (productMax == 1) { + result.Val1 = 1; + 
result.Val2 = 1; + return result; + } + + while ((val2 * val1) > productMax) { + if ((val2 > val1) && (val2 > 1)) { + val2 = PrimeNumbers::getNextLowerPrimeNumber(val2); + continue; + } + if (val1 > 1) { + val1 = PrimeNumbers::getNextLowerPrimeNumber(val1); + continue; + } + break; + } + result.Val1 = val1; + result.Val2 = val2; + return result; +} + + +PrimeNumbers::Result3d PrimeNumbers::fitMaxPrime3d(size_t val1, size_t val2, size_t val3, size_t productMax) { + + Result3d result; + + if (productMax == 1) { + result.Val1 = 1; + result.Val2 = 1; + result.Val3 = 1; + return result; + } + + while ((val3 * val2 * val1) > productMax) { + if ((val3 > val2) && (val3 > val1) && (val3 > 1)) { + val3 = PrimeNumbers::getNextLowerPrimeNumber(val3); + continue; + } + if ((val2 > val1) && (val2 > 1)) { + val2 = PrimeNumbers::getNextLowerPrimeNumber(val2); + continue; + } + if (val1 > 1) { + val1 = PrimeNumbers::getNextLowerPrimeNumber(val1); + continue; + } + break; + } + result.Val1 = val1; + result.Val2 = val2; + result.Val3 = val3; + return result; +} + +namespace Error { +ErrorMap::value_type rawDataErrorString[] = { + ErrorMap::value_type(ERR_GLOBAL_SIZE, "global size"), + ErrorMap::value_type(ERR_GLOBAL_WORK_OFFSET, "global work offset"), + ErrorMap::value_type(ERR_LOCAL_SIZE, "local size"), + ErrorMap::value_type(ERR_GLOBAL_ID, "global id"), + ErrorMap::value_type(ERR_LOCAL_ID, "local id"), + ErrorMap::value_type(ERR_ENQUEUED_LOCAL_SIZE, "enqueued local size"), + ErrorMap::value_type(ERR_LOCAL_SIZE, "local size"), + ErrorMap::value_type(ERR_NUM_GROUPS, "num groups"), + ErrorMap::value_type(ERR_GROUP_ID, "group id"), + ErrorMap::value_type(ERR_WORK_DIM, "work dim"), + ErrorMap::value_type(ERR_GLOBAL_BARRIER, "global barrier"), + ErrorMap::value_type(ERR_LOCAL_BARRIER, "local barrier"), + ErrorMap::value_type(ERR_GLOBAL_ATOMIC, "global atomic"), + ErrorMap::value_type(ERR_LOCAL_ATOMIC, "local atomic"), + ErrorMap::value_type(ERR_STRICT_MODE, "strict requirements failed. 
Wrong local work group size"), + ErrorMap::value_type(ERR_BUILD_STATUS, "build status"), + ErrorMap::value_type(ERR_UNKNOWN, "[unknown]"), + ErrorMap::value_type(ERR_DIFFERENT, "[different]"), +}; + +const int numElems = sizeof(rawDataErrorString)/sizeof(rawDataErrorString[0]); +ErrorMap errorString (rawDataErrorString, rawDataErrorString+numElems); + +ErrorClass::ErrorClass() { + _overallNumberOfErrors = 0; + _stats.clear(); + for (unsigned short i=0; i= MAX_NUMBER_OF_PRINTED_ERRORS) + return; + + std::string errString = "Error "; + errString += errorString[err]; + errString += " appeared"; + + if(where.compare("") != 0) { + errString += " in "; + errString += where; + } + + if(additionalInfo.compare("") != 0) { + errString += " "; + errString += additionalInfo; + } + printError(errString); +} + +void ErrorClass::show(Type whatErr, std::string where, cl_ulong valueIs, cl_ulong valueExpected) { + std::ostringstream tmp; + tmp << "(is: " << valueIs << ", expected: " << valueExpected << ")"; + show(whatErr, where, tmp.str()); +} + + +void ErrorClass::show(std::string description) { + ++_overallNumberOfErrors; + ++_stats[ERR_DIFFERENT]; + if (_overallNumberOfErrors < MAX_NUMBER_OF_PRINTED_ERRORS) + printError(description); + + if (_overallNumberOfErrors == MAX_NUMBER_OF_PRINTED_ERRORS) + printError("\t. . . Too many errors. 
Application will skip printing them."); +} + +void ErrorClass::printError(std::string errString) { + log_error ("%s\n", errString.c_str()); +} + +void ErrorClass::showStats() { + + Type err; + log_info ("T E S T S U M M A R Y:\n"); + for (ErrorStats::iterator it = _stats.begin(); it != _stats.end(); it++) { + err = (errorString.find(it->first) == errorString.end())?ERR_UNKNOWN:it->first; + std::string errName = errorString[err]; + log_info("Error %s:\t%d\n", errName.c_str(), it->second); + } + + log_info("Overall number of errors:\t%d\n", _overallNumberOfErrors); + +} + +bool ErrorClass::checkError() { + return _overallNumberOfErrors > 0; +} + +// This method is required to synchronize errors counters between kernel and host +void ErrorClass::synchronizeStatsMap() { + for (unsigned short i=0; i(i)] += _errorArrayCounter[i]; + _overallNumberOfErrors += _errorArrayCounter[i]; + } + +} + +} diff --git a/test_conformance/non_uniform_work_group/tools.h b/test_conformance/non_uniform_work_group/tools.h new file mode 100644 index 00000000..2e63c3dd --- /dev/null +++ b/test_conformance/non_uniform_work_group/tools.h @@ -0,0 +1,109 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef _TOOLS_H +#define _TOOLS_H + +#include "procs.h" +#include +#include +#include + +typedef std::vector PrimeNumbersCollection; + + + +// Class responsible for distributing prime numbers +class PrimeNumbers { + +public: + struct Result1d{ + size_t Val1; + }; + + struct Result2d{ + size_t Val1; + size_t Val2; + }; + + struct Result3d{ + size_t Val1; + size_t Val2; + size_t Val3; + }; + + static void generatePrimeNumbers (unsigned int maxValue); + static int getPrimeNumberInRange (size_t lowerValue, size_t higherValue); + static int getNextLowerPrimeNumber (size_t upperValue); + static Result1d fitMaxPrime1d(size_t Val1, size_t productMax); + // Return val1 and Val2 which are largest prime numbers who's product is <= productMax + static Result2d fitMaxPrime2d(size_t Val1, size_t Val2, size_t productMax); + // Return val1, val2 and val3, which are largest prime numbers who's product is <= productMax + static Result3d fitMaxPrime3d(size_t Val1, size_t Val2, size_t Val3, size_t productMax); +private: + static PrimeNumbersCollection primeNumbers; + PrimeNumbers(); +}; + +// Stores information about errors +namespace Error { +#define MAX_NUMBER_OF_PRINTED_ERRORS 10 + enum Type{ + ERR_GLOBAL_SIZE=0, + ERR_GLOBAL_WORK_OFFSET, + ERR_LOCAL_SIZE, + ERR_GLOBAL_ID, + ERR_LOCAL_ID, + ERR_ENQUEUED_LOCAL_SIZE, + ERR_NUM_GROUPS, + ERR_GROUP_ID, + ERR_WORK_DIM, + ERR_GLOBAL_BARRIER, + ERR_LOCAL_BARRIER, + ERR_GLOBAL_ATOMIC, + ERR_LOCAL_ATOMIC, + + ERR_STRICT_MODE, + ERR_BUILD_STATUS, + + ERR_UNKNOWN, + ERR_DIFFERENT, + _LAST_ELEM + }; + + typedef std::map ErrorMap; + typedef std::map ErrorStats; + + class ErrorClass { + public: + ErrorClass(); + void show(Type whatErr, std::string where="", std::string additionalInfo=""); + void show(Type whatErr, std::string where, cl_ulong valueIs, cl_ulong valueExpected); + void show(std::string description); + bool checkError(); + void showStats(); + void synchronizeStatsMap(); + cl_uint * errorArrayCounter() {return 
_errorArrayCounter;}; + size_t errorArrayCounterSize() {return sizeof(_errorArrayCounter);}; + private: + cl_uint _errorArrayCounter[Error::_LAST_ELEM]; // this buffer is passed to kernel + int _overallNumberOfErrors; + ErrorStats _stats; + void printError(std::string errString); + + }; + +} +#endif // _TOOLS_H diff --git a/test_conformance/opencl_conformance_tests_21_full_spirv.csv b/test_conformance/opencl_conformance_tests_21_full_spirv.csv new file mode 100644 index 00000000..cf4921f9 --- /dev/null +++ b/test_conformance/opencl_conformance_tests_21_full_spirv.csv @@ -0,0 +1,122 @@ +# +# OpenCL Conformance Test Suite (full version) +# + +# ######################################### +# Basic Information on the compute device +# ######################################### +Compute Info,computeinfo/test_computeinfo + +# ######################################### +# Basic operation tests +# ######################################### +Basic,basic/test_basic -offlineCompiler spir_v cache . +API,api/test_api -offlineCompiler spir_v cache . +Compiler,compiler/test_compiler -offlineCompiler spir_v cache . + +# ######################################### +# Common mathematical functions +# ######################################### +Common Functions,commonfns/test_commonfns -offlineCompiler spir_v cache . +Geometric Functions,geometrics/test_geometrics -offlineCompiler spir_v cache . +Relationals,relationals/test_relationals -offlineCompiler spir_v cache . + +# ######################################### +# General operation +# ######################################### +Thread Dimensions,thread_dimensions/test_thread_dimensions full* -offlineCompiler spir_v cache . +Multiple Device/Context,multiple_device_context/test_multiples -offlineCompiler spir_v cache . +Atomics,atomics/test_atomics -offlineCompiler spir_v cache . +Profiling,profiling/test_profiling -offlineCompiler spir_v cache . +Events,events/test_events -offlineCompiler spir_v cache . 
+Allocations (single maximum),allocations/test_allocations single 5 all -offlineCompiler spir_v cache . +Allocations (total maximum),allocations/test_allocations multiple 5 all -offlineCompiler spir_v cache . +VecAlign, vec_align/test_vecalign -offlineCompiler spir_v cache . +VecStep, vec_step/test_vecstep -offlineCompiler spir_v cache . +Printf,printf/test_printf -offlineCompiler spir_v cache . +Device Partitioning,device_partition/test_device_partition -offlineCompiler spir_v cache . + +# ######################################### +# Buffers and images +# ######################################### +Images (API Info),images/clGetInfo/test_cl_get_info +Buffers,buffers/test_buffers -offlineCompiler spir_v cache . +Images (Kernel Methods),images/kernel_image_methods/test_kernel_image_methods -offlineCompiler spir_v cache . +Images (Kernel),images/kernel_read_write/test_image_streams CL_FILTER_NEAREST -offlineCompiler spir_v cache . +Images (Kernel pitch),images/kernel_read_write/test_image_streams use_pitches CL_FILTER_NEAREST -offlineCompiler spir_v cache . +Images (Kernel max size),images/kernel_read_write/test_image_streams max_images CL_FILTER_NEAREST -offlineCompiler spir_v cache . 
+Images (clCopyImage),images/clCopyImage/test_cl_copy_images +Images (clCopyImage small),images/clCopyImage/test_cl_copy_images small_images +Images (clCopyImage max size),images/clCopyImage/test_cl_copy_images max_images +Images (clReadWriteImage),images/clReadWriteImage/test_cl_read_write_images +Images (clReadWriteImage pitch),images/clReadWriteImage/test_cl_read_write_images use_pitches +Images (clReadWriteImage max size),images/clReadWriteImage/test_cl_read_write_images max_images +Images (clFillImage),images/clFillImage/test_cl_fill_images +Images (clFillImage pitch),images/clFillImage/test_cl_fill_images use_pitches +Images (clFillImage max size),images/clFillImage/test_cl_fill_images max_images +Images (Samplerless),images/samplerlessReads/test_samplerless_reads -offlineCompiler spir_v cache . +Images (Samplerless pitch),images/samplerlessReads/test_samplerless_reads use_pitches -offlineCompiler spir_v cache . +Images (Samplerless max size),images/samplerlessReads/test_samplerless_reads max_images -offlineCompiler spir_v cache . +Mem (Host Flags),mem_host_flags/test_mem_host_flags + +# ######################################### +# Headers +# ######################################### +Headers (cl_typen), headers/test_headers +Headers (cl.h standalone), headers/test_cl_h +Headers (cl_platform.h standalone), headers/test_cl_platform_h +Headers (cl_gl.h standalone), headers/test_cl_gl_h +Headers (opencl.h standalone), headers/test_opencl_h + +# ######################################### +# CPU is required to pass linear and normalized image filtering +# ######################################### +CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR),images/kernel_read_write/test_image_streams CL_FILTER_LINEAR -offlineCompiler spir_v cache . +CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR pitch),images/kernel_read_write/test_image_streams use_pitches CL_FILTER_LINEAR -offlineCompiler spir_v cache . 
+CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR max size),images/kernel_read_write/test_image_streams max_images CL_FILTER_LINEAR -offlineCompiler spir_v cache . + +# ######################################### +# OpenGL/CL interaction +# ######################################### +OpenCL-GL Sharing,gl/test_gl -offlineCompiler spir_v cache . + +# ######################################### +# Thorough math and conversions tests +# ######################################### +Select,select/test_select -offlineCompiler spir_v cache . +Conversions,conversions/test_conversions -offlineCompiler spir_v cache . +Contractions,contractions/contractions -offlineCompiler spir_v cache . +Math,math_brute_force/bruteforce -offlineCompiler spir_v cache . +Integer Ops,integer_ops/test_integer_ops -offlineCompiler spir_v cache . +Half Ops,half/Test_half -offlineCompiler spir_v cache . + +# ######################################### +# Compatibility with Previous Versions +# ######################################### +Basic 1.2,compatibility/test_conformance/basic/test_basic +API 1.2,compatibility/test_conformance/api/test_api + +##################################### +# OpenCL 2.0 tests +##################################### +C11 Atomics,c11_atomics/test_c11_atomics -offlineCompiler spir_v cache . +Execution Model,device_execution/test_device_execution -offlineCompiler spir_v cache . +Generic Address Space,generic_address_space/test_generic_address_space -offlineCompiler spir_v cache . +Non Uniform Work Groups,non_uniform_work_group/test_non_uniform_work_group -offlineCompiler spir_v cache . +Pipes,pipes/test_pipes -offlineCompiler spir_v cache . +SVM,SVM/test_SVM -offlineCompiler spir_v cache . +Workgroups,workgroups/test_workgroups -offlineCompiler spir_v cache . 
+ +##################################### +# OpenCL 2.1 tests +##################################### +Device timer,device_timer/test_device_timer + +######################################### +# Extensions +######################################### +SPIR,spir/test_spir +Mipmaps (Kernel),images/kernel_read_write/test_image_streams test_mipmaps CL_FILTER_NEAREST -offlineCompiler spir_v cache . +Mipmaps (clCopyImage),images/clCopyImage/test_cl_copy_images test_mipmaps -offlineCompiler spir_v cache . +Mipmaps (clReadWriteImage),images/clReadWriteImage/test_cl_read_write_images test_mipmaps -offlineCompiler spir_v cache . +Subgroups,subgroups/test_subgroups -offlineCompiler spir_v cache . diff --git a/test_conformance/opencl_conformance_tests_21_legacy_wimpy.csv b/test_conformance/opencl_conformance_tests_21_legacy_wimpy.csv new file mode 100644 index 00000000..ee7ab99e --- /dev/null +++ b/test_conformance/opencl_conformance_tests_21_legacy_wimpy.csv @@ -0,0 +1,103 @@ +# +# OpenCL Conformance Test Suite (quick version) +# The quick version skips some long-running image tests, runs a shorter math test, +# and does not run the conversion test. 
+# + +# ######################################### +# Basic Information on the compute device +# ######################################### +Compute Info,computeinfo/test_computeinfo + +# ######################################### +# Basic operation tests +# ######################################### +Basic,basic/test_basic +API,api/test_api +Compiler,compiler/test_compiler + +# ######################################### +# Common mathematical functions +# ######################################### +Common Functions,commonfns/test_commonfns +Geometric Functions,geometrics/test_geometrics +Relationals,relationals/test_relationals + +# ######################################### +# General operation +# ######################################### +Thread Dimensions,thread_dimensions/test_thread_dimensions quick* +Multiple Device/Context,multiple_device_context/test_multiples +Atomics,atomics/test_atomics +Profiling,profiling/test_profiling +Events,events/test_events +Allocations (single maximum),allocations/test_allocations single 5 all +Allocations (total maximum),allocations/test_allocations multiple 5 all +VecAlign, vec_align/test_vecalign +VecStep, vec_step/test_vecstep +Printf,printf/test_printf +Device Partitioning,device_partition/test_device_partition + +# ######################################### +# Buffers and images +# ######################################### +Buffers,buffers/test_buffers +Images (Kernel Methods),images/kernel_image_methods/test_kernel_image_methods +Images (Kernel),images/kernel_read_write/test_image_streams CL_FILTER_NEAREST +Images (Kernel pitch),images/kernel_read_write/test_image_streams use_pitches CL_FILTER_NEAREST +Images (Kernel max size),images/kernel_read_write/test_image_streams max_images CL_FILTER_NEAREST +Images (Samplerless),images/samplerlessReads/test_samplerless_reads +Images (Samplerless pitch),images/samplerlessReads/test_samplerless_reads use_pitches +Images (Samplerless max size),images/samplerlessReads/test_samplerless_reads 
max_images +Mem (Host Flags),mem_host_flags/test_mem_host_flags + +# ######################################### +# Headers +# ######################################### +Headers (cl_typen), headers/test_headers +Headers (cl.h standalone), headers/test_cl_h +Headers (cl_platform.h standalone), headers/test_cl_platform_h +Headers (cl_gl.h standalone), headers/test_cl_gl_h +Headers (opencl.h standalone), headers/test_opencl_h + +# ######################################### +# CPU is required to pass linear and normalized image filtering +# ######################################### +CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR),images/kernel_read_write/test_image_streams CL_FILTER_LINEAR +CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR pitch),images/kernel_read_write/test_image_streams use_pitches CL_FILTER_LINEAR +CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR max size),images/kernel_read_write/test_image_streams max_images CL_FILTER_LINEAR + +# ######################################### +# OpenGL/CL interaction +# ######################################### +OpenCL-GL Sharing,gl/test_gl + +# ######################################### +# Thorough math and conversions tests +# ######################################### +Select,select/test_select +Conversions,conversions/test_conversions -w +Contractions,contractions/contractions +Math,math_brute_force/bruteforce -w +Integer Ops,integer_ops/test_integer_ops integer_* quick_* +Half Ops,half/Test_half -w + +##################################### +# OpenCL 2.0 tests +##################################### +C11 Atomics,c11_atomics/test_c11_atomics +Execution Model,device_execution/test_device_execution +Generic Address Space,generic_address_space/test_generic_address_space +Non Uniform Work Groups,non_uniform_work_group/test_non_uniform_work_group +Pipes,pipes/test_pipes +SVM,SVM/test_SVM +Workgroups,workgroups/test_workgroups + +######################################### +# Extensions 
+######################################### +SPIR,spir/test_spir +Mipmaps (Kernel),images/kernel_read_write/test_image_streams test_mipmaps CL_FILTER_NEAREST +Mipmaps (clCopyImage),images/clCopyImage/test_cl_copy_images test_mipmaps +Mipmaps (clReadWriteImage),images/clReadWriteImage/test_cl_read_write_images test_mipmaps +Subgroups,subgroups/test_subgroups \ No newline at end of file diff --git a/test_conformance/opencl_conformance_tests_22.csv b/test_conformance/opencl_conformance_tests_22.csv new file mode 100644 index 00000000..2ef864a6 --- /dev/null +++ b/test_conformance/opencl_conformance_tests_22.csv @@ -0,0 +1,45 @@ +# +# OpenCL Conformance Test Suite (2.2 version) +# + +# ######################################### +# New API features +# ######################################### +API (ctors and dtors of global scope vars) , clcpp/api/test_cpp_api "test_global_scope*" +API (specialization constants) , clcpp/api/test_cpp_api "test_spec_consts*" + +# ######################################### +# New representation of types +# ######################################### +Images and samplers , clcpp/images/test_cpp_images +Pipes and reservations , clcpp/pipes/test_cpp_pipes "test_pipes_pipe" +Device enqueue and events , clcpp/device_queue/test_cpp_device_queue +Address spaces , clcpp/address_spaces/test_cpp_address_spaces + +# ######################################### +# New representation of functions +# ######################################### +Conversions (convert_cast) , clcpp/convert/test_cpp_convert +Reinterpreting (as_type) , clcpp/reinterpret/test_cpp_reinterpret +Atomics , clcpp/atomics/test_cpp_atomics +Work-item functions , clcpp/workitems/test_cpp_workitems +Work-group functions , clcpp/workgroups/test_cpp_workgroups +Sub-group functions , clcpp/subgroups/test_cpp_subgroups +Synchronization functions , clcpp/synchronization/test_cpp_synchronization "test_work_group_barrier*" "test_sub_group_barrier*" +Math functions , 
clcpp/math_funcs/test_cpp_math_funcs +Integer functions , clcpp/integer_funcs/test_cpp_integer_funcs +Common functions , clcpp/common_funcs/test_cpp_common_funcs +Geometric functions , clcpp/geometric_funcs/test_cpp_geometric_funcs +Relational functions , clcpp/relational_funcs/test_cpp_relational_funcs +vload and vstore functions , clcpp/vload_vstore/test_cpp_vload_vstore_funcs + +# ######################################### +# New in OpenCL C++ +# ######################################### +Specialization constants , clcpp/spec_constants/test_cpp_spec_constants +Named barriers (KHR extension) , clcpp/synchronization/test_cpp_synchronization "test_work_group_named_barrier*" +required_num_sub_groups attribute , clcpp/attributes/test_cpp_attributes "test_required_num_sub_groups*" +ivdep attribute , clcpp/attributes/test_cpp_attributes "test_ivdep*" +max_size attribute , clcpp/attributes/test_cpp_attributes "test_max_size*" +Ctors and dtors of global scope objects , clcpp/program_scope_ctors_dtors/test_cpp_program_scope_ctors_dtors +Pipe storages , clcpp/pipes/test_cpp_pipes "test_pipes_pipe_storage" diff --git a/test_conformance/opencl_conformance_tests_conversions.csv b/test_conformance/opencl_conformance_tests_conversions.csv new file mode 100644 index 00000000..c8e283a6 --- /dev/null +++ b/test_conformance/opencl_conformance_tests_conversions.csv @@ -0,0 +1,4 @@ +# +# OpenCL Conformance Test Suite (conversions only) +# +Conversions,conversions/test_conversions diff --git a/test_conformance/opencl_conformance_tests_d3d.csv b/test_conformance/opencl_conformance_tests_d3d.csv new file mode 100644 index 00000000..29d2267e --- /dev/null +++ b/test_conformance/opencl_conformance_tests_d3d.csv @@ -0,0 +1,7 @@ +# +# OpenCL Conformance Test for DirectX interop +# + +DX9 media sharing,media_sharing\test_media_surface_sharing +D3D10,d3d10\test_d3d10 +D3D11,d3d11\test_d3d11 \ No newline at end of file diff --git a/test_conformance/opencl_conformance_tests_full.csv 
b/test_conformance/opencl_conformance_tests_full.csv new file mode 100644 index 00000000..380870e0 --- /dev/null +++ b/test_conformance/opencl_conformance_tests_full.csv @@ -0,0 +1,117 @@ +# +# OpenCL Conformance Test Suite (full version) +# + +# ######################################### +# Basic Information on the compute device +# ######################################### +Compute Info,computeinfo/test_computeinfo + +# ######################################### +# Basic operation tests +# ######################################### +Basic,basic/test_basic +API,api/test_api +Compiler,compiler/test_compiler + +# ######################################### +# Common mathematical functions +# ######################################### +Common Functions,commonfns/test_commonfns +Geometric Functions,geometrics/test_geometrics +Relationals,relationals/test_relationals + +# ######################################### +# General operation +# ######################################### +Thread Dimensions,thread_dimensions/test_thread_dimensions full* +Multiple Device/Context,multiple_device_context/test_multiples +Atomics,atomics/test_atomics +Profiling,profiling/test_profiling +Events,events/test_events +Allocations (single maximum),allocations/test_allocations single 5 all +Allocations (total maximum),allocations/test_allocations multiple 5 all +VecAlign, vec_align/test_vecalign +VecStep, vec_step/test_vecstep +Printf,printf/test_printf +Device Partitioning,device_partition/test_device_partition + +# ######################################### +# Buffers and images +# ######################################### +Buffers,buffers/test_buffers +Images (API Info),images/clGetInfo/test_cl_get_info +Images (Kernel Methods),images/kernel_image_methods/test_kernel_image_methods +Images (Kernel),images/kernel_read_write/test_image_streams CL_FILTER_NEAREST +Images (Kernel pitch),images/kernel_read_write/test_image_streams use_pitches CL_FILTER_NEAREST +Images (Kernel max 
size),images/kernel_read_write/test_image_streams max_images CL_FILTER_NEAREST +Images (clCopyImage),images/clCopyImage/test_cl_copy_images +Images (clCopyImage small),images/clCopyImage/test_cl_copy_images small_images +Images (clCopyImage max size),images/clCopyImage/test_cl_copy_images max_images +Images (clReadWriteImage),images/clReadWriteImage/test_cl_read_write_images +Images (clReadWriteImage pitch),images/clReadWriteImage/test_cl_read_write_images use_pitches +Images (clReadWriteImage max size),images/clReadWriteImage/test_cl_read_write_images max_images +Images (clFillImage),images/clFillImage/test_cl_fill_images +Images (clFillImage pitch),images/clFillImage/test_cl_fill_images use_pitches +Images (clFillImage max size),images/clFillImage/test_cl_fill_images max_images +Images (Samplerless),images/samplerlessReads/test_samplerless_reads +Images (Samplerless pitch),images/samplerlessReads/test_samplerless_reads use_pitches +Images (Samplerless max size),images/samplerlessReads/test_samplerless_reads max_images +Mem (Host Flags),mem_host_flags/test_mem_host_flags + +# ######################################### +# Headers +# ######################################### +Headers (cl_typen), headers/test_headers +Headers (cl.h standalone), headers/test_cl_h +Headers (cl_platform.h standalone), headers/test_cl_platform_h +Headers (cl_gl.h standalone), headers/test_cl_gl_h +Headers (opencl.h standalone), headers/test_opencl_h + +# ######################################### +# CPU is required to pass linear and normalized image filtering +# ######################################### +CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR),images/kernel_read_write/test_image_streams CL_FILTER_LINEAR +CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR pitch),images/kernel_read_write/test_image_streams use_pitches CL_FILTER_LINEAR +CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR max size),images/kernel_read_write/test_image_streams max_images CL_FILTER_LINEAR + +# 
######################################### +# OpenGL/CL interaction +# ######################################### +OpenCL-GL Sharing,gl/test_gl + +# ######################################### +# Thorough math and conversions tests +# ######################################### +Select,select/test_select +Conversions,conversions/test_conversions +Contractions,contractions/contractions +Math,math_brute_force/bruteforce +Integer Ops,integer_ops/test_integer_ops +Half Ops,half/test_half + +# ######################################### +# Compatibility with Previous Versions +# ######################################### +Basic 1.2,compatibility/test_conformance/basic/test_basic +API 1.2,compatibility/test_conformance/api/test_api + +##################################### +# OpenCL 2.0 tests +##################################### +C11 Atomics,c11_atomics/test_c11_atomics +Execution Model,device_execution/test_device_execution +Generic Address Space,generic_address_space/test_generic_address_space +Non Uniform Work Groups,non_uniform_work_group/test_non_uniform_work_group +Pipes,pipes/test_pipes +SVM,SVM/test_SVM +Workgroups,workgroups/test_workgroups + +######################################### +# Extensions +######################################### +SPIR,spir/test_spir +Mipmaps (Kernel),images/kernel_read_write/test_image_streams test_mipmaps CL_FILTER_NEAREST +Mipmaps (clCopyImage),images/clCopyImage/test_cl_copy_images test_mipmaps +Mipmaps (clReadWriteImage),images/clReadWriteImage/test_cl_read_write_images test_mipmaps +Subgroups,subgroups/test_subgroups \ No newline at end of file diff --git a/test_conformance/opencl_conformance_tests_full_no_math_or_conversions.csv b/test_conformance/opencl_conformance_tests_full_no_math_or_conversions.csv new file mode 100644 index 00000000..b52f20c0 --- /dev/null +++ b/test_conformance/opencl_conformance_tests_full_no_math_or_conversions.csv @@ -0,0 +1,92 @@ +# +# OpenCL Conformance Test Suite (full version) +# + +# 
######################################### +# Basic Information on the compute device +# ######################################### +Compute Info,computeinfo/test_computeinfo + +# ######################################### +# Basic operation tests +# ######################################### +Basic,basic/test_basic +API,api/test_api +Compiler,compiler/test_compiler + +# ######################################### +# Common mathematical functions +# ######################################### +Common Functions,commonfns/test_commonfns +Geometric Functions,geometrics/test_geometrics +Relationals,relationals/test_relationals + +# ######################################### +# General operation +# ######################################### +Thread Dimensions,thread_dimensions/test_thread_dimensions full* +Multiple Device/Context,multiple_device_context/test_multiples +Atomics,atomics/test_atomics +Profiling,profiling/test_profiling +Events,events/test_events +Allocations (single maximum),allocations/test_allocations single 5 all +Allocations (total maximum),allocations/test_allocations multiple 5 all +VecAlign, vec_align/test_vecalign +VecStep, vec_step/test_vecstep + +# ######################################### +# Buffers and images +# ######################################### +Buffers,buffers/test_buffers +Images (API Info),images/clGetInfo/test_cl_get_info +Images (Kernel Methods),images/kernel_image_methods/test_kernel_image_methods +Images (Kernel),images/kernel_read_write/test_image_streams CL_FILTER_NEAREST +Images (Kernel pitch),images/kernel_read_write/test_image_streams use_pitches CL_FILTER_NEAREST +Images (Kernel max size),images/kernel_read_write/test_image_streams max_images CL_FILTER_NEAREST +Images (clCopyImage),images/clCopyImage/test_cl_copy_images +Images (clCopyImage small),images/clCopyImage/test_cl_copy_images small_images +Images (clCopyImage max size),images/clCopyImage/test_cl_copy_images max_images +Images 
(clReadWriteImage),images/clReadWriteImage/test_cl_read_write_images +Images (clReadWriteImage pitch),images/clReadWriteImage/test_cl_read_write_images use_pitches +Images (clReadWriteImage max size),images/clReadWriteImage/test_cl_read_write_images max_images +Images (clFillImage),images/clFillImage/test_cl_fill_images +Images (clFillImage pitch),images/clFillImage/test_cl_fill_images use_pitches +Images (clFillImage max size),images/clFillImage/test_cl_fill_images max_images +Images (Samplerless),images/samplerlessReads/test_samplerless_reads +Images (Samplerless pitch),images/samplerlessReads/test_samplerless_reads use_pitches +Images (Samplerless max size),images/samplerlessReads/test_samplerless_reads max_images +Mem (Host Flags),mem_host_flags/test_mem_host_flags + +# ######################################### +# Headers +# ######################################### +Headers (cl_typen), headers/test_headers +Headers (cl.h standalone), headers/test_cl_h +Headers (cl_platform.h standalone), headers/test_cl_platform_h +Headers (cl_gl.h standalone), headers/test_cl_gl_h +Headers (opencl.h standalone), headers/test_opencl_h +Headers (cl.h standalone C99), headers/test_cl_h_c99 +Headers (cl_platform.h standalone C99), headers/test_cl_platform_h_c99 +Headers (cl_gl.h standalone C99), headers/test_cl_gl_h_c99 +Headers (opencl.h standalone C99), headers/test_opencl_h_c99 + +# ######################################### +# CPU is required to pass linear and normalized image filtering +# ######################################### +CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR),images/kernel_read_write/test_image_streams CL_FILTER_LINEAR +CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR pitch),images/kernel_read_write/test_image_streams use_pitches CL_FILTER_LINEAR +CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR max size),images/kernel_read_write/test_image_streams max_images CL_FILTER_LINEAR + +# ######################################### +# OpenGL/CL 
interaction +# ######################################### +OpenCL-GL Sharing,gl/test_gl + +# ######################################### +# Thorough math and conversions tests +# ######################################### +Select,select/test_select +Contractions,contractions/contractions +Integer Ops,integer_ops/test_integer_ops +Half Ops,half/Test_half + diff --git a/test_conformance/opencl_conformance_tests_generate_spirv.csv b/test_conformance/opencl_conformance_tests_generate_spirv.csv new file mode 100644 index 00000000..1d5baf66 --- /dev/null +++ b/test_conformance/opencl_conformance_tests_generate_spirv.csv @@ -0,0 +1,73 @@ +# +# OpenCL Conformance Test Suite (generate spirv binaries) +# + +# ######################################### +# Basic operation tests +# ######################################### +Basic,basic/test_basic -offlineCompiler spir_v generate . +API,api/test_api -offlineCompiler spir_v generate . +Compiler,compiler/test_compiler -offlineCompiler spir_v generate . + +# ######################################### +# Common mathematical functions +# ######################################### +Common Functions,commonfns/test_commonfns -offlineCompiler spir_v generate . +Geometric Functions,geometrics/test_geometrics -offlineCompiler spir_v generate . +Relationals,relationals/test_relationals -offlineCompiler spir_v generate . + +# ######################################### +# General operation +# ######################################### +Thread Dimensions,thread_dimensions/test_thread_dimensions quick* -offlineCompiler spir_v generate . +Multiple Device/Context,multiple_device_context/test_multiples -offlineCompiler spir_v generate . +Atomics,atomics/test_atomics -offlineCompiler spir_v generate . +Profiling,profiling/test_profiling -offlineCompiler spir_v generate . +Events,events/test_events -offlineCompiler spir_v generate . +Allocations (single maximum),allocations/test_allocations single 5 all -offlineCompiler spir_v generate . 
+Allocations (total maximum),allocations/test_allocations multiple 5 all -offlineCompiler spir_v generate . +VecAlign, vec_align/test_vecalign -offlineCompiler spir_v generate . +VecStep, vec_step/test_vecstep -offlineCompiler spir_v generate . +Printf,printf/test_printf -offlineCompiler spir_v generate . +Device Partitioning,device_partition/test_device_partition -offlineCompiler spir_v generate . + +# ######################################### +# Buffers and images +# ######################################### +Buffers,buffers/test_buffers +Images (Kernel Methods),images/kernel_image_methods/test_kernel_image_methods -offlineCompiler spir_v generate . +Images (Kernel),images/kernel_read_write/test_image_streams CL_FILTER_NEAREST -offlineCompiler spir_v generate . +Images (Samplerless),images/samplerlessReads/test_samplerless_reads -offlineCompiler spir_v generate . + +# ######################################### +# OpenGL/CL interaction +# ######################################### +OpenCL-GL Sharing,gl/test_gl -offlineCompiler spir_v generate . + +# ######################################### +# Thorough math and conversions tests +# ######################################### +Select,select/test_select -offlineCompiler spir_v generate . +Conversions,conversions/test_conversions -w -offlineCompiler spir_v generate . +Contractions,contractions/contractions -offlineCompiler spir_v generate . +Math,math_brute_force/bruteforce -w -offlineCompiler spir_v generate . +Integer Ops,integer_ops/test_integer_ops integer_* quick_* -offlineCompiler spir_v generate . +Half Ops,half/test_half -w -offlineCompiler spir_v generate . + +##################################### +# OpenCL 2.0 tests +##################################### +C11 Atomics,c11_atomics/test_c11_atomics -offlineCompiler spir_v generate . +Execution Model,device_execution/test_device_execution -offlineCompiler spir_v generate . 
+Generic Address Space,generic_address_space/test_generic_address_space -offlineCompiler spir_v generate . +Non Uniform Work Groups,non_uniform_work_group/test_non_uniform_work_group -offlineCompiler spir_v generate . +Pipes,pipes/test_pipes -offlineCompiler spir_v generate . +SVM,SVM/test_SVM -offlineCompiler spir_v generate . +Workgroups,workgroups/test_workgroups -offlineCompiler spir_v generate . + +######################################### +# Extensions +######################################### +Mipmaps (Kernel),images/kernel_read_write/test_image_streams test_mipmaps CL_FILTER_NEAREST -offlineCompiler spir_v generate . +Mipmaps (clCopyImage),images/clCopyImage/test_cl_copy_images test_mipmaps -offlineCompiler spir_v generate . +Subgroups,subgroups/test_subgroups -offlineCompiler spir_v generate . diff --git a/test_conformance/opencl_conformance_tests_math.csv b/test_conformance/opencl_conformance_tests_math.csv new file mode 100644 index 00000000..ebc4e4a3 --- /dev/null +++ b/test_conformance/opencl_conformance_tests_math.csv @@ -0,0 +1,4 @@ +# +# OpenCL Conformance Test Suite (math only) +# +Math,math_brute_force/bruteforce diff --git a/test_conformance/opencl_conformance_tests_quick.csv b/test_conformance/opencl_conformance_tests_quick.csv new file mode 100644 index 00000000..b80a3737 --- /dev/null +++ b/test_conformance/opencl_conformance_tests_quick.csv @@ -0,0 +1,90 @@ +# +# OpenCL Conformance Test Suite (quick version) +# The quick version skips some long-running image tests, runs a shorter math test, +# and does not run the conversion test. 
+# + +# ######################################### +# Basic Information on the compute device +# ######################################### +Compute Info,computeinfo/test_computeinfo + +# ######################################### +# Basic operation tests +# ######################################### +Basic,basic/test_basic +API,api/test_api +Compiler,compiler/test_compiler + +# ######################################### +# Common mathematical functions +# ######################################### +Common Functions,commonfns/test_commonfns +Geometric Functions,geometrics/test_geometrics +Relationals,relationals/test_relationals + +# ######################################### +# General operation +# ######################################### +Thread Dimensions,thread_dimensions/test_thread_dimensions quick* +#Multiple Device/Context,multiple_device_context/test_multiples +Atomics,atomics/test_atomics +Profiling,profiling/test_profiling +Events,events/test_events +Allocations (single maximum),allocations/test_allocations single 5 all +Allocations (total maximum),allocations/test_allocations multiple 5 all +Printf,printf/test_printf +Device Partitioning,device_partition/test_device_partition + +# ######################################### +# Buffers and images +# ######################################### +Buffers,buffers/test_buffers +Images (API Info),images/clGetInfo/test_cl_get_info +Images (Kernel Methods),images/kernel_image_methods/test_kernel_image_methods +Images (Kernel),images/kernel_read_write/test_image_streams CL_FILTER_NEAREST +Images (Kernel pitch),images/kernel_read_write/test_image_streams use_pitches CL_FILTER_NEAREST +Images (Kernel max size),images/kernel_read_write/test_image_streams max_images CL_FILTER_NEAREST +Images (clCopyImage),images/clCopyImage/test_cl_copy_images +Images (clCopyImage max size),images/clCopyImage/test_cl_copy_images max_images +Images (clReadWriteImage),images/clReadWriteImage/test_cl_read_write_images +Images (clReadWriteImage 
pitch),images/clReadWriteImage/test_cl_read_write_images use_pitches +Images (clReadWriteImage max size),images/clReadWriteImage/test_cl_read_write_images max_images +Images (clFillImage),images/clFillImage/test_cl_fill_images +Images (clFillImage pitch),images/clFillImage/test_cl_fill_images use_pitches +Images (clFillImage max size),images/clFillImage/test_cl_fill_images max_images +Images (Samplerless),images/samplerlessReads/test_samplerless_reads +Images (Samplerless pitch),images/samplerlessReads/test_samplerless_reads use_pitches +Images (Samplerless max size),images/samplerlessReads/test_samplerless_reads max_images +Mem (Host Flags),mem_host_flags/test_mem_host_flags + +# ######################################### +# Headers +# ######################################### +Headers (cl_typen), headers/test_headers +Headers (cl.h standalone), headers/test_cl_h +Headers (cl_platform.h standalone), headers/test_cl_platform_h +Headers (cl_gl.h standalone), headers/test_cl_gl_h +Headers (opencl.h standalone), headers/test_opencl_h + +# ######################################### +# CPU is required to pass linear and normalized image filtering +# ######################################### +CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR),images/kernel_read_write/test_image_streams CL_FILTER_LINEAR +CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR pitch),images/kernel_read_write/test_image_streams use_pitches CL_FILTER_LINEAR +CL_DEVICE_TYPE_CPU, Images (Kernel CL_FILTER_LINEAR max size),images/kernel_read_write/test_image_streams max_images CL_FILTER_LINEAR + +# ######################################### +# OpenGL/CL interaction +# ######################################### +OpenCL-GL Sharing,gl/test_gl + +# ######################################### +# Thorough math and conversions tests +# ######################################### +Select,select/test_select +#Conversions,conversions/test_conversions +Contractions,contractions/contractions 
+Math,math_brute_force/bruteforce -w +Integer Ops,integer_ops/test_integer_ops integer_* quick_* +Half Ops,half/Test_half -w diff --git a/test_conformance/pipes/CMakeLists.txt b/test_conformance/pipes/CMakeLists.txt new file mode 100644 index 00000000..e833f0ec --- /dev/null +++ b/test_conformance/pipes/CMakeLists.txt @@ -0,0 +1,22 @@ +set(MODULE_NAME pipes) + +set(${MODULE_NAME}_SOURCES + main.c + test_pipe_read_write.c + test_pipe_info.c + test_pipe_limits.c + test_pipe_query_functions.c + test_pipe_readwrite_errors.c + test_pipe_subgroups.c + ../../test_common/harness/errorHelpers.c + ../../test_common/harness/threadTesting.c + ../../test_common/harness/testHarness.c + ../../test_common/harness/kernelHelpers.c + ../../test_common/harness/typeWrappers.cpp + ../../test_common/harness/mt19937.c + ../../test_common/harness/conversions.c + ../../test_common/harness/msvc9.c + ../../test_common/harness/parseParameters.cpp +) + +include(../CMakeCommon.txt) diff --git a/test_conformance/pipes/kernels.h b/test_conformance/pipes/kernels.h new file mode 100644 index 00000000..a2fb70c0 --- /dev/null +++ b/test_conformance/pipes/kernels.h @@ -0,0 +1,130 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef _KERNELS_H_ +#define _KERNELS_H_ + +static const char* pipe_readwrite_struct_kernel_code = { + "typedef struct{\n" + "char a;\n" + "int b;\n" + "}TestStruct;\n" + "__kernel void test_pipe_write_struct(__global TestStruct *src, __write_only pipe TestStruct out_pipe)\n" + "{\n" + " int gid = get_global_id(0);\n" + " reserve_id_t res_id; \n" + "\n" + " res_id = reserve_write_pipe(out_pipe, 1);\n" + " if(is_valid_reserve_id(res_id))\n" + " {\n" + " write_pipe(out_pipe, res_id, 0, &src[gid]);\n" + " commit_write_pipe(out_pipe, res_id);\n" + " }\n" + "}\n" + "\n" + "__kernel void test_pipe_read_struct(__read_only pipe TestStruct in_pipe, __global TestStruct *dst)\n" + "{\n" + " int gid = get_global_id(0);\n" + " reserve_id_t res_id; \n" + "\n" + " res_id = reserve_read_pipe(in_pipe, 1);\n" + " if(is_valid_reserve_id(res_id))\n" + " {\n" + " read_pipe(in_pipe, res_id, 0, &dst[gid]);\n" + " commit_read_pipe(in_pipe, res_id);\n" + " }\n" + "}\n" }; + +static const char* pipe_workgroup_readwrite_struct_kernel_code = { + "typedef struct{\n" + "char a;\n" + "int b;\n" + "}TestStruct;\n" + "__kernel void test_pipe_workgroup_write_struct(__global TestStruct *src, __write_only pipe TestStruct out_pipe)\n" + "{\n" + " int gid = get_global_id(0);\n" + " __local reserve_id_t res_id; \n" + "\n" + " res_id = work_group_reserve_write_pipe(out_pipe, get_local_size(0));\n" + " if(is_valid_reserve_id(res_id))\n" + " {\n" + " write_pipe(out_pipe, res_id, get_local_id(0), &src[gid]);\n" + " work_group_commit_write_pipe(out_pipe, res_id);\n" + " }\n" + "}\n" + "\n" + "__kernel void test_pipe_workgroup_read_struct(__read_only pipe TestStruct in_pipe, __global TestStruct *dst)\n" + "{\n" + " int gid = get_global_id(0);\n" + " __local reserve_id_t res_id; \n" + "\n" + " res_id = work_group_reserve_read_pipe(in_pipe, get_local_size(0));\n" + " if(is_valid_reserve_id(res_id))\n" + " {\n" + " read_pipe(in_pipe, res_id, get_local_id(0), &dst[gid]);\n" + " 
work_group_commit_read_pipe(in_pipe, res_id);\n" + " }\n" + "}\n" }; + +static const char* pipe_subgroup_readwrite_struct_kernel_code = { + "typedef struct{\n" + "char a;\n" + "int b;\n" + "}TestStruct;\n" + "#pragma OPENCL EXTENSION cl_khr_subgroups : enable\n" + "__kernel void test_pipe_subgroup_write_struct(__global TestStruct *src, __write_only pipe TestStruct out_pipe)\n" + "{\n" + " int gid = get_global_id(0);\n" + " reserve_id_t res_id; \n" + "\n" + " res_id = sub_group_reserve_write_pipe(out_pipe, get_sub_group_size());\n" + " if(is_valid_reserve_id(res_id))\n" + " {\n" + " write_pipe(out_pipe, res_id, get_sub_group_local_id(), &src[gid]);\n" + " sub_group_commit_write_pipe(out_pipe, res_id);\n" + " }\n" + "}\n" + "\n" + "__kernel void test_pipe_subgroup_read_struct(__read_only pipe TestStruct in_pipe, __global TestStruct *dst)\n" + "{\n" + " int gid = get_global_id(0);\n" + " reserve_id_t res_id; \n" + "\n" + " res_id = sub_group_reserve_read_pipe(in_pipe, get_sub_group_size());\n" + " if(is_valid_reserve_id(res_id))\n" + " {\n" + " read_pipe(in_pipe, res_id, get_sub_group_local_id(), &dst[gid]);\n" + " sub_group_commit_read_pipe(in_pipe, res_id);\n" + " }\n" + "}\n" }; + +static const char* pipe_convenience_readwrite_struct_kernel_code = { + "typedef struct{\n" + "char a;\n" + "int b;\n" + "}TestStruct;\n" + "__kernel void test_pipe_convenience_write_struct(__global TestStruct *src, __write_only pipe TestStruct out_pipe)\n" + "{\n" + " int gid = get_global_id(0);\n" + " write_pipe(out_pipe, &src[gid]);\n" + "}\n" + "\n" + "__kernel void test_pipe_convenience_read_struct(__read_only pipe TestStruct in_pipe, __global TestStruct *dst)\n" + "{\n" + " int gid = get_global_id(0);\n" + " read_pipe(in_pipe, &dst[gid]);\n" + "}\n" }; + +#endif //_KERNELS_H_ diff --git a/test_conformance/pipes/main.c b/test_conformance/pipes/main.c new file mode 100644 index 00000000..e8597fbc --- /dev/null +++ b/test_conformance/pipes/main.c @@ -0,0 +1,147 @@ +// +// Copyright (c) 
2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" + +#include +#include +#include "procs.h" +#include "../../test_common/harness/testHarness.h" + +basefn pipefn_list[] = { + test_pipe_readwrite_int, + test_pipe_readwrite_uint, + test_pipe_readwrite_long, + test_pipe_readwrite_ulong, + test_pipe_readwrite_short, + test_pipe_readwrite_ushort, + test_pipe_readwrite_float, + test_pipe_readwrite_half, + test_pipe_readwrite_char, + test_pipe_readwrite_uchar, + test_pipe_readwrite_double, + test_pipe_readwrite_struct, + test_pipe_workgroup_readwrite_int, + test_pipe_workgroup_readwrite_uint, + test_pipe_workgroup_readwrite_long, + test_pipe_workgroup_readwrite_ulong, + test_pipe_workgroup_readwrite_short, + test_pipe_workgroup_readwrite_ushort, + test_pipe_workgroup_readwrite_float, + test_pipe_workgroup_readwrite_half, + test_pipe_workgroup_readwrite_char, + test_pipe_workgroup_readwrite_uchar, + test_pipe_workgroup_readwrite_double, + test_pipe_workgroup_readwrite_struct, + test_pipe_subgroup_readwrite_int, + test_pipe_subgroup_readwrite_uint, + test_pipe_subgroup_readwrite_long, + test_pipe_subgroup_readwrite_ulong, + test_pipe_subgroup_readwrite_short, + test_pipe_subgroup_readwrite_ushort, + test_pipe_subgroup_readwrite_float, + test_pipe_subgroup_readwrite_half, + test_pipe_subgroup_readwrite_char, + test_pipe_subgroup_readwrite_uchar, + test_pipe_subgroup_readwrite_double, + 
test_pipe_subgroup_readwrite_struct, + test_pipe_convenience_readwrite_int, + test_pipe_convenience_readwrite_uint, + test_pipe_convenience_readwrite_long, + test_pipe_convenience_readwrite_ulong, + test_pipe_convenience_readwrite_short, + test_pipe_convenience_readwrite_ushort, + test_pipe_convenience_readwrite_float, + test_pipe_convenience_readwrite_half, + test_pipe_convenience_readwrite_char, + test_pipe_convenience_readwrite_uchar, + test_pipe_convenience_readwrite_double, + test_pipe_convenience_readwrite_struct, + test_pipe_info, + test_pipe_max_args, + test_pipe_max_packet_size, + test_pipe_max_active_reservations, + test_pipe_query_functions, + test_pipe_readwrite_errors, + test_pipe_subgroups_divergence +}; + +const char *pipefn_names[] = { + "pipe_readwrite_int", + "pipe_readwrite_uint", + "pipe_readwrite_long", + "pipe_readwrite_ulong", + "pipe_readwrite_short", + "pipe_readwrite_ushort", + "pipe_readwrite_float", + "pipe_readwrite_half", + "pipe_readwrite_char", + "pipe_readwrite_uchar", + "pipe_readwrite_double", + "pipe_readwrite_struct", + "pipe_workgroup_readwrite_int", + "pipe_workgroup_readwrite_uint", + "pipe_workgroup_readwrite_long", + "pipe_workgroup_readwrite_ulong", + "pipe_workgroup_readwrite_short", + "pipe_workgroup_readwrite_ushort", + "pipe_workgroup_readwrite_float", + "pipe_workgroup_readwrite_half", + "pipe_workgroup_readwrite_char", + "pipe_workgroup_readwrite_uchar", + "pipe_workgroup_readwrite_double", + "pipe_workgroup_readwrite_struct", + "pipe_subgroup_readwrite_int", + "pipe_subgroup_readwrite_uint", + "pipe_subgroup_readwrite_long", + "pipe_subgroup_readwrite_ulong", + "pipe_subgroup_readwrite_short", + "pipe_subgroup_readwrite_ushort", + "pipe_subgroup_readwrite_float", + "pipe_subgroup_readwrite_half", + "pipe_subgroup_readwrite_char", + "pipe_subgroup_readwrite_uchar", + "pipe_subgroup_readwrite_double", + "pipe_subgroup_readwrite_struct", + "pipe_convenience_readwrite_int", + "pipe_convenience_readwrite_uint", + 
"pipe_convenience_readwrite_long", + "pipe_convenience_readwrite_ulong", + "pipe_convenience_readwrite_short", + "pipe_convenience_readwrite_ushort", + "pipe_convenience_readwrite_float", + "pipe_convenience_readwrite_half", + "pipe_convenience_readwrite_char", + "pipe_convenience_readwrite_uchar", + "pipe_convenience_readwrite_double", + "pipe_convenience_readwrite_struct", + "pipe_info", + "pipe_max_args", + "pipe_max_packet_size", + "pipe_max_active_reservations", + "pipe_query_functions", + "pipe_readwrite_errors", + "pipe_subgroups_divergence", +}; + +ct_assert((sizeof(pipefn_names) / sizeof(pipefn_names[0])) == (sizeof(pipefn_list) / sizeof(pipefn_list[0]))); + +int num_pipefns = sizeof(pipefn_names) / sizeof(char *); + +int main( int argc, const char *argv[] ) +{ + return runTestHarness( argc, argv, num_pipefns, pipefn_list, pipefn_names, + false, false, 0 ); +} diff --git a/test_conformance/pipes/procs.h b/test_conformance/pipes/procs.h new file mode 100644 index 00000000..39c1e5c4 --- /dev/null +++ b/test_conformance/pipes/procs.h @@ -0,0 +1,92 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef __PROCS_H__ +#define __PROCS_H__ + +#include "../../test_common/harness/kernelHelpers.h" +#include "../../test_common/harness/testHarness.h" +#include "../../test_common/harness/errorHelpers.h" +#include "../../test_common/harness/typeWrappers.h" +#include "../../test_common/harness/mt19937.h" +#include "../../test_common/harness/conversions.h" + +#ifndef __APPLE__ +#include +#endif + +extern int test_pipe_readwrite_int( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_pipe_readwrite_uint( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_pipe_readwrite_long( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_pipe_readwrite_ulong( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_pipe_readwrite_short( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_pipe_readwrite_ushort( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_pipe_readwrite_float( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_pipe_readwrite_half( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_pipe_readwrite_char( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_pipe_readwrite_uchar( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_pipe_readwrite_double( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_pipe_readwrite_struct( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); + +extern int test_pipe_workgroup_readwrite_int( cl_device_id deviceID, cl_context 
context, cl_command_queue queue, int num_elements ); +extern int test_pipe_workgroup_readwrite_uint( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_pipe_workgroup_readwrite_long( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_pipe_workgroup_readwrite_ulong( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_pipe_workgroup_readwrite_short( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_pipe_workgroup_readwrite_ushort( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_pipe_workgroup_readwrite_float( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_pipe_workgroup_readwrite_half( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_pipe_workgroup_readwrite_char( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_pipe_workgroup_readwrite_uchar( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_pipe_workgroup_readwrite_double( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_pipe_workgroup_readwrite_struct( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); + +extern int test_pipe_subgroup_readwrite_int( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_pipe_subgroup_readwrite_uint( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_pipe_subgroup_readwrite_long( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_pipe_subgroup_readwrite_ulong( 
cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_pipe_subgroup_readwrite_short( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_pipe_subgroup_readwrite_ushort( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_pipe_subgroup_readwrite_float( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_pipe_subgroup_readwrite_half( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_pipe_subgroup_readwrite_char( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_pipe_subgroup_readwrite_uchar( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_pipe_subgroup_readwrite_double( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_pipe_subgroup_readwrite_struct( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); + +extern int test_pipe_convenience_readwrite_int( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_pipe_convenience_readwrite_uint( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_pipe_convenience_readwrite_long( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_pipe_convenience_readwrite_ulong( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_pipe_convenience_readwrite_short( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_pipe_convenience_readwrite_ushort( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); 
+extern int test_pipe_convenience_readwrite_float( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_pipe_convenience_readwrite_half( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_pipe_convenience_readwrite_char( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_pipe_convenience_readwrite_uchar( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_pipe_convenience_readwrite_double( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_pipe_convenience_readwrite_struct( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); + +extern int test_pipe_info( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_pipe_max_args(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_pipe_max_packet_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_pipe_max_active_reservations(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_pipe_query_functions(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_pipe_readwrite_errors(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_pipe_subgroups_divergence(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + + +#endif // #ifndef __PROCS_H__ + diff --git a/test_conformance/pipes/test_pipe_info.c b/test_conformance/pipes/test_pipe_info.c new file mode 100644 index 00000000..5d3e3a49 --- /dev/null +++ b/test_conformance/pipes/test_pipe_info.c @@ -0,0 +1,109 @@ +// +// Copyright (c) 2017 The Khronos Group 
Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "procs.h" + +const char* pipe_kernel_code = { + "__kernel void pipe_kernel(__write_only pipe int out_pipe)\n" + "{}\n" }; + +int test_pipe_info( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +{ + cl_mem pipe; + cl_int err; + cl_uint pipe_width = 512; + cl_uint pipe_depth = 1024; + cl_uint returnVal; + cl_program program; + cl_kernel kernel; + + pipe = clCreatePipe(context, CL_MEM_HOST_NO_ACCESS, pipe_width, pipe_depth, NULL, &err); + test_error(err, "clCreatePipe failed."); + + err = clGetPipeInfo(pipe, CL_PIPE_PACKET_SIZE, sizeof(pipe_width), (void *)&returnVal, NULL); + if ( err ) + { + log_error( "Error calling clGetPipeInfo(): %d\n", err ); + clReleaseMemObject(pipe); + return -1; + } + + if(pipe_width != returnVal) + { + log_error( "Error in clGetPipeInfo() check of pipe packet size\n" ); + clReleaseMemObject(pipe); + return -1; + } + else + { + log_info( " CL_PIPE_PACKET_SIZE passed.\n" ); + } + + err = clGetPipeInfo(pipe, CL_PIPE_MAX_PACKETS, sizeof(pipe_depth), (void *)&returnVal, NULL); + if ( err ) + { + log_error( "Error calling clGetPipeInfo(): %d\n", err ); + clReleaseMemObject(pipe); + return -1; + } + + if(pipe_depth != returnVal) + { + log_error( "Error in clGetPipeInfo() check of pipe max packets\n" ); + clReleaseMemObject(pipe); + return -1; + } + else + { + log_info( " CL_PIPE_MAX_PACKETS passed.\n" ); + } + + err = 
create_single_kernel_helper_with_build_options(context, &program, &kernel, 1, (const char**)&pipe_kernel_code, "pipe_kernel", "-cl-std=CL2.0 -cl-kernel-arg-info"); + if(err) + { + clReleaseMemObject(pipe); + print_error(err, "Error creating program\n"); + return -1; + } + + cl_kernel_arg_type_qualifier arg_type_qualifier = 0; + cl_kernel_arg_type_qualifier expected_type_qualifier = CL_KERNEL_ARG_TYPE_PIPE; + err = clGetKernelArgInfo( kernel, 0, CL_KERNEL_ARG_TYPE_QUALIFIER, sizeof(arg_type_qualifier), &arg_type_qualifier, NULL ); + if(err) + { + clReleaseMemObject(pipe); + clReleaseKernel(kernel); + clReleaseProgram(program); + print_error(err, "clSetKernelArg failed\n"); + return -1; + } + err = (arg_type_qualifier != expected_type_qualifier); + if(err) + { + clReleaseMemObject(pipe); + clReleaseKernel(kernel); + clReleaseProgram(program); + print_error(err, "ERROR: Bad type qualifier\n"); + return -1; + } + + // cleanup + clReleaseMemObject(pipe); + clReleaseKernel(kernel); + clReleaseProgram(program); + + return err; + +} diff --git a/test_conformance/pipes/test_pipe_limits.c b/test_conformance/pipes/test_pipe_limits.c new file mode 100644 index 00000000..2c3ad581 --- /dev/null +++ b/test_conformance/pipes/test_pipe_limits.c @@ -0,0 +1,1085 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include +#include + +#include "procs.h" +#include "../../test_common/harness/errorHelpers.h" + +#define STRING_LENGTH 1024 + +void createKernelSourceCode(char *source, int num_pipes) +{ + int i; + char str[256]; + int str_length; + + strcpy(source, "__kernel void test_multiple_pipe_write(__global int *src, "); + + for(i = 0; i < num_pipes; i++) { + sprintf(str, "__write_only pipe int pipe%d, ", i); + strcat(source, str); + } + sprintf(str, "int num_pipes ) \n{\n int gid = get_global_id(0);\n reserve_id_t res_id;\n\n"); + strcat(source, str); + sprintf(str, " if(gid < (get_global_size(0))/num_pipes)\n {\n res_id = reserve_write_pipe(pipe0, 1);\n if(is_valid_reserve_id(res_id))\n {\n"); + strcat(source, str); + sprintf(str, " write_pipe(pipe0, res_id, 0, &src[gid]);\n commit_write_pipe(pipe0, res_id);\n }\n }\n"); + strcat(source, str); + for(i = 1; i < num_pipes; i++){ + sprintf(str, " else if(gid < (%d*get_global_size(0))/num_pipes)\n {\n res_id = reserve_write_pipe(pipe%d, 1);\n if(is_valid_reserve_id(res_id))\n {\n", i+1, i); + strcat(source, str); + sprintf(str, " write_pipe(pipe%d, res_id, 0, &src[gid]);\n commit_write_pipe(pipe%d, res_id);\n }\n }\n", i, i); + strcat(source, str); + } + strcat(source, "}\n\n__kernel void test_multiple_pipe_read(__global int *dst, "); + + for(i = 0; i < num_pipes; i++) { + sprintf(str, "__read_only pipe int pipe%d, ", i); + strcat(source, str); + } + sprintf(str, "int num_pipes ) \n{\n int gid = get_global_id(0);\n reserve_id_t res_id;\n\n"); + strcat(source, str); + sprintf(str, " if(gid < (get_global_size(0))/num_pipes)\n {\n res_id = reserve_read_pipe(pipe0, 1);\n if(is_valid_reserve_id(res_id))\n {\n"); + strcat(source, str); + sprintf(str, " read_pipe(pipe0, res_id, 0, &dst[gid]);\n commit_read_pipe(pipe0, res_id);\n }\n }\n"); + strcat(source, str); + for(i = 1; i < num_pipes; i++){ + sprintf(str, " else if(gid < 
(%d*get_global_size(0))/num_pipes)\n {\n res_id = reserve_read_pipe(pipe%d, 1);\n if(is_valid_reserve_id(res_id))\n {\n", i+1, i); + strcat(source, str); + sprintf(str, " read_pipe(pipe%d, res_id, 0, &dst[gid]);\n commit_read_pipe(pipe%d, res_id);\n }\n }\n", i, i); + strcat(source, str); + } + strcat(source, "}"); + + str_length = strlen(source); + assert(str_length <= STRING_LENGTH*num_pipes); +} + +static int verify_result(void *ptr1, void *ptr2, int n) +{ + int i; + int sum_input = 0, sum_output = 0; + cl_char *inptr = (cl_char *)ptr1; + cl_char *outptr = (cl_char *)ptr2; + + for(i = 0; i < n; i++) + { + sum_input += inptr[i]; + sum_output += outptr[i]; + } + if(sum_input != sum_output){ + return -1; + } + return 0; +} + +static int verify_result_int(void *ptr1, void *ptr2, int n) +{ + int i; + int sum_input = 0, sum_output = 0; + cl_int *inptr = (cl_int *)ptr1; + cl_int *outptr = (cl_int *)ptr2; + + for(i = 0; i < n; i++) + { + sum_input += inptr[i]; + sum_output += outptr[i]; + } + if(sum_input != sum_output){ + return -1; + } + return 0; +} + +int test_pipe_max_args(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + + cl_mem pipes[1024]; + cl_mem buffers[2]; + void *outptr; + cl_int *inptr; + cl_program program; + cl_kernel kernel[2]; + size_t global_work_size[3]; + cl_int err; + cl_int size; + int num_pipe_elements = 1024; + int i, j; + int max_pipe_args; + char *source; + cl_event producer_sync_event = NULL; + cl_event consumer_sync_event = NULL; + MTdata d = init_genrand( gRandomSeed ); + const char* kernelName[] = {"test_multiple_pipe_write", "test_multiple_pipe_read"}; + + size_t min_alignment = get_min_alignment(context); + + err = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_PIPE_ARGS, sizeof(max_pipe_args), (void*)&max_pipe_args, NULL); + if(err){ + print_error(err, " clGetDeviceInfo failed\n"); + return -1; + } + if(max_pipe_args < 16){ + log_error("The device should support minimum 16 pipe objects that could be 
passed as arguments to the kernel"); + return -1; + } + + global_work_size[0] = (cl_uint)num_pipe_elements * max_pipe_args; + size = sizeof(int) * num_pipe_elements * max_pipe_args; + source = (char *)malloc(STRING_LENGTH * sizeof(char) * max_pipe_args); + + inptr = (cl_int *)align_malloc(size, min_alignment); + + for(i = 0; i < num_pipe_elements * max_pipe_args; i++){ + inptr[i] = (int)genrand_int32(d); + } + + buffers[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, size, inptr, &err); + if(err){ + clReleaseMemObject(buffers[0]); + free(source); + print_error(err, " clCreateBuffer failed\n"); + return -1; + } + outptr = align_malloc(size, min_alignment); + buffers[1] = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, size, outptr, &err); + if ( err ){ + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + align_free( outptr ); + free(source); + print_error(err, " clCreateBuffer failed\n" ); + return -1; + } + + for(i = 0; i < max_pipe_args; i++){ + pipes[i] = clCreatePipe(context, CL_MEM_HOST_NO_ACCESS, sizeof(int), num_pipe_elements, NULL, &err); + if(err){ + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + align_free( outptr ); + free(source); + for(j = 0; j < i; j++) { + clReleaseMemObject(pipes[j]); + } + print_error(err, " clCreatePipe failed\n"); + return -1; + } + } + + createKernelSourceCode(source, max_pipe_args); + + // Create producer kernel + err = create_single_kernel_helper_with_build_options(context, &program, &kernel[0], 1, (const char**)&source, kernelName[0], "-cl-std=CL2.0"); + if(err){ + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + for(j = 0; j < max_pipe_args; j++) { + clReleaseMemObject(pipes[j]); + } + align_free(outptr); + free(source); + print_error(err, "Error creating program\n"); + return -1; + } + //Create consumer kernel + kernel[1] = clCreateKernel(program, kernelName[1], &err); + if( kernel[1] == NULL || err != CL_SUCCESS) + { + clReleaseMemObject(buffers[0]); + 
clReleaseMemObject(buffers[1]); + for(j = 0; j < max_pipe_args; j++) { + clReleaseMemObject(pipes[j]); + } + align_free(outptr); + free(source); + print_error(err, " Error creating kernel\n"); + return -1; + } + + err = clSetKernelArg(kernel[0], 0, sizeof(cl_mem), (void*)&buffers[0]); + for( i = 0; i < max_pipe_args; i++){ + err |= clSetKernelArg(kernel[0], i+1, sizeof(cl_mem), (void*)&pipes[i]); + } + err |= clSetKernelArg(kernel[0], max_pipe_args + 1, sizeof(int), (void*)&max_pipe_args); + err |= clSetKernelArg(kernel[1], 0, sizeof(cl_mem), (void*)&buffers[1]); + for( i = 0; i < max_pipe_args; i++){ + err |= clSetKernelArg(kernel[1], i+1, sizeof(cl_mem), (void*)&pipes[i]); + } + err |= clSetKernelArg(kernel[1], max_pipe_args + 1, sizeof(int), (void*)&max_pipe_args); + if ( err != CL_SUCCESS ){ + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + for(j = 0; j < max_pipe_args; j++) { + clReleaseMemObject(pipes[j]); + } + clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + clReleaseProgram(program); + align_free(outptr); + free(source); + print_error(err, " clSetKernelArg failed"); + return -1; + } + + // Launch Producer kernel + err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, NULL, 0, NULL, &producer_sync_event ); + if ( err != CL_SUCCESS ){ + print_error( err, " clEnqueueNDRangeKernel failed" ); + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + for(j = 0; j < max_pipe_args; j++) { + clReleaseMemObject(pipes[j]); + } + clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + clReleaseEvent(producer_sync_event); + clReleaseEvent(consumer_sync_event); + clReleaseProgram(program); + align_free(outptr); + free(source); + return -1; + } + + // Launch Consumer kernel + err = clEnqueueNDRangeKernel( queue, kernel[1], 1, NULL, global_work_size, NULL, 1, &producer_sync_event, &consumer_sync_event ); + if ( err != CL_SUCCESS ){ + print_error( err, " clEnqueueNDRangeKernel failed" ); + 
clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + for(j = 0; j < max_pipe_args; j++) { + clReleaseMemObject(pipes[j]); + } + clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + clReleaseEvent(producer_sync_event); + clReleaseEvent(consumer_sync_event); + clReleaseProgram(program); + align_free(outptr); + free(source); + return -1; + } + + err = clEnqueueReadBuffer(queue, buffers[1], true, 0, size, outptr, 1, &consumer_sync_event, NULL); + if ( err != CL_SUCCESS ){ + print_error( err, " clEnqueueReadBuffer failed" ); + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + for(j = 0; j < max_pipe_args; j++) { + clReleaseMemObject(pipes[j]); + } + clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + clReleaseEvent(producer_sync_event); + clReleaseEvent(consumer_sync_event); + clReleaseProgram(program); + align_free(outptr); + free(source); + return -1; + } + + err = clWaitForEvents(1, &consumer_sync_event); + if ( err != CL_SUCCESS ){ + print_error( err, " clWaitForEvents failed" ); + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + for(j = 0; j < max_pipe_args; j++) { + clReleaseMemObject(pipes[j]); + } + clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + clReleaseEvent(producer_sync_event); + clReleaseEvent(consumer_sync_event); + clReleaseProgram(program); + align_free(outptr); + free(source); + return -1; + } + + if( verify_result( inptr, outptr, num_pipe_elements*sizeof(cl_int))){ + log_error("test_pipe_max_args failed\n"); + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + for(j = 0; j < max_pipe_args; j++) { + clReleaseMemObject(pipes[j]); + } + clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + clReleaseEvent(producer_sync_event); + clReleaseEvent(consumer_sync_event); + clReleaseProgram(program); + align_free(outptr); + free(source); + return -1; + } + else { + log_info("test_pipe_max_args passed\n"); + } + //cleanup + clReleaseMemObject(buffers[0]); + 
clReleaseMemObject(buffers[1]); + for(j = 0; j < max_pipe_args; j++) { + clReleaseMemObject(pipes[j]); + } + clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + clReleaseEvent(producer_sync_event); + clReleaseEvent(consumer_sync_event); + clReleaseProgram(program); + align_free(outptr); + free(source); + + return 0; +} + + +int test_pipe_max_packet_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + cl_mem pipe; + cl_mem buffers[2]; + void *outptr; + cl_char *inptr; + cl_program program; + cl_kernel kernel[2]; + size_t global_work_size[3]; + cl_int err; + size_t size; + int num_pipe_elements = 1024; + int i; + cl_uint max_pipe_packet_size; + char *source; + char str[256]; + int str_length; + cl_event producer_sync_event = NULL; + cl_event consumer_sync_event = NULL; + MTdata d = init_genrand( gRandomSeed ); + const char* kernelName[] = {"test_pipe_max_packet_size_write", "test_pipe_max_packet_size_read"}; + + size_t min_alignment = get_min_alignment(context); + + global_work_size[0] = (cl_uint)num_pipe_elements; + + source = (char*)malloc(STRING_LENGTH*sizeof(char)); + + err = clGetDeviceInfo(deviceID, CL_DEVICE_PIPE_MAX_PACKET_SIZE, sizeof(max_pipe_packet_size), (void*)&max_pipe_packet_size, NULL); + if(err){ + print_error(err, " clGetDeviceInfo failed\n"); + return -1; + } + if(max_pipe_packet_size < 1024){ + log_error("The device should support minimum packet size of 1024 bytes"); + return -1; + } + + if(max_pipe_packet_size > (32*1024*1024/num_pipe_elements)) + { + max_pipe_packet_size = 32*1024*1024/num_pipe_elements; + } + + size = max_pipe_packet_size * num_pipe_elements; + + inptr = (cl_char *)align_malloc(size, min_alignment); + + for(i = 0; i < size; i++){ + inptr[i] = (char)genrand_int32(d); + } + + buffers[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, size, inptr, &err); + if(err){ + clReleaseMemObject(buffers[0]); + free(source); + print_error(err, " clCreateBuffer failed\n"); + return -1; + } + 
outptr = align_malloc(size, min_alignment); + buffers[1] = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, size, outptr, &err); + if ( err ){ + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + align_free( outptr ); + free(source); + print_error(err, " clCreateBuffer failed\n" ); + return -1; + } + + pipe = clCreatePipe(context, CL_MEM_HOST_NO_ACCESS, max_pipe_packet_size, num_pipe_elements, NULL, &err); + if(err){ + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + align_free( outptr ); + free(source); + clReleaseMemObject(pipe); + print_error(err, " clCreatePipe failed\n"); + return -1; + } + + sprintf(str, "typedef struct{\n char a[%d];\n}TestStruct;\n\n__kernel void test_pipe_max_packet_size_write(__global TestStruct *src, __write_only pipe TestStruct out_pipe)\n{\n", max_pipe_packet_size); + strcpy(source,str); + strcat(source, " int gid = get_global_id(0);\n reserve_id_t res_id;\n\n"); + sprintf(str, " res_id = reserve_write_pipe(out_pipe, 1);\n if(is_valid_reserve_id(res_id))\n {\n"); + strcat(source, str); + sprintf(str, " write_pipe(out_pipe, res_id, 0, &src[gid]);\n commit_write_pipe(out_pipe, res_id);\n }\n}\n\n"); + strcat(source, str); + sprintf(str, "__kernel void test_pipe_max_packet_size_read(__read_only pipe TestStruct in_pipe, __global TestStruct *dst)\n{\n"); + strcat(source, str); + strcat(source, " int gid = get_global_id(0);\n reserve_id_t res_id;\n\n"); + sprintf(str, " res_id = reserve_read_pipe(in_pipe, 1);\n if(is_valid_reserve_id(res_id))\n {\n"); + strcat(source, str); + sprintf(str, " read_pipe(in_pipe, res_id, 0, &dst[gid]);\n commit_read_pipe(in_pipe, res_id);\n }\n}\n\n"); + strcat(source, str); + + str_length = strlen(source); + assert(str_length <= STRING_LENGTH); + + // Create producer kernel + err = create_single_kernel_helper_with_build_options(context, &program, &kernel[0], 1, (const char**)&source, kernelName[0], "-cl-std=CL2.0"); + if(err){ + clReleaseMemObject(buffers[0]); + 
clReleaseMemObject(buffers[1]); + clReleaseMemObject(pipe); + align_free(outptr); + free(source); + print_error(err, "Error creating program\n"); + return -1; + } + //Create consumer kernel + kernel[1] = clCreateKernel(program, kernelName[1], &err); + if( kernel[1] == NULL || err != CL_SUCCESS) + { + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseMemObject(pipe); + align_free(outptr); + free(source); + print_error(err, "Error creating kernel\n"); + return -1; + } + + err = clSetKernelArg(kernel[0], 0, sizeof(cl_mem), (void*)&buffers[0]); + err |= clSetKernelArg(kernel[0], 1, sizeof(cl_mem), (void*)&pipe); + err |= clSetKernelArg(kernel[1], 0, sizeof(cl_mem), (void*)&pipe); + err |= clSetKernelArg(kernel[1], 1, sizeof(cl_mem), (void*)&buffers[1]); + if ( err != CL_SUCCESS ){ + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseMemObject(pipe); + clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + clReleaseProgram(program); + align_free(outptr); + free(source); + print_error(err, " clSetKernelArg failed"); + return -1; + } + + // Launch Producer kernel + err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, NULL, 0, NULL, &producer_sync_event ); + if ( err != CL_SUCCESS ){ + print_error( err, " clEnqueueNDRangeKernel failed" ); + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseMemObject(pipe); + clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + clReleaseEvent(producer_sync_event); + clReleaseEvent(consumer_sync_event); + clReleaseProgram(program); + align_free(outptr); + free(source); + return -1; + } + + // Launch Consumer kernel + err = clEnqueueNDRangeKernel( queue, kernel[1], 1, NULL, global_work_size, NULL, 1, &producer_sync_event, &consumer_sync_event ); + if ( err != CL_SUCCESS ){ + print_error( err, " clEnqueueNDRangeKernel failed" ); + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseMemObject(pipe); + 
clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + clReleaseEvent(producer_sync_event); + clReleaseEvent(consumer_sync_event); + clReleaseProgram(program); + align_free(outptr); + free(source); + return -1; + } + + err = clEnqueueReadBuffer(queue, buffers[1], true, 0, size, outptr, 1, &consumer_sync_event, NULL); + if ( err != CL_SUCCESS ){ + print_error( err, " clEnqueueReadBuffer failed" ); + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseMemObject(pipe); + clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + clReleaseEvent(producer_sync_event); + clReleaseEvent(consumer_sync_event); + clReleaseProgram(program); + align_free(outptr); + free(source); + return -1; + } + + if( verify_result( inptr, outptr, size)){ + log_error("test_pipe_max_packet_size failed\n"); + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseMemObject(pipe); + clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + clReleaseEvent(producer_sync_event); + clReleaseEvent(consumer_sync_event); + clReleaseProgram(program); + align_free(outptr); + free(source); + return -1; + } + else { + log_info("test_pipe_max_packet_size passed\n"); + } + //cleanup + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseMemObject(pipe); + clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + clReleaseEvent(producer_sync_event); + clReleaseEvent(consumer_sync_event); + clReleaseProgram(program); + align_free(outptr); + free(source); + + return 0; +} + +int test_pipe_max_active_reservations(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + cl_mem pipe; + cl_mem buffers[2]; + cl_mem buf_reservations; + cl_mem buf_status; + cl_mem buf_reserve_id_t_size; + cl_mem buf_reserve_id_t_size_aligned; + cl_int *inptr; + void *outptr; + int size, i; + cl_program program; + cl_kernel kernel[3]; + size_t global_work_size[3]; + cl_int err; + int status = 0; + cl_uint 
max_active_reservations = 0; + cl_ulong max_global_size = 0; + int reserve_id_t_size; + int temp; + char *source; + char str[256]; + int str_length; + cl_event sync_event = NULL; + cl_event read_event = NULL; + MTdata d = init_genrand( gRandomSeed ); + const char* kernelName[3] = {"test_pipe_max_active_reservations_write", "test_pipe_max_active_reservations_read", "pipe_get_reserve_id_t_size"}; + + size_t min_alignment = get_min_alignment(context); + + source = (char*)malloc(2*STRING_LENGTH*sizeof(char)); + + global_work_size[0] = 1; + + err = clGetDeviceInfo(deviceID, CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS, sizeof(max_active_reservations), (void*)&max_active_reservations, NULL); + if(err){ + print_error(err, " clGetDeviceInfo failed\n"); + return -1; + } + + err = clGetDeviceInfo(deviceID, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(max_global_size), (void*)&max_global_size, NULL); + if(err){ + print_error(err, " clGetDeviceInfo failed\n"); + return -1; + } + + max_active_reservations = (max_active_reservations > max_global_size) ? 
1<<16 : max_active_reservations; + + if(max_active_reservations < 1){ + log_error("The device should support minimum active reservations of 1"); + return -1; + } + + // To get reserve_id_t size + buf_reserve_id_t_size = clCreateBuffer(context, CL_MEM_HOST_READ_ONLY, sizeof(reserve_id_t_size), NULL, &err); + if ( err ){ + clReleaseMemObject(buf_reserve_id_t_size); + print_error(err, " clCreateBuffer failed\n" ); + return -1; + } + + sprintf(str, "__kernel void test_pipe_max_active_reservations_write(__global int *src, __write_only pipe int out_pipe, __global char *reserve_id, __global int *reserve_id_t_size_aligned, __global int *status)\n{\n"); + strcpy(source,str); + sprintf(str, " __global reserve_id_t *res_id_ptr;\n int reserve_idx;\n int commit_idx;\n"); + strcat(source, str); + sprintf(str, " for(reserve_idx = 0; reserve_idx < %d; reserve_idx++)\n {\n", max_active_reservations); + strcat(source, str); + sprintf(str, " res_id_ptr = (__global reserve_id_t*)(reserve_id + reserve_idx*reserve_id_t_size_aligned[0]);\n"); + strcat(source, str); + sprintf(str, " *res_id_ptr = reserve_write_pipe(out_pipe, 1);\n"); + strcat(source, str); + sprintf(str, " if(is_valid_reserve_id(res_id_ptr[0]))\n {\n write_pipe(out_pipe, res_id_ptr[0], 0, &src[reserve_idx]);\n }\n"); + strcat(source, str); + sprintf(str, " else\n {\n *status = -1;\n return;\n }\n }\n"); + strcat(source, str); + sprintf(str, " for(commit_idx = 0; commit_idx < %d; commit_idx++)\n {\n", max_active_reservations); + strcat(source, str); + sprintf(str, " res_id_ptr = (__global reserve_id_t*)(reserve_id + commit_idx*reserve_id_t_size_aligned[0]);\n"); + strcat(source, str); + sprintf(str, " commit_write_pipe(out_pipe, res_id_ptr[0]);\n }\n}\n\n"); + strcat(source, str); + sprintf(str, "__kernel void test_pipe_max_active_reservations_read(__read_only pipe int in_pipe, __global int *dst, __global char *reserve_id, __global int *reserve_id_t_size_aligned, __global int *status)\n{\n"); + strcat(source, str); + 
sprintf(str, " __global reserve_id_t *res_id_ptr;\n int reserve_idx;\n int commit_idx;\n"); + strcat(source, str); + sprintf(str, " for(reserve_idx = 0; reserve_idx < %d; reserve_idx++)\n {\n", max_active_reservations); + strcat(source, str); + sprintf(str, " res_id_ptr = (__global reserve_id_t*)(reserve_id + reserve_idx*reserve_id_t_size_aligned[0]);\n"); + strcat(source, str); + sprintf(str, " *res_id_ptr = reserve_read_pipe(in_pipe, 1);\n"); + strcat(source, str); + sprintf(str, " if(is_valid_reserve_id(res_id_ptr[0]))\n {\n read_pipe(in_pipe, res_id_ptr[0], 0, &dst[reserve_idx]);\n }\n"); + strcat(source, str); + sprintf(str, " else\n {\n *status = -1;\n return;\n }\n }\n"); + strcat(source, str); + sprintf(str, " for(commit_idx = 0; commit_idx < %d; commit_idx++)\n {\n", max_active_reservations); + strcat(source, str); + sprintf(str, " res_id_ptr = (__global reserve_id_t*)(reserve_id + commit_idx*reserve_id_t_size_aligned[0]);\n"); + strcat(source, str); + sprintf(str, " commit_read_pipe(in_pipe, res_id_ptr[0]);\n }\n}\n\n"); + strcat(source, str); + sprintf(str, "__kernel void pipe_get_reserve_id_t_size(__global int *reserve_id_t_size) \n"); + strcat(source, str); + sprintf(str, "{\n *reserve_id_t_size = sizeof(reserve_id_t);\n}\n"); + strcat(source, str); + + str_length = strlen(source); + assert(str_length <= 2*STRING_LENGTH); + + // Create producer kernel + err = create_single_kernel_helper_with_build_options(context, &program, &kernel[0], 1, (const char**)&source, kernelName[0], "-cl-std=CL2.0"); + if(err){ + clReleaseMemObject(buf_reserve_id_t_size); + print_error(err, "Error creating program\n"); + return -1; + } + + // Create consumer kernel + kernel[1] = clCreateKernel(program, kernelName[1], &err); + if( kernel[1] == NULL || err != CL_SUCCESS) + { + clReleaseMemObject(buf_reserve_id_t_size); + print_error(err, "Error creating kernel\n"); + return -1; + } + + // Create size query kernel for reserve_id_t + kernel[2] = clCreateKernel(program, 
kernelName[2], &err); + if( kernel[2] == NULL || err != CL_SUCCESS) + { + clReleaseMemObject(buf_reserve_id_t_size); + print_error(err, "Error creating kernel\n"); + return -1; + } + err = clSetKernelArg(kernel[2], 0, sizeof(cl_mem), (void*)&buf_reserve_id_t_size); + if(err){ + clReleaseMemObject(buf_reserve_id_t_size); + print_error(err, "Error creating program\n"); + return -1; + } + //Launch size query kernel for reserve_id_t + err = clEnqueueNDRangeKernel( queue, kernel[2], 1, NULL, global_work_size, NULL, 0, NULL, &sync_event ); + if ( err != CL_SUCCESS ){ + print_error( err, " clEnqueueNDRangeKernel failed" ); + clReleaseMemObject(buf_reserve_id_t_size); + clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + clReleaseKernel(kernel[2]); + clReleaseEvent(sync_event); + clReleaseProgram(program); + return -1; + } + + err = clEnqueueReadBuffer(queue, buf_reserve_id_t_size, true, 0, sizeof(reserve_id_t_size), &reserve_id_t_size, 1, &sync_event, &read_event); + if ( err != CL_SUCCESS ){ + print_error( err, " clEnqueueReadBuffer failed" ); + clReleaseMemObject(buf_reserve_id_t_size); + clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + clReleaseKernel(kernel[2]); + clReleaseEvent(sync_event); + clReleaseProgram(program); + return -1; + } + + err = clWaitForEvents(1, &read_event); + if ( err != CL_SUCCESS ){ + print_error( err, " clWaitForEvents failed" ); + clReleaseMemObject(buf_reserve_id_t_size); + clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + clReleaseKernel(kernel[2]); + clReleaseEvent(sync_event); + clReleaseEvent(read_event); + clReleaseProgram(program); + return -1; + } + + // Round reserve_id_t_size to the nearest power of 2 + temp = 1; + while(temp < reserve_id_t_size) + temp *= 2; + reserve_id_t_size = temp; + + size = sizeof(cl_int) * max_active_reservations; + inptr = (cl_int *)align_malloc(size, min_alignment); + + for(i = 0; i < max_active_reservations; i++){ + inptr[i] = (int)genrand_int32(d); + } + + buffers[0] = 
clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, size, inptr, &err); + if ( err ){ + clReleaseMemObject(buf_reserve_id_t_size); + clReleaseMemObject(buffers[0]); + clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + clReleaseKernel(kernel[2]); + clReleaseEvent(sync_event); + clReleaseEvent(read_event); + clReleaseProgram(program); + print_error(err, " clCreateBuffer failed\n" ); + return -1; + } + + outptr = align_malloc(size, min_alignment); + buffers[1] = clCreateBuffer(context, CL_MEM_HOST_READ_ONLY, size, NULL, &err); + if ( err ){ + clReleaseMemObject(buf_reserve_id_t_size); + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + clReleaseKernel(kernel[2]); + clReleaseEvent(sync_event); + clReleaseEvent(read_event); + clReleaseProgram(program); + align_free(outptr); + print_error(err, " clCreateBuffer failed\n" ); + return -1; + } + + buf_reserve_id_t_size_aligned = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeof(reserve_id_t_size), &reserve_id_t_size, &err); + if ( err ){ + clReleaseMemObject(buf_reserve_id_t_size); + clReleaseMemObject(buf_reserve_id_t_size_aligned); + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + clReleaseKernel(kernel[2]); + clReleaseEvent(sync_event); + clReleaseEvent(read_event); + clReleaseProgram(program); + print_error(err, " clCreateBuffer failed\n" ); + return -1; + } + + //For error status + buf_status = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeof(int), &status, &err); + if ( err ){ + clReleaseMemObject(buf_status); + clReleaseMemObject(buf_reserve_id_t_size); + clReleaseMemObject(buf_reserve_id_t_size_aligned); + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + align_free(outptr); + clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + clReleaseKernel(kernel[2]); + clReleaseEvent(sync_event); + clReleaseEvent(read_event); + 
clReleaseProgram(program); + print_error(err, " clCreateBuffer failed\n" ); + return -1; + } + + pipe = clCreatePipe(context, CL_MEM_HOST_NO_ACCESS, sizeof(int), max_active_reservations, NULL, &err); + if(err){ + clReleaseMemObject(buf_status); + clReleaseMemObject(buf_reserve_id_t_size); + clReleaseMemObject(buf_reserve_id_t_size_aligned); + clReleaseMemObject(pipe); + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + align_free(outptr); + clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + clReleaseKernel(kernel[2]); + clReleaseEvent(sync_event); + clReleaseEvent(read_event); + clReleaseProgram(program); + print_error(err, " clCreatePipe failed\n"); + return -1; + } + + // Global buffer to hold all active reservation ids + buf_reservations = clCreateBuffer(context, CL_MEM_HOST_NO_ACCESS, reserve_id_t_size*max_active_reservations, NULL, &err); + if ( err != CL_SUCCESS ){ + print_error( err, " clCreateBuffer failed" ); + clReleaseMemObject(buf_status); + clReleaseMemObject(buf_reserve_id_t_size); + clReleaseMemObject(buf_reserve_id_t_size_aligned); + clReleaseMemObject(buf_reservations); + clReleaseMemObject(pipe); + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + align_free(outptr); + clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + clReleaseKernel(kernel[2]); + clReleaseEvent(sync_event); + clReleaseEvent(read_event); + clReleaseProgram(program); + return -1; + } + + err = clSetKernelArg(kernel[0], 0, sizeof(cl_mem), (void*)&buffers[0]); + err |= clSetKernelArg(kernel[0], 1, sizeof(cl_mem), (void*)&pipe); + err |= clSetKernelArg(kernel[0], 2, sizeof(cl_mem), (void*)&buf_reservations); + err |= clSetKernelArg(kernel[0], 3, sizeof(cl_mem), (void*)&buf_reserve_id_t_size_aligned); + err |= clSetKernelArg(kernel[0], 4, sizeof(cl_mem), (void*)&buf_status); + if ( err != CL_SUCCESS ){ + clReleaseMemObject(buf_status); + clReleaseMemObject(buf_reserve_id_t_size); + clReleaseMemObject(buf_reserve_id_t_size_aligned); 
+ clReleaseMemObject(buf_reservations); + clReleaseMemObject(pipe); + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + align_free(outptr); + clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + clReleaseKernel(kernel[2]); + clReleaseProgram(program); + print_error(err, " clSetKernelArg failed"); + return -1; + } + + err = clSetKernelArg(kernel[1], 0, sizeof(cl_mem), (void*)&pipe); + err |= clSetKernelArg(kernel[1], 1, sizeof(cl_mem), (void*)&buffers[1]); + err |= clSetKernelArg(kernel[1], 2, sizeof(cl_mem), (void*)&buf_reservations); + err |= clSetKernelArg(kernel[1], 3, sizeof(cl_mem), (void*)&buf_reserve_id_t_size_aligned); + err |= clSetKernelArg(kernel[1], 4, sizeof(cl_mem), (void*)&buf_status); + if ( err != CL_SUCCESS ){ + clReleaseMemObject(buf_status); + clReleaseMemObject(buf_reserve_id_t_size); + clReleaseMemObject(buf_reserve_id_t_size_aligned); + clReleaseMemObject(buf_reservations); + clReleaseMemObject(pipe); + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + align_free(outptr); + clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + clReleaseKernel(kernel[2]); + clReleaseProgram(program); + print_error(err, " clSetKernelArg failed"); + return -1; + } + + // Launch Producer kernel + err = clEnqueueNDRangeKernel(queue, kernel[0], 1, NULL, global_work_size, NULL, 0, NULL, &sync_event); + if ( err != CL_SUCCESS ){ + print_error( err, " clEnqueueNDRangeKernel failed" ); + clReleaseMemObject(buf_status); + clReleaseMemObject(buf_reserve_id_t_size); + clReleaseMemObject(buf_reserve_id_t_size_aligned); + clReleaseMemObject(buf_reservations); + clReleaseMemObject(pipe); + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + align_free(outptr); + clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + clReleaseKernel(kernel[2]); + clReleaseEvent(sync_event); + clReleaseProgram(program); + return -1; + } + + err = clEnqueueReadBuffer(queue, buf_status, true, 0, sizeof(int), &status, 1, 
&sync_event, NULL); + if ( err != CL_SUCCESS ){ + print_error( err, " clEnqueueReadBuffer failed" ); + clReleaseMemObject(buf_status); + clReleaseMemObject(buf_reserve_id_t_size); + clReleaseMemObject(buf_reserve_id_t_size_aligned); + clReleaseMemObject(buf_reservations); + clReleaseMemObject(pipe); + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + align_free(outptr); + clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + clReleaseKernel(kernel[2]); + clReleaseEvent(sync_event); + clReleaseProgram(program); + return -1; + } + + if(status != 0) + { + log_error("test_pipe_max_active_reservations failed\n"); + clReleaseMemObject(buf_status); + clReleaseMemObject(buf_reserve_id_t_size); + clReleaseMemObject(buf_reserve_id_t_size_aligned); + clReleaseMemObject(buf_reservations); + clReleaseMemObject(pipe); + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + align_free(outptr); + clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + clReleaseEvent(sync_event); + clReleaseProgram(program); + return -1; + } + + // Launch Consumer kernel + err = clEnqueueNDRangeKernel(queue, kernel[1], 1, NULL, global_work_size, NULL, 0, NULL, &sync_event); + if ( err != CL_SUCCESS ){ + print_error( err, " clEnqueueNDRangeKernel failed" ); + clReleaseMemObject(buf_status); + clReleaseMemObject(buf_reserve_id_t_size); + clReleaseMemObject(buf_reserve_id_t_size_aligned); + clReleaseMemObject(buf_reservations); + clReleaseMemObject(pipe); + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + align_free(outptr); + clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + clReleaseKernel(kernel[2]); + clReleaseEvent(sync_event); + clReleaseProgram(program); + return -1; + } + + err = clEnqueueReadBuffer(queue, buf_status, true, 0, sizeof(int), &status, 1, &sync_event, NULL); + if ( err != CL_SUCCESS ){ + print_error( err, " clEnqueueReadBuffer failed" ); + clReleaseMemObject(buf_status); + 
clReleaseMemObject(buf_reserve_id_t_size); + clReleaseMemObject(buf_reserve_id_t_size_aligned); + clReleaseMemObject(buf_reservations); + clReleaseMemObject(pipe); + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + align_free(outptr); + clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + clReleaseKernel(kernel[2]); + clReleaseEvent(sync_event); + clReleaseProgram(program); + return -1; + } + + if(status != 0) + { + log_error("test_pipe_max_active_reservations failed\n"); + clReleaseMemObject(buf_status); + clReleaseMemObject(buf_reserve_id_t_size); + clReleaseMemObject(buf_reserve_id_t_size_aligned); + clReleaseMemObject(buf_reservations); + clReleaseMemObject(pipe); + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + align_free(outptr); + clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + clReleaseEvent(sync_event); + clReleaseProgram(program); + return -1; + } + + err = clEnqueueReadBuffer(queue, buffers[1], true, 0, size, outptr, 1, &sync_event, NULL); + if ( err != CL_SUCCESS ){ + print_error( err, " clEnqueueReadBuffer failed" ); + clReleaseMemObject(buf_status); + clReleaseMemObject(buf_reserve_id_t_size); + clReleaseMemObject(buf_reserve_id_t_size_aligned); + clReleaseMemObject(buf_reservations); + clReleaseMemObject(pipe); + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + align_free(outptr); + clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + clReleaseKernel(kernel[2]); + clReleaseEvent(sync_event); + clReleaseProgram(program); + return -1; + } + + if( verify_result_int( inptr, outptr, max_active_reservations)){ + log_error("test_pipe_max_active_reservations failed\n"); + clReleaseMemObject(buf_status); + clReleaseMemObject(buf_reserve_id_t_size); + clReleaseMemObject(buf_reserve_id_t_size_aligned); + clReleaseMemObject(buf_reservations); + clReleaseMemObject(pipe); + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + align_free(outptr); + 
clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + clReleaseKernel(kernel[2]); + clReleaseEvent(sync_event); + clReleaseProgram(program); + return -1; + } + else { + log_info("test_pipe_max_active_reservations passed\n"); + } + + //cleanup + clReleaseMemObject(buf_status); + clReleaseMemObject(buf_reserve_id_t_size); + clReleaseMemObject(buf_reserve_id_t_size_aligned); + clReleaseMemObject(buf_reservations); + clReleaseMemObject(pipe); + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + align_free(outptr); + clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + clReleaseEvent(sync_event); + clReleaseProgram(program); + return 0; +} \ No newline at end of file diff --git a/test_conformance/pipes/test_pipe_query_functions.c b/test_conformance/pipes/test_pipe_query_functions.c new file mode 100644 index 00000000..ce3baafd --- /dev/null +++ b/test_conformance/pipes/test_pipe_query_functions.c @@ -0,0 +1,544 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include + +#include "procs.h" +#include "../../test_common/harness/errorHelpers.h" + +#define TEST_PRIME_INT ((1<<16)+1) + +const char* pipe_query_functions_kernel_code = { + "__kernel void test_pipe_write(__global int *src, __write_only pipe int out_pipe)\n" + "{\n" + " int gid = get_global_id(0);\n" + " reserve_id_t res_id;\n" + " res_id = reserve_write_pipe(out_pipe, 1);\n" + " if(is_valid_reserve_id(res_id))\n" + " {\n" + " write_pipe(out_pipe, res_id, 0, &src[gid]);\n" + " commit_write_pipe(out_pipe, res_id);\n" + " }\n" + "}\n" + "\n" + "__kernel void test_pipe_query_functions(__write_only pipe int out_pipe, __global int *num_packets, __global int *max_packets)\n" + "{\n" + " *max_packets = get_pipe_max_packets(out_pipe);\n" + " *num_packets = get_pipe_num_packets(out_pipe);\n" + "}\n" + "\n" + "__kernel void test_pipe_read(__read_only pipe int in_pipe, __global int *dst)\n" + "{\n" + " int gid = get_global_id(0);\n" + " reserve_id_t res_id;\n" + " res_id = reserve_read_pipe(in_pipe, 1);\n" + " if(is_valid_reserve_id(res_id))\n" + " {\n" + " read_pipe(in_pipe, res_id, 0, &dst[gid]);\n" + " commit_read_pipe(in_pipe, res_id);\n" + " }\n" + "}\n" }; + +static int verify_result(void *ptr1, void *ptr2, int n) +{ + int i, sum_output = 0; + cl_int *outptr1 = (int *)ptr1; + cl_int *outptr2 = (int *)ptr2; + int cmp_val = ((n*3)/2) * TEST_PRIME_INT; + + for(i = 0; i < n/2; i++) + { + sum_output += outptr1[i]; + } + for(i = 0; i < n; i++) + { + sum_output += outptr2[i]; + } + if(sum_output != cmp_val){ + return -1; + } + return 0; +} + +int test_pipe_query_functions(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + cl_mem pipe; + cl_mem buffers[4]; + void *outptr1; + void *outptr2; + cl_int *inptr; + cl_program program; + cl_kernel kernel[3]; + size_t global_work_size[3]; + size_t half_global_work_size[3]; + size_t global_work_size_pipe_query[3]; 
+ cl_int pipe_max_packets, pipe_num_packets; + cl_int err; + cl_int size; + cl_int i; + cl_event producer_sync_event = NULL; + cl_event consumer_sync_event = NULL; + cl_event pipe_query_sync_event = NULL; + cl_event pipe_read_sync_event = NULL; + MTdata d = init_genrand( gRandomSeed ); + const char* kernelName[] = {"test_pipe_write", "test_pipe_read", "test_pipe_query_functions"}; + + size_t min_alignment = get_min_alignment(context); + + size = sizeof(int) * num_elements; + global_work_size[0] = (cl_uint)num_elements; + half_global_work_size[0] = (cl_uint)(num_elements / 2); + global_work_size_pipe_query[0] = 1; + + inptr = (int *)align_malloc(size, min_alignment); + + for(i = 0; i < num_elements; i++){ + inptr[i] = TEST_PRIME_INT; + } + + buffers[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, size, inptr, &err); + if(err){ + clReleaseMemObject(buffers[0]); + print_error(err, " clCreateBuffer failed\n"); + return -1; + } + outptr1 = align_malloc(size/2, min_alignment); + outptr2 = align_malloc(size, min_alignment); + buffers[1] = clCreateBuffer(context, CL_MEM_HOST_READ_ONLY, size, NULL, &err); + if ( err ){ + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + align_free( outptr1 ); + print_error(err, " clCreateBuffer failed\n" ); + return -1; + } + + buffers[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(int), NULL, &err); + if ( err ){ + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseMemObject(buffers[2]); + align_free( outptr1 ); + print_error(err, " clCreateBuffer failed\n" ); + return -1; + } + + buffers[3] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(int), NULL, &err); + if ( err ){ + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseMemObject(buffers[2]); + clReleaseMemObject(buffers[3]); + align_free( outptr1 ); + print_error(err, " clCreateBuffer failed\n" ); + return -1; + } + + pipe = clCreatePipe(context, CL_MEM_HOST_NO_ACCESS, sizeof(int), num_elements, NULL, 
&err); + if(err){ + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseMemObject(buffers[2]); + clReleaseMemObject(buffers[3]); + align_free( outptr1 ); + clReleaseMemObject(pipe); + print_error(err, " clCreatePipe failed\n"); + return -1; + } + + // Create producer kernel + err = create_single_kernel_helper_with_build_options(context, &program, &kernel[0], 1, (const char**)&pipe_query_functions_kernel_code, kernelName[0], "-cl-std=CL2.0"); + if(err){ + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseMemObject(buffers[2]); + clReleaseMemObject(buffers[3]); + clReleaseMemObject(pipe); + align_free(outptr1); + print_error(err, "Error creating program\n"); + return -1; + } + //Create consumer kernel + kernel[1] = clCreateKernel(program, kernelName[1], &err); + if( kernel[1] == NULL || err != CL_SUCCESS) + { + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseMemObject(buffers[2]); + clReleaseMemObject(buffers[3]); + clReleaseMemObject(pipe); + align_free(outptr1); + print_error(err, "Error creating kernel\n"); + return -1; + } + //Create pipe query functions kernel + kernel[2] = clCreateKernel(program, kernelName[2], &err); + if( kernel[1] == NULL || err != CL_SUCCESS) + { + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseMemObject(buffers[2]); + clReleaseMemObject(buffers[3]); + clReleaseMemObject(pipe); + align_free(outptr1); + print_error(err, "Error creating kernel\n"); + return -1; + } + + err = clSetKernelArg(kernel[0], 0, sizeof(cl_mem), (void*)&buffers[0]); + err |= clSetKernelArg(kernel[0], 1, sizeof(cl_mem), (void*)&pipe); + err |= clSetKernelArg(kernel[1], 0, sizeof(cl_mem), (void*)&pipe); + err |= clSetKernelArg(kernel[1], 1, sizeof(cl_mem), (void*)&buffers[1]); + err |= clSetKernelArg(kernel[2], 0, sizeof(cl_mem), (void*)&pipe); + err |= clSetKernelArg(kernel[2], 1, sizeof(cl_mem), (void*)&buffers[2]); + err |= clSetKernelArg(kernel[2], 2, 
sizeof(cl_mem), (void*)&buffers[3]); + + if ( err != CL_SUCCESS ){ + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseMemObject(buffers[2]); + clReleaseMemObject(buffers[3]); + clReleaseMemObject(pipe); + clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + clReleaseKernel(kernel[2]); + clReleaseProgram(program); + align_free(outptr1); + print_error(err, " clSetKernelArg failed\n"); + return -1; + } + + // Launch Producer kernel + err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, NULL, 0, NULL, &producer_sync_event ); + if ( err != CL_SUCCESS ){ + print_error( err, " clEnqueueNDRangeKernel failed\n" ); + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseMemObject(buffers[2]); + clReleaseMemObject(buffers[3]); + clReleaseMemObject(pipe); + clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + clReleaseKernel(kernel[2]); + clReleaseEvent(producer_sync_event); + clReleaseEvent(consumer_sync_event); + clReleaseEvent(pipe_query_sync_event); + clReleaseEvent(pipe_read_sync_event); + clReleaseProgram(program); + align_free(outptr1); + return -1; + } + + // Launch Pipe query kernel + err = clEnqueueNDRangeKernel( queue, kernel[2], 1, NULL, global_work_size_pipe_query, NULL, 1, &producer_sync_event, &pipe_query_sync_event ); + if ( err != CL_SUCCESS ){ + print_error( err, " clEnqueueNDRangeKernel failed\n" ); + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseMemObject(buffers[2]); + clReleaseMemObject(buffers[3]); + clReleaseMemObject(pipe); + clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + clReleaseKernel(kernel[2]); + clReleaseEvent(producer_sync_event); + clReleaseEvent(consumer_sync_event); + clReleaseEvent(pipe_query_sync_event); + clReleaseEvent(pipe_read_sync_event); + clReleaseProgram(program); + align_free(outptr1); + return -1; + } + + err = clEnqueueReadBuffer(queue, buffers[2], true, 0, sizeof(cl_int), &pipe_num_packets, 1, 
&pipe_query_sync_event, NULL); + if ( err != CL_SUCCESS ){ + print_error( err, " clEnqueueReadBuffer failed\n" ); + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseMemObject(buffers[2]); + clReleaseMemObject(buffers[3]); + clReleaseMemObject(pipe); + clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + clReleaseKernel(kernel[2]); + clReleaseEvent(producer_sync_event); + clReleaseEvent(consumer_sync_event); + clReleaseEvent(pipe_query_sync_event); + clReleaseEvent(pipe_read_sync_event); + clReleaseProgram(program); + align_free(outptr1); + return -1; + } + + + err = clEnqueueReadBuffer(queue, buffers[3], true, 0, sizeof(cl_int), &pipe_max_packets, 1, &pipe_query_sync_event, NULL); + if ( err != CL_SUCCESS ){ + print_error( err, " clEnqueueReadBuffer failed\n" ); + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseMemObject(buffers[2]); + clReleaseMemObject(buffers[3]); + clReleaseMemObject(pipe); + clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + clReleaseKernel(kernel[2]); + clReleaseEvent(producer_sync_event); + clReleaseEvent(consumer_sync_event); + clReleaseEvent(pipe_query_sync_event); + clReleaseEvent(pipe_read_sync_event); + clReleaseProgram(program); + align_free(outptr1); + return -1; + } + + if(pipe_num_packets != num_elements || pipe_max_packets != num_elements) + { + log_error("test_pipe_query_functions failed\n"); + return -1; + } + + // Launch Consumer kernel with half the previous global size + err = clEnqueueNDRangeKernel( queue, kernel[1], 1, NULL, half_global_work_size, NULL, 1, &producer_sync_event, &consumer_sync_event ); + if ( err != CL_SUCCESS ){ + print_error( err, " clEnqueueNDRangeKernel failed\n" ); + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseMemObject(buffers[2]); + clReleaseMemObject(buffers[3]); + clReleaseMemObject(pipe); + clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + clReleaseKernel(kernel[2]); + 
clReleaseEvent(producer_sync_event); + clReleaseEvent(consumer_sync_event); + clReleaseEvent(pipe_query_sync_event); + clReleaseEvent(pipe_read_sync_event); + clReleaseProgram(program); + align_free(outptr1); + return -1; + } + + err = clEnqueueReadBuffer(queue, buffers[1], true, 0, size / 2, outptr1, 1, &consumer_sync_event, NULL); + if ( err != CL_SUCCESS ){ + print_error( err, " clEnqueueReadBuffer failed\n" ); + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseMemObject(buffers[2]); + clReleaseMemObject(buffers[3]); + clReleaseMemObject(pipe); + clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + clReleaseKernel(kernel[2]); + clReleaseEvent(producer_sync_event); + clReleaseEvent(consumer_sync_event); + clReleaseEvent(pipe_query_sync_event); + clReleaseEvent(pipe_read_sync_event); + clReleaseProgram(program); + align_free(outptr1); + return -1; + } + + // Launch Pipe query kernel + err = clEnqueueNDRangeKernel( queue, kernel[2], 1, NULL, global_work_size_pipe_query, NULL, 1, &consumer_sync_event, &pipe_query_sync_event ); + if ( err != CL_SUCCESS ){ + print_error( err, " clEnqueueNDRangeKernel failed\n" ); + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseMemObject(buffers[2]); + clReleaseMemObject(buffers[3]); + clReleaseMemObject(pipe); + clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + clReleaseKernel(kernel[2]); + clReleaseEvent(producer_sync_event); + clReleaseEvent(consumer_sync_event); + clReleaseEvent(pipe_query_sync_event); + clReleaseEvent(pipe_read_sync_event); + clReleaseProgram(program); + align_free(outptr1); + return -1; + } + + err = clEnqueueReadBuffer(queue, buffers[2], true, 0, sizeof(cl_int), &pipe_num_packets, 1, &pipe_query_sync_event, &pipe_read_sync_event); + if ( err != CL_SUCCESS ){ + print_error( err, " clEnqueueReadBuffer failed\n" ); + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseMemObject(buffers[2]); + 
clReleaseMemObject(buffers[3]); + clReleaseMemObject(pipe); + clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + clReleaseKernel(kernel[2]); + clReleaseEvent(producer_sync_event); + clReleaseEvent(consumer_sync_event); + clReleaseEvent(pipe_query_sync_event); + clReleaseEvent(pipe_read_sync_event); + clReleaseProgram(program); + align_free(outptr1); + return -1; + } + + // After consumer kernel consumes num_elements/2 from the pipe, + // there are (num_elements - num_elements/2) remaining package in the pipe. + if(pipe_num_packets != (num_elements - num_elements/2)) + { + log_error("test_pipe_query_functions failed\n"); + return -1; + } + + // Launch Producer kernel to fill the pipe again + global_work_size[0] = pipe_num_packets; + err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, NULL, 1, &pipe_read_sync_event, &producer_sync_event ); + if ( err != CL_SUCCESS ){ + print_error( err, " clEnqueueNDRangeKernel failed\n" ); + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseMemObject(buffers[2]); + clReleaseMemObject(buffers[3]); + clReleaseMemObject(pipe); + clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + clReleaseKernel(kernel[2]); + clReleaseEvent(producer_sync_event); + clReleaseEvent(consumer_sync_event); + clReleaseEvent(pipe_query_sync_event); + clReleaseEvent(pipe_read_sync_event); + clReleaseProgram(program); + align_free(outptr1); + return -1; + } + + // Launch Pipe query kernel + err = clEnqueueNDRangeKernel( queue, kernel[2], 1, NULL, global_work_size_pipe_query, NULL, 1, &producer_sync_event, &pipe_query_sync_event ); + if ( err != CL_SUCCESS ){ + print_error( err, " clEnqueueNDRangeKernel failed\n" ); + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseMemObject(buffers[2]); + clReleaseMemObject(buffers[3]); + clReleaseMemObject(pipe); + clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + clReleaseKernel(kernel[2]); + 
clReleaseEvent(producer_sync_event); + clReleaseEvent(consumer_sync_event); + clReleaseEvent(pipe_query_sync_event); + clReleaseEvent(pipe_read_sync_event); + clReleaseProgram(program); + align_free(outptr1); + return -1; + } + + err = clEnqueueReadBuffer(queue, buffers[2], true, 0, sizeof(cl_int), &pipe_num_packets, 1, &pipe_query_sync_event, &pipe_read_sync_event); + if ( err != CL_SUCCESS ){ + print_error( err, " clEnqueueReadBuffer failed\n" ); + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseMemObject(buffers[2]); + clReleaseMemObject(buffers[3]); + clReleaseMemObject(pipe); + clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + clReleaseKernel(kernel[2]); + clReleaseEvent(producer_sync_event); + clReleaseEvent(consumer_sync_event); + clReleaseEvent(pipe_query_sync_event); + clReleaseEvent(pipe_read_sync_event); + clReleaseProgram(program); + align_free(outptr1); + return -1; + } + + if(pipe_num_packets != num_elements) + { + log_error("test_pipe_query_functions failed\n"); + return -1; + } + + // Launch Consumer kernel to consume all packets from pipe + global_work_size[0] = pipe_num_packets; + err = clEnqueueNDRangeKernel( queue, kernel[1], 1, NULL, global_work_size, NULL, 1, &pipe_read_sync_event, &consumer_sync_event ); + if ( err != CL_SUCCESS ){ + print_error( err, " clEnqueueNDRangeKernel failed\n" ); + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseMemObject(buffers[2]); + clReleaseMemObject(buffers[3]); + clReleaseMemObject(pipe); + clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + clReleaseKernel(kernel[2]); + clReleaseEvent(producer_sync_event); + clReleaseEvent(consumer_sync_event); + clReleaseEvent(pipe_query_sync_event); + clReleaseEvent(pipe_read_sync_event); + clReleaseProgram(program); + align_free(outptr1); + return -1; + } + + err = clEnqueueReadBuffer(queue, buffers[1], true, 0, size, outptr2, 1, &consumer_sync_event, NULL); + if ( err != CL_SUCCESS ){ + 
print_error( err, " clEnqueueReadBuffer failed\n" ); + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseMemObject(buffers[2]); + clReleaseMemObject(buffers[3]); + clReleaseMemObject(pipe); + clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + clReleaseKernel(kernel[2]); + clReleaseEvent(producer_sync_event); + clReleaseEvent(consumer_sync_event); + clReleaseEvent(pipe_query_sync_event); + clReleaseEvent(pipe_read_sync_event); + clReleaseProgram(program); + align_free(outptr1); + return -1; + } + + + if( verify_result(outptr1, outptr2, num_elements )){ + log_error("test_pipe_query_functions failed\n"); + return -1; + } + else { + log_info("test_pipe_query_functions passed\n"); + } + //cleanup + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseMemObject(buffers[2]); + clReleaseMemObject(buffers[3]); + clReleaseMemObject(pipe); + clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + clReleaseKernel(kernel[2]); + clReleaseEvent(producer_sync_event); + clReleaseEvent(consumer_sync_event); + clReleaseEvent(pipe_query_sync_event); + clReleaseEvent(pipe_read_sync_event); + clReleaseProgram(program); + align_free(outptr1); + + return 0; +} + diff --git a/test_conformance/pipes/test_pipe_read_write.c b/test_conformance/pipes/test_pipe_read_write.c new file mode 100644 index 00000000..9191aa0f --- /dev/null +++ b/test_conformance/pipes/test_pipe_read_write.c @@ -0,0 +1,1809 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include +#include + +#include "procs.h" +#include "kernels.h" +#include "../../test_common/harness/errorHelpers.h" + +#ifndef uchar +typedef unsigned char uchar; +#endif + +typedef struct{ + char a; + int b; +} TestStruct; + +#define STRING_LENGTH 1024 + +static int useWorkgroupReserve = 0; +static int useSubgroupReserve = 0; +static int useConvenienceBuiltIn = 0; + +static const char* int_kernel_name[] = { "test_pipe_write_int", "test_pipe_read_int", "test_pipe_write_int2", "test_pipe_read_int2", "test_pipe_write_int4", "test_pipe_read_int4", "test_pipe_write_int8", "test_pipe_read_int8", "test_pipe_write_int16", "test_pipe_read_int16" }; +static const char* uint_kernel_name[] = { "test_pipe_write_uint", "test_pipe_read_uint", "test_pipe_write_uint2", "test_pipe_read_uint2", "test_pipe_write_uint4", "test_pipe_read_uint4", "test_pipe_write_uint8", "test_pipe_read_uint8", "test_pipe_write_uint16", "test_pipe_read_uint16" }; +static const char* long_kernel_name[] = { "test_pipe_write_long", "test_pipe_read_long", "test_pipe_write_long2", "test_pipe_read_long2", "test_pipe_write_long4", "test_pipe_read_long4", "test_pipe_write_long8", "test_pipe_read_long8", "test_pipe_write_long16", "test_pipe_read_long16" }; +static const char* ulong_kernel_name[] = { "test_pipe_write_ulong", "test_pipe_read_ulong", "test_pipe_write_ulong2", "test_pipe_read_ulong2", "test_pipe_write_ulong4", "test_pipe_read_ulong4", "test_pipe_write_ulong8", "test_pipe_read_ulong8", "test_pipe_write_ulong16", "test_pipe_read_ulong16" }; +static const char* char_kernel_name[] = { "test_pipe_write_char", "test_pipe_read_char", "test_pipe_write_char2", "test_pipe_read_char2", "test_pipe_write_char4", "test_pipe_read_char4", "test_pipe_write_char8", "test_pipe_read_char8", "test_pipe_write_char16", 
"test_pipe_read_char16" }; +static const char* uchar_kernel_name[] = { "test_pipe_write_uchar", "test_pipe_read_uchar", "test_pipe_write_uchar2", "test_pipe_read_uchar2", "test_pipe_write_uchar4", "test_pipe_read_uchar4", "test_pipe_write_uchar8", "test_pipe_read_uchar8", "test_pipe_write_uchar16", "test_pipe_read_uchar16" }; +static const char* short_kernel_name[] = { "test_pipe_write_short", "test_pipe_read_short", "test_pipe_write_short2", "test_pipe_read_short2", "test_pipe_write_short4", "test_pipe_read_short4", "test_pipe_write_short8", "test_pipe_read_short8", "test_pipe_write_short16", "test_pipe_read_short16" }; +static const char* ushort_kernel_name[] = { "test_pipe_write_ushort", "test_pipe_read_ushort", "test_pipe_write_ushort2", "test_pipe_read_ushort2", "test_pipe_write_ushort4", "test_pipe_read_ushort4", "test_pipe_write_ushort8", "test_pipe_read_ushort8", "test_pipe_write_ushort16", "test_pipe_read_ushort16" }; +static const char* float_kernel_name[] = { "test_pipe_write_float", "test_pipe_read_float", "test_pipe_write_float2", "test_pipe_read_float2", "test_pipe_write_float4", "test_pipe_read_float4", "test_pipe_write_float8", "test_pipe_read_float8", "test_pipe_write_float16", "test_pipe_read_float16" }; +static const char* half_kernel_name[] = { "test_pipe_write_half", "test_pipe_read_half", "test_pipe_write_half2", "test_pipe_read_half2", "test_pipe_write_half4", "test_pipe_read_half4", "test_pipe_write_half8", "test_pipe_read_half8", "test_pipe_write_half16", "test_pipe_read_half16" }; +static const char* double_kernel_name[] = { "test_pipe_write_double", "test_pipe_read_double", "test_pipe_write_double2", "test_pipe_read_double2", "test_pipe_write_double4", "test_pipe_read_double4", "test_pipe_write_double8", "test_pipe_read_double8", "test_pipe_write_double16", "test_pipe_read_double16" }; + +static const char* workgroup_int_kernel_name[] = { "test_pipe_workgroup_write_int", "test_pipe_workgroup_read_int", "test_pipe_workgroup_write_int2", 
"test_pipe_workgroup_read_int2", "test_pipe_workgroup_write_int4", "test_pipe_workgroup_read_int4", "test_pipe_workgroup_write_int8", "test_pipe_workgroup_read_int8", "test_pipe_workgroup_write_int16", "test_pipe_workgroup_read_int16" }; +static const char* workgroup_uint_kernel_name[] = { "test_pipe_workgroup_write_uint", "test_pipe_workgroup_read_uint", "test_pipe_workgroup_write_uint2", "test_pipe_workgroup_read_uint2", "test_pipe_workgroup_write_uint4", "test_pipe_workgroup_read_uint4", "test_pipe_workgroup_write_uint8", "test_pipe_workgroup_read_uint8", "test_pipe_workgroup_write_uint16", "test_pipe_workgroup_read_uint16" }; +static const char* workgroup_long_kernel_name[] = { "test_pipe_workgroup_write_long", "test_pipe_workgroup_read_long", "test_pipe_workgroup_write_long2", "test_pipe_workgroup_read_long2", "test_pipe_workgroup_write_long4", "test_pipe_workgroup_read_long4", "test_pipe_workgroup_write_long8", "test_pipe_workgroup_read_long8", "test_pipe_workgroup_write_long16", "test_pipe_workgroup_read_long16" }; +static const char* workgroup_ulong_kernel_name[] = { "test_pipe_workgroup_write_ulong", "test_pipe_workgroup_read_ulong", "test_pipe_workgroup_write_ulong2", "test_pipe_workgroup_read_ulong2", "test_pipe_workgroup_write_ulong4", "test_pipe_workgroup_read_ulong4", "test_pipe_workgroup_write_ulong8", "test_pipe_workgroup_read_ulong8", "test_pipe_workgroup_write_ulong16", "test_pipe_workgroup_read_ulong16" }; +static const char* workgroup_char_kernel_name[] = { "test_pipe_workgroup_write_char", "test_pipe_workgroup_read_char", "test_pipe_workgroup_write_char2", "test_pipe_workgroup_read_char2", "test_pipe_workgroup_write_char4", "test_pipe_workgroup_read_char4", "test_pipe_workgroup_write_char8", "test_pipe_workgroup_read_char8", "test_pipe_workgroup_write_char16", "test_pipe_workgroup_read_char16" }; +static const char* workgroup_uchar_kernel_name[] = { "test_pipe_workgroup_write_uchar", "test_pipe_workgroup_read_uchar", 
"test_pipe_workgroup_write_uchar2", "test_pipe_workgroup_read_uchar2", "test_pipe_workgroup_write_uchar4", "test_pipe_workgroup_read_uchar4", "test_pipe_workgroup_write_uchar8", "test_pipe_workgroup_read_uchar8", "test_pipe_workgroup_write_uchar16", "test_pipe_workgroup_read_uchar16" }; +static const char* workgroup_short_kernel_name[] = { "test_pipe_workgroup_write_short", "test_pipe_workgroup_read_short", "test_pipe_workgroup_write_short2", "test_pipe_workgroup_read_short2", "test_pipe_workgroup_write_short4", "test_pipe_workgroup_read_short4", "test_pipe_workgroup_write_short8", "test_pipe_workgroup_read_short8", "test_pipe_workgroup_write_short16", "test_pipe_workgroup_read_short16" }; +static const char* workgroup_ushort_kernel_name[] = { "test_pipe_workgroup_write_ushort", "test_pipe_workgroup_read_ushort", "test_pipe_workgroup_write_ushort2", "test_pipe_workgroup_read_ushort2", "test_pipe_workgroup_write_ushort4", "test_pipe_workgroup_read_ushort4", "test_pipe_workgroup_write_ushort8", "test_pipe_workgroup_read_ushort8", "test_pipe_workgroup_write_ushort16", "test_pipe_workgroup_read_ushort16" }; +static const char* workgroup_float_kernel_name[] = { "test_pipe_workgroup_write_float", "test_pipe_workgroup_read_float", "test_pipe_workgroup_write_float2", "test_pipe_workgroup_read_float2", "test_pipe_workgroup_write_float4", "test_pipe_workgroup_read_float4", "test_pipe_workgroup_write_float8", "test_pipe_workgroup_read_float8", "test_pipe_workgroup_write_float16", "test_pipe_workgroup_read_float16" }; +static const char* workgroup_half_kernel_name[] = { "test_pipe_workgroup_write_half", "test_pipe_workgroup_read_half", "test_pipe_workgroup_write_half2", "test_pipe_workgroup_read_half2", "test_pipe_workgroup_write_half4", "test_pipe_workgroup_read_half4", "test_pipe_workgroup_write_half8", "test_pipe_workgroup_read_half8", "test_pipe_workgroup_write_half16", "test_pipe_workgroup_read_half16" }; +static const char* workgroup_double_kernel_name[] = { 
"test_pipe_workgroup_write_double", "test_pipe_workgroup_read_double", "test_pipe_workgroup_write_double2", "test_pipe_workgroup_read_double2", "test_pipe_workgroup_write_double4", "test_pipe_workgroup_read_double4", "test_pipe_workgroup_write_double8", "test_pipe_workgroup_read_double8", "test_pipe_workgroup_write_double16", "test_pipe_workgroup_read_double16" }; + +static const char* subgroup_int_kernel_name[] = { "test_pipe_subgroup_write_int", "test_pipe_subgroup_read_int", "test_pipe_subgroup_write_int2", "test_pipe_subgroup_read_int2", "test_pipe_subgroup_write_int4", "test_pipe_subgroup_read_int4", "test_pipe_subgroup_write_int8", "test_pipe_subgroup_read_int8", "test_pipe_subgroup_write_int16", "test_pipe_subgroup_read_int16" }; +static const char* subgroup_uint_kernel_name[] = { "test_pipe_subgroup_write_uint", "test_pipe_subgroup_read_uint", "test_pipe_subgroup_write_uint2", "test_pipe_subgroup_read_uint2", "test_pipe_subgroup_write_uint4", "test_pipe_subgroup_read_uint4", "test_pipe_subgroup_write_uint8", "test_pipe_subgroup_read_uint8", "test_pipe_subgroup_write_uint16", "test_pipe_subgroup_read_uint16" }; +static const char* subgroup_long_kernel_name[] = { "test_pipe_subgroup_write_long", "test_pipe_subgroup_read_long", "test_pipe_subgroup_write_long2", "test_pipe_subgroup_read_long2", "test_pipe_subgroup_write_long4", "test_pipe_subgroup_read_long4", "test_pipe_subgroup_write_long8", "test_pipe_subgroup_read_long8", "test_pipe_subgroup_write_long16", "test_pipe_subgroup_read_long16" }; +static const char* subgroup_ulong_kernel_name[] = { "test_pipe_subgroup_write_ulong", "test_pipe_subgroup_read_ulong", "test_pipe_subgroup_write_ulong2", "test_pipe_subgroup_read_ulong2", "test_pipe_subgroup_write_ulong4", "test_pipe_subgroup_read_ulong4", "test_pipe_subgroup_write_ulong8", "test_pipe_subgroup_read_ulong8", "test_pipe_subgroup_write_ulong16", "test_pipe_subgroup_read_ulong16" }; +static const char* subgroup_char_kernel_name[] = { 
"test_pipe_subgroup_write_char", "test_pipe_subgroup_read_char", "test_pipe_subgroup_write_char2", "test_pipe_subgroup_read_char2", "test_pipe_subgroup_write_char4", "test_pipe_subgroup_read_char4", "test_pipe_subgroup_write_char8", "test_pipe_subgroup_read_char8", "test_pipe_subgroup_write_char16", "test_pipe_subgroup_read_char16" }; +static const char* subgroup_uchar_kernel_name[] = { "test_pipe_subgroup_write_uchar", "test_pipe_subgroup_read_uchar", "test_pipe_subgroup_write_uchar2", "test_pipe_subgroup_read_uchar2", "test_pipe_subgroup_write_uchar4", "test_pipe_subgroup_read_uchar4", "test_pipe_subgroup_write_uchar8", "test_pipe_subgroup_read_uchar8", "test_pipe_subgroup_write_uchar16", "test_pipe_subgroup_read_uchar16" }; +static const char* subgroup_short_kernel_name[] = { "test_pipe_subgroup_write_short", "test_pipe_subgroup_read_short", "test_pipe_subgroup_write_short2", "test_pipe_subgroup_read_short2", "test_pipe_subgroup_write_short4", "test_pipe_subgroup_read_short4", "test_pipe_subgroup_write_short8", "test_pipe_subgroup_read_short8", "test_pipe_subgroup_write_short16", "test_pipe_subgroup_read_short16" }; +static const char* subgroup_ushort_kernel_name[] = { "test_pipe_subgroup_write_ushort", "test_pipe_subgroup_read_ushort", "test_pipe_subgroup_write_ushort2", "test_pipe_subgroup_read_ushort2", "test_pipe_subgroup_write_ushort4", "test_pipe_subgroup_read_ushort4", "test_pipe_subgroup_write_ushort8", "test_pipe_subgroup_read_ushort8", "test_pipe_subgroup_write_ushort16", "test_pipe_subgroup_read_ushort16" }; +static const char* subgroup_float_kernel_name[] = { "test_pipe_subgroup_write_float", "test_pipe_subgroup_read_float", "test_pipe_subgroup_write_float2", "test_pipe_subgroup_read_float2", "test_pipe_subgroup_write_float4", "test_pipe_subgroup_read_float4", "test_pipe_subgroup_write_float8", "test_pipe_subgroup_read_float8", "test_pipe_subgroup_write_float16", "test_pipe_subgroup_read_float16" }; +static const char* subgroup_half_kernel_name[] = { 
"test_pipe_subgroup_write_half", "test_pipe_subgroup_read_half", "test_pipe_subgroup_write_half2", "test_pipe_subgroup_read_half2", "test_pipe_subgroup_write_half4", "test_pipe_subgroup_read_half4", "test_pipe_subgroup_write_half8", "test_pipe_subgroup_read_half8", "test_pipe_subgroup_write_half16", "test_pipe_subgroup_read_half16" }; +static const char* subgroup_double_kernel_name[] = { "test_pipe_subgroup_write_double", "test_pipe_subgroup_read_double", "test_pipe_subgroup_write_double2", "test_pipe_subgroup_read_double2", "test_pipe_subgroup_write_double4", "test_pipe_subgroup_read_double4", "test_pipe_subgroup_write_double8", "test_pipe_subgroup_read_double8", "test_pipe_subgroup_write_double16", "test_pipe_subgroup_read_double16" }; + + +static const char* convenience_int_kernel_name[] = { "test_pipe_convenience_write_int", "test_pipe_convenience_read_int", "test_pipe_convenience_write_int2", "test_pipe_convenience_read_int2", "test_pipe_convenience_write_int4", "test_pipe_convenience_read_int4", "test_pipe_convenience_write_int8", "test_pipe_convenience_read_int8", "test_pipe_convenience_write_int16", "test_pipe_convenience_read_int16" }; +static const char* convenience_uint_kernel_name[] = { "test_pipe_convenience_write_uint", "test_pipe_convenience_read_uint", "test_pipe_convenience_write_uint2", "test_pipe_convenience_read_uint2", "test_pipe_convenience_write_uint4", "test_pipe_convenience_read_uint4", "test_pipe_convenience_write_uint8", "test_pipe_convenience_read_uint8", "test_pipe_convenience_write_uint16", "test_pipe_convenience_read_uint16" }; +static const char* convenience_long_kernel_name[] = { "test_pipe_convenience_write_long", "test_pipe_convenience_read_long", "test_pipe_convenience_write_long2", "test_pipe_convenience_read_long2", "test_pipe_convenience_write_long4", "test_pipe_convenience_read_long4", "test_pipe_convenience_write_long8", "test_pipe_convenience_read_long8", "test_pipe_convenience_write_long16", 
"test_pipe_convenience_read_long16" }; +static const char* convenience_ulong_kernel_name[] = { "test_pipe_convenience_write_ulong", "test_pipe_convenience_read_ulong", "test_pipe_convenience_write_ulong2", "test_pipe_convenience_read_ulong2", "test_pipe_convenience_write_ulong4", "test_pipe_convenience_read_ulong4", "test_pipe_convenience_write_ulong8", "test_pipe_convenience_read_ulong8", "test_pipe_convenience_write_ulong16", "test_pipe_convenience_read_ulong16" }; +static const char* convenience_char_kernel_name[] = { "test_pipe_convenience_write_char", "test_pipe_convenience_read_char", "test_pipe_convenience_write_char2", "test_pipe_convenience_read_char2", "test_pipe_convenience_write_char4", "test_pipe_convenience_read_char4", "test_pipe_convenience_write_char8", "test_pipe_convenience_read_char8", "test_pipe_convenience_write_char16", "test_pipe_convenience_read_char16" }; +static const char* convenience_uchar_kernel_name[] = { "test_pipe_convenience_write_uchar", "test_pipe_convenience_read_uchar", "test_pipe_convenience_write_uchar2", "test_pipe_convenience_read_uchar2", "test_pipe_convenience_write_uchar4", "test_pipe_convenience_read_uchar4", "test_pipe_convenience_write_uchar8", "test_pipe_convenience_read_uchar8", "test_pipe_convenience_write_uchar16", "test_pipe_convenience_read_uchar16" }; +static const char* convenience_short_kernel_name[] = { "test_pipe_convenience_write_short", "test_pipe_convenience_read_short", "test_pipe_convenience_write_short2", "test_pipe_convenience_read_short2", "test_pipe_convenience_write_short4", "test_pipe_convenience_read_short4", "test_pipe_convenience_write_short8", "test_pipe_convenience_read_short8", "test_pipe_convenience_write_short16", "test_pipe_convenience_read_short16" }; +static const char* convenience_ushort_kernel_name[] = { "test_pipe_convenience_write_ushort", "test_pipe_convenience_read_ushort", "test_pipe_convenience_write_ushort2", "test_pipe_convenience_read_ushort2", 
"test_pipe_convenience_write_ushort4", "test_pipe_convenience_read_ushort4", "test_pipe_convenience_write_ushort8", "test_pipe_convenience_read_ushort8", "test_pipe_convenience_write_ushort16", "test_pipe_convenience_read_ushort16" }; +static const char* convenience_float_kernel_name[] = { "test_pipe_convenience_write_float", "test_pipe_convenience_read_float", "test_pipe_convenience_write_float2", "test_pipe_convenience_read_float2", "test_pipe_convenience_write_float4", "test_pipe_convenience_read_float4", "test_pipe_convenience_write_float8", "test_pipe_convenience_read_float8", "test_pipe_convenience_write_float16", "test_pipe_convenience_read_float16" }; +static const char* convenience_half_kernel_name[] = { "test_pipe_convenience_write_half", "test_pipe_convenience_read_half", "test_pipe_convenience_write_half2", "test_pipe_convenience_read_half2", "test_pipe_convenience_write_half4", "test_pipe_convenience_read_half4", "test_pipe_convenience_write_half8", "test_pipe_convenience_read_half8", "test_pipe_convenience_write_half16", "test_pipe_convenience_read_half16" }; +static const char* convenience_double_kernel_name[] = { "test_pipe_convenience_write_double", "test_pipe_convenience_read_double", "test_pipe_convenience_write_double2", "test_pipe_convenience_read_double2", "test_pipe_convenience_write_double4", "test_pipe_convenience_read_double4", "test_pipe_convenience_write_double8", "test_pipe_convenience_read_double8", "test_pipe_convenience_write_double16", "test_pipe_convenience_read_double16" }; + +static void insertPragmaForHalfType(char *source, char *type) +{ + source[0] = 0; + if(strncmp(type, "half",4) == 0) + { + strcat(source, "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"); + } +} + +void createKernelSource(char *source, char *type) +{ + char str[512]; + int str_length; + + insertPragmaForHalfType(source, type); + + sprintf(str, "__kernel void test_pipe_write_%s(__global %s *src, __write_only pipe %s out_pipe)\n", type, type, type); + 
strcat(source, str); + sprintf(str, "{\n int gid = get_global_id(0);\n reserve_id_t res_id; \n\n"); + strcat(source, str); + sprintf(str, " res_id = reserve_write_pipe(out_pipe, 1);\n if(is_valid_reserve_id(res_id))\n {\n"); + strcat(source, str); + sprintf(str, " write_pipe(out_pipe, res_id, 0, &src[gid]);\n commit_write_pipe(out_pipe, res_id);\n }\n}\n\n"); + strcat(source, str); + sprintf(str, "__kernel void test_pipe_read_%s(__read_only pipe %s in_pipe, __global %s *dst)\n", type, type, type); + strcat(source, str); + sprintf(str, "{\n int gid = get_global_id(0);\n reserve_id_t res_id; \n\n"); + strcat(source, str); + sprintf(str, " res_id = reserve_read_pipe(in_pipe, 1);\n if(is_valid_reserve_id(res_id))\n {\n"); + strcat(source, str); + sprintf(str, " read_pipe(in_pipe, res_id, 0, &dst[gid]);\n commit_read_pipe(in_pipe, res_id);\n }\n}\n"); + strcat(source, str); + str_length = strlen(source); + assert(str_length <= STRING_LENGTH); +} + +void createKernelSourceWorkGroup(char *source, char *type) +{ + char str[512]; + int str_length; + + insertPragmaForHalfType(source, type); + + sprintf(str, "__kernel void test_pipe_workgroup_write_%s(__global %s *src, __write_only pipe %s out_pipe)\n", type, type, type); + strcat(source, str); + sprintf(str, "{\n int gid = get_global_id(0);\n __local reserve_id_t res_id; \n\n"); + strcat(source, str); + sprintf(str, " res_id = work_group_reserve_write_pipe(out_pipe, get_local_size(0));\n if(is_valid_reserve_id(res_id))\n {\n"); + strcat(source, str); + sprintf(str, " write_pipe(out_pipe, res_id, get_local_id(0), &src[gid]);\n work_group_commit_write_pipe(out_pipe, res_id);\n }\n}\n\n"); + strcat(source, str); + sprintf(str, "__kernel void test_pipe_workgroup_read_%s(__read_only pipe %s in_pipe, __global %s *dst)\n", type, type, type); + strcat(source, str); + sprintf(str, "{\n int gid = get_global_id(0);\n __local reserve_id_t res_id; \n\n"); + strcat(source, str); + sprintf(str, " res_id = 
work_group_reserve_read_pipe(in_pipe, get_local_size(0));\n if(is_valid_reserve_id(res_id))\n {\n"); + strcat(source, str); + sprintf(str, " read_pipe(in_pipe, res_id, get_local_id(0), &dst[gid]);\n work_group_commit_read_pipe(in_pipe, res_id);\n }\n}\n"); + strcat(source, str); + str_length = strlen(source); + assert(str_length <= STRING_LENGTH); +} + +void createKernelSourceSubGroup(char *source, char *type) +{ + char str[512]; + int str_length; + + insertPragmaForHalfType(source, type); + + sprintf(str, "#pragma OPENCL EXTENSION cl_khr_subgroups : enable\n__kernel void test_pipe_subgroup_write_%s(__global %s *src, __write_only pipe %s out_pipe)\n", type, type, type); + strcat(source, str); + sprintf(str, "{\n int gid = get_global_id(0);\n reserve_id_t res_id; \n\n"); + strcat(source, str); + sprintf(str, " res_id = sub_group_reserve_write_pipe(out_pipe, get_sub_group_size());\n if(is_valid_reserve_id(res_id))\n {\n"); + strcat(source, str); + sprintf(str, " write_pipe(out_pipe, res_id, get_sub_group_local_id(), &src[gid]);\n sub_group_commit_write_pipe(out_pipe, res_id);\n }\n}\n\n"); + strcat(source, str); + sprintf(str, "__kernel void test_pipe_subgroup_read_%s(__read_only pipe %s in_pipe, __global %s *dst)\n", type, type, type); + strcat(source, str); + sprintf(str, "{\n int gid = get_global_id(0);\n reserve_id_t res_id; \n\n"); + strcat(source, str); + sprintf(str, " res_id = sub_group_reserve_read_pipe(in_pipe, get_sub_group_size());\n if(is_valid_reserve_id(res_id))\n {\n"); + strcat(source, str); + sprintf(str, " read_pipe(in_pipe, res_id, get_sub_group_local_id(), &dst[gid]);\n sub_group_commit_read_pipe(in_pipe, res_id);\n }\n}\n"); + strcat(source, str); + str_length = strlen(source); + assert(str_length <= STRING_LENGTH); +} + +void createKernelSourceConvenience(char *source, char *type) +{ + char str[512]; + int str_length; + + insertPragmaForHalfType(source, type); + + sprintf(str, "__kernel void test_pipe_convenience_write_%s(__global %s *src, 
__write_only pipe %s out_pipe)\n", type, type, type); + strcat(source, str); + sprintf(str, "{\n int gid = get_global_id(0);\n write_pipe(out_pipe, &src[gid]);\n}\n\n"); + strcat(source, str); + sprintf(str, "__kernel void test_pipe_convenience_read_%s(__read_only pipe %s in_pipe, __global %s *dst)\n", type, type, type); + strcat(source, str); + sprintf(str, "{\n int gid = get_global_id(0);\n read_pipe(in_pipe, &dst[gid]);\n}\n"); + strcat(source, str); + str_length = strlen(source); + assert(str_length <= STRING_LENGTH); +} + +// verify functions +static int verify_readwrite_int(void *ptr1, void *ptr2, int n) +{ + int i; + int sum_input = 0, sum_output = 0; + cl_int *inptr = (cl_int *)ptr1; + cl_int *outptr = (cl_int *)ptr2; + + for(i = 0; i < n; i++) + { + sum_input += inptr[i]; + sum_output += outptr[i]; + } + if(sum_input != sum_output){ + return -1; + } + + return 0; +} + +static int verify_readwrite_uint(void *ptr1, void *ptr2, int n) +{ + int i; + int sum_input = 0, sum_output = 0; + cl_uint *inptr = (cl_uint *)ptr1; + cl_uint *outptr = (cl_uint *)ptr2; + + for(i = 0; i < n; i++) + { + sum_input += inptr[i]; + sum_output += outptr[i]; + } + if(sum_input != sum_output){ + return -1; + } + + return 0; +} + +static int verify_readwrite_short(void *ptr1, void *ptr2, int n) +{ + int i; + int sum_input = 0, sum_output = 0; + cl_short *inptr = (cl_short *)ptr1; + cl_short *outptr = (cl_short *)ptr2; + + for(i = 0; i < n; i++) + { + sum_input += inptr[i]; + sum_output += outptr[i]; + } + if(sum_input != sum_output){ + return -1; + } + return 0; +} + +static int verify_readwrite_ushort(void *ptr1, void *ptr2, int n) +{ + int i; + int sum_input = 0, sum_output = 0; + cl_ushort *inptr = (cl_ushort *)ptr1; + cl_ushort *outptr = (cl_ushort *)ptr2; + + for(i = 0; i < n; i++) + { + sum_input += inptr[i]; + sum_output += outptr[i]; + } + if(sum_input != sum_output){ + return -1; + } + return 0; +} + +static int verify_readwrite_char(void *ptr1, void *ptr2, int n) +{ + int 
i; + int sum_input = 0, sum_output = 0; + cl_char *inptr = (cl_char *)ptr1; + cl_char *outptr = (cl_char *)ptr2; + + for(i = 0; i < n; i++) + { + sum_input += inptr[i]; + sum_output += outptr[i]; + } + if(sum_input != sum_output){ + return -1; + } + return 0; +} + +static int verify_readwrite_uchar(void *ptr1, void *ptr2, int n) +{ + int i; + int sum_input = 0, sum_output = 0; + cl_uchar *inptr = (cl_uchar *)ptr1; + cl_uchar *outptr = (cl_uchar *)ptr2; + + for(i = 0; i < n; i++) + { + sum_input += inptr[i]; + sum_output += outptr[i]; + } + if(sum_input != sum_output){ + return -1; + } + return 0; +} + +static int verify_readwrite_float(void *ptr1, void *ptr2, int n) +{ + int i; + int sum_input = 0, sum_output = 0; + int *inptr = (int *)ptr1; + int *outptr = (int *)ptr2; + + for(i = 0; i < n; i++) + { + sum_input += inptr[i]; + sum_output += outptr[i]; + } + if(sum_input != sum_output){ + return -1; + } + return 0; +} + +static int verify_readwrite_half(void *ptr1, void *ptr2, int n) +{ + int i; + int sum_input = 0, sum_output = 0; + cl_ushort *inptr = (cl_ushort *)ptr1; + cl_ushort *outptr = (cl_ushort *)ptr2; + + for(i = 0; i < n; i++) + { + sum_input += inptr[i]; + sum_output += outptr[i]; + } + if(sum_input != sum_output){ + return -1; + } + return 0; +} + +static int verify_readwrite_long(void *ptr1, void *ptr2, int n) +{ + int i; + cl_long sum_input = 0, sum_output = 0; + cl_long *inptr = (cl_long *)ptr1; + cl_long *outptr = (cl_long *)ptr2; + + for(i = 0; i < n; i++) + { + sum_input += inptr[i]; + sum_output += outptr[i]; + } + if(sum_input != sum_output){ + return -1; + } + return 0; +} + +static int verify_readwrite_ulong(void *ptr1, void *ptr2, int n) +{ + int i; + cl_ulong sum_input = 0, sum_output = 0; + cl_ulong *inptr = (cl_ulong *)ptr1; + cl_ulong *outptr = (cl_ulong *)ptr2; + + for(i = 0; i < n; i++) + { + sum_input += inptr[i]; + sum_output += outptr[i]; + } + if(sum_input != sum_output){ + return -1; + } + return 0; +} + +static int 
verify_readwrite_double(void *ptr1, void *ptr2, int n) +{ + int i; + long long int sum_input = 0, sum_output = 0; + long long int *inptr = (long long int *)ptr1; + long long int *outptr = (long long int *)ptr2; + + for(i = 0; i < n; i++) + { + sum_input += inptr[i]; + sum_output += outptr[i]; + } + if(sum_input != sum_output){ + return -1; + } + return 0; +} + +static int verify_readwrite_struct(void *ptr1, void *ptr2, int n) +{ + int i; + int sum_input_char = 0, sum_output_char = 0; + int sum_input_int = 0, sum_output_int = 0; + TestStruct *inptr = (TestStruct *)ptr1; + TestStruct *outptr = (TestStruct *)ptr2; + + for(i = 0; i < n; i++) + { + sum_input_char += inptr[i].a; + sum_input_int += inptr[i].b; + sum_output_char += outptr[i].a; + sum_output_int += outptr[i].b; + } + if( (sum_input_char != sum_output_char) || (sum_input_int != sum_output_int) ){ + return -1; + } + + return 0; +} + +int test_pipe_readwrite( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, size_t size, char *type, int loops, + void *inptr[5], const char *kernelName[], int (*fn)(void *, void *, int) ) +{ + cl_mem pipes[5]; + cl_mem buffers[10]; + void *outptr[5]; + cl_program program[5]; + cl_kernel kernel[10]; + size_t global_work_size[3]; + size_t local_work_size[3]; + cl_int err; + int i, ii; + size_t ptrSizes[5]; + int total_errors = 0; + cl_event producer_sync_event[5]; + cl_event consumer_sync_event[5]; + char *sourceCode[5]; + char vector_type[10]; + + size_t min_alignment = get_min_alignment(context); + + global_work_size[0] = (cl_uint)num_elements; + + ptrSizes[0] = size; + ptrSizes[1] = ptrSizes[0] << 1; + ptrSizes[2] = ptrSizes[1] << 1; + ptrSizes[3] = ptrSizes[2] << 1; + ptrSizes[4] = ptrSizes[3] << 1; + + for( i = 0; i < loops; i++) + { + ii = i << 1; + sourceCode[i] = (char*) malloc(STRING_LENGTH * sizeof(char)); + buffers[ii] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, ptrSizes[i] * num_elements, inptr[i], &err); + if(err){ + 
clReleaseMemObject(buffers[ii]); + /* outptr[i] is not allocated at this point; nothing to free */ + print_error(err, " clCreateBuffer failed\n"); + return -1; + } + outptr[i] = align_malloc( ptrSizes[i] * num_elements, min_alignment); + buffers[ii+1] = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, ptrSizes[i] * num_elements, outptr[i], &err); + + if ( err ){ + clReleaseMemObject(buffers[ii]); + align_free( outptr[i] ); + print_error(err, " clCreateBuffer failed\n" ); + return -1; + } + // Creating pipe with non-power of 2 size + pipes[i] = clCreatePipe(context, CL_MEM_HOST_NO_ACCESS, ptrSizes[i], num_elements+3, NULL, &err); + if(err){ + clReleaseMemObject(buffers[ii]); clReleaseMemObject(buffers[ii+1]); align_free( outptr[i] ); /* the pipe was not created; release what was */ + print_error(err, " clCreatePipe failed\n"); + return -1; + } + + switch(i) + { + case 0: + sprintf(vector_type, "%s", type); + break; + case 1: + sprintf(vector_type, "%s%d", type, 2); + break; + case 2: + sprintf(vector_type, "%s%d", type, 4); + break; + case 3: + sprintf(vector_type, "%s%d", type, 8); + break; + case 4: + sprintf(vector_type, "%s%d", type, 16); + break; + } + + if(useWorkgroupReserve == 1){ + createKernelSourceWorkGroup(sourceCode[i], vector_type); + } + else if(useSubgroupReserve == 1){ + createKernelSourceSubGroup(sourceCode[i], vector_type); + } + else if(useConvenienceBuiltIn == 1){ + createKernelSourceConvenience(sourceCode[i], vector_type); + } + else{ + createKernelSource(sourceCode[i], vector_type); + } + + // Create producer kernel + err = create_single_kernel_helper_with_build_options(context, &program[i], &kernel[ii], 1, (const char**)&sourceCode[i], kernelName[ii], "-cl-std=CL2.0"); + if(err){ + clReleaseMemObject(buffers[ii]); + clReleaseMemObject(buffers[ii+1]); + clReleaseMemObject(pipes[i]); + align_free( outptr[i] ); + print_error(err, "Error creating program\n"); + return -1; + } + //Create consumer kernel + kernel[ii + 1] = clCreateKernel(program[i], kernelName[ii + 1], &err); + if( kernel[ii + 1] == NULL || err != CL_SUCCESS) + { + clReleaseMemObject(buffers[ii]); + clReleaseMemObject(buffers[ii+1]); 
+ clReleaseMemObject(pipes[i]); + align_free( outptr[i] ); + log_error("Creating program for %s\n", type); + print_error( err, "Unable to create kernel" ); + return -1; + } + + err = clSetKernelArg(kernel[ii], 0, sizeof(cl_mem), (void*)&buffers[ii]); + err |= clSetKernelArg(kernel[ii], 1, sizeof(cl_mem), (void*)&pipes[i]); + err |= clSetKernelArg(kernel[ii + 1], 0, sizeof(cl_mem), (void*)&pipes[i]); + err |= clSetKernelArg(kernel[ii + 1], 1, sizeof(cl_mem), (void*)&buffers[ii + 1]); + if ( err != CL_SUCCESS ){ + clReleaseMemObject(buffers[ii]); + clReleaseMemObject(buffers[ii+1]); + clReleaseMemObject(pipes[i]); + clReleaseKernel(kernel[ii]); + clReleaseKernel(kernel[ii+1]); + clReleaseProgram(program[i]); + align_free(outptr[i]); + print_error(err, " clSetKernelArg failed"); + return -1; + } + + if(useWorkgroupReserve == 1 || useSubgroupReserve == 1) + { + err = get_max_common_work_group_size( context, kernel[ii], global_work_size[0], &local_work_size[0] ); + test_error( err, "Unable to get work group size to use" ); + // Launch Producer kernel + err = clEnqueueNDRangeKernel( queue, kernel[ii], 1, NULL, global_work_size, local_work_size, 0, NULL, &producer_sync_event[i] ); + if ( err != CL_SUCCESS ){ + print_error( err, " clEnqueueNDRangeKernel failed" ); + clReleaseMemObject(buffers[ii]); + clReleaseMemObject(buffers[ii+1]); + clReleaseMemObject(pipes[i]); + clReleaseKernel(kernel[ii]); + clReleaseKernel(kernel[ii+1]); + clReleaseEvent(producer_sync_event[i]); + clReleaseEvent(consumer_sync_event[i]); + clReleaseProgram(program[i]); + align_free(outptr[i]); + return -1; + } + } + else + { + // Launch Producer kernel + err = clEnqueueNDRangeKernel( queue, kernel[ii], 1, NULL, global_work_size, NULL, 0, NULL, &producer_sync_event[i] ); + if ( err != CL_SUCCESS ){ + print_error( err, " clEnqueueNDRangeKernel failed" ); + clReleaseMemObject(buffers[ii]); + clReleaseMemObject(buffers[ii+1]); + clReleaseMemObject(pipes[i]); + clReleaseKernel(kernel[ii]); + 
clReleaseKernel(kernel[ii+1]); + clReleaseEvent(producer_sync_event[i]); + clReleaseEvent(consumer_sync_event[i]); + clReleaseProgram(program[i]); + align_free(outptr[i]); + return -1; + } + } + + if(useWorkgroupReserve == 1 || useSubgroupReserve == 1) + { + err = get_max_common_work_group_size( context, kernel[ii + 1], global_work_size[0], &local_work_size[0] ); + test_error( err, "Unable to get work group size to use" ); + + // Launch Consumer kernel + err = clEnqueueNDRangeKernel( queue, kernel[ii + 1], 1, NULL, global_work_size, local_work_size, 1, &producer_sync_event[i], &consumer_sync_event[i] ); + if ( err != CL_SUCCESS ){ + print_error( err, " clEnqueueNDRangeKernel failed" ); + clReleaseMemObject(buffers[ii]); + clReleaseMemObject(buffers[ii+1]); + clReleaseMemObject(pipes[i]); + clReleaseKernel(kernel[ii]); + clReleaseKernel(kernel[ii+1]); + clReleaseEvent(producer_sync_event[i]); + clReleaseEvent(consumer_sync_event[i]); + clReleaseProgram(program[i]); + align_free(outptr[i]); + return -1; + } + } + else + { + // Launch Consumer kernel + err = clEnqueueNDRangeKernel( queue, kernel[ii + 1], 1, NULL, global_work_size, NULL, 1, &producer_sync_event[i], &consumer_sync_event[i] ); + if ( err != CL_SUCCESS ){ + print_error( err, " clEnqueueNDRangeKernel failed" ); + clReleaseMemObject(buffers[ii]); + clReleaseMemObject(buffers[ii+1]); + clReleaseMemObject(pipes[i]); + clReleaseKernel(kernel[ii]); + clReleaseKernel(kernel[ii+1]); + clReleaseEvent(producer_sync_event[i]); + clReleaseEvent(consumer_sync_event[i]); + clReleaseProgram(program[i]); + align_free(outptr[i]); + return -1; + } + } + + err = clEnqueueReadBuffer(queue, buffers[ii+1], true, 0, ptrSizes[i]*num_elements, outptr[i], 1, &consumer_sync_event[i], NULL); + if ( err != CL_SUCCESS ){ + print_error( err, " clEnqueueReadBuffer failed" ); + clReleaseMemObject(buffers[ii]); + clReleaseMemObject(buffers[ii+1]); + clReleaseMemObject(pipes[i]); + clReleaseKernel(kernel[ii]); + 
clReleaseKernel(kernel[ii+1]); + clReleaseEvent(producer_sync_event[i]); + clReleaseEvent(consumer_sync_event[i]); + clReleaseProgram(program[i]); + align_free(outptr[i]); + return -1; + } + + if( fn( inptr[i], outptr[i], (int)(ptrSizes[i] * (size_t)num_elements / ptrSizes[0]))){ + log_error("%s%d test failed\n", type, 1< +#include +#include +#include + +#include "procs.h" +#include "../../test_common/harness/errorHelpers.h" + +const char* pipe_readwrite_errors_kernel_code = { + "__kernel void test_pipe_write_error(__global int *src, __write_only pipe int out_pipe, __global int *status)\n" + "{\n" + " int gid = get_global_id(0);\n" + " int reserve_idx;\n" + " reserve_id_t res_id;\n" + "\n" + " res_id = reserve_write_pipe(out_pipe, 1);\n" + " if(is_valid_reserve_id(res_id))\n" + " {\n" + " write_pipe(out_pipe, res_id, 0, &src[gid]);\n" + " commit_write_pipe(out_pipe, res_id);\n" + " }\n" + " else\n" + " {\n" + " *status = -1;\n" + " }\n" + "}\n" + "\n" + "__kernel void test_pipe_read_error(__read_only pipe int in_pipe, __global int *dst, __global int *status)\n" + "{\n" + " int gid = get_global_id(0);\n" + " int reserve_idx;\n" + " reserve_id_t res_id;\n" + "\n" + " res_id = reserve_read_pipe(in_pipe, 1);\n" + " if(is_valid_reserve_id(res_id))\n" + " {\n" + " read_pipe(in_pipe, res_id, 0, &dst[gid]);\n" + " commit_read_pipe(in_pipe, res_id);\n" + " }\n" + " else\n" + " {\n" + " *status = -1;\n" + " }\n" + "}\n" +}; + + +int test_pipe_readwrite_errors(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + cl_mem pipe; + cl_mem buffers[3]; + void *outptr; + cl_int *inptr; + cl_program program; + cl_kernel kernel[2]; + size_t global_work_size[3]; + cl_int err; + cl_int size; + cl_int i; + cl_int status = 0; + cl_event producer_sync_event; + cl_event consumer_sync_event; + MTdata d = init_genrand( gRandomSeed ); + const char* kernelName[] = {"test_pipe_write_error", "test_pipe_read_error"}; + + size_t min_alignment = 
get_min_alignment(context); + + global_work_size[0] = num_elements; + + size = num_elements * sizeof(cl_int); + + inptr = (cl_int *)align_malloc(size, min_alignment); + + for(i = 0; i < (cl_int)(size / sizeof(int)); i++){ + inptr[i] = (int)genrand_int32(d); + } + + buffers[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, size, inptr, &err); + if(err){ + clReleaseMemObject(buffers[0]); + print_error(err, " clCreateBuffer failed\n"); + return -1; + } + outptr = align_malloc(size, min_alignment); + buffers[1] = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, size, outptr, &err); + if ( err ){ + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + align_free( outptr ); + print_error(err, " clCreateBuffer failed\n" ); + return -1; + } + buffers[2] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeof(int), &status, &err); + if ( err ){ + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseMemObject(buffers[2]); + align_free( outptr ); + print_error(err, " clCreateBuffer failed\n" ); + return -1; + } + //Pipe created with max_packets less than global size + pipe = clCreatePipe(context, CL_MEM_HOST_NO_ACCESS, sizeof(int), num_elements - (num_elements/2), NULL, &err); + if(err){ + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseMemObject(buffers[2]); + align_free( outptr ); + clReleaseMemObject(pipe); + print_error(err, " clCreatePipe failed\n"); + return -1; + } + + // Create producer kernel + err = create_single_kernel_helper_with_build_options(context, &program, &kernel[0], 1, (const char**)&pipe_readwrite_errors_kernel_code, kernelName[0], "-cl-std=CL2.0"); + if(err){ + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseMemObject(buffers[2]); + clReleaseMemObject(pipe); + align_free(outptr); + print_error(err, "Error creating program\n"); + return -1; + } + //Create consumer kernel + kernel[1] = clCreateKernel(program, kernelName[1], &err); + if( kernel[1] == NULL || err != 
CL_SUCCESS) + { + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseMemObject(buffers[2]); + clReleaseMemObject(pipe); + align_free(outptr); + print_error(err, "Error creating kernel\n"); + return -1; + } + + err = clSetKernelArg(kernel[0], 0, sizeof(cl_mem), (void*)&buffers[0]); + err |= clSetKernelArg(kernel[0], 1, sizeof(cl_mem), (void*)&pipe); + err |= clSetKernelArg(kernel[0], 2, sizeof(cl_mem), (void*)&buffers[2]); + err |= clSetKernelArg(kernel[1], 0, sizeof(cl_mem), (void*)&pipe); + err |= clSetKernelArg(kernel[1], 1, sizeof(cl_mem), (void*)&buffers[1]); + err |= clSetKernelArg(kernel[1], 2, sizeof(cl_mem), (void*)&buffers[2]); + if ( err != CL_SUCCESS ){ + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseMemObject(buffers[2]); + clReleaseMemObject(pipe); + clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + clReleaseProgram(program); + align_free(outptr); + print_error(err, " clSetKernelArg failed"); + return -1; + } + + // Launch Consumer kernel for empty pipe + err = clEnqueueNDRangeKernel( queue, kernel[1], 1, NULL, global_work_size, NULL, 0, NULL, &consumer_sync_event ); + if ( err != CL_SUCCESS ){ + print_error( err, " clEnqueueNDRangeKernel failed" ); + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseMemObject(buffers[2]); + clReleaseMemObject(pipe); + clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + clReleaseEvent(consumer_sync_event); + clReleaseProgram(program); + align_free(outptr); + return -1; + } + + err = clEnqueueReadBuffer(queue, buffers[2], true, 0, sizeof(status), &status, 1, &consumer_sync_event, NULL); + if ( err != CL_SUCCESS ){ + print_error( err, " clEnqueueReadBuffer failed" ); + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseMemObject(buffers[2]); + clReleaseMemObject(pipe); + clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + clReleaseEvent(consumer_sync_event); + clReleaseProgram(program); 
+ align_free(outptr); + return -1; + } + + if(status == 0){ + log_error("test_pipe_readwrite_errors failed\n"); + return -1; + } + else{ + status = 0; + } + + // Launch Producer kernel + err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, NULL, 0, NULL, &producer_sync_event ); + if ( err != CL_SUCCESS ){ + print_error( err, " clEnqueueNDRangeKernel failed" ); + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseMemObject(buffers[2]); + clReleaseMemObject(pipe); + clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + clReleaseProgram(program); + align_free(outptr); + return -1; + } + + err = clEnqueueReadBuffer(queue, buffers[2], true, 0, sizeof(status), &status, 1, &producer_sync_event, NULL); + if ( err != CL_SUCCESS ){ + print_error( err, " clEnqueueReadBuffer failed" ); + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseMemObject(buffers[2]); + clReleaseMemObject(pipe); + clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + clReleaseProgram(program); + align_free(outptr); + return -1; + } + + if(status == 0){ + log_error("test_pipe_readwrite_errors failed\n"); + return -1; + } + else{ + status = 0; + } + + // Launch Consumer kernel + err = clEnqueueNDRangeKernel( queue, kernel[1], 1, NULL, global_work_size, NULL, 1, &producer_sync_event, &consumer_sync_event ); + if ( err != CL_SUCCESS ){ + print_error( err, " clEnqueueNDRangeKernel failed" ); + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseMemObject(buffers[2]); + clReleaseMemObject(pipe); + clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + clReleaseEvent(producer_sync_event); + clReleaseEvent(consumer_sync_event); + clReleaseProgram(program); + align_free(outptr); + return -1; + } + + err = clEnqueueReadBuffer(queue, buffers[2], true, 0, sizeof(status), &status, 1, &consumer_sync_event, NULL); + if ( err != CL_SUCCESS ){ + print_error( err, " clEnqueueReadBuffer failed" ); + 
clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseMemObject(buffers[2]); + clReleaseMemObject(pipe); + clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + clReleaseEvent(producer_sync_event); + clReleaseEvent(consumer_sync_event); + clReleaseProgram(program); + align_free(outptr); + return -1; + } + + if(status == 0) + { + log_error("test_pipe_readwrite_errors failed\n"); + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseMemObject(buffers[2]); + clReleaseMemObject(pipe); + clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + clReleaseEvent(producer_sync_event); + clReleaseEvent(consumer_sync_event); + clReleaseProgram(program); + align_free(outptr); + return -1; + } + + log_info("test_pipe_readwrite_errors passed\n"); + //cleanup + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseMemObject(buffers[2]); + clReleaseMemObject(pipe); + clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + clReleaseEvent(producer_sync_event); + clReleaseEvent(consumer_sync_event); + clReleaseProgram(program); + align_free(outptr); + return 0; +} diff --git a/test_conformance/pipes/test_pipe_subgroups.c b/test_conformance/pipes/test_pipe_subgroups.c new file mode 100644 index 00000000..6f8615da --- /dev/null +++ b/test_conformance/pipes/test_pipe_subgroups.c @@ -0,0 +1,339 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include +#include + +#include "procs.h" +#include "../../test_common/harness/errorHelpers.h" +#define TEST_INT_VALUE 100 + +const char* pipe_subgroups_kernel_code = { + "#pragma OPENCL EXTENSION cl_khr_subgroups : enable\n" + "__kernel void test_pipe_subgroups_divergence_write(__global int *src, __write_only pipe int out_pipe, __global int *active_work_item_buffer)\n" + "{\n" + " int gid = get_global_id(0);\n" + " reserve_id_t res_id; \n" + "\n" + " if(get_sub_group_id() % 2 == 0)\n" + " {\n" + " active_work_item_buffer[gid] = 1;\n" + " res_id = sub_group_reserve_write_pipe(out_pipe, get_sub_group_size());\n" + " if(is_valid_reserve_id(res_id))\n" + " {\n" + " write_pipe(out_pipe, res_id, get_sub_group_local_id(), &src[gid]);\n" + " sub_group_commit_write_pipe(out_pipe, res_id);\n" + " }\n" + " }\n" + "}\n" + "\n" + "__kernel void test_pipe_subgroups_divergence_read(__read_only pipe int in_pipe, __global int *dst)\n" + "{\n" + " int gid = get_global_id(0);\n" + " reserve_id_t res_id; \n" + "\n" + " if(get_sub_group_id() % 2 == 0)\n" + " {\n" + " res_id = sub_group_reserve_read_pipe(in_pipe, get_sub_group_size());\n" + " if(is_valid_reserve_id(res_id))\n" + " {\n" + " read_pipe(in_pipe, res_id, get_sub_group_local_id(), &dst[gid]);\n" + " sub_group_commit_read_pipe(in_pipe, res_id);\n" + " }\n" + " }\n" + "}\n" +}; + +static int verify_result(void *ptr1, void *ptr2, int n) +{ + int i; + int sum_input = 0, sum_output = 0; + cl_int *inptr = (cl_int *)ptr1; + cl_int *outptr = (cl_int *)ptr2; + + for(i = 0; i < n; i++) + { + sum_input += inptr[i]; + } + sum_input *= TEST_INT_VALUE; + for(i = 0; i < n; i++) + { + if(outptr[i] == TEST_INT_VALUE){ + sum_output += outptr[i]; + } + } + + if(sum_input != sum_output){ + return -1; + } + return 0; +} + +int test_pipe_subgroups_divergence(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + cl_mem pipe; + 
cl_mem buffers[3];
+    cl_int *outptr;
+    cl_int *inptr;
+    cl_int *active_work_item_buffer;
+    cl_program program;
+    cl_kernel kernel[2];
+    size_t global_work_size[3];
+    size_t local_work_size[3];
+    cl_int err;
+    cl_int size;
+    int i;
+    size_t subgroup_count;
+    cl_event producer_sync_event = NULL;
+    cl_event consumer_sync_event = NULL;
+    const char* kernelName[] = {"test_pipe_subgroups_divergence_write", "test_pipe_subgroups_divergence_read"};
+
+    size_t min_alignment = get_min_alignment(context);
+
+    global_work_size[0] = (cl_uint)num_elements;
+
+    if(!is_extension_available(deviceID, "cl_khr_subgroups"))
+    {
+        // Typo fix: "platoform" -> "platform".
+        log_info("cl_khr_subgroups is not supported on this platform. Skipping test.\n");
+        return CL_SUCCESS;
+    }
+
+    size = sizeof(int) * num_elements;
+    inptr = (cl_int *)align_malloc(size, min_alignment);
+    outptr = (cl_int *)align_malloc(size, min_alignment);
+    active_work_item_buffer = (cl_int *)align_malloc(size, min_alignment);
+
+    for(i = 0; i < num_elements; i++){
+        inptr[i] = TEST_INT_VALUE;
+        outptr[i] = 0;
+        active_work_item_buffer[i] = 0;
+    }
+
+    buffers[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, size, inptr, &err);
+    if(err){
+        // BUG FIX: when clCreateBuffer fails, buffers[0] is not a valid
+        // object and must not be released; free the host allocations
+        // instead (inptr/active_work_item_buffer previously leaked here).
+        align_free(inptr);
+        align_free(outptr);
+        align_free(active_work_item_buffer);
+        print_error(err, " clCreateBuffer failed\n");
+        return -1;
+    }
+
+    buffers[1] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, size, outptr, &err);
+    if ( err ){
+        // BUG FIX: release only the buffer that was actually created and
+        // free all three host allocations.
+        clReleaseMemObject(buffers[0]);
+        align_free( inptr );
+        align_free( outptr );
+        align_free( active_work_item_buffer );
+        print_error(err, " clCreateBuffer failed\n" );
+        return -1;
+    }
+
+    buffers[2] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, size, active_work_item_buffer, &err);
+    if ( err ){
+        // BUG FIX: do not release the failed buffers[2]; free host memory.
+        clReleaseMemObject(buffers[0]);
+        clReleaseMemObject(buffers[1]);
+        align_free( inptr );
+        align_free( outptr );
+        align_free( active_work_item_buffer );
+        print_error(err, " clCreateBuffer failed\n" );
+        return -1;
+    }
+
+    pipe = clCreatePipe(context, CL_MEM_HOST_NO_ACCESS, sizeof(int), num_elements, NULL, &err);
+    if(err){
+        // BUG FIX: free the host allocations this handler previously leaked
+        // (the pre-existing cleanup continuing below frees only outptr).
+        align_free(inptr);
+        align_free(active_work_item_buffer);
+        clReleaseMemObject(buffers[0]);
+    
clReleaseMemObject(buffers[1]); + clReleaseMemObject(buffers[2]); + align_free( outptr ); + clReleaseMemObject(pipe); + print_error(err, " clCreatePipe failed\n"); + return -1; + } + + // Create producer kernel + err = create_single_kernel_helper_with_build_options(context, &program, &kernel[0], 1, (const char**)&pipe_subgroups_kernel_code, kernelName[0], "-cl-std=CL2.0"); + if(err){ + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseMemObject(buffers[2]); + clReleaseMemObject(pipe); + align_free(outptr); + print_error(err, "Error creating program\n"); + return -1; + } + //Create consumer kernel + kernel[1] = clCreateKernel(program, kernelName[1], &err); + if( kernel[1] == NULL || err != CL_SUCCESS) + { + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseMemObject(buffers[2]); + clReleaseMemObject(pipe); + align_free(outptr); + print_error(err, "Error creating kernel\n"); + return -1; + } + + err = clSetKernelArg(kernel[0], 0, sizeof(cl_mem), (void*)&buffers[0]); + err |= clSetKernelArg(kernel[0], 1, sizeof(cl_mem), (void*)&pipe); + err |= clSetKernelArg(kernel[0], 2, sizeof(cl_mem), (void*)&buffers[2]); + err |= clSetKernelArg(kernel[1], 0, sizeof(cl_mem), (void*)&pipe); + err |= clSetKernelArg(kernel[1], 1, sizeof(cl_mem), (void*)&buffers[1]); + if ( err != CL_SUCCESS ){ + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseMemObject(buffers[2]); + clReleaseMemObject(pipe); + clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + clReleaseProgram(program); + align_free(outptr); + print_error(err, " clSetKernelArg failed"); + return -1; + } + + err = get_max_common_work_group_size( context, kernel[0], global_work_size[0], &local_work_size[0] ); + if( err != CL_SUCCESS) + { + test_error( err, "Unable to get work group size to use" ); + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseMemObject(buffers[2]); + clReleaseMemObject(pipe); + 
clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + clReleaseProgram(program); + align_free(outptr); + return -1; + } + + cl_platform_id platform; + err = clGetDeviceInfo(deviceID, CL_DEVICE_PLATFORM, sizeof(platform), &platform, NULL); + clGetKernelSubGroupInfoKHR_fn clGetKernelSubGroupInfoKHR = (clGetKernelSubGroupInfoKHR_fn) clGetExtensionFunctionAddressForPlatform(platform, "clGetKernelSubGroupInfoKHR"); + + err = clGetKernelSubGroupInfoKHR(kernel[0], deviceID, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR, sizeof(local_work_size[0]), &local_work_size[0], sizeof(subgroup_count), &subgroup_count, NULL); + if(subgroup_count <= 1) + { + log_info("Only 1 subgroup per workgroup for the kernel. Hence no divergence among subgroups possible. Skipping test.\n"); + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseMemObject(buffers[2]); + clReleaseMemObject(pipe); + clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + clReleaseProgram(program); + align_free(outptr); + return CL_SUCCESS; + } + + // Launch Producer kernel + err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, local_work_size, 0, NULL, &producer_sync_event ); + if ( err != CL_SUCCESS ){ + print_error( err, " clEnqueueNDRangeKernel failed" ); + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseMemObject(buffers[2]); + clReleaseMemObject(pipe); + clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + clReleaseEvent(producer_sync_event); + clReleaseEvent(consumer_sync_event); + clReleaseProgram(program); + align_free(outptr); + return -1; + } + + err = clEnqueueReadBuffer(queue, buffers[2], true, 0, size, active_work_item_buffer, 1, &producer_sync_event, NULL); + if ( err != CL_SUCCESS ){ + print_error( err, " clEnqueueReadBuffer failed" ); + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseMemObject(buffers[2]); + clReleaseMemObject(pipe); + clReleaseKernel(kernel[0]); + 
clReleaseKernel(kernel[1]); + clReleaseEvent(producer_sync_event); + clReleaseEvent(consumer_sync_event); + clReleaseProgram(program); + align_free(outptr); + return -1; + } + + + // Launch Consumer kernel + err = clEnqueueNDRangeKernel( queue, kernel[1], 1, NULL, global_work_size, local_work_size, 1, &producer_sync_event, &consumer_sync_event ); + if ( err != CL_SUCCESS ){ + print_error( err, " clEnqueueNDRangeKernel failed" ); + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseMemObject(buffers[2]); + clReleaseMemObject(pipe); + clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + clReleaseEvent(producer_sync_event); + clReleaseEvent(consumer_sync_event); + clReleaseProgram(program); + align_free(outptr); + return -1; + } + + err = clEnqueueReadBuffer(queue, buffers[1], true, 0, size, outptr, 1, &consumer_sync_event, NULL); + if ( err != CL_SUCCESS ){ + print_error( err, " clEnqueueReadBuffer failed" ); + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseMemObject(buffers[2]); + clReleaseMemObject(pipe); + clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + clReleaseEvent(producer_sync_event); + clReleaseEvent(consumer_sync_event); + clReleaseProgram(program); + align_free(outptr); + return -1; + } + + if( verify_result( active_work_item_buffer, outptr, num_elements)){ + log_error("test_pipe_subgroups_divergence failed\n"); + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseMemObject(buffers[2]); + clReleaseMemObject(pipe); + clReleaseKernel(kernel[0]); + clReleaseKernel(kernel[1]); + clReleaseEvent(producer_sync_event); + clReleaseEvent(consumer_sync_event); + clReleaseProgram(program); + align_free(outptr); + return -1; + } + else { + log_info("test_pipe_subgroups_divergence passed\n"); + } + //cleanup + clReleaseMemObject(buffers[0]); + clReleaseMemObject(buffers[1]); + clReleaseMemObject(buffers[2]); + clReleaseMemObject(pipe); + clReleaseKernel(kernel[0]); + 
clReleaseKernel(kernel[1]);
+    clReleaseEvent(producer_sync_event);
+    clReleaseEvent(consumer_sync_event);
+    clReleaseProgram(program);
+    align_free(outptr);
+    // BUG FIX: inptr and active_work_item_buffer are allocated with
+    // align_malloc alongside outptr (after the extension check, so they
+    // are always valid here) but were never freed on the success path.
+    align_free(inptr);
+    align_free(active_work_item_buffer);
+
+    return 0;
+}
diff --git a/test_conformance/printf/CMakeLists.txt b/test_conformance/printf/CMakeLists.txt
new file mode 100644
index 00000000..914ffb58
--- /dev/null
+++ b/test_conformance/printf/CMakeLists.txt
+set(MODULE_NAME PRINTF)
+
+# Explicit source list for the printf conformance test binary.
+# BUG FIX: kernelHelpers.c was listed twice; the duplicate entry is removed.
+set(${MODULE_NAME}_SOURCES
+    test_printf.c
+    util_printf.c
+    ../../test_common/harness/errorHelpers.c
+    ../../test_common/harness/threadTesting.c
+    ../../test_common/harness/kernelHelpers.c
+    ../../test_common/harness/typeWrappers.cpp
+    ../../test_common/harness/conversions.c
+    ../../test_common/harness/mt19937.c
+    ../../test_common/harness/msvc9.c
+    #../../test_common/harness/imageHelpers.cpp
+    ../../test_common/harness/parseParameters.cpp
+    )
+
+include(../CMakeCommon.txt)
diff --git a/test_conformance/printf/Jamfile b/test_conformance/printf/Jamfile
new file mode 100644
index 00000000..b310d5c2
--- /dev/null
+++ b/test_conformance/printf/Jamfile
+project : requirements windows,gcc:-xc++ msvc:"/TP" ;
+exe test_printf : test_printf.c util_printf.c ../../test_common/harness/mt19937.c ../../test_common/harness/kernelHelpers.c ../../test_common/harness/errorHelpers.c : windows:../../test_common/harness/msvc9.c ;
+install dist : test_printf : debug:$(DIST)/debug/tests/conformance/1.2/x86/printf release:$(DIST)/release/tests/conformance/1.2/x86/printf ;
+
+install dist : test_printf : debug:$(DIST)/debug/tests/conformance/1.2/x86_64/printf release:$(DIST)/release/tests/conformance/1.2/x86_64/printf 64 ;
\ No newline at end of file
diff --git a/test_conformance/printf/Makefile b/test_conformance/printf/Makefile
new file mode 100644
index 00000000..b5cf65d9
--- /dev/null
+++ b/test_conformance/printf/Makefile
+ifdef BUILD_WITH_ATF
+ATF = -framework ATF
+USE_ATF = -DUSE_ATF
+endif
+
+SRCS = test_printf.c \ + util_printf.c \ + ../../test_common/harness/mt19937.c \ + ../../test_common/harness/kernelHelpers.c \ + ../../test_common/harness/errorHelpers.c \ + ../../test_common/harness/msvc9.c + +DEFINES = + +SOURCES = $(abspath $(SRCS)) +LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries +LIBPATH += -L. +FRAMEWORK = $(SOURCES) +HEADERS = +TARGET = test_printf +INCLUDE = +COMPILERFLAGS = -c -Wall -g -O0 -Wshorten-64-to-32 +CC = c++ +CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE) +CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE) +LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF} + +OBJECTS := ${SOURCES:.c=.o} +OBJECTS := ${OBJECTS:.cpp=.o} + +TARGETOBJECT = +all: $(TARGET) + +$(TARGET): $(OBJECTS) + $(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES) + +clean: + rm -f $(TARGET) $(OBJECTS) + +.DEFAULT: + @echo The target \"$@\" does not exist in Makefile. + + + diff --git a/test_conformance/printf/test_printf.c b/test_conformance/printf/test_printf.c new file mode 100644 index 00000000..fcb6d69e --- /dev/null +++ b/test_conformance/printf/test_printf.c @@ -0,0 +1,813 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" + +#include +#include + +#if ! defined( _WIN32) +#if ! 
defined( __ANDROID__ ) +#include +#endif +#include +#define streamDup(fd1) dup(fd1) +#define streamDup2(fd1,fd2) dup2(fd1,fd2) +#endif +#include +#include "test_printf.h" + +#if defined(_WIN32) +#include +#define streamDup(fd1) _dup(fd1) +#define streamDup2(fd1,fd2) _dup2(fd1,fd2) +#include "../../test_common/harness/testHarness.h" +#endif + +#include "../../test_common/harness/errorHelpers.h" +#include "../../test_common/harness/kernelHelpers.h" +#include "../../test_common/harness/mt19937.h" +#include "../../test_common/harness/parseParameters.h" + +typedef unsigned int uint32_t; + + +//----------------------------------------- +// Static helper functions declaration +//----------------------------------------- + +//Stream helper functions + +//Associate stdout stream with the file(/tmp/tmpfile):i.e redirect stdout stream to the specific files (/tmp/tmpfile) +static int acquireOutputStream(); + +//Close the file(/tmp/tmpfile) associated with the stdout stream and disassociates it. +static void releaseOutputStream(int fd); + +//Get analysis buffer to verify the correctess of printed data +static void getAnalysisBuffer(char* analysisBuffer); + +//Kernel builder helper functions + +//Check if the test case is for kernel that has argument +static int isKernelArgument(testCase* pTestCase,size_t testId); + +//Check if the test case treats %p format for void* +static int isKernelPFormat(testCase* pTestCase,size_t testId); + +//----------------------------------------- +// Static functions declarations +//----------------------------------------- +// Make a program that uses printf for the given type/format, +static cl_program makePrintfProgram(cl_kernel *kernel_ptr, const cl_context context,const unsigned int testId,const unsigned int testNum,bool isLongSupport = true,bool is64bAddrSpace = false); + +// Creates and execute the printf test for the given device, context, type/format +static int doTest(cl_command_queue queue, cl_context context, const unsigned int testId, 
const unsigned int testNum, cl_device_id device,bool isLongSupport = true); + +// Check if device supports long +static bool isLongSupported(cl_device_id device_id); + +// Check if device address space is 64 bits +static bool is64bAddressSpace(cl_device_id device_id); + +//Wait until event status is CL_COMPLETE +int waitForEvent(cl_event* event); + +//----------------------------------------- +// Definitions and initializations +//----------------------------------------- + +// Tests are broken into the major test which is based on the +// src and cmp type and their corresponding vector types and +// sub tests which is for each individual test. The following +// tracks the subtests +int s_test_cnt = 0; +int s_test_fail = 0; + + +//----------------------------------------- +// Static helper functions definition +//----------------------------------------- + +//----------------------------------------- +// acquireOutputStream +//----------------------------------------- +static int acquireOutputStream() +{ + int fd = streamDup(fileno(stdout)); +#if (defined(__linux__) || defined(__APPLE__)) && (!defined( __ANDROID__ )) + freopen("/tmp/tmpfile","w",stdout); +#else + freopen("tmpfile","w",stdout); +#endif + return fd; +} + +//----------------------------------------- +// releaseOutputStream +//----------------------------------------- +static void releaseOutputStream(int fd) +{ + fflush(stdout); + streamDup2(fd,fileno(stdout)); + close(fd); +} + +//----------------------------------------- +// getAnalysisBuffer +//----------------------------------------- +static void getAnalysisBuffer(char* analysisBuffer) +{ + FILE *fp; + memset(analysisBuffer,0,ANALYSIS_BUFFER_SIZE); + +#if (defined(__linux__) || defined(__APPLE__)) && (!defined( __ANDROID__ )) + fp = fopen("/tmp/tmpfile","r"); +#else + fp = fopen("tmpfile","r"); +#endif + if(NULL == fp) + log_error("Failed to open analysis buffer ('%s')\n", strerror(errno)); + else + while(fgets(analysisBuffer,ANALYSIS_BUFFER_SIZE 
, fp) != NULL );
+    // BUG FIX: fclose(NULL) is undefined behavior; only close the stream
+    // when the fopen() above actually succeeded.
+    if(fp != NULL)
+        fclose(fp);
+}
+
+//-----------------------------------------
+// isKernelArgument
+//-----------------------------------------
+// Non-zero when the generated test case declares a kernel argument
+// (i.e. its addrSpaceArgumentTypeQualifier string is non-empty).
+static int isKernelArgument(testCase* pTestCase,size_t testId)
+{
+    return strcmp(pTestCase->_genParameters[testId].addrSpaceArgumentTypeQualifier,"");
+}
+//-----------------------------------------
+// isKernelPFormat
+-----------------------------------------
+// Non-zero when the test case exercises the %p format
+// (its addrSpacePAdd string is non-empty).
+static int isKernelPFormat(testCase* pTestCase,size_t testId)
+{
+    return strcmp(pTestCase->_genParameters[testId].addrSpacePAdd,"");
+}
+
+//-----------------------------------------
+// waitForEvent
+//-----------------------------------------
+// Spins until the event reaches CL_COMPLETE, then releases it.
+// NOTE(review): this busy-polls the event status; clWaitForEvents would
+// avoid spinning — left as-is to preserve behavior.
+int waitForEvent(cl_event* event)
+{
+    cl_int status = CL_SUCCESS;
+    cl_int eventStatus = CL_QUEUED;
+    while(eventStatus != CL_COMPLETE)
+    {
+        status = clGetEventInfo(
+            *event,
+            CL_EVENT_COMMAND_EXECUTION_STATUS,
+            sizeof(cl_int),
+            &eventStatus,
+            NULL);
+        if(status != CL_SUCCESS)
+        {
+            log_error("clGetEventInfo failed");
+            return status;
+        }
+    }
+
+    status = clReleaseEvent(*event);
+    if(status != CL_SUCCESS)
+    {
+        log_error("clReleaseEvent failed. 
(*event)"); + return status; + } + return CL_SUCCESS; +} + +//----------------------------------------- +// Static helper functions definition +//----------------------------------------- + +//----------------------------------------- +// makePrintfProgram +//----------------------------------------- +static cl_program makePrintfProgram(cl_kernel *kernel_ptr, const cl_context context,const unsigned int testId,const unsigned int testNum,bool isLongSupport,bool is64bAddrSpace) +{ + int err,i; + cl_program program; + cl_device_id devID; + char buildLog[ 1024 * 128 ]; + char testname[256] = {0}; + char addrSpaceArgument[256] = {0}; + char addrSpacePAddArgument[256] = {0}; + + //Program Source code for int,float,octal,hexadecimal,char,string + const char *sourceGen[] = { + "__kernel void ", testname, + "(void)\n", + "{\n" + " printf(\"", + allTestCase[testId]->_genParameters[testNum].genericFormat, + "\\n\",", + allTestCase[testId]->_genParameters[testNum].dataRepresentation, + ");", + "}\n" + }; + //Program Source code for vector + const char *sourceVec[] = { + "__kernel void ", testname, + "(void)\n", + "{\n", + allTestCase[testId]->_genParameters[testNum].dataType, + allTestCase[testId]->_genParameters[testNum].vectorSize, + " tmp = (", + allTestCase[testId]->_genParameters[testNum].dataType, + allTestCase[testId]->_genParameters[testNum].vectorSize, + ")", + allTestCase[testId]->_genParameters[testNum].dataRepresentation, + ";", + " printf(\"", + allTestCase[testId]->_genParameters[testNum].vectorFormatFlag, + "v", + allTestCase[testId]->_genParameters[testNum].vectorSize, + allTestCase[testId]->_genParameters[testNum].vectorFormatSpecifier, + "\\n\",", + "tmp);", + "}\n" + }; + //Program Source code for address space + const char *sourceAddrSpace[] = { + "__kernel void ", testname,"(",addrSpaceArgument, + ")\n{\n", + allTestCase[testId]->_genParameters[testNum].addrSpaceVariableTypeQualifier, + "printf(", + 
allTestCase[testId]->_genParameters[testNum].genericFormat,
+        ",",
+        allTestCase[testId]->_genParameters[testNum].addrSpaceParameter,
+        "); ",
+        addrSpacePAddArgument,
+        "\n}\n"
+    };
+
+    //Update testname
+    sprintf(testname,"%s%d","test",testId);
+
+    //Update addrSpaceArgument and addrSpacePAddArgument types, based on FULL_PROFILE/EMBEDDED_PROFILE
+    if(allTestCase[testId]->_type == TYPE_ADDRESS_SPACE)
+    {
+        sprintf(addrSpaceArgument, "%s",allTestCase[testId]->_genParameters[testNum].addrSpaceArgumentTypeQualifier);
+
+        // BUG FIX: the test-case string was passed as the sprintf FORMAT
+        // argument; any '%' in it would be interpreted as a conversion
+        // specifier (undefined behavior). Copy it via "%s" instead.
+        sprintf(addrSpacePAddArgument, "%s", allTestCase[testId]->_genParameters[testNum].addrSpacePAdd);
+    }
+
+    if (strlen(addrSpaceArgument) == 0)
+        sprintf(addrSpaceArgument,"void");
+
+    // create program based on its type
+
+    if(allTestCase[testId]->_type == TYPE_VECTOR)
+    {
+        err = create_single_kernel_helper(context, &program, NULL, sizeof(sourceVec) / sizeof(sourceVec[0]), sourceVec, NULL);
+    }
+    else if(allTestCase[testId]->_type == TYPE_ADDRESS_SPACE)
+    {
+        err = create_single_kernel_helper(context, &program, NULL, sizeof(sourceAddrSpace) / sizeof(sourceAddrSpace[0]), sourceAddrSpace, NULL);
+    }
+    else
+    {
+        err = create_single_kernel_helper(context, &program, NULL, sizeof(sourceGen) / sizeof(sourceGen[0]), sourceGen, NULL);
+    }
+
+    // NOTE(review): if the helper returns an error but still created a
+    // program, that program may leak here — depends on helper semantics;
+    // confirm against the harness before changing.
+    if (!program || err) {
+        log_error("create_single_kernel_helper failed\n");
+        return NULL;
+    }
+
+    *kernel_ptr = clCreateKernel(program, testname, &err);
+    if ( err ) {
+        log_error("clCreateKernel failed (%d)\n", err);
+        // BUG FIX: the successfully built program leaked on this path.
+        clReleaseProgram(program);
+        return NULL;
+    }
+
+    return program;
+}
+
+//-----------------------------------------
+// isLongSupported
+//-----------------------------------------
+// Returns true unless the device is EMBEDDED_PROFILE without the
+// cles_khr_int64 extension (long/%p support check).
+static bool isLongSupported(cl_device_id device_id)
+{
+    //profile type && device extention for long support checking
+    char *profileType = NULL,*devExt = NULL;
+
+    size_t tempSize = 0;
+    cl_int status;
+    bool extSupport = true;
+
+    // Device profile
+    status = clGetDeviceInfo(
+        device_id,
+        CL_DEVICE_PROFILE,
+        0,
+        NULL,
+        &tempSize);
+
+    if(status != 
CL_SUCCESS)
+    {
+        log_error("*** clGetDeviceInfo FAILED ***\n\n");
+        return false;
+    }
+
+    profileType = new char[tempSize];
+    if(profileType == NULL)
+    {
+        log_error("Failed to allocate memory(profileType)");
+        return false;
+    }
+
+    status = clGetDeviceInfo(
+        device_id,
+        CL_DEVICE_PROFILE,
+        sizeof(char) * tempSize,
+        profileType,
+        NULL);
+
+
+    if(!strcmp("EMBEDDED_PROFILE",profileType))
+    {
+        // Device extention
+        status = clGetDeviceInfo(
+            device_id,
+            CL_DEVICE_EXTENSIONS,
+            0,
+            NULL,
+            &tempSize);
+
+        if(status != CL_SUCCESS)
+        {
+            log_error("*** clGetDeviceInfo FAILED ***\n\n");
+            // BUG FIX: profileType leaked on this early return.
+            delete [] profileType;
+            return false;
+        }
+
+        devExt = new char[tempSize];
+        if(devExt == NULL)
+        {
+            log_error("Failed to allocate memory(devExt)");
+            // BUG FIX: profileType leaked on this early return.
+            delete [] profileType;
+            return false;
+        }
+
+        status = clGetDeviceInfo(
+            device_id,
+            CL_DEVICE_EXTENSIONS,
+            sizeof(char) * tempSize,
+            devExt,
+            NULL);
+
+        extSupport = (strstr(devExt,"cles_khr_int64") != NULL);
+
+        // BUG FIX: arrays allocated with new[] must be freed with delete[],
+        // not scalar delete (undefined behavior otherwise).
+        delete [] devExt;
+    }
+    // BUG FIX: profileType was only freed inside the EMBEDDED_PROFILE
+    // branch and leaked for FULL_PROFILE devices; free it on every path.
+    delete [] profileType;
+    return extSupport;
+}
+//-----------------------------------------
+// is64bAddressSpace
+//-----------------------------------------
+// True when CL_DEVICE_ADDRESS_BITS reports 64.
+static bool is64bAddressSpace(cl_device_id device_id)
+{
+    cl_int status;
+    cl_uint addrSpaceB;
+
+    // Device profile
+    status = clGetDeviceInfo(
+        device_id,
+        CL_DEVICE_ADDRESS_BITS,
+        sizeof(cl_uint),
+        &addrSpaceB,
+        NULL);
+    if(status != CL_SUCCESS)
+    {
+        log_error("*** clGetDeviceInfo FAILED ***\n\n");
+        return false;
+    }
+    if(addrSpaceB == 64)
+        return true;
+    else
+        return false;
+}
+//-----------------------------------------
+// doTest
+//-----------------------------------------
+// Builds and runs one printf test kernel, captures its stdout output and
+// verifies it. Returns 0 on pass; bumps s_test_fail/s_test_cnt counters.
+static int doTest(cl_command_queue queue, cl_context context, const unsigned int testId, const unsigned int testNum, cl_device_id device,bool isLongSupport)
+{
+    int err;
+    cl_program program;
+    cl_kernel kernel;
+    // ROBUSTNESS: initialize to NULL — d_out is only created for
+    // address-space %p tests and was previously left indeterminate.
+    cl_mem d_out = NULL;
+    char _analysisBuffer[ANALYSIS_BUFFER_SIZE];
+    cl_uint out32 = 0;
+    cl_ulong out64 = 0;
+
+    // Define an index space (global work size) of threads for execution.
+ size_t globalWorkSize[1]; + + program = makePrintfProgram(&kernel, context,testId,testNum,isLongSupport,is64bAddressSpace(device)); + if (!program || !kernel) { + ++s_test_fail; + ++s_test_cnt; + return -1; + } + + //For address space test if there is kernel argument - set it + if(allTestCase[testId]->_type == TYPE_ADDRESS_SPACE ) + { + if(isKernelArgument(allTestCase[testId],testNum)) + { + int a = 2; + cl_mem d_a = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR, + sizeof(int), &a, &err); + if(err!= CL_SUCCESS || d_a == NULL) { + log_error("clCreateBuffer failed\n"); + goto exit; + } + err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &d_a); + if(err!= CL_SUCCESS) { + log_error("clSetKernelArg failed\n"); + goto exit; + } + } + //For address space test if %p is tested + if(isKernelPFormat(allTestCase[testId],testNum)) + { + d_out = clCreateBuffer(context, CL_MEM_READ_WRITE, + sizeof(cl_ulong), NULL, &err); + if(err!= CL_SUCCESS || d_out == NULL) { + log_error("clCreateBuffer failed\n"); + goto exit; + } + err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &d_out); + if(err!= CL_SUCCESS) { + log_error("clSetKernelArg failed\n"); + goto exit; + } + } + } + + globalWorkSize[0] = 1; + cl_event ndrEvt; + err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, globalWorkSize, NULL, 0, NULL,&ndrEvt); + if (err != CL_SUCCESS) { + log_error("\n clEnqueueNDRangeKernel failed errcode:%d\n", err); + ++s_test_fail; + goto exit; + } + + fflush(stdout); + err = clFlush(queue); + if(err != CL_SUCCESS) + { + log_error("clFlush failed\n"); + goto exit; + } + //Wait until kernel finishes its execution and (thus) the output printed from the kernel + //is immidatly printed + err = waitForEvent(&ndrEvt); + + if(err != CL_SUCCESS) + { + log_error("waitforEvent failed\n"); + goto exit; + } + fflush(stdout); + + if(allTestCase[testId]->_type == TYPE_ADDRESS_SPACE && isKernelPFormat(allTestCase[testId],testNum)) + { + // Read the OpenCL output buffer (d_out) to the host output 
array (out) + if(!is64bAddressSpace(device))//32-bit address space + { + clEnqueueReadBuffer(queue, d_out, CL_TRUE, 0, sizeof(cl_int),&out32, + 0, NULL, NULL); + } + else //64-bit address space + { + clEnqueueReadBuffer(queue, d_out, CL_TRUE, 0, sizeof(cl_ulong),&out64, + 0, NULL, NULL); + } + } + + // + //Get the output printed from the kernel to _analysisBuffer + //and verify its correctness + getAnalysisBuffer(_analysisBuffer); + if(!is64bAddressSpace(device)) //32-bit address space + { + if(0 != verifyOutputBuffer(_analysisBuffer,allTestCase[testId],testNum,(cl_ulong) out32)) + err = ++s_test_fail; + } + else //64-bit address space + { + if(0 != verifyOutputBuffer(_analysisBuffer,allTestCase[testId],testNum,out64)) + err = ++s_test_fail; + } +exit: + if(clReleaseKernel(kernel) != CL_SUCCESS) + log_error("clReleaseKernel failed\n"); + if(clReleaseProgram(program) != CL_SUCCESS) + log_error("clReleaseProgram failed\n"); + ++s_test_cnt; + return err; +} +//----------------------------------------- +// printUsage +//----------------------------------------- +static void printUsage( void ) +{ + log_info("test_printf: [-cghw] [start_test_num] \n"); + log_info(" default is to run the full test on the default device\n"); + log_info(" start_test_num will start running from that num\n"); +} + +//----------------------------------------- +// printArch +//----------------------------------------- +static void printArch( void ) +{ + log_info( "sizeof( void*) = %d\n", (int) sizeof( void *) ); + +#if defined( __APPLE__ ) + +#if defined( __ppc__ ) + log_info( "ARCH:\tppc\n" ); +#elif defined( __ppc64__ ) + log_info( "ARCH:\tppc64\n" ); +#elif defined( __i386__ ) + log_info( "ARCH:\ti386\n" ); +#elif defined( __x86_64__ ) + log_info( "ARCH:\tx86_64\n" ); +#elif defined( __arm__ ) + log_info( "ARCH:\tarm\n" ); +#else +#error unknown arch +#endif + + int type = 0; + size_t typeSize = sizeof( type ); + sysctlbyname( "hw.cputype", &type, &typeSize, NULL, 0 ); + log_info( "cpu 
type:\t%d\n", type ); + typeSize = sizeof( type ); + sysctlbyname( "hw.cpusubtype", &type, &typeSize, NULL, 0 ); + log_info( "cpu subtype:\t%d\n", type ); + +#endif +} + +//----------------------------------------- +// notify_callback +//----------------------------------------- +void CL_CALLBACK notify_callback(const char *errinfo, const void *private_info, size_t cb, void *user_data) +{ + log_info( "%s\n", errinfo ); +} + + +//----------------------------------------- +// main +//----------------------------------------- +int main(int argc, const char* argv[]) { + int i; + cl_device_type device_type = CL_DEVICE_TYPE_DEFAULT; + cl_platform_id platform_id; + long test_filter_num = 0; // test number to run or 0 + const char* exec_testname = NULL; + cl_device_id device_id; + uint32_t device_frequency = 0; + uint32_t compute_devices = 0; + + + + test_start(); + + argc = parseCustomParam(argc, argv); + if (argc == -1) + { + test_finish(); + return -1; + } + + // Check the environmental to see if there is device preference + char *device_env = getenv("CL_DEVICE_TYPE"); + if (device_env != NULL) { + if( strcmp( device_env, "gpu" ) == 0 || strcmp( device_env, "CL_DEVICE_TYPE_GPU" ) == 0 ) + device_type = CL_DEVICE_TYPE_GPU; + else if( strcmp( device_env, "cpu" ) == 0 || strcmp( device_env, "CL_DEVICE_TYPE_CPU" ) == 0 ) + device_type = CL_DEVICE_TYPE_CPU; + else if( strcmp( device_env, "accelerator" ) == 0 || strcmp( device_env, "CL_DEVICE_TYPE_ACCELERATOR" ) == 0 ) + device_type = CL_DEVICE_TYPE_ACCELERATOR; + else if( strcmp( device_env, "default" ) == 0 || strcmp( device_env, "CL_DEVICE_TYPE_DEFAULT" ) == 0 ) + device_type = CL_DEVICE_TYPE_DEFAULT; + else + { + log_error( "Unknown CL_DEVICE_TYPE environment variable: %s.\nAborting...\n", device_env ); + abort(); + } + } + + // Determine if we want to run a particular test or if we want to + // start running from a certain point and if we want to run on cpu/gpu + // usage: test_printf [test_name] [start test num] 
[run_long] + // default is to run all tests on the gpu and be short + // test names are of the form printf_testId + + for (i=1; i < argc; ++i) { + const char *arg = argv[i]; + if (arg == NULL) + break; + + if (arg[0] == '-') + { + arg++; + while(*arg != '\0') + { + switch(*arg) { + case 'h': + printUsage(); + return 0; + default: + log_error( " <-- unknown flag: %c (0x%2.2x)\n)", *arg, *arg ); + printUsage(); + return 0; + } + arg++; + } + } + else { + char* t = NULL; + long num = strtol(argv[i], &t, 0); + if (t != argv[i]) + test_filter_num = num; + else if( 0 == strcmp( argv[i], "CL_DEVICE_TYPE_CPU" ) ) + device_type = CL_DEVICE_TYPE_CPU; + else if( 0 == strcmp( argv[i], "CL_DEVICE_TYPE_GPU" ) ) + device_type = CL_DEVICE_TYPE_GPU; + else if( 0 == strcmp( argv[i], "CL_DEVICE_TYPE_ACCELERATOR" ) ) + device_type = CL_DEVICE_TYPE_ACCELERATOR; + else if( 0 == strcmp( argv[i], "CL_DEVICE_TYPE_DEFAULT" ) ) + device_type = CL_DEVICE_TYPE_DEFAULT; + else { + // assume it is a test name that we want to execute + exec_testname = argv[i]; + } + } + } + + + int err; + int fd = acquireOutputStream(); + + // Get platform + err = clGetPlatformIDs(1, &platform_id, NULL); + checkErr(err,"clGetPlatformIDs failed"); + + + // Get Device information + err = clGetDeviceIDs(platform_id, device_type, 1, &device_id, 0); + checkErr(err,"clGetComputeDevices"); + + + err = clGetDeviceInfo(device_id, CL_DEVICE_TYPE, sizeof(cl_device_type), &device_type, NULL); + checkErr(err,"clGetComputeConfigInfo 1"); + + + size_t config_size = sizeof( device_frequency ); +#if MULTITHREAD + if( (err = clGetDeviceInfo(device_id, CL_DEVICE_MAX_COMPUTE_UNITS, config_size, &compute_devices, NULL )) ) +#endif + compute_devices = 1; + + config_size = sizeof(device_frequency); + if((err = clGetDeviceInfo(device_id, CL_DEVICE_MAX_CLOCK_FREQUENCY, config_size, &device_frequency, NULL ))) + device_frequency = 1; + + releaseOutputStream(fd); + + log_info( "\nCompute Device info:\n" ); + log_info( "\tProcessing with %d 
devices\n", compute_devices ); + log_info( "\tDevice Frequency: %d MHz\n", device_frequency ); + + + + printDeviceHeader( device_id ); + + printArch(); + + err = check_opencl_version(device_id,1,2); + if( err != CL_SUCCESS ) { + print_missing_feature(err,"printf"); + test_finish(); + return err; + } + + log_info( "Test binary built %s %s\n", __DATE__, __TIME__ ); + + fd = acquireOutputStream(); + + cl_context context = clCreateContext(NULL, 1, &device_id, notify_callback, NULL, NULL); + checkNull(context, "clCreateContext"); + + cl_command_queue queue = clCreateCommandQueueWithProperties(context, device_id, 0, NULL); + checkNull(queue, "clCreateCommandQueue"); + + // Forall types + for (int testId = 0; testId < TYPE_COUNT; ++testId) { + if (test_filter_num && (testId != test_filter_num)) { + releaseOutputStream(fd); + log_info("\n*** Skipping printf for %s ***\n",strType[testId]); + fd = acquireOutputStream(); + } + else { + releaseOutputStream(fd); + log_info("\n*** Testing printf for %s ***\n",strType[testId]); + fd = acquireOutputStream(); + //For all formats + for(unsigned int testNum = 0;testNum < allTestCase[testId]->_testNum;++testNum){ + releaseOutputStream(fd); + if(allTestCase[testId]->_type == TYPE_VECTOR) + log_info("%d)testing printf(\"%sv%s%s\",%s)\n",testNum,allTestCase[testId]->_genParameters[testNum].vectorFormatFlag,allTestCase[testId]->_genParameters[testNum].vectorSize, + allTestCase[testId]->_genParameters[testNum].vectorFormatSpecifier,allTestCase[testId]->_genParameters[testNum].dataRepresentation); + else if(allTestCase[testId]->_type == TYPE_ADDRESS_SPACE) + { + if(isKernelArgument(allTestCase[testId], testNum)) + log_info("%d)testing kernel //argument %s \n printf(%s,%s)\n",testNum,allTestCase[testId]->_genParameters[testNum].addrSpaceArgumentTypeQualifier, + allTestCase[testId]->_genParameters[testNum].genericFormat,allTestCase[testId]->_genParameters[testNum].addrSpaceParameter); + else + log_info("%d)testing kernel //variable %s \n 
printf(%s,%s)\n",testNum,allTestCase[testId]->_genParameters[testNum].addrSpaceVariableTypeQualifier, + allTestCase[testId]->_genParameters[testNum].genericFormat,allTestCase[testId]->_genParameters[testNum].addrSpaceParameter); + } + else + log_info("%d)testing printf(\"%s\",%s)\n",testNum,allTestCase[testId]->_genParameters[testNum].genericFormat,allTestCase[testId]->_genParameters[testNum].dataRepresentation); + fd = acquireOutputStream(); + + // Long support for varible type + if(allTestCase[testId]->_type == TYPE_VECTOR && !strcmp(allTestCase[testId]->_genParameters[testNum].dataType,"long") && !isLongSupported(device_id)) + continue; + + // Long support for address in FULL_PROFILE/EMBEDDED_PROFILE + bool isLongSupport = true; + if(allTestCase[testId]->_type == TYPE_ADDRESS_SPACE && isKernelPFormat(allTestCase[testId],testNum) && !isLongSupported(device_id)) + isLongSupport = false; + + // Perform the test + if (doTest(queue, context,testId,testNum,device_id,isLongSupport) != 0) + { + releaseOutputStream(fd); + log_error("*** FAILED ***\n\n"); + fd = acquireOutputStream(); + } + else + { + releaseOutputStream(fd); + log_info("Passed\n"); + fd = acquireOutputStream(); + } + } + } + } + + int error = clFinish(queue); + if (error) { + log_error("clFinish failed: %d\n", error); + } + + if(clReleaseCommandQueue(queue)!=CL_SUCCESS) + log_error("clReleaseCommandQueue\n"); + if(clReleaseContext(context)!= CL_SUCCESS) + log_error("clReleaseContext\n"); + + releaseOutputStream(fd); + + + if (s_test_fail == 0) { + if (s_test_cnt > 1) + log_info("PASSED %d of %d tests.\n", s_test_cnt, s_test_cnt); + else + log_info("PASSED test.\n"); + } else if (s_test_fail > 0) { + if (s_test_cnt > 1) + { + log_error("FAILED %d of %d tests.\n", s_test_fail, s_test_cnt); + log_info("PASSED %d of %d tests.\n", s_test_cnt - s_test_fail, s_test_cnt); + } + else + log_error(" FAILED test.\n"); + } + + test_finish(); + return s_test_fail; +} diff --git a/test_conformance/printf/test_printf.h 
b/test_conformance/printf/test_printf.h new file mode 100644 index 00000000..776ec3ef --- /dev/null +++ b/test_conformance/printf/test_printf.h @@ -0,0 +1,133 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef TESTPRINTF_INCLUDED_H +#define TESTPRINTF_INCLUDED_H + +#include "../../test_common/harness/compat.h" + +#include +#include + +#ifdef __APPLE__ +#include +#include +#else +#include +#include +#endif + +// Enable the test to be used with ATF +#if USE_ATF +// export BUILD_WITH_ATF=1 +#include +#define test_start() ATFTestStart() +#define log_info ATFLogInfo +#define log_error ATFLogError +#define test_finish() ATFTestFinish() +#else +#define test_start() +#define log_info printf +#define log_error printf +#define test_finish() +#endif // USE_ATF + +#define ANALYSIS_BUFFER_SIZE 256 + +//----------------------------------------- +// Definitions and initializations +//----------------------------------------- + +//----------------------------------------- +// Types +//----------------------------------------- +enum Type + { + TYPE_INT, + TYPE_FLOAT, + TYPE_OCTAL, + TYPE_UNSIGNED, + TYPE_HEXADEC, + TYPE_CHAR, + TYPE_STRING, + TYPE_VECTOR, + TYPE_ADDRESS_SPACE, + TYPE_COUNT +}; + +struct printDataGenParameters +{ + const char* genericFormat; + const char* dataRepresentation; + const char* vectorFormatFlag; + const char* vectorFormatSpecifier; + const char* dataType; + const char* vectorSize; + 
const char* addrSpaceArgumentTypeQualifier;
+    const char* addrSpaceVariableTypeQualifier;
+    const char* addrSpaceParameter;
+    const char* addrSpacePAdd;
+};
+
+//-----------------------------------------
+//Test Case
+//-----------------------------------------
+
+struct testCase
+{
+    unsigned int _testNum;                                //test number
+    enum Type _type;                                      //(data)type for test
+    //const char** _strPrint;                             //auxiliary data to build the code for kernel source
+    const char** _correctBuffer;                          //look-up table for correct results for printf
+    struct printDataGenParameters* _genParameters;        //auxiliary data to build the code for kernel source
+};
+
+
+extern const char* strType[];
+extern testCase* allTestCase[];
+
+size_t verifyOutputBuffer(char *analysisBuffer,testCase* pTestCase,size_t testId,cl_ulong pAddr = 0);
+
+// Helpful macros
+
+// The next three macros check on different return values.  Returns -1
+// if the check failed
+#define checkErr(err, msg) \
+    if (err != CL_SUCCESS) { \
+        log_error("%s failed errcode:%d\n", msg, err); \
+        return -1; \
+    }
+
+// NOTE(review): logs 'err' from the enclosing scope rather than 'val' — the
+// caller must have an 'err' variable in scope; confirm this is intentional.
+#define checkZero(val, msg) \
+    if (val == 0) { \
+        log_error("%s failed errcode:%d\n", msg, err); \
+        return -1; \
+    }
+
+#define checkNull(ptr, msg) \
+    if (!ptr) { \
+        log_error("%s failed\n", msg); \
+        return -1; \
+    }
+
+// When a helper returns a negative one, we want to return from main
+// with negative one.  This helper prevents me from having to write
+// this multiple times
+#define checkHelperErr(err) \
+    if (err == -1) { \
+        return err; \
+    }
+
+#endif // TESTPRINTF_INCLUDED_H
diff --git a/test_conformance/printf/util_printf.c b/test_conformance/printf/util_printf.c
new file mode 100644
index 00000000..5bd9be1a
--- /dev/null
+++ b/test_conformance/printf/util_printf.c
@@ -0,0 +1,894 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" + +#include "test_printf.h" + + +#if defined (_WIN32) +#define strtoull _strtoi64 +#endif + +const char* strType[] = {"int","float","octal","unsigned","hexadecimal","char","string","vector","address space"}; + + + +//================================== + +// int + +//================================== + +//------------------------------------------------------ + +// [string] format | [string] int-data representation | + +//------------------------------------------------------ + +struct printDataGenParameters printIntGenParameters[] = { + + //(Minimum)Five-wide,default(right)-justified + + {"%5d","10"}, + + //(Minimum)Five-wide,left-justified + + {"%-5d","10"}, + + //(Minimum)Five-wide,default(right)-justified,zero-filled + + {"%05d","10"}, + + //(Minimum)Five-wide,default(right)-justified,with sign + + {"%+5d","10"}, + + //(Minimum)Five-wide ,left-justified,with sign + + {"%-+5d","10"}, + + //(Minimum)Five-digit(zero-filled in absent digits),default(right)-justified + + {"%.5i","100"}, + + //(Minimum)Six-wide,Five-digit(zero-filled in absent digits),default(right)-justified + + {"%6.5i","100"}, + + //0 and - flag both apper ==>0 is ignored,left-justified,capital I + + {"%-06i","100"}, + + //(Minimum)Six-wide,Five-digit(zero-filled in absent digits),default(right)-justified + + {"%06.5i","100"} + +}; + +//------------------------------------------------ + +// Lookup table - [string]int-correct buffer | + +//------------------------------------------------ + +const char *correctBufferInt[] = { + + " 
10", + + "10 ", + + "00010", + + " +10", + + "+10 ", + + "00100", + + " 00100", + + "100 ", + + " 00100" + +}; + + + + + +//----------------------------------------------- + +//test case for int | + +//----------------------------------------------- + +testCase testCaseInt = { + + sizeof(correctBufferInt)/sizeof(char*), + + TYPE_INT, + + correctBufferInt, + + printIntGenParameters + +}; + + + + + +//============================================== + +// float + +//============================================== + + + +//-------------------------------------------------------- + +// [string] format | [string] float-data representation | + +//-------------------------------------------------------- + +struct printDataGenParameters printFloatGenParameters[] = { + + //Default(right)-justified + + {"%f","10.3456"}, + + //One position after the decimal,default(right)-justified + + {"%.1f","10.3456"}, + + //Two positions after the decimal,default(right)-justified + + {"%.2f","10.3456"}, + + //(Minimum)Eight-wide,three positions after the decimal,default(right)-justified + + {"%8.3f","10.3456"}, + + //(Minimum)Eight-wide,two positions after the decimal,zero-filled,default(right)-justified + + {"%08.2f","10.3456"}, + + //(Minimum)Eight-wide,two positions after the decimal,left-justified + + {"%-8.2f","10.3456"}, + + //(Minimum)Eight-wide,two positions after the decimal,with sign,default(right)-justified + + {"%+8.2f","-10.3456"}, + + //Zero positions after the decimal([floor]rounding),default(right)-justified + + {"%.0f","0.1"}, + + //Zero positions after the decimal([ceil]rounding),default(right)-justified + + {"%.0f","0.6"}, + + //Zero-filled,default positions number after the decimal,default(right)-justified + + {"%0f","0.6"}, + + //Double argument representing floating-point,used by f style,default(right)-justified + + {"%4g","12345.6789"}, + + //Double argument representing floating-point,used by e style,default(right)-justified + + {"%4.2g","12345.6789"}, + + //Double 
argument representing floating-point,used by f style,default(right)-justified + + {"%4G","0.0000023"}, + + //Double argument representing floating-point,used by e style,default(right)-justified + + {"%4G","0.023"}, + + //Double argument representing floating-point,with exponent,left-justified,default(right)-justified + + {"%-#20.15e","789456123.0"}, + + //Double argument representing floating-point,with exponent,left-justified,with sign,capital E,default(right)-justified + + {"%+#21.15E","789456123.0"}, + +#if ! defined( __ANDROID__ ) + + //Double argument representing floating-point,in [-]xh.hhhhpAd style + + {"%.6a","0.1"}, + + //(Minimum)Ten-wide,Double argument representing floating-point,in xh.hhhhpAd style,default(right)-justified + + {"%10.2a","9990.235"}, + +#endif + + //Infinity (1.0/0.0) + + {"%f","1.0f/0.0f"}, + + //NaN + + {"%f","sqrt(-1.0f)"}, + + //NaN + {"%f","acospi(2.0f)"} + }; +//-------------------------------------------------------- + +// Lookup table - [string]float-correct buffer | + +//-------------------------------------------------------- + +const char* correctBufferFloat[] = { + + "10.345600", + + "10.3", + + "10.35", + + " 10.346", + + "00010.35", + + "10.35 ", + + " -10.35", + + "0", + + "1", + + "0.600000", + + "12345.7", + + "1.2e+4", + + "2.3E-6", + + "0.023", + + "7.894561230000000e+8", + + "+7.894561230000000E+8", + +#if ! 
defined( __ANDROID__ ) + + "0x1.99999ap-4", + + "0x1.38p+13", + +#endif + + "inf", + + "-nan", + + "nan" +}; + +//--------------------------------------------------------- + +//Test case for float | + +//--------------------------------------------------------- + +testCase testCaseFloat = { + + sizeof(correctBufferFloat)/sizeof(char*), + + TYPE_FLOAT, + + correctBufferFloat, + + printFloatGenParameters + +}; + + + +//========================================================= + +// octal + +//========================================================= + + + +//--------------------------------------------------------- + +// [string] format | [string] octal-data representation | + +//--------------------------------------------------------- + +struct printDataGenParameters printOctalGenParameters[] = { + + //Default(right)-justified + + {"%o","10"}, + + //Five-digit,default(right)-justified + + {"%.5o","10"}, + + //Default(right)-justified,increase precision + + {"%#o","100000000"}, + + //(Minimum)Four-wide,Five-digit,0-flag ignored(because of precision),default(right)-justified + + {"%04.5o","10"} + +}; + +//------------------------------------------------------- + +// Lookup table - [string] octal-correct buffer | + +//------------------------------------------------------- + + + +const char* correctBufferOctal[] = { + + "12", + + "00012", + + "0575360400", + + "00012" + +}; + +//------------------------------------------------------- + +//Test case for octal | + +//------------------------------------------------------- + +testCase testCaseOctal = { + + sizeof(correctBufferOctal)/sizeof(char*), + + TYPE_OCTAL, + + correctBufferOctal, + + printOctalGenParameters + +}; + + + +//========================================================= + +// unsigned + +//========================================================= + + + +//--------------------------------------------------------- + +// [string] format | [string] unsined-data representation | + 
+//--------------------------------------------------------- + +struct printDataGenParameters printUnsignedGenParameters[] = { + + //Default(right)-justified + + {"%u","10"}, + + //Zero precision for zero,default(right)-justified + + {"%.0u","0"}, + +}; + +//------------------------------------------------------- + +// Lookup table - [string] octal-correct buffer | + +//------------------------------------------------------- + + + +const char* correctBufferUnsigned[] = { + + "10", + + "" + +}; + +//------------------------------------------------------- + +//Test case for octal | + +//------------------------------------------------------- + +testCase testCaseUnsigned = { + + sizeof(correctBufferUnsigned)/sizeof(char*), + + TYPE_UNSIGNED, + + correctBufferUnsigned, + + printUnsignedGenParameters + +}; + + + +//======================================================= + +// hexadecimal + +//======================================================= + + + +//-------------------------------------------------------------- + +// [string] format | [string] hexadecimal-data representation | + +//-------------------------------------------------------------- + +struct printDataGenParameters printHexadecimalGenParameters[] = { + + //Add 0x,low x,default(right)-justified + + {"%#x","0xABCDEF"}, + + //Add 0x,capital X,default(right)-justified + + {"%#X","0xABCDEF"}, + + //Not add 0x,if zero,default(right)-justified + + {"%#X","0"}, + + //(Minimum)Eight-wide,default(right)-justified + + {"%8x","399"}, + + //(Minimum)Four-wide,zero-filled,default(right)-justified + + {"%04x","399"} + +}; + +//-------------------------------------------------------------- + +// Lookup table - [string]hexadecimal-correct buffer | + +//-------------------------------------------------------------- + + + +const char* correctBufferHexadecimal[] = { + + "0xabcdef", + + "0XABCDEF", + + "0", + + " 18f", + + "018f" + +}; + +//-------------------------------------------------------------- + +//Test case for 
hexadecimal | + +//-------------------------------------------------------------- + +testCase testCaseHexadecimal = { + + sizeof(correctBufferHexadecimal)/sizeof(char*), + + TYPE_HEXADEC, + + correctBufferHexadecimal, + + printHexadecimalGenParameters + +}; + + + +//============================================================= + +// char + +//============================================================= + + + +//----------------------------------------------------------- + +// [string] format | [string] string-data representation | + +//----------------------------------------------------------- + +struct printDataGenParameters printCharGenParameters[] = { + + //Four-wide,zero-filled,default(right)-justified + + {"%4c","\'1\'"}, + + //Four-wide,left-justified + + {"%-4c","\'1\'"}, + + //(unsigned) int argument,default(right)-justified + + {"%c","66"} + +}; + +//--------------------------------------------------------- + +// Lookup table -[string] char-correct buffer | + +//--------------------------------------------------------- + +const char * correctBufferChar[] = { + + " 1", + + "1 ", + + "B", + +}; + + + +//---------------------------------------------------------- + +//Test case for char | + +//---------------------------------------------------------- + +testCase testCaseChar = { + + sizeof(correctBufferChar)/sizeof(char*), + + TYPE_CHAR, + + correctBufferChar, + + printCharGenParameters + +}; + + + +//========================================================== + +// string + +//========================================================== + + + +//-------------------------------------------------------- + +// [string]format | [string] string-data representation | + +//-------------------------------------------------------- + +struct printDataGenParameters printStringGenParameters[] = { + + //(Minimum)Four-wide,zero-filled,default(right)-justified + + {"%4s","\"foo\""}, + + //One-digit(precision ignored),left-justified + + {"%.1s","\"foo\""}, + + //%% 
specification + + {"%s","\"%%\""}, + + //null string + + {"%s","(void*)0"} + +}; + +//--------------------------------------------------------- + +// Lookup table -[string] string-correct buffer | + +//--------------------------------------------------------- + +const char * correctBufferString[] = { + + " foo", + + "f", + + "%%", + + "(null)" + +}; + +//--------------------------------------------------------- + +//Test case for string | + +//--------------------------------------------------------- + +testCase testCaseString = { + + sizeof(correctBufferString)/sizeof(char*), + + TYPE_STRING, + + correctBufferString, + + printStringGenParameters + +}; + + + +//========================================================= + +// vector + +//========================================================= + + + +//------------------------------------------------------------------------------------------------------------------- + +//[string] flag | [string] specifier | [string] type | [string] vector-data representation | [string] vector size | + +//------------------------------------------------------------------------------------------------------------------- + +struct printDataGenParameters printVectorGenParameters[]={ + + //(Minimum)Two-wide,two positions after decimal + + {NULL,"(1.0f,2.0f,3.0f,4.0f)","%2.2","hlf","float","4"}, + + //Alternative form,uchar argument + + {NULL,"(0xFA,0xFB)","%#","hhx","uchar","2"}, + + //Alternative form,ushort argument + + {NULL,"(0x1234,0x8765)","%#","hx","ushort","2"}, + + //Alternative form,uint argument + + {NULL,"(0x12345678,0x87654321)","%#","hlx","uint","2"}, + + //Alternative form,long argument + + {NULL,"(12345678,98765432)","%","ld","long","2"} + +}; + +//------------------------------------------------------------ + +// Lookup table -[string] vector-correct buffer | + +//------------------------------------------------------------ + +const char * correctBufferVector[] = { + + "1.00,2.00,3.00,4.00", + + "0xfa,0xfb", + + 
"0x1234,0x8765", + + "0x12345678,0x87654321", + + "12345678,98765432" + +}; + +//----------------------------------------------------------- + +//Test case for vector | + +//----------------------------------------------------------- + +testCase testCaseVector = { + + sizeof(correctBufferVector)/(sizeof(char *)), + + TYPE_VECTOR, + + correctBufferVector, + + printVectorGenParameters + +}; + + + +//================================================================== + +// address space + +//================================================================== + + + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +// [string] argument type qualifier |[string] variable type qualifier + initialization | [string] format | [string] parameter |[string]%p indicator/additional code | + +//------------------------------------------------------------------------------------------------------------------------------------------------------------------- + + + +struct printDataGenParameters printAddrSpaceGenParameters[]={ + + //Global memory region + + {"\"%d\\n\"",NULL,NULL,NULL,NULL,NULL,"__global int* x","","*x",""}, + + //Global,constant, memory region + + {"\"%d\\n\"",NULL,NULL,NULL,NULL,NULL,"constant int* x","","*x",""}, + + //Local memory region + + {"\"%+d\\n\"",NULL,NULL,NULL,NULL,NULL,"","local int x;\n x= (int)3;\n","x",""}, + + //Private memory region + + {"\"%i\\n\"",NULL,NULL,NULL,NULL,NULL,"","private int x;\n x = (int)-1;\n","x",""}, + + //Address of void * from global memory region + + {"\"%p\\n\"",NULL,NULL,NULL,NULL,NULL,"__global void* x,__global intptr_t* xAddr","","x","*xAddr = (intptr_t)x;\n"} + +}; + +//------------------------------------------------------------------------------- + +// Lookup table -[string] address space -correct buffer | + +//------------------------------------------------------------------------------- + +const char * 
correctAddrSpace[] = { + + "2","2","+3","-1","" + +}; + +//------------------------------------------------------------------------------- + +//Test case for address space | + +//------------------------------------------------------------------------------- + +testCase testCaseAddrSpace = { + + sizeof(correctAddrSpace)/(sizeof(char *)), + + TYPE_ADDRESS_SPACE, + + correctAddrSpace, + + printAddrSpaceGenParameters + +}; + + + +//------------------------------------------------------------------------------- + +//All Test cases | + +//------------------------------------------------------------------------------- + +testCase* allTestCase[] = {&testCaseInt,&testCaseFloat,&testCaseOctal,&testCaseUnsigned,&testCaseHexadecimal,&testCaseChar,&testCaseString,&testCaseVector,&testCaseAddrSpace}; + + +//----------------------------------------- + +// Check functions + +//----------------------------------------- + +size_t verifyOutputBuffer(char *analysisBuffer,testCase* pTestCase,size_t testId,cl_ulong pAddr) +{ + int terminatePos = strlen(analysisBuffer); + if(terminatePos > 0) + { + analysisBuffer[terminatePos - 1] = '\0'; + } + + //Convert analysis buffer to long for address space + if(pTestCase->_type == TYPE_ADDRESS_SPACE && strcmp(pTestCase->_genParameters[testId].addrSpacePAdd,"")) + + { + char analysisBufferTmp[ANALYSIS_BUFFER_SIZE]; + + if(strstr(analysisBuffer,"0x") == NULL) + // Need to prepend 0x to ASCII number before calling strtol. 
+            strcpy(analysisBufferTmp,"0x");
+
+        else analysisBufferTmp[0]='\0';
+        strcat(analysisBufferTmp,analysisBuffer);
+        // Returns 0 when the printed address parses back to pAddr, 1 otherwise.
+        if (sizeof(long) == 8) {
+            if(strtoul(analysisBufferTmp,NULL,0) == pAddr) return 0;
+        }
+        else {
+            if(strtoull(analysisBufferTmp,NULL,0) == pAddr) return 0;
+        }
+        return 1;
+
+    }
+
+    char* exp;
+    //Exponent representation
+    if((exp = strstr(analysisBuffer,"E+")) != NULL || (exp = strstr(analysisBuffer,"e+")) != NULL || (exp = strstr(analysisBuffer,"E-")) != NULL || (exp = strstr(analysisBuffer,"e-")) != NULL)
+    {
+        char correctExp[3]={0};
+        strncpy(correctExp,exp,2);
+
+
+        char* eCorrectBuffer = strstr((char*)pTestCase->_correctBuffer[testId],correctExp);
+        if(eCorrectBuffer == NULL)
+            return false;
+
+        eCorrectBuffer+=2;
+        exp += 2;
+
+        //Exponent always contains at least two digits
+        if(strlen(exp) < 2)
+            return false;
+        //Skip leading zeros in the exponent
+        while(*exp == '0')
+            ++exp;
+        return strcmp(eCorrectBuffer,exp);
+    }
+    if(!strcmp(pTestCase->_correctBuffer[testId],"inf"))
+        return strcmp(analysisBuffer,"inf")&&strcmp(analysisBuffer,"infinity")&&strcmp(analysisBuffer,"1.#INF00")&&strcmp(analysisBuffer,"Inf");
+    if(!strcmp(pTestCase->_correctBuffer[testId],"nan") || !strcmp(pTestCase->_correctBuffer[testId],"-nan")) {
+        return strcmp(analysisBuffer,"nan")&&strcmp(analysisBuffer,"-nan")&&strcmp(analysisBuffer,"1.#IND00")&&strcmp(analysisBuffer,"-1.#IND00")&&strcmp(analysisBuffer,"NaN")&&strcmp(analysisBuffer,"nan(ind)")&&strcmp(analysisBuffer,"nan(snan)");
+    }
+    return strcmp(analysisBuffer,pTestCase->_correctBuffer[testId]);
+}
+
diff --git a/test_conformance/profiling/CMakeLists.txt b/test_conformance/profiling/CMakeLists.txt
new file mode 100644
index 00000000..ab6ca5f9
--- /dev/null
+++ b/test_conformance/profiling/CMakeLists.txt
@@ -0,0 +1,23 @@
+set(MODULE_NAME PROFILING)
+
+set(${MODULE_NAME}_SOURCES
+        main.c
+        readArray.c
+        writeArray.c
+        readImage.c
+        writeImage.c
+        copy.c
+        execute.c
+        execute_multipass.c
+        
../../test_common/harness/testHarness.c + ../../test_common/harness/errorHelpers.c + ../../test_common/harness/typeWrappers.cpp + ../../test_common/harness/imageHelpers.cpp + ../../test_common/harness/kernelHelpers.c + ../../test_common/harness/mt19937.c + ../../test_common/harness/conversions.c + ../../test_common/harness/msvc9.c + ../../test_common/harness/parseParameters.cpp +) + +include(../CMakeCommon.txt) diff --git a/test_conformance/profiling/Jamfile b/test_conformance/profiling/Jamfile new file mode 100644 index 00000000..7dad6323 --- /dev/null +++ b/test_conformance/profiling/Jamfile @@ -0,0 +1,22 @@ +project + : requirements + gcc:-xc++ + msvc:"/TP" + ; + +exe test_profiling + : copy.c + execute.c + execute_multipass.c + main.c + readArray.c + readImage.c + writeArray.c + writeImage.c + ; + +install dist + : test_profiling + : debug:$(DIST)/debug/tests/test_conformance/profiling + release:$(DIST)/release/tests/test_conformance/profiling + ; diff --git a/test_conformance/profiling/Makefile b/test_conformance/profiling/Makefile new file mode 100644 index 00000000..f1834fbc --- /dev/null +++ b/test_conformance/profiling/Makefile @@ -0,0 +1,45 @@ +ifdef BUILD_WITH_ATF +ATF = -framework ATF +USE_ATF = -DUSE_ATF +endif + +SRCS = main.c readArray.c writeArray.c readImage.c writeImage.c copy.c execute.c execute_multipass.c \ + ../../test_common/harness/testHarness.c \ + ../../test_common/harness/errorHelpers.c \ + ../../test_common/harness/typeWrappers.cpp \ + ../../test_common/harness/imageHelpers.cpp \ + ../../test_common/harness/mt19937.c \ + ../../test_common/harness/conversions.c \ + ../../test_common/harness/kernelHelpers.c + +SOURCES = $(abspath $(SRCS)) + + +LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries +LIBPATH += -L. 
+ +FRAMEWORK = ${SOURCES} +HEADERS = +TARGET = test_profiling +INCLUDE = +COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32 +#COMPILERFLAGS = -c -Wall -g -DUSE_LOCAL_THREADS +CC = c++ +CFLAGS = $(COMPILERFLAGS) $(RC_CFLAGS) ${USE_ATF} +CXXFLAGS= $(COMPILERFLAGS) $(RC_CFLAGS) ${USE_ATF} +LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF} + +OBJECTS := ${SOURCES:.c=.o} +OBJECTS := ${OBJECTS:.cpp=.o} + +TARGETOBJECT = +all: $(TARGET) + +$(TARGET): $(OBJECTS) + $(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES) + +clean: + rm -f $(TARGET) $(OBJECTS) + +.DEFAULT: + @echo The target \"$@\" does not exist in Makefile. diff --git a/test_conformance/profiling/copy.c b/test_conformance/profiling/copy.c new file mode 100644 index 00000000..9256c2e9 --- /dev/null +++ b/test_conformance/profiling/copy.c @@ -0,0 +1,869 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include + +#include "procs.h" +#include "../../test_common/harness/testHarness.h" +#include "../../test_common/harness/errorHelpers.h" +#include "../../test_common/harness/conversions.h" + +//--- the code for the kernel executables +static const char *write_kernel_code = +"\n" +"__kernel void test_write(__global unsigned char *src, write_only image2d_t dstimg)\n" +"{\n" +" int tid_x = get_global_id(0);\n" +" int tid_y = get_global_id(1);\n" +" int indx = tid_y * get_image_width(dstimg) + tid_x;\n" +" float4 color;\n" +"\n" +" indx *= 4;\n" +" color = (float4)((float)src[indx+0], (float)src[indx+1], (float)src[indx+2], (float)src[indx+3]);\n" +" color /= (float4)(255.0f, 255.0f, 255.0f, 255.0f);\n" +" write_imagef(dstimg, (int2)(tid_x, tid_y), color);\n" +"\n" +"}\n"; + + +//--- the verify functions +static int verify_subimage( unsigned char *src, unsigned char *dst, size_t srcx, size_t srcy, + size_t dstx, size_t dsty, size_t subw, size_t subh, size_t pitch, size_t element_pitch ) +{ + size_t i, j, k; + size_t srcj, dstj; + size_t srcLoc, dstLoc; + + for( j = 0; j < subh; j++ ){ + srcj = ( j + srcy ) * pitch * element_pitch; + dstj = ( j + dsty ) * pitch * element_pitch; + for( i = 0; i < subw; i++ ){ + srcLoc = srcj + ( i + srcx ) * element_pitch; + dstLoc = dstj + ( i + dstx ) * element_pitch; + for( k = 0; k < element_pitch; k++ ){ // test each channel + if( src[srcLoc+k] != dst[dstLoc+k] ){ + return -1; + } + } + } + } + + return 0; +} + + +static int verify_copy_array( int *inptr, int *outptr, int n ) +{ + int i; + + for( i = 0; i < n; i++ ) { + if( outptr[i] != inptr[i] ) + return -1; + } + + return 0; +} + + +//----- helper functions +static cl_uchar *generate_image( int n, MTdata d ) +{ + cl_uchar *ptr = (cl_uchar *)malloc( n ); + int i; + + for( i = 0; i < n; i++ ) + ptr[i] = (cl_uchar)genrand_int32(d); + + return ptr; +} + + +static int copy_size( cl_device_id device, 
cl_context context, cl_command_queue queue, int num_elements, MTdata d ) +{ + cl_mem streams[2]; + cl_event copyEvent; + cl_ulong queueStart, submitStart, writeStart, writeEnd; + cl_int *int_input_ptr, *int_output_ptr; + int err = 0; + int i; + + int_input_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements); + int_output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements); + + streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * num_elements, NULL, &err ); + if( !streams[0] ){ + log_error("clCreateBuffer failed\n"); + return -1; + } + streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * num_elements, NULL, &err ); + if( !streams[1] ){ + log_error("clCreateBuffer failed\n"); + return -1; + } + + for (i=0; i> 30; // seed with incorrect data + } + + err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_int)*num_elements, (void *)int_input_ptr, 0, NULL, NULL ); + if( err != CL_SUCCESS ){ + print_error( err, "clWriteArray failed" ); + clReleaseMemObject( streams[0] ); + clReleaseMemObject( streams[1] ); + free( (void *)int_output_ptr ); + free( (void *)int_input_ptr ); + return -1; + } + + err = clEnqueueCopyBuffer( queue, streams[0], streams[1], 0, 0, sizeof(cl_int)*num_elements, 0, NULL, ©Event ); + if( err != CL_SUCCESS ){ + print_error( err, "clCopyArray failed" ); + clReleaseMemObject( streams[0] ); + clReleaseMemObject( streams[1] ); + free( (void *)int_output_ptr ); + free( (void *)int_input_ptr ); + return -1; + } + + // This synchronization point is needed in order to assume the data is valid. + // Getting profiling information is not a synchronization point. 
+ err = clWaitForEvents( 1, ©Event ); + if( err != CL_SUCCESS ) + { + clReleaseEvent(copyEvent); + clReleaseMemObject( streams[0] ); + clReleaseMemObject( streams[1] ); + free( (void *)int_output_ptr ); + free( (void *)int_input_ptr ); + return -1; + } + + // test profiling + while( ( err = clGetEventProfilingInfo( copyEvent, CL_PROFILING_COMMAND_QUEUED, sizeof( cl_ulong ), &queueStart, NULL ) ) == + CL_PROFILING_INFO_NOT_AVAILABLE ); + if( err != CL_SUCCESS ){ + print_error( err, "clGetEventProfilingInfo failed" ); + clReleaseEvent(copyEvent); + clReleaseMemObject( streams[0] ); + clReleaseMemObject( streams[1] ); + free( (void *)int_output_ptr ); + free( (void *)int_input_ptr ); + return -1; + } + + while( ( err = clGetEventProfilingInfo( copyEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof( cl_ulong ), &submitStart, NULL ) ) == + CL_PROFILING_INFO_NOT_AVAILABLE ); + if( err != CL_SUCCESS ){ + print_error( err, "clGetEventProfilingInfo failed" ); + clReleaseEvent(copyEvent); + clReleaseMemObject( streams[0] ); + clReleaseMemObject( streams[1] ); + free( (void *)int_output_ptr ); + free( (void *)int_input_ptr ); + return -1; + } + + err = clGetEventProfilingInfo( copyEvent, CL_PROFILING_COMMAND_START, sizeof( cl_ulong ), &writeStart, NULL ); + if( err != CL_SUCCESS ){ + print_error( err, "clGetEventProfilingInfo failed" ); + clReleaseEvent(copyEvent); + clReleaseMemObject( streams[0] ); + clReleaseMemObject( streams[1] ); + free( (void *)int_output_ptr ); + free( (void *)int_input_ptr ); + return -1; + } + + err = clGetEventProfilingInfo( copyEvent, CL_PROFILING_COMMAND_END, sizeof( cl_ulong ), &writeEnd, NULL ); + if( err != CL_SUCCESS ){ + print_error( err, "clGetEventProfilingInfo failed" ); + clReleaseEvent(copyEvent); + clReleaseMemObject( streams[0] ); + clReleaseMemObject( streams[1] ); + free( (void *)int_output_ptr ); + free( (void *)int_input_ptr ); + return -1; + } + + err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_int)*num_elements, (void 
*)int_output_ptr, 0, NULL, NULL ); + if( err != CL_SUCCESS ){ + print_error( err, "clEnqueueReadBuffer failed" ); + clReleaseEvent(copyEvent); + clReleaseMemObject( streams[0] ); + clReleaseMemObject( streams[1] ); + free( (void *)int_output_ptr ); + free( (void *)int_input_ptr ); + return -1; + } + + if( verify_copy_array(int_input_ptr, int_output_ptr, num_elements) ){ + log_error( "test failed\n" ); + err = -1; + } + else{ + log_info( "test passed\n" ); + err = 0; + } + + // cleanup + clReleaseEvent(copyEvent); + clReleaseMemObject( streams[0] ); + clReleaseMemObject( streams[1] ); + free( (void *)int_output_ptr ); + free( (void *)int_input_ptr ); + + if (check_times(queueStart, submitStart, writeStart, writeEnd, device)) + err = -1; + + return err; + +} // end copy_size() + + +static int copy_partial_size( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements, cl_uint srcStart, cl_uint dstStart, int size, MTdata d ) +{ + cl_mem streams[2]; + cl_event copyEvent; + cl_ulong queueStart, submitStart, writeStart, writeEnd; + cl_int *inptr, *outptr; + int err = 0; + int i; + + inptr = (cl_int *)malloc(sizeof(cl_int) * num_elements); + outptr = (cl_int *)malloc(sizeof(cl_int) * num_elements); + + streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * num_elements, NULL, &err ); + if (!streams[0]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * num_elements, NULL, &err ); + if (!streams[1]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + + for (i=0; i threads[0] ) + localThreads[0] = threads[0]; + if( localThreads[1] > threads[1] ) + localThreads[1] = threads[1]; +#endif + + inptr = (void *)generate_image( (int)num_bytes, d ); + if( ! inptr ){ + log_error("unable to allocate inptr at %d x %d\n", (int)w, (int)h ); + return -1; + } + + dst = malloc( num_bytes ); + if( ! 
dst ){ + free( (void *)inptr ); + log_error("unable to allocate dst at %d x %d\n", (int)w, (int)h ); + return -1; + } + + // allocate the input image + flags = (cl_mem_flags)(CL_MEM_READ_WRITE); + memobjs[0] = create_image_2d(context, flags, &image_format_desc, w, h, 0, NULL, &err); + if( memobjs[0] == (cl_mem)0 ) { + free( dst ); + free( (void *)inptr ); + log_error("unable to create Image2D\n"); + return -1; + } + + memobjs[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), num_bytes, NULL, &err ); + if( memobjs[1] == (cl_mem)0 ) { + clReleaseMemObject(memobjs[0]); + free( dst ); + free( (void *)inptr ); + log_error("unable to create array\n"); + return -1; + } + + // allocate the input image + memobjs[2] = create_image_2d(context, flags, &image_format_desc, w, h, 0, NULL, &err); + if( memobjs[2] == (cl_mem)0 ) { + clReleaseMemObject(memobjs[0]); + clReleaseMemObject(memobjs[1]); + free( dst ); + free( (void *)inptr ); + log_error("unable to create Image2D\n"); + return -1; + } + + err = clEnqueueWriteBuffer( queue, memobjs[1], true, 0, num_bytes, inptr, 0, NULL, NULL ); + if( err != CL_SUCCESS ){ + log_error("clWriteArray failed\n"); + return -1; + } + + err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &write_kernel_code, "test_write" ); + if( err ){ + clReleaseMemObject( memobjs[0] ); + clReleaseMemObject( memobjs[1] ); + clReleaseMemObject( memobjs[2] ); + free( dst ); + free( inptr ); + return -1; + } + + err = clSetKernelArg( kernel[0], 0, sizeof( cl_mem ), (void *)&memobjs[1] ); + err |= clSetKernelArg( kernel[0], 1, sizeof( cl_mem ), (void *)&memobjs[0] ); + if (err != CL_SUCCESS){ + log_error("clSetKernelArg failed\n"); + clReleaseKernel( kernel[0] ); + clReleaseProgram( program[0] ); + clReleaseMemObject( memobjs[0] ); + clReleaseMemObject( memobjs[1] ); + clReleaseMemObject( memobjs[2] ); + free( dst ); + free( inptr ); + return -1; + } + +#ifdef USE_LOCAL_THREADS + err = clEnqueueNDRangeKernel( queue, kernel[0], 2, 
NULL, threads, localThreads, 0, NULL, NULL ); +#else + err = clEnqueueNDRangeKernel( queue, kernel[0], 2, NULL, threads, NULL, 0, NULL, NULL ); +#endif + if (err != CL_SUCCESS){ + print_error( err, "clEnqueueNDRangeKernel failed" ); + clReleaseKernel( kernel[0] ); + clReleaseProgram( program[0] ); + clReleaseMemObject( memobjs[0] ); + clReleaseMemObject( memobjs[1] ); + clReleaseMemObject( memobjs[2] ); + free( dst ); + free( inptr ); + return -1; + } + + // now do the copy + size_t srcPt[3] = { srcx, srcy, 0 }; + size_t destPt[3] = { dstx, dsty, 0 }; + size_t region[3] = { subw, subh, 1 }; + err = clEnqueueCopyImage( queue, memobjs[0], memobjs[2], srcPt, destPt, region, 0, NULL, ©Event ); + if (err != CL_SUCCESS){ + print_error( err, "clCopyImage failed" ); + clReleaseKernel( kernel[0] ); + clReleaseProgram( program[0] ); + clReleaseMemObject( memobjs[0] ); + clReleaseMemObject( memobjs[1] ); + clReleaseMemObject( memobjs[2] ); + free( dst ); + free( inptr ); + return -1; + } + + // This synchronization point is needed in order to assume the data is valid. + // Getting profiling information is not a synchronization point. 
+ err = clWaitForEvents( 1, ©Event ); + if( err != CL_SUCCESS ) + { + clReleaseEvent(copyEvent); + clReleaseKernel( kernel[0] ); + clReleaseProgram( program[0] ); + clReleaseMemObject( memobjs[0] ); + clReleaseMemObject( memobjs[1] ); + clReleaseMemObject( memobjs[2] ); + free( dst ); + free( inptr ); + return -1; + } + + // test profiling + while( ( err = clGetEventProfilingInfo( copyEvent, CL_PROFILING_COMMAND_QUEUED, sizeof( cl_ulong ), &queueStart, NULL ) ) == + CL_PROFILING_INFO_NOT_AVAILABLE ); + if( err != CL_SUCCESS ){ + print_error( err, "clGetEventProfilingInfo failed" ); + clReleaseEvent(copyEvent); + clReleaseKernel( kernel[0] ); + clReleaseProgram( program[0] ); + clReleaseMemObject( memobjs[0] ); + clReleaseMemObject( memobjs[1] ); + clReleaseMemObject( memobjs[2] ); + free( dst ); + free( inptr ); + return -1; + } + + while( ( err = clGetEventProfilingInfo( copyEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof( cl_ulong ), &submitStart, NULL ) ) == + CL_PROFILING_INFO_NOT_AVAILABLE ); + if( err != CL_SUCCESS ){ + print_error( err, "clGetEventProfilingInfo failed" ); + clReleaseEvent(copyEvent); + clReleaseKernel( kernel[0] ); + clReleaseProgram( program[0] ); + clReleaseMemObject( memobjs[0] ); + clReleaseMemObject( memobjs[1] ); + clReleaseMemObject( memobjs[2] ); + free( dst ); + free( inptr ); + return -1; + } + + err = clGetEventProfilingInfo( copyEvent, CL_PROFILING_COMMAND_START, sizeof( cl_ulong ), &writeStart, NULL ); + if( err != CL_SUCCESS ){ + print_error( err, "clGetEventProfilingInfo failed" ); + clReleaseEvent(copyEvent); + clReleaseKernel( kernel[0] ); + clReleaseProgram( program[0] ); + clReleaseMemObject( memobjs[0] ); + clReleaseMemObject( memobjs[1] ); + clReleaseMemObject( memobjs[2] ); + free( dst ); + free( inptr ); + return -1; + } + + err = clGetEventProfilingInfo( copyEvent, CL_PROFILING_COMMAND_END, sizeof( cl_ulong ), &writeEnd, NULL ); + if( err != CL_SUCCESS ){ + print_error( err, "clGetEventProfilingInfo failed" ); + 
clReleaseEvent(copyEvent); + clReleaseKernel( kernel[0] ); + clReleaseProgram( program[0] ); + clReleaseMemObject( memobjs[0] ); + clReleaseMemObject( memobjs[1] ); + clReleaseMemObject( memobjs[2] ); + free( dst ); + free( inptr ); + return -1; + } + + size_t origin[3] = { 0, 0, 0 }; + size_t region2[3] = { w, h, 1 }; + err = clEnqueueReadImage( queue, memobjs[2], true, origin, region2, 0, 0, dst, 0, NULL, NULL ); + if (err != CL_SUCCESS){ + print_error( err, "clReadImage failed" ); + clReleaseEvent(copyEvent); + clReleaseKernel( kernel[0] ); + clReleaseProgram( program[0] ); + clReleaseMemObject( memobjs[0] ); + clReleaseMemObject( memobjs[1] ); + clReleaseMemObject( memobjs[2] ); + free( dst ); + free( inptr ); + return -1; + } + + err = verify_subimage( (unsigned char *)inptr, (unsigned char *)dst, srcx, srcy, + dstx, dsty, subw, subh, w, 4 ); + //err = verify_image( (unsigned char *)inptr, (unsigned char *)dst, w * h * 4 ); + if( err ){ + log_error( "Image failed to verify.\n " ); + } + else{ + log_info( "Image verified.\n" ); + } + + // cleanup + clReleaseEvent(copyEvent); + clReleaseKernel( kernel[0] ); + clReleaseProgram( program[0] ); + clReleaseMemObject( memobjs[0] ); + clReleaseMemObject( memobjs[1] ); + clReleaseMemObject( memobjs[2] ); + free( dst ); + free( inptr ); + + if (check_times(queueStart, submitStart, writeStart, writeEnd, device)) + err = -1; + + return err; + +} // end copy_image_size() + + +int copy_image( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements ) +{ + int err = 0; + int i; + size_t srcx, srcy, dstx, dsty, subw, subh; + MTdata d; + + srcx = srcy = dstx = dsty = 0; + subw = subh = 256; + + PASSIVE_REQUIRE_IMAGE_SUPPORT( device ) + + d = init_genrand( gRandomSeed ); + err = copy_image_size( device, context, queue, srcx, srcy, dstx, dsty, subw, subh, d ); + if( err ){ + log_error( "testing copy image, full size\n" ); + } + else{ + log_info( "testing copy image, full size\n" ); + } + + // now test 
random sub images + srcx = srcy = 0; + subw = subh = 16; + dstx = dsty = 0; + err = copy_image_size( device, context, queue, srcx, srcy, dstx, dsty, subw, subh, d ); + if( err ){ + log_error( "test copy of subimage size %d,%d %d,%d %d x %d\n", (int)srcx, (int)srcy, + (int)dstx, (int)dsty, (int)subw, (int)subh ); + } + else{ + log_info( "test copy of subimage size %d,%d %d,%d %d x %d\n", (int)srcx, (int)srcy, + (int)dstx, (int)dsty, (int)subw, (int)subh ); + } + + srcx = srcy = 8; + subw = subh = 16; + dstx = dsty = 32; + err = copy_image_size( device, context, queue, srcx, srcy, dstx, dsty, subw, subh, d ); + if( err ){ + log_error( "test copy of subimage size %d,%d %d,%d %d x %d\n", (int)srcx, (int)srcy, + (int)dstx, (int)dsty, (int)subw, (int)subh ); + } + else{ + log_info( "test copy of subimage size %d,%d %d,%d %d x %d\n", (int)srcx, (int)srcy, + (int)dstx, (int)dsty, (int)subw, (int)subh ); + } + + for( i = 0; i < 16; i++ ) { + srcx = (size_t)get_random_float( 0.f, 248.f, d ); + srcy = (size_t)get_random_float( 0.f, 248.f, d ); + subw = (size_t)get_random_float( 8.f, (float)(256 - srcx), d ); + subh = (size_t)get_random_float( 8.f, (float)(256 - srcy), d ); + dstx = (size_t)get_random_float( 0.f, (float)(256 - subw), d ); + dsty = (size_t)get_random_float( 0.f, (float)(256 - subh), d ); + err = copy_image_size( device, context, queue, srcx, srcy, dstx, dsty, subw, subh, d ); + if( err ){ + log_error( "test copy of subimage size %d,%d %d,%d %d x %d\n", (int)srcx, (int)srcy, + (int)dstx, (int)dsty, (int)subw, (int)subh ); + } + else{ + log_info( "test copy of subimage size %d,%d %d,%d %d x %d\n", (int)srcx, (int)srcy, + (int)dstx, (int)dsty, (int)subw, (int)subh ); + } + } + + free_mtdata(d); + + return err; + +} // end copy_image() + + +int copy_array_to_image( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements ) +{ + cl_mem memobjs[3]; + cl_image_format image_format_desc = { CL_RGBA, CL_UNORM_INT8 }; + void *inptr; + void *dst; + 
int err; + cl_mem_flags flags; + unsigned int num_channels = (unsigned int)get_format_channel_count( &image_format_desc ); + size_t w = 256, h = 256; + size_t element_nbytes; + size_t num_bytes; + size_t channel_nbytes = sizeof( cl_char ); + MTdata d; + + PASSIVE_REQUIRE_IMAGE_SUPPORT( device ) + + element_nbytes = channel_nbytes * num_channels; + num_bytes = w * h * element_nbytes; + d = init_genrand( gRandomSeed ); + inptr = (void *)generate_image( (int)num_bytes, d ); + free_mtdata(d); d = NULL; + if( ! inptr ){ + log_error("unable to allocate inptr at %d x %d\n", (int)w, (int)h ); + return -1; + } + + dst = malloc( num_bytes ); + if( ! dst ){ + free( inptr ); + log_error( " unable to allocate dst at %d x %d\n", (int)w, (int)h ); + return -1; + } + + // allocate the input image + flags = (cl_mem_flags)(CL_MEM_READ_WRITE); + memobjs[0] = create_image_2d( context, flags, &image_format_desc, w, h, 0, NULL, &err ); + if( memobjs[0] == (cl_mem)0 ){ + free( dst ); + free( inptr ); + log_error( " unable to create Image2D\n" ); + return -1; + } + + memobjs[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), channel_nbytes * num_channels*w*h, NULL, &err ); + if( memobjs[1] == (cl_mem)0 ) { + clReleaseMemObject( memobjs[0] ); + free( dst ); + free( inptr ); + log_error( " unable to create array: " ); + return -1; + } + + err = clEnqueueWriteBuffer( queue, memobjs[1], true, 0, num_bytes, (const void *)inptr, 0, NULL, NULL ); + if( err != CL_SUCCESS ){ + print_error( err, "clWriteArray failed" ); + clReleaseMemObject( memobjs[1] ); + clReleaseMemObject( memobjs[0] ); + free( dst ); + free( inptr ); + return -1; + } + + size_t origin[3] = { 0, 0, 0 }; + size_t region[3] = { w, h, 1 }; + err = clEnqueueCopyBufferToImage( queue, memobjs[1], memobjs[0], 0, origin, region, 0, NULL, NULL ); + if( err != CL_SUCCESS ){ + print_error( err, "clCopyArrayToImage failed" ); + clReleaseMemObject( memobjs[1] ); + clReleaseMemObject( memobjs[0] ); + free( dst ); + free( inptr 
); + return -1; + } + + err = clEnqueueReadImage( queue, memobjs[0], true, origin, region, 0, 0, dst, 0, NULL, NULL ); + if( err != CL_SUCCESS ){ + print_error( err, "clReadImage failed" ); + clReleaseMemObject( memobjs[1] ); + clReleaseMemObject( memobjs[0] ); + free( dst ); + free( inptr ); + return -1; + } + + err = verify_subimage( (cl_uchar *)inptr, (cl_uchar *)dst, 0, 0, 0, 0, w, h, w, num_channels ); + if( err ){ + log_error( " test failed: " ); + } + else{ + log_info( " test passed: " ); + } + + // cleanup + clReleaseMemObject( memobjs[1] ); + clReleaseMemObject( memobjs[0] ); + free( dst ); + free( inptr ); + + return err; + +} // end copy_array_to_image() diff --git a/test_conformance/profiling/execute.c b/test_conformance/profiling/execute.c new file mode 100644 index 00000000..e39d96ad --- /dev/null +++ b/test_conformance/profiling/execute.c @@ -0,0 +1,440 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include +#include + +#include "procs.h" +#include "../../test_common/harness/testHarness.h" +#include "../../test_common/harness/errorHelpers.h" + +#ifndef uchar +typedef unsigned char uchar; +#endif + +#undef MIN +#define MIN(x,y) ( (x) < (y) ? (x) : (y) ) + +#undef MAX +#define MAX(x,y) ( (x) > (y) ? 
(x) : (y) ) + +//#define CREATE_OUTPUT 1 + +extern int writePPM( const char *filename, uchar *buf, int xsize, int ysize ); + + + +//--- the code for kernel executables +static const char *image_filter_src = +"constant sampler_t sampler = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;\n" +"\n" +"__kernel void image_filter( int n, int m, __global float *filter_weights,\n" +" read_only image2d_t src_image, write_only image2d_t dst_image )\n" +"{\n" +" int i, j;\n" +" int indx = 0;\n" +" int tid_x = get_global_id(0);\n" +" int tid_y = get_global_id(1);\n" +" float4 filter_result = (float4)( 0.f, 0.f, 0.f, 0.f );\n" +"\n" +" for (i=-m/2; i<(m+1)/2; i++){\n" +" for (j=-n/2; j<(n+1)/2; j++){\n" +" float w = filter_weights[indx++];\n" +"\n" +" if (w != 0.0f){\n" +" filter_result += w * read_imagef(src_image, sampler,\n" +" (int2)(tid_x + j, tid_y + i));\n" +" }\n" +" }\n" +" }\n" +"\n" +" write_imagef(dst_image, (int2)(tid_x, tid_y), filter_result);\n" +"}\n"; + + +//--- equivalent non-kernel code +static void read_imagef( int x, int y, int w, int h, int nChannels, uchar *src, float *srcRgb ) +{ + // clamp the coords + int x0 = MIN( MAX( x, 0 ), w - 1 ); + int y0 = MIN( MAX( y, 0 ), h - 1 ); + + // get tine index + int indx = ( y0 * w + x0 ) * nChannels; + + // seed the return array + int i; + for( i = 0; i < nChannels; i++ ){ + srcRgb[i] = (float)src[indx+i]; + } +} // end read_imagef() + + +static void write_imagef( uchar *dst, int x, int y, int w, int h, int nChannels, float *dstRgb ) +{ + // get tine index + int indx = ( y * w + x ) * nChannels; + + // seed the return array + int i; + for( i = 0; i < nChannels; i++ ){ + dst[indx+i] = (uchar)dstRgb[i]; + } +} // end write_imagef() + + +static void basicFilterPixel( int x, int y, int n, int m, int xsize, int ysize, int nChannels, const float *filter_weights, uchar *src, uchar *dst ) +{ + int i, j, k; + int indx = 0; + float filter_result[] = { 0.f, 0.f, 0.f, 0.f }; + float srcRgb[4]; + + for( i = -m/2; i < (m+1)/2; i++ 
){ + for( j = -n/2; j < (n+1)/2; j++ ){ + float w = filter_weights[indx++]; + + if( w != 0 ){ + read_imagef( x + j, y + i, xsize, ysize, nChannels, src, srcRgb ); + for( k = 0; k < nChannels; k++ ){ + filter_result[k] += w * srcRgb[k]; + } + } + } + } + + write_imagef( dst, x, y, xsize, ysize, nChannels, filter_result ); + +} // end basicFilterPixel() + + +//--- helper functions +static uchar *createImage( int elements, MTdata d) +{ + int i; + uchar *ptr = (uchar *)malloc( elements * sizeof( cl_uchar ) ); + if( ! ptr ) + return NULL; + + for( i = 0; i < elements; i++ ){ + ptr[i] = (uchar)genrand_int32(d); + } + + return ptr; + +} // end createImage() + + +static int verifyImages( uchar *ptr0, uchar *ptr1, uchar tolerance, int xsize, int ysize, int nChannels ) +{ + int x, y, z; + uchar *p0 = ptr0; + uchar *p1 = ptr1; + + for( y = 0; y < ysize; y++ ){ + for( x = 0; x < xsize; x++ ){ + for( z = 0; z < nChannels; z++ ){ + if( (uchar)abs( (int)( *p0++ - *p1++ ) ) > tolerance ){ + log_error( " images differ at x,y = %d,%d, channel = %d, %d to %d\n", x, y, z, + (int)p0[-1], (int)p1[-1] ); + return -1; + } + } + } + } + + return 0; + +} // end verifyImages() + + +static int kernelFilter( cl_device_id device, cl_context context, cl_command_queue queue, int w, int h, int nChannels, + uchar *inptr, uchar *outptr ) +{ + cl_program program[1]; + cl_kernel kernel[1]; + cl_mem memobjs[3]; + cl_image_format image_format_desc = { CL_RGBA, CL_UNORM_INT8 }; + cl_event executeEvent; + cl_ulong queueStart, submitStart, writeStart, writeEnd; + size_t threads[2]; +#ifdef USE_LOCAL_THREADS + size_t localThreads[2]; +#endif + float filter_weights[] = { .1f, .1f, .1f, .1f, .2f, .1f, .1f, .1f, .1f }; + int filter_w = 3, filter_h = 3; + int err = 0; + + // set thread dimensions + threads[0] = w; + threads[1] = h; + +#ifdef USE_LOCAL_THREADS + err = clGetDeviceConfigInfo( id, CL_DEVICE_MAX_THREAD_GROUP_SIZE, localThreads, sizeof( cl_uint ), NULL ); + test_error( err, "Unable to get thread 
group max size" ); + localThreads[1] = localThreads[0]; + if( localThreads[0] > threads[0] ) + localThreads[0] = threads[0]; + if( localThreads[1] > threads[1] ) + localThreads[1] = threads[1]; +#endif + + // allocate the input and output image memory objects + memobjs[0] = create_image_2d( context, (cl_mem_flags)(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR), + &image_format_desc, w, h, 0, inptr, &err ); + if( memobjs[0] == (cl_mem)0 ){ + log_error( " unable to create 2D image using create_image_2d\n" ); + return -1; + } + + memobjs[1] = create_image_2d( context, CL_MEM_WRITE_ONLY, &image_format_desc, w, h, 0, NULL, &err ); + if( memobjs[1] == (cl_mem)0 ){ + log_error( " unable to create 2D image using create_image_2d\n" ); + clReleaseMemObject( memobjs[0] ); + return -1; + } + + // allocate an array memory object to load the filter weights + memobjs[2] = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR ), + sizeof( cl_float ) * filter_w * filter_h, &filter_weights, &err ); + if( memobjs[2] == (cl_mem)0 ){ + log_error( " unable to create array using clCreateBuffer\n" ); + clReleaseMemObject( memobjs[1] ); + clReleaseMemObject( memobjs[0] ); + return -1; + } + + // create the compute program + err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &image_filter_src, "image_filter" ); + if( err ){ + clReleaseMemObject( memobjs[2] ); + clReleaseMemObject( memobjs[1] ); + clReleaseMemObject( memobjs[0] ); + return -1; + } + + + // create kernel args object and set arg values. 
+ // set the args values + err = clSetKernelArg( kernel[0], 0, sizeof( cl_int ), (void *)&filter_w ); + err |= clSetKernelArg( kernel[0], 1, sizeof( cl_int ), (void *)&filter_h ); + err |= clSetKernelArg( kernel[0], 2, sizeof( cl_mem ), (void *)&memobjs[2] ); + err |= clSetKernelArg( kernel[0], 3, sizeof( cl_mem ), (void *)&memobjs[0] ); + err |= clSetKernelArg( kernel[0], 4, sizeof( cl_mem ), (void *)&memobjs[1] ); + + if( err != CL_SUCCESS ){ + print_error( err, "clSetKernelArg failed\n" ); + clReleaseKernel( kernel[0] ); + clReleaseProgram( program[0] ); + clReleaseMemObject( memobjs[2] ); + clReleaseMemObject( memobjs[1] ); + clReleaseMemObject( memobjs[0] ); + return -1; + } + +#ifdef USE_LOCAL_THREADS + err = clEnqueueNDRangeKernel( queue, kernel[0], 2, NULL, threads, localThreads, NULL, 0, &executeEvent ); +#else + err = clEnqueueNDRangeKernel( queue, kernel[0], 2, NULL, threads, NULL, NULL, 0, &executeEvent ); +#endif + + if( err != CL_SUCCESS ){ + print_error( err, "clEnqueueNDRangeKernel failed\n" ); + clReleaseEvent( executeEvent ); + clReleaseKernel( kernel[0] ); + clReleaseProgram( program[0] ); + clReleaseMemObject( memobjs[2] ); + clReleaseMemObject( memobjs[1] ); + clReleaseMemObject( memobjs[0] ); + return -1; + } + + // This synchronization point is needed in order to assume the data is valid. + // Getting profiling information is not a synchronization point. 
+ err = clWaitForEvents( 1, &executeEvent ); + if( err != CL_SUCCESS ) + { + clReleaseEvent( executeEvent ); + clReleaseKernel( kernel[0] ); + clReleaseProgram( program[0] ); + clReleaseMemObject( memobjs[2] ); + clReleaseMemObject( memobjs[1] ); + clReleaseMemObject( memobjs[0] ); + return -1; + } + + // test profiling + while( ( err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_QUEUED, sizeof( cl_ulong ), &queueStart, NULL ) ) == + CL_PROFILING_INFO_NOT_AVAILABLE ); + if( err != CL_SUCCESS ){ + print_error( err, "clGetEventProfilingInfo failed" ); + clReleaseEvent( executeEvent ); + clReleaseKernel( kernel[0] ); + clReleaseProgram( program[0] ); + clReleaseMemObject( memobjs[2] ); + clReleaseMemObject( memobjs[1] ); + clReleaseMemObject( memobjs[0] ); + return -1; + } + + while( ( err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof( cl_ulong ), &submitStart, NULL ) ) == + CL_PROFILING_INFO_NOT_AVAILABLE ); + if( err != CL_SUCCESS ){ + print_error( err, "clGetEventProfilingInfo failed" ); + clReleaseEvent( executeEvent ); + clReleaseKernel( kernel[0] ); + clReleaseProgram( program[0] ); + clReleaseMemObject( memobjs[2] ); + clReleaseMemObject( memobjs[1] ); + clReleaseMemObject( memobjs[0] ); + return -1; + } + + err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_START, sizeof( cl_ulong ), &writeStart, NULL ); + if( err != CL_SUCCESS ){ + print_error( err, "clGetEventProfilingInfo failed" ); + clReleaseEvent( executeEvent ); + clReleaseKernel( kernel[0] ); + clReleaseProgram( program[0] ); + clReleaseMemObject( memobjs[2] ); + clReleaseMemObject( memobjs[1] ); + clReleaseMemObject( memobjs[0] ); + return -1; + } + + err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_END, sizeof( cl_ulong ), &writeEnd, NULL ); + if( err != CL_SUCCESS ){ + print_error( err, "clGetEventProfilingInfo failed" ); + clReleaseEvent( executeEvent ); + clReleaseKernel( kernel[0] ); + clReleaseProgram( program[0] ); + 
clReleaseMemObject( memobjs[2] ); + clReleaseMemObject( memobjs[1] ); + clReleaseMemObject( memobjs[0] ); + return -1; + } + + // read output image + size_t origin[3] = { 0, 0, 0 }; + size_t region[3] = { w, h, 1 }; + err = clEnqueueReadImage( queue, memobjs[1], true, origin, region, 0, 0, outptr, 0, NULL, NULL); + if( err != CL_SUCCESS ){ + print_error( err, "clReadImage failed\n" ); + clReleaseEvent( executeEvent ); + clReleaseKernel( kernel[0] ); + clReleaseProgram( program[0] ); + clReleaseMemObject( memobjs[2] ); + clReleaseMemObject( memobjs[1] ); + clReleaseMemObject( memobjs[0] ); + return -1; + } + + // release event, kernel, program, and memory objects + clReleaseEvent( executeEvent ); + clReleaseKernel( kernel[0] ); + clReleaseProgram( program[0] ); + clReleaseMemObject( memobjs[2] ); + clReleaseMemObject( memobjs[1] ); + clReleaseMemObject( memobjs[0] ); + + if (check_times(queueStart, submitStart, writeStart, writeEnd, device)) + err = -1; + + return err; + +} // end kernelFilter() + + +static int basicFilter( int w, int h, int nChannels, uchar *inptr, uchar *outptr ) +{ + const float filter_weights[] = { .1f, .1f, .1f, .1f, .2f, .1f, .1f, .1f, .1f }; + int filter_w = 3, filter_h = 3; + int x, y; + + for( y = 0; y < h; y++ ){ + for( x = 0; x < w; x++ ){ + basicFilterPixel( x, y, filter_w, filter_h, w, h, nChannels, filter_weights, inptr, outptr ); + } + } + + return 0; + +} // end of basicFilter() + + +int execute( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements ) +{ + uchar *inptr; + uchar *outptr[2]; + int w = 256, h = 256; + int nChannels = 4; + int nElements = w * h * nChannels; + int err = 0; + MTdata d; + + + PASSIVE_REQUIRE_IMAGE_SUPPORT( device ) + + d = init_genrand( gRandomSeed ); + inptr = createImage( nElements, d ); + free_mtdata( d); d = NULL; + + if( ! 
inptr ){ + log_error( " unable to allocate %d bytes of memory for image\n", nElements ); + return -1; + } + + outptr[0] = (uchar *)malloc( nElements * sizeof( cl_uchar ) ); + if( ! outptr[0] ){ + log_error( " unable to allocate %d bytes of memory for output image #1\n", nElements ); + free( (void *)inptr ); + return -1; + } + + outptr[1] = (uchar *)malloc( nElements * sizeof( cl_uchar ) ); + if( ! outptr[1] ){ + log_error( " unable to allocate %d bytes of memory for output image #2\n", nElements ); + free( (void *)outptr[0] ); + free( (void *)inptr ); + return -1; + } + + err = kernelFilter( device, context, queue, w, h, nChannels, inptr, outptr[0] ); + + if( ! err ){ + basicFilter( w, h, nChannels, inptr, outptr[1] ); + + // verify that the images are the same + err = verifyImages( outptr[0], outptr[1], (uchar)0x1, w, h, nChannels ); + if( err ) + log_error( " images do not match\n" ); + } + + // clean up + free( (void *)outptr[1] ); + free( (void *)outptr[0] ); + free( (void *)inptr ); + + return err; + +} // end execute() + + + diff --git a/test_conformance/profiling/execute_multipass.c b/test_conformance/profiling/execute_multipass.c new file mode 100644 index 00000000..9ca5df29 --- /dev/null +++ b/test_conformance/profiling/execute_multipass.c @@ -0,0 +1,314 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include +#include + +#include "procs.h" +#include "../../test_common/harness/testHarness.h" +#include "../../test_common/harness/errorHelpers.h" + +static const char *read3d_kernel_code = +"\n" +"__kernel void read3d(read_only image3d_t srcimg, __global unsigned char *dst, sampler_t sampler)\n" +"{\n" +" int tid_x = get_global_id(0);\n" +" int tid_y = get_global_id(1);\n" +" int tid_z = get_global_id(2);\n" +" int indx = (tid_z * get_image_height(srcimg) + tid_y) * get_image_width(srcimg) + tid_x;\n" +" float4 color;\n" +"\n" +" color = read_imagef(srcimg, sampler, (int4)(tid_x, tid_y, tid_z, 0));\n" +" indx *= 4;\n" +" dst[indx+0] = (unsigned char)(color.x * 255.0f);\n" +" dst[indx+1] = (unsigned char)(color.y * 255.0f);\n" +" dst[indx+2] = (unsigned char)(color.z * 255.0f);\n" +" dst[indx+3] = (unsigned char)(color.w * 255.0f);\n" +"\n" +"}\n"; + + +static cl_uchar *createImage( int elements, MTdata d ) +{ + int i; + cl_uchar *ptr = (cl_uchar *)malloc( elements * sizeof( cl_uchar ) ); + if( ! 
ptr ) + return NULL; + + for( i = 0; i < elements; i++ ){ + ptr[i] = (cl_uchar)genrand_int32(d); + } + + return ptr; + +} // end createImage() + + +static int verifyImages( cl_uchar *ptr0, cl_uchar *ptr1, cl_uchar tolerance, int xsize, int ysize, int zsize, int nChannels ) +{ + int x, y, z, c; + cl_uchar *p0 = ptr0; + cl_uchar *p1 = ptr1; + + for( z = 0; z < zsize; z++ ){ + for( y = 0; y < ysize; y++ ){ + for( x = 0; x < xsize; x++ ){ + for( c = 0; c < nChannels; c++ ){ + if( (cl_uchar)abs( (int)( *p0++ - *p1++ ) ) > tolerance ){ + log_error( " images differ at x,y,z = %d,%d,%d channel = %d, %d to %d\n", + x, y, z, c, (int)p0[-1], (int)p1[-1] ); + return -1; + } + } + } + } + } + + return 0; + +} // end verifyImages() + + +static int run_kernel( cl_device_id device, cl_context context, cl_command_queue queue, + int w, int h, int d, int nChannels, cl_uchar *inptr, cl_uchar *outptr ) +{ + cl_program program[1]; + cl_kernel kernel[1]; + cl_mem memobjs[2]; + cl_image_format image_format_desc = { CL_RGBA, CL_UNORM_INT8 }; + cl_event executeEvent = NULL; + cl_ulong queueStart, submitStart, writeStart, writeEnd; + size_t threads[3]; + size_t localThreads[3]; + int err = 0; + + // set thread dimensions + threads[0] = w; + threads[1] = h; + threads[2] = d; + + err = clGetDeviceInfo( device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof( cl_uint ), (size_t*)localThreads, NULL ); + if (err) + { + localThreads[0] = 256; localThreads[1] = 1; localThreads[2] = 1; + err = 0; + } + if( localThreads[0] > threads[0] ) + localThreads[0] = threads[0]; + if( localThreads[1] > threads[1] ) + localThreads[1] = threads[1]; + + cl_sampler_properties properties[] = { + CL_SAMPLER_NORMALIZED_COORDS, CL_FALSE, + CL_SAMPLER_ADDRESSING_MODE, CL_ADDRESS_CLAMP_TO_EDGE, + CL_SAMPLER_FILTER_MODE, CL_FILTER_NEAREST, + 0 }; + cl_sampler sampler = clCreateSamplerWithProperties( context, properties, &err ); + if( err ){ + log_error( " clCreateSamplerWithProperties failed.\n" ); + return -1; + } + + // allocate 
the input and output image memory objects + memobjs[0] = create_image_3d( context, (cl_mem_flags)(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR), &image_format_desc, w, h, d, 0, 0, inptr, &err ); + if( memobjs[0] == (cl_mem)0 ){ + log_error( " unable to create 2D image using create_image_2d\n" ); + return -1; + } + + // allocate an array memory object to load the filter weights + memobjs[1] = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_READ_WRITE ), sizeof( cl_float ) * w*h*d*nChannels, NULL, &err ); + if( memobjs[1] == (cl_mem)0 ){ + log_error( " unable to create array using clCreateBuffer\n" ); + clReleaseMemObject( memobjs[0] ); + return -1; + } + + // create the compute program + err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &read3d_kernel_code, "read3d" ); + if( err ){ + clReleaseMemObject( memobjs[1] ); + clReleaseMemObject( memobjs[0] ); + return -1; + } + + + // create kernel args object and set arg values. + // set the args values + err |= clSetKernelArg( kernel[0], 0, sizeof( cl_mem ), (void *)&memobjs[0] ); + err |= clSetKernelArg( kernel[0], 1, sizeof( cl_mem ), (void *)&memobjs[1] ); + err |= clSetKernelArg(kernel[0], 2, sizeof sampler, &sampler); + + if( err != CL_SUCCESS ){ + print_error( err, "clSetKernelArg failed\n" ); + clReleaseKernel( kernel[0] ); + clReleaseProgram( program[0] ); + clReleaseMemObject( memobjs[1] ); + clReleaseMemObject( memobjs[0] ); + return -1; + } + + err = clEnqueueNDRangeKernel( queue, kernel[0], 3, NULL, threads, localThreads, NULL, 0, &executeEvent ); + + if( err != CL_SUCCESS ){ + print_error( err, "clEnqueueNDRangeKernel failed\n" ); + clReleaseKernel( kernel[0] ); + clReleaseProgram( program[0] ); + clReleaseMemObject( memobjs[1] ); + clReleaseMemObject( memobjs[0] ); + return -1; + } + + if (executeEvent) { + + // This synchronization point is needed in order to assume the data is valid. + // Getting profiling information is not a synchronization point. 
+ err = clWaitForEvents( 1, &executeEvent ); + if( err != CL_SUCCESS ) + { + print_error( err, "clWaitForEvents failed\n" ); + clReleaseKernel( kernel[0] ); + clReleaseProgram( program[0] ); + clReleaseMemObject( memobjs[1] ); + clReleaseMemObject( memobjs[0] ); + return -1; + } + + // test profiling + while( ( err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_QUEUED, sizeof( cl_ulong ), &queueStart, NULL ) ) == CL_PROFILING_INFO_NOT_AVAILABLE ); + if( err != CL_SUCCESS ){ + print_error( err, "clGetEventProfilingInfo failed" ); + clReleaseKernel( kernel[0] ); + clReleaseProgram( program[0] ); + clReleaseMemObject( memobjs[1] ); + clReleaseMemObject( memobjs[0] ); + return -1; + } + + while( ( err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof( cl_ulong ), &submitStart, NULL ) ) == CL_PROFILING_INFO_NOT_AVAILABLE ); + if( err != CL_SUCCESS ){ + print_error( err, "clGetEventProfilingInfo failed" ); + clReleaseKernel( kernel[0] ); + clReleaseProgram( program[0] ); + clReleaseMemObject( memobjs[1] ); + clReleaseMemObject( memobjs[0] ); + return -1; + } + + err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_START, sizeof( cl_ulong ), &writeStart, NULL ); + if( err != CL_SUCCESS ){ + print_error( err, "clGetEventProfilingInfo failed" ); + clReleaseKernel( kernel[0] ); + clReleaseProgram( program[0] ); + clReleaseMemObject( memobjs[1] ); + clReleaseMemObject( memobjs[0] ); + return -1; + } + + err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_END, sizeof( cl_ulong ), &writeEnd, NULL ); + if( err != CL_SUCCESS ){ + print_error( err, "clGetEventProfilingInfo failed" ); + clReleaseKernel( kernel[0] ); + clReleaseProgram( program[0] ); + clReleaseMemObject( memobjs[1] ); + clReleaseMemObject( memobjs[0] ); + return -1; + } + + log_info( "Profiling info:\n" ); + log_info( "Time from queue to start of clEnqueueNDRangeKernel: %f seconds\n", (double)(writeStart - queueStart) / 1000000000000.f ); + 
log_info( "Time from start of clEnqueueNDRangeKernel to end: %f seconds\n", (double)(writeEnd - writeStart) / 1000000000000.f ); + } + + // read output image + err = clEnqueueReadBuffer(queue, memobjs[1], CL_TRUE, 0, w*h*d*nChannels*4, outptr, 0, NULL, NULL); + if( err != CL_SUCCESS ){ + print_error( err, "clReadImage failed\n" ); + clReleaseKernel( kernel[0] ); + clReleaseProgram( program[0] ); + clReleaseMemObject( memobjs[1] ); + clReleaseMemObject( memobjs[0] ); + return -1; + } + + // release kernel, program, and memory objects + clReleaseKernel( kernel[0] ); + clReleaseProgram( program[0] ); + clReleaseMemObject( memobjs[1] ); + clReleaseMemObject( memobjs[0] ); + + return err; + +} // end run_kernel() + + +// The main point of this test is to exercise code that causes a multipass cld launch for a single +// kernel exec at the cl level. This is done on the gpu for 3d launches, and it's also done +// to handle gdims that excede the maximums allowed by the hardware. In this case we +// use 3d to exercise the multipass events. In the future 3d may not be multpass, in which +// case we will need to ensure that we use gdims large enough to force multipass. + +int execute_multipass( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements ) +{ + cl_uchar *inptr; + cl_uchar *outptr; + int w = 256, h = 128, d = 32; + int nChannels = 4; + int nElements = w * h * d * nChannels; + int err = 0; + MTdata mtData; + + PASSIVE_REQUIRE_IMAGE_SUPPORT( device ) + + mtData = init_genrand( gRandomSeed ); + inptr = createImage( nElements, mtData ); + free_mtdata( mtData); mtData = NULL; + if( ! inptr ){ + log_error( " unable to allocate %d bytes of memory for image\n", nElements ); + return -1; + } + + outptr = (cl_uchar *)malloc( nElements * sizeof( cl_uchar ) ); + if( ! 
outptr ){ + log_error( " unable to allocate %d bytes of memory for output image #1\n", nElements ); + free( (void *)inptr ); + return -1; + } + + + err = run_kernel( device, context, queue, w, h, d, nChannels, inptr, outptr ); + + if( ! err ){ + // verify that the images are the same + err = verifyImages( outptr, inptr, (cl_uchar)0x1, w, h, d, nChannels ); + if( err ) + log_error( " images do not match\n" ); + } + + // clean up + free( (void *)outptr ); + free( (void *)inptr ); + + return err; + +} // end execute() + + + diff --git a/test_conformance/profiling/main.c b/test_conformance/profiling/main.c new file mode 100644 index 00000000..4c2201f7 --- /dev/null +++ b/test_conformance/profiling/main.c @@ -0,0 +1,173 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/compat.h" + +#include +#include +#include "procs.h" +#include "../../test_common/harness/testHarness.h" + +// FIXME: To use certain functions in ../../test_common/harness/imageHelpers.h +// (for example, generate_random_image_data()), the tests are required to declare +// the following variables (): +cl_device_type gDeviceType = CL_DEVICE_TYPE_DEFAULT; +bool gTestRounding = false; + +basefn basefn_list[] = { + read_int_array, + read_uint_array, + read_long_array, + read_ulong_array, + read_short_array, + read_ushort_array, + read_float_array, + read_char_array, + read_uchar_array, + read_struct_array, + write_int_array, + write_uint_array, + write_long_array, + write_ulong_array, + write_short_array, + write_ushort_array, + write_float_array, + write_char_array, + write_uchar_array, + write_struct_array, + read_float_image, + read_char_image, + read_uchar_image, + write_float_image, + write_char_image, + write_uchar_image, + copy_array, + copy_partial_array, + copy_image, + copy_array_to_image, + execute +}; + + +const char *basefn_names[] = { +"read_array_int", +"read_array_uint", +"read_array_long", +"read_array_ulong", +"read_array_short", +"read_array_ushort", +"read_array_float", +"read_array_char", +"read_array_uchar", +"read_array_struct", +"write_array_int", +"write_array_uint", +"write_array_long", +"write_array_ulong", +"write_array_short", +"write_array_ushort", +"write_array_float", +"write_array_char", +"write_array_uchar", +"write_array_struct", +"read_image_float", +"read_image_int", +"read_image_uint", +"write_image_float", +"write_image_char", +"write_image_uchar", +"copy_array", +"copy_partial_array", +"copy_image", +"copy_array_to_image", +"execute", +}; + +ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_list) / sizeof(basefn_list[0]))); + +int num_streamfns = sizeof(basefn_names) / sizeof(char *); + +// FIXME: use timer resolution rather than hardcoding 1µs per tick. 
+ +#define QUEUE_SECONDS_LIMIT 30 +#define SUBMIT_SECONDS_LIMIT 30 +#define COMMAND_SECONDS_LIMIT 30 +int check_times(cl_ulong queueStart, cl_ulong commandSubmit, cl_ulong commandStart, cl_ulong commandEnd, cl_device_id device) { + int err = 0; + + size_t profiling_resolution = 0; + err = clGetDeviceInfo(device, CL_DEVICE_PROFILING_TIMER_RESOLUTION, sizeof(profiling_resolution), &profiling_resolution, NULL); + test_error(err, "clGetDeviceInfo for CL_DEVICE_PROFILING_TIMER_RESOLUTION failed.\n"); + + log_info("CL_PROFILING_COMMAND_QUEUED: %llu CL_PROFILING_COMMAND_SUBMIT: %llu CL_PROFILING_COMMAND_START: %llu CL_PROFILING_COMMAND_END: %llu CL_DEVICE_PROFILING_TIMER_RESOLUTION: %ld\n", + queueStart, commandSubmit, commandStart, commandEnd, profiling_resolution); + + double queueTosubmitTimeS = (double)(commandSubmit - queueStart)*1e-9; + double submitToStartTimeS = (double)(commandStart - commandSubmit)*1e-9; + double startToEndTimeS = (double)(commandEnd - commandStart)*1e-9; + + log_info( "Profiling info:\n" ); + log_info( "Time from queue to submit : %fms\n", (double)(queueTosubmitTimeS) * 1000.f ); + log_info( "Time from submit to start : %fms\n", (double)(submitToStartTimeS) * 1000.f ); + log_info( "Time from start to end: %fms\n", (double)(startToEndTimeS) * 1000.f ); + + if(queueStart > commandSubmit) { + log_error("CL_PROFILING_COMMAND_QUEUED > CL_PROFILING_COMMAND_SUBMIT.\n"); + err = -1; + } + + if (commandSubmit > commandStart) { + log_error("CL_PROFILING_COMMAND_SUBMIT > CL_PROFILING_COMMAND_START.\n"); + err = -1; + } + + if (commandStart > commandEnd) { + log_error("CL_PROFILING_COMMAND_START > CL_PROFILING_COMMAND_END.\n"); + err = -1; + } + + if (queueStart == 0 && commandStart == 0 && commandEnd == 0) { + log_error("All values are 0. 
This is exceedingly unlikely.\n"); + err = -1; + } + + if (queueTosubmitTimeS > QUEUE_SECONDS_LIMIT) { + log_error("Time between queue and submit is too big: %fs, test limit: %fs.\n", + queueTosubmitTimeS , (double)QUEUE_SECONDS_LIMIT); + err = -1; + } + + if (submitToStartTimeS > SUBMIT_SECONDS_LIMIT) { + log_error("Time between submit and start is too big: %fs, test limit: %fs.\n", + submitToStartTimeS , (double)QUEUE_SECONDS_LIMIT); + err = -1; + } + + if (startToEndTimeS > COMMAND_SECONDS_LIMIT) { + log_error("Time between queue and start is too big: %fs, test limit: %fs.\n", + startToEndTimeS , (double)QUEUE_SECONDS_LIMIT); + err = -1; + } + return err; +} + + +int main( int argc, const char *argv[] ) +{ + return runTestHarness( argc, argv, num_streamfns, basefn_list, basefn_names, + false, false, CL_QUEUE_PROFILING_ENABLE ); +} + + diff --git a/test_conformance/profiling/procs.h b/test_conformance/profiling/procs.h new file mode 100644 index 00000000..57a7860d --- /dev/null +++ b/test_conformance/profiling/procs.h @@ -0,0 +1,65 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
//
#ifndef __PROCS_H__
#define __PROCS_H__

#include "../../test_common/harness/errorHelpers.h"
#include "../../test_common/harness/kernelHelpers.h"
#include "../../test_common/harness/imageHelpers.h"
#include "../../test_common/harness/mt19937.h"


// Shared validator for the four event-profiling counters (defined in main.c);
// returns 0 when the counters are ordered and within the per-stage limits.
extern int check_times(cl_ulong queueStart, cl_ulong submitStart, cl_ulong commandStart, cl_ulong commandEnd, cl_device_id device);

// Per-type profiling test entry points.  All share the harness signature
// (device, context, queue, num_elements) and return 0 on pass, non-zero on fail.
// NOTE(review): read_half_array / write_half_array are declared here but do not
// appear in basefn_list in main.c — presumably optional half-float coverage;
// confirm against the test runner.
extern int read_int_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
extern int read_uint_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
extern int read_long_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
extern int read_ulong_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
extern int read_short_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
extern int read_ushort_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
extern int read_float_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
extern int read_half_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
extern int read_char_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
extern int read_uchar_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
extern int read_struct_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
extern int write_int_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
extern int write_uint_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
extern int write_long_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
extern int write_ulong_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
extern int write_short_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
extern int write_ushort_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
extern int write_float_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
extern int write_half_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
extern int write_char_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
extern int write_uchar_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
extern int write_struct_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
extern int read_float_image( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
extern int read_char_image( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
extern int read_uchar_image( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
extern int write_float_image( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
extern int write_char_image( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
extern int write_uchar_image( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
extern int copy_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
extern int copy_partial_array( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
extern int copy_image( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
extern int copy_array_to_image( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
extern int execute( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );
extern int test_parallel_kernels( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements );


#endif    // #ifndef __PROCS_H__


diff --git a/test_conformance/profiling/readArray.c b/test_conformance/profiling/readArray.c
new file mode 100644
index 00000000..30c687ff
--- /dev/null
+++ b/test_conformance/profiling/readArray.c
@@ -0,0 +1,997 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
+// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include +#include + +#include "procs.h" +#include "../../test_common/harness/testHarness.h" + +#define TEST_PRIME_INT ((1<<16)+1) +#define TEST_PRIME_UINT ((1U<<16)+1U) +#define TEST_PRIME_LONG ((1LL<<32)+1LL) +#define TEST_PRIME_ULONG ((1ULL<<32)+1ULL) +#define TEST_PRIME_SHORT ((1S<<8)+1S) +#define TEST_PRIME_FLOAT (float)3.40282346638528860e+38 +#define TEST_PRIME_HALF 119.f +#define TEST_BOOL true +#define TEST_PRIME_CHAR 0x77 + + +#ifndef ulong +typedef unsigned long ulong; +#endif + +#ifndef uchar +typedef unsigned char uchar; +#endif + +#ifndef TestStruct +typedef struct{ + int a; + float b; +} TestStruct; +#endif + + + +//--- the code for the kernel executables +static const char *stream_read_int_kernel_code[] = { +"__kernel void test_stream_read_int(__global int *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = ((1<<16)+1);\n" +"}\n", + +"__kernel void test_stream_read_int2(__global int2 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = ((1<<16)+1);\n" +"}\n", + +"__kernel void test_stream_read_int4(__global int4 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = ((1<<16)+1);\n" +"}\n", + +"__kernel void test_stream_read_int8(__global int8 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = ((1<<16)+1);\n" +"}\n", + +"__kernel void test_stream_read_int16(__global int16 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = ((1<<16)+1);\n" +"}\n" }; + +static const char *int_kernel_name[] = { "test_stream_read_int", "test_stream_read_int2", "test_stream_read_int4", "test_stream_read_int8", "test_stream_read_int16" }; + +const char *stream_read_uint_kernel_code[] = { +"__kernel void test_stream_read_uint(__global uint *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = ((1U<<16)+1U);\n" +"}\n", + +"__kernel void test_stream_read_uint2(__global uint2 *dst)\n" +"{\n" 
+" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = ((1U<<16)+1U);\n" +"}\n", + +"__kernel void test_stream_read_uint4(__global uint4 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = ((1U<<16)+1U);\n" +"}\n", + +"__kernel void test_stream_read_uint8(__global uint8 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = ((1U<<16)+1U);\n" +"}\n", + +"__kernel void test_stream_read_uint16(__global uint16 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = ((1U<<16)+1U);\n" +"}\n" }; + +const char *uint_kernel_name[] = { "test_stream_read_uint", "test_stream_read_uint2", "test_stream_read_uint4", "test_stream_read_uint8", "test_stream_read_uint16" }; + +const char *stream_read_long_kernel_code[] = { +"__kernel void test_stream_read_long(__global long *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = ((1L<<32)+1L);\n" +"}\n", + +"__kernel void test_stream_read_long2(__global long2 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = ((1L<<32)+1L);\n" +"}\n", + +"__kernel void test_stream_read_long4(__global long4 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = ((1L<<32)+1L);\n" +"}\n", + +"__kernel void test_stream_read_long8(__global long8 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = ((1L<<32)+1L);\n" +"}\n", + +"__kernel void test_stream_read_long16(__global long16 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = ((1L<<32)+1L);\n" +"}\n" }; + +const char *long_kernel_name[] = { "test_stream_read_long", "test_stream_read_long2", "test_stream_read_long4", "test_stream_read_long8", "test_stream_read_long16" }; + +const char *stream_read_ulong_kernel_code[] = { +"__kernel void test_stream_read_ulong(__global ulong *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = ((1UL<<32)+1UL);\n" +"}\n", + +"__kernel void test_stream_read_ulong2(__global ulong2 *dst)\n" +"{\n" +" int tid = 
get_global_id(0);\n" +"\n" +" dst[tid] = ((1UL<<32)+1UL);\n" +"}\n", + +"__kernel void test_stream_read_ulong4(__global ulong4 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = ((1UL<<32)+1UL);\n" +"}\n", + +"__kernel void test_stream_read_ulong8(__global ulong8 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = ((1UL<<32)+1UL);\n" +"}\n", + +"__kernel void test_stream_read_ulong16(__global ulong16 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = ((1UL<<32)+1UL);\n" +"}\n" }; + +const char *ulong_kernel_name[] = { "test_stream_read_ulong", "test_stream_read_ulong2", "test_stream_read_ulong4", "test_stream_read_ulong8", "test_stream_read_ulong16" }; + +const char *stream_read_short_kernel_code[] = { +"__kernel void test_stream_read_short(__global short *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = (short)((1<<8)+1);\n" +"}\n", + +"__kernel void test_stream_read_short2(__global short2 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = (short)((1<<8)+1);\n" +"}\n", + +"__kernel void test_stream_read_short4(__global short4 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = (short)((1<<8)+1);\n" +"}\n", + +"__kernel void test_stream_read_short8(__global short8 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = (short)((1<<8)+1);\n" +"}\n", + +"__kernel void test_stream_read_short16(__global short16 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = (short)((1<<8)+1);\n" +"}\n" }; + +const char *short_kernel_name[] = { "test_stream_read_short", "test_stream_read_short2", "test_stream_read_short4", "test_stream_read_short8", "test_stream_read_short16" }; + + +const char *stream_read_ushort_kernel_code[] = { +"__kernel void test_stream_read_ushort(__global ushort *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = (ushort)((1<<8)+1);\n" +"}\n", + +"__kernel void test_stream_read_ushort2(__global 
ushort2 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = (ushort)((1<<8)+1);\n" +"}\n", + +"__kernel void test_stream_read_ushort4(__global ushort4 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = (ushort)((1<<8)+1);\n" +"}\n", + +"__kernel void test_stream_read_ushort8(__global ushort8 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = (ushort)((1<<8)+1);\n" +"}\n", + +"__kernel void test_stream_read_ushort16(__global ushort16 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = (ushort)((1<<8)+1);\n" +"}\n" }; + +static const char *ushort_kernel_name[] = { "test_stream_read_ushort", "test_stream_read_ushort2", "test_stream_read_ushort4", "test_stream_read_ushort8", "test_stream_read_ushort16" }; + + +const char *stream_read_float_kernel_code[] = { +"__kernel void test_stream_read_float(__global float *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = (float)3.40282346638528860e+38;\n" +"}\n", + +"__kernel void test_stream_read_float2(__global float2 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = (float)3.40282346638528860e+38;\n" +"}\n", + +"__kernel void test_stream_read_float4(__global float4 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = (float)3.40282346638528860e+38;\n" +"}\n", + +"__kernel void test_stream_read_float8(__global float8 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = (float)3.40282346638528860e+38;\n" +"}\n", + +"__kernel void test_stream_read_float16(__global float16 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = (float)3.40282346638528860e+38;\n" +"}\n" }; + +const char *float_kernel_name[] = { "test_stream_read_float", "test_stream_read_float2", "test_stream_read_float4", "test_stream_read_float8", "test_stream_read_float16" }; + + +const char *stream_read_half_kernel_code[] = { +"__kernel void test_stream_read_half(__global half *dst)\n" +"{\n" +" int tid 
= get_global_id(0);\n" +"\n" +" dst[tid] = (half)119;\n" +"}\n", + +"__kernel void test_stream_read_half2(__global half2 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = (half)119;\n" +"}\n", + +"__kernel void test_stream_read_half4(__global half4 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = (half)119;\n" +"}\n", + +"__kernel void test_stream_read_half8(__global half8 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = (half)119;\n" +"}\n", + +"__kernel void test_stream_read_half16(__global half16 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = (half)119;\n" +"}\n" }; + +const char *half_kernel_name[] = { "test_stream_read_half", "test_stream_read_half2", "test_stream_read_half4", "test_stream_read_half8", "test_stream_read_half16" }; + + +const char *stream_read_char_kernel_code[] = { +"__kernel void test_stream_read_char(__global char *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = (char)'w';\n" +"}\n", + +"__kernel void test_stream_read_char2(__global char2 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = (char)'w';\n" +"}\n", + +"__kernel void test_stream_read_char4(__global char4 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = (char)'w';\n" +"}\n", + +"__kernel void test_stream_read_char8(__global char8 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = (char)'w';\n" +"}\n", + +"__kernel void test_stream_read_char16(__global char16 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = (char)'w';\n" +"}\n" }; + +const char *char_kernel_name[] = { "test_stream_read_char", "test_stream_read_char2", "test_stream_read_char4", "test_stream_read_char8", "test_stream_read_char16" }; + + +const char *stream_read_uchar_kernel_code[] = { +"__kernel void test_stream_read_uchar(__global uchar *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = 'w';\n" +"}\n", + +"__kernel 
void test_stream_read_uchar2(__global uchar2 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = (uchar)'w';\n" +"}\n", + +"__kernel void test_stream_read_uchar4(__global uchar4 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = (uchar)'w';\n" +"}\n", + +"__kernel void test_stream_read_uchar8(__global uchar8 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = (uchar)'w';\n" +"}\n", + +"__kernel void test_stream_read_uchar16(__global uchar16 *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = (uchar)'w';\n" +"}\n" }; + +const char *uchar_kernel_name[] = { "test_stream_read_uchar", "test_stream_read_uchar2", "test_stream_read_uchar4", "test_stream_read_uchar8", "test_stream_read_uchar16" }; + + +const char *stream_read_struct_kernel_code[] = { +"typedef struct{\n" +"int a;\n" +"float b;\n" +"} TestStruct;\n" +"__kernel void test_stream_read_struct(__global TestStruct *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid].a = ((1<<16)+1);\n" +" dst[tid].b = (float)3.40282346638528860e+38;\n" +"}\n" }; + +const char *struct_kernel_name[] = { "test_stream_read_struct" }; + + + +//--- the verify functions +static int verify_read_int(void *ptr, int n) +{ + int i; + int *outptr = (int *)ptr; + + for (i=0; i threads[0] ) + localThreads[0] = threads[0]; +#endif + + ptrSizes[0] = size; + ptrSizes[1] = ptrSizes[0] << 1; + ptrSizes[2] = ptrSizes[1] << 1; + ptrSizes[3] = ptrSizes[2] << 1; + ptrSizes[4] = ptrSizes[3] << 1; + for( i = 0; i < loops; i++ ){ + outptr[i] = malloc( ptrSizes[i] * num_elements ); + if( ! 
outptr[i] ){ + log_error( " unable to allocate %d bytes for outptr\n", (int)( ptrSizes[i] * num_elements ) ); + return -1; + } + streams[i] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), ptrSizes[i] * num_elements, NULL, &err ); + if( !streams[i] ){ + log_error( " clCreateBuffer failed\n" ); + free( outptr[i] ); + return -1; + } + err = create_single_kernel_helper( context, &program[i], &kernel[i], 1, &kernelCode[i], kernelName[i] ); + if( err ){ + log_error( " Error creating program for %s\n", type ); + clReleaseMemObject(streams[i]); + free( outptr[i] ); + return -1; + } + + err = clSetKernelArg( kernel[i], 0, sizeof( cl_mem ), (void *)&streams[i] ); + if( err != CL_SUCCESS ){ + print_error( err, "clSetKernelArg failed" ); + clReleaseProgram( program[i] ); + clReleaseKernel( kernel[i] ); + clReleaseMemObject( streams[i] ); + free( outptr[i] ); + return -1; + } + +#ifdef USE_LOCAL_THREADS + err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, localThreads, 0, NULL, NULL ); +#else + err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL ); +#endif + if( err != CL_SUCCESS ){ + print_error( err, "clEnqueueNDRangeKernel failed" ); + clReleaseKernel( kernel[i] ); + clReleaseProgram( program[i] ); + clReleaseMemObject( streams[i] ); + free( outptr[i] ); + return -1; + } + + err = clEnqueueReadBuffer( queue, streams[i], false, 0, ptrSizes[i]*num_elements, outptr[i], 0, NULL, &readEvent ); + if( err != CL_SUCCESS ){ + print_error( err, "clEnqueueReadBuffer failed" ); + clReleaseKernel( kernel[i] ); + clReleaseProgram( program[i] ); + clReleaseMemObject( streams[i] ); + free( outptr[i] ); + return -1; + } + err = clWaitForEvents( 1, &readEvent ); + if( err != CL_SUCCESS ) + { + print_error( err, "Unable to wait for event completion" ); + clReleaseKernel( kernel[i] ); + clReleaseProgram( program[i] ); + clReleaseMemObject( streams[i] ); + free( outptr[i] ); + return -1; + } + err = clGetEventProfilingInfo( 
readEvent, CL_PROFILING_COMMAND_QUEUED, sizeof( cl_ulong ), &queueStart, NULL ); + if( err != CL_SUCCESS ){ + print_error( err, "clGetEventProfilingInfo failed" ); + clReleaseKernel( kernel[i] ); + clReleaseProgram( program[i] ); + clReleaseMemObject( streams[i] ); + free( outptr[i] ); + return -1; + } + + err = clGetEventProfilingInfo( readEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof( cl_ulong ), &submitStart, NULL ); + if( err != CL_SUCCESS ){ + print_error( err, "clGetEventProfilingInfo failed" ); + clReleaseKernel( kernel[i] ); + clReleaseProgram( program[i] ); + clReleaseMemObject( streams[i] ); + free( outptr[i] ); + return -1; + } + + err = clGetEventProfilingInfo( readEvent, CL_PROFILING_COMMAND_START, sizeof( cl_ulong ), &readStart, NULL ); + if( err != CL_SUCCESS ){ + print_error( err, "clGetEventProfilingInfo failed" ); + clReleaseKernel( kernel[i] ); + clReleaseProgram( program[i] ); + clReleaseMemObject( streams[i] ); + free( outptr[i] ); + return -1; + } + + err = clGetEventProfilingInfo( readEvent, CL_PROFILING_COMMAND_END, sizeof( cl_ulong ), &readEnd, NULL ); + if( err != CL_SUCCESS ){ + print_error( err, "clGetEventProfilingInfo failed" ); + clReleaseKernel( kernel[i] ); + clReleaseProgram( program[i] ); + clReleaseMemObject( streams[i] ); + free( outptr[i] ); + return -1; + } + + if (fn(outptr[i], num_elements*(1< threads[0] ) + localThreads[0] = threads[0]; +#endif + + output_ptr = malloc(objSize * num_elements); + if( ! 
output_ptr ){ + log_error( " unable to allocate %d bytes for output_ptr\n", (int)(objSize * num_elements) ); + return -1; + } + streams[0] = clCreateBuffer( device, (cl_mem_flags)(CL_MEM_READ_WRITE), objSize * num_elements, NULL ); + if( !streams[0] ){ + log_error( " clCreateBuffer failed\n" ); + free( output_ptr ); + return -1; + } + + err = create_program_and_kernel( device, stream_read_struct_kernel_code, "test_stream_read_struct", &program[0], &kernel[0]); + if( err ){ + clReleaseProgram( program[0] ); + free( output_ptr ); + return -1; + } + + err = clSetKernelArg( kernel[0], 0, sizeof( cl_mem ), (void *)&streams[0] ); + if( err != CL_SUCCESS){ + print_error( err, "clSetKernelArg failed" ); + clReleaseProgram( program[0] ); + clReleaseKernel( kernel[0] ); + clReleaseMemObject( streams[0] ); + free( output_ptr ); + return -1; + } + +#ifdef USE_LOCAL_THREADS + err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, threads, localThreads, 0, NULL, NULL ); +#else + err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, threads, NULL, 0, NULL, NULL ); +#endif + if( err != CL_SUCCESS ){ + print_error( err, "clEnqueueNDRangeKernel failed" ); + clReleaseProgram( program[0] ); + clReleaseKernel( kernel[0] ); + clReleaseMemObject( streams[0] ); + free( output_ptr ); + return -1; + } + + err = clEnqueueReadBuffer( queue, streams[0], true, 0, objSize*num_elements, (void *)output_ptr, 0, NULL, NULL ); + if( err != CL_SUCCESS){ + print_error( err, "clEnqueueReadBuffer failed" ); + clReleaseProgram( program[0] ); + clReleaseKernel( kernel[0] ); + clReleaseMemObject( streams[0] ); + free( output_ptr ); + return -1; + } + + if (verify_read_struct(output_ptr, num_elements)){ + log_error(" struct test failed\n"); + err = -1; + } + else{ + log_info(" struct test passed\n"); + err = 0; + } + + // cleanup + clReleaseProgram( program[0] ); + clReleaseKernel( kernel[0] ); + clReleaseMemObject( streams[0] ); + free( output_ptr ); + + return err; +} +*/ + + diff --git 
a/test_conformance/profiling/readImage.c b/test_conformance/profiling/readImage.c new file mode 100644 index 00000000..2a4069d2 --- /dev/null +++ b/test_conformance/profiling/readImage.c @@ -0,0 +1,386 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include + +#include "procs.h" +#include "../../test_common/harness/testHarness.h" +#include "../../test_common/harness/errorHelpers.h" + +//--- the code for the kernel executables +static const char *readKernelCode[] = { +"__kernel void testWritef(__global uchar *src, write_only image2d_t dstimg)\n" +"{\n" +" int tid_x = get_global_id(0);\n" +" int tid_y = get_global_id(1);\n" +" int indx = tid_y * get_image_width(dstimg) + tid_x;\n" +" float4 color;\n" +"\n" +" indx *= 4;\n" +" color = (float4)((float)src[indx+0], (float)src[indx+1], (float)src[indx+2], (float)src[indx+3]);\n" +" color /= (float4)(255.f, 255.f, 255.f, 255.f);\n" +" write_imagef(dstimg, (int2)(tid_x, tid_y), color);\n" +"\n" +"}\n", + +"__kernel void testWritei(__global char *src, write_only image2d_t dstimg)\n" +"{\n" +" int tid_x = get_global_id(0);\n" +" int tid_y = get_global_id(1);\n" +" int indx = tid_y * get_image_width(dstimg) + tid_x;\n" +" int4 color;\n" +"\n" +" indx *= 4;\n" +" color.x = (int)src[indx+0];\n" +" color.y = (int)src[indx+1];\n" +" color.z = (int)src[indx+2];\n" +" color.w = 
(int)src[indx+3];\n" +" write_imagei(dstimg, (int2)(tid_x, tid_y), color);\n" +"\n" +"}\n", + +"__kernel void testWriteui(__global uchar *src, write_only image2d_t dstimg)\n" +"{\n" +" int tid_x = get_global_id(0);\n" +" int tid_y = get_global_id(1);\n" +" int indx = tid_y * get_image_width(dstimg) + tid_x;\n" +" uint4 color;\n" +"\n" +" indx *= 4;\n" +" color.x = (uint)src[indx+0];\n" +" color.y = (uint)src[indx+1];\n" +" color.z = (uint)src[indx+2];\n" +" color.w = (uint)src[indx+3];\n" +" write_imageui(dstimg, (int2)(tid_x, tid_y), color);\n" +"\n" +"}\n" }; + +static const char *readKernelName[] = { "testWritef", "testWritei", "testWriteui" }; + + +//--- helper functions +static cl_uchar *generateImage( int n, MTdata d ) +{ + cl_uchar *ptr = (cl_uchar *)malloc( n * sizeof( cl_uchar ) ); + int i; + + for( i = 0; i < n; i++ ){ + ptr[i] = (cl_uchar)genrand_int32( d ); + } + + return ptr; + +} + + +static char *generateSignedImage( int n, MTdata d ) +{ + char *ptr = (char *)malloc( n * sizeof( char ) ); + int i; + + for( i = 0; i < n; i++ ){ + ptr[i] = (char)genrand_int32( d ); + } + + return ptr; + +} + + +static int verifyImage( cl_uchar *image, cl_uchar *outptr, int w, int h ) +{ + int i; + + for( i = 0; i < w * h * 4; i++ ){ + if( outptr[i] != image[i] ){ + return -1; + } + } + + return 0; +} + + +//----- the test functions +int read_image( cl_device_id device, cl_context context, cl_command_queue queue, int numElements, const char *code, const char *name, + cl_image_format image_format_desc ) +{ + cl_mem memobjs[2]; + cl_program program[1]; + void *inptr; + void *dst = NULL; + cl_kernel kernel[1]; + cl_event readEvent; + cl_ulong queueStart, submitStart, readStart, readEnd; + size_t threads[2]; +#ifdef USE_LOCAL_THREADS + size_t localThreads[2]; +#endif + int err; + int w = 64, h = 64; + cl_mem_flags flags; + size_t element_nbytes; + size_t num_bytes; + size_t channel_nbytes = sizeof( cl_uchar ); + MTdata d; + + + PASSIVE_REQUIRE_IMAGE_SUPPORT( device ) + + 
element_nbytes = channel_nbytes * get_format_channel_count( &image_format_desc ); + num_bytes = w * h * element_nbytes; + + threads[0] = (size_t)w; + threads[1] = (size_t)h; + +#ifdef USE_LOCAL_THREADS + err = clGetDeviceConfigInfo( id, CL_DEVICE_MAX_THREAD_GROUP_SIZE, localThreads, sizeof( unsigned int ), NULL ); + test_error( err, "Unable to get thread group max size" ); + localThreads[1] = localThreads[0]; + if( localThreads[0] > threads[0] ) + localThreads[0] = threads[0]; + if( localThreads[1] > threads[1] ) + localThreads[1] = threads[1]; +#endif + + d = init_genrand( gRandomSeed ); + if( image_format_desc.image_channel_data_type == CL_SIGNED_INT8 ) + inptr = (void *)generateSignedImage( w * h * 4, d ); + else + inptr = (void *)generateImage( w * h * 4, d ); + free_mtdata(d); d = NULL; + + if( ! inptr ){ + log_error("unable to allocate inptr at %d x %d\n", (int)w, (int)h ); + return -1; + } + + dst = malloc( num_bytes ); + if( ! dst ){ + free( (void *)inptr ); + log_error("unable to allocate dst at %d x %d\n", (int)w, (int)h ); + return -1; + } + + // allocate the input and output image memory objects + flags = (cl_mem_flags)(CL_MEM_READ_WRITE); + memobjs[0] = create_image_2d( context, flags, &image_format_desc, w, h, 0, NULL, &err ); + if( memobjs[0] == (cl_mem)0 ){ + free( dst ); + free( (void *)inptr ); + log_error("unable to create Image2D\n"); + return -1; + } + + memobjs[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), channel_nbytes * 4 * w * h, NULL, &err ); + if( memobjs[1] == (cl_mem)0 ){ + free( dst ); + free( (void *)inptr ); + clReleaseMemObject(memobjs[0]); + log_error("unable to create array\n"); + return -1; + } + + err = clEnqueueWriteBuffer( queue, memobjs[1], true, 0, num_bytes, inptr, 0, NULL, NULL ); + if( err != CL_SUCCESS ){ + clReleaseMemObject(memobjs[0]); + clReleaseMemObject(memobjs[1]); + free( dst ); + free( inptr ); + log_error("clWriteArray failed\n"); + return -1; + } + + err = create_single_kernel_helper( 
context, &program[0], &kernel[0], 1, &code, name ); + if( err ){ + log_error( "Unable to create program and kernel\n" ); + clReleaseMemObject(memobjs[0]); + clReleaseMemObject(memobjs[1]); + free( dst ); + free( inptr ); + return -1; + } + + err = clSetKernelArg( kernel[0], 0, sizeof( cl_mem ), (void *)&memobjs[1] ); + err |= clSetKernelArg( kernel[0], 1, sizeof( cl_mem ), (void *)&memobjs[0] ); + if( err != CL_SUCCESS ){ + log_error( "clSetKernelArg failed\n" ); + clReleaseKernel( kernel[0] ); + clReleaseProgram( program[0] ); + clReleaseMemObject(memobjs[0]); + clReleaseMemObject(memobjs[1]); + free( dst ); + free( inptr ); + return -1; + } + +#ifdef USE_LOCAL_THREADS + err = clEnqueueNDRangeKernel(queue, kernel[0], 2, NULL, threads, localThreads, 0, NULL, NULL ); +#else + err = clEnqueueNDRangeKernel(queue, kernel[0], 2, NULL, threads, NULL, 0, NULL, NULL ); +#endif + if( err != CL_SUCCESS ){ + print_error( err, "clEnqueueNDRangeKernel failed" ); + clReleaseKernel( kernel[0] ); + clReleaseProgram( program[0] ); + clReleaseMemObject(memobjs[0]); + clReleaseMemObject(memobjs[1]); + free( dst ); + free( inptr ); + return -1; + } + + size_t origin[3] = { 0, 0, 0 }; + size_t region[3] = { w, h, 1 }; + err = clEnqueueReadImage( queue, memobjs[0], false, origin, region, 0, 0, dst, 0, NULL, &readEvent ); + if( err != CL_SUCCESS ){ + print_error( err, "clReadImage2D failed" ); + clReleaseKernel( kernel[0] ); + clReleaseProgram( program[0] ); + clReleaseMemObject(memobjs[0]); + clReleaseMemObject(memobjs[1]); + free( dst ); + free( inptr ); + return -1; + } + + // This synchronization point is needed in order to assume the data is valid. + // Getting profiling information is not a synchronization point. 
+ err = clWaitForEvents( 1, &readEvent ); + if( err != CL_SUCCESS ) + { + clReleaseEvent(readEvent); + clReleaseKernel( kernel[0] ); + clReleaseProgram( program[0] ); + clReleaseMemObject(memobjs[0]); + clReleaseMemObject(memobjs[1]); + free( dst ); + free( inptr ); + return -1; + } + + while( ( err = clGetEventProfilingInfo( readEvent, CL_PROFILING_COMMAND_QUEUED, sizeof( cl_ulong ), &queueStart, NULL ) ) == + CL_PROFILING_INFO_NOT_AVAILABLE ); + if( err != CL_SUCCESS ){ + print_error( err, "clGetEventProfilingInfo failed" ); + clReleaseEvent(readEvent); + clReleaseKernel( kernel[0] ); + clReleaseProgram( program[0] ); + clReleaseMemObject(memobjs[0]); + clReleaseMemObject(memobjs[1]); + free( dst ); + free( inptr ); + return -1; + } + + while( ( err = clGetEventProfilingInfo( readEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof( cl_ulong ), &submitStart, NULL ) ) == + CL_PROFILING_INFO_NOT_AVAILABLE ); + if( err != CL_SUCCESS ){ + print_error( err, "clGetEventProfilingInfo failed" ); + clReleaseEvent(readEvent); + clReleaseKernel( kernel[0] ); + clReleaseProgram( program[0] ); + clReleaseMemObject(memobjs[0]); + clReleaseMemObject(memobjs[1]); + free( dst ); + free( inptr ); + return -1; + } + + err = clGetEventProfilingInfo( readEvent, CL_PROFILING_COMMAND_START, sizeof( cl_ulong ), &readStart, NULL ); + if( err != CL_SUCCESS ){ + print_error( err, "clGetEventProfilingInfo failed" ); + clReleaseEvent(readEvent); + clReleaseKernel( kernel[0] ); + clReleaseProgram( program[0] ); + clReleaseMemObject(memobjs[0]); + clReleaseMemObject(memobjs[1]); + free( dst ); + free( inptr ); + return -1; + } + + err = clGetEventProfilingInfo( readEvent, CL_PROFILING_COMMAND_END, sizeof( cl_ulong ), &readEnd, NULL ); + if( err != CL_SUCCESS ){ + print_error( err, "clGetEventProfilingInfo failed" ); + clReleaseEvent(readEvent); + clReleaseKernel( kernel[0] ); + clReleaseProgram( program[0] ); + clReleaseMemObject(memobjs[0]); + clReleaseMemObject(memobjs[1]); + free( dst ); + free( 
inptr ); + return -1; + } + + err = verifyImage( (cl_uchar *)inptr, (cl_uchar *)dst, w, h ); + if( err ){ + log_error( "Image failed to verify.\n" ); + } + else{ + log_info( "Image verified.\n" ); + } + + clReleaseEvent(readEvent); + clReleaseKernel(kernel[0]); + clReleaseProgram(program[0]); + clReleaseMemObject(memobjs[0]); + clReleaseMemObject(memobjs[1]); + free(dst); + free(inptr); + + if (check_times(queueStart, submitStart, readStart, readEnd, device)) + err = -1; + + return err; + +} // end read_image() + + +int read_float_image( cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) +{ + cl_image_format image_format_desc = { CL_RGBA, CL_UNORM_INT8 }; + PASSIVE_REQUIRE_IMAGE_SUPPORT( device ) + // 0 to 255 for unsigned image data + return read_image( device, context, queue, numElements, readKernelCode[0], readKernelName[0], image_format_desc ); + +} + + +int read_char_image( cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) +{ + cl_image_format image_format_desc = { CL_RGBA, CL_SIGNED_INT8 }; + PASSIVE_REQUIRE_IMAGE_SUPPORT( device ) + // -128 to 127 for signed iamge data + return read_image( device, context, queue, numElements, readKernelCode[1], readKernelName[1], image_format_desc ); + +} + + +int read_uchar_image( cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) +{ + cl_image_format image_format_desc = { CL_RGBA, CL_UNSIGNED_INT8 }; + PASSIVE_REQUIRE_IMAGE_SUPPORT( device ) + // 0 to 255 for unsigned image data + return read_image( device, context, queue, numElements, readKernelCode[2], readKernelName[2], image_format_desc ); + +} + + diff --git a/test_conformance/profiling/writeArray.c b/test_conformance/profiling/writeArray.c new file mode 100644 index 00000000..23e29b62 --- /dev/null +++ b/test_conformance/profiling/writeArray.c @@ -0,0 +1,1375 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include + +#include "procs.h" +#include "../../test_common/harness/testHarness.h" +#include "../../test_common/harness/errorHelpers.h" +#include "../../test_common/harness/conversions.h" + +//#define USE_LOCAL_THREADS 1 + +#ifndef uchar +typedef unsigned char uchar; +#endif + +#ifndef TestStruct +typedef struct{ + int a; + float b; +} TestStruct; +#endif + +const char *stream_write_int_kernel_code[] = { + "__kernel void test_stream_write_int(__global int *src, __global int *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_stream_write_int2(__global int2 *src, __global int2 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_stream_write_int4(__global int4 *src, __global int4 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_stream_write_int8(__global int8 *src, __global int8 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_stream_write_int16(__global int16 *src, __global int16 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n" }; + +static const char *int_kernel_name[] = { "test_stream_write_int", 
"test_stream_write_int2", "test_stream_write_int4", "test_stream_write_int8", "test_stream_write_int16" }; + + +const char *stream_write_uint_kernel_code[] = { + "__kernel void test_stream_write_uint(__global uint *src, __global uint *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_stream_write_uint2(__global uint2 *src, __global uint2 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_stream_write_uint4(__global uint4 *src, __global uint4 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_stream_write_uint8(__global uint8 *src, __global uint8 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_stream_write_uint16(__global uint16 *src, __global uint16 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n" }; + +static const char *uint_kernel_name[] = { "test_stream_write_uint", "test_stream_write_uint2", "test_stream_write_uint4", "test_stream_write_uint8", "test_stream_write_uint16" }; + + +const char *stream_write_ushort_kernel_code[] = { + "__kernel void test_stream_write_ushort(__global ushort *src, __global ushort *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_stream_write_ushort2(__global ushort2 *src, __global ushort2 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_stream_write_ushort4(__global ushort4 *src, __global ushort4 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_stream_write_ushort8(__global ushort8 *src, __global ushort8 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + 
"__kernel void test_stream_write_ushort16(__global ushort16 *src, __global ushort16 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n" }; + +static const char *ushort_kernel_name[] = { "test_stream_write_ushort", "test_stream_write_ushort2", "test_stream_write_ushort4", "test_stream_write_ushort8", "test_stream_write_ushort16" }; + + + +const char *stream_write_short_kernel_code[] = { + "__kernel void test_stream_write_short(__global short *src, __global short *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_stream_write_short2(__global short2 *src, __global short2 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_stream_write_short4(__global short4 *src, __global short4 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_stream_write_short8(__global short8 *src, __global short8 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_stream_write_short16(__global short16 *src, __global short16 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n" }; + +static const char *short_kernel_name[] = { "test_stream_write_short", "test_stream_write_short2", "test_stream_write_short4", "test_stream_write_short8", "test_stream_write_short16" }; + + +const char *stream_write_char_kernel_code[] = { + "__kernel void test_stream_write_char(__global char *src, __global char *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_stream_write_char2(__global char2 *src, __global char2 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_stream_write_char4(__global char4 *src, __global char4 *dst)\n" + 
"{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_stream_write_char8(__global char8 *src, __global char8 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_stream_write_char16(__global char16 *src, __global char16 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n" }; + +static const char *char_kernel_name[] = { "test_stream_write_char", "test_stream_write_char2", "test_stream_write_char4", "test_stream_write_char8", "test_stream_write_char16" }; + + +const char *stream_write_uchar_kernel_code[] = { + "__kernel void test_stream_write_uchar(__global uchar *src, __global uchar *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_stream_write_uchar2(__global uchar2 *src, __global uchar2 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_stream_write_uchar4(__global uchar4 *src, __global uchar4 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_stream_write_uchar8(__global uchar8 *src, __global uchar8 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_stream_write_uchar16(__global uchar16 *src, __global uchar16 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n" }; + +static const char *uchar_kernel_name[] = { "test_stream_write_uchar", "test_stream_write_uchar2", "test_stream_write_uchar4", "test_stream_write_uchar8", "test_stream_write_uchar16" }; + + +const char *stream_write_float_kernel_code[] = { + "__kernel void test_stream_write_float(__global float *src, __global float *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void 
test_stream_write_float2(__global float2 *src, __global float2 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_stream_write_float4(__global float4 *src, __global float4 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_stream_write_float8(__global float8 *src, __global float8 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_stream_write_float16(__global float16 *src, __global float16 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n" }; + +static const char *float_kernel_name[] = { "test_stream_write_float", "test_stream_write_float2", "test_stream_write_float4", "test_stream_write_float8", "test_stream_write_float16" }; + + +const char *stream_write_half_kernel_code[] = { + "__kernel void test_stream_write_half(__global half *src, __global float *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = vload_half( tid * 2, src );\n" + "}\n", + + "__kernel void test_stream_write_half2(__global half2 *src, __global float2 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = vload_half2( tid * 2, src );\n" + "}\n", + + "__kernel void test_stream_write_half4(__global half4 *src, __global float4 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = vload_half4( tid * 2, src );\n" + "}\n", + + "__kernel void test_stream_write_half8(__global half8 *src, __global float8 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = vload_half8( tid * 2, src );\n" + "}\n", + + "__kernel void test_stream_write_half16(__global half16 *src, __global float16 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = vload_half16( tid * 2, src );\n" + "}\n" }; + +static const char *half_kernel_name[] = { "test_stream_write_half", 
"test_stream_write_half2", "test_stream_write_half4", "test_stream_write_half8", "test_stream_write_half16" }; + + +const char *stream_write_long_kernel_code[] = { + "__kernel void test_stream_write_long(__global long *src, __global long *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_stream_write_long2(__global long2 *src, __global long2 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_stream_write_long4(__global long4 *src, __global long4 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_stream_write_long8(__global long8 *src, __global long8 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_stream_write_long16(__global long16 *src, __global long16 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n" }; + +static const char *long_kernel_name[] = { "test_stream_write_long", "test_stream_write_long2", "test_stream_write_long4", "test_stream_write_long8", "test_stream_write_long16" }; + + +const char *stream_write_ulong_kernel_code[] = { + "__kernel void test_stream_write_ulong(__global ulong *src, __global ulong *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_stream_write_ulong2(__global ulong2 *src, __global ulong2 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_stream_write_ulong4(__global ulong4 *src, __global ulong4 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel void test_stream_write_ulong8(__global ulong8 *src, __global ulong8 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n", + + "__kernel 
void test_stream_write_ulong16(__global ulong16 *src, __global ulong16 *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src[tid];\n" + "}\n" }; + +static const char *ulong_kernel_name[] = { "test_stream_write_ulong", "test_stream_write_ulong2", "test_stream_write_ulong4", "test_stream_write_ulong8", "test_stream_write_ulong16" }; + + +static const char *stream_write_struct_kernel_code[] = { + "typedef struct{\n" + "int a;\n" + "float b;\n" + "} TestStruct;\n" + "__kernel void read_write_struct(__global TestStruct *src, __global TestStruct *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid].a = src[tid].a;\n" + " dst[tid].b = src[tid].b;\n" + "}\n" }; + +static const char *struct_kernel_name[] = { "read_write_struct" }; + + +static int verify_write_int( void *ptr1, void *ptr2, int n ) +{ + int i; + int *inptr = (int *)ptr1; + int *outptr = (int *)ptr2; + + for (i=0; i threads[0] ) + localThreads[0] = threads[0]; +#endif + + ptrSizes[0] = size; + ptrSizes[1] = ptrSizes[0] << 1; + ptrSizes[2] = ptrSizes[1] << 1; + ptrSizes[3] = ptrSizes[2] << 1; + ptrSizes[4] = ptrSizes[3] << 1; + + loops = ( loops < 5 ? loops : 5 ); + + for( i = 0; i < loops; i++ ) + { + outPtrSizes[i] = ptrSizes[i]; + } + + for( i = 0; i < loops; i++ ){ + ii = i << 1; + streams[ii] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), ptrSizes[i] * num_elements, NULL, &err ); + if( ! streams[ii] ){ + free( outptr[i] ); + log_error( " clCreateBuffer failed\n" ); + return -1; + } + if( ! strcmp( type, "half" ) ){ + outptr[i] = malloc( outPtrSizes[i] * num_elements * 2 ); + streams[ii+1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), outPtrSizes[i] * 2 * num_elements, NULL, &err ); + } + else{ + outptr[i] = malloc( outPtrSizes[i] * num_elements ); + streams[ii+1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), outPtrSizes[i] * num_elements, NULL, &err ); + } + if( ! 
streams[ii+1] ){ + clReleaseMemObject(streams[ii]); + free( outptr[i] ); + log_error( " clCreateBuffer failed\n" ); + return -1; + } + + err = clEnqueueWriteBuffer( queue, streams[ii], false, 0, ptrSizes[i]*num_elements, inptr[i], 0, NULL, &writeEvent ); + if( err != CL_SUCCESS ){ + clReleaseMemObject( streams[ii] ); + clReleaseMemObject( streams[ii+1] ); + free( outptr[i] ); + print_error( err, " clWriteArray failed" ); + return -1; + } + + // This synchronization point is needed in order to assume the data is valid. + // Getting profiling information is not a synchronization point. + err = clWaitForEvents( 1, &writeEvent ); + if( err != CL_SUCCESS ) + { + print_error( err, "Unable to wait for event completion" ); + clReleaseEvent(writeEvent); + clReleaseMemObject( streams[ii] ); + clReleaseMemObject( streams[ii+1] ); + free( outptr[i] ); + return -1; + } + + // test profiling + while( ( err = clGetEventProfilingInfo( writeEvent, CL_PROFILING_COMMAND_QUEUED, sizeof( cl_ulong ), &queueStart, NULL ) ) == + CL_PROFILING_INFO_NOT_AVAILABLE ); + if( err != CL_SUCCESS ){ + print_error( err, "clGetEventProfilingInfo failed" ); + clReleaseEvent(writeEvent); + clReleaseMemObject( streams[ii] ); + clReleaseMemObject( streams[ii+1] ); + free( outptr[i] ); + return -1; + } + + while( ( err = clGetEventProfilingInfo( writeEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof( cl_ulong ), &submitStart, NULL ) ) == + CL_PROFILING_INFO_NOT_AVAILABLE ); + if( err != CL_SUCCESS ){ + print_error( err, "clGetEventProfilingInfo failed" ); + clReleaseEvent(writeEvent); + clReleaseMemObject( streams[ii] ); + clReleaseMemObject( streams[ii+1] ); + free( outptr[i] ); + return -1; + } + + err = clGetEventProfilingInfo( writeEvent, CL_PROFILING_COMMAND_START, sizeof( cl_ulong ), &writeStart, NULL ); + if( err != CL_SUCCESS ){ + print_error( err, "clGetEventProfilingInfo failed" ); + clReleaseEvent(writeEvent); + clReleaseMemObject( streams[ii] ); + clReleaseMemObject( streams[ii+1] ); + free( 
outptr[i] ); + return -1; + } + + err = clGetEventProfilingInfo( writeEvent, CL_PROFILING_COMMAND_END, sizeof( cl_ulong ), &writeEnd, NULL ); + if( err != CL_SUCCESS ){ + print_error( err, "clGetEventProfilingInfo failed" ); + clReleaseEvent(writeEvent); + clReleaseMemObject( streams[ii] ); + clReleaseMemObject( streams[ii+1] ); + free( outptr[i] ); + return -1; + } + + + err = create_single_kernel_helper( context, &program[i], &kernel[i], 1, &kernelCode[i], kernelName[i] ); + if( err ){ + clReleaseEvent(writeEvent); + clReleaseMemObject(streams[ii]); + clReleaseMemObject(streams[ii+1]); + free( outptr[i] ); + log_error( " Error creating program for %s\n", type ); + return -1; + } + + err = clSetKernelArg( kernel[i], 0, sizeof( cl_mem ), (void *)&streams[ii] ); + err |= clSetKernelArg( kernel[i], 1, sizeof( cl_mem ), (void *)&streams[ii+1] ); + if (err != CL_SUCCESS){ + clReleaseEvent(writeEvent); + clReleaseKernel( kernel[i] ); + clReleaseProgram( program[i] ); + clReleaseMemObject( streams[ii] ); + clReleaseMemObject( streams[ii+1] ); + free( outptr[i] ); + print_error( err, " clSetKernelArg failed" ); + return -1; + } + +#ifdef USE_LOCAL_THREADS + err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, localThreads, 0, NULL, NULL ); +#else + err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL ); +#endif + if( err != CL_SUCCESS ){ + print_error( err, " clEnqueueNDRangeKernel failed" ); + clReleaseEvent(writeEvent); + clReleaseKernel( kernel[i] ); + clReleaseProgram( program[i] ); + clReleaseMemObject( streams[ii] ); + clReleaseMemObject( streams[ii+1] ); + free( outptr[i] ); + return -1; + } + + if( ! 
strcmp( type, "half" ) ){ + err = clEnqueueReadBuffer( queue, streams[ii+1], true, 0, outPtrSizes[i]*num_elements, outptr[i], 0, NULL, NULL ); + } + else{ + err = clEnqueueReadBuffer( queue, streams[ii+1], true, 0, outPtrSizes[i]*num_elements, outptr[i], 0, NULL, NULL ); + } + if( err != CL_SUCCESS ){ + clReleaseEvent(writeEvent); + clReleaseKernel( kernel[i] ); + clReleaseProgram( program[i] ); + clReleaseMemObject( streams[ii] ); + clReleaseMemObject( streams[ii+1] ); + free( outptr[i] ); + print_error( err, " clEnqueueReadBuffer failed" ); + return -1; + } + + char *inP = (char *)inptr[i]; + char *outP = (char *)outptr[i]; + int err2 = 0; + for( size_t p = 0; p < (size_t)num_elements; p++ ) + { + if( fn( inP, outP, (int)(ptrSizes[i] / ptrSizes[0]) ) ) + { + log_error( " %s%d data failed to verify\n", type, 1< threads[0] ) + localThreads[0] = threads[0]; + #endif + + ptrSizes[0] = size; + ptrSizes[1] = ptrSizes[0] << 1; + ptrSizes[2] = ptrSizes[1] << 1; + ptrSizes[3] = ptrSizes[2] << 1; + ptrSizes[4] = ptrSizes[3] << 1; + + + loops = ( loops < 5 ? loops : 5 ); + for( i = 0; i < loops; i++ ){ + + inptr[i] = (TestStruct *)malloc(ptrSizes[i] * num_elements); + + for( j = 0; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ ){ + inptr[i][j].a = (int)random_float( -2147483648.f, 2147483647.0f ); + inptr[i][j].b = random_float( -FLT_MAX, FLT_MAX ); + } + + ii = i << 1; + streams[ii] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), ptrSizes[i] * num_elements, NULL); + if( ! streams[ii] ){ + free( outptr[i] ); + log_error( " clCreateBuffer failed\n" ); + return -1; + } + outptr[i] = malloc( ptrSizes[i] * num_elements ); + streams[ii+1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), ptrSizes[i] * num_elements, NULL); + if( ! 
streams[ii+1] ){ + clReleaseMemObject(streams[ii]); + free( outptr[i] ); + log_error( " clCreateBuffer failed\n" ); + return -1; + } + + err = clWriteArray(context, streams[ii], false, 0, ptrSizes[i]*num_elements, inptr[i], NULL); + if( err != CL_SUCCESS ){ + clReleaseMemObject(streams[ii]); + clReleaseMemObject(streams[ii+1]); + free( outptr[i] ); + print_error( err, " clWriteArray failed" ); + return -1; + } + + err = create_program_and_kernel( device, struct_kernel_code, "read_write_struct", &program[i], &kernel[i] ); + if( err ){ + clReleaseMemObject(streams[ii]); + clReleaseMemObject(streams[ii+1]); + free( outptr[i] ); + log_error( " Error creating program for struct\n" ); + return -1; + } + + err = clSetKernelArg( kernel[i], 0, sizeof( cl_mem ), (void *)&streams[ii] ); + err |= clSetKernelArg( kernel[i], 1, sizeof( cl_mem ), (void *)&streams[ii+1] ); + if (err != CL_SUCCESS){ + clReleaseProgram( program[i] ); + clReleaseKernel( kernel[i] ); + clReleaseMemObject( streams[ii] ); + clReleaseMemObject( streams[ii+1] ); + free( outptr[i] ); + print_error( err, " clSetKernelArg failed" ); + return -1; + } + + #ifdef USE_LOCAL_THREADS + err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, localThreads, 0, NULL, NULL ); + #else + err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL ); + #endif + if( err != CL_SUCCESS ){ + print_error( err, " clEnqueueNDRangeKernel failed" ); + clReleaseMemObject( streams[ii] ); + clReleaseMemObject( streams[ii+1] ); + clReleaseKernel( kernel[i] ); + clReleaseProgram( program[i] ); + free( outptr[i] ); + return -1; + } + + err = clEnqueueReadBuffer( queue, streams[ii+1], true, 0, ptrSizes[i]*num_elements, outptr[i], 0, NULL, NULL ); + if( err != CL_SUCCESS ){ + clReleaseMemObject( streams[ii] ); + clReleaseMemObject( streams[ii+1] ); + clReleaseKernel( kernel[i] ); + clReleaseProgram( program[i] ); + free( outptr[i] ); + print_error( err, " clEnqueueReadBuffer failed" ); + return -1; + } 
+ + if( verify_write_struct( inptr[i], outptr[i], ptrSizes[i] * num_elements / ptrSizes[0] ) ){ + log_error( " STREAM_WRITE struct%d test failed\n", 1< +#include +#include +#include + +#include "procs.h" +#include "../../test_common/harness/testHarness.h" +#include "../../test_common/harness/errorHelpers.h" + +//--- the code for the kernel executables +static const char *readKernelCode[] = { +"__kernel void testReadf(read_only image2d_t srcimg, __global float4 *dst)\n" +"{\n" +" int tid_x = get_global_id(0);\n" +" int tid_y = get_global_id(1);\n" +" int indx = tid_y * get_image_width(srcimg) + tid_x;\n" +" float4 color;\n" +"\n" +" const sampler_t sampler = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST | CLK_NORMALIZED_COORDS_FALSE;\n" +" color = read_imagef(srcimg, sampler, (int2)(tid_x, tid_y));\n" +" dst[indx].x = color.x;\n" +" dst[indx].y = color.y;\n" +" dst[indx].z = color.z;\n" +" dst[indx].w = color.w;\n" +"\n" +"}\n", + +"__kernel void testReadi(read_only image2d_t srcimg, __global uchar4 *dst)\n" +"{\n" +" int tid_x = get_global_id(0);\n" +" int tid_y = get_global_id(1);\n" +" int indx = tid_y * get_image_width(srcimg) + tid_x;\n" +" int4 color;\n" +"\n" +" const sampler_t sampler = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST | CLK_NORMALIZED_COORDS_FALSE;\n" +" color = read_imagei(srcimg, sampler, (int2)(tid_x, tid_y));\n" +" uchar4 dst_write;\n" +" dst_write.x = (uchar)color.x;\n" +" dst_write.y = (uchar)color.y;\n" +" dst_write.z = (uchar)color.z;\n" +" dst_write.w = (uchar)color.w;\n" +" dst[indx] = dst_write;\n" +"\n" +"}\n", + +"__kernel void testReadui(read_only image2d_t srcimg, __global uchar4 *dst)\n" +"{\n" +" int tid_x = get_global_id(0);\n" +" int tid_y = get_global_id(1);\n" +" int indx = tid_y * get_image_width(srcimg) + tid_x;\n" +" uint4 color;\n" +"\n" +" const sampler_t sampler = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST | CLK_NORMALIZED_COORDS_FALSE;\n" +" color = read_imageui(srcimg, sampler, (int2)(tid_x, tid_y));\n" +" 
uchar4 dst_write;\n" +" dst_write.x = (uchar)color.x;\n" +" dst_write.y = (uchar)color.y;\n" +" dst_write.z = (uchar)color.z;\n" +" dst_write.w = (uchar)color.w;\n" +" dst[indx] = dst_write;\n" +"\n" +"}\n", + +"__kernel void testWritef(__global uchar *src, write_only image2d_t dstimg)\n" +"{\n" +" int tid_x = get_global_id(0);\n" +" int tid_y = get_global_id(1);\n" +" int indx = tid_y * get_image_width(dstimg) + tid_x;\n" +" float4 color;\n" +"\n" +" indx *= 4;\n" +" color = (float4)((float)src[indx+0], (float)src[indx+1], (float)src[indx+2], (float)src[indx+3]);\n" +" color /= (float4)(255.f, 255.f, 255.f, 255.f);\n" +" write_imagef(dstimg, (int2)(tid_x, tid_y), color);\n" +"\n" +"}\n", + +"__kernel void testWritei(__global char *src, write_only image2d_t dstimg)\n" +"{\n" +" int tid_x = get_global_id(0);\n" +" int tid_y = get_global_id(1);\n" +" int indx = tid_y * get_image_width(dstimg) + tid_x;\n" +" int4 color;\n" +"\n" +" indx *= 4;\n" +" color.x = (int)src[indx+0];\n" +" color.y = (int)src[indx+1];\n" +" color.z = (int)src[indx+2];\n" +" color.w = (int)src[indx+3];\n" +" write_imagei(dstimg, (int2)(tid_x, tid_y), color);\n" +"\n" +"}\n", + +"__kernel void testWriteui(__global uchar *src, write_only image2d_t dstimg)\n" +"{\n" +" int tid_x = get_global_id(0);\n" +" int tid_y = get_global_id(1);\n" +" int indx = tid_y * get_image_width(dstimg) + tid_x;\n" +" uint4 color;\n" +"\n" +" indx *= 4;\n" +" color.x = (uint)src[indx+0];\n" +" color.y = (uint)src[indx+1];\n" +" color.z = (uint)src[indx+2];\n" +" color.w = (uint)src[indx+3];\n" +" write_imageui(dstimg, (int2)(tid_x, tid_y), color);\n" +"\n" +"}\n", + +"__kernel void testReadWriteff(read_only image2d_t srcimg, write_only image2d_t dstimg)\n" +"{\n" +" int tid_x = get_global_id(0);\n" +" int tid_y = get_global_id(1);\n" +" float4 color;\n" +"\n" +" color = read_imagef(srcimg, CLK_DEFAULT_SAMPLER, (int2)(tid_x, tid_y));\n" +" write_imagef(dstimg, (int2)(tid_x, tid_y), color);\n" +"\n" +"}\n", + +"__kernel 
void testReadWriteii(read_only image2d_t srcimg, write_only image2d_t dstimg)\n" +"{\n" +" int tid_x = get_global_id(0);\n" +" int tid_y = get_global_id(1);\n" +" int4 color;\n" +"\n" +" color = read_imagei(srcimg, CLK_DEFAULT_SAMPLER, (int2)(tid_x, tid_y));\n" +" write_imagei(dstimg, (int2)(tid_x, tid_y), color);\n" +"\n" +"}\n", + +"__kernel void testReadWriteuiui(read_only image2d_t srcimg, write_only image2d_t dstimg)\n" +"{\n" +" int tid_x = get_global_id(0);\n" +" int tid_y = get_global_id(1);\n" +" uint4 color;\n" +"\n" +" color = read_imageui(srcimg, CLK_DEFAULT_SAMPLER, (int2)(tid_x, tid_y));\n" +" write_imageui(dstimg, (int2)(tid_x, tid_y), color);\n" +"\n" +"}\n", + +"__kernel void testReadWritefi(read_only image2d_t srcimg, write_only image2d_t dstimg)\n" +"{\n" +" int tid_x = get_global_id(0);\n" +" int tid_y = get_global_id(1);\n" +" float4 colorf;\n" +" int4 colori;\n" +"\n" +" const sampler_t sampler = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST | CLK_NORMALIZED_COORDS_FALSE;\n" +" colorf = read_imagef(srcimg, sampler, (int2)(tid_x, tid_y));\n" +// since we are going from unsigned to signed, be sure to convert +// values greater 0.5 to negative values +" if( colorf.x >= 0.5f )\n" +" colori.x = (int)( ( colorf.x - 1.f ) * 255.f );\n" +" else\n" +" colori.x = (int)( colorf.x * 255.f );\n" +" if( colorf.y >= 0.5f )\n" +" colori.y = (int)( ( colorf.y - 1.f ) * 255.f );\n" +" else\n" +" colori.y = (int)( colorf.y * 255.f );\n" +" if( colorf.z >= 0.5f )\n" +" colori.z = (int)( ( colorf.z - 1.f ) * 255.f );\n" +" else\n" +" colori.z = (int)( colorf.z * 255.f );\n" +" if( colorf.w >= 0.5f )\n" +" colori.w = (int)( ( colorf.w - 1.f ) * 255.f );\n" +" else\n" +" colori.w = (int)( colorf.w * 255.f );\n" +" write_imagei(dstimg, (int2)(tid_x, tid_y), colori);\n" +"\n" +"}\n", + +"__kernel void testReadWritefui(read_only image2d_t srcimg, write_only image2d_t dstimg)\n" +"{\n" +" int tid_x = get_global_id(0);\n" +" int tid_y = get_global_id(1);\n" +" float4 
colorf;\n" +" uint4 colorui;\n" +"\n" +" const sampler_t sampler = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST | CLK_NORMALIZED_COORDS_FALSE;\n" +" colorf = read_imagef(srcimg, sampler, (int2)(tid_x, tid_y));\n" +" colorui.x = (uint)( colorf.x * 255.f );\n" +" colorui.y = (uint)( colorf.y * 255.f );\n" +" colorui.z = (uint)( colorf.z * 255.f );\n" +" colorui.w = (uint)( colorf.w * 255.f );\n" +" write_imageui(dstimg, (int2)(tid_x, tid_y), colorui);\n" +"\n" +"}\n", + +"__kernel void testReadWriteif(read_only image2d_t srcimg, write_only image2d_t dstimg)\n" +"{\n" +" int tid_x = get_global_id(0);\n" +" int tid_y = get_global_id(1);\n" +" int4 colori;\n" +" float4 colorf;\n" +"\n" +// since we are going from signed to unsigned, we need to adjust the rgba values from +// from the signed image to add 256 to the signed image values less than 0. +" const sampler_t sampler = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST | CLK_NORMALIZED_COORDS_FALSE;\n" +" colori = read_imagei(srcimg, sampler, (int2)(tid_x, tid_y));\n" +" if( colori.x < 0 )\n" +" colorf.x = ( (float)colori.x + 256.f ) / 255.f;\n" +" else\n" +" colorf.x = (float)colori.x / 255.f;\n" +" if( colori.y < 0 )\n" +" colorf.y = ( (float)colori.y + 256.f ) / 255.f;\n" +" else\n" +" colorf.y = (float)colori.y / 255.f;\n" +" if( colori.z < 0 )\n" +" colorf.z = ( (float)colori.z + 256.f ) / 255.f;\n" +" else\n" +" colorf.z = (float)colori.z / 255.f;\n" +" if( colori.w < 0 )\n" +" colorf.w = ( (float)colori.w + 256.f ) / 255.f;\n" +" else\n" +" colorf.w = (float)colori.w / 255.f;\n" +" write_imagef(dstimg, (int2)(tid_x, tid_y), colorf);\n" +"\n" +"}\n", + +"__kernel void testReadWriteiui(read_only image2d_t srcimg, write_only image2d_t dstimg)\n" +"{\n" +" int tid_x = get_global_id(0);\n" +" int tid_y = get_global_id(1);\n" +" int4 colori;\n" +" uint4 colorui;\n" +"\n" +" const sampler_t sampler = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST | CLK_NORMALIZED_COORDS_FALSE;\n" +" colori = read_imagei(srcimg, 
sampler, (int2)(tid_x, tid_y));\n" +// since we are going from signed to unsigned, we need to adjust the rgba values from +// from the signed image to add 256 to the signed image values less than 0. +" if( colori.x < 0 )\n" +" colorui.x = (uint)( colori.x + 256 );\n" +" else\n" +" colorui.x = (uint)colori.x;\n" +" if( colori.y < 0 )\n" +" colorui.y = (uint)( colori.y + 256 );\n" +" else\n" +" colorui.y = (uint)colori.y;\n" +" if( colori.z < 0 )\n" +" colorui.z = (uint)( colori.z + 256 );\n" +" else\n" +" colorui.z = (uint)colori.z;\n" +" if( colori.w < 0 )\n" +" colorui.w = (uint)( colori.w + 256 );\n" +" else\n" +" colorui.w = (uint)colori.w;\n" +" write_imageui(dstimg, (int2)(tid_x, tid_y), colorui);\n" +"\n" +"}\n", + +"__kernel void testReadWriteuif(read_only image2d_t srcimg, write_only image2d_t dstimg)\n" +"{\n" +" int tid_x = get_global_id(0);\n" +" int tid_y = get_global_id(1);\n" +" uint4 colorui;\n" +" float4 colorf;\n" +"\n" +" const sampler_t sampler = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST | CLK_NORMALIZED_COORDS_FALSE;\n" +" colorui = read_imageui(srcimg, sampler, (int2)(tid_x, tid_y));\n" +" colorf.x = (float)colorui.x / 255.f;\n" +" colorf.y = (float)colorui.y / 255.f;\n" +" colorf.z = (float)colorui.z / 255.f;\n" +" colorf.w = (float)colorui.w / 255.f;\n" +" write_imagef(dstimg, (int2)(tid_x, tid_y), colorf);\n" +"\n" +"}\n", + +"__kernel void testReadWriteuii(read_only image2d_t srcimg, write_only image2d_t dstimg)\n" +"{\n" +" int tid_x = get_global_id(0);\n" +" int tid_y = get_global_id(1);\n" +" uint4 colorui;\n" +" int4 colori;\n" +"\n" +" const sampler_t sampler = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST | CLK_NORMALIZED_COORDS_FALSE;\n" +" colorui = read_imageui(srcimg, sampler, (int2)(tid_x, tid_y));\n" +// since we are going from unsigned to signed, be sure to convert +// values greater 0.5 to negative values +" if( colorui.x >= 128U )\n" +" colori.x = (int)colorui.x - 256;\n" +" else\n" +" colori.x = (int)colorui.x;\n" +" 
if( colorui.y >= 128U )\n" +" colori.y = (int)colorui.y - 256;\n" +" else\n" +" colori.y = (int)colorui.y;\n" +" if( colorui.z >= 128U )\n" +" colori.z = (int)colorui.z - 256;\n" +" else\n" +" colori.z = (int)colorui.z;\n" +" if( colorui.w >= 128U )\n" +" colori.w = (int)colorui.w - 256;\n" +" else\n" +" colori.w = (int)colorui.w;\n" +" write_imagei(dstimg, (int2)(tid_x, tid_y), colori);\n" +"\n" +"}\n" }; + +static const char *readKernelName[] = { "testReadf", "testReadi", "testReadui", "testWritef", "testWritei", "testWriteui", +"testReadWriteff", "testReadWriteii", "testReadWriteuiui", "testReadWritefi", +"testReadWritefui", "testReadWriteif", "testReadWriteiui", "testReadWriteuif", +"testReadWriteuii" }; + + +static cl_uchar *generateImage( int n, MTdata d ) +{ + cl_uchar *ptr = (cl_uchar *)malloc( n * sizeof( cl_uchar ) ); + int i; + + for( i = 0; i < n; i++ ){ + ptr[i] = (cl_uchar)genrand_int32(d); + } + + return ptr; + +} + + +static char *generateSignedImage( int n, MTdata d ) +{ + char *ptr = (char *)malloc( n * sizeof( char ) ); + int i; + + for( i = 0; i < n; i++ ){ + ptr[i] = (char)genrand_int32(d); + } + + return ptr; + +} + + +static int verifyImage( cl_uchar *image, cl_uchar *outptr, int w, int h ) +{ + int i; + + for( i = 0; i < w * h * 4; i++ ){ + if( outptr[i] != image[i] ){ + log_error("Image verification failed at offset %d. Actual value=%d, expected value=%d\n", i, outptr[i], image[i]); + return -1; + } + } + + return 0; +} + +static int verifyImageFloat ( cl_double *refptr, cl_float *outptr, int w, int h ) +{ + int i; + + for (i=0; i threads[0] ) + localThreads[0] = threads[0]; + if( localThreads[1] > threads[1] ) + localThreads[1] = threads[1]; +#endif + + d = init_genrand( gRandomSeed ); + if( image_format_desc.image_channel_data_type == CL_SIGNED_INT8 ) + inptr = (void *)generateSignedImage( w * h * 4, d ); + else + inptr = (void *)generateImage( w * h * 4, d ); + free_mtdata(d); d = NULL; + if( ! 
inptr ){ + log_error("unable to allocate inptr at %d x %d\n", (int)w, (int)h ); + return -1; + } + + dst = malloc( num_bytes ); + if( ! dst ){ + free( (void *)inptr ); + log_error("unable to allocate dst at %d x %d\n", (int)w, (int)h ); + return -1; + } + + // allocate the input and output image memory objects + flags = (cl_mem_flags)(CL_MEM_READ_WRITE); + memobjs[0] = create_image_2d( context, flags, &image_format_desc, w, h, 0, NULL, &err ); + if( memobjs[0] == (cl_mem)0 ){ + free( dst ); + free( (void *)inptr ); + log_error("unable to create Image2D\n"); + return -1; + } + + memobjs[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), channel_nbytes * 4 * w * h, NULL, &err ); + if( memobjs[1] == (cl_mem)0 ){ + free( dst ); + free( (void *)inptr ); + clReleaseMemObject(memobjs[0]); + log_error("unable to create array\n"); + return -1; + } + + size_t origin[3] = { 0, 0, 0 }; + size_t region[3] = { w, h, 1 }; + err = clEnqueueWriteImage( queue, memobjs[0], false, origin, region, 0, 0, inptr, 0, NULL, &writeEvent ); + if( err != CL_SUCCESS ){ + clReleaseMemObject(memobjs[0]); + clReleaseMemObject(memobjs[1]); + free( dst ); + free( inptr ); + print_error(err, "clWriteImage failed"); + return -1; + } + + // This synchronization point is needed in order to assume the data is valid. + // Getting profiling information is not a synchronization point. 
+ err = clWaitForEvents( 1, &writeEvent ); + if( err != CL_SUCCESS ) + { + print_error( err, "clWaitForEvents failed" ); + clReleaseEvent(writeEvent); + clReleaseMemObject(memobjs[0]); + clReleaseMemObject(memobjs[1]); + free( dst ); + free( inptr ); + return -1; + } + + // test profiling + while( ( err = clGetEventProfilingInfo( writeEvent, CL_PROFILING_COMMAND_QUEUED, sizeof( cl_ulong ), &queueStart, NULL ) ) == + CL_PROFILING_INFO_NOT_AVAILABLE ); + if( err != CL_SUCCESS ){ + print_error( err, "clGetEventProfilingInfo failed" ); + clReleaseEvent(writeEvent); + clReleaseMemObject(memobjs[0]); + clReleaseMemObject(memobjs[1]); + free( dst ); + free( inptr ); + return -1; + } + + while( ( err = clGetEventProfilingInfo( writeEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof( cl_ulong ), &submitStart, NULL ) ) == + CL_PROFILING_INFO_NOT_AVAILABLE ); + if( err != CL_SUCCESS ){ + print_error( err, "clGetEventProfilingInfo failed" ); + clReleaseEvent(writeEvent); + clReleaseMemObject(memobjs[0]); + clReleaseMemObject(memobjs[1]); + free( dst ); + free( inptr ); + return -1; + } + + err = clGetEventProfilingInfo( writeEvent, CL_PROFILING_COMMAND_START, sizeof( cl_ulong ), &writeStart, NULL ); + if( err != CL_SUCCESS ){ + print_error( err, "clGetEventProfilingInfo failed" ); + clReleaseEvent(writeEvent); + clReleaseMemObject(memobjs[0]); + clReleaseMemObject(memobjs[1]); + free( dst ); + free( inptr ); + return -1; + } + + err = clGetEventProfilingInfo( writeEvent, CL_PROFILING_COMMAND_END, sizeof( cl_ulong ), &writeEnd, NULL ); + if( err != CL_SUCCESS ){ + print_error( err, "clGetEventProfilingInfo failed" ); + clReleaseEvent(writeEvent); + clReleaseMemObject(memobjs[0]); + clReleaseMemObject(memobjs[1]); + free( dst ); + free( inptr ); + return -1; + } + + err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &code, name ); + if( err ){ + log_error( "Unable to create program and kernel\n" ); + clReleaseEvent(writeEvent); + clReleaseMemObject(memobjs[0]); + 
clReleaseMemObject(memobjs[1]); + free( dst ); + free( inptr ); + return -1; + } + + err = clSetKernelArg( kernel[0], 0, sizeof( cl_mem ), (void *)&memobjs[0] ); + err |= clSetKernelArg( kernel[0], 1, sizeof( cl_mem ), (void *)&memobjs[1] ); + if( err != CL_SUCCESS ){ + log_error( "clSetKernelArg failed\n" ); + clReleaseEvent(writeEvent); + clReleaseKernel( kernel[0] ); + clReleaseProgram( program[0] ); + clReleaseMemObject(memobjs[0]); + clReleaseMemObject(memobjs[1]); + free( dst ); + free( inptr ); + return -1; + } + +#ifdef USE_LOCAL_THREADS + err = clEnqueueNDRangeKernel(queue, kernel[0], 2, NULL, threads, localThreads, 0, NULL, NULL ); +#else + err = clEnqueueNDRangeKernel(queue, kernel[0], 2, NULL, threads, NULL, 0, NULL, NULL ); +#endif + if( err != CL_SUCCESS ){ + print_error( err, "clEnqueueNDRangeKernel failed" ); + clReleaseEvent(writeEvent); + clReleaseKernel( kernel[0] ); + clReleaseProgram( program[0] ); + clReleaseMemObject(memobjs[0]); + clReleaseMemObject(memobjs[1]); + free( dst ); + free( inptr ); + return -1; + } + + err = clEnqueueReadBuffer( queue, memobjs[1], true, 0, num_bytes, dst, 0, NULL, NULL ); + if( err != CL_SUCCESS ){ + print_error( err, "clEnqueueReadBuffer failed" ); + clReleaseEvent(writeEvent); + clReleaseKernel( kernel[0] ); + clReleaseProgram( program[0] ); + clReleaseMemObject(memobjs[0]); + clReleaseMemObject(memobjs[1]); + free( dst ); + free( inptr ); + return -1; + } + + if ( readFloat ) + { + refptr = prepareReference( (cl_uchar *)inptr, w, h ); + if ( refptr ) + { + err = verifyImageFloat( refptr, (cl_float *)dst, w, h ); + free ( refptr ); + } + else + err = -1; + } + else + err = verifyImage( (cl_uchar *)inptr, (cl_uchar *)dst, w, h ); + + if( err ) + { + log_error( "Image failed to verify.\n" ); + } + else + { + log_info( "Image verified.\n" ); + } + + // cleanup + clReleaseEvent(writeEvent); + clReleaseKernel( kernel[0] ); + clReleaseProgram( program[0] ); + clReleaseMemObject(memobjs[0]); + 
clReleaseMemObject(memobjs[1]); + free( dst ); + free( inptr ); + + if (check_times(queueStart, submitStart, writeStart, writeEnd, device)) + err = -1; + + return err; + +} // end write_image() + + +int write_float_image( cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) +{ + cl_image_format image_format_desc = { CL_RGBA, CL_UNORM_INT8 }; + PASSIVE_REQUIRE_IMAGE_SUPPORT( device ) + // 0 to 255 for unsigned image data + return write_image( device, context, queue, numElements, readKernelCode[0], readKernelName[0], image_format_desc, 1 ); + +} + + +int write_char_image( cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) +{ + cl_image_format image_format_desc = { CL_RGBA, CL_SIGNED_INT8 }; + PASSIVE_REQUIRE_IMAGE_SUPPORT( device ) + // -128 to 127 for signed iamge data + return write_image( device, context, queue, numElements, readKernelCode[1], readKernelName[1], image_format_desc, 0 ); + +} + + +int write_uchar_image( cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) +{ + cl_image_format image_format_desc = { CL_RGBA, CL_UNSIGNED_INT8 }; + PASSIVE_REQUIRE_IMAGE_SUPPORT( device ) + // 0 to 255 for unsigned image data + return write_image( device, context, queue, numElements, readKernelCode[2], readKernelName[2], image_format_desc, 0 ); + +} + + diff --git a/test_conformance/relationals/CMakeLists.txt b/test_conformance/relationals/CMakeLists.txt new file mode 100644 index 00000000..159b8e5b --- /dev/null +++ b/test_conformance/relationals/CMakeLists.txt @@ -0,0 +1,20 @@ +set(MODULE_NAME RELATIONALS) + +set(${MODULE_NAME}_SOURCES + main.c + test_relationals.cpp + test_comparisons_float.cpp + test_comparisons_double.cpp + test_shuffles.cpp + ../../test_common/harness/errorHelpers.c + ../../test_common/harness/threadTesting.c + ../../test_common/harness/testHarness.c + ../../test_common/harness/kernelHelpers.c + ../../test_common/harness/mt19937.c + 
../../test_common/harness/conversions.c + ../../test_common/harness/msvc9.c + ../../test_common/harness/parseParameters.cpp +) + +include(../CMakeCommon.txt) + diff --git a/test_conformance/relationals/Jamfile b/test_conformance/relationals/Jamfile new file mode 100644 index 00000000..1ef71abf --- /dev/null +++ b/test_conformance/relationals/Jamfile @@ -0,0 +1,19 @@ +project + : requirements + gcc:-xc++ + msvc:"/TP" + ; + +exe test_relationals + : main.c + test_comparisons_double.cpp + test_comparisons_float.cpp + test_relationals.cpp + test_shuffles.cpp + ; + +install dist + : test_relationals + : debug:$(DIST)/debug/tests/test_conformance/relationals + release:$(DIST)/release/tests/test_conformance/relationals + ; diff --git a/test_conformance/relationals/Makefile b/test_conformance/relationals/Makefile new file mode 100644 index 00000000..99406dc8 --- /dev/null +++ b/test_conformance/relationals/Makefile @@ -0,0 +1,46 @@ +ifdef BUILD_WITH_ATF +ATF = -framework ATF +USE_ATF = -DUSE_ATF +endif + +SRCS = main.c \ + test_relationals.cpp \ + test_shuffles.cpp \ + test_comparisons_double.cpp \ + test_comparisons_float.cpp \ + ../../test_common/harness/errorHelpers.c \ + ../../test_common/harness/threadTesting.c \ + ../../test_common/harness/testHarness.c \ + ../../test_common/harness/kernelHelpers.c \ + ../../test_common/harness/mt19937.c \ + ../../test_common/harness/conversions.c + +DEFINES = + +SOURCES = $(abspath $(SRCS)) +LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries +LIBPATH += -L. 
+FRAMEWORK = $(SOURCES) +HEADERS = +TARGET = test_relationals +INCLUDE = +COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32 -Os +CC = c++ +CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE) +CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE) +LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF} + +OBJECTS := ${SOURCES:.c=.o} +OBJECTS := ${OBJECTS:.cpp=.o} + +TARGETOBJECT = +all: $(TARGET) + +$(TARGET): $(OBJECTS) + $(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES) + +clean: + rm -f $(TARGET) $(OBJECTS) + +.DEFAULT: + @echo The target \"$@\" does not exist in Makefile. diff --git a/test_conformance/relationals/main.c b/test_conformance/relationals/main.c new file mode 100644 index 00000000..0b90fac8 --- /dev/null +++ b/test_conformance/relationals/main.c @@ -0,0 +1,100 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/compat.h" + +#include +#include +#include "procs.h" +#include "../../test_common/harness/testHarness.h" + +#if !defined(_WIN32) +#include +#endif + +#if DENSE_PACK_VECS +const int g_vector_aligns[] = {0, 1, 2, 3, 4, + 5, 6, 7, 8, + 9, 10, 11, 12, + 13, 14, 15, 16}; + +#else +const int g_vector_aligns[] = {0, 1, 2, 4, 4, + 8, 8, 8, 8, + 16, 16, 16, 16, + 16, 16, 16, 16}; +#endif + + +const int g_vector_allocs[] = {0, 1, 2, 4, 4, + 8, 8, 8, 8, + 16, 16, 16, 16, + 16, 16, 16, 16}; + + +basefn basefn_list[] = { + test_relational_any, + test_relational_all, + test_relational_bitselect, + test_relational_select_signed, + test_relational_select_unsigned, + + test_relational_isequal, + test_relational_isnotequal, + test_relational_isgreater, + test_relational_isgreaterequal, + test_relational_isless, + test_relational_islessequal, + test_relational_islessgreater, + + test_shuffle_copy, + test_shuffle_function_call, + test_shuffle_array_cast, + test_shuffle_built_in, + test_shuffle_built_in_dual_input +}; + +const char *basefn_names[] = { + "relational_any", + "relational_all", + "relational_bitselect", + "relational_select_signed", + "relational_select_unsigned", + + "relational_isequal", + "relational_isnotequal", + "relational_isgreater", + "relational_isgreaterequal", + "relational_isless", + "relational_islessequal", + "relational_islessgreater", + + "shuffle_copy", + "shuffle_function_call", + "shuffle_array_cast", + "shuffle_built_in", + "shuffle_built_in_dual_input", +}; + +ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_list) / sizeof(basefn_list[0]))); + +int num_fns = sizeof(basefn_names) / sizeof(char *); + +int main(int argc, const char *argv[]) +{ + return runTestHarness( argc, argv, num_fns, basefn_list, basefn_names, false, false, 0 ); +} + + diff --git a/test_conformance/relationals/procs.h b/test_conformance/relationals/procs.h new file mode 100644 index 00000000..5ee78b8a --- /dev/null 
+++ b/test_conformance/relationals/procs.h @@ -0,0 +1,56 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/errorHelpers.h" +#include "../../test_common/harness/kernelHelpers.h" +#include "../../test_common/harness/threadTesting.h" +#include "../../test_common/harness/typeWrappers.h" +#include "../../test_common/harness/conversions.h" +#include "../../test_common/harness/mt19937.h" + +// The number of errors to print out for each test in the shuffle tests +#define MAX_ERRORS_TO_PRINT 1 + +extern const int g_vector_aligns[]; +extern const int g_vector_allocs[]; + +#define DENSE_PACK_VECS 1 + +extern int create_program_and_kernel(const char *source, const char *kernel_name, cl_program *program_ret, cl_kernel *kernel_ret); + +extern int test_relational_any(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_relational_all(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_relational_bitselect(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_relational_select_signed(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_relational_select_unsigned(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int 
test_relational_isequal(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_relational_isnotequal(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_relational_isgreater(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_relational_isgreaterequal(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_relational_isless(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_relational_islessequal(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_relational_islessgreater(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_shuffles(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_shuffles_16(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_shuffles_dual(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_shuffle_copy(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_shuffle_function_call(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_shuffle_array_cast(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_shuffle_built_in(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_shuffle_built_in_dual_input(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + + diff --git a/test_conformance/relationals/testBase.h b/test_conformance/relationals/testBase.h new file mode 100644 index 00000000..5073b21f --- /dev/null +++ 
b/test_conformance/relationals/testBase.h @@ -0,0 +1,31 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef _testBase_h +#define _testBase_h + +#include "../../test_common/harness/compat.h" + +#include <stdio.h> +#include <string.h> +#include <sys/types.h> +#include <sys/stat.h> + +#include "procs.h" + +#endif // _testBase_h + + + diff --git a/test_conformance/relationals/test_comparisons_double.cpp b/test_conformance/relationals/test_comparisons_double.cpp new file mode 100644 index 00000000..21d4d575 --- /dev/null +++ b/test_conformance/relationals/test_comparisons_double.cpp @@ -0,0 +1,361 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "testBase.h" +#include "../../test_common/harness/conversions.h" +#include "../../test_common/harness/typeWrappers.h" + +extern "C" { extern cl_uint gRandomSeed; }; + +#define TEST_SIZE 512 + +const char *equivTestKernelPattern_double = +"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" +"__kernel void sample_test(__global double%s *sourceA, __global double%s *sourceB, __global long%s *destValues, __global long%s *destValuesB)\n" +"{\n" +" int tid = get_global_id(0);\n" +" destValues[tid] = %s( sourceA[tid], sourceB[tid] );\n" +" destValuesB[tid] = sourceA[tid] %s sourceB[tid];\n" +"\n" +"}\n"; + +const char *equivTestKernelPatternLessGreater_double = +"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" +"__kernel void sample_test(__global double%s *sourceA, __global double%s *sourceB, __global long%s *destValues, __global long%s *destValuesB)\n" +"{\n" +" int tid = get_global_id(0);\n" +" destValues[tid] = %s( sourceA[tid], sourceB[tid] );\n" +" destValuesB[tid] = (sourceA[tid] < sourceB[tid]) | (sourceA[tid] > sourceB[tid]);\n" +"\n" +"}\n"; + + +const char *equivTestKernelPattern_double3 = +"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" +"__kernel void sample_test(__global double%s *sourceA, __global double%s *sourceB, __global long%s *destValues, __global long%s *destValuesB)\n" +"{\n" +" int tid = get_global_id(0);\n" +" double3 sampA = vload3(tid, (__global double *)sourceA);\n" +" double3 sampB = vload3(tid, (__global double *)sourceB);\n" +" vstore3(%s( sampA, sampB ), tid, (__global long *)destValues);\n" +" vstore3(( sampA %s sampB ), tid, (__global long *)destValuesB);\n" +"\n" +"}\n"; + +const char *equivTestKernelPatternLessGreater_double3 = +"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" +"__kernel void sample_test(__global double%s *sourceA, __global double%s *sourceB, __global long%s *destValues, __global long%s *destValuesB)\n" +"{\n" +" int tid = get_global_id(0);\n" +" double3 sampA = vload3(tid, (__global double *)sourceA);\n" 
+" double3 sampB = vload3(tid, (__global double *)sourceB);\n" +" vstore3(%s( sampA, sampB ), tid, (__global long *)destValues);\n" +" vstore3(( sampA < sampB ) | (sampA > sampB), tid, (__global long *)destValuesB);\n" +"\n" +"}\n"; + + +typedef bool (*equivVerifyFn)( double inDataA, double inDataB ); + +void verify_equiv_values_double( unsigned int vecSize, double *inDataA, double *inDataB, cl_long *outData, equivVerifyFn verifyFn ) +{ + unsigned int i; + cl_long trueResult; + bool result; + + trueResult = ( vecSize == 1 ) ? 1 : -1; + for( i = 0; i < vecSize; i++ ) + { + result = verifyFn( inDataA[ i ], inDataB[ i ] ); + outData[ i ] = result ? trueResult : 0; + } +} + +void generate_equiv_test_data_double( double *outData, unsigned int vecSize, bool alpha, MTdata d ) +{ + unsigned int i; + + generate_random_data( kDouble, vecSize * TEST_SIZE, d, outData ); + + // Fill the first few vectors with NAN in each vector element (or the second set if we're alpha, so we can test either case) + if( alpha ) + outData += vecSize * vecSize; + for( i = 0; i < vecSize; i++ ) + { + outData[ 0 ] = NAN; + outData += vecSize + 1; + } + // Make sure the third set is filled regardless, to test the case where both have NANs + if( !alpha ) + outData += vecSize * vecSize; + for( i = 0; i < vecSize; i++ ) + { + outData[ 0 ] = NAN; + outData += vecSize + 1; + } +} + +int test_equiv_kernel_double(cl_context context, cl_command_queue queue, const char *fnName, const char *opName, + unsigned int vecSize, equivVerifyFn verifyFn, MTdata d ) +{ + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper streams[4]; + double inDataA[TEST_SIZE * 16], inDataB[ TEST_SIZE * 16 ]; + cl_long outData[TEST_SIZE * 16], expected[16]; + int error, i, j; + size_t threads[1], localThreads[1]; + char kernelSource[10240]; + char *programPtr; + char sizeName[4]; + + + /* Create the source */ + if( vecSize == 1 ) + sizeName[ 0 ] = 0; + else + sprintf( sizeName, "%d", vecSize ); + + if(DENSE_PACK_VECS && 
vecSize == 3) { + if (strcmp(fnName, "islessgreater")) { + sprintf( kernelSource, equivTestKernelPattern_double3, sizeName, sizeName, sizeName, sizeName, fnName, opName ); + } else { + sprintf( kernelSource, equivTestKernelPatternLessGreater_double3, sizeName, sizeName, sizeName, sizeName, fnName ); + } + } else { + if (strcmp(fnName, "islessgreater")) { + sprintf( kernelSource, equivTestKernelPattern_double, sizeName, sizeName, sizeName, sizeName, fnName, opName ); + } else { + sprintf( kernelSource, equivTestKernelPatternLessGreater_double, sizeName, sizeName, sizeName, sizeName, fnName ); + } + } + + /* Create kernels */ + programPtr = kernelSource; + if( create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&programPtr, "sample_test" ) ) + { + return -1; + } + + /* Generate some streams */ + generate_equiv_test_data_double( inDataA, vecSize, true, d ); + generate_equiv_test_data_double( inDataB, vecSize, false, d ); + + streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof( cl_double ) * vecSize * TEST_SIZE, &inDataA, &error); + if( streams[0] == NULL ) + { + print_error( error, "Creating input array A failed!\n"); + return -1; + } + streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof( cl_double ) * vecSize * TEST_SIZE, &inDataB, &error); + if( streams[1] == NULL ) + { + print_error( error, "Creating input array B failed!\n"); + return -1; + } + streams[2] = clCreateBuffer( context, CL_MEM_READ_WRITE, sizeof( cl_long ) * vecSize * TEST_SIZE, NULL, &error); + if( streams[2] == NULL ) + { + print_error( error, "Creating output array failed!\n"); + return -1; + } + streams[3] = clCreateBuffer( context, CL_MEM_READ_WRITE, sizeof( cl_long ) * vecSize * TEST_SIZE, NULL, &error); + if( streams[3] == NULL ) + { + print_error( error, "Creating output array failed!\n"); + return -1; + } + + + /* Assign streams and execute */ + error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), 
&streams[0] ); + test_error( error, "Unable to set indexed kernel arguments" ); + error = clSetKernelArg( kernel, 1, sizeof( streams[1] ), &streams[1] ); + test_error( error, "Unable to set indexed kernel arguments" ); + error = clSetKernelArg( kernel, 2, sizeof( streams[2] ), &streams[2] ); + test_error( error, "Unable to set indexed kernel arguments" ); + error = clSetKernelArg( kernel, 3, sizeof( streams[3] ), &streams[3] ); + test_error( error, "Unable to set indexed kernel arguments" ); + + + /* Run the kernel */ + threads[0] = TEST_SIZE; + + error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] ); + test_error( error, "Unable to get work group size to use" ); + + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); + test_error( error, "Unable to execute test kernel" ); + + /* Now get the results */ + error = clEnqueueReadBuffer( queue, streams[2], true, 0, sizeof( cl_long ) * TEST_SIZE * vecSize, outData, 0, NULL, NULL ); + test_error( error, "Unable to read output array!" ); + + /* And verify! */ + for( i = 0; i < TEST_SIZE; i++ ) + { + verify_equiv_values_double( vecSize, &inDataA[ i * vecSize ], &inDataB[ i * vecSize ], expected, verifyFn); + + for( j = 0; j < (int)vecSize; j++ ) + { + if( expected[ j ] != outData[ i * vecSize + j ] ) + { + log_error( "ERROR: Data sample %d:%d at size %d does not validate! Expected %lld, got %lld, source %f,%f\n", + i, j, vecSize, expected[ j ], outData[ i * vecSize + j ], inDataA[i*vecSize + j], inDataB[i*vecSize + j] ); + return -1; + } + } + } + + /* Now get the results */ + error = clEnqueueReadBuffer( queue, streams[3], true, 0, sizeof( cl_long ) * TEST_SIZE * vecSize, outData, 0, NULL, NULL ); + test_error( error, "Unable to read output array!" ); + + /* And verify! 
*/ + for( i = 0; i < TEST_SIZE; i++ ) + { + verify_equiv_values_double( vecSize, &inDataA[ i * vecSize ], &inDataB[ i * vecSize ], expected, verifyFn); + + for( j = 0; j < (int)vecSize; j++ ) + { + if( expected[ j ] != outData[ i * vecSize + j ] ) + { + log_error( "ERROR: Data sample %d:%d at size %d does not validate! Expected %lld, got %lld, source %f,%f\n", + i, j, vecSize, expected[ j ], outData[ i * vecSize + j ], inDataA[i*vecSize + j], inDataB[i*vecSize + j] ); + return -1; + } + } + } + + return 0; +} + +int test_equiv_kernel_set_double(cl_device_id device, cl_context context, cl_command_queue queue, const char *fnName, const char *opName, equivVerifyFn verifyFn, MTdata d ) +{ + unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 }; + unsigned int index; + int retVal = 0; + + if (!is_extension_available(device, "cl_khr_fp64")) { + log_info("Extension cl_khr_fp64 not supported; skipping double tests.\n"); + return 0; + } + log_info("Testing doubles.\n"); + + for( index = 0; vecSizes[ index ] != 0; index++ ) + { + // Test! 
+ if( test_equiv_kernel_double(context, queue, fnName, opName, vecSizes[ index ], verifyFn, d ) != 0 ) + { + log_error( " Vector double%d FAILED\n", vecSizes[ index ] ); + retVal = -1; + } + } + + return retVal; +} + +bool isequal_verify_fn_double( double valueA, double valueB ) +{ + if( isnan( valueA ) || isnan( valueB ) ) + return false; + return valueA == valueB; +} + +int test_relational_isequal_double(cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) +{ + RandomSeed seed(gRandomSeed); + return test_equiv_kernel_set_double( device, context, queue, "isequal", "==", isequal_verify_fn_double, seed ); +} + +bool isnotequal_verify_fn_double( double valueA, double valueB ) +{ + if( isnan( valueA ) || isnan( valueB ) ) + return true; + return valueA != valueB; +} + +int test_relational_isnotequal_double(cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) +{ + RandomSeed seed(gRandomSeed); + return test_equiv_kernel_set_double( device, context, queue, "isnotequal", "!=", isnotequal_verify_fn_double, seed ); +} + +bool isgreater_verify_fn_double( double valueA, double valueB ) +{ + if( isnan( valueA ) || isnan( valueB ) ) + return false; + return valueA > valueB; +} + +int test_relational_isgreater_double(cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) +{ + RandomSeed seed(gRandomSeed); + return test_equiv_kernel_set_double( device, context, queue, "isgreater", ">", isgreater_verify_fn_double, seed ); +} + +bool isgreaterequal_verify_fn_double( double valueA, double valueB ) +{ + if( isnan( valueA ) || isnan( valueB ) ) + return false; + return valueA >= valueB; +} + +int test_relational_isgreaterequal_double(cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) +{ + RandomSeed seed(gRandomSeed); + return test_equiv_kernel_set_double( device, context, queue, "isgreaterequal", ">=", isgreaterequal_verify_fn_double, seed ); +} + +bool 
isless_verify_fn_double( double valueA, double valueB ) +{ + if( isnan( valueA ) || isnan( valueB ) ) + return false; + return valueA < valueB; +} + +int test_relational_isless_double(cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) +{ + RandomSeed seed(gRandomSeed); + return test_equiv_kernel_set_double( device, context, queue, "isless", "<", isless_verify_fn_double, seed ); +} + +bool islessequal_verify_fn_double( double valueA, double valueB ) +{ + if( isnan( valueA ) || isnan( valueB ) ) + return false; + return valueA <= valueB; +} + +int test_relational_islessequal_double(cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) +{ + RandomSeed seed(gRandomSeed); + return test_equiv_kernel_set_double( device, context, queue, "islessequal", "<=", islessequal_verify_fn_double, seed ); +} + +bool islessgreater_verify_fn_double( double valueA, double valueB ) +{ + if( isnan( valueA ) || isnan( valueB ) ) + return false; + return ( valueA < valueB ) || ( valueA > valueB ); +} + +int test_relational_islessgreater_double(cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) +{ + RandomSeed seed(gRandomSeed); + return test_equiv_kernel_set_double( device, context, queue, "islessgreater", "<>", islessgreater_verify_fn_double, seed ); +} + + diff --git a/test_conformance/relationals/test_comparisons_float.cpp b/test_conformance/relationals/test_comparisons_float.cpp new file mode 100644 index 00000000..76b31989 --- /dev/null +++ b/test_conformance/relationals/test_comparisons_float.cpp @@ -0,0 +1,361 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "testBase.h" +#include "../../test_common/harness/conversions.h" +#include "../../test_common/harness/typeWrappers.h" + +extern "C" { extern cl_uint gRandomSeed;}; + +#define TEST_SIZE 512 + +const char *equivTestKernelPattern_float = +"__kernel void sample_test(__global float%s *sourceA, __global float%s *sourceB, __global int%s *destValues, __global int%s *destValuesB)\n" +"{\n" +" int tid = get_global_id(0);\n" +" destValues[tid] = %s( sourceA[tid], sourceB[tid] );\n" +" destValuesB[tid] = sourceA[tid] %s sourceB[tid];\n" +"\n" +"}\n"; + +const char *equivTestKernelPatternLessGreater_float = +"__kernel void sample_test(__global float%s *sourceA, __global float%s *sourceB, __global int%s *destValues, __global int%s *destValuesB)\n" +"{\n" +" int tid = get_global_id(0);\n" +" destValues[tid] = %s( sourceA[tid], sourceB[tid] );\n" +" destValuesB[tid] = (sourceA[tid] < sourceB[tid]) | (sourceA[tid] > sourceB[tid]);\n" +"\n" +"}\n"; + + +const char *equivTestKernelPattern_float3 = +"__kernel void sample_test(__global float%s *sourceA, __global float%s *sourceB, __global int%s *destValues, __global int%s *destValuesB)\n" +"{\n" +" int tid = get_global_id(0);\n" +" float3 sampA = vload3(tid, (__global float *)sourceA);\n" +" float3 sampB = vload3(tid, (__global float *)sourceB);\n" +" vstore3(%s( sampA, sampB ), tid, (__global int *)destValues);\n" +" vstore3(( sampA %s sampB ), tid, (__global int *)destValuesB);\n" +"\n" +"}\n"; + +const char *equivTestKernelPatternLessGreater_float3 = +"__kernel void sample_test(__global float%s 
*sourceA, __global float%s *sourceB, __global int%s *destValues, __global int%s *destValuesB)\n" +"{\n" +" int tid = get_global_id(0);\n" +" float3 sampA = vload3(tid, (__global float *)sourceA);\n" +" float3 sampB = vload3(tid, (__global float *)sourceB);\n" +" vstore3(%s( sampA, sampB ), tid, (__global int *)destValues);\n" +" vstore3(( sampA < sampB ) | (sampA > sampB), tid, (__global int *)destValuesB);\n" +"\n" +"}\n"; + +typedef bool (*equivVerifyFn)( float inDataA, float inDataB ); +extern "C" { extern int gInfNanSupport; }; + +int IsFloatInfinity(float x) +{ + return isinf(x); +} + +int IsFloatNaN(float x) +{ + return isnan(x); +} + +void verify_equiv_values_float( unsigned int vecSize, float *inDataA, float *inDataB, int *outData, equivVerifyFn verifyFn ) +{ + unsigned int i; + int trueResult; + bool result; + + trueResult = ( vecSize == 1 ) ? 1 : -1; + for( i = 0; i < vecSize; i++ ) + { + result = verifyFn( inDataA[ i ], inDataB[ i ] ); + outData[ i ] = result ? trueResult : 0; + } +} + +void generate_equiv_test_data_float( float *outData, unsigned int vecSize, bool alpha, MTdata d ) +{ + unsigned int i; + + generate_random_data( kFloat, vecSize * TEST_SIZE, d, outData ); + + // Fill the first few vectors with NAN in each vector element (or the second set if we're alpha, so we can test either case) + if( alpha ) + outData += vecSize * vecSize; + for( i = 0; i < vecSize; i++ ) + { + outData[ 0 ] = NAN; + outData += vecSize + 1; + } + // Make sure the third set is filled regardless, to test the case where both have NANs + if( !alpha ) + outData += vecSize * vecSize; + for( i = 0; i < vecSize; i++ ) + { + outData[ 0 ] = NAN; + outData += vecSize + 1; + } +} + +int test_equiv_kernel_float(cl_context context, cl_command_queue queue, const char *fnName, const char *opName, + unsigned int vecSize, equivVerifyFn verifyFn, MTdata d ) +{ + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper streams[4]; + float inDataA[TEST_SIZE * 16], inDataB[ 
TEST_SIZE * 16 ]; + int outData[TEST_SIZE * 16], expected[16]; + int error, i, j; + size_t threads[1], localThreads[1]; + char kernelSource[10240]; + char *programPtr; + char sizeName[4]; + + + /* Create the source */ + if( vecSize == 1 ) + sizeName[ 0 ] = 0; + else + sprintf( sizeName, "%d", vecSize ); + + + if(DENSE_PACK_VECS && vecSize == 3) { + if (strcmp(fnName, "islessgreater")) { + sprintf( kernelSource, equivTestKernelPattern_float3, sizeName, sizeName, sizeName, sizeName, fnName, opName ); + } else { + sprintf( kernelSource, equivTestKernelPatternLessGreater_float3, sizeName, sizeName, sizeName, sizeName, fnName ); + } + } else { + if (strcmp(fnName, "islessgreater")) { + sprintf( kernelSource, equivTestKernelPattern_float, sizeName, sizeName, sizeName, sizeName, fnName, opName ); + } else { + sprintf( kernelSource, equivTestKernelPatternLessGreater_float, sizeName, sizeName, sizeName, sizeName, fnName ); + } + } + + /* Create kernels */ + programPtr = kernelSource; + if( create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&programPtr, "sample_test" ) ) + { + return -1; + } + + /* Generate some streams */ + generate_equiv_test_data_float( inDataA, vecSize, true, d ); + generate_equiv_test_data_float( inDataB, vecSize, false, d ); + + streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof( cl_float ) * vecSize * TEST_SIZE, &inDataA, &error); + if( streams[0] == NULL ) + { + print_error( error, "Creating input array A failed!\n"); + return -1; + } + streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof( cl_float ) * vecSize * TEST_SIZE, &inDataB, &error); + if( streams[1] == NULL ) + { + print_error( error, "Creating input array B failed!\n"); + return -1; + } + streams[2] = clCreateBuffer( context, CL_MEM_READ_WRITE, sizeof( cl_int ) * vecSize * TEST_SIZE, NULL, &error); + if( streams[2] == NULL ) + { + print_error( error, "Creating output array failed!\n"); + return -1; + } + 
streams[3] = clCreateBuffer( context, CL_MEM_READ_WRITE, sizeof( cl_int ) * vecSize * TEST_SIZE, NULL, &error); + if( streams[3] == NULL ) + { + print_error( error, "Creating output array failed!\n"); + return -1; + } + + + /* Assign streams and execute */ + error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] ); + test_error( error, "Unable to set indexed kernel arguments" ); + error = clSetKernelArg( kernel, 1, sizeof( streams[1] ), &streams[1] ); + test_error( error, "Unable to set indexed kernel arguments" ); + error = clSetKernelArg( kernel, 2, sizeof( streams[2] ), &streams[2] ); + test_error( error, "Unable to set indexed kernel arguments" ); + error = clSetKernelArg( kernel, 3, sizeof( streams[3] ), &streams[3] ); + test_error( error, "Unable to set indexed kernel arguments" ); + + + /* Run the kernel */ + threads[0] = TEST_SIZE; + + error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] ); + test_error( error, "Unable to get work group size to use" ); + + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); + test_error( error, "Unable to execute test kernel" ); + + /* Now get the results */ + error = clEnqueueReadBuffer( queue, streams[2], true, 0, sizeof( int ) * TEST_SIZE * vecSize, outData, 0, NULL, NULL ); + test_error( error, "Unable to read output array!" ); + + /* And verify! */ + for( i = 0; i < TEST_SIZE; i++ ) + { + verify_equiv_values_float( vecSize, &inDataA[ i * vecSize ], &inDataB[ i * vecSize ], expected, verifyFn); + + for( j = 0; j < (int)vecSize; j++ ) + { + if( expected[ j ] != outData[ i * vecSize + j ] ) + { + log_error( "ERROR: Data sample %d:%d at size %d does not validate! 
 Expected %d, got %d, source %f,%f\n", + i, j, vecSize, expected[ j ], outData[ i * vecSize + j ], inDataA[i*vecSize + j], inDataB[i*vecSize + j] ); + return -1; + } + } + } + + /* Now get the results */ + error = clEnqueueReadBuffer( queue, streams[3], true, 0, sizeof( int ) * TEST_SIZE * vecSize, outData, 0, NULL, NULL ); + test_error( error, "Unable to read output array!" ); + + /* And verify! */ + int fail = 1; /* default to failing a mismatch; only excused below when inf/nan is unsupported and a NaN is involved */ + for( i = 0; i < TEST_SIZE; i++ ) + { + verify_equiv_values_float( vecSize, &inDataA[ i * vecSize ], &inDataB[ i * vecSize ], expected, verifyFn); + + for( j = 0; j < (int)vecSize; j++ ) + { + if( expected[ j ] != outData[ i * vecSize + j ] ) + { + if (gInfNanSupport == 0) + { + if (IsFloatNaN(inDataA[i*vecSize + j]) || IsFloatNaN (inDataB[i*vecSize + j])) + { + fail = 0; + } + else + fail = 1; + } + if (fail) + { + log_error( "ERROR: Data sample %d:%d at size %d does not validate! Expected %d, got %d, source %f,%f\n", + i, j, vecSize, expected[ j ], outData[ i * vecSize + j ], inDataA[i*vecSize + j], inDataB[i*vecSize + j] ); + return -1; + } + } + } + } + + return 0; +} + +int test_equiv_kernel_set_float(cl_context context, cl_command_queue queue, const char *fnName, const char *opName, equivVerifyFn verifyFn, MTdata d ) +{ + unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 }; + unsigned int index; + int retVal = 0; + + for( index = 0; vecSizes[ index ] != 0; index++ ) + { + // Test! 
+ if( test_equiv_kernel_float(context, queue, fnName, opName, vecSizes[ index ], verifyFn, d ) != 0 ) + { + log_error( " Vector float%d FAILED\n", vecSizes[ index ] ); + retVal = -1; + } + } + + return retVal; +} + +bool isequal_verify_fn_float( float valueA, float valueB ) +{ + return valueA == valueB; +} + +int test_relational_isequal_float(cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) +{ + RandomSeed seed( gRandomSeed ); + return test_equiv_kernel_set_float( context, queue, "isequal", "==", isequal_verify_fn_float, seed ); +} + +bool isnotequal_verify_fn_float( float valueA, float valueB ) +{ + return valueA != valueB; +} + +int test_relational_isnotequal_float(cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) +{ + RandomSeed seed( gRandomSeed ); + return test_equiv_kernel_set_float( context, queue, "isnotequal", "!=", isnotequal_verify_fn_float, seed ); +} + +bool isgreater_verify_fn_float( float valueA, float valueB ) +{ + return valueA > valueB; +} + +int test_relational_isgreater_float(cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) +{ + RandomSeed seed( gRandomSeed ); + return test_equiv_kernel_set_float( context, queue, "isgreater", ">", isgreater_verify_fn_float, seed ); +} + +bool isgreaterequal_verify_fn_float( float valueA, float valueB ) +{ + return valueA >= valueB; +} + +int test_relational_isgreaterequal_float(cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) +{ + RandomSeed seed( gRandomSeed ); + return test_equiv_kernel_set_float( context, queue, "isgreaterequal", ">=", isgreaterequal_verify_fn_float, seed ); +} + +bool isless_verify_fn_float( float valueA, float valueB ) +{ + return valueA < valueB; +} + +int test_relational_isless_float(cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) +{ + RandomSeed seed( gRandomSeed ); + return test_equiv_kernel_set_float( context, queue, 
"isless", "<", isless_verify_fn_float, seed ); +} + +bool islessequal_verify_fn_float( float valueA, float valueB ) +{ + return valueA <= valueB; +} + +int test_relational_islessequal_float(cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) +{ + RandomSeed seed( gRandomSeed ); + return test_equiv_kernel_set_float( context, queue, "islessequal", "<=", islessequal_verify_fn_float, seed ); +} + +bool islessgreater_verify_fn_float( float valueA, float valueB ) +{ + return ( valueA < valueB ) || ( valueA > valueB ); +} + +int test_relational_islessgreater_float(cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) +{ + RandomSeed seed( gRandomSeed ); + return test_equiv_kernel_set_float( context, queue, "islessgreater", "<>", islessgreater_verify_fn_float, seed ); +} + + diff --git a/test_conformance/relationals/test_relationals.cpp b/test_conformance/relationals/test_relationals.cpp new file mode 100644 index 00000000..9542b851 --- /dev/null +++ b/test_conformance/relationals/test_relationals.cpp @@ -0,0 +1,785 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "testBase.h" +#include "../../test_common/harness/conversions.h" +#include "../../test_common/harness/typeWrappers.h" +#include "../../test_common/harness/testHarness.h" + +const char *anyAllTestKernelPattern = +"%s\n" // optional pragma +"__kernel void sample_test(__global %s%s *sourceA, __global int *destValues)\n" +"{\n" +" int tid = get_global_id(0);\n" +" destValues[tid] = %s( sourceA[tid] );\n" +"\n" +"}\n"; + +const char *anyAllTestKernelPatternVload = +"%s\n" // optional pragma +"__kernel void sample_test(__global %s%s *sourceA, __global int *destValues)\n" +"{\n" +" int tid = get_global_id(0);\n" +" destValues[tid] = %s(vload3(tid, (__global %s *)sourceA));\n" // ugh, almost +"\n" +"}\n"; + +#define TEST_SIZE 512 + +extern "C" {extern cl_uint gRandomSeed;}; + +typedef int (*anyAllVerifyFn)( ExplicitType vecType, unsigned int vecSize, void *inData ); + +int test_any_all_kernel(cl_context context, cl_command_queue queue, + const char *fnName, ExplicitType vecType, + unsigned int vecSize, anyAllVerifyFn verifyFn, + MTdata d ) +{ + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper streams[2]; + cl_long inDataA[TEST_SIZE * 16], clearData[TEST_SIZE * 16]; + int outData[TEST_SIZE]; + int error, i; + size_t threads[1], localThreads[1]; + char kernelSource[10240]; + char *programPtr; + char sizeName[4]; + + + /* Create the source */ + if( g_vector_aligns[vecSize] == 1 ) { + sizeName[ 0 ] = 0; + } else { + sprintf( sizeName, "%d", vecSize ); + } + log_info("Testing any/all on %s%s\n", + get_explicit_type_name( vecType ), sizeName); + if(DENSE_PACK_VECS && vecSize == 3) { + // anyAllTestKernelPatternVload + sprintf( kernelSource, anyAllTestKernelPatternVload, + vecType == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "", + get_explicit_type_name( vecType ), sizeName, fnName, + get_explicit_type_name(vecType)); + } else { + sprintf( kernelSource, anyAllTestKernelPattern, + vecType == kDouble ? 
"#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "", + get_explicit_type_name( vecType ), sizeName, fnName ); + } + /* Create kernels */ + programPtr = kernelSource; + if( create_single_kernel_helper( context, &program, &kernel, 1, + (const char **)&programPtr, + "sample_test" ) ) + { + return -1; + } + + /* Generate some streams */ + generate_random_data( vecType, TEST_SIZE * g_vector_aligns[vecSize], d, inDataA ); + memset( clearData, 0, sizeof( clearData ) ); + + streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), get_explicit_type_size( vecType ) * g_vector_aligns[vecSize] * TEST_SIZE, &inDataA, &error); + if( streams[0] == NULL ) + { + print_error( error, "Creating input array A failed!\n"); + return -1; + } + streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_int) * g_vector_aligns[vecSize] * TEST_SIZE, clearData, &error ); + if( streams[1] == NULL ) + { + print_error( error, "Creating output array failed!\n"); + return -1; + } + + /* Assign streams and execute */ + error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] ); + test_error( error, "Unable to set indexed kernel arguments" ); + error = clSetKernelArg( kernel, 1, sizeof( streams[1] ), &streams[1] ); + test_error( error, "Unable to set indexed kernel arguments" ); + + /* Run the kernel */ + threads[0] = TEST_SIZE; + + error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] ); + test_error( error, "Unable to get work group size to use" ); + + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); + test_error( error, "Unable to execute test kernel" ); + + /* Now get the results */ + error = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof( int ) * TEST_SIZE, outData, 0, NULL, NULL ); + test_error( error, "Unable to read output array!" ); + + /* And verify! 
*/ + for( i = 0; i < TEST_SIZE; i++ ) + { + int expected = verifyFn( vecType, vecSize, (char *)inDataA + i * get_explicit_type_size( vecType ) * g_vector_aligns[vecSize] ); + if( expected != outData[ i ] ) + { + unsigned int *ptr = (unsigned int *)( (char *)inDataA + i * get_explicit_type_size( vecType ) * g_vector_aligns[vecSize] ); + log_error( "ERROR: Data sample %d does not validate! Expected (%d), got (%d), source 0x%08x\n", + i, expected, outData[i], *ptr ); + return -1; + } + } + + return 0; +} + +int anyVerifyFn( ExplicitType vecType, unsigned int vecSize, void *inData ) +{ + unsigned int i; + switch( vecType ) + { + case kChar: + { + char sum = 0; + char *tData = (char *)inData; + for( i = 0; i < vecSize; i++ ) + sum |= tData[ i ] & 0x80; + return (sum != 0) ? 1 : 0; + } + case kShort: + { + short sum = 0; + short *tData = (short *)inData; + for( i = 0; i < vecSize; i++ ) + sum |= tData[ i ] & 0x8000; + return (sum != 0); + } + case kInt: + { + cl_int sum = 0; + cl_int *tData = (cl_int *)inData; + for( i = 0; i < vecSize; i++ ) + sum |= tData[ i ] & (cl_int)0x80000000L; + return (sum != 0); + } + case kLong: + { + cl_long sum = 0; + cl_long *tData = (cl_long *)inData; + for( i = 0; i < vecSize; i++ ) + sum |= tData[ i ] & 0x8000000000000000LL; + return (sum != 0); + } + default: + return 0; + } +} + +int test_relational_any(cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) +{ + ExplicitType vecType[] = { kChar, kShort, kInt, kLong }; + unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 }; + unsigned int index, typeIndex; + int retVal = 0; + RandomSeed seed(gRandomSeed ); + + for( typeIndex = 0; typeIndex < 4; typeIndex++ ) + { + if (vecType[typeIndex] == kLong && !gHasLong) + continue; + + for( index = 0; vecSizes[ index ] != 0; index++ ) + { + // Test! 
+ if( test_any_all_kernel(context, queue, "any", vecType[ typeIndex ], vecSizes[ index ], anyVerifyFn, seed ) != 0 ) + { + log_error( " Vector %s%d FAILED\n", get_explicit_type_name( vecType[ typeIndex ] ), vecSizes[ index ] ); + retVal = -1; + } + } + } + + return retVal; +} + +int allVerifyFn( ExplicitType vecType, unsigned int vecSize, void *inData ) +{ + unsigned int i; + switch( vecType ) + { + case kChar: + { + char sum = 0x80; + char *tData = (char *)inData; + for( i = 0; i < vecSize; i++ ) + sum &= tData[ i ] & 0x80; + return (sum != 0) ? 1 : 0; + } + case kShort: + { + short sum = 0x8000; + short *tData = (short *)inData; + for( i = 0; i < vecSize; i++ ) + sum &= tData[ i ] & 0x8000; + return (sum != 0); + } + case kInt: + { + cl_int sum = 0x80000000L; + cl_int *tData = (cl_int *)inData; + for( i = 0; i < vecSize; i++ ) + sum &= tData[ i ] & (cl_int)0x80000000L; + return (sum != 0); + } + case kLong: + { + cl_long sum = 0x8000000000000000LL; + cl_long *tData = (cl_long *)inData; + for( i = 0; i < vecSize; i++ ) + sum &= tData[ i ] & 0x8000000000000000LL; + return (sum != 0); + } + default: + return 0; + } +} + +int test_relational_all(cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) +{ + ExplicitType vecType[] = { kChar, kShort, kInt, kLong }; + unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 }; + unsigned int index, typeIndex; + int retVal = 0; + RandomSeed seed(gRandomSeed ); + + + for( typeIndex = 0; typeIndex < 4; typeIndex++ ) + { + if (vecType[typeIndex] == kLong && !gHasLong) + continue; + + for( index = 0; vecSizes[ index ] != 0; index++ ) + { + // Test! 
+ if( test_any_all_kernel(context, queue, "all", vecType[ typeIndex ], vecSizes[ index ], allVerifyFn, seed ) != 0 ) + { + log_error( " Vector %s%d FAILED\n", get_explicit_type_name( vecType[ typeIndex ] ), vecSizes[ index ] ); + retVal = -1; + } + } + } + + return retVal; +} + +const char *selectTestKernelPattern = +"%s\n" // optional pragma +"__kernel void sample_test(__global %s%s *sourceA, __global %s%s *sourceB, __global %s%s *sourceC, __global %s%s *destValues)\n" +"{\n" +" int tid = get_global_id(0);\n" +" destValues[tid] = %s( sourceA[tid], sourceB[tid], sourceC[tid] );\n" +"\n" +"}\n"; + + +const char *selectTestKernelPatternVload = +"%s\n" // optional pragma +"__kernel void sample_test(__global %s%s *sourceA, __global %s%s *sourceB, __global %s%s *sourceC, __global %s%s *destValues)\n" +"{\n" +" int tid = get_global_id(0);\n" +" %s%s tmp = %s( vload3(tid, (__global %s *)sourceA), vload3(tid, (__global %s *)sourceB), vload3(tid, (__global %s *)sourceC) );\n" +" vstore3(tmp, tid, (__global %s *)destValues);\n" +"\n" +"}\n"; + +typedef void (*selectVerifyFn)( ExplicitType vecType, ExplicitType testVecType, unsigned int vecSize, void *inDataA, void *inDataB, void *inDataTest, void *outData ); + +int test_select_kernel(cl_context context, cl_command_queue queue, const char *fnName, + ExplicitType vecType, unsigned int vecSize, ExplicitType testVecType, selectVerifyFn verifyFn, MTdata d ) +{ + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper streams[4]; + cl_long inDataA[TEST_SIZE * 16], inDataB[ TEST_SIZE * 16 ], inDataC[ TEST_SIZE * 16 ]; + cl_long outData[TEST_SIZE * 16], expected[16]; + int error, i; + size_t threads[1], localThreads[1]; + char kernelSource[10240]; + char *programPtr; + char sizeName[4], outSizeName[4]; + unsigned int outVecSize; + + + /* Create the source */ + if( vecSize == 1 ) + sizeName[ 0 ] = 0; + else + sprintf( sizeName, "%d", vecSize ); + + outVecSize = vecSize; + + if( outVecSize == 1 ) + outSizeName[ 0 ] = 0; + 
else + sprintf( outSizeName, "%d", outVecSize ); + + if(DENSE_PACK_VECS && vecSize == 3) { + // anyAllTestKernelPatternVload + sprintf( kernelSource, selectTestKernelPatternVload, + (vecType == kDouble || testVecType == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "", + get_explicit_type_name( vecType ), sizeName, + get_explicit_type_name( vecType ), sizeName, + get_explicit_type_name( testVecType ), sizeName, + get_explicit_type_name( vecType ), outSizeName, + get_explicit_type_name( vecType ), sizeName, + fnName, + get_explicit_type_name( vecType ), + get_explicit_type_name( vecType ), + get_explicit_type_name( vecType ), + get_explicit_type_name( testVecType ) ); + } else { + sprintf( kernelSource, selectTestKernelPattern, + (vecType == kDouble || testVecType == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "", + get_explicit_type_name( vecType ), sizeName, + get_explicit_type_name( vecType ), sizeName, + get_explicit_type_name( testVecType ), sizeName, + get_explicit_type_name( vecType ), outSizeName, + fnName ); + } + + /* Create kernels */ + programPtr = kernelSource; + if( create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&programPtr, "sample_test" ) ) + { + return -1; + } + + /* Generate some streams */ + generate_random_data( vecType, TEST_SIZE * g_vector_aligns[vecSize], d, inDataA ); + generate_random_data( vecType, TEST_SIZE * g_vector_aligns[vecSize], d, inDataB ); + generate_random_data( testVecType, TEST_SIZE * g_vector_aligns[vecSize], d, inDataC ); + + streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), get_explicit_type_size( vecType ) * g_vector_aligns[vecSize] * TEST_SIZE, &inDataA, &error); + if( streams[0] == NULL ) + { + print_error( error, "Creating input array A failed!\n"); + return -1; + } + streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), get_explicit_type_size( vecType ) * g_vector_aligns[vecSize] * TEST_SIZE, &inDataB, &error); + 
if( streams[1] == NULL ) + { + print_error( error, "Creating input array A failed!\n"); + return -1; + } + streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), get_explicit_type_size( testVecType ) * g_vector_aligns[vecSize] * TEST_SIZE, &inDataC, &error); + if( streams[2] == NULL ) + { + print_error( error, "Creating input array A failed!\n"); + return -1; + } + streams[3] = clCreateBuffer( context, CL_MEM_READ_WRITE, get_explicit_type_size( vecType ) * g_vector_aligns[outVecSize] * TEST_SIZE, NULL, &error); + if( streams[3] == NULL ) + { + print_error( error, "Creating output array failed!\n"); + return -1; + } + + /* Assign streams and execute */ + error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] ); + test_error( error, "Unable to set indexed kernel arguments" ); + error = clSetKernelArg( kernel, 1, sizeof( streams[1] ), &streams[1] ); + test_error( error, "Unable to set indexed kernel arguments" ); + error = clSetKernelArg( kernel, 2, sizeof( streams[2] ), &streams[2] ); + test_error( error, "Unable to set indexed kernel arguments" ); + error = clSetKernelArg( kernel, 3, sizeof( streams[3] ), &streams[3] ); + test_error( error, "Unable to set indexed kernel arguments" ); + + /* Run the kernel */ + threads[0] = TEST_SIZE; + + error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] ); + test_error( error, "Unable to get work group size to use" ); + + error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); + test_error( error, "Unable to execute test kernel" ); + + /* Now get the results */ + error = clEnqueueReadBuffer( queue, streams[3], true, 0, get_explicit_type_size( vecType ) * TEST_SIZE * g_vector_aligns[outVecSize], outData, 0, NULL, NULL ); + test_error( error, "Unable to read output array!" ); + + /* And verify! 
*/ + for( i = 0; i < (int)(TEST_SIZE * g_vector_aligns[vecSize]); i++ ) + { + if(i%g_vector_aligns[vecSize] >= (int) vecSize) { + continue; + } + verifyFn( vecType, testVecType, vecSize, (char *)inDataA + i * get_explicit_type_size( vecType ), + (char *)inDataB + i * get_explicit_type_size( vecType ), + (char *)inDataC + i * get_explicit_type_size( testVecType ), + expected); + + char *outPtr = (char *)outData; + outPtr += ( i / g_vector_aligns[vecSize] ) * get_explicit_type_size( vecType ) * g_vector_aligns[outVecSize]; + outPtr += ( i % g_vector_aligns[vecSize] ) * get_explicit_type_size( vecType ); + if( memcmp( expected, outPtr, get_explicit_type_size( vecType ) ) != 0 ) + { + log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%08x), got (0x%08x) from (0x%08x) and (0x%08x) with test (0x%08x)\n", + i / g_vector_aligns[vecSize], + i % g_vector_aligns[vecSize], + *( (int *)expected ), + *( (int *)( (char *)outData + + i * get_explicit_type_size( vecType + ) ) ), + *( (int *)( (char *)inDataA + + i * get_explicit_type_size( vecType + ) ) ), + *( (int *)( (char *)inDataB + + i * get_explicit_type_size( vecType + ) ) ), + *( (int *)( (char *)inDataC + + i*get_explicit_type_size( testVecType + ) ) ) ); + int j; + log_error( "inA: " ); + unsigned char *a = (unsigned char *)( (char *)inDataA + i * get_explicit_type_size( vecType ) ); + unsigned char *b = (unsigned char *)( (char *)inDataB + i * get_explicit_type_size( vecType ) ); + unsigned char *c = (unsigned char *)( (char *)inDataC + i * get_explicit_type_size( testVecType ) ); + unsigned char *e = (unsigned char *)( expected ); + unsigned char *g = (unsigned char *)( (char *)outData + i * get_explicit_type_size( vecType ) ); + for( j = 0; j < 16; j++ ) + log_error( "0x%02x ", a[ j ] ); + log_error( "\ninB: " ); + for( j = 0; j < 16; j++ ) + log_error( "0x%02x ", b[ j ] ); + log_error( "\ninC: " ); + for( j = 0; j < 16; j++ ) + log_error( "0x%02x ", c[ j ] ); + log_error( "\nexp: " ); + for( j = 0; 
j < 16; j++ ) + log_error( "0x%02x ", e[ j ] ); + log_error( "\ngot: " ); + for( j = 0; j < 16; j++ ) + log_error( "0x%02x ", g[ j ] ); + return -1; + } + } + + return 0; +} + +void bitselect_verify_fn( ExplicitType vecType, ExplicitType testVecType, unsigned int vecSize, void *inDataA, void *inDataB, void *inDataTest, void *outData ) +{ + char *inA = (char *)inDataA, *inB = (char *)inDataB, *inT = (char *)inDataTest, *out = (char *)outData; + size_t i, numBytes = get_explicit_type_size( vecType ); + + // Type is meaningless, this is all bitwise! + for( i = 0; i < numBytes; i++ ) + { + out[ i ] = ( inA[ i ] & ~inT[ i ] ) | ( inB[ i ] & inT[ i ] ); + } +} + +int test_relational_bitselect(cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) +{ + ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble }; + unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 }; + unsigned int index, typeIndex; + int retVal = 0; + RandomSeed seed( gRandomSeed ); + + + for( typeIndex = 0; typeIndex < 10; typeIndex++ ) + { + if ((vecType[typeIndex] == kLong || vecType[typeIndex] == kULong) && !gHasLong) + continue; + + if (vecType[typeIndex] == kDouble) + { + if(!is_extension_available(device, "cl_khr_fp64")) + { + log_info("Extension cl_khr_fp64 not supported; skipping double tests.\n"); + continue; + } + else + log_info("Testing doubles.\n"); + } + for( index = 0; vecSizes[ index ] != 0; index++ ) + { + // Test! 
+ if( test_select_kernel(context, queue, "bitselect", vecType[ typeIndex ], vecSizes[ index ], vecType[typeIndex], bitselect_verify_fn, seed ) != 0 ) + { + log_error( " Vector %s%d FAILED\n", get_explicit_type_name( vecType[ typeIndex ] ), vecSizes[ index ] ); + retVal = -1; + } + } + } + + return retVal; +} + +void select_signed_verify_fn( ExplicitType vecType, ExplicitType testVecType, unsigned int vecSize, void *inDataA, void *inDataB, void *inDataTest, void *outData ) +{ + bool yep = false; + if (vecSize == 1) { + switch( testVecType ) + { + case kChar: + yep = *( (char *)inDataTest ) ? true : false; + break; + case kShort: + yep = *( (short *)inDataTest ) ? true : false; + break; + case kInt: + yep = *( (int *)inDataTest ) ? true : false; + break; + case kLong: + yep = *( (cl_long *)inDataTest ) ? true : false; + break; + default: + // Should never get here + return; + } + } + else { + switch( testVecType ) + { + case kChar: + yep = *( (char *)inDataTest ) & 0x80 ? true : false; + break; + case kShort: + yep = *( (short *)inDataTest ) & 0x8000 ? true : false; + break; + case kInt: + yep = *( (int *)inDataTest ) & 0x80000000L ? true : false; + break; + case kLong: + yep = *( (cl_long *)inDataTest ) & 0x8000000000000000LL ? true : false; + break; + default: + // Should never get here + return; + } + } + memcpy( outData, ( yep ) ? 
inDataB : inDataA, get_explicit_type_size( vecType ) ); +} + +int test_relational_select_signed(cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) +{ + ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble }; + ExplicitType testVecType[] = { kChar, kShort, kInt, kLong, kNumExplicitTypes }; + unsigned int vecSizes[] = { 1, 2, 4, 8, 16, 0 }; + unsigned int index, typeIndex, testTypeIndex; + int retVal = 0; + RandomSeed seed( gRandomSeed ); + + for( typeIndex = 0; typeIndex < 10; typeIndex++ ) + { + if ((vecType[typeIndex] == kLong || vecType[typeIndex] == kULong) && !gHasLong) + continue; + + if (vecType[typeIndex] == kDouble) { + if(!is_extension_available(device, "cl_khr_fp64")) { + log_info("Extension cl_khr_fp64 not supported; skipping double tests.\n"); + continue; + } else { + log_info("Testing doubles.\n"); + } + } + for( testTypeIndex = 0; testVecType[ testTypeIndex ] != kNumExplicitTypes; testTypeIndex++ ) + { + if( testVecType[ testTypeIndex ] != vecType[ typeIndex ] ) + continue; + + for( index = 0; vecSizes[ index ] != 0; index++ ) + { + // Test! + if( test_select_kernel(context, queue, "select", vecType[ typeIndex ], vecSizes[ index ], testVecType[ testTypeIndex ], select_signed_verify_fn, seed ) != 0 ) + { + log_error( " Vector %s%d, test vector %s%d FAILED\n", get_explicit_type_name( vecType[ typeIndex ] ), vecSizes[ index ], + get_explicit_type_name( testVecType[ testTypeIndex ] ), vecSizes[ index ] ); + retVal = -1; + } + } + } + } + + return retVal; +} + +void select_unsigned_verify_fn( ExplicitType vecType, ExplicitType testVecType, unsigned int vecSize, void *inDataA, void *inDataB, void *inDataTest, void *outData ) +{ + bool yep = false; + if (vecSize == 1) { + switch( testVecType ) + { + case kUChar: + yep = *( (unsigned char *)inDataTest ) ? true : false; + break; + case kUShort: + yep = *( (unsigned short *)inDataTest ) ? 
true : false; + break; + case kUInt: + yep = *( (unsigned int *)inDataTest ) ? true : false; + break; + case kULong: + yep = *( (cl_ulong *)inDataTest ) ? true : false; + break; + default: + // Should never get here + return; + } + } + else { + switch( testVecType ) + { + case kUChar: + yep = *( (unsigned char *)inDataTest ) & 0x80 ? true : false; + break; + case kUShort: + yep = *( (unsigned short *)inDataTest ) & 0x8000 ? true : false; + break; + case kUInt: + yep = *( (unsigned int *)inDataTest ) & 0x80000000L ? true : false; + break; + case kULong: + yep = *( (cl_ulong *)inDataTest ) & 0x8000000000000000LL ? true : false; + break; + default: + // Should never get here + return; + } + } + memcpy( outData, ( yep ) ? inDataB : inDataA, get_explicit_type_size( vecType ) ); +} + +int test_relational_select_unsigned(cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) +{ + ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble }; + ExplicitType testVecType[] = { kUChar, kUShort, kUInt, kULong, kNumExplicitTypes }; + unsigned int vecSizes[] = { 1, 2, 4, 8, 16, 0 }; + unsigned int index, typeIndex, testTypeIndex; + int retVal = 0; + RandomSeed seed(gRandomSeed); + + + for( typeIndex = 0; typeIndex < 10; typeIndex++ ) + { + if ((vecType[typeIndex] == kLong || vecType[typeIndex] == kULong) && !gHasLong) + continue; + + if (vecType[typeIndex] == kDouble) { + if(!is_extension_available(device, "cl_khr_fp64")) { + log_info("Extension cl_khr_fp64 not supported; skipping double tests.\n"); + continue; + } else { + log_info("Testing doubles.\n"); + } + } + for( testTypeIndex = 0; testVecType[ testTypeIndex ] != kNumExplicitTypes; testTypeIndex++ ) + { + if( testVecType[ testTypeIndex ] != vecType[ typeIndex ] ) + continue; + + for( index = 0; vecSizes[ index ] != 0; index++ ) + { + // Test! 
+ if( test_select_kernel(context, queue, "select", vecType[ typeIndex ], vecSizes[ index ], testVecType[ testTypeIndex ], select_unsigned_verify_fn, seed ) != 0 ) + { + log_error( " Vector %s%d, test vector %s%d FAILED\n", get_explicit_type_name( vecType[ typeIndex ] ), vecSizes[ index ], + get_explicit_type_name( testVecType[ testTypeIndex ] ), vecSizes[ index ] ); + retVal = -1; + } + } + } + } + + return retVal; +} + + + +extern int test_relational_isequal_float(cl_device_id device, cl_context context, cl_command_queue queue, int numElements ); +extern int test_relational_isnotequal_float(cl_device_id device, cl_context context, cl_command_queue queue, int numElements ); +extern int test_relational_isgreater_float(cl_device_id device, cl_context context, cl_command_queue queue, int numElements ); +extern int test_relational_isgreaterequal_float(cl_device_id device, cl_context context, cl_command_queue queue, int numElements ); +extern int test_relational_isless_float(cl_device_id device, cl_context context, cl_command_queue queue, int numElements ); +extern int test_relational_islessequal_float(cl_device_id device, cl_context context, cl_command_queue queue, int numElements ); +extern int test_relational_islessgreater_float(cl_device_id device, cl_context context, cl_command_queue queue, int numElements ); +extern int test_relational_isequal_double(cl_device_id device, cl_context context, cl_command_queue queue, int numElements ); +extern int test_relational_isnotequal_double(cl_device_id device, cl_context context, cl_command_queue queue, int numElements ); +extern int test_relational_isgreater_double(cl_device_id device, cl_context context, cl_command_queue queue, int numElements ); +extern int test_relational_isgreaterequal_double(cl_device_id device, cl_context context, cl_command_queue queue, int numElements ); +extern int test_relational_isless_double(cl_device_id device, cl_context context, cl_command_queue queue, int numElements ); +extern int 
test_relational_islessequal_double(cl_device_id device, cl_context context, cl_command_queue queue, int numElements ); +extern int test_relational_islessgreater_double(cl_device_id device, cl_context context, cl_command_queue queue, int numElements ); + + +int test_relational_isequal(cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) +{ + int err = 0; + err |= test_relational_isequal_float( device, context, queue, numElements ); + err |= test_relational_isequal_double( device, context, queue, numElements ); + return err; +} + + +int test_relational_isnotequal(cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) +{ + int err = 0; + err |= test_relational_isnotequal_float( device, context, queue, numElements ); + err |= test_relational_isnotequal_double( device, context, queue, numElements ); + return err; +} + + +int test_relational_isgreater(cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) +{ + int err = 0; + err |= test_relational_isgreater_float( device, context, queue, numElements ); + err |= test_relational_isgreater_double( device, context, queue, numElements ); + return err; +} + + +int test_relational_isgreaterequal(cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) +{ + int err = 0; + err |= test_relational_isgreaterequal_float( device, context, queue, numElements ); + err |= test_relational_isgreaterequal_double( device, context, queue, numElements ); + return err; +} + + +int test_relational_isless(cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) +{ + int err = 0; + err |= test_relational_isless_float( device, context, queue, numElements ); + err |= test_relational_isless_double( device, context, queue, numElements ); + return err; +} + + +int test_relational_islessequal(cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) +{ + int err = 0; + err |= 
test_relational_islessequal_float( device, context, queue, numElements ); + err |= test_relational_islessequal_double( device, context, queue, numElements ); + return err; +} + + +int test_relational_islessgreater(cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) +{ + int err = 0; + err |= test_relational_islessgreater_float( device, context, queue, numElements ); + err |= test_relational_islessgreater_double( device, context, queue, numElements ); + return err; +} + + diff --git a/test_conformance/relationals/test_shuffles.cpp b/test_conformance/relationals/test_shuffles.cpp new file mode 100644 index 00000000..81465fc1 --- /dev/null +++ b/test_conformance/relationals/test_shuffles.cpp @@ -0,0 +1,932 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "testBase.h" +#include "../../test_common/harness/conversions.h" +#include "../../test_common/harness/typeWrappers.h" +#include "../../test_common/harness/testHarness.h" + +// #define USE_NEW_SYNTAX 1 +// The number of shuffles to test per test +#define NUM_TESTS 32 +// The number of times to run each combination of shuffles +#define NUM_ITERATIONS_PER_TEST 2 +#define MAX_PROGRAM_SIZE NUM_TESTS*1024 +#define PRINT_SHUFFLE_KERNEL_SOURCE 0 +#define SPEW_ORDER_DETAILS 0 + +enum ShuffleMode +{ + kNormalMode = 0, + kFunctionCallMode, + kArrayAccessMode, + kBuiltInFnMode, + kBuiltInDualInputFnMode +}; + +extern "C" { extern cl_uint gRandomSeed;}; + +static const char *shuffleKernelPattern[3] = { + "__kernel void sample_test( __global %s%s *source, __global %s%s *dest )\n" + "{\n" + " if (get_global_id(0) != 0) return;\n" + " //%s%s src1 %s, src2%s;\n",// Here's a comma... + // Above code is commented out for now, but keeping around for testing local storage options + "}\n" }; + +static const char *shuffleTempPattern = " %s%s tmp;\n"; + +static const char *clearTempPattern = " tmp = (%s%s)((%s)0);\n"; + +static const char *shuffleSinglePattern = +" tmp%s%s = source[%d]%s%s;\n" +" dest[%d] = tmp;\n" +; + +static const char * shuffleSinglePatternV3src = +" tmp%s%s = vload3(%d, source)%s%s;\n" +" dest[%d] = tmp;\n"; + +static const char * shuffleSinglePatternV3dst = +" tmp%s%s = source[%d]%s%s;\n" +" vstore3(tmp, %d, dest);\n"; + + +static const char * shuffleSinglePatternV3srcV3dst = +"tmp%s%s = vload3(%d, source)%s%s;\n" +"vstore3(tmp, %d, dest);\n"; + +static const char *shuffleFnLinePattern = "%s%s shuffle_fn( %s%s source );\n%s%s shuffle_fn( %s%s source ) { return source; }\n\n"; + +static const char *shuffleFnPattern = +" tmp%s%s = shuffle_fn( source[%d] )%s%s;\n" +" dest[%d] = tmp;\n" +; + + +static const char *shuffleFnPatternV3src = +" tmp%s%s = shuffle_fn( vload3(%d, source) )%s%s;\n" +" dest[%d] = tmp;\n" +; + + +static const char 
*shuffleFnPatternV3dst = +" tmp%s%s = shuffle_fn( source[%d] )%s%s;\n" +" vstore3(tmp, %d, dest);\n" +; + + +static const char *shuffleFnPatternV3srcV3dst = +" tmp%s%s = shuffle_fn(vload3(%d, source) )%s%s;\n" +" vstore3(tmp, %d, dest);\n" +; + +// shuffle() built-in function patterns +static const char *shuffleBuiltInPattern = +" {\n" +" %s%s src1 = %s;\n" +" %s%s%s mask = (%s%s%s)( %s );\n" +" tmp = shuffle( src1, mask );\n" +" %s;\n" +" }\n" +; + +// shuffle() built-in dual-input function patterns +static const char *shuffleBuiltInDualPattern = +" {\n" +" %s%s src1 = %s;\n" +" %s%s src2 = %s;\n" +" %s%s%s mask = (%s%s%s)( %s );\n" +" tmp = shuffle2( src1, src2, mask );\n" +" %s;\n" +" }\n" +; + + +typedef unsigned char ShuffleOrder[ 16 ]; + +void incrementShuffleOrder( ShuffleOrder &order, size_t orderSize, size_t orderRange ) +{ + for( size_t i = 0; i < orderSize; i++ ) + { + order[ i ]++; + if( order[ i ] < orderRange ) + return; + order[ i ] = 0; + } +} + +bool shuffleOrderContainsDuplicates( ShuffleOrder &order, size_t orderSize ) +{ + bool flags[ 16 ] = { false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false }; + for( size_t i = 0; i < orderSize; i++ ) + { + if( flags[ order[ i ] ] ) + return true; + flags[ order[ i ] ] = true; + } + return false; +} + +static void shuffleVector( unsigned char *inVector, unsigned char *outVector, ShuffleOrder order, size_t vecSize, size_t typeSize, cl_uint lengthToUse ) +{ + for(size_t i = 0; i < lengthToUse; i++ ) + { + unsigned char *inPtr = inVector + typeSize *order[ i ]; + memcpy( outVector, inPtr, typeSize ); + outVector += typeSize; + } +} + +static void shuffleVector2( unsigned char *inVector, unsigned char *outVector, ShuffleOrder order, size_t vecSize, size_t typeSize, cl_uint lengthToUse ) +{ + for(size_t i = 0; i < lengthToUse; i++ ) + { + unsigned char *outPtr = outVector + typeSize *order[ i ]; + memcpy( outPtr, inVector, typeSize ); + inVector += 
typeSize; + } +} + +static void shuffleVectorDual( unsigned char *inVector, unsigned char *inSecondVector, unsigned char *outVector, ShuffleOrder order, size_t vecSize, size_t typeSize, cl_uint lengthToUse ) +{ + // This is tricky: the indices of each shuffle are in a range (0-srcVecSize * 2-1), + // where (srcVecSize-srcVecSize*2-1) refers to the second input. + size_t uphalfMask = (size_t)vecSize; + size_t lowerBits = (size_t)( vecSize - 1 ); + + for(size_t i = 0; i < lengthToUse; i++ ) + { + unsigned char *inPtr; +#if SPEW_ORDER_DETAILS + log_info("order[%d] is %d, or %d of %s\n", (int)i, + (int)(order[i]), + (int)(order[i] & lowerBits), + ((order[i]&uphalfMask) == 0)?"lower num":"upper num"); +#endif + if( order[ i ] & uphalfMask ) + inPtr = inSecondVector + typeSize * ( order[ i ] & lowerBits ); + else + inPtr = inVector + typeSize * ( order[ i ] & lowerBits ); + memcpy( outVector, inPtr, typeSize ); + outVector += typeSize; + } +} + + +static ShuffleOrder sNaturalOrder = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }; + +static int useNumbersFlip = 0; +const char *get_order_string( ShuffleOrder &order, size_t vecSize, cl_uint lengthToUse, bool byNumber, MTdata d ) +{ + // NOTE: names are only valid for hex characters (up to F) but for debugging, we use + // this to print out orders for dual inputs, which actually can be valid up to position 31 (two 16-element vectors) + // so we go ahead and fake the rest of the alphabet for those other 16 positions, so we have + // some (indirectly) meaningful output + char names[] = "0123456789abcdefghijklmnopqrstuv"; + char namesUpperCase[] = "0123456789ABCDEFGHIJKLMNOPQRSTUV"; + char names2[] = "xyzw!!!!!!!!!!!!"; + + static char orderString[ 18 ]; + + size_t j, idx; + + // Assume we don't have to use numbers + byNumber = 0; + // Check to see + for( j = 0; j < lengthToUse; j++ ) + { + if (order[j] > 3) { + // An index is > xyzw so we need to use numbers + byNumber = 1; + break; + } + } + // If we can use 
numbers, do so half the time. + if (!byNumber) { + byNumber = (useNumbersFlip++)%2; + } + // Do not use xyzw for vectors whose length is not 2 or 4 per the spec. + if (vecSize != 2 || vecSize != 4 || vecSize != 3) + byNumber = 1; + + if( byNumber || vecSize > 4 ) + { + idx = 0; + // Randomly chose upper and lower case S + orderString[ idx++ ] = random_in_range(0, 1, d) ? 's' : 'S'; + for( j = 0; j < vecSize && j < lengthToUse; j++ ) { + // Randomly choose upper and lower case. + orderString[ idx++ ] = random_in_range(0, 1, d) ? names[ (int)order[ j ] ] : namesUpperCase[ (int)order[ j ] ]; + } + orderString[ idx++ ] = 0; + } + else + { + for( j = 0; j < vecSize && j < lengthToUse; j++ ) { + // Randomly choose upper and lower case. + orderString[ j ] = names2[ (int)order[ j ] ]; + } + orderString[ j ] = 0; + } + + return orderString; +} + +char * get_order_name( ExplicitType vecType, size_t inVecSize, size_t outVecSize, ShuffleOrder &inOrder, ShuffleOrder &outOrder, cl_uint lengthToUse, MTdata d, bool inUseNumerics, bool outUseNumerics ) +{ + static char orderName[ 512 ] = ""; + char inOrderStr[ 512 ], outOrderStr[ 512 ]; + + if( inVecSize == 1 ) + inOrderStr[ 0 ] = 0; + else + sprintf( inOrderStr, "%d.%s", (int)inVecSize, get_order_string( inOrder, outVecSize, lengthToUse, inUseNumerics, d ) ); + if( outVecSize == 1 ) + outOrderStr[ 0 ] = 0; + else + sprintf( outOrderStr, "%d.%s", (int)outVecSize, get_order_string( outOrder, outVecSize, lengthToUse, outUseNumerics, d ) ); + + sprintf( orderName, "order %s%s -> %s%s", + get_explicit_type_name( vecType ), inOrderStr, get_explicit_type_name( vecType ), outOrderStr ); + return orderName; +} + +void print_hex_mem_dump( const unsigned char *inDataPtr, const unsigned char * inDataPtr2, const unsigned char *expected, const unsigned char *outDataPtr, size_t inVecSize, size_t outVecSize, size_t typeSize ) +{ + char error [4096] = ""; + strcat(error, " Source: "); + for( unsigned int j = 0; j < inVecSize * typeSize; j++ ) + { 
+ sprintf(error + strlen(error), "%s%02x ", ( j % typeSize ) ? "" : " ", (cl_uchar)inDataPtr[ j ] ); + } + if( inDataPtr2 != NULL ) + { + strcat(error, "\n Source 2: "); + for( unsigned int j = 0; j < inVecSize * typeSize; j++ ) + { + sprintf(error + strlen(error), "%s%02x ", ( j % typeSize ) ? "" : " ", (cl_uchar)inDataPtr2[ j ] ); + } + } + strcat(error, "\n Expected: " ); + for( unsigned int j = 0; j < outVecSize * typeSize; j++ ) + { + sprintf(error + strlen(error), "%s%02x ", ( j % typeSize ) ? "" : " ", (cl_uchar)expected[ j ] ); + } + strcat(error, "\n Actual: " ); + for( unsigned int j = 0; j < outVecSize * typeSize; j++ ) + { + sprintf(error + strlen(error), "%s%02x ", ( j % typeSize ) ? "" : " ", (cl_uchar)outDataPtr[ j ] ); + } + log_info("%s\n", error); +} + +void generate_shuffle_mask( char *outMaskString, size_t maskSize, const ShuffleOrder *order ) +{ + outMaskString[ 0 ] = 0; + if( order != NULL ) + { + for( size_t jj = 0; jj < maskSize; jj++ ) + { + char thisMask[ 16 ]; + sprintf( thisMask, "%s%d", ( jj == 0 ) ? "" : ", ", (*order)[ jj ] ); + strcat( outMaskString, thisMask ); + } + } + else + { + for( size_t jj = 0; jj < maskSize; jj++ ) + { + char thisMask[ 16 ]; + sprintf( thisMask, "%s%ld", ( jj == 0 ) ? 
"" : ", ", jj ); + strcat( outMaskString, thisMask ); + } + } +} + +static int create_shuffle_kernel( cl_context context, cl_program *outProgram, cl_kernel *outKernel, + size_t *outRealVecSize, + ExplicitType vecType, size_t inVecSize, size_t outVecSize, cl_uint *lengthToUse, bool inUseNumerics, bool outUseNumerics, + size_t numOrders, ShuffleOrder *inOrders, ShuffleOrder *outOrders, + MTdata d, ShuffleMode shuffleMode = kNormalMode ) +{ + char inOrder[18], shuffledOrder[18]; + size_t typeSize; + char kernelSource[MAX_PROGRAM_SIZE], progLine[ 10240 ]; + char *programPtr; + char inSizeName[4], outSizeName[4], outRealSizeName[4], inSizeArgName[4]; + char outSizeNameTmpVar[4]; + + + /* Create the source; note vec size is the vector length we are testing */ + if( inVecSize == 1 ) //|| (inVecSize == 3)) // just have arrays if we go with size 3 + inSizeName[ 0 ] = 0; + else + sprintf( inSizeName, "%ld", inVecSize ); + if( inVecSize == 3 ) + inSizeArgName[ 0 ] = 0; + else + strcpy( inSizeArgName, inSizeName ); + + + typeSize = get_explicit_type_size( vecType ); + + *outRealVecSize = outVecSize; + + if( outVecSize == 1 || (outVecSize == 3)) + outSizeName[ 0 ] = 0; + else + sprintf( outSizeName, "%d", (int)outVecSize ); + + if(outVecSize == 1) { + outSizeNameTmpVar[0] = 0; + } else { + sprintf(outSizeNameTmpVar, "%d", (int)outVecSize); + } + + if( *outRealVecSize == 1 || ( *outRealVecSize == 3)) + outRealSizeName[ 0 ] = 0; + else + sprintf( outRealSizeName, "%d", (int)*outRealVecSize ); + + + // Loop through and create the source for all order strings + kernelSource[ 0 ] = 0; + if (vecType == kDouble) { + strcat(kernelSource, "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"); + } + + if( shuffleMode == kFunctionCallMode ) + { + sprintf( progLine, shuffleFnLinePattern, get_explicit_type_name( vecType ), inSizeName, get_explicit_type_name( vecType ), inSizeName, + get_explicit_type_name( vecType ), inSizeName, get_explicit_type_name( vecType ), inSizeName ); + 
strcat(kernelSource, progLine); + } + + // We're going to play a REALLY NASTY trick here. We're going to use the inSize insert point + // to put in an entire third parameter if we need it + char inParamSizeString[ 1024 ]; + if( shuffleMode == kBuiltInDualInputFnMode ) + sprintf( inParamSizeString, "%s *secondSource, __global %s%s", inSizeArgName, get_explicit_type_name( vecType ), inSizeArgName ); + else + strcpy( inParamSizeString, inSizeArgName ); + + // These two take care of unused variable warnings + const char * src2EnableA = ( shuffleMode == kBuiltInDualInputFnMode ) ? "" : "/*"; + const char * src2EnableB = ( shuffleMode == kBuiltInDualInputFnMode ) ? "" : "*/"; + + sprintf( progLine, shuffleKernelPattern[ 0 ], get_explicit_type_name( vecType ), inParamSizeString, + get_explicit_type_name( vecType ), outRealSizeName, get_explicit_type_name( vecType ), inSizeName, + src2EnableA, src2EnableB ); + strcat(kernelSource, progLine); + if( inOrders == NULL ) + strcpy( inOrder, get_order_string( sNaturalOrder, outVecSize, (cl_uint)outVecSize, inUseNumerics, d ) ); + + sprintf( progLine, shuffleTempPattern, get_explicit_type_name( vecType ), outSizeNameTmpVar); + strcat(kernelSource, progLine); + + for( unsigned int i = 0; i < numOrders; i++ ) + { + if( inOrders != NULL ) + strcpy( inOrder, get_order_string( inOrders[ i ], outVecSize, lengthToUse[i], inUseNumerics, d ) ); + strcpy( shuffledOrder, get_order_string( outOrders[ i ], outVecSize, lengthToUse[i], outUseNumerics, d ) ); + + + sprintf( progLine, clearTempPattern, get_explicit_type_name( vecType ), outSizeName,get_explicit_type_name( vecType )); + strcat(kernelSource, progLine); + + + if( shuffleMode == kNormalMode ) + { + if(outVecSize == 3 && inVecSize == 3) { + // shuffleSinglePatternV3srcV3dst + sprintf( progLine, shuffleSinglePatternV3srcV3dst, + outVecSize > 1 ? "." : "", outVecSize > 1 ? shuffledOrder : "", (int)i, + inVecSize > 1 ? "." : "", inVecSize > 1 ? 
inOrder : "", (int)i ); + } else if(inVecSize == 3) { + // shuffleSinglePatternV3src + sprintf( progLine, shuffleSinglePatternV3src, + outVecSize > 1 ? "." : "", outVecSize > 1 ? shuffledOrder : "", (int)i, + inVecSize > 1 ? "." : "", inVecSize > 1 ? inOrder : "", (int)i ); + } else if(outVecSize == 3) { + // shuffleSinglePatternV3dst + sprintf( progLine, shuffleSinglePatternV3dst, + outVecSize > 1 ? "." : "", outVecSize > 1 ? shuffledOrder : "", (int)i, + inVecSize > 1 ? "." : "", inVecSize > 1 ? inOrder : "", + (int)i ); + } else { + sprintf( progLine, shuffleSinglePattern, + outVecSize > 1 ? "." : "", outVecSize > 1 ? shuffledOrder : "", (int)i, + inVecSize > 1 ? "." : "", inVecSize > 1 ? inOrder : "", (int)i ); + } + } + else if( shuffleMode == kFunctionCallMode ) + { + // log_info("About to make a shuffle line\n"); + // fflush(stdout); + if(inVecSize == 3 && outVecSize == 3) { // swap last two + sprintf( progLine, shuffleFnPatternV3srcV3dst, + outVecSize > 1 ? "." : "", outVecSize > 1 ? shuffledOrder : "", (int)i, + inVecSize > 1 ? "." : "", inVecSize > 1 ? inOrder : "", + (int)i ); + } else if(outVecSize == 3) { // swap last two + // log_info("Here\n\n"); + // fflush(stdout); + sprintf( progLine, shuffleFnPatternV3dst, + outVecSize > 1 ? "." : "", + outVecSize > 1 ? shuffledOrder : "", + (int)i, + inVecSize > 1 ? "." : "", + inVecSize > 1 ? inOrder : "", + (int)i ); + // log_info("\n%s\n", progLine); + // fflush(stdout); + } else if(inVecSize == 3) { + sprintf( progLine, shuffleFnPatternV3src, + outVecSize > 1 ? "." : "", outVecSize > 1 ? shuffledOrder : "", (int)i, + inVecSize > 1 ? "." : "", inVecSize > 1 ? inOrder : "", (int)i ); + } else { + sprintf( progLine, shuffleFnPattern, + outVecSize > 1 ? "." : "", outVecSize > 1 ? shuffledOrder : "", (int)i, + inVecSize > 1 ? "." : "", inVecSize > 1 ? 
inOrder : "", (int)i ); + } + } + else if( shuffleMode == kArrayAccessMode ) + { // now we want to replace inSizeName with inSizeNameShuffleFn + int vectorSizeToCastTo = 16; + cl_uint item; + for (item =0; item 1 ? "." : "", outVecSize > 1 ? shuffledOrder : "", + get_explicit_type_name( vecType ), vectorSizeToCastTo, + castVectorIndex, + vectorSizeToCastTo > 1 ? "." : "", vectorSizeToCastTo > 1 ? inOrder : ""); + strcat(kernelSource, progLine); + } + if(outVecSize == 3) { + sprintf(progLine," vstore3(tmp, %d, (__global %s *)dest);\n", + i, get_explicit_type_name( vecType )); + // probably don't need that last + // cast to (__global %s *) where %s is get_explicit_type_name( vecType) + } else { + sprintf(progLine," dest[%d] = tmp;\n", i ); + } + } + else // shuffleMode == kBuiltInFnMode or kBuiltInDualInputFnMode + { + if(inVecSize == 3 || outVecSize == 3 || + inVecSize == 1 || outVecSize == 1) { + // log_info("Skipping test for size 3\n"); + continue; + } + ExplicitType maskType = vecType; + if( maskType == kFloat ) + maskType = kUInt; + if( maskType == kDouble) { + maskType = kULong; + } + + char maskString[ 1024 ] = ""; + size_t maskSize = outVecSize;// ( shuffleMode == kBuiltInDualInputFnMode ) ? ( outVecSize << 1 ) : outVecSize; + generate_shuffle_mask( maskString, maskSize, ( outOrders != NULL ) ? &outOrders[ i ] : NULL ); + + // Set up a quick prefix, so mask gets unsigned type regardless of the input/output type + char maskPrefix[ 2 ] = "u"; + if( get_explicit_type_name( maskType )[ 0 ] == 'u' ) + maskPrefix[ 0 ] = 0; + + char progLine2[ 10240 ]; + if( shuffleMode == kBuiltInDualInputFnMode ) + { + sprintf( progLine2, shuffleBuiltInDualPattern, get_explicit_type_name( vecType ), inSizeName, + ( inVecSize == 3 ) ? "vload3( %ld, (__global %s *)source )" : "source[ %ld ]", + get_explicit_type_name( vecType ), inSizeName, + ( inVecSize == 3 ) ? 
"vload3( %ld, (__global %s *)secondSource )" : "secondSource[ %ld ]", + maskPrefix, get_explicit_type_name( maskType ), outSizeName, maskPrefix, get_explicit_type_name( maskType ), outSizeName, + maskString, + ( outVecSize == 3 ) ? "vstore3( tmp, %ld, (__global %s *)dest )" : "dest[ %ld ] = tmp" ); + + if( outVecSize == 3 ) + { + if( inVecSize == 3 ) + sprintf( progLine, progLine2, i, get_explicit_type_name( vecType ), i, get_explicit_type_name( vecType ), i, get_explicit_type_name( vecType ) ); + else + sprintf( progLine, progLine2, i, i, i, get_explicit_type_name( vecType ) ); + } + else + { + if( inVecSize == 3 ) + sprintf( progLine, progLine2, i, get_explicit_type_name( vecType ), i, get_explicit_type_name( vecType ), i ); + else + sprintf( progLine, progLine2, i, i, i ); + } + } + else + { + sprintf( progLine2, shuffleBuiltInPattern, get_explicit_type_name( vecType ), inSizeName, + ( inVecSize == 3 ) ? "vload3( %ld, (__global %s *)source )" : "source[ %ld ]", + maskPrefix, get_explicit_type_name( maskType ), outSizeName, maskPrefix, get_explicit_type_name( maskType ), outSizeName, + maskString, + ( outVecSize == 3 ) ? 
"vstore3( tmp, %ld, (__global %s *)dest )" : "dest[ %ld ] = tmp" ); + + if( outVecSize == 3 ) + { + if( inVecSize == 3 ) + sprintf( progLine, progLine2, i, get_explicit_type_name( vecType ), i, get_explicit_type_name( vecType ) ); + else + sprintf( progLine, progLine2, i, i, get_explicit_type_name( vecType ) ); + } + else + { + if( inVecSize == 3 ) + sprintf( progLine, progLine2, i, get_explicit_type_name( vecType ), i ); + else + sprintf( progLine, progLine2, i, i ); + } + } + } + + strcat( kernelSource, progLine ); + if (strlen(kernelSource) > 0.9*MAX_PROGRAM_SIZE) + log_info("WARNING: Program has grown to 90%% (%d) of the defined max program size of %d\n", (int)strlen(kernelSource), (int)MAX_PROGRAM_SIZE); + } + strcat( kernelSource, shuffleKernelPattern[ 1 ] ); + + // Print the kernel source + if (PRINT_SHUFFLE_KERNEL_SOURCE) + log_info( "Kernel:%s\n", kernelSource ); + + /* Create kernel */ + programPtr = kernelSource; + if( create_single_kernel_helper( context, outProgram, outKernel, 1, (const char **)&programPtr, "sample_test" ) ) + { + return -1; + } + return 0; +} + +int test_shuffle_dual_kernel(cl_context context, cl_command_queue queue, + ExplicitType vecType, size_t inVecSize, size_t outVecSize, cl_uint *lengthToUse, size_t numOrders, + ShuffleOrder *inOrderIdx, ShuffleOrder *outOrderIdx, bool inUseNumerics, bool outUseNumerics, MTdata d, + ShuffleMode shuffleMode = kNormalMode ) +{ + clProgramWrapper program; + clKernelWrapper kernel; + int error; + size_t threads[1], localThreads[1]; + size_t typeSize, outRealVecSize; + clMemWrapper streams[ 3 ]; + + /* Create the source */ + error = create_shuffle_kernel( context, &program, &kernel, &outRealVecSize, vecType, + inVecSize, outVecSize, lengthToUse, inUseNumerics, outUseNumerics, numOrders, inOrderIdx, outOrderIdx, + d, shuffleMode ); + if( error != 0 ) + return error; + + typeSize = get_explicit_type_size( vecType ); + +#if !(defined(_WIN32) && defined (_MSC_VER)) + cl_long inData[ inVecSize * numOrders 
]; + cl_long inSecondData[ inVecSize * numOrders ]; + cl_long outData[ outRealVecSize * numOrders ]; +#else + cl_long* inData = (cl_long*)_malloca(inVecSize * numOrders * sizeof(cl_long)); + cl_long* inSecondData = (cl_long*)_malloca(inVecSize * numOrders * sizeof(cl_long)); + cl_long* outData = (cl_long*)_malloca(outRealVecSize * numOrders * sizeof(cl_long)); +#endif + memset(outData, 0, outRealVecSize * numOrders * sizeof(cl_long) ); + + generate_random_data( vecType, (unsigned int)( numOrders * inVecSize ), d, inData ); + if( shuffleMode == kBuiltInDualInputFnMode ) + generate_random_data( vecType, (unsigned int)( numOrders * inVecSize ), d, inSecondData ); + + streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), typeSize * inVecSize * numOrders, inData, &error); + test_error( error, "Unable to create input stream" ); + + streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), typeSize * outRealVecSize * numOrders, outData, &error); + test_error( error, "Unable to create output stream" ); + + int argIndex = 0; + if( shuffleMode == kBuiltInDualInputFnMode ) + { + streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), typeSize * inVecSize * numOrders, inSecondData, &error); + test_error( error, "Unable to create second input stream" ); + + error = clSetKernelArg( kernel, argIndex++, sizeof( streams[ 2 ] ), &streams[ 2 ] ); + test_error( error, "Unable to set kernel argument" ); + } + + // Set kernel arguments + error = clSetKernelArg( kernel, argIndex++, sizeof( streams[ 0 ] ), &streams[ 0 ] ); + test_error( error, "Unable to set kernel argument" ); + error = clSetKernelArg( kernel, argIndex++, sizeof( streams[ 1 ] ), &streams[ 1 ] ); + test_error( error, "Unable to set kernel argument" ); + + + /* Run the kernel */ + threads[0] = numOrders; + + error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] ); + test_error( error, "Unable to get work group size to use" ); + + 
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); + test_error( error, "Unable to execute test kernel" ); + + + // Read the results back + error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, typeSize * numOrders * outRealVecSize, outData, 0, NULL, NULL ); + test_error( error, "Unable to read results" ); + + unsigned char *inDataPtr = (unsigned char *)inData; + unsigned char *inSecondDataPtr = (unsigned char *)inSecondData; + unsigned char *outDataPtr = (unsigned char *)outData; + int ret = 0; + int errors_printed = 0; + for( size_t i = 0; i < numOrders; i++ ) + { + unsigned char expected[ 1024 ]; + unsigned char temp[ 1024 ]; + memset(expected, 0, sizeof(expected)); + memset(temp, 0, sizeof(temp)); + if( shuffleMode == kBuiltInFnMode ) + shuffleVector( inDataPtr, expected, outOrderIdx[ i ], outVecSize, typeSize, lengthToUse[i] ); + else if( shuffleMode == kBuiltInDualInputFnMode ) + shuffleVectorDual( inDataPtr, inSecondDataPtr, expected, outOrderIdx[ i ], inVecSize, typeSize, lengthToUse[i] ); + else + { + shuffleVector( inDataPtr, temp, inOrderIdx[ i ], inVecSize, typeSize, lengthToUse[i] ); + shuffleVector2( temp, expected, outOrderIdx[ i ], outVecSize, typeSize, lengthToUse[i] ); + } + + if( memcmp( expected, outDataPtr, outVecSize * typeSize ) != 0 ) + { + log_error( " ERROR: Shuffle test %d FAILED for %s (memory hex dump follows)\n", (int)i, + get_order_name( vecType, inVecSize, outVecSize, inOrderIdx[ i ], outOrderIdx[ i ], lengthToUse[i], d, inUseNumerics, outUseNumerics ) ); + + print_hex_mem_dump( inDataPtr, ( shuffleMode == kBuiltInDualInputFnMode ) ? 
inSecondDataPtr : NULL, expected, outDataPtr, inVecSize, outVecSize, typeSize ); + + if( ( shuffleMode == kBuiltInFnMode ) || ( shuffleMode == kBuiltInDualInputFnMode ) ) + { + // Mask would've been different for every shuffle done, so we have to regen it to print it + char maskString[ 1024 ]; + generate_shuffle_mask( maskString, outVecSize, ( outOrderIdx != NULL ) ? &outOrderIdx[ i ] : NULL ); + log_error( " Mask: %s\n", maskString ); + } + + ret++; + errors_printed++; + if (errors_printed > MAX_ERRORS_TO_PRINT) + { + log_info("Further errors suppressed.\n"); + return ret; + } + } + inDataPtr += inVecSize * typeSize; + inSecondDataPtr += inVecSize * typeSize; + outDataPtr += outRealVecSize * typeSize; + } + + return ret; +} + +void build_random_shuffle_order( ShuffleOrder &outIndices, unsigned int length, unsigned int selectLength, bool allowRepeats, MTdata d ) +{ + char flags[ 16 ]; + + memset( flags, 0, sizeof( flags ) ); + + for( unsigned int i = 0; i < length; i++ ) + { + char selector = (char)random_in_range( 0, selectLength - 1, d ); + if( !allowRepeats ) + { + while( flags[ (int)selector ] ) + selector = (char)random_in_range( 0, selectLength - 1, d ); + flags[ (int)selector ] = true; + } + outIndices[ i ] = selector; + } +} + +class shuffleBuffer +{ +public: + + shuffleBuffer( cl_context ctx, cl_command_queue queue, ExplicitType type, size_t inSize, size_t outSize, ShuffleMode mode ) + { + mContext = ctx; + mQueue = queue; + mVecType = type; + mInVecSize = inSize; + mOutVecSize = outSize; + mShuffleMode = mode; + + mCount = 0; + + // Here's the deal with mLengthToUse[i]. + // if you have, for instance + // uchar4 dst; + // uchar8 src; + // you can do + // src.s0213 = dst.s1045; + // but you can also do + // src.s02 = dst.s10; + // which has a different effect + // The intent with these "sub lengths" is to test all such + // possibilities + // Calculate a range of sub-lengths within the vector to copy. 
+ int i; + size_t maxSize = (mInVecSize < mOutVecSize) ? mInVecSize : mOutVecSize; + for(i=0; i 0 ) + { + err = test_shuffle_dual_kernel( mContext, mQueue, mVecType, mInVecSize, mOutVecSize, mLengthToUse, + mCount, mInOrders, mOutOrders, true, true, d, mShuffleMode ); + mCount = 0; + } + return err; + } + +protected: + cl_context mContext; + cl_command_queue mQueue; + ExplicitType mVecType; + size_t mInVecSize, mOutVecSize, mCount; + ShuffleMode mShuffleMode; + cl_uint mLengthToUse[ NUM_TESTS ]; + + ShuffleOrder mInOrders[ NUM_TESTS ], mOutOrders[ NUM_TESTS ]; +}; + + +int test_shuffle_random(cl_device_id device, cl_context context, cl_command_queue queue, ShuffleMode shuffleMode, MTdata d ) +{ + ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble }; + unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 }; + unsigned int srcIdx, dstIdx, typeIndex; + int error = 0, totalError = 0, prevTotalError = 0; + RandomSeed seed(gRandomSeed); + + for( typeIndex = 0; typeIndex < 10; typeIndex++ ) + { + //log_info( "\n\t%s... 
", get_explicit_type_name( vecType[ typeIndex ] ) ); + //fflush( stdout ); + if (vecType[typeIndex] == kDouble) { + if (!is_extension_available(device, "cl_khr_fp64")) { + log_info("Extension cl_khr_fp64 not supported; skipping double tests.\n"); + continue; + } + log_info("Testing doubles.\n"); + } + + if ((vecType[typeIndex] == kLong || vecType[typeIndex] == kULong) && !gHasLong ) + { + log_info("Long types are unsupported, skipping."); + continue; + } + + error = 0; + for( srcIdx = 0; vecSizes[ srcIdx ] != 0 /*&& error == 0*/; srcIdx++ ) + { + for( dstIdx = 0; vecSizes[ dstIdx ] != 0 /*&& error == 0*/; dstIdx++ ) + { + if( ( ( shuffleMode == kBuiltInDualInputFnMode ) || ( shuffleMode == kBuiltInFnMode ) ) && + ( ( vecSizes[ dstIdx ] & 1 ) || ( vecSizes[ srcIdx ] & 1 ) ) ) + { + // Built-in shuffle functions don't work on size 1 (scalars) or size 3 (vec3s) + continue; + } + + log_info("Testing [%s%d to %s%d]... ", get_explicit_type_name( vecType[ typeIndex ] ) , vecSizes[srcIdx], get_explicit_type_name( vecType[ typeIndex ] ) , vecSizes[dstIdx]); + shuffleBuffer buffer( context, queue, vecType[ typeIndex ], vecSizes[ srcIdx ], vecSizes[ dstIdx ], shuffleMode ); + + int numTests = NUM_TESTS*NUM_ITERATIONS_PER_TEST; + for( int i = 0; i < numTests /*&& error == 0*/; i++ ) + { + ShuffleOrder src, dst; + if( shuffleMode == kBuiltInFnMode ) + { + build_random_shuffle_order( dst, vecSizes[ dstIdx ], vecSizes[ srcIdx ], true, d ); + } + else if(shuffleMode == kBuiltInDualInputFnMode) + { + build_random_shuffle_order(dst, vecSizes[dstIdx], 2*vecSizes[srcIdx], true, d); + } + else + { + build_random_shuffle_order( src, vecSizes[ dstIdx ], vecSizes[ srcIdx ], true, d ); + build_random_shuffle_order( dst, vecSizes[ dstIdx ], vecSizes[ dstIdx ], false, d ); + } + + error = buffer.AddRun( src, dst, seed ); + if (error) + totalError++; + } + int test_error = buffer.Flush(seed); + if (test_error) + totalError++; + + if (totalError == prevTotalError) + log_info("\tPassed.\n"); + 
else + { + log_error("\tFAILED.\n"); + prevTotalError = totalError; + } + } + } + } + return totalError; +} + +int test_shuffle_copy(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + RandomSeed seed(gRandomSeed); + return test_shuffle_random( device, context, queue, kNormalMode, seed ); +} + +int test_shuffle_function_call(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + RandomSeed seed(gRandomSeed); + return test_shuffle_random( device, context, queue, kFunctionCallMode, seed ); +} + +int test_shuffle_array_cast(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + RandomSeed seed(gRandomSeed); + return test_shuffle_random( device, context, queue, kArrayAccessMode, seed ); +} + +int test_shuffle_built_in(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + RandomSeed seed(gRandomSeed); + return test_shuffle_random( device, context, queue, kBuiltInFnMode, seed ); +} + +int test_shuffle_built_in_dual_input(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + RandomSeed seed(gRandomSeed); + return test_shuffle_random( device, context, queue, kBuiltInDualInputFnMode, seed ); +} + diff --git a/test_conformance/run_conformance.py b/test_conformance/run_conformance.py new file mode 100644 index 00000000..ea7f6775 --- /dev/null +++ b/test_conformance/run_conformance.py @@ -0,0 +1,357 @@ +#! /usr/bin/python + +#/****************************************************************** +#// +#// OpenCL Conformance Tests +#// +#// Copyright: (c) 2008-2009 by Apple Inc. All Rights Reserved. +#// +#******************************************************************/ + +import os, re, sys, subprocess, time, commands, tempfile, math, string + +DEBUG = 0 + +log_file_name = "opencl_conformance_results_" + time.strftime("%Y-%m-%d_%H-%M", time.localtime())+ ".log" +process_pid = 0 + +# The amount of time between printing a "." 
(if no output from test) or ":" (if output) +# to the screen while the tests are running. +seconds_between_status_updates = 60*60*24*7 # effectively never + +# Help info +def write_help_info() : + print("run_conformance.py test_list [CL_DEVICE_TYPE(s) to test] [partial-test-names, ...] [log=path/to/log/file/]") + print(" test_list - the .csv file containing the test names and commands to run the tests.") + print(" [partial-test-names, ...] - optional partial strings to select a subset of the tests to run.") + print(" [CL_DEVICE_TYPE(s) to test] - list of CL device types to test, default is CL_DEVICE_TYPE_DEFAULT.") + print(" [log=path/to/log/file/] - provide a path for the test log file, default is in the current directory.") + print(" (Note: spaces are not allowed in the log file path.") + + +# Get the time formatted nicely +def get_time() : + return time.strftime("%d-%b %H:%M:%S", time.localtime()) + +# Write text to the screen and the log file +def write_screen_log(text) : + global log_file + print(text) + log_file.write(text+"\n") + +# Load the tests from a csv formated file of the form name,command +def get_tests(filename, devices_to_test): + tests = [] + if (os.path.exists(filename) == False): + print("FAILED: test_list \"" + filename + "\" does not exist.") + print("") + write_help_info() + sys.exit(-1) + file = open(filename, 'r') + for line in file.readlines(): + comment = re.search("^#.*", line) + if (comment): + continue + device_specific_match = re.search("^\s*(.+?)\s*,\s*(.+?)\s*,\s*(.+?)\s*$", line) + if (device_specific_match): + if (device_specific_match.group(1) in devices_to_test): + test_path = string.replace(device_specific_match.group(3), '/', os.sep) + test_name = string.replace(device_specific_match.group(2), '/', os.sep) + tests.append((test_name, test_path)) + else: + print("Skipping " + device_specific_match.group(2) + " because " + device_specific_match.group(1) + " is not in the list of devices to test.") + continue + match = 
re.search("^\s*(.+?)\s*,\s*(.+?)\s*$", line) + if (match): + test_path = string.replace(match.group(2), '/', os.sep) + test_name = string.replace(match.group(1), '/', os.sep) + tests.append((test_name, test_path)) + return tests + + +def run_test_checking_output(current_directory, test_dir, log_file): + global process_pid, seconds_between_status_updates + failures_this_run = 0 + start_time = time.time() + # Create a temporary file for capturing the output from the test + (output_fd, output_name) = tempfile.mkstemp() + if ( not os.path.exists(output_name)) : + write_screen_log("\n ==> ERROR: could not create temporary file %s ." % output_name) + os.close(output_fd) + return -1 + # Execute the test + program_to_run = test_dir_without_args = test_dir.split(None, 1)[0] + if ( os.sep == '\\' ) : program_to_run += ".exe" + if (os.path.exists(current_directory + os.sep + program_to_run)) : + os.chdir(os.path.dirname(current_directory+os.sep+test_dir_without_args) ) + try: + if (DEBUG): p = subprocess.Popen("", stderr=subprocess.STDOUT, stdout=subprocess.PIPE, shell=True) + else : p = subprocess.Popen(current_directory + os.sep + test_dir, stderr=output_fd, stdout=output_fd, shell=True) + except OSError: + write_screen_log("\n ==> ERROR: failed to execute test. Failing test. : " + str(OSError)) + os.close(output_fd) + return -1 + else: + write_screen_log("\n ==> ERROR: test file (" + current_directory + os.sep + program_to_run +") does not exist. Failing test.") + os.close(output_fd) + return -1 + # Set the global pid so we can kill it if this is aborted + process_pid = p.pid + # Read one character at a time from the temporary output file while the process is running. + # When we get an end-of-line, look for errors and write the results to the log file. + # This allows us to process the file as it is being produced. 
+ # Keep track of the state for reading + # Whether we are done, if we have more to read, and where in the file we last read + done = False + more_to_read = True + pointer = 0 + pointer_at_last_user_update = 0 + output_this_run = False + try: + read_output = open(output_name, 'r') + except IOError: + write_screen_log("\n ==> ERROR: could not open output file from test.") + os.close(output_fd) + return -1 + line = "" + while (not done or more_to_read): + os.fsync(output_fd) + # Determine if we should display some output + elapsed_time = (time.time() - start_time) + if (elapsed_time > seconds_between_status_updates): + start_time = time.time() + # If we've received output from the test since the last update, display a # + if (pointer != pointer_at_last_user_update): + sys.stdout.write(":") + else: + sys.stdout.write(".") + pointer_at_last_user_update = pointer + sys.stdout.flush() + # Check if we're done + p.poll() + if (not done and p.returncode != None): + if (p.returncode < 0): + if (not output_this_run): + print "" + output_this_run = True + write_screen_log(" ==> ERROR: test killed/crashed: " + str(p.returncode)+ ".") + done = True + # Try reading + try: + read_output.seek(pointer) + char_read = read_output.read(1) + except IOError: + time.sleep(1) + continue + # If we got a full line then process it + if (char_read == "\n"): + # Look for failures and report them as such + match = re.search(".*(FAILED|ERROR).*", line) + if (match): + if (not output_this_run): + print "" + output_this_run = True + print(" ==> " + line.replace('\n','')) + match = re.search(".*FAILED.*", line) + if (match): + failures_this_run = failures_this_run + 1 + match = re.search(".*(PASSED).*", line) + if (match): + if (not output_this_run): + print "" + output_this_run = True + print(" " + line.replace('\n','')) + # Write it to the log + log_file.write(" " + line +"\n") + log_file.flush() + line = "" + pointer = pointer + 1 + # If we are at the end of the file, then re-open it to get new 
data + elif (char_read == ""): + more_to_read = False + read_output.close() + time.sleep(1) + try: + os.fsync(output_fd) + read_output = open(output_name, 'r') + # See if there is more to read. This happens if the process ends and we have data left. + read_output.seek(pointer) + if (read_output.read(1) != ""): + more_to_read = True + except IOError: + write_screen_log("\n ==> ERROR: could not reopen output file from test.") + return -1 + done = True + else: + line = line + char_read + pointer = pointer + 1 + # Now we are done, so write out any remaining data in the file: + # This should only happen if the process exited with an error. + os.fsync(output_fd) + while (read_output.read(1) != ""): + log_file.write(read_output.read(1)) + # Return the total number of failures + if (p.returncode == 0 and failures_this_run > 0): + write_screen_log("\n ==> ERROR: Test returned 0, but number of FAILED lines reported is " + str(failures_this_run) +".") + return failures_this_run + return p.returncode + + +def run_tests(tests) : + global curent_directory + global process_pid + # Run the tests + failures = 0 + previous_test = None + test_number = 1 + for test in tests: + # Print the name of the test we're running and the time + (test_name, test_dir) = test + if (test_dir != previous_test): + print("========== " + test_dir) + log_file.write("========================================================================================\n") + log_file.write("========================================================================================\n") + log_file.write("(" + get_time() + ") Running Tests: " + test_dir +"\n") + log_file.write("========================================================================================\n") + log_file.write("========================================================================================\n") + previous_test = test_dir + print("("+get_time()+") BEGIN " + test_name.ljust(40) +": "), + log_file.write(" 
----------------------------------------------------------------------------------------\n") + log_file.write(" (" + get_time() + ") Running Sub Test: " + test_name + "\n") + log_file.write(" ----------------------------------------------------------------------------------------\n") + log_file.flush() + sys.stdout.flush() + + # Run the test + result = 0 + start_time = time.time() + try: + process_pid = 0 + result = run_test_checking_output(current_directory, test_dir, log_file) + except KeyboardInterrupt: + # Catch an interrupt from the user + write_screen_log("\nFAILED: Execution interrupted. Killing test process, but not aborting full test run.") + os.kill(process_pid, 9) + answer = raw_input("Abort all tests? (y/n)") + if (answer.find("y") != -1): + write_screen_log("\nUser chose to abort all tests.") + log_file.close() + sys.exit(-1) + else: + write_screen_log("\nUser chose to continue with other tests. Reporting this test as failed.") + result = 1 + run_time = (time.time() - start_time) + + # Move print the finish status + if (result == 0): + print("("+get_time()+") PASSED " + test_name.ljust(40) +": (" + str(int(run_time)).rjust(3) + "s, test " + str(test_number).rjust(3) + os.sep + str(len(tests)) +")"), + else: + print("("+get_time()+") FAILED " + test_name.ljust(40) +": (" + str(int(run_time)).rjust(3) + "s, test " + str(test_number).rjust(3) + os.sep + str(len(tests)) +")"), + + test_number = test_number + 1 + log_file.write(" ----------------------------------------------------------------------------------------\n") + log_file.flush() + + print("") + if (result != 0): + log_file.write(" *******************************************************************************************\n") + log_file.write(" * ("+get_time()+") Test " + test_name + " ==> FAILED: " + str(result)+"\n") + log_file.write(" *******************************************************************************************\n") + failures = failures + 1 + else: + log_file.write(" 
("+get_time()+") Test " + test_name +" passed in " + str(run_time) + "s\n") + + log_file.write(" ----------------------------------------------------------------------------------------\n") + log_file.write("\n") + return failures + + + + + +# ######################## +# Begin OpenCL conformance run script +# ######################## + +if (len(sys.argv) < 2): + write_help_info() + sys.exit(-1) + + +current_directory = os.getcwd() +# Open the log file +for arg in sys.argv: + match = re.search("log=(\S+)", arg) + if (match): + log_file_name = match.group(1).rstrip('/') + os.sep + log_file_name +try: + log_file = open(log_file_name, "w") +except IOError: + print "Could not open log file " + log_file_name + +# Determine which devices to test +device_types = ["CL_DEVICE_TYPE_DEFAULT", "CL_DEVICE_TYPE_CPU", "CL_DEVICE_TYPE_GPU", "CL_DEVICE_TYPE_ACCELERATOR", "CL_DEVICE_TYPE_ALL"] +devices_to_test = [] +for device in device_types: + if device in sys.argv[2:]: + devices_to_test.append(device) +if (len(devices_to_test) == 0): + devices_to_test = ["CL_DEVICE_TYPE_DEFAULT"] +write_screen_log("Testing on: " + str(devices_to_test)) + +# Get the tests +tests = get_tests(sys.argv[1], devices_to_test) + +# If tests are specified on the command line then run just those ones +tests_to_use = [] +num_of_patterns_to_match = 0 +for arg in sys.argv[2:]: + if arg in device_types: + continue + if re.search("log=(\S+)", arg): + continue + num_of_patterns_to_match = num_of_patterns_to_match + 1 + found_it = False + for test in tests: + (test_name, test_dir) = test + if (test_name.find(arg) != -1 or test_dir.find(arg) != -1): + found_it = True + if (test not in tests_to_use): + tests_to_use.append(test) + if (found_it == False): + print("Failed to find a test matching " + arg) +if (len(tests_to_use) == 0): + if (num_of_patterns_to_match > 0): + print("FAILED: Failed to find any tests matching the given command-line options.") + print("") + write_help_info() + sys.exit(-1) +else: + tests = 
tests_to_use[:] + +write_screen_log("Test execution arguments: " + str(sys.argv)) +write_screen_log("Logging to file " + log_file_name +".") +write_screen_log("Loaded tests from " + sys.argv[1] + ", total of " + str(len(tests)) + " tests selected to run:") +for (test_name, test_command) in tests: + write_screen_log(test_name.ljust(50) + " (" + test_command +")") + +# Run the tests +total_failures = 0 +for device_to_test in devices_to_test: + os.environ['CL_DEVICE_TYPE'] = device_to_test + write_screen_log("========================================================================================") + write_screen_log("========================================================================================") + write_screen_log(("Setting CL_DEVICE_TYPE to " + device_to_test).center(90)) + write_screen_log("========================================================================================") + write_screen_log("========================================================================================") + failures = run_tests(tests) + write_screen_log("========================================================================================") + if (failures == 0): + write_screen_log(">> TEST on " + device_to_test + " PASSED") + else: + write_screen_log(">> TEST on " + device_to_test + " FAILED (" + str(failures) + " FAILURES)") + write_screen_log("========================================================================================") + total_failures = total_failures + failures + +write_screen_log("("+get_time()+") Testing complete. 
" + str(total_failures) + " failures for " + str(len(tests)) + " tests.") +log_file.close() diff --git a/test_conformance/select/CMakeLists.txt b/test_conformance/select/CMakeLists.txt new file mode 100644 index 00000000..6f25c466 --- /dev/null +++ b/test_conformance/select/CMakeLists.txt @@ -0,0 +1,14 @@ +set(MODULE_NAME SELECT) + +set(${MODULE_NAME}_SOURCES + test_select.c + util_select.c + ../../test_common/harness/testHarness.c + ../../test_common/harness/mt19937.c + ../../test_common/harness/msvc9.c + ../../test_common/harness/kernelHelpers.c + ../../test_common/harness/errorHelpers.c + ../../test_common/harness/parseParameters.cpp +) + +include(../CMakeCommon.txt) diff --git a/test_conformance/select/Jamfile b/test_conformance/select/Jamfile new file mode 100644 index 00000000..6448048e --- /dev/null +++ b/test_conformance/select/Jamfile @@ -0,0 +1,21 @@ +project + : requirements + -/harness//harness /harness//harness +# gcc:-xc + msvc:"/TP" + ; + +exe test_select + : test_select.c + util_select.c + /harness//mt19937.c + /harness//kernelHelpers.c + /harness//errorHelpers.c + : windows:/harness//msvc9.c + ; + +install dist + : test_select + : debug:$(DIST)/debug/tests/test_conformance/select + release:$(DIST)/release/tests/test_conformance/select + ; diff --git a/test_conformance/select/Makefile b/test_conformance/select/Makefile new file mode 100644 index 00000000..c89d5ff3 --- /dev/null +++ b/test_conformance/select/Makefile @@ -0,0 +1,33 @@ +ifdef BUILD_WITH_ATF +ATF = -framework ATF +USE_ATF = -DUSE_ATF +endif + +SRCS = test_select.c util_select.c ../../test_common/harness/mt19937.c ../../test_common/harness/kernelHelpers.c ../../test_common/harness/errorHelpers.c + +LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries +LIBPATH += -L. 
+FRAMEWORK = $(abspath $(SRCS)) +HEADERS = test_select.h +TARGET = test_select +INCLUDE = +COMPILERFLAGS = -c -Wall -g -Os -Wshorten-64-to-32 +CC = c++ +CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} +LIBRARIES = -framework OpenCL ${ATF} + +OBJECTS = test_select.o util_select.o mt19937.o kernelHelpers.o errorHelpers.o +TARGETOBJECT = +all: $(TARGET) + +$(OBJECTS): $(FRAMEWORK) $(HEADERS) + $(CC) $(CFLAGS) $(INCLUDE) $(FRAMEWORK) + +$(TARGET): $(OBJECTS) + $(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES) + +clean: + rm -f $(TARGET) $(OBJECTS) + +.DEFAULT: + @echo The target \"$@\" does not exist in Makefile. diff --git a/test_conformance/select/test_select.c b/test_conformance/select/test_select.c new file mode 100644 index 00000000..5eec6a3d --- /dev/null +++ b/test_conformance/select/test_select.c @@ -0,0 +1,755 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include +#if ! defined( _WIN32) +#if ! 
defined( __ANDROID__ ) +#include +#endif +#endif +#include +#include "test_select.h" + + +#include "../../test_common/harness/testHarness.h" + + +#include "../../test_common/harness/kernelHelpers.h" +#include "../../test_common/harness/mt19937.h" +#include "../../test_common/harness/parseParameters.h" + + +//----------------------------------------- +// Static functions +//----------------------------------------- + +// initialize src1 and src2 buffer with values based on stype +static void initSrcBuffer(void* src1, Type stype, MTdata); + +// initialize the valued used to compare with in the select with +// vlaues [start, count) +static void initCmpBuffer(void* cmp, Type cmptype, uint64_t start, size_t count); + +// make a program that uses select for the given stype (src/dest type), +// ctype (comparison type), veclen (vector length) +static cl_program makeSelectProgram(cl_kernel *kernel_ptr, const cl_context context, Type stype, Type ctype, size_t veclen ); + +// Creates and execute the select test for the given device, context, +// stype (source/dest type), cmptype (comparison type), using max_tg_size +// number of threads. It runs test for all the different vector lengths +// for the given stype and cmptype. +static int doTest(cl_command_queue queue, cl_context context, + Type stype, Type cmptype, cl_device_id device); + +//----------------------------------------- +// Definitions and initializations +//----------------------------------------- + +// Define the buffer size that we want to block our test with +#define BUFFER_SIZE (1024*1024) +#define KPAGESIZE 4096 + + +// When we indicate non wimpy mode, the types that are 32 bits value will +// test their entire range and 64 bits test will test the 32 bit +// range. 
Otherwise, we test a subset of the range +// [-min_short, min_short] +static bool s_wimpy_mode = false; + +// Tests are broken into the major test which is based on the +// src and cmp type and their corresponding vector types and +// sub tests which is for each individual test. The following +// tracks the subtests +int s_test_cnt = 0; +int s_test_fail = 0; + +//----------------------------------------- +// Static helper functions +//----------------------------------------- + +// calculates log2 for a 32 bit number +int int_log2(size_t value) { + if( 0 == value ) + return INT_MIN; + +#if defined( __GNUC__ ) + return (unsigned) (8*sizeof(size_t) - 1UL - __builtin_clzl(value)); +#else + int result = -1; + while(value) + { + result++; + value >>= 1; + } + return result; +#endif +} + + +static void initSrcBuffer(void* src1, Type stype, MTdata d) +{ + unsigned int* s1 = (unsigned int *)src1; + size_t i; + + for ( i=0 ; i < BUFFER_SIZE/sizeof(cl_int); i++) + s1[i] = genrand_int32(d); +} + +static void initCmpBuffer(void* cmp, Type cmptype, uint64_t start, size_t count) { + int i; + assert(cmptype != kfloat); + switch (type_size[cmptype]) { + case 1: { + uint8_t* ub = (uint8_t *)cmp; + for (i=0; i < count; ++i) + ub[i] = (uint8_t)start++; + break; + } + case 2: { + uint16_t* us = (uint16_t *)cmp; + for (i=0; i < count; ++i) + us[i] = (uint16_t)start++; + break; + } + case 4: { + if (!s_wimpy_mode) { + uint32_t* ui = (uint32_t *)cmp; + for (i=0; i < count; ++i) + ui[i] = (uint32_t)start++; + } + else { + // The short test doesn't iterate over the entire 32 bit space so + // we alternate between positive and negative values + int32_t* ui = (int32_t *)cmp; + int32_t sign = 1; + for (i=0; i < count; ++i, ++start) { + ui[i] = (int32_t)start*sign; + sign = sign * -1; + } + } + break; + } + case 8: { + // We don't iterate over the entire space of 64 bit so for the + // selects, we want to test positive and negative values + int64_t* ll = (int64_t *)cmp; + int64_t sign = 1; + 
for (i=0; i < count; ++i, ++start) { + ll[i] = start*sign; + sign = sign * -1; + } + break; + } + default: + log_error("invalid cmptype %s\n",type_name[cmptype]); + } // end switch +} + +// Make the various incarnations of the program we want to run +// stype: source and destination type for the select +// ctype: compare type +static cl_program makeSelectProgram(cl_kernel *kernel_ptr, const cl_context context, Type srctype, Type cmptype, size_t vec_len) +{ + char testname[256]; + char stypename[32]; + char ctypename[32]; + char extension[128] = ""; + int err = 0; + + int i; // generic, re-usable loop variable + + const char *source[] = { + extension, + "__kernel void ", testname, + "(__global ", stypename, " *dest, __global ", stypename, " *src1,\n __global ", + stypename, " *src2, __global ", ctypename, " *cmp)\n", + "{\n" + " size_t tid = get_global_id(0);\n" + " if( tid < get_global_size(0) )\n" + " dest[tid] = select(src1[tid], src2[tid], cmp[tid]);\n" + "}\n" + }; + + + const char *sourceV3[] = { + extension, + "__kernel void ", testname, + "(__global ", stypename, " *dest, __global ", stypename, " *src1,\n __global ", + stypename, " *src2, __global ", ctypename, " *cmp)\n", + "{\n" + " size_t tid = get_global_id(0);\n" + " size_t size = get_global_size(0);\n" + " if( tid + 1 < size ) // can't run off the end\n" + " vstore3( select( vload3(tid, src1), vload3(tid, src2), vload3(tid, cmp)), tid, dest );\n" + " else if(tid + 1 == size)\n" + " {\n" + // If the size is odd, then we have odd * 3 elements, which is an odd number of scalars in the array + // If the size is even, then we have even * 3 elements, which is an even number of scalars in the array + // 3 will never divide evenly into a power of two sized buffer, so the last vec3 will overhang by 1 or 2. + // The only even number x in power_of_two < x <= power_of_two+2 is power_of_two+2. + // The only odd number x in power_of_two < x <= power_of_two+2 is power_of_two+1. 
+ // Therefore, odd sizes overhang the end of the array by 1, and even sizes overhang by 2. + " size_t leftovers = 1 + (size & 1);\n" + " ", stypename, "3 a, b; \n" + " ", ctypename, "3 c;\n" + " switch( leftovers ) \n" + " {\n" + " case 2:\n" + " a.y = src1[3*tid+1];\n" + " b.y = src2[3*tid+1];\n" + " c.y = cmp[3*tid+1];\n" + " // fall through \n" + " case 1:\n" + " a.x = src1[3*tid];\n" + " b.x = src2[3*tid];\n" + " c.x = cmp[3*tid];\n" + " break;\n" + " }\n" + " a = select( a, b, c );\n" + " switch( leftovers ) \n" + " {\n" + " case 2:\n" + " dest[3*tid+1] = a.y;\n" + " // fall through \n" + " case 1:\n" + " dest[3*tid] = a.x;\n" + " break;\n" + " }\n" + " }\n" + "}\n" + }; + + if (srctype == kdouble) + strcpy( extension, "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" ); + + // create type name and testname + switch( vec_len ) + { + case 1: + strncpy(stypename, type_name[srctype], sizeof(stypename)); + strncpy(ctypename, type_name[cmptype], sizeof(ctypename)); + snprintf(testname, sizeof(testname), "select_%s_%s", stypename, ctypename ); + log_info("Building %s(%s, %s, %s)\n", testname, stypename, stypename, ctypename); + break; + case 3: + strncpy(stypename, type_name[srctype], sizeof(stypename)); + strncpy(ctypename, type_name[cmptype], sizeof(ctypename)); + snprintf(testname, sizeof(testname), "select_%s3_%s3", stypename, ctypename ); + log_info("Building %s(%s3, %s3, %s3)\n", testname, stypename, stypename, ctypename); + break; + case 2: + case 4: + case 8: + case 16: + snprintf(stypename,sizeof(stypename), "%s%d", type_name[srctype],(int)vec_len); + snprintf(ctypename,sizeof(ctypename), "%s%d", type_name[cmptype],(int)vec_len); + snprintf(testname, sizeof(testname), "select_%s_%s", stypename, ctypename ); + log_info("Building %s(%s, %s, %s)\n", testname, stypename, stypename, ctypename); + break; + default: + log_error( "Unkown vector type. 
Aborting...\n" ); + exit(-1); + break; + } + + /* + int j; + for( j = 0; j < sizeof( source ) / sizeof( source[0] ); j++ ) + log_info( "%s", source[j] ); + */ + + // create program + cl_program program; + + if (create_single_kernel_helper(context, &program, kernel_ptr, (cl_uint)(vec_len == 3 ? sizeof(sourceV3) / sizeof(sourceV3[0]) : sizeof(source) / sizeof(source[0])), vec_len == 3 ? sourceV3 : source, testname)) + { + log_error("Failed to build program (%d)\n", err); + return NULL; + } + + return program; +} + + +#define VECTOR_SIZE_COUNT 6 + +static int doTest(cl_command_queue queue, cl_context context, Type stype, Type cmptype, cl_device_id device) +{ + int err = CL_SUCCESS; + MTdata d; + const size_t element_count[VECTOR_SIZE_COUNT] = { 1, 2, 3, 4, 8, 16 }; + cl_mem src1 = NULL; + cl_mem src2 = NULL; + cl_mem cmp = NULL; + cl_mem dest = NULL; + void *ref = NULL; + void *sref = NULL; + + cl_ulong blocks = type_size[stype] * 0x100000000ULL / BUFFER_SIZE; + size_t block_elements = BUFFER_SIZE / type_size[stype]; + size_t step = s_wimpy_mode ? 256 : 1; + cl_ulong cmp_stride = block_elements * step; + + // It is more efficient to create the tests all at once since we + // use the same test data on each of the vector sizes + int vecsize; + cl_program programs[VECTOR_SIZE_COUNT]; + cl_kernel kernels[VECTOR_SIZE_COUNT]; + + if(stype == kdouble && ! is_extension_available( device, "cl_khr_fp64" )) + { + log_info("Skipping double because cl_khr_fp64 extension is not supported.\n"); + return 0; + } + + if (gIsEmbedded) + { + if (( stype == klong || stype == kulong ) && ! is_extension_available( device, "cles_khr_int64" )) + { + log_info("Long types unsupported, skipping."); + return 0; + } + + if (( cmptype == klong || cmptype == kulong ) && ! 
is_extension_available( device, "cles_khr_int64" )) + { + log_info("Long types unsupported, skipping."); + return 0; + } + } + + for (vecsize = 0; vecsize < VECTOR_SIZE_COUNT; ++vecsize) + { + programs[vecsize] = makeSelectProgram(&kernels[vecsize], context, stype, cmptype, element_count[vecsize] ); + if (!programs[vecsize] || !kernels[vecsize]) { + ++s_test_fail; + return -1; + } + } + + ref = malloc( BUFFER_SIZE ); + if( NULL == ref ){ log_error("Error: could not allocate ref buffer\n" ); goto exit; } + sref = malloc( BUFFER_SIZE ); + if( NULL == sref ){ log_error("Error: could not allocate ref buffer\n" ); goto exit; } + src1 = clCreateBuffer( context, CL_MEM_READ_ONLY, BUFFER_SIZE, NULL, &err ); + if( err ) { log_error( "Error: could not allocate src1 buffer\n" ); ++s_test_fail; goto exit; } + src2 = clCreateBuffer( context, CL_MEM_READ_ONLY, BUFFER_SIZE, NULL, &err ); + if( err ) { log_error( "Error: could not allocate src2 buffer\n" ); ++s_test_fail; goto exit; } + cmp = clCreateBuffer( context, CL_MEM_READ_ONLY, BUFFER_SIZE, NULL, &err ); + if( err ) { log_error( "Error: could not allocate cmp buffer\n" ); ++s_test_fail; goto exit; } + dest = clCreateBuffer( context, CL_MEM_WRITE_ONLY, BUFFER_SIZE, NULL, &err ); + if( err ) { log_error( "Error: could not allocate dest buffer\n" ); ++s_test_fail; goto exit; } + + + // We block the test as we are running over the range of compare values + // "block the test" means "break the test into blocks" + if( type_size[stype] == 4 ) + cmp_stride = block_elements * step * (0x100000000ULL / 0x100000000ULL); + if( type_size[stype] == 8 ) + cmp_stride = block_elements * step * (0xffffffffffffffffULL / 0x100000000ULL + 1); + + log_info("Testing..."); + d = init_genrand( gRandomSeed ); + uint64_t i; + for (i=0; i < blocks; i+=step) + { + void *s1 = clEnqueueMapBuffer( queue, src1, CL_TRUE, CL_MAP_WRITE, 0, BUFFER_SIZE, 0, NULL, NULL, &err ); + if( err ){ log_error( "Error: Could not map src1" ); goto exit; } + // Setup the 
input data to change for each block + initSrcBuffer( s1, stype, d); + + void *s2 = clEnqueueMapBuffer( queue, src2, CL_TRUE, CL_MAP_WRITE, 0, BUFFER_SIZE, 0, NULL, NULL, &err ); + if( err ){ log_error( "Error: Could not map src2" ); goto exit; } + // Setup the input data to change for each block + initSrcBuffer( s2, stype, d); + + void *s3 = clEnqueueMapBuffer( queue, cmp, CL_TRUE, CL_MAP_WRITE, 0, BUFFER_SIZE, 0, NULL, NULL, &err ); + if( err ){ log_error( "Error: Could not map cmp" ); goto exit; } + // Setup the input data to change for each block + initCmpBuffer(s3, cmptype, i * cmp_stride, block_elements); + + // Create the reference result + Select sfunc = (cmptype == ctype[stype][0]) ? vrefSelects[stype][0] : vrefSelects[stype][1]; + (*sfunc)(ref, s1, s2, s3, block_elements); + + sfunc = (cmptype == ctype[stype][0]) ? refSelects[stype][0] : refSelects[stype][1]; + (*sfunc)(sref, s1, s2, s3, block_elements); + + if( (err = clEnqueueUnmapMemObject( queue, src1, s1, 0, NULL, NULL ))) + { log_error( "Error: coult not unmap src1\n" ); ++s_test_fail; goto exit; } + if( (err = clEnqueueUnmapMemObject( queue, src2, s2, 0, NULL, NULL ))) + { log_error( "Error: coult not unmap src2\n" ); ++s_test_fail; goto exit; } + if( (err = clEnqueueUnmapMemObject( queue, cmp, s3, 0, NULL, NULL ))) + { log_error( "Error: coult not unmap cmp\n" ); ++s_test_fail; goto exit; } + + for (vecsize = 0; vecsize < VECTOR_SIZE_COUNT; ++vecsize) + { + size_t vector_size = element_count[vecsize] * type_size[stype]; + size_t vector_count = (BUFFER_SIZE + vector_size - 1) / vector_size; + + if((err = clSetKernelArg(kernels[vecsize], 0, sizeof dest, &dest) )) + { log_error( "Error: Cannot set kernel arg dest! %d\n", err ); ++s_test_fail; goto exit; } + if((err = clSetKernelArg(kernels[vecsize], 1, sizeof src1, &src1) )) + { log_error( "Error: Cannot set kernel arg dest! 
%d\n", err ); ++s_test_fail; goto exit; } + if((err = clSetKernelArg(kernels[vecsize], 2, sizeof src2, &src2) )) + { log_error( "Error: Cannot set kernel arg dest! %d\n", err ); ++s_test_fail; goto exit; } + if((err = clSetKernelArg(kernels[vecsize], 3, sizeof cmp, &cmp) )) + { log_error( "Error: Cannot set kernel arg dest! %d\n", err ); ++s_test_fail; goto exit; } + + + // Wipe destination + void *d = clEnqueueMapBuffer( queue, dest, CL_TRUE, CL_MAP_WRITE, 0, BUFFER_SIZE, 0, NULL, NULL, &err ); + if( err ){ log_error( "Error: Could not map dest" ); ++s_test_fail; goto exit; } + memset( d, -1, BUFFER_SIZE ); + if( (err = clEnqueueUnmapMemObject( queue, dest, d, 0, NULL, NULL ) ) ){ log_error( "Error: Could not unmap dest" ); ++s_test_fail; goto exit; } + + err = clEnqueueNDRangeKernel(queue, kernels[vecsize], 1, NULL, &vector_count, NULL, 0, NULL, NULL); + if (err != CL_SUCCESS) { + log_error("clEnqueueNDRangeKernel failed errcode:%d\n", err); + ++s_test_fail; + goto exit; + } + + d = clEnqueueMapBuffer( queue, dest, CL_TRUE, CL_MAP_READ, 0, BUFFER_SIZE, 0, NULL, NULL, &err ); + if( err ){ log_error( "Error: Could not map dest # 2" ); ++s_test_fail; goto exit; } + + if ((*checkResults[stype])(d, vecsize == 0 ? 
sref : ref, block_elements, element_count[vecsize])!=0){ + log_error("vec_size:%d indx: 0x%16.16llx\n", (int)element_count[vecsize], i); + ++s_test_fail; + goto exit; + } + + if( (err = clEnqueueUnmapMemObject( queue, dest, d, 0, NULL, NULL ) ) ) + { + log_error( "Error: Could not unmap dest" ); + ++s_test_fail; + goto exit; + } + } // for vecsize + } // for i + + if (!s_wimpy_mode) + log_info(" Passed\n\n"); + else + log_info(" Wimpy Passed\n\n"); + +exit: + if( src1 ) clReleaseMemObject( src1 ); + if( src2 ) clReleaseMemObject( src2 ); + if( cmp ) clReleaseMemObject( cmp ); + if( dest) clReleaseMemObject( dest ); + if( ref ) free(ref ); + if( sref ) free(sref ); + + free_mtdata(d); + for (vecsize = 0; vecsize < VECTOR_SIZE_COUNT; vecsize++) { + clReleaseKernel(kernels[vecsize]); + clReleaseProgram(programs[vecsize]); + } + ++s_test_cnt; + return err; +} + +static void printUsage( void ) +{ + log_info("test_select: [-cghw] [test_name|start_test_num] \n"); + log_info(" default is to run the full test on the default device\n"); + log_info(" -w run in wimpy mode (smoke test)\n"); + log_info(" test_name will run only one test of that name\n"); + log_info(" start_test_num will start running from that num\n"); +} + +static void printArch( void ) +{ + log_info( "sizeof( void*) = %d\n", (int) sizeof( void *) ); + +#if defined( __APPLE__ ) + +#if defined( __ppc__ ) + log_info( "ARCH:\tppc\n" ); +#elif defined( __ppc64__ ) + log_info( "ARCH:\tppc64\n" ); +#elif defined( __i386__ ) + log_info( "ARCH:\ti386\n" ); +#elif defined( __x86_64__ ) + log_info( "ARCH:\tx86_64\n" ); +#elif defined( __arm__ ) + log_info( "ARCH:\tarm\n" ); +#else +#error unknown arch +#endif + + int type = 0; + size_t typeSize = sizeof( type ); + sysctlbyname( "hw.cputype", &type, &typeSize, NULL, 0 ); + log_info( "cpu type:\t%d\n", type ); + typeSize = sizeof( type ); + sysctlbyname( "hw.cpusubtype", &type, &typeSize, NULL, 0 ); + log_info( "cpu subtype:\t%d\n", type ); + +#endif +} + + + + 
+//----------------------------------------- +// main +//----------------------------------------- +int main(int argc, const char* argv[]) { + int i; + cl_device_type device_type = CL_DEVICE_TYPE_DEFAULT; + cl_platform_id platform_id; + long test_start_num = 0; // start test number + const char* exec_testname = NULL; + cl_device_id device_id; + uint32_t device_frequency = 0; + uint32_t compute_devices = 0; + + + test_start(); + + argc = parseCustomParam(argc, argv); + if (argc == -1) + { + test_finish(); + return -1; + } + + // Maybe we want turn off sleep + + // Check the environmental to see if there is device preference + char *device_env = getenv("CL_DEVICE_TYPE"); + if (device_env != NULL) { + if( strcmp( device_env, "gpu" ) == 0 || strcmp( device_env, "CL_DEVICE_TYPE_GPU" ) == 0 ) + device_type = CL_DEVICE_TYPE_GPU; + else if( strcmp( device_env, "cpu" ) == 0 || strcmp( device_env, "CL_DEVICE_TYPE_CPU" ) == 0 ) + device_type = CL_DEVICE_TYPE_CPU; + else if( strcmp( device_env, "accelerator" ) == 0 || strcmp( device_env, "CL_DEVICE_TYPE_ACCELERATOR" ) == 0 ) + device_type = CL_DEVICE_TYPE_ACCELERATOR; + else if( strcmp( device_env, "default" ) == 0 || strcmp( device_env, "CL_DEVICE_TYPE_DEFAULT" ) == 0 ) + device_type = CL_DEVICE_TYPE_DEFAULT; + else + { + log_error( "Unknown CL_DEVICE_TYPE environment variable: %s.\nAborting...\n", device_env ); + abort(); + } + } + + // Check for the wimpy mode environment variable + if (getenv("CL_WIMPY_MODE")) { + log_info("*** Detected CL_WIMPY_MODE env\n"); + s_wimpy_mode = 1; + } + + // Determine if we want to run a particular test or if we want to + // start running from a certain point and if we want to run on cpu/gpu + // usage: test_selects [test_name] [start test num] [run_long] + // default is to run all tests on the gpu and be short + // test names are of the form select_[src/dest type]_[cmp_type] + // In the long test, we run the full range for any type >= 32 bits + // and 32 bits subset for the 64 bit value. 
+ for (i=1; i < argc; ++i) { + const char *arg = argv[i]; + if (arg == NULL) + break; + + if (arg[0] == '-') + { + arg++; + while(*arg != '\0') + { + switch(*arg) { + case 'h': + printUsage(); + return 0; + case 'w': // Wimpy mode + s_wimpy_mode = true; + break; + default: + log_error( " <-- unknown flag: %c (0x%2.2x)\n)", *arg, *arg ); + printUsage(); + return 0; + } + arg++; + } + } + else { + char* t = NULL; + long num = strtol(argv[i], &t, 0); + if (t != argv[i]) + test_start_num = num; + else if( 0 == strcmp( argv[i], "CL_DEVICE_TYPE_CPU" ) ) + device_type = CL_DEVICE_TYPE_CPU; + else if( 0 == strcmp( argv[i], "CL_DEVICE_TYPE_GPU" ) ) + device_type = CL_DEVICE_TYPE_GPU; + else if( 0 == strcmp( argv[i], "CL_DEVICE_TYPE_ACCELERATOR" ) ) + device_type = CL_DEVICE_TYPE_ACCELERATOR; + else if( 0 == strcmp( argv[i], "CL_DEVICE_TYPE_DEFAULT" ) ) + device_type = CL_DEVICE_TYPE_DEFAULT; + else if( 0 == strcmp( argv[i], "randomize" ) ) { + gRandomSeed = (cl_uint) time( NULL ); + log_info("\nRandom seed: %u.\n", gRandomSeed ); + } else { + // assume it is a test name that we want to execute + exec_testname = argv[i]; + } + } + } + + + int err; + + // Get platform + err = clGetPlatformIDs(1, &platform_id, NULL); + checkErr(err,"clGetPlatformIDs failed"); + + // Get Device information + err = clGetDeviceIDs(platform_id, device_type, 1, &device_id, 0); + checkErr(err,"clGetComputeDevices"); + + err = clGetDeviceInfo(device_id, CL_DEVICE_TYPE, sizeof(cl_device_type), &device_type, NULL); + checkErr(err,"clGetComputeConfigInfo 1"); + + size_t config_size = sizeof( device_frequency ); +#if MULTITHREAD + if( (err = clGetDeviceInfo(device_id, CL_DEVICE_MAX_COMPUTE_UNITS, config_size, &compute_devices, NULL )) ) +#endif + compute_devices = 1; + + config_size = sizeof(device_frequency); + if((err = clGetDeviceInfo(device_id, CL_DEVICE_MAX_CLOCK_FREQUENCY, config_size, &device_frequency, NULL ))) + device_frequency = 1; + + //detect whether profile of the device is embedded + char 
profile[1024] = ""; + if( (err = clGetDeviceInfo(device_id, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL ) ) ){} + else if( strstr(profile, "EMBEDDED_PROFILE" ) ) + { + gIsEmbedded = 1; + } + + + log_info( "\nCompute Device info:\n" ); + log_info( "\tProcessing with %d devices\n", compute_devices ); + log_info( "\tDevice Frequency: %d MHz\n", device_frequency ); + + printDeviceHeader( device_id ); + printArch(); + + log_info( "Test binary built %s %s\n", __DATE__, __TIME__ ); + if (s_wimpy_mode) { + log_info("\n"); + log_info("*** WARNING: Testing in Wimpy mode! ***\n"); + log_info("*** Wimpy mode is not sufficient to verify correctness. ***\n"); + log_info("*** It gives warm fuzzy feelings and then nevers calls. ***\n\n"); + } + + cl_context context = clCreateContext(NULL, 1, &device_id, notify_callback, NULL, NULL); + checkNull(context, "clCreateContext"); + + cl_command_queue queue = clCreateCommandQueueWithProperties(context, device_id, 0, NULL); + checkNull(queue, "clCreateCommandQueue"); + + + if (exec_testname) { + // Parse name + // Skip the first part of the name + bool success = false; + if (strncmp(exec_testname, "select_", 7) == 0) { + int i; + Type src_type = kTypeCount; + Type cmp_type = kTypeCount; + char* sptr = (char *)strchr(exec_testname, '_'); + if (sptr) { + for (++sptr, i=0; i < kTypeCount; i++) { + if (strncmp(sptr, type_name[i], strlen(type_name[i])) == 0) { + src_type = (Type)i; + break; + } + } + sptr = strchr(sptr, '_'); + if (sptr) { + for (++sptr, i=0; i < kTypeCount; i++) { + if (strncmp(sptr, type_name[i], strlen(type_name[i])) == 0) { + cmp_type = (Type)i; + break; + } + } + } + } + if (src_type != kTypeCount && cmp_type != kTypeCount) { + success = true; + log_info("Testing only select_%s_%s\n", + type_name[src_type], type_name[cmp_type]); + if (doTest(queue, context, src_type, cmp_type, device_id) != 0) + log_error("*** select_%s_%s FAILED ***\n\n", + type_name[src_type], type_name[cmp_type]); + } + } + if (!success) { + 
log_error("can not find test:%s", exec_testname); + return -1; + } + } + else { + int src_type, j; + int test_num; + test_num = 0; + for (src_type = 0; src_type < kTypeCount; ++src_type) { + for (j = 0; j < 2; ++j) { + Type cmp_type = ctype[src_type][j]; + if (++test_num < test_start_num) { + log_info("%d) skipping select_%s_%s\n", test_num, + type_name[src_type], type_name[cmp_type]); + } + else { + log_info("%d) Testing select_%s_%s\n", + test_num, type_name[src_type], type_name[cmp_type]); + if (doTest(queue, context, (Type)src_type, cmp_type, device_id) != 0) + log_error("*** %d) select_%s_%s FAILED ***\n\n", test_num, + type_name[src_type], type_name[cmp_type]); + } + } + } + } + + int error = clFinish(queue); + if (error) { + log_error("clFinish failed: %d\n", error); + } + + clReleaseContext(context); + clReleaseCommandQueue(queue); + + if (s_test_fail == 0) { + if (s_test_cnt > 1) + log_info("PASSED %d of %d tests.\n", s_test_cnt, s_test_cnt); + else + log_info("PASSED test.\n"); + } else if (s_test_fail > 0) { + if (s_test_cnt > 1) + log_error("FAILED %d of %d tests.\n", s_test_fail, s_test_cnt); + else + log_error("FAILED test.\n"); + } + + test_finish(); + return s_test_fail; +} diff --git a/test_conformance/select/test_select.h b/test_conformance/select/test_select.h new file mode 100644 index 00000000..a0edcf29 --- /dev/null +++ b/test_conformance/select/test_select.h @@ -0,0 +1,117 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef TESTSELECTS_INCLUDED_H +#define TESTSELECTS_INCLUDED_H + +#include "../../test_common/harness/compat.h" + +#include +#include + +#ifdef __APPLE__ +#include +#else +#include +#endif + +// Enable the test to be used with ATF +#if USE_ATF +// export BUILD_WITH_ATF=1 +#include +#define test_start() ATFTestStart() +#define log_info ATFLogInfo +#define log_error ATFLogError +#define test_finish() ATFTestFinish() +#else +#define test_start() +#define log_info printf +#define log_error printf +#define test_finish() +#endif // USE_ATF + + +// Defines the set of types we support (no support for double) +typedef enum { + kuchar = 0, + kchar = 1, + kushort = 2, + kshort = 3, + kuint = 4, + kint = 5, + kfloat = 6, + kulong = 7, + klong = 8, + kdouble = 9, + kTypeCount // always goes last +} Type; + + +// Support max vector size of 16 +#define kVectorSizeCount 6 +#define kMaxVectorSize 16 + + +// Type names and their sizes in bytes +extern const char *type_name[kTypeCount]; +extern const size_t type_size[kTypeCount]; + +// Associated comparison types +extern const Type ctype[kTypeCount][2]; + +// Reference functions for the primitive (non vector) type +typedef void (*Select)(void *dest, void *src1, void *src2, void *cmp, size_t c); +extern Select refSelects[kTypeCount][2]; + +// Reference functions for the primtive type but uses the vector +// definition of true and false +extern Select vrefSelects[kTypeCount][2]; + +// Check functions for each output type +typedef size_t (*CheckResults)(void *out1, void *out2, size_t count, size_t vectorSize); +extern CheckResults checkResults[kTypeCount]; + +// Helpful macros + +// The next three functions check on different return values. 
 Returns -1 +// if the check failed +#define checkErr(err, msg) \ + if (err != CL_SUCCESS) { \ + log_error("%s failed errcode:%d\n", msg, err); \ + return -1; \ + } + +#define checkZero(val, msg) \ + if (val == 0) { \ + log_error("%s failed errcode:%d\n", msg, val); \ + return -1; \ + } + +#define checkNull(ptr, msg) \ + if (!ptr) { \ + log_error("%s failed\n", msg); \ + return -1; \ + } + +// When a helper returns a negative one, we want to return from main +// with negative one. This helper prevents me from having to write +// this multiple times +#define checkHelperErr(err) \ + if (err == -1) { \ + return err; \ + } + + +#endif // TESTSELECTS_INCLUDED_H diff --git a/test_conformance/select/util_select.c b/test_conformance/select/util_select.c new file mode 100644 index 00000000..59a9dc82 --- /dev/null +++ b/test_conformance/select/util_select.c @@ -0,0 +1,734 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/compat.h" + +#include +#include "test_select.h" + + +//----------------------------------------- +// Definitions and initializations +//----------------------------------------- + + +const char *type_name[kTypeCount] = { + "uchar", "char", + "ushort", "short", + "uint", "int", + "float", "ulong", "long", "double" }; + +const size_t type_size[kTypeCount] = { + sizeof(cl_uchar), sizeof(cl_char), + sizeof(cl_ushort), sizeof(cl_short), + sizeof(cl_uint), sizeof(cl_int), + sizeof(cl_float), sizeof(cl_ulong), sizeof(cl_long), sizeof( cl_double ) }; + +const Type ctype[kTypeCount][2] = { + { kuchar, kchar }, // uchar + { kuchar, kchar }, // char + { kushort, kshort}, // ushort + { kushort, kshort}, // short + { kuint, kint }, // uint + { kuint, kint }, // int + { kuint, kint }, // float + { kulong, klong }, // ulong + { kulong, klong }, // long + { kulong, klong } // double +}; + + +//----------------------------------------- +// Reference functions +//----------------------------------------- + +void refselect_1i8(void *dest, void *src1, void *src2, void *cmp, size_t count) { + size_t i; + cl_char *d, *x, *y, *m; + d = (cl_char*) dest; + x = (cl_char*) src1; + y = (cl_char*) src2; + m = (cl_char*) cmp; + for (i=0; i < count; ++i) { + d[i] = m[i] ? y[i] : x[i]; + } +} + +void refselect_1u8(void *dest, void *src1, void *src2, void *cmp, size_t count) { + size_t i; + cl_uchar *d, *x, *y; + cl_char *m; + d = (cl_uchar*) dest; + x = (cl_uchar*) src1; + y = (cl_uchar*) src2; + m = (cl_char*) cmp; + for (i=0; i < count; ++i) { + d[i] = m[i] ? y[i] : x[i]; + } +} + +void refselect_1i16(void *dest, void *src1, void *src2, void *cmp, size_t count) { + size_t i; + cl_short *d, *x, *y, *m; + d = (cl_short*) dest; + x = (cl_short*) src1; + y = (cl_short*) src2; + m = (cl_short*) cmp; + + for (i=0; i < count; ++i) + d[i] = m[i] ? 
y[i] : x[i]; +} + +void refselect_1u16(void *dest, void *src1, void *src2, void *cmp, size_t count) { + size_t i; + cl_ushort *d, *x, *y; + cl_short *m; + d = (cl_ushort*) dest; + x = (cl_ushort*) src1; + y = (cl_ushort*) src2; + m = (cl_short*) cmp; + for (i=0; i < count; ++i) + d[i] = m[i] ? y[i] : x[i]; +} + +void refselect_1i32(void *dest, void *src1, void *src2, void *cmp, size_t count) { + size_t i; + cl_int *d, *x, *y, *m; + d = (cl_int*)dest; + x = (cl_int*)src1; + y = (cl_int*)src2; + m = (cl_int*)cmp; + for (i=0; i < count; ++i) + d[i] = m[i] ? y[i] : x[i]; +} + +void refselect_1u32(void *dest, void *src1, void *src2, void *cmp, size_t count){ + size_t i; + cl_uint *d, *x, *y; + cl_int *m; + d = (cl_uint*)dest; + x = (cl_uint*)src1; + y = (cl_uint*)src2; + m = (cl_int*)cmp; + for (i=0; i < count; ++i) + d[i] = m[i] ? y[i] : x[i]; +} + +void refselect_1i64(void *dest, void *src1, void *src2, void *cmp, size_t count) { + size_t i; + cl_long *d, *x, *y, *m; + d = (cl_long*) dest; + x = (cl_long*) src1; + y = (cl_long*) src2; + m = (cl_long*) cmp; + for (i=0; i < count; ++i) + d[i] = m[i] ? y[i] : x[i]; +} + +void refselect_1u64(void *dest, void *src1, void *src2, void *cmp, size_t count) { + size_t i; + cl_ulong *d, *x, *y; + cl_long *m; + d = (cl_ulong*) dest; + x = (cl_ulong*) src1; + y = (cl_ulong*) src2; + m = (cl_long*) cmp; + for (i=0; i < count; ++i) + d[i] = m[i] ? y[i] : x[i]; +} + +void refselect_1i8u(void *dest, void *src1, void *src2, void *cmp, size_t count) { + size_t i; + cl_char *d, *x, *y; + cl_uchar *m; + d = (cl_char*) dest; + x = (cl_char*) src1; + y = (cl_char*) src2; + m = (cl_uchar*) cmp; + for (i=0; i < count; ++i) + d[i] = m[i] ? y[i] : x[i]; +} + +void refselect_1u8u(void *dest, void *src1, void *src2, void *cmp, size_t count) { + size_t i; + cl_uchar *d, *x, *y, *m; + d = (cl_uchar*) dest; + x = (cl_uchar*) src1; + y = (cl_uchar*) src2; + m = (cl_uchar*) cmp; + for (i=0; i < count; ++i) + d[i] = m[i] ? 
y[i] : x[i]; +} + +void refselect_1i16u(void *dest, void *src1, void *src2, void *cmp, size_t count) { + size_t i; + cl_short *d, *x, *y; + cl_ushort *m; + d = (cl_short*) dest; + x = (cl_short*) src1; + y = (cl_short*) src2; + m = (cl_ushort*) cmp; + for (i=0; i < count; ++i) + d[i] = m[i] ? y[i] : x[i]; +} + +void refselect_1u16u(void *dest, void *src1, void *src2, void *cmp, size_t count) { + size_t i; + cl_ushort *d, *x, *y, *m; + d = (cl_ushort*) dest; + x = (cl_ushort*) src1; + y = (cl_ushort*) src2; + m = (cl_ushort*) cmp; + for (i=0; i < count; ++i) + d[i] = m[i] ? y[i] : x[i]; +} + +void refselect_1i32u(void *dest, void *src1, void *src2, void *cmp, size_t count) { + size_t i; + cl_int *d, *x, *y; + cl_uint *m; + d = (cl_int*) dest; + x = (cl_int*) src1; + y = (cl_int*) src2; + m = (cl_uint*) cmp; + for (i=0; i < count; ++i) + d[i] = m[i] ? y[i] : x[i]; +} + +void refselect_1u32u(void *dest, void *src1, void *src2, void *cmp, size_t count) { + size_t i; + cl_uint *d, *x, *y, *m; + d = (cl_uint*) dest; + x = (cl_uint*) src1; + y = (cl_uint*) src2; + m = (cl_uint*) cmp; + for (i=0; i < count; ++i) + d[i] = m[i] ? y[i] : x[i]; +} + +void refselect_1i64u(void *dest, void *src1, void *src2, void *cmp, size_t count) { + size_t i; + cl_long *d, *x, *y; + cl_ulong *m; + d = (cl_long*) dest; + x = (cl_long*) src1; + y = (cl_long*) src2; + m = (cl_ulong*) cmp; + for (i=0; i < count; ++i) + d[i] = m[i] ? y[i] : x[i]; +} + +void refselect_1u64u(void *dest, void *src1, void *src2, void *cmp, size_t count) { + size_t i; + cl_ulong *d, *x, *y, *m; + d = (cl_ulong*) dest; + x = (cl_ulong*) src1; + y = (cl_ulong*) src2; + m = (cl_ulong*) cmp; + for (i=0; i < count; ++i) + d[i] = m[i] ? y[i] : x[i]; +} + +void refselect_ffi(void *dest, void *src1, void *src2, void *cmp, size_t count) { + size_t i; + cl_int *d, *x, *y; + cl_int *m; + d = (cl_int*) dest; + x = (cl_int*) src1; + y = (cl_int*) src2; + m = (cl_int*) cmp; + for (i=0; i < count; ++i) + d[i] = m[i] ? 
y[i] : x[i]; +} + +void refselect_ffu(void *dest, void *src1, void *src2, void *cmp, size_t count) { + size_t i; + cl_uint *d, *x, *y; + cl_uint *m; + d = (cl_uint*) dest; + x = (cl_uint*) src1; + y = (cl_uint*) src2; + m = (cl_uint*) cmp; + for (i=0; i < count; ++i) + d[i] = m[i] ? y[i] : x[i]; +} + +void refselect_ddi(void *dest, void *src1, void *src2, void *cmp, size_t count) { + size_t i; + cl_long *d, *x, *y; + cl_long *m; + d = (cl_long*) dest; + x = (cl_long*) src1; + y = (cl_long*) src2; + m = (cl_long*) cmp; + for (i=0; i < count; ++i) + d[i] = m[i] ? y[i] : x[i]; +} + +void refselect_ddu(void *dest, void *src1, void *src2, void *cmp, size_t count) { + size_t i; + cl_long *d, *x, *y; + cl_ulong *m; + d = (cl_long*) dest; + x = (cl_long*) src1; + y = (cl_long*) src2; + m = (cl_ulong*) cmp; + for (i=0; i < count; ++i) + d[i] = m[i] ? y[i] : x[i]; +} + +void vrefselect_1i8(void *dest, void *src1, void *src2, void *cmp, size_t count) { + size_t i; + cl_char *d, *x, *y, *m; + d = (cl_char*) dest; + x = (cl_char*) src1; + y = (cl_char*) src2; + m = (cl_char*) cmp; + for (i=0; i < count; ++i) + d[i] = (m[i] & 0x80) ? y[i] : x[i]; +} + +void vrefselect_1u8(void *dest, void *src1, void *src2, void *cmp, size_t count) { + size_t i; + cl_uchar *d, *x, *y; + cl_char *m; + d = (cl_uchar*) dest; + x = (cl_uchar*) src1; + y = (cl_uchar*) src2; + m = (cl_char*) cmp; + for (i=0; i < count; ++i) + d[i] = (m[i] & 0x80) ? y[i] : x[i]; +} + +void vrefselect_1i16(void *dest, void *src1, void *src2, void *cmp, size_t count) { + size_t i; + cl_short *d, *x, *y, *m; + d = (cl_short*) dest; + x = (cl_short*) src1; + y = (cl_short*) src2; + m = (cl_short*) cmp; + + for (i=0; i < count; ++i) + d[i] = (m[i] & 0x8000) ? 
y[i] : x[i]; +} + +void vrefselect_1u16(void *dest, void *src1, void *src2, void *cmp, size_t count) { + size_t i; + cl_ushort *d, *x, *y; + cl_short *m; + d = (cl_ushort*) dest; + x = (cl_ushort*)src1; + y = (cl_ushort*)src2; + m = (cl_short*)cmp; + for (i=0; i < count; ++i) + d[i] = (m[i] & 0x8000) ? y[i] : x[i]; +} + +void vrefselect_1i32(void *dest, void *src1, void *src2, void *cmp, size_t count) { + size_t i; + cl_int *d, *x, *y, *m; + d = (cl_int*) dest; + x = (cl_int*) src1; + y = (cl_int*) src2; + m = (cl_int*) cmp; + for (i=0; i < count; ++i) + d[i] = (m[i] & 0x80000000) ? y[i] : x[i]; +} + +void vrefselect_1u32(void *dest, void *src1, void *src2, void *cmp, size_t count){ + size_t i; + cl_uint *d, *x, *y; + cl_int *m; + d = (cl_uint*) dest; + x = (cl_uint*) src1; + y = (cl_uint*) src2; + m = (cl_int*) cmp; + + for (i=0; i < count; ++i) + d[i] = (m[i] & 0x80000000) ? y[i] : x[i]; +} + +void vrefselect_1i64(void *dest, void *src1, void *src2, void *cmp, size_t count) { + size_t i; + cl_long *d, *x, *y, *m; + d = (cl_long*) dest; + x = (cl_long*) src1; + y = (cl_long*) src2; + m = (cl_long*) cmp; + for (i=0; i < count; ++i) + d[i] = (m[i] & 0x8000000000000000LL) ? y[i] : x[i]; +} + +void vrefselect_1u64(void *dest, void *src1, void *src2, void *cmp, size_t count) { + size_t i; + cl_ulong *d, *x, *y; + cl_long *m; + d = (cl_ulong*) dest; + x = (cl_ulong*) src1; + y = (cl_ulong*) src2; + m = (cl_long*) cmp; + for (i=0; i < count; ++i) + d[i] = (m[i] & 0x8000000000000000LL) ? y[i] : x[i]; +} + +void vrefselect_1i8u(void *dest, void *src1, void *src2, void *cmp, size_t count) { + size_t i; + cl_char *d, *x, *y; + cl_uchar *m; + d = (cl_char*) dest; + x = (cl_char*) src1; + y = (cl_char*) src2; + m = (cl_uchar*) cmp; + for (i=0; i < count; ++i) + d[i] = (m[i] & 0x80U) ? 
y[i] : x[i]; +} + +void vrefselect_1u8u(void *dest, void *src1, void *src2, void *cmp, size_t count) { + size_t i; + cl_uchar *d, *x, *y, *m; + d = (cl_uchar*) dest; + x = (cl_uchar*) src1; + y = (cl_uchar*) src2; + m = (cl_uchar*) cmp; + for (i=0; i < count; ++i) + d[i] = (m[i] & 0x80U) ? y[i] : x[i]; +} + +void vrefselect_1i16u(void *dest, void *src1, void *src2, void *cmp, size_t count) { + size_t i; + cl_short *d, *x, *y; + cl_ushort *m; + d = (cl_short*) dest; + x = (cl_short*) src1; + y = (cl_short*) src2; + m = (cl_ushort*) cmp; + for (i=0; i < count; ++i) + d[i] = (m[i] & 0x8000U) ? y[i] : x[i]; +} + +void vrefselect_1u16u(void *dest, void *src1, void *src2, void *cmp, size_t count) { + size_t i; + cl_ushort *d, *x, *y, *m; + d = (cl_ushort*) dest; + x = (cl_ushort*) src1; + y = (cl_ushort*) src2; + m = (cl_ushort*) cmp; + for (i=0; i < count; ++i) + d[i] = (m[i] & 0x8000U) ? y[i] : x[i]; +} + +void vrefselect_1i32u(void *dest, void *src1, void *src2, void *cmp, size_t count) { + size_t i; + cl_int *d, *x, *y; + cl_uint *m; + d = (cl_int*) dest; + x = (cl_int*) src1; + y = (cl_int*) src2; + m = (cl_uint*) cmp; + for (i=0; i < count; ++i) + d[i] = (m[i] & 0x80000000U) ? y[i] : x[i]; +} + +void vrefselect_1u32u(void *dest, void *src1, void *src2, void *cmp, size_t count) { + size_t i; + cl_uint *d, *x, *y, *m; + d = (cl_uint*) dest; + x = (cl_uint*) src1; + y = (cl_uint*) src2; + m = (cl_uint*) cmp; + for (i=0; i < count; ++i) + d[i] = (m[i] & 0x80000000U) ? y[i] : x[i]; +} + +void vrefselect_1i64u(void *dest, void *src1, void *src2, void *cmp, size_t count) { + size_t i; + cl_long *d, *x, *y; + cl_ulong *m; + d = (cl_long*) dest; + x = (cl_long*) src1; + y = (cl_long*) src2; + m = (cl_ulong*) cmp; + for (i=0; i < count; ++i) + d[i] = (m[i] & 0x8000000000000000ULL) ? 
y[i] : x[i]; +} + +void vrefselect_1u64u(void *dest, void *src1, void *src2, void *cmp, size_t count) { + size_t i; + cl_ulong *d, *x, *y, *m; + d = (cl_ulong*) dest; + x = (cl_ulong*) src1; + y = (cl_ulong*) src2; + m = (cl_ulong*) cmp; + for (i=0; i < count; ++i) + d[i] = (m[i] & 0x8000000000000000ULL) ? y[i] : x[i]; +} + +void vrefselect_ffi(void *dest, void *src1, void *src2, void *cmp, size_t count) { + size_t i; + cl_uint *d, *x, *y; + cl_int *m; + d = (cl_uint*) dest; + x = (cl_uint*) src1; + y = (cl_uint*) src2; + m = (cl_int*) cmp; + for (i=0; i < count; ++i) + d[i] = (m[i] & 0x80000000) ? y[i] : x[i]; +} + +void vrefselect_ffu(void *dest, void *src1, void *src2, void *cmp, size_t count) { + size_t i; + cl_uint *d, *x, *y; + cl_uint *m; + d = (cl_uint*) dest; + x = (cl_uint*) src1; + y = (cl_uint*) src2; + m = (cl_uint*) cmp; + for (i=0; i < count; ++i) + d[i] = (m[i] & 0x80000000U) ? y[i] : x[i]; +} + +void vrefselect_ddi(void *dest, void *src1, void *src2, void *cmp, size_t count) { + size_t i; + cl_ulong *d, *x, *y; + cl_long *m; + d = (cl_ulong*) dest; + x = (cl_ulong*) src1; + y = (cl_ulong*) src2; + m = (cl_long*) cmp; + for (i=0; i < count; ++i) + d[i] = (m[i] & 0x8000000000000000LL) ? y[i] : x[i]; +} + +void vrefselect_ddu(void *dest, void *src1, void *src2, void *cmp, size_t count) { + size_t i; + cl_ulong *d, *x, *y; + cl_ulong *m; + d = (cl_ulong*) dest; + x = (cl_ulong*) src1; + y = (cl_ulong*) src2; + m = (cl_ulong*) cmp; + for (i=0; i < count; ++i) + d[i] = (m[i] & 0x8000000000000000ULL) ? 
y[i] : x[i]; +} + +// Define refSelects +Select refSelects[kTypeCount][2] = { + { refselect_1u8u, refselect_1u8 }, // cl_uchar + { refselect_1i8u, refselect_1i8 }, // char + { refselect_1u16u, refselect_1u16 }, // ushort + { refselect_1i16u, refselect_1i16 }, // short + { refselect_1u32u, refselect_1u32 }, // uint + { refselect_1i32u, refselect_1i32 }, // int + { refselect_ffu, refselect_ffi }, // float + { refselect_1u64u, refselect_1u64 }, // ulong + { refselect_1i64u, refselect_1i64 }, // long + { refselect_ddu, refselect_ddi } // double +}; + +// Define vrefSelects (vector refSelects) +Select vrefSelects[kTypeCount][2] = { + { vrefselect_1u8u, vrefselect_1u8 }, // cl_uchar + { vrefselect_1i8u, vrefselect_1i8 }, // char + { vrefselect_1u16u, vrefselect_1u16 }, // ushort + { vrefselect_1i16u, vrefselect_1i16 }, // short + { vrefselect_1u32u, vrefselect_1u32 }, // uint + { vrefselect_1i32u, vrefselect_1i32 }, // int + { vrefselect_ffu, vrefselect_ffi }, // float + { vrefselect_1u64u, vrefselect_1u64 }, // ulong + { vrefselect_1i64u, vrefselect_1i64 }, // long + { vrefselect_ddu, vrefselect_ddi } // double +}; + + +//----------------------------------------- +// Check functions +//----------------------------------------- +size_t check_uchar(void *test, void *correct, size_t count, size_t vector_size) { + const cl_uchar *t = (const cl_uchar *) test; + const cl_uchar *c = (const cl_uchar *) correct; + size_t i; + + for(i = 0; i < count; i++) + if (t[i] != c[i]) { + log_error("\n(check_uchar) Error for vector size %ld found at 0x%8.8lx (of 0x%8.8lx): " + "*0x%2.2x vs 0x%2.2x\n", vector_size, i, count, c[i], t[i]); + return i + 1; + } + + return 0; +} + +size_t check_char(void *test, void *correct, size_t count, size_t vector_size) { + const cl_char *t = (const cl_char *) test; + const cl_char *c = (const cl_char *) correct; + size_t i; + + + for( i = 0; i < count; i++ ) + if( t[i] != c[i] ) { + log_error("\n(check_char) Error for vector size %ld found at 0x%8.8lx (of 
0x%8.8lx): " + "*0x%2.2x vs 0x%2.2x\n", vector_size, i, count, c[i], t[i] ); + return i + 1; + } + + return 0; +} + +size_t check_ushort(void *test, void *correct, size_t count, size_t vector_size) { + const cl_ushort *t = (const cl_ushort *) test; + const cl_ushort *c = (const cl_ushort *) correct; + size_t i; + + + for( i = 0; i < count; i++ ) + if(t[i] != c[i]) { + log_error("\n(check_ushort) Error for vector size %ld found at 0x%8.8lx (of 0x%8.8lx): " + "*0x%4.4x vs 0x%4.4x\n", vector_size, i, count, c[i], t[i] ); + return i + 1; + } + + return 0; +} + +size_t check_short(void *test, void *correct, size_t count, size_t vector_size) { + const cl_short *t = (const cl_short *) test; + const cl_short *c = (const cl_short *) correct; + size_t i; + + + for (i = 0; i < count; i++) + if(t[i] != c[i]) { + log_error("\n(check_short) Error for vector size %ld found at 0x%8.8lx (of 0x%8.8lx): " + "*0x%8.8x vs 0x%8.8x\n", vector_size, i, count, c[i], t[i] ); + return i + 1; + } + + return 0; +} + +size_t check_uint(void *test, void *correct, size_t count, size_t vector_size) { + const cl_uint *t = (const cl_uint *) test; + const cl_uint *c = (const cl_uint *) correct; + size_t i; + + + + for (i = 0; i < count; i++) + if(t[i] != c[i]) { + log_error("\n(check_uint) Error for vector size %ld found at 0x%8.8lx (of 0x%8.8lx): " + "*0x%8.8x vs 0x%8.8x\n", vector_size, i, count, c[i], t[i] ); + return i + 1; + } + + return 0; +} + +size_t check_int(void *test, void *correct, size_t count, size_t vector_size) { + const cl_int *t = (const cl_int *) test; + const cl_int *c = (const cl_int *) correct; + size_t i; + + + for(i = 0; i < count; i++) + if( t[i] != c[i] ) { + + log_error("\n(check_int) Error for vector size %ld found at 0x%8.8lx (of 0x%8.8lx): " + "*0x%8.8x vs 0x%8.8x\n", vector_size, i, count, c[i], t[i]); + log_error("\n(check_int) Error for vector size %ld found at 0x%8.8lx (of 0x%8.8lx): " + "*0x%8.8x vs 0x%8.8x\n", vector_size, i+1, count,c[i+1], t[i+1]); + 
log_error("\n(check_int) Error for vector size %ld found at 0x%8.8lx (of 0x%8.8lx): " + "*0x%8.8x vs 0x%8.8x\n", vector_size, i+2, count,c[i+2], t[i+2]); + log_error("\n(check_int) Error for vector size %ld found at 0x%8.8lx (of 0x%8.8lx): " + "*0x%8.8x vs 0x%8.8x\n", vector_size, i+3, count,c[i+3], t[i+3]); + if(i) { + log_error("\n(check_int) Error for vector size %ld found just after 0x%8.8lx: " + "*0x%8.8x vs 0x%8.8x\n", vector_size, i-1, c[i-1], t[i-1]); + } + return i + 1; + } + + return 0; +} + +size_t check_ulong(void *test, void *correct, size_t count, size_t vector_size) { + const cl_ulong *t = (const cl_ulong *) test; + const cl_ulong *c = (const cl_ulong *) correct; + size_t i; + + + for( i = 0; i < count; i++ ) + if( t[i] != c[i] ) { + log_error("\n(check_ulong) Error for vector size %ld found at 0x%8.8lx (of 0x%8.8lx): " + "*0x%16.16llx vs 0x%16.16llx\n", vector_size, i, count, c[i], t[i] ); + return i + 1; + } + + return 0; +} + +size_t check_long(void *test, void *correct, size_t count, size_t vector_size) { + const cl_long *t = (const cl_long *) test; + const cl_long *c = (const cl_long *) correct; + size_t i; + + + for(i = 0; i < count; i++ ) + if(t[i] != c[i]) { + log_error("\n(check_long) Error for vector size %ld found at 0x%8.8lx (of 0x%8.8lx): " + "*0x%16.16llx vs 0x%16.16llx\n", vector_size, i, count, c[i], t[i] ); + return i + 1; + } + + return 0; +} + +size_t check_float( void *test, void *correct, size_t count, size_t vector_size ) { + const cl_uint *t = (const cl_uint *) test; + const cl_uint *c = (const cl_uint *) correct; + size_t i; + + + for( i = 0; i < count; i++ ) + /* Allow nans to be binary different */ + if ((t[i] != c[i]) && !(isnan(((float *)correct)[i]) && isnan(((float *)test)[i]))) { + log_error("\n(check_float) Error for vector size %ld found at 0x%8.8lx (of 0x%8.8lx): " + "*0x%8.8x vs 0x%8.8x\n", vector_size, i, count, c[i], t[i] ); + return i + 1; + } + + return 0; +} + +size_t check_double( void *test, void *correct, 
size_t count, size_t vector_size ) { + const cl_ulong *t = (const cl_ulong *) test; + const cl_ulong *c = (const cl_ulong *) correct; + size_t i; + + + + for( i = 0; i < count; i++ ) + /* Allow nans to be binary different */ + if ((t[i] != c[i]) && !(isnan(((double *)correct)[i]) && isnan(((double *)test)[i]))) { + log_error("\n(check_double) Error for vector size %ld found at 0x%8.8lx (of 0x%8.8lx): " + "*0x%16.16llx vs 0x%16.16llx\n", vector_size, i, count, c[i], t[i] ); + return i + 1; + } + + return 0; +} + +CheckResults checkResults[kTypeCount] = { + check_uchar, check_char, check_ushort, check_short, check_uint, + check_int, check_float, check_ulong, check_long, check_double }; diff --git a/test_conformance/spir/CMakeLists.txt b/test_conformance/spir/CMakeLists.txt new file mode 100644 index 00000000..b0a3e020 --- /dev/null +++ b/test_conformance/spir/CMakeLists.txt @@ -0,0 +1,45 @@ +function (install_spir_artifacts suite_name) + install(FILES "${suite_name}.zip" DESTINATION ${CLConf_OUT_DIR}/spir) +endfunction() + +include_directories(${CLConf_SRC_DIR}/test_common) + +clconf_add_executable( + test_spir + main.cpp + datagen.cpp + run_build_test.cpp + run_services.cpp + kernelargs.cpp) + +target_link_libraries( + test_spir${CLConf_SUFFIX} + miniz${CLConf_SUFFIX} +) + +install_spir_artifacts(api) +install_spir_artifacts(atomics) +install_spir_artifacts(basic) +install_spir_artifacts(compile_and_link) +install_spir_artifacts(commonfns) +install_spir_artifacts(conversions) +install_spir_artifacts(geometrics) +install_spir_artifacts(enum_values) +install_spir_artifacts(half) +install_spir_artifacts(kernel_attributes) +install_spir_artifacts(kernel_image_methods) +install_spir_artifacts(images_kernel_read_write) +install_spir_artifacts(images_samplerlessRead) +install_spir_artifacts(integer_ops) +install_spir_artifacts(math_brute_force) +install_spir_artifacts(printf) +install_spir_artifacts(profiling) +install_spir_artifacts(relationals) 
+install_spir_artifacts(select) +install_spir_artifacts(sampler_enumeration) +install_spir_artifacts(vec_align) +install_spir_artifacts(vec_step) +install_spir_artifacts(binary_type) +install(FILES "khr.csv" DESTINATION ${CLConf_OUT_DIR}/spir) +#Add any other runtime directories you need here. +# end of file # diff --git a/test_conformance/spir/Makefile b/test_conformance/spir/Makefile new file mode 100644 index 00000000..377cfdd3 --- /dev/null +++ b/test_conformance/spir/Makefile @@ -0,0 +1,45 @@ +ifdef BUILD_WITH_ATF +ATF = -framework ATF +USE_ATF = -DUSE_ATF +endif + +SRCS = main.cpp datagen.cpp kernelargs.cpp run_build_test.cpp run_services.cpp \ + ../../test_common/miniz/miniz.c \ + ../../test_common/harness/testHarness.c \ + ../../test_common/harness/errorHelpers.c \ + ../../test_common/harness/typeWrappers.cpp \ + ../../test_common/harness/mt19937.c \ + ../../test_common/harness/os_helpers.c \ + ../../test_common/harness/kernelHelpers.c + +SOURCES = $(abspath $(SRCS)) + + +LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries +LIBPATH += -L. + +FRAMEWORK = ${SOURCES} +HEADERS = +TARGET = test_spir +INCLUDE = +COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32 +#COMPILERFLAGS = -c -Wall -g -DUSE_LOCAL_THREADS +CC = c++ +CFLAGS = $(COMPILERFLAGS) $(RC_CFLAGS) ${USE_ATF} +CXXFLAGS= $(COMPILERFLAGS) $(RC_CFLAGS) ${USE_ATF} +LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF} + +OBJECTS := ${SOURCES:.c=.o} +OBJECTS := ${OBJECTS:.cpp=.o} + +TARGETOBJECT = +all: $(TARGET) + +$(TARGET): $(OBJECTS) + $(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES) + +clean: + rm -f $(TARGET) $(OBJECTS) + +.DEFAULT: + @echo The target \"$@\" does not exist in Makefile. 
diff --git a/test_conformance/spir/api.zip b/test_conformance/spir/api.zip new file mode 100644 index 00000000..1d3f67f8 Binary files /dev/null and b/test_conformance/spir/api.zip differ diff --git a/test_conformance/spir/atomics.zip b/test_conformance/spir/atomics.zip new file mode 100644 index 00000000..6c0673f9 Binary files /dev/null and b/test_conformance/spir/atomics.zip differ diff --git a/test_conformance/spir/basic.zip b/test_conformance/spir/basic.zip new file mode 100644 index 00000000..d60636fb Binary files /dev/null and b/test_conformance/spir/basic.zip differ diff --git a/test_conformance/spir/binary_type.zip b/test_conformance/spir/binary_type.zip new file mode 100644 index 00000000..97479034 Binary files /dev/null and b/test_conformance/spir/binary_type.zip differ diff --git a/test_conformance/spir/commonfns.zip b/test_conformance/spir/commonfns.zip new file mode 100644 index 00000000..3774af11 Binary files /dev/null and b/test_conformance/spir/commonfns.zip differ diff --git a/test_conformance/spir/compile_and_link.zip b/test_conformance/spir/compile_and_link.zip new file mode 100644 index 00000000..55000a88 Binary files /dev/null and b/test_conformance/spir/compile_and_link.zip differ diff --git a/test_conformance/spir/conversions.zip b/test_conformance/spir/conversions.zip new file mode 100644 index 00000000..8929b62a Binary files /dev/null and b/test_conformance/spir/conversions.zip differ diff --git a/test_conformance/spir/datagen.cpp b/test_conformance/spir/datagen.cpp new file mode 100644 index 00000000..1bd21fa6 --- /dev/null +++ b/test_conformance/spir/datagen.cpp @@ -0,0 +1,644 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" +#include "exceptions.h" +#include "datagen.h" + +RandomGenerator gRG; + +size_t WorkSizeInfo::getGlobalWorkSize() const +{ + switch( work_dim ) + { + case 1: return global_work_size[0]; + case 2: return global_work_size[0] * global_work_size[1]; + case 3: return global_work_size[0] * global_work_size[1] * global_work_size[2]; + default: + throw Exceptions::TestError("wrong work dimention\n"); + } +} + +/* + * DataGenerator + */ + +DataGenerator* DataGenerator::Instance = NULL; + +DataGenerator* DataGenerator::getInstance() +{ + if (!Instance) + Instance = new DataGenerator(); + + return Instance; +} + +DataGenerator::DataGenerator() +{ + #define TYPE_HNDL( type, isBuffer, base_element_size, vector_size, min_value, max_value, Generator) \ + assert(m_argGenerators.find(type) == m_argGenerators.end()) ; \ + m_argGenerators[type] = new Generator( isBuffer, vector_size, min_value, max_value); + #include "typeinfo.h" + #undef TYPE_HNDL +} + +DataGenerator::~DataGenerator() +{ + ArgGeneratorsMap::iterator i = m_argGenerators.begin(); + ArgGeneratorsMap::iterator e = m_argGenerators.end(); + + for(; i != e; ++i) + { + delete i->second; + } +} + +KernelArgGenerator* DataGenerator::getArgGenerator( const KernelArgInfo& argInfo ) +{ + //try to match the full type first + ArgGeneratorsMap::iterator i = m_argGenerators.find(argInfo.getTypeName()); + ArgGeneratorsMap::iterator e = m_argGenerators.end(); + + if( i != e ) + { + return i->second; + } + // search for the proper prefix of the type + for(i = 
m_argGenerators.begin(); i != e; ++i) + { + if( 0 == argInfo.getTypeName().find(i->first)) + { + return i->second; + } + } + throw Exceptions::TestError(std::string("Can't find the generator for the type ") + + argInfo.getTypeName() + " for argument " + argInfo.getName() + "\n"); +} + +void DataGenerator::setArgGenerator(const KernelArgInfo& argInfo, + KernelArgGenerator* pGen) +{ + m_argGenerators[argInfo.getTypeName()] = pGen; +} + +float get_random_float(float low, float high, MTdata d) +{ + float t = (float)((double)genrand_int32(d) / (double)0xFFFFFFFF); + return (1.0f - t) * low + t * high; +} + +double get_random_double(double low, double high, MTdata d) +{ + cl_ulong u = (cl_ulong) genrand_int32(d) | ((cl_ulong) genrand_int32(d) << 32 ); + double t = (double) u * MAKE_HEX_DOUBLE( 0x1.0p-64, 0x1, -64); + return (1.0f - t) * low + t * high; +} + +size_t get_random_size_t(size_t low, size_t high, MTdata d) +{ + enum { N = sizeof(size_t)/sizeof(int) }; + + union { + int word[N]; + size_t size; + } u; + + for (unsigned i=0; i != N; ++i) { + u.word[i] = genrand_int32(d); + } + + assert(low <= high && "Invalid random number range specified"); + size_t range = high - low; + + return (range) ? low + ((u.size - low) % range) : low; +} + +size_t get_random_int32(int low, int high, MTdata d) +{ + int v = genrand_int32(d); + + assert(low <= high && "Invalid random number range specified"); + size_t range = high - low; + + return (range) ? 
low + ((v - low) % range) : low; +} + +/* + * KernelArgGeneratorSampler + */ +KernelArgGeneratorSampler::KernelArgGeneratorSampler(bool isBuffer, + size_t vectorSize, + int minValue, + int maxValue) { + initToDefaults(); +} + +void KernelArgGeneratorSampler::initToDefaults() { + m_normalized = false; + m_addressingMode = CL_ADDRESS_NONE; + m_filterMode = CL_FILTER_NEAREST; +} + +KernelArgGeneratorSampler::KernelArgGeneratorSampler() { + initToDefaults(); + +} + +void KernelArgGeneratorSampler::setNormalized(cl_bool isNormalized) +{ + m_normalized = isNormalized; +} + +void KernelArgGeneratorSampler::setAddressingMode(cl_addressing_mode mode) +{ + m_addressingMode = mode; +} + +void KernelArgGeneratorSampler::setFiterMode(cl_filter_mode mode) +{ + m_filterMode = mode; +} + + +/* + * SamplerValuesGenerator. + */ + +/* + * Static fields initialization. + */ +cl_bool SamplerValuesGenerator::coordNormalizations[] = {CL_TRUE, CL_FALSE}; + +cl_filter_mode SamplerValuesGenerator::filterModes[] = { + CL_FILTER_NEAREST, + CL_FILTER_LINEAR +}; + +cl_addressing_mode SamplerValuesGenerator::addressingModes[] = { + CL_ADDRESS_NONE, + CL_ADDRESS_CLAMP, + CL_ADDRESS_CLAMP_TO_EDGE, + CL_ADDRESS_REPEAT, + CL_ADDRESS_MIRRORED_REPEAT +}; + +const size_t NUM_NORM_MODES = + sizeof(SamplerValuesGenerator::coordNormalizations)/sizeof(cl_bool); + +const size_t NUM_FILTER_MODES = + sizeof(SamplerValuesGenerator::filterModes)/sizeof(cl_filter_mode); + +const size_t NUM_ADDR_MODES = + sizeof(SamplerValuesGenerator::addressingModes)/sizeof(cl_addressing_mode); + +SamplerValuesGenerator::iterator SamplerValuesGenerator::end() +{ + return iterator(NUM_NORM_MODES-1, NUM_FILTER_MODES-1, NUM_ADDR_MODES-1); +} + +/* + * A constructor for generating an 'end iterator'. 
+ */ +SamplerValuesGenerator::iterator::iterator(size_t norm, size_t filter, + size_t addressing): + m_normIndex(norm), m_filterIndex(filter), m_addressingModeIndex(addressing){} + +/* + * A constructor for generating a 'begin iterator'. + */ +SamplerValuesGenerator::iterator::iterator(): + m_normIndex(0), m_filterIndex(0), m_addressingModeIndex(0){} + +SamplerValuesGenerator::iterator& SamplerValuesGenerator::iterator::operator ++() +{ + if (incrementIndex(m_normIndex, NUM_NORM_MODES)) return *this; + if (incrementIndex(m_filterIndex, NUM_FILTER_MODES)) return *this; + if (incrementIndex(m_addressingModeIndex, NUM_ADDR_MODES)) return *this; + + assert(false && "incrementing end iterator!"); + return *this; +} + +bool SamplerValuesGenerator::iterator::incrementIndex(size_t &i, + const size_t limit) +{ + i = (i+1) % limit; + return i != 0; +} + +bool SamplerValuesGenerator::iterator::operator == (const iterator& other) const +{ + return m_normIndex == other.m_normIndex && + m_filterIndex == other.m_filterIndex && + m_addressingModeIndex == other.m_addressingModeIndex; +} + +bool SamplerValuesGenerator::iterator::operator != (const iterator& other) const +{ + return !(*this == other); +} + +cl_bool SamplerValuesGenerator::iterator::getNormalized() const +{ + assert(m_normIndex < NUM_NORM_MODES && "illegal index"); + return coordNormalizations[m_normIndex]; +} + +cl_filter_mode SamplerValuesGenerator::iterator::getFilterMode() const +{ + assert(m_filterIndex < NUM_FILTER_MODES && "illegal index"); + return filterModes[m_filterIndex]; +} + +cl_addressing_mode SamplerValuesGenerator::iterator::getAddressingMode() const +{ + assert(m_addressingModeIndex < NUM_ADDR_MODES && "illegal index"); + return addressingModes[m_addressingModeIndex]; +} + +unsigned SamplerValuesGenerator::iterator::toBitmap() const +{ + unsigned norm, filter, addressingModes; + switch (getNormalized()) + { + case CL_TRUE: + norm = 8; + break; + case CL_FALSE: + norm = 0; + break; + default: + 
assert(0 && "invalid normalize value"); + } + + switch (getFilterMode()) + { + case CL_FILTER_NEAREST: + filter = 0; + break; + case CL_FILTER_LINEAR: + filter = 16; + break; + default: + assert(0 && "invalid filter value"); + } + + switch(getAddressingMode()) + { + case CL_ADDRESS_NONE: + addressingModes = 0; + break; + case CL_ADDRESS_CLAMP: + addressingModes = 1; + break; + case CL_ADDRESS_CLAMP_TO_EDGE: + addressingModes = 2; + break; + case CL_ADDRESS_REPEAT: + addressingModes = 3; + break; + case CL_ADDRESS_MIRRORED_REPEAT: + addressingModes = 4; + break; + default: + assert(0 && "invalid filter value"); + } + + return norm | filter | addressingModes; +} + +std::string SamplerValuesGenerator::iterator::toString() const +{ + std::string ret("("); + + switch (getNormalized()) + { + case CL_TRUE: + ret.append("Normalized | "); + break; + case CL_FALSE: + ret.append("Not Normalized | "); + break; + default: + assert(0 && "invalid normalize value"); + } + + switch (getFilterMode()) + { + case CL_FILTER_NEAREST: + ret.append("Filter Nearest | "); + break; + case CL_FILTER_LINEAR: + ret.append("Filter Linear | "); + break; + default: + assert(0 && "invalid filter value"); + } + + switch(getAddressingMode()) + { + case CL_ADDRESS_NONE: + ret.append("Address None"); + break; + case CL_ADDRESS_CLAMP: + ret.append("Address clamp"); + break; + case CL_ADDRESS_CLAMP_TO_EDGE: + ret.append("Address clamp to edge"); + break; + case CL_ADDRESS_REPEAT: + ret.append("Address repeat"); + break; + case CL_ADDRESS_MIRRORED_REPEAT: + ret.append("Address mirrored repeat"); + break; + default: + assert(0 && "invalid filter value"); + } + + ret.append(")"); + return ret; +} + +/* + * ImageValuesGenerator. + */ + +/* + * Static fields initialization. 
+ */ +const char* ImageValuesGenerator::imageTypes[] = { + "image1d_array_float", + "image1d_array_int", + "image1d_array_uint", + "image1d_buffer_float", + "image1d_buffer_int", + "image1d_buffer_uint", + "image1d_float", + "image1d_int", + "image1d_uint", + "image2d_array_float", + "image2d_array_int", + "image2d_array_uint", + "image2d_float", + "image2d_int", + "image2d_uint", + "image3d_float", + "image3d_int", + "image3d_uint" +}; + +cl_channel_order ImageValuesGenerator::channelOrders[] = { + CL_A, + CL_R, + CL_Rx, + CL_RG, + CL_RGx, + CL_RA, + CL_RGB, + CL_RGBx, + CL_RGBA, + CL_ARGB, + CL_BGRA, + CL_INTENSITY, + CL_LUMINANCE, + CL_DEPTH, + CL_DEPTH_STENCIL +}; + +const size_t NUM_CHANNEL_ORDERS = sizeof(ImageValuesGenerator::channelOrders)/sizeof(ImageValuesGenerator::channelOrders[0]); +const size_t NUM_IMG_TYS = sizeof(ImageValuesGenerator::imageTypes)/sizeof(ImageValuesGenerator::imageTypes[0]); + +ImageValuesGenerator::iterator ImageValuesGenerator::begin() +{ + return ImageValuesGenerator::iterator(this); +} + +ImageValuesGenerator::iterator ImageValuesGenerator::end() +{ + return ImageValuesGenerator::iterator(0); +} +/* + * Class Iterator + */ +ImageValuesGenerator::iterator::iterator(ImageValuesGenerator *pParent): + m_parent(pParent), m_channelIndex(0), m_imgTyIndex(0) +{ +} + +/* + * Initializes an 'end' iterator. + */ +ImageValuesGenerator::iterator::iterator(int): + m_parent(NULL), + m_channelIndex(NUM_CHANNEL_ORDERS), + m_imgTyIndex(NUM_IMG_TYS) {} + +ImageValuesGenerator::iterator& ImageValuesGenerator::iterator::operator ++() +{ + assert(m_channelIndex < NUM_CHANNEL_ORDERS && m_imgTyIndex < NUM_IMG_TYS && + "Attempt to increment an end iterator"); + + ImageValuesGenerator::iterator endIter = iterator(0); + // Incrementing untill we find the next legal combination, or we reach the + // end value. 
+ while (incrementIndex(m_channelIndex,NUM_CHANNEL_ORDERS)) + if (isLegalCombination()) + return *this; + + // We have reach to this line because last increment caused an 'oveflow' + // in data channel order index. + if (incrementIndex(m_imgTyIndex, NUM_IMG_TYS)) + // In case this combination is not legal, we go on to the next legal + // combo. + return isLegalCombination() ? *this : ++(*this); + + *this = endIter; + return *this; +} + +bool ImageValuesGenerator::iterator::operator == ( + const ImageValuesGenerator::iterator& o) const +{ + return m_channelIndex == o.m_channelIndex && + m_imgTyIndex == o.m_imgTyIndex; +} + +bool ImageValuesGenerator::iterator::operator != ( + const ImageValuesGenerator::iterator& o) const +{ + return !(*this == o); +} + +std::string ImageValuesGenerator::iterator::getDataTypeName() const +{ + assert(m_imgTyIndex < NUM_IMG_TYS && "image type index is out of bound"); + + std::string tyName(imageTypes[m_imgTyIndex]); + // Find the last '_' and remove it (the suffix is _). + size_t pos = tyName.find_last_of('_'); + assert (std::string::npos != pos && "no under score in type name?"); + tyName = tyName.erase(0, pos+1); + return tyName; +} + +int ImageValuesGenerator::iterator::getOpenCLChannelOrder() const +{ + assert(m_channelIndex < NUM_CHANNEL_ORDERS && "channel index out of bound"); + return channelOrders[m_channelIndex]; +} + +int ImageValuesGenerator::iterator::getSPIRChannelOrder() const +{ + return getOpenCLChannelOrder(); +} + +std::string ImageValuesGenerator::iterator::getImageTypeName() const +{ + assert(m_imgTyIndex < NUM_IMG_TYS && "image type index is out of bound"); + + std::string tyName = imageTypes[m_imgTyIndex]; + // Find the last '_' and remove it (the suffix is _). 
+ size_t pos = tyName.find_last_of('_'); + assert (std::string::npos != pos && "no under score in type name?"); + tyName = tyName.erase(pos, tyName.size() - pos); + + return tyName; +} + +std::string ImageValuesGenerator::iterator::getImageGeneratorName() const +{ + assert(m_imgTyIndex < NUM_IMG_TYS && "image type index is out of bound"); + return imageTypes[m_imgTyIndex]; +} + +std::string ImageValuesGenerator::iterator::getBaseImageGeneratorName() const +{ + assert(m_imgTyIndex < NUM_IMG_TYS && "image type index is out of bound"); + std::string tyName = getImageTypeName(); + tyName.append("_t"); + return tyName; +} + +int ImageValuesGenerator::iterator::getDataType() const +{ + assert(m_imgTyIndex < NUM_IMG_TYS && "image type index is out of bound"); + std::string tyName = getDataTypeName(); + + if ("int" == tyName) + return SPIR_CLK_SIGNED_INT32; + if ("uint" == tyName) + return SPIR_CLK_UNSIGNED_INT32; + if ("float" == tyName) + return SPIR_CLK_FLOAT; + assert (false && "unkown image data type"); + return -1; +} + +std::string ImageValuesGenerator::iterator::toString() const +{ + if (*this == m_parent->end()) + return "End iterator"; + + // Sanity. 
+ assert(m_imgTyIndex < NUM_IMG_TYS && "image type index is out of bound"); + assert(m_channelIndex < NUM_CHANNEL_ORDERS && "channel index out of bound"); + + std::string str = imageTypes[m_imgTyIndex]; + str.append("_"); + + switch (channelOrders[m_channelIndex]) + { + case CL_R: + str.append("cl_r"); + break; + case CL_A: + str.append("cl_a"); + break; + case CL_RG: + str.append("cl_rg"); + break; + case CL_RA: + str.append("cl_ra"); + break; + case CL_RGB: + str.append("cl_rgb"); + break; + case CL_RGBA: + str.append("cl_rgba"); + break; + case CL_BGRA: + str.append("cl_bgra"); + break; + case CL_ARGB: + str.append("cl_argb"); + break; + case CL_INTENSITY: + str.append("cl_intensity"); + break; + case CL_LUMINANCE: + str.append("cl_luminace"); + break; + case CL_Rx: + str.append("cl_Rx"); + break; + case CL_RGx: + str.append("cl_RGx"); + break; + case CL_RGBx: + str.append("cl_RGBx"); + break; + case CL_DEPTH: + str.append("cl_depth"); + break; + case CL_DEPTH_STENCIL: + str.append( "cl_depth_stencil"); + break; + default: + assert(false && "Invalid channel order"); + str.append(""); + break; + } + + return str; +} + +bool ImageValuesGenerator::iterator::incrementIndex(size_t& index, + size_t arrSize) +{ + index = (index + 1) % arrSize; + return index != 0; +} + +bool ImageValuesGenerator::iterator::isLegalCombination() const +{ + cl_channel_order corder = channelOrders[m_channelIndex]; + std::string strImgTy(imageTypes[m_imgTyIndex]); + + if (corder == CL_INTENSITY || corder == CL_LUMINANCE) + { + return getDataTypeName() == std::string("float"); + } + + if (corder == CL_DEPTH) + return false; + + if (corder == CL_RGBx || corder == CL_RGB // Can only be applied for int unorms. + || corder == CL_ARGB || corder == CL_BGRA) // Can only be applied for int8. 
+ return false; + + return true; +} + diff --git a/test_conformance/spir/datagen.h b/test_conformance/spir/datagen.h new file mode 100644 index 00000000..3b8ba5b8 --- /dev/null +++ b/test_conformance/spir/datagen.h @@ -0,0 +1,1066 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef __DATAGEN_H +#define __DATAGEN_H + +#include "../../test_common/harness/compat.h" + +#include + +#include +#include +#include +#include +#include +#include + +#include "../../test_common/harness/mt19937.h" + +#include "exceptions.h" +#include "kernelargs.h" + +// ESINNS is a short name for EXPLICIT_SPECIALIZATION_IN_NON_NAMESPACE_SCOPE + +#undef ESINNS + +#ifdef __GNUC__ + +#define ESINNS +#define ESINNS_PREF() inline +#define ESINNS_POST() RandomGenerator:: + +#else + +#define ESINNS_PREF() +#define ESINNS_POST() + +#endif + +#define MAX_WORK_DIM 3 +#define GLOBAL_WORK_SIZE (((CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE / sizeof(cl_double) / 16) / 2) * 2) // max buffer size = CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE / sizeof(double16) + +// SPIR definitions for image channel data types (Section 2.1.3.2). 
+#define SPIR_CLK_SNORM_INT8 0x10D0 +#define SPIR_CLK_SNORM_INT16 0x10D1 +#define SPIR_CLK_UNORM_INT8 0x10D2 +#define SPIR_CLK_UNORM_INT16 0x10D3 +#define SPIR_CLK_UNORM_SHORT_565 0x10D4 +#define SPIR_CLK_UNORM_SHORT_555 0x10D5 +#define SPIR_CLK_UNORM_SHORT_101010 0x10D6 +#define SPIR_CLK_SIGNED_INT8 0x10D7 +#define SPIR_CLK_SIGNED_INT16 0x10D8 +#define SPIR_CLK_SIGNED_INT32 0x10D9 +#define SPIR_CLK_UNSIGNED_INT8 0x10DA +#define SPIR_CLK_UNSIGNED_INT16 0x10DB +#define SPIR_CLK_UNSIGNED_INT32 0x10DC +#define SPIR_CLK_HALF_FLOAT 0x10DD +#define SPIR_CLK_FLOAT 0x10DE +#define SPIR_CLK_UNORM_INT24 0x10DF + +#define NUM_IMG_FORMATS 64 + +double get_random_double(double low, double high, MTdata d); +float get_random_float(float low, float high, MTdata d); +size_t get_random_size_t(size_t low, size_t high, MTdata d); + +/** + Simple container for the work size information + */ +class WorkSizeInfo +{ +public: + /** + Returns the flat global size + */ + size_t getGlobalWorkSize() const; +public: + cl_uint work_dim; + size_t global_work_offset[MAX_WORK_DIM]; + size_t global_work_size[MAX_WORK_DIM]; + size_t local_work_size[MAX_WORK_DIM]; +}; + +/** + Generates various types of random numbers + */ +class RandomGenerator +{ +public: + RandomGenerator():m_d(NULL) + { + init(0); + } + + ~RandomGenerator() + { + if( NULL != m_d ) + free_mtdata(m_d); + } + + void init(cl_uint seed) + { + m_d = init_genrand( seed ); + } + + template T getNext(T low, T high) + { + assert(false && "Not implemented"); + return T(); + } + +#ifdef ESINNS + +private: + MTdata m_d; +}; + +#endif + +template<> ESINNS_PREF() bool ESINNS_POST()getNext(bool low, bool high) +{ + return (bool)get_random_size_t((size_t)low, (size_t)high, m_d); +} + +template<> ESINNS_PREF() cl_char ESINNS_POST()getNext(cl_char low, cl_char high) +{ + return (cl_char)get_random_size_t((size_t)low, (size_t)high, m_d); +} + +template<> ESINNS_PREF() cl_double ESINNS_POST()getNext(cl_double low, cl_double high) +{ + return 
get_random_double(low, high, m_d); +} + +template<> ESINNS_PREF() cl_float ESINNS_POST()getNext(cl_float low, cl_float high) +{ + return get_random_float(low, high, m_d); +} + +template<> ESINNS_PREF() cl_int ESINNS_POST()getNext(cl_int low, cl_int high) +{ + return (cl_int)get_random_size_t((size_t)low, (size_t)high, m_d); +} + +template<> ESINNS_PREF() cl_long ESINNS_POST()getNext(cl_long low, cl_long high) +{ + return (cl_long)get_random_size_t((size_t)low, (size_t)high, m_d); +} + +template<> ESINNS_PREF() cl_short ESINNS_POST()getNext(cl_short low, cl_short high) +{ + return (cl_short)get_random_size_t((size_t)low, (size_t)high, m_d); +} + +template<> ESINNS_PREF() cl_uchar ESINNS_POST()getNext(cl_uchar low, cl_uchar high) +{ + return (cl_uchar)get_random_size_t((size_t)low, (size_t)high, m_d); +} + +template<> ESINNS_PREF() cl_uint ESINNS_POST()getNext(cl_uint low, cl_uint high) +{ + return (cl_uint)get_random_size_t((size_t)low, (size_t)high, m_d); +} + +template<> ESINNS_PREF() cl_ulong ESINNS_POST()getNext(cl_ulong low, cl_ulong high) +{ + return (cl_ulong)get_random_size_t((size_t)low, (size_t)high, m_d); +} + +template<> ESINNS_PREF() cl_ushort ESINNS_POST()getNext(cl_ushort low, cl_ushort high) +{ + return (cl_ushort)get_random_size_t((size_t)low, (size_t)high, m_d); +} + +#ifndef ESINNS + +private: + MTdata m_d; +}; + +#endif + +extern RandomGenerator gRG; + +/** + Base class for kernel argument generator + */ +class KernelArgGenerator +{ +protected: + KernelArgGenerator() + {} + +public: + virtual KernelArg* generate( cl_context context, + const WorkSizeInfo& ws, + const KernelArgInfo& argInfo, + const KernelArg* refArg, + const cl_kernel kernel, + const cl_device_id device ) = 0; + virtual ~KernelArgGenerator() {} +}; + +/** + Mock: 'Not implemented' version of the kernel argument generator - used for the still unsupported types + */ +class KernelArgGeneratorNI: public KernelArgGenerator +{ +public: + KernelArgGeneratorNI( bool isBuffer, size_t 
vectorSize, int minValue, int maxValue ) + {} + + KernelArg* generate( cl_context context, + const WorkSizeInfo& ws, + const KernelArgInfo& argInfo, + const KernelArg* refArg, + const cl_kernel kernel, + const cl_device_id device ) + { + //assert(false && "Not implemented"); + throw Exceptions::TestError("KernelArgGenerator is not implemented\n"); + } +}; + +/** + Kernel argument generator for images + */ +class KernelArgGeneratorImage: public KernelArgGenerator +{ +public: + KernelArgGeneratorImage(bool isBuffer, size_t vectorSize, char minValue, char maxValue) : + m_isBuffer(isBuffer), + m_vectorSize(vectorSize), + m_minValue(minValue), + m_maxValue(maxValue) + { + m_format.image_channel_order = CL_RGBA; + + m_desc.image_width = 32; + m_desc.image_height = 1; + m_desc.image_depth = 1; + m_desc.image_array_size = 1; + m_desc.num_mip_levels = 0; + m_desc.num_samples = 0; + m_desc.buffer = NULL; + } + + bool isValidChannelOrder(cl_context context, cl_channel_order order) const + { + cl_mem_flags flags = CL_MEM_COPY_HOST_PTR; + cl_uint actualNumFormats = 0; + cl_image_format imgFormat = m_format; + imgFormat.image_channel_order = order; + + cl_int error = clGetSupportedImageFormats( + context, + flags, + m_desc.image_type, + 0, + NULL, + &actualNumFormats); + if (CL_SUCCESS != error) + throw Exceptions::TestError("clGetSupportedImageFormats failed\n", error); + + std::auto_ptr supportedFormats(new cl_image_format[actualNumFormats]); + error = clGetSupportedImageFormats( + context, + flags, + m_desc.image_type, + actualNumFormats, + supportedFormats.get(), + NULL); + if (CL_SUCCESS != error) + throw Exceptions::TestError("clGetSupportedImageFormats failed\n", error); + + for (size_t i=0; igetBuffer(), allocSize ); + } + + return new KernelArgImage(context, argInfo, pBuffer, allocSize, mem_flags, m_format, m_desc); + } + +protected: + KernelArgGeneratorImage() + {} + + void fillBuffer( cl_char * ptr, size_t nelem) + { + for( size_t i = 0; i < nelem; ++i ) + { + ptr[i] 
= gRG.getNext(m_minValue, m_maxValue); + } + } + +protected: + bool m_isBuffer; + size_t m_vectorSize; + cl_char m_minValue; + cl_char m_maxValue; + cl_image_format m_format; + cl_image_desc m_desc; +}; + +/** + Kernel argument generator for image1d_array + */ +template class KernelArgGeneratorImage1dArray: public KernelArgGeneratorImage +{ +public: + KernelArgGeneratorImage1dArray( bool isBuffer, size_t vectorSize, char minValue, char maxValue ): + KernelArgGeneratorImage(isBuffer, vectorSize, minValue, maxValue) + { + m_desc.image_type = CL_MEM_OBJECT_IMAGE1D_ARRAY; + m_format.image_channel_data_type = channel_type; + + m_desc.image_row_pitch = m_desc.image_width*4*4; //RGBA channel size * sizeof (cl_int) + m_desc.image_slice_pitch = m_desc.image_height * m_desc.image_row_pitch; + + } +}; + +/** + Kernel argument generator for image1d_buffer + */ +template class KernelArgGeneratorImage1dBuffer: public KernelArgGeneratorImage +{ +public: + KernelArgGeneratorImage1dBuffer( bool isBuffer, size_t vectorSize, char minValue, char maxValue ) : + KernelArgGeneratorImage(isBuffer, vectorSize, minValue, maxValue) + { + m_desc.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER; + m_format.image_channel_data_type = channel_type; + + m_desc.image_row_pitch = m_desc.image_width*4*4; //RGBA channel size * sizeof (cl_int) + // http://www.khronos.org/registry/cl/specs/opencl-1.2.pdf 5.2.2; + // Slice pitch of 1d images should be zero. 
+ m_desc.image_slice_pitch = 0; + } +}; + +/** + Kernel argument generator for image1d + */ +template class KernelArgGeneratorImage1d: public KernelArgGeneratorImage +{ +public: + KernelArgGeneratorImage1d( bool isBuffer, size_t vectorSize, char minValue, char maxValue ) : + KernelArgGeneratorImage(isBuffer, vectorSize, minValue, maxValue) + { + m_desc.image_type = CL_MEM_OBJECT_IMAGE1D; + m_format.image_channel_data_type = channel_type; + + m_desc.image_row_pitch = m_desc.image_width*4*4; //RGBA channel size * sizeof (cl_int) + // http://www.khronos.org/registry/cl/specs/opencl-1.2.pdf + // '5.3.1.2 image descriptor': Slice pitch is not applicable for one- + // dimensional images. + m_desc.image_slice_pitch = 0; + } +}; + +/** + Kernel argument generator for image2d_array + */ +template class KernelArgGeneratorImage2dArray: public KernelArgGeneratorImage +{ +public: + KernelArgGeneratorImage2dArray( bool isBuffer, size_t vectorSize, char minValue, char maxValue ) : + KernelArgGeneratorImage(isBuffer, vectorSize, minValue, maxValue) + { + m_desc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY; + m_format.image_channel_data_type = channel_type; + + m_desc.image_height = 32; + m_desc.image_array_size = 8; + m_desc.image_row_pitch = m_desc.image_width*4*4; //RGBA channel size * sizeof (cl_int) + m_desc.image_slice_pitch = m_desc.image_height * m_desc.image_row_pitch; + } +}; + +/** + Kernel argument generator for image2d + */ +template class KernelArgGeneratorImage2d: public KernelArgGeneratorImage +{ +public: + KernelArgGeneratorImage2d( bool isBuffer, size_t vectorSize, char minValue, char maxValue ) : + KernelArgGeneratorImage(isBuffer, vectorSize, minValue, maxValue) + { + m_desc.image_type = CL_MEM_OBJECT_IMAGE2D; + m_format.image_channel_data_type = channel_type; + + m_desc.image_height = 32; + m_desc.image_row_pitch = m_desc.image_width*4*4; //RGBA channel size * sizeof (cl_int) + // http://www.khronos.org/registry/cl/specs/opencl-1.2.pdf + // '5.3.1.2 image 
descriptor': Slice pitch is not applicable for two- + // dimensional images. + m_desc.image_slice_pitch = 0; + } +}; + +/** + Kernel argument generator for image3d + */ +template class KernelArgGeneratorImage3d: public KernelArgGeneratorImage +{ +public: + KernelArgGeneratorImage3d( bool isBuffer, size_t vectorSize, char minValue, char maxValue ) : + KernelArgGeneratorImage(isBuffer, vectorSize, minValue, maxValue) + { + m_desc.image_type = CL_MEM_OBJECT_IMAGE3D; + m_format.image_channel_data_type = channel_type; + + m_desc.image_height = 32; + m_desc.image_depth = 8; + m_desc.image_row_pitch = m_desc.image_width*4*4; //RGBA channel size * sizeof (cl_int) + m_desc.image_slice_pitch = m_desc.image_height * m_desc.image_row_pitch; + } +}; + +/** + Kernel argument generator for samplers + */ +class KernelArgGeneratorSampler: public KernelArgGenerator +{ +public: + KernelArgGeneratorSampler(bool isBuffer, size_t vectorSize, int minValue, int maxValue); + + KernelArgGeneratorSampler(); + + /* + * Sampler property setters. + */ + void setNormalized(cl_bool); + void setAddressingMode(cl_addressing_mode); + void setFiterMode(cl_filter_mode); + + KernelArg* generate(cl_context context, + const WorkSizeInfo& ws, + const KernelArgInfo& argInfo, + const KernelArg* refArg, + const cl_kernel kernel, + const cl_device_id device) + { + return new KernelArgSampler(context, m_normalized, m_addressingMode, m_filterMode); + } +private: + void initToDefaults(); + + cl_bool m_normalized; + cl_addressing_mode m_addressingMode; + cl_filter_mode m_filterMode; +}; + +/* + * Generates all the possible values for image samplers. + */ +class SamplerValuesGenerator +{ +public: + class iterator { + friend class SamplerValuesGenerator; + + size_t m_normIndex, m_filterIndex, m_addressingModeIndex; + + iterator(size_t norm, size_t filter, size_t addressing); + + bool incrementIndex(size_t &i, const size_t limit); + public: + iterator(); + + /* + * Moves the iterator to the next sampler value. 
+ */ + iterator& operator ++(); + + bool operator == (const iterator& other) const; + + bool operator != (const iterator& other) const; + + cl_bool getNormalized() const; + + cl_filter_mode getFilterMode() const; + + cl_addressing_mode getAddressingMode() const; + + /* + * Converts the value of the sampler to a bitmask representation. + */ + unsigned toBitmap() const; + + /* + * Retruns a string representation of the sampler. + */ + std::string toString() const; + }; + + iterator begin() { return iterator(); } + + iterator end(); + + static cl_bool coordNormalizations[]; + static cl_filter_mode filterModes[]; + static cl_addressing_mode addressingModes[]; +}; + +typedef struct struct_type { + cl_float float4d[4]; + cl_int intd; +} typedef_struct_type; + +typedef struct { + cl_int width; + cl_int channelType; + cl_int channelOrder; + cl_int expectedChannelType; + cl_int expectedChannelOrder; + } image_kernel_data; + +typedef struct testStruct { + cl_double vec[16]; + } testStruct; + +typedef struct { + cl_uint workDim; + cl_uint globalSize[3]; + cl_uint globalID[3]; + cl_uint localSize[3]; + cl_uint localID[3]; + cl_uint numGroups[3]; + cl_uint groupID[3]; + } work_item_data; + +/** + Kernel argument generator for structure "struct_type" + + Kernel argument generator for structure "image_kernel_data" + + Kernel argument generator for structure "testStruct" + Since there are many "testStruct", we define it to have maximum space + Also the alignment is done following the "worst" case + + Kernel argument generator for structure "work_item_data" + */ + + template class KernelStructTypeArgGenerator: public KernelArgGenerator + { + + public: + KernelStructTypeArgGenerator( bool isBuffer, size_t vectorSize, cl_int minValue, cl_int maxValue ): + m_isBuffer(isBuffer), + m_vectorSize(vectorSize), + m_alignment(0), + m_size(0) + {} + + KernelArg* generate( cl_context context, + const WorkSizeInfo& ws, + const KernelArgInfo& argInfo, + const KernelArg* refArg, + const cl_kernel 
kernel, + const cl_device_id device ) + { + T *pStruct = NULL; + + calcSizeAndAlignment(pStruct); + size_t size = m_size; + + if( m_isBuffer ) + { + cl_kernel_arg_address_qualifier addrQ = argInfo.getAddressQualifier(); + + if( CL_KERNEL_ARG_ADDRESS_CONSTANT == addrQ ) + { + if ( (CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE/m_size)*m_size < m_size ) + size=(CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE/m_size)*m_size; + } + + if( CL_KERNEL_ARG_ADDRESS_GLOBAL == addrQ || + CL_KERNEL_ARG_ADDRESS_CONSTANT == addrQ ) + { + size_t no_e = ws.getGlobalWorkSize(); + size = no_e * m_size; + pStruct = (T *)align_malloc(size, m_alignment); + if (NULL == pStruct) + { + throwExceptions(pStruct); + } + assert( (size_t)pStruct % m_alignment == 0 ); + if (NULL == refArg) + { + fillBuffer(pStruct, no_e); + } + else { + memcpy(pStruct, refArg->getBuffer(), size); + } + } + return new KernelArgBuffer( context, argInfo, (void*)pStruct, size); + } + else { + pStruct = (T *)align_malloc(m_size, m_alignment); + if (NULL == pStruct) + { + throwExceptions(pStruct); + } + assert( (size_t)pStruct % m_alignment == 0 ); + if (NULL == refArg) + { + fillBuffer(pStruct, 1); + } + else { + memcpy(pStruct, refArg->getBuffer(), m_size); + } + + return new KernelArg( argInfo, (void*)pStruct, m_size); + } + } + private: + + std::string getTypeString(typedef_struct_type *pStruct) + { + return "typedef_struct_type"; + } + + std::string getTypeString(image_kernel_data *pStruct) + { + return "image_kernel_data"; + } + + std::string getTypeString(testStruct *pStruct) + { + return "testStruct"; + } + + std::string getTypeString(work_item_data *pStruct) + { + return "work_item_data"; + } + + void throwExceptions(T * pStruct) + { + std::string str = "align_malloc failed for " ; + if (m_isBuffer) + str += "array of " ; + str += getTypeString(pStruct) ; + throw Exceptions::TestError(str, 1); + } + + void fillBuffer( typedef_struct_type *pStruct, size_t no_e ) + { + for (size_t e = 0; e < no_e; ++e) + { + for( size_t i = 0; i < 4; 
++i ) + { + pStruct[e].float4d[i] = gRG.getNext(-0x01000000, 0x01000000); + } + pStruct[e].intd = gRG.getNext(0, 0x7fffffff); + } + } + + void fillBuffer( image_kernel_data *pStruct, size_t no_e ) + { + for (size_t e = 0; e < no_e; ++e) + { + pStruct[e].width = gRG.getNext(0, 0x7fffffff); + pStruct[e].channelType = gRG.getNext(0, 0x7fffffff); + pStruct[e].channelOrder = gRG.getNext(0, 0x7fffffff); + pStruct[e].expectedChannelType = gRG.getNext(0, 0x7fffffff); + pStruct[e].expectedChannelOrder = gRG.getNext(0, 0x7fffffff); + } + } + + void fillBuffer( testStruct *pStruct, size_t no_e ) + { + for (size_t e = 0; e < no_e; ++e) + { + for( size_t i = 0; i < 16; ++i ) + { + pStruct[e].vec[i] = gRG.getNext(-0x01000000, 0x01000000); + } + } + } + + void fillBuffer( work_item_data *pStruct, size_t no_e ) + { + for (size_t e = 0; e < no_e; ++e) + { + memset(&pStruct[e], 0, sizeof(work_item_data)); + } + } + + // structure alignment is derived from the size of the larger field in it + // size of the structure is the size of the largest field multiple by the number of fields + + void calcSizeAndAlignment(typedef_struct_type *pStruct) + { + m_alignment = sizeof(cl_float) * 4; + m_size = m_alignment * 2 ; + } + + void calcSizeAndAlignment(image_kernel_data *pStruct) + { + m_alignment = sizeof(cl_int); + m_size = sizeof(image_kernel_data) ; + } + + void calcSizeAndAlignment(testStruct *pStruct) + { + m_alignment = sizeof(cl_double) * 16; + m_size = m_alignment; + } + + void calcSizeAndAlignment(work_item_data *pStruct) + { + m_alignment = sizeof(cl_uint); + m_size = sizeof(work_item_data); + } + + private: + bool m_isBuffer; + size_t m_vectorSize; + int m_alignment; + size_t m_size; +}; + +/** + Kernel argument generator for the simple scalar and vector types + */ +template class KernelArgGeneratorT: public KernelArgGenerator +{ +public: + KernelArgGeneratorT( bool isBuffer, size_t vectorSize, T minValue, T maxValue ): + m_isBuffer(isBuffer), + m_vectorSize(vectorSize), + 
m_minValue(minValue), + m_maxValue(maxValue) + {} + + KernelArg* generate( cl_context context, + const WorkSizeInfo& ws, + const KernelArgInfo& argInfo, + const KernelArg* refArg, + const cl_kernel kernel, + const cl_device_id device ) + { + T* pBuffer = NULL; + size_t size = 0; + int alignment, error; + cl_ulong totalDeviceLocalMem; + cl_ulong localMemUsedByKernel; + cl_uint numArgs, numLocalArgs = 0; + KernelArgInfo kernel_arg_info; + + error = CL_SUCCESS; + + // take care of 3-elements vector's alignment issue: + // if 3-elements vector - the alignment is 4-elements + if (m_vectorSize == 3) + alignment = sizeof(T) * 4; + else + alignment = sizeof(T) * m_vectorSize; + + // gather information about the kernel and device + clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(totalDeviceLocalMem), &totalDeviceLocalMem, NULL); + clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_LOCAL_MEM_SIZE, sizeof(localMemUsedByKernel), &localMemUsedByKernel, NULL); + clGetKernelInfo(kernel, CL_KERNEL_NUM_ARGS, sizeof(numArgs), &numArgs, NULL); + + // Calculate the number of local memory arguments + for (cl_uint i = 0; i < numArgs; i ++) + { + error = clGetKernelArgInfo( kernel, i, CL_KERNEL_ARG_ADDRESS_QUALIFIER, sizeof(cl_kernel_arg_address_qualifier), kernel_arg_info.getAddressQualifierRef(), &size); + if( error != CL_SUCCESS ) + { + throw Exceptions::TestError("Unable to get argument address qualifier\n", error); + } + + if(kernel_arg_info.getAddressQualifier() == CL_KERNEL_ARG_ADDRESS_LOCAL) + { + numLocalArgs ++; + } + } + + // reduce the amount of local memory by the amount the kernel + implementation uses + totalDeviceLocalMem -= localMemUsedByKernel; + + if( m_isBuffer ) + { + cl_kernel_arg_address_qualifier addrQ = argInfo.getAddressQualifier(); + + // decide about the buffer size - take into account the alignment and padding + size = ws.getGlobalWorkSize() * alignment; + + // reduce the size of the buffer for local memory + if (numLocalArgs && + size > 
floor(static_cast(totalDeviceLocalMem / numLocalArgs)) && + addrQ == CL_KERNEL_ARG_ADDRESS_LOCAL) + { + size = floor(static_cast(totalDeviceLocalMem / numLocalArgs)); + } + + if( CL_KERNEL_ARG_ADDRESS_CONSTANT == addrQ ) + { + if ( CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE < size ) + size = CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE; + } + + if( CL_KERNEL_ARG_ADDRESS_GLOBAL == addrQ || + CL_KERNEL_ARG_ADDRESS_CONSTANT == addrQ ) + { + pBuffer = (T *)align_malloc(size, alignment); + if (NULL == pBuffer) + { + throw Exceptions::TestError("align_malloc failed for array buffer\n", 1); + } + assert( (size_t)pBuffer % alignment == 0 ); + if (NULL == refArg) + { + fillBuffer(pBuffer, size / sizeof(T)); + } + else { + memcpy(pBuffer, refArg->getBuffer(), size); + } + } + return new KernelArgBuffer( context, argInfo, (void*)pBuffer, size); + } + else + { + if (m_vectorSize == 3) + size = sizeof(T) * 4; + else + size = sizeof(T) * m_vectorSize; + + pBuffer = (T *)align_malloc(size, alignment); + if (NULL == pBuffer) + { + throw Exceptions::TestError("align_malloc failed for pBuffer\n", 1); + } + assert( (size_t)pBuffer % alignment == 0 ); + if (NULL == refArg) + { + fillBuffer(pBuffer, m_vectorSize); + } + else { + memcpy(pBuffer, refArg->getBuffer(), size); + } + return new KernelArg( argInfo, (void*)pBuffer, size); + } + } +private: + void fillBuffer( T* buffer, size_t nelem) + { + for( size_t i = 0; i < nelem; ++i ) + { + buffer[i] = gRG.getNext(m_minValue, m_maxValue); + } + } + +private: + bool m_isBuffer; + size_t m_vectorSize; + T m_minValue; + T m_maxValue; +}; + +/** + General facade for the kernel arguments generation functionality. 
+ */ +class DataGenerator +{ +public: + static DataGenerator* getInstance(); + + ~DataGenerator(); + + KernelArg* generateKernelArg(cl_context context, + const KernelArgInfo& argInfo, + const WorkSizeInfo& ws, + const KernelArg* refArg, + const cl_kernel kernel, + const cl_device_id device) + { + KernelArgGenerator* pArgGenerator = getArgGenerator(argInfo); + return pArgGenerator->generate(context, ws, argInfo, refArg, kernel, device); + } + + /* + * Gets the generator associated to the given key. + */ + KernelArgGenerator* getArgGenerator(const KernelArgInfo& argInfo); + + /* + * Sets the entry associated to the given key, with the given prototype + * generator. + */ + void setArgGenerator(const KernelArgInfo& key, KernelArgGenerator* gen); + +private: + DataGenerator(); + + static DataGenerator *Instance; + + typedef std::map ArgGeneratorsMap; + ArgGeneratorsMap m_argGenerators; +}; + +class ImageValuesGenerator +{ +public: + class iterator + { + friend class ImageValuesGenerator; + public: + /* + * Iterator operators. + */ + iterator& operator ++(); + bool operator == (const iterator&) const; + bool operator != (const iterator&) const; + /* + * Returns the name of the basic image type (e.g., image2d_t). + */ + std::string getImageTypeName() const; + + /* + * Returns the name of the genrator that generates images of this type + * (e.g., imaget2d_float). + */ + std::string getImageGeneratorName() const; + + /* + * Returns the name of the genrator that generates images of the 'base' + * type (e.g., imaget2d_t). + */ + std::string getBaseImageGeneratorName() const; + + /* + * Returns the OpenCL enumeration for the channel order of the image + * object this iterator creates. + */ + int getOpenCLChannelOrder() const; + + /* + * Returns the SPIR enumeration for the channel order of the image + * object this iterator creates. + */ + int getSPIRChannelOrder() const; + + /* + * Returns the data type of the image object this iterator creates. (e.g., + * cl_float, cl_int). 
+ */ + int getDataType() const; + + /* + * Returns the data type of the image object this iterator creates. (e.g., + * float, int), in string format. + */ + std::string getDataTypeName() const; + + std::string toString() const; + private: + /* + * Constructor for creating a 'begin' iterator. + */ + iterator(ImageValuesGenerator*); + /* + * Constructor for creating an 'end' iterator. + */ + iterator(int); + /* + * Increments the given argument up to the given limit. + * In case the new value reaches the limit, the index is reset to hold zero. + * Returns: true if the value of the index was incremented, false if it was reset + * to zero. + */ + bool incrementIndex(size_t& index, size_t limit); + + /* + * Returns true is the index combination of this iterator is legal, + * or false otherwise. + */ + bool isLegalCombination() const; + + ImageValuesGenerator* m_parent; + size_t m_channelIndex, m_imgTyIndex; + }; //End iterator. + + iterator begin(); + iterator end(); + + static cl_channel_order channelOrders[]; + static const char* imageTypes[]; +private: + WorkSizeInfo m_wsInfo; +}; + +#endif diff --git a/test_conformance/spir/enum_values.zip b/test_conformance/spir/enum_values.zip new file mode 100644 index 00000000..8e30f52d Binary files /dev/null and b/test_conformance/spir/enum_values.zip differ diff --git a/test_conformance/spir/exceptions.h b/test_conformance/spir/exceptions.h new file mode 100644 index 00000000..61215102 --- /dev/null +++ b/test_conformance/spir/exceptions.h @@ -0,0 +1,53 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef __EXCEPTIONS_H +#define __EXCEPTIONS_H + +#include +#include "../../test_common/miniz/miniz.h" + +namespace Exceptions +{ + /** + Exception thrown on error in command line parameters + */ + class CmdLineError : public std::runtime_error + { + public: + CmdLineError (const std::string& msg): std::runtime_error(msg){} + }; + + /** + Exception thrown on error in test run + */ + class TestError : public std::runtime_error + { + public: + TestError (const std::string& msg, int errorCode = 1): std::runtime_error(msg), m_errorCode(errorCode){} + + int getErrorCode() const { return m_errorCode; } + private: + int m_errorCode; + }; + + class ArchiveError : public std::runtime_error + { + public: + ArchiveError(int errCode): std::runtime_error(mz_error(errCode)){} + }; +} + +#endif diff --git a/test_conformance/spir/geometrics.zip b/test_conformance/spir/geometrics.zip new file mode 100644 index 00000000..73a5dd37 Binary files /dev/null and b/test_conformance/spir/geometrics.zip differ diff --git a/test_conformance/spir/half.zip b/test_conformance/spir/half.zip new file mode 100644 index 00000000..f0d3e8ff Binary files /dev/null and b/test_conformance/spir/half.zip differ diff --git a/test_conformance/spir/images_kernel_read_write.zip b/test_conformance/spir/images_kernel_read_write.zip new file mode 100644 index 00000000..65fa8728 Binary files /dev/null and b/test_conformance/spir/images_kernel_read_write.zip differ diff --git a/test_conformance/spir/images_samplerlessRead.zip b/test_conformance/spir/images_samplerlessRead.zip new file mode 
100644 index 00000000..bb3e9993 Binary files /dev/null and b/test_conformance/spir/images_samplerlessRead.zip differ diff --git a/test_conformance/spir/integer_ops.zip b/test_conformance/spir/integer_ops.zip new file mode 100644 index 00000000..e93df165 Binary files /dev/null and b/test_conformance/spir/integer_ops.zip differ diff --git a/test_conformance/spir/kernel_attributes.zip b/test_conformance/spir/kernel_attributes.zip new file mode 100644 index 00000000..57eb6804 Binary files /dev/null and b/test_conformance/spir/kernel_attributes.zip differ diff --git a/test_conformance/spir/kernel_image_methods.zip b/test_conformance/spir/kernel_image_methods.zip new file mode 100644 index 00000000..7adbc227 Binary files /dev/null and b/test_conformance/spir/kernel_image_methods.zip differ diff --git a/test_conformance/spir/kernelargs.cpp b/test_conformance/spir/kernelargs.cpp new file mode 100644 index 00000000..d9a216ee --- /dev/null +++ b/test_conformance/spir/kernelargs.cpp @@ -0,0 +1,24 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/compat.h" +#include "kernelargs.h" +#include "datagen.h" + +KernelArg* KernelArg::clone(cl_context ctx, const WorkSizeInfo& ws, const cl_kernel kernel, const cl_device_id device) const +{ + return DataGenerator::getInstance()->generateKernelArg(ctx, m_argInfo, ws, + this, kernel, device); +} diff --git a/test_conformance/spir/kernelargs.h b/test_conformance/spir/kernelargs.h new file mode 100644 index 00000000..9f14053e --- /dev/null +++ b/test_conformance/spir/kernelargs.h @@ -0,0 +1,444 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef __KERNELARGS_H +#define __KERNELARGS_H + + +#ifdef __APPLE__ +#include +#else +#include +#endif + +#include + +#include +#include +#include + +#include "../../test_common/harness/typeWrappers.h" + +#include "exceptions.h" + +class WorkSizeInfo; + +/** + Represents the single kernel argument information + */ +class KernelArgInfo +{ +public: + cl_kernel_arg_address_qualifier getAddressQualifier() const { return m_address_qualifier; } + cl_kernel_arg_access_qualifier getAccessQualifier() const { return m_access_qualifier; } + cl_kernel_arg_type_qualifier getTypeQualifier() const { return m_type_qualifier; } + + cl_kernel_arg_address_qualifier* getAddressQualifierRef() { return &m_address_qualifier; } + cl_kernel_arg_access_qualifier* getAccessQualifierRef() { return &m_access_qualifier; } + cl_kernel_arg_type_qualifier* getTypeQualifierRef() { return &m_type_qualifier; } + + void setTypeName( const char* name) { m_type.assign(name); } + void setName( const char* name) { m_name.assign(name); } + + std::string getTypeName() const { return m_type; } + std::string getName() const { return m_name; } + + bool operator == ( const KernelArgInfo& rhs ) const + { + return !m_name.compare(rhs.m_name) && + !m_type.compare(rhs.m_type) && + m_address_qualifier == rhs.m_address_qualifier && + m_access_qualifier == rhs.m_access_qualifier && + m_type_qualifier == rhs.m_type_qualifier; + } + + bool operator != ( const KernelArgInfo& rhs ) const + { + return !(*this == rhs); + } + +private: + std::string m_name; + std::string m_type; + cl_kernel_arg_address_qualifier m_address_qualifier; + cl_kernel_arg_access_qualifier m_access_qualifier; + cl_kernel_arg_type_qualifier m_type_qualifier; +}; + +/** + Represents the single kernel's argument value. + Responsible for livekeeping of OCL objects. 
+ */ +class KernelArg +{ +public: + KernelArg(const KernelArgInfo& argInfo, void* buffer, size_t size): + m_argInfo(argInfo), + m_buffer(buffer), + m_size(size) + {} + + virtual ~KernelArg() + { + align_free(m_buffer); + } + + virtual size_t getArgSize() const + { + return m_size; + } + + virtual const void* getBuffer() const + { + return m_buffer; + } + + virtual const void* getArgValue() const + { + return m_buffer; + } + + virtual bool compare( const KernelArg& rhs ) const + { + if( m_argInfo != rhs.m_argInfo ) + { + return false; + } + + if( m_size != rhs.m_size) + { + return false; + } + + if( (NULL == m_buffer || NULL == rhs.m_buffer) && m_buffer != rhs.m_buffer ) + { + return false; + } + + //check two NULL buffers case + if( NULL == m_buffer && NULL == rhs.m_buffer ) + { + return true; + } + + if( memcmp( m_buffer, rhs.m_buffer, m_size) ) + { + size_t compared = 0; + while (compared < m_size) + { + if ( *(((char*)m_buffer)+compared) != *(((char*)rhs.m_buffer)+compared) ) + { + std::cerr << std::endl << " difference is at offset " << compared << std::endl; + return false; + } + compared++; + } + } + + return true; + } + + virtual void readToHost(cl_command_queue queue) + { + return; + } + + KernelArg* clone(cl_context context, const WorkSizeInfo& ws, const cl_kernel kernel, const cl_device_id device) const; + +protected: + KernelArgInfo m_argInfo; + void* m_buffer; + size_t m_size; +}; + +class KernelArgSampler:public KernelArg +{ +public: + KernelArgSampler(cl_context context, cl_bool isNormalized, + cl_addressing_mode addressMode, cl_filter_mode filterMode): + KernelArg(KernelArgInfo(), NULL, sizeof(cl_sampler)) + { + m_argInfo.setTypeName("sampler_t"); + int error = CL_SUCCESS; + m_samplerObj = clCreateSampler(context, isNormalized, addressMode, + filterMode, &error); + if( error != CL_SUCCESS ) + { + throw Exceptions::TestError("clCreateSampler failed\n", error); + } + } + + ~KernelArgSampler() + { + //~clSamplerWrapper() releases the sampler object + } 
+ + size_t getArgSize() const + { + return sizeof(cl_sampler); + } + + const void* getArgValue() const + { + return &m_samplerObj; + } + + bool compare( const KernelArg& rhs ) const + { + if (const KernelArgSampler *Rhs = dynamic_cast(&rhs)) + { + return isNormalized() == Rhs->isNormalized() && + getAddressingMode() == Rhs->getAddressingMode() && + getFilterMode() == Rhs->getFilterMode(); + } + return false; + } + + cl_sampler getSampler() const + { + return (cl_sampler)m_samplerObj; + } + +protected: + mutable clSamplerWrapper m_samplerObj; + + cl_bool isNormalized() const + { + cl_bool norm; + cl_int err = clGetSamplerInfo(getSampler(), + CL_SAMPLER_NORMALIZED_COORDS, + sizeof(cl_bool), + &norm, + NULL); + if (CL_SUCCESS != err) + throw Exceptions::TestError("clGetSamplerInfo failed\n", err); + return norm; + } + + cl_addressing_mode getAddressingMode() const + { + cl_addressing_mode addressingmode; + cl_int err = clGetSamplerInfo(getSampler(), + CL_SAMPLER_ADDRESSING_MODE, + sizeof(cl_addressing_mode), + &addressingmode, + NULL); + if (CL_SUCCESS != err) + throw Exceptions::TestError("clGetSamplerInfo failed\n", err); + return addressingmode; + } + + cl_filter_mode getFilterMode() const + { + cl_filter_mode filtermode; + cl_int err = clGetSamplerInfo(getSampler(), + CL_SAMPLER_FILTER_MODE, + sizeof(cl_filter_mode), + &filtermode, + NULL); + if (CL_SUCCESS != err) + throw Exceptions::TestError("clGetSamplerInfo failed\n", err); + return filtermode; + } + +}; + + +class KernelArgMemObj:public KernelArg +{ +public: + KernelArgMemObj(const KernelArgInfo& argInfo, void* buffer, size_t size): + KernelArg(argInfo, buffer, size) + { + m_memObj = NULL; + } + + ~KernelArgMemObj() + { + //~clMemWrapper() releases the memory object + } + + virtual void readToHost(cl_command_queue queue) = 0; + + + size_t getArgSize() const + { + if( NULL == m_buffer ) + return m_size; // local buffer + else + return sizeof(cl_mem); + } + + const void* getArgValue() const + { + if( NULL == 
m_buffer ) + { + return NULL; // local buffer + } + else { + clMemWrapper* p = const_cast(&m_memObj); + + return (const void*)(&(*p)); + } + } + +protected: + clMemWrapper m_memObj; +}; + +/** + Represents the single kernel's argument value. + Responsible for livekeeping of OCL objects. + */ +class KernelArgBuffer:public KernelArgMemObj +{ +public: + KernelArgBuffer(cl_context context, const KernelArgInfo& argInfo, void* buffer, size_t size): + KernelArgMemObj(argInfo, buffer, size) + { + if( NULL != buffer ) + { + int error = CL_SUCCESS; + m_memObj = clCreateBuffer( context, + (cl_mem_flags)( CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR ), + size, buffer, &error ); + if( error != CL_SUCCESS ) + { + throw Exceptions::TestError("clCreateBuffer failed\n", error); + } + } + } + + void readToHost(cl_command_queue queue) + { + if( NULL == m_buffer ) + { + return; + } + + int error = clEnqueueReadBuffer( queue, m_memObj, CL_TRUE, 0, m_size, m_buffer, 0, NULL, NULL); + if( error != CL_SUCCESS ) + { + throw Exceptions::TestError("clEnqueueReadBuffer failed\n", error); + } + } +}; + +class KernelArgImage:public KernelArgMemObj +{ +public: + KernelArgImage(cl_context context, const KernelArgInfo& argInfo, + void* buffer, size_t size, cl_mem_flags flags, + cl_image_format format, cl_image_desc desc): + KernelArgMemObj(argInfo, buffer, size), m_desc(desc) + { + if( NULL != buffer ) + { + int error = CL_SUCCESS; + flags |= CL_MEM_COPY_HOST_PTR ; + if (CL_MEM_OBJECT_IMAGE1D_BUFFER == m_desc.image_type) + { + m_desc.buffer = clCreateBuffer( context, flags, m_desc.image_row_pitch, buffer, &error ); + if( error != CL_SUCCESS ) + { + throw Exceptions::TestError("KernelArgImage clCreateBuffer failed\n", error); + } + buffer = NULL; + flags &= ~CL_MEM_COPY_HOST_PTR; + m_desc.image_row_pitch = 0; + m_desc.image_slice_pitch = 0; + } + m_memObj = clCreateImage( context, flags, &format, &m_desc, buffer, &error ); + if( error != CL_SUCCESS ) + { + throw Exceptions::TestError("KernelArgImage 
clCreateImage failed\n", error); + } + } + } + + ~KernelArgImage() + { + if (CL_MEM_OBJECT_IMAGE1D_BUFFER == m_desc.image_type) + { + clReleaseMemObject(m_desc.buffer); + } + } + + void readToHost(cl_command_queue queue) + { + if( NULL == m_buffer ) + { + return; + } + + size_t origin[3] = {0, 0, 0}; + size_t region[3] = {m_desc.image_width , m_desc.image_height , m_desc.image_depth}; + + int error = clEnqueueReadImage (queue, m_memObj, CL_TRUE, origin, region, m_desc.image_row_pitch, m_desc.image_slice_pitch, m_buffer, 0, NULL, NULL); + + if( error != CL_SUCCESS ) + { + throw Exceptions::TestError("clEnqueueReadImage failed\n", error); + } + } + +private: + cl_image_desc m_desc; +}; + +/** + Represents the container for the kernel parameters + */ +class KernelArgs +{ + typedef std::vector KernelsArgsVector; +public: + KernelArgs(){} + ~KernelArgs() + { + KernelsArgsVector::iterator i = m_args.begin(); + KernelsArgsVector::iterator e = m_args.end(); + + for( ; i != e; ++i ) + { + assert( NULL != *i ); + delete *i; + } + } + + void readToHost(cl_command_queue queue) + { + KernelsArgsVector::iterator i = m_args.begin(); + KernelsArgsVector::iterator e = m_args.end(); + + for( ; i != e; ++i ) + { + (*i)->readToHost(queue); + } + } + + size_t getArgCount() const { return m_args.size(); } + + KernelArg* getArg(size_t index ) { return m_args[index]; } + + const KernelArg* getArg(size_t index) const { return m_args[index]; } + + void addArg( KernelArg* arg ) { m_args.push_back(arg); } + +private: + KernelsArgsVector m_args; +}; + +#endif diff --git a/test_conformance/spir/khr.csv b/test_conformance/spir/khr.csv new file mode 100644 index 00000000..b29c49b5 --- /dev/null +++ b/test_conformance/spir/khr.csv @@ -0,0 +1,2176 @@ +"suite name", "test name", "extension name", "CL_DEVICE_IMAGE_SUPPORT", "CL_DEVICE_3D_IMAGE_SUPPORT" +"math_brute_force", "math_kernel.modf_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.tanh_double16", 
"cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.isordered_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.cbrt_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.cos_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.asinpi_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.log1p_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.fmin_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.ilogb_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.hypot_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.isinf_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.isfinite_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.sin_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.log2_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.tan_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.isnan_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.maxmag_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.modf_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.islessequal_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.isgreaterequal_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.ilogb_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.exp2_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.asinpi_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.fmax_double8", "cl_khr_fp64", 
"CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.isnotequal_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.asinpi_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.ceil_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.floor_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.islessgreater_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.cosh_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.logb_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.exp2_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.islessequal_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.signbit_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.cos_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.isgreater_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.sqrt_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.isinf_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.sin_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.fabs_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.log2_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.acosh_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.asinpi_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.maxmag_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.fabs_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.frexp_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" 
+"math_brute_force", "math_kernel16.asinpi_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.fmin_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.powr_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.ilogb_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.log_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.fract_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.exp_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.fmax_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.islessgreater_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.sincos_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.nextafter_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.exp10_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.minmag_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.round_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.expm1_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.rint_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.islessgreater_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.cospi_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.powr_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.copysign_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.exp10_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.asinh_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", 
"math_kernel2.ceil_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.atan2pi_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.frexp_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.rootn_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.acospi_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.cosh_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.tanpi_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.remainder_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.atanpi_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.atan2pi_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.mad_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.islessgreater_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.fract_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.nan_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.exp_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.lgamma_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.atan_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.hypot_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.logb_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.nan_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.log10_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.sinpi_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", 
"math_kernel16.islessequal_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.atan_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.minmag_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.trunc_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.tanh_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.acos_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.isequal_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.atanh_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.ldexp_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.cosh_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.remainder_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.cbrt_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.trunc_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.log10_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.tanpi_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.log10_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.isgreaterequal_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.fabs_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.sinpi_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.cos_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.sqrt_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.round_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", 
"math_kernel3.modf_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.atan_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.lgamma_r_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.ilogb_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.atanh_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.isgreaterequal_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.fract_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.signbit_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.fdim_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.fmod_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.isunordered_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.rootn_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.isgreater_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.fabs_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.isgreaterequal_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.atan_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.mad_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.expm1_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.atanpi_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.isnormal_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.expm1_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.sqrt_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", 
"math_kernel.lgamma_r_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.pow_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.round_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.remquo_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.sqrt_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.rootn_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.isfinite_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.log_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.tan_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.fdim_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.round_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.sinh_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.log1p_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.rsqrt_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.isnan_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.exp_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.fdim_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.log10_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.sinh_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.logb_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.ceil_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.pown_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.isnotequal_double2", "cl_khr_fp64", "CL_FALSE", 
"CL_FALSE" +"math_brute_force", "math_kernel8.islessgreater_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.lgamma_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.isinf_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.logb_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.nan_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.mad_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.isordered_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.rint_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.nan_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.exp2_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.sinpi_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.log_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.sinpi_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.asin_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.cbrt_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.expm1_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.fmax_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.fmod_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.isunordered_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.cospi_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.isgreater_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.isinf_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", 
"math_kernel2.maxmag_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.pown_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.asinh_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.sin_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.fmod_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.signbit_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.round_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.lgamma_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.ceil_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.acospi_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.atan2_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.fdim_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.rint_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.log2_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.isless_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.sinh_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.tanpi_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.sinpi_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.fract_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.asinpi_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.fmod_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.cospi_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.sincos_double16", "cl_khr_fp64", 
"CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.atanh_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.isordered_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.sinpi_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.asin_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.pow_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.isordered_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.rootn_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.isequal_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.fma_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.rootn_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.acospi_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.frexp_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.ldexp_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.trunc_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.islessequal_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.nan_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.isnormal_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.islessgreater_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.isless_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.remquo_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.isgreater_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.isunordered_double2", "cl_khr_fp64", 
"CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.cosh_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.isnotequal_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.fmod_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.fmax_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.rsqrt_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.isinf_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.fmin_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.cbrt_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.islessequal_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.acos_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.isless_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.exp2_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.sincos_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.tanh_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.fmin_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.cospi_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.remquo_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.sin_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.tan_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.log1p_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.exp_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.log_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", 
"math_kernel.asinh_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.isnotequal_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.asinh_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.lgamma_r_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.sqrt_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.sinh_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.powr_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.tanh_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.pown_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.isnotequal_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.sincos_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.trunc_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.ceil_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.acosh_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.powr_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.isunordered_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.sincos_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.isnormal_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.rsqrt_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.log_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.atan2_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.remainder_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", 
"math_kernel3.rint_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.powr_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.minmag_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.tan_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.fmod_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.isfinite_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.isnormal_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.sin_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.sqrt_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.rint_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.floor_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.mad_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.ldexp_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.fma_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.asin_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.acos_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.exp2_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.rootn_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.remainder_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.acosh_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.isnan_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.isequal_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.acosh_double", "cl_khr_fp64", "CL_FALSE", 
"CL_FALSE" +"math_brute_force", "math_kernel4.atan_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.isgreaterequal_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.exp10_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.trunc_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.fract_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.lgamma_r_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.isnotequal_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.fabs_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.fdim_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.fabs_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.minmag_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.frexp_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.lgamma_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.fmin_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.exp10_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.log2_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.modf_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.ldexp_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.nan_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.log2_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.asin_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.rsqrt_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", 
"math_kernel3.atanpi_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.minmag_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.cospi_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.ilogb_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.frexp_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.atan2pi_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.nextafter_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.isequal_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.isnormal_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.maxmag_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.fma_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.pown_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.hypot_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.expm1_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.copysign_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.hypot_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.lgamma_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.isless_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.isfinite_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.isnormal_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.powr_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.nextafter_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", 
"math_kernel.log1p_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.remquo_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.log10_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.remainder_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.atanh_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.rsqrt_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.isgreaterequal_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.expm1_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.logb_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.ceil_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.isnan_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.pown_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.tanpi_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.remquo_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.atanh_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.ldexp_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.tanpi_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.trunc_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.atan2pi_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.maxmag_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.sinh_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.acospi_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.log2_double", 
"cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.atan_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.atan2_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.log1p_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.pown_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.atan2_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.cos_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.sinh_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.lgamma_r_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.fma_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.atanpi_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.round_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.pow_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.isequal_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.isless_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.atanpi_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.sincos_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.tanh_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.ldexp_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.hypot_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.pow_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.signbit_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.cbrt_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" 
+"math_brute_force", "math_kernel4.sin_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.isordered_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.cospi_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.acosh_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.floor_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.lgamma_r_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.acos_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.copysign_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.log10_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.exp2_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.pow_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.fma_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.frexp_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.isnan_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.atanpi_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.asinh_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.remainder_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.cosh_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.acos_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.nextafter_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.exp10_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.atan2pi_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", 
"math_kernel2.cosh_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.copysign_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.isgreater_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.signbit_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.cos_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.floor_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.atanh_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.pow_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.fmax_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.acos_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.fmax_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.floor_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.isless_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.floor_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.minmag_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.isfinite_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.islessequal_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.nextafter_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.isinf_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.atan2pi_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.mad_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.acospi_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.mad_double16", 
"cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.isnan_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.isunordered_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.fmin_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.tan_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.ilogb_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.atan2_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.tan_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.cos_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.modf_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.copysign_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.asin_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.asinh_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.exp10_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.exp_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.tanh_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.isfinite_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.signbit_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.atan2_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.fract_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.remquo_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.modf_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.isequal_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" 
+"math_brute_force", "math_kernel3.log_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.maxmag_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.nextafter_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.tanpi_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.fdim_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.rint_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.isgreater_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.exp_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.isunordered_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.asin_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.copysign_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.fma_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.lgamma_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.logb_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel3.hypot_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.cbrt_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel4.log1p_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel.isordered_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel16.acospi_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel2.acosh_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"math_brute_force", "math_kernel8.rsqrt_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"api", "image_d", "", "CL_TRUE", "CL_FALSE" +"api", "image_d_write_array", "cles_khr_2d_image_array_writes", 
"CL_TRUE", "CL_FALSE" +"api", "image_d_3d", "", "CL_TRUE", "CL_TRUE" +"api", "sample_test.min_max_read_image_args", "", "CL_TRUE", "CL_FALSE" +"api", "double_vector3_p", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"api", "double_vector2_d", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"api", "double_vector4_d", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"api", "double_vector4_p", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"api", "double_vector16_p2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"api", "double_vector2_p", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"api", "double_vector8_p2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"api", "double_vector16_p", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"api", "double_vector3_d", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"api", "double_vector16_d", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"api", "double_vector3_p2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"api", "double_vector8_d", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"api", "double_vector2_p2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"api", "double_vector4_p2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"api", "double_vector8_p", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"api", "double_scalar_p", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"api", "double_scalar_p2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"api", "double_scalar_d", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"api", "long_constant_scalar_p2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_const_scalar_d", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_const_vector16_d", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_const_vector2_d", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_const_vector3_d", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_const_vector4_d", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_const_vector8_d", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_constant_scalar_p3", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_constant_scalar_restrict_p2", "cles_khr_int64", "CL_FALSE", 
"CL_FALSE" +"api", "long_constant_scalar_restrict_p3", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_constant_vector16_p1", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_constant_vector16_restrict_p1", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_constant_vector2_p1", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_constant_vector2_restrict_p1", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_constant_vector3_p1", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_constant_vector3_restrict_p1", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_constant_vector4_p1", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_constant_vector4_restrict_p1", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_constant_vector8_p1", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_constant_vector8_restrict_p1", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_global_const_scalar_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_global_const_scalar_restrict_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_global_const_vector16_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_global_const_vector16_restrict_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_global_const_vector2_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_global_const_vector2_restrict_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_global_const_vector3_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_global_const_vector3_restrict_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_global_const_vector4_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_global_const_vector4_restrict_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_global_const_vector8_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_global_const_vector8_restrict_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_global_const_volatile_scalar_p", "cles_khr_int64", 
"CL_FALSE", "CL_FALSE" +"api", "long_global_const_volatile_scalar_restrict_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_global_const_volatile_vector16_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_global_const_volatile_vector16_restrict_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_global_const_volatile_vector2_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_global_const_volatile_vector2_restrict_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_global_const_volatile_vector3_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_global_const_volatile_vector3_restrict_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_global_const_volatile_vector4_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_global_const_volatile_vector4_restrict_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_global_const_volatile_vector8_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_global_const_volatile_vector8_restrict_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_global_scalar_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_global_scalar_restrict_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_global_vector16_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_global_vector16_restrict_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_global_vector2_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_global_vector2_restrict_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_global_vector3_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_global_vector3_restrict_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_global_vector4_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_global_vector4_restrict_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_global_vector8_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_global_vector8_restrict_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" 
+"api", "long_global_volatile_scalar_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_global_volatile_scalar_restrict_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_global_volatile_vector16_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_global_volatile_vector16_restrict_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_global_volatile_vector2_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_global_volatile_vector2_restrict_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_global_volatile_vector3_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_global_volatile_vector3_restrict_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_global_volatile_vector4_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_global_volatile_vector4_restrict_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_global_volatile_vector8_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_global_volatile_vector8_restrict_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_local_const_scalar_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_local_const_scalar_restrict_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_local_const_vector16_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_local_const_vector16_restrict_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_local_const_vector2_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_local_const_vector2_restrict_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_local_const_vector3_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_local_const_vector3_restrict_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_local_const_vector4_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_local_const_vector4_restrict_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_local_const_vector8_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", 
"long_local_const_vector8_restrict_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_local_const_volatile_scalar_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_local_const_volatile_scalar_restrict_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_local_const_volatile_vector16_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_local_const_volatile_vector16_restrict_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_local_const_volatile_vector2_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_local_const_volatile_vector2_restrict_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_local_const_volatile_vector3_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_local_const_volatile_vector3_restrict_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_local_const_volatile_vector4_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_local_const_volatile_vector4_restrict_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_local_const_volatile_vector8_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_local_const_volatile_vector8_restrict_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_local_scalar_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_local_scalar_restrict_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_local_vector16_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_local_vector16_restrict_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_local_vector2_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_local_vector2_restrict_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_local_vector3_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_local_vector3_restrict_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_local_vector4_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_local_vector4_restrict_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_local_vector8_p", 
"cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_local_vector8_restrict_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_local_volatile_scalar_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_local_volatile_scalar_restrict_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_local_volatile_vector16_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_local_volatile_vector16_restrict_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_local_volatile_vector2_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_local_volatile_vector2_restrict_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_local_volatile_vector3_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_local_volatile_vector3_restrict_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_local_volatile_vector4_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_local_volatile_vector4_restrict_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_local_volatile_vector8_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_local_volatile_vector8_restrict_p", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_private_const_scalar_d", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_private_const_vector16_d", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_private_const_vector2_d", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_private_const_vector3_d", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_private_const_vector4_d", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_private_const_vector8_d", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_private_scalar_d", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_private_vector16_d", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_private_vector2_d", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_private_vector3_d", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_private_vector4_d", "cles_khr_int64", 
"CL_FALSE", "CL_FALSE" +"api", "long_private_vector8_d", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_scalar_d", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_vector16_d", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_vector2_d", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_vector3_d", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_vector4_d", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"api", "long_vector8_d", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"geometrics", "sample_test.geom_normalize_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"geometrics", "sample_test.geom_distance_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"geometrics", "sample_test.geom_length_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"geometrics", "sample_test.geom_length_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"geometrics", "sample_test.geom_distance_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"geometrics", "sample_test.geom_cross_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"geometrics", "sample_test.geom_distance_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"geometrics", "sample_test.geom_dot_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"geometrics", "sample_test.geom_normalize_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"geometrics", "sample_test.geom_normalize_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"geometrics", "sample_test.geom_normalize_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"geometrics", "sample_test.geom_length_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"geometrics", "sample_test.geom_cross_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"geometrics", "sample_test.geom_distance_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"geometrics", "sample_test.geom_dot_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"geometrics", "sample_test.geom_dot_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"geometrics", "sample_test.geom_dot_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" 
+"geometrics", "sample_test.geom_length_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "convert2_type_roundingmode_type_d", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "convert8_type_roundingmode_type_d", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "convert16_type_roundingmode_type_d", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "convert3_type_roundingmode_type_d", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "convert4_type_roundingmode_type_d", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double_rte_short", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double_rtz_char", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_short2_rtz_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uchar2_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_short_sat_rtn_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uchar_rtn_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_implicit_double_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_short2_sat_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_char_rtp_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_char_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uint2_sat_rtp_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double2_rtn_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_implicit_double_short", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ushort2_sat_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double_uint", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uint2_sat_rtz_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_implicit_ushort_double", 
"cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_sat_rtz_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_char_sat_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_float_rtz_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_short_sat_rte_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_char2_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long2_sat_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_implicit_uint_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long2_sat_rtp_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ushort2_sat_rtp_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uint_rtz_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_rtp_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_int_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double_rtn_long", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double_rtz_float", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_implicit_double_long", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_sat_rtp_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_rtn_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double2_rtn_float2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong2_rte_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double2_rtz_float2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double_rte_uchar", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double_rtn_ushort", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", 
"test_convert_short_rtz_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_float2_rtn_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_char_sat_rte_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_sat_rtn_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_char2_sat_rtp_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long2_sat_rte_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double2_rte_long2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uchar2_sat_rtz_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ushort2_sat_rtn_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_int_rtn_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_int_sat_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double_ulong", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double2_rte_int2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_implicit_int_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double_ushort", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_implicit_double_ushort", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double_rtn_int", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong2_sat_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_sat_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double_rtn_float", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ushort_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double_rtz_uchar", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" 
+"conversions", "test_convert_double_rtp_char", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double_rtn_uint", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double2_rte_ulong2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double2_ulong2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double2_rtn_short2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_int_sat_rte_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uint2_sat_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double2_rtp_short2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_sat_rtz_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uchar2_sat_rte_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double_short", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_int2_sat_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double_int", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_int2_sat_rtz_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_float_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double2_uchar2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double2_rtz_ulong2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double2_rtz_char2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_short2_rte_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double_rte_uint", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_int2_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uint_sat_rtp_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double2_rtz_double2", "cl_khr_fp64", "CL_FALSE", 
"CL_FALSE" +"conversions", "test_convert_int2_rtz_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double_rte_long", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ushort_sat_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double2_rtz_int2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double2_ushort2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong2_sat_rtz_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double2_rte_uchar2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double2_rte_char2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double2_rtn_char2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uchar2_rte_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double_rtp_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_char2_sat_rte_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long2_sat_rtn_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_char2_rte_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double_rtz_short", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long2_rtp_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ushort_rtn_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong2_sat_rtn_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_rtn_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double_rtn_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double2_rtn_int2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", 
"test_convert_ushort_rte_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_implicit_double_char", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uint_sat_rtz_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_char2_rtz_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_short_sat_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_implicit_double_uint", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong2_sat_rtp_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double2_rte_ushort2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_short_rtp_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_float2_rtz_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double_rtn_short", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_char_sat_rtp_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_int2_rtn_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uchar2_sat_rtn_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_int2_rte_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ushort2_sat_rte_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double2_short2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong2_rtz_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double2_rtp_float2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long2_rte_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_implicit_double_float", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uchar_sat_rte_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_char2_sat_rtz_double2", "cl_khr_fp64", 
"CL_FALSE", "CL_FALSE" +"conversions", "test_convert_char2_sat_rtn_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double2_long2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_char2_sat_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uchar_rtp_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double2_rtz_uchar2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long2_rtz_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ushort_sat_rtn_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uchar2_rtn_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong2_rtp_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uint_rte_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uint2_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_int_rtz_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double_rtz_ushort", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double2_rtp_ulong2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double2_rtp_uchar2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ushort2_rtz_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uint_sat_rtn_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double_rtz_uint", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double_rtp_short", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double2_rtp_long2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double_rte_float", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_implicit_short_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", 
"test_convert_float_rte_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_rtz_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_rtp_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double_float", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double2_rtn_uint2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uchar_rte_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_char2_rtp_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double2_rte_uint2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double2_rtp_char2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double2_rtz_uint2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double_rtz_ulong", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uchar_sat_rtp_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_int_rtp_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uchar_sat_rtn_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double2_rtp_ushort2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double_rte_char", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_int_sat_rtp_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_sat_rtn_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double2_int2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_int2_sat_rte_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_sat_rtp_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double2_rte_short2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double_rte_ulong", "cl_khr_fp64", "CL_FALSE", 
"CL_FALSE" +"conversions", "test_convert_ushort2_sat_rtz_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uint_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double_rte_int", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double2_rtz_short2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_float_rtn_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uchar_rtz_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double_long", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_implicit_double_ulong", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double_rtp_long", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_short2_rtn_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long2_sat_rtz_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double_char", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_sat_rte_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ushort_sat_rte_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_float2_rte_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double_rte_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double2_rtp_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_float2_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double_rtn_uchar", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double_rtp_ushort", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_implicit_char_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double2_rtn_uchar2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_float_rtp_double", "cl_khr_fp64", 
"CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uint_sat_rte_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_short2_sat_rtp_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ushort2_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_int_sat_rtn_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double2_rtz_ushort2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long2_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double2_rte_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double_rtp_uchar", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_short2_rtp_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_char_rte_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_implicit_float_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double_rtn_ulong", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double2_uint2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_rte_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double2_rtp_uint2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_char_sat_rtn_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uint_sat_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double2_rtn_ushort2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long2_rtn_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ushort2_rtn_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ushort_sat_rtz_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double2_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", 
"test_convert_char_sat_rtz_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_short2_sat_rtz_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double2_rtp_int2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_short2_sat_rte_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uchar_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_short_sat_rtp_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uint2_rtp_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double2_rtn_long2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_sat_rte_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ushort2_rtp_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ushort_sat_rtp_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_rte_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_int2_sat_rtp_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_implicit_long_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uchar2_rtp_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_char2_rtn_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ushort_rtp_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong2_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_short2_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_implicit_ulong_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_implicit_uchar_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_short_rtn_double", "cl_khr_fp64", "CL_FALSE", 
"CL_FALSE" +"conversions", "test_convert_uchar2_sat_rtp_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double_rtn_char", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uint_rtn_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double2_float2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_int2_rtp_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong2_rtn_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double_uchar", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double_rtp_int", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double_rtz_int", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uint2_sat_rte_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_float2_rtp_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double2_rtz_long2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uint_rtp_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uchar_sat_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double_rtp_float", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_char_rtz_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double2_char2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double_rtp_uint", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_int2_sat_rtn_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uint2_rte_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uint2_sat_rtn_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double2_rtn_ulong2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double_rtz_double", "cl_khr_fp64", 
"CL_FALSE", "CL_FALSE" +"conversions", "test_implicit_double_uchar", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double_rtz_long", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uchar_sat_rtz_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uchar2_sat_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ushort2_rte_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_int_rte_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_short_sat_rtz_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double_rte_ushort", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong2_sat_rte_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_rtz_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_int_sat_rtz_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uchar2_rtz_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double_rtp_ulong", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_short_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_char_rtn_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_implicit_double_int", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_short_rte_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_short2_sat_rtn_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uint2_rtz_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_sat_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_double2_rte_float2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uint2_rtn_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", 
"test_convert_ushort_rtz_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"conversions", "test_implicit_uchar_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uchar_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uchar_rte_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uchar_rtp_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uchar_rtn_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uchar_rtz_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uchar_sat_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uchar_sat_rte_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uchar_sat_rtp_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uchar_sat_rtn_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uchar_sat_rtz_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_implicit_uchar_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uchar_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uchar_rte_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uchar_rtp_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uchar_rtn_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uchar_rtz_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uchar_sat_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uchar_sat_rte_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uchar_sat_rtp_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uchar_sat_rtn_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uchar_sat_rtz_long", 
"cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_implicit_char_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_char_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_char_rte_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_char_rtp_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_char_rtn_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_char_rtz_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_char_sat_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_char_sat_rte_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_char_sat_rtp_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_char_sat_rtn_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_char_sat_rtz_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_implicit_char_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_char_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_char_rte_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_char_rtp_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_char_rtn_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_char_rtz_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_char_sat_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_char_sat_rte_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_char_sat_rtp_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_char_sat_rtn_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_char_sat_rtz_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", 
"test_implicit_ushort_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ushort_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ushort_rte_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ushort_rtp_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ushort_rtn_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ushort_rtz_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ushort_sat_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ushort_sat_rte_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ushort_sat_rtp_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ushort_sat_rtn_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ushort_sat_rtz_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_implicit_ushort_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ushort_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ushort_rte_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ushort_rtp_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ushort_rtn_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ushort_rtz_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ushort_sat_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ushort_sat_rte_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ushort_sat_rtp_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ushort_sat_rtn_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ushort_sat_rtz_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", 
"test_implicit_short_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_short_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_short_rte_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_short_rtp_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_short_rtn_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_short_rtz_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_short_sat_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_short_sat_rte_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_short_sat_rtp_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_short_sat_rtn_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_short_sat_rtz_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_implicit_short_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_short_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_short_rte_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_short_rtp_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_short_rtn_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_short_rtz_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_short_sat_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_short_sat_rte_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_short_sat_rtp_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_short_sat_rtn_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_short_sat_rtz_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_implicit_uint_ulong", 
"cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uint_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uint_rte_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uint_rtp_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uint_rtn_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uint_rtz_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uint_sat_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uint_sat_rte_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uint_sat_rtp_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uint_sat_rtn_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uint_sat_rtz_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_implicit_uint_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uint_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uint_rte_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uint_rtp_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uint_rtn_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uint_rtz_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uint_sat_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uint_sat_rte_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uint_sat_rtp_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uint_sat_rtn_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_uint_sat_rtz_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_implicit_int_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", 
"test_convert_int_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_int_rte_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_int_rtp_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_int_rtn_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_int_rtz_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_int_sat_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_int_sat_rte_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_int_sat_rtp_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_int_sat_rtn_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_int_sat_rtz_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_implicit_int_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_int_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_int_rte_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_int_rtp_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_int_rtn_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_int_rtz_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_int_sat_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_int_sat_rte_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_int_sat_rtp_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_int_sat_rtn_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_int_sat_rtz_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_implicit_float_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_float_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" 
+"conversions", "test_convert_float_rte_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_float_rtp_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_float_rtn_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_float_rtz_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_implicit_float_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_float_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_float_rte_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_float_rtp_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_float_rtn_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_float_rtz_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_implicit_ulong_uchar", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_uchar", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_rte_uchar", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_rtp_uchar", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_rtn_uchar", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_rtz_uchar", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_sat_uchar", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_sat_rte_uchar", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_sat_rtp_uchar", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_sat_rtn_uchar", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_sat_rtz_uchar", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_implicit_ulong_char", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_char", 
"cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_rte_char", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_rtp_char", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_rtn_char", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_rtz_char", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_sat_char", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_sat_rte_char", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_sat_rtp_char", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_sat_rtn_char", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_sat_rtz_char", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_implicit_ulong_ushort", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_ushort", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_rte_ushort", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_rtp_ushort", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_rtn_ushort", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_rtz_ushort", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_sat_ushort", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_sat_rte_ushort", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_sat_rtp_ushort", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_sat_rtn_ushort", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_sat_rtz_ushort", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_implicit_ulong_short", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_short", "cles_khr_int64", 
"CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_rte_short", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_rtp_short", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_rtn_short", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_rtz_short", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_sat_short", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_sat_rte_short", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_sat_rtp_short", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_sat_rtn_short", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_sat_rtz_short", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_implicit_ulong_uint", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_uint", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_rte_uint", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_rtp_uint", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_rtn_uint", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_rtz_uint", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_sat_uint", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_sat_rte_uint", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_sat_rtp_uint", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_sat_rtn_uint", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_sat_rtz_uint", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_implicit_ulong_int", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_int", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", 
"test_convert_ulong_rte_int", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_rtp_int", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_rtn_int", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_rtz_int", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_sat_int", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_sat_rte_int", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_sat_rtp_int", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_sat_rtn_int", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_sat_rtz_int", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_implicit_ulong_float", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_float", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_rte_float", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_rtp_float", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_rtn_float", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_rtz_float", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_sat_float", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_sat_rte_float", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_sat_rtp_float", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_sat_rtn_float", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_sat_rtz_float", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_implicit_ulong_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_rte_ulong", 
"cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_rtp_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_rtn_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_rtz_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_sat_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_sat_rte_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_sat_rtp_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_sat_rtn_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_sat_rtz_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_implicit_ulong_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_rte_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_rtp_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_rtn_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_rtz_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_sat_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_sat_rte_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_sat_rtp_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_sat_rtn_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_ulong_sat_rtz_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_implicit_long_uchar", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_uchar", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_rte_uchar", "cles_khr_int64", "CL_FALSE", 
"CL_FALSE" +"conversions", "test_convert_long_rtp_uchar", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_rtn_uchar", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_rtz_uchar", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_sat_uchar", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_sat_rte_uchar", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_sat_rtp_uchar", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_sat_rtn_uchar", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_sat_rtz_uchar", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_implicit_long_char", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_char", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_rte_char", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_rtp_char", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_rtn_char", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_rtz_char", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_sat_char", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_sat_rte_char", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_sat_rtp_char", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_sat_rtn_char", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_sat_rtz_char", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_implicit_long_ushort", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_ushort", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_rte_ushort", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", 
"test_convert_long_rtp_ushort", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_rtn_ushort", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_rtz_ushort", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_sat_ushort", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_sat_rte_ushort", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_sat_rtp_ushort", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_sat_rtn_ushort", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_sat_rtz_ushort", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_implicit_long_short", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_short", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_rte_short", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_rtp_short", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_rtn_short", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_rtz_short", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_sat_short", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_sat_rte_short", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_sat_rtp_short", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_sat_rtn_short", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_sat_rtz_short", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_implicit_long_uint", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_uint", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_rte_uint", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_rtp_uint", 
"cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_rtn_uint", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_rtz_uint", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_sat_uint", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_sat_rte_uint", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_sat_rtp_uint", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_sat_rtn_uint", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_sat_rtz_uint", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_implicit_long_int", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_int", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_rte_int", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_rtp_int", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_rtn_int", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_rtz_int", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_sat_int", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_sat_rte_int", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_sat_rtp_int", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_sat_rtn_int", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_sat_rtz_int", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_implicit_long_float", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_float", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_rte_float", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_rtp_float", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", 
"test_convert_long_rtn_float", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_rtz_float", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_sat_float", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_sat_rte_float", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_sat_rtp_float", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_sat_rtn_float", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_sat_rtz_float", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_implicit_long_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_rte_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_rtp_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_rtn_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_rtz_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_sat_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_sat_rte_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_sat_rtp_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_sat_rtn_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_sat_rtz_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_implicit_long_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_rte_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_rtp_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_rtn_long", "cles_khr_int64", 
"CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_rtz_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_sat_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_sat_rte_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_sat_rtp_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_sat_rtn_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "test_convert_long_sat_rtz_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "long_convert2_type_roundingmode_type_f", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "long_convert3_type_roundingmode_type_f", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "long_convert4_type_roundingmode_type_f", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "long_convert8_type_roundingmode_type_f", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"conversions", "long_convert16_type_roundingmode_type_f", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"select", "select_double3_ulong3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"select", "select_double4_ulong4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"select", "select_double8_ulong8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"select", "select_double8_long8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"select", "select_double_ulong", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"select", "select_double3_long3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"select", "select_double_long", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"select", "select_double4_long4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"select", "select_double16_long16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"select", "select_double16_ulong16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"select", "select_double2_long2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"select", "select_double2_ulong2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"select", "select_ulong_ulong", "cles_khr_int64", 
"CL_FALSE", "CL_FALSE" +"select", "select_ulong2_ulong2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"select", "select_ulong3_ulong3", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"select", "select_ulong4_ulong4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"select", "select_ulong8_ulong8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"select", "select_ulong16_ulong16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"select", "select_ulong_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"select", "select_ulong2_long2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"select", "select_ulong3_long3", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"select", "select_ulong4_long4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"select", "select_ulong8_long8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"select", "select_ulong16_long16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"select", "select_long_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"select", "select_long2_ulong2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"select", "select_long3_ulong3", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"select", "select_long4_ulong4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"select", "select_long8_ulong8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"select", "select_long16_ulong16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"select", "select_long_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"select", "select_long2_long2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"select", "select_long3_long3", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"select", "select_long4_long4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"select", "select_long8_long8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"select", "select_long16_long16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.prefetch_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vload_global_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.prefetch_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"basic", 
"test_fn.prefetch_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.async_copy_global_to_local_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vstore_local_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.async_strided_copy_global_to_local_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.async_copy_local_to_global_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vstore_global_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vstore_global_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"basic", "test_sizeof.sizeof_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"basic", "sample_test.vec_type_hint_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vload_global_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.async_strided_copy_local_to_global_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"basic", "test_sizeof.sizeof_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vload_constant_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vload_global_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.async_strided_copy_local_to_global_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.async_copy_local_to_global_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"basic", "test_sizeof.sizeof_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.async_copy_global_to_local_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vstore_global_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.async_copy_local_to_global_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.async_strided_copy_global_to_local_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.async_copy_local_to_global_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"basic", "sample_test.vec_type_hint_double4", "cl_khr_fp64", "CL_FALSE", 
"CL_FALSE" +"basic", "test_fn.async_copy_local_to_global_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vstore_global_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vstore_local_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"basic", "sample_test.vec_type_hint_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.async_copy_global_to_local_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.async_strided_copy_global_to_local_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"basic", "test_sizeof.sizeof_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.prefetch_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.async_strided_copy_global_to_local_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vstore_local_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vstore_local_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.prefetch_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"basic", "test_sizeof.sizeof_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.async_strided_copy_global_to_local_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vload_constant_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.async_strided_copy_local_to_global_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"basic", "sample_test.vec_type_hint_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vload_constant_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.async_copy_global_to_local_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vstore_global_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.async_strided_copy_local_to_global_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vload_global_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.async_strided_copy_local_to_global_double4", 
"cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vload_global_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vstore_local_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"basic", "sample_test.vec_type_hint_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vload_constant_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.async_copy_global_to_local_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vload_constant_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"basic", "sample_test.vec_type_hint_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "sample_test.vec_type_hint_long16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "sample_test.vec_type_hint_long2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "sample_test.vec_type_hint_long4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "sample_test.vec_type_hint_long8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "sample_test.vec_type_hint_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "sample_test.vec_type_hint_ulong16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "sample_test.vec_type_hint_ulong2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "sample_test.vec_type_hint_ulong4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "sample_test.vec_type_hint_ulong8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.async_copy_global_to_local_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.async_copy_global_to_local_long16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.async_copy_global_to_local_long2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.async_copy_global_to_local_long4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.async_copy_global_to_local_long8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.async_copy_global_to_local_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", 
"test_fn.async_copy_global_to_local_ulong16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.async_copy_global_to_local_ulong2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.async_copy_global_to_local_ulong4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.async_copy_global_to_local_ulong8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.async_copy_local_to_global_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.async_copy_local_to_global_long16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.async_copy_local_to_global_long2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.async_copy_local_to_global_long4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.async_copy_local_to_global_long8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.async_copy_local_to_global_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.async_copy_local_to_global_ulong16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.async_copy_local_to_global_ulong2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.async_copy_local_to_global_ulong4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.async_copy_local_to_global_ulong8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.async_strided_copy_global_to_local_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.async_strided_copy_global_to_local_long16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.async_strided_copy_global_to_local_long2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.async_strided_copy_global_to_local_long4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.async_strided_copy_global_to_local_long8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.async_strided_copy_global_to_local_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", 
"test_fn.async_strided_copy_global_to_local_ulong16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.async_strided_copy_global_to_local_ulong2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.async_strided_copy_global_to_local_ulong4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.async_strided_copy_global_to_local_ulong8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.async_strided_copy_local_to_global_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.async_strided_copy_local_to_global_long16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.async_strided_copy_local_to_global_long2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.async_strided_copy_local_to_global_long4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.async_strided_copy_local_to_global_long8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.async_strided_copy_local_to_global_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.async_strided_copy_local_to_global_ulong16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.async_strided_copy_local_to_global_ulong2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.async_strided_copy_local_to_global_ulong4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.async_strided_copy_local_to_global_ulong8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.prefetch_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.prefetch_long16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.prefetch_long2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.prefetch_long4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.prefetch_long8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.prefetch_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.prefetch_ulong16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", 
"test_fn.prefetch_ulong2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.prefetch_ulong4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.prefetch_ulong8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vload_constant_long16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vload_constant_long2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vload_constant_long3", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vload_constant_long4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vload_constant_long8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vload_constant_ulong16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vload_constant_ulong2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vload_constant_ulong3", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vload_constant_ulong4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vload_constant_ulong8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vload_global_long16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vload_global_long2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vload_global_long3", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vload_global_long4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vload_global_long8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vload_global_ulong16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vload_global_ulong2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vload_global_ulong3", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vload_global_ulong4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vload_global_ulong8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vload_local_long16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vload_local_long2", 
"cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vload_local_long3", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vload_local_long4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vload_local_long8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vload_local_ulong16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vload_local_ulong2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vload_local_ulong3", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vload_local_ulong4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vload_local_ulong8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vload_private_long16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vload_private_long2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vload_private_long3", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vload_private_long4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vload_private_long8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vload_private_ulong16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vload_private_ulong2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vload_private_ulong3", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vload_private_ulong4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vload_private_ulong8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vstore_global_long16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vstore_global_long2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vstore_global_long3", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vstore_global_long4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vstore_global_long8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vstore_global_ulong16", "cles_khr_int64", "CL_FALSE", 
"CL_FALSE" +"basic", "test_fn.vstore_global_ulong2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vstore_global_ulong3", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vstore_global_ulong4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vstore_global_ulong8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vstore_private_long16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vstore_private_long3", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vstore_private_long4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vstore_private_long8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vstore_private_ulong16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vstore_private_ulong2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vstore_private_ulong3", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vstore_private_ulong4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vstore_private_ulong8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vstore_local_long16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vstore_local_long2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vstore_local_long3", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vstore_local_long4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vstore_local_long8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vstore_local_ulong16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vstore_local_ulong2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vstore_local_ulong3", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vstore_local_ulong4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vstore_local_ulong8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_fn.vstore_private_long2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" 
+"basic", "test_sizeof.sizeof_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_sizeof.sizeof_long16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_sizeof.sizeof_long2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_sizeof.sizeof_long4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_sizeof.sizeof_long8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_sizeof.sizeof_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_sizeof.sizeof_ulong16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_sizeof.sizeof_ulong2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_sizeof.sizeof_ulong4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_sizeof.sizeof_ulong8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test_sizeof.sizeof_unsigned_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test.kernel_memory_alignment_private_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"basic", "test.kernel_memory_alignment_private_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"atomics", "test_atomic_fn.atomic_or_local_int", "cl_khr_local_int32_extended_atomics", "CL_FALSE", "CL_FALSE" +"atomics", "test_atomic_fn.atomic_min_global_uint", "cl_khr_global_int32_extended_atomics", "CL_FALSE", "CL_FALSE" +"atomics", "test_atomic_fn.atomic_dec_global_uint", "cl_khr_global_int32_base_atomics", "CL_FALSE", "CL_FALSE" +"atomics", "test_atomic_fn.atomic_xchg_global_int", "cl_khr_global_int32_base_atomics", "CL_FALSE", "CL_FALSE" +"atomics", "test_atomic_fn.atomic_xchg_local_uint", "cl_khr_local_int32_base_atomics", "CL_FALSE", "CL_FALSE" +"atomics", "test_atomic_fn.atomic_and_global_uint", "cl_khr_global_int32_extended_atomics", "CL_FALSE", "CL_FALSE" +"atomics", "test_atomic_fn.atomic_xor_local_uint", "cl_khr_local_int32_extended_atomics", "CL_FALSE", "CL_FALSE" +"atomics", "test_atomic_fn.atomic_inc_local_uint", "cl_khr_local_int32_base_atomics", "CL_FALSE", "CL_FALSE" +"atomics", 
"test_atomic_fn.atomic_xchg_local_float", "cl_khr_local_int32_base_atomics", "CL_FALSE", "CL_FALSE" +"atomics", "test_atomic_fn.atomic_and_global_int", "cl_khr_global_int32_extended_atomics", "CL_FALSE", "CL_FALSE" +"atomics", "test_atomic_fn.atomic_min_local_int", "cl_khr_local_int32_extended_atomics", "CL_FALSE", "CL_FALSE" +"atomics", "test_atomic_fn.atomic_inc_global_uint", "cl_khr_global_int32_base_atomics", "CL_FALSE", "CL_FALSE" +"atomics", "test_atomic_fn.atomic_add_local_uint", "cl_khr_local_int32_base_atomics", "CL_FALSE", "CL_FALSE" +"atomics", "test_atomic_fn.atomic_add_global_uint", "cl_khr_global_int32_base_atomics", "CL_FALSE", "CL_FALSE" +"atomics", "test_atomic_fn.atomic_cmpxchg_global_uint", "cl_khr_global_int32_base_atomics", "CL_FALSE", "CL_FALSE" +"atomics", "test_atomic_fn.atomic_dec_global_int", "cl_khr_global_int32_base_atomics", "CL_FALSE", "CL_FALSE" +"atomics", "test_atomic_fn.atomic_sub_global_int", "cl_khr_global_int32_base_atomics", "CL_FALSE", "CL_FALSE" +"atomics", "test_atomic_fn.atomic_inc_local_int", "cl_khr_local_int32_base_atomics", "CL_FALSE", "CL_FALSE" +"atomics", "test_atomic_fn.atomic_or_local_uint", "cl_khr_local_int32_extended_atomics", "CL_FALSE", "CL_FALSE" +"atomics", "test_atomic_fn.atomic_and_local_int", "cl_khr_local_int32_extended_atomics", "CL_FALSE", "CL_FALSE" +"atomics", "test_atomic_fn.atomic_xchg_global_float", "cl_khr_global_int32_base_atomics", "CL_FALSE", "CL_FALSE" +"atomics", "test_atomic_fn.atomic_xor_local_int", "cl_khr_local_int32_extended_atomics", "CL_FALSE", "CL_FALSE" +"atomics", "test_atomic_fn.atomic_cmpxchg_local_uint", "cl_khr_local_int32_base_atomics", "CL_FALSE", "CL_FALSE" +"atomics", "test_atomic_fn.atomic_and_local_uint", "cl_khr_local_int32_extended_atomics", "CL_FALSE", "CL_FALSE" +"atomics", "test_atomic_fn.atomic_add_local_int", "cl_khr_local_int32_base_atomics", "CL_FALSE", "CL_FALSE" +"atomics", "test_atomic_fn.atomic_xor_global_int", "cl_khr_local_int32_extended_atomics", 
"CL_FALSE", "CL_FALSE" +"atomics", "test_atomic_fn.atomic_xor_global_uint", "cl_khr_global_int32_extended_atomics", "CL_FALSE", "CL_FALSE" +"atomics", "test_atomic_fn.atomic_xchg_local_int", "cl_khr_local_int32_base_atomics", "CL_FALSE", "CL_FALSE" +"atomics", "test_atomic_fn.atomic_max_local_uint", "cl_khr_local_int32_extended_atomics", "CL_FALSE", "CL_FALSE" +"atomics", "test_atomic_fn.atomic_cmpxchg_global_int", "cl_khr_global_int32_base_atomics", "CL_FALSE", "CL_FALSE" +"atomics", "test_atomic_fn.atomic_or_global_int", "cl_khr_global_int32_extended_atomics", "CL_FALSE", "CL_FALSE" +"atomics", "test_atomic_fn.atomic_xchg_global_uint", "cl_khr_global_int32_base_atomics", "CL_FALSE", "CL_FALSE" +"atomics", "test_atomic_fn.atomic_sub_global_uint", "cl_khr_global_int32_base_atomics", "CL_FALSE", "CL_FALSE" +"atomics", "test_atomic_fn.atomic_dec_local_int", "cl_khr_local_int32_base_atomics", "CL_FALSE", "CL_FALSE" +"atomics", "test_atomic_fn.atomic_min_local_uint", "cl_khr_local_int32_extended_atomics", "CL_FALSE", "CL_FALSE" +"atomics", "test_atomic_fn.atomic_max_global_uint", "cl_khr_global_int32_extended_atomics", "CL_FALSE", "CL_FALSE" +"atomics", "test_atomic_fn.atomic_add_global_int", "cl_khr_global_int32_base_atomics", "CL_FALSE", "CL_FALSE" +"atomics", "test_atomic_fn.atomic_or_global_uint", "cl_khr_global_int32_extended_atomics", "CL_FALSE", "CL_FALSE" +"atomics", "test_atomic_fn.atomic_max_global_int", "cl_khr_global_int32_extended_atomics", "CL_FALSE", "CL_FALSE" +"atomics", "test_atomic_fn.atomic_dec_local_uint", "cl_khr_local_int32_base_atomics", "CL_FALSE", "CL_FALSE" +"atomics", "test_atomic_fn.atomic_min_global_int", "cl_khr_global_int32_extended_atomics", "CL_FALSE", "CL_FALSE" +"atomics", "test_atomic_fn.atomic_inc_global_int", "cl_khr_global_int32_base_atomics", "CL_FALSE", "CL_FALSE" +"atomics", "test_atomic_fn.atomic_max_local_int", "cl_khr_local_int32_extended_atomics", "CL_FALSE", "CL_FALSE" +"atomics", 
"test_atomic_fn.atomic_cmpxchg_local_int", "cl_khr_local_int32_base_atomics", "CL_FALSE", "CL_FALSE" +"commonfns", "test_fn.test_max_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"commonfns", "test_fn.test_max_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"commonfns", "test_degrees4_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"commonfns", "test_degrees16_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"commonfns", "test_step4_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"commonfns", "test_fn.test_min_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"commonfns", "test_sign4_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"commonfns", "test_clamp.test_clamp_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"commonfns", "test_sign3_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"commonfns", "test_radians3_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"commonfns", "test_step_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"commonfns", "test_fn.test_min_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"commonfns", "test_sign_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"commonfns", "test_clamp.test_clamp_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"commonfns", "test_clamp.test_clamp_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"commonfns", "test_fn.test_max_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"commonfns", "test_clamp.test_clamp_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"commonfns", "test_step3_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"commonfns", "test_fn.test_max_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"commonfns", "test_fn.test_max_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"commonfns", "test_step2_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"commonfns", "test_radians8_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"commonfns", "test_sign16_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"commonfns", "test_step16_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"commonfns", "test_sign8_double", 
"cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"commonfns", "test_degrees8_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"commonfns", "test_fn.test_min_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"commonfns", "test_fn.test_min_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"commonfns", "test_fn.test_min_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"commonfns", "test_clamp.test_clamp_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"commonfns", "test_degrees_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"commonfns", "test_degrees3_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"commonfns", "test_radians2_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"commonfns", "test_fn.test_max_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"commonfns", "test_clamp.test_clamp_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"commonfns", "test_degrees2_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"commonfns", "test_radians4_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"commonfns", "test_sign2_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"commonfns", "test_radians_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"commonfns", "test_radians16_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"commonfns", "test_fn.test_min_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"commonfns", "test_step8_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"vec_step", "test_step_var.step_var_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"vec_step", "test_step_var.step_var_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"vec_step", "test_step_var.step_var_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"vec_step", "test_step_var.step_var_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"vec_step", "test_step_var.step_var_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"vec_step", "test_step_var.step_var_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"vec_step", "test_step_var.step_var_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"vec_step", "test_step_var.step_var_long2", 
"cles_khr_int64", "CL_FALSE", "CL_FALSE" +"vec_step", "test_step_var.step_var_long3", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"vec_step", "test_step_var.step_var_long4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"vec_step", "test_step_var.step_var_long8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"vec_step", "test_step_var.step_var_long16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"vec_step", "test_step_var.step_var_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"vec_step", "test_step_var.step_var_ulong2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"vec_step", "test_step_var.step_var_ulong3", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"vec_step", "test_step_var.step_var_ulong4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"vec_step", "test_step_var.step_var_ulong8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"vec_step", "test_step_var.step_var_ulong16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_isless_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_isgreaterequal_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_isless_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_isless_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_isequal_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_islessequal_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_isequal_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_isgreater_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_isnotequal_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_islessgreater_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_dual_input_double8_double4", "cl_khr_fp64", "CL_FALSE", 
"CL_FALSE" +"relationals", "sample_test.shuffle_built_in_dual_input_double16_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_double16_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_double2_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_double2_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_isgreaterequal_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_double2_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_isgreater_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_dual_input_double4_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_double16_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_islessgreater_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_isequal_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_isgreater_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_dual_input_double4_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_dual_input_double16_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_double4_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_double4_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_dual_input_double16_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_isnotequal_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_islessequal_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" 
+"relationals", "sample_test.relational_islessequal_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_isnotequal_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_dual_input_double8_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_isnotequal_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_bitselect_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_isgreater_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_isgreater_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_dual_input_double2_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_islessequal_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_isnotequal_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_islessequal_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_isgreaterequal_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_isgreater_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_bitselect_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_isequal_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_double8_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_dual_input_double4_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_double8_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_bitselect_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_islessgreater_double", 
"cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_dual_input_double4_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_double16_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_double2_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_double4_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_islessgreater_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_dual_input_double2_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_isless_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_isnotequal_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_double4_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_islessgreater_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_isless_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_dual_input_double8_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_double8_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_isequal_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_isgreaterequal_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_dual_input_double2_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_dual_input_double2_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_double16_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_dual_input_double8_double2", 
"cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_isgreaterequal_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_islessgreater_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_islessequal_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_isgreaterequal_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_dual_input_double16_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_isless_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_bitselect_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_bitselect_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_double8_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_isequal_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_bitselect_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_any_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_any_long2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_any_long3", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_any_long4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_any_long8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_any_long16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_all_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_all_long2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_all_long3", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", 
"sample_test.relational_all_long4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_all_long8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_all_long16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_bitselect_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_bitselect_long2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_bitselect_long3", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_bitselect_long4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_bitselect_long8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_bitselect_long16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_bitselect_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_bitselect_ulong2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_bitselect_ulong3", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_bitselect_ulong4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_bitselect_ulong8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_bitselect_ulong16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_select_signed_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_select_signed_long2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_select_signed_long4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_select_signed_long8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_select_signed_long16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", 
"sample_test.relational_select_unsigned_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_select_unsigned_ulong2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_select_unsigned_ulong4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_select_unsigned_ulong8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.relational_select_unsigned_ulong16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_long2_long2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_long2_long4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_long2_long8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_long2_long16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_long4_long2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_long4_long4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_long4_long8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_long4_long16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_long8_long2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_long8_long4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_long8_long8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_long8_long16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_long16_long2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_long16_long4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_long16_long8", 
"cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_long16_long16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_ulong2_ulong2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_ulong2_ulong4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_ulong2_ulong8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_ulong2_ulong16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_ulong4_ulong2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_ulong4_ulong4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_ulong4_ulong8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_ulong4_ulong16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_ulong8_ulong2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_ulong8_ulong4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_ulong8_ulong8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_ulong8_ulong16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_ulong16_ulong2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_ulong16_ulong4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_ulong16_ulong8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_ulong16_ulong16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_dual_input_long2_long2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_dual_input_long2_long4", 
"cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_dual_input_long2_long8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_dual_input_long2_long16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_dual_input_long4_long2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_dual_input_long4_long4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_dual_input_long4_long8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_dual_input_long4_long16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_dual_input_long8_long2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_dual_input_long8_long4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_dual_input_long8_long8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_dual_input_long8_long16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_dual_input_long16_long2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_dual_input_long16_long4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_dual_input_long16_long8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_dual_input_long16_long16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_dual_input_ulong2_ulong2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_dual_input_ulong2_ulong4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_dual_input_ulong2_ulong8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", 
"sample_test.shuffle_built_in_dual_input_ulong2_ulong16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_dual_input_ulong4_ulong2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_dual_input_ulong4_ulong4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_dual_input_ulong4_ulong8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_dual_input_ulong4_ulong16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_dual_input_ulong8_ulong2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_dual_input_ulong8_ulong4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_dual_input_ulong8_ulong8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_dual_input_ulong8_ulong16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_dual_input_ulong16_ulong2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_dual_input_ulong16_ulong4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_dual_input_ulong16_ulong8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"relationals", "sample_test.shuffle_built_in_dual_input_ulong16_ulong16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_global_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rte_global_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstorea_half_rte_local_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rte_global_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rtz_local_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstorea_half_rtp_private_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", 
"test.vstorea_half_rtn_global_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rtz_global_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstorea_half_rtn_global_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rtn_local_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstorea_half_rte_private_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstorea_half_rte_private_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_global_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstorea_half_rtp_private_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rtn_global_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rtp_global_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstorea_half_rte_global_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rtn_private_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstorea_half_rtp_global_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_global_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rtp_private_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rtn_global_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rtp_local_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_private_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_local_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstorea_half_private_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstorea_half_rtz_private_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rte_global_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstorea_half_private_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", 
"test.vstorea_half_rtn_local_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstorea_half_rtn_local_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rte_global_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rtn_global_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstorea_half_rtn_local_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstorea_half_rtp_local_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_global_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rtn_local_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rtz_global_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rtz_local_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstorea_half_rtn_private_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rtp_local_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rtz_private_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rte_local_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rtz_global_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rtz_private_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstorea_half_rtn_local_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstorea_half_global_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstorea_half_rte_global_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstorea_half_rte_global_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rtz_global_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rtp_global_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstorea_half_rtn_private_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", 
"test.vstore_half_rtp_private_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rtz_private_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstorea_half_rtz_local_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_private_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_local_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rtp_local_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rte_local_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstorea_half_rtz_global_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rtn_local_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rtz_local_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rtz_local_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_global_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rtz_local_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstorea_half_rte_private_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rte_private_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_private_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rte_local_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rte_global_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstorea_half_rtz_global_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rtp_global_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstorea_half_rte_local_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstorea_half_private_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstorea_half_global_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstorea_half_rtz_local_double8", 
"cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rte_private_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rtz_local_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rte_private_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rtz_global_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rtp_private_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_global_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstorea_half_rtz_global_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rtp_global_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstorea_half_rtp_private_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstorea_half_local_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rtp_private_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstorea_half_rtp_local_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstorea_half_rtn_private_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstorea_half_rtn_global_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rtz_private_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rte_global_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstorea_half_rte_global_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rtn_local_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstorea_half_rtz_private_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rtn_private_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rtz_private_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstorea_half_rtp_local_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstorea_half_rtp_global_double16", 
"cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rtn_private_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_private_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstorea_half_rtp_global_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstorea_half_global_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rtn_global_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rtp_local_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rtz_private_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstorea_half_rtp_local_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstorea_half_rtn_private_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rtn_private_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_local_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rte_private_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstorea_half_rtz_global_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstorea_half_rte_local_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstorea_half_global_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstorea_half_local_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rtp_global_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rte_local_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstorea_half_rte_local_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rtp_private_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstorea_half_rte_private_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rtp_global_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rtp_private_double4", "cl_khr_fp64", "CL_FALSE", 
"CL_FALSE" +"half", "test.vstore_half_rtp_local_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_private_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rtn_private_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rtn_local_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_private_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_local_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_local_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstorea_half_rtz_private_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rtp_local_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstorea_half_rtz_local_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rtn_global_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rte_private_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstorea_half_rtp_global_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rte_private_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstorea_half_rtn_global_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rtn_local_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstorea_half_rtz_private_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rtn_global_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rtn_private_double", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rte_local_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstorea_half_private_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstorea_half_rtz_local_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstorea_half_local_double16", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", 
"test.vstorea_half_local_double8", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rtz_global_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstorea_half_rtp_private_double4", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_rte_local_double3", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"half", "test.vstore_half_local_double2", "cl_khr_fp64", "CL_FALSE", "CL_FALSE" +"enum_values", "hostval_image_desc", "", "CL_TRUE", "CL_FALSE" +"enum_values", "hostval_image_desc_3d", "", "CL_TRUE", "CL_TRUE" +"images_kernel_read_write", "sample_kernel.write_image_2D_array_set_float", "cles_khr_2d_image_array_writes", "CL_TRUE", "CL_FALSE" +"images_kernel_read_write", "sample_kernel.write_image_2D_array_set_int", "cles_khr_2d_image_array_writes", "CL_TRUE", "CL_FALSE" +"images_kernel_read_write", "sample_kernel.write_image_2D_array_set_uint", "cles_khr_2d_image_array_writes", "CL_TRUE", "CL_FALSE" +"images_kernel_read_write", "sample_kernel.read_image_set_3D_fint", "", "CL_TRUE", "CL_TRUE" +"images_kernel_read_write", "sample_kernel.read_image_set_3D_ffloat", "", "CL_TRUE", "CL_TRUE" +"images_kernel_read_write", "sample_kernel.read_image_set_3D_iint", "", "CL_TRUE", "CL_TRUE" +"images_kernel_read_write", "sample_kernel.read_image_set_3D_ifloat", "", "CL_TRUE", "CL_TRUE" +"images_kernel_read_write", "sample_kernel.read_image_set_3D_uiint", "", "CL_TRUE", "CL_TRUE" +"images_kernel_read_write", "sample_kernel.read_image_set_3D_uifloat", "", "CL_TRUE", "CL_TRUE" +"images_kernel_read_write", "*", "", "CL_TRUE", "CL_FALSE" +"images_samplerlessRead", "sample_kernel.read_image_set_3D_float", "", "CL_TRUE", "CL_TRUE" +"images_samplerlessRead", "sample_kernel.read_image_set_3D_int", "", "CL_TRUE", "CL_TRUE" +"images_samplerlessRead", "sample_kernel.read_image_set_3D_uint", "", "CL_TRUE", "CL_TRUE" +"images_samplerlessRead", "*", "", "CL_TRUE", "CL_FALSE" +"kernel_image_methods", "sample_kernel.get_image_info_3D", "", "CL_TRUE", "CL_TRUE" 
+"kernel_image_methods", "*", "", "CL_TRUE", "CL_FALSE" +"sampler_enumeration", "*", "", "CL_TRUE", "CL_FALSE" +"profiling", "testReadf", "", "CL_TRUE", "CL_FALSE" +"profiling", "image_filter", "", "CL_TRUE", "CL_FALSE" +"integer_ops", "sample_test.integer_clz_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_clz_long2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_clz_long3", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_clz_long4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_clz_long8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_clz_long16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_clz_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_clz_ulong2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_clz_ulong3", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_clz_ulong4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_clz_ulong8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_clz_ulong16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_hadd_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_hadd_long2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_hadd_long3", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_hadd_long4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_hadd_long8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_hadd_long16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_hadd_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_hadd_ulong2", 
"cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_hadd_ulong3", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_hadd_ulong4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_hadd_ulong8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_hadd_ulong16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_rhadd_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_rhadd_long2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_rhadd_long3", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_rhadd_long4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_rhadd_long8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_rhadd_long16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_rhadd_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_rhadd_ulong2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_rhadd_ulong3", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_rhadd_ulong4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_rhadd_ulong8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_rhadd_ulong16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_mul_hi_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_mul_hi_long2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_mul_hi_long3", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_mul_hi_long4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_mul_hi_long8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", 
"sample_test.integer_mul_hi_long16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_mul_hi_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_mul_hi_ulong2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_mul_hi_ulong3", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_mul_hi_ulong4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_mul_hi_ulong8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_mul_hi_ulong16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_rotate_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_rotate_long2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_rotate_long3", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_rotate_long4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_rotate_long8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_rotate_long16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_rotate_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_rotate_ulong2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_rotate_ulong3", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_rotate_ulong4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_rotate_ulong8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_rotate_ulong16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_clamp_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_clamp_long2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_clamp_long3", 
"cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_clamp_long4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_clamp_long8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_clamp_long16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_clamp_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_clamp_ulong2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_clamp_ulong3", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_clamp_ulong4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_clamp_ulong8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_clamp_ulong16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_mad_sat_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_mad_sat_long2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_mad_sat_long3", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_mad_sat_long4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_mad_sat_long8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_mad_sat_long16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_mad_sat_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_mad_sat_ulong2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_mad_sat_ulong3", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_mad_sat_ulong4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_mad_sat_ulong8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_mad_sat_ulong16", "cles_khr_int64", "CL_FALSE", 
"CL_FALSE" +"integer_ops", "sample_test.integer_mad_hi_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_mad_hi_long2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_mad_hi_long3", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_mad_hi_long4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_mad_hi_long8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_mad_hi_long16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_mad_hi_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_mad_hi_ulong2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_mad_hi_ulong3", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_mad_hi_ulong4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_mad_hi_ulong8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_mad_hi_ulong16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_min_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_min_long2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_min_long3", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_min_long4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_min_long8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_min_long16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_min_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_min_ulong2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_min_ulong3", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_min_ulong4", 
"cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_min_ulong8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_min_ulong16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_max_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_max_long2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_max_long3", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_max_long4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_max_long8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_max_long16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_max_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_max_ulong2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_max_ulong3", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_max_ulong4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_max_ulong8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "sample_test.integer_max_ulong16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "test_abs_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "test_abs_long2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "test_abs_long3", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "test_abs_long4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "test_abs_long8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "test_abs_long16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "test_abs_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "test_abs_ulong2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "test_abs_ulong3", "cles_khr_int64", "CL_FALSE", "CL_FALSE" 
+"integer_ops", "test_abs_ulong4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "test_abs_ulong8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "test_abs_ulong16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "test_absdiff_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "test_absdiff_long2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "test_absdiff_long3", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "test_absdiff_long4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "test_absdiff_long8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "test_absdiff_long16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "test_absdiff_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "test_absdiff_ulong2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "test_absdiff_ulong3", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "test_absdiff_ulong4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "test_absdiff_ulong8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "test_absdiff_ulong16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "test_add_sat_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "test_add_sat_long2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "test_add_sat_long3", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "test_add_sat_long4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "test_add_sat_long8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "test_add_sat_long16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "test_add_sat_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "test_add_sat_ulong2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "test_add_sat_ulong3", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "test_add_sat_ulong4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", 
"test_add_sat_ulong8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "test_add_sat_ulong16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "test_sub_sat_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "test_sub_sat_long2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "test_sub_sat_long3", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "test_sub_sat_long4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "test_sub_sat_long8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "test_sub_sat_long16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "test_sub_sat_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "test_sub_sat_ulong2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "test_sub_sat_ulong3", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "test_sub_sat_ulong4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "test_sub_sat_ulong8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "test_sub_sat_ulong16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "test_popcount_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "test_popcount_long2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "test_popcount_long3", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "test_popcount_long4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "test_popcount_long8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "test_popcount_long16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "test_popcount_ulong", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "test_popcount_ulong2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "test_popcount_ulong3", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "test_popcount_ulong4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "test_popcount_ulong8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", 
"test_popcount_ulong16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "test_upsample.integer_upsample_int", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "test_upsample.integer_upsample_int2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "test_upsample.integer_upsample_int3", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "test_upsample.integer_upsample_int4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "test_upsample.integer_upsample_int8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "test_upsample.integer_upsample_int16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "test_upsample.integer_upsample_uint", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "test_upsample.integer_upsample_uint2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "test_upsample.integer_upsample_uint3", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "test_upsample.integer_upsample_uint4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "test_upsample.integer_upsample_uint8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"integer_ops", "test_upsample.integer_upsample_uint16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"printf", "test7.testCaseVector_long", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"vec_align", "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_long2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"vec_align", "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_long3", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"vec_align", "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_long4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"vec_align", "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_long8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"vec_align", "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_long16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"vec_align", 
"test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_ulong2", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"vec_align", "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_ulong3", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"vec_align", "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_ulong4", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"vec_align", "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_ulong8", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"vec_align", "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_ulong16", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"vec_align", "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_longp", "cles_khr_int64", "CL_FALSE", "CL_FALSE" +"vec_align", "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_ulongp", "cles_khr_int64", "CL_FALSE", "CL_FALSE" diff --git a/test_conformance/spir/main.cpp b/test_conformance/spir/main.cpp new file mode 100644 index 00000000..b0fba6fc --- /dev/null +++ b/test_conformance/spir/main.cpp @@ -0,0 +1,6985 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include +#include +#include +#include + +#include "../../test_common/harness/errorHelpers.h" +#include "../../test_common/harness/kernelHelpers.h" +#include "../../test_common/harness/typeWrappers.h" +#include "../../test_common/harness/os_helpers.h" + +#include "exceptions.h" +#include "run_build_test.h" +#include "run_services.h" + +#include +#include +#include "../../test_common/miniz/miniz.h" + +#if defined(_WIN32) +#include +#include +#else // !_WIN32 +#include +#include +#include +#endif + +static int no_unzip = 0; + +class custom_cout : public std::streambuf +{ +private: + std::stringstream ss; + + std::streamsize xsputn (const char* s, std::streamsize n) + { + ss.write(s, n); + return n; + } + + int overflow(int c) + { + if(c > 0 && c < 256) ss.put(c); + return c; + } + + int sync() + { + log_info("%s", ss.str().c_str()); + ss.str(""); + return 0; + } +}; + +class custom_cerr : public std::streambuf +{ +private: + std::stringstream ss; + + std::streamsize xsputn (const char* s, std::streamsize n) + { + ss.write(s, n); + return n; + } + + int overflow(int c) + { + if(c > 0 && c < 256) ss.put(c); + return c; + } + + int sync() + { + log_error("%s", ss.str().c_str()); + ss.str(""); + return 0; + } +}; + +class override_buff +{ + std::ostream* stream; + std::streambuf* buff; + +public: + override_buff(std::ostream& s, std::streambuf& b) + { + stream = &s; + buff = stream->rdbuf(); + stream->rdbuf(&b); + } + + ~override_buff() + { + stream->rdbuf(buff); + } +}; + +typedef bool (*testfn)(cl_device_id device, cl_uint size_t_width, const char *folder); + +template +void dealloc(T *p) +{ + if (p) delete p; +} + +static bool is_dir_exits(const char* path) +{ + assert(path && "NULL directory"); +#if defined(_WIN32) + DWORD ftyp = GetFileAttributesA(path); + if (ftyp != INVALID_FILE_ATTRIBUTES && (ftyp & FILE_ATTRIBUTE_DIRECTORY)) + return true; +#else // Linux assumed here. 
+ if (DIR *pDir = opendir(path)) + { + closedir(pDir); + return true; + } +#endif + return false; +} + +static void get_spir_version(cl_device_id device, std::vector& versions) +{ + char version[64] = {0}; + cl_int err; + size_t size = 0; + + if (err = clGetDeviceInfo(device, CL_DEVICE_SPIR_VERSIONS, sizeof(version), + (void*)version, &size)) + { + log_error( "Error: failed to obtain SPIR version at %s:%d (err = %d)\n", + __FILE__, __LINE__, err ); + return; + } + + assert(size && "Empty version string"); + + std::list versionVector; + std::stringstream versionStream(version); + std::copy(std::istream_iterator(versionStream), + std::istream_iterator(), + std::back_inserter(versionVector)); + for(std::list::const_iterator it = versionVector.begin(), + e = versionVector.end(); it != e; + it++) + { + versions.push_back(atof(it->c_str())); + } +} + +struct CounterEventHandler: EventHandler{ + unsigned int& Counter; + unsigned int TN; + std::string LastTest; + + //N - counter of successful tests. 
+ //T - total number of tests in the suite + CounterEventHandler(unsigned int& N, unsigned int T): Counter(N), TN(T){} + + void operator ()(const std::string& testName, const std::string& kernelName) { + if (testName != LastTest){ + ++Counter; + LastTest = testName; + } + } +}; + +class AccumulatorEventHandler: public EventHandler{ + std::list& m_list; + const std::string m_name; +public: + AccumulatorEventHandler(std::list& L, const std::string N): + m_list(L), m_name(N){} + + void operator ()(const std::string& T, const std::string& K){ + std::cerr << "\nTest " << T << "\t Kernel " << K << " failed" << std::endl; + m_list.push_back(m_name + "\t" + T + "\t" + K); + } +}; + +static void printError(const std::string& S){ + std::cerr << S << std::endl; +} + +static bool extractKernelAttribute(std::string& kernel_attributes, + const std::string& attribute, std::vector& attribute_vector) { + size_t start = kernel_attributes.find(attribute + "("); + if (start == 0) { + size_t end = kernel_attributes.find(")", start); + if (end != std::string::npos) { + size_t length = end-start+1; + attribute_vector.push_back(kernel_attributes.substr(start, length)); + kernel_attributes.erase(start, length); + return true; + } + } + return false; +} + +// Extracts suite with the given name, and saves it to disk. +static void extract_suite(const char *suiteName) +{ + mz_zip_archive zip_archive; + + // Composing the name of the archive. + char* dir = get_exe_dir(); + std::string archiveName(dir); + archiveName.append(dir_sep()); + archiveName.append(suiteName); + archiveName.append(".zip"); + free(dir); + +#if defined(_WIN32) + _mkdir(suiteName); +#else + mkdir(suiteName, S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH); +#endif + + memset(&zip_archive, 0, sizeof(zip_archive)); + if (!mz_zip_reader_init_file(&zip_archive, archiveName.c_str(), 0)) + throw Exceptions::ArchiveError(MZ_DATA_ERROR); + + // Get and print information about each file in the archive. 
+ for (size_t i = 0; i < mz_zip_reader_get_num_files(&zip_archive); i++) + { + mz_zip_archive_file_stat fileStat; + size_t fileSize = 0; + + if (!mz_zip_reader_file_stat(&zip_archive, i, &fileStat)) + { + mz_zip_reader_end(&zip_archive); + throw Exceptions::ArchiveError(MZ_DATA_ERROR); + } + const std::string fileName = fileStat.m_filename; + + // If the file is a directory, skip it. We create suite folder at the beggining. + if (mz_zip_reader_is_file_a_directory(&zip_archive, fileStat.m_file_index)) + { + continue; + } + + // Extracting the file. + void *p = mz_zip_reader_extract_file_to_heap(&zip_archive, + fileName.c_str(), + &fileSize, 0); + if (!p) + { + mz_zip_reader_end(&zip_archive); + throw std::runtime_error("mz_zip_reader_extract_file_to_heap() failed!\n"); + } + + // Writing the file back to the disk + std::fstream file(fileName.c_str(), + std::ios_base::trunc | std::ios_base::in | + std::ios_base::out | std::ios_base::binary); + if (!file.is_open()) + { + std::string msg = "Failed to open "; + msg.append(fileName); + throw Exceptions::TestError(msg); + } + + file.write((const char*)p, fileSize); + if (file.bad()) + { + std::string msg("Failed to write into "); + msg.append(fileName); + throw Exceptions::TestError(msg); + } + + // Cleanup. + file.flush(); + file.close(); + free(p); + } + mz_zip_reader_end(&zip_archive); +} + +// +// Extracts the given suite package if needed. +// return true if the suite was extracted, false otherwise. +// +static bool try_extract(const char* suite) +{ + if(no_unzip == 0) + { + std::cout << "Extracting test suite " << suite << std::endl; + extract_suite(suite); + std::cout << "Done." << std::endl; + } + return true; +} + +bool test_suite(cl_device_id device, cl_uint size_t_width, const char *folder, + const char *test_name[], unsigned int number_of_tests, + const char *extension) +{ + // If the folder doesn't exist, we extract in from the archive. 
+ try_extract(folder); + + std::cout << "Running tests:" << std::endl; + + OclExtensions deviceCapabilities = OclExtensions::getDeviceCapabilities(device); + unsigned int tests_passed = 0; + CounterEventHandler SuccE(tests_passed, number_of_tests); + std::list ErrList; + for (unsigned int i = 0; i < number_of_tests; ++i) + { + AccumulatorEventHandler FailE(ErrList, test_name[i]); + if((strlen(extension) != 0) && (!is_extension_available(device, extension))) + { + (SuccE)(test_name[i], ""); + std::cout << test_name[i] << "... Skipped. (Cannot run on device due to missing extension: " << extension << " )." << std::endl; + continue; + } + TestRunner testRunner(&SuccE, &FailE, deviceCapabilities); + testRunner.runBuildTest(device, folder, test_name[i], size_t_width); + } + + std::cout << std::endl; + std::cout << "PASSED " << tests_passed << " of " << number_of_tests << " tests.\n" << std::endl; + + if (!ErrList.empty()) + { + std::cout << "Failed tests:" << std::endl; + std::for_each(ErrList.begin(), ErrList.end(), printError); + } + std::cout << std::endl; + return tests_passed == number_of_tests; +} + +static std::string getTestFolder(const std::string& TS) +{ + const std::string DOUBLE("_double"); + if (TS.size() < DOUBLE.size()) + return TS; + + const size_t prefixLen = TS.size() - DOUBLE.size(); + const std::string postfix = TS.substr(prefixLen, DOUBLE.size()); + if (DOUBLE == postfix) + return TS.substr(0, prefixLen); + + return TS; +} + +bool test_api (cl_device_id device, cl_uint size_t_width, const char *folder) +{ + static const char* test_name[] = { + "const_derived_d", + "const_scalar_d", + "const_vector16_d", + "const_vector2_d", + "const_vector3_d", + "const_vector4_d", + "const_vector8_d", + "constant_derived_p0", + "constant_derived_p1", + "constant_derived_restrict_p0", + "constant_derived_restrict_p1", + "constant_scalar_p0", + "constant_scalar_p1", + "constant_scalar_p2", + "constant_scalar_p3", + "constant_scalar_restrict_p0", + 
"constant_scalar_restrict_p1", + "constant_scalar_restrict_p2", + "constant_scalar_restrict_p3", + "constant_vector16_p0", + "constant_vector16_p1", + "constant_vector16_p2", + "constant_vector16_restrict_p0", + "constant_vector16_restrict_p2", + "constant_vector2_p0", + "constant_vector2_p1", + "constant_vector2_restrict_p0", + "constant_vector2_restrict_p1", + "constant_vector2_restrict_p2", + "constant_vector3_p0", + "constant_vector3_p1", + "constant_vector3_p2", + "constant_vector3_restrict_p0", + "constant_vector3_restrict_p1", + "constant_vector3_restrict_p2", + "constant_vector4_p0", + "constant_vector4_p1", + "constant_vector4_p2", + "constant_vector4_restrict_p0", + "constant_vector4_restrict_p1", + "constant_vector4_restrict_p2", + "constant_vector8_p0", + "constant_vector8_p1", + "constant_vector8_p2", + "constant_vector8_restrict_p0", + "constant_vector8_restrict_p1", + "constant_vector8_restrict_p2", + "derived_d", + "global_const_derived_p", + "global_const_derived_restrict_p", + "global_const_scalar_p", + "global_const_scalar_restrict_p", + "global_const_vector16_p", + "global_const_vector16_restrict_p", + "global_const_vector2_p", + "global_const_vector2_restrict_p", + "global_const_vector3_p", + "global_const_vector3_restrict_p", + "global_const_vector4_p", + "global_const_vector4_restrict_p", + "global_const_vector8_p", + "global_const_vector8_restrict_p", + "global_const_volatile_derived_p", + "global_const_volatile_derived_restrict_p", + "global_const_volatile_scalar_p", + "global_const_volatile_scalar_restrict_p", + "global_const_volatile_vector16_p", + "global_const_volatile_vector16_restrict_p", + "global_const_volatile_vector2_p", + "global_const_volatile_vector2_restrict_p", + "global_const_volatile_vector3_p", + "global_const_volatile_vector3_restrict_p", + "global_const_volatile_vector4_p", + "global_const_volatile_vector4_restrict_p", + "global_const_volatile_vector8_p", + "global_const_volatile_vector8_restrict_p", + 
"global_derived_p", + "global_derived_restrict_p", + "global_scalar_p", + "global_scalar_restrict_p", + "global_vector16_p", + "global_vector16_restrict_p", + "global_vector2_p", + "global_vector2_restrict_p", + "global_vector3_p", + "global_vector3_restrict_p", + "global_vector4_p", + "global_vector4_restrict_p", + "global_vector8_p", + "global_vector8_restrict_p", + "global_volatile_derived_p", + "global_volatile_derived_restrict_p", + "global_volatile_scalar_p", + "global_volatile_scalar_restrict_p", + "global_volatile_vector16_p", + "global_volatile_vector16_restrict_p", + "global_volatile_vector2_p", + "global_volatile_vector2_restrict_p", + "global_volatile_vector3_p", + "global_volatile_vector3_restrict_p", + "global_volatile_vector4_p", + "global_volatile_vector4_restrict_p", + "global_volatile_vector8_p", + "global_volatile_vector8_restrict_p", + "local_const_derived_p", + "local_const_derived_restrict_p", + "local_const_scalar_p", + "local_const_scalar_restrict_p", + "local_const_vector16_p", + "local_const_vector16_restrict_p", + "local_const_vector2_p", + "local_const_vector2_restrict_p", + "local_const_vector3_p", + "local_const_vector3_restrict_p", + "local_const_vector4_p", + "local_const_vector4_restrict_p", + "local_const_vector8_p", + "local_const_vector8_restrict_p", + "local_const_volatile_derived_p", + "local_const_volatile_derived_restrict_p", + "local_const_volatile_scalar_p", + "local_const_volatile_scalar_restrict_p", + "local_const_volatile_vector16_p", + "local_const_volatile_vector16_restrict_p", + "local_const_volatile_vector2_p", + "local_const_volatile_vector2_restrict_p", + "local_const_volatile_vector3_p", + "local_const_volatile_vector3_restrict_p", + "local_const_volatile_vector4_p", + "local_const_volatile_vector4_restrict_p", + "local_const_volatile_vector8_p", + "local_const_volatile_vector8_restrict_p", + "local_derived_p", + "local_derived_restrict_p", + "local_scalar_p", + "local_scalar_restrict_p", + "local_vector16_p", + 
"local_vector16_restrict_p", + "local_vector2_p", + "local_vector2_restrict_p", + "local_vector3_p", + "local_vector3_restrict_p", + "local_vector4_p", + "local_vector4_restrict_p", + "local_vector8_p", + "local_vector8_restrict_p", + "local_volatile_derived_p", + "local_volatile_derived_restrict_p", + "local_volatile_scalar_p", + "local_volatile_scalar_restrict_p", + "local_volatile_vector16_p", + "local_volatile_vector16_restrict_p", + "local_volatile_vector2_p", + "local_volatile_vector2_restrict_p", + "local_volatile_vector3_p", + "local_volatile_vector3_restrict_p", + "local_volatile_vector4_p", + "local_volatile_vector4_restrict_p", + "local_volatile_vector8_p", + "local_volatile_vector8_restrict_p", + "private_const_derived_d", + "private_const_scalar_d", + "private_const_vector16_d", + "private_const_vector2_d", + "private_const_vector3_d", + "private_const_vector4_d", + "private_const_vector8_d", + "private_derived_d", + "private_scalar_d", + "private_vector16_d", + "private_vector2_d", + "private_vector3_d", + "private_vector4_d", + "private_vector8_d", + "scalar_d", + "vector16_d", + "vector2_d", + "vector3_d", + "vector4_d", + "vector8_d", + "image_d", + "image_d_write_array", + "image_d_3d", + "sample_test.min_max_read_image_args", + "kernel_with_bool", + "bool_scalar_d", + "long_constant_scalar_p2", + "long_const_scalar_d", + "long_const_vector16_d", + "long_const_vector2_d", + "long_const_vector3_d", + "long_const_vector4_d", + "long_const_vector8_d", + "long_constant_scalar_p3", + "long_constant_scalar_restrict_p2", + "long_constant_scalar_restrict_p3", + "long_constant_vector16_p1", + "long_constant_vector16_restrict_p1", + "long_constant_vector2_p1", + "long_constant_vector2_restrict_p1", + "long_constant_vector3_p1", + "long_constant_vector3_restrict_p1", + "long_constant_vector4_p1", + "long_constant_vector4_restrict_p1", + "long_constant_vector8_p1", + "long_constant_vector8_restrict_p1", + "long_global_const_scalar_p", + 
"long_global_const_scalar_restrict_p", + "long_global_const_vector16_p", + "long_global_const_vector16_restrict_p", + "long_global_const_vector2_p", + "long_global_const_vector2_restrict_p", + "long_global_const_vector3_p", + "long_global_const_vector3_restrict_p", + "long_global_const_vector4_p", + "long_global_const_vector4_restrict_p", + "long_global_const_vector8_p", + "long_global_const_vector8_restrict_p", + "long_global_const_volatile_scalar_p", + "long_global_const_volatile_scalar_restrict_p", + "long_global_const_volatile_vector16_p", + "long_global_const_volatile_vector16_restrict_p", + "long_global_const_volatile_vector2_p", + "long_global_const_volatile_vector2_restrict_p", + "long_global_const_volatile_vector3_p", + "long_global_const_volatile_vector3_restrict_p", + "long_global_const_volatile_vector4_p", + "long_global_const_volatile_vector4_restrict_p", + "long_global_const_volatile_vector8_p", + "long_global_const_volatile_vector8_restrict_p", + "long_global_scalar_p", + "long_global_scalar_restrict_p", + "long_global_vector16_p", + "long_global_vector16_restrict_p", + "long_global_vector2_p", + "long_global_vector2_restrict_p", + "long_global_vector3_p", + "long_global_vector3_restrict_p", + "long_global_vector4_p", + "long_global_vector4_restrict_p", + "long_global_vector8_p", + "long_global_vector8_restrict_p", + "long_global_volatile_scalar_p", + "long_global_volatile_scalar_restrict_p", + "long_global_volatile_vector16_p", + "long_global_volatile_vector16_restrict_p", + "long_global_volatile_vector2_p", + "long_global_volatile_vector2_restrict_p", + "long_global_volatile_vector3_p", + "long_global_volatile_vector3_restrict_p", + "long_global_volatile_vector4_p", + "long_global_volatile_vector4_restrict_p", + "long_global_volatile_vector8_p", + "long_global_volatile_vector8_restrict_p", + "long_local_const_scalar_p", + "long_local_const_scalar_restrict_p", + "long_local_const_vector16_p", + "long_local_const_vector16_restrict_p", + 
"long_local_const_vector2_p", + "long_local_const_vector2_restrict_p", + "long_local_const_vector3_p", + "long_local_const_vector3_restrict_p", + "long_local_const_vector4_p", + "long_local_const_vector4_restrict_p", + "long_local_const_vector8_p", + "long_local_const_vector8_restrict_p", + "long_local_const_volatile_scalar_p", + "long_local_const_volatile_scalar_restrict_p", + "long_local_const_volatile_vector16_p", + "long_local_const_volatile_vector16_restrict_p", + "long_local_const_volatile_vector2_p", + "long_local_const_volatile_vector2_restrict_p", + "long_local_const_volatile_vector3_p", + "long_local_const_volatile_vector3_restrict_p", + "long_local_const_volatile_vector4_p", + "long_local_const_volatile_vector4_restrict_p", + "long_local_const_volatile_vector8_p", + "long_local_const_volatile_vector8_restrict_p", + "long_local_scalar_p", + "long_local_scalar_restrict_p", + "long_local_vector16_p", + "long_local_vector16_restrict_p", + "long_local_vector2_p", + "long_local_vector2_restrict_p", + "long_local_vector3_p", + "long_local_vector3_restrict_p", + "long_local_vector4_p", + "long_local_vector4_restrict_p", + "long_local_vector8_p", + "long_local_vector8_restrict_p", + "long_local_volatile_scalar_p", + "long_local_volatile_scalar_restrict_p", + "long_local_volatile_vector16_p", + "long_local_volatile_vector16_restrict_p", + "long_local_volatile_vector2_p", + "long_local_volatile_vector2_restrict_p", + "long_local_volatile_vector3_p", + "long_local_volatile_vector3_restrict_p", + "long_local_volatile_vector4_p", + "long_local_volatile_vector4_restrict_p", + "long_local_volatile_vector8_p", + "long_local_volatile_vector8_restrict_p", + "long_private_const_scalar_d", + "long_private_const_vector16_d", + "long_private_const_vector2_d", + "long_private_const_vector3_d", + "long_private_const_vector4_d", + "long_private_const_vector8_d", + "long_private_scalar_d", + "long_private_vector16_d", + "long_private_vector2_d", + "long_private_vector3_d", + 
"long_private_vector4_d", + "long_private_vector8_d", + "long_scalar_d", + "long_vector16_d", + "long_vector2_d", + "long_vector3_d", + "long_vector4_d", + "long_vector8_d", + }; + + log_info("test_api\n"); + return test_suite(device, size_t_width, folder, test_name, sizeof(test_name) / sizeof(const char *), ""); +} + + +bool test_api_double (cl_device_id device, cl_uint size_t_width, const char *folder) +{ + static const char* test_name[] = { + "double_scalar_p", + "double_scalar_p2", + "double_scalar_d", + "double_vector2_p", + "double_vector2_p2", + "double_vector2_d", + "double_vector3_p", + "double_vector3_p2", + "double_vector3_d", + "double_vector4_p", + "double_vector4_p2", + "double_vector4_d", + "double_vector8_p", + "double_vector8_p2", + "double_vector8_d", + "double_vector16_p", + "double_vector16_p2", + "double_vector16_d", + }; + + log_info("test_api_double\n"); + return test_suite(device, size_t_width, folder, test_name, sizeof(test_name) / sizeof(const char *), "cl_khr_fp64"); +} + + +bool test_atomics (cl_device_id device, cl_uint size_t_width, const char *folder) +{ + static const char* test_name[] = { + "test_atomic_fn.atomic_add_global_int", + "test_atomic_fn.atomic_add_global_uint", + "test_atomic_fn.atomic_sub_global_int", + "test_atomic_fn.atomic_sub_global_uint", + "test_atomic_fn.atomic_xchg_global_int", + "test_atomic_fn.atomic_xchg_global_uint", + "test_atomic_fn.atomic_xchg_global_float", + "test_atomic_fn.atomic_min_global_int", + "test_atomic_fn.atomic_min_global_uint", + "test_atomic_fn.atomic_max_global_int", + "test_atomic_fn.atomic_max_global_uint", + "test_atomic_fn.atomic_inc_global_int", + "test_atomic_fn.atomic_inc_global_uint", + "test_atomic_fn.atomic_dec_global_int", + "test_atomic_fn.atomic_dec_global_uint", + "test_atomic_fn.atomic_cmpxchg_global_int", + "test_atomic_fn.atomic_cmpxchg_global_uint", + "test_atomic_fn.atomic_and_global_int", + "test_atomic_fn.atomic_and_global_uint", + "test_atomic_fn.atomic_or_global_int", 
+ "test_atomic_fn.atomic_or_global_uint", + "test_atomic_fn.atomic_xor_global_int", + "test_atomic_fn.atomic_xor_global_uint", + "test_atomic_fn.atomic_add_local_int", + "test_atomic_fn.atomic_add_local_uint", + "test_atomic_fn.atomic_sub_local_int", + "test_atomic_fn.atomic_sub_local_uint", + "test_atomic_fn.atomic_xchg_local_int", + "test_atomic_fn.atomic_xchg_local_uint", + "test_atomic_fn.atomic_xchg_local_float", + "test_atomic_fn.atomic_min_local_int", + "test_atomic_fn.atomic_min_local_uint", + "test_atomic_fn.atomic_max_local_int", + "test_atomic_fn.atomic_max_local_uint", + "test_atomic_fn.atomic_inc_local_int", + "test_atomic_fn.atomic_inc_local_uint", + "test_atomic_fn.atomic_dec_local_int", + "test_atomic_fn.atomic_dec_local_uint", + "test_atomic_fn.atomic_cmpxchg_local_int", + "test_atomic_fn.atomic_cmpxchg_local_uint", + "test_atomic_fn.atomic_and_local_int", + "test_atomic_fn.atomic_and_local_uint", + "test_atomic_fn.atomic_or_local_int", + "test_atomic_fn.atomic_or_local_uint", + "test_atomic_fn.atomic_xor_local_int", + "test_atomic_fn.atomic_xor_local_uint", + }; + + log_info("test_atomics\n"); + return test_suite(device, size_t_width, folder, test_name, sizeof(test_name) / sizeof(const char *), ""); +} + + +bool test_basic (cl_device_id device, cl_uint size_t_width, const char *folder) +{ + static const char* test_name[] = { + "sample_kernel.work_item_functions", + "test_sizeof.sizeof_char", + "test_sizeof.sizeof_uchar", + "test_sizeof.sizeof_unsigned_char", + "test_sizeof.sizeof_short", + "test_sizeof.sizeof_ushort", + "test_sizeof.sizeof_unsigned_short", + "test_sizeof.sizeof_int", + "test_sizeof.sizeof_uint", + "test_sizeof.sizeof_unsigned_int", + "test_sizeof.sizeof_float", + "test_sizeof.sizeof_long", + "test_sizeof.sizeof_ulong", + "test_sizeof.sizeof_unsigned_long", + "test_sizeof.sizeof_char2", + "test_sizeof.sizeof_uchar2", + "test_sizeof.sizeof_short2", + "test_sizeof.sizeof_ushort2", + "test_sizeof.sizeof_int2", + 
"test_sizeof.sizeof_uint2", + "test_sizeof.sizeof_float2", + "test_sizeof.sizeof_long2", + "test_sizeof.sizeof_ulong2", + "test_sizeof.sizeof_char4", + "test_sizeof.sizeof_uchar4", + "test_sizeof.sizeof_short4", + "test_sizeof.sizeof_ushort4", + "test_sizeof.sizeof_int4", + "test_sizeof.sizeof_uint4", + "test_sizeof.sizeof_float4", + "test_sizeof.sizeof_long4", + "test_sizeof.sizeof_ulong4", + "test_sizeof.sizeof_char8", + "test_sizeof.sizeof_uchar8", + "test_sizeof.sizeof_short8", + "test_sizeof.sizeof_ushort8", + "test_sizeof.sizeof_int8", + "test_sizeof.sizeof_uint8", + "test_sizeof.sizeof_float8", + "test_sizeof.sizeof_long8", + "test_sizeof.sizeof_ulong8", + "test_sizeof.sizeof_char16", + "test_sizeof.sizeof_uchar16", + "test_sizeof.sizeof_short16", + "test_sizeof.sizeof_ushort16", + "test_sizeof.sizeof_int16", + "test_sizeof.sizeof_uint16", + "test_sizeof.sizeof_float16", + "test_sizeof.sizeof_long16", + "test_sizeof.sizeof_ulong16", + "test_sizeof.sizeof_void_p", + "test_sizeof.sizeof_size_t", + "test_sizeof.sizeof_sizeof_int", + "test_sizeof.sizeof_ptrdiff_t", + "test_sizeof.sizeof_intptr_t", + "test_sizeof.sizeof_uintptr_t", + "test_sizeof.sizeof_image2d_t", + "test_sizeof.sizeof_image3d_t", + "test_sizeof.sizeof_sampler_t", + "test_sizeof.sizeof_double", + "test_sizeof.sizeof_double2", + "test_sizeof.sizeof_double4", + "test_sizeof.sizeof_double8", + "test_sizeof.sizeof_double16", + "sample_test.vec_type_hint_char", + "sample_test.vec_type_hint_char2", + "sample_test.vec_type_hint_char4", + "sample_test.vec_type_hint_char8", + "sample_test.vec_type_hint_char16", + "sample_test.vec_type_hint_uchar", + "sample_test.vec_type_hint_uchar2", + "sample_test.vec_type_hint_uchar4", + "sample_test.vec_type_hint_uchar8", + "sample_test.vec_type_hint_uchar16", + "sample_test.vec_type_hint_short", + "sample_test.vec_type_hint_short2", + "sample_test.vec_type_hint_short4", + "sample_test.vec_type_hint_short8", + "sample_test.vec_type_hint_short16", + 
"sample_test.vec_type_hint_ushort", + "sample_test.vec_type_hint_ushort2", + "sample_test.vec_type_hint_ushort4", + "sample_test.vec_type_hint_ushort8", + "sample_test.vec_type_hint_ushort16", + "sample_test.vec_type_hint_int", + "sample_test.vec_type_hint_int2", + "sample_test.vec_type_hint_int4", + "sample_test.vec_type_hint_int8", + "sample_test.vec_type_hint_int16", + "sample_test.vec_type_hint_uint", + "sample_test.vec_type_hint_uint2", + "sample_test.vec_type_hint_uint4", + "sample_test.vec_type_hint_uint8", + "sample_test.vec_type_hint_uint16", + "sample_test.vec_type_hint_long", + "sample_test.vec_type_hint_long2", + "sample_test.vec_type_hint_long4", + "sample_test.vec_type_hint_long8", + "sample_test.vec_type_hint_long16", + "sample_test.vec_type_hint_ulong", + "sample_test.vec_type_hint_ulong2", + "sample_test.vec_type_hint_ulong4", + "sample_test.vec_type_hint_ulong8", + "sample_test.vec_type_hint_ulong16", + "sample_test.vec_type_hint_float", + "sample_test.vec_type_hint_float2", + "sample_test.vec_type_hint_float4", + "sample_test.vec_type_hint_float8", + "sample_test.vec_type_hint_float16", + "test.kernel_memory_alignment_private_char", + "test.kernel_memory_alignment_private_uchar", + "test.kernel_memory_alignment_private_short", + "test.kernel_memory_alignment_private_ushort", + "test.kernel_memory_alignment_private_int", + "test.kernel_memory_alignment_private_uint", + "test.kernel_memory_alignment_private_long", + "test.kernel_memory_alignment_private_ulong", + "test.kernel_memory_alignment_private_float", + "test_fn.vload_global_char2", + "test_fn.vload_global_char3", + "test_fn.vload_global_char4", + "test_fn.vload_global_char8", + "test_fn.vload_global_char16", + "test_fn.vload_global_uchar2", + "test_fn.vload_global_uchar3", + "test_fn.vload_global_uchar4", + "test_fn.vload_global_uchar8", + "test_fn.vload_global_uchar16", + "test_fn.vload_global_short2", + "test_fn.vload_global_short3", + "test_fn.vload_global_short4", + 
"test_fn.vload_global_short8", + "test_fn.vload_global_short16", + "test_fn.vload_global_ushort2", + "test_fn.vload_global_ushort3", + "test_fn.vload_global_ushort4", + "test_fn.vload_global_ushort8", + "test_fn.vload_global_ushort16", + "test_fn.vload_global_int2", + "test_fn.vload_global_int3", + "test_fn.vload_global_int4", + "test_fn.vload_global_int8", + "test_fn.vload_global_int16", + "test_fn.vload_global_uint2", + "test_fn.vload_global_uint3", + "test_fn.vload_global_uint4", + "test_fn.vload_global_uint8", + "test_fn.vload_global_uint16", + "test_fn.vload_global_long2", + "test_fn.vload_global_long3", + "test_fn.vload_global_long4", + "test_fn.vload_global_long8", + "test_fn.vload_global_long16", + "test_fn.vload_global_ulong2", + "test_fn.vload_global_ulong3", + "test_fn.vload_global_ulong4", + "test_fn.vload_global_ulong8", + "test_fn.vload_global_ulong16", + "test_fn.vload_global_float2", + "test_fn.vload_global_float3", + "test_fn.vload_global_float4", + "test_fn.vload_global_float8", + "test_fn.vload_global_float16", + "test_fn.vload_constant_char2", + "test_fn.vload_constant_char3", + "test_fn.vload_constant_char4", + "test_fn.vload_constant_char8", + "test_fn.vload_constant_char16", + "test_fn.vload_constant_uchar2", + "test_fn.vload_constant_uchar3", + "test_fn.vload_constant_uchar4", + "test_fn.vload_constant_uchar8", + "test_fn.vload_constant_uchar16", + "test_fn.vload_constant_short2", + "test_fn.vload_constant_short3", + "test_fn.vload_constant_short4", + "test_fn.vload_constant_short8", + "test_fn.vload_constant_short16", + "test_fn.vload_constant_ushort2", + "test_fn.vload_constant_ushort3", + "test_fn.vload_constant_ushort4", + "test_fn.vload_constant_ushort8", + "test_fn.vload_constant_ushort16", + "test_fn.vload_constant_int2", + "test_fn.vload_constant_int3", + "test_fn.vload_constant_int4", + "test_fn.vload_constant_int8", + "test_fn.vload_constant_int16", + "test_fn.vload_constant_uint2", + "test_fn.vload_constant_uint3", + 
"test_fn.vload_constant_uint4", + "test_fn.vload_constant_uint8", + "test_fn.vload_constant_uint16", + "test_fn.vload_constant_long2", + "test_fn.vload_constant_long3", + "test_fn.vload_constant_long4", + "test_fn.vload_constant_long8", + "test_fn.vload_constant_long16", + "test_fn.vload_constant_ulong2", + "test_fn.vload_constant_ulong3", + "test_fn.vload_constant_ulong4", + "test_fn.vload_constant_ulong8", + "test_fn.vload_constant_ulong16", + "test_fn.vload_constant_float2", + "test_fn.vload_constant_float3", + "test_fn.vload_constant_float4", + "test_fn.vload_constant_float8", + "test_fn.vload_constant_float16", + "test_fn.vload_private_char2", + "test_fn.vload_private_char3", + "test_fn.vload_private_char4", + "test_fn.vload_private_char8", + "test_fn.vload_private_char16", + "test_fn.vload_private_uchar2", + "test_fn.vload_private_uchar3", + "test_fn.vload_private_uchar4", + "test_fn.vload_private_uchar8", + "test_fn.vload_private_uchar16", + "test_fn.vload_private_short2", + "test_fn.vload_private_short3", + "test_fn.vload_private_short4", + "test_fn.vload_private_short8", + "test_fn.vload_private_short16", + "test_fn.vload_private_ushort2", + "test_fn.vload_private_ushort3", + "test_fn.vload_private_ushort4", + "test_fn.vload_private_ushort8", + "test_fn.vload_private_ushort16", + "test_fn.vload_private_int2", + "test_fn.vload_private_int3", + "test_fn.vload_private_int4", + "test_fn.vload_private_int8", + "test_fn.vload_private_int16", + "test_fn.vload_private_uint2", + "test_fn.vload_private_uint3", + "test_fn.vload_private_uint4", + "test_fn.vload_private_uint8", + "test_fn.vload_private_uint16", + "test_fn.vload_private_long2", + "test_fn.vload_private_long3", + "test_fn.vload_private_long4", + "test_fn.vload_private_long8", + "test_fn.vload_private_long16", + "test_fn.vload_private_ulong2", + "test_fn.vload_private_ulong3", + "test_fn.vload_private_ulong4", + "test_fn.vload_private_ulong8", + "test_fn.vload_private_ulong16", + 
"test_fn.vload_private_float2", + "test_fn.vload_private_float3", + "test_fn.vload_private_float4", + "test_fn.vload_private_float8", + "test_fn.vload_private_float16", + "test_fn.vload_local_char2", + "test_fn.vload_local_char3", + "test_fn.vload_local_char4", + "test_fn.vload_local_char8", + "test_fn.vload_local_char16", + "test_fn.vload_local_uchar2", + "test_fn.vload_local_uchar3", + "test_fn.vload_local_uchar4", + "test_fn.vload_local_uchar8", + "test_fn.vload_local_uchar16", + "test_fn.vload_local_short2", + "test_fn.vload_local_short3", + "test_fn.vload_local_short4", + "test_fn.vload_local_short8", + "test_fn.vload_local_short16", + "test_fn.vload_local_ushort2", + "test_fn.vload_local_ushort3", + "test_fn.vload_local_ushort4", + "test_fn.vload_local_ushort8", + "test_fn.vload_local_ushort16", + "test_fn.vload_local_int2", + "test_fn.vload_local_int3", + "test_fn.vload_local_int4", + "test_fn.vload_local_int8", + "test_fn.vload_local_int16", + "test_fn.vload_local_uint2", + "test_fn.vload_local_uint3", + "test_fn.vload_local_uint4", + "test_fn.vload_local_uint8", + "test_fn.vload_local_uint16", + "test_fn.vload_local_long2", + "test_fn.vload_local_long3", + "test_fn.vload_local_long4", + "test_fn.vload_local_long8", + "test_fn.vload_local_long16", + "test_fn.vload_local_ulong2", + "test_fn.vload_local_ulong3", + "test_fn.vload_local_ulong4", + "test_fn.vload_local_ulong8", + "test_fn.vload_local_ulong16", + "test_fn.vload_local_float2", + "test_fn.vload_local_float3", + "test_fn.vload_local_float4", + "test_fn.vload_local_float8", + "test_fn.vload_local_float16", + "test_fn.vstore_global_char2", + "test_fn.vstore_global_char3", + "test_fn.vstore_global_char4", + "test_fn.vstore_global_char8", + "test_fn.vstore_global_char16", + "test_fn.vstore_global_uchar2", + "test_fn.vstore_global_uchar3", + "test_fn.vstore_global_uchar4", + "test_fn.vstore_global_uchar8", + "test_fn.vstore_global_uchar16", + "test_fn.vstore_global_short2", + 
"test_fn.vstore_global_short3", + "test_fn.vstore_global_short4", + "test_fn.vstore_global_short8", + "test_fn.vstore_global_short16", + "test_fn.vstore_global_ushort2", + "test_fn.vstore_global_ushort3", + "test_fn.vstore_global_ushort4", + "test_fn.vstore_global_ushort8", + "test_fn.vstore_global_ushort16", + "test_fn.vstore_global_int2", + "test_fn.vstore_global_int3", + "test_fn.vstore_global_int4", + "test_fn.vstore_global_int8", + "test_fn.vstore_global_int16", + "test_fn.vstore_global_uint2", + "test_fn.vstore_global_uint3", + "test_fn.vstore_global_uint4", + "test_fn.vstore_global_uint8", + "test_fn.vstore_global_uint16", + "test_fn.vstore_global_long2", + "test_fn.vstore_global_long3", + "test_fn.vstore_global_long4", + "test_fn.vstore_global_long8", + "test_fn.vstore_global_long16", + "test_fn.vstore_global_ulong2", + "test_fn.vstore_global_ulong3", + "test_fn.vstore_global_ulong4", + "test_fn.vstore_global_ulong8", + "test_fn.vstore_global_ulong16", + "test_fn.vstore_global_float2", + "test_fn.vstore_global_float3", + "test_fn.vstore_global_float4", + "test_fn.vstore_global_float8", + "test_fn.vstore_global_float16", + "test_fn.vstore_local_char2", + "test_fn.vstore_local_char3", + "test_fn.vstore_local_char4", + "test_fn.vstore_local_char8", + "test_fn.vstore_local_char16", + "test_fn.vstore_local_uchar2", + "test_fn.vstore_local_uchar3", + "test_fn.vstore_local_uchar4", + "test_fn.vstore_local_uchar8", + "test_fn.vstore_local_uchar16", + "test_fn.vstore_local_short2", + "test_fn.vstore_local_short3", + "test_fn.vstore_local_short4", + "test_fn.vstore_local_short8", + "test_fn.vstore_local_short16", + "test_fn.vstore_local_ushort2", + "test_fn.vstore_local_ushort3", + "test_fn.vstore_local_ushort4", + "test_fn.vstore_local_ushort8", + "test_fn.vstore_local_ushort16", + "test_fn.vstore_local_int2", + "test_fn.vstore_local_int3", + "test_fn.vstore_local_int4", + "test_fn.vstore_local_int8", + "test_fn.vstore_local_int16", + "test_fn.vstore_local_uint2", + 
"test_fn.vstore_local_uint3", + "test_fn.vstore_local_uint4", + "test_fn.vstore_local_uint8", + "test_fn.vstore_local_uint16", + "test_fn.vstore_local_long2", + "test_fn.vstore_local_long3", + "test_fn.vstore_local_long4", + "test_fn.vstore_local_long8", + "test_fn.vstore_local_long16", + "test_fn.vstore_local_ulong2", + "test_fn.vstore_local_ulong3", + "test_fn.vstore_local_ulong4", + "test_fn.vstore_local_ulong8", + "test_fn.vstore_local_ulong16", + "test_fn.vstore_local_float2", + "test_fn.vstore_local_float3", + "test_fn.vstore_local_float4", + "test_fn.vstore_local_float8", + "test_fn.vstore_local_float16", + "test_fn.vstore_private_char2", + "test_fn.vstore_private_char3", + "test_fn.vstore_private_char4", + "test_fn.vstore_private_char8", + "test_fn.vstore_private_char16", + "test_fn.vstore_private_uchar2", + "test_fn.vstore_private_uchar3", + "test_fn.vstore_private_uchar4", + "test_fn.vstore_private_uchar8", + "test_fn.vstore_private_uchar16", + "test_fn.vstore_private_short2", + "test_fn.vstore_private_short3", + "test_fn.vstore_private_short4", + "test_fn.vstore_private_short8", + "test_fn.vstore_private_short16", + "test_fn.vstore_private_ushort2", + "test_fn.vstore_private_ushort3", + "test_fn.vstore_private_ushort4", + "test_fn.vstore_private_ushort8", + "test_fn.vstore_private_ushort16", + "test_fn.vstore_private_int2", + "test_fn.vstore_private_int3", + "test_fn.vstore_private_int4", + "test_fn.vstore_private_int8", + "test_fn.vstore_private_int16", + "test_fn.vstore_private_uint2", + "test_fn.vstore_private_uint3", + "test_fn.vstore_private_uint4", + "test_fn.vstore_private_uint8", + "test_fn.vstore_private_uint16", + "test_fn.vstore_private_long2", + "test_fn.vstore_private_long3", + "test_fn.vstore_private_long4", + "test_fn.vstore_private_long8", + "test_fn.vstore_private_long16", + "test_fn.vstore_private_ulong2", + "test_fn.vstore_private_ulong3", + "test_fn.vstore_private_ulong4", + "test_fn.vstore_private_ulong8", + 
"test_fn.vstore_private_ulong16", + "test_fn.vstore_private_float2", + "test_fn.vstore_private_float3", + "test_fn.vstore_private_float4", + "test_fn.vstore_private_float8", + "test_fn.vstore_private_float16", + "test_fn.async_copy_global_to_local_char", + "test_fn.async_copy_global_to_local_char2", + "test_fn.async_copy_global_to_local_char4", + "test_fn.async_copy_global_to_local_char8", + "test_fn.async_copy_global_to_local_char16", + "test_fn.async_copy_global_to_local_uchar", + "test_fn.async_copy_global_to_local_uchar2", + "test_fn.async_copy_global_to_local_uchar4", + "test_fn.async_copy_global_to_local_uchar8", + "test_fn.async_copy_global_to_local_uchar16", + "test_fn.async_copy_global_to_local_short", + "test_fn.async_copy_global_to_local_short2", + "test_fn.async_copy_global_to_local_short4", + "test_fn.async_copy_global_to_local_short8", + "test_fn.async_copy_global_to_local_short16", + "test_fn.async_copy_global_to_local_ushort", + "test_fn.async_copy_global_to_local_ushort2", + "test_fn.async_copy_global_to_local_ushort4", + "test_fn.async_copy_global_to_local_ushort8", + "test_fn.async_copy_global_to_local_ushort16", + "test_fn.async_copy_global_to_local_int", + "test_fn.async_copy_global_to_local_int2", + "test_fn.async_copy_global_to_local_int4", + "test_fn.async_copy_global_to_local_int8", + "test_fn.async_copy_global_to_local_int16", + "test_fn.async_copy_global_to_local_uint", + "test_fn.async_copy_global_to_local_uint2", + "test_fn.async_copy_global_to_local_uint4", + "test_fn.async_copy_global_to_local_uint8", + "test_fn.async_copy_global_to_local_uint16", + "test_fn.async_copy_global_to_local_long", + "test_fn.async_copy_global_to_local_long2", + "test_fn.async_copy_global_to_local_long4", + "test_fn.async_copy_global_to_local_long8", + "test_fn.async_copy_global_to_local_long16", + "test_fn.async_copy_global_to_local_ulong", + "test_fn.async_copy_global_to_local_ulong2", + "test_fn.async_copy_global_to_local_ulong4", + 
"test_fn.async_copy_global_to_local_ulong8", + "test_fn.async_copy_global_to_local_ulong16", + "test_fn.async_copy_global_to_local_float", + "test_fn.async_copy_global_to_local_float2", + "test_fn.async_copy_global_to_local_float4", + "test_fn.async_copy_global_to_local_float8", + "test_fn.async_copy_global_to_local_float16", + "test_fn.async_copy_global_to_local_double", + "test_fn.async_copy_global_to_local_double2", + "test_fn.async_copy_global_to_local_double4", + "test_fn.async_copy_global_to_local_double8", + "test_fn.async_copy_global_to_local_double16", + "test_fn.async_copy_local_to_global_char", + "test_fn.async_copy_local_to_global_char2", + "test_fn.async_copy_local_to_global_char4", + "test_fn.async_copy_local_to_global_char8", + "test_fn.async_copy_local_to_global_char16", + "test_fn.async_copy_local_to_global_uchar", + "test_fn.async_copy_local_to_global_uchar2", + "test_fn.async_copy_local_to_global_uchar4", + "test_fn.async_copy_local_to_global_uchar8", + "test_fn.async_copy_local_to_global_uchar16", + "test_fn.async_copy_local_to_global_short", + "test_fn.async_copy_local_to_global_short2", + "test_fn.async_copy_local_to_global_short4", + "test_fn.async_copy_local_to_global_short8", + "test_fn.async_copy_local_to_global_short16", + "test_fn.async_copy_local_to_global_ushort", + "test_fn.async_copy_local_to_global_ushort2", + "test_fn.async_copy_local_to_global_ushort4", + "test_fn.async_copy_local_to_global_ushort8", + "test_fn.async_copy_local_to_global_ushort16", + "test_fn.async_copy_local_to_global_int", + "test_fn.async_copy_local_to_global_int2", + "test_fn.async_copy_local_to_global_int4", + "test_fn.async_copy_local_to_global_int8", + "test_fn.async_copy_local_to_global_int16", + "test_fn.async_copy_local_to_global_uint", + "test_fn.async_copy_local_to_global_uint2", + "test_fn.async_copy_local_to_global_uint4", + "test_fn.async_copy_local_to_global_uint8", + "test_fn.async_copy_local_to_global_uint16", + 
"test_fn.async_copy_local_to_global_long", + "test_fn.async_copy_local_to_global_long2", + "test_fn.async_copy_local_to_global_long4", + "test_fn.async_copy_local_to_global_long8", + "test_fn.async_copy_local_to_global_long16", + "test_fn.async_copy_local_to_global_ulong", + "test_fn.async_copy_local_to_global_ulong2", + "test_fn.async_copy_local_to_global_ulong4", + "test_fn.async_copy_local_to_global_ulong8", + "test_fn.async_copy_local_to_global_ulong16", + "test_fn.async_copy_local_to_global_float", + "test_fn.async_copy_local_to_global_float2", + "test_fn.async_copy_local_to_global_float4", + "test_fn.async_copy_local_to_global_float8", + "test_fn.async_copy_local_to_global_float16", + "test_fn.async_strided_copy_global_to_local_char", + "test_fn.async_strided_copy_global_to_local_char2", + "test_fn.async_strided_copy_global_to_local_char4", + "test_fn.async_strided_copy_global_to_local_char8", + "test_fn.async_strided_copy_global_to_local_char16", + "test_fn.async_strided_copy_global_to_local_uchar", + "test_fn.async_strided_copy_global_to_local_uchar2", + "test_fn.async_strided_copy_global_to_local_uchar4", + "test_fn.async_strided_copy_global_to_local_uchar8", + "test_fn.async_strided_copy_global_to_local_uchar16", + "test_fn.async_strided_copy_global_to_local_short", + "test_fn.async_strided_copy_global_to_local_short2", + "test_fn.async_strided_copy_global_to_local_short4", + "test_fn.async_strided_copy_global_to_local_short8", + "test_fn.async_strided_copy_global_to_local_short16", + "test_fn.async_strided_copy_global_to_local_ushort", + "test_fn.async_strided_copy_global_to_local_ushort2", + "test_fn.async_strided_copy_global_to_local_ushort4", + "test_fn.async_strided_copy_global_to_local_ushort8", + "test_fn.async_strided_copy_global_to_local_ushort16", + "test_fn.async_strided_copy_global_to_local_int", + "test_fn.async_strided_copy_global_to_local_int2", + "test_fn.async_strided_copy_global_to_local_int4", + 
"test_fn.async_strided_copy_global_to_local_int8", + "test_fn.async_strided_copy_global_to_local_int16", + "test_fn.async_strided_copy_global_to_local_uint", + "test_fn.async_strided_copy_global_to_local_uint2", + "test_fn.async_strided_copy_global_to_local_uint4", + "test_fn.async_strided_copy_global_to_local_uint8", + "test_fn.async_strided_copy_global_to_local_uint16", + "test_fn.async_strided_copy_global_to_local_long", + "test_fn.async_strided_copy_global_to_local_long2", + "test_fn.async_strided_copy_global_to_local_long4", + "test_fn.async_strided_copy_global_to_local_long8", + "test_fn.async_strided_copy_global_to_local_long16", + "test_fn.async_strided_copy_global_to_local_ulong", + "test_fn.async_strided_copy_global_to_local_ulong2", + "test_fn.async_strided_copy_global_to_local_ulong4", + "test_fn.async_strided_copy_global_to_local_ulong8", + "test_fn.async_strided_copy_global_to_local_ulong16", + "test_fn.async_strided_copy_global_to_local_float", + "test_fn.async_strided_copy_global_to_local_float2", + "test_fn.async_strided_copy_global_to_local_float4", + "test_fn.async_strided_copy_global_to_local_float8", + "test_fn.async_strided_copy_global_to_local_float16", + "test_fn.async_strided_copy_local_to_global_char", + "test_fn.async_strided_copy_local_to_global_char2", + "test_fn.async_strided_copy_local_to_global_char4", + "test_fn.async_strided_copy_local_to_global_char8", + "test_fn.async_strided_copy_local_to_global_char16", + "test_fn.async_strided_copy_local_to_global_uchar", + "test_fn.async_strided_copy_local_to_global_uchar2", + "test_fn.async_strided_copy_local_to_global_uchar4", + "test_fn.async_strided_copy_local_to_global_uchar8", + "test_fn.async_strided_copy_local_to_global_uchar16", + "test_fn.async_strided_copy_local_to_global_short", + "test_fn.async_strided_copy_local_to_global_short2", + "test_fn.async_strided_copy_local_to_global_short4", + "test_fn.async_strided_copy_local_to_global_short8", + 
"test_fn.async_strided_copy_local_to_global_short16", + "test_fn.async_strided_copy_local_to_global_ushort", + "test_fn.async_strided_copy_local_to_global_ushort2", + "test_fn.async_strided_copy_local_to_global_ushort4", + "test_fn.async_strided_copy_local_to_global_ushort8", + "test_fn.async_strided_copy_local_to_global_ushort16", + "test_fn.async_strided_copy_local_to_global_int", + "test_fn.async_strided_copy_local_to_global_int2", + "test_fn.async_strided_copy_local_to_global_int4", + "test_fn.async_strided_copy_local_to_global_int8", + "test_fn.async_strided_copy_local_to_global_int16", + "test_fn.async_strided_copy_local_to_global_uint", + "test_fn.async_strided_copy_local_to_global_uint2", + "test_fn.async_strided_copy_local_to_global_uint4", + "test_fn.async_strided_copy_local_to_global_uint8", + "test_fn.async_strided_copy_local_to_global_uint16", + "test_fn.async_strided_copy_local_to_global_long", + "test_fn.async_strided_copy_local_to_global_long2", + "test_fn.async_strided_copy_local_to_global_long4", + "test_fn.async_strided_copy_local_to_global_long8", + "test_fn.async_strided_copy_local_to_global_long16", + "test_fn.async_strided_copy_local_to_global_ulong", + "test_fn.async_strided_copy_local_to_global_ulong2", + "test_fn.async_strided_copy_local_to_global_ulong4", + "test_fn.async_strided_copy_local_to_global_ulong8", + "test_fn.async_strided_copy_local_to_global_ulong16", + "test_fn.async_strided_copy_local_to_global_float", + "test_fn.async_strided_copy_local_to_global_float2", + "test_fn.async_strided_copy_local_to_global_float4", + "test_fn.async_strided_copy_local_to_global_float8", + "test_fn.async_strided_copy_local_to_global_float16", + "test_fn.prefetch_char", + "test_fn.prefetch_char2", + "test_fn.prefetch_char4", + "test_fn.prefetch_char8", + "test_fn.prefetch_char16", + "test_fn.prefetch_uchar", + "test_fn.prefetch_uchar2", + "test_fn.prefetch_uchar4", + "test_fn.prefetch_uchar8", + "test_fn.prefetch_uchar16", + 
"test_fn.prefetch_short", + "test_fn.prefetch_short2", + "test_fn.prefetch_short4", + "test_fn.prefetch_short8", + "test_fn.prefetch_short16", + "test_fn.prefetch_ushort", + "test_fn.prefetch_ushort2", + "test_fn.prefetch_ushort4", + "test_fn.prefetch_ushort8", + "test_fn.prefetch_ushort16", + "test_fn.prefetch_int", + "test_fn.prefetch_int2", + "test_fn.prefetch_int4", + "test_fn.prefetch_int8", + "test_fn.prefetch_int16", + "test_fn.prefetch_uint", + "test_fn.prefetch_uint2", + "test_fn.prefetch_uint4", + "test_fn.prefetch_uint8", + "test_fn.prefetch_uint16", + "test_fn.prefetch_long", + "test_fn.prefetch_long2", + "test_fn.prefetch_long4", + "test_fn.prefetch_long8", + "test_fn.prefetch_long16", + "test_fn.prefetch_ulong", + "test_fn.prefetch_ulong2", + "test_fn.prefetch_ulong4", + "test_fn.prefetch_ulong8", + "test_fn.prefetch_ulong16", + "test_fn.prefetch_float", + "test_fn.prefetch_float2", + "test_fn.prefetch_float4", + "test_fn.prefetch_float8", + "test_fn.prefetch_float16", + }; + + log_info("test_basic\n"); + return test_suite(device, size_t_width, folder, test_name, sizeof(test_name) / sizeof(const char *), ""); +} + +bool test_basic_double (cl_device_id device, cl_uint size_t_width, const char *folder) +{ + static const char* test_name[] = { + "sample_test.vec_type_hint_double", + "sample_test.vec_type_hint_double2", + "sample_test.vec_type_hint_double4", + "sample_test.vec_type_hint_double8", + "sample_test.vec_type_hint_double16", + "test.kernel_memory_alignment_private_double", + "test_fn.vload_global_double2", + "test_fn.vload_global_double3", + "test_fn.vload_global_double4", + "test_fn.vload_global_double8", + "test_fn.vload_global_double16", + "test_fn.vload_constant_double2", + "test_fn.vload_constant_double3", + "test_fn.vload_constant_double4", + "test_fn.vload_constant_double8", + "test_fn.vload_constant_double16", + "test_fn.vstore_global_double2", + "test_fn.vstore_global_double3", + "test_fn.vstore_global_double4", + 
"test_fn.vstore_global_double8", + "test_fn.vstore_global_double16", + "test_fn.vload_local_double2", + "test_fn.vload_local_double3", + "test_fn.vload_local_double4", + "test_fn.vload_local_double8", + "test_fn.vload_local_double16", + "test_fn.vload_private_double2", + "test_fn.vload_private_double3", + "test_fn.vload_private_double4", + "test_fn.vload_private_double8", + "test_fn.vload_private_double16", + "test_fn.vstore_local_double2", + "test_fn.vstore_local_double3", + "test_fn.vstore_local_double4", + "test_fn.vstore_local_double8", + "test_fn.vstore_local_double16", + "test_fn.vstore_private_double2", + "test_fn.vstore_private_double3", + "test_fn.vstore_private_double4", + "test_fn.vstore_private_double8", + "test_fn.vstore_private_double16", + "test_fn.async_copy_local_to_global_double", + "test_fn.async_copy_local_to_global_double2", + "test_fn.async_copy_local_to_global_double4", + "test_fn.async_copy_local_to_global_double8", + "test_fn.async_copy_local_to_global_double16", + "test_fn.async_strided_copy_global_to_local_double", + "test_fn.async_strided_copy_global_to_local_double2", + "test_fn.async_strided_copy_global_to_local_double4", + "test_fn.async_strided_copy_global_to_local_double8", + "test_fn.async_strided_copy_global_to_local_double16", + "test_fn.async_strided_copy_local_to_global_double", + "test_fn.async_strided_copy_local_to_global_double2", + "test_fn.async_strided_copy_local_to_global_double4", + "test_fn.async_strided_copy_local_to_global_double8", + "test_fn.async_strided_copy_local_to_global_double16", + "test_fn.prefetch_double", + "test_fn.prefetch_double2", + "test_fn.prefetch_double4", + "test_fn.prefetch_double8", + "test_fn.prefetch_double16", + }; + + log_info("test_basic_double\n"); + return test_suite(device, size_t_width, folder, test_name, sizeof(test_name) / sizeof(const char *), "cl_khr_fp64"); +} + + +bool test_commonfns (cl_device_id device, cl_uint size_t_width, const char *folder) +{ + static const char* 
test_name[] = { + "test_clamp.test_clamp_float", + "test_clamp.test_clamp_float2", + "test_clamp.test_clamp_float4", + "test_clamp.test_clamp_float8", + "test_clamp.test_clamp_float16", + "test_clamp.test_clamp_float3", + "test_degrees", + "test_degrees2", + "test_degrees4", + "test_degrees8", + "test_degrees16", + "test_degrees3", + "test_fmax", + "test_fmax2", + "test_fmax4", + "test_fmax8", + "test_fmax16", + "test_fmax3", + "test_fmin", + "test_fmin2", + "test_fmin4", + "test_fmin8", + "test_fmin16", + "test_fmin3", + "test_fn.test_max_float", + "test_fn.test_max_float2", + "test_fn.test_max_float4", + "test_fn.test_max_float8", + "test_fn.test_max_float16", + "test_fn.test_max_float3", + "test_fn.test_min_float", + "test_fn.test_min_float2", + "test_fn.test_min_float4", + "test_fn.test_min_float8", + "test_fn.test_min_float16", + "test_fn.test_min_float3", + "test_mix", + "test_radians", + "test_radians2", + "test_radians4", + "test_radians8", + "test_radians16", + "test_radians3", + "test_step", + "test_step2", + "test_step4", + "test_step8", + "test_step16", + "test_step3", + "test_smoothstep", + "test_smoothstep2", + "test_smoothstep4", + "test_smoothstep8", + "test_smoothstep16", + "test_smoothstep3", + "test_sign", + "test_sign2", + "test_sign4", + "test_sign8", + "test_sign16", + "test_sign3", + }; + + log_info("test_commonfns\n"); + return test_suite(device, size_t_width, folder, test_name, sizeof(test_name) / sizeof(const char *), ""); +} + + +bool test_commonfns_double (cl_device_id device, cl_uint size_t_width, const char *folder) +{ + static const char* test_name[] = { + "test_clamp.test_clamp_double", + "test_clamp.test_clamp_double2", + "test_clamp.test_clamp_double4", + "test_clamp.test_clamp_double8", + "test_clamp.test_clamp_double16", + "test_clamp.test_clamp_double3", + "test_degrees_double", + "test_degrees2_double", + "test_degrees4_double", + "test_degrees8_double", + "test_degrees16_double", + "test_degrees3_double", + 
"test_fn.test_max_double", + "test_fn.test_max_double2", + "test_fn.test_max_double4", + "test_fn.test_max_double8", + "test_fn.test_max_double16", + "test_fn.test_max_double3", + "test_fn.test_min_double", + "test_fn.test_min_double2", + "test_fn.test_min_double4", + "test_fn.test_min_double8", + "test_fn.test_min_double16", + "test_fn.test_min_double3", + "test_radians_double", + "test_radians2_double", + "test_radians4_double", + "test_radians8_double", + "test_radians16_double", + "test_radians3_double", + "test_step_double", + "test_step2_double", + "test_step4_double", + "test_step8_double", + "test_step16_double", + "test_step3_double", + "test_sign_double", + "test_sign2_double", + "test_sign4_double", + "test_sign8_double", + "test_sign16_double", + "test_sign3_double", + }; + + log_info("test_commonfns_double\n"); + return test_suite(device, size_t_width, folder, test_name, sizeof(test_name) / sizeof(const char *), "cl_khr_fp64"); +} + +bool test_conversions (cl_device_id device, cl_uint size_t_width, const char *folder) +{ + static const char* test_name[] = { + "convert2_type_roundingmode_type_f", + "convert3_type_roundingmode_type_f", + "convert4_type_roundingmode_type_f", + "convert8_type_roundingmode_type_f", + "convert16_type_roundingmode_type_f", + "test_implicit_uchar_uchar", + "test_convert_uchar_uchar", + "test_convert_uchar_rte_uchar", + "test_convert_uchar_rtp_uchar", + "test_convert_uchar_rtn_uchar", + "test_convert_uchar_rtz_uchar", + "test_convert_uchar_sat_uchar", + "test_convert_uchar_sat_rte_uchar", + "test_convert_uchar_sat_rtp_uchar", + "test_convert_uchar_sat_rtn_uchar", + "test_convert_uchar_sat_rtz_uchar", + "test_implicit_uchar_char", + "test_convert_uchar_char", + "test_convert_uchar_rte_char", + "test_convert_uchar_rtp_char", + "test_convert_uchar_rtn_char", + "test_convert_uchar_rtz_char", + "test_convert_uchar_sat_char", + "test_convert_uchar_sat_rte_char", + "test_convert_uchar_sat_rtp_char", + 
"test_convert_uchar_sat_rtn_char", + "test_convert_uchar_sat_rtz_char", + "test_implicit_uchar_ushort", + "test_convert_uchar_ushort", + "test_convert_uchar_rte_ushort", + "test_convert_uchar_rtp_ushort", + "test_convert_uchar_rtn_ushort", + "test_convert_uchar_rtz_ushort", + "test_convert_uchar_sat_ushort", + "test_convert_uchar_sat_rte_ushort", + "test_convert_uchar_sat_rtp_ushort", + "test_convert_uchar_sat_rtn_ushort", + "test_convert_uchar_sat_rtz_ushort", + "test_implicit_uchar_short", + "test_convert_uchar_short", + "test_convert_uchar_rte_short", + "test_convert_uchar_rtp_short", + "test_convert_uchar_rtn_short", + "test_convert_uchar_rtz_short", + "test_convert_uchar_sat_short", + "test_convert_uchar_sat_rte_short", + "test_convert_uchar_sat_rtp_short", + "test_convert_uchar_sat_rtn_short", + "test_convert_uchar_sat_rtz_short", + "test_implicit_uchar_uint", + "test_convert_uchar_uint", + "test_convert_uchar_rte_uint", + "test_convert_uchar_rtp_uint", + "test_convert_uchar_rtn_uint", + "test_convert_uchar_rtz_uint", + "test_convert_uchar_sat_uint", + "test_convert_uchar_sat_rte_uint", + "test_convert_uchar_sat_rtp_uint", + "test_convert_uchar_sat_rtn_uint", + "test_convert_uchar_sat_rtz_uint", + "test_implicit_uchar_int", + "test_convert_uchar_int", + "test_convert_uchar_rte_int", + "test_convert_uchar_rtp_int", + "test_convert_uchar_rtn_int", + "test_convert_uchar_rtz_int", + "test_convert_uchar_sat_int", + "test_convert_uchar_sat_rte_int", + "test_convert_uchar_sat_rtp_int", + "test_convert_uchar_sat_rtn_int", + "test_convert_uchar_sat_rtz_int", + "test_implicit_uchar_float", + "test_convert_uchar_float", + "test_convert_uchar_rte_float", + "test_convert_uchar_rtp_float", + "test_convert_uchar_rtn_float", + "test_convert_uchar_rtz_float", + "test_convert_uchar_sat_float", + "test_convert_uchar_sat_rte_float", + "test_convert_uchar_sat_rtp_float", + "test_convert_uchar_sat_rtn_float", + "test_convert_uchar_sat_rtz_float", + "test_implicit_uchar_ulong", + 
"test_convert_uchar_ulong", + "test_convert_uchar_rte_ulong", + "test_convert_uchar_rtp_ulong", + "test_convert_uchar_rtn_ulong", + "test_convert_uchar_rtz_ulong", + "test_convert_uchar_sat_ulong", + "test_convert_uchar_sat_rte_ulong", + "test_convert_uchar_sat_rtp_ulong", + "test_convert_uchar_sat_rtn_ulong", + "test_convert_uchar_sat_rtz_ulong", + "test_implicit_uchar_long", + "test_convert_uchar_long", + "test_convert_uchar_rte_long", + "test_convert_uchar_rtp_long", + "test_convert_uchar_rtn_long", + "test_convert_uchar_rtz_long", + "test_convert_uchar_sat_long", + "test_convert_uchar_sat_rte_long", + "test_convert_uchar_sat_rtp_long", + "test_convert_uchar_sat_rtn_long", + "test_convert_uchar_sat_rtz_long", + "test_implicit_char_uchar", + "test_convert_char_uchar", + "test_convert_char_rte_uchar", + "test_convert_char_rtp_uchar", + "test_convert_char_rtn_uchar", + "test_convert_char_rtz_uchar", + "test_convert_char_sat_uchar", + "test_convert_char_sat_rte_uchar", + "test_convert_char_sat_rtp_uchar", + "test_convert_char_sat_rtn_uchar", + "test_convert_char_sat_rtz_uchar", + "test_implicit_char_char", + "test_convert_char_char", + "test_convert_char_rte_char", + "test_convert_char_rtp_char", + "test_convert_char_rtn_char", + "test_convert_char_rtz_char", + "test_convert_char_sat_char", + "test_convert_char_sat_rte_char", + "test_convert_char_sat_rtp_char", + "test_convert_char_sat_rtn_char", + "test_convert_char_sat_rtz_char", + "test_implicit_char_ushort", + "test_convert_char_ushort", + "test_convert_char_rte_ushort", + "test_convert_char_rtp_ushort", + "test_convert_char_rtn_ushort", + "test_convert_char_rtz_ushort", + "test_convert_char_sat_ushort", + "test_convert_char_sat_rte_ushort", + "test_convert_char_sat_rtp_ushort", + "test_convert_char_sat_rtn_ushort", + "test_convert_char_sat_rtz_ushort", + "test_implicit_char_short", + "test_convert_char_short", + "test_convert_char_rte_short", + "test_convert_char_rtp_short", + "test_convert_char_rtn_short", + 
"test_convert_char_rtz_short", + "test_convert_char_sat_short", + "test_convert_char_sat_rte_short", + "test_convert_char_sat_rtp_short", + "test_convert_char_sat_rtn_short", + "test_convert_char_sat_rtz_short", + "test_implicit_char_uint", + "test_convert_char_uint", + "test_convert_char_rte_uint", + "test_convert_char_rtp_uint", + "test_convert_char_rtn_uint", + "test_convert_char_rtz_uint", + "test_convert_char_sat_uint", + "test_convert_char_sat_rte_uint", + "test_convert_char_sat_rtp_uint", + "test_convert_char_sat_rtn_uint", + "test_convert_char_sat_rtz_uint", + "test_implicit_char_int", + "test_convert_char_int", + "test_convert_char_rte_int", + "test_convert_char_rtp_int", + "test_convert_char_rtn_int", + "test_convert_char_rtz_int", + "test_convert_char_sat_int", + "test_convert_char_sat_rte_int", + "test_convert_char_sat_rtp_int", + "test_convert_char_sat_rtn_int", + "test_convert_char_sat_rtz_int", + "test_implicit_char_float", + "test_convert_char_float", + "test_convert_char_rte_float", + "test_convert_char_rtp_float", + "test_convert_char_rtn_float", + "test_convert_char_rtz_float", + "test_convert_char_sat_float", + "test_convert_char_sat_rte_float", + "test_convert_char_sat_rtp_float", + "test_convert_char_sat_rtn_float", + "test_convert_char_sat_rtz_float", + "test_implicit_char_ulong", + "test_convert_char_ulong", + "test_convert_char_rte_ulong", + "test_convert_char_rtp_ulong", + "test_convert_char_rtn_ulong", + "test_convert_char_rtz_ulong", + "test_convert_char_sat_ulong", + "test_convert_char_sat_rte_ulong", + "test_convert_char_sat_rtp_ulong", + "test_convert_char_sat_rtn_ulong", + "test_convert_char_sat_rtz_ulong", + "test_implicit_char_long", + "test_convert_char_long", + "test_convert_char_rte_long", + "test_convert_char_rtp_long", + "test_convert_char_rtn_long", + "test_convert_char_rtz_long", + "test_convert_char_sat_long", + "test_convert_char_sat_rte_long", + "test_convert_char_sat_rtp_long", + "test_convert_char_sat_rtn_long", + 
"test_convert_char_sat_rtz_long", + "test_implicit_ushort_uchar", + "test_convert_ushort_uchar", + "test_convert_ushort_rte_uchar", + "test_convert_ushort_rtp_uchar", + "test_convert_ushort_rtn_uchar", + "test_convert_ushort_rtz_uchar", + "test_convert_ushort_sat_uchar", + "test_convert_ushort_sat_rte_uchar", + "test_convert_ushort_sat_rtp_uchar", + "test_convert_ushort_sat_rtn_uchar", + "test_convert_ushort_sat_rtz_uchar", + "test_implicit_ushort_char", + "test_convert_ushort_char", + "test_convert_ushort_rte_char", + "test_convert_ushort_rtp_char", + "test_convert_ushort_rtn_char", + "test_convert_ushort_rtz_char", + "test_convert_ushort_sat_char", + "test_convert_ushort_sat_rte_char", + "test_convert_ushort_sat_rtp_char", + "test_convert_ushort_sat_rtn_char", + "test_convert_ushort_sat_rtz_char", + "test_implicit_ushort_ushort", + "test_convert_ushort_ushort", + "test_convert_ushort_rte_ushort", + "test_convert_ushort_rtp_ushort", + "test_convert_ushort_rtn_ushort", + "test_convert_ushort_rtz_ushort", + "test_convert_ushort_sat_ushort", + "test_convert_ushort_sat_rte_ushort", + "test_convert_ushort_sat_rtp_ushort", + "test_convert_ushort_sat_rtn_ushort", + "test_convert_ushort_sat_rtz_ushort", + "test_implicit_ushort_short", + "test_convert_ushort_short", + "test_convert_ushort_rte_short", + "test_convert_ushort_rtp_short", + "test_convert_ushort_rtn_short", + "test_convert_ushort_rtz_short", + "test_convert_ushort_sat_short", + "test_convert_ushort_sat_rte_short", + "test_convert_ushort_sat_rtp_short", + "test_convert_ushort_sat_rtn_short", + "test_convert_ushort_sat_rtz_short", + "test_implicit_ushort_uint", + "test_convert_ushort_uint", + "test_convert_ushort_rte_uint", + "test_convert_ushort_rtp_uint", + "test_convert_ushort_rtn_uint", + "test_convert_ushort_rtz_uint", + "test_convert_ushort_sat_uint", + "test_convert_ushort_sat_rte_uint", + "test_convert_ushort_sat_rtp_uint", + "test_convert_ushort_sat_rtn_uint", + "test_convert_ushort_sat_rtz_uint", + 
"test_implicit_ushort_int", + "test_convert_ushort_int", + "test_convert_ushort_rte_int", + "test_convert_ushort_rtp_int", + "test_convert_ushort_rtn_int", + "test_convert_ushort_rtz_int", + "test_convert_ushort_sat_int", + "test_convert_ushort_sat_rte_int", + "test_convert_ushort_sat_rtp_int", + "test_convert_ushort_sat_rtn_int", + "test_convert_ushort_sat_rtz_int", + "test_implicit_ushort_float", + "test_convert_ushort_float", + "test_convert_ushort_rte_float", + "test_convert_ushort_rtp_float", + "test_convert_ushort_rtn_float", + "test_convert_ushort_rtz_float", + "test_convert_ushort_sat_float", + "test_convert_ushort_sat_rte_float", + "test_convert_ushort_sat_rtp_float", + "test_convert_ushort_sat_rtn_float", + "test_convert_ushort_sat_rtz_float", + "test_implicit_ushort_ulong", + "test_convert_ushort_ulong", + "test_convert_ushort_rte_ulong", + "test_convert_ushort_rtp_ulong", + "test_convert_ushort_rtn_ulong", + "test_convert_ushort_rtz_ulong", + "test_convert_ushort_sat_ulong", + "test_convert_ushort_sat_rte_ulong", + "test_convert_ushort_sat_rtp_ulong", + "test_convert_ushort_sat_rtn_ulong", + "test_convert_ushort_sat_rtz_ulong", + "test_implicit_ushort_long", + "test_convert_ushort_long", + "test_convert_ushort_rte_long", + "test_convert_ushort_rtp_long", + "test_convert_ushort_rtn_long", + "test_convert_ushort_rtz_long", + "test_convert_ushort_sat_long", + "test_convert_ushort_sat_rte_long", + "test_convert_ushort_sat_rtp_long", + "test_convert_ushort_sat_rtn_long", + "test_convert_ushort_sat_rtz_long", + "test_implicit_short_uchar", + "test_convert_short_uchar", + "test_convert_short_rte_uchar", + "test_convert_short_rtp_uchar", + "test_convert_short_rtn_uchar", + "test_convert_short_rtz_uchar", + "test_convert_short_sat_uchar", + "test_convert_short_sat_rte_uchar", + "test_convert_short_sat_rtp_uchar", + "test_convert_short_sat_rtn_uchar", + "test_convert_short_sat_rtz_uchar", + "test_implicit_short_char", + "test_convert_short_char", + 
"test_convert_short_rte_char", + "test_convert_short_rtp_char", + "test_convert_short_rtn_char", + "test_convert_short_rtz_char", + "test_convert_short_sat_char", + "test_convert_short_sat_rte_char", + "test_convert_short_sat_rtp_char", + "test_convert_short_sat_rtn_char", + "test_convert_short_sat_rtz_char", + "test_implicit_short_ushort", + "test_convert_short_ushort", + "test_convert_short_rte_ushort", + "test_convert_short_rtp_ushort", + "test_convert_short_rtn_ushort", + "test_convert_short_rtz_ushort", + "test_convert_short_sat_ushort", + "test_convert_short_sat_rte_ushort", + "test_convert_short_sat_rtp_ushort", + "test_convert_short_sat_rtn_ushort", + "test_convert_short_sat_rtz_ushort", + "test_implicit_short_short", + "test_convert_short_short", + "test_convert_short_rte_short", + "test_convert_short_rtp_short", + "test_convert_short_rtn_short", + "test_convert_short_rtz_short", + "test_convert_short_sat_short", + "test_convert_short_sat_rte_short", + "test_convert_short_sat_rtp_short", + "test_convert_short_sat_rtn_short", + "test_convert_short_sat_rtz_short", + "test_implicit_short_uint", + "test_convert_short_uint", + "test_convert_short_rte_uint", + "test_convert_short_rtp_uint", + "test_convert_short_rtn_uint", + "test_convert_short_rtz_uint", + "test_convert_short_sat_uint", + "test_convert_short_sat_rte_uint", + "test_convert_short_sat_rtp_uint", + "test_convert_short_sat_rtn_uint", + "test_convert_short_sat_rtz_uint", + "test_implicit_short_int", + "test_convert_short_int", + "test_convert_short_rte_int", + "test_convert_short_rtp_int", + "test_convert_short_rtn_int", + "test_convert_short_rtz_int", + "test_convert_short_sat_int", + "test_convert_short_sat_rte_int", + "test_convert_short_sat_rtp_int", + "test_convert_short_sat_rtn_int", + "test_convert_short_sat_rtz_int", + "test_implicit_short_float", + "test_convert_short_float", + "test_convert_short_rte_float", + "test_convert_short_rtp_float", + "test_convert_short_rtn_float", + 
"test_convert_short_rtz_float", + "test_convert_short_sat_float", + "test_convert_short_sat_rte_float", + "test_convert_short_sat_rtp_float", + "test_convert_short_sat_rtn_float", + "test_convert_short_sat_rtz_float", + "test_implicit_short_ulong", + "test_convert_short_ulong", + "test_convert_short_rte_ulong", + "test_convert_short_rtp_ulong", + "test_convert_short_rtn_ulong", + "test_convert_short_rtz_ulong", + "test_convert_short_sat_ulong", + "test_convert_short_sat_rte_ulong", + "test_convert_short_sat_rtp_ulong", + "test_convert_short_sat_rtn_ulong", + "test_convert_short_sat_rtz_ulong", + "test_implicit_short_long", + "test_convert_short_long", + "test_convert_short_rte_long", + "test_convert_short_rtp_long", + "test_convert_short_rtn_long", + "test_convert_short_rtz_long", + "test_convert_short_sat_long", + "test_convert_short_sat_rte_long", + "test_convert_short_sat_rtp_long", + "test_convert_short_sat_rtn_long", + "test_convert_short_sat_rtz_long", + "test_implicit_uint_uchar", + "test_convert_uint_uchar", + "test_convert_uint_rte_uchar", + "test_convert_uint_rtp_uchar", + "test_convert_uint_rtn_uchar", + "test_convert_uint_rtz_uchar", + "test_convert_uint_sat_uchar", + "test_convert_uint_sat_rte_uchar", + "test_convert_uint_sat_rtp_uchar", + "test_convert_uint_sat_rtn_uchar", + "test_convert_uint_sat_rtz_uchar", + "test_implicit_uint_char", + "test_convert_uint_char", + "test_convert_uint_rte_char", + "test_convert_uint_rtp_char", + "test_convert_uint_rtn_char", + "test_convert_uint_rtz_char", + "test_convert_uint_sat_char", + "test_convert_uint_sat_rte_char", + "test_convert_uint_sat_rtp_char", + "test_convert_uint_sat_rtn_char", + "test_convert_uint_sat_rtz_char", + "test_implicit_uint_ushort", + "test_convert_uint_ushort", + "test_convert_uint_rte_ushort", + "test_convert_uint_rtp_ushort", + "test_convert_uint_rtn_ushort", + "test_convert_uint_rtz_ushort", + "test_convert_uint_sat_ushort", + "test_convert_uint_sat_rte_ushort", + 
"test_convert_uint_sat_rtp_ushort", + "test_convert_uint_sat_rtn_ushort", + "test_convert_uint_sat_rtz_ushort", + "test_implicit_uint_short", + "test_convert_uint_short", + "test_convert_uint_rte_short", + "test_convert_uint_rtp_short", + "test_convert_uint_rtn_short", + "test_convert_uint_rtz_short", + "test_convert_uint_sat_short", + "test_convert_uint_sat_rte_short", + "test_convert_uint_sat_rtp_short", + "test_convert_uint_sat_rtn_short", + "test_convert_uint_sat_rtz_short", + "test_implicit_uint_uint", + "test_convert_uint_uint", + "test_convert_uint_rte_uint", + "test_convert_uint_rtp_uint", + "test_convert_uint_rtn_uint", + "test_convert_uint_rtz_uint", + "test_convert_uint_sat_uint", + "test_convert_uint_sat_rte_uint", + "test_convert_uint_sat_rtp_uint", + "test_convert_uint_sat_rtn_uint", + "test_convert_uint_sat_rtz_uint", + "test_implicit_uint_int", + "test_convert_uint_int", + "test_convert_uint_rte_int", + "test_convert_uint_rtp_int", + "test_convert_uint_rtn_int", + "test_convert_uint_rtz_int", + "test_convert_uint_sat_int", + "test_convert_uint_sat_rte_int", + "test_convert_uint_sat_rtp_int", + "test_convert_uint_sat_rtn_int", + "test_convert_uint_sat_rtz_int", + "test_implicit_uint_float", + "test_convert_uint_float", + "test_convert_uint_rte_float", + "test_convert_uint_rtp_float", + "test_convert_uint_rtn_float", + "test_convert_uint_rtz_float", + "test_convert_uint_sat_float", + "test_convert_uint_sat_rte_float", + "test_convert_uint_sat_rtp_float", + "test_convert_uint_sat_rtn_float", + "test_convert_uint_sat_rtz_float", + "test_implicit_uint_ulong", + "test_convert_uint_ulong", + "test_convert_uint_rte_ulong", + "test_convert_uint_rtp_ulong", + "test_convert_uint_rtn_ulong", + "test_convert_uint_rtz_ulong", + "test_convert_uint_sat_ulong", + "test_convert_uint_sat_rte_ulong", + "test_convert_uint_sat_rtp_ulong", + "test_convert_uint_sat_rtn_ulong", + "test_convert_uint_sat_rtz_ulong", + "test_implicit_uint_long", + "test_convert_uint_long", + 
"test_convert_uint_rte_long", + "test_convert_uint_rtp_long", + "test_convert_uint_rtn_long", + "test_convert_uint_rtz_long", + "test_convert_uint_sat_long", + "test_convert_uint_sat_rte_long", + "test_convert_uint_sat_rtp_long", + "test_convert_uint_sat_rtn_long", + "test_convert_uint_sat_rtz_long", + "test_implicit_int_uchar", + "test_convert_int_uchar", + "test_convert_int_rte_uchar", + "test_convert_int_rtp_uchar", + "test_convert_int_rtn_uchar", + "test_convert_int_rtz_uchar", + "test_convert_int_sat_uchar", + "test_convert_int_sat_rte_uchar", + "test_convert_int_sat_rtp_uchar", + "test_convert_int_sat_rtn_uchar", + "test_convert_int_sat_rtz_uchar", + "test_implicit_int_char", + "test_convert_int_char", + "test_convert_int_rte_char", + "test_convert_int_rtp_char", + "test_convert_int_rtn_char", + "test_convert_int_rtz_char", + "test_convert_int_sat_char", + "test_convert_int_sat_rte_char", + "test_convert_int_sat_rtp_char", + "test_convert_int_sat_rtn_char", + "test_convert_int_sat_rtz_char", + "test_implicit_int_ushort", + "test_convert_int_ushort", + "test_convert_int_rte_ushort", + "test_convert_int_rtp_ushort", + "test_convert_int_rtn_ushort", + "test_convert_int_rtz_ushort", + "test_convert_int_sat_ushort", + "test_convert_int_sat_rte_ushort", + "test_convert_int_sat_rtp_ushort", + "test_convert_int_sat_rtn_ushort", + "test_convert_int_sat_rtz_ushort", + "test_implicit_int_short", + "test_convert_int_short", + "test_convert_int_rte_short", + "test_convert_int_rtp_short", + "test_convert_int_rtn_short", + "test_convert_int_rtz_short", + "test_convert_int_sat_short", + "test_convert_int_sat_rte_short", + "test_convert_int_sat_rtp_short", + "test_convert_int_sat_rtn_short", + "test_convert_int_sat_rtz_short", + "test_implicit_int_uint", + "test_convert_int_uint", + "test_convert_int_rte_uint", + "test_convert_int_rtp_uint", + "test_convert_int_rtn_uint", + "test_convert_int_rtz_uint", + "test_convert_int_sat_uint", + "test_convert_int_sat_rte_uint", + 
"test_convert_int_sat_rtp_uint", + "test_convert_int_sat_rtn_uint", + "test_convert_int_sat_rtz_uint", + "test_implicit_int_int", + "test_convert_int_int", + "test_convert_int_rte_int", + "test_convert_int_rtp_int", + "test_convert_int_rtn_int", + "test_convert_int_rtz_int", + "test_convert_int_sat_int", + "test_convert_int_sat_rte_int", + "test_convert_int_sat_rtp_int", + "test_convert_int_sat_rtn_int", + "test_convert_int_sat_rtz_int", + "test_implicit_int_float", + "test_convert_int_float", + "test_convert_int_rte_float", + "test_convert_int_rtp_float", + "test_convert_int_rtn_float", + "test_convert_int_rtz_float", + "test_convert_int_sat_float", + "test_convert_int_sat_rte_float", + "test_convert_int_sat_rtp_float", + "test_convert_int_sat_rtn_float", + "test_convert_int_sat_rtz_float", + "test_implicit_int_ulong", + "test_convert_int_ulong", + "test_convert_int_rte_ulong", + "test_convert_int_rtp_ulong", + "test_convert_int_rtn_ulong", + "test_convert_int_rtz_ulong", + "test_convert_int_sat_ulong", + "test_convert_int_sat_rte_ulong", + "test_convert_int_sat_rtp_ulong", + "test_convert_int_sat_rtn_ulong", + "test_convert_int_sat_rtz_ulong", + "test_implicit_int_long", + "test_convert_int_long", + "test_convert_int_rte_long", + "test_convert_int_rtp_long", + "test_convert_int_rtn_long", + "test_convert_int_rtz_long", + "test_convert_int_sat_long", + "test_convert_int_sat_rte_long", + "test_convert_int_sat_rtp_long", + "test_convert_int_sat_rtn_long", + "test_convert_int_sat_rtz_long", + "test_implicit_float_uchar", + "test_convert_float_uchar", + "test_convert_float_rte_uchar", + "test_convert_float_rtp_uchar", + "test_convert_float_rtn_uchar", + "test_convert_float_rtz_uchar", + "test_implicit_float_char", + "test_convert_float_char", + "test_convert_float_rte_char", + "test_convert_float_rtp_char", + "test_convert_float_rtn_char", + "test_convert_float_rtz_char", + "test_implicit_float_ushort", + "test_convert_float_ushort", + "test_convert_float_rte_ushort", 
+ "test_convert_float_rtp_ushort", + "test_convert_float_rtn_ushort", + "test_convert_float_rtz_ushort", + "test_implicit_float_short", + "test_convert_float_short", + "test_convert_float_rte_short", + "test_convert_float_rtp_short", + "test_convert_float_rtn_short", + "test_convert_float_rtz_short", + "test_implicit_float_uint", + "test_convert_float_uint", + "test_convert_float_rte_uint", + "test_convert_float_rtp_uint", + "test_convert_float_rtn_uint", + "test_convert_float_rtz_uint", + "test_implicit_float_int", + "test_convert_float_int", + "test_convert_float_rte_int", + "test_convert_float_rtp_int", + "test_convert_float_rtn_int", + "test_convert_float_rtz_int", + "test_implicit_float_float", + "test_convert_float_float", + "test_convert_float_rte_float", + "test_convert_float_rtp_float", + "test_convert_float_rtn_float", + "test_convert_float_rtz_float", + "test_implicit_float_ulong", + "test_convert_float_ulong", + "test_convert_float_rte_ulong", + "test_convert_float_rtp_ulong", + "test_convert_float_rtn_ulong", + "test_convert_float_rtz_ulong", + "test_implicit_float_long", + "test_convert_float_long", + "test_convert_float_rte_long", + "test_convert_float_rtp_long", + "test_convert_float_rtn_long", + "test_convert_float_rtz_long", + "test_implicit_ulong_uchar", + "test_convert_ulong_uchar", + "test_convert_ulong_rte_uchar", + "test_convert_ulong_rtp_uchar", + "test_convert_ulong_rtn_uchar", + "test_convert_ulong_rtz_uchar", + "test_convert_ulong_sat_uchar", + "test_convert_ulong_sat_rte_uchar", + "test_convert_ulong_sat_rtp_uchar", + "test_convert_ulong_sat_rtn_uchar", + "test_convert_ulong_sat_rtz_uchar", + "test_implicit_ulong_char", + "test_convert_ulong_char", + "test_convert_ulong_rte_char", + "test_convert_ulong_rtp_char", + "test_convert_ulong_rtn_char", + "test_convert_ulong_rtz_char", + "test_convert_ulong_sat_char", + "test_convert_ulong_sat_rte_char", + "test_convert_ulong_sat_rtp_char", + "test_convert_ulong_sat_rtn_char", + 
"test_convert_ulong_sat_rtz_char", + "test_implicit_ulong_ushort", + "test_convert_ulong_ushort", + "test_convert_ulong_rte_ushort", + "test_convert_ulong_rtp_ushort", + "test_convert_ulong_rtn_ushort", + "test_convert_ulong_rtz_ushort", + "test_convert_ulong_sat_ushort", + "test_convert_ulong_sat_rte_ushort", + "test_convert_ulong_sat_rtp_ushort", + "test_convert_ulong_sat_rtn_ushort", + "test_convert_ulong_sat_rtz_ushort", + "test_implicit_ulong_short", + "test_convert_ulong_short", + "test_convert_ulong_rte_short", + "test_convert_ulong_rtp_short", + "test_convert_ulong_rtn_short", + "test_convert_ulong_rtz_short", + "test_convert_ulong_sat_short", + "test_convert_ulong_sat_rte_short", + "test_convert_ulong_sat_rtp_short", + "test_convert_ulong_sat_rtn_short", + "test_convert_ulong_sat_rtz_short", + "test_implicit_ulong_uint", + "test_convert_ulong_uint", + "test_convert_ulong_rte_uint", + "test_convert_ulong_rtp_uint", + "test_convert_ulong_rtn_uint", + "test_convert_ulong_rtz_uint", + "test_convert_ulong_sat_uint", + "test_convert_ulong_sat_rte_uint", + "test_convert_ulong_sat_rtp_uint", + "test_convert_ulong_sat_rtn_uint", + "test_convert_ulong_sat_rtz_uint", + "test_implicit_ulong_int", + "test_convert_ulong_int", + "test_convert_ulong_rte_int", + "test_convert_ulong_rtp_int", + "test_convert_ulong_rtn_int", + "test_convert_ulong_rtz_int", + "test_convert_ulong_sat_int", + "test_convert_ulong_sat_rte_int", + "test_convert_ulong_sat_rtp_int", + "test_convert_ulong_sat_rtn_int", + "test_convert_ulong_sat_rtz_int", + "test_implicit_ulong_float", + "test_convert_ulong_float", + "test_convert_ulong_rte_float", + "test_convert_ulong_rtp_float", + "test_convert_ulong_rtn_float", + "test_convert_ulong_rtz_float", + "test_convert_ulong_sat_float", + "test_convert_ulong_sat_rte_float", + "test_convert_ulong_sat_rtp_float", + "test_convert_ulong_sat_rtn_float", + "test_convert_ulong_sat_rtz_float", + "test_implicit_ulong_ulong", + "test_convert_ulong_ulong", + 
"test_convert_ulong_rte_ulong", + "test_convert_ulong_rtp_ulong", + "test_convert_ulong_rtn_ulong", + "test_convert_ulong_rtz_ulong", + "test_convert_ulong_sat_ulong", + "test_convert_ulong_sat_rte_ulong", + "test_convert_ulong_sat_rtp_ulong", + "test_convert_ulong_sat_rtn_ulong", + "test_convert_ulong_sat_rtz_ulong", + "test_implicit_ulong_long", + "test_convert_ulong_long", + "test_convert_ulong_rte_long", + "test_convert_ulong_rtp_long", + "test_convert_ulong_rtn_long", + "test_convert_ulong_rtz_long", + "test_convert_ulong_sat_long", + "test_convert_ulong_sat_rte_long", + "test_convert_ulong_sat_rtp_long", + "test_convert_ulong_sat_rtn_long", + "test_convert_ulong_sat_rtz_long", + "test_implicit_long_uchar", + "test_convert_long_uchar", + "test_convert_long_rte_uchar", + "test_convert_long_rtp_uchar", + "test_convert_long_rtn_uchar", + "test_convert_long_rtz_uchar", + "test_convert_long_sat_uchar", + "test_convert_long_sat_rte_uchar", + "test_convert_long_sat_rtp_uchar", + "test_convert_long_sat_rtn_uchar", + "test_convert_long_sat_rtz_uchar", + "test_implicit_long_char", + "test_convert_long_char", + "test_convert_long_rte_char", + "test_convert_long_rtp_char", + "test_convert_long_rtn_char", + "test_convert_long_rtz_char", + "test_convert_long_sat_char", + "test_convert_long_sat_rte_char", + "test_convert_long_sat_rtp_char", + "test_convert_long_sat_rtn_char", + "test_convert_long_sat_rtz_char", + "test_implicit_long_ushort", + "test_convert_long_ushort", + "test_convert_long_rte_ushort", + "test_convert_long_rtp_ushort", + "test_convert_long_rtn_ushort", + "test_convert_long_rtz_ushort", + "test_convert_long_sat_ushort", + "test_convert_long_sat_rte_ushort", + "test_convert_long_sat_rtp_ushort", + "test_convert_long_sat_rtn_ushort", + "test_convert_long_sat_rtz_ushort", + "test_implicit_long_short", + "test_convert_long_short", + "test_convert_long_rte_short", + "test_convert_long_rtp_short", + "test_convert_long_rtn_short", + "test_convert_long_rtz_short", 
+ "test_convert_long_sat_short", + "test_convert_long_sat_rte_short", + "test_convert_long_sat_rtp_short", + "test_convert_long_sat_rtn_short", + "test_convert_long_sat_rtz_short", + "test_implicit_long_uint", + "test_convert_long_uint", + "test_convert_long_rte_uint", + "test_convert_long_rtp_uint", + "test_convert_long_rtn_uint", + "test_convert_long_rtz_uint", + "test_convert_long_sat_uint", + "test_convert_long_sat_rte_uint", + "test_convert_long_sat_rtp_uint", + "test_convert_long_sat_rtn_uint", + "test_convert_long_sat_rtz_uint", + "test_implicit_long_int", + "test_convert_long_int", + "test_convert_long_rte_int", + "test_convert_long_rtp_int", + "test_convert_long_rtn_int", + "test_convert_long_rtz_int", + "test_convert_long_sat_int", + "test_convert_long_sat_rte_int", + "test_convert_long_sat_rtp_int", + "test_convert_long_sat_rtn_int", + "test_convert_long_sat_rtz_int", + "test_implicit_long_float", + "test_convert_long_float", + "test_convert_long_rte_float", + "test_convert_long_rtp_float", + "test_convert_long_rtn_float", + "test_convert_long_rtz_float", + "test_convert_long_sat_float", + "test_convert_long_sat_rte_float", + "test_convert_long_sat_rtp_float", + "test_convert_long_sat_rtn_float", + "test_convert_long_sat_rtz_float", + "test_implicit_long_ulong", + "test_convert_long_ulong", + "test_convert_long_rte_ulong", + "test_convert_long_rtp_ulong", + "test_convert_long_rtn_ulong", + "test_convert_long_rtz_ulong", + "test_convert_long_sat_ulong", + "test_convert_long_sat_rte_ulong", + "test_convert_long_sat_rtp_ulong", + "test_convert_long_sat_rtn_ulong", + "test_convert_long_sat_rtz_ulong", + "test_implicit_long_long", + "test_convert_long_long", + "test_convert_long_rte_long", + "test_convert_long_rtp_long", + "test_convert_long_rtn_long", + "test_convert_long_rtz_long", + "test_convert_long_sat_long", + "test_convert_long_sat_rte_long", + "test_convert_long_sat_rtp_long", + "test_convert_long_sat_rtn_long", + "test_convert_long_sat_rtz_long", + 
"long_convert2_type_roundingmode_type_f", + "long_convert3_type_roundingmode_type_f", + "long_convert4_type_roundingmode_type_f", + "long_convert8_type_roundingmode_type_f", + "long_convert16_type_roundingmode_type_f", + }; + + log_info("test_conversions\n"); + return test_suite(device, size_t_width, folder, test_name, sizeof(test_name) / sizeof(const char *), ""); +} + + +bool test_conversions_double (cl_device_id device, cl_uint size_t_width, const char *folder) +{ + static const char* test_name[] = { + "convert2_type_roundingmode_type_d", + "convert3_type_roundingmode_type_d", + "convert4_type_roundingmode_type_d", + "convert8_type_roundingmode_type_d", + "convert16_type_roundingmode_type_d", + "test_implicit_uchar_double", + "test_convert_uchar_double", + "test_convert_uchar_rte_double", + "test_convert_uchar_rtp_double", + "test_convert_uchar_rtn_double", + "test_convert_uchar_rtz_double", + "test_convert_uchar_sat_double", + "test_convert_uchar_sat_rte_double", + "test_convert_uchar_sat_rtp_double", + "test_convert_uchar_sat_rtn_double", + "test_convert_uchar_sat_rtz_double", + "test_implicit_char_double", + "test_convert_char_double", + "test_convert_char_rte_double", + "test_convert_char_rtp_double", + "test_convert_char_rtn_double", + "test_convert_char_rtz_double", + "test_convert_char_sat_double", + "test_convert_char_sat_rte_double", + "test_convert_char_sat_rtp_double", + "test_convert_char_sat_rtn_double", + "test_convert_char_sat_rtz_double", + "test_implicit_ushort_double", + "test_convert_ushort_double", + "test_convert_ushort_rte_double", + "test_convert_ushort_rtp_double", + "test_convert_ushort_rtn_double", + "test_convert_ushort_rtz_double", + "test_convert_ushort_sat_double", + "test_convert_ushort_sat_rte_double", + "test_convert_ushort_sat_rtp_double", + "test_convert_ushort_sat_rtn_double", + "test_convert_ushort_sat_rtz_double", + "test_implicit_short_double", + "test_convert_short_double", + "test_convert_short_rte_double", + 
"test_convert_short_rtp_double", + "test_convert_short_rtn_double", + "test_convert_short_rtz_double", + "test_convert_short_sat_double", + "test_convert_short_sat_rte_double", + "test_convert_short_sat_rtp_double", + "test_convert_short_sat_rtn_double", + "test_convert_short_sat_rtz_double", + "test_implicit_uint_double", + "test_convert_uint_double", + "test_convert_uint_rte_double", + "test_convert_uint_rtp_double", + "test_convert_uint_rtn_double", + "test_convert_uint_rtz_double", + "test_convert_uint_sat_double", + "test_convert_uint_sat_rte_double", + "test_convert_uint_sat_rtp_double", + "test_convert_uint_sat_rtn_double", + "test_convert_uint_sat_rtz_double", + "test_implicit_int_double", + "test_convert_int_double", + "test_convert_int_rte_double", + "test_convert_int_rtp_double", + "test_convert_int_rtn_double", + "test_convert_int_rtz_double", + "test_convert_int_sat_double", + "test_convert_int_sat_rte_double", + "test_convert_int_sat_rtp_double", + "test_convert_int_sat_rtn_double", + "test_convert_int_sat_rtz_double", + "test_implicit_float_double", + "test_convert_float_double", + "test_convert_float_rte_double", + "test_convert_float_rtp_double", + "test_convert_float_rtn_double", + "test_convert_float_rtz_double", + "test_implicit_double_uchar", + "test_convert_double_uchar", + "test_convert_double_rte_uchar", + "test_convert_double_rtp_uchar", + "test_convert_double_rtn_uchar", + "test_convert_double_rtz_uchar", + "test_implicit_double_char", + "test_convert_double_char", + "test_convert_double_rte_char", + "test_convert_double_rtp_char", + "test_convert_double_rtn_char", + "test_convert_double_rtz_char", + "test_implicit_double_ushort", + "test_convert_double_ushort", + "test_convert_double_rte_ushort", + "test_convert_double_rtp_ushort", + "test_convert_double_rtn_ushort", + "test_convert_double_rtz_ushort", + "test_implicit_double_short", + "test_convert_double_short", + "test_convert_double_rte_short", + "test_convert_double_rtp_short", + 
"test_convert_double_rtn_short", + "test_convert_double_rtz_short", + "test_implicit_double_uint", + "test_convert_double_uint", + "test_convert_double_rte_uint", + "test_convert_double_rtp_uint", + "test_convert_double_rtn_uint", + "test_convert_double_rtz_uint", + "test_implicit_double_int", + "test_convert_double_int", + "test_convert_double_rte_int", + "test_convert_double_rtp_int", + "test_convert_double_rtn_int", + "test_convert_double_rtz_int", + "test_implicit_double_float", + "test_convert_double_float", + "test_convert_double_rte_float", + "test_convert_double_rtp_float", + "test_convert_double_rtn_float", + "test_convert_double_rtz_float", + "test_implicit_double_double", + "test_convert_double_double", + "test_convert_double_rte_double", + "test_convert_double_rtp_double", + "test_convert_double_rtn_double", + "test_convert_double_rtz_double", + "test_implicit_double_ulong", + "test_convert_double_ulong", + "test_convert_double_rte_ulong", + "test_convert_double_rtp_ulong", + "test_convert_double_rtn_ulong", + "test_convert_double_rtz_ulong", + "test_implicit_double_long", + "test_convert_double_long", + "test_convert_double_rte_long", + "test_convert_double_rtp_long", + "test_convert_double_rtn_long", + "test_convert_double_rtz_long", + "test_implicit_ulong_double", + "test_convert_ulong_double", + "test_convert_ulong_rte_double", + "test_convert_ulong_rtp_double", + "test_convert_ulong_rtn_double", + "test_convert_ulong_rtz_double", + "test_convert_ulong_sat_double", + "test_convert_ulong_sat_rte_double", + "test_convert_ulong_sat_rtp_double", + "test_convert_ulong_sat_rtn_double", + "test_convert_ulong_sat_rtz_double", + "test_implicit_long_double", + "test_convert_long_double", + "test_convert_long_rte_double", + "test_convert_long_rtp_double", + "test_convert_long_rtn_double", + "test_convert_long_rtz_double", + "test_convert_long_sat_double", + "test_convert_long_sat_rte_double", + "test_convert_long_sat_rtp_double", + 
"test_convert_long_sat_rtn_double", + "test_convert_long_sat_rtz_double", + }; + + log_info("test_conversions_double\n"); + return test_suite(device, size_t_width, folder, test_name, sizeof(test_name) / sizeof(const char *), "cl_khr_fp64"); +} + + +bool test_geometrics (cl_device_id device, cl_uint size_t_width, const char *folder) +{ + static const char* test_name[] = { + "sample_test.geom_cross_float3", + "sample_test.geom_cross_float4", + "sample_test.geom_dot_float", + "sample_test.geom_dot_float2", + "sample_test.geom_dot_float3", + "sample_test.geom_dot_float4", + "sample_test.geom_distance_float", + "sample_test.geom_distance_float2", + "sample_test.geom_distance_float3", + "sample_test.geom_distance_float4", + "sample_test.geom_fast_distance_float", + "sample_test.geom_fast_distance_float2", + "sample_test.geom_fast_distance_float3", + "sample_test.geom_fast_distance_float4", + "sample_test.geom_length_float", + "sample_test.geom_length_float2", + "sample_test.geom_length_float3", + "sample_test.geom_length_float4", + "sample_test.geom_fast_length_float", + "sample_test.geom_fast_length_float2", + "sample_test.geom_fast_length_float3", + "sample_test.geom_fast_length_float4", + "sample_test.geom_normalize_float", + "sample_test.geom_normalize_float2", + "sample_test.geom_normalize_float3", + "sample_test.geom_normalize_float4", + "sample_test.geom_fast_normalize_float", + "sample_test.geom_fast_normalize_float2", + "sample_test.geom_fast_normalize_float3", + "sample_test.geom_fast_normalize_float4", + }; + + log_info("test_geometrics\n"); + return test_suite(device, size_t_width, folder, test_name, sizeof(test_name) / sizeof(const char *), ""); +} + + +bool test_geometrics_double (cl_device_id device, cl_uint size_t_width, const char *folder) +{ + static const char* test_name[] = { + "sample_test.geom_cross_double3", + "sample_test.geom_cross_double4", + "sample_test.geom_dot_double", + "sample_test.geom_dot_double2", + "sample_test.geom_dot_double3", + 
"sample_test.geom_dot_double4", + "sample_test.geom_distance_double", + "sample_test.geom_distance_double2", + "sample_test.geom_distance_double3", + "sample_test.geom_distance_double4", + "sample_test.geom_length_double", + "sample_test.geom_length_double2", + "sample_test.geom_length_double3", + "sample_test.geom_length_double4", + "sample_test.geom_normalize_double", + "sample_test.geom_normalize_double2", + "sample_test.geom_normalize_double3", + "sample_test.geom_normalize_double4", + }; + + log_info("test_geometrics_double\n"); + return test_suite(device, size_t_width, folder, test_name, sizeof(test_name) / sizeof(const char *), "cl_khr_fp64"); +} + + +bool test_half (cl_device_id device, cl_uint size_t_width, const char *folder) +{ + static const char* test_name[] = { + "test.vload_half_global", + "test.vload_half_private", + "test.vload_half_local", + "test.vload_half_constant", + "test.vload_half2_global", + "test.vload_half2_private", + "test.vload_half2_local", + "test.vload_half2_constant", + "test.vload_half4_global", + "test.vload_half4_private", + "test.vload_half4_local", + "test.vload_half4_constant", + "test.vload_half8_global", + "test.vload_half8_private", + "test.vload_half8_local", + "test.vload_half8_constant", + "test.vload_half16_global", + "test.vload_half16_private", + "test.vload_half16_local", + "test.vload_half16_constant", + "test.vload_half3_global", + "test.vload_half3_private", + "test.vload_half3_local", + "test.vload_half3_constant", + "test.vloada_half_global", + "test.vloada_half_private", + "test.vloada_half_local", + "test.vloada_half_constant", + "test.vloada_half2_global", + "test.vloada_half2_private", + "test.vloada_half2_local", + "test.vloada_half2_constant", + "test.vloada_half4_global", + "test.vloada_half4_private", + "test.vloada_half4_local", + "test.vloada_half4_constant", + "test.vloada_half8_global", + "test.vloada_half8_private", + "test.vloada_half8_local", + "test.vloada_half8_constant", + 
"test.vloada_half16_global", + "test.vloada_half16_private", + "test.vloada_half16_local", + "test.vloada_half16_constant", + "test.vloada_half3_global", + "test.vloada_half3_private", + "test.vloada_half3_local", + "test.vloada_half3_constant", + "test.vstore_half_global_float", + "test.vstore_half_private_float", + "test.vstore_half_local_float", + "test.vstore_half_global_float2", + "test.vstore_half_private_float2", + "test.vstore_half_local_float2", + "test.vstore_half_global_float4", + "test.vstore_half_private_float4", + "test.vstore_half_local_float4", + "test.vstore_half_global_float8", + "test.vstore_half_private_float8", + "test.vstore_half_local_float8", + "test.vstore_half_global_float16", + "test.vstore_half_private_float16", + "test.vstore_half_local_float16", + "test.vstore_half_global_float3", + "test.vstore_half_private_float3", + "test.vstore_half_local_float3", + "test.vstorea_half_global_float2", + "test.vstorea_half_private_float2", + "test.vstorea_half_local_float2", + "test.vstorea_half_global_float4", + "test.vstorea_half_private_float4", + "test.vstorea_half_local_float4", + "test.vstorea_half_global_float8", + "test.vstorea_half_private_float8", + "test.vstorea_half_local_float8", + "test.vstorea_half_global_float16", + "test.vstorea_half_private_float16", + "test.vstorea_half_local_float16", + "test.vstorea_half_global_float3", + "test.vstorea_half_private_float3", + "test.vstorea_half_local_float3", + "test.vstore_half_rte_global_float", + "test.vstore_half_rte_private_float", + "test.vstore_half_rte_local_float", + "test.vstore_half_rte_global_float2", + "test.vstore_half_rte_private_float2", + "test.vstore_half_rte_local_float2", + "test.vstore_half_rte_global_float4", + "test.vstore_half_rte_private_float4", + "test.vstore_half_rte_local_float4", + "test.vstore_half_rte_global_float8", + "test.vstore_half_rte_private_float8", + "test.vstore_half_rte_local_float8", + "test.vstore_half_rte_global_float16", + 
"test.vstore_half_rte_private_float16", + "test.vstore_half_rte_local_float16", + "test.vstore_half_rte_global_float3", + "test.vstore_half_rte_private_float3", + "test.vstore_half_rte_local_float3", + "test.vstorea_half_rte_global_float2", + "test.vstorea_half_rte_private_float2", + "test.vstorea_half_rte_local_float2", + "test.vstorea_half_rte_global_float4", + "test.vstorea_half_rte_private_float4", + "test.vstorea_half_rte_local_float4", + "test.vstorea_half_rte_global_float8", + "test.vstorea_half_rte_private_float8", + "test.vstorea_half_rte_local_float8", + "test.vstorea_half_rte_global_float16", + "test.vstorea_half_rte_private_float16", + "test.vstorea_half_rte_local_float16", + "test.vstorea_half_rte_global_float3", + "test.vstorea_half_rte_private_float3", + "test.vstorea_half_rte_local_float3", + "test.vstore_half_rtz_global_float", + "test.vstore_half_rtz_private_float", + "test.vstore_half_rtz_local_float", + "test.vstore_half_rtz_global_float2", + "test.vstore_half_rtz_private_float2", + "test.vstore_half_rtz_local_float2", + "test.vstore_half_rtz_global_float4", + "test.vstore_half_rtz_private_float4", + "test.vstore_half_rtz_local_float4", + "test.vstore_half_rtz_global_float8", + "test.vstore_half_rtz_private_float8", + "test.vstore_half_rtz_local_float8", + "test.vstore_half_rtz_global_float16", + "test.vstore_half_rtz_private_float16", + "test.vstore_half_rtz_local_float16", + "test.vstore_half_rtz_global_float3", + "test.vstore_half_rtz_private_float3", + "test.vstore_half_rtz_local_float3", + "test.vstorea_half_rtz_global_float2", + "test.vstorea_half_rtz_private_float2", + "test.vstorea_half_rtz_local_float2", + "test.vstorea_half_rtz_global_float4", + "test.vstorea_half_rtz_private_float4", + "test.vstorea_half_rtz_local_float4", + "test.vstorea_half_rtz_global_float8", + "test.vstorea_half_rtz_private_float8", + "test.vstorea_half_rtz_local_float8", + "test.vstorea_half_rtz_global_float16", + "test.vstorea_half_rtz_private_float16", + 
"test.vstorea_half_rtz_local_float16", + "test.vstorea_half_rtz_global_float3", + "test.vstorea_half_rtz_private_float3", + "test.vstorea_half_rtz_local_float3", + "test.vstore_half_rtp_global_float", + "test.vstore_half_rtp_private_float", + "test.vstore_half_rtp_local_float", + "test.vstore_half_rtp_global_float2", + "test.vstore_half_rtp_private_float2", + "test.vstore_half_rtp_local_float2", + "test.vstore_half_rtp_global_float4", + "test.vstore_half_rtp_private_float4", + "test.vstore_half_rtp_local_float4", + "test.vstore_half_rtp_global_float8", + "test.vstore_half_rtp_private_float8", + "test.vstore_half_rtp_local_float8", + "test.vstore_half_rtp_global_float16", + "test.vstore_half_rtp_private_float16", + "test.vstore_half_rtp_local_float16", + "test.vstore_half_rtp_global_float3", + "test.vstore_half_rtp_private_float3", + "test.vstore_half_rtp_local_float3", + "test.vstorea_half_rtp_global_float2", + "test.vstorea_half_rtp_private_float2", + "test.vstorea_half_rtp_local_float2", + "test.vstorea_half_rtp_global_float4", + "test.vstorea_half_rtp_private_float4", + "test.vstorea_half_rtp_local_float4", + "test.vstorea_half_rtp_global_float8", + "test.vstorea_half_rtp_private_float8", + "test.vstorea_half_rtp_local_float8", + "test.vstorea_half_rtp_global_float16", + "test.vstorea_half_rtp_private_float16", + "test.vstorea_half_rtp_local_float16", + "test.vstorea_half_rtp_global_float3", + "test.vstorea_half_rtp_private_float3", + "test.vstorea_half_rtp_local_float3", + "test.vstore_half_rtn_global_float", + "test.vstore_half_rtn_private_float", + "test.vstore_half_rtn_local_float", + "test.vstore_half_rtn_global_float2", + "test.vstore_half_rtn_private_float2", + "test.vstore_half_rtn_local_float2", + "test.vstore_half_rtn_global_float4", + "test.vstore_half_rtn_private_float4", + "test.vstore_half_rtn_local_float4", + "test.vstore_half_rtn_global_float8", + "test.vstore_half_rtn_private_float8", + "test.vstore_half_rtn_local_float8", + 
"test.vstore_half_rtn_global_float16", + "test.vstore_half_rtn_private_float16", + "test.vstore_half_rtn_local_float16", + "test.vstore_half_rtn_global_float3", + "test.vstore_half_rtn_private_float3", + "test.vstore_half_rtn_local_float3", + "test.vstorea_half_rtn_global_float2", + "test.vstorea_half_rtn_private_float2", + "test.vstorea_half_rtn_local_float2", + "test.vstorea_half_rtn_global_float4", + "test.vstorea_half_rtn_private_float4", + "test.vstorea_half_rtn_local_float4", + "test.vstorea_half_rtn_global_float8", + "test.vstorea_half_rtn_private_float8", + "test.vstorea_half_rtn_local_float8", + "test.vstorea_half_rtn_global_float16", + "test.vstorea_half_rtn_private_float16", + "test.vstorea_half_rtn_local_float16", + "test.vstorea_half_rtn_global_float3", + "test.vstorea_half_rtn_private_float3", + "test.vstorea_half_rtn_local_float3", + }; + + log_info("test_half\n"); + return test_suite(device, size_t_width, folder, test_name, sizeof(test_name) / sizeof(const char *), ""); +} + + +bool test_half_double (cl_device_id device, cl_uint size_t_width, const char *folder) +{ + static const char* test_name[] = { + "test.vstore_half_global_double", + "test.vstore_half_private_double", + "test.vstore_half_local_double", + "test.vstore_half_global_double2", + "test.vstore_half_private_double2", + "test.vstore_half_local_double2", + "test.vstore_half_global_double4", + "test.vstore_half_private_double4", + "test.vstore_half_local_double4", + "test.vstore_half_global_double8", + "test.vstore_half_private_double8", + "test.vstore_half_local_double8", + "test.vstore_half_global_double16", + "test.vstore_half_private_double16", + "test.vstore_half_local_double16", + "test.vstore_half_global_double3", + "test.vstore_half_private_double3", + "test.vstore_half_local_double3", + "test.vstorea_half_global_double2", + "test.vstorea_half_private_double2", + "test.vstorea_half_local_double2", + "test.vstorea_half_global_double4", + "test.vstorea_half_private_double4", + 
"test.vstorea_half_local_double4", + "test.vstorea_half_global_double8", + "test.vstorea_half_private_double8", + "test.vstorea_half_local_double8", + "test.vstorea_half_global_double16", + "test.vstorea_half_private_double16", + "test.vstorea_half_local_double16", + "test.vstorea_half_global_double3", + "test.vstorea_half_private_double3", + "test.vstorea_half_local_double3", + "test.vstore_half_rte_global_double", + "test.vstore_half_rte_private_double", + "test.vstore_half_rte_local_double", + "test.vstore_half_rte_global_double2", + "test.vstore_half_rte_private_double2", + "test.vstore_half_rte_local_double2", + "test.vstore_half_rte_global_double4", + "test.vstore_half_rte_private_double4", + "test.vstore_half_rte_local_double4", + "test.vstore_half_rte_global_double8", + "test.vstore_half_rte_private_double8", + "test.vstore_half_rte_local_double8", + "test.vstore_half_rte_global_double16", + "test.vstore_half_rte_private_double16", + "test.vstore_half_rte_local_double16", + "test.vstore_half_rte_global_double3", + "test.vstore_half_rte_private_double3", + "test.vstore_half_rte_local_double3", + "test.vstorea_half_rte_global_double2", + "test.vstorea_half_rte_private_double2", + "test.vstorea_half_rte_local_double2", + "test.vstorea_half_rte_global_double4", + "test.vstorea_half_rte_private_double4", + "test.vstorea_half_rte_local_double4", + "test.vstorea_half_rte_global_double8", + "test.vstorea_half_rte_private_double8", + "test.vstorea_half_rte_local_double8", + "test.vstorea_half_rte_global_double16", + "test.vstorea_half_rte_private_double16", + "test.vstorea_half_rte_local_double16", + "test.vstorea_half_rte_global_double3", + "test.vstorea_half_rte_private_double3", + "test.vstorea_half_rte_local_double3", + "test.vstore_half_rtz_global_double", + "test.vstore_half_rtz_private_double", + "test.vstore_half_rtz_local_double", + "test.vstore_half_rtz_global_double2", + "test.vstore_half_rtz_private_double2", + "test.vstore_half_rtz_local_double2", + 
"test.vstore_half_rtz_global_double4", + "test.vstore_half_rtz_private_double4", + "test.vstore_half_rtz_local_double4", + "test.vstore_half_rtz_global_double8", + "test.vstore_half_rtz_private_double8", + "test.vstore_half_rtz_local_double8", + "test.vstore_half_rtz_global_double16", + "test.vstore_half_rtz_private_double16", + "test.vstore_half_rtz_local_double16", + "test.vstore_half_rtz_global_double3", + "test.vstore_half_rtz_private_double3", + "test.vstore_half_rtz_local_double3", + "test.vstorea_half_rtz_global_double2", + "test.vstorea_half_rtz_private_double2", + "test.vstorea_half_rtz_local_double2", + "test.vstorea_half_rtz_global_double4", + "test.vstorea_half_rtz_private_double4", + "test.vstorea_half_rtz_local_double4", + "test.vstorea_half_rtz_global_double8", + "test.vstorea_half_rtz_private_double8", + "test.vstorea_half_rtz_local_double8", + "test.vstorea_half_rtz_global_double16", + "test.vstorea_half_rtz_private_double16", + "test.vstorea_half_rtz_local_double16", + "test.vstorea_half_rtz_global_double3", + "test.vstorea_half_rtz_private_double3", + "test.vstorea_half_rtz_local_double3", + "test.vstore_half_rtp_global_double", + "test.vstore_half_rtp_private_double", + "test.vstore_half_rtp_local_double", + "test.vstore_half_rtp_global_double2", + "test.vstore_half_rtp_private_double2", + "test.vstore_half_rtp_local_double2", + "test.vstore_half_rtp_global_double4", + "test.vstore_half_rtp_private_double4", + "test.vstore_half_rtp_local_double4", + "test.vstore_half_rtp_global_double8", + "test.vstore_half_rtp_private_double8", + "test.vstore_half_rtp_local_double8", + "test.vstore_half_rtp_global_double16", + "test.vstore_half_rtp_private_double16", + "test.vstore_half_rtp_local_double16", + "test.vstore_half_rtp_global_double3", + "test.vstore_half_rtp_private_double3", + "test.vstore_half_rtp_local_double3", + "test.vstorea_half_rtp_global_double2", + "test.vstorea_half_rtp_private_double2", + "test.vstorea_half_rtp_local_double2", + 
"test.vstorea_half_rtp_global_double4", + "test.vstorea_half_rtp_private_double4", + "test.vstorea_half_rtp_local_double4", + "test.vstorea_half_rtp_global_double8", + "test.vstorea_half_rtp_private_double8", + "test.vstorea_half_rtp_local_double8", + "test.vstorea_half_rtp_global_double16", + "test.vstorea_half_rtp_private_double16", + "test.vstorea_half_rtp_local_double16", + "test.vstorea_half_rtp_global_double3", + "test.vstorea_half_rtp_private_double3", + "test.vstorea_half_rtp_local_double3", + "test.vstore_half_rtn_global_double", + "test.vstore_half_rtn_private_double", + "test.vstore_half_rtn_local_double", + "test.vstore_half_rtn_global_double2", + "test.vstore_half_rtn_private_double2", + "test.vstore_half_rtn_local_double2", + "test.vstore_half_rtn_global_double4", + "test.vstore_half_rtn_private_double4", + "test.vstore_half_rtn_local_double4", + "test.vstore_half_rtn_global_double8", + "test.vstore_half_rtn_private_double8", + "test.vstore_half_rtn_local_double8", + "test.vstore_half_rtn_global_double16", + "test.vstore_half_rtn_private_double16", + "test.vstore_half_rtn_local_double16", + "test.vstore_half_rtn_global_double3", + "test.vstore_half_rtn_private_double3", + "test.vstore_half_rtn_local_double3", + "test.vstorea_half_rtn_global_double2", + "test.vstorea_half_rtn_private_double2", + "test.vstorea_half_rtn_local_double2", + "test.vstorea_half_rtn_global_double4", + "test.vstorea_half_rtn_private_double4", + "test.vstorea_half_rtn_local_double4", + "test.vstorea_half_rtn_global_double8", + "test.vstorea_half_rtn_private_double8", + "test.vstorea_half_rtn_local_double8", + "test.vstorea_half_rtn_global_double16", + "test.vstorea_half_rtn_private_double16", + "test.vstorea_half_rtn_local_double16", + "test.vstorea_half_rtn_global_double3", + "test.vstorea_half_rtn_private_double3", + "test.vstorea_half_rtn_local_double3", + }; + + log_info("test_half_double\n"); + return test_suite(device, size_t_width, folder, test_name, sizeof(test_name) / 
sizeof(const char *), "cl_khr_fp64"); +} + + +bool test_kernel_image_methods (cl_device_id device, cl_uint size_t_width, const char *folder) +{ + static const char* test_name[] = { + "sample_kernel.get_image_info_1D", + "sample_kernel.get_image_info_2D", + "sample_kernel.get_image_info_3D", + "sample_kernel.get_image_info_1D_array", + "sample_kernel.get_image_info_2D_array", + }; + + log_info("test_kernel_image_methods\n"); + return test_suite(device, size_t_width, folder, test_name, sizeof(test_name) / sizeof(const char *), ""); +} + + +bool test_images_kernel_read_write (cl_device_id device, cl_uint size_t_width, const char *folder) +{ + static const char* test_name[] = { + "sample_kernel.read_image_set_1D_fint", + "sample_kernel.read_image_set_1D_ffloat", + "sample_kernel.read_image_set_1D_iint", + "sample_kernel.read_image_set_1D_ifloat", + "sample_kernel.read_image_set_1D_uiint", + "sample_kernel.read_image_set_1D_uifloat", + "sample_kernel.write_image_1D_set_float", + "sample_kernel.write_image_1D_set_int", + "sample_kernel.write_image_1D_set_uint", + "sample_kernel.read_image_set_2D_fint", + "sample_kernel.read_image_set_2D_ffloat", + "sample_kernel.read_image_set_2D_iint", + "sample_kernel.read_image_set_2D_ifloat", + "sample_kernel.read_image_set_2D_uiint", + "sample_kernel.read_image_set_2D_uifloat", + "sample_kernel.write_image_2D_set_float", + "sample_kernel.write_image_2D_set_int", + "sample_kernel.write_image_2D_set_uint", + "sample_kernel.read_image_set_3D_fint", + "sample_kernel.read_image_set_3D_ffloat", + "sample_kernel.read_image_set_3D_iint", + "sample_kernel.read_image_set_3D_ifloat", + "sample_kernel.read_image_set_3D_uiint", + "sample_kernel.read_image_set_3D_uifloat", + "sample_kernel.read_image_set_1D_array_fint", + "sample_kernel.read_image_set_1D_array_ffloat", + "sample_kernel.read_image_set_1D_array_iint", + "sample_kernel.read_image_set_1D_array_ifloat", + "sample_kernel.read_image_set_1D_array_uiint", + 
"sample_kernel.read_image_set_1D_array_uifloat", + "sample_kernel.write_image_1D_array_set_float", + "sample_kernel.write_image_1D_array_set_int", + "sample_kernel.write_image_1D_array_set_uint", + "sample_kernel.read_image_set_2D_array_fint", + "sample_kernel.read_image_set_2D_array_ffloat", + "sample_kernel.read_image_set_2D_array_iint", + "sample_kernel.read_image_set_2D_array_ifloat", + "sample_kernel.read_image_set_2D_array_uiint", + "sample_kernel.read_image_set_2D_array_uifloat", + "sample_kernel.write_image_2D_array_set_float", + "sample_kernel.write_image_2D_array_set_int", + "sample_kernel.write_image_2D_array_set_uint", + }; + + log_info("test_images_kernel_read_write\n"); + return test_suite(device, size_t_width, folder, test_name, sizeof(test_name) / sizeof(const char *), ""); +} + + +bool test_images_samplerless_read (cl_device_id device, cl_uint size_t_width, const char *folder) +{ + static const char* test_name[] = { + "sample_kernel.read_image_set_1D_float", + "sample_kernel.read_image_set_1D_int", + "sample_kernel.read_image_set_1D_uint", + "sample_kernel.read_image_set_1D_buffer_float", + "sample_kernel.read_image_set_1D_buffer_int", + "sample_kernel.read_image_set_1D_buffer_uint", + "sample_kernel.read_image_set_2D_float", + "sample_kernel.read_image_set_2D_int", + "sample_kernel.read_image_set_2D_uint", + "sample_kernel.read_image_set_3D_float", + "sample_kernel.read_image_set_3D_int", + "sample_kernel.read_image_set_3D_uint", + "sample_kernel.read_image_set_1D_array_float", + "sample_kernel.read_image_set_1D_array_int", + "sample_kernel.read_image_set_1D_array_uint", + "sample_kernel.read_image_set_2D_array_float", + "sample_kernel.read_image_set_2D_array_int", + "sample_kernel.read_image_set_2D_array_uint", + }; + + log_info("test_images_samplerless_read\n"); + return test_suite(device, size_t_width, folder, test_name, sizeof(test_name) / sizeof(const char *), ""); +} + + +bool test_integer_ops (cl_device_id device, cl_uint size_t_width, 
const char *folder) +{ + static const char* test_name[] = { + "sample_test.integer_clz_char", + "sample_test.integer_clz_char2", + "sample_test.integer_clz_char3", + "sample_test.integer_clz_char4", + "sample_test.integer_clz_char8", + "sample_test.integer_clz_char16", + "sample_test.integer_clz_uchar", + "sample_test.integer_clz_uchar2", + "sample_test.integer_clz_uchar3", + "sample_test.integer_clz_uchar4", + "sample_test.integer_clz_uchar8", + "sample_test.integer_clz_uchar16", + "sample_test.integer_clz_short", + "sample_test.integer_clz_short2", + "sample_test.integer_clz_short3", + "sample_test.integer_clz_short4", + "sample_test.integer_clz_short8", + "sample_test.integer_clz_short16", + "sample_test.integer_clz_ushort", + "sample_test.integer_clz_ushort2", + "sample_test.integer_clz_ushort3", + "sample_test.integer_clz_ushort4", + "sample_test.integer_clz_ushort8", + "sample_test.integer_clz_ushort16", + "sample_test.integer_clz_int", + "sample_test.integer_clz_int2", + "sample_test.integer_clz_int3", + "sample_test.integer_clz_int4", + "sample_test.integer_clz_int8", + "sample_test.integer_clz_int16", + "sample_test.integer_clz_uint", + "sample_test.integer_clz_uint2", + "sample_test.integer_clz_uint3", + "sample_test.integer_clz_uint4", + "sample_test.integer_clz_uint8", + "sample_test.integer_clz_uint16", + "sample_test.integer_clz_long", + "sample_test.integer_clz_long2", + "sample_test.integer_clz_long3", + "sample_test.integer_clz_long4", + "sample_test.integer_clz_long8", + "sample_test.integer_clz_long16", + "sample_test.integer_clz_ulong", + "sample_test.integer_clz_ulong2", + "sample_test.integer_clz_ulong3", + "sample_test.integer_clz_ulong4", + "sample_test.integer_clz_ulong8", + "sample_test.integer_clz_ulong16", + "sample_test.integer_hadd_char", + "sample_test.integer_hadd_char2", + "sample_test.integer_hadd_char3", + "sample_test.integer_hadd_char4", + "sample_test.integer_hadd_char8", + "sample_test.integer_hadd_char16", + 
"sample_test.integer_hadd_uchar", + "sample_test.integer_hadd_uchar2", + "sample_test.integer_hadd_uchar3", + "sample_test.integer_hadd_uchar4", + "sample_test.integer_hadd_uchar8", + "sample_test.integer_hadd_uchar16", + "sample_test.integer_hadd_short", + "sample_test.integer_hadd_short2", + "sample_test.integer_hadd_short3", + "sample_test.integer_hadd_short4", + "sample_test.integer_hadd_short8", + "sample_test.integer_hadd_short16", + "sample_test.integer_hadd_ushort", + "sample_test.integer_hadd_ushort2", + "sample_test.integer_hadd_ushort3", + "sample_test.integer_hadd_ushort4", + "sample_test.integer_hadd_ushort8", + "sample_test.integer_hadd_ushort16", + "sample_test.integer_hadd_int", + "sample_test.integer_hadd_int2", + "sample_test.integer_hadd_int3", + "sample_test.integer_hadd_int4", + "sample_test.integer_hadd_int8", + "sample_test.integer_hadd_int16", + "sample_test.integer_hadd_uint", + "sample_test.integer_hadd_uint2", + "sample_test.integer_hadd_uint3", + "sample_test.integer_hadd_uint4", + "sample_test.integer_hadd_uint8", + "sample_test.integer_hadd_uint16", + "sample_test.integer_hadd_long", + "sample_test.integer_hadd_long2", + "sample_test.integer_hadd_long3", + "sample_test.integer_hadd_long4", + "sample_test.integer_hadd_long8", + "sample_test.integer_hadd_long16", + "sample_test.integer_hadd_ulong", + "sample_test.integer_hadd_ulong2", + "sample_test.integer_hadd_ulong3", + "sample_test.integer_hadd_ulong4", + "sample_test.integer_hadd_ulong8", + "sample_test.integer_hadd_ulong16", + "sample_test.integer_rhadd_char", + "sample_test.integer_rhadd_char2", + "sample_test.integer_rhadd_char3", + "sample_test.integer_rhadd_char4", + "sample_test.integer_rhadd_char8", + "sample_test.integer_rhadd_char16", + "sample_test.integer_rhadd_uchar", + "sample_test.integer_rhadd_uchar2", + "sample_test.integer_rhadd_uchar3", + "sample_test.integer_rhadd_uchar4", + "sample_test.integer_rhadd_uchar8", + "sample_test.integer_rhadd_uchar16", + 
"sample_test.integer_rhadd_short", + "sample_test.integer_rhadd_short2", + "sample_test.integer_rhadd_short3", + "sample_test.integer_rhadd_short4", + "sample_test.integer_rhadd_short8", + "sample_test.integer_rhadd_short16", + "sample_test.integer_rhadd_ushort", + "sample_test.integer_rhadd_ushort2", + "sample_test.integer_rhadd_ushort3", + "sample_test.integer_rhadd_ushort4", + "sample_test.integer_rhadd_ushort8", + "sample_test.integer_rhadd_ushort16", + "sample_test.integer_rhadd_int", + "sample_test.integer_rhadd_int2", + "sample_test.integer_rhadd_int3", + "sample_test.integer_rhadd_int4", + "sample_test.integer_rhadd_int8", + "sample_test.integer_rhadd_int16", + "sample_test.integer_rhadd_uint", + "sample_test.integer_rhadd_uint2", + "sample_test.integer_rhadd_uint3", + "sample_test.integer_rhadd_uint4", + "sample_test.integer_rhadd_uint8", + "sample_test.integer_rhadd_uint16", + "sample_test.integer_rhadd_long", + "sample_test.integer_rhadd_long2", + "sample_test.integer_rhadd_long3", + "sample_test.integer_rhadd_long4", + "sample_test.integer_rhadd_long8", + "sample_test.integer_rhadd_long16", + "sample_test.integer_rhadd_ulong", + "sample_test.integer_rhadd_ulong2", + "sample_test.integer_rhadd_ulong3", + "sample_test.integer_rhadd_ulong4", + "sample_test.integer_rhadd_ulong8", + "sample_test.integer_rhadd_ulong16", + "sample_test.integer_mul_hi_char", + "sample_test.integer_mul_hi_char2", + "sample_test.integer_mul_hi_char3", + "sample_test.integer_mul_hi_char4", + "sample_test.integer_mul_hi_char8", + "sample_test.integer_mul_hi_char16", + "sample_test.integer_mul_hi_uchar", + "sample_test.integer_mul_hi_uchar2", + "sample_test.integer_mul_hi_uchar3", + "sample_test.integer_mul_hi_uchar4", + "sample_test.integer_mul_hi_uchar8", + "sample_test.integer_mul_hi_uchar16", + "sample_test.integer_mul_hi_short", + "sample_test.integer_mul_hi_short2", + "sample_test.integer_mul_hi_short3", + "sample_test.integer_mul_hi_short4", + 
"sample_test.integer_mul_hi_short8", + "sample_test.integer_mul_hi_short16", + "sample_test.integer_mul_hi_ushort", + "sample_test.integer_mul_hi_ushort2", + "sample_test.integer_mul_hi_ushort3", + "sample_test.integer_mul_hi_ushort4", + "sample_test.integer_mul_hi_ushort8", + "sample_test.integer_mul_hi_ushort16", + "sample_test.integer_mul_hi_int", + "sample_test.integer_mul_hi_int2", + "sample_test.integer_mul_hi_int3", + "sample_test.integer_mul_hi_int4", + "sample_test.integer_mul_hi_int8", + "sample_test.integer_mul_hi_int16", + "sample_test.integer_mul_hi_uint", + "sample_test.integer_mul_hi_uint2", + "sample_test.integer_mul_hi_uint3", + "sample_test.integer_mul_hi_uint4", + "sample_test.integer_mul_hi_uint8", + "sample_test.integer_mul_hi_uint16", + "sample_test.integer_mul_hi_long", + "sample_test.integer_mul_hi_long2", + "sample_test.integer_mul_hi_long3", + "sample_test.integer_mul_hi_long4", + "sample_test.integer_mul_hi_long8", + "sample_test.integer_mul_hi_long16", + "sample_test.integer_mul_hi_ulong", + "sample_test.integer_mul_hi_ulong2", + "sample_test.integer_mul_hi_ulong3", + "sample_test.integer_mul_hi_ulong4", + "sample_test.integer_mul_hi_ulong8", + "sample_test.integer_mul_hi_ulong16", + "sample_test.integer_rotate_char", + "sample_test.integer_rotate_char2", + "sample_test.integer_rotate_char3", + "sample_test.integer_rotate_char4", + "sample_test.integer_rotate_char8", + "sample_test.integer_rotate_char16", + "sample_test.integer_rotate_uchar", + "sample_test.integer_rotate_uchar2", + "sample_test.integer_rotate_uchar3", + "sample_test.integer_rotate_uchar4", + "sample_test.integer_rotate_uchar8", + "sample_test.integer_rotate_uchar16", + "sample_test.integer_rotate_short", + "sample_test.integer_rotate_short2", + "sample_test.integer_rotate_short3", + "sample_test.integer_rotate_short4", + "sample_test.integer_rotate_short8", + "sample_test.integer_rotate_short16", + "sample_test.integer_rotate_ushort", + 
"sample_test.integer_rotate_ushort2", + "sample_test.integer_rotate_ushort3", + "sample_test.integer_rotate_ushort4", + "sample_test.integer_rotate_ushort8", + "sample_test.integer_rotate_ushort16", + "sample_test.integer_rotate_int", + "sample_test.integer_rotate_int2", + "sample_test.integer_rotate_int3", + "sample_test.integer_rotate_int4", + "sample_test.integer_rotate_int8", + "sample_test.integer_rotate_int16", + "sample_test.integer_rotate_uint", + "sample_test.integer_rotate_uint2", + "sample_test.integer_rotate_uint3", + "sample_test.integer_rotate_uint4", + "sample_test.integer_rotate_uint8", + "sample_test.integer_rotate_uint16", + "sample_test.integer_rotate_long", + "sample_test.integer_rotate_long2", + "sample_test.integer_rotate_long3", + "sample_test.integer_rotate_long4", + "sample_test.integer_rotate_long8", + "sample_test.integer_rotate_long16", + "sample_test.integer_rotate_ulong", + "sample_test.integer_rotate_ulong2", + "sample_test.integer_rotate_ulong3", + "sample_test.integer_rotate_ulong4", + "sample_test.integer_rotate_ulong8", + "sample_test.integer_rotate_ulong16", + "sample_test.integer_clamp_char", + "sample_test.integer_clamp_char2", + "sample_test.integer_clamp_char3", + "sample_test.integer_clamp_char4", + "sample_test.integer_clamp_char8", + "sample_test.integer_clamp_char16", + "sample_test.integer_clamp_uchar", + "sample_test.integer_clamp_uchar2", + "sample_test.integer_clamp_uchar3", + "sample_test.integer_clamp_uchar4", + "sample_test.integer_clamp_uchar8", + "sample_test.integer_clamp_uchar16", + "sample_test.integer_clamp_short", + "sample_test.integer_clamp_short2", + "sample_test.integer_clamp_short3", + "sample_test.integer_clamp_short4", + "sample_test.integer_clamp_short8", + "sample_test.integer_clamp_short16", + "sample_test.integer_clamp_ushort", + "sample_test.integer_clamp_ushort2", + "sample_test.integer_clamp_ushort3", + "sample_test.integer_clamp_ushort4", + "sample_test.integer_clamp_ushort8", + 
"sample_test.integer_clamp_ushort16", + "sample_test.integer_clamp_int", + "sample_test.integer_clamp_int2", + "sample_test.integer_clamp_int3", + "sample_test.integer_clamp_int4", + "sample_test.integer_clamp_int8", + "sample_test.integer_clamp_int16", + "sample_test.integer_clamp_uint", + "sample_test.integer_clamp_uint2", + "sample_test.integer_clamp_uint3", + "sample_test.integer_clamp_uint4", + "sample_test.integer_clamp_uint8", + "sample_test.integer_clamp_uint16", + "sample_test.integer_clamp_long", + "sample_test.integer_clamp_long2", + "sample_test.integer_clamp_long3", + "sample_test.integer_clamp_long4", + "sample_test.integer_clamp_long8", + "sample_test.integer_clamp_long16", + "sample_test.integer_clamp_ulong", + "sample_test.integer_clamp_ulong2", + "sample_test.integer_clamp_ulong3", + "sample_test.integer_clamp_ulong4", + "sample_test.integer_clamp_ulong8", + "sample_test.integer_clamp_ulong16", + "sample_test.integer_mad_sat_char", + "sample_test.integer_mad_sat_char2", + "sample_test.integer_mad_sat_char3", + "sample_test.integer_mad_sat_char4", + "sample_test.integer_mad_sat_char8", + "sample_test.integer_mad_sat_char16", + "sample_test.integer_mad_sat_uchar", + "sample_test.integer_mad_sat_uchar2", + "sample_test.integer_mad_sat_uchar3", + "sample_test.integer_mad_sat_uchar4", + "sample_test.integer_mad_sat_uchar8", + "sample_test.integer_mad_sat_uchar16", + "sample_test.integer_mad_sat_short", + "sample_test.integer_mad_sat_short2", + "sample_test.integer_mad_sat_short3", + "sample_test.integer_mad_sat_short4", + "sample_test.integer_mad_sat_short8", + "sample_test.integer_mad_sat_short16", + "sample_test.integer_mad_sat_ushort", + "sample_test.integer_mad_sat_ushort2", + "sample_test.integer_mad_sat_ushort3", + "sample_test.integer_mad_sat_ushort4", + "sample_test.integer_mad_sat_ushort8", + "sample_test.integer_mad_sat_ushort16", + "sample_test.integer_mad_sat_int", + "sample_test.integer_mad_sat_int2", + "sample_test.integer_mad_sat_int3", 
+ "sample_test.integer_mad_sat_int4", + "sample_test.integer_mad_sat_int8", + "sample_test.integer_mad_sat_int16", + "sample_test.integer_mad_sat_uint", + "sample_test.integer_mad_sat_uint2", + "sample_test.integer_mad_sat_uint3", + "sample_test.integer_mad_sat_uint4", + "sample_test.integer_mad_sat_uint8", + "sample_test.integer_mad_sat_uint16", + "sample_test.integer_mad_sat_long", + "sample_test.integer_mad_sat_long2", + "sample_test.integer_mad_sat_long3", + "sample_test.integer_mad_sat_long4", + "sample_test.integer_mad_sat_long8", + "sample_test.integer_mad_sat_long16", + "sample_test.integer_mad_sat_ulong", + "sample_test.integer_mad_sat_ulong2", + "sample_test.integer_mad_sat_ulong3", + "sample_test.integer_mad_sat_ulong4", + "sample_test.integer_mad_sat_ulong8", + "sample_test.integer_mad_sat_ulong16", + "sample_test.integer_mad_hi_char", + "sample_test.integer_mad_hi_char2", + "sample_test.integer_mad_hi_char3", + "sample_test.integer_mad_hi_char4", + "sample_test.integer_mad_hi_char8", + "sample_test.integer_mad_hi_char16", + "sample_test.integer_mad_hi_uchar", + "sample_test.integer_mad_hi_uchar2", + "sample_test.integer_mad_hi_uchar3", + "sample_test.integer_mad_hi_uchar4", + "sample_test.integer_mad_hi_uchar8", + "sample_test.integer_mad_hi_uchar16", + "sample_test.integer_mad_hi_short", + "sample_test.integer_mad_hi_short2", + "sample_test.integer_mad_hi_short3", + "sample_test.integer_mad_hi_short4", + "sample_test.integer_mad_hi_short8", + "sample_test.integer_mad_hi_short16", + "sample_test.integer_mad_hi_ushort", + "sample_test.integer_mad_hi_ushort2", + "sample_test.integer_mad_hi_ushort3", + "sample_test.integer_mad_hi_ushort4", + "sample_test.integer_mad_hi_ushort8", + "sample_test.integer_mad_hi_ushort16", + "sample_test.integer_mad_hi_int", + "sample_test.integer_mad_hi_int2", + "sample_test.integer_mad_hi_int3", + "sample_test.integer_mad_hi_int4", + "sample_test.integer_mad_hi_int8", + "sample_test.integer_mad_hi_int16", + 
"sample_test.integer_mad_hi_uint", + "sample_test.integer_mad_hi_uint2", + "sample_test.integer_mad_hi_uint3", + "sample_test.integer_mad_hi_uint4", + "sample_test.integer_mad_hi_uint8", + "sample_test.integer_mad_hi_uint16", + "sample_test.integer_mad_hi_long", + "sample_test.integer_mad_hi_long2", + "sample_test.integer_mad_hi_long3", + "sample_test.integer_mad_hi_long4", + "sample_test.integer_mad_hi_long8", + "sample_test.integer_mad_hi_long16", + "sample_test.integer_mad_hi_ulong", + "sample_test.integer_mad_hi_ulong2", + "sample_test.integer_mad_hi_ulong3", + "sample_test.integer_mad_hi_ulong4", + "sample_test.integer_mad_hi_ulong8", + "sample_test.integer_mad_hi_ulong16", + "sample_test.integer_min_char", + "sample_test.integer_min_char2", + "sample_test.integer_min_char3", + "sample_test.integer_min_char4", + "sample_test.integer_min_char8", + "sample_test.integer_min_char16", + "sample_test.integer_min_uchar", + "sample_test.integer_min_uchar2", + "sample_test.integer_min_uchar3", + "sample_test.integer_min_uchar4", + "sample_test.integer_min_uchar8", + "sample_test.integer_min_uchar16", + "sample_test.integer_min_short", + "sample_test.integer_min_short2", + "sample_test.integer_min_short3", + "sample_test.integer_min_short4", + "sample_test.integer_min_short8", + "sample_test.integer_min_short16", + "sample_test.integer_min_ushort", + "sample_test.integer_min_ushort2", + "sample_test.integer_min_ushort3", + "sample_test.integer_min_ushort4", + "sample_test.integer_min_ushort8", + "sample_test.integer_min_ushort16", + "sample_test.integer_min_int", + "sample_test.integer_min_int2", + "sample_test.integer_min_int3", + "sample_test.integer_min_int4", + "sample_test.integer_min_int8", + "sample_test.integer_min_int16", + "sample_test.integer_min_uint", + "sample_test.integer_min_uint2", + "sample_test.integer_min_uint3", + "sample_test.integer_min_uint4", + "sample_test.integer_min_uint8", + "sample_test.integer_min_uint16", + "sample_test.integer_min_long", 
+ "sample_test.integer_min_long2", + "sample_test.integer_min_long3", + "sample_test.integer_min_long4", + "sample_test.integer_min_long8", + "sample_test.integer_min_long16", + "sample_test.integer_min_ulong", + "sample_test.integer_min_ulong2", + "sample_test.integer_min_ulong3", + "sample_test.integer_min_ulong4", + "sample_test.integer_min_ulong8", + "sample_test.integer_min_ulong16", + "sample_test.integer_max_char", + "sample_test.integer_max_char2", + "sample_test.integer_max_char3", + "sample_test.integer_max_char4", + "sample_test.integer_max_char8", + "sample_test.integer_max_char16", + "sample_test.integer_max_uchar", + "sample_test.integer_max_uchar2", + "sample_test.integer_max_uchar3", + "sample_test.integer_max_uchar4", + "sample_test.integer_max_uchar8", + "sample_test.integer_max_uchar16", + "sample_test.integer_max_short", + "sample_test.integer_max_short2", + "sample_test.integer_max_short3", + "sample_test.integer_max_short4", + "sample_test.integer_max_short8", + "sample_test.integer_max_short16", + "sample_test.integer_max_ushort", + "sample_test.integer_max_ushort2", + "sample_test.integer_max_ushort3", + "sample_test.integer_max_ushort4", + "sample_test.integer_max_ushort8", + "sample_test.integer_max_ushort16", + "sample_test.integer_max_int", + "sample_test.integer_max_int2", + "sample_test.integer_max_int3", + "sample_test.integer_max_int4", + "sample_test.integer_max_int8", + "sample_test.integer_max_int16", + "sample_test.integer_max_uint", + "sample_test.integer_max_uint2", + "sample_test.integer_max_uint3", + "sample_test.integer_max_uint4", + "sample_test.integer_max_uint8", + "sample_test.integer_max_uint16", + "sample_test.integer_max_long", + "sample_test.integer_max_long2", + "sample_test.integer_max_long3", + "sample_test.integer_max_long4", + "sample_test.integer_max_long8", + "sample_test.integer_max_long16", + "sample_test.integer_max_ulong", + "sample_test.integer_max_ulong2", + "sample_test.integer_max_ulong3", + 
"sample_test.integer_max_ulong4", + "sample_test.integer_max_ulong8", + "sample_test.integer_max_ulong16", + "test_upsample.integer_upsample_char", + "test_upsample.integer_upsample_char2", + "test_upsample.integer_upsample_char3", + "test_upsample.integer_upsample_char4", + "test_upsample.integer_upsample_char8", + "test_upsample.integer_upsample_char16", + "test_upsample.integer_upsample_uchar", + "test_upsample.integer_upsample_uchar2", + "test_upsample.integer_upsample_uchar3", + "test_upsample.integer_upsample_uchar4", + "test_upsample.integer_upsample_uchar8", + "test_upsample.integer_upsample_uchar16", + "test_upsample.integer_upsample_short", + "test_upsample.integer_upsample_short2", + "test_upsample.integer_upsample_short3", + "test_upsample.integer_upsample_short4", + "test_upsample.integer_upsample_short8", + "test_upsample.integer_upsample_short16", + "test_upsample.integer_upsample_ushort", + "test_upsample.integer_upsample_ushort2", + "test_upsample.integer_upsample_ushort3", + "test_upsample.integer_upsample_ushort4", + "test_upsample.integer_upsample_ushort8", + "test_upsample.integer_upsample_ushort16", + "test_upsample.integer_upsample_int", + "test_upsample.integer_upsample_int2", + "test_upsample.integer_upsample_int3", + "test_upsample.integer_upsample_int4", + "test_upsample.integer_upsample_int8", + "test_upsample.integer_upsample_int16", + "test_upsample.integer_upsample_uint", + "test_upsample.integer_upsample_uint2", + "test_upsample.integer_upsample_uint3", + "test_upsample.integer_upsample_uint4", + "test_upsample.integer_upsample_uint8", + "test_upsample.integer_upsample_uint16", + "test_abs_char", + "test_abs_char2", + "test_abs_char3", + "test_abs_char4", + "test_abs_char8", + "test_abs_char16", + "test_abs_short", + "test_abs_short2", + "test_abs_short3", + "test_abs_short4", + "test_abs_short8", + "test_abs_short16", + "test_abs_int", + "test_abs_int2", + "test_abs_int3", + "test_abs_int4", + "test_abs_int8", + "test_abs_int16", + 
"test_abs_long", + "test_abs_long2", + "test_abs_long3", + "test_abs_long4", + "test_abs_long8", + "test_abs_long16", + "test_abs_uchar", + "test_abs_uchar2", + "test_abs_uchar3", + "test_abs_uchar4", + "test_abs_uchar8", + "test_abs_uchar16", + "test_abs_ushort", + "test_abs_ushort2", + "test_abs_ushort3", + "test_abs_ushort4", + "test_abs_ushort8", + "test_abs_ushort16", + "test_abs_uint", + "test_abs_uint2", + "test_abs_uint3", + "test_abs_uint4", + "test_abs_uint8", + "test_abs_uint16", + "test_abs_ulong", + "test_abs_ulong2", + "test_abs_ulong3", + "test_abs_ulong4", + "test_abs_ulong8", + "test_abs_ulong16", + "test_absdiff_char", + "test_absdiff_char2", + "test_absdiff_char3", + "test_absdiff_char4", + "test_absdiff_char8", + "test_absdiff_char16", + "test_absdiff_uchar", + "test_absdiff_uchar2", + "test_absdiff_uchar3", + "test_absdiff_uchar4", + "test_absdiff_uchar8", + "test_absdiff_uchar16", + "test_absdiff_short", + "test_absdiff_short2", + "test_absdiff_short3", + "test_absdiff_short4", + "test_absdiff_short8", + "test_absdiff_short16", + "test_absdiff_ushort", + "test_absdiff_ushort2", + "test_absdiff_ushort3", + "test_absdiff_ushort4", + "test_absdiff_ushort8", + "test_absdiff_ushort16", + "test_absdiff_int", + "test_absdiff_int2", + "test_absdiff_int3", + "test_absdiff_int4", + "test_absdiff_int8", + "test_absdiff_int16", + "test_absdiff_uint", + "test_absdiff_uint2", + "test_absdiff_uint3", + "test_absdiff_uint4", + "test_absdiff_uint8", + "test_absdiff_uint16", + "test_absdiff_long", + "test_absdiff_long2", + "test_absdiff_long3", + "test_absdiff_long4", + "test_absdiff_long8", + "test_absdiff_long16", + "test_absdiff_ulong", + "test_absdiff_ulong2", + "test_absdiff_ulong3", + "test_absdiff_ulong4", + "test_absdiff_ulong8", + "test_absdiff_ulong16", + "test_add_sat_char", + "test_add_sat_char2", + "test_add_sat_char3", + "test_add_sat_char4", + "test_add_sat_char8", + "test_add_sat_char16", + "test_add_sat_uchar", + "test_add_sat_uchar2", + 
"test_add_sat_uchar3", + "test_add_sat_uchar4", + "test_add_sat_uchar8", + "test_add_sat_uchar16", + "test_add_sat_short", + "test_add_sat_short2", + "test_add_sat_short3", + "test_add_sat_short4", + "test_add_sat_short8", + "test_add_sat_short16", + "test_add_sat_ushort", + "test_add_sat_ushort2", + "test_add_sat_ushort3", + "test_add_sat_ushort4", + "test_add_sat_ushort8", + "test_add_sat_ushort16", + "test_add_sat_int", + "test_add_sat_int2", + "test_add_sat_int3", + "test_add_sat_int4", + "test_add_sat_int8", + "test_add_sat_int16", + "test_add_sat_uint", + "test_add_sat_uint2", + "test_add_sat_uint3", + "test_add_sat_uint4", + "test_add_sat_uint8", + "test_add_sat_uint16", + "test_add_sat_long", + "test_add_sat_long2", + "test_add_sat_long3", + "test_add_sat_long4", + "test_add_sat_long8", + "test_add_sat_long16", + "test_add_sat_ulong", + "test_add_sat_ulong2", + "test_add_sat_ulong3", + "test_add_sat_ulong4", + "test_add_sat_ulong8", + "test_add_sat_ulong16", + "test_sub_sat_char", + "test_sub_sat_char2", + "test_sub_sat_char3", + "test_sub_sat_char4", + "test_sub_sat_char8", + "test_sub_sat_char16", + "test_sub_sat_uchar", + "test_sub_sat_uchar2", + "test_sub_sat_uchar3", + "test_sub_sat_uchar4", + "test_sub_sat_uchar8", + "test_sub_sat_uchar16", + "test_sub_sat_short", + "test_sub_sat_short2", + "test_sub_sat_short3", + "test_sub_sat_short4", + "test_sub_sat_short8", + "test_sub_sat_short16", + "test_sub_sat_ushort", + "test_sub_sat_ushort2", + "test_sub_sat_ushort3", + "test_sub_sat_ushort4", + "test_sub_sat_ushort8", + "test_sub_sat_ushort16", + "test_sub_sat_int", + "test_sub_sat_int2", + "test_sub_sat_int3", + "test_sub_sat_int4", + "test_sub_sat_int8", + "test_sub_sat_int16", + "test_sub_sat_uint", + "test_sub_sat_uint2", + "test_sub_sat_uint3", + "test_sub_sat_uint4", + "test_sub_sat_uint8", + "test_sub_sat_uint16", + "test_sub_sat_long", + "test_sub_sat_long2", + "test_sub_sat_long3", + "test_sub_sat_long4", + "test_sub_sat_long8", + 
"test_sub_sat_long16", + "test_sub_sat_ulong", + "test_sub_sat_ulong2", + "test_sub_sat_ulong3", + "test_sub_sat_ulong4", + "test_sub_sat_ulong8", + "test_sub_sat_ulong16", + "test_int_mul24", + "test_int2_mul24", + "test_int3_mul24", + "test_int4_mul24", + "test_int8_mul24", + "test_int16_mul24", + "test_uint_mul24", + "test_uint2_mul24", + "test_uint3_mul24", + "test_uint4_mul24", + "test_uint8_mul24", + "test_uint16_mul24", + "test_int_mad24", + "test_int2_mad24", + "test_int3_mad24", + "test_int4_mad24", + "test_int8_mad24", + "test_int16_mad24", + "test_uint_mad24", + "test_uint2_mad24", + "test_uint3_mad24", + "test_uint4_mad24", + "test_uint8_mad24", + "test_uint16_mad24", + "test_popcount_char", + "test_popcount_char2", + "test_popcount_char3", + "test_popcount_char4", + "test_popcount_char8", + "test_popcount_char16", + "test_popcount_uchar", + "test_popcount_uchar2", + "test_popcount_uchar3", + "test_popcount_uchar4", + "test_popcount_uchar8", + "test_popcount_uchar16", + "test_popcount_short", + "test_popcount_short2", + "test_popcount_short3", + "test_popcount_short4", + "test_popcount_short8", + "test_popcount_short16", + "test_popcount_ushort", + "test_popcount_ushort2", + "test_popcount_ushort3", + "test_popcount_ushort4", + "test_popcount_ushort8", + "test_popcount_ushort16", + "test_popcount_int", + "test_popcount_int2", + "test_popcount_int3", + "test_popcount_int4", + "test_popcount_int8", + "test_popcount_int16", + "test_popcount_uint", + "test_popcount_uint2", + "test_popcount_uint3", + "test_popcount_uint4", + "test_popcount_uint8", + "test_popcount_uint16", + "test_popcount_long", + "test_popcount_long2", + "test_popcount_long3", + "test_popcount_long4", + "test_popcount_long8", + "test_popcount_long16", + "test_popcount_ulong", + "test_popcount_ulong2", + "test_popcount_ulong3", + "test_popcount_ulong4", + "test_popcount_ulong8", + "test_popcount_ulong16", + }; + + log_info("test_integer_ops\n"); + return test_suite(device, size_t_width, 
folder, test_name, sizeof(test_name) / sizeof(const char *), ""); +} + + +bool test_math_brute_force (cl_device_id device, cl_uint size_t_width, const char *folder) +{ + static const char* test_name[] = { + "math_kernel.acos_float", + "math_kernel3.acos_float3", + "math_kernel16.acos_float16", + "math_kernel2.acos_float2", + "math_kernel4.acos_float4", + "math_kernel8.acos_float8", + "math_kernel16.acosh_float16", + "math_kernel8.acosh_float8", + "math_kernel4.acosh_float4", + "math_kernel2.acosh_float2", + "math_kernel3.acosh_float3", + "math_kernel.acosh_float", + "math_kernel16.acospi_float16", + "math_kernel8.acospi_float8", + "math_kernel3.acospi_float3", + "math_kernel4.acospi_float4", + "math_kernel2.acospi_float2", + "math_kernel.acospi_float", + "math_kernel16.asin_float16", + "math_kernel8.asin_float8", + "math_kernel4.asin_float4", + "math_kernel3.asin_float3", + "math_kernel2.asin_float2", + "math_kernel.asin_float", + "math_kernel8.asinh_float8", + "math_kernel16.asinh_float16", + "math_kernel4.asinh_float4", + "math_kernel3.asinh_float3", + "math_kernel2.asinh_float2", + "math_kernel.asinh_float", + "math_kernel8.asinpi_float8", + "math_kernel16.asinpi_float16", + "math_kernel3.asinpi_float3", + "math_kernel4.asinpi_float4", + "math_kernel2.asinpi_float2", + "math_kernel.asinpi_float", + "math_kernel16.atan_float16", + "math_kernel8.atan_float8", + "math_kernel4.atan_float4", + "math_kernel2.atan_float2", + "math_kernel3.atan_float3", + "math_kernel.atan_float", + "math_kernel16.atanh_float16", + "math_kernel4.atanh_float4", + "math_kernel8.atanh_float8", + "math_kernel3.atanh_float3", + "math_kernel.atanh_float", + "math_kernel2.atanh_float2", + "math_kernel16.atanpi_float16", + "math_kernel8.atanpi_float8", + "math_kernel4.atanpi_float4", + "math_kernel3.atanpi_float3", + "math_kernel2.atanpi_float2", + "math_kernel.atanpi_float", + "math_kernel8.atan2_float8", + "math_kernel16.atan2_float16", + "math_kernel4.atan2_float4", + 
"math_kernel3.atan2_float3", + "math_kernel2.atan2_float2", + "math_kernel.atan2_float", + "math_kernel16.atan2pi_float16", + "math_kernel8.atan2pi_float8", + "math_kernel4.atan2pi_float4", + "math_kernel3.atan2pi_float3", + "math_kernel.atan2pi_float", + "math_kernel2.atan2pi_float2", + "math_kernel16.cbrt_float16", + "math_kernel8.cbrt_float8", + "math_kernel4.cbrt_float4", + "math_kernel2.cbrt_float2", + "math_kernel3.cbrt_float3", + "math_kernel.cbrt_float", + "math_kernel4.ceil_float4", + "math_kernel8.ceil_float8", + "math_kernel3.ceil_float3", + "math_kernel16.ceil_float16", + "math_kernel2.ceil_float2", + "math_kernel.ceil_float", + "math_kernel16.copysign_float16", + "math_kernel4.copysign_float4", + "math_kernel2.copysign_float2", + "math_kernel8.copysign_float8", + "math_kernel3.copysign_float3", + "math_kernel.copysign_float", + "math_kernel8.cos_float8", + "math_kernel16.cos_float16", + "math_kernel4.cos_float4", + "math_kernel3.cos_float3", + "math_kernel2.cos_float2", + "math_kernel.cos_float", + "math_kernel8.cosh_float8", + "math_kernel16.cosh_float16", + "math_kernel4.cosh_float4", + "math_kernel3.cosh_float3", + "math_kernel2.cosh_float2", + "math_kernel.cosh_float", + "math_kernel16.cospi_float16", + "math_kernel8.cospi_float8", + "math_kernel4.cospi_float4", + "math_kernel3.cospi_float3", + "math_kernel2.cospi_float2", + "math_kernel.cospi_float", + "math_kernel16.exp_float16", + "math_kernel4.exp_float4", + "math_kernel3.exp_float3", + "math_kernel8.exp_float8", + "math_kernel2.exp_float2", + "math_kernel.exp_float", + "math_kernel8.exp2_float8", + "math_kernel16.exp2_float16", + "math_kernel4.exp2_float4", + "math_kernel2.exp2_float2", + "math_kernel3.exp2_float3", + "math_kernel.exp2_float", + "math_kernel16.exp10_float16", + "math_kernel8.exp10_float8", + "math_kernel3.exp10_float3", + "math_kernel4.exp10_float4", + "math_kernel2.exp10_float2", + "math_kernel.exp10_float", + "math_kernel8.expm1_float8", + "math_kernel4.expm1_float4", + 
"math_kernel16.expm1_float16", + "math_kernel2.expm1_float2", + "math_kernel3.expm1_float3", + "math_kernel.expm1_float", + "math_kernel16.fabs_float16", + "math_kernel8.fabs_float8", + "math_kernel4.fabs_float4", + "math_kernel3.fabs_float3", + "math_kernel.fabs_float", + "math_kernel2.fabs_float2", + "math_kernel16.fdim_float16", + "math_kernel4.fdim_float4", + "math_kernel8.fdim_float8", + "math_kernel2.fdim_float2", + "math_kernel.fdim_float", + "math_kernel3.fdim_float3", + "math_kernel8.floor_float8", + "math_kernel16.floor_float16", + "math_kernel4.floor_float4", + "math_kernel3.floor_float3", + "math_kernel2.floor_float2", + "math_kernel.floor_float", + "math_kernel2.fma_float2", + "math_kernel16.fma_float16", + "math_kernel3.fma_float3", + "math_kernel4.fma_float4", + "math_kernel.fma_float", + "math_kernel8.fma_float8", + "math_kernel8.fmax_float8", + "math_kernel4.fmax_float4", + "math_kernel3.fmax_float3", + "math_kernel.fmax_float", + "math_kernel16.fmax_float16", + "math_kernel2.fmax_float2", + "math_kernel16.fmin_float16", + "math_kernel8.fmin_float8", + "math_kernel3.fmin_float3", + "math_kernel4.fmin_float4", + "math_kernel2.fmin_float2", + "math_kernel.fmin_float", + "math_kernel16.fmod_float16", + "math_kernel8.fmod_float8", + "math_kernel4.fmod_float4", + "math_kernel2.fmod_float2", + "math_kernel3.fmod_float3", + "math_kernel.fmod_float", + "math_kernel16.fract_float16", + "math_kernel4.fract_float4", + "math_kernel2.fract_float2", + "math_kernel3.fract_float3", + "math_kernel.fract_float", + "math_kernel8.fract_float8", + "math_kernel2.frexp_float2", + "math_kernel.frexp_float", + "math_kernel4.frexp_float4", + "math_kernel8.frexp_float8", + "math_kernel3.frexp_float3", + "math_kernel16.frexp_float16", + "math_kernel4.hypot_float4", + "math_kernel16.hypot_float16", + "math_kernel8.hypot_float8", + "math_kernel3.hypot_float3", + "math_kernel2.hypot_float2", + "math_kernel.hypot_float", + "math_kernel16.ilogb_float16", + 
"math_kernel3.ilogb_float3", + "math_kernel8.ilogb_float8", + "math_kernel2.ilogb_float2", + "math_kernel.ilogb_float", + "math_kernel4.ilogb_float4", + "math_kernel.isequal_float", + "math_kernel4.isequal_float4", + "math_kernel8.isequal_float8", + "math_kernel16.isequal_float16", + "math_kernel3.isequal_float3", + "math_kernel2.isequal_float2", + "math_kernel2.isfinite_float2", + "math_kernel16.isfinite_float16", + "math_kernel8.isfinite_float8", + "math_kernel.isfinite_float", + "math_kernel4.isfinite_float4", + "math_kernel3.isfinite_float3", + "math_kernel16.isgreater_float16", + "math_kernel8.isgreater_float8", + "math_kernel4.isgreater_float4", + "math_kernel3.isgreater_float3", + "math_kernel2.isgreater_float2", + "math_kernel.isgreater_float", + "math_kernel8.isgreaterequal_float8", + "math_kernel16.isgreaterequal_float16", + "math_kernel4.isgreaterequal_float4", + "math_kernel.isgreaterequal_float", + "math_kernel3.isgreaterequal_float3", + "math_kernel2.isgreaterequal_float2", + "math_kernel4.isinf_float4", + "math_kernel16.isinf_float16", + "math_kernel8.isinf_float8", + "math_kernel3.isinf_float3", + "math_kernel2.isinf_float2", + "math_kernel.isinf_float", + "math_kernel16.isless_float16", + "math_kernel8.isless_float8", + "math_kernel4.isless_float4", + "math_kernel3.isless_float3", + "math_kernel2.isless_float2", + "math_kernel.isless_float", + "math_kernel8.islessequal_float8", + "math_kernel16.islessequal_float16", + "math_kernel2.islessequal_float2", + "math_kernel3.islessequal_float3", + "math_kernel4.islessequal_float4", + "math_kernel.islessequal_float", + "math_kernel8.islessgreater_float8", + "math_kernel16.islessgreater_float16", + "math_kernel4.islessgreater_float4", + "math_kernel3.islessgreater_float3", + "math_kernel2.islessgreater_float2", + "math_kernel.islessgreater_float", + "math_kernel4.isnan_float4", + "math_kernel16.isnan_float16", + "math_kernel8.isnan_float8", + "math_kernel3.isnan_float3", + "math_kernel2.isnan_float2", + 
"math_kernel.isnan_float", + "math_kernel16.isnormal_float16", + "math_kernel8.isnormal_float8", + "math_kernel4.isnormal_float4", + "math_kernel3.isnormal_float3", + "math_kernel2.isnormal_float2", + "math_kernel.isnormal_float", + "math_kernel16.isnotequal_float16", + "math_kernel8.isnotequal_float8", + "math_kernel4.isnotequal_float4", + "math_kernel3.isnotequal_float3", + "math_kernel2.isnotequal_float2", + "math_kernel.isnotequal_float", + "math_kernel16.isordered_float16", + "math_kernel8.isordered_float8", + "math_kernel3.isordered_float3", + "math_kernel4.isordered_float4", + "math_kernel2.isordered_float2", + "math_kernel.isordered_float", + "math_kernel16.isunordered_float16", + "math_kernel8.isunordered_float8", + "math_kernel4.isunordered_float4", + "math_kernel2.isunordered_float2", + "math_kernel3.isunordered_float3", + "math_kernel.isunordered_float", + "math_kernel8.ldexp_float8", + "math_kernel2.ldexp_float2", + "math_kernel3.ldexp_float3", + "math_kernel16.ldexp_float16", + "math_kernel4.ldexp_float4", + "math_kernel.ldexp_float", + "math_kernel4.lgamma_float4", + "math_kernel16.lgamma_float16", + "math_kernel8.lgamma_float8", + "math_kernel2.lgamma_float2", + "math_kernel.lgamma_float", + "math_kernel3.lgamma_float3", + "math_kernel16.lgamma_r_float16", + "math_kernel8.lgamma_r_float8", + "math_kernel4.lgamma_r_float4", + "math_kernel3.lgamma_r_float3", + "math_kernel.lgamma_r_float", + "math_kernel2.lgamma_r_float2", + "math_kernel16.log_float16", + "math_kernel4.log_float4", + "math_kernel8.log_float8", + "math_kernel2.log_float2", + "math_kernel.log_float", + "math_kernel3.log_float3", + "math_kernel16.log2_float16", + "math_kernel4.log2_float4", + "math_kernel8.log2_float8", + "math_kernel2.log2_float2", + "math_kernel.log2_float", + "math_kernel3.log2_float3", + "math_kernel8.log10_float8", + "math_kernel4.log10_float4", + "math_kernel16.log10_float16", + "math_kernel2.log10_float2", + "math_kernel.log10_float", + 
"math_kernel3.log10_float3", + "math_kernel16.log1p_float16", + "math_kernel8.log1p_float8", + "math_kernel4.log1p_float4", + "math_kernel3.log1p_float3", + "math_kernel2.log1p_float2", + "math_kernel.log1p_float", + "math_kernel16.logb_float16", + "math_kernel8.logb_float8", + "math_kernel4.logb_float4", + "math_kernel3.logb_float3", + "math_kernel2.logb_float2", + "math_kernel.logb_float", + "math_kernel16.mad_float16", + "math_kernel8.mad_float8", + "math_kernel4.mad_float4", + "math_kernel2.mad_float2", + "math_kernel3.mad_float3", + "math_kernel.mad_float", + "math_kernel8.maxmag_float8", + "math_kernel16.maxmag_float16", + "math_kernel4.maxmag_float4", + "math_kernel3.maxmag_float3", + "math_kernel2.maxmag_float2", + "math_kernel.maxmag_float", + "math_kernel16.minmag_float16", + "math_kernel8.minmag_float8", + "math_kernel4.minmag_float4", + "math_kernel3.minmag_float3", + "math_kernel2.minmag_float2", + "math_kernel.minmag_float", + "math_kernel16.modf_float16", + "math_kernel8.modf_float8", + "math_kernel3.modf_float3", + "math_kernel4.modf_float4", + "math_kernel2.modf_float2", + "math_kernel.modf_float", + "math_kernel16.nan_float16", + "math_kernel8.nan_float8", + "math_kernel4.nan_float4", + "math_kernel2.nan_float2", + "math_kernel.nan_float", + "math_kernel3.nan_float3", + "math_kernel8.nextafter_float8", + "math_kernel16.nextafter_float16", + "math_kernel4.nextafter_float4", + "math_kernel2.nextafter_float2", + "math_kernel3.nextafter_float3", + "math_kernel.nextafter_float", + "math_kernel16.pow_float16", + "math_kernel8.pow_float8", + "math_kernel4.pow_float4", + "math_kernel3.pow_float3", + "math_kernel2.pow_float2", + "math_kernel.pow_float", + "math_kernel4.pown_float4", + "math_kernel8.pown_float8", + "math_kernel16.pown_float16", + "math_kernel3.pown_float3", + "math_kernel2.pown_float2", + "math_kernel.pown_float", + "math_kernel16.powr_float16", + "math_kernel8.powr_float8", + "math_kernel4.powr_float4", + "math_kernel2.powr_float2", + 
"math_kernel3.powr_float3", + "math_kernel.powr_float", + "math_kernel4.remainder_float4", + "math_kernel8.remainder_float8", + "math_kernel16.remainder_float16", + "math_kernel3.remainder_float3", + "math_kernel2.remainder_float2", + "math_kernel.remainder_float", + "math_kernel8.remquo_float8", + "math_kernel2.remquo_float2", + "math_kernel3.remquo_float3", + "math_kernel16.remquo_float16", + "math_kernel4.remquo_float4", + "math_kernel.remquo_float", + "math_kernel8.rint_float8", + "math_kernel16.rint_float16", + "math_kernel4.rint_float4", + "math_kernel3.rint_float3", + "math_kernel.rint_float", + "math_kernel2.rint_float2", + "math_kernel16.rootn_float16", + "math_kernel8.rootn_float8", + "math_kernel3.rootn_float3", + "math_kernel4.rootn_float4", + "math_kernel.rootn_float", + "math_kernel2.rootn_float2", + "math_kernel8.round_float8", + "math_kernel16.round_float16", + "math_kernel4.round_float4", + "math_kernel2.round_float2", + "math_kernel3.round_float3", + "math_kernel.round_float", + "math_kernel8.rsqrt_float8", + "math_kernel4.rsqrt_float4", + "math_kernel16.rsqrt_float16", + "math_kernel3.rsqrt_float3", + "math_kernel.rsqrt_float", + "math_kernel2.rsqrt_float2", + "math_kernel8.signbit_float8", + "math_kernel16.signbit_float16", + "math_kernel4.signbit_float4", + "math_kernel3.signbit_float3", + "math_kernel2.signbit_float2", + "math_kernel.signbit_float", + "math_kernel8.sin_float8", + "math_kernel4.sin_float4", + "math_kernel16.sin_float16", + "math_kernel2.sin_float2", + "math_kernel3.sin_float3", + "math_kernel.sin_float", + "math_kernel8.sincos_float8", + "math_kernel4.sincos_float4", + "math_kernel16.sincos_float16", + "math_kernel2.sincos_float2", + "math_kernel3.sincos_float3", + "math_kernel.sincos_float", + "math_kernel8.sinh_float8", + "math_kernel16.sinh_float16", + "math_kernel4.sinh_float4", + "math_kernel3.sinh_float3", + "math_kernel2.sinh_float2", + "math_kernel.sinh_float", + "math_kernel16.sinpi_float16", + 
"math_kernel4.sinpi_float4", + "math_kernel3.sinpi_float3", + "math_kernel.sinpi_float", + "math_kernel8.sinpi_float8", + "math_kernel2.sinpi_float2", + "math_kernel4.sqrt_float4", + "math_kernel16.sqrt_float16", + "math_kernel8.sqrt_float8", + "math_kernel2.sqrt_float2", + "math_kernel3.sqrt_float3", + "math_kernel.sqrt_float", + "math_kernel8.tan_float8", + "math_kernel16.tan_float16", + "math_kernel4.tan_float4", + "math_kernel.tan_float", + "math_kernel3.tan_float3", + "math_kernel2.tan_float2", + "math_kernel16.tanh_float16", + "math_kernel8.tanh_float8", + "math_kernel4.tanh_float4", + "math_kernel2.tanh_float2", + "math_kernel.tanh_float", + "math_kernel3.tanh_float3", + "math_kernel16.tanpi_float16", + "math_kernel8.tanpi_float8", + "math_kernel4.tanpi_float4", + "math_kernel3.tanpi_float3", + "math_kernel2.tanpi_float2", + "math_kernel.tanpi_float", + "math_kernel8.trunc_float8", + "math_kernel4.trunc_float4", + "math_kernel16.trunc_float16", + "math_kernel2.trunc_float2", + "math_kernel3.trunc_float3", + "math_kernel.trunc_float", + "math_kernel16.trunc_double16", + "math_kernel16.half_cos_float16", + "math_kernel8.half_cos_float8", + "math_kernel4.half_cos_float4", + "math_kernel3.half_cos_float3", + "math_kernel2.half_cos_float2", + "math_kernel.half_cos_float", + "math_kernel16.half_divide_float16", + "math_kernel8.half_divide_float8", + "math_kernel4.half_divide_float4", + "math_kernel3.half_divide_float3", + "math_kernel2.half_divide_float2", + "math_kernel.half_divide_float", + "math_kernel8.half_exp_float8", + "math_kernel16.half_exp_float16", + "math_kernel4.half_exp_float4", + "math_kernel3.half_exp_float3", + "math_kernel2.half_exp_float2", + "math_kernel.half_exp_float", + "math_kernel16.half_exp2_float16", + "math_kernel4.half_exp2_float4", + "math_kernel8.half_exp2_float8", + "math_kernel.half_exp2_float", + "math_kernel3.half_exp2_float3", + "math_kernel2.half_exp2_float2", + "math_kernel8.half_exp10_float8", + 
"math_kernel4.half_exp10_float4", + "math_kernel16.half_exp10_float16", + "math_kernel2.half_exp10_float2", + "math_kernel3.half_exp10_float3", + "math_kernel.half_exp10_float", + "math_kernel8.half_log_float8", + "math_kernel16.half_log_float16", + "math_kernel3.half_log_float3", + "math_kernel.half_log_float", + "math_kernel2.half_log_float2", + "math_kernel4.half_log_float4", + "math_kernel16.half_log2_float16", + "math_kernel4.half_log2_float4", + "math_kernel8.half_log2_float8", + "math_kernel2.half_log2_float2", + "math_kernel3.half_log2_float3", + "math_kernel.half_log2_float", + "math_kernel4.half_log10_float4", + "math_kernel8.half_log10_float8", + "math_kernel16.half_log10_float16", + "math_kernel2.half_log10_float2", + "math_kernel3.half_log10_float3", + "math_kernel.half_log10_float", + "math_kernel8.half_powr_float8", + "math_kernel16.half_powr_float16", + "math_kernel4.half_powr_float4", + "math_kernel3.half_powr_float3", + "math_kernel2.half_powr_float2", + "math_kernel.half_powr_float", + "math_kernel16.half_recip_float16", + "math_kernel8.half_recip_float8", + "math_kernel4.half_recip_float4", + "math_kernel3.half_recip_float3", + "math_kernel2.half_recip_float2", + "math_kernel.half_recip_float", + "math_kernel16.half_rsqrt_float16", + "math_kernel8.half_rsqrt_float8", + "math_kernel4.half_rsqrt_float4", + "math_kernel3.half_rsqrt_float3", + "math_kernel2.half_rsqrt_float2", + "math_kernel.half_rsqrt_float", + "math_kernel16.half_sin_float16", + "math_kernel8.half_sin_float8", + "math_kernel4.half_sin_float4", + "math_kernel3.half_sin_float3", + "math_kernel2.half_sin_float2", + "math_kernel.half_sin_float", + "math_kernel8.half_sqrt_float8", + "math_kernel4.half_sqrt_float4", + "math_kernel3.half_sqrt_float3", + "math_kernel16.half_sqrt_float16", + "math_kernel2.half_sqrt_float2", + "math_kernel.half_sqrt_float", + "math_kernel16.half_tan_float16", + "math_kernel8.half_tan_float8", + "math_kernel4.half_tan_float4", + 
"math_kernel3.half_tan_float3", + "math_kernel2.half_tan_float2", + "math_kernel.half_tan_float", + }; + + log_info("test_math_brute_force\n"); + return test_suite(device, size_t_width, folder, test_name, sizeof(test_name) / sizeof(const char *), ""); +} + + +bool test_math_brute_force_double (cl_device_id device, cl_uint size_t_width, const char *folder) +{ + static const char* test_name[] = { + "math_kernel8.acos_double8", + "math_kernel4.acos_double4", + "math_kernel16.acos_double16", + "math_kernel2.acos_double2", + "math_kernel3.acos_double3", + "math_kernel.acos_double", + "math_kernel16.acosh_double16", + "math_kernel8.acosh_double8", + "math_kernel4.acosh_double4", + "math_kernel2.acosh_double2", + "math_kernel3.acosh_double3", + "math_kernel.acosh_double", + "math_kernel8.acospi_double8", + "math_kernel16.acospi_double16", + "math_kernel4.acospi_double4", + "math_kernel3.acospi_double3", + "math_kernel2.acospi_double2", + "math_kernel.acospi_double", + "math_kernel16.asin_double16", + "math_kernel8.asin_double8", + "math_kernel4.asin_double4", + "math_kernel3.asin_double3", + "math_kernel.asin_double", + "math_kernel2.asin_double2", + "math_kernel16.asinh_double16", + "math_kernel8.asinh_double8", + "math_kernel4.asinh_double4", + "math_kernel2.asinh_double2", + "math_kernel3.asinh_double3", + "math_kernel.asinh_double", + "math_kernel4.asinpi_double4", + "math_kernel8.asinpi_double8", + "math_kernel16.asinpi_double16", + "math_kernel2.asinpi_double2", + "math_kernel3.asinpi_double3", + "math_kernel.asinpi_double", + "math_kernel16.atan_double16", + "math_kernel8.atan_double8", + "math_kernel4.atan_double4", + "math_kernel2.atan_double2", + "math_kernel3.atan_double3", + "math_kernel.atan_double", + "math_kernel16.atanh_double16", + "math_kernel8.atanh_double8", + "math_kernel4.atanh_double4", + "math_kernel3.atanh_double3", + "math_kernel2.atanh_double2", + "math_kernel.atanh_double", + "math_kernel8.atanpi_double8", + "math_kernel16.atanpi_double16", + 
"math_kernel3.atanpi_double3", + "math_kernel4.atanpi_double4", + "math_kernel2.atanpi_double2", + "math_kernel.atanpi_double", + "math_kernel16.atan2_double16", + "math_kernel8.atan2_double8", + "math_kernel4.atan2_double4", + "math_kernel2.atan2_double2", + "math_kernel3.atan2_double3", + "math_kernel.atan2_double", + "math_kernel8.atan2pi_double8", + "math_kernel4.atan2pi_double4", + "math_kernel16.atan2pi_double16", + "math_kernel3.atan2pi_double3", + "math_kernel2.atan2pi_double2", + "math_kernel.atan2pi_double", + "math_kernel4.cbrt_double4", + "math_kernel8.cbrt_double8", + "math_kernel3.cbrt_double3", + "math_kernel16.cbrt_double16", + "math_kernel2.cbrt_double2", + "math_kernel.cbrt_double", + "math_kernel16.ceil_double16", + "math_kernel4.ceil_double4", + "math_kernel2.ceil_double2", + "math_kernel8.ceil_double8", + "math_kernel3.ceil_double3", + "math_kernel.ceil_double", + "math_kernel16.copysign_double16", + "math_kernel8.copysign_double8", + "math_kernel4.copysign_double4", + "math_kernel2.copysign_double2", + "math_kernel3.copysign_double3", + "math_kernel.copysign_double", + "math_kernel8.cos_double8", + "math_kernel16.cos_double16", + "math_kernel4.cos_double4", + "math_kernel3.cos_double3", + "math_kernel2.cos_double2", + "math_kernel.cos_double", + "math_kernel16.cosh_double16", + "math_kernel8.cosh_double8", + "math_kernel4.cosh_double4", + "math_kernel3.cosh_double3", + "math_kernel2.cosh_double2", + "math_kernel.cosh_double", + "math_kernel4.cospi_double4", + "math_kernel16.cospi_double16", + "math_kernel8.cospi_double8", + "math_kernel3.cospi_double3", + "math_kernel.cospi_double", + "math_kernel2.cospi_double2", + "math_kernel16.exp_double16", + "math_kernel8.exp_double8", + "math_kernel4.exp_double4", + "math_kernel2.exp_double2", + "math_kernel3.exp_double3", + "math_kernel.exp_double", + "math_kernel8.exp2_double8", + "math_kernel16.exp2_double16", + "math_kernel4.exp2_double4", + "math_kernel3.exp2_double3", + 
"math_kernel2.exp2_double2", + "math_kernel.exp2_double", + "math_kernel8.exp10_double8", + "math_kernel4.exp10_double4", + "math_kernel16.exp10_double16", + "math_kernel3.exp10_double3", + "math_kernel.exp10_double", + "math_kernel2.exp10_double2", + "math_kernel16.expm1_double16", + "math_kernel8.expm1_double8", + "math_kernel2.expm1_double2", + "math_kernel4.expm1_double4", + "math_kernel3.expm1_double3", + "math_kernel.expm1_double", + "math_kernel16.fabs_double16", + "math_kernel8.fabs_double8", + "math_kernel4.fabs_double4", + "math_kernel3.fabs_double3", + "math_kernel2.fabs_double2", + "math_kernel.fabs_double", + "math_kernel8.fdim_double8", + "math_kernel16.fdim_double16", + "math_kernel4.fdim_double4", + "math_kernel3.fdim_double3", + "math_kernel2.fdim_double2", + "math_kernel.fdim_double", + "math_kernel4.floor_double4", + "math_kernel16.floor_double16", + "math_kernel8.floor_double8", + "math_kernel3.floor_double3", + "math_kernel2.floor_double2", + "math_kernel.floor_double", + "math_kernel4.fma_double4", + "math_kernel16.fma_double16", + "math_kernel8.fma_double8", + "math_kernel2.fma_double2", + "math_kernel3.fma_double3", + "math_kernel.fma_double", + "math_kernel8.fmax_float8", + "math_kernel4.fmax_float4", + "math_kernel3.fmax_float3", + "math_kernel.fmax_float", + "math_kernel16.fmax_float16", + "math_kernel2.fmax_float2", + "math_kernel8.fmax_double8", + "math_kernel16.fmax_double16", + "math_kernel2.fmax_double2", + "math_kernel4.fmax_double4", + "math_kernel3.fmax_double3", + "math_kernel.fmax_double", + "math_kernel16.fmin_double16", + "math_kernel8.fmin_double8", + "math_kernel4.fmin_double4", + "math_kernel3.fmin_double3", + "math_kernel2.fmin_double2", + "math_kernel.fmin_double", + "math_kernel8.fmod_double8", + "math_kernel16.fmod_double16", + "math_kernel3.fmod_double3", + "math_kernel4.fmod_double4", + "math_kernel2.fmod_double2", + "math_kernel.fmod_double", + "math_kernel16.fract_double16", + "math_kernel8.fract_double8", + 
"math_kernel4.fract_double4", + "math_kernel2.fract_double2", + "math_kernel3.fract_double3", + "math_kernel.fract_double", + "math_kernel4.frexp_double4", + "math_kernel8.frexp_double8", + "math_kernel2.frexp_double2", + "math_kernel3.frexp_double3", + "math_kernel16.frexp_double16", + "math_kernel.frexp_double", + "math_kernel4.hypot_double4", + "math_kernel8.hypot_double8", + "math_kernel16.hypot_double16", + "math_kernel2.hypot_double2", + "math_kernel3.hypot_double3", + "math_kernel.hypot_double", + "math_kernel16.ilogb_double16", + "math_kernel8.ilogb_double8", + "math_kernel4.ilogb_double4", + "math_kernel3.ilogb_double3", + "math_kernel.ilogb_double", + "math_kernel2.ilogb_double2", + "math_kernel16.isequal_double16", + "math_kernel8.isequal_double8", + "math_kernel4.isequal_double4", + "math_kernel3.isequal_double3", + "math_kernel.isequal_double", + "math_kernel2.isequal_double2", + "math_kernel16.isfinite_double16", + "math_kernel8.isfinite_double8", + "math_kernel4.isfinite_double4", + "math_kernel3.isfinite_double3", + "math_kernel2.isfinite_double2", + "math_kernel.isfinite_double", + "math_kernel16.isgreater_double16", + "math_kernel8.isgreater_double8", + "math_kernel4.isgreater_double4", + "math_kernel3.isgreater_double3", + "math_kernel.isgreater_double", + "math_kernel2.isgreater_double2", + "math_kernel16.isgreaterequal_double16", + "math_kernel8.isgreaterequal_double8", + "math_kernel4.isgreaterequal_double4", + "math_kernel3.isgreaterequal_double3", + "math_kernel2.isgreaterequal_double2", + "math_kernel.isgreaterequal_double", + "math_kernel8.isinf_double8", + "math_kernel16.isinf_double16", + "math_kernel3.isinf_double3", + "math_kernel4.isinf_double4", + "math_kernel2.isinf_double2", + "math_kernel.isinf_double", + "math_kernel8.isless_double8", + "math_kernel4.isless_double4", + "math_kernel16.isless_double16", + "math_kernel2.isless_double2", + "math_kernel3.isless_double3", + "math_kernel.isless_double", + 
"math_kernel16.islessequal_double16", + "math_kernel8.islessequal_double8", + "math_kernel4.islessequal_double4", + "math_kernel2.islessequal_double2", + "math_kernel3.islessequal_double3", + "math_kernel.islessequal_double", + "math_kernel16.islessgreater_double16", + "math_kernel3.islessgreater_double3", + "math_kernel8.islessgreater_double8", + "math_kernel4.islessgreater_double4", + "math_kernel2.islessgreater_double2", + "math_kernel.islessgreater_double", + "math_kernel8.isnan_double8", + "math_kernel4.isnan_double4", + "math_kernel16.isnan_double16", + "math_kernel3.isnan_double3", + "math_kernel2.isnan_double2", + "math_kernel.isnan_double", + "math_kernel16.isnormal_double16", + "math_kernel8.isnormal_double8", + "math_kernel4.isnormal_double4", + "math_kernel2.isnormal_double2", + "math_kernel3.isnormal_double3", + "math_kernel.isnormal_double", + "math_kernel16.isnotequal_double16", + "math_kernel4.isnotequal_double4", + "math_kernel8.isnotequal_double8", + "math_kernel3.isnotequal_double3", + "math_kernel2.isnotequal_double2", + "math_kernel.isnotequal_double", + "math_kernel16.isordered_double16", + "math_kernel3.isordered_double3", + "math_kernel4.isordered_double4", + "math_kernel8.isordered_double8", + "math_kernel2.isordered_double2", + "math_kernel.isordered_double", + "math_kernel8.isunordered_double8", + "math_kernel16.isunordered_double16", + "math_kernel4.isunordered_double4", + "math_kernel3.isunordered_double3", + "math_kernel2.isunordered_double2", + "math_kernel.isunordered_double", + "math_kernel16.ldexp_double16", + "math_kernel4.ldexp_double4", + "math_kernel8.ldexp_double8", + "math_kernel2.ldexp_double2", + "math_kernel.ldexp_double", + "math_kernel3.ldexp_double3", + "math_kernel8.lgamma_double8", + "math_kernel16.lgamma_double16", + "math_kernel4.lgamma_double4", + "math_kernel2.lgamma_double2", + "math_kernel.lgamma_double", + "math_kernel3.lgamma_double3", + "math_kernel16.lgamma_r_double16", + "math_kernel8.lgamma_r_double8", + 
"math_kernel3.lgamma_r_double3", + "math_kernel4.lgamma_r_double4", + "math_kernel.lgamma_r_double", + "math_kernel2.lgamma_r_double2", + "math_kernel8.log_double8", + "math_kernel16.log_double16", + "math_kernel4.log_double4", + "math_kernel3.log_double3", + "math_kernel2.log_double2", + "math_kernel.log_double", + "math_kernel8.log2_double8", + "math_kernel16.log2_double16", + "math_kernel4.log2_double4", + "math_kernel3.log2_double3", + "math_kernel.log2_double", + "math_kernel2.log2_double2", + "math_kernel16.log10_double16", + "math_kernel4.log10_double4", + "math_kernel8.log10_double8", + "math_kernel3.log10_double3", + "math_kernel2.log10_double2", + "math_kernel.log10_double", + "math_kernel16.log1p_double16", + "math_kernel4.log1p_double4", + "math_kernel8.log1p_double8", + "math_kernel2.log1p_double2", + "math_kernel3.log1p_double3", + "math_kernel.log1p_double", + "math_kernel16.logb_double16", + "math_kernel8.logb_double8", + "math_kernel4.logb_double4", + "math_kernel2.logb_double2", + "math_kernel3.logb_double3", + "math_kernel.logb_double", + "math_kernel8.mad_double8", + "math_kernel16.mad_double16", + "math_kernel4.mad_double4", + "math_kernel3.mad_double3", + "math_kernel2.mad_double2", + "math_kernel.mad_double", + "math_kernel8.maxmag_double8", + "math_kernel16.maxmag_double16", + "math_kernel4.maxmag_double4", + "math_kernel3.maxmag_double3", + "math_kernel2.maxmag_double2", + "math_kernel.maxmag_double", + "math_kernel16.minmag_double16", + "math_kernel8.minmag_double8", + "math_kernel4.minmag_double4", + "math_kernel3.minmag_double3", + "math_kernel2.minmag_double2", + "math_kernel.minmag_double", + "math_kernel16.modf_double16", + "math_kernel8.modf_double8", + "math_kernel4.modf_double4", + "math_kernel2.modf_double2", + "math_kernel3.modf_double3", + "math_kernel.modf_double", + "math_kernel8.nan_double8", + "math_kernel16.nan_double16", + "math_kernel4.nan_double4", + "math_kernel3.nan_double3", + "math_kernel2.nan_double2", + 
"math_kernel.nan_double", + "math_kernel8.nextafter_double8", + "math_kernel4.nextafter_double4", + "math_kernel16.nextafter_double16", + "math_kernel3.nextafter_double3", + "math_kernel2.nextafter_double2", + "math_kernel.nextafter_double", + "math_kernel4.pow_double4", + "math_kernel8.pow_double8", + "math_kernel16.pow_double16", + "math_kernel3.pow_double3", + "math_kernel2.pow_double2", + "math_kernel.pow_double", + "math_kernel4.pown_double4", + "math_kernel8.pown_double8", + "math_kernel2.pown_double2", + "math_kernel3.pown_double3", + "math_kernel.pown_double", + "math_kernel16.pown_double16", + "math_kernel16.powr_double16", + "math_kernel8.powr_double8", + "math_kernel4.powr_double4", + "math_kernel3.powr_double3", + "math_kernel2.powr_double2", + "math_kernel.powr_double", + "math_kernel4.remainder_double4", + "math_kernel8.remainder_double8", + "math_kernel16.remainder_double16", + "math_kernel2.remainder_double2", + "math_kernel3.remainder_double3", + "math_kernel.remainder_double", + "math_kernel8.remquo_double8", + "math_kernel16.remquo_double16", + "math_kernel3.remquo_double3", + "math_kernel4.remquo_double4", + "math_kernel2.remquo_double2", + "math_kernel.remquo_double", + "math_kernel8.rint_double8", + "math_kernel4.rint_double4", + "math_kernel16.rint_double16", + "math_kernel3.rint_double3", + "math_kernel2.rint_double2", + "math_kernel.rint_double", + "math_kernel16.rootn_double16", + "math_kernel8.rootn_double8", + "math_kernel4.rootn_double4", + "math_kernel3.rootn_double3", + "math_kernel2.rootn_double2", + "math_kernel.rootn_double", + "math_kernel16.round_double16", + "math_kernel8.round_double8", + "math_kernel4.round_double4", + "math_kernel3.round_double3", + "math_kernel2.round_double2", + "math_kernel.round_double", + "math_kernel8.rsqrt_double8", + "math_kernel4.rsqrt_double4", + "math_kernel16.rsqrt_double16", + "math_kernel3.rsqrt_double3", + "math_kernel.rsqrt_double", + "math_kernel2.rsqrt_double2", + 
"math_kernel8.signbit_double8", + "math_kernel4.signbit_double4", + "math_kernel16.signbit_double16", + "math_kernel2.signbit_double2", + "math_kernel3.signbit_double3", + "math_kernel.signbit_double", + "math_kernel16.sin_double16", + "math_kernel4.sin_double4", + "math_kernel8.sin_double8", + "math_kernel2.sin_double2", + "math_kernel3.sin_double3", + "math_kernel.sin_double", + "math_kernel16.sincos_double16", + "math_kernel8.sincos_double8", + "math_kernel4.sincos_double4", + "math_kernel3.sincos_double3", + "math_kernel2.sincos_double2", + "math_kernel.sincos_double", + "math_kernel16.sinh_double16", + "math_kernel4.sinh_double4", + "math_kernel2.sinh_double2", + "math_kernel8.sinh_double8", + "math_kernel3.sinh_double3", + "math_kernel.sinh_double", + "math_kernel16.sinpi_double16", + "math_kernel8.sinpi_double8", + "math_kernel3.sinpi_double3", + "math_kernel4.sinpi_double4", + "math_kernel2.sinpi_double2", + "math_kernel.sinpi_double", + "math_kernel16.sqrt_double16", + "math_kernel8.sqrt_double8", + "math_kernel4.sqrt_double4", + "math_kernel2.sqrt_double2", + "math_kernel3.sqrt_double3", + "math_kernel.sqrt_double", + "math_kernel8.tan_double8", + "math_kernel16.tan_double16", + "math_kernel.tan_double", + "math_kernel3.tan_double3", + "math_kernel4.tan_double4", + "math_kernel2.tan_double2", + "math_kernel4.tanh_double4", + "math_kernel8.tanh_double8", + "math_kernel2.tanh_double2", + "math_kernel16.tanh_double16", + "math_kernel3.tanh_double3", + "math_kernel.tanh_double", + "math_kernel16.tanpi_double16", + "math_kernel4.tanpi_double4", + "math_kernel8.tanpi_double8", + "math_kernel3.tanpi_double3", + "math_kernel.tanpi_double", + "math_kernel2.tanpi_double2", + "math_kernel16.trunc_double16", + "math_kernel8.trunc_double8", + "math_kernel4.trunc_double4", + "math_kernel3.trunc_double3", + "math_kernel2.trunc_double2", + "math_kernel.trunc_double", + }; + + log_info("test_math_brute_force_double\n"); + return test_suite(device, size_t_width, folder, 
test_name, sizeof(test_name) / sizeof(const char *), "cl_khr_fp64"); +} + + +bool test_printf (cl_device_id device, cl_uint size_t_width, const char *folder) +{ + static const char* test_name[] = { + "test0.testCaseInt", + "test1.testCaseFloat", + "test5.testCaseChar", + "test6.testCaseString", + "test7.testCaseVector_float", + "test7.testCaseVector_long", + "test7.testCaseVector_uchar", + "test7.testCaseVector_uint", + "test8.testCaseAddrSpace_constant", + "test8.testCaseAddrSpace_global", + "test8.testCaseAddrSpace_local", + "test8.testCaseAddrSpace_private", + }; + + log_info("test_printf\n"); + return test_suite(device, size_t_width, folder, test_name, sizeof(test_name) / sizeof(const char *), ""); +} + + +bool test_profiling (cl_device_id device, cl_uint size_t_width, const char *folder) +{ + static const char* test_name[] = { + "testReadf", + "image_filter", + }; + + log_info("test_profiling\n"); + return test_suite(device, size_t_width, folder, test_name, sizeof(test_name) / sizeof(const char *), ""); +} + + +bool test_relationals (cl_device_id device, cl_uint size_t_width, const char *folder) +{ + static const char* test_name[] = { + "sample_test.relational_any_char", + "sample_test.relational_any_char2", + "sample_test.relational_any_char3", + "sample_test.relational_any_char4", + "sample_test.relational_any_char8", + "sample_test.relational_any_char16", + "sample_test.relational_any_short", + "sample_test.relational_any_short2", + "sample_test.relational_any_short3", + "sample_test.relational_any_short4", + "sample_test.relational_any_short8", + "sample_test.relational_any_short16", + "sample_test.relational_any_int", + "sample_test.relational_any_int2", + "sample_test.relational_any_int3", + "sample_test.relational_any_int4", + "sample_test.relational_any_int8", + "sample_test.relational_any_int16", + "sample_test.relational_any_long", + "sample_test.relational_any_long2", + "sample_test.relational_any_long3", + "sample_test.relational_any_long4", + 
"sample_test.relational_any_long8", + "sample_test.relational_any_long16", + "sample_test.relational_all_char", + "sample_test.relational_all_char2", + "sample_test.relational_all_char3", + "sample_test.relational_all_char4", + "sample_test.relational_all_char8", + "sample_test.relational_all_char16", + "sample_test.relational_all_short", + "sample_test.relational_all_short2", + "sample_test.relational_all_short3", + "sample_test.relational_all_short4", + "sample_test.relational_all_short8", + "sample_test.relational_all_short16", + "sample_test.relational_all_int", + "sample_test.relational_all_int2", + "sample_test.relational_all_int3", + "sample_test.relational_all_int4", + "sample_test.relational_all_int8", + "sample_test.relational_all_int16", + "sample_test.relational_all_long", + "sample_test.relational_all_long2", + "sample_test.relational_all_long3", + "sample_test.relational_all_long4", + "sample_test.relational_all_long8", + "sample_test.relational_all_long16", + "sample_test.relational_bitselect_char", + "sample_test.relational_bitselect_char2", + "sample_test.relational_bitselect_char3", + "sample_test.relational_bitselect_char4", + "sample_test.relational_bitselect_char8", + "sample_test.relational_bitselect_char16", + "sample_test.relational_bitselect_uchar", + "sample_test.relational_bitselect_uchar2", + "sample_test.relational_bitselect_uchar3", + "sample_test.relational_bitselect_uchar4", + "sample_test.relational_bitselect_uchar8", + "sample_test.relational_bitselect_uchar16", + "sample_test.relational_bitselect_short", + "sample_test.relational_bitselect_short2", + "sample_test.relational_bitselect_short3", + "sample_test.relational_bitselect_short4", + "sample_test.relational_bitselect_short8", + "sample_test.relational_bitselect_short16", + "sample_test.relational_bitselect_ushort", + "sample_test.relational_bitselect_ushort2", + "sample_test.relational_bitselect_ushort3", + "sample_test.relational_bitselect_ushort4", + 
"sample_test.relational_bitselect_ushort8", + "sample_test.relational_bitselect_ushort16", + "sample_test.relational_bitselect_int", + "sample_test.relational_bitselect_int2", + "sample_test.relational_bitselect_int3", + "sample_test.relational_bitselect_int4", + "sample_test.relational_bitselect_int8", + "sample_test.relational_bitselect_int16", + "sample_test.relational_bitselect_uint", + "sample_test.relational_bitselect_uint2", + "sample_test.relational_bitselect_uint3", + "sample_test.relational_bitselect_uint4", + "sample_test.relational_bitselect_uint8", + "sample_test.relational_bitselect_uint16", + "sample_test.relational_bitselect_long", + "sample_test.relational_bitselect_long2", + "sample_test.relational_bitselect_long3", + "sample_test.relational_bitselect_long4", + "sample_test.relational_bitselect_long8", + "sample_test.relational_bitselect_long16", + "sample_test.relational_bitselect_ulong", + "sample_test.relational_bitselect_ulong2", + "sample_test.relational_bitselect_ulong3", + "sample_test.relational_bitselect_ulong4", + "sample_test.relational_bitselect_ulong8", + "sample_test.relational_bitselect_ulong16", + "sample_test.relational_bitselect_float", + "sample_test.relational_bitselect_float2", + "sample_test.relational_bitselect_float3", + "sample_test.relational_bitselect_float4", + "sample_test.relational_bitselect_float8", + "sample_test.relational_bitselect_float16", + "sample_test.relational_select_signed_char", + "sample_test.relational_select_signed_char2", + "sample_test.relational_select_signed_char4", + "sample_test.relational_select_signed_char8", + "sample_test.relational_select_signed_char16", + "sample_test.relational_select_signed_short", + "sample_test.relational_select_signed_short2", + "sample_test.relational_select_signed_short4", + "sample_test.relational_select_signed_short8", + "sample_test.relational_select_signed_short16", + "sample_test.relational_select_signed_int", + "sample_test.relational_select_signed_int2", + 
"sample_test.relational_select_signed_int4", + "sample_test.relational_select_signed_int8", + "sample_test.relational_select_signed_int16", + "sample_test.relational_select_signed_long", + "sample_test.relational_select_signed_long2", + "sample_test.relational_select_signed_long4", + "sample_test.relational_select_signed_long8", + "sample_test.relational_select_signed_long16", + "sample_test.relational_select_unsigned_uchar", + "sample_test.relational_select_unsigned_uchar2", + "sample_test.relational_select_unsigned_uchar4", + "sample_test.relational_select_unsigned_uchar8", + "sample_test.relational_select_unsigned_uchar16", + "sample_test.relational_select_unsigned_ushort", + "sample_test.relational_select_unsigned_ushort2", + "sample_test.relational_select_unsigned_ushort4", + "sample_test.relational_select_unsigned_ushort8", + "sample_test.relational_select_unsigned_ushort16", + "sample_test.relational_select_unsigned_uint", + "sample_test.relational_select_unsigned_uint2", + "sample_test.relational_select_unsigned_uint4", + "sample_test.relational_select_unsigned_uint8", + "sample_test.relational_select_unsigned_uint16", + "sample_test.relational_select_unsigned_ulong", + "sample_test.relational_select_unsigned_ulong2", + "sample_test.relational_select_unsigned_ulong4", + "sample_test.relational_select_unsigned_ulong8", + "sample_test.relational_select_unsigned_ulong16", + "sample_test.relational_isequal_float", + "sample_test.relational_isequal_float2", + "sample_test.relational_isequal_float3", + "sample_test.relational_isequal_float4", + "sample_test.relational_isequal_float8", + "sample_test.relational_isequal_float16", + "sample_test.relational_isnotequal_float", + "sample_test.relational_isnotequal_float2", + "sample_test.relational_isnotequal_float3", + "sample_test.relational_isnotequal_float4", + "sample_test.relational_isnotequal_float8", + "sample_test.relational_isnotequal_float16", + "sample_test.relational_isgreater_float", + 
"sample_test.relational_isgreater_float2", + "sample_test.relational_isgreater_float3", + "sample_test.relational_isgreater_float4", + "sample_test.relational_isgreater_float8", + "sample_test.relational_isgreater_float16", + "sample_test.relational_isgreaterequal_float", + "sample_test.relational_isgreaterequal_float2", + "sample_test.relational_isgreaterequal_float3", + "sample_test.relational_isgreaterequal_float4", + "sample_test.relational_isgreaterequal_float8", + "sample_test.relational_isgreaterequal_float16", + "sample_test.relational_isless_float", + "sample_test.relational_isless_float2", + "sample_test.relational_isless_float3", + "sample_test.relational_isless_float4", + "sample_test.relational_isless_float8", + "sample_test.relational_isless_float16", + "sample_test.relational_islessequal_float", + "sample_test.relational_islessequal_float2", + "sample_test.relational_islessequal_float3", + "sample_test.relational_islessequal_float4", + "sample_test.relational_islessequal_float8", + "sample_test.relational_islessequal_float16", + "sample_test.relational_islessgreater_float", + "sample_test.relational_islessgreater_float2", + "sample_test.relational_islessgreater_float3", + "sample_test.relational_islessgreater_float4", + "sample_test.relational_islessgreater_float8", + "sample_test.relational_islessgreater_float16", + "sample_test.shuffle_built_in_char2_char2", + "sample_test.shuffle_built_in_char2_char4", + "sample_test.shuffle_built_in_char2_char8", + "sample_test.shuffle_built_in_char2_char16", + "sample_test.shuffle_built_in_char4_char2", + "sample_test.shuffle_built_in_char4_char4", + "sample_test.shuffle_built_in_char4_char8", + "sample_test.shuffle_built_in_char4_char16", + "sample_test.shuffle_built_in_char8_char2", + "sample_test.shuffle_built_in_char8_char4", + "sample_test.shuffle_built_in_char8_char8", + "sample_test.shuffle_built_in_char8_char16", + "sample_test.shuffle_built_in_char16_char2", + 
"sample_test.shuffle_built_in_char16_char4", + "sample_test.shuffle_built_in_char16_char8", + "sample_test.shuffle_built_in_char16_char16", + "sample_test.shuffle_built_in_uchar2_uchar2", + "sample_test.shuffle_built_in_uchar2_uchar4", + "sample_test.shuffle_built_in_uchar2_uchar8", + "sample_test.shuffle_built_in_uchar2_uchar16", + "sample_test.shuffle_built_in_uchar4_uchar2", + "sample_test.shuffle_built_in_uchar4_uchar4", + "sample_test.shuffle_built_in_uchar4_uchar8", + "sample_test.shuffle_built_in_uchar4_uchar16", + "sample_test.shuffle_built_in_uchar8_uchar2", + "sample_test.shuffle_built_in_uchar8_uchar4", + "sample_test.shuffle_built_in_uchar8_uchar8", + "sample_test.shuffle_built_in_uchar8_uchar16", + "sample_test.shuffle_built_in_uchar16_uchar2", + "sample_test.shuffle_built_in_uchar16_uchar4", + "sample_test.shuffle_built_in_uchar16_uchar8", + "sample_test.shuffle_built_in_uchar16_uchar16", + "sample_test.shuffle_built_in_short2_short2", + "sample_test.shuffle_built_in_short2_short4", + "sample_test.shuffle_built_in_short2_short8", + "sample_test.shuffle_built_in_short2_short16", + "sample_test.shuffle_built_in_short4_short2", + "sample_test.shuffle_built_in_short4_short4", + "sample_test.shuffle_built_in_short4_short8", + "sample_test.shuffle_built_in_short4_short16", + "sample_test.shuffle_built_in_short8_short2", + "sample_test.shuffle_built_in_short8_short4", + "sample_test.shuffle_built_in_short8_short8", + "sample_test.shuffle_built_in_short8_short16", + "sample_test.shuffle_built_in_short16_short2", + "sample_test.shuffle_built_in_short16_short4", + "sample_test.shuffle_built_in_short16_short8", + "sample_test.shuffle_built_in_short16_short16", + "sample_test.shuffle_built_in_ushort2_ushort2", + "sample_test.shuffle_built_in_ushort2_ushort4", + "sample_test.shuffle_built_in_ushort2_ushort8", + "sample_test.shuffle_built_in_ushort2_ushort16", + "sample_test.shuffle_built_in_ushort4_ushort2", + "sample_test.shuffle_built_in_ushort4_ushort4", + 
"sample_test.shuffle_built_in_ushort4_ushort8", + "sample_test.shuffle_built_in_ushort4_ushort16", + "sample_test.shuffle_built_in_ushort8_ushort2", + "sample_test.shuffle_built_in_ushort8_ushort4", + "sample_test.shuffle_built_in_ushort8_ushort8", + "sample_test.shuffle_built_in_ushort8_ushort16", + "sample_test.shuffle_built_in_ushort16_ushort2", + "sample_test.shuffle_built_in_ushort16_ushort4", + "sample_test.shuffle_built_in_ushort16_ushort8", + "sample_test.shuffle_built_in_ushort16_ushort16", + "sample_test.shuffle_built_in_int2_int2", + "sample_test.shuffle_built_in_int2_int4", + "sample_test.shuffle_built_in_int2_int8", + "sample_test.shuffle_built_in_int2_int16", + "sample_test.shuffle_built_in_int4_int2", + "sample_test.shuffle_built_in_int4_int4", + "sample_test.shuffle_built_in_int4_int8", + "sample_test.shuffle_built_in_int4_int16", + "sample_test.shuffle_built_in_int8_int2", + "sample_test.shuffle_built_in_int8_int4", + "sample_test.shuffle_built_in_int8_int8", + "sample_test.shuffle_built_in_int8_int16", + "sample_test.shuffle_built_in_int16_int2", + "sample_test.shuffle_built_in_int16_int4", + "sample_test.shuffle_built_in_int16_int8", + "sample_test.shuffle_built_in_int16_int16", + "sample_test.shuffle_built_in_uint2_uint2", + "sample_test.shuffle_built_in_uint2_uint4", + "sample_test.shuffle_built_in_uint2_uint8", + "sample_test.shuffle_built_in_uint2_uint16", + "sample_test.shuffle_built_in_uint4_uint2", + "sample_test.shuffle_built_in_uint4_uint4", + "sample_test.shuffle_built_in_uint4_uint8", + "sample_test.shuffle_built_in_uint4_uint16", + "sample_test.shuffle_built_in_uint8_uint2", + "sample_test.shuffle_built_in_uint8_uint4", + "sample_test.shuffle_built_in_uint8_uint8", + "sample_test.shuffle_built_in_uint8_uint16", + "sample_test.shuffle_built_in_uint16_uint2", + "sample_test.shuffle_built_in_uint16_uint4", + "sample_test.shuffle_built_in_uint16_uint8", + "sample_test.shuffle_built_in_uint16_uint16", + 
"sample_test.shuffle_built_in_long2_long2", + "sample_test.shuffle_built_in_long2_long4", + "sample_test.shuffle_built_in_long2_long8", + "sample_test.shuffle_built_in_long2_long16", + "sample_test.shuffle_built_in_long4_long2", + "sample_test.shuffle_built_in_long4_long4", + "sample_test.shuffle_built_in_long4_long8", + "sample_test.shuffle_built_in_long4_long16", + "sample_test.shuffle_built_in_long8_long2", + "sample_test.shuffle_built_in_long8_long4", + "sample_test.shuffle_built_in_long8_long8", + "sample_test.shuffle_built_in_long8_long16", + "sample_test.shuffle_built_in_long16_long2", + "sample_test.shuffle_built_in_long16_long4", + "sample_test.shuffle_built_in_long16_long8", + "sample_test.shuffle_built_in_long16_long16", + "sample_test.shuffle_built_in_ulong2_ulong2", + "sample_test.shuffle_built_in_ulong2_ulong4", + "sample_test.shuffle_built_in_ulong2_ulong8", + "sample_test.shuffle_built_in_ulong2_ulong16", + "sample_test.shuffle_built_in_ulong4_ulong2", + "sample_test.shuffle_built_in_ulong4_ulong4", + "sample_test.shuffle_built_in_ulong4_ulong8", + "sample_test.shuffle_built_in_ulong4_ulong16", + "sample_test.shuffle_built_in_ulong8_ulong2", + "sample_test.shuffle_built_in_ulong8_ulong4", + "sample_test.shuffle_built_in_ulong8_ulong8", + "sample_test.shuffle_built_in_ulong8_ulong16", + "sample_test.shuffle_built_in_ulong16_ulong2", + "sample_test.shuffle_built_in_ulong16_ulong4", + "sample_test.shuffle_built_in_ulong16_ulong8", + "sample_test.shuffle_built_in_ulong16_ulong16", + "sample_test.shuffle_built_in_float2_float2", + "sample_test.shuffle_built_in_float2_float4", + "sample_test.shuffle_built_in_float2_float8", + "sample_test.shuffle_built_in_float2_float16", + "sample_test.shuffle_built_in_float4_float2", + "sample_test.shuffle_built_in_float4_float4", + "sample_test.shuffle_built_in_float4_float8", + "sample_test.shuffle_built_in_float4_float16", + "sample_test.shuffle_built_in_float8_float2", + "sample_test.shuffle_built_in_float8_float4", 
+ "sample_test.shuffle_built_in_float8_float8", + "sample_test.shuffle_built_in_float8_float16", + "sample_test.shuffle_built_in_float16_float2", + "sample_test.shuffle_built_in_float16_float4", + "sample_test.shuffle_built_in_float16_float8", + "sample_test.shuffle_built_in_float16_float16", + "sample_test.shuffle_built_in_dual_input_char2_char2", + "sample_test.shuffle_built_in_dual_input_char2_char4", + "sample_test.shuffle_built_in_dual_input_char2_char8", + "sample_test.shuffle_built_in_dual_input_char2_char16", + "sample_test.shuffle_built_in_dual_input_char4_char2", + "sample_test.shuffle_built_in_dual_input_char4_char4", + "sample_test.shuffle_built_in_dual_input_char4_char8", + "sample_test.shuffle_built_in_dual_input_char4_char16", + "sample_test.shuffle_built_in_dual_input_char8_char2", + "sample_test.shuffle_built_in_dual_input_char8_char4", + "sample_test.shuffle_built_in_dual_input_char8_char8", + "sample_test.shuffle_built_in_dual_input_char8_char16", + "sample_test.shuffle_built_in_dual_input_char16_char2", + "sample_test.shuffle_built_in_dual_input_char16_char4", + "sample_test.shuffle_built_in_dual_input_char16_char8", + "sample_test.shuffle_built_in_dual_input_char16_char16", + "sample_test.shuffle_built_in_dual_input_uchar2_uchar2", + "sample_test.shuffle_built_in_dual_input_uchar2_uchar4", + "sample_test.shuffle_built_in_dual_input_uchar2_uchar8", + "sample_test.shuffle_built_in_dual_input_uchar2_uchar16", + "sample_test.shuffle_built_in_dual_input_uchar4_uchar2", + "sample_test.shuffle_built_in_dual_input_uchar4_uchar4", + "sample_test.shuffle_built_in_dual_input_uchar4_uchar8", + "sample_test.shuffle_built_in_dual_input_uchar4_uchar16", + "sample_test.shuffle_built_in_dual_input_uchar8_uchar2", + "sample_test.shuffle_built_in_dual_input_uchar8_uchar4", + "sample_test.shuffle_built_in_dual_input_uchar8_uchar8", + "sample_test.shuffle_built_in_dual_input_uchar8_uchar16", + "sample_test.shuffle_built_in_dual_input_uchar16_uchar2", + 
"sample_test.shuffle_built_in_dual_input_uchar16_uchar4", + "sample_test.shuffle_built_in_dual_input_uchar16_uchar8", + "sample_test.shuffle_built_in_dual_input_uchar16_uchar16", + "sample_test.shuffle_built_in_dual_input_short2_short2", + "sample_test.shuffle_built_in_dual_input_short2_short4", + "sample_test.shuffle_built_in_dual_input_short2_short8", + "sample_test.shuffle_built_in_dual_input_short2_short16", + "sample_test.shuffle_built_in_dual_input_short4_short2", + "sample_test.shuffle_built_in_dual_input_short4_short4", + "sample_test.shuffle_built_in_dual_input_short4_short8", + "sample_test.shuffle_built_in_dual_input_short4_short16", + "sample_test.shuffle_built_in_dual_input_short8_short2", + "sample_test.shuffle_built_in_dual_input_short8_short4", + "sample_test.shuffle_built_in_dual_input_short8_short8", + "sample_test.shuffle_built_in_dual_input_short8_short16", + "sample_test.shuffle_built_in_dual_input_short16_short2", + "sample_test.shuffle_built_in_dual_input_short16_short4", + "sample_test.shuffle_built_in_dual_input_short16_short8", + "sample_test.shuffle_built_in_dual_input_short16_short16", + "sample_test.shuffle_built_in_dual_input_ushort2_ushort2", + "sample_test.shuffle_built_in_dual_input_ushort2_ushort4", + "sample_test.shuffle_built_in_dual_input_ushort2_ushort8", + "sample_test.shuffle_built_in_dual_input_ushort2_ushort16", + "sample_test.shuffle_built_in_dual_input_ushort4_ushort2", + "sample_test.shuffle_built_in_dual_input_ushort4_ushort4", + "sample_test.shuffle_built_in_dual_input_ushort4_ushort8", + "sample_test.shuffle_built_in_dual_input_ushort4_ushort16", + "sample_test.shuffle_built_in_dual_input_ushort8_ushort2", + "sample_test.shuffle_built_in_dual_input_ushort8_ushort4", + "sample_test.shuffle_built_in_dual_input_ushort8_ushort8", + "sample_test.shuffle_built_in_dual_input_ushort8_ushort16", + "sample_test.shuffle_built_in_dual_input_ushort16_ushort2", + "sample_test.shuffle_built_in_dual_input_ushort16_ushort4", + 
"sample_test.shuffle_built_in_dual_input_ushort16_ushort8", + "sample_test.shuffle_built_in_dual_input_ushort16_ushort16", + "sample_test.shuffle_built_in_dual_input_int2_int2", + "sample_test.shuffle_built_in_dual_input_int2_int4", + "sample_test.shuffle_built_in_dual_input_int2_int8", + "sample_test.shuffle_built_in_dual_input_int2_int16", + "sample_test.shuffle_built_in_dual_input_int4_int2", + "sample_test.shuffle_built_in_dual_input_int4_int4", + "sample_test.shuffle_built_in_dual_input_int4_int8", + "sample_test.shuffle_built_in_dual_input_int4_int16", + "sample_test.shuffle_built_in_dual_input_int8_int2", + "sample_test.shuffle_built_in_dual_input_int8_int4", + "sample_test.shuffle_built_in_dual_input_int8_int8", + "sample_test.shuffle_built_in_dual_input_int8_int16", + "sample_test.shuffle_built_in_dual_input_int16_int2", + "sample_test.shuffle_built_in_dual_input_int16_int4", + "sample_test.shuffle_built_in_dual_input_int16_int8", + "sample_test.shuffle_built_in_dual_input_int16_int16", + "sample_test.shuffle_built_in_dual_input_uint2_uint2", + "sample_test.shuffle_built_in_dual_input_uint2_uint4", + "sample_test.shuffle_built_in_dual_input_uint2_uint8", + "sample_test.shuffle_built_in_dual_input_uint2_uint16", + "sample_test.shuffle_built_in_dual_input_uint4_uint2", + "sample_test.shuffle_built_in_dual_input_uint4_uint4", + "sample_test.shuffle_built_in_dual_input_uint4_uint8", + "sample_test.shuffle_built_in_dual_input_uint4_uint16", + "sample_test.shuffle_built_in_dual_input_uint8_uint2", + "sample_test.shuffle_built_in_dual_input_uint8_uint4", + "sample_test.shuffle_built_in_dual_input_uint8_uint8", + "sample_test.shuffle_built_in_dual_input_uint8_uint16", + "sample_test.shuffle_built_in_dual_input_uint16_uint2", + "sample_test.shuffle_built_in_dual_input_uint16_uint4", + "sample_test.shuffle_built_in_dual_input_uint16_uint8", + "sample_test.shuffle_built_in_dual_input_uint16_uint16", + "sample_test.shuffle_built_in_dual_input_long2_long2", + 
"sample_test.shuffle_built_in_dual_input_long2_long4", + "sample_test.shuffle_built_in_dual_input_long2_long8", + "sample_test.shuffle_built_in_dual_input_long2_long16", + "sample_test.shuffle_built_in_dual_input_long4_long2", + "sample_test.shuffle_built_in_dual_input_long4_long4", + "sample_test.shuffle_built_in_dual_input_long4_long8", + "sample_test.shuffle_built_in_dual_input_long4_long16", + "sample_test.shuffle_built_in_dual_input_long8_long2", + "sample_test.shuffle_built_in_dual_input_long8_long4", + "sample_test.shuffle_built_in_dual_input_long8_long8", + "sample_test.shuffle_built_in_dual_input_long8_long16", + "sample_test.shuffle_built_in_dual_input_long16_long2", + "sample_test.shuffle_built_in_dual_input_long16_long4", + "sample_test.shuffle_built_in_dual_input_long16_long8", + "sample_test.shuffle_built_in_dual_input_long16_long16", + "sample_test.shuffle_built_in_dual_input_ulong2_ulong2", + "sample_test.shuffle_built_in_dual_input_ulong2_ulong4", + "sample_test.shuffle_built_in_dual_input_ulong2_ulong8", + "sample_test.shuffle_built_in_dual_input_ulong2_ulong16", + "sample_test.shuffle_built_in_dual_input_ulong4_ulong2", + "sample_test.shuffle_built_in_dual_input_ulong4_ulong4", + "sample_test.shuffle_built_in_dual_input_ulong4_ulong8", + "sample_test.shuffle_built_in_dual_input_ulong4_ulong16", + "sample_test.shuffle_built_in_dual_input_ulong8_ulong2", + "sample_test.shuffle_built_in_dual_input_ulong8_ulong4", + "sample_test.shuffle_built_in_dual_input_ulong8_ulong8", + "sample_test.shuffle_built_in_dual_input_ulong8_ulong16", + "sample_test.shuffle_built_in_dual_input_ulong16_ulong2", + "sample_test.shuffle_built_in_dual_input_ulong16_ulong4", + "sample_test.shuffle_built_in_dual_input_ulong16_ulong8", + "sample_test.shuffle_built_in_dual_input_ulong16_ulong16", + "sample_test.shuffle_built_in_dual_input_float2_float2", + "sample_test.shuffle_built_in_dual_input_float2_float4", + "sample_test.shuffle_built_in_dual_input_float2_float8", + 
"sample_test.shuffle_built_in_dual_input_float2_float16", + "sample_test.shuffle_built_in_dual_input_float4_float2", + "sample_test.shuffle_built_in_dual_input_float4_float4", + "sample_test.shuffle_built_in_dual_input_float4_float8", + "sample_test.shuffle_built_in_dual_input_float4_float16", + "sample_test.shuffle_built_in_dual_input_float8_float2", + "sample_test.shuffle_built_in_dual_input_float8_float4", + "sample_test.shuffle_built_in_dual_input_float8_float8", + "sample_test.shuffle_built_in_dual_input_float8_float16", + "sample_test.shuffle_built_in_dual_input_float16_float2", + "sample_test.shuffle_built_in_dual_input_float16_float4", + "sample_test.shuffle_built_in_dual_input_float16_float8", + "sample_test.shuffle_built_in_dual_input_float16_float16", + }; + + log_info("test_relationals\n"); + return test_suite(device, size_t_width, folder, test_name, sizeof(test_name) / sizeof(const char *), ""); +} + + +bool test_relationals_double (cl_device_id device, cl_uint size_t_width, const char *folder) +{ + static const char* test_name[] = { + "sample_test.relational_bitselect_double", + "sample_test.relational_bitselect_double2", + "sample_test.relational_bitselect_double3", + "sample_test.relational_bitselect_double4", + "sample_test.relational_bitselect_double8", + "sample_test.relational_bitselect_double16", + "sample_test.relational_isequal_double", + "sample_test.relational_isequal_double2", + "sample_test.relational_isequal_double3", + "sample_test.relational_isequal_double4", + "sample_test.relational_isequal_double8", + "sample_test.relational_isequal_double16", + "sample_test.relational_isnotequal_double", + "sample_test.relational_isnotequal_double2", + "sample_test.relational_isnotequal_double3", + "sample_test.relational_isnotequal_double4", + "sample_test.relational_isnotequal_double8", + "sample_test.relational_isnotequal_double16", + "sample_test.relational_isgreater_double", + "sample_test.relational_isgreater_double2", + 
"sample_test.relational_isgreater_double3", + "sample_test.relational_isgreater_double4", + "sample_test.relational_isgreater_double8", + "sample_test.relational_isgreater_double16", + "sample_test.relational_isgreaterequal_double", + "sample_test.relational_isgreaterequal_double2", + "sample_test.relational_isgreaterequal_double3", + "sample_test.relational_isgreaterequal_double4", + "sample_test.relational_isgreaterequal_double8", + "sample_test.relational_isgreaterequal_double16", + "sample_test.relational_isless_double", + "sample_test.relational_isless_double2", + "sample_test.relational_isless_double3", + "sample_test.relational_isless_double4", + "sample_test.relational_isless_double8", + "sample_test.relational_isless_double16", + "sample_test.relational_islessequal_double", + "sample_test.relational_islessequal_double2", + "sample_test.relational_islessequal_double3", + "sample_test.relational_islessequal_double4", + "sample_test.relational_islessequal_double8", + "sample_test.relational_islessequal_double16", + "sample_test.relational_islessgreater_double", + "sample_test.relational_islessgreater_double2", + "sample_test.relational_islessgreater_double3", + "sample_test.relational_islessgreater_double4", + "sample_test.relational_islessgreater_double8", + "sample_test.relational_islessgreater_double16", + "sample_test.shuffle_built_in_double2_double2", + "sample_test.shuffle_built_in_double2_double4", + "sample_test.shuffle_built_in_double2_double8", + "sample_test.shuffle_built_in_double2_double16", + "sample_test.shuffle_built_in_double4_double2", + "sample_test.shuffle_built_in_double4_double4", + "sample_test.shuffle_built_in_double4_double8", + "sample_test.shuffle_built_in_double4_double16", + "sample_test.shuffle_built_in_double8_double2", + "sample_test.shuffle_built_in_double8_double4", + "sample_test.shuffle_built_in_double8_double8", + "sample_test.shuffle_built_in_double8_double16", + "sample_test.shuffle_built_in_double16_double2", + 
"sample_test.shuffle_built_in_double16_double4", + "sample_test.shuffle_built_in_double16_double8", + "sample_test.shuffle_built_in_double16_double16", + "sample_test.shuffle_built_in_dual_input_double2_double2", + "sample_test.shuffle_built_in_dual_input_double2_double4", + "sample_test.shuffle_built_in_dual_input_double2_double8", + "sample_test.shuffle_built_in_dual_input_double2_double16", + "sample_test.shuffle_built_in_dual_input_double4_double2", + "sample_test.shuffle_built_in_dual_input_double4_double4", + "sample_test.shuffle_built_in_dual_input_double4_double8", + "sample_test.shuffle_built_in_dual_input_double4_double16", + "sample_test.shuffle_built_in_dual_input_double8_double2", + "sample_test.shuffle_built_in_dual_input_double8_double4", + "sample_test.shuffle_built_in_dual_input_double8_double8", + "sample_test.shuffle_built_in_dual_input_double8_double16", + "sample_test.shuffle_built_in_dual_input_double16_double2", + "sample_test.shuffle_built_in_dual_input_double16_double4", + "sample_test.shuffle_built_in_dual_input_double16_double8", + "sample_test.shuffle_built_in_dual_input_double16_double16", + }; + + log_info("test_relationals_double\n"); + return test_suite(device, size_t_width, folder, test_name, sizeof(test_name) / sizeof(const char *), "cl_khr_fp64"); +} + + +bool test_select (cl_device_id device, cl_uint size_t_width, const char *folder) +{ + static const char* test_name[] = { + "select_uchar_uchar", + "select_uchar2_uchar2", + "select_uchar3_uchar3", + "select_uchar4_uchar4", + "select_uchar8_uchar8", + "select_uchar16_uchar16", + "select_uchar_char", + "select_uchar2_char2", + "select_uchar3_char3", + "select_uchar4_char4", + "select_uchar8_char8", + "select_uchar16_char16", + "select_char_uchar", + "select_char2_uchar2", + "select_char3_uchar3", + "select_char4_uchar4", + "select_char8_uchar8", + "select_char16_uchar16", + "select_char_char", + "select_char2_char2", + "select_char3_char3", + "select_char4_char4", + 
"select_char8_char8", + "select_char16_char16", + "select_ushort_ushort", + "select_ushort2_ushort2", + "select_ushort3_ushort3", + "select_ushort4_ushort4", + "select_ushort8_ushort8", + "select_ushort16_ushort16", + "select_ushort_short", + "select_ushort2_short2", + "select_ushort3_short3", + "select_ushort4_short4", + "select_ushort8_short8", + "select_ushort16_short16", + "select_short_ushort", + "select_short2_ushort2", + "select_short3_ushort3", + "select_short4_ushort4", + "select_short8_ushort8", + "select_short16_ushort16", + "select_short_short", + "select_short2_short2", + "select_short3_short3", + "select_short4_short4", + "select_short8_short8", + "select_short16_short16", + "select_uint_uint", + "select_uint2_uint2", + "select_uint3_uint3", + "select_uint4_uint4", + "select_uint8_uint8", + "select_uint16_uint16", + "select_uint_int", + "select_uint2_int2", + "select_uint3_int3", + "select_uint4_int4", + "select_uint8_int8", + "select_uint16_int16", + "select_int_uint", + "select_int2_uint2", + "select_int3_uint3", + "select_int4_uint4", + "select_int8_uint8", + "select_int16_uint16", + "select_int_int", + "select_int2_int2", + "select_int3_int3", + "select_int4_int4", + "select_int8_int8", + "select_int16_int16", + "select_float_uint", + "select_float2_uint2", + "select_float3_uint3", + "select_float4_uint4", + "select_float8_uint8", + "select_float16_uint16", + "select_float_int", + "select_float2_int2", + "select_float3_int3", + "select_float4_int4", + "select_float8_int8", + "select_float16_int16", + "select_ulong_ulong", + "select_ulong2_ulong2", + "select_ulong3_ulong3", + "select_ulong4_ulong4", + "select_ulong8_ulong8", + "select_ulong16_ulong16", + "select_ulong_long", + "select_ulong2_long2", + "select_ulong3_long3", + "select_ulong4_long4", + "select_ulong8_long8", + "select_ulong16_long16", + "select_long_ulong", + "select_long2_ulong2", + "select_long3_ulong3", + "select_long4_ulong4", + "select_long8_ulong8", + "select_long16_ulong16", + 
"select_long_long", + "select_long2_long2", + "select_long3_long3", + "select_long4_long4", + "select_long8_long8", + "select_long16_long16", + }; + + log_info("test_select\n"); + return test_suite(device, size_t_width, folder, test_name, sizeof(test_name) / sizeof(const char *), ""); +} + + +bool test_select_double (cl_device_id device, cl_uint size_t_width, const char *folder) +{ + static const char* test_name[] = { + "select_double_ulong", + "select_double2_ulong2", + "select_double3_ulong3", + "select_double4_ulong4", + "select_double8_ulong8", + "select_double16_ulong16", + "select_double_long", + "select_double2_long2", + "select_double3_long3", + "select_double4_long4", + "select_double8_long8", + "select_double16_long16", + }; + + log_info("test_select_double\n"); + return test_suite(device, size_t_width, folder, test_name, sizeof(test_name) / sizeof(const char *), "cl_khr_fp64"); +} + +bool test_vec_align (cl_device_id device, cl_uint size_t_width, const char *folder) +{ + static const char* test_name[] = { + "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_char2", + "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_char3", + "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_char4", + "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_char8", + "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_char16", + "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_uchar2", + "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_uchar3", + "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_uchar4", + "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_uchar8", + "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_uchar16", + "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_short2", + "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_short3", + "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_short4", + 
"test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_short8", + "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_short16", + "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_ushort2", + "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_ushort3", + "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_ushort4", + "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_ushort8", + "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_ushort16", + "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_int2", + "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_int3", + "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_int4", + "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_int8", + "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_int16", + "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_uint2", + "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_uint3", + "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_uint4", + "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_uint8", + "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_uint16", + "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_long2", + "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_long3", + "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_long4", + "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_long8", + "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_long16", + "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_ulong2", + "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_ulong3", + "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_ulong4", + "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_ulong8", + "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_ulong16", + 
"test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_float2", + "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_float3", + "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_float4", + "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_float8", + "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_float16", + "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_char", + "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_charp", + "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_ucharp", + "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_shortp", + "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_ushortp", + "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_intp", + "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_uintp", + "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_longp", + "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_ulongp", + "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_floatp", + }; + + log_info("vec_align\n"); + return test_suite(device, size_t_width, folder, test_name, sizeof(test_name) / sizeof(const char *), ""); +} + + +bool test_vec_align_double (cl_device_id device, cl_uint size_t_width, const char *folder) +{ + static const char* test_name[] = { + "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_double2", + "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_double3", + "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_double4", + "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_double8", + "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_double16", + "test_vec_align_packed_struct_arr.vec_align_packed_struct_arr_doublep", + }; + + log_info("vec_align_double\n"); + return test_suite(device, size_t_width, folder, test_name, sizeof(test_name) / sizeof(const char *), "cl_khr_fp64"); +} + + +bool 
test_vec_step (cl_device_id device, cl_uint size_t_width, const char *folder) +{ + static const char* test_name[] = { + "test_step_var.step_var_char", + "test_step_var.step_var_char2", + "test_step_var.step_var_char3", + "test_step_var.step_var_char4", + "test_step_var.step_var_char8", + "test_step_var.step_var_char16", + "test_step_var.step_var_uchar", + "test_step_var.step_var_uchar2", + "test_step_var.step_var_uchar3", + "test_step_var.step_var_uchar4", + "test_step_var.step_var_uchar8", + "test_step_var.step_var_uchar16", + "test_step_var.step_var_short", + "test_step_var.step_var_short2", + "test_step_var.step_var_short3", + "test_step_var.step_var_short4", + "test_step_var.step_var_short8", + "test_step_var.step_var_short16", + "test_step_var.step_var_ushort", + "test_step_var.step_var_ushort2", + "test_step_var.step_var_ushort3", + "test_step_var.step_var_ushort4", + "test_step_var.step_var_ushort8", + "test_step_var.step_var_ushort16", + "test_step_var.step_var_int", + "test_step_var.step_var_int2", + "test_step_var.step_var_int3", + "test_step_var.step_var_int4", + "test_step_var.step_var_int8", + "test_step_var.step_var_int16", + "test_step_var.step_var_uint", + "test_step_var.step_var_uint2", + "test_step_var.step_var_uint3", + "test_step_var.step_var_uint4", + "test_step_var.step_var_uint8", + "test_step_var.step_var_uint16", + "test_step_var.step_var_long", + "test_step_var.step_var_long2", + "test_step_var.step_var_long3", + "test_step_var.step_var_long4", + "test_step_var.step_var_long8", + "test_step_var.step_var_long16", + "test_step_var.step_var_ulong", + "test_step_var.step_var_ulong2", + "test_step_var.step_var_ulong3", + "test_step_var.step_var_ulong4", + "test_step_var.step_var_ulong8", + "test_step_var.step_var_ulong16", + "test_step_var.step_var_float", + "test_step_var.step_var_float2", + "test_step_var.step_var_float3", + "test_step_var.step_var_float4", + "test_step_var.step_var_float8", + "test_step_var.step_var_float16", + }; + + 
log_info("vec_step\n"); + return test_suite(device, size_t_width, folder, test_name, sizeof(test_name) / sizeof(const char *), ""); +} + +bool test_vec_step_double (cl_device_id device, cl_uint size_t_width, const char *folder) +{ + static const char* test_name[] = { + "test_step_var.step_var_double", + "test_step_var.step_var_double2", + "test_step_var.step_var_double3", + "test_step_var.step_var_double4", + "test_step_var.step_var_double8", + "test_step_var.step_var_double16", + }; + + log_info("vec_step_double\n"); + return test_suite(device, size_t_width, folder, test_name, sizeof(test_name) / sizeof(const char *), "cl_khr_fp64"); +} + +template +void getT(const TestResult& res, unsigned arg, T& out) +{ + out = *(T*)(res.kernelArgs().getArg(arg)->getBuffer()); +} + +class LinkageTestService { + std::vector m_moduleNames; + const char* m_kernelName; + int m_expectedResult; + const char *m_name; + +public: + LinkageTestService(const char **moduleNames, int numModules, + const char *kernelName) : + m_moduleNames(numModules), + m_kernelName(kernelName), + m_expectedResult(-1), + m_name(NULL) { + std::copy(moduleNames, moduleNames+numModules, m_moduleNames.begin()); + } + + void setExpectedResult(int expectedRes) { + m_expectedResult = expectedRes; + } + + bool compareResult(cl_device_id dev, cl_uint width) { + clContextWrapper context; + clCommandQueueWrapper queue; + size_t num_modules = m_moduleNames.size(); + std::vector programs(num_modules); + create_context_and_queue(dev, &context, &queue); + + for (size_t i=0; i linkageTests; + linkageTests.push_back(new LinkageTestService(private_files, 2, "k")); + linkageTests.push_back(new LinkageTestService(internal_files, 2, "internal_linkage")); + linkageTests.push_back(new LinkageTestService(external_files, 2, "external_linkage")); + linkageTests.push_back(new LinkageTestService(available_externally_files, 2, "k")); + // Set tests Names. 
+ linkageTests[0]->setName("private_linkage"); + linkageTests[1]->setName("internal_linkage"); + linkageTests[2]->setName("external_linkage"); + linkageTests[3]->setName("available_externally"); + // Set expected results. + linkageTests[0]->setExpectedResult(std::string("spir_conformance").size()); + linkageTests[1]->setExpectedResult(1); + linkageTests[2]->setExpectedResult(42); + linkageTests[3]->setExpectedResult(42); + + unsigned int tests_passed = 0; + CounterEventHandler SuccE(tests_passed, linkageTests.size()); + std::list ErrList; + + for (size_t i=0; igetName()); + std::cout << linkageTests[i]->getName() << "..." << std::endl; + if(linkageTests[i]->compareResult(device, width)) + { + (SuccE)(linkageTests[i]->getName(), ""); + std::cout << linkageTests[i]->getName() << " passed." << std::endl; + } + else + { + + (FailE)(linkageTests[i]->getName(), ""); + std::cout << linkageTests[i]->getName() << " FAILED" << std::endl; + } + } + + std::cout << std::endl; + std::cout << "PASSED " << tests_passed << " of " << SuccE.TN << " tests.\n" << std::endl; + // Deallocating. 
+ std::for_each(linkageTests.begin(), linkageTests.end(), dealloc); + return tests_passed == SuccE.TN; +} + +static bool test_sampler_enumeration(cl_device_id device, cl_uint width, const char *folder) +{ + static const char* test_name[] = { + "sampler_NormF_AddrC_FilterL", + "sampler_NormF_AddrC_FilterN", + "sampler_NormF_AddrE_FilterL", + "sampler_NormF_AddrE_FilterN", + // "sampler_NormF_AddrM_FilterL" - Invalid combination + // "sampler_NormF_AddrM_FilterN" - Invalid combination + "sampler_NormF_AddrN_FilterL", + "sampler_NormF_AddrN_FilterN", + // "sampler_NormF_AddrR_FilterL" - Invalid combination + // "sampler_NormF_AddrR_FilterN" - Invalid combination + "sampler_NormT_AddrC_FilterL", + "sampler_NormT_AddrC_FilterN", + "sampler_NormT_AddrE_FilterL", + "sampler_NormT_AddrE_FilterN", + "sampler_NormT_AddrM_FilterL", + "sampler_NormT_AddrM_FilterN", + "sampler_NormT_AddrN_FilterL", + "sampler_NormT_AddrN_FilterN", + "sampler_NormT_AddrR_FilterL", + "sampler_NormT_AddrR_FilterN" + }; + + log_info("test_sampler_enum_values\n"); + return test_suite(device, width, folder, test_name, sizeof(test_name) / sizeof(const char *), ""); +} + +const char* HOSTVAL_SAMPLER = "hostval_sampler"; +const char* HOSTVAL_IMAGE_DESC = "hostval_image_desc"; +const char* HOSTVAL_IMAGE_DESC_3D = "hostval_image_desc_3d"; + +static bool test_image_enumeration(cl_context context, cl_command_queue queue, + cl_program prog, cl_device_id device, + CounterEventHandler &SuccE, std::list &ErrList) +{ + // Creating image descriptor value generator. 
+ ImageValuesGenerator imgVals; + bool success = true; + + for(ImageValuesGenerator::iterator it = imgVals.begin(), e = imgVals.end(); it != e; ++it) + { + bool currentSuccess = true; + AccumulatorEventHandler FailE(ErrList, it.toString()); + + std::string kernelName(HOSTVAL_IMAGE_DESC); + kernelName.append("_"); + kernelName.append(it.getImageTypeName()); + + if (it.getImageTypeName() == "image3d") + { + // If the type is a 3D image we continue to the next one + continue; + } + + // Saving the original image generator, for later restoration. + std::string baseGenName = it.getBaseImageGeneratorName(); + KernelArgInfo baseInfo; + baseInfo.setTypeName(baseGenName.c_str()); + DataGenerator *pDataGen = DataGenerator::getInstance(); + KernelArgGenerator* pOrig = pDataGen->getArgGenerator(baseInfo); + + try + { + // Creating the kernel for this specific enumeration. + WorkSizeInfo ws; + clKernelWrapper kernel = create_kernel_helper(prog, kernelName); + + // Acquiring a reference to the image generator we need for this image + // type. + KernelArgInfo typedInfo; + const std::string tyName = it.getImageGeneratorName(); + typedInfo.setTypeName(tyName.c_str()); + KernelArgGeneratorImage* pImgGen = (KernelArgGeneratorImage*)pDataGen->getArgGenerator(typedInfo); + + // If the channel order is not valid for the current image type, we + // continue to the next one. + if (!pImgGen->isValidChannelOrder(context, it.getOpenCLChannelOrder())) + continue; + + // Due to unknown number of types at the beggining count them on the fly + SuccE.TN++; + + // Configuring the image generator so it will produce the correct image + // descriptor. + pImgGen->setChannelOrder(it.getOpenCLChannelOrder()); + pDataGen->setArgGenerator(baseInfo, pImgGen); + + // Generate the arguments and run the kernel. + TestResult res; + generate_kernel_data(context, kernel, ws, res); + run_kernel(kernel, queue, ws, res); + + // Informing the result. + std::cout << "enum_" << it.toString() << "..." 
<< std::endl; + int actualOrder = 0, actualTy = 0; + getT(res, 1U, actualOrder), getT(res, 2U, actualTy); + if (actualOrder != it.getSPIRChannelOrder()) + { + std::cout << " expected channel order: " << it.getSPIRChannelOrder() + << " but received " << actualOrder << "." << std::endl; + success = currentSuccess = false; + } + + if (actualTy != it.getDataType()) + { + std::cout << " expected data type: " << it.getDataType() + << " but received " << actualTy << "." << std::endl; + success = currentSuccess = false; + } + + if (currentSuccess) + { + (SuccE)(it.toString(), kernelName); + std::cout << "enum_" << it.toString() << " passed." << std::endl; + } + else + { + (FailE)(it.toString(), kernelName); + std::cout << "enum_" << it.toString() << " FAILED" << std::endl; + } + } catch(std::exception e) + { + (FailE)(it.toString(), kernelName); + print_error(1, e.what()); + success = currentSuccess = false; + } + + // Restore the base image generator to its original value. + pDataGen->setArgGenerator(baseInfo, pOrig); + } + + return success; +} + +static bool test_image_enumeration_3d(cl_context context, cl_command_queue queue, + cl_program prog, cl_device_id device, + CounterEventHandler &SuccE, std::list &ErrList) +{ + // Creating image descriptor value generator. + ImageValuesGenerator imgVals; + bool success = true; + + for(ImageValuesGenerator::iterator it = imgVals.begin(), e = imgVals.end(); it != e; ++it) + { + bool currentSuccess = true; + AccumulatorEventHandler FailE(ErrList, it.toString()); + + std::string kernelName(HOSTVAL_IMAGE_DESC); + kernelName.append("_"); + kernelName.append(it.getImageTypeName()); + + if (it.getImageTypeName() != "image3d") + { + // If the type is not a 3D image we continue to the next one + continue; + } + + // Saving the original image generator, for later restoration. 
+ std::string baseGenName = it.getBaseImageGeneratorName(); + KernelArgInfo baseInfo; + baseInfo.setTypeName(baseGenName.c_str()); + DataGenerator *pDataGen = DataGenerator::getInstance(); + KernelArgGenerator* pOrig = pDataGen->getArgGenerator(baseInfo); + + try + { + // Creating the kernel for this specific enumeration. + WorkSizeInfo ws; + clKernelWrapper kernel = create_kernel_helper(prog, kernelName); + + // Acquiring a reference to the image generator we need for this image + // type. + KernelArgInfo typedInfo; + const std::string tyName = it.getImageGeneratorName(); + typedInfo.setTypeName(tyName.c_str()); + KernelArgGeneratorImage* pImgGen = (KernelArgGeneratorImage*)pDataGen->getArgGenerator(typedInfo); + + // If the channel order is not valid for the current image type, we + // continue to the next one. + if (!pImgGen->isValidChannelOrder(context, it.getOpenCLChannelOrder())) + continue; + + // Due to unknown number of types at the beggining count them on the fly + SuccE.TN++; + + // Configuring the image generator so it will produce the correct image + // descriptor. + pImgGen->setChannelOrder(it.getOpenCLChannelOrder()); + pDataGen->setArgGenerator(baseInfo, pImgGen); + + // Generate the arguments and run the kernel. + TestResult res; + generate_kernel_data(context, kernel, ws, res); + run_kernel(kernel, queue, ws, res); + + // Informing the result. + std::cout << "enum_" << it.toString() << "..." << std::endl; + int actualOrder = 0, actualTy = 0; + getT(res, 1U, actualOrder), getT(res, 2U, actualTy); + if (actualOrder != it.getSPIRChannelOrder()) + { + std::cout << " expected channel order: " << it.getSPIRChannelOrder() + << " but received " << actualOrder << "." << std::endl; + success = currentSuccess = false; + } + + if (actualTy != it.getDataType()) + { + std::cout << " expected data type: " << it.getDataType() + << " but received " << actualTy << "." 
<< std::endl; + success = currentSuccess = false; + } + + if (currentSuccess) + { + (SuccE)(it.toString(), kernelName); + std::cout << "enum_" << it.toString() << " passed." << std::endl; + } + else + { + (FailE)(it.toString(), kernelName); + std::cout << "enum_" << it.toString() << " FAILED" << std::endl; + } + } catch(std::exception e) + { + (FailE)(it.toString(), kernelName); + print_error(1, e.what()); + success = currentSuccess = false; + } + + // Restore the base image generator to its original value. + pDataGen->setArgGenerator(baseInfo, pOrig); + } + + return success; +} + +static bool test_enum_values(cl_device_id device, cl_uint width, const char *folder) +{ + try_extract(folder); + std::cout << "Running tests:" << std::endl; + bool success = true; + typedef bool (*EnumTest)(cl_context, cl_command_queue, cl_program, cl_device_id, CounterEventHandler &SuccE, std::list &ErrList); + EnumTest test_functions[] = { test_image_enumeration, test_image_enumeration_3d }; + const char *enum_tests[] = { HOSTVAL_IMAGE_DESC, HOSTVAL_IMAGE_DESC_3D }; + const size_t TEST_NUM = sizeof(enum_tests)/sizeof(char*); + + unsigned int tests_passed = 0; + CounterEventHandler SuccE(tests_passed, 0); + std::list ErrList; + + // Composing the name of the CSV file. + char* dir = get_exe_dir(); + std::string csvName(dir); + csvName.append(dir_sep()); + csvName.append("khr.csv"); + free(dir); + + // Figure out whether the test can run on the device. If not, we skip it. 
+ const KhrSupport& khrDb = *KhrSupport::get(csvName); + + for (size_t i=0; i &split(const std::string &s, char delim, std::vector &elems) +{ + std::stringstream ss(s); + std::string item; + while (std::getline(ss, item, delim)) { + elems.push_back(item); + } + return elems; +} + + +static bool +test_kernel_attributes(cl_device_id device, cl_uint width, const char *folder) +{ + try_extract(folder); + std::cout << "Running tests:" << std::endl; + bool success = true; + clContextWrapper context; + std::string bc_file_path; + clCommandQueueWrapper queue; + clKernelWrapper kernel; + char attributes[256] = {0}; + size_t i, res_size = 0; + + unsigned int tests_passed = 0; + CounterEventHandler SuccE(tests_passed, 1); + std::list ErrList; + std::string test_name("kernel_attributes"); + + log_info("kernel_attributes...\n"); + AccumulatorEventHandler FailE(ErrList, test_name); + + try + { + create_context_and_queue(device, &context, &queue); + get_bc_file_path(folder, "kernel_attributes", bc_file_path, width); + clProgramWrapper bcprog = create_program_from_bc(context, bc_file_path); + + // Building the program, so we could create the kernel. + SpirBuildTask build_task(bcprog, device, "-x spir -spir-std=1.2 -cl-kernel-arg-info"); + if (!build_task.execute()) + { + std::cerr << "Cannot run kernel_attributes suite due to the following build error: " + << build_task.getErrorLog() + << std::endl; + throw std::exception(); + } + + // Querying the kernel for its attributes. + kernel = create_kernel_helper(bcprog, "test"); + cl_int err_code = clGetKernelInfo(kernel, CL_KERNEL_ATTRIBUTES, sizeof(attributes), attributes, &res_size); + if (err_code != CL_SUCCESS) + { + std::cerr << "clGetKernelInfo unable retrieve kernel attributes (error code: " << err_code << " )\n"; + throw std::exception(); + } + + // Building the expected attributes vector. 
+ std::vector expected; + expected.push_back(std::string("work_group_size_hint(64,1,1)")); + expected.push_back(std::string("vec_type_hint(float4)")); + + std::vector actual; + split(attributes, ' ', actual); + + for(i = 0; i < expected.size(); ++i) + { + if(std::find(actual.begin(), actual.end(), expected[i]) == actual.end()) + { + // Attribute not found + std::cout << "Extracted from kernel: " << attributes << std::endl; + std::cerr << "expected " << expected[i] << " attribute not found" << std::endl; + throw std::exception(); + } + } + (SuccE)(test_name, ""); + log_info("kernel_attributes passed.\n"); + } catch(std::exception e) + { + (FailE)(test_name, ""); + log_info("kernel_attributes FAILED\n"); + success = false; + } + + std::cout << std::endl; + std::cout << "PASSED " << tests_passed << " of " << 1 << " tests.\n" << std::endl; + + if (!ErrList.empty()) + { + std::cout << "Failed tests:" << std::endl; + std::for_each(ErrList.begin(), ErrList.end(), printError); + } + std::cout << std::endl; + return success; +} + +static bool test_binary_type(cl_device_id device, cl_uint width, const char *folder) +{ + std::string bc_file_path; + clContextWrapper context; + clCommandQueueWrapper queue; + + // Extract the suite if needed. + try_extract(folder); + std::cout << "Running tests:" << std::endl; + bool success = true; + unsigned int tests_passed = 0; + CounterEventHandler SuccE(tests_passed, 1); + std::list ErrList; + std::string test_name("binary_type"); + + log_info("binary_type...\n"); + AccumulatorEventHandler FailE(ErrList, test_name); + + try + { + // Creating the program object. + get_bc_file_path(folder, "simple", bc_file_path, width); + create_context_and_queue(device, &context, &queue); + clProgramWrapper clprog = create_program_from_bc(context, bc_file_path); + + // Checking the attribute matches the requierment in Section 9.15.2 of the + // extensions SPEC. 
+ cl_int binary_type = 0; + size_t ret_size = 0; + if (cl_int err_code = clGetProgramBuildInfo(clprog, device, CL_PROGRAM_BINARY_TYPE, sizeof(cl_int), &binary_type, &ret_size)) + { + std::cerr << "Cannot run test_binary_type suite due to the " + << "following build error: " + << err_code << std::endl; + throw std::exception(); + } + + assert(ret_size == sizeof(cl_int) && "Return size doesn't match."); + if (binary_type != CL_PROGRAM_BINARY_TYPE_INTERMEDIATE) + { + std::cerr << "binary type is " << binary_type + << " as opposed to " << CL_PROGRAM_BINARY_TYPE_INTERMEDIATE + << " which is the expected value." << std::endl; + throw std::exception(); + } + (SuccE)(test_name, ""); + log_info("binary_type passed.\n"); + } catch(std::exception e) + { + (FailE)(test_name, ""); + log_info("binary_type FAILED\n"); + success = false; + } + + + std::cout << std::endl; + std::cout << "PASSED " << tests_passed << " of " << 1 << " tests.\n" << std::endl; + + if (!ErrList.empty()) + { + std::cout << "Failed tests:" << std::endl; + std::for_each(ErrList.begin(), ErrList.end(), printError); + } + std::cout << std::endl; + return success; +} + +struct sub_suite +{ + const char *name; + const char *folder; + const testfn test_function; +}; + +static const sub_suite spir_suites[] = { + {"api", "api", test_api}, + {"api_double", "api", test_api_double}, + {"atomics", "atomics", test_atomics}, + {"basic", "basic", test_basic}, + {"basic_double", "basic", test_basic_double}, + {"commonfns", "commonfns", test_commonfns}, + {"commonfns_double", "commonfns", test_commonfns_double}, + {"conversions", "conversions", test_conversions}, + {"conversions_double", "conversions", test_conversions_double}, + {"geometrics", "geometrics", test_geometrics}, + {"geometrics_double", "geometrics", test_geometrics_double}, + {"half", "half", test_half}, + {"half_double", "half", test_half_double}, + {"kernel_image_methods", "kernel_image_methods", test_kernel_image_methods}, + {"images_kernel_read_write", 
"images_kernel_read_write", test_images_kernel_read_write}, + {"images_samplerlessRead", "images_samplerlessRead", test_images_samplerless_read}, + {"integer_ops", "integer_ops", test_integer_ops}, + {"math_brute_force", "math_brute_force", test_math_brute_force}, + {"math_brute_force_double", "math_brute_force", test_math_brute_force_double}, + {"printf", "printf", test_printf}, + {"profiling", "profiling", test_profiling}, + {"relationals", "relationals", test_relationals}, + {"relationals_double", "relationals", test_relationals_double}, + {"select", "select", test_select}, + {"select_double", "select", test_select_double}, + {"vec_align", "vec_align", test_vec_align}, + {"vec_align_double", "vec_align", test_vec_align_double}, + {"vec_step", "vec_step", test_vec_step}, + {"vec_step_double", "vec_step", test_vec_step_double}, + {"compile_and_link", "compile_and_link", test_compile_and_link}, + {"sampler_enumeration", "sampler_enumeration", test_sampler_enumeration}, + {"enum_values", "enum_values", test_enum_values}, + {"kernel_attributes", "kernel_attributes", test_kernel_attributes}, + {"binary_type", "binary_type", test_binary_type}, +}; + + +/** +Utility function using to find a specific sub-suite name in the SPIR tests. +Called in case the user asked for running a specific sub-suite or specific tests. + */ +static int find_suite_name (std::string suite_name) +{ + for (unsigned int i = 0; i < sizeof(spir_suites) / sizeof(sub_suite); ++i) + { + if (0 == suite_name.compare(spir_suites[i].name)) + { + return i; + } + } + return -1; +} + + +/** +Look for the first device from the first platform . 
+ */
+cl_device_id get_platform_device (cl_device_type device_type, cl_uint choosen_device_index, cl_uint choosen_platform_index)
+{
+    int error = CL_SUCCESS;
+    cl_uint num_platforms = 0;
+    cl_platform_id *platforms;
+    cl_uint num_devices = 0;
+    cl_device_id *devices = NULL;
+
+    /* First call queries only the platform count so we can size the buffer. */
+    error = clGetPlatformIDs(0, NULL, &num_platforms);
+    if ( error != CL_SUCCESS )
+    {
+        throw std::runtime_error("clGetPlatformIDs failed: " + std::string(IGetErrorString(error)));
+    }
+    if ( choosen_platform_index >= num_platforms )
+    {
+        throw std::runtime_error("platform index out of range");
+    }
+
+    platforms = (cl_platform_id *) malloc( num_platforms * sizeof( cl_platform_id ) );
+    if ( !platforms )
+    {
+        throw std::runtime_error("platform malloc failed");
+    }
+    // Owns the malloc'ed buffer; released automatically when this scope unwinds
+    // (including on the throws below).
+    BufferOwningPtr<cl_platform_id> platformsBuf(platforms);
+
+    error = clGetPlatformIDs(num_platforms, platforms, NULL);
+    if ( error != CL_SUCCESS )
+    {
+        throw std::runtime_error("clGetPlatformIDs failed: " + std::string(IGetErrorString(error)));
+    }
+
+    /* Count the devices of the requested type on the chosen platform. */
+    error = clGetDeviceIDs(platforms[choosen_platform_index], device_type, 0, NULL, &num_devices );
+    if ( error != CL_SUCCESS )
+    {
+        throw std::runtime_error("clGetDeviceIDs failed: " + std::string(IGetErrorString(error)));
+    }
+    if ( choosen_device_index >= num_devices )
+    {
+        // Fixed typo in the error message ("rangen" -> "range").
+        throw std::runtime_error("device index out of range");
+    }
+
+    devices = (cl_device_id *) malloc( num_devices * sizeof( cl_device_id ) );
+    if ( !devices )
+    {
+        throw std::runtime_error("device malloc failed");
+    }
+    BufferOwningPtr<cl_device_id> devicesBuf(devices);
+
+    /* Fetch the actual device IDs and hand back the requested one. */
+    error = clGetDeviceIDs(platforms[choosen_platform_index], device_type, num_devices, devices, NULL );
+    if ( error != CL_SUCCESS )
+    {
+        throw std::runtime_error("clGetDeviceIDs failed: " + std::string(IGetErrorString(error)));
+    }
+
+    return devices[choosen_device_index];
+}
+
+
+/**
+ Parses the command line parameters and set the
+ appropriate global variables
accordingly + The valid options are: + a) none - run all SPIR tests + b) one argument (tests-suite name) - run one SPIR tests-suite + c) two arguments (tests-suite name and test name) - run one SPIR test + */ +static int ParseCommandLine (int argc, const char *argv[], + std::string& suite_name, std::string& test_name, cl_device_type *device_type, cl_uint *device_index, cl_uint *platform_index, cl_uint *size_t_width) +{ + int based_on_env_var = 0; + + /* Check for environment variable to set device type */ + char *env_mode = getenv( "CL_DEVICE_TYPE" ); + if( env_mode != NULL ) + { + based_on_env_var = 1; + if( strcmp( env_mode, "gpu" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_GPU" ) == 0 ) + *device_type = CL_DEVICE_TYPE_GPU; + else if( strcmp( env_mode, "cpu" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_CPU" ) == 0 ) + *device_type = CL_DEVICE_TYPE_CPU; + else if( strcmp( env_mode, "accelerator" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_ACCELERATOR" ) == 0 ) + *device_type = CL_DEVICE_TYPE_ACCELERATOR; + else if( strcmp( env_mode, "default" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_DEFAULT" ) == 0 ) + *device_type = CL_DEVICE_TYPE_DEFAULT; + else + { + throw Exceptions::CmdLineError( "Unknown CL_DEVICE_TYPE env variable setting\n"); + } + } + + env_mode = getenv( "CL_DEVICE_INDEX" ); + if( env_mode != NULL ) + { + *device_index = atoi(env_mode); + } + + env_mode = getenv( "CL_PLATFORM_INDEX" ); + if( env_mode != NULL ) + { + *platform_index = atoi(env_mode); + } + + /* Process the command line arguments */ + + /* Special case: just list the tests */ + if( ( argc > 1 ) && (!strcmp( argv[ 1 ], "-list" ) || !strcmp( argv[ 1 ], "-h" ) || !strcmp( argv[ 1 ], "--help" ))) + { + log_info( "Usage: %s [] [pid] [id] [] [w32] [no-unzip]\n", argv[0] ); + log_info( "\t\tOne or more of: (default all)\n"); + log_info( "\tpid\t\tIndicates platform at index should be used (default 0).\n" ); + log_info( "\tid\t\tIndicates device at index should be used (default 0).\n" ); + 
log_info( "\t\tcpu|gpu|accelerator| (default CL_DEVICE_TYPE_DEFAULT)\n" ); + log_info( "\tw32\t\tIndicates device address bits is 32.\n" ); + log_info( "\tno-unzip\t\tDo not extract test files from Zip; use existing.\n" ); + + for( unsigned int i = 0; i < (sizeof(spir_suites) / sizeof(sub_suite)); i++ ) + { + log_info( "\t\t%s\n", spir_suites[i].name ); + } + return 0; + } + + /* Do we have a CPU/GPU specification? */ + while( argc > 1 ) + { + if( strcmp( argv[ argc - 1 ], "gpu" ) == 0 || strcmp( argv[ argc - 1 ], "CL_DEVICE_TYPE_GPU" ) == 0 ) + { + *device_type = CL_DEVICE_TYPE_GPU; + argc--; + } + else if( strcmp( argv[ argc - 1 ], "cpu" ) == 0 || strcmp( argv[ argc - 1 ], "CL_DEVICE_TYPE_CPU" ) == 0 ) + { + *device_type = CL_DEVICE_TYPE_CPU; + argc--; + } + else if( strcmp( argv[ argc - 1 ], "accelerator" ) == 0 || strcmp( argv[ argc - 1 ], "CL_DEVICE_TYPE_ACCELERATOR" ) == 0 ) + { + *device_type = CL_DEVICE_TYPE_ACCELERATOR; + argc--; + } + else if( strcmp( argv[ argc - 1 ], "CL_DEVICE_TYPE_DEFAULT" ) == 0 ) + { + *device_type = CL_DEVICE_TYPE_DEFAULT; + argc--; + } + else if( strcmp( argv[ argc - 1 ], "w32" ) == 0 ) + { + *size_t_width = 32; + argc--; + } + else if( strcmp( argv[ argc - 1 ], "no-unzip" ) == 0 ) + { + no_unzip = 1; + argc--; + } + else break; + } + + /* Did we choose a specific device index? */ + if( argc > 1 ) + { + if( strlen( argv[ argc - 1 ] ) >= 3 && argv[ argc - 1 ][0] == 'i' && argv[ argc - 1 ][1] == 'd' ) + { + *device_index = atoi( &(argv[ argc - 1 ][2]) ); + argc--; + } + } + + /* Did we choose a specific platform index? 
*/ + if( argc > 1 ) + { + if( strlen( argv[ argc - 1 ] ) >= 3 && argv[ argc - 1 ][0] == 'p' && argv[ argc - 1 ][1] == 'i' && argv[ argc - 1 ][2] == 'd') + { + *platform_index = atoi( &(argv[ argc - 1 ][3]) ); + argc--; + } + } + + switch( *device_type ) + { + case CL_DEVICE_TYPE_GPU: + log_info( "Requesting GPU device " ); + break; + case CL_DEVICE_TYPE_CPU: + log_info( "Requesting CPU device " ); + break; + case CL_DEVICE_TYPE_ACCELERATOR: + log_info( "Requesting Accelerator device " ); + break; + case CL_DEVICE_TYPE_DEFAULT: + log_info( "Requesting Default device " ); + break; + default: + throw Exceptions::CmdLineError( "Requesting unknown device "); + break; + } + log_info( based_on_env_var ? "based on environment variable " : "based on command line " ); + log_info( "for platform index %d and device index %d\n", *platform_index, *device_index); + + if (argc > 3) + { + throw Exceptions::CmdLineError("Command line error. Unrecognized token\n"); + } + else { + if (argc > 1) + { + suite_name.assign(argv[1]); + } + if (argc == 3) + { + test_name.assign(argv[2]); + } + } + + return 1; +} + +struct WLMsg: EventHandler +{ + const char* Msg; + + WLMsg(const char* M): Msg(M){} + + void operator()(const std::string& T, const std::string& K) + { + std::cout << "Test " << T << " Kernel " << K << "\t" << Msg << std::endl; + } +}; + + +int main (int argc, const char* argv[]) +{ + std::string test_suite_name; // name of the selected tests-suite (NULL for all) + std::string test_file_name; // name of the .selected test (NULL for all) + cl_device_type device_type = CL_DEVICE_TYPE_DEFAULT; + cl_uint choosen_device_index = 0; + cl_uint choosen_platform_index = 0; + cl_uint size_t_width = 0; // device address bits (32 or 64). 
+ cl_int err; + int failed = 0; + int ntests = 0; + custom_cout atf_info; + custom_cerr atf_error; + override_buff atf_cout(std::cout, atf_info); + override_buff atf_err(std::cerr, atf_error); + + WLMsg Success("\t\tPassed"), Failure("\t\tFailure"); + try + { + if (ParseCommandLine(argc, argv, test_suite_name, test_file_name, &device_type, &choosen_device_index, &choosen_platform_index, &size_t_width) == 0) + return 0; + + cl_device_id device = get_platform_device(device_type, choosen_device_index, choosen_platform_index); + printDeviceHeader(device); + + std::vector versions; + get_spir_version(device, versions); + if (!is_extension_available( device, "cl_khr_spir") || + std::find(versions.begin(), versions.end(), 1.2f) == versions.end()) + { + log_info("Spir extension version 1.2 is not supported by the device\n"); + return 0; + } + + // size_t_width <> 0 - device address bits is forced by command line argument + if ((0 == size_t_width) && ((err = clGetDeviceInfo(device, CL_DEVICE_ADDRESS_BITS, sizeof(cl_uint), &size_t_width, NULL)))) + { + print_error( err, "Unable to obtain device address bits" ); + return -1; + } + + if (! test_suite_name.empty()) + { + // command line is not empty - do not run all the tests + int tsn = find_suite_name(test_suite_name); + ntests = 1; + if (tsn < 0) + { + throw Exceptions::CmdLineError("Command line error. 
Error in SPIR sub-suite name\n"); + } + else if (test_file_name.empty()) + { + if (!spir_suites[tsn].test_function(device, size_t_width, spir_suites[tsn].folder)) + failed++; + } + else + { + OclExtensions devExt = OclExtensions::getDeviceCapabilities(device); + TestRunner runner(&Success, &Failure, devExt); + std::string folder = getTestFolder(test_suite_name.c_str()); + try_extract(folder.c_str()); + if (!runner.runBuildTest(device, folder.c_str(), test_file_name.c_str(), size_t_width)) + failed++; + } + } + else + { + // Run all the tests + ntests = (sizeof(spir_suites) / sizeof(spir_suites[0])); + for (unsigned int i = 0; i < ntests; ++i) + { + if (!spir_suites[i].test_function(device, size_t_width, spir_suites[i].folder)) + failed++; + } + } + if (failed) + std::cout << "FAILED " << failed << " of " << ntests << " test suites.\n" << std::endl; + else + std::cout << "PASSED " << ntests << " of " << ntests << " test suites.\n" << std::endl; + return failed; + } + catch(const Exceptions::CmdLineError& e) + { + print_error(1, e.what()); + return 1; + } + catch(const std::runtime_error& e) + { + print_error(2, e.what()); + return 2; + } + catch(const std::exception& e) + { + print_error(3, e.what()); + return 3; + } +} + diff --git a/test_conformance/spir/math_brute_force.zip b/test_conformance/spir/math_brute_force.zip new file mode 100644 index 00000000..b27a4248 Binary files /dev/null and b/test_conformance/spir/math_brute_force.zip differ diff --git a/test_conformance/spir/printf.zip b/test_conformance/spir/printf.zip new file mode 100644 index 00000000..7f10759e Binary files /dev/null and b/test_conformance/spir/printf.zip differ diff --git a/test_conformance/spir/profiling.zip b/test_conformance/spir/profiling.zip new file mode 100644 index 00000000..33ac3396 Binary files /dev/null and b/test_conformance/spir/profiling.zip differ diff --git a/test_conformance/spir/relationals.zip b/test_conformance/spir/relationals.zip new file mode 100644 index 
00000000..95439ab8 Binary files /dev/null and b/test_conformance/spir/relationals.zip differ diff --git a/test_conformance/spir/run_build_test.cpp b/test_conformance/spir/run_build_test.cpp new file mode 100644 index 00000000..c00fcf82 --- /dev/null +++ b/test_conformance/spir/run_build_test.cpp @@ -0,0 +1,422 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" + +#ifdef __APPLE__ +#include +#else +#include +#endif + +#include +#include +#include +#include +#include + +#include "../../test_common/harness/errorHelpers.h" +#include "../../test_common/harness/kernelHelpers.h" +#include "../../test_common/harness/typeWrappers.h" +#include "../../test_common/harness/clImageHelper.h" +#include "../../test_common/harness/os_helpers.h" + +#include "exceptions.h" +#include "kernelargs.h" +#include "datagen.h" +#include "run_services.h" +#include "run_build_test.h" + +// +// Task +// +Task::Task(cl_device_id device, const char* options): +m_devid(device) { + if (options) + m_options = options; +} + +Task::~Task() {} + +const char* Task::getErrorLog() const { + return m_log.c_str(); +} + +void Task::setErrorLog(cl_program prog) { + size_t len = 0; + std::vector log; + + cl_int err_code = clGetProgramBuildInfo(prog, m_devid, CL_PROGRAM_BUILD_LOG, 0, NULL, &len); + if(err_code != CL_SUCCESS) + { + m_log = "Error: clGetProgramBuildInfo(CL_PROGRAM_BUILD_LOG, &len) 
failed.\n"; + return; + } + + log.resize(len, 0); + + err_code = clGetProgramBuildInfo(prog, m_devid, CL_PROGRAM_BUILD_LOG, len, &log[0], NULL); + if(err_code != CL_SUCCESS) + { + m_log = "Error: clGetProgramBuildInfo(CL_PROGRAM_BUILD_LOG, &log) failed.\n"; + return; + } + m_log.append(&log[0]); +} + +// +// BuildTask +// +BuildTask::BuildTask(cl_program prog, cl_device_id dev, const char* options) : + m_program(prog), Task(dev, options) {} + +bool BuildTask::execute() { + cl_int err_code = clBuildProgram(m_program, 0, NULL, m_options.c_str(), NULL, NULL); + if(CL_SUCCESS == err_code) + return true; + + setErrorLog(m_program); + return false; +} + +// +// SpirBuildTask +// +SpirBuildTask::SpirBuildTask(cl_program prog, cl_device_id dev, const char* options) : + BuildTask(prog, dev, options) {} + +// +// CompileTask +// + +CompileTask::CompileTask(cl_program prog, cl_device_id dev, const char* options) : + m_program(prog), Task(dev, options) {} + +void CompileTask::addHeader(const char* hname, cl_program hprog) { + m_headers.push_back(std::make_pair(hname, hprog)); +} + +const char* first(std::pair& p) { + return p.first; +} + +cl_program second(const std::pair& p) { + return p.second; +} + +bool CompileTask::execute() { + // Generating the header names vector. + std::vector names; + std::transform(m_headers.begin(), m_headers.end(), names.begin(), first); + + // Generating the header programs vector. + std::vector programs; + std::transform(m_headers.begin(), m_headers.end(), programs.begin(), second); + + const char** h_names = NULL; + const cl_program* h_programs = NULL; + if (!m_headers.empty()) + { + h_programs = &programs[0]; + h_names = &names[0]; + } + + // Compiling with the headers. 
+ cl_int err_code = clCompileProgram( + m_program, + 1U, + &m_devid, + m_options.c_str(), + m_headers.size(), // # of headers + h_programs, + h_names, + NULL, NULL); + if (CL_SUCCESS == err_code) + return true; + + setErrorLog(m_program); + return false; +} + +// +// SpirCompileTask +// +SpirCompileTask::SpirCompileTask(cl_program prog, cl_device_id dev, const char* options) : + CompileTask(prog, dev, options) {} + + +// +// LinkTask +// +LinkTask::LinkTask(cl_program* programs, int num_programs, cl_context ctxt, + cl_device_id dev, const char* options) : + m_programs(programs), m_numPrograms(num_programs), m_context(ctxt), m_executable(NULL), + Task(dev, options) {} + +bool LinkTask::execute() { + cl_int err_code; + int i; + + for(i = 0; i < m_numPrograms; ++i) + { + err_code = clCompileProgram(m_programs[i], 1, &m_devid, "-x spir -spir-std=1.2 -cl-kernel-arg-info", 0, NULL, NULL, NULL, NULL); + if (CL_SUCCESS != err_code) + { + setErrorLog(m_programs[i]); + return false; + } + } + + m_executable = clLinkProgram(m_context, 1, &m_devid, m_options.c_str(), m_numPrograms, m_programs, NULL, NULL, &err_code); + if (CL_SUCCESS == err_code) + return true; + + if(m_executable) setErrorLog(m_executable); + return false; +} + +cl_program LinkTask::getExecutable() const { + return m_executable; +} + +LinkTask::~LinkTask() { + if(m_executable) clReleaseProgram(m_executable); +} + +// +// KernelEnumerator +// +void KernelEnumerator::process(cl_program prog) { + const size_t MAX_KERNEL_NAME = 64; + size_t num_kernels; + + cl_int err_code = clGetProgramInfo( + prog, + CL_PROGRAM_NUM_KERNELS, + sizeof(size_t), + &num_kernels, + NULL + ); + if (CL_SUCCESS != err_code) + return; + + // Querying for the number of kernels. 
+ size_t buffer_len = sizeof(char)*num_kernels*MAX_KERNEL_NAME; + char* kernel_names = new char[buffer_len]; + memset(kernel_names, '\0', buffer_len); + size_t str_len = 0; + err_code = clGetProgramInfo( + prog, + CL_PROGRAM_KERNEL_NAMES, + buffer_len, + (void *)kernel_names, + &str_len + ); + if (CL_SUCCESS != err_code) + return; + + //parsing the names and inserting them to the list + std::string names(kernel_names); + assert (str_len == 1+names.size() && "incompatible string lengths"); + size_t offset = 0; + for(size_t i=0 ; i bc_result; + // first, run the single CL test + { + // make sure that the kernel will be released before the program + clKernelWrapper kernel = create_kernel_helper(clprog, kernel_name); + // based on the kernel characteristics, we are generating and initializing the arguments for both phases (cl and bc executions) + generate_kernel_data(context, kernel, ws, cl_result); + bc_result.reset(cl_result.clone(context, ws, kernel, device)); + assert (compare_results(cl_result, *bc_result) && "not equal?"); + run_kernel( kernel, queue, ws, cl_result ); + } + // now, run the single BC test + { + // make sure that the kernel will be released before the program + clKernelWrapper kernel = create_kernel_helper(bcprog, kernel_name); + run_kernel( kernel, queue, ws, *bc_result ); + } + + int error = clFinish(queue); + if( CL_SUCCESS != error) + { + err = "clFinish failed\n"; + return false; + } + + // compare the results + if( !compare_results(cl_result, *bc_result) ) + { + err = " (result diff in kernel '" + kernel_name + "')."; + return false; + } + return true; +} + +TestRunner::TestRunner(EventHandler *success, EventHandler *failure, + const OclExtensions& devExt): + m_successHandler(success), m_failureHandler(failure), m_devExt(&devExt) {} + +/** + Based on the test name build the cl file name, the bc file name and execute + the kernel for both modes (cl and bc). 
+ */ +bool TestRunner::runBuildTest(cl_device_id device, const char *folder, + const char *test_name, cl_uint size_t_width) +{ + int failures = 0; + // Composing the name of the CSV file. + char* dir = get_exe_dir(); + std::string csvName(dir); + csvName.append(dir_sep()); + csvName.append("khr.csv"); + free(dir); + + log_info("%s...\n", test_name); + + // Figure out whether the test can run on the device. If not, we skip it. + const KhrSupport& khrDb = *KhrSupport::get(csvName); + cl_bool images = khrDb.isImagesRequired(folder, test_name); + cl_bool images3D = khrDb.isImages3DRequired(folder, test_name); + + char deviceProfile[64]; + clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(deviceProfile), &deviceProfile, NULL); + std::string device_profile(deviceProfile, 64); + + if(images == CL_TRUE && checkForImageSupport(device) != 0) + { + (*m_successHandler)(test_name, ""); + std::cout << "Skipped. (Cannot run on device due to Images is not supported)." << std::endl; + return true; + } + + if(images3D == CL_TRUE && checkFor3DImageSupport(device) != 0) + { + (*m_successHandler)(test_name, ""); + std::cout << "Skipped. (Cannot run on device as 3D images are not supported)." << std::endl; + return true; + } + + OclExtensions requiredExt = khrDb.getRequiredExtensions(folder, test_name); + if(!m_devExt->supports(requiredExt)) + { + (*m_successHandler)(test_name, ""); + std::cout << "Skipped. (Cannot run on device due to missing extensions: " << m_devExt->get_missing(requiredExt) << " )." 
<< std::endl; + return true; + } + + std::string cl_file_path, bc_file; + // Build cl file name based on the test name + get_cl_file_path(folder, test_name, cl_file_path); + // Build bc file name based on the test name + get_bc_file_path(folder, test_name, bc_file, size_t_width); + gRG.init(1); + // + // Processing each kernel in the program separately + // + clContextWrapper context; + clCommandQueueWrapper queue; + create_context_and_queue(device, &context, &queue); + clProgramWrapper clprog = create_program_from_cl(context, cl_file_path); + clProgramWrapper bcprog = create_program_from_bc(context, bc_file); + + // Building the programs. + BuildTask clBuild(clprog, device, "-cl-kernel-arg-info"); + if (!clBuild.execute()) { + std::cerr << clBuild.getErrorLog() << std::endl; + return false; + } + + SpirBuildTask bcBuild(bcprog, device, "-x spir -spir-std=1.2 -cl-kernel-arg-info"); + if (!bcBuild.execute()) { + std::cerr << bcBuild.getErrorLog() << std::endl; + return false; + } + + KernelEnumerator clkernel_enumerator(clprog), + bckernel_enumerator(bcprog); + if (clkernel_enumerator.size() != bckernel_enumerator.size()) { + std::cerr << "number of kernels in test" << test_name + << " doesn't match in bc and cl files" << std::endl; + return false; + } + KernelEnumerator::iterator it = clkernel_enumerator.begin(), + e = clkernel_enumerator.end(); + while (it != e) + { + std::string kernel_name = *it++; + std::string err; + try + { + bool success = run_test(context, queue, clprog, bcprog, kernel_name, err, device); + if (success) + { + log_info("kernel '%s' passed.\n", kernel_name.c_str()); + (*m_successHandler)(test_name, kernel_name); + } + else + { + ++failures; + log_info("kernel '%s' failed.\n", kernel_name.c_str()); + (*m_failureHandler)(test_name, kernel_name); + } + } + catch (std::runtime_error err) + { + ++failures; + log_info("kernel '%s' failed: %s\n", kernel_name.c_str(), err.what()); + (*m_failureHandler)(test_name, kernel_name); + } + } + + 
log_info("%s %s\n", test_name, failures ? "FAILED" : "passed."); + return failures == 0; +} + diff --git a/test_conformance/spir/run_build_test.h b/test_conformance/spir/run_build_test.h new file mode 100644 index 00000000..22c4f847 --- /dev/null +++ b/test_conformance/spir/run_build_test.h @@ -0,0 +1,145 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef __RUN_BUILD_TEST_H__ +#define __RUN_BUILD_TEST_H__ + +#include +#include +#include +#include + +class OclExtensions; + +struct EventHandler{ + virtual void operator()(const std::string&, const std::string&) = 0; + virtual std::string toString()const {return std::string();} +}; + +/* + * Abstract task to be executed on a cl program. + */ +class Task{ +public: + Task(cl_device_id, const char* options); + + virtual bool execute() = 0; + + virtual ~Task(); + + const char* getErrorLog() const; + +protected: + void setErrorLog(cl_program); + + cl_device_id m_devid; + std::string m_log; + std::string m_options; +}; + +/* + * Build task - builds a given program. + */ +class BuildTask: public Task { +public: + BuildTask(cl_program, cl_device_id, const char* options); + + bool execute(); + +private: + cl_program m_program; +}; + +/* + * Spir build task - build programs from SPIR binaries. 
+ */ +class SpirBuildTask : public BuildTask { +public: + SpirBuildTask(cl_program, cl_device_id, const char* options); +}; + +/* + * Compile task - compiles a given program. + */ +class CompileTask: public Task { +public: + CompileTask(cl_program, cl_device_id, const char* options); + + void addHeader(const char* hname, cl_program hprog); + + bool execute(); + +private: + std::vector > m_headers; + cl_program m_program; +}; + +/* + * Spir compile task - compiles programs from SPIR binaries. + */ +class SpirCompileTask: public CompileTask { +public: + SpirCompileTask(cl_program, cl_device_id, const char* options); +}; + +/* + * Link task - links a given programs to an OpecnCL executable. + */ +class LinkTask: public Task{ +public: + LinkTask(cl_program* programs, int num_programs, cl_context, cl_device_id, + const char* options=NULL); + + bool execute(); + + cl_program getExecutable() const; + + ~LinkTask(); +private: + cl_program m_executable; + cl_program* m_programs; + int m_numPrograms; + cl_context m_context; +}; + +class TestRunner{ + EventHandler*const m_successHandler, *const m_failureHandler; + const OclExtensions *m_devExt; + +public: + TestRunner(EventHandler *success, EventHandler *failure, + const OclExtensions& devExt); + + bool runBuildTest(cl_device_id device, const char *folder, + const char *test_name, cl_uint size_t_width); +}; + +// +//Provides means to iterate over the kernels of a given program +// +class KernelEnumerator { + std::list m_kernels; + + void process(cl_program prog); +public: + typedef std::list::iterator iterator; + + KernelEnumerator(cl_program prog); + iterator begin(); + iterator end(); + size_t size()const; +}; + +#endif//__RUN_BUILD_TEST_H__ diff --git a/test_conformance/spir/run_services.cpp b/test_conformance/spir/run_services.cpp new file mode 100644 index 00000000..282a805d --- /dev/null +++ b/test_conformance/spir/run_services.cpp @@ -0,0 +1,767 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" + +#ifdef __APPLE__ +#include +#else +#include +#endif + +#include +#include +#include +#include +#include +#include + +#include "exceptions.h" +#include "datagen.h" +#include "run_services.h" + +#define XSTR(A) STR(A) +#define STR(A) #A + +/** + Based on the folder and the input string build the cl file nanme + */ +void get_cl_file_path (const char *folder, const char *test_name, std::string &cl_file_path) +{ + assert(folder && "folder is empty"); + assert(test_name && "test_name is empty"); + + cl_file_path.append(folder); + cl_file_path.append("/"); + cl_file_path.append(test_name); + cl_file_path.append(".cl"); +} + +/** + Based on the folder and the input string build the bc file nanme + */ +void get_bc_file_path (const char *folder, const char *test_name, std::string &bc_file_path, cl_uint size_t_width) +{ + assert(folder && "folder is empty"); + assert(test_name && "test_name is empty"); + bc_file_path.append(folder); + bc_file_path.append("/"); + bc_file_path.append(test_name); + if (32 == size_t_width) + bc_file_path.append(".bc32"); + else + bc_file_path.append(".bc64"); +} + +/** + Based on the folder and the input string build the h file nanme + */ +void get_h_file_path (const char *folder, const char *file_name, std::string &h_file_path) +{ + assert(folder && "folder is empty"); + assert(file_name && "file_name is empty"); + + 
 h_file_path.assign(folder);
+ h_file_path.append("/");
+ h_file_path.append(file_name);
+}
+
+/**
+ Fetch the kernel name from the test name
+ */
+void get_kernel_name (const char *test_name, std::string &kernel_name)
+{
+ char *temp_str, *p;
+ std::string temp;
+
+ temp.assign(test_name);
+
+ // Check if the test name includes '.' -
+ // the convention is that the test's kernel name is embedded in the test name up to the first '.'
+ temp_str = (char *)temp.c_str();
+ p = strstr(temp_str, ".");
+ if (p != NULL)
+ {
+ // Truncate in place at the first '.'; the prefix is the kernel name.
+ *p = '\0';
+ }
+ kernel_name.assign(temp_str);
+}
+
+// Error-notification callback registered with clCreateContext; defined elsewhere.
+extern "C" void CL_CALLBACK notify_callback(const char *errInfo, const void *privateInfo, size_t cb, void *userData);
+
+// Creates a context and an in-order command queue for the given device.
+// Throws Exceptions::TestError on any OpenCL failure.
+void create_context_and_queue(cl_device_id device, cl_context *out_context, cl_command_queue *out_queue)
+{
+ assert( out_context && "out_context arg must be a valid pointer");
+ assert( out_queue && "out_queue arg must be a valid pointer");
+
+ int error = CL_SUCCESS;
+
+ *out_context = clCreateContext( NULL, 1, &device, notify_callback, NULL, &error );
+ if( NULL == *out_context || error != CL_SUCCESS)
+ {
+ throw Exceptions::TestError("clCreateContext failed\n", error);
+ }
+
+ *out_queue = clCreateCommandQueueWithProperties( *out_context, device, 0, &error );
+ if( NULL == *out_queue || error )
+ {
+ throw Exceptions::TestError("clCreateCommandQueue failed\n", error);
+ }
+}
+
+/**
+ Loads the kernel text from the given text file
+ */
+std::string load_file_cl( const std::string& file_name)
+{
+ std::ifstream ifs(file_name.c_str());
+ if( !ifs.good() )
+ throw Exceptions::TestError("Can't load the cl File " + file_name, 1);
+ // NOTE(review): the istreambuf_iterator template arguments (<char>) appear to
+ // have been stripped from this diff by the extraction tooling — confirm
+ // against the upstream file before applying.
+ std::string str( ( std::istreambuf_iterator( ifs ) ), std::istreambuf_iterator());
+ return str;
+}
+
+/**
+ Loads the kernel IR from the given binary file in SPIR BC format
+ */
+void* load_file_bc( const std::string& file_name, size_t *binary_size)
+{
+ assert(binary_size && "binary_size arg should be valid");
+
+ std::ifstream file(file_name.c_str(),
std::ios::binary); + + if( !file.good() ) + { + throw Exceptions::TestError("Can't load the bc File " + file_name, 1); + } + + file.seekg(0, std::ios::end); + *binary_size = (size_t)file.tellg(); + file.seekg(0, std::ios::beg); + + void* buffer = malloc(*binary_size); + file.read((char*)buffer, *binary_size); + file.close(); + + return buffer; +} + +/** + Create program from the CL source file + */ +cl_program create_program_from_cl(cl_context context, const std::string& file_name) +{ + std::string text_file = load_file_cl(file_name); + const char* text_str = text_file.c_str(); + int error = CL_SUCCESS; + + cl_program program = clCreateProgramWithSource( context, 1, &text_str, NULL, &error ); + if( program == NULL || error != CL_SUCCESS) + { + throw Exceptions::TestError("Error creating program\n", error); + } + + return program; +} + +/** + Create program from the BC source file + */ +cl_program create_program_from_bc (cl_context context, const std::string& file_name) +{ + cl_int load_error = CL_SUCCESS; + cl_int error; + size_t binary_size; + BufferOwningPtr binary(load_file_bc(file_name, &binary_size)); + const unsigned char* ptr = binary; + + cl_device_id device = get_context_device(context); + cl_program program = clCreateProgramWithBinary( context, 1, &device, &binary_size, &ptr, &load_error, &error ); + + + if( program == NULL || error != CL_SUCCESS ) + { + throw Exceptions::TestError("clCreateProgramWithBinary failed: Unable to load valid program binary\n", error); + } + + if( load_error != CL_SUCCESS ) + { + throw Exceptions::TestError("clCreateProgramWithBinary failed: Unable to load valid device binary into program\n", load_error); + } + + return program; +} + +/** + Creates the kernel with the given name from the given program. 
+ */
+cl_kernel create_kernel_helper( cl_program program, const std::string& kernel_name )
+{
+    int error = CL_SUCCESS;
+    cl_kernel kernel = NULL;
+    // NOTE(review): 'device' is queried but never used in this function.
+    cl_device_id device = get_program_device(program);
+    /* And create a kernel from it */
+    kernel = clCreateKernel( program, kernel_name.c_str(), &error );
+    if( kernel == NULL || error != CL_SUCCESS)
+        throw Exceptions::TestError("Unable to create kernel\n", error);
+    return kernel;
+}
+
+/**
+ Returns the first device associated with the given context.
+ Throws Exceptions::TestError on query failure.
+ */
+cl_device_id get_context_device (cl_context context)
+{
+    cl_device_id device[1];
+
+    int error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(device), device, NULL);
+    if( error != CL_SUCCESS )
+    {
+        throw Exceptions::TestError("clGetContextInfo failed\n", error);
+    }
+
+    return device[0];
+}
+
+/**
+ Returns the first device associated with the given program.
+ Throws Exceptions::TestError on query failure.
+ */
+cl_device_id get_program_device (cl_program program)
+{
+    cl_device_id device[1];
+
+    int error = clGetProgramInfo(program, CL_PROGRAM_DEVICES, sizeof(device), device, NULL);
+    if( error != CL_SUCCESS )
+    {
+        throw Exceptions::TestError("clGetProgramInfo failed\n", error);
+    }
+
+    return device[0];
+}
+
+/**
+ Fills ws with a 1-dimensional work size (global capped at 32), then, if the
+ kernel was built with reqd_work_group_size, adopts the compiled sizes and
+ clamps the local size to them. Throws Exceptions::TestError on query failure.
+ */
+void generate_kernel_ws( cl_device_id device, cl_kernel kernel, WorkSizeInfo& ws)
+{
+    size_t compile_work_group_size[MAX_WORK_DIM];
+
+    memset(&ws, 0, sizeof(WorkSizeInfo));
+    ws.work_dim = 1;
+    ws.global_work_size[0] = (GLOBAL_WORK_SIZE <= 32) ? GLOBAL_WORK_SIZE : 32; // kernels limitations
+    ws.local_work_size[0] = ((GLOBAL_WORK_SIZE % 4) == 0) ? (GLOBAL_WORK_SIZE / 4) : (GLOBAL_WORK_SIZE / 2);
+
+    //Check if the kernel was compiled with specific work group size
+    int error = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, sizeof(compile_work_group_size), &compile_work_group_size, NULL);
+    if( error != CL_SUCCESS )
+    {
+        throw Exceptions::TestError("clGetKernelWorkGroupInfo failed\n", error);
+    }
+
+    // if compile_work_group_size[0] is not 0 - use the compiled values
+    if ( 0 != compile_work_group_size[0] )
+    {
+        // the kernel compiled with __attribute__((reqd_work_group_size(X, Y, Z)))
+        // NOTE(review): the compiled (local) sizes are copied into
+        // global_work_size here — looks intentional (global = required group
+        // size) but verify against upstream; copying into local_work_size
+        // would also be plausible.
+        memcpy(ws.global_work_size, compile_work_group_size, sizeof(ws.global_work_size));
+
+        // Now, check the correctness of the local work size and fix it if necessary
+        for ( int i = 0; i < MAX_WORK_DIM; ++i )
+        {
+            if ( ws.local_work_size[i] > compile_work_group_size[i] )
+            {
+                ws.local_work_size[i] = compile_work_group_size[i];
+            }
+        }
+    }
+}
+
+// Deep-copies this result: clones every kernel argument into a new heap
+// TestResult. Caller owns the returned object.
+// NOTE(review): the loop text below is garbled by extraction (the '<' of the
+// loop condition and the 'cpy->' receiver were stripped) — restore
+// "i < m_kernelArgs.getArgCount(); ++i) cpy->m_kernelArgs..." from upstream.
+TestResult* TestResult::clone(cl_context ctx, const WorkSizeInfo& ws, const cl_kernel kernel, const cl_device_id device) const
+{
+    TestResult *cpy = new TestResult();
+
+    for (size_t i=0; im_kernelArgs.addArg(m_kernelArgs.getArg(i)->clone(ctx, ws, kernel, device));
+
+    return cpy;
+}
+
+/*
+ * class DataRow
+ */
+
+// Read-only column accessor; asserts the index is in range.
+const std::string& DataRow::operator[](int column)const
+{
+    assert((column > -1 && (size_t)column < m_row.size()) && "Index out of bound");
+    return m_row[column];
+}
+
+// Mutable column accessor; appends one empty cell when column == size()
+// (so cells can be filled left-to-right).
+// NOTE(review): when column > size() the assert(0) path falls off the end
+// without a return — undefined behavior in NDEBUG builds.
+std::string& DataRow::operator[](int column)
+{
+    assert((column > -1) && "Index out of bound");
+
+    if ((size_t)column < m_row.size())
+        return m_row[column];
+
+    if (column == m_row.size())
+    {
+        m_row.push_back("");
+        return m_row[column];
+    }
+
+    assert(0 && "Index out of bound.");
+}
+
+/*
+ * class DataTable
+ */
+
+size_t DataTable::getNumRows() const
+{
+    return m_rows.size();
+}
+
+// Takes ownership of the given row pointer.
+void DataTable::addTableRow(DataRow *dr)
+{
+    m_rows.push_back(dr);
+}
+
+const DataRow& DataTable::operator[](int index)const
+{
+    assert((index > -1 && (size_t)index < m_rows.size()) && "Index out of bound");
+    return *m_rows[index];
+}
+
+DataRow& DataTable::operator[](int index)
+{
+    assert((index > -1 && (size_t)index < m_rows.size()) && "Index out of bound");
+    return *m_rows[index];
+}
+
+/*
+ * class OclExtensions
+ */
+// Queries the device's extension string and profile, and folds each extension
+// name into the bit-field representation. FULL_PROFILE devices implicitly get
+// the two embedded-profile (cles_khr_*) capabilities added.
+// NOTE(review): the std::list/std::istream_iterator template arguments
+// (<std::string>) were stripped by extraction — restore from upstream.
+OclExtensions OclExtensions::getDeviceCapabilities(cl_device_id devId)
+{
+    char extensions[1024] = {0};
+    size_t size;
+
+    // Querying the device for its supported extensions
+    cl_int errcode = clGetDeviceInfo(devId,
+                                     CL_DEVICE_EXTENSIONS,
+                                     sizeof(extensions),
+                                     extensions,
+                                     &size);
+    if (errcode)
+        throw Exceptions::TestError("Device query failed");
+
+    char device_profile[1024] = {0};
+    errcode = clGetDeviceInfo(devId,
+                              CL_DEVICE_PROFILE,
+                              sizeof(device_profile),
+                              device_profile,
+                              NULL);
+    if (errcode)
+        throw Exceptions::TestError("Device query failed");
+
+    OclExtensions ret = OclExtensions::empty();
+    assert(size < sizeof(extensions));
+    if (!size)
+        return ret;
+
+    // Iterate over the extensions, and convert them into the bit field.
+    std::list extVector;
+    std::stringstream khrStream(extensions);
+    std::copy(std::istream_iterator(khrStream),
+              std::istream_iterator(),
+              std::back_inserter(extVector));
+
+    // full_profile devices supports embedded profile as core feature
+    if ( std::string( device_profile ) == "FULL_PROFILE" ) {
+        extVector.push_back("cles_khr_int64");
+        extVector.push_back("cles_khr_2d_image_array_writes");
+    }
+
+    for(std::list::const_iterator it = extVector.begin(),
+        e = extVector.end(); it != e;
+        it++)
+    {
+        ret = ret | OclExtensions::fromString(*it);
+    }
+    return ret;
+}
+
+OclExtensions OclExtensions::empty()
+{
+    return OclExtensions(0);
+}
+
+// Maps a single extension name to its enum bit via RETURN_IF_ENUM string
+// comparison; unknown names map to the empty set.
+OclExtensions OclExtensions::fromString(const std::string& e)
+{
+    std::string s = "OclExtensions::" + e;
+    RETURN_IF_ENUM(s, OclExtensions::cl_khr_int64_base_atomics);
+    RETURN_IF_ENUM(s, OclExtensions::cl_khr_int64_extended_atomics);
+    RETURN_IF_ENUM(s, OclExtensions::cl_khr_3d_image_writes);
+    RETURN_IF_ENUM(s, OclExtensions::cl_khr_fp16);
+    RETURN_IF_ENUM(s, OclExtensions::cl_khr_gl_sharing);
+    RETURN_IF_ENUM(s, OclExtensions::cl_khr_gl_event);
+    RETURN_IF_ENUM(s, OclExtensions::cl_khr_d3d10_sharing);
+    RETURN_IF_ENUM(s, OclExtensions::cl_khr_dx9_media_sharing);
+    RETURN_IF_ENUM(s, OclExtensions::cl_khr_d3d11_sharing);
+    RETURN_IF_ENUM(s, OclExtensions::cl_khr_depth_images);
+    RETURN_IF_ENUM(s, OclExtensions::cl_khr_gl_depth_images);
+    RETURN_IF_ENUM(s, OclExtensions::cl_khr_gl_msaa_sharing);
+    RETURN_IF_ENUM(s, OclExtensions::cl_khr_image2d_from_buffer);
+    RETURN_IF_ENUM(s, OclExtensions::cl_khr_initialize_memory);
+    RETURN_IF_ENUM(s, OclExtensions::cl_khr_spir);
+    RETURN_IF_ENUM(s, OclExtensions::cl_khr_fp64);
+    RETURN_IF_ENUM(s, OclExtensions::cl_khr_global_int32_base_atomics);
+    RETURN_IF_ENUM(s, OclExtensions::cl_khr_global_int32_extended_atomics);
+    RETURN_IF_ENUM(s, OclExtensions::cl_khr_local_int32_base_atomics);
+    RETURN_IF_ENUM(s, OclExtensions::cl_khr_local_int32_extended_atomics);
+    RETURN_IF_ENUM(s, OclExtensions::cl_khr_byte_addressable_store);
+    RETURN_IF_ENUM(s, OclExtensions::cles_khr_int64);
+    RETURN_IF_ENUM(s, OclExtensions::cles_khr_2d_image_array_writes);
+    // Unknown KHR string.
+    return OclExtensions::empty();
+}
+
+// Renders the bit-field as the concatenation of the supported extension
+// names (no separator between names, per APPEND_STR_IF_SUPPORTS).
+std::string OclExtensions::toString()
+{
+
+    #define APPEND_STR_IF_SUPPORTS( STR, E ) \
+    if ( this->supports(E) )                 \
+    {                                        \
+        std::string ext_str( #E );           \
+        std::string prefix = "OclExtensions::"; \
+        size_t pos = ext_str.find( prefix ); \
+        if ( pos != std::string::npos )      \
+        {                                    \
+            ext_str.replace( pos, prefix.length(), ""); \
+        }                                    \
+        STR += ext_str;                      \
+    }
+
+    std::string s = "";
+
+    APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_int64_base_atomics );
+    APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_int64_extended_atomics );
+    APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_3d_image_writes );
+    APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_fp16 );
+    APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_gl_sharing );
+    APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_gl_event );
+    APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_d3d10_sharing );
+    APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_dx9_media_sharing );
+    APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_d3d11_sharing );
+    APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_depth_images );
+    APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_gl_depth_images );
+    APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_gl_msaa_sharing );
+    APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_image2d_from_buffer );
+    APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_initialize_memory );
+    APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_spir );
+    APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_fp64 );
+    APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_global_int32_base_atomics );
+    APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_global_int32_extended_atomics );
+    APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_local_int32_base_atomics );
+    APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_local_int32_extended_atomics );
+    APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_byte_addressable_store );
+    APPEND_STR_IF_SUPPORTS( s, OclExtensions::cles_khr_int64 );
+    APPEND_STR_IF_SUPPORTS( s, OclExtensions::cles_khr_2d_image_array_writes );
+
+    return s;
+}
+
+std::ostream& operator<<(std::ostream& os, OclExtensions ext)
+{
+    return os << ext.toString();
+}
+
+// Set union of the two extension bit-fields.
+OclExtensions OclExtensions::operator|(const OclExtensions& b) const
+{
+    return OclExtensions(m_extVector | b.m_extVector);
+}
+
+// True iff every bit set in b is also set in this object.
+bool OclExtensions::supports(const OclExtensions& b) const
+{
+    return ((b.m_extVector & m_extVector) == b.m_extVector);
+}
+
+// Bits required by b that this object lacks.
+OclExtensions OclExtensions::get_missing(const OclExtensions& b) const
+{
+    return OclExtensions( b.m_extVector & ( ~ m_extVector ) );
+}
+
+/*
+ * class KhrSupport
+ */
+
+KhrSupport *KhrSupport::m_instance = NULL;
+
+// Lazy singleton: on first call parses the CSV at 'path' into memory; later
+// calls ignore 'path' and return the cached instance.
+// NOTE(review): on open failure m_instance is deleted but NOT reset to NULL,
+// so a subsequent call returns a dangling pointer. Set m_instance = NULL
+// before throwing.
+const KhrSupport* KhrSupport::get(const std::string& path)
+{
+    if(m_instance)
+        return m_instance;
+
+    m_instance = new KhrSupport();
+    // First invokation, parse the file into memory.
+    std::fstream csv(path.c_str(), std::ios_base::in);
+    if (!csv.is_open())
+    {
+        delete m_instance;
+        std::string msg;
+        msg.append("File ");
+        msg.append(path);
+        msg.append(" cannot be opened");
+        throw Exceptions::TestError(msg.c_str());
+    }
+
+    m_instance->parseCSV(csv);
+    csv.close();
+    return m_instance;
+}
+
+// Reads the open CSV stream line by line, appending a parsed DataRow per line.
+// Lines longer than the 1024-byte buffer would be split — presumably the CSV
+// rows are short; verify against the data file.
+void KhrSupport::parseCSV(std::fstream& f)
+{
+    assert(f.is_open() && "file is not in reading state.") ;
+    char line[1024];
+    while (!f.getline(line, sizeof(line)).eof())
+    {
+        DataRow *dr = parseLine(std::string(line));
+        m_dt.addTableRow(dr);
+    }
+}
+
+// Splits one CSV line into a heap-allocated DataRow. Whitespace and '"'
+// characters are discarded everywhere (so quoted fields lose their quotes and
+// embedded spaces). Caller (the DataTable) takes ownership.
+DataRow* KhrSupport::parseLine(const std::string& line)
+{
+    const char DELIM = ',';
+    std::string token;
+    DataRow *dr = new DataRow();
+    int tIndex = 0;
+
+    for(std::string::const_iterator it = line.begin(), e = line.end(); it != e;
+        it++)
+    {
+        // Eat those characters away.
+        if(isspace(*it) || '"' == *it)
+            continue;
+
+        // If that's a delimiter, we need to tokenize the collected value.
+        if(*it == DELIM)
+        {
+            (*dr)[tIndex++] = token;
+            token.clear();
+            continue;
+        }
+
+        // Append to current token.
+        token.append(1U, *it);
+    }
+    if (!token.empty())
+        (*dr)[tIndex] = token;
+
+    assert(tIndex && "empty data row??");
+    return dr;
+}
+
+// ORs together the extension requirements of every CSV row matching the given
+// suite and test ("*" in the test column matches any test).
+OclExtensions KhrSupport::getRequiredExtensions(const char* suite, const char* test) const
+{
+    OclExtensions ret = OclExtensions::empty();
+
+    const std::string strSuite(suite), strTest(test);
+    // Iterating on the DataTable, searching whether the row with the requested
+    // row exists.
+    for(size_t rowIndex = 0; rowIndex < m_dt.getNumRows(); rowIndex++)
+    {
+        const DataRow& dr = m_dt[rowIndex];
+        const std::string csvSuite = dr[SUITE_INDEX], csvTest = dr[TEST_INDEX];
+        bool sameSuite = (csvSuite == strSuite), sameTest = (csvTest == strTest)||(csvTest == "*");
+        if (sameTest && sameSuite)
+        {
+            ret = ret | OclExtensions::fromString(dr[EXT_INDEX]);
+        }
+    }
+
+    return ret;
+}
+
+// Returns CL_TRUE iff the first CSV row matching suite/test marks the
+// images column CL_TRUE ("*" in the test column matches any test).
+cl_bool KhrSupport::isImagesRequired(const char* suite, const char* test) const
+{
+    cl_bool ret = CL_FALSE;
+    const std::string strSuite(suite), strTest(test);
+
+    // Iterating on the DataTable, searching whether the row with the requested
+    // row exists.
+    for(size_t rowIndex = 0; rowIndex < m_dt.getNumRows(); rowIndex++)
+    {
+        const DataRow& dr = m_dt[rowIndex];
+        const std::string csvSuite = dr[SUITE_INDEX], csvTest = dr[TEST_INDEX];
+        bool sameSuite = (csvSuite == strSuite), sameTest = (csvTest == strTest)||(csvTest == "*");
+        if (sameTest && sameSuite)
+        {
+            ret = (dr[IMAGES_INDEX] == "CL_TRUE") ? CL_TRUE : CL_FALSE;
+            break;
+        }
+    }
+
+    return ret;
+}
+
+// Same as isImagesRequired but for the 3D-images column.
+cl_bool KhrSupport::isImages3DRequired(const char* suite, const char* test) const
+{
+    cl_bool ret = CL_FALSE;
+    const std::string strSuite(suite), strTest(test);
+
+    // Iterating on the DataTable, searching whether the row with the requested
+    // row exists.
+    for(size_t rowIndex = 0; rowIndex < m_dt.getNumRows(); rowIndex++)
+    {
+        const DataRow& dr = m_dt[rowIndex];
+        const std::string csvSuite = dr[SUITE_INDEX], csvTest = dr[TEST_INDEX];
+        bool sameSuite = (csvSuite == strSuite), sameTest = (csvTest == strTest)||(csvTest == "*");
+        if (sameTest && sameSuite)
+        {
+            ret = (dr[IMAGES_3D_INDEX] == "CL_TRUE") ? CL_TRUE : CL_FALSE;
+            break;
+        }
+    }
+
+    return ret;
+}
+
+
+// For each kernel argument, queries its address/access/type qualifiers, type
+// name and name via clGetKernelArgInfo (requires the program to be built with
+// -cl-kernel-arg-info), then asks the DataGenerator singleton to synthesize a
+// matching argument value. Throws Exceptions::TestError on any query failure.
+static void generate_kernel_args(cl_context context, cl_kernel kernel, const WorkSizeInfo& ws, KernelArgs& cl_args, const cl_device_id device)
+{
+    int error = CL_SUCCESS;
+    cl_uint num_args = 0;
+    KernelArg* cl_arg = NULL;
+    DataGenerator* dg = DataGenerator::getInstance();
+
+    error = clGetKernelInfo( kernel, CL_KERNEL_NUM_ARGS, sizeof( num_args ), &num_args, NULL );
+    if( error != CL_SUCCESS )
+    {
+        throw Exceptions::TestError("Unable to get kernel arg count\n", error);
+    }
+
+    for ( cl_uint j = 0; j < num_args; ++j )
+    {
+        KernelArgInfo kernel_arg_info;
+        size_t size;
+        const int max_name_len = 512;
+        char name[max_name_len];
+
+        // Try to get the address qualifier of each argument.
+        error = clGetKernelArgInfo( kernel, j, CL_KERNEL_ARG_ADDRESS_QUALIFIER, sizeof(cl_kernel_arg_address_qualifier), kernel_arg_info.getAddressQualifierRef(), &size);
+        if( error != CL_SUCCESS )
+        {
+            throw Exceptions::TestError("Unable to get argument address qualifier\n", error);
+        }
+
+        // Try to get the access qualifier of each argument.
+        error = clGetKernelArgInfo( kernel, j, CL_KERNEL_ARG_ACCESS_QUALIFIER, sizeof(cl_kernel_arg_access_qualifier), kernel_arg_info.getAccessQualifierRef(), &size );
+        if( error != CL_SUCCESS )
+        {
+            throw Exceptions::TestError("Unable to get argument access qualifier\n", error);
+        }
+
+        // Try to get the type qualifier of each argument.
+        error = clGetKernelArgInfo( kernel, j, CL_KERNEL_ARG_TYPE_QUALIFIER, sizeof(cl_kernel_arg_type_qualifier), kernel_arg_info.getTypeQualifierRef(), &size );
+        if( error != CL_SUCCESS )
+        {
+            throw Exceptions::TestError("Unable to get argument type qualifier\n", error);
+        }
+
+        // Try to get the type of each argument.
+        memset( name, 0, max_name_len );
+        error = clGetKernelArgInfo(kernel, j, CL_KERNEL_ARG_TYPE_NAME, max_name_len, name, NULL );
+        if( error != CL_SUCCESS )
+        {
+            throw Exceptions::TestError("Unable to get argument type name\n", error);
+        }
+        kernel_arg_info.setTypeName(name);
+
+        // Try to get the name of each argument.
+        memset( name, 0, max_name_len );
+        error = clGetKernelArgInfo( kernel, j, CL_KERNEL_ARG_NAME, max_name_len, name, NULL );
+        if( error != CL_SUCCESS )
+        {
+            throw Exceptions::TestError("Unable to get argument name\n", error);
+        }
+        kernel_arg_info.setName(name);
+
+        cl_arg = dg->generateKernelArg(context, kernel_arg_info, ws, NULL, kernel, device);
+        cl_args.addArg( cl_arg );
+    }
+}
+
+// Binds every generated argument to the kernel via clSetKernelArg.
+// Throws Exceptions::TestError on failure.
+void set_kernel_args( cl_kernel kernel, KernelArgs& args)
+{
+    int error = CL_SUCCESS;
+    for( size_t i = 0; i < args.getArgCount(); ++ i )
+    {
+        error = clSetKernelArg( kernel, i, args.getArg(i)->getArgSize(), args.getArg(i)->getArgValue());
+        if( error != CL_SUCCESS )
+        {
+            throw Exceptions::TestError("clSetKernelArg failed\n", error);
+        }
+    }
+}
+
+/**
+ Generates the work sizes and the argument data for the given kernel
+ (outputs: ws and results).
+ */
+void generate_kernel_data ( cl_context context, cl_kernel kernel, WorkSizeInfo &ws, TestResult& results)
+{
+    cl_device_id device = get_context_device(context);
+    generate_kernel_ws( device, kernel, ws);
+    generate_kernel_args(context, kernel, ws, results.kernelArgs(), device);
+}
+
+/**
+ Run the single kernel: set its arguments, enqueue it with the given work
+ sizes, wait for completion, then read buffer arguments back to the host.
+ Throws Exceptions::TestError on any OpenCL failure.
+ */
+void run_kernel( cl_kernel kernel, cl_command_queue queue, WorkSizeInfo &ws, TestResult& result )
+{
+    clEventWrapper execute_event;
+
+    set_kernel_args(kernel, result.kernelArgs());
+
+    int error = clEnqueueNDRangeKernel( queue, kernel, ws.work_dim, ws.global_work_offset, ws.global_work_size, ws.local_work_size, 0, NULL, &execute_event );
+    if( error != CL_SUCCESS )
+    {
+        throw Exceptions::TestError("clEnqueueNDRangeKernel failed\n", error);
+    }
+
+    error = clWaitForEvents( 1, &execute_event );
+    if( error != CL_SUCCESS )
+    {
+        throw Exceptions::TestError("clWaitForEvents failed\n", error);
+    }
+
+    // read all the buffers back to host
+    result.readToHost(queue);
+}
+
+/**
+ Compare two test results: same argument count and each argument compares
+ equal pairwise. Logs and returns false on the first mismatch.
+ NOTE(review): '%d' is used to print a size_t loop index — should be '%zu'
+ (or cast) on LP64 platforms.
+ */
+bool compare_results( const TestResult& lhs, const TestResult& rhs )
+{
+    if( lhs.kernelArgs().getArgCount() != rhs.kernelArgs().getArgCount() )
+    {
+        log_error("number of kernel parameters differ between SPIR and CL version of the kernel\n");
+        return false;
+    }
+
+    for( size_t i = 0 ; i < lhs.kernelArgs().getArgCount(); ++i )
+    {
+        if( ! lhs.kernelArgs().getArg(i)->compare( *rhs.kernelArgs().getArg(i)) )
+        {
+            log_error("the kernel parameter (%d) is different between SPIR and CL version of the kernel\n", i);
+            return false;
+        }
+    }
+    return true;
+}
+
diff --git a/test_conformance/spir/run_services.h b/test_conformance/spir/run_services.h
new file mode 100644
index 00000000..37cec763
--- /dev/null
+++ b/test_conformance/spir/run_services.h
@@ -0,0 +1,222 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef __RUN_SERVICES_H
+#define __RUN_SERVICES_H
+
+// NOTE(review): the targets of the angle-bracket #includes below were
+// stripped by text extraction — restore the original headers (presumably
+// <string> and an iostream header; confirm against upstream).
+#include
+#include "kernelargs.h"
+#include "datagen.h"
+#include
+
+// Path/name helpers implemented in run_services.cpp.
+void get_cl_file_path(const char *folder, const char *str, std::string &cl_file_path);
+void get_bc_file_path(const char *folder, const char *str, std::string &bc_file_path, cl_uint size_t_width);
+void get_h_file_path(const char *folder, const char *str, std::string &h_file_path);
+void get_kernel_name(const char *test_name, std::string &kernel_name);
+
+cl_device_id get_context_device(cl_context context);
+
+void create_context_and_queue(cl_device_id device, cl_context *out_context, cl_command_queue *out_queue);
+cl_program create_program_from_cl(cl_context context, const std::string& file_name);
+cl_program create_program_from_bc(cl_context context, const std::string& file_name);
+/**
+ Retrieves the kernel with the given name from the program
+ */
+cl_kernel create_kernel_helper(cl_program program, const std::string& kernel_name);
+
+cl_device_id get_program_device (cl_program program);
+
+void generate_kernel_ws( cl_device_id device, cl_kernel kernel, WorkSizeInfo& ws);
+
+/**
+ Responsible for holding the result of a single test
+ */
+class TestResult
+{
+public:
+    TestResult(){};
+
+    KernelArgs& kernelArgs() { return m_kernelArgs; }
+
+    const KernelArgs& kernelArgs() const { return m_kernelArgs; }
+
+    void readToHost(cl_command_queue queue) { m_kernelArgs.readToHost(queue); }
+
+    /*
+     * Clones this object to a newly heap-allocated (deeply copied) object.
+     */
+    TestResult* clone(cl_context ctx, const WorkSizeInfo& ws, const cl_kernel kernel, const cl_device_id device) const;
+
+private:
+    KernelArgs m_kernelArgs;
+};
+
+// Compile-time bit mask for extension index i (1 << i); the <0>
+// specialization pins bit 0 to 1.
+// NOTE(review): the primary template's parameter list (presumably <int i>)
+// was stripped by text extraction — confirm against upstream.
+template
+struct KhrValue
+{
+    enum {Mask = (1 << i)};
+};
+
+template <>
+struct KhrValue<0>
+{
+    enum {Mask = 1};
+};
+
+/*
+ * Represents a set of OpenCL extension.
+ */
+class OclExtensions
+{
+public:
+    static OclExtensions getDeviceCapabilities(cl_device_id);
+
+    static OclExtensions empty();
+
+    // NOTE(review): "STRINIGFY" is a typo for "STRINGIFY", but it is used
+    // consistently so it works; renaming it would touch code, not comments.
+    #define STRINIGFY(X) #X
+
+    #define RETURN_IF_ENUM(S, E) if(S == STRINIGFY(E)) return E
+
+
+    static OclExtensions fromString(const std::string&);
+
+    std::string toString();
+
+    // Operators
+
+    // Merges the given extension and this one together, and returns the merged
+    // value.
+    OclExtensions operator|(const OclExtensions&) const;
+
+
+    // Indicates whether each extension in this objects also resides in b.
+    bool supports(const OclExtensions& b) const;
+
+    // Return list of missing extensions
+    OclExtensions get_missing(const OclExtensions& b) const;
+
+
+    size_t get() const { return m_extVector; }
+private:
+
+    OclExtensions(size_t ext) : m_extVector(ext) {}
+
+// Fix a compilation error, since cl_khr_gl_sharing is defined as a macro.
+#ifdef cl_khr_gl_sharing
+#undef cl_khr_gl_sharing
+#endif//cl_khr_gl_sharing
+
+#ifdef cl_khr_icd
+#undef cl_khr_icd
+#endif//cl_khr_icd
+
+    // One bit per known extension; see KhrValue above.
+    enum ClKhrs
+    {
+        no_extensions = KhrValue<0>::Mask,
+        cl_khr_int64_base_atomics = KhrValue<1>::Mask,
+        cl_khr_int64_extended_atomics = KhrValue<2>::Mask,
+        cl_khr_3d_image_writes = KhrValue<3>::Mask,
+        cl_khr_fp16 = KhrValue<4>::Mask,
+        cl_khr_gl_sharing = KhrValue<5>::Mask,
+        cl_khr_gl_event = KhrValue<6>::Mask,
+        cl_khr_d3d10_sharing = KhrValue<7>::Mask,
+        cl_khr_dx9_media_sharing = KhrValue<8>::Mask,
+        cl_khr_d3d11_sharing = KhrValue<9>::Mask,
+        cl_khr_depth_images = KhrValue<10>::Mask,
+        cl_khr_gl_depth_images = KhrValue<11>::Mask,
+        cl_khr_gl_msaa_sharing = KhrValue<12>::Mask,
+        cl_khr_image2d_from_buffer = KhrValue<13>::Mask,
+        cl_khr_initialize_memory = KhrValue<14>::Mask,
+        cl_khr_context_abort = KhrValue<15>::Mask,
+        cl_khr_spir = KhrValue<16>::Mask,
+        cl_khr_fp64 = KhrValue<17>::Mask,
+        cl_khr_global_int32_base_atomics = KhrValue<18>::Mask,
+        cl_khr_global_int32_extended_atomics = KhrValue<19>::Mask,
+        cl_khr_local_int32_base_atomics = KhrValue<20>::Mask,
+        cl_khr_local_int32_extended_atomics = KhrValue<21>::Mask,
+        cl_khr_byte_addressable_store = KhrValue<22>::Mask,
+        cles_khr_int64 = KhrValue<23>::Mask,
+        cles_khr_2d_image_array_writes = KhrValue<24>::Mask,
+    };
+
+    size_t m_extVector;
+};
+
+std::ostream& operator<<(std::ostream& os, OclExtensions ext);
+
+/*
+ * Indicates whether a given test needs KHR extension.
+ */
+
+class DataRow;
+
+// Owns a list of heap-allocated DataRow pointers (see addTableRow).
+class DataTable
+{
+    std::vector m_rows;
+public:
+    size_t getNumRows() const;
+    void addTableRow(DataRow*);
+    const DataRow& operator[](int index)const;
+    DataRow& operator[](int index);
+};
+
+// Singleton wrapper over the khr.csv requirements table; see
+// run_services.cpp for parsing/lookup semantics.
+class KhrSupport
+{
+public:
+    static const KhrSupport* get(const std::string& csvFile);
+    DataRow* parseLine(const std::string&);
+    OclExtensions getRequiredExtensions(const char* suite, const char* test) const;
+    cl_bool isImagesRequired(const char* suite, const char* test) const;
+    cl_bool isImages3DRequired(const char* suite, const char* test) const;
+
+private:
+    // CSV column layout: suite, test, extension, images, 3D images.
+    static const int SUITE_INDEX = 0;
+    static const int TEST_INDEX = 1;
+    static const int EXT_INDEX = 2;
+    static const int IMAGES_INDEX = 3;
+    static const int IMAGES_3D_INDEX = 4;
+
+    void parseCSV(std::fstream&);
+
+    DataTable m_dt;
+    static KhrSupport* m_instance;
+};
+
+// One parsed CSV row; constructible only by KhrSupport::parseLine.
+class DataRow
+{
+    std::vector m_row;
+    DataRow() {}
+public:
+    const std::string& operator[](int)const;
+    std::string& operator[](int);
+
+    friend DataRow* KhrSupport::parseLine(const std::string&);
+};
+
+/*
+ * Generates data for the given kernel.
+ * Parameters:
+ * context - The context of the kernel.
+ * kernel - The kernel to which arguments will be generated
+ * ws(OUT) - generated work size info.
+ * res(OUT)- generated test results.
+ */
+void generate_kernel_data(cl_context context, cl_kernel kernel,
+                          WorkSizeInfo &ws, TestResult& res);
+
+// Execution entry points implemented in run_services.cpp.
+void run_kernel(cl_kernel kernel, cl_command_queue queue, WorkSizeInfo &ws, TestResult& result);
+bool compare_results(const TestResult& lhs, const TestResult& rhs);
+
+#endif
diff --git a/test_conformance/spir/sampler_enumeration.zip b/test_conformance/spir/sampler_enumeration.zip
new file mode 100644
index 00000000..4fe2c775
Binary files /dev/null and b/test_conformance/spir/sampler_enumeration.zip differ
diff --git a/test_conformance/spir/select.zip b/test_conformance/spir/select.zip
new file mode 100644
index 00000000..21dbb4fe
Binary files /dev/null and b/test_conformance/spir/select.zip differ
diff --git a/test_conformance/spir/typeinfo.h b/test_conformance/spir/typeinfo.h
new file mode 100644
index 00000000..ea1f6b66
--- /dev/null
+++ b/test_conformance/spir/typeinfo.h
@@ -0,0 +1,208 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// X-macro table: each TYPE_HNDL(type-name, is-pointer, ?, vector-width/count,
+// min-value, max-value, generator) row is expanded by the including file.
+// NOTE(review): the generator template arguments (after "KernelArgGeneratorT")
+// appear to have been stripped by text extraction — confirm the element
+// type/width parameters against the upstream file.
+TYPE_HNDL("_Bool", false, 0, 1, false, true, KernelArgGeneratorT)
+TYPE_HNDL("_Bool*", true, 0, 16, false, true, KernelArgGeneratorT)
+TYPE_HNDL("bool", false, 0, 1, false, true, KernelArgGeneratorT)
+TYPE_HNDL("bool*", true, 0, 16, false, true, KernelArgGeneratorT)
+TYPE_HNDL("char", false, 0, 1, 0x0, 0x7f, KernelArgGeneratorT)
+TYPE_HNDL("char*", true, 0, 16, 0x0, 0x7f, KernelArgGeneratorT)
+TYPE_HNDL("char16", false, 0, 16, 0x0, 0x7f, KernelArgGeneratorT)
+TYPE_HNDL("char16*", true, 0, 16, 0x0, 0x7f, KernelArgGeneratorT)
+TYPE_HNDL("char2", false, 0, 2, 0x0, 0x7f, KernelArgGeneratorT)
+TYPE_HNDL("char2*", true, 0, 2, 0x0, 0x7f, KernelArgGeneratorT)
+TYPE_HNDL("char3", false, 0, 3, 0x0, 0x7f, KernelArgGeneratorT)
+TYPE_HNDL("char3*", true, 0, 3, 0x0, 0x7f, KernelArgGeneratorT)
+TYPE_HNDL("char4", false, 0, 4, 0x0, 0x7f, KernelArgGeneratorT)
+TYPE_HNDL("char4*", true, 0, 4, 0x0, 0x7f, KernelArgGeneratorT)
+TYPE_HNDL("char8", false, 0, 8, 0x0, 0x7f, KernelArgGeneratorT)
+TYPE_HNDL("char8*", true, 0, 8, 0x0, 0x7f, KernelArgGeneratorT)
+TYPE_HNDL("double", false, 0, 1, -0x40000000, 0x40000000, KernelArgGeneratorT)
+TYPE_HNDL("double*", true, 0, 16, -0x40000000, 0x40000000, KernelArgGeneratorT)
+TYPE_HNDL("double16", false, 0, 16, -0x40000000, 0x40000000, KernelArgGeneratorT)
+TYPE_HNDL("double16*", true, 0, 16, -0x40000000, 0x40000000, KernelArgGeneratorT)
+TYPE_HNDL("double2", false, 0, 2, -0x40000000, 0x40000000, KernelArgGeneratorT)
+TYPE_HNDL("double2*", true, 0, 2, -0x40000000, 0x40000000, KernelArgGeneratorT)
+TYPE_HNDL("double3", false, 0, 3, -0x40000000, 0x40000000, KernelArgGeneratorT)
+TYPE_HNDL("double3*", true, 0, 3, -0x40000000, 0x40000000, KernelArgGeneratorT)
+TYPE_HNDL("double4", false, 0, 4, -0x40000000, 0x40000000, KernelArgGeneratorT)
+TYPE_HNDL("double4*", true, 0, 4, -0x40000000, 0x40000000, KernelArgGeneratorT)
+TYPE_HNDL("double8", false, 0, 8, -0x40000000, 0x40000000, KernelArgGeneratorT)
+TYPE_HNDL("double8*", true, 0, 8, -0x40000000, 0x40000000, KernelArgGeneratorT)
+TYPE_HNDL("enum enum_type", false, 0, 1, 0, 2, KernelArgGeneratorT) // enum defines 0..2
+TYPE_HNDL("enum enum_type*", true, 0, 1, 0, 2, KernelArgGeneratorT) // enum defines 0..2
+TYPE_HNDL("float", false, 0, 1, -0x01000000, 0x01000000, KernelArgGeneratorT)
+TYPE_HNDL("float*", true, 0, 16, -0x01000000, 0x01000000, KernelArgGeneratorT)
+TYPE_HNDL("float16", false, 0, 16, -0x01000000, 0x01000000, KernelArgGeneratorT)
+TYPE_HNDL("float16*", true, 0, 16, -0x01000000, 0x01000000, KernelArgGeneratorT)
+TYPE_HNDL("float2", false, 0, 2, -0x01000000, 0x01000000, KernelArgGeneratorT)
+TYPE_HNDL("float2*", true, 0, 2, -0x01000000, 0x01000000, KernelArgGeneratorT)
+TYPE_HNDL("float3", false, 0, 3, -0x01000000, 0x01000000, KernelArgGeneratorT)
+TYPE_HNDL("float3*", true, 0, 3, -0x01000000, 0x01000000, KernelArgGeneratorT)
+TYPE_HNDL("float4", false, 0, 4, -0x01000000, 0x01000000, KernelArgGeneratorT)
+TYPE_HNDL("float4*", true, 0, 4, -0x01000000, 0x01000000, KernelArgGeneratorT)
+TYPE_HNDL("float8", false, 0, 8, -0x01000000, 0x01000000, KernelArgGeneratorT)
+TYPE_HNDL("float8*", true, 0, 8, -0x01000000, 0x01000000, KernelArgGeneratorT)
+TYPE_HNDL("half", false, 0, 1, 0x0, 0x7fff, KernelArgGeneratorT)
+TYPE_HNDL("half*", true, 0, 16, 0x0, 0x7fff, KernelArgGeneratorT)
+TYPE_HNDL("half16", false, 0, 16, 0x0, 0x7fff, KernelArgGeneratorT)
+TYPE_HNDL("half16*", true, 0, 16, 0x0, 0x7fff, KernelArgGeneratorT)
+TYPE_HNDL("half2", false, 0, 2, 0x0, 0x7fff, KernelArgGeneratorT)
+TYPE_HNDL("half2*", true, 0, 2, 0x0, 0x7fff, KernelArgGeneratorT)
+TYPE_HNDL("half3", false, 0, 3, 0x0, 0x7fff, KernelArgGeneratorT)
+TYPE_HNDL("half3*", true, 0, 3, 0x0, 0x7fff, KernelArgGeneratorT)
+TYPE_HNDL("half4", false, 0, 4, 0x0, 0x7fff, KernelArgGeneratorT)
+TYPE_HNDL("half4*", true, 0, 4, 0x0, 0x7fff, KernelArgGeneratorT)
+TYPE_HNDL("half8", false, 0, 8, 0x0, 0x7fff, KernelArgGeneratorT)
+TYPE_HNDL("half8*", true, 0, 8, 0x0, 0x7fff, KernelArgGeneratorT)
+// Image argument rows: these use the dedicated image generators
+// (KernelArgGeneratorImage*) instead of the scalar generator; the min/max
+// columns (0x0, 0x7f) presumably bound the generated pixel data — TODO
+// confirm against the generator implementation.
+TYPE_HNDL("image1d_array_t", false, 0, 1, 0x0, 0x7f, KernelArgGeneratorImage1dArray)
+TYPE_HNDL("image1d_buffer_t", false, 0, 1, 0x0, 0x7f, KernelArgGeneratorImage1dBuffer)
+TYPE_HNDL("image1d_t", false, 0, 1, 0x0, 0x7f, KernelArgGeneratorImage1d)
+TYPE_HNDL("image2d_array_t", false, 0, 1, 0x0, 0x7f, KernelArgGeneratorImage2dArray)
+TYPE_HNDL("image2d_t", false, 0, 1, 0x0, 0x7f, KernelArgGeneratorImage2d)
+TYPE_HNDL("image3d_t", false, 0, 1, 0x0, 0x7f, KernelArgGeneratorImage3d)
+TYPE_HNDL("image1d_array_float", false, 0, 1, 0x0, 0x7f, KernelArgGeneratorImage1dArray)
+TYPE_HNDL("image1d_array_int", false, 0, 1, 0x0, 0x7f, KernelArgGeneratorImage1dArray)
+TYPE_HNDL("image1d_array_uint", false, 0, 1, 0x0, 0x7f, KernelArgGeneratorImage1dArray)
+TYPE_HNDL("image1d_buffer_float", false, 0, 1, 0x0, 0x7f, KernelArgGeneratorImage1dBuffer)
+TYPE_HNDL("image1d_buffer_int", false, 0, 1, 0x0, 0x7f, KernelArgGeneratorImage1dBuffer)
+TYPE_HNDL("image1d_buffer_uint", false, 0, 1, 0x0, 0x7f, KernelArgGeneratorImage1dBuffer)
+TYPE_HNDL("image1d_float", false, 0, 1, 0x0, 0x7f, KernelArgGeneratorImage1d)
+TYPE_HNDL("image1d_int", false, 0, 1, 0x0, 0x7f, KernelArgGeneratorImage1d)
+TYPE_HNDL("image1d_uint", false, 0, 1, 0x0, 0x7f, KernelArgGeneratorImage1d)
+TYPE_HNDL("image2d_array_float", false, 0, 1, 0x0, 0x7f, KernelArgGeneratorImage2dArray)
+TYPE_HNDL("image2d_array_int", false, 0, 1, 0x0, 0x7f, KernelArgGeneratorImage2dArray)
+TYPE_HNDL("image2d_array_uint", false, 0, 1, 0x0, 0x7f, KernelArgGeneratorImage2dArray)
+TYPE_HNDL("image2d_float", false, 0, 1, 0x0, 0x7f, KernelArgGeneratorImage2d)
+TYPE_HNDL("image2d_int", false, 0, 1, 0x0, 0x7f, KernelArgGeneratorImage2d)
+TYPE_HNDL("image2d_uint", false, 0, 1, 0x0, 0x7f, KernelArgGeneratorImage2d)
+TYPE_HNDL("image3d_float", false, 0, 1, 0x0, 0x7f, KernelArgGeneratorImage3d)
+TYPE_HNDL("image3d_int", false, 0, 1, 0x0, 0x7f, KernelArgGeneratorImage3d)
+TYPE_HNDL("image3d_uint", false, 0, 1, 0x0, 0x7f, KernelArgGeneratorImage3d)
+TYPE_HNDL("image_kernel_data", false, 0, 1, 0x0, 0xffffffff, KernelStructTypeArgGenerator) //image_kernel_data defines as 5 X int +TYPE_HNDL("image_kernel_data*", true, 0, 1, 0x0, 0xffffffff, KernelStructTypeArgGenerator) //image_kernel_data defines as 5 X int +TYPE_HNDL("int", false, 0, 1, 0x0, 0x7fffffff, KernelArgGeneratorT) +TYPE_HNDL("int*", true, 0, 16, 0x0, 0x7fffffff, KernelArgGeneratorT) +TYPE_HNDL("int16", false, 0, 16, 0x0, 0x7fffffff, KernelArgGeneratorT) +TYPE_HNDL("int16*", true, 0, 16, 0x0, 0x7fffffff, KernelArgGeneratorT) +TYPE_HNDL("int2", false, 0, 2, 0x0, 0x7fffffff, KernelArgGeneratorT) +TYPE_HNDL("int2*", true, 0, 2, 0x0, 0x7fffffff, KernelArgGeneratorT) +TYPE_HNDL("int3", false, 0, 3, 0x0, 0x7fffffff, KernelArgGeneratorT) +TYPE_HNDL("int3*", true, 0, 3, 0x0, 0x7fffffff, KernelArgGeneratorT) +TYPE_HNDL("int4", false, 0, 4, 0x0, 0x7fffffff, KernelArgGeneratorT) +TYPE_HNDL("int4*", true, 0, 4, 0x0, 0x7fffffff, KernelArgGeneratorT) +TYPE_HNDL("int8", false, 0, 8, 0x0, 0x7fffffff, KernelArgGeneratorT) +TYPE_HNDL("int8*", true, 0, 8, 0x0, 0x7fffffff, KernelArgGeneratorT) +TYPE_HNDL("intptr_t", false, 0, 1, 0x0, 0xffffffff, KernelArgGeneratorT) +TYPE_HNDL("intptr_t*", true, 0, 1, 0x0, 0xffffffff, KernelArgGeneratorT) +TYPE_HNDL("long", false, 0, 1, 0x0, 0x7fffffffffffffff, KernelArgGeneratorT) +TYPE_HNDL("long*", true, 0, 16, 0x0, 0x7fffffffffffffff, KernelArgGeneratorT) +TYPE_HNDL("long16", false, 0, 16, 0x0, 0x7fffffffffffffff, KernelArgGeneratorT) +TYPE_HNDL("long16*", true, 0, 16, 0x0, 0x7fffffffffffffff, KernelArgGeneratorT) +TYPE_HNDL("long2", false, 0, 2, 0x0, 0x7fffffffffffffff, KernelArgGeneratorT) +TYPE_HNDL("long2*", true, 0, 2, 0x0, 0x7fffffffffffffff, KernelArgGeneratorT) +TYPE_HNDL("long3", false, 0, 3, 0x0, 0x7fffffffffffffff, KernelArgGeneratorT) +TYPE_HNDL("long3*", true, 0, 3, 0x0, 0x7fffffffffffffff, KernelArgGeneratorT) +TYPE_HNDL("long4", false, 0, 4, 0x0, 0x7fffffffffffffff, KernelArgGeneratorT) +TYPE_HNDL("long4*", true, 0, 4, 
0x0, 0x7fffffffffffffff, KernelArgGeneratorT) +TYPE_HNDL("long8", false, 0, 8, 0x0, 0x7fffffffffffffff, KernelArgGeneratorT) +TYPE_HNDL("long8*", true, 0, 8, 0x0, 0x7fffffffffffffff, KernelArgGeneratorT) +TYPE_HNDL("ptrdiff_t", false, 0, 1, 0x0, 0xffffffff, KernelArgGeneratorT) +TYPE_HNDL("ptrdiff_t*", true, 0, 1, 0x0, 0xffffffff, KernelArgGeneratorT) +TYPE_HNDL("sampler_t", false, 0, 1, 0x0, 0x7f, KernelArgGeneratorSampler) +TYPE_HNDL("short", false, 0, 1, 0x0, 0x7fff, KernelArgGeneratorT) +TYPE_HNDL("short*", true, 0, 16, 0x0, 0x7fff, KernelArgGeneratorT) +TYPE_HNDL("short16", false, 0, 16, 0x0, 0x7fff, KernelArgGeneratorT) +TYPE_HNDL("short16*", true, 0, 16, 0x0, 0x7fff, KernelArgGeneratorT) +TYPE_HNDL("short2", false, 0, 2, 0x0, 0x7fff, KernelArgGeneratorT) +TYPE_HNDL("short2*", true, 0, 2, 0x0, 0x7fff, KernelArgGeneratorT) +TYPE_HNDL("short3", false, 0, 3, 0x0, 0x7fff, KernelArgGeneratorT) +TYPE_HNDL("short3*", true, 0, 3, 0x0, 0x7fff, KernelArgGeneratorT) +TYPE_HNDL("short4", false, 0, 4, 0x0, 0x7fff, KernelArgGeneratorT) +TYPE_HNDL("short4*", true, 0, 4, 0x0, 0x7fff, KernelArgGeneratorT) +TYPE_HNDL("short8", false, 0, 8, 0x0, 0x7fff, KernelArgGeneratorT) +TYPE_HNDL("short8*", true, 0, 8, 0x0, 0x7fff, KernelArgGeneratorT) +TYPE_HNDL("size_t", false, 0, 1, 0x0, 0xffffffff, KernelArgGeneratorT) +TYPE_HNDL("size_t*", true, 0, 1, 0x0, 0xffffffff, KernelArgGeneratorT) +TYPE_HNDL("struct", false, 0, 1, -0x01000000, 0x01000000, KernelArgGeneratorNI) +TYPE_HNDL("struct struct_type", false, 0, 1, -0x01000000, 0x01000000, KernelStructTypeArgGenerator) //struct_type defines as {float4, int} +TYPE_HNDL("struct struct_type*", true, 0, 1, -0x01000000, 0x01000000, KernelStructTypeArgGenerator) //struct_type defines as {float4, int} +TYPE_HNDL("testStruct", false, 0, 1, 0x0, 0xffffffff, KernelStructTypeArgGenerator) //testStruct +TYPE_HNDL("testStruct*", true, 0, 1, 0x0, 0xffffffff, KernelStructTypeArgGenerator) //testStruct +TYPE_HNDL("typedef_enum_type", false, 0, 1, 0, 2, 
KernelArgGeneratorT) +TYPE_HNDL("typedef_enum_type*", true, 0, 1, 0, 2, KernelArgGeneratorT) +TYPE_HNDL("typedef_struct_type", false, 0, 1, -0x01000000, 0x01000000, KernelStructTypeArgGenerator) //typedef_struct_type defines as {float4, int} +TYPE_HNDL("typedef_struct_type*", true, 0, 1, -0x01000000, 0x01000000, KernelStructTypeArgGenerator) //typedef_struct_type defines as {float4, int} +TYPE_HNDL("typedef_type", false, 0, 4, -0x01000000, 0x01000000, KernelArgGeneratorT) //typeded_type defines as float4 +TYPE_HNDL("typedef_type*", true, 0, 4, -0x01000000, 0x01000000, KernelArgGeneratorT) //typeded_type defines as float4 +TYPE_HNDL("typedef_union_type", false, 0, 4, -0x01000000, 0x01000000, KernelArgGeneratorT) //typedef_union_type defines as float4/int4 +TYPE_HNDL("typedef_union_type*", true, 0, 4, -0x01000000, 0x01000000, KernelArgGeneratorT) //typedef_union_type defines as float4/int4 +TYPE_HNDL("uchar", false, 0, 1, 0x0, 0xff, KernelArgGeneratorT) +TYPE_HNDL("uchar*", true, 0, 16, 0x0, 0xff, KernelArgGeneratorT) +TYPE_HNDL("uchar16", false, 0, 16, 0x0, 0xff, KernelArgGeneratorT) +TYPE_HNDL("uchar16*", true, 0, 16, 0x0, 0xff, KernelArgGeneratorT) +TYPE_HNDL("uchar2", false, 0, 2, 0x0, 0xff, KernelArgGeneratorT) +TYPE_HNDL("uchar2*", true, 0, 2, 0x0, 0xff, KernelArgGeneratorT) +TYPE_HNDL("uchar3", false, 0, 3, 0x0, 0xff, KernelArgGeneratorT) +TYPE_HNDL("uchar3*", true, 0, 3, 0x0, 0xff, KernelArgGeneratorT) +TYPE_HNDL("uchar4", false, 0, 4, 0x0, 0xff, KernelArgGeneratorT) +TYPE_HNDL("uchar4*", true, 0, 4, 0x0, 0xff, KernelArgGeneratorT) +TYPE_HNDL("uchar8", false, 0, 8, 0x0, 0xff, KernelArgGeneratorT) +TYPE_HNDL("uchar8*", true, 0, 8, 0x0, 0xff, KernelArgGeneratorT) +TYPE_HNDL("uint", false, 0, 1, 0x0, 0xffffffff, KernelArgGeneratorT) +TYPE_HNDL("uint*", true, 0, 16, 0x0, 0xffffffff, KernelArgGeneratorT) +TYPE_HNDL("uint16", false, 0, 16, 0x0, 0xffffffff, KernelArgGeneratorT) +TYPE_HNDL("uint16*", true, 0, 16, 0x0, 0xffffffff, KernelArgGeneratorT) 
+TYPE_HNDL("uint2", false, 0, 2, 0x0, 0xffffffff, KernelArgGeneratorT) +TYPE_HNDL("uint2*", true, 0, 2, 0x0, 0xffffffff, KernelArgGeneratorT) +TYPE_HNDL("uint3", false, 0, 3, 0x0, 0xffffffff, KernelArgGeneratorT) +TYPE_HNDL("uint3*", true, 0, 3, 0x0, 0xffffffff, KernelArgGeneratorT) +TYPE_HNDL("uint4", false, 0, 4, 0x0, 0xffffffff, KernelArgGeneratorT) +TYPE_HNDL("uint4*", true, 0, 4, 0x0, 0xffffffff, KernelArgGeneratorT) +TYPE_HNDL("uint8", false, 0, 8, 0x0, 0xffffffff, KernelArgGeneratorT) +TYPE_HNDL("uint8*", true, 0, 8, 0x0, 0xffffffff, KernelArgGeneratorT) +TYPE_HNDL("uintptr_t", false, 0, 1, 0x0, 0xffffffff, KernelArgGeneratorT) +TYPE_HNDL("uintptr_t*", true, 0, 1, 0x0, 0xffffffff, KernelArgGeneratorT) +TYPE_HNDL("ulong", false, 0, 1, 0x0, 0xffffffffffffffff, KernelArgGeneratorT) +TYPE_HNDL("ulong*", true, 0, 16, 0x0, 0xffffffffffffffff, KernelArgGeneratorT) +TYPE_HNDL("ulong16", false, 0, 16, 0x0, 0xffffffffffffffff, KernelArgGeneratorT) +TYPE_HNDL("ulong16*", true, 0, 16, 0x0, 0xffffffffffffffff, KernelArgGeneratorT) +TYPE_HNDL("ulong2", false, 0, 2, 0x0, 0xffffffffffffffff, KernelArgGeneratorT) +TYPE_HNDL("ulong2*", true, 0, 2, 0x0, 0xffffffffffffffff, KernelArgGeneratorT) +TYPE_HNDL("ulong3", false, 0, 3, 0x0, 0xffffffffffffffff, KernelArgGeneratorT) +TYPE_HNDL("ulong3*", true, 0, 3, 0x0, 0xffffffffffffffff, KernelArgGeneratorT) +TYPE_HNDL("ulong4", false, 0, 4, 0x0, 0xffffffffffffffff, KernelArgGeneratorT) +TYPE_HNDL("ulong4*", true, 0, 4, 0x0, 0xffffffffffffffff, KernelArgGeneratorT) +TYPE_HNDL("ulong8", false, 0, 8, 0x0, 0xffffffffffffffff, KernelArgGeneratorT) +TYPE_HNDL("ulong8*", true, 0, 8, 0x0, 0xffffffffffffffff, KernelArgGeneratorT) +TYPE_HNDL("union union_type", false, 0, 4, -0x01000000, 0x01000000, KernelArgGeneratorT) //union union_type defines as float4/int4 +TYPE_HNDL("union union_type*", true, 0, 4, -0x01000000, 0x01000000, KernelArgGeneratorT) //union union_type defines as float4/int4 +TYPE_HNDL("union_type", false, 0, 4, -0x01000000, 
0x01000000, KernelArgGeneratorT) //union_type defines as float4/int4 +TYPE_HNDL("union_type*", true, 0, 4, -0x01000000, 0x01000000, KernelArgGeneratorT) //union_type defines as float4/int4 +TYPE_HNDL("ushort", false, 0, 1, 0x0, 0xffff, KernelArgGeneratorT) +TYPE_HNDL("ushort*", true, 0, 16, 0x0, 0xffff, KernelArgGeneratorT) +TYPE_HNDL("ushort16", false, 0, 16, 0x0, 0xffff, KernelArgGeneratorT) +TYPE_HNDL("ushort16*", true, 0, 16, 0x0, 0xffff, KernelArgGeneratorT) +TYPE_HNDL("ushort2", false, 0, 2, 0x0, 0xffff, KernelArgGeneratorT) +TYPE_HNDL("ushort2*", true, 0, 2, 0x0, 0xffff, KernelArgGeneratorT) +TYPE_HNDL("ushort3", false, 0, 3, 0x0, 0xffff, KernelArgGeneratorT) +TYPE_HNDL("ushort3*", true, 0, 3, 0x0, 0xffff, KernelArgGeneratorT) +TYPE_HNDL("ushort4", false, 0, 4, 0x0, 0xffff, KernelArgGeneratorT) +TYPE_HNDL("ushort4*", true, 0, 4, 0x0, 0xffff, KernelArgGeneratorT) +TYPE_HNDL("ushort8", false, 0, 8, 0x0, 0xffff, KernelArgGeneratorT) +TYPE_HNDL("ushort8*", true, 0, 8, 0x0, 0xffff, KernelArgGeneratorT) +TYPE_HNDL("void*", true, 0, 16, 0x0, 0xff, KernelArgGeneratorT) +TYPE_HNDL("work_item_data", false, 0, 1, 0x0, 0xffffffff, KernelStructTypeArgGenerator) //work_item_data defines as uint, 6 X uint[3] +TYPE_HNDL("work_item_data*", true, 0, 1, 0x0, 0xffffffff, KernelStructTypeArgGenerator) //work_item_data defines as uint, 6 X uint[3] diff --git a/test_conformance/spir/vec_align.zip b/test_conformance/spir/vec_align.zip new file mode 100644 index 00000000..0b80c6e8 Binary files /dev/null and b/test_conformance/spir/vec_align.zip differ diff --git a/test_conformance/spir/vec_step.zip b/test_conformance/spir/vec_step.zip new file mode 100644 index 00000000..62cc5abe Binary files /dev/null and b/test_conformance/spir/vec_step.zip differ diff --git a/test_conformance/subgroups/CMakeLists.txt b/test_conformance/subgroups/CMakeLists.txt new file mode 100644 index 00000000..37bf3088 --- /dev/null +++ b/test_conformance/subgroups/CMakeLists.txt @@ -0,0 +1,20 @@ 
+set(MODULE_NAME SUBGROUPS) + +set(${MODULE_NAME}_SOURCES + main.cpp + test_barrier.cpp + test_queries.cpp + test_workitem.cpp + test_workgroup.cpp + ../../test_common/harness/errorHelpers.c + ../../test_common/harness/testHarness.c + ../../test_common/harness/kernelHelpers.c + ../../test_common/harness/typeWrappers.cpp + ../../test_common/harness/mt19937.c + ../../test_common/harness/msvc9.c + ../../test_common/harness/ThreadPool.c + ../../test_common/harness/conversions.c + ../../test_common/harness/parseParameters.cpp +) + +include(../CMakeCommon.txt) diff --git a/test_conformance/subgroups/Jamfile b/test_conformance/subgroups/Jamfile new file mode 100644 index 00000000..728010b8 --- /dev/null +++ b/test_conformance/subgroups/Jamfile @@ -0,0 +1,26 @@ +project + : requirements + gcc:-xc++ + msvc:"/TP" + ; +exe test_subgroups + : main.cpp + test_queries.cpp + test_workitem.cpp + test_workgroup.cpp + test_barrier.cpp + ../../test_common/harness/errorHelpers.c + ../../test_common/harness/threadTesting.c + ../../test_common/harness/testHarness.c + ../../test_common/harness/mt19937.c + ../../test_common/harness/conversions.c + ../../test_common/harness/kernelHelpers.c + ../../test_common/harness/mt19937.c + : windows:../../test_common/harness/msvc9.c + ; +install dist + : test_subgroups + : debug:$(DIST)/debug/tests/test_conformance/subgroups + release:$(DIST)/release/tests/test_conformance/subgroups + ; + diff --git a/test_conformance/subgroups/main.cpp b/test_conformance/subgroups/main.cpp new file mode 100644 index 00000000..5a289126 --- /dev/null +++ b/test_conformance/subgroups/main.cpp @@ -0,0 +1,60 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" + +#include +#include +#include "procs.h" +#include "../../test_common/harness/testHarness.h" + +MTdata gMTdata; + +basefn basefn_list[] = { + test_sub_group_info, + test_work_item_functions, + test_work_group_functions, + test_barrier_functions, +}; + +const char *basefn_names[] = { + "sub_group_info", + "work_item_functions", + "work_group_functions", + "barrier_functions", +}; + +ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_list) / sizeof(basefn_list[0]))); + +static const int num_fns = sizeof(basefn_names) / sizeof(char *); + +static int +checkSubGroupsExtension(cl_device_id device) +{ + if (!is_extension_available(device, "cl_khr_subgroups")) { + log_info("Device does not support 'cl_khr_subgroups'. Skipping the test.\n"); + return CL_INVALID_DEVICE; + } + + return CL_SUCCESS; +} + +int +main(int argc, const char *argv[]) +{ + gMTdata = init_genrand(0); + return runTestHarnessWithCheck(argc, argv, num_fns, basefn_list, basefn_names, false, false, NULL, checkSubGroupsExtension); +} + diff --git a/test_conformance/subgroups/procs.h b/test_conformance/subgroups/procs.h new file mode 100644 index 00000000..731ad3b6 --- /dev/null +++ b/test_conformance/subgroups/procs.h @@ -0,0 +1,43 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef _procs_h +#define _procs_h + +#include "../../test_common/harness/testHarness.h" +#include "../../test_common/harness/kernelHelpers.h" +#include "../../test_common/harness/errorHelpers.h" +#include "../../test_common/harness/conversions.h" +#include "../../test_common/harness/threadTesting.h" +#include "../../test_common/harness/typeWrappers.h" +#include "../../test_common/harness/mt19937.h" + +extern MTdata gMTdata; + +#ifdef __cplusplus +extern "C" { +#endif + +extern int test_sub_group_info(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); +extern int test_work_item_functions(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); +extern int test_work_group_functions(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); +extern int test_barrier_functions(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); +extern int test_pipe_functions(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); + +#ifdef __cplusplus +} +#endif + +#endif /*_procs_h*/ diff --git a/test_conformance/subgroups/subhelpers.h b/test_conformance/subgroups/subhelpers.h new file mode 100644 index 00000000..60e392d5 --- /dev/null +++ b/test_conformance/subgroups/subhelpers.h @@ -0,0 +1,286 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef SUBHELPERS_H +#define SUBHELPERS_H + +#include "testHarness.h" +#include "kernelHelpers.h" +#include "typeWrappers.h" + +#include +#include + +// Some template helpers +template struct TypeName; +template <> struct TypeName { static const char * val() { return "half"; } }; +template <> struct TypeName { static const char * val() { return "uint"; } }; +template <> struct TypeName { static const char * val() { return "int"; } }; +template <> struct TypeName { static const char * val() { return "ulong"; } }; +template <> struct TypeName { static const char * val() { return "long"; } }; +template <> struct TypeName { static const char * val() { return "float"; } }; +template <> struct TypeName { static const char * val() { return "double"; } }; + +template struct TypeDef; +template <> struct TypeDef { static const char * val() { return "typedef half Type;\n"; } }; +template <> struct TypeDef { static const char * val() { return "typedef uint Type;\n"; } }; +template <> struct TypeDef { static const char * val() { return "typedef int Type;\n"; } }; +template <> struct TypeDef { static const char * val() { return "typedef ulong Type;\n"; } }; +template <> struct TypeDef { static const char * val() { return "typedef long Type;\n"; } }; +template <> struct TypeDef { static const char * val() { return "typedef float Type;\n"; } }; +template <> struct TypeDef { static const char * val() { return "typedef double Type;\n"; } }; + +template struct TypeIdentity; +// template <> struct TypeIdentity { static cl_half val() { return (cl_half)0.0; } }; 
+// template <> struct TypeIdentity { static cl_half val() { return -(cl_half)65536.0; } }; +// template <> struct TypeIdentity { static cl_half val() { return (cl_half)65536.0; } }; + +template <> struct TypeIdentity { static cl_uint val() { return (cl_uint)0; } }; +template <> struct TypeIdentity { static cl_uint val() { return (cl_uint)0; } }; +template <> struct TypeIdentity { static cl_uint val() { return (cl_uint)0xffffffff; } }; + +template <> struct TypeIdentity { static cl_int val() { return (cl_int)0 ; } }; +template <> struct TypeIdentity { static cl_int val() { return (cl_int)0x80000000; } }; +template <> struct TypeIdentity { static cl_int val() { return (cl_int)0x7fffffff; } }; + +template <> struct TypeIdentity { static cl_ulong val() { return (cl_ulong)0 ; } }; +template <> struct TypeIdentity { static cl_ulong val() { return (cl_ulong)0 ; } }; +template <> struct TypeIdentity { static cl_ulong val() { return (cl_ulong)0xffffffffffffffffULL ; } }; + +template <> struct TypeIdentity { static cl_long val() { return (cl_long)0; } }; +template <> struct TypeIdentity { static cl_long val() { return (cl_long)0x8000000000000000ULL; } }; +template <> struct TypeIdentity { static cl_long val() { return (cl_long)0x7fffffffffffffffULL; } }; + + +template <> struct TypeIdentity { static float val() { return 0.F; } }; +template <> struct TypeIdentity { static float val() { return -std::numeric_limits::infinity(); } }; +template <> struct TypeIdentity { static float val() { return std::numeric_limits::infinity(); } }; + +template <> struct TypeIdentity { static double val() { return 0.L; } }; + +template <> struct TypeIdentity { static double val() { return -std::numeric_limits::infinity(); } }; +template <> struct TypeIdentity { static double val() { return std::numeric_limits::infinity(); } }; + +template struct TypeCheck; +template <> struct TypeCheck { static bool val(cl_device_id) { return true; } }; +template <> struct TypeCheck { static bool 
val(cl_device_id) { return true; } }; + +static bool +int64_ok(cl_device_id device) +{ + char profile[128]; + int error; + + error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), (void *)&profile, NULL); + if (error) { + log_info("clGetDeviceInfo failed with CL_DEVICE_PROFILE\n"); + return false; + } + + if (strcmp(profile, "EMBEDDED_PROFILE") == 0) + return is_extension_available(device, "cles_khr_int64"); + + return true; +} + +template <> struct TypeCheck { static bool val(cl_device_id device) { return int64_ok(device); } }; +template <> struct TypeCheck { static bool val(cl_device_id device) { return int64_ok(device); } }; +template <> struct TypeCheck { static bool val(cl_device_id) { return true; } }; +template <> struct TypeCheck { + static bool val(cl_device_id device) { return is_extension_available(device, "cl_khr_fp16"); } +}; +template <> struct TypeCheck { + static bool val(cl_device_id device) { + int error; + cl_device_fp_config c; + error = clGetDeviceInfo(device, CL_DEVICE_DOUBLE_FP_CONFIG, sizeof(c), (void *)&c, NULL); + if (error) { + log_info("clGetDeviceInfo failed with CL_DEVICE_DOUBLE_FP_CONFIG\n"); + return false; + } + return c != 0; + } +}; + + +// Run a test kernel to compute the result of a built-in on an input +static int +run_kernel(cl_context context, cl_command_queue queue, cl_kernel kernel, size_t global, size_t local, + void *idata, size_t isize, void *mdata, size_t msize, + void *odata, size_t osize, size_t tsize=0) +{ + clMemWrapper in; + clMemWrapper xy; + clMemWrapper out; + clMemWrapper tmp; + int error; + + in = clCreateBuffer(context, CL_MEM_READ_ONLY, isize, NULL, &error); + test_error(error, "clCreateBuffer failed"); + + xy = clCreateBuffer(context, CL_MEM_WRITE_ONLY, msize, NULL, &error); + test_error(error, "clCreateBuffer failed"); + + out = clCreateBuffer(context, CL_MEM_WRITE_ONLY, osize, NULL, &error); + test_error(error, "clCreateBuffer failed"); + + if (tsize) { + tmp = clCreateBuffer(context, 
CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, tsize, NULL, &error); + test_error(error, "clCreateBuffer failed"); + } + + error = clSetKernelArg(kernel, 0, sizeof(in), (void *)&in); + test_error(error, "clSetKernelArg failed"); + + error = clSetKernelArg(kernel, 1, sizeof(xy), (void *)&xy); + test_error(error, "clSetKernelArg failed"); + + error = clSetKernelArg(kernel, 2, sizeof(out), (void *)&out); + test_error(error, "clSetKernelArg failed"); + + if (tsize) { + error = clSetKernelArg(kernel, 3, sizeof(tmp), (void *)&tmp); + test_error(error, "clSetKernelArg failed"); + } + + error = clEnqueueWriteBuffer(queue, in, CL_FALSE, 0, isize, idata, 0, NULL, NULL); + test_error(error, "clEnqueueWriteBuffer failed"); + + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL); + test_error(error, "clEnqueueNDRangeKernel failed"); + + error = clEnqueueReadBuffer(queue, xy, CL_FALSE, 0, msize, mdata, 0, NULL, NULL); + test_error(error, "clEnqueueReadBuffer failed"); + + error = clEnqueueReadBuffer(queue, out, CL_FALSE, 0, osize, odata, 0, NULL, NULL); + test_error(error, "clEnqueueReadBuffer failed"); + + error = clFinish(queue); + test_error(error, "clFinish failed"); + + return error; +} + +// Driver for testing a single built in function +template +struct test { + static int + run(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements, const char *kname, const char *src, int dynscl=0) + { + size_t tmp; + int error; + int subgroup_size, num_subgroups; + size_t realSize; + size_t global; + size_t local; + const char *kstrings[3]; + clProgramWrapper program; + clKernelWrapper kernel; + cl_platform_id platform; + cl_int sgmap[2*GSIZE]; + Ty mapin[LSIZE]; + Ty mapout[LSIZE]; + + // Make sure a test of type Ty is supported by the device + if (!TypeCheck::val(device)) + return 0; + + error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform), (void *)&platform, NULL); + test_error(error, "clGetDeviceInfo failed 
for CL_DEVICE_PLATFORM"); + + kstrings[0] = "#pragma OPENCL EXTENSION cl_khr_subgroups : enable\n" + "#define XY(M,I) M[I].x = get_sub_group_local_id(); M[I].y = get_sub_group_id();\n"; + kstrings[1] = TypeDef::val(); + kstrings[2] = src; + error = create_single_kernel_helper_with_build_options(context, &program, &kernel, 3, kstrings, kname, "-cl-std=CL2.0"); + if (error != 0) + return error; + + // Determine some local dimensions to use for the test. + global = GSIZE; + error = get_max_common_work_group_size(context, kernel, GSIZE, &local); + test_error(error, "get_max_common_work_group_size failed"); + + // Limit it a bit so we have muliple work groups + // Ideally this will still be large enough to give us multiple subgroups + if (local > LSIZE) + local = LSIZE; + + // Get the sub group info + clGetKernelSubGroupInfoKHR_fn clGetKernelSubGroupInfoKHR_ptr; + clGetKernelSubGroupInfoKHR_ptr = (clGetKernelSubGroupInfoKHR_fn)clGetExtensionFunctionAddressForPlatform(platform, + "clGetKernelSubGroupInfoKHR"); + if (clGetKernelSubGroupInfoKHR_ptr == NULL) { + log_error("ERROR: clGetKernelSubGroupInfoKHR function not available"); + return -1; + } + + error = clGetKernelSubGroupInfoKHR_ptr(kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR, + sizeof(local), (void *)&local, sizeof(tmp), (void *)&tmp, NULL); + test_error(error, "clGetKernelSubGroupInfoKHR failed for CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR"); + subgroup_size = (int)tmp; + + error = clGetKernelSubGroupInfoKHR_ptr(kernel, device, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR, + sizeof(local), (void *)&local, sizeof(tmp), (void *)&tmp, NULL); + test_error(error, "clGetKernelSubGroupInfoKHR failed for CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR"); + num_subgroups = (int)tmp; + + // Make sure the number of sub groups is what we expect + if (num_subgroups != (local + subgroup_size - 1)/ subgroup_size) { + log_error("ERROR: unexpected number of subgroups (%d) returned by clGetKernelSubGroupInfoKHR\n", 
num_subgroups); + return -1; + } + + std::vector idata; + std::vector odata; + size_t input_array_size = GSIZE; + size_t output_array_size = GSIZE; + + if (dynscl != 0) { + input_array_size = (int)global / (int)local * num_subgroups * dynscl; + output_array_size = (int)global / (int)local * dynscl; + } + + idata.resize(input_array_size); + odata.resize(output_array_size); + + // Run the kernel once on zeroes to get the map + memset(&idata[0], 0, input_array_size * sizeof(Ty)); + error = run_kernel(context, queue, kernel, global, local, + &idata[0], input_array_size * sizeof(Ty), + sgmap, global*sizeof(cl_int)*2, + &odata[0], output_array_size * sizeof(Ty), + TSIZE*sizeof(Ty)); + if (error) + return error; + + // Generate the desired input for the kernel + Fns::gen(&idata[0], mapin, sgmap, subgroup_size, (int)local, (int)global / (int)local); + + error = run_kernel(context, queue, kernel, global, local, + &idata[0], input_array_size * sizeof(Ty), + sgmap, global*sizeof(cl_int)*2, + &odata[0], output_array_size * sizeof(Ty), + TSIZE*sizeof(Ty)); + if (error) + return error; + + + // Check the result + return Fns::chk(&idata[0], &odata[0], mapin, mapout, sgmap, subgroup_size, (int)local, (int)global / (int)local); + } +}; + +#endif diff --git a/test_conformance/subgroups/test_barrier.cpp b/test_conformance/subgroups/test_barrier.cpp new file mode 100644 index 00000000..3b4a0f99 --- /dev/null +++ b/test_conformance/subgroups/test_barrier.cpp @@ -0,0 +1,147 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "procs.h" +#include "subhelpers.h" +#include "../../test_common/harness/conversions.h" +#include "../../test_common/harness/typeWrappers.h" + +static const char * lbar_source = +"__kernel void test_lbar(const __global Type *in, __global int2 *xy, __global Type *out)\n" +"{\n" +" __local int tmp[200];\n" +" int gid = get_global_id(0);\n" +" int nid = get_sub_group_size();\n" +" int lid = get_sub_group_local_id();\n" +" xy[gid].x = lid;\n" +" xy[gid].y = get_sub_group_id();\n" +" if (get_sub_group_id() == 0) {\n" +" tmp[lid] = in[gid];\n" +" sub_group_barrier(CLK_LOCAL_MEM_FENCE);\n" +" out[gid] = tmp[nid-1-lid];\n" +" } else {\n" +" out[gid] = -in[gid];\n" +" }\n" +"}\n"; + +static const char * gbar_source = +"__kernel void test_gbar(const __global Type *in, __global int2 *xy, __global Type *out, __global Type *tmp)\n" +"{\n" +" int gid = get_global_id(0);\n" +" int nid = get_sub_group_size();\n" +" int lid = get_sub_group_local_id();\n" +" int tof = get_group_id(0)*get_max_sub_group_size();\n" +" xy[gid].x = lid;\n" +" xy[gid].y = get_sub_group_id();\n" +" if (get_sub_group_id() == 0) {\n" +" tmp[tof+lid] = in[gid];\n" +" sub_group_barrier(CLK_GLOBAL_MEM_FENCE);\n" +" out[gid] = tmp[tof+nid-1-lid];\n" +" } else {\n" +" out[gid] = -in[gid];\n" +" }\n" +"}\n"; + +// barrier test functions +template +struct BAR { + static void gen(cl_int *x, cl_int *t, cl_int *m, int ns, int nw, int ng) + { + int i, ii, j, k, n; + int nj = (nw + ns - 1)/ns; + int e; + + ii = 0; + for (k=0; k nw ? nw - ii : ns; + + for (i=0;i nw ? 
nw - ii : ns; + + for (i=0; i, G, L>::run(device, context, queue, num_elements, "test_lbar", lbar_source); + error = test, G, L, G>::run(device, context, queue, num_elements, "test_gbar", gbar_source); + + return error; +} + diff --git a/test_conformance/subgroups/test_queries.cpp b/test_conformance/subgroups/test_queries.cpp new file mode 100644 index 00000000..6055f658 --- /dev/null +++ b/test_conformance/subgroups/test_queries.cpp @@ -0,0 +1,136 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "procs.h" + +typedef struct { + cl_uint maxSubGroupSize; + cl_uint numSubGroups; +} result_data; + +static const char * query_kernel_source = +"#pragma OPENCL EXTENSION cl_khr_subgroups : enable\n" +"\n" +"typedef struct {\n" +" uint maxSubGroupSize;\n" +" uint numSubGroups;\n" +"} result_data;\n" +"\n" +"__kernel void query_kernel( __global result_data *outData )\n" +"{\n" +" int gid = get_global_id( 0 );\n" +" outData[gid].maxSubGroupSize = get_max_sub_group_size();\n" +" outData[gid].numSubGroups = get_num_sub_groups();\n" +"}"; + +int +test_sub_group_info(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + static const size_t gsize0 = 80; + int i, error; + size_t realSize; + size_t kernel_max_subgroup_size, kernel_subgroup_count; + size_t global[] = {gsize0,14,10}; + size_t local[] = {0,0,0}; + result_data result[gsize0]; + + cl_uint max_dimensions; + + error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(max_dimensions), &max_dimensions, NULL); + test_error(error, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS"); + + cl_platform_id platform; + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper out; + + error = create_single_kernel_helper_with_build_options(context, &program, &kernel, 1, &query_kernel_source, "query_kernel", "-cl-std=CL2.0"); + if (error != 0) + return error; + + // Determine some local dimensions to use for the test. 
+ if (max_dimensions == 1) { + error = get_max_common_work_group_size(context, kernel, global[0], &local[0]); + test_error(error, "get_max_common_work_group_size failed"); + } else if (max_dimensions == 2) { + error = get_max_common_2D_work_group_size(context, kernel, global, local); + test_error(error, "get_max_common_2D_work_group_size failed"); + } else { + error = get_max_common_3D_work_group_size(context, kernel, global, local); + test_error(error, "get_max_common_3D_work_group_size failed"); + } + + error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform), (void *)&platform, NULL); + test_error(error, "clDeviceInfo failed for CL_DEVICE_PLATFORM"); + + clGetKernelSubGroupInfoKHR_fn clGetKernelSubGroupInfoKHR_ptr; + clGetKernelSubGroupInfoKHR_ptr = (clGetKernelSubGroupInfoKHR_fn)clGetExtensionFunctionAddressForPlatform(platform, "clGetKernelSubGroupInfoKHR"); + if (clGetKernelSubGroupInfoKHR_ptr == NULL) { + log_error("ERROR: clGetKernelSubGroupInfoKHR function not available"); + return -1; + } + + error = clGetKernelSubGroupInfoKHR_ptr(kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR, + sizeof(local), (void *)&local, sizeof(kernel_max_subgroup_size), (void *)&kernel_max_subgroup_size, &realSize); + test_error(error, "clGetKernelSubGroupInfoKHR failed for CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR"); + log_info("The CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR for the kernel is %d.\n", (int)kernel_max_subgroup_size); + + if (realSize != sizeof(kernel_max_subgroup_size)) { + log_error( "ERROR: Returned size of max sub group size not valid! 
(Expected %d, got %d)\n", (int)sizeof(kernel_max_subgroup_size), (int)realSize ); + return -1; + } + + error = clGetKernelSubGroupInfoKHR_ptr(kernel, device, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR, + sizeof(local), (void *)&local, sizeof(kernel_subgroup_count), (void *)&kernel_subgroup_count, &realSize); + test_error(error, "clGetKernelSubGroupInfoKHR failed for CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR"); + log_info("The CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR for the kernel is %d.\n", (int)kernel_subgroup_count); + + if (realSize != sizeof(kernel_subgroup_count)) { + log_error( "ERROR: Returned size of sub group count not valid! (Expected %d, got %d)\n", (int)sizeof(kernel_subgroup_count), (int)realSize ); + return -1; + } + + // Verify that the kernel gets the same max_subgroup_size and subgroup_count + out = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(result), NULL, &error); + test_error(error, "clCreateBuffer failed"); + + error = clSetKernelArg(kernel, 0, sizeof(out), &out); + test_error(error, "clSetKernelArg failed"); + + error = clEnqueueNDRangeKernel(queue, kernel, max_dimensions, NULL, global, local, 0, NULL, NULL); + test_error(error, "clEnqueueNDRangeKernel failed"); + + error = clEnqueueReadBuffer(queue, out, CL_FALSE, 0, sizeof(result), &result, 0, NULL, NULL); + test_error(error, "clEnqueueReadBuffer failed"); + + error = clFinish(queue); + test_error(error, "clFinish failed"); + + for (i=0; i<(int)gsize0; ++i) { + if (result[i].maxSubGroupSize != (cl_uint)kernel_max_subgroup_size) { + log_error("ERROR: get_max_subgroup_size() doesn't match result from clGetKernelSubGroupInfoKHR, %u vs %u\n", + result[i].maxSubGroupSize, (cl_uint)kernel_max_subgroup_size); + return -1; + } + if (result[i].numSubGroups != (cl_uint)kernel_subgroup_count) { + log_error("ERROR: get_num_sub_groups() doesn't match result from clGetKernelSubGroupInfoKHR, %u vs %u\n", + result[i].numSubGroups, (cl_uint)kernel_subgroup_count); + return -1; + } + } + + return 0; +} 
+ diff --git a/test_conformance/subgroups/test_workgroup.cpp b/test_conformance/subgroups/test_workgroup.cpp new file mode 100644 index 00000000..ee7693d3 --- /dev/null +++ b/test_conformance/subgroups/test_workgroup.cpp @@ -0,0 +1,867 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "procs.h" +#include "subhelpers.h" +#include "../../test_common/harness/conversions.h" +#include "../../test_common/harness/typeWrappers.h" + +static const char * any_source = +"__kernel void test_any(const __global Type *in, __global int2 *xy, __global Type *out)\n" +"{\n" +" int gid = get_global_id(0);\n" +" XY(xy,gid);\n" +" out[gid] = sub_group_any(in[gid]);\n" +"}\n"; + +static const char * all_source = +"__kernel void test_all(const __global Type *in, __global int2 *xy, __global Type *out)\n" +"{\n" +" int gid = get_global_id(0);\n" +" XY(xy,gid);\n" +" out[gid] = sub_group_all(in[gid]);\n" +"}\n"; + +static const char * bcast_source = +"__kernel void test_bcast(const __global Type *in, __global int2 *xy, __global Type *out)\n" +"{\n" +" int gid = get_global_id(0);\n" +" XY(xy,gid);\n" +" Type x = in[gid];\n" +" size_t loid = (size_t)((int)x % 100);\n" +" out[gid] = sub_group_broadcast(x, loid);\n" +"}\n"; + +static const char * redadd_source = +"__kernel void test_redadd(const __global Type *in, __global int2 *xy, __global Type *out)\n" +"{\n" +" int gid = get_global_id(0);\n" +" XY(xy,gid);\n" +" out[gid] 
= sub_group_reduce_add(in[gid]);\n" +"}\n"; + +static const char * redmax_source = +"__kernel void test_redmax(const __global Type *in, __global int2 *xy, __global Type *out)\n" +"{\n" +" int gid = get_global_id(0);\n" +" XY(xy,gid);\n" +" out[gid] = sub_group_reduce_max(in[gid]);\n" +"}\n"; + +static const char * redmin_source = +"__kernel void test_redmin(const __global Type *in, __global int2 *xy, __global Type *out)\n" +"{\n" +" int gid = get_global_id(0);\n" +" XY(xy,gid);\n" +" out[gid] = sub_group_reduce_min(in[gid]);\n" +"}\n"; + +static const char * scinadd_source = +"__kernel void test_scinadd(const __global Type *in, __global int2 *xy, __global Type *out)\n" +"{\n" +" int gid = get_global_id(0);\n" +" XY(xy,gid);\n" +" out[gid] = sub_group_scan_inclusive_add(in[gid]);\n" +"}\n"; + +static const char * scinmax_source = +"__kernel void test_scinmax(const __global Type *in, __global int2 *xy, __global Type *out)\n" +"{\n" +" int gid = get_global_id(0);\n" +" XY(xy,gid);\n" +" out[gid] = sub_group_scan_inclusive_max(in[gid]);\n" +"}\n"; + +static const char * scinmin_source = +"__kernel void test_scinmin(const __global Type *in, __global int2 *xy, __global Type *out)\n" +"{\n" +" int gid = get_global_id(0);\n" +" XY(xy,gid);\n" +" out[gid] = sub_group_scan_inclusive_min(in[gid]);\n" +"}\n"; + +static const char * scexadd_source = +"__kernel void test_scexadd(const __global Type *in, __global int2 *xy, __global Type *out)\n" +"{\n" +" int gid = get_global_id(0);\n" +" XY(xy,gid);\n" +" out[gid] = sub_group_scan_exclusive_add(in[gid]);\n" +"}\n"; + +static const char * scexmax_source = +"__kernel void test_scexmax(const __global Type *in, __global int2 *xy, __global Type *out)\n" +"{\n" +" int gid = get_global_id(0);\n" +" XY(xy,gid);\n" +" out[gid] = sub_group_scan_exclusive_max(in[gid]);\n" +"}\n"; + +static const char * scexmin_source = +"__kernel void test_scexmin(const __global Type *in, __global int2 *xy, __global Type *out)\n" +"{\n" +" int gid = 
get_global_id(0);\n" +" XY(xy,gid);\n" +" out[gid] = sub_group_scan_exclusive_min(in[gid]);\n" +"}\n"; + +// These need to stay in sync with the kernel source below +#define NUM_LOC 49 +#define INST_LOC_MASK 0x7f +#define INST_OP_SHIFT 0 +#define INST_OP_MASK 0xf +#define INST_LOC_SHIFT 4 +#define INST_VAL_SHIFT 12 +#define INST_VAL_MASK 0x7ffff +#define INST_END 0x0 +#define INST_STORE 0x1 +#define INST_WAIT 0x2 +#define INST_COUNT 0x3 + +static const char * ifp_source = +"#define NUM_LOC 49\n" +"#define INST_LOC_MASK 0x7f\n" +"#define INST_OP_SHIFT 0\n" +"#define INST_OP_MASK 0xf\n" +"#define INST_LOC_SHIFT 4\n" +"#define INST_VAL_SHIFT 12\n" +"#define INST_VAL_MASK 0x7ffff\n" +"#define INST_END 0x0\n" +"#define INST_STORE 0x1\n" +"#define INST_WAIT 0x2\n" +"#define INST_COUNT 0x3\n" +"\n" +"__kernel void\n" +"test_ifp(const __global int *in, __global int2 *xy, __global int *out)\n" +"{\n" +" __local atomic_int loc[NUM_LOC];\n" +"\n" +" // Don't run if there is only one sub group\n" +" if (get_num_sub_groups() == 1)\n" +" return;\n" +"\n" +" // First initialize loc[]\n" +" int lid = (int)get_local_id(0);\n" +"\n" +" if (lid < NUM_LOC)\n" +" atomic_init(loc+lid, 0);\n" +"\n" +" work_group_barrier(CLK_LOCAL_MEM_FENCE);\n" +"\n" +" // Compute pointer to this sub group's \"instructions\"\n" +" const __global int *pc = in +\n" +" ((int)get_group_id(0)*(int)get_enqueued_num_sub_groups() +\n" +" (int)get_sub_group_id()) *\n" +" (NUM_LOC+1);\n" +"\n" +" // Set up to \"run\"\n" +" bool ok = (int)get_sub_group_local_id() == 0;\n" +" bool run = true;\n" +"\n" +" while (run) {\n" +" int inst = *pc++;\n" +" int iop = (inst >> INST_OP_SHIFT) & INST_OP_MASK;\n" +" int iloc = (inst >> INST_LOC_SHIFT) & INST_LOC_MASK;\n" +" int ival = (inst >> INST_VAL_SHIFT) & INST_VAL_MASK;\n" +"\n" +" switch (iop) {\n" +" case INST_STORE:\n" +" if (ok)\n" +" atomic_store(loc+iloc, ival);\n" +" break;\n" +" case INST_WAIT:\n" +" if (ok) {\n" +" while (atomic_load(loc+iloc) != ival)\n" +" ;\n" 
+" }\n" +" break;\n" +" case INST_COUNT:\n" +" if (ok) {\n" +" int i;\n" +" for (i=0;i +struct AA { + static void gen(cl_int *x, cl_int *t, cl_int *m, int ns, int nw, int ng) + { + int i, ii, j, k, n; + int nj = (nw + ns - 1)/ns; + int e; + + ii = 0; + for (k=0; k nw ? nw - ii : ns; + e = (int)(genrand_int32(gMTdata) % 3); + + // Initialize data matrix indexed by local id and sub group id + switch (e) { + case 0: + memset(&t[ii], 0, n*sizeof(cl_int)); + break; + case 1: + memset(&t[ii], 0, n*sizeof(cl_int)); + i = (int)(genrand_int32(gMTdata) % (cl_uint)n); + t[ii + i] = 41; + break; + case 2: + memset(&t[ii], 0xff, n*sizeof(cl_int)); + break; + } + } + + // Now map into work group using map from device + for (j=0;j nw ? nw - ii : ns; + + // Compute target + if (Which == 0) { + taa = 0; + for (i=0; i +struct RED { + static void gen(Ty *x, Ty *t, cl_int *m, int ns, int nw, int ng) + { + int i, ii, j, k, n; + int nj = (nw + ns - 1)/ns; + + ii = 0; + for (k=0; k nw ? nw - ii : ns; + + for (i=0; i::val()); + + for (k=0; k nw ? nw - ii : ns; + + // Compute target + if (Which == 0) { + // add + tr = mx[ii]; + for (i=1; i mx[ii + i] ? tr : mx[ii + i]; + } else if (Which == 2) { + // min + tr = mx[ii]; + for (i=1; i mx[ii + i] ? mx[ii + i] : tr; + } + + // Check result + for (i=0; i::val(), i, j, k); + return -1; + } + } + } + + x += nw; + y += nw; + m += 2*nw; + } + + return 0; + } +}; + +// Scan Inclusive functions +template +struct SCIN { + static void gen(Ty *x, Ty *t, cl_int *m, int ns, int nw, int ng) + { + int i, ii, j, k, n; + int nj = (nw + ns - 1)/ns; + + ii = 0; + for (k=0; k nw ? nw - ii : ns; + + for (i=0; i::val()); + + for (k=0; k nw ? nw - ii : ns; + + // Check result + for (i=0; i mx[ii + i] ? tr : mx[ii + i]); + } else { + tr = i == 0 ? mx[ii] : (tr > mx[ii + i] ? mx[ii + i] : tr); + } + + rr = my[ii+i]; + if (rr != tr) { + log_error("ERROR: sub_group_scan_inclusive_%s(%s) mismatch for local id %d in sub group %d in group %d\n", + Which == 0 ? 
"add" : (Which == 1 ? "max" : "min"), TypeName::val(), i, j, k); + return -1; + } + } + } + + x += nw; + y += nw; + m += 2*nw; + } + + return 0; + } +}; + +// Scan Exclusive functions +template +struct SCEX { + static void gen(Ty *x, Ty *t, cl_int *m, int ns, int nw, int ng) + { + int i, ii, j, k, n; + int nj = (nw + ns - 1)/ns; + + ii = 0; + for (k=0; k nw ? nw - ii : ns; + + for (i=0; i::val()); + + for (k=0; k nw ? nw - ii : ns; + + // Check result + for (i=0; i::val() : tr + trt; + } else if (Which == 1) { + tr = i == 0 ? TypeIdentity::val() : (trt > tr ? trt : tr); + } else { + tr = i == 0 ? TypeIdentity::val() : (trt > tr ? tr : trt); + } + trt = mx[ii+i]; + rr = my[ii+i]; + + if (rr != tr) { + log_error("ERROR: sub_group_scan_exclusive_%s(%s) mismatch for local id %d in sub group %d in group %d\n", + Which == 0 ? "add" : (Which == 1 ? "max" : "min"), TypeName::val(), i, j, k); + return -1; + } + } + } + + x += nw; + y += nw; + m += 2*nw; + } + + return 0; + } +}; + +// Broadcast functios +template +struct BC { + static void gen(Ty *x, Ty *t, cl_int *m, int ns, int nw, int ng) + { + int i, ii, j, k, l, n; + int nj = (nw + ns - 1)/ns; + int d = ns > 100 ? 100 : ns; + + ii = 0; + for (k=0; k nw ? nw - ii : ns; + l = (int)(genrand_int32(gMTdata) & 0x7fffffff) % (d > n ? n : d); + + for (i=0; i::val()); + + for (k=0; k nw ? 
nw - ii : ns; + l = (int)mx[ii] % 100; + tr = mx[ii+l]; + + // Check result + for (i=0; i::val(), i, j, k); + return -1; + } + } + } + + x += nw; + y += nw; + m += 2*nw; + } + + return 0; + } +}; + +// Independent forward progress stuff +// Note: +// Output needs num_groups * NUM_LOC elements +// local_size must be > NUM_LOC +// Input needs num_groups * num_sub_groups * (NUM_LOC+1) elements + +static inline int +inst(int op, int loc, int val) +{ + return (val << INST_VAL_SHIFT) | (loc << INST_LOC_SHIFT) | (op << INST_OP_SHIFT); +} + +void gen_insts(cl_int *x, cl_int *p, int n) +{ + int i, j0, j1; + int val; + int ii[NUM_LOC]; + + // Create a random permutation of 0...NUM_LOC-1 + ii[0] = 0; + for (i=1; i> INST_OP_SHIFT) & INST_OP_MASK; + cl_int iloc = (inst >> INST_LOC_SHIFT) & INST_LOC_MASK; + cl_int ival = (inst >> INST_VAL_SHIFT) & INST_VAL_MASK; + scont = false; + + switch (iop) { + case INST_STORE: + loc[iloc] = ival; + ++p[i]; + scont = true; + break; + case INST_WAIT: + if (loc[iloc] == ival) { + ++p[i]; + scont = true; + } + break; + case INST_COUNT: + loc[iloc] += ival; + ++p[i]; + scont = true; + break; + case INST_END: + ++nend; + break; + } + } while (scont); + } + } while (nend < n); + + // Return result, reusing "p" + memcpy(p, loc, sizeof(loc)); +} + + +struct IFP { + static void gen(cl_int *x, cl_int *t, cl_int *, int ns, int nw, int ng) + { + int k; + int nj = (nw + ns - 1) / ns; + + // We need at least 2 sub groups per group for this test + if (nj == 1) + return; + + for (k=0; k, G, L>::run(device, context, queue, num_elements, "test_any", any_source); + error |= test, G, L>::run(device, context, queue, num_elements, "test_all", all_source); + + // error |= test, G, L>::run(device, context, queue, num_elements, "test_bcast", bcast_source); + error |= test, G, L>::run(device, context, queue, num_elements, "test_bcast", bcast_source); + error |= test, G, L>::run(device, context, queue, num_elements, "test_bcast", bcast_source); + error |= test, G, 
L>::run(device, context, queue, num_elements, "test_bcast", bcast_source); + error |= test, G, L>::run(device, context, queue, num_elements, "test_bcast", bcast_source); + error |= test, G, L>::run(device, context, queue, num_elements, "test_bcast", bcast_source); + error |= test, G, L>::run(device, context, queue, num_elements, "test_bcast", bcast_source); + + // error |= test, G, L>::run(device, context, queue, num_elements, "test_redadd", redadd_source); + error |= test, G, L>::run(device, context, queue, num_elements, "test_redadd", redadd_source); + error |= test, G, L>::run(device, context, queue, num_elements, "test_redadd", redadd_source); + error |= test, G, L>::run(device, context, queue, num_elements, "test_redadd", redadd_source); + error |= test, G, L>::run(device, context, queue, num_elements, "test_redadd", redadd_source); + error |= test, G, L>::run(device, context, queue, num_elements, "test_redadd", redadd_source); + error |= test, G, L>::run(device, context, queue, num_elements, "test_redadd", redadd_source); + + // error |= test, G, L>::run(device, context, queue, num_elements, "test_redmax", redmax_source); + error |= test, G, L>::run(device, context, queue, num_elements, "test_redmax", redmax_source); + error |= test, G, L>::run(device, context, queue, num_elements, "test_redmax", redmax_source); + error |= test, G, L>::run(device, context, queue, num_elements, "test_redmax", redmax_source); + error |= test, G, L>::run(device, context, queue, num_elements, "test_redmax", redmax_source); + error |= test, G, L>::run(device, context, queue, num_elements, "test_redmax", redmax_source); + error |= test, G, L>::run(device, context, queue, num_elements, "test_redmax", redmax_source); + + // error |= test, G, L>::run(device, context, queue, num_elements, "test_redmin", redmin_source); + error |= test, G, L>::run(device, context, queue, num_elements, "test_redmin", redmin_source); + error |= test, G, L>::run(device, context, queue, num_elements, 
"test_redmin", redmin_source); + error |= test, G, L>::run(device, context, queue, num_elements, "test_redmin", redmin_source); + error |= test, G, L>::run(device, context, queue, num_elements, "test_redmin", redmin_source); + error |= test, G, L>::run(device, context, queue, num_elements, "test_redmin", redmin_source); + error |= test, G, L>::run(device, context, queue, num_elements, "test_redmin", redmin_source); + + // error |= test, G, L>::run(device, context, queue, num_elements, "test_scinadd", scinadd_source); + error |= test, G, L>::run(device, context, queue, num_elements, "test_scinadd", scinadd_source); + error |= test, G, L>::run(device, context, queue, num_elements, "test_scinadd", scinadd_source); + error |= test, G, L>::run(device, context, queue, num_elements, "test_scinadd", scinadd_source); + error |= test, G, L>::run(device, context, queue, num_elements, "test_scinadd", scinadd_source); + error |= test, G, L>::run(device, context, queue, num_elements, "test_scinadd", scinadd_source); + error |= test, G, L>::run(device, context, queue, num_elements, "test_scinadd", scinadd_source); + + // error |= test, G, L>::run(device, context, queue, num_elements, "test_scinmax", scinmax_source); + error |= test, G, L>::run(device, context, queue, num_elements, "test_scinmax", scinmax_source); + error |= test, G, L>::run(device, context, queue, num_elements, "test_scinmax", scinmax_source); + error |= test, G, L>::run(device, context, queue, num_elements, "test_scinmax", scinmax_source); + error |= test, G, L>::run(device, context, queue, num_elements, "test_scinmax", scinmax_source); + error |= test, G, L>::run(device, context, queue, num_elements, "test_scinmax", scinmax_source); + error |= test, G, L>::run(device, context, queue, num_elements, "test_scinmax", scinmax_source); + + // error |= test, G, L>::run(device, context, queue, num_elements, "test_scinmin", scinmin_source); + error |= test, G, L>::run(device, context, queue, num_elements, 
"test_scinmin", scinmin_source); + error |= test, G, L>::run(device, context, queue, num_elements, "test_scinmin", scinmin_source); + error |= test, G, L>::run(device, context, queue, num_elements, "test_scinmin", scinmin_source); + error |= test, G, L>::run(device, context, queue, num_elements, "test_scinmin", scinmin_source); + error |= test, G, L>::run(device, context, queue, num_elements, "test_scinmin", scinmin_source); + error |= test, G, L>::run(device, context, queue, num_elements, "test_scinmin", scinmin_source); + + // error |= test, G, L>::run(device, context, queue, num_elements, "test_scexadd", scexadd_source); + error |= test, G, L>::run(device, context, queue, num_elements, "test_scexadd", scexadd_source); + error |= test, G, L>::run(device, context, queue, num_elements, "test_scexadd", scexadd_source); + error |= test, G, L>::run(device, context, queue, num_elements, "test_scexadd", scexadd_source); + error |= test, G, L>::run(device, context, queue, num_elements, "test_scexadd", scexadd_source); + error |= test, G, L>::run(device, context, queue, num_elements, "test_scexadd", scexadd_source); + error |= test, G, L>::run(device, context, queue, num_elements, "test_scexadd", scexadd_source); + + // error |= test, G, L>::run(device, context, queue, num_elements, "test_scexmax", scexmax_source); + error |= test, G, L>::run(device, context, queue, num_elements, "test_scexmax", scexmax_source); + error |= test, G, L>::run(device, context, queue, num_elements, "test_scexmax", scexmax_source); + error |= test, G, L>::run(device, context, queue, num_elements, "test_scexmax", scexmax_source); + error |= test, G, L>::run(device, context, queue, num_elements, "test_scexmax", scexmax_source); + error |= test, G, L>::run(device, context, queue, num_elements, "test_scexmax", scexmax_source); + error |= test, G, L>::run(device, context, queue, num_elements, "test_scexmax", scexmax_source); + + // error |= test, G, L>::run(device, context, queue, num_elements, 
"test_scexmin", scexmin_source); + error |= test, G, L>::run(device, context, queue, num_elements, "test_scexmin", scexmin_source); + error |= test, G, L>::run(device, context, queue, num_elements, "test_scexmin", scexmin_source); + error |= test, G, L>::run(device, context, queue, num_elements, "test_scexmin", scexmin_source); + error |= test, G, L>::run(device, context, queue, num_elements, "test_scexmin", scexmin_source); + error |= test, G, L>::run(device, context, queue, num_elements, "test_scexmin", scexmin_source); + error |= test, G, L>::run(device, context, queue, num_elements, "test_scexmin", scexmin_source); + + error |= test::run(device, context, queue, num_elements, "test_ifp", ifp_source, NUM_LOC + 1); + return error; +} + diff --git a/test_conformance/subgroups/test_workitem.cpp b/test_conformance/subgroups/test_workitem.cpp new file mode 100644 index 00000000..e18d4ab9 --- /dev/null +++ b/test_conformance/subgroups/test_workitem.cpp @@ -0,0 +1,252 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "procs.h" +#include "../../test_common/harness/conversions.h" +#include "../../test_common/harness/typeWrappers.h" + +struct get_test_data { + cl_uint subGroupSize; + cl_uint maxSubGroupSize; + cl_uint numSubGroups; + cl_uint enqNumSubGroups; + cl_uint subGroupId; + cl_uint subGroupLocalId; + bool operator==(get_test_data x) { + return subGroupSize == x.subGroupSize && + maxSubGroupSize == x.maxSubGroupSize && + numSubGroups == x.numSubGroups && + subGroupId == x.subGroupId && + subGroupLocalId == x.subGroupLocalId; + } +}; + +static const char * get_test_source = +"#pragma OPENCL EXTENSION cl_khr_subgroups : enable\n" +"\n" +"typedef struct {\n" +" uint subGroupSize;\n" +" uint maxSubGroupSize;\n" +" uint numSubGroups;\n" +" uint enqNumSubGroups;\n" +" uint subGroupId;\n" +" uint subGroupLocalId;\n" +"} get_test_data;\n" +"\n" +"__kernel void get_test( __global get_test_data *outData )\n" +"{\n" +" int gid = get_global_id( 0 );\n" +" outData[gid].subGroupSize = get_sub_group_size();\n" +" outData[gid].maxSubGroupSize = get_max_sub_group_size();\n" +" outData[gid].numSubGroups = get_num_sub_groups();\n" +" outData[gid].enqNumSubGroups = get_enqueued_num_sub_groups();\n" +" outData[gid].subGroupId = get_sub_group_id();\n" +" outData[gid].subGroupLocalId = get_sub_group_local_id();\n" +"}"; + +static int +check_group(const get_test_data *result, int nw, cl_uint ensg, int maxwgs) +{ + int first = -1; + int last = -1; + int i, j; + cl_uint hit[32]; + + for (i=0; i maxwgs) { + log_error("ERROR: get_max_subgroup_size() returned incorrect result: %u\n", result[first].maxSubGroupSize); + return -1; + } + if (result[first].subGroupSize > result[first].maxSubGroupSize) { + log_error("ERROR: get_sub_group_size() > get_max_sub_group_size()\n"); + return -1; + } + if (result[last].subGroupSize > result[first].subGroupSize) { + log_error("ERROR: last sub group larger than first sub group\n"); + return -1; + } + if (result[first].numSubGroups == 0 || 
result[first].numSubGroups > ensg) { + log_error("ERROR: get_num_sub_groups() returned incorrect result: %u \n", result[first].numSubGroups); + return -1; + } + + memset(hit, 0, sizeof(hit)); + for (i=0; i= result[first].numSubGroups) { + log_error("ERROR: get_sub_group_id() returned out of range value: %u\n", result[i].subGroupId); + return -1; + } + if (result[i].enqNumSubGroups != ensg) { + log_error("ERROR: get_enqueued_num_sub_groups() returned incorrect value: %u\n", result[i].enqNumSubGroups); + return -1; + } + if (result[first].numSubGroups > 1) { + if (result[i].subGroupId < result[first].numSubGroups-1) { + if (result[i].subGroupSize != result[first].subGroupSize) { + log_error("ERROR: unexpected variation in get_*_sub_group_*()\n"); + return -1; + } + if (result[i].subGroupLocalId >= result[first].subGroupSize) { + log_error("ERROR: get_sub_group_local_id() returned out of bounds value: %u \n", result[i].subGroupLocalId); + return -1; + } + } else { + if (result[i].subGroupSize != result[last].subGroupSize) { + log_error("ERROR: unexpected variation in get_*_sub_group_*()\n"); + return -1; + } + if (result[i].subGroupLocalId >= result[last].subGroupSize) { + log_error("ERROR: get_sub_group_local_id() returned out of bounds value: %u \n", result[i].subGroupLocalId); + return -1; + } + } + } else { + if (result[i].subGroupSize != result[first].subGroupSize) { + log_error("ERROR: unexpected variation in get_*_sub_group_*()\n"); + return -1; + } + if (result[i].subGroupLocalId >= result[first].subGroupSize) { + log_error("ERROR: get_sub_group_local_id() returned out of bounds value: %u \n", result[i].subGroupLocalId); + return -1; + } + } + + j = (result[first].subGroupSize + 31)/32 * result[i].subGroupId + (result[i].subGroupLocalId >> 5); + if (j < sizeof(hit)/4) { + cl_uint b = 1U << (result[i].subGroupLocalId & 0x1fU); + if ((hit[j] & b) != 0) { + log_error("ERROR: get_sub_group_local_id() repeated a result in the same sub group\n"); + return -1; + } + 
hit[j] |= b; + } + } + + return 0; +} + +int +test_work_item_functions(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +{ + static const size_t lsize = 200; + int error; + int i, j, k, q, r, nw; + int maxwgs; + cl_uint ensg; + size_t global; + size_t local; + get_test_data result[lsize*6]; + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper out; + + error = create_single_kernel_helper_with_build_options(context, &program, &kernel, 1, &get_test_source, "get_test", "-cl-std=CL2.0"); + if (error != 0) + return error; + + error = get_max_allowed_work_group_size(context, kernel, &local, NULL); + if (error != 0) + return error; + + maxwgs = (int)local; + + // Limit it a bit so we have muliple work groups + // Ideally this will still be large enough to give us multiple subgroups + if (local > lsize) + local = lsize; + + // Create our buffer + out = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(result), NULL, &error); + test_error(error, "clCreateBuffer failed"); + + // Set argument + error = clSetKernelArg(kernel, 0, sizeof(out), &out); + test_error(error, "clSetKernelArg failed"); + + global = local * 5; + + // Make sure we have a flexible range + global += 3 * local / 4; + + // Collect the data + memset((void *)&result, 0xf0, sizeof(result)); + + error = clEnqueueWriteBuffer(queue, out, CL_FALSE, 0, sizeof(result), (void *)&result, 0, NULL, NULL); + test_error(error, "clEnqueueWriteBuffer failed"); + + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL); + test_error(error, "clEnqueueNDRangeKernel failed"); + + error = clEnqueueReadBuffer(queue, out, CL_FALSE, 0, sizeof(result), (void *)&result, 0, NULL, NULL); + test_error(error, "clEnqueueReadBuffer failed"); + + error = clFinish(queue); + test_error(error, "clFinish failed"); + + nw = (int)local; + ensg = result[0].enqNumSubGroups; + + // Check the first group + error = check_group(result, nw, ensg, maxwgs); + if (error) + 
return error; + + q = (int)global / nw; + r = (int)global % nw; + + // Check the remaining work groups including the last if it is the same size + for (k=1; kgcc:-xc++ + msvc:"/TP" + ; + +exe test_thread_dimensions + : main.c + test_thread_dimensions.c + ; + +install dist + : test_thread_dimensions + : debug:$(DIST)/debug/tests/test_conformance/thread_dimensions + release:$(DIST)/release/tests/test_conformance/thread_dimensions + ; diff --git a/test_conformance/thread_dimensions/Makefile b/test_conformance/thread_dimensions/Makefile new file mode 100644 index 00000000..331ce99a --- /dev/null +++ b/test_conformance/thread_dimensions/Makefile @@ -0,0 +1,44 @@ +ifdef BUILD_WITH_ATF +ATF = -framework ATF +USE_ATF = -DUSE_ATF +endif + +SRCS = main.c \ + test_thread_dimensions.c \ + ../../test_common/harness/errorHelpers.c \ + ../../test_common/harness/threadTesting.c \ + ../../test_common/harness/testHarness.c \ + ../../test_common/harness/kernelHelpers.c \ + ../../test_common/harness/mt19937.c \ + ../../test_common/harness/conversions.c \ + ../../test_common/harness/typeWrappers.cpp + +DEFINES = + +SOURCES = $(abspath $(SRCS)) +LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries +LIBPATH += -L. +FRAMEWORK = $(SOURCES) +HEADERS = +TARGET = test_thread_dimensions +INCLUDE = +COMPILERFLAGS = -c -Wall -g -Os -Wshorten-64-to-32 +CC = c++ +CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE) +CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE) +LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF} + +OBJECTS := ${SOURCES:.c=.o} +OBJECTS := ${OBJECTS:.cpp=.o} + +TARGETOBJECT = +all: $(TARGET) + +$(TARGET): $(OBJECTS) + $(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES) + +clean: + rm -f $(TARGET) $(OBJECTS) + +.DEFAULT: + @echo The target \"$@\" does not exist in Makefile. 
diff --git a/test_conformance/thread_dimensions/main.c b/test_conformance/thread_dimensions/main.c new file mode 100644 index 00000000..2353644a --- /dev/null +++ b/test_conformance/thread_dimensions/main.c @@ -0,0 +1,64 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" + +#include +#include +#include "procs.h" + +basefn basefn_list[] = { +test_quick_thread_dimensions_1d_explicit_local, +test_quick_thread_dimensions_2d_explicit_local, +test_quick_thread_dimensions_3d_explicit_local, +test_quick_thread_dimensions_1d_implicit_local, +test_quick_thread_dimensions_2d_implicit_local, +test_quick_thread_dimensions_3d_implicit_local, +test_full_thread_dimensions_1d_explicit_local, +test_full_thread_dimensions_2d_explicit_local, +test_full_thread_dimensions_3d_explicit_local, +test_full_thread_dimensions_1d_implicit_local, +test_full_thread_dimensions_2d_implicit_local, +test_full_thread_dimensions_3d_implicit_local, +}; + +const char *commonfn_names[] = { +"quick_1d_explicit_local", +"quick_2d_explicit_local", +"quick_3d_explicit_local", +"quick_1d_implicit_local", +"quick_2d_implicit_local", +"quick_3d_implicit_local", +"full_1d_explicit_local", +"full_2d_explicit_local", +"full_3d_explicit_local", +"full_1d_implicit_local", +"full_2d_implicit_local", +"full_3d_implicit_local", +}; + +ct_assert((sizeof(commonfn_names) / sizeof(commonfn_names[0])) == 
(sizeof(basefn_list) / sizeof(basefn_list[0]))); + +int num_commonfns = sizeof(commonfn_names) / sizeof(char *); + +int +main(int argc, const char *argv[]) +{ + return runTestHarness( argc, argv, num_commonfns, basefn_list, commonfn_names, false, false, 0 ); +} + + + + diff --git a/test_conformance/thread_dimensions/procs.h b/test_conformance/thread_dimensions/procs.h new file mode 100644 index 00000000..0ffb5caf --- /dev/null +++ b/test_conformance/thread_dimensions/procs.h @@ -0,0 +1,37 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/kernelHelpers.h" +#include "../../test_common/harness/testHarness.h" +#include "../../test_common/harness/errorHelpers.h" +#include "../../test_common/harness/conversions.h" +#include "../../test_common/harness/mt19937.h" + +extern const int kVectorSizeCount; + +extern int test_quick_thread_dimensions_1d_explicit_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_quick_thread_dimensions_2d_explicit_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_quick_thread_dimensions_3d_explicit_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_quick_thread_dimensions_1d_implicit_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_quick_thread_dimensions_2d_implicit_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_quick_thread_dimensions_3d_implicit_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_full_thread_dimensions_1d_explicit_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_full_thread_dimensions_2d_explicit_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_full_thread_dimensions_3d_explicit_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_full_thread_dimensions_1d_implicit_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_full_thread_dimensions_2d_implicit_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_full_thread_dimensions_3d_implicit_local(cl_device_id deviceID, cl_context context, 
cl_command_queue queue, int num_elements); + diff --git a/test_conformance/thread_dimensions/test_thread_dimensions.c b/test_conformance/thread_dimensions/test_thread_dimensions.c new file mode 100644 index 00000000..1ef832b4 --- /dev/null +++ b/test_conformance/thread_dimensions/test_thread_dimensions.c @@ -0,0 +1,991 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include + +#include "procs.h" + +#define ITERATIONS 4 +#define DEBUG 0 + +// If the environment variable DO_NOT_LIMIT_THREAD_SIZE is not set, the test will limit the maximum total +// global dimensions tested to this value. 
+#define MAX_TOTAL_GLOBAL_THREADS_FOR_TEST (1<<24) +int limit_size = 0; + +static int +get_maximums(cl_kernel kernel, cl_context context, + size_t *max_workgroup_size_result, + size_t *max_width_result, + size_t *max_height_result, + cl_ulong *max_allcoation_result, + cl_ulong *max_physical_result) { + int err = 0; + cl_uint i; + cl_device_id *devices; + + // Get all the devices in the device group + size_t num_devices_returned; + err = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &num_devices_returned); + if(err != CL_SUCCESS) + { + log_error("clGetContextInfo() failed (%d).\n", err); + return -10; + } + devices = (cl_device_id *)malloc(num_devices_returned); + err = clGetContextInfo(context, CL_CONTEXT_DEVICES, num_devices_returned, devices, NULL); + if(err != CL_SUCCESS) + { + log_error("clGetContextInfo() failed (%d).\n", err); + return -10; + } + num_devices_returned /= sizeof(cl_device_id); + if (num_devices_returned > 1) log_info("%d devices in device group.\n", (int)num_devices_returned); + if (num_devices_returned < 1) { + log_error("0 devices found for this kernel.\n"); + return -1; + } + + // Iterate over them and find the maximum local workgroup size and image size + size_t max_workgroup_size = 0; + size_t current_workgroup_size = 0; + size_t max_width = 0; + size_t current_width = 0; + size_t max_height = 0; + size_t current_height = 0; + cl_ulong max_allocation = 0; + cl_ulong current_allocation = 0; + cl_ulong max_physical = 0; + cl_ulong current_physical = 0; + + for (i=0; i= final_x_size)\n" +" error = 64;\n" +" if (get_global_id(1) >= final_y_size)\n" +" error = 128;\n" +" if (get_global_id(2) >= final_z_size)\n" +" error = 256;\n" +"\n" +" unsigned long t_address = (unsigned long)get_global_id(2)*(unsigned long)final_y_size*(unsigned long)final_x_size + \n" +" (unsigned long)get_global_id(1)*(unsigned long)final_x_size + (unsigned long)get_global_id(0);\n" +" if ((t_address >= start_address) && (t_address < end_address))\n" +" 
atom_add(&dst[t_address-start_address], 1u);\n" +" if (error)\n" +" atom_or(&dst[t_address-start_address], error);\n" +"\n" +"}\n" +"\n" +"__kernel void clear_memory(__global uint *dst)\n\n" +"{\n" +" dst[get_global_id(0)] = 0;\n" +"\n" +"}\n"; + +static const char *thread_dimension_kernel_code_not_atomic_long = +"\n" +"__kernel void test_thread_dimension_not_atomic(__global uint *dst, \n" +" uint final_x_size, uint final_y_size, uint final_z_size,\n" +" ulong start_address, ulong end_address)\n" +"{\n" +" uint error = 0;\n" +" if (get_global_id(0) >= final_x_size)\n" +" error = 64;\n" +" if (get_global_id(1) >= final_y_size)\n" +" error = 128;\n" +" if (get_global_id(2) >= final_z_size)\n" +" error = 256;\n" +"\n" +" unsigned long t_address = (unsigned long)get_global_id(2)*(unsigned long)final_y_size*(unsigned long)final_x_size + \n" +" (unsigned long)get_global_id(1)*(unsigned long)final_x_size + (unsigned long)get_global_id(0);\n" +" if ((t_address >= start_address) && (t_address < end_address))\n" +" dst[t_address-start_address]++;\n" +" if (error)\n" +" dst[t_address-start_address]|=error;\n" +"\n" +"}\n" +"\n" +"__kernel void clear_memory(__global uint *dst)\n\n" +"{\n" +" dst[get_global_id(0)] = 0;\n" +"\n" +"}\n"; + +static const char *thread_dimension_kernel_code_atomic_not_long = +"\n" +"#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" +"#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n" +"__kernel void test_thread_dimension_atomic(__global uint *dst, \n" +" uint final_x_size, uint final_y_size, uint final_z_size,\n" +" uint start_address, uint end_address)\n" +"{\n" +" uint error = 0;\n" +" if (get_global_id(0) >= final_x_size)\n" +" error = 64;\n" +" if (get_global_id(1) >= final_y_size)\n" +" error = 128;\n" +" if (get_global_id(2) >= final_z_size)\n" +" error = 256;\n" +"\n" +" unsigned int t_address = (unsigned int)get_global_id(2)*(unsigned int)final_y_size*(unsigned int)final_x_size + \n" +" (unsigned 
int)get_global_id(1)*(unsigned int)final_x_size + (unsigned int)get_global_id(0);\n" +" if ((t_address >= start_address) && (t_address < end_address))\n" +" atom_add(&dst[t_address-start_address], 1u);\n" +" if (error)\n" +" atom_or(&dst[t_address-start_address], error);\n" +"\n" +"}\n" +"\n" +"__kernel void clear_memory(__global uint *dst)\n\n" +"{\n" +" dst[get_global_id(0)] = 0;\n" +"\n" +"}\n"; + +static const char *thread_dimension_kernel_code_not_atomic_not_long = +"\n" +"__kernel void test_thread_dimension_not_atomic(__global uint *dst, \n" +" uint final_x_size, uint final_y_size, uint final_z_size,\n" +" uint start_address, uint end_address)\n" +"{\n" +" uint error = 0;\n" +" if (get_global_id(0) >= final_x_size)\n" +" error = 64;\n" +" if (get_global_id(1) >= final_y_size)\n" +" error = 128;\n" +" if (get_global_id(2) >= final_z_size)\n" +" error = 256;\n" +"\n" +" unsigned int t_address = (unsigned int)get_global_id(2)*(unsigned int)final_y_size*(unsigned int)final_x_size + \n" +" (unsigned int)get_global_id(1)*(unsigned int)final_x_size + (unsigned int)get_global_id(0);\n" +" if ((t_address >= start_address) && (t_address < end_address))\n" +" dst[t_address-start_address]++;\n" +" if (error)\n" +" dst[t_address-start_address]|=error;\n" +"\n" +"}\n" +"\n" +"__kernel void clear_memory(__global uint *dst)\n\n" +"{\n" +" dst[get_global_id(0)] = 0;\n" +"\n" +"}\n"; + + + +static size_t max_workgroup_size_for_clear_kernel; +cl_kernel clear_memory_kernel = 0; + + +char dim_str[128]; +char * +print_dimensions(size_t x, size_t y, size_t z, cl_uint dim) { + // Not thread safe... 
+ if (dim == 1) { + snprintf(dim_str, 128, "[%d]", (int)x); + } else if (dim == 2) { + snprintf(dim_str, 128, "[%d x %d]", (int)x, (int)y); + } else if (dim == 3) { + snprintf(dim_str, 128, "[%d x %d x %d]", (int)x, (int)y, (int)z); + } else { + snprintf(dim_str, 128, "INVALID DIM: %d", dim); + } + return dim_str; +} + +char dim_str2[128]; +char * +print_dimensions2(size_t x, size_t y, size_t z, cl_uint dim) { + // Not thread safe... + if (dim == 1) { + snprintf(dim_str2, 128, "[%d]", (int)x); + } else if (dim == 2) { + snprintf(dim_str2, 128, "[%d x %d]", (int)x, (int)y); + } else if (dim == 3) { + snprintf(dim_str2, 128, "[%d x %d x %d]", (int)x, (int)y, (int)z); + } else { + snprintf(dim_str2, 128, "INVALID DIM: %d", dim); + } + return dim_str2; +} + + +/* + This tests thread dimensions by executing a kernel across a range of dimensions. + Each kernel instance does an atomic write into a specific location in a buffer to + ensure that the correct dimensions are run. To handle large dimensions, the kernel + masks its execution region internally. This allows a small (128MB) buffer to be used + for very large executions by running the kernel multiple times. 
+ */ +int run_test(cl_context context, cl_command_queue queue, cl_kernel kernel, cl_mem array, cl_uint memory_size, cl_uint dimensions, + cl_uint final_x_size, cl_uint final_y_size, cl_uint final_z_size, + cl_uint local_x_size, cl_uint local_y_size, cl_uint local_z_size, + int explict_local) +{ + cl_uint errors = 0; + size_t global_size[3], local_size[3]; + global_size[0] = final_x_size; local_size[0] = local_x_size; + global_size[1] = final_y_size; local_size[1] = local_y_size; + global_size[2] = final_z_size; local_size[2] = local_z_size; + + cl_ulong start_valid_memory_address = 0; + cl_ulong end_valid_memory_address = memory_size; + cl_ulong last_memory_address = (cl_ulong)final_x_size*(cl_ulong)final_y_size*(cl_ulong)final_z_size*sizeof(cl_uint); + if (end_valid_memory_address > last_memory_address) + end_valid_memory_address = last_memory_address; + + int number_of_iterations_required = (int)ceil((double)last_memory_address/(double)memory_size); + log_info("\t\tTest requires %gMB (%d test iterations using an allocation of %gMB).\n", + (double)last_memory_address/(1024.0*1024.0), number_of_iterations_required, (double)memory_size/(1024.0*1024.0)); + //log_info("Last memory address: %llu, memory_size: %llu\n", last_memory_address, memory_size); + + while (end_valid_memory_address <= last_memory_address) + { + int err; + // Clear the memory + // // Manually -- much slower on the GPU + // memset((void*)data, 0, memory_size); + // err = clWriteArray(context, array, 0, 0, memory_size, data, NULL); + // if (err != CL_SUCCESS) { + // log_error("Failed to write to data array: %d\n", err); + // free(data); + // return -4; + // } + // In a kernel + err = clSetKernelArg(clear_memory_kernel, 0, sizeof(array), &array); + if (err != CL_SUCCESS) { + print_error( err, "Failed to set args for clear_memory_kernel to clear the memory between runs"); + return -4; + } + size_t global[3] = {1,0,0}; + global[0] = (cl_uint)(memory_size/sizeof(cl_uint)); + size_t local[3] = {1,0,0}; + 
local[0] = max_workgroup_size_for_clear_kernel; + while( global[0] % local[0] ) //make sure that global[0] is evenly divided by local[0]. Will stop at 1 in worst case. + local[0]--; + err = clEnqueueNDRangeKernel(queue, clear_memory_kernel, 1, NULL, global, local, 0, NULL, NULL); + if (err != CL_SUCCESS) { + print_error( err, "Failed to execute clear_memory_kernel to clear the memory between runs"); + return -4; + } + + cl_ulong start_valid_index = start_valid_memory_address/sizeof(cl_uint); + cl_ulong end_valid_index = end_valid_memory_address/sizeof(cl_uint); + + cl_uint start_valid_index_int = (cl_uint) start_valid_index; + cl_uint end_valid_index_int = (cl_uint) end_valid_index; + + // Set the arguments + err = clSetKernelArg(kernel, 0, sizeof(array), &array); + err |= clSetKernelArg(kernel, 1, sizeof(final_x_size), &final_x_size); + err |= clSetKernelArg(kernel, 2, sizeof(final_y_size), &final_y_size); + err |= clSetKernelArg(kernel, 3, sizeof(final_z_size), &final_z_size); + if (gHasLong) + { + err |= clSetKernelArg(kernel, 4, sizeof(start_valid_index), &start_valid_index); + err |= clSetKernelArg(kernel, 5, sizeof(end_valid_index), &end_valid_index); + } + else + { + err |= clSetKernelArg(kernel, 4, sizeof(start_valid_index_int), &start_valid_index_int); + err |= clSetKernelArg(kernel, 5, sizeof(end_valid_index_int), &end_valid_index_int); + } + + if (err != CL_SUCCESS) { + print_error( err, "Failed to set arguments."); + return -3; + } + + + // Execute the kernel + if (explict_local == 0) { + err = clEnqueueNDRangeKernel(queue, kernel, dimensions, NULL, global_size, NULL, 0, NULL, NULL); + if (DEBUG) log_info("\t\t\tExecuting kernel with global %s, NULL local, %d dim, start address %llu, end address %llu.\n", + print_dimensions(global_size[0], global_size[1], global_size[2], dimensions), + dimensions, start_valid_memory_address, end_valid_memory_address); + } else { + err = clEnqueueNDRangeKernel(queue, kernel, dimensions, NULL, global_size, local_size, 0, 
NULL, NULL); + if (DEBUG) log_info("\t\t\tExecuting kernel with global %s, local %s, %d dim, start address %llu, end address %llu.\n", + print_dimensions(global_size[0], global_size[1], global_size[2], dimensions), print_dimensions2(local_size[0], local_size[1], local_size[2], dimensions), + dimensions, start_valid_memory_address, end_valid_memory_address); + } + if (err == CL_OUT_OF_RESOURCES) { + log_info("WARNING: kernel reported CL_OUT_OF_RESOURCES, indicating the global dimensions are too large. Skipping this size.\n"); + return 0; + } + if (err != CL_SUCCESS) { + print_error( err, "Failed to execute kernel\n"); + return -3; + } + + void* mapped = clEnqueueMapBuffer(queue, array, CL_TRUE, CL_MAP_READ, 0, memory_size, 0, NULL, NULL, &err ); + if (err != CL_SUCCESS) { + print_error( err, "Failed to map results\n"); + return -4; + } + cl_uint* data = (cl_uint*)mapped; + + // Verify the data + cl_uint i; + cl_uint last_address = (cl_uint)(end_valid_memory_address - start_valid_memory_address)/(cl_uint)sizeof(cl_uint); + for (i=0; i last_memory_address) + end_valid_memory_address = last_memory_address; + } + + if (errors) + log_error("%d errors.\n", errors); + return errors; +} + + + + +static cl_uint max_x_size=1, min_x_size=1, max_y_size=1, min_y_size=1, max_z_size=1, min_z_size=1; + +static void set_min(cl_uint *x, cl_uint *y, cl_uint *z) { + if (*x < min_x_size) + *x = min_x_size; + if (*y < min_y_size) + *y = min_y_size; + if (*z < min_z_size) + *z = min_z_size; + if (*x > max_x_size) + *x = max_x_size; + if (*y > max_y_size) + *y = max_y_size; + if (*z > max_z_size) + *z = max_z_size; +} + + +int +test_thread_dimensions(cl_device_id device, cl_context context, cl_command_queue queue, cl_uint dimensions, cl_uint min_dim, cl_uint max_dim, cl_uint quick_test, cl_uint size_increase_per_iteration, int explicit_local) { + cl_mem array; + cl_program program; + cl_kernel kernel; + int err; + cl_uint memory_size, max_memory_size; + size_t max_local_workgroup_size[3]; 
+ cl_uint device_max_dimensions; + int use_atomics = 1; + MTdata d; + + if (getenv("CL_WIMPY_MODE") && !quick_test) { + log_info("CL_WIMPY_MODE enabled, skipping test\n"); + return 0; + } + + // Unconditionally test larger sizes for CL 1.1 + log_info("Testing large global dimensions.\n"); + limit_size = 0; + + /* Check if atomics are supported. */ + if (!is_extension_available(device, "cl_khr_global_int32_base_atomics")) { + log_info("WARNING: Base atomics not supported (cl_khr_global_int32_base_atomics). Test will not be guaranteed to catch overlaping thread dimensions.\n"); + use_atomics = 0; + } + + if (quick_test) + log_info("WARNING: Running quick test. This will only test the base dimensions (power of two) and base-1 with all local threads fixed in one dim.\n"); + + // Verify that we can test this many dimensions + err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(device_max_dimensions), &device_max_dimensions, NULL); + test_error(err, "clGetDeviceInfo for CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS failed"); + + if (dimensions > device_max_dimensions) { + log_info("Can not test %d dimensions when device only supports %d.\n", dimensions, device_max_dimensions); + return 0; + } + + log_info("Setting random seed to 0.\n"); + + if (gHasLong) { + if (use_atomics) { + err = create_single_kernel_helper( context, &program, &kernel, 1, &thread_dimension_kernel_code_atomic_long, "test_thread_dimension_atomic" ); + } else { + err = create_single_kernel_helper( context, &program, &kernel, 1, &thread_dimension_kernel_code_not_atomic_long, "test_thread_dimension_not_atomic" ); + } + } else { + if (use_atomics) { + err = create_single_kernel_helper( context, &program, &kernel, 1, &thread_dimension_kernel_code_atomic_not_long, "test_thread_dimension_atomic" ); + } else { + err = create_single_kernel_helper( context, &program, &kernel, 1, &thread_dimension_kernel_code_not_atomic_not_long, "test_thread_dimension_not_atomic" ); + } + } + test_error( err, "Unable 
to create testing kernel" ); + + err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_local_workgroup_size), max_local_workgroup_size, NULL); + test_error(err, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES"); + + clear_memory_kernel = clCreateKernel(program, "clear_memory", &err); + if (err) + { + log_error("clCreateKernel failed: %d\n", err); + return -1; + } + + // Get the maximum sizes supported by this device + size_t max_workgroup_size = 0; + size_t max_width = 0; + size_t max_height = 0; + cl_ulong max_allocation = 0; + cl_ulong max_physical = 0; + int found_size = 0; + + err = get_maximums(kernel, context, + &max_workgroup_size, &max_width, &max_height, &max_allocation, &max_physical); + + err = get_maximums(clear_memory_kernel, context, + &max_workgroup_size_for_clear_kernel, &max_width, &max_height, &max_allocation, &max_physical); + + // Make sure we don't try to allocate more than half the physical memory present. + if (max_allocation > (max_physical/2)) { + log_info("Limiting max allocation to half of the maximum physical memory (%gMB of %gMB physical).\n", + (max_physical/2/(1024.0*1024.0)), (max_physical/(1024.0*1024.0))); + max_allocation = max_physical/2; + } + + // Limit the maximum we'll allocate for this test to 512 to be reasonable. 
+ if (max_allocation > 1024*1024*512) { + log_info("Limiting max allocation to 512MB from device maximum allocation of %gMB.\n", (max_allocation/1024.0/1024.0)); + max_allocation = 1024*1024*512; + } + + max_memory_size = (cl_uint)(max_allocation); + if (max_memory_size > 512*1024*1024) + max_memory_size = 512*1024*1024; + memory_size = max_memory_size; + + log_info("Memory allocation size to use is %gMB, max workgroup size is %d.\n", max_memory_size/(1024.0*1024.0), (int)max_workgroup_size); + + while (!found_size && memory_size >= max_memory_size/8) { + array = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), memory_size, NULL, &err); + if (err == CL_MEM_OBJECT_ALLOCATION_FAILURE || err == CL_OUT_OF_HOST_MEMORY) { + memory_size -= max_memory_size/16; + continue; + } + if (err) { + print_error( err, "clCreateBuffer failed"); + return -1; + } + found_size = 1; + } + + if (!found_size) { + log_error("Failed to find a working size greater than 1/8th of the reported allocation size.\n"); + return -1; + } + + if (memory_size < max_memory_size) { + log_info("Note: failed to allocate %gMB, using %gMB instead.\n", max_memory_size/(1024.0*1024.0), memory_size/(1024.0*1024.0)); + } + + int errors = 0; + // Each dimension's size is multiplied by this amount on each iteration. 
+ // uint size_increase_per_iteration = 4; + // 1 test at the specified size + // 2 tests with each dimensions +/- 1 + // 2 tests with all dimensions +/- 1 + // 2 random tests + cl_uint tests_per_size = 1 + 2*dimensions + 2 + 2; + + // 1 test with 1 as the local threads in each dimensions + // 1 test with all the local threads in each dimension + // 2 random tests + cl_uint local_tests_per_size = 1 + dimensions + 2; + if (explicit_local == 0) + local_tests_per_size = 1; + + max_x_size=1, min_x_size=1, max_y_size=1, min_y_size=1, max_z_size=1, min_z_size=1; + + if (dimensions > 3) { + log_error("Invalid dimensions: %d\n", dimensions); + return -1; + } + max_x_size = max_dim; + min_x_size = min_dim; + if (dimensions > 1) { + max_y_size = max_dim; + min_y_size = min_dim; + } + if (dimensions > 2) { + max_z_size = max_dim; + min_z_size = min_dim; + } + + log_info("Testing with dimensions up to %s.\n", print_dimensions(max_x_size, max_y_size, max_z_size, dimensions)); + cl_uint x_size, y_size, z_size; + + d = init_genrand( gRandomSeed ); + z_size = min_z_size; + while (z_size <= max_z_size) { + y_size = min_y_size; + while (y_size <= max_y_size) { + x_size = min_x_size; + while (x_size <= max_x_size) { + + log_info("Base test size %s:\n", print_dimensions(x_size, y_size, z_size, dimensions)); + + cl_uint sub_test; + cl_uint final_x_size, final_y_size, final_z_size; + for (sub_test = 0; sub_test < tests_per_size; sub_test++) { + final_x_size = x_size; + final_y_size = y_size; + final_z_size = z_size; + + if (sub_test == 0) { + if (DEBUG) log_info("\tTesting with base dimensions %s.\n", print_dimensions(final_x_size, final_y_size, final_z_size, dimensions)); + } else if (quick_test) { + // If we are in quick mode just do 1 run with x-1, y-1, and z-1. 
+ if (sub_test > 1) + break; + final_x_size--; + final_y_size--; + final_z_size--; + set_min(&final_x_size, &final_y_size, &final_z_size); + if (DEBUG) log_info("\tTesting with all base dimensions - 1 %s.\n", print_dimensions(final_x_size, final_y_size, final_z_size, dimensions)); + } else if (sub_test <= dimensions*2) { + int dim_to_change = (sub_test-1)%dimensions; + //log_info ("dim_to_change: %d (sub_test:%d) dimensions %d\n", dim_to_change,sub_test, dimensions); + int up_down = (sub_test > dimensions) ? 0 : 1; + + if (dim_to_change == 0) { + final_x_size += (up_down) ? -1 : +1; + } else if (dim_to_change == 1) { + final_y_size += (up_down) ? -1 : +1; + } else if (dim_to_change == 2) { + final_z_size += (up_down) ? -1 : +1; + } else { + log_error("Invalid dim_to_change: %d\n", dim_to_change); + return -1; + } + set_min(&final_x_size, &final_y_size, &final_z_size); + if (DEBUG) log_info("\tTesting with one base dimension +/- 1 %s.\n", print_dimensions(final_x_size, final_y_size, final_z_size, dimensions)); + } else if (sub_test == (dimensions*2+1)) { + if (dimensions == 1) + continue; + final_x_size--; + final_y_size--; + final_z_size--; + set_min(&final_x_size, &final_y_size, &final_z_size); + if (DEBUG) log_info("\tTesting with all base dimensions - 1 %s.\n", print_dimensions(final_x_size, final_y_size, final_z_size, dimensions)); + } else if (sub_test == (dimensions*2+2)) { + if (dimensions == 1) + continue; + final_x_size++; + final_y_size++; + final_z_size++; + set_min(&final_x_size, &final_y_size, &final_z_size); + if (DEBUG) log_info("\tTesting with all base dimensions + 1 %s.\n", print_dimensions(final_x_size, final_y_size, final_z_size, dimensions)); + } else { + final_x_size = (int)get_random_float(0, (x_size/size_increase_per_iteration), d)+x_size/size_increase_per_iteration; + final_y_size = (int)get_random_float(0, (y_size/size_increase_per_iteration), d)+y_size/size_increase_per_iteration; + final_z_size = (int)get_random_float(0, 
(z_size/size_increase_per_iteration), d)+z_size/size_increase_per_iteration; + set_min(&final_x_size, &final_y_size, &final_z_size); + if (DEBUG) log_info("\tTesting with random dimensions %s.\n", print_dimensions(final_x_size, final_y_size, final_z_size, dimensions)); + } + + if (limit_size && final_x_size*final_y_size*final_z_size >= MAX_TOTAL_GLOBAL_THREADS_FOR_TEST) { + log_info("Skipping size %s as it exceeds max test threads of %d.\n", print_dimensions(final_x_size, final_y_size, final_z_size, dimensions), MAX_TOTAL_GLOBAL_THREADS_FOR_TEST); + continue; + } + + cl_uint local_test; + cl_uint local_x_size, local_y_size, local_z_size; + cl_uint previous_local_x_size=0, previous_local_y_size=0, previous_local_z_size=0; + for (local_test = 0; local_test < local_tests_per_size; local_test++) { + + local_x_size = 1; + local_y_size = 1; + local_z_size = 1; + + if (local_test == 0) { + } else if (local_test <= dimensions) { + int dim_to_change = (local_test-1)%dimensions; + if (dim_to_change == 0) { + local_x_size = (cl_uint)max_workgroup_size; + } else if (dim_to_change == 1) { + local_y_size = (cl_uint)max_workgroup_size; + } else if (dim_to_change == 2) { + local_z_size = (cl_uint)max_workgroup_size; + } else { + log_error("Invalid dim_to_change: %d\n", dim_to_change); + free_mtdata(d); + return -1; + } + } else { + local_x_size = (int)get_random_float(1, (int)max_workgroup_size, d); + while ((local_x_size > 1) && (final_x_size%local_x_size != 0)) + local_x_size--; + int remainder = (int)floor((double)max_workgroup_size/local_x_size); + // Evenly prefer dimensions 2 and 1 first + if (local_test % 2) { + if (dimensions > 1) { + local_y_size = (int)get_random_float(1, (int)remainder, d); + while ((local_y_size > 1) && (final_y_size%local_y_size != 0)) + local_y_size--; + remainder = (int)floor((double)remainder/local_y_size); + } + if (dimensions > 2) { + local_z_size = (int)get_random_float(1, (int)remainder, d); + while ((local_z_size > 1) && 
(final_z_size%local_z_size != 0)) + local_z_size--; + } + } else { + if (dimensions > 2) { + local_z_size = (int)get_random_float(1, (int)remainder, d); + while ((local_z_size > 1) && (final_z_size%local_z_size != 0)) + local_z_size--; + remainder = (int)floor((double)remainder/local_z_size); + } + if (dimensions > 1) { + local_y_size = (int)get_random_float(1, (int)remainder, d); + while ((local_y_size > 1) && (final_y_size%local_y_size != 0)) + local_y_size--; + } + } + } + + // Put all the threads in one dimension to speed up the test in quick mode. + if (quick_test) { + local_y_size = 1; + local_z_size = 1; + local_x_size = 1; + if (final_z_size > final_y_size && final_z_size > final_x_size) + local_z_size = (cl_uint)max_workgroup_size; + else if (final_y_size > final_x_size) + local_y_size = (cl_uint)max_workgroup_size; + else + local_x_size = (cl_uint)max_workgroup_size; + } + + if (local_x_size > max_local_workgroup_size[0]) + local_x_size = (int)max_local_workgroup_size[0]; + if (dimensions > 1 && local_y_size > max_local_workgroup_size[1]) + local_y_size = (int)max_local_workgroup_size[1]; + if (dimensions > 2 && local_z_size > max_local_workgroup_size[2]) + local_z_size = (int)max_local_workgroup_size[2]; + + // Cleanup the local dimensions + while ((local_x_size > 1) && (final_x_size%local_x_size != 0)) + local_x_size--; + while ((local_y_size > 1) && (final_y_size%local_y_size != 0)) + local_y_size--; + while ((local_z_size > 1) && (final_z_size%local_z_size != 0)) + local_z_size--; + if ((previous_local_x_size == local_x_size) && (previous_local_y_size == local_y_size) && (previous_local_z_size == local_z_size)) + continue; + + if (explicit_local == 0) { + local_x_size = 0; + local_y_size = 0; + local_z_size = 0; + } + + if (DEBUG) log_info("\t\tTesting local size %s.\n", print_dimensions(local_x_size, local_y_size, local_z_size, dimensions)); + + if (explicit_local == 0) { + log_info("\tTesting global %s local [NULL]...\n", + 
print_dimensions(final_x_size, final_y_size, final_z_size, dimensions)); + } else { + log_info("\tTesting global %s local %s...\n", + print_dimensions(final_x_size, final_y_size, final_z_size, dimensions), + print_dimensions2(local_x_size, local_y_size, local_z_size, dimensions)); + } + + // Avoid running with very small local sizes on very large global sizes + cl_uint total_local_size = local_x_size * local_y_size * local_z_size; + long total_global_size = final_x_size * final_y_size * final_z_size; + if (total_local_size < max_workgroup_size) { + if (total_global_size > 16384*16384) { + if (total_local_size < 64) { + log_info("Skipping test as local_size is small and it will take a long time.\n"); + continue; + } + } + } + + err = run_test(context, queue, kernel, array, memory_size, dimensions, + final_x_size, final_y_size, final_z_size, + local_x_size, local_y_size, local_z_size, explicit_local); + + // If we failed to execute, then return so we don't crash. + if (err < 0) { + clReleaseMemObject(array); + clReleaseKernel(kernel); + clReleaseProgram(program); + free_mtdata(d); + return -1; + } + + // Otherwise, if we had errors add them up. + if (err) { + log_error("Test global %s local %s failed.\n", + print_dimensions(final_x_size, final_y_size, final_z_size, dimensions), + print_dimensions2(local_x_size, local_y_size, local_z_size, dimensions)); + errors++; + clReleaseMemObject(array); + clReleaseKernel(kernel); + clReleaseKernel(clear_memory_kernel); + clReleaseProgram(program); + free_mtdata(d); + return -1; + } + + + previous_local_x_size = local_x_size; + previous_local_y_size = local_y_size; + previous_local_z_size = local_z_size; + + // Only test one config in quick mode. 
+ if (quick_test) + break; + } // local_test size + } // sub_test + // Increment the x_size + if (x_size == max_x_size) + break; + x_size *= size_increase_per_iteration; + if (x_size > max_x_size) + x_size = max_x_size; + } // x_size + // Increment the y_size + if (y_size == max_y_size) + break; + y_size *= size_increase_per_iteration; + if (y_size > max_y_size) + y_size = max_y_size; + } // y_size + // Increment the z_size + if (z_size == max_z_size) + break; + z_size *= size_increase_per_iteration; + if (z_size > max_z_size) + z_size = max_z_size; + } // z_size + + + free_mtdata(d); + clReleaseMemObject(array); + clReleaseKernel(kernel); + clReleaseKernel(clear_memory_kernel); + clReleaseProgram(program); + if (errors) + log_error("%d total errors.\n", errors); + return errors; + +} + +#define QUICK 1 +#define FULL 0 + +int +test_quick_thread_dimensions_1d_explicit_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { + return test_thread_dimensions(deviceID, context, queue, + 1, 1, 65536*512, QUICK, 4, 1); +} + + +int +test_quick_thread_dimensions_2d_explicit_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { + return test_thread_dimensions(deviceID, context, queue, + 2, 1, 65536/4, QUICK, 16, 1); +} + +int +test_quick_thread_dimensions_3d_explicit_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { + return test_thread_dimensions(deviceID, context, queue, + 3, 1, 1024, QUICK, 32, 1); +} + + +int +test_quick_thread_dimensions_1d_implicit_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { + return test_thread_dimensions(deviceID, context, queue, + 1, 1, 65536*256, QUICK, 4, 0); +} + + +int +test_quick_thread_dimensions_2d_implicit_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { + return test_thread_dimensions(deviceID, context, queue, + 2, 1, 65536/4, QUICK, 
16, 0); +} + +int +test_quick_thread_dimensions_3d_implicit_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { + return test_thread_dimensions(deviceID, context, queue, + 3, 1, 1024, QUICK, 32, 0); +} + + + + + +int +test_full_thread_dimensions_1d_explicit_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { + return test_thread_dimensions(deviceID, context, queue, + 1, 1, 65536*512, FULL, 4, 1); +} + + +int +test_full_thread_dimensions_2d_explicit_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { + return test_thread_dimensions(deviceID, context, queue, + 2, 1, 65536/4, FULL, 16, 1); +} + +int +test_full_thread_dimensions_3d_explicit_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { + return test_thread_dimensions(deviceID, context, queue, + 3, 1, 1024, FULL, 32, 1); +} + + +int +test_full_thread_dimensions_1d_implicit_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { + return test_thread_dimensions(deviceID, context, queue, + 1, 1, 65536*256, FULL, 4, 0); +} + + +int +test_full_thread_dimensions_2d_implicit_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { + return test_thread_dimensions(deviceID, context, queue, + 2, 1, 65536/4, FULL, 16, 0); +} + +int +test_full_thread_dimensions_3d_implicit_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { + return test_thread_dimensions(deviceID, context, queue, + 3, 1, 1024, FULL, 32, 0); +} + diff --git a/test_conformance/vec_align/CMakeLists.txt b/test_conformance/vec_align/CMakeLists.txt new file mode 100644 index 00000000..74f41e7c --- /dev/null +++ b/test_conformance/vec_align/CMakeLists.txt @@ -0,0 +1,22 @@ +set(MODULE_NAME VECALIGN) + +set(${MODULE_NAME}_SOURCES + globals.c + main.c + structs.c + test_vec_align.c + 
type_replacer.c
+    ../../test_common/harness/testHarness.c
+    ../../test_common/harness/mt19937.c
+    ../../test_common/harness/msvc9.c
+    ../../test_common/harness/kernelHelpers.c
+    ../../test_common/harness/errorHelpers.c
+    ../../test_common/harness/conversions.c
+    ../../test_common/harness/parseParameters.cpp
+)
+# NOTE(review): -msse2 is x86-specific; guard it if non-x86 targets are built.
+set_source_files_properties(${${MODULE_NAME}_SOURCES}
+    PROPERTIES COMPILE_FLAGS -msse2)
+
+include(../CMakeCommon.txt)
+
diff --git a/test_conformance/vec_align/Jamfile b/test_conformance/vec_align/Jamfile
new file mode 100644
index 00000000..cc92cb44
--- /dev/null
+++ b/test_conformance/vec_align/Jamfile
@@ -0,0 +1,19 @@
+project
+    : requirements
+    -/harness//harness /harness//harness
+#    gcc:-xc
+    msvc:"/TP"
+    ;
+
+exe test_vecalign
+    : main.c
+    structs.c globals.c
+    test_vec_align.c type_replacer.c
+    /harness//mt19937.c
+    ;
+
+install dist
+    : test_vecalign
+    : debug:$(DIST)/debug/tests/test_conformance/vec_align
+    release:$(DIST)/release/tests/test_conformance/vec_align
+    ;
diff --git a/test_conformance/vec_align/Makefile b/test_conformance/vec_align/Makefile
new file mode 100644
index 00000000..262e1386
--- /dev/null
+++ b/test_conformance/vec_align/Makefile
@@ -0,0 +1,46 @@
+ifdef BUILD_WITH_ATF
+ATF = -framework ATF
+USE_ATF = -DUSE_ATF
+endif
+
+SRCS = main.c \
+    structs.c \
+    test_vec_align.c \
+    globals.c \
+    type_replacer.c \
+    ../../test_common/harness/errorHelpers.c \
+    ../../test_common/harness/threadTesting.c \
+    ../../test_common/harness/testHarness.c \
+    ../../test_common/harness/kernelHelpers.c \
+    ../../test_common/harness/mt19937.c \
+    ../../test_common/harness/conversions.c
+
+DEFINES =
+
+SOURCES = $(abspath $(SRCS))
+LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries
+LIBPATH += -L.
+FRAMEWORK = $(SOURCES) +HEADERS = +TARGET = test_vecalign +INCLUDE = +COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32 -Os +CC = c++ +CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE) +CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE) +LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF} + +OBJECTS := ${SOURCES:.c=.o} +OBJECTS := ${OBJECTS:.cpp=.o} + +TARGETOBJECT = +all: $(TARGET) + +$(TARGET): $(OBJECTS) + $(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES) + +clean: + rm -f $(TARGET) $(OBJECTS) + +.DEFAULT: + @echo The target \"$@\" does not exist in Makefile. diff --git a/test_conformance/vec_align/defines.h b/test_conformance/vec_align/defines.h new file mode 100644 index 00000000..69d9f5fb --- /dev/null +++ b/test_conformance/vec_align/defines.h @@ -0,0 +1,42 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/errorHelpers.h" +#include "../../test_common/harness/kernelHelpers.h" +#include "../../test_common/harness/threadTesting.h" +#include "../../test_common/harness/typeWrappers.h" +#include "../../test_common/harness/conversions.h" +#include "../../test_common/harness/mt19937.h" + + +// 1,2,3,4,8,16 or +// 1,2,4,8,16,3 +#define NUM_VECTOR_SIZES 6 + +extern int g_arrVecSizes[NUM_VECTOR_SIZES]; +extern int g_arrVecSteps[NUM_VECTOR_SIZES]; +extern bool g_wimpyMode; + +extern const char * g_arrVecSizeNames[NUM_VECTOR_SIZES]; +extern size_t g_arrVecAlignMasks[NUM_VECTOR_SIZES]; + +// Define the buffer size that we want to block our test with +#define BUFFER_SIZE (1024*1024) +#define KPAGESIZE 4096 + +extern ExplicitType types[]; + +extern const char *g_arrTypeNames[]; +extern const size_t g_arrTypeSizes[]; diff --git a/test_conformance/vec_align/globals.c b/test_conformance/vec_align/globals.c new file mode 100644 index 00000000..3deecadf --- /dev/null +++ b/test_conformance/vec_align/globals.c @@ -0,0 +1,59 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "defines.h" + + +// 1,2,3,4,8,16 or +// 1,2,4,8,16,3 +int g_arrVecSizes[NUM_VECTOR_SIZES] = {1,2,3,4,8,16}; +int g_arrVecSteps[NUM_VECTOR_SIZES] = {1,2,4,4,8,16}; +const char * g_arrVecSizeNames[NUM_VECTOR_SIZES] = {"", "2","3","4","8","16"}; +size_t g_arrVecAlignMasks[NUM_VECTOR_SIZES] = {(size_t)0, + (size_t)0x1, // 2 + (size_t)0x3, // 3 + (size_t)0x3, // 4 + (size_t)0x7, // 8 + (size_t)0xf // 16 +}; + +bool g_wimpyMode = false; + +ExplicitType types[] = { kChar, kUChar, + kShort, kUShort, + kInt, kUInt, + kLong, kULong, + kFloat, kDouble, + kNumExplicitTypes }; + + +const char *g_arrTypeNames[] = + { + "char", "uchar", + "short", "ushort", + "int", "uint", + "long", "ulong", + "float", "double" + }; + +extern const size_t g_arrTypeSizes[] = + { + 1, 1, + 2, 2, + 4, 4, + 8, 8, + 4, 8 + }; + diff --git a/test_conformance/vec_align/main.c b/test_conformance/vec_align/main.c new file mode 100644 index 00000000..e206b4f8 --- /dev/null +++ b/test_conformance/vec_align/main.c @@ -0,0 +1,57 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/compat.h" + +#include +#include +#include "procs.h" +#include "../../test_common/harness/testHarness.h" + +#if !defined(_WIN32) +#include +#endif + + + + + +basefn basefn_list[] = { + test_vec_align_array, + test_vec_align_struct, + test_vec_align_packed_struct, + test_vec_align_struct_arr, + test_vec_align_packed_struct_arr + +}; + +const char *basefn_names[] = { + "vec_align_array", + "vec_align_struct", + "vec_align_packed_struct", + "vec_align_struct_arr", + "vec_align_packed_struct_arr", +}; + +ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_list) / sizeof(basefn_list[0]))); + +int num_fns = sizeof(basefn_names) / sizeof(char *); + +int main(int argc, const char *argv[]) +{ + return runTestHarness( argc, argv, num_fns, basefn_list, basefn_names, false, false, 0 ); +} + + diff --git a/test_conformance/vec_align/procs.h b/test_conformance/vec_align/procs.h new file mode 100644 index 00000000..595938b3 --- /dev/null +++ b/test_conformance/vec_align/procs.h @@ -0,0 +1,39 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/errorHelpers.h" +#include "../../test_common/harness/kernelHelpers.h" +#include "../../test_common/harness/threadTesting.h" +#include "../../test_common/harness/typeWrappers.h" +#include "../../test_common/harness/conversions.h" +#include "../../test_common/harness/mt19937.h" + +// The number of errors to print out for each test in the shuffle tests +#define MAX_ERRORS_TO_PRINT 1 + + +extern int create_program_and_kernel(const char *source, const char *kernel_name, cl_program *program_ret, cl_kernel *kernel_ret); + + +int test_vec_align_array(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +int test_vec_align_struct(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +int test_vec_align_packed_struct(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + + +int test_vec_align_struct_arr(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +int test_vec_align_packed_struct_arr(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); diff --git a/test_conformance/vec_align/structs.c b/test_conformance/vec_align/structs.c new file mode 100644 index 00000000..2e15e36f --- /dev/null +++ b/test_conformance/vec_align/structs.c @@ -0,0 +1,375 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "structs.h" + + +#include "defines.h" + +#define DEBUG_MEM_ALLOC 0 + +/** typedef struct _bufferStruct + { + void * m_pIn; + void * m_pOut; + + cl_mem m_outBuffer; + cl_mem m_inBuffer; + + size_t m_bufSize; + } bufferStruct; + */ + + +clState * newClState(cl_device_id device, cl_context context, cl_command_queue queue) +{ + clState * pResult = (clState *)malloc(sizeof(clState)); +#if DEBUG_MEM_ALLOC + log_info("malloc clState * %x\n", pResult); +#endif + + pResult->m_device = device; + pResult->m_context = context; + pResult->m_queue = queue; + + pResult->m_kernel = NULL; pResult->m_program = NULL; + return pResult; +} + +clState * destroyClState(clState * pState) +{ + clStateDestroyProgramAndKernel(pState); +#if DEBUG_MEM_ALLOC + log_info("delete (free) clState * %x\n", pState); +#endif + free(pState); + return NULL; +} + + +int clStateMakeProgram(clState * pState, const char * prog, + const char * kernelName) +{ + const char * srcArr[1] = {NULL}; + srcArr[0] = prog; + int err = create_single_kernel_helper(pState->m_context, + &(pState->m_program), + &(pState->m_kernel), + 1, srcArr, kernelName ); +#if DEBUG_MEM_ALLOC + log_info("create program and kernel\n"); +#endif + return err; +} + +int runKernel(clState * pState, size_t numThreads) { + int err; + pState->m_numThreads = numThreads; + err = clEnqueueNDRangeKernel(pState->m_queue, pState->m_kernel, + 1, NULL, &(pState->m_numThreads), + NULL, 0, NULL, NULL); + if(err != CL_SUCCESS) + { + log_error("clEnqueueNDRangeKernel returned %d (%x)\n", + err, err); + return -1; + } + return 0; +} + + +void clStateDestroyProgramAndKernel(clState * pState) +{ +#if DEBUG_MEM_ALLOC + log_info("destroy program and kernel\n"); +#endif + if(pState->m_kernel != NULL) { + clReleaseKernel( pState->m_kernel ); + pState->m_kernel = NULL; + } + if(pState->m_program != NULL) { + clReleaseProgram( pState->m_program ); + pState->m_program = NULL; + } +} + +bufferStruct * newBufferStruct(size_t inSize, size_t outSize, clState 
* pClState) { + int error; + bufferStruct * pResult = (bufferStruct *)malloc(sizeof(bufferStruct)); +#if DEBUG_MEM_ALLOC + log_info("malloc bufferStruct * %x\n", pResult); +#endif + + pResult->m_bufSizeIn = inSize; + pResult->m_bufSizeOut = outSize; + + pResult->m_pIn = malloc(inSize); + pResult->m_pOut = malloc(outSize); +#if DEBUG_MEM_ALLOC + log_info("malloc m_pIn %x\n", pResult->m_pIn); + log_info("malloc m_pOut %x\n", pResult->m_pOut); +#endif + + pResult->m_inBuffer = clCreateBuffer(pClState->m_context, CL_MEM_READ_ONLY, + inSize, NULL, &error); + if( pResult->m_inBuffer == NULL ) + { + vlog_error( "clCreateArray failed for input (%d)\n", error ); + return destroyBufferStruct(pResult, pClState); + } +#if DEBUG_MEM_ALLOC + log_info("clCreateBuffer %x\n", pResult->m_inBuffer); +#endif + + pResult->m_outBuffer = clCreateBuffer( pClState->m_context, + CL_MEM_WRITE_ONLY, + outSize, + NULL, + &error ); + if( pResult->m_outBuffer == NULL ) + { + vlog_error( "clCreateArray failed for output (%d)\n", error ); + return destroyBufferStruct(pResult, pClState); + } +#if DEBUG_MEM_ALLOC + log_info("clCreateBuffer %x\n", pResult->m_outBuffer); +#endif + + pResult->m_bufferUploaded = false; + + return pResult; +} + +bufferStruct * destroyBufferStruct(bufferStruct * destroyMe, clState * pClState) { + if(destroyMe) + { + if(destroyMe->m_outBuffer != NULL) { +#if DEBUG_MEM_ALLOC + log_info("clReleaseMemObject %x\n", destroyMe->m_outBuffer); +#endif + clReleaseMemObject(destroyMe->m_outBuffer); + destroyMe->m_outBuffer = NULL; + } + if(destroyMe->m_inBuffer != NULL) { +#if DEBUG_MEM_ALLOC + log_info("clReleaseMemObject %x\n", destroyMe->m_outBuffer); +#endif + clReleaseMemObject(destroyMe->m_inBuffer); + destroyMe->m_inBuffer = NULL; + } + if(destroyMe->m_pIn != NULL) { +#if DEBUG_MEM_ALLOC + log_info("delete (free) m_pIn %x\n", destroyMe->m_pIn); +#endif + free(destroyMe->m_pIn); + destroyMe->m_pIn = NULL; + } + if(destroyMe->m_pOut != NULL) { +#if DEBUG_MEM_ALLOC + 
log_info("delete (free) m_pOut %x\n", destroyMe->m_pOut); +#endif + free(destroyMe->m_pOut); + destroyMe->m_pOut = NULL; + } +#if DEBUG_MEM_ALLOC + log_info("delete (free) bufferStruct * %x\n", destroyMe); +#endif + free((void *)destroyMe); + destroyMe = NULL; + } + return destroyMe; +} + +void initContents(bufferStruct * pBufferStruct, clState * pClState, + size_t typeSize, + size_t countIn, size_t countOut ) +{ + size_t i; + + uint64_t start = 0; + + switch(typeSize) + { + case 1: { + uint8_t* ub = (uint8_t *)(pBufferStruct->m_pIn); + for (i=0; i < countIn; ++i) + { + ub[i] = (uint8_t)start++; + } + break; + } + case 2: { + uint16_t* us = (uint16_t *)(pBufferStruct->m_pIn); + for (i=0; i < countIn; ++i) + { + us[i] = (uint16_t)start++; + } + break; + } + case 4: { + if (!g_wimpyMode) { + uint32_t* ui = (uint32_t *)(pBufferStruct->m_pIn); + for (i=0; i < countIn; ++i) { + ui[i] = (uint32_t)start++; + } + } + else { + // The short test doesn't iterate over the entire 32 bit space so + // we alternate between positive and negative values + int32_t* ui = (int32_t *)(pBufferStruct->m_pIn); + int32_t sign = 1; + for (i=0; i < countIn; ++i, ++start) { + ui[i] = (int32_t)start*sign; + sign = sign * -1; + } + } + break; + } + case 8: { + // We don't iterate over the entire space of 64 bit so for the + // selects, we want to test positive and negative values + int64_t* ll = (int64_t *)(pBufferStruct->m_pIn); + int64_t sign = 1; + for (i=0; i < countIn; ++i, ++start) { + ll[i] = start*sign; + sign = sign * -1; + } + break; + } + default: { + log_error("invalid type size %x\n", (int)typeSize); + } + } + // pBufferStruct->m_bufSizeIn + // pBufferStruct->m_bufSizeOut +} + +int pushArgs(bufferStruct * pBufferStruct, clState * pClState) +{ + int err; + if( !pBufferStruct->m_bufferUploaded ) + { + err = clEnqueueWriteBuffer(pClState->m_queue, pBufferStruct->m_inBuffer, + CL_TRUE, 0, pBufferStruct->m_bufSizeIn, + pBufferStruct->m_pIn, 0, NULL, NULL); +#if DEBUG_MEM_ALLOC + 
log_info("clEnqueueWriteBuffer %x\n", pBufferStruct->m_inBuffer); +#endif + if(err != CL_SUCCESS) + { + log_error("clEnqueueWriteBuffer failed\n"); + return -1; + } + pBufferStruct->m_bufferUploaded = true; + } + + err = clSetKernelArg(pClState->m_kernel, 0, + sizeof(pBufferStruct->m_inBuffer), // pBufferStruct->m_bufSizeIn, + &(pBufferStruct->m_inBuffer)); +#if DEBUG_MEM_ALLOC + // log_info("clSetKernelArg 0, %x\n", pBufferStruct->m_inBuffer); +#endif + if(err != CL_SUCCESS) + { + log_error("clSetKernelArgs failed, first arg (0)\n"); + return -1; + } + + err = clSetKernelArg(pClState->m_kernel, 1, + sizeof(pBufferStruct->m_outBuffer), // pBufferStruct->m_bufSizeOut, + &(pBufferStruct->m_outBuffer)); + if(err != CL_SUCCESS) + { + log_error("clSetKernelArgs failed, second arg (1)\n"); + return -1; + } + +#if DEBUG_MEM_ALLOC + // log_info("clSetKernelArg 0, %x\n", pBufferStruct->m_outBuffer); +#endif + + return 0; +} + +int retrieveResults(bufferStruct * pBufferStruct, clState * pClState) +{ + int err; + err = clEnqueueReadBuffer(pClState->m_queue, pBufferStruct->m_outBuffer, + CL_TRUE, 0, pBufferStruct->m_bufSizeOut, + pBufferStruct->m_pOut, 0, NULL, NULL); + if(err != CL_SUCCESS) + { + log_error("clEnqueueReadBuffer failed\n"); + return -1; + } + return 0; +} + +// vecSizeIdx indexes into g_arrVecAlignMasks, g_arrVecSizeNames +// and g_arrVecSizes +int checkCorrectness(bufferStruct * pBufferStruct, clState * pClState, + size_t minAlign) +{ + size_t i; + cl_uint * targetArr = (cl_uint *)(pBufferStruct->m_pOut); + for(i = 0; i < pClState->m_numThreads; ++i) + { + if((targetArr[i])%minAlign != (cl_uint)0) + { + vlog_error("Error %d (of %d). 
Expected a multple of %x, got %x\n", + i, pClState->m_numThreads, + minAlign, + targetArr[i]); + return -1; + } + } + + /* log_info("\n"); + for(i = 0; i < 4; ++i) { + log_info("%lx, ", targetArr[i]); + } + log_info("\n"); + fflush(stdout); */ + return 0; +} + + +// vecSizeIdx indexes into g_arrVecAlignMasks, g_arrVecSizeNames +// and g_arrVecSizes +int checkPackedCorrectness(bufferStruct * pBufferStruct, clState * pClState, + size_t totSize, size_t beforeSize) +{ + size_t i; + cl_uint * targetArr = (cl_uint *)(pBufferStruct->m_pOut); + for(i = 0; i < pClState->m_numThreads; ++i) + { + if((targetArr[i]-beforeSize)%totSize != (cl_uint)0) + { + vlog_error("Error %d (of %d). Expected %d more than a multple of %d, got %d \n", + i, pClState->m_numThreads, beforeSize, + totSize, + targetArr[i]); + return -1; + } + } + + /* log_info("\n"); + for(i = 0; i < 4; ++i) { + log_info("%lx, ", targetArr[i]); + } + log_info("\n"); + fflush(stdout); */ + return 0; +} diff --git a/test_conformance/vec_align/structs.h b/test_conformance/vec_align/structs.h new file mode 100644 index 00000000..09c6d367 --- /dev/null +++ b/test_conformance/vec_align/structs.h @@ -0,0 +1,73 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "testBase.h" + + +#include "../../test_common/harness/conversions.h" +#include "../../test_common/harness/typeWrappers.h" + +typedef struct _clState +{ + cl_device_id m_device; + cl_context m_context; + cl_command_queue m_queue; + + cl_program m_program; + cl_kernel m_kernel; + size_t m_numThreads; +} clState; + +clState * newClState(cl_device_id device, cl_context context, cl_command_queue queue); +clState * destroyClState(clState * pState); + +int clStateMakeProgram(clState * pState, const char * prog, + const char * kernelName); +void clStateDestroyProgramAndKernel(clState * pState); + +int runKernel(clState * pState, size_t numThreads); + +typedef struct _bufferStruct +{ + void * m_pIn; + void * m_pOut; + + cl_mem m_outBuffer; + cl_mem m_inBuffer; + + size_t m_bufSizeIn, m_bufSizeOut; + + int m_bufferUploaded; +} bufferStruct; + + +bufferStruct * newBufferStruct(size_t inSize, size_t outSize, clState * pClState); + +bufferStruct * destroyBufferStruct(bufferStruct * destroyMe, clState * pClState); + +void initContents(bufferStruct * pBufferStruct, clState * pClState, + size_t typeSize, + size_t vecWidth); + +int pushArgs(bufferStruct * pBufferStruct, clState * pClState); +int retrieveResults(bufferStruct * pBufferStruct, clState * pClState); + +// vecSizeIdx indexes into g_arrVecAlignMasks, g_arrVecSizeNames +// and g_arrVecSizes +int checkCorrectness(bufferStruct * pBufferStruct, clState * pClState, + size_t minAlign); + +int checkPackedCorrectness(bufferStruct * pBufferStruct, clState * pClState, + size_t totSize, size_t beforeSize); diff --git a/test_conformance/vec_align/testBase.h b/test_conformance/vec_align/testBase.h new file mode 100644 index 00000000..00dcc07b --- /dev/null +++ b/test_conformance/vec_align/testBase.h @@ -0,0 +1,28 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef _testBase_h +#define _testBase_h + +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include + +#include "procs.h" + +#endif // _testBase_h diff --git a/test_conformance/vec_align/test_vec_align.c b/test_conformance/vec_align/test_vec_align.c new file mode 100644 index 00000000..775dc3ed --- /dev/null +++ b/test_conformance/vec_align/test_vec_align.c @@ -0,0 +1,545 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "testBase.h" + + +#include "../../test_common/harness/conversions.h" +#include "../../test_common/harness/typeWrappers.h" +#include "../../test_common/harness/testHarness.h" + +#include "structs.h" + +#include "defines.h" + +#include "type_replacer.h" + + +size_t get_align(size_t vecSize) +{ + if(vecSize == 3) + { + return 4; + } + return vecSize; +} + +/* // Lots of conditionals means this is not gonna be an optimal min on intel. 
*/ +/* // That's okay, make sure we only call a few times per test, not for every */ +/* // element */ +/* size_t min_of_nonzero(size_t a, size_t b) */ +/* { */ +/* if(a != 0 && (a<=b || b==0)) */ +/* { */ +/* return a; */ +/* } */ +/* if(b != 0 && (bm_numThreads, + g_arrTypeNames[typeIdx], + g_arrVecSizeNames[vecSizeIdx]); + destroyBufferStruct(pBuffers, pClState); + destroyClState(pClState); + return -1; + } + + // log_info("About to retrieve results\n"); fflush(stdout); + err = retrieveResults(pBuffers, pClState); + if(err != 0) { + vlog_error("%s: failed to retrieve results %s%s\n", + testName, + g_arrTypeNames[typeIdx], + g_arrVecSizeNames[vecSizeIdx]); + destroyBufferStruct(pBuffers, pClState); + destroyClState(pClState); + return -1; + } + + + + if(preSizeBytes+postSizeBytes == 0) + { + // log_info("About to Check Correctness\n"); fflush(stdout); + err = checkCorrectness(pBuffers, pClState, + get_align(g_arrVecSizes[vecSizeIdx])* + typeSize); + } + else + { + // we're checking for an aligned struct + err = checkPackedCorrectness(pBuffers, pClState, totSize, + preSizeBytes); + } + + if(err != 0) { + vlog_error("%s: incorrect results %s%s\n", + testName, + g_arrTypeNames[typeIdx], + g_arrVecSizeNames[vecSizeIdx]); + vlog_error("%s: Source was \"\n%s\n\"", + testName, srcBuffer); + destroyBufferStruct(pBuffers, pClState); + destroyClState(pClState); + return -1; + } + + clStateDestroyProgramAndKernel(pClState); + + } + } + + destroyBufferStruct(pBuffers, pClState); + + destroyClState(pClState); + + + // vlog_error("%s : implementation incomplete : FAIL\n", testName); + return 0; // -1; // fails on account of not being written. +} + + + +const char * patterns[] = { + ".PRAGMA..STATE.\n" + "__kernel void test_vec_align_array(.SRC_SCOPE. .TYPE..NUM. *source, .DST_SCOPE. uint *dest)\n" + "{\n" + " int tid = get_global_id(0);\n" + " dest[tid] = (uint)((.SRC_SCOPE. 
uchar *)(source+tid));\n" + "}\n", + ".PRAGMA..STATE.\n" + "typedef struct myUnpackedStruct { \n" + ".PRE." + " .TYPE..NUM. vec;\n" + ".POST." + "} testStruct;\n" + "__kernel void test_vec_align_struct(__constant .TYPE..NUM. *source, .DST_SCOPE. uint *dest)\n" + "{\n" + " .SRC_SCOPE. testStruct test;\n" + " int tid = get_global_id(0);\n" + " dest[tid] = (uint)((.SRC_SCOPE. uchar *)&(test.vec));\n" + "}\n", + ".PRAGMA..STATE.\n" + "typedef struct __attribute__ ((packed)) myPackedStruct { \n" + ".PRE." + " .TYPE..NUM. vec;\n" + ".POST." + "} testStruct;\n" + "__kernel void test_vec_align_packed_struct(__constant .TYPE..NUM. *source, .DST_SCOPE. uint *dest)\n" + "{\n" + " .SRC_SCOPE. testStruct test;\n" + " int tid = get_global_id(0);\n" + " dest[tid] = (uint)((.SRC_SCOPE. uchar *)&(test.vec) - (.SRC_SCOPE. uchar *)&test);\n" + "}\n", + ".PRAGMA..STATE.\n" + "typedef struct myStruct { \n" + ".PRE." + " .TYPE..NUM. vec;\n" + ".POST." + "} testStruct;\n" + "__kernel void test_vec_align_struct_arr(.SRC_SCOPE. testStruct *source, .DST_SCOPE. uint *dest)\n" + "{\n" + " int tid = get_global_id(0);\n" + " dest[tid] = (uint)((.SRC_SCOPE. uchar *)&(source[tid].vec));\n" + "}\n", + ".PRAGMA..STATE.\n" + "typedef struct __attribute__ ((packed)) myPackedStruct { \n" + ".PRE." + " .TYPE..NUM. vec;\n" + ".POST." + "} testStruct;\n" + "__kernel void test_vec_align_packed_struct_arr(.SRC_SCOPE. testStruct *source, .DST_SCOPE. uint *dest)\n" + "{\n" + " int tid = get_global_id(0);\n" + " dest[tid] = (uint)((.SRC_SCOPE. uchar *)&(source[tid].vec) - (.SRC_SCOPE. uchar *)&(source[0]));\n" + "}\n", + // __attribute__ ((packed)) +}; + + + +const char * pre_substitution_arr[] = { + "", + "char c;\n", + "short3 s;", + ".TYPE.3 tPre;\n", + ".TYPE. arrPre[5];\n", + ".TYPE. 
arrPre[12];\n",
+    NULL
+};
+
+
+// Byte alignment contributed by each pre_substitution_arr entry (raw, type-independent);
+// 0 if the entry's alignment depends on the tested type (see type_multiple_pre_align_arr)
+size_t pre_align_arr[] = {
+    0,
+    sizeof(cl_char),
+    4*sizeof(cl_short),
+    0, // taken care of in type_multiple_pre_align_arr
+    0,
+    0
+};
+
+// Alignment of each pre_substitution_arr entry expressed as a multiple of
+// sizeof(.TYPE.), resolved once the tested type is substituted in;
+// 0 if the entry's alignment is type-independent (see pre_align_arr)
+size_t type_multiple_pre_align_arr[] = {
+    0,
+    0,
+    0,
+    4,
+    5,
+    12
+};
+
+const char * post_substitution_arr[] = {
+    "",
+    "char cPost;\n",
+    ".TYPE. arrPost[3];\n",
+    ".TYPE. arrPost[5];\n",
+    ".TYPE.3 arrPost;\n",
+    ".TYPE. arrPost[12];\n",
+    NULL
+};
+
+
+// Byte alignment contributed by each post_substitution_arr entry (raw, type-independent);
+// 0 if the entry's alignment depends on the tested type (see type_multiple_post_align_arr)
+size_t post_align_arr[] = {
+    0,
+    sizeof(cl_char),
+    0, // taken care of in type_multiple_post_align_arr
+    0,
+    0,
+    0
+};
+
+// Alignment of each post_substitution_arr entry expressed as a multiple of
+// sizeof(.TYPE.), resolved once the tested type is substituted in;
+// 0 if the entry's alignment is type-independent (see post_align_arr)
+size_t type_multiple_post_align_arr[] = {
+    0,
+    0,
+    3,
+    5,
+    4,
+    12
+};
+
+// TODO: should there be a packed version of this?
+int test_vec_align_array(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + char tmp[2048]; + int result; + + log_info("Testing global\n"); + doReplace(tmp, (size_t)2048, patterns[0], + ".SRC_SCOPE.", "__global", + ".DST_SCOPE.", "__global"); // + result = test_vec_internal(deviceID, context, queue, tmp, + "test_vec_align_array", + BUFFER_SIZE, 0, 0, 0, 0); + return result; +} + + +int test_vec_align_struct(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + char tmp1[2048], tmp2[2048]; + int result = 0; + int preIdx, postIdx; + + log_info("testing __private\n"); + doReplace(tmp2, (size_t)2048, patterns[1], + ".SRC_SCOPE.", "__private", + ".DST_SCOPE.", "__global"); // + + for(preIdx = 0; pre_substitution_arr[preIdx] != NULL; ++preIdx) { + for(postIdx = 0; post_substitution_arr[postIdx] != NULL; ++postIdx) { + doReplace(tmp1, (size_t)2048, tmp2, + ".PRE.", pre_substitution_arr[preIdx], + ".POST.", post_substitution_arr[postIdx]); + + result = test_vec_internal(deviceID, context, queue, tmp1, + "test_vec_align_struct", + 512, 0, 0, 0, 0); + if (result != 0) { + return result; + } + } + } + + log_info("testing __local\n"); + doReplace(tmp2, (size_t)2048, patterns[1], + ".SRC_SCOPE.", "__local", + ".DST_SCOPE.", "__global"); // + + for(preIdx = 0; pre_substitution_arr[preIdx] != NULL; ++preIdx) { + for(postIdx = 0; post_substitution_arr[postIdx] != NULL; ++postIdx) { + doReplace(tmp1, (size_t)2048, tmp2, + ".PRE.", pre_substitution_arr[preIdx], + ".POST.", post_substitution_arr[postIdx]); + + result = test_vec_internal(deviceID, context, queue, tmp1, + "test_vec_align_struct", + 512, 0, 0, 0, 0); + if(result != 0) { + return result; + } + } + } + return 0; +} + +int test_vec_align_packed_struct(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + char tmp1[2048], tmp2[2048]; + int result = 0; + int preIdx, postIdx; + + + log_info("Testing __private\n"); + 
doReplace(tmp2, (size_t)2048, patterns[2], + ".SRC_SCOPE.", "__private", + ".DST_SCOPE.", "__global"); // + + for(preIdx = 0; pre_substitution_arr[preIdx] != NULL; ++preIdx) { + for(postIdx = 0; post_substitution_arr[postIdx] != NULL; ++postIdx) { + doReplace(tmp1, (size_t)2048, tmp2, + ".PRE.", pre_substitution_arr[preIdx], + ".POST.", post_substitution_arr[postIdx]); + + result = test_vec_internal(deviceID, context, queue, tmp1, + "test_vec_align_packed_struct", + 512, pre_align_arr[preIdx], + type_multiple_pre_align_arr[preIdx], + post_align_arr[postIdx], + type_multiple_post_align_arr[postIdx]); + if(result != 0) { + return result; + } + } + } + + log_info("testing __local\n"); + doReplace(tmp2, (size_t)2048, patterns[2], + ".SRC_SCOPE.", "__local", + ".DST_SCOPE.", "__global"); // + + for(preIdx = 0; pre_substitution_arr[preIdx] != NULL; ++preIdx) { + for(postIdx = 0; post_substitution_arr[postIdx] != NULL; ++postIdx) { + doReplace(tmp1, (size_t)2048, tmp2, + ".PRE.", pre_substitution_arr[preIdx], + ".POST.", post_substitution_arr[postIdx]); + + result = test_vec_internal(deviceID, context, queue, tmp1, + "test_vec_align_packed_struct", + 512, pre_align_arr[preIdx], + type_multiple_pre_align_arr[preIdx], + post_align_arr[postIdx], + type_multiple_post_align_arr[postIdx]); + if (result != 0) { + return result; + } + } + } + return 0; +} + +int test_vec_align_struct_arr(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + char tmp1[2048], tmp2[2048]; + int result = 0; + int preIdx, postIdx; + + + log_info("testing __global\n"); + doReplace(tmp2, (size_t)2048, patterns[3], + ".SRC_SCOPE.", "__global", + ".DST_SCOPE.", "__global"); // + + for(preIdx = 0; pre_substitution_arr[preIdx] != NULL; ++preIdx) { + for(postIdx = 0; post_substitution_arr[postIdx] != NULL; ++postIdx) { + doReplace(tmp1, (size_t)2048, tmp2, + ".PRE.", pre_substitution_arr[preIdx], + ".POST.", post_substitution_arr[postIdx]); + + result = 
test_vec_internal(deviceID, context, queue, tmp1, + "test_vec_align_struct_arr", + BUFFER_SIZE, 0, 0, 0, 0); + if(result != 0) { + return result; + } + } + } + return 0; +} + +int test_vec_align_packed_struct_arr(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + char tmp1[2048], tmp2[2048]; + int result = 0; + int preIdx, postIdx; + + + log_info("Testing __global\n"); + doReplace(tmp2, (size_t)2048, patterns[4], + ".SRC_SCOPE.", "__global", + ".DST_SCOPE.", "__global"); // + + for(preIdx = 0; pre_substitution_arr[preIdx] != NULL; ++preIdx) { + for(postIdx = 0; post_substitution_arr[postIdx] != NULL; ++postIdx) { + doReplace(tmp1, (size_t)2048, tmp2, + ".PRE.", pre_substitution_arr[preIdx], + ".POST.", post_substitution_arr[postIdx]); + + result = test_vec_internal(deviceID, context, queue, tmp1, + "test_vec_align_packed_struct_arr", + BUFFER_SIZE, pre_align_arr[preIdx], + type_multiple_pre_align_arr[preIdx], + post_align_arr[postIdx], + type_multiple_post_align_arr[postIdx]); + if(result != 0) + return result; + } + } + return 0; +} + diff --git a/test_conformance/vec_align/type_replacer.c b/test_conformance/vec_align/type_replacer.c new file mode 100644 index 00000000..74967b2c --- /dev/null +++ b/test_conformance/vec_align/type_replacer.c @@ -0,0 +1,115 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include +#if !defined(_MSC_VER) +#include +#endif // !_MSC_VER + +size_t doReplace(char * dest, size_t destLength, const char * source, + const char * stringToReplace1, const char * replaceWith1, + const char * stringToReplace2, const char * replaceWith2) +{ + size_t copyCount = 0; + const char * sourcePtr = source; + char * destPtr = dest; + const char * ptr1; + const char * ptr2; + size_t nJump; + size_t len1, len2; + size_t lenReplace1, lenReplace2; + len1 = strlen(stringToReplace1); + len2 = strlen(stringToReplace2); + lenReplace1 = strlen(replaceWith1); + lenReplace2 = strlen(replaceWith2); + for(;copyCount < destLength && *sourcePtr; ) + { + ptr1 = strstr(sourcePtr, stringToReplace1); + ptr2 = strstr(sourcePtr, stringToReplace2); + if(ptr1 != NULL && (ptr2 == NULL || ptr2 > ptr1)) + { + nJump = ptr1-sourcePtr; + if(((uintptr_t)ptr1-(uintptr_t)sourcePtr) > destLength-copyCount) { return -1; } + copyCount += nJump; + strncpy(destPtr, sourcePtr, nJump); + destPtr += nJump; + sourcePtr += nJump + len1; + strcpy(destPtr, replaceWith1); + destPtr += lenReplace1; + } + else if(ptr2 != NULL && (ptr1 == NULL || ptr1 >= ptr2)) + { + nJump = ptr2-sourcePtr; + if(nJump > destLength-copyCount) { return -2; } + copyCount += nJump; + strncpy(destPtr, sourcePtr, nJump); + destPtr += nJump; + sourcePtr += nJump + len2; + strcpy(destPtr, replaceWith2); + destPtr += lenReplace2; + } + else + { + nJump = strlen(sourcePtr); + if(nJump > destLength-copyCount) { return -3; } + copyCount += nJump; + strcpy(destPtr, sourcePtr); + destPtr += nJump; + sourcePtr += nJump; + } + } + *destPtr = '\0'; + return copyCount; +} + +size_t doSingleReplace(char * dest, size_t destLength, const char * source, + const char * stringToReplace, const char * replaceWith) +{ + size_t copyCount = 0; + const char * sourcePtr = source; + char * destPtr = dest; + const char * ptr; + size_t nJump; + size_t len; + size_t lenReplace; + len = strlen(stringToReplace); + lenReplace = strlen(replaceWith); + 
for(;copyCount < destLength && *sourcePtr; ) + { + ptr = strstr(sourcePtr, stringToReplace); + if(ptr != NULL) + { + nJump = ptr-sourcePtr; + if(((uintptr_t)ptr-(uintptr_t)sourcePtr) > destLength-copyCount) { return -1; } + copyCount += nJump; + strncpy(destPtr, sourcePtr, nJump); + destPtr += nJump; + sourcePtr += nJump + len; + strcpy(destPtr, replaceWith); + destPtr += lenReplace; + } + else + { + nJump = strlen(sourcePtr); + if(nJump > destLength-copyCount) { return -3; } + copyCount += nJump; + strcpy(destPtr, sourcePtr); + destPtr += nJump; + sourcePtr += nJump; + } + } + *destPtr = '\0'; + return copyCount; +} diff --git a/test_conformance/vec_align/type_replacer.h b/test_conformance/vec_align/type_replacer.h new file mode 100644 index 00000000..f50b08d7 --- /dev/null +++ b/test_conformance/vec_align/type_replacer.h @@ -0,0 +1,23 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include + +size_t doReplace(char * dest, size_t destLength, const char * source, + const char * stringToReplace1, const char * replaceWith1, + const char * stringToReplace2, const char * replaceWith2); + +size_t doSingleReplace(char * dest, size_t destLength, const char * source, + const char * stringToReplace, const char * replaceWith); diff --git a/test_conformance/vec_step/CMakeLists.txt b/test_conformance/vec_step/CMakeLists.txt new file mode 100644 index 00000000..847dfd78 --- /dev/null +++ b/test_conformance/vec_step/CMakeLists.txt @@ -0,0 +1,20 @@ +set(MODULE_NAME VECSTEP) + +set(${MODULE_NAME}_SOURCES + globals.c + test_step.c + main.c + structs.c + type_replacer.c + ../../test_common/harness/testHarness.c + ../../test_common/harness/mt19937.c + ../../test_common/harness/msvc9.c + ../../test_common/harness/kernelHelpers.c + ../../test_common/harness/errorHelpers.c + ../../test_common/harness/parseParameters.cpp +) + +set_source_files_properties( + COMPILE_FLAGS -msse2) + +include(../CMakeCommon.txt) diff --git a/test_conformance/vec_step/Jamfile b/test_conformance/vec_step/Jamfile new file mode 100644 index 00000000..cc92cb44 --- /dev/null +++ b/test_conformance/vec_step/Jamfile @@ -0,0 +1,19 @@ +project + : requirements + -/harness//harness /harness//harness +# gcc:-xc + msvc:"/TP" + ; + +exe test_step + : test_step.c + globals.c + main.c + /harness//mt19937.c + ; + +install dist + : test_step + : debug:$(DIST)/debug/tests/test_conformance/vec_step + release:$(DIST)/release/tests/test_conformance/vec_step + ; diff --git a/test_conformance/vec_step/Makefile b/test_conformance/vec_step/Makefile new file mode 100644 index 00000000..654dfec7 --- /dev/null +++ b/test_conformance/vec_step/Makefile @@ -0,0 +1,46 @@ +ifdef BUILD_WITH_ATF +ATF = -framework ATF +USE_ATF = -DUSE_ATF +endif + +SRCS = main.c \ + structs.c \ + test_step.c \ + globals.c \ + type_replacer.c \ + ../../test_common/harness/errorHelpers.c \ + ../../test_common/harness/threadTesting.c \ 
+ ../../test_common/harness/testHarness.c \ + ../../test_common/harness/kernelHelpers.c \ + ../../test_common/harness/mt19937.c \ + ../../test_common/harness/conversions.c + +DEFINES = + +SOURCES = $(abspath $(SRCS)) +LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries +LIBPATH += -L. +FRAMEWORK = $(SOURCES) +HEADERS = +TARGET = test_vecstep +INCLUDE = +COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32 -Os +CC = c++ +CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE) +CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE) +LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF} + +OBJECTS := ${SOURCES:.c=.o} +OBJECTS := ${OBJECTS:.cpp=.o} + +TARGETOBJECT = +all: $(TARGET) + +$(TARGET): $(OBJECTS) + $(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES) + +clean: + rm -f $(TARGET) $(OBJECTS) + +.DEFAULT: + @echo The target \"$@\" does not exist in Makefile. diff --git a/test_conformance/vec_step/defines.h b/test_conformance/vec_step/defines.h new file mode 100644 index 00000000..47af57ed --- /dev/null +++ b/test_conformance/vec_step/defines.h @@ -0,0 +1,41 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/errorHelpers.h" +#include "../../test_common/harness/kernelHelpers.h" +#include "../../test_common/harness/threadTesting.h" +#include "../../test_common/harness/typeWrappers.h" +#include "../../test_common/harness/conversions.h" +#include "../../test_common/harness/mt19937.h" + + +// 1,2,3,4,8,16 or +// 1,2,4,8,16,3 +#define NUM_VECTOR_SIZES 6 + +extern int g_arrVecSizes[NUM_VECTOR_SIZES]; +extern int g_arrVecSteps[NUM_VECTOR_SIZES]; +extern bool g_wimpyMode; + +extern const char * g_arrVecSizeNames[NUM_VECTOR_SIZES]; + +// Define the buffer size that we want to block our test with +#define BUFFER_SIZE (1024*1024) +#define KPAGESIZE 4096 + +extern ExplicitType types[]; + +extern const char *g_arrTypeNames[]; +extern const size_t g_arrTypeSizes[]; diff --git a/test_conformance/vec_step/globals.c b/test_conformance/vec_step/globals.c new file mode 100644 index 00000000..029ae1c6 --- /dev/null +++ b/test_conformance/vec_step/globals.c @@ -0,0 +1,52 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "defines.h" + + +// 1,2,3,4,8,16 or +// 1,2,4,8,16,3 +int g_arrVecSizes[NUM_VECTOR_SIZES] = {1,2,3,4,8,16}; +int g_arrVecSteps[NUM_VECTOR_SIZES] = {1,2,4,4,8,16}; +const char * g_arrVecSizeNames[NUM_VECTOR_SIZES] = {"", "2","3","4","8","16"}; + +bool g_wimpyMode = false; + +ExplicitType types[] = { kChar, kUChar, + kShort, kUShort, + kInt, kUInt, + kLong, kULong, + kFloat, kDouble, + kNumExplicitTypes }; + + +const char *g_arrTypeNames[] = + { + "char", "uchar", + "short", "ushort", + "int", "uint", + "long", "ulong", + "float", "double" + }; + +extern const size_t g_arrTypeSizes[] = + { + 1, 1, + 2, 2, + 4, 4, + 8, 8, + 4, 8 + }; + diff --git a/test_conformance/vec_step/main.c b/test_conformance/vec_step/main.c new file mode 100644 index 00000000..2adfd5dc --- /dev/null +++ b/test_conformance/vec_step/main.c @@ -0,0 +1,54 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/compat.h" + +#include +#include +#include "procs.h" +#include "../../test_common/harness/testHarness.h" + +#if !defined(_WIN32) +#include +#endif + + + + + +basefn basefn_list[] = { + test_step_type, + test_step_var, + test_step_typedef_type, + test_step_typedef_var, +}; + +const char *basefn_names[] = { + "step_type", + "step_var", + "step_typedef_type", + "step_typedef_var", +}; + +ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_list) / sizeof(basefn_list[0]))); + +int num_fns = sizeof(basefn_names) / sizeof(char *); + +int main(int argc, const char *argv[]) +{ + return runTestHarness( argc, argv, num_fns, basefn_list, basefn_names, false, false, 0 ); +} + + diff --git a/test_conformance/vec_step/procs.h b/test_conformance/vec_step/procs.h new file mode 100644 index 00000000..c7461e17 --- /dev/null +++ b/test_conformance/vec_step/procs.h @@ -0,0 +1,43 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/errorHelpers.h" +#include "../../test_common/harness/kernelHelpers.h" +#include "../../test_common/harness/threadTesting.h" +#include "../../test_common/harness/typeWrappers.h" +#include "../../test_common/harness/conversions.h" +#include "../../test_common/harness/mt19937.h" + +// The number of errors to print out for each test in the shuffle tests +#define MAX_ERRORS_TO_PRINT 1 + + +extern int create_program_and_kernel(const char *source, const char *kernel_name, cl_program *program_ret, cl_kernel *kernel_ret); + + +/* + test_step_type, + test_step_var, + test_step_typedef_type, + test_step_typedef_var, +*/ + +extern int test_step_type(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_step_var(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_step_typedef_type(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_step_typedef_var(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); diff --git a/test_conformance/vec_step/structs.c b/test_conformance/vec_step/structs.c new file mode 100644 index 00000000..b36e892f --- /dev/null +++ b/test_conformance/vec_step/structs.c @@ -0,0 +1,285 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "structs.h" + + +#include "defines.h" + +/** typedef struct _bufferStruct + { + void * m_pIn; + void * m_pOut; + + cl_mem m_outBuffer; + cl_mem m_inBuffer; + + size_t m_bufSize; + } bufferStruct; + */ + + +clState * newClState(cl_device_id device, cl_context context, cl_command_queue queue) +{ + clState * pResult = (clState *)malloc(sizeof(clState)); + + pResult->m_device = device; + pResult->m_context = context; + pResult->m_queue = queue; + + pResult->m_kernel = NULL; pResult->m_program = NULL; + return pResult; +} + +clState * destroyClState(clState * pState) +{ + clStateDestroyProgramAndKernel(pState); + free(pState); + return NULL; +} + + +int clStateMakeProgram(clState * pState, const char * prog, + const char * kernelName) +{ + const char * srcArr[1] = {NULL}; + srcArr[0] = prog; + int err = create_single_kernel_helper(pState->m_context, + &(pState->m_program), + &(pState->m_kernel), + 1, srcArr, kernelName ); + return err; +} + +int runKernel(clState * pState, size_t numThreads) { + int err; + pState->m_numThreads = numThreads; + err = clEnqueueNDRangeKernel(pState->m_queue, pState->m_kernel, + 1, NULL, &(pState->m_numThreads), + NULL, 0, NULL, NULL); + if(err != CL_SUCCESS) + { + log_error("clEnqueueNDRangeKernel returned %d (%x)\n", + err, err); + return -1; + } + return 0; +} + + +void clStateDestroyProgramAndKernel(clState * pState) +{ + if(pState->m_kernel != NULL) { + clReleaseKernel( pState->m_kernel ); + pState->m_kernel = NULL; + } + if(pState->m_program != NULL) { + clReleaseProgram( pState->m_program ); + pState->m_program = NULL; + } +} + +bufferStruct * newBufferStruct(size_t inSize, size_t outSize, clState * pClState) { + int error; + bufferStruct * pResult = (bufferStruct *)malloc(sizeof(bufferStruct)); + + pResult->m_bufSizeIn = inSize; + pResult->m_bufSizeOut = outSize; + + pResult->m_pIn = malloc(inSize); + pResult->m_pOut = malloc(outSize); + + pResult->m_inBuffer = clCreateBuffer(pClState->m_context, CL_MEM_READ_ONLY, + 
inSize, NULL, &error); + if( pResult->m_inBuffer == NULL ) + { + vlog_error( "clCreateArray failed for input (%d)\n", error ); + return destroyBufferStruct(pResult, pClState); + } + + pResult->m_outBuffer = clCreateBuffer( pClState->m_context, + CL_MEM_WRITE_ONLY, + outSize, + NULL, + &error ); + if( pResult->m_outBuffer == NULL ) + { + vlog_error( "clCreateArray failed for output (%d)\n", error ); + return destroyBufferStruct(pResult, pClState); + } + + return pResult; +} + +bufferStruct * destroyBufferStruct(bufferStruct * destroyMe, clState * pClState) { + if(destroyMe) + { + if(destroyMe->m_outBuffer != NULL) { + clReleaseMemObject(destroyMe->m_outBuffer); + destroyMe->m_outBuffer = NULL; + } + if(destroyMe->m_inBuffer != NULL) { + clReleaseMemObject(destroyMe->m_inBuffer); + destroyMe->m_inBuffer = NULL; + } + if(destroyMe->m_pIn != NULL) { + free(destroyMe->m_pIn); + destroyMe->m_pIn = NULL; + } + if(destroyMe->m_pOut != NULL) { + free(destroyMe->m_pOut); + destroyMe->m_pOut = NULL; + } + + free((void *)destroyMe); + destroyMe = NULL; + } + return destroyMe; +} + +void initContents(bufferStruct * pBufferStruct, clState * pClState, + size_t typeSize, + size_t countIn, size_t countOut ) +{ + size_t i; + + uint64_t start = 0; + + switch(typeSize) + { + case 1: { + uint8_t* ub = (uint8_t *)(pBufferStruct->m_pIn); + for (i=0; i < countIn; ++i) + { + ub[i] = (uint8_t)start++; + } + break; + } + case 2: { + uint16_t* us = (uint16_t *)(pBufferStruct->m_pIn); + for (i=0; i < countIn; ++i) + { + us[i] = (uint16_t)start++; + } + break; + } + case 4: { + if (!g_wimpyMode) { + uint32_t* ui = (uint32_t *)(pBufferStruct->m_pIn); + for (i=0; i < countIn; ++i) { + ui[i] = (uint32_t)start++; + } + } + else { + // The short test doesn't iterate over the entire 32 bit space so + // we alternate between positive and negative values + int32_t* ui = (int32_t *)(pBufferStruct->m_pIn); + int32_t sign = 1; + for (i=0; i < countIn; ++i, ++start) { + ui[i] = (int32_t)start*sign; + sign 
= sign * -1; + } + } + break; + } + case 8: { + // We don't iterate over the entire space of 64 bit so for the + // selects, we want to test positive and negative values + int64_t* ll = (int64_t *)(pBufferStruct->m_pIn); + int64_t sign = 1; + for (i=0; i < countIn; ++i, ++start) { + ll[i] = start*sign; + sign = sign * -1; + } + break; + } + default: { + log_error("invalid type size %x\n", (int)typeSize); + } + } + // pBufferStruct->m_bufSizeIn + // pBufferStruct->m_bufSizeOut +} + +int pushArgs(bufferStruct * pBufferStruct, clState * pClState) +{ + int err; + err = clEnqueueWriteBuffer(pClState->m_queue, pBufferStruct->m_inBuffer, + CL_TRUE, 0, pBufferStruct->m_bufSizeIn, + pBufferStruct->m_pIn, 0, NULL, NULL); + if(err != CL_SUCCESS) + { + log_error("clEnqueueWriteBuffer failed\n"); + return -1; + } + + err = clSetKernelArg(pClState->m_kernel, 0, + sizeof(pBufferStruct->m_inBuffer), // pBufferStruct->m_bufSizeIn, + &(pBufferStruct->m_inBuffer)); + if(err != CL_SUCCESS) + { + log_error("clSetKernelArgs failed, first arg (0)\n"); + return -1; + } + + err = clSetKernelArg(pClState->m_kernel, 1, + sizeof(pBufferStruct->m_outBuffer), // pBufferStruct->m_bufSizeOut, + &(pBufferStruct->m_outBuffer)); + if(err != CL_SUCCESS) + { + log_error("clSetKernelArgs failed, second arg (1)\n"); + return -1; + } + + return 0; +} + +int retrieveResults(bufferStruct * pBufferStruct, clState * pClState) +{ + int err; + err = clEnqueueReadBuffer(pClState->m_queue, pBufferStruct->m_outBuffer, + CL_TRUE, 0, pBufferStruct->m_bufSizeOut, + pBufferStruct->m_pOut, 0, NULL, NULL); + if(err != CL_SUCCESS) + { + log_error("clEnqueueReadBuffer failed\n"); + return -1; + } + return 0; +} + +int checkCorrectness(bufferStruct * pBufferStruct, clState * pClState, + size_t typeSize, + size_t vecWidth) +{ + size_t i; + cl_int targetSize = (cl_int) vecWidth; + cl_int * targetArr = (cl_int *)(pBufferStruct->m_pOut); + if(targetSize == 3) + { + targetSize = 4; // hack for 4-aligned vec3 types + } + for(i 
= 0; i < pClState->m_numThreads; ++i) + { + if(targetArr[i] != targetSize) + { + vlog_error("Error %ld (of %ld). Expected %d, got %d\n", + i, pClState->m_numThreads, + targetSize, targetArr[i]); + return -1; + } + } + return 0; +} diff --git a/test_conformance/vec_step/structs.h b/test_conformance/vec_step/structs.h new file mode 100644 index 00000000..e9a4cdc3 --- /dev/null +++ b/test_conformance/vec_step/structs.h @@ -0,0 +1,67 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "testBase.h" + + +#include "../../test_common/harness/conversions.h" +#include "../../test_common/harness/typeWrappers.h" + +typedef struct _clState +{ + cl_device_id m_device; + cl_context m_context; + cl_command_queue m_queue; + + cl_program m_program; + cl_kernel m_kernel; + size_t m_numThreads; +} clState; + +clState * newClState(cl_device_id device, cl_context context, cl_command_queue queue); +clState * destroyClState(clState * pState); + +int clStateMakeProgram(clState * pState, const char * prog, + const char * kernelName); +void clStateDestroyProgramAndKernel(clState * pState); + +int runKernel(clState * pState, size_t numThreads); + +typedef struct _bufferStruct +{ + void * m_pIn; + void * m_pOut; + + cl_mem m_outBuffer; + cl_mem m_inBuffer; + + size_t m_bufSizeIn, m_bufSizeOut; +} bufferStruct; + + +bufferStruct * newBufferStruct(size_t inSize, size_t outSize, clState * pClState); + +bufferStruct * destroyBufferStruct(bufferStruct * destroyMe, clState * pClState); + +void initContents(bufferStruct * pBufferStruct, clState * pClState, + size_t typeSize, + size_t vecWidth); + +int pushArgs(bufferStruct * pBufferStruct, clState * pClState); +int retrieveResults(bufferStruct * pBufferStruct, clState * pClState); + +int checkCorrectness(bufferStruct * pBufferStruct, clState * pClState, + size_t typeSize, + size_t vecWidth); diff --git a/test_conformance/vec_step/testBase.h b/test_conformance/vec_step/testBase.h new file mode 100644 index 00000000..00dcc07b --- /dev/null +++ b/test_conformance/vec_step/testBase.h @@ -0,0 +1,28 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef _testBase_h +#define _testBase_h + +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include + +#include "procs.h" + +#endif // _testBase_h diff --git a/test_conformance/vec_step/test_step.c b/test_conformance/vec_step/test_step.c new file mode 100644 index 00000000..89291abe --- /dev/null +++ b/test_conformance/vec_step/test_step.c @@ -0,0 +1,253 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "testBase.h" + + +#include "../../test_common/harness/conversions.h" +#include "../../test_common/harness/typeWrappers.h" +#include "../../test_common/harness/testHarness.h" + +#include "structs.h" + +#include "defines.h" + +#include "type_replacer.h" + + +/* + test_step_type, + test_step_var, + test_step_typedef_type, + test_step_typedef_var, + */ + + + +int test_step_internal(cl_device_id deviceID, cl_context context, cl_command_queue queue, const char * pattern, const char * testName) +{ + int err; + int typeIdx, vecSizeIdx; + + char tempBuffer[2048]; + + clState * pClState = newClState(deviceID, context, queue); + bufferStruct * pBuffers = + newBufferStruct(BUFFER_SIZE, BUFFER_SIZE, pClState); + + if(pBuffers == NULL) { + destroyClState(pClState); + vlog_error("%s : Could not create buffer\n", testName); + return -1; + } + + //detect whether profile of the device is embedded + char profile[1024] = ""; + err = clGetDeviceInfo(deviceID, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL); + if (err) + { + print_error(err, "clGetDeviceInfo for CL_DEVICE_PROFILE failed\n" ); + test_finish(); + return -1; + } + gIsEmbedded = NULL != strstr(profile, "EMBEDDED_PROFILE"); + + for(typeIdx = 0; types[typeIdx] != kNumExplicitTypes; ++typeIdx) + { + if( types[ typeIdx ] == kDouble ) + { + // If we're testing doubles, we need to check for support first + if( !is_extension_available( deviceID, "cl_khr_fp64" ) ) + { + log_info( "Not testing doubles (unsupported on this device)\n" ); + continue; + } + } + + if( types[ typeIdx ] == kLong || types[ typeIdx ] == kULong ) + { + // If we're testing long/ulong, we need to check for embedded support + if( gIsEmbedded && !is_extension_available( deviceID, "cles_khr_int64") ) + { + log_info( "Not testing longs (unsupported on this embedded device)\n" ); + continue; + } + } + + char srcBuffer[2048]; + + doSingleReplace(tempBuffer, 2048, pattern, + ".EXTENSIONS.", types[typeIdx] == kDouble + ? 
"#pragma OPENCL EXTENSION cl_khr_fp64 : enable" + : ""); + + for(vecSizeIdx = 0; vecSizeIdx < NUM_VECTOR_SIZES; ++vecSizeIdx) + { + doReplace(srcBuffer, 2048, tempBuffer, + ".TYPE.", g_arrTypeNames[typeIdx], + ".NUM.", g_arrVecSizeNames[vecSizeIdx]); + + if(srcBuffer[0] == '\0') { + vlog_error("%s: failed to fill source buf for type %s%s\n", + testName, + g_arrTypeNames[typeIdx], + g_arrVecSizeNames[vecSizeIdx]); + destroyBufferStruct(pBuffers, pClState); + destroyClState(pClState); + return -1; + } + + err = clStateMakeProgram(pClState, srcBuffer, testName ); + if (err) + { + vlog_error("%s: Error compiling \"\n%s\n\"", + testName, srcBuffer); + destroyBufferStruct(pBuffers, pClState); + destroyClState(pClState); + return -1; + } + + err = pushArgs(pBuffers, pClState); + if(err != 0) + { + vlog_error("%s: failed to push args %s%s\n", + testName, + g_arrTypeNames[typeIdx], + g_arrVecSizeNames[vecSizeIdx]); + destroyBufferStruct(pBuffers, pClState); + destroyClState(pClState); + return -1; + } + + // now we run the kernel + err = runKernel(pClState, 1024); + if(err != 0) + { + vlog_error("%s: runKernel fail (%ld threads) %s%s\n", + testName, pClState->m_numThreads, + g_arrTypeNames[typeIdx], + g_arrVecSizeNames[vecSizeIdx]); + destroyBufferStruct(pBuffers, pClState); + destroyClState(pClState); + return -1; + } + + err = retrieveResults(pBuffers, pClState); + if(err != 0) + { + vlog_error("%s: failed to retrieve results %s%s\n", + testName, + g_arrTypeNames[typeIdx], + g_arrVecSizeNames[vecSizeIdx]); + destroyBufferStruct(pBuffers, pClState); + destroyClState(pClState); + return -1; + } + + err = checkCorrectness(pBuffers, pClState, + g_arrTypeSizes[typeIdx], + g_arrVecSizes[vecSizeIdx]); + + if(err != 0) + { + vlog_error("%s: incorrect results %s%s\n", + testName, + g_arrTypeNames[typeIdx], + g_arrVecSizeNames[vecSizeIdx]); + vlog_error("%s: Source was \"\n%s\n\"", + testName, srcBuffer); + destroyBufferStruct(pBuffers, pClState); + destroyClState(pClState); + 
return -1; + } + } + + } + + destroyBufferStruct(pBuffers, pClState); + + destroyClState(pClState); + + + // vlog_error("%s : implementation incomplete : FAIL\n", testName); + return 0; // -1; // fails on account of not being written. +} + +const char * patterns[] = { + ".EXTENSIONS.\n" + "__kernel void test_step_type(__global .TYPE..NUM. *source, __global int *dest)\n" + "{\n" + " int tid = get_global_id(0);\n" + " dest[tid] = vec_step(.TYPE..NUM.);\n" + "\n" + "}\n", + + ".EXTENSIONS.\n" + "__kernel void test_step_var(__global .TYPE..NUM. *source, __global int *dest)\n" + "{\n" + " int tid = get_global_id(0);\n" + " dest[tid] = vec_step(source[tid]);\n" + "\n" + "}\n", + + ".EXTENSIONS.\n" + " typedef .TYPE..NUM. TypeToTest;\n" + "__kernel void test_step_typedef_type(__global TypeToTest *source, __global int *dest)\n" + "{\n" + " int tid = get_global_id(0);\n" + " dest[tid] = vec_step(TypeToTest);\n" + "\n" + "}\n", + + ".EXTENSIONS.\n" + " typedef .TYPE..NUM. TypeToTest;\n" + "__kernel void test_step_typedef_var(__global TypeToTest *source, __global int *dest)\n" + "{\n" + " int tid = get_global_id(0);\n" + " dest[tid] = vec_step(source[tid]);\n" + "\n" + "}\n", +}; + +/* + test_step_type, + test_step_var, + test_step_typedef_type, + test_step_typedef_var, + */ + +int test_step_type(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return test_step_internal(deviceID, context, queue, patterns[0], + "test_step_type"); +} + +int test_step_var(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return test_step_internal(deviceID, context, queue, patterns[1], + "test_step_var"); +} + +int test_step_typedef_type(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + return test_step_internal(deviceID, context, queue, patterns[2], + "test_step_typedef_type"); +} + +int test_step_typedef_var(cl_device_id deviceID, cl_context context, cl_command_queue queue, 
int num_elements) +{ + return test_step_internal(deviceID, context, queue, patterns[3], + "test_step_typedef_var"); +} diff --git a/test_conformance/vec_step/type_replacer.c b/test_conformance/vec_step/type_replacer.c new file mode 100644 index 00000000..74967b2c --- /dev/null +++ b/test_conformance/vec_step/type_replacer.c @@ -0,0 +1,115 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include +#if !defined(_MSC_VER) +#include +#endif // !_MSC_VER + +size_t doReplace(char * dest, size_t destLength, const char * source, + const char * stringToReplace1, const char * replaceWith1, + const char * stringToReplace2, const char * replaceWith2) +{ + size_t copyCount = 0; + const char * sourcePtr = source; + char * destPtr = dest; + const char * ptr1; + const char * ptr2; + size_t nJump; + size_t len1, len2; + size_t lenReplace1, lenReplace2; + len1 = strlen(stringToReplace1); + len2 = strlen(stringToReplace2); + lenReplace1 = strlen(replaceWith1); + lenReplace2 = strlen(replaceWith2); + for(;copyCount < destLength && *sourcePtr; ) + { + ptr1 = strstr(sourcePtr, stringToReplace1); + ptr2 = strstr(sourcePtr, stringToReplace2); + if(ptr1 != NULL && (ptr2 == NULL || ptr2 > ptr1)) + { + nJump = ptr1-sourcePtr; + if(((uintptr_t)ptr1-(uintptr_t)sourcePtr) > destLength-copyCount) { return -1; } + copyCount += nJump; + strncpy(destPtr, sourcePtr, nJump); + destPtr += nJump; + sourcePtr += nJump + len1; + 
strcpy(destPtr, replaceWith1); + destPtr += lenReplace1; + } + else if(ptr2 != NULL && (ptr1 == NULL || ptr1 >= ptr2)) + { + nJump = ptr2-sourcePtr; + if(nJump > destLength-copyCount) { return -2; } + copyCount += nJump; + strncpy(destPtr, sourcePtr, nJump); + destPtr += nJump; + sourcePtr += nJump + len2; + strcpy(destPtr, replaceWith2); + destPtr += lenReplace2; + } + else + { + nJump = strlen(sourcePtr); + if(nJump > destLength-copyCount) { return -3; } + copyCount += nJump; + strcpy(destPtr, sourcePtr); + destPtr += nJump; + sourcePtr += nJump; + } + } + *destPtr = '\0'; + return copyCount; +} + +size_t doSingleReplace(char * dest, size_t destLength, const char * source, + const char * stringToReplace, const char * replaceWith) +{ + size_t copyCount = 0; + const char * sourcePtr = source; + char * destPtr = dest; + const char * ptr; + size_t nJump; + size_t len; + size_t lenReplace; + len = strlen(stringToReplace); + lenReplace = strlen(replaceWith); + for(;copyCount < destLength && *sourcePtr; ) + { + ptr = strstr(sourcePtr, stringToReplace); + if(ptr != NULL) + { + nJump = ptr-sourcePtr; + if(((uintptr_t)ptr-(uintptr_t)sourcePtr) > destLength-copyCount) { return -1; } + copyCount += nJump; + strncpy(destPtr, sourcePtr, nJump); + destPtr += nJump; + sourcePtr += nJump + len; + strcpy(destPtr, replaceWith); + destPtr += lenReplace; + } + else + { + nJump = strlen(sourcePtr); + if(nJump > destLength-copyCount) { return -3; } + copyCount += nJump; + strcpy(destPtr, sourcePtr); + destPtr += nJump; + sourcePtr += nJump; + } + } + *destPtr = '\0'; + return copyCount; +} diff --git a/test_conformance/vec_step/type_replacer.h b/test_conformance/vec_step/type_replacer.h new file mode 100644 index 00000000..f50b08d7 --- /dev/null +++ b/test_conformance/vec_step/type_replacer.h @@ -0,0 +1,23 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include + +size_t doReplace(char * dest, size_t destLength, const char * source, + const char * stringToReplace1, const char * replaceWith1, + const char * stringToReplace2, const char * replaceWith2); + +size_t doSingleReplace(char * dest, size_t destLength, const char * source, + const char * stringToReplace, const char * replaceWith); diff --git a/test_conformance/workgroups/CMakeLists.txt b/test_conformance/workgroups/CMakeLists.txt new file mode 100644 index 00000000..ffef9383 --- /dev/null +++ b/test_conformance/workgroups/CMakeLists.txt @@ -0,0 +1,26 @@ +set(MODULE_NAME WORKGROUPS) + +set(${MODULE_NAME}_SOURCES + main.c + test_wg_all.c + test_wg_any.c + test_wg_broadcast.c + test_wg_reduce.c + test_wg_reduce_max.c + test_wg_reduce_min.c + test_wg_scan_exclusive_add.c + test_wg_scan_exclusive_min.c + test_wg_scan_exclusive_max.c + test_wg_scan_inclusive_add.c + test_wg_scan_inclusive_min.c + test_wg_scan_inclusive_max.c + ../../test_common/harness/errorHelpers.c + ../../test_common/harness/testHarness.c + ../../test_common/harness/kernelHelpers.c + ../../test_common/harness/mt19937.c + ../../test_common/harness/msvc9.c + ../../test_common/harness/conversions.c + ../../test_common/harness/parseParameters.cpp +) + +include(../CMakeCommon.txt) diff --git a/test_conformance/workgroups/Makefile b/test_conformance/workgroups/Makefile new file mode 100644 index 00000000..6f9d0a04 --- /dev/null +++ 
b/test_conformance/workgroups/Makefile @@ -0,0 +1,50 @@ +ifdef BUILD_WITH_ATF +ATF = -framework ATF +USE_ATF = -DUSE_ATF +endif + +SRCS = main.c \ + test_wg_all.c \ + test_wg_any.c \ + test_wg_reduce.c \ + test_wg_reduce_min.c \ + test_wg_reduce_max.c \ + test_wg_scan_inclusive_add.c \ + test_wg_scan_exclusive_add.c \ + test_wg_broadcast.c \ + ../../test_common/harness/errorHelpers.c \ + ../../test_common/harness/threadTesting.c \ + ../../test_common/harness/testHarness.c \ + ../../test_common/harness/conversions.c \ + ../../test_common/harness/mt19937.c \ + ../../test_common/harness/kernelHelpers.c + +DEFINES = + +SOURCES = $(abspath $(SRCS)) +LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries +LIBPATH += -L. +FRAMEWORK = $(SOURCES) +HEADERS = +TARGET = test_workgroup_fns +INCLUDE = +COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32 -Os +CC = c++ +CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE) +CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE) +LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF} + +OBJECTS := ${SOURCES:.c=.o} +OBJECTS := ${OBJECTS:.cpp=.o} + +TARGETOBJECT = +all: $(TARGET) + +$(TARGET): $(OBJECTS) + $(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES) + +clean: + rm -f $(TARGET) $(OBJECTS) + +.DEFAULT: + @echo The target \"$@\" does not exist in Makefile. diff --git a/test_conformance/workgroups/main.c b/test_conformance/workgroups/main.c new file mode 100644 index 00000000..0365979c --- /dev/null +++ b/test_conformance/workgroups/main.c @@ -0,0 +1,70 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" + +#include +#include +#include "procs.h" +#include "../../test_common/harness/testHarness.h" +#if !defined(_WIN32) +#include +#endif + +basefn basefn_list[] = { + test_work_group_all, + test_work_group_any, + test_work_group_reduce_add, + test_work_group_reduce_min, + test_work_group_reduce_max, + test_work_group_scan_inclusive_add, + test_work_group_scan_inclusive_min, + test_work_group_scan_inclusive_max, + test_work_group_scan_exclusive_add, + test_work_group_scan_exclusive_min, + test_work_group_scan_exclusive_max, + test_work_group_broadcast_1D, + test_work_group_broadcast_2D, + test_work_group_broadcast_3D, +}; + + +const char *basefn_names[] = { + "work_group_all", + "work_group_any", + "work_group_reduce_add", + "work_group_reduce_min", + "work_group_reduce_max", + "work_group_scan_inclusive_add", + "work_group_scan_inclusive_min", + "work_group_scan_inclusive_max", + "work_group_scan_exclusive_add", + "work_group_scan_exclusive_min", + "work_group_scan_exclusive_max", + "work_group_broadcast_1D", + "work_group_broadcast_2D", + "work_group_broadcast_3D", +}; + +ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_list) / sizeof(basefn_list[0]))); + +int num_fns = sizeof(basefn_names) / sizeof(char *); + +int main(int argc, const char *argv[]) +{ + return runTestHarness( argc, argv, num_fns, basefn_list, basefn_names, false, false, 0 ); +} + + diff --git a/test_conformance/workgroups/procs.h b/test_conformance/workgroups/procs.h new file mode 100644 index 
00000000..0f919148 --- /dev/null +++ b/test_conformance/workgroups/procs.h @@ -0,0 +1,38 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/testHarness.h" +#include "../../test_common/harness/kernelHelpers.h" +#include "../../test_common/harness/errorHelpers.h" +#include "../../test_common/harness/conversions.h" +#include "../../test_common/harness/mt19937.h" + +extern int create_program_and_kernel(const char *source, const char *kernel_name, cl_program *program_ret, cl_kernel *kernel_ret); + +extern int test_work_group_all(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_work_group_any(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_work_group_broadcast_1D(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_work_group_broadcast_2D(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_work_group_broadcast_3D(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_work_group_reduce_add(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_work_group_reduce_min(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int 
test_work_group_reduce_max(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_work_group_scan_exclusive_add(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_work_group_scan_exclusive_min(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_work_group_scan_exclusive_max(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_work_group_scan_inclusive_add(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_work_group_scan_inclusive_min(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_work_group_scan_inclusive_max(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); diff --git a/test_conformance/workgroups/testBase.h b/test_conformance/workgroups/testBase.h new file mode 100644 index 00000000..5073b21f --- /dev/null +++ b/test_conformance/workgroups/testBase.h @@ -0,0 +1,31 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef _testBase_h +#define _testBase_h + +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include + +#include "procs.h" + +#endif // _testBase_h + + + diff --git a/test_conformance/workgroups/test_wg_all.c b/test_conformance/workgroups/test_wg_all.c new file mode 100644 index 00000000..da775d6b --- /dev/null +++ b/test_conformance/workgroups/test_wg_all.c @@ -0,0 +1,167 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include + +#include "procs.h" + + +const char *wg_all_kernel_code = +"__kernel void test_wg_all(global float *input, global int *output)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" int result = work_group_all((input[tid] > input[tid+1]));\n" +" output[tid] = result;\n" +"}\n"; + + +static int +verify_wg_all(float *inptr, int *outptr, size_t n, size_t wg_size) +{ + size_t i, j; + + for (i=0; i wg_size ? wg_size : (n-i)); j++) + { + if (!(inptr[i+j] > inptr[i+j+1])) + { + predicate_all = 0; + break; + } + } + for (j=0; j<((n-i) > wg_size ? 
wg_size : (n-i)); j++) + { + if ( (predicate_all && (outptr[i+j] == 0)) || + ((predicate_all == 0) && outptr[i+j]) ) + { + log_info("work_group_all: Error at %lu: expected = %d, got = %d\n", i+j, predicate_all, outptr[i+j]); + return -1; + } + } + } + + return 0; +} + +int +test_work_group_all(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + cl_mem streams[2]; + cl_float *input_ptr[1], *p; + cl_int *output_ptr; + cl_program program; + cl_kernel kernel; + void *values[2]; + size_t threads[1]; + size_t wg_size[1]; + size_t num_elements; + int err; + int i; + MTdata d; + + err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_all_kernel_code, "test_wg_all", "-cl-std=CL2.0" ); + if (err) + return -1; + + err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL); + if (err) + return -1; + + num_elements = n_elems; + + input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * (num_elements+1)); + output_ptr = (cl_int*)malloc(sizeof(cl_int) * (num_elements+1)); + streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * (num_elements+1), NULL, NULL ); + if (!streams[0]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + + streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * num_elements, NULL, NULL ); + if (!streams[1]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + + p = input_ptr[0]; + d = init_genrand( gRandomSeed ); + for (i=0; i<(num_elements+1); i++) + { + p[i] = get_random_float((float)(-100000.f * M_PI), (float)(100000.f * M_PI) ,d); + } + free_mtdata(d); d = NULL; + + err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_float)*(num_elements+1), (void *)input_ptr[0], 0, NULL, NULL ); + if (err != CL_SUCCESS) + { + log_error("clWriteArray failed\n"); + return -1; + } + + values[0] = streams[0]; + values[1] = streams[1]; + err = 
clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0] ); + err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1] ); + if (err != CL_SUCCESS) + { + log_error("clSetKernelArgs failed\n"); + return -1; + } + + // Line below is troublesome... + threads[0] = (size_t)n_elems; + err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, wg_size, 0, NULL, NULL ); + if (err != CL_SUCCESS) + { + log_error("clEnqueueNDRangeKernel failed\n"); + return -1; + } + + cl_uint dead = 0xdeaddead; + memset_pattern4(output_ptr, &dead, sizeof(cl_float)*num_elements); + err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_int)*num_elements, (void *)output_ptr, 0, NULL, NULL ); + if (err != CL_SUCCESS) + { + log_error("clEnqueueReadBuffer failed\n"); + return -1; + } + + if (verify_wg_all(input_ptr[0], output_ptr, num_elements, wg_size[0])) + { + log_error("work_group_all test failed\n"); + return -1; + } + log_info("work_group_all test passed\n"); + + clReleaseMemObject(streams[0]); + clReleaseMemObject(streams[1]); + clReleaseKernel(kernel); + clReleaseProgram(program); + free(input_ptr[0]); + free(output_ptr); + + return err; +} + diff --git a/test_conformance/workgroups/test_wg_any.c b/test_conformance/workgroups/test_wg_any.c new file mode 100644 index 00000000..e843a5b3 --- /dev/null +++ b/test_conformance/workgroups/test_wg_any.c @@ -0,0 +1,167 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include + +#include "procs.h" + + +const char *wg_any_kernel_code = +"__kernel void test_wg_any(global float *input, global int *output)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" int result = work_group_any((input[tid] > input[tid+1]));\n" +" output[tid] = result;\n" +"}\n"; + + +static int +verify_wg_any(float *inptr, int *outptr, size_t n, size_t wg_size) +{ + size_t i, j; + + for (i=0; i wg_size ? wg_size : (n-i)); j++) + { + if (inptr[i+j] > inptr[i+j+1]) + { + predicate_any = 0xFFFFFFFF; + break; + } + } + for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++) + { + if ( (predicate_any && (outptr[i+j] == 0)) || + ((predicate_any == 0) && outptr[i+j]) ) + { + log_info("work_group_any: Error at %lu: expected = %d, got = %d\n", i+j, predicate_any, outptr[i+j]); + return -1; + } + } + } + + return 0; +} + +int +test_work_group_any(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + cl_mem streams[2]; + cl_float *input_ptr[1], *p; + cl_int *output_ptr; + cl_program program; + cl_kernel kernel; + void *values[2]; + size_t threads[1]; + size_t wg_size[1]; + size_t num_elements; + int err; + int i; + MTdata d; + + err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_any_kernel_code, "test_wg_any", "-cl-std=CL2.0" ); + if (err) + return -1; + + err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL); + if (err) + return -1; + + num_elements = n_elems; + + input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * (num_elements+1)); + output_ptr = (cl_int*)malloc(sizeof(cl_int) * (num_elements+1)); + streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * (num_elements+1), NULL, NULL ); + if (!streams[0]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + + streams[1] = clCreateBuffer( context, 
(cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * num_elements, NULL, NULL ); + if (!streams[1]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + + p = input_ptr[0]; + d = init_genrand( gRandomSeed ); + for (i=0; i<(num_elements+1); i++) + { + p[i] = get_random_float((float)(-100000.f * M_PI), (float)(100000.f * M_PI) ,d); + } + free_mtdata(d); d = NULL; + + err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_float)*(num_elements+1), (void *)input_ptr[0], 0, NULL, NULL ); + if (err != CL_SUCCESS) + { + log_error("clWriteArray failed\n"); + return -1; + } + + values[0] = streams[0]; + values[1] = streams[1]; + err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0] ); + err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1] ); + if (err != CL_SUCCESS) + { + log_error("clSetKernelArgs failed\n"); + return -1; + } + + // Line below is troublesome... + threads[0] = (size_t)n_elems; + err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, wg_size, 0, NULL, NULL ); + if (err != CL_SUCCESS) + { + log_error("clEnqueueNDRangeKernel failed\n"); + return -1; + } + + cl_uint dead = 0xdeaddead; + memset_pattern4(output_ptr, &dead, sizeof(cl_float)*num_elements); + err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_int)*num_elements, (void *)output_ptr, 0, NULL, NULL ); + if (err != CL_SUCCESS) + { + log_error("clEnqueueReadBuffer failed\n"); + return -1; + } + + if (verify_wg_any(input_ptr[0], output_ptr, num_elements, wg_size[0])) + { + log_error("work_group_any test failed\n"); + return -1; + } + log_info("work_group_any test passed\n"); + + clReleaseMemObject(streams[0]); + clReleaseMemObject(streams[1]); + clReleaseKernel(kernel); + clReleaseProgram(program); + free(input_ptr[0]); + free(output_ptr); + + return err; +} + diff --git a/test_conformance/workgroups/test_wg_broadcast.c b/test_conformance/workgroups/test_wg_broadcast.c new file mode 100644 index 00000000..6a49aa21 --- /dev/null +++ 
b/test_conformance/workgroups/test_wg_broadcast.c @@ -0,0 +1,523 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "../../test_common/harness/compat.h" + +#include +#include +#include +#include + +#include "procs.h" + + +const char *wg_broadcast_1D_kernel_code = +"__kernel void test_wg_broadcast_1D(global float *input, global float *output)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" float result = work_group_broadcast(input[tid], get_group_id(0) % get_local_size(0));\n" +" output[tid] = result;\n" +"}\n"; + +const char *wg_broadcast_2D_kernel_code = +"__kernel void test_wg_broadcast_2D(global float *input, global float *output)\n" +"{\n" +" size_t tid_x = get_global_id(0);\n" +" size_t tid_y = get_global_id(1);\n" +" size_t x = get_group_id(0) % get_local_size(0);\n" +" size_t y = get_group_id(1) % get_local_size(1);\n" +"\n" +" size_t indx = (tid_y * get_global_size(0)) + tid_x;\n" +" float result = work_group_broadcast(input[indx], x, y);\n" +" output[indx] = result;\n" +"}\n"; + +const char *wg_broadcast_3D_kernel_code = +"__kernel void test_wg_broadcast_3D(global float *input, global float *output)\n" +"{\n" +" size_t tid_x = get_global_id(0);\n" +" size_t tid_y = get_global_id(1);\n" +" size_t tid_z = get_global_id(2);\n" +" size_t x = get_group_id(0) % get_local_size(0);\n" +" size_t y = get_group_id(1) % get_local_size(1);\n" +" size_t z = get_group_id(2) % 
get_local_size(2);\n" +"\n" +" size_t indx = (tid_z * get_global_size(1) * get_global_size(0)) + (tid_y * get_global_size(0)) + tid_x;\n" +" float result = work_group_broadcast(input[indx], x, y, z);\n" +" output[indx] = result;\n" +"}\n"; + +static int +verify_wg_broadcast_1D(float *inptr, float *outptr, size_t n, size_t wg_size) +{ + size_t i, j; + size_t group_id; + + for (i=0,group_id=0; i wg_size ? wg_size : (n-i); + float broadcast_result = inptr[i + (group_id % local_size)]; + for (j=0; j wg_size_y ? wg_size_y : (ny-i); + for (_i=0; _i < local_size_y; _i++) + { + for (j=0,group_id_x=0; j wg_size_x ? wg_size_x : (nx-j); + float broadcast_result = inptr[(i + y) * nx + (j + x)]; + for (_j=0; _j < local_size_x; _j++) + { + size_t indx = (i + _i) * nx + (j + _j); + if ( broadcast_result != outptr[indx] ) + { + log_info("work_group_broadcast: Error at (%u, %u): expected = %f, got = %f\n", j+_j, i+_i, broadcast_result, outptr[indx]); + return -1; + } + } + } + } + } + + return 0; +} + +static int +verify_wg_broadcast_3D(float *inptr, float *outptr, size_t nx, size_t ny, size_t nz, size_t wg_size_x, size_t wg_size_y, size_t wg_size_z) +{ + size_t i, j, k, _i, _j, _k; + size_t group_id_x, group_id_y, group_id_z; + + for (i=0,group_id_z=0; i wg_size_z ? wg_size_z : (nz-i); + for (_i=0; _i < local_size_z; _i++) + { + for (j=0,group_id_y=0; j wg_size_y ? wg_size_y : (ny-j); + for (_j=0; _j < local_size_y; _j++) + { + for (k=0,group_id_x=0; k wg_size_x ? 
wg_size_x : (nx-k); + float broadcast_result = inptr[(i + z) * ny * nz + (j + y) * nx + (k + x)]; + for (_k=0; _k < local_size_x; _k++) + { + size_t indx = (i + _i) * ny * nx + (j + _j) * nx + (k + _k); + if ( broadcast_result != outptr[indx] ) + { + log_info("work_group_broadcast: Error at (%u, %u, %u): expected = %f, got = %f\n", k+_k, j+_j, i+_i, broadcast_result, outptr[indx]); + return -1; + } + } + } + } + } + } + } + + return 0; +} + + +int +test_work_group_broadcast_1D(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + cl_mem streams[2]; + cl_float *input_ptr[1], *p; + cl_float *output_ptr; + cl_program program; + cl_kernel kernel; + void *values[2]; + size_t globalsize[1]; + size_t wg_size[1]; + size_t num_elements; + int err; + int i; + MTdata d; + + err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_broadcast_1D_kernel_code, "test_wg_broadcast_1D", "-cl-std=CL2.0" ); + if (err) + return -1; + + err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL); + if (err) + return -1; + + num_elements = n_elems; + + input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements); + output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements); + streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * num_elements, NULL, NULL ); + if (!streams[0]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + + streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * num_elements, NULL, NULL ); + if (!streams[1]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + + p = input_ptr[0]; + d = init_genrand( gRandomSeed ); + for (i=0; i= 256) + { + localsize[0] = localsize[1] = 16; + } + else if (wg_size[0] >=64) + { + localsize[0] = localsize[1] = 8; + } + else if (wg_size[0] >= 16) + { + localsize[0] = localsize[1] = 4; + } + else + { + localsize[0] = localsize[1] = 
1; + } + + num_workgroups = MAX(n_elems/wg_size[0], 16); + globalsize[0] = num_workgroups * localsize[0]; + globalsize[1] = num_workgroups * localsize[1]; + num_elements = globalsize[0] * globalsize[1]; + + input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements); + output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements); + streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * num_elements, NULL, NULL ); + if (!streams[0]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + + streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * num_elements, NULL, NULL ); + if (!streams[1]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + + p = input_ptr[0]; + d = init_genrand( gRandomSeed ); + for (i=0; i=512) + { + localsize[0] = localsize[1] = localsize[2] = 8; + } + else if (wg_size[0] >= 64) + { + localsize[0] = localsize[1] = localsize[2] = 4; + } + else if (wg_size[0] >= 8) + { + localsize[0] = localsize[1] = localsize[2] = 2; + } + else + { + localsize[0] = localsize[1] = localsize[2] = 1; + } + + num_workgroups = MAX(n_elems/wg_size[0], 8); + globalsize[0] = num_workgroups * localsize[0]; + globalsize[1] = num_workgroups * localsize[1]; + globalsize[2] = num_workgroups * localsize[2]; + num_elements = globalsize[0] * globalsize[1] * globalsize[2]; + + input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements); + output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements); + streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * num_elements, NULL, NULL ); + if (!streams[0]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + + streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * num_elements, NULL, NULL ); + if (!streams[1]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + + p = input_ptr[0]; + d = init_genrand( gRandomSeed ); + for (i=0; i 
+#include +#include +#include + +#include "procs.h" + + +const char *wg_reduce_add_kernel_code_int = +"__kernel void test_wg_reduce_add_int(global int *input, global int *output)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" int result = work_group_reduce_add(input[tid]);\n" +" output[tid] = result;\n" +"}\n"; + + +const char *wg_reduce_add_kernel_code_uint = +"__kernel void test_wg_reduce_add_uint(global uint *input, global uint *output)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" uint result = work_group_reduce_add(input[tid]);\n" +" output[tid] = result;\n" +"}\n"; + +const char *wg_reduce_add_kernel_code_long = +"__kernel void test_wg_reduce_add_long(global long *input, global long *output)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" long result = work_group_reduce_add(input[tid]);\n" +" output[tid] = result;\n" +"}\n"; + + +const char *wg_reduce_add_kernel_code_ulong = +"__kernel void test_wg_reduce_add_ulong(global ulong *input, global ulong *output)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" ulong result = work_group_reduce_add(input[tid]);\n" +" output[tid] = result;\n" +"}\n"; + + +static int +verify_wg_reduce_add_int(int *inptr, int *outptr, size_t n, size_t wg_size) +{ + size_t i, j; + + for (i=0; i wg_size ? wg_size : (n-i)); j++) + sum += inptr[i+j]; + + for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++) + { + if ( sum != outptr[i+j] ) + { + log_info("work_group_reduce_add int: Error at %u: expected = %d, got = %d\n", i+j, sum, outptr[i+j]); + return -1; + } + } + } + + return 0; +} + +static int +verify_wg_reduce_add_uint(unsigned int *inptr, unsigned int *outptr, size_t n, size_t wg_size) +{ + size_t i, j; + + for (i=0; i wg_size ? wg_size : (n-i)); j++) + sum += inptr[i+j]; + + for (j=0; j<((n-i) > wg_size ? 
wg_size : (n-i)); j++) + { + if ( sum != outptr[i+j] ) + { + log_info("work_group_reduce_add uint: Error at %u: expected = %d, got = %d\n", i+j, sum, outptr[i+j]); + return -1; + } + } + } + + return 0; +} + +static int +verify_wg_reduce_add_long(cl_long *inptr, cl_long *outptr, size_t n, size_t wg_size) +{ + size_t i, j; + + for (i=0; i wg_size ? wg_size : (n-i)); j++) + sum += inptr[i+j]; + + for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++) + { + if ( sum != outptr[i+j] ) + { + log_info("work_group_reduce_add long: Error at %u: expected = %lld, got = %lld\n", i+j, sum, outptr[i+j]); + return -1; + } + } + } + + return 0; +} + +static int +verify_wg_reduce_add_ulong(cl_ulong *inptr, cl_ulong *outptr, size_t n, size_t wg_size) +{ + size_t i, j; + + for (i=0; i wg_size ? wg_size : (n-i)); j++) + sum += inptr[i+j]; + + for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++) + { + if ( sum != outptr[i+j] ) + { + log_info("work_group_reduce_add ulong: Error at %u: expected = %llu, got = %llu\n", i+j, sum, outptr[i+j]); + return -1; + } + } + } + + return 0; +} + + + +int +test_work_group_reduce_add_int(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + cl_mem streams[2]; + cl_int *input_ptr[1], *p; + cl_int *output_ptr; + cl_program program; + cl_kernel kernel; + void *values[2]; + size_t threads[1]; + size_t wg_size[1]; + size_t num_elements; + int err; + int i; + MTdata d; + + err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_reduce_add_kernel_code_int, "test_wg_reduce_add_int", "-cl-std=CL2.0" ); + if (err) + return -1; + + err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL); + if (err) + return -1; + + num_elements = n_elems; + + input_ptr[0] = (cl_int*)malloc(sizeof(cl_int) * num_elements); + output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements); + streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), 
sizeof(cl_int) * num_elements, NULL, NULL ); + if (!streams[0]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + + streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * num_elements, NULL, NULL ); + if (!streams[1]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + + p = input_ptr[0]; + d = init_genrand( gRandomSeed ); + for (i=0; i +#include +#include +#include + +#include "procs.h" + + +const char *wg_reduce_max_kernel_code_int = +"__kernel void test_wg_reduce_max_int(global int *input, global int *output)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" int result = work_group_reduce_max(input[tid]);\n" +" output[tid] = result;\n" +"}\n"; + + +const char *wg_reduce_max_kernel_code_uint = +"__kernel void test_wg_reduce_max_uint(global uint *input, global uint *output)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" uint result = work_group_reduce_max(input[tid]);\n" +" output[tid] = result;\n" +"}\n"; + +const char *wg_reduce_max_kernel_code_long = +"__kernel void test_wg_reduce_max_long(global long *input, global long *output)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" long result = work_group_reduce_max(input[tid]);\n" +" output[tid] = result;\n" +"}\n"; + + +const char *wg_reduce_max_kernel_code_ulong = +"__kernel void test_wg_reduce_max_ulong(global ulong *input, global ulong *output)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" ulong result = work_group_reduce_max(input[tid]);\n" +" output[tid] = result;\n" +"}\n"; + + +static int +verify_wg_reduce_max_int(int *inptr, int *outptr, size_t n, size_t wg_size) +{ + size_t i, j; + + for (i=0; i wg_size ? wg_size : (n-i)); j++) + max = (max > inptr[i+j]) ? max : inptr[i+j]; + + for (j=0; j<((n-i) > wg_size ? 
wg_size : (n-i)); j++) + { + if ( max != outptr[i+j] ) + { + log_info("work_group_reduce_max int: Error at %u: expected = %d, got = %d\n", i+j, max, outptr[i+j]); + return -1; + } + } + } + + return 0; +} + +static int +verify_wg_reduce_max_uint(unsigned int *inptr, unsigned int *outptr, size_t n, size_t wg_size) +{ + size_t i, j; + + for (i=0; i wg_size ? wg_size : (n-i)); j++) + max = (max > inptr[i+j]) ? max : inptr[i+j]; + + for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++) + { + if ( max != outptr[i+j] ) + { + log_info("work_group_reduce_max uint: Error at %u: expected = %d, got = %d\n", i+j, max, outptr[i+j]); + return -1; + } + } + } + + return 0; +} + +static int +verify_wg_reduce_max_long(cl_long *inptr, cl_long *outptr, size_t n, size_t wg_size) +{ + size_t i, j; + + for (i=0; i wg_size ? wg_size : (n-i)); j++) + max = (max > inptr[i+j]) ? max : inptr[i+j]; + + for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++) + { + if ( max != outptr[i+j] ) + { + log_info("work_group_reduce_max long: Error at %u: expected = %lld, got = %lld\n", i+j, max, outptr[i+j]); + return -1; + } + } + } + + return 0; +} + +static int +verify_wg_reduce_max_ulong(cl_ulong *inptr, cl_ulong *outptr, size_t n, size_t wg_size) +{ + size_t i, j; + + for (i=0; i wg_size ? wg_size : (n-i)); j++) + max = (max > inptr[i+j]) ? max : inptr[i+j]; + + for (j=0; j<((n-i) > wg_size ? 
wg_size : (n-i)); j++) + { + if ( max != outptr[i+j] ) + { + log_info("work_group_reduce_max ulong: Error at %u: expected = %llu, got = %llu\n", i+j, max, outptr[i+j]); + return -1; + } + } + } + + return 0; +} + + + +int +test_work_group_reduce_max_int(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + cl_mem streams[2]; + cl_int *input_ptr[1], *p; + cl_int *output_ptr; + cl_program program; + cl_kernel kernel; + void *values[2]; + size_t threads[1]; + size_t wg_size[1]; + size_t num_elements; + int err; + int i; + MTdata d; + + err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_reduce_max_kernel_code_int, "test_wg_reduce_max_int", "-cl-std=CL2.0" ); + if (err) + return -1; + + err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL); + if (err) + return -1; + + num_elements = n_elems; + + input_ptr[0] = (cl_int*)malloc(sizeof(cl_int) * num_elements); + output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements); + streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * num_elements, NULL, NULL ); + if (!streams[0]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + + streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * num_elements, NULL, NULL ); + if (!streams[1]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + + p = input_ptr[0]; + d = init_genrand( gRandomSeed ); + for (i=0; i +#include +#include +#include + +#include "procs.h" + + +const char *wg_reduce_min_kernel_code_int = +"__kernel void test_wg_reduce_min_int(global int *input, global int *output)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" int result = work_group_reduce_min(input[tid]);\n" +" output[tid] = result;\n" +"}\n"; + + +const char *wg_reduce_min_kernel_code_uint = +"__kernel void test_wg_reduce_min_uint(global uint *input, global uint *output)\n" +"{\n" +" int tid = 
get_global_id(0);\n" +"\n" +" uint result = work_group_reduce_min(input[tid]);\n" +" output[tid] = result;\n" +"}\n"; + +const char *wg_reduce_min_kernel_code_long = +"__kernel void test_wg_reduce_min_long(global long *input, global long *output)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" long result = work_group_reduce_min(input[tid]);\n" +" output[tid] = result;\n" +"}\n"; + + +const char *wg_reduce_min_kernel_code_ulong = +"__kernel void test_wg_reduce_min_ulong(global ulong *input, global ulong *output)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" ulong result = work_group_reduce_min(input[tid]);\n" +" output[tid] = result;\n" +"}\n"; + + +static int +verify_wg_reduce_min_int(int *inptr, int *outptr, size_t n, size_t wg_size) +{ + size_t i, j; + + for (i=0; i wg_size ? wg_size : (n-i)); j++) + min = (min < inptr[i+j]) ? min : inptr[i+j]; + + for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++) + { + if ( min != outptr[i+j] ) + { + log_info("work_group_reduce_min int: Error at %u: expected = %d, got = %d\n", i+j, min, outptr[i+j]); + return -1; + } + } + } + + return 0; +} + +static int +verify_wg_reduce_min_uint(unsigned int *inptr, unsigned int *outptr, size_t n, size_t wg_size) +{ + size_t i, j; + + for (i=0; i wg_size ? wg_size : (n-i)); j++) + min = (min < inptr[i+j]) ? min : inptr[i+j]; + + for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++) + { + if ( min != outptr[i+j] ) + { + log_info("work_group_reduce_min uint: Error at %u: expected = %d, got = %d\n", i+j, min, outptr[i+j]); + return -1; + } + } + } + + return 0; +} + +static int +verify_wg_reduce_min_long(cl_long *inptr, cl_long *outptr, size_t n, size_t wg_size) +{ + size_t i, j; + + for (i=0; i wg_size ? wg_size : (n-i)); j++) + min = (min < inptr[i+j]) ? min : inptr[i+j]; + + for (j=0; j<((n-i) > wg_size ? 
wg_size : (n-i)); j++) + { + if ( min != outptr[i+j] ) + { + log_info("work_group_reduce_min long: Error at %u: expected = %lld, got = %lld\n", i+j, min, outptr[i+j]); + return -1; + } + } + } + + return 0; +} + +static int +verify_wg_reduce_min_ulong(cl_ulong *inptr, cl_ulong *outptr, size_t n, size_t wg_size) +{ + size_t i, j; + + for (i=0; i wg_size ? wg_size : (n-i)); j++) + min = (min < inptr[i+j]) ? min : inptr[i+j]; + + for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++) + { + if ( min != outptr[i+j] ) + { + log_info("work_group_reduce_min ulong: Error at %u: expected = %llu, got = %llu\n", i+j, min, outptr[i+j]); + return -1; + } + } + } + + return 0; +} + + + +int +test_work_group_reduce_min_int(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + cl_mem streams[2]; + cl_int *input_ptr[1], *p; + cl_int *output_ptr; + cl_program program; + cl_kernel kernel; + void *values[2]; + size_t threads[1]; + size_t wg_size[1]; + size_t num_elements; + int err; + int i; + MTdata d; + + err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_reduce_min_kernel_code_int, "test_wg_reduce_min_int", "-cl-std=CL2.0" ); + if (err) + return -1; + + err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL); + if (err) + return -1; + + num_elements = n_elems; + + input_ptr[0] = (cl_int*)malloc(sizeof(cl_int) * num_elements); + output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements); + streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * num_elements, NULL, NULL ); + if (!streams[0]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + + streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * num_elements, NULL, NULL ); + if (!streams[1]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + + p = input_ptr[0]; + d = init_genrand( gRandomSeed ); + for (i=0; i +#include 
+#include +#include + +#include "procs.h" + + +const char *wg_scan_exclusive_add_kernel_code_int = +"__kernel void test_wg_scan_exclusive_add_int(global int *input, global int *output)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" int result = work_group_scan_exclusive_add(input[tid]);\n" +" output[tid] = result;\n" +"}\n"; + + +const char *wg_scan_exclusive_add_kernel_code_uint = +"__kernel void test_wg_scan_exclusive_add_uint(global uint *input, global uint *output)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" uint result = work_group_scan_exclusive_add(input[tid]);\n" +" output[tid] = result;\n" +"}\n"; + +const char *wg_scan_exclusive_add_kernel_code_long = +"__kernel void test_wg_scan_exclusive_add_long(global long *input, global long *output)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" long result = work_group_scan_exclusive_add(input[tid]);\n" +" output[tid] = result;\n" +"}\n"; + + +const char *wg_scan_exclusive_add_kernel_code_ulong = +"__kernel void test_wg_scan_exclusive_add_ulong(global ulong *input, global ulong *output)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" ulong result = work_group_scan_exclusive_add(input[tid]);\n" +" output[tid] = result;\n" +"}\n"; + + +static int +verify_wg_scan_exclusive_add_int(int *inptr, int *outptr, size_t n, size_t wg_size) { + + size_t i, j, m; + int s, lasts; + + + + for (j = 0; j < n; j += wg_size) { + m = n - j; + if (m > wg_size) m = wg_size; + + s = 0; + lasts = 0; + for (i = 0; i < m; ++i) { + s += inptr[j + i]; + if (outptr[j + i] != lasts) { + log_info("work_group_scan_exclusive_add int: Error at %u: expected = %d, got = %d\n", + (unsigned int)(j + i), lasts, outptr[j + i]); + return -1; + } + lasts = s; + } + } + return 0; +} + +static int +verify_wg_scan_exclusive_add_uint(unsigned int *inptr, unsigned int *outptr, size_t n, size_t wg_size) { + + size_t i, j, m; + unsigned int s, lasts; + + for (j = 0; j < n; j += wg_size) { + m = n - j; + if (m > wg_size) m = wg_size; + s = 
0; + lasts = 0; + for (i = 0; i < m; ++i) { + s += inptr[j + i]; + if (outptr[j + i] != lasts) { + log_info("work_group_scan_exclusive_add uint: Error at %u: expected = %u, got = %u\n", + (unsigned int)(j + i), lasts, outptr[j + i]); + return -1; + } + lasts = s; + } + } + + return 0; +} + +static int +verify_wg_scan_exclusive_add_long(cl_long *inptr, cl_long *outptr, size_t n, size_t wg_size) { + + size_t i, j, m; + cl_long s, lasts; + + for (j = 0; j < n; j += wg_size) { + m = n - j; + if (m > wg_size) m = wg_size; + s = 0; + + lasts = 0; + for (i = 0; i < m; ++i) { + s += inptr[j + i]; + + if (outptr[j + i] != lasts) { + log_info("work_group_scan_exclusive_add long: Error at %u: expected = %lld, got = %lld\n", + (unsigned int)(j + i), (long long)lasts, (long long)outptr[j + i]); + return -1; + } + lasts = s; + } + } + + return 0; +} + +static int +verify_wg_scan_exclusive_add_ulong(cl_ulong *inptr, cl_ulong *outptr, size_t n, size_t wg_size) { + + size_t i, j, m; + + cl_ulong s, lasts; + + for (j = 0; j < n; j += wg_size) { + m = n - j; + if (m > wg_size) m = wg_size; + + s = 0; + lasts = 0; + for (i = 0; i < m; ++i) { + s += inptr[j + i]; + if (outptr[j + i] != lasts) { + log_info("work_group_scan_exclusive_add ulong: Error at %u: expected = %llu, got = %llu\n", + (unsigned int)(j + i), (unsigned long long)lasts, (unsigned long long)outptr[j + i]); + return -1; + } + lasts = s; + } + } + return 0; +} + + +int +test_work_group_scan_exclusive_add_int(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + cl_mem streams[2]; + cl_int *input_ptr[1], *p; + cl_int *output_ptr; + cl_program program; + cl_kernel kernel; + void *values[2]; + size_t threads[1]; + size_t wg_size[1]; + size_t num_elements; + int err; + int i; + MTdata d; + + err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_exclusive_add_kernel_code_int, "test_wg_scan_exclusive_add_int", "-cl-std=CL2.0" ); + if (err) + return -1; + + 
err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL); + if (err) + return -1; + + num_elements = n_elems; + + input_ptr[0] = (cl_int*)malloc(sizeof(cl_int) * num_elements); + output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements); + streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * num_elements, NULL, NULL ); + if (!streams[0]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + + streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * num_elements, NULL, NULL ); + if (!streams[1]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + + p = input_ptr[0]; + d = init_genrand( gRandomSeed ); + for (i=0; i +#include +#include +#include + +#include "procs.h" + + +const char *wg_scan_exclusive_max_kernel_code_int = +"__kernel void test_wg_scan_exclusive_max_int(global int *input, global int *output)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" int result = work_group_scan_exclusive_max(input[tid]);\n" +" output[tid] = result;\n" +"}\n"; + + +const char *wg_scan_exclusive_max_kernel_code_uint = +"__kernel void test_wg_scan_exclusive_max_uint(global uint *input, global uint *output)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" uint result = work_group_scan_exclusive_max(input[tid]);\n" +" output[tid] = result;\n" +"}\n"; + +const char *wg_scan_exclusive_max_kernel_code_long = +"__kernel void test_wg_scan_exclusive_max_long(global long *input, global long *output)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" long result = work_group_scan_exclusive_max(input[tid]);\n" +" output[tid] = result;\n" +"}\n"; + + +const char *wg_scan_exclusive_max_kernel_code_ulong = +"__kernel void test_wg_scan_exclusive_max_ulong(global ulong *input, global ulong *output)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" ulong result = work_group_scan_exclusive_max(input[tid]);\n" +" output[tid] = result;\n" +"}\n"; + + 
+static int +verify_wg_scan_exclusive_max_int(int *inptr, int *outptr, size_t n, size_t wg_size) { + + size_t i, j, m; + + for (j=0; j wg_size) + m = wg_size; + + for (i = 0; i < m; ++i) { + if (outptr[j+i] != max_) { + log_info("work_group_scan_exclusive_max int: Error at %u: expected = %d, got = %d\n", (unsigned int)(j+i), max_, outptr[j+i]); + return -1; + } + max_ = MAX(inptr[j+i], max_); + } + } + + return 0; +} + +static int +verify_wg_scan_exclusive_max_uint(unsigned int *inptr, unsigned int *outptr, size_t n, size_t wg_size) { + + size_t i, j, m; + + for (j=0; j wg_size) + m = wg_size; + + for (i = 0; i < m; ++i) { + if (outptr[j+i] != max_) { + log_info("work_group_scan_exclusive_max int: Error at %u: expected = %u, got = %u\n", (unsigned int)(j+i), max_, outptr[j+i]); + return -1; + } + max_ = MAX(inptr[j+i], max_); + } + } + + return 0; +} + +static int +verify_wg_scan_exclusive_max_long(cl_long *inptr, cl_long *outptr, size_t n, size_t wg_size) { + + size_t i, j, m; + + for (j=0; j wg_size) + m = wg_size; + + for (i = 0; i < m; ++i) { + if (outptr[j+i] != max_) { + log_info("work_group_scan_exclusive_max long: Error at %u: expected = %lld, got = %lld\n", (unsigned int)(j+i), max_, outptr[j+i]); + return -1; + } + max_ = MAX(inptr[j+i], max_); + } + } + + return 0; +} + +static int +verify_wg_scan_exclusive_max_ulong(cl_ulong *inptr, cl_ulong *outptr, size_t n, size_t wg_size) { + + size_t i, j, m; + + for (j=0; j wg_size) + m = wg_size; + + for (i = 0; i < m; ++i) { + if (outptr[j+i] != max_) { + log_info("work_group_scan_exclusive_max ulong: Error at %u: expected = %llu, got = %llu\n", (unsigned int)(j+i), max_, outptr[j+i]); + return -1; + } + max_ = MAX(inptr[j+i], max_); + } + } + + return 0; +} + + +int +test_work_group_scan_exclusive_max_int(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + cl_mem streams[2]; + cl_int *input_ptr[1], *p; + cl_int *output_ptr; + cl_program program; + cl_kernel kernel; + void 
*values[2]; + size_t threads[1]; + size_t wg_size[1]; + size_t num_elements; + int err; + int i; + MTdata d; + + err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_exclusive_max_kernel_code_int, "test_wg_scan_exclusive_max_int", "-cl-std=CL2.0" ); + if (err) + return -1; + + err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL); + if (err) + return -1; + + num_elements = n_elems; + + input_ptr[0] = (cl_int*)malloc(sizeof(cl_int) * num_elements); + output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements); + streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * num_elements, NULL, NULL ); + if (!streams[0]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + + streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * num_elements, NULL, NULL ); + if (!streams[1]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + + p = input_ptr[0]; + d = init_genrand( gRandomSeed ); + for (i=0; i +#include +#include +#include + +#include "procs.h" + + +const char *wg_scan_exclusive_min_kernel_code_int = +"__kernel void test_wg_scan_exclusive_min_int(global int *input, global int *output)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" int result = work_group_scan_exclusive_min(input[tid]);\n" +" output[tid] = result;\n" +"}\n"; + + +const char *wg_scan_exclusive_min_kernel_code_uint = +"__kernel void test_wg_scan_exclusive_min_uint(global uint *input, global uint *output)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" uint result = work_group_scan_exclusive_min(input[tid]);\n" +" output[tid] = result;\n" +"}\n"; + +const char *wg_scan_exclusive_min_kernel_code_long = +"__kernel void test_wg_scan_exclusive_min_long(global long *input, global long *output)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" long result = work_group_scan_exclusive_min(input[tid]);\n" +" output[tid] = 
result;\n" +"}\n"; + + +const char *wg_scan_exclusive_min_kernel_code_ulong = +"__kernel void test_wg_scan_exclusive_min_ulong(global ulong *input, global ulong *output)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" ulong result = work_group_scan_exclusive_min(input[tid]);\n" +" output[tid] = result;\n" +"}\n"; + + + +static int +verify_wg_scan_exclusive_min_int(int *inptr, int *outptr, size_t n, size_t wg_size) { + + size_t i, j, m; + + for (j=0; j wg_size) + m = wg_size; + + for (i = 0; i < m; ++i) { + if (outptr[j+i] != min_) { + log_info("work_group_scan_exclusive_min int: Error at %u: expected = %d, got = %d\n", (unsigned int)(j+i), min_, outptr[j+i]); + return -1; + } + min_ = MIN(inptr[j+i], min_); + } + } + + return 0; +} + +static int +verify_wg_scan_exclusive_min_uint(unsigned int *inptr, unsigned int *outptr, size_t n, size_t wg_size) { + + size_t i, j, m; + + for (j=0; j wg_size) + m = wg_size; + + for (i = 0; i < m; ++i) { + if (outptr[j+i] != min_) { + log_info("work_group_scan_exclusive_min int: Error at %u: expected = %u, got = %u\n", j+i, min_, outptr[j+i]); + return -1; + } + min_ = MIN(inptr[j+i], min_); + } + } + + return 0; +} + +static int +verify_wg_scan_exclusive_min_long(cl_long *inptr, cl_long *outptr, size_t n, size_t wg_size) { + + size_t i, j, m; + + for (j=0; j wg_size) + m = wg_size; + + for (i = 0; i < m; ++i) { + if (outptr[j+i] != min_) { + log_info("work_group_scan_exclusive_min long: Error at %u: expected = %lld, got = %lld\n", (unsigned int)(j+i), min_, outptr[j+i]); + return -1; + } + min_ = MIN(inptr[j+i], min_); + } + } + + return 0; +} + +static int +verify_wg_scan_exclusive_min_ulong(cl_ulong *inptr, cl_ulong *outptr, size_t n, size_t wg_size) { + + size_t i, j, m; + + for (j=0; j wg_size) + m = wg_size; + + for (i = 0; i < m; ++i) { + if (outptr[j+i] != min_) { + log_info("work_group_scan_exclusive_min ulong: Error at %u: expected = %llu, got = %llu\n", (unsigned int)(j+i), min_, outptr[j+i]); + return -1; + } + 
min_ = MIN(inptr[j+i], min_); + } + } + + return 0; +} + + +int +test_work_group_scan_exclusive_min_int(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + cl_mem streams[2]; + cl_int *input_ptr[1], *p; + cl_int *output_ptr; + cl_program program; + cl_kernel kernel; + void *values[2]; + size_t threads[1]; + size_t wg_size[1]; + size_t num_elements; + int err; + int i; + MTdata d; + + err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_exclusive_min_kernel_code_int, "test_wg_scan_exclusive_min_int", "-cl-std=CL2.0" ); + if (err) + return -1; + + err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL); + if (err) + return -1; + + num_elements = n_elems; + + input_ptr[0] = (cl_int*)malloc(sizeof(cl_int) * num_elements); + output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements); + streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * num_elements, NULL, NULL ); + if (!streams[0]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + + streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * num_elements, NULL, NULL ); + if (!streams[1]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + + p = input_ptr[0]; + d = init_genrand( gRandomSeed ); + for (i=0; i +#include +#include +#include + +#include "procs.h" + + +const char *wg_scan_inclusive_add_kernel_code_int = +"__kernel void test_wg_scan_inclusive_add_int(global int *input, global int *output)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" int result = work_group_scan_inclusive_add(input[tid]);\n" +" output[tid] = result;\n" +"}\n"; + + +const char *wg_scan_inclusive_add_kernel_code_uint = +"__kernel void test_wg_scan_inclusive_add_uint(global uint *input, global uint *output)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" uint result = work_group_scan_inclusive_add(input[tid]);\n" +" 
output[tid] = result;\n" +"}\n"; + +const char *wg_scan_inclusive_add_kernel_code_long = +"__kernel void test_wg_scan_inclusive_add_long(global long *input, global long *output)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" long result = work_group_scan_inclusive_add(input[tid]);\n" +" output[tid] = result;\n" +"}\n"; + + +const char *wg_scan_inclusive_add_kernel_code_ulong = +"__kernel void test_wg_scan_inclusive_add_ulong(global ulong *input, global ulong *output)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" ulong result = work_group_scan_inclusive_add(input[tid]);\n" +" output[tid] = result;\n" +"}\n"; + + +static int +verify_wg_scan_inclusive_add_int(int *inptr, int *outptr, size_t n, size_t wg_size) +{ + size_t i, j, m; + int s; + + for (j=0; j wg_size) + m = wg_size; + + s = 0; + for (i=0; i wg_size) + m = wg_size; + + s = 0; + for (i=0; i wg_size) + m = wg_size; + + s = 0; + for (i=0; i wg_size) + m = wg_size; + + s = 0; + for (i=0; i +#include +#include +#include + +#include "procs.h" + + +const char *wg_scan_inclusive_max_kernel_code_int = +"__kernel void test_wg_scan_inclusive_max_int(global int *input, global int *output)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" int result = work_group_scan_inclusive_max(input[tid]);\n" +" output[tid] = result;\n" +"}\n"; + + +const char *wg_scan_inclusive_max_kernel_code_uint = +"__kernel void test_wg_scan_inclusive_max_uint(global uint *input, global uint *output)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" uint result = work_group_scan_inclusive_max(input[tid]);\n" +" output[tid] = result;\n" +"}\n"; + +const char *wg_scan_inclusive_max_kernel_code_long = +"__kernel void test_wg_scan_inclusive_max_long(global long *input, global long *output)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" long result = work_group_scan_inclusive_max(input[tid]);\n" +" output[tid] = result;\n" +"}\n"; + + +const char *wg_scan_inclusive_max_kernel_code_ulong = +"__kernel void 
test_wg_scan_inclusive_max_ulong(global ulong *input, global ulong *output)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" ulong result = work_group_scan_inclusive_max(input[tid]);\n" +" output[tid] = result;\n" +"}\n"; + + +static int +verify_wg_scan_inclusive_max_int(int *inptr, int *outptr, size_t n, size_t wg_size) { + + size_t i, j, m; + + for (j=0; j wg_size) + m = wg_size; + + for (i = 0; i < m; ++i) { + max_ = MAX(inptr[j+i], max_); + if (outptr[j+i] != max_) { + log_info("work_group_scan_inclusive_max int: Error at %u: expected = %d, got = %d\n", (unsigned int)(j+i), max_, outptr[j+i]); + return -1; + } + } + } + + return 0; +} + +static int +verify_wg_scan_inclusive_max_uint(unsigned int *inptr, unsigned int *outptr, size_t n, size_t wg_size) { + + size_t i, j, m; + + for (j=0; j wg_size) + m = wg_size; + + for (i = 0; i < m; ++i) { + max_ = MAX(inptr[j+i], max_); + if (outptr[j+i] != max_) { + log_info("work_group_scan_inclusive_max int: Error at %lu: expected = %u, got = %u\n", (unsigned long)(j+i), max_, outptr[j+i]); + return -1; + } + } + } + + return 0; +} + +static int +verify_wg_scan_inclusive_max_long(cl_long *inptr, cl_long *outptr, size_t n, size_t wg_size) { + + size_t i, j, m; + + for (j=0; j wg_size) + m = wg_size; + + for (i = 0; i < m; ++i) { + max_ = MAX(inptr[j+i], max_); + if (outptr[j+i] != max_) { + log_info("work_group_scan_inclusive_max long: Error at %u: expected = %lld, got = %lld\n", (unsigned int)(j+i), max_, outptr[j+i]); + return -1; + } + } + } + + return 0; +} + +static int +verify_wg_scan_inclusive_max_ulong(cl_ulong *inptr, cl_ulong *outptr, size_t n, size_t wg_size) { + + size_t i, j, m; + + for (j=0; j wg_size) + m = wg_size; + + for (i = 0; i < m; ++i) { + max_ = MAX(inptr[j+i], max_); + if (outptr[j+i] != max_) { + log_info("work_group_scan_inclusive_max ulong: Error at %u: expected = %llu, got = %llu\n", (unsigned int)(j+i), max_, outptr[j+i]); + return -1; + } + } + } + + return 0; +} + + +int 
+test_work_group_scan_inclusive_max_int(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + cl_mem streams[2]; + cl_int *input_ptr[1], *p; + cl_int *output_ptr; + cl_program program; + cl_kernel kernel; + void *values[2]; + size_t threads[1]; + size_t wg_size[1]; + size_t num_elements; + int err; + int i; + MTdata d; + + err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_inclusive_max_kernel_code_int, "test_wg_scan_inclusive_max_int", "-cl-std=CL2.0" ); + if (err) + return -1; + + err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL); + if (err) + return -1; + + num_elements = n_elems; + + input_ptr[0] = (cl_int*)malloc(sizeof(cl_int) * num_elements); + output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements); + streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * num_elements, NULL, NULL ); + if (!streams[0]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + + streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * num_elements, NULL, NULL ); + if (!streams[1]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + + p = input_ptr[0]; + d = init_genrand( gRandomSeed ); + for (i=0; i +#include +#include +#include + +#include "procs.h" + + +const char *wg_scan_inclusive_min_kernel_code_int = +"__kernel void test_wg_scan_inclusive_min_int(global int *input, global int *output)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" int result = work_group_scan_inclusive_min(input[tid]);\n" +" output[tid] = result;\n" +"}\n"; + + +const char *wg_scan_inclusive_min_kernel_code_uint = +"__kernel void test_wg_scan_inclusive_min_uint(global uint *input, global uint *output)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" uint result = work_group_scan_inclusive_min(input[tid]);\n" +" output[tid] = result;\n" +"}\n"; + +const char 
*wg_scan_inclusive_min_kernel_code_long = +"__kernel void test_wg_scan_inclusive_min_long(global long *input, global long *output)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" long result = work_group_scan_inclusive_min(input[tid]);\n" +" output[tid] = result;\n" +"}\n"; + + +const char *wg_scan_inclusive_min_kernel_code_ulong = +"__kernel void test_wg_scan_inclusive_min_ulong(global ulong *input, global ulong *output)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" ulong result = work_group_scan_inclusive_min(input[tid]);\n" +" output[tid] = result;\n" +"}\n"; + + +static int +verify_wg_scan_inclusive_min_int(int *inptr, int *outptr, size_t n, size_t wg_size) { + + size_t i, j, m; + + for (j=0; j wg_size) + m = wg_size; + + for (i = 0; i < m; ++i) { + min_ = MIN(inptr[j+i], min_); + if (outptr[j+i] != min_) { + log_info("work_group_scan_inclusive_min int: Error at %u: expected = %d, got = %d\n", (unsigned int)(j+i), min_, outptr[j+i]); + return -1; + } + } + } + + return 0; +} + +static int +verify_wg_scan_inclusive_min_uint(unsigned int *inptr, unsigned int *outptr, size_t n, size_t wg_size) { + + size_t i, j, m; + + for (j=0; j wg_size) + m = wg_size; + + for (i = 0; i < m; ++i) { + min_ = MIN(inptr[j+i], min_); + if (outptr[j+i] != min_) { + log_info("work_group_scan_inclusive_min int: Error at %u: expected = %u, got = %u\n", (unsigned int)(j+i), min_, outptr[j+i]); + return -1; + } + } + } + + return 0; +} + +static int +verify_wg_scan_inclusive_min_long(cl_long *inptr, cl_long *outptr, size_t n, size_t wg_size) { + + size_t i, j, m; + + for (j=0; j wg_size) + m = wg_size; + + for (i = 0; i < m; ++i) { + min_ = MIN(inptr[j+i], min_); + if (outptr[j+i] != min_) { + log_info("work_group_scan_inclusive_min long: Error at %u: expected = %lld, got = %lld\n", (unsigned int)(j+i), min_, outptr[j+i]); + return -1; + } + } + } + + return 0; +} + +static int +verify_wg_scan_inclusive_min_ulong(cl_ulong *inptr, cl_ulong *outptr, size_t n, size_t wg_size) { + 
+ size_t i, j, m; + + for (j=0; j wg_size) + m = wg_size; + + for (i = 0; i < m; ++i) { + min_ = MIN(inptr[j+i], min_); + if (outptr[j+i] != min_) { + log_info("work_group_scan_inclusive_min ulong: Error at %u: expected = %llu, got = %llu\n", (unsigned int)(j+i), min_, outptr[j+i]); + return -1; + } + } + } + + return 0; +} + + +int +test_work_group_scan_inclusive_min_int(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +{ + cl_mem streams[2]; + cl_int *input_ptr[1], *p; + cl_int *output_ptr; + cl_program program; + cl_kernel kernel; + void *values[2]; + size_t threads[1]; + size_t wg_size[1]; + size_t num_elements; + int err; + int i; + MTdata d; + + err = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &wg_scan_inclusive_min_kernel_code_int, "test_wg_scan_inclusive_min_int", "-cl-std=CL2.0" ); + if (err) + return -1; + + err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL); + if (err) + return -1; + + num_elements = n_elems; + + input_ptr[0] = (cl_int*)malloc(sizeof(cl_int) * num_elements); + output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements); + streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * num_elements, NULL, NULL ); + if (!streams[0]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + + streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * num_elements, NULL, NULL ); + if (!streams[1]) + { + log_error("clCreateBuffer failed\n"); + return -1; + } + + p = input_ptr[0]; + d = init_genrand( gRandomSeed ); + for (i=0; i +#include + +#include "../../test_common/harness/testHarness.h" +#include "utils.h" + +extern int test_context_create(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_get_device_ids(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int 
test_api(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_kernel(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_other_data_types(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_memory_access(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_interop_user_sync(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +basefn basefn_list[] = { + test_context_create, + test_get_device_ids, + test_api, + test_kernel, + test_other_data_types, + test_memory_access, + test_interop_user_sync, +}; + +const char *basefn_names[] = { + "test_context_create", + "test_get_device_ids", + "test_api", + "test_kernel", + "test_other_data_types", + "test_memory_access", + "test_interop_user_sync", +}; + +ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0])) == (sizeof(basefn_list) / sizeof(basefn_list[0]))); + +int num_fns = sizeof(basefn_names) / sizeof(char *); + +clGetDeviceIDsFromDX9MediaAdapterKHR_fn clGetDeviceIDsFromDX9MediaAdapterKHR = NULL; +clCreateFromDX9MediaSurfaceKHR_fn clCreateFromDX9MediaSurfaceKHR = NULL; +clEnqueueAcquireDX9MediaSurfacesKHR_fn clEnqueueAcquireDX9MediaSurfacesKHR = NULL; +clEnqueueReleaseDX9MediaSurfacesKHR_fn clEnqueueReleaseDX9MediaSurfacesKHR = NULL; + +cl_platform_id gPlatformIDdetected; +cl_device_id gDeviceIDdetected; +cl_device_type gDeviceTypeSelected = CL_DEVICE_TYPE_DEFAULT; + +bool MediaSurfaceSharingExtensionInit() +{ + clGetDeviceIDsFromDX9MediaAdapterKHR = (clGetDeviceIDsFromDX9MediaAdapterKHR_fn)clGetExtensionFunctionAddressForPlatform(gPlatformIDdetected, "clGetDeviceIDsFromDX9MediaAdapterKHR"); + if (clGetDeviceIDsFromDX9MediaAdapterKHR == NULL) + { + log_error("clGetExtensionFunctionAddressForPlatform(clGetDeviceIDsFromDX9MediaAdapterKHR) returned NULL.\n"); + return false; + } + + 
clCreateFromDX9MediaSurfaceKHR = (clCreateFromDX9MediaSurfaceKHR_fn)clGetExtensionFunctionAddressForPlatform(gPlatformIDdetected, "clCreateFromDX9MediaSurfaceKHR"); + if (clCreateFromDX9MediaSurfaceKHR == NULL) + { + log_error("clGetExtensionFunctionAddressForPlatform(clCreateFromDX9MediaSurfaceKHR) returned NULL.\n"); + return false; + } + + clEnqueueAcquireDX9MediaSurfacesKHR = (clEnqueueAcquireDX9MediaSurfacesKHR_fn)clGetExtensionFunctionAddressForPlatform(gPlatformIDdetected, "clEnqueueAcquireDX9MediaSurfacesKHR"); + if (clEnqueueAcquireDX9MediaSurfacesKHR == NULL) + { + log_error("clGetExtensionFunctionAddressForPlatform(clEnqueueAcquireDX9MediaSurfacesKHR) returned NULL.\n"); + return false; + } + + clEnqueueReleaseDX9MediaSurfacesKHR = (clEnqueueReleaseDX9MediaSurfacesKHR_fn)clGetExtensionFunctionAddressForPlatform(gPlatformIDdetected, "clEnqueueReleaseDX9MediaSurfacesKHR"); + if (clEnqueueReleaseDX9MediaSurfacesKHR == NULL) + { + log_error("clGetExtensionFunctionAddressForPlatform(clEnqueueReleaseDX9MediaSurfacesKHR) returned NULL.\n"); + return false; + } + + return true; +} + +bool DetectPlatformAndDevice() +{ + std::vector platforms; + cl_uint platformsNum = 0; + cl_int error = clGetPlatformIDs(0, 0, &platformsNum); + if (error != CL_SUCCESS) + { + print_error(error, "clGetPlatformIDs failed\n"); + return false; + } + + platforms.resize(platformsNum); + error = clGetPlatformIDs(platformsNum, &platforms[0], 0); + if (error != CL_SUCCESS) + { + print_error(error, "clGetPlatformIDs failed\n"); + return false; + } + + bool found = false; + for (size_t i = 0; i < platformsNum; ++i) + { + std::vector devices; + cl_uint devicesNum = 0; + error = clGetDeviceIDs(platforms[i], gDeviceTypeSelected, 0, 0, &devicesNum); + if (error != CL_SUCCESS) + { + print_error(error, "clGetDeviceIDs failed\n"); + return false; + } + + devices.resize(devicesNum); + error = clGetDeviceIDs(platforms[i], gDeviceTypeSelected, devicesNum, &devices[0], 0); + if (error != CL_SUCCESS) + { 
+ print_error(error, "clGetDeviceIDs failed\n"); + return false; + } + + for (size_t j = 0; j < devicesNum; ++j) + { + if (ExtensionCheck("cl_khr_dx9_media_sharing", devices[j])) + { + gPlatformIDdetected = platforms[i]; + gDeviceIDdetected = devices[j]; + found = true; + break; + } + } + } + + if (!found) + { + log_info("Test was not run, because the media surface sharing extension is not supported for any devices.\n"); + return false; + } + + return true; +} + +bool CmdlineParse(int argc, const char *argv[]) +{ + char *env_mode = getenv( "CL_DEVICE_TYPE" ); + if( env_mode != NULL ) + { + if(strcmp(env_mode, "gpu") == 0 || strcmp(env_mode, "CL_DEVICE_TYPE_GPU") == 0) + gDeviceTypeSelected = CL_DEVICE_TYPE_GPU; + else if(strcmp(env_mode, "cpu") == 0 || strcmp(env_mode, "CL_DEVICE_TYPE_CPU") == 0) + gDeviceTypeSelected = CL_DEVICE_TYPE_CPU; + else if(strcmp(env_mode, "accelerator") == 0 || strcmp(env_mode, "CL_DEVICE_TYPE_ACCELERATOR") == 0) + gDeviceTypeSelected = CL_DEVICE_TYPE_ACCELERATOR; + else if(strcmp(env_mode, "default") == 0 || strcmp(env_mode, "CL_DEVICE_TYPE_DEFAULT") == 0) + gDeviceTypeSelected = CL_DEVICE_TYPE_DEFAULT; + else + { + log_error("Unknown CL_DEVICE_TYPE env variable setting: %s.\nAborting...\n", env_mode); + return false; + } + } + + for (int i = 0; i < argc; ++i) + { + if(strcmp(argv[i], "gpu") == 0 || strcmp(argv[i], "CL_DEVICE_TYPE_GPU") == 0) + { + gDeviceTypeSelected = CL_DEVICE_TYPE_GPU; + continue; + } + else if(strcmp( argv[i], "cpu") == 0 || strcmp(argv[i], "CL_DEVICE_TYPE_CPU") == 0) + { + gDeviceTypeSelected = CL_DEVICE_TYPE_CPU; + continue; + } + else if(strcmp( argv[i], "accelerator") == 0 || strcmp(argv[i], "CL_DEVICE_TYPE_ACCELERATOR") == 0) + { + gDeviceTypeSelected = CL_DEVICE_TYPE_ACCELERATOR; + continue; + } + else if(strcmp(argv[i], "CL_DEVICE_TYPE_DEFAULT") == 0) + { + gDeviceTypeSelected = CL_DEVICE_TYPE_DEFAULT; + continue; + } + else if (strcmp(argv[i], "sw") == 0 || strcmp(argv[i], "software") == 0) + { + 
CDeviceWrapper::AccelerationType(CDeviceWrapper::ACCELERATION_SW); + } + } + + return true; +} + +int main(int argc, const char *argv[]) +{ + if (!CmdlineParse(argc, argv)) + return 2; + + if (!DetectPlatformAndDevice()) + { + log_info("Test was not run, because the media surface sharing extension is not supported\n"); + return TEST_NOT_SUPPORTED; + } + + if (!MediaSurfaceSharingExtensionInit()) + return 2; + + return runTestHarness( argc, argv, num_fns, basefn_list, basefn_names, false, true, 0 ); +} diff --git a/test_extensions/media_sharing/test_create_context.cpp b/test_extensions/media_sharing/test_create_context.cpp new file mode 100644 index 00000000..42deaa04 --- /dev/null +++ b/test_extensions/media_sharing/test_create_context.cpp @@ -0,0 +1,305 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "utils.h" + +int context_create(cl_device_id deviceID, cl_context context, cl_command_queue queue, + int num_elements, unsigned int width, unsigned int height, + TContextFuncType functionCreate, cl_dx9_media_adapter_type_khr adapterType, + TSurfaceFormat surfaceFormat, TSharedHandleType sharedHandle) +{ + CResult result; + + //create device + std::auto_ptr deviceWrapper; + if (!DeviceCreate(adapterType, deviceWrapper)) + { + result.ResultSub(CResult::TEST_ERROR); + return result.Result(); + } + + //generate input data + std::vector bufferIn(width * height * 3 / 2, 0); + if(!YUVGenerate(surfaceFormat, bufferIn, width, height, 0, 255)) + { + result.ResultSub(CResult::TEST_ERROR); + return result.Result(); + } + + while (deviceWrapper->AdapterNext()) + { + if (surfaceFormat != SURFACE_FORMAT_NV12 && !SurfaceFormatCheck(adapterType, *deviceWrapper, surfaceFormat)) + { + std::string sharedHandleStr = (sharedHandle == SHARED_HANDLE_ENABLED)? "yes": "no"; + std::string formatStr; + std::string adapterStr; + SurfaceFormatToString(surfaceFormat, formatStr); + AdapterToString(adapterType, adapterStr); + log_info("Skipping test case, image format is not supported by a device (adapter type: %s, format: %s, shared handle: %s)\n", + adapterStr.c_str(), formatStr.c_str(), sharedHandleStr.c_str()); + return result.Result(); + } + + void *objectSharedHandle = 0; + std::auto_ptr surface; + if (!MediaSurfaceCreate(adapterType, width, height, surfaceFormat, *deviceWrapper, surface, + (sharedHandle == SHARED_HANDLE_ENABLED) ? 
true: false, &objectSharedHandle)) + { + log_error("Media surface creation failed for %i adapter\n", deviceWrapper->AdapterIdx()); + result.ResultSub(CResult::TEST_ERROR); + return result.Result(); + } + + cl_context_properties contextProperties[] = { + CL_CONTEXT_PLATFORM, (cl_context_properties)gPlatformIDdetected, + AdapterTypeToContextInfo(adapterType), (cl_context_properties)deviceWrapper->Device(), + 0, + }; + + cl_int error; + clContextWrapper ctx; + switch(functionCreate) + { + case CONTEXT_CREATE_DEFAULT: + ctx = clCreateContext(&contextProperties[0], 1, &gDeviceIDdetected, NULL, NULL, &error); + break; + case CONTEXT_CREATE_FROM_TYPE: + ctx = clCreateContextFromType(&contextProperties[0], gDeviceTypeSelected, NULL, NULL, &error); + break; + default: + log_error("Unknown context creation function enum\n"); + result.ResultSub(CResult::TEST_ERROR); + return result.Result(); + break; + } + + if (error != CL_SUCCESS) + { + std::string functionName; + FunctionContextCreateToString(functionCreate, functionName); + log_error("%s failed: %s\n", functionName.c_str(), IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + if (!YUVSurfaceSet(surfaceFormat, surface, bufferIn, width, height)) + { + result.ResultSub(CResult::TEST_ERROR); + return result.Result(); + } + +#if defined(_WIN32) + cl_dx9_surface_info_khr surfaceInfo; + surfaceInfo.resource = *(static_cast(surface.get())); + surfaceInfo.shared_handle = objectSharedHandle; +#else + void *surfaceInfo = 0; + return TEST_NOT_IMPLEMENTED; +#endif + + std::vector memObjList; + unsigned int planesNum = PlanesNum(surfaceFormat); + std::vector planesList(planesNum); + for (unsigned int planeIdx = 0; planeIdx < planesNum; ++planeIdx) + { + planesList[planeIdx] = clCreateFromDX9MediaSurfaceKHR(ctx, CL_MEM_READ_WRITE, adapterType, &surfaceInfo, planeIdx, &error); + if (error != CL_SUCCESS) + { + log_error("clCreateFromDX9MediaSurfaceKHR failed for plane %i: %s\n", planeIdx, 
IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + memObjList.push_back(planesList[planeIdx]); + } + + clCommandQueueWrapper cmdQueue = clCreateCommandQueueWithProperties(ctx, gDeviceIDdetected, 0, &error ); + if (error != CL_SUCCESS) + { + log_error("Unable to create command queue: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + if (!ImageInfoVerify(adapterType, memObjList, width, height, surface, objectSharedHandle)) + { + log_error("Image info verification failed\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + cl_event event; + error = clEnqueueAcquireDX9MediaSurfacesKHR(cmdQueue, static_cast(memObjList.size()), + &memObjList.at(0), 0, NULL, &event); + if (error != CL_SUCCESS) + { + log_error("clEnqueueAcquireDX9MediaSurfacesKHR failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + cl_uint eventType = 0; + error = clGetEventInfo( event, CL_EVENT_COMMAND_TYPE, sizeof(eventType), &eventType, NULL); + if (error != CL_SUCCESS) + { + log_error("clGetEventInfo failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + if(eventType != CL_COMMAND_ACQUIRE_DX9_MEDIA_SURFACES_KHR) + { + log_error("Invalid event != CL_COMMAND_ACQUIRE_DX9_MEDIA_SURFACES_KHR\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + clReleaseEvent(event); + + size_t origin[3] = {0,0,0}; + size_t offset = 0; + size_t frameSize = width * height * 3 / 2; + std::vector out( frameSize, 0 ); + for (size_t i = 0; i < memObjList.size(); ++i) + { + size_t planeWidth = (i == 0) ? width: width / 2; + size_t planeHeight = (i == 0) ? 
height: height / 2; + size_t regionPlane[3] = {planeWidth, planeHeight, 1}; + + error = clEnqueueReadImage(cmdQueue, memObjList.at(i), CL_TRUE, origin, regionPlane, 0, 0, &out.at(offset), 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueReadImage failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + offset += planeWidth * planeHeight; + } + + if (!YUVCompare(surfaceFormat, out, bufferIn, width, height)) + { + log_error("OCL object verification failed - clEnqueueReadImage\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + error = clEnqueueReleaseDX9MediaSurfacesKHR(cmdQueue, static_cast(memObjList.size()), + &memObjList.at(0), 0, NULL, &event); + if (error != CL_SUCCESS) + { + log_error("clEnqueueReleaseDX9MediaSurfacesKHR failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + eventType = 0; + error = clGetEventInfo( event, CL_EVENT_COMMAND_TYPE, sizeof(eventType), &eventType, NULL); + if (error != CL_SUCCESS) + { + log_error("clGetEventInfo failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + if(eventType != CL_COMMAND_RELEASE_DX9_MEDIA_SURFACES_KHR) + { + log_error("Invalid event != CL_COMMAND_RELEASE_DX9_MEDIA_SURFACES_KHR\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + clReleaseEvent(event); + + //object verification + std::vector bufferOut(frameSize, 0); + if (!YUVSurfaceGet(surfaceFormat, surface, bufferOut, width, height)) + { + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + if (!YUVCompare(surfaceFormat, bufferOut, bufferIn, width, height)) + { + log_error("Media surface is different than expected\n"); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + } + + if (!deviceWrapper->Status()) + { + std::string adapterName; + AdapterToString(adapterType, adapterName); + log_error("%s init failed\n", adapterName.c_str()); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + 
return result.Result(); +} + +int test_context_create(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + const unsigned int WIDTH = 256; + const unsigned int HEIGHT = 256; + + std::vector adapterTypes; +#if defined(_WIN32) + adapterTypes.push_back(CL_ADAPTER_D3D9_KHR); + adapterTypes.push_back(CL_ADAPTER_D3D9EX_KHR); + adapterTypes.push_back(CL_ADAPTER_DXVA_KHR); +#endif + + std::vector contextFuncs; + contextFuncs.push_back(CONTEXT_CREATE_DEFAULT); + contextFuncs.push_back(CONTEXT_CREATE_FROM_TYPE); + + std::vector formats; + formats.push_back(SURFACE_FORMAT_NV12); + formats.push_back(SURFACE_FORMAT_YV12); + + std::vector sharedHandleTypes; + sharedHandleTypes.push_back(SHARED_HANDLE_DISABLED); +#if defined(_WIN32) + sharedHandleTypes.push_back(SHARED_HANDLE_ENABLED); +#endif + + CResult result; + for (size_t adapterTypeIdx = 0; adapterTypeIdx < adapterTypes.size(); ++adapterTypeIdx) + { + //iteration through all create context functions + for (size_t contextFuncIdx = 0; contextFuncIdx < contextFuncs.size(); ++contextFuncIdx) + { + //iteration through surface formats + for (size_t formatIdx = 0; formatIdx < formats.size(); ++formatIdx) + { + //shared handle enabled or disabled + for (size_t sharedHandleIdx = 0; sharedHandleIdx < sharedHandleTypes.size(); ++sharedHandleIdx) + { + if (adapterTypes[adapterTypeIdx] == CL_ADAPTER_D3D9_KHR && sharedHandleTypes[sharedHandleIdx] == SHARED_HANDLE_ENABLED) + continue; + + if(context_create(deviceID, context, queue, num_elements, WIDTH, HEIGHT, + contextFuncs[contextFuncIdx], adapterTypes[adapterTypeIdx], formats[formatIdx], + sharedHandleTypes[sharedHandleIdx]) != 0) + { + std::string sharedHandle = (sharedHandleTypes[sharedHandleIdx] == SHARED_HANDLE_ENABLED)? 
"shared handle": "no shared handle"; + std::string formatStr; + std::string adapterTypeStr; + SurfaceFormatToString(formats[formatIdx], formatStr); + AdapterToString(adapterTypes[adapterTypeIdx], adapterTypeStr); + + log_error("\nTest case - clCreateContext (%s, %s, %s) failed\n\n", adapterTypeStr.c_str(), formatStr.c_str(), sharedHandle.c_str()); + result.ResultSub(CResult::TEST_FAIL); + } + } + } + } + } + + return result.Result(); +} diff --git a/test_extensions/media_sharing/test_functions_api.cpp b/test_extensions/media_sharing/test_functions_api.cpp new file mode 100644 index 00000000..21fe403d --- /dev/null +++ b/test_extensions/media_sharing/test_functions_api.cpp @@ -0,0 +1,604 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "utils.h" + +int api_functions(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, + unsigned int iterationNum, unsigned int width, unsigned int height, cl_dx9_media_adapter_type_khr adapterType, + TSurfaceFormat surfaceFormat, TSharedHandleType sharedHandle) +{ + const unsigned int FRAME_NUM = 2; + const cl_uchar MAX_VALUE = 255 / 2; + CResult result; + + //create device + std::auto_ptr deviceWrapper; + if (!DeviceCreate(adapterType, deviceWrapper)) + { + result.ResultSub(CResult::TEST_ERROR); + return result.Result(); + } + + //generate input and expected data + std::vector > bufferRef1(FRAME_NUM); + std::vector > bufferRef2(FRAME_NUM); + std::vector > bufferRef3(FRAME_NUM); + size_t frameSize = width * height * 3 / 2; + cl_uchar step = MAX_VALUE / FRAME_NUM; + for (size_t i = 0; i < FRAME_NUM; ++i) + { + if (!YUVGenerate(surfaceFormat, bufferRef1[i], width, height, static_cast(step * i), static_cast(step * (i + 1))) || + !YUVGenerate(surfaceFormat, bufferRef2[i], width, height, static_cast(step * i), static_cast(step * (i + 1)), 0.2) || + !YUVGenerate(surfaceFormat, bufferRef3[i], width, height, static_cast(step * i), static_cast(step * (i + 1)), 0.4)) + { + result.ResultSub(CResult::TEST_ERROR); + return result.Result(); + } + } + + //iterates through all devices + while (deviceWrapper->AdapterNext()) + { + if (surfaceFormat != SURFACE_FORMAT_NV12 && !SurfaceFormatCheck(adapterType, *deviceWrapper, surfaceFormat)) + { + std::string sharedHandleStr = (sharedHandle == SHARED_HANDLE_ENABLED)? 
"yes": "no"; + std::string formatStr; + std::string adapterStr; + SurfaceFormatToString(surfaceFormat, formatStr); + AdapterToString(adapterType, adapterStr); + log_info("Skipping test case, image format is not supported by a device (adapter type: %s, format: %s, shared handle: %s)\n", + adapterStr.c_str(), formatStr.c_str(), sharedHandleStr.c_str()); + return result.Result(); + } + + void *objectSharedHandle = 0; + std::auto_ptr surface; + + //create surface + if (!MediaSurfaceCreate(adapterType, width, height, surfaceFormat, *deviceWrapper, surface, + (sharedHandle == SHARED_HANDLE_ENABLED) ? true: false, &objectSharedHandle)) + { + log_error("Media surface creation failed for %i adapter\n", deviceWrapper->AdapterIdx()); + result.ResultSub(CResult::TEST_ERROR); + return result.Result(); + } + + cl_context_properties contextProperties[] = { + CL_CONTEXT_PLATFORM, (cl_context_properties)gPlatformIDdetected, + AdapterTypeToContextInfo(adapterType), (cl_context_properties)deviceWrapper->Device(), + 0, + }; + + cl_int error; + clContextWrapper ctx = clCreateContext(&contextProperties[0], 1, &gDeviceIDdetected, NULL, NULL, &error); + if (error != CL_SUCCESS) + { + log_error("clCreateContext failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + +#if defined(_WIN32) + cl_dx9_surface_info_khr surfaceInfo; + surfaceInfo.resource = *(static_cast(surface.get())); + surfaceInfo.shared_handle = objectSharedHandle; +#else + void *surfaceInfo = 0; + return TEST_NOT_IMPLEMENTED; +#endif + + std::vector memObjList; + unsigned int planesNum = PlanesNum(surfaceFormat); + std::vector planesList(planesNum); + for (unsigned int planeIdx = 0; planeIdx < planesNum; ++planeIdx) + { + planesList[planeIdx] = clCreateFromDX9MediaSurfaceKHR(ctx, CL_MEM_READ_WRITE, adapterType, &surfaceInfo, planeIdx, &error); + if (error != CL_SUCCESS) + { + log_error("clCreateFromDX9MediaSurfaceKHR failed for plane %i: %s\n", planeIdx, 
IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + memObjList.push_back(planesList[planeIdx]); + } + + clCommandQueueWrapper cmdQueue = clCreateCommandQueueWithProperties(ctx, gDeviceIDdetected, 0, &error ); + if (error != CL_SUCCESS) + { + log_error("Unable to create command queue: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + if (!ImageInfoVerify(adapterType, memObjList, width, height, surface, objectSharedHandle)) + { + log_error("Image info verification failed\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + for (size_t frameIdx = 0; frameIdx < iterationNum; ++frameIdx) + { + if (!YUVSurfaceSet(surfaceFormat, surface, bufferRef1[frameIdx % FRAME_NUM], width, height)) + { + result.ResultSub(CResult::TEST_ERROR); + return result.Result(); + } + + error = clEnqueueAcquireDX9MediaSurfacesKHR(cmdQueue, static_cast(memObjList.size()), &memObjList[0], 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueAcquireDX9MediaSurfacesKHR failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + { //read operation + std::vector out( frameSize, 0 ); + size_t offset = 0; + size_t origin[3] = {0,0,0}; + + for (size_t i = 0; i < memObjList.size(); ++i) + { + size_t planeWidth = (i == 0) ? width: width / 2; + size_t planeHeight = (i == 0) ? 
height: height / 2; + size_t regionPlane[3] = {planeWidth, planeHeight, 1}; + + error = clEnqueueReadImage(cmdQueue, memObjList[i], CL_TRUE, origin, regionPlane, 0, 0, + &out[offset], 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueReadImage failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + offset += planeWidth * planeHeight; + } + + if (!YUVCompare(surfaceFormat, out, bufferRef1[frameIdx % FRAME_NUM], width, height)) + { + log_error("Frame idx: %i, OCL image is different then shared OCL object: clEnqueueReadImage\n", frameIdx); + result.ResultSub(CResult::TEST_FAIL); + } + } + + { //write operation + size_t offset = 0; + size_t origin[3] = {0,0,0}; + for (size_t i = 0; i < memObjList.size(); ++i) + { + size_t planeWidth = (i == 0) ? width: width / 2; + size_t planeHeight = (i == 0) ? height: height / 2; + size_t regionPlane[3] = {planeWidth, planeHeight, 1}; + + error = clEnqueueWriteImage(cmdQueue, memObjList[i], CL_TRUE, origin, regionPlane, + 0, 0, &bufferRef2[frameIdx % FRAME_NUM][offset], 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueWriteImage failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + offset += planeWidth * planeHeight; + } + } + + { //read operation + std::vector out( frameSize, 0 ); + size_t offset = 0; + size_t origin[3] = {0,0,0}; + + for (size_t i = 0; i < memObjList.size(); ++i) + { + size_t planeWidth = (i == 0) ? width: width / 2; + size_t planeHeight = (i == 0) ? 
height: height / 2; + size_t regionPlane[3] = {planeWidth, planeHeight, 1}; + + error = clEnqueueReadImage(cmdQueue, memObjList[i], CL_TRUE, origin, regionPlane, 0, 0, + &out[offset], 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueReadImage failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + offset += planeWidth * planeHeight; + } + + if (!YUVCompare(surfaceFormat, out, bufferRef2[frameIdx % FRAME_NUM], width, height)) + { + log_error("Frame idx: %i, Shared OCL image verification after clEnqueueWriteImage failed\n", frameIdx); + result.ResultSub(CResult::TEST_FAIL); + } + } + + { //copy operation (shared OCL to OCL) + size_t offset = 0; + size_t origin[3] = {0,0,0}; + std::vector out( frameSize, 0 ); + for (size_t i = 0; i < memObjList.size(); ++i) + { + size_t planeWidth = (i == 0) ? width: width / 2; + size_t planeHeight = (i == 0) ? height: height / 2; + size_t regionPlane[3] = {planeWidth, planeHeight, 1}; + + cl_image_format formatPlane; + formatPlane.image_channel_data_type = CL_UNORM_INT8; + formatPlane.image_channel_order = (surfaceFormat == SURFACE_FORMAT_NV12 && i > 0)? 
CL_RG: CL_R; + + cl_image_desc imageDesc = {0}; + imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; + imageDesc.image_width = planeWidth; + imageDesc.image_height = planeHeight; + + clMemWrapper planeOCL = clCreateImage(ctx, CL_MEM_READ_WRITE, &formatPlane, &imageDesc, 0, &error); + if (error != CL_SUCCESS) + { + log_error("clCreateImage failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + error = clEnqueueCopyImage(cmdQueue, memObjList[i], planeOCL, origin, origin, regionPlane, 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueCopyImage failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + error = clEnqueueReadImage(cmdQueue, planeOCL, CL_TRUE, origin, regionPlane, 0, 0, &out[offset], 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueReadImage failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + offset += planeWidth * planeHeight; + } + + if (!YUVCompare(surfaceFormat, out, bufferRef2[frameIdx % FRAME_NUM], width, height)) + { + log_error("Frame idx: %i, OCL image verification after clEnqueueCopyImage (from shared OCL to OCL) failed\n", frameIdx); + result.ResultSub(CResult::TEST_FAIL); + } + } + + { //copy operation (OCL to shared OCL) + size_t offset = 0; + size_t origin[3] = {0,0,0}; + std::vector out( frameSize, 0 ); + for (size_t i = 0; i < memObjList.size(); ++i) + { + size_t planeWidth = (i == 0) ? width: width / 2; + size_t planeHeight = (i == 0) ? height: height / 2; + size_t regionPlane[3] = {planeWidth, planeHeight, 1}; + size_t pitchSize = ((surfaceFormat == SURFACE_FORMAT_NV12 && i > 0)? width: planeWidth) * sizeof(cl_uchar); + + cl_image_format formatPlane; + formatPlane.image_channel_data_type = CL_UNORM_INT8; + formatPlane.image_channel_order = (surfaceFormat == SURFACE_FORMAT_NV12 && i > 0)? 
CL_RG: CL_R; + + cl_image_desc imageDesc = {0}; + imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; + imageDesc.image_width = planeWidth; + imageDesc.image_height = planeHeight; + imageDesc.image_row_pitch = pitchSize; + + clMemWrapper planeOCL = clCreateImage(ctx, CL_MEM_COPY_HOST_PTR, &formatPlane, &imageDesc, &bufferRef1[frameIdx % FRAME_NUM][offset], &error); + if (error != CL_SUCCESS) + { + log_error("clCreateImage failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + error = clEnqueueCopyImage(cmdQueue, planeOCL, memObjList[i], origin, origin, regionPlane, 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueCopyImage failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + error = clEnqueueReadImage(cmdQueue, memObjList[i], CL_TRUE, origin, regionPlane, 0, 0, &out[offset], 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueReadImage failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + offset += planeWidth * planeHeight; + } + + if (!YUVCompare(surfaceFormat, out, bufferRef1[frameIdx % FRAME_NUM], width, height)) + { + log_error("Frame idx: %i, OCL image verification after clEnqueueCopyImage (from OCL to shared OCL) failed\n", frameIdx); + result.ResultSub(CResult::TEST_FAIL); + } + } + + { //copy from image to buffer + size_t offset = 0; + size_t origin[3] = {0,0,0}; + size_t bufferSize = sizeof(cl_uchar) * frameSize; + clMemWrapper buffer = clCreateBuffer( ctx, CL_MEM_READ_WRITE, bufferSize, NULL, &error); + if (error != CL_SUCCESS) + { + log_error("clCreateBuffer failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + for (size_t i = 0; i < memObjList.size(); ++i) + { + size_t planeWidth = (i == 0) ? width: width / 2; + size_t planeHeight = (i == 0) ? 
height: height / 2; + size_t regionPlane[3] = {planeWidth, planeHeight, 1}; + + error = clEnqueueCopyImageToBuffer(cmdQueue, memObjList[i], buffer, origin, regionPlane, offset, 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueCopyImageToBuffer failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + offset += planeWidth * planeHeight * sizeof(cl_uchar); + } + + std::vector out( frameSize, 0 ); + error = clEnqueueReadBuffer( cmdQueue, buffer, CL_TRUE, 0, bufferSize, &out[0], 0, NULL, NULL ); + if (error != CL_SUCCESS) + { + log_error("Unable to read buffer"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (!YUVCompare(surfaceFormat, out, bufferRef1[frameIdx % FRAME_NUM], width, height)) + { + log_error("Frame idx: %i, OCL buffer verification after clEnqueueCopyImageToBuffer (from shared OCL image to OCL buffer) failed\n", frameIdx); + result.ResultSub(CResult::TEST_FAIL); + } + } + + { //copy buffer to image + size_t bufferSize = sizeof(cl_uchar) * frameSize; + clMemWrapper buffer = clCreateBuffer( ctx, CL_MEM_COPY_HOST_PTR, bufferSize, &bufferRef2[frameIdx % FRAME_NUM][0], &error); + if (error != CL_SUCCESS) + { + log_error("clCreateBuffer failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + size_t offset = 0; + size_t origin[3] = {0,0,0}; + std::vector out( frameSize, 0 ); + for (size_t i = 0; i < memObjList.size(); ++i) + { + size_t planeWidth = (i == 0) ? width: width / 2; + size_t planeHeight = (i == 0) ? 
height: height / 2; + size_t regionPlane[3] = {planeWidth, planeHeight, 1}; + + error = clEnqueueCopyBufferToImage(cmdQueue, buffer, memObjList[i], offset, origin, regionPlane, 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueCopyBufferToImage failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + error = clEnqueueReadImage(cmdQueue, memObjList[i], CL_TRUE, origin, regionPlane, 0, 0, &out[offset], 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueReadImage failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + offset += planeWidth * planeHeight * sizeof(cl_uchar); + } + + if (!YUVCompare(surfaceFormat, out, bufferRef2[frameIdx % FRAME_NUM], width, height)) + { + log_error("Frame idx: %i, OCL image verification after clEnqueueCopyBufferToImage (from OCL buffer to shared OCL image) failed\n", frameIdx); + result.ResultSub(CResult::TEST_FAIL); + } + } + + { //map operation to read + size_t offset = 0; + size_t origin[3] = {0,0,0}; + std::vector out( frameSize, 0 ); + for (size_t i = 0; i < memObjList.size(); ++i) + { + size_t planeWidth = (i == 0) ? width: width / 2; + size_t planeHeight = (i == 0) ? height: height / 2; + size_t regionPlane[3] = {planeWidth, planeHeight, 1}; + size_t pitchSize = ((surfaceFormat == SURFACE_FORMAT_NV12 && i > 0)? 
width: planeWidth); + + size_t rowPitch = 0; + size_t slicePitch = 0; + void *mapPtr = clEnqueueMapImage(cmdQueue, memObjList[i], CL_TRUE, CL_MAP_READ, origin, regionPlane, + &rowPitch, &slicePitch, 0, 0, 0, &error); + if (error != CL_SUCCESS) + { + log_error("clEnqueueMapImage failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + for (size_t y = 0; y < planeHeight; ++y) + memcpy(&out[offset + y * pitchSize], static_cast(mapPtr) + y * rowPitch / sizeof(cl_uchar), pitchSize * sizeof(cl_uchar)); + + error = clEnqueueUnmapMemObject(cmdQueue, memObjList[i], mapPtr, 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueUnmapMemObject failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + offset += pitchSize * planeHeight; + } + + if (!YUVCompare(surfaceFormat, out, bufferRef2[frameIdx % FRAME_NUM], width, height)) + { + log_error("Frame idx: %i, Mapped shared OCL image is different then expected\n", frameIdx); + result.ResultSub(CResult::TEST_FAIL); + } + } + + { //map operation to write + size_t offset = 0; + size_t origin[3] = {0,0,0}; + for (size_t i = 0; i < memObjList.size(); ++i) + { + size_t planeWidth = (i == 0) ? width: width / 2; + size_t planeHeight = (i == 0) ? height: height / 2; + size_t regionPlane[3] = {planeWidth, planeHeight, 1}; + size_t pitchSize = ((surfaceFormat == SURFACE_FORMAT_NV12 && i > 0)? 
width: planeWidth); + + size_t rowPitch = 0; + size_t slicePitch = 0; + void *mapPtr = clEnqueueMapImage(cmdQueue, memObjList[i], CL_TRUE, CL_MAP_WRITE, origin, regionPlane, + &rowPitch, &slicePitch, 0, 0, 0, &error); + if (error != CL_SUCCESS) + { + log_error("clEnqueueMapImage failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + for (size_t y = 0; y < planeHeight; ++y) + memcpy(static_cast(mapPtr) + y * rowPitch / sizeof(cl_uchar), &bufferRef3[frameIdx % FRAME_NUM][offset + y * pitchSize], pitchSize * sizeof(cl_uchar)); + + error = clEnqueueUnmapMemObject(cmdQueue, memObjList[i], mapPtr, 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueUnmapMemObject failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + offset += pitchSize * planeHeight; + } + } + + error = clEnqueueReleaseDX9MediaSurfacesKHR(cmdQueue, static_cast(memObjList.size()), &memObjList[0], 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueReleaseDX9MediaSurfacesKHR failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + std::vector bufferOut(frameSize, 0); + if (!YUVSurfaceGet(surfaceFormat, surface, bufferOut, width, height)) + { + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + if (!YUVCompare(surfaceFormat, bufferOut, bufferRef3[frameIdx % FRAME_NUM], width, height)) + { + log_error("Frame idx: %i, media surface is different than expected\n", frameIdx); + result.ResultSub(CResult::TEST_FAIL); + } + } + } + + if (!deviceWrapper->Status()) + { + std::string adapterName; + AdapterToString(adapterType, adapterName); + log_error("%s init failed\n", adapterName.c_str()); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + return result.Result(); +} + +int test_api(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + CResult result; + +#if defined(_WIN32) + //D3D9 + if(api_functions(deviceID, 
context, queue, num_elements, 10, 256, 256, CL_ADAPTER_D3D9_KHR, + SURFACE_FORMAT_NV12, SHARED_HANDLE_DISABLED) != 0) + { + log_error("\nTest case (D3D9, NV12, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(api_functions(deviceID, context, queue, num_elements, 3, 512, 256, CL_ADAPTER_D3D9_KHR, + SURFACE_FORMAT_YV12, SHARED_HANDLE_DISABLED) != 0) + { + log_error("\nTest case (D3D9, YV12, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + //D3D9EX + if(api_functions(deviceID, context, queue, num_elements, 5, 256, 512, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_NV12, SHARED_HANDLE_DISABLED) != 0) + { + log_error("\nTest case (D3D9EX, NV12, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(api_functions(deviceID, context, queue, num_elements, 7, 512, 256, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_NV12, SHARED_HANDLE_ENABLED) != 0) + { + log_error("\nTest case (D3D9EX, NV12, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(api_functions(deviceID, context, queue, num_elements, 10, 256, 256, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_YV12, SHARED_HANDLE_DISABLED) != 0) + { + log_error("\nTest case (D3D9EX, YV12, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(api_functions(deviceID, context, queue, num_elements, 15, 128, 128, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_YV12, SHARED_HANDLE_ENABLED) != 0) + { + log_error("\nTest case (D3D9EX, YV12, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + //DXVA + if(api_functions(deviceID, context, queue, num_elements, 20, 128, 128, CL_ADAPTER_DXVA_KHR, + SURFACE_FORMAT_NV12, SHARED_HANDLE_DISABLED) != 0) + { + log_error("\nTest case (DXVA, NV12, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(api_functions(deviceID, context, queue, num_elements, 40, 64, 64, CL_ADAPTER_DXVA_KHR, + SURFACE_FORMAT_NV12, 
SHARED_HANDLE_ENABLED) != 0) + { + log_error("\nTest case (DXVA, NV12, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(api_functions(deviceID, context, queue, num_elements, 5, 512, 512, CL_ADAPTER_DXVA_KHR, + SURFACE_FORMAT_YV12, SHARED_HANDLE_DISABLED) != 0) + { + log_error("\nTest case (DXVA, YV12, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(api_functions(deviceID, context, queue, num_elements, 2, 1024, 1024, CL_ADAPTER_DXVA_KHR, + SURFACE_FORMAT_YV12, SHARED_HANDLE_ENABLED) != 0) + { + log_error("\nTest case (DXVA, YV12, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + +#else + return TEST_NOT_IMPLEMENTED; +#endif + + return result.Result(); +} diff --git a/test_extensions/media_sharing/test_functions_kernel.cpp b/test_extensions/media_sharing/test_functions_kernel.cpp new file mode 100644 index 00000000..eff297dc --- /dev/null +++ b/test_extensions/media_sharing/test_functions_kernel.cpp @@ -0,0 +1,433 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include +#include + +#include "../../test_common/harness/errorHelpers.h" +#include "../../test_common/harness/kernelHelpers.h" + +#include "utils.h" + +int kernel_functions(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, + unsigned int iterationNum, unsigned int width, unsigned int height, cl_dx9_media_adapter_type_khr adapterType, + TSurfaceFormat surfaceFormat, TSharedHandleType sharedHandle) +{ + const unsigned int FRAME_NUM = 2; + const cl_uchar MAX_VALUE = 255 / 2; + const std::string PROGRAM_STR = + "__kernel void TestFunction( read_only image2d_t planeIn, write_only image2d_t planeOut, " + NL " sampler_t sampler, __global int *planeRes)" + NL "{" + NL " int w = get_global_id(0);" + NL " int h = get_global_id(1);" + NL " int width = get_image_width(planeIn);" + NL " int height = get_image_height(planeOut);" + NL " float4 color0 = read_imagef(planeIn, sampler, (int2)(w,h)) + 0.2f;" + NL " float4 color1 = read_imagef(planeIn, sampler, (float2)(w,h)) + 0.2f;" + NL " color0 = (color0 == color1) ? 
color0: (float4)(0.5, 0.5, 0.5, 0.5);" + NL " write_imagef(planeOut, (int2)(w,h), color0);" + NL " if(w == 0 && h == 0)" + NL " {" + NL " planeRes[0] = width;" + NL " planeRes[1] = height;" + NL " }" + NL "}"; + + CResult result; + + std::auto_ptr deviceWrapper; + if (!DeviceCreate(adapterType, deviceWrapper)) + { + result.ResultSub(CResult::TEST_ERROR); + return result.Result(); + } + + std::vector > bufferIn(FRAME_NUM); + std::vector > bufferExp(FRAME_NUM); + size_t frameSize = width * height * 3 / 2; + cl_uchar step = MAX_VALUE / FRAME_NUM; + for (size_t i = 0; i < FRAME_NUM; ++i) + { + if (!YUVGenerate(surfaceFormat, bufferIn[i], width, height, static_cast(step * i), static_cast(step * (i + 1))) || + !YUVGenerate(surfaceFormat, bufferExp[i], width, height, static_cast(step * i), static_cast(step * (i + 1)), 0.2)) + { + result.ResultSub(CResult::TEST_ERROR); + return result.Result(); + } + } + + while (deviceWrapper->AdapterNext()) + { + if (surfaceFormat != SURFACE_FORMAT_NV12 && !SurfaceFormatCheck(adapterType, *deviceWrapper, surfaceFormat)) + { + std::string sharedHandleStr = (sharedHandle == SHARED_HANDLE_ENABLED)? "yes": "no"; + std::string formatStr; + std::string adapterStr; + SurfaceFormatToString(surfaceFormat, formatStr); + AdapterToString(adapterType, adapterStr); + log_info("Skipping test case, image format is not supported by a device (adapter type: %s, format: %s, shared handle: %s)\n", + adapterStr.c_str(), formatStr.c_str(), sharedHandleStr.c_str()); + return result.Result(); + } + + void *objectSrcHandle = 0; + std::auto_ptr surfaceSrc; + if (!MediaSurfaceCreate(adapterType, width, height, surfaceFormat, *deviceWrapper, surfaceSrc, + (sharedHandle == SHARED_HANDLE_ENABLED) ? 
true: false, &objectSrcHandle)) + { + log_error("Media surface creation failed for %i adapter\n", deviceWrapper->AdapterIdx()); + result.ResultSub(CResult::TEST_ERROR); + return result.Result(); + } + + void *objectDstHandle = 0; + std::auto_ptr surfaceDst; + if (!MediaSurfaceCreate(adapterType, width, height, surfaceFormat, *deviceWrapper, surfaceDst, + (sharedHandle == SHARED_HANDLE_ENABLED) ? true: false, &objectDstHandle)) + { + log_error("Media surface creation failed for %i adapter\n", deviceWrapper->AdapterIdx()); + result.ResultSub(CResult::TEST_ERROR); + return result.Result(); + } + + cl_context_properties contextProperties[] = { + CL_CONTEXT_PLATFORM, (cl_context_properties)gPlatformIDdetected, + AdapterTypeToContextInfo(adapterType), (cl_context_properties)deviceWrapper->Device(), + 0, + }; + + cl_int error; + clContextWrapper ctx = clCreateContext(&contextProperties[0], 1, &gDeviceIDdetected, NULL, NULL, &error); + if (error != CL_SUCCESS) + { + log_error("clCreateContext failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + +#if defined(_WIN32) + cl_dx9_surface_info_khr surfaceInfoSrc; + surfaceInfoSrc.resource = *(static_cast(surfaceSrc.get())); + surfaceInfoSrc.shared_handle = objectSrcHandle; + + cl_dx9_surface_info_khr surfaceInfoDst; + surfaceInfoDst.resource = *(static_cast(surfaceDst.get())); + surfaceInfoDst.shared_handle = objectDstHandle; +#else + void *surfaceInfoSrc = 0; + void *surfaceInfoDst = 0; + return TEST_NOT_IMPLEMENTED; +#endif + + std::vector memObjSrcList; + std::vector memObjDstList; + unsigned int planesNum = PlanesNum(surfaceFormat); + std::vector planeSrcList(planesNum); + std::vector planeDstList(planesNum); + for (unsigned int planeIdx = 0; planeIdx < planesNum; ++planeIdx) + { + planeSrcList[planeIdx] = clCreateFromDX9MediaSurfaceKHR(ctx, CL_MEM_READ_WRITE, adapterType, &surfaceInfoSrc, planeIdx, &error); + if (error != CL_SUCCESS) + { + 
log_error("clCreateFromDX9MediaSurfaceKHR failed for plane %i: %s\n", planeIdx, IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + memObjSrcList.push_back(planeSrcList[planeIdx]); + + planeDstList[planeIdx] = clCreateFromDX9MediaSurfaceKHR(ctx, CL_MEM_READ_WRITE, adapterType, &surfaceInfoDst, planeIdx, &error); + if (error != CL_SUCCESS) + { + log_error("clCreateFromDX9MediaSurfaceKHR failed for plane %i: %s\n", planeIdx, IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + memObjDstList.push_back(planeDstList[planeIdx]); + } + + clCommandQueueWrapper cmdQueue = clCreateCommandQueueWithProperties(ctx, gDeviceIDdetected, 0, &error ); + if (error != CL_SUCCESS) + { + log_error("Unable to create command queue: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + if (!ImageInfoVerify(adapterType, memObjSrcList, width, height, surfaceSrc, objectSrcHandle)) + { + log_error("Image info verification failed\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + for (size_t frameIdx = 0; frameIdx < iterationNum; ++frameIdx) + { + if (!YUVSurfaceSet(surfaceFormat, surfaceSrc, bufferIn[frameIdx % FRAME_NUM], width, height)) + { + result.ResultSub(CResult::TEST_ERROR); + return result.Result(); + } + + error = clEnqueueAcquireDX9MediaSurfacesKHR(cmdQueue, static_cast(memObjSrcList.size()), &memObjSrcList[0], 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueAcquireDX9MediaSurfacesKHR failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + error = clEnqueueAcquireDX9MediaSurfacesKHR(cmdQueue, static_cast(memObjDstList.size()), &memObjDstList[0], 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueAcquireDX9MediaSurfacesKHR failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + clSamplerWrapper sampler 
= clCreateSampler( ctx, CL_FALSE, CL_ADDRESS_NONE, CL_FILTER_NEAREST, &error ); + if(error != CL_SUCCESS) + { + log_error("Unable to create sampler\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + clProgramWrapper program; + clKernelWrapper kernel; + const char *progPtr = PROGRAM_STR.c_str(); + if(create_single_kernel_helper(ctx, &program, &kernel, 1, (const char **)&progPtr, "TestFunction")) + result.ResultSub(CResult::TEST_FAIL); + + size_t bufferSize = sizeof(cl_int) * 2; + clMemWrapper imageRes = clCreateBuffer( ctx, CL_MEM_READ_WRITE, bufferSize, NULL, &error); + if (error != CL_SUCCESS) + { + log_error("clCreateBuffer failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + size_t offset = 0; + size_t origin[3] = {0,0,0}; + std::vector out( frameSize, 0 ); + for (size_t i = 0; i < memObjSrcList.size(); ++i) + { + size_t planeWidth = (i == 0) ? width: width / 2; + size_t planeHeight = (i == 0) ? height: height / 2; + size_t regionPlane[3] = {planeWidth, planeHeight, 1}; + size_t threads[ 2 ] = { planeWidth, planeHeight }; + + error = clSetKernelArg( kernel, 0, sizeof( memObjSrcList[i] ), &memObjSrcList[i] ); + if (error != CL_SUCCESS) + { + log_error("Unable to set kernel arguments" ); + result.ResultSub(CResult::TEST_FAIL); + } + + error = clSetKernelArg( kernel, 1, sizeof( memObjDstList[i] ), &memObjDstList[i] ); + if (error != CL_SUCCESS) + { + log_error("Unable to set kernel arguments" ); + result.ResultSub(CResult::TEST_FAIL); + } + + error = clSetKernelArg( kernel, 2, sizeof( sampler ), &sampler ); + if (error != CL_SUCCESS) + { + log_error("Unable to set kernel arguments" ); + result.ResultSub(CResult::TEST_FAIL); + } + + error = clSetKernelArg( kernel, 3, sizeof( imageRes ), &imageRes ); + if (error != CL_SUCCESS) + { + log_error("Unable to set kernel arguments" ); + result.ResultSub(CResult::TEST_FAIL); + } + + size_t localThreads[ 2 ]; + error = get_max_common_2D_work_group_size( ctx, kernel, threads, localThreads 
); + if (error != CL_SUCCESS) + { + log_error("Unable to get work group size to use" ); + result.ResultSub(CResult::TEST_FAIL); + } + + error = clEnqueueNDRangeKernel( cmdQueue, kernel, 2, NULL, threads, localThreads, 0, NULL, NULL ); + if (error != CL_SUCCESS) + { + log_error("Unable to execute test kernel" ); + result.ResultSub(CResult::TEST_FAIL); + } + + std::vector imageResOut(2, 0); + error = clEnqueueReadBuffer( cmdQueue, imageRes, CL_TRUE, 0, bufferSize, &imageResOut[0], 0, NULL, NULL ); + if (error != CL_SUCCESS) + { + log_error("Unable to read buffer"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(imageResOut[0] != planeWidth) + { + log_error("Invalid width value, test = %i, expected = %i\n", imageResOut[0], planeWidth); + result.ResultSub(CResult::TEST_FAIL); + } + + if(imageResOut[1] != planeHeight) + { + log_error("Invalid height value, test = %i, expected = %i\n", imageResOut[1], planeHeight); + result.ResultSub(CResult::TEST_FAIL); + } + + error = clEnqueueReadImage(cmdQueue, memObjDstList[i], CL_TRUE, origin, regionPlane, 0, 0, &out[offset], 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueReadImage failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + offset += planeWidth * planeHeight; + } + + if (!YUVCompare(surfaceFormat, out, bufferExp[frameIdx % FRAME_NUM], width, height)) + { + log_error("Frame idx: %i, OCL objects are different than expected\n", frameIdx); + result.ResultSub(CResult::TEST_FAIL); + } + + error = clEnqueueReleaseDX9MediaSurfacesKHR(cmdQueue, static_cast(memObjSrcList.size()), &memObjSrcList[0], 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueReleaseDX9MediaSurfacesKHR failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + error = clEnqueueReleaseDX9MediaSurfacesKHR(cmdQueue, static_cast(memObjDstList.size()), &memObjDstList[0], 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueReleaseDX9MediaSurfacesKHR failed: 
%s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + std::vector bufferOut(frameSize, 0); + if (!YUVSurfaceGet(surfaceFormat, surfaceDst, bufferOut, width, height)) + { + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + if (!YUVCompare(surfaceFormat, bufferOut, bufferExp[frameIdx % FRAME_NUM], width, height)) + { + log_error("Frame idx: %i, media surface is different than expected\n", frameIdx); + result.ResultSub(CResult::TEST_FAIL); + } + } + } + + if (!deviceWrapper->Status()) + { + std::string adapter; + AdapterToString(adapterType, adapter); + log_error("%s init failed\n", adapter.c_str()); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + return result.Result(); +} + +int test_kernel(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + CResult result; + +#if defined(_WIN32) + //D3D9 + if(kernel_functions(deviceID, context, queue, num_elements, 10, 256, 256, CL_ADAPTER_D3D9_KHR, + SURFACE_FORMAT_NV12, SHARED_HANDLE_DISABLED) != 0) + { + log_error("\nTest case (D3D9, NV12, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(kernel_functions(deviceID, context, queue, num_elements, 3, 256, 256, CL_ADAPTER_D3D9_KHR, + SURFACE_FORMAT_YV12, SHARED_HANDLE_DISABLED) != 0) + { + log_error("\nTest case (D3D9, YV12, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + //D3D9EX + if(kernel_functions(deviceID, context, queue, num_elements, 5, 256, 512, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_NV12, SHARED_HANDLE_DISABLED) != 0) + { + log_error("\nTest case (D3D9EX, NV12, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(kernel_functions(deviceID, context, queue, num_elements, 7, 512, 256, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_NV12, SHARED_HANDLE_ENABLED) != 0) + { + log_error("\nTest case (D3D9EX, NV12, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + 
} + + if(kernel_functions(deviceID, context, queue, num_elements, 10, 256, 256, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_YV12, SHARED_HANDLE_DISABLED) != 0) + { + log_error("\nTest case (D3D9EX, YV12, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(kernel_functions(deviceID, context, queue, num_elements, 15, 128, 128, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_YV12, SHARED_HANDLE_ENABLED) != 0) + { + log_error("\nTest case (D3D9EX, YV12, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + //DXVA + if(kernel_functions(deviceID, context, queue, num_elements, 20, 128, 128, CL_ADAPTER_DXVA_KHR, + SURFACE_FORMAT_NV12, SHARED_HANDLE_DISABLED) != 0) + { + log_error("\nTest case (DXVA, NV12, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(kernel_functions(deviceID, context, queue, num_elements, 40, 64, 64, CL_ADAPTER_DXVA_KHR, + SURFACE_FORMAT_NV12, SHARED_HANDLE_ENABLED) != 0) + { + log_error("\nTest case (DXVA, NV12, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(kernel_functions(deviceID, context, queue, num_elements, 5, 512, 512, CL_ADAPTER_DXVA_KHR, + SURFACE_FORMAT_YV12, SHARED_HANDLE_DISABLED) != 0) + { + log_error("\nTest case (DXVA, YV12, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(kernel_functions(deviceID, context, queue, num_elements, 2, 1024, 1024, CL_ADAPTER_DXVA_KHR, + SURFACE_FORMAT_YV12, SHARED_HANDLE_ENABLED) != 0) + { + log_error("\nTest case (DXVA, YV12, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + +#else + return TEST_NOT_IMPLEMENTED; +#endif + + return result.Result(); +} diff --git a/test_extensions/media_sharing/test_get_device_ids.cpp b/test_extensions/media_sharing/test_get_device_ids.cpp new file mode 100644 index 00000000..68fdbf74 --- /dev/null +++ b/test_extensions/media_sharing/test_get_device_ids.cpp @@ -0,0 +1,183 @@ +// +// Copyright (c) 2017 The 
Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "utils.h" + +int get_device_ids(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, + cl_dx9_media_adapter_type_khr adapterType) +{ + CResult result; + + std::auto_ptr deviceWrapper; + if (!DeviceCreate(adapterType, deviceWrapper)) + { + result.ResultSub(CResult::TEST_ERROR); + return result.Result(); + } + + cl_uint devicesExpectedNum = 0; + cl_int error = clGetDeviceIDs(gPlatformIDdetected, CL_DEVICE_TYPE_ALL, 0, 0, &devicesExpectedNum); + if (error != CL_SUCCESS || devicesExpectedNum < 1) + { + log_error("clGetDeviceIDs failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + std::vector devicesExpected(devicesExpectedNum); + error = clGetDeviceIDs(gPlatformIDdetected, CL_DEVICE_TYPE_ALL, devicesExpectedNum, &devicesExpected[0], 0); + if (error != CL_SUCCESS) + { + log_error("clGetDeviceIDs failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + while (deviceWrapper->AdapterNext()) + { + std::vector mediaAdapterTypes; + mediaAdapterTypes.push_back(adapterType); + + std::vector mediaDevices; + mediaDevices.push_back(deviceWrapper->Device()); + + cl_uint devicesAllNum = 0; + error = clGetDeviceIDsFromDX9MediaAdapterKHR(gPlatformIDdetected, 1, &mediaAdapterTypes[0], &mediaDevices[0], + CL_ALL_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR, 0, 0, 
&devicesAllNum); + if (error != CL_SUCCESS && error != CL_DEVICE_NOT_FOUND) + { + log_error("clGetDeviceIDsFromDX9MediaAdapterKHR failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + std::vector devicesAll; + if (devicesAllNum > 0) + { + devicesAll.resize(devicesAllNum); + error = clGetDeviceIDsFromDX9MediaAdapterKHR(gPlatformIDdetected, 1, &mediaAdapterTypes[0], &mediaDevices[0], + CL_ALL_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR, devicesAllNum, &devicesAll[0], 0); + if (error != CL_SUCCESS) + { + log_error("clGetDeviceIDsFromDX9MediaAdapterKHR failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + } + + cl_uint devicesPreferredNum = 0; + error = clGetDeviceIDsFromDX9MediaAdapterKHR(gPlatformIDdetected, 1, &mediaAdapterTypes[0], &mediaDevices[0], + CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR, 0, 0, &devicesPreferredNum); + if (error != CL_SUCCESS && error != CL_DEVICE_NOT_FOUND) + { + log_error("clGetDeviceIDsFromDX9MediaAdapterKHR failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + std::vector devicesPreferred; + if (devicesPreferredNum > 0) + { + devicesPreferred.resize(devicesPreferredNum); + error = clGetDeviceIDsFromDX9MediaAdapterKHR(gPlatformIDdetected, 1, &mediaAdapterTypes[0], &mediaDevices[0], + CL_PREFERRED_DEVICES_FOR_DX9_MEDIA_ADAPTER_KHR, devicesPreferredNum, &devicesPreferred[0], 0); + if (error != CL_SUCCESS) + { + log_error("clGetDeviceIDsFromDX9MediaAdapterKHR failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + } + + if (devicesAllNum < devicesPreferredNum) + { + log_error("Invalid number of preferred devices. 
It should be a subset of all devices\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + for (cl_uint i = 0; i < devicesPreferredNum; ++i) + { + cl_uint j = 0; + for (; j < devicesAllNum; ++j) + { + if (devicesPreferred[i] == devicesAll[j]) + break; + } + + if (j == devicesAllNum) + { + log_error("Preferred device is not a subset of all devices\n"); + result.ResultSub(CResult::TEST_FAIL); + } + } + + for (cl_uint i = 0; i < devicesAllNum; ++i) + { + cl_uint j = 0; + for (; j < devicesExpectedNum; ++j) + { + if (devicesAll[i] == devicesExpected[j]) + break; + } + + if (j == devicesExpectedNum) + { + log_error("CL_ALL_DEVICES_FOR_MEDIA_ADAPTER_KHR should be a subset of all devices for selected platform\n"); + result.ResultSub(CResult::TEST_FAIL); + } + } + } + + if (!deviceWrapper->Status()) + { + std::string adapter; + AdapterToString(adapterType, adapter); + log_error("%s init failed\n", adapter.c_str()); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + return result.Result(); +} + +int test_get_device_ids(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + CResult result; + +#if defined(_WIN32) + if(get_device_ids(deviceID, context, queue, num_elements, CL_ADAPTER_D3D9_KHR) != 0) + { + log_error("\nTest case (D3D9) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(get_device_ids(deviceID, context, queue, num_elements, CL_ADAPTER_D3D9EX_KHR) != 0) + { + log_error("\nTest case (D3D9EX) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(get_device_ids(deviceID, context, queue, num_elements, CL_ADAPTER_DXVA_KHR) != 0) + { + log_error("\nTest case (DXVA) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + +#else + return TEST_NOT_IMPLEMENTED; +#endif + + return result.Result(); +} diff --git a/test_extensions/media_sharing/test_interop_sync.cpp b/test_extensions/media_sharing/test_interop_sync.cpp new file mode 100644 index 00000000..d15e4fbf --- /dev/null +++ 
b/test_extensions/media_sharing/test_interop_sync.cpp @@ -0,0 +1,342 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "utils.h" + +int interop_user_sync(cl_device_id deviceID, cl_context context, cl_command_queue queue, + int num_elements, unsigned int width, unsigned int height, + TContextFuncType functionCreate, cl_dx9_media_adapter_type_khr adapterType, + TSurfaceFormat surfaceFormat, TSharedHandleType sharedHandle, cl_bool userSync) +{ + CResult result; + + //create device + std::auto_ptr deviceWrapper; + if (!DeviceCreate(adapterType, deviceWrapper)) + { + result.ResultSub(CResult::TEST_ERROR); + return result.Result(); + } + + //generate input data + std::vector bufferIn(width * height * 3 / 2, 0); + if(!YUVGenerate(surfaceFormat, bufferIn, width, height, 0, 255)) + { + result.ResultSub(CResult::TEST_ERROR); + return result.Result(); + } + + while (deviceWrapper->AdapterNext()) + { + if (surfaceFormat != SURFACE_FORMAT_NV12 && + !SurfaceFormatCheck(adapterType, *deviceWrapper, surfaceFormat)) + { + std::string sharedHandleStr = (sharedHandle == SHARED_HANDLE_ENABLED)? "yes": "no"; + std::string syncStr = (userSync == CL_TRUE) ? 
"yes": "no"; + std::string formatStr; + std::string adapterStr; + SurfaceFormatToString(surfaceFormat, formatStr); + AdapterToString(adapterType, adapterStr); + log_info("Skipping test case, image format is not supported by a device (adapter type: %s, format: %s, shared handle: %s, user sync: %s)\n", + adapterStr.c_str(), formatStr.c_str(), sharedHandleStr.c_str(), syncStr.c_str()); + return result.Result(); + } + + void *objectSharedHandle = 0; + std::auto_ptr surface; + if (!MediaSurfaceCreate(adapterType, width, height, surfaceFormat, *deviceWrapper, surface, + (sharedHandle == SHARED_HANDLE_ENABLED) ? true: false, &objectSharedHandle)) + { + log_error("Media surface creation failed for %i adapter\n", deviceWrapper->AdapterIdx()); + result.ResultSub(CResult::TEST_ERROR); + return result.Result(); + } + + cl_context_properties contextProperties[] = { + CL_CONTEXT_PLATFORM, (cl_context_properties)gPlatformIDdetected, + AdapterTypeToContextInfo(adapterType), (cl_context_properties)deviceWrapper->Device(), + CL_CONTEXT_INTEROP_USER_SYNC, userSync, + 0, + }; + + cl_int error; + clContextWrapper ctx; + switch(functionCreate) + { + case CONTEXT_CREATE_DEFAULT: + ctx = clCreateContext(&contextProperties[0], 1, &gDeviceIDdetected, NULL, NULL, &error); + break; + case CONTEXT_CREATE_FROM_TYPE: + ctx = clCreateContextFromType(&contextProperties[0], gDeviceTypeSelected, NULL, NULL, &error); + break; + default: + log_error("Unknown context creation function enum\n"); + result.ResultSub(CResult::TEST_ERROR); + return result.Result(); + break; + } + + if (error != CL_SUCCESS) + { + std::string functionName; + FunctionContextCreateToString(functionCreate, functionName); + log_error("%s failed: %s\n", functionName.c_str(), IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + if (!YUVSurfaceSet(surfaceFormat, surface, bufferIn, width, height)) + { + result.ResultSub(CResult::TEST_ERROR); + return result.Result(); + } + +#if 
defined(_WIN32) + cl_dx9_surface_info_khr surfaceInfo; + surfaceInfo.resource = *(static_cast(surface.get())); + surfaceInfo.shared_handle = objectSharedHandle; +#else + void *surfaceInfo = 0; + return TEST_NOT_IMPLEMENTED; +#endif + + std::vector memObjList; + unsigned int planesNum = PlanesNum(surfaceFormat); + std::vector planesList(planesNum); + for (unsigned int planeIdx = 0; planeIdx < planesNum; ++planeIdx) + { + planesList[planeIdx] = clCreateFromDX9MediaSurfaceKHR(ctx, CL_MEM_READ_WRITE, adapterType, &surfaceInfo, planeIdx, &error); + if (error != CL_SUCCESS) + { + log_error("clCreateFromDX9MediaSurfaceKHR failed for plane %i: %s\n", planeIdx, IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + memObjList.push_back(planesList[planeIdx]); + } + + clCommandQueueWrapper cmdQueue = clCreateCommandQueueWithProperties(ctx, gDeviceIDdetected, 0, &error ); + if (error != CL_SUCCESS) + { + log_error("Unable to create command queue: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + if (!ImageInfoVerify(adapterType, memObjList, width, height, surface, objectSharedHandle)) + { + log_error("Image info verification failed\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (userSync == CL_TRUE) + { + #if defined(_WIN32) + IDirect3DQuery9* eventQuery = NULL; + switch (adapterType) + { + case CL_ADAPTER_D3D9_KHR: + { + LPDIRECT3DDEVICE9 device = (LPDIRECT3DDEVICE9)deviceWrapper->Device(); + device->CreateQuery(D3DQUERYTYPE_EVENT, &eventQuery); + eventQuery->Issue(D3DISSUE_END); + + while (S_FALSE == eventQuery->GetData(NULL, 0, D3DGETDATA_FLUSH)) + ; + } + break; + case CL_ADAPTER_D3D9EX_KHR: + { + LPDIRECT3DDEVICE9EX device = (LPDIRECT3DDEVICE9EX)deviceWrapper->Device(); + device->CreateQuery(D3DQUERYTYPE_EVENT, &eventQuery); + eventQuery->Issue(D3DISSUE_END); + + while (S_FALSE == eventQuery->GetData(NULL, 0, D3DGETDATA_FLUSH)) + ; + } + break; + case 
CL_ADAPTER_DXVA_KHR: + { + CDXVAWrapper *DXVADevice = dynamic_cast(&(*deviceWrapper)); + LPDIRECT3DDEVICE9EX device = (LPDIRECT3DDEVICE9EX)(DXVADevice->D3D9()).Device(); + device->CreateQuery(D3DQUERYTYPE_EVENT, &eventQuery); + eventQuery->Issue(D3DISSUE_END); + + while (S_FALSE == eventQuery->GetData(NULL, 0, D3DGETDATA_FLUSH)) + ; + } + break; + default: + log_error("Unknown adapter type\n"); + return false; + break; + } + if(eventQuery) + { + eventQuery->Release(); + } +#else + return TEST_NOT_IMPLEMENTED; +#endif + } + + error = clEnqueueAcquireDX9MediaSurfacesKHR(cmdQueue, static_cast(memObjList.size()), &memObjList.at(0), 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueAcquireDX9MediaSurfacesKHR failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + size_t origin[3] = {0,0,0}; + size_t offset = 0; + size_t frameSize = width * height * 3 / 2; + std::vector out( frameSize, 0 ); + for (size_t i = 0; i < memObjList.size(); ++i) + { + size_t planeWidth = (i == 0) ? width: width / 2; + size_t planeHeight = (i == 0) ? 
height: height / 2; + size_t regionPlane[3] = {planeWidth, planeHeight, 1}; + + error = clEnqueueReadImage(cmdQueue, memObjList.at(i), CL_TRUE, origin, regionPlane, 0, 0, &out.at(offset), 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueReadImage failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + offset += planeWidth * planeHeight; + } + + if (!YUVCompare(surfaceFormat, out, bufferIn, width, height)) + { + log_error("OCL object verification failed - clEnqueueReadImage\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + error = clEnqueueReleaseDX9MediaSurfacesKHR(cmdQueue, static_cast(memObjList.size()), &memObjList.at(0), 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueReleaseDX9MediaSurfacesKHR failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + if (userSync == CL_TRUE) + { + error = clFinish(cmdQueue); + if (error != CL_SUCCESS) + { + log_error("clFinish failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + } + + //shared object verification + std::vector bufferOut(frameSize, 0); + if (!YUVSurfaceGet(surfaceFormat, surface, bufferOut, width, height)) + { + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + if (!YUVCompare(surfaceFormat, bufferOut, bufferIn, width, height)) + { + log_error("Media surface is different than expected\n"); + result.ResultSub(CResult::TEST_FAIL); + } + } + + if (!deviceWrapper->Status()) + { + std::string adapterName; + AdapterToString(adapterType, adapterName); + log_error("%s init failed\n", adapterName.c_str()); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + return result.Result(); +} + +int test_interop_user_sync(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + const unsigned int WIDTH = 256; + const unsigned int HEIGHT = 256; + + std::vector adapters; +#if defined(_WIN32) + 
adapters.push_back(CL_ADAPTER_D3D9_KHR); + adapters.push_back(CL_ADAPTER_D3D9EX_KHR); + adapters.push_back(CL_ADAPTER_DXVA_KHR); +#else + return TEST_NOT_IMPLEMENTED; +#endif + + std::vector contextFuncs; + contextFuncs.push_back(CONTEXT_CREATE_DEFAULT); + contextFuncs.push_back(CONTEXT_CREATE_FROM_TYPE); + + std::vector formats; + formats.push_back(SURFACE_FORMAT_NV12); + formats.push_back(SURFACE_FORMAT_YV12); + + std::vector sharedHandleTypes; + sharedHandleTypes.push_back(SHARED_HANDLE_DISABLED); + sharedHandleTypes.push_back(SHARED_HANDLE_ENABLED); + + std::vector sync; + sync.push_back(CL_FALSE); + sync.push_back(CL_TRUE); + + CResult result; + for (size_t adapterIdx = 0; adapterIdx < adapters.size(); ++adapterIdx) + { + //iteration through all create context functions + for (size_t contextFuncIdx = 0; contextFuncIdx < contextFuncs.size(); ++contextFuncIdx) + { + //iteration through YUV formats + for (size_t formatIdx = 0; formatIdx < formats.size(); ++formatIdx) + { + //shared handle enabled or disabled + for (size_t sharedHandleIdx = 0; sharedHandleIdx < sharedHandleTypes.size(); ++sharedHandleIdx) + { + //user sync interop disabled or enabled + for (size_t syncIdx = 0; syncIdx < sync.size(); ++syncIdx) + { + if (adapters[adapterIdx] == CL_ADAPTER_D3D9_KHR && sharedHandleTypes[sharedHandleIdx] == SHARED_HANDLE_ENABLED) + continue; + + if(interop_user_sync(deviceID, context, queue, num_elements, WIDTH, HEIGHT, + contextFuncs[contextFuncIdx], adapters[adapterIdx], formats[formatIdx], + sharedHandleTypes[sharedHandleIdx], sync[syncIdx]) != 0) + { + std::string syncStr = (sync[syncIdx] == CL_TRUE) ? "user sync enabled": "user sync disabled"; + std::string sharedHandle = (sharedHandleTypes[sharedHandleIdx] == SHARED_HANDLE_ENABLED)? 
"shared handle": "no shared handle"; + std::string adapterStr; + std::string formatStr; + SurfaceFormatToString(formats[formatIdx], formatStr); + AdapterToString(adapters[adapterIdx], adapterStr); + + log_error("\nTest case - clCreateContext (%s, %s, %s, %s) failed\n\n", adapterStr.c_str(), formatStr.c_str(), sharedHandle.c_str(), syncStr.c_str()); + result.ResultSub(CResult::TEST_FAIL); + } + } + } + } + } + } + + return result.Result(); +} diff --git a/test_extensions/media_sharing/test_memory_access.cpp b/test_extensions/media_sharing/test_memory_access.cpp new file mode 100644 index 00000000..10fe7543 --- /dev/null +++ b/test_extensions/media_sharing/test_memory_access.cpp @@ -0,0 +1,455 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "utils.h" + +int memory_access(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, + unsigned int width, unsigned int height, cl_dx9_media_adapter_type_khr adapterType, + TSurfaceFormat surfaceFormat, TSharedHandleType sharedHandle) +{ + CResult result; + + std::auto_ptr deviceWrapper; + //creates device + if (!DeviceCreate(adapterType, deviceWrapper)) + { + result.ResultSub(CResult::TEST_ERROR); + return result.Result(); + } + + //generate input and expected data + size_t frameSize = width * height * 3 / 2; + std::vector bufferRef0(frameSize, 0); + std::vector bufferRef1(frameSize, 0); + std::vector bufferRef2(frameSize, 0); + if (!YUVGenerate(surfaceFormat, bufferRef0, width, height, 0, 90) || + !YUVGenerate(surfaceFormat, bufferRef1, width, height, 91, 180) || + !YUVGenerate(surfaceFormat, bufferRef2, width, height, 181, 255)) + { + result.ResultSub(CResult::TEST_ERROR); + return result.Result(); + } + + //iterates through all devices + while (deviceWrapper->AdapterNext()) + { + if (surfaceFormat != SURFACE_FORMAT_NV12 && !SurfaceFormatCheck(adapterType, *deviceWrapper, surfaceFormat)) + { + std::string sharedHandleStr = (sharedHandle == SHARED_HANDLE_ENABLED)? "yes": "no"; + std::string formatStr; + std::string adapterStr; + SurfaceFormatToString(surfaceFormat, formatStr); + AdapterToString(adapterType, adapterStr); + log_info("Skipping test case, image format is not supported by a device (adapter type: %s, format: %s, shared handle: %s)\n", + adapterStr.c_str(), formatStr.c_str(), sharedHandleStr.c_str()); + return result.Result(); + } + + void *objectSharedHandle = 0; + std::auto_ptr surface; + + //creates surface + if (!MediaSurfaceCreate(adapterType, width, height, surfaceFormat, *deviceWrapper, surface, + (sharedHandle == SHARED_HANDLE_ENABLED) ? 
true: false, &objectSharedHandle)) + { + log_error("Media surface creation failed for %i adapter\n", deviceWrapper->AdapterIdx()); + result.ResultSub(CResult::TEST_ERROR); + return result.Result(); + } + + if (!YUVSurfaceSet(surfaceFormat, surface, bufferRef0, width, height)) + { + result.ResultSub(CResult::TEST_ERROR); + return result.Result(); + } + + cl_context_properties contextProperties[] = { + CL_CONTEXT_PLATFORM, (cl_context_properties)gPlatformIDdetected, + AdapterTypeToContextInfo(adapterType), (cl_context_properties)deviceWrapper->Device(), + 0, + }; + + cl_int error; + clContextWrapper ctx = clCreateContext(&contextProperties[0], 1, &gDeviceIDdetected, NULL, NULL, &error); + if (error != CL_SUCCESS) + { + log_error("clCreateContext failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + clCommandQueueWrapper cmdQueue = clCreateCommandQueueWithProperties(ctx, gDeviceIDdetected, 0, &error ); + if (error != CL_SUCCESS) + { + log_error("Unable to create command queue: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + { //memory access write +#if defined(_WIN32) + cl_dx9_surface_info_khr surfaceInfo; + surfaceInfo.resource = *(static_cast(surface.get())); + surfaceInfo.shared_handle = objectSharedHandle; +#else + void *surfaceInfo = 0; + return TEST_NOT_IMPLEMENTED; +#endif + + std::vector memObjList; + unsigned int planesNum = PlanesNum(surfaceFormat); + std::vector planesList(planesNum); + for (unsigned int planeIdx = 0; planeIdx < planesNum; ++planeIdx) + { + planesList[planeIdx] = clCreateFromDX9MediaSurfaceKHR(ctx, CL_MEM_WRITE_ONLY, adapterType, &surfaceInfo, planeIdx, &error); + if (error != CL_SUCCESS) + { + log_error("clCreateFromDX9MediaSurfaceKHR failed for WRITE_ONLY plane %i: %s\n", planeIdx, IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + memObjList.push_back(planesList[planeIdx]); + 
} + + error = clEnqueueAcquireDX9MediaSurfacesKHR(cmdQueue, static_cast(memObjList.size()), &memObjList[0], 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueAcquireDX9MediaSurfacesKHR failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + size_t offset = 0; + size_t origin[3] = {0,0,0}; + for (size_t i = 0; i < memObjList.size(); ++i) + { + size_t planeWidth = (i == 0) ? width: width / 2; + size_t planeHeight = (i == 0) ? height: height / 2; + size_t regionPlane[3] = {planeWidth, planeHeight, 1}; + + error = clEnqueueWriteImage(cmdQueue, memObjList[i], CL_TRUE, origin, regionPlane, + 0, 0, &bufferRef1[offset], 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueWriteImage failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + offset += planeWidth * planeHeight; + } + + error = clEnqueueReleaseDX9MediaSurfacesKHR(cmdQueue, static_cast(memObjList.size()), &memObjList[0], 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueReleaseDX9MediaSurfacesKHR failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + } + + std::vector bufferOut0(frameSize, 0); + if (!YUVSurfaceGet(surfaceFormat, surface, bufferOut0, width, height)) + { + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + if (!YUVCompare(surfaceFormat, bufferOut0, bufferRef1, width, height)) + { + log_error("Media surface is different than expected\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + { //memory access read +#if defined(_WIN32) + cl_dx9_surface_info_khr surfaceInfo; + surfaceInfo.resource = *(static_cast(surface.get())); + surfaceInfo.shared_handle = objectSharedHandle; +#else + void *surfaceInfo = 0; + return TEST_NOT_IMPLEMENTED; +#endif + + std::vector memObjList; + unsigned int planesNum = PlanesNum(surfaceFormat); + std::vector planesList(planesNum); + for (unsigned int planeIdx = 0; planeIdx < planesNum; ++planeIdx) + 
{ + planesList[planeIdx] = clCreateFromDX9MediaSurfaceKHR(ctx, CL_MEM_READ_ONLY, adapterType, &surfaceInfo, planeIdx, &error); + if (error != CL_SUCCESS) + { + log_error("clCreateFromDX9MediaSurfaceKHR failed for READ_ONLY plane %i: %s\n", planeIdx, IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + memObjList.push_back(planesList[planeIdx]); + } + + error = clEnqueueAcquireDX9MediaSurfacesKHR(cmdQueue, static_cast(memObjList.size()), &memObjList[0], 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueAcquireDX9MediaSurfacesKHR failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + std::vector out( frameSize, 0 ); + size_t offset = 0; + size_t origin[3] = {0,0,0}; + + for (size_t i = 0; i < memObjList.size(); ++i) + { + size_t planeWidth = (i == 0) ? width: width / 2; + size_t planeHeight = (i == 0) ? height: height / 2; + size_t regionPlane[3] = {planeWidth, planeHeight, 1}; + + error = clEnqueueReadImage(cmdQueue, memObjList[i], CL_TRUE, origin, regionPlane, 0, 0, + &out[offset], 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueReadImage failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + offset += planeWidth * planeHeight; + } + + if (!YUVCompare(surfaceFormat, out, bufferRef1, width, height)) + { + log_error("OCL image (READ_ONLY) is different then expected\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + error = clEnqueueReleaseDX9MediaSurfacesKHR(cmdQueue, static_cast(memObjList.size()), &memObjList[0], 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueReleaseDX9MediaSurfacesKHR failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + } + + std::vector bufferOut1(frameSize, 0); + if (!YUVSurfaceGet(surfaceFormat, surface, bufferOut1, width, height)) + { + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + if 
(!YUVCompare(surfaceFormat, bufferOut1, bufferRef1, width, height)) + { + log_error("Media surface is different than expected\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + { //memory access read write +#if defined(_WIN32) + cl_dx9_surface_info_khr surfaceInfo; + surfaceInfo.resource = *(static_cast(surface.get())); + surfaceInfo.shared_handle = objectSharedHandle; +#else + void *surfaceInfo = 0; + return TEST_NOT_IMPLEMENTED; +#endif + + std::vector memObjList; + unsigned int planesNum = PlanesNum(surfaceFormat); + std::vector planesList(planesNum); + for (unsigned int planeIdx = 0; planeIdx < planesNum; ++planeIdx) + { + planesList[planeIdx] = clCreateFromDX9MediaSurfaceKHR(ctx, CL_MEM_READ_WRITE, adapterType, &surfaceInfo, planeIdx, &error); + if (error != CL_SUCCESS) + { + log_error("clCreateFromDX9MediaSurfaceKHR failed for READ_WRITE plane %i: %s\n", planeIdx, IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + memObjList.push_back(planesList[planeIdx]); + } + + error = clEnqueueAcquireDX9MediaSurfacesKHR(cmdQueue, static_cast(memObjList.size()), &memObjList[0], 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueAcquireDX9MediaSurfacesKHR failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + { //read + std::vector out( frameSize, 0 ); + size_t offset = 0; + size_t origin[3] = {0,0,0}; + + for (size_t i = 0; i < memObjList.size(); ++i) + { + size_t planeWidth = (i == 0) ? width: width / 2; + size_t planeHeight = (i == 0) ? 
height: height / 2; + size_t regionPlane[3] = {planeWidth, planeHeight, 1}; + + error = clEnqueueReadImage(cmdQueue, memObjList[i], CL_TRUE, origin, regionPlane, 0, 0, + &out[offset], 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueReadImage failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + offset += planeWidth * planeHeight; + } + + if (!YUVCompare(surfaceFormat, out, bufferRef1, width, height)) + { + log_error("OCL image (READ_WRITE) is different then expected\n"); + result.ResultSub(CResult::TEST_FAIL); + } + } + + { //write + size_t offset = 0; + size_t origin[3] = {0,0,0}; + for (size_t i = 0; i < memObjList.size(); ++i) + { + size_t planeWidth = (i == 0) ? width: width / 2; + size_t planeHeight = (i == 0) ? height: height / 2; + size_t regionPlane[3] = {planeWidth, planeHeight, 1}; + + error = clEnqueueWriteImage(cmdQueue, memObjList[i], CL_TRUE, origin, regionPlane, + 0, 0, &bufferRef2[offset], 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueWriteImage failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + offset += planeWidth * planeHeight; + } + } + + error = clEnqueueReleaseDX9MediaSurfacesKHR(cmdQueue, static_cast(memObjList.size()), &memObjList[0], 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueReleaseDX9MediaSurfacesKHR failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + } + + std::vector bufferOut2(frameSize, 0); + if (!YUVSurfaceGet(surfaceFormat, surface, bufferOut2, width, height)) + { + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + if (!YUVCompare(surfaceFormat, bufferOut2, bufferRef2, width, height)) + { + log_error("Media surface is different than expected\n"); + result.ResultSub(CResult::TEST_FAIL); + } + } + + if (!deviceWrapper->Status()) + { + std::string adapterName; + AdapterToString(adapterType, adapterName); + log_error("%s init failed\n", adapterName.c_str()); + 
result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + return result.Result(); +} + +int test_memory_access(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + CResult result; + +#if defined(_WIN32) + //D3D9 + if(memory_access(deviceID, context, queue, num_elements, 256, 256, CL_ADAPTER_D3D9_KHR, + SURFACE_FORMAT_NV12, SHARED_HANDLE_DISABLED) != 0) + { + log_error("\nTest case (D3D9, NV12, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(memory_access(deviceID, context, queue, num_elements, 512, 256, CL_ADAPTER_D3D9_KHR, + SURFACE_FORMAT_YV12, SHARED_HANDLE_DISABLED) != 0) + { + log_error("\nTest case (D3D9, YV12, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + //D3D9EX + if(memory_access(deviceID, context, queue, num_elements, 256, 512, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_NV12, SHARED_HANDLE_DISABLED) != 0) + { + log_error("\nTest case (D3D9EX, NV12, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(memory_access(deviceID, context, queue, num_elements, 512, 256, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_NV12, SHARED_HANDLE_ENABLED) != 0) + { + log_error("\nTest case (D3D9EX, NV12, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(memory_access(deviceID, context, queue, num_elements, 256, 256, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_YV12, SHARED_HANDLE_DISABLED) != 0) + { + log_error("\nTest case (D3D9EX, YV12, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(memory_access(deviceID, context, queue, num_elements, 128, 128, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_YV12, SHARED_HANDLE_ENABLED) != 0) + { + log_error("\nTest case (D3D9EX, YV12, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + //DXVA + if(memory_access(deviceID, context, queue, num_elements, 128, 128, CL_ADAPTER_DXVA_KHR, + SURFACE_FORMAT_NV12, 
SHARED_HANDLE_DISABLED) != 0) + { + log_error("\nTest case (DXVA, NV12, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(memory_access(deviceID, context, queue, num_elements, 64, 64, CL_ADAPTER_DXVA_KHR, + SURFACE_FORMAT_NV12, SHARED_HANDLE_ENABLED) != 0) + { + log_error("\nTest case (DXVA, NV12, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(memory_access(deviceID, context, queue, num_elements, 512, 512, CL_ADAPTER_DXVA_KHR, + SURFACE_FORMAT_YV12, SHARED_HANDLE_DISABLED) != 0) + { + log_error("\nTest case (DXVA, YV12, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(memory_access(deviceID, context, queue, num_elements, 1024, 1024, CL_ADAPTER_DXVA_KHR, + SURFACE_FORMAT_YV12, SHARED_HANDLE_ENABLED) != 0) + { + log_error("\nTest case (DXVA, YV12, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + +#else + return TEST_NOT_IMPLEMENTED; +#endif + + return result.Result(); +} diff --git a/test_extensions/media_sharing/test_other_data_types.cpp b/test_extensions/media_sharing/test_other_data_types.cpp new file mode 100644 index 00000000..0e0bce35 --- /dev/null +++ b/test_extensions/media_sharing/test_other_data_types.cpp @@ -0,0 +1,1009 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include +#include + +#include "../../test_common/harness/errorHelpers.h" +#include "../../test_common/harness/kernelHelpers.h" + +#include "utils.h" + +template +int other_data_types(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, + unsigned int iterationNum, unsigned int width, unsigned int height, cl_dx9_media_adapter_type_khr adapterType, + TSurfaceFormat surfaceFormat, TSharedHandleType sharedHandle) +{ + const unsigned int FRAME_NUM = 2; + const float MAX_VALUE = 0.6f; + const std::string PROGRAM_STR = + "__kernel void TestFunction( read_only image2d_t imageIn, write_only image2d_t imageOut, " + NL " sampler_t sampler, __global int *imageRes)" + NL "{" + NL " int w = get_global_id(0);" + NL " int h = get_global_id(1);" + NL " int width = get_image_width(imageIn);" + NL " int height = get_image_height(imageOut);" + NL " float4 color0 = read_imagef(imageIn, sampler, (int2)(w,h)) - 0.2f;" + NL " float4 color1 = read_imagef(imageIn, sampler, (float2)(w,h)) - 0.2f;" + NL " color0 = (color0 == color1) ? 
color0: (float4)(0.5, 0.5, 0.5, 0.5);" + NL " write_imagef(imageOut, (int2)(w,h), color0);" + NL " if(w == 0 && h == 0)" + NL " {" + NL " imageRes[0] = width;" + NL " imageRes[1] = height;" + NL " }" + NL "}"; + + CResult result; + + cl_image_format format; + if(!SurfaceFormatToOCL(surfaceFormat, format)) + { + result.ResultSub(CResult::TEST_ERROR); + return result.Result(); + } + + std::auto_ptr deviceWrapper; + if (!DeviceCreate(adapterType, deviceWrapper)) + { + result.ResultSub(CResult::TEST_ERROR); + return result.Result(); + } + + while (deviceWrapper->AdapterNext()) + { + cl_context_properties contextProperties[] = { + CL_CONTEXT_PLATFORM, (cl_context_properties)gPlatformIDdetected, + AdapterTypeToContextInfo(adapterType), (cl_context_properties)deviceWrapper->Device(), + 0, + }; + + cl_int error; + clContextWrapper ctx = clCreateContext(&contextProperties[0], 1, &gDeviceIDdetected, NULL, NULL, &error); + if (error != CL_SUCCESS) + { + log_error("clCreateContext failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + clCommandQueueWrapper cmdQueue = clCreateCommandQueueWithProperties(ctx, gDeviceIDdetected, 0, &error ); + if (error != CL_SUCCESS) + { + log_error("Unable to create command queue: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + if (!SurfaceFormatCheck(adapterType, *deviceWrapper, surfaceFormat)) + { + std::string sharedHandleStr = (sharedHandle == SHARED_HANDLE_ENABLED)? 
"yes": "no"; + std::string formatStr; + std::string adapterStr; + SurfaceFormatToString(surfaceFormat, formatStr); + AdapterToString(adapterType, adapterStr); + log_info("Skipping test case, image format is not supported by a device (adapter type: %s, format: %s, shared handle: %s)\n", + adapterStr.c_str(), formatStr.c_str(), sharedHandleStr.c_str()); + return result.Result(); + } + + if(!ImageFormatCheck(ctx, CL_MEM_OBJECT_IMAGE2D, format)) + { + std::string sharedHandleStr = (sharedHandle == SHARED_HANDLE_ENABLED)? "yes": "no"; + std::string formatStr; + std::string adapterStr; + SurfaceFormatToString(surfaceFormat, formatStr); + AdapterToString(adapterType, adapterStr); + log_info("Skipping test case, image format is not supported by OCL (adapter type: %s, format: %s, shared handle: %s)\n", + adapterStr.c_str(), formatStr.c_str(), sharedHandleStr.c_str()); + return result.Result(); + } + + if (format.image_channel_data_type == CL_HALF_FLOAT) + { + if (DetectFloatToHalfRoundingMode(cmdQueue)) + { + log_error("Unable to detect rounding mode\n"); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + } + + std::vector > bufferIn(FRAME_NUM); + std::vector > bufferExp(FRAME_NUM); + float step = MAX_VALUE / static_cast(FRAME_NUM); + unsigned int planeNum = ChannelNum(surfaceFormat); + for (size_t i = 0; i < FRAME_NUM; ++i) + { + DataGenerate(surfaceFormat, format.image_channel_data_type, bufferIn[i], width, height, planeNum, step * i, step * (i + 1)); + DataGenerate(surfaceFormat, format.image_channel_data_type, bufferExp[i], width, height, planeNum, step * i, step * (i + 1), 0.2f); + } + + void *objectSrcHandle = 0; + std::auto_ptr surfaceSrc; + if (!MediaSurfaceCreate(adapterType, width, height, surfaceFormat, *deviceWrapper, surfaceSrc, + (sharedHandle == SHARED_HANDLE_ENABLED) ? 
true: false, &objectSrcHandle)) + { + log_error("Media surface creation failed for %i adapter\n", deviceWrapper->AdapterIdx()); + result.ResultSub(CResult::TEST_ERROR); + return result.Result(); + } + + void *objectDstHandle = 0; + std::auto_ptr surfaceDst; + if (!MediaSurfaceCreate(adapterType, width, height, surfaceFormat, *deviceWrapper, surfaceDst, + (sharedHandle == SHARED_HANDLE_ENABLED) ? true: false, &objectDstHandle)) + { + log_error("Media surface creation failed for %i adapter\n", deviceWrapper->AdapterIdx()); + result.ResultSub(CResult::TEST_ERROR); + return result.Result(); + } + +#if defined(_WIN32) + cl_dx9_surface_info_khr surfaceSrcInfo; + CD3D9SurfaceWrapper *dx9SurfaceSrc = (static_cast(surfaceSrc.get())); + surfaceSrcInfo.resource = *dx9SurfaceSrc; + surfaceSrcInfo.shared_handle = objectSrcHandle; + + cl_dx9_surface_info_khr surfaceDstInfo; + CD3D9SurfaceWrapper *dx9SurfaceDst = (static_cast(surfaceDst.get())); + surfaceDstInfo.resource = *dx9SurfaceDst; + surfaceDstInfo.shared_handle = objectDstHandle; +#else + void *surfaceSrcInfo = 0; + void *surfaceDstInfo = 0; + return TEST_NOT_IMPLEMENTED; +#endif + + //create OCL shared object + clMemWrapper objectSrcShared = clCreateFromDX9MediaSurfaceKHR(ctx, CL_MEM_READ_WRITE, adapterType, &surfaceSrcInfo, 0, &error); + if (error != CL_SUCCESS) + { + log_error("clCreateFromDX9MediaSurfaceKHR failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + clMemWrapper objectDstShared = clCreateFromDX9MediaSurfaceKHR(ctx, CL_MEM_READ_WRITE, adapterType, &surfaceDstInfo, 0, &error); + if (error != CL_SUCCESS) + { + log_error("clCreateFromDX9MediaSurfaceKHR failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + std::vector memObjList; + memObjList.push_back(objectSrcShared); + memObjList.push_back(objectDstShared); + + if (!GetMemObjInfo(objectSrcShared, adapterType, surfaceSrc, objectSrcHandle)) + 
{ + log_error("Invalid memory object info\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if (!GetImageInfo(objectSrcShared, format, sizeof(T) * planeNum, + width * sizeof(T) * planeNum, 0, width, height, 0, 0)) + { + log_error("clGetImageInfo failed\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + for (size_t frameIdx = 0; frameIdx < iterationNum; ++frameIdx) + { + //surface set +#if defined(_WIN32) + D3DLOCKED_RECT rect; + if (FAILED((*dx9SurfaceSrc)->LockRect(&rect, NULL, 0))) + { + log_error("Surface lock failed\n"); + result.ResultSub(CResult::TEST_ERROR); + return result.Result(); + } + + size_t pitch = rect.Pitch / sizeof(T); + size_t lineSize = width * planeNum * sizeof(T); + T *ptr = static_cast(rect.pBits); + + for (size_t y = 0; y < height; ++y) + memcpy(ptr + y * pitch, &bufferIn[frameIdx % FRAME_NUM][y * width * planeNum], lineSize); + + (*dx9SurfaceSrc)->UnlockRect(); +#else + void *surfaceInfo = 0; + return TEST_NOT_IMPLEMENTED; +#endif + + error = clEnqueueAcquireDX9MediaSurfacesKHR(cmdQueue, static_cast(memObjList.size()), &memObjList[0], 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueAcquireMediaSurfaceKHR failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + size_t origin[3] = {0,0,0}; + size_t region[3] = {width, height, 1}; + + { //read operation + std::vector out( planeNum * width * height, 0 ); + error = clEnqueueReadImage(cmdQueue, objectSrcShared, CL_TRUE, origin, region, 0, 0, &out[0], 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueReadImage failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + if (!DataCompare(surfaceFormat, format.image_channel_data_type, out, bufferIn[frameIdx % FRAME_NUM], width, height, planeNum)) + { + log_error("Frame idx: %i, OCL object is different then expected\n", frameIdx); + result.ResultSub(CResult::TEST_FAIL); + } + } + + { //write operation + error = 
clEnqueueWriteImage(cmdQueue, objectSrcShared, CL_TRUE, origin, region, + 0, 0, &bufferExp[frameIdx % FRAME_NUM][0], 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueWriteImage failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + } + + { //kernel operations + clSamplerWrapper sampler = clCreateSampler( ctx, CL_FALSE, CL_ADDRESS_NONE, CL_FILTER_NEAREST, &error ); + if(error != CL_SUCCESS) + { + log_error("Unable to create sampler\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + size_t threads[ 2 ] = { width, height }; + clProgramWrapper program; + clKernelWrapper kernel; + const char *progPtr = PROGRAM_STR.c_str(); + if(create_single_kernel_helper(ctx, &program, &kernel, 1, (const char **)&progPtr, "TestFunction")) + result.ResultSub(CResult::TEST_FAIL); + + error = clSetKernelArg( kernel, 0, sizeof( objectSrcShared ), &(objectSrcShared) ); + if (error != CL_SUCCESS) + { + log_error("Unable to set kernel arguments" ); + result.ResultSub(CResult::TEST_FAIL); + } + + error = clSetKernelArg( kernel, 1, sizeof( objectDstShared ), &(objectDstShared) ); + if (error != CL_SUCCESS) + { + log_error("Unable to set kernel arguments" ); + result.ResultSub(CResult::TEST_FAIL); + } + + error = clSetKernelArg( kernel, 2, sizeof( sampler ), &sampler ); + if (error != CL_SUCCESS) + { + log_error("Unable to set kernel arguments" ); + result.ResultSub(CResult::TEST_FAIL); + } + + size_t bufferSize = sizeof(cl_int) * 2; + clMemWrapper imageRes = clCreateBuffer( ctx, CL_MEM_READ_WRITE, bufferSize, NULL, &error); + if (error != CL_SUCCESS) + { + log_error("clCreateBuffer failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + error = clSetKernelArg( kernel, 3, sizeof( imageRes ), &imageRes ); + + size_t localThreads[ 2 ]; + error = get_max_common_2D_work_group_size( ctx, kernel, threads, localThreads ); + if (error != CL_SUCCESS) + { + log_error("Unable to get work group size to use" ); + 
result.ResultSub(CResult::TEST_FAIL); + } + + error = clEnqueueNDRangeKernel( cmdQueue, kernel, 2, NULL, threads, localThreads, 0, NULL, NULL ); + if (error != CL_SUCCESS) + { + log_error("Unable to execute test kernel" ); + result.ResultSub(CResult::TEST_FAIL); + } + + std::vector imageResOut(2, 0); + error = clEnqueueReadBuffer( cmdQueue, imageRes, CL_TRUE, 0, bufferSize, &imageResOut[0], 0, NULL, NULL ); + if (error != CL_SUCCESS) + { + log_error("Unable to read buffer"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(imageResOut[0] != width) + { + log_error("Invalid width value, test = %i, expected = %i\n", imageResOut[0], width); + result.ResultSub(CResult::TEST_FAIL); + } + + if(imageResOut[1] != height) + { + log_error("Invalid height value, test = %i, expected = %i\n", imageResOut[1], height); + result.ResultSub(CResult::TEST_FAIL); + } + } + + { //map operation + size_t mapOrigin[3] = {0,0,0}; + size_t mapRegion[3] = {width, height, 1}; + + std::vector out( width * height * planeNum, 0 ); + size_t rowPitch = 0; + size_t slicePitch = 0; + void *mapPtr = clEnqueueMapImage(cmdQueue, objectDstShared, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, mapOrigin, mapRegion, + &rowPitch, &slicePitch, 0, 0, 0, &error); + if (error != CL_SUCCESS) + { + log_error("clEnqueueMapImage failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + for (size_t y = 0; y < height; ++y) + memcpy(&out[y * width * planeNum], static_cast(mapPtr) + y * rowPitch / sizeof(T), + width * planeNum * sizeof(T)); + + if (!DataCompare(surfaceFormat, format.image_channel_data_type, out, bufferIn[frameIdx % FRAME_NUM], width, height, planeNum)) + { + log_error("Frame idx: %i, Mapped OCL object is different then expected\n", frameIdx); + result.ResultSub(CResult::TEST_FAIL); + } + + for (size_t y = 0; y < height; ++y) + memcpy(static_cast(mapPtr) + y * rowPitch / sizeof(T), &bufferExp[frameIdx % FRAME_NUM][y * width * planeNum], + width * planeNum * sizeof(T)); + + error = 
clEnqueueUnmapMemObject(cmdQueue, objectDstShared, mapPtr, 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueUnmapMemObject failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + } + + error = clEnqueueReleaseDX9MediaSurfacesKHR(cmdQueue, static_cast(memObjList.size()), &memObjList[0], 0, 0, 0); + if (error != CL_SUCCESS) + { + log_error("clEnqueueReleaseMediaSurfaceKHR failed: %s\n", IGetErrorString(error)); + result.ResultSub(CResult::TEST_FAIL); + } + + std::vector out(width * height * planeNum, 0); + //surface get +#if defined(_WIN32) + if (FAILED((*dx9SurfaceDst)->LockRect(&rect, NULL, 0))) + { + log_error("Surface lock failed\n"); + result.ResultSub(CResult::TEST_ERROR); + return result.Result(); + } + + pitch = rect.Pitch / sizeof(T); + lineSize = width * planeNum * sizeof(T); + ptr = static_cast(rect.pBits); + for (size_t y = 0; y < height; ++y) + memcpy(&out[y * width * planeNum], ptr + y * pitch, lineSize); + + (*dx9SurfaceDst)->UnlockRect(); +#else + return TEST_NOT_IMPLEMENTED; +#endif + + if (!DataCompare(surfaceFormat, format.image_channel_data_type, out, bufferExp[frameIdx % FRAME_NUM], width, height, planeNum)) + { + log_error("Frame idx: %i, media object is different then expected\n", frameIdx); + result.ResultSub(CResult::TEST_FAIL); + } + } + } + + if (!deviceWrapper->Status()) + { + std::string adapter; + AdapterToString(adapterType, adapter); + log_error("%s init failed\n", adapter.c_str()); + result.ResultSub(CResult::TEST_FAIL); + return result.Result(); + } + + return result.Result(); +} + +int test_other_data_types(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +{ + CResult result; + +#if defined(_WIN32) + //D3D9 + if(other_data_types(deviceID, context, queue, num_elements, 10, 64, 256, CL_ADAPTER_D3D9_KHR, + SURFACE_FORMAT_R32F, SHARED_HANDLE_DISABLED) != 0) + { + log_error("\nTest case (D3D9, R32F, no shared handle) failed\n\n"); + 
result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 256, 128, CL_ADAPTER_D3D9_KHR, + SURFACE_FORMAT_R16F, SHARED_HANDLE_DISABLED) != 0) + { + log_error("\nTest case (D3D9, R16F, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 512, 256, CL_ADAPTER_D3D9_KHR, + SURFACE_FORMAT_L16, SHARED_HANDLE_DISABLED) != 0) + { + log_error("\nTest case (D3D9, L16, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 256, 512, CL_ADAPTER_D3D9_KHR, + SURFACE_FORMAT_A8, SHARED_HANDLE_DISABLED) != 0) + { + log_error("\nTest case (D3D9, A8, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 1024, 32, CL_ADAPTER_D3D9_KHR, + SURFACE_FORMAT_L8, SHARED_HANDLE_DISABLED) != 0) + { + log_error("\nTest case (D3D9, L8, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 32, 1024, CL_ADAPTER_D3D9_KHR, + SURFACE_FORMAT_G32R32F, SHARED_HANDLE_DISABLED) != 0) + { + log_error("\nTest case (D3D9, G32R32F, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 64, 64, CL_ADAPTER_D3D9_KHR, + SURFACE_FORMAT_G16R16F, SHARED_HANDLE_DISABLED) != 0) + { + log_error("\nTest case (D3D9, G16R16F, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 256, 256, CL_ADAPTER_D3D9_KHR, + SURFACE_FORMAT_G16R16, SHARED_HANDLE_DISABLED) != 0) + { + log_error("\nTest case (D3D9, G16R16, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 
10, 512, 128, CL_ADAPTER_D3D9_KHR, + SURFACE_FORMAT_A8L8, SHARED_HANDLE_DISABLED) != 0) + { + log_error("\nTest case (D3D9, A8L8, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 128, 512, CL_ADAPTER_D3D9_KHR, + SURFACE_FORMAT_A32B32G32R32F, SHARED_HANDLE_DISABLED) != 0) + { + log_error("\nTest case (D3D9, A32B32G32R32F, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 128, 128, CL_ADAPTER_D3D9_KHR, + SURFACE_FORMAT_A16B16G16R16F, SHARED_HANDLE_DISABLED) != 0) + { + log_error("\nTest case (D3D9, A16B16G16R16F, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 64, 128, CL_ADAPTER_D3D9_KHR, + SURFACE_FORMAT_A16B16G16R16, SHARED_HANDLE_DISABLED) != 0) + { + log_error("\nTest case (D3D9, A16B16G16R16, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 128, 64, CL_ADAPTER_D3D9_KHR, + SURFACE_FORMAT_A8B8G8R8, SHARED_HANDLE_DISABLED) != 0) + { + log_error("\nTest case (D3D9, A8B8G8R8, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 16, 512, CL_ADAPTER_D3D9_KHR, + SURFACE_FORMAT_X8B8G8R8, SHARED_HANDLE_DISABLED) != 0) + { + log_error("\nTest case (D3D9, X8B8G8R8, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 512, 16, CL_ADAPTER_D3D9_KHR, + SURFACE_FORMAT_A8R8G8B8, SHARED_HANDLE_DISABLED) != 0) + { + log_error("\nTest case (D3D9, A8R8G8B8, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 256, 256, CL_ADAPTER_D3D9_KHR, + 
SURFACE_FORMAT_X8R8G8B8, SHARED_HANDLE_DISABLED) != 0) + { + log_error("\nTest case (D3D9, X8R8G8B8, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + //D3D9EX + + if(other_data_types(deviceID, context, queue, num_elements, 10, 64, 256, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_R32F, SHARED_HANDLE_DISABLED) != 0) + { + log_error("\nTest case (D3D9EX, R32F, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 64, 256, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_R32F, SHARED_HANDLE_ENABLED) != 0) + { + log_error("\nTest case (D3D9EX, R32F, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 256, 128, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_R16F, SHARED_HANDLE_DISABLED) != 0) + { + log_error("\nTest case (D3D9EX, R16F, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 256, 128, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_R16F, SHARED_HANDLE_ENABLED) != 0) + { + log_error("\nTest case (D3D9EX, R16F, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 512, 256, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_L16, SHARED_HANDLE_DISABLED) != 0) + { + log_error("\nTest case (D3D9EX, L16, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 512, 256, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_L16, SHARED_HANDLE_ENABLED) != 0) + { + log_error("\nTest case (D3D9EX, L16, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 256, 512, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_A8, SHARED_HANDLE_DISABLED) != 0) + { + log_error("\nTest case 
(D3D9EX, A8, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 256, 512, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_A8, SHARED_HANDLE_ENABLED) != 0) + { + log_error("\nTest case (D3D9EX, A8, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 1024, 32, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_L8, SHARED_HANDLE_DISABLED) != 0) + { + log_error("\nTest case (D3D9EX, L8, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 1024, 32, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_L8, SHARED_HANDLE_ENABLED) != 0) + { + log_error("\nTest case (D3D9EX, L8, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 32, 1024, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_G32R32F, SHARED_HANDLE_DISABLED) != 0) + { + log_error("\nTest case (D3D9EX, G32R32F, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 32, 1024, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_G32R32F, SHARED_HANDLE_ENABLED) != 0) + { + log_error("\nTest case (D3D9EX, G32R32F, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 64, 64, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_G16R16F, SHARED_HANDLE_DISABLED) != 0) + { + log_error("\nTest case (D3D9EX, G16R16F, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 64, 64, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_G16R16F, SHARED_HANDLE_ENABLED) != 0) + { + log_error("\nTest case (D3D9EX, G16R16F, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } 
+ + if(other_data_types(deviceID, context, queue, num_elements, 10, 256, 256, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_G16R16, SHARED_HANDLE_DISABLED) != 0) + { + log_error("\nTest case (D3D9EX, G16R16, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 256, 256, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_G16R16, SHARED_HANDLE_ENABLED) != 0) + { + log_error("\nTest case (D3D9EX, G16R16, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 512, 128, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_A8L8, SHARED_HANDLE_DISABLED) != 0) + { + log_error("\nTest case (D3D9EX, A8L8, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 512, 128, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_A8L8, SHARED_HANDLE_ENABLED) != 0) + { + log_error("\nTest case (D3D9EX, A8L8, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 128, 512, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_A32B32G32R32F, SHARED_HANDLE_DISABLED) != 0) + { + log_error("\nTest case (D3D9EX, A32B32G32R32F, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 128, 512, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_A32B32G32R32F, SHARED_HANDLE_ENABLED) != 0) + { + log_error("\nTest case (D3D9EX, A32B32G32R32F, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 128, 128, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_A16B16G16R16F, SHARED_HANDLE_DISABLED) != 0) + { + log_error("\nTest case (D3D9EX, A16B16G16R16F, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + 
if(other_data_types(deviceID, context, queue, num_elements, 10, 128, 128, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_A16B16G16R16F, SHARED_HANDLE_ENABLED) != 0) + { + log_error("\nTest case (D3D9EX, A16B16G16R16F, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 64, 128, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_A16B16G16R16, SHARED_HANDLE_DISABLED) != 0) + { + log_error("\nTest case (D3D9EX, A16B16G16R16, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 64, 128, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_A16B16G16R16, SHARED_HANDLE_ENABLED) != 0) + { + log_error("\nTest case (D3D9EX, A16B16G16R16, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 128, 64, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_A8B8G8R8, SHARED_HANDLE_DISABLED) != 0) + { + log_error("\nTest case (D3D9EX, A8B8G8R8, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 128, 64, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_A8B8G8R8, SHARED_HANDLE_ENABLED) != 0) + { + log_error("\nTest case (D3D9EX, A8B8G8R8, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 16, 512, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_X8B8G8R8, SHARED_HANDLE_DISABLED) != 0) + { + log_error("\nTest case (D3D9EX, X8B8G8R8, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 16, 512, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_X8B8G8R8, SHARED_HANDLE_ENABLED) != 0) + { + log_error("\nTest case (D3D9EX, X8B8G8R8, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + 
if(other_data_types(deviceID, context, queue, num_elements, 10, 512, 16, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_A8R8G8B8, SHARED_HANDLE_DISABLED) != 0) + { + log_error("\nTest case (D3D9EX, A8R8G8B8, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 512, 16, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_A8R8G8B8, SHARED_HANDLE_ENABLED) != 0) + { + log_error("\nTest case (D3D9EX, A8R8G8B8, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 256, 256, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_X8R8G8B8, SHARED_HANDLE_DISABLED) != 0) + { + log_error("\nTest case (D3D9EX, X8R8G8B8, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 256, 256, CL_ADAPTER_D3D9EX_KHR, + SURFACE_FORMAT_X8R8G8B8, SHARED_HANDLE_ENABLED) != 0) + { + log_error("\nTest case (D3D9EX, X8R8G8B8, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + //DXVA + + if(other_data_types(deviceID, context, queue, num_elements, 10, 64, 256, CL_ADAPTER_DXVA_KHR, + SURFACE_FORMAT_R32F, SHARED_HANDLE_DISABLED) != 0) + { + log_error("\nTest case (DXVA, R32F, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 64, 256, CL_ADAPTER_DXVA_KHR, + SURFACE_FORMAT_R32F, SHARED_HANDLE_ENABLED) != 0) + { + log_error("\nTest case (DXVA, R32F, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 256, 128, CL_ADAPTER_DXVA_KHR, + SURFACE_FORMAT_R16F, SHARED_HANDLE_DISABLED) != 0) + { + log_error("\nTest case (DXVA, R16F, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 
256, 128, CL_ADAPTER_DXVA_KHR, + SURFACE_FORMAT_R16F, SHARED_HANDLE_ENABLED) != 0) + { + log_error("\nTest case (DXVA, R16F, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 512, 256, CL_ADAPTER_DXVA_KHR, + SURFACE_FORMAT_L16, SHARED_HANDLE_DISABLED) != 0) + { + log_error("\nTest case (DXVA, L16, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 512, 256, CL_ADAPTER_DXVA_KHR, + SURFACE_FORMAT_L16, SHARED_HANDLE_ENABLED) != 0) + { + log_error("\nTest case (DXVA, L16, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 256, 512, CL_ADAPTER_DXVA_KHR, + SURFACE_FORMAT_A8, SHARED_HANDLE_DISABLED) != 0) + { + log_error("\nTest case (DXVA, A8, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 256, 512, CL_ADAPTER_DXVA_KHR, + SURFACE_FORMAT_A8, SHARED_HANDLE_ENABLED) != 0) + { + log_error("\nTest case (DXVA, A8, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 1024, 32, CL_ADAPTER_DXVA_KHR, + SURFACE_FORMAT_L8, SHARED_HANDLE_DISABLED) != 0) + { + log_error("\nTest case (DXVA, L8, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 1024, 32, CL_ADAPTER_DXVA_KHR, + SURFACE_FORMAT_L8, SHARED_HANDLE_ENABLED) != 0) + { + log_error("\nTest case (DXVA, L8, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 32, 1024, CL_ADAPTER_DXVA_KHR, + SURFACE_FORMAT_G32R32F, SHARED_HANDLE_DISABLED) != 0) + { + log_error("\nTest case (DXVA, G32R32F, no shared 
handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 32, 1024, CL_ADAPTER_DXVA_KHR, + SURFACE_FORMAT_G32R32F, SHARED_HANDLE_ENABLED) != 0) + { + log_error("\nTest case (DXVA, G32R32F, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 64, 64, CL_ADAPTER_DXVA_KHR, + SURFACE_FORMAT_G16R16F, SHARED_HANDLE_DISABLED) != 0) + { + log_error("\nTest case (DXVA, G16R16F, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 64, 64, CL_ADAPTER_DXVA_KHR, + SURFACE_FORMAT_G16R16F, SHARED_HANDLE_ENABLED) != 0) + { + log_error("\nTest case (DXVA, G16R16F, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 256, 256, CL_ADAPTER_DXVA_KHR, + SURFACE_FORMAT_G16R16, SHARED_HANDLE_DISABLED) != 0) + { + log_error("\nTest case (DXVA, G16R16, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 256, 256, CL_ADAPTER_DXVA_KHR, + SURFACE_FORMAT_G16R16, SHARED_HANDLE_ENABLED) != 0) + { + log_error("\nTest case (DXVA, G16R16, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 512, 128, CL_ADAPTER_DXVA_KHR, + SURFACE_FORMAT_A8L8, SHARED_HANDLE_DISABLED) != 0) + { + log_error("\nTest case (DXVA, A8L8, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 512, 128, CL_ADAPTER_DXVA_KHR, + SURFACE_FORMAT_A8L8, SHARED_HANDLE_ENABLED) != 0) + { + log_error("\nTest case (DXVA, A8L8, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, 
context, queue, num_elements, 10, 128, 512, CL_ADAPTER_DXVA_KHR, + SURFACE_FORMAT_A32B32G32R32F, SHARED_HANDLE_DISABLED) != 0) + { + log_error("\nTest case (DXVA, A32B32G32R32F, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 128, 512, CL_ADAPTER_DXVA_KHR, + SURFACE_FORMAT_A32B32G32R32F, SHARED_HANDLE_ENABLED) != 0) + { + log_error("\nTest case (DXVA, A32B32G32R32F, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 128, 128, CL_ADAPTER_DXVA_KHR, + SURFACE_FORMAT_A16B16G16R16F, SHARED_HANDLE_DISABLED) != 0) + { + log_error("\nTest case (DXVA, A16B16G16R16F, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 128, 128, CL_ADAPTER_DXVA_KHR, + SURFACE_FORMAT_A16B16G16R16F, SHARED_HANDLE_ENABLED) != 0) + { + log_error("\nTest case (DXVA, A16B16G16R16F, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 64, 128, CL_ADAPTER_DXVA_KHR, + SURFACE_FORMAT_A16B16G16R16, SHARED_HANDLE_DISABLED) != 0) + { + log_error("\nTest case (DXVA, A16B16G16R16, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 64, 128, CL_ADAPTER_DXVA_KHR, + SURFACE_FORMAT_A16B16G16R16, SHARED_HANDLE_ENABLED) != 0) + { + log_error("\nTest case (DXVA, A16B16G16R16, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 128, 64, CL_ADAPTER_DXVA_KHR, + SURFACE_FORMAT_A8B8G8R8, SHARED_HANDLE_DISABLED) != 0) + { + log_error("\nTest case (DXVA, A8B8G8R8, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, 
queue, num_elements, 10, 128, 64, CL_ADAPTER_DXVA_KHR, + SURFACE_FORMAT_A8B8G8R8, SHARED_HANDLE_ENABLED) != 0) + { + log_error("\nTest case (DXVA, A8B8G8R8, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 16, 512, CL_ADAPTER_DXVA_KHR, + SURFACE_FORMAT_X8B8G8R8, SHARED_HANDLE_DISABLED) != 0) + { + log_error("\nTest case (DXVA, X8B8G8R8, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 16, 512, CL_ADAPTER_DXVA_KHR, + SURFACE_FORMAT_X8B8G8R8, SHARED_HANDLE_ENABLED) != 0) + { + log_error("\nTest case (DXVA, X8B8G8R8, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 512, 16, CL_ADAPTER_DXVA_KHR, + SURFACE_FORMAT_A8R8G8B8, SHARED_HANDLE_DISABLED) != 0) + { + log_error("\nTest case (DXVA, A8R8G8B8, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 512, 16, CL_ADAPTER_DXVA_KHR, + SURFACE_FORMAT_A8R8G8B8, SHARED_HANDLE_ENABLED) != 0) + { + log_error("\nTest case (DXVA, A8R8G8B8, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 256, 256, CL_ADAPTER_DXVA_KHR, + SURFACE_FORMAT_X8R8G8B8, SHARED_HANDLE_DISABLED) != 0) + { + log_error("\nTest case (DXVA, X8R8G8B8, no shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + + if(other_data_types(deviceID, context, queue, num_elements, 10, 256, 256, CL_ADAPTER_DXVA_KHR, + SURFACE_FORMAT_X8R8G8B8, SHARED_HANDLE_ENABLED) != 0) + { + log_error("\nTest case (DXVA, X8R8G8B8, shared handle) failed\n\n"); + result.ResultSub(CResult::TEST_FAIL); + } + +#else + return TEST_NOT_IMPLEMENTED; +#endif + + return result.Result(); +} diff --git 
a/test_extensions/media_sharing/utils.cpp b/test_extensions/media_sharing/utils.cpp new file mode 100644 index 00000000..b32e9556 --- /dev/null +++ b/test_extensions/media_sharing/utils.cpp @@ -0,0 +1,1937 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "utils.h" + +#include "../../test_common/harness/errorHelpers.h" +#include "../../test_common/harness/rounding_mode.h" + +#include + +static RoundingMode gFloatToHalfRoundingMode = kDefaultRoundingMode; + + +CResult::CResult(): +_result(TEST_PASS), _resultLast(TEST_NORESULT) +{ + +} + +CResult::~CResult() +{ + +} + +CResult::TTestResult CResult::ResultLast() const +{ + return _resultLast; +} + +int CResult::Result() const +{ + switch (_result) + { + case TEST_NORESULT: + case TEST_NOTSUPPORTED: + case TEST_PASS: + return 0; + break; + case TEST_FAIL: + return 1; + break; + case TEST_ERROR: + return 2; + break; + default: + return -1; + break; + } +} + +void CResult::ResultSub( TTestResult result ) +{ + _resultLast = result; + if (static_cast(result) > static_cast(_result)) + _result = result; +} + +bool ExtensionCheck(const std::string &extension, cl_device_id deviceID) +{ + std::string extensions; + size_t size = 0; + cl_int error = clGetDeviceInfo(deviceID, CL_DEVICE_EXTENSIONS, 0, 0, &size); + if (error != CL_SUCCESS) + { + print_error(error, "clGetDeviceInfo failed\n"); + return false; + } + + if (size == 0) + { + print_error(error, 
"Invalid extension string size\n"); + return false; + } + + extensions.resize(size); + error = clGetDeviceInfo(deviceID, CL_DEVICE_EXTENSIONS, size, &extensions[0], 0); + if (error != CL_SUCCESS) + { + print_error(error, "clGetDeviceInfo failed\n"); + return false; + } + + if (extensions.find(extension) != std::string::npos) + return true; + + return false; +} + +void FunctionContextCreateToString(TContextFuncType contextCreateFunction, std::string &contextFunction) +{ + switch(contextCreateFunction) + { + case CONTEXT_CREATE_DEFAULT: + contextFunction = "CreateContext"; + break; + case CONTEXT_CREATE_FROM_TYPE: + contextFunction = "CreateContextFromType"; + break; + default: + contextFunction = "Unknown"; + log_error("FunctionContextCreateToString(): Unknown create function enum!"); + break; + } +} + +void AdapterToString(cl_dx9_media_adapter_type_khr adapterType, std::string &adapter) +{ + switch(adapterType) + { + case CL_ADAPTER_D3D9_KHR: + adapter = "D3D9"; + break; + case CL_ADAPTER_D3D9EX_KHR: + adapter = "D3D9EX"; + break; + case CL_ADAPTER_DXVA_KHR: + adapter = "DXVA"; + break; + default: + adapter = "Unknown"; + log_error("AdapterToString(): Unknown adapter type!"); + break; + } +} + +cl_context_info AdapterTypeToContextInfo( cl_dx9_media_adapter_type_khr adapterType ) +{ + switch (adapterType) + { + case CL_ADAPTER_D3D9_KHR: + return CL_CONTEXT_ADAPTER_D3D9_KHR; + break; + case CL_ADAPTER_D3D9EX_KHR: + return CL_CONTEXT_ADAPTER_D3D9EX_KHR; + break; + case CL_ADAPTER_DXVA_KHR: + return CL_CONTEXT_ADAPTER_DXVA_KHR; + break; + default: + log_error("AdapterTypeToContextInfo(): Unknown adapter type!"); + return 0; + break; + } +} + +void YUVGenerateNV12( std::vector &yuv, unsigned int width, unsigned int height, + cl_uchar valueMin, cl_uchar valueMax, double valueAdd ) +{ + yuv.clear(); + yuv.resize(width * height * 3 / 2, 0); + + double min = static_cast(valueMin); + double max = static_cast(valueMax); + double range = 255; + double add = 
static_cast(valueAdd * range); + double stepX = (max - min) / static_cast(width); + double stepY = (max - min) /static_cast(height); + + //generate Y plane + for (unsigned int i = 0; i < height; ++i) + { + unsigned int offset = i * width; + double valueYPlane0 = static_cast(stepY * i); + for (unsigned int j = 0; j < width; ++j) + { + double valueXPlane0 = static_cast(stepX * j); + yuv.at(offset + j) = static_cast(min + valueXPlane0 / 2 + valueYPlane0 / 2 + add); + } + } + + //generate UV planes + for (unsigned int i = 0; i < height / 2; ++i) + { + unsigned int offset = width * height + i * width; + double valueYPlane1 = static_cast(stepY * i); + double valueYPlane2 = static_cast(stepY * (height / 2 + i)); + for (unsigned int j = 0; j < width / 2; ++j) + { + double valueXPlane1 = static_cast(stepX * j); + double valueXPlane2 = static_cast(stepX * (width / 2 + j)); + + yuv.at(offset + j * 2) = static_cast(min + valueXPlane1 / 2 + valueYPlane1 / 2 + add); + yuv.at(offset + j * 2 + 1) = static_cast(min + valueXPlane2 / 2 + valueYPlane2 / 2 + add); + } + } +} + +void YUVGenerateYV12( std::vector &yuv, unsigned int width, unsigned int height, cl_uchar valueMin, cl_uchar valueMax, double valueAdd /*= 0.0*/ ) +{ + yuv.clear(); + yuv.resize(width * height * 3 / 2, 0); + + double min = static_cast(valueMin); + double max = static_cast(valueMax); + double range = 255; + double add = static_cast(valueAdd * range); + double stepX = (max - min) / static_cast(width); + double stepY = (max - min) /static_cast(height); + + unsigned offset = 0; + + //generate Y plane + for (unsigned int i = 0; i < height; ++i) + { + unsigned int plane0Offset = offset + i * width; + double valueYPlane0 = static_cast(stepY * i); + for (unsigned int j = 0; j < width; ++j) + { + double valueXPlane0 = static_cast(stepX * j); + yuv.at(plane0Offset + j) = static_cast(min + valueXPlane0 / 2 + valueYPlane0 / 2 + add); + } + } + + //generate V plane + offset += width * height; + for (unsigned int i = 0; i < 
height / 2; ++i) + { + unsigned int plane1Offset = offset + i * width / 2; + double valueYPlane1 = static_cast(stepY * i); + for (unsigned int j = 0; j < width / 2; ++j) + { + double valueXPlane1 = static_cast(stepX * j); + yuv.at(plane1Offset + j) = static_cast(min + valueXPlane1 / 2 + valueYPlane1 / 2 + add); + } + } + + //generate U plane + offset += width * height / 4; + for (unsigned int i = 0; i < height / 2; ++i) + { + unsigned int plane2Offset = offset + i * width / 2; + double valueYPlane2 = static_cast(stepY * (height / 2 + i)); + for (unsigned int j = 0; j < width / 2; ++j) + { + double valueXPlane2 = static_cast(stepX * j); + yuv.at(plane2Offset + j) = static_cast(min + valueXPlane2 / 2 + valueYPlane2 / 2 + add); + } + } +} + + +bool YUVGenerate( TSurfaceFormat surfaceFormat, std::vector &yuv, unsigned int width, unsigned int height, cl_uchar valueMin, cl_uchar valueMax, double valueAdd /*= 0.0*/ ) +{ + switch (surfaceFormat) + { + case SURFACE_FORMAT_NV12: + YUVGenerateNV12(yuv, width, height, valueMin, valueMax, valueAdd); + break; + case SURFACE_FORMAT_YV12: + YUVGenerateYV12(yuv, width, height, valueMin, valueMax, valueAdd); + break; + default: + log_error("YUVGenerate(): Invalid surface type\n"); + return false; + break; + } + + return true; +} + +bool YUVSurfaceSetNV12( std::auto_ptr &surface, const std::vector &yuv, + unsigned int width, unsigned int height ) +{ +#if defined(_WIN32) + CD3D9SurfaceWrapper *d3dSurface = static_cast(surface.get()); + D3DLOCKED_RECT rect; + if (FAILED((*d3dSurface)->LockRect(&rect, NULL, 0))) + { + log_error("YUVSurfaceSetNV12(): Surface lock failed\n"); + return false; + } + + size_t pitch = rect.Pitch / sizeof(cl_uchar); + size_t lineSize = width * sizeof(cl_uchar); + cl_uchar *ptr = static_cast(rect.pBits); + for (size_t y = 0; y < height; ++y) + memcpy(ptr + y * pitch, &yuv.at(y * width), lineSize); + + for (size_t y = 0; y < height / 2; ++y) + memcpy(ptr + height * pitch + y * pitch, &yuv.at(width * height + y * 
width), lineSize); + + (*d3dSurface)->UnlockRect(); + + return true; + +#else + return false; +#endif +} + +bool YUVSurfaceSetYV12( std::auto_ptr &surface, const std::vector &yuv, + unsigned int width, unsigned int height ) +{ +#if defined(_WIN32) + CD3D9SurfaceWrapper *d3dSurface = static_cast(surface.get()); + D3DLOCKED_RECT rect; + if (FAILED((*d3dSurface)->LockRect(&rect, NULL, 0))) + { + log_error("YUVSurfaceSetYV12(): Surface lock failed!\n"); + return false; + } + + size_t pitch = rect.Pitch / sizeof(cl_uchar); + size_t pitchHalf = pitch / 2; + size_t lineSize = width * sizeof(cl_uchar); + size_t lineHalfSize = lineSize / 2; + size_t surfaceOffset = 0; + size_t yuvOffset = 0; + cl_uchar *ptr = static_cast(rect.pBits); + + for (size_t y = 0; y < height; ++y) + memcpy(ptr + surfaceOffset + y * pitch, &yuv.at(yuvOffset + y * width), lineSize); + + surfaceOffset += height * pitch; + yuvOffset += width * height; + for (size_t y = 0; y < height / 2; ++y) + memcpy(ptr + surfaceOffset + y * pitchHalf, &yuv.at(yuvOffset + y * lineHalfSize), lineHalfSize); + + surfaceOffset += pitchHalf * height / 2; + yuvOffset += width * height / 4; + for (size_t y = 0; y < height / 2; ++y) + memcpy(ptr + surfaceOffset + y * pitchHalf, &yuv.at(yuvOffset + y * lineHalfSize), lineHalfSize); + + (*d3dSurface)->UnlockRect(); + + return true; + +#else + return false; +#endif +} + +bool YUVSurfaceSet(TSurfaceFormat surfaceFormat, std::auto_ptr &surface, const std::vector &yuv, unsigned int width, unsigned int height ) +{ + switch (surfaceFormat) + { + case SURFACE_FORMAT_NV12: + if(!YUVSurfaceSetNV12(surface, yuv, width, height)) + return false; + break; + case SURFACE_FORMAT_YV12: + if(!YUVSurfaceSetYV12(surface, yuv, width, height)) + return false; + break; + default: + log_error("YUVSurfaceSet(): Invalid surface type!\n"); + return false; + break; + } + + return true; +} + +bool YUVSurfaceGetNV12( std::auto_ptr &surface, std::vector &yuv, + unsigned int width, unsigned int height ) +{ 
+#if defined(_WIN32) + CD3D9SurfaceWrapper *d3dSurface = static_cast(surface.get()); + D3DLOCKED_RECT rect; + if (FAILED((*d3dSurface)->LockRect(&rect, NULL, 0))) + { + log_error("YUVSurfaceGetNV12(): Surface lock failed!\n"); + return false; + } + + size_t pitch = rect.Pitch / sizeof(cl_uchar); + size_t lineSize = width * sizeof(cl_uchar); + cl_uchar *ptr = static_cast(rect.pBits); + size_t yuvOffset = 0; + size_t surfaceOffset = 0; + for (size_t y = 0; y < height; ++y) + memcpy(&yuv.at(yuvOffset + y * width), ptr + y * pitch, lineSize); + + yuvOffset += width * height; + surfaceOffset += pitch * height; + for (size_t y = 0; y < height / 2; ++y) + memcpy(&yuv.at(yuvOffset + y * width), ptr + surfaceOffset + y * pitch, lineSize); + + (*d3dSurface)->UnlockRect(); + + return true; + +#else + return false; +#endif +} + +bool YUVSurfaceGetYV12( std::auto_ptr &surface, std::vector &yuv, unsigned int width, unsigned int height ) +{ +#if defined(_WIN32) + CD3D9SurfaceWrapper *d3dSurface = static_cast(surface.get()); + D3DLOCKED_RECT rect; + if (FAILED((*d3dSurface)->LockRect(&rect, NULL, 0))) + { + log_error("YUVSurfaceGetYV12(): Surface lock failed!\n"); + return false; + } + + size_t pitch = rect.Pitch / sizeof(cl_uchar); + size_t pitchHalf = pitch / 2; + size_t lineSize = width * sizeof(cl_uchar); + size_t lineHalfSize = lineSize / 2; + size_t surfaceOffset = 0; + size_t yuvOffset = 0; + cl_uchar *ptr = static_cast(rect.pBits); + + for (size_t y = 0; y < height; ++y) + memcpy(&yuv.at(yuvOffset + y * width), ptr + surfaceOffset + y * pitch, lineSize); + + surfaceOffset += pitch * height; + yuvOffset += width * height; + for (size_t y = 0; y < height / 2; ++y) + memcpy(&yuv.at(yuvOffset + y * lineHalfSize), ptr + surfaceOffset + y * pitchHalf, lineHalfSize); + + surfaceOffset += pitchHalf * height / 2; + yuvOffset += width * height / 4; + for (size_t y = 0; y < height / 2; ++y) + memcpy(&yuv.at(yuvOffset + y * lineHalfSize), ptr + surfaceOffset + y * pitchHalf, 
lineHalfSize); + + (*d3dSurface)->UnlockRect(); + + return true; + +#else + return false; +#endif +} + +bool YUVSurfaceGet(TSurfaceFormat surfaceFormat, std::auto_ptr &surface, std::vector &yuv, + unsigned int width, unsigned int height ) +{ + switch (surfaceFormat) + { + case SURFACE_FORMAT_NV12: + if(!YUVSurfaceGetNV12(surface, yuv, width, height)) + return false; + break; + case SURFACE_FORMAT_YV12: + if(!YUVSurfaceGetYV12(surface, yuv, width, height)) + return false; + break; + default: + log_error("YUVSurfaceGet(): Invalid surface type!\n"); + return false; + break; + } + + return true; +} + +bool YUVCompareNV12( const std::vector &yuvTest, const std::vector &yuvRef, + unsigned int width, unsigned int height ) +{ + //plane 0 verification + size_t offset = 0; + for (size_t y = 0; y < height; ++y) + { + size_t plane0Offset = offset + width * y; + for (size_t x = 0; x < width; ++x) + { + if (yuvTest[plane0Offset + x] != yuvRef[plane0Offset + x]) + { + log_error("Plane 0 (Y) is different than expected, reference value: %i, test value: %i, x: %i, y: %i\n", + yuvRef[plane0Offset + x], yuvTest[plane0Offset + x], x, y); + return false; + } + } + } + + //plane 1 and 2 verification + offset += width * height; + for (size_t y = 0; y < height / 2; ++y) + { + size_t plane12Offset = offset + width * y; + for (size_t x = 0; x < width / 2; ++x) + { + if (yuvTest.at(plane12Offset + 2 * x) != yuvRef.at(plane12Offset + 2 * x)) + { + log_error("Plane 1 (U) is different than expected, reference value: %i, test value: %i, x: %i, y: %i\n", + yuvRef[plane12Offset + 2 * x], yuvTest[plane12Offset + 2 * x], x, y); + return false; + } + + if (yuvTest.at(plane12Offset + 2 * x + 1) != yuvRef.at(plane12Offset + 2 * x + 1)) + { + log_error("Plane 2 (V) is different than expected, reference value: %i, test value: %i, x: %i, y: %i\n", + yuvRef[plane12Offset + 2 * x + 1], yuvTest[plane12Offset + 2 * x + 1], x, y); + return false; + } + } + } + + return true; +} + +bool YUVCompareYV12( const 
std::vector &yuvTest, const std::vector &yuvRef, + unsigned int width, unsigned int height ) +{ + //plane 0 verification + size_t offset = 0; + for (size_t y = 0; y < height; ++y) + { + size_t plane0Offset = width * y; + for (size_t x = 0; x < width; ++x) + { + if (yuvTest.at(plane0Offset + x) != yuvRef.at(plane0Offset + x)) + { + log_error("Plane 0 (Y) is different than expected, reference value: %i, test value: %i, x: %i, y: %i\n", + yuvRef[plane0Offset + x], yuvTest[plane0Offset + x], x ,y); + return false; + } + } + } + + //plane 1 verification + offset += width * height; + for (size_t y = 0; y < height / 2; ++y) + { + size_t plane1Offset = offset + width * y / 2; + for (size_t x = 0; x < width / 2; ++x) + { + if (yuvTest.at(plane1Offset + x) != yuvRef.at(plane1Offset + x)) + { + log_error("Plane 1 (V) is different than expected, reference value: %i, test value: %i, x: %i, y: %i\n", + yuvRef[plane1Offset + x], yuvTest[plane1Offset + x], x, y); + return false; + } + } + } + + //plane 2 verification + offset += width * height / 4; + for (size_t y = 0; y < height / 2; ++y) + { + size_t plane2Offset = offset + width * y / 2; + for (size_t x = 0; x < width / 2; ++x) + { + if (yuvTest.at(plane2Offset + x) != yuvRef.at(plane2Offset + x)) + { + log_error("Plane 2 (U) is different than expected, reference value: %i, test value: %i, x: %i, y: %i\n", + yuvRef[plane2Offset + x], yuvTest[plane2Offset + x], x, y); + return false; + } + } + } + + return true; +} + +bool YUVCompare( TSurfaceFormat surfaceFormat, const std::vector &yuvTest, const std::vector &yuvRef, + unsigned int width, unsigned int height ) +{ + switch (surfaceFormat) + { + case SURFACE_FORMAT_NV12: + if (!YUVCompareNV12(yuvTest, yuvRef, width, height)) + { + log_error("OCL object is different than expected!\n"); + return false; + } + break; + case SURFACE_FORMAT_YV12: + if (!YUVCompareYV12(yuvTest, yuvRef, width, height)) + { + log_error("OCL object is different than expected!\n"); + return false; + } + 
break; + default: + log_error("YUVCompare(): Invalid surface type!\n"); + return false; + break; + } + + return true; +} + +void DataGenerate( TSurfaceFormat surfaceFormat, cl_channel_type type, std::vector &data, unsigned int width, unsigned int height, + unsigned int channelNum, float cmin /*= 0.0f*/, float cmax /*= 1.0f*/, float add /*= 0.0f*/ ) +{ + data.clear(); + data.reserve(width * height * channelNum); + + double valueMin = static_cast(cmin); + double valueMax = static_cast(cmax); + double stepX = (valueMax - valueMin) / static_cast(width); + double stepY = (valueMax - valueMin) /static_cast(height); + double valueAdd = static_cast(add); + for (unsigned int i = 0; i < height; ++i) + { + double valueY = static_cast(stepY * i); + for (unsigned int j = 0; j < width; ++j) + { + double valueX = static_cast(stepX * j); + switch (channelNum) + { + case 1: + data.push_back(static_cast(valueMin + valueX / 2 + valueY / 2 + valueAdd)); + break; + case 2: + data.push_back(static_cast(valueMin + valueX + valueAdd)); + data.push_back(static_cast(valueMin + valueY + valueAdd)); + break; + case 4: + data.push_back(static_cast(valueMin + valueX + valueAdd)); + data.push_back(static_cast(valueMin + valueY + valueAdd)); + data.push_back(static_cast(valueMin + valueX / 2 + valueAdd)); + data.push_back(static_cast(valueMin + valueY / 2 + valueAdd)); + break; + default: + log_error("DataGenerate(): invalid channel number!"); + return; + break; + } + } + } +} + +void DataGenerate( TSurfaceFormat surfaceFormat, cl_channel_type type, std::vector &data, unsigned int width, unsigned int height, + unsigned int channelNum, float cmin /*= 0.0f*/, float cmax /*= 1.0f*/, float add /*= 0.0f*/ ) +{ + data.clear(); + data.reserve(width * height * channelNum); + + double valueMin = static_cast(cmin); + double valueMax = static_cast(cmax); + double stepX = (valueMax - valueMin) / static_cast(width); + double stepY = (valueMax - valueMin) /static_cast(height); + + switch(type) + { + case 
CL_HALF_FLOAT: + { + double valueAdd = static_cast(add); + + for (unsigned int i = 0; i < height; ++i) + { + double valueY = static_cast(stepY * i); + for (unsigned int j = 0; j < width; ++j) + { + double valueX = static_cast(stepX * j); + switch (channelNum) + { + case 1: + data.push_back(convert_float_to_half(static_cast(valueMin + valueX / 2 + valueY / 2 + valueAdd))); + break; + case 2: + data.push_back(convert_float_to_half(static_cast(valueMin + valueX + valueAdd))); + data.push_back(convert_float_to_half(static_cast(valueMin + valueY + valueAdd))); + break; + case 4: + data.push_back(convert_float_to_half(static_cast(valueMin + valueX + valueAdd))); + data.push_back(convert_float_to_half(static_cast(valueMin + valueY + valueAdd))); + data.push_back(convert_float_to_half(static_cast(valueMin + valueX / 2 + valueAdd))); + data.push_back(convert_float_to_half(static_cast(valueMin + valueY / 2 + valueAdd))); + break; + default: + log_error("DataGenerate(): invalid channel number!"); + return; + break; + } + } + } + break; + } + case CL_UNORM_INT16: + { + double range = 65535; + double valueAdd = static_cast(add * range); + + for (unsigned int i = 0; i < height; ++i) + { + double valueY = static_cast(stepY * i * range); + for (unsigned int j = 0; j < width; ++j) + { + double valueX = static_cast(stepX * j * range); + switch (channelNum) + { + case 1: + data.push_back(static_cast(valueMin + valueX / 2 + valueY / 2 + valueAdd)); + break; + case 2: + data.push_back(static_cast(valueMin + valueX + valueAdd)); + data.push_back(static_cast(valueMin + valueY + valueAdd)); + break; + case 4: + data.push_back(static_cast(valueMin + valueX + valueAdd)); + data.push_back(static_cast(valueMin + valueY + valueAdd)); + data.push_back(static_cast(valueMin + valueX / 2 + valueAdd)); + data.push_back(static_cast(valueMin + valueY / 2 + valueAdd)); + break; + default: + log_error("DataGenerate(): invalid channel number!"); + return; + break; + } + } + } + } + break; + default: + 
log_error("DataGenerate(): unknown data type!"); + return; + break; + } +} + +void DataGenerate( TSurfaceFormat surfaceFormat, cl_channel_type type, std::vector &data, unsigned int width, unsigned int height, + unsigned int channelNum, float cmin /*= 0.0f*/, float cmax /*= 1.0f*/, float add /*= 0.0f*/ ) +{ + data.clear(); + data.reserve(width * height * channelNum); + + double valueMin = static_cast(cmin); + double valueMax = static_cast(cmax); + double stepX = (valueMax - valueMin) / static_cast(width); + double stepY = (valueMax - valueMin) /static_cast(height); + + double range = 255; + double valueAdd = static_cast(add * range); + + for (unsigned int i = 0; i < height; ++i) + { + double valueY = static_cast(stepY * i * range); + for (unsigned int j = 0; j < width; ++j) + { + double valueX = static_cast(stepX * j * range); + switch (channelNum) + { + case 1: + data.push_back(static_cast(valueMin + valueX / 2 + valueY / 2 + valueAdd)); + break; + case 2: + data.push_back(static_cast(valueMin + valueX + valueAdd)); + data.push_back(static_cast(valueMin + valueY + valueAdd)); + break; + case 4: + data.push_back(static_cast(valueMin + valueX + valueAdd)); + data.push_back(static_cast(valueMin + valueY + valueAdd)); + data.push_back(static_cast(valueMin + valueX / 2 + valueAdd)); + if (surfaceFormat == SURFACE_FORMAT_X8R8G8B8) + data.push_back(static_cast(0xff)); + else + data.push_back(static_cast(valueMin + valueY / 2 + valueAdd)); + break; + default: + log_error("DataGenerate(): invalid channel number!"); + return; + break; + } + } + } +} + +bool DataCompare( TSurfaceFormat surfaceFormat, cl_channel_type type, const std::vector &dataTest, const std::vector &dataExp, + unsigned int width, unsigned int height, unsigned int channelNum) +{ + float epsilon = 0.000001f; + for (unsigned int i = 0; i < height; ++i) + { + unsigned int offset = i * width * channelNum; + for (unsigned int j = 0; j < width; ++j) + { + for(unsigned planeIdx = 0; planeIdx < channelNum; 
++planeIdx) + { + if (abs(dataTest.at(offset + j * channelNum + planeIdx) - dataExp.at(offset + j * channelNum + planeIdx)) > epsilon) + { + log_error("Tested image is different than reference (x,y,plane) = (%i,%i,%i), test value = %f, expected value = %f\n", + j, i, planeIdx, dataTest[offset + j * channelNum + planeIdx], dataExp[offset + j * channelNum + planeIdx]); + return false; + } + } + } + } + + return true; +} + +bool DataCompare( TSurfaceFormat surfaceFormat, cl_channel_type type, const std::vector &dataTest, const std::vector &dataExp, + unsigned int width, unsigned int height, unsigned int channelNum) +{ + switch(type) + { + case CL_HALF_FLOAT: + { + float epsilon = 0.001f; + for (unsigned int i = 0; i < height; ++i) + { + unsigned int offset = i * width * channelNum; + for (unsigned int j = 0; j < width; ++j) + { + for(unsigned planeIdx = 0; planeIdx < channelNum; ++planeIdx) + { + float test = convert_half_to_float(dataTest.at(offset + j * channelNum + planeIdx)); + float ref = convert_half_to_float(dataExp.at(offset + j * channelNum + planeIdx)); + if (abs(test - ref) > epsilon) + { + log_error("Tested image is different than reference (x,y,plane) = (%i,%i,%i), test value = %f, expected value = %f\n", + j, i, planeIdx, test, ref); + return false; + } + } + } + } + } + break; + case CL_UNORM_INT16: + { + cl_ushort epsilon = 1; + for (unsigned int i = 0; i < height; ++i) + { + unsigned int offset = i * width * channelNum; + for (unsigned int j = 0; j < width; ++j) + { + for(unsigned planeIdx = 0; planeIdx < channelNum; ++planeIdx) + { + cl_ushort test = dataTest.at(offset + j * channelNum + planeIdx); + cl_ushort ref = dataExp.at(offset + j * channelNum + planeIdx); + if (abs(test - ref) > epsilon) + { + log_error("Tested image is different than reference (x,y,plane) = (%i,%i,%i), test value = %i, expected value = %i\n", j, i, planeIdx, test, ref); + return false; + } + } + } + } + } + break; + default: + log_error("DataCompare(): Invalid data 
format!"); + return false; + break; + } + + return true; +} + +bool DataCompare( TSurfaceFormat surfaceFormat, cl_channel_type type, const std::vector &dataTest, const std::vector &dataExp, + unsigned int width, unsigned int height, unsigned int planeNum ) +{ + for (unsigned int i = 0; i < height; ++i) + { + unsigned int offset = i * width * planeNum; + for (unsigned int j = 0; j < width; ++j) + { + for(unsigned planeIdx = 0; planeIdx < planeNum; ++planeIdx) + { + if (surfaceFormat == SURFACE_FORMAT_X8R8G8B8 && planeIdx == 3) + continue; + + cl_uchar test = dataTest.at(offset + j * planeNum + planeIdx); + cl_uchar ref = dataExp.at(offset + j * planeNum + planeIdx); + if (test != ref) + { + log_error("Tested image is different than reference (x,y,plane) = (%i,%i,%i), test value = %i, expected value = %i\n", + j, i, planeIdx, test, ref); + return false; + } + } + } + } + + return true; +} + +bool GetImageInfo( cl_mem object, cl_image_format formatExp, size_t elementSizeExp, size_t rowPitchExp, + size_t slicePitchExp, size_t widthExp, size_t heightExp, size_t depthExp , unsigned int planeExp) +{ + bool result = true; + + cl_image_format format; + if (clGetImageInfo(object, CL_IMAGE_FORMAT, sizeof(cl_image_format), &format, 0) != CL_SUCCESS) + { + log_error("clGetImageInfo(CL_IMAGE_FORMAT) failed\n"); + result = false; + } + + if (formatExp.image_channel_order != format.image_channel_order || formatExp.image_channel_data_type != format.image_channel_data_type) + { + log_error("Value of CL_IMAGE_FORMAT is different than expected\n"); + result = false; + } + + size_t elementSize = 0; + if (clGetImageInfo(object, CL_IMAGE_ELEMENT_SIZE, sizeof(size_t), &elementSize, 0) != CL_SUCCESS) + { + log_error("clGetImageInfo(CL_IMAGE_ELEMENT_SIZE) failed\n"); + result = false; + } + + if (elementSizeExp != elementSize) + { + log_error("Value of CL_IMAGE_ELEMENT_SIZE is different than expected (size: %i, exp size: %i)\n", elementSize, elementSizeExp); + result = false; + } + + size_t 
rowPitch = 0; + if (clGetImageInfo(object, CL_IMAGE_ROW_PITCH, sizeof(size_t), &rowPitch, 0) != CL_SUCCESS) + { + log_error("clGetImageInfo(CL_IMAGE_ROW_PITCH) failed\n"); + result = false; + } + + if ((rowPitchExp == 0 && rowPitchExp != rowPitch) || (rowPitchExp > 0 && rowPitchExp > rowPitch)) + { + log_error("Value of CL_IMAGE_ROW_PITCH is different than expected (size: %i, exp size: %i)\n", rowPitch, rowPitchExp); + result = false; + } + + size_t slicePitch = 0; + if (clGetImageInfo(object, CL_IMAGE_SLICE_PITCH, sizeof(size_t), &slicePitch, 0) != CL_SUCCESS) + { + log_error("clGetImageInfo(CL_IMAGE_SLICE_PITCH) failed\n"); + result = false; + } + + if ((slicePitchExp == 0 && slicePitchExp != slicePitch) || (slicePitchExp > 0 && slicePitchExp > slicePitch)) + { + log_error("Value of CL_IMAGE_SLICE_PITCH is different than expected (size: %i, exp size: %i)\n", slicePitch, slicePitchExp); + result = false; + } + + size_t width = 0; + if (clGetImageInfo(object, CL_IMAGE_WIDTH, sizeof(size_t), &width, 0) != CL_SUCCESS) + { + log_error("clGetImageInfo(CL_IMAGE_WIDTH) failed\n"); + result = false; + } + + if (widthExp != width) + { + log_error("Value of CL_IMAGE_WIDTH is different than expected (size: %i, exp size: %i)\n", width, widthExp); + result = false; + } + + size_t height = 0; + if (clGetImageInfo(object, CL_IMAGE_HEIGHT, sizeof(size_t), &height, 0) != CL_SUCCESS) + { + log_error("clGetImageInfo(CL_IMAGE_HEIGHT) failed\n"); + result = false; + } + + if (heightExp != height) + { + log_error("Value of CL_IMAGE_HEIGHT is different than expected (size: %i, exp size: %i)\n", height, heightExp); + result = false; + } + + size_t depth = 0; + if (clGetImageInfo(object, CL_IMAGE_DEPTH, sizeof(size_t), &depth, 0) != CL_SUCCESS) + { + log_error("clGetImageInfo(CL_IMAGE_DEPTH) failed\n"); + result = false; + } + + if (depthExp != depth) + { + log_error("Value of CL_IMAGE_DEPTH is different than expected (size: %i, exp size: %i)\n", depth, depthExp); + result = false; + } + 
+ unsigned int plane = 99; + size_t paramSize = 0; + if (clGetImageInfo(object, CL_IMAGE_DX9_MEDIA_PLANE_KHR, sizeof(unsigned int), &plane, ¶mSize) != CL_SUCCESS) + { + log_error("clGetImageInfo(CL_IMAGE_MEDIA_SURFACE_PLANE_KHR) failed\n"); + result = false; + } + + if (planeExp != plane) + { + log_error("Value of CL_IMAGE_MEDIA_SURFACE_PLANE_KHR is different than expected (plane: %i, exp plane: %i)\n", plane, planeExp); + result = false; + } + + return result; +} + +bool GetMemObjInfo( cl_mem object, cl_dx9_media_adapter_type_khr adapterType, std::auto_ptr &surface, void *shareHandleExp ) +{ + bool result = true; + switch(adapterType) + { + case CL_ADAPTER_D3D9_KHR: + case CL_ADAPTER_D3D9EX_KHR: + case CL_ADAPTER_DXVA_KHR: + { +#if defined(_WIN32) + cl_dx9_surface_info_khr surfaceInfo; +#else + void *surfaceInfo = 0; + return false; +#endif + size_t paramSize = 0; + if(clGetMemObjectInfo(object, CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR, sizeof(surfaceInfo), &surfaceInfo, ¶mSize) != CL_SUCCESS) + { + log_error("clGetImageInfo(CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR) failed\n"); + result = false; + } + +#if defined(_WIN32) + CD3D9SurfaceWrapper *d3d9Surface = static_cast(surface.get()); + if (*d3d9Surface != surfaceInfo.resource) + { + log_error("Invalid resource for CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR\n"); + result = false; + } + + if (shareHandleExp != surfaceInfo.shared_handle) + { + log_error("Invalid shared handle for CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR\n"); + result = false; + } +#else + return false; +#endif + + if (paramSize != sizeof(surfaceInfo)) + { + log_error("Invalid CL_MEM_DX9_MEDIA_SURFACE_INFO_KHR parameter size: %i, expected: %i\n", paramSize, sizeof(surfaceInfo)); + result = false; + } + + paramSize = 0; + cl_dx9_media_adapter_type_khr mediaAdapterType; + if(clGetMemObjectInfo(object, CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR, sizeof(mediaAdapterType), &mediaAdapterType, ¶mSize) != CL_SUCCESS) + { + log_error("clGetImageInfo(CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR) failed\n"); 
+ result = false; + } + + if (adapterType != mediaAdapterType) + { + log_error("Invalid media adapter type for CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR\n"); + result = false; + } + + if (paramSize != sizeof(mediaAdapterType)) + { + log_error("Invalid CL_MEM_DX9_MEDIA_ADAPTER_TYPE_KHR parameter size: %i, expected: %i\n", paramSize, sizeof(mediaAdapterType)); + result = false; + } + } + break; + default: + log_error("GetMemObjInfo(): Unknown adapter type!\n"); + return false; + break; + } + + return result; +} + +bool ImageInfoVerify( cl_dx9_media_adapter_type_khr adapterType, const std::vector &memObjList, unsigned int width, unsigned int height, + std::auto_ptr &surface, void *sharedHandle) +{ + if (memObjList.size() != 2 && memObjList.size() != 3) + { + log_error("ImageInfoVerify(): Invalid object list parameter\n"); + return false; + } + + cl_image_format formatPlane; + formatPlane.image_channel_data_type = CL_UNORM_INT8; + formatPlane.image_channel_order = CL_R; + + //plane 0 verification + if (!GetImageInfo(memObjList[0], formatPlane, sizeof(cl_uchar), + width * sizeof(cl_uchar), + 0, + width, height, 0, 0)) + { + log_error("clGetImageInfo failed\n"); + return false; + } + + switch (memObjList.size()) + { + case 2: + { + formatPlane.image_channel_data_type = CL_UNORM_INT8; + formatPlane.image_channel_order = CL_RG; + if (!GetImageInfo(memObjList[1], formatPlane, sizeof(cl_uchar) * 2, + width * sizeof(cl_uchar), + 0, + width / 2, height / 2, 0, 1)) + { + log_error("clGetImageInfo failed\n"); + return false; + } + } + break; + case 3: + { + if (!GetImageInfo(memObjList[1], formatPlane, sizeof(cl_uchar), + width * sizeof(cl_uchar) / 2, + 0, + width / 2, height / 2, 0, 1)) + { + log_error("clGetImageInfo failed\n"); + return false; + } + + if (!GetImageInfo(memObjList[2], formatPlane, sizeof(cl_uchar), + width * sizeof(cl_uchar) / 2, + 0, + width / 2, height / 2, 0, 2)) + { + log_error("clGetImageInfo failed\n"); + return false; + } + } + break; + default: + 
log_error("ImageInfoVerify(): Invalid object list parameter\n"); + return false; + break; + } + + for (size_t i = 0; i < memObjList.size(); ++i) + { + if (!GetMemObjInfo(memObjList[i], adapterType, surface, sharedHandle)) + { + log_error("clGetMemObjInfo(%i) failed\n", i); + return false; + } + } + + return true; +} + +bool ImageFormatCheck(cl_context context, cl_mem_object_type imageType, const cl_image_format imageFormatCheck) +{ + cl_uint imageFormatsNum = 0; + cl_int error = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, imageType, 0, 0, &imageFormatsNum); + if(error != CL_SUCCESS) + { + log_error("clGetSupportedImageFormats failed\n"); + return false; + } + + if(imageFormatsNum < 1) + { + log_error("Invalid image format number returned by clGetSupportedImageFormats\n"); + return false; + } + + std::vector imageFormats(imageFormatsNum); + error = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, imageType, imageFormatsNum, &imageFormats[0], 0); + if(error != CL_SUCCESS) + { + log_error("clGetSupportedImageFormats failed\n"); + return false; + } + + for(cl_uint i = 0; i < imageFormatsNum; ++i) + { + if(imageFormats[i].image_channel_data_type == imageFormatCheck.image_channel_data_type + && imageFormats[i].image_channel_order == imageFormatCheck.image_channel_order) + { + return true; + } + } + + return false; +} + +unsigned int ChannelNum( TSurfaceFormat surfaceFormat ) +{ + switch(surfaceFormat) + { + case SURFACE_FORMAT_R32F: + case SURFACE_FORMAT_R16F: + case SURFACE_FORMAT_L16: + case SURFACE_FORMAT_A8: + case SURFACE_FORMAT_L8: + return 1; + break; + case SURFACE_FORMAT_G32R32F: + case SURFACE_FORMAT_G16R16F: + case SURFACE_FORMAT_G16R16: + case SURFACE_FORMAT_A8L8: + return 2; + break; + case SURFACE_FORMAT_NV12: + case SURFACE_FORMAT_YV12: + return 3; + break; + case SURFACE_FORMAT_A32B32G32R32F: + case SURFACE_FORMAT_A16B16G16R16F: + case SURFACE_FORMAT_A16B16G16R16: + case SURFACE_FORMAT_A8B8G8R8: + case SURFACE_FORMAT_X8B8G8R8: + case 
SURFACE_FORMAT_A8R8G8B8: + case SURFACE_FORMAT_X8R8G8B8: + return 4; + break; + default: + log_error("ChannelNum(): unknown surface format!\n"); + return 0; + break; + } +} + +unsigned int PlanesNum( TSurfaceFormat surfaceFormat ) +{ + switch(surfaceFormat) + { + case SURFACE_FORMAT_R32F: + case SURFACE_FORMAT_R16F: + case SURFACE_FORMAT_L16: + case SURFACE_FORMAT_A8: + case SURFACE_FORMAT_L8: + case SURFACE_FORMAT_G32R32F: + case SURFACE_FORMAT_G16R16F: + case SURFACE_FORMAT_G16R16: + case SURFACE_FORMAT_A8L8: + case SURFACE_FORMAT_A32B32G32R32F: + case SURFACE_FORMAT_A16B16G16R16F: + case SURFACE_FORMAT_A16B16G16R16: + case SURFACE_FORMAT_A8B8G8R8: + case SURFACE_FORMAT_X8B8G8R8: + case SURFACE_FORMAT_A8R8G8B8: + case SURFACE_FORMAT_X8R8G8B8: + return 1; + break; + case SURFACE_FORMAT_NV12: + return 2; + break; + case SURFACE_FORMAT_YV12: + return 3; + break; + default: + log_error("PlanesNum(): unknown surface format!\n"); + return 0; + break; + } +} + +#if defined(_WIN32) +D3DFORMAT SurfaceFormatToD3D(TSurfaceFormat surfaceFormat) +{ + switch(surfaceFormat) + { + case SURFACE_FORMAT_R32F: + return D3DFMT_R32F; + break; + case SURFACE_FORMAT_R16F: + return D3DFMT_R16F; + break; + case SURFACE_FORMAT_L16: + return D3DFMT_L16; + break; + case SURFACE_FORMAT_A8: + return D3DFMT_A8; + break; + case SURFACE_FORMAT_L8: + return D3DFMT_L8; + break; + case SURFACE_FORMAT_G32R32F: + return D3DFMT_G32R32F; + break; + case SURFACE_FORMAT_G16R16F: + return D3DFMT_G16R16F; + break; + case SURFACE_FORMAT_G16R16: + return D3DFMT_G16R16; + break; + case SURFACE_FORMAT_A8L8: + return D3DFMT_A8L8; + break; + case SURFACE_FORMAT_A32B32G32R32F: + return D3DFMT_A32B32G32R32F; + break; + case SURFACE_FORMAT_A16B16G16R16F: + return D3DFMT_A16B16G16R16F; + break; + case SURFACE_FORMAT_A16B16G16R16: + return D3DFMT_A16B16G16R16; + break; + case SURFACE_FORMAT_A8B8G8R8: + return D3DFMT_A8B8G8R8; + break; + case SURFACE_FORMAT_X8B8G8R8: + return D3DFMT_X8B8G8R8; + break; + case 
SURFACE_FORMAT_A8R8G8B8: + return D3DFMT_A8R8G8B8; + break; + case SURFACE_FORMAT_X8R8G8B8: + return D3DFMT_X8R8G8B8; + break; + case SURFACE_FORMAT_NV12: + return static_cast(MAKEFOURCC('N', 'V', '1', '2')); + break; + case SURFACE_FORMAT_YV12: + return static_cast(MAKEFOURCC('Y', 'V', '1', '2')); + break; + default: + log_error("SurfaceFormatToD3D(): unknown surface format!\n"); + return D3DFMT_R32F; + break; + } +} +#endif + +bool DeviceCreate( cl_dx9_media_adapter_type_khr adapterType, std::auto_ptr &device ) +{ + switch (adapterType) + { +#if defined(_WIN32) + case CL_ADAPTER_D3D9_KHR: + device = std::auto_ptr(new CD3D9Wrapper()); + break; + case CL_ADAPTER_D3D9EX_KHR: + device = std::auto_ptr(new CD3D9ExWrapper()); + break; + case CL_ADAPTER_DXVA_KHR: + device = std::auto_ptr(new CDXVAWrapper()); + break; +#endif + default: + log_error("DeviceCreate(): Unknown adapter type!\n"); + return false; + break; + } + + return device->Status(); +} + +bool SurfaceFormatCheck( cl_dx9_media_adapter_type_khr adapterType, const CDeviceWrapper &device, TSurfaceFormat surfaceFormat ) +{ + switch (adapterType) + { +#if defined(_WIN32) + case CL_ADAPTER_D3D9_KHR: + case CL_ADAPTER_D3D9EX_KHR: + case CL_ADAPTER_DXVA_KHR: + { + D3DFORMAT d3dFormat = SurfaceFormatToD3D(surfaceFormat); + LPDIRECT3D9 d3d9 = static_cast(device.D3D()); + D3DDISPLAYMODE d3ddm; + d3d9->GetAdapterDisplayMode(device.AdapterIdx(), &d3ddm); + + if( FAILED(d3d9->CheckDeviceFormat(D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, d3ddm.Format, 0, D3DRTYPE_SURFACE, d3dFormat)) ) + return false; + } + break; +#endif + default: + log_error("SurfaceFormatCheck(): Unknown adapter type!\n"); + return false; + break; + } + + return true; +} + +bool SurfaceFormatToOCL(TSurfaceFormat surfaceFormat, cl_image_format &format) +{ + switch(surfaceFormat) + { + case SURFACE_FORMAT_R32F: + format.image_channel_order = CL_R; + format.image_channel_data_type = CL_FLOAT; + break; + case SURFACE_FORMAT_R16F: + format.image_channel_order = 
CL_R; + format.image_channel_data_type = CL_HALF_FLOAT; + break; + case SURFACE_FORMAT_L16: + format.image_channel_order = CL_R; + format.image_channel_data_type = CL_UNORM_INT16; + break; + case SURFACE_FORMAT_A8: + format.image_channel_order = CL_A; + format.image_channel_data_type = CL_UNORM_INT8; + break; + case SURFACE_FORMAT_L8: + format.image_channel_order = CL_R; + format.image_channel_data_type = CL_UNORM_INT8; + break; + case SURFACE_FORMAT_G32R32F: + format.image_channel_order = CL_RG; + format.image_channel_data_type = CL_FLOAT; + break; + case SURFACE_FORMAT_G16R16F: + format.image_channel_order = CL_RG; + format.image_channel_data_type = CL_HALF_FLOAT; + break; + case SURFACE_FORMAT_G16R16: + format.image_channel_order = CL_RG; + format.image_channel_data_type = CL_UNORM_INT16; + break; + case SURFACE_FORMAT_A8L8: + format.image_channel_order = CL_RG; + format.image_channel_data_type = CL_UNORM_INT8; + break; + case SURFACE_FORMAT_A32B32G32R32F: + format.image_channel_order = CL_RGBA; + format.image_channel_data_type = CL_FLOAT; + break; + case SURFACE_FORMAT_A16B16G16R16F: + format.image_channel_order = CL_RGBA; + format.image_channel_data_type = CL_HALF_FLOAT; + break; + case SURFACE_FORMAT_A16B16G16R16: + format.image_channel_order = CL_RGBA; + format.image_channel_data_type = CL_UNORM_INT16; + break; + case SURFACE_FORMAT_A8B8G8R8: + format.image_channel_order = CL_RGBA; + format.image_channel_data_type = CL_UNORM_INT8; + break; + case SURFACE_FORMAT_X8B8G8R8: + format.image_channel_order = CL_RGBA; + format.image_channel_data_type = CL_UNORM_INT8; + break; + case SURFACE_FORMAT_A8R8G8B8: + format.image_channel_order = CL_BGRA; + format.image_channel_data_type = CL_UNORM_INT8; + break; + case SURFACE_FORMAT_X8R8G8B8: + format.image_channel_order = CL_BGRA; + format.image_channel_data_type = CL_UNORM_INT8; + break; + case SURFACE_FORMAT_NV12: + format.image_channel_order = CL_R; + format.image_channel_data_type = CL_UNORM_INT8; + break; + case 
SURFACE_FORMAT_YV12: + format.image_channel_order = CL_R; + format.image_channel_data_type = CL_UNORM_INT8; + break; + default: + log_error("SurfaceFormatToOCL(): Unknown surface format!\n"); + return false; + break; + } + + return true; +} + +void SurfaceFormatToString( TSurfaceFormat surfaceFormat, std::string &str ) +{ + switch(surfaceFormat) + { + case SURFACE_FORMAT_R32F: + str = "R32F"; + break; + case SURFACE_FORMAT_R16F: + str = "R16F"; + break; + case SURFACE_FORMAT_L16: + str = "L16"; + break; + case SURFACE_FORMAT_A8: + str = "A8"; + break; + case SURFACE_FORMAT_L8: + str = "L8"; + break; + case SURFACE_FORMAT_G32R32F: + str = "G32R32F"; + break; + case SURFACE_FORMAT_G16R16F: + str = "G16R16F"; + break; + case SURFACE_FORMAT_G16R16: + str = "G16R16"; + break; + case SURFACE_FORMAT_A8L8: + str = "A8L8"; + break; + case SURFACE_FORMAT_A32B32G32R32F: + str = "A32B32G32R32F"; + break; + case SURFACE_FORMAT_A16B16G16R16F: + str = "A16B16G16R16F"; + break; + case SURFACE_FORMAT_A16B16G16R16: + str = "A16B16G16R16"; + break; + case SURFACE_FORMAT_A8B8G8R8: + str = "A8B8G8R8"; + break; + case SURFACE_FORMAT_X8B8G8R8: + str = "X8B8G8R8"; + break; + case SURFACE_FORMAT_A8R8G8B8: + str = "A8R8G8B8"; + break; + case SURFACE_FORMAT_X8R8G8B8: + str = "X8R8G8B8"; + break; + case SURFACE_FORMAT_NV12: + str = "NV12"; + break; + case SURFACE_FORMAT_YV12: + str = "YV12"; + break; + default: + log_error("SurfaceFormatToString(): unknown surface format!\n"); + str = "unknown"; + break; + } +} + +bool MediaSurfaceCreate(cl_dx9_media_adapter_type_khr adapterType, unsigned int width, unsigned int height, TSurfaceFormat surfaceFormat, + CDeviceWrapper &device, std::auto_ptr &surface, bool sharedHandle, void **objectSharedHandle) +{ + switch (adapterType) + { +#if defined(_WIN32) + case CL_ADAPTER_D3D9_KHR: + { + surface = std::auto_ptr(new CD3D9SurfaceWrapper); + CD3D9SurfaceWrapper *d3dSurface = static_cast(surface.get()); + HRESULT hr = 0; + D3DFORMAT d3dFormat = 
SurfaceFormatToD3D(surfaceFormat); + LPDIRECT3DDEVICE9 d3d9Device = (LPDIRECT3DDEVICE9)device.Device(); + hr = d3d9Device->CreateOffscreenPlainSurface(width, height, d3dFormat, D3DPOOL_DEFAULT, &(*d3dSurface), + sharedHandle ? objectSharedHandle: 0); + + if ( FAILED(hr)) + { + log_error("CreateOffscreenPlainSurface failed\n"); + return false; + } + } + break; + case CL_ADAPTER_D3D9EX_KHR: + { + surface = std::auto_ptr(new CD3D9SurfaceWrapper); + CD3D9SurfaceWrapper *d3dSurface = static_cast(surface.get()); + HRESULT hr = 0; + D3DFORMAT d3dFormat = SurfaceFormatToD3D(surfaceFormat); + LPDIRECT3DDEVICE9EX d3d9ExDevice = (LPDIRECT3DDEVICE9EX)device.Device(); + hr = d3d9ExDevice->CreateOffscreenPlainSurface(width, height, d3dFormat, D3DPOOL_DEFAULT, &(*d3dSurface), + sharedHandle ? objectSharedHandle: 0); + + if ( FAILED(hr)) + { + log_error("CreateOffscreenPlainSurface failed\n"); + return false; + } + } + break; + case CL_ADAPTER_DXVA_KHR: + { + surface = std::auto_ptr(new CD3D9SurfaceWrapper); + CD3D9SurfaceWrapper *d3dSurface = static_cast(surface.get()); + HRESULT hr = 0; + D3DFORMAT d3dFormat = SurfaceFormatToD3D(surfaceFormat); + IDXVAHD_Device *dxvaDevice = (IDXVAHD_Device *)device.Device(); + hr = dxvaDevice->CreateVideoSurface(width, height, d3dFormat, D3DPOOL_DEFAULT, 0, + DXVAHD_SURFACE_TYPE_VIDEO_INPUT, 1, &(*d3dSurface), sharedHandle ? objectSharedHandle: 0); + + if ( FAILED(hr)) + { + log_error("CreateVideoSurface failed\n"); + return false; + } + } + break; +#endif + default: + log_error("MediaSurfaceCreate(): Unknown adapter type!\n"); + return false; + break; + } + + return true; +} + +int DetectFloatToHalfRoundingMode( cl_command_queue q ) // Returns CL_SUCCESS on success +{ + cl_int err = CL_SUCCESS; + + if( gFloatToHalfRoundingMode == kDefaultRoundingMode ) + { + // Some numbers near 0.5f, that we look at to see how the values are rounded. 
+ static const cl_uint inData[4*4] = { 0x3f000fffU, 0x3f001000U, 0x3f001001U, 0U, 0x3f001fffU, 0x3f002000U, 0x3f002001U, 0U, + 0x3f002fffU, 0x3f003000U, 0x3f003001U, 0U, 0x3f003fffU, 0x3f004000U, 0x3f004001U, 0U }; + static const size_t count = sizeof( inData ) / (4*sizeof( inData[0] )); + const float *inp = (const float*) inData; + cl_context context = NULL; + + // Create an input buffer + err = clGetCommandQueueInfo( q, CL_QUEUE_CONTEXT, sizeof(context), &context, NULL ); + if( err ) + { + log_error( "Error: could not get context from command queue in DetectFloatToHalfRoundingMode (%d)", err ); + return err; + } + + cl_mem inBuf = clCreateBuffer( context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR | CL_MEM_ALLOC_HOST_PTR, sizeof( inData ), (void*) inData, &err ); + if( NULL == inBuf || err ) + { + log_error( "Error: could not create input buffer in DetectFloatToHalfRoundingMode (err: %d)", err ); + return err; + } + + // Create a small output image + cl_image_format fmt = { CL_RGBA, CL_HALF_FLOAT }; + cl_image_desc imageDesc = { 0 }; + imageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; + imageDesc.image_width = count; + imageDesc.image_height = 1; + + cl_mem outImage = clCreateImage(context, CL_MEM_READ_WRITE, &fmt, &imageDesc, 0, &err); + if( NULL == outImage || err ) + { + log_error( "Error: could not create half float out image in DetectFloatToHalfRoundingMode (err: %d)", err ); + clReleaseMemObject( inBuf ); + return err; + } + + // Create our program, and a kernel + const char *kernel[1] = { + "kernel void detect_round( global float4 *in, write_only image2d_t out )\n" + "{\n" + " write_imagef( out, (int2)(get_global_id(0),0), in[get_global_id(0)] );\n" + "}\n" }; + cl_program program = clCreateProgramWithSource( context, 1, kernel, NULL, &err ); + if( NULL == program || err ) + { + log_error( "Error: could not create program in DetectFloatToHalfRoundingMode (err: %d)", err ); + clReleaseMemObject( inBuf ); + clReleaseMemObject( outImage ); + return err; + } + + 
cl_device_id device = NULL; + err = clGetCommandQueueInfo( q, CL_QUEUE_DEVICE, sizeof(device), &device, NULL ); + if( err ) + { + log_error( "Error: could not get device from command queue in DetectFloatToHalfRoundingMode (%d)", err ); + clReleaseMemObject( inBuf ); + clReleaseMemObject( outImage ); + clReleaseProgram( program ); + return err; + } + + err = clBuildProgram( program, 1, &device, "", NULL, NULL ); + if( err ) + { + log_error( "Error: could not build program in DetectFloatToHalfRoundingMode (%d)", err ); + clReleaseMemObject( inBuf ); + clReleaseMemObject( outImage ); + clReleaseProgram( program ); + return err; + } + + cl_kernel k = clCreateKernel( program, "detect_round", &err ); + if( NULL == k || err ) + { + log_error( "Error: could not create kernel in DetectFloatToHalfRoundingMode (%d)", err ); + clReleaseMemObject( inBuf ); + clReleaseMemObject( outImage ); + clReleaseProgram( program ); + return err; + } + + err = clSetKernelArg( k, 0, sizeof( cl_mem ), &inBuf ); + if( err ) + { + log_error( "Error: could not set argument 0 of kernel in DetectFloatToHalfRoundingMode (%d)", err ); + clReleaseMemObject( inBuf ); + clReleaseMemObject( outImage ); + clReleaseProgram( program ); + clReleaseKernel( k ); + return err; + } + + err = clSetKernelArg( k, 1, sizeof( cl_mem ), &outImage ); + if( err ) + { + log_error( "Error: could not set argument 1 of kernel in DetectFloatToHalfRoundingMode (%d)", err ); + clReleaseMemObject( inBuf ); + clReleaseMemObject( outImage ); + clReleaseProgram( program ); + clReleaseKernel( k ); + return err; + } + + // Run the kernel + size_t global_work_size = count; + err = clEnqueueNDRangeKernel( q, k, 1, NULL, &global_work_size, NULL, 0, NULL, NULL ); + if( err ) + { + log_error( "Error: could not enqueue kernel in DetectFloatToHalfRoundingMode (%d)", err ); + clReleaseMemObject( inBuf ); + clReleaseMemObject( outImage ); + clReleaseProgram( program ); + clReleaseKernel( k ); + return err; + } + + // read the results + 
cl_ushort outBuf[count*4]; + memset( outBuf, -1, sizeof( outBuf ) ); + size_t origin[3] = {0,0,0}; + size_t region[3] = {count,1,1}; + err = clEnqueueReadImage( q, outImage, CL_TRUE, origin, region, 0, 0, outBuf, 0, NULL, NULL ); + if( err ) + { + log_error( "Error: could not read output image in DetectFloatToHalfRoundingMode (%d)", err ); + clReleaseMemObject( inBuf ); + clReleaseMemObject( outImage ); + clReleaseProgram( program ); + clReleaseKernel( k ); + return err; + } + + // Generate our list of reference results + cl_ushort rte_ref[count*4]; + cl_ushort rtz_ref[count*4]; + for( size_t i = 0; i < 4 * count; i++ ) + { + rte_ref[i] = float2half_rte( inp[i] ); + rtz_ref[i] = float2half_rtz( inp[i] ); + } + + // Verify that we got something in either rtz or rte mode + if( 0 == memcmp( rte_ref, outBuf, sizeof( rte_ref )) ) + { + log_info( "Autodetected float->half rounding mode to be rte\n" ); + gFloatToHalfRoundingMode = kRoundToNearestEven; + } + else if ( 0 == memcmp( rtz_ref, outBuf, sizeof( rtz_ref )) ) + { + log_info( "Autodetected float->half rounding mode to be rtz\n" ); + gFloatToHalfRoundingMode = kRoundTowardZero; + } + else + { + log_error( "ERROR: float to half conversions proceed with invalid rounding mode!\n" ); + log_info( "\nfor:" ); + for( size_t i = 0; i < count; i++ ) + log_info( " {%a, %a, %a, %a},", inp[4*i], inp[4*i+1], inp[4*i+2], inp[4*i+3] ); + log_info( "\ngot:" ); + for( size_t i = 0; i < count; i++ ) + log_info( " {0x%4.4x, 0x%4.4x, 0x%4.4x, 0x%4.4x},", outBuf[4*i], outBuf[4*i+1], outBuf[4*i+2], outBuf[4*i+3] ); + log_info( "\nrte:" ); + for( size_t i = 0; i < count; i++ ) + log_info( " {0x%4.4x, 0x%4.4x, 0x%4.4x, 0x%4.4x},", rte_ref[4*i], rte_ref[4*i+1], rte_ref[4*i+2], rte_ref[4*i+3] ); + log_info( "\nrtz:" ); + for( size_t i = 0; i < count; i++ ) + log_info( " {0x%4.4x, 0x%4.4x, 0x%4.4x, 0x%4.4x},", rtz_ref[4*i], rtz_ref[4*i+1], rtz_ref[4*i+2], rtz_ref[4*i+3] ); + log_info( "\n" ); + err = -1; + gFloatToHalfRoundingMode = 
kRoundingModeCount; // illegal value + } + + // clean up + clReleaseMemObject( inBuf ); + clReleaseMemObject( outImage ); + clReleaseProgram( program ); + clReleaseKernel( k ); + return err; + } + + // Make sure that the rounding mode was successfully detected, if we checked earlier + if( gFloatToHalfRoundingMode != kRoundToNearestEven && gFloatToHalfRoundingMode != kRoundTowardZero) + return -2; + + return err; +} + +cl_ushort convert_float_to_half( float f ) +{ + switch( gFloatToHalfRoundingMode ) + { + case kRoundToNearestEven: + return float2half_rte( f ); + case kRoundTowardZero: + return float2half_rtz( f ); + default: + log_error( "ERROR: Test internal error -- unhandled or unknown float->half rounding mode.\n" ); + exit(-1); + return 0xffff; + } + +} + +cl_ushort float2half_rte( float f ) +{ + union{ float f; cl_uint u; } u = {f}; + cl_uint sign = (u.u >> 16) & 0x8000; + float x = fabsf(f); + + //Nan + if( x != x ) + { + u.u >>= (24-11); + u.u &= 0x7fff; + u.u |= 0x0200; //silence the NaN + return u.u | sign; + } + + // overflow + if( x >= MAKE_HEX_FLOAT(0x1.ffep15f, 0x1ffeL, 3) ) + return 0x7c00 | sign; + + // underflow + if( x <= MAKE_HEX_FLOAT(0x1.0p-25f, 0x1L, -25) ) + return sign; // The halfway case can return 0x0001 or 0. 0 is even. 
+ + // very small + if( x < MAKE_HEX_FLOAT(0x1.8p-24f, 0x18L, -28) ) + return sign | 1; + + // half denormal + if( x < MAKE_HEX_FLOAT(0x1.0p-14f, 0x1L, -14) ) + { + u.f = x * MAKE_HEX_FLOAT(0x1.0p-125f, 0x1L, -125); + return sign | u.u; + } + + u.f *= MAKE_HEX_FLOAT(0x1.0p13f, 0x1L, 13); + u.u &= 0x7f800000; + x += u.f; + u.f = x - u.f; + u.f *= MAKE_HEX_FLOAT(0x1.0p-112f, 0x1L, -112); + + return (u.u >> (24-11)) | sign; +} + +cl_ushort float2half_rtz( float f ) +{ + union{ float f; cl_uint u; } u = {f}; + cl_uint sign = (u.u >> 16) & 0x8000; + float x = fabsf(f); + + //Nan + if( x != x ) + { + u.u >>= (24-11); + u.u &= 0x7fff; + u.u |= 0x0200; //silence the NaN + return u.u | sign; + } + + // overflow + if( x >= MAKE_HEX_FLOAT(0x1.0p16f, 0x1L, 16) ) + { + if( x == INFINITY ) + return 0x7c00 | sign; + + return 0x7bff | sign; + } + + // underflow + if( x < MAKE_HEX_FLOAT(0x1.0p-24f, 0x1L, -24) ) + return sign; // The halfway case can return 0x0001 or 0. 0 is even. + + // half denormal + if( x < MAKE_HEX_FLOAT(0x1.0p-14f, 0x1L, -14) ) + { + x *= MAKE_HEX_FLOAT(0x1.0p24f, 0x1L, 24); + return (cl_ushort)((int) x | sign); + } + + u.u &= 0xFFFFE000U; + u.u -= 0x38000000U; + + return (u.u >> (24-11)) | sign; +} + +float convert_half_to_float( unsigned short halfValue ) +{ + // We have to take care of a few special cases, but in general, we just extract + // the same components from the half that exist in the float and re-stuff them + // For a description of the actual half format, see http://en.wikipedia.org/wiki/Half_precision + // Note: we store these in 32-bit ints to make the bit manipulations easier later + int sign = ( halfValue >> 15 ) & 0x0001; + int exponent = ( halfValue >> 10 ) & 0x001f; + int mantissa = ( halfValue ) & 0x03ff; + + // Note: we use a union here to be able to access the bits of a float directly + union + { + unsigned int bits; + float floatValue; + } outFloat; + + // Special cases first + if( exponent == 0 ) + { + if( mantissa == 0 ) + { + // If 
both exponent and mantissa are 0, the number is +/- 0 + outFloat.bits = sign << 31; + return outFloat.floatValue; // Already done! + } + + // If exponent is 0, it's a denormalized number, so we renormalize it + // Note: this is not terribly efficient, but oh well + while( ( mantissa & 0x00000400 ) == 0 ) + { + mantissa <<= 1; + exponent--; + } + + // The first bit is implicit, so we take it off and inc the exponent accordingly + exponent++; + mantissa &= ~(0x00000400); + } + else if( exponent == 31 ) // Special-case "numbers" + { + // If the exponent is 31, it's a special case number (+/- infinity or NAN). + // If the mantissa is 0, it's infinity, else it's NAN, but in either case, the packing + // method is the same + outFloat.bits = ( sign << 31 ) | 0x7f800000 | ( mantissa << 13 ); + return outFloat.floatValue; + } + + // Plain ol' normalized number, so adjust to the ranges a 32-bit float expects and repack + exponent += ( 127 - 15 ); + mantissa <<= 13; + + outFloat.bits = ( sign << 31 ) | ( exponent << 23 ) | mantissa; + return outFloat.floatValue; +} diff --git a/test_extensions/media_sharing/utils.h b/test_extensions/media_sharing/utils.h new file mode 100644 index 00000000..4731b268 --- /dev/null +++ b/test_extensions/media_sharing/utils.h @@ -0,0 +1,171 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef __UTILS_KHR_MEDIA_H +#define __UTILS_KHR_MEDIA_H + +#include "wrappers.h" +#include "CL/cl_dx9_media_sharing.h" + +#include "../../test_common/harness/typeWrappers.h" + +#include +#include +#include +#include + +extern clGetDeviceIDsFromDX9MediaAdapterKHR_fn clGetDeviceIDsFromDX9MediaAdapterKHR; +extern clCreateFromDX9MediaSurfaceKHR_fn clCreateFromDX9MediaSurfaceKHR; +extern clEnqueueAcquireDX9MediaSurfacesKHR_fn clEnqueueAcquireDX9MediaSurfacesKHR; +extern clEnqueueReleaseDX9MediaSurfacesKHR_fn clEnqueueReleaseDX9MediaSurfacesKHR; + +extern cl_platform_id gPlatformIDdetected; +extern cl_device_id gDeviceIDdetected; +extern cl_device_type gDeviceTypeSelected; + +#define NL "\n" +#define TEST_NOT_IMPLEMENTED -1 +#define TEST_NOT_SUPPORTED -2 + +enum TSurfaceFormat +{ + SURFACE_FORMAT_NV12, + SURFACE_FORMAT_YV12, + SURFACE_FORMAT_R32F, + SURFACE_FORMAT_R16F, + SURFACE_FORMAT_L16, + SURFACE_FORMAT_A8, + SURFACE_FORMAT_L8, + SURFACE_FORMAT_G32R32F, + SURFACE_FORMAT_G16R16F, + SURFACE_FORMAT_G16R16, + SURFACE_FORMAT_A8L8, + SURFACE_FORMAT_A32B32G32R32F, + SURFACE_FORMAT_A16B16G16R16F, + SURFACE_FORMAT_A16B16G16R16, + SURFACE_FORMAT_A8B8G8R8, + SURFACE_FORMAT_X8B8G8R8, + SURFACE_FORMAT_A8R8G8B8, + SURFACE_FORMAT_X8R8G8B8, +}; + +enum TContextFuncType +{ + CONTEXT_CREATE_DEFAULT, + CONTEXT_CREATE_FROM_TYPE, +}; + +enum TSharedHandleType +{ + SHARED_HANDLE_ENABLED, + SHARED_HANDLE_DISABLED, +}; + +class CResult { +public: + enum TTestResult { + TEST_NORESULT, + TEST_NOTSUPPORTED, + TEST_PASS, + TEST_FAIL, + TEST_ERROR, + }; + + CResult(); + ~CResult(); + + void ResultSub(TTestResult result); + TTestResult ResultLast() const; + int Result() const; + +private: + TTestResult _result; + TTestResult _resultLast; +}; + +bool ExtensionCheck(const std::string &extension, cl_device_id deviceID); +void FunctionContextCreateToString(TContextFuncType contextCreateFunction, std::string &contextFunction); +void AdapterToString(cl_dx9_media_adapter_type_khr adapterType, 
std::string &adapter); +cl_context_info AdapterTypeToContextInfo(cl_dx9_media_adapter_type_khr adapterType); + +//YUV utils +void YUVGenerateNV12(std::vector &yuv, unsigned int width, unsigned int height, + cl_uchar valueMin, cl_uchar valueMax, double valueAdd = 0.0); +void YUVGenerateYV12(std::vector &yuv, unsigned int width, unsigned int height, + cl_uchar valueMin, cl_uchar valueMax, double valueAdd = 0.0); +bool YUVGenerate(TSurfaceFormat surfaceFormat, std::vector &yuv, unsigned int width, unsigned int height, + cl_uchar valueMin, cl_uchar valueMax, double valueAdd = 0.0); +bool YUVSurfaceSetNV12(std::auto_ptr &surface, const std::vector &yuv, + unsigned int width, unsigned int height); +bool YUVSurfaceSetYV12(std::auto_ptr &surface, const std::vector &yuv, + unsigned int width, unsigned int height); +bool YUVSurfaceSet(TSurfaceFormat surfaceFormat, std::auto_ptr &surface, const std::vector &yuv, + unsigned int width, unsigned int height); +bool YUVSurfaceGetNV12(std::auto_ptr &surface, std::vector &yuv, + unsigned int width, unsigned int height); +bool YUVSurfaceGetYV12(std::auto_ptr &surface, std::vector &yuv, + unsigned int width, unsigned int height); +bool YUVSurfaceGet(TSurfaceFormat surfaceFormat, std::auto_ptr &surface, std::vector &yuv, + unsigned int width, unsigned int height); +bool YUVCompareNV12(const std::vector &yuvTest, const std::vector &yuvRef, + unsigned int width, unsigned int height); +bool YUVCompareYV12(const std::vector &yuvTest, const std::vector &yuvRef, + unsigned int width, unsigned int height); +bool YUVCompare(TSurfaceFormat surfaceFormat, const std::vector &yuvTest, const std::vector &yuvRef, + unsigned int width, unsigned int height); + +//other types utils +void DataGenerate(TSurfaceFormat surfaceFormat, cl_channel_type type, std::vector &data, unsigned int width, unsigned int height, + unsigned int channelNum, float cmin = 0.0f, float cmax = 1.0f, float add = 0.0f); +void DataGenerate(TSurfaceFormat surfaceFormat, 
cl_channel_type type, std::vector &data, unsigned int width, unsigned int height, + unsigned int channelNum, float cmin = 0.0f, float cmax = 1.0f, float add = 0.0f); +void DataGenerate(TSurfaceFormat surfaceFormat, cl_channel_type type, std::vector &data, unsigned int width, unsigned int height, + unsigned int channelNum, float cmin = 0.0f, float cmax = 1.0f, float add = 0.0f); +bool DataCompare(TSurfaceFormat surfaceFormat, cl_channel_type type, const std::vector &dataTest, const std::vector &dataExp, + unsigned int width, unsigned int height, unsigned int channelNum); +bool DataCompare(TSurfaceFormat surfaceFormat, cl_channel_type type, const std::vector &dataTest, const std::vector &dataExp, + unsigned int width, unsigned int height, unsigned int channelNum); +bool DataCompare(TSurfaceFormat surfaceFormat, cl_channel_type type, const std::vector &dataTest, const std::vector &dataExp, + unsigned int width, unsigned int height, unsigned int channelNum); + +bool GetImageInfo(cl_mem object, cl_image_format formatExp, size_t elementSizeExp, + size_t rowPitchExp, size_t slicePitchExp, size_t widthExp, + size_t heightExp, size_t depthExp, unsigned int planeExp); +bool GetMemObjInfo(cl_mem object, cl_dx9_media_adapter_type_khr adapterType, std::auto_ptr &surface, void *shareHandleExp); +bool ImageInfoVerify(cl_dx9_media_adapter_type_khr adapterType, const std::vector &memObjList, unsigned int width, unsigned int height, + std::auto_ptr &surface, void *sharedHandle); +bool ImageFormatCheck(cl_context context, cl_mem_object_type imageType, const cl_image_format imageFormatCheck); +unsigned int ChannelNum(TSurfaceFormat surfaceFormat); +unsigned int PlanesNum(TSurfaceFormat surfaceFormat); + +#if defined(_WIN32) +D3DFORMAT SurfaceFormatToD3D(TSurfaceFormat surfaceFormat); +#endif + +bool DeviceCreate(cl_dx9_media_adapter_type_khr adapterType, std::auto_ptr &device); +bool SurfaceFormatCheck(cl_dx9_media_adapter_type_khr adapterType, const CDeviceWrapper &device, 
TSurfaceFormat surfaceFormat); +bool SurfaceFormatToOCL(TSurfaceFormat surfaceFormat, cl_image_format &format); +void SurfaceFormatToString(TSurfaceFormat surfaceFormat, std::string &str ); +bool MediaSurfaceCreate(cl_dx9_media_adapter_type_khr adapterType, unsigned int width, unsigned int height, TSurfaceFormat surfaceFormat, + CDeviceWrapper &device, std::auto_ptr &surface, bool sharedHandle, void **objectSharedHandle); + +//imported from image helpers +cl_ushort float2half_rte( float f ); +cl_ushort float2half_rtz( float f ); +cl_ushort convert_float_to_half( float f ); +float convert_half_to_float( unsigned short halfValue ); +int DetectFloatToHalfRoundingMode( cl_command_queue ); + +#endif // __UTILS_KHR_MEDIA_H diff --git a/test_extensions/media_sharing/wrappers.cpp b/test_extensions/media_sharing/wrappers.cpp new file mode 100644 index 00000000..27907ca3 --- /dev/null +++ b/test_extensions/media_sharing/wrappers.cpp @@ -0,0 +1,550 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "wrappers.h" +#include "../../test_common/harness/errorHelpers.h" + +LPCTSTR CDeviceWrapper::WINDOW_TITLE = _T( "cl_khr_dx9_media_sharing" ); +const int CDeviceWrapper::WINDOW_WIDTH = 256; +const int CDeviceWrapper::WINDOW_HEIGHT = 256; +CDeviceWrapper::TAccelerationType CDeviceWrapper::accelerationType = CDeviceWrapper::ACCELERATION_HW; + +#if defined(_WIN32) +const D3DFORMAT CDXVAWrapper::RENDER_TARGET_FORMAT = D3DFMT_X8R8G8B8; +const D3DFORMAT CDXVAWrapper::VIDEO_FORMAT = D3DFMT_X8R8G8B8; +const unsigned int CDXVAWrapper::VIDEO_FPS = 60; +#endif + +#if defined(_WIN32) +static LRESULT WINAPI WndProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam) +{ + switch(msg) + { + case WM_DESTROY: + PostQuitMessage(0); + return 0; + case WM_PAINT: + ValidateRect(hWnd, 0); + return 0; + default: + break; + } + + return DefWindowProc(hWnd, msg, wParam, lParam); +} +#endif + +CDeviceWrapper::CDeviceWrapper() +#if defined(_WIN32) +:_hInstance(NULL),_hWnd(NULL) +#endif +{ + +} + +void CDeviceWrapper::WindowInit() +{ +#if defined(_WIN32) + _hInstance = GetModuleHandle(NULL); + static WNDCLASSEX wc = + { + sizeof(WNDCLASSEX), + CS_CLASSDC, + WndProc, + 0L, + 0L, + _hInstance, + NULL, + NULL, + NULL, + NULL, + WINDOW_TITLE, + NULL + }; + + RegisterClassEx(&wc); + + _hWnd = CreateWindow( + WINDOW_TITLE, + WINDOW_TITLE, + WS_OVERLAPPEDWINDOW, + 0, 0, + WINDOW_WIDTH, WINDOW_HEIGHT, + NULL, + NULL, + wc.hInstance, + NULL); + + if (!_hWnd) + { + log_error("Failed to create window"); + return; + } + + ShowWindow(_hWnd,SW_SHOWDEFAULT); + UpdateWindow(_hWnd); +#endif +} + +void CDeviceWrapper::WindowDestroy() +{ +#if defined(_WIN32) + if (_hWnd) + DestroyWindow(_hWnd); + _hWnd = NULL; +#endif +} + +#if defined(_WIN32) +HWND CDeviceWrapper::WindowHandle() const +{ + return _hWnd; +} +#endif + +int CDeviceWrapper::WindowWidth() const +{ + return WINDOW_WIDTH; +} + +int CDeviceWrapper::WindowHeight() const +{ + return WINDOW_HEIGHT; +} + +CDeviceWrapper::TAccelerationType 
CDeviceWrapper::AccelerationType() +{ + return accelerationType; +} + +void CDeviceWrapper::AccelerationType( TAccelerationType accelerationTypeNew ) +{ + accelerationType = accelerationTypeNew; +} + +CDeviceWrapper::~CDeviceWrapper() +{ + WindowDestroy(); +} + +#if defined(_WIN32) +CD3D9Wrapper::CD3D9Wrapper(): +_d3d9(NULL), _d3dDevice(NULL), _status(true), _adapterIdx(0), _adapterFound(false) +{ + WindowInit(); + + _d3d9 = Direct3DCreate9(D3D_SDK_VERSION); + if (!_d3d9) + { + log_error("Direct3DCreate9 failed\n"); + _status = false; + } +} + +CD3D9Wrapper::~CD3D9Wrapper() +{ + Destroy(); + + if(_d3d9) + _d3d9->Release(); + _d3d9 = 0; +} + +void CD3D9Wrapper::Destroy() +{ + if (_d3dDevice) + _d3dDevice->Release(); + _d3dDevice = 0; +} + +bool CD3D9Wrapper::Init() +{ + if (!WindowHandle()) + { + log_error("D3D9: Window is not created\n"); + _status = false; + return false; + } + + if(!_d3d9 || !_status || !_adapterFound) + return false; + + _d3d9->GetAdapterDisplayMode(_adapterIdx - 1, &_d3ddm); + + D3DPRESENT_PARAMETERS d3dParams; + ZeroMemory(&d3dParams, sizeof(d3dParams)); + + d3dParams.Windowed = TRUE; + d3dParams.BackBufferCount = 1; + d3dParams.SwapEffect = D3DSWAPEFFECT_DISCARD; + d3dParams.hDeviceWindow = WindowHandle(); + d3dParams.BackBufferWidth = WindowWidth(); + d3dParams.BackBufferHeight = WindowHeight(); + d3dParams.BackBufferFormat = _d3ddm.Format; + + DWORD processingType = (AccelerationType() == ACCELERATION_HW)? 
D3DCREATE_HARDWARE_VERTEXPROCESSING: + D3DCREATE_SOFTWARE_VERTEXPROCESSING; + + if ( FAILED( _d3d9->CreateDevice( _adapterIdx - 1, D3DDEVTYPE_HAL, WindowHandle(), + processingType, &d3dParams, &_d3dDevice) ) ) + { + log_error("CreateDevice failed\n"); + _status = false; + return false; + } + + _d3dDevice->BeginScene(); + _d3dDevice->Clear(0, NULL, D3DCLEAR_TARGET, 0, 1.0f, 0); + _d3dDevice->EndScene(); + + return true; +} + +void * CD3D9Wrapper::D3D() const +{ + return _d3d9; +} + +void *CD3D9Wrapper::Device() const +{ + return _d3dDevice; +} + +D3DFORMAT CD3D9Wrapper::Format() +{ + return _d3ddm.Format; +} + +D3DADAPTER_IDENTIFIER9 CD3D9Wrapper::Adapter() +{ + return _adapter; +} + +bool CD3D9Wrapper::Status() const +{ + return _status; +} + +bool CD3D9Wrapper::AdapterNext() +{ + if (!_status) + return false; + + _adapterFound = false; + for(; _adapterIdx < _d3d9->GetAdapterCount();) + { + ++_adapterIdx; + D3DCAPS9 caps; + if (FAILED(_d3d9->GetDeviceCaps(_adapterIdx - 1, D3DDEVTYPE_HAL, &caps))) + continue; + + if(FAILED(_d3d9->GetAdapterIdentifier(_adapterIdx - 1, 0, &_adapter))) + { + log_error("D3D9: GetAdapterIdentifier failed\n"); + _status = false; + return false; + } + + _adapterFound = true; + + Destroy(); + if(!Init()) + { + _status = false; + _adapterFound = false; + } + break; + } + + return _adapterFound; +} + +unsigned int CD3D9Wrapper::AdapterIdx() const +{ + return _adapterIdx - 1; +} + + +CD3D9ExWrapper::CD3D9ExWrapper(): +_d3d9Ex(NULL), _d3dDeviceEx(NULL), _status(true), _adapterIdx(0), _adapterFound(false) +{ + WindowInit(); + + HRESULT result = Direct3DCreate9Ex(D3D_SDK_VERSION, &_d3d9Ex); + if (FAILED(result) || !_d3d9Ex) + { + log_error("Direct3DCreate9Ex failed\n"); + _status = false; + } +} + +CD3D9ExWrapper::~CD3D9ExWrapper() +{ + Destroy(); + + if(_d3d9Ex) + _d3d9Ex->Release(); + _d3d9Ex = 0; +} + +void * CD3D9ExWrapper::D3D() const +{ + return _d3d9Ex; +} + +void *CD3D9ExWrapper::Device() const +{ + return _d3dDeviceEx; +} + +D3DFORMAT 
CD3D9ExWrapper::Format() +{ + return _d3ddmEx.Format; +} + +D3DADAPTER_IDENTIFIER9 CD3D9ExWrapper::Adapter() +{ + return _adapter; +} + +bool CD3D9ExWrapper::Init() +{ + if (!WindowHandle()) + { + log_error("D3D9EX: Window is not created\n"); + _status = false; + return false; + } + + if(!_d3d9Ex || !_status || !_adapterFound) + return false; + + RECT rect; + GetClientRect(WindowHandle(),&rect); + + D3DPRESENT_PARAMETERS d3dParams; + ZeroMemory(&d3dParams, sizeof(d3dParams)); + + d3dParams.Windowed = TRUE; + d3dParams.SwapEffect = D3DSWAPEFFECT_FLIP; + d3dParams.BackBufferFormat = D3DFMT_X8R8G8B8; + d3dParams.BackBufferWidth = WindowWidth(); + d3dParams.BackBufferHeight = WindowHeight(); + + d3dParams.BackBufferCount = 1; + d3dParams.hDeviceWindow = WindowHandle(); + + DWORD processingType = (AccelerationType() == ACCELERATION_HW)? D3DCREATE_HARDWARE_VERTEXPROCESSING: + D3DCREATE_SOFTWARE_VERTEXPROCESSING; + + if ( FAILED( _d3d9Ex->CreateDeviceEx( _adapterIdx - 1, D3DDEVTYPE_HAL, WindowHandle(), + processingType, &d3dParams, NULL, &_d3dDeviceEx) ) ) + { + log_error("CreateDeviceEx failed\n"); + _status = false; + return false; + } + + _d3dDeviceEx->BeginScene(); + _d3dDeviceEx->Clear(0, NULL, D3DCLEAR_TARGET, 0, 1.0f, 0); + _d3dDeviceEx->EndScene(); + + return true; +} + +void CD3D9ExWrapper::Destroy() +{ + if (_d3dDeviceEx) + _d3dDeviceEx->Release(); + _d3dDeviceEx = 0; +} + +bool CD3D9ExWrapper::Status() const +{ + return _status; +} + +bool CD3D9ExWrapper::AdapterNext() +{ + if (!_status) + return false; + + _adapterFound = false; + for(; _adapterIdx < _d3d9Ex->GetAdapterCount();) + { + ++_adapterIdx; + D3DCAPS9 caps; + if (FAILED(_d3d9Ex->GetDeviceCaps(_adapterIdx - 1, D3DDEVTYPE_HAL, &caps))) + continue; + + if(FAILED(_d3d9Ex->GetAdapterIdentifier(_adapterIdx - 1, 0, &_adapter))) + { + log_error("D3D9EX: GetAdapterIdentifier failed\n"); + _status = false; + return false; + } + + _adapterFound = true; + Destroy(); + if(!Init()) + { + _status = false; + 
_adapterFound = false; + } + + break; + } + + return _adapterFound; +} + +unsigned int CD3D9ExWrapper::AdapterIdx() const +{ + return _adapterIdx - 1; +} + +CDXVAWrapper::CDXVAWrapper(): +_dxvaDevice(NULL), _status(true), _adapterFound(false) +{ + _status = _d3d9.Status(); +} + +CDXVAWrapper::~CDXVAWrapper() +{ + DXVAHDDestroy(); +} + +void * CDXVAWrapper::Device() const +{ + return _dxvaDevice; +} + +bool CDXVAWrapper::Status() const +{ + return _status && _d3d9.Status(); +} + +bool CDXVAWrapper::AdapterNext() +{ + if (!_status) + return false; + + _adapterFound = _d3d9.AdapterNext(); + _status = _d3d9.Status(); + if (!_status) + { + _adapterFound = false; + return false; + } + + if (!_adapterFound) + return false; + + DXVAHDDestroy(); + _status = DXVAHDInit(); + if (!_status) + { + _adapterFound = false; + return false; + } + + return true; +} + +bool CDXVAWrapper::DXVAHDInit() +{ + if (!_status || !_d3d9.Status() || !_adapterFound) + return false; + + DXVAHD_RATIONAL fps = { VIDEO_FPS, 1 }; + + DXVAHD_CONTENT_DESC desc; + desc.InputFrameFormat= DXVAHD_FRAME_FORMAT_PROGRESSIVE; + desc.InputFrameRate = fps; + desc.InputWidth = WindowWidth(); + desc.InputHeight = WindowHeight(); + desc.OutputFrameRate = fps; + desc.OutputWidth = WindowWidth(); + desc.OutputHeight = WindowHeight(); + +#ifdef USE_SOFTWARE_PLUGIN + _status = false; + return false; +#endif + + HRESULT hr = DXVAHD_CreateDevice(static_cast(_d3d9.Device()), + &desc, DXVAHD_DEVICE_USAGE_PLAYBACK_NORMAL, NULL, &_dxvaDevice); + if(FAILED(hr)) + { + log_error("DXVAHD_CreateDevice failed\n"); + _status = false; + return false; + } + + return true; +} + +void CDXVAWrapper::DXVAHDDestroy() +{ + if (_dxvaDevice) + _dxvaDevice->Release(); + _dxvaDevice = 0; +} + +void * CDXVAWrapper::D3D() const +{ + return _d3d9.D3D(); +} + +unsigned int CDXVAWrapper::AdapterIdx() const +{ + return _d3d9.AdapterIdx(); +} + +const CD3D9ExWrapper & CDXVAWrapper::D3D9() const +{ + return _d3d9; +} + 
+CD3D9SurfaceWrapper::CD3D9SurfaceWrapper(): +mMem(NULL) +{ + +} + +CD3D9SurfaceWrapper::CD3D9SurfaceWrapper( IDirect3DSurface9* mem ): +mMem(mem) +{ + +} + +CD3D9SurfaceWrapper::~CD3D9SurfaceWrapper() +{ + if(mMem != NULL) + mMem->Release(); + mMem = NULL; +} + +#endif + +CSurfaceWrapper::CSurfaceWrapper() +{ + +} + +CSurfaceWrapper::~CSurfaceWrapper() +{ + +} diff --git a/test_extensions/media_sharing/wrappers.h b/test_extensions/media_sharing/wrappers.h new file mode 100644 index 00000000..bafc8a0a --- /dev/null +++ b/test_extensions/media_sharing/wrappers.h @@ -0,0 +1,170 @@ +// +// Copyright (c) 2017 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef __WRAPPERS_H +#define __WRAPPERS_H + +#if defined(_WIN32) +#include +#include +#include +#endif + +class CDeviceWrapper { +public: + enum TAccelerationType + { + ACCELERATION_HW, + ACCELERATION_SW, + }; + + CDeviceWrapper(); + virtual ~CDeviceWrapper(); + + virtual bool AdapterNext() = 0; + virtual unsigned int AdapterIdx() const = 0; + virtual void *Device() const = 0; + virtual bool Status() const = 0; + virtual void *D3D() const = 0; + +#if defined(_WIN32) + HWND WindowHandle() const; +#endif + int WindowWidth() const; + int WindowHeight() const; + void WindowInit(); + + + static TAccelerationType AccelerationType(); + static void AccelerationType(TAccelerationType accelerationTypeNew); + +private: + static LPCTSTR WINDOW_TITLE; + static const int WINDOW_WIDTH; + static const int WINDOW_HEIGHT; + static TAccelerationType accelerationType; + +#if defined(_WIN32) + HMODULE _hInstance; + HWND _hWnd; +#endif + + void WindowDestroy(); +}; + +class CSurfaceWrapper +{ +public: + CSurfaceWrapper(); + virtual ~CSurfaceWrapper(); +}; + +#if defined(_WIN32) +//windows specific wrappers +class CD3D9Wrapper: public CDeviceWrapper { +public: + CD3D9Wrapper(); + ~CD3D9Wrapper(); + + virtual bool AdapterNext(); + virtual unsigned int AdapterIdx() const; + virtual void *Device() const; + virtual bool Status() const; + virtual void *D3D() const; + +private: + LPDIRECT3D9 _d3d9; + LPDIRECT3DDEVICE9 _d3dDevice; + D3DDISPLAYMODE _d3ddm; + D3DADAPTER_IDENTIFIER9 _adapter; + bool _status; + unsigned int _adapterIdx; + bool _adapterFound; + + D3DFORMAT Format(); + D3DADAPTER_IDENTIFIER9 Adapter(); + bool Init(); + void Destroy(); +}; + +class CD3D9ExWrapper: public CDeviceWrapper { +public: + CD3D9ExWrapper(); + ~CD3D9ExWrapper(); + + virtual bool AdapterNext(); + virtual unsigned int AdapterIdx() const; + virtual void *Device() const; + virtual bool Status() const; + virtual void *D3D() const; + +private: + LPDIRECT3D9EX _d3d9Ex; + LPDIRECT3DDEVICE9EX _d3dDeviceEx; + 
D3DDISPLAYMODEEX _d3ddmEx; + D3DADAPTER_IDENTIFIER9 _adapter; + bool _status; + unsigned int _adapterIdx; + bool _adapterFound; + + D3DFORMAT Format(); + D3DADAPTER_IDENTIFIER9 Adapter(); + bool Init(); + void Destroy(); +}; + +class CDXVAWrapper: public CDeviceWrapper { +public: + CDXVAWrapper(); + ~CDXVAWrapper(); + + virtual bool AdapterNext(); + virtual unsigned int AdapterIdx() const; + virtual void *Device() const; + virtual bool Status() const; + virtual void *D3D() const; + const CD3D9ExWrapper &D3D9() const; + +private: + CD3D9ExWrapper _d3d9; + IDXVAHD_Device *_dxvaDevice; + bool _status; + bool _adapterFound; + + static const D3DFORMAT RENDER_TARGET_FORMAT; + static const D3DFORMAT VIDEO_FORMAT; + static const unsigned int VIDEO_FPS; + + bool DXVAHDInit(); + void DXVAHDDestroy(); +}; + +class CD3D9SurfaceWrapper: public CSurfaceWrapper +{ +public: + CD3D9SurfaceWrapper(); + CD3D9SurfaceWrapper( IDirect3DSurface9* mem ); + ~CD3D9SurfaceWrapper(); + + operator IDirect3DSurface9*() { return mMem; } + IDirect3DSurface9* * operator&() { return &mMem; } + IDirect3DSurface9* operator->() const { return mMem; } + +private: + IDirect3DSurface9* mMem; +}; +#endif + +#endif // __D3D_WRAPPERS